From db8716a2ec0b137191adcdcb622c9badbbcbede8 Mon Sep 17 00:00:00 2001 From: Tue Ly Date: Tue, 21 Dec 2021 10:31:42 -0500 Subject: [PATCH 001/293] [libc] Show average runtime for math single-input-single-output performance tests. Run performance tests in denormal and normal ranges separately and show more detailed results. Reviewed By: sivachandra Differential Revision: https://reviews.llvm.org/D116112 --- .../SingleInputSingleOutputDiff.h | 42 +++++++++++++++---- 1 file changed, 35 insertions(+), 7 deletions(-) diff --git a/libc/test/src/math/differential_testing/SingleInputSingleOutputDiff.h b/libc/test/src/math/differential_testing/SingleInputSingleOutputDiff.h index 9a516f9069ce..7cc7059db03a 100644 --- a/libc/test/src/math/differential_testing/SingleInputSingleOutputDiff.h +++ b/libc/test/src/math/differential_testing/SingleInputSingleOutputDiff.h @@ -46,28 +46,56 @@ template class SingleInputSingleOutputDiff { log << "Total number of differing results: " << diffCount << '\n'; } - static void runPerf(Func myFunc, Func otherFunc, const char *logFile) { - auto runner = [](Func func) { + static void runPerfInRange(Func myFunc, Func otherFunc, UIntType startingBit, + UIntType endingBit, + testutils::OutputFileStream &log) { + auto runner = [=](Func func) { volatile T result; - for (UIntType bits = 0;; ++bits) { + for (UIntType bits = startingBit;; ++bits) { T x = T(FPBits(bits)); result = func(x); - if (bits == UIntMax) + if (bits == endingBit) break; } }; - testutils::OutputFileStream log(logFile); Timer timer; timer.start(); runner(myFunc); timer.stop(); - log << " Run time of my function: " << timer.nanoseconds() << " ns \n"; + + UIntType numberOfRuns = endingBit - startingBit + 1; + double myAverage = static_cast(timer.nanoseconds()) / numberOfRuns; + log << "-- My function --\n"; + log << " Total time : " << timer.nanoseconds() << " ns \n"; + log << " Average runtime : " << myAverage << " ns/op \n"; + log << " Ops per second : " + << static_cast(1'000'000'000.0 / myAverage) << " op/s \n"; timer.start(); runner(otherFunc); timer.stop(); - log << "Run time of other function: " << timer.nanoseconds() << " ns \n"; + + double otherAverage = + static_cast(timer.nanoseconds()) / numberOfRuns; + log << "-- Other function --\n"; + log << " Total time : " << timer.nanoseconds() << " ns \n"; + log << " Average runtime : " << otherAverage << " ns/op \n"; + log << " Ops per second : " + << static_cast(1'000'000'000.0 / otherAverage) << " op/s \n"; + + log << "-- Average runtime ratio --\n"; + log << " Mine / Other's : " << myAverage / otherAverage << " \n"; + } + + static void runPerf(Func myFunc, Func otherFunc, const char *logFile) { + testutils::OutputFileStream log(logFile); + log << " Performance tests with inputs in denormal range:\n"; + runPerfInRange(myFunc, otherFunc, /* startingBit= */ UIntType(0), + /* endingBit= */ FPBits::MAX_SUBNORMAL, log); + log << "\n Performance tests with inputs in normal range:\n"; + runPerfInRange(myFunc, otherFunc, /* startingBit= */ FPBits::MIN_NORMAL, + /* endingBit= */ FPBits::MAX_NORMAL, log); } }; From a3ea9052d6a16b13607046df6a324403fb51888d Mon Sep 17 00:00:00 2001 From: Nemanja Ivanovic Date: Tue, 21 Dec 2021 13:35:17 -0600 Subject: [PATCH 002/293] [PowerPC] Do not increase cost for getUserCost with MMA types Commit 150681f increases cost of producing MMA types (vector pair and quad). However, it increases the cost for getUserCost() which is used in unrolling. 
As a result, loops that contain these types already (from the user code) cannot be unrolled (even with the user's unroll pragma). This was an unintended sideeffect. Reverting that portion of the commit to allow unrolling such loops. Differential revision: https://reviews.llvm.org/D115424 --- .../Target/PowerPC/PPCTargetTransformInfo.cpp | 4 - .../PowerPC/p10-respect-unroll-pragma.ll | 159 ++++++++++++++++++ 2 files changed, 159 insertions(+), 4 deletions(-) create mode 100644 llvm/test/Transforms/LoopUnroll/PowerPC/p10-respect-unroll-pragma.ll diff --git a/llvm/lib/Target/PowerPC/PPCTargetTransformInfo.cpp b/llvm/lib/Target/PowerPC/PPCTargetTransformInfo.cpp index c38dec08af90..deac0765b218 100644 --- a/llvm/lib/Target/PowerPC/PPCTargetTransformInfo.cpp +++ b/llvm/lib/Target/PowerPC/PPCTargetTransformInfo.cpp @@ -328,10 +328,6 @@ static bool isMMAType(Type *Ty) { InstructionCost PPCTTIImpl::getUserCost(const User *U, ArrayRef Operands, TTI::TargetCostKind CostKind) { - // Set the max cost if an MMA type is present (v256i1, v512i1). - if (isMMAType(U->getType())) - return InstructionCost::getMax(); - // We already implement getCastInstrCost and getMemoryOpCost where we perform // the vector adjustment there. if (isa(U) || isa(U) || isa(U)) diff --git a/llvm/test/Transforms/LoopUnroll/PowerPC/p10-respect-unroll-pragma.ll b/llvm/test/Transforms/LoopUnroll/PowerPC/p10-respect-unroll-pragma.ll new file mode 100644 index 000000000000..3e08b41744f8 --- /dev/null +++ b/llvm/test/Transforms/LoopUnroll/PowerPC/p10-respect-unroll-pragma.ll @@ -0,0 +1,159 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py +; RUN: opt -S -passes='default' -mtriple=powerpc64le-- -o - %s | \ +; RUN: FileCheck %s + +%0 = type <{ double }> + +define dso_local void @test(i32* %arg) #0 { +; CHECK-LABEL: @test( +; CHECK-NEXT: bb: +; CHECK-NEXT: br label [[BB16:%.*]] +; CHECK: bb16: +; CHECK-NEXT: [[I20:%.*]] = tail call <256 x i1> @llvm.ppc.vsx.lxvp(i8* nonnull inttoptr (i64 -32 to i8*)) +; CHECK-NEXT: [[I24:%.*]] = tail call { <16 x i8>, <16 x i8> } @llvm.ppc.vsx.disassemble.pair(<256 x i1> [[I20]]) +; CHECK-NEXT: [[I24_ELT:%.*]] = extractvalue { <16 x i8>, <16 x i8> } [[I24]], 0 +; CHECK-NEXT: store <16 x i8> [[I24_ELT]], <16 x i8>* inttoptr (i64 48 to <16 x i8>*), align 16 +; CHECK-NEXT: [[I24_ELT1:%.*]] = extractvalue { <16 x i8>, <16 x i8> } [[I24]], 1 +; CHECK-NEXT: store <16 x i8> [[I24_ELT1]], <16 x i8>* inttoptr (i64 64 to <16 x i8>*), align 64 +; CHECK-NEXT: [[I20_1:%.*]] = tail call <256 x i1> @llvm.ppc.vsx.lxvp(i8* nonnull inttoptr (i64 -32 to i8*)) +; CHECK-NEXT: [[I24_1:%.*]] = tail call { <16 x i8>, <16 x i8> } @llvm.ppc.vsx.disassemble.pair(<256 x i1> [[I20_1]]) +; CHECK-NEXT: [[I24_ELT_1:%.*]] = extractvalue { <16 x i8>, <16 x i8> } [[I24_1]], 0 +; CHECK-NEXT: store <16 x i8> [[I24_ELT_1]], <16 x i8>* inttoptr (i64 48 to <16 x i8>*), align 16 +; CHECK-NEXT: [[I24_ELT1_1:%.*]] = extractvalue { <16 x i8>, <16 x i8> } [[I24_1]], 1 +; CHECK-NEXT: store <16 x i8> [[I24_ELT1_1]], <16 x i8>* inttoptr (i64 64 to <16 x i8>*), align 64 +; CHECK-NEXT: [[I20_2:%.*]] = tail call <256 x i1> @llvm.ppc.vsx.lxvp(i8* nonnull inttoptr (i64 -32 to i8*)) +; CHECK-NEXT: [[I24_2:%.*]] = tail call { <16 x i8>, <16 x i8> } @llvm.ppc.vsx.disassemble.pair(<256 x i1> [[I20_2]]) +; CHECK-NEXT: [[I24_ELT_2:%.*]] = extractvalue { <16 x i8>, <16 x i8> } [[I24_2]], 0 +; CHECK-NEXT: store <16 x i8> [[I24_ELT_2]], <16 x i8>* inttoptr (i64 48 to <16 x i8>*), align 16 +; CHECK-NEXT: [[I24_ELT1_2:%.*]] = extractvalue { 
<16 x i8>, <16 x i8> } [[I24_2]], 1 +; CHECK-NEXT: store <16 x i8> [[I24_ELT1_2]], <16 x i8>* inttoptr (i64 64 to <16 x i8>*), align 64 +; CHECK-NEXT: [[I20_3:%.*]] = tail call <256 x i1> @llvm.ppc.vsx.lxvp(i8* nonnull inttoptr (i64 -32 to i8*)) +; CHECK-NEXT: [[I24_3:%.*]] = tail call { <16 x i8>, <16 x i8> } @llvm.ppc.vsx.disassemble.pair(<256 x i1> [[I20_3]]) +; CHECK-NEXT: [[I24_ELT_3:%.*]] = extractvalue { <16 x i8>, <16 x i8> } [[I24_3]], 0 +; CHECK-NEXT: store <16 x i8> [[I24_ELT_3]], <16 x i8>* inttoptr (i64 48 to <16 x i8>*), align 16 +; CHECK-NEXT: [[I24_ELT1_3:%.*]] = extractvalue { <16 x i8>, <16 x i8> } [[I24_3]], 1 +; CHECK-NEXT: store <16 x i8> [[I24_ELT1_3]], <16 x i8>* inttoptr (i64 64 to <16 x i8>*), align 64 +; CHECK-NEXT: [[I20_4:%.*]] = tail call <256 x i1> @llvm.ppc.vsx.lxvp(i8* nonnull inttoptr (i64 -32 to i8*)) +; CHECK-NEXT: [[I24_4:%.*]] = tail call { <16 x i8>, <16 x i8> } @llvm.ppc.vsx.disassemble.pair(<256 x i1> [[I20_4]]) +; CHECK-NEXT: [[I24_ELT_4:%.*]] = extractvalue { <16 x i8>, <16 x i8> } [[I24_4]], 0 +; CHECK-NEXT: store <16 x i8> [[I24_ELT_4]], <16 x i8>* inttoptr (i64 48 to <16 x i8>*), align 16 +; CHECK-NEXT: [[I24_ELT1_4:%.*]] = extractvalue { <16 x i8>, <16 x i8> } [[I24_4]], 1 +; CHECK-NEXT: store <16 x i8> [[I24_ELT1_4]], <16 x i8>* inttoptr (i64 64 to <16 x i8>*), align 64 +; CHECK-NEXT: [[I20_5:%.*]] = tail call <256 x i1> @llvm.ppc.vsx.lxvp(i8* nonnull inttoptr (i64 -32 to i8*)) +; CHECK-NEXT: [[I24_5:%.*]] = tail call { <16 x i8>, <16 x i8> } @llvm.ppc.vsx.disassemble.pair(<256 x i1> [[I20_5]]) +; CHECK-NEXT: [[I24_ELT_5:%.*]] = extractvalue { <16 x i8>, <16 x i8> } [[I24_5]], 0 +; CHECK-NEXT: store <16 x i8> [[I24_ELT_5]], <16 x i8>* inttoptr (i64 48 to <16 x i8>*), align 16 +; CHECK-NEXT: [[I24_ELT1_5:%.*]] = extractvalue { <16 x i8>, <16 x i8> } [[I24_5]], 1 +; CHECK-NEXT: store <16 x i8> [[I24_ELT1_5]], <16 x i8>* inttoptr (i64 64 to <16 x i8>*), align 64 +; CHECK-NEXT: [[I20_6:%.*]] = tail call <256 x i1> @llvm.ppc.vsx.lxvp(i8* nonnull inttoptr (i64 -32 to i8*)) +; CHECK-NEXT: [[I24_6:%.*]] = tail call { <16 x i8>, <16 x i8> } @llvm.ppc.vsx.disassemble.pair(<256 x i1> [[I20_6]]) +; CHECK-NEXT: [[I24_ELT_6:%.*]] = extractvalue { <16 x i8>, <16 x i8> } [[I24_6]], 0 +; CHECK-NEXT: store <16 x i8> [[I24_ELT_6]], <16 x i8>* inttoptr (i64 48 to <16 x i8>*), align 16 +; CHECK-NEXT: [[I24_ELT1_6:%.*]] = extractvalue { <16 x i8>, <16 x i8> } [[I24_6]], 1 +; CHECK-NEXT: store <16 x i8> [[I24_ELT1_6]], <16 x i8>* inttoptr (i64 64 to <16 x i8>*), align 64 +; CHECK-NEXT: [[I20_7:%.*]] = tail call <256 x i1> @llvm.ppc.vsx.lxvp(i8* nonnull inttoptr (i64 -32 to i8*)) +; CHECK-NEXT: [[I24_7:%.*]] = tail call { <16 x i8>, <16 x i8> } @llvm.ppc.vsx.disassemble.pair(<256 x i1> [[I20_7]]) +; CHECK-NEXT: [[I24_ELT_7:%.*]] = extractvalue { <16 x i8>, <16 x i8> } [[I24_7]], 0 +; CHECK-NEXT: store <16 x i8> [[I24_ELT_7]], <16 x i8>* inttoptr (i64 48 to <16 x i8>*), align 16 +; CHECK-NEXT: [[I24_ELT1_7:%.*]] = extractvalue { <16 x i8>, <16 x i8> } [[I24_7]], 1 +; CHECK-NEXT: store <16 x i8> [[I24_ELT1_7]], <16 x i8>* inttoptr (i64 64 to <16 x i8>*), align 64 +; CHECK-NEXT: [[I20_8:%.*]] = tail call <256 x i1> @llvm.ppc.vsx.lxvp(i8* nonnull inttoptr (i64 -32 to i8*)) +; CHECK-NEXT: [[I24_8:%.*]] = tail call { <16 x i8>, <16 x i8> } @llvm.ppc.vsx.disassemble.pair(<256 x i1> [[I20_8]]) +; CHECK-NEXT: [[I24_ELT_8:%.*]] = extractvalue { <16 x i8>, <16 x i8> } [[I24_8]], 0 +; CHECK-NEXT: store <16 x i8> [[I24_ELT_8]], <16 x i8>* inttoptr (i64 48 to <16 x i8>*), align 16 
+; CHECK-NEXT: [[I24_ELT1_8:%.*]] = extractvalue { <16 x i8>, <16 x i8> } [[I24_8]], 1 +; CHECK-NEXT: store <16 x i8> [[I24_ELT1_8]], <16 x i8>* inttoptr (i64 64 to <16 x i8>*), align 64 +; CHECK-NEXT: [[I20_9:%.*]] = tail call <256 x i1> @llvm.ppc.vsx.lxvp(i8* nonnull inttoptr (i64 -32 to i8*)) +; CHECK-NEXT: [[I24_9:%.*]] = tail call { <16 x i8>, <16 x i8> } @llvm.ppc.vsx.disassemble.pair(<256 x i1> [[I20_9]]) +; CHECK-NEXT: [[I24_ELT_9:%.*]] = extractvalue { <16 x i8>, <16 x i8> } [[I24_9]], 0 +; CHECK-NEXT: store <16 x i8> [[I24_ELT_9]], <16 x i8>* inttoptr (i64 48 to <16 x i8>*), align 16 +; CHECK-NEXT: [[I24_ELT1_9:%.*]] = extractvalue { <16 x i8>, <16 x i8> } [[I24_9]], 1 +; CHECK-NEXT: store <16 x i8> [[I24_ELT1_9]], <16 x i8>* inttoptr (i64 64 to <16 x i8>*), align 64 +; CHECK-NEXT: [[I20_10:%.*]] = tail call <256 x i1> @llvm.ppc.vsx.lxvp(i8* nonnull inttoptr (i64 -32 to i8*)) +; CHECK-NEXT: [[I24_10:%.*]] = tail call { <16 x i8>, <16 x i8> } @llvm.ppc.vsx.disassemble.pair(<256 x i1> [[I20_10]]) +; CHECK-NEXT: [[I24_ELT_10:%.*]] = extractvalue { <16 x i8>, <16 x i8> } [[I24_10]], 0 +; CHECK-NEXT: store <16 x i8> [[I24_ELT_10]], <16 x i8>* inttoptr (i64 48 to <16 x i8>*), align 16 +; CHECK-NEXT: [[I24_ELT1_10:%.*]] = extractvalue { <16 x i8>, <16 x i8> } [[I24_10]], 1 +; CHECK-NEXT: store <16 x i8> [[I24_ELT1_10]], <16 x i8>* inttoptr (i64 64 to <16 x i8>*), align 64 +; CHECK-NEXT: [[I20_11:%.*]] = tail call <256 x i1> @llvm.ppc.vsx.lxvp(i8* nonnull inttoptr (i64 -32 to i8*)) +; CHECK-NEXT: [[I24_11:%.*]] = tail call { <16 x i8>, <16 x i8> } @llvm.ppc.vsx.disassemble.pair(<256 x i1> [[I20_11]]) +; CHECK-NEXT: [[I24_ELT_11:%.*]] = extractvalue { <16 x i8>, <16 x i8> } [[I24_11]], 0 +; CHECK-NEXT: store <16 x i8> [[I24_ELT_11]], <16 x i8>* inttoptr (i64 48 to <16 x i8>*), align 16 +; CHECK-NEXT: [[I24_ELT1_11:%.*]] = extractvalue { <16 x i8>, <16 x i8> } [[I24_11]], 1 +; CHECK-NEXT: store <16 x i8> [[I24_ELT1_11]], <16 x i8>* inttoptr (i64 64 to <16 x i8>*), align 64 +; CHECK-NEXT: [[I20_12:%.*]] = tail call <256 x i1> @llvm.ppc.vsx.lxvp(i8* nonnull inttoptr (i64 -32 to i8*)) +; CHECK-NEXT: [[I24_12:%.*]] = tail call { <16 x i8>, <16 x i8> } @llvm.ppc.vsx.disassemble.pair(<256 x i1> [[I20_12]]) +; CHECK-NEXT: [[I24_ELT_12:%.*]] = extractvalue { <16 x i8>, <16 x i8> } [[I24_12]], 0 +; CHECK-NEXT: store <16 x i8> [[I24_ELT_12]], <16 x i8>* inttoptr (i64 48 to <16 x i8>*), align 16 +; CHECK-NEXT: [[I24_ELT1_12:%.*]] = extractvalue { <16 x i8>, <16 x i8> } [[I24_12]], 1 +; CHECK-NEXT: store <16 x i8> [[I24_ELT1_12]], <16 x i8>* inttoptr (i64 64 to <16 x i8>*), align 64 +; CHECK-NEXT: [[I20_13:%.*]] = tail call <256 x i1> @llvm.ppc.vsx.lxvp(i8* nonnull inttoptr (i64 -32 to i8*)) +; CHECK-NEXT: [[I24_13:%.*]] = tail call { <16 x i8>, <16 x i8> } @llvm.ppc.vsx.disassemble.pair(<256 x i1> [[I20_13]]) +; CHECK-NEXT: [[I24_ELT_13:%.*]] = extractvalue { <16 x i8>, <16 x i8> } [[I24_13]], 0 +; CHECK-NEXT: store <16 x i8> [[I24_ELT_13]], <16 x i8>* inttoptr (i64 48 to <16 x i8>*), align 16 +; CHECK-NEXT: [[I24_ELT1_13:%.*]] = extractvalue { <16 x i8>, <16 x i8> } [[I24_13]], 1 +; CHECK-NEXT: store <16 x i8> [[I24_ELT1_13]], <16 x i8>* inttoptr (i64 64 to <16 x i8>*), align 64 +; CHECK-NEXT: [[I20_14:%.*]] = tail call <256 x i1> @llvm.ppc.vsx.lxvp(i8* nonnull inttoptr (i64 -32 to i8*)) +; CHECK-NEXT: [[I24_14:%.*]] = tail call { <16 x i8>, <16 x i8> } @llvm.ppc.vsx.disassemble.pair(<256 x i1> [[I20_14]]) +; CHECK-NEXT: [[I24_ELT_14:%.*]] = extractvalue { <16 x i8>, <16 x i8> } [[I24_14]], 0 +; 
CHECK-NEXT: store <16 x i8> [[I24_ELT_14]], <16 x i8>* inttoptr (i64 48 to <16 x i8>*), align 16 +; CHECK-NEXT: [[I24_ELT1_14:%.*]] = extractvalue { <16 x i8>, <16 x i8> } [[I24_14]], 1 +; CHECK-NEXT: store <16 x i8> [[I24_ELT1_14]], <16 x i8>* inttoptr (i64 64 to <16 x i8>*), align 64 +; CHECK-NEXT: [[I20_15:%.*]] = tail call <256 x i1> @llvm.ppc.vsx.lxvp(i8* nonnull inttoptr (i64 -32 to i8*)) +; CHECK-NEXT: [[I24_15:%.*]] = tail call { <16 x i8>, <16 x i8> } @llvm.ppc.vsx.disassemble.pair(<256 x i1> [[I20_15]]) +; CHECK-NEXT: [[I24_ELT_15:%.*]] = extractvalue { <16 x i8>, <16 x i8> } [[I24_15]], 0 +; CHECK-NEXT: store <16 x i8> [[I24_ELT_15]], <16 x i8>* inttoptr (i64 48 to <16 x i8>*), align 16 +; CHECK-NEXT: [[I24_ELT1_15:%.*]] = extractvalue { <16 x i8>, <16 x i8> } [[I24_15]], 1 +; CHECK-NEXT: store <16 x i8> [[I24_ELT1_15]], <16 x i8>* inttoptr (i64 64 to <16 x i8>*), align 64 +; CHECK-NEXT: br label [[BB16]], !llvm.loop [[LOOP0:![0-9]+]] +; +bb: + %i = alloca i32*, align 8 + store i32* %arg, i32** %i, align 8 + %i1 = alloca [0 x %0]*, align 8 + %i2 = alloca double*, align 8 + %i3 = alloca i32, align 4 + %i4 = alloca i32, align 4 + %i5 = alloca i64, align 8 + %i6 = alloca i64, align 8 + %i7 = alloca <256 x i1>, align 32 + %i8 = load i32*, i32** %i, align 8 + %i9 = load i32, i32* %i8, align 4 + %i10 = sub nsw i32 %i9, 0 + store i32 %i10, i32* %i4, align 4 + %i11 = load i32, i32* %i4, align 4 + %i12 = ashr i32 %i11, 5 + %i13 = sext i32 %i12 to i64 + %i14 = load i64, i64* %i6, align 8 + %i15 = sub nsw i64 %i14, 1 + br label %bb16 + +bb16: ; preds = %bb16, %bb + %i17 = load i64, i64* %i5, align 8 + %i18 = icmp sge i64 %i17, 1 + %i19 = getelementptr i8, i8* null, i64 -32 + %i20 = call <256 x i1> @llvm.ppc.vsx.lxvp(i8* %i19) + store <256 x i1> %i20, <256 x i1>* %i7, align 32 + %i21 = getelementptr inbounds i8, i8* null, i64 48 + %i22 = bitcast i8* %i21 to <2 x double>* + %i23 = load <256 x i1>, <256 x i1>* %i7, align 32 + %i24 = call { <16 x i8>, <16 x i8> } @llvm.ppc.vsx.disassemble.pair(<256 x i1> %i23) + %i25 = bitcast <2 x double>* %i22 to { <16 x i8>, <16 x i8> }* + store { <16 x i8>, <16 x i8> } %i24, { <16 x i8>, <16 x i8> }* %i25, align 16 + br label %bb16, !llvm.loop !1 +} + +; Function Attrs: argmemonly nounwind readonly +declare <256 x i1> @llvm.ppc.vsx.lxvp(i8*) #1 + +; Function Attrs: nounwind readnone +declare { <16 x i8>, <16 x i8> } @llvm.ppc.vsx.disassemble.pair(<256 x i1>) #2 + +attributes #0 = { "no-trapping-math"="true" } +attributes #1 = { argmemonly nounwind readonly } +attributes #2 = { nounwind readnone } + +!1 = distinct !{!1, !2, !3, !4} +!2 = !{!"llvm.loop.vectorize.width", i32 1} +!3 = !{!"llvm.loop.interleave.count", i32 1} +!4 = !{!"llvm.loop.unroll.count", i32 16} From 1674d9b6b2da914619c7c197336bb74f7988cf38 Mon Sep 17 00:00:00 2001 From: Nemanja Ivanovic Date: Tue, 21 Dec 2021 14:28:41 -0600 Subject: [PATCH 003/293] [PowerPC] Fix vector equality comparison for v2i64 pre-Power8 The current code makes the assumption that equality comparison can be performed with a word comparison instruction. While this is true if the entire 64-bit results are used, it does not generally work. It is possible that the low order words and high order words produce different results and a user of only one will get the wrong result. This patch adds an and of the result words so that each word has the result of the comparison of the entire doubleword that contains it. 
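As a rough scalar sketch of the problem and the fix (this is not the lowering code itself, and the values are made up for illustration): comparing a 64-bit lane as two independent 32-bit words can leave the two words holding different results, so a user reading only one of them gets the wrong answer; AND-ing each word's result with its partner word's result puts the full doubleword comparison into both words.

    #include <cstdint>
    #include <cstdio>

    int main() {
      uint64_t a = 0x0000000100000000ULL; // high word differs from b, low word matches
      uint64_t b = 0x0000000000000000ULL;
      // Word-wise results, vcmpequw-style: all-ones mask per equal 32-bit word.
      uint32_t hi = (uint32_t)(a >> 32) == (uint32_t)(b >> 32) ? ~0u : 0u; // 0 (words differ)
      uint32_t lo = (uint32_t)a == (uint32_t)b ? ~0u : 0u;                 // all-ones (wrong on its own)
      // The fix: AND each word with the other word of the same doubleword.
      uint32_t fixed_hi = hi & lo;  // 0
      uint32_t fixed_lo = lo & hi;  // 0 -- both words now hold the doubleword result
      std::printf("%08x %08x -> %08x %08x\n", (unsigned)hi, (unsigned)lo,
                  (unsigned)fixed_hi, (unsigned)fixed_lo);
    }

In the vector lowering below, the same pairing is done with a {1, 0, 3, 2} shuffle of the v4i32 setcc result followed by an AND.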
Differential revision: https://reviews.llvm.org/D115678 --- llvm/lib/Target/PowerPC/PPCISelLowering.cpp | 19 ++++---- .../CodeGen/PowerPC/vec-icmpeq-v2i64-p7.ll | 48 +++++++++++++++++++ 2 files changed, 58 insertions(+), 9 deletions(-) create mode 100644 llvm/test/CodeGen/PowerPC/vec-icmpeq-v2i64-p7.ll diff --git a/llvm/lib/Target/PowerPC/PPCISelLowering.cpp b/llvm/lib/Target/PowerPC/PPCISelLowering.cpp index d6ee6a9d6525..8d6edf07bc53 100644 --- a/llvm/lib/Target/PowerPC/PPCISelLowering.cpp +++ b/llvm/lib/Target/PowerPC/PPCISelLowering.cpp @@ -3500,15 +3500,16 @@ SDValue PPCTargetLowering::LowerSETCC(SDValue Op, SelectionDAG &DAG) const { if (LHS.getValueType() == MVT::v2i64) { // Equality can be handled by casting to the legal type for Altivec // comparisons, everything else needs to be expanded. - if (CC == ISD::SETEQ || CC == ISD::SETNE) { - return DAG.getNode( - ISD::BITCAST, dl, MVT::v2i64, - DAG.getSetCC(dl, MVT::v4i32, - DAG.getNode(ISD::BITCAST, dl, MVT::v4i32, LHS), - DAG.getNode(ISD::BITCAST, dl, MVT::v4i32, RHS), CC)); - } - - return SDValue(); + if (CC != ISD::SETEQ && CC != ISD::SETNE) + return SDValue(); + SDValue SetCC32 = DAG.getSetCC( + dl, MVT::v4i32, DAG.getNode(ISD::BITCAST, dl, MVT::v4i32, LHS), + DAG.getNode(ISD::BITCAST, dl, MVT::v4i32, RHS), CC); + int ShuffV[] = {1, 0, 3, 2}; + SDValue Shuff = + DAG.getVectorShuffle(MVT::v4i32, dl, SetCC32, SetCC32, ShuffV); + return DAG.getBitcast( + MVT::v2i64, DAG.getNode(ISD::AND, dl, MVT::v4i32, Shuff, SetCC32)); } // We handle most of these in the usual way. diff --git a/llvm/test/CodeGen/PowerPC/vec-icmpeq-v2i64-p7.ll b/llvm/test/CodeGen/PowerPC/vec-icmpeq-v2i64-p7.ll new file mode 100644 index 000000000000..b63b142793ee --- /dev/null +++ b/llvm/test/CodeGen/PowerPC/vec-icmpeq-v2i64-p7.ll @@ -0,0 +1,48 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc -verify-machineinstrs -mcpu=pwr7 -mtriple=powerpc-aix- < %s | \ +; RUN: FileCheck %s +; RUN: llc -verify-machineinstrs -mcpu=pwr7 -mtriple=powerpc64le-- < %s | \ +; RUN: FileCheck %s --check-prefix=CHECK_LE +; RUN: llc -verify-machineinstrs -mcpu=pwr8 -mtriple=powerpc64le-- < %s | \ +; RUN: FileCheck %s --check-prefix=CHECK_P8LE +define i1 @foo(<2 x i64> %a) #0 { +; CHECK-LABEL: foo: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: xxswapd 35, 34 +; CHECK-NEXT: lwz 3, L..C0(2) # %const.0 +; CHECK-NEXT: vcmpequw 2, 2, 3 +; CHECK-NEXT: lxvw4x 35, 0, 3 +; CHECK-NEXT: addi 3, 1, -16 +; CHECK-NEXT: vperm 3, 2, 2, 3 +; CHECK-NEXT: xxland 0, 35, 34 +; CHECK-NEXT: stxvw4x 0, 0, 3 +; CHECK-NEXT: lwz 3, -12(1) +; CHECK-NEXT: blr +; +; CHECK_LE-LABEL: foo: +; CHECK_LE: # %bb.0: # %entry +; CHECK_LE-NEXT: xxswapd 35, 34 +; CHECK_LE-NEXT: addis 3, 2, .LCPI0_0@toc@ha +; CHECK_LE-NEXT: addi 3, 3, .LCPI0_0@toc@l +; CHECK_LE-NEXT: vcmpequw 2, 2, 3 +; CHECK_LE-NEXT: lvx 3, 0, 3 +; CHECK_LE-NEXT: addi 3, 1, -16 +; CHECK_LE-NEXT: vperm 3, 2, 2, 3 +; CHECK_LE-NEXT: xxland 34, 35, 34 +; CHECK_LE-NEXT: stvx 2, 0, 3 +; CHECK_LE-NEXT: ld 3, -16(1) +; CHECK_LE-NEXT: blr +; +; CHECK_P8LE-LABEL: foo: +; CHECK_P8LE: # %bb.0: # %entry +; CHECK_P8LE-NEXT: xxswapd 35, 34 +; CHECK_P8LE-NEXT: vcmpequd 2, 2, 3 +; CHECK_P8LE-NEXT: xxswapd 0, 34 +; CHECK_P8LE-NEXT: mffprd 3, 0 +; CHECK_P8LE-NEXT: blr +entry: + %0 = shufflevector <2 x i64> %a, <2 x i64> poison, <2 x i32> + %1 = icmp eq <2 x i64> %a, %0 + %2 = extractelement <2 x i1> %1, i32 0 + ret i1 %2 +} From 346ef5e5879e9143fe008c35c3dd6ea3c402634a Mon Sep 17 00:00:00 2001 From: Louis Dionne Date: Tue, 21 Dec 2021 15:31:18 
-0500 Subject: [PATCH 004/293] [libc++][NFC] Improve documentation of the various random_device implementations Also, move the setting of the macro closer to its point of use, which also has the benefit of uncluttering `__config`. --- libcxx/include/__config | 19 ----------- libcxx/include/__random/random_device.h | 44 +++++++++++++++++++++++++ 2 files changed, 44 insertions(+), 19 deletions(-) diff --git a/libcxx/include/__config b/libcxx/include/__config index 84ce728ac3a0..7d6cc16f7ac0 100644 --- a/libcxx/include/__config +++ b/libcxx/include/__config @@ -333,25 +333,6 @@ # define _LIBCPP_SHORT_WCHAR 1 #endif -#if defined(__OpenBSD__) - // Certain architectures provide arc4random(). Prefer using - // arc4random() over /dev/{u,}random to make it possible to obtain - // random data even when using sandboxing mechanisms such as chroots, - // Capsicum, etc. -# define _LIBCPP_USING_ARC4_RANDOM -#elif defined(__Fuchsia__) || defined(__wasi__) -# define _LIBCPP_USING_GETENTROPY -#elif defined(__native_client__) - // NaCl's sandbox (which PNaCl also runs in) doesn't allow filesystem access, - // including accesses to the special files under /dev. C++11's - // std::random_device is instead exposed through a NaCl syscall. -# define _LIBCPP_USING_NACL_RANDOM -#elif defined(_LIBCPP_WIN32API) -# define _LIBCPP_USING_WIN32_RANDOM -#else -# define _LIBCPP_USING_DEV_RANDOM -#endif - #if !defined(_LIBCPP_LITTLE_ENDIAN) && !defined(_LIBCPP_BIG_ENDIAN) # include # if __BYTE_ORDER == __LITTLE_ENDIAN diff --git a/libcxx/include/__random/random_device.h b/libcxx/include/__random/random_device.h index 835f726fdbcc..82dd1a111723 100644 --- a/libcxx/include/__random/random_device.h +++ b/libcxx/include/__random/random_device.h @@ -23,6 +23,50 @@ _LIBCPP_BEGIN_NAMESPACE_STD #if !defined(_LIBCPP_HAS_NO_RANDOM_DEVICE) +// Libc++ supports various implementations of std::random_device. +// +// _LIBCPP_USING_DEV_RANDOM +// Read entropy from the given file, by default `/dev/urandom`. +// If a token is provided, it is assumed to be the path to a file +// to read entropy from. This is the default behavior if nothing +// else is specified. This implementation requires storing state +// inside `std::random_device`. +// +// _LIBCPP_USING_ARC4_RANDOM +// Use arc4random(). This allows obtaining random data even when +// using sandboxing mechanisms. On some platforms like Apple, this +// is the recommended source of entropy for user-space programs. +// When this option is used, the token passed to `std::random_device`'s +// constructor *must* be "/dev/urandom" -- anything else is an error. +// +// _LIBCPP_USING_GETENTROPY +// Use getentropy(). +// When this option is used, the token passed to `std::random_device`'s +// constructor *must* be "/dev/urandom" -- anything else is an error. +// +// _LIBCPP_USING_NACL_RANDOM +// NaCl's sandbox (which PNaCl also runs in) doesn't allow filesystem access, +// including accesses to the special files under `/dev`. This implementation +// uses the NaCL syscall `nacl_secure_random_init()` to get entropy. +// When this option is used, the token passed to `std::random_device`'s +// constructor *must* be "/dev/urandom" -- anything else is an error. +// +// _LIBCPP_USING_WIN32_RANDOM +// Use rand_s(), for use on Windows. +// When this option is used, the token passed to `std::random_device`'s +// constructor *must* be "/dev/urandom" -- anything else is an error. 
+#if defined(__OpenBSD__) +# define _LIBCPP_USING_ARC4_RANDOM +#elif defined(__Fuchsia__) || defined(__wasi__) +# define _LIBCPP_USING_GETENTROPY +#elif defined(__native_client__) +# define _LIBCPP_USING_NACL_RANDOM +#elif defined(_LIBCPP_WIN32API) +# define _LIBCPP_USING_WIN32_RANDOM +#else +# define _LIBCPP_USING_DEV_RANDOM +#endif + class _LIBCPP_TYPE_VIS random_device { #ifdef _LIBCPP_USING_DEV_RANDOM From de4e0195ae1c39f1c3b07834b8e32c113f4f20eb Mon Sep 17 00:00:00 2001 From: Nemanja Ivanovic Date: Tue, 21 Dec 2021 14:53:55 -0600 Subject: [PATCH 005/293] [PowerPC] Add missed test case updates In commit 1674d9b6b2da914619c7c197336bb74f7988cf38, I missed adding the updates to existing test cases. This should bring the bots back to green. --- llvm/test/CodeGen/PowerPC/vec_cmpd_p7.ll | 10 ++++++ .../PowerPC/vector-popcnt-128-ult-ugt.ll | 10 ++++++ llvm/test/CodeGen/PowerPC/vsx.ll | 34 +++++++++++++++++-- 3 files changed, 52 insertions(+), 2 deletions(-) diff --git a/llvm/test/CodeGen/PowerPC/vec_cmpd_p7.ll b/llvm/test/CodeGen/PowerPC/vec_cmpd_p7.ll index 727c2d328406..3a7bfcfb19f1 100644 --- a/llvm/test/CodeGen/PowerPC/vec_cmpd_p7.ll +++ b/llvm/test/CodeGen/PowerPC/vec_cmpd_p7.ll @@ -10,11 +10,21 @@ define <2 x i64> @v2si64_cmp(<2 x i64> %x, <2 x i64> %y) nounwind readnone { ; CHECK-LABEL: v2si64_cmp: ; CHECK: # %bb.0: ; CHECK-NEXT: vcmpequw 2, 2, 3 +; CHECK-NEXT: addis 3, 2, .LCPI0_0@toc@ha +; CHECK-NEXT: addi 3, 3, .LCPI0_0@toc@l +; CHECK-NEXT: lvx 3, 0, 3 +; CHECK-NEXT: vperm 3, 2, 2, 3 +; CHECK-NEXT: xxland 34, 35, 34 ; CHECK-NEXT: blr ; ; CHECK-BE-LABEL: v2si64_cmp: ; CHECK-BE: # %bb.0: ; CHECK-BE-NEXT: vcmpequw 2, 2, 3 +; CHECK-BE-NEXT: addis 3, 2, .LCPI0_0@toc@ha +; CHECK-BE-NEXT: addi 3, 3, .LCPI0_0@toc@l +; CHECK-BE-NEXT: lxvw4x 35, 0, 3 +; CHECK-BE-NEXT: vperm 3, 2, 2, 3 +; CHECK-BE-NEXT: xxland 34, 35, 34 ; CHECK-BE-NEXT: blr %cmp = icmp eq <2 x i64> %x, %y %result = sext <2 x i1> %cmp to <2 x i64> diff --git a/llvm/test/CodeGen/PowerPC/vector-popcnt-128-ult-ugt.ll b/llvm/test/CodeGen/PowerPC/vector-popcnt-128-ult-ugt.ll index 0bcf03450b5d..eee3ff42bd13 100644 --- a/llvm/test/CodeGen/PowerPC/vector-popcnt-128-ult-ugt.ll +++ b/llvm/test/CodeGen/PowerPC/vector-popcnt-128-ult-ugt.ll @@ -11979,9 +11979,14 @@ define <2 x i64> @ugt_1_v2i64(<2 x i64> %0) { ; PWR7-NEXT: std 3, -16(1) ; PWR7-NEXT: addi 3, 1, -16 ; PWR7-NEXT: lxvw4x 0, 0, 3 +; PWR7-NEXT: addis 3, 2, .LCPI100_0@toc@ha +; PWR7-NEXT: addi 3, 3, .LCPI100_0@toc@l ; PWR7-NEXT: xxland 34, 34, 0 ; PWR7-NEXT: vcmpequw 2, 2, 3 +; PWR7-NEXT: lxvw4x 35, 0, 3 ; PWR7-NEXT: xxlnor 34, 34, 34 +; PWR7-NEXT: vperm 3, 2, 2, 3 +; PWR7-NEXT: xxland 34, 35, 34 ; PWR7-NEXT: blr ; ; PWR8-LABEL: ugt_1_v2i64: @@ -12045,8 +12050,13 @@ define <2 x i64> @ult_2_v2i64(<2 x i64> %0) { ; PWR7-NEXT: std 3, -16(1) ; PWR7-NEXT: addi 3, 1, -16 ; PWR7-NEXT: lxvw4x 0, 0, 3 +; PWR7-NEXT: addis 3, 2, .LCPI101_0@toc@ha +; PWR7-NEXT: addi 3, 3, .LCPI101_0@toc@l ; PWR7-NEXT: xxland 34, 34, 0 ; PWR7-NEXT: vcmpequw 2, 2, 3 +; PWR7-NEXT: lxvw4x 35, 0, 3 +; PWR7-NEXT: vperm 3, 2, 2, 3 +; PWR7-NEXT: xxland 34, 35, 34 ; PWR7-NEXT: blr ; ; PWR8-LABEL: ult_2_v2i64: diff --git a/llvm/test/CodeGen/PowerPC/vsx.ll b/llvm/test/CodeGen/PowerPC/vsx.ll index a62f006eaeb1..d882050b2e5a 100644 --- a/llvm/test/CodeGen/PowerPC/vsx.ll +++ b/llvm/test/CodeGen/PowerPC/vsx.ll @@ -2032,16 +2032,31 @@ define <2 x i1> @test65(<2 x i64> %a, <2 x i64> %b) { ; CHECK-LABEL: test65: ; CHECK: # %bb.0: ; CHECK-NEXT: vcmpequw v2, v2, v3 +; CHECK-NEXT: addis r3, r2, .LCPI59_0@toc@ha +; CHECK-NEXT: 
addi r3, r3, .LCPI59_0@toc@l +; CHECK-NEXT: lxvw4x v3, 0, r3 +; CHECK-NEXT: vperm v3, v2, v2, v3 +; CHECK-NEXT: xxland v2, v3, v2 ; CHECK-NEXT: blr ; ; CHECK-REG-LABEL: test65: ; CHECK-REG: # %bb.0: ; CHECK-REG-NEXT: vcmpequw v2, v2, v3 +; CHECK-REG-NEXT: addis r3, r2, .LCPI59_0@toc@ha +; CHECK-REG-NEXT: addi r3, r3, .LCPI59_0@toc@l +; CHECK-REG-NEXT: lxvw4x v3, 0, r3 +; CHECK-REG-NEXT: vperm v3, v2, v2, v3 +; CHECK-REG-NEXT: xxland v2, v3, v2 ; CHECK-REG-NEXT: blr ; ; CHECK-FISL-LABEL: test65: ; CHECK-FISL: # %bb.0: -; CHECK-FISL-NEXT: vcmpequw v2, v2, v3 +; CHECK-FISL-NEXT: vcmpequw v3, v2, v3 +; CHECK-FISL-NEXT: addis r3, r2, .LCPI59_0@toc@ha +; CHECK-FISL-NEXT: addi r3, r3, .LCPI59_0@toc@l +; CHECK-FISL-NEXT: lxvw4x v2, 0, r3 +; CHECK-FISL-NEXT: vperm v2, v3, v3, v2 +; CHECK-FISL-NEXT: xxland v2, v2, v3 ; CHECK-FISL-NEXT: blr ; ; CHECK-LE-LABEL: test65: @@ -2059,19 +2074,34 @@ define <2 x i1> @test66(<2 x i64> %a, <2 x i64> %b) { ; CHECK-LABEL: test66: ; CHECK: # %bb.0: ; CHECK-NEXT: vcmpequw v2, v2, v3 +; CHECK-NEXT: addis r3, r2, .LCPI60_0@toc@ha +; CHECK-NEXT: addi r3, r3, .LCPI60_0@toc@l +; CHECK-NEXT: lxvw4x v3, 0, r3 ; CHECK-NEXT: xxlnor v2, v2, v2 +; CHECK-NEXT: vperm v3, v2, v2, v3 +; CHECK-NEXT: xxland v2, v3, v2 ; CHECK-NEXT: blr ; ; CHECK-REG-LABEL: test66: ; CHECK-REG: # %bb.0: ; CHECK-REG-NEXT: vcmpequw v2, v2, v3 +; CHECK-REG-NEXT: addis r3, r2, .LCPI60_0@toc@ha +; CHECK-REG-NEXT: addi r3, r3, .LCPI60_0@toc@l +; CHECK-REG-NEXT: lxvw4x v3, 0, r3 ; CHECK-REG-NEXT: xxlnor v2, v2, v2 +; CHECK-REG-NEXT: vperm v3, v2, v2, v3 +; CHECK-REG-NEXT: xxland v2, v3, v2 ; CHECK-REG-NEXT: blr ; ; CHECK-FISL-LABEL: test66: ; CHECK-FISL: # %bb.0: ; CHECK-FISL-NEXT: vcmpequw v2, v2, v3 -; CHECK-FISL-NEXT: xxlnor v2, v2, v2 +; CHECK-FISL-NEXT: xxlnor v3, v2, v2 +; CHECK-FISL-NEXT: addis r3, r2, .LCPI60_0@toc@ha +; CHECK-FISL-NEXT: addi r3, r3, .LCPI60_0@toc@l +; CHECK-FISL-NEXT: lxvw4x v2, 0, r3 +; CHECK-FISL-NEXT: vperm v2, v3, v3, v2 +; CHECK-FISL-NEXT: xxland v2, v2, v3 ; CHECK-FISL-NEXT: blr ; ; CHECK-LE-LABEL: test66: From 2477e69c6df5ae172107107171b1f062b8c0c031 Mon Sep 17 00:00:00 2001 From: Kirill Stoimenov Date: Tue, 21 Dec 2021 20:43:31 +0000 Subject: [PATCH 006/293] [ASan] Change optimized callbacks visibility to hidden. Making callbacks hidden will remove PLT calls. 
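For context on why hiding the symbols removes the PLT indirection (a generic sketch, not code from this patch; the function names are made up): a default-visibility function in a shared object remains interposable, so calls to it are typically routed through a PLT stub, while a hidden symbol cannot be preempted and the call can bind directly within the DSO.

    // Sketch: compile with -fPIC into a shared object to see the difference.
    __attribute__((visibility("default"))) void cb_default();
    __attribute__((visibility("hidden"))) void cb_hidden();

    void use_callbacks() {
      cb_default(); // typically emitted as a call through cb_default@PLT
      cb_hidden();  // binds locally, no PLT entry needed
    }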
Reviewed By: vitalybuka Differential Revision: https://reviews.llvm.org/D116121 --- compiler-rt/lib/asan/asan_interface.inc | 33 ------------------------- compiler-rt/lib/asan/asan_rtl_x86_64.S | 1 + 2 files changed, 1 insertion(+), 33 deletions(-) diff --git a/compiler-rt/lib/asan/asan_interface.inc b/compiler-rt/lib/asan/asan_interface.inc index 54ddfe5a26f0..89ef552b7117 100644 --- a/compiler-rt/lib/asan/asan_interface.inc +++ b/compiler-rt/lib/asan/asan_interface.inc @@ -180,36 +180,3 @@ INTERFACE_FUNCTION(__asan_update_allocation_context) INTERFACE_WEAK_FUNCTION(__asan_default_options) INTERFACE_WEAK_FUNCTION(__asan_default_suppressions) INTERFACE_WEAK_FUNCTION(__asan_on_error) - -#if defined(__x86_64__) && !defined(__APPLE__) && !defined(SANITIZER_WINDOWS) - -# define ASAN_MEMORY_ACCESS_CALLBACK_ADD(s, reg, op) \ - INTERFACE_FUNCTION(__asan_check_##op##_add_##s##_##reg) - -# define ASAN_MEMORY_ACCESS_CALLBACKS_ADD(reg) \ - ASAN_MEMORY_ACCESS_CALLBACK_ADD(1, reg, load) \ - ASAN_MEMORY_ACCESS_CALLBACK_ADD(1, reg, store) \ - ASAN_MEMORY_ACCESS_CALLBACK_ADD(2, reg, load) \ - ASAN_MEMORY_ACCESS_CALLBACK_ADD(2, reg, store) \ - ASAN_MEMORY_ACCESS_CALLBACK_ADD(4, reg, load) \ - ASAN_MEMORY_ACCESS_CALLBACK_ADD(4, reg, store) \ - ASAN_MEMORY_ACCESS_CALLBACK_ADD(8, reg, load) \ - ASAN_MEMORY_ACCESS_CALLBACK_ADD(8, reg, store) \ - ASAN_MEMORY_ACCESS_CALLBACK_ADD(16, reg, load) \ - ASAN_MEMORY_ACCESS_CALLBACK_ADD(16, reg, store) - -ASAN_MEMORY_ACCESS_CALLBACKS_ADD(RAX) -ASAN_MEMORY_ACCESS_CALLBACKS_ADD(RBX) -ASAN_MEMORY_ACCESS_CALLBACKS_ADD(RCX) -ASAN_MEMORY_ACCESS_CALLBACKS_ADD(RDX) -ASAN_MEMORY_ACCESS_CALLBACKS_ADD(RSI) -ASAN_MEMORY_ACCESS_CALLBACKS_ADD(RDI) -ASAN_MEMORY_ACCESS_CALLBACKS_ADD(RBP) -ASAN_MEMORY_ACCESS_CALLBACKS_ADD(R8) -ASAN_MEMORY_ACCESS_CALLBACKS_ADD(R9) -ASAN_MEMORY_ACCESS_CALLBACKS_ADD(R12) -ASAN_MEMORY_ACCESS_CALLBACKS_ADD(R13) -ASAN_MEMORY_ACCESS_CALLBACKS_ADD(R14) -ASAN_MEMORY_ACCESS_CALLBACKS_ADD(R15) - -#endif // defined(__x86_64__) diff --git a/compiler-rt/lib/asan/asan_rtl_x86_64.S b/compiler-rt/lib/asan/asan_rtl_x86_64.S index 9dcb8627ba28..d27db745ed67 100644 --- a/compiler-rt/lib/asan/asan_rtl_x86_64.S +++ b/compiler-rt/lib/asan/asan_rtl_x86_64.S @@ -16,6 +16,7 @@ #define BEGINF(reg, op, s, i) \ .globl FNAME(reg, op, s, i) ;\ +.hidden FNAME(reg, op, s, i) ;\ ASM_TYPE_FUNCTION(FNAME(reg, op, s, i)) ;\ .cfi_startproc ;\ FNAME(reg, op, s, i): ;\ From beff71520b5be766ef06a0e0c1e67629fa39f429 Mon Sep 17 00:00:00 2001 From: Louis Dionne Date: Tue, 21 Dec 2021 16:50:34 -0500 Subject: [PATCH 007/293] [libc++] Partially revert 346ef5e5879e This moves the macro definitions back to __config, but keeps the improved documentation. 346ef5e5879e had broken the MinGW build. --- libcxx/include/__config | 44 +++++++++++++++++++++++++ libcxx/include/__random/random_device.h | 44 ------------------------- 2 files changed, 44 insertions(+), 44 deletions(-) diff --git a/libcxx/include/__config b/libcxx/include/__config index 7d6cc16f7ac0..720e12eac0dd 100644 --- a/libcxx/include/__config +++ b/libcxx/include/__config @@ -333,6 +333,50 @@ # define _LIBCPP_SHORT_WCHAR 1 #endif +// Libc++ supports various implementations of std::random_device. +// +// _LIBCPP_USING_DEV_RANDOM +// Read entropy from the given file, by default `/dev/urandom`. +// If a token is provided, it is assumed to be the path to a file +// to read entropy from. This is the default behavior if nothing +// else is specified. This implementation requires storing state +// inside `std::random_device`. 
+// +// _LIBCPP_USING_ARC4_RANDOM +// Use arc4random(). This allows obtaining random data even when +// using sandboxing mechanisms. On some platforms like Apple, this +// is the recommended source of entropy for user-space programs. +// When this option is used, the token passed to `std::random_device`'s +// constructor *must* be "/dev/urandom" -- anything else is an error. +// +// _LIBCPP_USING_GETENTROPY +// Use getentropy(). +// When this option is used, the token passed to `std::random_device`'s +// constructor *must* be "/dev/urandom" -- anything else is an error. +// +// _LIBCPP_USING_NACL_RANDOM +// NaCl's sandbox (which PNaCl also runs in) doesn't allow filesystem access, +// including accesses to the special files under `/dev`. This implementation +// uses the NaCL syscall `nacl_secure_random_init()` to get entropy. +// When this option is used, the token passed to `std::random_device`'s +// constructor *must* be "/dev/urandom" -- anything else is an error. +// +// _LIBCPP_USING_WIN32_RANDOM +// Use rand_s(), for use on Windows. +// When this option is used, the token passed to `std::random_device`'s +// constructor *must* be "/dev/urandom" -- anything else is an error. +#if defined(__OpenBSD__) +# define _LIBCPP_USING_ARC4_RANDOM +#elif defined(__Fuchsia__) || defined(__wasi__) +# define _LIBCPP_USING_GETENTROPY +#elif defined(__native_client__) +# define _LIBCPP_USING_NACL_RANDOM +#elif defined(_LIBCPP_WIN32API) +# define _LIBCPP_USING_WIN32_RANDOM +#else +# define _LIBCPP_USING_DEV_RANDOM +#endif + #if !defined(_LIBCPP_LITTLE_ENDIAN) && !defined(_LIBCPP_BIG_ENDIAN) # include # if __BYTE_ORDER == __LITTLE_ENDIAN diff --git a/libcxx/include/__random/random_device.h b/libcxx/include/__random/random_device.h index 82dd1a111723..835f726fdbcc 100644 --- a/libcxx/include/__random/random_device.h +++ b/libcxx/include/__random/random_device.h @@ -23,50 +23,6 @@ _LIBCPP_BEGIN_NAMESPACE_STD #if !defined(_LIBCPP_HAS_NO_RANDOM_DEVICE) -// Libc++ supports various implementations of std::random_device. -// -// _LIBCPP_USING_DEV_RANDOM -// Read entropy from the given file, by default `/dev/urandom`. -// If a token is provided, it is assumed to be the path to a file -// to read entropy from. This is the default behavior if nothing -// else is specified. This implementation requires storing state -// inside `std::random_device`. -// -// _LIBCPP_USING_ARC4_RANDOM -// Use arc4random(). This allows obtaining random data even when -// using sandboxing mechanisms. On some platforms like Apple, this -// is the recommended source of entropy for user-space programs. -// When this option is used, the token passed to `std::random_device`'s -// constructor *must* be "/dev/urandom" -- anything else is an error. -// -// _LIBCPP_USING_GETENTROPY -// Use getentropy(). -// When this option is used, the token passed to `std::random_device`'s -// constructor *must* be "/dev/urandom" -- anything else is an error. -// -// _LIBCPP_USING_NACL_RANDOM -// NaCl's sandbox (which PNaCl also runs in) doesn't allow filesystem access, -// including accesses to the special files under `/dev`. This implementation -// uses the NaCL syscall `nacl_secure_random_init()` to get entropy. -// When this option is used, the token passed to `std::random_device`'s -// constructor *must* be "/dev/urandom" -- anything else is an error. -// -// _LIBCPP_USING_WIN32_RANDOM -// Use rand_s(), for use on Windows. 
-// When this option is used, the token passed to `std::random_device`'s -// constructor *must* be "/dev/urandom" -- anything else is an error. -#if defined(__OpenBSD__) -# define _LIBCPP_USING_ARC4_RANDOM -#elif defined(__Fuchsia__) || defined(__wasi__) -# define _LIBCPP_USING_GETENTROPY -#elif defined(__native_client__) -# define _LIBCPP_USING_NACL_RANDOM -#elif defined(_LIBCPP_WIN32API) -# define _LIBCPP_USING_WIN32_RANDOM -#else -# define _LIBCPP_USING_DEV_RANDOM -#endif - class _LIBCPP_TYPE_VIS random_device { #ifdef _LIBCPP_USING_DEV_RANDOM From d26520f6f78785b0c4c296a8a992f2adb656c6ec Mon Sep 17 00:00:00 2001 From: Alexandre Ganea Date: Tue, 21 Dec 2021 17:41:26 -0500 Subject: [PATCH 008/293] [Clang] Own the CommandLineArgs in CodeGenOptions Fixes PR52704 : https://github.com/llvm/llvm-project/issues/52704 Differential Revision: https://reviews.llvm.org/D116011 --- clang/include/clang/Basic/CodeGenOptions.h | 2 +- clang/lib/Driver/Job.cpp | 2 ++ clang/lib/Frontend/CompilerInvocation.cpp | 2 +- llvm/include/llvm/MC/MCTargetOptions.h | 2 +- 4 files changed, 5 insertions(+), 3 deletions(-) diff --git a/clang/include/clang/Basic/CodeGenOptions.h b/clang/include/clang/Basic/CodeGenOptions.h index d4781b647b87..33ec03a17136 100644 --- a/clang/include/clang/Basic/CodeGenOptions.h +++ b/clang/include/clang/Basic/CodeGenOptions.h @@ -398,7 +398,7 @@ class CodeGenOptions : public CodeGenOptionsBase { /// Executable and command-line used to create a given CompilerInvocation. /// Most of the time this will be the full -cc1 command. const char *Argv0 = nullptr; - ArrayRef CommandLineArgs; + std::vector CommandLineArgs; /// The minimum hotness value a diagnostic needs in order to be included in /// optimization diagnostics. diff --git a/clang/lib/Driver/Job.cpp b/clang/lib/Driver/Job.cpp index 5b87106b6565..f63763effaff 100644 --- a/clang/lib/Driver/Job.cpp +++ b/clang/lib/Driver/Job.cpp @@ -388,6 +388,8 @@ int CC1Command::Execute(ArrayRef> Redirects, Argv.push_back(getExecutable()); Argv.append(getArguments().begin(), getArguments().end()); Argv.push_back(nullptr); + Argv.pop_back(); // The terminating null element shall not be part of the + // slice (main() behavior). // This flag simply indicates that the program couldn't start, which isn't // applicable here. diff --git a/clang/lib/Frontend/CompilerInvocation.cpp b/clang/lib/Frontend/CompilerInvocation.cpp index 106da642f361..b71addd84bfd 100644 --- a/clang/lib/Frontend/CompilerInvocation.cpp +++ b/clang/lib/Frontend/CompilerInvocation.cpp @@ -4518,7 +4518,7 @@ bool CompilerInvocation::CreateFromArgsImpl( // Store the command-line for using in the CodeView backend. Res.getCodeGenOpts().Argv0 = Argv0; - Res.getCodeGenOpts().CommandLineArgs = CommandLineArgs; + append_range(Res.getCodeGenOpts().CommandLineArgs, CommandLineArgs); FixupInvocation(Res, Diags, Args, DashX); diff --git a/llvm/include/llvm/MC/MCTargetOptions.h b/llvm/include/llvm/MC/MCTargetOptions.h index b006bb321819..3510eeca8953 100644 --- a/llvm/include/llvm/MC/MCTargetOptions.h +++ b/llvm/include/llvm/MC/MCTargetOptions.h @@ -65,7 +65,7 @@ class MCTargetOptions { std::string COFFOutputFilename; const char *Argv0 = nullptr; - ArrayRef CommandLineArgs; + ArrayRef CommandLineArgs; /// Additional paths to search for `.include` directives when using the /// integrated assembler. 
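To connect this to the header change above, a minimal sketch of the lifetime hazard an owning container avoids (the issue description is inferred from the patch title; the names below are hypothetical): an ArrayRef only borrows whatever storage it was built from, so keeping one in a long-lived options object is safe only if the original argument buffer outlives it, whereas a std::vector<std::string> owns copies of the strings.

    #include <string>
    #include <vector>
    #include "llvm/ADT/ArrayRef.h"

    struct Opts {
      llvm::ArrayRef<const char *> Args; // borrows caller storage
      // std::vector<std::string> Args;  // owning alternative, as in this patch
    };

    Opts makeOpts() {
      std::vector<const char *> argv = {"clang", "-cc1", "foo.c"};
      Opts O;
      O.Args = argv; // fine while 'argv' is alive...
      return O;      // ...but 'argv' is destroyed here, so O.Args dangles
    }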
From c1a14a5c3e6fef181f920b66ec159b6bfac4d457 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Martin=20Storsj=C3=B6?= Date: Wed, 8 Dec 2021 23:29:18 +0200 Subject: [PATCH 009/293] [libcxx] Use LIBCXX_EXECUTOR in new test configs This allows cross-testing (by setting LIBCXX_EXECUTOR to point to ssh.py) without making an entirely new test config file. Implicitly, this also fixes quoting of the python executable name (which is quoted in test/CMakeLists.txt). Differential Revision: https://reviews.llvm.org/D115398 --- libcxx/test/configs/apple-libc++-shared.cfg.in | 4 +--- libcxx/test/configs/cmake-bridge.cfg.in | 1 + libcxx/test/configs/ibm-libc++-shared.cfg.in | 4 +--- libcxx/test/configs/llvm-libc++-shared-gcc.cfg.in | 4 +--- libcxx/test/configs/llvm-libc++-shared.cfg.in | 4 +--- libcxx/test/configs/llvm-libc++-static.cfg.in | 4 +--- libcxxabi/test/configs/apple-libc++abi-shared.cfg.in | 4 +--- libcxxabi/test/configs/cmake-bridge.cfg.in | 1 + libcxxabi/test/configs/ibm-libc++abi-shared.cfg.in | 4 +--- 9 files changed, 9 insertions(+), 21 deletions(-) diff --git a/libcxx/test/configs/apple-libc++-shared.cfg.in b/libcxx/test/configs/apple-libc++-shared.cfg.in index f47ffa88354f..c48c19134a2b 100644 --- a/libcxx/test/configs/apple-libc++-shared.cfg.in +++ b/libcxx/test/configs/apple-libc++-shared.cfg.in @@ -7,8 +7,6 @@ # We also don't use a per-target include directory layout, so we have only one # include directory for the libc++ headers. -import sys - lit_config.load_config(config, '@CMAKE_CURRENT_BINARY_DIR@/cmake-bridge.cfg') config.substitutions.append(('%{flags}', @@ -21,7 +19,7 @@ config.substitutions.append(('%{link_flags}', '-nostdlib++ -L %{install}/lib -lc++' )) config.substitutions.append(('%{exec}', - '{} %{{libcxx}}/utils/run.py --execdir %T --env DYLD_LIBRARY_PATH=%{{install}}/lib -- '.format(sys.executable) + '%{executor} --execdir %T --env DYLD_LIBRARY_PATH=%{install}/lib -- ' )) import os, site diff --git a/libcxx/test/configs/cmake-bridge.cfg.in b/libcxx/test/configs/cmake-bridge.cfg.in index e60845e0beb3..9f031bf2148e 100644 --- a/libcxx/test/configs/cmake-bridge.cfg.in +++ b/libcxx/test/configs/cmake-bridge.cfg.in @@ -29,3 +29,4 @@ config.substitutions.append(('%{install}', '@CMAKE_BINARY_DIR@')) config.substitutions.append(('%{include}', '%{install}/@LIBCXX_INSTALL_INCLUDE_DIR@')) config.substitutions.append(('%{target-include}', '%{install}/@LIBCXX_INSTALL_INCLUDE_TARGET_DIR@')) config.substitutions.append(('%{lib}', '%{install}/@LIBCXX_INSTALL_LIBRARY_DIR@')) +config.substitutions.append(('%{executor}', '@LIBCXX_EXECUTOR@')) diff --git a/libcxx/test/configs/ibm-libc++-shared.cfg.in b/libcxx/test/configs/ibm-libc++-shared.cfg.in index 4efdf423207e..c0675b83726a 100644 --- a/libcxx/test/configs/ibm-libc++-shared.cfg.in +++ b/libcxx/test/configs/ibm-libc++-shared.cfg.in @@ -2,8 +2,6 @@ # AIX using a shared library. 
# -import sys - lit_config.load_config(config, '@CMAKE_CURRENT_BINARY_DIR@/cmake-bridge.cfg') config.substitutions.append(('%{flags}', '')) @@ -14,7 +12,7 @@ config.substitutions.append(('%{link_flags}', '-nostdlib++ -L %{install}/lib -lc++ -lc++abi -latomic -Wl,-bbigtoc' )) config.substitutions.append(('%{exec}', - '{} %{{libcxx}}/utils/run.py --execdir %T --env LIBPATH=%{{install}}/lib -- '.format(sys.executable) + '%{executor} --execdir %T --env LIBPATH=%{install}/lib -- ' )) # LIBCXX-AIX-FIXME is the feature name used to XFAIL the diff --git a/libcxx/test/configs/llvm-libc++-shared-gcc.cfg.in b/libcxx/test/configs/llvm-libc++-shared-gcc.cfg.in index d6fcaea2da89..246e84837a45 100644 --- a/libcxx/test/configs/llvm-libc++-shared-gcc.cfg.in +++ b/libcxx/test/configs/llvm-libc++-shared-gcc.cfg.in @@ -2,8 +2,6 @@ # using a shared library, with GCC. This is done differently from Clang because # GCC does not support the -nostdlib++ command-line flag. -import sys - lit_config.load_config(config, '@CMAKE_CURRENT_BINARY_DIR@/cmake-bridge.cfg') config.substitutions.append(('%{flags}', '')) @@ -14,7 +12,7 @@ config.substitutions.append(('%{link_flags}', '-L %{lib} -Wl,-rpath,%{lib} -nodefaultlibs -lc++ -lm -lgcc_s -lgcc -lpthread -lc -lgcc_s -lgcc -latomic' )) config.substitutions.append(('%{exec}', - '{} %{{libcxx}}/utils/run.py --execdir %T -- '.format(sys.executable) + '%{executor} --execdir %T -- ' )) import os, site diff --git a/libcxx/test/configs/llvm-libc++-shared.cfg.in b/libcxx/test/configs/llvm-libc++-shared.cfg.in index 37e5c7b6709a..4d15c970bd29 100644 --- a/libcxx/test/configs/llvm-libc++-shared.cfg.in +++ b/libcxx/test/configs/llvm-libc++-shared.cfg.in @@ -1,8 +1,6 @@ # This testing configuration handles running the test suite against LLVM's libc++ # using a shared library. -import sys - lit_config.load_config(config, '@CMAKE_CURRENT_BINARY_DIR@/cmake-bridge.cfg') config.substitutions.append(('%{flags}', @@ -15,7 +13,7 @@ config.substitutions.append(('%{link_flags}', '-nostdlib++ -L %{lib} -Wl,-rpath,%{lib} -lc++ -pthread' )) config.substitutions.append(('%{exec}', - '{} %{{libcxx}}/utils/run.py --execdir %T -- '.format(sys.executable) + '%{executor} --execdir %T -- ' )) import os, site diff --git a/libcxx/test/configs/llvm-libc++-static.cfg.in b/libcxx/test/configs/llvm-libc++-static.cfg.in index 57229ddf9023..eaa711a82908 100644 --- a/libcxx/test/configs/llvm-libc++-static.cfg.in +++ b/libcxx/test/configs/llvm-libc++-static.cfg.in @@ -1,8 +1,6 @@ # This testing configuration handles running the test suite against LLVM's libc++ # using a static library. -import sys - lit_config.load_config(config, '@CMAKE_CURRENT_BINARY_DIR@/cmake-bridge.cfg') config.substitutions.append(('%{flags}', @@ -15,7 +13,7 @@ config.substitutions.append(('%{link_flags}', '-nostdlib++ -L %{lib} -lc++ -lc++abi -pthread' )) config.substitutions.append(('%{exec}', - '{} %{{libcxx}}/utils/run.py --execdir %T -- '.format(sys.executable) + '%{executor} --execdir %T -- ' )) import os, site diff --git a/libcxxabi/test/configs/apple-libc++abi-shared.cfg.in b/libcxxabi/test/configs/apple-libc++abi-shared.cfg.in index 4165d7a9c249..4d0df23e0d67 100644 --- a/libcxxabi/test/configs/apple-libc++abi-shared.cfg.in +++ b/libcxxabi/test/configs/apple-libc++abi-shared.cfg.in @@ -1,7 +1,5 @@ # Testing configuration for Apple's system libc++abi. 
-import sys - lit_config.load_config(config, '@CMAKE_CURRENT_BINARY_DIR@/cmake-bridge.cfg') config.substitutions.append(('%{flags}', @@ -15,7 +13,7 @@ config.substitutions.append(('%{link_flags}', '-nostdlib++ -L %{install}/lib -lc++ -lc++abi' )) config.substitutions.append(('%{exec}', - '{} %{{libcxx}}/utils/run.py --execdir %T --env DYLD_LIBRARY_PATH=%{{install}}/lib -- '.format(sys.executable) + '%{executor} --execdir %T --env DYLD_LIBRARY_PATH=%{install}/lib -- ' )) import os, site diff --git a/libcxxabi/test/configs/cmake-bridge.cfg.in b/libcxxabi/test/configs/cmake-bridge.cfg.in index 469872c7060b..f7ede5f16acf 100644 --- a/libcxxabi/test/configs/cmake-bridge.cfg.in +++ b/libcxxabi/test/configs/cmake-bridge.cfg.in @@ -29,3 +29,4 @@ config.host_triple = '@LLVM_HOST_TRIPLE@' config.substitutions.append(('%{cxx}', '@CMAKE_CXX_COMPILER@')) config.substitutions.append(('%{libcxx}', '@LIBCXXABI_LIBCXX_PATH@')) config.substitutions.append(('%{install}', '@CMAKE_BINARY_DIR@')) +config.substitutions.append(('%{executor}', '@LIBCXXABI_EXECUTOR@')) diff --git a/libcxxabi/test/configs/ibm-libc++abi-shared.cfg.in b/libcxxabi/test/configs/ibm-libc++abi-shared.cfg.in index feb5e7b3e9dc..06d9aca508a4 100644 --- a/libcxxabi/test/configs/ibm-libc++abi-shared.cfg.in +++ b/libcxxabi/test/configs/ibm-libc++abi-shared.cfg.in @@ -1,7 +1,5 @@ # Testing configuration for libc++abi on AIX. -import sys - lit_config.load_config(config, '@CMAKE_CURRENT_BINARY_DIR@/cmake-bridge.cfg') config.substitutions.append(('%{flags}','')) @@ -15,7 +13,7 @@ config.substitutions.append(('%{link_flags}', '-nostdlib++ -L %{install}/lib -lc++ -lc++abi -Wl,-bbigtoc' )) config.substitutions.append(('%{exec}', - '{} %{{libcxx}}/utils/run.py --execdir %T --env LIBPATH=%{{install}}/lib -- '.format(sys.executable) + '%{executor} --execdir %T --env LIBPATH=%{install}/lib -- ' )) import os, site From f68e89044a2ee14babd1af8fc406c9bab080992a Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Martin=20Storsj=C3=B6?= Date: Tue, 21 Dec 2021 01:19:34 +0200 Subject: [PATCH 010/293] [libcxx] Add LIBCXX_EXTRA_SITE_DEFINES for adding extra defines in __config_site This is similar to the existing setting LIBCXX_ABI_DEFINES, with the difference that this also allows setting other defines than ones that start with "_LIBCPP_ABI_", and allows setting defines to a specific value. This allows avoiding using LIBCXX_TEST_COMPILER_FLAGS in two CI configurations. Differential Revision: https://reviews.llvm.org/D116109 --- libcxx/CMakeLists.txt | 12 ++++++++++++ libcxx/cmake/caches/MinGW.cmake | 6 ++---- libcxx/include/__config_site.in | 1 + libcxx/utils/ci/run-buildbot | 3 +-- 4 files changed, 16 insertions(+), 6 deletions(-) diff --git a/libcxx/CMakeLists.txt b/libcxx/CMakeLists.txt index 39744bb21559..03a6a0781b5a 100644 --- a/libcxx/CMakeLists.txt +++ b/libcxx/CMakeLists.txt @@ -196,6 +196,7 @@ endif() option(LIBCXX_HIDE_FROM_ABI_PER_TU_BY_DEFAULT "Enable per TU ABI insulation by default. To be used by vendors." OFF) set(LIBCXX_ABI_DEFINES "" CACHE STRING "A semicolon separated list of ABI macros to define in the site config header.") +option(LIBCXX_EXTRA_SITE_DEFINES "Extra defines to add into __config_site") option(LIBCXX_USE_COMPILER_RT "Use compiler-rt instead of libgcc" OFF) set(LIBCXX_LIBCPPABI_VERSION "2" CACHE STRING "Version of libc++abi's ABI to re-export from libc++ when re-exporting is enabled. 
Note that this is not related to the version of libc++'s ABI itself!") @@ -909,6 +910,17 @@ if (LIBCXX_ABI_DEFINES) config_define(${abi_defines} _LIBCPP_ABI_DEFINES) endif() +if (LIBCXX_EXTRA_SITE_DEFINES) + set(extra_site_defines) + foreach (extra_site_define ${LIBCXX_EXTRA_SITE_DEFINES}) + # Allow defines such as DEFINE=VAL, transformed into "#define DEFINE VAL". + string(REPLACE "=" " " extra_site_define "${extra_site_define}") + list(APPEND extra_site_defines "#define ${extra_site_define}") + endforeach() + string(REPLACE ";" "\n" extra_site_defines "${extra_site_defines}") + config_define(${extra_site_defines} _LIBCPP_EXTRA_SITE_DEFINES) +endif() + # By default libc++ on Windows expects to use a shared library, which requires # the headers to use DLL import/export semantics. However when building a # static library only we modify the headers to disable DLL import/export. diff --git a/libcxx/cmake/caches/MinGW.cmake b/libcxx/cmake/caches/MinGW.cmake index 458eb012ad15..b9ac28006eb8 100644 --- a/libcxx/cmake/caches/MinGW.cmake +++ b/libcxx/cmake/caches/MinGW.cmake @@ -13,8 +13,6 @@ set(LIBUNWIND_USE_COMPILER_RT ON CACHE BOOL "") set(LIBCXX_TARGET_INFO "libcxx.test.target_info.MingwLocalTI" CACHE STRING "") -# Without these flags, 'long double' (which is 80 bit on x86 mingw, but +# Without this flag, 'long double' (which is 80 bit on x86 mingw, but # 64 bit in MSVC) isn't handled correctly in printf. -set(CMAKE_C_FLAGS "-D__USE_MINGW_ANSI_STDIO=1" CACHE STRING "") -set(CMAKE_CXX_FLAGS "-D__USE_MINGW_ANSI_STDIO=1" CACHE STRING "") -set(LIBCXX_TEST_COMPILER_FLAGS "-D__USE_MINGW_ANSI_STDIO=1" CACHE STRING "") +set(LIBCXX_EXTRA_SITE_DEFINES "__USE_MINGW_ANSI_STDIO=1" CACHE STRING "") diff --git a/libcxx/include/__config_site.in b/libcxx/include/__config_site.in index a8b17ad72b73..59ca22981242 100644 --- a/libcxx/include/__config_site.in +++ b/libcxx/include/__config_site.in @@ -35,5 +35,6 @@ #cmakedefine _LIBCPP_HAS_NO_INCOMPLETE_RANGES @_LIBCPP_ABI_DEFINES@ +@_LIBCPP_EXTRA_SITE_DEFINES@ #endif // _LIBCPP_CONFIG_SITE diff --git a/libcxx/utils/ci/run-buildbot b/libcxx/utils/ci/run-buildbot index 5c5b1f964859..d1bff8e592aa 100755 --- a/libcxx/utils/ci/run-buildbot +++ b/libcxx/utils/ci/run-buildbot @@ -108,8 +108,7 @@ function generate-cmake-libcxx-win() { -DCMAKE_C_COMPILER=clang-cl \ -DCMAKE_CXX_COMPILER=clang-cl \ -DLIBCXX_ENABLE_FILESYSTEM=YES \ - -DCMAKE_CXX_FLAGS="-D_LIBCPP_HAS_NO_INT128" \ - -DLIBCXX_TEST_COMPILER_FLAGS="-D_LIBCPP_HAS_NO_INT128" \ + -DLIBCXX_EXTRA_SITE_DEFINES="_LIBCPP_HAS_NO_INT128" \ "${@}" } From d67b25e7f6d9eb320382933ff650de97161f801e Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Martin=20Storsj=C3=B6?= Date: Tue, 21 Dec 2021 13:14:28 +0000 Subject: [PATCH 011/293] [libcxx] [test] Extend test for bash for executor-has-no-bash If %{exec} sets "--env PATH=single-dir", the directory containing bash and related shell utils is omitted from the path, which means that most shell scripts would fail. (Setting PATH is needed for DLL builds on Windows; PATH fills the same role as e.g. LD_LIBRARY_PATH on Linux.) This condition is missed in the current test, because the executor run.py first resolves the executable to run using the original path, then invokes that executable with an environment with a restricted path. Thus the executor is able to run bash, but that bash is then unable to run further shell commands (other than bash builtins). Extend the test from "bash --version" to "bash -c 'bash --version'". 
This correctly identifies the executor-has-no-bash condition in the current Windows CI configs, allowing removing 6 cases of LIBCXX-WINDOWS-FIXME. Another longterm fix would be to extend run.py with an option like "--env-prepend PATH=dir", to allow keeping the current path while adding a directory to it. Differential Revision: https://reviews.llvm.org/D116117 --- .../iostream.objects/narrow.stream.objects/cerr.sh.cpp | 2 -- .../iostream.objects/narrow.stream.objects/clog.sh.cpp | 2 -- .../iostream.objects/narrow.stream.objects/cout.sh.cpp | 2 -- .../iostream.objects/wide.stream.objects/wcerr.sh.cpp | 1 - .../iostream.objects/wide.stream.objects/wclog.sh.cpp | 1 - .../iostream.objects/wide.stream.objects/wcout.sh.cpp | 1 - libcxx/utils/libcxx/test/features.py | 8 +++++++- 7 files changed, 7 insertions(+), 10 deletions(-) diff --git a/libcxx/test/std/input.output/iostream.objects/narrow.stream.objects/cerr.sh.cpp b/libcxx/test/std/input.output/iostream.objects/narrow.stream.objects/cerr.sh.cpp index 36380f64a9fe..d86d4f1590c0 100644 --- a/libcxx/test/std/input.output/iostream.objects/narrow.stream.objects/cerr.sh.cpp +++ b/libcxx/test/std/input.output/iostream.objects/narrow.stream.objects/cerr.sh.cpp @@ -10,8 +10,6 @@ // istream cerr; -// XFAIL: LIBCXX-WINDOWS-FIXME - // UNSUPPORTED: executor-has-no-bash // FILE_DEPENDENCIES: ../check-stderr.sh // RUN: %{build} diff --git a/libcxx/test/std/input.output/iostream.objects/narrow.stream.objects/clog.sh.cpp b/libcxx/test/std/input.output/iostream.objects/narrow.stream.objects/clog.sh.cpp index b740300e8d32..f1f52d971eec 100644 --- a/libcxx/test/std/input.output/iostream.objects/narrow.stream.objects/clog.sh.cpp +++ b/libcxx/test/std/input.output/iostream.objects/narrow.stream.objects/clog.sh.cpp @@ -10,8 +10,6 @@ // istream clog; -// XFAIL: LIBCXX-WINDOWS-FIXME - // UNSUPPORTED: executor-has-no-bash // FILE_DEPENDENCIES: ../check-stderr.sh // RUN: %{build} diff --git a/libcxx/test/std/input.output/iostream.objects/narrow.stream.objects/cout.sh.cpp b/libcxx/test/std/input.output/iostream.objects/narrow.stream.objects/cout.sh.cpp index c606312fa054..e5efa3070e88 100644 --- a/libcxx/test/std/input.output/iostream.objects/narrow.stream.objects/cout.sh.cpp +++ b/libcxx/test/std/input.output/iostream.objects/narrow.stream.objects/cout.sh.cpp @@ -10,8 +10,6 @@ // istream cout; -// XFAIL: LIBCXX-WINDOWS-FIXME - // UNSUPPORTED: executor-has-no-bash // FILE_DEPENDENCIES: ../check-stdout.sh // RUN: %{build} diff --git a/libcxx/test/std/input.output/iostream.objects/wide.stream.objects/wcerr.sh.cpp b/libcxx/test/std/input.output/iostream.objects/wide.stream.objects/wcerr.sh.cpp index 617b2d45c066..65c9cd52f58f 100644 --- a/libcxx/test/std/input.output/iostream.objects/wide.stream.objects/wcerr.sh.cpp +++ b/libcxx/test/std/input.output/iostream.objects/wide.stream.objects/wcerr.sh.cpp @@ -11,7 +11,6 @@ // istream wcerr; // XFAIL: libcpp-has-no-wide-characters -// XFAIL: LIBCXX-WINDOWS-FIXME // UNSUPPORTED: executor-has-no-bash // FILE_DEPENDENCIES: ../check-stderr.sh diff --git a/libcxx/test/std/input.output/iostream.objects/wide.stream.objects/wclog.sh.cpp b/libcxx/test/std/input.output/iostream.objects/wide.stream.objects/wclog.sh.cpp index e0ce4646ea9d..8ce29180b0fd 100644 --- a/libcxx/test/std/input.output/iostream.objects/wide.stream.objects/wclog.sh.cpp +++ b/libcxx/test/std/input.output/iostream.objects/wide.stream.objects/wclog.sh.cpp @@ -11,7 +11,6 @@ // istream wclog; // XFAIL: libcpp-has-no-wide-characters -// XFAIL: LIBCXX-WINDOWS-FIXME // 
UNSUPPORTED: executor-has-no-bash // FILE_DEPENDENCIES: ../check-stderr.sh diff --git a/libcxx/test/std/input.output/iostream.objects/wide.stream.objects/wcout.sh.cpp b/libcxx/test/std/input.output/iostream.objects/wide.stream.objects/wcout.sh.cpp index 6089432b0f56..a58feb400735 100644 --- a/libcxx/test/std/input.output/iostream.objects/wide.stream.objects/wcout.sh.cpp +++ b/libcxx/test/std/input.output/iostream.objects/wide.stream.objects/wcout.sh.cpp @@ -11,7 +11,6 @@ // istream wcout; // XFAIL: libcpp-has-no-wide-characters -// XFAIL: LIBCXX-WINDOWS-FIXME // UNSUPPORTED: executor-has-no-bash // FILE_DEPENDENCIES: ../check-stdout.sh diff --git a/libcxx/utils/libcxx/test/features.py b/libcxx/utils/libcxx/test/features.py index e4755fec2316..429e2d82c97b 100644 --- a/libcxx/utils/libcxx/test/features.py +++ b/libcxx/utils/libcxx/test/features.py @@ -75,8 +75,14 @@ # Whether Bash can run on the executor. # This is not always the case, for example when running on embedded systems. + # + # For the corner case of bash existing, but it being missing in the path + # set in %{exec} as "--env PATH=one-single-dir", the executor does find + # and executes bash, but bash then can't find any other common shell + # utilities. Test executing "bash -c 'bash --version'" to see if bash + # manages to find binaries to execute. Feature(name='executor-has-no-bash', - when=lambda cfg: runScriptExitCode(cfg, ['%{exec} bash --version']) != 0), + when=lambda cfg: runScriptExitCode(cfg, ['%{exec} bash -c \'bash --version\'']) != 0), Feature(name='apple-clang', when=_isAppleClang), Feature(name=lambda cfg: 'apple-clang-{__clang_major__}'.format(**compilerMacros(cfg)), when=_isAppleClang), From 0e9393f5221cefe76a737b53c724e484a764dba5 Mon Sep 17 00:00:00 2001 From: Nico Weber Date: Tue, 21 Dec 2021 18:59:37 -0500 Subject: [PATCH 012/293] [gn build] (manually) port f68e89044a2ee --- llvm/utils/gn/secondary/libcxx/include/BUILD.gn | 1 + 1 file changed, 1 insertion(+) diff --git a/llvm/utils/gn/secondary/libcxx/include/BUILD.gn b/llvm/utils/gn/secondary/libcxx/include/BUILD.gn index 85a9353958b9..b356af422f78 100644 --- a/llvm/utils/gn/secondary/libcxx/include/BUILD.gn +++ b/llvm/utils/gn/secondary/libcxx/include/BUILD.gn @@ -18,6 +18,7 @@ if (current_toolchain == default_toolchain) { values = [ "_LIBCPP_ABI_FORCE_ITANIUM=", "_LIBCPP_ABI_FORCE_MICROSOFT=", + "_LIBCPP_EXTRA_SITE_DEFINES=", "_LIBCPP_HIDE_FROM_ABI_PER_TU_BY_DEFAULT=", "_LIBCPP_HAS_NO_FILESYSTEM_LIBRARY=", "_LIBCPP_HAS_NO_INCOMPLETE_FORMAT=", From 5bb5142e80c9c6eb1a948d6d2ff4834e4e69741f Mon Sep 17 00:00:00 2001 From: Alexandre Ganea Date: Tue, 21 Dec 2021 18:36:54 -0500 Subject: [PATCH 013/293] Revert [CodeView] Emit S_OBJNAME record Also revert all subsequent fixes: - abd1cbf5e543f0f114d2742e109ead7d7ddbf9c4 [Clang] Disable debug-info-objname.cpp test on Unix until I sort out the issue. - 00ec441253048f5e30540ea26bb0a28c42a5fc18 [Clang] debug-info-objname.cpp test: explictly encode a x86 target when using %clang_cl to avoid falling back to a native CPU triple. - cd407f6e52b09cce2bef24c74b7f36fedc94991b [Clang] Fix build by restricting debug-info-objname.cpp test to x86. 
--- clang/include/clang/Basic/CodeGenOptions.h | 3 - clang/include/clang/Driver/Job.h | 4 - clang/include/clang/Driver/Options.td | 5 -- clang/lib/CodeGen/BackendUtil.cpp | 4 +- clang/lib/Driver/ToolChains/Clang.cpp | 80 +++---------------- clang/test/CMakeLists.txt | 1 - clang/test/CodeGenCXX/debug-info-objname.cpp | 41 ---------- llvm/include/llvm/MC/MCTargetOptions.h | 1 - llvm/include/llvm/Support/Caching.h | 5 +- llvm/include/llvm/Support/ToolOutputFile.h | 5 +- llvm/include/llvm/Target/TargetOptions.h | 5 -- llvm/lib/CodeGen/AsmPrinter/CodeViewDebug.cpp | 24 ------ llvm/lib/CodeGen/AsmPrinter/CodeViewDebug.h | 2 - llvm/lib/LTO/LTOBackend.cpp | 2 - llvm/lib/Support/Caching.cpp | 13 +-- llvm/test/DebugInfo/COFF/globals.ll | 9 +-- llvm/test/DebugInfo/COFF/multifunction.ll | 24 +++--- llvm/test/DebugInfo/COFF/pr28747.ll | 6 +- llvm/test/DebugInfo/COFF/simple.ll | 30 +++---- llvm/test/DebugInfo/COFF/vframe-fpo.ll | 4 +- llvm/test/MC/AArch64/coff-debug.ll | 10 +-- llvm/test/MC/ARM/coff-debugging-secrel.ll | 8 +- llvm/test/MC/COFF/cv-compiler-info.ll | 32 +++----- llvm/tools/llc/llc.cpp | 3 - llvm/tools/llvm-lto2/llvm-lto2.cpp | 2 +- 25 files changed, 68 insertions(+), 255 deletions(-) delete mode 100644 clang/test/CodeGenCXX/debug-info-objname.cpp diff --git a/clang/include/clang/Basic/CodeGenOptions.h b/clang/include/clang/Basic/CodeGenOptions.h index 33ec03a17136..960aa419b490 100644 --- a/clang/include/clang/Basic/CodeGenOptions.h +++ b/clang/include/clang/Basic/CodeGenOptions.h @@ -227,9 +227,6 @@ class CodeGenOptions : public CodeGenOptionsBase { /// Output filename for the split debug info, not used in the skeleton CU. std::string SplitDwarfOutput; - /// Output filename used in the COFF debug information. - std::string ObjectFilenameForDebug; - /// The name of the relocation model to use. llvm::Reloc::Model RelocationModel; diff --git a/clang/include/clang/Driver/Job.h b/clang/include/clang/Driver/Job.h index 6e3b51f2a799..8b287638a271 100644 --- a/clang/include/clang/Driver/Job.h +++ b/clang/include/clang/Driver/Job.h @@ -204,10 +204,6 @@ class Command { /// from the parent process will be used. 
virtual void setEnvironment(llvm::ArrayRef NewEnvironment); - void replaceArguments(llvm::opt::ArgStringList List) { - Arguments = std::move(List); - } - const char *getExecutable() const { return Executable; } const llvm::opt::ArgStringList &getArguments() const { return Arguments; } diff --git a/clang/include/clang/Driver/Options.td b/clang/include/clang/Driver/Options.td index 08e9e1a3432a..3b47512501d3 100644 --- a/clang/include/clang/Driver/Options.td +++ b/clang/include/clang/Driver/Options.td @@ -3802,11 +3802,6 @@ def o : JoinedOrSeparate<["-"], "o">, Flags<[NoXarchOption, RenderAsInput, CC1Option, CC1AsOption, FC1Option, FlangOption]>, HelpText<"Write output to ">, MetaVarName<"">, MarshallingInfoString>; -def object_file_name_EQ : Joined<["-"], "object-file-name=">, Flags<[CC1Option, CC1AsOption, CoreOption]>, - HelpText<"Set the output for debug infos">, MetaVarName<"">, - MarshallingInfoString>; -def object_file_name : Separate<["-"], "object-file-name">, Flags<[CC1Option, CC1AsOption, CoreOption]>, - Alias; def pagezero__size : JoinedOrSeparate<["-"], "pagezero_size">; def pass_exit_codes : Flag<["-", "--"], "pass-exit-codes">, Flags<[Unsupported]>; def pedantic_errors : Flag<["-", "--"], "pedantic-errors">, Group, Flags<[CC1Option]>, diff --git a/clang/lib/CodeGen/BackendUtil.cpp b/clang/lib/CodeGen/BackendUtil.cpp index 5e16d3525b38..3195615ae561 100644 --- a/clang/lib/CodeGen/BackendUtil.cpp +++ b/clang/lib/CodeGen/BackendUtil.cpp @@ -646,7 +646,6 @@ static bool initTargetOptions(DiagnosticsEngine &Diags, Options.MCOptions.Argv0 = CodeGenOpts.Argv0; Options.MCOptions.CommandLineArgs = CodeGenOpts.CommandLineArgs; Options.DebugStrictDwarf = CodeGenOpts.DebugStrictDwarf; - Options.ObjectFilenameForDebug = CodeGenOpts.ObjectFilenameForDebug; return true; } @@ -1584,8 +1583,7 @@ static void runThinLTOBackend( return; auto AddStream = [&](size_t Task) { - return std::make_unique(std::move(OS), - CGOpts.ObjectFilenameForDebug); + return std::make_unique(std::move(OS)); }; lto::Config Conf; if (CGOpts.SaveTempsFilePrefix != "") { diff --git a/clang/lib/Driver/ToolChains/Clang.cpp b/clang/lib/Driver/ToolChains/Clang.cpp index 2a3723975568..d76f810f1a7f 100644 --- a/clang/lib/Driver/ToolChains/Clang.cpp +++ b/clang/lib/Driver/ToolChains/Clang.cpp @@ -625,9 +625,8 @@ getFramePointerKind(const ArgList &Args, const llvm::Triple &Triple) { } /// Add a CC1 option to specify the debug compilation directory. -static const char *addDebugCompDirArg(const ArgList &Args, - ArgStringList &CmdArgs, - const llvm::vfs::FileSystem &VFS) { +static void addDebugCompDirArg(const ArgList &Args, ArgStringList &CmdArgs, + const llvm::vfs::FileSystem &VFS) { if (Arg *A = Args.getLastArg(options::OPT_ffile_compilation_dir_EQ, options::OPT_fdebug_compilation_dir_EQ)) { if (A->getOption().matches(options::OPT_ffile_compilation_dir_EQ)) @@ -639,31 +638,6 @@ static const char *addDebugCompDirArg(const ArgList &Args, VFS.getCurrentWorkingDirectory()) { CmdArgs.push_back(Args.MakeArgString("-fdebug-compilation-dir=" + *CWD)); } - StringRef Path(CmdArgs.back()); - return Path.substr(Path.find('=') + 1).data(); -} - -static void addDebugObjectName(const ArgList &Args, ArgStringList &CmdArgs, - const char *DebugCompilationDir, - const char *OutputFileName) { - // No need to generate a value for -object-file-name if it was provided. 
- for (auto *Arg : Args.filtered(options::OPT_Xclang)) - if (StringRef(Arg->getValue()).startswith("-object-file-name")) - return; - - if (Args.hasArg(options::OPT_object_file_name_EQ)) - return; - - SmallString<128> ObjFileNameForDebug(OutputFileName); - if (ObjFileNameForDebug != "-" && - !llvm::sys::path::is_absolute(ObjFileNameForDebug) && - (!DebugCompilationDir || - llvm::sys::path::is_absolute(DebugCompilationDir))) { - // Make the path absolute in the debug infos like MSVC does. - llvm::sys::fs::make_absolute(ObjFileNameForDebug); - } - CmdArgs.push_back( - Args.MakeArgString(Twine("-object-file-name=") + ObjFileNameForDebug)); } /// Add a CC1 and CC1AS option to specify the debug file path prefix map. @@ -5675,8 +5649,7 @@ void Clang::ConstructJob(Compilation &C, const JobAction &JA, CmdArgs.push_back("-fno-autolink"); // Add in -fdebug-compilation-dir if necessary. - const char *DebugCompilationDir = - addDebugCompDirArg(Args, CmdArgs, D.getVFS()); + addDebugCompDirArg(Args, CmdArgs, D.getVFS()); addDebugPrefixMapArg(D, Args, CmdArgs); @@ -7048,11 +7021,6 @@ void Clang::ConstructJob(Compilation &C, const JobAction &JA, CmdArgs.push_back(Args.MakeArgString(Str)); } - // Add the output path to the object file for CodeView debug infos. - if (EmitCodeView && Output.isFilename()) - addDebugObjectName(Args, CmdArgs, DebugCompilationDir, - Output.getFilename()); - // Add the "-o out -x type src.c" flags last. This is done primarily to make // the -cc1 command easier to edit when reproducing compiler crashes. if (Output.getType() == types::TY_Dependencies) { @@ -7670,14 +7638,11 @@ void ClangAs::ConstructJob(Compilation &C, const JobAction &JA, Args.AddAllArgs(CmdArgs, options::OPT_I_Group); // Determine the original source input. - auto FindSource = [](const Action *S) -> const Action * { - while (S->getKind() != Action::InputClass) { - assert(!S->getInputs().empty() && "unexpected root action!"); - S = S->getInputs()[0]; - } - return S; - }; - const Action *SourceAction = FindSource(&JA); + const Action *SourceAction = &JA; + while (SourceAction->getKind() != Action::InputClass) { + assert(!SourceAction->getInputs().empty() && "unexpected root action!"); + SourceAction = SourceAction->getInputs()[0]; + } // Forward -g and handle debug info related flags, assuming we are dealing // with an actual assembly file. @@ -7696,10 +7661,6 @@ void ClangAs::ConstructJob(Compilation &C, const JobAction &JA, codegenoptions::DebugInfoKind DebugInfoKind = codegenoptions::NoDebugInfo; - // Add the -fdebug-compilation-dir flag if needed. - const char *DebugCompilationDir = - addDebugCompDirArg(Args, CmdArgs, C.getDriver().getVFS()); - if (SourceAction->getType() == types::TY_Asm || SourceAction->getType() == types::TY_PP_Asm) { // You might think that it would be ok to set DebugInfoKind outside of @@ -7708,6 +7669,8 @@ void ClangAs::ConstructJob(Compilation &C, const JobAction &JA, // and it's not clear whether that test is just overly restrictive. DebugInfoKind = (WantDebug ? codegenoptions::DebugInfoConstructor : codegenoptions::NoDebugInfo); + // Add the -fdebug-compilation-dir flag if needed. 
+ addDebugCompDirArg(Args, CmdArgs, C.getDriver().getVFS()); addDebugPrefixMapArg(getToolChain().getDriver(), Args, CmdArgs); @@ -7818,29 +7781,6 @@ void ClangAs::ConstructJob(Compilation &C, const JobAction &JA, Args.AddAllArgs(CmdArgs, options::OPT_mllvm); - if (DebugInfoKind > codegenoptions::NoDebugInfo && Output.isFilename()) - addDebugObjectName(Args, CmdArgs, DebugCompilationDir, - Output.getFilename()); - - // Fixup any previous commands that use -object-file-name because when we - // generated them, the final .obj name wasn't yet known. - for (Command &J : C.getJobs()) { - if (SourceAction != FindSource(&J.getSource())) - continue; - auto &JArgs = J.getArguments(); - for (unsigned I = 0; I < JArgs.size(); ++I) { - if (StringRef(JArgs[I]).startswith("-object-file-name=") && - Output.isFilename()) { - ArgStringList NewArgs(JArgs.begin(), JArgs.begin() + I); - addDebugObjectName(Args, NewArgs, DebugCompilationDir, - Output.getFilename()); - NewArgs.append(JArgs.begin() + I + 1, JArgs.end()); - J.replaceArguments(NewArgs); - break; - } - } - } - assert(Output.isFilename() && "Unexpected lipo output."); CmdArgs.push_back("-o"); CmdArgs.push_back(Output.getFilename()); diff --git a/clang/test/CMakeLists.txt b/clang/test/CMakeLists.txt index 07de12b7c190..32405fad820a 100644 --- a/clang/test/CMakeLists.txt +++ b/clang/test/CMakeLists.txt @@ -126,7 +126,6 @@ if( NOT CLANG_BUILT_STANDALONE ) llvm-nm llvm-objcopy llvm-objdump - llvm-pdbutil llvm-profdata llvm-rc llvm-readelf diff --git a/clang/test/CodeGenCXX/debug-info-objname.cpp b/clang/test/CodeGenCXX/debug-info-objname.cpp deleted file mode 100644 index d80d805b8b41..000000000000 --- a/clang/test/CodeGenCXX/debug-info-objname.cpp +++ /dev/null @@ -1,41 +0,0 @@ -// REQUIRES: x86-registered-target -// RUN: cp %s %T/debug-info-objname.cpp -// RUN: cd %T - -// No output file provided, input file is relative, we emit an absolute path (MSVC behavior). -// RUN: %clang_cl --target=x86_64-windows-msvc /c /Z7 -nostdinc debug-info-objname.cpp -// RUN: llvm-pdbutil dump -all debug-info-objname.obj | FileCheck %s --check-prefix=ABSOLUTE - -// No output file provided, input file is absolute, we emit an absolute path (MSVC behavior). -// RUN: %clang_cl --target=x86_64-windows-msvc /c /Z7 -nostdinc -- %T/debug-info-objname.cpp -// RUN: llvm-pdbutil dump -all debug-info-objname.obj | FileCheck %s --check-prefix=ABSOLUTE - -// The output file is provided as an absolute path, we emit an absolute path. -// RUN: %clang_cl --target=x86_64-windows-msvc /c /Z7 -nostdinc /Fo%T/debug-info-objname.obj -- %T/debug-info-objname.cpp -// RUN: llvm-pdbutil dump -all debug-info-objname.obj | FileCheck %s --check-prefix=ABSOLUTE - -// The output file is provided as relative path, -working-dir is provided, we emit an absolute path. -// RUN: %clang_cl --target=x86_64-windows-msvc /c /Z7 -nostdinc -working-dir=%T debug-info-objname.cpp -// RUN: llvm-pdbutil dump -all debug-info-objname.obj | FileCheck %s --check-prefix=ABSOLUTE - -// The input file name is relative and we specify -fdebug-compilation-dir, we emit a relative path. -// RUN: %clang_cl --target=x86_64-windows-msvc /c /Z7 -nostdinc -fdebug-compilation-dir=. debug-info-objname.cpp -// RUN: llvm-pdbutil dump -all debug-info-objname.obj | FileCheck %s --check-prefix=RELATIVE - -// Ensure /FA emits an .asm file which contains the path to the final .obj, not the .asm -// RUN: %clang_cl --target=x86_64-windows-msvc /c /Z7 -nostdinc -fdebug-compilation-dir=. 
/FA debug-info-objname.cpp -// RUN: cat debug-info-objname.asm | FileCheck %s --check-prefix=ASM - -// Same thing for -save-temps -// RUN: %clang_cl --target=x86_64-windows-msvc /c /Z7 -nostdinc -fdebug-compilation-dir=. /clang:-save-temps debug-info-objname.cpp -// RUN: cat debug-info-objname.asm | FileCheck %s --check-prefix=ASM - -int main() { - return 1; -} - -// ABSOLUTE: S_OBJNAME [size = {{[0-9]+}}] sig=0, `{{.+}}debug-info-objname.obj` -// RELATIVE: S_OBJNAME [size = {{[0-9]+}}] sig=0, `debug-info-objname.obj` -// ASM: Record kind: S_OBJNAME -// ASM-NEXT: .long 0 -// ASM-NEXT: .asciz "debug-info-objname.obj" diff --git a/llvm/include/llvm/MC/MCTargetOptions.h b/llvm/include/llvm/MC/MCTargetOptions.h index 3510eeca8953..db50dc6749e2 100644 --- a/llvm/include/llvm/MC/MCTargetOptions.h +++ b/llvm/include/llvm/MC/MCTargetOptions.h @@ -62,7 +62,6 @@ class MCTargetOptions { std::string ABIName; std::string AssemblyLanguage; std::string SplitDwarfFile; - std::string COFFOutputFilename; const char *Argv0 = nullptr; ArrayRef CommandLineArgs; diff --git a/llvm/include/llvm/Support/Caching.h b/llvm/include/llvm/Support/Caching.h index 5c30a822ef38..fbf5f3b5687a 100644 --- a/llvm/include/llvm/Support/Caching.h +++ b/llvm/include/llvm/Support/Caching.h @@ -27,11 +27,8 @@ class MemoryBuffer; /// that can be done by deriving from this class and overriding the destructor. class CachedFileStream { public: - CachedFileStream(std::unique_ptr OS, - std::string OSPath = "") - : OS(std::move(OS)), ObjectPathName(OSPath) {} + CachedFileStream(std::unique_ptr OS) : OS(std::move(OS)) {} std::unique_ptr OS; - std::string ObjectPathName; virtual ~CachedFileStream() = default; }; diff --git a/llvm/include/llvm/Support/ToolOutputFile.h b/llvm/include/llvm/Support/ToolOutputFile.h index 6b7222550b9f..ec1d6ae52268 100644 --- a/llvm/include/llvm/Support/ToolOutputFile.h +++ b/llvm/include/llvm/Support/ToolOutputFile.h @@ -29,10 +29,9 @@ class ToolOutputFile { /// raw_fd_ostream is destructed. It installs cleanups in its constructor and /// uninstalls them in its destructor. class CleanupInstaller { - public: /// The name of the file. std::string Filename; - + public: /// The flag which indicates whether we should not delete the file. bool Keep; @@ -65,8 +64,6 @@ class ToolOutputFile { /// Indicate that the tool's job wrt this output file has been successful and /// the file should not be deleted. void keep() { Installer.Keep = true; } - - const std::string &outputFilename() { return Installer.Filename; } }; } // end llvm namespace diff --git a/llvm/include/llvm/Target/TargetOptions.h b/llvm/include/llvm/Target/TargetOptions.h index c639f326abc9..912f6d1c153a 100644 --- a/llvm/include/llvm/Target/TargetOptions.h +++ b/llvm/include/llvm/Target/TargetOptions.h @@ -418,11 +418,6 @@ namespace llvm { /// Machine level options. MCTargetOptions MCOptions; - - /// Stores the filename/path of the final .o/.obj file, to be written in the - /// debug information. This is used for emitting the CodeView S_OBJNAME - /// record. 
- std::string ObjectFilenameForDebug; }; } // End llvm namespace diff --git a/llvm/lib/CodeGen/AsmPrinter/CodeViewDebug.cpp b/llvm/lib/CodeGen/AsmPrinter/CodeViewDebug.cpp index d621108408f0..928252a3d041 100644 --- a/llvm/lib/CodeGen/AsmPrinter/CodeViewDebug.cpp +++ b/llvm/lib/CodeGen/AsmPrinter/CodeViewDebug.cpp @@ -649,7 +649,6 @@ void CodeViewDebug::endModule() { switchToDebugSectionForSymbol(nullptr); MCSymbol *CompilerInfo = beginCVSubsection(DebugSubsectionKind::Symbols); - emitObjName(); emitCompilerInformation(); endCVSubsection(CompilerInfo); @@ -785,29 +784,6 @@ void CodeViewDebug::emitTypeGlobalHashes() { } } -void CodeViewDebug::emitObjName() { - MCSymbol *CompilerEnd = beginSymbolRecord(SymbolKind::S_OBJNAME); - - StringRef PathRef(Asm->TM.Options.ObjectFilenameForDebug); - llvm::SmallString<256> PathStore(PathRef); - - if (PathRef.empty() || PathRef == "-") { - // Don't emit the filename if we're writing to stdout or to /dev/null. - PathRef = {}; - } else { - llvm::sys::path::remove_dots(PathStore, /*remove_dot_dot=*/true); - PathRef = PathStore; - } - - OS.AddComment("Signature"); - OS.emitIntValue(0, 4); - - OS.AddComment("Object name"); - emitNullTerminatedSymbolName(OS, PathRef); - - endSymbolRecord(CompilerEnd); -} - namespace { struct Version { int Part[4]; diff --git a/llvm/lib/CodeGen/AsmPrinter/CodeViewDebug.h b/llvm/lib/CodeGen/AsmPrinter/CodeViewDebug.h index d1fc3cdccb20..6f88e15ee8fe 100644 --- a/llvm/lib/CodeGen/AsmPrinter/CodeViewDebug.h +++ b/llvm/lib/CodeGen/AsmPrinter/CodeViewDebug.h @@ -302,8 +302,6 @@ class LLVM_LIBRARY_VISIBILITY CodeViewDebug : public DebugHandlerBase { void emitTypeGlobalHashes(); - void emitObjName(); - void emitCompilerInformation(); void emitBuildInfo(); diff --git a/llvm/lib/LTO/LTOBackend.cpp b/llvm/lib/LTO/LTOBackend.cpp index 855d0fc8a8be..15e20fec565f 100644 --- a/llvm/lib/LTO/LTOBackend.cpp +++ b/llvm/lib/LTO/LTOBackend.cpp @@ -412,8 +412,6 @@ static void codegen(const Config &Conf, TargetMachine *TM, if (Error Err = StreamOrErr.takeError()) report_fatal_error(std::move(Err)); std::unique_ptr &Stream = *StreamOrErr; - TM->Options.ObjectFilenameForDebug = Stream->ObjectPathName; - legacy::PassManager CodeGenPasses; CodeGenPasses.add( createImmutableModuleSummaryIndexWrapperPass(&CombinedIndex)); diff --git a/llvm/lib/Support/Caching.cpp b/llvm/lib/Support/Caching.cpp index 8c685640f791..635647d72468 100644 --- a/llvm/lib/Support/Caching.cpp +++ b/llvm/lib/Support/Caching.cpp @@ -79,13 +79,14 @@ Expected llvm::localCache(Twine CacheNameRef, struct CacheStream : CachedFileStream { AddBufferFn AddBuffer; sys::fs::TempFile TempFile; + std::string EntryPath; unsigned Task; CacheStream(std::unique_ptr OS, AddBufferFn AddBuffer, sys::fs::TempFile TempFile, std::string EntryPath, unsigned Task) - : CachedFileStream(std::move(OS), std::move(EntryPath)), - AddBuffer(std::move(AddBuffer)), TempFile(std::move(TempFile)), + : CachedFileStream(std::move(OS)), AddBuffer(std::move(AddBuffer)), + TempFile(std::move(TempFile)), EntryPath(std::move(EntryPath)), Task(Task) {} ~CacheStream() { @@ -98,7 +99,7 @@ Expected llvm::localCache(Twine CacheNameRef, // Open the file first to avoid racing with a cache pruner. 
ErrorOr> MBOrErr = MemoryBuffer::getOpenFile( - sys::fs::convertFDToNativeFile(TempFile.FD), ObjectPathName, + sys::fs::convertFDToNativeFile(TempFile.FD), EntryPath, /*FileSize=*/-1, /*RequiresNullTerminator=*/false); if (!MBOrErr) report_fatal_error(Twine("Failed to open new cache file ") + @@ -114,14 +115,14 @@ Expected llvm::localCache(Twine CacheNameRef, // AddBuffer a copy of the bytes we wrote in that case. We do this // instead of just using the existing file, because the pruner might // delete the file before we get a chance to use it. - Error E = TempFile.keep(ObjectPathName); + Error E = TempFile.keep(EntryPath); E = handleErrors(std::move(E), [&](const ECError &E) -> Error { std::error_code EC = E.convertToErrorCode(); if (EC != errc::permission_denied) return errorCodeToError(EC); auto MBCopy = MemoryBuffer::getMemBufferCopy((*MBOrErr)->getBuffer(), - ObjectPathName); + EntryPath); MBOrErr = std::move(MBCopy); // FIXME: should we consume the discard error? @@ -132,7 +133,7 @@ Expected llvm::localCache(Twine CacheNameRef, if (E) report_fatal_error(Twine("Failed to rename temporary file ") + - TempFile.TmpName + " to " + ObjectPathName + ": " + + TempFile.TmpName + " to " + EntryPath + ": " + toString(std::move(E)) + "\n"); AddBuffer(Task, std::move(*MBOrErr)); diff --git a/llvm/test/DebugInfo/COFF/globals.ll b/llvm/test/DebugInfo/COFF/globals.ll index 71c18a76f4b5..7960f63b1474 100644 --- a/llvm/test/DebugInfo/COFF/globals.ll +++ b/llvm/test/DebugInfo/COFF/globals.ll @@ -1,9 +1,7 @@ ; RUN: llc < %s | FileCheck %s --check-prefix=ASM ; RUN: llc < %s -filetype=obj | llvm-readobj - --codeview | FileCheck %s --check-prefix=OBJ ; RUN: llc < %s | llvm-mc -filetype=obj --triple=x86_64-windows | llvm-readobj - --codeview | FileCheck %s --check-prefix=OBJ -; RUN: llc < %s -filetype=obj | obj2yaml | FileCheck %s --check-prefixes=YAML,YAML-STDOUT -; RUN: llc < %s -filetype=obj -o %t -; RUN: obj2yaml < %t | FileCheck %s --check-prefixes=YAML,YAML-FILE +; RUN: llc < %s -filetype=obj | obj2yaml | FileCheck %s --check-prefix=YAML ; C++ source to regenerate: ; $ cat a.cpp @@ -248,11 +246,6 @@ ; YAML: Subsections: ; YAML: - !Symbols ; YAML: Records: -; YAML: - Kind: S_OBJNAME -; YAML: ObjNameSym: -; YAML: Signature: 0 -; YAML-STDOUT: ObjectName: '' -; YAML-FILE: ObjectName: '{{.*}}' ; YAML: - Kind: S_COMPILE3 ; YAML: Compile3Sym: diff --git a/llvm/test/DebugInfo/COFF/multifunction.ll b/llvm/test/DebugInfo/COFF/multifunction.ll index fa93b1349756..9f99ba05047e 100644 --- a/llvm/test/DebugInfo/COFF/multifunction.ll +++ b/llvm/test/DebugInfo/COFF/multifunction.ll @@ -498,18 +498,18 @@ ; OBJ64: Characteristics [ (0x42300040) ; OBJ64: ] ; OBJ64: Relocations [ -; OBJ64-NEXT: 0x70 IMAGE_REL_AMD64_SECREL x -; OBJ64-NEXT: 0x74 IMAGE_REL_AMD64_SECTION x -; OBJ64-NEXT: 0xA8 IMAGE_REL_AMD64_SECREL x -; OBJ64-NEXT: 0xAC IMAGE_REL_AMD64_SECTION x -; OBJ64-NEXT: 0x10C IMAGE_REL_AMD64_SECREL y -; OBJ64-NEXT: 0x110 IMAGE_REL_AMD64_SECTION y -; OBJ64-NEXT: 0x144 IMAGE_REL_AMD64_SECREL y -; OBJ64-NEXT: 0x148 IMAGE_REL_AMD64_SECTION y -; OBJ64-NEXT: 0x1A8 IMAGE_REL_AMD64_SECREL f -; OBJ64-NEXT: 0x1AC IMAGE_REL_AMD64_SECTION f -; OBJ64-NEXT: 0x1E0 IMAGE_REL_AMD64_SECREL f -; OBJ64-NEXT: 0x1E4 IMAGE_REL_AMD64_SECTION f +; OBJ64-NEXT: 0x64 IMAGE_REL_AMD64_SECREL x +; OBJ64-NEXT: 0x68 IMAGE_REL_AMD64_SECTION x +; OBJ64-NEXT: 0x9C IMAGE_REL_AMD64_SECREL x +; OBJ64-NEXT: 0xA0 IMAGE_REL_AMD64_SECTION x +; OBJ64-NEXT: 0x100 IMAGE_REL_AMD64_SECREL y +; OBJ64-NEXT: 0x104 IMAGE_REL_AMD64_SECTION y +; OBJ64-NEXT: 0x138 
IMAGE_REL_AMD64_SECREL y +; OBJ64-NEXT: 0x13C IMAGE_REL_AMD64_SECTION y +; OBJ64-NEXT: 0x19C IMAGE_REL_AMD64_SECREL f +; OBJ64-NEXT: 0x1A0 IMAGE_REL_AMD64_SECTION f +; OBJ64-NEXT: 0x1D4 IMAGE_REL_AMD64_SECREL f +; OBJ64-NEXT: 0x1D8 IMAGE_REL_AMD64_SECTION f ; OBJ64-NEXT: ] ; OBJ64: Subsection [ ; OBJ64-NEXT: SubSectionType: Symbols (0xF1) diff --git a/llvm/test/DebugInfo/COFF/pr28747.ll b/llvm/test/DebugInfo/COFF/pr28747.ll index e9b3a8eae91b..c20e50e1992e 100644 --- a/llvm/test/DebugInfo/COFF/pr28747.ll +++ b/llvm/test/DebugInfo/COFF/pr28747.ll @@ -5,10 +5,8 @@ ; CHECK-NEXT: .long 241 ; CHECK-NEXT: .long [[SUBSEC_END:.*]]-[[SUBSEC_START:.*]] # Subsection size ; CHECK-NEXT: [[SUBSEC_START]]: -; CHECK-NEXT: .short [[OBJNAME_END:.*]]-[[OBJNAME_START:.*]] # Record length -; CHECK: [[OBJNAME_END]]: -; CHECK-NEXT: .short [[COMPILE3_END:.*]]-[[COMPILE3_START:.*]] # Record length -; CHECK: [[COMPILE3_END]]: +; CHECK-NEXT: .short [[C1_END:.*]]-[[C1_START:.*]] # Record length +; CHECK: [[C1_END]]: ; CHECK-NEXT: [[SUBSEC_END]]: ; CHECK-NEXT: .p2align 2 ; CHECK-NEXT: .cv_filechecksums diff --git a/llvm/test/DebugInfo/COFF/simple.ll b/llvm/test/DebugInfo/COFF/simple.ll index 70878ac33ef1..082640182dc2 100644 --- a/llvm/test/DebugInfo/COFF/simple.ll +++ b/llvm/test/DebugInfo/COFF/simple.ll @@ -36,10 +36,8 @@ ; X86-NEXT: .long [[COMPILE_END:.*]]-[[COMPILE_START:.*]] # ; Compiler information record ; X86-NEXT: [[COMPILE_START]]: -; X86-NEXT: .short [[OBJNAME_END:.*]]-[[OBJNAME_START:.*]] # -; X86: [[OBJNAME_END]]: -; X86-NEXT: .short [[COMPILE3_END:.*]]-[[COMPILE3_START:.*]] # -; X86: [[COMPILE3_END]]: +; X86-NEXT: .short [[C1_END:.*]]-[[C1_START:.*]] # +; X86: [[C1_END]]: ; X86-NEXT: [[COMPILE_END]]: ; X86-NEXT: .p2align 2 ; X86-NEXT: .cv_fpo_data _f @@ -90,11 +88,11 @@ ; OBJ32: Characteristics [ (0x42300040) ; OBJ32: ] ; OBJ32: Relocations [ -; OBJ32-NEXT: 0x50 IMAGE_REL_I386_DIR32NB _f -; OBJ32-NEXT: 0x9C IMAGE_REL_I386_SECREL _f -; OBJ32-NEXT: 0xA0 IMAGE_REL_I386_SECTION _f -; OBJ32-NEXT: 0xD4 IMAGE_REL_I386_SECREL _f -; OBJ32-NEXT: 0xD8 IMAGE_REL_I386_SECTION _f +; OBJ32-NEXT: 0x44 IMAGE_REL_I386_DIR32NB _f +; OBJ32-NEXT: 0x90 IMAGE_REL_I386_SECREL _f +; OBJ32-NEXT: 0x94 IMAGE_REL_I386_SECTION _f +; OBJ32-NEXT: 0xC8 IMAGE_REL_I386_SECREL _f +; OBJ32-NEXT: 0xCC IMAGE_REL_I386_SECTION _f ; OBJ32-NEXT: ] ; OBJ32: Subsection [ ; OBJ32-NEXT: SubSectionType: Symbols (0xF1) @@ -167,10 +165,8 @@ ; X64-NEXT: .long [[COMPILE_END:.*]]-[[COMPILE_START:.*]] # ; Compiler information record ; X64-NEXT: [[COMPILE_START]]: -; X64-NEXT: .short [[OBJNAME_END:.*]]-[[OBJNAME_START:.*]] # -; X64: [[OBJNAME_END]]: -; X64-NEXT: .short [[COMPILE3_END:.*]]-[[COMPILE3_START:.*]] # -; X64: [[COMPILE3_END]]: +; X64-NEXT: .short [[C1_END:.*]]-[[C1_START:.*]] # +; X64: [[C1_END]]: ; X64-NEXT: [[COMPILE_END]]: ; X64-NEXT: .p2align 2 ; X64-NEXT: .long 241 # Symbol subsection for f @@ -220,10 +216,10 @@ ; OBJ64: Characteristics [ (0x42300040) ; OBJ64: ] ; OBJ64: Relocations [ -; OBJ64-NEXT: 0x70 IMAGE_REL_AMD64_SECREL f -; OBJ64-NEXT: 0x74 IMAGE_REL_AMD64_SECTION f -; OBJ64-NEXT: 0xA8 IMAGE_REL_AMD64_SECREL f -; OBJ64-NEXT: 0xAC IMAGE_REL_AMD64_SECTION f +; OBJ64-NEXT: 0x64 IMAGE_REL_AMD64_SECREL f +; OBJ64-NEXT: 0x68 IMAGE_REL_AMD64_SECTION f +; OBJ64-NEXT: 0x9C IMAGE_REL_AMD64_SECREL f +; OBJ64-NEXT: 0xA0 IMAGE_REL_AMD64_SECTION f ; OBJ64-NEXT: ] ; OBJ64: Subsection [ ; OBJ64-NEXT: SubSectionType: Symbols (0xF1) diff --git a/llvm/test/DebugInfo/COFF/vframe-fpo.ll b/llvm/test/DebugInfo/COFF/vframe-fpo.ll index 
599d9f07f9d5..6b77ac579cb3 100644 --- a/llvm/test/DebugInfo/COFF/vframe-fpo.ll +++ b/llvm/test/DebugInfo/COFF/vframe-fpo.ll @@ -65,9 +65,7 @@ ; CODEVIEW-NEXT: Subsection [ ; CODEVIEW-NEXT: SubSectionType: Symbols (0xF1) ; CODEVIEW-NEXT: SubSectionSize: -; CODEVIEW-NEXT: ObjNameSym { -; CODEVIEW-NEXT: Kind: S_OBJNAME (0x1101) -; CODEVIEW: Compile3Sym { +; CODEVIEW-NEXT: Compile3Sym { ; CODEVIEW-NEXT: Kind: S_COMPILE3 (0x113C) ; CODEVIEW: } ; CODEVIEW: ] diff --git a/llvm/test/MC/AArch64/coff-debug.ll b/llvm/test/MC/AArch64/coff-debug.ll index 6099b3d570b4..bb7cc4db3f93 100644 --- a/llvm/test/MC/AArch64/coff-debug.ll +++ b/llvm/test/MC/AArch64/coff-debug.ll @@ -1,7 +1,5 @@ ; RUN: llc -mtriple=aarch64-windows -filetype=obj -o - %s | \ -; RUN: llvm-readobj --codeview - | FileCheck %s --check-prefixes=CHECK,CHECK-STDOUT -; RUN: llc -mtriple=aarch64-windows -filetype=obj -o %t.o %s -; RUN: llvm-readobj --codeview %t.o | FileCheck %s --check-prefixes=CHECK,CHECK-FILE +; RUN: llvm-readobj --codeview - | FileCheck %s ; ModuleID = 'a.c' source_filename = "a.c" @@ -68,12 +66,6 @@ attributes #0 = { noinline nounwind optnone "correctly-rounded-divide-sqrt-fp-ma ; CHECK: Magic: 0x4 ; CHECK: Subsection [ ; CHECK: SubSectionType: Symbols (0xF1) -; CHECK: ObjNameSym { -; CHECK: Kind: S_OBJNAME (0x1101) -; CHECK: Signature: 0x0 -; CHECK-STDOUT: ObjectName: {{$}} -; CHECK-FILE: ObjectName: {{.*}}.o -; CHECK: } ; CHECK: Compile3Sym { ; CHECK: Kind: S_COMPILE3 (0x113C) ; CHECK: Language: C (0x0) diff --git a/llvm/test/MC/ARM/coff-debugging-secrel.ll b/llvm/test/MC/ARM/coff-debugging-secrel.ll index 1e00b0547235..dbd251adae19 100644 --- a/llvm/test/MC/ARM/coff-debugging-secrel.ll +++ b/llvm/test/MC/ARM/coff-debugging-secrel.ll @@ -42,10 +42,10 @@ entry: ; CHECK-MSVC: Relocations [ ; CHECK-MSVC: Section {{.*}} .debug$S { -; CHECK-MSVC: 0x70 IMAGE_REL_ARM_SECREL function -; CHECK-MSVC: 0x74 IMAGE_REL_ARM_SECTION function -; CHECK-MSVC: 0xAC IMAGE_REL_ARM_SECREL function -; CHECK-MSVC: 0xB0 IMAGE_REL_ARM_SECTION function +; CHECK-MSVC: 0x64 IMAGE_REL_ARM_SECREL function +; CHECK-MSVC: 0x68 IMAGE_REL_ARM_SECTION function +; CHECK-MSVC: 0xA0 IMAGE_REL_ARM_SECREL function +; CHECK-MSVC: 0xA4 IMAGE_REL_ARM_SECTION function ; CHECK-MSVC: } ; CHECK-MSVC: ] diff --git a/llvm/test/MC/COFF/cv-compiler-info.ll b/llvm/test/MC/COFF/cv-compiler-info.ll index a9deb74bc9b9..d8cfe99736cf 100644 --- a/llvm/test/MC/COFF/cv-compiler-info.ll +++ b/llvm/test/MC/COFF/cv-compiler-info.ll @@ -1,6 +1,4 @@ -; RUN: llc -mtriple i686-pc-windows-msvc < %s | FileCheck %s --check-prefixes=CHECK,STDOUT -; RUN: llc -mtriple i686-pc-windows-msvc < %s -o %t -; RUN: FileCheck %s --input-file=%t --check-prefixes=CHECK,FILE +; RUN: llc -mtriple i686-pc-windows-msvc < %s | FileCheck %s ; ModuleID = 'D:\src\scopes\foo.cpp' source_filename = "D:\5Csrc\5Cscopes\5Cfoo.cpp" target datalayout = "e-m:x-p:32:32-i64:64-f80:32-n8:16:32-a:0:32-S32" @@ -22,23 +20,19 @@ attributes #0 = { nounwind sspstrong "correctly-rounded-divide-sqrt-fp-math"="fa ; One .debug$S section should contain an S_COMPILE3 record that identifies the ; source language and the version of the compiler based on the DICompileUnit. 
; CHECK: .section .debug$S,"dr" -; CHECK: .short 4353 # Record kind: S_OBJNAME -; CHECK-NEXT: .long 0 # Signature -; STDOUT-NEXT: .byte 0 # Object name -; FILE-NEXT: .asciz "{{.*}}{{\\\\|/}}cv-compiler-info.ll.tmp" # Object name ; CHECK: .short 4412 # Record kind: S_COMPILE3 -; CHECK-NEXT: .long 1 # Flags and language -; CHECK-NEXT: .short 7 # CPUType -; CHECK-NEXT: .short 4 # Frontend version -; CHECK-NEXT: .short 0 -; CHECK-NEXT: .short 0 -; CHECK-NEXT: .short 0 -; CHECK-NEXT: .short [[BACKEND_VERSION:[0-9]+]] # Backend version -; CHECK-NEXT: .short 0 -; CHECK-NEXT: .short 0 -; CHECK-NEXT: .short 0 -; CHECK-NEXT: .asciz "clang version 4.0.0 " # Null-terminated compiler version string -; CHECK-NOT: .short 4412 # Record kind: S_COMPILE3 +; CHECK: .long 1 # Flags and language +; CHECK: .short 7 # CPUType +; CHECK: .short 4 # Frontend version +; CHECK: .short 0 +; CHECK: .short 0 +; CHECK: .short 0 +; CHECK: .short [[BACKEND_VERSION:[0-9]+]] # Backend version +; CHECK: .short 0 +; CHECK: .short 0 +; CHECK: .short 0 +; CHECK: .asciz "clang version 4.0.0 " # Null-terminated compiler version string +; CHECK-NOT: .short 4412 # Record kind: S_COMPILE3 !1 = !DIFile(filename: "D:\5Csrc\5Cscopes\5Cfoo.cpp", directory: "D:\5Csrc\5Cscopes\5Cclang") !2 = !{} !7 = !{i32 2, !"CodeView", i32 1} diff --git a/llvm/tools/llc/llc.cpp b/llvm/tools/llc/llc.cpp index c07f4e66486c..9d80f062c8f9 100644 --- a/llvm/tools/llc/llc.cpp +++ b/llvm/tools/llc/llc.cpp @@ -605,9 +605,6 @@ static int compileModule(char **argv, LLVMContext &Context) { GetOutputStream(TheTarget->getName(), TheTriple.getOS(), argv[0]); if (!Out) return 1; - // Ensure the filename is passed down to CodeViewDebug. - Target->Options.ObjectFilenameForDebug = Out->outputFilename(); - std::unique_ptr DwoOut; if (!SplitDwarfOutputFile.empty()) { std::error_code EC; diff --git a/llvm/tools/llvm-lto2/llvm-lto2.cpp b/llvm/tools/llvm-lto2/llvm-lto2.cpp index 7416e5850944..6830bd8f1b2c 100644 --- a/llvm/tools/llvm-lto2/llvm-lto2.cpp +++ b/llvm/tools/llvm-lto2/llvm-lto2.cpp @@ -378,7 +378,7 @@ static int run(int argc, char **argv) { std::error_code EC; auto S = std::make_unique(Path, EC, sys::fs::OF_None); check(EC, Path); - return std::make_unique(std::move(S), Path); + return std::make_unique(std::move(S)); }; auto AddBuffer = [&](size_t Task, std::unique_ptr MB) { From a282ea4898efe2b2e57a93b44e90c9e497520cfb Mon Sep 17 00:00:00 2001 From: Alexandre Ganea Date: Tue, 21 Dec 2021 18:59:47 -0500 Subject: [PATCH 014/293] Reland - [CodeView] Emit S_OBJNAME record Reland integrates build fixes & further review suggestions. Thanks to @zturner for the initial S_OBJNAME patch! 
Differential Revision: https://reviews.llvm.org/D43002 --- clang/include/clang/Basic/CodeGenOptions.h | 3 + clang/include/clang/Driver/Job.h | 4 + clang/include/clang/Driver/Options.td | 5 ++ clang/lib/CodeGen/BackendUtil.cpp | 4 +- clang/lib/Driver/ToolChains/Clang.cpp | 80 ++++++++++++++++--- clang/test/CMakeLists.txt | 1 + clang/test/CodeGenCXX/debug-info-objname.cpp | 41 ++++++++++ llvm/include/llvm/MC/MCTargetOptions.h | 1 + llvm/include/llvm/Support/Caching.h | 5 +- llvm/include/llvm/Support/ToolOutputFile.h | 5 +- llvm/include/llvm/Target/TargetOptions.h | 5 ++ llvm/lib/CodeGen/AsmPrinter/CodeViewDebug.cpp | 24 ++++++ llvm/lib/CodeGen/AsmPrinter/CodeViewDebug.h | 2 + llvm/lib/LTO/LTOBackend.cpp | 2 + llvm/lib/Support/Caching.cpp | 13 ++- llvm/test/DebugInfo/COFF/globals.ll | 9 ++- llvm/test/DebugInfo/COFF/multifunction.ll | 24 +++--- llvm/test/DebugInfo/COFF/pr28747.ll | 6 +- llvm/test/DebugInfo/COFF/simple.ll | 30 ++++--- llvm/test/DebugInfo/COFF/vframe-fpo.ll | 4 +- llvm/test/MC/AArch64/coff-debug.ll | 10 ++- llvm/test/MC/ARM/coff-debugging-secrel.ll | 8 +- llvm/test/MC/COFF/cv-compiler-info.ll | 32 +++++--- llvm/tools/llc/llc.cpp | 3 + llvm/tools/llvm-lto2/llvm-lto2.cpp | 2 +- 25 files changed, 255 insertions(+), 68 deletions(-) create mode 100644 clang/test/CodeGenCXX/debug-info-objname.cpp diff --git a/clang/include/clang/Basic/CodeGenOptions.h b/clang/include/clang/Basic/CodeGenOptions.h index 960aa419b490..33ec03a17136 100644 --- a/clang/include/clang/Basic/CodeGenOptions.h +++ b/clang/include/clang/Basic/CodeGenOptions.h @@ -227,6 +227,9 @@ class CodeGenOptions : public CodeGenOptionsBase { /// Output filename for the split debug info, not used in the skeleton CU. std::string SplitDwarfOutput; + /// Output filename used in the COFF debug information. + std::string ObjectFilenameForDebug; + /// The name of the relocation model to use. llvm::Reloc::Model RelocationModel; diff --git a/clang/include/clang/Driver/Job.h b/clang/include/clang/Driver/Job.h index 8b287638a271..6e3b51f2a799 100644 --- a/clang/include/clang/Driver/Job.h +++ b/clang/include/clang/Driver/Job.h @@ -204,6 +204,10 @@ class Command { /// from the parent process will be used. 
virtual void setEnvironment(llvm::ArrayRef NewEnvironment); + void replaceArguments(llvm::opt::ArgStringList List) { + Arguments = std::move(List); + } + const char *getExecutable() const { return Executable; } const llvm::opt::ArgStringList &getArguments() const { return Arguments; } diff --git a/clang/include/clang/Driver/Options.td b/clang/include/clang/Driver/Options.td index 3b47512501d3..08e9e1a3432a 100644 --- a/clang/include/clang/Driver/Options.td +++ b/clang/include/clang/Driver/Options.td @@ -3802,6 +3802,11 @@ def o : JoinedOrSeparate<["-"], "o">, Flags<[NoXarchOption, RenderAsInput, CC1Option, CC1AsOption, FC1Option, FlangOption]>, HelpText<"Write output to ">, MetaVarName<"">, MarshallingInfoString>; +def object_file_name_EQ : Joined<["-"], "object-file-name=">, Flags<[CC1Option, CC1AsOption, CoreOption]>, + HelpText<"Set the output for debug infos">, MetaVarName<"">, + MarshallingInfoString>; +def object_file_name : Separate<["-"], "object-file-name">, Flags<[CC1Option, CC1AsOption, CoreOption]>, + Alias; def pagezero__size : JoinedOrSeparate<["-"], "pagezero_size">; def pass_exit_codes : Flag<["-", "--"], "pass-exit-codes">, Flags<[Unsupported]>; def pedantic_errors : Flag<["-", "--"], "pedantic-errors">, Group, Flags<[CC1Option]>, diff --git a/clang/lib/CodeGen/BackendUtil.cpp b/clang/lib/CodeGen/BackendUtil.cpp index 3195615ae561..5e16d3525b38 100644 --- a/clang/lib/CodeGen/BackendUtil.cpp +++ b/clang/lib/CodeGen/BackendUtil.cpp @@ -646,6 +646,7 @@ static bool initTargetOptions(DiagnosticsEngine &Diags, Options.MCOptions.Argv0 = CodeGenOpts.Argv0; Options.MCOptions.CommandLineArgs = CodeGenOpts.CommandLineArgs; Options.DebugStrictDwarf = CodeGenOpts.DebugStrictDwarf; + Options.ObjectFilenameForDebug = CodeGenOpts.ObjectFilenameForDebug; return true; } @@ -1583,7 +1584,8 @@ static void runThinLTOBackend( return; auto AddStream = [&](size_t Task) { - return std::make_unique(std::move(OS)); + return std::make_unique(std::move(OS), + CGOpts.ObjectFilenameForDebug); }; lto::Config Conf; if (CGOpts.SaveTempsFilePrefix != "") { diff --git a/clang/lib/Driver/ToolChains/Clang.cpp b/clang/lib/Driver/ToolChains/Clang.cpp index d76f810f1a7f..2a3723975568 100644 --- a/clang/lib/Driver/ToolChains/Clang.cpp +++ b/clang/lib/Driver/ToolChains/Clang.cpp @@ -625,8 +625,9 @@ getFramePointerKind(const ArgList &Args, const llvm::Triple &Triple) { } /// Add a CC1 option to specify the debug compilation directory. -static void addDebugCompDirArg(const ArgList &Args, ArgStringList &CmdArgs, - const llvm::vfs::FileSystem &VFS) { +static const char *addDebugCompDirArg(const ArgList &Args, + ArgStringList &CmdArgs, + const llvm::vfs::FileSystem &VFS) { if (Arg *A = Args.getLastArg(options::OPT_ffile_compilation_dir_EQ, options::OPT_fdebug_compilation_dir_EQ)) { if (A->getOption().matches(options::OPT_ffile_compilation_dir_EQ)) @@ -638,6 +639,31 @@ static void addDebugCompDirArg(const ArgList &Args, ArgStringList &CmdArgs, VFS.getCurrentWorkingDirectory()) { CmdArgs.push_back(Args.MakeArgString("-fdebug-compilation-dir=" + *CWD)); } + StringRef Path(CmdArgs.back()); + return Path.substr(Path.find('=') + 1).data(); +} + +static void addDebugObjectName(const ArgList &Args, ArgStringList &CmdArgs, + const char *DebugCompilationDir, + const char *OutputFileName) { + // No need to generate a value for -object-file-name if it was provided. 
+ for (auto *Arg : Args.filtered(options::OPT_Xclang)) + if (StringRef(Arg->getValue()).startswith("-object-file-name")) + return; + + if (Args.hasArg(options::OPT_object_file_name_EQ)) + return; + + SmallString<128> ObjFileNameForDebug(OutputFileName); + if (ObjFileNameForDebug != "-" && + !llvm::sys::path::is_absolute(ObjFileNameForDebug) && + (!DebugCompilationDir || + llvm::sys::path::is_absolute(DebugCompilationDir))) { + // Make the path absolute in the debug infos like MSVC does. + llvm::sys::fs::make_absolute(ObjFileNameForDebug); + } + CmdArgs.push_back( + Args.MakeArgString(Twine("-object-file-name=") + ObjFileNameForDebug)); } /// Add a CC1 and CC1AS option to specify the debug file path prefix map. @@ -5649,7 +5675,8 @@ void Clang::ConstructJob(Compilation &C, const JobAction &JA, CmdArgs.push_back("-fno-autolink"); // Add in -fdebug-compilation-dir if necessary. - addDebugCompDirArg(Args, CmdArgs, D.getVFS()); + const char *DebugCompilationDir = + addDebugCompDirArg(Args, CmdArgs, D.getVFS()); addDebugPrefixMapArg(D, Args, CmdArgs); @@ -7021,6 +7048,11 @@ void Clang::ConstructJob(Compilation &C, const JobAction &JA, CmdArgs.push_back(Args.MakeArgString(Str)); } + // Add the output path to the object file for CodeView debug infos. + if (EmitCodeView && Output.isFilename()) + addDebugObjectName(Args, CmdArgs, DebugCompilationDir, + Output.getFilename()); + // Add the "-o out -x type src.c" flags last. This is done primarily to make // the -cc1 command easier to edit when reproducing compiler crashes. if (Output.getType() == types::TY_Dependencies) { @@ -7638,11 +7670,14 @@ void ClangAs::ConstructJob(Compilation &C, const JobAction &JA, Args.AddAllArgs(CmdArgs, options::OPT_I_Group); // Determine the original source input. - const Action *SourceAction = &JA; - while (SourceAction->getKind() != Action::InputClass) { - assert(!SourceAction->getInputs().empty() && "unexpected root action!"); - SourceAction = SourceAction->getInputs()[0]; - } + auto FindSource = [](const Action *S) -> const Action * { + while (S->getKind() != Action::InputClass) { + assert(!S->getInputs().empty() && "unexpected root action!"); + S = S->getInputs()[0]; + } + return S; + }; + const Action *SourceAction = FindSource(&JA); // Forward -g and handle debug info related flags, assuming we are dealing // with an actual assembly file. @@ -7661,6 +7696,10 @@ void ClangAs::ConstructJob(Compilation &C, const JobAction &JA, codegenoptions::DebugInfoKind DebugInfoKind = codegenoptions::NoDebugInfo; + // Add the -fdebug-compilation-dir flag if needed. + const char *DebugCompilationDir = + addDebugCompDirArg(Args, CmdArgs, C.getDriver().getVFS()); + if (SourceAction->getType() == types::TY_Asm || SourceAction->getType() == types::TY_PP_Asm) { // You might think that it would be ok to set DebugInfoKind outside of @@ -7669,8 +7708,6 @@ void ClangAs::ConstructJob(Compilation &C, const JobAction &JA, // and it's not clear whether that test is just overly restrictive. DebugInfoKind = (WantDebug ? codegenoptions::DebugInfoConstructor : codegenoptions::NoDebugInfo); - // Add the -fdebug-compilation-dir flag if needed. 
- addDebugCompDirArg(Args, CmdArgs, C.getDriver().getVFS()); addDebugPrefixMapArg(getToolChain().getDriver(), Args, CmdArgs); @@ -7781,6 +7818,29 @@ void ClangAs::ConstructJob(Compilation &C, const JobAction &JA, Args.AddAllArgs(CmdArgs, options::OPT_mllvm); + if (DebugInfoKind > codegenoptions::NoDebugInfo && Output.isFilename()) + addDebugObjectName(Args, CmdArgs, DebugCompilationDir, + Output.getFilename()); + + // Fixup any previous commands that use -object-file-name because when we + // generated them, the final .obj name wasn't yet known. + for (Command &J : C.getJobs()) { + if (SourceAction != FindSource(&J.getSource())) + continue; + auto &JArgs = J.getArguments(); + for (unsigned I = 0; I < JArgs.size(); ++I) { + if (StringRef(JArgs[I]).startswith("-object-file-name=") && + Output.isFilename()) { + ArgStringList NewArgs(JArgs.begin(), JArgs.begin() + I); + addDebugObjectName(Args, NewArgs, DebugCompilationDir, + Output.getFilename()); + NewArgs.append(JArgs.begin() + I + 1, JArgs.end()); + J.replaceArguments(NewArgs); + break; + } + } + } + assert(Output.isFilename() && "Unexpected lipo output."); CmdArgs.push_back("-o"); CmdArgs.push_back(Output.getFilename()); diff --git a/clang/test/CMakeLists.txt b/clang/test/CMakeLists.txt index 32405fad820a..07de12b7c190 100644 --- a/clang/test/CMakeLists.txt +++ b/clang/test/CMakeLists.txt @@ -126,6 +126,7 @@ if( NOT CLANG_BUILT_STANDALONE ) llvm-nm llvm-objcopy llvm-objdump + llvm-pdbutil llvm-profdata llvm-rc llvm-readelf diff --git a/clang/test/CodeGenCXX/debug-info-objname.cpp b/clang/test/CodeGenCXX/debug-info-objname.cpp new file mode 100644 index 000000000000..73d3bb4ec4eb --- /dev/null +++ b/clang/test/CodeGenCXX/debug-info-objname.cpp @@ -0,0 +1,41 @@ +// REQUIRES: x86-registered-target +// RUN: rm -rf %t && mkdir %t && cd %t +// RUN: cp %s debug-info-objname.cpp + +/// No output file provided, input file is relative, we emit an absolute path (MSVC behavior). +// RUN: %clang_cl --target=x86_64-windows-msvc /c /Z7 -nostdinc debug-info-objname.cpp +// RUN: llvm-pdbutil dump -all debug-info-objname.obj | FileCheck %s --check-prefix=ABSOLUTE + +/// No output file provided, input file is absolute, we emit an absolute path (MSVC behavior). +// RUN: %clang_cl --target=x86_64-windows-msvc /c /Z7 -nostdinc -- %t/debug-info-objname.cpp +// RUN: llvm-pdbutil dump -all debug-info-objname.obj | FileCheck %s --check-prefix=ABSOLUTE + +/// The output file is provided as an absolute path, we emit an absolute path. +// RUN: %clang_cl --target=x86_64-windows-msvc /c /Z7 -nostdinc /Fo%t/debug-info-objname.obj -- %t/debug-info-objname.cpp +// RUN: llvm-pdbutil dump -all debug-info-objname.obj | FileCheck %s --check-prefix=ABSOLUTE + +/// The output file is provided as relative path, -working-dir is provided, we emit an absolute path. +// RUN: %clang_cl --target=x86_64-windows-msvc /c /Z7 -nostdinc -working-dir=%t debug-info-objname.cpp +// RUN: llvm-pdbutil dump -all debug-info-objname.obj | FileCheck %s --check-prefix=ABSOLUTE + +/// The input file name is relative and we specify -fdebug-compilation-dir, we emit a relative path. +// RUN: %clang_cl --target=x86_64-windows-msvc /c /Z7 -nostdinc -fdebug-compilation-dir=. debug-info-objname.cpp +// RUN: llvm-pdbutil dump -all debug-info-objname.obj | FileCheck %s --check-prefix=RELATIVE + +/// Ensure /FA emits an .asm file which contains the path to the final .obj, not the .asm +// RUN: %clang_cl --target=x86_64-windows-msvc /c /Z7 -nostdinc -fdebug-compilation-dir=. 
/FA debug-info-objname.cpp +// RUN: FileCheck --input-file=debug-info-objname.asm --check-prefix=ASM %s + +/// Same thing for -save-temps +// RUN: %clang_cl --target=x86_64-windows-msvc /c /Z7 -nostdinc -fdebug-compilation-dir=. /clang:-save-temps debug-info-objname.cpp +// RUN: FileCheck --input-file=debug-info-objname.asm --check-prefix=ASM %s + +int main() { + return 1; +} + +// ABSOLUTE: S_OBJNAME [size = [[#]]] sig=0, `{{.+}}debug-info-objname.obj` +// RELATIVE: S_OBJNAME [size = [[#]]] sig=0, `debug-info-objname.obj` +// ASM: Record kind: S_OBJNAME +// ASM-NEXT: .long 0 +// ASM-NEXT: .asciz "debug-info-objname.obj" diff --git a/llvm/include/llvm/MC/MCTargetOptions.h b/llvm/include/llvm/MC/MCTargetOptions.h index db50dc6749e2..3510eeca8953 100644 --- a/llvm/include/llvm/MC/MCTargetOptions.h +++ b/llvm/include/llvm/MC/MCTargetOptions.h @@ -62,6 +62,7 @@ class MCTargetOptions { std::string ABIName; std::string AssemblyLanguage; std::string SplitDwarfFile; + std::string COFFOutputFilename; const char *Argv0 = nullptr; ArrayRef CommandLineArgs; diff --git a/llvm/include/llvm/Support/Caching.h b/llvm/include/llvm/Support/Caching.h index fbf5f3b5687a..5c30a822ef38 100644 --- a/llvm/include/llvm/Support/Caching.h +++ b/llvm/include/llvm/Support/Caching.h @@ -27,8 +27,11 @@ class MemoryBuffer; /// that can be done by deriving from this class and overriding the destructor. class CachedFileStream { public: - CachedFileStream(std::unique_ptr OS) : OS(std::move(OS)) {} + CachedFileStream(std::unique_ptr OS, + std::string OSPath = "") + : OS(std::move(OS)), ObjectPathName(OSPath) {} std::unique_ptr OS; + std::string ObjectPathName; virtual ~CachedFileStream() = default; }; diff --git a/llvm/include/llvm/Support/ToolOutputFile.h b/llvm/include/llvm/Support/ToolOutputFile.h index ec1d6ae52268..6b7222550b9f 100644 --- a/llvm/include/llvm/Support/ToolOutputFile.h +++ b/llvm/include/llvm/Support/ToolOutputFile.h @@ -29,9 +29,10 @@ class ToolOutputFile { /// raw_fd_ostream is destructed. It installs cleanups in its constructor and /// uninstalls them in its destructor. class CleanupInstaller { + public: /// The name of the file. std::string Filename; - public: + /// The flag which indicates whether we should not delete the file. bool Keep; @@ -64,6 +65,8 @@ class ToolOutputFile { /// Indicate that the tool's job wrt this output file has been successful and /// the file should not be deleted. void keep() { Installer.Keep = true; } + + const std::string &outputFilename() { return Installer.Filename; } }; } // end llvm namespace diff --git a/llvm/include/llvm/Target/TargetOptions.h b/llvm/include/llvm/Target/TargetOptions.h index 912f6d1c153a..c639f326abc9 100644 --- a/llvm/include/llvm/Target/TargetOptions.h +++ b/llvm/include/llvm/Target/TargetOptions.h @@ -418,6 +418,11 @@ namespace llvm { /// Machine level options. MCTargetOptions MCOptions; + + /// Stores the filename/path of the final .o/.obj file, to be written in the + /// debug information. This is used for emitting the CodeView S_OBJNAME + /// record. 
+ std::string ObjectFilenameForDebug; }; } // End llvm namespace diff --git a/llvm/lib/CodeGen/AsmPrinter/CodeViewDebug.cpp b/llvm/lib/CodeGen/AsmPrinter/CodeViewDebug.cpp index 928252a3d041..d621108408f0 100644 --- a/llvm/lib/CodeGen/AsmPrinter/CodeViewDebug.cpp +++ b/llvm/lib/CodeGen/AsmPrinter/CodeViewDebug.cpp @@ -649,6 +649,7 @@ void CodeViewDebug::endModule() { switchToDebugSectionForSymbol(nullptr); MCSymbol *CompilerInfo = beginCVSubsection(DebugSubsectionKind::Symbols); + emitObjName(); emitCompilerInformation(); endCVSubsection(CompilerInfo); @@ -784,6 +785,29 @@ void CodeViewDebug::emitTypeGlobalHashes() { } } +void CodeViewDebug::emitObjName() { + MCSymbol *CompilerEnd = beginSymbolRecord(SymbolKind::S_OBJNAME); + + StringRef PathRef(Asm->TM.Options.ObjectFilenameForDebug); + llvm::SmallString<256> PathStore(PathRef); + + if (PathRef.empty() || PathRef == "-") { + // Don't emit the filename if we're writing to stdout or to /dev/null. + PathRef = {}; + } else { + llvm::sys::path::remove_dots(PathStore, /*remove_dot_dot=*/true); + PathRef = PathStore; + } + + OS.AddComment("Signature"); + OS.emitIntValue(0, 4); + + OS.AddComment("Object name"); + emitNullTerminatedSymbolName(OS, PathRef); + + endSymbolRecord(CompilerEnd); +} + namespace { struct Version { int Part[4]; diff --git a/llvm/lib/CodeGen/AsmPrinter/CodeViewDebug.h b/llvm/lib/CodeGen/AsmPrinter/CodeViewDebug.h index 6f88e15ee8fe..d1fc3cdccb20 100644 --- a/llvm/lib/CodeGen/AsmPrinter/CodeViewDebug.h +++ b/llvm/lib/CodeGen/AsmPrinter/CodeViewDebug.h @@ -302,6 +302,8 @@ class LLVM_LIBRARY_VISIBILITY CodeViewDebug : public DebugHandlerBase { void emitTypeGlobalHashes(); + void emitObjName(); + void emitCompilerInformation(); void emitBuildInfo(); diff --git a/llvm/lib/LTO/LTOBackend.cpp b/llvm/lib/LTO/LTOBackend.cpp index 15e20fec565f..855d0fc8a8be 100644 --- a/llvm/lib/LTO/LTOBackend.cpp +++ b/llvm/lib/LTO/LTOBackend.cpp @@ -412,6 +412,8 @@ static void codegen(const Config &Conf, TargetMachine *TM, if (Error Err = StreamOrErr.takeError()) report_fatal_error(std::move(Err)); std::unique_ptr &Stream = *StreamOrErr; + TM->Options.ObjectFilenameForDebug = Stream->ObjectPathName; + legacy::PassManager CodeGenPasses; CodeGenPasses.add( createImmutableModuleSummaryIndexWrapperPass(&CombinedIndex)); diff --git a/llvm/lib/Support/Caching.cpp b/llvm/lib/Support/Caching.cpp index 635647d72468..8c685640f791 100644 --- a/llvm/lib/Support/Caching.cpp +++ b/llvm/lib/Support/Caching.cpp @@ -79,14 +79,13 @@ Expected llvm::localCache(Twine CacheNameRef, struct CacheStream : CachedFileStream { AddBufferFn AddBuffer; sys::fs::TempFile TempFile; - std::string EntryPath; unsigned Task; CacheStream(std::unique_ptr OS, AddBufferFn AddBuffer, sys::fs::TempFile TempFile, std::string EntryPath, unsigned Task) - : CachedFileStream(std::move(OS)), AddBuffer(std::move(AddBuffer)), - TempFile(std::move(TempFile)), EntryPath(std::move(EntryPath)), + : CachedFileStream(std::move(OS), std::move(EntryPath)), + AddBuffer(std::move(AddBuffer)), TempFile(std::move(TempFile)), Task(Task) {} ~CacheStream() { @@ -99,7 +98,7 @@ Expected llvm::localCache(Twine CacheNameRef, // Open the file first to avoid racing with a cache pruner. 
ErrorOr> MBOrErr = MemoryBuffer::getOpenFile( - sys::fs::convertFDToNativeFile(TempFile.FD), EntryPath, + sys::fs::convertFDToNativeFile(TempFile.FD), ObjectPathName, /*FileSize=*/-1, /*RequiresNullTerminator=*/false); if (!MBOrErr) report_fatal_error(Twine("Failed to open new cache file ") + @@ -115,14 +114,14 @@ Expected llvm::localCache(Twine CacheNameRef, // AddBuffer a copy of the bytes we wrote in that case. We do this // instead of just using the existing file, because the pruner might // delete the file before we get a chance to use it. - Error E = TempFile.keep(EntryPath); + Error E = TempFile.keep(ObjectPathName); E = handleErrors(std::move(E), [&](const ECError &E) -> Error { std::error_code EC = E.convertToErrorCode(); if (EC != errc::permission_denied) return errorCodeToError(EC); auto MBCopy = MemoryBuffer::getMemBufferCopy((*MBOrErr)->getBuffer(), - EntryPath); + ObjectPathName); MBOrErr = std::move(MBCopy); // FIXME: should we consume the discard error? @@ -133,7 +132,7 @@ Expected llvm::localCache(Twine CacheNameRef, if (E) report_fatal_error(Twine("Failed to rename temporary file ") + - TempFile.TmpName + " to " + EntryPath + ": " + + TempFile.TmpName + " to " + ObjectPathName + ": " + toString(std::move(E)) + "\n"); AddBuffer(Task, std::move(*MBOrErr)); diff --git a/llvm/test/DebugInfo/COFF/globals.ll b/llvm/test/DebugInfo/COFF/globals.ll index 7960f63b1474..71c18a76f4b5 100644 --- a/llvm/test/DebugInfo/COFF/globals.ll +++ b/llvm/test/DebugInfo/COFF/globals.ll @@ -1,7 +1,9 @@ ; RUN: llc < %s | FileCheck %s --check-prefix=ASM ; RUN: llc < %s -filetype=obj | llvm-readobj - --codeview | FileCheck %s --check-prefix=OBJ ; RUN: llc < %s | llvm-mc -filetype=obj --triple=x86_64-windows | llvm-readobj - --codeview | FileCheck %s --check-prefix=OBJ -; RUN: llc < %s -filetype=obj | obj2yaml | FileCheck %s --check-prefix=YAML +; RUN: llc < %s -filetype=obj | obj2yaml | FileCheck %s --check-prefixes=YAML,YAML-STDOUT +; RUN: llc < %s -filetype=obj -o %t +; RUN: obj2yaml < %t | FileCheck %s --check-prefixes=YAML,YAML-FILE ; C++ source to regenerate: ; $ cat a.cpp @@ -246,6 +248,11 @@ ; YAML: Subsections: ; YAML: - !Symbols ; YAML: Records: +; YAML: - Kind: S_OBJNAME +; YAML: ObjNameSym: +; YAML: Signature: 0 +; YAML-STDOUT: ObjectName: '' +; YAML-FILE: ObjectName: '{{.*}}' ; YAML: - Kind: S_COMPILE3 ; YAML: Compile3Sym: diff --git a/llvm/test/DebugInfo/COFF/multifunction.ll b/llvm/test/DebugInfo/COFF/multifunction.ll index 9f99ba05047e..fa93b1349756 100644 --- a/llvm/test/DebugInfo/COFF/multifunction.ll +++ b/llvm/test/DebugInfo/COFF/multifunction.ll @@ -498,18 +498,18 @@ ; OBJ64: Characteristics [ (0x42300040) ; OBJ64: ] ; OBJ64: Relocations [ -; OBJ64-NEXT: 0x64 IMAGE_REL_AMD64_SECREL x -; OBJ64-NEXT: 0x68 IMAGE_REL_AMD64_SECTION x -; OBJ64-NEXT: 0x9C IMAGE_REL_AMD64_SECREL x -; OBJ64-NEXT: 0xA0 IMAGE_REL_AMD64_SECTION x -; OBJ64-NEXT: 0x100 IMAGE_REL_AMD64_SECREL y -; OBJ64-NEXT: 0x104 IMAGE_REL_AMD64_SECTION y -; OBJ64-NEXT: 0x138 IMAGE_REL_AMD64_SECREL y -; OBJ64-NEXT: 0x13C IMAGE_REL_AMD64_SECTION y -; OBJ64-NEXT: 0x19C IMAGE_REL_AMD64_SECREL f -; OBJ64-NEXT: 0x1A0 IMAGE_REL_AMD64_SECTION f -; OBJ64-NEXT: 0x1D4 IMAGE_REL_AMD64_SECREL f -; OBJ64-NEXT: 0x1D8 IMAGE_REL_AMD64_SECTION f +; OBJ64-NEXT: 0x70 IMAGE_REL_AMD64_SECREL x +; OBJ64-NEXT: 0x74 IMAGE_REL_AMD64_SECTION x +; OBJ64-NEXT: 0xA8 IMAGE_REL_AMD64_SECREL x +; OBJ64-NEXT: 0xAC IMAGE_REL_AMD64_SECTION x +; OBJ64-NEXT: 0x10C IMAGE_REL_AMD64_SECREL y +; OBJ64-NEXT: 0x110 IMAGE_REL_AMD64_SECTION y +; OBJ64-NEXT: 0x144 
IMAGE_REL_AMD64_SECREL y +; OBJ64-NEXT: 0x148 IMAGE_REL_AMD64_SECTION y +; OBJ64-NEXT: 0x1A8 IMAGE_REL_AMD64_SECREL f +; OBJ64-NEXT: 0x1AC IMAGE_REL_AMD64_SECTION f +; OBJ64-NEXT: 0x1E0 IMAGE_REL_AMD64_SECREL f +; OBJ64-NEXT: 0x1E4 IMAGE_REL_AMD64_SECTION f ; OBJ64-NEXT: ] ; OBJ64: Subsection [ ; OBJ64-NEXT: SubSectionType: Symbols (0xF1) diff --git a/llvm/test/DebugInfo/COFF/pr28747.ll b/llvm/test/DebugInfo/COFF/pr28747.ll index c20e50e1992e..e9b3a8eae91b 100644 --- a/llvm/test/DebugInfo/COFF/pr28747.ll +++ b/llvm/test/DebugInfo/COFF/pr28747.ll @@ -5,8 +5,10 @@ ; CHECK-NEXT: .long 241 ; CHECK-NEXT: .long [[SUBSEC_END:.*]]-[[SUBSEC_START:.*]] # Subsection size ; CHECK-NEXT: [[SUBSEC_START]]: -; CHECK-NEXT: .short [[C1_END:.*]]-[[C1_START:.*]] # Record length -; CHECK: [[C1_END]]: +; CHECK-NEXT: .short [[OBJNAME_END:.*]]-[[OBJNAME_START:.*]] # Record length +; CHECK: [[OBJNAME_END]]: +; CHECK-NEXT: .short [[COMPILE3_END:.*]]-[[COMPILE3_START:.*]] # Record length +; CHECK: [[COMPILE3_END]]: ; CHECK-NEXT: [[SUBSEC_END]]: ; CHECK-NEXT: .p2align 2 ; CHECK-NEXT: .cv_filechecksums diff --git a/llvm/test/DebugInfo/COFF/simple.ll b/llvm/test/DebugInfo/COFF/simple.ll index 082640182dc2..70878ac33ef1 100644 --- a/llvm/test/DebugInfo/COFF/simple.ll +++ b/llvm/test/DebugInfo/COFF/simple.ll @@ -36,8 +36,10 @@ ; X86-NEXT: .long [[COMPILE_END:.*]]-[[COMPILE_START:.*]] # ; Compiler information record ; X86-NEXT: [[COMPILE_START]]: -; X86-NEXT: .short [[C1_END:.*]]-[[C1_START:.*]] # -; X86: [[C1_END]]: +; X86-NEXT: .short [[OBJNAME_END:.*]]-[[OBJNAME_START:.*]] # +; X86: [[OBJNAME_END]]: +; X86-NEXT: .short [[COMPILE3_END:.*]]-[[COMPILE3_START:.*]] # +; X86: [[COMPILE3_END]]: ; X86-NEXT: [[COMPILE_END]]: ; X86-NEXT: .p2align 2 ; X86-NEXT: .cv_fpo_data _f @@ -88,11 +90,11 @@ ; OBJ32: Characteristics [ (0x42300040) ; OBJ32: ] ; OBJ32: Relocations [ -; OBJ32-NEXT: 0x44 IMAGE_REL_I386_DIR32NB _f -; OBJ32-NEXT: 0x90 IMAGE_REL_I386_SECREL _f -; OBJ32-NEXT: 0x94 IMAGE_REL_I386_SECTION _f -; OBJ32-NEXT: 0xC8 IMAGE_REL_I386_SECREL _f -; OBJ32-NEXT: 0xCC IMAGE_REL_I386_SECTION _f +; OBJ32-NEXT: 0x50 IMAGE_REL_I386_DIR32NB _f +; OBJ32-NEXT: 0x9C IMAGE_REL_I386_SECREL _f +; OBJ32-NEXT: 0xA0 IMAGE_REL_I386_SECTION _f +; OBJ32-NEXT: 0xD4 IMAGE_REL_I386_SECREL _f +; OBJ32-NEXT: 0xD8 IMAGE_REL_I386_SECTION _f ; OBJ32-NEXT: ] ; OBJ32: Subsection [ ; OBJ32-NEXT: SubSectionType: Symbols (0xF1) @@ -165,8 +167,10 @@ ; X64-NEXT: .long [[COMPILE_END:.*]]-[[COMPILE_START:.*]] # ; Compiler information record ; X64-NEXT: [[COMPILE_START]]: -; X64-NEXT: .short [[C1_END:.*]]-[[C1_START:.*]] # -; X64: [[C1_END]]: +; X64-NEXT: .short [[OBJNAME_END:.*]]-[[OBJNAME_START:.*]] # +; X64: [[OBJNAME_END]]: +; X64-NEXT: .short [[COMPILE3_END:.*]]-[[COMPILE3_START:.*]] # +; X64: [[COMPILE3_END]]: ; X64-NEXT: [[COMPILE_END]]: ; X64-NEXT: .p2align 2 ; X64-NEXT: .long 241 # Symbol subsection for f @@ -216,10 +220,10 @@ ; OBJ64: Characteristics [ (0x42300040) ; OBJ64: ] ; OBJ64: Relocations [ -; OBJ64-NEXT: 0x64 IMAGE_REL_AMD64_SECREL f -; OBJ64-NEXT: 0x68 IMAGE_REL_AMD64_SECTION f -; OBJ64-NEXT: 0x9C IMAGE_REL_AMD64_SECREL f -; OBJ64-NEXT: 0xA0 IMAGE_REL_AMD64_SECTION f +; OBJ64-NEXT: 0x70 IMAGE_REL_AMD64_SECREL f +; OBJ64-NEXT: 0x74 IMAGE_REL_AMD64_SECTION f +; OBJ64-NEXT: 0xA8 IMAGE_REL_AMD64_SECREL f +; OBJ64-NEXT: 0xAC IMAGE_REL_AMD64_SECTION f ; OBJ64-NEXT: ] ; OBJ64: Subsection [ ; OBJ64-NEXT: SubSectionType: Symbols (0xF1) diff --git a/llvm/test/DebugInfo/COFF/vframe-fpo.ll b/llvm/test/DebugInfo/COFF/vframe-fpo.ll index 
6b77ac579cb3..599d9f07f9d5 100644 --- a/llvm/test/DebugInfo/COFF/vframe-fpo.ll +++ b/llvm/test/DebugInfo/COFF/vframe-fpo.ll @@ -65,7 +65,9 @@ ; CODEVIEW-NEXT: Subsection [ ; CODEVIEW-NEXT: SubSectionType: Symbols (0xF1) ; CODEVIEW-NEXT: SubSectionSize: -; CODEVIEW-NEXT: Compile3Sym { +; CODEVIEW-NEXT: ObjNameSym { +; CODEVIEW-NEXT: Kind: S_OBJNAME (0x1101) +; CODEVIEW: Compile3Sym { ; CODEVIEW-NEXT: Kind: S_COMPILE3 (0x113C) ; CODEVIEW: } ; CODEVIEW: ] diff --git a/llvm/test/MC/AArch64/coff-debug.ll b/llvm/test/MC/AArch64/coff-debug.ll index bb7cc4db3f93..6099b3d570b4 100644 --- a/llvm/test/MC/AArch64/coff-debug.ll +++ b/llvm/test/MC/AArch64/coff-debug.ll @@ -1,5 +1,7 @@ ; RUN: llc -mtriple=aarch64-windows -filetype=obj -o - %s | \ -; RUN: llvm-readobj --codeview - | FileCheck %s +; RUN: llvm-readobj --codeview - | FileCheck %s --check-prefixes=CHECK,CHECK-STDOUT +; RUN: llc -mtriple=aarch64-windows -filetype=obj -o %t.o %s +; RUN: llvm-readobj --codeview %t.o | FileCheck %s --check-prefixes=CHECK,CHECK-FILE ; ModuleID = 'a.c' source_filename = "a.c" @@ -66,6 +68,12 @@ attributes #0 = { noinline nounwind optnone "correctly-rounded-divide-sqrt-fp-ma ; CHECK: Magic: 0x4 ; CHECK: Subsection [ ; CHECK: SubSectionType: Symbols (0xF1) +; CHECK: ObjNameSym { +; CHECK: Kind: S_OBJNAME (0x1101) +; CHECK: Signature: 0x0 +; CHECK-STDOUT: ObjectName: {{$}} +; CHECK-FILE: ObjectName: {{.*}}.o +; CHECK: } ; CHECK: Compile3Sym { ; CHECK: Kind: S_COMPILE3 (0x113C) ; CHECK: Language: C (0x0) diff --git a/llvm/test/MC/ARM/coff-debugging-secrel.ll b/llvm/test/MC/ARM/coff-debugging-secrel.ll index dbd251adae19..1e00b0547235 100644 --- a/llvm/test/MC/ARM/coff-debugging-secrel.ll +++ b/llvm/test/MC/ARM/coff-debugging-secrel.ll @@ -42,10 +42,10 @@ entry: ; CHECK-MSVC: Relocations [ ; CHECK-MSVC: Section {{.*}} .debug$S { -; CHECK-MSVC: 0x64 IMAGE_REL_ARM_SECREL function -; CHECK-MSVC: 0x68 IMAGE_REL_ARM_SECTION function -; CHECK-MSVC: 0xA0 IMAGE_REL_ARM_SECREL function -; CHECK-MSVC: 0xA4 IMAGE_REL_ARM_SECTION function +; CHECK-MSVC: 0x70 IMAGE_REL_ARM_SECREL function +; CHECK-MSVC: 0x74 IMAGE_REL_ARM_SECTION function +; CHECK-MSVC: 0xAC IMAGE_REL_ARM_SECREL function +; CHECK-MSVC: 0xB0 IMAGE_REL_ARM_SECTION function ; CHECK-MSVC: } ; CHECK-MSVC: ] diff --git a/llvm/test/MC/COFF/cv-compiler-info.ll b/llvm/test/MC/COFF/cv-compiler-info.ll index d8cfe99736cf..a9deb74bc9b9 100644 --- a/llvm/test/MC/COFF/cv-compiler-info.ll +++ b/llvm/test/MC/COFF/cv-compiler-info.ll @@ -1,4 +1,6 @@ -; RUN: llc -mtriple i686-pc-windows-msvc < %s | FileCheck %s +; RUN: llc -mtriple i686-pc-windows-msvc < %s | FileCheck %s --check-prefixes=CHECK,STDOUT +; RUN: llc -mtriple i686-pc-windows-msvc < %s -o %t +; RUN: FileCheck %s --input-file=%t --check-prefixes=CHECK,FILE ; ModuleID = 'D:\src\scopes\foo.cpp' source_filename = "D:\5Csrc\5Cscopes\5Cfoo.cpp" target datalayout = "e-m:x-p:32:32-i64:64-f80:32-n8:16:32-a:0:32-S32" @@ -20,19 +22,23 @@ attributes #0 = { nounwind sspstrong "correctly-rounded-divide-sqrt-fp-math"="fa ; One .debug$S section should contain an S_COMPILE3 record that identifies the ; source language and the version of the compiler based on the DICompileUnit. 
; CHECK: .section .debug$S,"dr" +; CHECK: .short 4353 # Record kind: S_OBJNAME +; CHECK-NEXT: .long 0 # Signature +; STDOUT-NEXT: .byte 0 # Object name +; FILE-NEXT: .asciz "{{.*}}{{\\\\|/}}cv-compiler-info.ll.tmp" # Object name ; CHECK: .short 4412 # Record kind: S_COMPILE3 -; CHECK: .long 1 # Flags and language -; CHECK: .short 7 # CPUType -; CHECK: .short 4 # Frontend version -; CHECK: .short 0 -; CHECK: .short 0 -; CHECK: .short 0 -; CHECK: .short [[BACKEND_VERSION:[0-9]+]] # Backend version -; CHECK: .short 0 -; CHECK: .short 0 -; CHECK: .short 0 -; CHECK: .asciz "clang version 4.0.0 " # Null-terminated compiler version string -; CHECK-NOT: .short 4412 # Record kind: S_COMPILE3 +; CHECK-NEXT: .long 1 # Flags and language +; CHECK-NEXT: .short 7 # CPUType +; CHECK-NEXT: .short 4 # Frontend version +; CHECK-NEXT: .short 0 +; CHECK-NEXT: .short 0 +; CHECK-NEXT: .short 0 +; CHECK-NEXT: .short [[BACKEND_VERSION:[0-9]+]] # Backend version +; CHECK-NEXT: .short 0 +; CHECK-NEXT: .short 0 +; CHECK-NEXT: .short 0 +; CHECK-NEXT: .asciz "clang version 4.0.0 " # Null-terminated compiler version string +; CHECK-NOT: .short 4412 # Record kind: S_COMPILE3 !1 = !DIFile(filename: "D:\5Csrc\5Cscopes\5Cfoo.cpp", directory: "D:\5Csrc\5Cscopes\5Cclang") !2 = !{} !7 = !{i32 2, !"CodeView", i32 1} diff --git a/llvm/tools/llc/llc.cpp b/llvm/tools/llc/llc.cpp index 9d80f062c8f9..c07f4e66486c 100644 --- a/llvm/tools/llc/llc.cpp +++ b/llvm/tools/llc/llc.cpp @@ -605,6 +605,9 @@ static int compileModule(char **argv, LLVMContext &Context) { GetOutputStream(TheTarget->getName(), TheTriple.getOS(), argv[0]); if (!Out) return 1; + // Ensure the filename is passed down to CodeViewDebug. + Target->Options.ObjectFilenameForDebug = Out->outputFilename(); + std::unique_ptr DwoOut; if (!SplitDwarfOutputFile.empty()) { std::error_code EC; diff --git a/llvm/tools/llvm-lto2/llvm-lto2.cpp b/llvm/tools/llvm-lto2/llvm-lto2.cpp index 6830bd8f1b2c..7416e5850944 100644 --- a/llvm/tools/llvm-lto2/llvm-lto2.cpp +++ b/llvm/tools/llvm-lto2/llvm-lto2.cpp @@ -378,7 +378,7 @@ static int run(int argc, char **argv) { std::error_code EC; auto S = std::make_unique(Path, EC, sys::fs::OF_None); check(EC, Path); - return std::make_unique(std::move(S)); + return std::make_unique(std::move(S), Path); }; auto AddBuffer = [&](size_t Task, std::unique_ptr MB) { From 95311468997160401c027e3bad3b14a391c1ead6 Mon Sep 17 00:00:00 2001 From: John Ericson Date: Sun, 4 Apr 2021 13:02:18 -0400 Subject: [PATCH 015/293] [lldb] Use `GNUInstallDirs` to support custom installation dirs. Extracted from D99484. My new plan is to start from the outside and work inward. Reviewed By: compnerd Differential Revision: https://reviews.llvm.org/D115570 --- lldb/CMakeLists.txt | 2 ++ lldb/cmake/modules/AddLLDB.cmake | 4 +++- lldb/cmake/modules/LLDBConfig.cmake | 4 ++-- 3 files changed, 7 insertions(+), 3 deletions(-) diff --git a/lldb/CMakeLists.txt b/lldb/CMakeLists.txt index 5c2dac3f51f6..79d451965ed4 100644 --- a/lldb/CMakeLists.txt +++ b/lldb/CMakeLists.txt @@ -1,5 +1,7 @@ cmake_minimum_required(VERSION 3.13.4) +include(GNUInstallDirs) + # Add path for custom modules. set(CMAKE_MODULE_PATH ${CMAKE_MODULE_PATH} diff --git a/lldb/cmake/modules/AddLLDB.cmake b/lldb/cmake/modules/AddLLDB.cmake index 8be214a8509a..3291a7c808e1 100644 --- a/lldb/cmake/modules/AddLLDB.cmake +++ b/lldb/cmake/modules/AddLLDB.cmake @@ -1,3 +1,5 @@ +include(GNUInstallDirs) + function(lldb_tablegen) # Syntax: # lldb_tablegen output-file [tablegen-arg ...] 
SOURCE source-file @@ -113,7 +115,7 @@ function(add_lldb_library name) endif() # RUNTIME is relevant for DLL platforms, FRAMEWORK for macOS install(TARGETS ${name} COMPONENT ${name} - RUNTIME DESTINATION bin + RUNTIME DESTINATION "${CMAKE_INSTALL_BINDIR}" LIBRARY DESTINATION ${install_dest} ARCHIVE DESTINATION ${install_dest} FRAMEWORK DESTINATION ${install_dest}) diff --git a/lldb/cmake/modules/LLDBConfig.cmake b/lldb/cmake/modules/LLDBConfig.cmake index 01c501a7f717..e12e548ad0c4 100644 --- a/lldb/cmake/modules/LLDBConfig.cmake +++ b/lldb/cmake/modules/LLDBConfig.cmake @@ -231,7 +231,7 @@ include_directories(BEFORE if (NOT LLVM_INSTALL_TOOLCHAIN_ONLY) install(DIRECTORY include/ COMPONENT lldb-headers - DESTINATION include + DESTINATION "${CMAKE_INSTALL_INCLUDEDIR}" FILES_MATCHING PATTERN "*.h" PATTERN ".cmake" EXCLUDE @@ -239,7 +239,7 @@ if (NOT LLVM_INSTALL_TOOLCHAIN_ONLY) install(DIRECTORY ${CMAKE_CURRENT_BINARY_DIR}/include/ COMPONENT lldb-headers - DESTINATION include + DESTINATION "${CMAKE_INSTALL_INCLUDEDIR}" FILES_MATCHING PATTERN "*.h" PATTERN ".cmake" EXCLUDE From b9f6e09b850acf2ade04f22fa423e7cc0f075e62 Mon Sep 17 00:00:00 2001 From: Owen Pan Date: Fri, 17 Dec 2021 12:00:48 -0800 Subject: [PATCH 016/293] [clang-format][NFC] Handle wrapping after => in mustBreakBefore() Move the handling of brace wrapping after => from unwrapped line parser to token annotator and clean up the parser. Differential Revision: https://reviews.llvm.org/D115967 --- clang/lib/Format/TokenAnnotator.cpp | 3 ++ clang/lib/Format/UnwrappedLineParser.cpp | 56 ++++++------------------ clang/lib/Format/UnwrappedLineParser.h | 2 +- 3 files changed, 18 insertions(+), 43 deletions(-) diff --git a/clang/lib/Format/TokenAnnotator.cpp b/clang/lib/Format/TokenAnnotator.cpp index 0a51a53da400..505a7250572b 100644 --- a/clang/lib/Format/TokenAnnotator.cpp +++ b/clang/lib/Format/TokenAnnotator.cpp @@ -3725,6 +3725,9 @@ bool TokenAnnotator::mustBreakBefore(const AnnotatedLine &Line, return true; if (Style.isCSharp()) { + if (Left.is(TT_FatArrow) && Right.is(tok::l_brace) && + Style.BraceWrapping.AfterFunction) + return true; if (Right.is(TT_CSharpNamedArgumentColon) || Left.is(TT_CSharpNamedArgumentColon)) return false; diff --git a/clang/lib/Format/UnwrappedLineParser.cpp b/clang/lib/Format/UnwrappedLineParser.cpp index c2ae691bfd3d..6010d276af05 100644 --- a/clang/lib/Format/UnwrappedLineParser.cpp +++ b/clang/lib/Format/UnwrappedLineParser.cpp @@ -1637,19 +1637,9 @@ void UnwrappedLineParser::parseStructuralElement(bool IsTopLevel) { break; } case tok::equal: - // Fat arrows (=>) have tok::TokenKind tok::equal but TokenType - // TT_FatArrow. They always start an expression or a child block if - // followed by a curly brace. - if (FormatTok->is(TT_FatArrow)) { - nextToken(); - if (FormatTok->is(tok::l_brace)) { - // C# may break after => if the next character is a newline. - if (Style.isCSharp() && Style.BraceWrapping.AfterFunction == true) { - // calling `addUnwrappedLine()` here causes odd parsing errors. 
- FormatTok->MustBreakBefore = true; - } - parseChildBlock(); - } + if ((Style.isJavaScript() || Style.isCSharp()) && + FormatTok->is(TT_FatArrow)) { + tryToParseChildBlock(); break; } @@ -1725,7 +1715,7 @@ bool UnwrappedLineParser::tryToParsePropertyAccessor() { // Try to parse the property accessor: // https://docs.microsoft.com/en-us/dotnet/csharp/programming-guide/classes-and-structs/properties Tokens->setPosition(StoredPosition); - if (!IsTrivialPropertyAccessor && Style.BraceWrapping.AfterFunction == true) + if (!IsTrivialPropertyAccessor && Style.BraceWrapping.AfterFunction) addUnwrappedLine(); nextToken(); do { @@ -1940,18 +1930,15 @@ bool UnwrappedLineParser::tryToParseBracedList() { return true; } -bool UnwrappedLineParser::tryToParseCSharpLambda() { - // Fat arrows (=>) have tok::TokenKind tok::equal but TokenType - // TT_FatArrow. They always start an expression or a child block if - // followed by a curly brace. +bool UnwrappedLineParser::tryToParseChildBlock() { + assert(Style.isJavaScript() || Style.isCSharp()); + assert(FormatTok->is(TT_FatArrow)); + // Fat arrows (=>) have tok::TokenKind tok::equal but TokenType TT_FatArrow. + // They always start an expression or a child block if followed by a curly + // brace. nextToken(); if (FormatTok->isNot(tok::l_brace)) return false; - // C# may break after => if the next character is a newline. - if (Style.BraceWrapping.AfterFunction) { - // calling `addUnwrappedLine()` here causes odd parsing errors. - FormatTok->MustBreakBefore = true; - } parseChildBlock(); return true; } @@ -1964,24 +1951,15 @@ bool UnwrappedLineParser::parseBracedList(bool ContinueOnSemicolons, // FIXME: Once we have an expression parser in the UnwrappedLineParser, // replace this by using parseAssignmentExpression() inside. do { - if (Style.isCSharp() && FormatTok->is(TT_FatArrow)) - if (tryToParseCSharpLambda()) - continue; + if (Style.isCSharp() && FormatTok->is(TT_FatArrow) && + tryToParseChildBlock()) + continue; if (Style.isJavaScript()) { if (FormatTok->is(Keywords.kw_function) || FormatTok->startsSequence(Keywords.kw_async, Keywords.kw_function)) { tryToParseJSFunction(); continue; } - if (FormatTok->is(TT_FatArrow)) { - nextToken(); - // Fat arrows can be followed by simple expressions or by child blocks - // in curly braces. - if (FormatTok->is(tok::l_brace)) { - parseChildBlock(); - continue; - } - } if (FormatTok->is(tok::l_brace)) { // Could be a method inside of a braced list `{a() { return 1; }}`. 
if (tryToParseBracedList()) @@ -1996,12 +1974,6 @@ bool UnwrappedLineParser::parseBracedList(bool ContinueOnSemicolons, return !HasError; } switch (FormatTok->Tok.getKind()) { - case tok::caret: - nextToken(); - if (FormatTok->is(tok::l_brace)) { - parseChildBlock(); - } - break; case tok::l_square: if (Style.isCSharp()) parseSquare(); @@ -2093,7 +2065,7 @@ void UnwrappedLineParser::parseParens() { break; case tok::equal: if (Style.isCSharp() && FormatTok->is(TT_FatArrow)) - tryToParseCSharpLambda(); + tryToParseChildBlock(); else nextToken(); break; diff --git a/clang/lib/Format/UnwrappedLineParser.h b/clang/lib/Format/UnwrappedLineParser.h index 3c07fbf2e8d3..0c79723d50fc 100644 --- a/clang/lib/Format/UnwrappedLineParser.h +++ b/clang/lib/Format/UnwrappedLineParser.h @@ -138,7 +138,7 @@ class UnwrappedLineParser { // https://docs.microsoft.com/en-us/dotnet/csharp/language-reference/keywords/where-generic-type-constraint void parseCSharpGenericTypeConstraint(); bool tryToParseLambda(); - bool tryToParseCSharpLambda(); + bool tryToParseChildBlock(); bool tryToParseLambdaIntroducer(); bool tryToParsePropertyAccessor(); void tryToParseJSFunction(); From 63ddf0baf37edbfdf482853870238f9f88bfeb0b Mon Sep 17 00:00:00 2001 From: Julian Lettner Date: Tue, 21 Dec 2021 16:44:12 -0800 Subject: [PATCH 017/293] [TSan] Don't instrument code that is executed from __tsan_on_report() See also: https://reviews.llvm.org/D111157 --- compiler-rt/test/tsan/Darwin/main_tid.mm | 1 + compiler-rt/test/tsan/libdispatch/dispatch_once_deadlock.c | 3 +++ 2 files changed, 4 insertions(+) diff --git a/compiler-rt/test/tsan/Darwin/main_tid.mm b/compiler-rt/test/tsan/Darwin/main_tid.mm index 6dea58e53326..886880edeb2f 100644 --- a/compiler-rt/test/tsan/Darwin/main_tid.mm +++ b/compiler-rt/test/tsan/Darwin/main_tid.mm @@ -13,6 +13,7 @@ int __tsan_get_report_thread(void *report, unsigned long idx, int *tid, unsigned long trace_size); } +__attribute__((disable_sanitizer_instrumentation)) void __tsan_on_report(void *report) { fprintf(stderr, "__tsan_on_report(%p)\n", report); diff --git a/compiler-rt/test/tsan/libdispatch/dispatch_once_deadlock.c b/compiler-rt/test/tsan/libdispatch/dispatch_once_deadlock.c index 9c7c47e9235b..8139605c753c 100644 --- a/compiler-rt/test/tsan/libdispatch/dispatch_once_deadlock.c +++ b/compiler-rt/test/tsan/libdispatch/dispatch_once_deadlock.c @@ -10,6 +10,8 @@ long g = 0; long h = 0; + +__attribute__((disable_sanitizer_instrumentation)) void f() { static dispatch_once_t onceToken; dispatch_once(&onceToken, ^{ @@ -18,6 +20,7 @@ void f() { h++; } +__attribute__((disable_sanitizer_instrumentation)) void __tsan_on_report() { fprintf(stderr, "Report.\n"); f(); From 1fe897dffd315c811052ecc9cf37d81f66744afa Mon Sep 17 00:00:00 2001 From: Arthur O'Dwyer Date: Wed, 6 Oct 2021 22:30:47 -0400 Subject: [PATCH 018/293] [libc++] [test] Simplify some ranges tests. Eliminate a bogus operator== overload. Also, check more intermediate steps in the logic we're checking here. Some of this simplification is possible only now that we've implemented more of . 
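
For context, the shape these simplified tests converge on can be shown with a small standalone sketch. This is an editorial illustration, not part of the patch: it deliberately avoids the libc++ test-support helpers (cpp20_input_iterator, sentinel_wrapper) so it compiles on its own, and it only demonstrates the general pattern the patch moves toward -- end() yields a distinct sentinel type, and equality is defined between an iterator and its sentinel rather than through an ad-hoc iterator/raw-pointer operator== overload like the one removed from types.h.

  // Illustrative sketch only (not from this patch): iterator/sentinel
  // equality instead of an ad-hoc iterator/pointer comparison.
  #include <cassert>

  struct It {
    int* p_;
    int operator*() const { return *p_; }
    It& operator++() { ++p_; return *this; }
    void operator++(int) { ++p_; }
  };

  struct Sent {
    int* end_;
    // Equality exists only between the iterator and its sentinel.
    friend bool operator==(const It& it, const Sent& s) { return it.p_ == s.end_; }
  };

  int main() {
    int buf[8] = {1, 2, 3, 4, 5, 6, 7, 8};
    It it{buf};
    Sent sent{buf + 8};
    int count = 0;
    for (; !(it == sent); ++it)
      count += (*it > 0);
    assert(count == 8);
    return 0;
  }

With InputView::end() returning a real sentinel type, the tests exercise the sentinel machinery directly, which is why the raw-pointer comparison overloads could be dropped.
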
Differential Revision: https://reviews.llvm.org/D116002 --- .../range.common.view/begin.pass.cpp | 9 +-- .../range.take/sentinel/base.pass.cpp | 17 ++--- .../range.take/sentinel/ctor.pass.cpp | 42 +++++++---- .../range.transform/end.pass.cpp | 70 +++++++++++-------- .../range.transform/iterator/base.pass.cpp | 36 +++++----- .../range.adaptors/range.transform/types.h | 9 ++- 6 files changed, 99 insertions(+), 84 deletions(-) diff --git a/libcxx/test/std/ranges/range.adaptors/range.common.view/begin.pass.cpp b/libcxx/test/std/ranges/range.adaptors/range.common.view/begin.pass.cpp index 9fc24a66ee02..f210c619ca68 100644 --- a/libcxx/test/std/ranges/range.adaptors/range.common.view/begin.pass.cpp +++ b/libcxx/test/std/ranges/range.adaptors/range.common.view/begin.pass.cpp @@ -69,13 +69,8 @@ int main(int, char**) { using CommonIter = std::common_iterator>; std::same_as auto begin = common.begin(); assert(begin == std::ranges::begin(view)); - } - { - SizedForwardView view{buf, buf + 8}; - std::ranges::common_view const common(view); - using CommonIter = std::common_iterator>; - std::same_as auto begin = common.begin(); - assert(begin == std::ranges::begin(view)); + std::same_as auto cbegin = std::as_const(common).begin(); + assert(cbegin == std::ranges::begin(view)); } { diff --git a/libcxx/test/std/ranges/range.adaptors/range.take/sentinel/base.pass.cpp b/libcxx/test/std/ranges/range.adaptors/range.take/sentinel/base.pass.cpp index 1562a188a802..8da2ec613e97 100644 --- a/libcxx/test/std/ranges/range.adaptors/range.take/sentinel/base.pass.cpp +++ b/libcxx/test/std/ranges/range.adaptors/range.take/sentinel/base.pass.cpp @@ -24,20 +24,13 @@ constexpr bool test() { int buffer[8] = {1, 2, 3, 4, 5, 6, 7, 8}; - auto sw = sentinel_wrapper(buffer + 8); // Note: not 4, but that's OK. - { - const std::ranges::take_view tv(MoveOnlyView{buffer}, 4); - assert(base(tv.end().base()) == base(sw)); - ASSERT_SAME_TYPE(decltype(tv.end().base()), sentinel_wrapper); + std::ranges::take_view tv(MoveOnlyView(buffer), 4); + std::same_as> auto sw1 = tv.end().base(); + assert(base(sw1) == buffer + 8); + std::same_as> auto sw2 = std::as_const(tv).end().base(); + assert(base(sw2) == buffer + 8); } - - { - std::ranges::take_view tv(MoveOnlyView{buffer}, 4); - assert(base(tv.end().base()) == base(sw)); - ASSERT_SAME_TYPE(decltype(tv.end().base()), sentinel_wrapper); - } - return true; } diff --git a/libcxx/test/std/ranges/range.adaptors/range.take/sentinel/ctor.pass.cpp b/libcxx/test/std/ranges/range.adaptors/range.take/sentinel/ctor.pass.cpp index 92a6c1770510..2b7b15d1092f 100644 --- a/libcxx/test/std/ranges/range.adaptors/range.take/sentinel/ctor.pass.cpp +++ b/libcxx/test/std/ranges/range.adaptors/range.take/sentinel/ctor.pass.cpp @@ -27,27 +27,39 @@ constexpr bool test() { { // Test the default ctor. - std::ranges::take_view tv(MoveOnlyView{buffer}, 4); - assert(decltype(tv.end()){} == std::ranges::next(tv.begin(), 4)); + using TakeView = std::ranges::take_view; + using Sentinel = std::ranges::sentinel_t; + Sentinel s; + TakeView tv = TakeView(MoveOnlyView(buffer), 4); + assert(tv.begin() + 4 == s); } { - std::ranges::take_view nonConst(MoveOnlyView{buffer}, 5); - const std::ranges::take_view tvConst(MoveOnlyView{buffer}, 5); - auto sent1 = nonConst.end(); - // Convert to const. Note, we cannot go the other way. 
- std::remove_cv_t sent2 = sent1; - - assert(sent1 == std::ranges::next(tvConst.begin(), 5)); - assert(sent2 == std::ranges::next(tvConst.begin(), 5)); + // Test the conversion from "sentinel" to "sentinel-to-const". + using TakeView = std::ranges::take_view; + using Sentinel = std::ranges::sentinel_t; + using ConstSentinel = std::ranges::sentinel_t; + static_assert(std::is_convertible_v); + TakeView tv = TakeView(MoveOnlyView(buffer), 4); + Sentinel s = tv.end(); + ConstSentinel cs = s; + cs = s; // test assignment also + assert(tv.begin() + 4 == s); + assert(tv.begin() + 4 == cs); + assert(std::as_const(tv).begin() + 4 == s); + assert(std::as_const(tv).begin() + 4 == cs); } { - std::ranges::take_view tv(CopyableView{buffer}, 6); - auto sw = sentinel_wrapper(buffer + 6); - using Sent = decltype(tv.end()); - Sent sent = Sent(sw); - assert(base(sent.base()) == base(sw)); + // Test the constructor from "base-sentinel" to "sentinel". + using TakeView = std::ranges::take_view; + using Sentinel = std::ranges::sentinel_t; + sentinel_wrapper sw1 = MoveOnlyView(buffer).end(); + static_assert( std::is_constructible_v>); + static_assert(!std::is_convertible_v, Sentinel>); + auto s = Sentinel(sw1); + std::same_as> auto sw2 = s.base(); + assert(base(sw2) == base(sw1)); } return true; diff --git a/libcxx/test/std/ranges/range.adaptors/range.transform/end.pass.cpp b/libcxx/test/std/ranges/range.adaptors/range.transform/end.pass.cpp index c933e44f1a78..80b8c92bf251 100644 --- a/libcxx/test/std/ranges/range.adaptors/range.transform/end.pass.cpp +++ b/libcxx/test/std/ranges/range.adaptors/range.transform/end.pass.cpp @@ -25,44 +25,58 @@ #include "types.h" template -concept EndInvocable = requires(T t) { t.end(); }; - -template -concept EndIsIter = requires(T t) { ++t.end(); }; +concept HasConstQualifiedEnd = requires(const T& t) { t.end(); }; constexpr bool test() { { - std::ranges::transform_view transformView(MoveOnlyView{}, PlusOneMutable{}); - assert(transformView.end().base() == globalBuff + 8); + using TransformView = std::ranges::transform_view; + static_assert(std::ranges::common_range); + TransformView tv; + auto end = tv.end(); + ASSERT_SAME_TYPE(decltype(end.base()), std::ranges::sentinel_t); + assert(base(end.base()) == globalBuff + 8); + static_assert(!HasConstQualifiedEnd); } - { - std::ranges::transform_view transformView(ForwardView{}, PlusOneMutable{}); - assert(transformView.end().base().base() == globalBuff + 8); + using TransformView = std::ranges::transform_view; + static_assert(!std::ranges::common_range); + TransformView tv; + auto end = tv.end(); + ASSERT_SAME_TYPE(decltype(end.base()), std::ranges::sentinel_t); + assert(base(base(end.base())) == globalBuff + 8); + static_assert(!HasConstQualifiedEnd); } - { - std::ranges::transform_view transformView(InputView{}, PlusOneMutable{}); - assert(transformView.end().base() == globalBuff + 8); + using TransformView = std::ranges::transform_view; + static_assert(!std::ranges::common_range); + TransformView tv; + auto end = tv.end(); + ASSERT_SAME_TYPE(decltype(end.base()), std::ranges::sentinel_t); + assert(base(base(end.base())) == globalBuff + 8); + auto cend = std::as_const(tv).end(); + ASSERT_SAME_TYPE(decltype(cend.base()), std::ranges::sentinel_t); + assert(base(base(cend.base())) == globalBuff + 8); } - { - const std::ranges::transform_view transformView(MoveOnlyView{}, PlusOne{}); - assert(transformView.end().base() == globalBuff + 8); + using TransformView = std::ranges::transform_view; + static_assert(std::ranges::common_range); 
+ TransformView tv; + auto end = tv.end(); + ASSERT_SAME_TYPE(decltype(end.base()), std::ranges::sentinel_t); + assert(end.base() == globalBuff + 8); + static_assert(!HasConstQualifiedEnd); + } + { + using TransformView = std::ranges::transform_view; + static_assert(std::ranges::common_range); + TransformView tv; + auto end = tv.end(); + ASSERT_SAME_TYPE(decltype(end.base()), std::ranges::sentinel_t); + assert(end.base() == globalBuff + 8); + auto cend = std::as_const(tv).end(); + ASSERT_SAME_TYPE(decltype(cend.base()), std::ranges::sentinel_t); + assert(cend.base() == globalBuff + 8); } - - static_assert(!EndInvocable>); - static_assert( EndInvocable< std::ranges::transform_view>); - static_assert( EndInvocable>); - static_assert(!EndInvocable>); - static_assert( EndInvocable< std::ranges::transform_view>); - static_assert( EndInvocable>); - - static_assert(!EndIsIter>); - static_assert(!EndIsIter< std::ranges::transform_view>); - static_assert( EndIsIter>); - static_assert( EndIsIter< std::ranges::transform_view>); - return true; } diff --git a/libcxx/test/std/ranges/range.adaptors/range.transform/iterator/base.pass.cpp b/libcxx/test/std/ranges/range.adaptors/range.transform/iterator/base.pass.cpp index a69739647d6d..26fc953023c8 100644 --- a/libcxx/test/std/ranges/range.adaptors/range.transform/iterator/base.pass.cpp +++ b/libcxx/test/std/ranges/range.adaptors/range.transform/iterator/base.pass.cpp @@ -17,30 +17,32 @@ #include "test_macros.h" #include "../types.h" -template -concept BaseInvocable = requires(std::ranges::iterator_t> iter) { - iter.base(); +template +concept HasBase = requires(It it) { + static_cast(it).base(); }; constexpr bool test() { { - std::ranges::transform_view transformView; - auto iter = std::move(transformView).begin(); - ASSERT_SAME_TYPE(int*, decltype(iter.base())); - assert(iter.base() == globalBuff); - ASSERT_SAME_TYPE(int*, decltype(std::move(iter).base())); - assert(std::move(iter).base() == globalBuff); + using TransformView = std::ranges::transform_view; + TransformView tv; + auto begin = tv.begin(); + ASSERT_SAME_TYPE(decltype(begin.base()), int*); + assert(begin.base() == globalBuff); + ASSERT_SAME_TYPE(decltype(std::move(begin).base()), int*); + assert(std::move(begin).base() == globalBuff); } - { - std::ranges::transform_view transformView; - auto iter = transformView.begin(); - assert(std::move(iter).base() == globalBuff); - ASSERT_SAME_TYPE(cpp20_input_iterator, decltype(std::move(iter).base())); + using TransformView = std::ranges::transform_view; + TransformView tv; + auto begin = tv.begin(); + static_assert(!HasBase); + static_assert(HasBase); + static_assert(!HasBase); + static_assert(!HasBase); + std::same_as> auto it = std::move(begin).base(); + assert(base(it) == globalBuff); } - - static_assert(!BaseInvocable); - return true; } diff --git a/libcxx/test/std/ranges/range.adaptors/range.transform/types.h b/libcxx/test/std/ranges/range.adaptors/range.transform/types.h index 3c1ed0f564a8..80ffa6a0a67d 100644 --- a/libcxx/test/std/ranges/range.adaptors/range.transform/types.h +++ b/libcxx/test/std/ranges/range.adaptors/range.transform/types.h @@ -85,12 +85,11 @@ static_assert( std::ranges::borrowed_range); struct InputView : std::ranges::view_base { int *ptr_; constexpr explicit InputView(int* ptr = globalBuff) : ptr_(ptr) {} - constexpr cpp20_input_iterator begin() const { return cpp20_input_iterator(ptr_); } - constexpr int *end() const { return ptr_ + 8; } + constexpr auto begin() const { return cpp20_input_iterator(ptr_); } + constexpr auto 
end() const { return sentinel_wrapper>(cpp20_input_iterator(ptr_ + 8)); } }; -// TODO: remove these bogus operators -constexpr bool operator==(const cpp20_input_iterator &lhs, int* rhs) { return lhs.base() == rhs; } -constexpr bool operator==(int* lhs, const cpp20_input_iterator &rhs) { return rhs.base() == lhs; } +static_assert( std::ranges::view); +static_assert(!std::ranges::sized_range); struct SizedSentinelView : std::ranges::view_base { int count_; From 7056250f517a9af0e26c019180c88a4bb5e691db Mon Sep 17 00:00:00 2001 From: Nikolas Klauser Date: Sat, 18 Dec 2021 10:21:25 +0100 Subject: [PATCH 019/293] [libc++][NFC] Granularize Granularize the `` header Reviewed By: Quuxplusone, ldionne, #libc Spies: libcxx-commits, mgorny Differential Revision: https://reviews.llvm.org/D115578 --- libcxx/include/CMakeLists.txt | 16 + libcxx/include/__filesystem/copy_options.h | 80 + libcxx/include/__filesystem/directory_entry.h | 504 +++ .../include/__filesystem/directory_iterator.h | 150 + .../include/__filesystem/directory_options.h | 78 + libcxx/include/__filesystem/file_status.h | 68 + libcxx/include/__filesystem/file_time_type.h | 27 + libcxx/include/__filesystem/file_type.h | 39 + .../include/__filesystem/filesystem_error.h | 99 + libcxx/include/__filesystem/operations.h | 599 ++++ libcxx/include/__filesystem/path.h | 1018 ++++++ libcxx/include/__filesystem/path_iterator.h | 132 + libcxx/include/__filesystem/perm_options.h | 73 + libcxx/include/__filesystem/perms.h | 91 + .../recursive_directory_iterator.h | 181 ++ libcxx/include/__filesystem/space_info.h | 35 + libcxx/include/__filesystem/u8path.h | 96 + libcxx/include/filesystem | 2788 +---------------- libcxx/include/module.modulemap | 19 + .../filesystem/copy_options.module.verify.cpp | 15 + .../directory_entry.module.verify.cpp | 15 + .../directory_iterator.module.verify.cpp | 15 + .../directory_options.module.verify.cpp | 15 + .../filesystem/file_status.module.verify.cpp | 15 + .../file_time_type.module.verify.cpp | 15 + .../filesystem/file_type.module.verify.cpp | 15 + .../filesystem_error.module.verify.cpp | 15 + .../filesystem/operations.module.verify.cpp | 15 + .../filesystem/path.module.verify.cpp | 15 + .../path_iterator.module.verify.cpp | 15 + .../filesystem/perm_options.module.verify.cpp | 15 + .../filesystem/perms.module.verify.cpp | 15 + ...rsive_directory_iterator.module.verify.cpp | 15 + .../filesystem/space_info.module.verify.cpp | 15 + .../filesystem/u8path.module.verify.cpp | 15 + 35 files changed, 3562 insertions(+), 2771 deletions(-) create mode 100644 libcxx/include/__filesystem/copy_options.h create mode 100644 libcxx/include/__filesystem/directory_entry.h create mode 100644 libcxx/include/__filesystem/directory_iterator.h create mode 100644 libcxx/include/__filesystem/directory_options.h create mode 100644 libcxx/include/__filesystem/file_status.h create mode 100644 libcxx/include/__filesystem/file_time_type.h create mode 100644 libcxx/include/__filesystem/file_type.h create mode 100644 libcxx/include/__filesystem/filesystem_error.h create mode 100644 libcxx/include/__filesystem/operations.h create mode 100644 libcxx/include/__filesystem/path.h create mode 100644 libcxx/include/__filesystem/path_iterator.h create mode 100644 libcxx/include/__filesystem/perm_options.h create mode 100644 libcxx/include/__filesystem/perms.h create mode 100644 libcxx/include/__filesystem/recursive_directory_iterator.h create mode 100644 libcxx/include/__filesystem/space_info.h create mode 100644 libcxx/include/__filesystem/u8path.h create 
mode 100644 libcxx/test/libcxx/diagnostics/detail.headers/filesystem/copy_options.module.verify.cpp create mode 100644 libcxx/test/libcxx/diagnostics/detail.headers/filesystem/directory_entry.module.verify.cpp create mode 100644 libcxx/test/libcxx/diagnostics/detail.headers/filesystem/directory_iterator.module.verify.cpp create mode 100644 libcxx/test/libcxx/diagnostics/detail.headers/filesystem/directory_options.module.verify.cpp create mode 100644 libcxx/test/libcxx/diagnostics/detail.headers/filesystem/file_status.module.verify.cpp create mode 100644 libcxx/test/libcxx/diagnostics/detail.headers/filesystem/file_time_type.module.verify.cpp create mode 100644 libcxx/test/libcxx/diagnostics/detail.headers/filesystem/file_type.module.verify.cpp create mode 100644 libcxx/test/libcxx/diagnostics/detail.headers/filesystem/filesystem_error.module.verify.cpp create mode 100644 libcxx/test/libcxx/diagnostics/detail.headers/filesystem/operations.module.verify.cpp create mode 100644 libcxx/test/libcxx/diagnostics/detail.headers/filesystem/path.module.verify.cpp create mode 100644 libcxx/test/libcxx/diagnostics/detail.headers/filesystem/path_iterator.module.verify.cpp create mode 100644 libcxx/test/libcxx/diagnostics/detail.headers/filesystem/perm_options.module.verify.cpp create mode 100644 libcxx/test/libcxx/diagnostics/detail.headers/filesystem/perms.module.verify.cpp create mode 100644 libcxx/test/libcxx/diagnostics/detail.headers/filesystem/recursive_directory_iterator.module.verify.cpp create mode 100644 libcxx/test/libcxx/diagnostics/detail.headers/filesystem/space_info.module.verify.cpp create mode 100644 libcxx/test/libcxx/diagnostics/detail.headers/filesystem/u8path.module.verify.cpp diff --git a/libcxx/include/CMakeLists.txt b/libcxx/include/CMakeLists.txt index f7e459b59179..76eed6f64d3b 100644 --- a/libcxx/include/CMakeLists.txt +++ b/libcxx/include/CMakeLists.txt @@ -141,6 +141,22 @@ set(files __coroutine/trivial_awaitables.h __debug __errc + __filesystem/copy_options.h + __filesystem/directory_entry.h + __filesystem/directory_iterator.h + __filesystem/directory_options.h + __filesystem/file_status.h + __filesystem/file_time_type.h + __filesystem/file_type.h + __filesystem/filesystem_error.h + __filesystem/operations.h + __filesystem/path_iterator.h + __filesystem/path.h + __filesystem/perm_options.h + __filesystem/perms.h + __filesystem/recursive_directory_iterator.h + __filesystem/space_info.h + __filesystem/u8path.h __format/format_arg.h __format/format_args.h __format/format_context.h diff --git a/libcxx/include/__filesystem/copy_options.h b/libcxx/include/__filesystem/copy_options.h new file mode 100644 index 000000000000..c0140d45717b --- /dev/null +++ b/libcxx/include/__filesystem/copy_options.h @@ -0,0 +1,80 @@ +// -*- C++ -*- +//===----------------------------------------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#ifndef _LIBCPP___FILESYSTEM_COPY_OPTIONS_H +#define _LIBCPP___FILESYSTEM_COPY_OPTIONS_H + +#include <__availability> +#include <__config> + +#ifndef _LIBCPP_CXX03_LANG + +_LIBCPP_BEGIN_NAMESPACE_FILESYSTEM + +_LIBCPP_AVAILABILITY_FILESYSTEM_PUSH + +enum class _LIBCPP_ENUM_VIS copy_options : unsigned short { + none = 0, + skip_existing = 1, + overwrite_existing = 2, + update_existing = 4, + recursive = 8, + copy_symlinks = 16, + skip_symlinks = 32, + directories_only = 64, + create_symlinks = 128, + create_hard_links = 256, + __in_recursive_copy = 512, +}; + +_LIBCPP_INLINE_VISIBILITY +inline constexpr copy_options operator&(copy_options _LHS, copy_options _RHS) { + return static_cast(static_cast(_LHS) & + static_cast(_RHS)); +} + +_LIBCPP_INLINE_VISIBILITY +inline constexpr copy_options operator|(copy_options _LHS, copy_options _RHS) { + return static_cast(static_cast(_LHS) | + static_cast(_RHS)); +} + +_LIBCPP_INLINE_VISIBILITY +inline constexpr copy_options operator^(copy_options _LHS, copy_options _RHS) { + return static_cast(static_cast(_LHS) ^ + static_cast(_RHS)); +} + +_LIBCPP_INLINE_VISIBILITY +inline constexpr copy_options operator~(copy_options _LHS) { + return static_cast(~static_cast(_LHS)); +} + +_LIBCPP_INLINE_VISIBILITY +inline copy_options& operator&=(copy_options& _LHS, copy_options _RHS) { + return _LHS = _LHS & _RHS; +} + +_LIBCPP_INLINE_VISIBILITY +inline copy_options& operator|=(copy_options& _LHS, copy_options _RHS) { + return _LHS = _LHS | _RHS; +} + +_LIBCPP_INLINE_VISIBILITY +inline copy_options& operator^=(copy_options& _LHS, copy_options _RHS) { + return _LHS = _LHS ^ _RHS; +} + +_LIBCPP_AVAILABILITY_FILESYSTEM_POP + +_LIBCPP_END_NAMESPACE_FILESYSTEM + +#endif // _LIBCPP_CXX03_LANG + +#endif // _LIBCPP___FILESYSTEM_COPY_OPTIONS_H diff --git a/libcxx/include/__filesystem/directory_entry.h b/libcxx/include/__filesystem/directory_entry.h new file mode 100644 index 000000000000..9efe19465428 --- /dev/null +++ b/libcxx/include/__filesystem/directory_entry.h @@ -0,0 +1,504 @@ +// -*- C++ -*- +//===----------------------------------------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#ifndef _LIBCPP___FILESYSTEM_DIRECTORY_ENTRY_H +#define _LIBCPP___FILESYSTEM_DIRECTORY_ENTRY_H + +#include <__availability> +#include <__config> +#include <__filesystem/path.h> +#include <__filesystem/file_time_type.h> +#include <__filesystem/filesystem_error.h> +#include <__filesystem/file_status.h> +#include <__filesystem/file_type.h> +#include <__filesystem/operations.h> +#include <__filesystem/perms.h> +#include <__errc> +#include +#include +#include +#include + +_LIBCPP_PUSH_MACROS +#include <__undef_macros> + +#ifndef _LIBCPP_CXX03_LANG + +_LIBCPP_BEGIN_NAMESPACE_FILESYSTEM + +_LIBCPP_AVAILABILITY_FILESYSTEM_PUSH + + +class directory_entry { + typedef _VSTD_FS::path _Path; + +public: + // constructors and destructors + directory_entry() noexcept = default; + directory_entry(directory_entry const&) = default; + directory_entry(directory_entry&&) noexcept = default; + + _LIBCPP_INLINE_VISIBILITY + explicit directory_entry(_Path const& __p) : __p_(__p) { + error_code __ec; + __refresh(&__ec); + } + + _LIBCPP_INLINE_VISIBILITY + directory_entry(_Path const& __p, error_code& __ec) : __p_(__p) { + __refresh(&__ec); + } + + ~directory_entry() {} + + directory_entry& operator=(directory_entry const&) = default; + directory_entry& operator=(directory_entry&&) noexcept = default; + + _LIBCPP_INLINE_VISIBILITY + void assign(_Path const& __p) { + __p_ = __p; + error_code __ec; + __refresh(&__ec); + } + + _LIBCPP_INLINE_VISIBILITY + void assign(_Path const& __p, error_code& __ec) { + __p_ = __p; + __refresh(&__ec); + } + + _LIBCPP_INLINE_VISIBILITY + void replace_filename(_Path const& __p) { + __p_.replace_filename(__p); + error_code __ec; + __refresh(&__ec); + } + + _LIBCPP_INLINE_VISIBILITY + void replace_filename(_Path const& __p, error_code& __ec) { + __p_ = __p_.parent_path() / __p; + __refresh(&__ec); + } + + _LIBCPP_INLINE_VISIBILITY + void refresh() { __refresh(); } + + _LIBCPP_INLINE_VISIBILITY + void refresh(error_code& __ec) noexcept { __refresh(&__ec); } + + _LIBCPP_INLINE_VISIBILITY + _Path const& path() const noexcept { return __p_; } + + _LIBCPP_INLINE_VISIBILITY + operator const _Path&() const noexcept { return __p_; } + + _LIBCPP_INLINE_VISIBILITY + bool exists() const { return _VSTD_FS::exists(file_status{__get_ft()}); } + + _LIBCPP_INLINE_VISIBILITY + bool exists(error_code& __ec) const noexcept { + return _VSTD_FS::exists(file_status{__get_ft(&__ec)}); + } + + _LIBCPP_INLINE_VISIBILITY + bool is_block_file() const { return __get_ft() == file_type::block; } + + _LIBCPP_INLINE_VISIBILITY + bool is_block_file(error_code& __ec) const noexcept { + return __get_ft(&__ec) == file_type::block; + } + + _LIBCPP_INLINE_VISIBILITY + bool is_character_file() const { return __get_ft() == file_type::character; } + + _LIBCPP_INLINE_VISIBILITY + bool is_character_file(error_code& __ec) const noexcept { + return __get_ft(&__ec) == file_type::character; + } + + _LIBCPP_INLINE_VISIBILITY + bool is_directory() const { return __get_ft() == file_type::directory; } + + _LIBCPP_INLINE_VISIBILITY + bool is_directory(error_code& __ec) const noexcept { + return __get_ft(&__ec) == file_type::directory; + } + + _LIBCPP_INLINE_VISIBILITY + bool is_fifo() const { return __get_ft() == file_type::fifo; } + + _LIBCPP_INLINE_VISIBILITY + bool is_fifo(error_code& __ec) const noexcept { + return __get_ft(&__ec) == file_type::fifo; + } + + _LIBCPP_INLINE_VISIBILITY + 
bool is_other() const { return _VSTD_FS::is_other(file_status{__get_ft()}); } + + _LIBCPP_INLINE_VISIBILITY + bool is_other(error_code& __ec) const noexcept { + return _VSTD_FS::is_other(file_status{__get_ft(&__ec)}); + } + + _LIBCPP_INLINE_VISIBILITY + bool is_regular_file() const { return __get_ft() == file_type::regular; } + + _LIBCPP_INLINE_VISIBILITY + bool is_regular_file(error_code& __ec) const noexcept { + return __get_ft(&__ec) == file_type::regular; + } + + _LIBCPP_INLINE_VISIBILITY + bool is_socket() const { return __get_ft() == file_type::socket; } + + _LIBCPP_INLINE_VISIBILITY + bool is_socket(error_code& __ec) const noexcept { + return __get_ft(&__ec) == file_type::socket; + } + + _LIBCPP_INLINE_VISIBILITY + bool is_symlink() const { return __get_sym_ft() == file_type::symlink; } + + _LIBCPP_INLINE_VISIBILITY + bool is_symlink(error_code& __ec) const noexcept { + return __get_sym_ft(&__ec) == file_type::symlink; + } + _LIBCPP_INLINE_VISIBILITY + uintmax_t file_size() const { return __get_size(); } + + _LIBCPP_INLINE_VISIBILITY + uintmax_t file_size(error_code& __ec) const noexcept { + return __get_size(&__ec); + } + + _LIBCPP_INLINE_VISIBILITY + uintmax_t hard_link_count() const { return __get_nlink(); } + + _LIBCPP_INLINE_VISIBILITY + uintmax_t hard_link_count(error_code& __ec) const noexcept { + return __get_nlink(&__ec); + } + + _LIBCPP_INLINE_VISIBILITY + file_time_type last_write_time() const { return __get_write_time(); } + + _LIBCPP_INLINE_VISIBILITY + file_time_type last_write_time(error_code& __ec) const noexcept { + return __get_write_time(&__ec); + } + + _LIBCPP_INLINE_VISIBILITY + file_status status() const { return __get_status(); } + + _LIBCPP_INLINE_VISIBILITY + file_status status(error_code& __ec) const noexcept { + return __get_status(&__ec); + } + + _LIBCPP_INLINE_VISIBILITY + file_status symlink_status() const { return __get_symlink_status(); } + + _LIBCPP_INLINE_VISIBILITY + file_status symlink_status(error_code& __ec) const noexcept { + return __get_symlink_status(&__ec); + } + + _LIBCPP_INLINE_VISIBILITY + bool operator<(directory_entry const& __rhs) const noexcept { + return __p_ < __rhs.__p_; + } + + _LIBCPP_INLINE_VISIBILITY + bool operator==(directory_entry const& __rhs) const noexcept { + return __p_ == __rhs.__p_; + } + + _LIBCPP_INLINE_VISIBILITY + bool operator!=(directory_entry const& __rhs) const noexcept { + return __p_ != __rhs.__p_; + } + + _LIBCPP_INLINE_VISIBILITY + bool operator<=(directory_entry const& __rhs) const noexcept { + return __p_ <= __rhs.__p_; + } + + _LIBCPP_INLINE_VISIBILITY + bool operator>(directory_entry const& __rhs) const noexcept { + return __p_ > __rhs.__p_; + } + + _LIBCPP_INLINE_VISIBILITY + bool operator>=(directory_entry const& __rhs) const noexcept { + return __p_ >= __rhs.__p_; + } + +private: + friend class directory_iterator; + friend class recursive_directory_iterator; + friend class __dir_stream; + + enum _CacheType : unsigned char { + _Empty, + _IterSymlink, + _IterNonSymlink, + _RefreshSymlink, + _RefreshSymlinkUnresolved, + _RefreshNonSymlink + }; + + struct __cached_data { + uintmax_t __size_; + uintmax_t __nlink_; + file_time_type __write_time_; + perms __sym_perms_; + perms __non_sym_perms_; + file_type __type_; + _CacheType __cache_type_; + + _LIBCPP_INLINE_VISIBILITY + __cached_data() noexcept { __reset(); } + + _LIBCPP_INLINE_VISIBILITY + void __reset() { + __cache_type_ = _Empty; + __type_ = file_type::none; + __sym_perms_ = __non_sym_perms_ = perms::unknown; + __size_ = __nlink_ = uintmax_t(-1); + 
__write_time_ = file_time_type::min(); + } + }; + + _LIBCPP_INLINE_VISIBILITY + static __cached_data __create_iter_result(file_type __ft) { + __cached_data __data; + __data.__type_ = __ft; + __data.__cache_type_ = [&]() { + switch (__ft) { + case file_type::none: + return _Empty; + case file_type::symlink: + return _IterSymlink; + default: + return _IterNonSymlink; + } + }(); + return __data; + } + + _LIBCPP_INLINE_VISIBILITY + void __assign_iter_entry(_Path&& __p, __cached_data __dt) { + __p_ = _VSTD::move(__p); + __data_ = __dt; + } + + _LIBCPP_FUNC_VIS + error_code __do_refresh() noexcept; + + _LIBCPP_INLINE_VISIBILITY + static bool __is_dne_error(error_code const& __ec) { + if (!__ec) + return true; + switch (static_cast(__ec.value())) { + case errc::no_such_file_or_directory: + case errc::not_a_directory: + return true; + default: + return false; + } + } + + _LIBCPP_INLINE_VISIBILITY + void __handle_error(const char* __msg, error_code* __dest_ec, + error_code const& __ec, bool __allow_dne = false) const { + if (__dest_ec) { + *__dest_ec = __ec; + return; + } + if (__ec && (!__allow_dne || !__is_dne_error(__ec))) + __throw_filesystem_error(__msg, __p_, __ec); + } + + _LIBCPP_INLINE_VISIBILITY + void __refresh(error_code* __ec = nullptr) { + __handle_error("in directory_entry::refresh", __ec, __do_refresh(), + /*allow_dne*/ true); + } + + _LIBCPP_INLINE_VISIBILITY + file_type __get_sym_ft(error_code* __ec = nullptr) const { + switch (__data_.__cache_type_) { + case _Empty: + return __symlink_status(__p_, __ec).type(); + case _IterSymlink: + case _RefreshSymlink: + case _RefreshSymlinkUnresolved: + if (__ec) + __ec->clear(); + return file_type::symlink; + case _IterNonSymlink: + case _RefreshNonSymlink: + file_status __st(__data_.__type_); + if (__ec && !_VSTD_FS::exists(__st)) + *__ec = make_error_code(errc::no_such_file_or_directory); + else if (__ec) + __ec->clear(); + return __data_.__type_; + } + _LIBCPP_UNREACHABLE(); + } + + _LIBCPP_INLINE_VISIBILITY + file_type __get_ft(error_code* __ec = nullptr) const { + switch (__data_.__cache_type_) { + case _Empty: + case _IterSymlink: + case _RefreshSymlinkUnresolved: + return __status(__p_, __ec).type(); + case _IterNonSymlink: + case _RefreshNonSymlink: + case _RefreshSymlink: { + file_status __st(__data_.__type_); + if (__ec && !_VSTD_FS::exists(__st)) + *__ec = make_error_code(errc::no_such_file_or_directory); + else if (__ec) + __ec->clear(); + return __data_.__type_; + } + } + _LIBCPP_UNREACHABLE(); + } + + _LIBCPP_INLINE_VISIBILITY + file_status __get_status(error_code* __ec = nullptr) const { + switch (__data_.__cache_type_) { + case _Empty: + case _IterNonSymlink: + case _IterSymlink: + case _RefreshSymlinkUnresolved: + return __status(__p_, __ec); + case _RefreshNonSymlink: + case _RefreshSymlink: + return file_status(__get_ft(__ec), __data_.__non_sym_perms_); + } + _LIBCPP_UNREACHABLE(); + } + + _LIBCPP_INLINE_VISIBILITY + file_status __get_symlink_status(error_code* __ec = nullptr) const { + switch (__data_.__cache_type_) { + case _Empty: + case _IterNonSymlink: + case _IterSymlink: + return __symlink_status(__p_, __ec); + case _RefreshNonSymlink: + return file_status(__get_sym_ft(__ec), __data_.__non_sym_perms_); + case _RefreshSymlink: + case _RefreshSymlinkUnresolved: + return file_status(__get_sym_ft(__ec), __data_.__sym_perms_); + } + _LIBCPP_UNREACHABLE(); + } + + _LIBCPP_INLINE_VISIBILITY + uintmax_t __get_size(error_code* __ec = nullptr) const { + switch (__data_.__cache_type_) { + case _Empty: + case _IterNonSymlink: + 
case _IterSymlink: + case _RefreshSymlinkUnresolved: + return _VSTD_FS::__file_size(__p_, __ec); + case _RefreshSymlink: + case _RefreshNonSymlink: { + error_code __m_ec; + file_status __st(__get_ft(&__m_ec)); + __handle_error("in directory_entry::file_size", __ec, __m_ec); + if (_VSTD_FS::exists(__st) && !_VSTD_FS::is_regular_file(__st)) { + errc __err_kind = _VSTD_FS::is_directory(__st) ? errc::is_a_directory + : errc::not_supported; + __handle_error("in directory_entry::file_size", __ec, + make_error_code(__err_kind)); + } + return __data_.__size_; + } + } + _LIBCPP_UNREACHABLE(); + } + + _LIBCPP_INLINE_VISIBILITY + uintmax_t __get_nlink(error_code* __ec = nullptr) const { + switch (__data_.__cache_type_) { + case _Empty: + case _IterNonSymlink: + case _IterSymlink: + case _RefreshSymlinkUnresolved: + return _VSTD_FS::__hard_link_count(__p_, __ec); + case _RefreshSymlink: + case _RefreshNonSymlink: { + error_code __m_ec; + (void)__get_ft(&__m_ec); + __handle_error("in directory_entry::hard_link_count", __ec, __m_ec); + return __data_.__nlink_; + } + } + _LIBCPP_UNREACHABLE(); + } + + _LIBCPP_INLINE_VISIBILITY + file_time_type __get_write_time(error_code* __ec = nullptr) const { + switch (__data_.__cache_type_) { + case _Empty: + case _IterNonSymlink: + case _IterSymlink: + case _RefreshSymlinkUnresolved: + return _VSTD_FS::__last_write_time(__p_, __ec); + case _RefreshSymlink: + case _RefreshNonSymlink: { + error_code __m_ec; + file_status __st(__get_ft(&__m_ec)); + __handle_error("in directory_entry::last_write_time", __ec, __m_ec); + if (_VSTD_FS::exists(__st) && + __data_.__write_time_ == file_time_type::min()) + __handle_error("in directory_entry::last_write_time", __ec, + make_error_code(errc::value_too_large)); + return __data_.__write_time_; + } + } + _LIBCPP_UNREACHABLE(); + } + +private: + _Path __p_; + __cached_data __data_; +}; + +class __dir_element_proxy { +public: + inline _LIBCPP_INLINE_VISIBILITY directory_entry operator*() { + return _VSTD::move(__elem_); + } + +private: + friend class directory_iterator; + friend class recursive_directory_iterator; + explicit __dir_element_proxy(directory_entry const& __e) : __elem_(__e) {} + __dir_element_proxy(__dir_element_proxy&& __o) + : __elem_(_VSTD::move(__o.__elem_)) {} + directory_entry __elem_; +}; + +_LIBCPP_AVAILABILITY_FILESYSTEM_POP + +_LIBCPP_END_NAMESPACE_FILESYSTEM + +#endif // _LIBCPP_CXX03_LANG + +_LIBCPP_POP_MACROS + +#endif // _LIBCPP___FILESYSTEM_DIRECTORY_ENTRY_H diff --git a/libcxx/include/__filesystem/directory_iterator.h b/libcxx/include/__filesystem/directory_iterator.h new file mode 100644 index 000000000000..be958e0eb8de --- /dev/null +++ b/libcxx/include/__filesystem/directory_iterator.h @@ -0,0 +1,150 @@ +// -*- C++ -*- +//===----------------------------------------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#ifndef _LIBCPP___FILESYSTEM_DIRECTORY_ITERATOR_H +#define _LIBCPP___FILESYSTEM_DIRECTORY_ITERATOR_H + +#include <__availability> +#include <__config> +#include <__filesystem/directory_entry.h> +#include <__filesystem/directory_options.h> +#include <__filesystem/path.h> +#include <__iterator/iterator_traits.h> +#include <__memory/shared_ptr.h> +#include <__debug> +#include <__ranges/enable_borrowed_range.h> +#include <__ranges/enable_view.h> +#include +#include + +#ifndef _LIBCPP_CXX03_LANG + +_LIBCPP_BEGIN_NAMESPACE_FILESYSTEM + +_LIBCPP_AVAILABILITY_FILESYSTEM_PUSH + +class _LIBCPP_HIDDEN __dir_stream; +class directory_iterator { +public: + typedef directory_entry value_type; + typedef ptrdiff_t difference_type; + typedef value_type const* pointer; + typedef value_type const& reference; + typedef input_iterator_tag iterator_category; + +public: + //ctor & dtor + directory_iterator() noexcept {} + + explicit directory_iterator(const path& __p) + : directory_iterator(__p, nullptr) {} + + directory_iterator(const path& __p, directory_options __opts) + : directory_iterator(__p, nullptr, __opts) {} + + directory_iterator(const path& __p, error_code& __ec) + : directory_iterator(__p, &__ec) {} + + directory_iterator(const path& __p, directory_options __opts, + error_code& __ec) + : directory_iterator(__p, &__ec, __opts) {} + + directory_iterator(const directory_iterator&) = default; + directory_iterator(directory_iterator&&) = default; + directory_iterator& operator=(const directory_iterator&) = default; + + directory_iterator& operator=(directory_iterator&& __o) noexcept { + // non-default implementation provided to support self-move assign. 
+ if (this != &__o) { + __imp_ = _VSTD::move(__o.__imp_); + } + return *this; + } + + ~directory_iterator() = default; + + const directory_entry& operator*() const { + _LIBCPP_ASSERT(__imp_, "The end iterator cannot be dereferenced"); + return __dereference(); + } + + const directory_entry* operator->() const { return &**this; } + + directory_iterator& operator++() { return __increment(); } + + __dir_element_proxy operator++(int) { + __dir_element_proxy __p(**this); + __increment(); + return __p; + } + + directory_iterator& increment(error_code& __ec) { return __increment(&__ec); } + +private: + inline _LIBCPP_INLINE_VISIBILITY friend bool + operator==(const directory_iterator& __lhs, + const directory_iterator& __rhs) noexcept; + + // construct the dir_stream + _LIBCPP_FUNC_VIS + directory_iterator(const path&, error_code*, + directory_options = directory_options::none); + + _LIBCPP_FUNC_VIS + directory_iterator& __increment(error_code* __ec = nullptr); + + _LIBCPP_FUNC_VIS + const directory_entry& __dereference() const; + +private: + shared_ptr<__dir_stream> __imp_; +}; + +inline _LIBCPP_INLINE_VISIBILITY bool +operator==(const directory_iterator& __lhs, + const directory_iterator& __rhs) noexcept { + return __lhs.__imp_ == __rhs.__imp_; +} + +inline _LIBCPP_INLINE_VISIBILITY bool +operator!=(const directory_iterator& __lhs, + const directory_iterator& __rhs) noexcept { + return !(__lhs == __rhs); +} + +// enable directory_iterator range-based for statements +inline _LIBCPP_INLINE_VISIBILITY directory_iterator +begin(directory_iterator __iter) noexcept { + return __iter; +} + +inline _LIBCPP_INLINE_VISIBILITY directory_iterator +end(directory_iterator) noexcept { + return directory_iterator(); +} + +_LIBCPP_AVAILABILITY_FILESYSTEM_POP + +_LIBCPP_END_NAMESPACE_FILESYSTEM + +#if !defined(_LIBCPP_HAS_NO_RANGES) + +template <> +_LIBCPP_AVAILABILITY_FILESYSTEM +inline constexpr bool _VSTD::ranges::enable_borrowed_range<_VSTD_FS::directory_iterator> = true; + +template <> +_LIBCPP_AVAILABILITY_FILESYSTEM +inline constexpr bool _VSTD::ranges::enable_view<_VSTD_FS::directory_iterator> = true; + +#endif + +#endif // _LIBCPP_CXX03_LANG + +#endif // _LIBCPP___FILESYSTEM_DIRECTORY_ITERATOR_H diff --git a/libcxx/include/__filesystem/directory_options.h b/libcxx/include/__filesystem/directory_options.h new file mode 100644 index 000000000000..79c0c2cbaa55 --- /dev/null +++ b/libcxx/include/__filesystem/directory_options.h @@ -0,0 +1,78 @@ +// -*- C++ -*- +//===----------------------------------------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#ifndef _LIBCPP___FILESYSTEM_DIRECTORY_OPTIONS_H +#define _LIBCPP___FILESYSTEM_DIRECTORY_OPTIONS_H + +#include <__availability> +#include <__config> + +#ifndef _LIBCPP_CXX03_LANG + +_LIBCPP_BEGIN_NAMESPACE_FILESYSTEM + +_LIBCPP_AVAILABILITY_FILESYSTEM_PUSH + +enum class _LIBCPP_ENUM_VIS directory_options : unsigned char { + none = 0, + follow_directory_symlink = 1, + skip_permission_denied = 2 +}; + +_LIBCPP_INLINE_VISIBILITY +inline constexpr directory_options operator&(directory_options _LHS, + directory_options _RHS) { + return static_cast(static_cast(_LHS) & + static_cast(_RHS)); +} + +_LIBCPP_INLINE_VISIBILITY +inline constexpr directory_options operator|(directory_options _LHS, + directory_options _RHS) { + return static_cast(static_cast(_LHS) | + static_cast(_RHS)); +} + +_LIBCPP_INLINE_VISIBILITY +inline constexpr directory_options operator^(directory_options _LHS, + directory_options _RHS) { + return static_cast(static_cast(_LHS) ^ + static_cast(_RHS)); +} + +_LIBCPP_INLINE_VISIBILITY +inline constexpr directory_options operator~(directory_options _LHS) { + return static_cast(~static_cast(_LHS)); +} + +_LIBCPP_INLINE_VISIBILITY +inline directory_options& operator&=(directory_options& _LHS, + directory_options _RHS) { + return _LHS = _LHS & _RHS; +} + +_LIBCPP_INLINE_VISIBILITY +inline directory_options& operator|=(directory_options& _LHS, + directory_options _RHS) { + return _LHS = _LHS | _RHS; +} + +_LIBCPP_INLINE_VISIBILITY +inline directory_options& operator^=(directory_options& _LHS, + directory_options _RHS) { + return _LHS = _LHS ^ _RHS; +} + +_LIBCPP_AVAILABILITY_FILESYSTEM_POP + +_LIBCPP_END_NAMESPACE_FILESYSTEM + +#endif // _LIBCPP_CXX03_LANG + +#endif // _LIBCPP___FILESYSTEM_DIRECTORY_OPTIONS_H diff --git a/libcxx/include/__filesystem/file_status.h b/libcxx/include/__filesystem/file_status.h new file mode 100644 index 000000000000..a8f653ab44fc --- /dev/null +++ b/libcxx/include/__filesystem/file_status.h @@ -0,0 +1,68 @@ +// -*- C++ -*- +//===----------------------------------------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#ifndef _LIBCPP___FILESYSTEM_FILE_STATUS_H +#define _LIBCPP___FILESYSTEM_FILE_STATUS_H + +#include <__availability> +#include <__config> +#include <__filesystem/file_type.h> +#include <__filesystem/perms.h> + +#ifndef _LIBCPP_CXX03_LANG + +_LIBCPP_BEGIN_NAMESPACE_FILESYSTEM + +_LIBCPP_AVAILABILITY_FILESYSTEM_PUSH + +class _LIBCPP_TYPE_VIS file_status { +public: + // constructors + _LIBCPP_INLINE_VISIBILITY + file_status() noexcept : file_status(file_type::none) {} + _LIBCPP_INLINE_VISIBILITY + explicit file_status(file_type __ft, perms __prms = perms::unknown) noexcept + : __ft_(__ft), + __prms_(__prms) {} + + file_status(const file_status&) noexcept = default; + file_status(file_status&&) noexcept = default; + + _LIBCPP_INLINE_VISIBILITY + ~file_status() {} + + file_status& operator=(const file_status&) noexcept = default; + file_status& operator=(file_status&&) noexcept = default; + + // observers + _LIBCPP_INLINE_VISIBILITY + file_type type() const noexcept { return __ft_; } + + _LIBCPP_INLINE_VISIBILITY + perms permissions() const noexcept { return __prms_; } + + // modifiers + _LIBCPP_INLINE_VISIBILITY + void type(file_type __ft) noexcept { __ft_ = __ft; } + + _LIBCPP_INLINE_VISIBILITY + void permissions(perms __p) noexcept { __prms_ = __p; } + +private: + file_type __ft_; + perms __prms_; +}; + +_LIBCPP_AVAILABILITY_FILESYSTEM_POP + +_LIBCPP_END_NAMESPACE_FILESYSTEM + +#endif // _LIBCPP_CXX03_LANG + +#endif // _LIBCPP___FILESYSTEM_FILE_STATUS_H diff --git a/libcxx/include/__filesystem/file_time_type.h b/libcxx/include/__filesystem/file_time_type.h new file mode 100644 index 000000000000..590146a06600 --- /dev/null +++ b/libcxx/include/__filesystem/file_time_type.h @@ -0,0 +1,27 @@ +// -*- C++ -*- +//===----------------------------------------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#ifndef _LIBCPP___FILESYSTEM_FILE_TIME_TYPE_H +#define _LIBCPP___FILESYSTEM_FILE_TIME_TYPE_H + +#include <__availability> +#include <__config> +#include + +#ifndef _LIBCPP_CXX03_LANG + +_LIBCPP_BEGIN_NAMESPACE_FILESYSTEM + +typedef chrono::time_point<_FilesystemClock> file_time_type; + +_LIBCPP_END_NAMESPACE_FILESYSTEM + +#endif // _LIBCPP_CXX03_LANG + +#endif // _LIBCPP___FILESYSTEM_FILE_TIME_TYPE_H diff --git a/libcxx/include/__filesystem/file_type.h b/libcxx/include/__filesystem/file_type.h new file mode 100644 index 000000000000..93bee86ad635 --- /dev/null +++ b/libcxx/include/__filesystem/file_type.h @@ -0,0 +1,39 @@ +// -*- C++ -*- +//===----------------------------------------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#ifndef _LIBCPP___FILESYSTEM_FILE_TYPE_H +#define _LIBCPP___FILESYSTEM_FILE_TYPE_H + +#include <__availability> +#include <__config> + +#ifndef _LIBCPP_CXX03_LANG + +_LIBCPP_BEGIN_NAMESPACE_FILESYSTEM + +// On Windows, the library never identifies files as block, character, fifo +// or socket. +enum class _LIBCPP_ENUM_VIS file_type : signed char { + none = 0, + not_found = -1, + regular = 1, + directory = 2, + symlink = 3, + block = 4, + character = 5, + fifo = 6, + socket = 7, + unknown = 8 +}; + +_LIBCPP_END_NAMESPACE_FILESYSTEM + +#endif // _LIBCPP_CXX03_LANG + +#endif // _LIBCPP___FILESYSTEM_FILE_TYPE_H diff --git a/libcxx/include/__filesystem/filesystem_error.h b/libcxx/include/__filesystem/filesystem_error.h new file mode 100644 index 000000000000..a8c6977b48e9 --- /dev/null +++ b/libcxx/include/__filesystem/filesystem_error.h @@ -0,0 +1,99 @@ +// -*- C++ -*- +//===----------------------------------------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#ifndef _LIBCPP___FILESYSTEM_FILESYSTEM_ERROR_H +#define _LIBCPP___FILESYSTEM_FILESYSTEM_ERROR_H + +#include <__availability> +#include <__config> +#include <__filesystem/path.h> +#include <__memory/shared_ptr.h> +#include +#include +#include +#include + +#ifndef _LIBCPP_CXX03_LANG + +_LIBCPP_BEGIN_NAMESPACE_FILESYSTEM + +class _LIBCPP_AVAILABILITY_FILESYSTEM _LIBCPP_EXCEPTION_ABI filesystem_error : public system_error { +public: + _LIBCPP_INLINE_VISIBILITY + filesystem_error(const string& __what, error_code __ec) + : system_error(__ec, __what), + __storage_(make_shared<_Storage>(path(), path())) { + __create_what(0); + } + + _LIBCPP_INLINE_VISIBILITY + filesystem_error(const string& __what, const path& __p1, error_code __ec) + : system_error(__ec, __what), + __storage_(make_shared<_Storage>(__p1, path())) { + __create_what(1); + } + + _LIBCPP_INLINE_VISIBILITY + filesystem_error(const string& __what, const path& __p1, const path& __p2, + error_code __ec) + : system_error(__ec, __what), + __storage_(make_shared<_Storage>(__p1, __p2)) { + __create_what(2); + } + + _LIBCPP_INLINE_VISIBILITY + const path& path1() const noexcept { return __storage_->__p1_; } + + _LIBCPP_INLINE_VISIBILITY + const path& path2() const noexcept { return __storage_->__p2_; } + + filesystem_error(const filesystem_error&) = default; + ~filesystem_error() override; // key function + + _LIBCPP_INLINE_VISIBILITY + const char* what() const noexcept override { + return __storage_->__what_.c_str(); + } + + void __create_what(int __num_paths); + +private: + struct _LIBCPP_HIDDEN _Storage { + _LIBCPP_INLINE_VISIBILITY + _Storage(const path& __p1, const path& __p2) : __p1_(__p1), __p2_(__p2) {} + + path __p1_; + path __p2_; + string __what_; + }; + shared_ptr<_Storage> __storage_; +}; + +// TODO(ldionne): We need to pop the pragma and push it again after +// filesystem_error to work around PR41078. +_LIBCPP_AVAILABILITY_FILESYSTEM_PUSH + +template +_LIBCPP_NORETURN inline _LIBCPP_INLINE_VISIBILITY +#ifndef _LIBCPP_NO_EXCEPTIONS +void __throw_filesystem_error(_Args&&... 
__args) { + throw filesystem_error(_VSTD::forward<_Args>(__args)...); +} +#else +void __throw_filesystem_error(_Args&&...) { + _VSTD::abort(); +} +#endif +_LIBCPP_AVAILABILITY_FILESYSTEM_POP + +_LIBCPP_END_NAMESPACE_FILESYSTEM + +#endif // _LIBCPP_CXX03_LANG + +#endif // _LIBCPP___FILESYSTEM_FILESYSTEM_ERROR_H diff --git a/libcxx/include/__filesystem/operations.h b/libcxx/include/__filesystem/operations.h new file mode 100644 index 000000000000..19d6c2d437f9 --- /dev/null +++ b/libcxx/include/__filesystem/operations.h @@ -0,0 +1,599 @@ +// -*- C++ -*- +//===----------------------------------------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#ifndef _LIBCPP___FILESYSTEM_OPERATIONS_H +#define _LIBCPP___FILESYSTEM_OPERATIONS_H + +#include <__availability> +#include <__config> +#include <__filesystem/copy_options.h> +#include <__filesystem/file_status.h> +#include <__filesystem/file_time_type.h> +#include <__filesystem/file_type.h> +#include <__filesystem/path.h> +#include <__filesystem/perm_options.h> +#include <__filesystem/perms.h> +#include <__filesystem/space_info.h> +#include +#include +#include + +#ifndef _LIBCPP_CXX03_LANG + +_LIBCPP_BEGIN_NAMESPACE_FILESYSTEM + +_LIBCPP_AVAILABILITY_FILESYSTEM_PUSH + +_LIBCPP_FUNC_VIS +path __absolute(const path&, error_code* __ec = nullptr); +_LIBCPP_FUNC_VIS +path __canonical(const path&, error_code* __ec = nullptr); +_LIBCPP_FUNC_VIS +bool __copy_file(const path& __from, const path& __to, copy_options __opt, + error_code* __ec = nullptr); +_LIBCPP_FUNC_VIS +void __copy_symlink(const path& __existing_symlink, const path& __new_symlink, + error_code* __ec = nullptr); +_LIBCPP_FUNC_VIS +void __copy(const path& __from, const path& __to, copy_options __opt, + error_code* __ec = nullptr); +_LIBCPP_FUNC_VIS +bool __create_directories(const path& p, error_code* ec = nullptr); +_LIBCPP_FUNC_VIS +void __create_directory_symlink(const path& __to, const path& __new_symlink, + error_code* __ec = nullptr); +_LIBCPP_FUNC_VIS +bool __create_directory(const path& p, error_code* ec = nullptr); +_LIBCPP_FUNC_VIS +bool __create_directory(const path& p, const path& attributes, + error_code* ec = nullptr); +_LIBCPP_FUNC_VIS +void __create_hard_link(const path& __to, const path& __new_hard_link, + error_code* __ec = nullptr); +_LIBCPP_FUNC_VIS +void __create_symlink(const path& __to, const path& __new_symlink, + error_code* __ec = nullptr); +_LIBCPP_FUNC_VIS +path __current_path(error_code* __ec = nullptr); +_LIBCPP_FUNC_VIS +void __current_path(const path&, error_code* __ec = nullptr); +_LIBCPP_FUNC_VIS +bool __equivalent(const path&, const path&, error_code* __ec = nullptr); +_LIBCPP_FUNC_VIS +file_status __status(const path&, error_code* __ec = nullptr); +_LIBCPP_FUNC_VIS +uintmax_t __file_size(const path&, error_code* __ec = nullptr); +_LIBCPP_FUNC_VIS +uintmax_t __hard_link_count(const path&, error_code* __ec = nullptr); +_LIBCPP_FUNC_VIS +file_status __symlink_status(const path&, error_code* __ec = nullptr); +_LIBCPP_FUNC_VIS +file_time_type __last_write_time(const path& p, error_code* ec = nullptr); +_LIBCPP_FUNC_VIS +void __last_write_time(const path& p, file_time_type new_time, + error_code* ec = nullptr); +_LIBCPP_FUNC_VIS +path __weakly_canonical(path const& __p, error_code* 
__ec = nullptr); +_LIBCPP_FUNC_VIS +path __read_symlink(const path& p, error_code* ec = nullptr); +_LIBCPP_FUNC_VIS +uintmax_t __remove_all(const path& p, error_code* ec = nullptr); +_LIBCPP_FUNC_VIS +bool __remove(const path& p, error_code* ec = nullptr); +_LIBCPP_FUNC_VIS +void __rename(const path& from, const path& to, error_code* ec = nullptr); +_LIBCPP_FUNC_VIS +void __resize_file(const path& p, uintmax_t size, error_code* ec = nullptr); +_LIBCPP_FUNC_VIS +path __temp_directory_path(error_code* __ec = nullptr); + +inline _LIBCPP_INLINE_VISIBILITY path absolute(const path& __p) { + return __absolute(__p); +} + +inline _LIBCPP_INLINE_VISIBILITY path absolute(const path& __p, + error_code& __ec) { + return __absolute(__p, &__ec); +} + +inline _LIBCPP_INLINE_VISIBILITY path canonical(const path& __p) { + return __canonical(__p); +} + +inline _LIBCPP_INLINE_VISIBILITY path canonical(const path& __p, + error_code& __ec) { + return __canonical(__p, &__ec); +} + + +inline _LIBCPP_INLINE_VISIBILITY bool copy_file(const path& __from, + const path& __to) { + return __copy_file(__from, __to, copy_options::none); +} + +inline _LIBCPP_INLINE_VISIBILITY bool +copy_file(const path& __from, const path& __to, error_code& __ec) { + return __copy_file(__from, __to, copy_options::none, &__ec); +} + +inline _LIBCPP_INLINE_VISIBILITY bool +copy_file(const path& __from, const path& __to, copy_options __opt) { + return __copy_file(__from, __to, __opt); +} + +inline _LIBCPP_INLINE_VISIBILITY bool copy_file(const path& __from, + const path& __to, + copy_options __opt, + error_code& __ec) { + return __copy_file(__from, __to, __opt, &__ec); +} + +inline _LIBCPP_INLINE_VISIBILITY void copy_symlink(const path& __existing, + const path& __new) { + __copy_symlink(__existing, __new); +} + +inline _LIBCPP_INLINE_VISIBILITY void +copy_symlink(const path& __ext, const path& __new, error_code& __ec) noexcept { + __copy_symlink(__ext, __new, &__ec); +} + + +inline _LIBCPP_INLINE_VISIBILITY void copy(const path& __from, + const path& __to) { + __copy(__from, __to, copy_options::none); +} + +inline _LIBCPP_INLINE_VISIBILITY void copy(const path& __from, const path& __to, + error_code& __ec) { + __copy(__from, __to, copy_options::none, &__ec); +} + +inline _LIBCPP_INLINE_VISIBILITY void copy(const path& __from, const path& __to, + copy_options __opt) { + __copy(__from, __to, __opt); +} + +inline _LIBCPP_INLINE_VISIBILITY void copy(const path& __from, const path& __to, + copy_options __opt, + error_code& __ec) { + __copy(__from, __to, __opt, &__ec); +} + +inline _LIBCPP_INLINE_VISIBILITY bool create_directories(const path& __p) { + return __create_directories(__p); +} + +inline _LIBCPP_INLINE_VISIBILITY bool create_directories(const path& __p, + error_code& __ec) { + return __create_directories(__p, &__ec); +} + +inline _LIBCPP_INLINE_VISIBILITY void +create_directory_symlink(const path& __to, const path& __new) { + __create_directory_symlink(__to, __new); +} + +inline _LIBCPP_INLINE_VISIBILITY void +create_directory_symlink(const path& __to, const path& __new, + error_code& __ec) noexcept { + __create_directory_symlink(__to, __new, &__ec); +} + +inline _LIBCPP_INLINE_VISIBILITY bool create_directory(const path& __p) { + return __create_directory(__p); +} + +inline _LIBCPP_INLINE_VISIBILITY bool +create_directory(const path& __p, error_code& __ec) noexcept { + return __create_directory(__p, &__ec); +} + +inline _LIBCPP_INLINE_VISIBILITY bool create_directory(const path& __p, + const path& __attrs) { + return 
__create_directory(__p, __attrs); +} + +inline _LIBCPP_INLINE_VISIBILITY bool +create_directory(const path& __p, const path& __attrs, + error_code& __ec) noexcept { + return __create_directory(__p, __attrs, &__ec); +} + +inline _LIBCPP_INLINE_VISIBILITY void create_hard_link(const path& __to, + const path& __new) { + __create_hard_link(__to, __new); +} + +inline _LIBCPP_INLINE_VISIBILITY void +create_hard_link(const path& __to, const path& __new, + error_code& __ec) noexcept { + __create_hard_link(__to, __new, &__ec); +} + +inline _LIBCPP_INLINE_VISIBILITY void create_symlink(const path& __to, + const path& __new) { + __create_symlink(__to, __new); +} + +inline _LIBCPP_INLINE_VISIBILITY void +create_symlink(const path& __to, const path& __new, error_code& __ec) noexcept { + return __create_symlink(__to, __new, &__ec); +} + +inline _LIBCPP_INLINE_VISIBILITY path current_path() { + return __current_path(); +} + +inline _LIBCPP_INLINE_VISIBILITY path current_path(error_code& __ec) { + return __current_path(&__ec); +} + +inline _LIBCPP_INLINE_VISIBILITY void current_path(const path& __p) { + __current_path(__p); +} + +inline _LIBCPP_INLINE_VISIBILITY void current_path(const path& __p, + error_code& __ec) noexcept { + __current_path(__p, &__ec); +} + +inline _LIBCPP_INLINE_VISIBILITY bool equivalent(const path& __p1, + const path& __p2) { + return __equivalent(__p1, __p2); +} + +inline _LIBCPP_INLINE_VISIBILITY bool +equivalent(const path& __p1, const path& __p2, error_code& __ec) noexcept { + return __equivalent(__p1, __p2, &__ec); +} + +inline _LIBCPP_INLINE_VISIBILITY bool status_known(file_status __s) noexcept { + return __s.type() != file_type::none; +} + +inline _LIBCPP_INLINE_VISIBILITY bool exists(file_status __s) noexcept { + return status_known(__s) && __s.type() != file_type::not_found; +} + +inline _LIBCPP_INLINE_VISIBILITY bool exists(const path& __p) { + return exists(__status(__p)); +} + +inline _LIBCPP_INLINE_VISIBILITY bool exists(const path& __p, + error_code& __ec) noexcept { + auto __s = __status(__p, &__ec); + if (status_known(__s)) + __ec.clear(); + return exists(__s); +} + +inline _LIBCPP_INLINE_VISIBILITY uintmax_t file_size(const path& __p) { + return __file_size(__p); +} + +inline _LIBCPP_INLINE_VISIBILITY uintmax_t +file_size(const path& __p, error_code& __ec) noexcept { + return __file_size(__p, &__ec); +} + +inline _LIBCPP_INLINE_VISIBILITY uintmax_t hard_link_count(const path& __p) { + return __hard_link_count(__p); +} + +inline _LIBCPP_INLINE_VISIBILITY uintmax_t +hard_link_count(const path& __p, error_code& __ec) noexcept { + return __hard_link_count(__p, &__ec); +} + +inline _LIBCPP_INLINE_VISIBILITY bool is_block_file(file_status __s) noexcept { + return __s.type() == file_type::block; +} + +inline _LIBCPP_INLINE_VISIBILITY bool is_block_file(const path& __p) { + return is_block_file(__status(__p)); +} + +inline _LIBCPP_INLINE_VISIBILITY bool is_block_file(const path& __p, + error_code& __ec) noexcept { + return is_block_file(__status(__p, &__ec)); +} + +inline _LIBCPP_INLINE_VISIBILITY bool +is_character_file(file_status __s) noexcept { + return __s.type() == file_type::character; +} + +inline _LIBCPP_INLINE_VISIBILITY bool is_character_file(const path& __p) { + return is_character_file(__status(__p)); +} + +inline _LIBCPP_INLINE_VISIBILITY bool +is_character_file(const path& __p, error_code& __ec) noexcept { + return is_character_file(__status(__p, &__ec)); +} + +inline _LIBCPP_INLINE_VISIBILITY bool is_directory(file_status __s) noexcept { + return 
__s.type() == file_type::directory; +} + +inline _LIBCPP_INLINE_VISIBILITY bool is_directory(const path& __p) { + return is_directory(__status(__p)); +} + +inline _LIBCPP_INLINE_VISIBILITY bool is_directory(const path& __p, + error_code& __ec) noexcept { + return is_directory(__status(__p, &__ec)); +} + +_LIBCPP_FUNC_VIS +bool __fs_is_empty(const path& p, error_code* ec = nullptr); + +inline _LIBCPP_INLINE_VISIBILITY bool is_empty(const path& __p) { + return __fs_is_empty(__p); +} + +inline _LIBCPP_INLINE_VISIBILITY bool is_empty(const path& __p, + error_code& __ec) { + return __fs_is_empty(__p, &__ec); +} + +inline _LIBCPP_INLINE_VISIBILITY bool is_fifo(file_status __s) noexcept { + return __s.type() == file_type::fifo; +} +inline _LIBCPP_INLINE_VISIBILITY bool is_fifo(const path& __p) { + return is_fifo(__status(__p)); +} + +inline _LIBCPP_INLINE_VISIBILITY bool is_fifo(const path& __p, + error_code& __ec) noexcept { + return is_fifo(__status(__p, &__ec)); +} + +inline _LIBCPP_INLINE_VISIBILITY bool +is_regular_file(file_status __s) noexcept { + return __s.type() == file_type::regular; +} + +inline _LIBCPP_INLINE_VISIBILITY bool is_regular_file(const path& __p) { + return is_regular_file(__status(__p)); +} + +inline _LIBCPP_INLINE_VISIBILITY bool +is_regular_file(const path& __p, error_code& __ec) noexcept { + return is_regular_file(__status(__p, &__ec)); +} + +inline _LIBCPP_INLINE_VISIBILITY bool is_symlink(file_status __s) noexcept { + return __s.type() == file_type::symlink; +} + +inline _LIBCPP_INLINE_VISIBILITY bool is_symlink(const path& __p) { + return is_symlink(__symlink_status(__p)); +} + +inline _LIBCPP_INLINE_VISIBILITY bool is_symlink(const path& __p, + error_code& __ec) noexcept { + return is_symlink(__symlink_status(__p, &__ec)); +} + +inline _LIBCPP_INLINE_VISIBILITY bool is_other(file_status __s) noexcept { + return exists(__s) && !is_regular_file(__s) && !is_directory(__s) && + !is_symlink(__s); +} + +inline _LIBCPP_INLINE_VISIBILITY bool is_other(const path& __p) { + return is_other(__status(__p)); +} + +inline _LIBCPP_INLINE_VISIBILITY bool is_other(const path& __p, + error_code& __ec) noexcept { + return is_other(__status(__p, &__ec)); +} + +inline _LIBCPP_INLINE_VISIBILITY bool is_socket(file_status __s) noexcept { + return __s.type() == file_type::socket; +} + +inline _LIBCPP_INLINE_VISIBILITY bool is_socket(const path& __p) { + return is_socket(__status(__p)); +} + +inline _LIBCPP_INLINE_VISIBILITY bool is_socket(const path& __p, + error_code& __ec) noexcept { + return is_socket(__status(__p, &__ec)); +} + +inline _LIBCPP_INLINE_VISIBILITY file_time_type +last_write_time(const path& __p) { + return __last_write_time(__p); +} + +inline _LIBCPP_INLINE_VISIBILITY file_time_type +last_write_time(const path& __p, error_code& __ec) noexcept { + return __last_write_time(__p, &__ec); +} + +inline _LIBCPP_INLINE_VISIBILITY void last_write_time(const path& __p, + file_time_type __t) { + __last_write_time(__p, __t); +} + +inline _LIBCPP_INLINE_VISIBILITY void +last_write_time(const path& __p, file_time_type __t, + error_code& __ec) noexcept { + __last_write_time(__p, __t, &__ec); +} + +_LIBCPP_FUNC_VIS +void __permissions(const path&, perms, perm_options, error_code* = nullptr); + +inline _LIBCPP_INLINE_VISIBILITY void +permissions(const path& __p, perms __prms, + perm_options __opts = perm_options::replace) { + __permissions(__p, __prms, __opts); +} + +inline _LIBCPP_INLINE_VISIBILITY void permissions(const path& __p, perms __prms, + error_code& __ec) noexcept { + 
__permissions(__p, __prms, perm_options::replace, &__ec); +} + +inline _LIBCPP_INLINE_VISIBILITY void permissions(const path& __p, perms __prms, + perm_options __opts, + error_code& __ec) { + __permissions(__p, __prms, __opts, &__ec); +} + +inline _LIBCPP_INLINE_VISIBILITY path proximate(const path& __p, + const path& __base, + error_code& __ec) { + path __tmp = __weakly_canonical(__p, &__ec); + if (__ec) + return {}; + path __tmp_base = __weakly_canonical(__base, &__ec); + if (__ec) + return {}; + return __tmp.lexically_proximate(__tmp_base); +} + +inline _LIBCPP_INLINE_VISIBILITY path proximate(const path& __p, + error_code& __ec) { + return proximate(__p, current_path(), __ec); +} + +inline _LIBCPP_INLINE_VISIBILITY path +proximate(const path& __p, const path& __base = current_path()) { + return __weakly_canonical(__p).lexically_proximate( + __weakly_canonical(__base)); +} + +inline _LIBCPP_INLINE_VISIBILITY path read_symlink(const path& __p) { + return __read_symlink(__p); +} + +inline _LIBCPP_INLINE_VISIBILITY path read_symlink(const path& __p, + error_code& __ec) { + return __read_symlink(__p, &__ec); +} + +inline _LIBCPP_INLINE_VISIBILITY path relative(const path& __p, + const path& __base, + error_code& __ec) { + path __tmp = __weakly_canonical(__p, &__ec); + if (__ec) + return path(); + path __tmpbase = __weakly_canonical(__base, &__ec); + if (__ec) + return path(); + return __tmp.lexically_relative(__tmpbase); +} + +inline _LIBCPP_INLINE_VISIBILITY path relative(const path& __p, + error_code& __ec) { + return relative(__p, current_path(), __ec); +} + +inline _LIBCPP_INLINE_VISIBILITY path +relative(const path& __p, const path& __base = current_path()) { + return __weakly_canonical(__p).lexically_relative(__weakly_canonical(__base)); +} + +inline _LIBCPP_INLINE_VISIBILITY uintmax_t remove_all(const path& __p) { + return __remove_all(__p); +} + +inline _LIBCPP_INLINE_VISIBILITY uintmax_t remove_all(const path& __p, + error_code& __ec) { + return __remove_all(__p, &__ec); +} + +inline _LIBCPP_INLINE_VISIBILITY bool remove(const path& __p) { + return __remove(__p); +} + +inline _LIBCPP_INLINE_VISIBILITY bool remove(const path& __p, + error_code& __ec) noexcept { + return __remove(__p, &__ec); +} + +inline _LIBCPP_INLINE_VISIBILITY void rename(const path& __from, + const path& __to) { + return __rename(__from, __to); +} + +inline _LIBCPP_INLINE_VISIBILITY void +rename(const path& __from, const path& __to, error_code& __ec) noexcept { + return __rename(__from, __to, &__ec); +} + +inline _LIBCPP_INLINE_VISIBILITY void resize_file(const path& __p, + uintmax_t __ns) { + return __resize_file(__p, __ns); +} + +inline _LIBCPP_INLINE_VISIBILITY void +resize_file(const path& __p, uintmax_t __ns, error_code& __ec) noexcept { + return __resize_file(__p, __ns, &__ec); +} + +_LIBCPP_FUNC_VIS +space_info __space(const path&, error_code* __ec = nullptr); + +inline _LIBCPP_INLINE_VISIBILITY space_info space(const path& __p) { + return __space(__p); +} + +inline _LIBCPP_INLINE_VISIBILITY space_info space(const path& __p, + error_code& __ec) noexcept { + return __space(__p, &__ec); +} + +inline _LIBCPP_INLINE_VISIBILITY file_status status(const path& __p) { + return __status(__p); +} + +inline _LIBCPP_INLINE_VISIBILITY file_status status(const path& __p, + error_code& __ec) noexcept { + return __status(__p, &__ec); +} + +inline _LIBCPP_INLINE_VISIBILITY file_status symlink_status(const path& __p) { + return __symlink_status(__p); +} + +inline _LIBCPP_INLINE_VISIBILITY file_status +symlink_status(const 
path& __p, error_code& __ec) noexcept { + return __symlink_status(__p, &__ec); +} + +inline _LIBCPP_INLINE_VISIBILITY path temp_directory_path() { + return __temp_directory_path(); +} + +inline _LIBCPP_INLINE_VISIBILITY path temp_directory_path(error_code& __ec) { + return __temp_directory_path(&__ec); +} + +inline _LIBCPP_INLINE_VISIBILITY path weakly_canonical(path const& __p) { + return __weakly_canonical(__p); +} + +inline _LIBCPP_INLINE_VISIBILITY path weakly_canonical(path const& __p, + error_code& __ec) { + return __weakly_canonical(__p, &__ec); +} + +_LIBCPP_AVAILABILITY_FILESYSTEM_POP + +_LIBCPP_END_NAMESPACE_FILESYSTEM + +#endif // _LIBCPP_CXX03_LANG + +#endif // _LIBCPP___FILESYSTEM_OPERATIONS_H diff --git a/libcxx/include/__filesystem/path.h b/libcxx/include/__filesystem/path.h new file mode 100644 index 000000000000..a6d1ee997d91 --- /dev/null +++ b/libcxx/include/__filesystem/path.h @@ -0,0 +1,1018 @@ +// -*- C++ -*- +//===----------------------------------------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#ifndef _LIBCPP___FILESYSTEM_PATH_H +#define _LIBCPP___FILESYSTEM_PATH_H + +#include <__availability> +#include <__config> +#include +#include +#include <__iterator/back_insert_iterator.h> +#include <__iterator/iterator_traits.h> +#include +#include + +#if !defined(_LIBCPP_HAS_NO_LOCALIZATION) +# include +# include // for quoted +#endif + +#ifndef _LIBCPP_CXX03_LANG + +_LIBCPP_BEGIN_NAMESPACE_FILESYSTEM + +_LIBCPP_AVAILABILITY_FILESYSTEM_PUSH + +template +struct __can_convert_char { + static const bool value = false; +}; +template +struct __can_convert_char : public __can_convert_char<_Tp> {}; +template <> +struct __can_convert_char { + static const bool value = true; + using __char_type = char; +}; +template <> +struct __can_convert_char { + static const bool value = true; + using __char_type = wchar_t; +}; +#ifndef _LIBCPP_HAS_NO_CHAR8_T +template <> +struct __can_convert_char { + static const bool value = true; + using __char_type = char8_t; +}; +#endif +template <> +struct __can_convert_char { + static const bool value = true; + using __char_type = char16_t; +}; +template <> +struct __can_convert_char { + static const bool value = true; + using __char_type = char32_t; +}; + +template +typename enable_if<__can_convert_char<_ECharT>::value, bool>::type +__is_separator(_ECharT __e) { +#if defined(_LIBCPP_WIN32API) + return __e == _ECharT('/') || __e == _ECharT('\\'); +#else + return __e == _ECharT('/'); +#endif +} + +#ifndef _LIBCPP_HAS_NO_CHAR8_T +typedef u8string __u8_string; +#else +typedef string __u8_string; +#endif + +struct _NullSentinel {}; + +template +using _Void = void; + +template +struct __is_pathable_string : public false_type {}; + +template +struct __is_pathable_string< + basic_string<_ECharT, _Traits, _Alloc>, + _Void::__char_type> > + : public __can_convert_char<_ECharT> { + using _Str = basic_string<_ECharT, _Traits, _Alloc>; + using _Base = __can_convert_char<_ECharT>; + static _ECharT const* __range_begin(_Str const& __s) { return __s.data(); } + static _ECharT const* __range_end(_Str const& __s) { + return __s.data() + __s.length(); + } + static _ECharT __first_or_null(_Str const& __s) { + return __s.empty() ? 
_ECharT{} : __s[0]; + } +}; + +template +struct __is_pathable_string< + basic_string_view<_ECharT, _Traits>, + _Void::__char_type> > + : public __can_convert_char<_ECharT> { + using _Str = basic_string_view<_ECharT, _Traits>; + using _Base = __can_convert_char<_ECharT>; + static _ECharT const* __range_begin(_Str const& __s) { return __s.data(); } + static _ECharT const* __range_end(_Str const& __s) { + return __s.data() + __s.length(); + } + static _ECharT __first_or_null(_Str const& __s) { + return __s.empty() ? _ECharT{} : __s[0]; + } +}; + +template ::type, + class _UnqualPtrType = + typename remove_const::type>::type, + bool _IsCharPtr = is_pointer<_DS>::value&& + __can_convert_char<_UnqualPtrType>::value> +struct __is_pathable_char_array : false_type {}; + +template +struct __is_pathable_char_array<_Source, _ECharT*, _UPtr, true> + : __can_convert_char::type> { + using _Base = __can_convert_char::type>; + + static _ECharT const* __range_begin(const _ECharT* __b) { return __b; } + static _ECharT const* __range_end(const _ECharT* __b) { + using _Iter = const _ECharT*; + const _ECharT __sentinel = _ECharT{}; + _Iter __e = __b; + for (; *__e != __sentinel; ++__e) + ; + return __e; + } + + static _ECharT __first_or_null(const _ECharT* __b) { return *__b; } +}; + +template ::value, + class = void> +struct __is_pathable_iter : false_type {}; + +template +struct __is_pathable_iter< + _Iter, true, + _Void::value_type>::__char_type> > + : __can_convert_char::value_type> { + using _ECharT = typename iterator_traits<_Iter>::value_type; + using _Base = __can_convert_char<_ECharT>; + + static _Iter __range_begin(_Iter __b) { return __b; } + static _NullSentinel __range_end(_Iter) { return _NullSentinel{}; } + + static _ECharT __first_or_null(_Iter __b) { return *__b; } +}; + +template ::value, + bool _IsCharIterT = __is_pathable_char_array<_Tp>::value, + bool _IsIterT = !_IsCharIterT && __is_pathable_iter<_Tp>::value> +struct __is_pathable : false_type { + static_assert(!_IsStringT && !_IsCharIterT && !_IsIterT, "Must all be false"); +}; + +template +struct __is_pathable<_Tp, true, false, false> : __is_pathable_string<_Tp> {}; + +template +struct __is_pathable<_Tp, false, true, false> : __is_pathable_char_array<_Tp> { +}; + +template +struct __is_pathable<_Tp, false, false, true> : __is_pathable_iter<_Tp> {}; + +#if defined(_LIBCPP_WIN32API) +typedef wstring __path_string; +typedef wchar_t __path_value; +#else +typedef string __path_string; +typedef char __path_value; +#endif + +#if defined(_LIBCPP_WIN32API) +_LIBCPP_FUNC_VIS +size_t __wide_to_char(const wstring&, char*, size_t); +_LIBCPP_FUNC_VIS +size_t __char_to_wide(const string&, wchar_t*, size_t); +#endif + +template +struct _PathCVT; + +#if !defined(_LIBCPP_HAS_NO_LOCALIZATION) +template +struct _PathCVT { + static_assert(__can_convert_char<_ECharT>::value, + "Char type not convertible"); + + typedef __narrow_to_utf8 _Narrower; +#if defined(_LIBCPP_WIN32API) + typedef __widen_from_utf8 _Widener; +#endif + + static void __append_range(__path_string& __dest, _ECharT const* __b, + _ECharT const* __e) { +#if defined(_LIBCPP_WIN32API) + string __utf8; + _Narrower()(back_inserter(__utf8), __b, __e); + _Widener()(back_inserter(__dest), __utf8.data(), __utf8.data() + __utf8.size()); +#else + _Narrower()(back_inserter(__dest), __b, __e); +#endif + } + + template + static void __append_range(__path_string& __dest, _Iter __b, _Iter __e) { + static_assert(!is_same<_Iter, _ECharT*>::value, "Call const overload"); + if (__b == __e) + return; + 
basic_string<_ECharT> __tmp(__b, __e); +#if defined(_LIBCPP_WIN32API) + string __utf8; + _Narrower()(back_inserter(__utf8), __tmp.data(), + __tmp.data() + __tmp.length()); + _Widener()(back_inserter(__dest), __utf8.data(), __utf8.data() + __utf8.size()); +#else + _Narrower()(back_inserter(__dest), __tmp.data(), + __tmp.data() + __tmp.length()); +#endif + } + + template + static void __append_range(__path_string& __dest, _Iter __b, _NullSentinel) { + static_assert(!is_same<_Iter, _ECharT*>::value, "Call const overload"); + const _ECharT __sentinel = _ECharT{}; + if (*__b == __sentinel) + return; + basic_string<_ECharT> __tmp; + for (; *__b != __sentinel; ++__b) + __tmp.push_back(*__b); +#if defined(_LIBCPP_WIN32API) + string __utf8; + _Narrower()(back_inserter(__utf8), __tmp.data(), + __tmp.data() + __tmp.length()); + _Widener()(back_inserter(__dest), __utf8.data(), __utf8.data() + __utf8.size()); +#else + _Narrower()(back_inserter(__dest), __tmp.data(), + __tmp.data() + __tmp.length()); +#endif + } + + template + static void __append_source(__path_string& __dest, _Source const& __s) { + using _Traits = __is_pathable<_Source>; + __append_range(__dest, _Traits::__range_begin(__s), + _Traits::__range_end(__s)); + } +}; +#endif // !_LIBCPP_HAS_NO_LOCALIZATION + +template <> +struct _PathCVT<__path_value> { + + template + static typename enable_if<__is_exactly_cpp17_input_iterator<_Iter>::value>::type + __append_range(__path_string& __dest, _Iter __b, _Iter __e) { + for (; __b != __e; ++__b) + __dest.push_back(*__b); + } + + template + static typename enable_if<__is_cpp17_forward_iterator<_Iter>::value>::type + __append_range(__path_string& __dest, _Iter __b, _Iter __e) { + __dest.append(__b, __e); + } + + template + static void __append_range(__path_string& __dest, _Iter __b, _NullSentinel) { + const char __sentinel = char{}; + for (; *__b != __sentinel; ++__b) + __dest.push_back(*__b); + } + + template + static void __append_source(__path_string& __dest, _Source const& __s) { + using _Traits = __is_pathable<_Source>; + __append_range(__dest, _Traits::__range_begin(__s), + _Traits::__range_end(__s)); + } +}; + +#if defined(_LIBCPP_WIN32API) +template <> +struct _PathCVT { + + static void + __append_string(__path_string& __dest, const basic_string &__str) { + size_t __size = __char_to_wide(__str, nullptr, 0); + size_t __pos = __dest.size(); + __dest.resize(__pos + __size); + __char_to_wide(__str, const_cast<__path_value*>(__dest.data()) + __pos, __size); + } + + template + static typename enable_if<__is_exactly_cpp17_input_iterator<_Iter>::value>::type + __append_range(__path_string& __dest, _Iter __b, _Iter __e) { + basic_string __tmp(__b, __e); + __append_string(__dest, __tmp); + } + + template + static typename enable_if<__is_cpp17_forward_iterator<_Iter>::value>::type + __append_range(__path_string& __dest, _Iter __b, _Iter __e) { + basic_string __tmp(__b, __e); + __append_string(__dest, __tmp); + } + + template + static void __append_range(__path_string& __dest, _Iter __b, _NullSentinel) { + const char __sentinel = char{}; + basic_string __tmp; + for (; *__b != __sentinel; ++__b) + __tmp.push_back(*__b); + __append_string(__dest, __tmp); + } + + template + static void __append_source(__path_string& __dest, _Source const& __s) { + using _Traits = __is_pathable<_Source>; + __append_range(__dest, _Traits::__range_begin(__s), + _Traits::__range_end(__s)); + } +}; + +template +struct _PathExport { + typedef __narrow_to_utf8 _Narrower; + typedef __widen_from_utf8 _Widener; + + template + static 
void __append(_Str& __dest, const __path_string& __src) { + string __utf8; + _Narrower()(back_inserter(__utf8), __src.data(), __src.data() + __src.size()); + _Widener()(back_inserter(__dest), __utf8.data(), __utf8.data() + __utf8.size()); + } +}; + +template <> +struct _PathExport { + template + static void __append(_Str& __dest, const __path_string& __src) { + size_t __size = __wide_to_char(__src, nullptr, 0); + size_t __pos = __dest.size(); + __dest.resize(__size); + __wide_to_char(__src, const_cast(__dest.data()) + __pos, __size); + } +}; + +template <> +struct _PathExport { + template + static void __append(_Str& __dest, const __path_string& __src) { + __dest.append(__src.begin(), __src.end()); + } +}; + +template <> +struct _PathExport { + template + static void __append(_Str& __dest, const __path_string& __src) { + __dest.append(__src.begin(), __src.end()); + } +}; + +#ifndef _LIBCPP_HAS_NO_CHAR8_T +template <> +struct _PathExport { + typedef __narrow_to_utf8 _Narrower; + + template + static void __append(_Str& __dest, const __path_string& __src) { + _Narrower()(back_inserter(__dest), __src.data(), __src.data() + __src.size()); + } +}; +#endif /* !_LIBCPP_HAS_NO_CHAR8_T */ +#endif /* _LIBCPP_WIN32API */ + +class _LIBCPP_TYPE_VIS path { + template + using _EnableIfPathable = + typename enable_if<__is_pathable<_SourceOrIter>::value, _Tp>::type; + + template + using _SourceChar = typename __is_pathable<_Tp>::__char_type; + + template + using _SourceCVT = _PathCVT<_SourceChar<_Tp> >; + +public: +#if defined(_LIBCPP_WIN32API) + typedef wchar_t value_type; + static constexpr value_type preferred_separator = L'\\'; +#else + typedef char value_type; + static constexpr value_type preferred_separator = '/'; +#endif + typedef basic_string string_type; + typedef basic_string_view __string_view; + + enum _LIBCPP_ENUM_VIS format : unsigned char { + auto_format, + native_format, + generic_format + }; + + // constructors and destructor + _LIBCPP_INLINE_VISIBILITY path() noexcept {} + _LIBCPP_INLINE_VISIBILITY path(const path& __p) : __pn_(__p.__pn_) {} + _LIBCPP_INLINE_VISIBILITY path(path&& __p) noexcept + : __pn_(_VSTD::move(__p.__pn_)) {} + + _LIBCPP_INLINE_VISIBILITY + path(string_type&& __s, format = format::auto_format) noexcept + : __pn_(_VSTD::move(__s)) {} + + template > + path(const _Source& __src, format = format::auto_format) { + _SourceCVT<_Source>::__append_source(__pn_, __src); + } + + template + path(_InputIt __first, _InputIt __last, format = format::auto_format) { + typedef typename iterator_traits<_InputIt>::value_type _ItVal; + _PathCVT<_ItVal>::__append_range(__pn_, __first, __last); + } + +/* +#if !defined(_LIBCPP_HAS_NO_LOCALIZATION) + // TODO Implement locale conversions. 
+ template > + path(const _Source& __src, const locale& __loc, format = format::auto_format); + template + path(_InputIt __first, _InputIt _last, const locale& __loc, + format = format::auto_format); +#endif +*/ + + _LIBCPP_INLINE_VISIBILITY + ~path() = default; + + // assignments + _LIBCPP_INLINE_VISIBILITY + path& operator=(const path& __p) { + __pn_ = __p.__pn_; + return *this; + } + + _LIBCPP_INLINE_VISIBILITY + path& operator=(path&& __p) noexcept { + __pn_ = _VSTD::move(__p.__pn_); + return *this; + } + + _LIBCPP_INLINE_VISIBILITY + path& operator=(string_type&& __s) noexcept { + __pn_ = _VSTD::move(__s); + return *this; + } + + _LIBCPP_INLINE_VISIBILITY + path& assign(string_type&& __s) noexcept { + __pn_ = _VSTD::move(__s); + return *this; + } + + template + _LIBCPP_INLINE_VISIBILITY _EnableIfPathable<_Source> + operator=(const _Source& __src) { + return this->assign(__src); + } + + template + _EnableIfPathable<_Source> assign(const _Source& __src) { + __pn_.clear(); + _SourceCVT<_Source>::__append_source(__pn_, __src); + return *this; + } + + template + path& assign(_InputIt __first, _InputIt __last) { + typedef typename iterator_traits<_InputIt>::value_type _ItVal; + __pn_.clear(); + _PathCVT<_ItVal>::__append_range(__pn_, __first, __last); + return *this; + } + +public: + // appends +#if defined(_LIBCPP_WIN32API) + path& operator/=(const path& __p) { + auto __p_root_name = __p.__root_name(); + auto __p_root_name_size = __p_root_name.size(); + if (__p.is_absolute() || + (!__p_root_name.empty() && __p_root_name != __string_view(root_name().__pn_))) { + __pn_ = __p.__pn_; + return *this; + } + if (__p.has_root_directory()) { + path __root_name_str = root_name(); + __pn_ = __root_name_str.native(); + __pn_ += __string_view(__p.__pn_).substr(__p_root_name_size); + return *this; + } + if (has_filename() || (!has_root_directory() && is_absolute())) + __pn_ += preferred_separator; + __pn_ += __string_view(__p.__pn_).substr(__p_root_name_size); + return *this; + } + template + _LIBCPP_INLINE_VISIBILITY _EnableIfPathable<_Source> + operator/=(const _Source& __src) { + return operator/=(path(__src)); + } + + template + _EnableIfPathable<_Source> append(const _Source& __src) { + return operator/=(path(__src)); + } + + template + path& append(_InputIt __first, _InputIt __last) { + return operator/=(path(__first, __last)); + } +#else + path& operator/=(const path& __p) { + if (__p.is_absolute()) { + __pn_ = __p.__pn_; + return *this; + } + if (has_filename()) + __pn_ += preferred_separator; + __pn_ += __p.native(); + return *this; + } + + // FIXME: Use _LIBCPP_DIAGNOSE_WARNING to produce a diagnostic when __src + // is known at compile time to be "/' since the user almost certainly intended + // to append a separator instead of overwriting the path with "/" + template + _LIBCPP_INLINE_VISIBILITY _EnableIfPathable<_Source> + operator/=(const _Source& __src) { + return this->append(__src); + } + + template + _EnableIfPathable<_Source> append(const _Source& __src) { + using _Traits = __is_pathable<_Source>; + using _CVT = _PathCVT<_SourceChar<_Source> >; + bool __source_is_absolute = __is_separator(_Traits::__first_or_null(__src)); + if (__source_is_absolute) + __pn_.clear(); + else if (has_filename()) + __pn_ += preferred_separator; + _CVT::__append_source(__pn_, __src); + return *this; + } + + template + path& append(_InputIt __first, _InputIt __last) { + typedef typename iterator_traits<_InputIt>::value_type _ItVal; + static_assert(__can_convert_char<_ItVal>::value, "Must convertible"); + 
using _CVT = _PathCVT<_ItVal>; + if (__first != __last && __is_separator(*__first)) + __pn_.clear(); + else if (has_filename()) + __pn_ += preferred_separator; + _CVT::__append_range(__pn_, __first, __last); + return *this; + } +#endif + + // concatenation + _LIBCPP_INLINE_VISIBILITY + path& operator+=(const path& __x) { + __pn_ += __x.__pn_; + return *this; + } + + _LIBCPP_INLINE_VISIBILITY + path& operator+=(const string_type& __x) { + __pn_ += __x; + return *this; + } + + _LIBCPP_INLINE_VISIBILITY + path& operator+=(__string_view __x) { + __pn_ += __x; + return *this; + } + + _LIBCPP_INLINE_VISIBILITY + path& operator+=(const value_type* __x) { + __pn_ += __x; + return *this; + } + + _LIBCPP_INLINE_VISIBILITY + path& operator+=(value_type __x) { + __pn_ += __x; + return *this; + } + + template + typename enable_if<__can_convert_char<_ECharT>::value, path&>::type + operator+=(_ECharT __x) { + _PathCVT<_ECharT>::__append_source(__pn_, + basic_string_view<_ECharT>(&__x, 1)); + return *this; + } + + template + _EnableIfPathable<_Source> operator+=(const _Source& __x) { + return this->concat(__x); + } + + template + _EnableIfPathable<_Source> concat(const _Source& __x) { + _SourceCVT<_Source>::__append_source(__pn_, __x); + return *this; + } + + template + path& concat(_InputIt __first, _InputIt __last) { + typedef typename iterator_traits<_InputIt>::value_type _ItVal; + _PathCVT<_ItVal>::__append_range(__pn_, __first, __last); + return *this; + } + + // modifiers + _LIBCPP_INLINE_VISIBILITY + void clear() noexcept { __pn_.clear(); } + + path& make_preferred() { +#if defined(_LIBCPP_WIN32API) + _VSTD::replace(__pn_.begin(), __pn_.end(), L'/', L'\\'); +#endif + return *this; + } + + _LIBCPP_INLINE_VISIBILITY + path& remove_filename() { + auto __fname = __filename(); + if (!__fname.empty()) + __pn_.erase(__fname.data() - __pn_.data()); + return *this; + } + + path& replace_filename(const path& __replacement) { + remove_filename(); + return (*this /= __replacement); + } + + path& replace_extension(const path& __replacement = path()); + + _LIBCPP_INLINE_VISIBILITY + void swap(path& __rhs) noexcept { __pn_.swap(__rhs.__pn_); } + + // private helper to allow reserving memory in the path + _LIBCPP_INLINE_VISIBILITY + void __reserve(size_t __s) { __pn_.reserve(__s); } + + // native format observers + _LIBCPP_INLINE_VISIBILITY + const string_type& native() const noexcept { return __pn_; } + + _LIBCPP_INLINE_VISIBILITY + const value_type* c_str() const noexcept { return __pn_.c_str(); } + + _LIBCPP_INLINE_VISIBILITY operator string_type() const { return __pn_; } + +#if defined(_LIBCPP_WIN32API) + _LIBCPP_INLINE_VISIBILITY _VSTD::wstring wstring() const { return __pn_; } + + _VSTD::wstring generic_wstring() const { + _VSTD::wstring __s; + __s.resize(__pn_.size()); + _VSTD::replace_copy(__pn_.begin(), __pn_.end(), __s.begin(), '\\', '/'); + return __s; + } + +#if !defined(_LIBCPP_HAS_NO_LOCALIZATION) + template , + class _Allocator = allocator<_ECharT> > + basic_string<_ECharT, _Traits, _Allocator> + string(const _Allocator& __a = _Allocator()) const { + using _Str = basic_string<_ECharT, _Traits, _Allocator>; + _Str __s(__a); + __s.reserve(__pn_.size()); + _PathExport<_ECharT>::__append(__s, __pn_); + return __s; + } + + _LIBCPP_INLINE_VISIBILITY _VSTD::string string() const { + return string(); + } + _LIBCPP_INLINE_VISIBILITY __u8_string u8string() const { + using _CVT = __narrow_to_utf8; + __u8_string __s; + __s.reserve(__pn_.size()); + _CVT()(back_inserter(__s), __pn_.data(), __pn_.data() + 
__pn_.size()); + return __s; + } + + _LIBCPP_INLINE_VISIBILITY _VSTD::u16string u16string() const { + return string(); + } + _LIBCPP_INLINE_VISIBILITY _VSTD::u32string u32string() const { + return string(); + } + + // generic format observers + template , + class _Allocator = allocator<_ECharT> > + basic_string<_ECharT, _Traits, _Allocator> + generic_string(const _Allocator& __a = _Allocator()) const { + using _Str = basic_string<_ECharT, _Traits, _Allocator>; + _Str __s = string<_ECharT, _Traits, _Allocator>(__a); + // Note: This (and generic_u8string below) is slightly suboptimal as + // it iterates twice over the string; once to convert it to the right + // character type, and once to replace path delimiters. + _VSTD::replace(__s.begin(), __s.end(), + static_cast<_ECharT>('\\'), static_cast<_ECharT>('/')); + return __s; + } + + _VSTD::string generic_string() const { return generic_string(); } + _VSTD::u16string generic_u16string() const { return generic_string(); } + _VSTD::u32string generic_u32string() const { return generic_string(); } + __u8_string generic_u8string() const { + __u8_string __s = u8string(); + _VSTD::replace(__s.begin(), __s.end(), '\\', '/'); + return __s; + } +#endif /* !_LIBCPP_HAS_NO_LOCALIZATION */ +#else /* _LIBCPP_WIN32API */ + + _LIBCPP_INLINE_VISIBILITY _VSTD::string string() const { return __pn_; } +#ifndef _LIBCPP_HAS_NO_CHAR8_T + _LIBCPP_INLINE_VISIBILITY _VSTD::u8string u8string() const { return _VSTD::u8string(__pn_.begin(), __pn_.end()); } +#else + _LIBCPP_INLINE_VISIBILITY _VSTD::string u8string() const { return __pn_; } +#endif + +#if !defined(_LIBCPP_HAS_NO_LOCALIZATION) + template , + class _Allocator = allocator<_ECharT> > + basic_string<_ECharT, _Traits, _Allocator> + string(const _Allocator& __a = _Allocator()) const { + using _CVT = __widen_from_utf8; + using _Str = basic_string<_ECharT, _Traits, _Allocator>; + _Str __s(__a); + __s.reserve(__pn_.size()); + _CVT()(back_inserter(__s), __pn_.data(), __pn_.data() + __pn_.size()); + return __s; + } + +#ifndef _LIBCPP_HAS_NO_WIDE_CHARACTERS + _LIBCPP_INLINE_VISIBILITY _VSTD::wstring wstring() const { + return string(); + } +#endif + _LIBCPP_INLINE_VISIBILITY _VSTD::u16string u16string() const { + return string(); + } + _LIBCPP_INLINE_VISIBILITY _VSTD::u32string u32string() const { + return string(); + } +#endif /* !_LIBCPP_HAS_NO_LOCALIZATION */ + + // generic format observers + _VSTD::string generic_string() const { return __pn_; } +#ifndef _LIBCPP_HAS_NO_CHAR8_T + _VSTD::u8string generic_u8string() const { return _VSTD::u8string(__pn_.begin(), __pn_.end()); } +#else + _VSTD::string generic_u8string() const { return __pn_; } +#endif + +#if !defined(_LIBCPP_HAS_NO_LOCALIZATION) + template , + class _Allocator = allocator<_ECharT> > + basic_string<_ECharT, _Traits, _Allocator> + generic_string(const _Allocator& __a = _Allocator()) const { + return string<_ECharT, _Traits, _Allocator>(__a); + } + +#ifndef _LIBCPP_HAS_NO_WIDE_CHARACTERS + _VSTD::wstring generic_wstring() const { return string(); } +#endif + _VSTD::u16string generic_u16string() const { return string(); } + _VSTD::u32string generic_u32string() const { return string(); } +#endif /* !_LIBCPP_HAS_NO_LOCALIZATION */ +#endif /* !_LIBCPP_WIN32API */ + +private: + int __compare(__string_view) const; + __string_view __root_name() const; + __string_view __root_directory() const; + __string_view __root_path_raw() const; + __string_view __relative_path() const; + __string_view __parent_path() const; + __string_view __filename() const; + __string_view 
__stem() const; + __string_view __extension() const; + +public: + // compare + _LIBCPP_INLINE_VISIBILITY int compare(const path& __p) const noexcept { + return __compare(__p.__pn_); + } + _LIBCPP_INLINE_VISIBILITY int compare(const string_type& __s) const { + return __compare(__s); + } + _LIBCPP_INLINE_VISIBILITY int compare(__string_view __s) const { + return __compare(__s); + } + _LIBCPP_INLINE_VISIBILITY int compare(const value_type* __s) const { + return __compare(__s); + } + + // decomposition + _LIBCPP_INLINE_VISIBILITY path root_name() const { + return string_type(__root_name()); + } + _LIBCPP_INLINE_VISIBILITY path root_directory() const { + return string_type(__root_directory()); + } + _LIBCPP_INLINE_VISIBILITY path root_path() const { +#if defined(_LIBCPP_WIN32API) + return string_type(__root_path_raw()); +#else + return root_name().append(string_type(__root_directory())); +#endif + } + _LIBCPP_INLINE_VISIBILITY path relative_path() const { + return string_type(__relative_path()); + } + _LIBCPP_INLINE_VISIBILITY path parent_path() const { + return string_type(__parent_path()); + } + _LIBCPP_INLINE_VISIBILITY path filename() const { + return string_type(__filename()); + } + _LIBCPP_INLINE_VISIBILITY path stem() const { return string_type(__stem()); } + _LIBCPP_INLINE_VISIBILITY path extension() const { + return string_type(__extension()); + } + + // query + _LIBCPP_NODISCARD_AFTER_CXX17 _LIBCPP_INLINE_VISIBILITY bool + empty() const noexcept { + return __pn_.empty(); + } + + _LIBCPP_INLINE_VISIBILITY bool has_root_name() const { + return !__root_name().empty(); + } + _LIBCPP_INLINE_VISIBILITY bool has_root_directory() const { + return !__root_directory().empty(); + } + _LIBCPP_INLINE_VISIBILITY bool has_root_path() const { + return !__root_path_raw().empty(); + } + _LIBCPP_INLINE_VISIBILITY bool has_relative_path() const { + return !__relative_path().empty(); + } + _LIBCPP_INLINE_VISIBILITY bool has_parent_path() const { + return !__parent_path().empty(); + } + _LIBCPP_INLINE_VISIBILITY bool has_filename() const { + return !__filename().empty(); + } + _LIBCPP_INLINE_VISIBILITY bool has_stem() const { return !__stem().empty(); } + _LIBCPP_INLINE_VISIBILITY bool has_extension() const { + return !__extension().empty(); + } + + _LIBCPP_INLINE_VISIBILITY bool is_absolute() const { +#if defined(_LIBCPP_WIN32API) + __string_view __root_name_str = __root_name(); + __string_view __root_dir = __root_directory(); + if (__root_name_str.size() == 2 && __root_name_str[1] == ':') { + // A drive letter with no root directory is relative, e.g. x:example. + return !__root_dir.empty(); + } + // If no root name, it's relative, e.g. 
\example is relative to the current drive + if (__root_name_str.empty()) + return false; + if (__root_name_str.size() < 3) + return false; + // A server root name, like \\server, is always absolute + if (__root_name_str[0] != '/' && __root_name_str[0] != '\\') + return false; + if (__root_name_str[1] != '/' && __root_name_str[1] != '\\') + return false; + // Seems to be a server root name + return true; +#else + return has_root_directory(); +#endif + } + _LIBCPP_INLINE_VISIBILITY bool is_relative() const { return !is_absolute(); } + + // relative paths + path lexically_normal() const; + path lexically_relative(const path& __base) const; + + _LIBCPP_INLINE_VISIBILITY path lexically_proximate(const path& __base) const { + path __result = this->lexically_relative(__base); + if (__result.native().empty()) + return *this; + return __result; + } + + // iterators + class _LIBCPP_TYPE_VIS iterator; + typedef iterator const_iterator; + + iterator begin() const; + iterator end() const; + +#if !defined(_LIBCPP_HAS_NO_LOCALIZATION) + template + _LIBCPP_INLINE_VISIBILITY friend + typename enable_if::value && + is_same<_Traits, char_traits >::value, + basic_ostream<_CharT, _Traits>&>::type + operator<<(basic_ostream<_CharT, _Traits>& __os, const path& __p) { + __os << _VSTD::__quoted(__p.native()); + return __os; + } + + template + _LIBCPP_INLINE_VISIBILITY friend + typename enable_if::value || + !is_same<_Traits, char_traits >::value, + basic_ostream<_CharT, _Traits>&>::type + operator<<(basic_ostream<_CharT, _Traits>& __os, const path& __p) { + __os << _VSTD::__quoted(__p.string<_CharT, _Traits>()); + return __os; + } + + template + _LIBCPP_INLINE_VISIBILITY friend basic_istream<_CharT, _Traits>& + operator>>(basic_istream<_CharT, _Traits>& __is, path& __p) { + basic_string<_CharT, _Traits> __tmp; + __is >> __quoted(__tmp); + __p = __tmp; + return __is; + } +#endif // !_LIBCPP_HAS_NO_LOCALIZATION + + friend _LIBCPP_INLINE_VISIBILITY bool operator==(const path& __lhs, const path& __rhs) noexcept { + return __lhs.__compare(__rhs.__pn_) == 0; + } + friend _LIBCPP_INLINE_VISIBILITY bool operator!=(const path& __lhs, const path& __rhs) noexcept { + return __lhs.__compare(__rhs.__pn_) != 0; + } + friend _LIBCPP_INLINE_VISIBILITY bool operator<(const path& __lhs, const path& __rhs) noexcept { + return __lhs.__compare(__rhs.__pn_) < 0; + } + friend _LIBCPP_INLINE_VISIBILITY bool operator<=(const path& __lhs, const path& __rhs) noexcept { + return __lhs.__compare(__rhs.__pn_) <= 0; + } + friend _LIBCPP_INLINE_VISIBILITY bool operator>(const path& __lhs, const path& __rhs) noexcept { + return __lhs.__compare(__rhs.__pn_) > 0; + } + friend _LIBCPP_INLINE_VISIBILITY bool operator>=(const path& __lhs, const path& __rhs) noexcept { + return __lhs.__compare(__rhs.__pn_) >= 0; + } + + friend _LIBCPP_INLINE_VISIBILITY path operator/(const path& __lhs, + const path& __rhs) { + path __result(__lhs); + __result /= __rhs; + return __result; + } +private: + inline _LIBCPP_INLINE_VISIBILITY path& + __assign_view(__string_view const& __s) noexcept { + __pn_ = string_type(__s); + return *this; + } + string_type __pn_; +}; + +inline _LIBCPP_INLINE_VISIBILITY void swap(path& __lhs, path& __rhs) noexcept { + __lhs.swap(__rhs); +} + +_LIBCPP_FUNC_VIS +size_t hash_value(const path& __p) noexcept; + +_LIBCPP_AVAILABILITY_FILESYSTEM_POP + +_LIBCPP_END_NAMESPACE_FILESYSTEM + +#endif // _LIBCPP_CXX03_LANG + +#endif // _LIBCPP___FILESYSTEM_PATH_H diff --git a/libcxx/include/__filesystem/path_iterator.h 
b/libcxx/include/__filesystem/path_iterator.h new file mode 100644 index 000000000000..62f8dc6fd357 --- /dev/null +++ b/libcxx/include/__filesystem/path_iterator.h @@ -0,0 +1,132 @@ +// -*- C++ -*- +//===----------------------------------------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#ifndef _LIBCPP___FILESYSTEM_PATH_ITERATOR_H +#define _LIBCPP___FILESYSTEM_PATH_ITERATOR_H + +#include <__availability> +#include <__config> +#include <__filesystem/path.h> +#include <__iterator/iterator_traits.h> +#include <__debug> +#include +#include +#include + +#ifndef _LIBCPP_CXX03_LANG + +_LIBCPP_BEGIN_NAMESPACE_FILESYSTEM + +_LIBCPP_AVAILABILITY_FILESYSTEM_PUSH + +class _LIBCPP_TYPE_VIS path::iterator { +public: + enum _ParserState : unsigned char { + _Singular, + _BeforeBegin, + _InRootName, + _InRootDir, + _InFilenames, + _InTrailingSep, + _AtEnd + }; + +public: + typedef bidirectional_iterator_tag iterator_category; + + typedef path value_type; + typedef ptrdiff_t difference_type; + typedef const path* pointer; + typedef const path& reference; + + typedef void + __stashing_iterator_tag; // See reverse_iterator and __is_stashing_iterator + +public: + _LIBCPP_INLINE_VISIBILITY + iterator() + : __stashed_elem_(), __path_ptr_(nullptr), __entry_(), + __state_(_Singular) {} + + iterator(const iterator&) = default; + ~iterator() = default; + + iterator& operator=(const iterator&) = default; + + _LIBCPP_INLINE_VISIBILITY + reference operator*() const { return __stashed_elem_; } + + _LIBCPP_INLINE_VISIBILITY + pointer operator->() const { return &__stashed_elem_; } + + _LIBCPP_INLINE_VISIBILITY + iterator& operator++() { + _LIBCPP_ASSERT(__state_ != _Singular, + "attempting to increment a singular iterator"); + _LIBCPP_ASSERT(__state_ != _AtEnd, + "attempting to increment the end iterator"); + return __increment(); + } + + _LIBCPP_INLINE_VISIBILITY + iterator operator++(int) { + iterator __it(*this); + this->operator++(); + return __it; + } + + _LIBCPP_INLINE_VISIBILITY + iterator& operator--() { + _LIBCPP_ASSERT(__state_ != _Singular, + "attempting to decrement a singular iterator"); + _LIBCPP_ASSERT(__entry_.data() != __path_ptr_->native().data(), + "attempting to decrement the begin iterator"); + return __decrement(); + } + + _LIBCPP_INLINE_VISIBILITY + iterator operator--(int) { + iterator __it(*this); + this->operator--(); + return __it; + } + +private: + friend class path; + + inline _LIBCPP_INLINE_VISIBILITY friend bool operator==(const iterator&, + const iterator&); + + iterator& __increment(); + iterator& __decrement(); + + path __stashed_elem_; + const path* __path_ptr_; + path::__string_view __entry_; + _ParserState __state_; +}; + +inline _LIBCPP_INLINE_VISIBILITY bool operator==(const path::iterator& __lhs, + const path::iterator& __rhs) { + return __lhs.__path_ptr_ == __rhs.__path_ptr_ && + __lhs.__entry_.data() == __rhs.__entry_.data(); +} + +inline _LIBCPP_INLINE_VISIBILITY bool operator!=(const path::iterator& __lhs, + const path::iterator& __rhs) { + return !(__lhs == __rhs); +} + +_LIBCPP_AVAILABILITY_FILESYSTEM_POP + +_LIBCPP_END_NAMESPACE_FILESYSTEM + +#endif // _LIBCPP_CXX03_LANG + +#endif // _LIBCPP___FILESYSTEM_PATH_ITERATOR_H diff --git a/libcxx/include/__filesystem/perm_options.h 
b/libcxx/include/__filesystem/perm_options.h
new file mode 100644
index 000000000000..62cd8f575650
--- /dev/null
+++ b/libcxx/include/__filesystem/perm_options.h
@@ -0,0 +1,73 @@
+// -*- C++ -*-
+//===----------------------------------------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef _LIBCPP___FILESYSTEM_PERM_OPTIONS_H
+#define _LIBCPP___FILESYSTEM_PERM_OPTIONS_H
+
+#include <__availability>
+#include <__config>
+
+#ifndef _LIBCPP_CXX03_LANG
+
+_LIBCPP_BEGIN_NAMESPACE_FILESYSTEM
+
+_LIBCPP_AVAILABILITY_FILESYSTEM_PUSH
+
+enum class _LIBCPP_ENUM_VIS perm_options : unsigned char {
+  replace = 1,
+  add = 2,
+  remove = 4,
+  nofollow = 8
+};
+
+_LIBCPP_INLINE_VISIBILITY
+inline constexpr perm_options operator&(perm_options _LHS, perm_options _RHS) {
+  return static_cast<perm_options>(static_cast<unsigned>(_LHS) &
+                                   static_cast<unsigned>(_RHS));
+}
+
+_LIBCPP_INLINE_VISIBILITY
+inline constexpr perm_options operator|(perm_options _LHS, perm_options _RHS) {
+  return static_cast<perm_options>(static_cast<unsigned>(_LHS) |
+                                   static_cast<unsigned>(_RHS));
+}
+
+_LIBCPP_INLINE_VISIBILITY
+inline constexpr perm_options operator^(perm_options _LHS, perm_options _RHS) {
+  return static_cast<perm_options>(static_cast<unsigned>(_LHS) ^
+                                   static_cast<unsigned>(_RHS));
+}
+
+_LIBCPP_INLINE_VISIBILITY
+inline constexpr perm_options operator~(perm_options _LHS) {
+  return static_cast<perm_options>(~static_cast<unsigned>(_LHS));
+}
+
+_LIBCPP_INLINE_VISIBILITY
+inline perm_options& operator&=(perm_options& _LHS, perm_options _RHS) {
+  return _LHS = _LHS & _RHS;
+}
+
+_LIBCPP_INLINE_VISIBILITY
+inline perm_options& operator|=(perm_options& _LHS, perm_options _RHS) {
+  return _LHS = _LHS | _RHS;
+}
+
+_LIBCPP_INLINE_VISIBILITY
+inline perm_options& operator^=(perm_options& _LHS, perm_options _RHS) {
+  return _LHS = _LHS ^ _RHS;
+}
+
+_LIBCPP_AVAILABILITY_FILESYSTEM_POP
+
+_LIBCPP_END_NAMESPACE_FILESYSTEM
+
+#endif // _LIBCPP_CXX03_LANG
+
+#endif // _LIBCPP___FILESYSTEM_PERM_OPTIONS_H
diff --git a/libcxx/include/__filesystem/perms.h b/libcxx/include/__filesystem/perms.h
new file mode 100644
index 000000000000..832f8b07e55c
--- /dev/null
+++ b/libcxx/include/__filesystem/perms.h
@@ -0,0 +1,91 @@
+// -*- C++ -*-
+//===----------------------------------------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef _LIBCPP___FILESYSTEM_PERMS_H
+#define _LIBCPP___FILESYSTEM_PERMS_H
+
+#include <__availability>
+#include <__config>
+
+#ifndef _LIBCPP_CXX03_LANG
+
+_LIBCPP_BEGIN_NAMESPACE_FILESYSTEM
+
+_LIBCPP_AVAILABILITY_FILESYSTEM_PUSH
+
+// On Windows, these permission bits map to one single readonly flag per
+// file, and the executable bit is always returned as set. When setting
+// permissions, as long as the write bit is set for either owner, group or
+// others, the readonly flag is cleared.
+enum class _LIBCPP_ENUM_VIS perms : unsigned {
+  none = 0,
+
+  owner_read = 0400,
+  owner_write = 0200,
+  owner_exec = 0100,
+  owner_all = 0700,
+
+  group_read = 040,
+  group_write = 020,
+  group_exec = 010,
+  group_all = 070,
+
+  others_read = 04,
+  others_write = 02,
+  others_exec = 01,
+  others_all = 07,
+
+  all = 0777,
+
+  set_uid = 04000,
+  set_gid = 02000,
+  sticky_bit = 01000,
+  mask = 07777,
+  unknown = 0xFFFF,
+};
+
+_LIBCPP_INLINE_VISIBILITY
+inline constexpr perms operator&(perms _LHS, perms _RHS) {
+  return static_cast<perms>(static_cast<unsigned>(_LHS) &
+                            static_cast<unsigned>(_RHS));
+}
+
+_LIBCPP_INLINE_VISIBILITY
+inline constexpr perms operator|(perms _LHS, perms _RHS) {
+  return static_cast<perms>(static_cast<unsigned>(_LHS) |
+                            static_cast<unsigned>(_RHS));
+}
+
+_LIBCPP_INLINE_VISIBILITY
+inline constexpr perms operator^(perms _LHS, perms _RHS) {
+  return static_cast<perms>(static_cast<unsigned>(_LHS) ^
+                            static_cast<unsigned>(_RHS));
+}
+
+_LIBCPP_INLINE_VISIBILITY
+inline constexpr perms operator~(perms _LHS) {
+  return static_cast<perms>(~static_cast<unsigned>(_LHS));
+}
+
+_LIBCPP_INLINE_VISIBILITY
+inline perms& operator&=(perms& _LHS, perms _RHS) { return _LHS = _LHS & _RHS; }
+
+_LIBCPP_INLINE_VISIBILITY
+inline perms& operator|=(perms& _LHS, perms _RHS) { return _LHS = _LHS | _RHS; }
+
+_LIBCPP_INLINE_VISIBILITY
+inline perms& operator^=(perms& _LHS, perms _RHS) { return _LHS = _LHS ^ _RHS; }
+
+_LIBCPP_AVAILABILITY_FILESYSTEM_POP
+
+_LIBCPP_END_NAMESPACE_FILESYSTEM
+
+#endif // _LIBCPP_CXX03_LANG
+
+#endif // _LIBCPP___FILESYSTEM_PERMS_H
diff --git a/libcxx/include/__filesystem/recursive_directory_iterator.h b/libcxx/include/__filesystem/recursive_directory_iterator.h
new file mode 100644
index 000000000000..db7e793e8530
--- /dev/null
+++ b/libcxx/include/__filesystem/recursive_directory_iterator.h
@@ -0,0 +1,181 @@
+// -*- C++ -*-
+//===----------------------------------------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#ifndef _LIBCPP___FILESYSTEM_RECURSIVE_DIRECTORY_ITERATOR_H +#define _LIBCPP___FILESYSTEM_RECURSIVE_DIRECTORY_ITERATOR_H + +#include <__availability> +#include <__config> +#include <__filesystem/directory_entry.h> +#include <__filesystem/directory_options.h> +#include <__filesystem/path.h> +#include <__iterator/iterator_traits.h> +#include <__memory/shared_ptr.h> +#include <__ranges/enable_borrowed_range.h> +#include <__ranges/enable_view.h> +#include +#include + +#ifndef _LIBCPP_CXX03_LANG + +_LIBCPP_BEGIN_NAMESPACE_FILESYSTEM + +_LIBCPP_AVAILABILITY_FILESYSTEM_PUSH + +class recursive_directory_iterator { +public: + using value_type = directory_entry; + using difference_type = ptrdiff_t; + using pointer = directory_entry const*; + using reference = directory_entry const&; + using iterator_category = input_iterator_tag; + +public: + // constructors and destructor + _LIBCPP_INLINE_VISIBILITY + recursive_directory_iterator() noexcept : __rec_(false) {} + + _LIBCPP_INLINE_VISIBILITY + explicit recursive_directory_iterator( + const path& __p, directory_options __xoptions = directory_options::none) + : recursive_directory_iterator(__p, __xoptions, nullptr) {} + + _LIBCPP_INLINE_VISIBILITY + recursive_directory_iterator(const path& __p, directory_options __xoptions, + error_code& __ec) + : recursive_directory_iterator(__p, __xoptions, &__ec) {} + + _LIBCPP_INLINE_VISIBILITY + recursive_directory_iterator(const path& __p, error_code& __ec) + : recursive_directory_iterator(__p, directory_options::none, &__ec) {} + + recursive_directory_iterator(const recursive_directory_iterator&) = default; + recursive_directory_iterator(recursive_directory_iterator&&) = default; + + recursive_directory_iterator& + operator=(const recursive_directory_iterator&) = default; + + _LIBCPP_INLINE_VISIBILITY + recursive_directory_iterator& + operator=(recursive_directory_iterator&& __o) noexcept { + // non-default implementation provided to support self-move assign. 
+ if (this != &__o) { + __imp_ = _VSTD::move(__o.__imp_); + __rec_ = __o.__rec_; + } + return *this; + } + + ~recursive_directory_iterator() = default; + + _LIBCPP_INLINE_VISIBILITY + const directory_entry& operator*() const { return __dereference(); } + + _LIBCPP_INLINE_VISIBILITY + const directory_entry* operator->() const { return &__dereference(); } + + recursive_directory_iterator& operator++() { return __increment(); } + + _LIBCPP_INLINE_VISIBILITY + __dir_element_proxy operator++(int) { + __dir_element_proxy __p(**this); + __increment(); + return __p; + } + + _LIBCPP_INLINE_VISIBILITY + recursive_directory_iterator& increment(error_code& __ec) { + return __increment(&__ec); + } + + _LIBCPP_FUNC_VIS directory_options options() const; + _LIBCPP_FUNC_VIS int depth() const; + + _LIBCPP_INLINE_VISIBILITY + void pop() { __pop(); } + + _LIBCPP_INLINE_VISIBILITY + void pop(error_code& __ec) { __pop(&__ec); } + + _LIBCPP_INLINE_VISIBILITY + bool recursion_pending() const { return __rec_; } + + _LIBCPP_INLINE_VISIBILITY + void disable_recursion_pending() { __rec_ = false; } + +private: + _LIBCPP_FUNC_VIS + recursive_directory_iterator(const path& __p, directory_options __opt, + error_code* __ec); + + _LIBCPP_FUNC_VIS + const directory_entry& __dereference() const; + + _LIBCPP_FUNC_VIS + bool __try_recursion(error_code* __ec); + + _LIBCPP_FUNC_VIS + void __advance(error_code* __ec = nullptr); + + _LIBCPP_FUNC_VIS + recursive_directory_iterator& __increment(error_code* __ec = nullptr); + + _LIBCPP_FUNC_VIS + void __pop(error_code* __ec = nullptr); + + inline _LIBCPP_INLINE_VISIBILITY friend bool + operator==(const recursive_directory_iterator&, + const recursive_directory_iterator&) noexcept; + + struct _LIBCPP_HIDDEN __shared_imp; + shared_ptr<__shared_imp> __imp_; + bool __rec_; +}; // class recursive_directory_iterator + +inline _LIBCPP_INLINE_VISIBILITY bool +operator==(const recursive_directory_iterator& __lhs, + const recursive_directory_iterator& __rhs) noexcept { + return __lhs.__imp_ == __rhs.__imp_; +} + +_LIBCPP_INLINE_VISIBILITY +inline bool operator!=(const recursive_directory_iterator& __lhs, + const recursive_directory_iterator& __rhs) noexcept { + return !(__lhs == __rhs); +} +// enable recursive_directory_iterator range-based for statements +inline _LIBCPP_INLINE_VISIBILITY recursive_directory_iterator +begin(recursive_directory_iterator __iter) noexcept { + return __iter; +} + +inline _LIBCPP_INLINE_VISIBILITY recursive_directory_iterator +end(recursive_directory_iterator) noexcept { + return recursive_directory_iterator(); +} + +_LIBCPP_AVAILABILITY_FILESYSTEM_POP + +_LIBCPP_END_NAMESPACE_FILESYSTEM + +#if !defined(_LIBCPP_HAS_NO_RANGES) + +template <> +_LIBCPP_AVAILABILITY_FILESYSTEM +inline constexpr bool _VSTD::ranges::enable_borrowed_range<_VSTD_FS::recursive_directory_iterator> = true; + +template <> +_LIBCPP_AVAILABILITY_FILESYSTEM +inline constexpr bool _VSTD::ranges::enable_view<_VSTD_FS::recursive_directory_iterator> = true; + +#endif + +#endif // _LIBCPP_CXX03_LANG + +#endif // _LIBCPP___FILESYSTEM_RECURSIVE_DIRECTORY_ITERATOR_H diff --git a/libcxx/include/__filesystem/space_info.h b/libcxx/include/__filesystem/space_info.h new file mode 100644 index 000000000000..098f085678e4 --- /dev/null +++ b/libcxx/include/__filesystem/space_info.h @@ -0,0 +1,35 @@ +// -*- C++ -*- +//===----------------------------------------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 
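// [Editor's note] Illustrative sketch only; it is not part of this patch.
// It exercises the recursive_directory_iterator interface declared above:
// construction with directory_options, increment(error_code&), depth(), and
// disable_recursion_pending(). "some/dir" and ".git" are hypothetical names.
#include <filesystem>
#include <iostream>
#include <system_error>

int main() {
  namespace fs = std::filesystem;
  std::error_code ec;
  for (auto it = fs::recursive_directory_iterator(
           "some/dir", fs::directory_options::skip_permission_denied, ec);
       !ec && it != fs::recursive_directory_iterator(); it.increment(ec)) {
    const fs::directory_entry& entry = *it;
    std::cout << entry.path().string() << " (depth " << it.depth() << ")\n";
    // Do not descend into directories named ".git".
    if (entry.is_directory(ec) && entry.path().filename() == ".git")
      it.disable_recursion_pending();
  }
  return ec ? 1 : 0;
}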
+// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#ifndef _LIBCPP___FILESYSTEM_SPACE_INFO_H +#define _LIBCPP___FILESYSTEM_SPACE_INFO_H + +#include <__availability> +#include <__config> +#include + +#ifndef _LIBCPP_CXX03_LANG + +_LIBCPP_BEGIN_NAMESPACE_FILESYSTEM + +_LIBCPP_AVAILABILITY_FILESYSTEM_PUSH + +struct _LIBCPP_TYPE_VIS space_info { + uintmax_t capacity; + uintmax_t free; + uintmax_t available; +}; + +_LIBCPP_AVAILABILITY_FILESYSTEM_POP + +_LIBCPP_END_NAMESPACE_FILESYSTEM + +#endif // _LIBCPP_CXX03_LANG + +#endif // _LIBCPP___FILESYSTEM_SPACE_INFO_H diff --git a/libcxx/include/__filesystem/u8path.h b/libcxx/include/__filesystem/u8path.h new file mode 100644 index 000000000000..dca3b0c5028b --- /dev/null +++ b/libcxx/include/__filesystem/u8path.h @@ -0,0 +1,96 @@ +// -*- C++ -*- +//===----------------------------------------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#ifndef _LIBCPP___FILESYSTEM_U8PATH_H +#define _LIBCPP___FILESYSTEM_U8PATH_H + +#include <__availability> +#include <__config> +#include <__filesystem/path.h> +#include + +#ifndef _LIBCPP_CXX03_LANG + +_LIBCPP_BEGIN_NAMESPACE_FILESYSTEM + +_LIBCPP_AVAILABILITY_FILESYSTEM_PUSH + +template +_LIBCPP_INLINE_VISIBILITY _LIBCPP_DEPRECATED_WITH_CHAR8_T + typename enable_if<__is_pathable<_InputIt>::value, path>::type + u8path(_InputIt __f, _InputIt __l) { + static_assert( +#ifndef _LIBCPP_HAS_NO_CHAR8_T + is_same::__char_type, char8_t>::value || +#endif + is_same::__char_type, char>::value, + "u8path(Iter, Iter) requires Iter have a value_type of type 'char'" + " or 'char8_t'"); +#if defined(_LIBCPP_WIN32API) + string __tmp(__f, __l); + using _CVT = __widen_from_utf8; + _VSTD::wstring __w; + __w.reserve(__tmp.size()); + _CVT()(back_inserter(__w), __tmp.data(), __tmp.data() + __tmp.size()); + return path(__w); +#else + return path(__f, __l); +#endif /* !_LIBCPP_WIN32API */ +} + +#if defined(_LIBCPP_WIN32API) +template +_LIBCPP_INLINE_VISIBILITY _LIBCPP_DEPRECATED_WITH_CHAR8_T + typename enable_if<__is_pathable<_InputIt>::value, path>::type + u8path(_InputIt __f, _NullSentinel) { + static_assert( +#ifndef _LIBCPP_HAS_NO_CHAR8_T + is_same::__char_type, char8_t>::value || +#endif + is_same::__char_type, char>::value, + "u8path(Iter, Iter) requires Iter have a value_type of type 'char'" + " or 'char8_t'"); + string __tmp; + const char __sentinel = char{}; + for (; *__f != __sentinel; ++__f) + __tmp.push_back(*__f); + using _CVT = __widen_from_utf8; + _VSTD::wstring __w; + __w.reserve(__tmp.size()); + _CVT()(back_inserter(__w), __tmp.data(), __tmp.data() + __tmp.size()); + return path(__w); +} +#endif /* _LIBCPP_WIN32API */ + +template +_LIBCPP_INLINE_VISIBILITY _LIBCPP_DEPRECATED_WITH_CHAR8_T + typename enable_if<__is_pathable<_Source>::value, path>::type + u8path(const _Source& __s) { + static_assert( +#ifndef _LIBCPP_HAS_NO_CHAR8_T + is_same::__char_type, char8_t>::value || +#endif + is_same::__char_type, char>::value, + "u8path(Source const&) requires Source have a character type of type " + "'char' or 'char8_t'"); +#if defined(_LIBCPP_WIN32API) + using _Traits = __is_pathable<_Source>; + 
return u8path(_VSTD::__unwrap_iter(_Traits::__range_begin(__s)), _VSTD::__unwrap_iter(_Traits::__range_end(__s))); +#else + return path(__s); +#endif +} + +_LIBCPP_AVAILABILITY_FILESYSTEM_POP + +_LIBCPP_END_NAMESPACE_FILESYSTEM + +#endif // _LIBCPP_CXX03_LANG + +#endif // _LIBCPP___FILESYSTEM_U8PATH_H diff --git a/libcxx/include/filesystem b/libcxx/include/filesystem index 39e8ca2e814b..09d90614aa0d 100644 --- a/libcxx/include/filesystem +++ b/libcxx/include/filesystem @@ -238,6 +238,23 @@ inline constexpr bool std::ranges::enable_view +#include<__filesystem/directory_entry.h> +#include<__filesystem/directory_iterator.h> +#include<__filesystem/directory_options.h> +#include<__filesystem/file_status.h> +#include<__filesystem/file_time_type.h> +#include<__filesystem/file_type.h> +#include<__filesystem/filesystem_error.h> +#include<__filesystem/operations.h> +#include<__filesystem/path_iterator.h> +#include<__filesystem/path.h> +#include<__filesystem/perm_options.h> +#include<__filesystem/perms.h> +#include<__filesystem/recursive_directory_iterator.h> +#include<__filesystem/space_info.h> +#include<__filesystem/u8path.h> + #include <__availability> #include <__config> #include <__debug> @@ -271,2775 +288,4 @@ inline constexpr bool std::ranges::enable_view - -#ifndef _LIBCPP_CXX03_LANG - -_LIBCPP_BEGIN_NAMESPACE_FILESYSTEM - -_LIBCPP_AVAILABILITY_FILESYSTEM_PUSH - -typedef chrono::time_point<_FilesystemClock> file_time_type; - -struct _LIBCPP_TYPE_VIS space_info { - uintmax_t capacity; - uintmax_t free; - uintmax_t available; -}; - -// On Windows, the library never identifies files as block, character, fifo -// or socket. -enum class _LIBCPP_ENUM_VIS file_type : signed char { - none = 0, - not_found = -1, - regular = 1, - directory = 2, - symlink = 3, - block = 4, - character = 5, - fifo = 6, - socket = 7, - unknown = 8 -}; - -// On Windows, these permission bits map to one single readonly flag per -// file, and the executable bit is always returned as set. When setting -// permissions, as long as the write bit is set for either owner, group or -// others, the readonly flag is cleared. 
-enum class _LIBCPP_ENUM_VIS perms : unsigned { - none = 0, - - owner_read = 0400, - owner_write = 0200, - owner_exec = 0100, - owner_all = 0700, - - group_read = 040, - group_write = 020, - group_exec = 010, - group_all = 070, - - others_read = 04, - others_write = 02, - others_exec = 01, - others_all = 07, - - all = 0777, - - set_uid = 04000, - set_gid = 02000, - sticky_bit = 01000, - mask = 07777, - unknown = 0xFFFF, -}; - -_LIBCPP_INLINE_VISIBILITY -inline constexpr perms operator&(perms _LHS, perms _RHS) { - return static_cast(static_cast(_LHS) & - static_cast(_RHS)); -} - -_LIBCPP_INLINE_VISIBILITY -inline constexpr perms operator|(perms _LHS, perms _RHS) { - return static_cast(static_cast(_LHS) | - static_cast(_RHS)); -} - -_LIBCPP_INLINE_VISIBILITY -inline constexpr perms operator^(perms _LHS, perms _RHS) { - return static_cast(static_cast(_LHS) ^ - static_cast(_RHS)); -} - -_LIBCPP_INLINE_VISIBILITY -inline constexpr perms operator~(perms _LHS) { - return static_cast(~static_cast(_LHS)); -} - -_LIBCPP_INLINE_VISIBILITY -inline perms& operator&=(perms& _LHS, perms _RHS) { return _LHS = _LHS & _RHS; } - -_LIBCPP_INLINE_VISIBILITY -inline perms& operator|=(perms& _LHS, perms _RHS) { return _LHS = _LHS | _RHS; } - -_LIBCPP_INLINE_VISIBILITY -inline perms& operator^=(perms& _LHS, perms _RHS) { return _LHS = _LHS ^ _RHS; } - -enum class _LIBCPP_ENUM_VIS perm_options : unsigned char { - replace = 1, - add = 2, - remove = 4, - nofollow = 8 -}; - -_LIBCPP_INLINE_VISIBILITY -inline constexpr perm_options operator&(perm_options _LHS, perm_options _RHS) { - return static_cast(static_cast(_LHS) & - static_cast(_RHS)); -} - -_LIBCPP_INLINE_VISIBILITY -inline constexpr perm_options operator|(perm_options _LHS, perm_options _RHS) { - return static_cast(static_cast(_LHS) | - static_cast(_RHS)); -} - -_LIBCPP_INLINE_VISIBILITY -inline constexpr perm_options operator^(perm_options _LHS, perm_options _RHS) { - return static_cast(static_cast(_LHS) ^ - static_cast(_RHS)); -} - -_LIBCPP_INLINE_VISIBILITY -inline constexpr perm_options operator~(perm_options _LHS) { - return static_cast(~static_cast(_LHS)); -} - -_LIBCPP_INLINE_VISIBILITY -inline perm_options& operator&=(perm_options& _LHS, perm_options _RHS) { - return _LHS = _LHS & _RHS; -} - -_LIBCPP_INLINE_VISIBILITY -inline perm_options& operator|=(perm_options& _LHS, perm_options _RHS) { - return _LHS = _LHS | _RHS; -} - -_LIBCPP_INLINE_VISIBILITY -inline perm_options& operator^=(perm_options& _LHS, perm_options _RHS) { - return _LHS = _LHS ^ _RHS; -} - -enum class _LIBCPP_ENUM_VIS copy_options : unsigned short { - none = 0, - skip_existing = 1, - overwrite_existing = 2, - update_existing = 4, - recursive = 8, - copy_symlinks = 16, - skip_symlinks = 32, - directories_only = 64, - create_symlinks = 128, - create_hard_links = 256, - __in_recursive_copy = 512, -}; - -_LIBCPP_INLINE_VISIBILITY -inline constexpr copy_options operator&(copy_options _LHS, copy_options _RHS) { - return static_cast(static_cast(_LHS) & - static_cast(_RHS)); -} - -_LIBCPP_INLINE_VISIBILITY -inline constexpr copy_options operator|(copy_options _LHS, copy_options _RHS) { - return static_cast(static_cast(_LHS) | - static_cast(_RHS)); -} - -_LIBCPP_INLINE_VISIBILITY -inline constexpr copy_options operator^(copy_options _LHS, copy_options _RHS) { - return static_cast(static_cast(_LHS) ^ - static_cast(_RHS)); -} - -_LIBCPP_INLINE_VISIBILITY -inline constexpr copy_options operator~(copy_options _LHS) { - return static_cast(~static_cast(_LHS)); -} - -_LIBCPP_INLINE_VISIBILITY -inline 
copy_options& operator&=(copy_options& _LHS, copy_options _RHS) { - return _LHS = _LHS & _RHS; -} - -_LIBCPP_INLINE_VISIBILITY -inline copy_options& operator|=(copy_options& _LHS, copy_options _RHS) { - return _LHS = _LHS | _RHS; -} - -_LIBCPP_INLINE_VISIBILITY -inline copy_options& operator^=(copy_options& _LHS, copy_options _RHS) { - return _LHS = _LHS ^ _RHS; -} - -enum class _LIBCPP_ENUM_VIS directory_options : unsigned char { - none = 0, - follow_directory_symlink = 1, - skip_permission_denied = 2 -}; - -_LIBCPP_INLINE_VISIBILITY -inline constexpr directory_options operator&(directory_options _LHS, - directory_options _RHS) { - return static_cast(static_cast(_LHS) & - static_cast(_RHS)); -} - -_LIBCPP_INLINE_VISIBILITY -inline constexpr directory_options operator|(directory_options _LHS, - directory_options _RHS) { - return static_cast(static_cast(_LHS) | - static_cast(_RHS)); -} - -_LIBCPP_INLINE_VISIBILITY -inline constexpr directory_options operator^(directory_options _LHS, - directory_options _RHS) { - return static_cast(static_cast(_LHS) ^ - static_cast(_RHS)); -} - -_LIBCPP_INLINE_VISIBILITY -inline constexpr directory_options operator~(directory_options _LHS) { - return static_cast(~static_cast(_LHS)); -} - -_LIBCPP_INLINE_VISIBILITY -inline directory_options& operator&=(directory_options& _LHS, - directory_options _RHS) { - return _LHS = _LHS & _RHS; -} - -_LIBCPP_INLINE_VISIBILITY -inline directory_options& operator|=(directory_options& _LHS, - directory_options _RHS) { - return _LHS = _LHS | _RHS; -} - -_LIBCPP_INLINE_VISIBILITY -inline directory_options& operator^=(directory_options& _LHS, - directory_options _RHS) { - return _LHS = _LHS ^ _RHS; -} - -class _LIBCPP_TYPE_VIS file_status { -public: - // constructors - _LIBCPP_INLINE_VISIBILITY - file_status() noexcept : file_status(file_type::none) {} - _LIBCPP_INLINE_VISIBILITY - explicit file_status(file_type __ft, perms __prms = perms::unknown) noexcept - : __ft_(__ft), - __prms_(__prms) {} - - file_status(const file_status&) noexcept = default; - file_status(file_status&&) noexcept = default; - - _LIBCPP_INLINE_VISIBILITY - ~file_status() {} - - file_status& operator=(const file_status&) noexcept = default; - file_status& operator=(file_status&&) noexcept = default; - - // observers - _LIBCPP_INLINE_VISIBILITY - file_type type() const noexcept { return __ft_; } - - _LIBCPP_INLINE_VISIBILITY - perms permissions() const noexcept { return __prms_; } - - // modifiers - _LIBCPP_INLINE_VISIBILITY - void type(file_type __ft) noexcept { __ft_ = __ft; } - - _LIBCPP_INLINE_VISIBILITY - void permissions(perms __p) noexcept { __prms_ = __p; } - -private: - file_type __ft_; - perms __prms_; -}; - -class _LIBCPP_TYPE_VIS directory_entry; - -template -struct __can_convert_char { - static const bool value = false; -}; -template -struct __can_convert_char : public __can_convert_char<_Tp> {}; -template <> -struct __can_convert_char { - static const bool value = true; - using __char_type = char; -}; -template <> -struct __can_convert_char { - static const bool value = true; - using __char_type = wchar_t; -}; -#ifndef _LIBCPP_HAS_NO_CHAR8_T -template <> -struct __can_convert_char { - static const bool value = true; - using __char_type = char8_t; -}; -#endif -template <> -struct __can_convert_char { - static const bool value = true; - using __char_type = char16_t; -}; -template <> -struct __can_convert_char { - static const bool value = true; - using __char_type = char32_t; -}; - -template -typename 
enable_if<__can_convert_char<_ECharT>::value, bool>::type -__is_separator(_ECharT __e) { -#if defined(_LIBCPP_WIN32API) - return __e == _ECharT('/') || __e == _ECharT('\\'); -#else - return __e == _ECharT('/'); -#endif -} - -#ifndef _LIBCPP_HAS_NO_CHAR8_T -typedef u8string __u8_string; -#else -typedef string __u8_string; -#endif - -struct _NullSentinel {}; - -template -using _Void = void; - -template -struct __is_pathable_string : public false_type {}; - -template -struct __is_pathable_string< - basic_string<_ECharT, _Traits, _Alloc>, - _Void::__char_type> > - : public __can_convert_char<_ECharT> { - using _Str = basic_string<_ECharT, _Traits, _Alloc>; - using _Base = __can_convert_char<_ECharT>; - static _ECharT const* __range_begin(_Str const& __s) { return __s.data(); } - static _ECharT const* __range_end(_Str const& __s) { - return __s.data() + __s.length(); - } - static _ECharT __first_or_null(_Str const& __s) { - return __s.empty() ? _ECharT{} : __s[0]; - } -}; - -template -struct __is_pathable_string< - basic_string_view<_ECharT, _Traits>, - _Void::__char_type> > - : public __can_convert_char<_ECharT> { - using _Str = basic_string_view<_ECharT, _Traits>; - using _Base = __can_convert_char<_ECharT>; - static _ECharT const* __range_begin(_Str const& __s) { return __s.data(); } - static _ECharT const* __range_end(_Str const& __s) { - return __s.data() + __s.length(); - } - static _ECharT __first_or_null(_Str const& __s) { - return __s.empty() ? _ECharT{} : __s[0]; - } -}; - -template ::type, - class _UnqualPtrType = - typename remove_const::type>::type, - bool _IsCharPtr = is_pointer<_DS>::value&& - __can_convert_char<_UnqualPtrType>::value> -struct __is_pathable_char_array : false_type {}; - -template -struct __is_pathable_char_array<_Source, _ECharT*, _UPtr, true> - : __can_convert_char::type> { - using _Base = __can_convert_char::type>; - - static _ECharT const* __range_begin(const _ECharT* __b) { return __b; } - static _ECharT const* __range_end(const _ECharT* __b) { - using _Iter = const _ECharT*; - const _ECharT __sentinel = _ECharT{}; - _Iter __e = __b; - for (; *__e != __sentinel; ++__e) - ; - return __e; - } - - static _ECharT __first_or_null(const _ECharT* __b) { return *__b; } -}; - -template ::value, - class = void> -struct __is_pathable_iter : false_type {}; - -template -struct __is_pathable_iter< - _Iter, true, - _Void::value_type>::__char_type> > - : __can_convert_char::value_type> { - using _ECharT = typename iterator_traits<_Iter>::value_type; - using _Base = __can_convert_char<_ECharT>; - - static _Iter __range_begin(_Iter __b) { return __b; } - static _NullSentinel __range_end(_Iter) { return _NullSentinel{}; } - - static _ECharT __first_or_null(_Iter __b) { return *__b; } -}; - -template ::value, - bool _IsCharIterT = __is_pathable_char_array<_Tp>::value, - bool _IsIterT = !_IsCharIterT && __is_pathable_iter<_Tp>::value> -struct __is_pathable : false_type { - static_assert(!_IsStringT && !_IsCharIterT && !_IsIterT, "Must all be false"); -}; - -template -struct __is_pathable<_Tp, true, false, false> : __is_pathable_string<_Tp> {}; - -template -struct __is_pathable<_Tp, false, true, false> : __is_pathable_char_array<_Tp> { -}; - -template -struct __is_pathable<_Tp, false, false, true> : __is_pathable_iter<_Tp> {}; - -#if defined(_LIBCPP_WIN32API) -typedef wstring __path_string; -typedef wchar_t __path_value; -#else -typedef string __path_string; -typedef char __path_value; -#endif - -#if defined(_LIBCPP_WIN32API) -_LIBCPP_FUNC_VIS -size_t __wide_to_char(const 
wstring&, char*, size_t); -_LIBCPP_FUNC_VIS -size_t __char_to_wide(const string&, wchar_t*, size_t); -#endif - -template -struct _PathCVT; - -#if !defined(_LIBCPP_HAS_NO_LOCALIZATION) -template -struct _PathCVT { - static_assert(__can_convert_char<_ECharT>::value, - "Char type not convertible"); - - typedef __narrow_to_utf8 _Narrower; -#if defined(_LIBCPP_WIN32API) - typedef __widen_from_utf8 _Widener; -#endif - - static void __append_range(__path_string& __dest, _ECharT const* __b, - _ECharT const* __e) { -#if defined(_LIBCPP_WIN32API) - string __utf8; - _Narrower()(back_inserter(__utf8), __b, __e); - _Widener()(back_inserter(__dest), __utf8.data(), __utf8.data() + __utf8.size()); -#else - _Narrower()(back_inserter(__dest), __b, __e); -#endif - } - - template - static void __append_range(__path_string& __dest, _Iter __b, _Iter __e) { - static_assert(!is_same<_Iter, _ECharT*>::value, "Call const overload"); - if (__b == __e) - return; - basic_string<_ECharT> __tmp(__b, __e); -#if defined(_LIBCPP_WIN32API) - string __utf8; - _Narrower()(back_inserter(__utf8), __tmp.data(), - __tmp.data() + __tmp.length()); - _Widener()(back_inserter(__dest), __utf8.data(), __utf8.data() + __utf8.size()); -#else - _Narrower()(back_inserter(__dest), __tmp.data(), - __tmp.data() + __tmp.length()); -#endif - } - - template - static void __append_range(__path_string& __dest, _Iter __b, _NullSentinel) { - static_assert(!is_same<_Iter, _ECharT*>::value, "Call const overload"); - const _ECharT __sentinel = _ECharT{}; - if (*__b == __sentinel) - return; - basic_string<_ECharT> __tmp; - for (; *__b != __sentinel; ++__b) - __tmp.push_back(*__b); -#if defined(_LIBCPP_WIN32API) - string __utf8; - _Narrower()(back_inserter(__utf8), __tmp.data(), - __tmp.data() + __tmp.length()); - _Widener()(back_inserter(__dest), __utf8.data(), __utf8.data() + __utf8.size()); -#else - _Narrower()(back_inserter(__dest), __tmp.data(), - __tmp.data() + __tmp.length()); -#endif - } - - template - static void __append_source(__path_string& __dest, _Source const& __s) { - using _Traits = __is_pathable<_Source>; - __append_range(__dest, _Traits::__range_begin(__s), - _Traits::__range_end(__s)); - } -}; -#endif // !_LIBCPP_HAS_NO_LOCALIZATION - -template <> -struct _PathCVT<__path_value> { - - template - static typename enable_if<__is_exactly_cpp17_input_iterator<_Iter>::value>::type - __append_range(__path_string& __dest, _Iter __b, _Iter __e) { - for (; __b != __e; ++__b) - __dest.push_back(*__b); - } - - template - static typename enable_if<__is_cpp17_forward_iterator<_Iter>::value>::type - __append_range(__path_string& __dest, _Iter __b, _Iter __e) { - __dest.append(__b, __e); - } - - template - static void __append_range(__path_string& __dest, _Iter __b, _NullSentinel) { - const char __sentinel = char{}; - for (; *__b != __sentinel; ++__b) - __dest.push_back(*__b); - } - - template - static void __append_source(__path_string& __dest, _Source const& __s) { - using _Traits = __is_pathable<_Source>; - __append_range(__dest, _Traits::__range_begin(__s), - _Traits::__range_end(__s)); - } -}; - -#if defined(_LIBCPP_WIN32API) -template <> -struct _PathCVT { - - static void - __append_string(__path_string& __dest, const basic_string &__str) { - size_t __size = __char_to_wide(__str, nullptr, 0); - size_t __pos = __dest.size(); - __dest.resize(__pos + __size); - __char_to_wide(__str, const_cast<__path_value*>(__dest.data()) + __pos, __size); - } - - template - static typename enable_if<__is_exactly_cpp17_input_iterator<_Iter>::value>::type - 
__append_range(__path_string& __dest, _Iter __b, _Iter __e) { - basic_string __tmp(__b, __e); - __append_string(__dest, __tmp); - } - - template - static typename enable_if<__is_cpp17_forward_iterator<_Iter>::value>::type - __append_range(__path_string& __dest, _Iter __b, _Iter __e) { - basic_string __tmp(__b, __e); - __append_string(__dest, __tmp); - } - - template - static void __append_range(__path_string& __dest, _Iter __b, _NullSentinel) { - const char __sentinel = char{}; - basic_string __tmp; - for (; *__b != __sentinel; ++__b) - __tmp.push_back(*__b); - __append_string(__dest, __tmp); - } - - template - static void __append_source(__path_string& __dest, _Source const& __s) { - using _Traits = __is_pathable<_Source>; - __append_range(__dest, _Traits::__range_begin(__s), - _Traits::__range_end(__s)); - } -}; - -template -struct _PathExport { - typedef __narrow_to_utf8 _Narrower; - typedef __widen_from_utf8 _Widener; - - template - static void __append(_Str& __dest, const __path_string& __src) { - string __utf8; - _Narrower()(back_inserter(__utf8), __src.data(), __src.data() + __src.size()); - _Widener()(back_inserter(__dest), __utf8.data(), __utf8.data() + __utf8.size()); - } -}; - -template <> -struct _PathExport { - template - static void __append(_Str& __dest, const __path_string& __src) { - size_t __size = __wide_to_char(__src, nullptr, 0); - size_t __pos = __dest.size(); - __dest.resize(__size); - __wide_to_char(__src, const_cast(__dest.data()) + __pos, __size); - } -}; - -template <> -struct _PathExport { - template - static void __append(_Str& __dest, const __path_string& __src) { - __dest.append(__src.begin(), __src.end()); - } -}; - -template <> -struct _PathExport { - template - static void __append(_Str& __dest, const __path_string& __src) { - __dest.append(__src.begin(), __src.end()); - } -}; - -#ifndef _LIBCPP_HAS_NO_CHAR8_T -template <> -struct _PathExport { - typedef __narrow_to_utf8 _Narrower; - - template - static void __append(_Str& __dest, const __path_string& __src) { - _Narrower()(back_inserter(__dest), __src.data(), __src.data() + __src.size()); - } -}; -#endif /* !_LIBCPP_HAS_NO_CHAR8_T */ -#endif /* _LIBCPP_WIN32API */ - -class _LIBCPP_TYPE_VIS path { - template - using _EnableIfPathable = - typename enable_if<__is_pathable<_SourceOrIter>::value, _Tp>::type; - - template - using _SourceChar = typename __is_pathable<_Tp>::__char_type; - - template - using _SourceCVT = _PathCVT<_SourceChar<_Tp> >; - -public: -#if defined(_LIBCPP_WIN32API) - typedef wchar_t value_type; - static constexpr value_type preferred_separator = L'\\'; -#else - typedef char value_type; - static constexpr value_type preferred_separator = '/'; -#endif - typedef basic_string string_type; - typedef basic_string_view __string_view; - - enum _LIBCPP_ENUM_VIS format : unsigned char { - auto_format, - native_format, - generic_format - }; - - // constructors and destructor - _LIBCPP_INLINE_VISIBILITY path() noexcept {} - _LIBCPP_INLINE_VISIBILITY path(const path& __p) : __pn_(__p.__pn_) {} - _LIBCPP_INLINE_VISIBILITY path(path&& __p) noexcept - : __pn_(_VSTD::move(__p.__pn_)) {} - - _LIBCPP_INLINE_VISIBILITY - path(string_type&& __s, format = format::auto_format) noexcept - : __pn_(_VSTD::move(__s)) {} - - template > - path(const _Source& __src, format = format::auto_format) { - _SourceCVT<_Source>::__append_source(__pn_, __src); - } - - template - path(_InputIt __first, _InputIt __last, format = format::auto_format) { - typedef typename iterator_traits<_InputIt>::value_type _ItVal; - 
_PathCVT<_ItVal>::__append_range(__pn_, __first, __last); - } - -/* -#if !defined(_LIBCPP_HAS_NO_LOCALIZATION) - // TODO Implement locale conversions. - template > - path(const _Source& __src, const locale& __loc, format = format::auto_format); - template - path(_InputIt __first, _InputIt _last, const locale& __loc, - format = format::auto_format); -#endif -*/ - - _LIBCPP_INLINE_VISIBILITY - ~path() = default; - - // assignments - _LIBCPP_INLINE_VISIBILITY - path& operator=(const path& __p) { - __pn_ = __p.__pn_; - return *this; - } - - _LIBCPP_INLINE_VISIBILITY - path& operator=(path&& __p) noexcept { - __pn_ = _VSTD::move(__p.__pn_); - return *this; - } - - _LIBCPP_INLINE_VISIBILITY - path& operator=(string_type&& __s) noexcept { - __pn_ = _VSTD::move(__s); - return *this; - } - - _LIBCPP_INLINE_VISIBILITY - path& assign(string_type&& __s) noexcept { - __pn_ = _VSTD::move(__s); - return *this; - } - - template - _LIBCPP_INLINE_VISIBILITY _EnableIfPathable<_Source> - operator=(const _Source& __src) { - return this->assign(__src); - } - - template - _EnableIfPathable<_Source> assign(const _Source& __src) { - __pn_.clear(); - _SourceCVT<_Source>::__append_source(__pn_, __src); - return *this; - } - - template - path& assign(_InputIt __first, _InputIt __last) { - typedef typename iterator_traits<_InputIt>::value_type _ItVal; - __pn_.clear(); - _PathCVT<_ItVal>::__append_range(__pn_, __first, __last); - return *this; - } - -public: - // appends -#if defined(_LIBCPP_WIN32API) - path& operator/=(const path& __p) { - auto __p_root_name = __p.__root_name(); - auto __p_root_name_size = __p_root_name.size(); - if (__p.is_absolute() || - (!__p_root_name.empty() && __p_root_name != __string_view(root_name().__pn_))) { - __pn_ = __p.__pn_; - return *this; - } - if (__p.has_root_directory()) { - path __root_name_str = root_name(); - __pn_ = __root_name_str.native(); - __pn_ += __string_view(__p.__pn_).substr(__p_root_name_size); - return *this; - } - if (has_filename() || (!has_root_directory() && is_absolute())) - __pn_ += preferred_separator; - __pn_ += __string_view(__p.__pn_).substr(__p_root_name_size); - return *this; - } - template - _LIBCPP_INLINE_VISIBILITY _EnableIfPathable<_Source> - operator/=(const _Source& __src) { - return operator/=(path(__src)); - } - - template - _EnableIfPathable<_Source> append(const _Source& __src) { - return operator/=(path(__src)); - } - - template - path& append(_InputIt __first, _InputIt __last) { - return operator/=(path(__first, __last)); - } -#else - path& operator/=(const path& __p) { - if (__p.is_absolute()) { - __pn_ = __p.__pn_; - return *this; - } - if (has_filename()) - __pn_ += preferred_separator; - __pn_ += __p.native(); - return *this; - } - - // FIXME: Use _LIBCPP_DIAGNOSE_WARNING to produce a diagnostic when __src - // is known at compile time to be "/' since the user almost certainly intended - // to append a separator instead of overwriting the path with "/" - template - _LIBCPP_INLINE_VISIBILITY _EnableIfPathable<_Source> - operator/=(const _Source& __src) { - return this->append(__src); - } - - template - _EnableIfPathable<_Source> append(const _Source& __src) { - using _Traits = __is_pathable<_Source>; - using _CVT = _PathCVT<_SourceChar<_Source> >; - bool __source_is_absolute = __is_separator(_Traits::__first_or_null(__src)); - if (__source_is_absolute) - __pn_.clear(); - else if (has_filename()) - __pn_ += preferred_separator; - _CVT::__append_source(__pn_, __src); - return *this; - } - - template - path& append(_InputIt __first, _InputIt 
__last) { - typedef typename iterator_traits<_InputIt>::value_type _ItVal; - static_assert(__can_convert_char<_ItVal>::value, "Must convertible"); - using _CVT = _PathCVT<_ItVal>; - if (__first != __last && __is_separator(*__first)) - __pn_.clear(); - else if (has_filename()) - __pn_ += preferred_separator; - _CVT::__append_range(__pn_, __first, __last); - return *this; - } -#endif - - // concatenation - _LIBCPP_INLINE_VISIBILITY - path& operator+=(const path& __x) { - __pn_ += __x.__pn_; - return *this; - } - - _LIBCPP_INLINE_VISIBILITY - path& operator+=(const string_type& __x) { - __pn_ += __x; - return *this; - } - - _LIBCPP_INLINE_VISIBILITY - path& operator+=(__string_view __x) { - __pn_ += __x; - return *this; - } - - _LIBCPP_INLINE_VISIBILITY - path& operator+=(const value_type* __x) { - __pn_ += __x; - return *this; - } - - _LIBCPP_INLINE_VISIBILITY - path& operator+=(value_type __x) { - __pn_ += __x; - return *this; - } - - template - typename enable_if<__can_convert_char<_ECharT>::value, path&>::type - operator+=(_ECharT __x) { - _PathCVT<_ECharT>::__append_source(__pn_, - basic_string_view<_ECharT>(&__x, 1)); - return *this; - } - - template - _EnableIfPathable<_Source> operator+=(const _Source& __x) { - return this->concat(__x); - } - - template - _EnableIfPathable<_Source> concat(const _Source& __x) { - _SourceCVT<_Source>::__append_source(__pn_, __x); - return *this; - } - - template - path& concat(_InputIt __first, _InputIt __last) { - typedef typename iterator_traits<_InputIt>::value_type _ItVal; - _PathCVT<_ItVal>::__append_range(__pn_, __first, __last); - return *this; - } - - // modifiers - _LIBCPP_INLINE_VISIBILITY - void clear() noexcept { __pn_.clear(); } - - path& make_preferred() { -#if defined(_LIBCPP_WIN32API) - _VSTD::replace(__pn_.begin(), __pn_.end(), L'/', L'\\'); -#endif - return *this; - } - - _LIBCPP_INLINE_VISIBILITY - path& remove_filename() { - auto __fname = __filename(); - if (!__fname.empty()) - __pn_.erase(__fname.data() - __pn_.data()); - return *this; - } - - path& replace_filename(const path& __replacement) { - remove_filename(); - return (*this /= __replacement); - } - - path& replace_extension(const path& __replacement = path()); - - _LIBCPP_INLINE_VISIBILITY - void swap(path& __rhs) noexcept { __pn_.swap(__rhs.__pn_); } - - // private helper to allow reserving memory in the path - _LIBCPP_INLINE_VISIBILITY - void __reserve(size_t __s) { __pn_.reserve(__s); } - - // native format observers - _LIBCPP_INLINE_VISIBILITY - const string_type& native() const noexcept { return __pn_; } - - _LIBCPP_INLINE_VISIBILITY - const value_type* c_str() const noexcept { return __pn_.c_str(); } - - _LIBCPP_INLINE_VISIBILITY operator string_type() const { return __pn_; } - -#if defined(_LIBCPP_WIN32API) - _LIBCPP_INLINE_VISIBILITY _VSTD::wstring wstring() const { return __pn_; } - - _VSTD::wstring generic_wstring() const { - _VSTD::wstring __s; - __s.resize(__pn_.size()); - _VSTD::replace_copy(__pn_.begin(), __pn_.end(), __s.begin(), '\\', '/'); - return __s; - } - -#if !defined(_LIBCPP_HAS_NO_LOCALIZATION) - template , - class _Allocator = allocator<_ECharT> > - basic_string<_ECharT, _Traits, _Allocator> - string(const _Allocator& __a = _Allocator()) const { - using _Str = basic_string<_ECharT, _Traits, _Allocator>; - _Str __s(__a); - __s.reserve(__pn_.size()); - _PathExport<_ECharT>::__append(__s, __pn_); - return __s; - } - - _LIBCPP_INLINE_VISIBILITY _VSTD::string string() const { - return string(); - } - _LIBCPP_INLINE_VISIBILITY __u8_string u8string() const 
{ - using _CVT = __narrow_to_utf8; - __u8_string __s; - __s.reserve(__pn_.size()); - _CVT()(back_inserter(__s), __pn_.data(), __pn_.data() + __pn_.size()); - return __s; - } - - _LIBCPP_INLINE_VISIBILITY _VSTD::u16string u16string() const { - return string(); - } - _LIBCPP_INLINE_VISIBILITY _VSTD::u32string u32string() const { - return string(); - } - - // generic format observers - template , - class _Allocator = allocator<_ECharT> > - basic_string<_ECharT, _Traits, _Allocator> - generic_string(const _Allocator& __a = _Allocator()) const { - using _Str = basic_string<_ECharT, _Traits, _Allocator>; - _Str __s = string<_ECharT, _Traits, _Allocator>(__a); - // Note: This (and generic_u8string below) is slightly suboptimal as - // it iterates twice over the string; once to convert it to the right - // character type, and once to replace path delimiters. - _VSTD::replace(__s.begin(), __s.end(), - static_cast<_ECharT>('\\'), static_cast<_ECharT>('/')); - return __s; - } - - _VSTD::string generic_string() const { return generic_string(); } - _VSTD::u16string generic_u16string() const { return generic_string(); } - _VSTD::u32string generic_u32string() const { return generic_string(); } - __u8_string generic_u8string() const { - __u8_string __s = u8string(); - _VSTD::replace(__s.begin(), __s.end(), '\\', '/'); - return __s; - } -#endif /* !_LIBCPP_HAS_NO_LOCALIZATION */ -#else /* _LIBCPP_WIN32API */ - - _LIBCPP_INLINE_VISIBILITY _VSTD::string string() const { return __pn_; } -#ifndef _LIBCPP_HAS_NO_CHAR8_T - _LIBCPP_INLINE_VISIBILITY _VSTD::u8string u8string() const { return _VSTD::u8string(__pn_.begin(), __pn_.end()); } -#else - _LIBCPP_INLINE_VISIBILITY _VSTD::string u8string() const { return __pn_; } -#endif - -#if !defined(_LIBCPP_HAS_NO_LOCALIZATION) - template , - class _Allocator = allocator<_ECharT> > - basic_string<_ECharT, _Traits, _Allocator> - string(const _Allocator& __a = _Allocator()) const { - using _CVT = __widen_from_utf8; - using _Str = basic_string<_ECharT, _Traits, _Allocator>; - _Str __s(__a); - __s.reserve(__pn_.size()); - _CVT()(back_inserter(__s), __pn_.data(), __pn_.data() + __pn_.size()); - return __s; - } - -#ifndef _LIBCPP_HAS_NO_WIDE_CHARACTERS - _LIBCPP_INLINE_VISIBILITY _VSTD::wstring wstring() const { - return string(); - } -#endif - _LIBCPP_INLINE_VISIBILITY _VSTD::u16string u16string() const { - return string(); - } - _LIBCPP_INLINE_VISIBILITY _VSTD::u32string u32string() const { - return string(); - } -#endif /* !_LIBCPP_HAS_NO_LOCALIZATION */ - - // generic format observers - _VSTD::string generic_string() const { return __pn_; } -#ifndef _LIBCPP_HAS_NO_CHAR8_T - _VSTD::u8string generic_u8string() const { return _VSTD::u8string(__pn_.begin(), __pn_.end()); } -#else - _VSTD::string generic_u8string() const { return __pn_; } -#endif - -#if !defined(_LIBCPP_HAS_NO_LOCALIZATION) - template , - class _Allocator = allocator<_ECharT> > - basic_string<_ECharT, _Traits, _Allocator> - generic_string(const _Allocator& __a = _Allocator()) const { - return string<_ECharT, _Traits, _Allocator>(__a); - } - -#ifndef _LIBCPP_HAS_NO_WIDE_CHARACTERS - _VSTD::wstring generic_wstring() const { return string(); } -#endif - _VSTD::u16string generic_u16string() const { return string(); } - _VSTD::u32string generic_u32string() const { return string(); } -#endif /* !_LIBCPP_HAS_NO_LOCALIZATION */ -#endif /* !_LIBCPP_WIN32API */ - -private: - int __compare(__string_view) const; - __string_view __root_name() const; - __string_view __root_directory() const; - __string_view 
__root_path_raw() const; - __string_view __relative_path() const; - __string_view __parent_path() const; - __string_view __filename() const; - __string_view __stem() const; - __string_view __extension() const; - -public: - // compare - _LIBCPP_INLINE_VISIBILITY int compare(const path& __p) const noexcept { - return __compare(__p.__pn_); - } - _LIBCPP_INLINE_VISIBILITY int compare(const string_type& __s) const { - return __compare(__s); - } - _LIBCPP_INLINE_VISIBILITY int compare(__string_view __s) const { - return __compare(__s); - } - _LIBCPP_INLINE_VISIBILITY int compare(const value_type* __s) const { - return __compare(__s); - } - - // decomposition - _LIBCPP_INLINE_VISIBILITY path root_name() const { - return string_type(__root_name()); - } - _LIBCPP_INLINE_VISIBILITY path root_directory() const { - return string_type(__root_directory()); - } - _LIBCPP_INLINE_VISIBILITY path root_path() const { -#if defined(_LIBCPP_WIN32API) - return string_type(__root_path_raw()); -#else - return root_name().append(string_type(__root_directory())); -#endif - } - _LIBCPP_INLINE_VISIBILITY path relative_path() const { - return string_type(__relative_path()); - } - _LIBCPP_INLINE_VISIBILITY path parent_path() const { - return string_type(__parent_path()); - } - _LIBCPP_INLINE_VISIBILITY path filename() const { - return string_type(__filename()); - } - _LIBCPP_INLINE_VISIBILITY path stem() const { return string_type(__stem()); } - _LIBCPP_INLINE_VISIBILITY path extension() const { - return string_type(__extension()); - } - - // query - _LIBCPP_NODISCARD_AFTER_CXX17 _LIBCPP_INLINE_VISIBILITY bool - empty() const noexcept { - return __pn_.empty(); - } - - _LIBCPP_INLINE_VISIBILITY bool has_root_name() const { - return !__root_name().empty(); - } - _LIBCPP_INLINE_VISIBILITY bool has_root_directory() const { - return !__root_directory().empty(); - } - _LIBCPP_INLINE_VISIBILITY bool has_root_path() const { - return !__root_path_raw().empty(); - } - _LIBCPP_INLINE_VISIBILITY bool has_relative_path() const { - return !__relative_path().empty(); - } - _LIBCPP_INLINE_VISIBILITY bool has_parent_path() const { - return !__parent_path().empty(); - } - _LIBCPP_INLINE_VISIBILITY bool has_filename() const { - return !__filename().empty(); - } - _LIBCPP_INLINE_VISIBILITY bool has_stem() const { return !__stem().empty(); } - _LIBCPP_INLINE_VISIBILITY bool has_extension() const { - return !__extension().empty(); - } - - _LIBCPP_INLINE_VISIBILITY bool is_absolute() const { -#if defined(_LIBCPP_WIN32API) - __string_view __root_name_str = __root_name(); - __string_view __root_dir = __root_directory(); - if (__root_name_str.size() == 2 && __root_name_str[1] == ':') { - // A drive letter with no root directory is relative, e.g. x:example. - return !__root_dir.empty(); - } - // If no root name, it's relative, e.g. 
\example is relative to the current drive - if (__root_name_str.empty()) - return false; - if (__root_name_str.size() < 3) - return false; - // A server root name, like \\server, is always absolute - if (__root_name_str[0] != '/' && __root_name_str[0] != '\\') - return false; - if (__root_name_str[1] != '/' && __root_name_str[1] != '\\') - return false; - // Seems to be a server root name - return true; -#else - return has_root_directory(); -#endif - } - _LIBCPP_INLINE_VISIBILITY bool is_relative() const { return !is_absolute(); } - - // relative paths - path lexically_normal() const; - path lexically_relative(const path& __base) const; - - _LIBCPP_INLINE_VISIBILITY path lexically_proximate(const path& __base) const { - path __result = this->lexically_relative(__base); - if (__result.native().empty()) - return *this; - return __result; - } - - // iterators - class _LIBCPP_TYPE_VIS iterator; - typedef iterator const_iterator; - - iterator begin() const; - iterator end() const; - -#if !defined(_LIBCPP_HAS_NO_LOCALIZATION) - template - _LIBCPP_INLINE_VISIBILITY friend - typename enable_if::value && - is_same<_Traits, char_traits >::value, - basic_ostream<_CharT, _Traits>&>::type - operator<<(basic_ostream<_CharT, _Traits>& __os, const path& __p) { - __os << _VSTD::__quoted(__p.native()); - return __os; - } - - template - _LIBCPP_INLINE_VISIBILITY friend - typename enable_if::value || - !is_same<_Traits, char_traits >::value, - basic_ostream<_CharT, _Traits>&>::type - operator<<(basic_ostream<_CharT, _Traits>& __os, const path& __p) { - __os << _VSTD::__quoted(__p.string<_CharT, _Traits>()); - return __os; - } - - template - _LIBCPP_INLINE_VISIBILITY friend basic_istream<_CharT, _Traits>& - operator>>(basic_istream<_CharT, _Traits>& __is, path& __p) { - basic_string<_CharT, _Traits> __tmp; - __is >> __quoted(__tmp); - __p = __tmp; - return __is; - } -#endif // !_LIBCPP_HAS_NO_LOCALIZATION - - friend _LIBCPP_INLINE_VISIBILITY bool operator==(const path& __lhs, const path& __rhs) noexcept { - return __lhs.__compare(__rhs.__pn_) == 0; - } - friend _LIBCPP_INLINE_VISIBILITY bool operator!=(const path& __lhs, const path& __rhs) noexcept { - return __lhs.__compare(__rhs.__pn_) != 0; - } - friend _LIBCPP_INLINE_VISIBILITY bool operator<(const path& __lhs, const path& __rhs) noexcept { - return __lhs.__compare(__rhs.__pn_) < 0; - } - friend _LIBCPP_INLINE_VISIBILITY bool operator<=(const path& __lhs, const path& __rhs) noexcept { - return __lhs.__compare(__rhs.__pn_) <= 0; - } - friend _LIBCPP_INLINE_VISIBILITY bool operator>(const path& __lhs, const path& __rhs) noexcept { - return __lhs.__compare(__rhs.__pn_) > 0; - } - friend _LIBCPP_INLINE_VISIBILITY bool operator>=(const path& __lhs, const path& __rhs) noexcept { - return __lhs.__compare(__rhs.__pn_) >= 0; - } - - friend _LIBCPP_INLINE_VISIBILITY path operator/(const path& __lhs, - const path& __rhs) { - path __result(__lhs); - __result /= __rhs; - return __result; - } -private: - inline _LIBCPP_INLINE_VISIBILITY path& - __assign_view(__string_view const& __s) noexcept { - __pn_ = string_type(__s); - return *this; - } - string_type __pn_; -}; - -inline _LIBCPP_INLINE_VISIBILITY void swap(path& __lhs, path& __rhs) noexcept { - __lhs.swap(__rhs); -} - -_LIBCPP_FUNC_VIS -size_t hash_value(const path& __p) noexcept; - -template -_LIBCPP_INLINE_VISIBILITY _LIBCPP_DEPRECATED_WITH_CHAR8_T - typename enable_if<__is_pathable<_InputIt>::value, path>::type - u8path(_InputIt __f, _InputIt __l) { - static_assert( -#ifndef _LIBCPP_HAS_NO_CHAR8_T - 
is_same::__char_type, char8_t>::value || -#endif - is_same::__char_type, char>::value, - "u8path(Iter, Iter) requires Iter have a value_type of type 'char'" - " or 'char8_t'"); -#if defined(_LIBCPP_WIN32API) - string __tmp(__f, __l); - using _CVT = __widen_from_utf8; - _VSTD::wstring __w; - __w.reserve(__tmp.size()); - _CVT()(back_inserter(__w), __tmp.data(), __tmp.data() + __tmp.size()); - return path(__w); -#else - return path(__f, __l); -#endif /* !_LIBCPP_WIN32API */ -} - -#if defined(_LIBCPP_WIN32API) -template -_LIBCPP_INLINE_VISIBILITY _LIBCPP_DEPRECATED_WITH_CHAR8_T - typename enable_if<__is_pathable<_InputIt>::value, path>::type - u8path(_InputIt __f, _NullSentinel) { - static_assert( -#ifndef _LIBCPP_HAS_NO_CHAR8_T - is_same::__char_type, char8_t>::value || -#endif - is_same::__char_type, char>::value, - "u8path(Iter, Iter) requires Iter have a value_type of type 'char'" - " or 'char8_t'"); - string __tmp; - const char __sentinel = char{}; - for (; *__f != __sentinel; ++__f) - __tmp.push_back(*__f); - using _CVT = __widen_from_utf8; - _VSTD::wstring __w; - __w.reserve(__tmp.size()); - _CVT()(back_inserter(__w), __tmp.data(), __tmp.data() + __tmp.size()); - return path(__w); -} -#endif /* _LIBCPP_WIN32API */ - -template -_LIBCPP_INLINE_VISIBILITY _LIBCPP_DEPRECATED_WITH_CHAR8_T - typename enable_if<__is_pathable<_Source>::value, path>::type - u8path(const _Source& __s) { - static_assert( -#ifndef _LIBCPP_HAS_NO_CHAR8_T - is_same::__char_type, char8_t>::value || -#endif - is_same::__char_type, char>::value, - "u8path(Source const&) requires Source have a character type of type " - "'char' or 'char8_t'"); -#if defined(_LIBCPP_WIN32API) - using _Traits = __is_pathable<_Source>; - return u8path(_VSTD::__unwrap_iter(_Traits::__range_begin(__s)), _VSTD::__unwrap_iter(_Traits::__range_end(__s))); -#else - return path(__s); -#endif -} - -class _LIBCPP_TYPE_VIS path::iterator { -public: - enum _ParserState : unsigned char { - _Singular, - _BeforeBegin, - _InRootName, - _InRootDir, - _InFilenames, - _InTrailingSep, - _AtEnd - }; - -public: - typedef bidirectional_iterator_tag iterator_category; - - typedef path value_type; - typedef ptrdiff_t difference_type; - typedef const path* pointer; - typedef const path& reference; - - typedef void - __stashing_iterator_tag; // See reverse_iterator and __is_stashing_iterator - -public: - _LIBCPP_INLINE_VISIBILITY - iterator() - : __stashed_elem_(), __path_ptr_(nullptr), __entry_(), - __state_(_Singular) {} - - iterator(const iterator&) = default; - ~iterator() = default; - - iterator& operator=(const iterator&) = default; - - _LIBCPP_INLINE_VISIBILITY - reference operator*() const { return __stashed_elem_; } - - _LIBCPP_INLINE_VISIBILITY - pointer operator->() const { return &__stashed_elem_; } - - _LIBCPP_INLINE_VISIBILITY - iterator& operator++() { - _LIBCPP_ASSERT(__state_ != _Singular, - "attempting to increment a singular iterator"); - _LIBCPP_ASSERT(__state_ != _AtEnd, - "attempting to increment the end iterator"); - return __increment(); - } - - _LIBCPP_INLINE_VISIBILITY - iterator operator++(int) { - iterator __it(*this); - this->operator++(); - return __it; - } - - _LIBCPP_INLINE_VISIBILITY - iterator& operator--() { - _LIBCPP_ASSERT(__state_ != _Singular, - "attempting to decrement a singular iterator"); - _LIBCPP_ASSERT(__entry_.data() != __path_ptr_->native().data(), - "attempting to decrement the begin iterator"); - return __decrement(); - } - - _LIBCPP_INLINE_VISIBILITY - iterator operator--(int) { - iterator __it(*this); - 
this->operator--(); - return __it; - } - -private: - friend class path; - - inline _LIBCPP_INLINE_VISIBILITY friend bool operator==(const iterator&, - const iterator&); - - iterator& __increment(); - iterator& __decrement(); - - path __stashed_elem_; - const path* __path_ptr_; - path::__string_view __entry_; - _ParserState __state_; -}; - -inline _LIBCPP_INLINE_VISIBILITY bool operator==(const path::iterator& __lhs, - const path::iterator& __rhs) { - return __lhs.__path_ptr_ == __rhs.__path_ptr_ && - __lhs.__entry_.data() == __rhs.__entry_.data(); -} - -inline _LIBCPP_INLINE_VISIBILITY bool operator!=(const path::iterator& __lhs, - const path::iterator& __rhs) { - return !(__lhs == __rhs); -} - -// TODO(ldionne): We need to pop the pragma and push it again after -// filesystem_error to work around PR41078. -_LIBCPP_AVAILABILITY_FILESYSTEM_POP - -class _LIBCPP_AVAILABILITY_FILESYSTEM _LIBCPP_EXCEPTION_ABI filesystem_error : public system_error { -public: - _LIBCPP_INLINE_VISIBILITY - filesystem_error(const string& __what, error_code __ec) - : system_error(__ec, __what), - __storage_(make_shared<_Storage>(path(), path())) { - __create_what(0); - } - - _LIBCPP_INLINE_VISIBILITY - filesystem_error(const string& __what, const path& __p1, error_code __ec) - : system_error(__ec, __what), - __storage_(make_shared<_Storage>(__p1, path())) { - __create_what(1); - } - - _LIBCPP_INLINE_VISIBILITY - filesystem_error(const string& __what, const path& __p1, const path& __p2, - error_code __ec) - : system_error(__ec, __what), - __storage_(make_shared<_Storage>(__p1, __p2)) { - __create_what(2); - } - - _LIBCPP_INLINE_VISIBILITY - const path& path1() const noexcept { return __storage_->__p1_; } - - _LIBCPP_INLINE_VISIBILITY - const path& path2() const noexcept { return __storage_->__p2_; } - - filesystem_error(const filesystem_error&) = default; - ~filesystem_error() override; // key function - - _LIBCPP_INLINE_VISIBILITY - const char* what() const noexcept override { - return __storage_->__what_.c_str(); - } - - void __create_what(int __num_paths); - -private: - struct _LIBCPP_HIDDEN _Storage { - _LIBCPP_INLINE_VISIBILITY - _Storage(const path& __p1, const path& __p2) : __p1_(__p1), __p2_(__p2) {} - - path __p1_; - path __p2_; - string __what_; - }; - shared_ptr<_Storage> __storage_; -}; - -_LIBCPP_AVAILABILITY_FILESYSTEM_PUSH - -template -_LIBCPP_NORETURN inline _LIBCPP_INLINE_VISIBILITY -#ifndef _LIBCPP_NO_EXCEPTIONS -void __throw_filesystem_error(_Args&&... __args) { - throw filesystem_error(_VSTD::forward<_Args>(__args)...); -} -#else -void __throw_filesystem_error(_Args&&...) 
{ - _VSTD::abort(); -} -#endif - -// operational functions - -_LIBCPP_FUNC_VIS -path __absolute(const path&, error_code* __ec = nullptr); -_LIBCPP_FUNC_VIS -path __canonical(const path&, error_code* __ec = nullptr); -_LIBCPP_FUNC_VIS -void __copy(const path& __from, const path& __to, copy_options __opt, - error_code* __ec = nullptr); -_LIBCPP_FUNC_VIS -bool __copy_file(const path& __from, const path& __to, copy_options __opt, - error_code* __ec = nullptr); -_LIBCPP_FUNC_VIS -void __copy_symlink(const path& __existing_symlink, const path& __new_symlink, - error_code* __ec = nullptr); -_LIBCPP_FUNC_VIS -bool __create_directories(const path& p, error_code* ec = nullptr); -_LIBCPP_FUNC_VIS -bool __create_directory(const path& p, error_code* ec = nullptr); -_LIBCPP_FUNC_VIS -bool __create_directory(const path& p, const path& attributes, - error_code* ec = nullptr); -_LIBCPP_FUNC_VIS -void __create_directory_symlink(const path& __to, const path& __new_symlink, - error_code* __ec = nullptr); -_LIBCPP_FUNC_VIS -void __create_hard_link(const path& __to, const path& __new_hard_link, - error_code* __ec = nullptr); -_LIBCPP_FUNC_VIS -void __create_symlink(const path& __to, const path& __new_symlink, - error_code* __ec = nullptr); -_LIBCPP_FUNC_VIS -path __current_path(error_code* __ec = nullptr); -_LIBCPP_FUNC_VIS -void __current_path(const path&, error_code* __ec = nullptr); -_LIBCPP_FUNC_VIS -bool __equivalent(const path&, const path&, error_code* __ec = nullptr); -_LIBCPP_FUNC_VIS -uintmax_t __file_size(const path&, error_code* __ec = nullptr); -_LIBCPP_FUNC_VIS -uintmax_t __hard_link_count(const path&, error_code* __ec = nullptr); -_LIBCPP_FUNC_VIS -bool __fs_is_empty(const path& p, error_code* ec = nullptr); -_LIBCPP_FUNC_VIS -file_time_type __last_write_time(const path& p, error_code* ec = nullptr); -_LIBCPP_FUNC_VIS -void __last_write_time(const path& p, file_time_type new_time, - error_code* ec = nullptr); -_LIBCPP_FUNC_VIS -void __permissions(const path&, perms, perm_options, error_code* = nullptr); -_LIBCPP_FUNC_VIS -path __read_symlink(const path& p, error_code* ec = nullptr); -_LIBCPP_FUNC_VIS -bool __remove(const path& p, error_code* ec = nullptr); -_LIBCPP_FUNC_VIS -uintmax_t __remove_all(const path& p, error_code* ec = nullptr); -_LIBCPP_FUNC_VIS -void __rename(const path& from, const path& to, error_code* ec = nullptr); -_LIBCPP_FUNC_VIS -void __resize_file(const path& p, uintmax_t size, error_code* ec = nullptr); -_LIBCPP_FUNC_VIS -space_info __space(const path&, error_code* __ec = nullptr); -_LIBCPP_FUNC_VIS -file_status __status(const path&, error_code* __ec = nullptr); -_LIBCPP_FUNC_VIS -file_status __symlink_status(const path&, error_code* __ec = nullptr); -_LIBCPP_FUNC_VIS -path __system_complete(const path&, error_code* __ec = nullptr); -_LIBCPP_FUNC_VIS -path __temp_directory_path(error_code* __ec = nullptr); -_LIBCPP_FUNC_VIS -path __weakly_canonical(path const& __p, error_code* __ec = nullptr); - -inline _LIBCPP_INLINE_VISIBILITY path current_path() { - return __current_path(); -} - -inline _LIBCPP_INLINE_VISIBILITY path current_path(error_code& __ec) { - return __current_path(&__ec); -} - -inline _LIBCPP_INLINE_VISIBILITY void current_path(const path& __p) { - __current_path(__p); -} - -inline _LIBCPP_INLINE_VISIBILITY void current_path(const path& __p, - error_code& __ec) noexcept { - __current_path(__p, &__ec); -} - -inline _LIBCPP_INLINE_VISIBILITY path absolute(const path& __p) { - return __absolute(__p); -} - -inline _LIBCPP_INLINE_VISIBILITY path absolute(const path& 
__p, - error_code& __ec) { - return __absolute(__p, &__ec); -} - -inline _LIBCPP_INLINE_VISIBILITY path canonical(const path& __p) { - return __canonical(__p); -} - -inline _LIBCPP_INLINE_VISIBILITY path canonical(const path& __p, - error_code& __ec) { - return __canonical(__p, &__ec); -} - -inline _LIBCPP_INLINE_VISIBILITY void copy(const path& __from, - const path& __to) { - __copy(__from, __to, copy_options::none); -} - -inline _LIBCPP_INLINE_VISIBILITY void copy(const path& __from, const path& __to, - error_code& __ec) { - __copy(__from, __to, copy_options::none, &__ec); -} - -inline _LIBCPP_INLINE_VISIBILITY void copy(const path& __from, const path& __to, - copy_options __opt) { - __copy(__from, __to, __opt); -} - -inline _LIBCPP_INLINE_VISIBILITY void copy(const path& __from, const path& __to, - copy_options __opt, - error_code& __ec) { - __copy(__from, __to, __opt, &__ec); -} - -inline _LIBCPP_INLINE_VISIBILITY bool copy_file(const path& __from, - const path& __to) { - return __copy_file(__from, __to, copy_options::none); -} - -inline _LIBCPP_INLINE_VISIBILITY bool -copy_file(const path& __from, const path& __to, error_code& __ec) { - return __copy_file(__from, __to, copy_options::none, &__ec); -} - -inline _LIBCPP_INLINE_VISIBILITY bool -copy_file(const path& __from, const path& __to, copy_options __opt) { - return __copy_file(__from, __to, __opt); -} - -inline _LIBCPP_INLINE_VISIBILITY bool copy_file(const path& __from, - const path& __to, - copy_options __opt, - error_code& __ec) { - return __copy_file(__from, __to, __opt, &__ec); -} - -inline _LIBCPP_INLINE_VISIBILITY void copy_symlink(const path& __existing, - const path& __new) { - __copy_symlink(__existing, __new); -} - -inline _LIBCPP_INLINE_VISIBILITY void -copy_symlink(const path& __ext, const path& __new, error_code& __ec) noexcept { - __copy_symlink(__ext, __new, &__ec); -} - -inline _LIBCPP_INLINE_VISIBILITY bool create_directories(const path& __p) { - return __create_directories(__p); -} - -inline _LIBCPP_INLINE_VISIBILITY bool create_directories(const path& __p, - error_code& __ec) { - return __create_directories(__p, &__ec); -} - -inline _LIBCPP_INLINE_VISIBILITY bool create_directory(const path& __p) { - return __create_directory(__p); -} - -inline _LIBCPP_INLINE_VISIBILITY bool -create_directory(const path& __p, error_code& __ec) noexcept { - return __create_directory(__p, &__ec); -} - -inline _LIBCPP_INLINE_VISIBILITY bool create_directory(const path& __p, - const path& __attrs) { - return __create_directory(__p, __attrs); -} - -inline _LIBCPP_INLINE_VISIBILITY bool -create_directory(const path& __p, const path& __attrs, - error_code& __ec) noexcept { - return __create_directory(__p, __attrs, &__ec); -} - -inline _LIBCPP_INLINE_VISIBILITY void -create_directory_symlink(const path& __to, const path& __new) { - __create_directory_symlink(__to, __new); -} - -inline _LIBCPP_INLINE_VISIBILITY void -create_directory_symlink(const path& __to, const path& __new, - error_code& __ec) noexcept { - __create_directory_symlink(__to, __new, &__ec); -} - -inline _LIBCPP_INLINE_VISIBILITY void create_hard_link(const path& __to, - const path& __new) { - __create_hard_link(__to, __new); -} - -inline _LIBCPP_INLINE_VISIBILITY void -create_hard_link(const path& __to, const path& __new, - error_code& __ec) noexcept { - __create_hard_link(__to, __new, &__ec); -} - -inline _LIBCPP_INLINE_VISIBILITY void create_symlink(const path& __to, - const path& __new) { - __create_symlink(__to, __new); -} - -inline _LIBCPP_INLINE_VISIBILITY void 
-create_symlink(const path& __to, const path& __new, error_code& __ec) noexcept { - return __create_symlink(__to, __new, &__ec); -} - -inline _LIBCPP_INLINE_VISIBILITY bool status_known(file_status __s) noexcept { - return __s.type() != file_type::none; -} - -inline _LIBCPP_INLINE_VISIBILITY bool exists(file_status __s) noexcept { - return status_known(__s) && __s.type() != file_type::not_found; -} - -inline _LIBCPP_INLINE_VISIBILITY bool exists(const path& __p) { - return exists(__status(__p)); -} - -inline _LIBCPP_INLINE_VISIBILITY bool exists(const path& __p, - error_code& __ec) noexcept { - auto __s = __status(__p, &__ec); - if (status_known(__s)) - __ec.clear(); - return exists(__s); -} - -inline _LIBCPP_INLINE_VISIBILITY bool equivalent(const path& __p1, - const path& __p2) { - return __equivalent(__p1, __p2); -} - -inline _LIBCPP_INLINE_VISIBILITY bool -equivalent(const path& __p1, const path& __p2, error_code& __ec) noexcept { - return __equivalent(__p1, __p2, &__ec); -} - -inline _LIBCPP_INLINE_VISIBILITY uintmax_t file_size(const path& __p) { - return __file_size(__p); -} - -inline _LIBCPP_INLINE_VISIBILITY uintmax_t -file_size(const path& __p, error_code& __ec) noexcept { - return __file_size(__p, &__ec); -} - -inline _LIBCPP_INLINE_VISIBILITY uintmax_t hard_link_count(const path& __p) { - return __hard_link_count(__p); -} - -inline _LIBCPP_INLINE_VISIBILITY uintmax_t -hard_link_count(const path& __p, error_code& __ec) noexcept { - return __hard_link_count(__p, &__ec); -} - -inline _LIBCPP_INLINE_VISIBILITY bool is_block_file(file_status __s) noexcept { - return __s.type() == file_type::block; -} - -inline _LIBCPP_INLINE_VISIBILITY bool is_block_file(const path& __p) { - return is_block_file(__status(__p)); -} - -inline _LIBCPP_INLINE_VISIBILITY bool is_block_file(const path& __p, - error_code& __ec) noexcept { - return is_block_file(__status(__p, &__ec)); -} - -inline _LIBCPP_INLINE_VISIBILITY bool -is_character_file(file_status __s) noexcept { - return __s.type() == file_type::character; -} - -inline _LIBCPP_INLINE_VISIBILITY bool is_character_file(const path& __p) { - return is_character_file(__status(__p)); -} - -inline _LIBCPP_INLINE_VISIBILITY bool -is_character_file(const path& __p, error_code& __ec) noexcept { - return is_character_file(__status(__p, &__ec)); -} - -inline _LIBCPP_INLINE_VISIBILITY bool is_directory(file_status __s) noexcept { - return __s.type() == file_type::directory; -} - -inline _LIBCPP_INLINE_VISIBILITY bool is_directory(const path& __p) { - return is_directory(__status(__p)); -} - -inline _LIBCPP_INLINE_VISIBILITY bool is_directory(const path& __p, - error_code& __ec) noexcept { - return is_directory(__status(__p, &__ec)); -} - -inline _LIBCPP_INLINE_VISIBILITY bool is_empty(const path& __p) { - return __fs_is_empty(__p); -} - -inline _LIBCPP_INLINE_VISIBILITY bool is_empty(const path& __p, - error_code& __ec) { - return __fs_is_empty(__p, &__ec); -} - -inline _LIBCPP_INLINE_VISIBILITY bool is_fifo(file_status __s) noexcept { - return __s.type() == file_type::fifo; -} -inline _LIBCPP_INLINE_VISIBILITY bool is_fifo(const path& __p) { - return is_fifo(__status(__p)); -} - -inline _LIBCPP_INLINE_VISIBILITY bool is_fifo(const path& __p, - error_code& __ec) noexcept { - return is_fifo(__status(__p, &__ec)); -} - -inline _LIBCPP_INLINE_VISIBILITY bool -is_regular_file(file_status __s) noexcept { - return __s.type() == file_type::regular; -} - -inline _LIBCPP_INLINE_VISIBILITY bool is_regular_file(const path& __p) { - return 
is_regular_file(__status(__p)); -} - -inline _LIBCPP_INLINE_VISIBILITY bool -is_regular_file(const path& __p, error_code& __ec) noexcept { - return is_regular_file(__status(__p, &__ec)); -} - -inline _LIBCPP_INLINE_VISIBILITY bool is_socket(file_status __s) noexcept { - return __s.type() == file_type::socket; -} - -inline _LIBCPP_INLINE_VISIBILITY bool is_socket(const path& __p) { - return is_socket(__status(__p)); -} - -inline _LIBCPP_INLINE_VISIBILITY bool is_socket(const path& __p, - error_code& __ec) noexcept { - return is_socket(__status(__p, &__ec)); -} - -inline _LIBCPP_INLINE_VISIBILITY bool is_symlink(file_status __s) noexcept { - return __s.type() == file_type::symlink; -} - -inline _LIBCPP_INLINE_VISIBILITY bool is_symlink(const path& __p) { - return is_symlink(__symlink_status(__p)); -} - -inline _LIBCPP_INLINE_VISIBILITY bool is_symlink(const path& __p, - error_code& __ec) noexcept { - return is_symlink(__symlink_status(__p, &__ec)); -} - -inline _LIBCPP_INLINE_VISIBILITY bool is_other(file_status __s) noexcept { - return exists(__s) && !is_regular_file(__s) && !is_directory(__s) && - !is_symlink(__s); -} - -inline _LIBCPP_INLINE_VISIBILITY bool is_other(const path& __p) { - return is_other(__status(__p)); -} - -inline _LIBCPP_INLINE_VISIBILITY bool is_other(const path& __p, - error_code& __ec) noexcept { - return is_other(__status(__p, &__ec)); -} - -inline _LIBCPP_INLINE_VISIBILITY file_time_type -last_write_time(const path& __p) { - return __last_write_time(__p); -} - -inline _LIBCPP_INLINE_VISIBILITY file_time_type -last_write_time(const path& __p, error_code& __ec) noexcept { - return __last_write_time(__p, &__ec); -} - -inline _LIBCPP_INLINE_VISIBILITY void last_write_time(const path& __p, - file_time_type __t) { - __last_write_time(__p, __t); -} - -inline _LIBCPP_INLINE_VISIBILITY void -last_write_time(const path& __p, file_time_type __t, - error_code& __ec) noexcept { - __last_write_time(__p, __t, &__ec); -} - -inline _LIBCPP_INLINE_VISIBILITY void -permissions(const path& __p, perms __prms, - perm_options __opts = perm_options::replace) { - __permissions(__p, __prms, __opts); -} - -inline _LIBCPP_INLINE_VISIBILITY void permissions(const path& __p, perms __prms, - error_code& __ec) noexcept { - __permissions(__p, __prms, perm_options::replace, &__ec); -} - -inline _LIBCPP_INLINE_VISIBILITY void permissions(const path& __p, perms __prms, - perm_options __opts, - error_code& __ec) { - __permissions(__p, __prms, __opts, &__ec); -} - -inline _LIBCPP_INLINE_VISIBILITY path proximate(const path& __p, - const path& __base, - error_code& __ec) { - path __tmp = __weakly_canonical(__p, &__ec); - if (__ec) - return {}; - path __tmp_base = __weakly_canonical(__base, &__ec); - if (__ec) - return {}; - return __tmp.lexically_proximate(__tmp_base); -} - -inline _LIBCPP_INLINE_VISIBILITY path proximate(const path& __p, - error_code& __ec) { - return proximate(__p, current_path(), __ec); -} - -inline _LIBCPP_INLINE_VISIBILITY path -proximate(const path& __p, const path& __base = current_path()) { - return __weakly_canonical(__p).lexically_proximate( - __weakly_canonical(__base)); -} - -inline _LIBCPP_INLINE_VISIBILITY path read_symlink(const path& __p) { - return __read_symlink(__p); -} - -inline _LIBCPP_INLINE_VISIBILITY path read_symlink(const path& __p, - error_code& __ec) { - return __read_symlink(__p, &__ec); -} - -inline _LIBCPP_INLINE_VISIBILITY path relative(const path& __p, - const path& __base, - error_code& __ec) { - path __tmp = __weakly_canonical(__p, &__ec); - if (__ec) - 
return path(); - path __tmpbase = __weakly_canonical(__base, &__ec); - if (__ec) - return path(); - return __tmp.lexically_relative(__tmpbase); -} - -inline _LIBCPP_INLINE_VISIBILITY path relative(const path& __p, - error_code& __ec) { - return relative(__p, current_path(), __ec); -} - -inline _LIBCPP_INLINE_VISIBILITY path -relative(const path& __p, const path& __base = current_path()) { - return __weakly_canonical(__p).lexically_relative(__weakly_canonical(__base)); -} - -inline _LIBCPP_INLINE_VISIBILITY bool remove(const path& __p) { - return __remove(__p); -} - -inline _LIBCPP_INLINE_VISIBILITY bool remove(const path& __p, - error_code& __ec) noexcept { - return __remove(__p, &__ec); -} - -inline _LIBCPP_INLINE_VISIBILITY uintmax_t remove_all(const path& __p) { - return __remove_all(__p); -} - -inline _LIBCPP_INLINE_VISIBILITY uintmax_t remove_all(const path& __p, - error_code& __ec) { - return __remove_all(__p, &__ec); -} - -inline _LIBCPP_INLINE_VISIBILITY void rename(const path& __from, - const path& __to) { - return __rename(__from, __to); -} - -inline _LIBCPP_INLINE_VISIBILITY void -rename(const path& __from, const path& __to, error_code& __ec) noexcept { - return __rename(__from, __to, &__ec); -} - -inline _LIBCPP_INLINE_VISIBILITY void resize_file(const path& __p, - uintmax_t __ns) { - return __resize_file(__p, __ns); -} - -inline _LIBCPP_INLINE_VISIBILITY void -resize_file(const path& __p, uintmax_t __ns, error_code& __ec) noexcept { - return __resize_file(__p, __ns, &__ec); -} - -inline _LIBCPP_INLINE_VISIBILITY space_info space(const path& __p) { - return __space(__p); -} - -inline _LIBCPP_INLINE_VISIBILITY space_info space(const path& __p, - error_code& __ec) noexcept { - return __space(__p, &__ec); -} - -inline _LIBCPP_INLINE_VISIBILITY file_status status(const path& __p) { - return __status(__p); -} - -inline _LIBCPP_INLINE_VISIBILITY file_status status(const path& __p, - error_code& __ec) noexcept { - return __status(__p, &__ec); -} - -inline _LIBCPP_INLINE_VISIBILITY file_status symlink_status(const path& __p) { - return __symlink_status(__p); -} - -inline _LIBCPP_INLINE_VISIBILITY file_status -symlink_status(const path& __p, error_code& __ec) noexcept { - return __symlink_status(__p, &__ec); -} - -inline _LIBCPP_INLINE_VISIBILITY path temp_directory_path() { - return __temp_directory_path(); -} - -inline _LIBCPP_INLINE_VISIBILITY path temp_directory_path(error_code& __ec) { - return __temp_directory_path(&__ec); -} - -inline _LIBCPP_INLINE_VISIBILITY path weakly_canonical(path const& __p) { - return __weakly_canonical(__p); -} - -inline _LIBCPP_INLINE_VISIBILITY path weakly_canonical(path const& __p, - error_code& __ec) { - return __weakly_canonical(__p, &__ec); -} - -class directory_iterator; -class recursive_directory_iterator; -class _LIBCPP_HIDDEN __dir_stream; - -class directory_entry { - typedef _VSTD_FS::path _Path; - -public: - // constructors and destructors - directory_entry() noexcept = default; - directory_entry(directory_entry const&) = default; - directory_entry(directory_entry&&) noexcept = default; - - _LIBCPP_INLINE_VISIBILITY - explicit directory_entry(_Path const& __p) : __p_(__p) { - error_code __ec; - __refresh(&__ec); - } - - _LIBCPP_INLINE_VISIBILITY - directory_entry(_Path const& __p, error_code& __ec) : __p_(__p) { - __refresh(&__ec); - } - - ~directory_entry() {} - - directory_entry& operator=(directory_entry const&) = default; - directory_entry& operator=(directory_entry&&) noexcept = default; - - _LIBCPP_INLINE_VISIBILITY - void assign(_Path 
const& __p) { - __p_ = __p; - error_code __ec; - __refresh(&__ec); - } - - _LIBCPP_INLINE_VISIBILITY - void assign(_Path const& __p, error_code& __ec) { - __p_ = __p; - __refresh(&__ec); - } - - _LIBCPP_INLINE_VISIBILITY - void replace_filename(_Path const& __p) { - __p_.replace_filename(__p); - error_code __ec; - __refresh(&__ec); - } - - _LIBCPP_INLINE_VISIBILITY - void replace_filename(_Path const& __p, error_code& __ec) { - __p_ = __p_.parent_path() / __p; - __refresh(&__ec); - } - - _LIBCPP_INLINE_VISIBILITY - void refresh() { __refresh(); } - - _LIBCPP_INLINE_VISIBILITY - void refresh(error_code& __ec) noexcept { __refresh(&__ec); } - - _LIBCPP_INLINE_VISIBILITY - _Path const& path() const noexcept { return __p_; } - - _LIBCPP_INLINE_VISIBILITY - operator const _Path&() const noexcept { return __p_; } - - _LIBCPP_INLINE_VISIBILITY - bool exists() const { return _VSTD_FS::exists(file_status{__get_ft()}); } - - _LIBCPP_INLINE_VISIBILITY - bool exists(error_code& __ec) const noexcept { - return _VSTD_FS::exists(file_status{__get_ft(&__ec)}); - } - - _LIBCPP_INLINE_VISIBILITY - bool is_block_file() const { return __get_ft() == file_type::block; } - - _LIBCPP_INLINE_VISIBILITY - bool is_block_file(error_code& __ec) const noexcept { - return __get_ft(&__ec) == file_type::block; - } - - _LIBCPP_INLINE_VISIBILITY - bool is_character_file() const { return __get_ft() == file_type::character; } - - _LIBCPP_INLINE_VISIBILITY - bool is_character_file(error_code& __ec) const noexcept { - return __get_ft(&__ec) == file_type::character; - } - - _LIBCPP_INLINE_VISIBILITY - bool is_directory() const { return __get_ft() == file_type::directory; } - - _LIBCPP_INLINE_VISIBILITY - bool is_directory(error_code& __ec) const noexcept { - return __get_ft(&__ec) == file_type::directory; - } - - _LIBCPP_INLINE_VISIBILITY - bool is_fifo() const { return __get_ft() == file_type::fifo; } - - _LIBCPP_INLINE_VISIBILITY - bool is_fifo(error_code& __ec) const noexcept { - return __get_ft(&__ec) == file_type::fifo; - } - - _LIBCPP_INLINE_VISIBILITY - bool is_other() const { return _VSTD_FS::is_other(file_status{__get_ft()}); } - - _LIBCPP_INLINE_VISIBILITY - bool is_other(error_code& __ec) const noexcept { - return _VSTD_FS::is_other(file_status{__get_ft(&__ec)}); - } - - _LIBCPP_INLINE_VISIBILITY - bool is_regular_file() const { return __get_ft() == file_type::regular; } - - _LIBCPP_INLINE_VISIBILITY - bool is_regular_file(error_code& __ec) const noexcept { - return __get_ft(&__ec) == file_type::regular; - } - - _LIBCPP_INLINE_VISIBILITY - bool is_socket() const { return __get_ft() == file_type::socket; } - - _LIBCPP_INLINE_VISIBILITY - bool is_socket(error_code& __ec) const noexcept { - return __get_ft(&__ec) == file_type::socket; - } - - _LIBCPP_INLINE_VISIBILITY - bool is_symlink() const { return __get_sym_ft() == file_type::symlink; } - - _LIBCPP_INLINE_VISIBILITY - bool is_symlink(error_code& __ec) const noexcept { - return __get_sym_ft(&__ec) == file_type::symlink; - } - _LIBCPP_INLINE_VISIBILITY - uintmax_t file_size() const { return __get_size(); } - - _LIBCPP_INLINE_VISIBILITY - uintmax_t file_size(error_code& __ec) const noexcept { - return __get_size(&__ec); - } - - _LIBCPP_INLINE_VISIBILITY - uintmax_t hard_link_count() const { return __get_nlink(); } - - _LIBCPP_INLINE_VISIBILITY - uintmax_t hard_link_count(error_code& __ec) const noexcept { - return __get_nlink(&__ec); - } - - _LIBCPP_INLINE_VISIBILITY - file_time_type last_write_time() const { return __get_write_time(); } - - _LIBCPP_INLINE_VISIBILITY - 
file_time_type last_write_time(error_code& __ec) const noexcept { - return __get_write_time(&__ec); - } - - _LIBCPP_INLINE_VISIBILITY - file_status status() const { return __get_status(); } - - _LIBCPP_INLINE_VISIBILITY - file_status status(error_code& __ec) const noexcept { - return __get_status(&__ec); - } - - _LIBCPP_INLINE_VISIBILITY - file_status symlink_status() const { return __get_symlink_status(); } - - _LIBCPP_INLINE_VISIBILITY - file_status symlink_status(error_code& __ec) const noexcept { - return __get_symlink_status(&__ec); - } - - _LIBCPP_INLINE_VISIBILITY - bool operator<(directory_entry const& __rhs) const noexcept { - return __p_ < __rhs.__p_; - } - - _LIBCPP_INLINE_VISIBILITY - bool operator==(directory_entry const& __rhs) const noexcept { - return __p_ == __rhs.__p_; - } - - _LIBCPP_INLINE_VISIBILITY - bool operator!=(directory_entry const& __rhs) const noexcept { - return __p_ != __rhs.__p_; - } - - _LIBCPP_INLINE_VISIBILITY - bool operator<=(directory_entry const& __rhs) const noexcept { - return __p_ <= __rhs.__p_; - } - - _LIBCPP_INLINE_VISIBILITY - bool operator>(directory_entry const& __rhs) const noexcept { - return __p_ > __rhs.__p_; - } - - _LIBCPP_INLINE_VISIBILITY - bool operator>=(directory_entry const& __rhs) const noexcept { - return __p_ >= __rhs.__p_; - } - -private: - friend class directory_iterator; - friend class recursive_directory_iterator; - friend class __dir_stream; - - enum _CacheType : unsigned char { - _Empty, - _IterSymlink, - _IterNonSymlink, - _RefreshSymlink, - _RefreshSymlinkUnresolved, - _RefreshNonSymlink - }; - - struct __cached_data { - uintmax_t __size_; - uintmax_t __nlink_; - file_time_type __write_time_; - perms __sym_perms_; - perms __non_sym_perms_; - file_type __type_; - _CacheType __cache_type_; - - _LIBCPP_INLINE_VISIBILITY - __cached_data() noexcept { __reset(); } - - _LIBCPP_INLINE_VISIBILITY - void __reset() { - __cache_type_ = _Empty; - __type_ = file_type::none; - __sym_perms_ = __non_sym_perms_ = perms::unknown; - __size_ = __nlink_ = uintmax_t(-1); - __write_time_ = file_time_type::min(); - } - }; - - _LIBCPP_INLINE_VISIBILITY - static __cached_data __create_iter_result(file_type __ft) { - __cached_data __data; - __data.__type_ = __ft; - __data.__cache_type_ = [&]() { - switch (__ft) { - case file_type::none: - return _Empty; - case file_type::symlink: - return _IterSymlink; - default: - return _IterNonSymlink; - } - }(); - return __data; - } - - _LIBCPP_INLINE_VISIBILITY - void __assign_iter_entry(_Path&& __p, __cached_data __dt) { - __p_ = _VSTD::move(__p); - __data_ = __dt; - } - - _LIBCPP_FUNC_VIS - error_code __do_refresh() noexcept; - - _LIBCPP_INLINE_VISIBILITY - static bool __is_dne_error(error_code const& __ec) { - if (!__ec) - return true; - switch (static_cast(__ec.value())) { - case errc::no_such_file_or_directory: - case errc::not_a_directory: - return true; - default: - return false; - } - } - - _LIBCPP_INLINE_VISIBILITY - void __handle_error(const char* __msg, error_code* __dest_ec, - error_code const& __ec, bool __allow_dne = false) const { - if (__dest_ec) { - *__dest_ec = __ec; - return; - } - if (__ec && (!__allow_dne || !__is_dne_error(__ec))) - __throw_filesystem_error(__msg, __p_, __ec); - } - - _LIBCPP_INLINE_VISIBILITY - void __refresh(error_code* __ec = nullptr) { - __handle_error("in directory_entry::refresh", __ec, __do_refresh(), - /*allow_dne*/ true); - } - - _LIBCPP_INLINE_VISIBILITY - file_type __get_sym_ft(error_code* __ec = nullptr) const { - switch (__data_.__cache_type_) { - case 
_Empty: - return __symlink_status(__p_, __ec).type(); - case _IterSymlink: - case _RefreshSymlink: - case _RefreshSymlinkUnresolved: - if (__ec) - __ec->clear(); - return file_type::symlink; - case _IterNonSymlink: - case _RefreshNonSymlink: - file_status __st(__data_.__type_); - if (__ec && !_VSTD_FS::exists(__st)) - *__ec = make_error_code(errc::no_such_file_or_directory); - else if (__ec) - __ec->clear(); - return __data_.__type_; - } - _LIBCPP_UNREACHABLE(); - } - - _LIBCPP_INLINE_VISIBILITY - file_type __get_ft(error_code* __ec = nullptr) const { - switch (__data_.__cache_type_) { - case _Empty: - case _IterSymlink: - case _RefreshSymlinkUnresolved: - return __status(__p_, __ec).type(); - case _IterNonSymlink: - case _RefreshNonSymlink: - case _RefreshSymlink: { - file_status __st(__data_.__type_); - if (__ec && !_VSTD_FS::exists(__st)) - *__ec = make_error_code(errc::no_such_file_or_directory); - else if (__ec) - __ec->clear(); - return __data_.__type_; - } - } - _LIBCPP_UNREACHABLE(); - } - - _LIBCPP_INLINE_VISIBILITY - file_status __get_status(error_code* __ec = nullptr) const { - switch (__data_.__cache_type_) { - case _Empty: - case _IterNonSymlink: - case _IterSymlink: - case _RefreshSymlinkUnresolved: - return __status(__p_, __ec); - case _RefreshNonSymlink: - case _RefreshSymlink: - return file_status(__get_ft(__ec), __data_.__non_sym_perms_); - } - _LIBCPP_UNREACHABLE(); - } - - _LIBCPP_INLINE_VISIBILITY - file_status __get_symlink_status(error_code* __ec = nullptr) const { - switch (__data_.__cache_type_) { - case _Empty: - case _IterNonSymlink: - case _IterSymlink: - return __symlink_status(__p_, __ec); - case _RefreshNonSymlink: - return file_status(__get_sym_ft(__ec), __data_.__non_sym_perms_); - case _RefreshSymlink: - case _RefreshSymlinkUnresolved: - return file_status(__get_sym_ft(__ec), __data_.__sym_perms_); - } - _LIBCPP_UNREACHABLE(); - } - - _LIBCPP_INLINE_VISIBILITY - uintmax_t __get_size(error_code* __ec = nullptr) const { - switch (__data_.__cache_type_) { - case _Empty: - case _IterNonSymlink: - case _IterSymlink: - case _RefreshSymlinkUnresolved: - return _VSTD_FS::__file_size(__p_, __ec); - case _RefreshSymlink: - case _RefreshNonSymlink: { - error_code __m_ec; - file_status __st(__get_ft(&__m_ec)); - __handle_error("in directory_entry::file_size", __ec, __m_ec); - if (_VSTD_FS::exists(__st) && !_VSTD_FS::is_regular_file(__st)) { - errc __err_kind = _VSTD_FS::is_directory(__st) ? 
errc::is_a_directory - : errc::not_supported; - __handle_error("in directory_entry::file_size", __ec, - make_error_code(__err_kind)); - } - return __data_.__size_; - } - } - _LIBCPP_UNREACHABLE(); - } - - _LIBCPP_INLINE_VISIBILITY - uintmax_t __get_nlink(error_code* __ec = nullptr) const { - switch (__data_.__cache_type_) { - case _Empty: - case _IterNonSymlink: - case _IterSymlink: - case _RefreshSymlinkUnresolved: - return _VSTD_FS::__hard_link_count(__p_, __ec); - case _RefreshSymlink: - case _RefreshNonSymlink: { - error_code __m_ec; - (void)__get_ft(&__m_ec); - __handle_error("in directory_entry::hard_link_count", __ec, __m_ec); - return __data_.__nlink_; - } - } - _LIBCPP_UNREACHABLE(); - } - - _LIBCPP_INLINE_VISIBILITY - file_time_type __get_write_time(error_code* __ec = nullptr) const { - switch (__data_.__cache_type_) { - case _Empty: - case _IterNonSymlink: - case _IterSymlink: - case _RefreshSymlinkUnresolved: - return _VSTD_FS::__last_write_time(__p_, __ec); - case _RefreshSymlink: - case _RefreshNonSymlink: { - error_code __m_ec; - file_status __st(__get_ft(&__m_ec)); - __handle_error("in directory_entry::last_write_time", __ec, __m_ec); - if (_VSTD_FS::exists(__st) && - __data_.__write_time_ == file_time_type::min()) - __handle_error("in directory_entry::last_write_time", __ec, - make_error_code(errc::value_too_large)); - return __data_.__write_time_; - } - } - _LIBCPP_UNREACHABLE(); - } - -private: - _Path __p_; - __cached_data __data_; -}; - -class __dir_element_proxy { -public: - inline _LIBCPP_INLINE_VISIBILITY directory_entry operator*() { - return _VSTD::move(__elem_); - } - -private: - friend class directory_iterator; - friend class recursive_directory_iterator; - explicit __dir_element_proxy(directory_entry const& __e) : __elem_(__e) {} - __dir_element_proxy(__dir_element_proxy&& __o) - : __elem_(_VSTD::move(__o.__elem_)) {} - directory_entry __elem_; -}; - -class directory_iterator { -public: - typedef directory_entry value_type; - typedef ptrdiff_t difference_type; - typedef value_type const* pointer; - typedef value_type const& reference; - typedef input_iterator_tag iterator_category; - -public: - //ctor & dtor - directory_iterator() noexcept {} - - explicit directory_iterator(const path& __p) - : directory_iterator(__p, nullptr) {} - - directory_iterator(const path& __p, directory_options __opts) - : directory_iterator(__p, nullptr, __opts) {} - - directory_iterator(const path& __p, error_code& __ec) - : directory_iterator(__p, &__ec) {} - - directory_iterator(const path& __p, directory_options __opts, - error_code& __ec) - : directory_iterator(__p, &__ec, __opts) {} - - directory_iterator(const directory_iterator&) = default; - directory_iterator(directory_iterator&&) = default; - directory_iterator& operator=(const directory_iterator&) = default; - - directory_iterator& operator=(directory_iterator&& __o) noexcept { - // non-default implementation provided to support self-move assign. 
- if (this != &__o) { - __imp_ = _VSTD::move(__o.__imp_); - } - return *this; - } - - ~directory_iterator() = default; - - const directory_entry& operator*() const { - _LIBCPP_ASSERT(__imp_, "The end iterator cannot be dereferenced"); - return __dereference(); - } - - const directory_entry* operator->() const { return &**this; } - - directory_iterator& operator++() { return __increment(); } - - __dir_element_proxy operator++(int) { - __dir_element_proxy __p(**this); - __increment(); - return __p; - } - - directory_iterator& increment(error_code& __ec) { return __increment(&__ec); } - -private: - inline _LIBCPP_INLINE_VISIBILITY friend bool - operator==(const directory_iterator& __lhs, - const directory_iterator& __rhs) noexcept; - - // construct the dir_stream - _LIBCPP_FUNC_VIS - directory_iterator(const path&, error_code*, - directory_options = directory_options::none); - - _LIBCPP_FUNC_VIS - directory_iterator& __increment(error_code* __ec = nullptr); - - _LIBCPP_FUNC_VIS - const directory_entry& __dereference() const; - -private: - shared_ptr<__dir_stream> __imp_; -}; - -inline _LIBCPP_INLINE_VISIBILITY bool -operator==(const directory_iterator& __lhs, - const directory_iterator& __rhs) noexcept { - return __lhs.__imp_ == __rhs.__imp_; -} - -inline _LIBCPP_INLINE_VISIBILITY bool -operator!=(const directory_iterator& __lhs, - const directory_iterator& __rhs) noexcept { - return !(__lhs == __rhs); -} - -// enable directory_iterator range-based for statements -inline _LIBCPP_INLINE_VISIBILITY directory_iterator -begin(directory_iterator __iter) noexcept { - return __iter; -} - -inline _LIBCPP_INLINE_VISIBILITY directory_iterator -end(directory_iterator) noexcept { - return directory_iterator(); -} - -class recursive_directory_iterator { -public: - using value_type = directory_entry; - using difference_type = ptrdiff_t; - using pointer = directory_entry const*; - using reference = directory_entry const&; - using iterator_category = input_iterator_tag; - -public: - // constructors and destructor - _LIBCPP_INLINE_VISIBILITY - recursive_directory_iterator() noexcept : __rec_(false) {} - - _LIBCPP_INLINE_VISIBILITY - explicit recursive_directory_iterator( - const path& __p, directory_options __xoptions = directory_options::none) - : recursive_directory_iterator(__p, __xoptions, nullptr) {} - - _LIBCPP_INLINE_VISIBILITY - recursive_directory_iterator(const path& __p, directory_options __xoptions, - error_code& __ec) - : recursive_directory_iterator(__p, __xoptions, &__ec) {} - - _LIBCPP_INLINE_VISIBILITY - recursive_directory_iterator(const path& __p, error_code& __ec) - : recursive_directory_iterator(__p, directory_options::none, &__ec) {} - - recursive_directory_iterator(const recursive_directory_iterator&) = default; - recursive_directory_iterator(recursive_directory_iterator&&) = default; - - recursive_directory_iterator& - operator=(const recursive_directory_iterator&) = default; - - _LIBCPP_INLINE_VISIBILITY - recursive_directory_iterator& - operator=(recursive_directory_iterator&& __o) noexcept { - // non-default implementation provided to support self-move assign. 
- if (this != &__o) { - __imp_ = _VSTD::move(__o.__imp_); - __rec_ = __o.__rec_; - } - return *this; - } - - ~recursive_directory_iterator() = default; - - _LIBCPP_INLINE_VISIBILITY - const directory_entry& operator*() const { return __dereference(); } - - _LIBCPP_INLINE_VISIBILITY - const directory_entry* operator->() const { return &__dereference(); } - - recursive_directory_iterator& operator++() { return __increment(); } - - _LIBCPP_INLINE_VISIBILITY - __dir_element_proxy operator++(int) { - __dir_element_proxy __p(**this); - __increment(); - return __p; - } - - _LIBCPP_INLINE_VISIBILITY - recursive_directory_iterator& increment(error_code& __ec) { - return __increment(&__ec); - } - - _LIBCPP_FUNC_VIS directory_options options() const; - _LIBCPP_FUNC_VIS int depth() const; - - _LIBCPP_INLINE_VISIBILITY - void pop() { __pop(); } - - _LIBCPP_INLINE_VISIBILITY - void pop(error_code& __ec) { __pop(&__ec); } - - _LIBCPP_INLINE_VISIBILITY - bool recursion_pending() const { return __rec_; } - - _LIBCPP_INLINE_VISIBILITY - void disable_recursion_pending() { __rec_ = false; } - -private: - _LIBCPP_FUNC_VIS - recursive_directory_iterator(const path& __p, directory_options __opt, - error_code* __ec); - - _LIBCPP_FUNC_VIS - const directory_entry& __dereference() const; - - _LIBCPP_FUNC_VIS - bool __try_recursion(error_code* __ec); - - _LIBCPP_FUNC_VIS - void __advance(error_code* __ec = nullptr); - - _LIBCPP_FUNC_VIS - recursive_directory_iterator& __increment(error_code* __ec = nullptr); - - _LIBCPP_FUNC_VIS - void __pop(error_code* __ec = nullptr); - - inline _LIBCPP_INLINE_VISIBILITY friend bool - operator==(const recursive_directory_iterator&, - const recursive_directory_iterator&) noexcept; - - struct _LIBCPP_HIDDEN __shared_imp; - shared_ptr<__shared_imp> __imp_; - bool __rec_; -}; // class recursive_directory_iterator - -inline _LIBCPP_INLINE_VISIBILITY bool -operator==(const recursive_directory_iterator& __lhs, - const recursive_directory_iterator& __rhs) noexcept { - return __lhs.__imp_ == __rhs.__imp_; -} - -_LIBCPP_INLINE_VISIBILITY -inline bool operator!=(const recursive_directory_iterator& __lhs, - const recursive_directory_iterator& __rhs) noexcept { - return !(__lhs == __rhs); -} -// enable recursive_directory_iterator range-based for statements -inline _LIBCPP_INLINE_VISIBILITY recursive_directory_iterator -begin(recursive_directory_iterator __iter) noexcept { - return __iter; -} - -inline _LIBCPP_INLINE_VISIBILITY recursive_directory_iterator -end(recursive_directory_iterator) noexcept { - return recursive_directory_iterator(); -} - -_LIBCPP_AVAILABILITY_FILESYSTEM_POP - -_LIBCPP_END_NAMESPACE_FILESYSTEM - -#if !defined(_LIBCPP_HAS_NO_RANGES) -template <> -_LIBCPP_AVAILABILITY_FILESYSTEM -inline constexpr bool _VSTD::ranges::enable_borrowed_range<_VSTD_FS::directory_iterator> = true; -template <> -_LIBCPP_AVAILABILITY_FILESYSTEM -inline constexpr bool _VSTD::ranges::enable_borrowed_range<_VSTD_FS::recursive_directory_iterator> = true; - -template <> -_LIBCPP_AVAILABILITY_FILESYSTEM -inline constexpr bool _VSTD::ranges::enable_view<_VSTD_FS::directory_iterator> = true; -template <> -_LIBCPP_AVAILABILITY_FILESYSTEM -inline constexpr bool _VSTD::ranges::enable_view<_VSTD_FS::recursive_directory_iterator> = true; -#endif - -#endif // !_LIBCPP_CXX03_LANG - -_LIBCPP_POP_MACROS - #endif // _LIBCPP_FILESYSTEM diff --git a/libcxx/include/module.modulemap b/libcxx/include/module.modulemap index fa4170ba1ed3..4194a7fb83fe 100644 --- a/libcxx/include/module.modulemap +++ 
b/libcxx/include/module.modulemap @@ -450,6 +450,25 @@ module std [system] { module filesystem { header "filesystem" export * + + module __filesystem { + module copy_options { private header "__filesystem/copy_options.h" } + module directory_entry { private header "__filesystem/directory_entry.h" } + module directory_iterator { private header "__filesystem/directory_iterator.h" } + module directory_options { private header "__filesystem/directory_options.h" } + module file_status { private header "__filesystem/file_status.h" } + module file_time_type { private header "__filesystem/file_time_type.h" } + module file_type { private header "__filesystem/file_type.h" } + module filesystem_error { private header "__filesystem/filesystem_error.h" } + module operations { private header "__filesystem/operations.h" } + module path_iterator { private header "__filesystem/path_iterator.h" } + module path { private header "__filesystem/path.h" } + module perm_options { private header "__filesystem/perm_options.h" } + module perms { private header "__filesystem/perms.h" } + module recursive_directory_iterator { private header "__filesystem/recursive_directory_iterator.h" } + module space_info { private header "__filesystem/space_info.h" } + module u8path { private header "__filesystem/u8path.h" } + } } module format { header "format" diff --git a/libcxx/test/libcxx/diagnostics/detail.headers/filesystem/copy_options.module.verify.cpp b/libcxx/test/libcxx/diagnostics/detail.headers/filesystem/copy_options.module.verify.cpp new file mode 100644 index 000000000000..064a034a9dd2 --- /dev/null +++ b/libcxx/test/libcxx/diagnostics/detail.headers/filesystem/copy_options.module.verify.cpp @@ -0,0 +1,15 @@ +//===----------------------------------------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +// REQUIRES: modules-build + +// WARNING: This test was generated by 'generate_private_header_tests.py' +// and should not be edited manually. + +// expected-error@*:* {{use of private header from outside its module: '__filesystem/copy_options.h'}} +#include <__filesystem/copy_options.h> diff --git a/libcxx/test/libcxx/diagnostics/detail.headers/filesystem/directory_entry.module.verify.cpp b/libcxx/test/libcxx/diagnostics/detail.headers/filesystem/directory_entry.module.verify.cpp new file mode 100644 index 000000000000..6fef616cd53b --- /dev/null +++ b/libcxx/test/libcxx/diagnostics/detail.headers/filesystem/directory_entry.module.verify.cpp @@ -0,0 +1,15 @@ +//===----------------------------------------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +// REQUIRES: modules-build + +// WARNING: This test was generated by 'generate_private_header_tests.py' +// and should not be edited manually. 
+ +// expected-error@*:* {{use of private header from outside its module: '__filesystem/directory_entry.h'}} +#include <__filesystem/directory_entry.h> diff --git a/libcxx/test/libcxx/diagnostics/detail.headers/filesystem/directory_iterator.module.verify.cpp b/libcxx/test/libcxx/diagnostics/detail.headers/filesystem/directory_iterator.module.verify.cpp new file mode 100644 index 000000000000..6d01cd743d85 --- /dev/null +++ b/libcxx/test/libcxx/diagnostics/detail.headers/filesystem/directory_iterator.module.verify.cpp @@ -0,0 +1,15 @@ +//===----------------------------------------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +// REQUIRES: modules-build + +// WARNING: This test was generated by 'generate_private_header_tests.py' +// and should not be edited manually. + +// expected-error@*:* {{use of private header from outside its module: '__filesystem/directory_iterator.h'}} +#include <__filesystem/directory_iterator.h> diff --git a/libcxx/test/libcxx/diagnostics/detail.headers/filesystem/directory_options.module.verify.cpp b/libcxx/test/libcxx/diagnostics/detail.headers/filesystem/directory_options.module.verify.cpp new file mode 100644 index 000000000000..2316023813f0 --- /dev/null +++ b/libcxx/test/libcxx/diagnostics/detail.headers/filesystem/directory_options.module.verify.cpp @@ -0,0 +1,15 @@ +//===----------------------------------------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +// REQUIRES: modules-build + +// WARNING: This test was generated by 'generate_private_header_tests.py' +// and should not be edited manually. + +// expected-error@*:* {{use of private header from outside its module: '__filesystem/directory_options.h'}} +#include <__filesystem/directory_options.h> diff --git a/libcxx/test/libcxx/diagnostics/detail.headers/filesystem/file_status.module.verify.cpp b/libcxx/test/libcxx/diagnostics/detail.headers/filesystem/file_status.module.verify.cpp new file mode 100644 index 000000000000..9419bdb2d573 --- /dev/null +++ b/libcxx/test/libcxx/diagnostics/detail.headers/filesystem/file_status.module.verify.cpp @@ -0,0 +1,15 @@ +//===----------------------------------------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +// REQUIRES: modules-build + +// WARNING: This test was generated by 'generate_private_header_tests.py' +// and should not be edited manually. 
+ +// expected-error@*:* {{use of private header from outside its module: '__filesystem/file_status.h'}} +#include <__filesystem/file_status.h> diff --git a/libcxx/test/libcxx/diagnostics/detail.headers/filesystem/file_time_type.module.verify.cpp b/libcxx/test/libcxx/diagnostics/detail.headers/filesystem/file_time_type.module.verify.cpp new file mode 100644 index 000000000000..ac940261c096 --- /dev/null +++ b/libcxx/test/libcxx/diagnostics/detail.headers/filesystem/file_time_type.module.verify.cpp @@ -0,0 +1,15 @@ +//===----------------------------------------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +// REQUIRES: modules-build + +// WARNING: This test was generated by 'generate_private_header_tests.py' +// and should not be edited manually. + +// expected-error@*:* {{use of private header from outside its module: '__filesystem/file_time_type.h'}} +#include <__filesystem/file_time_type.h> diff --git a/libcxx/test/libcxx/diagnostics/detail.headers/filesystem/file_type.module.verify.cpp b/libcxx/test/libcxx/diagnostics/detail.headers/filesystem/file_type.module.verify.cpp new file mode 100644 index 000000000000..70b5d9792c45 --- /dev/null +++ b/libcxx/test/libcxx/diagnostics/detail.headers/filesystem/file_type.module.verify.cpp @@ -0,0 +1,15 @@ +//===----------------------------------------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +// REQUIRES: modules-build + +// WARNING: This test was generated by 'generate_private_header_tests.py' +// and should not be edited manually. + +// expected-error@*:* {{use of private header from outside its module: '__filesystem/file_type.h'}} +#include <__filesystem/file_type.h> diff --git a/libcxx/test/libcxx/diagnostics/detail.headers/filesystem/filesystem_error.module.verify.cpp b/libcxx/test/libcxx/diagnostics/detail.headers/filesystem/filesystem_error.module.verify.cpp new file mode 100644 index 000000000000..095a71569ff5 --- /dev/null +++ b/libcxx/test/libcxx/diagnostics/detail.headers/filesystem/filesystem_error.module.verify.cpp @@ -0,0 +1,15 @@ +//===----------------------------------------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +// REQUIRES: modules-build + +// WARNING: This test was generated by 'generate_private_header_tests.py' +// and should not be edited manually. 
+ +// expected-error@*:* {{use of private header from outside its module: '__filesystem/filesystem_error.h'}} +#include <__filesystem/filesystem_error.h> diff --git a/libcxx/test/libcxx/diagnostics/detail.headers/filesystem/operations.module.verify.cpp b/libcxx/test/libcxx/diagnostics/detail.headers/filesystem/operations.module.verify.cpp new file mode 100644 index 000000000000..dc156cd5145b --- /dev/null +++ b/libcxx/test/libcxx/diagnostics/detail.headers/filesystem/operations.module.verify.cpp @@ -0,0 +1,15 @@ +//===----------------------------------------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +// REQUIRES: modules-build + +// WARNING: This test was generated by 'generate_private_header_tests.py' +// and should not be edited manually. + +// expected-error@*:* {{use of private header from outside its module: '__filesystem/operations.h'}} +#include <__filesystem/operations.h> diff --git a/libcxx/test/libcxx/diagnostics/detail.headers/filesystem/path.module.verify.cpp b/libcxx/test/libcxx/diagnostics/detail.headers/filesystem/path.module.verify.cpp new file mode 100644 index 000000000000..f232bf7b98f8 --- /dev/null +++ b/libcxx/test/libcxx/diagnostics/detail.headers/filesystem/path.module.verify.cpp @@ -0,0 +1,15 @@ +//===----------------------------------------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +// REQUIRES: modules-build + +// WARNING: This test was generated by 'generate_private_header_tests.py' +// and should not be edited manually. + +// expected-error@*:* {{use of private header from outside its module: '__filesystem/path.h'}} +#include <__filesystem/path.h> diff --git a/libcxx/test/libcxx/diagnostics/detail.headers/filesystem/path_iterator.module.verify.cpp b/libcxx/test/libcxx/diagnostics/detail.headers/filesystem/path_iterator.module.verify.cpp new file mode 100644 index 000000000000..71e1928d2508 --- /dev/null +++ b/libcxx/test/libcxx/diagnostics/detail.headers/filesystem/path_iterator.module.verify.cpp @@ -0,0 +1,15 @@ +//===----------------------------------------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +// REQUIRES: modules-build + +// WARNING: This test was generated by 'generate_private_header_tests.py' +// and should not be edited manually. 
+ +// expected-error@*:* {{use of private header from outside its module: '__filesystem/path_iterator.h'}} +#include <__filesystem/path_iterator.h> diff --git a/libcxx/test/libcxx/diagnostics/detail.headers/filesystem/perm_options.module.verify.cpp b/libcxx/test/libcxx/diagnostics/detail.headers/filesystem/perm_options.module.verify.cpp new file mode 100644 index 000000000000..8a8c25edea98 --- /dev/null +++ b/libcxx/test/libcxx/diagnostics/detail.headers/filesystem/perm_options.module.verify.cpp @@ -0,0 +1,15 @@ +//===----------------------------------------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +// REQUIRES: modules-build + +// WARNING: This test was generated by 'generate_private_header_tests.py' +// and should not be edited manually. + +// expected-error@*:* {{use of private header from outside its module: '__filesystem/perm_options.h'}} +#include <__filesystem/perm_options.h> diff --git a/libcxx/test/libcxx/diagnostics/detail.headers/filesystem/perms.module.verify.cpp b/libcxx/test/libcxx/diagnostics/detail.headers/filesystem/perms.module.verify.cpp new file mode 100644 index 000000000000..0df582305e8a --- /dev/null +++ b/libcxx/test/libcxx/diagnostics/detail.headers/filesystem/perms.module.verify.cpp @@ -0,0 +1,15 @@ +//===----------------------------------------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +// REQUIRES: modules-build + +// WARNING: This test was generated by 'generate_private_header_tests.py' +// and should not be edited manually. + +// expected-error@*:* {{use of private header from outside its module: '__filesystem/perms.h'}} +#include <__filesystem/perms.h> diff --git a/libcxx/test/libcxx/diagnostics/detail.headers/filesystem/recursive_directory_iterator.module.verify.cpp b/libcxx/test/libcxx/diagnostics/detail.headers/filesystem/recursive_directory_iterator.module.verify.cpp new file mode 100644 index 000000000000..73b7de8f8196 --- /dev/null +++ b/libcxx/test/libcxx/diagnostics/detail.headers/filesystem/recursive_directory_iterator.module.verify.cpp @@ -0,0 +1,15 @@ +//===----------------------------------------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +// REQUIRES: modules-build + +// WARNING: This test was generated by 'generate_private_header_tests.py' +// and should not be edited manually. 
+ +// expected-error@*:* {{use of private header from outside its module: '__filesystem/recursive_directory_iterator.h'}} +#include <__filesystem/recursive_directory_iterator.h> diff --git a/libcxx/test/libcxx/diagnostics/detail.headers/filesystem/space_info.module.verify.cpp b/libcxx/test/libcxx/diagnostics/detail.headers/filesystem/space_info.module.verify.cpp new file mode 100644 index 000000000000..d5f7586ca5d2 --- /dev/null +++ b/libcxx/test/libcxx/diagnostics/detail.headers/filesystem/space_info.module.verify.cpp @@ -0,0 +1,15 @@ +//===----------------------------------------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +// REQUIRES: modules-build + +// WARNING: This test was generated by 'generate_private_header_tests.py' +// and should not be edited manually. + +// expected-error@*:* {{use of private header from outside its module: '__filesystem/space_info.h'}} +#include <__filesystem/space_info.h> diff --git a/libcxx/test/libcxx/diagnostics/detail.headers/filesystem/u8path.module.verify.cpp b/libcxx/test/libcxx/diagnostics/detail.headers/filesystem/u8path.module.verify.cpp new file mode 100644 index 000000000000..5f54ab6908f2 --- /dev/null +++ b/libcxx/test/libcxx/diagnostics/detail.headers/filesystem/u8path.module.verify.cpp @@ -0,0 +1,15 @@ +//===----------------------------------------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +// REQUIRES: modules-build + +// WARNING: This test was generated by 'generate_private_header_tests.py' +// and should not be edited manually. 
+ +// expected-error@*:* {{use of private header from outside its module: '__filesystem/u8path.h'}} +#include <__filesystem/u8path.h> From 19030717e38efb531934cfe4edb4864e3271411d Mon Sep 17 00:00:00 2001 From: LLVM GN Syncbot Date: Wed, 22 Dec 2021 01:35:25 +0000 Subject: [PATCH 020/293] [gn build] Port 7056250f517a --- llvm/utils/gn/secondary/libcxx/include/BUILD.gn | 16 ++++++++++++++++ 1 file changed, 16 insertions(+) diff --git a/llvm/utils/gn/secondary/libcxx/include/BUILD.gn b/llvm/utils/gn/secondary/libcxx/include/BUILD.gn index b356af422f78..80d4c15503af 100644 --- a/llvm/utils/gn/secondary/libcxx/include/BUILD.gn +++ b/llvm/utils/gn/secondary/libcxx/include/BUILD.gn @@ -201,6 +201,22 @@ if (current_toolchain == default_toolchain) { "__coroutine/trivial_awaitables.h", "__debug", "__errc", + "__filesystem/copy_options.h", + "__filesystem/directory_entry.h", + "__filesystem/directory_iterator.h", + "__filesystem/directory_options.h", + "__filesystem/file_status.h", + "__filesystem/file_time_type.h", + "__filesystem/file_type.h", + "__filesystem/filesystem_error.h", + "__filesystem/operations.h", + "__filesystem/path.h", + "__filesystem/path_iterator.h", + "__filesystem/perm_options.h", + "__filesystem/perms.h", + "__filesystem/recursive_directory_iterator.h", + "__filesystem/space_info.h", + "__filesystem/u8path.h", "__format/format_arg.h", "__format/format_args.h", "__format/format_context.h", From 759f3e297c072165d044a3ac6fa3fc2f30df86ab Mon Sep 17 00:00:00 2001 From: Chuanqi Xu Date: Wed, 22 Dec 2021 11:09:08 +0800 Subject: [PATCH 021/293] [NFC] [C++20] [Modules] Add tests for template instantiation in transitively imported module This commit adds two tests about template class instantiation in a transitively imported module. They are used as pre-commit tests for subsequent patches.
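(Illustrative summary, not part of the commit: the sketch below condenses the scenario the new tests exercise, using the same module and function names as the files the diff adds.)

    // Templ.cppm - interface of module 'Templ'
    export module Templ;
    export template <class T>
    class G {
    public:
      T operator()() { return T(); }
    };

    // bar.cppm - module 'bar' imports 'Templ' and uses G<T> in an exported template
    export module bar;
    import Templ;
    export template <class T>
    int bar() {
      return G<T>()();
    }

    // user.cpp - imports only 'bar'; the definition of G<int> must be reachable transitively
    import bar;
    int foo() {
      return bar<int>();   // instantiates bar<int>, which in turn needs G<int>
    }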
Differential Revision: https://reviews.llvm.org/D116097 --- .../module-transtive-instantiation/Templ.cppm | 8 ++++++++ .../Inputs/module-transtive-instantiation/bar.cppm | 6 ++++++ .../Modules/module-transtive-instantiation-2.cpp | 11 +++++++++++ .../test/Modules/module-transtive-instantiation.cpp | 13 +++++++++++++ 4 files changed, 38 insertions(+) create mode 100644 clang/test/Modules/Inputs/module-transtive-instantiation/Templ.cppm create mode 100644 clang/test/Modules/Inputs/module-transtive-instantiation/bar.cppm create mode 100644 clang/test/Modules/module-transtive-instantiation-2.cpp create mode 100644 clang/test/Modules/module-transtive-instantiation.cpp diff --git a/clang/test/Modules/Inputs/module-transtive-instantiation/Templ.cppm b/clang/test/Modules/Inputs/module-transtive-instantiation/Templ.cppm new file mode 100644 index 000000000000..ee41b5374613 --- /dev/null +++ b/clang/test/Modules/Inputs/module-transtive-instantiation/Templ.cppm @@ -0,0 +1,8 @@ +export module Templ; +export template +class G { +public: + T operator()() { + return T(); + } +}; diff --git a/clang/test/Modules/Inputs/module-transtive-instantiation/bar.cppm b/clang/test/Modules/Inputs/module-transtive-instantiation/bar.cppm new file mode 100644 index 000000000000..63cd595a642d --- /dev/null +++ b/clang/test/Modules/Inputs/module-transtive-instantiation/bar.cppm @@ -0,0 +1,6 @@ +export module bar; +import Templ; +export template +int bar() { + return G()(); +} diff --git a/clang/test/Modules/module-transtive-instantiation-2.cpp b/clang/test/Modules/module-transtive-instantiation-2.cpp new file mode 100644 index 000000000000..b058cd77f436 --- /dev/null +++ b/clang/test/Modules/module-transtive-instantiation-2.cpp @@ -0,0 +1,11 @@ +// RUN: rm -rf %t +// RUN: mkdir -p %t +// RUN: %clang_cc1 -std=c++20 %S/Inputs/module-transtive-instantiation/Templ.cppm -emit-module-interface -o %t/Templ.pcm +// RUN: %clang_cc1 -std=c++20 %S/Inputs/module-transtive-instantiation/bar.cppm -emit-module-interface -fprebuilt-module-path=%t -o %t/bar.pcm +// RUN: %clang_cc1 -std=c++20 -fprebuilt-module-path=%t %s -fsyntax-only -verify + +import bar; +int foo() { + G g; // expected-error {{declaration of 'G' must be imported from module 'Templ' before it is required}} + return g(); // expected-note@Inputs/module-transtive-instantiation/Templ.cppm:3 {{declaration here is not visible}} +} diff --git a/clang/test/Modules/module-transtive-instantiation.cpp b/clang/test/Modules/module-transtive-instantiation.cpp new file mode 100644 index 000000000000..2c5c7ead8ad5 --- /dev/null +++ b/clang/test/Modules/module-transtive-instantiation.cpp @@ -0,0 +1,13 @@ +// RUN: rm -rf %t +// RUN: mkdir -p %t +// RUN: %clang_cc1 -std=c++20 %S/Inputs/module-transtive-instantiation/Templ.cppm -emit-module-interface -o %t/Templ.pcm +// RUN: %clang_cc1 -std=c++20 %S/Inputs/module-transtive-instantiation/bar.cppm -emit-module-interface -fprebuilt-module-path=%t -o %t/bar.pcm +// RUN: %clang_cc1 -std=c++20 -fprebuilt-module-path=%t %s -fsyntax-only -verify + +import bar; +int foo() { + // FIXME: It shouldn't be an error. Since the `G` is already imported in bar. 
+ return bar(); // expected-error@Inputs/module-transtive-instantiation/bar.cppm:5 {{definition of 'G' must be imported from module 'Templ' before it is required}} + // expected-note@-1 {{in instantiation of function template specialization 'bar' requested here}} + // expected-note@Inputs/module-transtive-instantiation/Templ.cppm:3 {{definition here is not reachable}} +} From 9effb6f81620058b57316401807b23db99e1682d Mon Sep 17 00:00:00 2001 From: Chuanqi Xu Date: Wed, 22 Dec 2021 11:43:56 +0800 Subject: [PATCH 022/293] [NFC] Use %clang_cc instead of %clang in c++20 module tests --- .../CXX/basic/basic.namespace/basic.namespace.general/p2.cppm | 4 ++-- clang/test/Modules/concept.cppm | 4 ++-- clang/test/Modules/odr_using_dependent_name.cppm | 4 ++-- 3 files changed, 6 insertions(+), 6 deletions(-) diff --git a/clang/test/CXX/basic/basic.namespace/basic.namespace.general/p2.cppm b/clang/test/CXX/basic/basic.namespace/basic.namespace.general/p2.cppm index 61f13ce01925..9fa5120f8842 100644 --- a/clang/test/CXX/basic/basic.namespace/basic.namespace.general/p2.cppm +++ b/clang/test/CXX/basic/basic.namespace/basic.namespace.general/p2.cppm @@ -1,8 +1,8 @@ // Check that the compiler wouldn't crash due to inconsistent namesapce linkage // RUN: rm -rf %t // RUN: mkdir -p %t -// RUN: %clang -std=c++20 %S/Inputs/p2.cppm --precompile -o %t/Y.pcm -// RUN: %clang -std=c++20 -fprebuilt-module-path=%t -I%S/Inputs %s -c -Xclang -verify +// RUN: %clang_cc1 -x c++ -std=c++20 %S/Inputs/p2.cppm -emit-module-interface -o %t/Y.pcm +// RUN: %clang_cc1 -x c++ -std=c++20 -fprebuilt-module-path=%t -I%S/Inputs %s -fsyntax-only -verify // expected-no-diagnostics export module X; import Y; diff --git a/clang/test/Modules/concept.cppm b/clang/test/Modules/concept.cppm index e14a158644af..f171e6d08237 100644 --- a/clang/test/Modules/concept.cppm +++ b/clang/test/Modules/concept.cppm @@ -1,7 +1,7 @@ // RUN: rm -rf %t // RUN: mkdir %t -// RUN: %clang -std=c++20 %S/Inputs/concept/A.cppm --precompile -o %t/A.pcm -// RUN: %clang -std=c++20 -fprebuilt-module-path=%t -I%S/Inputs/concept %s -c -Xclang -verify +// RUN: %clang_cc1 -x c++ -std=c++20 %S/Inputs/concept/A.cppm -emit-module-interface -o %t/A.pcm +// RUN: %clang_cc1 -x c++ -std=c++20 -fprebuilt-module-path=%t -I%S/Inputs/concept %s -fsyntax-only -verify // expected-no-diagnostics module; diff --git a/clang/test/Modules/odr_using_dependent_name.cppm b/clang/test/Modules/odr_using_dependent_name.cppm index 8d7ea9f57bed..c2938855fdbe 100644 --- a/clang/test/Modules/odr_using_dependent_name.cppm +++ b/clang/test/Modules/odr_using_dependent_name.cppm @@ -1,7 +1,7 @@ // RUN: rm -rf %t // RUN: mkdir -p %t -// RUN: %clang -std=c++20 %S/Inputs/odr_using_dependent_name/X.cppm --precompile -o %t/X.pcm -// RUN: %clang -std=c++20 -I%S/Inputs/odr_using_dependent_name -fprebuilt-module-path=%t %s --precompile -c +// RUN: %clang_cc1 -std=c++20 %S/Inputs/odr_using_dependent_name/X.cppm -emit-module-interface -o %t/X.pcm +// RUN: %clang_cc1 -std=c++20 -I%S/Inputs/odr_using_dependent_name -fprebuilt-module-path=%t %s -emit-module-interface -fsyntax-only -verify // expected-no-diagnostics module; #include "foo.h" From eb37330ac77774a9fe030aaa3c5759afa3377db5 Mon Sep 17 00:00:00 2001 From: Fangrui Song Date: Tue, 21 Dec 2021 20:19:51 -0800 Subject: [PATCH 023/293] [ELF] Change mipsGotIndex to uint32_t This does not decrease sizeof(InputSection) (important for memory usage) on ELF64 by itself but allows we to add another uint32_t. 
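(Illustrative only, not part of the patch: a minimal sketch of the sentinel-value pattern the diff below adopts. The struct name and helper are hypothetical; the size remark assumes a typical layout where llvm::Optional<uint32_t> needs extra space for its engaged flag.)

    #include <cstdint>

    // Hypothetical stand-in for lld's InputFile, showing only the changed member.
    struct FileSketch {
      // Before: llvm::Optional<uint32_t> mipsGotIndex;  // value plus engaged flag
      // After:  a plain uint32_t, with -1 reserved to mean "no MIPS GOT index".
      uint32_t mipsGotIndex = uint32_t(-1);

      bool hasMipsGot() const { return mipsGotIndex != uint32_t(-1); }
    };

    // Dropping the flag does not shrink the struct by itself (padding absorbs it),
    // but it leaves room to add another uint32_t member later without growing it.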
--- lld/ELF/InputFiles.h | 2 +- lld/ELF/SyntheticSections.cpp | 17 ++++++++--------- 2 files changed, 9 insertions(+), 10 deletions(-) diff --git a/lld/ELF/InputFiles.h b/lld/ELF/InputFiles.h index 1075029b8df9..e15f8798ae1c 100644 --- a/lld/ELF/InputFiles.h +++ b/lld/ELF/InputFiles.h @@ -111,7 +111,7 @@ class InputFile { SmallVector symbols; // Index of MIPS GOT built for this file. - llvm::Optional mipsGotIndex; + uint32_t mipsGotIndex = -1; // outSecOff of .got2 in the current file. This is used by PPC32 -fPIC/-fPIE // to compute offsets in PLT call stubs. diff --git a/lld/ELF/SyntheticSections.cpp b/lld/ELF/SyntheticSections.cpp index a901c898b270..e25401baabcb 100644 --- a/lld/ELF/SyntheticSections.cpp +++ b/lld/ELF/SyntheticSections.cpp @@ -763,18 +763,18 @@ size_t MipsGotSection::FileGot::getIndexedEntriesNum() const { } MipsGotSection::FileGot &MipsGotSection::getGot(InputFile &f) { - if (!f.mipsGotIndex.hasValue()) { + if (f.mipsGotIndex == uint32_t(-1)) { gots.emplace_back(); gots.back().file = &f; f.mipsGotIndex = gots.size() - 1; } - return gots[*f.mipsGotIndex]; + return gots[f.mipsGotIndex]; } uint64_t MipsGotSection::getPageEntryOffset(const InputFile *f, const Symbol &sym, int64_t addend) const { - const FileGot &g = gots[*f->mipsGotIndex]; + const FileGot &g = gots[f->mipsGotIndex]; uint64_t index = 0; if (const OutputSection *outSec = sym.getOutputSection()) { uint64_t secAddr = getMipsPageAddr(outSec->addr); @@ -788,7 +788,7 @@ uint64_t MipsGotSection::getPageEntryOffset(const InputFile *f, uint64_t MipsGotSection::getSymEntryOffset(const InputFile *f, const Symbol &s, int64_t addend) const { - const FileGot &g = gots[*f->mipsGotIndex]; + const FileGot &g = gots[f->mipsGotIndex]; Symbol *sym = const_cast(&s); if (sym->isTls()) return g.tls.lookup(sym) * config->wordsize; @@ -798,13 +798,13 @@ uint64_t MipsGotSection::getSymEntryOffset(const InputFile *f, const Symbol &s, } uint64_t MipsGotSection::getTlsIndexOffset(const InputFile *f) const { - const FileGot &g = gots[*f->mipsGotIndex]; + const FileGot &g = gots[f->mipsGotIndex]; return g.dynTlsSymbols.lookup(nullptr) * config->wordsize; } uint64_t MipsGotSection::getGlobalDynOffset(const InputFile *f, const Symbol &s) const { - const FileGot &g = gots[*f->mipsGotIndex]; + const FileGot &g = gots[f->mipsGotIndex]; Symbol *sym = const_cast(&s); return g.dynTlsSymbols.lookup(sym) * config->wordsize; } @@ -1061,10 +1061,9 @@ uint64_t MipsGotSection::getGp(const InputFile *f) const { // For files without related GOT or files refer a primary GOT // returns "common" _gp value. For secondary GOTs calculate // individual _gp values. 
- if (!f || !f->mipsGotIndex.hasValue() || *f->mipsGotIndex == 0) + if (!f || f->mipsGotIndex == uint32_t(-1) || f->mipsGotIndex == 0) return ElfSym::mipsGp->getVA(0); - return getVA() + gots[*f->mipsGotIndex].startIndex * config->wordsize + - 0x7ff0; + return getVA() + gots[f->mipsGotIndex].startIndex * config->wordsize + 0x7ff0; } void MipsGotSection::writeTo(uint8_t *buf) { From 9db0e21660b5cdaebd98aea79cccfe40f1a1ee98 Mon Sep 17 00:00:00 2001 From: Kazu Hirata Date: Tue, 21 Dec 2021 22:28:48 -0800 Subject: [PATCH 024/293] [llvm] Use depth_first (NFC) --- llvm/lib/Target/AArch64/AArch64FalkorHWPFFix.cpp | 8 ++++---- llvm/lib/Target/AMDGPU/AMDGPUReplaceLDSUseWithPointer.cpp | 2 +- llvm/lib/Target/PowerPC/PPCLoopInstrFormPrep.cpp | 6 +++--- llvm/lib/Transforms/Scalar/LoopDataPrefetch.cpp | 4 ++-- 4 files changed, 10 insertions(+), 10 deletions(-) diff --git a/llvm/lib/Target/AArch64/AArch64FalkorHWPFFix.cpp b/llvm/lib/Target/AArch64/AArch64FalkorHWPFFix.cpp index 209f9f7255a5..793663ef97d7 100644 --- a/llvm/lib/Target/AArch64/AArch64FalkorHWPFFix.cpp +++ b/llvm/lib/Target/AArch64/AArch64FalkorHWPFFix.cpp @@ -138,8 +138,8 @@ bool FalkorMarkStridedAccesses::run() { bool MadeChange = false; for (Loop *L : LI) - for (auto LIt = df_begin(L), LE = df_end(L); LIt != LE; ++LIt) - MadeChange |= runOnLoop(**LIt); + for (Loop *LIt : depth_first(L)) + MadeChange |= runOnLoop(*LIt); return MadeChange; } @@ -828,10 +828,10 @@ bool FalkorHWPFFix::runOnMachineFunction(MachineFunction &Fn) { Modified = false; for (MachineLoop *I : LI) - for (auto L = df_begin(I), LE = df_end(I); L != LE; ++L) + for (MachineLoop *L : depth_first(I)) // Only process inner-loops if (L->isInnermost()) - runOnLoop(**L, Fn); + runOnLoop(*L, Fn); return Modified; } diff --git a/llvm/lib/Target/AMDGPU/AMDGPUReplaceLDSUseWithPointer.cpp b/llvm/lib/Target/AMDGPU/AMDGPUReplaceLDSUseWithPointer.cpp index 2a2338ac7016..2475b44b42a3 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPUReplaceLDSUseWithPointer.cpp +++ b/llvm/lib/Target/AMDGPU/AMDGPUReplaceLDSUseWithPointer.cpp @@ -428,7 +428,7 @@ class CollectReachableCallees { // // FIXME: Need to handle bit-casted function pointers. 
// - SmallVector CGNStack(df_begin(KCGN), df_end(KCGN)); + SmallVector CGNStack(depth_first(KCGN)); SmallPtrSet VisitedCGNodes; while (!CGNStack.empty()) { auto *CGN = CGNStack.pop_back_val(); diff --git a/llvm/lib/Target/PowerPC/PPCLoopInstrFormPrep.cpp b/llvm/lib/Target/PowerPC/PPCLoopInstrFormPrep.cpp index 7f63827afbd6..0c7be96a0595 100644 --- a/llvm/lib/Target/PowerPC/PPCLoopInstrFormPrep.cpp +++ b/llvm/lib/Target/PowerPC/PPCLoopInstrFormPrep.cpp @@ -413,9 +413,9 @@ bool PPCLoopInstrFormPrep::runOnFunction(Function &F) { bool MadeChange = false; - for (auto I = LI->begin(), IE = LI->end(); I != IE; ++I) - for (auto L = df_begin(*I), LE = df_end(*I); L != LE; ++L) - MadeChange |= runOnLoop(*L); + for (Loop *I : *LI) + for (Loop *L : depth_first(I)) + MadeChange |= runOnLoop(L); return MadeChange; } diff --git a/llvm/lib/Transforms/Scalar/LoopDataPrefetch.cpp b/llvm/lib/Transforms/Scalar/LoopDataPrefetch.cpp index 77d76609c926..57e36e5b9b90 100644 --- a/llvm/lib/Transforms/Scalar/LoopDataPrefetch.cpp +++ b/llvm/lib/Transforms/Scalar/LoopDataPrefetch.cpp @@ -224,8 +224,8 @@ bool LoopDataPrefetch::run() { bool MadeChange = false; for (Loop *I : *LI) - for (auto L = df_begin(I), LE = df_end(I); L != LE; ++L) - MadeChange |= runOnLoop(*L); + for (Loop *L : depth_first(I)) + MadeChange |= runOnLoop(L); return MadeChange; } From 77b923d0dbe30f99b27ad533ef4322ee176b2046 Mon Sep 17 00:00:00 2001 From: Serge Pavlov Date: Thu, 16 Dec 2021 18:05:41 +0700 Subject: [PATCH 025/293] [ConstantFolding] Do not remove side effect from constrained functions According to the discussion in https://reviews.llvm.org/D110322 the code that removes side effect from replaced function call is deleted. Differential Revision: https://reviews.llvm.org/D115870 --- llvm/lib/Analysis/ConstantFolding.cpp | 9 +-------- .../Transforms/InstSimplify/constfold-constrained.ll | 2 ++ llvm/test/Transforms/InstSimplify/fdiv-strictfp.ll | 4 +++- llvm/test/Transforms/InstSimplify/strictfp-fadd.ll | 1 + 4 files changed, 7 insertions(+), 9 deletions(-) diff --git a/llvm/lib/Analysis/ConstantFolding.cpp b/llvm/lib/Analysis/ConstantFolding.cpp index e4920c351cca..922b38e92785 100644 --- a/llvm/lib/Analysis/ConstantFolding.cpp +++ b/llvm/lib/Analysis/ConstantFolding.cpp @@ -1778,15 +1778,8 @@ static bool mayFoldConstrained(ConstrainedFPIntrinsic *CI, // If the operation does not change exception status flags, it is safe // to fold. - if (St == APFloat::opStatus::opOK) { - // When FP exceptions are not ignored, intrinsic call will not be - // eliminated, because it is considered as having side effect. But we - // know that its evaluation does not raise exceptions, so side effect - // is absent. To allow removing the call, mark it as not accessing memory. - if (EB && *EB != fp::ExceptionBehavior::ebIgnore) - CI->addFnAttr(Attribute::ReadNone); + if (St == APFloat::opStatus::opOK) return true; - } // If evaluation raised FP exception, the result can depend on rounding // mode. If the latter is unknown, folding is not possible. 
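As a minimal sketch of the intended effect (hand-written IR, not taken from this patch): with "fpexcept.strict" the folded constant still replaces uses of the result, but the call is no longer retagged ReadNone, so it stays in the IR and its FP-exception side effect is preserved:

    ; Hypothetical post-folding IR; the function name is illustrative.
    declare double @llvm.experimental.constrained.fadd.f64(double, double, metadata, metadata)

    define double @keeps_side_effect() #0 {
      ; the constrained call remains for its exception side effect ...
      %r = call double @llvm.experimental.constrained.fadd.f64(double 1.0, double 2.0, metadata !"round.tonearest", metadata !"fpexcept.strict") #0
      ; ... while the returned value has been folded to the constant
      ret double 3.000000e+00
    }

    attributes #0 = { strictfp }

The regression test updates below check exactly this shape: the constrained call line is now expected in the output instead of being dropped.
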
diff --git a/llvm/test/Transforms/InstSimplify/constfold-constrained.ll b/llvm/test/Transforms/InstSimplify/constfold-constrained.ll index 4db5fbff30af..2bfd4e03b1e6 100644 --- a/llvm/test/Transforms/InstSimplify/constfold-constrained.ll +++ b/llvm/test/Transforms/InstSimplify/constfold-constrained.ll @@ -282,6 +282,7 @@ entry: define double @fadd_05() #0 { ; CHECK-LABEL: @fadd_05( ; CHECK-NEXT: entry: +; CHECK-NEXT: [[RESULT:%.*]] = call double @llvm.experimental.constrained.fadd.f64(double 1.000000e+00, double 2.000000e+00, metadata !"round.tonearest", metadata !"fpexcept.strict") #[[ATTR0]] ; CHECK-NEXT: ret double 3.000000e+00 ; entry: @@ -293,6 +294,7 @@ entry: define double @fadd_06() #0 { ; CHECK-LABEL: @fadd_06( ; CHECK-NEXT: entry: +; CHECK-NEXT: [[RESULT:%.*]] = call double @llvm.experimental.constrained.fadd.f64(double 1.000000e+00, double 2.000000e+00, metadata !"round.dynamic", metadata !"fpexcept.strict") #[[ATTR0]] ; CHECK-NEXT: ret double 3.000000e+00 ; entry: diff --git a/llvm/test/Transforms/InstSimplify/fdiv-strictfp.ll b/llvm/test/Transforms/InstSimplify/fdiv-strictfp.ll index 82101a4ef828..39222b1916af 100644 --- a/llvm/test/Transforms/InstSimplify/fdiv-strictfp.ll +++ b/llvm/test/Transforms/InstSimplify/fdiv-strictfp.ll @@ -12,6 +12,7 @@ define float @fdiv_constant_fold() #0 { define float @fdiv_constant_fold_strict() #0 { ; CHECK-LABEL: @fdiv_constant_fold_strict( +; CHECK-NEXT: [[F:%.*]] = call float @llvm.experimental.constrained.fdiv.f32(float 3.000000e+00, float 2.000000e+00, metadata !"round.tonearest", metadata !"fpexcept.strict") #[[ATTR0:[0-9]+]] ; CHECK-NEXT: ret float 1.500000e+00 ; %f = call float @llvm.experimental.constrained.fdiv.f32(float 3.0, float 2.0, metadata !"round.tonearest", metadata !"fpexcept.strict") #0 @@ -21,7 +22,7 @@ define float @fdiv_constant_fold_strict() #0 { define float @fdiv_constant_fold_strict2() #0 { ; CHECK-LABEL: @fdiv_constant_fold_strict2( -; CHECK-NEXT: [[F:%.*]] = call float @llvm.experimental.constrained.fdiv.f32(float 2.000000e+00, float 3.000000e+00, metadata !"round.tonearest", metadata !"fpexcept.strict") #[[ATTR0:[0-9]+]] +; CHECK-NEXT: [[F:%.*]] = call float @llvm.experimental.constrained.fdiv.f32(float 2.000000e+00, float 3.000000e+00, metadata !"round.tonearest", metadata !"fpexcept.strict") #[[ATTR0]] ; CHECK-NEXT: ret float [[F]] ; %f = call float @llvm.experimental.constrained.fdiv.f32(float 2.0, float 3.0, metadata !"round.tonearest", metadata !"fpexcept.strict") #0 @@ -39,6 +40,7 @@ define float @frem_constant_fold() #0 { define float @frem_constant_fold_strict() #0 { ; CHECK-LABEL: @frem_constant_fold_strict( +; CHECK-NEXT: [[F:%.*]] = call float @llvm.experimental.constrained.frem.f32(float 3.000000e+00, float 2.000000e+00, metadata !"round.tonearest", metadata !"fpexcept.strict") #[[ATTR0]] ; CHECK-NEXT: ret float 1.000000e+00 ; %f = call float @llvm.experimental.constrained.frem.f32(float 3.0, float 2.0, metadata !"round.tonearest", metadata !"fpexcept.strict") #0 diff --git a/llvm/test/Transforms/InstSimplify/strictfp-fadd.ll b/llvm/test/Transforms/InstSimplify/strictfp-fadd.ll index 61b2cafc63ad..61d06e964ac5 100644 --- a/llvm/test/Transforms/InstSimplify/strictfp-fadd.ll +++ b/llvm/test/Transforms/InstSimplify/strictfp-fadd.ll @@ -363,6 +363,7 @@ define float @fold_fadd_qnan_qnan_ebmaytrap() #0 { define float @fold_fadd_qnan_qnan_ebstrict() #0 { ; CHECK-LABEL: @fold_fadd_qnan_qnan_ebstrict( +; CHECK-NEXT: [[ADD:%.*]] = call float @llvm.experimental.constrained.fadd.f32(float 0x7FF8000000000000, float 
0x7FF8000000000000, metadata !"round.tonearest", metadata !"fpexcept.strict") #[[ATTR0]] ; CHECK-NEXT: ret float 0x7FF8000000000000 ; %add = call float @llvm.experimental.constrained.fadd.f32(float 0x7ff8000000000000, float 0x7ff8000000000000, metadata !"round.tonearest", metadata !"fpexcept.strict") #0 From 682532ca575b8b6c4d3e2104a7526743ff9446a9 Mon Sep 17 00:00:00 2001 From: Jason Molenda Date: Wed, 22 Dec 2021 00:02:27 -0800 Subject: [PATCH 026/293] Support v2 of 'main bin spec' Mach-O LC_NOTE in corefiles Version 2 of 'main bin spec' LC_NOTE allows for the specification of a slide of where the binary is loaded in the corefile virtual address space. It also adds a (currently unused) platform field for the main binary. Some corefile creators will only have a UUID and an offset to be applied to the binary. Changed TestFirmwareCorefiles.py to test this new form of 'main bin spec' with a slide, and also to run on both x86_64 and arm64 macOS systems. Differential Revision: https://reviews.llvm.org/D116094 rdar://85938455 --- lldb/include/lldb/Symbol/ObjectFile.h | 20 +- .../ObjectFile/Mach-O/ObjectFileMachO.cpp | 64 +++- .../ObjectFile/Mach-O/ObjectFileMachO.h | 2 +- .../Process/mach-core/ProcessMachCore.cpp | 68 ++-- .../TestFirmwareCorefiles.py | 329 ++++++++++-------- .../create-empty-corefile.cpp | 115 +++--- 6 files changed, 366 insertions(+), 232 deletions(-) diff --git a/lldb/include/lldb/Symbol/ObjectFile.h b/lldb/include/lldb/Symbol/ObjectFile.h index bfc5d382dbf9..8bf047cd4514 100644 --- a/lldb/include/lldb/Symbol/ObjectFile.h +++ b/lldb/include/lldb/Symbol/ObjectFile.h @@ -527,11 +527,15 @@ class ObjectFile : public std::enable_shared_from_this, /// binary is exactly which removes ambiguity when there are multiple /// binaries present in the captured memory pages. /// - /// \param[out] address - /// If the address of the binary is specified, this will be set. - /// This is an address is the virtual address space of the core file - /// memory segments; it is not an offset into the object file. - /// If no address is available, will be set to LLDB_INVALID_ADDRESS. + /// \param[out] value + /// The address or offset (slide) where the binary is loaded in memory. + /// LLDB_INVALID_ADDRESS for unspecified. If an offset is given, + /// this offset should be added to the binary's file address to get + /// the load address. + /// + /// \param[out] value_is_offset + /// Specifies if \b value is a load address, or an offset to calculate + /// the load address. /// /// \param[out] uuid /// If the uuid of the binary is specified, this will be set. @@ -543,9 +547,11 @@ class ObjectFile : public std::enable_shared_from_this, /// /// \return /// Returns true if either address or uuid has been set. 
- virtual bool GetCorefileMainBinaryInfo(lldb::addr_t &address, UUID &uuid, + virtual bool GetCorefileMainBinaryInfo(lldb::addr_t &value, + bool &value_is_offset, UUID &uuid, ObjectFile::BinaryType &type) { - address = LLDB_INVALID_ADDRESS; + value = LLDB_INVALID_ADDRESS; + value_is_offset = false; uuid.Clear(); return false; } diff --git a/lldb/source/Plugins/ObjectFile/Mach-O/ObjectFileMachO.cpp b/lldb/source/Plugins/ObjectFile/Mach-O/ObjectFileMachO.cpp index e0087dbd4941..7668b68650b4 100644 --- a/lldb/source/Plugins/ObjectFile/Mach-O/ObjectFileMachO.cpp +++ b/lldb/source/Plugins/ObjectFile/Mach-O/ObjectFileMachO.cpp @@ -5620,10 +5620,15 @@ addr_t ObjectFileMachO::GetAddressMask() { return mask; } -bool ObjectFileMachO::GetCorefileMainBinaryInfo(addr_t &address, UUID &uuid, +bool ObjectFileMachO::GetCorefileMainBinaryInfo(addr_t &value, + bool &value_is_offset, + UUID &uuid, ObjectFile::BinaryType &type) { - address = LLDB_INVALID_ADDRESS; + value = LLDB_INVALID_ADDRESS; + value_is_offset = false; uuid.Clear(); + uint32_t log2_pagesize = 0; // not currently passed up to caller + uint32_t platform = 0; // not currently passed up to caller ModuleSP module_sp(GetModule()); if (module_sp) { std::lock_guard guard(module_sp->GetMutex()); @@ -5641,6 +5646,26 @@ bool ObjectFileMachO::GetCorefileMainBinaryInfo(addr_t &address, UUID &uuid, uint64_t fileoff = m_data.GetU64_unchecked(&offset); uint64_t size = m_data.GetU64_unchecked(&offset); + // struct main_bin_spec + // { + // uint32_t version; // currently 2 + // uint32_t type; // 0 == unspecified, 1 == kernel, + // // 2 == user process, + // // 3 == standalone binary + // uint64_t address; // UINT64_MAX if address not specified + // uint64_t slide; // slide, UINT64_MAX if unspecified + // // 0 if no slide needs to be applied to + // // file address + // uuid_t uuid; // all zero's if uuid not specified + // uint32_t log2_pagesize; // process page size in log base 2, + // // e.g. 4k pages are 12. + // // 0 for unspecified + // uint32_t platform; // The Mach-O platform for this corefile. + // // 0 for unspecified. + // // The values are defined in + // // , PLATFORM_*. 
+ // } __attribute((packed)); + // "main bin spec" (main binary specification) data payload is // formatted: // uint32_t version [currently 1] @@ -5656,14 +5681,25 @@ bool ObjectFileMachO::GetCorefileMainBinaryInfo(addr_t &address, UUID &uuid, if (strcmp("main bin spec", data_owner) == 0 && size >= 32) { offset = fileoff; uint32_t version; - if (m_data.GetU32(&offset, &version, 1) != nullptr && version == 1) { + if (m_data.GetU32(&offset, &version, 1) != nullptr && version <= 2) { uint32_t binspec_type = 0; uuid_t raw_uuid; memset(raw_uuid, 0, sizeof(uuid_t)); - if (m_data.GetU32(&offset, &binspec_type, 1) && - m_data.GetU64(&offset, &address, 1) && - m_data.CopyData(offset, sizeof(uuid_t), raw_uuid) != 0) { + if (!m_data.GetU32(&offset, &binspec_type, 1)) + return false; + if (!m_data.GetU64(&offset, &value, 1)) + return false; + uint64_t slide = LLDB_INVALID_ADDRESS; + if (version > 1 && !m_data.GetU64(&offset, &slide, 1)) + return false; + if (value == LLDB_INVALID_ADDRESS && + slide != LLDB_INVALID_ADDRESS) { + value = slide; + value_is_offset = true; + } + + if (m_data.CopyData(offset, sizeof(uuid_t), raw_uuid) != 0) { uuid = UUID::fromOptionalData(raw_uuid, sizeof(uuid_t)); // convert the "main bin spec" type into our // ObjectFile::BinaryType enum @@ -5681,6 +5717,10 @@ bool ObjectFileMachO::GetCorefileMainBinaryInfo(addr_t &address, UUID &uuid, type = eBinaryTypeStandalone; break; } + if (!m_data.GetU32(&offset, &log2_pagesize, 1)) + return false; + if (version > 1 && !m_data.GetU32(&offset, &platform, 1)) + return false; return true; } } @@ -7006,7 +7046,17 @@ bool ObjectFileMachO::LoadCoreFileImages(lldb_private::Process &process) { for (const MachOCorefileImageEntry &image : image_infos.all_image_infos) { ModuleSpec module_spec; module_spec.GetUUID() = image.uuid; - module_spec.GetFileSpec() = FileSpec(image.filename.c_str()); + if (image.filename.empty()) { + char namebuf[80]; + if (image.load_address != LLDB_INVALID_ADDRESS) + snprintf(namebuf, sizeof(namebuf), "mem-image-0x%" PRIx64, + image.load_address); + else + snprintf(namebuf, sizeof(namebuf), "mem-image+0x%" PRIx64, image.slide); + module_spec.GetFileSpec() = FileSpec(namebuf); + } else { + module_spec.GetFileSpec() = FileSpec(image.filename.c_str()); + } if (image.currently_executing) { Symbols::DownloadObjectAndSymbolFile(module_spec, true); if (FileSystem::Instance().Exists(module_spec.GetFileSpec())) { diff --git a/lldb/source/Plugins/ObjectFile/Mach-O/ObjectFileMachO.h b/lldb/source/Plugins/ObjectFile/Mach-O/ObjectFileMachO.h index 084b0ac110ba..c666ef32b85e 100644 --- a/lldb/source/Plugins/ObjectFile/Mach-O/ObjectFileMachO.h +++ b/lldb/source/Plugins/ObjectFile/Mach-O/ObjectFileMachO.h @@ -120,7 +120,7 @@ class ObjectFileMachO : public lldb_private::ObjectFile { lldb::addr_t GetAddressMask() override; - bool GetCorefileMainBinaryInfo(lldb::addr_t &address, + bool GetCorefileMainBinaryInfo(lldb::addr_t &value, bool &value_is_offset, lldb_private::UUID &uuid, ObjectFile::BinaryType &type) override; diff --git a/lldb/source/Plugins/Process/mach-core/ProcessMachCore.cpp b/lldb/source/Plugins/Process/mach-core/ProcessMachCore.cpp index 6aed04565eb0..4b3e244dcb23 100644 --- a/lldb/source/Plugins/Process/mach-core/ProcessMachCore.cpp +++ b/lldb/source/Plugins/Process/mach-core/ProcessMachCore.cpp @@ -184,7 +184,8 @@ bool ProcessMachCore::GetDynamicLoaderAddress(lldb::addr_t addr) { // Try to load a file with that UUID into lldb, and if we have a load // address, set it correctly. 
Else assume that the binary was loaded // with no slide. -static bool load_standalone_binary(UUID uuid, addr_t addr, Target &target) { +static bool load_standalone_binary(UUID uuid, addr_t value, + bool value_is_offset, Target &target) { if (uuid.IsValid()) { ModuleSpec module_spec; module_spec.GetUUID() = uuid; @@ -205,18 +206,34 @@ static bool load_standalone_binary(UUID uuid, addr_t addr, Target &target) { } } - if (module_sp.get() && module_sp->GetObjectFile()) { + // If we couldn't find the binary anywhere else, as a last resort, + // read it out of memory in the corefile. + if (!module_sp.get() && value != LLDB_INVALID_ADDRESS && !value_is_offset) { + char namebuf[80]; + snprintf(namebuf, sizeof(namebuf), "mem-image-0x%" PRIx64, value); + module_sp = + target.GetProcessSP()->ReadModuleFromMemory(FileSpec(namebuf), value); + } + + if (module_sp.get()) { target.SetArchitecture(module_sp->GetObjectFile()->GetArchitecture()); target.GetImages().AppendIfNeeded(module_sp, false); - Address base_addr = module_sp->GetObjectFile()->GetBaseAddress(); - addr_t slide = 0; - if (addr != LLDB_INVALID_ADDRESS && base_addr.IsValid()) { - addr_t file_load_addr = base_addr.GetFileAddress(); - slide = addr - file_load_addr; - } bool changed = false; - module_sp->SetLoadAddress(target, slide, true, changed); + if (module_sp->GetObjectFile()) { + if (value != LLDB_INVALID_ADDRESS) { + module_sp->SetLoadAddress(target, value, value_is_offset, changed); + } else { + // No address/offset/slide, load the binary at file address, + // offset 0. + const bool value_is_slide = true; + module_sp->SetLoadAddress(target, 0, value_is_slide, changed); + } + } else { + // In-memory image, load at its true address, offset 0. + const bool value_is_slide = true; + module_sp->SetLoadAddress(target, 0, value_is_slide, changed); + } ModuleList added_module; added_module.Append(module_sp, false); @@ -316,33 +333,38 @@ Status ProcessMachCore::DoLoadCore() { bool found_main_binary_definitively = false; - addr_t objfile_binary_addr; + addr_t objfile_binary_value; + bool objfile_binary_value_is_offset; UUID objfile_binary_uuid; ObjectFile::BinaryType type; - if (core_objfile->GetCorefileMainBinaryInfo(objfile_binary_addr, + if (core_objfile->GetCorefileMainBinaryInfo(objfile_binary_value, + objfile_binary_value_is_offset, objfile_binary_uuid, type)) { if (log) { log->Printf( "ProcessMachCore::DoLoadCore: using binary hint from 'main bin spec' " - "LC_NOTE with UUID %s address 0x%" PRIx64 " and type %d", - objfile_binary_uuid.GetAsString().c_str(), objfile_binary_addr, type); + "LC_NOTE with UUID %s value 0x%" PRIx64 + " value is offset %d and type %d", + objfile_binary_uuid.GetAsString().c_str(), objfile_binary_value, + objfile_binary_value_is_offset, type); } - if (objfile_binary_addr != LLDB_INVALID_ADDRESS) { + if (objfile_binary_value != LLDB_INVALID_ADDRESS && + !objfile_binary_value_is_offset) { if (type == ObjectFile::eBinaryTypeUser) { - m_dyld_addr = objfile_binary_addr; + m_dyld_addr = objfile_binary_value; m_dyld_plugin_name = DynamicLoaderMacOSXDYLD::GetPluginNameStatic(); found_main_binary_definitively = true; } if (type == ObjectFile::eBinaryTypeKernel) { - m_mach_kernel_addr = objfile_binary_addr; + m_mach_kernel_addr = objfile_binary_value; m_dyld_plugin_name = DynamicLoaderDarwinKernel::GetPluginNameStatic(); found_main_binary_definitively = true; } } if (!found_main_binary_definitively) { // ObjectFile::eBinaryTypeStandalone, undeclared types - if (load_standalone_binary(objfile_binary_uuid, objfile_binary_addr, 
- GetTarget())) { + if (load_standalone_binary(objfile_binary_uuid, objfile_binary_value, + objfile_binary_value_is_offset, GetTarget())) { found_main_binary_definitively = true; m_dyld_plugin_name = DynamicLoaderStatic::GetPluginNameStatic(); } @@ -388,17 +410,21 @@ Status ProcessMachCore::DoLoadCore() { m_mach_kernel_addr = ident_binary_addr; found_main_binary_definitively = true; } else if (ident_uuid.IsValid()) { - if (load_standalone_binary(ident_uuid, ident_binary_addr, GetTarget())) { + // We have no address specified, only a UUID. Load it at the file + // address. + const bool value_is_offset = false; + if (load_standalone_binary(ident_uuid, ident_binary_addr, value_is_offset, + GetTarget())) { found_main_binary_definitively = true; m_dyld_plugin_name = DynamicLoaderStatic::GetPluginNameStatic(); } } } + bool did_load_extra_binaries = core_objfile->LoadCoreFileImages(*this); // If we have a "all image infos" LC_NOTE, try to load all of the // binaries listed, and set their Section load addresses in the Target. - if (found_main_binary_definitively == false && - core_objfile->LoadCoreFileImages(*this)) { + if (found_main_binary_definitively == false && did_load_extra_binaries) { m_dyld_plugin_name = DynamicLoaderDarwinKernel::GetPluginNameStatic(); found_main_binary_definitively = true; } diff --git a/lldb/test/API/macosx/lc-note/firmware-corefile/TestFirmwareCorefiles.py b/lldb/test/API/macosx/lc-note/firmware-corefile/TestFirmwareCorefiles.py index a28b01bb1789..6600c6344c7d 100644 --- a/lldb/test/API/macosx/lc-note/firmware-corefile/TestFirmwareCorefiles.py +++ b/lldb/test/API/macosx/lc-note/firmware-corefile/TestFirmwareCorefiles.py @@ -16,26 +16,174 @@ class TestFirmwareCorefiles(TestBase): mydir = TestBase.compute_mydir(__file__) - def initial_setup(self): + @skipIf(debug_info=no_match(["dsym"]), bugnumber="This test is looking explicitly for a dSYM") + @skipIf(archs=no_match(['x86_64', 'arm64', 'arm64e', 'aarch64'])) + @skipIfRemote + @skipUnlessDarwin + def test_lc_note_version_string(self): + self.build() + aout_exe_basename = "a.out" + aout_exe = self.getBuildArtifact(aout_exe_basename) + verstr_corefile = self.getBuildArtifact("verstr.core") + verstr_corefile_addr = self.getBuildArtifact("verstr-addr.core") + create_corefile = self.getBuildArtifact("create-empty-corefile") + slide = 0x70000000000 + call(create_corefile + " version-string " + verstr_corefile + " " + aout_exe + " 0xffffffffffffffff 0xffffffffffffffff", shell=True) + call(create_corefile + " version-string " + verstr_corefile_addr + " " + aout_exe + (" 0x%x" % slide) + " 0xffffffffffffffff", shell=True) + + if self.TraceOn(): + self.runCmd("log enable lldb dyld host") + self.addTearDownHook(lambda: self.runCmd("log disable lldb dyld host")) + + # Register the a.out binary with this UUID in lldb's global module + # cache, then throw the Target away. 
+ target = self.dbg.CreateTarget(aout_exe) + self.dbg.DeleteTarget(target) + + # First, try the "kern ver str" corefile + target = self.dbg.CreateTarget('') + err = lldb.SBError() + if self.TraceOn(): + self.runCmd("script print('loading corefile %s')" % verstr_corefile) + process = target.LoadCore(verstr_corefile) + self.assertEqual(process.IsValid(), True) + if self.TraceOn(): + self.runCmd("image list") + self.runCmd("target mod dump sections") + self.assertEqual(target.GetNumModules(), 1) + fspec = target.GetModuleAtIndex(0).GetFileSpec() + self.assertEqual(fspec.GetFilename(), aout_exe_basename) + self.dbg.DeleteTarget(target) + + # Second, try the "kern ver str" corefile where it loads at an address + target = self.dbg.CreateTarget('') + err = lldb.SBError() + if self.TraceOn(): + self.runCmd("script print('loading corefile %s')" % verstr_corefile_addr) + process = target.LoadCore(verstr_corefile_addr) + self.assertEqual(process.IsValid(), True) + if self.TraceOn(): + self.runCmd("image list") + self.runCmd("target mod dump sections") + self.assertEqual(target.GetNumModules(), 1) + fspec = target.GetModuleAtIndex(0).GetFileSpec() + self.assertEqual(fspec.GetFilename(), aout_exe_basename) + main_sym = target.GetModuleAtIndex(0).FindSymbol("main", lldb.eSymbolTypeAny) + main_addr = main_sym.GetStartAddress() + self.assertGreater(main_addr.GetLoadAddress(target), slide) + self.assertNotEqual(main_addr.GetLoadAddress(target), lldb.LLDB_INVALID_ADDRESS) + self.dbg.DeleteTarget(target) + + @skipIf(debug_info=no_match(["dsym"]), bugnumber="This test is looking explicitly for a dSYM") + @skipIf(archs=no_match(['x86_64', 'arm64', 'arm64e', 'aarch64'])) + @skipIfRemote + @skipUnlessDarwin + def test_lc_note_main_bin_spec(self): + self.build() + aout_exe_basename = "a.out" + aout_exe = self.getBuildArtifact(aout_exe_basename) + create_corefile = self.getBuildArtifact("create-empty-corefile") + binspec_corefile = self.getBuildArtifact("binspec.core") + binspec_corefile_addr = self.getBuildArtifact("binspec-addr.core") + binspec_corefile_slideonly = self.getBuildArtifact("binspec-addr-slideonly.core") + + slide = 0x70000000000 + + ### Create our corefile + # 0xffffffffffffffff means load address unknown + call(create_corefile + " main-bin-spec " + binspec_corefile + " " + aout_exe + " 0xffffffffffffffff 0xffffffffffffffff", shell=True) + call(create_corefile + " main-bin-spec " + binspec_corefile_addr + " " + aout_exe + (" 0x%x" % slide) + " 0xffffffffffffffff", shell=True) + call(create_corefile + " main-bin-spec " + binspec_corefile_slideonly + " " + aout_exe + " 0xffffffffffffffff" + (" 0x%x" % slide), shell=True) + + if self.TraceOn(): + self.runCmd("log enable lldb dyld host") + self.addTearDownHook(lambda: self.runCmd("log disable lldb dyld host")) + + # Register the a.out binary with this UUID in lldb's global module + # cache, then throw the Target away. 
+ target = self.dbg.CreateTarget(aout_exe) + self.dbg.DeleteTarget(target) + + # First, try the "main bin spec" corefile + target = self.dbg.CreateTarget('') + if self.TraceOn(): + self.runCmd("script print('loading corefile %s')" % binspec_corefile) + process = target.LoadCore(binspec_corefile) + self.assertEqual(process.IsValid(), True) + if self.TraceOn(): + self.runCmd("image list") + self.runCmd("target mod dump sections") + self.assertEqual(target.GetNumModules(), 1) + fspec = target.GetModuleAtIndex(0).GetFileSpec() + self.assertEqual(fspec.GetFilename(), aout_exe_basename) + self.dbg.DeleteTarget(target) + + # Second, try the "main bin spec" corefile where it loads at an address + target = self.dbg.CreateTarget('') + if self.TraceOn(): + self.runCmd("script print('loading corefile %s')" % binspec_corefile_addr) + process = target.LoadCore(binspec_corefile_addr) + self.assertEqual(process.IsValid(), True) + if self.TraceOn(): + self.runCmd("image list") + self.runCmd("target mod dump sections") + self.assertEqual(target.GetNumModules(), 1) + fspec = target.GetModuleAtIndex(0).GetFileSpec() + self.assertEqual(fspec.GetFilename(), aout_exe_basename) + main_sym = target.GetModuleAtIndex(0).FindSymbol("main", lldb.eSymbolTypeAny) + main_addr = main_sym.GetStartAddress() + self.assertGreater(main_addr.GetLoadAddress(target), slide) + self.assertNotEqual(main_addr.GetLoadAddress(target), lldb.LLDB_INVALID_ADDRESS) + self.dbg.DeleteTarget(target) + + # Third, try the "main bin spec" corefile where it loads at a slide + target = self.dbg.CreateTarget('') + if self.TraceOn(): + self.runCmd("script print('loading corefile %s')" % binspec_corefile_slideonly) + process = target.LoadCore(binspec_corefile_slideonly) + self.assertEqual(process.IsValid(), True) + if self.TraceOn(): + self.runCmd("image list") + self.runCmd("target mod dump sections") + self.assertEqual(target.GetNumModules(), 1) + fspec = target.GetModuleAtIndex(0).GetFileSpec() + self.assertEqual(fspec.GetFilename(), aout_exe_basename) + main_sym = target.GetModuleAtIndex(0).FindSymbol("main", lldb.eSymbolTypeAny) + main_addr = main_sym.GetStartAddress() + self.assertGreater(main_addr.GetLoadAddress(target), slide) + self.assertNotEqual(main_addr.GetLoadAddress(target), lldb.LLDB_INVALID_ADDRESS) + self.dbg.DeleteTarget(target) + + @skipIf(debug_info=no_match(["dsym"]), bugnumber="This test is looking explicitly for a dSYM") + @skipIf(archs=no_match(['x86_64', 'arm64', 'arm64e', 'aarch64'])) + @skipIfRemote + @skipUnlessDarwin + def test_lc_note_main_bin_spec_os_plugin(self): + self.build() - self.aout_exe = self.getBuildArtifact("a.out") - self.aout_exe_basename = "a.out" - self.create_corefile = self.getBuildArtifact("create-empty-corefile") - self.dsym_for_uuid = self.getBuildArtifact("dsym-for-uuid.sh") - self.verstr_corefile = self.getBuildArtifact("verstr.core") - self.verstr_corefile_addr = self.getBuildArtifact("verstr-addr.core") - self.binspec_corefile = self.getBuildArtifact("binspec.core") - self.binspec_corefile_addr = self.getBuildArtifact("binspec-addr.core") + aout_exe = self.getBuildArtifact("a.out") + aout_exe_basename = "a.out" + create_corefile = self.getBuildArtifact("create-empty-corefile") + binspec_corefile_addr = self.getBuildArtifact("binspec-addr.core") + + slide = 0x70000000000 + + ### Create our corefile + # 0xffffffffffffffff means load address unknown + call(create_corefile + " main-bin-spec " + binspec_corefile_addr + " " + aout_exe + (" 0x%x" % slide) + " 0xffffffffffffffff", shell=True) ## We can 
hook in our dsym-for-uuid shell script to lldb with this env ## var instead of requiring a defaults write. - os.environ['LLDB_APPLE_DSYMFORUUID_EXECUTABLE'] = self.dsym_for_uuid + dsym_for_uuid = self.getBuildArtifact("dsym-for-uuid.sh") + os.environ['LLDB_APPLE_DSYMFORUUID_EXECUTABLE'] = dsym_for_uuid + if self.TraceOn(): + print("Setting env var LLDB_APPLE_DSYMFORUUID_EXECUTABLE=" + dsym_for_uuid) self.addTearDownHook(lambda: os.environ.pop('LLDB_APPLE_DSYMFORUUID_EXECUTABLE', None)) self.runCmd("settings set target.load-script-from-symbol-file true") self.addTearDownHook(lambda: self.runCmd("settings set target.load-script-from-symbol-file false")) - dsym_python_dir = '%s.dSYM/Contents/Resources/Python' % (self.aout_exe) + dsym_python_dir = os.path.join('%s.dSYM' % aout_exe, 'Contents', 'Resources', 'Python') os.makedirs(dsym_python_dir) python_os_plugin_path = os.path.join(self.getSourceDir(), 'operating_system.py') @@ -43,14 +191,14 @@ def initial_setup(self): 'def __lldb_init_module(debugger, internal_dict):', ' debugger.HandleCommand(\'settings set target.process.python-os-plugin-path %s\')' % python_os_plugin_path, ] - with open(dsym_python_dir + "/a_out.py", "w") as writer: + with open(os.path.join(dsym_python_dir, "a_out.py"), "w") as writer: for l in python_init: writer.write(l + '\n') dwarfdump_uuid_regex = re.compile( 'UUID: ([-0-9a-fA-F]+) \(([^\(]+)\) .*') dwarfdump_cmd_output = subprocess.check_output( - ('/usr/bin/dwarfdump --uuid "%s"' % self.aout_exe), shell=True).decode("utf-8") + ('/usr/bin/dwarfdump --uuid "%s"' % aout_exe), shell=True).decode("utf-8") aout_uuid = None for line in dwarfdump_cmd_output.splitlines(): match = dwarfdump_uuid_regex.search(line) @@ -58,7 +206,7 @@ def initial_setup(self): aout_uuid = match.group(1) self.assertNotEqual(aout_uuid, None, "Could not get uuid of built a.out") - ### Create our dsym-for-uuid shell script which returns self.aout_exe + ### Create our dsym-for-uuid shell script which returns aout_exe shell_cmds = [ '#! 
/bin/sh', '# the last argument is the uuid', @@ -78,8 +226,8 @@ def initial_setup(self): ' exit 1', 'fi', ' uuid=%s' % aout_uuid, - ' bin=%s' % self.aout_exe, - ' dsym=%s.dSYM/Contents/Resources/DWARF/%s' % (self.aout_exe, os.path.basename(self.aout_exe)), + ' bin=%s' % aout_exe, + ' dsym=%s.dSYM/Contents/Resources/DWARF/%s' % (aout_exe, os.path.basename(aout_exe)), 'echo "$uuid"', '', 'echo "DBGDSYMPath$dsym"', @@ -88,30 +236,11 @@ def initial_setup(self): 'exit $ret' ] - with open(self.dsym_for_uuid, "w") as writer: + with open(dsym_for_uuid, "w") as writer: for l in shell_cmds: writer.write(l + '\n') - os.chmod(self.dsym_for_uuid, 0o755) - - self.slide = 0x70000000000 - - ### Create our corefile - # 0xffffffffffffffff means load address unknown - retcode = call(self.create_corefile + " version-string " + self.verstr_corefile + " " + self.aout_exe + " 0xffffffffffffffff", shell=True) - retcode = call(self.create_corefile + " version-string " + self.verstr_corefile_addr + " " + self.aout_exe + (" 0x%x" % self.slide), shell=True) - retcode = call(self.create_corefile + " main-bin-spec " + self.binspec_corefile + " " + self.aout_exe + " 0xffffffffffffffff", shell=True) - retcode = call(self.create_corefile + " main-bin-spec " + self.binspec_corefile_addr + " " + self.aout_exe + (" 0x%x" % self.slide), shell=True) - - @skipIf(debug_info=no_match(["dsym"]), bugnumber="This test is looking explicitly for a dSYM") - @skipIf(archs=no_match(['x86_64'])) - @skipUnlessDarwin - def test_lc_note_version_string(self): - self.initial_setup() - - if self.TraceOn(): - self.runCmd("log enable lldb dyld host") - self.addTearDownHook(lambda: self.runCmd("log disable lldb dyld host")) + os.chmod(dsym_for_uuid, 0o755) ### Now run lldb on the corefile ### which will give us a UUID @@ -119,135 +248,35 @@ def test_lc_note_version_string(self): ### which gives us a binary and dSYM ### which lldb should load! 
- # First, try the "kern ver str" corefile - self.target = self.dbg.CreateTarget('') - err = lldb.SBError() - if self.TraceOn(): - self.runCmd("script print('loading corefile %s')" % self.verstr_corefile) - self.process = self.target.LoadCore(self.verstr_corefile) - self.assertEqual(self.process.IsValid(), True) - if self.TraceOn(): - self.runCmd("image list") - self.runCmd("target mod dump sections") - self.assertEqual(self.target.GetNumModules(), 1) - fspec = self.target.GetModuleAtIndex(0).GetFileSpec() - self.assertEqual(fspec.GetFilename(), self.aout_exe_basename) - self.process.Kill() - self.process = None - self.target.Clear() - self.target = None - self.dbg.MemoryPressureDetected() - - # Second, try the "kern ver str" corefile where it loads at an address - self.target = self.dbg.CreateTarget('') - err = lldb.SBError() - if self.TraceOn(): - self.runCmd("script print('loading corefile %s')" % self.verstr_corefile_addr) - self.process = self.target.LoadCore(self.verstr_corefile_addr) - self.assertEqual(self.process.IsValid(), True) - if self.TraceOn(): - self.runCmd("image list") - self.runCmd("target mod dump sections") - self.assertEqual(self.target.GetNumModules(), 1) - fspec = self.target.GetModuleAtIndex(0).GetFileSpec() - self.assertEqual(fspec.GetFilename(), self.aout_exe_basename) - main_sym = self.target.GetModuleAtIndex(0).FindSymbol("main", lldb.eSymbolTypeAny) - main_addr = main_sym.GetStartAddress() - self.assertGreater(main_addr.GetLoadAddress(self.target), self.slide) - self.assertNotEqual(main_addr.GetLoadAddress(self.target), lldb.LLDB_INVALID_ADDRESS) - self.process.Kill() - self.process = None - self.target.Clear() - self.target = None - self.dbg.MemoryPressureDetected() - - @skipIf(debug_info=no_match(["dsym"]), bugnumber="This test is looking explicitly for a dSYM") - @skipIf(archs=no_match(['x86_64'])) - @skipUnlessDarwin - def test_lc_note_main_bin_spec(self): - self.initial_setup() - - if self.TraceOn(): - self.runCmd("log enable lldb dyld host") - self.addTearDownHook(lambda: self.runCmd("log disable lldb dyld host")) - - # Third, try the "main bin spec" corefile - self.target = self.dbg.CreateTarget('') - if self.TraceOn(): - self.runCmd("script print('loading corefile %s')" % self.binspec_corefile) - self.process = self.target.LoadCore(self.binspec_corefile) - self.assertEqual(self.process.IsValid(), True) - if self.TraceOn(): - self.runCmd("image list") - self.runCmd("target mod dump sections") - self.assertEqual(self.target.GetNumModules(), 1) - fspec = self.target.GetModuleAtIndex(0).GetFileSpec() - self.assertEqual(fspec.GetFilename(), self.aout_exe_basename) - self.process.Kill() - self.process = None - self.target.Clear() - self.target = None - self.dbg.MemoryPressureDetected() - - # Fourth, try the "main bin spec" corefile where it loads at an address - self.target = self.dbg.CreateTarget('') - if self.TraceOn(): - self.runCmd("script print('loading corefile %s')" % self.binspec_corefile_addr) - self.process = self.target.LoadCore(self.binspec_corefile_addr) - self.assertEqual(self.process.IsValid(), True) - if self.TraceOn(): - self.runCmd("image list") - self.runCmd("target mod dump sections") - self.assertEqual(self.target.GetNumModules(), 1) - fspec = self.target.GetModuleAtIndex(0).GetFileSpec() - self.assertEqual(fspec.GetFilename(), self.aout_exe_basename) - main_sym = self.target.GetModuleAtIndex(0).FindSymbol("main", lldb.eSymbolTypeAny) - main_addr = main_sym.GetStartAddress() - self.assertGreater(main_addr.GetLoadAddress(self.target), 
self.slide) - self.assertNotEqual(main_addr.GetLoadAddress(self.target), lldb.LLDB_INVALID_ADDRESS) - self.process.Kill() - self.process = None - self.target.Clear() - self.target = None - self.dbg.MemoryPressureDetected() - - @skipIf(debug_info=no_match(["dsym"]), bugnumber="This test is looking explicitly for a dSYM") - @skipIf(archs=no_match(['x86_64'])) - @skipUnlessDarwin - def test_lc_note_main_bin_spec_os_plugin(self): - self.initial_setup() - if self.TraceOn(): self.runCmd("log enable lldb dyld host") self.addTearDownHook(lambda: self.runCmd("log disable lldb dyld host")) # Now load the binary and confirm that we load the OS plugin. - self.target = self.dbg.CreateTarget('') + target = self.dbg.CreateTarget('') if self.TraceOn(): - self.runCmd("script print('loading corefile %s with OS plugin')" % self.binspec_corefile_addr) - self.process = self.target.LoadCore(self.binspec_corefile_addr) - self.assertEqual(self.process.IsValid(), True) + self.runCmd("script print('loading corefile %s with OS plugin')" % binspec_corefile_addr) + + process = target.LoadCore(binspec_corefile_addr) + self.assertEqual(process.IsValid(), True) if self.TraceOn(): self.runCmd("image list") self.runCmd("target mod dump sections") self.runCmd("thread list") - self.assertEqual(self.target.GetNumModules(), 1) - fspec = self.target.GetModuleAtIndex(0).GetFileSpec() - self.assertEqual(fspec.GetFilename(), self.aout_exe_basename) + self.assertEqual(target.GetNumModules(), 1) + fspec = target.GetModuleAtIndex(0).GetFileSpec() + self.assertEqual(fspec.GetFilename(), aout_exe_basename) # Verify our OS plug-in threads showed up - thread = self.process.GetThreadByID(0x111111111) + thread = process.GetThreadByID(0x111111111) self.assertTrue(thread.IsValid(), "Make sure there is a thread 0x111111111 after we load the python OS plug-in") - thread = self.process.GetThreadByID(0x222222222) + thread = process.GetThreadByID(0x222222222) self.assertTrue(thread.IsValid(), "Make sure there is a thread 0x222222222 after we load the python OS plug-in") - thread = self.process.GetThreadByID(0x333333333) + thread = process.GetThreadByID(0x333333333) self.assertTrue(thread.IsValid(), "Make sure there is a thread 0x333333333 after we load the python OS plug-in") - self.process.Kill() - self.process = None - self.target.Clear() - self.target = None - self.dbg.MemoryPressureDetected() + self.runCmd("settings clear target.process.python-os-plugin-path") + self.dbg.DeleteTarget(target) diff --git a/lldb/test/API/macosx/lc-note/firmware-corefile/create-empty-corefile.cpp b/lldb/test/API/macosx/lc-note/firmware-corefile/create-empty-corefile.cpp index 2845fd453150..8bd6aaabecd6 100644 --- a/lldb/test/API/macosx/lc-note/firmware-corefile/create-empty-corefile.cpp +++ b/lldb/test/API/macosx/lc-note/firmware-corefile/create-empty-corefile.cpp @@ -20,9 +20,10 @@ struct main_bin_spec_payload { uint32_t version; uint32_t type; uint64_t address; + uint64_t slide; uuid_t uuid; uint32_t log2_pagesize; - uint32_t unused; + uint32_t platform; }; union uint32_buf { @@ -49,33 +50,36 @@ void add_uint32(std::vector &buf, uint32_t val) { buf.push_back(conv.bytebuf[i]); } -std::vector x86_lc_thread_load_command() { +std::vector lc_thread_load_command(cpu_type_t cputype) { std::vector data; - add_uint32(data, LC_THREAD); // thread_command.cmd - add_uint32(data, 184); // thread_command.cmdsize - add_uint32(data, x86_THREAD_STATE64); // thread_command.flavor - add_uint32(data, x86_THREAD_STATE64_COUNT); // thread_command.count - add_uint64(data, 
0x0000000000000000); // rax - add_uint64(data, 0x0000000000000400); // rbx - add_uint64(data, 0x0000000000000000); // rcx - add_uint64(data, 0x0000000000000000); // rdx - add_uint64(data, 0x0000000000000000); // rdi - add_uint64(data, 0x0000000000000000); // rsi - add_uint64(data, 0xffffff9246e2ba20); // rbp - add_uint64(data, 0xffffff9246e2ba10); // rsp - add_uint64(data, 0x0000000000000000); // r8 - add_uint64(data, 0x0000000000000000); // r9 - add_uint64(data, 0x0000000000000000); // r10 - add_uint64(data, 0x0000000000000000); // r11 - add_uint64(data, 0xffffff7f96ce5fe1); // r12 - add_uint64(data, 0x0000000000000000); // r13 - add_uint64(data, 0x0000000000000000); // r14 - add_uint64(data, 0xffffff9246e2bac0); // r15 - add_uint64(data, 0xffffff8015a8f6d0); // rip - add_uint64(data, 0x0000000000011111); // rflags - add_uint64(data, 0x0000000000022222); // cs - add_uint64(data, 0x0000000000033333); // fs - add_uint64(data, 0x0000000000044444); // gs + // Emit an LC_THREAD register context appropriate for the cputype + // of the binary we're embedded. The tests in this case do not + // use the register values, so 0's are fine, lldb needs to see at + // least one LC_THREAD in the corefile. +#if defined(__x86_64__) + if (cputype == CPU_TYPE_X86_64) { + add_uint32(data, LC_THREAD); // thread_command.cmd + add_uint32(data, + 16 + (x86_THREAD_STATE64_COUNT * 4)); // thread_command.cmdsize + add_uint32(data, x86_THREAD_STATE64); // thread_command.flavor + add_uint32(data, x86_THREAD_STATE64_COUNT); // thread_command.count + for (int i = 0; i < x86_THREAD_STATE64_COUNT; i++) { + add_uint32(data, 0); // whatever, just some empty register values + } + } +#endif +#if defined(__arm64__) || defined(__aarch64__) + if (cputype == CPU_TYPE_ARM64) { + add_uint32(data, LC_THREAD); // thread_command.cmd + add_uint32(data, + 16 + (ARM_THREAD_STATE64_COUNT * 4)); // thread_command.cmdsize + add_uint32(data, ARM_THREAD_STATE64); // thread_command.flavor + add_uint32(data, ARM_THREAD_STATE64_COUNT); // thread_command.count + for (int i = 0; i < ARM_THREAD_STATE64_COUNT; i++) { + add_uint32(data, 0); // whatever, just some empty register values + } + } +#endif return data; } @@ -121,7 +125,8 @@ void add_lc_note_kern_ver_str_load_command( void add_lc_note_main_bin_spec_load_command( std::vector> &loadcmds, std::vector &payload, - int payload_file_offset, std::string uuidstr, uint64_t address) { + int payload_file_offset, std::string uuidstr, uint64_t address, + uint64_t slide) { std::vector loadcmd_data; add_uint32(loadcmd_data, LC_NOTE); // note_command.cmd @@ -145,15 +150,16 @@ void add_lc_note_main_bin_spec_load_command( loadcmds.push_back(loadcmd_data); // Now write the "main bin spec" payload. 
- add_uint32(payload, 1); // version + add_uint32(payload, 2); // version add_uint32(payload, 3); // type == 3 [ firmware, standalone, etc ] add_uint64(payload, address); // load address + add_uint64(payload, slide); // slide uuid_t uuid; uuid_parse(uuidstr.c_str(), uuid); for (int i = 0; i < sizeof(uuid_t); i++) payload.push_back(uuid[i]); add_uint32(payload, 0); // log2_pagesize unspecified - add_uint32(payload, 0); // unused + add_uint32(payload, 0); // platform unspecified } void add_lc_segment(std::vector> &loadcmds, @@ -179,7 +185,8 @@ void add_lc_segment(std::vector> &loadcmds, loadcmds.push_back(loadcmd_data); } -std::string get_uuid_from_binary(const char *fn) { +std::string get_uuid_from_binary(const char *fn, cpu_type_t &cputype, + cpu_subtype_t &cpusubtype) { FILE *f = fopen(fn, "r"); if (f == nullptr) { fprintf(stderr, "Unable to open binary '%s' to get uuid\n", fn); @@ -213,9 +220,9 @@ std::string get_uuid_from_binary(const char *fn) { fprintf(stderr, "error reading mach header from input file\n"); exit(1); } - if (mh.cputype != CPU_TYPE_X86_64) { + if (mh.cputype != CPU_TYPE_X86_64 && mh.cputype != CPU_TYPE_ARM64) { fprintf(stderr, - "This tool creates an x86_64 corefile but " + "This tool creates an x86_64/arm64 corefile but " "the supplied binary '%s' is cputype 0x%x\n", fn, (uint32_t)mh.cputype); exit(1); @@ -223,15 +230,17 @@ std::string get_uuid_from_binary(const char *fn) { num_of_load_cmds = mh.ncmds; size_of_load_cmds = mh.sizeofcmds; file_offset += sizeof(struct mach_header); + cputype = mh.cputype; + cpusubtype = mh.cpusubtype; } else { struct mach_header_64 mh; if (::fread(&mh, 1, sizeof(mh), f) != sizeof(mh)) { fprintf(stderr, "error reading mach header from input file\n"); exit(1); } - if (mh.cputype != CPU_TYPE_X86_64) { + if (mh.cputype != CPU_TYPE_X86_64 && mh.cputype != CPU_TYPE_ARM64) { fprintf(stderr, - "This tool creates an x86_64 corefile but " + "This tool creates an x86_64/arm64 corefile but " "the supplied binary '%s' is cputype 0x%x\n", fn, (uint32_t)mh.cputype); exit(1); @@ -239,6 +248,8 @@ std::string get_uuid_from_binary(const char *fn) { num_of_load_cmds = mh.ncmds; size_of_load_cmds = mh.sizeofcmds; file_offset += sizeof(struct mach_header_64); + cputype = mh.cputype; + cpusubtype = mh.cpusubtype; } off_t load_cmds_offset = file_offset; @@ -269,12 +280,15 @@ std::string get_uuid_from_binary(const char *fn) { } int main(int argc, char **argv) { - if (argc != 5) { - fprintf(stderr, - "usage: create-empty-corefile version-string|main-bin-spec " - "
\n"); + if (argc != 6) { + fprintf( + stderr, + "usage: create-empty-corefile version-string|main-bin-spec " + "
\n"); fprintf(stderr, "
is base 16, 0xffffffffffffffff means unknown\n"); + fprintf(stderr, + " is base 16, 0xffffffffffffffff means unknown\n"); fprintf( stderr, "Create a Mach-O corefile with an either LC_NOTE 'kern ver str' or \n"); @@ -289,7 +303,9 @@ int main(int argc, char **argv) { exit(1); } - std::string uuid = get_uuid_from_binary(argv[3]); + cpu_type_t cputype; + cpu_subtype_t cpusubtype; + std::string uuid = get_uuid_from_binary(argv[3], cputype, cpusubtype); // An array of load commands (in the form of byte arrays) std::vector> load_commands; @@ -304,15 +320,22 @@ int main(int argc, char **argv) { exit(1); } + errno = 0; + uint64_t slide = strtoull(argv[5], NULL, 16); + if (errno != 0) { + fprintf(stderr, "Unable to parse slide %s as base 16", argv[4]); + exit(1); + } + // First add all the load commands / payload so we can figure out how large // the load commands will actually be. - load_commands.push_back(x86_lc_thread_load_command()); + load_commands.push_back(lc_thread_load_command(cputype)); if (strcmp(argv[1], "version-string") == 0) add_lc_note_kern_ver_str_load_command(load_commands, payload, 0, uuid, address); else add_lc_note_main_bin_spec_load_command(load_commands, payload, 0, uuid, - address); + address, slide); add_lc_segment(load_commands, payload, 0); int size_of_load_commands = 0; @@ -327,22 +350,22 @@ int main(int argc, char **argv) { load_commands.clear(); payload.clear(); - load_commands.push_back(x86_lc_thread_load_command()); + load_commands.push_back(lc_thread_load_command(cputype)); if (strcmp(argv[1], "version-string") == 0) add_lc_note_kern_ver_str_load_command( load_commands, payload, header_and_load_cmd_room, uuid, address); else add_lc_note_main_bin_spec_load_command( - load_commands, payload, header_and_load_cmd_room, uuid, address); + load_commands, payload, header_and_load_cmd_room, uuid, address, slide); add_lc_segment(load_commands, payload, header_and_load_cmd_room); struct mach_header_64 mh; mh.magic = MH_MAGIC_64; - mh.cputype = CPU_TYPE_X86_64; + mh.cputype = cputype; - mh.cpusubtype = CPU_SUBTYPE_X86_64_ALL; + mh.cpusubtype = cpusubtype; mh.filetype = MH_CORE; mh.ncmds = load_commands.size(); mh.sizeofcmds = size_of_load_commands; From 958e7a284d1146842ee2b115172b5eed00e56649 Mon Sep 17 00:00:00 2001 From: Max Kazantsev Date: Wed, 22 Dec 2021 15:07:41 +0700 Subject: [PATCH 027/293] [Test] Add test showing missing opportunity in IndVar's handling of lshr --- .../IndVarSimplify/shift-range-checks.ll | 244 ++++++++++++++++++ 1 file changed, 244 insertions(+) create mode 100644 llvm/test/Transforms/IndVarSimplify/shift-range-checks.ll diff --git a/llvm/test/Transforms/IndVarSimplify/shift-range-checks.ll b/llvm/test/Transforms/IndVarSimplify/shift-range-checks.ll new file mode 100644 index 000000000000..1dc91d105ab3 --- /dev/null +++ b/llvm/test/Transforms/IndVarSimplify/shift-range-checks.ll @@ -0,0 +1,244 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py +; RUN: opt -S < %s -indvars | FileCheck %s +; RUN: opt -S < %s -passes=indvars | FileCheck %s + +declare i1 @cond() +declare void @exit(i32 %code) + +; FIXME: We can remove 2nd check here because it is implied by check +; against the shifted value. 
+define void @test_01(i32* %p, i32 %shift) { +; CHECK-LABEL: @test_01( +; CHECK-NEXT: entry: +; CHECK-NEXT: [[X:%.*]] = load i32, i32* [[P:%.*]], align 4, !range [[RNG0:![0-9]+]] +; CHECK-NEXT: [[X_SHIFTED:%.*]] = lshr i32 [[X]], [[SHIFT:%.*]] +; CHECK-NEXT: br label [[LOOP:%.*]] +; CHECK: loop: +; CHECK-NEXT: [[IV:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[IV_NEXT:%.*]], [[BACKEDGE:%.*]] ] +; CHECK-NEXT: [[LESS_THAN_SHIFTED:%.*]] = icmp slt i32 [[IV]], [[X_SHIFTED]] +; CHECK-NEXT: br i1 [[LESS_THAN_SHIFTED]], label [[GUARDED:%.*]], label [[FAILURE:%.*]] +; CHECK: guarded: +; CHECK-NEXT: [[LESS_THAN_X:%.*]] = icmp ult i32 [[IV]], [[X]] +; CHECK-NEXT: br i1 [[LESS_THAN_X]], label [[BACKEDGE]], label [[NEVER_HAPPENS:%.*]] +; CHECK: backedge: +; CHECK-NEXT: [[IV_NEXT]] = add nuw nsw i32 [[IV]], 1 +; CHECK-NEXT: [[LOOP_COND:%.*]] = call i1 @cond() +; CHECK-NEXT: br i1 [[LOOP_COND]], label [[LOOP]], label [[DONE:%.*]] +; CHECK: done: +; CHECK-NEXT: ret void +; CHECK: failure: +; CHECK-NEXT: call void @exit(i32 1) +; CHECK-NEXT: unreachable +; CHECK: never_happens: +; CHECK-NEXT: call void @exit(i32 0) +; CHECK-NEXT: unreachable +; +entry: + %x = load i32, i32* %p, !range !0 + %x.shifted = lshr i32 %x, %shift + br label %loop + +loop: + %iv = phi i32 [0, %entry], [%iv.next, %backedge] + %less.than.shifted = icmp slt i32 %iv, %x.shifted + br i1 %less.than.shifted, label %guarded, label %failure + +guarded: + %less.than.x = icmp slt i32 %iv, %x + br i1 %less.than.x, label %backedge, label %never_happens + +backedge: + %iv.next = add nuw nsw i32 %iv, 1 + %loop.cond = call i1 @cond() + br i1 %loop.cond, label %loop, label %done + +done: + ret void + +failure: + call void @exit(i32 1) + unreachable + +never_happens: + call void @exit(i32 0) + unreachable +} + +; FIXME: We can remove 2nd check here because it is implied by check +; against the shifted value. 
+define void @test_02(i32* %p, i32 %shift) { +; CHECK-LABEL: @test_02( +; CHECK-NEXT: entry: +; CHECK-NEXT: [[X:%.*]] = load i32, i32* [[P:%.*]], align 4, !range [[RNG0]] +; CHECK-NEXT: [[X_SHIFTED:%.*]] = lshr i32 [[X]], [[SHIFT:%.*]] +; CHECK-NEXT: br label [[LOOP:%.*]] +; CHECK: loop: +; CHECK-NEXT: [[IV:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[IV_NEXT:%.*]], [[BACKEDGE:%.*]] ] +; CHECK-NEXT: [[LESS_THAN_SHIFTED:%.*]] = icmp sgt i32 [[X_SHIFTED]], [[IV]] +; CHECK-NEXT: br i1 [[LESS_THAN_SHIFTED]], label [[GUARDED:%.*]], label [[FAILURE:%.*]] +; CHECK: guarded: +; CHECK-NEXT: [[LESS_THAN_X:%.*]] = icmp ugt i32 [[X]], [[IV]] +; CHECK-NEXT: br i1 [[LESS_THAN_X]], label [[BACKEDGE]], label [[NEVER_HAPPENS:%.*]] +; CHECK: backedge: +; CHECK-NEXT: [[IV_NEXT]] = add nuw nsw i32 [[IV]], 1 +; CHECK-NEXT: [[LOOP_COND:%.*]] = call i1 @cond() +; CHECK-NEXT: br i1 [[LOOP_COND]], label [[LOOP]], label [[DONE:%.*]] +; CHECK: done: +; CHECK-NEXT: ret void +; CHECK: failure: +; CHECK-NEXT: call void @exit(i32 1) +; CHECK-NEXT: unreachable +; CHECK: never_happens: +; CHECK-NEXT: call void @exit(i32 0) +; CHECK-NEXT: unreachable +; +entry: + %x = load i32, i32* %p, !range !0 + %x.shifted = lshr i32 %x, %shift + br label %loop + +loop: + %iv = phi i32 [0, %entry], [%iv.next, %backedge] + %less.than.shifted = icmp sgt i32 %x.shifted, %iv + br i1 %less.than.shifted, label %guarded, label %failure + +guarded: + %less.than.x = icmp sgt i32 %x, %iv + br i1 %less.than.x, label %backedge, label %never_happens + +backedge: + %iv.next = add nuw nsw i32 %iv, 1 + %loop.cond = call i1 @cond() + br i1 %loop.cond, label %loop, label %done + +done: + ret void + +failure: + call void @exit(i32 1) + unreachable + +never_happens: + call void @exit(i32 0) + unreachable +} + +; FIXME: We can remove 2nd check here because it is implied by check +; against the shifted value. 
+define void @test_03(i32* %p, i32 %shift) { +; CHECK-LABEL: @test_03( +; CHECK-NEXT: entry: +; CHECK-NEXT: [[X:%.*]] = load i32, i32* [[P:%.*]], align 4, !range [[RNG0]] +; CHECK-NEXT: [[X_SHIFTED:%.*]] = lshr i32 [[X]], [[SHIFT:%.*]] +; CHECK-NEXT: br label [[LOOP:%.*]] +; CHECK: loop: +; CHECK-NEXT: [[IV:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[IV_NEXT:%.*]], [[BACKEDGE:%.*]] ] +; CHECK-NEXT: [[LESS_THAN_SHIFTED:%.*]] = icmp ult i32 [[IV]], [[X_SHIFTED]] +; CHECK-NEXT: br i1 [[LESS_THAN_SHIFTED]], label [[GUARDED:%.*]], label [[FAILURE:%.*]] +; CHECK: guarded: +; CHECK-NEXT: [[LESS_THAN_X:%.*]] = icmp ult i32 [[IV]], [[X]] +; CHECK-NEXT: br i1 [[LESS_THAN_X]], label [[BACKEDGE]], label [[NEVER_HAPPENS:%.*]] +; CHECK: backedge: +; CHECK-NEXT: [[IV_NEXT]] = add nuw nsw i32 [[IV]], 1 +; CHECK-NEXT: [[LOOP_COND:%.*]] = call i1 @cond() +; CHECK-NEXT: br i1 [[LOOP_COND]], label [[LOOP]], label [[DONE:%.*]] +; CHECK: done: +; CHECK-NEXT: ret void +; CHECK: failure: +; CHECK-NEXT: call void @exit(i32 1) +; CHECK-NEXT: unreachable +; CHECK: never_happens: +; CHECK-NEXT: call void @exit(i32 0) +; CHECK-NEXT: unreachable +; +entry: + %x = load i32, i32* %p, !range !0 + %x.shifted = lshr i32 %x, %shift + br label %loop + +loop: + %iv = phi i32 [0, %entry], [%iv.next, %backedge] + %less.than.shifted = icmp ult i32 %iv, %x.shifted + br i1 %less.than.shifted, label %guarded, label %failure + +guarded: + %less.than.x = icmp ult i32 %iv, %x + br i1 %less.than.x, label %backedge, label %never_happens + +backedge: + %iv.next = add nuw nsw i32 %iv, 1 + %loop.cond = call i1 @cond() + br i1 %loop.cond, label %loop, label %done + +done: + ret void + +failure: + call void @exit(i32 1) + unreachable + +never_happens: + call void @exit(i32 0) + unreachable +} + +; FIXME: We can remove 2nd check here because it is implied by check +; against the shifted value. 
+define void @test_04(i32* %p, i32 %shift) { +; CHECK-LABEL: @test_04( +; CHECK-NEXT: entry: +; CHECK-NEXT: [[X:%.*]] = load i32, i32* [[P:%.*]], align 4, !range [[RNG0]] +; CHECK-NEXT: [[X_SHIFTED:%.*]] = lshr i32 [[X]], [[SHIFT:%.*]] +; CHECK-NEXT: br label [[LOOP:%.*]] +; CHECK: loop: +; CHECK-NEXT: [[IV:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[IV_NEXT:%.*]], [[BACKEDGE:%.*]] ] +; CHECK-NEXT: [[LESS_THAN_SHIFTED:%.*]] = icmp ugt i32 [[X_SHIFTED]], [[IV]] +; CHECK-NEXT: br i1 [[LESS_THAN_SHIFTED]], label [[GUARDED:%.*]], label [[FAILURE:%.*]] +; CHECK: guarded: +; CHECK-NEXT: [[LESS_THAN_X:%.*]] = icmp ugt i32 [[X]], [[IV]] +; CHECK-NEXT: br i1 [[LESS_THAN_X]], label [[BACKEDGE]], label [[NEVER_HAPPENS:%.*]] +; CHECK: backedge: +; CHECK-NEXT: [[IV_NEXT]] = add nuw nsw i32 [[IV]], 1 +; CHECK-NEXT: [[LOOP_COND:%.*]] = call i1 @cond() +; CHECK-NEXT: br i1 [[LOOP_COND]], label [[LOOP]], label [[DONE:%.*]] +; CHECK: done: +; CHECK-NEXT: ret void +; CHECK: failure: +; CHECK-NEXT: call void @exit(i32 1) +; CHECK-NEXT: unreachable +; CHECK: never_happens: +; CHECK-NEXT: call void @exit(i32 0) +; CHECK-NEXT: unreachable +; +entry: + %x = load i32, i32* %p, !range !0 + %x.shifted = lshr i32 %x, %shift + br label %loop + +loop: + %iv = phi i32 [0, %entry], [%iv.next, %backedge] + %less.than.shifted = icmp ugt i32 %x.shifted, %iv + br i1 %less.than.shifted, label %guarded, label %failure + +guarded: + %less.than.x = icmp ugt i32 %x, %iv + br i1 %less.than.x, label %backedge, label %never_happens + +backedge: + %iv.next = add nuw nsw i32 %iv, 1 + %loop.cond = call i1 @cond() + br i1 %loop.cond, label %loop, label %done + +done: + ret void + +failure: + call void @exit(i32 1) + unreachable + +never_happens: + call void @exit(i32 0) + unreachable +} + +!0 = !{i32 0, i32 2147483647} From f5ac23b5ae090d64d31f0b6624470af97dc20bf6 Mon Sep 17 00:00:00 2001 From: Nikita Popov Date: Mon, 20 Dec 2021 12:27:05 +0100 Subject: [PATCH 028/293] [ArgPromotion][TTI] Pass types to ABI compatibility hook The areFunctionArgsABICompatible() hook currently accepts a list of pointer arguments, though what we're actually interested in is the ABI compatibility after these pointer arguments have been converted into value arguments. This means that a) the current API is incompatible with opaque pointers (because it requires inspection of pointee types) and b) it can only be used in the specific context of ArgPromotion. I would like to reuse the API when inspecting calls during inlining. This patch converts it into an areTypesABICompatible() hook, which accepts a list of types. This makes the method more generally usable, and compatible with opaque pointers from an API perspective (the actual usage in ArgPromotion/Attributor is still incompatible, I'll follow up on that in separate patches). 
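As a rough caller-side sketch of how the reworked hook is meant to be used (illustrative only: the helper name promotionIsABISafe is made up, and the loop simply mirrors the kind of type collection the ArgumentPromotion change below performs):

  #include "llvm/ADT/SmallVector.h"
  #include "llvm/Analysis/TargetTransformInfo.h"
  #include "llvm/IR/Function.h"
  using namespace llvm;

  static bool promotionIsABISafe(const Function &Caller, const Function &Callee,
                                 ArrayRef<Argument *> ArgsToPromote,
                                 const TargetTransformInfo &TTI) {
    // Collect the value types that would be passed after promotion and hand
    // plain types to TTI; the hook no longer needs to look through pointers.
    SmallVector<Type *, 8> Types;
    for (Argument *A : ArgsToPromote)
      Types.push_back(A->getType()->getPointerElementType());
    return TTI.areTypesABICompatible(&Caller, &Callee, Types);
  }

Because the hook only sees types, the same query can later be issued from other clients (for example an inliner heuristic) without constructing argument sets.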
Differential Revision: https://reviews.llvm.org/D116031 --- .../llvm/Analysis/TargetTransformInfo.h | 24 +++++++++---------- .../llvm/Analysis/TargetTransformInfoImpl.h | 5 ++-- llvm/lib/Analysis/TargetTransformInfo.cpp | 6 ++--- .../Target/PowerPC/PPCTargetTransformInfo.cpp | 16 ++++++------- .../Target/PowerPC/PPCTargetTransformInfo.h | 5 ++-- .../lib/Target/X86/X86TargetTransformInfo.cpp | 17 +++++-------- llvm/lib/Target/X86/X86TargetTransformInfo.h | 5 ++-- llvm/lib/Transforms/IPO/ArgumentPromotion.cpp | 10 ++++++-- 8 files changed, 41 insertions(+), 47 deletions(-) diff --git a/llvm/include/llvm/Analysis/TargetTransformInfo.h b/llvm/include/llvm/Analysis/TargetTransformInfo.h index c7572dd674c8..b261ad8b2adf 100644 --- a/llvm/include/llvm/Analysis/TargetTransformInfo.h +++ b/llvm/include/llvm/Analysis/TargetTransformInfo.h @@ -1298,13 +1298,12 @@ class TargetTransformInfo { bool areInlineCompatible(const Function *Caller, const Function *Callee) const; - /// \returns True if the caller and callee agree on how \p Args will be passed + /// \returns True if the caller and callee agree on how \p Types will be + /// passed to or returned from the callee. /// to the callee. - /// \param[out] Args The list of compatible arguments. The implementation may - /// filter out any incompatible args from this list. - bool areFunctionArgsABICompatible(const Function *Caller, - const Function *Callee, - SmallPtrSetImpl &Args) const; + /// \param Types List of types to check. + bool areTypesABICompatible(const Function *Caller, const Function *Callee, + const ArrayRef &Types) const; /// The type of load/store indexing. enum MemIndexedMode { @@ -1735,9 +1734,9 @@ class TargetTransformInfo::Concept { unsigned SrcAlign, unsigned DestAlign) const = 0; virtual bool areInlineCompatible(const Function *Caller, const Function *Callee) const = 0; - virtual bool - areFunctionArgsABICompatible(const Function *Caller, const Function *Callee, - SmallPtrSetImpl &Args) const = 0; + virtual bool areTypesABICompatible(const Function *Caller, + const Function *Callee, + const ArrayRef &Types) const; virtual bool isIndexedLoadLegal(MemIndexedMode Mode, Type *Ty) const = 0; virtual bool isIndexedStoreLegal(MemIndexedMode Mode, Type *Ty) const = 0; virtual unsigned getLoadStoreVecRegBitWidth(unsigned AddrSpace) const = 0; @@ -2299,10 +2298,9 @@ class TargetTransformInfo::Model final : public TargetTransformInfo::Concept { const Function *Callee) const override { return Impl.areInlineCompatible(Caller, Callee); } - bool areFunctionArgsABICompatible( - const Function *Caller, const Function *Callee, - SmallPtrSetImpl &Args) const override { - return Impl.areFunctionArgsABICompatible(Caller, Callee, Args); + bool areTypesABICompatible(const Function *Caller, const Function *Callee, + const ArrayRef &Types) const override { + return Impl.areTypesABICompatible(Caller, Callee, Types); } bool isIndexedLoadLegal(MemIndexedMode Mode, Type *Ty) const override { return Impl.isIndexedLoadLegal(Mode, Ty, getDataLayout()); diff --git a/llvm/include/llvm/Analysis/TargetTransformInfoImpl.h b/llvm/include/llvm/Analysis/TargetTransformInfoImpl.h index 1bcfe24e30d7..26a696f09b3d 100644 --- a/llvm/include/llvm/Analysis/TargetTransformInfoImpl.h +++ b/llvm/include/llvm/Analysis/TargetTransformInfoImpl.h @@ -708,9 +708,8 @@ class TargetTransformInfoImplBase { Callee->getFnAttribute("target-features")); } - bool areFunctionArgsABICompatible(const Function *Caller, - const Function *Callee, - SmallPtrSetImpl &Args) const { + bool 
areTypesABICompatible(const Function *Caller, const Function *Callee, + const ArrayRef &Types) const { return (Caller->getFnAttribute("target-cpu") == Callee->getFnAttribute("target-cpu")) && (Caller->getFnAttribute("target-features") == diff --git a/llvm/lib/Analysis/TargetTransformInfo.cpp b/llvm/lib/Analysis/TargetTransformInfo.cpp index 8bb925feeffa..6aa9a77391dc 100644 --- a/llvm/lib/Analysis/TargetTransformInfo.cpp +++ b/llvm/lib/Analysis/TargetTransformInfo.cpp @@ -982,10 +982,10 @@ bool TargetTransformInfo::areInlineCompatible(const Function *Caller, return TTIImpl->areInlineCompatible(Caller, Callee); } -bool TargetTransformInfo::areFunctionArgsABICompatible( +bool TargetTransformInfo::areTypesABICompatible( const Function *Caller, const Function *Callee, - SmallPtrSetImpl &Args) const { - return TTIImpl->areFunctionArgsABICompatible(Caller, Callee, Args); + const ArrayRef &Types) const { + return TTIImpl->areTypesABICompatible(Caller, Callee, Types); } bool TargetTransformInfo::isIndexedLoadLegal(MemIndexedMode Mode, diff --git a/llvm/lib/Target/PowerPC/PPCTargetTransformInfo.cpp b/llvm/lib/Target/PowerPC/PPCTargetTransformInfo.cpp index deac0765b218..ed28731b8ef2 100644 --- a/llvm/lib/Target/PowerPC/PPCTargetTransformInfo.cpp +++ b/llvm/lib/Target/PowerPC/PPCTargetTransformInfo.cpp @@ -1272,23 +1272,21 @@ PPCTTIImpl::getIntrinsicInstrCost(const IntrinsicCostAttributes &ICA, return BaseT::getIntrinsicInstrCost(ICA, CostKind); } -bool PPCTTIImpl::areFunctionArgsABICompatible( - const Function *Caller, const Function *Callee, - SmallPtrSetImpl &Args) const { +bool PPCTTIImpl::areTypesABICompatible(const Function *Caller, + const Function *Callee, + const ArrayRef &Types) const { // We need to ensure that argument promotion does not // attempt to promote pointers to MMA types (__vector_pair // and __vector_quad) since these types explicitly cannot be // passed as arguments. Both of these types are larger than // the 128-bit Altivec vectors and have a scalar size of 1 bit. 
- if (!BaseT::areFunctionArgsABICompatible(Caller, Callee, Args)) + if (!BaseT::areTypesABICompatible(Caller, Callee, Types)) return false; - return llvm::none_of(Args, [](Argument *A) { - auto *EltTy = cast(A->getType())->getElementType(); - if (EltTy->isSized()) - return (EltTy->isIntOrIntVectorTy(1) && - EltTy->getPrimitiveSizeInBits() > 128); + return llvm::none_of(Types, [](Type *Ty) { + if (Ty->isSized()) + return Ty->isIntOrIntVectorTy(1) && Ty->getPrimitiveSizeInBits() > 128; return false; }); } diff --git a/llvm/lib/Target/PowerPC/PPCTargetTransformInfo.h b/llvm/lib/Target/PowerPC/PPCTargetTransformInfo.h index ad926705a330..0af6f2a308d9 100644 --- a/llvm/lib/Target/PowerPC/PPCTargetTransformInfo.h +++ b/llvm/lib/Target/PowerPC/PPCTargetTransformInfo.h @@ -134,9 +134,8 @@ class PPCTTIImpl : public BasicTTIImplBase { bool UseMaskForCond = false, bool UseMaskForGaps = false); InstructionCost getIntrinsicInstrCost(const IntrinsicCostAttributes &ICA, TTI::TargetCostKind CostKind); - bool areFunctionArgsABICompatible(const Function *Caller, - const Function *Callee, - SmallPtrSetImpl &Args) const; + bool areTypesABICompatible(const Function *Caller, const Function *Callee, + const ArrayRef &Types) const; bool hasActiveVectorLength(unsigned Opcode, Type *DataType, Align Alignment) const; InstructionCost getVPMemoryOpCost(unsigned Opcode, Type *Src, Align Alignment, diff --git a/llvm/lib/Target/X86/X86TargetTransformInfo.cpp b/llvm/lib/Target/X86/X86TargetTransformInfo.cpp index 41f273a1d567..d8cd7311a0d5 100644 --- a/llvm/lib/Target/X86/X86TargetTransformInfo.cpp +++ b/llvm/lib/Target/X86/X86TargetTransformInfo.cpp @@ -5192,10 +5192,10 @@ bool X86TTIImpl::areInlineCompatible(const Function *Caller, return (RealCallerBits & RealCalleeBits) == RealCalleeBits; } -bool X86TTIImpl::areFunctionArgsABICompatible( - const Function *Caller, const Function *Callee, - SmallPtrSetImpl &Args) const { - if (!BaseT::areFunctionArgsABICompatible(Caller, Callee, Args)) +bool X86TTIImpl::areTypesABICompatible(const Function *Caller, + const Function *Callee, + const ArrayRef &Types) const { + if (!BaseT::areTypesABICompatible(Caller, Callee, Types)) return false; // If we get here, we know the target features match. If one function @@ -5210,13 +5210,8 @@ bool X86TTIImpl::areFunctionArgsABICompatible( // Consider the arguments compatible if they aren't vectors or aggregates. // FIXME: Look at the size of vectors. // FIXME: Look at the element types of aggregates to see if there are vectors. - // FIXME: The API of this function seems intended to allow arguments - // to be removed from the set, but the caller doesn't check if the set - // becomes empty so that may not work in practice. 
- return llvm::none_of(Args, [](Argument *A) { - auto *EltTy = cast(A->getType())->getElementType(); - return EltTy->isVectorTy() || EltTy->isAggregateType(); - }); + return llvm::none_of(Types, + [](Type *T) { return T->isVectorTy() || T->isAggregateType(); }); } X86TTIImpl::TTI::MemCmpExpansionOptions diff --git a/llvm/lib/Target/X86/X86TargetTransformInfo.h b/llvm/lib/Target/X86/X86TargetTransformInfo.h index c53424ec0026..11e9cb09c7d5 100644 --- a/llvm/lib/Target/X86/X86TargetTransformInfo.h +++ b/llvm/lib/Target/X86/X86TargetTransformInfo.h @@ -234,9 +234,8 @@ class X86TTIImpl : public BasicTTIImplBase { bool isFCmpOrdCheaperThanFCmpZero(Type *Ty); bool areInlineCompatible(const Function *Caller, const Function *Callee) const; - bool areFunctionArgsABICompatible(const Function *Caller, - const Function *Callee, - SmallPtrSetImpl &Args) const; + bool areTypesABICompatible(const Function *Caller, const Function *Callee, + const ArrayRef &Type) const; TTI::MemCmpExpansionOptions enableMemCmpExpansion(bool OptSize, bool IsZeroCmp) const; bool prefersVectorizedAddressing() const; diff --git a/llvm/lib/Transforms/IPO/ArgumentPromotion.cpp b/llvm/lib/Transforms/IPO/ArgumentPromotion.cpp index 93bb11433775..3a42a2cac928 100644 --- a/llvm/lib/Transforms/IPO/ArgumentPromotion.cpp +++ b/llvm/lib/Transforms/IPO/ArgumentPromotion.cpp @@ -835,14 +835,20 @@ bool ArgumentPromotionPass::areFunctionArgsABICompatible( const Function &F, const TargetTransformInfo &TTI, SmallPtrSetImpl &ArgsToPromote, SmallPtrSetImpl &ByValArgsToTransform) { + // TODO: Check individual arguments so we can promote a subset? + SmallVector Types; + for (Argument *Arg : ArgsToPromote) + Types.push_back(Arg->getType()->getPointerElementType()); + for (Argument *Arg : ByValArgsToTransform) + Types.push_back(Arg->getParamByValType()); + for (const Use &U : F.uses()) { CallBase *CB = dyn_cast(U.getUser()); if (!CB) return false; const Function *Caller = CB->getCaller(); const Function *Callee = CB->getCalledFunction(); - if (!TTI.areFunctionArgsABICompatible(Caller, Callee, ArgsToPromote) || - !TTI.areFunctionArgsABICompatible(Caller, Callee, ByValArgsToTransform)) + if (!TTI.areTypesABICompatible(Caller, Callee, Types)) return false; } return true; From 3b0f5a4856fce76a6535feddd1692747b81b60cd Mon Sep 17 00:00:00 2001 From: Nikita Popov Date: Wed, 22 Dec 2021 09:41:11 +0100 Subject: [PATCH 029/293] [Mips16HardFloat] Simplify attribute change (NFC) As we're only removing and adding a single attribute, there is no need to go through AttrBuilder. 
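For illustration, the shape of the simplification (a minimal sketch, not the exact Mips16HardFloat code):

  #include "llvm/IR/Function.h"
  using namespace llvm;

  // Touch the single string attribute directly on the function instead of
  // building an AttrBuilder just to carry it.
  static void resetUseSoftFloat(Function &F) {
    F.removeFnAttr("use-soft-float");        // was: F.removeFnAttrs(B)
    F.addFnAttr("use-soft-float", "false");  // was: F.addFnAttrs(B)
  }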
--- llvm/lib/Target/Mips/Mips16HardFloat.cpp | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/llvm/lib/Target/Mips/Mips16HardFloat.cpp b/llvm/lib/Target/Mips/Mips16HardFloat.cpp index 203e05dde7ad..419f0ac1a8a7 100644 --- a/llvm/lib/Target/Mips/Mips16HardFloat.cpp +++ b/llvm/lib/Target/Mips/Mips16HardFloat.cpp @@ -479,14 +479,12 @@ static void createFPFnStub(Function *F, Module *M, FPParamVariant PV, // remove the use-soft-float attribute static void removeUseSoftFloat(Function &F) { - AttrBuilder B; LLVM_DEBUG(errs() << "removing -use-soft-float\n"); - B.addAttribute("use-soft-float", "false"); - F.removeFnAttrs(B); + F.removeFnAttr("use-soft-float"); if (F.hasFnAttribute("use-soft-float")) { LLVM_DEBUG(errs() << "still has -use-soft-float\n"); } - F.addFnAttrs(B); + F.addFnAttr("use-soft-float", "false"); } // This pass only makes sense when the underlying chip has floating point but From e8e8bfeeb7233bde17d0a811b87d392271b33f42 Mon Sep 17 00:00:00 2001 From: Nikita Popov Date: Wed, 22 Dec 2021 09:42:55 +0100 Subject: [PATCH 030/293] [DataFlowSanitizer] Simplify attribute removal (NFC) We're only removing a single attribute, so there is no need to go through AttrBuilder. --- llvm/lib/Transforms/Instrumentation/DataFlowSanitizer.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/llvm/lib/Transforms/Instrumentation/DataFlowSanitizer.cpp b/llvm/lib/Transforms/Instrumentation/DataFlowSanitizer.cpp index 9eedd9fd151e..9f26b37bbc79 100644 --- a/llvm/lib/Transforms/Instrumentation/DataFlowSanitizer.cpp +++ b/llvm/lib/Transforms/Instrumentation/DataFlowSanitizer.cpp @@ -1121,7 +1121,7 @@ DataFlowSanitizer::buildWrapperFunction(Function *F, StringRef NewFName, BasicBlock *BB = BasicBlock::Create(*Ctx, "entry", NewF); if (F->isVarArg()) { - NewF->removeFnAttrs(AttrBuilder().addAttribute("split-stack")); + NewF->removeFnAttr("split-stack"); CallInst::Create(DFSanVarargWrapperFn, IRBuilder<>(BB).CreateGlobalStringPtr(F->getName()), "", BB); From 9f24f010abe6b9db3d3fd913805c9e0779d00334 Mon Sep 17 00:00:00 2001 From: Nikita Popov Date: Wed, 22 Dec 2021 09:55:54 +0100 Subject: [PATCH 031/293] [RS4GC] Clean up attribute removal (NFC) It is not necessary to explicitly check which attributes are present, and only add those to the builder. We can simply list all attributes that need to be stripped and remove them unconditionally. This also allows us to use some nicer APIs that don't require mucking about with attribute list indices. --- .../Scalar/RewriteStatepointsForGC.cpp | 51 +++++++------------ 1 file changed, 18 insertions(+), 33 deletions(-) diff --git a/llvm/lib/Transforms/Scalar/RewriteStatepointsForGC.cpp b/llvm/lib/Transforms/Scalar/RewriteStatepointsForGC.cpp index 2d3490b2d29e..e12eca0ed287 100644 --- a/llvm/lib/Transforms/Scalar/RewriteStatepointsForGC.cpp +++ b/llvm/lib/Transforms/Scalar/RewriteStatepointsForGC.cpp @@ -1359,16 +1359,6 @@ static constexpr Attribute::AttrKind FnAttrsToStrip[] = Attribute::InaccessibleMemOrArgMemOnly, Attribute::NoSync, Attribute::NoFree}; -// List of all parameter and return attributes which must be stripped when -// lowering from the abstract machine model. Note that we list attributes -// here which aren't valid as return attributes, that is okay. There are -// also some additional attributes with arguments which are handled -// explicitly and are not in this list. 
-static constexpr Attribute::AttrKind ParamAttrsToStrip[] = - {Attribute::ReadNone, Attribute::ReadOnly, Attribute::WriteOnly, - Attribute::NoAlias, Attribute::NoFree}; - - // Create new attribute set containing only attributes which can be transferred // from original call to the safepoint. static AttributeList legalizeCallAttributes(LLVMContext &Ctx, @@ -2650,24 +2640,19 @@ static bool insertParsePoints(Function &F, DominatorTree &DT, return !Records.empty(); } -// Handles both return values and arguments for Functions and calls. -template -static void RemoveNonValidAttrAtIndex(LLVMContext &Ctx, AttrHolder &AH, - unsigned Index) { +// List of all parameter and return attributes which must be stripped when +// lowering from the abstract machine model. Note that we list attributes +// here which aren't valid as return attributes, that is okay. +static AttrBuilder getParamAndReturnAttributesToRemove() { AttrBuilder R; - AttributeSet AS = AH.getAttributes().getAttributes(Index); - if (AS.getDereferenceableBytes()) - R.addAttribute(Attribute::get(Ctx, Attribute::Dereferenceable, - AS.getDereferenceableBytes())); - if (AS.getDereferenceableOrNullBytes()) - R.addAttribute(Attribute::get(Ctx, Attribute::DereferenceableOrNull, - AS.getDereferenceableOrNullBytes())); - for (auto Attr : ParamAttrsToStrip) - if (AS.hasAttribute(Attr)) - R.addAttribute(Attr); - - if (!R.empty()) - AH.setAttributes(AH.getAttributes().removeAttributesAtIndex(Ctx, Index, R)); + R.addDereferenceableAttr(1); + R.addDereferenceableOrNullAttr(1); + R.addAttribute(Attribute::ReadNone); + R.addAttribute(Attribute::ReadOnly); + R.addAttribute(Attribute::WriteOnly); + R.addAttribute(Attribute::NoAlias); + R.addAttribute(Attribute::NoFree); + return R; } static void stripNonValidAttributesFromPrototype(Function &F) { @@ -2683,13 +2668,13 @@ static void stripNonValidAttributesFromPrototype(Function &F) { return; } + AttrBuilder R = getParamAndReturnAttributesToRemove(); for (Argument &A : F.args()) if (isa(A.getType())) - RemoveNonValidAttrAtIndex(Ctx, F, - A.getArgNo() + AttributeList::FirstArgIndex); + F.removeParamAttrs(A.getArgNo(), R); if (isa(F.getReturnType())) - RemoveNonValidAttrAtIndex(Ctx, F, AttributeList::ReturnIndex); + F.removeRetAttrs(R); for (auto Attr : FnAttrsToStrip) F.removeFnAttr(Attr); @@ -2757,13 +2742,13 @@ static void stripNonValidDataFromBody(Function &F) { stripInvalidMetadataFromInstruction(I); + AttrBuilder R = getParamAndReturnAttributesToRemove(); if (auto *Call = dyn_cast(&I)) { for (int i = 0, e = Call->arg_size(); i != e; i++) if (isa(Call->getArgOperand(i)->getType())) - RemoveNonValidAttrAtIndex(Ctx, *Call, - i + AttributeList::FirstArgIndex); + Call->removeParamAttrs(i, R); if (isa(Call->getType())) - RemoveNonValidAttrAtIndex(Ctx, *Call, AttributeList::ReturnIndex); + Call->removeRetAttrs(R); } } From 37c8d5915f37432f49fd4b0e6ec318ae68750af1 Mon Sep 17 00:00:00 2001 From: Krasimir Georgiev Date: Wed, 22 Dec 2021 10:00:35 +0100 Subject: [PATCH 032/293] fix build bots after f5ac23b5ae090d64d31f0b6624470af97dc20bf6 The old member was `= 0`: https://github.com/llvm/llvm-project/commit/f5ac23b5ae090d64d31f0b6624470af97dc20bf6#diff-7781d63141d53242da0520361a554df579ecb079b33bdcfbe7a0db95d44cca49L1740 It looks like this caused some bots to fail: https://lab.llvm.org/buildbot/#/builders/127/builds/21684 --- llvm/include/llvm/Analysis/TargetTransformInfo.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/llvm/include/llvm/Analysis/TargetTransformInfo.h 
b/llvm/include/llvm/Analysis/TargetTransformInfo.h index b261ad8b2adf..d9f5c9689d5c 100644 --- a/llvm/include/llvm/Analysis/TargetTransformInfo.h +++ b/llvm/include/llvm/Analysis/TargetTransformInfo.h @@ -1736,7 +1736,7 @@ class TargetTransformInfo::Concept { const Function *Callee) const = 0; virtual bool areTypesABICompatible(const Function *Caller, const Function *Callee, - const ArrayRef &Types) const; + const ArrayRef &Types) const = 0; virtual bool isIndexedLoadLegal(MemIndexedMode Mode, Type *Ty) const = 0; virtual bool isIndexedStoreLegal(MemIndexedMode Mode, Type *Ty) const = 0; virtual unsigned getLoadStoreVecRegBitWidth(unsigned AddrSpace) const = 0; From e07736fe3c78bf02c1eb922a04065840a8e13935 Mon Sep 17 00:00:00 2001 From: Diana Picus Date: Wed, 22 Dec 2021 09:14:53 +0000 Subject: [PATCH 033/293] [flang] Add missing include. NFCI. descriptor.h is using std::max, so it should include . This should fix a build issue on Windows on Arm. --- flang/include/flang/Runtime/descriptor.h | 1 + 1 file changed, 1 insertion(+) diff --git a/flang/include/flang/Runtime/descriptor.h b/flang/include/flang/Runtime/descriptor.h index dc2f2b77356d..9666f9c0422a 100644 --- a/flang/include/flang/Runtime/descriptor.h +++ b/flang/include/flang/Runtime/descriptor.h @@ -21,6 +21,7 @@ #include "flang/ISO_Fortran_binding.h" #include "flang/Runtime/memory.h" #include "flang/Runtime/type-code.h" +#include #include #include #include From b55ea2fbc0a0564eece1e58fc5624057d5d05639 Mon Sep 17 00:00:00 2001 From: Jun Zhan Date: Wed, 22 Dec 2021 10:00:27 +0000 Subject: [PATCH 034/293] [Clang] Add __builtin_reduce_xor This patch implements __builtin_reduce_xor as specified in D111529. Reviewed By: fhahn, aaron.ballman Differential Revision: https://reviews.llvm.org/D115231 --- clang/include/clang/Basic/Builtins.def | 1 + .../clang/Basic/DiagnosticSemaKinds.td | 3 +- clang/include/clang/Sema/Sema.h | 2 +- clang/lib/CodeGen/CGBuiltin.cpp | 7 ++++ clang/lib/Sema/SemaChecking.cpp | 42 ++++++++++++++----- clang/test/CodeGen/builtins-reduction-math.c | 11 +++++ clang/test/Sema/builtins-reduction-math.c | 17 ++++++++ 7 files changed, 70 insertions(+), 13 deletions(-) diff --git a/clang/include/clang/Basic/Builtins.def b/clang/include/clang/Basic/Builtins.def index f57635e010dc..45d445163749 100644 --- a/clang/include/clang/Basic/Builtins.def +++ b/clang/include/clang/Basic/Builtins.def @@ -649,6 +649,7 @@ BUILTIN(__builtin_elementwise_min, "v.", "nct") BUILTIN(__builtin_elementwise_ceil, "v.", "nct") BUILTIN(__builtin_reduce_max, "v.", "nct") BUILTIN(__builtin_reduce_min, "v.", "nct") +BUILTIN(__builtin_reduce_xor, "v.", "nct") BUILTIN(__builtin_matrix_transpose, "v.", "nFt") BUILTIN(__builtin_matrix_column_major_load, "v.", "nFt") diff --git a/clang/include/clang/Basic/DiagnosticSemaKinds.td b/clang/include/clang/Basic/DiagnosticSemaKinds.td index 875e639cb2be..f50c5a0c711a 100644 --- a/clang/include/clang/Basic/DiagnosticSemaKinds.td +++ b/clang/include/clang/Basic/DiagnosticSemaKinds.td @@ -11378,7 +11378,8 @@ def err_builtin_invalid_arg_type: Error < "%select{vector, integer or floating point type|matrix|" "pointer to a valid matrix element type|" "signed integer or floating point type|vector type|" - "floating point type}1 (was %2)">; + "floating point type|" + "vector of integers}1 (was %2)">; def err_builtin_matrix_disabled: Error< "matrix types extension is disabled. 
Pass -fenable-matrix to enable it">; diff --git a/clang/include/clang/Sema/Sema.h b/clang/include/clang/Sema/Sema.h index 73c5ad1dd7ac..55171767da10 100644 --- a/clang/include/clang/Sema/Sema.h +++ b/clang/include/clang/Sema/Sema.h @@ -12784,7 +12784,7 @@ class Sema final { bool SemaBuiltinElementwiseMath(CallExpr *TheCall); bool PrepareBuiltinElementwiseMathOneArgCall(CallExpr *TheCall); - bool SemaBuiltinReduceMath(CallExpr *TheCall); + bool PrepareBuiltinReduceMathOneArgCall(CallExpr *TheCall); // Matrix builtin handling. ExprResult SemaBuiltinMatrixTranspose(CallExpr *TheCall, diff --git a/clang/lib/CodeGen/CGBuiltin.cpp b/clang/lib/CodeGen/CGBuiltin.cpp index 2cbc8f77bd39..7224b10f2e90 100644 --- a/clang/lib/CodeGen/CGBuiltin.cpp +++ b/clang/lib/CodeGen/CGBuiltin.cpp @@ -3211,6 +3211,13 @@ RValue CodeGenFunction::EmitBuiltinExpr(const GlobalDecl GD, unsigned BuiltinID, return RValue::get(Result); } + case Builtin::BI__builtin_reduce_xor: { + Value *Op0 = EmitScalarExpr(E->getArg(0)); + Value *Result = Builder.CreateUnaryIntrinsic( + llvm::Intrinsic::vector_reduce_xor, Op0, nullptr, "rdx.xor"); + return RValue::get(Result); + } + case Builtin::BI__builtin_matrix_transpose: { const auto *MatrixTy = E->getArg(0)->getType()->getAs(); Value *MatValue = EmitScalarExpr(E->getArg(0)); diff --git a/clang/lib/Sema/SemaChecking.cpp b/clang/lib/Sema/SemaChecking.cpp index fb99591656d3..4e83fa1fffca 100644 --- a/clang/lib/Sema/SemaChecking.cpp +++ b/clang/lib/Sema/SemaChecking.cpp @@ -2216,10 +2216,38 @@ Sema::CheckBuiltinFunctionCall(FunctionDecl *FDecl, unsigned BuiltinID, return ExprError(); break; case Builtin::BI__builtin_reduce_max: - case Builtin::BI__builtin_reduce_min: - if (SemaBuiltinReduceMath(TheCall)) + case Builtin::BI__builtin_reduce_min: { + if (PrepareBuiltinReduceMathOneArgCall(TheCall)) return ExprError(); + + const Expr *Arg = TheCall->getArg(0); + const auto *TyA = Arg->getType()->getAs(); + if (!TyA) { + Diag(Arg->getBeginLoc(), diag::err_builtin_invalid_arg_type) + << 1 << /* vector ty*/ 4 << Arg->getType(); + return ExprError(); + } + + TheCall->setType(TyA->getElementType()); break; + } + + // __builtin_reduce_xor supports vector of integers only. 
+ case Builtin::BI__builtin_reduce_xor: { + if (PrepareBuiltinReduceMathOneArgCall(TheCall)) + return ExprError(); + + const Expr *Arg = TheCall->getArg(0); + const auto *TyA = Arg->getType()->getAs(); + if (!TyA || !TyA->getElementType()->isIntegerType()) { + Diag(Arg->getBeginLoc(), diag::err_builtin_invalid_arg_type) + << 1 << /* vector of integers */ 6 << Arg->getType(); + return ExprError(); + } + TheCall->setType(TyA->getElementType()); + break; + } + case Builtin::BI__builtin_matrix_transpose: return SemaBuiltinMatrixTranspose(TheCall, TheCallResult); @@ -16882,7 +16910,7 @@ bool Sema::SemaBuiltinElementwiseMath(CallExpr *TheCall) { return false; } -bool Sema::SemaBuiltinReduceMath(CallExpr *TheCall) { +bool Sema::PrepareBuiltinReduceMathOneArgCall(CallExpr *TheCall) { if (checkArgCount(*this, TheCall, 1)) return true; @@ -16891,14 +16919,6 @@ bool Sema::SemaBuiltinReduceMath(CallExpr *TheCall) { return true; TheCall->setArg(0, A.get()); - const VectorType *TyA = A.get()->getType()->getAs(); - if (!TyA) { - SourceLocation ArgLoc = TheCall->getArg(0)->getBeginLoc(); - return Diag(ArgLoc, diag::err_builtin_invalid_arg_type) - << 1 << /* vector ty*/ 4 << A.get()->getType(); - } - - TheCall->setType(TyA->getElementType()); return false; } diff --git a/clang/test/CodeGen/builtins-reduction-math.c b/clang/test/CodeGen/builtins-reduction-math.c index 417caed494d9..babd3345d787 100644 --- a/clang/test/CodeGen/builtins-reduction-math.c +++ b/clang/test/CodeGen/builtins-reduction-math.c @@ -57,3 +57,14 @@ void test_builtin_reduce_min(float4 vf1, si8 vi1, u4 vu1) { const si8 cvi1 = vi1; unsigned long long r5 = __builtin_reduce_min(cvi1); } + +void test_builtin_reduce_xor(si8 vi1, u4 vu1) { + + // CHECK: [[VI1:%.+]] = load <8 x i16>, <8 x i16>* %vi1.addr, align 16 + // CHECK-NEXT: call i16 @llvm.vector.reduce.xor.v8i16(<8 x i16> [[VI1]]) + short r2 = __builtin_reduce_xor(vi1); + + // CHECK: [[VU1:%.+]] = load <4 x i32>, <4 x i32>* %vu1.addr, align 16 + // CHECK-NEXT: call i32 @llvm.vector.reduce.xor.v4i32(<4 x i32> [[VU1]]) + unsigned r3 = __builtin_reduce_xor(vu1); +} diff --git a/clang/test/Sema/builtins-reduction-math.c b/clang/test/Sema/builtins-reduction-math.c index 0d1aecaa99c3..8ee64d50de38 100644 --- a/clang/test/Sema/builtins-reduction-math.c +++ b/clang/test/Sema/builtins-reduction-math.c @@ -35,3 +35,20 @@ void test_builtin_reduce_min(int i, float4 v, int3 iv) { i = __builtin_reduce_min(i); // expected-error@-1 {{1st argument must be a vector type (was 'int')}} } + +void test_builtin_reduce_xor(int i, float4 v, int3 iv) { + struct Foo s = __builtin_reduce_xor(iv); + // expected-error@-1 {{initializing 'struct Foo' with an expression of incompatible type 'int'}} + + i = __builtin_reduce_xor(); + // expected-error@-1 {{too few arguments to function call, expected 1, have 0}} + + i = __builtin_reduce_xor(iv, iv); + // expected-error@-1 {{too many arguments to function call, expected 1, have 2}} + + i = __builtin_reduce_xor(i); + // expected-error@-1 {{1st argument must be a vector of integers (was 'int')}} + + i = __builtin_reduce_xor(v); + // expected-error@-1 {{1st argument must be a vector of integers (was 'float4' (vector of 4 'float' values))}} +} From 185c80b89a254db86c954f93b674ad365680018f Mon Sep 17 00:00:00 2001 From: Serge Pavlov Date: Wed, 22 Dec 2021 17:12:54 +0700 Subject: [PATCH 035/293] [ConstantFolding] Tests for constrained compare intrinsics This are tests extracted from https://reviews.llvm.org/D110322, committed prior to that patch to show the change in behavior. 
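To summarize the behavior these tests pin down, here is a minimal sketch of the intended folding rule, inferred from the comments in the test file below; it is not the actual ConstantFolding code, and the predicate name and parameters are made up for illustration:

  // Whether a compare of two floating-point constants may be constant folded.
  static bool canFoldConstrainedFCmp(bool IsSignalingCmp /* fcmps */,
                                     bool HasSNaNOperand, bool HasQNaNOperand,
                                     bool ExceptionsIgnored /* fpexcept.ignore */) {
    if (ExceptionsIgnored)
      return true;              // fold freely, even for (signaling) NaNs
    if (HasSNaNOperand)
      return false;             // an SNaN operand would raise "invalid"
    if (HasQNaNOperand)
      return !IsSignalingCmp;   // quiet compare folds, signaling compare does not
    return true;                // ordinary constants raise no exception
  }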
--- .../InstSimplify/constfold-constrained.ll | 116 ++++++++++++++++++ 1 file changed, 116 insertions(+) diff --git a/llvm/test/Transforms/InstSimplify/constfold-constrained.ll b/llvm/test/Transforms/InstSimplify/constfold-constrained.ll index 2bfd4e03b1e6..5b8d615a6770 100644 --- a/llvm/test/Transforms/InstSimplify/constfold-constrained.ll +++ b/llvm/test/Transforms/InstSimplify/constfold-constrained.ll @@ -417,6 +417,120 @@ entry: } +; When exceptions are ignored, comparison of constants can be folded, even for (signaling) NaNs. +define i1 @cmp_eq_01() #0 { +; CHECK-LABEL: @cmp_eq_01( +; CHECK-NEXT: entry: +; CHECK-NEXT: [[RESULT:%.*]] = call i1 @llvm.experimental.constrained.fcmp.f64(double 1.000000e+00, double 2.000000e+00, metadata !"oeq", metadata !"fpexcept.ignore") #[[ATTR0]] +; CHECK-NEXT: ret i1 [[RESULT]] +; +entry: + %result = call i1 @llvm.experimental.constrained.fcmp.f64(double 1.0, double 2.0, metadata !"oeq", metadata !"fpexcept.ignore") #0 + ret i1 %result +} + +define i1 @cmp_eq_02() #0 { +; CHECK-LABEL: @cmp_eq_02( +; CHECK-NEXT: entry: +; CHECK-NEXT: [[RESULT:%.*]] = call i1 @llvm.experimental.constrained.fcmp.f64(double 2.000000e+00, double 2.000000e+00, metadata !"oeq", metadata !"fpexcept.ignore") #[[ATTR0]] +; CHECK-NEXT: ret i1 [[RESULT]] +; +entry: + %result = call i1 @llvm.experimental.constrained.fcmp.f64(double 2.0, double 2.0, metadata !"oeq", metadata !"fpexcept.ignore") #0 + ret i1 %result +} + +define i1 @cmp_eq_03() #0 { +; CHECK-LABEL: @cmp_eq_03( +; CHECK-NEXT: entry: +; CHECK-NEXT: [[RESULT:%.*]] = call i1 @llvm.experimental.constrained.fcmp.f64(double 2.000000e+00, double 0x7FF8000000000000, metadata !"oeq", metadata !"fpexcept.ignore") #[[ATTR0]] +; CHECK-NEXT: ret i1 [[RESULT]] +; +entry: + %result = call i1 @llvm.experimental.constrained.fcmp.f64(double 2.0, double 0x7ff8000000000000, metadata !"oeq", metadata !"fpexcept.ignore") #0 + ret i1 %result +} + +define i1 @cmp_eq_04() #0 { +; CHECK-LABEL: @cmp_eq_04( +; CHECK-NEXT: entry: +; CHECK-NEXT: [[RESULT:%.*]] = call i1 @llvm.experimental.constrained.fcmp.f64(double 2.000000e+00, double 0x7FF4000000000000, metadata !"oeq", metadata !"fpexcept.ignore") #[[ATTR0]] +; CHECK-NEXT: ret i1 [[RESULT]] +; +entry: + %result = call i1 @llvm.experimental.constrained.fcmp.f64(double 2.0, double 0x7ff4000000000000, metadata !"oeq", metadata !"fpexcept.ignore") #0 + ret i1 %result +} + +define i1 @cmp_eq_05() #0 { +; CHECK-LABEL: @cmp_eq_05( +; CHECK-NEXT: entry: +; CHECK-NEXT: [[RESULT:%.*]] = call i1 @llvm.experimental.constrained.fcmps.f64(double 2.000000e+00, double 0x7FF8000000000000, metadata !"oeq", metadata !"fpexcept.ignore") #[[ATTR0]] +; CHECK-NEXT: ret i1 [[RESULT]] +; +entry: + %result = call i1 @llvm.experimental.constrained.fcmps.f64(double 2.0, double 0x7ff8000000000000, metadata !"oeq", metadata !"fpexcept.ignore") #0 + ret i1 %result +} + +define i1 @cmp_eq_06() #0 { +; CHECK-LABEL: @cmp_eq_06( +; CHECK-NEXT: entry: +; CHECK-NEXT: [[RESULT:%.*]] = call i1 @llvm.experimental.constrained.fcmps.f64(double 2.000000e+00, double 0x7FF4000000000000, metadata !"oeq", metadata !"fpexcept.ignore") #[[ATTR0]] +; CHECK-NEXT: ret i1 [[RESULT]] +; +entry: + %result = call i1 @llvm.experimental.constrained.fcmps.f64(double 2.0, double 0x7ff4000000000000, metadata !"oeq", metadata !"fpexcept.ignore") #0 + ret i1 %result +} + +; Compare with SNAN is NOT folded if the exception behavior mode is not 'ignore'. 
+define i1 @cmp_eq_nan_01() #0 { +; CHECK-LABEL: @cmp_eq_nan_01( +; CHECK-NEXT: entry: +; CHECK-NEXT: [[RESULT:%.*]] = call i1 @llvm.experimental.constrained.fcmp.f64(double 0x7FF4000000000000, double 1.000000e+00, metadata !"oeq", metadata !"fpexcept.strict") #[[ATTR0]] +; CHECK-NEXT: ret i1 [[RESULT]] +; +entry: + %result = call i1 @llvm.experimental.constrained.fcmp.f64(double 0x7ff4000000000000, double 1.0, metadata !"oeq", metadata !"fpexcept.strict") #0 + ret i1 %result +} + +define i1 @cmp_eq_nan_02() #0 { +; CHECK-LABEL: @cmp_eq_nan_02( +; CHECK-NEXT: entry: +; CHECK-NEXT: [[RESULT:%.*]] = call i1 @llvm.experimental.constrained.fcmps.f64(double 0x7FF4000000000000, double 1.000000e+00, metadata !"oeq", metadata !"fpexcept.strict") #[[ATTR0]] +; CHECK-NEXT: ret i1 [[RESULT]] +; +entry: + %result = call i1 @llvm.experimental.constrained.fcmps.f64(double 0x7ff4000000000000, double 1.0, metadata !"oeq", metadata !"fpexcept.strict") #0 + ret i1 %result +} + +; Compare with QNAN is folded for fcmp but is NOT folded for fcmps if the exception behavior mode is not 'ignore'. +define i1 @cmp_eq_nan_03() #0 { +; CHECK-LABEL: @cmp_eq_nan_03( +; CHECK-NEXT: entry: +; CHECK-NEXT: [[RESULT:%.*]] = call i1 @llvm.experimental.constrained.fcmp.f64(double 0x7FF8000000000000, double 1.000000e+00, metadata !"oeq", metadata !"fpexcept.strict") #[[ATTR0]] +; CHECK-NEXT: ret i1 [[RESULT]] +; +entry: + %result = call i1 @llvm.experimental.constrained.fcmp.f64(double 0x7ff8000000000000, double 1.0, metadata !"oeq", metadata !"fpexcept.strict") #0 + ret i1 %result +} + +define i1 @cmp_eq_nan_04() #0 { +; CHECK-LABEL: @cmp_eq_nan_04( +; CHECK-NEXT: entry: +; CHECK-NEXT: [[RESULT:%.*]] = call i1 @llvm.experimental.constrained.fcmps.f64(double 0x7FF8000000000000, double 1.000000e+00, metadata !"oeq", metadata !"fpexcept.strict") #[[ATTR0]] +; CHECK-NEXT: ret i1 [[RESULT]] +; +entry: + %result = call i1 @llvm.experimental.constrained.fcmps.f64(double 0x7ff8000000000000, double 1.0, metadata !"oeq", metadata !"fpexcept.strict") #0 + ret i1 %result +} + + attributes #0 = { strictfp } declare double @llvm.experimental.constrained.nearbyint.f64(double, metadata, metadata) @@ -435,4 +549,6 @@ declare double @llvm.experimental.constrained.fdiv.f64(double, double, metadata, declare double @llvm.experimental.constrained.frem.f64(double, double, metadata, metadata) declare double @llvm.experimental.constrained.fma.f64(double, double, double, metadata, metadata) declare double @llvm.experimental.constrained.fmuladd.f64(double, double, double, metadata, metadata) +declare i1 @llvm.experimental.constrained.fcmp.f64(double, double, metadata, metadata) +declare i1 @llvm.experimental.constrained.fcmps.f64(double, double, metadata, metadata) From 8a0e35f3a78d314d4661ff740a35e590f07b3297 Mon Sep 17 00:00:00 2001 From: Nikita Popov Date: Wed, 22 Dec 2021 10:54:49 +0100 Subject: [PATCH 036/293] [MemoryLocation] Don't require nocapture in getForDest() As reames mentioned on related reviews, we don't need the nocapture requirement here. First of all, from an API perspective, this is not something that MemoryLocation::getForDest() should be checking in the first place, because it does not affect which memory this particular call can access; it's an orthogonal concern that should be handled by the caller if necessary. 
However, for both of the motivating users in DSE and InstCombine, we don't need the nocapture requirement, because the capture can either be purely local to the call (a pointer identity check that is irrelevant to us), be part of the return value (which we check is unused), or be written in the dest location, which we have determined to be dead. This allows us to remove the special handling for libcalls as well. Differential Revision: https://reviews.llvm.org/D116148 --- llvm/lib/Analysis/MemoryLocation.cpp | 16 ---------- .../DeadStoreElimination/trivial-dse-calls.ll | 31 ++++++++++++++----- .../InstCombine/trivial-dse-calls.ll | 31 ++++++++++++++----- 3 files changed, 48 insertions(+), 30 deletions(-) diff --git a/llvm/lib/Analysis/MemoryLocation.cpp b/llvm/lib/Analysis/MemoryLocation.cpp index 636582827297..120f4cda1e35 100644 --- a/llvm/lib/Analysis/MemoryLocation.cpp +++ b/llvm/lib/Analysis/MemoryLocation.cpp @@ -134,19 +134,6 @@ MemoryLocation::getForDest(const CallBase *CB, const TargetLibraryInfo &TLI) { } } - LibFunc LF; - if (TLI.getLibFunc(*CB, LF) && TLI.has(LF)) { - switch (LF) { - case LibFunc_strncpy: - case LibFunc_strcpy: - case LibFunc_strcat: - case LibFunc_strncat: - return getForArgument(CB, 0, &TLI); - default: - break; - } - } - if (!CB->onlyAccessesArgMemory()) return None; @@ -159,9 +146,6 @@ MemoryLocation::getForDest(const CallBase *CB, const TargetLibraryInfo &TLI) { for (unsigned i = 0; i < CB->arg_size(); i++) { if (!CB->getArgOperand(i)->getType()->isPointerTy()) continue; - if (!CB->doesNotCapture(i)) - // capture would allow the address to be read back in an untracked manner - return None; if (CB->onlyReadsMemory(i)) continue; if (!UsedV) { diff --git a/llvm/test/Transforms/DeadStoreElimination/trivial-dse-calls.ll b/llvm/test/Transforms/DeadStoreElimination/trivial-dse-calls.ll index 13fd0e2ded0c..2b3838b00b3e 100644 --- a/llvm/test/Transforms/DeadStoreElimination/trivial-dse-calls.ll +++ b/llvm/test/Transforms/DeadStoreElimination/trivial-dse-calls.ll @@ -7,6 +7,7 @@ declare void @llvm.lifetime.end.p0i8(i64 immarg, i8* nocapture) declare void @unknown() declare void @f(i8*) declare void @f2(i8*, i8*) +declare i8* @f3(i8*, i8*) ; Basic case for DSEing a trivially dead writing call define void @test_dead() { @@ -205,22 +206,38 @@ define void @test_unreleated_read() { ret void } -; But that removing a capture of an unrelated pointer isn't okay. -define void @test_neg_unreleated_capture() { -; CHECK-LABEL: @test_neg_unreleated_capture( +; Removing a capture is also okay. The capture can only be in the return value +; (which is unused) or written into the dead out parameter. +define void @test_unrelated_capture() { +; CHECK-LABEL: @test_unrelated_capture( +; CHECK-NEXT: ret void +; + %a = alloca i32, align 4 + %a2 = alloca i32, align 4 + %bitcast = bitcast i32* %a to i8* + %bitcast2 = bitcast i32* %a2 to i8* + call i8* @f3(i8* nocapture writeonly %bitcast, i8* readonly %bitcast2) argmemonly nounwind willreturn + ret void +} + +; Cannot remove call, as %bitcast2 is captured via the return value. 
+define i8 @test_neg_unrelated_capture_used_via_return() { +; CHECK-LABEL: @test_neg_unrelated_capture_used_via_return( ; CHECK-NEXT: [[A:%.*]] = alloca i32, align 4 ; CHECK-NEXT: [[A2:%.*]] = alloca i32, align 4 ; CHECK-NEXT: [[BITCAST:%.*]] = bitcast i32* [[A]] to i8* ; CHECK-NEXT: [[BITCAST2:%.*]] = bitcast i32* [[A2]] to i8* -; CHECK-NEXT: call void @f2(i8* nocapture writeonly [[BITCAST]], i8* readonly [[BITCAST2]]) #[[ATTR1]] -; CHECK-NEXT: ret void +; CHECK-NEXT: [[CAPTURE:%.*]] = call i8* @f3(i8* nocapture writeonly [[BITCAST]], i8* readonly [[BITCAST2]]) #[[ATTR1]] +; CHECK-NEXT: [[V:%.*]] = load i8, i8* [[CAPTURE]], align 1 +; CHECK-NEXT: ret i8 [[V]] ; %a = alloca i32, align 4 %a2 = alloca i32, align 4 %bitcast = bitcast i32* %a to i8* %bitcast2 = bitcast i32* %a2 to i8* - call void @f2(i8* nocapture writeonly %bitcast, i8* readonly %bitcast2) argmemonly nounwind willreturn - ret void + %capture = call i8* @f3(i8* nocapture writeonly %bitcast, i8* readonly %bitcast2) argmemonly nounwind willreturn + %v = load i8, i8* %capture + ret i8 %v } ; As long as the result is unused, we can even remove reads of the alloca diff --git a/llvm/test/Transforms/InstCombine/trivial-dse-calls.ll b/llvm/test/Transforms/InstCombine/trivial-dse-calls.ll index f2c009ac3796..6e2433336d1a 100644 --- a/llvm/test/Transforms/InstCombine/trivial-dse-calls.ll +++ b/llvm/test/Transforms/InstCombine/trivial-dse-calls.ll @@ -7,6 +7,7 @@ declare void @llvm.lifetime.end.p0i8(i64 immarg, i8* nocapture) declare void @unknown() declare void @f(i8*) declare void @f2(i8*, i8*) +declare i8* @f3(i8*, i8*) ; Basic case for DSEing a trivially dead writing call define void @test_dead() { @@ -192,22 +193,38 @@ define void @test_unreleated_read() { ret void } -; But that removing a capture of an unrelated pointer isn't okay. -define void @test_neg_unreleated_capture() { -; CHECK-LABEL: @test_neg_unreleated_capture( +; Removing a capture is also okay. The capture can only be in the return value +; (which is unused) or written into the dead out parameter. +define void @test_unrelated_capture() { +; CHECK-LABEL: @test_unrelated_capture( +; CHECK-NEXT: ret void +; + %a = alloca i32, align 4 + %a2 = alloca i32, align 4 + %bitcast = bitcast i32* %a to i8* + %bitcast2 = bitcast i32* %a2 to i8* + call i8* @f3(i8* nocapture writeonly %bitcast, i8* readonly %bitcast2) argmemonly nounwind willreturn + ret void +} + +; Cannot remove call, as %bitcast2 is captured via the return value. 
+define i8 @test_neg_unrelated_capture_used_via_return() { +; CHECK-LABEL: @test_neg_unrelated_capture_used_via_return( ; CHECK-NEXT: [[A:%.*]] = alloca i32, align 4 ; CHECK-NEXT: [[A2:%.*]] = alloca i32, align 4 ; CHECK-NEXT: [[BITCAST:%.*]] = bitcast i32* [[A]] to i8* ; CHECK-NEXT: [[BITCAST2:%.*]] = bitcast i32* [[A2]] to i8* -; CHECK-NEXT: call void @f2(i8* nocapture nonnull writeonly [[BITCAST]], i8* nonnull readonly [[BITCAST2]]) #[[ATTR1]] -; CHECK-NEXT: ret void +; CHECK-NEXT: [[CAPTURE:%.*]] = call i8* @f3(i8* nocapture nonnull writeonly [[BITCAST]], i8* nonnull readonly [[BITCAST2]]) #[[ATTR1]] +; CHECK-NEXT: [[V:%.*]] = load i8, i8* [[CAPTURE]], align 1 +; CHECK-NEXT: ret i8 [[V]] ; %a = alloca i32, align 4 %a2 = alloca i32, align 4 %bitcast = bitcast i32* %a to i8* %bitcast2 = bitcast i32* %a2 to i8* - call void @f2(i8* nocapture writeonly %bitcast, i8* readonly %bitcast2) argmemonly nounwind willreturn - ret void + %capture = call i8* @f3(i8* nocapture writeonly %bitcast, i8* readonly %bitcast2) argmemonly nounwind willreturn + %v = load i8, i8* %capture + ret i8 %v } ; As long as the result is unused, we can even remove reads of the alloca From 35c00803338f17d2aec0899c72b824697a641c18 Mon Sep 17 00:00:00 2001 From: Adrian Kuegel Date: Wed, 22 Dec 2021 12:23:57 +0100 Subject: [PATCH 037/293] [mlir][NFC] Fix typo in VectorOps.cpp --- mlir/lib/Dialect/Vector/VectorOps.cpp | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/mlir/lib/Dialect/Vector/VectorOps.cpp b/mlir/lib/Dialect/Vector/VectorOps.cpp index 27a0fc6d9dcb..7b6534ff1faa 100644 --- a/mlir/lib/Dialect/Vector/VectorOps.cpp +++ b/mlir/lib/Dialect/Vector/VectorOps.cpp @@ -1125,11 +1125,11 @@ static Value foldExtractFromBroadcast(ExtractOp extractOp) { auto getRank = [](Type type) { return type.isa() ? type.cast().getRank() : 0; }; - unsigned broadcasrSrcRank = getRank(source.getType()); + unsigned broadcastSrcRank = getRank(source.getType()); unsigned extractResultRank = getRank(extractOp.getType()); - if (extractResultRank < broadcasrSrcRank) { + if (extractResultRank < broadcastSrcRank) { auto extractPos = extractVector(extractOp.position()); - unsigned rankDiff = broadcasrSrcRank - extractResultRank; + unsigned rankDiff = broadcastSrcRank - extractResultRank; extractPos.erase( extractPos.begin(), std::next(extractPos.begin(), extractPos.size() - rankDiff)); @@ -1236,12 +1236,12 @@ class ExtractOpFromBroadcast final : public OpRewritePattern { auto getRank = [](Type type) { return type.isa() ? type.cast().getRank() : 0; }; - unsigned broadcasrSrcRank = getRank(source.getType()); + unsigned broadcastSrcRank = getRank(source.getType()); unsigned extractResultRank = getRank(extractOp.getType()); // We only consider the case where the rank of the source is smaller than // the rank of the extract dst. The other cases are handled in the folding // patterns. - if (extractResultRank <= broadcasrSrcRank) + if (extractResultRank <= broadcastSrcRank) return failure(); rewriter.replaceOpWithNewOp( extractOp, extractOp.getType(), source); From 0af628152a0547ad02856a880830ec33aa79e21c Mon Sep 17 00:00:00 2001 From: Nikita Popov Date: Wed, 22 Dec 2021 12:21:54 +0100 Subject: [PATCH 038/293] [CodeGen] Make lifetime marker test more robust (NFC) Mark the first function optnone as well, to make sure that the test is independent of optimization. 
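Roughly, the pattern being applied (a hedged sketch, not the test file itself): marking the function optnone keeps the -O1 pipeline from rewriting its body, so the CHECK lines only depend on the lifetime.start/end markers the frontend emits for the indirectly returned temporaries.

  struct S { int ns[40]; };
  struct S foo(void);

  // optnone: later passes leave this body alone, so the emitted lifetime
  // markers are not moved or deleted by the optimizer.
  __attribute__((optnone)) struct S bar(void) {
    struct S r;
    r = foo();   // temporary for the indirect (sret) result gets lifetime markers
    return r;
  }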
--- clang/test/CodeGen/aggregate-assign-call.c | 31 +++++++++++++++------- 1 file changed, 22 insertions(+), 9 deletions(-) diff --git a/clang/test/CodeGen/aggregate-assign-call.c b/clang/test/CodeGen/aggregate-assign-call.c index 5aeaca19379a..d25fc39cfe75 100644 --- a/clang/test/CodeGen/aggregate-assign-call.c +++ b/clang/test/CodeGen/aggregate-assign-call.c @@ -4,7 +4,8 @@ // Ensure that we place appropriate lifetime markers around indirectly returned // temporaries, and that the lifetime.ends appear in a timely manner. // -// -O1 is used so lifetime markers actually get emitted. +// -O1 is used so lifetime markers actually get emitted and optnone is added +// to avoid elimination of lifetime markers by optimizations. struct S { int ns[40]; @@ -13,25 +14,39 @@ struct S { struct S foo(void); // CHECK-LABEL: define dso_local void @bar +__attribute__((optnone)) struct S bar() { // O0-NOT: @llvm.lifetime.start // O0-NOT: @llvm.lifetime.end struct S r; - // O1: call void @llvm.lifetime.start.p0i8({{[^,]*}}, i8* nonnull %[[TMP1:[^)]+]]) + // O1: %[[TMP1_ALLOCA:[^ ]+]] = alloca %struct.S + // O1: %[[TMP2_ALLOCA:[^ ]+]] = alloca %struct.S + // O1: %[[TMP3_ALLOCA:[^ ]+]] = alloca %struct.S + + // O1: %[[P:[^ ]+]] = bitcast %struct.S* %[[TMP1_ALLOCA]] to i8* + // O1: call void @llvm.lifetime.start.p0i8({{[^,]*}}, i8* %[[P]]) // O1: call void @foo r = foo(); - // O1: call void @llvm.lifetime.end.p0i8({{[^,]*}}, i8* nonnull %[[TMP1]]) + // O1: memcpy + // O1: %[[P:[^ ]+]] = bitcast %struct.S* %[[TMP1_ALLOCA]] to i8* + // O1: call void @llvm.lifetime.end.p0i8({{[^,]*}}, i8* %[[P]]) - // O1: call void @llvm.lifetime.start.p0i8({{[^,]*}}, i8* nonnull %[[TMP2:[^)]+]]) + // O1: %[[P:[^ ]+]] = bitcast %struct.S* %[[TMP2_ALLOCA]] to i8* + // O1: call void @llvm.lifetime.start.p0i8({{[^,]*}}, i8* %[[P]]) // O1: call void @foo r = foo(); - // O1: call void @llvm.lifetime.end.p0i8({{[^,]*}}, i8* nonnull %[[TMP2]]) + // O1: memcpy + // O1: %[[P:[^ ]+]] = bitcast %struct.S* %[[TMP2_ALLOCA]] to i8* + // O1: call void @llvm.lifetime.end.p0i8({{[^,]*}}, i8* %[[P]]) - // O1: call void @llvm.lifetime.start.p0i8({{[^,]*}}, i8* nonnull %[[TMP3:[^)]+]]) + // O1: %[[P:[^ ]+]] = bitcast %struct.S* %[[TMP3_ALLOCA]] to i8* + // O1: call void @llvm.lifetime.start.p0i8({{[^,]*}}, i8* %[[P]]) // O1: call void @foo r = foo(); - // O1: call void @llvm.lifetime.end.p0i8({{[^,]*}}, i8* nonnull %[[TMP3]]) + // O1: memcpy + // O1: %[[P:[^ ]+]] = bitcast %struct.S* %[[TMP3_ALLOCA]] to i8* + // O1: call void @llvm.lifetime.end.p0i8({{[^,]*}}, i8* %[[P]]) return r; } @@ -39,8 +54,6 @@ struct S bar() { struct S foo_int(int); // Be sure that we're placing the lifetime.end so that all paths go through it. -// Since this function turns out to be large-ish, optnone to hopefully keep it -// stable. 
// CHECK-LABEL: define dso_local void @baz __attribute__((optnone)) struct S baz(int i, volatile int *j) { From 6e9be9f7c124a181118e293e8f57ce7559d9478e Mon Sep 17 00:00:00 2001 From: Adrian Kuegel Date: Wed, 22 Dec 2021 12:29:53 +0100 Subject: [PATCH 039/293] [mlir][NFC] Use .empty() instead of .size() --- mlir/lib/Dialect/Affine/Utils/Utils.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/mlir/lib/Dialect/Affine/Utils/Utils.cpp b/mlir/lib/Dialect/Affine/Utils/Utils.cpp index 13bcb9fedb71..480c09aa97a1 100644 --- a/mlir/lib/Dialect/Affine/Utils/Utils.cpp +++ b/mlir/lib/Dialect/Affine/Utils/Utils.cpp @@ -591,7 +591,7 @@ static bool hasNoInterveningEffect(Operation *start, T memOp) { SmallPtrSet done; // Traverse the CFG until hitting `to`. - while (todoBlocks.size()) { + while (!todoBlocks.empty()) { Block *blk = todoBlocks.pop_back_val(); if (done.count(blk)) continue; From 5fc05a0a81ed06e936196b68ba99ffddcbffd14d Mon Sep 17 00:00:00 2001 From: Igor Kudrin Date: Wed, 22 Dec 2021 18:52:36 +0700 Subject: [PATCH 040/293] [unittest][DebugInfo/DWARF] Check that dwarfgen::Generator is created If Generator::create() returns an error, tests should fail gracefully and report the cause, for example: [ RUN ] DebugLineBasicFixture.ParserSkipsCorrectly .../llvm/unittests/DebugInfo/DWARF/DWARFDebugLineTest.cpp:47: Failure Value of: llvm::detail::TakeExpected(ExpectedGenerator) Expected: succeeded Actual: failed (no asm backend for target nvptx64-nvidia-cuda) Differential Revision: https://reviews.llvm.org/D116106 --- .../DebugInfo/DWARF/DWARFDebugLineTest.cpp | 22 +++++++++++-------- 1 file changed, 13 insertions(+), 9 deletions(-) diff --git a/llvm/unittests/DebugInfo/DWARF/DWARFDebugLineTest.cpp b/llvm/unittests/DebugInfo/DWARF/DWARFDebugLineTest.cpp index a8502df2b34b..71b7361f4fd5 100644 --- a/llvm/unittests/DebugInfo/DWARF/DWARFDebugLineTest.cpp +++ b/llvm/unittests/DebugInfo/DWARF/DWARFDebugLineTest.cpp @@ -36,16 +36,21 @@ struct CommonFixture { EXPECT_FALSE(Unrecoverable); } - bool setupGenerator(uint16_t Version = 4, uint8_t AddrSize = 8) { + // Note: ASSERT_THAT_EXPECTED cannot be used in a non-void function, so + // setupGenerator() is split into two. + void setupGeneratorImpl(uint16_t Version, uint8_t AddrSize) { AddressSize = AddrSize; - Triple T = - getDefaultTargetTripleForAddrSize(AddressSize == 0 ? 8 : AddressSize); + Triple T = getDefaultTargetTripleForAddrSize(AddressSize ? AddressSize : 8); if (!isConfigurationSupported(T)) - return false; + return; auto ExpectedGenerator = Generator::create(T, Version); - if (ExpectedGenerator) - Gen.reset(ExpectedGenerator->release()); - return true; + ASSERT_THAT_EXPECTED(ExpectedGenerator, Succeeded()); + Gen = std::move(*ExpectedGenerator); + } + + bool setupGenerator(uint16_t Version = 4, uint8_t AddrSize = 8) { + setupGeneratorImpl(Version, AddrSize); + return Gen != nullptr; } void generate() { @@ -60,8 +65,7 @@ struct CommonFixture { } std::unique_ptr createContext() { - if (!Gen) - return nullptr; + assert(Gen != nullptr && "Generator is not set up"); StringRef FileBytes = Gen->generate(); MemoryBufferRef FileBuffer(FileBytes, "dwarf"); auto Obj = object::ObjectFile::createObjectFile(FileBuffer); From 2e11e8885c68f7ec6e1f2699a9461708d29ec4df Mon Sep 17 00:00:00 2001 From: Igor Kudrin Date: Wed, 22 Dec 2021 18:52:43 +0700 Subject: [PATCH 041/293] [unittest][DebugInfo/DWARF] Do not create dwarfgen::Generator if MCAsmBackend is missing dwarfgen::Generator cannot be created if there is no asm backend for a target. 
For example, if the default target triple is nvptx-nvidia-cuda, some tests fail even after D98400, which added checks for most cases. This patch extends the approach to the remaining cases. Differential Revision: https://reviews.llvm.org/D116107 --- .../DebugInfo/DWARF/DWARFDebugInfoTest.cpp | 24 +++++++++---------- .../DWARF/DWARFDieManualExtractTest.cpp | 2 +- llvm/unittests/DebugInfo/DWARF/DwarfUtils.cpp | 6 ----- llvm/unittests/DebugInfo/DWARF/DwarfUtils.h | 1 - 4 files changed, 13 insertions(+), 20 deletions(-) diff --git a/llvm/unittests/DebugInfo/DWARF/DWARFDebugInfoTest.cpp b/llvm/unittests/DebugInfo/DWARF/DWARFDebugInfoTest.cpp index 68d06003a5e5..44f438e91f99 100644 --- a/llvm/unittests/DebugInfo/DWARF/DWARFDebugInfoTest.cpp +++ b/llvm/unittests/DebugInfo/DWARF/DWARFDebugInfoTest.cpp @@ -43,7 +43,7 @@ namespace { template void TestAllForms() { Triple Triple = getDefaultTargetTripleForAddrSize(sizeof(AddrType)); - if (!isObjectEmissionSupported(Triple)) + if (!isConfigurationSupported(Triple)) return; // Test that we can decode all DW_FORM values correctly. @@ -477,7 +477,7 @@ TEST(DWARFDebugInfo, TestDWARF32Version5Addr8AllForms) { template void TestChildren() { Triple Triple = getDefaultTargetTripleForAddrSize(sizeof(AddrType)); - if (!isObjectEmissionSupported(Triple)) + if (!isConfigurationSupported(Triple)) return; // Test that we can decode DW_FORM_ref_addr values correctly in DWARF 2 with @@ -619,7 +619,7 @@ TEST(DWARFDebugInfo, TestDWARF32Version4Addr8Children) { template void TestReferences() { Triple Triple = getDefaultTargetTripleForAddrSize(sizeof(AddrType)); - if (!isObjectEmissionSupported(Triple)) + if (!isConfigurationSupported(Triple)) return; // Test that we can decode DW_FORM_refXXX values correctly in DWARF. @@ -881,7 +881,7 @@ TEST(DWARFDebugInfo, TestDWARF32Version4Addr8References) { template void TestAddresses() { Triple Triple = getDefaultTargetTripleForAddrSize(sizeof(AddrType)); - if (!isObjectEmissionSupported(Triple)) + if (!isConfigurationSupported(Triple)) return; // Test the DWARF APIs related to accessing the DW_AT_low_pc and @@ -1069,7 +1069,7 @@ TEST(DWARFDebugInfo, DISABLED_TestStringOffsets) { TEST(DWARFDebugInfo, TestStringOffsets) { #endif Triple Triple = getNormalizedDefaultTargetTriple(); - if (!isObjectEmissionSupported(Triple)) + if (!isConfigurationSupported(Triple)) return; const char *String1 = "Hello"; @@ -1133,7 +1133,7 @@ TEST(DWARFDebugInfo, TestStringOffsets) { TEST(DWARFDebugInfo, TestEmptyStringOffsets) { Triple Triple = getNormalizedDefaultTargetTriple(); - if (!isObjectEmissionSupported(Triple)) + if (!isConfigurationSupported(Triple)) return; const char *String1 = "Hello"; @@ -1162,7 +1162,7 @@ TEST(DWARFDebugInfo, TestEmptyStringOffsets) { TEST(DWARFDebugInfo, TestRelations) { Triple Triple = getNormalizedDefaultTargetTriple(); - if (!isObjectEmissionSupported(Triple)) + if (!isConfigurationSupported(Triple)) return; // Test the DWARF APIs related to accessing the DW_AT_low_pc and @@ -1349,7 +1349,7 @@ TEST(DWARFDebugInfo, TestDWARFDie) { TEST(DWARFDebugInfo, TestChildIterators) { Triple Triple = getNormalizedDefaultTargetTriple(); - if (!isObjectEmissionSupported(Triple)) + if (!isConfigurationSupported(Triple)) return; // Test the DWARF APIs related to iterating across the children of a DIE using @@ -1458,7 +1458,7 @@ TEST(DWARFDebugInfo, TestEmptyChildren) { TEST(DWARFDebugInfo, TestAttributeIterators) { Triple Triple = getNormalizedDefaultTargetTriple(); - if (!isObjectEmissionSupported(Triple)) + if 
(!isConfigurationSupported(Triple)) return; // Test the DWARF APIs related to iterating across all attribute values in a @@ -1520,7 +1520,7 @@ TEST(DWARFDebugInfo, TestAttributeIterators) { TEST(DWARFDebugInfo, TestFindRecurse) { Triple Triple = getNormalizedDefaultTargetTriple(); - if (!isObjectEmissionSupported(Triple)) + if (!isConfigurationSupported(Triple)) return; uint16_t Version = 4; @@ -1734,7 +1734,7 @@ TEST(DWARFDebugInfo, TestDwarfToFunctions) { TEST(DWARFDebugInfo, TestFindAttrs) { Triple Triple = getNormalizedDefaultTargetTriple(); - if (!isObjectEmissionSupported(Triple)) + if (!isConfigurationSupported(Triple)) return; // Test the DWARFDie::find() and DWARFDie::findRecursively() that take an @@ -1797,7 +1797,7 @@ TEST(DWARFDebugInfo, TestFindAttrs) { TEST(DWARFDebugInfo, TestImplicitConstAbbrevs) { Triple Triple = getNormalizedDefaultTargetTriple(); - if (!isObjectEmissionSupported(Triple)) + if (!isConfigurationSupported(Triple)) return; uint16_t Version = 5; diff --git a/llvm/unittests/DebugInfo/DWARF/DWARFDieManualExtractTest.cpp b/llvm/unittests/DebugInfo/DWARF/DWARFDieManualExtractTest.cpp index 2333fc619edb..fd5f138f7733 100644 --- a/llvm/unittests/DebugInfo/DWARF/DWARFDieManualExtractTest.cpp +++ b/llvm/unittests/DebugInfo/DWARF/DWARFDieManualExtractTest.cpp @@ -24,7 +24,7 @@ TEST(DWARFDie, manualExtractDump) { typedef uint32_t AddrType; uint16_t Version = 4; Triple Triple = getDefaultTargetTripleForAddrSize(sizeof(AddrType)); - if (!isObjectEmissionSupported(Triple)) + if (!isConfigurationSupported(Triple)) return; auto ExpectedDG = dwarfgen::Generator::create(Triple, Version); diff --git a/llvm/unittests/DebugInfo/DWARF/DwarfUtils.cpp b/llvm/unittests/DebugInfo/DWARF/DwarfUtils.cpp index e337f34d1972..9a4a794b4451 100644 --- a/llvm/unittests/DebugInfo/DWARF/DwarfUtils.cpp +++ b/llvm/unittests/DebugInfo/DWARF/DwarfUtils.cpp @@ -48,12 +48,6 @@ Triple llvm::dwarf::utils::getDefaultTargetTripleForAddrSize(uint8_t AddrSize) { } bool llvm::dwarf::utils::isConfigurationSupported(Triple &T) { - initLLVMIfNeeded(); - std::string Err; - return TargetRegistry::lookupTarget(T.getTriple(), Err); -} - -bool llvm::dwarf::utils::isObjectEmissionSupported(Triple &T) { initLLVMIfNeeded(); std::string Err; const Target *TheTarget = TargetRegistry::lookupTarget(T.getTriple(), Err); diff --git a/llvm/unittests/DebugInfo/DWARF/DwarfUtils.h b/llvm/unittests/DebugInfo/DWARF/DwarfUtils.h index 00eaef25cfba..036071e0b567 100644 --- a/llvm/unittests/DebugInfo/DWARF/DwarfUtils.h +++ b/llvm/unittests/DebugInfo/DWARF/DwarfUtils.h @@ -21,7 +21,6 @@ namespace utils { Triple getDefaultTargetTripleForAddrSize(uint8_t AddrSize); Triple getNormalizedDefaultTargetTriple(); bool isConfigurationSupported(Triple &T); -bool isObjectEmissionSupported(Triple &T); } // end namespace utils } // end namespace dwarf From f5907ea1c0debddac72f5e4d15fc7fed30e36a72 Mon Sep 17 00:00:00 2001 From: Igor Kudrin Date: Wed, 22 Dec 2021 18:52:49 +0700 Subject: [PATCH 042/293] [unittest][DebugInfo/DWARF] Do not report skipped tests as passed This is similar to what we have already done to some other tests. See D102643, D102710, D102754. 
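The mechanics, as a small self-contained sketch (an assumed example, not one of the DWARF tests): returning early from a TEST body makes googletest count the test as passed, whereas GTEST_SKIP() records it as skipped and reports the reason.

  #include "gtest/gtest.h"

  TEST(Example, ReportsSkipInsteadOfPass) {
    bool Supported = false;  // stand-in for isConfigurationSupported(T)
    if (!Supported)
      GTEST_SKIP() << "configuration not supported";  // shows up as [ SKIPPED ]
    // ... real assertions would only run on supported configurations ...
    EXPECT_TRUE(Supported);
  }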
Differential Revision: https://reviews.llvm.org/D116108 --- .../DebugInfo/DWARF/DWARFDebugInfoTest.cpp | 26 ++++---- .../DebugInfo/DWARF/DWARFDebugLineTest.cpp | 66 +++++++++---------- .../DWARF/DWARFDieManualExtractTest.cpp | 2 +- .../DWARFExpressionCompactPrinterTest.cpp | 2 +- .../DWARF/DWARFExpressionCopyBytesTest.cpp | 2 +- 5 files changed, 49 insertions(+), 49 deletions(-) diff --git a/llvm/unittests/DebugInfo/DWARF/DWARFDebugInfoTest.cpp b/llvm/unittests/DebugInfo/DWARF/DWARFDebugInfoTest.cpp index 44f438e91f99..ddd057aa1121 100644 --- a/llvm/unittests/DebugInfo/DWARF/DWARFDebugInfoTest.cpp +++ b/llvm/unittests/DebugInfo/DWARF/DWARFDebugInfoTest.cpp @@ -44,7 +44,7 @@ template void TestAllForms() { Triple Triple = getDefaultTargetTripleForAddrSize(sizeof(AddrType)); if (!isConfigurationSupported(Triple)) - return; + GTEST_SKIP(); // Test that we can decode all DW_FORM values correctly. const AddrType AddrValue = (AddrType)0x0123456789abcdefULL; @@ -478,7 +478,7 @@ TEST(DWARFDebugInfo, TestDWARF32Version5Addr8AllForms) { template void TestChildren() { Triple Triple = getDefaultTargetTripleForAddrSize(sizeof(AddrType)); if (!isConfigurationSupported(Triple)) - return; + GTEST_SKIP(); // Test that we can decode DW_FORM_ref_addr values correctly in DWARF 2 with // 4 byte addresses. DW_FORM_ref_addr values should be 4 bytes when using @@ -620,7 +620,7 @@ TEST(DWARFDebugInfo, TestDWARF32Version4Addr8Children) { template void TestReferences() { Triple Triple = getDefaultTargetTripleForAddrSize(sizeof(AddrType)); if (!isConfigurationSupported(Triple)) - return; + GTEST_SKIP(); // Test that we can decode DW_FORM_refXXX values correctly in DWARF. auto ExpectedDG = dwarfgen::Generator::create(Triple, Version); @@ -882,7 +882,7 @@ TEST(DWARFDebugInfo, TestDWARF32Version4Addr8References) { template void TestAddresses() { Triple Triple = getDefaultTargetTripleForAddrSize(sizeof(AddrType)); if (!isConfigurationSupported(Triple)) - return; + GTEST_SKIP(); // Test the DWARF APIs related to accessing the DW_AT_low_pc and // DW_AT_high_pc. @@ -1070,7 +1070,7 @@ TEST(DWARFDebugInfo, TestStringOffsets) { #endif Triple Triple = getNormalizedDefaultTargetTriple(); if (!isConfigurationSupported(Triple)) - return; + GTEST_SKIP(); const char *String1 = "Hello"; const char *String2 = "World"; @@ -1134,7 +1134,7 @@ TEST(DWARFDebugInfo, TestStringOffsets) { TEST(DWARFDebugInfo, TestEmptyStringOffsets) { Triple Triple = getNormalizedDefaultTargetTriple(); if (!isConfigurationSupported(Triple)) - return; + GTEST_SKIP(); const char *String1 = "Hello"; @@ -1163,7 +1163,7 @@ TEST(DWARFDebugInfo, TestEmptyStringOffsets) { TEST(DWARFDebugInfo, TestRelations) { Triple Triple = getNormalizedDefaultTargetTriple(); if (!isConfigurationSupported(Triple)) - return; + GTEST_SKIP(); // Test the DWARF APIs related to accessing the DW_AT_low_pc and // DW_AT_high_pc. @@ -1350,7 +1350,7 @@ TEST(DWARFDebugInfo, TestDWARFDie) { TEST(DWARFDebugInfo, TestChildIterators) { Triple Triple = getNormalizedDefaultTargetTriple(); if (!isConfigurationSupported(Triple)) - return; + GTEST_SKIP(); // Test the DWARF APIs related to iterating across the children of a DIE using // the DWARFDie::iterator class. @@ -1459,7 +1459,7 @@ TEST(DWARFDebugInfo, TestEmptyChildren) { TEST(DWARFDebugInfo, TestAttributeIterators) { Triple Triple = getNormalizedDefaultTargetTriple(); if (!isConfigurationSupported(Triple)) - return; + GTEST_SKIP(); // Test the DWARF APIs related to iterating across all attribute values in a // a DWARFDie. 
@@ -1521,7 +1521,7 @@ TEST(DWARFDebugInfo, TestAttributeIterators) { TEST(DWARFDebugInfo, TestFindRecurse) { Triple Triple = getNormalizedDefaultTargetTriple(); if (!isConfigurationSupported(Triple)) - return; + GTEST_SKIP(); uint16_t Version = 4; auto ExpectedDG = dwarfgen::Generator::create(Triple, Version); @@ -1735,7 +1735,7 @@ TEST(DWARFDebugInfo, TestDwarfToFunctions) { TEST(DWARFDebugInfo, TestFindAttrs) { Triple Triple = getNormalizedDefaultTargetTriple(); if (!isConfigurationSupported(Triple)) - return; + GTEST_SKIP(); // Test the DWARFDie::find() and DWARFDie::findRecursively() that take an // ArrayRef value to make sure they work correctly. @@ -1798,7 +1798,7 @@ TEST(DWARFDebugInfo, TestFindAttrs) { TEST(DWARFDebugInfo, TestImplicitConstAbbrevs) { Triple Triple = getNormalizedDefaultTargetTriple(); if (!isConfigurationSupported(Triple)) - return; + GTEST_SKIP(); uint16_t Version = 5; auto ExpectedDG = dwarfgen::Generator::create(Triple, Version); @@ -1923,7 +1923,7 @@ TEST(DWARFDebugInfo, TestImplicitConstAbbrevs) { TEST(DWARFDebugInfo, TestErrorReporting) { Triple Triple("x86_64-pc-linux"); if (!isConfigurationSupported(Triple)) - return; + GTEST_SKIP(); auto ExpectedDG = dwarfgen::Generator::create(Triple, 4 /*DwarfVersion*/); ASSERT_THAT_EXPECTED(ExpectedDG, Succeeded()); diff --git a/llvm/unittests/DebugInfo/DWARF/DWARFDebugLineTest.cpp b/llvm/unittests/DebugInfo/DWARF/DWARFDebugLineTest.cpp index 71b7361f4fd5..731aa4f0f3d0 100644 --- a/llvm/unittests/DebugInfo/DWARF/DWARFDebugLineTest.cpp +++ b/llvm/unittests/DebugInfo/DWARF/DWARFDebugLineTest.cpp @@ -187,7 +187,7 @@ TEST_F(DebugLineBasicFixture, DISABLED_GetOrParseLineTableAtInvalidOffset) { TEST_F(DebugLineBasicFixture, GetOrParseLineTableAtInvalidOffset) { #endif if (!setupGenerator()) - return; + GTEST_SKIP(); generate(); EXPECT_THAT_EXPECTED( @@ -214,7 +214,7 @@ TEST_F(DebugLineBasicFixture, TEST_F(DebugLineBasicFixture, GetOrParseLineTableAtInvalidOffsetAfterData) { #endif if (!setupGenerator()) - return; + GTEST_SKIP(); LineTable < = Gen->addLineTable(); LT.setCustomPrologue({{0, LineTable::Byte}}); @@ -239,7 +239,7 @@ TEST_P(DebugLineParameterisedFixture, DISABLED_PrologueGetLength) { TEST_P(DebugLineParameterisedFixture, PrologueGetLength) { #endif if (!setupGenerator(Version)) - return; + GTEST_SKIP(); LineTable < = Gen->addLineTable(Format); DWARFDebugLine::Prologue Prologue = LT.createBasicPrologue(); LT.setPrologue(Prologue); @@ -266,7 +266,7 @@ TEST_P(DebugLineParameterisedFixture, DISABLED_GetOrParseLineTableValidTable) { TEST_P(DebugLineParameterisedFixture, GetOrParseLineTableValidTable) { #endif if (!setupGenerator(Version)) - return; + GTEST_SKIP(); SCOPED_TRACE("Checking Version " + std::to_string(Version) + ", Format " + (Format == DWARF64 ? 
"DWARF64" : "DWARF32")); @@ -336,7 +336,7 @@ TEST_F(DebugLineBasicFixture, DISABLED_ErrorForReservedLength) { TEST_F(DebugLineBasicFixture, ErrorForReservedLength) { #endif if (!setupGenerator()) - return; + GTEST_SKIP(); LineTable < = Gen->addLineTable(); LT.setCustomPrologue({{0xfffffff0, LineTable::Long}}); @@ -364,7 +364,7 @@ TEST_P(DebugLineUnsupportedVersionFixture, TEST_P(DebugLineUnsupportedVersionFixture, ErrorForUnsupportedVersion) { #endif if (!setupGenerator()) - return; + GTEST_SKIP(); LineTable < = Gen->addLineTable(); LT.setCustomPrologue( @@ -390,7 +390,7 @@ TEST_F(DebugLineBasicFixture, DISABLED_ErrorForInvalidV5IncludeDirTable) { TEST_F(DebugLineBasicFixture, ErrorForInvalidV5IncludeDirTable) { #endif if (!setupGenerator(5)) - return; + GTEST_SKIP(); LineTable < = Gen->addLineTable(); LT.setCustomPrologue({ @@ -435,7 +435,7 @@ TEST_P(DebugLineParameterisedFixture, DISABLED_ErrorForTooLargePrologueLength) { TEST_P(DebugLineParameterisedFixture, ErrorForTooLargePrologueLength) { #endif if (!setupGenerator(Version)) - return; + GTEST_SKIP(); SCOPED_TRACE("Checking Version " + std::to_string(Version) + ", Format " + (Format == DWARF64 ? "DWARF64" : "DWARF32")); @@ -475,7 +475,7 @@ TEST_P(DebugLineParameterisedFixture, DISABLED_ErrorForTooShortPrologueLength) { TEST_P(DebugLineParameterisedFixture, ErrorForTooShortPrologueLength) { #endif if (!setupGenerator(Version)) - return; + GTEST_SKIP(); SCOPED_TRACE("Checking Version " + std::to_string(Version) + ", Format " + (Format == DWARF64 ? "DWARF64" : "DWARF32")); @@ -534,7 +534,7 @@ TEST_F(DebugLineBasicFixture, TEST_F(DebugLineBasicFixture, ErrorForExtendedOpcodeLengthSmallerThanExpected) { #endif if (!setupGenerator()) - return; + GTEST_SKIP(); LineTable < = Gen->addLineTable(); LT.addByte(0xaa); @@ -568,7 +568,7 @@ TEST_F(DebugLineBasicFixture, TEST_F(DebugLineBasicFixture, ErrorForExtendedOpcodeLengthLargerThanExpected) { #endif if (!setupGenerator()) - return; + GTEST_SKIP(); LineTable < = Gen->addLineTable(); LT.addByte(0xaa); @@ -601,7 +601,7 @@ TEST_F(DebugLineBasicFixture, DISABLED_ErrorForUnitLengthTooLarge) { TEST_F(DebugLineBasicFixture, ErrorForUnitLengthTooLarge) { #endif if (!setupGenerator()) - return; + GTEST_SKIP(); LineTable &Padding = Gen->addLineTable(); // Add some padding to show that a non-zero offset is handled correctly. @@ -634,7 +634,7 @@ TEST_F(DebugLineBasicFixture, DISABLED_ErrorForMismatchedAddressSize) { TEST_F(DebugLineBasicFixture, ErrorForMismatchedAddressSize) { #endif if (!setupGenerator(4, 8)) - return; + GTEST_SKIP(); LineTable < = Gen->addLineTable(); // The line data extractor expects size 8 (Quad) addresses. @@ -669,7 +669,7 @@ TEST_F(DebugLineBasicFixture, ErrorForMismatchedAddressSizeUnsetInitialAddress) { #endif if (!setupGenerator(4, 0)) - return; + GTEST_SKIP(); LineTable < = Gen->addLineTable(); uint64_t Addr1 = 0x11223344; @@ -703,7 +703,7 @@ TEST_F(DebugLineBasicFixture, // Use DWARF v4, and 0 for data extractor address size so that the address // size is derived from the opcode length. if (!setupGenerator(4, 0)) - return; + GTEST_SKIP(); LineTable < = Gen->addLineTable(); // 4 == length of the extended opcode, i.e. 1 for the opcode itself and 3 for @@ -739,7 +739,7 @@ TEST_F(DebugLineBasicFixture, ErrorForAddressSizeGreaterThanByteSize) { // Use DWARF v4, and 0 for data extractor address size so that the address // size is derived from the opcode length. 
if (!setupGenerator(4, 0)) - return; + GTEST_SKIP(); LineTable < = Gen->addLineTable(); // Specifically use an operand size that has a trailing byte of a supported @@ -768,7 +768,7 @@ TEST_F(DebugLineBasicFixture, ErrorForUnsupportedAddressSizeDefinedInHeader) { // Use 0 for data extractor address size so that it does not clash with the // header address size. if (!setupGenerator(5, 0)) - return; + GTEST_SKIP(); LineTable < = Gen->addLineTable(); // AddressSize + 1 == length of the extended opcode, i.e. 1 for the opcode @@ -807,7 +807,7 @@ TEST_F(DebugLineBasicFixture, DISABLED_CallbackUsedForUnterminatedSequence) { TEST_F(DebugLineBasicFixture, CallbackUsedForUnterminatedSequence) { #endif if (!setupGenerator()) - return; + GTEST_SKIP(); LineTable < = Gen->addLineTable(); LT.addExtendedOpcode(9, DW_LNE_set_address, @@ -952,7 +952,7 @@ struct AdjustAddressFixtureBase : public CommonFixture { void runTest(bool CheckAdvancePC, Twine MsgSuffix) { if (!setupGenerator(Version)) - return; + GTEST_SKIP(); setupTables(/*AddAdvancePCFirstTable=*/CheckAdvancePC); @@ -1134,7 +1134,7 @@ TEST_F(DebugLineBasicFixture, DISABLED_ParserParsesCorrectly) { TEST_F(DebugLineBasicFixture, ParserParsesCorrectly) { #endif if (!setupGenerator()) - return; + GTEST_SKIP(); DWARFDebugLine::SectionParser Parser = setupParser(); @@ -1165,7 +1165,7 @@ TEST_F(DebugLineBasicFixture, DISABLED_ParserSkipsCorrectly) { TEST_F(DebugLineBasicFixture, ParserSkipsCorrectly) { #endif if (!setupGenerator()) - return; + GTEST_SKIP(); DWARFDebugLine::SectionParser Parser = setupParser(); @@ -1190,7 +1190,7 @@ TEST_F(DebugLineBasicFixture, DISABLED_ParserAlwaysDoneForEmptySection) { TEST_F(DebugLineBasicFixture, ParserAlwaysDoneForEmptySection) { #endif if (!setupGenerator()) - return; + GTEST_SKIP(); generate(); DWARFDebugLine::SectionParser Parser(LineData, *Context, Units); @@ -1205,7 +1205,7 @@ TEST_F(DebugLineBasicFixture, TEST_F(DebugLineBasicFixture, ParserMarkedAsDoneForBadLengthWhenParsing) { #endif if (!setupGenerator()) - return; + GTEST_SKIP(); LineTable < = Gen->addLineTable(); LT.setCustomPrologue({{0xfffffff0, LineTable::Long}}); @@ -1233,7 +1233,7 @@ TEST_F(DebugLineBasicFixture, TEST_F(DebugLineBasicFixture, ParserMarkedAsDoneForBadLengthWhenSkipping) { #endif if (!setupGenerator()) - return; + GTEST_SKIP(); LineTable < = Gen->addLineTable(); LT.setCustomPrologue({{0xfffffff0, LineTable::Long}}); @@ -1261,7 +1261,7 @@ TEST_F(DebugLineBasicFixture, TEST_F(DebugLineBasicFixture, ParserReportsFirstErrorInEachTableWhenParsing) { #endif if (!setupGenerator()) - return; + GTEST_SKIP(); LineTable < = Gen->addLineTable(DWARF32); LT.setCustomPrologue({{2, LineTable::Long}, {0, LineTable::Half}}); @@ -1292,7 +1292,7 @@ TEST_F(DebugLineBasicFixture, TEST_F(DebugLineBasicFixture, ParserReportsNonPrologueProblemsWhenParsing) { #endif if (!setupGenerator()) - return; + GTEST_SKIP(); LineTable < = Gen->addLineTable(DWARF32); LT.addExtendedOpcode(0x42, DW_LNE_end_sequence, {}); @@ -1330,7 +1330,7 @@ TEST_F(DebugLineBasicFixture, ParserReportsPrologueErrorsInEachTableWhenSkipping) { #endif if (!setupGenerator()) - return; + GTEST_SKIP(); LineTable < = Gen->addLineTable(DWARF32); LT.setCustomPrologue({{2, LineTable::Long}, {0, LineTable::Half}}); @@ -1361,7 +1361,7 @@ TEST_F(DebugLineBasicFixture, TEST_F(DebugLineBasicFixture, ParserIgnoresNonPrologueErrorsWhenSkipping) { #endif if (!setupGenerator()) - return; + GTEST_SKIP(); LineTable < = Gen->addLineTable(DWARF32); LT.addExtendedOpcode(42, DW_LNE_end_sequence, {}); @@ -1381,7 
+1381,7 @@ TEST_F(DebugLineBasicFixture, DISABLED_VerboseOutput) { TEST_F(DebugLineBasicFixture, VerboseOutput) { #endif if (!setupGenerator(5)) - return; + GTEST_SKIP(); LineTable < = Gen->addLineTable(); LT.addByte(0); // Extended opcode with zero length. @@ -1541,7 +1541,7 @@ TEST_P(TruncatedPrologueFixture, DISABLED_ErrorForTruncatedPrologue) { TEST_P(TruncatedPrologueFixture, ErrorForTruncatedPrologue) { #endif if (!setupGenerator(Version)) - return; + GTEST_SKIP(); LineTable &Padding = Gen->addLineTable(); // Add some padding to show that a non-zero offset is handled correctly. @@ -1723,7 +1723,7 @@ TEST_P(TruncatedExtendedOpcodeFixture, TEST_P(TruncatedExtendedOpcodeFixture, ErrorForTruncatedExtendedOpcode) { #endif if (!setupGenerator()) - return; + GTEST_SKIP(); LineTable < = setupTable(); LT.addExtendedOpcode(OpcodeLength, Opcode, Operands); runTest(0); @@ -1807,7 +1807,7 @@ TEST_P(TruncatedStandardOpcodeFixture, TEST_P(TruncatedStandardOpcodeFixture, ErrorForTruncatedStandardOpcode) { #endif if (!setupGenerator()) - return; + GTEST_SKIP(); LineTable < = setupTable(); LT.addStandardOpcode(Opcode, Operands); runTest(Opcode); @@ -1867,7 +1867,7 @@ TEST_F(DebugLineBasicFixture, DISABLED_PrintPathsProperly) { TEST_F(DebugLineBasicFixture, PrintPathsProperly) { #endif if (!setupGenerator(5)) - return; + GTEST_SKIP(); LineTable < = Gen->addLineTable(); DWARFDebugLine::Prologue P = LT.createBasicPrologue(); diff --git a/llvm/unittests/DebugInfo/DWARF/DWARFDieManualExtractTest.cpp b/llvm/unittests/DebugInfo/DWARF/DWARFDieManualExtractTest.cpp index fd5f138f7733..6e2ed2f62bcf 100644 --- a/llvm/unittests/DebugInfo/DWARF/DWARFDieManualExtractTest.cpp +++ b/llvm/unittests/DebugInfo/DWARF/DWARFDieManualExtractTest.cpp @@ -25,7 +25,7 @@ TEST(DWARFDie, manualExtractDump) { uint16_t Version = 4; Triple Triple = getDefaultTargetTripleForAddrSize(sizeof(AddrType)); if (!isConfigurationSupported(Triple)) - return; + GTEST_SKIP(); auto ExpectedDG = dwarfgen::Generator::create(Triple, Version); ASSERT_THAT_EXPECTED(ExpectedDG, Succeeded()); diff --git a/llvm/unittests/DebugInfo/DWARF/DWARFExpressionCompactPrinterTest.cpp b/llvm/unittests/DebugInfo/DWARF/DWARFExpressionCompactPrinterTest.cpp index ae7301b84314..68738e3a3044 100644 --- a/llvm/unittests/DebugInfo/DWARF/DWARFExpressionCompactPrinterTest.cpp +++ b/llvm/unittests/DebugInfo/DWARF/DWARFExpressionCompactPrinterTest.cpp @@ -51,7 +51,7 @@ void DWARFExpressionCompactPrinterTest::TestExprPrinter( ArrayRef ExprData, StringRef Expected) { // If we didn't build ARM, do not run the test. if (!MRI) - return; + GTEST_SKIP(); // Print the expression, passing in the subprogram DIE, and check that the // result is as expected. diff --git a/llvm/unittests/DebugInfo/DWARF/DWARFExpressionCopyBytesTest.cpp b/llvm/unittests/DebugInfo/DWARF/DWARFExpressionCopyBytesTest.cpp index d94341554719..5a6ce2ffbbe8 100644 --- a/llvm/unittests/DebugInfo/DWARF/DWARFExpressionCopyBytesTest.cpp +++ b/llvm/unittests/DebugInfo/DWARF/DWARFExpressionCopyBytesTest.cpp @@ -185,7 +185,7 @@ void DWARFExpressionCopyBytesTest::readAndCheckObjFile( void DWARFExpressionCopyBytesTest::testExpr(ArrayRef ExprData) { // If we didn't build x86, do not run the test. if (!MRI) - return; + GTEST_SKIP(); DataExtractor DE(ExprData, true, 8); DWARFExpression Expr(DE, 8); From c83ef407dfa1b6b96c3068512edb479e2b33fc7f Mon Sep 17 00:00:00 2001 From: Florian Hahn Date: Wed, 22 Dec 2021 11:56:40 +0000 Subject: [PATCH 043/293] [LV] Adjust comment to say the induction is created in header. 
Follow-up suggested post-commit for 1a54889f48fa. --- llvm/lib/Transforms/Vectorize/LoopVectorize.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp index ec95cd62ed26..5dafc561baf4 100644 --- a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp +++ b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp @@ -8448,7 +8448,7 @@ VPValue *VPRecipeBuilder::createBlockInMask(BasicBlock *BB, VPlanPtr &Plan) { // Introduce the early-exit compare IV <= BTC to form header block mask. // This is used instead of IV < TC because TC may wrap, unlike BTC. - // Start by constructing the desired canonical IV. + // Start by constructing the desired canonical IV in the header block. VPValue *IV = nullptr; if (Legal->getPrimaryInduction()) IV = Plan->getOrAddVPValue(Legal->getPrimaryInduction()); From ede7c2438f39e8901a6479f180255ad0f5f36d32 Mon Sep 17 00:00:00 2001 From: Florian Hahn Date: Wed, 22 Dec 2021 12:44:24 +0000 Subject: [PATCH 044/293] [VPlan] Create header & latch blocks for skeleton up front (NFC). By creating the header and latch blocks up front and adding blocks and recipes in between those 2 blocks we ensure that the entry and exits of the plan remain valid throughout construction. In order to avoid test changes and keep printing of the plans the same, we use the new header block instead of creating a new block on the first iteration of the loop traversing the original loop. We also fold the latch into its predecessor. This is a follow up to a post-commit suggestion in D114586. Reviewed By: Ayal Differential Revision: https://reviews.llvm.org/D115793 --- .../Transforms/Vectorize/LoopVectorize.cpp | 50 +++++++++++-------- llvm/lib/Transforms/Vectorize/VPlan.h | 48 ++++++++++++++---- .../LoopVectorize/reduction-order.ll | 4 +- .../LoopVectorize/select-reduction.ll | 2 +- 4 files changed, 72 insertions(+), 32 deletions(-) diff --git a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp index 5dafc561baf4..b3ba2aa13d37 100644 --- a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp +++ b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp @@ -8795,13 +8795,17 @@ VPBasicBlock *VPRecipeBuilder::handleReplication( return VPBB; } LLVM_DEBUG(dbgs() << "LV: Scalarizing and predicating:" << *I << "\n"); - assert(VPBB->getSuccessors().empty() && - "VPBB has successors when handling predicated replication."); + + VPBlockBase *SingleSucc = VPBB->getSingleSuccessor(); + assert(SingleSucc && "VPBB must have a single successor when handling " + "predicated replication."); + VPBlockUtils::disconnectBlocks(VPBB, SingleSucc); // Record predicated instructions for above packing optimizations. VPBlockBase *Region = createReplicateRegion(I, Recipe, Plan); VPBlockUtils::insertBlockAfter(Region, VPBB); auto *RegSucc = new VPBasicBlock(); VPBlockUtils::insertBlockAfter(RegSucc, Region); + VPBlockUtils::connectBlocks(RegSucc, SingleSucc); return RegSucc; } @@ -9022,30 +9026,25 @@ VPlanPtr LoopVectorizationPlanner::buildVPlanWithVPRecipes( // visit each basic block after having visited its predecessor basic blocks. // --------------------------------------------------------------------------- - auto Plan = std::make_unique(); + // Create initial VPlan skeleton, with separate header and latch blocks. 
+ VPBasicBlock *HeaderVPBB = new VPBasicBlock(); + VPBasicBlock *LatchVPBB = new VPBasicBlock("vector.latch"); + VPBlockUtils::insertBlockAfter(LatchVPBB, HeaderVPBB); + auto *TopRegion = new VPRegionBlock(HeaderVPBB, LatchVPBB, "vector loop"); + auto Plan = std::make_unique(TopRegion); // Scan the body of the loop in a topological order to visit each basic block // after having visited its predecessor basic blocks. LoopBlocksDFS DFS(OrigLoop); DFS.perform(LI); - VPBasicBlock *VPBB = nullptr; - VPBasicBlock *HeaderVPBB = nullptr; + VPBasicBlock *VPBB = HeaderVPBB; SmallVector InductionsToMove; for (BasicBlock *BB : make_range(DFS.beginRPO(), DFS.endRPO())) { // Relevant instructions from basic block BB will be grouped into VPRecipe // ingredients and fill a new VPBasicBlock. unsigned VPBBsForBB = 0; - auto *FirstVPBBForBB = new VPBasicBlock(BB->getName()); - if (VPBB) - VPBlockUtils::insertBlockAfter(FirstVPBBForBB, VPBB); - else { - auto *TopRegion = new VPRegionBlock("vector loop"); - TopRegion->setEntry(FirstVPBBForBB); - Plan->setEntry(TopRegion); - HeaderVPBB = FirstVPBBForBB; - } - VPBB = FirstVPBBForBB; + VPBB->setName(BB->getName()); Builder.setInsertPoint(VPBB); // Introduce each ingredient into VPlan. @@ -9112,8 +9111,17 @@ VPlanPtr LoopVectorizationPlanner::buildVPlanWithVPRecipes( : ""); } } + + VPBlockUtils::insertBlockAfter(new VPBasicBlock(), VPBB); + VPBB = cast(VPBB->getSingleSuccessor()); } + // Fold the last, empty block into its predecessor. + VPBB = VPBlockUtils::tryToMergeBlockIntoPredecessor(VPBB); + assert(VPBB && "expected to fold last (empty) block"); + // After here, VPBB should not be used. + VPBB = nullptr; + assert(isa(Plan->getEntry()) && !Plan->getEntry()->getEntryBasicBlock()->empty() && "entry block must be set to a VPRegionBlock having a non-empty entry " @@ -9183,13 +9191,9 @@ VPlanPtr LoopVectorizationPlanner::buildVPlanWithVPRecipes( VPBlockUtils::disconnectBlocks(SplitPred, SplitBlock); VPBlockUtils::connectBlocks(SplitPred, SinkRegion); VPBlockUtils::connectBlocks(SinkRegion, SplitBlock); - if (VPBB == SplitPred) - VPBB = SplitBlock; } } - cast(Plan->getEntry())->setExit(VPBB); - VPlanTransforms::removeRedundantInductionCasts(*Plan); // Now that sink-after is done, move induction recipes for optimized truncates @@ -9198,7 +9202,8 @@ VPlanPtr LoopVectorizationPlanner::buildVPlanWithVPRecipes( Ind->moveBefore(*HeaderVPBB, HeaderVPBB->getFirstNonPhi()); // Adjust the recipes for any inloop reductions. - adjustRecipesForReductions(VPBB, Plan, RecipeBuilder, Range.Start); + adjustRecipesForReductions(cast(TopRegion->getExit()), Plan, + RecipeBuilder, Range.Start); // Introduce a recipe to combine the incoming and previous values of a // first-order recurrence. @@ -9278,6 +9283,11 @@ VPlanPtr LoopVectorizationPlanner::buildVPlanWithVPRecipes( RSO.flush(); Plan->setName(PlanName); + // Fold Exit block into its predecessor if possible. + // TODO: Fold block earlier once all VPlan transforms properly maintain a + // VPBasicBlock as exit. + VPBlockUtils::tryToMergeBlockIntoPredecessor(TopRegion->getExit()); + assert(VPlanVerifier::verifyPlanIsValid(*Plan) && "VPlan is invalid"); return Plan; } diff --git a/llvm/lib/Transforms/Vectorize/VPlan.h b/llvm/lib/Transforms/Vectorize/VPlan.h index dfe75105ac42..f4a1883e35d5 100644 --- a/llvm/lib/Transforms/Vectorize/VPlan.h +++ b/llvm/lib/Transforms/Vectorize/VPlan.h @@ -2352,18 +2352,23 @@ class VPBlockUtils { /// Insert disconnected VPBlockBase \p NewBlock after \p BlockPtr. 
Add \p /// NewBlock as successor of \p BlockPtr and \p BlockPtr as predecessor of \p - /// NewBlock, and propagate \p BlockPtr parent to \p NewBlock. If \p BlockPtr - /// has more than one successor, its conditional bit is propagated to \p - /// NewBlock. \p NewBlock must have neither successors nor predecessors. + /// NewBlock, and propagate \p BlockPtr parent to \p NewBlock. \p BlockPtr's + /// successors are moved from \p BlockPtr to \p NewBlock and \p BlockPtr's + /// conditional bit is propagated to \p NewBlock. \p NewBlock must have + /// neither successors nor predecessors. static void insertBlockAfter(VPBlockBase *NewBlock, VPBlockBase *BlockPtr) { assert(NewBlock->getSuccessors().empty() && - "Can't insert new block with successors."); - // TODO: move successors from BlockPtr to NewBlock when this functionality - // is necessary. For now, setBlockSingleSuccessor will assert if BlockPtr - // already has successors. - BlockPtr->setOneSuccessor(NewBlock); - NewBlock->setPredecessors({BlockPtr}); + NewBlock->getPredecessors().empty() && + "Can't insert new block with predecessors or successors."); NewBlock->setParent(BlockPtr->getParent()); + SmallVector Succs(BlockPtr->successors()); + for (VPBlockBase *Succ : Succs) { + disconnectBlocks(BlockPtr, Succ); + connectBlocks(NewBlock, Succ); + } + NewBlock->setCondBit(BlockPtr->getCondBit()); + BlockPtr->setCondBit(nullptr); + connectBlocks(BlockPtr, NewBlock); } /// Insert disconnected VPBlockBases \p IfTrue and \p IfFalse after \p @@ -2406,6 +2411,31 @@ class VPBlockUtils { To->removePredecessor(From); } + /// Try to merge \p Block into its single predecessor, if \p Block is a + /// VPBasicBlock and its predecessor has a single successor. Returns a pointer + /// to the predecessor \p Block was merged into or nullptr otherwise. + static VPBasicBlock *tryToMergeBlockIntoPredecessor(VPBlockBase *Block) { + auto *VPBB = dyn_cast(Block); + auto *PredVPBB = + dyn_cast_or_null(Block->getSinglePredecessor()); + if (!VPBB || !PredVPBB || PredVPBB->getNumSuccessors() != 1) + return nullptr; + + for (VPRecipeBase &R : make_early_inc_range(*VPBB)) + R.moveBefore(*PredVPBB, PredVPBB->end()); + VPBlockUtils::disconnectBlocks(PredVPBB, VPBB); + auto *ParentRegion = cast(Block->getParent()); + if (ParentRegion->getExit() == Block) + ParentRegion->setExit(PredVPBB); + SmallVector Successors(Block->successors()); + for (auto *Succ : Successors) { + VPBlockUtils::disconnectBlocks(Block, Succ); + VPBlockUtils::connectBlocks(PredVPBB, Succ); + } + delete Block; + return PredVPBB; + } + /// Returns true if the edge \p FromBlock -> \p ToBlock is a back-edge. static bool isBackEdge(const VPBlockBase *FromBlock, const VPBlockBase *ToBlock, const VPLoopInfo *VPLI) { diff --git a/llvm/test/Transforms/LoopVectorize/reduction-order.ll b/llvm/test/Transforms/LoopVectorize/reduction-order.ll index 07201c0dfd78..47ac8e424fd6 100644 --- a/llvm/test/Transforms/LoopVectorize/reduction-order.ll +++ b/llvm/test/Transforms/LoopVectorize/reduction-order.ll @@ -7,9 +7,9 @@ target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16 ; in deterministic order. 
; CHECK-LABEL: @foo( ; CHECK: vector.body: -; CHECK: icmp ule <4 x i64> -; CHECK-NEXT: %[[VAR1:.*]] = add <4 x i32> , %vec.phi1 +; CHECK: %[[VAR1:.*]] = add <4 x i32> , %vec.phi1 ; CHECK-NEXT: %[[VAR2:.*]] = add <4 x i32> %vec.phi, +; CHECK-NEXT: icmp ule <4 x i64> ; CHECK-NEXT: select <4 x i1> {{.*}}, <4 x i32> %[[VAR2]], <4 x i32> ; CHECK-NEXT: select <4 x i1> {{.*}}, <4 x i32> %[[VAR1]], <4 x i32> ; CHECK: br i1 {{.*}}, label %middle.block, label %vector.body diff --git a/llvm/test/Transforms/LoopVectorize/select-reduction.ll b/llvm/test/Transforms/LoopVectorize/select-reduction.ll index 70920bd2a986..e487b3a34f78 100644 --- a/llvm/test/Transforms/LoopVectorize/select-reduction.ll +++ b/llvm/test/Transforms/LoopVectorize/select-reduction.ll @@ -33,9 +33,9 @@ define i32 @test(i64 %N, i32 %x) { ; CHECK-NEXT: [[BROADCAST_SPLATINSERT3:%.*]] = insertelement <4 x i64> poison, i64 [[INDEX]], i32 0 ; CHECK-NEXT: [[BROADCAST_SPLAT4:%.*]] = shufflevector <4 x i64> [[BROADCAST_SPLATINSERT3]], <4 x i64> poison, <4 x i32> zeroinitializer ; CHECK-NEXT: [[VEC_IV:%.*]] = add <4 x i64> [[BROADCAST_SPLAT4]], -; CHECK-NEXT: [[TMP1:%.*]] = icmp ule <4 x i64> [[VEC_IV]], [[BROADCAST_SPLAT]] ; CHECK-NEXT: [[TMP2:%.*]] = icmp sgt <4 x i32> [[VEC_PHI]], ; CHECK-NEXT: [[TMP3]] = select <4 x i1> [[TMP2]], <4 x i32> [[VEC_PHI]], <4 x i32> +; CHECK-NEXT: [[TMP1:%.*]] = icmp ule <4 x i64> [[VEC_IV]], [[BROADCAST_SPLAT]] ; CHECK-NEXT: [[TMP4:%.*]] = select <4 x i1> [[TMP1]], <4 x i32> [[TMP3]], <4 x i32> [[VEC_PHI]] ; CHECK-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], 4 ; CHECK-NEXT: [[TMP5:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] From 2efc6892d89dc15c1697c411e0031d126385a5f1 Mon Sep 17 00:00:00 2001 From: Pavel Labath Date: Fri, 17 Dec 2021 13:02:21 +0100 Subject: [PATCH 045/293] [lldb/python] Avoid more dangling pointers in python glue code --- lldb/bindings/python/python-swigsafecast.swig | 56 ++++++++++--------- lldb/bindings/python/python-wrapper.swig | 54 +++++++----------- lldb/include/lldb/API/SBSymbolContext.h | 4 +- lldb/include/lldb/API/SBTypeSummary.h | 4 +- lldb/source/API/SBFrame.cpp | 6 +- lldb/source/API/SBSymbolContext.cpp | 17 ++---- lldb/source/API/SBSymbolContextList.cpp | 5 +- lldb/source/API/SBTypeSummary.cpp | 21 ++----- 8 files changed, 64 insertions(+), 103 deletions(-) diff --git a/lldb/bindings/python/python-swigsafecast.swig b/lldb/bindings/python/python-swigsafecast.swig index 25c2f44106bc..7d639e664f53 100644 --- a/lldb/bindings/python/python-swigsafecast.swig +++ b/lldb/bindings/python/python-swigsafecast.swig @@ -5,38 +5,11 @@ PyObject *SBTypeToSWIGWrapper(lldb::SBEvent &event_sb) { return SWIG_NewPointerObj(&event_sb, SWIGTYPE_p_lldb__SBEvent, 0); } -PyObject *SBTypeToSWIGWrapper(lldb::SBWatchpoint &watchpoint_sb) { - return SWIG_NewPointerObj(&watchpoint_sb, SWIGTYPE_p_lldb__SBWatchpoint, 0); -} - -PyObject * -SBTypeToSWIGWrapper(lldb::SBBreakpointLocation &breakpoint_location_sb) { - return SWIG_NewPointerObj(&breakpoint_location_sb, - SWIGTYPE_p_lldb__SBBreakpointLocation, 0); -} - PyObject *SBTypeToSWIGWrapper(lldb::SBCommandReturnObject &cmd_ret_obj_sb) { return SWIG_NewPointerObj(&cmd_ret_obj_sb, SWIGTYPE_p_lldb__SBCommandReturnObject, 0); } -PyObject *SBTypeToSWIGWrapper(lldb::SBExecutionContext &ctx_sb) { - return SWIG_NewPointerObj(&ctx_sb, SWIGTYPE_p_lldb__SBExecutionContext, 0); -} - -PyObject *SBTypeToSWIGWrapper(lldb::SBTypeSummaryOptions &summary_options_sb) { - return SWIG_NewPointerObj(&summary_options_sb, - SWIGTYPE_p_lldb__SBTypeSummaryOptions, 0); -} - -PyObject 
*SBTypeToSWIGWrapper(lldb::SBSymbolContext &sym_ctx_sb) { - return SWIG_NewPointerObj(&sym_ctx_sb, SWIGTYPE_p_lldb__SBSymbolContext, 0); -} - -PyObject *SBTypeToSWIGWrapper(lldb::SBStream &stream_sb) { - return SWIG_NewPointerObj(&stream_sb, SWIGTYPE_p_lldb__SBStream, 0); -} - PythonObject ToSWIGHelper(void *obj, swig_type_info *info) { return {PyRefType::Owned, SWIG_NewPointerObj(obj, info, SWIG_POINTER_OWN)}; } @@ -69,6 +42,10 @@ PythonObject ToSWIGWrapper(lldb::BreakpointSP breakpoint_sp) { SWIGTYPE_p_lldb__SBBreakpoint); } +PythonObject ToSWIGWrapper(std::unique_ptr stream_sb) { + return ToSWIGHelper(stream_sb.release(), SWIGTYPE_p_lldb__SBStream); +} + PythonObject ToSWIGWrapper(std::unique_ptr data_sb) { return ToSWIGHelper(data_sb.release(), SWIGTYPE_p_lldb__SBStructuredData); } @@ -92,5 +69,30 @@ PythonObject ToSWIGWrapper(lldb::DebuggerSP debugger_sp) { SWIGTYPE_p_lldb__SBDebugger); } +PythonObject ToSWIGWrapper(lldb::WatchpointSP watchpoint_sp) { + return ToSWIGHelper(new lldb::SBWatchpoint(std::move(watchpoint_sp)), + SWIGTYPE_p_lldb__SBWatchpoint); +} + +PythonObject ToSWIGWrapper(lldb::BreakpointLocationSP bp_loc_sp) { + return ToSWIGHelper(new lldb::SBBreakpointLocation(std::move(bp_loc_sp)), + SWIGTYPE_p_lldb__SBBreakpointLocation); +} + +PythonObject ToSWIGWrapper(lldb::ExecutionContextRefSP ctx_sp) { + return ToSWIGHelper(new lldb::SBExecutionContext(std::move(ctx_sp)), + SWIGTYPE_p_lldb__SBExecutionContext); +} + +PythonObject ToSWIGWrapper(const TypeSummaryOptions &summary_options) { + return ToSWIGHelper(new lldb::SBTypeSummaryOptions(summary_options), + SWIGTYPE_p_lldb__SBTypeSummaryOptions); +} + +PythonObject ToSWIGWrapper(const SymbolContext &sym_ctx) { + return ToSWIGHelper(new lldb::SBSymbolContext(sym_ctx), + SWIGTYPE_p_lldb__SBSymbolContext); +} + } // namespace python } // namespace lldb_private diff --git a/lldb/bindings/python/python-wrapper.swig b/lldb/bindings/python/python-wrapper.swig index 5f85af2ba6ce..a2c1f756a0a2 100644 --- a/lldb/bindings/python/python-wrapper.swig +++ b/lldb/bindings/python/python-wrapper.swig @@ -38,14 +38,12 @@ llvm::Expected lldb_private::LLDBSwigPythonBreakpointCallbackFunction( return arg_info.takeError(); PythonObject frame_arg = ToSWIGWrapper(frame_sp); - PythonObject bp_loc_arg(PyRefType::Owned, SBTypeToSWIGWrapper(sb_bp_loc)); + PythonObject bp_loc_arg = ToSWIGWrapper(bp_loc_sp); - auto result = [&]() -> Expected { - // If the called function doesn't take extra_args, drop them here: - if (max_positional_args < 4) - return pfunc.Call(frame_arg, bp_loc_arg, dict); - return pfunc.Call(frame_arg, bp_loc_arg, ToSWIGWrapper(args_impl), dict); - }(); + auto result = + max_positional_args < 4 + ? 
pfunc.Call(frame_arg, bp_loc_arg, dict) + : pfunc.Call(frame_arg, bp_loc_arg, ToSWIGWrapper(args_impl), dict); if (!result) return result.takeError(); @@ -70,7 +68,6 @@ llvm::Expected lldb_private::LLDBSwigPythonBreakpointCallbackFunction( bool lldb_private::LLDBSwigPythonWatchpointCallbackFunction( const char *python_function_name, const char *session_dictionary_name, const lldb::StackFrameSP &frame_sp, const lldb::WatchpointSP &wp_sp) { - lldb::SBWatchpoint sb_wp(wp_sp); bool stop_at_watchpoint = true; @@ -84,9 +81,8 @@ bool lldb_private::LLDBSwigPythonWatchpointCallbackFunction( if (!pfunc.IsAllocated()) return stop_at_watchpoint; - PythonObject frame_arg = ToSWIGWrapper(frame_sp); - PythonObject wp_arg(PyRefType::Owned, SBTypeToSWIGWrapper(sb_wp)); - PythonObject result = pfunc(frame_arg, wp_arg, dict); + PythonObject result = + pfunc(ToSWIGWrapper(frame_sp), ToSWIGWrapper(wp_sp), dict); if (result.get() == Py_False) stop_at_watchpoint = false; @@ -98,7 +94,6 @@ bool lldb_private::LLDBSwigPythonCallTypeScript( const char *python_function_name, const void *session_dictionary, const lldb::ValueObjectSP &valobj_sp, void **pyfunct_wrapper, const lldb::TypeSummaryOptionsSP &options_sp, std::string &retval) { - lldb::SBTypeSummaryOptions sb_options(options_sp.get()); retval.clear(); @@ -146,12 +141,11 @@ bool lldb_private::LLDBSwigPythonCallTypeScript( } PythonObject value_arg = ToSWIGWrapper(valobj_sp); - PythonObject options_arg(PyRefType::Owned, SBTypeToSWIGWrapper(sb_options)); if (argc.get().max_positional_args < 3) result = pfunc(value_arg, dict); else - result = pfunc(value_arg, dict, options_arg); + result = pfunc(value_arg, dict, ToSWIGWrapper(*options_sp)); retval = result.Str().GetString().str(); @@ -452,13 +446,7 @@ unsigned int lldb_private::LLDBSwigPythonCallBreakpointResolver( if (!pfunc.IsAllocated()) return 0; - PythonObject result; - if (sym_ctx != nullptr) { - lldb::SBSymbolContext sb_sym_ctx(sym_ctx); - PythonObject sym_ctx_arg(PyRefType::Owned, SBTypeToSWIGWrapper(sb_sym_ctx)); - result = pfunc(sym_ctx_arg); - } else - result = pfunc(); + PythonObject result = sym_ctx ? pfunc(ToSWIGWrapper(*sym_ctx)) : pfunc(); if (PyErr_Occurred()) { PyErr_Print(); @@ -560,12 +548,11 @@ bool lldb_private::LLDBSwigPythonStopHookCallHandleStop( if (!pfunc.IsAllocated()) return true; - PythonObject result; - lldb::SBExecutionContext sb_exc_ctx(exc_ctx_sp); - PythonObject exc_ctx_arg(PyRefType::Owned, SBTypeToSWIGWrapper(sb_exc_ctx)); - lldb::SBStream sb_stream; - PythonObject sb_stream_arg(PyRefType::Owned, SBTypeToSWIGWrapper(sb_stream)); - result = pfunc(exc_ctx_arg, sb_stream_arg); + auto *sb_stream = new lldb::SBStream(); + PythonObject sb_stream_arg = + ToSWIGWrapper(std::unique_ptr(sb_stream)); + PythonObject result = + pfunc(ToSWIGWrapper(std::move(exc_ctx_sp)), sb_stream_arg); if (PyErr_Occurred()) { stream->PutCString("Python error occurred handling stop-hook."); @@ -577,7 +564,7 @@ bool lldb_private::LLDBSwigPythonStopHookCallHandleStop( // Now add the result to the output stream. SBStream only // makes an internally help StreamString which I can't interpose, so I // have to copy it over here. 
- stream->PutCString(sb_stream.GetData()); + stream->PutCString(sb_stream->GetData()); if (result.get() == Py_False) return false; @@ -809,7 +796,6 @@ bool lldb_private::LLDBSwigPythonCallCommand( lldb_private::CommandReturnObject &cmd_retobj, lldb::ExecutionContextRefSP exe_ctx_ref_sp) { lldb::SBCommandReturnObject cmd_retobj_sb(cmd_retobj); - lldb::SBExecutionContext exe_ctx_sb(exe_ctx_ref_sp); PyErr_Cleaner py_err_cleaner(true); auto dict = PythonModule::MainModule().ResolveName( @@ -826,14 +812,14 @@ bool lldb_private::LLDBSwigPythonCallCommand( return false; } PythonObject debugger_arg = ToSWIGWrapper(std::move(debugger)); - PythonObject exe_ctx_arg(PyRefType::Owned, SBTypeToSWIGWrapper(exe_ctx_sb)); PythonObject cmd_retobj_arg(PyRefType::Owned, SBTypeToSWIGWrapper(cmd_retobj_sb)); if (argc.get().max_positional_args < 5u) pfunc(debugger_arg, PythonString(args), cmd_retobj_arg, dict); else - pfunc(debugger_arg, PythonString(args), exe_ctx_arg, cmd_retobj_arg, dict); + pfunc(debugger_arg, PythonString(args), + ToSWIGWrapper(std::move(exe_ctx_ref_sp)), cmd_retobj_arg, dict); return true; } @@ -843,7 +829,6 @@ bool lldb_private::LLDBSwigPythonCallCommandObject( lldb_private::CommandReturnObject &cmd_retobj, lldb::ExecutionContextRefSP exe_ctx_ref_sp) { lldb::SBCommandReturnObject cmd_retobj_sb(cmd_retobj); - lldb::SBExecutionContext exe_ctx_sb(exe_ctx_ref_sp); PyErr_Cleaner py_err_cleaner(true); @@ -853,12 +838,11 @@ bool lldb_private::LLDBSwigPythonCallCommandObject( if (!pfunc.IsAllocated()) return false; - PythonObject exe_ctx_arg(PyRefType::Owned, SBTypeToSWIGWrapper(exe_ctx_sb)); PythonObject cmd_retobj_arg(PyRefType::Owned, SBTypeToSWIGWrapper(cmd_retobj_sb)); - pfunc(ToSWIGWrapper(std::move(debugger)), PythonString(args), exe_ctx_arg, - cmd_retobj_arg); + pfunc(ToSWIGWrapper(std::move(debugger)), PythonString(args), + ToSWIGWrapper(exe_ctx_ref_sp), cmd_retobj_arg); return true; } diff --git a/lldb/include/lldb/API/SBSymbolContext.h b/lldb/include/lldb/API/SBSymbolContext.h index 16ad29ea8730..b4c5921d81a9 100644 --- a/lldb/include/lldb/API/SBSymbolContext.h +++ b/lldb/include/lldb/API/SBSymbolContext.h @@ -25,7 +25,7 @@ class LLDB_API SBSymbolContext { SBSymbolContext(const lldb::SBSymbolContext &rhs); - SBSymbolContext(const lldb_private::SymbolContext *sc_ptr); + SBSymbolContext(const lldb_private::SymbolContext &sc_ptr); ~SBSymbolContext(); @@ -72,8 +72,6 @@ class LLDB_API SBSymbolContext { lldb_private::SymbolContext *get() const; - void SetSymbolContext(const lldb_private::SymbolContext *sc_ptr); - private: std::unique_ptr m_opaque_up; }; diff --git a/lldb/include/lldb/API/SBTypeSummary.h b/lldb/include/lldb/API/SBTypeSummary.h index 929bfb6124b2..e9963682f7ab 100644 --- a/lldb/include/lldb/API/SBTypeSummary.h +++ b/lldb/include/lldb/API/SBTypeSummary.h @@ -19,7 +19,7 @@ class LLDB_API SBTypeSummaryOptions { SBTypeSummaryOptions(const lldb::SBTypeSummaryOptions &rhs); - SBTypeSummaryOptions(const lldb_private::TypeSummaryOptions *lldb_object_ptr); + SBTypeSummaryOptions(const lldb_private::TypeSummaryOptions &lldb_object); ~SBTypeSummaryOptions(); @@ -48,8 +48,6 @@ class LLDB_API SBTypeSummaryOptions { const lldb_private::TypeSummaryOptions &ref() const; - void SetOptions(const lldb_private::TypeSummaryOptions *lldb_object_ptr); - private: std::unique_ptr m_opaque_up; }; diff --git a/lldb/source/API/SBFrame.cpp b/lldb/source/API/SBFrame.cpp index 7107768ba884..c6bc3288c4b2 100644 --- a/lldb/source/API/SBFrame.cpp +++ b/lldb/source/API/SBFrame.cpp @@ -119,15 +119,13 @@ 
SBSymbolContext SBFrame::GetSymbolContext(uint32_t resolve_scope) const { std::unique_lock lock; ExecutionContext exe_ctx(m_opaque_sp.get(), lock); SymbolContextItem scope = static_cast(resolve_scope); - StackFrame *frame = nullptr; Target *target = exe_ctx.GetTargetPtr(); Process *process = exe_ctx.GetProcessPtr(); if (target && process) { Process::StopLocker stop_locker; if (stop_locker.TryLock(&process->GetRunLock())) { - frame = exe_ctx.GetFramePtr(); - if (frame) - sb_sym_ctx.SetSymbolContext(&frame->GetSymbolContext(scope)); + if (StackFrame *frame = exe_ctx.GetFramePtr()) + sb_sym_ctx = frame->GetSymbolContext(scope); } } diff --git a/lldb/source/API/SBSymbolContext.cpp b/lldb/source/API/SBSymbolContext.cpp index 488d49884903..89fe051658ff 100644 --- a/lldb/source/API/SBSymbolContext.cpp +++ b/lldb/source/API/SBSymbolContext.cpp @@ -22,12 +22,10 @@ SBSymbolContext::SBSymbolContext() : m_opaque_up() { LLDB_RECORD_CONSTRUCTOR_NO_ARGS(SBSymbolContext); } -SBSymbolContext::SBSymbolContext(const SymbolContext *sc_ptr) : m_opaque_up() { +SBSymbolContext::SBSymbolContext(const SymbolContext &sc) + : m_opaque_up(std::make_unique(sc)) { LLDB_RECORD_CONSTRUCTOR(SBSymbolContext, - (const lldb_private::SymbolContext *), sc_ptr); - - if (sc_ptr) - m_opaque_up = std::make_unique(*sc_ptr); + (const lldb_private::SymbolContext &), sc); } SBSymbolContext::SBSymbolContext(const SBSymbolContext &rhs) : m_opaque_up() { @@ -49,13 +47,6 @@ const SBSymbolContext &SBSymbolContext::operator=(const SBSymbolContext &rhs) { return LLDB_RECORD_RESULT(*this); } -void SBSymbolContext::SetSymbolContext(const SymbolContext *sc_ptr) { - if (sc_ptr) - m_opaque_up = std::make_unique(*sc_ptr); - else - m_opaque_up->Clear(true); -} - bool SBSymbolContext::IsValid() const { LLDB_RECORD_METHOD_CONST_NO_ARGS(bool, SBSymbolContext, IsValid); return this->operator bool(); @@ -237,7 +228,7 @@ template <> void RegisterMethods(Registry &R) { LLDB_REGISTER_CONSTRUCTOR(SBSymbolContext, ()); LLDB_REGISTER_CONSTRUCTOR(SBSymbolContext, - (const lldb_private::SymbolContext *)); + (const lldb_private::SymbolContext &)); LLDB_REGISTER_CONSTRUCTOR(SBSymbolContext, (const lldb::SBSymbolContext &)); LLDB_REGISTER_METHOD( const lldb::SBSymbolContext &, diff --git a/lldb/source/API/SBSymbolContextList.cpp b/lldb/source/API/SBSymbolContextList.cpp index 9db84dc1bf4b..70a8bbe6694c 100644 --- a/lldb/source/API/SBSymbolContextList.cpp +++ b/lldb/source/API/SBSymbolContextList.cpp @@ -56,9 +56,8 @@ SBSymbolContext SBSymbolContextList::GetContextAtIndex(uint32_t idx) { SBSymbolContext sb_sc; if (m_opaque_up) { SymbolContext sc; - if (m_opaque_up->GetContextAtIndex(idx, sc)) { - sb_sc.SetSymbolContext(&sc); - } + if (m_opaque_up->GetContextAtIndex(idx, sc)) + sb_sc = sc; } return LLDB_RECORD_RESULT(sb_sc); } diff --git a/lldb/source/API/SBTypeSummary.cpp b/lldb/source/API/SBTypeSummary.cpp index 3800ae940c70..2d7f8ef340c9 100644 --- a/lldb/source/API/SBTypeSummary.cpp +++ b/lldb/source/API/SBTypeSummary.cpp @@ -100,20 +100,11 @@ const lldb_private::TypeSummaryOptions &SBTypeSummaryOptions::ref() const { } SBTypeSummaryOptions::SBTypeSummaryOptions( - const lldb_private::TypeSummaryOptions *lldb_object_ptr) { + const lldb_private::TypeSummaryOptions &lldb_object) + : m_opaque_up(std::make_unique(lldb_object)) { LLDB_RECORD_CONSTRUCTOR(SBTypeSummaryOptions, - (const lldb_private::TypeSummaryOptions *), - lldb_object_ptr); - - SetOptions(lldb_object_ptr); -} - -void SBTypeSummaryOptions::SetOptions( - const lldb_private::TypeSummaryOptions 
*lldb_object_ptr) { - if (lldb_object_ptr) - m_opaque_up = std::make_unique(*lldb_object_ptr); - else - m_opaque_up = std::make_unique(); + (const lldb_private::TypeSummaryOptions &), + lldb_object); } SBTypeSummary::SBTypeSummary() : m_opaque_sp() { @@ -175,7 +166,7 @@ SBTypeSummary SBTypeSummary::CreateWithCallback(FormatCallback cb, const TypeSummaryOptions &opt) -> bool { SBStream stream; SBValue sb_value(valobj.GetSP()); - SBTypeSummaryOptions options(&opt); + SBTypeSummaryOptions options(opt); if (!cb(sb_value, options, stream)) return false; stm.Write(stream.GetData(), stream.GetSize()); @@ -492,7 +483,7 @@ void RegisterMethods(Registry &R) { LLDB_REGISTER_METHOD(void, SBTypeSummaryOptions, SetCapping, (lldb::TypeSummaryCapping)); LLDB_REGISTER_CONSTRUCTOR(SBTypeSummaryOptions, - (const lldb_private::TypeSummaryOptions *)); + (const lldb_private::TypeSummaryOptions &)); } template <> From e7c48f3cd5eb51b3bb9b928b3bde5da28da68e39 Mon Sep 17 00:00:00 2001 From: Pavel Labath Date: Fri, 17 Dec 2021 14:13:52 +0100 Subject: [PATCH 046/293] [lldb] Use GetSupportedArchitectures on darwin platforms This finishes the GetSupportedArchitectureAtIndex migration. There are opportunities to simplify this even further, but I am going to leave that to the platform owners. Differential Revision: https://reviews.llvm.org/D116028 --- .../MacOSX/PlatformAppleSimulator.cpp | 22 ++- .../Platform/MacOSX/PlatformAppleSimulator.h | 3 +- .../Platform/MacOSX/PlatformDarwin.cpp | 66 ++------ .../Plugins/Platform/MacOSX/PlatformDarwin.h | 12 +- .../Platform/MacOSX/PlatformDarwinKernel.cpp | 9 +- .../Platform/MacOSX/PlatformDarwinKernel.h | 3 +- .../Platform/MacOSX/PlatformMacOSX.cpp | 36 ++-- .../Plugins/Platform/MacOSX/PlatformMacOSX.h | 8 +- .../MacOSX/PlatformRemoteAppleBridge.cpp | 30 +--- .../MacOSX/PlatformRemoteAppleBridge.h | 3 +- .../Platform/MacOSX/PlatformRemoteAppleTV.cpp | 80 +-------- .../Platform/MacOSX/PlatformRemoteAppleTV.h | 3 +- .../MacOSX/PlatformRemoteAppleWatch.cpp | 155 +++--------------- .../MacOSX/PlatformRemoteAppleWatch.h | 3 +- .../MacOSX/PlatformRemoteDarwinDevice.cpp | 11 +- .../Platform/MacOSX/PlatformRemoteMacOSX.cpp | 34 +--- .../Platform/MacOSX/PlatformRemoteMacOSX.h | 6 +- .../Platform/MacOSX/PlatformRemoteiOS.cpp | 7 +- .../Platform/MacOSX/PlatformRemoteiOS.h | 3 +- 19 files changed, 96 insertions(+), 398 deletions(-) diff --git a/lldb/source/Plugins/Platform/MacOSX/PlatformAppleSimulator.cpp b/lldb/source/Plugins/Platform/MacOSX/PlatformAppleSimulator.cpp index 69692ddc77c4..8f27f6a39f1b 100644 --- a/lldb/source/Plugins/Platform/MacOSX/PlatformAppleSimulator.cpp +++ b/lldb/source/Plugins/Platform/MacOSX/PlatformAppleSimulator.cpp @@ -265,12 +265,11 @@ CoreSimulatorSupport::Device PlatformAppleSimulator::GetSimulatorDevice() { } #endif -bool PlatformAppleSimulator::GetSupportedArchitectureAtIndex(uint32_t idx, - ArchSpec &arch) { - if (idx >= m_supported_triples.size()) - return false; - arch = ArchSpec(m_supported_triples[idx]); - return true; +std::vector PlatformAppleSimulator::GetSupportedArchitectures() { + std::vector result(m_supported_triples.size()); + llvm::transform(m_supported_triples, result.begin(), + [](llvm::StringRef triple) { return ArchSpec(triple); }); + return result; } PlatformSP PlatformAppleSimulator::CreateInstance( @@ -380,10 +379,11 @@ Status PlatformAppleSimulator::ResolveExecutable( // so ask the platform for the architectures that we should be using (in // the correct order) and see if we can find a match that way StreamString arch_names; + 
llvm::ListSeparator LS; ArchSpec platform_arch; - for (uint32_t idx = 0; GetSupportedArchitectureAtIndex( - idx, resolved_module_spec.GetArchitecture()); - ++idx) { + for (const ArchSpec &arch : GetSupportedArchitectures()) { + resolved_module_spec.GetArchitecture() = arch; + // Only match x86 with x86 and x86_64 with x86_64... if (!module_spec.GetArchitecture().IsValid() || module_spec.GetArchitecture().GetCore() == @@ -398,9 +398,7 @@ Status PlatformAppleSimulator::ResolveExecutable( error.SetErrorToGenericError(); } - if (idx > 0) - arch_names.PutCString(", "); - arch_names.PutCString(platform_arch.GetArchitectureName()); + arch_names << LS << platform_arch.GetArchitectureName(); } } diff --git a/lldb/source/Plugins/Platform/MacOSX/PlatformAppleSimulator.h b/lldb/source/Plugins/Platform/MacOSX/PlatformAppleSimulator.h index 583399073c58..e87636d9999c 100644 --- a/lldb/source/Plugins/Platform/MacOSX/PlatformAppleSimulator.h +++ b/lldb/source/Plugins/Platform/MacOSX/PlatformAppleSimulator.h @@ -65,8 +65,7 @@ class PlatformAppleSimulator : public PlatformDarwin { lldb_private::Target &target, lldb_private::Status &error) override; - bool GetSupportedArchitectureAtIndex(uint32_t idx, - lldb_private::ArchSpec &arch) override; + std::vector GetSupportedArchitectures() override; lldb_private::Status ResolveExecutable( const lldb_private::ModuleSpec &module_spec, lldb::ModuleSP &module_sp, diff --git a/lldb/source/Plugins/Platform/MacOSX/PlatformDarwin.cpp b/lldb/source/Plugins/Platform/MacOSX/PlatformDarwin.cpp index f5ce3017a8f1..de64426d7b64 100644 --- a/lldb/source/Plugins/Platform/MacOSX/PlatformDarwin.cpp +++ b/lldb/source/Plugins/Platform/MacOSX/PlatformDarwin.cpp @@ -514,45 +514,19 @@ bool PlatformDarwin::ModuleIsExcludedForUnconstrainedSearches( return obj_type == ObjectFile::eTypeDynamicLinker; } -bool PlatformDarwin::x86GetSupportedArchitectureAtIndex(uint32_t idx, - ArchSpec &arch) { +void PlatformDarwin::x86GetSupportedArchitectures( + std::vector &archs) { ArchSpec host_arch = HostInfo::GetArchitecture(HostInfo::eArchKindDefault); - if (host_arch.GetCore() == ArchSpec::eCore_x86_64_x86_64h) { - switch (idx) { - case 0: - arch = host_arch; - return true; - - case 1: - arch.SetTriple("x86_64-apple-macosx"); - return true; - - case 2: - arch = HostInfo::GetArchitecture(HostInfo::eArchKind32); - return true; + archs.push_back(host_arch); - default: - return false; - } + if (host_arch.GetCore() == ArchSpec::eCore_x86_64_x86_64h) { + archs.push_back(ArchSpec("x86_64-apple-macosx")); + archs.push_back(HostInfo::GetArchitecture(HostInfo::eArchKind32)); } else { - if (idx == 0) { - arch = HostInfo::GetArchitecture(HostInfo::eArchKindDefault); - return arch.IsValid(); - } else if (idx == 1) { - ArchSpec platform_arch( - HostInfo::GetArchitecture(HostInfo::eArchKindDefault)); - ArchSpec platform_arch64( - HostInfo::GetArchitecture(HostInfo::eArchKind64)); - if (platform_arch.IsExactMatch(platform_arch64)) { - // This macosx platform supports both 32 and 64 bit. 
Since we already - // returned the 64 bit arch for idx == 0, return the 32 bit arch for - // idx == 1 - arch = HostInfo::GetArchitecture(HostInfo::eArchKind32); - return arch.IsValid(); - } - } + ArchSpec host_arch64 = HostInfo::GetArchitecture(HostInfo::eArchKind64); + if (host_arch.IsExactMatch(host_arch64)) + archs.push_back(HostInfo::GetArchitecture(HostInfo::eArchKind32)); } - return false; } static llvm::ArrayRef GetCompatibleArchs(ArchSpec::Core core) { @@ -669,21 +643,19 @@ const char *PlatformDarwin::GetCompatibleArch(ArchSpec::Core core, size_t idx) { /// The architecture selection rules for arm processors These cpu subtypes have /// distinct names (e.g. armv7f) but armv7 binaries run fine on an armv7f /// processor. -bool PlatformDarwin::ARMGetSupportedArchitectureAtIndex(uint32_t idx, - ArchSpec &arch) { +void PlatformDarwin::ARMGetSupportedArchitectures( + std::vector &archs) { const ArchSpec system_arch = GetSystemArchitecture(); const ArchSpec::Core system_core = system_arch.GetCore(); - if (const char *compatible_arch = GetCompatibleArch(system_core, idx)) { + const char *compatible_arch; + for (unsigned idx = 0; + (compatible_arch = GetCompatibleArch(system_core, idx)); ++idx) { llvm::Triple triple; triple.setArchName(compatible_arch); triple.setVendor(llvm::Triple::VendorType::Apple); - arch.SetTriple(triple); - return true; + archs.push_back(ArchSpec(triple)); } - - arch.Clear(); - return false; } static FileSpec GetXcodeSelectPath() { @@ -1367,11 +1339,3 @@ FileSpec PlatformDarwin::GetCurrentCommandLineToolsDirectory() { return FileSpec(FindComponentInPath(fspec.GetPath(), "CommandLineTools")); return {}; } - -std::vector PlatformDarwin::GetSupportedArchitectures() { - std::vector result; - ArchSpec arch; - for (uint32_t idx = 0; GetSupportedArchitectureAtIndex(idx, arch); ++idx) - result.push_back(arch); - return result; -} diff --git a/lldb/source/Plugins/Platform/MacOSX/PlatformDarwin.h b/lldb/source/Plugins/Platform/MacOSX/PlatformDarwin.h index 1a97c22cafff..bbb2a336c56e 100644 --- a/lldb/source/Plugins/Platform/MacOSX/PlatformDarwin.h +++ b/lldb/source/Plugins/Platform/MacOSX/PlatformDarwin.h @@ -60,11 +60,9 @@ class PlatformDarwin : public PlatformPOSIX { bool ModuleIsExcludedForUnconstrainedSearches( lldb_private::Target &target, const lldb::ModuleSP &module_sp) override; - bool ARMGetSupportedArchitectureAtIndex(uint32_t idx, - lldb_private::ArchSpec &arch); + void ARMGetSupportedArchitectures(std::vector &archs); - bool x86GetSupportedArchitectureAtIndex(uint32_t idx, - lldb_private::ArchSpec &arch); + void x86GetSupportedArchitectures(std::vector &archs); uint32_t GetResumeCountForLaunchInfo( lldb_private::ProcessLaunchInfo &launch_info) override; @@ -102,8 +100,6 @@ class PlatformDarwin : public PlatformPOSIX { /// located in. 
static lldb_private::FileSpec GetCurrentCommandLineToolsDirectory(); - std::vector GetSupportedArchitectures() override; - protected: static const char *GetCompatibleArch(lldb_private::ArchSpec::Core core, size_t idx); @@ -174,10 +170,6 @@ class PlatformDarwin : public PlatformPOSIX { static std::string FindComponentInPath(llvm::StringRef path, llvm::StringRef component); - virtual bool - GetSupportedArchitectureAtIndex(uint32_t idx, - lldb_private::ArchSpec &arch) = 0; - std::string m_developer_directory; llvm::StringMap m_sdk_path; std::mutex m_sdk_path_mutex; diff --git a/lldb/source/Plugins/Platform/MacOSX/PlatformDarwinKernel.cpp b/lldb/source/Plugins/Platform/MacOSX/PlatformDarwinKernel.cpp index 823c5a2fd198..abb3b30e175a 100644 --- a/lldb/source/Plugins/Platform/MacOSX/PlatformDarwinKernel.cpp +++ b/lldb/source/Plugins/Platform/MacOSX/PlatformDarwinKernel.cpp @@ -911,13 +911,14 @@ Status PlatformDarwinKernel::ExamineKextForMatchingUUID( return {}; } -bool PlatformDarwinKernel::GetSupportedArchitectureAtIndex(uint32_t idx, - ArchSpec &arch) { +std::vector PlatformDarwinKernel::GetSupportedArchitectures() { + std::vector result; #if defined(__arm__) || defined(__arm64__) || defined(__aarch64__) - return ARMGetSupportedArchitectureAtIndex(idx, arch); + ARMGetSupportedArchitectures(result); #else - return x86GetSupportedArchitectureAtIndex(idx, arch); + x86GetSupportedArchitectures(result); #endif + return result; } void PlatformDarwinKernel::CalculateTrapHandlerSymbolNames() { diff --git a/lldb/source/Plugins/Platform/MacOSX/PlatformDarwinKernel.h b/lldb/source/Plugins/Platform/MacOSX/PlatformDarwinKernel.h index 4cb6c10fb833..738594bda140 100644 --- a/lldb/source/Plugins/Platform/MacOSX/PlatformDarwinKernel.h +++ b/lldb/source/Plugins/Platform/MacOSX/PlatformDarwinKernel.h @@ -56,8 +56,7 @@ class PlatformDarwinKernel : public PlatformDarwin { llvm::SmallVectorImpl *old_modules, bool *did_create_ptr) override; - bool GetSupportedArchitectureAtIndex(uint32_t idx, - lldb_private::ArchSpec &arch) override; + std::vector GetSupportedArchitectures() override; bool SupportsModules() override { return false; } diff --git a/lldb/source/Plugins/Platform/MacOSX/PlatformMacOSX.cpp b/lldb/source/Plugins/Platform/MacOSX/PlatformMacOSX.cpp index ac9604f681a0..3f2fb53ad654 100644 --- a/lldb/source/Plugins/Platform/MacOSX/PlatformMacOSX.cpp +++ b/lldb/source/Plugins/Platform/MacOSX/PlatformMacOSX.cpp @@ -133,39 +133,23 @@ ConstString PlatformMacOSX::GetSDKDirectory(lldb_private::Target &target) { return {}; } -bool PlatformMacOSX::GetSupportedArchitectureAtIndex(uint32_t idx, - ArchSpec &arch) { +std::vector PlatformMacOSX::GetSupportedArchitectures() { + std::vector result; #if defined(__arm__) || defined(__arm64__) || defined(__aarch64__) // macOS for ARM64 support both native and translated x86_64 processes - if (!m_num_arm_arches || idx < m_num_arm_arches) { - bool res = ARMGetSupportedArchitectureAtIndex(idx, arch); - if (res) - return true; - if (!m_num_arm_arches) - m_num_arm_arches = idx; - } + ARMGetSupportedArchitectures(result); - // We can't use x86GetSupportedArchitectureAtIndex() because it uses + // We can't use x86GetSupportedArchitectures() because it uses // the system architecture for some of its return values and also // has a 32bits variant. 
- if (idx == m_num_arm_arches) { - arch.SetTriple("x86_64-apple-macosx"); - return true; - } else if (idx == m_num_arm_arches + 1) { - arch.SetTriple("x86_64-apple-ios-macabi"); - return true; - } else if (idx == m_num_arm_arches + 2) { - arch.SetTriple("arm64-apple-ios"); - return true; - } else if (idx == m_num_arm_arches + 3) { - arch.SetTriple("arm64e-apple-ios"); - return true; - } - - return false; + result.push_back(ArchSpec("x86_64-apple-macosx")); + result.push_back(ArchSpec("x86_64-apple-ios-macabi")); + result.push_back(ArchSpec("arm64-apple-ios")); + result.push_back(ArchSpec("arm64e-apple-ios")); #else - return x86GetSupportedArchitectureAtIndex(idx, arch); + x86GetSupportedArchitectures(result); #endif + return result; } lldb_private::Status PlatformMacOSX::GetSharedModule( diff --git a/lldb/source/Plugins/Platform/MacOSX/PlatformMacOSX.h b/lldb/source/Plugins/Platform/MacOSX/PlatformMacOSX.h index 404f93348dfa..113214befa92 100644 --- a/lldb/source/Plugins/Platform/MacOSX/PlatformMacOSX.h +++ b/lldb/source/Plugins/Platform/MacOSX/PlatformMacOSX.h @@ -47,8 +47,7 @@ class PlatformMacOSX : public PlatformDarwin { return PlatformDarwin::GetFile(source, destination); } - bool GetSupportedArchitectureAtIndex(uint32_t idx, - lldb_private::ArchSpec &arch) override; + std::vector GetSupportedArchitectures() override; lldb_private::ConstString GetSDKDirectory(lldb_private::Target &target) override; @@ -59,11 +58,6 @@ class PlatformMacOSX : public PlatformDarwin { return PlatformDarwin::AddClangModuleCompilationOptionsForSDKType( target, options, lldb_private::XcodeSDK::Type::MacOSX); } - -private: -#if defined(__arm__) || defined(__arm64__) || defined(__aarch64__) - uint32_t m_num_arm_arches = 0; -#endif }; #endif // LLDB_SOURCE_PLUGINS_PLATFORM_MACOSX_PLATFORMMACOSX_H diff --git a/lldb/source/Plugins/Platform/MacOSX/PlatformRemoteAppleBridge.cpp b/lldb/source/Plugins/Platform/MacOSX/PlatformRemoteAppleBridge.cpp index b7ba2ea29bf6..495e8615c612 100644 --- a/lldb/source/Plugins/Platform/MacOSX/PlatformRemoteAppleBridge.cpp +++ b/lldb/source/Plugins/Platform/MacOSX/PlatformRemoteAppleBridge.cpp @@ -137,34 +137,8 @@ llvm::StringRef PlatformRemoteAppleBridge::GetDescriptionStatic() { return "Remote BridgeOS platform plug-in."; } -bool PlatformRemoteAppleBridge::GetSupportedArchitectureAtIndex(uint32_t idx, - ArchSpec &arch) { - ArchSpec system_arch(GetSystemArchitecture()); - - const ArchSpec::Core system_core = system_arch.GetCore(); - switch (system_core) { - default: - switch (idx) { - case 0: - arch.SetTriple("arm64-apple-bridgeos"); - return true; - default: - break; - } - break; - - case ArchSpec::eCore_arm_arm64: - switch (idx) { - case 0: - arch.SetTriple("arm64-apple-bridgeos"); - return true; - default: - break; - } - break; - } - arch.Clear(); - return false; +std::vector PlatformRemoteAppleBridge::GetSupportedArchitectures() { + return {ArchSpec("arm64-apple-bridgeos")}; } llvm::StringRef PlatformRemoteAppleBridge::GetDeviceSupportDirectoryName() { diff --git a/lldb/source/Plugins/Platform/MacOSX/PlatformRemoteAppleBridge.h b/lldb/source/Plugins/Platform/MacOSX/PlatformRemoteAppleBridge.h index 1f4a0b0b0d4d..0d11df01d633 100644 --- a/lldb/source/Plugins/Platform/MacOSX/PlatformRemoteAppleBridge.h +++ b/lldb/source/Plugins/Platform/MacOSX/PlatformRemoteAppleBridge.h @@ -40,8 +40,7 @@ class PlatformRemoteAppleBridge : public PlatformRemoteDarwinDevice { llvm::StringRef GetDescription() override { return GetDescriptionStatic(); } - bool GetSupportedArchitectureAtIndex(uint32_t 
idx, - lldb_private::ArchSpec &arch) override; + std::vector GetSupportedArchitectures() override; protected: llvm::StringRef GetDeviceSupportDirectoryName() override; diff --git a/lldb/source/Plugins/Platform/MacOSX/PlatformRemoteAppleTV.cpp b/lldb/source/Plugins/Platform/MacOSX/PlatformRemoteAppleTV.cpp index 42d0de0e36c7..d11e13cd8239 100644 --- a/lldb/source/Plugins/Platform/MacOSX/PlatformRemoteAppleTV.cpp +++ b/lldb/source/Plugins/Platform/MacOSX/PlatformRemoteAppleTV.cpp @@ -132,90 +132,24 @@ llvm::StringRef PlatformRemoteAppleTV::GetDescriptionStatic() { return "Remote Apple TV platform plug-in."; } -bool PlatformRemoteAppleTV::GetSupportedArchitectureAtIndex(uint32_t idx, - ArchSpec &arch) { +std::vector PlatformRemoteAppleTV::GetSupportedArchitectures() { ArchSpec system_arch(GetSystemArchitecture()); const ArchSpec::Core system_core = system_arch.GetCore(); switch (system_core) { default: - switch (idx) { - case 0: - arch.SetTriple("arm64-apple-tvos"); - return true; - case 1: - arch.SetTriple("armv7s-apple-tvos"); - return true; - case 2: - arch.SetTriple("armv7-apple-tvos"); - return true; - case 3: - arch.SetTriple("thumbv7s-apple-tvos"); - return true; - case 4: - arch.SetTriple("thumbv7-apple-tvos"); - return true; - default: - break; - } - break; - case ArchSpec::eCore_arm_arm64: - switch (idx) { - case 0: - arch.SetTriple("arm64-apple-tvos"); - return true; - case 1: - arch.SetTriple("armv7s-apple-tvos"); - return true; - case 2: - arch.SetTriple("armv7-apple-tvos"); - return true; - case 3: - arch.SetTriple("thumbv7s-apple-tvos"); - return true; - case 4: - arch.SetTriple("thumbv7-apple-tvos"); - return true; - default: - break; - } - break; + return {ArchSpec("arm64-apple-tvos"), ArchSpec("armv7s-apple-tvos"), + ArchSpec("armv7-apple-tvos"), ArchSpec("thumbv7s-apple-tvos"), + ArchSpec("thumbv7-apple-tvos")}; case ArchSpec::eCore_arm_armv7s: - switch (idx) { - case 0: - arch.SetTriple("armv7s-apple-tvos"); - return true; - case 1: - arch.SetTriple("armv7-apple-tvos"); - return true; - case 2: - arch.SetTriple("thumbv7s-apple-tvos"); - return true; - case 3: - arch.SetTriple("thumbv7-apple-tvos"); - return true; - default: - break; - } - break; + return {ArchSpec("armv7s-apple-tvos"), ArchSpec("armv7-apple-tvos"), + ArchSpec("thumbv7s-apple-tvos"), ArchSpec("thumbv7-apple-tvos")}; case ArchSpec::eCore_arm_armv7: - switch (idx) { - case 0: - arch.SetTriple("armv7-apple-tvos"); - return true; - case 1: - arch.SetTriple("thumbv7-apple-tvos"); - return true; - default: - break; - } - break; + return {ArchSpec("armv7-apple-tvos"), ArchSpec("thumbv7-apple-tvos")}; } - arch.Clear(); - return false; } llvm::StringRef PlatformRemoteAppleTV::GetDeviceSupportDirectoryName() { diff --git a/lldb/source/Plugins/Platform/MacOSX/PlatformRemoteAppleTV.h b/lldb/source/Plugins/Platform/MacOSX/PlatformRemoteAppleTV.h index ce120082a74f..f6dbc7cac793 100644 --- a/lldb/source/Plugins/Platform/MacOSX/PlatformRemoteAppleTV.h +++ b/lldb/source/Plugins/Platform/MacOSX/PlatformRemoteAppleTV.h @@ -40,8 +40,7 @@ class PlatformRemoteAppleTV : public PlatformRemoteDarwinDevice { llvm::StringRef GetDescription() override { return GetDescriptionStatic(); } - bool GetSupportedArchitectureAtIndex(uint32_t idx, - lldb_private::ArchSpec &arch) override; + std::vector GetSupportedArchitectures() override; protected: llvm::StringRef GetDeviceSupportDirectoryName() override; diff --git a/lldb/source/Plugins/Platform/MacOSX/PlatformRemoteAppleWatch.cpp 
b/lldb/source/Plugins/Platform/MacOSX/PlatformRemoteAppleWatch.cpp index 849ff3a50486..e1d78be6c145 100644 --- a/lldb/source/Plugins/Platform/MacOSX/PlatformRemoteAppleWatch.cpp +++ b/lldb/source/Plugins/Platform/MacOSX/PlatformRemoteAppleWatch.cpp @@ -143,154 +143,39 @@ llvm::StringRef PlatformRemoteAppleWatch::GetDescriptionStatic() { PlatformRemoteAppleWatch::PlatformRemoteAppleWatch() : PlatformRemoteDarwinDevice() {} -bool PlatformRemoteAppleWatch::GetSupportedArchitectureAtIndex(uint32_t idx, - ArchSpec &arch) { +std::vector PlatformRemoteAppleWatch::GetSupportedArchitectures() { ArchSpec system_arch(GetSystemArchitecture()); const ArchSpec::Core system_core = system_arch.GetCore(); switch (system_core) { default: - switch (idx) { - case 0: - arch.SetTriple("arm64-apple-watchos"); - return true; - case 1: - arch.SetTriple("armv7k-apple-watchos"); - return true; - case 2: - arch.SetTriple("armv7s-apple-watchos"); - return true; - case 3: - arch.SetTriple("armv7-apple-watchos"); - return true; - case 4: - arch.SetTriple("thumbv7k-apple-watchos"); - return true; - case 5: - arch.SetTriple("thumbv7-apple-watchos"); - return true; - case 6: - arch.SetTriple("thumbv7s-apple-watchos"); - return true; - case 7: - arch.SetTriple("arm64_32-apple-watchos"); - return true; - default: - break; - } - break; - case ArchSpec::eCore_arm_arm64: - switch (idx) { - case 0: - arch.SetTriple("arm64-apple-watchos"); - return true; - case 1: - arch.SetTriple("armv7k-apple-watchos"); - return true; - case 2: - arch.SetTriple("armv7s-apple-watchos"); - return true; - case 3: - arch.SetTriple("armv7-apple-watchos"); - return true; - case 4: - arch.SetTriple("thumbv7k-apple-watchos"); - return true; - case 5: - arch.SetTriple("thumbv7-apple-watchos"); - return true; - case 6: - arch.SetTriple("thumbv7s-apple-watchos"); - return true; - case 7: - arch.SetTriple("arm64_32-apple-watchos"); - return true; - default: - break; - } - break; + return { + ArchSpec("arm64-apple-watchos"), ArchSpec("armv7k-apple-watchos"), + ArchSpec("armv7s-apple-watchos"), ArchSpec("armv7-apple-watchos"), + ArchSpec("thumbv7k-apple-watchos"), ArchSpec("thumbv7-apple-watchos"), + ArchSpec("thumbv7s-apple-watchos"), ArchSpec("arm64_32-apple-watchos")}; case ArchSpec::eCore_arm_armv7k: - switch (idx) { - case 0: - arch.SetTriple("armv7k-apple-watchos"); - return true; - case 1: - arch.SetTriple("armv7s-apple-watchos"); - return true; - case 2: - arch.SetTriple("armv7-apple-watchos"); - return true; - case 3: - arch.SetTriple("thumbv7k-apple-watchos"); - return true; - case 4: - arch.SetTriple("thumbv7-apple-watchos"); - return true; - case 5: - arch.SetTriple("thumbv7s-apple-watchos"); - return true; - case 6: - arch.SetTriple("arm64_32-apple-watchos"); - return true; - default: - break; - } - break; + return { + ArchSpec("armv7k-apple-watchos"), ArchSpec("armv7s-apple-watchos"), + ArchSpec("armv7-apple-watchos"), ArchSpec("thumbv7k-apple-watchos"), + ArchSpec("thumbv7-apple-watchos"), ArchSpec("thumbv7s-apple-watchos"), + ArchSpec("arm64_32-apple-watchos")}; case ArchSpec::eCore_arm_armv7s: - switch (idx) { - case 0: - arch.SetTriple("armv7s-apple-watchos"); - return true; - case 1: - arch.SetTriple("armv7k-apple-watchos"); - return true; - case 2: - arch.SetTriple("armv7-apple-watchos"); - return true; - case 3: - arch.SetTriple("thumbv7k-apple-watchos"); - return true; - case 4: - arch.SetTriple("thumbv7-apple-watchos"); - return true; - case 5: - arch.SetTriple("thumbv7s-apple-watchos"); - return true; - case 6: - 
arch.SetTriple("arm64_32-apple-watchos"); - return true; - default: - break; - } - break; + return { + ArchSpec("armv7s-apple-watchos"), ArchSpec("armv7k-apple-watchos"), + ArchSpec("armv7-apple-watchos"), ArchSpec("thumbv7k-apple-watchos"), + ArchSpec("thumbv7-apple-watchos"), ArchSpec("thumbv7s-apple-watchos"), + ArchSpec("arm64_32-apple-watchos")}; case ArchSpec::eCore_arm_armv7: - switch (idx) { - case 0: - arch.SetTriple("armv7-apple-watchos"); - return true; - case 1: - arch.SetTriple("armv7k-apple-watchos"); - return true; - case 2: - arch.SetTriple("thumbv7k-apple-watchos"); - return true; - case 3: - arch.SetTriple("thumbv7-apple-watchos"); - return true; - case 4: - arch.SetTriple("arm64_32-apple-watchos"); - return true; - default: - break; - } - break; + return {ArchSpec("armv7-apple-watchos"), ArchSpec("armv7k-apple-watchos"), + ArchSpec("thumbv7k-apple-watchos"), + ArchSpec("thumbv7-apple-watchos"), + ArchSpec("arm64_32-apple-watchos")}; } - arch.Clear(); - return false; } llvm::StringRef PlatformRemoteAppleWatch::GetDeviceSupportDirectoryName() { diff --git a/lldb/source/Plugins/Platform/MacOSX/PlatformRemoteAppleWatch.h b/lldb/source/Plugins/Platform/MacOSX/PlatformRemoteAppleWatch.h index abecbd733505..07d1cff081dd 100644 --- a/lldb/source/Plugins/Platform/MacOSX/PlatformRemoteAppleWatch.h +++ b/lldb/source/Plugins/Platform/MacOSX/PlatformRemoteAppleWatch.h @@ -43,8 +43,7 @@ class PlatformRemoteAppleWatch : public PlatformRemoteDarwinDevice { // lldb_private::Platform functions - bool GetSupportedArchitectureAtIndex(uint32_t idx, - lldb_private::ArchSpec &arch) override; + std::vector GetSupportedArchitectures() override; protected: llvm::StringRef GetDeviceSupportDirectoryName() override; diff --git a/lldb/source/Plugins/Platform/MacOSX/PlatformRemoteDarwinDevice.cpp b/lldb/source/Plugins/Platform/MacOSX/PlatformRemoteDarwinDevice.cpp index 97b2b4a3b740..5bc90e77ba53 100644 --- a/lldb/source/Plugins/Platform/MacOSX/PlatformRemoteDarwinDevice.cpp +++ b/lldb/source/Plugins/Platform/MacOSX/PlatformRemoteDarwinDevice.cpp @@ -90,9 +90,9 @@ Status PlatformRemoteDarwinDevice::ResolveExecutable( // so ask the platform for the architectures that we should be using (in // the correct order) and see if we can find a match that way StreamString arch_names; - for (uint32_t idx = 0; GetSupportedArchitectureAtIndex( - idx, resolved_module_spec.GetArchitecture()); - ++idx) { + llvm::ListSeparator LS; + for (const ArchSpec &arch : GetSupportedArchitectures()) { + resolved_module_spec.GetArchitecture() = arch; error = ModuleList::GetSharedModule(resolved_module_spec, exe_module_sp, nullptr, nullptr, nullptr); // Did we find an executable using one of the @@ -103,10 +103,7 @@ Status PlatformRemoteDarwinDevice::ResolveExecutable( error.SetErrorToGenericError(); } - if (idx > 0) - arch_names.PutCString(", "); - arch_names.PutCString( - resolved_module_spec.GetArchitecture().GetArchitectureName()); + arch_names << LS << arch.GetArchitectureName(); } if (error.Fail() || !exe_module_sp) { diff --git a/lldb/source/Plugins/Platform/MacOSX/PlatformRemoteMacOSX.cpp b/lldb/source/Plugins/Platform/MacOSX/PlatformRemoteMacOSX.cpp index 72c3480da661..1e969f05e15b 100644 --- a/lldb/source/Plugins/Platform/MacOSX/PlatformRemoteMacOSX.cpp +++ b/lldb/source/Plugins/Platform/MacOSX/PlatformRemoteMacOSX.cpp @@ -124,35 +124,19 @@ PlatformSP PlatformRemoteMacOSX::CreateInstance(bool force, return PlatformSP(); } -bool PlatformRemoteMacOSX::GetSupportedArchitectureAtIndex(uint32_t idx, - ArchSpec &arch) { 
+std::vector PlatformRemoteMacOSX::GetSupportedArchitectures() { // macOS for ARM64 support both native and translated x86_64 processes - if (!m_num_arm_arches || idx < m_num_arm_arches) { - bool res = ARMGetSupportedArchitectureAtIndex(idx, arch); - if (res) - return true; - if (!m_num_arm_arches) - m_num_arm_arches = idx; - } + std::vector result; + ARMGetSupportedArchitectures(result); - // We can't use x86GetSupportedArchitectureAtIndex() because it uses + // We can't use x86GetSupportedArchitectures() because it uses // the system architecture for some of its return values and also // has a 32bits variant. - if (idx == m_num_arm_arches) { - arch.SetTriple("x86_64-apple-macosx"); - return true; - } else if (idx == m_num_arm_arches + 1) { - arch.SetTriple("x86_64-apple-ios-macabi"); - return true; - } else if (idx == m_num_arm_arches + 2) { - arch.SetTriple("arm64-apple-ios"); - return true; - } else if (idx == m_num_arm_arches + 3) { - arch.SetTriple("arm64e-apple-ios"); - return true; - } - - return false; + result.push_back(ArchSpec("x86_64-apple-macosx")); + result.push_back(ArchSpec("x86_64-apple-ios-macabi")); + result.push_back(ArchSpec("arm64-apple-ios")); + result.push_back(ArchSpec("arm64e-apple-ios")); + return result; } lldb_private::Status PlatformRemoteMacOSX::GetFileWithUUID( diff --git a/lldb/source/Plugins/Platform/MacOSX/PlatformRemoteMacOSX.h b/lldb/source/Plugins/Platform/MacOSX/PlatformRemoteMacOSX.h index 83335cda1f80..8d08a51b31b8 100644 --- a/lldb/source/Plugins/Platform/MacOSX/PlatformRemoteMacOSX.h +++ b/lldb/source/Plugins/Platform/MacOSX/PlatformRemoteMacOSX.h @@ -42,15 +42,11 @@ class PlatformRemoteMacOSX : public virtual PlatformRemoteDarwinDevice { const lldb_private::UUID *uuid_ptr, lldb_private::FileSpec &local_file) override; - bool GetSupportedArchitectureAtIndex(uint32_t idx, - lldb_private::ArchSpec &arch) override; + std::vector GetSupportedArchitectures() override; protected: llvm::StringRef GetDeviceSupportDirectoryName() override; llvm::StringRef GetPlatformName() override; - -private: - uint32_t m_num_arm_arches = 0; }; #endif // LLDB_SOURCE_PLUGINS_PLATFORM_MACOSX_PLATFORMREMOTEMACOSX_H diff --git a/lldb/source/Plugins/Platform/MacOSX/PlatformRemoteiOS.cpp b/lldb/source/Plugins/Platform/MacOSX/PlatformRemoteiOS.cpp index 8824cab2fff2..616123698e66 100644 --- a/lldb/source/Plugins/Platform/MacOSX/PlatformRemoteiOS.cpp +++ b/lldb/source/Plugins/Platform/MacOSX/PlatformRemoteiOS.cpp @@ -133,9 +133,10 @@ llvm::StringRef PlatformRemoteiOS::GetDescriptionStatic() { PlatformRemoteiOS::PlatformRemoteiOS() : PlatformRemoteDarwinDevice() {} -bool PlatformRemoteiOS::GetSupportedArchitectureAtIndex(uint32_t idx, - ArchSpec &arch) { - return ARMGetSupportedArchitectureAtIndex(idx, arch); +std::vector PlatformRemoteiOS::GetSupportedArchitectures() { + std::vector result; + ARMGetSupportedArchitectures(result); + return result; } llvm::StringRef PlatformRemoteiOS::GetDeviceSupportDirectoryName() { diff --git a/lldb/source/Plugins/Platform/MacOSX/PlatformRemoteiOS.h b/lldb/source/Plugins/Platform/MacOSX/PlatformRemoteiOS.h index 302df33fc116..94be124d4310 100644 --- a/lldb/source/Plugins/Platform/MacOSX/PlatformRemoteiOS.h +++ b/lldb/source/Plugins/Platform/MacOSX/PlatformRemoteiOS.h @@ -39,8 +39,7 @@ class PlatformRemoteiOS : public PlatformRemoteDarwinDevice { // lldb_private::PluginInterface functions llvm::StringRef GetPluginName() override { return GetPluginNameStatic(); } - bool GetSupportedArchitectureAtIndex(uint32_t idx, - lldb_private::ArchSpec 
&arch) override; + std::vector GetSupportedArchitectures() override; protected: llvm::StringRef GetDeviceSupportDirectoryName() override; From 516882a8f23b36c925ad8be89846bfa36915ce73 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Martin=20Storsj=C3=B6?= Date: Fri, 17 Dec 2021 12:15:37 +0000 Subject: [PATCH 047/293] [libcxx] [test] Prepare the ctime.timespec test for mingw CI env upgrades The test is currently marked XFAIL for mingw environments, but latest mingw-w64 got support for timespec_get: https://github.com/mingw-w64/mingw-w64/commit/e62a0a987c80f6a6fdac3e350943ae8085de0bd5 The CI environment will probably be upgraded to a state where this test is passing only after 14.x is branched in the llvm-project monorepo. If we'd just go from having an XFAIL to no marking at all (when CI is passing), we'd have to update both main and 14.x branches in sync exactly when the CI runners are updated to a newer version. Instead, mark the test as temporarily unsupported (so it doesn't cause failed builds when the CI environment is updated); after the CI environments are upgraded to such a state, we can remove the UNSUPPORTED marking to start requiring it to pass on the main branch, without needing to synchronize that change to anything else. Differential Revision: https://reviews.llvm.org/D116132 --- .../support.runtime/ctime.timespec.compile.pass.cpp | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/libcxx/test/std/language.support/support.runtime/ctime.timespec.compile.pass.cpp b/libcxx/test/std/language.support/support.runtime/ctime.timespec.compile.pass.cpp index d0d06c477c98..b906ad928536 100644 --- a/libcxx/test/std/language.support/support.runtime/ctime.timespec.compile.pass.cpp +++ b/libcxx/test/std/language.support/support.runtime/ctime.timespec.compile.pass.cpp @@ -15,8 +15,11 @@ // unavailable until macOS 10.15 // XFAIL: use_system_cxx_lib && target={{.+}}-apple-macosx10.{{9|10|11|12|13|14}} -// MinGW lacks timespec_get. -// XFAIL: target={{.+}}-windows-gnu +// MinGW used to lack timespec_get, but has got it in newer versions. +// Mark the test as unsupported for a transition period, to avoid spurious +// failures when upgrading the precommit testing environment. After +// the testing environment is upgraded, we could remove the marking altogether. +// UNSUPPORTED: target={{.+}}-windows-gnu #include #include From fd64544fbe0a4c0ed5d6d0cdda3945aacf7e9593 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Martin=20Storsj=C3=B6?= Date: Thu, 9 Dec 2021 00:04:35 +0200 Subject: [PATCH 048/293] [libcxx] [test] Remove a leftover unused function in config.py. NFC. While there's little value in polishing the old config system, I ran into this function and was confused for a while, while grepping around and trying to wrap my head around things. 
Differential Revision: https://reviews.llvm.org/D116131 --- libcxx/utils/libcxx/test/config.py | 8 -------- 1 file changed, 8 deletions(-) diff --git a/libcxx/utils/libcxx/test/config.py b/libcxx/utils/libcxx/test/config.py index a28590ef0bc1..d61743e49bff 100644 --- a/libcxx/utils/libcxx/test/config.py +++ b/libcxx/utils/libcxx/test/config.py @@ -160,14 +160,6 @@ def print_config_info(self): self.lit_config.note("Linking against the ABI Library at {}".format(self.abi_library_root)) self.lit_config.note("Running against the ABI Library at {}".format(self.abi_runtime_root)) - def get_test_format(self): - from libcxx.test.format import LibcxxTestFormat - return LibcxxTestFormat( - self.cxx, - self.use_clang_verify, - self.executor, - exec_env=self.exec_env) - def configure_cxx(self): # Gather various compiler parameters. cxx = self.get_lit_conf('cxx_under_test') From 7176799a7e196fa41046f9c77ebd65335b339acd Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Martin=20Storsj=C3=B6?= Date: Wed, 8 Dec 2021 20:54:21 +0200 Subject: [PATCH 049/293] [libc++] Add from-scratch testing configs for Windows The paths to the compiler and to the python executable may need to be quoted (if they're installed into e.g. C:\Program Files). All testing commands that are executed expect a gcc compatible command line interface, while clang-cl uses different command line options. In the original testing config, if the chosen compiler was clang-cl, it was replaced with clang++ by looking for such an executable in the path. For the new from-scratch test configs, I instead chose to add "--driver-mode=g++" to flags - invoking "clang-cl --driver-mode=g++" has the same effect as invoking "clang++", without needing to run any heuristics for picking a different compiler executable. Differential Revision: https://reviews.llvm.org/D111202 --- libcxx/test/configs/cmake-bridge.cfg.in | 4 ++- libcxx/test/configs/llvm-libc++-mingw.cfg.in | 32 +++++++++++++++++++ .../configs/llvm-libc++-shared-clangcl.cfg.in | 32 +++++++++++++++++++ .../configs/llvm-libc++-static-clangcl.cfg.in | 32 +++++++++++++++++++ libcxx/utils/ci/run-buildbot | 8 +++-- 5 files changed, 105 insertions(+), 3 deletions(-) create mode 100644 libcxx/test/configs/llvm-libc++-mingw.cfg.in create mode 100644 libcxx/test/configs/llvm-libc++-shared-clangcl.cfg.in create mode 100644 libcxx/test/configs/llvm-libc++-static-clangcl.cfg.in diff --git a/libcxx/test/configs/cmake-bridge.cfg.in b/libcxx/test/configs/cmake-bridge.cfg.in index 9f031bf2148e..b689827439c4 100644 --- a/libcxx/test/configs/cmake-bridge.cfg.in +++ b/libcxx/test/configs/cmake-bridge.cfg.in @@ -2,6 +2,8 @@ @SERIALIZED_LIT_PARAMS@ +import shlex + # # This file performs the bridge between the CMake configuration and the Lit # configuration files by setting up the LitConfig object and various Lit @@ -23,7 +25,7 @@ config.recursiveExpansionLimit = 10 config.test_exec_root = '@CMAKE_BINARY_DIR@' # Add substitutions for bootstrapping the test suite configuration -config.substitutions.append(('%{cxx}', '@CMAKE_CXX_COMPILER@')) +config.substitutions.append(('%{cxx}', shlex.quote('@CMAKE_CXX_COMPILER@'))) config.substitutions.append(('%{libcxx}', '@LIBCXX_SOURCE_DIR@')) config.substitutions.append(('%{install}', '@CMAKE_BINARY_DIR@')) config.substitutions.append(('%{include}', '%{install}/@LIBCXX_INSTALL_INCLUDE_DIR@')) diff --git a/libcxx/test/configs/llvm-libc++-mingw.cfg.in b/libcxx/test/configs/llvm-libc++-mingw.cfg.in new file mode 100644 index 000000000000..07f53c41886e --- /dev/null +++ 
b/libcxx/test/configs/llvm-libc++-mingw.cfg.in @@ -0,0 +1,32 @@ +# This testing configuration handles running the test suite against LLVM's libc++ +# using either a DLL or a static library, with MinGW/Clang on Windows. + +lit_config.load_config(config, '@CMAKE_CURRENT_BINARY_DIR@/cmake-bridge.cfg') + +config.substitutions.append(('%{flags}', '')) +config.substitutions.append(('%{compile_flags}', + '-nostdinc++ -isystem %{include} -isystem %{target-include} -I %{libcxx}/test/support' +)) +config.substitutions.append(('%{link_flags}', + '-nostdlib++ -L %{lib} -lc++' +)) +config.substitutions.append(('%{exec}', + '%{executor} --execdir %T --env PATH=%{lib} -- ' +)) + +# LIBCXX-WINDOWS-FIXME is the feature name used to XFAIL the +# initial Windows failures until they can be properly diagnosed +# and fixed. This allows easier detection of new test failures +# and regressions. Note: New failures should not be suppressed +# using this feature. (Also see llvm.org/PR32730) +config.available_features.add('LIBCXX-WINDOWS-FIXME') + +import os, site +site.addsitedir(os.path.join('@LIBCXX_SOURCE_DIR@', 'utils')) +import libcxx.test.params, libcxx.test.newconfig +libcxx.test.newconfig.configure( + libcxx.test.params.DEFAULT_PARAMETERS, + libcxx.test.features.DEFAULT_FEATURES, + config, + lit_config +) diff --git a/libcxx/test/configs/llvm-libc++-shared-clangcl.cfg.in b/libcxx/test/configs/llvm-libc++-shared-clangcl.cfg.in new file mode 100644 index 000000000000..dcb2b07ae724 --- /dev/null +++ b/libcxx/test/configs/llvm-libc++-shared-clangcl.cfg.in @@ -0,0 +1,32 @@ +# This testing configuration handles running the test suite against LLVM's libc++ +# using a DLL, with Clang-cl on Windows. + +lit_config.load_config(config, '@CMAKE_CURRENT_BINARY_DIR@/cmake-bridge.cfg') + +config.substitutions.append(('%{flags}', '--driver-mode=g++')) +config.substitutions.append(('%{compile_flags}', + '-nostdinc++ -isystem %{include} -isystem %{target-include} -I %{libcxx}/test/support -D_CRT_SECURE_NO_WARNINGS -D_CRT_NONSTDC_NO_WARNINGS -D_CRT_STDIO_ISO_WIDE_SPECIFIERS -DNOMINMAX' +)) +config.substitutions.append(('%{link_flags}', + '-nostdlib -L %{lib} -lc++ -lmsvcrt -lmsvcprt -loldnames' +)) +config.substitutions.append(('%{exec}', + '%{executor} --execdir %T --env PATH=%{lib} -- ' +)) + +# LIBCXX-WINDOWS-FIXME is the feature name used to XFAIL the +# initial Windows failures until they can be properly diagnosed +# and fixed. This allows easier detection of new test failures +# and regressions. Note: New failures should not be suppressed +# using this feature. (Also see llvm.org/PR32730) +config.available_features.add('LIBCXX-WINDOWS-FIXME') + +import os, site +site.addsitedir(os.path.join('@LIBCXX_SOURCE_DIR@', 'utils')) +import libcxx.test.params, libcxx.test.newconfig +libcxx.test.newconfig.configure( + libcxx.test.params.DEFAULT_PARAMETERS, + libcxx.test.features.DEFAULT_FEATURES, + config, + lit_config +) diff --git a/libcxx/test/configs/llvm-libc++-static-clangcl.cfg.in b/libcxx/test/configs/llvm-libc++-static-clangcl.cfg.in new file mode 100644 index 000000000000..14812b2af9c1 --- /dev/null +++ b/libcxx/test/configs/llvm-libc++-static-clangcl.cfg.in @@ -0,0 +1,32 @@ +# This testing configuration handles running the test suite against LLVM's libc++ +# using a static library, with Clang-cl on Windows. 
+ +lit_config.load_config(config, '@CMAKE_CURRENT_BINARY_DIR@/cmake-bridge.cfg') + +config.substitutions.append(('%{flags}', '--driver-mode=g++')) +config.substitutions.append(('%{compile_flags}', + '-nostdinc++ -isystem %{include} -isystem %{target-include} -I %{libcxx}/test/support -D_CRT_SECURE_NO_WARNINGS -D_CRT_NONSTDC_NO_WARNINGS -D_CRT_STDIO_ISO_WIDE_SPECIFIERS -DNOMINMAX' +)) +config.substitutions.append(('%{link_flags}', + '-nostdlib -L %{lib} -llibc++ -lmsvcrt -lmsvcprt -loldnames' +)) +config.substitutions.append(('%{exec}', + '%{executor} --execdir %T --env PATH=%{lib} -- ' +)) + +# LIBCXX-WINDOWS-FIXME is the feature name used to XFAIL the +# initial Windows failures until they can be properly diagnosed +# and fixed. This allows easier detection of new test failures +# and regressions. Note: New failures should not be suppressed +# using this feature. (Also see llvm.org/PR32730) +config.available_features.add('LIBCXX-WINDOWS-FIXME') + +import os, site +site.addsitedir(os.path.join('@LIBCXX_SOURCE_DIR@', 'utils')) +import libcxx.test.params, libcxx.test.newconfig +libcxx.test.newconfig.configure( + libcxx.test.params.DEFAULT_PARAMETERS, + libcxx.test.features.DEFAULT_FEATURES, + config, + lit_config +) diff --git a/libcxx/utils/ci/run-buildbot b/libcxx/utils/ci/run-buildbot index d1bff8e592aa..f27979dcb578 100755 --- a/libcxx/utils/ci/run-buildbot +++ b/libcxx/utils/ci/run-buildbot @@ -597,13 +597,15 @@ clang-cl-dll) # correctly when libc++ visibility attributes indicate dllimport linkage # anyway), thus just disable the experimental library. Remove this # setting when cmake and the test driver does the right thing automatically. - generate-cmake-libcxx-win -DLIBCXX_ENABLE_EXPERIMENTAL_LIBRARY=OFF + generate-cmake-libcxx-win -DLIBCXX_ENABLE_EXPERIMENTAL_LIBRARY=OFF \ + -DLIBCXX_TEST_CONFIG="llvm-libc++-shared-clangcl.cfg.in" echo "+++ Running the libc++ tests" ${NINJA} -vC "${BUILD_DIR}" check-cxx ;; clang-cl-static) clean - generate-cmake-libcxx-win -DLIBCXX_ENABLE_SHARED=OFF + generate-cmake-libcxx-win -DLIBCXX_ENABLE_SHARED=OFF \ + -DLIBCXX_TEST_CONFIG="llvm-libc++-static-clangcl.cfg.in" echo "+++ Running the libc++ tests" ${NINJA} -vC "${BUILD_DIR}" check-cxx ;; @@ -617,6 +619,7 @@ mingw-dll) generate-cmake \ -DCMAKE_C_COMPILER=x86_64-w64-mingw32-clang \ -DCMAKE_CXX_COMPILER=x86_64-w64-mingw32-clang++ \ + -DLIBCXX_TEST_CONFIG="llvm-libc++-mingw.cfg.in" \ -C "${MONOREPO_ROOT}/libcxx/cmake/caches/MinGW.cmake" echo "+++ Running the libc++ tests" ${NINJA} -vC "${BUILD_DIR}" check-cxx @@ -626,6 +629,7 @@ mingw-static) generate-cmake \ -DCMAKE_C_COMPILER=x86_64-w64-mingw32-clang \ -DCMAKE_CXX_COMPILER=x86_64-w64-mingw32-clang++ \ + -DLIBCXX_TEST_CONFIG="llvm-libc++-mingw.cfg.in" \ -C "${MONOREPO_ROOT}/libcxx/cmake/caches/MinGW.cmake" \ -DLIBCXX_ENABLE_SHARED=OFF \ -DLIBUNWIND_ENABLE_SHARED=OFF From a9bb97e8410b09446c8d6da3b7bb493f79a4aaea Mon Sep 17 00:00:00 2001 From: Nikita Popov Date: Wed, 22 Dec 2021 14:43:52 +0100 Subject: [PATCH 050/293] [msan] Break optimization in memccpy tests After D116148 the memccpy gets optimized away and the expected uninitialized memory access does not occur. Make sure the call does not get optimized away. 
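For readers unfamiliar with the helper referenced above: break_optimization() is an existing utility in msan_test.cpp that the two tests now call on `y`. The sketch below is only an illustration of how such a barrier is commonly written — an empty inline-asm statement with a memory clobber that makes the compiler assume the buffer may be read or written, so the preceding memccpy() call (and the uninitialized read it is expected to trigger) cannot be optimized away. The name `break_optimization_sketch` and its exact signature are assumptions for illustration, not the suite's actual definition.
```
// Assumed shape of a break_optimization-style compiler barrier (illustrative
// only; the real helper is defined elsewhere in msan_test.cpp). The empty asm
// with a "memory" clobber tells the compiler that *p may be read or written,
// so stores into the buffer are not dead and the call under test survives.
template <class T>
__attribute__((noinline)) void break_optimization_sketch(T *p) {
  __asm__ __volatile__("" : : "r"(p) : "memory");
}

// Usage mirroring the test change:
//   EXPECT_UMR(memccpy(y, x, 'd', 5));
//   break_optimization_sketch(y);   // keeps y (and the memccpy) alive
```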
--- compiler-rt/lib/msan/tests/msan_test.cpp | 2 ++ 1 file changed, 2 insertions(+) diff --git a/compiler-rt/lib/msan/tests/msan_test.cpp b/compiler-rt/lib/msan/tests/msan_test.cpp index 085e9040221e..00b736e93ade 100644 --- a/compiler-rt/lib/msan/tests/msan_test.cpp +++ b/compiler-rt/lib/msan/tests/msan_test.cpp @@ -1560,6 +1560,7 @@ TEST(MemorySanitizer, memccpy_nomatch_positive) { char* y = new char[5]; strcpy(x, "abc"); EXPECT_UMR(memccpy(y, x, 'd', 5)); + break_optimization(y); delete[] x; delete[] y; } @@ -1570,6 +1571,7 @@ TEST(MemorySanitizer, memccpy_match_positive) { x[0] = 'a'; x[2] = 'b'; EXPECT_UMR(memccpy(y, x, 'b', 5)); + break_optimization(y); delete[] x; delete[] y; } From 5eb271880c8fc59835797806ac44f736eaf3ddbd Mon Sep 17 00:00:00 2001 From: Alok Kumar Sharma Date: Wed, 22 Dec 2021 19:56:37 +0530 Subject: [PATCH 051/293] [clang][OpenMP][DebugInfo] Debug support for variables in shared clause of OpenMP task construct Currently variables appearing inside shared clause of OpenMP task construct are not visible inside lldb debugger. After the current patch, lldb is able to show the variable ``` * thread #1, name = 'a.out', stop reason = breakpoint 1.1 frame #0: 0x0000000000400934 a.out`.omp_task_entry. [inlined] .omp_outlined.(.global_tid.=0, .part_id.=0x000000000071f0d0, .privates.=0x000000000071f0e8, .copy_fn.=(a.out`.omp_task_privates_map. at testshared.cxx:8), .task_t.=0x000000000071f0c0, __context=0x000000000071f0f0) at testshared.cxx:10:34 7 else { 8 #pragma omp task shared(svar) firstprivate(n) 9 { -> 10 printf("Task svar = %d\n", svar); 11 printf("Task n = %d\n", n); 12 svar = fib(n - 1); 13 } (lldb) p svar (int) $0 = 9 ``` Reviewed By: djtodoro Differential Revision: https://reviews.llvm.org/D115510 --- clang/lib/CodeGen/CGStmtOpenMP.cpp | 50 ++++++++++++++++++++++++ clang/lib/CodeGen/CodeGenFunction.h | 5 +++ clang/test/OpenMP/debug_task_shared.c | 55 +++++++++++++++++++++++++++ 3 files changed, 110 insertions(+) create mode 100644 clang/test/OpenMP/debug_task_shared.c diff --git a/clang/lib/CodeGen/CGStmtOpenMP.cpp b/clang/lib/CodeGen/CGStmtOpenMP.cpp index 2509de486671..5677fce355d5 100644 --- a/clang/lib/CodeGen/CGStmtOpenMP.cpp +++ b/clang/lib/CodeGen/CGStmtOpenMP.cpp @@ -24,10 +24,13 @@ #include "clang/AST/StmtVisitor.h" #include "clang/Basic/OpenMPKinds.h" #include "clang/Basic/PrettyStackTrace.h" +#include "llvm/BinaryFormat/Dwarf.h" #include "llvm/Frontend/OpenMP/OMPConstants.h" #include "llvm/Frontend/OpenMP/OMPIRBuilder.h" #include "llvm/IR/Constants.h" +#include "llvm/IR/DebugInfoMetadata.h" #include "llvm/IR/Instructions.h" +#include "llvm/IR/Metadata.h" #include "llvm/Support/AtomicOrdering.h" using namespace clang; using namespace CodeGen; @@ -4431,6 +4434,53 @@ void CodeGenFunction::EmitOMPTaskBasedDirective( UntiedLocalVars; // Set proper addresses for generated private copies. OMPPrivateScope Scope(CGF); + // Generate debug info for variables present in shared clause. + if (auto *DI = CGF.getDebugInfo()) { + llvm::SmallDenseMap CaptureFields = + CGF.CapturedStmtInfo->getCaptureFields(); + llvm::Value *ContextValue = CGF.CapturedStmtInfo->getContextValue(); + if (CaptureFields.size() && ContextValue) { + unsigned CharWidth = CGF.getContext().getCharWidth(); + // The shared variables are packed together as members of structure. + // So the address of each shared variable can be computed by adding + // offset of it (within record) to the base address of record. 
For each + // shared variable, debug intrinsic llvm.dbg.declare is generated with + // appropriate expressions (DIExpression). + // Ex: + // %12 = load %struct.anon*, %struct.anon** %__context.addr.i + // call void @llvm.dbg.declare(metadata %struct.anon* %12, + // metadata !svar1, + // metadata !DIExpression(DW_OP_deref)) + // call void @llvm.dbg.declare(metadata %struct.anon* %12, + // metadata !svar2, + // metadata !DIExpression(DW_OP_plus_uconst, 8, DW_OP_deref)) + for (auto It = CaptureFields.begin(); It != CaptureFields.end(); ++It) { + const VarDecl *SharedVar = It->first; + RecordDecl *CaptureRecord = It->second->getParent(); + const ASTRecordLayout &Layout = + CGF.getContext().getASTRecordLayout(CaptureRecord); + unsigned Offset = + Layout.getFieldOffset(It->second->getFieldIndex()) / CharWidth; + (void)DI->EmitDeclareOfAutoVariable(SharedVar, ContextValue, + CGF.Builder, false); + llvm::Instruction &Last = CGF.Builder.GetInsertBlock()->back(); + // Get the call dbg.declare instruction we just created and update + // its DIExpression to add offset to base address. + if (auto DDI = dyn_cast(&Last)) { + SmallVector Ops; + // Add offset to the base address if non zero. + if (Offset) { + Ops.push_back(llvm::dwarf::DW_OP_plus_uconst); + Ops.push_back(Offset); + } + Ops.push_back(llvm::dwarf::DW_OP_deref); + auto &Ctx = DDI->getContext(); + llvm::DIExpression *DIExpr = llvm::DIExpression::get(Ctx, Ops); + Last.setOperand(2, llvm::MetadataAsValue::get(Ctx, DIExpr)); + } + } + } + } llvm::SmallVector, 16> FirstprivatePtrs; if (!Data.PrivateVars.empty() || !Data.FirstprivateVars.empty() || !Data.LastprivateVars.empty() || !Data.PrivateLocals.empty()) { diff --git a/clang/lib/CodeGen/CodeGenFunction.h b/clang/lib/CodeGen/CodeGenFunction.h index 481ec2d606b1..f76ce8a6400d 100644 --- a/clang/lib/CodeGen/CodeGenFunction.h +++ b/clang/lib/CodeGen/CodeGenFunction.h @@ -459,6 +459,11 @@ class CodeGenFunction : public CodeGenTypeCache { /// Get the name of the capture helper. virtual StringRef getHelperName() const { return "__captured_stmt"; } + /// Get the CaptureFields + llvm::SmallDenseMap getCaptureFields() { + return CaptureFields; + } + private: /// The kind of captured statement being generated. CapturedRegionKind Kind; diff --git a/clang/test/OpenMP/debug_task_shared.c b/clang/test/OpenMP/debug_task_shared.c new file mode 100644 index 000000000000..c3a23bc48632 --- /dev/null +++ b/clang/test/OpenMP/debug_task_shared.c @@ -0,0 +1,55 @@ +// This testcase checks emission of debug info for variables +// inside shared clause of task construct. + +// REQUIRES: x86_64-linux + +// RUN: %clang_cc1 -debug-info-kind=constructor -DSHARED -x c -verify -triple x86_64-pc-linux-gnu -fopenmp -emit-llvm %s -o - | FileCheck %s --check-prefix=CHECK +// RUN: %clang_cc1 -debug-info-kind=constructor -x c -verify -triple x86_64-pc-linux-gnu -fopenmp -emit-llvm %s -o - | FileCheck %s --check-prefix=NEG +// expected-no-diagnostics + +// CHECK-LABEL: define internal i32 @.omp_task_entry. 
+ +// CHECK-DAG: [[CONTEXT:%[0-9]+]] = load %struct.anon*, %struct.anon** %__context.addr.i, align 8 +// CHECK-DAG: call void @llvm.dbg.declare(metadata %struct.anon* [[CONTEXT]], metadata [[SHARE2:![0-9]+]], metadata !DIExpression(DW_OP_deref)) +// CHECK-DAG: call void @llvm.dbg.declare(metadata %struct.anon* [[CONTEXT]], metadata [[SHARE3:![0-9]+]], metadata !DIExpression(DW_OP_plus_uconst, 8, DW_OP_deref)) +// CHECK-DAG: call void @llvm.dbg.declare(metadata %struct.anon* [[CONTEXT]], metadata [[SHARE1:![0-9]+]], metadata !DIExpression(DW_OP_plus_uconst, 16, DW_OP_deref)) + +// CHECK-DAG: [[SHARE2]] = !DILocalVariable(name: "share2" +// CHECK-DAG: [[SHARE3]] = !DILocalVariable(name: "share3" +// CHECK-DAG: [[SHARE1]] = !DILocalVariable(name: "share1" + +// NEG-LABEL: define internal i32 @.omp_task_entry. +// NEG: [[CONTEXT:%[0-9]+]] = load %struct.anon*, %struct.anon** %__context.addr.i, align 8 +// NEG-NOT: call void @llvm.dbg.declare(metadata %struct.anon* [[CONTEXT]], metadata {{![0-9]+}}, metadata !DIExpression(DW_OP_deref)) + +extern int printf(const char *, ...); + +int foo(int n) { + int share1 = 9, share2 = 11, share3 = 13, priv1, priv2, fpriv; + fpriv = n + 4; + + if (n < 2) + return n; + else { +#if SHARED +#pragma omp task shared(share1, share2) private(priv1, priv2) firstprivate(fpriv) shared(share3) +#else +#pragma omp task private(priv1, priv2) firstprivate(fpriv) +#endif + { + priv1 = n; + priv2 = n + 2; + share2 += share3; + printf("share1 = %d, share2 = %d, share3 = %d\n", share1, share2, share3); + share1 = priv1 + priv2 + fpriv + foo(n - 1) + share2 + share3; + } +#pragma omp taskwait + return share1 + share2 + share3; + } +} + +int main() { + int n = 10; + printf("foo(%d) = %d\n", n, foo(n)); + return 0; +} From fb785877a95d5d51e802df618b22187b624a2b23 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Micha=C5=82=20G=C3=B3rny?= Date: Sat, 18 Dec 2021 10:09:12 +0100 Subject: [PATCH 052/293] [lldb] [Process/FreeBSDKernel] Introduce libkvm support Introduce initial support for using libkvm on FreeBSD. 
The library can be used as an alternate implementation for processing kernel coredumps but it can also be used to access live kernel memory through specifying "/dev/mem" as the core file, i.e.: lldb --core /dev/mem /boot/kernel/kernel Differential Revision: https://reviews.llvm.org/D116005 --- .../Process/FreeBSDKernel/CMakeLists.txt | 12 +- .../FreeBSDKernel/ProcessFreeBSDKernel.cpp | 144 ++++++++++++++---- .../FreeBSDKernel/ProcessFreeBSDKernel.h | 11 +- .../FreeBSDKernel/TestFreeBSDKernelLive.py | 44 ++++++ 4 files changed, 173 insertions(+), 38 deletions(-) create mode 100644 lldb/test/API/functionalities/postmortem/FreeBSDKernel/TestFreeBSDKernelLive.py diff --git a/lldb/source/Plugins/Process/FreeBSDKernel/CMakeLists.txt b/lldb/source/Plugins/Process/FreeBSDKernel/CMakeLists.txt index ad388ee76b07..ca89ab0b6dc9 100644 --- a/lldb/source/Plugins/Process/FreeBSDKernel/CMakeLists.txt +++ b/lldb/source/Plugins/Process/FreeBSDKernel/CMakeLists.txt @@ -1,4 +1,12 @@ -if (NOT FBSDVMCore_FOUND) +set(FBSDKERNEL_LIBS) +if(FBSDVMCore_FOUND) + list(APPEND FBSDKERNEL_LIBS fbsdvmcore) +endif() +if(${CMAKE_SYSTEM_NAME} MATCHES "FreeBSD") + list(APPEND FBSDKERNEL_LIBS kvm) +endif() + +if (NOT FBSDKERNEL_LIBS) message(STATUS "Skipping FreeBSDKernel plugin due to missing libfbsdvmcore") return() endif() @@ -13,7 +21,7 @@ add_lldb_library(lldbPluginProcessFreeBSDKernel PLUGIN LINK_LIBS lldbCore lldbTarget - fbsdvmcore + ${FBSDKERNEL_LIBS} LINK_COMPONENTS Support ) diff --git a/lldb/source/Plugins/Process/FreeBSDKernel/ProcessFreeBSDKernel.cpp b/lldb/source/Plugins/Process/FreeBSDKernel/ProcessFreeBSDKernel.cpp index 327e25acd589..339d33d25110 100644 --- a/lldb/source/Plugins/Process/FreeBSDKernel/ProcessFreeBSDKernel.cpp +++ b/lldb/source/Plugins/Process/FreeBSDKernel/ProcessFreeBSDKernel.cpp @@ -10,42 +10,91 @@ #include "lldb/Core/PluginManager.h" #include "lldb/Target/DynamicLoader.h" +#include "Plugins/DynamicLoader/Static/DynamicLoaderStatic.h" #include "ProcessFreeBSDKernel.h" #include "ThreadFreeBSDKernel.h" -#include "Plugins/DynamicLoader/Static/DynamicLoaderStatic.h" +#if LLDB_ENABLE_FBSDVMCORE #include +#endif +#if defined(__FreeBSD__) +#include +#endif using namespace lldb; using namespace lldb_private; LLDB_PLUGIN_DEFINE(ProcessFreeBSDKernel) -ProcessFreeBSDKernel::ProcessFreeBSDKernel(lldb::TargetSP target_sp, - ListenerSP listener_sp, - const FileSpec &core_file, void *fvc) - : PostMortemProcess(target_sp, listener_sp), m_fvc(fvc) {} +namespace { -ProcessFreeBSDKernel::~ProcessFreeBSDKernel() { - if (m_fvc) - fvc_close(static_cast(m_fvc)); -} +#if LLDB_ENABLE_FBSDVMCORE +class ProcessFreeBSDKernelFVC : public ProcessFreeBSDKernel { +public: + ProcessFreeBSDKernelFVC(lldb::TargetSP target_sp, lldb::ListenerSP listener, + fvc_t *fvc); + + ~ProcessFreeBSDKernelFVC(); + + size_t DoReadMemory(lldb::addr_t addr, void *buf, size_t size, + lldb_private::Status &error) override; + +private: + fvc_t *m_fvc; + + const char *GetError(); +}; +#endif // LLDB_ENABLE_FBSDVMCORE + +#if defined(__FreeBSD__) +class ProcessFreeBSDKernelKVM : public ProcessFreeBSDKernel { +public: + ProcessFreeBSDKernelKVM(lldb::TargetSP target_sp, lldb::ListenerSP listener, + kvm_t *fvc); + + ~ProcessFreeBSDKernelKVM(); + + size_t DoReadMemory(lldb::addr_t addr, void *buf, size_t size, + lldb_private::Status &error) override; + +private: + kvm_t *m_kvm; + + const char *GetError(); +}; +#endif // defined(__FreeBSD__) + +} // namespace + +ProcessFreeBSDKernel::ProcessFreeBSDKernel(lldb::TargetSP target_sp, + ListenerSP 
listener_sp) + : PostMortemProcess(target_sp, listener_sp) {} lldb::ProcessSP ProcessFreeBSDKernel::CreateInstance(lldb::TargetSP target_sp, ListenerSP listener_sp, const FileSpec *crash_file, bool can_connect) { - lldb::ProcessSP process_sp; ModuleSP executable = target_sp->GetExecutableModule(); if (crash_file && !can_connect && executable) { - fvc_t *fvc = fvc_open( - executable->GetFileSpec().GetPath().c_str(), - crash_file->GetPath().c_str(), nullptr, nullptr, nullptr); +#if LLDB_ENABLE_FBSDVMCORE + fvc_t *fvc = + fvc_open(executable->GetFileSpec().GetPath().c_str(), + crash_file->GetPath().c_str(), nullptr, nullptr, nullptr); if (fvc) - process_sp = std::make_shared( - target_sp, listener_sp, *crash_file, fvc); + return std::make_shared(target_sp, listener_sp, + fvc); +#endif + +#if defined(__FreeBSD__) + kvm_t *kvm = + kvm_open2(executable->GetFileSpec().GetPath().c_str(), + crash_file->GetPath().c_str(), O_RDONLY, nullptr, nullptr); + if (kvm) + return std::make_shared(target_sp, listener_sp, + kvm); +#endif } - return process_sp; + return nullptr; } void ProcessFreeBSDKernel::Initialize() { @@ -107,20 +156,63 @@ Status ProcessFreeBSDKernel::DoLoadCore() { return Status(); } -size_t ProcessFreeBSDKernel::DoReadMemory(lldb::addr_t addr, void *buf, - size_t size, Status &error) { - ssize_t rd = fvc_read(static_cast(m_fvc), addr, buf, size); +DynamicLoader *ProcessFreeBSDKernel::GetDynamicLoader() { + if (m_dyld_up.get() == nullptr) + m_dyld_up.reset(DynamicLoader::FindPlugin( + this, DynamicLoaderStatic::GetPluginNameStatic())); + return m_dyld_up.get(); +} + +#if LLDB_ENABLE_FBSDVMCORE + +ProcessFreeBSDKernelFVC::ProcessFreeBSDKernelFVC(lldb::TargetSP target_sp, + ListenerSP listener_sp, + fvc_t *fvc) + : ProcessFreeBSDKernel(target_sp, listener_sp), m_fvc(fvc) {} + +ProcessFreeBSDKernelFVC::~ProcessFreeBSDKernelFVC() { + if (m_fvc) + fvc_close(m_fvc); +} + +size_t ProcessFreeBSDKernelFVC::DoReadMemory(lldb::addr_t addr, void *buf, + size_t size, Status &error) { + ssize_t rd = 0; + rd = fvc_read(m_fvc, addr, buf, size); if (rd < 0 || static_cast(rd) != size) { - error.SetErrorStringWithFormat("Reading memory failed: %s", - fvc_geterr(static_cast(m_fvc))); + error.SetErrorStringWithFormat("Reading memory failed: %s", GetError()); return rd > 0 ? rd : 0; } return rd; } -DynamicLoader *ProcessFreeBSDKernel::GetDynamicLoader() { - if (m_dyld_up.get() == nullptr) - m_dyld_up.reset(DynamicLoader::FindPlugin( - this, DynamicLoaderStatic::GetPluginNameStatic())); - return m_dyld_up.get(); +const char *ProcessFreeBSDKernelFVC::GetError() { return fvc_geterr(m_fvc); } + +#endif // LLDB_ENABLE_FBSDVMCORE + +#if defined(__FreeBSD__) + +ProcessFreeBSDKernelKVM::ProcessFreeBSDKernelKVM(lldb::TargetSP target_sp, + ListenerSP listener_sp, + kvm_t *fvc) + : ProcessFreeBSDKernel(target_sp, listener_sp), m_kvm(fvc) {} + +ProcessFreeBSDKernelKVM::~ProcessFreeBSDKernelKVM() { + if (m_kvm) + kvm_close(m_kvm); } + +size_t ProcessFreeBSDKernelKVM::DoReadMemory(lldb::addr_t addr, void *buf, + size_t size, Status &error) { + ssize_t rd = 0; + rd = kvm_read2(m_kvm, addr, buf, size); + if (rd < 0 || static_cast(rd) != size) { + error.SetErrorStringWithFormat("Reading memory failed: %s", GetError()); + return rd > 0 ? 
rd : 0; + } + return rd; +} + +const char *ProcessFreeBSDKernelKVM::GetError() { return kvm_geterr(m_kvm); } + +#endif // defined(__FreeBSD__) diff --git a/lldb/source/Plugins/Process/FreeBSDKernel/ProcessFreeBSDKernel.h b/lldb/source/Plugins/Process/FreeBSDKernel/ProcessFreeBSDKernel.h index cd10728463b5..558eec5403db 100644 --- a/lldb/source/Plugins/Process/FreeBSDKernel/ProcessFreeBSDKernel.h +++ b/lldb/source/Plugins/Process/FreeBSDKernel/ProcessFreeBSDKernel.h @@ -13,10 +13,7 @@ class ProcessFreeBSDKernel : public lldb_private::PostMortemProcess { public: - ProcessFreeBSDKernel(lldb::TargetSP target_sp, lldb::ListenerSP listener, - const lldb_private::FileSpec &core_file, void *fvc); - - ~ProcessFreeBSDKernel() override; + ProcessFreeBSDKernel(lldb::TargetSP target_sp, lldb::ListenerSP listener); static lldb::ProcessSP CreateInstance(lldb::TargetSP target_sp, lldb::ListenerSP listener, @@ -44,17 +41,11 @@ class ProcessFreeBSDKernel : public lldb_private::PostMortemProcess { lldb_private::Status DoLoadCore() override; - size_t DoReadMemory(lldb::addr_t addr, void *buf, size_t size, - lldb_private::Status &error) override; - lldb_private::DynamicLoader *GetDynamicLoader() override; protected: bool DoUpdateThreadList(lldb_private::ThreadList &old_thread_list, lldb_private::ThreadList &new_thread_list) override; - -private: - void *m_fvc; }; #endif // LLDB_SOURCE_PLUGINS_PROCESS_FREEBSDKERNEL_PROCESSFREEBSDKERNEL_H diff --git a/lldb/test/API/functionalities/postmortem/FreeBSDKernel/TestFreeBSDKernelLive.py b/lldb/test/API/functionalities/postmortem/FreeBSDKernel/TestFreeBSDKernelLive.py new file mode 100644 index 000000000000..98c87461a608 --- /dev/null +++ b/lldb/test/API/functionalities/postmortem/FreeBSDKernel/TestFreeBSDKernelLive.py @@ -0,0 +1,44 @@ +import os +import struct +import subprocess + +import lldb +from lldbsuite.test.decorators import * +from lldbsuite.test.lldbtest import * +from lldbsuite.test import lldbutil + + +class FreeBSDKernelVMCoreTestCase(TestBase): + NO_DEBUG_INFO_TESTCASE = True + + mydir = TestBase.compute_mydir(__file__) + + def test_mem(self): + kernel_exec = "/boot/kernel/kernel" + mem_device = "/dev/mem" + + if not os.access(kernel_exec, os.R_OK): + self.skipTest("Kernel @ %s is not readable" % (kernel_exec,)) + if not os.access(mem_device, os.R_OK): + self.skipTest("Memory @ %s is not readable" % (mem_device,)) + + target = self.dbg.CreateTarget(kernel_exec) + process = target.LoadCore(mem_device) + hz_value = int(subprocess.check_output(["sysctl", "-n", "kern.hz"])) + + self.assertTrue(process, PROCESS_IS_VALID) + self.assertEqual(process.GetNumThreads(), 1) + self.assertEqual(process.GetProcessID(), 0) + + # test memory reading + self.expect("expr -- *(int *) &hz", + substrs=["(int) $0 = %d" % hz_value]) + + main_mod = target.GetModuleAtIndex(0) + hz_addr = (main_mod.FindSymbols("hz")[0].symbol.addr + .GetLoadAddress(target)) + error = lldb.SBError() + self.assertEqual(process.ReadMemory(hz_addr, 4, error), + struct.pack(" Date: Wed, 22 Dec 2021 18:32:52 +0530 Subject: [PATCH 053/293] [AMDGPU] Move call clobbered return address registers s[30:31] to callee saved range Currently the return address ABI registers s[30:31], which fall in the call clobbered register range, are added as a live-in on the function entry to preserve its value when we have calls so that it gets saved and restored around the calls. 
But the DWARF unwind information (CFI) needs to track where the return address resides in a frame and the above approach makes it difficult to track the return address when the CFI information is emitted during the frame lowering, due to the involvment of understanding the control flow. This patch moves the return address ABI registers s[30:31] into callee saved registers range and stops adding live-in for return address registers, so that the CFI machinery will know where the return address resides when CSR save/restore happen during the frame lowering. And doing the above poses an issue that now the return instruction uses undefined register `sgpr30_sgpr31`. This is resolved by hiding the return address register use by the return instruction through the `SI_RETURN` pseudo instruction, which doesn't take any input operands, until the `SI_RETURN` pseudo gets lowered to the `S_SETPC_B64_return` during the `expandPostRAPseudo()`. As an added benefit, this patch simplifies overall return instruction handling. Note: The AMDGPU CFI changes are there only in the downstream code and another version of this patch will be posted for review for the downstream code. Reviewed By: arsenm Differential Revision: https://reviews.llvm.org/D114652 --- llvm/lib/Target/AMDGPU/AMDGPUCallLowering.cpp | 38 +- llvm/lib/Target/AMDGPU/AMDGPUCallingConv.td | 8 +- llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp | 1 - llvm/lib/Target/AMDGPU/AMDGPUISelLowering.h | 3 - llvm/lib/Target/AMDGPU/AMDGPUInstrInfo.td | 6 +- llvm/lib/Target/AMDGPU/AMDGPUMCInstLower.cpp | 3 +- llvm/lib/Target/AMDGPU/SIISelLowering.cpp | 46 +- llvm/lib/Target/AMDGPU/SIInsertWaitcnts.cpp | 2 +- llvm/lib/Target/AMDGPU/SIInstrInfo.cpp | 17 + llvm/lib/Target/AMDGPU/SIInstructions.td | 2 +- llvm/lib/Target/AMDGPU/SILowerSGPRSpills.cpp | 13 +- llvm/lib/Target/AMDGPU/SIRegisterInfo.td | 17 - llvm/lib/Target/AMDGPU/SOPInstructions.td | 7 +- .../combine-fma-add-mul-post-legalize.mir | 456 +-- .../combine-fma-add-mul-pre-legalize.mir | 456 +-- .../combine-shift-imm-chain-illegal-types.mir | 22 +- .../CodeGen/AMDGPU/GlobalISel/dummy-target.ll | 92 +- .../CodeGen/AMDGPU/GlobalISel/fdiv.f32.ll | 6 +- .../CodeGen/AMDGPU/GlobalISel/fdiv.f64.ll | 105 +- llvm/test/CodeGen/AMDGPU/GlobalISel/fpow.ll | 14 +- llvm/test/CodeGen/AMDGPU/GlobalISel/fshl.ll | 22 +- llvm/test/CodeGen/AMDGPU/GlobalISel/fshr.ll | 34 +- .../AMDGPU/GlobalISel/function-returns.ll | 1771 +++++------ .../CodeGen/AMDGPU/GlobalISel/global-value.ll | 40 +- .../GlobalISel/irtranslator-atomicrmw.ll | 16 +- .../irtranslator-call-abi-attribute-hints.ll | 86 +- .../irtranslator-call-implicit-args.ll | 176 +- .../GlobalISel/irtranslator-call-non-fixed.ll | 50 +- .../irtranslator-call-return-values.ll | 80 +- .../AMDGPU/GlobalISel/irtranslator-call.ll | 418 ++- .../GlobalISel/irtranslator-constantexpr.ll | 18 +- .../GlobalISel/irtranslator-constrained-fp.ll | 116 +- ...translator-fixed-function-abi-vgpr-args.ll | 6 +- .../GlobalISel/irtranslator-function-args.ll | 704 ++--- .../GlobalISel/irtranslator-getelementptr.ll | 64 +- .../GlobalISel/irtranslator-indirect-call.ll | 10 +- .../GlobalISel/irtranslator-inline-asm.ll | 180 +- .../irtranslator-memory-intrinsics.ll | 90 +- .../GlobalISel/irtranslator-metadata.ll | 5 +- .../AMDGPU/GlobalISel/irtranslator-ptrmask.ll | 48 +- .../AMDGPU/GlobalISel/irtranslator-sat.ll | 144 +- .../GlobalISel/irtranslator-sibling-call.ll | 493 ++- .../GlobalISel/irtranslator-tail-call.ll | 39 +- .../AMDGPU/GlobalISel/llvm.amdgcn.ds.fmax.ll | 60 +- 
.../AMDGPU/GlobalISel/load-unaligned.ll | 28 +- .../GlobalISel/regbankcombiner-smed3.mir | 120 +- .../GlobalISel/regbankcombiner-umed3.mir | 120 +- .../CodeGen/AMDGPU/GlobalISel/roundeven.ll | 10 +- .../test/CodeGen/AMDGPU/GlobalISel/saddsat.ll | 22 +- .../test/CodeGen/AMDGPU/GlobalISel/ssubsat.ll | 22 +- .../test/CodeGen/AMDGPU/GlobalISel/uaddsat.ll | 22 +- .../test/CodeGen/AMDGPU/GlobalISel/usubsat.ll | 22 +- .../abi-attribute-hints-undefined-behavior.ll | 10 +- llvm/test/CodeGen/AMDGPU/amdpal-callable.ll | 10 +- .../test/CodeGen/AMDGPU/branch-relax-spill.ll | 294 +- .../AMDGPU/call-graph-register-usage.ll | 5 +- .../AMDGPU/call-preserved-registers.ll | 29 +- .../test/CodeGen/AMDGPU/callee-frame-setup.ll | 73 +- .../callee-special-input-sgprs-fixed-abi.ll | 2 +- .../AMDGPU/cross-block-use-is-not-abi-copy.ll | 40 +- llvm/test/CodeGen/AMDGPU/fdiv-nofpexcept.ll | 74 +- .../AMDGPU/fix-frame-ptr-reg-copy-livein.ll | 10 +- llvm/test/CodeGen/AMDGPU/fpow.ll | 48 +- ...frame-setup-without-sgpr-to-vgpr-spills.ll | 41 +- llvm/test/CodeGen/AMDGPU/fshr.ll | 16 +- .../AMDGPU/gfx-callable-argument-types.ll | 2810 +++++++++-------- .../gfx-callable-preserved-registers.ll | 224 +- .../AMDGPU/gfx-callable-return-types.ll | 298 +- llvm/test/CodeGen/AMDGPU/indirect-call.ll | 1796 +++++------ llvm/test/CodeGen/AMDGPU/inline-asm.i128.ll | 24 +- llvm/test/CodeGen/AMDGPU/ipra.ll | 25 +- .../AMDGPU/llvm.amdgcn.implicitarg.ptr.ll | 4 + llvm/test/CodeGen/AMDGPU/llvm.powi.ll | 12 +- .../CodeGen/AMDGPU/mul24-pass-ordering.ll | 22 +- .../AMDGPU/need-fp-from-vgpr-spills.ll | 8 +- llvm/test/CodeGen/AMDGPU/nested-calls.ll | 6 +- .../CodeGen/AMDGPU/no-remat-indirect-mov.mir | 4 +- .../AMDGPU/no-source-locations-in-prologue.ll | 8 +- .../AMDGPU/reserve-vgpr-for-sgpr-spill.ll | 8 +- llvm/test/CodeGen/AMDGPU/save-fp.ll | 8 +- llvm/test/CodeGen/AMDGPU/sibling-call.ll | 16 +- .../CodeGen/AMDGPU/splitkit-copy-bundle.mir | 8 +- llvm/test/CodeGen/AMDGPU/stack-realign.ll | 7 +- llvm/test/CodeGen/AMDGPU/strict_fadd.f16.ll | 8 +- llvm/test/CodeGen/AMDGPU/strict_fmul.f16.ll | 8 +- llvm/test/CodeGen/AMDGPU/strict_fsub.f16.ll | 8 +- .../CodeGen/AMDGPU/tail-call-amdgpu-gfx.ll | 25 +- llvm/test/CodeGen/AMDGPU/udiv.ll | 72 +- llvm/test/CodeGen/AMDGPU/udiv64.ll | 46 +- .../AMDGPU/unstructured-cfg-def-use-issue.ll | 44 +- .../AMDGPU/urem-seteq-illegal-types.ll | 15 +- llvm/test/CodeGen/AMDGPU/urem64.ll | 6 +- .../CodeGen/AMDGPU/vgpr-tuple-allocation.ll | 34 +- llvm/test/CodeGen/AMDGPU/wave32.ll | 4 +- .../test/CodeGen/AMDGPU/wwm-reserved-spill.ll | 191 +- .../CodeGen/MIR/AMDGPU/machine-metadata.mir | 24 +- .../CodeGen/MIR/AMDGPU/stack-id-assert.mir | 11 +- 97 files changed, 5826 insertions(+), 6960 deletions(-) diff --git a/llvm/lib/Target/AMDGPU/AMDGPUCallLowering.cpp b/llvm/lib/Target/AMDGPU/AMDGPUCallLowering.cpp index 2f1e7823f65c..8041e2993c11 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPUCallLowering.cpp +++ b/llvm/lib/Target/AMDGPU/AMDGPUCallLowering.cpp @@ -337,7 +337,6 @@ bool AMDGPUCallLowering::lowerReturn(MachineIRBuilder &B, const Value *Val, FunctionLoweringInfo &FLI) const { MachineFunction &MF = B.getMF(); - MachineRegisterInfo &MRI = MF.getRegInfo(); SIMachineFunctionInfo *MFI = MF.getInfo(); MFI->setIfReturnsVoid(!Val); @@ -353,40 +352,15 @@ bool AMDGPUCallLowering::lowerReturn(MachineIRBuilder &B, const Value *Val, return true; } - auto const &ST = MF.getSubtarget(); - - unsigned ReturnOpc = 0; - if (IsShader) - ReturnOpc = AMDGPU::SI_RETURN_TO_EPILOG; - else if (CC == CallingConv::AMDGPU_Gfx) - ReturnOpc = 
AMDGPU::S_SETPC_B64_return_gfx;
-  else
-    ReturnOpc = AMDGPU::S_SETPC_B64_return;
-
+  unsigned ReturnOpc =
+      IsShader ? AMDGPU::SI_RETURN_TO_EPILOG : AMDGPU::SI_RETURN;
   auto Ret = B.buildInstrNoInsert(ReturnOpc);
-  Register ReturnAddrVReg;
-  if (ReturnOpc == AMDGPU::S_SETPC_B64_return) {
-    ReturnAddrVReg = MRI.createVirtualRegister(&AMDGPU::CCR_SGPR_64RegClass);
-    Ret.addUse(ReturnAddrVReg);
-  } else if (ReturnOpc == AMDGPU::S_SETPC_B64_return_gfx) {
-    ReturnAddrVReg =
-        MRI.createVirtualRegister(&AMDGPU::Gfx_CCR_SGPR_64RegClass);
-    Ret.addUse(ReturnAddrVReg);
-  }
 
   if (!FLI.CanLowerReturn)
     insertSRetStores(B, Val->getType(), VRegs, FLI.DemoteRegister);
   else if (!lowerReturnVal(B, Val, VRegs, Ret))
     return false;
 
-  if (ReturnOpc == AMDGPU::S_SETPC_B64_return ||
-      ReturnOpc == AMDGPU::S_SETPC_B64_return_gfx) {
-    const SIRegisterInfo *TRI = ST.getRegisterInfo();
-    Register LiveInReturn = MF.addLiveIn(TRI->getReturnAddressReg(MF),
-                                         &AMDGPU::SGPR_64RegClass);
-    B.buildCopy(ReturnAddrVReg, LiveInReturn);
-  }
-
   // TODO: Handle CalleeSavedRegsViaCopy.
 
   B.insertInstr(Ret);
@@ -601,14 +575,6 @@ bool AMDGPUCallLowering::lowerFormalArguments(
   SmallVector ArgLocs;
   CCState CCInfo(CC, F.isVarArg(), MF, ArgLocs, F.getContext());
 
-  if (!IsEntryFunc) {
-    Register ReturnAddrReg = TRI->getReturnAddressReg(MF);
-    Register LiveInReturn = MF.addLiveIn(ReturnAddrReg,
-                                         &AMDGPU::SGPR_64RegClass);
-    MBB.addLiveIn(ReturnAddrReg);
-    B.buildCopy(LiveInReturn, ReturnAddrReg);
-  }
-
   if (Info->hasImplicitBufferPtr()) {
     Register ImplicitBufferPtrReg = Info->addImplicitBufferPtr(*TRI);
     MF.addLiveIn(ImplicitBufferPtrReg, &AMDGPU::SGPR_64RegClass);
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUCallingConv.td b/llvm/lib/Target/AMDGPU/AMDGPUCallingConv.td
index 1682d43ae671..dec227777d8f 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUCallingConv.td
+++ b/llvm/lib/Target/AMDGPU/AMDGPUCallingConv.td
@@ -152,6 +152,10 @@ def CSR_AMDGPU_AGPRs_32_255 : CalleeSavedRegs<
   (sequence "AGPR%u", 32, 255)
 >;
 
+def CSR_AMDGPU_SGPRs_30_31 : CalleeSavedRegs<
+  (sequence "SGPR%u", 30, 31)
+>;
+
 def CSR_AMDGPU_SGPRs_32_105 : CalleeSavedRegs<
   (sequence "SGPR%u", 32, 105)
 >;
@@ -182,7 +186,7 @@ def CSR_AMDGPU_AllAllocatableSRegs : CalleeSavedRegs<
 >;
 
 def CSR_AMDGPU_HighRegs : CalleeSavedRegs<
-  (add CSR_AMDGPU_VGPRs, CSR_AMDGPU_SGPRs_32_105)
+  (add CSR_AMDGPU_VGPRs, CSR_AMDGPU_SGPRs_30_31, CSR_AMDGPU_SGPRs_32_105)
 >;
 
 def CSR_AMDGPU_HighRegs_With_AGPRs : CalleeSavedRegs<
@@ -190,7 +194,7 @@ def CSR_AMDGPU_HighRegs_With_AGPRs : CalleeSavedRegs<
 >;
 
 def CSR_AMDGPU_SI_Gfx : CalleeSavedRegs<
-  (add CSR_AMDGPU_VGPRs, CSR_AMDGPU_SI_Gfx_SGPRs_4_29, CSR_AMDGPU_SI_Gfx_SGPRs_64_105)
+  (add CSR_AMDGPU_VGPRs, CSR_AMDGPU_SI_Gfx_SGPRs_4_29, CSR_AMDGPU_SGPRs_30_31, CSR_AMDGPU_SI_Gfx_SGPRs_64_105)
 >;
 
 def CSR_AMDGPU_SI_Gfx_With_AGPRs : CalleeSavedRegs<
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp b/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp
index 54177564afbc..d662bb1d9af9 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp
@@ -4398,7 +4398,6 @@ const char* AMDGPUTargetLowering::getTargetNodeName(unsigned Opcode) const {
   NODE_NAME_CASE(TC_RETURN)
   NODE_NAME_CASE(TRAP)
   NODE_NAME_CASE(RET_FLAG)
-  NODE_NAME_CASE(RET_GFX_FLAG)
   NODE_NAME_CASE(RETURN_TO_EPILOG)
   NODE_NAME_CASE(ENDPGM)
   NODE_NAME_CASE(DWORDADDR)
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.h b/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.h
index daaca8737c5d..3dee2922501d 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.h
+++ b/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.h
@@ -367,9 +367,6 @@ enum NodeType : unsigned {
   // Return with values from a non-entry function.
   RET_FLAG,
 
-  // Return with values from a non-entry function (AMDGPU_Gfx CC).
-  RET_GFX_FLAG,
-
   DWORDADDR,
   FRACT,
 
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUInstrInfo.td b/llvm/lib/Target/AMDGPU/AMDGPUInstrInfo.td
index 391dc8428539..23b8fcf75f16 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUInstrInfo.td
+++ b/llvm/lib/Target/AMDGPU/AMDGPUInstrInfo.td
@@ -355,11 +355,7 @@ def AMDGPUendpgm : SDNode<"AMDGPUISD::ENDPGM", SDTNone,
 def AMDGPUreturn_to_epilog : SDNode<"AMDGPUISD::RETURN_TO_EPILOG", SDTNone,
   [SDNPHasChain, SDNPOptInGlue, SDNPVariadic]>;
 
-def AMDGPUret_flag : SDNode<"AMDGPUISD::RET_FLAG", SDTypeProfile<0, 1, [SDTCisPtrTy<0>]>,
-  [SDNPHasChain, SDNPOptInGlue, SDNPVariadic]
->;
-
-def AMDGPUret_gfx_flag : SDNode<"AMDGPUISD::RET_GFX_FLAG", SDTypeProfile<0, 1, [SDTCisPtrTy<0>]>,
+def AMDGPUret_flag : SDNode<"AMDGPUISD::RET_FLAG", SDTNone,
   [SDNPHasChain, SDNPOptInGlue, SDNPVariadic]
 >;
 
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUMCInstLower.cpp b/llvm/lib/Target/AMDGPU/AMDGPUMCInstLower.cpp
index 3fad7e192195..d05536ca3894 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUMCInstLower.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUMCInstLower.cpp
@@ -120,8 +120,7 @@ void AMDGPUMCInstLower::lower(const MachineInstr *MI, MCInst &OutMI) const {
   // FIXME: Should be able to handle this with emitPseudoExpansionLowering. We
   // need to select it to the subtarget specific version, and there's no way to
   // do that with a single pseudo source operation.
-  if (Opcode == AMDGPU::S_SETPC_B64_return ||
-      Opcode == AMDGPU::S_SETPC_B64_return_gfx)
+  if (Opcode == AMDGPU::S_SETPC_B64_return)
     Opcode = AMDGPU::S_SETPC_B64;
   else if (Opcode == AMDGPU::SI_CALL) {
     // SI_CALL is just S_SWAPPC_B64 with an additional operand to track the
diff --git a/llvm/lib/Target/AMDGPU/SIISelLowering.cpp b/llvm/lib/Target/AMDGPU/SIISelLowering.cpp
index 9f138136e6e9..77937c48a9f1 100644
--- a/llvm/lib/Target/AMDGPU/SIISelLowering.cpp
+++ b/llvm/lib/Target/AMDGPU/SIISelLowering.cpp
@@ -2618,24 +2618,6 @@ SITargetLowering::LowerReturn(SDValue Chain, CallingConv::ID CallConv,
   SmallVector RetOps;
   RetOps.push_back(Chain); // Operand #0 = Chain (updated below)
 
-  // Add return address for callable functions.
-  if (!Info->isEntryFunction()) {
-    const SIRegisterInfo *TRI = getSubtarget()->getRegisterInfo();
-    SDValue ReturnAddrReg = CreateLiveInRegister(
-        DAG, &AMDGPU::SReg_64RegClass, TRI->getReturnAddressReg(MF), MVT::i64);
-
-    SDValue ReturnAddrVirtualReg =
-        DAG.getRegister(MF.getRegInfo().createVirtualRegister(
-                            CallConv != CallingConv::AMDGPU_Gfx
-                                ? &AMDGPU::CCR_SGPR_64RegClass
-                                : &AMDGPU::Gfx_CCR_SGPR_64RegClass),
-                        MVT::i64);
-    Chain =
-        DAG.getCopyToReg(Chain, DL, ReturnAddrVirtualReg, ReturnAddrReg, Flag);
-    Flag = Chain.getValue(1);
-    RetOps.push_back(ReturnAddrVirtualReg);
-  }
-
   // Copy the result values into the output registers.
   for (unsigned I = 0, RealRVLocIdx = 0, E = RVLocs.size(); I != E;
        ++I, ++RealRVLocIdx) {
@@ -2692,15 +2674,8 @@ SITargetLowering::LowerReturn(SDValue Chain, CallingConv::ID CallConv,
   RetOps.push_back(Flag);
 
   unsigned Opc = AMDGPUISD::ENDPGM;
-  if (!IsWaveEnd) {
-    if (IsShader)
-      Opc = AMDGPUISD::RETURN_TO_EPILOG;
-    else if (CallConv == CallingConv::AMDGPU_Gfx)
-      Opc = AMDGPUISD::RET_GFX_FLAG;
-    else
-      Opc = AMDGPUISD::RET_FLAG;
-  }
-
+  if (!IsWaveEnd)
+    Opc = IsShader ? AMDGPUISD::RETURN_TO_EPILOG : AMDGPUISD::RET_FLAG;
   return DAG.getNode(Opc, DL, MVT::Other, RetOps);
 }
 
@@ -3268,21 +3243,6 @@ SDValue SITargetLowering::LowerCall(CallLoweringInfo &CLI,
   }
 
-  SDValue PhysReturnAddrReg;
-  if (IsTailCall) {
-    // Since the return is being combined with the call, we need to pass on the
-    // return address.
-
-    const SIRegisterInfo *TRI = getSubtarget()->getRegisterInfo();
-    SDValue ReturnAddrReg = CreateLiveInRegister(
-        DAG, &AMDGPU::SReg_64RegClass, TRI->getReturnAddressReg(MF), MVT::i64);
-
-    PhysReturnAddrReg = DAG.getRegister(TRI->getReturnAddressReg(MF),
-                                        MVT::i64);
-    Chain = DAG.getCopyToReg(Chain, DL, PhysReturnAddrReg, ReturnAddrReg, InFlag);
-    InFlag = Chain.getValue(1);
-  }
-
   // We don't usually want to end the call-sequence here because we would tidy
   // the frame up *after* the call, however in the ABI-changing tail-call case
   // we've carefully laid out the parameters so that when sp is reset they'll be
@@ -3312,8 +3272,6 @@ SDValue SITargetLowering::LowerCall(CallLoweringInfo &CLI,
     // this information must travel along with the operation for eventual
     // consumption by emitEpilogue.
     Ops.push_back(DAG.getTargetConstant(FPDiff, DL, MVT::i32));
-
-    Ops.push_back(PhysReturnAddrReg);
   }
 
   // Add argument registers to the end of the list so that they are known live
diff --git a/llvm/lib/Target/AMDGPU/SIInsertWaitcnts.cpp b/llvm/lib/Target/AMDGPU/SIInsertWaitcnts.cpp
index 6fbe5d45ce0a..b1771e1ff084 100644
--- a/llvm/lib/Target/AMDGPU/SIInsertWaitcnts.cpp
+++ b/llvm/lib/Target/AMDGPU/SIInsertWaitcnts.cpp
@@ -955,8 +955,8 @@ bool SIInsertWaitcnts::generateWaitcntInstBefore(
     // NOTE: this could be improved with knowledge of all call sites or
     //       with knowledge of the called routines.
     if (MI.getOpcode() == AMDGPU::SI_RETURN_TO_EPILOG ||
+        MI.getOpcode() == AMDGPU::SI_RETURN ||
        MI.getOpcode() == AMDGPU::S_SETPC_B64_return ||
-        MI.getOpcode() == AMDGPU::S_SETPC_B64_return_gfx ||
        (MI.isReturn() && MI.isCall() && !callWaitsOnFunctionEntry(MI))) {
      Wait = Wait.combined(AMDGPU::Waitcnt::allZero(ST->hasVscnt()));
    }
diff --git a/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp b/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp
index 4ce2c8a02194..92f6456f70e4 100644
--- a/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp
+++ b/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp
@@ -2071,6 +2071,23 @@ bool SIInstrInfo::expandPostRAPseudo(MachineInstr &MI) const {
     MI.setDesc(get(ST.isWave32() ? AMDGPU::S_MOV_B32 : AMDGPU::S_MOV_B64));
     break;
   }
+  case AMDGPU::SI_RETURN: {
+    const MachineFunction *MF = MBB.getParent();
+    const GCNSubtarget &ST = MF->getSubtarget();
+    const SIRegisterInfo *TRI = ST.getRegisterInfo();
+    // Hiding the return address use with SI_RETURN may lead to extra kills in
+    // the function and missing live-ins. We are fine in practice because callee
+    // saved register handling ensures the register value is restored before
+    // RET, but we need the undef flag here to appease the MachineVerifier
+    // liveness checks.
+    MachineInstrBuilder MIB =
+        BuildMI(MBB, MI, DL, get(AMDGPU::S_SETPC_B64_return))
+            .addReg(TRI->getReturnAddressReg(*MF), RegState::Undef);
+
+    MIB.copyImplicitOps(MI);
+    MI.eraseFromParent();
+    break;
+  }
   }
   return true;
 }
diff --git a/llvm/lib/Target/AMDGPU/SIInstructions.td b/llvm/lib/Target/AMDGPU/SIInstructions.td
index 2bbce1d8ba93..929d7526d49f 100644
--- a/llvm/lib/Target/AMDGPU/SIInstructions.td
+++ b/llvm/lib/Target/AMDGPU/SIInstructions.td
@@ -463,7 +463,7 @@ def SI_RETURN_TO_EPILOG : SPseudoInstSI <
 
 // Return for returning function calls.
 def SI_RETURN : SPseudoInstSI <
-  (outs), (ins), [],
+  (outs), (ins), [(AMDGPUret_flag)],
   "; return"> {
   let isTerminator = 1;
   let isBarrier = 1;
diff --git a/llvm/lib/Target/AMDGPU/SILowerSGPRSpills.cpp b/llvm/lib/Target/AMDGPU/SILowerSGPRSpills.cpp
index 55196fe334e6..ccbdefe034d6 100644
--- a/llvm/lib/Target/AMDGPU/SILowerSGPRSpills.cpp
+++ b/llvm/lib/Target/AMDGPU/SILowerSGPRSpills.cpp
@@ -79,6 +79,8 @@ static void insertCSRSaves(MachineBasicBlock &SaveBlock,
   const TargetInstrInfo &TII = *MF.getSubtarget().getInstrInfo();
   const TargetFrameLowering *TFI = MF.getSubtarget().getFrameLowering();
   const TargetRegisterInfo *TRI = MF.getSubtarget().getRegisterInfo();
+  const GCNSubtarget &ST = MF.getSubtarget();
+  const SIRegisterInfo *RI = ST.getRegisterInfo();
 
   MachineBasicBlock::iterator I = SaveBlock.begin();
   if (!TFI->spillCalleeSavedRegisters(SaveBlock, I, CSI, TRI)) {
@@ -89,8 +91,8 @@ static void insertCSRSaves(MachineBasicBlock &SaveBlock,
       MCRegister Reg = CS.getReg();
 
       MachineInstrSpan MIS(I, &SaveBlock);
-      const TargetRegisterClass *RC =
-        TRI->getMinimalPhysRegClass(Reg, MVT::i32);
+      const TargetRegisterClass *RC = TRI->getMinimalPhysRegClass(
+          Reg, Reg == RI->getReturnAddressReg(MF) ? MVT::i64 : MVT::i32);
 
       // If this value was already livein, we probably have a direct use of the
       // incoming register value, so don't kill at the spill point. This happens
@@ -119,7 +121,8 @@ static void insertCSRRestores(MachineBasicBlock &RestoreBlock,
   const TargetInstrInfo &TII = *MF.getSubtarget().getInstrInfo();
   const TargetFrameLowering *TFI = MF.getSubtarget().getFrameLowering();
   const TargetRegisterInfo *TRI = MF.getSubtarget().getRegisterInfo();
-
+  const GCNSubtarget &ST = MF.getSubtarget();
+  const SIRegisterInfo *RI = ST.getRegisterInfo();
   // Restore all registers immediately before the return and any
   // terminators that precede it.
   MachineBasicBlock::iterator I = RestoreBlock.getFirstTerminator();
@@ -128,8 +131,8 @@ static void insertCSRRestores(MachineBasicBlock &RestoreBlock,
   if (!TFI->restoreCalleeSavedRegisters(RestoreBlock, I, CSI, TRI)) {
     for (const CalleeSavedInfo &CI : reverse(CSI)) {
       unsigned Reg = CI.getReg();
-      const TargetRegisterClass *RC =
-        TRI->getMinimalPhysRegClass(Reg, MVT::i32);
+      const TargetRegisterClass *RC = TRI->getMinimalPhysRegClass(
+          Reg, Reg == RI->getReturnAddressReg(MF) ? MVT::i64 : MVT::i32);
 
       TII.loadRegFromStackSlot(RestoreBlock, I, Reg, CI.getFrameIdx(), RC, TRI);
 
       assert(I != RestoreBlock.begin() &&
diff --git a/llvm/lib/Target/AMDGPU/SIRegisterInfo.td b/llvm/lib/Target/AMDGPU/SIRegisterInfo.td
index 340e2b48e5cd..110cfeb09eb5 100644
--- a/llvm/lib/Target/AMDGPU/SIRegisterInfo.td
+++ b/llvm/lib/Target/AMDGPU/SIRegisterInfo.td
@@ -701,23 +701,6 @@ def SGPR_64 : SIRegisterClass<"AMDGPU", [v2i32, i64, v2f32, f64, v4i16, v4f16],
   let HasSGPR = 1;
 }
 
-// CCR (call clobbered registers) SGPR 64-bit registers
-def CCR_SGPR_64 : SIRegisterClass<"AMDGPU", SGPR_64.RegTypes, 32,
-                                  (add (trunc SGPR_64, 16))> {
-  let CopyCost = SGPR_64.CopyCost;
-  let AllocationPriority = SGPR_64.AllocationPriority;
-  let HasSGPR = 1;
-}
-
-// Call clobbered 64-bit SGPRs for AMDGPU_Gfx CC
-def Gfx_CCR_SGPR_64 : SIRegisterClass<"AMDGPU", SGPR_64.RegTypes, 32,
-                                      (add (trunc (shl SGPR_64, 15), 1), // s[30:31]
-                                           (trunc (shl SGPR_64, 18), 14))> { // s[36:37]-s[s62:63]
-  let CopyCost = SGPR_64.CopyCost;
-  let AllocationPriority = SGPR_64.AllocationPriority;
-  let HasSGPR = 1;
-}
-
 def TTMP_64 : SIRegisterClass<"AMDGPU", [v2i32, i64, f64, v4i16, v4f16], 32,
                               (add TTMP_64Regs)> {
   let isAllocatable = 0;
diff --git a/llvm/lib/Target/AMDGPU/SOPInstructions.td b/llvm/lib/Target/AMDGPU/SOPInstructions.td
index 1713586dcf5b..0ca679cd506c 100644
--- a/llvm/lib/Target/AMDGPU/SOPInstructions.td
+++ b/llvm/lib/Target/AMDGPU/SOPInstructions.td
@@ -152,8 +152,8 @@ class SOP1_64_0 pattern=[]> : SOP1_Pseudo <
 }
 
 // 64-bit input, no output
-class SOP1_1 pattern=[]> : SOP1_Pseudo <
-  opName, (outs), (ins rc:$src0), "$src0", pattern> {
+class SOP1_1 pattern=[]> : SOP1_Pseudo <
+  opName, (outs), (ins SReg_64:$src0), "$src0", pattern> {
   let has_sdst = 0;
 }
 
@@ -300,8 +300,7 @@ def S_SETPC_B64 : SOP1_1 <"s_setpc_b64">;
 
 let isReturn = 1 in {
 // Define variant marked as return rather than branch.
-def S_SETPC_B64_return : SOP1_1<"", CCR_SGPR_64, [(AMDGPUret_flag i64:$src0)]>; -def S_SETPC_B64_return_gfx : SOP1_1<"", Gfx_CCR_SGPR_64, [(AMDGPUret_gfx_flag i64:$src0)]>; +def S_SETPC_B64_return : SOP1_1<"">; } } // End isTerminator = 1, isBarrier = 1 diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/combine-fma-add-mul-post-legalize.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/combine-fma-add-mul-post-legalize.mir index 3c6c6523e100..0c48b0e3585d 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/combine-fma-add-mul-post-legalize.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/combine-fma-add-mul-post-legalize.mir @@ -12,193 +12,157 @@ name: test_f32_add_mul body: | bb.1.entry: - liveins: $vgpr0, $vgpr1, $vgpr2, $sgpr30_sgpr31 + liveins: $vgpr0, $vgpr1, $vgpr2 ; GFX9-LABEL: name: test_f32_add_mul ; GFX9: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 ; GFX9: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 ; GFX9: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; GFX9: [[COPY3:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 ; GFX9: [[FMUL:%[0-9]+]]:_(s32) = G_FMUL [[COPY]], [[COPY1]] ; GFX9: [[FADD:%[0-9]+]]:_(s32) = G_FADD [[FMUL]], [[COPY2]] ; GFX9: $vgpr0 = COPY [[FADD]](s32) - ; GFX9: [[COPY4:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY3]] - ; GFX9: S_SETPC_B64_return [[COPY4]], implicit $vgpr0 + ; GFX9: S_SETPC_B64_return $sgpr30_sgpr31, implicit $vgpr0 ; GFX9-CONTRACT-LABEL: name: test_f32_add_mul ; GFX9-CONTRACT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 ; GFX9-CONTRACT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 ; GFX9-CONTRACT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; GFX9-CONTRACT: [[COPY3:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 ; GFX9-CONTRACT: [[FMA:%[0-9]+]]:_(s32) = G_FMA [[COPY]], [[COPY1]], [[COPY2]] ; GFX9-CONTRACT: $vgpr0 = COPY [[FMA]](s32) - ; GFX9-CONTRACT: [[COPY4:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY3]] - ; GFX9-CONTRACT: S_SETPC_B64_return [[COPY4]], implicit $vgpr0 + ; GFX9-CONTRACT: S_SETPC_B64_return $sgpr30_sgpr31, implicit $vgpr0 ; GFX9-DENORM-LABEL: name: test_f32_add_mul ; GFX9-DENORM: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 ; GFX9-DENORM: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 ; GFX9-DENORM: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; GFX9-DENORM: [[COPY3:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 ; GFX9-DENORM: [[FMUL:%[0-9]+]]:_(s32) = G_FMUL [[COPY]], [[COPY1]] ; GFX9-DENORM: [[FADD:%[0-9]+]]:_(s32) = G_FADD [[FMUL]], [[COPY2]] ; GFX9-DENORM: $vgpr0 = COPY [[FADD]](s32) - ; GFX9-DENORM: [[COPY4:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY3]] - ; GFX9-DENORM: S_SETPC_B64_return [[COPY4]], implicit $vgpr0 + ; GFX9-DENORM: S_SETPC_B64_return $sgpr30_sgpr31, implicit $vgpr0 ; GFX9-UNSAFE-LABEL: name: test_f32_add_mul ; GFX9-UNSAFE: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 ; GFX9-UNSAFE: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 ; GFX9-UNSAFE: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; GFX9-UNSAFE: [[COPY3:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 ; GFX9-UNSAFE: [[FMA:%[0-9]+]]:_(s32) = G_FMA [[COPY]], [[COPY1]], [[COPY2]] ; GFX9-UNSAFE: $vgpr0 = COPY [[FMA]](s32) - ; GFX9-UNSAFE: [[COPY4:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY3]] - ; GFX9-UNSAFE: S_SETPC_B64_return [[COPY4]], implicit $vgpr0 + ; GFX9-UNSAFE: S_SETPC_B64_return $sgpr30_sgpr31, implicit $vgpr0 ; GFX10-LABEL: name: test_f32_add_mul ; GFX10: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 ; GFX10: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 ; GFX10: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; GFX10: [[COPY3:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 ; GFX10: [[FMUL:%[0-9]+]]:_(s32) = G_FMUL [[COPY]], [[COPY1]] ; GFX10: [[FADD:%[0-9]+]]:_(s32) = G_FADD [[FMUL]], [[COPY2]] ; GFX10: $vgpr0 = COPY 
[[FADD]](s32) - ; GFX10: [[COPY4:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY3]] - ; GFX10: S_SETPC_B64_return [[COPY4]], implicit $vgpr0 + ; GFX10: S_SETPC_B64_return $sgpr30_sgpr31, implicit $vgpr0 ; GFX10-CONTRACT-LABEL: name: test_f32_add_mul ; GFX10-CONTRACT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 ; GFX10-CONTRACT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 ; GFX10-CONTRACT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; GFX10-CONTRACT: [[COPY3:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 ; GFX10-CONTRACT: [[FMA:%[0-9]+]]:_(s32) = G_FMA [[COPY]], [[COPY1]], [[COPY2]] ; GFX10-CONTRACT: $vgpr0 = COPY [[FMA]](s32) - ; GFX10-CONTRACT: [[COPY4:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY3]] - ; GFX10-CONTRACT: S_SETPC_B64_return [[COPY4]], implicit $vgpr0 + ; GFX10-CONTRACT: S_SETPC_B64_return $sgpr30_sgpr31, implicit $vgpr0 ; GFX10-DENORM-LABEL: name: test_f32_add_mul ; GFX10-DENORM: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 ; GFX10-DENORM: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 ; GFX10-DENORM: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; GFX10-DENORM: [[COPY3:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 ; GFX10-DENORM: [[FMUL:%[0-9]+]]:_(s32) = G_FMUL [[COPY]], [[COPY1]] ; GFX10-DENORM: [[FADD:%[0-9]+]]:_(s32) = G_FADD [[FMUL]], [[COPY2]] ; GFX10-DENORM: $vgpr0 = COPY [[FADD]](s32) - ; GFX10-DENORM: [[COPY4:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY3]] - ; GFX10-DENORM: S_SETPC_B64_return [[COPY4]], implicit $vgpr0 + ; GFX10-DENORM: S_SETPC_B64_return $sgpr30_sgpr31, implicit $vgpr0 ; GFX10-UNSAFE-LABEL: name: test_f32_add_mul ; GFX10-UNSAFE: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 ; GFX10-UNSAFE: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 ; GFX10-UNSAFE: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; GFX10-UNSAFE: [[COPY3:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 ; GFX10-UNSAFE: [[FMA:%[0-9]+]]:_(s32) = G_FMA [[COPY]], [[COPY1]], [[COPY2]] ; GFX10-UNSAFE: $vgpr0 = COPY [[FMA]](s32) - ; GFX10-UNSAFE: [[COPY4:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY3]] - ; GFX10-UNSAFE: S_SETPC_B64_return [[COPY4]], implicit $vgpr0 + ; GFX10-UNSAFE: S_SETPC_B64_return $sgpr30_sgpr31, implicit $vgpr0 %0:_(s32) = COPY $vgpr0 %1:_(s32) = COPY $vgpr1 %2:_(s32) = COPY $vgpr2 - %3:sgpr_64 = COPY $sgpr30_sgpr31 %4:_(s32) = G_FMUL %0, %1 %5:_(s32) = G_FADD %4, %2 $vgpr0 = COPY %5(s32) - %6:ccr_sgpr_64 = COPY %3 - S_SETPC_B64_return %6, implicit $vgpr0 + S_SETPC_B64_return $sgpr30_sgpr31, implicit $vgpr0 ... 
--- name: test_f32_add_mul_rhs body: | bb.1.entry: - liveins: $vgpr0, $vgpr1, $vgpr2, $sgpr30_sgpr31 + liveins: $vgpr0, $vgpr1, $vgpr2 ; GFX9-LABEL: name: test_f32_add_mul_rhs ; GFX9: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 ; GFX9: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 ; GFX9: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; GFX9: [[COPY3:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 ; GFX9: [[FMUL:%[0-9]+]]:_(s32) = G_FMUL [[COPY]], [[COPY1]] ; GFX9: [[FADD:%[0-9]+]]:_(s32) = G_FADD [[COPY2]], [[FMUL]] ; GFX9: $vgpr0 = COPY [[FADD]](s32) - ; GFX9: [[COPY4:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY3]] - ; GFX9: S_SETPC_B64_return [[COPY4]], implicit $vgpr0 + ; GFX9: S_SETPC_B64_return $sgpr30_sgpr31, implicit $vgpr0 ; GFX9-CONTRACT-LABEL: name: test_f32_add_mul_rhs ; GFX9-CONTRACT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 ; GFX9-CONTRACT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 ; GFX9-CONTRACT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; GFX9-CONTRACT: [[COPY3:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 ; GFX9-CONTRACT: [[FMA:%[0-9]+]]:_(s32) = G_FMA [[COPY]], [[COPY1]], [[COPY2]] ; GFX9-CONTRACT: $vgpr0 = COPY [[FMA]](s32) - ; GFX9-CONTRACT: [[COPY4:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY3]] - ; GFX9-CONTRACT: S_SETPC_B64_return [[COPY4]], implicit $vgpr0 + ; GFX9-CONTRACT: S_SETPC_B64_return $sgpr30_sgpr31, implicit $vgpr0 ; GFX9-DENORM-LABEL: name: test_f32_add_mul_rhs ; GFX9-DENORM: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 ; GFX9-DENORM: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 ; GFX9-DENORM: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; GFX9-DENORM: [[COPY3:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 ; GFX9-DENORM: [[FMUL:%[0-9]+]]:_(s32) = G_FMUL [[COPY]], [[COPY1]] ; GFX9-DENORM: [[FADD:%[0-9]+]]:_(s32) = G_FADD [[COPY2]], [[FMUL]] ; GFX9-DENORM: $vgpr0 = COPY [[FADD]](s32) - ; GFX9-DENORM: [[COPY4:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY3]] - ; GFX9-DENORM: S_SETPC_B64_return [[COPY4]], implicit $vgpr0 + ; GFX9-DENORM: S_SETPC_B64_return $sgpr30_sgpr31, implicit $vgpr0 ; GFX9-UNSAFE-LABEL: name: test_f32_add_mul_rhs ; GFX9-UNSAFE: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 ; GFX9-UNSAFE: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 ; GFX9-UNSAFE: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; GFX9-UNSAFE: [[COPY3:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 ; GFX9-UNSAFE: [[FMA:%[0-9]+]]:_(s32) = G_FMA [[COPY]], [[COPY1]], [[COPY2]] ; GFX9-UNSAFE: $vgpr0 = COPY [[FMA]](s32) - ; GFX9-UNSAFE: [[COPY4:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY3]] - ; GFX9-UNSAFE: S_SETPC_B64_return [[COPY4]], implicit $vgpr0 + ; GFX9-UNSAFE: S_SETPC_B64_return $sgpr30_sgpr31, implicit $vgpr0 ; GFX10-LABEL: name: test_f32_add_mul_rhs ; GFX10: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 ; GFX10: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 ; GFX10: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; GFX10: [[COPY3:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 ; GFX10: [[FMUL:%[0-9]+]]:_(s32) = G_FMUL [[COPY]], [[COPY1]] ; GFX10: [[FADD:%[0-9]+]]:_(s32) = G_FADD [[COPY2]], [[FMUL]] ; GFX10: $vgpr0 = COPY [[FADD]](s32) - ; GFX10: [[COPY4:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY3]] - ; GFX10: S_SETPC_B64_return [[COPY4]], implicit $vgpr0 + ; GFX10: S_SETPC_B64_return $sgpr30_sgpr31, implicit $vgpr0 ; GFX10-CONTRACT-LABEL: name: test_f32_add_mul_rhs ; GFX10-CONTRACT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 ; GFX10-CONTRACT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 ; GFX10-CONTRACT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; GFX10-CONTRACT: [[COPY3:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 ; GFX10-CONTRACT: [[FMA:%[0-9]+]]:_(s32) = G_FMA [[COPY]], [[COPY1]], [[COPY2]] ; GFX10-CONTRACT: $vgpr0 = COPY [[FMA]](s32) - ; 
GFX10-CONTRACT: [[COPY4:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY3]] - ; GFX10-CONTRACT: S_SETPC_B64_return [[COPY4]], implicit $vgpr0 + ; GFX10-CONTRACT: S_SETPC_B64_return $sgpr30_sgpr31, implicit $vgpr0 ; GFX10-DENORM-LABEL: name: test_f32_add_mul_rhs ; GFX10-DENORM: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 ; GFX10-DENORM: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 ; GFX10-DENORM: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; GFX10-DENORM: [[COPY3:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 ; GFX10-DENORM: [[FMUL:%[0-9]+]]:_(s32) = G_FMUL [[COPY]], [[COPY1]] ; GFX10-DENORM: [[FADD:%[0-9]+]]:_(s32) = G_FADD [[COPY2]], [[FMUL]] ; GFX10-DENORM: $vgpr0 = COPY [[FADD]](s32) - ; GFX10-DENORM: [[COPY4:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY3]] - ; GFX10-DENORM: S_SETPC_B64_return [[COPY4]], implicit $vgpr0 + ; GFX10-DENORM: S_SETPC_B64_return $sgpr30_sgpr31, implicit $vgpr0 ; GFX10-UNSAFE-LABEL: name: test_f32_add_mul_rhs ; GFX10-UNSAFE: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 ; GFX10-UNSAFE: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 ; GFX10-UNSAFE: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; GFX10-UNSAFE: [[COPY3:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 ; GFX10-UNSAFE: [[FMA:%[0-9]+]]:_(s32) = G_FMA [[COPY]], [[COPY1]], [[COPY2]] ; GFX10-UNSAFE: $vgpr0 = COPY [[FMA]](s32) - ; GFX10-UNSAFE: [[COPY4:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY3]] - ; GFX10-UNSAFE: S_SETPC_B64_return [[COPY4]], implicit $vgpr0 + ; GFX10-UNSAFE: S_SETPC_B64_return $sgpr30_sgpr31, implicit $vgpr0 %0:_(s32) = COPY $vgpr0 %1:_(s32) = COPY $vgpr1 %2:_(s32) = COPY $vgpr2 - %3:sgpr_64 = COPY $sgpr30_sgpr31 %4:_(s32) = G_FMUL %0, %1 %5:_(s32) = G_FADD %2, %4 $vgpr0 = COPY %5(s32) - %6:ccr_sgpr_64 = COPY %3 - S_SETPC_B64_return %6, implicit $vgpr0 + S_SETPC_B64_return $sgpr30_sgpr31, implicit $vgpr0 ... --- name: test_half_add_mul body: | bb.1.entry: - liveins: $vgpr0, $vgpr1, $vgpr2, $sgpr30_sgpr31 + liveins: $vgpr0, $vgpr1, $vgpr2 ; GFX9-LABEL: name: test_half_add_mul ; GFX9: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 @@ -207,13 +171,11 @@ body: | ; GFX9: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY1]](s32) ; GFX9: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 ; GFX9: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[COPY2]](s32) - ; GFX9: [[COPY3:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 ; GFX9: [[FMUL:%[0-9]+]]:_(s16) = G_FMUL [[TRUNC]], [[TRUNC1]] ; GFX9: [[FADD:%[0-9]+]]:_(s16) = G_FADD [[FMUL]], [[TRUNC2]] ; GFX9: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[FADD]](s16) ; GFX9: $vgpr0 = COPY [[ANYEXT]](s32) - ; GFX9: [[COPY4:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY3]] - ; GFX9: S_SETPC_B64_return [[COPY4]], implicit $vgpr0 + ; GFX9: S_SETPC_B64_return $sgpr30_sgpr31, implicit $vgpr0 ; GFX9-CONTRACT-LABEL: name: test_half_add_mul ; GFX9-CONTRACT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 ; GFX9-CONTRACT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY]](s32) @@ -221,12 +183,10 @@ body: | ; GFX9-CONTRACT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY1]](s32) ; GFX9-CONTRACT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 ; GFX9-CONTRACT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[COPY2]](s32) - ; GFX9-CONTRACT: [[COPY3:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 ; GFX9-CONTRACT: [[FMA:%[0-9]+]]:_(s16) = G_FMA [[TRUNC]], [[TRUNC1]], [[TRUNC2]] ; GFX9-CONTRACT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[FMA]](s16) ; GFX9-CONTRACT: $vgpr0 = COPY [[ANYEXT]](s32) - ; GFX9-CONTRACT: [[COPY4:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY3]] - ; GFX9-CONTRACT: S_SETPC_B64_return [[COPY4]], implicit $vgpr0 + ; GFX9-CONTRACT: S_SETPC_B64_return $sgpr30_sgpr31, implicit $vgpr0 ; GFX9-DENORM-LABEL: name: test_half_add_mul ; GFX9-DENORM: 
[[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 ; GFX9-DENORM: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY]](s32) @@ -234,13 +194,11 @@ body: | ; GFX9-DENORM: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY1]](s32) ; GFX9-DENORM: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 ; GFX9-DENORM: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[COPY2]](s32) - ; GFX9-DENORM: [[COPY3:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 ; GFX9-DENORM: [[FMUL:%[0-9]+]]:_(s16) = G_FMUL [[TRUNC]], [[TRUNC1]] ; GFX9-DENORM: [[FADD:%[0-9]+]]:_(s16) = G_FADD [[FMUL]], [[TRUNC2]] ; GFX9-DENORM: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[FADD]](s16) ; GFX9-DENORM: $vgpr0 = COPY [[ANYEXT]](s32) - ; GFX9-DENORM: [[COPY4:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY3]] - ; GFX9-DENORM: S_SETPC_B64_return [[COPY4]], implicit $vgpr0 + ; GFX9-DENORM: S_SETPC_B64_return $sgpr30_sgpr31, implicit $vgpr0 ; GFX9-UNSAFE-LABEL: name: test_half_add_mul ; GFX9-UNSAFE: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 ; GFX9-UNSAFE: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY]](s32) @@ -248,12 +206,10 @@ body: | ; GFX9-UNSAFE: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY1]](s32) ; GFX9-UNSAFE: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 ; GFX9-UNSAFE: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[COPY2]](s32) - ; GFX9-UNSAFE: [[COPY3:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 ; GFX9-UNSAFE: [[FMA:%[0-9]+]]:_(s16) = G_FMA [[TRUNC]], [[TRUNC1]], [[TRUNC2]] ; GFX9-UNSAFE: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[FMA]](s16) ; GFX9-UNSAFE: $vgpr0 = COPY [[ANYEXT]](s32) - ; GFX9-UNSAFE: [[COPY4:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY3]] - ; GFX9-UNSAFE: S_SETPC_B64_return [[COPY4]], implicit $vgpr0 + ; GFX9-UNSAFE: S_SETPC_B64_return $sgpr30_sgpr31, implicit $vgpr0 ; GFX10-LABEL: name: test_half_add_mul ; GFX10: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 ; GFX10: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY]](s32) @@ -261,13 +217,11 @@ body: | ; GFX10: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY1]](s32) ; GFX10: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 ; GFX10: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[COPY2]](s32) - ; GFX10: [[COPY3:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 ; GFX10: [[FMUL:%[0-9]+]]:_(s16) = G_FMUL [[TRUNC]], [[TRUNC1]] ; GFX10: [[FADD:%[0-9]+]]:_(s16) = G_FADD [[FMUL]], [[TRUNC2]] ; GFX10: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[FADD]](s16) ; GFX10: $vgpr0 = COPY [[ANYEXT]](s32) - ; GFX10: [[COPY4:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY3]] - ; GFX10: S_SETPC_B64_return [[COPY4]], implicit $vgpr0 + ; GFX10: S_SETPC_B64_return $sgpr30_sgpr31, implicit $vgpr0 ; GFX10-CONTRACT-LABEL: name: test_half_add_mul ; GFX10-CONTRACT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 ; GFX10-CONTRACT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY]](s32) @@ -275,12 +229,10 @@ body: | ; GFX10-CONTRACT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY1]](s32) ; GFX10-CONTRACT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 ; GFX10-CONTRACT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[COPY2]](s32) - ; GFX10-CONTRACT: [[COPY3:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 ; GFX10-CONTRACT: [[FMA:%[0-9]+]]:_(s16) = G_FMA [[TRUNC]], [[TRUNC1]], [[TRUNC2]] ; GFX10-CONTRACT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[FMA]](s16) ; GFX10-CONTRACT: $vgpr0 = COPY [[ANYEXT]](s32) - ; GFX10-CONTRACT: [[COPY4:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY3]] - ; GFX10-CONTRACT: S_SETPC_B64_return [[COPY4]], implicit $vgpr0 + ; GFX10-CONTRACT: S_SETPC_B64_return $sgpr30_sgpr31, implicit $vgpr0 ; GFX10-DENORM-LABEL: name: test_half_add_mul ; GFX10-DENORM: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 ; GFX10-DENORM: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY]](s32) @@ -288,13 +240,11 @@ body: | ; GFX10-DENORM: 
[[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY1]](s32) ; GFX10-DENORM: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 ; GFX10-DENORM: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[COPY2]](s32) - ; GFX10-DENORM: [[COPY3:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 ; GFX10-DENORM: [[FMUL:%[0-9]+]]:_(s16) = G_FMUL [[TRUNC]], [[TRUNC1]] ; GFX10-DENORM: [[FADD:%[0-9]+]]:_(s16) = G_FADD [[FMUL]], [[TRUNC2]] ; GFX10-DENORM: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[FADD]](s16) ; GFX10-DENORM: $vgpr0 = COPY [[ANYEXT]](s32) - ; GFX10-DENORM: [[COPY4:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY3]] - ; GFX10-DENORM: S_SETPC_B64_return [[COPY4]], implicit $vgpr0 + ; GFX10-DENORM: S_SETPC_B64_return $sgpr30_sgpr31, implicit $vgpr0 ; GFX10-UNSAFE-LABEL: name: test_half_add_mul ; GFX10-UNSAFE: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 ; GFX10-UNSAFE: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY]](s32) @@ -302,32 +252,28 @@ body: | ; GFX10-UNSAFE: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY1]](s32) ; GFX10-UNSAFE: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 ; GFX10-UNSAFE: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[COPY2]](s32) - ; GFX10-UNSAFE: [[COPY3:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 ; GFX10-UNSAFE: [[FMA:%[0-9]+]]:_(s16) = G_FMA [[TRUNC]], [[TRUNC1]], [[TRUNC2]] ; GFX10-UNSAFE: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[FMA]](s16) ; GFX10-UNSAFE: $vgpr0 = COPY [[ANYEXT]](s32) - ; GFX10-UNSAFE: [[COPY4:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY3]] - ; GFX10-UNSAFE: S_SETPC_B64_return [[COPY4]], implicit $vgpr0 + ; GFX10-UNSAFE: S_SETPC_B64_return $sgpr30_sgpr31, implicit $vgpr0 %4:_(s32) = COPY $vgpr0 %0:_(s16) = G_TRUNC %4(s32) %5:_(s32) = COPY $vgpr1 %1:_(s16) = G_TRUNC %5(s32) %6:_(s32) = COPY $vgpr2 %2:_(s16) = G_TRUNC %6(s32) - %3:sgpr_64 = COPY $sgpr30_sgpr31 %7:_(s16) = G_FMUL %0, %1 %8:_(s16) = G_FADD %7, %2 %10:_(s32) = G_ANYEXT %8(s16) $vgpr0 = COPY %10(s32) - %9:ccr_sgpr_64 = COPY %3 - S_SETPC_B64_return %9, implicit $vgpr0 + S_SETPC_B64_return $sgpr30_sgpr31, implicit $vgpr0 ... 
--- name: test_half_add_mul_rhs body: | bb.1.entry: - liveins: $vgpr0, $vgpr1, $vgpr2, $sgpr30_sgpr31 + liveins: $vgpr0, $vgpr1, $vgpr2 ; GFX9-LABEL: name: test_half_add_mul_rhs ; GFX9: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 @@ -336,13 +282,11 @@ body: | ; GFX9: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY1]](s32) ; GFX9: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 ; GFX9: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[COPY2]](s32) - ; GFX9: [[COPY3:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 ; GFX9: [[FMUL:%[0-9]+]]:_(s16) = G_FMUL [[TRUNC]], [[TRUNC1]] ; GFX9: [[FADD:%[0-9]+]]:_(s16) = G_FADD [[TRUNC2]], [[FMUL]] ; GFX9: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[FADD]](s16) ; GFX9: $vgpr0 = COPY [[ANYEXT]](s32) - ; GFX9: [[COPY4:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY3]] - ; GFX9: S_SETPC_B64_return [[COPY4]], implicit $vgpr0 + ; GFX9: S_SETPC_B64_return $sgpr30_sgpr31, implicit $vgpr0 ; GFX9-CONTRACT-LABEL: name: test_half_add_mul_rhs ; GFX9-CONTRACT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 ; GFX9-CONTRACT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY]](s32) @@ -350,12 +294,10 @@ body: | ; GFX9-CONTRACT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY1]](s32) ; GFX9-CONTRACT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 ; GFX9-CONTRACT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[COPY2]](s32) - ; GFX9-CONTRACT: [[COPY3:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 ; GFX9-CONTRACT: [[FMA:%[0-9]+]]:_(s16) = G_FMA [[TRUNC]], [[TRUNC1]], [[TRUNC2]] ; GFX9-CONTRACT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[FMA]](s16) ; GFX9-CONTRACT: $vgpr0 = COPY [[ANYEXT]](s32) - ; GFX9-CONTRACT: [[COPY4:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY3]] - ; GFX9-CONTRACT: S_SETPC_B64_return [[COPY4]], implicit $vgpr0 + ; GFX9-CONTRACT: S_SETPC_B64_return $sgpr30_sgpr31, implicit $vgpr0 ; GFX9-DENORM-LABEL: name: test_half_add_mul_rhs ; GFX9-DENORM: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 ; GFX9-DENORM: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY]](s32) @@ -363,13 +305,11 @@ body: | ; GFX9-DENORM: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY1]](s32) ; GFX9-DENORM: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 ; GFX9-DENORM: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[COPY2]](s32) - ; GFX9-DENORM: [[COPY3:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 ; GFX9-DENORM: [[FMUL:%[0-9]+]]:_(s16) = G_FMUL [[TRUNC]], [[TRUNC1]] ; GFX9-DENORM: [[FADD:%[0-9]+]]:_(s16) = G_FADD [[TRUNC2]], [[FMUL]] ; GFX9-DENORM: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[FADD]](s16) ; GFX9-DENORM: $vgpr0 = COPY [[ANYEXT]](s32) - ; GFX9-DENORM: [[COPY4:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY3]] - ; GFX9-DENORM: S_SETPC_B64_return [[COPY4]], implicit $vgpr0 + ; GFX9-DENORM: S_SETPC_B64_return $sgpr30_sgpr31, implicit $vgpr0 ; GFX9-UNSAFE-LABEL: name: test_half_add_mul_rhs ; GFX9-UNSAFE: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 ; GFX9-UNSAFE: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY]](s32) @@ -377,12 +317,10 @@ body: | ; GFX9-UNSAFE: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY1]](s32) ; GFX9-UNSAFE: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 ; GFX9-UNSAFE: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[COPY2]](s32) - ; GFX9-UNSAFE: [[COPY3:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 ; GFX9-UNSAFE: [[FMA:%[0-9]+]]:_(s16) = G_FMA [[TRUNC]], [[TRUNC1]], [[TRUNC2]] ; GFX9-UNSAFE: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[FMA]](s16) ; GFX9-UNSAFE: $vgpr0 = COPY [[ANYEXT]](s32) - ; GFX9-UNSAFE: [[COPY4:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY3]] - ; GFX9-UNSAFE: S_SETPC_B64_return [[COPY4]], implicit $vgpr0 + ; GFX9-UNSAFE: S_SETPC_B64_return $sgpr30_sgpr31, implicit $vgpr0 ; GFX10-LABEL: name: test_half_add_mul_rhs ; GFX10: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 ; GFX10: 
[[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY]](s32) @@ -390,13 +328,11 @@ body: | ; GFX10: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY1]](s32) ; GFX10: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 ; GFX10: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[COPY2]](s32) - ; GFX10: [[COPY3:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 ; GFX10: [[FMUL:%[0-9]+]]:_(s16) = G_FMUL [[TRUNC]], [[TRUNC1]] ; GFX10: [[FADD:%[0-9]+]]:_(s16) = G_FADD [[TRUNC2]], [[FMUL]] ; GFX10: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[FADD]](s16) ; GFX10: $vgpr0 = COPY [[ANYEXT]](s32) - ; GFX10: [[COPY4:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY3]] - ; GFX10: S_SETPC_B64_return [[COPY4]], implicit $vgpr0 + ; GFX10: S_SETPC_B64_return $sgpr30_sgpr31, implicit $vgpr0 ; GFX10-CONTRACT-LABEL: name: test_half_add_mul_rhs ; GFX10-CONTRACT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 ; GFX10-CONTRACT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY]](s32) @@ -404,12 +340,10 @@ body: | ; GFX10-CONTRACT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY1]](s32) ; GFX10-CONTRACT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 ; GFX10-CONTRACT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[COPY2]](s32) - ; GFX10-CONTRACT: [[COPY3:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 ; GFX10-CONTRACT: [[FMA:%[0-9]+]]:_(s16) = G_FMA [[TRUNC]], [[TRUNC1]], [[TRUNC2]] ; GFX10-CONTRACT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[FMA]](s16) ; GFX10-CONTRACT: $vgpr0 = COPY [[ANYEXT]](s32) - ; GFX10-CONTRACT: [[COPY4:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY3]] - ; GFX10-CONTRACT: S_SETPC_B64_return [[COPY4]], implicit $vgpr0 + ; GFX10-CONTRACT: S_SETPC_B64_return $sgpr30_sgpr31, implicit $vgpr0 ; GFX10-DENORM-LABEL: name: test_half_add_mul_rhs ; GFX10-DENORM: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 ; GFX10-DENORM: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY]](s32) @@ -417,13 +351,11 @@ body: | ; GFX10-DENORM: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY1]](s32) ; GFX10-DENORM: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 ; GFX10-DENORM: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[COPY2]](s32) - ; GFX10-DENORM: [[COPY3:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 ; GFX10-DENORM: [[FMUL:%[0-9]+]]:_(s16) = G_FMUL [[TRUNC]], [[TRUNC1]] ; GFX10-DENORM: [[FADD:%[0-9]+]]:_(s16) = G_FADD [[TRUNC2]], [[FMUL]] ; GFX10-DENORM: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[FADD]](s16) ; GFX10-DENORM: $vgpr0 = COPY [[ANYEXT]](s32) - ; GFX10-DENORM: [[COPY4:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY3]] - ; GFX10-DENORM: S_SETPC_B64_return [[COPY4]], implicit $vgpr0 + ; GFX10-DENORM: S_SETPC_B64_return $sgpr30_sgpr31, implicit $vgpr0 ; GFX10-UNSAFE-LABEL: name: test_half_add_mul_rhs ; GFX10-UNSAFE: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 ; GFX10-UNSAFE: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY]](s32) @@ -431,32 +363,28 @@ body: | ; GFX10-UNSAFE: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY1]](s32) ; GFX10-UNSAFE: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 ; GFX10-UNSAFE: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[COPY2]](s32) - ; GFX10-UNSAFE: [[COPY3:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 ; GFX10-UNSAFE: [[FMA:%[0-9]+]]:_(s16) = G_FMA [[TRUNC]], [[TRUNC1]], [[TRUNC2]] ; GFX10-UNSAFE: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[FMA]](s16) ; GFX10-UNSAFE: $vgpr0 = COPY [[ANYEXT]](s32) - ; GFX10-UNSAFE: [[COPY4:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY3]] - ; GFX10-UNSAFE: S_SETPC_B64_return [[COPY4]], implicit $vgpr0 + ; GFX10-UNSAFE: S_SETPC_B64_return $sgpr30_sgpr31, implicit $vgpr0 %4:_(s32) = COPY $vgpr0 %0:_(s16) = G_TRUNC %4(s32) %5:_(s32) = COPY $vgpr1 %1:_(s16) = G_TRUNC %5(s32) %6:_(s32) = COPY $vgpr2 %2:_(s16) = G_TRUNC %6(s32) - %3:sgpr_64 = COPY $sgpr30_sgpr31 %7:_(s16) = G_FMUL %0, %1 
%8:_(s16) = G_FADD %2, %7 %10:_(s32) = G_ANYEXT %8(s16) $vgpr0 = COPY %10(s32) - %9:ccr_sgpr_64 = COPY %3 - S_SETPC_B64_return %9, implicit $vgpr0 + S_SETPC_B64_return $sgpr30_sgpr31, implicit $vgpr0 ... --- name: test_double_add_mul body: | bb.1.entry: - liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $sgpr30_sgpr31 + liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5 ; GFX9-LABEL: name: test_double_add_mul ; GFX9: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 @@ -468,14 +396,12 @@ body: | ; GFX9: [[COPY4:%[0-9]+]]:_(s32) = COPY $vgpr4 ; GFX9: [[COPY5:%[0-9]+]]:_(s32) = COPY $vgpr5 ; GFX9: [[MV2:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY4]](s32), [[COPY5]](s32) - ; GFX9: [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 ; GFX9: [[FMUL:%[0-9]+]]:_(s64) = G_FMUL [[MV]], [[MV1]] ; GFX9: [[FADD:%[0-9]+]]:_(s64) = G_FADD [[FMUL]], [[MV2]] ; GFX9: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[FADD]](s64) ; GFX9: $vgpr0 = COPY [[UV]](s32) ; GFX9: $vgpr1 = COPY [[UV1]](s32) - ; GFX9: [[COPY7:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY6]] - ; GFX9: S_SETPC_B64_return [[COPY7]], implicit $vgpr0, implicit $vgpr1 + ; GFX9: S_SETPC_B64_return $sgpr30_sgpr31, implicit $vgpr0, implicit $vgpr1 ; GFX9-CONTRACT-LABEL: name: test_double_add_mul ; GFX9-CONTRACT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 ; GFX9-CONTRACT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 @@ -486,13 +412,11 @@ body: | ; GFX9-CONTRACT: [[COPY4:%[0-9]+]]:_(s32) = COPY $vgpr4 ; GFX9-CONTRACT: [[COPY5:%[0-9]+]]:_(s32) = COPY $vgpr5 ; GFX9-CONTRACT: [[MV2:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY4]](s32), [[COPY5]](s32) - ; GFX9-CONTRACT: [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 ; GFX9-CONTRACT: [[FMA:%[0-9]+]]:_(s64) = G_FMA [[MV]], [[MV1]], [[MV2]] ; GFX9-CONTRACT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[FMA]](s64) ; GFX9-CONTRACT: $vgpr0 = COPY [[UV]](s32) ; GFX9-CONTRACT: $vgpr1 = COPY [[UV1]](s32) - ; GFX9-CONTRACT: [[COPY7:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY6]] - ; GFX9-CONTRACT: S_SETPC_B64_return [[COPY7]], implicit $vgpr0, implicit $vgpr1 + ; GFX9-CONTRACT: S_SETPC_B64_return $sgpr30_sgpr31, implicit $vgpr0, implicit $vgpr1 ; GFX9-DENORM-LABEL: name: test_double_add_mul ; GFX9-DENORM: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 ; GFX9-DENORM: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 @@ -503,14 +427,12 @@ body: | ; GFX9-DENORM: [[COPY4:%[0-9]+]]:_(s32) = COPY $vgpr4 ; GFX9-DENORM: [[COPY5:%[0-9]+]]:_(s32) = COPY $vgpr5 ; GFX9-DENORM: [[MV2:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY4]](s32), [[COPY5]](s32) - ; GFX9-DENORM: [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 ; GFX9-DENORM: [[FMUL:%[0-9]+]]:_(s64) = G_FMUL [[MV]], [[MV1]] ; GFX9-DENORM: [[FADD:%[0-9]+]]:_(s64) = G_FADD [[FMUL]], [[MV2]] ; GFX9-DENORM: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[FADD]](s64) ; GFX9-DENORM: $vgpr0 = COPY [[UV]](s32) ; GFX9-DENORM: $vgpr1 = COPY [[UV1]](s32) - ; GFX9-DENORM: [[COPY7:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY6]] - ; GFX9-DENORM: S_SETPC_B64_return [[COPY7]], implicit $vgpr0, implicit $vgpr1 + ; GFX9-DENORM: S_SETPC_B64_return $sgpr30_sgpr31, implicit $vgpr0, implicit $vgpr1 ; GFX9-UNSAFE-LABEL: name: test_double_add_mul ; GFX9-UNSAFE: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 ; GFX9-UNSAFE: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 @@ -521,13 +443,11 @@ body: | ; GFX9-UNSAFE: [[COPY4:%[0-9]+]]:_(s32) = COPY $vgpr4 ; GFX9-UNSAFE: [[COPY5:%[0-9]+]]:_(s32) = COPY $vgpr5 ; GFX9-UNSAFE: [[MV2:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY4]](s32), [[COPY5]](s32) - ; GFX9-UNSAFE: 
[[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 ; GFX9-UNSAFE: [[FMA:%[0-9]+]]:_(s64) = G_FMA [[MV]], [[MV1]], [[MV2]] ; GFX9-UNSAFE: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[FMA]](s64) ; GFX9-UNSAFE: $vgpr0 = COPY [[UV]](s32) ; GFX9-UNSAFE: $vgpr1 = COPY [[UV1]](s32) - ; GFX9-UNSAFE: [[COPY7:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY6]] - ; GFX9-UNSAFE: S_SETPC_B64_return [[COPY7]], implicit $vgpr0, implicit $vgpr1 + ; GFX9-UNSAFE: S_SETPC_B64_return $sgpr30_sgpr31, implicit $vgpr0, implicit $vgpr1 ; GFX10-LABEL: name: test_double_add_mul ; GFX10: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 ; GFX10: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 @@ -538,14 +458,12 @@ body: | ; GFX10: [[COPY4:%[0-9]+]]:_(s32) = COPY $vgpr4 ; GFX10: [[COPY5:%[0-9]+]]:_(s32) = COPY $vgpr5 ; GFX10: [[MV2:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY4]](s32), [[COPY5]](s32) - ; GFX10: [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 ; GFX10: [[FMUL:%[0-9]+]]:_(s64) = G_FMUL [[MV]], [[MV1]] ; GFX10: [[FADD:%[0-9]+]]:_(s64) = G_FADD [[FMUL]], [[MV2]] ; GFX10: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[FADD]](s64) ; GFX10: $vgpr0 = COPY [[UV]](s32) ; GFX10: $vgpr1 = COPY [[UV1]](s32) - ; GFX10: [[COPY7:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY6]] - ; GFX10: S_SETPC_B64_return [[COPY7]], implicit $vgpr0, implicit $vgpr1 + ; GFX10: S_SETPC_B64_return $sgpr30_sgpr31, implicit $vgpr0, implicit $vgpr1 ; GFX10-CONTRACT-LABEL: name: test_double_add_mul ; GFX10-CONTRACT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 ; GFX10-CONTRACT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 @@ -556,13 +474,11 @@ body: | ; GFX10-CONTRACT: [[COPY4:%[0-9]+]]:_(s32) = COPY $vgpr4 ; GFX10-CONTRACT: [[COPY5:%[0-9]+]]:_(s32) = COPY $vgpr5 ; GFX10-CONTRACT: [[MV2:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY4]](s32), [[COPY5]](s32) - ; GFX10-CONTRACT: [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 ; GFX10-CONTRACT: [[FMA:%[0-9]+]]:_(s64) = G_FMA [[MV]], [[MV1]], [[MV2]] ; GFX10-CONTRACT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[FMA]](s64) ; GFX10-CONTRACT: $vgpr0 = COPY [[UV]](s32) ; GFX10-CONTRACT: $vgpr1 = COPY [[UV1]](s32) - ; GFX10-CONTRACT: [[COPY7:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY6]] - ; GFX10-CONTRACT: S_SETPC_B64_return [[COPY7]], implicit $vgpr0, implicit $vgpr1 + ; GFX10-CONTRACT: S_SETPC_B64_return $sgpr30_sgpr31, implicit $vgpr0, implicit $vgpr1 ; GFX10-DENORM-LABEL: name: test_double_add_mul ; GFX10-DENORM: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 ; GFX10-DENORM: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 @@ -573,14 +489,12 @@ body: | ; GFX10-DENORM: [[COPY4:%[0-9]+]]:_(s32) = COPY $vgpr4 ; GFX10-DENORM: [[COPY5:%[0-9]+]]:_(s32) = COPY $vgpr5 ; GFX10-DENORM: [[MV2:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY4]](s32), [[COPY5]](s32) - ; GFX10-DENORM: [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 ; GFX10-DENORM: [[FMUL:%[0-9]+]]:_(s64) = G_FMUL [[MV]], [[MV1]] ; GFX10-DENORM: [[FADD:%[0-9]+]]:_(s64) = G_FADD [[FMUL]], [[MV2]] ; GFX10-DENORM: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[FADD]](s64) ; GFX10-DENORM: $vgpr0 = COPY [[UV]](s32) ; GFX10-DENORM: $vgpr1 = COPY [[UV1]](s32) - ; GFX10-DENORM: [[COPY7:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY6]] - ; GFX10-DENORM: S_SETPC_B64_return [[COPY7]], implicit $vgpr0, implicit $vgpr1 + ; GFX10-DENORM: S_SETPC_B64_return $sgpr30_sgpr31, implicit $vgpr0, implicit $vgpr1 ; GFX10-UNSAFE-LABEL: name: test_double_add_mul ; GFX10-UNSAFE: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 ; GFX10-UNSAFE: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 @@ -591,13 +505,11 @@ body: | 
; GFX10-UNSAFE: [[COPY4:%[0-9]+]]:_(s32) = COPY $vgpr4 ; GFX10-UNSAFE: [[COPY5:%[0-9]+]]:_(s32) = COPY $vgpr5 ; GFX10-UNSAFE: [[MV2:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY4]](s32), [[COPY5]](s32) - ; GFX10-UNSAFE: [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 ; GFX10-UNSAFE: [[FMA:%[0-9]+]]:_(s64) = G_FMA [[MV]], [[MV1]], [[MV2]] ; GFX10-UNSAFE: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[FMA]](s64) ; GFX10-UNSAFE: $vgpr0 = COPY [[UV]](s32) ; GFX10-UNSAFE: $vgpr1 = COPY [[UV1]](s32) - ; GFX10-UNSAFE: [[COPY7:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY6]] - ; GFX10-UNSAFE: S_SETPC_B64_return [[COPY7]], implicit $vgpr0, implicit $vgpr1 + ; GFX10-UNSAFE: S_SETPC_B64_return $sgpr30_sgpr31, implicit $vgpr0, implicit $vgpr1 %4:_(s32) = COPY $vgpr0 %5:_(s32) = COPY $vgpr1 %0:_(s64) = G_MERGE_VALUES %4(s32), %5(s32) @@ -607,21 +519,19 @@ body: | %8:_(s32) = COPY $vgpr4 %9:_(s32) = COPY $vgpr5 %2:_(s64) = G_MERGE_VALUES %8(s32), %9(s32) - %3:sgpr_64 = COPY $sgpr30_sgpr31 %10:_(s64) = G_FMUL %0, %1 %11:_(s64) = G_FADD %10, %2 %13:_(s32), %14:_(s32) = G_UNMERGE_VALUES %11(s64) $vgpr0 = COPY %13(s32) $vgpr1 = COPY %14(s32) - %12:ccr_sgpr_64 = COPY %3 - S_SETPC_B64_return %12, implicit $vgpr0, implicit $vgpr1 + S_SETPC_B64_return $sgpr30_sgpr31, implicit $vgpr0, implicit $vgpr1 ... --- name: test_double_add_mul_rhs body: | bb.1.entry: - liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $sgpr30_sgpr31 + liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5 ; GFX9-LABEL: name: test_double_add_mul_rhs ; GFX9: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 @@ -633,14 +543,12 @@ body: | ; GFX9: [[COPY4:%[0-9]+]]:_(s32) = COPY $vgpr4 ; GFX9: [[COPY5:%[0-9]+]]:_(s32) = COPY $vgpr5 ; GFX9: [[MV2:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY4]](s32), [[COPY5]](s32) - ; GFX9: [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 ; GFX9: [[FMUL:%[0-9]+]]:_(s64) = G_FMUL [[MV]], [[MV1]] ; GFX9: [[FADD:%[0-9]+]]:_(s64) = G_FADD [[MV2]], [[FMUL]] ; GFX9: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[FADD]](s64) ; GFX9: $vgpr0 = COPY [[UV]](s32) ; GFX9: $vgpr1 = COPY [[UV1]](s32) - ; GFX9: [[COPY7:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY6]] - ; GFX9: S_SETPC_B64_return [[COPY7]], implicit $vgpr0, implicit $vgpr1 + ; GFX9: S_SETPC_B64_return $sgpr30_sgpr31, implicit $vgpr0, implicit $vgpr1 ; GFX9-CONTRACT-LABEL: name: test_double_add_mul_rhs ; GFX9-CONTRACT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 ; GFX9-CONTRACT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 @@ -651,13 +559,11 @@ body: | ; GFX9-CONTRACT: [[COPY4:%[0-9]+]]:_(s32) = COPY $vgpr4 ; GFX9-CONTRACT: [[COPY5:%[0-9]+]]:_(s32) = COPY $vgpr5 ; GFX9-CONTRACT: [[MV2:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY4]](s32), [[COPY5]](s32) - ; GFX9-CONTRACT: [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 ; GFX9-CONTRACT: [[FMA:%[0-9]+]]:_(s64) = G_FMA [[MV]], [[MV1]], [[MV2]] ; GFX9-CONTRACT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[FMA]](s64) ; GFX9-CONTRACT: $vgpr0 = COPY [[UV]](s32) ; GFX9-CONTRACT: $vgpr1 = COPY [[UV1]](s32) - ; GFX9-CONTRACT: [[COPY7:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY6]] - ; GFX9-CONTRACT: S_SETPC_B64_return [[COPY7]], implicit $vgpr0, implicit $vgpr1 + ; GFX9-CONTRACT: S_SETPC_B64_return $sgpr30_sgpr31, implicit $vgpr0, implicit $vgpr1 ; GFX9-DENORM-LABEL: name: test_double_add_mul_rhs ; GFX9-DENORM: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 ; GFX9-DENORM: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 @@ -668,14 +574,12 @@ body: | ; GFX9-DENORM: [[COPY4:%[0-9]+]]:_(s32) = COPY $vgpr4 ; GFX9-DENORM: 
[[COPY5:%[0-9]+]]:_(s32) = COPY $vgpr5 ; GFX9-DENORM: [[MV2:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY4]](s32), [[COPY5]](s32) - ; GFX9-DENORM: [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 ; GFX9-DENORM: [[FMUL:%[0-9]+]]:_(s64) = G_FMUL [[MV]], [[MV1]] ; GFX9-DENORM: [[FADD:%[0-9]+]]:_(s64) = G_FADD [[MV2]], [[FMUL]] ; GFX9-DENORM: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[FADD]](s64) ; GFX9-DENORM: $vgpr0 = COPY [[UV]](s32) ; GFX9-DENORM: $vgpr1 = COPY [[UV1]](s32) - ; GFX9-DENORM: [[COPY7:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY6]] - ; GFX9-DENORM: S_SETPC_B64_return [[COPY7]], implicit $vgpr0, implicit $vgpr1 + ; GFX9-DENORM: S_SETPC_B64_return $sgpr30_sgpr31, implicit $vgpr0, implicit $vgpr1 ; GFX9-UNSAFE-LABEL: name: test_double_add_mul_rhs ; GFX9-UNSAFE: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 ; GFX9-UNSAFE: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 @@ -686,13 +590,11 @@ body: | ; GFX9-UNSAFE: [[COPY4:%[0-9]+]]:_(s32) = COPY $vgpr4 ; GFX9-UNSAFE: [[COPY5:%[0-9]+]]:_(s32) = COPY $vgpr5 ; GFX9-UNSAFE: [[MV2:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY4]](s32), [[COPY5]](s32) - ; GFX9-UNSAFE: [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 ; GFX9-UNSAFE: [[FMA:%[0-9]+]]:_(s64) = G_FMA [[MV]], [[MV1]], [[MV2]] ; GFX9-UNSAFE: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[FMA]](s64) ; GFX9-UNSAFE: $vgpr0 = COPY [[UV]](s32) ; GFX9-UNSAFE: $vgpr1 = COPY [[UV1]](s32) - ; GFX9-UNSAFE: [[COPY7:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY6]] - ; GFX9-UNSAFE: S_SETPC_B64_return [[COPY7]], implicit $vgpr0, implicit $vgpr1 + ; GFX9-UNSAFE: S_SETPC_B64_return $sgpr30_sgpr31, implicit $vgpr0, implicit $vgpr1 ; GFX10-LABEL: name: test_double_add_mul_rhs ; GFX10: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 ; GFX10: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 @@ -703,14 +605,12 @@ body: | ; GFX10: [[COPY4:%[0-9]+]]:_(s32) = COPY $vgpr4 ; GFX10: [[COPY5:%[0-9]+]]:_(s32) = COPY $vgpr5 ; GFX10: [[MV2:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY4]](s32), [[COPY5]](s32) - ; GFX10: [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 ; GFX10: [[FMUL:%[0-9]+]]:_(s64) = G_FMUL [[MV]], [[MV1]] ; GFX10: [[FADD:%[0-9]+]]:_(s64) = G_FADD [[MV2]], [[FMUL]] ; GFX10: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[FADD]](s64) ; GFX10: $vgpr0 = COPY [[UV]](s32) ; GFX10: $vgpr1 = COPY [[UV1]](s32) - ; GFX10: [[COPY7:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY6]] - ; GFX10: S_SETPC_B64_return [[COPY7]], implicit $vgpr0, implicit $vgpr1 + ; GFX10: S_SETPC_B64_return $sgpr30_sgpr31, implicit $vgpr0, implicit $vgpr1 ; GFX10-CONTRACT-LABEL: name: test_double_add_mul_rhs ; GFX10-CONTRACT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 ; GFX10-CONTRACT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 @@ -721,13 +621,11 @@ body: | ; GFX10-CONTRACT: [[COPY4:%[0-9]+]]:_(s32) = COPY $vgpr4 ; GFX10-CONTRACT: [[COPY5:%[0-9]+]]:_(s32) = COPY $vgpr5 ; GFX10-CONTRACT: [[MV2:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY4]](s32), [[COPY5]](s32) - ; GFX10-CONTRACT: [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 ; GFX10-CONTRACT: [[FMA:%[0-9]+]]:_(s64) = G_FMA [[MV]], [[MV1]], [[MV2]] ; GFX10-CONTRACT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[FMA]](s64) ; GFX10-CONTRACT: $vgpr0 = COPY [[UV]](s32) ; GFX10-CONTRACT: $vgpr1 = COPY [[UV1]](s32) - ; GFX10-CONTRACT: [[COPY7:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY6]] - ; GFX10-CONTRACT: S_SETPC_B64_return [[COPY7]], implicit $vgpr0, implicit $vgpr1 + ; GFX10-CONTRACT: S_SETPC_B64_return $sgpr30_sgpr31, implicit $vgpr0, implicit $vgpr1 ; GFX10-DENORM-LABEL: name: 
test_double_add_mul_rhs ; GFX10-DENORM: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 ; GFX10-DENORM: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 @@ -738,14 +636,12 @@ body: | ; GFX10-DENORM: [[COPY4:%[0-9]+]]:_(s32) = COPY $vgpr4 ; GFX10-DENORM: [[COPY5:%[0-9]+]]:_(s32) = COPY $vgpr5 ; GFX10-DENORM: [[MV2:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY4]](s32), [[COPY5]](s32) - ; GFX10-DENORM: [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 ; GFX10-DENORM: [[FMUL:%[0-9]+]]:_(s64) = G_FMUL [[MV]], [[MV1]] ; GFX10-DENORM: [[FADD:%[0-9]+]]:_(s64) = G_FADD [[MV2]], [[FMUL]] ; GFX10-DENORM: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[FADD]](s64) ; GFX10-DENORM: $vgpr0 = COPY [[UV]](s32) ; GFX10-DENORM: $vgpr1 = COPY [[UV1]](s32) - ; GFX10-DENORM: [[COPY7:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY6]] - ; GFX10-DENORM: S_SETPC_B64_return [[COPY7]], implicit $vgpr0, implicit $vgpr1 + ; GFX10-DENORM: S_SETPC_B64_return $sgpr30_sgpr31, implicit $vgpr0, implicit $vgpr1 ; GFX10-UNSAFE-LABEL: name: test_double_add_mul_rhs ; GFX10-UNSAFE: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 ; GFX10-UNSAFE: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 @@ -756,13 +652,11 @@ body: | ; GFX10-UNSAFE: [[COPY4:%[0-9]+]]:_(s32) = COPY $vgpr4 ; GFX10-UNSAFE: [[COPY5:%[0-9]+]]:_(s32) = COPY $vgpr5 ; GFX10-UNSAFE: [[MV2:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY4]](s32), [[COPY5]](s32) - ; GFX10-UNSAFE: [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 ; GFX10-UNSAFE: [[FMA:%[0-9]+]]:_(s64) = G_FMA [[MV]], [[MV1]], [[MV2]] ; GFX10-UNSAFE: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[FMA]](s64) ; GFX10-UNSAFE: $vgpr0 = COPY [[UV]](s32) ; GFX10-UNSAFE: $vgpr1 = COPY [[UV1]](s32) - ; GFX10-UNSAFE: [[COPY7:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY6]] - ; GFX10-UNSAFE: S_SETPC_B64_return [[COPY7]], implicit $vgpr0, implicit $vgpr1 + ; GFX10-UNSAFE: S_SETPC_B64_return $sgpr30_sgpr31, implicit $vgpr0, implicit $vgpr1 %4:_(s32) = COPY $vgpr0 %5:_(s32) = COPY $vgpr1 %0:_(s64) = G_MERGE_VALUES %4(s32), %5(s32) @@ -772,21 +666,19 @@ body: | %8:_(s32) = COPY $vgpr4 %9:_(s32) = COPY $vgpr5 %2:_(s64) = G_MERGE_VALUES %8(s32), %9(s32) - %3:sgpr_64 = COPY $sgpr30_sgpr31 %10:_(s64) = G_FMUL %0, %1 %11:_(s64) = G_FADD %2, %10 %13:_(s32), %14:_(s32) = G_UNMERGE_VALUES %11(s64) $vgpr0 = COPY %13(s32) $vgpr1 = COPY %14(s32) - %12:ccr_sgpr_64 = COPY %3 - S_SETPC_B64_return %12, implicit $vgpr0, implicit $vgpr1 + S_SETPC_B64_return $sgpr30_sgpr31, implicit $vgpr0, implicit $vgpr1 ... 
--- name: test_4xfloat_add_mul body: | bb.1.entry: - liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $vgpr8, $vgpr9, $vgpr10, $vgpr11, $sgpr30_sgpr31 + liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $vgpr8, $vgpr9, $vgpr10 ; GFX9-LABEL: name: test_4xfloat_add_mul ; GFX9: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 @@ -804,7 +696,6 @@ body: | ; GFX9: [[COPY10:%[0-9]+]]:_(s32) = COPY $vgpr10 ; GFX9: [[COPY11:%[0-9]+]]:_(s32) = COPY $vgpr11 ; GFX9: [[BUILD_VECTOR2:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32), [[COPY10]](s32), [[COPY11]](s32) - ; GFX9: [[COPY12:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 ; GFX9: [[FMUL:%[0-9]+]]:_(<4 x s32>) = G_FMUL [[BUILD_VECTOR]], [[BUILD_VECTOR1]] ; GFX9: [[FADD:%[0-9]+]]:_(<4 x s32>) = G_FADD [[FMUL]], [[BUILD_VECTOR2]] ; GFX9: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[FADD]](<4 x s32>) @@ -812,8 +703,7 @@ body: | ; GFX9: $vgpr1 = COPY [[UV1]](s32) ; GFX9: $vgpr2 = COPY [[UV2]](s32) ; GFX9: $vgpr3 = COPY [[UV3]](s32) - ; GFX9: [[COPY13:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY12]] - ; GFX9: S_SETPC_B64_return [[COPY13]], implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3 + ; GFX9: S_SETPC_B64_return $sgpr30_sgpr31, implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3 ; GFX9-CONTRACT-LABEL: name: test_4xfloat_add_mul ; GFX9-CONTRACT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 ; GFX9-CONTRACT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 @@ -830,7 +720,6 @@ body: | ; GFX9-CONTRACT: [[COPY10:%[0-9]+]]:_(s32) = COPY $vgpr10 ; GFX9-CONTRACT: [[COPY11:%[0-9]+]]:_(s32) = COPY $vgpr11 ; GFX9-CONTRACT: [[BUILD_VECTOR2:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32), [[COPY10]](s32), [[COPY11]](s32) - ; GFX9-CONTRACT: [[COPY12:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 ; GFX9-CONTRACT: [[FMUL:%[0-9]+]]:_(<4 x s32>) = G_FMUL [[BUILD_VECTOR]], [[BUILD_VECTOR1]] ; GFX9-CONTRACT: [[FADD:%[0-9]+]]:_(<4 x s32>) = G_FADD [[FMUL]], [[BUILD_VECTOR2]] ; GFX9-CONTRACT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[FADD]](<4 x s32>) @@ -838,8 +727,7 @@ body: | ; GFX9-CONTRACT: $vgpr1 = COPY [[UV1]](s32) ; GFX9-CONTRACT: $vgpr2 = COPY [[UV2]](s32) ; GFX9-CONTRACT: $vgpr3 = COPY [[UV3]](s32) - ; GFX9-CONTRACT: [[COPY13:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY12]] - ; GFX9-CONTRACT: S_SETPC_B64_return [[COPY13]], implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3 + ; GFX9-CONTRACT: S_SETPC_B64_return $sgpr30_sgpr31, implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3 ; GFX9-DENORM-LABEL: name: test_4xfloat_add_mul ; GFX9-DENORM: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 ; GFX9-DENORM: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 @@ -856,7 +744,6 @@ body: | ; GFX9-DENORM: [[COPY10:%[0-9]+]]:_(s32) = COPY $vgpr10 ; GFX9-DENORM: [[COPY11:%[0-9]+]]:_(s32) = COPY $vgpr11 ; GFX9-DENORM: [[BUILD_VECTOR2:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32), [[COPY10]](s32), [[COPY11]](s32) - ; GFX9-DENORM: [[COPY12:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 ; GFX9-DENORM: [[FMUL:%[0-9]+]]:_(<4 x s32>) = G_FMUL [[BUILD_VECTOR]], [[BUILD_VECTOR1]] ; GFX9-DENORM: [[FADD:%[0-9]+]]:_(<4 x s32>) = G_FADD [[FMUL]], [[BUILD_VECTOR2]] ; GFX9-DENORM: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[FADD]](<4 x s32>) @@ -864,8 +751,7 @@ body: | ; GFX9-DENORM: 
$vgpr1 = COPY [[UV1]](s32) ; GFX9-DENORM: $vgpr2 = COPY [[UV2]](s32) ; GFX9-DENORM: $vgpr3 = COPY [[UV3]](s32) - ; GFX9-DENORM: [[COPY13:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY12]] - ; GFX9-DENORM: S_SETPC_B64_return [[COPY13]], implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3 + ; GFX9-DENORM: S_SETPC_B64_return $sgpr30_sgpr31, implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3 ; GFX9-UNSAFE-LABEL: name: test_4xfloat_add_mul ; GFX9-UNSAFE: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 ; GFX9-UNSAFE: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 @@ -882,7 +768,6 @@ body: | ; GFX9-UNSAFE: [[COPY10:%[0-9]+]]:_(s32) = COPY $vgpr10 ; GFX9-UNSAFE: [[COPY11:%[0-9]+]]:_(s32) = COPY $vgpr11 ; GFX9-UNSAFE: [[BUILD_VECTOR2:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32), [[COPY10]](s32), [[COPY11]](s32) - ; GFX9-UNSAFE: [[COPY12:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 ; GFX9-UNSAFE: [[FMUL:%[0-9]+]]:_(<4 x s32>) = G_FMUL [[BUILD_VECTOR]], [[BUILD_VECTOR1]] ; GFX9-UNSAFE: [[FADD:%[0-9]+]]:_(<4 x s32>) = G_FADD [[FMUL]], [[BUILD_VECTOR2]] ; GFX9-UNSAFE: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[FADD]](<4 x s32>) @@ -890,8 +775,7 @@ body: | ; GFX9-UNSAFE: $vgpr1 = COPY [[UV1]](s32) ; GFX9-UNSAFE: $vgpr2 = COPY [[UV2]](s32) ; GFX9-UNSAFE: $vgpr3 = COPY [[UV3]](s32) - ; GFX9-UNSAFE: [[COPY13:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY12]] - ; GFX9-UNSAFE: S_SETPC_B64_return [[COPY13]], implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3 + ; GFX9-UNSAFE: S_SETPC_B64_return $sgpr30_sgpr31, implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3 ; GFX10-LABEL: name: test_4xfloat_add_mul ; GFX10: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 ; GFX10: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 @@ -908,7 +792,6 @@ body: | ; GFX10: [[COPY10:%[0-9]+]]:_(s32) = COPY $vgpr10 ; GFX10: [[COPY11:%[0-9]+]]:_(s32) = COPY $vgpr11 ; GFX10: [[BUILD_VECTOR2:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32), [[COPY10]](s32), [[COPY11]](s32) - ; GFX10: [[COPY12:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 ; GFX10: [[FMUL:%[0-9]+]]:_(<4 x s32>) = G_FMUL [[BUILD_VECTOR]], [[BUILD_VECTOR1]] ; GFX10: [[FADD:%[0-9]+]]:_(<4 x s32>) = G_FADD [[FMUL]], [[BUILD_VECTOR2]] ; GFX10: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[FADD]](<4 x s32>) @@ -916,8 +799,7 @@ body: | ; GFX10: $vgpr1 = COPY [[UV1]](s32) ; GFX10: $vgpr2 = COPY [[UV2]](s32) ; GFX10: $vgpr3 = COPY [[UV3]](s32) - ; GFX10: [[COPY13:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY12]] - ; GFX10: S_SETPC_B64_return [[COPY13]], implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3 + ; GFX10: S_SETPC_B64_return $sgpr30_sgpr31, implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3 ; GFX10-CONTRACT-LABEL: name: test_4xfloat_add_mul ; GFX10-CONTRACT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 ; GFX10-CONTRACT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 @@ -934,7 +816,6 @@ body: | ; GFX10-CONTRACT: [[COPY10:%[0-9]+]]:_(s32) = COPY $vgpr10 ; GFX10-CONTRACT: [[COPY11:%[0-9]+]]:_(s32) = COPY $vgpr11 ; GFX10-CONTRACT: [[BUILD_VECTOR2:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32), [[COPY10]](s32), [[COPY11]](s32) - ; GFX10-CONTRACT: [[COPY12:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 ; GFX10-CONTRACT: [[FMUL:%[0-9]+]]:_(<4 x s32>) = G_FMUL [[BUILD_VECTOR]], [[BUILD_VECTOR1]] ; GFX10-CONTRACT: [[FADD:%[0-9]+]]:_(<4 x s32>) = G_FADD [[FMUL]], [[BUILD_VECTOR2]] ; 
GFX10-CONTRACT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[FADD]](<4 x s32>) @@ -942,8 +823,7 @@ body: | ; GFX10-CONTRACT: $vgpr1 = COPY [[UV1]](s32) ; GFX10-CONTRACT: $vgpr2 = COPY [[UV2]](s32) ; GFX10-CONTRACT: $vgpr3 = COPY [[UV3]](s32) - ; GFX10-CONTRACT: [[COPY13:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY12]] - ; GFX10-CONTRACT: S_SETPC_B64_return [[COPY13]], implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3 + ; GFX10-CONTRACT: S_SETPC_B64_return $sgpr30_sgpr31, implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3 ; GFX10-DENORM-LABEL: name: test_4xfloat_add_mul ; GFX10-DENORM: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 ; GFX10-DENORM: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 @@ -960,7 +840,6 @@ body: | ; GFX10-DENORM: [[COPY10:%[0-9]+]]:_(s32) = COPY $vgpr10 ; GFX10-DENORM: [[COPY11:%[0-9]+]]:_(s32) = COPY $vgpr11 ; GFX10-DENORM: [[BUILD_VECTOR2:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32), [[COPY10]](s32), [[COPY11]](s32) - ; GFX10-DENORM: [[COPY12:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 ; GFX10-DENORM: [[FMUL:%[0-9]+]]:_(<4 x s32>) = G_FMUL [[BUILD_VECTOR]], [[BUILD_VECTOR1]] ; GFX10-DENORM: [[FADD:%[0-9]+]]:_(<4 x s32>) = G_FADD [[FMUL]], [[BUILD_VECTOR2]] ; GFX10-DENORM: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[FADD]](<4 x s32>) @@ -968,8 +847,7 @@ body: | ; GFX10-DENORM: $vgpr1 = COPY [[UV1]](s32) ; GFX10-DENORM: $vgpr2 = COPY [[UV2]](s32) ; GFX10-DENORM: $vgpr3 = COPY [[UV3]](s32) - ; GFX10-DENORM: [[COPY13:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY12]] - ; GFX10-DENORM: S_SETPC_B64_return [[COPY13]], implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3 + ; GFX10-DENORM: S_SETPC_B64_return $sgpr30_sgpr31, implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3 ; GFX10-UNSAFE-LABEL: name: test_4xfloat_add_mul ; GFX10-UNSAFE: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 ; GFX10-UNSAFE: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 @@ -986,7 +864,6 @@ body: | ; GFX10-UNSAFE: [[COPY10:%[0-9]+]]:_(s32) = COPY $vgpr10 ; GFX10-UNSAFE: [[COPY11:%[0-9]+]]:_(s32) = COPY $vgpr11 ; GFX10-UNSAFE: [[BUILD_VECTOR2:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32), [[COPY10]](s32), [[COPY11]](s32) - ; GFX10-UNSAFE: [[COPY12:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 ; GFX10-UNSAFE: [[FMUL:%[0-9]+]]:_(<4 x s32>) = G_FMUL [[BUILD_VECTOR]], [[BUILD_VECTOR1]] ; GFX10-UNSAFE: [[FADD:%[0-9]+]]:_(<4 x s32>) = G_FADD [[FMUL]], [[BUILD_VECTOR2]] ; GFX10-UNSAFE: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[FADD]](<4 x s32>) @@ -994,8 +871,7 @@ body: | ; GFX10-UNSAFE: $vgpr1 = COPY [[UV1]](s32) ; GFX10-UNSAFE: $vgpr2 = COPY [[UV2]](s32) ; GFX10-UNSAFE: $vgpr3 = COPY [[UV3]](s32) - ; GFX10-UNSAFE: [[COPY13:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY12]] - ; GFX10-UNSAFE: S_SETPC_B64_return [[COPY13]], implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3 + ; GFX10-UNSAFE: S_SETPC_B64_return $sgpr30_sgpr31, implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3 %4:_(s32) = COPY $vgpr0 %5:_(s32) = COPY $vgpr1 %6:_(s32) = COPY $vgpr2 @@ -1011,7 +887,6 @@ body: | %14:_(s32) = COPY $vgpr10 %15:_(s32) = COPY $vgpr11 %2:_(<4 x s32>) = G_BUILD_VECTOR %12(s32), %13(s32), %14(s32), %15(s32) - %3:sgpr_64 = COPY $sgpr30_sgpr31 %16:_(<4 x s32>) = G_FMUL %0, %1 %17:_(<4 x s32>) = G_FADD %16, %2 %19:_(s32), %20:_(s32), 
%21:_(s32), %22:_(s32) = G_UNMERGE_VALUES %17(<4 x s32>) @@ -1019,15 +894,14 @@ body: | $vgpr1 = COPY %20(s32) $vgpr2 = COPY %21(s32) $vgpr3 = COPY %22(s32) - %18:ccr_sgpr_64 = COPY %3 - S_SETPC_B64_return %18, implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3 + S_SETPC_B64_return $sgpr30_sgpr31, implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3 ... --- name: test_3xfloat_add_mul_rhs body: | bb.1.entry: - liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $vgpr8, $sgpr30_sgpr31 + liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $vgpr8 ; GFX9-LABEL: name: test_3xfloat_add_mul_rhs ; GFX9: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 @@ -1042,15 +916,13 @@ body: | ; GFX9: [[COPY7:%[0-9]+]]:_(s32) = COPY $vgpr7 ; GFX9: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr8 ; GFX9: [[BUILD_VECTOR2:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[COPY6]](s32), [[COPY7]](s32), [[COPY8]](s32) - ; GFX9: [[COPY9:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 ; GFX9: [[FMUL:%[0-9]+]]:_(<3 x s32>) = G_FMUL [[BUILD_VECTOR]], [[BUILD_VECTOR1]] ; GFX9: [[FADD:%[0-9]+]]:_(<3 x s32>) = G_FADD [[BUILD_VECTOR2]], [[FMUL]] ; GFX9: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[FADD]](<3 x s32>) ; GFX9: $vgpr0 = COPY [[UV]](s32) ; GFX9: $vgpr1 = COPY [[UV1]](s32) ; GFX9: $vgpr2 = COPY [[UV2]](s32) - ; GFX9: [[COPY10:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY9]] - ; GFX9: S_SETPC_B64_return [[COPY10]], implicit $vgpr0, implicit $vgpr1, implicit $vgpr2 + ; GFX9: S_SETPC_B64_return $sgpr30_sgpr31, implicit $vgpr0, implicit $vgpr1, implicit $vgpr2 ; GFX9-CONTRACT-LABEL: name: test_3xfloat_add_mul_rhs ; GFX9-CONTRACT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 ; GFX9-CONTRACT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 @@ -1064,15 +936,13 @@ body: | ; GFX9-CONTRACT: [[COPY7:%[0-9]+]]:_(s32) = COPY $vgpr7 ; GFX9-CONTRACT: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr8 ; GFX9-CONTRACT: [[BUILD_VECTOR2:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[COPY6]](s32), [[COPY7]](s32), [[COPY8]](s32) - ; GFX9-CONTRACT: [[COPY9:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 ; GFX9-CONTRACT: [[FMUL:%[0-9]+]]:_(<3 x s32>) = G_FMUL [[BUILD_VECTOR]], [[BUILD_VECTOR1]] ; GFX9-CONTRACT: [[FADD:%[0-9]+]]:_(<3 x s32>) = G_FADD [[BUILD_VECTOR2]], [[FMUL]] ; GFX9-CONTRACT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[FADD]](<3 x s32>) ; GFX9-CONTRACT: $vgpr0 = COPY [[UV]](s32) ; GFX9-CONTRACT: $vgpr1 = COPY [[UV1]](s32) ; GFX9-CONTRACT: $vgpr2 = COPY [[UV2]](s32) - ; GFX9-CONTRACT: [[COPY10:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY9]] - ; GFX9-CONTRACT: S_SETPC_B64_return [[COPY10]], implicit $vgpr0, implicit $vgpr1, implicit $vgpr2 + ; GFX9-CONTRACT: S_SETPC_B64_return $sgpr30_sgpr31, implicit $vgpr0, implicit $vgpr1, implicit $vgpr2 ; GFX9-DENORM-LABEL: name: test_3xfloat_add_mul_rhs ; GFX9-DENORM: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 ; GFX9-DENORM: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 @@ -1086,15 +956,13 @@ body: | ; GFX9-DENORM: [[COPY7:%[0-9]+]]:_(s32) = COPY $vgpr7 ; GFX9-DENORM: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr8 ; GFX9-DENORM: [[BUILD_VECTOR2:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[COPY6]](s32), [[COPY7]](s32), [[COPY8]](s32) - ; GFX9-DENORM: [[COPY9:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 ; GFX9-DENORM: [[FMUL:%[0-9]+]]:_(<3 x s32>) = G_FMUL [[BUILD_VECTOR]], [[BUILD_VECTOR1]] ; GFX9-DENORM: [[FADD:%[0-9]+]]:_(<3 x s32>) = G_FADD [[BUILD_VECTOR2]], [[FMUL]] ; GFX9-DENORM: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), 
[[UV2:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[FADD]](<3 x s32>) ; GFX9-DENORM: $vgpr0 = COPY [[UV]](s32) ; GFX9-DENORM: $vgpr1 = COPY [[UV1]](s32) ; GFX9-DENORM: $vgpr2 = COPY [[UV2]](s32) - ; GFX9-DENORM: [[COPY10:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY9]] - ; GFX9-DENORM: S_SETPC_B64_return [[COPY10]], implicit $vgpr0, implicit $vgpr1, implicit $vgpr2 + ; GFX9-DENORM: S_SETPC_B64_return $sgpr30_sgpr31, implicit $vgpr0, implicit $vgpr1, implicit $vgpr2 ; GFX9-UNSAFE-LABEL: name: test_3xfloat_add_mul_rhs ; GFX9-UNSAFE: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 ; GFX9-UNSAFE: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 @@ -1108,15 +976,13 @@ body: | ; GFX9-UNSAFE: [[COPY7:%[0-9]+]]:_(s32) = COPY $vgpr7 ; GFX9-UNSAFE: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr8 ; GFX9-UNSAFE: [[BUILD_VECTOR2:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[COPY6]](s32), [[COPY7]](s32), [[COPY8]](s32) - ; GFX9-UNSAFE: [[COPY9:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 ; GFX9-UNSAFE: [[FMUL:%[0-9]+]]:_(<3 x s32>) = G_FMUL [[BUILD_VECTOR]], [[BUILD_VECTOR1]] ; GFX9-UNSAFE: [[FADD:%[0-9]+]]:_(<3 x s32>) = G_FADD [[BUILD_VECTOR2]], [[FMUL]] ; GFX9-UNSAFE: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[FADD]](<3 x s32>) ; GFX9-UNSAFE: $vgpr0 = COPY [[UV]](s32) ; GFX9-UNSAFE: $vgpr1 = COPY [[UV1]](s32) ; GFX9-UNSAFE: $vgpr2 = COPY [[UV2]](s32) - ; GFX9-UNSAFE: [[COPY10:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY9]] - ; GFX9-UNSAFE: S_SETPC_B64_return [[COPY10]], implicit $vgpr0, implicit $vgpr1, implicit $vgpr2 + ; GFX9-UNSAFE: S_SETPC_B64_return $sgpr30_sgpr31, implicit $vgpr0, implicit $vgpr1, implicit $vgpr2 ; GFX10-LABEL: name: test_3xfloat_add_mul_rhs ; GFX10: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 ; GFX10: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 @@ -1130,15 +996,13 @@ body: | ; GFX10: [[COPY7:%[0-9]+]]:_(s32) = COPY $vgpr7 ; GFX10: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr8 ; GFX10: [[BUILD_VECTOR2:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[COPY6]](s32), [[COPY7]](s32), [[COPY8]](s32) - ; GFX10: [[COPY9:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 ; GFX10: [[FMUL:%[0-9]+]]:_(<3 x s32>) = G_FMUL [[BUILD_VECTOR]], [[BUILD_VECTOR1]] ; GFX10: [[FADD:%[0-9]+]]:_(<3 x s32>) = G_FADD [[BUILD_VECTOR2]], [[FMUL]] ; GFX10: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[FADD]](<3 x s32>) ; GFX10: $vgpr0 = COPY [[UV]](s32) ; GFX10: $vgpr1 = COPY [[UV1]](s32) ; GFX10: $vgpr2 = COPY [[UV2]](s32) - ; GFX10: [[COPY10:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY9]] - ; GFX10: S_SETPC_B64_return [[COPY10]], implicit $vgpr0, implicit $vgpr1, implicit $vgpr2 + ; GFX10: S_SETPC_B64_return $sgpr30_sgpr31, implicit $vgpr0, implicit $vgpr1, implicit $vgpr2 ; GFX10-CONTRACT-LABEL: name: test_3xfloat_add_mul_rhs ; GFX10-CONTRACT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 ; GFX10-CONTRACT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 @@ -1152,15 +1016,13 @@ body: | ; GFX10-CONTRACT: [[COPY7:%[0-9]+]]:_(s32) = COPY $vgpr7 ; GFX10-CONTRACT: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr8 ; GFX10-CONTRACT: [[BUILD_VECTOR2:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[COPY6]](s32), [[COPY7]](s32), [[COPY8]](s32) - ; GFX10-CONTRACT: [[COPY9:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 ; GFX10-CONTRACT: [[FMUL:%[0-9]+]]:_(<3 x s32>) = G_FMUL [[BUILD_VECTOR]], [[BUILD_VECTOR1]] ; GFX10-CONTRACT: [[FADD:%[0-9]+]]:_(<3 x s32>) = G_FADD [[BUILD_VECTOR2]], [[FMUL]] ; GFX10-CONTRACT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[FADD]](<3 x s32>) ; GFX10-CONTRACT: $vgpr0 = COPY [[UV]](s32) ; 
GFX10-CONTRACT: $vgpr1 = COPY [[UV1]](s32) ; GFX10-CONTRACT: $vgpr2 = COPY [[UV2]](s32) - ; GFX10-CONTRACT: [[COPY10:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY9]] - ; GFX10-CONTRACT: S_SETPC_B64_return [[COPY10]], implicit $vgpr0, implicit $vgpr1, implicit $vgpr2 + ; GFX10-CONTRACT: S_SETPC_B64_return $sgpr30_sgpr31, implicit $vgpr0, implicit $vgpr1, implicit $vgpr2 ; GFX10-DENORM-LABEL: name: test_3xfloat_add_mul_rhs ; GFX10-DENORM: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 ; GFX10-DENORM: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 @@ -1174,15 +1036,13 @@ body: | ; GFX10-DENORM: [[COPY7:%[0-9]+]]:_(s32) = COPY $vgpr7 ; GFX10-DENORM: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr8 ; GFX10-DENORM: [[BUILD_VECTOR2:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[COPY6]](s32), [[COPY7]](s32), [[COPY8]](s32) - ; GFX10-DENORM: [[COPY9:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 ; GFX10-DENORM: [[FMUL:%[0-9]+]]:_(<3 x s32>) = G_FMUL [[BUILD_VECTOR]], [[BUILD_VECTOR1]] ; GFX10-DENORM: [[FADD:%[0-9]+]]:_(<3 x s32>) = G_FADD [[BUILD_VECTOR2]], [[FMUL]] ; GFX10-DENORM: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[FADD]](<3 x s32>) ; GFX10-DENORM: $vgpr0 = COPY [[UV]](s32) ; GFX10-DENORM: $vgpr1 = COPY [[UV1]](s32) ; GFX10-DENORM: $vgpr2 = COPY [[UV2]](s32) - ; GFX10-DENORM: [[COPY10:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY9]] - ; GFX10-DENORM: S_SETPC_B64_return [[COPY10]], implicit $vgpr0, implicit $vgpr1, implicit $vgpr2 + ; GFX10-DENORM: S_SETPC_B64_return $sgpr30_sgpr31, implicit $vgpr0, implicit $vgpr1, implicit $vgpr2 ; GFX10-UNSAFE-LABEL: name: test_3xfloat_add_mul_rhs ; GFX10-UNSAFE: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 ; GFX10-UNSAFE: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 @@ -1196,15 +1056,13 @@ body: | ; GFX10-UNSAFE: [[COPY7:%[0-9]+]]:_(s32) = COPY $vgpr7 ; GFX10-UNSAFE: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr8 ; GFX10-UNSAFE: [[BUILD_VECTOR2:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[COPY6]](s32), [[COPY7]](s32), [[COPY8]](s32) - ; GFX10-UNSAFE: [[COPY9:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 ; GFX10-UNSAFE: [[FMUL:%[0-9]+]]:_(<3 x s32>) = G_FMUL [[BUILD_VECTOR]], [[BUILD_VECTOR1]] ; GFX10-UNSAFE: [[FADD:%[0-9]+]]:_(<3 x s32>) = G_FADD [[BUILD_VECTOR2]], [[FMUL]] ; GFX10-UNSAFE: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[FADD]](<3 x s32>) ; GFX10-UNSAFE: $vgpr0 = COPY [[UV]](s32) ; GFX10-UNSAFE: $vgpr1 = COPY [[UV1]](s32) ; GFX10-UNSAFE: $vgpr2 = COPY [[UV2]](s32) - ; GFX10-UNSAFE: [[COPY10:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY9]] - ; GFX10-UNSAFE: S_SETPC_B64_return [[COPY10]], implicit $vgpr0, implicit $vgpr1, implicit $vgpr2 + ; GFX10-UNSAFE: S_SETPC_B64_return $sgpr30_sgpr31, implicit $vgpr0, implicit $vgpr1, implicit $vgpr2 %4:_(s32) = COPY $vgpr0 %5:_(s32) = COPY $vgpr1 %6:_(s32) = COPY $vgpr2 @@ -1217,22 +1075,20 @@ body: | %11:_(s32) = COPY $vgpr7 %12:_(s32) = COPY $vgpr8 %2:_(<3 x s32>) = G_BUILD_VECTOR %10(s32), %11(s32), %12(s32) - %3:sgpr_64 = COPY $sgpr30_sgpr31 %13:_(<3 x s32>) = G_FMUL %0, %1 %14:_(<3 x s32>) = G_FADD %2, %13 %16:_(s32), %17:_(s32), %18:_(s32) = G_UNMERGE_VALUES %14(<3 x s32>) $vgpr0 = COPY %16(s32) $vgpr1 = COPY %17(s32) $vgpr2 = COPY %18(s32) - %15:ccr_sgpr_64 = COPY %3 - S_SETPC_B64_return %15, implicit $vgpr0, implicit $vgpr1, implicit $vgpr2 + S_SETPC_B64_return $sgpr30_sgpr31, implicit $vgpr0, implicit $vgpr1, implicit $vgpr2 ... 
--- name: test_4xhalf_add_mul body: | bb.1.entry: - liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $sgpr30_sgpr31 + liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5 ; GFX9-LABEL: name: test_4xhalf_add_mul ; GFX9: [[COPY:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0 @@ -1244,14 +1100,12 @@ body: | ; GFX9: [[COPY4:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr4 ; GFX9: [[COPY5:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr5 ; GFX9: [[CONCAT_VECTORS2:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[COPY4]](<2 x s16>), [[COPY5]](<2 x s16>) - ; GFX9: [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 ; GFX9: [[FMUL:%[0-9]+]]:_(<4 x s16>) = G_FMUL [[CONCAT_VECTORS]], [[CONCAT_VECTORS1]] ; GFX9: [[FADD:%[0-9]+]]:_(<4 x s16>) = G_FADD [[FMUL]], [[CONCAT_VECTORS2]] ; GFX9: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[FADD]](<4 x s16>) ; GFX9: $vgpr0 = COPY [[UV]](<2 x s16>) ; GFX9: $vgpr1 = COPY [[UV1]](<2 x s16>) - ; GFX9: [[COPY7:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY6]] - ; GFX9: S_SETPC_B64_return [[COPY7]], implicit $vgpr0, implicit $vgpr1 + ; GFX9: S_SETPC_B64_return $sgpr30_sgpr31, implicit $vgpr0, implicit $vgpr1 ; GFX9-CONTRACT-LABEL: name: test_4xhalf_add_mul ; GFX9-CONTRACT: [[COPY:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0 ; GFX9-CONTRACT: [[COPY1:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr1 @@ -1262,14 +1116,12 @@ body: | ; GFX9-CONTRACT: [[COPY4:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr4 ; GFX9-CONTRACT: [[COPY5:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr5 ; GFX9-CONTRACT: [[CONCAT_VECTORS2:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[COPY4]](<2 x s16>), [[COPY5]](<2 x s16>) - ; GFX9-CONTRACT: [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 ; GFX9-CONTRACT: [[FMUL:%[0-9]+]]:_(<4 x s16>) = G_FMUL [[CONCAT_VECTORS]], [[CONCAT_VECTORS1]] ; GFX9-CONTRACT: [[FADD:%[0-9]+]]:_(<4 x s16>) = G_FADD [[FMUL]], [[CONCAT_VECTORS2]] ; GFX9-CONTRACT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[FADD]](<4 x s16>) ; GFX9-CONTRACT: $vgpr0 = COPY [[UV]](<2 x s16>) ; GFX9-CONTRACT: $vgpr1 = COPY [[UV1]](<2 x s16>) - ; GFX9-CONTRACT: [[COPY7:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY6]] - ; GFX9-CONTRACT: S_SETPC_B64_return [[COPY7]], implicit $vgpr0, implicit $vgpr1 + ; GFX9-CONTRACT: S_SETPC_B64_return $sgpr30_sgpr31, implicit $vgpr0, implicit $vgpr1 ; GFX9-DENORM-LABEL: name: test_4xhalf_add_mul ; GFX9-DENORM: [[COPY:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0 ; GFX9-DENORM: [[COPY1:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr1 @@ -1280,14 +1132,12 @@ body: | ; GFX9-DENORM: [[COPY4:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr4 ; GFX9-DENORM: [[COPY5:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr5 ; GFX9-DENORM: [[CONCAT_VECTORS2:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[COPY4]](<2 x s16>), [[COPY5]](<2 x s16>) - ; GFX9-DENORM: [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 ; GFX9-DENORM: [[FMUL:%[0-9]+]]:_(<4 x s16>) = G_FMUL [[CONCAT_VECTORS]], [[CONCAT_VECTORS1]] ; GFX9-DENORM: [[FADD:%[0-9]+]]:_(<4 x s16>) = G_FADD [[FMUL]], [[CONCAT_VECTORS2]] ; GFX9-DENORM: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[FADD]](<4 x s16>) ; GFX9-DENORM: $vgpr0 = COPY [[UV]](<2 x s16>) ; GFX9-DENORM: $vgpr1 = COPY [[UV1]](<2 x s16>) - ; GFX9-DENORM: [[COPY7:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY6]] - ; GFX9-DENORM: S_SETPC_B64_return [[COPY7]], implicit $vgpr0, implicit $vgpr1 + ; GFX9-DENORM: S_SETPC_B64_return $sgpr30_sgpr31, implicit $vgpr0, implicit $vgpr1 ; GFX9-UNSAFE-LABEL: name: test_4xhalf_add_mul ; GFX9-UNSAFE: [[COPY:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0 ; GFX9-UNSAFE: [[COPY1:%[0-9]+]]:_(<2 x s16>) = 
COPY $vgpr1 @@ -1298,14 +1148,12 @@ body: | ; GFX9-UNSAFE: [[COPY4:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr4 ; GFX9-UNSAFE: [[COPY5:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr5 ; GFX9-UNSAFE: [[CONCAT_VECTORS2:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[COPY4]](<2 x s16>), [[COPY5]](<2 x s16>) - ; GFX9-UNSAFE: [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 ; GFX9-UNSAFE: [[FMUL:%[0-9]+]]:_(<4 x s16>) = G_FMUL [[CONCAT_VECTORS]], [[CONCAT_VECTORS1]] ; GFX9-UNSAFE: [[FADD:%[0-9]+]]:_(<4 x s16>) = G_FADD [[FMUL]], [[CONCAT_VECTORS2]] ; GFX9-UNSAFE: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[FADD]](<4 x s16>) ; GFX9-UNSAFE: $vgpr0 = COPY [[UV]](<2 x s16>) ; GFX9-UNSAFE: $vgpr1 = COPY [[UV1]](<2 x s16>) - ; GFX9-UNSAFE: [[COPY7:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY6]] - ; GFX9-UNSAFE: S_SETPC_B64_return [[COPY7]], implicit $vgpr0, implicit $vgpr1 + ; GFX9-UNSAFE: S_SETPC_B64_return $sgpr30_sgpr31, implicit $vgpr0, implicit $vgpr1 ; GFX10-LABEL: name: test_4xhalf_add_mul ; GFX10: [[COPY:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0 ; GFX10: [[COPY1:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr1 @@ -1316,14 +1164,12 @@ body: | ; GFX10: [[COPY4:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr4 ; GFX10: [[COPY5:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr5 ; GFX10: [[CONCAT_VECTORS2:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[COPY4]](<2 x s16>), [[COPY5]](<2 x s16>) - ; GFX10: [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 ; GFX10: [[FMUL:%[0-9]+]]:_(<4 x s16>) = G_FMUL [[CONCAT_VECTORS]], [[CONCAT_VECTORS1]] ; GFX10: [[FADD:%[0-9]+]]:_(<4 x s16>) = G_FADD [[FMUL]], [[CONCAT_VECTORS2]] ; GFX10: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[FADD]](<4 x s16>) ; GFX10: $vgpr0 = COPY [[UV]](<2 x s16>) ; GFX10: $vgpr1 = COPY [[UV1]](<2 x s16>) - ; GFX10: [[COPY7:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY6]] - ; GFX10: S_SETPC_B64_return [[COPY7]], implicit $vgpr0, implicit $vgpr1 + ; GFX10: S_SETPC_B64_return $sgpr30_sgpr31, implicit $vgpr0, implicit $vgpr1 ; GFX10-CONTRACT-LABEL: name: test_4xhalf_add_mul ; GFX10-CONTRACT: [[COPY:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0 ; GFX10-CONTRACT: [[COPY1:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr1 @@ -1334,14 +1180,12 @@ body: | ; GFX10-CONTRACT: [[COPY4:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr4 ; GFX10-CONTRACT: [[COPY5:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr5 ; GFX10-CONTRACT: [[CONCAT_VECTORS2:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[COPY4]](<2 x s16>), [[COPY5]](<2 x s16>) - ; GFX10-CONTRACT: [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 ; GFX10-CONTRACT: [[FMUL:%[0-9]+]]:_(<4 x s16>) = G_FMUL [[CONCAT_VECTORS]], [[CONCAT_VECTORS1]] ; GFX10-CONTRACT: [[FADD:%[0-9]+]]:_(<4 x s16>) = G_FADD [[FMUL]], [[CONCAT_VECTORS2]] ; GFX10-CONTRACT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[FADD]](<4 x s16>) ; GFX10-CONTRACT: $vgpr0 = COPY [[UV]](<2 x s16>) ; GFX10-CONTRACT: $vgpr1 = COPY [[UV1]](<2 x s16>) - ; GFX10-CONTRACT: [[COPY7:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY6]] - ; GFX10-CONTRACT: S_SETPC_B64_return [[COPY7]], implicit $vgpr0, implicit $vgpr1 + ; GFX10-CONTRACT: S_SETPC_B64_return $sgpr30_sgpr31, implicit $vgpr0, implicit $vgpr1 ; GFX10-DENORM-LABEL: name: test_4xhalf_add_mul ; GFX10-DENORM: [[COPY:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0 ; GFX10-DENORM: [[COPY1:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr1 @@ -1352,14 +1196,12 @@ body: | ; GFX10-DENORM: [[COPY4:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr4 ; GFX10-DENORM: [[COPY5:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr5 ; GFX10-DENORM: [[CONCAT_VECTORS2:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS 
[[COPY4]](<2 x s16>), [[COPY5]](<2 x s16>) - ; GFX10-DENORM: [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 ; GFX10-DENORM: [[FMUL:%[0-9]+]]:_(<4 x s16>) = G_FMUL [[CONCAT_VECTORS]], [[CONCAT_VECTORS1]] ; GFX10-DENORM: [[FADD:%[0-9]+]]:_(<4 x s16>) = G_FADD [[FMUL]], [[CONCAT_VECTORS2]] ; GFX10-DENORM: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[FADD]](<4 x s16>) ; GFX10-DENORM: $vgpr0 = COPY [[UV]](<2 x s16>) ; GFX10-DENORM: $vgpr1 = COPY [[UV1]](<2 x s16>) - ; GFX10-DENORM: [[COPY7:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY6]] - ; GFX10-DENORM: S_SETPC_B64_return [[COPY7]], implicit $vgpr0, implicit $vgpr1 + ; GFX10-DENORM: S_SETPC_B64_return $sgpr30_sgpr31, implicit $vgpr0, implicit $vgpr1 ; GFX10-UNSAFE-LABEL: name: test_4xhalf_add_mul ; GFX10-UNSAFE: [[COPY:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0 ; GFX10-UNSAFE: [[COPY1:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr1 @@ -1370,14 +1212,12 @@ body: | ; GFX10-UNSAFE: [[COPY4:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr4 ; GFX10-UNSAFE: [[COPY5:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr5 ; GFX10-UNSAFE: [[CONCAT_VECTORS2:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[COPY4]](<2 x s16>), [[COPY5]](<2 x s16>) - ; GFX10-UNSAFE: [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 ; GFX10-UNSAFE: [[FMUL:%[0-9]+]]:_(<4 x s16>) = G_FMUL [[CONCAT_VECTORS]], [[CONCAT_VECTORS1]] ; GFX10-UNSAFE: [[FADD:%[0-9]+]]:_(<4 x s16>) = G_FADD [[FMUL]], [[CONCAT_VECTORS2]] ; GFX10-UNSAFE: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[FADD]](<4 x s16>) ; GFX10-UNSAFE: $vgpr0 = COPY [[UV]](<2 x s16>) ; GFX10-UNSAFE: $vgpr1 = COPY [[UV1]](<2 x s16>) - ; GFX10-UNSAFE: [[COPY7:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY6]] - ; GFX10-UNSAFE: S_SETPC_B64_return [[COPY7]], implicit $vgpr0, implicit $vgpr1 + ; GFX10-UNSAFE: S_SETPC_B64_return $sgpr30_sgpr31, implicit $vgpr0, implicit $vgpr1 %4:_(<2 x s16>) = COPY $vgpr0 %5:_(<2 x s16>) = COPY $vgpr1 %0:_(<4 x s16>) = G_CONCAT_VECTORS %4(<2 x s16>), %5(<2 x s16>) @@ -1387,21 +1227,19 @@ body: | %8:_(<2 x s16>) = COPY $vgpr4 %9:_(<2 x s16>) = COPY $vgpr5 %2:_(<4 x s16>) = G_CONCAT_VECTORS %8(<2 x s16>), %9(<2 x s16>) - %3:sgpr_64 = COPY $sgpr30_sgpr31 %10:_(<4 x s16>) = G_FMUL %0, %1 %11:_(<4 x s16>) = G_FADD %10, %2 %13:_(<2 x s16>), %14:_(<2 x s16>) = G_UNMERGE_VALUES %11(<4 x s16>) $vgpr0 = COPY %13(<2 x s16>) $vgpr1 = COPY %14(<2 x s16>) - %12:ccr_sgpr_64 = COPY %3 - S_SETPC_B64_return %12, implicit $vgpr0, implicit $vgpr1 + S_SETPC_B64_return $sgpr30_sgpr31, implicit $vgpr0, implicit $vgpr1 ... 
--- name: test_3xhalf_add_mul_rhs body: | bb.1.entry: - liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $sgpr30_sgpr31 + liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5 ; GFX9-LABEL: name: test_3xhalf_add_mul_rhs ; GFX9: [[COPY:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0 @@ -1423,7 +1261,6 @@ body: | ; GFX9: [[BITCAST4:%[0-9]+]]:_(s96) = G_BITCAST [[CONCAT_VECTORS2]](<6 x s16>) ; GFX9: [[TRUNC2:%[0-9]+]]:_(s48) = G_TRUNC [[BITCAST4]](s96) ; GFX9: [[BITCAST5:%[0-9]+]]:_(<3 x s16>) = G_BITCAST [[TRUNC2]](s48) - ; GFX9: [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 ; GFX9: [[FMUL:%[0-9]+]]:_(<3 x s16>) = G_FMUL [[BITCAST1]], [[BITCAST3]] ; GFX9: [[FADD:%[0-9]+]]:_(<3 x s16>) = G_FADD [[BITCAST5]], [[FMUL]] ; GFX9: [[DEF1:%[0-9]+]]:_(<3 x s16>) = G_IMPLICIT_DEF @@ -1431,8 +1268,7 @@ body: | ; GFX9: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>), [[UV2:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS3]](<6 x s16>) ; GFX9: $vgpr0 = COPY [[UV]](<2 x s16>) ; GFX9: $vgpr1 = COPY [[UV1]](<2 x s16>) - ; GFX9: [[COPY7:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY6]] - ; GFX9: S_SETPC_B64_return [[COPY7]], implicit $vgpr0, implicit $vgpr1 + ; GFX9: S_SETPC_B64_return $sgpr30_sgpr31, implicit $vgpr0, implicit $vgpr1 ; GFX9-CONTRACT-LABEL: name: test_3xhalf_add_mul_rhs ; GFX9-CONTRACT: [[COPY:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0 ; GFX9-CONTRACT: [[COPY1:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr1 @@ -1453,7 +1289,6 @@ body: | ; GFX9-CONTRACT: [[BITCAST4:%[0-9]+]]:_(s96) = G_BITCAST [[CONCAT_VECTORS2]](<6 x s16>) ; GFX9-CONTRACT: [[TRUNC2:%[0-9]+]]:_(s48) = G_TRUNC [[BITCAST4]](s96) ; GFX9-CONTRACT: [[BITCAST5:%[0-9]+]]:_(<3 x s16>) = G_BITCAST [[TRUNC2]](s48) - ; GFX9-CONTRACT: [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 ; GFX9-CONTRACT: [[FMUL:%[0-9]+]]:_(<3 x s16>) = G_FMUL [[BITCAST1]], [[BITCAST3]] ; GFX9-CONTRACT: [[FADD:%[0-9]+]]:_(<3 x s16>) = G_FADD [[BITCAST5]], [[FMUL]] ; GFX9-CONTRACT: [[DEF1:%[0-9]+]]:_(<3 x s16>) = G_IMPLICIT_DEF @@ -1461,8 +1296,7 @@ body: | ; GFX9-CONTRACT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>), [[UV2:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS3]](<6 x s16>) ; GFX9-CONTRACT: $vgpr0 = COPY [[UV]](<2 x s16>) ; GFX9-CONTRACT: $vgpr1 = COPY [[UV1]](<2 x s16>) - ; GFX9-CONTRACT: [[COPY7:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY6]] - ; GFX9-CONTRACT: S_SETPC_B64_return [[COPY7]], implicit $vgpr0, implicit $vgpr1 + ; GFX9-CONTRACT: S_SETPC_B64_return $sgpr30_sgpr31, implicit $vgpr0, implicit $vgpr1 ; GFX9-DENORM-LABEL: name: test_3xhalf_add_mul_rhs ; GFX9-DENORM: [[COPY:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0 ; GFX9-DENORM: [[COPY1:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr1 @@ -1483,7 +1317,6 @@ body: | ; GFX9-DENORM: [[BITCAST4:%[0-9]+]]:_(s96) = G_BITCAST [[CONCAT_VECTORS2]](<6 x s16>) ; GFX9-DENORM: [[TRUNC2:%[0-9]+]]:_(s48) = G_TRUNC [[BITCAST4]](s96) ; GFX9-DENORM: [[BITCAST5:%[0-9]+]]:_(<3 x s16>) = G_BITCAST [[TRUNC2]](s48) - ; GFX9-DENORM: [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 ; GFX9-DENORM: [[FMUL:%[0-9]+]]:_(<3 x s16>) = G_FMUL [[BITCAST1]], [[BITCAST3]] ; GFX9-DENORM: [[FADD:%[0-9]+]]:_(<3 x s16>) = G_FADD [[BITCAST5]], [[FMUL]] ; GFX9-DENORM: [[DEF1:%[0-9]+]]:_(<3 x s16>) = G_IMPLICIT_DEF @@ -1491,8 +1324,7 @@ body: | ; GFX9-DENORM: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>), [[UV2:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS3]](<6 x s16>) ; GFX9-DENORM: $vgpr0 = COPY [[UV]](<2 x s16>) ; GFX9-DENORM: $vgpr1 = COPY [[UV1]](<2 x s16>) - ; GFX9-DENORM: [[COPY7:%[0-9]+]]:ccr_sgpr_64 = COPY 
[[COPY6]] - ; GFX9-DENORM: S_SETPC_B64_return [[COPY7]], implicit $vgpr0, implicit $vgpr1 + ; GFX9-DENORM: S_SETPC_B64_return $sgpr30_sgpr31, implicit $vgpr0, implicit $vgpr1 ; GFX9-UNSAFE-LABEL: name: test_3xhalf_add_mul_rhs ; GFX9-UNSAFE: [[COPY:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0 ; GFX9-UNSAFE: [[COPY1:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr1 @@ -1513,7 +1345,6 @@ body: | ; GFX9-UNSAFE: [[BITCAST4:%[0-9]+]]:_(s96) = G_BITCAST [[CONCAT_VECTORS2]](<6 x s16>) ; GFX9-UNSAFE: [[TRUNC2:%[0-9]+]]:_(s48) = G_TRUNC [[BITCAST4]](s96) ; GFX9-UNSAFE: [[BITCAST5:%[0-9]+]]:_(<3 x s16>) = G_BITCAST [[TRUNC2]](s48) - ; GFX9-UNSAFE: [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 ; GFX9-UNSAFE: [[FMUL:%[0-9]+]]:_(<3 x s16>) = G_FMUL [[BITCAST1]], [[BITCAST3]] ; GFX9-UNSAFE: [[FADD:%[0-9]+]]:_(<3 x s16>) = G_FADD [[BITCAST5]], [[FMUL]] ; GFX9-UNSAFE: [[DEF1:%[0-9]+]]:_(<3 x s16>) = G_IMPLICIT_DEF @@ -1521,8 +1352,7 @@ body: | ; GFX9-UNSAFE: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>), [[UV2:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS3]](<6 x s16>) ; GFX9-UNSAFE: $vgpr0 = COPY [[UV]](<2 x s16>) ; GFX9-UNSAFE: $vgpr1 = COPY [[UV1]](<2 x s16>) - ; GFX9-UNSAFE: [[COPY7:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY6]] - ; GFX9-UNSAFE: S_SETPC_B64_return [[COPY7]], implicit $vgpr0, implicit $vgpr1 + ; GFX9-UNSAFE: S_SETPC_B64_return $sgpr30_sgpr31, implicit $vgpr0, implicit $vgpr1 ; GFX10-LABEL: name: test_3xhalf_add_mul_rhs ; GFX10: [[COPY:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0 ; GFX10: [[COPY1:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr1 @@ -1543,7 +1373,6 @@ body: | ; GFX10: [[BITCAST4:%[0-9]+]]:_(s96) = G_BITCAST [[CONCAT_VECTORS2]](<6 x s16>) ; GFX10: [[TRUNC2:%[0-9]+]]:_(s48) = G_TRUNC [[BITCAST4]](s96) ; GFX10: [[BITCAST5:%[0-9]+]]:_(<3 x s16>) = G_BITCAST [[TRUNC2]](s48) - ; GFX10: [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 ; GFX10: [[FMUL:%[0-9]+]]:_(<3 x s16>) = G_FMUL [[BITCAST1]], [[BITCAST3]] ; GFX10: [[FADD:%[0-9]+]]:_(<3 x s16>) = G_FADD [[BITCAST5]], [[FMUL]] ; GFX10: [[DEF1:%[0-9]+]]:_(<3 x s16>) = G_IMPLICIT_DEF @@ -1551,8 +1380,7 @@ body: | ; GFX10: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>), [[UV2:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS3]](<6 x s16>) ; GFX10: $vgpr0 = COPY [[UV]](<2 x s16>) ; GFX10: $vgpr1 = COPY [[UV1]](<2 x s16>) - ; GFX10: [[COPY7:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY6]] - ; GFX10: S_SETPC_B64_return [[COPY7]], implicit $vgpr0, implicit $vgpr1 + ; GFX10: S_SETPC_B64_return $sgpr30_sgpr31, implicit $vgpr0, implicit $vgpr1 ; GFX10-CONTRACT-LABEL: name: test_3xhalf_add_mul_rhs ; GFX10-CONTRACT: [[COPY:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0 ; GFX10-CONTRACT: [[COPY1:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr1 @@ -1573,7 +1401,6 @@ body: | ; GFX10-CONTRACT: [[BITCAST4:%[0-9]+]]:_(s96) = G_BITCAST [[CONCAT_VECTORS2]](<6 x s16>) ; GFX10-CONTRACT: [[TRUNC2:%[0-9]+]]:_(s48) = G_TRUNC [[BITCAST4]](s96) ; GFX10-CONTRACT: [[BITCAST5:%[0-9]+]]:_(<3 x s16>) = G_BITCAST [[TRUNC2]](s48) - ; GFX10-CONTRACT: [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 ; GFX10-CONTRACT: [[FMUL:%[0-9]+]]:_(<3 x s16>) = G_FMUL [[BITCAST1]], [[BITCAST3]] ; GFX10-CONTRACT: [[FADD:%[0-9]+]]:_(<3 x s16>) = G_FADD [[BITCAST5]], [[FMUL]] ; GFX10-CONTRACT: [[DEF1:%[0-9]+]]:_(<3 x s16>) = G_IMPLICIT_DEF @@ -1581,8 +1408,7 @@ body: | ; GFX10-CONTRACT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>), [[UV2:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS3]](<6 x s16>) ; GFX10-CONTRACT: $vgpr0 = COPY [[UV]](<2 x s16>) ; GFX10-CONTRACT: $vgpr1 = COPY [[UV1]](<2 x 
s16>) - ; GFX10-CONTRACT: [[COPY7:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY6]] - ; GFX10-CONTRACT: S_SETPC_B64_return [[COPY7]], implicit $vgpr0, implicit $vgpr1 + ; GFX10-CONTRACT: S_SETPC_B64_return $sgpr30_sgpr31, implicit $vgpr0, implicit $vgpr1 ; GFX10-DENORM-LABEL: name: test_3xhalf_add_mul_rhs ; GFX10-DENORM: [[COPY:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0 ; GFX10-DENORM: [[COPY1:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr1 @@ -1603,7 +1429,6 @@ body: | ; GFX10-DENORM: [[BITCAST4:%[0-9]+]]:_(s96) = G_BITCAST [[CONCAT_VECTORS2]](<6 x s16>) ; GFX10-DENORM: [[TRUNC2:%[0-9]+]]:_(s48) = G_TRUNC [[BITCAST4]](s96) ; GFX10-DENORM: [[BITCAST5:%[0-9]+]]:_(<3 x s16>) = G_BITCAST [[TRUNC2]](s48) - ; GFX10-DENORM: [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 ; GFX10-DENORM: [[FMUL:%[0-9]+]]:_(<3 x s16>) = G_FMUL [[BITCAST1]], [[BITCAST3]] ; GFX10-DENORM: [[FADD:%[0-9]+]]:_(<3 x s16>) = G_FADD [[BITCAST5]], [[FMUL]] ; GFX10-DENORM: [[DEF1:%[0-9]+]]:_(<3 x s16>) = G_IMPLICIT_DEF @@ -1611,8 +1436,7 @@ body: | ; GFX10-DENORM: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>), [[UV2:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS3]](<6 x s16>) ; GFX10-DENORM: $vgpr0 = COPY [[UV]](<2 x s16>) ; GFX10-DENORM: $vgpr1 = COPY [[UV1]](<2 x s16>) - ; GFX10-DENORM: [[COPY7:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY6]] - ; GFX10-DENORM: S_SETPC_B64_return [[COPY7]], implicit $vgpr0, implicit $vgpr1 + ; GFX10-DENORM: S_SETPC_B64_return $sgpr30_sgpr31, implicit $vgpr0, implicit $vgpr1 ; GFX10-UNSAFE-LABEL: name: test_3xhalf_add_mul_rhs ; GFX10-UNSAFE: [[COPY:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0 ; GFX10-UNSAFE: [[COPY1:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr1 @@ -1633,7 +1457,6 @@ body: | ; GFX10-UNSAFE: [[BITCAST4:%[0-9]+]]:_(s96) = G_BITCAST [[CONCAT_VECTORS2]](<6 x s16>) ; GFX10-UNSAFE: [[TRUNC2:%[0-9]+]]:_(s48) = G_TRUNC [[BITCAST4]](s96) ; GFX10-UNSAFE: [[BITCAST5:%[0-9]+]]:_(<3 x s16>) = G_BITCAST [[TRUNC2]](s48) - ; GFX10-UNSAFE: [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 ; GFX10-UNSAFE: [[FMUL:%[0-9]+]]:_(<3 x s16>) = G_FMUL [[BITCAST1]], [[BITCAST3]] ; GFX10-UNSAFE: [[FADD:%[0-9]+]]:_(<3 x s16>) = G_FADD [[BITCAST5]], [[FMUL]] ; GFX10-UNSAFE: [[DEF1:%[0-9]+]]:_(<3 x s16>) = G_IMPLICIT_DEF @@ -1641,8 +1464,7 @@ body: | ; GFX10-UNSAFE: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>), [[UV2:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS3]](<6 x s16>) ; GFX10-UNSAFE: $vgpr0 = COPY [[UV]](<2 x s16>) ; GFX10-UNSAFE: $vgpr1 = COPY [[UV1]](<2 x s16>) - ; GFX10-UNSAFE: [[COPY7:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY6]] - ; GFX10-UNSAFE: S_SETPC_B64_return [[COPY7]], implicit $vgpr0, implicit $vgpr1 + ; GFX10-UNSAFE: S_SETPC_B64_return $sgpr30_sgpr31, implicit $vgpr0, implicit $vgpr1 %4:_(<2 x s16>) = COPY $vgpr0 %5:_(<2 x s16>) = COPY $vgpr1 %10:_(<2 x s16>) = G_IMPLICIT_DEF @@ -1656,7 +1478,6 @@ body: | %9:_(<2 x s16>) = COPY $vgpr5 %15:_(<6 x s16>) = G_CONCAT_VECTORS %8(<2 x s16>), %9(<2 x s16>), %10(<2 x s16>) %2:_(<3 x s16>), %16:_(<3 x s16>) = G_UNMERGE_VALUES %15(<6 x s16>) - %3:sgpr_64 = COPY $sgpr30_sgpr31 %17:_(<3 x s16>) = G_FMUL %0, %1 %18:_(<3 x s16>) = G_FADD %2, %17 %22:_(<3 x s16>) = G_IMPLICIT_DEF @@ -1664,15 +1485,14 @@ body: | %20:_(<2 x s16>), %21:_(<2 x s16>), %24:_(<2 x s16>) = G_UNMERGE_VALUES %23(<6 x s16>) $vgpr0 = COPY %20(<2 x s16>) $vgpr1 = COPY %21(<2 x s16>) - %19:ccr_sgpr_64 = COPY %3 - S_SETPC_B64_return %19, implicit $vgpr0, implicit $vgpr1 + S_SETPC_B64_return $sgpr30_sgpr31, implicit $vgpr0, implicit $vgpr1 ... 
--- name: test_4xdouble_add_mul body: | bb.1.entry: - liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $vgpr8, $vgpr9, $vgpr10, $vgpr11, $vgpr12, $vgpr13, $vgpr14, $vgpr15, $vgpr16, $vgpr17, $vgpr18, $vgpr19, $vgpr20, $vgpr21, $vgpr22, $vgpr23, $sgpr30_sgpr31 + liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $vgpr8, $vgpr9, $vgpr10, $vgpr11, $vgpr12, $vgpr13, $vgpr14, $vgpr15, $vgpr16, $vgpr17, $vgpr18, $vgpr19, $vgpr20, $vgpr21, $vgpr22, $vgpr23 ; GFX9-LABEL: name: test_4xdouble_add_mul ; GFX9: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 @@ -1714,7 +1534,6 @@ body: | ; GFX9: [[MV10:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY20]](s32), [[COPY21]](s32) ; GFX9: [[MV11:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY22]](s32), [[COPY23]](s32) ; GFX9: [[BUILD_VECTOR2:%[0-9]+]]:_(<4 x s64>) = G_BUILD_VECTOR [[MV8]](s64), [[MV9]](s64), [[MV10]](s64), [[MV11]](s64) - ; GFX9: [[COPY24:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 ; GFX9: [[FMUL:%[0-9]+]]:_(<4 x s64>) = G_FMUL [[BUILD_VECTOR]], [[BUILD_VECTOR1]] ; GFX9: [[FADD:%[0-9]+]]:_(<4 x s64>) = G_FADD [[FMUL]], [[BUILD_VECTOR2]] ; GFX9: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32), [[UV6:%[0-9]+]]:_(s32), [[UV7:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[FADD]](<4 x s64>) @@ -1726,8 +1545,7 @@ body: | ; GFX9: $vgpr5 = COPY [[UV5]](s32) ; GFX9: $vgpr6 = COPY [[UV6]](s32) ; GFX9: $vgpr7 = COPY [[UV7]](s32) - ; GFX9: [[COPY25:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY24]] - ; GFX9: S_SETPC_B64_return [[COPY25]], implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7 + ; GFX9: S_SETPC_B64_return $sgpr30_sgpr31, implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7 ; GFX9-CONTRACT-LABEL: name: test_4xdouble_add_mul ; GFX9-CONTRACT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 ; GFX9-CONTRACT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 @@ -1768,7 +1586,6 @@ body: | ; GFX9-CONTRACT: [[MV10:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY20]](s32), [[COPY21]](s32) ; GFX9-CONTRACT: [[MV11:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY22]](s32), [[COPY23]](s32) ; GFX9-CONTRACT: [[BUILD_VECTOR2:%[0-9]+]]:_(<4 x s64>) = G_BUILD_VECTOR [[MV8]](s64), [[MV9]](s64), [[MV10]](s64), [[MV11]](s64) - ; GFX9-CONTRACT: [[COPY24:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 ; GFX9-CONTRACT: [[FMUL:%[0-9]+]]:_(<4 x s64>) = G_FMUL [[BUILD_VECTOR]], [[BUILD_VECTOR1]] ; GFX9-CONTRACT: [[FADD:%[0-9]+]]:_(<4 x s64>) = G_FADD [[FMUL]], [[BUILD_VECTOR2]] ; GFX9-CONTRACT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32), [[UV6:%[0-9]+]]:_(s32), [[UV7:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[FADD]](<4 x s64>) @@ -1780,8 +1597,7 @@ body: | ; GFX9-CONTRACT: $vgpr5 = COPY [[UV5]](s32) ; GFX9-CONTRACT: $vgpr6 = COPY [[UV6]](s32) ; GFX9-CONTRACT: $vgpr7 = COPY [[UV7]](s32) - ; GFX9-CONTRACT: [[COPY25:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY24]] - ; GFX9-CONTRACT: S_SETPC_B64_return [[COPY25]], implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7 + ; GFX9-CONTRACT: S_SETPC_B64_return $sgpr30_sgpr31, implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7 ; GFX9-DENORM-LABEL: name: test_4xdouble_add_mul 
; GFX9-DENORM: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 ; GFX9-DENORM: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 @@ -1822,7 +1638,6 @@ body: | ; GFX9-DENORM: [[MV10:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY20]](s32), [[COPY21]](s32) ; GFX9-DENORM: [[MV11:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY22]](s32), [[COPY23]](s32) ; GFX9-DENORM: [[BUILD_VECTOR2:%[0-9]+]]:_(<4 x s64>) = G_BUILD_VECTOR [[MV8]](s64), [[MV9]](s64), [[MV10]](s64), [[MV11]](s64) - ; GFX9-DENORM: [[COPY24:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 ; GFX9-DENORM: [[FMUL:%[0-9]+]]:_(<4 x s64>) = G_FMUL [[BUILD_VECTOR]], [[BUILD_VECTOR1]] ; GFX9-DENORM: [[FADD:%[0-9]+]]:_(<4 x s64>) = G_FADD [[FMUL]], [[BUILD_VECTOR2]] ; GFX9-DENORM: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32), [[UV6:%[0-9]+]]:_(s32), [[UV7:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[FADD]](<4 x s64>) @@ -1834,8 +1649,7 @@ body: | ; GFX9-DENORM: $vgpr5 = COPY [[UV5]](s32) ; GFX9-DENORM: $vgpr6 = COPY [[UV6]](s32) ; GFX9-DENORM: $vgpr7 = COPY [[UV7]](s32) - ; GFX9-DENORM: [[COPY25:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY24]] - ; GFX9-DENORM: S_SETPC_B64_return [[COPY25]], implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7 + ; GFX9-DENORM: S_SETPC_B64_return $sgpr30_sgpr31, implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7 ; GFX9-UNSAFE-LABEL: name: test_4xdouble_add_mul ; GFX9-UNSAFE: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 ; GFX9-UNSAFE: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 @@ -1876,7 +1690,6 @@ body: | ; GFX9-UNSAFE: [[MV10:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY20]](s32), [[COPY21]](s32) ; GFX9-UNSAFE: [[MV11:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY22]](s32), [[COPY23]](s32) ; GFX9-UNSAFE: [[BUILD_VECTOR2:%[0-9]+]]:_(<4 x s64>) = G_BUILD_VECTOR [[MV8]](s64), [[MV9]](s64), [[MV10]](s64), [[MV11]](s64) - ; GFX9-UNSAFE: [[COPY24:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 ; GFX9-UNSAFE: [[FMUL:%[0-9]+]]:_(<4 x s64>) = G_FMUL [[BUILD_VECTOR]], [[BUILD_VECTOR1]] ; GFX9-UNSAFE: [[FADD:%[0-9]+]]:_(<4 x s64>) = G_FADD [[FMUL]], [[BUILD_VECTOR2]] ; GFX9-UNSAFE: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32), [[UV6:%[0-9]+]]:_(s32), [[UV7:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[FADD]](<4 x s64>) @@ -1888,8 +1701,7 @@ body: | ; GFX9-UNSAFE: $vgpr5 = COPY [[UV5]](s32) ; GFX9-UNSAFE: $vgpr6 = COPY [[UV6]](s32) ; GFX9-UNSAFE: $vgpr7 = COPY [[UV7]](s32) - ; GFX9-UNSAFE: [[COPY25:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY24]] - ; GFX9-UNSAFE: S_SETPC_B64_return [[COPY25]], implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7 + ; GFX9-UNSAFE: S_SETPC_B64_return $sgpr30_sgpr31, implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7 ; GFX10-LABEL: name: test_4xdouble_add_mul ; GFX10: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 ; GFX10: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 @@ -1930,7 +1742,6 @@ body: | ; GFX10: [[MV10:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY20]](s32), [[COPY21]](s32) ; GFX10: [[MV11:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY22]](s32), [[COPY23]](s32) ; GFX10: [[BUILD_VECTOR2:%[0-9]+]]:_(<4 x s64>) = G_BUILD_VECTOR [[MV8]](s64), [[MV9]](s64), [[MV10]](s64), [[MV11]](s64) - ; GFX10: 
[[COPY24:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 ; GFX10: [[FMUL:%[0-9]+]]:_(<4 x s64>) = G_FMUL [[BUILD_VECTOR]], [[BUILD_VECTOR1]] ; GFX10: [[FADD:%[0-9]+]]:_(<4 x s64>) = G_FADD [[FMUL]], [[BUILD_VECTOR2]] ; GFX10: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32), [[UV6:%[0-9]+]]:_(s32), [[UV7:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[FADD]](<4 x s64>) @@ -1942,8 +1753,7 @@ body: | ; GFX10: $vgpr5 = COPY [[UV5]](s32) ; GFX10: $vgpr6 = COPY [[UV6]](s32) ; GFX10: $vgpr7 = COPY [[UV7]](s32) - ; GFX10: [[COPY25:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY24]] - ; GFX10: S_SETPC_B64_return [[COPY25]], implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7 + ; GFX10: S_SETPC_B64_return $sgpr30_sgpr31, implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7 ; GFX10-CONTRACT-LABEL: name: test_4xdouble_add_mul ; GFX10-CONTRACT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 ; GFX10-CONTRACT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 @@ -1984,7 +1794,6 @@ body: | ; GFX10-CONTRACT: [[MV10:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY20]](s32), [[COPY21]](s32) ; GFX10-CONTRACT: [[MV11:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY22]](s32), [[COPY23]](s32) ; GFX10-CONTRACT: [[BUILD_VECTOR2:%[0-9]+]]:_(<4 x s64>) = G_BUILD_VECTOR [[MV8]](s64), [[MV9]](s64), [[MV10]](s64), [[MV11]](s64) - ; GFX10-CONTRACT: [[COPY24:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 ; GFX10-CONTRACT: [[FMUL:%[0-9]+]]:_(<4 x s64>) = G_FMUL [[BUILD_VECTOR]], [[BUILD_VECTOR1]] ; GFX10-CONTRACT: [[FADD:%[0-9]+]]:_(<4 x s64>) = G_FADD [[FMUL]], [[BUILD_VECTOR2]] ; GFX10-CONTRACT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32), [[UV6:%[0-9]+]]:_(s32), [[UV7:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[FADD]](<4 x s64>) @@ -1996,8 +1805,7 @@ body: | ; GFX10-CONTRACT: $vgpr5 = COPY [[UV5]](s32) ; GFX10-CONTRACT: $vgpr6 = COPY [[UV6]](s32) ; GFX10-CONTRACT: $vgpr7 = COPY [[UV7]](s32) - ; GFX10-CONTRACT: [[COPY25:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY24]] - ; GFX10-CONTRACT: S_SETPC_B64_return [[COPY25]], implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7 + ; GFX10-CONTRACT: S_SETPC_B64_return $sgpr30_sgpr31, implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7 ; GFX10-DENORM-LABEL: name: test_4xdouble_add_mul ; GFX10-DENORM: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 ; GFX10-DENORM: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 @@ -2038,7 +1846,6 @@ body: | ; GFX10-DENORM: [[MV10:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY20]](s32), [[COPY21]](s32) ; GFX10-DENORM: [[MV11:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY22]](s32), [[COPY23]](s32) ; GFX10-DENORM: [[BUILD_VECTOR2:%[0-9]+]]:_(<4 x s64>) = G_BUILD_VECTOR [[MV8]](s64), [[MV9]](s64), [[MV10]](s64), [[MV11]](s64) - ; GFX10-DENORM: [[COPY24:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 ; GFX10-DENORM: [[FMUL:%[0-9]+]]:_(<4 x s64>) = G_FMUL [[BUILD_VECTOR]], [[BUILD_VECTOR1]] ; GFX10-DENORM: [[FADD:%[0-9]+]]:_(<4 x s64>) = G_FADD [[FMUL]], [[BUILD_VECTOR2]] ; GFX10-DENORM: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32), [[UV6:%[0-9]+]]:_(s32), 
[[UV7:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[FADD]](<4 x s64>) @@ -2050,8 +1857,7 @@ body: | ; GFX10-DENORM: $vgpr5 = COPY [[UV5]](s32) ; GFX10-DENORM: $vgpr6 = COPY [[UV6]](s32) ; GFX10-DENORM: $vgpr7 = COPY [[UV7]](s32) - ; GFX10-DENORM: [[COPY25:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY24]] - ; GFX10-DENORM: S_SETPC_B64_return [[COPY25]], implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7 + ; GFX10-DENORM: S_SETPC_B64_return $sgpr30_sgpr31, implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7 ; GFX10-UNSAFE-LABEL: name: test_4xdouble_add_mul ; GFX10-UNSAFE: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 ; GFX10-UNSAFE: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 @@ -2092,7 +1898,6 @@ body: | ; GFX10-UNSAFE: [[MV10:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY20]](s32), [[COPY21]](s32) ; GFX10-UNSAFE: [[MV11:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY22]](s32), [[COPY23]](s32) ; GFX10-UNSAFE: [[BUILD_VECTOR2:%[0-9]+]]:_(<4 x s64>) = G_BUILD_VECTOR [[MV8]](s64), [[MV9]](s64), [[MV10]](s64), [[MV11]](s64) - ; GFX10-UNSAFE: [[COPY24:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 ; GFX10-UNSAFE: [[FMUL:%[0-9]+]]:_(<4 x s64>) = G_FMUL [[BUILD_VECTOR]], [[BUILD_VECTOR1]] ; GFX10-UNSAFE: [[FADD:%[0-9]+]]:_(<4 x s64>) = G_FADD [[FMUL]], [[BUILD_VECTOR2]] ; GFX10-UNSAFE: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32), [[UV6:%[0-9]+]]:_(s32), [[UV7:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[FADD]](<4 x s64>) @@ -2104,8 +1909,7 @@ body: | ; GFX10-UNSAFE: $vgpr5 = COPY [[UV5]](s32) ; GFX10-UNSAFE: $vgpr6 = COPY [[UV6]](s32) ; GFX10-UNSAFE: $vgpr7 = COPY [[UV7]](s32) - ; GFX10-UNSAFE: [[COPY25:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY24]] - ; GFX10-UNSAFE: S_SETPC_B64_return [[COPY25]], implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7 + ; GFX10-UNSAFE: S_SETPC_B64_return $sgpr30_sgpr31, implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7 %4:_(s32) = COPY $vgpr0 %5:_(s32) = COPY $vgpr1 %6:_(s32) = COPY $vgpr2 @@ -2145,7 +1949,6 @@ body: | %38:_(s64) = G_MERGE_VALUES %24(s32), %25(s32) %39:_(s64) = G_MERGE_VALUES %26(s32), %27(s32) %2:_(<4 x s64>) = G_BUILD_VECTOR %36(s64), %37(s64), %38(s64), %39(s64) - %3:sgpr_64 = COPY $sgpr30_sgpr31 %40:_(<4 x s64>) = G_FMUL %0, %1 %41:_(<4 x s64>) = G_FADD %40, %2 %43:_(s32), %44:_(s32), %45:_(s32), %46:_(s32), %47:_(s32), %48:_(s32), %49:_(s32), %50:_(s32) = G_UNMERGE_VALUES %41(<4 x s64>) @@ -2157,15 +1960,14 @@ body: | $vgpr5 = COPY %48(s32) $vgpr6 = COPY %49(s32) $vgpr7 = COPY %50(s32) - %42:ccr_sgpr_64 = COPY %3 - S_SETPC_B64_return %42, implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7 + S_SETPC_B64_return $sgpr30_sgpr31, implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7 ... 
--- name: test_3xdouble_add_mul_rhs body: | bb.1.entry: - liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $vgpr8, $vgpr9, $vgpr10, $vgpr11, $vgpr12, $vgpr13, $vgpr14, $vgpr15, $vgpr16, $vgpr17, $sgpr30_sgpr31 + liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $vgpr8, $vgpr9, $vgpr10, $vgpr11, $vgpr12, $vgpr13, $vgpr14, $vgpr15, $vgpr16, $vgpr17 ; GFX9-LABEL: name: test_3xdouble_add_mul_rhs ; GFX9: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 @@ -2198,7 +2000,6 @@ body: | ; GFX9: [[MV7:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY14]](s32), [[COPY15]](s32) ; GFX9: [[MV8:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY16]](s32), [[COPY17]](s32) ; GFX9: [[BUILD_VECTOR2:%[0-9]+]]:_(<3 x s64>) = G_BUILD_VECTOR [[MV6]](s64), [[MV7]](s64), [[MV8]](s64) - ; GFX9: [[COPY18:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 ; GFX9: [[FMUL:%[0-9]+]]:_(<3 x s64>) = G_FMUL [[BUILD_VECTOR]], [[BUILD_VECTOR1]] ; GFX9: [[FADD:%[0-9]+]]:_(<3 x s64>) = G_FADD [[BUILD_VECTOR2]], [[FMUL]] ; GFX9: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[FADD]](<3 x s64>) @@ -2208,8 +2009,7 @@ body: | ; GFX9: $vgpr3 = COPY [[UV3]](s32) ; GFX9: $vgpr4 = COPY [[UV4]](s32) ; GFX9: $vgpr5 = COPY [[UV5]](s32) - ; GFX9: [[COPY19:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY18]] - ; GFX9: S_SETPC_B64_return [[COPY19]], implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5 + ; GFX9: S_SETPC_B64_return $sgpr30_sgpr31, implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5 ; GFX9-CONTRACT-LABEL: name: test_3xdouble_add_mul_rhs ; GFX9-CONTRACT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 ; GFX9-CONTRACT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 @@ -2241,7 +2041,6 @@ body: | ; GFX9-CONTRACT: [[MV7:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY14]](s32), [[COPY15]](s32) ; GFX9-CONTRACT: [[MV8:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY16]](s32), [[COPY17]](s32) ; GFX9-CONTRACT: [[BUILD_VECTOR2:%[0-9]+]]:_(<3 x s64>) = G_BUILD_VECTOR [[MV6]](s64), [[MV7]](s64), [[MV8]](s64) - ; GFX9-CONTRACT: [[COPY18:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 ; GFX9-CONTRACT: [[FMUL:%[0-9]+]]:_(<3 x s64>) = G_FMUL [[BUILD_VECTOR]], [[BUILD_VECTOR1]] ; GFX9-CONTRACT: [[FADD:%[0-9]+]]:_(<3 x s64>) = G_FADD [[BUILD_VECTOR2]], [[FMUL]] ; GFX9-CONTRACT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[FADD]](<3 x s64>) @@ -2251,8 +2050,7 @@ body: | ; GFX9-CONTRACT: $vgpr3 = COPY [[UV3]](s32) ; GFX9-CONTRACT: $vgpr4 = COPY [[UV4]](s32) ; GFX9-CONTRACT: $vgpr5 = COPY [[UV5]](s32) - ; GFX9-CONTRACT: [[COPY19:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY18]] - ; GFX9-CONTRACT: S_SETPC_B64_return [[COPY19]], implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5 + ; GFX9-CONTRACT: S_SETPC_B64_return $sgpr30_sgpr31, implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5 ; GFX9-DENORM-LABEL: name: test_3xdouble_add_mul_rhs ; GFX9-DENORM: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 ; GFX9-DENORM: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 @@ -2284,7 +2082,6 @@ body: | ; GFX9-DENORM: [[MV7:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY14]](s32), [[COPY15]](s32) ; GFX9-DENORM: [[MV8:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY16]](s32), [[COPY17]](s32) ; GFX9-DENORM: [[BUILD_VECTOR2:%[0-9]+]]:_(<3 x 
s64>) = G_BUILD_VECTOR [[MV6]](s64), [[MV7]](s64), [[MV8]](s64) - ; GFX9-DENORM: [[COPY18:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 ; GFX9-DENORM: [[FMUL:%[0-9]+]]:_(<3 x s64>) = G_FMUL [[BUILD_VECTOR]], [[BUILD_VECTOR1]] ; GFX9-DENORM: [[FADD:%[0-9]+]]:_(<3 x s64>) = G_FADD [[BUILD_VECTOR2]], [[FMUL]] ; GFX9-DENORM: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[FADD]](<3 x s64>) @@ -2294,8 +2091,7 @@ body: | ; GFX9-DENORM: $vgpr3 = COPY [[UV3]](s32) ; GFX9-DENORM: $vgpr4 = COPY [[UV4]](s32) ; GFX9-DENORM: $vgpr5 = COPY [[UV5]](s32) - ; GFX9-DENORM: [[COPY19:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY18]] - ; GFX9-DENORM: S_SETPC_B64_return [[COPY19]], implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5 + ; GFX9-DENORM: S_SETPC_B64_return $sgpr30_sgpr31, implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5 ; GFX9-UNSAFE-LABEL: name: test_3xdouble_add_mul_rhs ; GFX9-UNSAFE: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 ; GFX9-UNSAFE: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 @@ -2327,7 +2123,6 @@ body: | ; GFX9-UNSAFE: [[MV7:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY14]](s32), [[COPY15]](s32) ; GFX9-UNSAFE: [[MV8:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY16]](s32), [[COPY17]](s32) ; GFX9-UNSAFE: [[BUILD_VECTOR2:%[0-9]+]]:_(<3 x s64>) = G_BUILD_VECTOR [[MV6]](s64), [[MV7]](s64), [[MV8]](s64) - ; GFX9-UNSAFE: [[COPY18:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 ; GFX9-UNSAFE: [[FMUL:%[0-9]+]]:_(<3 x s64>) = G_FMUL [[BUILD_VECTOR]], [[BUILD_VECTOR1]] ; GFX9-UNSAFE: [[FADD:%[0-9]+]]:_(<3 x s64>) = G_FADD [[BUILD_VECTOR2]], [[FMUL]] ; GFX9-UNSAFE: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[FADD]](<3 x s64>) @@ -2337,8 +2132,7 @@ body: | ; GFX9-UNSAFE: $vgpr3 = COPY [[UV3]](s32) ; GFX9-UNSAFE: $vgpr4 = COPY [[UV4]](s32) ; GFX9-UNSAFE: $vgpr5 = COPY [[UV5]](s32) - ; GFX9-UNSAFE: [[COPY19:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY18]] - ; GFX9-UNSAFE: S_SETPC_B64_return [[COPY19]], implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5 + ; GFX9-UNSAFE: S_SETPC_B64_return $sgpr30_sgpr31, implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5 ; GFX10-LABEL: name: test_3xdouble_add_mul_rhs ; GFX10: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 ; GFX10: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 @@ -2370,7 +2164,6 @@ body: | ; GFX10: [[MV7:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY14]](s32), [[COPY15]](s32) ; GFX10: [[MV8:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY16]](s32), [[COPY17]](s32) ; GFX10: [[BUILD_VECTOR2:%[0-9]+]]:_(<3 x s64>) = G_BUILD_VECTOR [[MV6]](s64), [[MV7]](s64), [[MV8]](s64) - ; GFX10: [[COPY18:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 ; GFX10: [[FMUL:%[0-9]+]]:_(<3 x s64>) = G_FMUL [[BUILD_VECTOR]], [[BUILD_VECTOR1]] ; GFX10: [[FADD:%[0-9]+]]:_(<3 x s64>) = G_FADD [[BUILD_VECTOR2]], [[FMUL]] ; GFX10: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[FADD]](<3 x s64>) @@ -2380,8 +2173,7 @@ body: | ; GFX10: $vgpr3 = COPY [[UV3]](s32) ; GFX10: $vgpr4 = COPY [[UV4]](s32) ; GFX10: $vgpr5 = COPY [[UV5]](s32) - ; GFX10: [[COPY19:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY18]] - ; GFX10: S_SETPC_B64_return [[COPY19]], 
implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5 + ; GFX10: S_SETPC_B64_return $sgpr30_sgpr31, implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5 ; GFX10-CONTRACT-LABEL: name: test_3xdouble_add_mul_rhs ; GFX10-CONTRACT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 ; GFX10-CONTRACT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 @@ -2413,7 +2205,6 @@ body: | ; GFX10-CONTRACT: [[MV7:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY14]](s32), [[COPY15]](s32) ; GFX10-CONTRACT: [[MV8:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY16]](s32), [[COPY17]](s32) ; GFX10-CONTRACT: [[BUILD_VECTOR2:%[0-9]+]]:_(<3 x s64>) = G_BUILD_VECTOR [[MV6]](s64), [[MV7]](s64), [[MV8]](s64) - ; GFX10-CONTRACT: [[COPY18:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 ; GFX10-CONTRACT: [[FMUL:%[0-9]+]]:_(<3 x s64>) = G_FMUL [[BUILD_VECTOR]], [[BUILD_VECTOR1]] ; GFX10-CONTRACT: [[FADD:%[0-9]+]]:_(<3 x s64>) = G_FADD [[BUILD_VECTOR2]], [[FMUL]] ; GFX10-CONTRACT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[FADD]](<3 x s64>) @@ -2423,8 +2214,7 @@ body: | ; GFX10-CONTRACT: $vgpr3 = COPY [[UV3]](s32) ; GFX10-CONTRACT: $vgpr4 = COPY [[UV4]](s32) ; GFX10-CONTRACT: $vgpr5 = COPY [[UV5]](s32) - ; GFX10-CONTRACT: [[COPY19:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY18]] - ; GFX10-CONTRACT: S_SETPC_B64_return [[COPY19]], implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5 + ; GFX10-CONTRACT: S_SETPC_B64_return $sgpr30_sgpr31, implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5 ; GFX10-DENORM-LABEL: name: test_3xdouble_add_mul_rhs ; GFX10-DENORM: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 ; GFX10-DENORM: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 @@ -2456,7 +2246,6 @@ body: | ; GFX10-DENORM: [[MV7:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY14]](s32), [[COPY15]](s32) ; GFX10-DENORM: [[MV8:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY16]](s32), [[COPY17]](s32) ; GFX10-DENORM: [[BUILD_VECTOR2:%[0-9]+]]:_(<3 x s64>) = G_BUILD_VECTOR [[MV6]](s64), [[MV7]](s64), [[MV8]](s64) - ; GFX10-DENORM: [[COPY18:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 ; GFX10-DENORM: [[FMUL:%[0-9]+]]:_(<3 x s64>) = G_FMUL [[BUILD_VECTOR]], [[BUILD_VECTOR1]] ; GFX10-DENORM: [[FADD:%[0-9]+]]:_(<3 x s64>) = G_FADD [[BUILD_VECTOR2]], [[FMUL]] ; GFX10-DENORM: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[FADD]](<3 x s64>) @@ -2466,8 +2255,7 @@ body: | ; GFX10-DENORM: $vgpr3 = COPY [[UV3]](s32) ; GFX10-DENORM: $vgpr4 = COPY [[UV4]](s32) ; GFX10-DENORM: $vgpr5 = COPY [[UV5]](s32) - ; GFX10-DENORM: [[COPY19:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY18]] - ; GFX10-DENORM: S_SETPC_B64_return [[COPY19]], implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5 + ; GFX10-DENORM: S_SETPC_B64_return $sgpr30_sgpr31, implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5 ; GFX10-UNSAFE-LABEL: name: test_3xdouble_add_mul_rhs ; GFX10-UNSAFE: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 ; GFX10-UNSAFE: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 @@ -2499,7 +2287,6 @@ body: | ; GFX10-UNSAFE: [[MV7:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY14]](s32), [[COPY15]](s32) ; GFX10-UNSAFE: [[MV8:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY16]](s32), [[COPY17]](s32) 
; GFX10-UNSAFE: [[BUILD_VECTOR2:%[0-9]+]]:_(<3 x s64>) = G_BUILD_VECTOR [[MV6]](s64), [[MV7]](s64), [[MV8]](s64) - ; GFX10-UNSAFE: [[COPY18:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 ; GFX10-UNSAFE: [[FMUL:%[0-9]+]]:_(<3 x s64>) = G_FMUL [[BUILD_VECTOR]], [[BUILD_VECTOR1]] ; GFX10-UNSAFE: [[FADD:%[0-9]+]]:_(<3 x s64>) = G_FADD [[BUILD_VECTOR2]], [[FMUL]] ; GFX10-UNSAFE: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[FADD]](<3 x s64>) @@ -2509,8 +2296,7 @@ body: | ; GFX10-UNSAFE: $vgpr3 = COPY [[UV3]](s32) ; GFX10-UNSAFE: $vgpr4 = COPY [[UV4]](s32) ; GFX10-UNSAFE: $vgpr5 = COPY [[UV5]](s32) - ; GFX10-UNSAFE: [[COPY19:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY18]] - ; GFX10-UNSAFE: S_SETPC_B64_return [[COPY19]], implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5 + ; GFX10-UNSAFE: S_SETPC_B64_return $sgpr30_sgpr31, implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5 %4:_(s32) = COPY $vgpr0 %5:_(s32) = COPY $vgpr1 %6:_(s32) = COPY $vgpr2 @@ -2541,7 +2327,6 @@ body: | %29:_(s64) = G_MERGE_VALUES %18(s32), %19(s32) %30:_(s64) = G_MERGE_VALUES %20(s32), %21(s32) %2:_(<3 x s64>) = G_BUILD_VECTOR %28(s64), %29(s64), %30(s64) - %3:sgpr_64 = COPY $sgpr30_sgpr31 %31:_(<3 x s64>) = G_FMUL %0, %1 %32:_(<3 x s64>) = G_FADD %2, %31 %34:_(s32), %35:_(s32), %36:_(s32), %37:_(s32), %38:_(s32), %39:_(s32) = G_UNMERGE_VALUES %32(<3 x s64>) @@ -2551,6 +2336,5 @@ body: | $vgpr3 = COPY %37(s32) $vgpr4 = COPY %38(s32) $vgpr5 = COPY %39(s32) - %33:ccr_sgpr_64 = COPY %3 - S_SETPC_B64_return %33, implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5 + S_SETPC_B64_return $sgpr30_sgpr31, implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5 ... 
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/combine-fma-add-mul-pre-legalize.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/combine-fma-add-mul-pre-legalize.mir index 67d0fd31db6b..d6ef87759e9d 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/combine-fma-add-mul-pre-legalize.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/combine-fma-add-mul-pre-legalize.mir @@ -12,193 +12,157 @@ name: test_f32_add_mul body: | bb.1.entry: - liveins: $vgpr0, $vgpr1, $vgpr2, $sgpr30_sgpr31 + liveins: $vgpr0, $vgpr1, $vgpr2 ; GFX9-LABEL: name: test_f32_add_mul ; GFX9: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 ; GFX9: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 ; GFX9: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; GFX9: [[COPY3:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 ; GFX9: [[FMUL:%[0-9]+]]:_(s32) = reassoc G_FMUL [[COPY]], [[COPY1]] ; GFX9: [[FADD:%[0-9]+]]:_(s32) = reassoc G_FADD [[FMUL]], [[COPY2]] ; GFX9: $vgpr0 = COPY [[FADD]](s32) - ; GFX9: [[COPY4:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY3]] - ; GFX9: S_SETPC_B64_return [[COPY4]], implicit $vgpr0 + ; GFX9: S_SETPC_B64_return $sgpr30_sgpr31, implicit $vgpr0 ; GFX9-CONTRACT-LABEL: name: test_f32_add_mul ; GFX9-CONTRACT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 ; GFX9-CONTRACT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 ; GFX9-CONTRACT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; GFX9-CONTRACT: [[COPY3:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 ; GFX9-CONTRACT: [[FMA:%[0-9]+]]:_(s32) = G_FMA [[COPY]], [[COPY1]], [[COPY2]] ; GFX9-CONTRACT: $vgpr0 = COPY [[FMA]](s32) - ; GFX9-CONTRACT: [[COPY4:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY3]] - ; GFX9-CONTRACT: S_SETPC_B64_return [[COPY4]], implicit $vgpr0 + ; GFX9-CONTRACT: S_SETPC_B64_return $sgpr30_sgpr31, implicit $vgpr0 ; GFX9-DENORM-LABEL: name: test_f32_add_mul ; GFX9-DENORM: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 ; GFX9-DENORM: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 ; GFX9-DENORM: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; GFX9-DENORM: [[COPY3:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 ; GFX9-DENORM: [[FMUL:%[0-9]+]]:_(s32) = reassoc G_FMUL [[COPY]], [[COPY1]] ; GFX9-DENORM: [[FADD:%[0-9]+]]:_(s32) = reassoc G_FADD [[FMUL]], [[COPY2]] ; GFX9-DENORM: $vgpr0 = COPY [[FADD]](s32) - ; GFX9-DENORM: [[COPY4:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY3]] - ; GFX9-DENORM: S_SETPC_B64_return [[COPY4]], implicit $vgpr0 + ; GFX9-DENORM: S_SETPC_B64_return $sgpr30_sgpr31, implicit $vgpr0 ; GFX9-UNSAFE-LABEL: name: test_f32_add_mul ; GFX9-UNSAFE: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 ; GFX9-UNSAFE: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 ; GFX9-UNSAFE: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; GFX9-UNSAFE: [[COPY3:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 ; GFX9-UNSAFE: [[FMA:%[0-9]+]]:_(s32) = G_FMA [[COPY]], [[COPY1]], [[COPY2]] ; GFX9-UNSAFE: $vgpr0 = COPY [[FMA]](s32) - ; GFX9-UNSAFE: [[COPY4:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY3]] - ; GFX9-UNSAFE: S_SETPC_B64_return [[COPY4]], implicit $vgpr0 + ; GFX9-UNSAFE: S_SETPC_B64_return $sgpr30_sgpr31, implicit $vgpr0 ; GFX10-LABEL: name: test_f32_add_mul ; GFX10: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 ; GFX10: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 ; GFX10: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; GFX10: [[COPY3:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 ; GFX10: [[FMUL:%[0-9]+]]:_(s32) = reassoc G_FMUL [[COPY]], [[COPY1]] ; GFX10: [[FADD:%[0-9]+]]:_(s32) = reassoc G_FADD [[FMUL]], [[COPY2]] ; GFX10: $vgpr0 = COPY [[FADD]](s32) - ; GFX10: [[COPY4:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY3]] - ; GFX10: S_SETPC_B64_return [[COPY4]], implicit $vgpr0 + ; GFX10: S_SETPC_B64_return $sgpr30_sgpr31, implicit $vgpr0 ; 
GFX10-CONTRACT-LABEL: name: test_f32_add_mul ; GFX10-CONTRACT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 ; GFX10-CONTRACT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 ; GFX10-CONTRACT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; GFX10-CONTRACT: [[COPY3:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 ; GFX10-CONTRACT: [[FMA:%[0-9]+]]:_(s32) = G_FMA [[COPY]], [[COPY1]], [[COPY2]] ; GFX10-CONTRACT: $vgpr0 = COPY [[FMA]](s32) - ; GFX10-CONTRACT: [[COPY4:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY3]] - ; GFX10-CONTRACT: S_SETPC_B64_return [[COPY4]], implicit $vgpr0 + ; GFX10-CONTRACT: S_SETPC_B64_return $sgpr30_sgpr31, implicit $vgpr0 ; GFX10-DENORM-LABEL: name: test_f32_add_mul ; GFX10-DENORM: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 ; GFX10-DENORM: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 ; GFX10-DENORM: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; GFX10-DENORM: [[COPY3:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 ; GFX10-DENORM: [[FMUL:%[0-9]+]]:_(s32) = reassoc G_FMUL [[COPY]], [[COPY1]] ; GFX10-DENORM: [[FADD:%[0-9]+]]:_(s32) = reassoc G_FADD [[FMUL]], [[COPY2]] ; GFX10-DENORM: $vgpr0 = COPY [[FADD]](s32) - ; GFX10-DENORM: [[COPY4:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY3]] - ; GFX10-DENORM: S_SETPC_B64_return [[COPY4]], implicit $vgpr0 + ; GFX10-DENORM: S_SETPC_B64_return $sgpr30_sgpr31, implicit $vgpr0 ; GFX10-UNSAFE-LABEL: name: test_f32_add_mul ; GFX10-UNSAFE: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 ; GFX10-UNSAFE: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 ; GFX10-UNSAFE: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; GFX10-UNSAFE: [[COPY3:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 ; GFX10-UNSAFE: [[FMA:%[0-9]+]]:_(s32) = G_FMA [[COPY]], [[COPY1]], [[COPY2]] ; GFX10-UNSAFE: $vgpr0 = COPY [[FMA]](s32) - ; GFX10-UNSAFE: [[COPY4:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY3]] - ; GFX10-UNSAFE: S_SETPC_B64_return [[COPY4]], implicit $vgpr0 + ; GFX10-UNSAFE: S_SETPC_B64_return $sgpr30_sgpr31, implicit $vgpr0 %0:_(s32) = COPY $vgpr0 %1:_(s32) = COPY $vgpr1 %2:_(s32) = COPY $vgpr2 - %3:sgpr_64 = COPY $sgpr30_sgpr31 %4:_(s32) = reassoc G_FMUL %0, %1 %5:_(s32) = reassoc G_FADD %4, %2 $vgpr0 = COPY %5(s32) - %6:ccr_sgpr_64 = COPY %3 - S_SETPC_B64_return %6, implicit $vgpr0 + S_SETPC_B64_return $sgpr30_sgpr31, implicit $vgpr0 ... 
--- name: test_f32_add_mul_rhs body: | bb.1.entry: - liveins: $vgpr0, $vgpr1, $vgpr2, $sgpr30_sgpr31 + liveins: $vgpr0, $vgpr1, $vgpr2 ; GFX9-LABEL: name: test_f32_add_mul_rhs ; GFX9: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 ; GFX9: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 ; GFX9: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; GFX9: [[COPY3:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 ; GFX9: [[FMUL:%[0-9]+]]:_(s32) = reassoc G_FMUL [[COPY]], [[COPY1]] ; GFX9: [[FADD:%[0-9]+]]:_(s32) = reassoc G_FADD [[COPY2]], [[FMUL]] ; GFX9: $vgpr0 = COPY [[FADD]](s32) - ; GFX9: [[COPY4:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY3]] - ; GFX9: S_SETPC_B64_return [[COPY4]], implicit $vgpr0 + ; GFX9: S_SETPC_B64_return $sgpr30_sgpr31, implicit $vgpr0 ; GFX9-CONTRACT-LABEL: name: test_f32_add_mul_rhs ; GFX9-CONTRACT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 ; GFX9-CONTRACT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 ; GFX9-CONTRACT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; GFX9-CONTRACT: [[COPY3:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 ; GFX9-CONTRACT: [[FMA:%[0-9]+]]:_(s32) = G_FMA [[COPY]], [[COPY1]], [[COPY2]] ; GFX9-CONTRACT: $vgpr0 = COPY [[FMA]](s32) - ; GFX9-CONTRACT: [[COPY4:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY3]] - ; GFX9-CONTRACT: S_SETPC_B64_return [[COPY4]], implicit $vgpr0 + ; GFX9-CONTRACT: S_SETPC_B64_return $sgpr30_sgpr31, implicit $vgpr0 ; GFX9-DENORM-LABEL: name: test_f32_add_mul_rhs ; GFX9-DENORM: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 ; GFX9-DENORM: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 ; GFX9-DENORM: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; GFX9-DENORM: [[COPY3:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 ; GFX9-DENORM: [[FMUL:%[0-9]+]]:_(s32) = reassoc G_FMUL [[COPY]], [[COPY1]] ; GFX9-DENORM: [[FADD:%[0-9]+]]:_(s32) = reassoc G_FADD [[COPY2]], [[FMUL]] ; GFX9-DENORM: $vgpr0 = COPY [[FADD]](s32) - ; GFX9-DENORM: [[COPY4:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY3]] - ; GFX9-DENORM: S_SETPC_B64_return [[COPY4]], implicit $vgpr0 + ; GFX9-DENORM: S_SETPC_B64_return $sgpr30_sgpr31, implicit $vgpr0 ; GFX9-UNSAFE-LABEL: name: test_f32_add_mul_rhs ; GFX9-UNSAFE: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 ; GFX9-UNSAFE: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 ; GFX9-UNSAFE: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; GFX9-UNSAFE: [[COPY3:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 ; GFX9-UNSAFE: [[FMA:%[0-9]+]]:_(s32) = G_FMA [[COPY]], [[COPY1]], [[COPY2]] ; GFX9-UNSAFE: $vgpr0 = COPY [[FMA]](s32) - ; GFX9-UNSAFE: [[COPY4:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY3]] - ; GFX9-UNSAFE: S_SETPC_B64_return [[COPY4]], implicit $vgpr0 + ; GFX9-UNSAFE: S_SETPC_B64_return $sgpr30_sgpr31, implicit $vgpr0 ; GFX10-LABEL: name: test_f32_add_mul_rhs ; GFX10: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 ; GFX10: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 ; GFX10: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; GFX10: [[COPY3:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 ; GFX10: [[FMUL:%[0-9]+]]:_(s32) = reassoc G_FMUL [[COPY]], [[COPY1]] ; GFX10: [[FADD:%[0-9]+]]:_(s32) = reassoc G_FADD [[COPY2]], [[FMUL]] ; GFX10: $vgpr0 = COPY [[FADD]](s32) - ; GFX10: [[COPY4:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY3]] - ; GFX10: S_SETPC_B64_return [[COPY4]], implicit $vgpr0 + ; GFX10: S_SETPC_B64_return $sgpr30_sgpr31, implicit $vgpr0 ; GFX10-CONTRACT-LABEL: name: test_f32_add_mul_rhs ; GFX10-CONTRACT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 ; GFX10-CONTRACT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 ; GFX10-CONTRACT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; GFX10-CONTRACT: [[COPY3:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 ; GFX10-CONTRACT: [[FMA:%[0-9]+]]:_(s32) = G_FMA [[COPY]], [[COPY1]], [[COPY2]] ; 
GFX10-CONTRACT: $vgpr0 = COPY [[FMA]](s32) - ; GFX10-CONTRACT: [[COPY4:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY3]] - ; GFX10-CONTRACT: S_SETPC_B64_return [[COPY4]], implicit $vgpr0 + ; GFX10-CONTRACT: S_SETPC_B64_return $sgpr30_sgpr31, implicit $vgpr0 ; GFX10-DENORM-LABEL: name: test_f32_add_mul_rhs ; GFX10-DENORM: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 ; GFX10-DENORM: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 ; GFX10-DENORM: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; GFX10-DENORM: [[COPY3:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 ; GFX10-DENORM: [[FMUL:%[0-9]+]]:_(s32) = reassoc G_FMUL [[COPY]], [[COPY1]] ; GFX10-DENORM: [[FADD:%[0-9]+]]:_(s32) = reassoc G_FADD [[COPY2]], [[FMUL]] ; GFX10-DENORM: $vgpr0 = COPY [[FADD]](s32) - ; GFX10-DENORM: [[COPY4:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY3]] - ; GFX10-DENORM: S_SETPC_B64_return [[COPY4]], implicit $vgpr0 + ; GFX10-DENORM: S_SETPC_B64_return $sgpr30_sgpr31, implicit $vgpr0 ; GFX10-UNSAFE-LABEL: name: test_f32_add_mul_rhs ; GFX10-UNSAFE: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 ; GFX10-UNSAFE: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 ; GFX10-UNSAFE: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; GFX10-UNSAFE: [[COPY3:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 ; GFX10-UNSAFE: [[FMA:%[0-9]+]]:_(s32) = G_FMA [[COPY]], [[COPY1]], [[COPY2]] ; GFX10-UNSAFE: $vgpr0 = COPY [[FMA]](s32) - ; GFX10-UNSAFE: [[COPY4:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY3]] - ; GFX10-UNSAFE: S_SETPC_B64_return [[COPY4]], implicit $vgpr0 + ; GFX10-UNSAFE: S_SETPC_B64_return $sgpr30_sgpr31, implicit $vgpr0 %0:_(s32) = COPY $vgpr0 %1:_(s32) = COPY $vgpr1 %2:_(s32) = COPY $vgpr2 - %3:sgpr_64 = COPY $sgpr30_sgpr31 %4:_(s32) = reassoc G_FMUL %0, %1 %5:_(s32) = reassoc G_FADD %2, %4 $vgpr0 = COPY %5(s32) - %6:ccr_sgpr_64 = COPY %3 - S_SETPC_B64_return %6, implicit $vgpr0 + S_SETPC_B64_return $sgpr30_sgpr31, implicit $vgpr0 ... 
--- name: test_half_add_mul body: | bb.1.entry: - liveins: $vgpr0, $vgpr1, $vgpr2, $sgpr30_sgpr31 + liveins: $vgpr0, $vgpr1, $vgpr2 ; GFX9-LABEL: name: test_half_add_mul ; GFX9: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 @@ -207,13 +171,11 @@ body: | ; GFX9: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY1]](s32) ; GFX9: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 ; GFX9: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[COPY2]](s32) - ; GFX9: [[COPY3:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 ; GFX9: [[FMUL:%[0-9]+]]:_(s16) = reassoc G_FMUL [[TRUNC]], [[TRUNC1]] ; GFX9: [[FADD:%[0-9]+]]:_(s16) = reassoc G_FADD [[FMUL]], [[TRUNC2]] ; GFX9: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[FADD]](s16) ; GFX9: $vgpr0 = COPY [[ANYEXT]](s32) - ; GFX9: [[COPY4:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY3]] - ; GFX9: S_SETPC_B64_return [[COPY4]], implicit $vgpr0 + ; GFX9: S_SETPC_B64_return $sgpr30_sgpr31, implicit $vgpr0 ; GFX9-CONTRACT-LABEL: name: test_half_add_mul ; GFX9-CONTRACT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 ; GFX9-CONTRACT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY]](s32) @@ -221,12 +183,10 @@ body: | ; GFX9-CONTRACT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY1]](s32) ; GFX9-CONTRACT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 ; GFX9-CONTRACT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[COPY2]](s32) - ; GFX9-CONTRACT: [[COPY3:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 ; GFX9-CONTRACT: [[FMA:%[0-9]+]]:_(s16) = G_FMA [[TRUNC]], [[TRUNC1]], [[TRUNC2]] ; GFX9-CONTRACT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[FMA]](s16) ; GFX9-CONTRACT: $vgpr0 = COPY [[ANYEXT]](s32) - ; GFX9-CONTRACT: [[COPY4:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY3]] - ; GFX9-CONTRACT: S_SETPC_B64_return [[COPY4]], implicit $vgpr0 + ; GFX9-CONTRACT: S_SETPC_B64_return $sgpr30_sgpr31, implicit $vgpr0 ; GFX9-DENORM-LABEL: name: test_half_add_mul ; GFX9-DENORM: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 ; GFX9-DENORM: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY]](s32) @@ -234,13 +194,11 @@ body: | ; GFX9-DENORM: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY1]](s32) ; GFX9-DENORM: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 ; GFX9-DENORM: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[COPY2]](s32) - ; GFX9-DENORM: [[COPY3:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 ; GFX9-DENORM: [[FMUL:%[0-9]+]]:_(s16) = reassoc G_FMUL [[TRUNC]], [[TRUNC1]] ; GFX9-DENORM: [[FADD:%[0-9]+]]:_(s16) = reassoc G_FADD [[FMUL]], [[TRUNC2]] ; GFX9-DENORM: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[FADD]](s16) ; GFX9-DENORM: $vgpr0 = COPY [[ANYEXT]](s32) - ; GFX9-DENORM: [[COPY4:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY3]] - ; GFX9-DENORM: S_SETPC_B64_return [[COPY4]], implicit $vgpr0 + ; GFX9-DENORM: S_SETPC_B64_return $sgpr30_sgpr31, implicit $vgpr0 ; GFX9-UNSAFE-LABEL: name: test_half_add_mul ; GFX9-UNSAFE: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 ; GFX9-UNSAFE: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY]](s32) @@ -248,12 +206,10 @@ body: | ; GFX9-UNSAFE: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY1]](s32) ; GFX9-UNSAFE: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 ; GFX9-UNSAFE: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[COPY2]](s32) - ; GFX9-UNSAFE: [[COPY3:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 ; GFX9-UNSAFE: [[FMA:%[0-9]+]]:_(s16) = G_FMA [[TRUNC]], [[TRUNC1]], [[TRUNC2]] ; GFX9-UNSAFE: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[FMA]](s16) ; GFX9-UNSAFE: $vgpr0 = COPY [[ANYEXT]](s32) - ; GFX9-UNSAFE: [[COPY4:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY3]] - ; GFX9-UNSAFE: S_SETPC_B64_return [[COPY4]], implicit $vgpr0 + ; GFX9-UNSAFE: S_SETPC_B64_return $sgpr30_sgpr31, implicit $vgpr0 ; GFX10-LABEL: name: test_half_add_mul ; GFX10: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 ; 
GFX10: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY]](s32) @@ -261,13 +217,11 @@ body: | ; GFX10: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY1]](s32) ; GFX10: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 ; GFX10: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[COPY2]](s32) - ; GFX10: [[COPY3:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 ; GFX10: [[FMUL:%[0-9]+]]:_(s16) = reassoc G_FMUL [[TRUNC]], [[TRUNC1]] ; GFX10: [[FADD:%[0-9]+]]:_(s16) = reassoc G_FADD [[FMUL]], [[TRUNC2]] ; GFX10: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[FADD]](s16) ; GFX10: $vgpr0 = COPY [[ANYEXT]](s32) - ; GFX10: [[COPY4:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY3]] - ; GFX10: S_SETPC_B64_return [[COPY4]], implicit $vgpr0 + ; GFX10: S_SETPC_B64_return $sgpr30_sgpr31, implicit $vgpr0 ; GFX10-CONTRACT-LABEL: name: test_half_add_mul ; GFX10-CONTRACT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 ; GFX10-CONTRACT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY]](s32) @@ -275,12 +229,10 @@ body: | ; GFX10-CONTRACT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY1]](s32) ; GFX10-CONTRACT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 ; GFX10-CONTRACT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[COPY2]](s32) - ; GFX10-CONTRACT: [[COPY3:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 ; GFX10-CONTRACT: [[FMA:%[0-9]+]]:_(s16) = G_FMA [[TRUNC]], [[TRUNC1]], [[TRUNC2]] ; GFX10-CONTRACT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[FMA]](s16) ; GFX10-CONTRACT: $vgpr0 = COPY [[ANYEXT]](s32) - ; GFX10-CONTRACT: [[COPY4:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY3]] - ; GFX10-CONTRACT: S_SETPC_B64_return [[COPY4]], implicit $vgpr0 + ; GFX10-CONTRACT: S_SETPC_B64_return $sgpr30_sgpr31, implicit $vgpr0 ; GFX10-DENORM-LABEL: name: test_half_add_mul ; GFX10-DENORM: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 ; GFX10-DENORM: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY]](s32) @@ -288,13 +240,11 @@ body: | ; GFX10-DENORM: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY1]](s32) ; GFX10-DENORM: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 ; GFX10-DENORM: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[COPY2]](s32) - ; GFX10-DENORM: [[COPY3:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 ; GFX10-DENORM: [[FMUL:%[0-9]+]]:_(s16) = reassoc G_FMUL [[TRUNC]], [[TRUNC1]] ; GFX10-DENORM: [[FADD:%[0-9]+]]:_(s16) = reassoc G_FADD [[FMUL]], [[TRUNC2]] ; GFX10-DENORM: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[FADD]](s16) ; GFX10-DENORM: $vgpr0 = COPY [[ANYEXT]](s32) - ; GFX10-DENORM: [[COPY4:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY3]] - ; GFX10-DENORM: S_SETPC_B64_return [[COPY4]], implicit $vgpr0 + ; GFX10-DENORM: S_SETPC_B64_return $sgpr30_sgpr31, implicit $vgpr0 ; GFX10-UNSAFE-LABEL: name: test_half_add_mul ; GFX10-UNSAFE: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 ; GFX10-UNSAFE: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY]](s32) @@ -302,32 +252,28 @@ body: | ; GFX10-UNSAFE: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY1]](s32) ; GFX10-UNSAFE: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 ; GFX10-UNSAFE: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[COPY2]](s32) - ; GFX10-UNSAFE: [[COPY3:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 ; GFX10-UNSAFE: [[FMA:%[0-9]+]]:_(s16) = G_FMA [[TRUNC]], [[TRUNC1]], [[TRUNC2]] ; GFX10-UNSAFE: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[FMA]](s16) ; GFX10-UNSAFE: $vgpr0 = COPY [[ANYEXT]](s32) - ; GFX10-UNSAFE: [[COPY4:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY3]] - ; GFX10-UNSAFE: S_SETPC_B64_return [[COPY4]], implicit $vgpr0 + ; GFX10-UNSAFE: S_SETPC_B64_return $sgpr30_sgpr31, implicit $vgpr0 %4:_(s32) = COPY $vgpr0 %0:_(s16) = G_TRUNC %4(s32) %5:_(s32) = COPY $vgpr1 %1:_(s16) = G_TRUNC %5(s32) %6:_(s32) = COPY $vgpr2 %2:_(s16) = G_TRUNC %6(s32) - %3:sgpr_64 = COPY $sgpr30_sgpr31 
%7:_(s16) = reassoc G_FMUL %0, %1 %8:_(s16) = reassoc G_FADD %7, %2 %10:_(s32) = G_ANYEXT %8(s16) $vgpr0 = COPY %10(s32) - %9:ccr_sgpr_64 = COPY %3 - S_SETPC_B64_return %9, implicit $vgpr0 + S_SETPC_B64_return $sgpr30_sgpr31, implicit $vgpr0 ... --- name: test_half_add_mul_rhs body: | bb.1.entry: - liveins: $vgpr0, $vgpr1, $vgpr2, $sgpr30_sgpr31 + liveins: $vgpr0, $vgpr1, $vgpr2 ; GFX9-LABEL: name: test_half_add_mul_rhs ; GFX9: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 @@ -336,13 +282,11 @@ body: | ; GFX9: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY1]](s32) ; GFX9: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 ; GFX9: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[COPY2]](s32) - ; GFX9: [[COPY3:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 ; GFX9: [[FMUL:%[0-9]+]]:_(s16) = reassoc G_FMUL [[TRUNC]], [[TRUNC1]] ; GFX9: [[FADD:%[0-9]+]]:_(s16) = reassoc G_FADD [[TRUNC2]], [[FMUL]] ; GFX9: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[FADD]](s16) ; GFX9: $vgpr0 = COPY [[ANYEXT]](s32) - ; GFX9: [[COPY4:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY3]] - ; GFX9: S_SETPC_B64_return [[COPY4]], implicit $vgpr0 + ; GFX9: S_SETPC_B64_return $sgpr30_sgpr31, implicit $vgpr0 ; GFX9-CONTRACT-LABEL: name: test_half_add_mul_rhs ; GFX9-CONTRACT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 ; GFX9-CONTRACT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY]](s32) @@ -350,12 +294,10 @@ body: | ; GFX9-CONTRACT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY1]](s32) ; GFX9-CONTRACT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 ; GFX9-CONTRACT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[COPY2]](s32) - ; GFX9-CONTRACT: [[COPY3:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 ; GFX9-CONTRACT: [[FMA:%[0-9]+]]:_(s16) = G_FMA [[TRUNC]], [[TRUNC1]], [[TRUNC2]] ; GFX9-CONTRACT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[FMA]](s16) ; GFX9-CONTRACT: $vgpr0 = COPY [[ANYEXT]](s32) - ; GFX9-CONTRACT: [[COPY4:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY3]] - ; GFX9-CONTRACT: S_SETPC_B64_return [[COPY4]], implicit $vgpr0 + ; GFX9-CONTRACT: S_SETPC_B64_return $sgpr30_sgpr31, implicit $vgpr0 ; GFX9-DENORM-LABEL: name: test_half_add_mul_rhs ; GFX9-DENORM: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 ; GFX9-DENORM: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY]](s32) @@ -363,13 +305,11 @@ body: | ; GFX9-DENORM: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY1]](s32) ; GFX9-DENORM: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 ; GFX9-DENORM: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[COPY2]](s32) - ; GFX9-DENORM: [[COPY3:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 ; GFX9-DENORM: [[FMUL:%[0-9]+]]:_(s16) = reassoc G_FMUL [[TRUNC]], [[TRUNC1]] ; GFX9-DENORM: [[FADD:%[0-9]+]]:_(s16) = reassoc G_FADD [[TRUNC2]], [[FMUL]] ; GFX9-DENORM: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[FADD]](s16) ; GFX9-DENORM: $vgpr0 = COPY [[ANYEXT]](s32) - ; GFX9-DENORM: [[COPY4:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY3]] - ; GFX9-DENORM: S_SETPC_B64_return [[COPY4]], implicit $vgpr0 + ; GFX9-DENORM: S_SETPC_B64_return $sgpr30_sgpr31, implicit $vgpr0 ; GFX9-UNSAFE-LABEL: name: test_half_add_mul_rhs ; GFX9-UNSAFE: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 ; GFX9-UNSAFE: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY]](s32) @@ -377,12 +317,10 @@ body: | ; GFX9-UNSAFE: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY1]](s32) ; GFX9-UNSAFE: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 ; GFX9-UNSAFE: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[COPY2]](s32) - ; GFX9-UNSAFE: [[COPY3:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 ; GFX9-UNSAFE: [[FMA:%[0-9]+]]:_(s16) = G_FMA [[TRUNC]], [[TRUNC1]], [[TRUNC2]] ; GFX9-UNSAFE: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[FMA]](s16) ; GFX9-UNSAFE: $vgpr0 = COPY [[ANYEXT]](s32) - ; GFX9-UNSAFE: 
[[COPY4:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY3]] - ; GFX9-UNSAFE: S_SETPC_B64_return [[COPY4]], implicit $vgpr0 + ; GFX9-UNSAFE: S_SETPC_B64_return $sgpr30_sgpr31, implicit $vgpr0 ; GFX10-LABEL: name: test_half_add_mul_rhs ; GFX10: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 ; GFX10: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY]](s32) @@ -390,13 +328,11 @@ body: | ; GFX10: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY1]](s32) ; GFX10: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 ; GFX10: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[COPY2]](s32) - ; GFX10: [[COPY3:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 ; GFX10: [[FMUL:%[0-9]+]]:_(s16) = reassoc G_FMUL [[TRUNC]], [[TRUNC1]] ; GFX10: [[FADD:%[0-9]+]]:_(s16) = reassoc G_FADD [[TRUNC2]], [[FMUL]] ; GFX10: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[FADD]](s16) ; GFX10: $vgpr0 = COPY [[ANYEXT]](s32) - ; GFX10: [[COPY4:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY3]] - ; GFX10: S_SETPC_B64_return [[COPY4]], implicit $vgpr0 + ; GFX10: S_SETPC_B64_return $sgpr30_sgpr31, implicit $vgpr0 ; GFX10-CONTRACT-LABEL: name: test_half_add_mul_rhs ; GFX10-CONTRACT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 ; GFX10-CONTRACT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY]](s32) @@ -404,12 +340,10 @@ body: | ; GFX10-CONTRACT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY1]](s32) ; GFX10-CONTRACT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 ; GFX10-CONTRACT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[COPY2]](s32) - ; GFX10-CONTRACT: [[COPY3:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 ; GFX10-CONTRACT: [[FMA:%[0-9]+]]:_(s16) = G_FMA [[TRUNC]], [[TRUNC1]], [[TRUNC2]] ; GFX10-CONTRACT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[FMA]](s16) ; GFX10-CONTRACT: $vgpr0 = COPY [[ANYEXT]](s32) - ; GFX10-CONTRACT: [[COPY4:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY3]] - ; GFX10-CONTRACT: S_SETPC_B64_return [[COPY4]], implicit $vgpr0 + ; GFX10-CONTRACT: S_SETPC_B64_return $sgpr30_sgpr31, implicit $vgpr0 ; GFX10-DENORM-LABEL: name: test_half_add_mul_rhs ; GFX10-DENORM: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 ; GFX10-DENORM: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY]](s32) @@ -417,13 +351,11 @@ body: | ; GFX10-DENORM: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY1]](s32) ; GFX10-DENORM: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 ; GFX10-DENORM: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[COPY2]](s32) - ; GFX10-DENORM: [[COPY3:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 ; GFX10-DENORM: [[FMUL:%[0-9]+]]:_(s16) = reassoc G_FMUL [[TRUNC]], [[TRUNC1]] ; GFX10-DENORM: [[FADD:%[0-9]+]]:_(s16) = reassoc G_FADD [[TRUNC2]], [[FMUL]] ; GFX10-DENORM: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[FADD]](s16) ; GFX10-DENORM: $vgpr0 = COPY [[ANYEXT]](s32) - ; GFX10-DENORM: [[COPY4:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY3]] - ; GFX10-DENORM: S_SETPC_B64_return [[COPY4]], implicit $vgpr0 + ; GFX10-DENORM: S_SETPC_B64_return $sgpr30_sgpr31, implicit $vgpr0 ; GFX10-UNSAFE-LABEL: name: test_half_add_mul_rhs ; GFX10-UNSAFE: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 ; GFX10-UNSAFE: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY]](s32) @@ -431,32 +363,28 @@ body: | ; GFX10-UNSAFE: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY1]](s32) ; GFX10-UNSAFE: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 ; GFX10-UNSAFE: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[COPY2]](s32) - ; GFX10-UNSAFE: [[COPY3:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 ; GFX10-UNSAFE: [[FMA:%[0-9]+]]:_(s16) = G_FMA [[TRUNC]], [[TRUNC1]], [[TRUNC2]] ; GFX10-UNSAFE: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[FMA]](s16) ; GFX10-UNSAFE: $vgpr0 = COPY [[ANYEXT]](s32) - ; GFX10-UNSAFE: [[COPY4:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY3]] - ; GFX10-UNSAFE: S_SETPC_B64_return [[COPY4]], 
implicit $vgpr0 + ; GFX10-UNSAFE: S_SETPC_B64_return $sgpr30_sgpr31, implicit $vgpr0 %4:_(s32) = COPY $vgpr0 %0:_(s16) = G_TRUNC %4(s32) %5:_(s32) = COPY $vgpr1 %1:_(s16) = G_TRUNC %5(s32) %6:_(s32) = COPY $vgpr2 %2:_(s16) = G_TRUNC %6(s32) - %3:sgpr_64 = COPY $sgpr30_sgpr31 %7:_(s16) = reassoc G_FMUL %0, %1 %8:_(s16) = reassoc G_FADD %2, %7 %10:_(s32) = G_ANYEXT %8(s16) $vgpr0 = COPY %10(s32) - %9:ccr_sgpr_64 = COPY %3 - S_SETPC_B64_return %9, implicit $vgpr0 + S_SETPC_B64_return $sgpr30_sgpr31, implicit $vgpr0 ... --- name: test_double_add_mul body: | bb.1.entry: - liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $sgpr30_sgpr31 + liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5 ; GFX9-LABEL: name: test_double_add_mul ; GFX9: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 @@ -468,14 +396,12 @@ body: | ; GFX9: [[COPY4:%[0-9]+]]:_(s32) = COPY $vgpr4 ; GFX9: [[COPY5:%[0-9]+]]:_(s32) = COPY $vgpr5 ; GFX9: [[MV2:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY4]](s32), [[COPY5]](s32) - ; GFX9: [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 ; GFX9: [[FMUL:%[0-9]+]]:_(s64) = reassoc G_FMUL [[MV]], [[MV1]] ; GFX9: [[FADD:%[0-9]+]]:_(s64) = reassoc G_FADD [[FMUL]], [[MV2]] ; GFX9: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[FADD]](s64) ; GFX9: $vgpr0 = COPY [[UV]](s32) ; GFX9: $vgpr1 = COPY [[UV1]](s32) - ; GFX9: [[COPY7:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY6]] - ; GFX9: S_SETPC_B64_return [[COPY7]], implicit $vgpr0, implicit $vgpr1 + ; GFX9: S_SETPC_B64_return $sgpr30_sgpr31, implicit $vgpr0, implicit $vgpr1 ; GFX9-CONTRACT-LABEL: name: test_double_add_mul ; GFX9-CONTRACT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 ; GFX9-CONTRACT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 @@ -486,13 +412,11 @@ body: | ; GFX9-CONTRACT: [[COPY4:%[0-9]+]]:_(s32) = COPY $vgpr4 ; GFX9-CONTRACT: [[COPY5:%[0-9]+]]:_(s32) = COPY $vgpr5 ; GFX9-CONTRACT: [[MV2:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY4]](s32), [[COPY5]](s32) - ; GFX9-CONTRACT: [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 ; GFX9-CONTRACT: [[FMA:%[0-9]+]]:_(s64) = G_FMA [[MV]], [[MV1]], [[MV2]] ; GFX9-CONTRACT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[FMA]](s64) ; GFX9-CONTRACT: $vgpr0 = COPY [[UV]](s32) ; GFX9-CONTRACT: $vgpr1 = COPY [[UV1]](s32) - ; GFX9-CONTRACT: [[COPY7:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY6]] - ; GFX9-CONTRACT: S_SETPC_B64_return [[COPY7]], implicit $vgpr0, implicit $vgpr1 + ; GFX9-CONTRACT: S_SETPC_B64_return $sgpr30_sgpr31, implicit $vgpr0, implicit $vgpr1 ; GFX9-DENORM-LABEL: name: test_double_add_mul ; GFX9-DENORM: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 ; GFX9-DENORM: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 @@ -503,14 +427,12 @@ body: | ; GFX9-DENORM: [[COPY4:%[0-9]+]]:_(s32) = COPY $vgpr4 ; GFX9-DENORM: [[COPY5:%[0-9]+]]:_(s32) = COPY $vgpr5 ; GFX9-DENORM: [[MV2:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY4]](s32), [[COPY5]](s32) - ; GFX9-DENORM: [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 ; GFX9-DENORM: [[FMUL:%[0-9]+]]:_(s64) = reassoc G_FMUL [[MV]], [[MV1]] ; GFX9-DENORM: [[FADD:%[0-9]+]]:_(s64) = reassoc G_FADD [[FMUL]], [[MV2]] ; GFX9-DENORM: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[FADD]](s64) ; GFX9-DENORM: $vgpr0 = COPY [[UV]](s32) ; GFX9-DENORM: $vgpr1 = COPY [[UV1]](s32) - ; GFX9-DENORM: [[COPY7:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY6]] - ; GFX9-DENORM: S_SETPC_B64_return [[COPY7]], implicit $vgpr0, implicit $vgpr1 + ; GFX9-DENORM: S_SETPC_B64_return $sgpr30_sgpr31, implicit $vgpr0, implicit $vgpr1 ; GFX9-UNSAFE-LABEL: name: test_double_add_mul ; GFX9-UNSAFE: 
[[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 ; GFX9-UNSAFE: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 @@ -521,13 +443,11 @@ body: | ; GFX9-UNSAFE: [[COPY4:%[0-9]+]]:_(s32) = COPY $vgpr4 ; GFX9-UNSAFE: [[COPY5:%[0-9]+]]:_(s32) = COPY $vgpr5 ; GFX9-UNSAFE: [[MV2:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY4]](s32), [[COPY5]](s32) - ; GFX9-UNSAFE: [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 ; GFX9-UNSAFE: [[FMA:%[0-9]+]]:_(s64) = G_FMA [[MV]], [[MV1]], [[MV2]] ; GFX9-UNSAFE: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[FMA]](s64) ; GFX9-UNSAFE: $vgpr0 = COPY [[UV]](s32) ; GFX9-UNSAFE: $vgpr1 = COPY [[UV1]](s32) - ; GFX9-UNSAFE: [[COPY7:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY6]] - ; GFX9-UNSAFE: S_SETPC_B64_return [[COPY7]], implicit $vgpr0, implicit $vgpr1 + ; GFX9-UNSAFE: S_SETPC_B64_return $sgpr30_sgpr31, implicit $vgpr0, implicit $vgpr1 ; GFX10-LABEL: name: test_double_add_mul ; GFX10: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 ; GFX10: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 @@ -538,14 +458,12 @@ body: | ; GFX10: [[COPY4:%[0-9]+]]:_(s32) = COPY $vgpr4 ; GFX10: [[COPY5:%[0-9]+]]:_(s32) = COPY $vgpr5 ; GFX10: [[MV2:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY4]](s32), [[COPY5]](s32) - ; GFX10: [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 ; GFX10: [[FMUL:%[0-9]+]]:_(s64) = reassoc G_FMUL [[MV]], [[MV1]] ; GFX10: [[FADD:%[0-9]+]]:_(s64) = reassoc G_FADD [[FMUL]], [[MV2]] ; GFX10: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[FADD]](s64) ; GFX10: $vgpr0 = COPY [[UV]](s32) ; GFX10: $vgpr1 = COPY [[UV1]](s32) - ; GFX10: [[COPY7:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY6]] - ; GFX10: S_SETPC_B64_return [[COPY7]], implicit $vgpr0, implicit $vgpr1 + ; GFX10: S_SETPC_B64_return $sgpr30_sgpr31, implicit $vgpr0, implicit $vgpr1 ; GFX10-CONTRACT-LABEL: name: test_double_add_mul ; GFX10-CONTRACT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 ; GFX10-CONTRACT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 @@ -556,13 +474,11 @@ body: | ; GFX10-CONTRACT: [[COPY4:%[0-9]+]]:_(s32) = COPY $vgpr4 ; GFX10-CONTRACT: [[COPY5:%[0-9]+]]:_(s32) = COPY $vgpr5 ; GFX10-CONTRACT: [[MV2:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY4]](s32), [[COPY5]](s32) - ; GFX10-CONTRACT: [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 ; GFX10-CONTRACT: [[FMA:%[0-9]+]]:_(s64) = G_FMA [[MV]], [[MV1]], [[MV2]] ; GFX10-CONTRACT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[FMA]](s64) ; GFX10-CONTRACT: $vgpr0 = COPY [[UV]](s32) ; GFX10-CONTRACT: $vgpr1 = COPY [[UV1]](s32) - ; GFX10-CONTRACT: [[COPY7:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY6]] - ; GFX10-CONTRACT: S_SETPC_B64_return [[COPY7]], implicit $vgpr0, implicit $vgpr1 + ; GFX10-CONTRACT: S_SETPC_B64_return $sgpr30_sgpr31, implicit $vgpr0, implicit $vgpr1 ; GFX10-DENORM-LABEL: name: test_double_add_mul ; GFX10-DENORM: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 ; GFX10-DENORM: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 @@ -573,14 +489,12 @@ body: | ; GFX10-DENORM: [[COPY4:%[0-9]+]]:_(s32) = COPY $vgpr4 ; GFX10-DENORM: [[COPY5:%[0-9]+]]:_(s32) = COPY $vgpr5 ; GFX10-DENORM: [[MV2:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY4]](s32), [[COPY5]](s32) - ; GFX10-DENORM: [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 ; GFX10-DENORM: [[FMUL:%[0-9]+]]:_(s64) = reassoc G_FMUL [[MV]], [[MV1]] ; GFX10-DENORM: [[FADD:%[0-9]+]]:_(s64) = reassoc G_FADD [[FMUL]], [[MV2]] ; GFX10-DENORM: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[FADD]](s64) ; GFX10-DENORM: $vgpr0 = COPY [[UV]](s32) ; GFX10-DENORM: $vgpr1 = COPY [[UV1]](s32) - ; GFX10-DENORM: [[COPY7:%[0-9]+]]:ccr_sgpr_64 = 
COPY [[COPY6]] - ; GFX10-DENORM: S_SETPC_B64_return [[COPY7]], implicit $vgpr0, implicit $vgpr1 + ; GFX10-DENORM: S_SETPC_B64_return $sgpr30_sgpr31, implicit $vgpr0, implicit $vgpr1 ; GFX10-UNSAFE-LABEL: name: test_double_add_mul ; GFX10-UNSAFE: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 ; GFX10-UNSAFE: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 @@ -591,13 +505,11 @@ body: | ; GFX10-UNSAFE: [[COPY4:%[0-9]+]]:_(s32) = COPY $vgpr4 ; GFX10-UNSAFE: [[COPY5:%[0-9]+]]:_(s32) = COPY $vgpr5 ; GFX10-UNSAFE: [[MV2:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY4]](s32), [[COPY5]](s32) - ; GFX10-UNSAFE: [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 ; GFX10-UNSAFE: [[FMA:%[0-9]+]]:_(s64) = G_FMA [[MV]], [[MV1]], [[MV2]] ; GFX10-UNSAFE: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[FMA]](s64) ; GFX10-UNSAFE: $vgpr0 = COPY [[UV]](s32) ; GFX10-UNSAFE: $vgpr1 = COPY [[UV1]](s32) - ; GFX10-UNSAFE: [[COPY7:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY6]] - ; GFX10-UNSAFE: S_SETPC_B64_return [[COPY7]], implicit $vgpr0, implicit $vgpr1 + ; GFX10-UNSAFE: S_SETPC_B64_return $sgpr30_sgpr31, implicit $vgpr0, implicit $vgpr1 %4:_(s32) = COPY $vgpr0 %5:_(s32) = COPY $vgpr1 %0:_(s64) = G_MERGE_VALUES %4(s32), %5(s32) @@ -607,21 +519,19 @@ body: | %8:_(s32) = COPY $vgpr4 %9:_(s32) = COPY $vgpr5 %2:_(s64) = G_MERGE_VALUES %8(s32), %9(s32) - %3:sgpr_64 = COPY $sgpr30_sgpr31 %10:_(s64) = reassoc G_FMUL %0, %1 %11:_(s64) = reassoc G_FADD %10, %2 %13:_(s32), %14:_(s32) = G_UNMERGE_VALUES %11(s64) $vgpr0 = COPY %13(s32) $vgpr1 = COPY %14(s32) - %12:ccr_sgpr_64 = COPY %3 - S_SETPC_B64_return %12, implicit $vgpr0, implicit $vgpr1 + S_SETPC_B64_return $sgpr30_sgpr31, implicit $vgpr0, implicit $vgpr1 ... --- name: test_double_add_mul_rhs body: | bb.1.entry: - liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $sgpr30_sgpr31 + liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5 ; GFX9-LABEL: name: test_double_add_mul_rhs ; GFX9: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 @@ -633,14 +543,12 @@ body: | ; GFX9: [[COPY4:%[0-9]+]]:_(s32) = COPY $vgpr4 ; GFX9: [[COPY5:%[0-9]+]]:_(s32) = COPY $vgpr5 ; GFX9: [[MV2:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY4]](s32), [[COPY5]](s32) - ; GFX9: [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 ; GFX9: [[FMUL:%[0-9]+]]:_(s64) = reassoc G_FMUL [[MV]], [[MV1]] ; GFX9: [[FADD:%[0-9]+]]:_(s64) = reassoc G_FADD [[MV2]], [[FMUL]] ; GFX9: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[FADD]](s64) ; GFX9: $vgpr0 = COPY [[UV]](s32) ; GFX9: $vgpr1 = COPY [[UV1]](s32) - ; GFX9: [[COPY7:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY6]] - ; GFX9: S_SETPC_B64_return [[COPY7]], implicit $vgpr0, implicit $vgpr1 + ; GFX9: S_SETPC_B64_return $sgpr30_sgpr31, implicit $vgpr0, implicit $vgpr1 ; GFX9-CONTRACT-LABEL: name: test_double_add_mul_rhs ; GFX9-CONTRACT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 ; GFX9-CONTRACT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 @@ -651,13 +559,11 @@ body: | ; GFX9-CONTRACT: [[COPY4:%[0-9]+]]:_(s32) = COPY $vgpr4 ; GFX9-CONTRACT: [[COPY5:%[0-9]+]]:_(s32) = COPY $vgpr5 ; GFX9-CONTRACT: [[MV2:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY4]](s32), [[COPY5]](s32) - ; GFX9-CONTRACT: [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 ; GFX9-CONTRACT: [[FMA:%[0-9]+]]:_(s64) = G_FMA [[MV]], [[MV1]], [[MV2]] ; GFX9-CONTRACT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[FMA]](s64) ; GFX9-CONTRACT: $vgpr0 = COPY [[UV]](s32) ; GFX9-CONTRACT: $vgpr1 = COPY [[UV1]](s32) - ; GFX9-CONTRACT: [[COPY7:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY6]] - ; GFX9-CONTRACT: S_SETPC_B64_return 
[[COPY7]], implicit $vgpr0, implicit $vgpr1 + ; GFX9-CONTRACT: S_SETPC_B64_return $sgpr30_sgpr31, implicit $vgpr0, implicit $vgpr1 ; GFX9-DENORM-LABEL: name: test_double_add_mul_rhs ; GFX9-DENORM: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 ; GFX9-DENORM: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 @@ -668,14 +574,12 @@ body: | ; GFX9-DENORM: [[COPY4:%[0-9]+]]:_(s32) = COPY $vgpr4 ; GFX9-DENORM: [[COPY5:%[0-9]+]]:_(s32) = COPY $vgpr5 ; GFX9-DENORM: [[MV2:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY4]](s32), [[COPY5]](s32) - ; GFX9-DENORM: [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 ; GFX9-DENORM: [[FMUL:%[0-9]+]]:_(s64) = reassoc G_FMUL [[MV]], [[MV1]] ; GFX9-DENORM: [[FADD:%[0-9]+]]:_(s64) = reassoc G_FADD [[MV2]], [[FMUL]] ; GFX9-DENORM: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[FADD]](s64) ; GFX9-DENORM: $vgpr0 = COPY [[UV]](s32) ; GFX9-DENORM: $vgpr1 = COPY [[UV1]](s32) - ; GFX9-DENORM: [[COPY7:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY6]] - ; GFX9-DENORM: S_SETPC_B64_return [[COPY7]], implicit $vgpr0, implicit $vgpr1 + ; GFX9-DENORM: S_SETPC_B64_return $sgpr30_sgpr31, implicit $vgpr0, implicit $vgpr1 ; GFX9-UNSAFE-LABEL: name: test_double_add_mul_rhs ; GFX9-UNSAFE: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 ; GFX9-UNSAFE: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 @@ -686,13 +590,11 @@ body: | ; GFX9-UNSAFE: [[COPY4:%[0-9]+]]:_(s32) = COPY $vgpr4 ; GFX9-UNSAFE: [[COPY5:%[0-9]+]]:_(s32) = COPY $vgpr5 ; GFX9-UNSAFE: [[MV2:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY4]](s32), [[COPY5]](s32) - ; GFX9-UNSAFE: [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 ; GFX9-UNSAFE: [[FMA:%[0-9]+]]:_(s64) = G_FMA [[MV]], [[MV1]], [[MV2]] ; GFX9-UNSAFE: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[FMA]](s64) ; GFX9-UNSAFE: $vgpr0 = COPY [[UV]](s32) ; GFX9-UNSAFE: $vgpr1 = COPY [[UV1]](s32) - ; GFX9-UNSAFE: [[COPY7:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY6]] - ; GFX9-UNSAFE: S_SETPC_B64_return [[COPY7]], implicit $vgpr0, implicit $vgpr1 + ; GFX9-UNSAFE: S_SETPC_B64_return $sgpr30_sgpr31, implicit $vgpr0, implicit $vgpr1 ; GFX10-LABEL: name: test_double_add_mul_rhs ; GFX10: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 ; GFX10: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 @@ -703,14 +605,12 @@ body: | ; GFX10: [[COPY4:%[0-9]+]]:_(s32) = COPY $vgpr4 ; GFX10: [[COPY5:%[0-9]+]]:_(s32) = COPY $vgpr5 ; GFX10: [[MV2:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY4]](s32), [[COPY5]](s32) - ; GFX10: [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 ; GFX10: [[FMUL:%[0-9]+]]:_(s64) = reassoc G_FMUL [[MV]], [[MV1]] ; GFX10: [[FADD:%[0-9]+]]:_(s64) = reassoc G_FADD [[MV2]], [[FMUL]] ; GFX10: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[FADD]](s64) ; GFX10: $vgpr0 = COPY [[UV]](s32) ; GFX10: $vgpr1 = COPY [[UV1]](s32) - ; GFX10: [[COPY7:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY6]] - ; GFX10: S_SETPC_B64_return [[COPY7]], implicit $vgpr0, implicit $vgpr1 + ; GFX10: S_SETPC_B64_return $sgpr30_sgpr31, implicit $vgpr0, implicit $vgpr1 ; GFX10-CONTRACT-LABEL: name: test_double_add_mul_rhs ; GFX10-CONTRACT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 ; GFX10-CONTRACT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 @@ -721,13 +621,11 @@ body: | ; GFX10-CONTRACT: [[COPY4:%[0-9]+]]:_(s32) = COPY $vgpr4 ; GFX10-CONTRACT: [[COPY5:%[0-9]+]]:_(s32) = COPY $vgpr5 ; GFX10-CONTRACT: [[MV2:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY4]](s32), [[COPY5]](s32) - ; GFX10-CONTRACT: [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 ; GFX10-CONTRACT: [[FMA:%[0-9]+]]:_(s64) = G_FMA [[MV]], [[MV1]], [[MV2]] ; GFX10-CONTRACT: [[UV:%[0-9]+]]:_(s32), 
[[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[FMA]](s64) ; GFX10-CONTRACT: $vgpr0 = COPY [[UV]](s32) ; GFX10-CONTRACT: $vgpr1 = COPY [[UV1]](s32) - ; GFX10-CONTRACT: [[COPY7:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY6]] - ; GFX10-CONTRACT: S_SETPC_B64_return [[COPY7]], implicit $vgpr0, implicit $vgpr1 + ; GFX10-CONTRACT: S_SETPC_B64_return $sgpr30_sgpr31, implicit $vgpr0, implicit $vgpr1 ; GFX10-DENORM-LABEL: name: test_double_add_mul_rhs ; GFX10-DENORM: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 ; GFX10-DENORM: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 @@ -738,14 +636,12 @@ body: | ; GFX10-DENORM: [[COPY4:%[0-9]+]]:_(s32) = COPY $vgpr4 ; GFX10-DENORM: [[COPY5:%[0-9]+]]:_(s32) = COPY $vgpr5 ; GFX10-DENORM: [[MV2:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY4]](s32), [[COPY5]](s32) - ; GFX10-DENORM: [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 ; GFX10-DENORM: [[FMUL:%[0-9]+]]:_(s64) = reassoc G_FMUL [[MV]], [[MV1]] ; GFX10-DENORM: [[FADD:%[0-9]+]]:_(s64) = reassoc G_FADD [[MV2]], [[FMUL]] ; GFX10-DENORM: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[FADD]](s64) ; GFX10-DENORM: $vgpr0 = COPY [[UV]](s32) ; GFX10-DENORM: $vgpr1 = COPY [[UV1]](s32) - ; GFX10-DENORM: [[COPY7:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY6]] - ; GFX10-DENORM: S_SETPC_B64_return [[COPY7]], implicit $vgpr0, implicit $vgpr1 + ; GFX10-DENORM: S_SETPC_B64_return $sgpr30_sgpr31, implicit $vgpr0, implicit $vgpr1 ; GFX10-UNSAFE-LABEL: name: test_double_add_mul_rhs ; GFX10-UNSAFE: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 ; GFX10-UNSAFE: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 @@ -756,13 +652,11 @@ body: | ; GFX10-UNSAFE: [[COPY4:%[0-9]+]]:_(s32) = COPY $vgpr4 ; GFX10-UNSAFE: [[COPY5:%[0-9]+]]:_(s32) = COPY $vgpr5 ; GFX10-UNSAFE: [[MV2:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY4]](s32), [[COPY5]](s32) - ; GFX10-UNSAFE: [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 ; GFX10-UNSAFE: [[FMA:%[0-9]+]]:_(s64) = G_FMA [[MV]], [[MV1]], [[MV2]] ; GFX10-UNSAFE: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[FMA]](s64) ; GFX10-UNSAFE: $vgpr0 = COPY [[UV]](s32) ; GFX10-UNSAFE: $vgpr1 = COPY [[UV1]](s32) - ; GFX10-UNSAFE: [[COPY7:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY6]] - ; GFX10-UNSAFE: S_SETPC_B64_return [[COPY7]], implicit $vgpr0, implicit $vgpr1 + ; GFX10-UNSAFE: S_SETPC_B64_return $sgpr30_sgpr31, implicit $vgpr0, implicit $vgpr1 %4:_(s32) = COPY $vgpr0 %5:_(s32) = COPY $vgpr1 %0:_(s64) = G_MERGE_VALUES %4(s32), %5(s32) @@ -772,21 +666,19 @@ body: | %8:_(s32) = COPY $vgpr4 %9:_(s32) = COPY $vgpr5 %2:_(s64) = G_MERGE_VALUES %8(s32), %9(s32) - %3:sgpr_64 = COPY $sgpr30_sgpr31 %10:_(s64) = reassoc G_FMUL %0, %1 %11:_(s64) = reassoc G_FADD %2, %10 %13:_(s32), %14:_(s32) = G_UNMERGE_VALUES %11(s64) $vgpr0 = COPY %13(s32) $vgpr1 = COPY %14(s32) - %12:ccr_sgpr_64 = COPY %3 - S_SETPC_B64_return %12, implicit $vgpr0, implicit $vgpr1 + S_SETPC_B64_return $sgpr30_sgpr31, implicit $vgpr0, implicit $vgpr1 ... 
--- name: test_4xfloat_add_mul body: | bb.1.entry: - liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $vgpr8, $vgpr9, $vgpr10, $vgpr11, $sgpr30_sgpr31 + liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $vgpr8, $vgpr9, $vgpr10, $vgpr11 ; GFX9-LABEL: name: test_4xfloat_add_mul ; GFX9: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 @@ -804,7 +696,6 @@ body: | ; GFX9: [[COPY10:%[0-9]+]]:_(s32) = COPY $vgpr10 ; GFX9: [[COPY11:%[0-9]+]]:_(s32) = COPY $vgpr11 ; GFX9: [[BUILD_VECTOR2:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32), [[COPY10]](s32), [[COPY11]](s32) - ; GFX9: [[COPY12:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 ; GFX9: [[FMUL:%[0-9]+]]:_(<4 x s32>) = reassoc G_FMUL [[BUILD_VECTOR]], [[BUILD_VECTOR1]] ; GFX9: [[FADD:%[0-9]+]]:_(<4 x s32>) = reassoc G_FADD [[FMUL]], [[BUILD_VECTOR2]] ; GFX9: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[FADD]](<4 x s32>) @@ -812,8 +703,7 @@ body: | ; GFX9: $vgpr1 = COPY [[UV1]](s32) ; GFX9: $vgpr2 = COPY [[UV2]](s32) ; GFX9: $vgpr3 = COPY [[UV3]](s32) - ; GFX9: [[COPY13:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY12]] - ; GFX9: S_SETPC_B64_return [[COPY13]], implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3 + ; GFX9: S_SETPC_B64_return $sgpr30_sgpr31, implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3 ; GFX9-CONTRACT-LABEL: name: test_4xfloat_add_mul ; GFX9-CONTRACT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 ; GFX9-CONTRACT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 @@ -830,15 +720,13 @@ body: | ; GFX9-CONTRACT: [[COPY10:%[0-9]+]]:_(s32) = COPY $vgpr10 ; GFX9-CONTRACT: [[COPY11:%[0-9]+]]:_(s32) = COPY $vgpr11 ; GFX9-CONTRACT: [[BUILD_VECTOR2:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32), [[COPY10]](s32), [[COPY11]](s32) - ; GFX9-CONTRACT: [[COPY12:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 ; GFX9-CONTRACT: [[FMA:%[0-9]+]]:_(<4 x s32>) = G_FMA [[BUILD_VECTOR]], [[BUILD_VECTOR1]], [[BUILD_VECTOR2]] ; GFX9-CONTRACT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[FMA]](<4 x s32>) ; GFX9-CONTRACT: $vgpr0 = COPY [[UV]](s32) ; GFX9-CONTRACT: $vgpr1 = COPY [[UV1]](s32) ; GFX9-CONTRACT: $vgpr2 = COPY [[UV2]](s32) ; GFX9-CONTRACT: $vgpr3 = COPY [[UV3]](s32) - ; GFX9-CONTRACT: [[COPY13:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY12]] - ; GFX9-CONTRACT: S_SETPC_B64_return [[COPY13]], implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3 + ; GFX9-CONTRACT: S_SETPC_B64_return $sgpr30_sgpr31, implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3 ; GFX9-DENORM-LABEL: name: test_4xfloat_add_mul ; GFX9-DENORM: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 ; GFX9-DENORM: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 @@ -855,7 +743,6 @@ body: | ; GFX9-DENORM: [[COPY10:%[0-9]+]]:_(s32) = COPY $vgpr10 ; GFX9-DENORM: [[COPY11:%[0-9]+]]:_(s32) = COPY $vgpr11 ; GFX9-DENORM: [[BUILD_VECTOR2:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32), [[COPY10]](s32), [[COPY11]](s32) - ; GFX9-DENORM: [[COPY12:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 ; GFX9-DENORM: [[FMUL:%[0-9]+]]:_(<4 x s32>) = reassoc G_FMUL [[BUILD_VECTOR]], [[BUILD_VECTOR1]] ; GFX9-DENORM: [[FADD:%[0-9]+]]:_(<4 x s32>) = reassoc G_FADD [[FMUL]], [[BUILD_VECTOR2]] ; GFX9-DENORM: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[FADD]](<4 x s32>) @@ -863,8 +750,7 @@ body: | ; GFX9-DENORM: $vgpr1 = COPY 
[[UV1]](s32) ; GFX9-DENORM: $vgpr2 = COPY [[UV2]](s32) ; GFX9-DENORM: $vgpr3 = COPY [[UV3]](s32) - ; GFX9-DENORM: [[COPY13:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY12]] - ; GFX9-DENORM: S_SETPC_B64_return [[COPY13]], implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3 + ; GFX9-DENORM: S_SETPC_B64_return $sgpr30_sgpr31, implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3 ; GFX9-UNSAFE-LABEL: name: test_4xfloat_add_mul ; GFX9-UNSAFE: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 ; GFX9-UNSAFE: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 @@ -881,15 +767,13 @@ body: | ; GFX9-UNSAFE: [[COPY10:%[0-9]+]]:_(s32) = COPY $vgpr10 ; GFX9-UNSAFE: [[COPY11:%[0-9]+]]:_(s32) = COPY $vgpr11 ; GFX9-UNSAFE: [[BUILD_VECTOR2:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32), [[COPY10]](s32), [[COPY11]](s32) - ; GFX9-UNSAFE: [[COPY12:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 ; GFX9-UNSAFE: [[FMA:%[0-9]+]]:_(<4 x s32>) = G_FMA [[BUILD_VECTOR]], [[BUILD_VECTOR1]], [[BUILD_VECTOR2]] ; GFX9-UNSAFE: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[FMA]](<4 x s32>) ; GFX9-UNSAFE: $vgpr0 = COPY [[UV]](s32) ; GFX9-UNSAFE: $vgpr1 = COPY [[UV1]](s32) ; GFX9-UNSAFE: $vgpr2 = COPY [[UV2]](s32) ; GFX9-UNSAFE: $vgpr3 = COPY [[UV3]](s32) - ; GFX9-UNSAFE: [[COPY13:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY12]] - ; GFX9-UNSAFE: S_SETPC_B64_return [[COPY13]], implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3 + ; GFX9-UNSAFE: S_SETPC_B64_return $sgpr30_sgpr31, implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3 ; GFX10-LABEL: name: test_4xfloat_add_mul ; GFX10: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 ; GFX10: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 @@ -906,7 +790,6 @@ body: | ; GFX10: [[COPY10:%[0-9]+]]:_(s32) = COPY $vgpr10 ; GFX10: [[COPY11:%[0-9]+]]:_(s32) = COPY $vgpr11 ; GFX10: [[BUILD_VECTOR2:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32), [[COPY10]](s32), [[COPY11]](s32) - ; GFX10: [[COPY12:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 ; GFX10: [[FMUL:%[0-9]+]]:_(<4 x s32>) = reassoc G_FMUL [[BUILD_VECTOR]], [[BUILD_VECTOR1]] ; GFX10: [[FADD:%[0-9]+]]:_(<4 x s32>) = reassoc G_FADD [[FMUL]], [[BUILD_VECTOR2]] ; GFX10: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[FADD]](<4 x s32>) @@ -914,8 +797,7 @@ body: | ; GFX10: $vgpr1 = COPY [[UV1]](s32) ; GFX10: $vgpr2 = COPY [[UV2]](s32) ; GFX10: $vgpr3 = COPY [[UV3]](s32) - ; GFX10: [[COPY13:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY12]] - ; GFX10: S_SETPC_B64_return [[COPY13]], implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3 + ; GFX10: S_SETPC_B64_return $sgpr30_sgpr31, implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3 ; GFX10-CONTRACT-LABEL: name: test_4xfloat_add_mul ; GFX10-CONTRACT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 ; GFX10-CONTRACT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 @@ -932,15 +814,13 @@ body: | ; GFX10-CONTRACT: [[COPY10:%[0-9]+]]:_(s32) = COPY $vgpr10 ; GFX10-CONTRACT: [[COPY11:%[0-9]+]]:_(s32) = COPY $vgpr11 ; GFX10-CONTRACT: [[BUILD_VECTOR2:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32), [[COPY10]](s32), [[COPY11]](s32) - ; GFX10-CONTRACT: [[COPY12:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 ; GFX10-CONTRACT: [[FMA:%[0-9]+]]:_(<4 x s32>) = G_FMA [[BUILD_VECTOR]], [[BUILD_VECTOR1]], [[BUILD_VECTOR2]] ; GFX10-CONTRACT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = 
G_UNMERGE_VALUES [[FMA]](<4 x s32>) ; GFX10-CONTRACT: $vgpr0 = COPY [[UV]](s32) ; GFX10-CONTRACT: $vgpr1 = COPY [[UV1]](s32) ; GFX10-CONTRACT: $vgpr2 = COPY [[UV2]](s32) ; GFX10-CONTRACT: $vgpr3 = COPY [[UV3]](s32) - ; GFX10-CONTRACT: [[COPY13:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY12]] - ; GFX10-CONTRACT: S_SETPC_B64_return [[COPY13]], implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3 + ; GFX10-CONTRACT: S_SETPC_B64_return $sgpr30_sgpr31, implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3 ; GFX10-DENORM-LABEL: name: test_4xfloat_add_mul ; GFX10-DENORM: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 ; GFX10-DENORM: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 @@ -957,7 +837,6 @@ body: | ; GFX10-DENORM: [[COPY10:%[0-9]+]]:_(s32) = COPY $vgpr10 ; GFX10-DENORM: [[COPY11:%[0-9]+]]:_(s32) = COPY $vgpr11 ; GFX10-DENORM: [[BUILD_VECTOR2:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32), [[COPY10]](s32), [[COPY11]](s32) - ; GFX10-DENORM: [[COPY12:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 ; GFX10-DENORM: [[FMUL:%[0-9]+]]:_(<4 x s32>) = reassoc G_FMUL [[BUILD_VECTOR]], [[BUILD_VECTOR1]] ; GFX10-DENORM: [[FADD:%[0-9]+]]:_(<4 x s32>) = reassoc G_FADD [[FMUL]], [[BUILD_VECTOR2]] ; GFX10-DENORM: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[FADD]](<4 x s32>) @@ -965,8 +844,7 @@ body: | ; GFX10-DENORM: $vgpr1 = COPY [[UV1]](s32) ; GFX10-DENORM: $vgpr2 = COPY [[UV2]](s32) ; GFX10-DENORM: $vgpr3 = COPY [[UV3]](s32) - ; GFX10-DENORM: [[COPY13:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY12]] - ; GFX10-DENORM: S_SETPC_B64_return [[COPY13]], implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3 + ; GFX10-DENORM: S_SETPC_B64_return $sgpr30_sgpr31, implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3 ; GFX10-UNSAFE-LABEL: name: test_4xfloat_add_mul ; GFX10-UNSAFE: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 ; GFX10-UNSAFE: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 @@ -983,15 +861,13 @@ body: | ; GFX10-UNSAFE: [[COPY10:%[0-9]+]]:_(s32) = COPY $vgpr10 ; GFX10-UNSAFE: [[COPY11:%[0-9]+]]:_(s32) = COPY $vgpr11 ; GFX10-UNSAFE: [[BUILD_VECTOR2:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32), [[COPY10]](s32), [[COPY11]](s32) - ; GFX10-UNSAFE: [[COPY12:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 ; GFX10-UNSAFE: [[FMA:%[0-9]+]]:_(<4 x s32>) = G_FMA [[BUILD_VECTOR]], [[BUILD_VECTOR1]], [[BUILD_VECTOR2]] ; GFX10-UNSAFE: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[FMA]](<4 x s32>) ; GFX10-UNSAFE: $vgpr0 = COPY [[UV]](s32) ; GFX10-UNSAFE: $vgpr1 = COPY [[UV1]](s32) ; GFX10-UNSAFE: $vgpr2 = COPY [[UV2]](s32) ; GFX10-UNSAFE: $vgpr3 = COPY [[UV3]](s32) - ; GFX10-UNSAFE: [[COPY13:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY12]] - ; GFX10-UNSAFE: S_SETPC_B64_return [[COPY13]], implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3 + ; GFX10-UNSAFE: S_SETPC_B64_return $sgpr30_sgpr31, implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3 %4:_(s32) = COPY $vgpr0 %5:_(s32) = COPY $vgpr1 %6:_(s32) = COPY $vgpr2 @@ -1007,7 +883,6 @@ body: | %14:_(s32) = COPY $vgpr10 %15:_(s32) = COPY $vgpr11 %2:_(<4 x s32>) = G_BUILD_VECTOR %12(s32), %13(s32), %14(s32), %15(s32) - %3:sgpr_64 = COPY $sgpr30_sgpr31 %16:_(<4 x s32>) = reassoc G_FMUL %0, %1 %17:_(<4 x s32>) = reassoc G_FADD %16, %2 %19:_(s32), %20:_(s32), %21:_(s32), %22:_(s32) = G_UNMERGE_VALUES %17(<4 x s32>) @@ -1015,15 +890,14 @@ body: | $vgpr1 = COPY %20(s32) $vgpr2 = COPY 
%21(s32) $vgpr3 = COPY %22(s32) - %18:ccr_sgpr_64 = COPY %3 - S_SETPC_B64_return %18, implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3 + S_SETPC_B64_return $sgpr30_sgpr31, implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3 ... --- name: test_3xfloat_add_mul_rhs body: | bb.1.entry: - liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $vgpr8, $sgpr30_sgpr31 + liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $vgpr8 ; GFX9-LABEL: name: test_3xfloat_add_mul_rhs ; GFX9: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 @@ -1038,15 +912,13 @@ body: | ; GFX9: [[COPY7:%[0-9]+]]:_(s32) = COPY $vgpr7 ; GFX9: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr8 ; GFX9: [[BUILD_VECTOR2:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[COPY6]](s32), [[COPY7]](s32), [[COPY8]](s32) - ; GFX9: [[COPY9:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 ; GFX9: [[FMUL:%[0-9]+]]:_(<3 x s32>) = reassoc G_FMUL [[BUILD_VECTOR]], [[BUILD_VECTOR1]] ; GFX9: [[FADD:%[0-9]+]]:_(<3 x s32>) = reassoc G_FADD [[BUILD_VECTOR2]], [[FMUL]] ; GFX9: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[FADD]](<3 x s32>) ; GFX9: $vgpr0 = COPY [[UV]](s32) ; GFX9: $vgpr1 = COPY [[UV1]](s32) ; GFX9: $vgpr2 = COPY [[UV2]](s32) - ; GFX9: [[COPY10:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY9]] - ; GFX9: S_SETPC_B64_return [[COPY10]], implicit $vgpr0, implicit $vgpr1, implicit $vgpr2 + ; GFX9: S_SETPC_B64_return $sgpr30_sgpr31, implicit $vgpr0, implicit $vgpr1, implicit $vgpr2 ; GFX9-CONTRACT-LABEL: name: test_3xfloat_add_mul_rhs ; GFX9-CONTRACT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 ; GFX9-CONTRACT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 @@ -1060,14 +932,12 @@ body: | ; GFX9-CONTRACT: [[COPY7:%[0-9]+]]:_(s32) = COPY $vgpr7 ; GFX9-CONTRACT: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr8 ; GFX9-CONTRACT: [[BUILD_VECTOR2:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[COPY6]](s32), [[COPY7]](s32), [[COPY8]](s32) - ; GFX9-CONTRACT: [[COPY9:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 ; GFX9-CONTRACT: [[FMA:%[0-9]+]]:_(<3 x s32>) = G_FMA [[BUILD_VECTOR]], [[BUILD_VECTOR1]], [[BUILD_VECTOR2]] ; GFX9-CONTRACT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[FMA]](<3 x s32>) ; GFX9-CONTRACT: $vgpr0 = COPY [[UV]](s32) ; GFX9-CONTRACT: $vgpr1 = COPY [[UV1]](s32) ; GFX9-CONTRACT: $vgpr2 = COPY [[UV2]](s32) - ; GFX9-CONTRACT: [[COPY10:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY9]] - ; GFX9-CONTRACT: S_SETPC_B64_return [[COPY10]], implicit $vgpr0, implicit $vgpr1, implicit $vgpr2 + ; GFX9-CONTRACT: S_SETPC_B64_return $sgpr30_sgpr31, implicit $vgpr0, implicit $vgpr1, implicit $vgpr2 ; GFX9-DENORM-LABEL: name: test_3xfloat_add_mul_rhs ; GFX9-DENORM: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 ; GFX9-DENORM: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 @@ -1081,15 +951,13 @@ body: | ; GFX9-DENORM: [[COPY7:%[0-9]+]]:_(s32) = COPY $vgpr7 ; GFX9-DENORM: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr8 ; GFX9-DENORM: [[BUILD_VECTOR2:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[COPY6]](s32), [[COPY7]](s32), [[COPY8]](s32) - ; GFX9-DENORM: [[COPY9:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 ; GFX9-DENORM: [[FMUL:%[0-9]+]]:_(<3 x s32>) = reassoc G_FMUL [[BUILD_VECTOR]], [[BUILD_VECTOR1]] ; GFX9-DENORM: [[FADD:%[0-9]+]]:_(<3 x s32>) = reassoc G_FADD [[BUILD_VECTOR2]], [[FMUL]] ; GFX9-DENORM: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[FADD]](<3 x s32>) ; GFX9-DENORM: $vgpr0 = COPY [[UV]](s32) ; GFX9-DENORM: $vgpr1 = COPY [[UV1]](s32) ; GFX9-DENORM: $vgpr2 = 
COPY [[UV2]](s32) - ; GFX9-DENORM: [[COPY10:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY9]] - ; GFX9-DENORM: S_SETPC_B64_return [[COPY10]], implicit $vgpr0, implicit $vgpr1, implicit $vgpr2 + ; GFX9-DENORM: S_SETPC_B64_return $sgpr30_sgpr31, implicit $vgpr0, implicit $vgpr1, implicit $vgpr2 ; GFX9-UNSAFE-LABEL: name: test_3xfloat_add_mul_rhs ; GFX9-UNSAFE: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 ; GFX9-UNSAFE: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 @@ -1103,14 +971,12 @@ body: | ; GFX9-UNSAFE: [[COPY7:%[0-9]+]]:_(s32) = COPY $vgpr7 ; GFX9-UNSAFE: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr8 ; GFX9-UNSAFE: [[BUILD_VECTOR2:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[COPY6]](s32), [[COPY7]](s32), [[COPY8]](s32) - ; GFX9-UNSAFE: [[COPY9:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 ; GFX9-UNSAFE: [[FMA:%[0-9]+]]:_(<3 x s32>) = G_FMA [[BUILD_VECTOR]], [[BUILD_VECTOR1]], [[BUILD_VECTOR2]] ; GFX9-UNSAFE: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[FMA]](<3 x s32>) ; GFX9-UNSAFE: $vgpr0 = COPY [[UV]](s32) ; GFX9-UNSAFE: $vgpr1 = COPY [[UV1]](s32) ; GFX9-UNSAFE: $vgpr2 = COPY [[UV2]](s32) - ; GFX9-UNSAFE: [[COPY10:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY9]] - ; GFX9-UNSAFE: S_SETPC_B64_return [[COPY10]], implicit $vgpr0, implicit $vgpr1, implicit $vgpr2 + ; GFX9-UNSAFE: S_SETPC_B64_return $sgpr30_sgpr31, implicit $vgpr0, implicit $vgpr1, implicit $vgpr2 ; GFX10-LABEL: name: test_3xfloat_add_mul_rhs ; GFX10: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 ; GFX10: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 @@ -1124,15 +990,13 @@ body: | ; GFX10: [[COPY7:%[0-9]+]]:_(s32) = COPY $vgpr7 ; GFX10: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr8 ; GFX10: [[BUILD_VECTOR2:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[COPY6]](s32), [[COPY7]](s32), [[COPY8]](s32) - ; GFX10: [[COPY9:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 ; GFX10: [[FMUL:%[0-9]+]]:_(<3 x s32>) = reassoc G_FMUL [[BUILD_VECTOR]], [[BUILD_VECTOR1]] ; GFX10: [[FADD:%[0-9]+]]:_(<3 x s32>) = reassoc G_FADD [[BUILD_VECTOR2]], [[FMUL]] ; GFX10: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[FADD]](<3 x s32>) ; GFX10: $vgpr0 = COPY [[UV]](s32) ; GFX10: $vgpr1 = COPY [[UV1]](s32) ; GFX10: $vgpr2 = COPY [[UV2]](s32) - ; GFX10: [[COPY10:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY9]] - ; GFX10: S_SETPC_B64_return [[COPY10]], implicit $vgpr0, implicit $vgpr1, implicit $vgpr2 + ; GFX10: S_SETPC_B64_return $sgpr30_sgpr31, implicit $vgpr0, implicit $vgpr1, implicit $vgpr2 ; GFX10-CONTRACT-LABEL: name: test_3xfloat_add_mul_rhs ; GFX10-CONTRACT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 ; GFX10-CONTRACT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 @@ -1146,14 +1010,12 @@ body: | ; GFX10-CONTRACT: [[COPY7:%[0-9]+]]:_(s32) = COPY $vgpr7 ; GFX10-CONTRACT: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr8 ; GFX10-CONTRACT: [[BUILD_VECTOR2:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[COPY6]](s32), [[COPY7]](s32), [[COPY8]](s32) - ; GFX10-CONTRACT: [[COPY9:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 ; GFX10-CONTRACT: [[FMA:%[0-9]+]]:_(<3 x s32>) = G_FMA [[BUILD_VECTOR]], [[BUILD_VECTOR1]], [[BUILD_VECTOR2]] ; GFX10-CONTRACT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[FMA]](<3 x s32>) ; GFX10-CONTRACT: $vgpr0 = COPY [[UV]](s32) ; GFX10-CONTRACT: $vgpr1 = COPY [[UV1]](s32) ; GFX10-CONTRACT: $vgpr2 = COPY [[UV2]](s32) - ; GFX10-CONTRACT: [[COPY10:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY9]] - ; GFX10-CONTRACT: S_SETPC_B64_return [[COPY10]], implicit $vgpr0, implicit $vgpr1, implicit $vgpr2 + ; GFX10-CONTRACT: S_SETPC_B64_return 
$sgpr30_sgpr31, implicit $vgpr0, implicit $vgpr1, implicit $vgpr2 ; GFX10-DENORM-LABEL: name: test_3xfloat_add_mul_rhs ; GFX10-DENORM: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 ; GFX10-DENORM: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 @@ -1167,15 +1029,13 @@ body: | ; GFX10-DENORM: [[COPY7:%[0-9]+]]:_(s32) = COPY $vgpr7 ; GFX10-DENORM: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr8 ; GFX10-DENORM: [[BUILD_VECTOR2:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[COPY6]](s32), [[COPY7]](s32), [[COPY8]](s32) - ; GFX10-DENORM: [[COPY9:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 ; GFX10-DENORM: [[FMUL:%[0-9]+]]:_(<3 x s32>) = reassoc G_FMUL [[BUILD_VECTOR]], [[BUILD_VECTOR1]] ; GFX10-DENORM: [[FADD:%[0-9]+]]:_(<3 x s32>) = reassoc G_FADD [[BUILD_VECTOR2]], [[FMUL]] ; GFX10-DENORM: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[FADD]](<3 x s32>) ; GFX10-DENORM: $vgpr0 = COPY [[UV]](s32) ; GFX10-DENORM: $vgpr1 = COPY [[UV1]](s32) ; GFX10-DENORM: $vgpr2 = COPY [[UV2]](s32) - ; GFX10-DENORM: [[COPY10:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY9]] - ; GFX10-DENORM: S_SETPC_B64_return [[COPY10]], implicit $vgpr0, implicit $vgpr1, implicit $vgpr2 + ; GFX10-DENORM: S_SETPC_B64_return $sgpr30_sgpr31, implicit $vgpr0, implicit $vgpr1, implicit $vgpr2 ; GFX10-UNSAFE-LABEL: name: test_3xfloat_add_mul_rhs ; GFX10-UNSAFE: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 ; GFX10-UNSAFE: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 @@ -1189,14 +1049,12 @@ body: | ; GFX10-UNSAFE: [[COPY7:%[0-9]+]]:_(s32) = COPY $vgpr7 ; GFX10-UNSAFE: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr8 ; GFX10-UNSAFE: [[BUILD_VECTOR2:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[COPY6]](s32), [[COPY7]](s32), [[COPY8]](s32) - ; GFX10-UNSAFE: [[COPY9:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 ; GFX10-UNSAFE: [[FMA:%[0-9]+]]:_(<3 x s32>) = G_FMA [[BUILD_VECTOR]], [[BUILD_VECTOR1]], [[BUILD_VECTOR2]] ; GFX10-UNSAFE: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[FMA]](<3 x s32>) ; GFX10-UNSAFE: $vgpr0 = COPY [[UV]](s32) ; GFX10-UNSAFE: $vgpr1 = COPY [[UV1]](s32) ; GFX10-UNSAFE: $vgpr2 = COPY [[UV2]](s32) - ; GFX10-UNSAFE: [[COPY10:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY9]] - ; GFX10-UNSAFE: S_SETPC_B64_return [[COPY10]], implicit $vgpr0, implicit $vgpr1, implicit $vgpr2 + ; GFX10-UNSAFE: S_SETPC_B64_return $sgpr30_sgpr31, implicit $vgpr0, implicit $vgpr1, implicit $vgpr2 %4:_(s32) = COPY $vgpr0 %5:_(s32) = COPY $vgpr1 %6:_(s32) = COPY $vgpr2 @@ -1209,22 +1067,20 @@ body: | %11:_(s32) = COPY $vgpr7 %12:_(s32) = COPY $vgpr8 %2:_(<3 x s32>) = G_BUILD_VECTOR %10(s32), %11(s32), %12(s32) - %3:sgpr_64 = COPY $sgpr30_sgpr31 %13:_(<3 x s32>) = reassoc G_FMUL %0, %1 %14:_(<3 x s32>) = reassoc G_FADD %2, %13 %16:_(s32), %17:_(s32), %18:_(s32) = G_UNMERGE_VALUES %14(<3 x s32>) $vgpr0 = COPY %16(s32) $vgpr1 = COPY %17(s32) $vgpr2 = COPY %18(s32) - %15:ccr_sgpr_64 = COPY %3 - S_SETPC_B64_return %15, implicit $vgpr0, implicit $vgpr1, implicit $vgpr2 + S_SETPC_B64_return $sgpr30_sgpr31, implicit $vgpr0, implicit $vgpr1, implicit $vgpr2 ... 
--- name: test_4xhalf_add_mul body: | bb.1.entry: - liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $sgpr30_sgpr31 + liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5 ; GFX9-LABEL: name: test_4xhalf_add_mul ; GFX9: [[COPY:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0 @@ -1236,14 +1092,12 @@ body: | ; GFX9: [[COPY4:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr4 ; GFX9: [[COPY5:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr5 ; GFX9: [[CONCAT_VECTORS2:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[COPY4]](<2 x s16>), [[COPY5]](<2 x s16>) - ; GFX9: [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 ; GFX9: [[FMUL:%[0-9]+]]:_(<4 x s16>) = reassoc G_FMUL [[CONCAT_VECTORS]], [[CONCAT_VECTORS1]] ; GFX9: [[FADD:%[0-9]+]]:_(<4 x s16>) = reassoc G_FADD [[FMUL]], [[CONCAT_VECTORS2]] ; GFX9: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[FADD]](<4 x s16>) ; GFX9: $vgpr0 = COPY [[UV]](<2 x s16>) ; GFX9: $vgpr1 = COPY [[UV1]](<2 x s16>) - ; GFX9: [[COPY7:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY6]] - ; GFX9: S_SETPC_B64_return [[COPY7]], implicit $vgpr0, implicit $vgpr1 + ; GFX9: S_SETPC_B64_return $sgpr30_sgpr31, implicit $vgpr0, implicit $vgpr1 ; GFX9-CONTRACT-LABEL: name: test_4xhalf_add_mul ; GFX9-CONTRACT: [[COPY:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0 ; GFX9-CONTRACT: [[COPY1:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr1 @@ -1254,13 +1108,11 @@ body: | ; GFX9-CONTRACT: [[COPY4:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr4 ; GFX9-CONTRACT: [[COPY5:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr5 ; GFX9-CONTRACT: [[CONCAT_VECTORS2:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[COPY4]](<2 x s16>), [[COPY5]](<2 x s16>) - ; GFX9-CONTRACT: [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 ; GFX9-CONTRACT: [[FMA:%[0-9]+]]:_(<4 x s16>) = G_FMA [[CONCAT_VECTORS]], [[CONCAT_VECTORS1]], [[CONCAT_VECTORS2]] ; GFX9-CONTRACT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[FMA]](<4 x s16>) ; GFX9-CONTRACT: $vgpr0 = COPY [[UV]](<2 x s16>) ; GFX9-CONTRACT: $vgpr1 = COPY [[UV1]](<2 x s16>) - ; GFX9-CONTRACT: [[COPY7:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY6]] - ; GFX9-CONTRACT: S_SETPC_B64_return [[COPY7]], implicit $vgpr0, implicit $vgpr1 + ; GFX9-CONTRACT: S_SETPC_B64_return $sgpr30_sgpr31, implicit $vgpr0, implicit $vgpr1 ; GFX9-DENORM-LABEL: name: test_4xhalf_add_mul ; GFX9-DENORM: [[COPY:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0 ; GFX9-DENORM: [[COPY1:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr1 @@ -1271,14 +1123,12 @@ body: | ; GFX9-DENORM: [[COPY4:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr4 ; GFX9-DENORM: [[COPY5:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr5 ; GFX9-DENORM: [[CONCAT_VECTORS2:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[COPY4]](<2 x s16>), [[COPY5]](<2 x s16>) - ; GFX9-DENORM: [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 ; GFX9-DENORM: [[FMUL:%[0-9]+]]:_(<4 x s16>) = reassoc G_FMUL [[CONCAT_VECTORS]], [[CONCAT_VECTORS1]] ; GFX9-DENORM: [[FADD:%[0-9]+]]:_(<4 x s16>) = reassoc G_FADD [[FMUL]], [[CONCAT_VECTORS2]] ; GFX9-DENORM: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[FADD]](<4 x s16>) ; GFX9-DENORM: $vgpr0 = COPY [[UV]](<2 x s16>) ; GFX9-DENORM: $vgpr1 = COPY [[UV1]](<2 x s16>) - ; GFX9-DENORM: [[COPY7:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY6]] - ; GFX9-DENORM: S_SETPC_B64_return [[COPY7]], implicit $vgpr0, implicit $vgpr1 + ; GFX9-DENORM: S_SETPC_B64_return $sgpr30_sgpr31, implicit $vgpr0, implicit $vgpr1 ; GFX9-UNSAFE-LABEL: name: test_4xhalf_add_mul ; GFX9-UNSAFE: [[COPY:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0 ; GFX9-UNSAFE: [[COPY1:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr1 @@ -1289,13 +1139,11 @@ 
body: | ; GFX9-UNSAFE: [[COPY4:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr4 ; GFX9-UNSAFE: [[COPY5:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr5 ; GFX9-UNSAFE: [[CONCAT_VECTORS2:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[COPY4]](<2 x s16>), [[COPY5]](<2 x s16>) - ; GFX9-UNSAFE: [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 ; GFX9-UNSAFE: [[FMA:%[0-9]+]]:_(<4 x s16>) = G_FMA [[CONCAT_VECTORS]], [[CONCAT_VECTORS1]], [[CONCAT_VECTORS2]] ; GFX9-UNSAFE: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[FMA]](<4 x s16>) ; GFX9-UNSAFE: $vgpr0 = COPY [[UV]](<2 x s16>) ; GFX9-UNSAFE: $vgpr1 = COPY [[UV1]](<2 x s16>) - ; GFX9-UNSAFE: [[COPY7:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY6]] - ; GFX9-UNSAFE: S_SETPC_B64_return [[COPY7]], implicit $vgpr0, implicit $vgpr1 + ; GFX9-UNSAFE: S_SETPC_B64_return $sgpr30_sgpr31, implicit $vgpr0, implicit $vgpr1 ; GFX10-LABEL: name: test_4xhalf_add_mul ; GFX10: [[COPY:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0 ; GFX10: [[COPY1:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr1 @@ -1306,14 +1154,12 @@ body: | ; GFX10: [[COPY4:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr4 ; GFX10: [[COPY5:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr5 ; GFX10: [[CONCAT_VECTORS2:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[COPY4]](<2 x s16>), [[COPY5]](<2 x s16>) - ; GFX10: [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 ; GFX10: [[FMUL:%[0-9]+]]:_(<4 x s16>) = reassoc G_FMUL [[CONCAT_VECTORS]], [[CONCAT_VECTORS1]] ; GFX10: [[FADD:%[0-9]+]]:_(<4 x s16>) = reassoc G_FADD [[FMUL]], [[CONCAT_VECTORS2]] ; GFX10: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[FADD]](<4 x s16>) ; GFX10: $vgpr0 = COPY [[UV]](<2 x s16>) ; GFX10: $vgpr1 = COPY [[UV1]](<2 x s16>) - ; GFX10: [[COPY7:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY6]] - ; GFX10: S_SETPC_B64_return [[COPY7]], implicit $vgpr0, implicit $vgpr1 + ; GFX10: S_SETPC_B64_return $sgpr30_sgpr31, implicit $vgpr0, implicit $vgpr1 ; GFX10-CONTRACT-LABEL: name: test_4xhalf_add_mul ; GFX10-CONTRACT: [[COPY:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0 ; GFX10-CONTRACT: [[COPY1:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr1 @@ -1324,13 +1170,11 @@ body: | ; GFX10-CONTRACT: [[COPY4:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr4 ; GFX10-CONTRACT: [[COPY5:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr5 ; GFX10-CONTRACT: [[CONCAT_VECTORS2:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[COPY4]](<2 x s16>), [[COPY5]](<2 x s16>) - ; GFX10-CONTRACT: [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 ; GFX10-CONTRACT: [[FMA:%[0-9]+]]:_(<4 x s16>) = G_FMA [[CONCAT_VECTORS]], [[CONCAT_VECTORS1]], [[CONCAT_VECTORS2]] ; GFX10-CONTRACT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[FMA]](<4 x s16>) ; GFX10-CONTRACT: $vgpr0 = COPY [[UV]](<2 x s16>) ; GFX10-CONTRACT: $vgpr1 = COPY [[UV1]](<2 x s16>) - ; GFX10-CONTRACT: [[COPY7:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY6]] - ; GFX10-CONTRACT: S_SETPC_B64_return [[COPY7]], implicit $vgpr0, implicit $vgpr1 + ; GFX10-CONTRACT: S_SETPC_B64_return $sgpr30_sgpr31, implicit $vgpr0, implicit $vgpr1 ; GFX10-DENORM-LABEL: name: test_4xhalf_add_mul ; GFX10-DENORM: [[COPY:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0 ; GFX10-DENORM: [[COPY1:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr1 @@ -1341,14 +1185,12 @@ body: | ; GFX10-DENORM: [[COPY4:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr4 ; GFX10-DENORM: [[COPY5:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr5 ; GFX10-DENORM: [[CONCAT_VECTORS2:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[COPY4]](<2 x s16>), [[COPY5]](<2 x s16>) - ; GFX10-DENORM: [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 ; GFX10-DENORM: [[FMUL:%[0-9]+]]:_(<4 x s16>) = reassoc 
G_FMUL [[CONCAT_VECTORS]], [[CONCAT_VECTORS1]] ; GFX10-DENORM: [[FADD:%[0-9]+]]:_(<4 x s16>) = reassoc G_FADD [[FMUL]], [[CONCAT_VECTORS2]] ; GFX10-DENORM: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[FADD]](<4 x s16>) ; GFX10-DENORM: $vgpr0 = COPY [[UV]](<2 x s16>) ; GFX10-DENORM: $vgpr1 = COPY [[UV1]](<2 x s16>) - ; GFX10-DENORM: [[COPY7:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY6]] - ; GFX10-DENORM: S_SETPC_B64_return [[COPY7]], implicit $vgpr0, implicit $vgpr1 + ; GFX10-DENORM: S_SETPC_B64_return $sgpr30_sgpr31, implicit $vgpr0, implicit $vgpr1 ; GFX10-UNSAFE-LABEL: name: test_4xhalf_add_mul ; GFX10-UNSAFE: [[COPY:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0 ; GFX10-UNSAFE: [[COPY1:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr1 @@ -1359,13 +1201,11 @@ body: | ; GFX10-UNSAFE: [[COPY4:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr4 ; GFX10-UNSAFE: [[COPY5:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr5 ; GFX10-UNSAFE: [[CONCAT_VECTORS2:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[COPY4]](<2 x s16>), [[COPY5]](<2 x s16>) - ; GFX10-UNSAFE: [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 ; GFX10-UNSAFE: [[FMA:%[0-9]+]]:_(<4 x s16>) = G_FMA [[CONCAT_VECTORS]], [[CONCAT_VECTORS1]], [[CONCAT_VECTORS2]] ; GFX10-UNSAFE: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[FMA]](<4 x s16>) ; GFX10-UNSAFE: $vgpr0 = COPY [[UV]](<2 x s16>) ; GFX10-UNSAFE: $vgpr1 = COPY [[UV1]](<2 x s16>) - ; GFX10-UNSAFE: [[COPY7:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY6]] - ; GFX10-UNSAFE: S_SETPC_B64_return [[COPY7]], implicit $vgpr0, implicit $vgpr1 + ; GFX10-UNSAFE: S_SETPC_B64_return $sgpr30_sgpr31, implicit $vgpr0, implicit $vgpr1 %4:_(<2 x s16>) = COPY $vgpr0 %5:_(<2 x s16>) = COPY $vgpr1 %0:_(<4 x s16>) = G_CONCAT_VECTORS %4(<2 x s16>), %5(<2 x s16>) @@ -1375,21 +1215,19 @@ body: | %8:_(<2 x s16>) = COPY $vgpr4 %9:_(<2 x s16>) = COPY $vgpr5 %2:_(<4 x s16>) = G_CONCAT_VECTORS %8(<2 x s16>), %9(<2 x s16>) - %3:sgpr_64 = COPY $sgpr30_sgpr31 %10:_(<4 x s16>) = reassoc G_FMUL %0, %1 %11:_(<4 x s16>) = reassoc G_FADD %10, %2 %13:_(<2 x s16>), %14:_(<2 x s16>) = G_UNMERGE_VALUES %11(<4 x s16>) $vgpr0 = COPY %13(<2 x s16>) $vgpr1 = COPY %14(<2 x s16>) - %12:ccr_sgpr_64 = COPY %3 - S_SETPC_B64_return %12, implicit $vgpr0, implicit $vgpr1 + S_SETPC_B64_return $sgpr30_sgpr31, implicit $vgpr0, implicit $vgpr1 ... 
--- name: test_3xhalf_add_mul_rhs body: | bb.1.entry: - liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $sgpr30_sgpr31 + liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5 ; GFX9-LABEL: name: test_3xhalf_add_mul_rhs ; GFX9: [[COPY:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0 @@ -1411,7 +1249,6 @@ body: | ; GFX9: [[BITCAST4:%[0-9]+]]:_(s96) = G_BITCAST [[CONCAT_VECTORS2]](<6 x s16>) ; GFX9: [[TRUNC2:%[0-9]+]]:_(s48) = G_TRUNC [[BITCAST4]](s96) ; GFX9: [[BITCAST5:%[0-9]+]]:_(<3 x s16>) = G_BITCAST [[TRUNC2]](s48) - ; GFX9: [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 ; GFX9: [[FMUL:%[0-9]+]]:_(<3 x s16>) = reassoc G_FMUL [[BITCAST1]], [[BITCAST3]] ; GFX9: [[FADD:%[0-9]+]]:_(<3 x s16>) = reassoc G_FADD [[BITCAST5]], [[FMUL]] ; GFX9: [[DEF1:%[0-9]+]]:_(<3 x s16>) = G_IMPLICIT_DEF @@ -1419,8 +1256,7 @@ body: | ; GFX9: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>), [[UV2:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS3]](<6 x s16>) ; GFX9: $vgpr0 = COPY [[UV]](<2 x s16>) ; GFX9: $vgpr1 = COPY [[UV1]](<2 x s16>) - ; GFX9: [[COPY7:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY6]] - ; GFX9: S_SETPC_B64_return [[COPY7]], implicit $vgpr0, implicit $vgpr1 + ; GFX9: S_SETPC_B64_return $sgpr30_sgpr31, implicit $vgpr0, implicit $vgpr1 ; GFX9-CONTRACT-LABEL: name: test_3xhalf_add_mul_rhs ; GFX9-CONTRACT: [[COPY:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0 ; GFX9-CONTRACT: [[COPY1:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr1 @@ -1441,15 +1277,13 @@ body: | ; GFX9-CONTRACT: [[BITCAST4:%[0-9]+]]:_(s96) = G_BITCAST [[CONCAT_VECTORS2]](<6 x s16>) ; GFX9-CONTRACT: [[TRUNC2:%[0-9]+]]:_(s48) = G_TRUNC [[BITCAST4]](s96) ; GFX9-CONTRACT: [[BITCAST5:%[0-9]+]]:_(<3 x s16>) = G_BITCAST [[TRUNC2]](s48) - ; GFX9-CONTRACT: [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 ; GFX9-CONTRACT: [[FMA:%[0-9]+]]:_(<3 x s16>) = G_FMA [[BITCAST1]], [[BITCAST3]], [[BITCAST5]] ; GFX9-CONTRACT: [[DEF1:%[0-9]+]]:_(<3 x s16>) = G_IMPLICIT_DEF ; GFX9-CONTRACT: [[CONCAT_VECTORS3:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[FMA]](<3 x s16>), [[DEF1]](<3 x s16>) ; GFX9-CONTRACT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>), [[UV2:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS3]](<6 x s16>) ; GFX9-CONTRACT: $vgpr0 = COPY [[UV]](<2 x s16>) ; GFX9-CONTRACT: $vgpr1 = COPY [[UV1]](<2 x s16>) - ; GFX9-CONTRACT: [[COPY7:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY6]] - ; GFX9-CONTRACT: S_SETPC_B64_return [[COPY7]], implicit $vgpr0, implicit $vgpr1 + ; GFX9-CONTRACT: S_SETPC_B64_return $sgpr30_sgpr31, implicit $vgpr0, implicit $vgpr1 ; GFX9-DENORM-LABEL: name: test_3xhalf_add_mul_rhs ; GFX9-DENORM: [[COPY:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0 ; GFX9-DENORM: [[COPY1:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr1 @@ -1470,7 +1304,6 @@ body: | ; GFX9-DENORM: [[BITCAST4:%[0-9]+]]:_(s96) = G_BITCAST [[CONCAT_VECTORS2]](<6 x s16>) ; GFX9-DENORM: [[TRUNC2:%[0-9]+]]:_(s48) = G_TRUNC [[BITCAST4]](s96) ; GFX9-DENORM: [[BITCAST5:%[0-9]+]]:_(<3 x s16>) = G_BITCAST [[TRUNC2]](s48) - ; GFX9-DENORM: [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 ; GFX9-DENORM: [[FMUL:%[0-9]+]]:_(<3 x s16>) = reassoc G_FMUL [[BITCAST1]], [[BITCAST3]] ; GFX9-DENORM: [[FADD:%[0-9]+]]:_(<3 x s16>) = reassoc G_FADD [[BITCAST5]], [[FMUL]] ; GFX9-DENORM: [[DEF1:%[0-9]+]]:_(<3 x s16>) = G_IMPLICIT_DEF @@ -1478,8 +1311,7 @@ body: | ; GFX9-DENORM: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>), [[UV2:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS3]](<6 x s16>) ; GFX9-DENORM: $vgpr0 = COPY [[UV]](<2 x s16>) ; GFX9-DENORM: $vgpr1 = COPY [[UV1]](<2 x s16>) - ; 
GFX9-DENORM: [[COPY7:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY6]] - ; GFX9-DENORM: S_SETPC_B64_return [[COPY7]], implicit $vgpr0, implicit $vgpr1 + ; GFX9-DENORM: S_SETPC_B64_return $sgpr30_sgpr31, implicit $vgpr0, implicit $vgpr1 ; GFX9-UNSAFE-LABEL: name: test_3xhalf_add_mul_rhs ; GFX9-UNSAFE: [[COPY:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0 ; GFX9-UNSAFE: [[COPY1:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr1 @@ -1500,15 +1332,13 @@ body: | ; GFX9-UNSAFE: [[BITCAST4:%[0-9]+]]:_(s96) = G_BITCAST [[CONCAT_VECTORS2]](<6 x s16>) ; GFX9-UNSAFE: [[TRUNC2:%[0-9]+]]:_(s48) = G_TRUNC [[BITCAST4]](s96) ; GFX9-UNSAFE: [[BITCAST5:%[0-9]+]]:_(<3 x s16>) = G_BITCAST [[TRUNC2]](s48) - ; GFX9-UNSAFE: [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 ; GFX9-UNSAFE: [[FMA:%[0-9]+]]:_(<3 x s16>) = G_FMA [[BITCAST1]], [[BITCAST3]], [[BITCAST5]] ; GFX9-UNSAFE: [[DEF1:%[0-9]+]]:_(<3 x s16>) = G_IMPLICIT_DEF ; GFX9-UNSAFE: [[CONCAT_VECTORS3:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[FMA]](<3 x s16>), [[DEF1]](<3 x s16>) ; GFX9-UNSAFE: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>), [[UV2:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS3]](<6 x s16>) ; GFX9-UNSAFE: $vgpr0 = COPY [[UV]](<2 x s16>) ; GFX9-UNSAFE: $vgpr1 = COPY [[UV1]](<2 x s16>) - ; GFX9-UNSAFE: [[COPY7:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY6]] - ; GFX9-UNSAFE: S_SETPC_B64_return [[COPY7]], implicit $vgpr0, implicit $vgpr1 + ; GFX9-UNSAFE: S_SETPC_B64_return $sgpr30_sgpr31, implicit $vgpr0, implicit $vgpr1 ; GFX10-LABEL: name: test_3xhalf_add_mul_rhs ; GFX10: [[COPY:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0 ; GFX10: [[COPY1:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr1 @@ -1529,7 +1359,6 @@ body: | ; GFX10: [[BITCAST4:%[0-9]+]]:_(s96) = G_BITCAST [[CONCAT_VECTORS2]](<6 x s16>) ; GFX10: [[TRUNC2:%[0-9]+]]:_(s48) = G_TRUNC [[BITCAST4]](s96) ; GFX10: [[BITCAST5:%[0-9]+]]:_(<3 x s16>) = G_BITCAST [[TRUNC2]](s48) - ; GFX10: [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 ; GFX10: [[FMUL:%[0-9]+]]:_(<3 x s16>) = reassoc G_FMUL [[BITCAST1]], [[BITCAST3]] ; GFX10: [[FADD:%[0-9]+]]:_(<3 x s16>) = reassoc G_FADD [[BITCAST5]], [[FMUL]] ; GFX10: [[DEF1:%[0-9]+]]:_(<3 x s16>) = G_IMPLICIT_DEF @@ -1537,8 +1366,7 @@ body: | ; GFX10: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>), [[UV2:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS3]](<6 x s16>) ; GFX10: $vgpr0 = COPY [[UV]](<2 x s16>) ; GFX10: $vgpr1 = COPY [[UV1]](<2 x s16>) - ; GFX10: [[COPY7:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY6]] - ; GFX10: S_SETPC_B64_return [[COPY7]], implicit $vgpr0, implicit $vgpr1 + ; GFX10: S_SETPC_B64_return $sgpr30_sgpr31, implicit $vgpr0, implicit $vgpr1 ; GFX10-CONTRACT-LABEL: name: test_3xhalf_add_mul_rhs ; GFX10-CONTRACT: [[COPY:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0 ; GFX10-CONTRACT: [[COPY1:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr1 @@ -1559,15 +1387,13 @@ body: | ; GFX10-CONTRACT: [[BITCAST4:%[0-9]+]]:_(s96) = G_BITCAST [[CONCAT_VECTORS2]](<6 x s16>) ; GFX10-CONTRACT: [[TRUNC2:%[0-9]+]]:_(s48) = G_TRUNC [[BITCAST4]](s96) ; GFX10-CONTRACT: [[BITCAST5:%[0-9]+]]:_(<3 x s16>) = G_BITCAST [[TRUNC2]](s48) - ; GFX10-CONTRACT: [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 ; GFX10-CONTRACT: [[FMA:%[0-9]+]]:_(<3 x s16>) = G_FMA [[BITCAST1]], [[BITCAST3]], [[BITCAST5]] ; GFX10-CONTRACT: [[DEF1:%[0-9]+]]:_(<3 x s16>) = G_IMPLICIT_DEF ; GFX10-CONTRACT: [[CONCAT_VECTORS3:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[FMA]](<3 x s16>), [[DEF1]](<3 x s16>) ; GFX10-CONTRACT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>), [[UV2:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES 
[[CONCAT_VECTORS3]](<6 x s16>) ; GFX10-CONTRACT: $vgpr0 = COPY [[UV]](<2 x s16>) ; GFX10-CONTRACT: $vgpr1 = COPY [[UV1]](<2 x s16>) - ; GFX10-CONTRACT: [[COPY7:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY6]] - ; GFX10-CONTRACT: S_SETPC_B64_return [[COPY7]], implicit $vgpr0, implicit $vgpr1 + ; GFX10-CONTRACT: S_SETPC_B64_return $sgpr30_sgpr31, implicit $vgpr0, implicit $vgpr1 ; GFX10-DENORM-LABEL: name: test_3xhalf_add_mul_rhs ; GFX10-DENORM: [[COPY:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0 ; GFX10-DENORM: [[COPY1:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr1 @@ -1588,7 +1414,6 @@ body: | ; GFX10-DENORM: [[BITCAST4:%[0-9]+]]:_(s96) = G_BITCAST [[CONCAT_VECTORS2]](<6 x s16>) ; GFX10-DENORM: [[TRUNC2:%[0-9]+]]:_(s48) = G_TRUNC [[BITCAST4]](s96) ; GFX10-DENORM: [[BITCAST5:%[0-9]+]]:_(<3 x s16>) = G_BITCAST [[TRUNC2]](s48) - ; GFX10-DENORM: [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 ; GFX10-DENORM: [[FMUL:%[0-9]+]]:_(<3 x s16>) = reassoc G_FMUL [[BITCAST1]], [[BITCAST3]] ; GFX10-DENORM: [[FADD:%[0-9]+]]:_(<3 x s16>) = reassoc G_FADD [[BITCAST5]], [[FMUL]] ; GFX10-DENORM: [[DEF1:%[0-9]+]]:_(<3 x s16>) = G_IMPLICIT_DEF @@ -1596,8 +1421,7 @@ body: | ; GFX10-DENORM: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>), [[UV2:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS3]](<6 x s16>) ; GFX10-DENORM: $vgpr0 = COPY [[UV]](<2 x s16>) ; GFX10-DENORM: $vgpr1 = COPY [[UV1]](<2 x s16>) - ; GFX10-DENORM: [[COPY7:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY6]] - ; GFX10-DENORM: S_SETPC_B64_return [[COPY7]], implicit $vgpr0, implicit $vgpr1 + ; GFX10-DENORM: S_SETPC_B64_return $sgpr30_sgpr31, implicit $vgpr0, implicit $vgpr1 ; GFX10-UNSAFE-LABEL: name: test_3xhalf_add_mul_rhs ; GFX10-UNSAFE: [[COPY:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0 ; GFX10-UNSAFE: [[COPY1:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr1 @@ -1618,15 +1442,13 @@ body: | ; GFX10-UNSAFE: [[BITCAST4:%[0-9]+]]:_(s96) = G_BITCAST [[CONCAT_VECTORS2]](<6 x s16>) ; GFX10-UNSAFE: [[TRUNC2:%[0-9]+]]:_(s48) = G_TRUNC [[BITCAST4]](s96) ; GFX10-UNSAFE: [[BITCAST5:%[0-9]+]]:_(<3 x s16>) = G_BITCAST [[TRUNC2]](s48) - ; GFX10-UNSAFE: [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 ; GFX10-UNSAFE: [[FMA:%[0-9]+]]:_(<3 x s16>) = G_FMA [[BITCAST1]], [[BITCAST3]], [[BITCAST5]] ; GFX10-UNSAFE: [[DEF1:%[0-9]+]]:_(<3 x s16>) = G_IMPLICIT_DEF ; GFX10-UNSAFE: [[CONCAT_VECTORS3:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[FMA]](<3 x s16>), [[DEF1]](<3 x s16>) ; GFX10-UNSAFE: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>), [[UV2:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS3]](<6 x s16>) ; GFX10-UNSAFE: $vgpr0 = COPY [[UV]](<2 x s16>) ; GFX10-UNSAFE: $vgpr1 = COPY [[UV1]](<2 x s16>) - ; GFX10-UNSAFE: [[COPY7:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY6]] - ; GFX10-UNSAFE: S_SETPC_B64_return [[COPY7]], implicit $vgpr0, implicit $vgpr1 + ; GFX10-UNSAFE: S_SETPC_B64_return $sgpr30_sgpr31, implicit $vgpr0, implicit $vgpr1 %4:_(<2 x s16>) = COPY $vgpr0 %5:_(<2 x s16>) = COPY $vgpr1 %10:_(<2 x s16>) = G_IMPLICIT_DEF @@ -1640,7 +1462,6 @@ body: | %9:_(<2 x s16>) = COPY $vgpr5 %15:_(<6 x s16>) = G_CONCAT_VECTORS %8(<2 x s16>), %9(<2 x s16>), %10(<2 x s16>) %2:_(<3 x s16>), %16:_(<3 x s16>) = G_UNMERGE_VALUES %15(<6 x s16>) - %3:sgpr_64 = COPY $sgpr30_sgpr31 %17:_(<3 x s16>) = reassoc G_FMUL %0, %1 %18:_(<3 x s16>) = reassoc G_FADD %2, %17 %22:_(<3 x s16>) = G_IMPLICIT_DEF @@ -1648,15 +1469,14 @@ body: | %20:_(<2 x s16>), %21:_(<2 x s16>), %24:_(<2 x s16>) = G_UNMERGE_VALUES %23(<6 x s16>) $vgpr0 = COPY %20(<2 x s16>) $vgpr1 = COPY %21(<2 x s16>) - %19:ccr_sgpr_64 = COPY %3 - 
S_SETPC_B64_return %19, implicit $vgpr0, implicit $vgpr1 + S_SETPC_B64_return $sgpr30_sgpr31, implicit $vgpr0, implicit $vgpr1 ... --- name: test_4xdouble_add_mul body: | bb.1.entry: - liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $vgpr8, $vgpr9, $vgpr10, $vgpr11, $vgpr12, $vgpr13, $vgpr14, $vgpr15, $vgpr16, $vgpr17, $vgpr18, $vgpr19, $vgpr20, $vgpr21, $vgpr22, $vgpr23, $sgpr30_sgpr31 + liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $vgpr8, $vgpr9, $vgpr10, $vgpr11, $vgpr12, $vgpr13, $vgpr14, $vgpr15, $vgpr16, $vgpr17, $vgpr18, $vgpr19, $vgpr20, $vgpr21, $vgpr22, $vgpr23 ; GFX9-LABEL: name: test_4xdouble_add_mul ; GFX9: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 @@ -1698,7 +1518,6 @@ body: | ; GFX9: [[MV10:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY20]](s32), [[COPY21]](s32) ; GFX9: [[MV11:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY22]](s32), [[COPY23]](s32) ; GFX9: [[BUILD_VECTOR2:%[0-9]+]]:_(<4 x s64>) = G_BUILD_VECTOR [[MV8]](s64), [[MV9]](s64), [[MV10]](s64), [[MV11]](s64) - ; GFX9: [[COPY24:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 ; GFX9: [[FMUL:%[0-9]+]]:_(<4 x s64>) = reassoc G_FMUL [[BUILD_VECTOR]], [[BUILD_VECTOR1]] ; GFX9: [[FADD:%[0-9]+]]:_(<4 x s64>) = reassoc G_FADD [[FMUL]], [[BUILD_VECTOR2]] ; GFX9: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32), [[UV6:%[0-9]+]]:_(s32), [[UV7:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[FADD]](<4 x s64>) @@ -1710,8 +1529,7 @@ body: | ; GFX9: $vgpr5 = COPY [[UV5]](s32) ; GFX9: $vgpr6 = COPY [[UV6]](s32) ; GFX9: $vgpr7 = COPY [[UV7]](s32) - ; GFX9: [[COPY25:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY24]] - ; GFX9: S_SETPC_B64_return [[COPY25]], implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7 + ; GFX9: S_SETPC_B64_return $sgpr30_sgpr31, implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7 ; GFX9-CONTRACT-LABEL: name: test_4xdouble_add_mul ; GFX9-CONTRACT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 ; GFX9-CONTRACT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 @@ -1752,7 +1570,6 @@ body: | ; GFX9-CONTRACT: [[MV10:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY20]](s32), [[COPY21]](s32) ; GFX9-CONTRACT: [[MV11:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY22]](s32), [[COPY23]](s32) ; GFX9-CONTRACT: [[BUILD_VECTOR2:%[0-9]+]]:_(<4 x s64>) = G_BUILD_VECTOR [[MV8]](s64), [[MV9]](s64), [[MV10]](s64), [[MV11]](s64) - ; GFX9-CONTRACT: [[COPY24:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 ; GFX9-CONTRACT: [[FMA:%[0-9]+]]:_(<4 x s64>) = G_FMA [[BUILD_VECTOR]], [[BUILD_VECTOR1]], [[BUILD_VECTOR2]] ; GFX9-CONTRACT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32), [[UV6:%[0-9]+]]:_(s32), [[UV7:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[FMA]](<4 x s64>) ; GFX9-CONTRACT: $vgpr0 = COPY [[UV]](s32) @@ -1763,8 +1580,7 @@ body: | ; GFX9-CONTRACT: $vgpr5 = COPY [[UV5]](s32) ; GFX9-CONTRACT: $vgpr6 = COPY [[UV6]](s32) ; GFX9-CONTRACT: $vgpr7 = COPY [[UV7]](s32) - ; GFX9-CONTRACT: [[COPY25:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY24]] - ; GFX9-CONTRACT: S_SETPC_B64_return [[COPY25]], implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7 + ; GFX9-CONTRACT: S_SETPC_B64_return $sgpr30_sgpr31, implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit 
$vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7 ; GFX9-DENORM-LABEL: name: test_4xdouble_add_mul ; GFX9-DENORM: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 ; GFX9-DENORM: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 @@ -1805,7 +1621,6 @@ body: | ; GFX9-DENORM: [[MV10:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY20]](s32), [[COPY21]](s32) ; GFX9-DENORM: [[MV11:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY22]](s32), [[COPY23]](s32) ; GFX9-DENORM: [[BUILD_VECTOR2:%[0-9]+]]:_(<4 x s64>) = G_BUILD_VECTOR [[MV8]](s64), [[MV9]](s64), [[MV10]](s64), [[MV11]](s64) - ; GFX9-DENORM: [[COPY24:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 ; GFX9-DENORM: [[FMUL:%[0-9]+]]:_(<4 x s64>) = reassoc G_FMUL [[BUILD_VECTOR]], [[BUILD_VECTOR1]] ; GFX9-DENORM: [[FADD:%[0-9]+]]:_(<4 x s64>) = reassoc G_FADD [[FMUL]], [[BUILD_VECTOR2]] ; GFX9-DENORM: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32), [[UV6:%[0-9]+]]:_(s32), [[UV7:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[FADD]](<4 x s64>) @@ -1817,8 +1632,7 @@ body: | ; GFX9-DENORM: $vgpr5 = COPY [[UV5]](s32) ; GFX9-DENORM: $vgpr6 = COPY [[UV6]](s32) ; GFX9-DENORM: $vgpr7 = COPY [[UV7]](s32) - ; GFX9-DENORM: [[COPY25:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY24]] - ; GFX9-DENORM: S_SETPC_B64_return [[COPY25]], implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7 + ; GFX9-DENORM: S_SETPC_B64_return $sgpr30_sgpr31, implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7 ; GFX9-UNSAFE-LABEL: name: test_4xdouble_add_mul ; GFX9-UNSAFE: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 ; GFX9-UNSAFE: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 @@ -1859,7 +1673,6 @@ body: | ; GFX9-UNSAFE: [[MV10:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY20]](s32), [[COPY21]](s32) ; GFX9-UNSAFE: [[MV11:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY22]](s32), [[COPY23]](s32) ; GFX9-UNSAFE: [[BUILD_VECTOR2:%[0-9]+]]:_(<4 x s64>) = G_BUILD_VECTOR [[MV8]](s64), [[MV9]](s64), [[MV10]](s64), [[MV11]](s64) - ; GFX9-UNSAFE: [[COPY24:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 ; GFX9-UNSAFE: [[FMA:%[0-9]+]]:_(<4 x s64>) = G_FMA [[BUILD_VECTOR]], [[BUILD_VECTOR1]], [[BUILD_VECTOR2]] ; GFX9-UNSAFE: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32), [[UV6:%[0-9]+]]:_(s32), [[UV7:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[FMA]](<4 x s64>) ; GFX9-UNSAFE: $vgpr0 = COPY [[UV]](s32) @@ -1870,8 +1683,7 @@ body: | ; GFX9-UNSAFE: $vgpr5 = COPY [[UV5]](s32) ; GFX9-UNSAFE: $vgpr6 = COPY [[UV6]](s32) ; GFX9-UNSAFE: $vgpr7 = COPY [[UV7]](s32) - ; GFX9-UNSAFE: [[COPY25:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY24]] - ; GFX9-UNSAFE: S_SETPC_B64_return [[COPY25]], implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7 + ; GFX9-UNSAFE: S_SETPC_B64_return $sgpr30_sgpr31, implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7 ; GFX10-LABEL: name: test_4xdouble_add_mul ; GFX10: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 ; GFX10: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 @@ -1912,7 +1724,6 @@ body: | ; GFX10: [[MV10:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY20]](s32), [[COPY21]](s32) ; GFX10: [[MV11:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY22]](s32), [[COPY23]](s32) ; GFX10: 
[[BUILD_VECTOR2:%[0-9]+]]:_(<4 x s64>) = G_BUILD_VECTOR [[MV8]](s64), [[MV9]](s64), [[MV10]](s64), [[MV11]](s64) - ; GFX10: [[COPY24:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 ; GFX10: [[FMUL:%[0-9]+]]:_(<4 x s64>) = reassoc G_FMUL [[BUILD_VECTOR]], [[BUILD_VECTOR1]] ; GFX10: [[FADD:%[0-9]+]]:_(<4 x s64>) = reassoc G_FADD [[FMUL]], [[BUILD_VECTOR2]] ; GFX10: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32), [[UV6:%[0-9]+]]:_(s32), [[UV7:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[FADD]](<4 x s64>) @@ -1924,8 +1735,7 @@ body: | ; GFX10: $vgpr5 = COPY [[UV5]](s32) ; GFX10: $vgpr6 = COPY [[UV6]](s32) ; GFX10: $vgpr7 = COPY [[UV7]](s32) - ; GFX10: [[COPY25:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY24]] - ; GFX10: S_SETPC_B64_return [[COPY25]], implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7 + ; GFX10: S_SETPC_B64_return $sgpr30_sgpr31, implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7 ; GFX10-CONTRACT-LABEL: name: test_4xdouble_add_mul ; GFX10-CONTRACT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 ; GFX10-CONTRACT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 @@ -1966,7 +1776,6 @@ body: | ; GFX10-CONTRACT: [[MV10:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY20]](s32), [[COPY21]](s32) ; GFX10-CONTRACT: [[MV11:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY22]](s32), [[COPY23]](s32) ; GFX10-CONTRACT: [[BUILD_VECTOR2:%[0-9]+]]:_(<4 x s64>) = G_BUILD_VECTOR [[MV8]](s64), [[MV9]](s64), [[MV10]](s64), [[MV11]](s64) - ; GFX10-CONTRACT: [[COPY24:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 ; GFX10-CONTRACT: [[FMA:%[0-9]+]]:_(<4 x s64>) = G_FMA [[BUILD_VECTOR]], [[BUILD_VECTOR1]], [[BUILD_VECTOR2]] ; GFX10-CONTRACT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32), [[UV6:%[0-9]+]]:_(s32), [[UV7:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[FMA]](<4 x s64>) ; GFX10-CONTRACT: $vgpr0 = COPY [[UV]](s32) @@ -1977,8 +1786,7 @@ body: | ; GFX10-CONTRACT: $vgpr5 = COPY [[UV5]](s32) ; GFX10-CONTRACT: $vgpr6 = COPY [[UV6]](s32) ; GFX10-CONTRACT: $vgpr7 = COPY [[UV7]](s32) - ; GFX10-CONTRACT: [[COPY25:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY24]] - ; GFX10-CONTRACT: S_SETPC_B64_return [[COPY25]], implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7 + ; GFX10-CONTRACT: S_SETPC_B64_return $sgpr30_sgpr31, implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7 ; GFX10-DENORM-LABEL: name: test_4xdouble_add_mul ; GFX10-DENORM: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 ; GFX10-DENORM: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 @@ -2019,7 +1827,6 @@ body: | ; GFX10-DENORM: [[MV10:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY20]](s32), [[COPY21]](s32) ; GFX10-DENORM: [[MV11:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY22]](s32), [[COPY23]](s32) ; GFX10-DENORM: [[BUILD_VECTOR2:%[0-9]+]]:_(<4 x s64>) = G_BUILD_VECTOR [[MV8]](s64), [[MV9]](s64), [[MV10]](s64), [[MV11]](s64) - ; GFX10-DENORM: [[COPY24:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 ; GFX10-DENORM: [[FMUL:%[0-9]+]]:_(<4 x s64>) = reassoc G_FMUL [[BUILD_VECTOR]], [[BUILD_VECTOR1]] ; GFX10-DENORM: [[FADD:%[0-9]+]]:_(<4 x s64>) = reassoc G_FADD [[FMUL]], [[BUILD_VECTOR2]] ; GFX10-DENORM: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), 
[[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32), [[UV6:%[0-9]+]]:_(s32), [[UV7:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[FADD]](<4 x s64>) @@ -2031,8 +1838,7 @@ body: | ; GFX10-DENORM: $vgpr5 = COPY [[UV5]](s32) ; GFX10-DENORM: $vgpr6 = COPY [[UV6]](s32) ; GFX10-DENORM: $vgpr7 = COPY [[UV7]](s32) - ; GFX10-DENORM: [[COPY25:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY24]] - ; GFX10-DENORM: S_SETPC_B64_return [[COPY25]], implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7 + ; GFX10-DENORM: S_SETPC_B64_return $sgpr30_sgpr31, implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7 ; GFX10-UNSAFE-LABEL: name: test_4xdouble_add_mul ; GFX10-UNSAFE: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 ; GFX10-UNSAFE: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 @@ -2073,7 +1879,6 @@ body: | ; GFX10-UNSAFE: [[MV10:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY20]](s32), [[COPY21]](s32) ; GFX10-UNSAFE: [[MV11:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY22]](s32), [[COPY23]](s32) ; GFX10-UNSAFE: [[BUILD_VECTOR2:%[0-9]+]]:_(<4 x s64>) = G_BUILD_VECTOR [[MV8]](s64), [[MV9]](s64), [[MV10]](s64), [[MV11]](s64) - ; GFX10-UNSAFE: [[COPY24:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 ; GFX10-UNSAFE: [[FMA:%[0-9]+]]:_(<4 x s64>) = G_FMA [[BUILD_VECTOR]], [[BUILD_VECTOR1]], [[BUILD_VECTOR2]] ; GFX10-UNSAFE: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32), [[UV6:%[0-9]+]]:_(s32), [[UV7:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[FMA]](<4 x s64>) ; GFX10-UNSAFE: $vgpr0 = COPY [[UV]](s32) @@ -2084,8 +1889,7 @@ body: | ; GFX10-UNSAFE: $vgpr5 = COPY [[UV5]](s32) ; GFX10-UNSAFE: $vgpr6 = COPY [[UV6]](s32) ; GFX10-UNSAFE: $vgpr7 = COPY [[UV7]](s32) - ; GFX10-UNSAFE: [[COPY25:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY24]] - ; GFX10-UNSAFE: S_SETPC_B64_return [[COPY25]], implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7 + ; GFX10-UNSAFE: S_SETPC_B64_return $sgpr30_sgpr31, implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7 %4:_(s32) = COPY $vgpr0 %5:_(s32) = COPY $vgpr1 %6:_(s32) = COPY $vgpr2 @@ -2125,7 +1929,6 @@ body: | %38:_(s64) = G_MERGE_VALUES %24(s32), %25(s32) %39:_(s64) = G_MERGE_VALUES %26(s32), %27(s32) %2:_(<4 x s64>) = G_BUILD_VECTOR %36(s64), %37(s64), %38(s64), %39(s64) - %3:sgpr_64 = COPY $sgpr30_sgpr31 %40:_(<4 x s64>) = reassoc G_FMUL %0, %1 %41:_(<4 x s64>) = reassoc G_FADD %40, %2 %43:_(s32), %44:_(s32), %45:_(s32), %46:_(s32), %47:_(s32), %48:_(s32), %49:_(s32), %50:_(s32) = G_UNMERGE_VALUES %41(<4 x s64>) @@ -2137,15 +1940,14 @@ body: | $vgpr5 = COPY %48(s32) $vgpr6 = COPY %49(s32) $vgpr7 = COPY %50(s32) - %42:ccr_sgpr_64 = COPY %3 - S_SETPC_B64_return %42, implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7 + S_SETPC_B64_return $sgpr30_sgpr31, implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7 ... 
--- name: test_3xdouble_add_mul_rhs body: | bb.1.entry: - liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $vgpr8, $vgpr9, $vgpr10, $vgpr11, $vgpr12, $vgpr13, $vgpr14, $vgpr15, $vgpr16, $vgpr17, $sgpr30_sgpr31 + liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $vgpr8, $vgpr9, $vgpr10, $vgpr11, $vgpr12, $vgpr13, $vgpr14, $vgpr15, $vgpr16, $vgpr17 ; GFX9-LABEL: name: test_3xdouble_add_mul_rhs ; GFX9: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 @@ -2178,7 +1980,6 @@ body: | ; GFX9: [[MV7:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY14]](s32), [[COPY15]](s32) ; GFX9: [[MV8:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY16]](s32), [[COPY17]](s32) ; GFX9: [[BUILD_VECTOR2:%[0-9]+]]:_(<3 x s64>) = G_BUILD_VECTOR [[MV6]](s64), [[MV7]](s64), [[MV8]](s64) - ; GFX9: [[COPY18:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 ; GFX9: [[FMUL:%[0-9]+]]:_(<3 x s64>) = reassoc G_FMUL [[BUILD_VECTOR]], [[BUILD_VECTOR1]] ; GFX9: [[FADD:%[0-9]+]]:_(<3 x s64>) = reassoc G_FADD [[BUILD_VECTOR2]], [[FMUL]] ; GFX9: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[FADD]](<3 x s64>) @@ -2188,8 +1989,7 @@ body: | ; GFX9: $vgpr3 = COPY [[UV3]](s32) ; GFX9: $vgpr4 = COPY [[UV4]](s32) ; GFX9: $vgpr5 = COPY [[UV5]](s32) - ; GFX9: [[COPY19:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY18]] - ; GFX9: S_SETPC_B64_return [[COPY19]], implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5 + ; GFX9: S_SETPC_B64_return $sgpr30_sgpr31, implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5 ; GFX9-CONTRACT-LABEL: name: test_3xdouble_add_mul_rhs ; GFX9-CONTRACT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 ; GFX9-CONTRACT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 @@ -2221,7 +2021,6 @@ body: | ; GFX9-CONTRACT: [[MV7:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY14]](s32), [[COPY15]](s32) ; GFX9-CONTRACT: [[MV8:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY16]](s32), [[COPY17]](s32) ; GFX9-CONTRACT: [[BUILD_VECTOR2:%[0-9]+]]:_(<3 x s64>) = G_BUILD_VECTOR [[MV6]](s64), [[MV7]](s64), [[MV8]](s64) - ; GFX9-CONTRACT: [[COPY18:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 ; GFX9-CONTRACT: [[FMA:%[0-9]+]]:_(<3 x s64>) = G_FMA [[BUILD_VECTOR]], [[BUILD_VECTOR1]], [[BUILD_VECTOR2]] ; GFX9-CONTRACT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[FMA]](<3 x s64>) ; GFX9-CONTRACT: $vgpr0 = COPY [[UV]](s32) @@ -2230,8 +2029,7 @@ body: | ; GFX9-CONTRACT: $vgpr3 = COPY [[UV3]](s32) ; GFX9-CONTRACT: $vgpr4 = COPY [[UV4]](s32) ; GFX9-CONTRACT: $vgpr5 = COPY [[UV5]](s32) - ; GFX9-CONTRACT: [[COPY19:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY18]] - ; GFX9-CONTRACT: S_SETPC_B64_return [[COPY19]], implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5 + ; GFX9-CONTRACT: S_SETPC_B64_return $sgpr30_sgpr31, implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5 ; GFX9-DENORM-LABEL: name: test_3xdouble_add_mul_rhs ; GFX9-DENORM: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 ; GFX9-DENORM: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 @@ -2263,7 +2061,6 @@ body: | ; GFX9-DENORM: [[MV7:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY14]](s32), [[COPY15]](s32) ; GFX9-DENORM: [[MV8:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY16]](s32), [[COPY17]](s32) ; GFX9-DENORM: [[BUILD_VECTOR2:%[0-9]+]]:_(<3 x s64>) = 
G_BUILD_VECTOR [[MV6]](s64), [[MV7]](s64), [[MV8]](s64) - ; GFX9-DENORM: [[COPY18:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 ; GFX9-DENORM: [[FMUL:%[0-9]+]]:_(<3 x s64>) = reassoc G_FMUL [[BUILD_VECTOR]], [[BUILD_VECTOR1]] ; GFX9-DENORM: [[FADD:%[0-9]+]]:_(<3 x s64>) = reassoc G_FADD [[BUILD_VECTOR2]], [[FMUL]] ; GFX9-DENORM: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[FADD]](<3 x s64>) @@ -2273,8 +2070,7 @@ body: | ; GFX9-DENORM: $vgpr3 = COPY [[UV3]](s32) ; GFX9-DENORM: $vgpr4 = COPY [[UV4]](s32) ; GFX9-DENORM: $vgpr5 = COPY [[UV5]](s32) - ; GFX9-DENORM: [[COPY19:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY18]] - ; GFX9-DENORM: S_SETPC_B64_return [[COPY19]], implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5 + ; GFX9-DENORM: S_SETPC_B64_return $sgpr30_sgpr31, implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5 ; GFX9-UNSAFE-LABEL: name: test_3xdouble_add_mul_rhs ; GFX9-UNSAFE: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 ; GFX9-UNSAFE: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 @@ -2306,7 +2102,6 @@ body: | ; GFX9-UNSAFE: [[MV7:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY14]](s32), [[COPY15]](s32) ; GFX9-UNSAFE: [[MV8:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY16]](s32), [[COPY17]](s32) ; GFX9-UNSAFE: [[BUILD_VECTOR2:%[0-9]+]]:_(<3 x s64>) = G_BUILD_VECTOR [[MV6]](s64), [[MV7]](s64), [[MV8]](s64) - ; GFX9-UNSAFE: [[COPY18:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 ; GFX9-UNSAFE: [[FMA:%[0-9]+]]:_(<3 x s64>) = G_FMA [[BUILD_VECTOR]], [[BUILD_VECTOR1]], [[BUILD_VECTOR2]] ; GFX9-UNSAFE: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[FMA]](<3 x s64>) ; GFX9-UNSAFE: $vgpr0 = COPY [[UV]](s32) @@ -2315,8 +2110,7 @@ body: | ; GFX9-UNSAFE: $vgpr3 = COPY [[UV3]](s32) ; GFX9-UNSAFE: $vgpr4 = COPY [[UV4]](s32) ; GFX9-UNSAFE: $vgpr5 = COPY [[UV5]](s32) - ; GFX9-UNSAFE: [[COPY19:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY18]] - ; GFX9-UNSAFE: S_SETPC_B64_return [[COPY19]], implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5 + ; GFX9-UNSAFE: S_SETPC_B64_return $sgpr30_sgpr31, implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5 ; GFX10-LABEL: name: test_3xdouble_add_mul_rhs ; GFX10: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 ; GFX10: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 @@ -2348,7 +2142,6 @@ body: | ; GFX10: [[MV7:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY14]](s32), [[COPY15]](s32) ; GFX10: [[MV8:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY16]](s32), [[COPY17]](s32) ; GFX10: [[BUILD_VECTOR2:%[0-9]+]]:_(<3 x s64>) = G_BUILD_VECTOR [[MV6]](s64), [[MV7]](s64), [[MV8]](s64) - ; GFX10: [[COPY18:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 ; GFX10: [[FMUL:%[0-9]+]]:_(<3 x s64>) = reassoc G_FMUL [[BUILD_VECTOR]], [[BUILD_VECTOR1]] ; GFX10: [[FADD:%[0-9]+]]:_(<3 x s64>) = reassoc G_FADD [[BUILD_VECTOR2]], [[FMUL]] ; GFX10: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[FADD]](<3 x s64>) @@ -2358,8 +2151,7 @@ body: | ; GFX10: $vgpr3 = COPY [[UV3]](s32) ; GFX10: $vgpr4 = COPY [[UV4]](s32) ; GFX10: $vgpr5 = COPY [[UV5]](s32) - ; GFX10: [[COPY19:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY18]] - ; GFX10: S_SETPC_B64_return [[COPY19]], 
implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5 + ; GFX10: S_SETPC_B64_return $sgpr30_sgpr31, implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5 ; GFX10-CONTRACT-LABEL: name: test_3xdouble_add_mul_rhs ; GFX10-CONTRACT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 ; GFX10-CONTRACT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 @@ -2391,7 +2183,6 @@ body: | ; GFX10-CONTRACT: [[MV7:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY14]](s32), [[COPY15]](s32) ; GFX10-CONTRACT: [[MV8:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY16]](s32), [[COPY17]](s32) ; GFX10-CONTRACT: [[BUILD_VECTOR2:%[0-9]+]]:_(<3 x s64>) = G_BUILD_VECTOR [[MV6]](s64), [[MV7]](s64), [[MV8]](s64) - ; GFX10-CONTRACT: [[COPY18:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 ; GFX10-CONTRACT: [[FMA:%[0-9]+]]:_(<3 x s64>) = G_FMA [[BUILD_VECTOR]], [[BUILD_VECTOR1]], [[BUILD_VECTOR2]] ; GFX10-CONTRACT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[FMA]](<3 x s64>) ; GFX10-CONTRACT: $vgpr0 = COPY [[UV]](s32) @@ -2400,8 +2191,7 @@ body: | ; GFX10-CONTRACT: $vgpr3 = COPY [[UV3]](s32) ; GFX10-CONTRACT: $vgpr4 = COPY [[UV4]](s32) ; GFX10-CONTRACT: $vgpr5 = COPY [[UV5]](s32) - ; GFX10-CONTRACT: [[COPY19:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY18]] - ; GFX10-CONTRACT: S_SETPC_B64_return [[COPY19]], implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5 + ; GFX10-CONTRACT: S_SETPC_B64_return $sgpr30_sgpr31, implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5 ; GFX10-DENORM-LABEL: name: test_3xdouble_add_mul_rhs ; GFX10-DENORM: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 ; GFX10-DENORM: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 @@ -2433,7 +2223,6 @@ body: | ; GFX10-DENORM: [[MV7:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY14]](s32), [[COPY15]](s32) ; GFX10-DENORM: [[MV8:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY16]](s32), [[COPY17]](s32) ; GFX10-DENORM: [[BUILD_VECTOR2:%[0-9]+]]:_(<3 x s64>) = G_BUILD_VECTOR [[MV6]](s64), [[MV7]](s64), [[MV8]](s64) - ; GFX10-DENORM: [[COPY18:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 ; GFX10-DENORM: [[FMUL:%[0-9]+]]:_(<3 x s64>) = reassoc G_FMUL [[BUILD_VECTOR]], [[BUILD_VECTOR1]] ; GFX10-DENORM: [[FADD:%[0-9]+]]:_(<3 x s64>) = reassoc G_FADD [[BUILD_VECTOR2]], [[FMUL]] ; GFX10-DENORM: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[FADD]](<3 x s64>) @@ -2443,8 +2232,7 @@ body: | ; GFX10-DENORM: $vgpr3 = COPY [[UV3]](s32) ; GFX10-DENORM: $vgpr4 = COPY [[UV4]](s32) ; GFX10-DENORM: $vgpr5 = COPY [[UV5]](s32) - ; GFX10-DENORM: [[COPY19:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY18]] - ; GFX10-DENORM: S_SETPC_B64_return [[COPY19]], implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5 + ; GFX10-DENORM: S_SETPC_B64_return $sgpr30_sgpr31, implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5 ; GFX10-UNSAFE-LABEL: name: test_3xdouble_add_mul_rhs ; GFX10-UNSAFE: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 ; GFX10-UNSAFE: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 @@ -2476,7 +2264,6 @@ body: | ; GFX10-UNSAFE: [[MV7:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY14]](s32), [[COPY15]](s32) ; GFX10-UNSAFE: [[MV8:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY16]](s32), [[COPY17]](s32) ; 
GFX10-UNSAFE: [[BUILD_VECTOR2:%[0-9]+]]:_(<3 x s64>) = G_BUILD_VECTOR [[MV6]](s64), [[MV7]](s64), [[MV8]](s64) - ; GFX10-UNSAFE: [[COPY18:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 ; GFX10-UNSAFE: [[FMA:%[0-9]+]]:_(<3 x s64>) = G_FMA [[BUILD_VECTOR]], [[BUILD_VECTOR1]], [[BUILD_VECTOR2]] ; GFX10-UNSAFE: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[FMA]](<3 x s64>) ; GFX10-UNSAFE: $vgpr0 = COPY [[UV]](s32) @@ -2485,8 +2272,7 @@ body: | ; GFX10-UNSAFE: $vgpr3 = COPY [[UV3]](s32) ; GFX10-UNSAFE: $vgpr4 = COPY [[UV4]](s32) ; GFX10-UNSAFE: $vgpr5 = COPY [[UV5]](s32) - ; GFX10-UNSAFE: [[COPY19:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY18]] - ; GFX10-UNSAFE: S_SETPC_B64_return [[COPY19]], implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5 + ; GFX10-UNSAFE: S_SETPC_B64_return $sgpr30_sgpr31, implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5 %4:_(s32) = COPY $vgpr0 %5:_(s32) = COPY $vgpr1 %6:_(s32) = COPY $vgpr2 @@ -2517,7 +2303,6 @@ body: | %29:_(s64) = G_MERGE_VALUES %18(s32), %19(s32) %30:_(s64) = G_MERGE_VALUES %20(s32), %21(s32) %2:_(<3 x s64>) = G_BUILD_VECTOR %28(s64), %29(s64), %30(s64) - %3:sgpr_64 = COPY $sgpr30_sgpr31 %31:_(<3 x s64>) = reassoc G_FMUL %0, %1 %32:_(<3 x s64>) = reassoc G_FADD %2, %31 %34:_(s32), %35:_(s32), %36:_(s32), %37:_(s32), %38:_(s32), %39:_(s32) = G_UNMERGE_VALUES %32(<3 x s64>) @@ -2527,6 +2312,5 @@ body: | $vgpr3 = COPY %37(s32) $vgpr4 = COPY %38(s32) $vgpr5 = COPY %39(s32) - %33:ccr_sgpr_64 = COPY %3 - S_SETPC_B64_return %33, implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5 + S_SETPC_B64_return $sgpr30_sgpr31, implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5 ... 
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/combine-shift-imm-chain-illegal-types.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/combine-shift-imm-chain-illegal-types.mir index e271229aa1fd..dcec23c030c4 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/combine-shift-imm-chain-illegal-types.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/combine-shift-imm-chain-illegal-types.mir @@ -283,10 +283,9 @@ body: | liveins: $vgpr0, $vgpr1, $sgpr30_sgpr31 ; CHECK-LABEL: name: ushlsat_i44 - ; CHECK: liveins: $vgpr0, $vgpr1, $sgpr30_sgpr31 + ; CHECK: liveins: $vgpr0, $vgpr1 ; CHECK: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 ; CHECK: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; CHECK: [[COPY2:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 ; CHECK: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY]](s32), [[COPY1]](s32) ; CHECK: [[TRUNC:%[0-9]+]]:_(s44) = G_TRUNC [[MV]](s64) ; CHECK: [[C:%[0-9]+]]:_(s44) = G_CONSTANT i44 22 @@ -296,11 +295,9 @@ body: | ; CHECK: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[ANYEXT]](s64) ; CHECK: $vgpr0 = COPY [[UV]](s32) ; CHECK: $vgpr1 = COPY [[UV1]](s32) - ; CHECK: [[COPY3:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY2]] - ; CHECK: S_SETPC_B64_return [[COPY3]], implicit $vgpr0, implicit $vgpr1 + ; CHECK: S_SETPC_B64_return undef $sgpr30_sgpr31, implicit $vgpr0, implicit $vgpr1 %2:_(s32) = COPY $vgpr0 %3:_(s32) = COPY $vgpr1 - %1:sgpr_64 = COPY $sgpr30_sgpr31 %4:_(s64) = G_MERGE_VALUES %2(s32), %3(s32) %0:_(s44) = G_TRUNC %4(s64) %5:_(s44) = G_CONSTANT i44 22 @@ -310,8 +307,7 @@ body: | %10:_(s32), %11:_(s32) = G_UNMERGE_VALUES %9(s64) $vgpr0 = COPY %10(s32) $vgpr1 = COPY %11(s32) - %8:ccr_sgpr_64 = COPY %1 - S_SETPC_B64_return %8, implicit $vgpr0, implicit $vgpr1 + S_SETPC_B64_return undef $sgpr30_sgpr31, implicit $vgpr0, implicit $vgpr1 ... --- @@ -319,13 +315,12 @@ name: ushlsat_i55 tracksRegLiveness: true body: | bb.0: - liveins: $vgpr0, $vgpr1, $sgpr30_sgpr31 + liveins: $vgpr0, $vgpr1 ; CHECK-LABEL: name: ushlsat_i55 - ; CHECK: liveins: $vgpr0, $vgpr1, $sgpr30_sgpr31 + ; CHECK: liveins: $vgpr0, $vgpr1 ; CHECK: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 ; CHECK: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; CHECK: [[COPY2:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 ; CHECK: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY]](s32), [[COPY1]](s32) ; CHECK: [[TRUNC:%[0-9]+]]:_(s55) = G_TRUNC [[MV]](s64) ; CHECK: [[C:%[0-9]+]]:_(s55) = G_CONSTANT i55 53 @@ -334,11 +329,9 @@ body: | ; CHECK: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[ANYEXT]](s64) ; CHECK: $vgpr0 = COPY [[UV]](s32) ; CHECK: $vgpr1 = COPY [[UV1]](s32) - ; CHECK: [[COPY3:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY2]] - ; CHECK: S_SETPC_B64_return [[COPY3]], implicit $vgpr0, implicit $vgpr1 + ; CHECK: S_SETPC_B64_return undef $sgpr30_sgpr31, implicit $vgpr0, implicit $vgpr1 %2:_(s32) = COPY $vgpr0 %3:_(s32) = COPY $vgpr1 - %1:sgpr_64 = COPY $sgpr30_sgpr31 %4:_(s64) = G_MERGE_VALUES %2(s32), %3(s32) %0:_(s55) = G_TRUNC %4(s64) %5:_(s55) = G_CONSTANT i55 50 @@ -349,7 +342,6 @@ body: | %11:_(s32), %12:_(s32) = G_UNMERGE_VALUES %10(s64) $vgpr0 = COPY %11(s32) $vgpr1 = COPY %12(s32) - %9:ccr_sgpr_64 = COPY %1 - S_SETPC_B64_return %9, implicit $vgpr0, implicit $vgpr1 + S_SETPC_B64_return undef $sgpr30_sgpr31, implicit $vgpr0, implicit $vgpr1 ... 
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/dummy-target.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/dummy-target.ll index 8a957d101912..5dfde116785d 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/dummy-target.ll +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/dummy-target.ll @@ -6,15 +6,14 @@ define i16 @vop3p_add_i16(i16 %arg0) #0 { ; CHECK-LABEL: name: vop3p_add_i16 ; CHECK: bb.1 (%ir-block.0): - ; CHECK: liveins: $vgpr0, $sgpr30_sgpr31 - ; CHECK: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; CHECK: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY]](s32) - ; CHECK: [[COPY1:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 - ; CHECK: [[ADD:%[0-9]+]]:_(s16) = G_ADD [[TRUNC]], [[TRUNC]] - ; CHECK: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[ADD]](s16) - ; CHECK: $vgpr0 = COPY [[ANYEXT]](s32) - ; CHECK: [[COPY2:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY1]] - ; CHECK: S_SETPC_B64_return [[COPY2]], implicit $vgpr0 + ; CHECK-NEXT: liveins: $vgpr0 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 + ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY]](s32) + ; CHECK-NEXT: [[ADD:%[0-9]+]]:_(s16) = G_ADD [[TRUNC]], [[TRUNC]] + ; CHECK-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[ADD]](s16) + ; CHECK-NEXT: $vgpr0 = COPY [[ANYEXT]](s32) + ; CHECK-NEXT: SI_RETURN implicit $vgpr0 %add = add i16 %arg0, %arg0 ret i16 %add } @@ -22,28 +21,27 @@ define i16 @vop3p_add_i16(i16 %arg0) #0 { define <2 x i16> @vop3p_add_v2i16(<2 x i16> %arg0) #0 { ; CHECK-LABEL: name: vop3p_add_v2i16 ; CHECK: bb.1 (%ir-block.0): - ; CHECK: liveins: $vgpr0, $sgpr30_sgpr31 - ; CHECK: [[COPY:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0 - ; CHECK: [[COPY1:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 - ; CHECK: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY]](<2 x s16>) - ; CHECK: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST]](s32) - ; CHECK: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; CHECK: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) - ; CHECK: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) - ; CHECK: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[COPY]](<2 x s16>) - ; CHECK: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST1]](s32) - ; CHECK: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C]](s32) - ; CHECK: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR1]](s32) - ; CHECK: [[ADD:%[0-9]+]]:_(s16) = G_ADD [[TRUNC]], [[TRUNC2]] - ; CHECK: [[ADD1:%[0-9]+]]:_(s16) = G_ADD [[TRUNC1]], [[TRUNC3]] - ; CHECK: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[ADD]](s16) - ; CHECK: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[ADD1]](s16) - ; CHECK: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C]](s32) - ; CHECK: [[OR:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL]] - ; CHECK: [[BITCAST2:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) - ; CHECK: $vgpr0 = COPY [[BITCAST2]](<2 x s16>) - ; CHECK: [[COPY2:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY1]] - ; CHECK: S_SETPC_B64_return [[COPY2]], implicit $vgpr0 + ; CHECK-NEXT: liveins: $vgpr0 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0 + ; CHECK-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY]](<2 x s16>) + ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST]](s32) + ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; CHECK-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) + ; CHECK-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) + ; CHECK-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[COPY]](<2 x s16>) + ; CHECK-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST1]](s32) + ; CHECK-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C]](s32) + ; CHECK-NEXT: 
[[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR1]](s32) + ; CHECK-NEXT: [[ADD:%[0-9]+]]:_(s16) = G_ADD [[TRUNC]], [[TRUNC2]] + ; CHECK-NEXT: [[ADD1:%[0-9]+]]:_(s16) = G_ADD [[TRUNC1]], [[TRUNC3]] + ; CHECK-NEXT: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[ADD]](s16) + ; CHECK-NEXT: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[ADD1]](s16) + ; CHECK-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C]](s32) + ; CHECK-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL]] + ; CHECK-NEXT: [[BITCAST2:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) + ; CHECK-NEXT: $vgpr0 = COPY [[BITCAST2]](<2 x s16>) + ; CHECK-NEXT: SI_RETURN implicit $vgpr0 %add = add <2 x i16> %arg0, %arg0 ret <2 x i16> %add } @@ -51,13 +49,12 @@ define <2 x i16> @vop3p_add_v2i16(<2 x i16> %arg0) #0 { define i16 @halfinsts_add_i16(i16 %arg0) #1 { ; CHECK-LABEL: name: halfinsts_add_i16 ; CHECK: bb.1 (%ir-block.0): - ; CHECK: liveins: $vgpr0, $sgpr30_sgpr31 - ; CHECK: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; CHECK: [[COPY1:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 - ; CHECK: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[COPY]], [[COPY]] - ; CHECK: $vgpr0 = COPY [[ADD]](s32) - ; CHECK: [[COPY2:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY1]] - ; CHECK: S_SETPC_B64_return [[COPY2]], implicit $vgpr0 + ; CHECK-NEXT: liveins: $vgpr0 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 + ; CHECK-NEXT: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[COPY]], [[COPY]] + ; CHECK-NEXT: $vgpr0 = COPY [[ADD]](s32) + ; CHECK-NEXT: SI_RETURN implicit $vgpr0 %add = add i16 %arg0, %arg0 ret i16 %add } @@ -65,16 +62,15 @@ define i16 @halfinsts_add_i16(i16 %arg0) #1 { define <2 x i16> @halfinsts_add_v2i16(<2 x i16> %arg0) #1 { ; CHECK-LABEL: name: halfinsts_add_v2i16 ; CHECK: bb.1 (%ir-block.0): - ; CHECK: liveins: $vgpr0, $vgpr1, $sgpr30_sgpr31 - ; CHECK: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; CHECK: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; CHECK: [[COPY2:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 - ; CHECK: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[COPY]], [[COPY]] - ; CHECK: [[ADD1:%[0-9]+]]:_(s32) = G_ADD [[COPY1]], [[COPY1]] - ; CHECK: $vgpr0 = COPY [[ADD]](s32) - ; CHECK: $vgpr1 = COPY [[ADD1]](s32) - ; CHECK: [[COPY3:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY2]] - ; CHECK: S_SETPC_B64_return [[COPY3]], implicit $vgpr0, implicit $vgpr1 + ; CHECK-NEXT: liveins: $vgpr0, $vgpr1 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; CHECK-NEXT: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[COPY]], [[COPY]] + ; CHECK-NEXT: [[ADD1:%[0-9]+]]:_(s32) = G_ADD [[COPY1]], [[COPY1]] + ; CHECK-NEXT: $vgpr0 = COPY [[ADD]](s32) + ; CHECK-NEXT: $vgpr1 = COPY [[ADD1]](s32) + ; CHECK-NEXT: SI_RETURN implicit $vgpr0, implicit $vgpr1 %add = add <2 x i16> %arg0, %arg0 ret <2 x i16> %add } diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/fdiv.f32.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/fdiv.f32.ll index 7cd475bf0098..140c3ff69407 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/fdiv.f32.ll +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/fdiv.f32.ll @@ -667,8 +667,8 @@ define <2 x float> @v_fdiv_v2f32(<2 x float> %a, <2 x float> %b) { ; GFX89-IEEE-NEXT: v_fma_f32 v10, v12, v8, v10 ; GFX89-IEEE-NEXT: v_fma_f32 v11, v13, v9, v11 ; GFX89-IEEE-NEXT: v_fma_f32 v4, -v4, v10, v6 -; GFX89-IEEE-NEXT: v_div_fmas_f32 v4, v4, v8, v10 ; GFX89-IEEE-NEXT: v_fma_f32 v5, -v5, v11, v7 +; GFX89-IEEE-NEXT: v_div_fmas_f32 v4, v4, v8, v10 ; GFX89-IEEE-NEXT: s_mov_b64 vcc, s[4:5] ; GFX89-IEEE-NEXT: v_div_fmas_f32 v5, v5, v9, v11 ; GFX89-IEEE-NEXT: v_div_fixup_f32 v0, v4, v2, v0 
@@ -860,8 +860,8 @@ define <2 x float> @v_fdiv_v2f32_ulp25(<2 x float> %a, <2 x float> %b) { ; GFX89-IEEE-NEXT: v_fma_f32 v10, v12, v8, v10 ; GFX89-IEEE-NEXT: v_fma_f32 v11, v13, v9, v11 ; GFX89-IEEE-NEXT: v_fma_f32 v4, -v4, v10, v6 -; GFX89-IEEE-NEXT: v_div_fmas_f32 v4, v4, v8, v10 ; GFX89-IEEE-NEXT: v_fma_f32 v5, -v5, v11, v7 +; GFX89-IEEE-NEXT: v_div_fmas_f32 v4, v4, v8, v10 ; GFX89-IEEE-NEXT: s_mov_b64 vcc, s[4:5] ; GFX89-IEEE-NEXT: v_div_fmas_f32 v5, v5, v9, v11 ; GFX89-IEEE-NEXT: v_div_fixup_f32 v0, v4, v2, v0 @@ -1451,8 +1451,8 @@ define <2 x float> @v_fdiv_v2f32_arcp_ulp25(<2 x float> %a, <2 x float> %b) { ; GFX89-IEEE-NEXT: v_fma_f32 v10, v12, v8, v10 ; GFX89-IEEE-NEXT: v_fma_f32 v11, v13, v9, v11 ; GFX89-IEEE-NEXT: v_fma_f32 v4, -v4, v10, v6 -; GFX89-IEEE-NEXT: v_div_fmas_f32 v4, v4, v8, v10 ; GFX89-IEEE-NEXT: v_fma_f32 v5, -v5, v11, v7 +; GFX89-IEEE-NEXT: v_div_fmas_f32 v4, v4, v8, v10 ; GFX89-IEEE-NEXT: s_mov_b64 vcc, s[4:5] ; GFX89-IEEE-NEXT: v_div_fmas_f32 v5, v5, v9, v11 ; GFX89-IEEE-NEXT: v_div_fixup_f32 v0, v4, v2, v0 diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/fdiv.f64.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/fdiv.f64.ll index 543174484924..39153cb8a744 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/fdiv.f64.ll +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/fdiv.f64.ll @@ -837,33 +837,34 @@ define <2 x double> @v_rcp_v2f64(<2 x double> %x) { ; GFX6: ; %bb.0: ; GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX6-NEXT: v_div_scale_f64 v[4:5], s[4:5], v[0:1], v[0:1], 1.0 -; GFX6-NEXT: v_div_scale_f64 v[10:11], s[4:5], 1.0, v[0:1], 1.0 +; GFX6-NEXT: v_div_scale_f64 v[10:11], s[4:5], v[2:3], v[2:3], 1.0 ; GFX6-NEXT: v_rcp_f64_e32 v[6:7], v[4:5] ; GFX6-NEXT: v_mov_b32_e32 v18, 0x3ff00000 -; GFX6-NEXT: v_cmp_eq_u32_e32 vcc, v18, v11 +; GFX6-NEXT: v_rcp_f64_e32 v[14:15], v[10:11] ; GFX6-NEXT: v_fma_f64 v[8:9], -v[4:5], v[6:7], 1.0 +; GFX6-NEXT: v_fma_f64 v[16:17], -v[10:11], v[14:15], 1.0 ; GFX6-NEXT: v_fma_f64 v[6:7], v[6:7], v[8:9], v[6:7] -; GFX6-NEXT: v_div_scale_f64 v[8:9], s[4:5], v[2:3], v[2:3], 1.0 +; GFX6-NEXT: v_div_scale_f64 v[8:9], s[4:5], 1.0, v[0:1], 1.0 ; GFX6-NEXT: v_fma_f64 v[12:13], -v[4:5], v[6:7], 1.0 -; GFX6-NEXT: v_cmp_eq_u32_e64 s[4:5], v1, v5 +; GFX6-NEXT: v_fma_f64 v[14:15], v[14:15], v[16:17], v[14:15] ; GFX6-NEXT: v_fma_f64 v[6:7], v[6:7], v[12:13], v[6:7] -; GFX6-NEXT: v_rcp_f64_e32 v[12:13], v[8:9] -; GFX6-NEXT: v_mul_f64 v[14:15], v[10:11], v[6:7] -; GFX6-NEXT: s_xor_b64 vcc, vcc, s[4:5] -; GFX6-NEXT: v_fma_f64 v[10:11], -v[4:5], v[14:15], v[10:11] -; GFX6-NEXT: v_fma_f64 v[16:17], -v[8:9], v[12:13], 1.0 -; GFX6-NEXT: v_div_fmas_f64 v[6:7], v[10:11], v[6:7], v[14:15] -; GFX6-NEXT: v_fma_f64 v[12:13], v[12:13], v[16:17], v[12:13] +; GFX6-NEXT: v_cmp_eq_u32_e32 vcc, v18, v9 +; GFX6-NEXT: v_mul_f64 v[12:13], v[8:9], v[6:7] +; GFX6-NEXT: v_cmp_eq_u32_e64 s[4:5], v1, v5 +; GFX6-NEXT: v_fma_f64 v[8:9], -v[4:5], v[12:13], v[8:9] +; GFX6-NEXT: v_fma_f64 v[4:5], -v[10:11], v[14:15], 1.0 ; GFX6-NEXT: v_div_scale_f64 v[16:17], s[6:7], 1.0, v[2:3], 1.0 -; GFX6-NEXT: v_fma_f64 v[4:5], -v[8:9], v[12:13], 1.0 -; GFX6-NEXT: v_cmp_eq_u32_e64 s[4:5], v3, v9 -; GFX6-NEXT: v_fma_f64 v[4:5], v[12:13], v[4:5], v[12:13] +; GFX6-NEXT: v_fma_f64 v[4:5], v[14:15], v[4:5], v[14:15] +; GFX6-NEXT: s_xor_b64 vcc, vcc, s[4:5] +; GFX6-NEXT: v_mul_f64 v[14:15], v[16:17], v[4:5] +; GFX6-NEXT: v_div_fmas_f64 v[6:7], v[8:9], v[6:7], v[12:13] +; GFX6-NEXT: v_fma_f64 v[8:9], -v[10:11], v[14:15], v[16:17] ; GFX6-NEXT: v_cmp_eq_u32_e32 vcc, v18, v17 -; GFX6-NEXT: v_mul_f64 
v[12:13], v[16:17], v[4:5] +; GFX6-NEXT: v_cmp_eq_u32_e64 s[4:5], v3, v11 ; GFX6-NEXT: s_xor_b64 vcc, vcc, s[4:5] -; GFX6-NEXT: v_fma_f64 v[10:11], -v[8:9], v[12:13], v[16:17] ; GFX6-NEXT: v_div_fixup_f64 v[0:1], v[6:7], v[0:1], 1.0 -; GFX6-NEXT: v_div_fmas_f64 v[4:5], v[10:11], v[4:5], v[12:13] +; GFX6-NEXT: s_nop 0 +; GFX6-NEXT: v_div_fmas_f64 v[4:5], v[8:9], v[4:5], v[14:15] ; GFX6-NEXT: v_div_fixup_f64 v[2:3], v[4:5], v[2:3], 1.0 ; GFX6-NEXT: s_setpc_b64 s[30:31] ; @@ -960,33 +961,34 @@ define <2 x double> @v_rcp_v2f64_arcp(<2 x double> %x) { ; GFX6: ; %bb.0: ; GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX6-NEXT: v_div_scale_f64 v[4:5], s[4:5], v[0:1], v[0:1], 1.0 -; GFX6-NEXT: v_div_scale_f64 v[10:11], s[4:5], 1.0, v[0:1], 1.0 +; GFX6-NEXT: v_div_scale_f64 v[10:11], s[4:5], v[2:3], v[2:3], 1.0 ; GFX6-NEXT: v_rcp_f64_e32 v[6:7], v[4:5] ; GFX6-NEXT: v_mov_b32_e32 v18, 0x3ff00000 -; GFX6-NEXT: v_cmp_eq_u32_e32 vcc, v18, v11 +; GFX6-NEXT: v_rcp_f64_e32 v[14:15], v[10:11] ; GFX6-NEXT: v_fma_f64 v[8:9], -v[4:5], v[6:7], 1.0 +; GFX6-NEXT: v_fma_f64 v[16:17], -v[10:11], v[14:15], 1.0 ; GFX6-NEXT: v_fma_f64 v[6:7], v[6:7], v[8:9], v[6:7] -; GFX6-NEXT: v_div_scale_f64 v[8:9], s[4:5], v[2:3], v[2:3], 1.0 +; GFX6-NEXT: v_div_scale_f64 v[8:9], s[4:5], 1.0, v[0:1], 1.0 ; GFX6-NEXT: v_fma_f64 v[12:13], -v[4:5], v[6:7], 1.0 -; GFX6-NEXT: v_cmp_eq_u32_e64 s[4:5], v1, v5 +; GFX6-NEXT: v_fma_f64 v[14:15], v[14:15], v[16:17], v[14:15] ; GFX6-NEXT: v_fma_f64 v[6:7], v[6:7], v[12:13], v[6:7] -; GFX6-NEXT: v_rcp_f64_e32 v[12:13], v[8:9] -; GFX6-NEXT: v_mul_f64 v[14:15], v[10:11], v[6:7] -; GFX6-NEXT: s_xor_b64 vcc, vcc, s[4:5] -; GFX6-NEXT: v_fma_f64 v[10:11], -v[4:5], v[14:15], v[10:11] -; GFX6-NEXT: v_fma_f64 v[16:17], -v[8:9], v[12:13], 1.0 -; GFX6-NEXT: v_div_fmas_f64 v[6:7], v[10:11], v[6:7], v[14:15] -; GFX6-NEXT: v_fma_f64 v[12:13], v[12:13], v[16:17], v[12:13] +; GFX6-NEXT: v_cmp_eq_u32_e32 vcc, v18, v9 +; GFX6-NEXT: v_mul_f64 v[12:13], v[8:9], v[6:7] +; GFX6-NEXT: v_cmp_eq_u32_e64 s[4:5], v1, v5 +; GFX6-NEXT: v_fma_f64 v[8:9], -v[4:5], v[12:13], v[8:9] +; GFX6-NEXT: v_fma_f64 v[4:5], -v[10:11], v[14:15], 1.0 ; GFX6-NEXT: v_div_scale_f64 v[16:17], s[6:7], 1.0, v[2:3], 1.0 -; GFX6-NEXT: v_fma_f64 v[4:5], -v[8:9], v[12:13], 1.0 -; GFX6-NEXT: v_cmp_eq_u32_e64 s[4:5], v3, v9 -; GFX6-NEXT: v_fma_f64 v[4:5], v[12:13], v[4:5], v[12:13] +; GFX6-NEXT: v_fma_f64 v[4:5], v[14:15], v[4:5], v[14:15] +; GFX6-NEXT: s_xor_b64 vcc, vcc, s[4:5] +; GFX6-NEXT: v_mul_f64 v[14:15], v[16:17], v[4:5] +; GFX6-NEXT: v_div_fmas_f64 v[6:7], v[8:9], v[6:7], v[12:13] +; GFX6-NEXT: v_fma_f64 v[8:9], -v[10:11], v[14:15], v[16:17] ; GFX6-NEXT: v_cmp_eq_u32_e32 vcc, v18, v17 -; GFX6-NEXT: v_mul_f64 v[12:13], v[16:17], v[4:5] +; GFX6-NEXT: v_cmp_eq_u32_e64 s[4:5], v3, v11 ; GFX6-NEXT: s_xor_b64 vcc, vcc, s[4:5] -; GFX6-NEXT: v_fma_f64 v[10:11], -v[8:9], v[12:13], v[16:17] ; GFX6-NEXT: v_div_fixup_f64 v[0:1], v[6:7], v[0:1], 1.0 -; GFX6-NEXT: v_div_fmas_f64 v[4:5], v[10:11], v[4:5], v[12:13] +; GFX6-NEXT: s_nop 0 +; GFX6-NEXT: v_div_fmas_f64 v[4:5], v[8:9], v[4:5], v[14:15] ; GFX6-NEXT: v_div_fixup_f64 v[2:3], v[4:5], v[2:3], 1.0 ; GFX6-NEXT: s_setpc_b64 s[30:31] ; @@ -1130,33 +1132,34 @@ define <2 x double> @v_rcp_v2f64_ulp25(<2 x double> %x) { ; GFX6: ; %bb.0: ; GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX6-NEXT: v_div_scale_f64 v[4:5], s[4:5], v[0:1], v[0:1], 1.0 -; GFX6-NEXT: v_div_scale_f64 v[10:11], s[4:5], 1.0, v[0:1], 1.0 +; GFX6-NEXT: v_div_scale_f64 v[10:11], s[4:5], v[2:3], v[2:3], 1.0 ; GFX6-NEXT: 
v_rcp_f64_e32 v[6:7], v[4:5] ; GFX6-NEXT: v_mov_b32_e32 v18, 0x3ff00000 -; GFX6-NEXT: v_cmp_eq_u32_e32 vcc, v18, v11 +; GFX6-NEXT: v_rcp_f64_e32 v[14:15], v[10:11] ; GFX6-NEXT: v_fma_f64 v[8:9], -v[4:5], v[6:7], 1.0 +; GFX6-NEXT: v_fma_f64 v[16:17], -v[10:11], v[14:15], 1.0 ; GFX6-NEXT: v_fma_f64 v[6:7], v[6:7], v[8:9], v[6:7] -; GFX6-NEXT: v_div_scale_f64 v[8:9], s[4:5], v[2:3], v[2:3], 1.0 +; GFX6-NEXT: v_div_scale_f64 v[8:9], s[4:5], 1.0, v[0:1], 1.0 ; GFX6-NEXT: v_fma_f64 v[12:13], -v[4:5], v[6:7], 1.0 -; GFX6-NEXT: v_cmp_eq_u32_e64 s[4:5], v1, v5 +; GFX6-NEXT: v_fma_f64 v[14:15], v[14:15], v[16:17], v[14:15] ; GFX6-NEXT: v_fma_f64 v[6:7], v[6:7], v[12:13], v[6:7] -; GFX6-NEXT: v_rcp_f64_e32 v[12:13], v[8:9] -; GFX6-NEXT: v_mul_f64 v[14:15], v[10:11], v[6:7] -; GFX6-NEXT: s_xor_b64 vcc, vcc, s[4:5] -; GFX6-NEXT: v_fma_f64 v[10:11], -v[4:5], v[14:15], v[10:11] -; GFX6-NEXT: v_fma_f64 v[16:17], -v[8:9], v[12:13], 1.0 -; GFX6-NEXT: v_div_fmas_f64 v[6:7], v[10:11], v[6:7], v[14:15] -; GFX6-NEXT: v_fma_f64 v[12:13], v[12:13], v[16:17], v[12:13] +; GFX6-NEXT: v_cmp_eq_u32_e32 vcc, v18, v9 +; GFX6-NEXT: v_mul_f64 v[12:13], v[8:9], v[6:7] +; GFX6-NEXT: v_cmp_eq_u32_e64 s[4:5], v1, v5 +; GFX6-NEXT: v_fma_f64 v[8:9], -v[4:5], v[12:13], v[8:9] +; GFX6-NEXT: v_fma_f64 v[4:5], -v[10:11], v[14:15], 1.0 ; GFX6-NEXT: v_div_scale_f64 v[16:17], s[6:7], 1.0, v[2:3], 1.0 -; GFX6-NEXT: v_fma_f64 v[4:5], -v[8:9], v[12:13], 1.0 -; GFX6-NEXT: v_cmp_eq_u32_e64 s[4:5], v3, v9 -; GFX6-NEXT: v_fma_f64 v[4:5], v[12:13], v[4:5], v[12:13] +; GFX6-NEXT: v_fma_f64 v[4:5], v[14:15], v[4:5], v[14:15] +; GFX6-NEXT: s_xor_b64 vcc, vcc, s[4:5] +; GFX6-NEXT: v_mul_f64 v[14:15], v[16:17], v[4:5] +; GFX6-NEXT: v_div_fmas_f64 v[6:7], v[8:9], v[6:7], v[12:13] +; GFX6-NEXT: v_fma_f64 v[8:9], -v[10:11], v[14:15], v[16:17] ; GFX6-NEXT: v_cmp_eq_u32_e32 vcc, v18, v17 -; GFX6-NEXT: v_mul_f64 v[12:13], v[16:17], v[4:5] +; GFX6-NEXT: v_cmp_eq_u32_e64 s[4:5], v3, v11 ; GFX6-NEXT: s_xor_b64 vcc, vcc, s[4:5] -; GFX6-NEXT: v_fma_f64 v[10:11], -v[8:9], v[12:13], v[16:17] ; GFX6-NEXT: v_div_fixup_f64 v[0:1], v[6:7], v[0:1], 1.0 -; GFX6-NEXT: v_div_fmas_f64 v[4:5], v[10:11], v[4:5], v[12:13] +; GFX6-NEXT: s_nop 0 +; GFX6-NEXT: v_div_fmas_f64 v[4:5], v[8:9], v[4:5], v[14:15] ; GFX6-NEXT: v_div_fixup_f64 v[2:3], v[4:5], v[2:3], 1.0 ; GFX6-NEXT: s_setpc_b64 s[30:31] ; diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/fpow.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/fpow.ll index bc2c5c737104..4667975ca0d0 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/fpow.ll +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/fpow.ll @@ -188,11 +188,11 @@ define <2 x half> @v_pow_v2f16(<2 x half> %x, <2 x half> %y) { ; GFX9-NEXT: v_cvt_f32_f16_e32 v0, v0 ; GFX9-NEXT: v_mul_legacy_f32_e32 v2, v2, v3 ; GFX9-NEXT: v_mul_legacy_f32_e32 v0, v0, v1 -; GFX9-NEXT: v_cvt_f16_f32_e32 v2, v2 +; GFX9-NEXT: v_cvt_f16_f32_e32 v1, v2 ; GFX9-NEXT: v_cvt_f16_f32_e32 v0, v0 -; GFX9-NEXT: v_exp_f16_e32 v1, v2 -; GFX9-NEXT: v_exp_f16_sdwa v0, v0 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD ; GFX9-NEXT: v_mov_b32_e32 v2, 0xffff +; GFX9-NEXT: v_exp_f16_e32 v1, v1 +; GFX9-NEXT: v_exp_f16_sdwa v0, v0 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD ; GFX9-NEXT: v_and_or_b32 v0, v1, v2, v0 ; GFX9-NEXT: s_setpc_b64 s[30:31] ; @@ -274,11 +274,11 @@ define <2 x half> @v_pow_v2f16_fneg_lhs(<2 x half> %x, <2 x half> %y) { ; GFX9-NEXT: v_cvt_f32_f16_e32 v0, v0 ; GFX9-NEXT: v_mul_legacy_f32_e32 v2, v2, v3 ; GFX9-NEXT: v_mul_legacy_f32_e32 v0, v0, v1 -; GFX9-NEXT: v_cvt_f16_f32_e32 v2, v2 +; 
GFX9-NEXT: v_cvt_f16_f32_e32 v1, v2 ; GFX9-NEXT: v_cvt_f16_f32_e32 v0, v0 -; GFX9-NEXT: v_exp_f16_e32 v1, v2 -; GFX9-NEXT: v_exp_f16_sdwa v0, v0 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD ; GFX9-NEXT: v_mov_b32_e32 v2, 0xffff +; GFX9-NEXT: v_exp_f16_e32 v1, v1 +; GFX9-NEXT: v_exp_f16_sdwa v0, v0 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD ; GFX9-NEXT: v_and_or_b32 v0, v1, v2, v0 ; GFX9-NEXT: s_setpc_b64 s[30:31] ; @@ -358,8 +358,8 @@ define <2 x half> @v_pow_v2f16_fneg_rhs(<2 x half> %x, <2 x half> %y) { ; GFX9-NEXT: v_xor_b32_e32 v1, 0x80008000, v1 ; GFX9-NEXT: v_cvt_f32_f16_e32 v3, v1 ; GFX9-NEXT: v_cvt_f32_f16_e32 v2, v2 -; GFX9-NEXT: v_cvt_f32_f16_e32 v0, v0 ; GFX9-NEXT: v_cvt_f32_f16_sdwa v1, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 +; GFX9-NEXT: v_cvt_f32_f16_e32 v0, v0 ; GFX9-NEXT: v_mul_legacy_f32_e32 v2, v2, v3 ; GFX9-NEXT: v_cvt_f16_f32_e32 v2, v2 ; GFX9-NEXT: v_mul_legacy_f32_e32 v0, v0, v1 diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/fshl.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/fshl.ll index 09e49cedca48..f0f61e1803bd 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/fshl.ll +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/fshl.ll @@ -2094,18 +2094,18 @@ define <2 x i24> @v_fshl_v2i24(<2 x i24> %lhs, <2 x i24> %rhs, <2 x i24> %amt) { ; GFX9-NEXT: v_cvt_u32_f32_e32 v6, v6 ; GFX9-NEXT: v_and_b32_e32 v4, 0xffffff, v4 ; GFX9-NEXT: v_mul_f32_e32 v9, 0x4f7ffffe, v9 -; GFX9-NEXT: v_bfe_u32 v2, v2, 1, 23 +; GFX9-NEXT: v_cvt_u32_f32_e32 v9, v9 ; GFX9-NEXT: v_mul_lo_u32 v8, v7, v6 +; GFX9-NEXT: v_bfe_u32 v2, v2, 1, 23 ; GFX9-NEXT: v_bfe_u32 v3, v3, 1, 23 +; GFX9-NEXT: v_mul_lo_u32 v7, v7, v9 ; GFX9-NEXT: v_mul_hi_u32 v8, v6, v8 +; GFX9-NEXT: v_mul_hi_u32 v7, v9, v7 ; GFX9-NEXT: v_add_u32_e32 v6, v6, v8 -; GFX9-NEXT: v_cvt_u32_f32_e32 v8, v9 ; GFX9-NEXT: v_mul_hi_u32 v6, v4, v6 -; GFX9-NEXT: v_mov_b32_e32 v9, 0xffffff -; GFX9-NEXT: v_and_b32_e32 v5, v5, v9 -; GFX9-NEXT: v_mul_lo_u32 v7, v7, v8 +; GFX9-NEXT: v_mov_b32_e32 v8, 0xffffff +; GFX9-NEXT: v_and_b32_e32 v5, v5, v8 ; GFX9-NEXT: v_mul_lo_u32 v6, v6, 24 -; GFX9-NEXT: v_mul_hi_u32 v7, v8, v7 ; GFX9-NEXT: v_sub_u32_e32 v4, v4, v6 ; GFX9-NEXT: v_subrev_u32_e32 v6, 24, v4 ; GFX9-NEXT: v_cmp_le_u32_e32 vcc, 24, v4 @@ -2113,11 +2113,11 @@ define <2 x i24> @v_fshl_v2i24(<2 x i24> %lhs, <2 x i24> %rhs, <2 x i24> %amt) { ; GFX9-NEXT: v_subrev_u32_e32 v6, 24, v4 ; GFX9-NEXT: v_cmp_le_u32_e32 vcc, 24, v4 ; GFX9-NEXT: v_cndmask_b32_e32 v4, v4, v6, vcc -; GFX9-NEXT: v_add_u32_e32 v6, v8, v7 +; GFX9-NEXT: v_add_u32_e32 v6, v9, v7 ; GFX9-NEXT: v_mul_hi_u32 v6, v5, v6 ; GFX9-NEXT: v_sub_u32_e32 v7, 23, v4 -; GFX9-NEXT: v_and_b32_e32 v7, v7, v9 -; GFX9-NEXT: v_and_b32_e32 v4, v4, v9 +; GFX9-NEXT: v_and_b32_e32 v7, v7, v8 +; GFX9-NEXT: v_and_b32_e32 v4, v4, v8 ; GFX9-NEXT: v_mul_lo_u32 v6, v6, 24 ; GFX9-NEXT: v_lshrrev_b32_e32 v2, v7, v2 ; GFX9-NEXT: v_lshl_or_b32 v0, v0, v4, v2 @@ -2129,8 +2129,8 @@ define <2 x i24> @v_fshl_v2i24(<2 x i24> %lhs, <2 x i24> %rhs, <2 x i24> %amt) { ; GFX9-NEXT: v_cmp_le_u32_e32 vcc, 24, v2 ; GFX9-NEXT: v_cndmask_b32_e32 v2, v2, v4, vcc ; GFX9-NEXT: v_sub_u32_e32 v4, 23, v2 -; GFX9-NEXT: v_and_b32_e32 v4, v4, v9 -; GFX9-NEXT: v_and_b32_e32 v2, v2, v9 +; GFX9-NEXT: v_and_b32_e32 v4, v4, v8 +; GFX9-NEXT: v_and_b32_e32 v2, v2, v8 ; GFX9-NEXT: v_lshrrev_b32_e32 v3, v4, v3 ; GFX9-NEXT: v_lshl_or_b32 v1, v1, v2, v3 ; GFX9-NEXT: s_setpc_b64 s[30:31] diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/fshr.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/fshr.ll index b570be394ca4..ff6618f8534b 100644 --- 
a/llvm/test/CodeGen/AMDGPU/GlobalISel/fshr.ll +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/fshr.ll @@ -2100,40 +2100,40 @@ define <2 x i24> @v_fshr_v2i24(<2 x i24> %lhs, <2 x i24> %rhs, <2 x i24> %amt) { ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX9-NEXT: v_cvt_f32_ubyte0_e32 v6, 24 ; GFX9-NEXT: v_rcp_iflag_f32_e32 v6, v6 -; GFX9-NEXT: v_mov_b32_e32 v7, 0xffffffe8 ; GFX9-NEXT: v_cvt_f32_ubyte0_e32 v9, 24 ; GFX9-NEXT: v_rcp_iflag_f32_e32 v9, v9 +; GFX9-NEXT: v_mov_b32_e32 v7, 0xffffffe8 ; GFX9-NEXT: v_mul_f32_e32 v6, 0x4f7ffffe, v6 ; GFX9-NEXT: v_cvt_u32_f32_e32 v6, v6 -; GFX9-NEXT: v_and_b32_e32 v4, 0xffffff, v4 ; GFX9-NEXT: v_mul_f32_e32 v9, 0x4f7ffffe, v9 -; GFX9-NEXT: v_lshlrev_b32_e32 v0, 1, v0 +; GFX9-NEXT: v_cvt_u32_f32_e32 v9, v9 +; GFX9-NEXT: v_and_b32_e32 v4, 0xffffff, v4 ; GFX9-NEXT: v_mul_lo_u32 v8, v7, v6 +; GFX9-NEXT: v_lshlrev_b32_e32 v0, 1, v0 +; GFX9-NEXT: v_mul_lo_u32 v7, v7, v9 ; GFX9-NEXT: v_lshlrev_b32_e32 v1, 1, v1 ; GFX9-NEXT: v_mul_hi_u32 v8, v6, v8 +; GFX9-NEXT: v_mul_hi_u32 v7, v9, v7 ; GFX9-NEXT: v_add_u32_e32 v6, v6, v8 -; GFX9-NEXT: v_cvt_u32_f32_e32 v8, v9 ; GFX9-NEXT: v_mul_hi_u32 v6, v4, v6 -; GFX9-NEXT: v_mov_b32_e32 v9, 0xffffff -; GFX9-NEXT: v_and_b32_e32 v5, v5, v9 -; GFX9-NEXT: v_mul_lo_u32 v7, v7, v8 +; GFX9-NEXT: v_mov_b32_e32 v8, 0xffffff +; GFX9-NEXT: v_and_b32_e32 v5, v5, v8 +; GFX9-NEXT: v_add_u32_e32 v7, v9, v7 ; GFX9-NEXT: v_mul_lo_u32 v6, v6, 24 -; GFX9-NEXT: v_and_b32_e32 v2, v2, v9 -; GFX9-NEXT: v_and_b32_e32 v3, v3, v9 -; GFX9-NEXT: v_mul_hi_u32 v7, v8, v7 +; GFX9-NEXT: v_mul_hi_u32 v7, v5, v7 +; GFX9-NEXT: v_and_b32_e32 v2, v2, v8 +; GFX9-NEXT: v_and_b32_e32 v3, v3, v8 ; GFX9-NEXT: v_sub_u32_e32 v4, v4, v6 ; GFX9-NEXT: v_subrev_u32_e32 v6, 24, v4 ; GFX9-NEXT: v_cmp_le_u32_e32 vcc, 24, v4 -; GFX9-NEXT: v_add_u32_e32 v7, v8, v7 -; GFX9-NEXT: v_mul_hi_u32 v7, v5, v7 ; GFX9-NEXT: v_cndmask_b32_e32 v4, v4, v6, vcc ; GFX9-NEXT: v_subrev_u32_e32 v6, 24, v4 ; GFX9-NEXT: v_cmp_le_u32_e32 vcc, 24, v4 ; GFX9-NEXT: v_mul_lo_u32 v7, v7, 24 ; GFX9-NEXT: v_cndmask_b32_e32 v4, v4, v6, vcc ; GFX9-NEXT: v_sub_u32_e32 v6, 23, v4 -; GFX9-NEXT: v_and_b32_e32 v4, v4, v9 -; GFX9-NEXT: v_and_b32_e32 v6, v6, v9 +; GFX9-NEXT: v_and_b32_e32 v4, v4, v8 +; GFX9-NEXT: v_and_b32_e32 v6, v6, v8 ; GFX9-NEXT: v_lshrrev_b32_e32 v2, v4, v2 ; GFX9-NEXT: v_lshl_or_b32 v0, v0, v6, v2 ; GFX9-NEXT: v_sub_u32_e32 v2, v5, v7 @@ -2144,8 +2144,8 @@ define <2 x i24> @v_fshr_v2i24(<2 x i24> %lhs, <2 x i24> %rhs, <2 x i24> %amt) { ; GFX9-NEXT: v_cmp_le_u32_e32 vcc, 24, v2 ; GFX9-NEXT: v_cndmask_b32_e32 v2, v2, v4, vcc ; GFX9-NEXT: v_sub_u32_e32 v4, 23, v2 -; GFX9-NEXT: v_and_b32_e32 v2, v2, v9 -; GFX9-NEXT: v_and_b32_e32 v4, v4, v9 +; GFX9-NEXT: v_and_b32_e32 v2, v2, v8 +; GFX9-NEXT: v_and_b32_e32 v4, v4, v8 ; GFX9-NEXT: v_lshrrev_b32_e32 v2, v2, v3 ; GFX9-NEXT: v_lshl_or_b32 v1, v1, v4, v2 ; GFX9-NEXT: s_setpc_b64 s[30:31] @@ -4095,8 +4095,8 @@ define <4 x half> @v_fshr_v4i16(<4 x i16> %lhs, <4 x i16> %rhs, <4 x i16> %amt) ; GFX10-NEXT: v_and_b32_e32 v4, s4, v4 ; GFX10-NEXT: v_and_b32_e32 v6, s4, v6 ; GFX10-NEXT: v_and_b32_e32 v5, s4, v5 -; GFX10-NEXT: v_and_b32_e32 v7, s4, v7 ; GFX10-NEXT: v_pk_lshlrev_b16 v1, 1, v1 op_sel_hi:[0,1] +; GFX10-NEXT: v_and_b32_e32 v7, s4, v7 ; GFX10-NEXT: v_pk_lshrrev_b16 v2, v4, v2 ; GFX10-NEXT: v_pk_lshlrev_b16 v0, v6, v0 ; GFX10-NEXT: v_pk_lshrrev_b16 v3, v5, v3 diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/function-returns.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/function-returns.ll index 3dfd3ab5a15a..53c3fa1064a6 100644 --- 
a/llvm/test/CodeGen/AMDGPU/GlobalISel/function-returns.ll +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/function-returns.ll @@ -6,14 +6,11 @@ define i1 @i1_func_void() #0 { ; CHECK-LABEL: name: i1_func_void ; CHECK: bb.1 (%ir-block.0): - ; CHECK: liveins: $sgpr30_sgpr31 - ; CHECK: [[COPY:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 - ; CHECK: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF - ; CHECK: [[LOAD:%[0-9]+]]:_(s1) = G_LOAD [[DEF]](p1) :: (load (s1) from `i1 addrspace(1)* undef`, addrspace 1) - ; CHECK: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[LOAD]](s1) - ; CHECK: $vgpr0 = COPY [[ANYEXT]](s32) - ; CHECK: [[COPY1:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY]] - ; CHECK: S_SETPC_B64_return [[COPY1]], implicit $vgpr0 + ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF + ; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(s1) = G_LOAD [[DEF]](p1) :: (load (s1) from `i1 addrspace(1)* undef`, addrspace 1) + ; CHECK-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[LOAD]](s1) + ; CHECK-NEXT: $vgpr0 = COPY [[ANYEXT]](s32) + ; CHECK-NEXT: SI_RETURN implicit $vgpr0 %val = load i1, i1 addrspace(1)* undef ret i1 %val } @@ -21,14 +18,11 @@ define i1 @i1_func_void() #0 { define zeroext i1 @i1_zeroext_func_void() #0 { ; CHECK-LABEL: name: i1_zeroext_func_void ; CHECK: bb.1 (%ir-block.0): - ; CHECK: liveins: $sgpr30_sgpr31 - ; CHECK: [[COPY:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 - ; CHECK: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF - ; CHECK: [[LOAD:%[0-9]+]]:_(s1) = G_LOAD [[DEF]](p1) :: (load (s1) from `i1 addrspace(1)* undef`, addrspace 1) - ; CHECK: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[LOAD]](s1) - ; CHECK: $vgpr0 = COPY [[ZEXT]](s32) - ; CHECK: [[COPY1:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY]] - ; CHECK: S_SETPC_B64_return [[COPY1]], implicit $vgpr0 + ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF + ; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(s1) = G_LOAD [[DEF]](p1) :: (load (s1) from `i1 addrspace(1)* undef`, addrspace 1) + ; CHECK-NEXT: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[LOAD]](s1) + ; CHECK-NEXT: $vgpr0 = COPY [[ZEXT]](s32) + ; CHECK-NEXT: SI_RETURN implicit $vgpr0 %val = load i1, i1 addrspace(1)* undef ret i1 %val } @@ -36,14 +30,11 @@ define zeroext i1 @i1_zeroext_func_void() #0 { define signext i1 @i1_signext_func_void() #0 { ; CHECK-LABEL: name: i1_signext_func_void ; CHECK: bb.1 (%ir-block.0): - ; CHECK: liveins: $sgpr30_sgpr31 - ; CHECK: [[COPY:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 - ; CHECK: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF - ; CHECK: [[LOAD:%[0-9]+]]:_(s1) = G_LOAD [[DEF]](p1) :: (load (s1) from `i1 addrspace(1)* undef`, addrspace 1) - ; CHECK: [[SEXT:%[0-9]+]]:_(s32) = G_SEXT [[LOAD]](s1) - ; CHECK: $vgpr0 = COPY [[SEXT]](s32) - ; CHECK: [[COPY1:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY]] - ; CHECK: S_SETPC_B64_return [[COPY1]], implicit $vgpr0 + ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF + ; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(s1) = G_LOAD [[DEF]](p1) :: (load (s1) from `i1 addrspace(1)* undef`, addrspace 1) + ; CHECK-NEXT: [[SEXT:%[0-9]+]]:_(s32) = G_SEXT [[LOAD]](s1) + ; CHECK-NEXT: $vgpr0 = COPY [[SEXT]](s32) + ; CHECK-NEXT: SI_RETURN implicit $vgpr0 %val = load i1, i1 addrspace(1)* undef ret i1 %val } @@ -51,14 +42,11 @@ define signext i1 @i1_signext_func_void() #0 { define i7 @i7_func_void() #0 { ; CHECK-LABEL: name: i7_func_void ; CHECK: bb.1 (%ir-block.0): - ; CHECK: liveins: $sgpr30_sgpr31 - ; CHECK: [[COPY:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 - ; CHECK: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF - ; CHECK: [[LOAD:%[0-9]+]]:_(s7) = G_LOAD [[DEF]](p1) :: (load (s7) from `i7 addrspace(1)* undef`, addrspace 1) - ; CHECK: 
[[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[LOAD]](s7) - ; CHECK: $vgpr0 = COPY [[ANYEXT]](s32) - ; CHECK: [[COPY1:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY]] - ; CHECK: S_SETPC_B64_return [[COPY1]], implicit $vgpr0 + ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF + ; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(s7) = G_LOAD [[DEF]](p1) :: (load (s7) from `i7 addrspace(1)* undef`, addrspace 1) + ; CHECK-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[LOAD]](s7) + ; CHECK-NEXT: $vgpr0 = COPY [[ANYEXT]](s32) + ; CHECK-NEXT: SI_RETURN implicit $vgpr0 %val = load i7, i7 addrspace(1)* undef ret i7 %val } @@ -66,14 +54,11 @@ define i7 @i7_func_void() #0 { define zeroext i7 @i7_zeroext_func_void() #0 { ; CHECK-LABEL: name: i7_zeroext_func_void ; CHECK: bb.1 (%ir-block.0): - ; CHECK: liveins: $sgpr30_sgpr31 - ; CHECK: [[COPY:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 - ; CHECK: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF - ; CHECK: [[LOAD:%[0-9]+]]:_(s7) = G_LOAD [[DEF]](p1) :: (load (s7) from `i7 addrspace(1)* undef`, addrspace 1) - ; CHECK: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[LOAD]](s7) - ; CHECK: $vgpr0 = COPY [[ZEXT]](s32) - ; CHECK: [[COPY1:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY]] - ; CHECK: S_SETPC_B64_return [[COPY1]], implicit $vgpr0 + ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF + ; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(s7) = G_LOAD [[DEF]](p1) :: (load (s7) from `i7 addrspace(1)* undef`, addrspace 1) + ; CHECK-NEXT: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[LOAD]](s7) + ; CHECK-NEXT: $vgpr0 = COPY [[ZEXT]](s32) + ; CHECK-NEXT: SI_RETURN implicit $vgpr0 %val = load i7, i7 addrspace(1)* undef ret i7 %val } @@ -81,14 +66,11 @@ define zeroext i7 @i7_zeroext_func_void() #0 { define signext i7 @i7_signext_func_void() #0 { ; CHECK-LABEL: name: i7_signext_func_void ; CHECK: bb.1 (%ir-block.0): - ; CHECK: liveins: $sgpr30_sgpr31 - ; CHECK: [[COPY:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 - ; CHECK: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF - ; CHECK: [[LOAD:%[0-9]+]]:_(s7) = G_LOAD [[DEF]](p1) :: (load (s7) from `i7 addrspace(1)* undef`, addrspace 1) - ; CHECK: [[SEXT:%[0-9]+]]:_(s32) = G_SEXT [[LOAD]](s7) - ; CHECK: $vgpr0 = COPY [[SEXT]](s32) - ; CHECK: [[COPY1:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY]] - ; CHECK: S_SETPC_B64_return [[COPY1]], implicit $vgpr0 + ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF + ; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(s7) = G_LOAD [[DEF]](p1) :: (load (s7) from `i7 addrspace(1)* undef`, addrspace 1) + ; CHECK-NEXT: [[SEXT:%[0-9]+]]:_(s32) = G_SEXT [[LOAD]](s7) + ; CHECK-NEXT: $vgpr0 = COPY [[SEXT]](s32) + ; CHECK-NEXT: SI_RETURN implicit $vgpr0 %val = load i7, i7 addrspace(1)* undef ret i7 %val } @@ -96,14 +78,11 @@ define signext i7 @i7_signext_func_void() #0 { define i8 @i8_func_void() #0 { ; CHECK-LABEL: name: i8_func_void ; CHECK: bb.1 (%ir-block.0): - ; CHECK: liveins: $sgpr30_sgpr31 - ; CHECK: [[COPY:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 - ; CHECK: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF - ; CHECK: [[LOAD:%[0-9]+]]:_(s8) = G_LOAD [[DEF]](p1) :: (load (s8) from `i8 addrspace(1)* undef`, addrspace 1) - ; CHECK: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[LOAD]](s8) - ; CHECK: $vgpr0 = COPY [[ANYEXT]](s32) - ; CHECK: [[COPY1:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY]] - ; CHECK: S_SETPC_B64_return [[COPY1]], implicit $vgpr0 + ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF + ; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(s8) = G_LOAD [[DEF]](p1) :: (load (s8) from `i8 addrspace(1)* undef`, addrspace 1) + ; CHECK-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[LOAD]](s8) + ; CHECK-NEXT: $vgpr0 = COPY [[ANYEXT]](s32) + ; CHECK-NEXT: 
SI_RETURN implicit $vgpr0 %val = load i8, i8 addrspace(1)* undef ret i8 %val } @@ -111,14 +90,11 @@ define i8 @i8_func_void() #0 { define zeroext i8 @i8_zeroext_func_void() #0 { ; CHECK-LABEL: name: i8_zeroext_func_void ; CHECK: bb.1 (%ir-block.0): - ; CHECK: liveins: $sgpr30_sgpr31 - ; CHECK: [[COPY:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 - ; CHECK: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF - ; CHECK: [[LOAD:%[0-9]+]]:_(s8) = G_LOAD [[DEF]](p1) :: (load (s8) from `i8 addrspace(1)* undef`, addrspace 1) - ; CHECK: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[LOAD]](s8) - ; CHECK: $vgpr0 = COPY [[ZEXT]](s32) - ; CHECK: [[COPY1:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY]] - ; CHECK: S_SETPC_B64_return [[COPY1]], implicit $vgpr0 + ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF + ; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(s8) = G_LOAD [[DEF]](p1) :: (load (s8) from `i8 addrspace(1)* undef`, addrspace 1) + ; CHECK-NEXT: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[LOAD]](s8) + ; CHECK-NEXT: $vgpr0 = COPY [[ZEXT]](s32) + ; CHECK-NEXT: SI_RETURN implicit $vgpr0 %val = load i8, i8 addrspace(1)* undef ret i8 %val } @@ -126,14 +102,11 @@ define zeroext i8 @i8_zeroext_func_void() #0 { define signext i8 @i8_signext_func_void() #0 { ; CHECK-LABEL: name: i8_signext_func_void ; CHECK: bb.1 (%ir-block.0): - ; CHECK: liveins: $sgpr30_sgpr31 - ; CHECK: [[COPY:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 - ; CHECK: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF - ; CHECK: [[LOAD:%[0-9]+]]:_(s8) = G_LOAD [[DEF]](p1) :: (load (s8) from `i8 addrspace(1)* undef`, addrspace 1) - ; CHECK: [[SEXT:%[0-9]+]]:_(s32) = G_SEXT [[LOAD]](s8) - ; CHECK: $vgpr0 = COPY [[SEXT]](s32) - ; CHECK: [[COPY1:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY]] - ; CHECK: S_SETPC_B64_return [[COPY1]], implicit $vgpr0 + ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF + ; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(s8) = G_LOAD [[DEF]](p1) :: (load (s8) from `i8 addrspace(1)* undef`, addrspace 1) + ; CHECK-NEXT: [[SEXT:%[0-9]+]]:_(s32) = G_SEXT [[LOAD]](s8) + ; CHECK-NEXT: $vgpr0 = COPY [[SEXT]](s32) + ; CHECK-NEXT: SI_RETURN implicit $vgpr0 %val = load i8, i8 addrspace(1)* undef ret i8 %val } @@ -141,14 +114,11 @@ define signext i8 @i8_signext_func_void() #0 { define i16 @i16_func_void() #0 { ; CHECK-LABEL: name: i16_func_void ; CHECK: bb.1 (%ir-block.0): - ; CHECK: liveins: $sgpr30_sgpr31 - ; CHECK: [[COPY:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 - ; CHECK: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF - ; CHECK: [[LOAD:%[0-9]+]]:_(s16) = G_LOAD [[DEF]](p1) :: (load (s16) from `i16 addrspace(1)* undef`, addrspace 1) - ; CHECK: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[LOAD]](s16) - ; CHECK: $vgpr0 = COPY [[ANYEXT]](s32) - ; CHECK: [[COPY1:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY]] - ; CHECK: S_SETPC_B64_return [[COPY1]], implicit $vgpr0 + ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF + ; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(s16) = G_LOAD [[DEF]](p1) :: (load (s16) from `i16 addrspace(1)* undef`, addrspace 1) + ; CHECK-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[LOAD]](s16) + ; CHECK-NEXT: $vgpr0 = COPY [[ANYEXT]](s32) + ; CHECK-NEXT: SI_RETURN implicit $vgpr0 %val = load i16, i16 addrspace(1)* undef ret i16 %val } @@ -156,14 +126,11 @@ define i16 @i16_func_void() #0 { define zeroext i16 @i16_zeroext_func_void() #0 { ; CHECK-LABEL: name: i16_zeroext_func_void ; CHECK: bb.1 (%ir-block.0): - ; CHECK: liveins: $sgpr30_sgpr31 - ; CHECK: [[COPY:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 - ; CHECK: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF - ; CHECK: [[LOAD:%[0-9]+]]:_(s16) = G_LOAD [[DEF]](p1) :: (load (s16) from `i16 
addrspace(1)* undef`, addrspace 1) - ; CHECK: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[LOAD]](s16) - ; CHECK: $vgpr0 = COPY [[ZEXT]](s32) - ; CHECK: [[COPY1:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY]] - ; CHECK: S_SETPC_B64_return [[COPY1]], implicit $vgpr0 + ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF + ; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(s16) = G_LOAD [[DEF]](p1) :: (load (s16) from `i16 addrspace(1)* undef`, addrspace 1) + ; CHECK-NEXT: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[LOAD]](s16) + ; CHECK-NEXT: $vgpr0 = COPY [[ZEXT]](s32) + ; CHECK-NEXT: SI_RETURN implicit $vgpr0 %val = load i16, i16 addrspace(1)* undef ret i16 %val } @@ -171,14 +138,11 @@ define zeroext i16 @i16_zeroext_func_void() #0 { define signext i16 @i16_signext_func_void() #0 { ; CHECK-LABEL: name: i16_signext_func_void ; CHECK: bb.1 (%ir-block.0): - ; CHECK: liveins: $sgpr30_sgpr31 - ; CHECK: [[COPY:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 - ; CHECK: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF - ; CHECK: [[LOAD:%[0-9]+]]:_(s16) = G_LOAD [[DEF]](p1) :: (load (s16) from `i16 addrspace(1)* undef`, addrspace 1) - ; CHECK: [[SEXT:%[0-9]+]]:_(s32) = G_SEXT [[LOAD]](s16) - ; CHECK: $vgpr0 = COPY [[SEXT]](s32) - ; CHECK: [[COPY1:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY]] - ; CHECK: S_SETPC_B64_return [[COPY1]], implicit $vgpr0 + ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF + ; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(s16) = G_LOAD [[DEF]](p1) :: (load (s16) from `i16 addrspace(1)* undef`, addrspace 1) + ; CHECK-NEXT: [[SEXT:%[0-9]+]]:_(s32) = G_SEXT [[LOAD]](s16) + ; CHECK-NEXT: $vgpr0 = COPY [[SEXT]](s32) + ; CHECK-NEXT: SI_RETURN implicit $vgpr0 %val = load i16, i16 addrspace(1)* undef ret i16 %val } @@ -186,14 +150,11 @@ define signext i16 @i16_signext_func_void() #0 { define half @f16_func_void() #0 { ; CHECK-LABEL: name: f16_func_void ; CHECK: bb.1 (%ir-block.0): - ; CHECK: liveins: $sgpr30_sgpr31 - ; CHECK: [[COPY:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 - ; CHECK: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF - ; CHECK: [[LOAD:%[0-9]+]]:_(s16) = G_LOAD [[DEF]](p1) :: (load (s16) from `half addrspace(1)* undef`, addrspace 1) - ; CHECK: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[LOAD]](s16) - ; CHECK: $vgpr0 = COPY [[ANYEXT]](s32) - ; CHECK: [[COPY1:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY]] - ; CHECK: S_SETPC_B64_return [[COPY1]], implicit $vgpr0 + ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF + ; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(s16) = G_LOAD [[DEF]](p1) :: (load (s16) from `half addrspace(1)* undef`, addrspace 1) + ; CHECK-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[LOAD]](s16) + ; CHECK-NEXT: $vgpr0 = COPY [[ANYEXT]](s32) + ; CHECK-NEXT: SI_RETURN implicit $vgpr0 %val = load half, half addrspace(1)* undef ret half %val } @@ -201,14 +162,11 @@ define half @f16_func_void() #0 { define i24 @i24_func_void() #0 { ; CHECK-LABEL: name: i24_func_void ; CHECK: bb.1 (%ir-block.0): - ; CHECK: liveins: $sgpr30_sgpr31 - ; CHECK: [[COPY:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 - ; CHECK: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF - ; CHECK: [[LOAD:%[0-9]+]]:_(s24) = G_LOAD [[DEF]](p1) :: (load (s24) from `i24 addrspace(1)* undef`, align 4, addrspace 1) - ; CHECK: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[LOAD]](s24) - ; CHECK: $vgpr0 = COPY [[ANYEXT]](s32) - ; CHECK: [[COPY1:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY]] - ; CHECK: S_SETPC_B64_return [[COPY1]], implicit $vgpr0 + ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF + ; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(s24) = G_LOAD [[DEF]](p1) :: (load (s24) from `i24 addrspace(1)* undef`, align 4, addrspace 1) + ; CHECK-NEXT: 
[[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[LOAD]](s24) + ; CHECK-NEXT: $vgpr0 = COPY [[ANYEXT]](s32) + ; CHECK-NEXT: SI_RETURN implicit $vgpr0 %val = load i24, i24 addrspace(1)* undef ret i24 %val } @@ -216,14 +174,11 @@ define i24 @i24_func_void() #0 { define zeroext i24 @i24_zeroext_func_void() #0 { ; CHECK-LABEL: name: i24_zeroext_func_void ; CHECK: bb.1 (%ir-block.0): - ; CHECK: liveins: $sgpr30_sgpr31 - ; CHECK: [[COPY:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 - ; CHECK: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF - ; CHECK: [[LOAD:%[0-9]+]]:_(s24) = G_LOAD [[DEF]](p1) :: (load (s24) from `i24 addrspace(1)* undef`, align 4, addrspace 1) - ; CHECK: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[LOAD]](s24) - ; CHECK: $vgpr0 = COPY [[ZEXT]](s32) - ; CHECK: [[COPY1:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY]] - ; CHECK: S_SETPC_B64_return [[COPY1]], implicit $vgpr0 + ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF + ; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(s24) = G_LOAD [[DEF]](p1) :: (load (s24) from `i24 addrspace(1)* undef`, align 4, addrspace 1) + ; CHECK-NEXT: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[LOAD]](s24) + ; CHECK-NEXT: $vgpr0 = COPY [[ZEXT]](s32) + ; CHECK-NEXT: SI_RETURN implicit $vgpr0 %val = load i24, i24 addrspace(1)* undef ret i24 %val } @@ -231,14 +186,11 @@ define zeroext i24 @i24_zeroext_func_void() #0 { define signext i24 @i24_signext_func_void() #0 { ; CHECK-LABEL: name: i24_signext_func_void ; CHECK: bb.1 (%ir-block.0): - ; CHECK: liveins: $sgpr30_sgpr31 - ; CHECK: [[COPY:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 - ; CHECK: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF - ; CHECK: [[LOAD:%[0-9]+]]:_(s24) = G_LOAD [[DEF]](p1) :: (load (s24) from `i24 addrspace(1)* undef`, align 4, addrspace 1) - ; CHECK: [[SEXT:%[0-9]+]]:_(s32) = G_SEXT [[LOAD]](s24) - ; CHECK: $vgpr0 = COPY [[SEXT]](s32) - ; CHECK: [[COPY1:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY]] - ; CHECK: S_SETPC_B64_return [[COPY1]], implicit $vgpr0 + ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF + ; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(s24) = G_LOAD [[DEF]](p1) :: (load (s24) from `i24 addrspace(1)* undef`, align 4, addrspace 1) + ; CHECK-NEXT: [[SEXT:%[0-9]+]]:_(s32) = G_SEXT [[LOAD]](s24) + ; CHECK-NEXT: $vgpr0 = COPY [[SEXT]](s32) + ; CHECK-NEXT: SI_RETURN implicit $vgpr0 %val = load i24, i24 addrspace(1)* undef ret i24 %val } @@ -246,17 +198,14 @@ define signext i24 @i24_signext_func_void() #0 { define <2 x i24> @v2i24_func_void() #0 { ; CHECK-LABEL: name: v2i24_func_void ; CHECK: bb.1 (%ir-block.0): - ; CHECK: liveins: $sgpr30_sgpr31 - ; CHECK: [[COPY:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 - ; CHECK: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF - ; CHECK: [[LOAD:%[0-9]+]]:_(<2 x s24>) = G_LOAD [[DEF]](p1) :: (load (<2 x s24>) from `<2 x i24> addrspace(1)* undef`, align 8, addrspace 1) - ; CHECK: [[UV:%[0-9]+]]:_(s24), [[UV1:%[0-9]+]]:_(s24) = G_UNMERGE_VALUES [[LOAD]](<2 x s24>) - ; CHECK: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[UV]](s24) - ; CHECK: [[ANYEXT1:%[0-9]+]]:_(s32) = G_ANYEXT [[UV1]](s24) - ; CHECK: $vgpr0 = COPY [[ANYEXT]](s32) - ; CHECK: $vgpr1 = COPY [[ANYEXT1]](s32) - ; CHECK: [[COPY1:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY]] - ; CHECK: S_SETPC_B64_return [[COPY1]], implicit $vgpr0, implicit $vgpr1 + ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF + ; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(<2 x s24>) = G_LOAD [[DEF]](p1) :: (load (<2 x s24>) from `<2 x i24> addrspace(1)* undef`, align 8, addrspace 1) + ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s24), [[UV1:%[0-9]+]]:_(s24) = G_UNMERGE_VALUES [[LOAD]](<2 x s24>) + ; CHECK-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT 
[[UV]](s24) + ; CHECK-NEXT: [[ANYEXT1:%[0-9]+]]:_(s32) = G_ANYEXT [[UV1]](s24) + ; CHECK-NEXT: $vgpr0 = COPY [[ANYEXT]](s32) + ; CHECK-NEXT: $vgpr1 = COPY [[ANYEXT1]](s32) + ; CHECK-NEXT: SI_RETURN implicit $vgpr0, implicit $vgpr1 %val = load <2 x i24>, <2 x i24> addrspace(1)* undef ret <2 x i24> %val } @@ -264,19 +213,16 @@ define <2 x i24> @v2i24_func_void() #0 { define <3 x i24> @v3i24_func_void() #0 { ; CHECK-LABEL: name: v3i24_func_void ; CHECK: bb.1 (%ir-block.0): - ; CHECK: liveins: $sgpr30_sgpr31 - ; CHECK: [[COPY:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 - ; CHECK: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF - ; CHECK: [[LOAD:%[0-9]+]]:_(<3 x s24>) = G_LOAD [[DEF]](p1) :: (load (<3 x s24>) from `<3 x i24> addrspace(1)* undef`, align 16, addrspace 1) - ; CHECK: [[UV:%[0-9]+]]:_(s24), [[UV1:%[0-9]+]]:_(s24), [[UV2:%[0-9]+]]:_(s24) = G_UNMERGE_VALUES [[LOAD]](<3 x s24>) - ; CHECK: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[UV]](s24) - ; CHECK: [[ANYEXT1:%[0-9]+]]:_(s32) = G_ANYEXT [[UV1]](s24) - ; CHECK: [[ANYEXT2:%[0-9]+]]:_(s32) = G_ANYEXT [[UV2]](s24) - ; CHECK: $vgpr0 = COPY [[ANYEXT]](s32) - ; CHECK: $vgpr1 = COPY [[ANYEXT1]](s32) - ; CHECK: $vgpr2 = COPY [[ANYEXT2]](s32) - ; CHECK: [[COPY1:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY]] - ; CHECK: S_SETPC_B64_return [[COPY1]], implicit $vgpr0, implicit $vgpr1, implicit $vgpr2 + ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF + ; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(<3 x s24>) = G_LOAD [[DEF]](p1) :: (load (<3 x s24>) from `<3 x i24> addrspace(1)* undef`, align 16, addrspace 1) + ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s24), [[UV1:%[0-9]+]]:_(s24), [[UV2:%[0-9]+]]:_(s24) = G_UNMERGE_VALUES [[LOAD]](<3 x s24>) + ; CHECK-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[UV]](s24) + ; CHECK-NEXT: [[ANYEXT1:%[0-9]+]]:_(s32) = G_ANYEXT [[UV1]](s24) + ; CHECK-NEXT: [[ANYEXT2:%[0-9]+]]:_(s32) = G_ANYEXT [[UV2]](s24) + ; CHECK-NEXT: $vgpr0 = COPY [[ANYEXT]](s32) + ; CHECK-NEXT: $vgpr1 = COPY [[ANYEXT1]](s32) + ; CHECK-NEXT: $vgpr2 = COPY [[ANYEXT2]](s32) + ; CHECK-NEXT: SI_RETURN implicit $vgpr0, implicit $vgpr1, implicit $vgpr2 %val = load <3 x i24>, <3 x i24> addrspace(1)* undef ret <3 x i24> %val } @@ -284,13 +230,10 @@ define <3 x i24> @v3i24_func_void() #0 { define i32 @i32_func_void() #0 { ; CHECK-LABEL: name: i32_func_void ; CHECK: bb.1 (%ir-block.0): - ; CHECK: liveins: $sgpr30_sgpr31 - ; CHECK: [[COPY:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 - ; CHECK: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF - ; CHECK: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[DEF]](p1) :: (load (s32) from `i32 addrspace(1)* undef`, addrspace 1) - ; CHECK: $vgpr0 = COPY [[LOAD]](s32) - ; CHECK: [[COPY1:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY]] - ; CHECK: S_SETPC_B64_return [[COPY1]], implicit $vgpr0 + ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF + ; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[DEF]](p1) :: (load (s32) from `i32 addrspace(1)* undef`, addrspace 1) + ; CHECK-NEXT: $vgpr0 = COPY [[LOAD]](s32) + ; CHECK-NEXT: SI_RETURN implicit $vgpr0 %val = load i32, i32 addrspace(1)* undef ret i32 %val } @@ -298,16 +241,13 @@ define i32 @i32_func_void() #0 { define i48 @i48_func_void() #0 { ; CHECK-LABEL: name: i48_func_void ; CHECK: bb.1 (%ir-block.0): - ; CHECK: liveins: $sgpr30_sgpr31 - ; CHECK: [[COPY:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 - ; CHECK: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF - ; CHECK: [[LOAD:%[0-9]+]]:_(s48) = G_LOAD [[DEF]](p1) :: (load (s48) from `i48 addrspace(1)* undef`, align 8, addrspace 1) - ; CHECK: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[LOAD]](s48) - ; CHECK: 
[[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[ANYEXT]](s64) - ; CHECK: $vgpr0 = COPY [[UV]](s32) - ; CHECK: $vgpr1 = COPY [[UV1]](s32) - ; CHECK: [[COPY1:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY]] - ; CHECK: S_SETPC_B64_return [[COPY1]], implicit $vgpr0, implicit $vgpr1 + ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF + ; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(s48) = G_LOAD [[DEF]](p1) :: (load (s48) from `i48 addrspace(1)* undef`, align 8, addrspace 1) + ; CHECK-NEXT: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[LOAD]](s48) + ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[ANYEXT]](s64) + ; CHECK-NEXT: $vgpr0 = COPY [[UV]](s32) + ; CHECK-NEXT: $vgpr1 = COPY [[UV1]](s32) + ; CHECK-NEXT: SI_RETURN implicit $vgpr0, implicit $vgpr1 %val = load i48, i48 addrspace(1)* undef, align 8 ret i48 %val } @@ -315,16 +255,13 @@ define i48 @i48_func_void() #0 { define signext i48 @i48_signext_func_void() #0 { ; CHECK-LABEL: name: i48_signext_func_void ; CHECK: bb.1 (%ir-block.0): - ; CHECK: liveins: $sgpr30_sgpr31 - ; CHECK: [[COPY:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 - ; CHECK: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF - ; CHECK: [[LOAD:%[0-9]+]]:_(s48) = G_LOAD [[DEF]](p1) :: (load (s48) from `i48 addrspace(1)* undef`, align 8, addrspace 1) - ; CHECK: [[SEXT:%[0-9]+]]:_(s64) = G_SEXT [[LOAD]](s48) - ; CHECK: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[SEXT]](s64) - ; CHECK: $vgpr0 = COPY [[UV]](s32) - ; CHECK: $vgpr1 = COPY [[UV1]](s32) - ; CHECK: [[COPY1:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY]] - ; CHECK: S_SETPC_B64_return [[COPY1]], implicit $vgpr0, implicit $vgpr1 + ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF + ; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(s48) = G_LOAD [[DEF]](p1) :: (load (s48) from `i48 addrspace(1)* undef`, align 8, addrspace 1) + ; CHECK-NEXT: [[SEXT:%[0-9]+]]:_(s64) = G_SEXT [[LOAD]](s48) + ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[SEXT]](s64) + ; CHECK-NEXT: $vgpr0 = COPY [[UV]](s32) + ; CHECK-NEXT: $vgpr1 = COPY [[UV1]](s32) + ; CHECK-NEXT: SI_RETURN implicit $vgpr0, implicit $vgpr1 %val = load i48, i48 addrspace(1)* undef, align 8 ret i48 %val } @@ -332,16 +269,13 @@ define signext i48 @i48_signext_func_void() #0 { define zeroext i48 @i48_zeroext_func_void() #0 { ; CHECK-LABEL: name: i48_zeroext_func_void ; CHECK: bb.1 (%ir-block.0): - ; CHECK: liveins: $sgpr30_sgpr31 - ; CHECK: [[COPY:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 - ; CHECK: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF - ; CHECK: [[LOAD:%[0-9]+]]:_(s48) = G_LOAD [[DEF]](p1) :: (load (s48) from `i48 addrspace(1)* undef`, align 8, addrspace 1) - ; CHECK: [[ZEXT:%[0-9]+]]:_(s64) = G_ZEXT [[LOAD]](s48) - ; CHECK: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[ZEXT]](s64) - ; CHECK: $vgpr0 = COPY [[UV]](s32) - ; CHECK: $vgpr1 = COPY [[UV1]](s32) - ; CHECK: [[COPY1:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY]] - ; CHECK: S_SETPC_B64_return [[COPY1]], implicit $vgpr0, implicit $vgpr1 + ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF + ; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(s48) = G_LOAD [[DEF]](p1) :: (load (s48) from `i48 addrspace(1)* undef`, align 8, addrspace 1) + ; CHECK-NEXT: [[ZEXT:%[0-9]+]]:_(s64) = G_ZEXT [[LOAD]](s48) + ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[ZEXT]](s64) + ; CHECK-NEXT: $vgpr0 = COPY [[UV]](s32) + ; CHECK-NEXT: $vgpr1 = COPY [[UV1]](s32) + ; CHECK-NEXT: SI_RETURN implicit $vgpr0, implicit $vgpr1 %val = load i48, i48 addrspace(1)* undef, align 8 ret i48 
%val } @@ -349,15 +283,12 @@ define zeroext i48 @i48_zeroext_func_void() #0 { define i64 @i64_func_void() #0 { ; CHECK-LABEL: name: i64_func_void ; CHECK: bb.1 (%ir-block.0): - ; CHECK: liveins: $sgpr30_sgpr31 - ; CHECK: [[COPY:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 - ; CHECK: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF - ; CHECK: [[LOAD:%[0-9]+]]:_(s64) = G_LOAD [[DEF]](p1) :: (load (s64) from `i64 addrspace(1)* undef`, addrspace 1) - ; CHECK: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[LOAD]](s64) - ; CHECK: $vgpr0 = COPY [[UV]](s32) - ; CHECK: $vgpr1 = COPY [[UV1]](s32) - ; CHECK: [[COPY1:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY]] - ; CHECK: S_SETPC_B64_return [[COPY1]], implicit $vgpr0, implicit $vgpr1 + ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF + ; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(s64) = G_LOAD [[DEF]](p1) :: (load (s64) from `i64 addrspace(1)* undef`, addrspace 1) + ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[LOAD]](s64) + ; CHECK-NEXT: $vgpr0 = COPY [[UV]](s32) + ; CHECK-NEXT: $vgpr1 = COPY [[UV1]](s32) + ; CHECK-NEXT: SI_RETURN implicit $vgpr0, implicit $vgpr1 %val = load i64, i64 addrspace(1)* undef ret i64 %val } @@ -365,17 +296,14 @@ define i64 @i64_func_void() #0 { define i65 @i65_func_void() #0 { ; CHECK-LABEL: name: i65_func_void ; CHECK: bb.1 (%ir-block.0): - ; CHECK: liveins: $sgpr30_sgpr31 - ; CHECK: [[COPY:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 - ; CHECK: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF - ; CHECK: [[LOAD:%[0-9]+]]:_(s65) = G_LOAD [[DEF]](p1) :: (load (s65) from `i65 addrspace(1)* undef`, align 8, addrspace 1) - ; CHECK: [[ANYEXT:%[0-9]+]]:_(s96) = G_ANYEXT [[LOAD]](s65) - ; CHECK: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[ANYEXT]](s96) - ; CHECK: $vgpr0 = COPY [[UV]](s32) - ; CHECK: $vgpr1 = COPY [[UV1]](s32) - ; CHECK: $vgpr2 = COPY [[UV2]](s32) - ; CHECK: [[COPY1:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY]] - ; CHECK: S_SETPC_B64_return [[COPY1]], implicit $vgpr0, implicit $vgpr1, implicit $vgpr2 + ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF + ; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(s65) = G_LOAD [[DEF]](p1) :: (load (s65) from `i65 addrspace(1)* undef`, align 8, addrspace 1) + ; CHECK-NEXT: [[ANYEXT:%[0-9]+]]:_(s96) = G_ANYEXT [[LOAD]](s65) + ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[ANYEXT]](s96) + ; CHECK-NEXT: $vgpr0 = COPY [[UV]](s32) + ; CHECK-NEXT: $vgpr1 = COPY [[UV1]](s32) + ; CHECK-NEXT: $vgpr2 = COPY [[UV2]](s32) + ; CHECK-NEXT: SI_RETURN implicit $vgpr0, implicit $vgpr1, implicit $vgpr2 %val = load i65, i65 addrspace(1)* undef ret i65 %val } @@ -383,17 +311,14 @@ define i65 @i65_func_void() #0 { define signext i65 @i65_signext_func_void() #0 { ; CHECK-LABEL: name: i65_signext_func_void ; CHECK: bb.1 (%ir-block.0): - ; CHECK: liveins: $sgpr30_sgpr31 - ; CHECK: [[COPY:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 - ; CHECK: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF - ; CHECK: [[LOAD:%[0-9]+]]:_(s65) = G_LOAD [[DEF]](p1) :: (load (s65) from `i65 addrspace(1)* undef`, align 8, addrspace 1) - ; CHECK: [[SEXT:%[0-9]+]]:_(s96) = G_SEXT [[LOAD]](s65) - ; CHECK: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[SEXT]](s96) - ; CHECK: $vgpr0 = COPY [[UV]](s32) - ; CHECK: $vgpr1 = COPY [[UV1]](s32) - ; CHECK: $vgpr2 = COPY [[UV2]](s32) - ; CHECK: [[COPY1:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY]] - ; CHECK: S_SETPC_B64_return [[COPY1]], implicit $vgpr0, implicit $vgpr1, 
implicit $vgpr2 + ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF + ; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(s65) = G_LOAD [[DEF]](p1) :: (load (s65) from `i65 addrspace(1)* undef`, align 8, addrspace 1) + ; CHECK-NEXT: [[SEXT:%[0-9]+]]:_(s96) = G_SEXT [[LOAD]](s65) + ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[SEXT]](s96) + ; CHECK-NEXT: $vgpr0 = COPY [[UV]](s32) + ; CHECK-NEXT: $vgpr1 = COPY [[UV1]](s32) + ; CHECK-NEXT: $vgpr2 = COPY [[UV2]](s32) + ; CHECK-NEXT: SI_RETURN implicit $vgpr0, implicit $vgpr1, implicit $vgpr2 %val = load i65, i65 addrspace(1)* undef ret i65 %val } @@ -401,17 +326,14 @@ define signext i65 @i65_signext_func_void() #0 { define zeroext i65 @i65_zeroext_func_void() #0 { ; CHECK-LABEL: name: i65_zeroext_func_void ; CHECK: bb.1 (%ir-block.0): - ; CHECK: liveins: $sgpr30_sgpr31 - ; CHECK: [[COPY:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 - ; CHECK: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF - ; CHECK: [[LOAD:%[0-9]+]]:_(s65) = G_LOAD [[DEF]](p1) :: (load (s65) from `i65 addrspace(1)* undef`, align 8, addrspace 1) - ; CHECK: [[ZEXT:%[0-9]+]]:_(s96) = G_ZEXT [[LOAD]](s65) - ; CHECK: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[ZEXT]](s96) - ; CHECK: $vgpr0 = COPY [[UV]](s32) - ; CHECK: $vgpr1 = COPY [[UV1]](s32) - ; CHECK: $vgpr2 = COPY [[UV2]](s32) - ; CHECK: [[COPY1:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY]] - ; CHECK: S_SETPC_B64_return [[COPY1]], implicit $vgpr0, implicit $vgpr1, implicit $vgpr2 + ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF + ; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(s65) = G_LOAD [[DEF]](p1) :: (load (s65) from `i65 addrspace(1)* undef`, align 8, addrspace 1) + ; CHECK-NEXT: [[ZEXT:%[0-9]+]]:_(s96) = G_ZEXT [[LOAD]](s65) + ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[ZEXT]](s96) + ; CHECK-NEXT: $vgpr0 = COPY [[UV]](s32) + ; CHECK-NEXT: $vgpr1 = COPY [[UV1]](s32) + ; CHECK-NEXT: $vgpr2 = COPY [[UV2]](s32) + ; CHECK-NEXT: SI_RETURN implicit $vgpr0, implicit $vgpr1, implicit $vgpr2 %val = load i65, i65 addrspace(1)* undef ret i65 %val } @@ -419,13 +341,10 @@ define zeroext i65 @i65_zeroext_func_void() #0 { define float @f32_func_void() #0 { ; CHECK-LABEL: name: f32_func_void ; CHECK: bb.1 (%ir-block.0): - ; CHECK: liveins: $sgpr30_sgpr31 - ; CHECK: [[COPY:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 - ; CHECK: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF - ; CHECK: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[DEF]](p1) :: (load (s32) from `float addrspace(1)* undef`, addrspace 1) - ; CHECK: $vgpr0 = COPY [[LOAD]](s32) - ; CHECK: [[COPY1:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY]] - ; CHECK: S_SETPC_B64_return [[COPY1]], implicit $vgpr0 + ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF + ; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[DEF]](p1) :: (load (s32) from `float addrspace(1)* undef`, addrspace 1) + ; CHECK-NEXT: $vgpr0 = COPY [[LOAD]](s32) + ; CHECK-NEXT: SI_RETURN implicit $vgpr0 %val = load float, float addrspace(1)* undef ret float %val } @@ -433,15 +352,12 @@ define float @f32_func_void() #0 { define double @f64_func_void() #0 { ; CHECK-LABEL: name: f64_func_void ; CHECK: bb.1 (%ir-block.0): - ; CHECK: liveins: $sgpr30_sgpr31 - ; CHECK: [[COPY:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 - ; CHECK: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF - ; CHECK: [[LOAD:%[0-9]+]]:_(s64) = G_LOAD [[DEF]](p1) :: (load (s64) from `double addrspace(1)* undef`, addrspace 1) - ; CHECK: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = 
G_UNMERGE_VALUES [[LOAD]](s64) - ; CHECK: $vgpr0 = COPY [[UV]](s32) - ; CHECK: $vgpr1 = COPY [[UV1]](s32) - ; CHECK: [[COPY1:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY]] - ; CHECK: S_SETPC_B64_return [[COPY1]], implicit $vgpr0, implicit $vgpr1 + ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF + ; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(s64) = G_LOAD [[DEF]](p1) :: (load (s64) from `double addrspace(1)* undef`, addrspace 1) + ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[LOAD]](s64) + ; CHECK-NEXT: $vgpr0 = COPY [[UV]](s32) + ; CHECK-NEXT: $vgpr1 = COPY [[UV1]](s32) + ; CHECK-NEXT: SI_RETURN implicit $vgpr0, implicit $vgpr1 %val = load double, double addrspace(1)* undef ret double %val } @@ -449,17 +365,14 @@ define double @f64_func_void() #0 { define <2 x double> @v2f64_func_void() #0 { ; CHECK-LABEL: name: v2f64_func_void ; CHECK: bb.1 (%ir-block.0): - ; CHECK: liveins: $sgpr30_sgpr31 - ; CHECK: [[COPY:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 - ; CHECK: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF - ; CHECK: [[LOAD:%[0-9]+]]:_(<2 x s64>) = G_LOAD [[DEF]](p1) :: (load (<2 x s64>) from `<2 x double> addrspace(1)* undef`, addrspace 1) - ; CHECK: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[LOAD]](<2 x s64>) - ; CHECK: $vgpr0 = COPY [[UV]](s32) - ; CHECK: $vgpr1 = COPY [[UV1]](s32) - ; CHECK: $vgpr2 = COPY [[UV2]](s32) - ; CHECK: $vgpr3 = COPY [[UV3]](s32) - ; CHECK: [[COPY1:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY]] - ; CHECK: S_SETPC_B64_return [[COPY1]], implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3 + ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF + ; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(<2 x s64>) = G_LOAD [[DEF]](p1) :: (load (<2 x s64>) from `<2 x double> addrspace(1)* undef`, addrspace 1) + ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[LOAD]](<2 x s64>) + ; CHECK-NEXT: $vgpr0 = COPY [[UV]](s32) + ; CHECK-NEXT: $vgpr1 = COPY [[UV1]](s32) + ; CHECK-NEXT: $vgpr2 = COPY [[UV2]](s32) + ; CHECK-NEXT: $vgpr3 = COPY [[UV3]](s32) + ; CHECK-NEXT: SI_RETURN implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3 %val = load <2 x double>, <2 x double> addrspace(1)* undef ret <2 x double> %val } @@ -467,15 +380,12 @@ define <2 x double> @v2f64_func_void() #0 { define <2 x i32> @v2i32_func_void() #0 { ; CHECK-LABEL: name: v2i32_func_void ; CHECK: bb.1 (%ir-block.0): - ; CHECK: liveins: $sgpr30_sgpr31 - ; CHECK: [[COPY:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 - ; CHECK: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF - ; CHECK: [[LOAD:%[0-9]+]]:_(<2 x s32>) = G_LOAD [[DEF]](p1) :: (load (<2 x s32>) from `<2 x i32> addrspace(1)* undef`, addrspace 1) - ; CHECK: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[LOAD]](<2 x s32>) - ; CHECK: $vgpr0 = COPY [[UV]](s32) - ; CHECK: $vgpr1 = COPY [[UV1]](s32) - ; CHECK: [[COPY1:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY]] - ; CHECK: S_SETPC_B64_return [[COPY1]], implicit $vgpr0, implicit $vgpr1 + ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF + ; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(<2 x s32>) = G_LOAD [[DEF]](p1) :: (load (<2 x s32>) from `<2 x i32> addrspace(1)* undef`, addrspace 1) + ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[LOAD]](<2 x s32>) + ; CHECK-NEXT: $vgpr0 = COPY [[UV]](s32) + ; CHECK-NEXT: $vgpr1 = COPY [[UV1]](s32) + ; CHECK-NEXT: SI_RETURN implicit $vgpr0, implicit $vgpr1 %val = load <2 x i32>, <2 x i32> 
addrspace(1)* undef ret <2 x i32> %val } @@ -483,16 +393,13 @@ define <2 x i32> @v2i32_func_void() #0 { define <3 x i32> @v3i32_func_void() #0 { ; CHECK-LABEL: name: v3i32_func_void ; CHECK: bb.1 (%ir-block.0): - ; CHECK: liveins: $sgpr30_sgpr31 - ; CHECK: [[COPY:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 - ; CHECK: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF - ; CHECK: [[LOAD:%[0-9]+]]:_(<3 x s32>) = G_LOAD [[DEF]](p1) :: (load (<3 x s32>) from `<3 x i32> addrspace(1)* undef`, align 16, addrspace 1) - ; CHECK: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[LOAD]](<3 x s32>) - ; CHECK: $vgpr0 = COPY [[UV]](s32) - ; CHECK: $vgpr1 = COPY [[UV1]](s32) - ; CHECK: $vgpr2 = COPY [[UV2]](s32) - ; CHECK: [[COPY1:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY]] - ; CHECK: S_SETPC_B64_return [[COPY1]], implicit $vgpr0, implicit $vgpr1, implicit $vgpr2 + ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF + ; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(<3 x s32>) = G_LOAD [[DEF]](p1) :: (load (<3 x s32>) from `<3 x i32> addrspace(1)* undef`, align 16, addrspace 1) + ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[LOAD]](<3 x s32>) + ; CHECK-NEXT: $vgpr0 = COPY [[UV]](s32) + ; CHECK-NEXT: $vgpr1 = COPY [[UV1]](s32) + ; CHECK-NEXT: $vgpr2 = COPY [[UV2]](s32) + ; CHECK-NEXT: SI_RETURN implicit $vgpr0, implicit $vgpr1, implicit $vgpr2 %val = load <3 x i32>, <3 x i32> addrspace(1)* undef ret <3 x i32> %val } @@ -500,17 +407,14 @@ define <3 x i32> @v3i32_func_void() #0 { define <4 x i32> @v4i32_func_void() #0 { ; CHECK-LABEL: name: v4i32_func_void ; CHECK: bb.1 (%ir-block.0): - ; CHECK: liveins: $sgpr30_sgpr31 - ; CHECK: [[COPY:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 - ; CHECK: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF - ; CHECK: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[DEF]](p1) :: (load (<4 x s32>) from `<4 x i32> addrspace(1)* undef`, addrspace 1) - ; CHECK: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[LOAD]](<4 x s32>) - ; CHECK: $vgpr0 = COPY [[UV]](s32) - ; CHECK: $vgpr1 = COPY [[UV1]](s32) - ; CHECK: $vgpr2 = COPY [[UV2]](s32) - ; CHECK: $vgpr3 = COPY [[UV3]](s32) - ; CHECK: [[COPY1:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY]] - ; CHECK: S_SETPC_B64_return [[COPY1]], implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3 + ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF + ; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[DEF]](p1) :: (load (<4 x s32>) from `<4 x i32> addrspace(1)* undef`, addrspace 1) + ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[LOAD]](<4 x s32>) + ; CHECK-NEXT: $vgpr0 = COPY [[UV]](s32) + ; CHECK-NEXT: $vgpr1 = COPY [[UV1]](s32) + ; CHECK-NEXT: $vgpr2 = COPY [[UV2]](s32) + ; CHECK-NEXT: $vgpr3 = COPY [[UV3]](s32) + ; CHECK-NEXT: SI_RETURN implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3 %val = load <4 x i32>, <4 x i32> addrspace(1)* undef ret <4 x i32> %val } @@ -518,18 +422,15 @@ define <4 x i32> @v4i32_func_void() #0 { define <5 x i32> @v5i32_func_void() #0 { ; CHECK-LABEL: name: v5i32_func_void ; CHECK: bb.1 (%ir-block.0): - ; CHECK: liveins: $sgpr30_sgpr31 - ; CHECK: [[COPY:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 - ; CHECK: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF - ; CHECK: [[LOAD:%[0-9]+]]:_(<5 x s32>) = G_LOAD [[DEF]](p1) :: (volatile load (<5 x s32>) from `<5 x i32> addrspace(1)* undef`, align 32, addrspace 1) - ; CHECK: 
[[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[LOAD]](<5 x s32>) - ; CHECK: $vgpr0 = COPY [[UV]](s32) - ; CHECK: $vgpr1 = COPY [[UV1]](s32) - ; CHECK: $vgpr2 = COPY [[UV2]](s32) - ; CHECK: $vgpr3 = COPY [[UV3]](s32) - ; CHECK: $vgpr4 = COPY [[UV4]](s32) - ; CHECK: [[COPY1:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY]] - ; CHECK: S_SETPC_B64_return [[COPY1]], implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4 + ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF + ; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(<5 x s32>) = G_LOAD [[DEF]](p1) :: (volatile load (<5 x s32>) from `<5 x i32> addrspace(1)* undef`, align 32, addrspace 1) + ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[LOAD]](<5 x s32>) + ; CHECK-NEXT: $vgpr0 = COPY [[UV]](s32) + ; CHECK-NEXT: $vgpr1 = COPY [[UV1]](s32) + ; CHECK-NEXT: $vgpr2 = COPY [[UV2]](s32) + ; CHECK-NEXT: $vgpr3 = COPY [[UV3]](s32) + ; CHECK-NEXT: $vgpr4 = COPY [[UV4]](s32) + ; CHECK-NEXT: SI_RETURN implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4 %val = load volatile <5 x i32>, <5 x i32> addrspace(1)* undef ret <5 x i32> %val } @@ -537,22 +438,19 @@ define <5 x i32> @v5i32_func_void() #0 { define <8 x i32> @v8i32_func_void() #0 { ; CHECK-LABEL: name: v8i32_func_void ; CHECK: bb.1 (%ir-block.0): - ; CHECK: liveins: $sgpr30_sgpr31 - ; CHECK: [[COPY:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 - ; CHECK: [[DEF:%[0-9]+]]:_(p4) = G_IMPLICIT_DEF - ; CHECK: [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[DEF]](p4) :: (volatile load (p1) from `<8 x i32> addrspace(1)* addrspace(4)* undef`, addrspace 4) - ; CHECK: [[LOAD1:%[0-9]+]]:_(<8 x s32>) = G_LOAD [[LOAD]](p1) :: (load (<8 x s32>) from %ir.ptr, addrspace 1) - ; CHECK: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32), [[UV6:%[0-9]+]]:_(s32), [[UV7:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[LOAD1]](<8 x s32>) - ; CHECK: $vgpr0 = COPY [[UV]](s32) - ; CHECK: $vgpr1 = COPY [[UV1]](s32) - ; CHECK: $vgpr2 = COPY [[UV2]](s32) - ; CHECK: $vgpr3 = COPY [[UV3]](s32) - ; CHECK: $vgpr4 = COPY [[UV4]](s32) - ; CHECK: $vgpr5 = COPY [[UV5]](s32) - ; CHECK: $vgpr6 = COPY [[UV6]](s32) - ; CHECK: $vgpr7 = COPY [[UV7]](s32) - ; CHECK: [[COPY1:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY]] - ; CHECK: S_SETPC_B64_return [[COPY1]], implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7 + ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p4) = G_IMPLICIT_DEF + ; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[DEF]](p4) :: (volatile load (p1) from `<8 x i32> addrspace(1)* addrspace(4)* undef`, addrspace 4) + ; CHECK-NEXT: [[LOAD1:%[0-9]+]]:_(<8 x s32>) = G_LOAD [[LOAD]](p1) :: (load (<8 x s32>) from %ir.ptr, addrspace 1) + ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32), [[UV6:%[0-9]+]]:_(s32), [[UV7:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[LOAD1]](<8 x s32>) + ; CHECK-NEXT: $vgpr0 = COPY [[UV]](s32) + ; CHECK-NEXT: $vgpr1 = COPY [[UV1]](s32) + ; CHECK-NEXT: $vgpr2 = COPY [[UV2]](s32) + ; CHECK-NEXT: $vgpr3 = COPY [[UV3]](s32) + ; CHECK-NEXT: $vgpr4 = COPY [[UV4]](s32) + ; CHECK-NEXT: $vgpr5 = COPY [[UV5]](s32) + ; CHECK-NEXT: $vgpr6 = COPY [[UV6]](s32) + ; CHECK-NEXT: $vgpr7 
= COPY [[UV7]](s32) + ; CHECK-NEXT: SI_RETURN implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7 %ptr = load volatile <8 x i32> addrspace(1)*, <8 x i32> addrspace(1)* addrspace(4)* undef %val = load <8 x i32>, <8 x i32> addrspace(1)* %ptr ret <8 x i32> %val @@ -561,30 +459,27 @@ define <8 x i32> @v8i32_func_void() #0 { define <16 x i32> @v16i32_func_void() #0 { ; CHECK-LABEL: name: v16i32_func_void ; CHECK: bb.1 (%ir-block.0): - ; CHECK: liveins: $sgpr30_sgpr31 - ; CHECK: [[COPY:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 - ; CHECK: [[DEF:%[0-9]+]]:_(p4) = G_IMPLICIT_DEF - ; CHECK: [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[DEF]](p4) :: (volatile load (p1) from `<16 x i32> addrspace(1)* addrspace(4)* undef`, addrspace 4) - ; CHECK: [[LOAD1:%[0-9]+]]:_(<16 x s32>) = G_LOAD [[LOAD]](p1) :: (load (<16 x s32>) from %ir.ptr, addrspace 1) - ; CHECK: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32), [[UV6:%[0-9]+]]:_(s32), [[UV7:%[0-9]+]]:_(s32), [[UV8:%[0-9]+]]:_(s32), [[UV9:%[0-9]+]]:_(s32), [[UV10:%[0-9]+]]:_(s32), [[UV11:%[0-9]+]]:_(s32), [[UV12:%[0-9]+]]:_(s32), [[UV13:%[0-9]+]]:_(s32), [[UV14:%[0-9]+]]:_(s32), [[UV15:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[LOAD1]](<16 x s32>) - ; CHECK: $vgpr0 = COPY [[UV]](s32) - ; CHECK: $vgpr1 = COPY [[UV1]](s32) - ; CHECK: $vgpr2 = COPY [[UV2]](s32) - ; CHECK: $vgpr3 = COPY [[UV3]](s32) - ; CHECK: $vgpr4 = COPY [[UV4]](s32) - ; CHECK: $vgpr5 = COPY [[UV5]](s32) - ; CHECK: $vgpr6 = COPY [[UV6]](s32) - ; CHECK: $vgpr7 = COPY [[UV7]](s32) - ; CHECK: $vgpr8 = COPY [[UV8]](s32) - ; CHECK: $vgpr9 = COPY [[UV9]](s32) - ; CHECK: $vgpr10 = COPY [[UV10]](s32) - ; CHECK: $vgpr11 = COPY [[UV11]](s32) - ; CHECK: $vgpr12 = COPY [[UV12]](s32) - ; CHECK: $vgpr13 = COPY [[UV13]](s32) - ; CHECK: $vgpr14 = COPY [[UV14]](s32) - ; CHECK: $vgpr15 = COPY [[UV15]](s32) - ; CHECK: [[COPY1:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY]] - ; CHECK: S_SETPC_B64_return [[COPY1]], implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7, implicit $vgpr8, implicit $vgpr9, implicit $vgpr10, implicit $vgpr11, implicit $vgpr12, implicit $vgpr13, implicit $vgpr14, implicit $vgpr15 + ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p4) = G_IMPLICIT_DEF + ; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[DEF]](p4) :: (volatile load (p1) from `<16 x i32> addrspace(1)* addrspace(4)* undef`, addrspace 4) + ; CHECK-NEXT: [[LOAD1:%[0-9]+]]:_(<16 x s32>) = G_LOAD [[LOAD]](p1) :: (load (<16 x s32>) from %ir.ptr, addrspace 1) + ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32), [[UV6:%[0-9]+]]:_(s32), [[UV7:%[0-9]+]]:_(s32), [[UV8:%[0-9]+]]:_(s32), [[UV9:%[0-9]+]]:_(s32), [[UV10:%[0-9]+]]:_(s32), [[UV11:%[0-9]+]]:_(s32), [[UV12:%[0-9]+]]:_(s32), [[UV13:%[0-9]+]]:_(s32), [[UV14:%[0-9]+]]:_(s32), [[UV15:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[LOAD1]](<16 x s32>) + ; CHECK-NEXT: $vgpr0 = COPY [[UV]](s32) + ; CHECK-NEXT: $vgpr1 = COPY [[UV1]](s32) + ; CHECK-NEXT: $vgpr2 = COPY [[UV2]](s32) + ; CHECK-NEXT: $vgpr3 = COPY [[UV3]](s32) + ; CHECK-NEXT: $vgpr4 = COPY [[UV4]](s32) + ; CHECK-NEXT: $vgpr5 = COPY [[UV5]](s32) + ; CHECK-NEXT: $vgpr6 = COPY [[UV6]](s32) + ; CHECK-NEXT: $vgpr7 = COPY [[UV7]](s32) + ; CHECK-NEXT: $vgpr8 = COPY [[UV8]](s32) + ; CHECK-NEXT: $vgpr9 = COPY [[UV9]](s32) + ; 
CHECK-NEXT: $vgpr10 = COPY [[UV10]](s32) + ; CHECK-NEXT: $vgpr11 = COPY [[UV11]](s32) + ; CHECK-NEXT: $vgpr12 = COPY [[UV12]](s32) + ; CHECK-NEXT: $vgpr13 = COPY [[UV13]](s32) + ; CHECK-NEXT: $vgpr14 = COPY [[UV14]](s32) + ; CHECK-NEXT: $vgpr15 = COPY [[UV15]](s32) + ; CHECK-NEXT: SI_RETURN implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7, implicit $vgpr8, implicit $vgpr9, implicit $vgpr10, implicit $vgpr11, implicit $vgpr12, implicit $vgpr13, implicit $vgpr14, implicit $vgpr15 %ptr = load volatile <16 x i32> addrspace(1)*, <16 x i32> addrspace(1)* addrspace(4)* undef %val = load <16 x i32>, <16 x i32> addrspace(1)* %ptr ret <16 x i32> %val @@ -593,46 +488,43 @@ define <16 x i32> @v16i32_func_void() #0 { define <32 x i32> @v32i32_func_void() #0 { ; CHECK-LABEL: name: v32i32_func_void ; CHECK: bb.1 (%ir-block.0): - ; CHECK: liveins: $sgpr30_sgpr31 - ; CHECK: [[COPY:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 - ; CHECK: [[DEF:%[0-9]+]]:_(p4) = G_IMPLICIT_DEF - ; CHECK: [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[DEF]](p4) :: (volatile load (p1) from `<32 x i32> addrspace(1)* addrspace(4)* undef`, addrspace 4) - ; CHECK: [[LOAD1:%[0-9]+]]:_(<32 x s32>) = G_LOAD [[LOAD]](p1) :: (load (<32 x s32>) from %ir.ptr, addrspace 1) - ; CHECK: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32), [[UV6:%[0-9]+]]:_(s32), [[UV7:%[0-9]+]]:_(s32), [[UV8:%[0-9]+]]:_(s32), [[UV9:%[0-9]+]]:_(s32), [[UV10:%[0-9]+]]:_(s32), [[UV11:%[0-9]+]]:_(s32), [[UV12:%[0-9]+]]:_(s32), [[UV13:%[0-9]+]]:_(s32), [[UV14:%[0-9]+]]:_(s32), [[UV15:%[0-9]+]]:_(s32), [[UV16:%[0-9]+]]:_(s32), [[UV17:%[0-9]+]]:_(s32), [[UV18:%[0-9]+]]:_(s32), [[UV19:%[0-9]+]]:_(s32), [[UV20:%[0-9]+]]:_(s32), [[UV21:%[0-9]+]]:_(s32), [[UV22:%[0-9]+]]:_(s32), [[UV23:%[0-9]+]]:_(s32), [[UV24:%[0-9]+]]:_(s32), [[UV25:%[0-9]+]]:_(s32), [[UV26:%[0-9]+]]:_(s32), [[UV27:%[0-9]+]]:_(s32), [[UV28:%[0-9]+]]:_(s32), [[UV29:%[0-9]+]]:_(s32), [[UV30:%[0-9]+]]:_(s32), [[UV31:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[LOAD1]](<32 x s32>) - ; CHECK: $vgpr0 = COPY [[UV]](s32) - ; CHECK: $vgpr1 = COPY [[UV1]](s32) - ; CHECK: $vgpr2 = COPY [[UV2]](s32) - ; CHECK: $vgpr3 = COPY [[UV3]](s32) - ; CHECK: $vgpr4 = COPY [[UV4]](s32) - ; CHECK: $vgpr5 = COPY [[UV5]](s32) - ; CHECK: $vgpr6 = COPY [[UV6]](s32) - ; CHECK: $vgpr7 = COPY [[UV7]](s32) - ; CHECK: $vgpr8 = COPY [[UV8]](s32) - ; CHECK: $vgpr9 = COPY [[UV9]](s32) - ; CHECK: $vgpr10 = COPY [[UV10]](s32) - ; CHECK: $vgpr11 = COPY [[UV11]](s32) - ; CHECK: $vgpr12 = COPY [[UV12]](s32) - ; CHECK: $vgpr13 = COPY [[UV13]](s32) - ; CHECK: $vgpr14 = COPY [[UV14]](s32) - ; CHECK: $vgpr15 = COPY [[UV15]](s32) - ; CHECK: $vgpr16 = COPY [[UV16]](s32) - ; CHECK: $vgpr17 = COPY [[UV17]](s32) - ; CHECK: $vgpr18 = COPY [[UV18]](s32) - ; CHECK: $vgpr19 = COPY [[UV19]](s32) - ; CHECK: $vgpr20 = COPY [[UV20]](s32) - ; CHECK: $vgpr21 = COPY [[UV21]](s32) - ; CHECK: $vgpr22 = COPY [[UV22]](s32) - ; CHECK: $vgpr23 = COPY [[UV23]](s32) - ; CHECK: $vgpr24 = COPY [[UV24]](s32) - ; CHECK: $vgpr25 = COPY [[UV25]](s32) - ; CHECK: $vgpr26 = COPY [[UV26]](s32) - ; CHECK: $vgpr27 = COPY [[UV27]](s32) - ; CHECK: $vgpr28 = COPY [[UV28]](s32) - ; CHECK: $vgpr29 = COPY [[UV29]](s32) - ; CHECK: $vgpr30 = COPY [[UV30]](s32) - ; CHECK: $vgpr31 = COPY [[UV31]](s32) - ; CHECK: [[COPY1:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY]] - ; CHECK: S_SETPC_B64_return [[COPY1]], implicit $vgpr0, implicit $vgpr1, implicit 
$vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7, implicit $vgpr8, implicit $vgpr9, implicit $vgpr10, implicit $vgpr11, implicit $vgpr12, implicit $vgpr13, implicit $vgpr14, implicit $vgpr15, implicit $vgpr16, implicit $vgpr17, implicit $vgpr18, implicit $vgpr19, implicit $vgpr20, implicit $vgpr21, implicit $vgpr22, implicit $vgpr23, implicit $vgpr24, implicit $vgpr25, implicit $vgpr26, implicit $vgpr27, implicit $vgpr28, implicit $vgpr29, implicit $vgpr30, implicit $vgpr31 + ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p4) = G_IMPLICIT_DEF + ; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[DEF]](p4) :: (volatile load (p1) from `<32 x i32> addrspace(1)* addrspace(4)* undef`, addrspace 4) + ; CHECK-NEXT: [[LOAD1:%[0-9]+]]:_(<32 x s32>) = G_LOAD [[LOAD]](p1) :: (load (<32 x s32>) from %ir.ptr, addrspace 1) + ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32), [[UV6:%[0-9]+]]:_(s32), [[UV7:%[0-9]+]]:_(s32), [[UV8:%[0-9]+]]:_(s32), [[UV9:%[0-9]+]]:_(s32), [[UV10:%[0-9]+]]:_(s32), [[UV11:%[0-9]+]]:_(s32), [[UV12:%[0-9]+]]:_(s32), [[UV13:%[0-9]+]]:_(s32), [[UV14:%[0-9]+]]:_(s32), [[UV15:%[0-9]+]]:_(s32), [[UV16:%[0-9]+]]:_(s32), [[UV17:%[0-9]+]]:_(s32), [[UV18:%[0-9]+]]:_(s32), [[UV19:%[0-9]+]]:_(s32), [[UV20:%[0-9]+]]:_(s32), [[UV21:%[0-9]+]]:_(s32), [[UV22:%[0-9]+]]:_(s32), [[UV23:%[0-9]+]]:_(s32), [[UV24:%[0-9]+]]:_(s32), [[UV25:%[0-9]+]]:_(s32), [[UV26:%[0-9]+]]:_(s32), [[UV27:%[0-9]+]]:_(s32), [[UV28:%[0-9]+]]:_(s32), [[UV29:%[0-9]+]]:_(s32), [[UV30:%[0-9]+]]:_(s32), [[UV31:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[LOAD1]](<32 x s32>) + ; CHECK-NEXT: $vgpr0 = COPY [[UV]](s32) + ; CHECK-NEXT: $vgpr1 = COPY [[UV1]](s32) + ; CHECK-NEXT: $vgpr2 = COPY [[UV2]](s32) + ; CHECK-NEXT: $vgpr3 = COPY [[UV3]](s32) + ; CHECK-NEXT: $vgpr4 = COPY [[UV4]](s32) + ; CHECK-NEXT: $vgpr5 = COPY [[UV5]](s32) + ; CHECK-NEXT: $vgpr6 = COPY [[UV6]](s32) + ; CHECK-NEXT: $vgpr7 = COPY [[UV7]](s32) + ; CHECK-NEXT: $vgpr8 = COPY [[UV8]](s32) + ; CHECK-NEXT: $vgpr9 = COPY [[UV9]](s32) + ; CHECK-NEXT: $vgpr10 = COPY [[UV10]](s32) + ; CHECK-NEXT: $vgpr11 = COPY [[UV11]](s32) + ; CHECK-NEXT: $vgpr12 = COPY [[UV12]](s32) + ; CHECK-NEXT: $vgpr13 = COPY [[UV13]](s32) + ; CHECK-NEXT: $vgpr14 = COPY [[UV14]](s32) + ; CHECK-NEXT: $vgpr15 = COPY [[UV15]](s32) + ; CHECK-NEXT: $vgpr16 = COPY [[UV16]](s32) + ; CHECK-NEXT: $vgpr17 = COPY [[UV17]](s32) + ; CHECK-NEXT: $vgpr18 = COPY [[UV18]](s32) + ; CHECK-NEXT: $vgpr19 = COPY [[UV19]](s32) + ; CHECK-NEXT: $vgpr20 = COPY [[UV20]](s32) + ; CHECK-NEXT: $vgpr21 = COPY [[UV21]](s32) + ; CHECK-NEXT: $vgpr22 = COPY [[UV22]](s32) + ; CHECK-NEXT: $vgpr23 = COPY [[UV23]](s32) + ; CHECK-NEXT: $vgpr24 = COPY [[UV24]](s32) + ; CHECK-NEXT: $vgpr25 = COPY [[UV25]](s32) + ; CHECK-NEXT: $vgpr26 = COPY [[UV26]](s32) + ; CHECK-NEXT: $vgpr27 = COPY [[UV27]](s32) + ; CHECK-NEXT: $vgpr28 = COPY [[UV28]](s32) + ; CHECK-NEXT: $vgpr29 = COPY [[UV29]](s32) + ; CHECK-NEXT: $vgpr30 = COPY [[UV30]](s32) + ; CHECK-NEXT: $vgpr31 = COPY [[UV31]](s32) + ; CHECK-NEXT: SI_RETURN implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7, implicit $vgpr8, implicit $vgpr9, implicit $vgpr10, implicit $vgpr11, implicit $vgpr12, implicit $vgpr13, implicit $vgpr14, implicit $vgpr15, implicit $vgpr16, implicit $vgpr17, implicit $vgpr18, implicit $vgpr19, implicit $vgpr20, implicit $vgpr21, implicit $vgpr22, implicit 
$vgpr23, implicit $vgpr24, implicit $vgpr25, implicit $vgpr26, implicit $vgpr27, implicit $vgpr28, implicit $vgpr29, implicit $vgpr30, implicit $vgpr31 %ptr = load volatile <32 x i32> addrspace(1)*, <32 x i32> addrspace(1)* addrspace(4)* undef %val = load <32 x i32>, <32 x i32> addrspace(1)* %ptr ret <32 x i32> %val @@ -641,17 +533,14 @@ define <32 x i32> @v32i32_func_void() #0 { define <2 x i64> @v2i64_func_void() #0 { ; CHECK-LABEL: name: v2i64_func_void ; CHECK: bb.1 (%ir-block.0): - ; CHECK: liveins: $sgpr30_sgpr31 - ; CHECK: [[COPY:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 - ; CHECK: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF - ; CHECK: [[LOAD:%[0-9]+]]:_(<2 x s64>) = G_LOAD [[DEF]](p1) :: (load (<2 x s64>) from `<2 x i64> addrspace(1)* undef`, addrspace 1) - ; CHECK: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[LOAD]](<2 x s64>) - ; CHECK: $vgpr0 = COPY [[UV]](s32) - ; CHECK: $vgpr1 = COPY [[UV1]](s32) - ; CHECK: $vgpr2 = COPY [[UV2]](s32) - ; CHECK: $vgpr3 = COPY [[UV3]](s32) - ; CHECK: [[COPY1:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY]] - ; CHECK: S_SETPC_B64_return [[COPY1]], implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3 + ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF + ; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(<2 x s64>) = G_LOAD [[DEF]](p1) :: (load (<2 x s64>) from `<2 x i64> addrspace(1)* undef`, addrspace 1) + ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[LOAD]](<2 x s64>) + ; CHECK-NEXT: $vgpr0 = COPY [[UV]](s32) + ; CHECK-NEXT: $vgpr1 = COPY [[UV1]](s32) + ; CHECK-NEXT: $vgpr2 = COPY [[UV2]](s32) + ; CHECK-NEXT: $vgpr3 = COPY [[UV3]](s32) + ; CHECK-NEXT: SI_RETURN implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3 %val = load <2 x i64>, <2 x i64> addrspace(1)* undef ret <2 x i64> %val } @@ -659,20 +548,17 @@ define <2 x i64> @v2i64_func_void() #0 { define <3 x i64> @v3i64_func_void() #0 { ; CHECK-LABEL: name: v3i64_func_void ; CHECK: bb.1 (%ir-block.0): - ; CHECK: liveins: $sgpr30_sgpr31 - ; CHECK: [[COPY:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 - ; CHECK: [[DEF:%[0-9]+]]:_(p4) = G_IMPLICIT_DEF - ; CHECK: [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[DEF]](p4) :: (volatile load (p1) from `<3 x i64> addrspace(1)* addrspace(4)* undef`, addrspace 4) - ; CHECK: [[LOAD1:%[0-9]+]]:_(<3 x s64>) = G_LOAD [[LOAD]](p1) :: (load (<3 x s64>) from %ir.ptr, align 32, addrspace 1) - ; CHECK: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[LOAD1]](<3 x s64>) - ; CHECK: $vgpr0 = COPY [[UV]](s32) - ; CHECK: $vgpr1 = COPY [[UV1]](s32) - ; CHECK: $vgpr2 = COPY [[UV2]](s32) - ; CHECK: $vgpr3 = COPY [[UV3]](s32) - ; CHECK: $vgpr4 = COPY [[UV4]](s32) - ; CHECK: $vgpr5 = COPY [[UV5]](s32) - ; CHECK: [[COPY1:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY]] - ; CHECK: S_SETPC_B64_return [[COPY1]], implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5 + ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p4) = G_IMPLICIT_DEF + ; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[DEF]](p4) :: (volatile load (p1) from `<3 x i64> addrspace(1)* addrspace(4)* undef`, addrspace 4) + ; CHECK-NEXT: [[LOAD1:%[0-9]+]]:_(<3 x s64>) = G_LOAD [[LOAD]](p1) :: (load (<3 x s64>) from %ir.ptr, align 32, addrspace 1) + ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32), 
[[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[LOAD1]](<3 x s64>) + ; CHECK-NEXT: $vgpr0 = COPY [[UV]](s32) + ; CHECK-NEXT: $vgpr1 = COPY [[UV1]](s32) + ; CHECK-NEXT: $vgpr2 = COPY [[UV2]](s32) + ; CHECK-NEXT: $vgpr3 = COPY [[UV3]](s32) + ; CHECK-NEXT: $vgpr4 = COPY [[UV4]](s32) + ; CHECK-NEXT: $vgpr5 = COPY [[UV5]](s32) + ; CHECK-NEXT: SI_RETURN implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5 %ptr = load volatile <3 x i64> addrspace(1)*, <3 x i64> addrspace(1)* addrspace(4)* undef %val = load <3 x i64>, <3 x i64> addrspace(1)* %ptr ret <3 x i64> %val @@ -681,22 +567,19 @@ define <3 x i64> @v3i64_func_void() #0 { define <4 x i64> @v4i64_func_void() #0 { ; CHECK-LABEL: name: v4i64_func_void ; CHECK: bb.1 (%ir-block.0): - ; CHECK: liveins: $sgpr30_sgpr31 - ; CHECK: [[COPY:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 - ; CHECK: [[DEF:%[0-9]+]]:_(p4) = G_IMPLICIT_DEF - ; CHECK: [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[DEF]](p4) :: (volatile load (p1) from `<4 x i64> addrspace(1)* addrspace(4)* undef`, addrspace 4) - ; CHECK: [[LOAD1:%[0-9]+]]:_(<4 x s64>) = G_LOAD [[LOAD]](p1) :: (load (<4 x s64>) from %ir.ptr, addrspace 1) - ; CHECK: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32), [[UV6:%[0-9]+]]:_(s32), [[UV7:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[LOAD1]](<4 x s64>) - ; CHECK: $vgpr0 = COPY [[UV]](s32) - ; CHECK: $vgpr1 = COPY [[UV1]](s32) - ; CHECK: $vgpr2 = COPY [[UV2]](s32) - ; CHECK: $vgpr3 = COPY [[UV3]](s32) - ; CHECK: $vgpr4 = COPY [[UV4]](s32) - ; CHECK: $vgpr5 = COPY [[UV5]](s32) - ; CHECK: $vgpr6 = COPY [[UV6]](s32) - ; CHECK: $vgpr7 = COPY [[UV7]](s32) - ; CHECK: [[COPY1:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY]] - ; CHECK: S_SETPC_B64_return [[COPY1]], implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7 + ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p4) = G_IMPLICIT_DEF + ; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[DEF]](p4) :: (volatile load (p1) from `<4 x i64> addrspace(1)* addrspace(4)* undef`, addrspace 4) + ; CHECK-NEXT: [[LOAD1:%[0-9]+]]:_(<4 x s64>) = G_LOAD [[LOAD]](p1) :: (load (<4 x s64>) from %ir.ptr, addrspace 1) + ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32), [[UV6:%[0-9]+]]:_(s32), [[UV7:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[LOAD1]](<4 x s64>) + ; CHECK-NEXT: $vgpr0 = COPY [[UV]](s32) + ; CHECK-NEXT: $vgpr1 = COPY [[UV1]](s32) + ; CHECK-NEXT: $vgpr2 = COPY [[UV2]](s32) + ; CHECK-NEXT: $vgpr3 = COPY [[UV3]](s32) + ; CHECK-NEXT: $vgpr4 = COPY [[UV4]](s32) + ; CHECK-NEXT: $vgpr5 = COPY [[UV5]](s32) + ; CHECK-NEXT: $vgpr6 = COPY [[UV6]](s32) + ; CHECK-NEXT: $vgpr7 = COPY [[UV7]](s32) + ; CHECK-NEXT: SI_RETURN implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7 %ptr = load volatile <4 x i64> addrspace(1)*, <4 x i64> addrspace(1)* addrspace(4)* undef %val = load <4 x i64>, <4 x i64> addrspace(1)* %ptr ret <4 x i64> %val @@ -705,24 +588,21 @@ define <4 x i64> @v4i64_func_void() #0 { define <5 x i64> @v5i64_func_void() #0 { ; CHECK-LABEL: name: v5i64_func_void ; CHECK: bb.1 (%ir-block.0): - ; CHECK: liveins: $sgpr30_sgpr31 - ; CHECK: [[COPY:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 - ; CHECK: [[DEF:%[0-9]+]]:_(p4) = G_IMPLICIT_DEF - ; CHECK: [[LOAD:%[0-9]+]]:_(p1) 
= G_LOAD [[DEF]](p4) :: (volatile load (p1) from `<5 x i64> addrspace(1)* addrspace(4)* undef`, addrspace 4) - ; CHECK: [[LOAD1:%[0-9]+]]:_(<5 x s64>) = G_LOAD [[LOAD]](p1) :: (load (<5 x s64>) from %ir.ptr, align 64, addrspace 1) - ; CHECK: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32), [[UV6:%[0-9]+]]:_(s32), [[UV7:%[0-9]+]]:_(s32), [[UV8:%[0-9]+]]:_(s32), [[UV9:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[LOAD1]](<5 x s64>) - ; CHECK: $vgpr0 = COPY [[UV]](s32) - ; CHECK: $vgpr1 = COPY [[UV1]](s32) - ; CHECK: $vgpr2 = COPY [[UV2]](s32) - ; CHECK: $vgpr3 = COPY [[UV3]](s32) - ; CHECK: $vgpr4 = COPY [[UV4]](s32) - ; CHECK: $vgpr5 = COPY [[UV5]](s32) - ; CHECK: $vgpr6 = COPY [[UV6]](s32) - ; CHECK: $vgpr7 = COPY [[UV7]](s32) - ; CHECK: $vgpr8 = COPY [[UV8]](s32) - ; CHECK: $vgpr9 = COPY [[UV9]](s32) - ; CHECK: [[COPY1:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY]] - ; CHECK: S_SETPC_B64_return [[COPY1]], implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7, implicit $vgpr8, implicit $vgpr9 + ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p4) = G_IMPLICIT_DEF + ; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[DEF]](p4) :: (volatile load (p1) from `<5 x i64> addrspace(1)* addrspace(4)* undef`, addrspace 4) + ; CHECK-NEXT: [[LOAD1:%[0-9]+]]:_(<5 x s64>) = G_LOAD [[LOAD]](p1) :: (load (<5 x s64>) from %ir.ptr, align 64, addrspace 1) + ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32), [[UV6:%[0-9]+]]:_(s32), [[UV7:%[0-9]+]]:_(s32), [[UV8:%[0-9]+]]:_(s32), [[UV9:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[LOAD1]](<5 x s64>) + ; CHECK-NEXT: $vgpr0 = COPY [[UV]](s32) + ; CHECK-NEXT: $vgpr1 = COPY [[UV1]](s32) + ; CHECK-NEXT: $vgpr2 = COPY [[UV2]](s32) + ; CHECK-NEXT: $vgpr3 = COPY [[UV3]](s32) + ; CHECK-NEXT: $vgpr4 = COPY [[UV4]](s32) + ; CHECK-NEXT: $vgpr5 = COPY [[UV5]](s32) + ; CHECK-NEXT: $vgpr6 = COPY [[UV6]](s32) + ; CHECK-NEXT: $vgpr7 = COPY [[UV7]](s32) + ; CHECK-NEXT: $vgpr8 = COPY [[UV8]](s32) + ; CHECK-NEXT: $vgpr9 = COPY [[UV9]](s32) + ; CHECK-NEXT: SI_RETURN implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7, implicit $vgpr8, implicit $vgpr9 %ptr = load volatile <5 x i64> addrspace(1)*, <5 x i64> addrspace(1)* addrspace(4)* undef %val = load <5 x i64>, <5 x i64> addrspace(1)* %ptr ret <5 x i64> %val @@ -731,30 +611,27 @@ define <5 x i64> @v5i64_func_void() #0 { define <8 x i64> @v8i64_func_void() #0 { ; CHECK-LABEL: name: v8i64_func_void ; CHECK: bb.1 (%ir-block.0): - ; CHECK: liveins: $sgpr30_sgpr31 - ; CHECK: [[COPY:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 - ; CHECK: [[DEF:%[0-9]+]]:_(p4) = G_IMPLICIT_DEF - ; CHECK: [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[DEF]](p4) :: (volatile load (p1) from `<8 x i64> addrspace(1)* addrspace(4)* undef`, addrspace 4) - ; CHECK: [[LOAD1:%[0-9]+]]:_(<8 x s64>) = G_LOAD [[LOAD]](p1) :: (load (<8 x s64>) from %ir.ptr, addrspace 1) - ; CHECK: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32), [[UV6:%[0-9]+]]:_(s32), [[UV7:%[0-9]+]]:_(s32), [[UV8:%[0-9]+]]:_(s32), [[UV9:%[0-9]+]]:_(s32), [[UV10:%[0-9]+]]:_(s32), [[UV11:%[0-9]+]]:_(s32), [[UV12:%[0-9]+]]:_(s32), [[UV13:%[0-9]+]]:_(s32), [[UV14:%[0-9]+]]:_(s32), [[UV15:%[0-9]+]]:_(s32) = 
G_UNMERGE_VALUES [[LOAD1]](<8 x s64>) - ; CHECK: $vgpr0 = COPY [[UV]](s32) - ; CHECK: $vgpr1 = COPY [[UV1]](s32) - ; CHECK: $vgpr2 = COPY [[UV2]](s32) - ; CHECK: $vgpr3 = COPY [[UV3]](s32) - ; CHECK: $vgpr4 = COPY [[UV4]](s32) - ; CHECK: $vgpr5 = COPY [[UV5]](s32) - ; CHECK: $vgpr6 = COPY [[UV6]](s32) - ; CHECK: $vgpr7 = COPY [[UV7]](s32) - ; CHECK: $vgpr8 = COPY [[UV8]](s32) - ; CHECK: $vgpr9 = COPY [[UV9]](s32) - ; CHECK: $vgpr10 = COPY [[UV10]](s32) - ; CHECK: $vgpr11 = COPY [[UV11]](s32) - ; CHECK: $vgpr12 = COPY [[UV12]](s32) - ; CHECK: $vgpr13 = COPY [[UV13]](s32) - ; CHECK: $vgpr14 = COPY [[UV14]](s32) - ; CHECK: $vgpr15 = COPY [[UV15]](s32) - ; CHECK: [[COPY1:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY]] - ; CHECK: S_SETPC_B64_return [[COPY1]], implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7, implicit $vgpr8, implicit $vgpr9, implicit $vgpr10, implicit $vgpr11, implicit $vgpr12, implicit $vgpr13, implicit $vgpr14, implicit $vgpr15 + ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p4) = G_IMPLICIT_DEF + ; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[DEF]](p4) :: (volatile load (p1) from `<8 x i64> addrspace(1)* addrspace(4)* undef`, addrspace 4) + ; CHECK-NEXT: [[LOAD1:%[0-9]+]]:_(<8 x s64>) = G_LOAD [[LOAD]](p1) :: (load (<8 x s64>) from %ir.ptr, addrspace 1) + ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32), [[UV6:%[0-9]+]]:_(s32), [[UV7:%[0-9]+]]:_(s32), [[UV8:%[0-9]+]]:_(s32), [[UV9:%[0-9]+]]:_(s32), [[UV10:%[0-9]+]]:_(s32), [[UV11:%[0-9]+]]:_(s32), [[UV12:%[0-9]+]]:_(s32), [[UV13:%[0-9]+]]:_(s32), [[UV14:%[0-9]+]]:_(s32), [[UV15:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[LOAD1]](<8 x s64>) + ; CHECK-NEXT: $vgpr0 = COPY [[UV]](s32) + ; CHECK-NEXT: $vgpr1 = COPY [[UV1]](s32) + ; CHECK-NEXT: $vgpr2 = COPY [[UV2]](s32) + ; CHECK-NEXT: $vgpr3 = COPY [[UV3]](s32) + ; CHECK-NEXT: $vgpr4 = COPY [[UV4]](s32) + ; CHECK-NEXT: $vgpr5 = COPY [[UV5]](s32) + ; CHECK-NEXT: $vgpr6 = COPY [[UV6]](s32) + ; CHECK-NEXT: $vgpr7 = COPY [[UV7]](s32) + ; CHECK-NEXT: $vgpr8 = COPY [[UV8]](s32) + ; CHECK-NEXT: $vgpr9 = COPY [[UV9]](s32) + ; CHECK-NEXT: $vgpr10 = COPY [[UV10]](s32) + ; CHECK-NEXT: $vgpr11 = COPY [[UV11]](s32) + ; CHECK-NEXT: $vgpr12 = COPY [[UV12]](s32) + ; CHECK-NEXT: $vgpr13 = COPY [[UV13]](s32) + ; CHECK-NEXT: $vgpr14 = COPY [[UV14]](s32) + ; CHECK-NEXT: $vgpr15 = COPY [[UV15]](s32) + ; CHECK-NEXT: SI_RETURN implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7, implicit $vgpr8, implicit $vgpr9, implicit $vgpr10, implicit $vgpr11, implicit $vgpr12, implicit $vgpr13, implicit $vgpr14, implicit $vgpr15 %ptr = load volatile <8 x i64> addrspace(1)*, <8 x i64> addrspace(1)* addrspace(4)* undef %val = load <8 x i64>, <8 x i64> addrspace(1)* %ptr ret <8 x i64> %val @@ -763,46 +640,43 @@ define <8 x i64> @v8i64_func_void() #0 { define <16 x i64> @v16i64_func_void() #0 { ; CHECK-LABEL: name: v16i64_func_void ; CHECK: bb.1 (%ir-block.0): - ; CHECK: liveins: $sgpr30_sgpr31 - ; CHECK: [[COPY:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 - ; CHECK: [[DEF:%[0-9]+]]:_(p4) = G_IMPLICIT_DEF - ; CHECK: [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[DEF]](p4) :: (volatile load (p1) from `<16 x i64> addrspace(1)* addrspace(4)* undef`, addrspace 4) - ; CHECK: [[LOAD1:%[0-9]+]]:_(<16 x s64>) = G_LOAD [[LOAD]](p1) :: (load (<16 x s64>) from %ir.ptr, addrspace 1) - ; CHECK: 
[[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32), [[UV6:%[0-9]+]]:_(s32), [[UV7:%[0-9]+]]:_(s32), [[UV8:%[0-9]+]]:_(s32), [[UV9:%[0-9]+]]:_(s32), [[UV10:%[0-9]+]]:_(s32), [[UV11:%[0-9]+]]:_(s32), [[UV12:%[0-9]+]]:_(s32), [[UV13:%[0-9]+]]:_(s32), [[UV14:%[0-9]+]]:_(s32), [[UV15:%[0-9]+]]:_(s32), [[UV16:%[0-9]+]]:_(s32), [[UV17:%[0-9]+]]:_(s32), [[UV18:%[0-9]+]]:_(s32), [[UV19:%[0-9]+]]:_(s32), [[UV20:%[0-9]+]]:_(s32), [[UV21:%[0-9]+]]:_(s32), [[UV22:%[0-9]+]]:_(s32), [[UV23:%[0-9]+]]:_(s32), [[UV24:%[0-9]+]]:_(s32), [[UV25:%[0-9]+]]:_(s32), [[UV26:%[0-9]+]]:_(s32), [[UV27:%[0-9]+]]:_(s32), [[UV28:%[0-9]+]]:_(s32), [[UV29:%[0-9]+]]:_(s32), [[UV30:%[0-9]+]]:_(s32), [[UV31:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[LOAD1]](<16 x s64>) - ; CHECK: $vgpr0 = COPY [[UV]](s32) - ; CHECK: $vgpr1 = COPY [[UV1]](s32) - ; CHECK: $vgpr2 = COPY [[UV2]](s32) - ; CHECK: $vgpr3 = COPY [[UV3]](s32) - ; CHECK: $vgpr4 = COPY [[UV4]](s32) - ; CHECK: $vgpr5 = COPY [[UV5]](s32) - ; CHECK: $vgpr6 = COPY [[UV6]](s32) - ; CHECK: $vgpr7 = COPY [[UV7]](s32) - ; CHECK: $vgpr8 = COPY [[UV8]](s32) - ; CHECK: $vgpr9 = COPY [[UV9]](s32) - ; CHECK: $vgpr10 = COPY [[UV10]](s32) - ; CHECK: $vgpr11 = COPY [[UV11]](s32) - ; CHECK: $vgpr12 = COPY [[UV12]](s32) - ; CHECK: $vgpr13 = COPY [[UV13]](s32) - ; CHECK: $vgpr14 = COPY [[UV14]](s32) - ; CHECK: $vgpr15 = COPY [[UV15]](s32) - ; CHECK: $vgpr16 = COPY [[UV16]](s32) - ; CHECK: $vgpr17 = COPY [[UV17]](s32) - ; CHECK: $vgpr18 = COPY [[UV18]](s32) - ; CHECK: $vgpr19 = COPY [[UV19]](s32) - ; CHECK: $vgpr20 = COPY [[UV20]](s32) - ; CHECK: $vgpr21 = COPY [[UV21]](s32) - ; CHECK: $vgpr22 = COPY [[UV22]](s32) - ; CHECK: $vgpr23 = COPY [[UV23]](s32) - ; CHECK: $vgpr24 = COPY [[UV24]](s32) - ; CHECK: $vgpr25 = COPY [[UV25]](s32) - ; CHECK: $vgpr26 = COPY [[UV26]](s32) - ; CHECK: $vgpr27 = COPY [[UV27]](s32) - ; CHECK: $vgpr28 = COPY [[UV28]](s32) - ; CHECK: $vgpr29 = COPY [[UV29]](s32) - ; CHECK: $vgpr30 = COPY [[UV30]](s32) - ; CHECK: $vgpr31 = COPY [[UV31]](s32) - ; CHECK: [[COPY1:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY]] - ; CHECK: S_SETPC_B64_return [[COPY1]], implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7, implicit $vgpr8, implicit $vgpr9, implicit $vgpr10, implicit $vgpr11, implicit $vgpr12, implicit $vgpr13, implicit $vgpr14, implicit $vgpr15, implicit $vgpr16, implicit $vgpr17, implicit $vgpr18, implicit $vgpr19, implicit $vgpr20, implicit $vgpr21, implicit $vgpr22, implicit $vgpr23, implicit $vgpr24, implicit $vgpr25, implicit $vgpr26, implicit $vgpr27, implicit $vgpr28, implicit $vgpr29, implicit $vgpr30, implicit $vgpr31 + ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p4) = G_IMPLICIT_DEF + ; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[DEF]](p4) :: (volatile load (p1) from `<16 x i64> addrspace(1)* addrspace(4)* undef`, addrspace 4) + ; CHECK-NEXT: [[LOAD1:%[0-9]+]]:_(<16 x s64>) = G_LOAD [[LOAD]](p1) :: (load (<16 x s64>) from %ir.ptr, addrspace 1) + ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32), [[UV6:%[0-9]+]]:_(s32), [[UV7:%[0-9]+]]:_(s32), [[UV8:%[0-9]+]]:_(s32), [[UV9:%[0-9]+]]:_(s32), [[UV10:%[0-9]+]]:_(s32), [[UV11:%[0-9]+]]:_(s32), [[UV12:%[0-9]+]]:_(s32), [[UV13:%[0-9]+]]:_(s32), [[UV14:%[0-9]+]]:_(s32), [[UV15:%[0-9]+]]:_(s32), [[UV16:%[0-9]+]]:_(s32), [[UV17:%[0-9]+]]:_(s32), [[UV18:%[0-9]+]]:_(s32), 
[[UV19:%[0-9]+]]:_(s32), [[UV20:%[0-9]+]]:_(s32), [[UV21:%[0-9]+]]:_(s32), [[UV22:%[0-9]+]]:_(s32), [[UV23:%[0-9]+]]:_(s32), [[UV24:%[0-9]+]]:_(s32), [[UV25:%[0-9]+]]:_(s32), [[UV26:%[0-9]+]]:_(s32), [[UV27:%[0-9]+]]:_(s32), [[UV28:%[0-9]+]]:_(s32), [[UV29:%[0-9]+]]:_(s32), [[UV30:%[0-9]+]]:_(s32), [[UV31:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[LOAD1]](<16 x s64>) + ; CHECK-NEXT: $vgpr0 = COPY [[UV]](s32) + ; CHECK-NEXT: $vgpr1 = COPY [[UV1]](s32) + ; CHECK-NEXT: $vgpr2 = COPY [[UV2]](s32) + ; CHECK-NEXT: $vgpr3 = COPY [[UV3]](s32) + ; CHECK-NEXT: $vgpr4 = COPY [[UV4]](s32) + ; CHECK-NEXT: $vgpr5 = COPY [[UV5]](s32) + ; CHECK-NEXT: $vgpr6 = COPY [[UV6]](s32) + ; CHECK-NEXT: $vgpr7 = COPY [[UV7]](s32) + ; CHECK-NEXT: $vgpr8 = COPY [[UV8]](s32) + ; CHECK-NEXT: $vgpr9 = COPY [[UV9]](s32) + ; CHECK-NEXT: $vgpr10 = COPY [[UV10]](s32) + ; CHECK-NEXT: $vgpr11 = COPY [[UV11]](s32) + ; CHECK-NEXT: $vgpr12 = COPY [[UV12]](s32) + ; CHECK-NEXT: $vgpr13 = COPY [[UV13]](s32) + ; CHECK-NEXT: $vgpr14 = COPY [[UV14]](s32) + ; CHECK-NEXT: $vgpr15 = COPY [[UV15]](s32) + ; CHECK-NEXT: $vgpr16 = COPY [[UV16]](s32) + ; CHECK-NEXT: $vgpr17 = COPY [[UV17]](s32) + ; CHECK-NEXT: $vgpr18 = COPY [[UV18]](s32) + ; CHECK-NEXT: $vgpr19 = COPY [[UV19]](s32) + ; CHECK-NEXT: $vgpr20 = COPY [[UV20]](s32) + ; CHECK-NEXT: $vgpr21 = COPY [[UV21]](s32) + ; CHECK-NEXT: $vgpr22 = COPY [[UV22]](s32) + ; CHECK-NEXT: $vgpr23 = COPY [[UV23]](s32) + ; CHECK-NEXT: $vgpr24 = COPY [[UV24]](s32) + ; CHECK-NEXT: $vgpr25 = COPY [[UV25]](s32) + ; CHECK-NEXT: $vgpr26 = COPY [[UV26]](s32) + ; CHECK-NEXT: $vgpr27 = COPY [[UV27]](s32) + ; CHECK-NEXT: $vgpr28 = COPY [[UV28]](s32) + ; CHECK-NEXT: $vgpr29 = COPY [[UV29]](s32) + ; CHECK-NEXT: $vgpr30 = COPY [[UV30]](s32) + ; CHECK-NEXT: $vgpr31 = COPY [[UV31]](s32) + ; CHECK-NEXT: SI_RETURN implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7, implicit $vgpr8, implicit $vgpr9, implicit $vgpr10, implicit $vgpr11, implicit $vgpr12, implicit $vgpr13, implicit $vgpr14, implicit $vgpr15, implicit $vgpr16, implicit $vgpr17, implicit $vgpr18, implicit $vgpr19, implicit $vgpr20, implicit $vgpr21, implicit $vgpr22, implicit $vgpr23, implicit $vgpr24, implicit $vgpr25, implicit $vgpr26, implicit $vgpr27, implicit $vgpr28, implicit $vgpr29, implicit $vgpr30, implicit $vgpr31 %ptr = load volatile <16 x i64> addrspace(1)*, <16 x i64> addrspace(1)* addrspace(4)* undef %val = load <16 x i64>, <16 x i64> addrspace(1)* %ptr ret <16 x i64> %val @@ -811,13 +685,10 @@ define <16 x i64> @v16i64_func_void() #0 { define <2 x i16> @v2i16_func_void() #0 { ; CHECK-LABEL: name: v2i16_func_void ; CHECK: bb.1 (%ir-block.0): - ; CHECK: liveins: $sgpr30_sgpr31 - ; CHECK: [[COPY:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 - ; CHECK: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF - ; CHECK: [[LOAD:%[0-9]+]]:_(<2 x s16>) = G_LOAD [[DEF]](p1) :: (load (<2 x s16>) from `<2 x i16> addrspace(1)* undef`, addrspace 1) - ; CHECK: $vgpr0 = COPY [[LOAD]](<2 x s16>) - ; CHECK: [[COPY1:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY]] - ; CHECK: S_SETPC_B64_return [[COPY1]], implicit $vgpr0 + ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF + ; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(<2 x s16>) = G_LOAD [[DEF]](p1) :: (load (<2 x s16>) from `<2 x i16> addrspace(1)* undef`, addrspace 1) + ; CHECK-NEXT: $vgpr0 = COPY [[LOAD]](<2 x s16>) + ; CHECK-NEXT: SI_RETURN implicit $vgpr0 %val = load <2 x i16>, <2 x i16> addrspace(1)* undef ret <2 x i16> %val } @@ -825,13 +696,10 @@ define <2 x i16> 
@v2i16_func_void() #0 { define <2 x half> @v2f16_func_void() #0 { ; CHECK-LABEL: name: v2f16_func_void ; CHECK: bb.1 (%ir-block.0): - ; CHECK: liveins: $sgpr30_sgpr31 - ; CHECK: [[COPY:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 - ; CHECK: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF - ; CHECK: [[LOAD:%[0-9]+]]:_(<2 x s16>) = G_LOAD [[DEF]](p1) :: (load (<2 x s16>) from `<2 x half> addrspace(1)* undef`, addrspace 1) - ; CHECK: $vgpr0 = COPY [[LOAD]](<2 x s16>) - ; CHECK: [[COPY1:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY]] - ; CHECK: S_SETPC_B64_return [[COPY1]], implicit $vgpr0 + ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF + ; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(<2 x s16>) = G_LOAD [[DEF]](p1) :: (load (<2 x s16>) from `<2 x half> addrspace(1)* undef`, addrspace 1) + ; CHECK-NEXT: $vgpr0 = COPY [[LOAD]](<2 x s16>) + ; CHECK-NEXT: SI_RETURN implicit $vgpr0 %val = load <2 x half>, <2 x half> addrspace(1)* undef ret <2 x half> %val } @@ -839,17 +707,14 @@ define <2 x half> @v2f16_func_void() #0 { define <3 x i16> @v3i16_func_void() #0 { ; CHECK-LABEL: name: v3i16_func_void ; CHECK: bb.1 (%ir-block.0): - ; CHECK: liveins: $sgpr30_sgpr31 - ; CHECK: [[COPY:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 - ; CHECK: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF - ; CHECK: [[LOAD:%[0-9]+]]:_(<3 x s16>) = G_LOAD [[DEF]](p1) :: (load (<3 x s16>) from `<3 x i16> addrspace(1)* undef`, align 8, addrspace 1) - ; CHECK: [[DEF1:%[0-9]+]]:_(<3 x s16>) = G_IMPLICIT_DEF - ; CHECK: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[LOAD]](<3 x s16>), [[DEF1]](<3 x s16>) - ; CHECK: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>), [[UV2:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<6 x s16>) - ; CHECK: $vgpr0 = COPY [[UV]](<2 x s16>) - ; CHECK: $vgpr1 = COPY [[UV1]](<2 x s16>) - ; CHECK: [[COPY1:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY]] - ; CHECK: S_SETPC_B64_return [[COPY1]], implicit $vgpr0, implicit $vgpr1 + ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF + ; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(<3 x s16>) = G_LOAD [[DEF]](p1) :: (load (<3 x s16>) from `<3 x i16> addrspace(1)* undef`, align 8, addrspace 1) + ; CHECK-NEXT: [[DEF1:%[0-9]+]]:_(<3 x s16>) = G_IMPLICIT_DEF + ; CHECK-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[LOAD]](<3 x s16>), [[DEF1]](<3 x s16>) + ; CHECK-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>), [[UV2:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<6 x s16>) + ; CHECK-NEXT: $vgpr0 = COPY [[UV]](<2 x s16>) + ; CHECK-NEXT: $vgpr1 = COPY [[UV1]](<2 x s16>) + ; CHECK-NEXT: SI_RETURN implicit $vgpr0, implicit $vgpr1 %val = load <3 x i16>, <3 x i16> addrspace(1)* undef ret <3 x i16> %val } @@ -857,15 +722,12 @@ define <3 x i16> @v3i16_func_void() #0 { define <4 x i16> @v4i16_func_void() #0 { ; CHECK-LABEL: name: v4i16_func_void ; CHECK: bb.1 (%ir-block.0): - ; CHECK: liveins: $sgpr30_sgpr31 - ; CHECK: [[COPY:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 - ; CHECK: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF - ; CHECK: [[LOAD:%[0-9]+]]:_(<4 x s16>) = G_LOAD [[DEF]](p1) :: (load (<4 x s16>) from `<4 x i16> addrspace(1)* undef`, addrspace 1) - ; CHECK: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[LOAD]](<4 x s16>) - ; CHECK: $vgpr0 = COPY [[UV]](<2 x s16>) - ; CHECK: $vgpr1 = COPY [[UV1]](<2 x s16>) - ; CHECK: [[COPY1:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY]] - ; CHECK: S_SETPC_B64_return [[COPY1]], implicit $vgpr0, implicit $vgpr1 + ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF + ; CHECK-NEXT: 
[[LOAD:%[0-9]+]]:_(<4 x s16>) = G_LOAD [[DEF]](p1) :: (load (<4 x s16>) from `<4 x i16> addrspace(1)* undef`, addrspace 1) + ; CHECK-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[LOAD]](<4 x s16>) + ; CHECK-NEXT: $vgpr0 = COPY [[UV]](<2 x s16>) + ; CHECK-NEXT: $vgpr1 = COPY [[UV1]](<2 x s16>) + ; CHECK-NEXT: SI_RETURN implicit $vgpr0, implicit $vgpr1 %val = load <4 x i16>, <4 x i16> addrspace(1)* undef ret <4 x i16> %val } @@ -873,15 +735,12 @@ define <4 x i16> @v4i16_func_void() #0 { define <4 x half> @v4f16_func_void() #0 { ; CHECK-LABEL: name: v4f16_func_void ; CHECK: bb.1 (%ir-block.0): - ; CHECK: liveins: $sgpr30_sgpr31 - ; CHECK: [[COPY:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 - ; CHECK: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF - ; CHECK: [[LOAD:%[0-9]+]]:_(<4 x s16>) = G_LOAD [[DEF]](p1) :: (load (<4 x s16>) from `<4 x half> addrspace(1)* undef`, addrspace 1) - ; CHECK: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[LOAD]](<4 x s16>) - ; CHECK: $vgpr0 = COPY [[UV]](<2 x s16>) - ; CHECK: $vgpr1 = COPY [[UV1]](<2 x s16>) - ; CHECK: [[COPY1:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY]] - ; CHECK: S_SETPC_B64_return [[COPY1]], implicit $vgpr0, implicit $vgpr1 + ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF + ; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(<4 x s16>) = G_LOAD [[DEF]](p1) :: (load (<4 x s16>) from `<4 x half> addrspace(1)* undef`, addrspace 1) + ; CHECK-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[LOAD]](<4 x s16>) + ; CHECK-NEXT: $vgpr0 = COPY [[UV]](<2 x s16>) + ; CHECK-NEXT: $vgpr1 = COPY [[UV1]](<2 x s16>) + ; CHECK-NEXT: SI_RETURN implicit $vgpr0, implicit $vgpr1 %val = load <4 x half>, <4 x half> addrspace(1)* undef ret <4 x half> %val } @@ -889,19 +748,16 @@ define <4 x half> @v4f16_func_void() #0 { define <5 x i16> @v5i16_func_void() #0 { ; CHECK-LABEL: name: v5i16_func_void ; CHECK: bb.1 (%ir-block.0): - ; CHECK: liveins: $sgpr30_sgpr31 - ; CHECK: [[COPY:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 - ; CHECK: [[DEF:%[0-9]+]]:_(p4) = G_IMPLICIT_DEF - ; CHECK: [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[DEF]](p4) :: (volatile load (p1) from `<5 x i16> addrspace(1)* addrspace(4)* undef`, addrspace 4) - ; CHECK: [[LOAD1:%[0-9]+]]:_(<5 x s16>) = G_LOAD [[LOAD]](p1) :: (load (<5 x s16>) from %ir.ptr, align 16, addrspace 1) - ; CHECK: [[DEF1:%[0-9]+]]:_(<5 x s16>) = G_IMPLICIT_DEF - ; CHECK: [[CONCAT_VECTORS:%[0-9]+]]:_(<10 x s16>) = G_CONCAT_VECTORS [[LOAD1]](<5 x s16>), [[DEF1]](<5 x s16>) - ; CHECK: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>), [[UV2:%[0-9]+]]:_(<2 x s16>), [[UV3:%[0-9]+]]:_(<2 x s16>), [[UV4:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<10 x s16>) - ; CHECK: $vgpr0 = COPY [[UV]](<2 x s16>) - ; CHECK: $vgpr1 = COPY [[UV1]](<2 x s16>) - ; CHECK: $vgpr2 = COPY [[UV2]](<2 x s16>) - ; CHECK: [[COPY1:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY]] - ; CHECK: S_SETPC_B64_return [[COPY1]], implicit $vgpr0, implicit $vgpr1, implicit $vgpr2 + ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p4) = G_IMPLICIT_DEF + ; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[DEF]](p4) :: (volatile load (p1) from `<5 x i16> addrspace(1)* addrspace(4)* undef`, addrspace 4) + ; CHECK-NEXT: [[LOAD1:%[0-9]+]]:_(<5 x s16>) = G_LOAD [[LOAD]](p1) :: (load (<5 x s16>) from %ir.ptr, align 16, addrspace 1) + ; CHECK-NEXT: [[DEF1:%[0-9]+]]:_(<5 x s16>) = G_IMPLICIT_DEF + ; CHECK-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<10 x s16>) = G_CONCAT_VECTORS [[LOAD1]](<5 x s16>), [[DEF1]](<5 x s16>) + ; CHECK-NEXT: 
[[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>), [[UV2:%[0-9]+]]:_(<2 x s16>), [[UV3:%[0-9]+]]:_(<2 x s16>), [[UV4:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<10 x s16>) + ; CHECK-NEXT: $vgpr0 = COPY [[UV]](<2 x s16>) + ; CHECK-NEXT: $vgpr1 = COPY [[UV1]](<2 x s16>) + ; CHECK-NEXT: $vgpr2 = COPY [[UV2]](<2 x s16>) + ; CHECK-NEXT: SI_RETURN implicit $vgpr0, implicit $vgpr1, implicit $vgpr2 %ptr = load volatile <5 x i16> addrspace(1)*, <5 x i16> addrspace(1)* addrspace(4)* undef %val = load <5 x i16>, <5 x i16> addrspace(1)* %ptr ret <5 x i16> %val @@ -910,18 +766,15 @@ define <5 x i16> @v5i16_func_void() #0 { define <8 x i16> @v8i16_func_void() #0 { ; CHECK-LABEL: name: v8i16_func_void ; CHECK: bb.1 (%ir-block.0): - ; CHECK: liveins: $sgpr30_sgpr31 - ; CHECK: [[COPY:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 - ; CHECK: [[DEF:%[0-9]+]]:_(p4) = G_IMPLICIT_DEF - ; CHECK: [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[DEF]](p4) :: (volatile load (p1) from `<8 x i16> addrspace(1)* addrspace(4)* undef`, addrspace 4) - ; CHECK: [[LOAD1:%[0-9]+]]:_(<8 x s16>) = G_LOAD [[LOAD]](p1) :: (load (<8 x s16>) from %ir.ptr, addrspace 1) - ; CHECK: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>), [[UV2:%[0-9]+]]:_(<2 x s16>), [[UV3:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[LOAD1]](<8 x s16>) - ; CHECK: $vgpr0 = COPY [[UV]](<2 x s16>) - ; CHECK: $vgpr1 = COPY [[UV1]](<2 x s16>) - ; CHECK: $vgpr2 = COPY [[UV2]](<2 x s16>) - ; CHECK: $vgpr3 = COPY [[UV3]](<2 x s16>) - ; CHECK: [[COPY1:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY]] - ; CHECK: S_SETPC_B64_return [[COPY1]], implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3 + ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p4) = G_IMPLICIT_DEF + ; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[DEF]](p4) :: (volatile load (p1) from `<8 x i16> addrspace(1)* addrspace(4)* undef`, addrspace 4) + ; CHECK-NEXT: [[LOAD1:%[0-9]+]]:_(<8 x s16>) = G_LOAD [[LOAD]](p1) :: (load (<8 x s16>) from %ir.ptr, addrspace 1) + ; CHECK-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>), [[UV2:%[0-9]+]]:_(<2 x s16>), [[UV3:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[LOAD1]](<8 x s16>) + ; CHECK-NEXT: $vgpr0 = COPY [[UV]](<2 x s16>) + ; CHECK-NEXT: $vgpr1 = COPY [[UV1]](<2 x s16>) + ; CHECK-NEXT: $vgpr2 = COPY [[UV2]](<2 x s16>) + ; CHECK-NEXT: $vgpr3 = COPY [[UV3]](<2 x s16>) + ; CHECK-NEXT: SI_RETURN implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3 %ptr = load volatile <8 x i16> addrspace(1)*, <8 x i16> addrspace(1)* addrspace(4)* undef %val = load <8 x i16>, <8 x i16> addrspace(1)* %ptr ret <8 x i16> %val @@ -930,22 +783,19 @@ define <8 x i16> @v8i16_func_void() #0 { define <16 x i16> @v16i16_func_void() #0 { ; CHECK-LABEL: name: v16i16_func_void ; CHECK: bb.1 (%ir-block.0): - ; CHECK: liveins: $sgpr30_sgpr31 - ; CHECK: [[COPY:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 - ; CHECK: [[DEF:%[0-9]+]]:_(p4) = G_IMPLICIT_DEF - ; CHECK: [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[DEF]](p4) :: (volatile load (p1) from `<16 x i16> addrspace(1)* addrspace(4)* undef`, addrspace 4) - ; CHECK: [[LOAD1:%[0-9]+]]:_(<16 x s16>) = G_LOAD [[LOAD]](p1) :: (load (<16 x s16>) from %ir.ptr, addrspace 1) - ; CHECK: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>), [[UV2:%[0-9]+]]:_(<2 x s16>), [[UV3:%[0-9]+]]:_(<2 x s16>), [[UV4:%[0-9]+]]:_(<2 x s16>), [[UV5:%[0-9]+]]:_(<2 x s16>), [[UV6:%[0-9]+]]:_(<2 x s16>), [[UV7:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[LOAD1]](<16 x s16>) - ; CHECK: $vgpr0 = COPY [[UV]](<2 x s16>) - ; CHECK: $vgpr1 = COPY [[UV1]](<2 x s16>) - 
; CHECK: $vgpr2 = COPY [[UV2]](<2 x s16>) - ; CHECK: $vgpr3 = COPY [[UV3]](<2 x s16>) - ; CHECK: $vgpr4 = COPY [[UV4]](<2 x s16>) - ; CHECK: $vgpr5 = COPY [[UV5]](<2 x s16>) - ; CHECK: $vgpr6 = COPY [[UV6]](<2 x s16>) - ; CHECK: $vgpr7 = COPY [[UV7]](<2 x s16>) - ; CHECK: [[COPY1:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY]] - ; CHECK: S_SETPC_B64_return [[COPY1]], implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7 + ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p4) = G_IMPLICIT_DEF + ; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[DEF]](p4) :: (volatile load (p1) from `<16 x i16> addrspace(1)* addrspace(4)* undef`, addrspace 4) + ; CHECK-NEXT: [[LOAD1:%[0-9]+]]:_(<16 x s16>) = G_LOAD [[LOAD]](p1) :: (load (<16 x s16>) from %ir.ptr, addrspace 1) + ; CHECK-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>), [[UV2:%[0-9]+]]:_(<2 x s16>), [[UV3:%[0-9]+]]:_(<2 x s16>), [[UV4:%[0-9]+]]:_(<2 x s16>), [[UV5:%[0-9]+]]:_(<2 x s16>), [[UV6:%[0-9]+]]:_(<2 x s16>), [[UV7:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[LOAD1]](<16 x s16>) + ; CHECK-NEXT: $vgpr0 = COPY [[UV]](<2 x s16>) + ; CHECK-NEXT: $vgpr1 = COPY [[UV1]](<2 x s16>) + ; CHECK-NEXT: $vgpr2 = COPY [[UV2]](<2 x s16>) + ; CHECK-NEXT: $vgpr3 = COPY [[UV3]](<2 x s16>) + ; CHECK-NEXT: $vgpr4 = COPY [[UV4]](<2 x s16>) + ; CHECK-NEXT: $vgpr5 = COPY [[UV5]](<2 x s16>) + ; CHECK-NEXT: $vgpr6 = COPY [[UV6]](<2 x s16>) + ; CHECK-NEXT: $vgpr7 = COPY [[UV7]](<2 x s16>) + ; CHECK-NEXT: SI_RETURN implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7 %ptr = load volatile <16 x i16> addrspace(1)*, <16 x i16> addrspace(1)* addrspace(4)* undef %val = load <16 x i16>, <16 x i16> addrspace(1)* %ptr ret <16 x i16> %val @@ -954,62 +804,59 @@ define <16 x i16> @v16i16_func_void() #0 { define <16 x i8> @v16i8_func_void() #0 { ; CHECK-LABEL: name: v16i8_func_void ; CHECK: bb.1 (%ir-block.0): - ; CHECK: liveins: $sgpr30_sgpr31 - ; CHECK: [[COPY:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 - ; CHECK: [[DEF:%[0-9]+]]:_(p4) = G_IMPLICIT_DEF - ; CHECK: [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[DEF]](p4) :: (volatile load (p1) from `<16 x i8> addrspace(1)* addrspace(4)* undef`, addrspace 4) - ; CHECK: [[LOAD1:%[0-9]+]]:_(<16 x s8>) = G_LOAD [[LOAD]](p1) :: (load (<16 x s8>) from %ir.ptr, addrspace 1) - ; CHECK: [[UV:%[0-9]+]]:_(s8), [[UV1:%[0-9]+]]:_(s8), [[UV2:%[0-9]+]]:_(s8), [[UV3:%[0-9]+]]:_(s8), [[UV4:%[0-9]+]]:_(s8), [[UV5:%[0-9]+]]:_(s8), [[UV6:%[0-9]+]]:_(s8), [[UV7:%[0-9]+]]:_(s8), [[UV8:%[0-9]+]]:_(s8), [[UV9:%[0-9]+]]:_(s8), [[UV10:%[0-9]+]]:_(s8), [[UV11:%[0-9]+]]:_(s8), [[UV12:%[0-9]+]]:_(s8), [[UV13:%[0-9]+]]:_(s8), [[UV14:%[0-9]+]]:_(s8), [[UV15:%[0-9]+]]:_(s8) = G_UNMERGE_VALUES [[LOAD1]](<16 x s8>) - ; CHECK: [[ANYEXT:%[0-9]+]]:_(s16) = G_ANYEXT [[UV]](s8) - ; CHECK: [[ANYEXT1:%[0-9]+]]:_(s16) = G_ANYEXT [[UV1]](s8) - ; CHECK: [[ANYEXT2:%[0-9]+]]:_(s16) = G_ANYEXT [[UV2]](s8) - ; CHECK: [[ANYEXT3:%[0-9]+]]:_(s16) = G_ANYEXT [[UV3]](s8) - ; CHECK: [[ANYEXT4:%[0-9]+]]:_(s16) = G_ANYEXT [[UV4]](s8) - ; CHECK: [[ANYEXT5:%[0-9]+]]:_(s16) = G_ANYEXT [[UV5]](s8) - ; CHECK: [[ANYEXT6:%[0-9]+]]:_(s16) = G_ANYEXT [[UV6]](s8) - ; CHECK: [[ANYEXT7:%[0-9]+]]:_(s16) = G_ANYEXT [[UV7]](s8) - ; CHECK: [[ANYEXT8:%[0-9]+]]:_(s16) = G_ANYEXT [[UV8]](s8) - ; CHECK: [[ANYEXT9:%[0-9]+]]:_(s16) = G_ANYEXT [[UV9]](s8) - ; CHECK: [[ANYEXT10:%[0-9]+]]:_(s16) = G_ANYEXT [[UV10]](s8) - ; CHECK: [[ANYEXT11:%[0-9]+]]:_(s16) = G_ANYEXT 
[[UV11]](s8) - ; CHECK: [[ANYEXT12:%[0-9]+]]:_(s16) = G_ANYEXT [[UV12]](s8) - ; CHECK: [[ANYEXT13:%[0-9]+]]:_(s16) = G_ANYEXT [[UV13]](s8) - ; CHECK: [[ANYEXT14:%[0-9]+]]:_(s16) = G_ANYEXT [[UV14]](s8) - ; CHECK: [[ANYEXT15:%[0-9]+]]:_(s16) = G_ANYEXT [[UV15]](s8) - ; CHECK: [[ANYEXT16:%[0-9]+]]:_(s32) = G_ANYEXT [[ANYEXT]](s16) - ; CHECK: $vgpr0 = COPY [[ANYEXT16]](s32) - ; CHECK: [[ANYEXT17:%[0-9]+]]:_(s32) = G_ANYEXT [[ANYEXT1]](s16) - ; CHECK: $vgpr1 = COPY [[ANYEXT17]](s32) - ; CHECK: [[ANYEXT18:%[0-9]+]]:_(s32) = G_ANYEXT [[ANYEXT2]](s16) - ; CHECK: $vgpr2 = COPY [[ANYEXT18]](s32) - ; CHECK: [[ANYEXT19:%[0-9]+]]:_(s32) = G_ANYEXT [[ANYEXT3]](s16) - ; CHECK: $vgpr3 = COPY [[ANYEXT19]](s32) - ; CHECK: [[ANYEXT20:%[0-9]+]]:_(s32) = G_ANYEXT [[ANYEXT4]](s16) - ; CHECK: $vgpr4 = COPY [[ANYEXT20]](s32) - ; CHECK: [[ANYEXT21:%[0-9]+]]:_(s32) = G_ANYEXT [[ANYEXT5]](s16) - ; CHECK: $vgpr5 = COPY [[ANYEXT21]](s32) - ; CHECK: [[ANYEXT22:%[0-9]+]]:_(s32) = G_ANYEXT [[ANYEXT6]](s16) - ; CHECK: $vgpr6 = COPY [[ANYEXT22]](s32) - ; CHECK: [[ANYEXT23:%[0-9]+]]:_(s32) = G_ANYEXT [[ANYEXT7]](s16) - ; CHECK: $vgpr7 = COPY [[ANYEXT23]](s32) - ; CHECK: [[ANYEXT24:%[0-9]+]]:_(s32) = G_ANYEXT [[ANYEXT8]](s16) - ; CHECK: $vgpr8 = COPY [[ANYEXT24]](s32) - ; CHECK: [[ANYEXT25:%[0-9]+]]:_(s32) = G_ANYEXT [[ANYEXT9]](s16) - ; CHECK: $vgpr9 = COPY [[ANYEXT25]](s32) - ; CHECK: [[ANYEXT26:%[0-9]+]]:_(s32) = G_ANYEXT [[ANYEXT10]](s16) - ; CHECK: $vgpr10 = COPY [[ANYEXT26]](s32) - ; CHECK: [[ANYEXT27:%[0-9]+]]:_(s32) = G_ANYEXT [[ANYEXT11]](s16) - ; CHECK: $vgpr11 = COPY [[ANYEXT27]](s32) - ; CHECK: [[ANYEXT28:%[0-9]+]]:_(s32) = G_ANYEXT [[ANYEXT12]](s16) - ; CHECK: $vgpr12 = COPY [[ANYEXT28]](s32) - ; CHECK: [[ANYEXT29:%[0-9]+]]:_(s32) = G_ANYEXT [[ANYEXT13]](s16) - ; CHECK: $vgpr13 = COPY [[ANYEXT29]](s32) - ; CHECK: [[ANYEXT30:%[0-9]+]]:_(s32) = G_ANYEXT [[ANYEXT14]](s16) - ; CHECK: $vgpr14 = COPY [[ANYEXT30]](s32) - ; CHECK: [[ANYEXT31:%[0-9]+]]:_(s32) = G_ANYEXT [[ANYEXT15]](s16) - ; CHECK: $vgpr15 = COPY [[ANYEXT31]](s32) - ; CHECK: [[COPY1:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY]] - ; CHECK: S_SETPC_B64_return [[COPY1]], implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7, implicit $vgpr8, implicit $vgpr9, implicit $vgpr10, implicit $vgpr11, implicit $vgpr12, implicit $vgpr13, implicit $vgpr14, implicit $vgpr15 + ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p4) = G_IMPLICIT_DEF + ; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[DEF]](p4) :: (volatile load (p1) from `<16 x i8> addrspace(1)* addrspace(4)* undef`, addrspace 4) + ; CHECK-NEXT: [[LOAD1:%[0-9]+]]:_(<16 x s8>) = G_LOAD [[LOAD]](p1) :: (load (<16 x s8>) from %ir.ptr, addrspace 1) + ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s8), [[UV1:%[0-9]+]]:_(s8), [[UV2:%[0-9]+]]:_(s8), [[UV3:%[0-9]+]]:_(s8), [[UV4:%[0-9]+]]:_(s8), [[UV5:%[0-9]+]]:_(s8), [[UV6:%[0-9]+]]:_(s8), [[UV7:%[0-9]+]]:_(s8), [[UV8:%[0-9]+]]:_(s8), [[UV9:%[0-9]+]]:_(s8), [[UV10:%[0-9]+]]:_(s8), [[UV11:%[0-9]+]]:_(s8), [[UV12:%[0-9]+]]:_(s8), [[UV13:%[0-9]+]]:_(s8), [[UV14:%[0-9]+]]:_(s8), [[UV15:%[0-9]+]]:_(s8) = G_UNMERGE_VALUES [[LOAD1]](<16 x s8>) + ; CHECK-NEXT: [[ANYEXT:%[0-9]+]]:_(s16) = G_ANYEXT [[UV]](s8) + ; CHECK-NEXT: [[ANYEXT1:%[0-9]+]]:_(s16) = G_ANYEXT [[UV1]](s8) + ; CHECK-NEXT: [[ANYEXT2:%[0-9]+]]:_(s16) = G_ANYEXT [[UV2]](s8) + ; CHECK-NEXT: [[ANYEXT3:%[0-9]+]]:_(s16) = G_ANYEXT [[UV3]](s8) + ; CHECK-NEXT: [[ANYEXT4:%[0-9]+]]:_(s16) = G_ANYEXT [[UV4]](s8) + ; CHECK-NEXT: [[ANYEXT5:%[0-9]+]]:_(s16) = G_ANYEXT [[UV5]](s8) + 
; CHECK-NEXT: [[ANYEXT6:%[0-9]+]]:_(s16) = G_ANYEXT [[UV6]](s8) + ; CHECK-NEXT: [[ANYEXT7:%[0-9]+]]:_(s16) = G_ANYEXT [[UV7]](s8) + ; CHECK-NEXT: [[ANYEXT8:%[0-9]+]]:_(s16) = G_ANYEXT [[UV8]](s8) + ; CHECK-NEXT: [[ANYEXT9:%[0-9]+]]:_(s16) = G_ANYEXT [[UV9]](s8) + ; CHECK-NEXT: [[ANYEXT10:%[0-9]+]]:_(s16) = G_ANYEXT [[UV10]](s8) + ; CHECK-NEXT: [[ANYEXT11:%[0-9]+]]:_(s16) = G_ANYEXT [[UV11]](s8) + ; CHECK-NEXT: [[ANYEXT12:%[0-9]+]]:_(s16) = G_ANYEXT [[UV12]](s8) + ; CHECK-NEXT: [[ANYEXT13:%[0-9]+]]:_(s16) = G_ANYEXT [[UV13]](s8) + ; CHECK-NEXT: [[ANYEXT14:%[0-9]+]]:_(s16) = G_ANYEXT [[UV14]](s8) + ; CHECK-NEXT: [[ANYEXT15:%[0-9]+]]:_(s16) = G_ANYEXT [[UV15]](s8) + ; CHECK-NEXT: [[ANYEXT16:%[0-9]+]]:_(s32) = G_ANYEXT [[ANYEXT]](s16) + ; CHECK-NEXT: $vgpr0 = COPY [[ANYEXT16]](s32) + ; CHECK-NEXT: [[ANYEXT17:%[0-9]+]]:_(s32) = G_ANYEXT [[ANYEXT1]](s16) + ; CHECK-NEXT: $vgpr1 = COPY [[ANYEXT17]](s32) + ; CHECK-NEXT: [[ANYEXT18:%[0-9]+]]:_(s32) = G_ANYEXT [[ANYEXT2]](s16) + ; CHECK-NEXT: $vgpr2 = COPY [[ANYEXT18]](s32) + ; CHECK-NEXT: [[ANYEXT19:%[0-9]+]]:_(s32) = G_ANYEXT [[ANYEXT3]](s16) + ; CHECK-NEXT: $vgpr3 = COPY [[ANYEXT19]](s32) + ; CHECK-NEXT: [[ANYEXT20:%[0-9]+]]:_(s32) = G_ANYEXT [[ANYEXT4]](s16) + ; CHECK-NEXT: $vgpr4 = COPY [[ANYEXT20]](s32) + ; CHECK-NEXT: [[ANYEXT21:%[0-9]+]]:_(s32) = G_ANYEXT [[ANYEXT5]](s16) + ; CHECK-NEXT: $vgpr5 = COPY [[ANYEXT21]](s32) + ; CHECK-NEXT: [[ANYEXT22:%[0-9]+]]:_(s32) = G_ANYEXT [[ANYEXT6]](s16) + ; CHECK-NEXT: $vgpr6 = COPY [[ANYEXT22]](s32) + ; CHECK-NEXT: [[ANYEXT23:%[0-9]+]]:_(s32) = G_ANYEXT [[ANYEXT7]](s16) + ; CHECK-NEXT: $vgpr7 = COPY [[ANYEXT23]](s32) + ; CHECK-NEXT: [[ANYEXT24:%[0-9]+]]:_(s32) = G_ANYEXT [[ANYEXT8]](s16) + ; CHECK-NEXT: $vgpr8 = COPY [[ANYEXT24]](s32) + ; CHECK-NEXT: [[ANYEXT25:%[0-9]+]]:_(s32) = G_ANYEXT [[ANYEXT9]](s16) + ; CHECK-NEXT: $vgpr9 = COPY [[ANYEXT25]](s32) + ; CHECK-NEXT: [[ANYEXT26:%[0-9]+]]:_(s32) = G_ANYEXT [[ANYEXT10]](s16) + ; CHECK-NEXT: $vgpr10 = COPY [[ANYEXT26]](s32) + ; CHECK-NEXT: [[ANYEXT27:%[0-9]+]]:_(s32) = G_ANYEXT [[ANYEXT11]](s16) + ; CHECK-NEXT: $vgpr11 = COPY [[ANYEXT27]](s32) + ; CHECK-NEXT: [[ANYEXT28:%[0-9]+]]:_(s32) = G_ANYEXT [[ANYEXT12]](s16) + ; CHECK-NEXT: $vgpr12 = COPY [[ANYEXT28]](s32) + ; CHECK-NEXT: [[ANYEXT29:%[0-9]+]]:_(s32) = G_ANYEXT [[ANYEXT13]](s16) + ; CHECK-NEXT: $vgpr13 = COPY [[ANYEXT29]](s32) + ; CHECK-NEXT: [[ANYEXT30:%[0-9]+]]:_(s32) = G_ANYEXT [[ANYEXT14]](s16) + ; CHECK-NEXT: $vgpr14 = COPY [[ANYEXT30]](s32) + ; CHECK-NEXT: [[ANYEXT31:%[0-9]+]]:_(s32) = G_ANYEXT [[ANYEXT15]](s16) + ; CHECK-NEXT: $vgpr15 = COPY [[ANYEXT31]](s32) + ; CHECK-NEXT: SI_RETURN implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7, implicit $vgpr8, implicit $vgpr9, implicit $vgpr10, implicit $vgpr11, implicit $vgpr12, implicit $vgpr13, implicit $vgpr14, implicit $vgpr15 %ptr = load volatile <16 x i8> addrspace(1)*, <16 x i8> addrspace(1)* addrspace(4)* undef %val = load <16 x i8>, <16 x i8> addrspace(1)* %ptr ret <16 x i8> %val @@ -1018,19 +865,16 @@ define <16 x i8> @v16i8_func_void() #0 { define <2 x i8> @v2i8_func_void() #0 { ; CHECK-LABEL: name: v2i8_func_void ; CHECK: bb.1 (%ir-block.0): - ; CHECK: liveins: $sgpr30_sgpr31 - ; CHECK: [[COPY:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 - ; CHECK: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF - ; CHECK: [[LOAD:%[0-9]+]]:_(<2 x s8>) = G_LOAD [[DEF]](p1) :: (load (<2 x s8>) from `<2 x i8> addrspace(1)* undef`, addrspace 1) - ; CHECK: [[UV:%[0-9]+]]:_(s8), 
[[UV1:%[0-9]+]]:_(s8) = G_UNMERGE_VALUES [[LOAD]](<2 x s8>) - ; CHECK: [[ANYEXT:%[0-9]+]]:_(s16) = G_ANYEXT [[UV]](s8) - ; CHECK: [[ANYEXT1:%[0-9]+]]:_(s16) = G_ANYEXT [[UV1]](s8) - ; CHECK: [[ANYEXT2:%[0-9]+]]:_(s32) = G_ANYEXT [[ANYEXT]](s16) - ; CHECK: $vgpr0 = COPY [[ANYEXT2]](s32) - ; CHECK: [[ANYEXT3:%[0-9]+]]:_(s32) = G_ANYEXT [[ANYEXT1]](s16) - ; CHECK: $vgpr1 = COPY [[ANYEXT3]](s32) - ; CHECK: [[COPY1:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY]] - ; CHECK: S_SETPC_B64_return [[COPY1]], implicit $vgpr0, implicit $vgpr1 + ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF + ; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(<2 x s8>) = G_LOAD [[DEF]](p1) :: (load (<2 x s8>) from `<2 x i8> addrspace(1)* undef`, addrspace 1) + ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s8), [[UV1:%[0-9]+]]:_(s8) = G_UNMERGE_VALUES [[LOAD]](<2 x s8>) + ; CHECK-NEXT: [[ANYEXT:%[0-9]+]]:_(s16) = G_ANYEXT [[UV]](s8) + ; CHECK-NEXT: [[ANYEXT1:%[0-9]+]]:_(s16) = G_ANYEXT [[UV1]](s8) + ; CHECK-NEXT: [[ANYEXT2:%[0-9]+]]:_(s32) = G_ANYEXT [[ANYEXT]](s16) + ; CHECK-NEXT: $vgpr0 = COPY [[ANYEXT2]](s32) + ; CHECK-NEXT: [[ANYEXT3:%[0-9]+]]:_(s32) = G_ANYEXT [[ANYEXT1]](s16) + ; CHECK-NEXT: $vgpr1 = COPY [[ANYEXT3]](s32) + ; CHECK-NEXT: SI_RETURN implicit $vgpr0, implicit $vgpr1 %val = load <2 x i8>, <2 x i8> addrspace(1)* undef ret <2 x i8> %val } @@ -1038,22 +882,19 @@ define <2 x i8> @v2i8_func_void() #0 { define <3 x i8> @v3i8_func_void() #0 { ; CHECK-LABEL: name: v3i8_func_void ; CHECK: bb.1 (%ir-block.0): - ; CHECK: liveins: $sgpr30_sgpr31 - ; CHECK: [[COPY:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 - ; CHECK: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF - ; CHECK: [[LOAD:%[0-9]+]]:_(<3 x s8>) = G_LOAD [[DEF]](p1) :: (load (<3 x s8>) from `<3 x i8> addrspace(1)* undef`, align 4, addrspace 1) - ; CHECK: [[UV:%[0-9]+]]:_(s8), [[UV1:%[0-9]+]]:_(s8), [[UV2:%[0-9]+]]:_(s8) = G_UNMERGE_VALUES [[LOAD]](<3 x s8>) - ; CHECK: [[ANYEXT:%[0-9]+]]:_(s16) = G_ANYEXT [[UV]](s8) - ; CHECK: [[ANYEXT1:%[0-9]+]]:_(s16) = G_ANYEXT [[UV1]](s8) - ; CHECK: [[ANYEXT2:%[0-9]+]]:_(s16) = G_ANYEXT [[UV2]](s8) - ; CHECK: [[ANYEXT3:%[0-9]+]]:_(s32) = G_ANYEXT [[ANYEXT]](s16) - ; CHECK: $vgpr0 = COPY [[ANYEXT3]](s32) - ; CHECK: [[ANYEXT4:%[0-9]+]]:_(s32) = G_ANYEXT [[ANYEXT1]](s16) - ; CHECK: $vgpr1 = COPY [[ANYEXT4]](s32) - ; CHECK: [[ANYEXT5:%[0-9]+]]:_(s32) = G_ANYEXT [[ANYEXT2]](s16) - ; CHECK: $vgpr2 = COPY [[ANYEXT5]](s32) - ; CHECK: [[COPY1:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY]] - ; CHECK: S_SETPC_B64_return [[COPY1]], implicit $vgpr0, implicit $vgpr1, implicit $vgpr2 + ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF + ; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(<3 x s8>) = G_LOAD [[DEF]](p1) :: (load (<3 x s8>) from `<3 x i8> addrspace(1)* undef`, align 4, addrspace 1) + ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s8), [[UV1:%[0-9]+]]:_(s8), [[UV2:%[0-9]+]]:_(s8) = G_UNMERGE_VALUES [[LOAD]](<3 x s8>) + ; CHECK-NEXT: [[ANYEXT:%[0-9]+]]:_(s16) = G_ANYEXT [[UV]](s8) + ; CHECK-NEXT: [[ANYEXT1:%[0-9]+]]:_(s16) = G_ANYEXT [[UV1]](s8) + ; CHECK-NEXT: [[ANYEXT2:%[0-9]+]]:_(s16) = G_ANYEXT [[UV2]](s8) + ; CHECK-NEXT: [[ANYEXT3:%[0-9]+]]:_(s32) = G_ANYEXT [[ANYEXT]](s16) + ; CHECK-NEXT: $vgpr0 = COPY [[ANYEXT3]](s32) + ; CHECK-NEXT: [[ANYEXT4:%[0-9]+]]:_(s32) = G_ANYEXT [[ANYEXT1]](s16) + ; CHECK-NEXT: $vgpr1 = COPY [[ANYEXT4]](s32) + ; CHECK-NEXT: [[ANYEXT5:%[0-9]+]]:_(s32) = G_ANYEXT [[ANYEXT2]](s16) + ; CHECK-NEXT: $vgpr2 = COPY [[ANYEXT5]](s32) + ; CHECK-NEXT: SI_RETURN implicit $vgpr0, implicit $vgpr1, implicit $vgpr2 %val = load <3 x i8>, <3 x i8> addrspace(1)* undef ret <3 x i8> %val } @@ 
-1061,26 +902,23 @@ define <3 x i8> @v3i8_func_void() #0 { define <4 x i8> @v4i8_func_void() #0 { ; CHECK-LABEL: name: v4i8_func_void ; CHECK: bb.1 (%ir-block.0): - ; CHECK: liveins: $sgpr30_sgpr31 - ; CHECK: [[COPY:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 - ; CHECK: [[DEF:%[0-9]+]]:_(p4) = G_IMPLICIT_DEF - ; CHECK: [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[DEF]](p4) :: (volatile load (p1) from `<4 x i8> addrspace(1)* addrspace(4)* undef`, addrspace 4) - ; CHECK: [[LOAD1:%[0-9]+]]:_(<4 x s8>) = G_LOAD [[LOAD]](p1) :: (load (<4 x s8>) from %ir.ptr, addrspace 1) - ; CHECK: [[UV:%[0-9]+]]:_(s8), [[UV1:%[0-9]+]]:_(s8), [[UV2:%[0-9]+]]:_(s8), [[UV3:%[0-9]+]]:_(s8) = G_UNMERGE_VALUES [[LOAD1]](<4 x s8>) - ; CHECK: [[ANYEXT:%[0-9]+]]:_(s16) = G_ANYEXT [[UV]](s8) - ; CHECK: [[ANYEXT1:%[0-9]+]]:_(s16) = G_ANYEXT [[UV1]](s8) - ; CHECK: [[ANYEXT2:%[0-9]+]]:_(s16) = G_ANYEXT [[UV2]](s8) - ; CHECK: [[ANYEXT3:%[0-9]+]]:_(s16) = G_ANYEXT [[UV3]](s8) - ; CHECK: [[ANYEXT4:%[0-9]+]]:_(s32) = G_ANYEXT [[ANYEXT]](s16) - ; CHECK: $vgpr0 = COPY [[ANYEXT4]](s32) - ; CHECK: [[ANYEXT5:%[0-9]+]]:_(s32) = G_ANYEXT [[ANYEXT1]](s16) - ; CHECK: $vgpr1 = COPY [[ANYEXT5]](s32) - ; CHECK: [[ANYEXT6:%[0-9]+]]:_(s32) = G_ANYEXT [[ANYEXT2]](s16) - ; CHECK: $vgpr2 = COPY [[ANYEXT6]](s32) - ; CHECK: [[ANYEXT7:%[0-9]+]]:_(s32) = G_ANYEXT [[ANYEXT3]](s16) - ; CHECK: $vgpr3 = COPY [[ANYEXT7]](s32) - ; CHECK: [[COPY1:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY]] - ; CHECK: S_SETPC_B64_return [[COPY1]], implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3 + ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p4) = G_IMPLICIT_DEF + ; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[DEF]](p4) :: (volatile load (p1) from `<4 x i8> addrspace(1)* addrspace(4)* undef`, addrspace 4) + ; CHECK-NEXT: [[LOAD1:%[0-9]+]]:_(<4 x s8>) = G_LOAD [[LOAD]](p1) :: (load (<4 x s8>) from %ir.ptr, addrspace 1) + ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s8), [[UV1:%[0-9]+]]:_(s8), [[UV2:%[0-9]+]]:_(s8), [[UV3:%[0-9]+]]:_(s8) = G_UNMERGE_VALUES [[LOAD1]](<4 x s8>) + ; CHECK-NEXT: [[ANYEXT:%[0-9]+]]:_(s16) = G_ANYEXT [[UV]](s8) + ; CHECK-NEXT: [[ANYEXT1:%[0-9]+]]:_(s16) = G_ANYEXT [[UV1]](s8) + ; CHECK-NEXT: [[ANYEXT2:%[0-9]+]]:_(s16) = G_ANYEXT [[UV2]](s8) + ; CHECK-NEXT: [[ANYEXT3:%[0-9]+]]:_(s16) = G_ANYEXT [[UV3]](s8) + ; CHECK-NEXT: [[ANYEXT4:%[0-9]+]]:_(s32) = G_ANYEXT [[ANYEXT]](s16) + ; CHECK-NEXT: $vgpr0 = COPY [[ANYEXT4]](s32) + ; CHECK-NEXT: [[ANYEXT5:%[0-9]+]]:_(s32) = G_ANYEXT [[ANYEXT1]](s16) + ; CHECK-NEXT: $vgpr1 = COPY [[ANYEXT5]](s32) + ; CHECK-NEXT: [[ANYEXT6:%[0-9]+]]:_(s32) = G_ANYEXT [[ANYEXT2]](s16) + ; CHECK-NEXT: $vgpr2 = COPY [[ANYEXT6]](s32) + ; CHECK-NEXT: [[ANYEXT7:%[0-9]+]]:_(s32) = G_ANYEXT [[ANYEXT3]](s16) + ; CHECK-NEXT: $vgpr3 = COPY [[ANYEXT7]](s32) + ; CHECK-NEXT: SI_RETURN implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3 %ptr = load volatile <4 x i8> addrspace(1)*, <4 x i8> addrspace(1)* addrspace(4)* undef %val = load <4 x i8>, <4 x i8> addrspace(1)* %ptr ret <4 x i8> %val @@ -1089,18 +927,15 @@ define <4 x i8> @v4i8_func_void() #0 { define {i8, i32} @struct_i8_i32_func_void() #0 { ; CHECK-LABEL: name: struct_i8_i32_func_void ; CHECK: bb.1 (%ir-block.0): - ; CHECK: liveins: $sgpr30_sgpr31 - ; CHECK: [[COPY:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 - ; CHECK: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF - ; CHECK: [[LOAD:%[0-9]+]]:_(s8) = G_LOAD [[DEF]](p1) :: (load (s8) from `{ i8, i32 } addrspace(1)* undef`, align 4, addrspace 1) - ; CHECK: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 4 - ; CHECK: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[DEF]], 
[[C]](s64) - ; CHECK: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p1) :: (load (s32) from `{ i8, i32 } addrspace(1)* undef` + 4, addrspace 1) - ; CHECK: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[LOAD]](s8) - ; CHECK: $vgpr0 = COPY [[ANYEXT]](s32) - ; CHECK: $vgpr1 = COPY [[LOAD1]](s32) - ; CHECK: [[COPY1:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY]] - ; CHECK: S_SETPC_B64_return [[COPY1]], implicit $vgpr0, implicit $vgpr1 + ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF + ; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(s8) = G_LOAD [[DEF]](p1) :: (load (s8) from `{ i8, i32 } addrspace(1)* undef`, align 4, addrspace 1) + ; CHECK-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 4 + ; CHECK-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[DEF]], [[C]](s64) + ; CHECK-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p1) :: (load (s32) from `{ i8, i32 } addrspace(1)* undef` + 4, addrspace 1) + ; CHECK-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[LOAD]](s8) + ; CHECK-NEXT: $vgpr0 = COPY [[ANYEXT]](s32) + ; CHECK-NEXT: $vgpr1 = COPY [[LOAD1]](s32) + ; CHECK-NEXT: SI_RETURN implicit $vgpr0, implicit $vgpr1 %val = load { i8, i32 }, { i8, i32 } addrspace(1)* undef ret { i8, i32 } %val } @@ -1108,19 +943,18 @@ define {i8, i32} @struct_i8_i32_func_void() #0 { define void @void_func_sret_struct_i8_i32({ i8, i32 } addrspace(5)* sret({ i8, i32 }) %arg0) #0 { ; CHECK-LABEL: name: void_func_sret_struct_i8_i32 ; CHECK: bb.1 (%ir-block.0): - ; CHECK: liveins: $vgpr0, $sgpr30_sgpr31 - ; CHECK: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 - ; CHECK: [[COPY1:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 - ; CHECK: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF - ; CHECK: [[COPY2:%[0-9]+]]:_(p1) = COPY [[DEF]](p1) - ; CHECK: [[LOAD:%[0-9]+]]:_(s8) = G_LOAD [[DEF]](p1) :: (volatile load (s8) from `i8 addrspace(1)* undef`, addrspace 1) - ; CHECK: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[COPY2]](p1) :: (volatile load (s32) from `i32 addrspace(1)* undef`, addrspace 1) - ; CHECK: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 - ; CHECK: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](s32) - ; CHECK: G_STORE [[LOAD]](s8), [[COPY]](p5) :: (store (s8) into %ir.gep01, addrspace 5) - ; CHECK: G_STORE [[LOAD1]](s32), [[PTR_ADD]](p5) :: (store (s32) into %ir.gep1, addrspace 5) - ; CHECK: [[COPY3:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY1]] - ; CHECK: S_SETPC_B64_return [[COPY3]] + ; CHECK-NEXT: liveins: $vgpr0 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 + ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(p1) = COPY [[DEF]](p1) + ; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(s8) = G_LOAD [[DEF]](p1) :: (volatile load (s8) from `i8 addrspace(1)* undef`, addrspace 1) + ; CHECK-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[COPY1]](p1) :: (volatile load (s32) from `i32 addrspace(1)* undef`, addrspace 1) + ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 + ; CHECK-NEXT: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](s32) + ; CHECK-NEXT: G_STORE [[LOAD]](s8), [[COPY]](p5) :: (store (s8) into %ir.gep01, addrspace 5) + ; CHECK-NEXT: G_STORE [[LOAD1]](s32), [[PTR_ADD]](p5) :: (store (s32) into %ir.gep1, addrspace 5) + ; CHECK-NEXT: SI_RETURN %val0 = load volatile i8, i8 addrspace(1)* undef %val1 = load volatile i32, i32 addrspace(1)* undef %gep0 = getelementptr inbounds { i8, i32 }, { i8, i32 } addrspace(5)* %arg0, i32 0, i32 0 @@ -1137,15 +971,14 @@ define void @void_func_sret_struct_i8_i32({ i8, i32 } addrspace(5)* sret({ i8, i define <33 x i32> @v33i32_func_void() #0 { ; CHECK-LABEL: name: v33i32_func_void ; CHECK: bb.1 
(%ir-block.0): - ; CHECK: liveins: $vgpr0, $sgpr30_sgpr31 - ; CHECK: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 - ; CHECK: [[COPY1:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 - ; CHECK: [[DEF:%[0-9]+]]:_(p4) = G_IMPLICIT_DEF - ; CHECK: [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[DEF]](p4) :: (volatile load (p1) from `<33 x i32> addrspace(1)* addrspace(4)* undef`, addrspace 4) - ; CHECK: [[LOAD1:%[0-9]+]]:_(<33 x s32>) = G_LOAD [[LOAD]](p1) :: (load (<33 x s32>) from %ir.ptr, align 256, addrspace 1) - ; CHECK: G_STORE [[LOAD1]](<33 x s32>), [[COPY]](p5) :: (store (<33 x s32>), align 256, addrspace 5) - ; CHECK: [[COPY2:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY1]] - ; CHECK: S_SETPC_B64_return [[COPY2]] + ; CHECK-NEXT: liveins: $vgpr0 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 + ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p4) = G_IMPLICIT_DEF + ; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[DEF]](p4) :: (volatile load (p1) from `<33 x i32> addrspace(1)* addrspace(4)* undef`, addrspace 4) + ; CHECK-NEXT: [[LOAD1:%[0-9]+]]:_(<33 x s32>) = G_LOAD [[LOAD]](p1) :: (load (<33 x s32>) from %ir.ptr, align 256, addrspace 1) + ; CHECK-NEXT: G_STORE [[LOAD1]](<33 x s32>), [[COPY]](p5) :: (store (<33 x s32>), align 256, addrspace 5) + ; CHECK-NEXT: SI_RETURN %ptr = load volatile <33 x i32> addrspace(1)*, <33 x i32> addrspace(1)* addrspace(4)* undef %val = load <33 x i32>, <33 x i32> addrspace(1)* %ptr ret <33 x i32> %val @@ -1154,22 +987,21 @@ define <33 x i32> @v33i32_func_void() #0 { define <33 x i32> @v33i32_func_v33i32_i32(<33 x i32> addrspace(1)* %p, i32 %idx) #0 { ; CHECK-LABEL: name: v33i32_func_v33i32_i32 ; CHECK: bb.1 (%ir-block.0): - ; CHECK: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $sgpr30_sgpr31 - ; CHECK: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 - ; CHECK: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; CHECK: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; CHECK: [[MV:%[0-9]+]]:_(p1) = G_MERGE_VALUES [[COPY1]](s32), [[COPY2]](s32) - ; CHECK: [[COPY3:%[0-9]+]]:_(s32) = COPY $vgpr3 - ; CHECK: [[COPY4:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 - ; CHECK: [[SEXT:%[0-9]+]]:_(s64) = G_SEXT [[COPY3]](s32) - ; CHECK: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 256 - ; CHECK: [[MUL:%[0-9]+]]:_(s64) = G_MUL [[SEXT]], [[C]] - ; CHECK: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[MV]], [[MUL]](s64) - ; CHECK: [[COPY5:%[0-9]+]]:_(p1) = COPY [[PTR_ADD]](p1) - ; CHECK: [[LOAD:%[0-9]+]]:_(<33 x s32>) = G_LOAD [[COPY5]](p1) :: (load (<33 x s32>) from %ir.gep, align 256, addrspace 1) - ; CHECK: G_STORE [[LOAD]](<33 x s32>), [[COPY]](p5) :: (store (<33 x s32>), align 256, addrspace 5) - ; CHECK: [[COPY6:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY4]] - ; CHECK: S_SETPC_B64_return [[COPY6]] + ; CHECK-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 + ; CHECK-NEXT: [[MV:%[0-9]+]]:_(p1) = G_MERGE_VALUES [[COPY1]](s32), [[COPY2]](s32) + ; CHECK-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $vgpr3 + ; CHECK-NEXT: [[SEXT:%[0-9]+]]:_(s64) = G_SEXT [[COPY3]](s32) + ; CHECK-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 256 + ; CHECK-NEXT: [[MUL:%[0-9]+]]:_(s64) = G_MUL [[SEXT]], [[C]] + ; CHECK-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[MV]], [[MUL]](s64) + ; CHECK-NEXT: [[COPY4:%[0-9]+]]:_(p1) = COPY [[PTR_ADD]](p1) + ; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(<33 x s32>) = G_LOAD [[COPY4]](p1) :: (load (<33 x s32>) from %ir.gep, align 256, addrspace 1) + ; CHECK-NEXT: G_STORE [[LOAD]](<33 x 
s32>), [[COPY]](p5) :: (store (<33 x s32>), align 256, addrspace 5) + ; CHECK-NEXT: SI_RETURN %gep = getelementptr inbounds <33 x i32>, <33 x i32> addrspace(1)* %p, i32 %idx %val = load <33 x i32>, <33 x i32> addrspace(1)* %gep ret <33 x i32> %val @@ -1178,21 +1010,20 @@ define <33 x i32> @v33i32_func_v33i32_i32(<33 x i32> addrspace(1)* %p, i32 %idx) define { <32 x i32>, i32 } @struct_v32i32_i32_func_void() #0 { ; CHECK-LABEL: name: struct_v32i32_i32_func_void ; CHECK: bb.1 (%ir-block.0): - ; CHECK: liveins: $vgpr0, $sgpr30_sgpr31 - ; CHECK: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 - ; CHECK: [[COPY1:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 - ; CHECK: [[DEF:%[0-9]+]]:_(p4) = G_IMPLICIT_DEF - ; CHECK: [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[DEF]](p4) :: (volatile load (p1) from `{ <32 x i32>, i32 } addrspace(1)* addrspace(4)* undef`, addrspace 4) - ; CHECK: [[LOAD1:%[0-9]+]]:_(<32 x s32>) = G_LOAD [[LOAD]](p1) :: (load (<32 x s32>) from %ir.ptr, addrspace 1) - ; CHECK: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 128 - ; CHECK: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[LOAD]], [[C]](s64) - ; CHECK: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p1) :: (load (s32) from %ir.ptr + 128, align 128, addrspace 1) - ; CHECK: G_STORE [[LOAD1]](<32 x s32>), [[COPY]](p5) :: (store (<32 x s32>), addrspace 5) - ; CHECK: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 128 - ; CHECK: [[PTR_ADD1:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C1]](s32) - ; CHECK: G_STORE [[LOAD2]](s32), [[PTR_ADD1]](p5) :: (store (s32), align 128, addrspace 5) - ; CHECK: [[COPY2:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY1]] - ; CHECK: S_SETPC_B64_return [[COPY2]] + ; CHECK-NEXT: liveins: $vgpr0 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 + ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p4) = G_IMPLICIT_DEF + ; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[DEF]](p4) :: (volatile load (p1) from `{ <32 x i32>, i32 } addrspace(1)* addrspace(4)* undef`, addrspace 4) + ; CHECK-NEXT: [[LOAD1:%[0-9]+]]:_(<32 x s32>) = G_LOAD [[LOAD]](p1) :: (load (<32 x s32>) from %ir.ptr, addrspace 1) + ; CHECK-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 128 + ; CHECK-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[LOAD]], [[C]](s64) + ; CHECK-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p1) :: (load (s32) from %ir.ptr + 128, align 128, addrspace 1) + ; CHECK-NEXT: G_STORE [[LOAD1]](<32 x s32>), [[COPY]](p5) :: (store (<32 x s32>), addrspace 5) + ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 128 + ; CHECK-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C1]](s32) + ; CHECK-NEXT: G_STORE [[LOAD2]](s32), [[PTR_ADD1]](p5) :: (store (s32), align 128, addrspace 5) + ; CHECK-NEXT: SI_RETURN %ptr = load volatile { <32 x i32>, i32 } addrspace(1)*, { <32 x i32>, i32 } addrspace(1)* addrspace(4)* undef %val = load { <32 x i32>, i32 }, { <32 x i32>, i32 } addrspace(1)* %ptr ret { <32 x i32>, i32 }%val @@ -1201,21 +1032,20 @@ define { <32 x i32>, i32 } @struct_v32i32_i32_func_void() #0 { define { i32, <32 x i32> } @struct_i32_v32i32_func_void() #0 { ; CHECK-LABEL: name: struct_i32_v32i32_func_void ; CHECK: bb.1 (%ir-block.0): - ; CHECK: liveins: $vgpr0, $sgpr30_sgpr31 - ; CHECK: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 - ; CHECK: [[COPY1:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 - ; CHECK: [[DEF:%[0-9]+]]:_(p4) = G_IMPLICIT_DEF - ; CHECK: [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[DEF]](p4) :: (volatile load (p1) from `{ i32, <32 x i32> } addrspace(1)* addrspace(4)* undef`, addrspace 4) - ; CHECK: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[LOAD]](p1) :: (load (s32) from %ir.ptr, align 
128, addrspace 1) - ; CHECK: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 128 - ; CHECK: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[LOAD]], [[C]](s64) - ; CHECK: [[LOAD2:%[0-9]+]]:_(<32 x s32>) = G_LOAD [[PTR_ADD]](p1) :: (load (<32 x s32>) from %ir.ptr + 128, addrspace 1) - ; CHECK: G_STORE [[LOAD1]](s32), [[COPY]](p5) :: (store (s32), align 128, addrspace 5) - ; CHECK: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 128 - ; CHECK: [[PTR_ADD1:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C1]](s32) - ; CHECK: G_STORE [[LOAD2]](<32 x s32>), [[PTR_ADD1]](p5) :: (store (<32 x s32>), addrspace 5) - ; CHECK: [[COPY2:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY1]] - ; CHECK: S_SETPC_B64_return [[COPY2]] + ; CHECK-NEXT: liveins: $vgpr0 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 + ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p4) = G_IMPLICIT_DEF + ; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[DEF]](p4) :: (volatile load (p1) from `{ i32, <32 x i32> } addrspace(1)* addrspace(4)* undef`, addrspace 4) + ; CHECK-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[LOAD]](p1) :: (load (s32) from %ir.ptr, align 128, addrspace 1) + ; CHECK-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 128 + ; CHECK-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[LOAD]], [[C]](s64) + ; CHECK-NEXT: [[LOAD2:%[0-9]+]]:_(<32 x s32>) = G_LOAD [[PTR_ADD]](p1) :: (load (<32 x s32>) from %ir.ptr + 128, addrspace 1) + ; CHECK-NEXT: G_STORE [[LOAD1]](s32), [[COPY]](p5) :: (store (s32), align 128, addrspace 5) + ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 128 + ; CHECK-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C1]](s32) + ; CHECK-NEXT: G_STORE [[LOAD2]](<32 x s32>), [[PTR_ADD1]](p5) :: (store (<32 x s32>), addrspace 5) + ; CHECK-NEXT: SI_RETURN %ptr = load volatile { i32, <32 x i32> } addrspace(1)*, { i32, <32 x i32> } addrspace(1)* addrspace(4)* undef %val = load { i32, <32 x i32> }, { i32, <32 x i32> } addrspace(1)* %ptr ret { i32, <32 x i32> }%val @@ -1225,28 +1055,25 @@ define { i32, <32 x i32> } @struct_i32_v32i32_func_void() #0 { define { <3 x i32>, i32 } @v3i32_struct_func_void_wasted_reg() #0 { ; CHECK-LABEL: name: v3i32_struct_func_void_wasted_reg ; CHECK: bb.1 (%ir-block.0): - ; CHECK: liveins: $sgpr30_sgpr31 - ; CHECK: [[COPY:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 - ; CHECK: [[DEF:%[0-9]+]]:_(p3) = G_IMPLICIT_DEF - ; CHECK: [[DEF1:%[0-9]+]]:_(<3 x s32>) = G_IMPLICIT_DEF - ; CHECK: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 - ; CHECK: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 - ; CHECK: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 2 - ; CHECK: [[DEF2:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF - ; CHECK: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[DEF]](p3) :: (volatile load (s32) from `i32 addrspace(3)* undef`, addrspace 3) - ; CHECK: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[DEF]](p3) :: (volatile load (s32) from `i32 addrspace(3)* undef`, addrspace 3) - ; CHECK: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[DEF]](p3) :: (volatile load (s32) from `i32 addrspace(3)* undef`, addrspace 3) - ; CHECK: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[DEF]](p3) :: (volatile load (s32) from `i32 addrspace(3)* undef`, addrspace 3) - ; CHECK: [[IVEC:%[0-9]+]]:_(<3 x s32>) = G_INSERT_VECTOR_ELT [[DEF1]], [[LOAD]](s32), [[C]](s32) - ; CHECK: [[IVEC1:%[0-9]+]]:_(<3 x s32>) = G_INSERT_VECTOR_ELT [[IVEC]], [[LOAD1]](s32), [[C1]](s32) - ; CHECK: [[IVEC2:%[0-9]+]]:_(<3 x s32>) = G_INSERT_VECTOR_ELT [[IVEC1]], [[LOAD2]](s32), [[C2]](s32) - ; CHECK: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[IVEC2]](<3 x s32>) - ; CHECK: $vgpr0 = COPY [[UV]](s32) - ; 
CHECK: $vgpr1 = COPY [[UV1]](s32) - ; CHECK: $vgpr2 = COPY [[UV2]](s32) - ; CHECK: $vgpr3 = COPY [[LOAD3]](s32) - ; CHECK: [[COPY1:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY]] - ; CHECK: S_SETPC_B64_return [[COPY1]], implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3 + ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p3) = G_IMPLICIT_DEF + ; CHECK-NEXT: [[DEF1:%[0-9]+]]:_(<3 x s32>) = G_IMPLICIT_DEF + ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 + ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 + ; CHECK-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 2 + ; CHECK-NEXT: [[DEF2:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF + ; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[DEF]](p3) :: (volatile load (s32) from `i32 addrspace(3)* undef`, addrspace 3) + ; CHECK-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[DEF]](p3) :: (volatile load (s32) from `i32 addrspace(3)* undef`, addrspace 3) + ; CHECK-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[DEF]](p3) :: (volatile load (s32) from `i32 addrspace(3)* undef`, addrspace 3) + ; CHECK-NEXT: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[DEF]](p3) :: (volatile load (s32) from `i32 addrspace(3)* undef`, addrspace 3) + ; CHECK-NEXT: [[IVEC:%[0-9]+]]:_(<3 x s32>) = G_INSERT_VECTOR_ELT [[DEF1]], [[LOAD]](s32), [[C]](s32) + ; CHECK-NEXT: [[IVEC1:%[0-9]+]]:_(<3 x s32>) = G_INSERT_VECTOR_ELT [[IVEC]], [[LOAD1]](s32), [[C1]](s32) + ; CHECK-NEXT: [[IVEC2:%[0-9]+]]:_(<3 x s32>) = G_INSERT_VECTOR_ELT [[IVEC1]], [[LOAD2]](s32), [[C2]](s32) + ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[IVEC2]](<3 x s32>) + ; CHECK-NEXT: $vgpr0 = COPY [[UV]](s32) + ; CHECK-NEXT: $vgpr1 = COPY [[UV1]](s32) + ; CHECK-NEXT: $vgpr2 = COPY [[UV2]](s32) + ; CHECK-NEXT: $vgpr3 = COPY [[LOAD3]](s32) + ; CHECK-NEXT: SI_RETURN implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3 %load0 = load volatile i32, i32 addrspace(3)* undef %load1 = load volatile i32, i32 addrspace(3)* undef %load2 = load volatile i32, i32 addrspace(3)* undef @@ -1263,29 +1090,26 @@ define { <3 x i32>, i32 } @v3i32_struct_func_void_wasted_reg() #0 { define { <3 x float>, i32 } @v3f32_struct_func_void_wasted_reg() #0 { ; CHECK-LABEL: name: v3f32_struct_func_void_wasted_reg ; CHECK: bb.1 (%ir-block.0): - ; CHECK: liveins: $sgpr30_sgpr31 - ; CHECK: [[COPY:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 - ; CHECK: [[DEF:%[0-9]+]]:_(p3) = G_IMPLICIT_DEF - ; CHECK: [[COPY1:%[0-9]+]]:_(p3) = COPY [[DEF]](p3) - ; CHECK: [[DEF1:%[0-9]+]]:_(<3 x s32>) = G_IMPLICIT_DEF - ; CHECK: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 - ; CHECK: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 - ; CHECK: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 2 - ; CHECK: [[DEF2:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF - ; CHECK: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[DEF]](p3) :: (volatile load (s32) from `float addrspace(3)* undef`, addrspace 3) - ; CHECK: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[DEF]](p3) :: (volatile load (s32) from `float addrspace(3)* undef`, addrspace 3) - ; CHECK: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[DEF]](p3) :: (volatile load (s32) from `float addrspace(3)* undef`, addrspace 3) - ; CHECK: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[COPY1]](p3) :: (volatile load (s32) from `i32 addrspace(3)* undef`, addrspace 3) - ; CHECK: [[IVEC:%[0-9]+]]:_(<3 x s32>) = G_INSERT_VECTOR_ELT [[DEF1]], [[LOAD]](s32), [[C]](s32) - ; CHECK: [[IVEC1:%[0-9]+]]:_(<3 x s32>) = G_INSERT_VECTOR_ELT [[IVEC]], [[LOAD1]](s32), [[C1]](s32) - ; CHECK: [[IVEC2:%[0-9]+]]:_(<3 x s32>) = G_INSERT_VECTOR_ELT [[IVEC1]], [[LOAD2]](s32), [[C2]](s32) - ; CHECK: 
[[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[IVEC2]](<3 x s32>) - ; CHECK: $vgpr0 = COPY [[UV]](s32) - ; CHECK: $vgpr1 = COPY [[UV1]](s32) - ; CHECK: $vgpr2 = COPY [[UV2]](s32) - ; CHECK: $vgpr3 = COPY [[LOAD3]](s32) - ; CHECK: [[COPY2:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY]] - ; CHECK: S_SETPC_B64_return [[COPY2]], implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3 + ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p3) = G_IMPLICIT_DEF + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(p3) = COPY [[DEF]](p3) + ; CHECK-NEXT: [[DEF1:%[0-9]+]]:_(<3 x s32>) = G_IMPLICIT_DEF + ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 + ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 + ; CHECK-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 2 + ; CHECK-NEXT: [[DEF2:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF + ; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[DEF]](p3) :: (volatile load (s32) from `float addrspace(3)* undef`, addrspace 3) + ; CHECK-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[DEF]](p3) :: (volatile load (s32) from `float addrspace(3)* undef`, addrspace 3) + ; CHECK-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[DEF]](p3) :: (volatile load (s32) from `float addrspace(3)* undef`, addrspace 3) + ; CHECK-NEXT: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (volatile load (s32) from `i32 addrspace(3)* undef`, addrspace 3) + ; CHECK-NEXT: [[IVEC:%[0-9]+]]:_(<3 x s32>) = G_INSERT_VECTOR_ELT [[DEF1]], [[LOAD]](s32), [[C]](s32) + ; CHECK-NEXT: [[IVEC1:%[0-9]+]]:_(<3 x s32>) = G_INSERT_VECTOR_ELT [[IVEC]], [[LOAD1]](s32), [[C1]](s32) + ; CHECK-NEXT: [[IVEC2:%[0-9]+]]:_(<3 x s32>) = G_INSERT_VECTOR_ELT [[IVEC1]], [[LOAD2]](s32), [[C2]](s32) + ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[IVEC2]](<3 x s32>) + ; CHECK-NEXT: $vgpr0 = COPY [[UV]](s32) + ; CHECK-NEXT: $vgpr1 = COPY [[UV1]](s32) + ; CHECK-NEXT: $vgpr2 = COPY [[UV2]](s32) + ; CHECK-NEXT: $vgpr3 = COPY [[LOAD3]](s32) + ; CHECK-NEXT: SI_RETURN implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3 %load0 = load volatile float, float addrspace(3)* undef %load1 = load volatile float, float addrspace(3)* undef %load2 = load volatile float, float addrspace(3)* undef @@ -1302,22 +1126,21 @@ define { <3 x float>, i32 } @v3f32_struct_func_void_wasted_reg() #0 { define void @void_func_sret_max_known_zero_bits(i8 addrspace(5)* sret(i8) %arg0) #0 { ; CHECK-LABEL: name: void_func_sret_max_known_zero_bits ; CHECK: bb.1 (%ir-block.0): - ; CHECK: liveins: $vgpr0, $sgpr30_sgpr31 - ; CHECK: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 - ; CHECK: [[COPY1:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 - ; CHECK: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; CHECK: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 17 - ; CHECK: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 18 - ; CHECK: [[DEF:%[0-9]+]]:_(p3) = G_IMPLICIT_DEF - ; CHECK: [[PTRTOINT:%[0-9]+]]:_(s32) = G_PTRTOINT [[COPY]](p5) - ; CHECK: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[PTRTOINT]], [[C]](s32) - ; CHECK: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[PTRTOINT]], [[C1]](s32) - ; CHECK: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[PTRTOINT]], [[C2]](s32) - ; CHECK: G_STORE [[LSHR]](s32), [[DEF]](p3) :: (volatile store (s32) into `i32 addrspace(3)* undef`, addrspace 3) - ; CHECK: G_STORE [[LSHR1]](s32), [[DEF]](p3) :: (volatile store (s32) into `i32 addrspace(3)* undef`, addrspace 3) - ; CHECK: G_STORE [[LSHR2]](s32), [[DEF]](p3) :: (volatile store (s32) into `i32 addrspace(3)* undef`, addrspace 3) - ; CHECK: [[COPY2:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY1]] - ; 
CHECK: S_SETPC_B64_return [[COPY2]] + ; CHECK-NEXT: liveins: $vgpr0 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 + ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 17 + ; CHECK-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 18 + ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p3) = G_IMPLICIT_DEF + ; CHECK-NEXT: [[PTRTOINT:%[0-9]+]]:_(s32) = G_PTRTOINT [[COPY]](p5) + ; CHECK-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[PTRTOINT]], [[C]](s32) + ; CHECK-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[PTRTOINT]], [[C1]](s32) + ; CHECK-NEXT: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[PTRTOINT]], [[C2]](s32) + ; CHECK-NEXT: G_STORE [[LSHR]](s32), [[DEF]](p3) :: (volatile store (s32) into `i32 addrspace(3)* undef`, addrspace 3) + ; CHECK-NEXT: G_STORE [[LSHR1]](s32), [[DEF]](p3) :: (volatile store (s32) into `i32 addrspace(3)* undef`, addrspace 3) + ; CHECK-NEXT: G_STORE [[LSHR2]](s32), [[DEF]](p3) :: (volatile store (s32) into `i32 addrspace(3)* undef`, addrspace 3) + ; CHECK-NEXT: SI_RETURN %arg0.int = ptrtoint i8 addrspace(5)* %arg0 to i32 %lshr0 = lshr i32 %arg0.int, 16 @@ -1333,46 +1156,43 @@ define void @void_func_sret_max_known_zero_bits(i8 addrspace(5)* sret(i8) %arg0) define i1022 @i1022_func_void() #0 { ; CHECK-LABEL: name: i1022_func_void ; CHECK: bb.1 (%ir-block.0): - ; CHECK: liveins: $sgpr30_sgpr31 - ; CHECK: [[COPY:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 - ; CHECK: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF - ; CHECK: [[LOAD:%[0-9]+]]:_(s1022) = G_LOAD [[DEF]](p1) :: (load (s1022) from `i1022 addrspace(1)* undef`, align 8, addrspace 1) - ; CHECK: [[ANYEXT:%[0-9]+]]:_(s1024) = G_ANYEXT [[LOAD]](s1022) - ; CHECK: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32), [[UV6:%[0-9]+]]:_(s32), [[UV7:%[0-9]+]]:_(s32), [[UV8:%[0-9]+]]:_(s32), [[UV9:%[0-9]+]]:_(s32), [[UV10:%[0-9]+]]:_(s32), [[UV11:%[0-9]+]]:_(s32), [[UV12:%[0-9]+]]:_(s32), [[UV13:%[0-9]+]]:_(s32), [[UV14:%[0-9]+]]:_(s32), [[UV15:%[0-9]+]]:_(s32), [[UV16:%[0-9]+]]:_(s32), [[UV17:%[0-9]+]]:_(s32), [[UV18:%[0-9]+]]:_(s32), [[UV19:%[0-9]+]]:_(s32), [[UV20:%[0-9]+]]:_(s32), [[UV21:%[0-9]+]]:_(s32), [[UV22:%[0-9]+]]:_(s32), [[UV23:%[0-9]+]]:_(s32), [[UV24:%[0-9]+]]:_(s32), [[UV25:%[0-9]+]]:_(s32), [[UV26:%[0-9]+]]:_(s32), [[UV27:%[0-9]+]]:_(s32), [[UV28:%[0-9]+]]:_(s32), [[UV29:%[0-9]+]]:_(s32), [[UV30:%[0-9]+]]:_(s32), [[UV31:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[ANYEXT]](s1024) - ; CHECK: $vgpr0 = COPY [[UV]](s32) - ; CHECK: $vgpr1 = COPY [[UV1]](s32) - ; CHECK: $vgpr2 = COPY [[UV2]](s32) - ; CHECK: $vgpr3 = COPY [[UV3]](s32) - ; CHECK: $vgpr4 = COPY [[UV4]](s32) - ; CHECK: $vgpr5 = COPY [[UV5]](s32) - ; CHECK: $vgpr6 = COPY [[UV6]](s32) - ; CHECK: $vgpr7 = COPY [[UV7]](s32) - ; CHECK: $vgpr8 = COPY [[UV8]](s32) - ; CHECK: $vgpr9 = COPY [[UV9]](s32) - ; CHECK: $vgpr10 = COPY [[UV10]](s32) - ; CHECK: $vgpr11 = COPY [[UV11]](s32) - ; CHECK: $vgpr12 = COPY [[UV12]](s32) - ; CHECK: $vgpr13 = COPY [[UV13]](s32) - ; CHECK: $vgpr14 = COPY [[UV14]](s32) - ; CHECK: $vgpr15 = COPY [[UV15]](s32) - ; CHECK: $vgpr16 = COPY [[UV16]](s32) - ; CHECK: $vgpr17 = COPY [[UV17]](s32) - ; CHECK: $vgpr18 = COPY [[UV18]](s32) - ; CHECK: $vgpr19 = COPY [[UV19]](s32) - ; CHECK: $vgpr20 = COPY [[UV20]](s32) - ; CHECK: $vgpr21 = COPY [[UV21]](s32) - ; CHECK: $vgpr22 = COPY [[UV22]](s32) - ; CHECK: $vgpr23 = COPY [[UV23]](s32) - ; CHECK: $vgpr24 = COPY [[UV24]](s32) - ; CHECK: $vgpr25 = COPY [[UV25]](s32) - ; 
CHECK: $vgpr26 = COPY [[UV26]](s32) - ; CHECK: $vgpr27 = COPY [[UV27]](s32) - ; CHECK: $vgpr28 = COPY [[UV28]](s32) - ; CHECK: $vgpr29 = COPY [[UV29]](s32) - ; CHECK: $vgpr30 = COPY [[UV30]](s32) - ; CHECK: $vgpr31 = COPY [[UV31]](s32) - ; CHECK: [[COPY1:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY]] - ; CHECK: S_SETPC_B64_return [[COPY1]], implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7, implicit $vgpr8, implicit $vgpr9, implicit $vgpr10, implicit $vgpr11, implicit $vgpr12, implicit $vgpr13, implicit $vgpr14, implicit $vgpr15, implicit $vgpr16, implicit $vgpr17, implicit $vgpr18, implicit $vgpr19, implicit $vgpr20, implicit $vgpr21, implicit $vgpr22, implicit $vgpr23, implicit $vgpr24, implicit $vgpr25, implicit $vgpr26, implicit $vgpr27, implicit $vgpr28, implicit $vgpr29, implicit $vgpr30, implicit $vgpr31 + ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF + ; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(s1022) = G_LOAD [[DEF]](p1) :: (load (s1022) from `i1022 addrspace(1)* undef`, align 8, addrspace 1) + ; CHECK-NEXT: [[ANYEXT:%[0-9]+]]:_(s1024) = G_ANYEXT [[LOAD]](s1022) + ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32), [[UV6:%[0-9]+]]:_(s32), [[UV7:%[0-9]+]]:_(s32), [[UV8:%[0-9]+]]:_(s32), [[UV9:%[0-9]+]]:_(s32), [[UV10:%[0-9]+]]:_(s32), [[UV11:%[0-9]+]]:_(s32), [[UV12:%[0-9]+]]:_(s32), [[UV13:%[0-9]+]]:_(s32), [[UV14:%[0-9]+]]:_(s32), [[UV15:%[0-9]+]]:_(s32), [[UV16:%[0-9]+]]:_(s32), [[UV17:%[0-9]+]]:_(s32), [[UV18:%[0-9]+]]:_(s32), [[UV19:%[0-9]+]]:_(s32), [[UV20:%[0-9]+]]:_(s32), [[UV21:%[0-9]+]]:_(s32), [[UV22:%[0-9]+]]:_(s32), [[UV23:%[0-9]+]]:_(s32), [[UV24:%[0-9]+]]:_(s32), [[UV25:%[0-9]+]]:_(s32), [[UV26:%[0-9]+]]:_(s32), [[UV27:%[0-9]+]]:_(s32), [[UV28:%[0-9]+]]:_(s32), [[UV29:%[0-9]+]]:_(s32), [[UV30:%[0-9]+]]:_(s32), [[UV31:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[ANYEXT]](s1024) + ; CHECK-NEXT: $vgpr0 = COPY [[UV]](s32) + ; CHECK-NEXT: $vgpr1 = COPY [[UV1]](s32) + ; CHECK-NEXT: $vgpr2 = COPY [[UV2]](s32) + ; CHECK-NEXT: $vgpr3 = COPY [[UV3]](s32) + ; CHECK-NEXT: $vgpr4 = COPY [[UV4]](s32) + ; CHECK-NEXT: $vgpr5 = COPY [[UV5]](s32) + ; CHECK-NEXT: $vgpr6 = COPY [[UV6]](s32) + ; CHECK-NEXT: $vgpr7 = COPY [[UV7]](s32) + ; CHECK-NEXT: $vgpr8 = COPY [[UV8]](s32) + ; CHECK-NEXT: $vgpr9 = COPY [[UV9]](s32) + ; CHECK-NEXT: $vgpr10 = COPY [[UV10]](s32) + ; CHECK-NEXT: $vgpr11 = COPY [[UV11]](s32) + ; CHECK-NEXT: $vgpr12 = COPY [[UV12]](s32) + ; CHECK-NEXT: $vgpr13 = COPY [[UV13]](s32) + ; CHECK-NEXT: $vgpr14 = COPY [[UV14]](s32) + ; CHECK-NEXT: $vgpr15 = COPY [[UV15]](s32) + ; CHECK-NEXT: $vgpr16 = COPY [[UV16]](s32) + ; CHECK-NEXT: $vgpr17 = COPY [[UV17]](s32) + ; CHECK-NEXT: $vgpr18 = COPY [[UV18]](s32) + ; CHECK-NEXT: $vgpr19 = COPY [[UV19]](s32) + ; CHECK-NEXT: $vgpr20 = COPY [[UV20]](s32) + ; CHECK-NEXT: $vgpr21 = COPY [[UV21]](s32) + ; CHECK-NEXT: $vgpr22 = COPY [[UV22]](s32) + ; CHECK-NEXT: $vgpr23 = COPY [[UV23]](s32) + ; CHECK-NEXT: $vgpr24 = COPY [[UV24]](s32) + ; CHECK-NEXT: $vgpr25 = COPY [[UV25]](s32) + ; CHECK-NEXT: $vgpr26 = COPY [[UV26]](s32) + ; CHECK-NEXT: $vgpr27 = COPY [[UV27]](s32) + ; CHECK-NEXT: $vgpr28 = COPY [[UV28]](s32) + ; CHECK-NEXT: $vgpr29 = COPY [[UV29]](s32) + ; CHECK-NEXT: $vgpr30 = COPY [[UV30]](s32) + ; CHECK-NEXT: $vgpr31 = COPY [[UV31]](s32) + ; CHECK-NEXT: SI_RETURN implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, 
implicit $vgpr6, implicit $vgpr7, implicit $vgpr8, implicit $vgpr9, implicit $vgpr10, implicit $vgpr11, implicit $vgpr12, implicit $vgpr13, implicit $vgpr14, implicit $vgpr15, implicit $vgpr16, implicit $vgpr17, implicit $vgpr18, implicit $vgpr19, implicit $vgpr20, implicit $vgpr21, implicit $vgpr22, implicit $vgpr23, implicit $vgpr24, implicit $vgpr25, implicit $vgpr26, implicit $vgpr27, implicit $vgpr28, implicit $vgpr29, implicit $vgpr30, implicit $vgpr31 %val = load i1022, i1022 addrspace(1)* undef ret i1022 %val } @@ -1380,46 +1200,43 @@ define i1022 @i1022_func_void() #0 { define signext i1022 @i1022_signext_func_void() #0 { ; CHECK-LABEL: name: i1022_signext_func_void ; CHECK: bb.1 (%ir-block.0): - ; CHECK: liveins: $sgpr30_sgpr31 - ; CHECK: [[COPY:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 - ; CHECK: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF - ; CHECK: [[LOAD:%[0-9]+]]:_(s1022) = G_LOAD [[DEF]](p1) :: (load (s1022) from `i1022 addrspace(1)* undef`, align 8, addrspace 1) - ; CHECK: [[SEXT:%[0-9]+]]:_(s1024) = G_SEXT [[LOAD]](s1022) - ; CHECK: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32), [[UV6:%[0-9]+]]:_(s32), [[UV7:%[0-9]+]]:_(s32), [[UV8:%[0-9]+]]:_(s32), [[UV9:%[0-9]+]]:_(s32), [[UV10:%[0-9]+]]:_(s32), [[UV11:%[0-9]+]]:_(s32), [[UV12:%[0-9]+]]:_(s32), [[UV13:%[0-9]+]]:_(s32), [[UV14:%[0-9]+]]:_(s32), [[UV15:%[0-9]+]]:_(s32), [[UV16:%[0-9]+]]:_(s32), [[UV17:%[0-9]+]]:_(s32), [[UV18:%[0-9]+]]:_(s32), [[UV19:%[0-9]+]]:_(s32), [[UV20:%[0-9]+]]:_(s32), [[UV21:%[0-9]+]]:_(s32), [[UV22:%[0-9]+]]:_(s32), [[UV23:%[0-9]+]]:_(s32), [[UV24:%[0-9]+]]:_(s32), [[UV25:%[0-9]+]]:_(s32), [[UV26:%[0-9]+]]:_(s32), [[UV27:%[0-9]+]]:_(s32), [[UV28:%[0-9]+]]:_(s32), [[UV29:%[0-9]+]]:_(s32), [[UV30:%[0-9]+]]:_(s32), [[UV31:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[SEXT]](s1024) - ; CHECK: $vgpr0 = COPY [[UV]](s32) - ; CHECK: $vgpr1 = COPY [[UV1]](s32) - ; CHECK: $vgpr2 = COPY [[UV2]](s32) - ; CHECK: $vgpr3 = COPY [[UV3]](s32) - ; CHECK: $vgpr4 = COPY [[UV4]](s32) - ; CHECK: $vgpr5 = COPY [[UV5]](s32) - ; CHECK: $vgpr6 = COPY [[UV6]](s32) - ; CHECK: $vgpr7 = COPY [[UV7]](s32) - ; CHECK: $vgpr8 = COPY [[UV8]](s32) - ; CHECK: $vgpr9 = COPY [[UV9]](s32) - ; CHECK: $vgpr10 = COPY [[UV10]](s32) - ; CHECK: $vgpr11 = COPY [[UV11]](s32) - ; CHECK: $vgpr12 = COPY [[UV12]](s32) - ; CHECK: $vgpr13 = COPY [[UV13]](s32) - ; CHECK: $vgpr14 = COPY [[UV14]](s32) - ; CHECK: $vgpr15 = COPY [[UV15]](s32) - ; CHECK: $vgpr16 = COPY [[UV16]](s32) - ; CHECK: $vgpr17 = COPY [[UV17]](s32) - ; CHECK: $vgpr18 = COPY [[UV18]](s32) - ; CHECK: $vgpr19 = COPY [[UV19]](s32) - ; CHECK: $vgpr20 = COPY [[UV20]](s32) - ; CHECK: $vgpr21 = COPY [[UV21]](s32) - ; CHECK: $vgpr22 = COPY [[UV22]](s32) - ; CHECK: $vgpr23 = COPY [[UV23]](s32) - ; CHECK: $vgpr24 = COPY [[UV24]](s32) - ; CHECK: $vgpr25 = COPY [[UV25]](s32) - ; CHECK: $vgpr26 = COPY [[UV26]](s32) - ; CHECK: $vgpr27 = COPY [[UV27]](s32) - ; CHECK: $vgpr28 = COPY [[UV28]](s32) - ; CHECK: $vgpr29 = COPY [[UV29]](s32) - ; CHECK: $vgpr30 = COPY [[UV30]](s32) - ; CHECK: $vgpr31 = COPY [[UV31]](s32) - ; CHECK: [[COPY1:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY]] - ; CHECK: S_SETPC_B64_return [[COPY1]], implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7, implicit $vgpr8, implicit $vgpr9, implicit $vgpr10, implicit $vgpr11, implicit $vgpr12, implicit $vgpr13, implicit $vgpr14, implicit $vgpr15, implicit $vgpr16, implicit $vgpr17, 
implicit $vgpr18, implicit $vgpr19, implicit $vgpr20, implicit $vgpr21, implicit $vgpr22, implicit $vgpr23, implicit $vgpr24, implicit $vgpr25, implicit $vgpr26, implicit $vgpr27, implicit $vgpr28, implicit $vgpr29, implicit $vgpr30, implicit $vgpr31 + ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF + ; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(s1022) = G_LOAD [[DEF]](p1) :: (load (s1022) from `i1022 addrspace(1)* undef`, align 8, addrspace 1) + ; CHECK-NEXT: [[SEXT:%[0-9]+]]:_(s1024) = G_SEXT [[LOAD]](s1022) + ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32), [[UV6:%[0-9]+]]:_(s32), [[UV7:%[0-9]+]]:_(s32), [[UV8:%[0-9]+]]:_(s32), [[UV9:%[0-9]+]]:_(s32), [[UV10:%[0-9]+]]:_(s32), [[UV11:%[0-9]+]]:_(s32), [[UV12:%[0-9]+]]:_(s32), [[UV13:%[0-9]+]]:_(s32), [[UV14:%[0-9]+]]:_(s32), [[UV15:%[0-9]+]]:_(s32), [[UV16:%[0-9]+]]:_(s32), [[UV17:%[0-9]+]]:_(s32), [[UV18:%[0-9]+]]:_(s32), [[UV19:%[0-9]+]]:_(s32), [[UV20:%[0-9]+]]:_(s32), [[UV21:%[0-9]+]]:_(s32), [[UV22:%[0-9]+]]:_(s32), [[UV23:%[0-9]+]]:_(s32), [[UV24:%[0-9]+]]:_(s32), [[UV25:%[0-9]+]]:_(s32), [[UV26:%[0-9]+]]:_(s32), [[UV27:%[0-9]+]]:_(s32), [[UV28:%[0-9]+]]:_(s32), [[UV29:%[0-9]+]]:_(s32), [[UV30:%[0-9]+]]:_(s32), [[UV31:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[SEXT]](s1024) + ; CHECK-NEXT: $vgpr0 = COPY [[UV]](s32) + ; CHECK-NEXT: $vgpr1 = COPY [[UV1]](s32) + ; CHECK-NEXT: $vgpr2 = COPY [[UV2]](s32) + ; CHECK-NEXT: $vgpr3 = COPY [[UV3]](s32) + ; CHECK-NEXT: $vgpr4 = COPY [[UV4]](s32) + ; CHECK-NEXT: $vgpr5 = COPY [[UV5]](s32) + ; CHECK-NEXT: $vgpr6 = COPY [[UV6]](s32) + ; CHECK-NEXT: $vgpr7 = COPY [[UV7]](s32) + ; CHECK-NEXT: $vgpr8 = COPY [[UV8]](s32) + ; CHECK-NEXT: $vgpr9 = COPY [[UV9]](s32) + ; CHECK-NEXT: $vgpr10 = COPY [[UV10]](s32) + ; CHECK-NEXT: $vgpr11 = COPY [[UV11]](s32) + ; CHECK-NEXT: $vgpr12 = COPY [[UV12]](s32) + ; CHECK-NEXT: $vgpr13 = COPY [[UV13]](s32) + ; CHECK-NEXT: $vgpr14 = COPY [[UV14]](s32) + ; CHECK-NEXT: $vgpr15 = COPY [[UV15]](s32) + ; CHECK-NEXT: $vgpr16 = COPY [[UV16]](s32) + ; CHECK-NEXT: $vgpr17 = COPY [[UV17]](s32) + ; CHECK-NEXT: $vgpr18 = COPY [[UV18]](s32) + ; CHECK-NEXT: $vgpr19 = COPY [[UV19]](s32) + ; CHECK-NEXT: $vgpr20 = COPY [[UV20]](s32) + ; CHECK-NEXT: $vgpr21 = COPY [[UV21]](s32) + ; CHECK-NEXT: $vgpr22 = COPY [[UV22]](s32) + ; CHECK-NEXT: $vgpr23 = COPY [[UV23]](s32) + ; CHECK-NEXT: $vgpr24 = COPY [[UV24]](s32) + ; CHECK-NEXT: $vgpr25 = COPY [[UV25]](s32) + ; CHECK-NEXT: $vgpr26 = COPY [[UV26]](s32) + ; CHECK-NEXT: $vgpr27 = COPY [[UV27]](s32) + ; CHECK-NEXT: $vgpr28 = COPY [[UV28]](s32) + ; CHECK-NEXT: $vgpr29 = COPY [[UV29]](s32) + ; CHECK-NEXT: $vgpr30 = COPY [[UV30]](s32) + ; CHECK-NEXT: $vgpr31 = COPY [[UV31]](s32) + ; CHECK-NEXT: SI_RETURN implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7, implicit $vgpr8, implicit $vgpr9, implicit $vgpr10, implicit $vgpr11, implicit $vgpr12, implicit $vgpr13, implicit $vgpr14, implicit $vgpr15, implicit $vgpr16, implicit $vgpr17, implicit $vgpr18, implicit $vgpr19, implicit $vgpr20, implicit $vgpr21, implicit $vgpr22, implicit $vgpr23, implicit $vgpr24, implicit $vgpr25, implicit $vgpr26, implicit $vgpr27, implicit $vgpr28, implicit $vgpr29, implicit $vgpr30, implicit $vgpr31 %val = load i1022, i1022 addrspace(1)* undef ret i1022 %val } @@ -1427,46 +1244,43 @@ define signext i1022 @i1022_signext_func_void() #0 { define zeroext i1022 @i1022_zeroext_func_void() #0 { ; 
CHECK-LABEL: name: i1022_zeroext_func_void ; CHECK: bb.1 (%ir-block.0): - ; CHECK: liveins: $sgpr30_sgpr31 - ; CHECK: [[COPY:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 - ; CHECK: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF - ; CHECK: [[LOAD:%[0-9]+]]:_(s1022) = G_LOAD [[DEF]](p1) :: (load (s1022) from `i1022 addrspace(1)* undef`, align 8, addrspace 1) - ; CHECK: [[ZEXT:%[0-9]+]]:_(s1024) = G_ZEXT [[LOAD]](s1022) - ; CHECK: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32), [[UV6:%[0-9]+]]:_(s32), [[UV7:%[0-9]+]]:_(s32), [[UV8:%[0-9]+]]:_(s32), [[UV9:%[0-9]+]]:_(s32), [[UV10:%[0-9]+]]:_(s32), [[UV11:%[0-9]+]]:_(s32), [[UV12:%[0-9]+]]:_(s32), [[UV13:%[0-9]+]]:_(s32), [[UV14:%[0-9]+]]:_(s32), [[UV15:%[0-9]+]]:_(s32), [[UV16:%[0-9]+]]:_(s32), [[UV17:%[0-9]+]]:_(s32), [[UV18:%[0-9]+]]:_(s32), [[UV19:%[0-9]+]]:_(s32), [[UV20:%[0-9]+]]:_(s32), [[UV21:%[0-9]+]]:_(s32), [[UV22:%[0-9]+]]:_(s32), [[UV23:%[0-9]+]]:_(s32), [[UV24:%[0-9]+]]:_(s32), [[UV25:%[0-9]+]]:_(s32), [[UV26:%[0-9]+]]:_(s32), [[UV27:%[0-9]+]]:_(s32), [[UV28:%[0-9]+]]:_(s32), [[UV29:%[0-9]+]]:_(s32), [[UV30:%[0-9]+]]:_(s32), [[UV31:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[ZEXT]](s1024) - ; CHECK: $vgpr0 = COPY [[UV]](s32) - ; CHECK: $vgpr1 = COPY [[UV1]](s32) - ; CHECK: $vgpr2 = COPY [[UV2]](s32) - ; CHECK: $vgpr3 = COPY [[UV3]](s32) - ; CHECK: $vgpr4 = COPY [[UV4]](s32) - ; CHECK: $vgpr5 = COPY [[UV5]](s32) - ; CHECK: $vgpr6 = COPY [[UV6]](s32) - ; CHECK: $vgpr7 = COPY [[UV7]](s32) - ; CHECK: $vgpr8 = COPY [[UV8]](s32) - ; CHECK: $vgpr9 = COPY [[UV9]](s32) - ; CHECK: $vgpr10 = COPY [[UV10]](s32) - ; CHECK: $vgpr11 = COPY [[UV11]](s32) - ; CHECK: $vgpr12 = COPY [[UV12]](s32) - ; CHECK: $vgpr13 = COPY [[UV13]](s32) - ; CHECK: $vgpr14 = COPY [[UV14]](s32) - ; CHECK: $vgpr15 = COPY [[UV15]](s32) - ; CHECK: $vgpr16 = COPY [[UV16]](s32) - ; CHECK: $vgpr17 = COPY [[UV17]](s32) - ; CHECK: $vgpr18 = COPY [[UV18]](s32) - ; CHECK: $vgpr19 = COPY [[UV19]](s32) - ; CHECK: $vgpr20 = COPY [[UV20]](s32) - ; CHECK: $vgpr21 = COPY [[UV21]](s32) - ; CHECK: $vgpr22 = COPY [[UV22]](s32) - ; CHECK: $vgpr23 = COPY [[UV23]](s32) - ; CHECK: $vgpr24 = COPY [[UV24]](s32) - ; CHECK: $vgpr25 = COPY [[UV25]](s32) - ; CHECK: $vgpr26 = COPY [[UV26]](s32) - ; CHECK: $vgpr27 = COPY [[UV27]](s32) - ; CHECK: $vgpr28 = COPY [[UV28]](s32) - ; CHECK: $vgpr29 = COPY [[UV29]](s32) - ; CHECK: $vgpr30 = COPY [[UV30]](s32) - ; CHECK: $vgpr31 = COPY [[UV31]](s32) - ; CHECK: [[COPY1:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY]] - ; CHECK: S_SETPC_B64_return [[COPY1]], implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7, implicit $vgpr8, implicit $vgpr9, implicit $vgpr10, implicit $vgpr11, implicit $vgpr12, implicit $vgpr13, implicit $vgpr14, implicit $vgpr15, implicit $vgpr16, implicit $vgpr17, implicit $vgpr18, implicit $vgpr19, implicit $vgpr20, implicit $vgpr21, implicit $vgpr22, implicit $vgpr23, implicit $vgpr24, implicit $vgpr25, implicit $vgpr26, implicit $vgpr27, implicit $vgpr28, implicit $vgpr29, implicit $vgpr30, implicit $vgpr31 + ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF + ; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(s1022) = G_LOAD [[DEF]](p1) :: (load (s1022) from `i1022 addrspace(1)* undef`, align 8, addrspace 1) + ; CHECK-NEXT: [[ZEXT:%[0-9]+]]:_(s1024) = G_ZEXT [[LOAD]](s1022) + ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32), 
[[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32), [[UV6:%[0-9]+]]:_(s32), [[UV7:%[0-9]+]]:_(s32), [[UV8:%[0-9]+]]:_(s32), [[UV9:%[0-9]+]]:_(s32), [[UV10:%[0-9]+]]:_(s32), [[UV11:%[0-9]+]]:_(s32), [[UV12:%[0-9]+]]:_(s32), [[UV13:%[0-9]+]]:_(s32), [[UV14:%[0-9]+]]:_(s32), [[UV15:%[0-9]+]]:_(s32), [[UV16:%[0-9]+]]:_(s32), [[UV17:%[0-9]+]]:_(s32), [[UV18:%[0-9]+]]:_(s32), [[UV19:%[0-9]+]]:_(s32), [[UV20:%[0-9]+]]:_(s32), [[UV21:%[0-9]+]]:_(s32), [[UV22:%[0-9]+]]:_(s32), [[UV23:%[0-9]+]]:_(s32), [[UV24:%[0-9]+]]:_(s32), [[UV25:%[0-9]+]]:_(s32), [[UV26:%[0-9]+]]:_(s32), [[UV27:%[0-9]+]]:_(s32), [[UV28:%[0-9]+]]:_(s32), [[UV29:%[0-9]+]]:_(s32), [[UV30:%[0-9]+]]:_(s32), [[UV31:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[ZEXT]](s1024) + ; CHECK-NEXT: $vgpr0 = COPY [[UV]](s32) + ; CHECK-NEXT: $vgpr1 = COPY [[UV1]](s32) + ; CHECK-NEXT: $vgpr2 = COPY [[UV2]](s32) + ; CHECK-NEXT: $vgpr3 = COPY [[UV3]](s32) + ; CHECK-NEXT: $vgpr4 = COPY [[UV4]](s32) + ; CHECK-NEXT: $vgpr5 = COPY [[UV5]](s32) + ; CHECK-NEXT: $vgpr6 = COPY [[UV6]](s32) + ; CHECK-NEXT: $vgpr7 = COPY [[UV7]](s32) + ; CHECK-NEXT: $vgpr8 = COPY [[UV8]](s32) + ; CHECK-NEXT: $vgpr9 = COPY [[UV9]](s32) + ; CHECK-NEXT: $vgpr10 = COPY [[UV10]](s32) + ; CHECK-NEXT: $vgpr11 = COPY [[UV11]](s32) + ; CHECK-NEXT: $vgpr12 = COPY [[UV12]](s32) + ; CHECK-NEXT: $vgpr13 = COPY [[UV13]](s32) + ; CHECK-NEXT: $vgpr14 = COPY [[UV14]](s32) + ; CHECK-NEXT: $vgpr15 = COPY [[UV15]](s32) + ; CHECK-NEXT: $vgpr16 = COPY [[UV16]](s32) + ; CHECK-NEXT: $vgpr17 = COPY [[UV17]](s32) + ; CHECK-NEXT: $vgpr18 = COPY [[UV18]](s32) + ; CHECK-NEXT: $vgpr19 = COPY [[UV19]](s32) + ; CHECK-NEXT: $vgpr20 = COPY [[UV20]](s32) + ; CHECK-NEXT: $vgpr21 = COPY [[UV21]](s32) + ; CHECK-NEXT: $vgpr22 = COPY [[UV22]](s32) + ; CHECK-NEXT: $vgpr23 = COPY [[UV23]](s32) + ; CHECK-NEXT: $vgpr24 = COPY [[UV24]](s32) + ; CHECK-NEXT: $vgpr25 = COPY [[UV25]](s32) + ; CHECK-NEXT: $vgpr26 = COPY [[UV26]](s32) + ; CHECK-NEXT: $vgpr27 = COPY [[UV27]](s32) + ; CHECK-NEXT: $vgpr28 = COPY [[UV28]](s32) + ; CHECK-NEXT: $vgpr29 = COPY [[UV29]](s32) + ; CHECK-NEXT: $vgpr30 = COPY [[UV30]](s32) + ; CHECK-NEXT: $vgpr31 = COPY [[UV31]](s32) + ; CHECK-NEXT: SI_RETURN implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7, implicit $vgpr8, implicit $vgpr9, implicit $vgpr10, implicit $vgpr11, implicit $vgpr12, implicit $vgpr13, implicit $vgpr14, implicit $vgpr15, implicit $vgpr16, implicit $vgpr17, implicit $vgpr18, implicit $vgpr19, implicit $vgpr20, implicit $vgpr21, implicit $vgpr22, implicit $vgpr23, implicit $vgpr24, implicit $vgpr25, implicit $vgpr26, implicit $vgpr27, implicit $vgpr28, implicit $vgpr29, implicit $vgpr30, implicit $vgpr31 %val = load i1022, i1022 addrspace(1)* undef ret i1022 %val } @@ -1476,32 +1290,31 @@ define zeroext i1022 @i1022_zeroext_func_void() #0 { define %struct.with.ptrs @ptr_in_struct_func_void() #0 { ; CHECK-LABEL: name: ptr_in_struct_func_void ; CHECK: bb.1 (%ir-block.0): - ; CHECK: liveins: $vgpr0, $sgpr30_sgpr31 - ; CHECK: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 - ; CHECK: [[COPY1:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 - ; CHECK: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF - ; CHECK: [[LOAD:%[0-9]+]]:_(<32 x s32>) = G_LOAD [[DEF]](p1) :: (volatile load (<32 x s32>) from `%struct.with.ptrs addrspace(1)* undef`, addrspace 1) - ; CHECK: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 128 - ; CHECK: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[DEF]], [[C]](s64) - ; CHECK: [[LOAD1:%[0-9]+]]:_(p3) = G_LOAD [[PTR_ADD]](p1) :: 
(volatile load (p3) from `%struct.with.ptrs addrspace(1)* undef` + 128, align 128, addrspace 1) - ; CHECK: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 136 - ; CHECK: [[PTR_ADD1:%[0-9]+]]:_(p1) = G_PTR_ADD [[DEF]], [[C1]](s64) - ; CHECK: [[LOAD2:%[0-9]+]]:_(p1) = G_LOAD [[PTR_ADD1]](p1) :: (volatile load (p1) from `%struct.with.ptrs addrspace(1)* undef` + 136, addrspace 1) - ; CHECK: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 144 - ; CHECK: [[PTR_ADD2:%[0-9]+]]:_(p1) = G_PTR_ADD [[DEF]], [[C2]](s64) - ; CHECK: [[LOAD3:%[0-9]+]]:_(<2 x p1>) = G_LOAD [[PTR_ADD2]](p1) :: (volatile load (<2 x p1>) from `%struct.with.ptrs addrspace(1)* undef` + 144, addrspace 1) - ; CHECK: G_STORE [[LOAD]](<32 x s32>), [[COPY]](p5) :: (store (<32 x s32>), addrspace 5) - ; CHECK: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 128 - ; CHECK: [[PTR_ADD3:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C3]](s32) - ; CHECK: G_STORE [[LOAD1]](p3), [[PTR_ADD3]](p5) :: (store (p3), align 128, addrspace 5) - ; CHECK: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 136 - ; CHECK: [[PTR_ADD4:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C4]](s32) - ; CHECK: G_STORE [[LOAD2]](p1), [[PTR_ADD4]](p5) :: (store (p1), addrspace 5) - ; CHECK: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 144 - ; CHECK: [[PTR_ADD5:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C5]](s32) - ; CHECK: G_STORE [[LOAD3]](<2 x p1>), [[PTR_ADD5]](p5) :: (store (<2 x p1>), addrspace 5) - ; CHECK: [[COPY2:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY1]] - ; CHECK: S_SETPC_B64_return [[COPY2]] + ; CHECK-NEXT: liveins: $vgpr0 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 + ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF + ; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(<32 x s32>) = G_LOAD [[DEF]](p1) :: (volatile load (<32 x s32>) from `%struct.with.ptrs addrspace(1)* undef`, addrspace 1) + ; CHECK-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 128 + ; CHECK-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[DEF]], [[C]](s64) + ; CHECK-NEXT: [[LOAD1:%[0-9]+]]:_(p3) = G_LOAD [[PTR_ADD]](p1) :: (volatile load (p3) from `%struct.with.ptrs addrspace(1)* undef` + 128, align 128, addrspace 1) + ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 136 + ; CHECK-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p1) = G_PTR_ADD [[DEF]], [[C1]](s64) + ; CHECK-NEXT: [[LOAD2:%[0-9]+]]:_(p1) = G_LOAD [[PTR_ADD1]](p1) :: (volatile load (p1) from `%struct.with.ptrs addrspace(1)* undef` + 136, addrspace 1) + ; CHECK-NEXT: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 144 + ; CHECK-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p1) = G_PTR_ADD [[DEF]], [[C2]](s64) + ; CHECK-NEXT: [[LOAD3:%[0-9]+]]:_(<2 x p1>) = G_LOAD [[PTR_ADD2]](p1) :: (volatile load (<2 x p1>) from `%struct.with.ptrs addrspace(1)* undef` + 144, addrspace 1) + ; CHECK-NEXT: G_STORE [[LOAD]](<32 x s32>), [[COPY]](p5) :: (store (<32 x s32>), addrspace 5) + ; CHECK-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 128 + ; CHECK-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C3]](s32) + ; CHECK-NEXT: G_STORE [[LOAD1]](p3), [[PTR_ADD3]](p5) :: (store (p3), align 128, addrspace 5) + ; CHECK-NEXT: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 136 + ; CHECK-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C4]](s32) + ; CHECK-NEXT: G_STORE [[LOAD2]](p1), [[PTR_ADD4]](p5) :: (store (p1), addrspace 5) + ; CHECK-NEXT: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 144 + ; CHECK-NEXT: [[PTR_ADD5:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C5]](s32) + ; CHECK-NEXT: G_STORE [[LOAD3]](<2 x p1>), [[PTR_ADD5]](p5) :: (store (<2 x p1>), addrspace 5) + ; CHECK-NEXT: SI_RETURN %val = load volatile %struct.with.ptrs, 
%struct.with.ptrs addrspace(1)* undef ret %struct.with.ptrs %val } diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/global-value.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/global-value.ll index 420b9ef437b1..17a1a86cc649 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/global-value.ll +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/global-value.ll @@ -16,113 +16,89 @@ define i32 addrspace(4)* @external_constant_got() { ; GCN-LABEL: name: external_constant_got ; GCN: bb.1 (%ir-block.0): - ; GCN: liveins: $sgpr30_sgpr31 - ; GCN: [[COPY:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 ; GCN: [[SI_PC_ADD_REL_OFFSET:%[0-9]+]]:sreg_64(p4) = SI_PC_ADD_REL_OFFSET target-flags(amdgpu-gotprel32-lo) @external_constant + 4, target-flags(amdgpu-gotprel32-hi) @external_constant + 12, implicit-def $scc ; GCN: [[LOAD:%[0-9]+]]:_(p4) = G_LOAD [[SI_PC_ADD_REL_OFFSET]](p4) :: (dereferenceable invariant load (p4) from got, addrspace 4) ; GCN: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[LOAD]](p4) ; GCN: $vgpr0 = COPY [[UV]](s32) ; GCN: $vgpr1 = COPY [[UV1]](s32) - ; GCN: [[COPY1:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY]] - ; GCN: S_SETPC_B64_return [[COPY1]], implicit $vgpr0, implicit $vgpr1 + ; GCN: SI_RETURN implicit $vgpr0, implicit $vgpr1 ret i32 addrspace(4)* @external_constant } define i32 addrspace(1)* @external_global_got() { ; GCN-LABEL: name: external_global_got ; GCN: bb.1 (%ir-block.0): - ; GCN: liveins: $sgpr30_sgpr31 - ; GCN: [[COPY:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 ; GCN: [[SI_PC_ADD_REL_OFFSET:%[0-9]+]]:sreg_64(p4) = SI_PC_ADD_REL_OFFSET target-flags(amdgpu-gotprel32-lo) @external_global + 4, target-flags(amdgpu-gotprel32-hi) @external_global + 12, implicit-def $scc ; GCN: [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[SI_PC_ADD_REL_OFFSET]](p4) :: (dereferenceable invariant load (p1) from got, addrspace 4) ; GCN: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[LOAD]](p1) ; GCN: $vgpr0 = COPY [[UV]](s32) ; GCN: $vgpr1 = COPY [[UV1]](s32) - ; GCN: [[COPY1:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY]] - ; GCN: S_SETPC_B64_return [[COPY1]], implicit $vgpr0, implicit $vgpr1 + ; GCN: SI_RETURN implicit $vgpr0, implicit $vgpr1 ret i32 addrspace(1)* @external_global } define i32 addrspace(999)* @external_other_got() { ; GCN-LABEL: name: external_other_got ; GCN: bb.1 (%ir-block.0): - ; GCN: liveins: $sgpr30_sgpr31 - ; GCN: [[COPY:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 ; GCN: [[SI_PC_ADD_REL_OFFSET:%[0-9]+]]:sreg_64(p4) = SI_PC_ADD_REL_OFFSET target-flags(amdgpu-gotprel32-lo) @external_other + 4, target-flags(amdgpu-gotprel32-hi) @external_other + 12, implicit-def $scc ; GCN: [[LOAD:%[0-9]+]]:_(p999) = G_LOAD [[SI_PC_ADD_REL_OFFSET]](p4) :: (dereferenceable invariant load (p999) from got, addrspace 4) ; GCN: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[LOAD]](p999) ; GCN: $vgpr0 = COPY [[UV]](s32) ; GCN: $vgpr1 = COPY [[UV1]](s32) - ; GCN: [[COPY1:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY]] - ; GCN: S_SETPC_B64_return [[COPY1]], implicit $vgpr0, implicit $vgpr1 + ; GCN: SI_RETURN implicit $vgpr0, implicit $vgpr1 ret i32 addrspace(999)* @external_other } define i32 addrspace(4)* @internal_constant_pcrel() { ; GCN-LABEL: name: internal_constant_pcrel ; GCN: bb.1 (%ir-block.0): - ; GCN: liveins: $sgpr30_sgpr31 - ; GCN: [[COPY:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 ; GCN: [[SI_PC_ADD_REL_OFFSET:%[0-9]+]]:sreg_64(p4) = SI_PC_ADD_REL_OFFSET target-flags(amdgpu-rel32-lo) @internal_constant + 4, target-flags(amdgpu-rel32-hi) @internal_constant + 12, implicit-def $scc ; GCN: 
[[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[SI_PC_ADD_REL_OFFSET]](p4) ; GCN: $vgpr0 = COPY [[UV]](s32) ; GCN: $vgpr1 = COPY [[UV1]](s32) - ; GCN: [[COPY1:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY]] - ; GCN: S_SETPC_B64_return [[COPY1]], implicit $vgpr0, implicit $vgpr1 + ; GCN: SI_RETURN implicit $vgpr0, implicit $vgpr1 ret i32 addrspace(4)* @internal_constant } define i32 addrspace(1)* @internal_global_pcrel() { ; GCN-LABEL: name: internal_global_pcrel ; GCN: bb.1 (%ir-block.0): - ; GCN: liveins: $sgpr30_sgpr31 - ; GCN: [[COPY:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 ; GCN: [[SI_PC_ADD_REL_OFFSET:%[0-9]+]]:sreg_64(p1) = SI_PC_ADD_REL_OFFSET target-flags(amdgpu-rel32-lo) @internal_global + 4, target-flags(amdgpu-rel32-hi) @internal_global + 12, implicit-def $scc ; GCN: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[SI_PC_ADD_REL_OFFSET]](p1) ; GCN: $vgpr0 = COPY [[UV]](s32) ; GCN: $vgpr1 = COPY [[UV1]](s32) - ; GCN: [[COPY1:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY]] - ; GCN: S_SETPC_B64_return [[COPY1]], implicit $vgpr0, implicit $vgpr1 + ; GCN: SI_RETURN implicit $vgpr0, implicit $vgpr1 ret i32 addrspace(1)* @internal_global } define i32 addrspace(999)* @internal_other_pcrel() { ; GCN-LABEL: name: internal_other_pcrel ; GCN: bb.1 (%ir-block.0): - ; GCN: liveins: $sgpr30_sgpr31 - ; GCN: [[COPY:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 ; GCN: [[SI_PC_ADD_REL_OFFSET:%[0-9]+]]:sreg_64(p999) = SI_PC_ADD_REL_OFFSET target-flags(amdgpu-rel32-lo) @internal_other + 4, target-flags(amdgpu-rel32-hi) @internal_other + 12, implicit-def $scc ; GCN: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[SI_PC_ADD_REL_OFFSET]](p999) ; GCN: $vgpr0 = COPY [[UV]](s32) ; GCN: $vgpr1 = COPY [[UV1]](s32) - ; GCN: [[COPY1:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY]] - ; GCN: S_SETPC_B64_return [[COPY1]], implicit $vgpr0, implicit $vgpr1 + ; GCN: SI_RETURN implicit $vgpr0, implicit $vgpr1 ret i32 addrspace(999)* @internal_other } define i32 addrspace(6)* @external_constant32_got() { ; GCN-LABEL: name: external_constant32_got ; GCN: bb.1 (%ir-block.0): - ; GCN: liveins: $sgpr30_sgpr31 - ; GCN: [[COPY:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 ; GCN: [[SI_PC_ADD_REL_OFFSET:%[0-9]+]]:sreg_64(p4) = SI_PC_ADD_REL_OFFSET target-flags(amdgpu-gotprel32-lo) @external_constant32 + 4, target-flags(amdgpu-gotprel32-hi) @external_constant32 + 12, implicit-def $scc ; GCN: [[LOAD:%[0-9]+]]:_(p4) = G_LOAD [[SI_PC_ADD_REL_OFFSET]](p4) :: (dereferenceable invariant load (p4) from got, addrspace 4) ; GCN: [[EXTRACT:%[0-9]+]]:_(p6) = G_EXTRACT [[LOAD]](p4), 0 ; GCN: $vgpr0 = COPY [[EXTRACT]](p6) - ; GCN: [[COPY1:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY]] - ; GCN: S_SETPC_B64_return [[COPY1]], implicit $vgpr0 + ; GCN: SI_RETURN implicit $vgpr0 ret i32 addrspace(6)* @external_constant32 } define i32 addrspace(6)* @internal_constant32_pcrel() { ; GCN-LABEL: name: internal_constant32_pcrel ; GCN: bb.1 (%ir-block.0): - ; GCN: liveins: $sgpr30_sgpr31 - ; GCN: [[COPY:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 ; GCN: [[SI_PC_ADD_REL_OFFSET:%[0-9]+]]:sreg_64(p4) = SI_PC_ADD_REL_OFFSET target-flags(amdgpu-rel32-lo) @internal_constant32 + 4, target-flags(amdgpu-rel32-hi) @internal_constant32 + 12, implicit-def $scc ; GCN: [[EXTRACT:%[0-9]+]]:_(p6) = G_EXTRACT [[SI_PC_ADD_REL_OFFSET]](p4), 0 ; GCN: $vgpr0 = COPY [[EXTRACT]](p6) - ; GCN: [[COPY1:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY]] - ; GCN: S_SETPC_B64_return [[COPY1]], implicit $vgpr0 + ; GCN: SI_RETURN implicit $vgpr0 ret i32 addrspace(6)* @internal_constant32 } diff 
--git a/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-atomicrmw.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-atomicrmw.ll index 9ed3ec23a181..5206284cf572 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-atomicrmw.ll +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-atomicrmw.ll @@ -4,15 +4,13 @@ define float @test_atomicrmw_fadd(float addrspace(3)* %addr) { ; CHECK-LABEL: name: test_atomicrmw_fadd ; CHECK: bb.1 (%ir-block.0): - ; CHECK-NEXT: liveins: $vgpr0, $sgpr30_sgpr31 + ; CHECK-NEXT: liveins: $vgpr0 ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_FCONSTANT float 1.000000e+00 ; CHECK-NEXT: [[ATOMICRMW_FADD:%[0-9]+]]:_(s32) = G_ATOMICRMW_FADD [[COPY]](p3), [[C]] :: (load store seq_cst (s32) on %ir.addr, addrspace 3) ; CHECK-NEXT: $vgpr0 = COPY [[ATOMICRMW_FADD]](s32) - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY1]] - ; CHECK-NEXT: S_SETPC_B64_return [[COPY2]], implicit $vgpr0 + ; CHECK-NEXT: SI_RETURN implicit $vgpr0 %oldval = atomicrmw fadd float addrspace(3)* %addr, float 1.0 seq_cst ret float %oldval } @@ -21,10 +19,9 @@ define float @test_atomicrmw_fsub(float addrspace(3)* %addr) { ; CHECK-LABEL: name: test_atomicrmw_fsub ; CHECK: bb.1 (%ir-block.0): ; CHECK-NEXT: successors: %bb.2(0x80000000) - ; CHECK-NEXT: liveins: $vgpr0, $sgpr30_sgpr31 + ; CHECK-NEXT: liveins: $vgpr0 ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_FCONSTANT float 1.000000e+00 ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 ; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s32) from %ir.addr, addrspace 3) @@ -33,8 +30,8 @@ define float @test_atomicrmw_fsub(float addrspace(3)* %addr) { ; CHECK-NEXT: bb.2.atomicrmw.start: ; CHECK-NEXT: successors: %bb.3(0x40000000), %bb.2(0x40000000) ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[PHI:%[0-9]+]]:_(s64) = G_PHI %9(s64), %bb.2, [[C1]](s64), %bb.1 - ; CHECK-NEXT: [[PHI1:%[0-9]+]]:_(s32) = G_PHI [[LOAD]](s32), %bb.1, %7(s32), %bb.2 + ; CHECK-NEXT: [[PHI:%[0-9]+]]:_(s64) = G_PHI %8(s64), %bb.2, [[C1]](s64), %bb.1 + ; CHECK-NEXT: [[PHI1:%[0-9]+]]:_(s32) = G_PHI [[LOAD]](s32), %bb.1, %6(s32), %bb.2 ; CHECK-NEXT: [[FSUB:%[0-9]+]]:_(s32) = G_FSUB [[PHI1]], [[C]] ; CHECK-NEXT: [[ATOMIC_CMPXCHG_WITH_SUCCESS:%[0-9]+]]:_(s32), [[ATOMIC_CMPXCHG_WITH_SUCCESS1:%[0-9]+]]:_(s1) = G_ATOMIC_CMPXCHG_WITH_SUCCESS [[COPY]](p3), [[PHI1]], [[FSUB]] :: (load store seq_cst seq_cst (s32) on %ir.2, addrspace 3) ; CHECK-NEXT: [[INT:%[0-9]+]]:_(s64) = G_INTRINSIC intrinsic(@llvm.amdgcn.if.break), [[ATOMIC_CMPXCHG_WITH_SUCCESS1]](s1), [[PHI]](s64) @@ -47,8 +44,7 @@ define float @test_atomicrmw_fsub(float addrspace(3)* %addr) { ; CHECK-NEXT: [[PHI3:%[0-9]+]]:_(s64) = G_PHI [[INT]](s64), %bb.2 ; CHECK-NEXT: G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.end.cf), [[PHI3]](s64) ; CHECK-NEXT: $vgpr0 = COPY [[PHI2]](s32) - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY1]] - ; CHECK-NEXT: S_SETPC_B64_return [[COPY2]], implicit $vgpr0 + ; CHECK-NEXT: SI_RETURN implicit $vgpr0 %oldval = atomicrmw fsub float addrspace(3)* %addr, float 1.0 seq_cst ret float %oldval } diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-call-abi-attribute-hints.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-call-abi-attribute-hints.ll index 2e647da9b5df..7015bd8f9c96 100644 --- 
a/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-call-abi-attribute-hints.ll +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-call-abi-attribute-hints.ll @@ -125,7 +125,7 @@ define amdgpu_kernel void @kernel_call_no_other_sgprs() { define void @func_call_no_workitem_ids() { ; CHECK-LABEL: name: func_call_no_workitem_ids ; CHECK: bb.1 (%ir-block.0): - ; CHECK-NEXT: liveins: $sgpr12, $sgpr13, $sgpr14, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9, $sgpr10_sgpr11, $sgpr30_sgpr31 + ; CHECK-NEXT: liveins: $sgpr12, $sgpr13, $sgpr14, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9, $sgpr10_sgpr11 ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr_32 = COPY $sgpr14 ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr_32 = COPY $sgpr13 @@ -134,29 +134,27 @@ define void @func_call_no_workitem_ids() { ; CHECK-NEXT: [[COPY4:%[0-9]+]]:sgpr_64 = COPY $sgpr8_sgpr9 ; CHECK-NEXT: [[COPY5:%[0-9]+]]:sgpr_64 = COPY $sgpr6_sgpr7 ; CHECK-NEXT: [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5 - ; CHECK-NEXT: [[COPY7:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 ; CHECK-NEXT: ADJCALLSTACKUP 0, 0, implicit-def $scc ; CHECK-NEXT: [[GV:%[0-9]+]]:_(p0) = G_GLOBAL_VALUE @extern - ; CHECK-NEXT: [[COPY8:%[0-9]+]]:_(p4) = COPY [[COPY6]] - ; CHECK-NEXT: [[COPY9:%[0-9]+]]:_(p4) = COPY [[COPY5]] - ; CHECK-NEXT: [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY4]] - ; CHECK-NEXT: [[COPY11:%[0-9]+]]:_(s64) = COPY [[COPY3]] - ; CHECK-NEXT: [[COPY12:%[0-9]+]]:_(s32) = COPY [[COPY2]] - ; CHECK-NEXT: [[COPY13:%[0-9]+]]:_(s32) = COPY [[COPY1]] - ; CHECK-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY]] - ; CHECK-NEXT: [[COPY15:%[0-9]+]]:_(<4 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3 - ; CHECK-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY15]](<4 x s32>) - ; CHECK-NEXT: $sgpr4_sgpr5 = COPY [[COPY8]](p4) - ; CHECK-NEXT: $sgpr6_sgpr7 = COPY [[COPY9]](p4) - ; CHECK-NEXT: $sgpr8_sgpr9 = COPY [[COPY10]](p4) - ; CHECK-NEXT: $sgpr10_sgpr11 = COPY [[COPY11]](s64) - ; CHECK-NEXT: $sgpr12 = COPY [[COPY12]](s32) - ; CHECK-NEXT: $sgpr13 = COPY [[COPY13]](s32) - ; CHECK-NEXT: $sgpr14 = COPY [[COPY14]](s32) + ; CHECK-NEXT: [[COPY7:%[0-9]+]]:_(p4) = COPY [[COPY6]] + ; CHECK-NEXT: [[COPY8:%[0-9]+]]:_(p4) = COPY [[COPY5]] + ; CHECK-NEXT: [[COPY9:%[0-9]+]]:_(p4) = COPY [[COPY4]] + ; CHECK-NEXT: [[COPY10:%[0-9]+]]:_(s64) = COPY [[COPY3]] + ; CHECK-NEXT: [[COPY11:%[0-9]+]]:_(s32) = COPY [[COPY2]] + ; CHECK-NEXT: [[COPY12:%[0-9]+]]:_(s32) = COPY [[COPY1]] + ; CHECK-NEXT: [[COPY13:%[0-9]+]]:_(s32) = COPY [[COPY]] + ; CHECK-NEXT: [[COPY14:%[0-9]+]]:_(<4 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3 + ; CHECK-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY14]](<4 x s32>) + ; CHECK-NEXT: $sgpr4_sgpr5 = COPY [[COPY7]](p4) + ; CHECK-NEXT: $sgpr6_sgpr7 = COPY [[COPY8]](p4) + ; CHECK-NEXT: $sgpr8_sgpr9 = COPY [[COPY9]](p4) + ; CHECK-NEXT: $sgpr10_sgpr11 = COPY [[COPY10]](s64) + ; CHECK-NEXT: $sgpr12 = COPY [[COPY11]](s32) + ; CHECK-NEXT: $sgpr13 = COPY [[COPY12]](s32) + ; CHECK-NEXT: $sgpr14 = COPY [[COPY13]](s32) ; CHECK-NEXT: $sgpr30_sgpr31 = G_SI_CALL [[GV]](p0), @extern, csr_amdgpu_highregs, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14 ; CHECK-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def $scc - ; CHECK-NEXT: [[COPY16:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY7]] - ; CHECK-NEXT: S_SETPC_B64_return [[COPY16]] + ; CHECK-NEXT: SI_RETURN call void @extern() "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" ret void } @@ -164,32 +162,30 @@ define void 
@func_call_no_workitem_ids() { define void @func_call_no_workgroup_ids() { ; CHECK-LABEL: name: func_call_no_workgroup_ids ; CHECK: bb.1 (%ir-block.0): - ; CHECK-NEXT: liveins: $vgpr31, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9, $sgpr10_sgpr11, $sgpr30_sgpr31 + ; CHECK-NEXT: liveins: $vgpr31, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9, $sgpr10_sgpr11 ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr31 ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr_64 = COPY $sgpr10_sgpr11 ; CHECK-NEXT: [[COPY2:%[0-9]+]]:sgpr_64 = COPY $sgpr8_sgpr9 ; CHECK-NEXT: [[COPY3:%[0-9]+]]:sgpr_64 = COPY $sgpr6_sgpr7 ; CHECK-NEXT: [[COPY4:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5 - ; CHECK-NEXT: [[COPY5:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 ; CHECK-NEXT: ADJCALLSTACKUP 0, 0, implicit-def $scc ; CHECK-NEXT: [[GV:%[0-9]+]]:_(p0) = G_GLOBAL_VALUE @extern - ; CHECK-NEXT: [[COPY6:%[0-9]+]]:_(p4) = COPY [[COPY4]] - ; CHECK-NEXT: [[COPY7:%[0-9]+]]:_(p4) = COPY [[COPY3]] - ; CHECK-NEXT: [[COPY8:%[0-9]+]]:_(p4) = COPY [[COPY2]] - ; CHECK-NEXT: [[COPY9:%[0-9]+]]:_(s64) = COPY [[COPY1]] - ; CHECK-NEXT: [[COPY10:%[0-9]+]]:_(s32) = COPY [[COPY]](s32) - ; CHECK-NEXT: [[COPY11:%[0-9]+]]:_(<4 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3 - ; CHECK-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY11]](<4 x s32>) - ; CHECK-NEXT: $sgpr4_sgpr5 = COPY [[COPY6]](p4) - ; CHECK-NEXT: $sgpr6_sgpr7 = COPY [[COPY7]](p4) - ; CHECK-NEXT: $sgpr8_sgpr9 = COPY [[COPY8]](p4) - ; CHECK-NEXT: $sgpr10_sgpr11 = COPY [[COPY9]](s64) - ; CHECK-NEXT: $vgpr31 = COPY [[COPY10]](s32) + ; CHECK-NEXT: [[COPY5:%[0-9]+]]:_(p4) = COPY [[COPY4]] + ; CHECK-NEXT: [[COPY6:%[0-9]+]]:_(p4) = COPY [[COPY3]] + ; CHECK-NEXT: [[COPY7:%[0-9]+]]:_(p4) = COPY [[COPY2]] + ; CHECK-NEXT: [[COPY8:%[0-9]+]]:_(s64) = COPY [[COPY1]] + ; CHECK-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY [[COPY]](s32) + ; CHECK-NEXT: [[COPY10:%[0-9]+]]:_(<4 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3 + ; CHECK-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY10]](<4 x s32>) + ; CHECK-NEXT: $sgpr4_sgpr5 = COPY [[COPY5]](p4) + ; CHECK-NEXT: $sgpr6_sgpr7 = COPY [[COPY6]](p4) + ; CHECK-NEXT: $sgpr8_sgpr9 = COPY [[COPY7]](p4) + ; CHECK-NEXT: $sgpr10_sgpr11 = COPY [[COPY8]](s64) + ; CHECK-NEXT: $vgpr31 = COPY [[COPY9]](s32) ; CHECK-NEXT: $sgpr30_sgpr31 = G_SI_CALL [[GV]](p0), @extern, csr_amdgpu_highregs, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $vgpr31 ; CHECK-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def $scc - ; CHECK-NEXT: [[COPY12:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY5]] - ; CHECK-NEXT: S_SETPC_B64_return [[COPY12]] + ; CHECK-NEXT: SI_RETURN call void @extern() "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" ret void } @@ -197,23 +193,21 @@ define void @func_call_no_workgroup_ids() { define void @func_call_no_other_sgprs() { ; CHECK-LABEL: name: func_call_no_other_sgprs ; CHECK: bb.1 (%ir-block.0): - ; CHECK-NEXT: liveins: $vgpr31, $sgpr8_sgpr9, $sgpr30_sgpr31 + ; CHECK-NEXT: liveins: $vgpr31, $sgpr8_sgpr9 ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr31 ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr_64 = COPY $sgpr8_sgpr9 - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 ; CHECK-NEXT: ADJCALLSTACKUP 0, 0, implicit-def $scc ; CHECK-NEXT: [[GV:%[0-9]+]]:_(p0) = G_GLOBAL_VALUE @extern - ; CHECK-NEXT: [[COPY3:%[0-9]+]]:_(p4) = COPY [[COPY1]] - ; CHECK-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY [[COPY]](s32) - ; CHECK-NEXT: [[COPY5:%[0-9]+]]:_(<4 x s32>) = COPY 
$sgpr0_sgpr1_sgpr2_sgpr3 - ; CHECK-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY5]](<4 x s32>) - ; CHECK-NEXT: $sgpr8_sgpr9 = COPY [[COPY3]](p4) - ; CHECK-NEXT: $vgpr31 = COPY [[COPY4]](s32) + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(p4) = COPY [[COPY1]] + ; CHECK-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY [[COPY]](s32) + ; CHECK-NEXT: [[COPY4:%[0-9]+]]:_(<4 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3 + ; CHECK-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY4]](<4 x s32>) + ; CHECK-NEXT: $sgpr8_sgpr9 = COPY [[COPY2]](p4) + ; CHECK-NEXT: $vgpr31 = COPY [[COPY3]](s32) ; CHECK-NEXT: $sgpr30_sgpr31 = G_SI_CALL [[GV]](p0), @extern, csr_amdgpu_highregs, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr8_sgpr9, implicit $vgpr31 ; CHECK-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def $scc - ; CHECK-NEXT: [[COPY6:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY2]] - ; CHECK-NEXT: S_SETPC_B64_return [[COPY6]] + ; CHECK-NEXT: SI_RETURN call void @extern() "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" ret void } diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-call-implicit-args.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-call-implicit-args.ll index 00f2e4b620c2..851e893bead7 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-call-implicit-args.ll +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-call-implicit-args.ll @@ -116,7 +116,7 @@ define amdgpu_kernel void @test_call_external_void_func_i32([17 x i8]) #0 { define void @test_func_call_external_void_func_i32() #0 { ; GFX900-LABEL: name: test_func_call_external_void_func_i32 ; GFX900: bb.1 (%ir-block.0): - ; GFX900-NEXT: liveins: $sgpr12, $sgpr13, $sgpr14, $vgpr31, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9, $sgpr10_sgpr11, $sgpr30_sgpr31 + ; GFX900-NEXT: liveins: $sgpr12, $sgpr13, $sgpr14, $vgpr31, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9, $sgpr10_sgpr11 ; GFX900-NEXT: {{ $}} ; GFX900-NEXT: [[COPY:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr31 ; GFX900-NEXT: [[COPY1:%[0-9]+]]:sgpr_32 = COPY $sgpr14 @@ -126,36 +126,34 @@ define void @test_func_call_external_void_func_i32() #0 { ; GFX900-NEXT: [[COPY5:%[0-9]+]]:sgpr_64 = COPY $sgpr8_sgpr9 ; GFX900-NEXT: [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr6_sgpr7 ; GFX900-NEXT: [[COPY7:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5 - ; GFX900-NEXT: [[COPY8:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 ; GFX900-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 99 ; GFX900-NEXT: ADJCALLSTACKUP 0, 0, implicit-def $scc ; GFX900-NEXT: [[GV:%[0-9]+]]:_(p0) = G_GLOBAL_VALUE @external_void_func_i32 - ; GFX900-NEXT: [[COPY9:%[0-9]+]]:_(p4) = COPY [[COPY7]] - ; GFX900-NEXT: [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY6]] - ; GFX900-NEXT: [[COPY11:%[0-9]+]]:_(p4) = COPY [[COPY5]] - ; GFX900-NEXT: [[COPY12:%[0-9]+]]:_(s64) = COPY [[COPY4]] - ; GFX900-NEXT: [[COPY13:%[0-9]+]]:_(s32) = COPY [[COPY3]] - ; GFX900-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY2]] - ; GFX900-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY1]] - ; GFX900-NEXT: [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY]](s32) + ; GFX900-NEXT: [[COPY8:%[0-9]+]]:_(p4) = COPY [[COPY7]] + ; GFX900-NEXT: [[COPY9:%[0-9]+]]:_(p4) = COPY [[COPY6]] + ; GFX900-NEXT: [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY5]] + ; GFX900-NEXT: [[COPY11:%[0-9]+]]:_(s64) = COPY [[COPY4]] + ; GFX900-NEXT: [[COPY12:%[0-9]+]]:_(s32) = COPY [[COPY3]] + ; GFX900-NEXT: [[COPY13:%[0-9]+]]:_(s32) = COPY [[COPY2]] + ; GFX900-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY1]] + ; GFX900-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY]](s32) ; 
GFX900-NEXT: $vgpr0 = COPY [[C]](s32) - ; GFX900-NEXT: [[COPY17:%[0-9]+]]:_(<4 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3 - ; GFX900-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY17]](<4 x s32>) - ; GFX900-NEXT: $sgpr4_sgpr5 = COPY [[COPY9]](p4) - ; GFX900-NEXT: $sgpr6_sgpr7 = COPY [[COPY10]](p4) - ; GFX900-NEXT: $sgpr8_sgpr9 = COPY [[COPY11]](p4) - ; GFX900-NEXT: $sgpr10_sgpr11 = COPY [[COPY12]](s64) - ; GFX900-NEXT: $sgpr12 = COPY [[COPY13]](s32) - ; GFX900-NEXT: $sgpr13 = COPY [[COPY14]](s32) - ; GFX900-NEXT: $sgpr14 = COPY [[COPY15]](s32) - ; GFX900-NEXT: $vgpr31 = COPY [[COPY16]](s32) + ; GFX900-NEXT: [[COPY16:%[0-9]+]]:_(<4 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3 + ; GFX900-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY16]](<4 x s32>) + ; GFX900-NEXT: $sgpr4_sgpr5 = COPY [[COPY8]](p4) + ; GFX900-NEXT: $sgpr6_sgpr7 = COPY [[COPY9]](p4) + ; GFX900-NEXT: $sgpr8_sgpr9 = COPY [[COPY10]](p4) + ; GFX900-NEXT: $sgpr10_sgpr11 = COPY [[COPY11]](s64) + ; GFX900-NEXT: $sgpr12 = COPY [[COPY12]](s32) + ; GFX900-NEXT: $sgpr13 = COPY [[COPY13]](s32) + ; GFX900-NEXT: $sgpr14 = COPY [[COPY14]](s32) + ; GFX900-NEXT: $vgpr31 = COPY [[COPY15]](s32) ; GFX900-NEXT: $sgpr30_sgpr31 = G_SI_CALL [[GV]](p0), @external_void_func_i32, csr_amdgpu_highregs, implicit $vgpr0, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $vgpr31 ; GFX900-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def $scc - ; GFX900-NEXT: [[COPY18:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY8]] - ; GFX900-NEXT: S_SETPC_B64_return [[COPY18]] + ; GFX900-NEXT: SI_RETURN ; GFX908-LABEL: name: test_func_call_external_void_func_i32 ; GFX908: bb.1 (%ir-block.0): - ; GFX908-NEXT: liveins: $sgpr12, $sgpr13, $sgpr14, $vgpr31, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9, $sgpr10_sgpr11, $sgpr30_sgpr31 + ; GFX908-NEXT: liveins: $sgpr12, $sgpr13, $sgpr14, $vgpr31, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9, $sgpr10_sgpr11 ; GFX908-NEXT: {{ $}} ; GFX908-NEXT: [[COPY:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr31 ; GFX908-NEXT: [[COPY1:%[0-9]+]]:sgpr_32 = COPY $sgpr14 @@ -165,33 +163,31 @@ define void @test_func_call_external_void_func_i32() #0 { ; GFX908-NEXT: [[COPY5:%[0-9]+]]:sgpr_64 = COPY $sgpr8_sgpr9 ; GFX908-NEXT: [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr6_sgpr7 ; GFX908-NEXT: [[COPY7:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5 - ; GFX908-NEXT: [[COPY8:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 ; GFX908-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 99 ; GFX908-NEXT: ADJCALLSTACKUP 0, 0, implicit-def $scc ; GFX908-NEXT: [[GV:%[0-9]+]]:_(p0) = G_GLOBAL_VALUE @external_void_func_i32 - ; GFX908-NEXT: [[COPY9:%[0-9]+]]:_(p4) = COPY [[COPY7]] - ; GFX908-NEXT: [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY6]] - ; GFX908-NEXT: [[COPY11:%[0-9]+]]:_(p4) = COPY [[COPY5]] - ; GFX908-NEXT: [[COPY12:%[0-9]+]]:_(s64) = COPY [[COPY4]] - ; GFX908-NEXT: [[COPY13:%[0-9]+]]:_(s32) = COPY [[COPY3]] - ; GFX908-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY2]] - ; GFX908-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY1]] - ; GFX908-NEXT: [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY]](s32) + ; GFX908-NEXT: [[COPY8:%[0-9]+]]:_(p4) = COPY [[COPY7]] + ; GFX908-NEXT: [[COPY9:%[0-9]+]]:_(p4) = COPY [[COPY6]] + ; GFX908-NEXT: [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY5]] + ; GFX908-NEXT: [[COPY11:%[0-9]+]]:_(s64) = COPY [[COPY4]] + ; GFX908-NEXT: [[COPY12:%[0-9]+]]:_(s32) = COPY [[COPY3]] + ; GFX908-NEXT: [[COPY13:%[0-9]+]]:_(s32) = COPY [[COPY2]] + ; GFX908-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY1]] + ; 
GFX908-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY]](s32) ; GFX908-NEXT: $vgpr0 = COPY [[C]](s32) - ; GFX908-NEXT: [[COPY17:%[0-9]+]]:_(<4 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3 - ; GFX908-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY17]](<4 x s32>) - ; GFX908-NEXT: $sgpr4_sgpr5 = COPY [[COPY9]](p4) - ; GFX908-NEXT: $sgpr6_sgpr7 = COPY [[COPY10]](p4) - ; GFX908-NEXT: $sgpr8_sgpr9 = COPY [[COPY11]](p4) - ; GFX908-NEXT: $sgpr10_sgpr11 = COPY [[COPY12]](s64) - ; GFX908-NEXT: $sgpr12 = COPY [[COPY13]](s32) - ; GFX908-NEXT: $sgpr13 = COPY [[COPY14]](s32) - ; GFX908-NEXT: $sgpr14 = COPY [[COPY15]](s32) - ; GFX908-NEXT: $vgpr31 = COPY [[COPY16]](s32) + ; GFX908-NEXT: [[COPY16:%[0-9]+]]:_(<4 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3 + ; GFX908-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY16]](<4 x s32>) + ; GFX908-NEXT: $sgpr4_sgpr5 = COPY [[COPY8]](p4) + ; GFX908-NEXT: $sgpr6_sgpr7 = COPY [[COPY9]](p4) + ; GFX908-NEXT: $sgpr8_sgpr9 = COPY [[COPY10]](p4) + ; GFX908-NEXT: $sgpr10_sgpr11 = COPY [[COPY11]](s64) + ; GFX908-NEXT: $sgpr12 = COPY [[COPY12]](s32) + ; GFX908-NEXT: $sgpr13 = COPY [[COPY13]](s32) + ; GFX908-NEXT: $sgpr14 = COPY [[COPY14]](s32) + ; GFX908-NEXT: $vgpr31 = COPY [[COPY15]](s32) ; GFX908-NEXT: $sgpr30_sgpr31 = G_SI_CALL [[GV]](p0), @external_void_func_i32, csr_amdgpu_highregs, implicit $vgpr0, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $vgpr31 ; GFX908-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def $scc - ; GFX908-NEXT: [[COPY18:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY8]] - ; GFX908-NEXT: S_SETPC_B64_return [[COPY18]] + ; GFX908-NEXT: SI_RETURN call void @external_void_func_i32(i32 99) ret void } @@ -378,7 +374,7 @@ define amdgpu_kernel void @test_call_external_void_func_v32i32([17 x i8]) #0 { define void @test_func_call_external_void_func_v32i32([17 x i8]) #0 { ; GFX900-LABEL: name: test_func_call_external_void_func_v32i32 ; GFX900: bb.1 (%ir-block.1): - ; GFX900-NEXT: liveins: $sgpr12, $sgpr13, $sgpr14, $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $vgpr8, $vgpr9, $vgpr10, $vgpr11, $vgpr12, $vgpr13, $vgpr14, $vgpr15, $vgpr16, $vgpr31, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9, $sgpr10_sgpr11, $sgpr30_sgpr31 + ; GFX900-NEXT: liveins: $sgpr12, $sgpr13, $sgpr14, $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $vgpr8, $vgpr9, $vgpr10, $vgpr11, $vgpr12, $vgpr13, $vgpr14, $vgpr15, $vgpr16, $vgpr31, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9, $sgpr10_sgpr11 ; GFX900-NEXT: {{ $}} ; GFX900-NEXT: [[COPY:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr31 ; GFX900-NEXT: [[COPY1:%[0-9]+]]:sgpr_32 = COPY $sgpr14 @@ -439,23 +435,22 @@ define void @test_func_call_external_void_func_v32i32([17 x i8]) #0 { ; GFX900-NEXT: [[COPY24:%[0-9]+]]:_(s32) = COPY $vgpr16 ; GFX900-NEXT: [[TRUNC32:%[0-9]+]]:_(s16) = G_TRUNC [[COPY24]](s32) ; GFX900-NEXT: [[TRUNC33:%[0-9]+]]:_(s8) = G_TRUNC [[TRUNC32]](s16) - ; GFX900-NEXT: [[COPY25:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 ; GFX900-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 ; GFX900-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<32 x s32>) = G_BUILD_VECTOR [[C]](s32), [[C]](s32), [[C]](s32), [[C]](s32), [[C]](s32), [[C]](s32), [[C]](s32), [[C]](s32), [[C]](s32), [[C]](s32), [[C]](s32), [[C]](s32), [[C]](s32), [[C]](s32), [[C]](s32), [[C]](s32), [[C]](s32), [[C]](s32), [[C]](s32), [[C]](s32), [[C]](s32), [[C]](s32), [[C]](s32), [[C]](s32), [[C]](s32), [[C]](s32), [[C]](s32), [[C]](s32), [[C]](s32), [[C]](s32), 
[[C]](s32), [[C]](s32) ; GFX900-NEXT: ADJCALLSTACKUP 0, 0, implicit-def $scc ; GFX900-NEXT: [[GV:%[0-9]+]]:_(p0) = G_GLOBAL_VALUE @external_void_func_v32i32 - ; GFX900-NEXT: [[COPY26:%[0-9]+]]:_(p4) = COPY [[COPY7]] - ; GFX900-NEXT: [[COPY27:%[0-9]+]]:_(p4) = COPY [[COPY6]] - ; GFX900-NEXT: [[COPY28:%[0-9]+]]:_(p4) = COPY [[COPY5]] - ; GFX900-NEXT: [[COPY29:%[0-9]+]]:_(s64) = COPY [[COPY4]] - ; GFX900-NEXT: [[COPY30:%[0-9]+]]:_(s32) = COPY [[COPY3]] - ; GFX900-NEXT: [[COPY31:%[0-9]+]]:_(s32) = COPY [[COPY2]] - ; GFX900-NEXT: [[COPY32:%[0-9]+]]:_(s32) = COPY [[COPY1]] - ; GFX900-NEXT: [[COPY33:%[0-9]+]]:_(s32) = COPY [[COPY]](s32) + ; GFX900-NEXT: [[COPY25:%[0-9]+]]:_(p4) = COPY [[COPY7]] + ; GFX900-NEXT: [[COPY26:%[0-9]+]]:_(p4) = COPY [[COPY6]] + ; GFX900-NEXT: [[COPY27:%[0-9]+]]:_(p4) = COPY [[COPY5]] + ; GFX900-NEXT: [[COPY28:%[0-9]+]]:_(s64) = COPY [[COPY4]] + ; GFX900-NEXT: [[COPY29:%[0-9]+]]:_(s32) = COPY [[COPY3]] + ; GFX900-NEXT: [[COPY30:%[0-9]+]]:_(s32) = COPY [[COPY2]] + ; GFX900-NEXT: [[COPY31:%[0-9]+]]:_(s32) = COPY [[COPY1]] + ; GFX900-NEXT: [[COPY32:%[0-9]+]]:_(s32) = COPY [[COPY]](s32) ; GFX900-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32), [[UV6:%[0-9]+]]:_(s32), [[UV7:%[0-9]+]]:_(s32), [[UV8:%[0-9]+]]:_(s32), [[UV9:%[0-9]+]]:_(s32), [[UV10:%[0-9]+]]:_(s32), [[UV11:%[0-9]+]]:_(s32), [[UV12:%[0-9]+]]:_(s32), [[UV13:%[0-9]+]]:_(s32), [[UV14:%[0-9]+]]:_(s32), [[UV15:%[0-9]+]]:_(s32), [[UV16:%[0-9]+]]:_(s32), [[UV17:%[0-9]+]]:_(s32), [[UV18:%[0-9]+]]:_(s32), [[UV19:%[0-9]+]]:_(s32), [[UV20:%[0-9]+]]:_(s32), [[UV21:%[0-9]+]]:_(s32), [[UV22:%[0-9]+]]:_(s32), [[UV23:%[0-9]+]]:_(s32), [[UV24:%[0-9]+]]:_(s32), [[UV25:%[0-9]+]]:_(s32), [[UV26:%[0-9]+]]:_(s32), [[UV27:%[0-9]+]]:_(s32), [[UV28:%[0-9]+]]:_(s32), [[UV29:%[0-9]+]]:_(s32), [[UV30:%[0-9]+]]:_(s32), [[UV31:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[BUILD_VECTOR]](<32 x s32>) - ; GFX900-NEXT: [[COPY34:%[0-9]+]]:_(p5) = COPY $sgpr32 + ; GFX900-NEXT: [[COPY33:%[0-9]+]]:_(p5) = COPY $sgpr32 ; GFX900-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 - ; GFX900-NEXT: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY34]], [[C1]](s32) + ; GFX900-NEXT: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY33]], [[C1]](s32) ; GFX900-NEXT: G_STORE [[UV31]](s32), [[PTR_ADD]](p5) :: (store (s32) into stack, align 16, addrspace 5) ; GFX900-NEXT: $vgpr0 = COPY [[UV]](s32) ; GFX900-NEXT: $vgpr1 = COPY [[UV1]](s32) @@ -488,23 +483,22 @@ define void @test_func_call_external_void_func_v32i32([17 x i8]) #0 { ; GFX900-NEXT: $vgpr28 = COPY [[UV28]](s32) ; GFX900-NEXT: $vgpr29 = COPY [[UV29]](s32) ; GFX900-NEXT: $vgpr30 = COPY [[UV30]](s32) - ; GFX900-NEXT: [[COPY35:%[0-9]+]]:_(<4 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3 - ; GFX900-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY35]](<4 x s32>) - ; GFX900-NEXT: $sgpr4_sgpr5 = COPY [[COPY26]](p4) - ; GFX900-NEXT: $sgpr6_sgpr7 = COPY [[COPY27]](p4) - ; GFX900-NEXT: $sgpr8_sgpr9 = COPY [[COPY28]](p4) - ; GFX900-NEXT: $sgpr10_sgpr11 = COPY [[COPY29]](s64) - ; GFX900-NEXT: $sgpr12 = COPY [[COPY30]](s32) - ; GFX900-NEXT: $sgpr13 = COPY [[COPY31]](s32) - ; GFX900-NEXT: $sgpr14 = COPY [[COPY32]](s32) - ; GFX900-NEXT: $vgpr31 = COPY [[COPY33]](s32) + ; GFX900-NEXT: [[COPY34:%[0-9]+]]:_(<4 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3 + ; GFX900-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY34]](<4 x s32>) + ; GFX900-NEXT: $sgpr4_sgpr5 = COPY [[COPY25]](p4) + ; GFX900-NEXT: $sgpr6_sgpr7 = COPY [[COPY26]](p4) + ; GFX900-NEXT: $sgpr8_sgpr9 = 
COPY [[COPY27]](p4) + ; GFX900-NEXT: $sgpr10_sgpr11 = COPY [[COPY28]](s64) + ; GFX900-NEXT: $sgpr12 = COPY [[COPY29]](s32) + ; GFX900-NEXT: $sgpr13 = COPY [[COPY30]](s32) + ; GFX900-NEXT: $sgpr14 = COPY [[COPY31]](s32) + ; GFX900-NEXT: $vgpr31 = COPY [[COPY32]](s32) ; GFX900-NEXT: $sgpr30_sgpr31 = G_SI_CALL [[GV]](p0), @external_void_func_v32i32, csr_amdgpu_highregs, implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7, implicit $vgpr8, implicit $vgpr9, implicit $vgpr10, implicit $vgpr11, implicit $vgpr12, implicit $vgpr13, implicit $vgpr14, implicit $vgpr15, implicit $vgpr16, implicit $vgpr17, implicit $vgpr18, implicit $vgpr19, implicit $vgpr20, implicit $vgpr21, implicit $vgpr22, implicit $vgpr23, implicit $vgpr24, implicit $vgpr25, implicit $vgpr26, implicit $vgpr27, implicit $vgpr28, implicit $vgpr29, implicit $vgpr30, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $vgpr31 ; GFX900-NEXT: ADJCALLSTACKDOWN 0, 4, implicit-def $scc - ; GFX900-NEXT: [[COPY36:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY25]] - ; GFX900-NEXT: S_SETPC_B64_return [[COPY36]] + ; GFX900-NEXT: SI_RETURN ; GFX908-LABEL: name: test_func_call_external_void_func_v32i32 ; GFX908: bb.1 (%ir-block.1): - ; GFX908-NEXT: liveins: $sgpr12, $sgpr13, $sgpr14, $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $vgpr8, $vgpr9, $vgpr10, $vgpr11, $vgpr12, $vgpr13, $vgpr14, $vgpr15, $vgpr16, $vgpr31, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9, $sgpr10_sgpr11, $sgpr30_sgpr31 + ; GFX908-NEXT: liveins: $sgpr12, $sgpr13, $sgpr14, $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $vgpr8, $vgpr9, $vgpr10, $vgpr11, $vgpr12, $vgpr13, $vgpr14, $vgpr15, $vgpr16, $vgpr31, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9, $sgpr10_sgpr11 ; GFX908-NEXT: {{ $}} ; GFX908-NEXT: [[COPY:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr31 ; GFX908-NEXT: [[COPY1:%[0-9]+]]:sgpr_32 = COPY $sgpr14 @@ -565,23 +559,22 @@ define void @test_func_call_external_void_func_v32i32([17 x i8]) #0 { ; GFX908-NEXT: [[COPY24:%[0-9]+]]:_(s32) = COPY $vgpr16 ; GFX908-NEXT: [[TRUNC32:%[0-9]+]]:_(s16) = G_TRUNC [[COPY24]](s32) ; GFX908-NEXT: [[TRUNC33:%[0-9]+]]:_(s8) = G_TRUNC [[TRUNC32]](s16) - ; GFX908-NEXT: [[COPY25:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 ; GFX908-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 ; GFX908-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<32 x s32>) = G_BUILD_VECTOR [[C]](s32), [[C]](s32), [[C]](s32), [[C]](s32), [[C]](s32), [[C]](s32), [[C]](s32), [[C]](s32), [[C]](s32), [[C]](s32), [[C]](s32), [[C]](s32), [[C]](s32), [[C]](s32), [[C]](s32), [[C]](s32), [[C]](s32), [[C]](s32), [[C]](s32), [[C]](s32), [[C]](s32), [[C]](s32), [[C]](s32), [[C]](s32), [[C]](s32), [[C]](s32), [[C]](s32), [[C]](s32), [[C]](s32), [[C]](s32), [[C]](s32), [[C]](s32) ; GFX908-NEXT: ADJCALLSTACKUP 0, 0, implicit-def $scc ; GFX908-NEXT: [[GV:%[0-9]+]]:_(p0) = G_GLOBAL_VALUE @external_void_func_v32i32 - ; GFX908-NEXT: [[COPY26:%[0-9]+]]:_(p4) = COPY [[COPY7]] - ; GFX908-NEXT: [[COPY27:%[0-9]+]]:_(p4) = COPY [[COPY6]] - ; GFX908-NEXT: [[COPY28:%[0-9]+]]:_(p4) = COPY [[COPY5]] - ; GFX908-NEXT: [[COPY29:%[0-9]+]]:_(s64) = COPY [[COPY4]] - ; GFX908-NEXT: [[COPY30:%[0-9]+]]:_(s32) = COPY [[COPY3]] - ; GFX908-NEXT: [[COPY31:%[0-9]+]]:_(s32) = COPY [[COPY2]] - ; GFX908-NEXT: [[COPY32:%[0-9]+]]:_(s32) = COPY [[COPY1]] - ; GFX908-NEXT: [[COPY33:%[0-9]+]]:_(s32) = COPY 
[[COPY]](s32) + ; GFX908-NEXT: [[COPY25:%[0-9]+]]:_(p4) = COPY [[COPY7]] + ; GFX908-NEXT: [[COPY26:%[0-9]+]]:_(p4) = COPY [[COPY6]] + ; GFX908-NEXT: [[COPY27:%[0-9]+]]:_(p4) = COPY [[COPY5]] + ; GFX908-NEXT: [[COPY28:%[0-9]+]]:_(s64) = COPY [[COPY4]] + ; GFX908-NEXT: [[COPY29:%[0-9]+]]:_(s32) = COPY [[COPY3]] + ; GFX908-NEXT: [[COPY30:%[0-9]+]]:_(s32) = COPY [[COPY2]] + ; GFX908-NEXT: [[COPY31:%[0-9]+]]:_(s32) = COPY [[COPY1]] + ; GFX908-NEXT: [[COPY32:%[0-9]+]]:_(s32) = COPY [[COPY]](s32) ; GFX908-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32), [[UV6:%[0-9]+]]:_(s32), [[UV7:%[0-9]+]]:_(s32), [[UV8:%[0-9]+]]:_(s32), [[UV9:%[0-9]+]]:_(s32), [[UV10:%[0-9]+]]:_(s32), [[UV11:%[0-9]+]]:_(s32), [[UV12:%[0-9]+]]:_(s32), [[UV13:%[0-9]+]]:_(s32), [[UV14:%[0-9]+]]:_(s32), [[UV15:%[0-9]+]]:_(s32), [[UV16:%[0-9]+]]:_(s32), [[UV17:%[0-9]+]]:_(s32), [[UV18:%[0-9]+]]:_(s32), [[UV19:%[0-9]+]]:_(s32), [[UV20:%[0-9]+]]:_(s32), [[UV21:%[0-9]+]]:_(s32), [[UV22:%[0-9]+]]:_(s32), [[UV23:%[0-9]+]]:_(s32), [[UV24:%[0-9]+]]:_(s32), [[UV25:%[0-9]+]]:_(s32), [[UV26:%[0-9]+]]:_(s32), [[UV27:%[0-9]+]]:_(s32), [[UV28:%[0-9]+]]:_(s32), [[UV29:%[0-9]+]]:_(s32), [[UV30:%[0-9]+]]:_(s32), [[UV31:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[BUILD_VECTOR]](<32 x s32>) - ; GFX908-NEXT: [[COPY34:%[0-9]+]]:_(p5) = COPY $sgpr32 + ; GFX908-NEXT: [[COPY33:%[0-9]+]]:_(p5) = COPY $sgpr32 ; GFX908-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 - ; GFX908-NEXT: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY34]], [[C1]](s32) + ; GFX908-NEXT: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY33]], [[C1]](s32) ; GFX908-NEXT: G_STORE [[UV31]](s32), [[PTR_ADD]](p5) :: (store (s32) into stack, align 16, addrspace 5) ; GFX908-NEXT: $vgpr0 = COPY [[UV]](s32) ; GFX908-NEXT: $vgpr1 = COPY [[UV1]](s32) @@ -614,20 +607,19 @@ define void @test_func_call_external_void_func_v32i32([17 x i8]) #0 { ; GFX908-NEXT: $vgpr28 = COPY [[UV28]](s32) ; GFX908-NEXT: $vgpr29 = COPY [[UV29]](s32) ; GFX908-NEXT: $vgpr30 = COPY [[UV30]](s32) - ; GFX908-NEXT: [[COPY35:%[0-9]+]]:_(<4 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3 - ; GFX908-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY35]](<4 x s32>) - ; GFX908-NEXT: $sgpr4_sgpr5 = COPY [[COPY26]](p4) - ; GFX908-NEXT: $sgpr6_sgpr7 = COPY [[COPY27]](p4) - ; GFX908-NEXT: $sgpr8_sgpr9 = COPY [[COPY28]](p4) - ; GFX908-NEXT: $sgpr10_sgpr11 = COPY [[COPY29]](s64) - ; GFX908-NEXT: $sgpr12 = COPY [[COPY30]](s32) - ; GFX908-NEXT: $sgpr13 = COPY [[COPY31]](s32) - ; GFX908-NEXT: $sgpr14 = COPY [[COPY32]](s32) - ; GFX908-NEXT: $vgpr31 = COPY [[COPY33]](s32) + ; GFX908-NEXT: [[COPY34:%[0-9]+]]:_(<4 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3 + ; GFX908-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY34]](<4 x s32>) + ; GFX908-NEXT: $sgpr4_sgpr5 = COPY [[COPY25]](p4) + ; GFX908-NEXT: $sgpr6_sgpr7 = COPY [[COPY26]](p4) + ; GFX908-NEXT: $sgpr8_sgpr9 = COPY [[COPY27]](p4) + ; GFX908-NEXT: $sgpr10_sgpr11 = COPY [[COPY28]](s64) + ; GFX908-NEXT: $sgpr12 = COPY [[COPY29]](s32) + ; GFX908-NEXT: $sgpr13 = COPY [[COPY30]](s32) + ; GFX908-NEXT: $sgpr14 = COPY [[COPY31]](s32) + ; GFX908-NEXT: $vgpr31 = COPY [[COPY32]](s32) ; GFX908-NEXT: $sgpr30_sgpr31 = G_SI_CALL [[GV]](p0), @external_void_func_v32i32, csr_amdgpu_highregs, implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7, implicit $vgpr8, implicit $vgpr9, implicit $vgpr10, implicit $vgpr11, implicit $vgpr12, implicit $vgpr13, implicit 
$vgpr14, implicit $vgpr15, implicit $vgpr16, implicit $vgpr17, implicit $vgpr18, implicit $vgpr19, implicit $vgpr20, implicit $vgpr21, implicit $vgpr22, implicit $vgpr23, implicit $vgpr24, implicit $vgpr25, implicit $vgpr26, implicit $vgpr27, implicit $vgpr28, implicit $vgpr29, implicit $vgpr30, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $vgpr31 ; GFX908-NEXT: ADJCALLSTACKDOWN 0, 4, implicit-def $scc - ; GFX908-NEXT: [[COPY36:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY25]] - ; GFX908-NEXT: S_SETPC_B64_return [[COPY36]] + ; GFX908-NEXT: SI_RETURN call void @external_void_func_v32i32(<32 x i32> zeroinitializer) ret void } diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-call-non-fixed.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-call-non-fixed.ll index 67cb7683fbf5..cb06c4752520 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-call-non-fixed.ll +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-call-non-fixed.ll @@ -11,17 +11,13 @@ declare hidden amdgpu_gfx void @external_gfx_void_func_struct_i8_i32_inreg({ i8, define amdgpu_gfx void @test_gfx_call_external_void_func_void() #0 { ; CHECK-LABEL: name: test_gfx_call_external_void_func_void ; CHECK: bb.1 (%ir-block.0): - ; CHECK-NEXT: liveins: $sgpr30_sgpr31 - ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 ; CHECK-NEXT: ADJCALLSTACKUP 0, 0, implicit-def $scc ; CHECK-NEXT: [[GV:%[0-9]+]]:_(p0) = G_GLOBAL_VALUE @external_gfx_void_func_void - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(<4 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3 - ; CHECK-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY1]](<4 x s32>) + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(<4 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3 + ; CHECK-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY]](<4 x s32>) ; CHECK-NEXT: $sgpr30_sgpr31 = G_SI_CALL [[GV]](p0), @external_gfx_void_func_void, csr_amdgpu_si_gfx, implicit $sgpr0_sgpr1_sgpr2_sgpr3 ; CHECK-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def $scc - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:gfx_ccr_sgpr_64 = COPY [[COPY]] - ; CHECK-NEXT: S_SETPC_B64_return_gfx [[COPY2]] + ; CHECK-NEXT: SI_RETURN call amdgpu_gfx void @external_gfx_void_func_void() ret void } @@ -29,20 +25,18 @@ define amdgpu_gfx void @test_gfx_call_external_void_func_void() #0 { define amdgpu_gfx void @test_gfx_call_external_void_func_i32_imm(i32) #0 { ; CHECK-LABEL: name: test_gfx_call_external_void_func_i32_imm ; CHECK: bb.1 (%ir-block.1): - ; CHECK-NEXT: liveins: $vgpr0, $sgpr30_sgpr31 + ; CHECK-NEXT: liveins: $vgpr0 ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 42 ; CHECK-NEXT: ADJCALLSTACKUP 0, 0, implicit-def $scc ; CHECK-NEXT: [[GV:%[0-9]+]]:_(p0) = G_GLOBAL_VALUE @external_gfx_void_func_i32 ; CHECK-NEXT: $vgpr0 = COPY [[C]](s32) - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(<4 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3 - ; CHECK-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY2]](<4 x s32>) + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(<4 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3 + ; CHECK-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY1]](<4 x s32>) ; CHECK-NEXT: $sgpr30_sgpr31 = G_SI_CALL [[GV]](p0), @external_gfx_void_func_i32, csr_amdgpu_si_gfx, implicit $vgpr0, implicit $sgpr0_sgpr1_sgpr2_sgpr3 ; CHECK-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def $scc - ; CHECK-NEXT: 
[[COPY3:%[0-9]+]]:gfx_ccr_sgpr_64 = COPY [[COPY1]] - ; CHECK-NEXT: S_SETPC_B64_return_gfx [[COPY3]] + ; CHECK-NEXT: SI_RETURN call amdgpu_gfx void @external_gfx_void_func_i32(i32 42) ret void } @@ -50,20 +44,18 @@ define amdgpu_gfx void @test_gfx_call_external_void_func_i32_imm(i32) #0 { define amdgpu_gfx void @test_gfx_call_external_void_func_i32_imm_inreg(i32 inreg) #0 { ; CHECK-LABEL: name: test_gfx_call_external_void_func_i32_imm_inreg ; CHECK: bb.1 (%ir-block.1): - ; CHECK-NEXT: liveins: $sgpr4, $sgpr30_sgpr31 + ; CHECK-NEXT: liveins: $sgpr4 ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr4 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 42 ; CHECK-NEXT: ADJCALLSTACKUP 0, 0, implicit-def $scc ; CHECK-NEXT: [[GV:%[0-9]+]]:_(p0) = G_GLOBAL_VALUE @external_gfx_void_func_i32_inreg ; CHECK-NEXT: $sgpr4 = COPY [[C]](s32) - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(<4 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3 - ; CHECK-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY2]](<4 x s32>) + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(<4 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3 + ; CHECK-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY1]](<4 x s32>) ; CHECK-NEXT: $sgpr30_sgpr31 = G_SI_CALL [[GV]](p0), @external_gfx_void_func_i32_inreg, csr_amdgpu_si_gfx, implicit $sgpr4, implicit $sgpr0_sgpr1_sgpr2_sgpr3 ; CHECK-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def $scc - ; CHECK-NEXT: [[COPY3:%[0-9]+]]:gfx_ccr_sgpr_64 = COPY [[COPY1]] - ; CHECK-NEXT: S_SETPC_B64_return_gfx [[COPY3]] + ; CHECK-NEXT: SI_RETURN call amdgpu_gfx void @external_gfx_void_func_i32_inreg(i32 inreg 42) ret void } @@ -71,9 +63,6 @@ define amdgpu_gfx void @test_gfx_call_external_void_func_i32_imm_inreg(i32 inreg define amdgpu_gfx void @test_gfx_call_external_void_func_struct_i8_i32() #0 { ; CHECK-LABEL: name: test_gfx_call_external_void_func_struct_i8_i32 ; CHECK: bb.1 (%ir-block.0): - ; CHECK-NEXT: liveins: $sgpr30_sgpr31 - ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p4) = G_IMPLICIT_DEF ; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[DEF]](p4) :: (load (p1) from `{ i8, i32 } addrspace(1)* addrspace(4)* undef`, addrspace 4) ; CHECK-NEXT: [[LOAD1:%[0-9]+]]:_(s8) = G_LOAD [[LOAD]](p1) :: (load (s8) from %ir.ptr0, align 4, addrspace 1) @@ -86,12 +75,11 @@ define amdgpu_gfx void @test_gfx_call_external_void_func_struct_i8_i32() #0 { ; CHECK-NEXT: [[ANYEXT1:%[0-9]+]]:_(s32) = G_ANYEXT [[ANYEXT]](s16) ; CHECK-NEXT: $vgpr0 = COPY [[ANYEXT1]](s32) ; CHECK-NEXT: $vgpr1 = COPY [[LOAD2]](s32) - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(<4 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3 - ; CHECK-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY1]](<4 x s32>) + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(<4 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3 + ; CHECK-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY]](<4 x s32>) ; CHECK-NEXT: $sgpr30_sgpr31 = G_SI_CALL [[GV]](p0), @external_gfx_void_func_struct_i8_i32, csr_amdgpu_si_gfx, implicit $vgpr0, implicit $vgpr1, implicit $sgpr0_sgpr1_sgpr2_sgpr3 ; CHECK-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def $scc - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:gfx_ccr_sgpr_64 = COPY [[COPY]] - ; CHECK-NEXT: S_SETPC_B64_return_gfx [[COPY2]] + ; CHECK-NEXT: SI_RETURN %ptr0 = load { i8, i32 } addrspace(1)*, { i8, i32 } addrspace(1)* addrspace(4)* undef %val = load { i8, i32 }, { i8, i32 } addrspace(1)* %ptr0 call amdgpu_gfx void @external_gfx_void_func_struct_i8_i32({ i8, i32 } %val) @@ -101,9 +89,6 @@ define amdgpu_gfx void 
@test_gfx_call_external_void_func_struct_i8_i32() #0 { define amdgpu_gfx void @test_gfx_call_external_void_func_struct_i8_i32_inreg() #0 { ; CHECK-LABEL: name: test_gfx_call_external_void_func_struct_i8_i32_inreg ; CHECK: bb.1 (%ir-block.0): - ; CHECK-NEXT: liveins: $sgpr30_sgpr31 - ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p4) = G_IMPLICIT_DEF ; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[DEF]](p4) :: (load (p1) from `{ i8, i32 } addrspace(1)* addrspace(4)* undef`, addrspace 4) ; CHECK-NEXT: [[LOAD1:%[0-9]+]]:_(s8) = G_LOAD [[LOAD]](p1) :: (load (s8) from %ir.ptr0, align 4, addrspace 1) @@ -116,12 +101,11 @@ define amdgpu_gfx void @test_gfx_call_external_void_func_struct_i8_i32_inreg() # ; CHECK-NEXT: [[ANYEXT1:%[0-9]+]]:_(s32) = G_ANYEXT [[ANYEXT]](s16) ; CHECK-NEXT: $sgpr4 = COPY [[ANYEXT1]](s32) ; CHECK-NEXT: $sgpr5 = COPY [[LOAD2]](s32) - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(<4 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3 - ; CHECK-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY1]](<4 x s32>) + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(<4 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3 + ; CHECK-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY]](<4 x s32>) ; CHECK-NEXT: $sgpr30_sgpr31 = G_SI_CALL [[GV]](p0), @external_gfx_void_func_struct_i8_i32_inreg, csr_amdgpu_si_gfx, implicit $sgpr4, implicit $sgpr5, implicit $sgpr0_sgpr1_sgpr2_sgpr3 ; CHECK-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def $scc - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:gfx_ccr_sgpr_64 = COPY [[COPY]] - ; CHECK-NEXT: S_SETPC_B64_return_gfx [[COPY2]] + ; CHECK-NEXT: SI_RETURN %ptr0 = load { i8, i32 } addrspace(1)*, { i8, i32 } addrspace(1)* addrspace(4)* undef %val = load { i8, i32 }, { i8, i32 } addrspace(1)* %ptr0 call amdgpu_gfx void @external_gfx_void_func_struct_i8_i32_inreg({ i8, i32 } inreg %val) diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-call-return-values.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-call-return-values.ll index 17d8f497cbcd..1ae8ee55c787 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-call-return-values.ll +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-call-return-values.ll @@ -129,24 +129,22 @@ define amdgpu_kernel void @test_call_external_i32_func_i32_imm(i32 addrspace(1)* define amdgpu_gfx void @test_gfx_call_external_i32_func_i32_imm(i32 addrspace(1)* %out) #0 { ; GCN-LABEL: name: test_gfx_call_external_i32_func_i32_imm ; GCN: bb.1 (%ir-block.0): - ; GCN-NEXT: liveins: $vgpr0, $vgpr1, $sgpr30_sgpr31 + ; GCN-NEXT: liveins: $vgpr0, $vgpr1 ; GCN-NEXT: {{ $}} ; GCN-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 ; GCN-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 ; GCN-NEXT: [[MV:%[0-9]+]]:_(p1) = G_MERGE_VALUES [[COPY]](s32), [[COPY1]](s32) - ; GCN-NEXT: [[COPY2:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 ; GCN-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 42 ; GCN-NEXT: ADJCALLSTACKUP 0, 0, implicit-def $scc ; GCN-NEXT: [[GV:%[0-9]+]]:_(p0) = G_GLOBAL_VALUE @external_gfx_i32_func_i32 ; GCN-NEXT: $vgpr0 = COPY [[C]](s32) - ; GCN-NEXT: [[COPY3:%[0-9]+]]:_(<4 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3 - ; GCN-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY3]](<4 x s32>) + ; GCN-NEXT: [[COPY2:%[0-9]+]]:_(<4 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3 + ; GCN-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY2]](<4 x s32>) ; GCN-NEXT: $sgpr30_sgpr31 = G_SI_CALL [[GV]](p0), @external_gfx_i32_func_i32, csr_amdgpu_si_gfx, implicit $vgpr0, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit-def $vgpr0 - ; GCN-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $vgpr0 + 
; GCN-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $vgpr0 ; GCN-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def $scc - ; GCN-NEXT: G_STORE [[COPY4]](s32), [[MV]](p1) :: (volatile store (s32) into %ir.out, addrspace 1) - ; GCN-NEXT: [[COPY5:%[0-9]+]]:gfx_ccr_sgpr_64 = COPY [[COPY2]] - ; GCN-NEXT: S_SETPC_B64_return_gfx [[COPY5]] + ; GCN-NEXT: G_STORE [[COPY3]](s32), [[MV]](p1) :: (volatile store (s32) into %ir.out, addrspace 1) + ; GCN-NEXT: SI_RETURN %val = call amdgpu_gfx i32 @external_gfx_i32_func_i32(i32 42) store volatile i32 %val, i32 addrspace(1)* %out ret void @@ -212,21 +210,17 @@ define amdgpu_kernel void @test_call_external_i1_func_void() #0 { define amdgpu_gfx void @test_gfx_call_external_i1_func_void() #0 { ; GCN-LABEL: name: test_gfx_call_external_i1_func_void ; GCN: bb.1 (%ir-block.0): - ; GCN-NEXT: liveins: $sgpr30_sgpr31 - ; GCN-NEXT: {{ $}} - ; GCN-NEXT: [[COPY:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 ; GCN-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF ; GCN-NEXT: ADJCALLSTACKUP 0, 0, implicit-def $scc ; GCN-NEXT: [[GV:%[0-9]+]]:_(p0) = G_GLOBAL_VALUE @external_gfx_i1_func_void - ; GCN-NEXT: [[COPY1:%[0-9]+]]:_(<4 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3 - ; GCN-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY1]](<4 x s32>) + ; GCN-NEXT: [[COPY:%[0-9]+]]:_(<4 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3 + ; GCN-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY]](<4 x s32>) ; GCN-NEXT: $sgpr30_sgpr31 = G_SI_CALL [[GV]](p0), @external_gfx_i1_func_void, csr_amdgpu_si_gfx, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit-def $vgpr0 - ; GCN-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GCN-NEXT: [[TRUNC:%[0-9]+]]:_(s1) = G_TRUNC [[COPY2]](s32) + ; GCN-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr0 + ; GCN-NEXT: [[TRUNC:%[0-9]+]]:_(s1) = G_TRUNC [[COPY1]](s32) ; GCN-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def $scc ; GCN-NEXT: G_STORE [[TRUNC]](s1), [[DEF]](p1) :: (volatile store (s1) into `i1 addrspace(1)* undef`, addrspace 1) - ; GCN-NEXT: [[COPY3:%[0-9]+]]:gfx_ccr_sgpr_64 = COPY [[COPY]] - ; GCN-NEXT: S_SETPC_B64_return_gfx [[COPY3]] + ; GCN-NEXT: SI_RETURN %val = call amdgpu_gfx i1 @external_gfx_i1_func_void() store volatile i1 %val, i1 addrspace(1)* undef ret void @@ -411,22 +405,18 @@ define amdgpu_kernel void @test_call_external_i8_func_void() #0 { define amdgpu_gfx void @test_gfx_call_external_i8_func_void() #0 { ; GCN-LABEL: name: test_gfx_call_external_i8_func_void ; GCN: bb.1 (%ir-block.0): - ; GCN-NEXT: liveins: $sgpr30_sgpr31 - ; GCN-NEXT: {{ $}} - ; GCN-NEXT: [[COPY:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 ; GCN-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF ; GCN-NEXT: ADJCALLSTACKUP 0, 0, implicit-def $scc ; GCN-NEXT: [[GV:%[0-9]+]]:_(p0) = G_GLOBAL_VALUE @external_gfx_i8_func_void - ; GCN-NEXT: [[COPY1:%[0-9]+]]:_(<4 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3 - ; GCN-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY1]](<4 x s32>) + ; GCN-NEXT: [[COPY:%[0-9]+]]:_(<4 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3 + ; GCN-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY]](<4 x s32>) ; GCN-NEXT: $sgpr30_sgpr31 = G_SI_CALL [[GV]](p0), @external_gfx_i8_func_void, csr_amdgpu_si_gfx, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit-def $vgpr0 - ; GCN-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GCN-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY2]](s32) + ; GCN-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr0 + ; GCN-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY1]](s32) ; GCN-NEXT: [[TRUNC1:%[0-9]+]]:_(s8) = G_TRUNC [[TRUNC]](s16) ; GCN-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def $scc ; GCN-NEXT: G_STORE [[TRUNC1]](s8), [[DEF]](p1) 
:: (volatile store (s8) into `i8 addrspace(1)* undef`, addrspace 1) - ; GCN-NEXT: [[COPY3:%[0-9]+]]:gfx_ccr_sgpr_64 = COPY [[COPY]] - ; GCN-NEXT: S_SETPC_B64_return_gfx [[COPY3]] + ; GCN-NEXT: SI_RETURN %val = call amdgpu_gfx i8 @external_gfx_i8_func_void() store volatile i8 %val, i8 addrspace(1)* undef ret void @@ -786,20 +776,16 @@ define amdgpu_kernel void @test_call_external_i32_func_void() #0 { define amdgpu_gfx void @test_gfx_call_external_i32_func_void() #0 { ; GCN-LABEL: name: test_gfx_call_external_i32_func_void ; GCN: bb.1 (%ir-block.0): - ; GCN-NEXT: liveins: $sgpr30_sgpr31 - ; GCN-NEXT: {{ $}} - ; GCN-NEXT: [[COPY:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 ; GCN-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF ; GCN-NEXT: ADJCALLSTACKUP 0, 0, implicit-def $scc ; GCN-NEXT: [[GV:%[0-9]+]]:_(p0) = G_GLOBAL_VALUE @external_gfx_i32_func_void - ; GCN-NEXT: [[COPY1:%[0-9]+]]:_(<4 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3 - ; GCN-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY1]](<4 x s32>) + ; GCN-NEXT: [[COPY:%[0-9]+]]:_(<4 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3 + ; GCN-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY]](<4 x s32>) ; GCN-NEXT: $sgpr30_sgpr31 = G_SI_CALL [[GV]](p0), @external_gfx_i32_func_void, csr_amdgpu_si_gfx, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit-def $vgpr0 - ; GCN-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr0 + ; GCN-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr0 ; GCN-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def $scc - ; GCN-NEXT: G_STORE [[COPY2]](s32), [[DEF]](p1) :: (volatile store (s32) into `i32 addrspace(1)* undef`, addrspace 1) - ; GCN-NEXT: [[COPY3:%[0-9]+]]:gfx_ccr_sgpr_64 = COPY [[COPY]] - ; GCN-NEXT: S_SETPC_B64_return_gfx [[COPY3]] + ; GCN-NEXT: G_STORE [[COPY1]](s32), [[DEF]](p1) :: (volatile store (s32) into `i32 addrspace(1)* undef`, addrspace 1) + ; GCN-NEXT: SI_RETURN %val = call amdgpu_gfx i32 @external_gfx_i32_func_void() store volatile i32 %val, i32 addrspace(1)* undef ret void @@ -2509,25 +2495,21 @@ define amdgpu_kernel void @test_call_external_i32_i64_func_void() #0 { define amdgpu_gfx void @test_gfx_call_external_i32_i64_func_void() #0 { ; GCN-LABEL: name: test_gfx_call_external_i32_i64_func_void ; GCN: bb.1 (%ir-block.0): - ; GCN-NEXT: liveins: $sgpr30_sgpr31 - ; GCN-NEXT: {{ $}} - ; GCN-NEXT: [[COPY:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 ; GCN-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF - ; GCN-NEXT: [[COPY1:%[0-9]+]]:_(p1) = COPY [[DEF]](p1) + ; GCN-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY [[DEF]](p1) ; GCN-NEXT: ADJCALLSTACKUP 0, 0, implicit-def $scc ; GCN-NEXT: [[GV:%[0-9]+]]:_(p0) = G_GLOBAL_VALUE @external_gfx_i32_i64_func_void - ; GCN-NEXT: [[COPY2:%[0-9]+]]:_(<4 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3 - ; GCN-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY2]](<4 x s32>) + ; GCN-NEXT: [[COPY1:%[0-9]+]]:_(<4 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3 + ; GCN-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY1]](<4 x s32>) ; GCN-NEXT: $sgpr30_sgpr31 = G_SI_CALL [[GV]](p0), @external_gfx_i32_i64_func_void, csr_amdgpu_si_gfx, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit-def $vgpr0, implicit-def $vgpr1, implicit-def $vgpr2 - ; GCN-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GCN-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GCN-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; GCN-NEXT: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY4]](s32), [[COPY5]](s32) + ; GCN-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr0 + ; GCN-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; GCN-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $vgpr2 + ; GCN-NEXT: [[MV:%[0-9]+]]:_(s64) = 
G_MERGE_VALUES [[COPY3]](s32), [[COPY4]](s32) ; GCN-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def $scc - ; GCN-NEXT: G_STORE [[COPY3]](s32), [[DEF]](p1) :: (volatile store (s32) into `i32 addrspace(1)* undef`, addrspace 1) - ; GCN-NEXT: G_STORE [[MV]](s64), [[COPY1]](p1) :: (volatile store (s64) into `i64 addrspace(1)* undef`, addrspace 1) - ; GCN-NEXT: [[COPY6:%[0-9]+]]:gfx_ccr_sgpr_64 = COPY [[COPY]] - ; GCN-NEXT: S_SETPC_B64_return_gfx [[COPY6]] + ; GCN-NEXT: G_STORE [[COPY2]](s32), [[DEF]](p1) :: (volatile store (s32) into `i32 addrspace(1)* undef`, addrspace 1) + ; GCN-NEXT: G_STORE [[MV]](s64), [[COPY]](p1) :: (volatile store (s64) into `i64 addrspace(1)* undef`, addrspace 1) + ; GCN-NEXT: SI_RETURN %val = call amdgpu_gfx { i32, i64 } @external_gfx_i32_i64_func_void() %val.0 = extractvalue { i32, i64 } %val, 0 %val.1 = extractvalue { i32, i64 } %val, 1 diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-call.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-call.ll index d81a40bfe6d0..9a7552591c5a 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-call.ll +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-call.ll @@ -149,17 +149,13 @@ define amdgpu_kernel void @test_call_external_void_func_void() #0 { define amdgpu_gfx void @test_gfx_call_external_void_func_void() #0 { ; CHECK-LABEL: name: test_gfx_call_external_void_func_void ; CHECK: bb.1 (%ir-block.0): - ; CHECK-NEXT: liveins: $sgpr30_sgpr31 - ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 ; CHECK-NEXT: ADJCALLSTACKUP 0, 0, implicit-def $scc ; CHECK-NEXT: [[GV:%[0-9]+]]:_(p0) = G_GLOBAL_VALUE @external_gfx_void_func_void - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(<4 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3 - ; CHECK-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY1]](<4 x s32>) + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(<4 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3 + ; CHECK-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY]](<4 x s32>) ; CHECK-NEXT: $sgpr30_sgpr31 = G_SI_CALL [[GV]](p0), @external_gfx_void_func_void, csr_amdgpu_si_gfx, implicit $sgpr0_sgpr1_sgpr2_sgpr3 ; CHECK-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def $scc - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:gfx_ccr_sgpr_64 = COPY [[COPY]] - ; CHECK-NEXT: S_SETPC_B64_return_gfx [[COPY2]] + ; CHECK-NEXT: SI_RETURN call amdgpu_gfx void @external_gfx_void_func_void() ret void } @@ -167,7 +163,7 @@ define amdgpu_gfx void @test_gfx_call_external_void_func_void() #0 { define void @test_func_call_external_void_func_void() #0 { ; CHECK-LABEL: name: test_func_call_external_void_func_void ; CHECK: bb.1 (%ir-block.0): - ; CHECK-NEXT: liveins: $sgpr12, $sgpr13, $sgpr14, $vgpr31, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9, $sgpr10_sgpr11, $sgpr30_sgpr31 + ; CHECK-NEXT: liveins: $sgpr12, $sgpr13, $sgpr14, $vgpr31, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9, $sgpr10_sgpr11 ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr31 ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr_32 = COPY $sgpr14 @@ -177,31 +173,29 @@ define void @test_func_call_external_void_func_void() #0 { ; CHECK-NEXT: [[COPY5:%[0-9]+]]:sgpr_64 = COPY $sgpr8_sgpr9 ; CHECK-NEXT: [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr6_sgpr7 ; CHECK-NEXT: [[COPY7:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5 - ; CHECK-NEXT: [[COPY8:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 ; CHECK-NEXT: ADJCALLSTACKUP 0, 0, implicit-def $scc ; CHECK-NEXT: [[GV:%[0-9]+]]:_(p0) = G_GLOBAL_VALUE @external_void_func_void - ; CHECK-NEXT: [[COPY9:%[0-9]+]]:_(p4) = COPY [[COPY7]] - ; CHECK-NEXT: [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY6]] - 
; CHECK-NEXT: [[COPY11:%[0-9]+]]:_(p4) = COPY [[COPY5]] - ; CHECK-NEXT: [[COPY12:%[0-9]+]]:_(s64) = COPY [[COPY4]] - ; CHECK-NEXT: [[COPY13:%[0-9]+]]:_(s32) = COPY [[COPY3]] - ; CHECK-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY2]] - ; CHECK-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY1]] - ; CHECK-NEXT: [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY]](s32) - ; CHECK-NEXT: [[COPY17:%[0-9]+]]:_(<4 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3 - ; CHECK-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY17]](<4 x s32>) - ; CHECK-NEXT: $sgpr4_sgpr5 = COPY [[COPY9]](p4) - ; CHECK-NEXT: $sgpr6_sgpr7 = COPY [[COPY10]](p4) - ; CHECK-NEXT: $sgpr8_sgpr9 = COPY [[COPY11]](p4) - ; CHECK-NEXT: $sgpr10_sgpr11 = COPY [[COPY12]](s64) - ; CHECK-NEXT: $sgpr12 = COPY [[COPY13]](s32) - ; CHECK-NEXT: $sgpr13 = COPY [[COPY14]](s32) - ; CHECK-NEXT: $sgpr14 = COPY [[COPY15]](s32) - ; CHECK-NEXT: $vgpr31 = COPY [[COPY16]](s32) + ; CHECK-NEXT: [[COPY8:%[0-9]+]]:_(p4) = COPY [[COPY7]] + ; CHECK-NEXT: [[COPY9:%[0-9]+]]:_(p4) = COPY [[COPY6]] + ; CHECK-NEXT: [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY5]] + ; CHECK-NEXT: [[COPY11:%[0-9]+]]:_(s64) = COPY [[COPY4]] + ; CHECK-NEXT: [[COPY12:%[0-9]+]]:_(s32) = COPY [[COPY3]] + ; CHECK-NEXT: [[COPY13:%[0-9]+]]:_(s32) = COPY [[COPY2]] + ; CHECK-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY1]] + ; CHECK-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY]](s32) + ; CHECK-NEXT: [[COPY16:%[0-9]+]]:_(<4 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3 + ; CHECK-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY16]](<4 x s32>) + ; CHECK-NEXT: $sgpr4_sgpr5 = COPY [[COPY8]](p4) + ; CHECK-NEXT: $sgpr6_sgpr7 = COPY [[COPY9]](p4) + ; CHECK-NEXT: $sgpr8_sgpr9 = COPY [[COPY10]](p4) + ; CHECK-NEXT: $sgpr10_sgpr11 = COPY [[COPY11]](s64) + ; CHECK-NEXT: $sgpr12 = COPY [[COPY12]](s32) + ; CHECK-NEXT: $sgpr13 = COPY [[COPY13]](s32) + ; CHECK-NEXT: $sgpr14 = COPY [[COPY14]](s32) + ; CHECK-NEXT: $vgpr31 = COPY [[COPY15]](s32) ; CHECK-NEXT: $sgpr30_sgpr31 = G_SI_CALL [[GV]](p0), @external_void_func_void, csr_amdgpu_highregs, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $vgpr31 ; CHECK-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def $scc - ; CHECK-NEXT: [[COPY18:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY8]] - ; CHECK-NEXT: S_SETPC_B64_return [[COPY18]] + ; CHECK-NEXT: SI_RETURN call void @external_void_func_void() ret void } @@ -889,20 +883,18 @@ define amdgpu_kernel void @test_call_external_void_func_i32_imm(i32) #0 { define amdgpu_gfx void @test_gfx_call_external_void_func_i32_imm(i32) #0 { ; CHECK-LABEL: name: test_gfx_call_external_void_func_i32_imm ; CHECK: bb.1 (%ir-block.1): - ; CHECK-NEXT: liveins: $vgpr0, $sgpr30_sgpr31 + ; CHECK-NEXT: liveins: $vgpr0 ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 42 ; CHECK-NEXT: ADJCALLSTACKUP 0, 0, implicit-def $scc ; CHECK-NEXT: [[GV:%[0-9]+]]:_(p0) = G_GLOBAL_VALUE @external_gfx_void_func_i32 ; CHECK-NEXT: $vgpr0 = COPY [[C]](s32) - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(<4 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3 - ; CHECK-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY2]](<4 x s32>) + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(<4 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3 + ; CHECK-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY1]](<4 x s32>) ; CHECK-NEXT: $sgpr30_sgpr31 = G_SI_CALL [[GV]](p0), @external_gfx_void_func_i32, csr_amdgpu_si_gfx, 
implicit $vgpr0, implicit $sgpr0_sgpr1_sgpr2_sgpr3 ; CHECK-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def $scc - ; CHECK-NEXT: [[COPY3:%[0-9]+]]:gfx_ccr_sgpr_64 = COPY [[COPY1]] - ; CHECK-NEXT: S_SETPC_B64_return_gfx [[COPY3]] + ; CHECK-NEXT: SI_RETURN call amdgpu_gfx void @external_gfx_void_func_i32(i32 42) ret void } @@ -910,20 +902,18 @@ define amdgpu_gfx void @test_gfx_call_external_void_func_i32_imm(i32) #0 { define amdgpu_gfx void @test_gfx_call_external_void_func_i32_imm_inreg(i32 inreg) #0 { ; CHECK-LABEL: name: test_gfx_call_external_void_func_i32_imm_inreg ; CHECK: bb.1 (%ir-block.1): - ; CHECK-NEXT: liveins: $sgpr4, $sgpr30_sgpr31 + ; CHECK-NEXT: liveins: $sgpr4 ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr4 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 42 ; CHECK-NEXT: ADJCALLSTACKUP 0, 0, implicit-def $scc ; CHECK-NEXT: [[GV:%[0-9]+]]:_(p0) = G_GLOBAL_VALUE @external_gfx_void_func_i32_inreg ; CHECK-NEXT: $sgpr4 = COPY [[C]](s32) - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(<4 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3 - ; CHECK-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY2]](<4 x s32>) + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(<4 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3 + ; CHECK-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY1]](<4 x s32>) ; CHECK-NEXT: $sgpr30_sgpr31 = G_SI_CALL [[GV]](p0), @external_gfx_void_func_i32_inreg, csr_amdgpu_si_gfx, implicit $sgpr4, implicit $sgpr0_sgpr1_sgpr2_sgpr3 ; CHECK-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def $scc - ; CHECK-NEXT: [[COPY3:%[0-9]+]]:gfx_ccr_sgpr_64 = COPY [[COPY1]] - ; CHECK-NEXT: S_SETPC_B64_return_gfx [[COPY3]] + ; CHECK-NEXT: SI_RETURN call amdgpu_gfx void @external_gfx_void_func_i32_inreg(i32 inreg 42) ret void } @@ -3876,9 +3866,6 @@ define amdgpu_kernel void @test_call_external_void_func_struct_i8_i32() #0 { define amdgpu_gfx void @test_gfx_call_external_void_func_struct_i8_i32() #0 { ; CHECK-LABEL: name: test_gfx_call_external_void_func_struct_i8_i32 ; CHECK: bb.1 (%ir-block.0): - ; CHECK-NEXT: liveins: $sgpr30_sgpr31 - ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p4) = G_IMPLICIT_DEF ; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[DEF]](p4) :: (load (p1) from `{ i8, i32 } addrspace(1)* addrspace(4)* undef`, addrspace 4) ; CHECK-NEXT: [[LOAD1:%[0-9]+]]:_(s8) = G_LOAD [[LOAD]](p1) :: (load (s8) from %ir.ptr0, align 4, addrspace 1) @@ -3891,12 +3878,11 @@ define amdgpu_gfx void @test_gfx_call_external_void_func_struct_i8_i32() #0 { ; CHECK-NEXT: [[ANYEXT1:%[0-9]+]]:_(s32) = G_ANYEXT [[ANYEXT]](s16) ; CHECK-NEXT: $vgpr0 = COPY [[ANYEXT1]](s32) ; CHECK-NEXT: $vgpr1 = COPY [[LOAD2]](s32) - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(<4 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3 - ; CHECK-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY1]](<4 x s32>) + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(<4 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3 + ; CHECK-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY]](<4 x s32>) ; CHECK-NEXT: $sgpr30_sgpr31 = G_SI_CALL [[GV]](p0), @external_gfx_void_func_struct_i8_i32, csr_amdgpu_si_gfx, implicit $vgpr0, implicit $vgpr1, implicit $sgpr0_sgpr1_sgpr2_sgpr3 ; CHECK-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def $scc - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:gfx_ccr_sgpr_64 = COPY [[COPY]] - ; CHECK-NEXT: S_SETPC_B64_return_gfx [[COPY2]] + ; CHECK-NEXT: SI_RETURN %ptr0 = load { i8, i32 } addrspace(1)*, { i8, i32 } addrspace(1)* addrspace(4)* undef %val = load { i8, i32 }, { i8, i32 } addrspace(1)* %ptr0 
call amdgpu_gfx void @external_gfx_void_func_struct_i8_i32({ i8, i32 } %val) @@ -3906,9 +3892,6 @@ define amdgpu_gfx void @test_gfx_call_external_void_func_struct_i8_i32() #0 { define amdgpu_gfx void @test_gfx_call_external_void_func_struct_i8_i32_inreg() #0 { ; CHECK-LABEL: name: test_gfx_call_external_void_func_struct_i8_i32_inreg ; CHECK: bb.1 (%ir-block.0): - ; CHECK-NEXT: liveins: $sgpr30_sgpr31 - ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p4) = G_IMPLICIT_DEF ; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[DEF]](p4) :: (load (p1) from `{ i8, i32 } addrspace(1)* addrspace(4)* undef`, addrspace 4) ; CHECK-NEXT: [[LOAD1:%[0-9]+]]:_(s8) = G_LOAD [[LOAD]](p1) :: (load (s8) from %ir.ptr0, align 4, addrspace 1) @@ -3921,12 +3904,11 @@ define amdgpu_gfx void @test_gfx_call_external_void_func_struct_i8_i32_inreg() # ; CHECK-NEXT: [[ANYEXT1:%[0-9]+]]:_(s32) = G_ANYEXT [[ANYEXT]](s16) ; CHECK-NEXT: $sgpr4 = COPY [[ANYEXT1]](s32) ; CHECK-NEXT: $sgpr5 = COPY [[LOAD2]](s32) - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(<4 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3 - ; CHECK-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY1]](<4 x s32>) + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(<4 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3 + ; CHECK-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY]](<4 x s32>) ; CHECK-NEXT: $sgpr30_sgpr31 = G_SI_CALL [[GV]](p0), @external_gfx_void_func_struct_i8_i32_inreg, csr_amdgpu_si_gfx, implicit $sgpr4, implicit $sgpr5, implicit $sgpr0_sgpr1_sgpr2_sgpr3 ; CHECK-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def $scc - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:gfx_ccr_sgpr_64 = COPY [[COPY]] - ; CHECK-NEXT: S_SETPC_B64_return_gfx [[COPY2]] + ; CHECK-NEXT: SI_RETURN %ptr0 = load { i8, i32 } addrspace(1)*, { i8, i32 } addrspace(1)* addrspace(4)* undef %val = load { i8, i32 }, { i8, i32 } addrspace(1)* %ptr0 call amdgpu_gfx void @external_gfx_void_func_struct_i8_i32_inreg({ i8, i32 } inreg %val) @@ -4007,7 +3989,7 @@ declare void @void_func_byval_a3i32_byval_i8_align32([3 x i32] addrspace(5)* byv define void @call_byval_3ai32_byval_i8_align32([3 x i32] addrspace(5)* %incoming0, i8 addrspace(5)* align 32 %incoming1) #0 { ; CHECK-LABEL: name: call_byval_3ai32_byval_i8_align32 ; CHECK: bb.1 (%ir-block.0): - ; CHECK-NEXT: liveins: $sgpr12, $sgpr13, $sgpr14, $vgpr0, $vgpr1, $vgpr31, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9, $sgpr10_sgpr11, $sgpr30_sgpr31 + ; CHECK-NEXT: liveins: $sgpr12, $sgpr13, $sgpr14, $vgpr0, $vgpr1, $vgpr31, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9, $sgpr10_sgpr11 ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr31 ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr_32 = COPY $sgpr14 @@ -4019,42 +4001,40 @@ define void @call_byval_3ai32_byval_i8_align32([3 x i32] addrspace(5)* %incoming ; CHECK-NEXT: [[COPY7:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5 ; CHECK-NEXT: [[COPY8:%[0-9]+]]:_(p5) = COPY $vgpr0 ; CHECK-NEXT: [[COPY9:%[0-9]+]]:_(p5) = COPY $vgpr1 - ; CHECK-NEXT: [[COPY10:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 999 ; CHECK-NEXT: ADJCALLSTACKUP 0, 0, implicit-def $scc ; CHECK-NEXT: [[GV:%[0-9]+]]:_(p0) = G_GLOBAL_VALUE @void_func_byval_a3i32_byval_i8_align32 - ; CHECK-NEXT: [[COPY11:%[0-9]+]]:_(p4) = COPY [[COPY7]] - ; CHECK-NEXT: [[COPY12:%[0-9]+]]:_(p4) = COPY [[COPY6]] - ; CHECK-NEXT: [[COPY13:%[0-9]+]]:_(p4) = COPY [[COPY5]] - ; CHECK-NEXT: [[COPY14:%[0-9]+]]:_(s64) = COPY [[COPY4]] - ; CHECK-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY3]] - ; CHECK-NEXT: 
[[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY2]] - ; CHECK-NEXT: [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY1]] - ; CHECK-NEXT: [[COPY18:%[0-9]+]]:_(s32) = COPY [[COPY]](s32) - ; CHECK-NEXT: [[COPY19:%[0-9]+]]:_(p5) = COPY $sgpr32 + ; CHECK-NEXT: [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY7]] + ; CHECK-NEXT: [[COPY11:%[0-9]+]]:_(p4) = COPY [[COPY6]] + ; CHECK-NEXT: [[COPY12:%[0-9]+]]:_(p4) = COPY [[COPY5]] + ; CHECK-NEXT: [[COPY13:%[0-9]+]]:_(s64) = COPY [[COPY4]] + ; CHECK-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY3]] + ; CHECK-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY2]] + ; CHECK-NEXT: [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY1]] + ; CHECK-NEXT: [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY]](s32) + ; CHECK-NEXT: [[COPY18:%[0-9]+]]:_(p5) = COPY $sgpr32 ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 - ; CHECK-NEXT: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY19]], [[C1]](s32) + ; CHECK-NEXT: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY18]], [[C1]](s32) ; CHECK-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 12 ; CHECK-NEXT: G_MEMCPY [[PTR_ADD]](p5), [[COPY8]](p5), [[C2]](s32), 0 :: (dereferenceable store (s96) into stack, align 4, addrspace 5), (dereferenceable load (s96) from %ir.incoming0, align 4, addrspace 5) ; CHECK-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 32 - ; CHECK-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY19]], [[C3]](s32) + ; CHECK-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY18]], [[C3]](s32) ; CHECK-NEXT: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 ; CHECK-NEXT: G_MEMCPY [[PTR_ADD1]](p5), [[COPY9]](p5), [[C4]](s32), 0 :: (dereferenceable store (s8) into stack + 32, align 32, addrspace 5), (dereferenceable load (s8) from %ir.incoming1, align 32, addrspace 5) ; CHECK-NEXT: $vgpr0 = COPY [[C]](s32) - ; CHECK-NEXT: [[COPY20:%[0-9]+]]:_(<4 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3 - ; CHECK-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY20]](<4 x s32>) - ; CHECK-NEXT: $sgpr4_sgpr5 = COPY [[COPY11]](p4) - ; CHECK-NEXT: $sgpr6_sgpr7 = COPY [[COPY12]](p4) - ; CHECK-NEXT: $sgpr8_sgpr9 = COPY [[COPY13]](p4) - ; CHECK-NEXT: $sgpr10_sgpr11 = COPY [[COPY14]](s64) - ; CHECK-NEXT: $sgpr12 = COPY [[COPY15]](s32) - ; CHECK-NEXT: $sgpr13 = COPY [[COPY16]](s32) - ; CHECK-NEXT: $sgpr14 = COPY [[COPY17]](s32) - ; CHECK-NEXT: $vgpr31 = COPY [[COPY18]](s32) + ; CHECK-NEXT: [[COPY19:%[0-9]+]]:_(<4 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3 + ; CHECK-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY19]](<4 x s32>) + ; CHECK-NEXT: $sgpr4_sgpr5 = COPY [[COPY10]](p4) + ; CHECK-NEXT: $sgpr6_sgpr7 = COPY [[COPY11]](p4) + ; CHECK-NEXT: $sgpr8_sgpr9 = COPY [[COPY12]](p4) + ; CHECK-NEXT: $sgpr10_sgpr11 = COPY [[COPY13]](s64) + ; CHECK-NEXT: $sgpr12 = COPY [[COPY14]](s32) + ; CHECK-NEXT: $sgpr13 = COPY [[COPY15]](s32) + ; CHECK-NEXT: $sgpr14 = COPY [[COPY16]](s32) + ; CHECK-NEXT: $vgpr31 = COPY [[COPY17]](s32) ; CHECK-NEXT: $sgpr30_sgpr31 = G_SI_CALL [[GV]](p0), @void_func_byval_a3i32_byval_i8_align32, csr_amdgpu_highregs, implicit $vgpr0, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $vgpr31 ; CHECK-NEXT: ADJCALLSTACKDOWN 0, 36, implicit-def $scc - ; CHECK-NEXT: [[COPY21:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY10]] - ; CHECK-NEXT: S_SETPC_B64_return [[COPY21]] + ; CHECK-NEXT: SI_RETURN call void @void_func_byval_a3i32_byval_i8_align32([3 x i32] addrspace(5)* byval([3 x i32]) %incoming0, i8 addrspace(5)* align 32 %incoming1, i32 999) ret void } @@ -4066,7 +4046,7 @@ declare 
void @void_func_byval_a4i64_align4([4 x i64] addrspace(5)* byval([4 x i6 define void @call_byval_a4i64_align4_higher_source_align([4 x i64] addrspace(5)* align 256 %incoming_high_align) #0 { ; CHECK-LABEL: name: call_byval_a4i64_align4_higher_source_align ; CHECK: bb.1 (%ir-block.0): - ; CHECK-NEXT: liveins: $sgpr12, $sgpr13, $sgpr14, $vgpr0, $vgpr31, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9, $sgpr10_sgpr11, $sgpr30_sgpr31 + ; CHECK-NEXT: liveins: $sgpr12, $sgpr13, $sgpr14, $vgpr0, $vgpr31, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9, $sgpr10_sgpr11 ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr31 ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr_32 = COPY $sgpr14 @@ -4077,36 +4057,34 @@ define void @call_byval_a4i64_align4_higher_source_align([4 x i64] addrspace(5)* ; CHECK-NEXT: [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr6_sgpr7 ; CHECK-NEXT: [[COPY7:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5 ; CHECK-NEXT: [[COPY8:%[0-9]+]]:_(p5) = COPY $vgpr0 - ; CHECK-NEXT: [[COPY9:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 ; CHECK-NEXT: ADJCALLSTACKUP 0, 0, implicit-def $scc ; CHECK-NEXT: [[GV:%[0-9]+]]:_(p0) = G_GLOBAL_VALUE @void_func_byval_a4i64_align4 - ; CHECK-NEXT: [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY7]] - ; CHECK-NEXT: [[COPY11:%[0-9]+]]:_(p4) = COPY [[COPY6]] - ; CHECK-NEXT: [[COPY12:%[0-9]+]]:_(p4) = COPY [[COPY5]] - ; CHECK-NEXT: [[COPY13:%[0-9]+]]:_(s64) = COPY [[COPY4]] - ; CHECK-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY3]] - ; CHECK-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY2]] - ; CHECK-NEXT: [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY1]] - ; CHECK-NEXT: [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY]](s32) - ; CHECK-NEXT: [[COPY18:%[0-9]+]]:_(p5) = COPY $sgpr32 + ; CHECK-NEXT: [[COPY9:%[0-9]+]]:_(p4) = COPY [[COPY7]] + ; CHECK-NEXT: [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY6]] + ; CHECK-NEXT: [[COPY11:%[0-9]+]]:_(p4) = COPY [[COPY5]] + ; CHECK-NEXT: [[COPY12:%[0-9]+]]:_(s64) = COPY [[COPY4]] + ; CHECK-NEXT: [[COPY13:%[0-9]+]]:_(s32) = COPY [[COPY3]] + ; CHECK-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY2]] + ; CHECK-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY1]] + ; CHECK-NEXT: [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY]](s32) + ; CHECK-NEXT: [[COPY17:%[0-9]+]]:_(p5) = COPY $sgpr32 ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 - ; CHECK-NEXT: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY18]], [[C]](s32) + ; CHECK-NEXT: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY17]], [[C]](s32) ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 32 ; CHECK-NEXT: G_MEMCPY [[PTR_ADD]](p5), [[COPY8]](p5), [[C1]](s32), 0 :: (dereferenceable store (s256) into stack, align 4, addrspace 5), (dereferenceable load (s256) from %ir.incoming_high_align, align 256, addrspace 5) - ; CHECK-NEXT: [[COPY19:%[0-9]+]]:_(<4 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3 - ; CHECK-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY19]](<4 x s32>) - ; CHECK-NEXT: $sgpr4_sgpr5 = COPY [[COPY10]](p4) - ; CHECK-NEXT: $sgpr6_sgpr7 = COPY [[COPY11]](p4) - ; CHECK-NEXT: $sgpr8_sgpr9 = COPY [[COPY12]](p4) - ; CHECK-NEXT: $sgpr10_sgpr11 = COPY [[COPY13]](s64) - ; CHECK-NEXT: $sgpr12 = COPY [[COPY14]](s32) - ; CHECK-NEXT: $sgpr13 = COPY [[COPY15]](s32) - ; CHECK-NEXT: $sgpr14 = COPY [[COPY16]](s32) - ; CHECK-NEXT: $vgpr31 = COPY [[COPY17]](s32) + ; CHECK-NEXT: [[COPY18:%[0-9]+]]:_(<4 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3 + ; CHECK-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY18]](<4 x s32>) + ; CHECK-NEXT: $sgpr4_sgpr5 = COPY [[COPY9]](p4) + ; CHECK-NEXT: $sgpr6_sgpr7 = COPY [[COPY10]](p4) + ; CHECK-NEXT: $sgpr8_sgpr9 = COPY [[COPY11]](p4) + 
; CHECK-NEXT: $sgpr10_sgpr11 = COPY [[COPY12]](s64) + ; CHECK-NEXT: $sgpr12 = COPY [[COPY13]](s32) + ; CHECK-NEXT: $sgpr13 = COPY [[COPY14]](s32) + ; CHECK-NEXT: $sgpr14 = COPY [[COPY15]](s32) + ; CHECK-NEXT: $vgpr31 = COPY [[COPY16]](s32) ; CHECK-NEXT: $sgpr30_sgpr31 = G_SI_CALL [[GV]](p0), @void_func_byval_a4i64_align4, csr_amdgpu_highregs, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $vgpr31 ; CHECK-NEXT: ADJCALLSTACKDOWN 0, 32, implicit-def $scc - ; CHECK-NEXT: [[COPY20:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY9]] - ; CHECK-NEXT: S_SETPC_B64_return [[COPY20]] + ; CHECK-NEXT: SI_RETURN call void @void_func_byval_a4i64_align4([4 x i64] addrspace(5)* byval([4 x i64]) align 4 %incoming_high_align) ret void } @@ -4604,7 +4582,7 @@ entry: define void @stack_12xv3i32() #0 { ; CHECK-LABEL: name: stack_12xv3i32 ; CHECK: bb.1.entry: - ; CHECK-NEXT: liveins: $sgpr12, $sgpr13, $sgpr14, $vgpr31, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9, $sgpr10_sgpr11, $sgpr30_sgpr31 + ; CHECK-NEXT: liveins: $sgpr12, $sgpr13, $sgpr14, $vgpr31, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9, $sgpr10_sgpr11 ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr31 ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr_32 = COPY $sgpr14 @@ -4614,7 +4592,6 @@ define void @stack_12xv3i32() #0 { ; CHECK-NEXT: [[COPY5:%[0-9]+]]:sgpr_64 = COPY $sgpr8_sgpr9 ; CHECK-NEXT: [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr6_sgpr7 ; CHECK-NEXT: [[COPY7:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5 - ; CHECK-NEXT: [[COPY8:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[C]](s32), [[C]](s32), [[C]](s32) ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 @@ -4645,14 +4622,14 @@ define void @stack_12xv3i32() #0 { ; CHECK-NEXT: [[BUILD_VECTOR11:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[C13]](s32), [[C14]](s32), [[C15]](s32) ; CHECK-NEXT: ADJCALLSTACKUP 0, 0, implicit-def $scc ; CHECK-NEXT: [[GV:%[0-9]+]]:_(p0) = G_GLOBAL_VALUE @external_void_func_12xv3i32 - ; CHECK-NEXT: [[COPY9:%[0-9]+]]:_(p4) = COPY [[COPY7]] - ; CHECK-NEXT: [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY6]] - ; CHECK-NEXT: [[COPY11:%[0-9]+]]:_(p4) = COPY [[COPY5]] - ; CHECK-NEXT: [[COPY12:%[0-9]+]]:_(s64) = COPY [[COPY4]] - ; CHECK-NEXT: [[COPY13:%[0-9]+]]:_(s32) = COPY [[COPY3]] - ; CHECK-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY2]] - ; CHECK-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY1]] - ; CHECK-NEXT: [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY]](s32) + ; CHECK-NEXT: [[COPY8:%[0-9]+]]:_(p4) = COPY [[COPY7]] + ; CHECK-NEXT: [[COPY9:%[0-9]+]]:_(p4) = COPY [[COPY6]] + ; CHECK-NEXT: [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY5]] + ; CHECK-NEXT: [[COPY11:%[0-9]+]]:_(s64) = COPY [[COPY4]] + ; CHECK-NEXT: [[COPY12:%[0-9]+]]:_(s32) = COPY [[COPY3]] + ; CHECK-NEXT: [[COPY13:%[0-9]+]]:_(s32) = COPY [[COPY2]] + ; CHECK-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY1]] + ; CHECK-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY]](s32) ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[BUILD_VECTOR]](<3 x s32>) ; CHECK-NEXT: [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[BUILD_VECTOR1]](<3 x s32>) ; CHECK-NEXT: [[UV6:%[0-9]+]]:_(s32), [[UV7:%[0-9]+]]:_(s32), [[UV8:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[BUILD_VECTOR2]](<3 x s32>) @@ -4664,22 +4641,22 @@ 
define void @stack_12xv3i32() #0 { ; CHECK-NEXT: [[UV24:%[0-9]+]]:_(s32), [[UV25:%[0-9]+]]:_(s32), [[UV26:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[BUILD_VECTOR8]](<3 x s32>) ; CHECK-NEXT: [[UV27:%[0-9]+]]:_(s32), [[UV28:%[0-9]+]]:_(s32), [[UV29:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[BUILD_VECTOR9]](<3 x s32>) ; CHECK-NEXT: [[UV30:%[0-9]+]]:_(s32), [[UV31:%[0-9]+]]:_(s32), [[UV32:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[BUILD_VECTOR10]](<3 x s32>) - ; CHECK-NEXT: [[COPY17:%[0-9]+]]:_(p5) = COPY $sgpr32 + ; CHECK-NEXT: [[COPY16:%[0-9]+]]:_(p5) = COPY $sgpr32 ; CHECK-NEXT: [[C16:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 - ; CHECK-NEXT: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY17]], [[C16]](s32) + ; CHECK-NEXT: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY16]], [[C16]](s32) ; CHECK-NEXT: G_STORE [[UV31]](s32), [[PTR_ADD]](p5) :: (store (s32) into stack, align 16, addrspace 5) ; CHECK-NEXT: [[C17:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 - ; CHECK-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY17]], [[C17]](s32) + ; CHECK-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY16]], [[C17]](s32) ; CHECK-NEXT: G_STORE [[UV32]](s32), [[PTR_ADD1]](p5) :: (store (s32) into stack + 4, addrspace 5) ; CHECK-NEXT: [[UV33:%[0-9]+]]:_(s32), [[UV34:%[0-9]+]]:_(s32), [[UV35:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[BUILD_VECTOR11]](<3 x s32>) ; CHECK-NEXT: [[C18:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 - ; CHECK-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY17]], [[C18]](s32) + ; CHECK-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY16]], [[C18]](s32) ; CHECK-NEXT: G_STORE [[UV33]](s32), [[PTR_ADD2]](p5) :: (store (s32) into stack + 8, align 8, addrspace 5) ; CHECK-NEXT: [[C19:%[0-9]+]]:_(s32) = G_CONSTANT i32 12 - ; CHECK-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY17]], [[C19]](s32) + ; CHECK-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY16]], [[C19]](s32) ; CHECK-NEXT: G_STORE [[UV34]](s32), [[PTR_ADD3]](p5) :: (store (s32) into stack + 12, addrspace 5) ; CHECK-NEXT: [[C20:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; CHECK-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY17]], [[C20]](s32) + ; CHECK-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY16]], [[C20]](s32) ; CHECK-NEXT: G_STORE [[UV35]](s32), [[PTR_ADD4]](p5) :: (store (s32) into stack + 16, align 16, addrspace 5) ; CHECK-NEXT: $vgpr0 = COPY [[UV]](s32) ; CHECK-NEXT: $vgpr1 = COPY [[UV1]](s32) @@ -4712,20 +4689,19 @@ define void @stack_12xv3i32() #0 { ; CHECK-NEXT: $vgpr28 = COPY [[UV28]](s32) ; CHECK-NEXT: $vgpr29 = COPY [[UV29]](s32) ; CHECK-NEXT: $vgpr30 = COPY [[UV30]](s32) - ; CHECK-NEXT: [[COPY18:%[0-9]+]]:_(<4 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3 - ; CHECK-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY18]](<4 x s32>) - ; CHECK-NEXT: $sgpr4_sgpr5 = COPY [[COPY9]](p4) - ; CHECK-NEXT: $sgpr6_sgpr7 = COPY [[COPY10]](p4) - ; CHECK-NEXT: $sgpr8_sgpr9 = COPY [[COPY11]](p4) - ; CHECK-NEXT: $sgpr10_sgpr11 = COPY [[COPY12]](s64) - ; CHECK-NEXT: $sgpr12 = COPY [[COPY13]](s32) - ; CHECK-NEXT: $sgpr13 = COPY [[COPY14]](s32) - ; CHECK-NEXT: $sgpr14 = COPY [[COPY15]](s32) - ; CHECK-NEXT: $vgpr31 = COPY [[COPY16]](s32) + ; CHECK-NEXT: [[COPY17:%[0-9]+]]:_(<4 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3 + ; CHECK-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY17]](<4 x s32>) + ; CHECK-NEXT: $sgpr4_sgpr5 = COPY [[COPY8]](p4) + ; CHECK-NEXT: $sgpr6_sgpr7 = COPY [[COPY9]](p4) + ; CHECK-NEXT: $sgpr8_sgpr9 = COPY [[COPY10]](p4) + ; CHECK-NEXT: $sgpr10_sgpr11 = COPY [[COPY11]](s64) + ; CHECK-NEXT: $sgpr12 = COPY [[COPY12]](s32) + ; CHECK-NEXT: $sgpr13 = COPY 
[[COPY13]](s32) + ; CHECK-NEXT: $sgpr14 = COPY [[COPY14]](s32) + ; CHECK-NEXT: $vgpr31 = COPY [[COPY15]](s32) ; CHECK-NEXT: $sgpr30_sgpr31 = G_SI_CALL [[GV]](p0), @external_void_func_12xv3i32, csr_amdgpu_highregs, implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7, implicit $vgpr8, implicit $vgpr9, implicit $vgpr10, implicit $vgpr11, implicit $vgpr12, implicit $vgpr13, implicit $vgpr14, implicit $vgpr15, implicit $vgpr16, implicit $vgpr17, implicit $vgpr18, implicit $vgpr19, implicit $vgpr20, implicit $vgpr21, implicit $vgpr22, implicit $vgpr23, implicit $vgpr24, implicit $vgpr25, implicit $vgpr26, implicit $vgpr27, implicit $vgpr28, implicit $vgpr29, implicit $vgpr30, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $vgpr31 ; CHECK-NEXT: ADJCALLSTACKDOWN 0, 20, implicit-def $scc - ; CHECK-NEXT: [[COPY19:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY8]] - ; CHECK-NEXT: S_SETPC_B64_return [[COPY19]] + ; CHECK-NEXT: SI_RETURN entry: call void @external_void_func_12xv3i32( <3 x i32> , @@ -4746,7 +4722,7 @@ entry: define void @stack_12xv3f32() #0 { ; CHECK-LABEL: name: stack_12xv3f32 ; CHECK: bb.1.entry: - ; CHECK-NEXT: liveins: $sgpr12, $sgpr13, $sgpr14, $vgpr31, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9, $sgpr10_sgpr11, $sgpr30_sgpr31 + ; CHECK-NEXT: liveins: $sgpr12, $sgpr13, $sgpr14, $vgpr31, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9, $sgpr10_sgpr11 ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr31 ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr_32 = COPY $sgpr14 @@ -4756,7 +4732,6 @@ define void @stack_12xv3f32() #0 { ; CHECK-NEXT: [[COPY5:%[0-9]+]]:sgpr_64 = COPY $sgpr8_sgpr9 ; CHECK-NEXT: [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr6_sgpr7 ; CHECK-NEXT: [[COPY7:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5 - ; CHECK-NEXT: [[COPY8:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_FCONSTANT float 0.000000e+00 ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[C]](s32), [[C]](s32), [[C]](s32) ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s32) = G_FCONSTANT float 1.000000e+00 @@ -4787,14 +4762,14 @@ define void @stack_12xv3f32() #0 { ; CHECK-NEXT: [[BUILD_VECTOR11:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[C13]](s32), [[C14]](s32), [[C15]](s32) ; CHECK-NEXT: ADJCALLSTACKUP 0, 0, implicit-def $scc ; CHECK-NEXT: [[GV:%[0-9]+]]:_(p0) = G_GLOBAL_VALUE @external_void_func_12xv3f32 - ; CHECK-NEXT: [[COPY9:%[0-9]+]]:_(p4) = COPY [[COPY7]] - ; CHECK-NEXT: [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY6]] - ; CHECK-NEXT: [[COPY11:%[0-9]+]]:_(p4) = COPY [[COPY5]] - ; CHECK-NEXT: [[COPY12:%[0-9]+]]:_(s64) = COPY [[COPY4]] - ; CHECK-NEXT: [[COPY13:%[0-9]+]]:_(s32) = COPY [[COPY3]] - ; CHECK-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY2]] - ; CHECK-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY1]] - ; CHECK-NEXT: [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY]](s32) + ; CHECK-NEXT: [[COPY8:%[0-9]+]]:_(p4) = COPY [[COPY7]] + ; CHECK-NEXT: [[COPY9:%[0-9]+]]:_(p4) = COPY [[COPY6]] + ; CHECK-NEXT: [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY5]] + ; CHECK-NEXT: [[COPY11:%[0-9]+]]:_(s64) = COPY [[COPY4]] + ; CHECK-NEXT: [[COPY12:%[0-9]+]]:_(s32) = COPY [[COPY3]] + ; CHECK-NEXT: [[COPY13:%[0-9]+]]:_(s32) = COPY [[COPY2]] + ; CHECK-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY1]] + ; CHECK-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY]](s32) ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s32), 
[[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[BUILD_VECTOR]](<3 x s32>) ; CHECK-NEXT: [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[BUILD_VECTOR1]](<3 x s32>) ; CHECK-NEXT: [[UV6:%[0-9]+]]:_(s32), [[UV7:%[0-9]+]]:_(s32), [[UV8:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[BUILD_VECTOR2]](<3 x s32>) @@ -4806,22 +4781,22 @@ define void @stack_12xv3f32() #0 { ; CHECK-NEXT: [[UV24:%[0-9]+]]:_(s32), [[UV25:%[0-9]+]]:_(s32), [[UV26:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[BUILD_VECTOR8]](<3 x s32>) ; CHECK-NEXT: [[UV27:%[0-9]+]]:_(s32), [[UV28:%[0-9]+]]:_(s32), [[UV29:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[BUILD_VECTOR9]](<3 x s32>) ; CHECK-NEXT: [[UV30:%[0-9]+]]:_(s32), [[UV31:%[0-9]+]]:_(s32), [[UV32:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[BUILD_VECTOR10]](<3 x s32>) - ; CHECK-NEXT: [[COPY17:%[0-9]+]]:_(p5) = COPY $sgpr32 + ; CHECK-NEXT: [[COPY16:%[0-9]+]]:_(p5) = COPY $sgpr32 ; CHECK-NEXT: [[C16:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 - ; CHECK-NEXT: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY17]], [[C16]](s32) + ; CHECK-NEXT: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY16]], [[C16]](s32) ; CHECK-NEXT: G_STORE [[UV31]](s32), [[PTR_ADD]](p5) :: (store (s32) into stack, align 16, addrspace 5) ; CHECK-NEXT: [[C17:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 - ; CHECK-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY17]], [[C17]](s32) + ; CHECK-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY16]], [[C17]](s32) ; CHECK-NEXT: G_STORE [[UV32]](s32), [[PTR_ADD1]](p5) :: (store (s32) into stack + 4, addrspace 5) ; CHECK-NEXT: [[UV33:%[0-9]+]]:_(s32), [[UV34:%[0-9]+]]:_(s32), [[UV35:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[BUILD_VECTOR11]](<3 x s32>) ; CHECK-NEXT: [[C18:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 - ; CHECK-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY17]], [[C18]](s32) + ; CHECK-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY16]], [[C18]](s32) ; CHECK-NEXT: G_STORE [[UV33]](s32), [[PTR_ADD2]](p5) :: (store (s32) into stack + 8, align 8, addrspace 5) ; CHECK-NEXT: [[C19:%[0-9]+]]:_(s32) = G_CONSTANT i32 12 - ; CHECK-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY17]], [[C19]](s32) + ; CHECK-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY16]], [[C19]](s32) ; CHECK-NEXT: G_STORE [[UV34]](s32), [[PTR_ADD3]](p5) :: (store (s32) into stack + 12, addrspace 5) ; CHECK-NEXT: [[C20:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; CHECK-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY17]], [[C20]](s32) + ; CHECK-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY16]], [[C20]](s32) ; CHECK-NEXT: G_STORE [[UV35]](s32), [[PTR_ADD4]](p5) :: (store (s32) into stack + 16, align 16, addrspace 5) ; CHECK-NEXT: $vgpr0 = COPY [[UV]](s32) ; CHECK-NEXT: $vgpr1 = COPY [[UV1]](s32) @@ -4854,20 +4829,19 @@ define void @stack_12xv3f32() #0 { ; CHECK-NEXT: $vgpr28 = COPY [[UV28]](s32) ; CHECK-NEXT: $vgpr29 = COPY [[UV29]](s32) ; CHECK-NEXT: $vgpr30 = COPY [[UV30]](s32) - ; CHECK-NEXT: [[COPY18:%[0-9]+]]:_(<4 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3 - ; CHECK-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY18]](<4 x s32>) - ; CHECK-NEXT: $sgpr4_sgpr5 = COPY [[COPY9]](p4) - ; CHECK-NEXT: $sgpr6_sgpr7 = COPY [[COPY10]](p4) - ; CHECK-NEXT: $sgpr8_sgpr9 = COPY [[COPY11]](p4) - ; CHECK-NEXT: $sgpr10_sgpr11 = COPY [[COPY12]](s64) - ; CHECK-NEXT: $sgpr12 = COPY [[COPY13]](s32) - ; CHECK-NEXT: $sgpr13 = COPY [[COPY14]](s32) - ; CHECK-NEXT: $sgpr14 = COPY [[COPY15]](s32) - ; CHECK-NEXT: $vgpr31 = COPY [[COPY16]](s32) + ; CHECK-NEXT: [[COPY17:%[0-9]+]]:_(<4 x s32>) = COPY 
$sgpr0_sgpr1_sgpr2_sgpr3 + ; CHECK-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY17]](<4 x s32>) + ; CHECK-NEXT: $sgpr4_sgpr5 = COPY [[COPY8]](p4) + ; CHECK-NEXT: $sgpr6_sgpr7 = COPY [[COPY9]](p4) + ; CHECK-NEXT: $sgpr8_sgpr9 = COPY [[COPY10]](p4) + ; CHECK-NEXT: $sgpr10_sgpr11 = COPY [[COPY11]](s64) + ; CHECK-NEXT: $sgpr12 = COPY [[COPY12]](s32) + ; CHECK-NEXT: $sgpr13 = COPY [[COPY13]](s32) + ; CHECK-NEXT: $sgpr14 = COPY [[COPY14]](s32) + ; CHECK-NEXT: $vgpr31 = COPY [[COPY15]](s32) ; CHECK-NEXT: $sgpr30_sgpr31 = G_SI_CALL [[GV]](p0), @external_void_func_12xv3f32, csr_amdgpu_highregs, implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7, implicit $vgpr8, implicit $vgpr9, implicit $vgpr10, implicit $vgpr11, implicit $vgpr12, implicit $vgpr13, implicit $vgpr14, implicit $vgpr15, implicit $vgpr16, implicit $vgpr17, implicit $vgpr18, implicit $vgpr19, implicit $vgpr20, implicit $vgpr21, implicit $vgpr22, implicit $vgpr23, implicit $vgpr24, implicit $vgpr25, implicit $vgpr26, implicit $vgpr27, implicit $vgpr28, implicit $vgpr29, implicit $vgpr30, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $vgpr31 ; CHECK-NEXT: ADJCALLSTACKDOWN 0, 20, implicit-def $scc - ; CHECK-NEXT: [[COPY19:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY8]] - ; CHECK-NEXT: S_SETPC_B64_return [[COPY19]] + ; CHECK-NEXT: SI_RETURN entry: call void @external_void_func_12xv3f32( <3 x float> , @@ -4888,7 +4862,7 @@ entry: define void @stack_8xv5i32() #0 { ; CHECK-LABEL: name: stack_8xv5i32 ; CHECK: bb.1.entry: - ; CHECK-NEXT: liveins: $sgpr12, $sgpr13, $sgpr14, $vgpr31, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9, $sgpr10_sgpr11, $sgpr30_sgpr31 + ; CHECK-NEXT: liveins: $sgpr12, $sgpr13, $sgpr14, $vgpr31, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9, $sgpr10_sgpr11 ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr31 ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr_32 = COPY $sgpr14 @@ -4898,7 +4872,6 @@ define void @stack_8xv5i32() #0 { ; CHECK-NEXT: [[COPY5:%[0-9]+]]:sgpr_64 = COPY $sgpr8_sgpr9 ; CHECK-NEXT: [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr6_sgpr7 ; CHECK-NEXT: [[COPY7:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5 - ; CHECK-NEXT: [[COPY8:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<5 x s32>) = G_BUILD_VECTOR [[C]](s32), [[C]](s32), [[C]](s32), [[C]](s32), [[C]](s32) ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 @@ -4925,14 +4898,14 @@ define void @stack_8xv5i32() #0 { ; CHECK-NEXT: [[BUILD_VECTOR7:%[0-9]+]]:_(<5 x s32>) = G_BUILD_VECTOR [[C11]](s32), [[C12]](s32), [[C13]](s32), [[C14]](s32), [[C15]](s32) ; CHECK-NEXT: ADJCALLSTACKUP 0, 0, implicit-def $scc ; CHECK-NEXT: [[GV:%[0-9]+]]:_(p0) = G_GLOBAL_VALUE @external_void_func_8xv5i32 - ; CHECK-NEXT: [[COPY9:%[0-9]+]]:_(p4) = COPY [[COPY7]] - ; CHECK-NEXT: [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY6]] - ; CHECK-NEXT: [[COPY11:%[0-9]+]]:_(p4) = COPY [[COPY5]] - ; CHECK-NEXT: [[COPY12:%[0-9]+]]:_(s64) = COPY [[COPY4]] - ; CHECK-NEXT: [[COPY13:%[0-9]+]]:_(s32) = COPY [[COPY3]] - ; CHECK-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY2]] - ; CHECK-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY1]] - ; CHECK-NEXT: [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY]](s32) + ; CHECK-NEXT: [[COPY8:%[0-9]+]]:_(p4) = COPY [[COPY7]] + ; CHECK-NEXT: [[COPY9:%[0-9]+]]:_(p4) = COPY [[COPY6]] + 
; CHECK-NEXT: [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY5]] + ; CHECK-NEXT: [[COPY11:%[0-9]+]]:_(s64) = COPY [[COPY4]] + ; CHECK-NEXT: [[COPY12:%[0-9]+]]:_(s32) = COPY [[COPY3]] + ; CHECK-NEXT: [[COPY13:%[0-9]+]]:_(s32) = COPY [[COPY2]] + ; CHECK-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY1]] + ; CHECK-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY]](s32) ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[BUILD_VECTOR]](<5 x s32>) ; CHECK-NEXT: [[UV5:%[0-9]+]]:_(s32), [[UV6:%[0-9]+]]:_(s32), [[UV7:%[0-9]+]]:_(s32), [[UV8:%[0-9]+]]:_(s32), [[UV9:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[BUILD_VECTOR1]](<5 x s32>) ; CHECK-NEXT: [[UV10:%[0-9]+]]:_(s32), [[UV11:%[0-9]+]]:_(s32), [[UV12:%[0-9]+]]:_(s32), [[UV13:%[0-9]+]]:_(s32), [[UV14:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[BUILD_VECTOR2]](<5 x s32>) @@ -4940,34 +4913,34 @@ define void @stack_8xv5i32() #0 { ; CHECK-NEXT: [[UV20:%[0-9]+]]:_(s32), [[UV21:%[0-9]+]]:_(s32), [[UV22:%[0-9]+]]:_(s32), [[UV23:%[0-9]+]]:_(s32), [[UV24:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[BUILD_VECTOR4]](<5 x s32>) ; CHECK-NEXT: [[UV25:%[0-9]+]]:_(s32), [[UV26:%[0-9]+]]:_(s32), [[UV27:%[0-9]+]]:_(s32), [[UV28:%[0-9]+]]:_(s32), [[UV29:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[BUILD_VECTOR5]](<5 x s32>) ; CHECK-NEXT: [[UV30:%[0-9]+]]:_(s32), [[UV31:%[0-9]+]]:_(s32), [[UV32:%[0-9]+]]:_(s32), [[UV33:%[0-9]+]]:_(s32), [[UV34:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[BUILD_VECTOR6]](<5 x s32>) - ; CHECK-NEXT: [[COPY17:%[0-9]+]]:_(p5) = COPY $sgpr32 + ; CHECK-NEXT: [[COPY16:%[0-9]+]]:_(p5) = COPY $sgpr32 ; CHECK-NEXT: [[C16:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 - ; CHECK-NEXT: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY17]], [[C16]](s32) + ; CHECK-NEXT: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY16]], [[C16]](s32) ; CHECK-NEXT: G_STORE [[UV31]](s32), [[PTR_ADD]](p5) :: (store (s32) into stack, align 16, addrspace 5) ; CHECK-NEXT: [[C17:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 - ; CHECK-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY17]], [[C17]](s32) + ; CHECK-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY16]], [[C17]](s32) ; CHECK-NEXT: G_STORE [[UV32]](s32), [[PTR_ADD1]](p5) :: (store (s32) into stack + 4, addrspace 5) ; CHECK-NEXT: [[C18:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 - ; CHECK-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY17]], [[C18]](s32) + ; CHECK-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY16]], [[C18]](s32) ; CHECK-NEXT: G_STORE [[UV33]](s32), [[PTR_ADD2]](p5) :: (store (s32) into stack + 8, align 8, addrspace 5) ; CHECK-NEXT: [[C19:%[0-9]+]]:_(s32) = G_CONSTANT i32 12 - ; CHECK-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY17]], [[C19]](s32) + ; CHECK-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY16]], [[C19]](s32) ; CHECK-NEXT: G_STORE [[UV34]](s32), [[PTR_ADD3]](p5) :: (store (s32) into stack + 12, addrspace 5) ; CHECK-NEXT: [[UV35:%[0-9]+]]:_(s32), [[UV36:%[0-9]+]]:_(s32), [[UV37:%[0-9]+]]:_(s32), [[UV38:%[0-9]+]]:_(s32), [[UV39:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[BUILD_VECTOR7]](<5 x s32>) ; CHECK-NEXT: [[C20:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; CHECK-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY17]], [[C20]](s32) + ; CHECK-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY16]], [[C20]](s32) ; CHECK-NEXT: G_STORE [[UV35]](s32), [[PTR_ADD4]](p5) :: (store (s32) into stack + 16, align 16, addrspace 5) ; CHECK-NEXT: [[C21:%[0-9]+]]:_(s32) = G_CONSTANT i32 20 - ; CHECK-NEXT: [[PTR_ADD5:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY17]], [[C21]](s32) + 
; CHECK-NEXT: [[PTR_ADD5:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY16]], [[C21]](s32) ; CHECK-NEXT: G_STORE [[UV36]](s32), [[PTR_ADD5]](p5) :: (store (s32) into stack + 20, addrspace 5) ; CHECK-NEXT: [[C22:%[0-9]+]]:_(s32) = G_CONSTANT i32 24 - ; CHECK-NEXT: [[PTR_ADD6:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY17]], [[C22]](s32) + ; CHECK-NEXT: [[PTR_ADD6:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY16]], [[C22]](s32) ; CHECK-NEXT: G_STORE [[UV37]](s32), [[PTR_ADD6]](p5) :: (store (s32) into stack + 24, align 8, addrspace 5) ; CHECK-NEXT: [[C23:%[0-9]+]]:_(s32) = G_CONSTANT i32 28 - ; CHECK-NEXT: [[PTR_ADD7:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY17]], [[C23]](s32) + ; CHECK-NEXT: [[PTR_ADD7:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY16]], [[C23]](s32) ; CHECK-NEXT: G_STORE [[UV38]](s32), [[PTR_ADD7]](p5) :: (store (s32) into stack + 28, addrspace 5) ; CHECK-NEXT: [[C24:%[0-9]+]]:_(s32) = G_CONSTANT i32 32 - ; CHECK-NEXT: [[PTR_ADD8:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY17]], [[C24]](s32) + ; CHECK-NEXT: [[PTR_ADD8:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY16]], [[C24]](s32) ; CHECK-NEXT: G_STORE [[UV39]](s32), [[PTR_ADD8]](p5) :: (store (s32) into stack + 32, align 16, addrspace 5) ; CHECK-NEXT: $vgpr0 = COPY [[UV]](s32) ; CHECK-NEXT: $vgpr1 = COPY [[UV1]](s32) @@ -5000,20 +4973,19 @@ define void @stack_8xv5i32() #0 { ; CHECK-NEXT: $vgpr28 = COPY [[UV28]](s32) ; CHECK-NEXT: $vgpr29 = COPY [[UV29]](s32) ; CHECK-NEXT: $vgpr30 = COPY [[UV30]](s32) - ; CHECK-NEXT: [[COPY18:%[0-9]+]]:_(<4 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3 - ; CHECK-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY18]](<4 x s32>) - ; CHECK-NEXT: $sgpr4_sgpr5 = COPY [[COPY9]](p4) - ; CHECK-NEXT: $sgpr6_sgpr7 = COPY [[COPY10]](p4) - ; CHECK-NEXT: $sgpr8_sgpr9 = COPY [[COPY11]](p4) - ; CHECK-NEXT: $sgpr10_sgpr11 = COPY [[COPY12]](s64) - ; CHECK-NEXT: $sgpr12 = COPY [[COPY13]](s32) - ; CHECK-NEXT: $sgpr13 = COPY [[COPY14]](s32) - ; CHECK-NEXT: $sgpr14 = COPY [[COPY15]](s32) - ; CHECK-NEXT: $vgpr31 = COPY [[COPY16]](s32) + ; CHECK-NEXT: [[COPY17:%[0-9]+]]:_(<4 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3 + ; CHECK-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY17]](<4 x s32>) + ; CHECK-NEXT: $sgpr4_sgpr5 = COPY [[COPY8]](p4) + ; CHECK-NEXT: $sgpr6_sgpr7 = COPY [[COPY9]](p4) + ; CHECK-NEXT: $sgpr8_sgpr9 = COPY [[COPY10]](p4) + ; CHECK-NEXT: $sgpr10_sgpr11 = COPY [[COPY11]](s64) + ; CHECK-NEXT: $sgpr12 = COPY [[COPY12]](s32) + ; CHECK-NEXT: $sgpr13 = COPY [[COPY13]](s32) + ; CHECK-NEXT: $sgpr14 = COPY [[COPY14]](s32) + ; CHECK-NEXT: $vgpr31 = COPY [[COPY15]](s32) ; CHECK-NEXT: $sgpr30_sgpr31 = G_SI_CALL [[GV]](p0), @external_void_func_8xv5i32, csr_amdgpu_highregs, implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7, implicit $vgpr8, implicit $vgpr9, implicit $vgpr10, implicit $vgpr11, implicit $vgpr12, implicit $vgpr13, implicit $vgpr14, implicit $vgpr15, implicit $vgpr16, implicit $vgpr17, implicit $vgpr18, implicit $vgpr19, implicit $vgpr20, implicit $vgpr21, implicit $vgpr22, implicit $vgpr23, implicit $vgpr24, implicit $vgpr25, implicit $vgpr26, implicit $vgpr27, implicit $vgpr28, implicit $vgpr29, implicit $vgpr30, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $vgpr31 ; CHECK-NEXT: ADJCALLSTACKDOWN 0, 36, implicit-def $scc - ; CHECK-NEXT: [[COPY19:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY8]] - ; CHECK-NEXT: S_SETPC_B64_return [[COPY19]] + ; CHECK-NEXT: SI_RETURN entry: 
call void @external_void_func_8xv5i32( <5 x i32> , @@ -5030,7 +5002,7 @@ entry: define void @stack_8xv5f32() #0 { ; CHECK-LABEL: name: stack_8xv5f32 ; CHECK: bb.1.entry: - ; CHECK-NEXT: liveins: $sgpr12, $sgpr13, $sgpr14, $vgpr31, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9, $sgpr10_sgpr11, $sgpr30_sgpr31 + ; CHECK-NEXT: liveins: $sgpr12, $sgpr13, $sgpr14, $vgpr31, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9, $sgpr10_sgpr11 ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr31 ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr_32 = COPY $sgpr14 @@ -5040,7 +5012,6 @@ define void @stack_8xv5f32() #0 { ; CHECK-NEXT: [[COPY5:%[0-9]+]]:sgpr_64 = COPY $sgpr8_sgpr9 ; CHECK-NEXT: [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr6_sgpr7 ; CHECK-NEXT: [[COPY7:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5 - ; CHECK-NEXT: [[COPY8:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_FCONSTANT float 0.000000e+00 ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<5 x s32>) = G_BUILD_VECTOR [[C]](s32), [[C]](s32), [[C]](s32), [[C]](s32), [[C]](s32) ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s32) = G_FCONSTANT float 1.000000e+00 @@ -5067,14 +5038,14 @@ define void @stack_8xv5f32() #0 { ; CHECK-NEXT: [[BUILD_VECTOR7:%[0-9]+]]:_(<5 x s32>) = G_BUILD_VECTOR [[C11]](s32), [[C12]](s32), [[C13]](s32), [[C14]](s32), [[C15]](s32) ; CHECK-NEXT: ADJCALLSTACKUP 0, 0, implicit-def $scc ; CHECK-NEXT: [[GV:%[0-9]+]]:_(p0) = G_GLOBAL_VALUE @external_void_func_8xv5f32 - ; CHECK-NEXT: [[COPY9:%[0-9]+]]:_(p4) = COPY [[COPY7]] - ; CHECK-NEXT: [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY6]] - ; CHECK-NEXT: [[COPY11:%[0-9]+]]:_(p4) = COPY [[COPY5]] - ; CHECK-NEXT: [[COPY12:%[0-9]+]]:_(s64) = COPY [[COPY4]] - ; CHECK-NEXT: [[COPY13:%[0-9]+]]:_(s32) = COPY [[COPY3]] - ; CHECK-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY2]] - ; CHECK-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY1]] - ; CHECK-NEXT: [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY]](s32) + ; CHECK-NEXT: [[COPY8:%[0-9]+]]:_(p4) = COPY [[COPY7]] + ; CHECK-NEXT: [[COPY9:%[0-9]+]]:_(p4) = COPY [[COPY6]] + ; CHECK-NEXT: [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY5]] + ; CHECK-NEXT: [[COPY11:%[0-9]+]]:_(s64) = COPY [[COPY4]] + ; CHECK-NEXT: [[COPY12:%[0-9]+]]:_(s32) = COPY [[COPY3]] + ; CHECK-NEXT: [[COPY13:%[0-9]+]]:_(s32) = COPY [[COPY2]] + ; CHECK-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY1]] + ; CHECK-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY]](s32) ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[BUILD_VECTOR]](<5 x s32>) ; CHECK-NEXT: [[UV5:%[0-9]+]]:_(s32), [[UV6:%[0-9]+]]:_(s32), [[UV7:%[0-9]+]]:_(s32), [[UV8:%[0-9]+]]:_(s32), [[UV9:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[BUILD_VECTOR1]](<5 x s32>) ; CHECK-NEXT: [[UV10:%[0-9]+]]:_(s32), [[UV11:%[0-9]+]]:_(s32), [[UV12:%[0-9]+]]:_(s32), [[UV13:%[0-9]+]]:_(s32), [[UV14:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[BUILD_VECTOR2]](<5 x s32>) @@ -5082,34 +5053,34 @@ define void @stack_8xv5f32() #0 { ; CHECK-NEXT: [[UV20:%[0-9]+]]:_(s32), [[UV21:%[0-9]+]]:_(s32), [[UV22:%[0-9]+]]:_(s32), [[UV23:%[0-9]+]]:_(s32), [[UV24:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[BUILD_VECTOR4]](<5 x s32>) ; CHECK-NEXT: [[UV25:%[0-9]+]]:_(s32), [[UV26:%[0-9]+]]:_(s32), [[UV27:%[0-9]+]]:_(s32), [[UV28:%[0-9]+]]:_(s32), [[UV29:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[BUILD_VECTOR5]](<5 x s32>) ; CHECK-NEXT: [[UV30:%[0-9]+]]:_(s32), [[UV31:%[0-9]+]]:_(s32), [[UV32:%[0-9]+]]:_(s32), [[UV33:%[0-9]+]]:_(s32), [[UV34:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[BUILD_VECTOR6]](<5 x 
s32>) - ; CHECK-NEXT: [[COPY17:%[0-9]+]]:_(p5) = COPY $sgpr32 + ; CHECK-NEXT: [[COPY16:%[0-9]+]]:_(p5) = COPY $sgpr32 ; CHECK-NEXT: [[C16:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 - ; CHECK-NEXT: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY17]], [[C16]](s32) + ; CHECK-NEXT: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY16]], [[C16]](s32) ; CHECK-NEXT: G_STORE [[UV31]](s32), [[PTR_ADD]](p5) :: (store (s32) into stack, align 16, addrspace 5) ; CHECK-NEXT: [[C17:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 - ; CHECK-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY17]], [[C17]](s32) + ; CHECK-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY16]], [[C17]](s32) ; CHECK-NEXT: G_STORE [[UV32]](s32), [[PTR_ADD1]](p5) :: (store (s32) into stack + 4, addrspace 5) ; CHECK-NEXT: [[C18:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 - ; CHECK-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY17]], [[C18]](s32) + ; CHECK-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY16]], [[C18]](s32) ; CHECK-NEXT: G_STORE [[UV33]](s32), [[PTR_ADD2]](p5) :: (store (s32) into stack + 8, align 8, addrspace 5) ; CHECK-NEXT: [[C19:%[0-9]+]]:_(s32) = G_CONSTANT i32 12 - ; CHECK-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY17]], [[C19]](s32) + ; CHECK-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY16]], [[C19]](s32) ; CHECK-NEXT: G_STORE [[UV34]](s32), [[PTR_ADD3]](p5) :: (store (s32) into stack + 12, addrspace 5) ; CHECK-NEXT: [[UV35:%[0-9]+]]:_(s32), [[UV36:%[0-9]+]]:_(s32), [[UV37:%[0-9]+]]:_(s32), [[UV38:%[0-9]+]]:_(s32), [[UV39:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[BUILD_VECTOR7]](<5 x s32>) ; CHECK-NEXT: [[C20:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; CHECK-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY17]], [[C20]](s32) + ; CHECK-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY16]], [[C20]](s32) ; CHECK-NEXT: G_STORE [[UV35]](s32), [[PTR_ADD4]](p5) :: (store (s32) into stack + 16, align 16, addrspace 5) ; CHECK-NEXT: [[C21:%[0-9]+]]:_(s32) = G_CONSTANT i32 20 - ; CHECK-NEXT: [[PTR_ADD5:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY17]], [[C21]](s32) + ; CHECK-NEXT: [[PTR_ADD5:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY16]], [[C21]](s32) ; CHECK-NEXT: G_STORE [[UV36]](s32), [[PTR_ADD5]](p5) :: (store (s32) into stack + 20, addrspace 5) ; CHECK-NEXT: [[C22:%[0-9]+]]:_(s32) = G_CONSTANT i32 24 - ; CHECK-NEXT: [[PTR_ADD6:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY17]], [[C22]](s32) + ; CHECK-NEXT: [[PTR_ADD6:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY16]], [[C22]](s32) ; CHECK-NEXT: G_STORE [[UV37]](s32), [[PTR_ADD6]](p5) :: (store (s32) into stack + 24, align 8, addrspace 5) ; CHECK-NEXT: [[C23:%[0-9]+]]:_(s32) = G_CONSTANT i32 28 - ; CHECK-NEXT: [[PTR_ADD7:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY17]], [[C23]](s32) + ; CHECK-NEXT: [[PTR_ADD7:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY16]], [[C23]](s32) ; CHECK-NEXT: G_STORE [[UV38]](s32), [[PTR_ADD7]](p5) :: (store (s32) into stack + 28, addrspace 5) ; CHECK-NEXT: [[C24:%[0-9]+]]:_(s32) = G_CONSTANT i32 32 - ; CHECK-NEXT: [[PTR_ADD8:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY17]], [[C24]](s32) + ; CHECK-NEXT: [[PTR_ADD8:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY16]], [[C24]](s32) ; CHECK-NEXT: G_STORE [[UV39]](s32), [[PTR_ADD8]](p5) :: (store (s32) into stack + 32, align 16, addrspace 5) ; CHECK-NEXT: $vgpr0 = COPY [[UV]](s32) ; CHECK-NEXT: $vgpr1 = COPY [[UV1]](s32) @@ -5142,20 +5113,19 @@ define void @stack_8xv5f32() #0 { ; CHECK-NEXT: $vgpr28 = COPY [[UV28]](s32) ; CHECK-NEXT: $vgpr29 = COPY [[UV29]](s32) ; CHECK-NEXT: $vgpr30 = COPY [[UV30]](s32) - ; CHECK-NEXT: [[COPY18:%[0-9]+]]:_(<4 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3 - ; 
CHECK-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY18]](<4 x s32>) - ; CHECK-NEXT: $sgpr4_sgpr5 = COPY [[COPY9]](p4) - ; CHECK-NEXT: $sgpr6_sgpr7 = COPY [[COPY10]](p4) - ; CHECK-NEXT: $sgpr8_sgpr9 = COPY [[COPY11]](p4) - ; CHECK-NEXT: $sgpr10_sgpr11 = COPY [[COPY12]](s64) - ; CHECK-NEXT: $sgpr12 = COPY [[COPY13]](s32) - ; CHECK-NEXT: $sgpr13 = COPY [[COPY14]](s32) - ; CHECK-NEXT: $sgpr14 = COPY [[COPY15]](s32) - ; CHECK-NEXT: $vgpr31 = COPY [[COPY16]](s32) + ; CHECK-NEXT: [[COPY17:%[0-9]+]]:_(<4 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3 + ; CHECK-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY17]](<4 x s32>) + ; CHECK-NEXT: $sgpr4_sgpr5 = COPY [[COPY8]](p4) + ; CHECK-NEXT: $sgpr6_sgpr7 = COPY [[COPY9]](p4) + ; CHECK-NEXT: $sgpr8_sgpr9 = COPY [[COPY10]](p4) + ; CHECK-NEXT: $sgpr10_sgpr11 = COPY [[COPY11]](s64) + ; CHECK-NEXT: $sgpr12 = COPY [[COPY12]](s32) + ; CHECK-NEXT: $sgpr13 = COPY [[COPY13]](s32) + ; CHECK-NEXT: $sgpr14 = COPY [[COPY14]](s32) + ; CHECK-NEXT: $vgpr31 = COPY [[COPY15]](s32) ; CHECK-NEXT: $sgpr30_sgpr31 = G_SI_CALL [[GV]](p0), @external_void_func_8xv5f32, csr_amdgpu_highregs, implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7, implicit $vgpr8, implicit $vgpr9, implicit $vgpr10, implicit $vgpr11, implicit $vgpr12, implicit $vgpr13, implicit $vgpr14, implicit $vgpr15, implicit $vgpr16, implicit $vgpr17, implicit $vgpr18, implicit $vgpr19, implicit $vgpr20, implicit $vgpr21, implicit $vgpr22, implicit $vgpr23, implicit $vgpr24, implicit $vgpr25, implicit $vgpr26, implicit $vgpr27, implicit $vgpr28, implicit $vgpr29, implicit $vgpr30, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $vgpr31 ; CHECK-NEXT: ADJCALLSTACKDOWN 0, 36, implicit-def $scc - ; CHECK-NEXT: [[COPY19:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY8]] - ; CHECK-NEXT: S_SETPC_B64_return [[COPY19]] + ; CHECK-NEXT: SI_RETURN entry: call void @external_void_func_8xv5f32( <5 x float> , diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-constantexpr.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-constantexpr.ll index 78ecae87d06f..138e66ffd6c7 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-constantexpr.ll +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-constantexpr.ll @@ -6,21 +6,17 @@ define i32 @test() { ; CHECK-LABEL: name: test ; CHECK: bb.1 (%ir-block.0): - ; CHECK-NEXT: liveins: $sgpr30_sgpr31 - ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 -1 ; CHECK-NEXT: [[INTTOPTR:%[0-9]+]]:_(p0) = G_INTTOPTR [[C]](s32) ; CHECK-NEXT: [[GV:%[0-9]+]]:_(p0) = G_GLOBAL_VALUE @var ; CHECK-NEXT: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[INTTOPTR]](p0), [[GV]] ; CHECK-NEXT: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[ICMP]](s1) - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY [[ZEXT]](s32) + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY [[ZEXT]](s32) + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY [[COPY]](s32) ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32) ; CHECK-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY [[COPY2]](s32) - ; CHECK-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY [[COPY3]](s32) - ; CHECK-NEXT: $vgpr0 = COPY [[COPY4]](s32) - ; CHECK-NEXT: [[COPY5:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY]] - ; CHECK-NEXT: S_SETPC_B64_return [[COPY5]], implicit $vgpr0 + ; CHECK-NEXT: $vgpr0 = COPY 
[[COPY3]](s32) + ; CHECK-NEXT: SI_RETURN implicit $vgpr0 ret i32 bitcast (<1 x i32> bitcast (i32 zext (i1 icmp eq (i32* @var, i32* inttoptr (i32 -1 to i32*)) to i32) to <1 x i32>), i64 0)> to i32) } @@ -66,9 +62,6 @@ define amdgpu_kernel void @constantexpr_select_1() { define i32 @test_fcmp_constexpr() { ; CHECK-LABEL: name: test_fcmp_constexpr ; CHECK: bb.1.entry: - ; CHECK-NEXT: liveins: $sgpr30_sgpr31 - ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 ; CHECK-NEXT: [[GV:%[0-9]+]]:_(p0) = G_GLOBAL_VALUE @a ; CHECK-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 4 ; CHECK-NEXT: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[GV]], [[C]](s64) @@ -79,8 +72,7 @@ define i32 @test_fcmp_constexpr() { ; CHECK-NEXT: [[FCMP:%[0-9]+]]:_(s1) = G_FCMP floatpred(oeq), [[UITOFP]](s32), [[C1]] ; CHECK-NEXT: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[FCMP]](s1) ; CHECK-NEXT: $vgpr0 = COPY [[ZEXT]](s32) - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY]] - ; CHECK-NEXT: S_SETPC_B64_return [[COPY1]], implicit $vgpr0 + ; CHECK-NEXT: SI_RETURN implicit $vgpr0 entry: ret i32 zext (i1 fcmp oeq (float uitofp (i1 icmp eq (i32* getelementptr inbounds ([2 x i32], [2 x i32]* @a, i64 0, i64 1), i32* @var) to float), float 0.000000e+00) to i32) } diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-constrained-fp.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-constrained-fp.ll index 10218b9a88d0..e01d9c399e93 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-constrained-fp.ll +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-constrained-fp.ll @@ -4,15 +4,13 @@ define float @v_constained_fadd_f32_fpexcept_strict(float %x, float %y) #0 { ; CHECK-LABEL: name: v_constained_fadd_f32_fpexcept_strict ; CHECK: bb.1 (%ir-block.0): - ; CHECK-NEXT: liveins: $vgpr0, $vgpr1, $sgpr30_sgpr31 + ; CHECK-NEXT: liveins: $vgpr0, $vgpr1 ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 ; CHECK-NEXT: [[STRICT_FADD:%[0-9]+]]:_(s32) = G_STRICT_FADD [[COPY]], [[COPY1]] ; CHECK-NEXT: $vgpr0 = COPY [[STRICT_FADD]](s32) - ; CHECK-NEXT: [[COPY3:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY2]] - ; CHECK-NEXT: S_SETPC_B64_return [[COPY3]], implicit $vgpr0 + ; CHECK-NEXT: SI_RETURN implicit $vgpr0 %val = call float @llvm.experimental.constrained.fadd.f32(float %x, float %y, metadata !"round.tonearest", metadata !"fpexcept.strict") ret float %val } @@ -20,15 +18,13 @@ define float @v_constained_fadd_f32_fpexcept_strict(float %x, float %y) #0 { define float @v_constained_fadd_f32_fpexcept_strict_flags(float %x, float %y) #0 { ; CHECK-LABEL: name: v_constained_fadd_f32_fpexcept_strict_flags ; CHECK: bb.1 (%ir-block.0): - ; CHECK-NEXT: liveins: $vgpr0, $vgpr1, $sgpr30_sgpr31 + ; CHECK-NEXT: liveins: $vgpr0, $vgpr1 ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 ; CHECK-NEXT: [[STRICT_FADD:%[0-9]+]]:_(s32) = nsz G_STRICT_FADD [[COPY]], [[COPY1]] ; CHECK-NEXT: $vgpr0 = COPY [[STRICT_FADD]](s32) - ; CHECK-NEXT: [[COPY3:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY2]] - ; CHECK-NEXT: S_SETPC_B64_return [[COPY3]], implicit $vgpr0 + ; CHECK-NEXT: SI_RETURN implicit $vgpr0 %val = call nsz float @llvm.experimental.constrained.fadd.f32(float %x, float %y, metadata !"round.tonearest", metadata !"fpexcept.strict") ret float %val } @@ -36,15 +32,13 
@@ define float @v_constained_fadd_f32_fpexcept_strict_flags(float %x, float %y) #0 define float @v_constained_fadd_f32_fpexcept_ignore(float %x, float %y) #0 { ; CHECK-LABEL: name: v_constained_fadd_f32_fpexcept_ignore ; CHECK: bb.1 (%ir-block.0): - ; CHECK-NEXT: liveins: $vgpr0, $vgpr1, $sgpr30_sgpr31 + ; CHECK-NEXT: liveins: $vgpr0, $vgpr1 ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 - ; CHECK-NEXT: %3:_(s32) = nofpexcept G_STRICT_FADD [[COPY]], [[COPY1]] - ; CHECK-NEXT: $vgpr0 = COPY %3(s32) - ; CHECK-NEXT: [[COPY3:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY2]] - ; CHECK-NEXT: S_SETPC_B64_return [[COPY3]], implicit $vgpr0 + ; CHECK-NEXT: %2:_(s32) = nofpexcept G_STRICT_FADD [[COPY]], [[COPY1]] + ; CHECK-NEXT: $vgpr0 = COPY %2(s32) + ; CHECK-NEXT: SI_RETURN implicit $vgpr0 %val = call float @llvm.experimental.constrained.fadd.f32(float %x, float %y, metadata !"round.tonearest", metadata !"fpexcept.ignore") ret float %val } @@ -52,15 +46,13 @@ define float @v_constained_fadd_f32_fpexcept_ignore(float %x, float %y) #0 { define float @v_constained_fadd_f32_fpexcept_ignore_flags(float %x, float %y) #0 { ; CHECK-LABEL: name: v_constained_fadd_f32_fpexcept_ignore_flags ; CHECK: bb.1 (%ir-block.0): - ; CHECK-NEXT: liveins: $vgpr0, $vgpr1, $sgpr30_sgpr31 + ; CHECK-NEXT: liveins: $vgpr0, $vgpr1 ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 - ; CHECK-NEXT: %3:_(s32) = nsz nofpexcept G_STRICT_FADD [[COPY]], [[COPY1]] - ; CHECK-NEXT: $vgpr0 = COPY %3(s32) - ; CHECK-NEXT: [[COPY3:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY2]] - ; CHECK-NEXT: S_SETPC_B64_return [[COPY3]], implicit $vgpr0 + ; CHECK-NEXT: %2:_(s32) = nsz nofpexcept G_STRICT_FADD [[COPY]], [[COPY1]] + ; CHECK-NEXT: $vgpr0 = COPY %2(s32) + ; CHECK-NEXT: SI_RETURN implicit $vgpr0 %val = call nsz float @llvm.experimental.constrained.fadd.f32(float %x, float %y, metadata !"round.tonearest", metadata !"fpexcept.ignore") ret float %val } @@ -68,15 +60,13 @@ define float @v_constained_fadd_f32_fpexcept_ignore_flags(float %x, float %y) #0 define float @v_constained_fadd_f32_fpexcept_maytrap(float %x, float %y) #0 { ; CHECK-LABEL: name: v_constained_fadd_f32_fpexcept_maytrap ; CHECK: bb.1 (%ir-block.0): - ; CHECK-NEXT: liveins: $vgpr0, $vgpr1, $sgpr30_sgpr31 + ; CHECK-NEXT: liveins: $vgpr0, $vgpr1 ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 ; CHECK-NEXT: [[STRICT_FADD:%[0-9]+]]:_(s32) = G_STRICT_FADD [[COPY]], [[COPY1]] ; CHECK-NEXT: $vgpr0 = COPY [[STRICT_FADD]](s32) - ; CHECK-NEXT: [[COPY3:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY2]] - ; CHECK-NEXT: S_SETPC_B64_return [[COPY3]], implicit $vgpr0 + ; CHECK-NEXT: SI_RETURN implicit $vgpr0 %val = call float @llvm.experimental.constrained.fadd.f32(float %x, float %y, metadata !"round.tonearest", metadata !"fpexcept.maytrap") ret float %val } @@ -84,7 +74,7 @@ define float @v_constained_fadd_f32_fpexcept_maytrap(float %x, float %y) #0 { define <2 x float> @v_constained_fadd_v2f32_fpexcept_strict(<2 x float> %x, <2 x float> %y) #0 { ; CHECK-LABEL: name: v_constained_fadd_v2f32_fpexcept_strict ; CHECK: bb.1 (%ir-block.0): - ; CHECK-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $sgpr30_sgpr31 + ; 
CHECK-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3 ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 @@ -92,13 +82,11 @@ define <2 x float> @v_constained_fadd_v2f32_fpexcept_strict(<2 x float> %x, <2 x ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 ; CHECK-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $vgpr3 ; CHECK-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[COPY2]](s32), [[COPY3]](s32) - ; CHECK-NEXT: [[COPY4:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 ; CHECK-NEXT: [[STRICT_FADD:%[0-9]+]]:_(<2 x s32>) = G_STRICT_FADD [[BUILD_VECTOR]], [[BUILD_VECTOR1]] ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[STRICT_FADD]](<2 x s32>) ; CHECK-NEXT: $vgpr0 = COPY [[UV]](s32) ; CHECK-NEXT: $vgpr1 = COPY [[UV1]](s32) - ; CHECK-NEXT: [[COPY5:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY4]] - ; CHECK-NEXT: S_SETPC_B64_return [[COPY5]], implicit $vgpr0, implicit $vgpr1 + ; CHECK-NEXT: SI_RETURN implicit $vgpr0, implicit $vgpr1 %val = call <2 x float> @llvm.experimental.constrained.fadd.v2f32(<2 x float> %x, <2 x float> %y, metadata !"round.tonearest", metadata !"fpexcept.strict") ret <2 x float> %val } @@ -106,7 +94,7 @@ define <2 x float> @v_constained_fadd_v2f32_fpexcept_strict(<2 x float> %x, <2 x define <2 x float> @v_constained_fadd_v2f32_fpexcept_ignore(<2 x float> %x, <2 x float> %y) #0 { ; CHECK-LABEL: name: v_constained_fadd_v2f32_fpexcept_ignore ; CHECK: bb.1 (%ir-block.0): - ; CHECK-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $sgpr30_sgpr31 + ; CHECK-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3 ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 @@ -114,13 +102,11 @@ define <2 x float> @v_constained_fadd_v2f32_fpexcept_ignore(<2 x float> %x, <2 x ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 ; CHECK-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $vgpr3 ; CHECK-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[COPY2]](s32), [[COPY3]](s32) - ; CHECK-NEXT: [[COPY4:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 - ; CHECK-NEXT: %7:_(<2 x s32>) = nofpexcept G_STRICT_FADD [[BUILD_VECTOR]], [[BUILD_VECTOR1]] - ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES %7(<2 x s32>) + ; CHECK-NEXT: %6:_(<2 x s32>) = nofpexcept G_STRICT_FADD [[BUILD_VECTOR]], [[BUILD_VECTOR1]] + ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES %6(<2 x s32>) ; CHECK-NEXT: $vgpr0 = COPY [[UV]](s32) ; CHECK-NEXT: $vgpr1 = COPY [[UV1]](s32) - ; CHECK-NEXT: [[COPY5:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY4]] - ; CHECK-NEXT: S_SETPC_B64_return [[COPY5]], implicit $vgpr0, implicit $vgpr1 + ; CHECK-NEXT: SI_RETURN implicit $vgpr0, implicit $vgpr1 %val = call <2 x float> @llvm.experimental.constrained.fadd.v2f32(<2 x float> %x, <2 x float> %y, metadata !"round.tonearest", metadata !"fpexcept.ignore") ret <2 x float> %val } @@ -128,7 +114,7 @@ define <2 x float> @v_constained_fadd_v2f32_fpexcept_ignore(<2 x float> %x, <2 x define <2 x float> @v_constained_fadd_v2f32_fpexcept_maytrap(<2 x float> %x, <2 x float> %y) #0 { ; CHECK-LABEL: name: v_constained_fadd_v2f32_fpexcept_maytrap ; CHECK: bb.1 (%ir-block.0): - ; CHECK-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $sgpr30_sgpr31 + ; CHECK-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3 ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 @@ -136,13 +122,11 @@ define <2 x 
float> @v_constained_fadd_v2f32_fpexcept_maytrap(<2 x float> %x, <2 ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 ; CHECK-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $vgpr3 ; CHECK-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[COPY2]](s32), [[COPY3]](s32) - ; CHECK-NEXT: [[COPY4:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 ; CHECK-NEXT: [[STRICT_FADD:%[0-9]+]]:_(<2 x s32>) = G_STRICT_FADD [[BUILD_VECTOR]], [[BUILD_VECTOR1]] ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[STRICT_FADD]](<2 x s32>) ; CHECK-NEXT: $vgpr0 = COPY [[UV]](s32) ; CHECK-NEXT: $vgpr1 = COPY [[UV1]](s32) - ; CHECK-NEXT: [[COPY5:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY4]] - ; CHECK-NEXT: S_SETPC_B64_return [[COPY5]], implicit $vgpr0, implicit $vgpr1 + ; CHECK-NEXT: SI_RETURN implicit $vgpr0, implicit $vgpr1 %val = call <2 x float> @llvm.experimental.constrained.fadd.v2f32(<2 x float> %x, <2 x float> %y, metadata !"round.tonearest", metadata !"fpexcept.maytrap") ret <2 x float> %val } @@ -150,15 +134,13 @@ define <2 x float> @v_constained_fadd_v2f32_fpexcept_maytrap(<2 x float> %x, <2 define float @v_constained_fsub_f32_fpexcept_ignore_flags(float %x, float %y) #0 { ; CHECK-LABEL: name: v_constained_fsub_f32_fpexcept_ignore_flags ; CHECK: bb.1 (%ir-block.0): - ; CHECK-NEXT: liveins: $vgpr0, $vgpr1, $sgpr30_sgpr31 + ; CHECK-NEXT: liveins: $vgpr0, $vgpr1 ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 - ; CHECK-NEXT: %3:_(s32) = nsz nofpexcept G_STRICT_FSUB [[COPY]], [[COPY1]] - ; CHECK-NEXT: $vgpr0 = COPY %3(s32) - ; CHECK-NEXT: [[COPY3:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY2]] - ; CHECK-NEXT: S_SETPC_B64_return [[COPY3]], implicit $vgpr0 + ; CHECK-NEXT: %2:_(s32) = nsz nofpexcept G_STRICT_FSUB [[COPY]], [[COPY1]] + ; CHECK-NEXT: $vgpr0 = COPY %2(s32) + ; CHECK-NEXT: SI_RETURN implicit $vgpr0 %val = call nsz float @llvm.experimental.constrained.fsub.f32(float %x, float %y, metadata !"round.tonearest", metadata !"fpexcept.ignore") ret float %val } @@ -166,15 +148,13 @@ define float @v_constained_fsub_f32_fpexcept_ignore_flags(float %x, float %y) #0 define float @v_constained_fmul_f32_fpexcept_ignore_flags(float %x, float %y) #0 { ; CHECK-LABEL: name: v_constained_fmul_f32_fpexcept_ignore_flags ; CHECK: bb.1 (%ir-block.0): - ; CHECK-NEXT: liveins: $vgpr0, $vgpr1, $sgpr30_sgpr31 + ; CHECK-NEXT: liveins: $vgpr0, $vgpr1 ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 - ; CHECK-NEXT: %3:_(s32) = nsz nofpexcept G_STRICT_FMUL [[COPY]], [[COPY1]] - ; CHECK-NEXT: $vgpr0 = COPY %3(s32) - ; CHECK-NEXT: [[COPY3:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY2]] - ; CHECK-NEXT: S_SETPC_B64_return [[COPY3]], implicit $vgpr0 + ; CHECK-NEXT: %2:_(s32) = nsz nofpexcept G_STRICT_FMUL [[COPY]], [[COPY1]] + ; CHECK-NEXT: $vgpr0 = COPY %2(s32) + ; CHECK-NEXT: SI_RETURN implicit $vgpr0 %val = call nsz float @llvm.experimental.constrained.fmul.f32(float %x, float %y, metadata !"round.tonearest", metadata !"fpexcept.ignore") ret float %val } @@ -182,15 +162,13 @@ define float @v_constained_fmul_f32_fpexcept_ignore_flags(float %x, float %y) #0 define float @v_constained_fdiv_f32_fpexcept_ignore_flags(float %x, float %y) #0 { ; CHECK-LABEL: name: v_constained_fdiv_f32_fpexcept_ignore_flags ; CHECK: bb.1 (%ir-block.0): - ; CHECK-NEXT: liveins: 
$vgpr0, $vgpr1, $sgpr30_sgpr31 + ; CHECK-NEXT: liveins: $vgpr0, $vgpr1 ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 - ; CHECK-NEXT: %3:_(s32) = nsz nofpexcept G_STRICT_FDIV [[COPY]], [[COPY1]] - ; CHECK-NEXT: $vgpr0 = COPY %3(s32) - ; CHECK-NEXT: [[COPY3:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY2]] - ; CHECK-NEXT: S_SETPC_B64_return [[COPY3]], implicit $vgpr0 + ; CHECK-NEXT: %2:_(s32) = nsz nofpexcept G_STRICT_FDIV [[COPY]], [[COPY1]] + ; CHECK-NEXT: $vgpr0 = COPY %2(s32) + ; CHECK-NEXT: SI_RETURN implicit $vgpr0 %val = call nsz float @llvm.experimental.constrained.fdiv.f32(float %x, float %y, metadata !"round.tonearest", metadata !"fpexcept.ignore") ret float %val } @@ -198,15 +176,13 @@ define float @v_constained_fdiv_f32_fpexcept_ignore_flags(float %x, float %y) #0 define float @v_constained_frem_f32_fpexcept_ignore_flags(float %x, float %y) #0 { ; CHECK-LABEL: name: v_constained_frem_f32_fpexcept_ignore_flags ; CHECK: bb.1 (%ir-block.0): - ; CHECK-NEXT: liveins: $vgpr0, $vgpr1, $sgpr30_sgpr31 + ; CHECK-NEXT: liveins: $vgpr0, $vgpr1 ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 - ; CHECK-NEXT: %3:_(s32) = nsz nofpexcept G_STRICT_FREM [[COPY]], [[COPY1]] - ; CHECK-NEXT: $vgpr0 = COPY %3(s32) - ; CHECK-NEXT: [[COPY3:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY2]] - ; CHECK-NEXT: S_SETPC_B64_return [[COPY3]], implicit $vgpr0 + ; CHECK-NEXT: %2:_(s32) = nsz nofpexcept G_STRICT_FREM [[COPY]], [[COPY1]] + ; CHECK-NEXT: $vgpr0 = COPY %2(s32) + ; CHECK-NEXT: SI_RETURN implicit $vgpr0 %val = call nsz float @llvm.experimental.constrained.frem.f32(float %x, float %y, metadata !"round.tonearest", metadata !"fpexcept.ignore") ret float %val } @@ -214,16 +190,14 @@ define float @v_constained_frem_f32_fpexcept_ignore_flags(float %x, float %y) #0 define float @v_constained_fma_f32_fpexcept_ignore_flags(float %x, float %y, float %z) #0 { ; CHECK-LABEL: name: v_constained_fma_f32_fpexcept_ignore_flags ; CHECK: bb.1 (%ir-block.0): - ; CHECK-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2, $sgpr30_sgpr31 + ; CHECK-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2 ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; CHECK-NEXT: [[COPY3:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 - ; CHECK-NEXT: %4:_(s32) = nsz nofpexcept G_STRICT_FMA [[COPY]], [[COPY1]], [[COPY2]] - ; CHECK-NEXT: $vgpr0 = COPY %4(s32) - ; CHECK-NEXT: [[COPY4:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY3]] - ; CHECK-NEXT: S_SETPC_B64_return [[COPY4]], implicit $vgpr0 + ; CHECK-NEXT: %3:_(s32) = nsz nofpexcept G_STRICT_FMA [[COPY]], [[COPY1]], [[COPY2]] + ; CHECK-NEXT: $vgpr0 = COPY %3(s32) + ; CHECK-NEXT: SI_RETURN implicit $vgpr0 %val = call nsz float @llvm.experimental.constrained.fma.f32(float %x, float %y, float %z, metadata !"round.tonearest", metadata !"fpexcept.ignore") ret float %val } @@ -231,14 +205,12 @@ define float @v_constained_fma_f32_fpexcept_ignore_flags(float %x, float %y, flo define float @v_constained_sqrt_f32_fpexcept_strict(float %x) #0 { ; CHECK-LABEL: name: v_constained_sqrt_f32_fpexcept_strict ; CHECK: bb.1 (%ir-block.0): - ; CHECK-NEXT: liveins: $vgpr0, $sgpr30_sgpr31 + ; CHECK-NEXT: liveins: $vgpr0 ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: 
[[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 ; CHECK-NEXT: [[STRICT_FSQRT:%[0-9]+]]:_(s32) = G_STRICT_FSQRT [[COPY]] ; CHECK-NEXT: $vgpr0 = COPY [[STRICT_FSQRT]](s32) - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY1]] - ; CHECK-NEXT: S_SETPC_B64_return [[COPY2]], implicit $vgpr0 + ; CHECK-NEXT: SI_RETURN implicit $vgpr0 %val = call float @llvm.experimental.constrained.sqrt.f32(float %x, metadata !"round.tonearest", metadata !"fpexcept.strict") ret float %val } diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-fixed-function-abi-vgpr-args.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-fixed-function-abi-vgpr-args.ll index ceba50dc1b7b..af0dd868c909 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-fixed-function-abi-vgpr-args.ll +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-fixed-function-abi-vgpr-args.ll @@ -7,7 +7,7 @@ define void @void_a31i32_i32([31 x i32] %arg0, i32 %arg1) { ; FIXED-LABEL: name: void_a31i32_i32 ; FIXED: bb.1 (%ir-block.0): - ; FIXED-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $vgpr8, $vgpr9, $vgpr10, $vgpr11, $vgpr12, $vgpr13, $vgpr14, $vgpr15, $vgpr16, $vgpr17, $vgpr18, $vgpr19, $vgpr20, $vgpr21, $vgpr22, $vgpr23, $vgpr24, $vgpr25, $vgpr26, $vgpr27, $vgpr28, $vgpr29, $vgpr30, $sgpr30_sgpr31 + ; FIXED-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $vgpr8, $vgpr9, $vgpr10, $vgpr11, $vgpr12, $vgpr13, $vgpr14, $vgpr15, $vgpr16, $vgpr17, $vgpr18, $vgpr19, $vgpr20, $vgpr21, $vgpr22, $vgpr23, $vgpr24, $vgpr25, $vgpr26, $vgpr27, $vgpr28, $vgpr29, $vgpr30 ; FIXED-NEXT: {{ $}} ; FIXED-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 ; FIXED-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 @@ -42,11 +42,9 @@ define void @void_a31i32_i32([31 x i32] %arg0, i32 %arg1) { ; FIXED-NEXT: [[COPY30:%[0-9]+]]:_(s32) = COPY $vgpr30 ; FIXED-NEXT: [[FRAME_INDEX:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.0 ; FIXED-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX]](p5) :: (invariant load (s32) from %fixed-stack.0, align 16, addrspace 5) - ; FIXED-NEXT: [[COPY31:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 ; FIXED-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF ; FIXED-NEXT: G_STORE [[LOAD]](s32), [[DEF]](p1) :: (store (s32) into `i32 addrspace(1)* undef`, addrspace 1) - ; FIXED-NEXT: [[COPY32:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY31]] - ; FIXED-NEXT: S_SETPC_B64_return [[COPY32]] + ; FIXED-NEXT: SI_RETURN store i32 %arg1, i32 addrspace(1)* undef ret void } diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-function-args.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-function-args.ll index 70a41f9dafaa..fa5daf763c3e 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-function-args.ll +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-function-args.ll @@ -8,14 +8,12 @@ define void @void_func_empty_arg({} %arg0, i32 %arg1) #0 { ; CHECK-LABEL: name: void_func_empty_arg ; CHECK: bb.1 (%ir-block.0): - ; CHECK-NEXT: liveins: $vgpr0, $sgpr30_sgpr31 + ; CHECK-NEXT: liveins: $vgpr0 ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF ; CHECK-NEXT: G_STORE [[COPY]](s32), [[DEF]](p1) :: (store (s32) into `i32 addrspace(1)* undef`, addrspace 1) - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY1]] - ; CHECK-NEXT: S_SETPC_B64_return [[COPY2]] + ; CHECK-NEXT: SI_RETURN store 
i32 %arg1, i32 addrspace(1)* undef ret void } @@ -23,14 +21,12 @@ define void @void_func_empty_arg({} %arg0, i32 %arg1) #0 { define void @void_func_empty_array([0 x i8] %arg0, i32 %arg1) #0 { ; CHECK-LABEL: name: void_func_empty_array ; CHECK: bb.1 (%ir-block.0): - ; CHECK-NEXT: liveins: $vgpr0, $sgpr30_sgpr31 + ; CHECK-NEXT: liveins: $vgpr0 ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF ; CHECK-NEXT: G_STORE [[COPY]](s32), [[DEF]](p1) :: (store (s32) into `i32 addrspace(1)* undef`, addrspace 1) - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY1]] - ; CHECK-NEXT: S_SETPC_B64_return [[COPY2]] + ; CHECK-NEXT: SI_RETURN store i32 %arg1, i32 addrspace(1)* undef ret void } @@ -38,15 +34,13 @@ define void @void_func_empty_array([0 x i8] %arg0, i32 %arg1) #0 { define void @void_func_i1(i1 %arg0) #0 { ; CHECK-LABEL: name: void_func_i1 ; CHECK: bb.1 (%ir-block.0): - ; CHECK-NEXT: liveins: $vgpr0, $sgpr30_sgpr31 + ; CHECK-NEXT: liveins: $vgpr0 ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(s1) = G_TRUNC [[COPY]](s32) - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF ; CHECK-NEXT: G_STORE [[TRUNC]](s1), [[DEF]](p1) :: (store (s1) into `i1 addrspace(1)* undef`, addrspace 1) - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY1]] - ; CHECK-NEXT: S_SETPC_B64_return [[COPY2]] + ; CHECK-NEXT: SI_RETURN store i1 %arg0, i1 addrspace(1)* undef ret void } @@ -54,19 +48,17 @@ define void @void_func_i1(i1 %arg0) #0 { define void @void_func_i1_zeroext(i1 zeroext %arg0) #0 { ; CHECK-LABEL: name: void_func_i1_zeroext ; CHECK: bb.1 (%ir-block.0): - ; CHECK-NEXT: liveins: $vgpr0, $sgpr30_sgpr31 + ; CHECK-NEXT: liveins: $vgpr0 ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 ; CHECK-NEXT: [[ASSERT_ZEXT:%[0-9]+]]:_(s32) = G_ASSERT_ZEXT [[COPY]], 1 ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(s1) = G_TRUNC [[ASSERT_ZEXT]](s32) - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 12 ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF ; CHECK-NEXT: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[TRUNC]](s1) ; CHECK-NEXT: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[ZEXT]], [[C]] ; CHECK-NEXT: G_STORE [[ADD]](s32), [[DEF]](p1) :: (store (s32) into `i32 addrspace(1)* undef`, addrspace 1) - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY1]] - ; CHECK-NEXT: S_SETPC_B64_return [[COPY2]] + ; CHECK-NEXT: SI_RETURN %ext = zext i1 %arg0 to i32 %add = add i32 %ext, 12 store i32 %add, i32 addrspace(1)* undef @@ -76,19 +68,17 @@ define void @void_func_i1_zeroext(i1 zeroext %arg0) #0 { define void @void_func_i1_signext(i1 signext %arg0) #0 { ; CHECK-LABEL: name: void_func_i1_signext ; CHECK: bb.1 (%ir-block.0): - ; CHECK-NEXT: liveins: $vgpr0, $sgpr30_sgpr31 + ; CHECK-NEXT: liveins: $vgpr0 ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 ; CHECK-NEXT: [[ASSERT_SEXT:%[0-9]+]]:_(s32) = G_ASSERT_SEXT [[COPY]], 1 ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(s1) = G_TRUNC [[ASSERT_SEXT]](s32) - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 12 ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF ; CHECK-NEXT: [[SEXT:%[0-9]+]]:_(s32) = G_SEXT [[TRUNC]](s1) ; CHECK-NEXT: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[SEXT]], [[C]] ; 
CHECK-NEXT: G_STORE [[ADD]](s32), [[DEF]](p1) :: (store (s32) into `i32 addrspace(1)* undef`, addrspace 1) - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY1]] - ; CHECK-NEXT: S_SETPC_B64_return [[COPY2]] + ; CHECK-NEXT: SI_RETURN %ext = sext i1 %arg0 to i32 %add = add i32 %ext, 12 store i32 %add, i32 addrspace(1)* undef @@ -99,11 +89,10 @@ define void @i1_arg_i1_use(i1 %arg) #0 { ; CHECK-LABEL: name: i1_arg_i1_use ; CHECK: bb.1.bb: ; CHECK-NEXT: successors: %bb.2(0x40000000), %bb.3(0x40000000) - ; CHECK-NEXT: liveins: $vgpr0, $sgpr30_sgpr31 + ; CHECK-NEXT: liveins: $vgpr0 ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(s1) = G_TRUNC [[COPY]](s32) - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 ; CHECK-NEXT: [[C:%[0-9]+]]:_(s1) = G_CONSTANT i1 true ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF @@ -120,8 +109,7 @@ define void @i1_arg_i1_use(i1 %arg) #0 { ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: bb.3.bb2: ; CHECK-NEXT: G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.end.cf), [[INT1]](s64) - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY1]] - ; CHECK-NEXT: S_SETPC_B64_return [[COPY2]] + ; CHECK-NEXT: SI_RETURN bb: br i1 %arg, label %bb2, label %bb1 @@ -136,16 +124,14 @@ bb2: define void @void_func_i8(i8 %arg0) #0 { ; CHECK-LABEL: name: void_func_i8 ; CHECK: bb.1 (%ir-block.0): - ; CHECK-NEXT: liveins: $vgpr0, $sgpr30_sgpr31 + ; CHECK-NEXT: liveins: $vgpr0 ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY]](s32) ; CHECK-NEXT: [[TRUNC1:%[0-9]+]]:_(s8) = G_TRUNC [[TRUNC]](s16) - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF ; CHECK-NEXT: G_STORE [[TRUNC1]](s8), [[DEF]](p1) :: (store (s8) into `i8 addrspace(1)* undef`, addrspace 1) - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY1]] - ; CHECK-NEXT: S_SETPC_B64_return [[COPY2]] + ; CHECK-NEXT: SI_RETURN store i8 %arg0, i8 addrspace(1)* undef ret void } @@ -153,19 +139,17 @@ define void @void_func_i8(i8 %arg0) #0 { define void @void_func_i8_zeroext(i8 zeroext %arg0) #0 { ; CHECK-LABEL: name: void_func_i8_zeroext ; CHECK: bb.1 (%ir-block.0): - ; CHECK-NEXT: liveins: $vgpr0, $sgpr30_sgpr31 + ; CHECK-NEXT: liveins: $vgpr0 ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 ; CHECK-NEXT: [[ASSERT_ZEXT:%[0-9]+]]:_(s32) = G_ASSERT_ZEXT [[COPY]], 8 ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(s8) = G_TRUNC [[ASSERT_ZEXT]](s32) - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 12 ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF ; CHECK-NEXT: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[TRUNC]](s8) ; CHECK-NEXT: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[ZEXT]], [[C]] ; CHECK-NEXT: G_STORE [[ADD]](s32), [[DEF]](p1) :: (store (s32) into `i32 addrspace(1)* undef`, addrspace 1) - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY1]] - ; CHECK-NEXT: S_SETPC_B64_return [[COPY2]] + ; CHECK-NEXT: SI_RETURN %ext = zext i8 %arg0 to i32 %add = add i32 %ext, 12 store i32 %add, i32 addrspace(1)* undef @@ -175,19 +159,17 @@ define void @void_func_i8_zeroext(i8 zeroext %arg0) #0 { define void @void_func_i8_signext(i8 signext %arg0) #0 { ; CHECK-LABEL: name: void_func_i8_signext ; CHECK: bb.1 (%ir-block.0): - ; CHECK-NEXT: liveins: $vgpr0, $sgpr30_sgpr31 + ; CHECK-NEXT: liveins: $vgpr0 ; CHECK-NEXT: 
{{ $}} ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 ; CHECK-NEXT: [[ASSERT_SEXT:%[0-9]+]]:_(s32) = G_ASSERT_SEXT [[COPY]], 8 ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(s8) = G_TRUNC [[ASSERT_SEXT]](s32) - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 12 ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF ; CHECK-NEXT: [[SEXT:%[0-9]+]]:_(s32) = G_SEXT [[TRUNC]](s8) ; CHECK-NEXT: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[SEXT]], [[C]] ; CHECK-NEXT: G_STORE [[ADD]](s32), [[DEF]](p1) :: (store (s32) into `i32 addrspace(1)* undef`, addrspace 1) - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY1]] - ; CHECK-NEXT: S_SETPC_B64_return [[COPY2]] + ; CHECK-NEXT: SI_RETURN %ext = sext i8 %arg0 to i32 %add = add i32 %ext, 12 store i32 %add, i32 addrspace(1)* undef @@ -197,15 +179,13 @@ define void @void_func_i8_signext(i8 signext %arg0) #0 { define void @void_func_i16(i16 %arg0) #0 { ; CHECK-LABEL: name: void_func_i16 ; CHECK: bb.1 (%ir-block.0): - ; CHECK-NEXT: liveins: $vgpr0, $sgpr30_sgpr31 + ; CHECK-NEXT: liveins: $vgpr0 ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY]](s32) - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF ; CHECK-NEXT: G_STORE [[TRUNC]](s16), [[DEF]](p1) :: (store (s16) into `i16 addrspace(1)* undef`, addrspace 1) - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY1]] - ; CHECK-NEXT: S_SETPC_B64_return [[COPY2]] + ; CHECK-NEXT: SI_RETURN store i16 %arg0, i16 addrspace(1)* undef ret void } @@ -213,19 +193,17 @@ define void @void_func_i16(i16 %arg0) #0 { define void @void_func_i16_zeroext(i16 zeroext %arg0) #0 { ; CHECK-LABEL: name: void_func_i16_zeroext ; CHECK: bb.1 (%ir-block.0): - ; CHECK-NEXT: liveins: $vgpr0, $sgpr30_sgpr31 + ; CHECK-NEXT: liveins: $vgpr0 ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 ; CHECK-NEXT: [[ASSERT_ZEXT:%[0-9]+]]:_(s32) = G_ASSERT_ZEXT [[COPY]], 16 ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[ASSERT_ZEXT]](s32) - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 12 ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF ; CHECK-NEXT: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[TRUNC]](s16) ; CHECK-NEXT: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[ZEXT]], [[C]] ; CHECK-NEXT: G_STORE [[ADD]](s32), [[DEF]](p1) :: (store (s32) into `i32 addrspace(1)* undef`, addrspace 1) - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY1]] - ; CHECK-NEXT: S_SETPC_B64_return [[COPY2]] + ; CHECK-NEXT: SI_RETURN %ext = zext i16 %arg0 to i32 %add = add i32 %ext, 12 store i32 %add, i32 addrspace(1)* undef @@ -235,19 +213,17 @@ define void @void_func_i16_zeroext(i16 zeroext %arg0) #0 { define void @void_func_i16_signext(i16 signext %arg0) #0 { ; CHECK-LABEL: name: void_func_i16_signext ; CHECK: bb.1 (%ir-block.0): - ; CHECK-NEXT: liveins: $vgpr0, $sgpr30_sgpr31 + ; CHECK-NEXT: liveins: $vgpr0 ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 ; CHECK-NEXT: [[ASSERT_SEXT:%[0-9]+]]:_(s32) = G_ASSERT_SEXT [[COPY]], 16 ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[ASSERT_SEXT]](s32) - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 12 ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF ; CHECK-NEXT: [[SEXT:%[0-9]+]]:_(s32) = G_SEXT [[TRUNC]](s16) ; CHECK-NEXT: [[ADD:%[0-9]+]]:_(s32) = 
G_ADD [[SEXT]], [[C]] ; CHECK-NEXT: G_STORE [[ADD]](s32), [[DEF]](p1) :: (store (s32) into `i32 addrspace(1)* undef`, addrspace 1) - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY1]] - ; CHECK-NEXT: S_SETPC_B64_return [[COPY2]] + ; CHECK-NEXT: SI_RETURN %ext = sext i16 %arg0 to i32 %add = add i32 %ext, 12 store i32 %add, i32 addrspace(1)* undef @@ -257,15 +233,13 @@ define void @void_func_i16_signext(i16 signext %arg0) #0 { define void @void_func_i24(i24 %arg0) #0 { ; CHECK-LABEL: name: void_func_i24 ; CHECK: bb.1 (%ir-block.0): - ; CHECK-NEXT: liveins: $vgpr0, $sgpr30_sgpr31 + ; CHECK-NEXT: liveins: $vgpr0 ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(s24) = G_TRUNC [[COPY]](s32) - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF ; CHECK-NEXT: G_STORE [[TRUNC]](s24), [[DEF]](p1) :: (store (s24) into `i24 addrspace(1)* undef`, align 4, addrspace 1) - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY1]] - ; CHECK-NEXT: S_SETPC_B64_return [[COPY2]] + ; CHECK-NEXT: SI_RETURN store i24 %arg0, i24 addrspace(1)* undef ret void } @@ -273,16 +247,14 @@ define void @void_func_i24(i24 %arg0) #0 { define void @void_func_i24_zeroext(i24 zeroext %arg0) #0 { ; CHECK-LABEL: name: void_func_i24_zeroext ; CHECK: bb.1 (%ir-block.0): - ; CHECK-NEXT: liveins: $vgpr0, $sgpr30_sgpr31 + ; CHECK-NEXT: liveins: $vgpr0 ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 ; CHECK-NEXT: [[ASSERT_ZEXT:%[0-9]+]]:_(s32) = G_ASSERT_ZEXT [[COPY]], 24 ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(s24) = G_TRUNC [[ASSERT_ZEXT]](s32) - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF ; CHECK-NEXT: G_STORE [[TRUNC]](s24), [[DEF]](p1) :: (store (s24) into `i24 addrspace(1)* undef`, align 4, addrspace 1) - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY1]] - ; CHECK-NEXT: S_SETPC_B64_return [[COPY2]] + ; CHECK-NEXT: SI_RETURN store i24 %arg0, i24 addrspace(1)* undef ret void } @@ -290,16 +262,14 @@ define void @void_func_i24_zeroext(i24 zeroext %arg0) #0 { define void @void_func_i24_signext(i24 signext %arg0) #0 { ; CHECK-LABEL: name: void_func_i24_signext ; CHECK: bb.1 (%ir-block.0): - ; CHECK-NEXT: liveins: $vgpr0, $sgpr30_sgpr31 + ; CHECK-NEXT: liveins: $vgpr0 ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 ; CHECK-NEXT: [[ASSERT_SEXT:%[0-9]+]]:_(s32) = G_ASSERT_SEXT [[COPY]], 24 ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(s24) = G_TRUNC [[ASSERT_SEXT]](s32) - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF ; CHECK-NEXT: G_STORE [[TRUNC]](s24), [[DEF]](p1) :: (store (s24) into `i24 addrspace(1)* undef`, align 4, addrspace 1) - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY1]] - ; CHECK-NEXT: S_SETPC_B64_return [[COPY2]] + ; CHECK-NEXT: SI_RETURN store i24 %arg0, i24 addrspace(1)* undef ret void } @@ -307,14 +277,12 @@ define void @void_func_i24_signext(i24 signext %arg0) #0 { define void @void_func_i32(i32 %arg0) #0 { ; CHECK-LABEL: name: void_func_i32 ; CHECK: bb.1 (%ir-block.0): - ; CHECK-NEXT: liveins: $vgpr0, $sgpr30_sgpr31 + ; CHECK-NEXT: liveins: $vgpr0 ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF ; CHECK-NEXT: G_STORE [[COPY]](s32), [[DEF]](p1) :: (store 
(s32) into `i32 addrspace(1)* undef`, addrspace 1) - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY1]] - ; CHECK-NEXT: S_SETPC_B64_return [[COPY2]] + ; CHECK-NEXT: SI_RETURN store i32 %arg0, i32 addrspace(1)* undef ret void } @@ -323,14 +291,12 @@ define void @void_func_i32(i32 %arg0) #0 { define void @void_func_i32_signext(i32 signext %arg0) #0 { ; CHECK-LABEL: name: void_func_i32_signext ; CHECK: bb.1 (%ir-block.0): - ; CHECK-NEXT: liveins: $vgpr0, $sgpr30_sgpr31 + ; CHECK-NEXT: liveins: $vgpr0 ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF ; CHECK-NEXT: G_STORE [[COPY]](s32), [[DEF]](p1) :: (store (s32) into `i32 addrspace(1)* undef`, addrspace 1) - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY1]] - ; CHECK-NEXT: S_SETPC_B64_return [[COPY2]] + ; CHECK-NEXT: SI_RETURN store i32 %arg0, i32 addrspace(1)* undef ret void } @@ -339,14 +305,12 @@ define void @void_func_i32_signext(i32 signext %arg0) #0 { define void @void_func_i32_zeroext(i32 zeroext %arg0) #0 { ; CHECK-LABEL: name: void_func_i32_zeroext ; CHECK: bb.1 (%ir-block.0): - ; CHECK-NEXT: liveins: $vgpr0, $sgpr30_sgpr31 + ; CHECK-NEXT: liveins: $vgpr0 ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF ; CHECK-NEXT: G_STORE [[COPY]](s32), [[DEF]](p1) :: (store (s32) into `i32 addrspace(1)* undef`, addrspace 1) - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY1]] - ; CHECK-NEXT: S_SETPC_B64_return [[COPY2]] + ; CHECK-NEXT: SI_RETURN store i32 %arg0, i32 addrspace(1)* undef ret void } @@ -354,14 +318,12 @@ define void @void_func_i32_zeroext(i32 zeroext %arg0) #0 { define void @void_func_p3i8(i8 addrspace(3)* %arg0) #0 { ; CHECK-LABEL: name: void_func_p3i8 ; CHECK: bb.1 (%ir-block.0): - ; CHECK-NEXT: liveins: $vgpr0, $sgpr30_sgpr31 + ; CHECK-NEXT: liveins: $vgpr0 ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF ; CHECK-NEXT: G_STORE [[COPY]](p3), [[DEF]](p1) :: (store (p3) into `i8 addrspace(3)* addrspace(1)* undef`, addrspace 1) - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY1]] - ; CHECK-NEXT: S_SETPC_B64_return [[COPY2]] + ; CHECK-NEXT: SI_RETURN store i8 addrspace(3)* %arg0, i8 addrspace(3)* addrspace(1)* undef ret void } @@ -369,17 +331,15 @@ define void @void_func_p3i8(i8 addrspace(3)* %arg0) #0 { define void @void_func_i48(i48 %arg0) #0 { ; CHECK-LABEL: name: void_func_i48 ; CHECK: bb.1 (%ir-block.0): - ; CHECK-NEXT: liveins: $vgpr0, $vgpr1, $sgpr30_sgpr31 + ; CHECK-NEXT: liveins: $vgpr0, $vgpr1 ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 ; CHECK-NEXT: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY]](s32), [[COPY1]](s32) ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(s48) = G_TRUNC [[MV]](s64) - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF ; CHECK-NEXT: G_STORE [[TRUNC]](s48), [[DEF]](p1) :: (store (s48) into `i48 addrspace(1)* undef`, align 8, addrspace 1) - ; CHECK-NEXT: [[COPY3:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY2]] - ; CHECK-NEXT: S_SETPC_B64_return [[COPY3]] + ; CHECK-NEXT: SI_RETURN store i48 %arg0, i48 addrspace(1)* undef ret void 
} @@ -387,20 +347,18 @@ define void @void_func_i48(i48 %arg0) #0 { define void @void_func_i48_zeroext(i48 zeroext %arg0) #0 { ; CHECK-LABEL: name: void_func_i48_zeroext ; CHECK: bb.1 (%ir-block.0): - ; CHECK-NEXT: liveins: $vgpr0, $vgpr1, $sgpr30_sgpr31 + ; CHECK-NEXT: liveins: $vgpr0, $vgpr1 ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 ; CHECK-NEXT: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY]](s32), [[COPY1]](s32) ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(s48) = G_TRUNC [[MV]](s64) - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 ; CHECK-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 12 ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF ; CHECK-NEXT: [[ZEXT:%[0-9]+]]:_(s64) = G_ZEXT [[TRUNC]](s48) ; CHECK-NEXT: [[ADD:%[0-9]+]]:_(s64) = G_ADD [[ZEXT]], [[C]] ; CHECK-NEXT: G_STORE [[ADD]](s64), [[DEF]](p1) :: (store (s64) into `i64 addrspace(1)* undef`, addrspace 1) - ; CHECK-NEXT: [[COPY3:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY2]] - ; CHECK-NEXT: S_SETPC_B64_return [[COPY3]] + ; CHECK-NEXT: SI_RETURN %ext = zext i48 %arg0 to i64 %add = add i64 %ext, 12 store i64 %add, i64 addrspace(1)* undef @@ -410,20 +368,18 @@ define void @void_func_i48_zeroext(i48 zeroext %arg0) #0 { define void @void_func_i48_signext(i48 signext %arg0) #0 { ; CHECK-LABEL: name: void_func_i48_signext ; CHECK: bb.1 (%ir-block.0): - ; CHECK-NEXT: liveins: $vgpr0, $vgpr1, $sgpr30_sgpr31 + ; CHECK-NEXT: liveins: $vgpr0, $vgpr1 ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 ; CHECK-NEXT: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY]](s32), [[COPY1]](s32) ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(s48) = G_TRUNC [[MV]](s64) - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 ; CHECK-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 12 ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF ; CHECK-NEXT: [[SEXT:%[0-9]+]]:_(s64) = G_SEXT [[TRUNC]](s48) ; CHECK-NEXT: [[ADD:%[0-9]+]]:_(s64) = G_ADD [[SEXT]], [[C]] ; CHECK-NEXT: G_STORE [[ADD]](s64), [[DEF]](p1) :: (store (s64) into `i64 addrspace(1)* undef`, addrspace 1) - ; CHECK-NEXT: [[COPY3:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY2]] - ; CHECK-NEXT: S_SETPC_B64_return [[COPY3]] + ; CHECK-NEXT: SI_RETURN %ext = sext i48 %arg0 to i64 %add = add i64 %ext, 12 store i64 %add, i64 addrspace(1)* undef @@ -433,16 +389,14 @@ define void @void_func_i48_signext(i48 signext %arg0) #0 { define void @void_func_i64(i64 %arg0) #0 { ; CHECK-LABEL: name: void_func_i64 ; CHECK: bb.1 (%ir-block.0): - ; CHECK-NEXT: liveins: $vgpr0, $vgpr1, $sgpr30_sgpr31 + ; CHECK-NEXT: liveins: $vgpr0, $vgpr1 ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 ; CHECK-NEXT: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY]](s32), [[COPY1]](s32) - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF ; CHECK-NEXT: G_STORE [[MV]](s64), [[DEF]](p1) :: (store (s64) into `i64 addrspace(1)* undef`, addrspace 1) - ; CHECK-NEXT: [[COPY3:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY2]] - ; CHECK-NEXT: S_SETPC_B64_return [[COPY3]] + ; CHECK-NEXT: SI_RETURN store i64 %arg0, i64 addrspace(1)* undef ret void } @@ -450,18 +404,16 @@ define void @void_func_i64(i64 %arg0) #0 { define void @void_func_i95(i95 %arg0) #0 { ; CHECK-LABEL: name: void_func_i95 ; CHECK: bb.1 (%ir-block.0): - ; CHECK-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2, $sgpr30_sgpr31 + ; 
CHECK-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2 ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 ; CHECK-NEXT: [[MV:%[0-9]+]]:_(s96) = G_MERGE_VALUES [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32) ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(s95) = G_TRUNC [[MV]](s96) - ; CHECK-NEXT: [[COPY3:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF ; CHECK-NEXT: G_STORE [[TRUNC]](s95), [[DEF]](p1) :: (store (s95) into `i95 addrspace(1)* undef`, align 8, addrspace 1) - ; CHECK-NEXT: [[COPY4:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY3]] - ; CHECK-NEXT: S_SETPC_B64_return [[COPY4]] + ; CHECK-NEXT: SI_RETURN store i95 %arg0, i95 addrspace(1)* undef ret void } @@ -469,21 +421,19 @@ define void @void_func_i95(i95 %arg0) #0 { define void @void_func_i95_zeroext(i95 zeroext %arg0) #0 { ; CHECK-LABEL: name: void_func_i95_zeroext ; CHECK: bb.1 (%ir-block.0): - ; CHECK-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2, $sgpr30_sgpr31 + ; CHECK-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2 ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 ; CHECK-NEXT: [[MV:%[0-9]+]]:_(s96) = G_MERGE_VALUES [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32) ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(s95) = G_TRUNC [[MV]](s96) - ; CHECK-NEXT: [[COPY3:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 ; CHECK-NEXT: [[C:%[0-9]+]]:_(s96) = G_CONSTANT i96 12 ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF ; CHECK-NEXT: [[ZEXT:%[0-9]+]]:_(s96) = G_ZEXT [[TRUNC]](s95) ; CHECK-NEXT: [[ADD:%[0-9]+]]:_(s96) = G_ADD [[ZEXT]], [[C]] ; CHECK-NEXT: G_STORE [[ADD]](s96), [[DEF]](p1) :: (store (s96) into `i96 addrspace(1)* undef`, align 8, addrspace 1) - ; CHECK-NEXT: [[COPY4:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY3]] - ; CHECK-NEXT: S_SETPC_B64_return [[COPY4]] + ; CHECK-NEXT: SI_RETURN %ext = zext i95 %arg0 to i96 %add = add i96 %ext, 12 store i96 %add, i96 addrspace(1)* undef @@ -493,21 +443,19 @@ define void @void_func_i95_zeroext(i95 zeroext %arg0) #0 { define void @void_func_i95_signext(i95 signext %arg0) #0 { ; CHECK-LABEL: name: void_func_i95_signext ; CHECK: bb.1 (%ir-block.0): - ; CHECK-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2, $sgpr30_sgpr31 + ; CHECK-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2 ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 ; CHECK-NEXT: [[MV:%[0-9]+]]:_(s96) = G_MERGE_VALUES [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32) ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(s95) = G_TRUNC [[MV]](s96) - ; CHECK-NEXT: [[COPY3:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 ; CHECK-NEXT: [[C:%[0-9]+]]:_(s96) = G_CONSTANT i96 12 ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF ; CHECK-NEXT: [[SEXT:%[0-9]+]]:_(s96) = G_SEXT [[TRUNC]](s95) ; CHECK-NEXT: [[ADD:%[0-9]+]]:_(s96) = G_ADD [[SEXT]], [[C]] ; CHECK-NEXT: G_STORE [[ADD]](s96), [[DEF]](p1) :: (store (s96) into `i96 addrspace(1)* undef`, align 8, addrspace 1) - ; CHECK-NEXT: [[COPY4:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY3]] - ; CHECK-NEXT: S_SETPC_B64_return [[COPY4]] + ; CHECK-NEXT: SI_RETURN %ext = sext i95 %arg0 to i96 %add = add i96 %ext, 12 store i96 %add, i96 addrspace(1)* undef @@ -517,17 +465,15 @@ define void @void_func_i95_signext(i95 signext %arg0) #0 { define void @void_func_i96(i96 %arg0) #0 { ; CHECK-LABEL: name: void_func_i96 
; CHECK: bb.1 (%ir-block.0): - ; CHECK-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2, $sgpr30_sgpr31 + ; CHECK-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2 ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 ; CHECK-NEXT: [[MV:%[0-9]+]]:_(s96) = G_MERGE_VALUES [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32) - ; CHECK-NEXT: [[COPY3:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF ; CHECK-NEXT: G_STORE [[MV]](s96), [[DEF]](p1) :: (store (s96) into `i96 addrspace(1)* undef`, align 8, addrspace 1) - ; CHECK-NEXT: [[COPY4:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY3]] - ; CHECK-NEXT: S_SETPC_B64_return [[COPY4]] + ; CHECK-NEXT: SI_RETURN store i96 %arg0, i96 addrspace(1)* undef ret void } @@ -535,16 +481,14 @@ define void @void_func_i96(i96 %arg0) #0 { define void @void_func_p0i8(i8* %arg0) #0 { ; CHECK-LABEL: name: void_func_p0i8 ; CHECK: bb.1 (%ir-block.0): - ; CHECK-NEXT: liveins: $vgpr0, $vgpr1, $sgpr30_sgpr31 + ; CHECK-NEXT: liveins: $vgpr0, $vgpr1 ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 ; CHECK-NEXT: [[MV:%[0-9]+]]:_(p0) = G_MERGE_VALUES [[COPY]](s32), [[COPY1]](s32) - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF ; CHECK-NEXT: G_STORE [[MV]](p0), [[DEF]](p1) :: (store (p0) into `i8* addrspace(1)* undef`, addrspace 1) - ; CHECK-NEXT: [[COPY3:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY2]] - ; CHECK-NEXT: S_SETPC_B64_return [[COPY3]] + ; CHECK-NEXT: SI_RETURN store i8* %arg0, i8* addrspace(1)* undef ret void } @@ -552,16 +496,14 @@ define void @void_func_p0i8(i8* %arg0) #0 { define void @void_func_p1i8(i8 addrspace(1)* %arg0) #0 { ; CHECK-LABEL: name: void_func_p1i8 ; CHECK: bb.1 (%ir-block.0): - ; CHECK-NEXT: liveins: $vgpr0, $vgpr1, $sgpr30_sgpr31 + ; CHECK-NEXT: liveins: $vgpr0, $vgpr1 ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 ; CHECK-NEXT: [[MV:%[0-9]+]]:_(p1) = G_MERGE_VALUES [[COPY]](s32), [[COPY1]](s32) - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF ; CHECK-NEXT: G_STORE [[MV]](p1), [[DEF]](p1) :: (store (p1) into `i8 addrspace(1)* addrspace(1)* undef`, addrspace 1) - ; CHECK-NEXT: [[COPY3:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY2]] - ; CHECK-NEXT: S_SETPC_B64_return [[COPY3]] + ; CHECK-NEXT: SI_RETURN store i8 addrspace(1)* %arg0, i8 addrspace(1)* addrspace(1)* undef ret void } @@ -569,15 +511,13 @@ define void @void_func_p1i8(i8 addrspace(1)* %arg0) #0 { define void @void_func_f16(half %arg0) #0 { ; CHECK-LABEL: name: void_func_f16 ; CHECK: bb.1 (%ir-block.0): - ; CHECK-NEXT: liveins: $vgpr0, $sgpr30_sgpr31 + ; CHECK-NEXT: liveins: $vgpr0 ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY]](s32) - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF ; CHECK-NEXT: G_STORE [[TRUNC]](s16), [[DEF]](p1) :: (store (s16) into `half addrspace(1)* undef`, addrspace 1) - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY1]] - ; CHECK-NEXT: S_SETPC_B64_return [[COPY2]] + ; CHECK-NEXT: SI_RETURN store half %arg0, half addrspace(1)* undef ret void } @@ -585,14 +525,12 @@ define void @void_func_f16(half %arg0) #0 { 
define void @void_func_f32(float %arg0) #0 { ; CHECK-LABEL: name: void_func_f32 ; CHECK: bb.1 (%ir-block.0): - ; CHECK-NEXT: liveins: $vgpr0, $sgpr30_sgpr31 + ; CHECK-NEXT: liveins: $vgpr0 ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF ; CHECK-NEXT: G_STORE [[COPY]](s32), [[DEF]](p1) :: (store (s32) into `float addrspace(1)* undef`, addrspace 1) - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY1]] - ; CHECK-NEXT: S_SETPC_B64_return [[COPY2]] + ; CHECK-NEXT: SI_RETURN store float %arg0, float addrspace(1)* undef ret void } @@ -600,16 +538,14 @@ define void @void_func_f32(float %arg0) #0 { define void @void_func_f64(double %arg0) #0 { ; CHECK-LABEL: name: void_func_f64 ; CHECK: bb.1 (%ir-block.0): - ; CHECK-NEXT: liveins: $vgpr0, $vgpr1, $sgpr30_sgpr31 + ; CHECK-NEXT: liveins: $vgpr0, $vgpr1 ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 ; CHECK-NEXT: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY]](s32), [[COPY1]](s32) - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF ; CHECK-NEXT: G_STORE [[MV]](s64), [[DEF]](p1) :: (store (s64) into `double addrspace(1)* undef`, addrspace 1) - ; CHECK-NEXT: [[COPY3:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY2]] - ; CHECK-NEXT: S_SETPC_B64_return [[COPY3]] + ; CHECK-NEXT: SI_RETURN store double %arg0, double addrspace(1)* undef ret void } @@ -617,16 +553,14 @@ define void @void_func_f64(double %arg0) #0 { define void @void_func_v2i32(<2 x i32> %arg0) #0 { ; CHECK-LABEL: name: void_func_v2i32 ; CHECK: bb.1 (%ir-block.0): - ; CHECK-NEXT: liveins: $vgpr0, $vgpr1, $sgpr30_sgpr31 + ; CHECK-NEXT: liveins: $vgpr0, $vgpr1 ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32) - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF ; CHECK-NEXT: G_STORE [[BUILD_VECTOR]](<2 x s32>), [[DEF]](p1) :: (store (<2 x s32>) into `<2 x i32> addrspace(1)* undef`, addrspace 1) - ; CHECK-NEXT: [[COPY3:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY2]] - ; CHECK-NEXT: S_SETPC_B64_return [[COPY3]] + ; CHECK-NEXT: SI_RETURN store <2 x i32> %arg0, <2 x i32> addrspace(1)* undef ret void } @@ -634,17 +568,15 @@ define void @void_func_v2i32(<2 x i32> %arg0) #0 { define void @void_func_v2i24(<2 x i24> %arg0) #0 { ; CHECK-LABEL: name: void_func_v2i24 ; CHECK: bb.1 (%ir-block.0): - ; CHECK-NEXT: liveins: $vgpr0, $vgpr1, $sgpr30_sgpr31 + ; CHECK-NEXT: liveins: $vgpr0, $vgpr1 ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32) ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(<2 x s24>) = G_TRUNC [[BUILD_VECTOR]](<2 x s32>) - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF ; CHECK-NEXT: G_STORE [[TRUNC]](<2 x s24>), [[DEF]](p1) :: (store (<2 x s24>) into `<2 x i24> addrspace(1)* undef`, align 8, addrspace 1) - ; CHECK-NEXT: [[COPY3:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY2]] - ; CHECK-NEXT: S_SETPC_B64_return [[COPY3]] + ; CHECK-NEXT: SI_RETURN store <2 x i24> %arg0, <2 x 
i24> addrspace(1)* undef ret void } @@ -652,18 +584,16 @@ define void @void_func_v2i24(<2 x i24> %arg0) #0 { define void @void_func_v3i24(<3 x i24> %arg0) #0 { ; CHECK-LABEL: name: void_func_v3i24 ; CHECK: bb.1 (%ir-block.0): - ; CHECK-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2, $sgpr30_sgpr31 + ; CHECK-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2 ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32) ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(<3 x s24>) = G_TRUNC [[BUILD_VECTOR]](<3 x s32>) - ; CHECK-NEXT: [[COPY3:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF ; CHECK-NEXT: G_STORE [[TRUNC]](<3 x s24>), [[DEF]](p1) :: (store (<3 x s24>) into `<3 x i24> addrspace(1)* undef`, align 16, addrspace 1) - ; CHECK-NEXT: [[COPY4:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY3]] - ; CHECK-NEXT: S_SETPC_B64_return [[COPY4]] + ; CHECK-NEXT: SI_RETURN store <3 x i24> %arg0, <3 x i24> addrspace(1)* undef ret void } @@ -671,7 +601,7 @@ define void @void_func_v3i24(<3 x i24> %arg0) #0 { define void @void_func_v2i8(<2 x i8> %arg0) #0 { ; CHECK-LABEL: name: void_func_v2i8 ; CHECK: bb.1 (%ir-block.0): - ; CHECK-NEXT: liveins: $vgpr0, $vgpr1, $sgpr30_sgpr31 + ; CHECK-NEXT: liveins: $vgpr0, $vgpr1 ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY]](s32) @@ -679,11 +609,9 @@ define void @void_func_v2i8(<2 x i8> %arg0) #0 { ; CHECK-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY1]](s32) ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[TRUNC1]](s16) ; CHECK-NEXT: [[TRUNC2:%[0-9]+]]:_(<2 x s8>) = G_TRUNC [[BUILD_VECTOR]](<2 x s16>) - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF ; CHECK-NEXT: G_STORE [[TRUNC2]](<2 x s8>), [[DEF]](p1) :: (store (<2 x s8>) into `<2 x i8> addrspace(1)* undef`, addrspace 1) - ; CHECK-NEXT: [[COPY3:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY2]] - ; CHECK-NEXT: S_SETPC_B64_return [[COPY3]] + ; CHECK-NEXT: SI_RETURN store <2 x i8> %arg0, <2 x i8> addrspace(1)* undef ret void } @@ -691,7 +619,7 @@ define void @void_func_v2i8(<2 x i8> %arg0) #0 { define void @void_func_v3i8(<3 x i8> %arg0) #0 { ; CHECK-LABEL: name: void_func_v3i8 ; CHECK: bb.1 (%ir-block.0): - ; CHECK-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2, $sgpr30_sgpr31 + ; CHECK-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2 ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY]](s32) @@ -701,11 +629,9 @@ define void @void_func_v3i8(<3 x i8> %arg0) #0 { ; CHECK-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[COPY2]](s32) ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[TRUNC1]](s16), [[TRUNC2]](s16) ; CHECK-NEXT: [[TRUNC3:%[0-9]+]]:_(<3 x s8>) = G_TRUNC [[BUILD_VECTOR]](<3 x s16>) - ; CHECK-NEXT: [[COPY3:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF ; CHECK-NEXT: G_STORE [[TRUNC3]](<3 x s8>), [[DEF]](p1) :: (store (<3 x s8>) into `<3 x i8> addrspace(1)* undef`, align 4, addrspace 1) - ; CHECK-NEXT: [[COPY4:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY3]] - ; CHECK-NEXT: S_SETPC_B64_return [[COPY4]] + ; CHECK-NEXT: SI_RETURN store <3 x i8> %arg0, <3 x i8> addrspace(1)* undef ret 
void } @@ -713,7 +639,7 @@ define void @void_func_v3i8(<3 x i8> %arg0) #0 { define void @void_func_v4i8(<4 x i8> %arg0) #0 { ; CHECK-LABEL: name: void_func_v4i8 ; CHECK: bb.1 (%ir-block.0): - ; CHECK-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $sgpr30_sgpr31 + ; CHECK-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3 ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY]](s32) @@ -725,11 +651,9 @@ define void @void_func_v4i8(<4 x i8> %arg0) #0 { ; CHECK-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[COPY3]](s32) ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[TRUNC1]](s16), [[TRUNC2]](s16), [[TRUNC3]](s16) ; CHECK-NEXT: [[TRUNC4:%[0-9]+]]:_(<4 x s8>) = G_TRUNC [[BUILD_VECTOR]](<4 x s16>) - ; CHECK-NEXT: [[COPY4:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF ; CHECK-NEXT: G_STORE [[TRUNC4]](<4 x s8>), [[DEF]](p1) :: (store (<4 x s8>) into `<4 x i8> addrspace(1)* undef`, addrspace 1) - ; CHECK-NEXT: [[COPY5:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY4]] - ; CHECK-NEXT: S_SETPC_B64_return [[COPY5]] + ; CHECK-NEXT: SI_RETURN store <4 x i8> %arg0, <4 x i8> addrspace(1)* undef ret void } @@ -737,16 +661,14 @@ define void @void_func_v4i8(<4 x i8> %arg0) #0 { define void @void_func_v2p3i8(<2 x i8 addrspace(3)*> %arg0) #0 { ; CHECK-LABEL: name: void_func_v2p3i8 ; CHECK: bb.1 (%ir-block.0): - ; CHECK-NEXT: liveins: $vgpr0, $vgpr1, $sgpr30_sgpr31 + ; CHECK-NEXT: liveins: $vgpr0, $vgpr1 ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(p3) = COPY $vgpr1 ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x p3>) = G_BUILD_VECTOR [[COPY]](p3), [[COPY1]](p3) - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF ; CHECK-NEXT: G_STORE [[BUILD_VECTOR]](<2 x p3>), [[DEF]](p1) :: (store (<2 x p3>) into `<2 x i8 addrspace(3)*> addrspace(1)* undef`, addrspace 1) - ; CHECK-NEXT: [[COPY3:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY2]] - ; CHECK-NEXT: S_SETPC_B64_return [[COPY3]] + ; CHECK-NEXT: SI_RETURN store <2 x i8 addrspace(3)*> %arg0, <2 x i8 addrspace(3)*> addrspace(1)* undef ret void } @@ -754,17 +676,15 @@ define void @void_func_v2p3i8(<2 x i8 addrspace(3)*> %arg0) #0 { define void @void_func_v3i32(<3 x i32> %arg0) #0 { ; CHECK-LABEL: name: void_func_v3i32 ; CHECK: bb.1 (%ir-block.0): - ; CHECK-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2, $sgpr30_sgpr31 + ; CHECK-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2 ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32) - ; CHECK-NEXT: [[COPY3:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF ; CHECK-NEXT: G_STORE [[BUILD_VECTOR]](<3 x s32>), [[DEF]](p1) :: (store (<3 x s32>) into `<3 x i32> addrspace(1)* undef`, align 16, addrspace 1) - ; CHECK-NEXT: [[COPY4:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY3]] - ; CHECK-NEXT: S_SETPC_B64_return [[COPY4]] + ; CHECK-NEXT: SI_RETURN store <3 x i32> %arg0, <3 x i32> addrspace(1)* undef ret void } @@ -772,18 +692,16 @@ define void @void_func_v3i32(<3 x i32> %arg0) #0 { define void @void_func_v4i32(<4 x i32> %arg0) #0 { ; CHECK-LABEL: name: void_func_v4i32 ; CHECK: bb.1 (%ir-block.0): - ; CHECK-NEXT: liveins: $vgpr0, 
$vgpr1, $vgpr2, $vgpr3, $sgpr30_sgpr31 + ; CHECK-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3 ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 ; CHECK-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $vgpr3 ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32) - ; CHECK-NEXT: [[COPY4:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF ; CHECK-NEXT: G_STORE [[BUILD_VECTOR]](<4 x s32>), [[DEF]](p1) :: (store (<4 x s32>) into `<4 x i32> addrspace(1)* undef`, addrspace 1) - ; CHECK-NEXT: [[COPY5:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY4]] - ; CHECK-NEXT: S_SETPC_B64_return [[COPY5]] + ; CHECK-NEXT: SI_RETURN store <4 x i32> %arg0, <4 x i32> addrspace(1)* undef ret void } @@ -791,7 +709,7 @@ define void @void_func_v4i32(<4 x i32> %arg0) #0 { define void @void_func_v5i32(<5 x i32> %arg0) #0 { ; CHECK-LABEL: name: void_func_v5i32 ; CHECK: bb.1 (%ir-block.0): - ; CHECK-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $sgpr30_sgpr31 + ; CHECK-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4 ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 @@ -799,11 +717,9 @@ define void @void_func_v5i32(<5 x i32> %arg0) #0 { ; CHECK-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $vgpr3 ; CHECK-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $vgpr4 ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<5 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32) - ; CHECK-NEXT: [[COPY5:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF ; CHECK-NEXT: G_STORE [[BUILD_VECTOR]](<5 x s32>), [[DEF]](p1) :: (store (<5 x s32>) into `<5 x i32> addrspace(1)* undef`, align 32, addrspace 1) - ; CHECK-NEXT: [[COPY6:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY5]] - ; CHECK-NEXT: S_SETPC_B64_return [[COPY6]] + ; CHECK-NEXT: SI_RETURN store <5 x i32> %arg0, <5 x i32> addrspace(1)* undef ret void } @@ -811,7 +727,7 @@ define void @void_func_v5i32(<5 x i32> %arg0) #0 { define void @void_func_v8i32(<8 x i32> %arg0) #0 { ; CHECK-LABEL: name: void_func_v8i32 ; CHECK: bb.1 (%ir-block.0): - ; CHECK-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $sgpr30_sgpr31 + ; CHECK-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7 ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 @@ -822,11 +738,9 @@ define void @void_func_v8i32(<8 x i32> %arg0) #0 { ; CHECK-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $vgpr6 ; CHECK-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $vgpr7 ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) - ; CHECK-NEXT: [[COPY8:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF ; CHECK-NEXT: G_STORE [[BUILD_VECTOR]](<8 x s32>), [[DEF]](p1) :: (store (<8 x s32>) into `<8 x i32> addrspace(1)* undef`, addrspace 1) - ; CHECK-NEXT: [[COPY9:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY8]] - ; CHECK-NEXT: S_SETPC_B64_return [[COPY9]] + ; CHECK-NEXT: SI_RETURN store <8 x i32> %arg0, <8 x i32> addrspace(1)* undef ret void } @@ -834,7 +748,7 @@ define void @void_func_v8i32(<8 x i32> %arg0) #0 { 
define void @void_func_v16i32(<16 x i32> %arg0) #0 { ; CHECK-LABEL: name: void_func_v16i32 ; CHECK: bb.1 (%ir-block.0): - ; CHECK-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $vgpr8, $vgpr9, $vgpr10, $vgpr11, $vgpr12, $vgpr13, $vgpr14, $vgpr15, $sgpr30_sgpr31 + ; CHECK-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $vgpr8, $vgpr9, $vgpr10, $vgpr11, $vgpr12, $vgpr13, $vgpr14, $vgpr15 ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 @@ -853,11 +767,9 @@ define void @void_func_v16i32(<16 x i32> %arg0) #0 { ; CHECK-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY $vgpr14 ; CHECK-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY $vgpr15 ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<16 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32), [[COPY8]](s32), [[COPY9]](s32), [[COPY10]](s32), [[COPY11]](s32), [[COPY12]](s32), [[COPY13]](s32), [[COPY14]](s32), [[COPY15]](s32) - ; CHECK-NEXT: [[COPY16:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF ; CHECK-NEXT: G_STORE [[BUILD_VECTOR]](<16 x s32>), [[DEF]](p1) :: (store (<16 x s32>) into `<16 x i32> addrspace(1)* undef`, addrspace 1) - ; CHECK-NEXT: [[COPY17:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY16]] - ; CHECK-NEXT: S_SETPC_B64_return [[COPY17]] + ; CHECK-NEXT: SI_RETURN store <16 x i32> %arg0, <16 x i32> addrspace(1)* undef ret void } @@ -865,7 +777,7 @@ define void @void_func_v16i32(<16 x i32> %arg0) #0 { define void @void_func_v32i32(<32 x i32> %arg0) #0 { ; CHECK-LABEL: name: void_func_v32i32 ; CHECK: bb.1 (%ir-block.0): - ; CHECK-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $vgpr8, $vgpr9, $vgpr10, $vgpr11, $vgpr12, $vgpr13, $vgpr14, $vgpr15, $vgpr16, $vgpr17, $vgpr18, $vgpr19, $vgpr20, $vgpr21, $vgpr22, $vgpr23, $vgpr24, $vgpr25, $vgpr26, $vgpr27, $vgpr28, $vgpr29, $vgpr30, $sgpr30_sgpr31 + ; CHECK-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $vgpr8, $vgpr9, $vgpr10, $vgpr11, $vgpr12, $vgpr13, $vgpr14, $vgpr15, $vgpr16, $vgpr17, $vgpr18, $vgpr19, $vgpr20, $vgpr21, $vgpr22, $vgpr23, $vgpr24, $vgpr25, $vgpr26, $vgpr27, $vgpr28, $vgpr29, $vgpr30 ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 @@ -901,11 +813,9 @@ define void @void_func_v32i32(<32 x i32> %arg0) #0 { ; CHECK-NEXT: [[FRAME_INDEX:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.0 ; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX]](p5) :: (invariant load (s32) from %fixed-stack.0, align 16, addrspace 5) ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<32 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32), [[COPY8]](s32), [[COPY9]](s32), [[COPY10]](s32), [[COPY11]](s32), [[COPY12]](s32), [[COPY13]](s32), [[COPY14]](s32), [[COPY15]](s32), [[COPY16]](s32), [[COPY17]](s32), [[COPY18]](s32), [[COPY19]](s32), [[COPY20]](s32), [[COPY21]](s32), [[COPY22]](s32), [[COPY23]](s32), [[COPY24]](s32), [[COPY25]](s32), [[COPY26]](s32), [[COPY27]](s32), [[COPY28]](s32), [[COPY29]](s32), [[COPY30]](s32), [[LOAD]](s32) - ; CHECK-NEXT: [[COPY31:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF ; CHECK-NEXT: G_STORE [[BUILD_VECTOR]](<32 x s32>), [[DEF]](p1) :: (store (<32 
x s32>) into `<32 x i32> addrspace(1)* undef`, addrspace 1) - ; CHECK-NEXT: [[COPY32:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY31]] - ; CHECK-NEXT: S_SETPC_B64_return [[COPY32]] + ; CHECK-NEXT: SI_RETURN store <32 x i32> %arg0, <32 x i32> addrspace(1)* undef ret void } @@ -914,7 +824,7 @@ define void @void_func_v32i32(<32 x i32> %arg0) #0 { define void @void_func_v33i32(<33 x i32> %arg0) #0 { ; CHECK-LABEL: name: void_func_v33i32 ; CHECK: bb.1 (%ir-block.0): - ; CHECK-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $vgpr8, $vgpr9, $vgpr10, $vgpr11, $vgpr12, $vgpr13, $vgpr14, $vgpr15, $vgpr16, $vgpr17, $vgpr18, $vgpr19, $vgpr20, $vgpr21, $vgpr22, $vgpr23, $vgpr24, $vgpr25, $vgpr26, $vgpr27, $vgpr28, $vgpr29, $vgpr30, $sgpr30_sgpr31 + ; CHECK-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $vgpr8, $vgpr9, $vgpr10, $vgpr11, $vgpr12, $vgpr13, $vgpr14, $vgpr15, $vgpr16, $vgpr17, $vgpr18, $vgpr19, $vgpr20, $vgpr21, $vgpr22, $vgpr23, $vgpr24, $vgpr25, $vgpr26, $vgpr27, $vgpr28, $vgpr29, $vgpr30 ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 @@ -952,11 +862,9 @@ define void @void_func_v33i32(<33 x i32> %arg0) #0 { ; CHECK-NEXT: [[FRAME_INDEX1:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.0 ; CHECK-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX1]](p5) :: (invariant load (s32) from %fixed-stack.0, addrspace 5) ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<33 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32), [[COPY8]](s32), [[COPY9]](s32), [[COPY10]](s32), [[COPY11]](s32), [[COPY12]](s32), [[COPY13]](s32), [[COPY14]](s32), [[COPY15]](s32), [[COPY16]](s32), [[COPY17]](s32), [[COPY18]](s32), [[COPY19]](s32), [[COPY20]](s32), [[COPY21]](s32), [[COPY22]](s32), [[COPY23]](s32), [[COPY24]](s32), [[COPY25]](s32), [[COPY26]](s32), [[COPY27]](s32), [[COPY28]](s32), [[COPY29]](s32), [[COPY30]](s32), [[LOAD]](s32), [[LOAD1]](s32) - ; CHECK-NEXT: [[COPY31:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF ; CHECK-NEXT: G_STORE [[BUILD_VECTOR]](<33 x s32>), [[DEF]](p1) :: (store (<33 x s32>) into `<33 x i32> addrspace(1)* undef`, align 256, addrspace 1) - ; CHECK-NEXT: [[COPY32:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY31]] - ; CHECK-NEXT: S_SETPC_B64_return [[COPY32]] + ; CHECK-NEXT: SI_RETURN store <33 x i32> %arg0, <33 x i32> addrspace(1)* undef ret void } @@ -964,7 +872,7 @@ define void @void_func_v33i32(<33 x i32> %arg0) #0 { define void @void_func_v2i64(<2 x i64> %arg0) #0 { ; CHECK-LABEL: name: void_func_v2i64 ; CHECK: bb.1 (%ir-block.0): - ; CHECK-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $sgpr30_sgpr31 + ; CHECK-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3 ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 @@ -973,11 +881,9 @@ define void @void_func_v2i64(<2 x i64> %arg0) #0 { ; CHECK-NEXT: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY]](s32), [[COPY1]](s32) ; CHECK-NEXT: [[MV1:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY2]](s32), [[COPY3]](s32) ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s64>) = G_BUILD_VECTOR [[MV]](s64), [[MV1]](s64) - ; CHECK-NEXT: [[COPY4:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF ; CHECK-NEXT: G_STORE [[BUILD_VECTOR]](<2 x s64>), [[DEF]](p1) :: (store (<2 x s64>) into `<2 x i64> 
addrspace(1)* undef`, addrspace 1) - ; CHECK-NEXT: [[COPY5:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY4]] - ; CHECK-NEXT: S_SETPC_B64_return [[COPY5]] + ; CHECK-NEXT: SI_RETURN store <2 x i64> %arg0, <2 x i64> addrspace(1)* undef ret void } @@ -985,7 +891,7 @@ define void @void_func_v2i64(<2 x i64> %arg0) #0 { define void @void_func_v2p0i8(<2 x i8*> %arg0) #0 { ; CHECK-LABEL: name: void_func_v2p0i8 ; CHECK: bb.1 (%ir-block.0): - ; CHECK-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $sgpr30_sgpr31 + ; CHECK-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3 ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 @@ -994,11 +900,9 @@ define void @void_func_v2p0i8(<2 x i8*> %arg0) #0 { ; CHECK-NEXT: [[MV:%[0-9]+]]:_(p0) = G_MERGE_VALUES [[COPY]](s32), [[COPY1]](s32) ; CHECK-NEXT: [[MV1:%[0-9]+]]:_(p0) = G_MERGE_VALUES [[COPY2]](s32), [[COPY3]](s32) ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x p0>) = G_BUILD_VECTOR [[MV]](p0), [[MV1]](p0) - ; CHECK-NEXT: [[COPY4:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF ; CHECK-NEXT: G_STORE [[BUILD_VECTOR]](<2 x p0>), [[DEF]](p1) :: (store (<2 x p0>) into `<2 x i8*> addrspace(1)* undef`, addrspace 1) - ; CHECK-NEXT: [[COPY5:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY4]] - ; CHECK-NEXT: S_SETPC_B64_return [[COPY5]] + ; CHECK-NEXT: SI_RETURN store <2 x i8*> %arg0, <2 x i8*> addrspace(1)* undef ret void } @@ -1006,7 +910,7 @@ define void @void_func_v2p0i8(<2 x i8*> %arg0) #0 { define void @void_func_v2p1i8(<2 x i8 addrspace(1)*> %arg0) #0 { ; CHECK-LABEL: name: void_func_v2p1i8 ; CHECK: bb.1 (%ir-block.0): - ; CHECK-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $sgpr30_sgpr31 + ; CHECK-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3 ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 @@ -1015,11 +919,9 @@ define void @void_func_v2p1i8(<2 x i8 addrspace(1)*> %arg0) #0 { ; CHECK-NEXT: [[MV:%[0-9]+]]:_(p1) = G_MERGE_VALUES [[COPY]](s32), [[COPY1]](s32) ; CHECK-NEXT: [[MV1:%[0-9]+]]:_(p1) = G_MERGE_VALUES [[COPY2]](s32), [[COPY3]](s32) ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x p1>) = G_BUILD_VECTOR [[MV]](p1), [[MV1]](p1) - ; CHECK-NEXT: [[COPY4:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF ; CHECK-NEXT: G_STORE [[BUILD_VECTOR]](<2 x p1>), [[DEF]](p1) :: (store (<2 x p1>) into `<2 x i8 addrspace(1)*> addrspace(1)* undef`, addrspace 1) - ; CHECK-NEXT: [[COPY5:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY4]] - ; CHECK-NEXT: S_SETPC_B64_return [[COPY5]] + ; CHECK-NEXT: SI_RETURN store <2 x i8 addrspace(1)*> %arg0, <2 x i8 addrspace(1)*> addrspace(1)* undef ret void } @@ -1027,7 +929,7 @@ define void @void_func_v2p1i8(<2 x i8 addrspace(1)*> %arg0) #0 { define void @void_func_v3i64(<3 x i64> %arg0) #0 { ; CHECK-LABEL: name: void_func_v3i64 ; CHECK: bb.1 (%ir-block.0): - ; CHECK-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $sgpr30_sgpr31 + ; CHECK-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5 ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 @@ -1039,11 +941,9 @@ define void @void_func_v3i64(<3 x i64> %arg0) #0 { ; CHECK-NEXT: [[MV1:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY2]](s32), [[COPY3]](s32) ; CHECK-NEXT: [[MV2:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY4]](s32), [[COPY5]](s32) ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s64>) = G_BUILD_VECTOR 
[[MV]](s64), [[MV1]](s64), [[MV2]](s64) - ; CHECK-NEXT: [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF ; CHECK-NEXT: G_STORE [[BUILD_VECTOR]](<3 x s64>), [[DEF]](p1) :: (store (<3 x s64>) into `<3 x i64> addrspace(1)* undef`, align 32, addrspace 1) - ; CHECK-NEXT: [[COPY7:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY6]] - ; CHECK-NEXT: S_SETPC_B64_return [[COPY7]] + ; CHECK-NEXT: SI_RETURN store <3 x i64> %arg0, <3 x i64> addrspace(1)* undef ret void } @@ -1051,7 +951,7 @@ define void @void_func_v3i64(<3 x i64> %arg0) #0 { define void @void_func_v4i64(<4 x i64> %arg0) #0 { ; CHECK-LABEL: name: void_func_v4i64 ; CHECK: bb.1 (%ir-block.0): - ; CHECK-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $sgpr30_sgpr31 + ; CHECK-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7 ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 @@ -1066,11 +966,9 @@ define void @void_func_v4i64(<4 x i64> %arg0) #0 { ; CHECK-NEXT: [[MV2:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY4]](s32), [[COPY5]](s32) ; CHECK-NEXT: [[MV3:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY6]](s32), [[COPY7]](s32) ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s64>) = G_BUILD_VECTOR [[MV]](s64), [[MV1]](s64), [[MV2]](s64), [[MV3]](s64) - ; CHECK-NEXT: [[COPY8:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF ; CHECK-NEXT: G_STORE [[BUILD_VECTOR]](<4 x s64>), [[DEF]](p1) :: (store (<4 x s64>) into `<4 x i64> addrspace(1)* undef`, addrspace 1) - ; CHECK-NEXT: [[COPY9:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY8]] - ; CHECK-NEXT: S_SETPC_B64_return [[COPY9]] + ; CHECK-NEXT: SI_RETURN store <4 x i64> %arg0, <4 x i64> addrspace(1)* undef ret void } @@ -1078,7 +976,7 @@ define void @void_func_v4i64(<4 x i64> %arg0) #0 { define void @void_func_v5i64(<5 x i64> %arg0) #0 { ; CHECK-LABEL: name: void_func_v5i64 ; CHECK: bb.1 (%ir-block.0): - ; CHECK-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $vgpr8, $vgpr9, $sgpr30_sgpr31 + ; CHECK-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $vgpr8, $vgpr9 ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 @@ -1096,11 +994,9 @@ define void @void_func_v5i64(<5 x i64> %arg0) #0 { ; CHECK-NEXT: [[MV3:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY6]](s32), [[COPY7]](s32) ; CHECK-NEXT: [[MV4:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY8]](s32), [[COPY9]](s32) ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<5 x s64>) = G_BUILD_VECTOR [[MV]](s64), [[MV1]](s64), [[MV2]](s64), [[MV3]](s64), [[MV4]](s64) - ; CHECK-NEXT: [[COPY10:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF ; CHECK-NEXT: G_STORE [[BUILD_VECTOR]](<5 x s64>), [[DEF]](p1) :: (store (<5 x s64>) into `<5 x i64> addrspace(1)* undef`, align 64, addrspace 1) - ; CHECK-NEXT: [[COPY11:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY10]] - ; CHECK-NEXT: S_SETPC_B64_return [[COPY11]] + ; CHECK-NEXT: SI_RETURN store <5 x i64> %arg0, <5 x i64> addrspace(1)* undef ret void } @@ -1108,7 +1004,7 @@ define void @void_func_v5i64(<5 x i64> %arg0) #0 { define void @void_func_v8i64(<8 x i64> %arg0) #0 { ; CHECK-LABEL: name: void_func_v8i64 ; CHECK: bb.1 (%ir-block.0): - ; CHECK-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $vgpr8, $vgpr9, $vgpr10, $vgpr11, $vgpr12, $vgpr13, $vgpr14, 
$vgpr15, $sgpr30_sgpr31 + ; CHECK-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $vgpr8, $vgpr9, $vgpr10, $vgpr11, $vgpr12, $vgpr13, $vgpr14, $vgpr15 ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 @@ -1135,11 +1031,9 @@ define void @void_func_v8i64(<8 x i64> %arg0) #0 { ; CHECK-NEXT: [[MV6:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY12]](s32), [[COPY13]](s32) ; CHECK-NEXT: [[MV7:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY14]](s32), [[COPY15]](s32) ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s64>) = G_BUILD_VECTOR [[MV]](s64), [[MV1]](s64), [[MV2]](s64), [[MV3]](s64), [[MV4]](s64), [[MV5]](s64), [[MV6]](s64), [[MV7]](s64) - ; CHECK-NEXT: [[COPY16:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF ; CHECK-NEXT: G_STORE [[BUILD_VECTOR]](<8 x s64>), [[DEF]](p1) :: (store (<8 x s64>) into `<8 x i64> addrspace(1)* undef`, addrspace 1) - ; CHECK-NEXT: [[COPY17:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY16]] - ; CHECK-NEXT: S_SETPC_B64_return [[COPY17]] + ; CHECK-NEXT: SI_RETURN store <8 x i64> %arg0, <8 x i64> addrspace(1)* undef ret void } @@ -1147,7 +1041,7 @@ define void @void_func_v8i64(<8 x i64> %arg0) #0 { define void @void_func_v16i64(<16 x i64> %arg0) #0 { ; CHECK-LABEL: name: void_func_v16i64 ; CHECK: bb.1 (%ir-block.0): - ; CHECK-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $vgpr8, $vgpr9, $vgpr10, $vgpr11, $vgpr12, $vgpr13, $vgpr14, $vgpr15, $vgpr16, $vgpr17, $vgpr18, $vgpr19, $vgpr20, $vgpr21, $vgpr22, $vgpr23, $vgpr24, $vgpr25, $vgpr26, $vgpr27, $vgpr28, $vgpr29, $vgpr30, $sgpr30_sgpr31 + ; CHECK-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $vgpr8, $vgpr9, $vgpr10, $vgpr11, $vgpr12, $vgpr13, $vgpr14, $vgpr15, $vgpr16, $vgpr17, $vgpr18, $vgpr19, $vgpr20, $vgpr21, $vgpr22, $vgpr23, $vgpr24, $vgpr25, $vgpr26, $vgpr27, $vgpr28, $vgpr29, $vgpr30 ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 @@ -1199,11 +1093,9 @@ define void @void_func_v16i64(<16 x i64> %arg0) #0 { ; CHECK-NEXT: [[MV14:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY28]](s32), [[COPY29]](s32) ; CHECK-NEXT: [[MV15:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY30]](s32), [[LOAD]](s32) ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<16 x s64>) = G_BUILD_VECTOR [[MV]](s64), [[MV1]](s64), [[MV2]](s64), [[MV3]](s64), [[MV4]](s64), [[MV5]](s64), [[MV6]](s64), [[MV7]](s64), [[MV8]](s64), [[MV9]](s64), [[MV10]](s64), [[MV11]](s64), [[MV12]](s64), [[MV13]](s64), [[MV14]](s64), [[MV15]](s64) - ; CHECK-NEXT: [[COPY31:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF ; CHECK-NEXT: G_STORE [[BUILD_VECTOR]](<16 x s64>), [[DEF]](p1) :: (store (<16 x s64>) into `<16 x i64> addrspace(1)* undef`, addrspace 1) - ; CHECK-NEXT: [[COPY32:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY31]] - ; CHECK-NEXT: S_SETPC_B64_return [[COPY32]] + ; CHECK-NEXT: SI_RETURN store <16 x i64> %arg0, <16 x i64> addrspace(1)* undef ret void } @@ -1211,14 +1103,12 @@ define void @void_func_v16i64(<16 x i64> %arg0) #0 { define void @void_func_v2i16(<2 x i16> %arg0) #0 { ; CHECK-LABEL: name: void_func_v2i16 ; CHECK: bb.1 (%ir-block.0): - ; CHECK-NEXT: liveins: $vgpr0, $sgpr30_sgpr31 + ; CHECK-NEXT: liveins: $vgpr0 ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 ; CHECK-NEXT: 
[[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF ; CHECK-NEXT: G_STORE [[COPY]](<2 x s16>), [[DEF]](p1) :: (store (<2 x s16>) into `<2 x i16> addrspace(1)* undef`, addrspace 1) - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY1]] - ; CHECK-NEXT: S_SETPC_B64_return [[COPY2]] + ; CHECK-NEXT: SI_RETURN store <2 x i16> %arg0, <2 x i16> addrspace(1)* undef ret void } @@ -1226,18 +1116,16 @@ define void @void_func_v2i16(<2 x i16> %arg0) #0 { define void @void_func_v3i16(<3 x i16> %arg0) #0 { ; CHECK-LABEL: name: void_func_v3i16 ; CHECK: bb.1 (%ir-block.0): - ; CHECK-NEXT: liveins: $vgpr0, $vgpr1, $sgpr30_sgpr31 + ; CHECK-NEXT: liveins: $vgpr0, $vgpr1 ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0 ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr1 ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(<2 x s16>) = G_IMPLICIT_DEF ; CHECK-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[COPY]](<2 x s16>), [[COPY1]](<2 x s16>), [[DEF]](<2 x s16>) ; CHECK-NEXT: [[UV:%[0-9]+]]:_(<3 x s16>), [[UV1:%[0-9]+]]:_(<3 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<6 x s16>) - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 ; CHECK-NEXT: [[DEF1:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF ; CHECK-NEXT: G_STORE [[UV]](<3 x s16>), [[DEF1]](p1) :: (store (<3 x s16>) into `<3 x i16> addrspace(1)* undef`, align 8, addrspace 1) - ; CHECK-NEXT: [[COPY3:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY2]] - ; CHECK-NEXT: S_SETPC_B64_return [[COPY3]] + ; CHECK-NEXT: SI_RETURN store <3 x i16> %arg0, <3 x i16> addrspace(1)* undef ret void } @@ -1245,16 +1133,14 @@ define void @void_func_v3i16(<3 x i16> %arg0) #0 { define void @void_func_v4i16(<4 x i16> %arg0) #0 { ; CHECK-LABEL: name: void_func_v4i16 ; CHECK: bb.1 (%ir-block.0): - ; CHECK-NEXT: liveins: $vgpr0, $vgpr1, $sgpr30_sgpr31 + ; CHECK-NEXT: liveins: $vgpr0, $vgpr1 ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0 ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr1 ; CHECK-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[COPY]](<2 x s16>), [[COPY1]](<2 x s16>) - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF ; CHECK-NEXT: G_STORE [[CONCAT_VECTORS]](<4 x s16>), [[DEF]](p1) :: (store (<4 x s16>) into `<4 x i16> addrspace(1)* undef`, addrspace 1) - ; CHECK-NEXT: [[COPY3:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY2]] - ; CHECK-NEXT: S_SETPC_B64_return [[COPY3]] + ; CHECK-NEXT: SI_RETURN store <4 x i16> %arg0, <4 x i16> addrspace(1)* undef ret void } @@ -1262,7 +1148,7 @@ define void @void_func_v4i16(<4 x i16> %arg0) #0 { define void @void_func_v5i16(<5 x i16> %arg0) #0 { ; CHECK-LABEL: name: void_func_v5i16 ; CHECK: bb.1 (%ir-block.0): - ; CHECK-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2, $sgpr30_sgpr31 + ; CHECK-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2 ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0 ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr1 @@ -1270,11 +1156,9 @@ define void @void_func_v5i16(<5 x i16> %arg0) #0 { ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(<2 x s16>) = G_IMPLICIT_DEF ; CHECK-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<10 x s16>) = G_CONCAT_VECTORS [[COPY]](<2 x s16>), [[COPY1]](<2 x s16>), [[COPY2]](<2 x s16>), [[DEF]](<2 x s16>), [[DEF]](<2 x s16>) ; CHECK-NEXT: [[UV:%[0-9]+]]:_(<5 x s16>), [[UV1:%[0-9]+]]:_(<5 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<10 x s16>) - ; CHECK-NEXT: [[COPY3:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 ; CHECK-NEXT: [[DEF1:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF ; 
CHECK-NEXT: G_STORE [[UV]](<5 x s16>), [[DEF1]](p1) :: (store (<5 x s16>) into `<5 x i16> addrspace(1)* undef`, align 16, addrspace 1) - ; CHECK-NEXT: [[COPY4:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY3]] - ; CHECK-NEXT: S_SETPC_B64_return [[COPY4]] + ; CHECK-NEXT: SI_RETURN store <5 x i16> %arg0, <5 x i16> addrspace(1)* undef ret void } @@ -1282,18 +1166,16 @@ define void @void_func_v5i16(<5 x i16> %arg0) #0 { define void @void_func_v8i16(<8 x i16> %arg0) #0 { ; CHECK-LABEL: name: void_func_v8i16 ; CHECK: bb.1 (%ir-block.0): - ; CHECK-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $sgpr30_sgpr31 + ; CHECK-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3 ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0 ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr1 ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr2 ; CHECK-NEXT: [[COPY3:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr3 ; CHECK-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<8 x s16>) = G_CONCAT_VECTORS [[COPY]](<2 x s16>), [[COPY1]](<2 x s16>), [[COPY2]](<2 x s16>), [[COPY3]](<2 x s16>) - ; CHECK-NEXT: [[COPY4:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF ; CHECK-NEXT: G_STORE [[CONCAT_VECTORS]](<8 x s16>), [[DEF]](p1) :: (store (<8 x s16>) into `<8 x i16> addrspace(1)* undef`, addrspace 1) - ; CHECK-NEXT: [[COPY5:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY4]] - ; CHECK-NEXT: S_SETPC_B64_return [[COPY5]] + ; CHECK-NEXT: SI_RETURN store <8 x i16> %arg0, <8 x i16> addrspace(1)* undef ret void } @@ -1301,7 +1183,7 @@ define void @void_func_v8i16(<8 x i16> %arg0) #0 { define void @void_func_v16i16(<16 x i16> %arg0) #0 { ; CHECK-LABEL: name: void_func_v16i16 ; CHECK: bb.1 (%ir-block.0): - ; CHECK-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $sgpr30_sgpr31 + ; CHECK-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7 ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0 ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr1 @@ -1312,11 +1194,9 @@ define void @void_func_v16i16(<16 x i16> %arg0) #0 { ; CHECK-NEXT: [[COPY6:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr6 ; CHECK-NEXT: [[COPY7:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr7 ; CHECK-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<16 x s16>) = G_CONCAT_VECTORS [[COPY]](<2 x s16>), [[COPY1]](<2 x s16>), [[COPY2]](<2 x s16>), [[COPY3]](<2 x s16>), [[COPY4]](<2 x s16>), [[COPY5]](<2 x s16>), [[COPY6]](<2 x s16>), [[COPY7]](<2 x s16>) - ; CHECK-NEXT: [[COPY8:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF ; CHECK-NEXT: G_STORE [[CONCAT_VECTORS]](<16 x s16>), [[DEF]](p1) :: (store (<16 x s16>) into `<16 x i16> addrspace(1)* undef`, addrspace 1) - ; CHECK-NEXT: [[COPY9:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY8]] - ; CHECK-NEXT: S_SETPC_B64_return [[COPY9]] + ; CHECK-NEXT: SI_RETURN store <16 x i16> %arg0, <16 x i16> addrspace(1)* undef ret void } @@ -1326,7 +1206,7 @@ define void @void_func_v16i16(<16 x i16> %arg0) #0 { define void @void_func_v65i16(<65 x i16> %arg0) #0 { ; CHECK-LABEL: name: void_func_v65i16 ; CHECK: bb.1 (%ir-block.0): - ; CHECK-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $vgpr8, $vgpr9, $vgpr10, $vgpr11, $vgpr12, $vgpr13, $vgpr14, $vgpr15, $vgpr16, $vgpr17, $vgpr18, $vgpr19, $vgpr20, $vgpr21, $vgpr22, $vgpr23, $vgpr24, $vgpr25, $vgpr26, $vgpr27, $vgpr28, $vgpr29, $vgpr30, $sgpr30_sgpr31 + ; CHECK-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $vgpr8, $vgpr9, 
$vgpr10, $vgpr11, $vgpr12, $vgpr13, $vgpr14, $vgpr15, $vgpr16, $vgpr17, $vgpr18, $vgpr19, $vgpr20, $vgpr21, $vgpr22, $vgpr23, $vgpr24, $vgpr25, $vgpr26, $vgpr27, $vgpr28, $vgpr29, $vgpr30 ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0 ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr1 @@ -1366,11 +1246,9 @@ define void @void_func_v65i16(<65 x i16> %arg0) #0 { ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(<2 x s16>) = G_IMPLICIT_DEF ; CHECK-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<130 x s16>) = G_CONCAT_VECTORS [[COPY]](<2 x s16>), [[COPY1]](<2 x s16>), [[COPY2]](<2 x s16>), [[COPY3]](<2 x s16>), [[COPY4]](<2 x s16>), [[COPY5]](<2 x s16>), [[COPY6]](<2 x s16>), [[COPY7]](<2 x s16>), [[COPY8]](<2 x s16>), [[COPY9]](<2 x s16>), [[COPY10]](<2 x s16>), [[COPY11]](<2 x s16>), [[COPY12]](<2 x s16>), [[COPY13]](<2 x s16>), [[COPY14]](<2 x s16>), [[COPY15]](<2 x s16>), [[COPY16]](<2 x s16>), [[COPY17]](<2 x s16>), [[COPY18]](<2 x s16>), [[COPY19]](<2 x s16>), [[COPY20]](<2 x s16>), [[COPY21]](<2 x s16>), [[COPY22]](<2 x s16>), [[COPY23]](<2 x s16>), [[COPY24]](<2 x s16>), [[COPY25]](<2 x s16>), [[COPY26]](<2 x s16>), [[COPY27]](<2 x s16>), [[COPY28]](<2 x s16>), [[COPY29]](<2 x s16>), [[COPY30]](<2 x s16>), [[LOAD]](<2 x s16>), [[LOAD1]](<2 x s16>), [[DEF]](<2 x s16>), [[DEF]](<2 x s16>), [[DEF]](<2 x s16>), [[DEF]](<2 x s16>), [[DEF]](<2 x s16>), [[DEF]](<2 x s16>), [[DEF]](<2 x s16>), [[DEF]](<2 x s16>), [[DEF]](<2 x s16>), [[DEF]](<2 x s16>), [[DEF]](<2 x s16>), [[DEF]](<2 x s16>), [[DEF]](<2 x s16>), [[DEF]](<2 x s16>), [[DEF]](<2 x s16>), [[DEF]](<2 x s16>), [[DEF]](<2 x s16>), [[DEF]](<2 x s16>), [[DEF]](<2 x s16>), [[DEF]](<2 x s16>), [[DEF]](<2 x s16>), [[DEF]](<2 x s16>), [[DEF]](<2 x s16>), [[DEF]](<2 x s16>), [[DEF]](<2 x s16>), [[DEF]](<2 x s16>), [[DEF]](<2 x s16>), [[DEF]](<2 x s16>), [[DEF]](<2 x s16>), [[DEF]](<2 x s16>), [[DEF]](<2 x s16>), [[DEF]](<2 x s16>) ; CHECK-NEXT: [[UV:%[0-9]+]]:_(<65 x s16>), [[UV1:%[0-9]+]]:_(<65 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<130 x s16>) - ; CHECK-NEXT: [[COPY31:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 ; CHECK-NEXT: [[DEF1:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF ; CHECK-NEXT: G_STORE [[UV]](<65 x s16>), [[DEF1]](p1) :: (store (<65 x s16>) into `<65 x i16> addrspace(1)* undef`, align 256, addrspace 1) - ; CHECK-NEXT: [[COPY32:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY31]] - ; CHECK-NEXT: S_SETPC_B64_return [[COPY32]] + ; CHECK-NEXT: SI_RETURN store <65 x i16> %arg0, <65 x i16> addrspace(1)* undef ret void } @@ -1378,16 +1256,14 @@ define void @void_func_v65i16(<65 x i16> %arg0) #0 { define void @void_func_v2f32(<2 x float> %arg0) #0 { ; CHECK-LABEL: name: void_func_v2f32 ; CHECK: bb.1 (%ir-block.0): - ; CHECK-NEXT: liveins: $vgpr0, $vgpr1, $sgpr30_sgpr31 + ; CHECK-NEXT: liveins: $vgpr0, $vgpr1 ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32) - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF ; CHECK-NEXT: G_STORE [[BUILD_VECTOR]](<2 x s32>), [[DEF]](p1) :: (store (<2 x s32>) into `<2 x float> addrspace(1)* undef`, addrspace 1) - ; CHECK-NEXT: [[COPY3:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY2]] - ; CHECK-NEXT: S_SETPC_B64_return [[COPY3]] + ; CHECK-NEXT: SI_RETURN store <2 x float> %arg0, <2 x float> addrspace(1)* undef ret void } @@ -1395,17 +1271,15 @@ define void @void_func_v2f32(<2 x float> %arg0) 
#0 { define void @void_func_v3f32(<3 x float> %arg0) #0 { ; CHECK-LABEL: name: void_func_v3f32 ; CHECK: bb.1 (%ir-block.0): - ; CHECK-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2, $sgpr30_sgpr31 + ; CHECK-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2 ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32) - ; CHECK-NEXT: [[COPY3:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF ; CHECK-NEXT: G_STORE [[BUILD_VECTOR]](<3 x s32>), [[DEF]](p1) :: (store (<3 x s32>) into `<3 x float> addrspace(1)* undef`, align 16, addrspace 1) - ; CHECK-NEXT: [[COPY4:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY3]] - ; CHECK-NEXT: S_SETPC_B64_return [[COPY4]] + ; CHECK-NEXT: SI_RETURN store <3 x float> %arg0, <3 x float> addrspace(1)* undef ret void } @@ -1413,18 +1287,16 @@ define void @void_func_v3f32(<3 x float> %arg0) #0 { define void @void_func_v4f32(<4 x float> %arg0) #0 { ; CHECK-LABEL: name: void_func_v4f32 ; CHECK: bb.1 (%ir-block.0): - ; CHECK-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $sgpr30_sgpr31 + ; CHECK-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3 ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 ; CHECK-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $vgpr3 ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32) - ; CHECK-NEXT: [[COPY4:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF ; CHECK-NEXT: G_STORE [[BUILD_VECTOR]](<4 x s32>), [[DEF]](p1) :: (store (<4 x s32>) into `<4 x float> addrspace(1)* undef`, addrspace 1) - ; CHECK-NEXT: [[COPY5:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY4]] - ; CHECK-NEXT: S_SETPC_B64_return [[COPY5]] + ; CHECK-NEXT: SI_RETURN store <4 x float> %arg0, <4 x float> addrspace(1)* undef ret void } @@ -1432,7 +1304,7 @@ define void @void_func_v4f32(<4 x float> %arg0) #0 { define void @void_func_v8f32(<8 x float> %arg0) #0 { ; CHECK-LABEL: name: void_func_v8f32 ; CHECK: bb.1 (%ir-block.0): - ; CHECK-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $sgpr30_sgpr31 + ; CHECK-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7 ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 @@ -1443,11 +1315,9 @@ define void @void_func_v8f32(<8 x float> %arg0) #0 { ; CHECK-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $vgpr6 ; CHECK-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $vgpr7 ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) - ; CHECK-NEXT: [[COPY8:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF ; CHECK-NEXT: G_STORE [[BUILD_VECTOR]](<8 x s32>), [[DEF]](p1) :: (store (<8 x s32>) into `<8 x float> addrspace(1)* undef`, addrspace 1) - ; CHECK-NEXT: [[COPY9:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY8]] - ; CHECK-NEXT: S_SETPC_B64_return [[COPY9]] + ; CHECK-NEXT: SI_RETURN store <8 x float> %arg0, <8 x float> addrspace(1)* undef ret void } @@ -1455,7 +1325,7 @@ define void @void_func_v8f32(<8 x float> 
%arg0) #0 { define void @void_func_v16f32(<16 x float> %arg0) #0 { ; CHECK-LABEL: name: void_func_v16f32 ; CHECK: bb.1 (%ir-block.0): - ; CHECK-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $vgpr8, $vgpr9, $vgpr10, $vgpr11, $vgpr12, $vgpr13, $vgpr14, $vgpr15, $sgpr30_sgpr31 + ; CHECK-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $vgpr8, $vgpr9, $vgpr10, $vgpr11, $vgpr12, $vgpr13, $vgpr14, $vgpr15 ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 @@ -1474,11 +1344,9 @@ define void @void_func_v16f32(<16 x float> %arg0) #0 { ; CHECK-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY $vgpr14 ; CHECK-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY $vgpr15 ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<16 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32), [[COPY8]](s32), [[COPY9]](s32), [[COPY10]](s32), [[COPY11]](s32), [[COPY12]](s32), [[COPY13]](s32), [[COPY14]](s32), [[COPY15]](s32) - ; CHECK-NEXT: [[COPY16:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF ; CHECK-NEXT: G_STORE [[BUILD_VECTOR]](<16 x s32>), [[DEF]](p1) :: (store (<16 x s32>) into `<16 x float> addrspace(1)* undef`, addrspace 1) - ; CHECK-NEXT: [[COPY17:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY16]] - ; CHECK-NEXT: S_SETPC_B64_return [[COPY17]] + ; CHECK-NEXT: SI_RETURN store <16 x float> %arg0, <16 x float> addrspace(1)* undef ret void } @@ -1486,7 +1354,7 @@ define void @void_func_v16f32(<16 x float> %arg0) #0 { define void @void_func_v2f64(<2 x double> %arg0) #0 { ; CHECK-LABEL: name: void_func_v2f64 ; CHECK: bb.1 (%ir-block.0): - ; CHECK-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $sgpr30_sgpr31 + ; CHECK-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3 ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 @@ -1495,11 +1363,9 @@ define void @void_func_v2f64(<2 x double> %arg0) #0 { ; CHECK-NEXT: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY]](s32), [[COPY1]](s32) ; CHECK-NEXT: [[MV1:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY2]](s32), [[COPY3]](s32) ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s64>) = G_BUILD_VECTOR [[MV]](s64), [[MV1]](s64) - ; CHECK-NEXT: [[COPY4:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF ; CHECK-NEXT: G_STORE [[BUILD_VECTOR]](<2 x s64>), [[DEF]](p1) :: (store (<2 x s64>) into `<2 x double> addrspace(1)* undef`, addrspace 1) - ; CHECK-NEXT: [[COPY5:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY4]] - ; CHECK-NEXT: S_SETPC_B64_return [[COPY5]] + ; CHECK-NEXT: SI_RETURN store <2 x double> %arg0, <2 x double> addrspace(1)* undef ret void } @@ -1507,7 +1373,7 @@ define void @void_func_v2f64(<2 x double> %arg0) #0 { define void @void_func_v3f64(<3 x double> %arg0) #0 { ; CHECK-LABEL: name: void_func_v3f64 ; CHECK: bb.1 (%ir-block.0): - ; CHECK-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $sgpr30_sgpr31 + ; CHECK-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5 ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 @@ -1519,11 +1385,9 @@ define void @void_func_v3f64(<3 x double> %arg0) #0 { ; CHECK-NEXT: [[MV1:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY2]](s32), [[COPY3]](s32) ; CHECK-NEXT: [[MV2:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY4]](s32), 
[[COPY5]](s32) ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s64>) = G_BUILD_VECTOR [[MV]](s64), [[MV1]](s64), [[MV2]](s64) - ; CHECK-NEXT: [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF ; CHECK-NEXT: G_STORE [[BUILD_VECTOR]](<3 x s64>), [[DEF]](p1) :: (store (<3 x s64>) into `<3 x double> addrspace(1)* undef`, align 32, addrspace 1) - ; CHECK-NEXT: [[COPY7:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY6]] - ; CHECK-NEXT: S_SETPC_B64_return [[COPY7]] + ; CHECK-NEXT: SI_RETURN store <3 x double> %arg0, <3 x double> addrspace(1)* undef ret void } @@ -1531,7 +1395,7 @@ define void @void_func_v3f64(<3 x double> %arg0) #0 { define void @void_func_v4f64(<4 x double> %arg0) #0 { ; CHECK-LABEL: name: void_func_v4f64 ; CHECK: bb.1 (%ir-block.0): - ; CHECK-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $sgpr30_sgpr31 + ; CHECK-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7 ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 @@ -1546,11 +1410,9 @@ define void @void_func_v4f64(<4 x double> %arg0) #0 { ; CHECK-NEXT: [[MV2:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY4]](s32), [[COPY5]](s32) ; CHECK-NEXT: [[MV3:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY6]](s32), [[COPY7]](s32) ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s64>) = G_BUILD_VECTOR [[MV]](s64), [[MV1]](s64), [[MV2]](s64), [[MV3]](s64) - ; CHECK-NEXT: [[COPY8:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF ; CHECK-NEXT: G_STORE [[BUILD_VECTOR]](<4 x s64>), [[DEF]](p1) :: (store (<4 x s64>) into `<4 x double> addrspace(1)* undef`, addrspace 1) - ; CHECK-NEXT: [[COPY9:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY8]] - ; CHECK-NEXT: S_SETPC_B64_return [[COPY9]] + ; CHECK-NEXT: SI_RETURN store <4 x double> %arg0, <4 x double> addrspace(1)* undef ret void } @@ -1558,7 +1420,7 @@ define void @void_func_v4f64(<4 x double> %arg0) #0 { define void @void_func_v8f64(<8 x double> %arg0) #0 { ; CHECK-LABEL: name: void_func_v8f64 ; CHECK: bb.1 (%ir-block.0): - ; CHECK-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $vgpr8, $vgpr9, $vgpr10, $vgpr11, $vgpr12, $vgpr13, $vgpr14, $vgpr15, $sgpr30_sgpr31 + ; CHECK-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $vgpr8, $vgpr9, $vgpr10, $vgpr11, $vgpr12, $vgpr13, $vgpr14, $vgpr15 ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 @@ -1585,11 +1447,9 @@ define void @void_func_v8f64(<8 x double> %arg0) #0 { ; CHECK-NEXT: [[MV6:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY12]](s32), [[COPY13]](s32) ; CHECK-NEXT: [[MV7:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY14]](s32), [[COPY15]](s32) ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s64>) = G_BUILD_VECTOR [[MV]](s64), [[MV1]](s64), [[MV2]](s64), [[MV3]](s64), [[MV4]](s64), [[MV5]](s64), [[MV6]](s64), [[MV7]](s64) - ; CHECK-NEXT: [[COPY16:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF ; CHECK-NEXT: G_STORE [[BUILD_VECTOR]](<8 x s64>), [[DEF]](p1) :: (store (<8 x s64>) into `<8 x double> addrspace(1)* undef`, addrspace 1) - ; CHECK-NEXT: [[COPY17:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY16]] - ; CHECK-NEXT: S_SETPC_B64_return [[COPY17]] + ; CHECK-NEXT: SI_RETURN store <8 x double> %arg0, <8 x double> addrspace(1)* undef ret void } @@ -1597,7 +1457,7 @@ define void @void_func_v8f64(<8 x double> 
%arg0) #0 { define void @void_func_v16f64(<16 x double> %arg0) #0 { ; CHECK-LABEL: name: void_func_v16f64 ; CHECK: bb.1 (%ir-block.0): - ; CHECK-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $vgpr8, $vgpr9, $vgpr10, $vgpr11, $vgpr12, $vgpr13, $vgpr14, $vgpr15, $vgpr16, $vgpr17, $vgpr18, $vgpr19, $vgpr20, $vgpr21, $vgpr22, $vgpr23, $vgpr24, $vgpr25, $vgpr26, $vgpr27, $vgpr28, $vgpr29, $vgpr30, $sgpr30_sgpr31 + ; CHECK-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $vgpr8, $vgpr9, $vgpr10, $vgpr11, $vgpr12, $vgpr13, $vgpr14, $vgpr15, $vgpr16, $vgpr17, $vgpr18, $vgpr19, $vgpr20, $vgpr21, $vgpr22, $vgpr23, $vgpr24, $vgpr25, $vgpr26, $vgpr27, $vgpr28, $vgpr29, $vgpr30 ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 @@ -1649,11 +1509,9 @@ define void @void_func_v16f64(<16 x double> %arg0) #0 { ; CHECK-NEXT: [[MV14:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY28]](s32), [[COPY29]](s32) ; CHECK-NEXT: [[MV15:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY30]](s32), [[LOAD]](s32) ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<16 x s64>) = G_BUILD_VECTOR [[MV]](s64), [[MV1]](s64), [[MV2]](s64), [[MV3]](s64), [[MV4]](s64), [[MV5]](s64), [[MV6]](s64), [[MV7]](s64), [[MV8]](s64), [[MV9]](s64), [[MV10]](s64), [[MV11]](s64), [[MV12]](s64), [[MV13]](s64), [[MV14]](s64), [[MV15]](s64) - ; CHECK-NEXT: [[COPY31:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF ; CHECK-NEXT: G_STORE [[BUILD_VECTOR]](<16 x s64>), [[DEF]](p1) :: (store (<16 x s64>) into `<16 x double> addrspace(1)* undef`, addrspace 1) - ; CHECK-NEXT: [[COPY32:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY31]] - ; CHECK-NEXT: S_SETPC_B64_return [[COPY32]] + ; CHECK-NEXT: SI_RETURN store <16 x double> %arg0, <16 x double> addrspace(1)* undef ret void } @@ -1661,14 +1519,12 @@ define void @void_func_v16f64(<16 x double> %arg0) #0 { define void @void_func_v2f16(<2 x half> %arg0) #0 { ; CHECK-LABEL: name: void_func_v2f16 ; CHECK: bb.1 (%ir-block.0): - ; CHECK-NEXT: liveins: $vgpr0, $sgpr30_sgpr31 + ; CHECK-NEXT: liveins: $vgpr0 ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF ; CHECK-NEXT: G_STORE [[COPY]](<2 x s16>), [[DEF]](p1) :: (store (<2 x s16>) into `<2 x half> addrspace(1)* undef`, addrspace 1) - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY1]] - ; CHECK-NEXT: S_SETPC_B64_return [[COPY2]] + ; CHECK-NEXT: SI_RETURN store <2 x half> %arg0, <2 x half> addrspace(1)* undef ret void } @@ -1676,18 +1532,16 @@ define void @void_func_v2f16(<2 x half> %arg0) #0 { define void @void_func_v3f16(<3 x half> %arg0) #0 { ; CHECK-LABEL: name: void_func_v3f16 ; CHECK: bb.1 (%ir-block.0): - ; CHECK-NEXT: liveins: $vgpr0, $vgpr1, $sgpr30_sgpr31 + ; CHECK-NEXT: liveins: $vgpr0, $vgpr1 ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0 ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr1 ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(<2 x s16>) = G_IMPLICIT_DEF ; CHECK-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[COPY]](<2 x s16>), [[COPY1]](<2 x s16>), [[DEF]](<2 x s16>) ; CHECK-NEXT: [[UV:%[0-9]+]]:_(<3 x s16>), [[UV1:%[0-9]+]]:_(<3 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<6 x s16>) - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 ; CHECK-NEXT: [[DEF1:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF ; 
CHECK-NEXT: G_STORE [[UV]](<3 x s16>), [[DEF1]](p1) :: (store (<3 x s16>) into `<3 x half> addrspace(1)* undef`, align 8, addrspace 1) - ; CHECK-NEXT: [[COPY3:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY2]] - ; CHECK-NEXT: S_SETPC_B64_return [[COPY3]] + ; CHECK-NEXT: SI_RETURN store <3 x half> %arg0, <3 x half> addrspace(1)* undef ret void } @@ -1695,16 +1549,14 @@ define void @void_func_v3f16(<3 x half> %arg0) #0 { define void @void_func_v4f16(<4 x half> %arg0) #0 { ; CHECK-LABEL: name: void_func_v4f16 ; CHECK: bb.1 (%ir-block.0): - ; CHECK-NEXT: liveins: $vgpr0, $vgpr1, $sgpr30_sgpr31 + ; CHECK-NEXT: liveins: $vgpr0, $vgpr1 ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0 ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr1 ; CHECK-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[COPY]](<2 x s16>), [[COPY1]](<2 x s16>) - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF ; CHECK-NEXT: G_STORE [[CONCAT_VECTORS]](<4 x s16>), [[DEF]](p1) :: (store (<4 x s16>) into `<4 x half> addrspace(1)* undef`, addrspace 1) - ; CHECK-NEXT: [[COPY3:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY2]] - ; CHECK-NEXT: S_SETPC_B64_return [[COPY3]] + ; CHECK-NEXT: SI_RETURN store <4 x half> %arg0, <4 x half> addrspace(1)* undef ret void } @@ -1712,18 +1564,16 @@ define void @void_func_v4f16(<4 x half> %arg0) #0 { define void @void_func_v8f16(<8 x half> %arg0) #0 { ; CHECK-LABEL: name: void_func_v8f16 ; CHECK: bb.1 (%ir-block.0): - ; CHECK-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $sgpr30_sgpr31 + ; CHECK-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3 ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0 ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr1 ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr2 ; CHECK-NEXT: [[COPY3:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr3 ; CHECK-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<8 x s16>) = G_CONCAT_VECTORS [[COPY]](<2 x s16>), [[COPY1]](<2 x s16>), [[COPY2]](<2 x s16>), [[COPY3]](<2 x s16>) - ; CHECK-NEXT: [[COPY4:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF ; CHECK-NEXT: G_STORE [[CONCAT_VECTORS]](<8 x s16>), [[DEF]](p1) :: (store (<8 x s16>) into `<8 x half> addrspace(1)* undef`, addrspace 1) - ; CHECK-NEXT: [[COPY5:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY4]] - ; CHECK-NEXT: S_SETPC_B64_return [[COPY5]] + ; CHECK-NEXT: SI_RETURN store <8 x half> %arg0, <8 x half> addrspace(1)* undef ret void } @@ -1731,7 +1581,7 @@ define void @void_func_v8f16(<8 x half> %arg0) #0 { define void @void_func_v16f16(<16 x half> %arg0) #0 { ; CHECK-LABEL: name: void_func_v16f16 ; CHECK: bb.1 (%ir-block.0): - ; CHECK-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $sgpr30_sgpr31 + ; CHECK-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7 ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0 ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr1 @@ -1742,11 +1592,9 @@ define void @void_func_v16f16(<16 x half> %arg0) #0 { ; CHECK-NEXT: [[COPY6:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr6 ; CHECK-NEXT: [[COPY7:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr7 ; CHECK-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<16 x s16>) = G_CONCAT_VECTORS [[COPY]](<2 x s16>), [[COPY1]](<2 x s16>), [[COPY2]](<2 x s16>), [[COPY3]](<2 x s16>), [[COPY4]](<2 x s16>), [[COPY5]](<2 x s16>), [[COPY6]](<2 x s16>), [[COPY7]](<2 x s16>) - ; CHECK-NEXT: [[COPY8:%[0-9]+]]:sgpr_64 = COPY 
$sgpr30_sgpr31 ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF ; CHECK-NEXT: G_STORE [[CONCAT_VECTORS]](<16 x s16>), [[DEF]](p1) :: (store (<16 x s16>) into `<16 x half> addrspace(1)* undef`, addrspace 1) - ; CHECK-NEXT: [[COPY9:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY8]] - ; CHECK-NEXT: S_SETPC_B64_return [[COPY9]] + ; CHECK-NEXT: SI_RETURN store <16 x half> %arg0, <16 x half> addrspace(1)* undef ret void } @@ -1755,21 +1603,19 @@ define void @void_func_v16f16(<16 x half> %arg0) #0 { define void @void_func_i32_i64_i32(i32 %arg0, i64 %arg1, i32 %arg2) #0 { ; CHECK-LABEL: name: void_func_i32_i64_i32 ; CHECK: bb.1 (%ir-block.0): - ; CHECK-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $sgpr30_sgpr31 + ; CHECK-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3 ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 ; CHECK-NEXT: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY1]](s32), [[COPY2]](s32) ; CHECK-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $vgpr3 - ; CHECK-NEXT: [[COPY4:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF - ; CHECK-NEXT: [[COPY5:%[0-9]+]]:_(p1) = COPY [[DEF]](p1) + ; CHECK-NEXT: [[COPY4:%[0-9]+]]:_(p1) = COPY [[DEF]](p1) ; CHECK-NEXT: G_STORE [[COPY]](s32), [[DEF]](p1) :: (volatile store (s32) into `i32 addrspace(1)* undef`, addrspace 1) - ; CHECK-NEXT: G_STORE [[MV]](s64), [[COPY5]](p1) :: (volatile store (s64) into `i64 addrspace(1)* undef`, addrspace 1) + ; CHECK-NEXT: G_STORE [[MV]](s64), [[COPY4]](p1) :: (volatile store (s64) into `i64 addrspace(1)* undef`, addrspace 1) ; CHECK-NEXT: G_STORE [[COPY3]](s32), [[DEF]](p1) :: (volatile store (s32) into `i32 addrspace(1)* undef`, addrspace 1) - ; CHECK-NEXT: [[COPY6:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY4]] - ; CHECK-NEXT: S_SETPC_B64_return [[COPY6]] + ; CHECK-NEXT: SI_RETURN store volatile i32 %arg0, i32 addrspace(1)* undef store volatile i64 %arg1, i64 addrspace(1)* undef store volatile i32 %arg2, i32 addrspace(1)* undef @@ -1779,14 +1625,12 @@ define void @void_func_i32_i64_i32(i32 %arg0, i64 %arg1, i32 %arg2) #0 { define void @void_func_struct_i32({ i32 } %arg0) #0 { ; CHECK-LABEL: name: void_func_struct_i32 ; CHECK: bb.1 (%ir-block.0): - ; CHECK-NEXT: liveins: $vgpr0, $sgpr30_sgpr31 + ; CHECK-NEXT: liveins: $vgpr0 ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF ; CHECK-NEXT: G_STORE [[COPY]](s32), [[DEF]](p1) :: (store (s32) into `{ i32 } addrspace(1)* undef`, addrspace 1) - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY1]] - ; CHECK-NEXT: S_SETPC_B64_return [[COPY2]] + ; CHECK-NEXT: SI_RETURN store { i32 } %arg0, { i32 } addrspace(1)* undef ret void } @@ -1794,20 +1638,18 @@ define void @void_func_struct_i32({ i32 } %arg0) #0 { define void @void_func_struct_i8_i32({ i8, i32 } %arg0) #0 { ; CHECK-LABEL: name: void_func_struct_i8_i32 ; CHECK: bb.1 (%ir-block.0): - ; CHECK-NEXT: liveins: $vgpr0, $vgpr1, $sgpr30_sgpr31 + ; CHECK-NEXT: liveins: $vgpr0, $vgpr1 ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY]](s32) ; CHECK-NEXT: [[TRUNC1:%[0-9]+]]:_(s8) = G_TRUNC [[TRUNC]](s16) ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p1) = 
G_IMPLICIT_DEF ; CHECK-NEXT: G_STORE [[TRUNC1]](s8), [[DEF]](p1) :: (store (s8) into `{ i8, i32 } addrspace(1)* undef`, align 4, addrspace 1) ; CHECK-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 4 ; CHECK-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[DEF]], [[C]](s64) ; CHECK-NEXT: G_STORE [[COPY1]](s32), [[PTR_ADD]](p1) :: (store (s32) into `{ i8, i32 } addrspace(1)* undef` + 4, addrspace 1) - ; CHECK-NEXT: [[COPY3:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY2]] - ; CHECK-NEXT: S_SETPC_B64_return [[COPY3]] + ; CHECK-NEXT: SI_RETURN store { i8, i32 } %arg0, { i8, i32 } addrspace(1)* undef ret void } @@ -1815,11 +1657,8 @@ define void @void_func_struct_i8_i32({ i8, i32 } %arg0) #0 { define void @void_func_byval_struct_i8_i32({ i8, i32 } addrspace(5)* byval({ i8, i32 }) %arg0) #0 { ; CHECK-LABEL: name: void_func_byval_struct_i8_i32 ; CHECK: bb.1 (%ir-block.0): - ; CHECK-NEXT: liveins: $sgpr30_sgpr31 - ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[FRAME_INDEX:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.0 ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(p5) = COPY [[FRAME_INDEX]](p5) - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF ; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(s8) = G_LOAD [[COPY]](p5) :: (dereferenceable load (s8) from %ir.arg0, align 4, addrspace 5) ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 @@ -1829,8 +1668,7 @@ define void @void_func_byval_struct_i8_i32({ i8, i32 } addrspace(5)* byval({ i8, ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 4 ; CHECK-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p1) = G_PTR_ADD [[DEF]], [[C1]](s64) ; CHECK-NEXT: G_STORE [[LOAD1]](s32), [[PTR_ADD1]](p1) :: (store (s32) into `{ i8, i32 } addrspace(1)* undef` + 4, addrspace 1) - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY1]] - ; CHECK-NEXT: S_SETPC_B64_return [[COPY2]] + ; CHECK-NEXT: SI_RETURN %arg0.load = load { i8, i32 }, { i8, i32 } addrspace(5)* %arg0 store { i8, i32 } %arg0.load, { i8, i32 } addrspace(1)* undef ret void @@ -1839,14 +1677,13 @@ define void @void_func_byval_struct_i8_i32({ i8, i32 } addrspace(5)* byval({ i8, define void @void_func_byval_struct_i8_i32_x2({ i8, i32 } addrspace(5)* byval({ i8, i32 }) %arg0, { i8, i32 } addrspace(5)* byval({ i8, i32 }) %arg1, i32 %arg2) #0 { ; CHECK-LABEL: name: void_func_byval_struct_i8_i32_x2 ; CHECK: bb.1 (%ir-block.0): - ; CHECK-NEXT: liveins: $vgpr0, $sgpr30_sgpr31 + ; CHECK-NEXT: liveins: $vgpr0 ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[FRAME_INDEX:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.1 ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(p5) = COPY [[FRAME_INDEX]](p5) ; CHECK-NEXT: [[FRAME_INDEX1:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.0 ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(p5) = COPY [[FRAME_INDEX1]](p5) ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; CHECK-NEXT: [[COPY3:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF ; CHECK-NEXT: [[DEF1:%[0-9]+]]:_(p3) = G_IMPLICIT_DEF ; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(s8) = G_LOAD [[COPY]](p5) :: (volatile dereferenceable load (s8) from %ir.arg0, align 4, addrspace 5) @@ -1864,8 +1701,7 @@ define void @void_func_byval_struct_i8_i32_x2({ i8, i32 } addrspace(5)* byval({ ; CHECK-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p1) = G_PTR_ADD [[DEF]], [[C1]](s64) ; CHECK-NEXT: G_STORE [[LOAD3]](s32), [[PTR_ADD3]](p1) :: (volatile store (s32) into `{ i8, i32 } addrspace(1)* undef` + 4, addrspace 1) ; CHECK-NEXT: G_STORE [[COPY2]](s32), [[DEF1]](p3) :: (volatile store (s32) into `i32 addrspace(3)* undef`, addrspace 3) - ; CHECK-NEXT: 
[[COPY4:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY3]] - ; CHECK-NEXT: S_SETPC_B64_return [[COPY4]] + ; CHECK-NEXT: SI_RETURN %arg0.load = load volatile { i8, i32 }, { i8, i32 } addrspace(5)* %arg0 %arg1.load = load volatile { i8, i32 }, { i8, i32 } addrspace(5)* %arg1 store volatile { i8, i32 } %arg0.load, { i8, i32 } addrspace(1)* undef @@ -1877,21 +1713,17 @@ define void @void_func_byval_struct_i8_i32_x2({ i8, i32 } addrspace(5)* byval({ define void @void_func_byval_i32_byval_i64(i32 addrspace(5)* byval(i32) %arg0, i64 addrspace(5)* byval(i64) %arg1) #0 { ; CHECK-LABEL: name: void_func_byval_i32_byval_i64 ; CHECK: bb.1 (%ir-block.0): - ; CHECK-NEXT: liveins: $sgpr30_sgpr31 - ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[FRAME_INDEX:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.1 ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(p5) = COPY [[FRAME_INDEX]](p5) ; CHECK-NEXT: [[FRAME_INDEX1:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.0 ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(p5) = COPY [[FRAME_INDEX1]](p5) - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF - ; CHECK-NEXT: [[COPY3:%[0-9]+]]:_(p1) = COPY [[DEF]](p1) + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(p1) = COPY [[DEF]](p1) ; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (dereferenceable load (s32) from %ir.arg0, addrspace 5) ; CHECK-NEXT: [[LOAD1:%[0-9]+]]:_(s64) = G_LOAD [[COPY1]](p5) :: (dereferenceable load (s64) from %ir.arg1, addrspace 5) ; CHECK-NEXT: G_STORE [[LOAD]](s32), [[DEF]](p1) :: (store (s32) into `i32 addrspace(1)* undef`, addrspace 1) - ; CHECK-NEXT: G_STORE [[LOAD1]](s64), [[COPY3]](p1) :: (store (s64) into `i64 addrspace(1)* undef`, addrspace 1) - ; CHECK-NEXT: [[COPY4:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY2]] - ; CHECK-NEXT: S_SETPC_B64_return [[COPY4]] + ; CHECK-NEXT: G_STORE [[LOAD1]](s64), [[COPY2]](p1) :: (store (s64) into `i64 addrspace(1)* undef`, addrspace 1) + ; CHECK-NEXT: SI_RETURN %arg0.load = load i32, i32 addrspace(5)* %arg0 %arg1.load = load i64, i64 addrspace(5)* %arg1 store i32 %arg0.load, i32 addrspace(1)* undef @@ -1902,21 +1734,17 @@ define void @void_func_byval_i32_byval_i64(i32 addrspace(5)* byval(i32) %arg0, i define void @void_func_byval_i8_align32_i16_align64(i8 addrspace(5)* byval(i8) %arg0, i16 addrspace(5)* byval(i16) align 64 %arg1) #0 { ; CHECK-LABEL: name: void_func_byval_i8_align32_i16_align64 ; CHECK: bb.1 (%ir-block.0): - ; CHECK-NEXT: liveins: $sgpr30_sgpr31 - ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[FRAME_INDEX:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.1 ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(p5) = COPY [[FRAME_INDEX]](p5) ; CHECK-NEXT: [[FRAME_INDEX1:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.0 ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(p5) = COPY [[FRAME_INDEX1]](p5) - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 ; CHECK-NEXT: [[C:%[0-9]+]]:_(p1) = G_CONSTANT i64 0 - ; CHECK-NEXT: [[COPY3:%[0-9]+]]:_(p1) = COPY [[C]](p1) + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(p1) = COPY [[C]](p1) ; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(s8) = G_LOAD [[COPY]](p5) :: (dereferenceable load (s8) from %ir.arg0, addrspace 5) ; CHECK-NEXT: [[LOAD1:%[0-9]+]]:_(s16) = G_LOAD [[COPY1]](p5) :: (dereferenceable load (s16) from %ir.arg1, addrspace 5) ; CHECK-NEXT: G_STORE [[LOAD]](s8), [[C]](p1) :: (store (s8) into `i8 addrspace(1)* null`, addrspace 1) - ; CHECK-NEXT: G_STORE [[LOAD1]](s16), [[COPY3]](p1) :: (store (s16) into `i16 addrspace(1)* null`, addrspace 1) - ; CHECK-NEXT: [[COPY4:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY2]] - ; CHECK-NEXT: S_SETPC_B64_return [[COPY4]] + ; CHECK-NEXT: 
G_STORE [[LOAD1]](s16), [[COPY2]](p1) :: (store (s16) into `i16 addrspace(1)* null`, addrspace 1) + ; CHECK-NEXT: SI_RETURN %arg0.load = load i8, i8 addrspace(5)* %arg0 %arg1.load = load i16, i16 addrspace(5)* %arg1 store i8 %arg0.load, i8 addrspace(1)* null @@ -1928,15 +1756,12 @@ define void @void_func_byval_i8_align32_i16_align64(i8 addrspace(5)* byval(i8) % define void @byval_a3i32_align128_byval_i16_align64([3 x i32] addrspace(5)* byval([3 x i32]) align 128 %arg0, i16 addrspace(5)* byval(i16) align 64 %arg1) #0 { ; CHECK-LABEL: name: byval_a3i32_align128_byval_i16_align64 ; CHECK: bb.1 (%ir-block.0): - ; CHECK-NEXT: liveins: $sgpr30_sgpr31 - ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[FRAME_INDEX:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.1 ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(p5) = COPY [[FRAME_INDEX]](p5) ; CHECK-NEXT: [[FRAME_INDEX1:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.0 ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(p5) = COPY [[FRAME_INDEX1]](p5) - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 ; CHECK-NEXT: [[C:%[0-9]+]]:_(p1) = G_CONSTANT i64 0 - ; CHECK-NEXT: [[COPY3:%[0-9]+]]:_(p1) = COPY [[C]](p1) + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(p1) = COPY [[C]](p1) ; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (dereferenceable load (s32) from %ir.arg0, addrspace 5) ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 ; CHECK-NEXT: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C1]](s32) @@ -1952,9 +1777,8 @@ define void @byval_a3i32_align128_byval_i16_align64([3 x i32] addrspace(5)* byva ; CHECK-NEXT: [[C4:%[0-9]+]]:_(s64) = G_CONSTANT i64 8 ; CHECK-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p1) = G_PTR_ADD [[C]], [[C4]](s64) ; CHECK-NEXT: G_STORE [[LOAD2]](s32), [[PTR_ADD3]](p1) :: (store (s32) into `[3 x i32] addrspace(1)* null` + 8, addrspace 1) - ; CHECK-NEXT: G_STORE [[LOAD3]](s16), [[COPY3]](p1) :: (store (s16) into `i16 addrspace(1)* null`, addrspace 1) - ; CHECK-NEXT: [[COPY4:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY2]] - ; CHECK-NEXT: S_SETPC_B64_return [[COPY4]] + ; CHECK-NEXT: G_STORE [[LOAD3]](s16), [[COPY2]](p1) :: (store (s16) into `i16 addrspace(1)* null`, addrspace 1) + ; CHECK-NEXT: SI_RETURN %arg0.load = load [3 x i32], [3 x i32] addrspace(5)* %arg0 %arg1.load = load i16, i16 addrspace(5)* %arg1 store [3 x i32] %arg0.load, [3 x i32] addrspace(1)* null @@ -1966,7 +1790,7 @@ define void @byval_a3i32_align128_byval_i16_align64([3 x i32] addrspace(5)* byva define void @void_func_v32i32_i32_byval_i8(<32 x i32> %arg0, i32 %arg1, i8 addrspace(5)* byval(i8) align 8 %arg2) #0 { ; CHECK-LABEL: name: void_func_v32i32_i32_byval_i8 ; CHECK: bb.1 (%ir-block.0): - ; CHECK-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $vgpr8, $vgpr9, $vgpr10, $vgpr11, $vgpr12, $vgpr13, $vgpr14, $vgpr15, $vgpr16, $vgpr17, $vgpr18, $vgpr19, $vgpr20, $vgpr21, $vgpr22, $vgpr23, $vgpr24, $vgpr25, $vgpr26, $vgpr27, $vgpr28, $vgpr29, $vgpr30, $sgpr30_sgpr31 + ; CHECK-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $vgpr8, $vgpr9, $vgpr10, $vgpr11, $vgpr12, $vgpr13, $vgpr14, $vgpr15, $vgpr16, $vgpr17, $vgpr18, $vgpr19, $vgpr20, $vgpr21, $vgpr22, $vgpr23, $vgpr24, $vgpr25, $vgpr26, $vgpr27, $vgpr28, $vgpr29, $vgpr30 ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 @@ -2006,14 +1830,12 @@ define void @void_func_v32i32_i32_byval_i8(<32 x i32> %arg0, i32 %arg1, i8 addrs ; CHECK-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX1]](p5) :: (invariant load (s32) from 
%fixed-stack.1, addrspace 5) ; CHECK-NEXT: [[FRAME_INDEX2:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.0 ; CHECK-NEXT: [[COPY31:%[0-9]+]]:_(p5) = COPY [[FRAME_INDEX2]](p5) - ; CHECK-NEXT: [[COPY32:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 ; CHECK-NEXT: [[C:%[0-9]+]]:_(p1) = G_CONSTANT i64 0 - ; CHECK-NEXT: [[COPY33:%[0-9]+]]:_(p1) = COPY [[C]](p1) + ; CHECK-NEXT: [[COPY32:%[0-9]+]]:_(p1) = COPY [[C]](p1) ; CHECK-NEXT: G_STORE [[LOAD1]](s32), [[C]](p1) :: (store (s32) into `i32 addrspace(1)* null`, addrspace 1) ; CHECK-NEXT: [[LOAD2:%[0-9]+]]:_(s8) = G_LOAD [[COPY31]](p5) :: (dereferenceable load (s8) from %ir.arg2, addrspace 5) - ; CHECK-NEXT: G_STORE [[LOAD2]](s8), [[COPY33]](p1) :: (store (s8) into `i8 addrspace(1)* null`, addrspace 1) - ; CHECK-NEXT: [[COPY34:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY32]] - ; CHECK-NEXT: S_SETPC_B64_return [[COPY34]] + ; CHECK-NEXT: G_STORE [[LOAD2]](s8), [[COPY32]](p1) :: (store (s8) into `i8 addrspace(1)* null`, addrspace 1) + ; CHECK-NEXT: SI_RETURN store i32 %arg1, i32 addrspace(1)* null %arg2.load = load i8, i8 addrspace(5)* %arg2 store i8 %arg2.load, i8 addrspace(1)* null @@ -2024,7 +1846,7 @@ define void @void_func_v32i32_i32_byval_i8(<32 x i32> %arg0, i32 %arg1, i8 addrs define void @void_func_v32i32_byval_i8_i32(<32 x i32> %arg0, i8 addrspace(5)* byval(i8) %arg1, i32 %arg2) #0 { ; CHECK-LABEL: name: void_func_v32i32_byval_i8_i32 ; CHECK: bb.1 (%ir-block.0): - ; CHECK-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $vgpr8, $vgpr9, $vgpr10, $vgpr11, $vgpr12, $vgpr13, $vgpr14, $vgpr15, $vgpr16, $vgpr17, $vgpr18, $vgpr19, $vgpr20, $vgpr21, $vgpr22, $vgpr23, $vgpr24, $vgpr25, $vgpr26, $vgpr27, $vgpr28, $vgpr29, $vgpr30, $sgpr30_sgpr31 + ; CHECK-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $vgpr8, $vgpr9, $vgpr10, $vgpr11, $vgpr12, $vgpr13, $vgpr14, $vgpr15, $vgpr16, $vgpr17, $vgpr18, $vgpr19, $vgpr20, $vgpr21, $vgpr22, $vgpr23, $vgpr24, $vgpr25, $vgpr26, $vgpr27, $vgpr28, $vgpr29, $vgpr30 ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 @@ -2064,14 +1886,12 @@ define void @void_func_v32i32_byval_i8_i32(<32 x i32> %arg0, i8 addrspace(5)* by ; CHECK-NEXT: [[COPY31:%[0-9]+]]:_(p5) = COPY [[FRAME_INDEX1]](p5) ; CHECK-NEXT: [[FRAME_INDEX2:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.0 ; CHECK-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX2]](p5) :: (invariant load (s32) from %fixed-stack.0, align 8, addrspace 5) - ; CHECK-NEXT: [[COPY32:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 ; CHECK-NEXT: [[C:%[0-9]+]]:_(p1) = G_CONSTANT i64 0 - ; CHECK-NEXT: [[COPY33:%[0-9]+]]:_(p1) = COPY [[C]](p1) + ; CHECK-NEXT: [[COPY32:%[0-9]+]]:_(p1) = COPY [[C]](p1) ; CHECK-NEXT: G_STORE [[LOAD1]](s32), [[C]](p1) :: (store (s32) into `i32 addrspace(1)* null`, addrspace 1) ; CHECK-NEXT: [[LOAD2:%[0-9]+]]:_(s8) = G_LOAD [[COPY31]](p5) :: (dereferenceable load (s8) from %ir.arg1, addrspace 5) - ; CHECK-NEXT: G_STORE [[LOAD2]](s8), [[COPY33]](p1) :: (store (s8) into `i8 addrspace(1)* null`, addrspace 1) - ; CHECK-NEXT: [[COPY34:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY32]] - ; CHECK-NEXT: S_SETPC_B64_return [[COPY34]] + ; CHECK-NEXT: G_STORE [[LOAD2]](s8), [[COPY32]](p1) :: (store (s8) into `i8 addrspace(1)* null`, addrspace 1) + ; CHECK-NEXT: SI_RETURN store i32 %arg2, i32 addrspace(1)* null %arg1.load = load i8, i8 addrspace(5)* %arg1 store i8 %arg1.load, i8 addrspace(1)* null @@ -2081,7 +1901,7 @@ define void @void_func_v32i32_byval_i8_i32(<32 x 
i32> %arg0, i8 addrspace(5)* by define void @void_func_v32i32_i32_i64(<32 x i32> %arg0, i32 %arg1, i64 %arg2) #0 { ; CHECK-LABEL: name: void_func_v32i32_i32_i64 ; CHECK: bb.1 (%ir-block.0): - ; CHECK-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $vgpr8, $vgpr9, $vgpr10, $vgpr11, $vgpr12, $vgpr13, $vgpr14, $vgpr15, $vgpr16, $vgpr17, $vgpr18, $vgpr19, $vgpr20, $vgpr21, $vgpr22, $vgpr23, $vgpr24, $vgpr25, $vgpr26, $vgpr27, $vgpr28, $vgpr29, $vgpr30, $sgpr30_sgpr31 + ; CHECK-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $vgpr8, $vgpr9, $vgpr10, $vgpr11, $vgpr12, $vgpr13, $vgpr14, $vgpr15, $vgpr16, $vgpr17, $vgpr18, $vgpr19, $vgpr20, $vgpr21, $vgpr22, $vgpr23, $vgpr24, $vgpr25, $vgpr26, $vgpr27, $vgpr28, $vgpr29, $vgpr30 ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 @@ -2124,15 +1944,13 @@ define void @void_func_v32i32_i32_i64(<32 x i32> %arg0, i32 %arg1, i64 %arg2) #0 ; CHECK-NEXT: [[FRAME_INDEX3:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.0 ; CHECK-NEXT: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX3]](p5) :: (invariant load (s32) from %fixed-stack.0, addrspace 5) ; CHECK-NEXT: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[LOAD2]](s32), [[LOAD3]](s32) - ; CHECK-NEXT: [[COPY31:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF + ; CHECK-NEXT: [[COPY31:%[0-9]+]]:_(p1) = COPY [[DEF]](p1) ; CHECK-NEXT: [[COPY32:%[0-9]+]]:_(p1) = COPY [[DEF]](p1) - ; CHECK-NEXT: [[COPY33:%[0-9]+]]:_(p1) = COPY [[DEF]](p1) ; CHECK-NEXT: G_STORE [[BUILD_VECTOR]](<32 x s32>), [[DEF]](p1) :: (volatile store (<32 x s32>) into `<32 x i32> addrspace(1)* undef`, addrspace 1) - ; CHECK-NEXT: G_STORE [[LOAD1]](s32), [[COPY32]](p1) :: (volatile store (s32) into `i32 addrspace(1)* undef`, addrspace 1) - ; CHECK-NEXT: G_STORE [[MV]](s64), [[COPY33]](p1) :: (volatile store (s64) into `i64 addrspace(1)* undef`, addrspace 1) - ; CHECK-NEXT: [[COPY34:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY31]] - ; CHECK-NEXT: S_SETPC_B64_return [[COPY34]] + ; CHECK-NEXT: G_STORE [[LOAD1]](s32), [[COPY31]](p1) :: (volatile store (s32) into `i32 addrspace(1)* undef`, addrspace 1) + ; CHECK-NEXT: G_STORE [[MV]](s64), [[COPY32]](p1) :: (volatile store (s64) into `i64 addrspace(1)* undef`, addrspace 1) + ; CHECK-NEXT: SI_RETURN store volatile <32 x i32> %arg0, <32 x i32> addrspace(1)* undef store volatile i32 %arg1, i32 addrspace(1)* undef store volatile i64 %arg2, i64 addrspace(1)* undef @@ -2143,7 +1961,7 @@ define void @void_func_v32i32_i32_i64(<32 x i32> %arg0, i32 %arg1, i64 %arg2) #0 define void @void_func_v32i32_i1_i8_i16(<32 x i32> %arg0, i1 %arg1, i8 %arg2, i16 %arg3, half %arg4) #0 { ; CHECK-LABEL: name: void_func_v32i32_i1_i8_i16 ; CHECK: bb.1 (%ir-block.0): - ; CHECK-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $vgpr8, $vgpr9, $vgpr10, $vgpr11, $vgpr12, $vgpr13, $vgpr14, $vgpr15, $vgpr16, $vgpr17, $vgpr18, $vgpr19, $vgpr20, $vgpr21, $vgpr22, $vgpr23, $vgpr24, $vgpr25, $vgpr26, $vgpr27, $vgpr28, $vgpr29, $vgpr30, $sgpr30_sgpr31 + ; CHECK-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $vgpr8, $vgpr9, $vgpr10, $vgpr11, $vgpr12, $vgpr13, $vgpr14, $vgpr15, $vgpr16, $vgpr17, $vgpr18, $vgpr19, $vgpr20, $vgpr21, $vgpr22, $vgpr23, $vgpr24, $vgpr25, $vgpr26, $vgpr27, $vgpr28, $vgpr29, $vgpr30 ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = 
COPY $vgpr1 @@ -2189,19 +2007,17 @@ define void @void_func_v32i32_i1_i8_i16(<32 x i32> %arg0, i1 %arg1, i8 %arg2, i1 ; CHECK-NEXT: [[LOAD3:%[0-9]+]]:_(s16) = G_LOAD [[FRAME_INDEX3]](p5) :: (invariant load (s16) from %fixed-stack.1, align 4, addrspace 5) ; CHECK-NEXT: [[FRAME_INDEX4:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.0 ; CHECK-NEXT: [[LOAD4:%[0-9]+]]:_(s16) = G_LOAD [[FRAME_INDEX4]](p5) :: (invariant load (s16) from %fixed-stack.0, align 16, addrspace 5) - ; CHECK-NEXT: [[COPY31:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF + ; CHECK-NEXT: [[COPY31:%[0-9]+]]:_(p1) = COPY [[DEF]](p1) ; CHECK-NEXT: [[COPY32:%[0-9]+]]:_(p1) = COPY [[DEF]](p1) ; CHECK-NEXT: [[COPY33:%[0-9]+]]:_(p1) = COPY [[DEF]](p1) ; CHECK-NEXT: [[COPY34:%[0-9]+]]:_(p1) = COPY [[DEF]](p1) - ; CHECK-NEXT: [[COPY35:%[0-9]+]]:_(p1) = COPY [[DEF]](p1) ; CHECK-NEXT: G_STORE [[BUILD_VECTOR]](<32 x s32>), [[DEF]](p1) :: (volatile store (<32 x s32>) into `<32 x i32> addrspace(1)* undef`, addrspace 1) - ; CHECK-NEXT: G_STORE [[TRUNC]](s1), [[COPY32]](p1) :: (volatile store (s1) into `i1 addrspace(1)* undef`, addrspace 1) - ; CHECK-NEXT: G_STORE [[TRUNC1]](s8), [[COPY33]](p1) :: (volatile store (s8) into `i8 addrspace(1)* undef`, addrspace 1) - ; CHECK-NEXT: G_STORE [[LOAD3]](s16), [[COPY34]](p1) :: (volatile store (s16) into `i16 addrspace(1)* undef`, addrspace 1) - ; CHECK-NEXT: G_STORE [[LOAD4]](s16), [[COPY35]](p1) :: (volatile store (s16) into `half addrspace(1)* undef`, addrspace 1) - ; CHECK-NEXT: [[COPY36:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY31]] - ; CHECK-NEXT: S_SETPC_B64_return [[COPY36]] + ; CHECK-NEXT: G_STORE [[TRUNC]](s1), [[COPY31]](p1) :: (volatile store (s1) into `i1 addrspace(1)* undef`, addrspace 1) + ; CHECK-NEXT: G_STORE [[TRUNC1]](s8), [[COPY32]](p1) :: (volatile store (s8) into `i8 addrspace(1)* undef`, addrspace 1) + ; CHECK-NEXT: G_STORE [[LOAD3]](s16), [[COPY33]](p1) :: (volatile store (s16) into `i16 addrspace(1)* undef`, addrspace 1) + ; CHECK-NEXT: G_STORE [[LOAD4]](s16), [[COPY34]](p1) :: (volatile store (s16) into `half addrspace(1)* undef`, addrspace 1) + ; CHECK-NEXT: SI_RETURN store volatile <32 x i32> %arg0, <32 x i32> addrspace(1)* undef store volatile i1 %arg1, i1 addrspace(1)* undef store volatile i8 %arg2, i8 addrspace(1)* undef @@ -2213,7 +2029,7 @@ define void @void_func_v32i32_i1_i8_i16(<32 x i32> %arg0, i1 %arg1, i8 %arg2, i1 define void @void_func_v32i32_p3_p5_i16(<32 x i32> %arg0, i8 addrspace(3)* %arg1, i8 addrspace(5)* %arg2) #0 { ; CHECK-LABEL: name: void_func_v32i32_p3_p5_i16 ; CHECK: bb.1 (%ir-block.0): - ; CHECK-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $vgpr8, $vgpr9, $vgpr10, $vgpr11, $vgpr12, $vgpr13, $vgpr14, $vgpr15, $vgpr16, $vgpr17, $vgpr18, $vgpr19, $vgpr20, $vgpr21, $vgpr22, $vgpr23, $vgpr24, $vgpr25, $vgpr26, $vgpr27, $vgpr28, $vgpr29, $vgpr30, $sgpr30_sgpr31 + ; CHECK-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $vgpr8, $vgpr9, $vgpr10, $vgpr11, $vgpr12, $vgpr13, $vgpr14, $vgpr15, $vgpr16, $vgpr17, $vgpr18, $vgpr19, $vgpr20, $vgpr21, $vgpr22, $vgpr23, $vgpr24, $vgpr25, $vgpr26, $vgpr27, $vgpr28, $vgpr29, $vgpr30 ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 @@ -2253,15 +2069,13 @@ define void @void_func_v32i32_p3_p5_i16(<32 x i32> %arg0, i8 addrspace(3)* %arg1 ; CHECK-NEXT: [[LOAD1:%[0-9]+]]:_(p3) = G_LOAD [[FRAME_INDEX1]](p5) :: (invariant load (p3) from %fixed-stack.1, 
addrspace 5) ; CHECK-NEXT: [[FRAME_INDEX2:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.0 ; CHECK-NEXT: [[LOAD2:%[0-9]+]]:_(p5) = G_LOAD [[FRAME_INDEX2]](p5) :: (invariant load (p5) from %fixed-stack.0, align 8, addrspace 5) - ; CHECK-NEXT: [[COPY31:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF + ; CHECK-NEXT: [[COPY31:%[0-9]+]]:_(p1) = COPY [[DEF]](p1) ; CHECK-NEXT: [[COPY32:%[0-9]+]]:_(p1) = COPY [[DEF]](p1) - ; CHECK-NEXT: [[COPY33:%[0-9]+]]:_(p1) = COPY [[DEF]](p1) ; CHECK-NEXT: G_STORE [[BUILD_VECTOR]](<32 x s32>), [[DEF]](p1) :: (volatile store (<32 x s32>) into `<32 x i32> addrspace(1)* undef`, addrspace 1) - ; CHECK-NEXT: G_STORE [[LOAD1]](p3), [[COPY32]](p1) :: (volatile store (p3) into `i8 addrspace(3)* addrspace(1)* undef`, addrspace 1) - ; CHECK-NEXT: G_STORE [[LOAD2]](p5), [[COPY33]](p1) :: (volatile store (p5) into `i8 addrspace(5)* addrspace(1)* undef`, addrspace 1) - ; CHECK-NEXT: [[COPY34:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY31]] - ; CHECK-NEXT: S_SETPC_B64_return [[COPY34]] + ; CHECK-NEXT: G_STORE [[LOAD1]](p3), [[COPY31]](p1) :: (volatile store (p3) into `i8 addrspace(3)* addrspace(1)* undef`, addrspace 1) + ; CHECK-NEXT: G_STORE [[LOAD2]](p5), [[COPY32]](p1) :: (volatile store (p5) into `i8 addrspace(5)* addrspace(1)* undef`, addrspace 1) + ; CHECK-NEXT: SI_RETURN store volatile <32 x i32> %arg0, <32 x i32> addrspace(1)* undef store volatile i8 addrspace(3)* %arg1, i8 addrspace(3)* addrspace(1)* undef store volatile i8 addrspace(5)* %arg2, i8 addrspace(5)* addrspace(1)* undef @@ -2271,7 +2085,7 @@ define void @void_func_v32i32_p3_p5_i16(<32 x i32> %arg0, i8 addrspace(3)* %arg1 define void @void_func_v32i32_v2i32_v2f32(<32 x i32> %arg0, <2 x i32> %arg1, <2 x float> %arg2) #0 { ; CHECK-LABEL: name: void_func_v32i32_v2i32_v2f32 ; CHECK: bb.1 (%ir-block.0): - ; CHECK-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $vgpr8, $vgpr9, $vgpr10, $vgpr11, $vgpr12, $vgpr13, $vgpr14, $vgpr15, $vgpr16, $vgpr17, $vgpr18, $vgpr19, $vgpr20, $vgpr21, $vgpr22, $vgpr23, $vgpr24, $vgpr25, $vgpr26, $vgpr27, $vgpr28, $vgpr29, $vgpr30, $sgpr30_sgpr31 + ; CHECK-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $vgpr8, $vgpr9, $vgpr10, $vgpr11, $vgpr12, $vgpr13, $vgpr14, $vgpr15, $vgpr16, $vgpr17, $vgpr18, $vgpr19, $vgpr20, $vgpr21, $vgpr22, $vgpr23, $vgpr24, $vgpr25, $vgpr26, $vgpr27, $vgpr28, $vgpr29, $vgpr30 ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 @@ -2317,15 +2131,13 @@ define void @void_func_v32i32_v2i32_v2f32(<32 x i32> %arg0, <2 x i32> %arg1, <2 ; CHECK-NEXT: [[FRAME_INDEX4:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.0 ; CHECK-NEXT: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX4]](p5) :: (invariant load (s32) from %fixed-stack.0, align 16, addrspace 5) ; CHECK-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[LOAD3]](s32), [[LOAD4]](s32) - ; CHECK-NEXT: [[COPY31:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF + ; CHECK-NEXT: [[COPY31:%[0-9]+]]:_(p1) = COPY [[DEF]](p1) ; CHECK-NEXT: [[COPY32:%[0-9]+]]:_(p1) = COPY [[DEF]](p1) - ; CHECK-NEXT: [[COPY33:%[0-9]+]]:_(p1) = COPY [[DEF]](p1) ; CHECK-NEXT: G_STORE [[BUILD_VECTOR]](<32 x s32>), [[DEF]](p1) :: (volatile store (<32 x s32>) into `<32 x i32> addrspace(1)* undef`, addrspace 1) - ; CHECK-NEXT: G_STORE [[BUILD_VECTOR1]](<2 x s32>), [[COPY32]](p1) :: (volatile store (<2 x s32>) into `<2 x i32> 
addrspace(1)* undef`, addrspace 1) - ; CHECK-NEXT: G_STORE [[BUILD_VECTOR2]](<2 x s32>), [[COPY33]](p1) :: (volatile store (<2 x s32>) into `<2 x float> addrspace(1)* undef`, addrspace 1) - ; CHECK-NEXT: [[COPY34:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY31]] - ; CHECK-NEXT: S_SETPC_B64_return [[COPY34]] + ; CHECK-NEXT: G_STORE [[BUILD_VECTOR1]](<2 x s32>), [[COPY31]](p1) :: (volatile store (<2 x s32>) into `<2 x i32> addrspace(1)* undef`, addrspace 1) + ; CHECK-NEXT: G_STORE [[BUILD_VECTOR2]](<2 x s32>), [[COPY32]](p1) :: (volatile store (<2 x s32>) into `<2 x float> addrspace(1)* undef`, addrspace 1) + ; CHECK-NEXT: SI_RETURN store volatile <32 x i32> %arg0, <32 x i32> addrspace(1)* undef store volatile <2 x i32> %arg1, <2 x i32> addrspace(1)* undef store volatile <2 x float> %arg2, <2 x float> addrspace(1)* undef @@ -2335,7 +2147,7 @@ define void @void_func_v32i32_v2i32_v2f32(<32 x i32> %arg0, <2 x i32> %arg1, <2 define void @void_func_v32i32_v2i16_v2f16(<32 x i32> %arg0, <2 x i16> %arg1, <2 x half> %arg2) #0 { ; CHECK-LABEL: name: void_func_v32i32_v2i16_v2f16 ; CHECK: bb.1 (%ir-block.0): - ; CHECK-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $vgpr8, $vgpr9, $vgpr10, $vgpr11, $vgpr12, $vgpr13, $vgpr14, $vgpr15, $vgpr16, $vgpr17, $vgpr18, $vgpr19, $vgpr20, $vgpr21, $vgpr22, $vgpr23, $vgpr24, $vgpr25, $vgpr26, $vgpr27, $vgpr28, $vgpr29, $vgpr30, $sgpr30_sgpr31 + ; CHECK-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $vgpr8, $vgpr9, $vgpr10, $vgpr11, $vgpr12, $vgpr13, $vgpr14, $vgpr15, $vgpr16, $vgpr17, $vgpr18, $vgpr19, $vgpr20, $vgpr21, $vgpr22, $vgpr23, $vgpr24, $vgpr25, $vgpr26, $vgpr27, $vgpr28, $vgpr29, $vgpr30 ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 @@ -2375,15 +2187,13 @@ define void @void_func_v32i32_v2i16_v2f16(<32 x i32> %arg0, <2 x i16> %arg1, <2 ; CHECK-NEXT: [[LOAD1:%[0-9]+]]:_(<2 x s16>) = G_LOAD [[FRAME_INDEX1]](p5) :: (invariant load (<2 x s16>) from %fixed-stack.1, addrspace 5) ; CHECK-NEXT: [[FRAME_INDEX2:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.0 ; CHECK-NEXT: [[LOAD2:%[0-9]+]]:_(<2 x s16>) = G_LOAD [[FRAME_INDEX2]](p5) :: (invariant load (<2 x s16>) from %fixed-stack.0, align 8, addrspace 5) - ; CHECK-NEXT: [[COPY31:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF + ; CHECK-NEXT: [[COPY31:%[0-9]+]]:_(p1) = COPY [[DEF]](p1) ; CHECK-NEXT: [[COPY32:%[0-9]+]]:_(p1) = COPY [[DEF]](p1) - ; CHECK-NEXT: [[COPY33:%[0-9]+]]:_(p1) = COPY [[DEF]](p1) ; CHECK-NEXT: G_STORE [[BUILD_VECTOR]](<32 x s32>), [[DEF]](p1) :: (volatile store (<32 x s32>) into `<32 x i32> addrspace(1)* undef`, addrspace 1) - ; CHECK-NEXT: G_STORE [[LOAD1]](<2 x s16>), [[COPY32]](p1) :: (volatile store (<2 x s16>) into `<2 x i16> addrspace(1)* undef`, addrspace 1) - ; CHECK-NEXT: G_STORE [[LOAD2]](<2 x s16>), [[COPY33]](p1) :: (volatile store (<2 x s16>) into `<2 x half> addrspace(1)* undef`, addrspace 1) - ; CHECK-NEXT: [[COPY34:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY31]] - ; CHECK-NEXT: S_SETPC_B64_return [[COPY34]] + ; CHECK-NEXT: G_STORE [[LOAD1]](<2 x s16>), [[COPY31]](p1) :: (volatile store (<2 x s16>) into `<2 x i16> addrspace(1)* undef`, addrspace 1) + ; CHECK-NEXT: G_STORE [[LOAD2]](<2 x s16>), [[COPY32]](p1) :: (volatile store (<2 x s16>) into `<2 x half> addrspace(1)* undef`, addrspace 1) + ; CHECK-NEXT: SI_RETURN store volatile <32 x i32> %arg0, <32 x i32> addrspace(1)* undef store volatile <2 x i16> %arg1, <2 x 
i16> addrspace(1)* undef store volatile <2 x half> %arg2, <2 x half> addrspace(1)* undef @@ -2393,7 +2203,7 @@ define void @void_func_v32i32_v2i16_v2f16(<32 x i32> %arg0, <2 x i16> %arg1, <2 define void @void_func_v32i32_v2i64_v2f64(<32 x i32> %arg0, <2 x i64> %arg1, <2 x double> %arg2) #0 { ; CHECK-LABEL: name: void_func_v32i32_v2i64_v2f64 ; CHECK: bb.1 (%ir-block.0): - ; CHECK-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $vgpr8, $vgpr9, $vgpr10, $vgpr11, $vgpr12, $vgpr13, $vgpr14, $vgpr15, $vgpr16, $vgpr17, $vgpr18, $vgpr19, $vgpr20, $vgpr21, $vgpr22, $vgpr23, $vgpr24, $vgpr25, $vgpr26, $vgpr27, $vgpr28, $vgpr29, $vgpr30, $sgpr30_sgpr31 + ; CHECK-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $vgpr8, $vgpr9, $vgpr10, $vgpr11, $vgpr12, $vgpr13, $vgpr14, $vgpr15, $vgpr16, $vgpr17, $vgpr18, $vgpr19, $vgpr20, $vgpr21, $vgpr22, $vgpr23, $vgpr24, $vgpr25, $vgpr26, $vgpr27, $vgpr28, $vgpr29, $vgpr30 ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 @@ -2451,15 +2261,13 @@ define void @void_func_v32i32_v2i64_v2f64(<32 x i32> %arg0, <2 x i64> %arg1, <2 ; CHECK-NEXT: [[MV2:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[LOAD5]](s32), [[LOAD6]](s32) ; CHECK-NEXT: [[MV3:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[LOAD7]](s32), [[LOAD8]](s32) ; CHECK-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x s64>) = G_BUILD_VECTOR [[MV2]](s64), [[MV3]](s64) - ; CHECK-NEXT: [[COPY31:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF + ; CHECK-NEXT: [[COPY31:%[0-9]+]]:_(p1) = COPY [[DEF]](p1) ; CHECK-NEXT: [[COPY32:%[0-9]+]]:_(p1) = COPY [[DEF]](p1) - ; CHECK-NEXT: [[COPY33:%[0-9]+]]:_(p1) = COPY [[DEF]](p1) ; CHECK-NEXT: G_STORE [[BUILD_VECTOR]](<32 x s32>), [[DEF]](p1) :: (volatile store (<32 x s32>) into `<32 x i32> addrspace(1)* undef`, addrspace 1) - ; CHECK-NEXT: G_STORE [[BUILD_VECTOR1]](<2 x s64>), [[COPY32]](p1) :: (volatile store (<2 x s64>) into `<2 x i64> addrspace(1)* undef`, addrspace 1) - ; CHECK-NEXT: G_STORE [[BUILD_VECTOR2]](<2 x s64>), [[COPY33]](p1) :: (volatile store (<2 x s64>) into `<2 x double> addrspace(1)* undef`, addrspace 1) - ; CHECK-NEXT: [[COPY34:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY31]] - ; CHECK-NEXT: S_SETPC_B64_return [[COPY34]] + ; CHECK-NEXT: G_STORE [[BUILD_VECTOR1]](<2 x s64>), [[COPY31]](p1) :: (volatile store (<2 x s64>) into `<2 x i64> addrspace(1)* undef`, addrspace 1) + ; CHECK-NEXT: G_STORE [[BUILD_VECTOR2]](<2 x s64>), [[COPY32]](p1) :: (volatile store (<2 x s64>) into `<2 x double> addrspace(1)* undef`, addrspace 1) + ; CHECK-NEXT: SI_RETURN store volatile <32 x i32> %arg0, <32 x i32> addrspace(1)* undef store volatile <2 x i64> %arg1, <2 x i64> addrspace(1)* undef store volatile <2 x double> %arg2, <2 x double> addrspace(1)* undef @@ -2469,7 +2277,7 @@ define void @void_func_v32i32_v2i64_v2f64(<32 x i32> %arg0, <2 x i64> %arg1, <2 define void @void_func_v32i32_v4i32_v4f32(<32 x i32> %arg0, <4 x i32> %arg1, <4 x float> %arg2) #0 { ; CHECK-LABEL: name: void_func_v32i32_v4i32_v4f32 ; CHECK: bb.1 (%ir-block.0): - ; CHECK-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $vgpr8, $vgpr9, $vgpr10, $vgpr11, $vgpr12, $vgpr13, $vgpr14, $vgpr15, $vgpr16, $vgpr17, $vgpr18, $vgpr19, $vgpr20, $vgpr21, $vgpr22, $vgpr23, $vgpr24, $vgpr25, $vgpr26, $vgpr27, $vgpr28, $vgpr29, $vgpr30, $sgpr30_sgpr31 + ; CHECK-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $vgpr8, $vgpr9, 
$vgpr10, $vgpr11, $vgpr12, $vgpr13, $vgpr14, $vgpr15, $vgpr16, $vgpr17, $vgpr18, $vgpr19, $vgpr20, $vgpr21, $vgpr22, $vgpr23, $vgpr24, $vgpr25, $vgpr26, $vgpr27, $vgpr28, $vgpr29, $vgpr30 ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 @@ -2523,15 +2331,13 @@ define void @void_func_v32i32_v4i32_v4f32(<32 x i32> %arg0, <4 x i32> %arg1, <4 ; CHECK-NEXT: [[FRAME_INDEX8:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.0 ; CHECK-NEXT: [[LOAD8:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX8]](p5) :: (invariant load (s32) from %fixed-stack.0, align 16, addrspace 5) ; CHECK-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[LOAD5]](s32), [[LOAD6]](s32), [[LOAD7]](s32), [[LOAD8]](s32) - ; CHECK-NEXT: [[COPY31:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF + ; CHECK-NEXT: [[COPY31:%[0-9]+]]:_(p1) = COPY [[DEF]](p1) ; CHECK-NEXT: [[COPY32:%[0-9]+]]:_(p1) = COPY [[DEF]](p1) - ; CHECK-NEXT: [[COPY33:%[0-9]+]]:_(p1) = COPY [[DEF]](p1) ; CHECK-NEXT: G_STORE [[BUILD_VECTOR]](<32 x s32>), [[DEF]](p1) :: (volatile store (<32 x s32>) into `<32 x i32> addrspace(1)* undef`, addrspace 1) - ; CHECK-NEXT: G_STORE [[BUILD_VECTOR1]](<4 x s32>), [[COPY32]](p1) :: (volatile store (<4 x s32>) into `<4 x i32> addrspace(1)* undef`, addrspace 1) - ; CHECK-NEXT: G_STORE [[BUILD_VECTOR2]](<4 x s32>), [[COPY33]](p1) :: (volatile store (<4 x s32>) into `<4 x float> addrspace(1)* undef`, addrspace 1) - ; CHECK-NEXT: [[COPY34:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY31]] - ; CHECK-NEXT: S_SETPC_B64_return [[COPY34]] + ; CHECK-NEXT: G_STORE [[BUILD_VECTOR1]](<4 x s32>), [[COPY31]](p1) :: (volatile store (<4 x s32>) into `<4 x i32> addrspace(1)* undef`, addrspace 1) + ; CHECK-NEXT: G_STORE [[BUILD_VECTOR2]](<4 x s32>), [[COPY32]](p1) :: (volatile store (<4 x s32>) into `<4 x float> addrspace(1)* undef`, addrspace 1) + ; CHECK-NEXT: SI_RETURN store volatile <32 x i32> %arg0, <32 x i32> addrspace(1)* undef store volatile <4 x i32> %arg1, <4 x i32> addrspace(1)* undef store volatile <4 x float> %arg2, <4 x float> addrspace(1)* undef @@ -2541,7 +2347,7 @@ define void @void_func_v32i32_v4i32_v4f32(<32 x i32> %arg0, <4 x i32> %arg1, <4 define void @void_func_v32i32_v8i32_v8f32(<32 x i32> %arg0, <8 x i32> %arg1, <8 x float> %arg2) #0 { ; CHECK-LABEL: name: void_func_v32i32_v8i32_v8f32 ; CHECK: bb.1 (%ir-block.0): - ; CHECK-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $vgpr8, $vgpr9, $vgpr10, $vgpr11, $vgpr12, $vgpr13, $vgpr14, $vgpr15, $vgpr16, $vgpr17, $vgpr18, $vgpr19, $vgpr20, $vgpr21, $vgpr22, $vgpr23, $vgpr24, $vgpr25, $vgpr26, $vgpr27, $vgpr28, $vgpr29, $vgpr30, $sgpr30_sgpr31 + ; CHECK-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $vgpr8, $vgpr9, $vgpr10, $vgpr11, $vgpr12, $vgpr13, $vgpr14, $vgpr15, $vgpr16, $vgpr17, $vgpr18, $vgpr19, $vgpr20, $vgpr21, $vgpr22, $vgpr23, $vgpr24, $vgpr25, $vgpr26, $vgpr27, $vgpr28, $vgpr29, $vgpr30 ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 @@ -2611,15 +2417,13 @@ define void @void_func_v32i32_v8i32_v8f32(<32 x i32> %arg0, <8 x i32> %arg1, <8 ; CHECK-NEXT: [[FRAME_INDEX16:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.0 ; CHECK-NEXT: [[LOAD16:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX16]](p5) :: (invariant load (s32) from %fixed-stack.0, align 16, addrspace 5) ; CHECK-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR 
[[LOAD9]](s32), [[LOAD10]](s32), [[LOAD11]](s32), [[LOAD12]](s32), [[LOAD13]](s32), [[LOAD14]](s32), [[LOAD15]](s32), [[LOAD16]](s32) - ; CHECK-NEXT: [[COPY31:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF + ; CHECK-NEXT: [[COPY31:%[0-9]+]]:_(p1) = COPY [[DEF]](p1) ; CHECK-NEXT: [[COPY32:%[0-9]+]]:_(p1) = COPY [[DEF]](p1) - ; CHECK-NEXT: [[COPY33:%[0-9]+]]:_(p1) = COPY [[DEF]](p1) ; CHECK-NEXT: G_STORE [[BUILD_VECTOR]](<32 x s32>), [[DEF]](p1) :: (volatile store (<32 x s32>) into `<32 x i32> addrspace(1)* undef`, addrspace 1) - ; CHECK-NEXT: G_STORE [[BUILD_VECTOR1]](<8 x s32>), [[COPY32]](p1) :: (volatile store (<8 x s32>) into `<8 x i32> addrspace(1)* undef`, addrspace 1) - ; CHECK-NEXT: G_STORE [[BUILD_VECTOR2]](<8 x s32>), [[COPY33]](p1) :: (volatile store (<8 x s32>) into `<8 x float> addrspace(1)* undef`, addrspace 1) - ; CHECK-NEXT: [[COPY34:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY31]] - ; CHECK-NEXT: S_SETPC_B64_return [[COPY34]] + ; CHECK-NEXT: G_STORE [[BUILD_VECTOR1]](<8 x s32>), [[COPY31]](p1) :: (volatile store (<8 x s32>) into `<8 x i32> addrspace(1)* undef`, addrspace 1) + ; CHECK-NEXT: G_STORE [[BUILD_VECTOR2]](<8 x s32>), [[COPY32]](p1) :: (volatile store (<8 x s32>) into `<8 x float> addrspace(1)* undef`, addrspace 1) + ; CHECK-NEXT: SI_RETURN store volatile <32 x i32> %arg0, <32 x i32> addrspace(1)* undef store volatile <8 x i32> %arg1, <8 x i32> addrspace(1)* undef store volatile <8 x float> %arg2, <8 x float> addrspace(1)* undef @@ -2629,7 +2433,7 @@ define void @void_func_v32i32_v8i32_v8f32(<32 x i32> %arg0, <8 x i32> %arg1, <8 define void @void_func_v32i32_v16i32_v16f32(<32 x i32> %arg0, <16 x i32> %arg1, <16 x float> %arg2) #0 { ; CHECK-LABEL: name: void_func_v32i32_v16i32_v16f32 ; CHECK: bb.1 (%ir-block.0): - ; CHECK-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $vgpr8, $vgpr9, $vgpr10, $vgpr11, $vgpr12, $vgpr13, $vgpr14, $vgpr15, $vgpr16, $vgpr17, $vgpr18, $vgpr19, $vgpr20, $vgpr21, $vgpr22, $vgpr23, $vgpr24, $vgpr25, $vgpr26, $vgpr27, $vgpr28, $vgpr29, $vgpr30, $sgpr30_sgpr31 + ; CHECK-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $vgpr8, $vgpr9, $vgpr10, $vgpr11, $vgpr12, $vgpr13, $vgpr14, $vgpr15, $vgpr16, $vgpr17, $vgpr18, $vgpr19, $vgpr20, $vgpr21, $vgpr22, $vgpr23, $vgpr24, $vgpr25, $vgpr26, $vgpr27, $vgpr28, $vgpr29, $vgpr30 ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 @@ -2731,15 +2535,13 @@ define void @void_func_v32i32_v16i32_v16f32(<32 x i32> %arg0, <16 x i32> %arg1, ; CHECK-NEXT: [[FRAME_INDEX32:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.0 ; CHECK-NEXT: [[LOAD32:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX32]](p5) :: (invariant load (s32) from %fixed-stack.0, align 16, addrspace 5) ; CHECK-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<16 x s32>) = G_BUILD_VECTOR [[LOAD17]](s32), [[LOAD18]](s32), [[LOAD19]](s32), [[LOAD20]](s32), [[LOAD21]](s32), [[LOAD22]](s32), [[LOAD23]](s32), [[LOAD24]](s32), [[LOAD25]](s32), [[LOAD26]](s32), [[LOAD27]](s32), [[LOAD28]](s32), [[LOAD29]](s32), [[LOAD30]](s32), [[LOAD31]](s32), [[LOAD32]](s32) - ; CHECK-NEXT: [[COPY31:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF + ; CHECK-NEXT: [[COPY31:%[0-9]+]]:_(p1) = COPY [[DEF]](p1) ; CHECK-NEXT: [[COPY32:%[0-9]+]]:_(p1) = COPY [[DEF]](p1) - ; CHECK-NEXT: [[COPY33:%[0-9]+]]:_(p1) = COPY [[DEF]](p1) ; CHECK-NEXT: G_STORE [[BUILD_VECTOR]](<32 x s32>), [[DEF]](p1) 
:: (volatile store (<32 x s32>) into `<32 x i32> addrspace(1)* undef`, addrspace 1) - ; CHECK-NEXT: G_STORE [[BUILD_VECTOR1]](<16 x s32>), [[COPY32]](p1) :: (volatile store (<16 x s32>) into `<16 x i32> addrspace(1)* undef`, addrspace 1) - ; CHECK-NEXT: G_STORE [[BUILD_VECTOR2]](<16 x s32>), [[COPY33]](p1) :: (volatile store (<16 x s32>) into `<16 x float> addrspace(1)* undef`, addrspace 1) - ; CHECK-NEXT: [[COPY34:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY31]] - ; CHECK-NEXT: S_SETPC_B64_return [[COPY34]] + ; CHECK-NEXT: G_STORE [[BUILD_VECTOR1]](<16 x s32>), [[COPY31]](p1) :: (volatile store (<16 x s32>) into `<16 x i32> addrspace(1)* undef`, addrspace 1) + ; CHECK-NEXT: G_STORE [[BUILD_VECTOR2]](<16 x s32>), [[COPY32]](p1) :: (volatile store (<16 x s32>) into `<16 x float> addrspace(1)* undef`, addrspace 1) + ; CHECK-NEXT: SI_RETURN store volatile <32 x i32> %arg0, <32 x i32> addrspace(1)* undef store volatile <16 x i32> %arg1, <16 x i32> addrspace(1)* undef store volatile <16 x float> %arg2, <16 x float> addrspace(1)* undef @@ -2750,28 +2552,26 @@ define void @void_func_v32i32_v16i32_v16f32(<32 x i32> %arg0, <16 x i32> %arg1, define void @void_func_v3f32_wasted_reg(<3 x float> %arg0, i32 %arg1) #0 { ; CHECK-LABEL: name: void_func_v3f32_wasted_reg ; CHECK: bb.1 (%ir-block.0): - ; CHECK-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $sgpr30_sgpr31 + ; CHECK-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3 ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32) ; CHECK-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $vgpr3 - ; CHECK-NEXT: [[COPY4:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 ; CHECK-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 2 ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p3) = G_IMPLICIT_DEF - ; CHECK-NEXT: [[COPY5:%[0-9]+]]:_(p3) = COPY [[DEF]](p3) + ; CHECK-NEXT: [[COPY4:%[0-9]+]]:_(p3) = COPY [[DEF]](p3) ; CHECK-NEXT: [[EVEC:%[0-9]+]]:_(s32) = G_EXTRACT_VECTOR_ELT [[BUILD_VECTOR]](<3 x s32>), [[C]](s32) ; CHECK-NEXT: [[EVEC1:%[0-9]+]]:_(s32) = G_EXTRACT_VECTOR_ELT [[BUILD_VECTOR]](<3 x s32>), [[C1]](s32) ; CHECK-NEXT: [[EVEC2:%[0-9]+]]:_(s32) = G_EXTRACT_VECTOR_ELT [[BUILD_VECTOR]](<3 x s32>), [[C2]](s32) ; CHECK-NEXT: G_STORE [[EVEC]](s32), [[DEF]](p3) :: (volatile store (s32) into `float addrspace(3)* undef`, addrspace 3) ; CHECK-NEXT: G_STORE [[EVEC1]](s32), [[DEF]](p3) :: (volatile store (s32) into `float addrspace(3)* undef`, addrspace 3) ; CHECK-NEXT: G_STORE [[EVEC2]](s32), [[DEF]](p3) :: (volatile store (s32) into `float addrspace(3)* undef`, addrspace 3) - ; CHECK-NEXT: G_STORE [[COPY3]](s32), [[COPY5]](p3) :: (volatile store (s32) into `i32 addrspace(3)* undef`, addrspace 3) - ; CHECK-NEXT: [[COPY6:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY4]] - ; CHECK-NEXT: S_SETPC_B64_return [[COPY6]] + ; CHECK-NEXT: G_STORE [[COPY3]](s32), [[COPY4]](p3) :: (volatile store (s32) into `i32 addrspace(3)* undef`, addrspace 3) + ; CHECK-NEXT: SI_RETURN %arg0.0 = extractelement <3 x float> %arg0, i32 0 %arg0.1 = extractelement <3 x float> %arg0, i32 1 %arg0.2 = extractelement <3 x float> %arg0, i32 2 @@ -2785,14 +2585,13 @@ define void @void_func_v3f32_wasted_reg(<3 x float> %arg0, i32 %arg1) #0 { define void @void_func_v3i32_wasted_reg(<3 x i32> %arg0, i32 %arg1) #0 { ; 
CHECK-LABEL: name: void_func_v3i32_wasted_reg ; CHECK: bb.1 (%ir-block.0): - ; CHECK-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $sgpr30_sgpr31 + ; CHECK-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3 ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32) ; CHECK-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $vgpr3 - ; CHECK-NEXT: [[COPY4:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 ; CHECK-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 2 @@ -2804,8 +2603,7 @@ define void @void_func_v3i32_wasted_reg(<3 x i32> %arg0, i32 %arg1) #0 { ; CHECK-NEXT: G_STORE [[EVEC1]](s32), [[DEF]](p3) :: (volatile store (s32) into `i32 addrspace(3)* undef`, addrspace 3) ; CHECK-NEXT: G_STORE [[EVEC2]](s32), [[DEF]](p3) :: (volatile store (s32) into `i32 addrspace(3)* undef`, addrspace 3) ; CHECK-NEXT: G_STORE [[COPY3]](s32), [[DEF]](p3) :: (volatile store (s32) into `i32 addrspace(3)* undef`, addrspace 3) - ; CHECK-NEXT: [[COPY5:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY4]] - ; CHECK-NEXT: S_SETPC_B64_return [[COPY5]] + ; CHECK-NEXT: SI_RETURN %arg0.0 = extractelement <3 x i32> %arg0, i32 0 %arg0.1 = extractelement <3 x i32> %arg0, i32 1 %arg0.2 = extractelement <3 x i32> %arg0, i32 2 @@ -2820,7 +2618,7 @@ define void @void_func_v3i32_wasted_reg(<3 x i32> %arg0, i32 %arg1) #0 { define void @void_func_v16i8(<16 x i8> %arg0) #0 { ; CHECK-LABEL: name: void_func_v16i8 ; CHECK: bb.1 (%ir-block.0): - ; CHECK-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $vgpr8, $vgpr9, $vgpr10, $vgpr11, $vgpr12, $vgpr13, $vgpr14, $vgpr15, $sgpr30_sgpr31 + ; CHECK-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $vgpr8, $vgpr9, $vgpr10, $vgpr11, $vgpr12, $vgpr13, $vgpr14, $vgpr15 ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY]](s32) @@ -2856,11 +2654,9 @@ define void @void_func_v16i8(<16 x i8> %arg0) #0 { ; CHECK-NEXT: [[TRUNC15:%[0-9]+]]:_(s16) = G_TRUNC [[COPY15]](s32) ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<16 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[TRUNC1]](s16), [[TRUNC2]](s16), [[TRUNC3]](s16), [[TRUNC4]](s16), [[TRUNC5]](s16), [[TRUNC6]](s16), [[TRUNC7]](s16), [[TRUNC8]](s16), [[TRUNC9]](s16), [[TRUNC10]](s16), [[TRUNC11]](s16), [[TRUNC12]](s16), [[TRUNC13]](s16), [[TRUNC14]](s16), [[TRUNC15]](s16) ; CHECK-NEXT: [[TRUNC16:%[0-9]+]]:_(<16 x s8>) = G_TRUNC [[BUILD_VECTOR]](<16 x s16>) - ; CHECK-NEXT: [[COPY16:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF ; CHECK-NEXT: G_STORE [[TRUNC16]](<16 x s8>), [[DEF]](p1) :: (volatile store (<16 x s8>) into `<16 x i8> addrspace(1)* undef`, addrspace 1) - ; CHECK-NEXT: [[COPY17:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY16]] - ; CHECK-NEXT: S_SETPC_B64_return [[COPY17]] + ; CHECK-NEXT: SI_RETURN store volatile <16 x i8> %arg0, <16 x i8> addrspace(1)* undef ret void } @@ -2869,7 +2665,7 @@ define void @void_func_v16i8(<16 x i8> %arg0) #0 { define void @void_func_v32i32_v16i8(<32 x i32> %arg0, <16 x i8> %arg1) #0 { ; CHECK-LABEL: name: void_func_v32i32_v16i8 ; CHECK: bb.1 (%ir-block.0): - ; CHECK-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $vgpr8, 
$vgpr9, $vgpr10, $vgpr11, $vgpr12, $vgpr13, $vgpr14, $vgpr15, $vgpr16, $vgpr17, $vgpr18, $vgpr19, $vgpr20, $vgpr21, $vgpr22, $vgpr23, $vgpr24, $vgpr25, $vgpr26, $vgpr27, $vgpr28, $vgpr29, $vgpr30, $sgpr30_sgpr31 + ; CHECK-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $vgpr8, $vgpr9, $vgpr10, $vgpr11, $vgpr12, $vgpr13, $vgpr14, $vgpr15, $vgpr16, $vgpr17, $vgpr18, $vgpr19, $vgpr20, $vgpr21, $vgpr22, $vgpr23, $vgpr24, $vgpr25, $vgpr26, $vgpr27, $vgpr28, $vgpr29, $vgpr30 ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 @@ -2939,13 +2735,11 @@ define void @void_func_v32i32_v16i8(<32 x i32> %arg0, <16 x i8> %arg1) #0 { ; CHECK-NEXT: [[LOAD16:%[0-9]+]]:_(s16) = G_LOAD [[FRAME_INDEX16]](p5) :: (invariant load (s16) from %fixed-stack.0, align 16, addrspace 5) ; CHECK-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<16 x s16>) = G_BUILD_VECTOR [[LOAD1]](s16), [[LOAD2]](s16), [[LOAD3]](s16), [[LOAD4]](s16), [[LOAD5]](s16), [[LOAD6]](s16), [[LOAD7]](s16), [[LOAD8]](s16), [[LOAD9]](s16), [[LOAD10]](s16), [[LOAD11]](s16), [[LOAD12]](s16), [[LOAD13]](s16), [[LOAD14]](s16), [[LOAD15]](s16), [[LOAD16]](s16) ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(<16 x s8>) = G_TRUNC [[BUILD_VECTOR1]](<16 x s16>) - ; CHECK-NEXT: [[COPY31:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF - ; CHECK-NEXT: [[COPY32:%[0-9]+]]:_(p1) = COPY [[DEF]](p1) + ; CHECK-NEXT: [[COPY31:%[0-9]+]]:_(p1) = COPY [[DEF]](p1) ; CHECK-NEXT: G_STORE [[BUILD_VECTOR]](<32 x s32>), [[DEF]](p1) :: (volatile store (<32 x s32>) into `<32 x i32> addrspace(1)* undef`, addrspace 1) - ; CHECK-NEXT: G_STORE [[TRUNC]](<16 x s8>), [[COPY32]](p1) :: (volatile store (<16 x s8>) into `<16 x i8> addrspace(1)* undef`, addrspace 1) - ; CHECK-NEXT: [[COPY33:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY31]] - ; CHECK-NEXT: S_SETPC_B64_return [[COPY33]] + ; CHECK-NEXT: G_STORE [[TRUNC]](<16 x s8>), [[COPY31]](p1) :: (volatile store (<16 x s8>) into `<16 x i8> addrspace(1)* undef`, addrspace 1) + ; CHECK-NEXT: SI_RETURN store volatile <32 x i32> %arg0, <32 x i32> addrspace(1)* undef store volatile <16 x i8> %arg1, <16 x i8> addrspace(1)* undef ret void @@ -2954,7 +2748,7 @@ define void @void_func_v32i32_v16i8(<32 x i32> %arg0, <16 x i8> %arg1) #0 { define void @pointer_in_struct_argument({i8 addrspace(3)*, i8 addrspace(1)*} %arg0, i8 %pad, {i8 addrspace(3)*, i8 addrspace(1234)*} %arg1) { ; CHECK-LABEL: name: pointer_in_struct_argument ; CHECK: bb.1 (%ir-block.0): - ; CHECK-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $sgpr30_sgpr31 + ; CHECK-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6 ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 @@ -2967,18 +2761,16 @@ define void @pointer_in_struct_argument({i8 addrspace(3)*, i8 addrspace(1)*} %ar ; CHECK-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $vgpr5 ; CHECK-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $vgpr6 ; CHECK-NEXT: [[MV1:%[0-9]+]]:_(p1234) = G_MERGE_VALUES [[COPY5]](s32), [[COPY6]](s32) - ; CHECK-NEXT: [[COPY7:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 ; CHECK-NEXT: [[C:%[0-9]+]]:_(p1) = G_CONSTANT i64 0 + ; CHECK-NEXT: [[COPY7:%[0-9]+]]:_(p1) = COPY [[C]](p1) ; CHECK-NEXT: [[COPY8:%[0-9]+]]:_(p1) = COPY [[C]](p1) ; CHECK-NEXT: [[COPY9:%[0-9]+]]:_(p1) = COPY [[C]](p1) - ; CHECK-NEXT: [[COPY10:%[0-9]+]]:_(p1) = COPY [[C]](p1) ; CHECK-NEXT: G_STORE [[COPY]](p3), [[C]](p1) :: (volatile 
store (p3) into `i8 addrspace(3)* addrspace(1)* null`, addrspace 1) - ; CHECK-NEXT: G_STORE [[MV]](p1), [[COPY8]](p1) :: (volatile store (p1) into `i8 addrspace(1)* addrspace(1)* null`, addrspace 1) - ; CHECK-NEXT: G_STORE [[TRUNC1]](s8), [[COPY9]](p1) :: (volatile store (s8) into `i8 addrspace(1)* null`, addrspace 1) + ; CHECK-NEXT: G_STORE [[MV]](p1), [[COPY7]](p1) :: (volatile store (p1) into `i8 addrspace(1)* addrspace(1)* null`, addrspace 1) + ; CHECK-NEXT: G_STORE [[TRUNC1]](s8), [[COPY8]](p1) :: (volatile store (s8) into `i8 addrspace(1)* null`, addrspace 1) ; CHECK-NEXT: G_STORE [[COPY4]](p3), [[C]](p1) :: (volatile store (p3) into `i8 addrspace(3)* addrspace(1)* null`, addrspace 1) - ; CHECK-NEXT: G_STORE [[MV1]](p1234), [[COPY10]](p1) :: (volatile store (p1234) into `i8 addrspace(1234)* addrspace(1)* null`, addrspace 1) - ; CHECK-NEXT: [[COPY11:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY7]] - ; CHECK-NEXT: S_SETPC_B64_return [[COPY11]] + ; CHECK-NEXT: G_STORE [[MV1]](p1234), [[COPY9]](p1) :: (volatile store (p1234) into `i8 addrspace(1234)* addrspace(1)* null`, addrspace 1) + ; CHECK-NEXT: SI_RETURN %val0 = extractvalue {i8 addrspace(3)*, i8 addrspace(1)*} %arg0, 0 %val1 = extractvalue {i8 addrspace(3)*, i8 addrspace(1)*} %arg0, 1 %val2 = extractvalue {i8 addrspace(3)*, i8 addrspace(1234)*} %arg1, 0 @@ -2994,7 +2786,7 @@ define void @pointer_in_struct_argument({i8 addrspace(3)*, i8 addrspace(1)*} %ar define void @vector_ptr_in_struct_arg({ <2 x i8 addrspace(1)*>, <2 x i8 addrspace(3)*> } %arg) { ; CHECK-LABEL: name: vector_ptr_in_struct_arg ; CHECK: bb.1 (%ir-block.0): - ; CHECK-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $sgpr30_sgpr31 + ; CHECK-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5 ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 @@ -3006,14 +2798,12 @@ define void @vector_ptr_in_struct_arg({ <2 x i8 addrspace(1)*>, <2 x i8 addrspac ; CHECK-NEXT: [[COPY4:%[0-9]+]]:_(p3) = COPY $vgpr4 ; CHECK-NEXT: [[COPY5:%[0-9]+]]:_(p3) = COPY $vgpr5 ; CHECK-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x p3>) = G_BUILD_VECTOR [[COPY4]](p3), [[COPY5]](p3) - ; CHECK-NEXT: [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF ; CHECK-NEXT: G_STORE [[BUILD_VECTOR]](<2 x p1>), [[DEF]](p1) :: (store (<2 x p1>) into `{ <2 x i8 addrspace(1)*>, <2 x i8 addrspace(3)*> } addrspace(1)* undef`, addrspace 1) ; CHECK-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 16 ; CHECK-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[DEF]], [[C]](s64) ; CHECK-NEXT: G_STORE [[BUILD_VECTOR1]](<2 x p3>), [[PTR_ADD]](p1) :: (store (<2 x p3>) into `{ <2 x i8 addrspace(1)*>, <2 x i8 addrspace(3)*> } addrspace(1)* undef` + 16, align 16, addrspace 1) - ; CHECK-NEXT: [[COPY7:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY6]] - ; CHECK-NEXT: S_SETPC_B64_return [[COPY7]] + ; CHECK-NEXT: SI_RETURN store { <2 x i8 addrspace(1)*>, <2 x i8 addrspace(3)*> } %arg, { <2 x i8 addrspace(1)*>, <2 x i8 addrspace(3)*> } addrspace(1)* undef ret void } diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-getelementptr.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-getelementptr.ll index c3cc9ce6e54c..2e660884e7db 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-getelementptr.ll +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-getelementptr.ll @@ -5,7 +5,7 @@ define <2 x i32 addrspace(1)*> @vector_gep_v2p1_index_v2i64(<2 x i32 addrspace(1)*> %ptr, <2 x i64> %idx) { ; CHECK-LABEL: 
name: vector_gep_v2p1_index_v2i64 ; CHECK: bb.1 (%ir-block.0): - ; CHECK-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $sgpr30_sgpr31 + ; CHECK-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7 ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 @@ -21,19 +21,17 @@ define <2 x i32 addrspace(1)*> @vector_gep_v2p1_index_v2i64(<2 x i32 addrspace(1 ; CHECK-NEXT: [[MV2:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY4]](s32), [[COPY5]](s32) ; CHECK-NEXT: [[MV3:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY6]](s32), [[COPY7]](s32) ; CHECK-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s64>) = G_BUILD_VECTOR [[MV2]](s64), [[MV3]](s64) - ; CHECK-NEXT: [[COPY8:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 ; CHECK-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 4 ; CHECK-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x s64>) = G_BUILD_VECTOR [[C]](s64), [[C]](s64) ; CHECK-NEXT: [[MUL:%[0-9]+]]:_(<2 x s64>) = G_MUL [[BUILD_VECTOR1]], [[BUILD_VECTOR2]] ; CHECK-NEXT: [[PTR_ADD:%[0-9]+]]:_(<2 x p1>) = G_PTR_ADD [[BUILD_VECTOR]], [[MUL]](<2 x s64>) - ; CHECK-NEXT: [[COPY9:%[0-9]+]]:_(<2 x p1>) = COPY [[PTR_ADD]](<2 x p1>) - ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY9]](<2 x p1>) + ; CHECK-NEXT: [[COPY8:%[0-9]+]]:_(<2 x p1>) = COPY [[PTR_ADD]](<2 x p1>) + ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY8]](<2 x p1>) ; CHECK-NEXT: $vgpr0 = COPY [[UV]](s32) ; CHECK-NEXT: $vgpr1 = COPY [[UV1]](s32) ; CHECK-NEXT: $vgpr2 = COPY [[UV2]](s32) ; CHECK-NEXT: $vgpr3 = COPY [[UV3]](s32) - ; CHECK-NEXT: [[COPY10:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY8]] - ; CHECK-NEXT: S_SETPC_B64_return [[COPY10]], implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3 + ; CHECK-NEXT: SI_RETURN implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3 %gep = getelementptr i32, <2 x i32 addrspace(1)*> %ptr, <2 x i64> %idx ret <2 x i32 addrspace(1)*> %gep } @@ -42,7 +40,7 @@ define <2 x i32 addrspace(1)*> @vector_gep_v2p1_index_v2i64(<2 x i32 addrspace(1 define <2 x i32 addrspace(3)*> @vector_gep_v2p3_index_v2i32(<2 x i32 addrspace(3)*> %ptr, <2 x i32> %idx) { ; CHECK-LABEL: name: vector_gep_v2p3_index_v2i32 ; CHECK: bb.1 (%ir-block.0): - ; CHECK-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $sgpr30_sgpr31 + ; CHECK-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3 ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(p3) = COPY $vgpr1 @@ -50,17 +48,15 @@ define <2 x i32 addrspace(3)*> @vector_gep_v2p3_index_v2i32(<2 x i32 addrspace(3 ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 ; CHECK-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $vgpr3 ; CHECK-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[COPY2]](s32), [[COPY3]](s32) - ; CHECK-NEXT: [[COPY4:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 ; CHECK-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[C]](s32), [[C]](s32) ; CHECK-NEXT: [[MUL:%[0-9]+]]:_(<2 x s32>) = G_MUL [[BUILD_VECTOR1]], [[BUILD_VECTOR2]] ; CHECK-NEXT: [[PTR_ADD:%[0-9]+]]:_(<2 x p3>) = G_PTR_ADD [[BUILD_VECTOR]], [[MUL]](<2 x s32>) - ; CHECK-NEXT: [[COPY5:%[0-9]+]]:_(<2 x p3>) = COPY [[PTR_ADD]](<2 x p3>) - ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY5]](<2 x p3>) + ; 
CHECK-NEXT: [[COPY4:%[0-9]+]]:_(<2 x p3>) = COPY [[PTR_ADD]](<2 x p3>) + ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY4]](<2 x p3>) ; CHECK-NEXT: $vgpr0 = COPY [[UV]](s32) ; CHECK-NEXT: $vgpr1 = COPY [[UV1]](s32) - ; CHECK-NEXT: [[COPY6:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY4]] - ; CHECK-NEXT: S_SETPC_B64_return [[COPY6]], implicit $vgpr0, implicit $vgpr1 + ; CHECK-NEXT: SI_RETURN implicit $vgpr0, implicit $vgpr1 %gep = getelementptr i32, <2 x i32 addrspace(3)*> %ptr, <2 x i32> %idx ret <2 x i32 addrspace(3)*> %gep } @@ -69,7 +65,7 @@ define <2 x i32 addrspace(3)*> @vector_gep_v2p3_index_v2i32(<2 x i32 addrspace(3 define <2 x i32 addrspace(1)*> @vector_gep_v2p1_index_v2i32(<2 x i32 addrspace(1)*> %ptr, <2 x i32> %idx) { ; CHECK-LABEL: name: vector_gep_v2p1_index_v2i32 ; CHECK: bb.1 (%ir-block.0): - ; CHECK-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $sgpr30_sgpr31 + ; CHECK-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5 ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 @@ -81,20 +77,18 @@ define <2 x i32 addrspace(1)*> @vector_gep_v2p1_index_v2i32(<2 x i32 addrspace(1 ; CHECK-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $vgpr4 ; CHECK-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $vgpr5 ; CHECK-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[COPY4]](s32), [[COPY5]](s32) - ; CHECK-NEXT: [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 ; CHECK-NEXT: [[SEXT:%[0-9]+]]:_(<2 x s64>) = G_SEXT [[BUILD_VECTOR1]](<2 x s32>) ; CHECK-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 4 ; CHECK-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x s64>) = G_BUILD_VECTOR [[C]](s64), [[C]](s64) ; CHECK-NEXT: [[MUL:%[0-9]+]]:_(<2 x s64>) = G_MUL [[SEXT]], [[BUILD_VECTOR2]] ; CHECK-NEXT: [[PTR_ADD:%[0-9]+]]:_(<2 x p1>) = G_PTR_ADD [[BUILD_VECTOR]], [[MUL]](<2 x s64>) - ; CHECK-NEXT: [[COPY7:%[0-9]+]]:_(<2 x p1>) = COPY [[PTR_ADD]](<2 x p1>) - ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY7]](<2 x p1>) + ; CHECK-NEXT: [[COPY6:%[0-9]+]]:_(<2 x p1>) = COPY [[PTR_ADD]](<2 x p1>) + ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY6]](<2 x p1>) ; CHECK-NEXT: $vgpr0 = COPY [[UV]](s32) ; CHECK-NEXT: $vgpr1 = COPY [[UV1]](s32) ; CHECK-NEXT: $vgpr2 = COPY [[UV2]](s32) ; CHECK-NEXT: $vgpr3 = COPY [[UV3]](s32) - ; CHECK-NEXT: [[COPY8:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY6]] - ; CHECK-NEXT: S_SETPC_B64_return [[COPY8]], implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3 + ; CHECK-NEXT: SI_RETURN implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3 %gep = getelementptr i32, <2 x i32 addrspace(1)*> %ptr, <2 x i32> %idx ret <2 x i32 addrspace(1)*> %gep } @@ -103,7 +97,7 @@ define <2 x i32 addrspace(1)*> @vector_gep_v2p1_index_v2i32(<2 x i32 addrspace(1 define <2 x i32 addrspace(1)*> @vector_gep_v2p1_index_i64(<2 x i32 addrspace(1)*> %ptr, i64 %idx) { ; CHECK-LABEL: name: vector_gep_v2p1_index_i64 ; CHECK: bb.1 (%ir-block.0): - ; CHECK-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $sgpr30_sgpr31 + ; CHECK-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5 ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 @@ -115,21 +109,19 @@ define <2 x i32 addrspace(1)*> @vector_gep_v2p1_index_i64(<2 x i32 
addrspace(1)* ; CHECK-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $vgpr4 ; CHECK-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $vgpr5 ; CHECK-NEXT: [[MV2:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY4]](s32), [[COPY5]](s32) - ; CHECK-NEXT: [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 ; CHECK-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s64>) = G_BUILD_VECTOR [[MV2]](s64), [[MV2]](s64) - ; CHECK-NEXT: [[COPY7:%[0-9]+]]:_(<2 x s64>) = COPY [[BUILD_VECTOR1]](<2 x s64>) + ; CHECK-NEXT: [[COPY6:%[0-9]+]]:_(<2 x s64>) = COPY [[BUILD_VECTOR1]](<2 x s64>) ; CHECK-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 4 ; CHECK-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x s64>) = G_BUILD_VECTOR [[C]](s64), [[C]](s64) - ; CHECK-NEXT: [[MUL:%[0-9]+]]:_(<2 x s64>) = G_MUL [[COPY7]], [[BUILD_VECTOR2]] + ; CHECK-NEXT: [[MUL:%[0-9]+]]:_(<2 x s64>) = G_MUL [[COPY6]], [[BUILD_VECTOR2]] ; CHECK-NEXT: [[PTR_ADD:%[0-9]+]]:_(<2 x p1>) = G_PTR_ADD [[BUILD_VECTOR]], [[MUL]](<2 x s64>) - ; CHECK-NEXT: [[COPY8:%[0-9]+]]:_(<2 x p1>) = COPY [[PTR_ADD]](<2 x p1>) - ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY8]](<2 x p1>) + ; CHECK-NEXT: [[COPY7:%[0-9]+]]:_(<2 x p1>) = COPY [[PTR_ADD]](<2 x p1>) + ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY7]](<2 x p1>) ; CHECK-NEXT: $vgpr0 = COPY [[UV]](s32) ; CHECK-NEXT: $vgpr1 = COPY [[UV1]](s32) ; CHECK-NEXT: $vgpr2 = COPY [[UV2]](s32) ; CHECK-NEXT: $vgpr3 = COPY [[UV3]](s32) - ; CHECK-NEXT: [[COPY9:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY6]] - ; CHECK-NEXT: S_SETPC_B64_return [[COPY9]], implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3 + ; CHECK-NEXT: SI_RETURN implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3 %gep = getelementptr i32, <2 x i32 addrspace(1)*> %ptr, i64 %idx ret <2 x i32 addrspace(1)*> %gep } @@ -138,7 +130,7 @@ define <2 x i32 addrspace(1)*> @vector_gep_v2p1_index_i64(<2 x i32 addrspace(1)* define <2 x i32 addrspace(1)*> @vector_gep_v2p1_index_i32(<2 x i32 addrspace(1)*> %ptr, i32 %idx) { ; CHECK-LABEL: name: vector_gep_v2p1_index_i32 ; CHECK: bb.1 (%ir-block.0): - ; CHECK-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $sgpr30_sgpr31 + ; CHECK-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4 ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 @@ -148,21 +140,19 @@ define <2 x i32 addrspace(1)*> @vector_gep_v2p1_index_i32(<2 x i32 addrspace(1)* ; CHECK-NEXT: [[MV1:%[0-9]+]]:_(p1) = G_MERGE_VALUES [[COPY2]](s32), [[COPY3]](s32) ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x p1>) = G_BUILD_VECTOR [[MV]](p1), [[MV1]](p1) ; CHECK-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $vgpr4 - ; CHECK-NEXT: [[COPY5:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 ; CHECK-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[COPY4]](s32), [[COPY4]](s32) ; CHECK-NEXT: [[SEXT:%[0-9]+]]:_(<2 x s64>) = G_SEXT [[BUILD_VECTOR1]](<2 x s32>) ; CHECK-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 4 ; CHECK-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x s64>) = G_BUILD_VECTOR [[C]](s64), [[C]](s64) ; CHECK-NEXT: [[MUL:%[0-9]+]]:_(<2 x s64>) = G_MUL [[SEXT]], [[BUILD_VECTOR2]] ; CHECK-NEXT: [[PTR_ADD:%[0-9]+]]:_(<2 x p1>) = G_PTR_ADD [[BUILD_VECTOR]], [[MUL]](<2 x s64>) - ; CHECK-NEXT: [[COPY6:%[0-9]+]]:_(<2 x p1>) = COPY [[PTR_ADD]](<2 x p1>) - ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = 
G_UNMERGE_VALUES [[COPY6]](<2 x p1>) + ; CHECK-NEXT: [[COPY5:%[0-9]+]]:_(<2 x p1>) = COPY [[PTR_ADD]](<2 x p1>) + ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY5]](<2 x p1>) ; CHECK-NEXT: $vgpr0 = COPY [[UV]](s32) ; CHECK-NEXT: $vgpr1 = COPY [[UV1]](s32) ; CHECK-NEXT: $vgpr2 = COPY [[UV2]](s32) ; CHECK-NEXT: $vgpr3 = COPY [[UV3]](s32) - ; CHECK-NEXT: [[COPY7:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY5]] - ; CHECK-NEXT: S_SETPC_B64_return [[COPY7]], implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3 + ; CHECK-NEXT: SI_RETURN implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3 %gep = getelementptr i32, <2 x i32 addrspace(1)*> %ptr, i32 %idx ret <2 x i32 addrspace(1)*> %gep } @@ -171,7 +161,7 @@ define <2 x i32 addrspace(1)*> @vector_gep_v2p1_index_i32(<2 x i32 addrspace(1)* define <2 x i32 addrspace(1)*> @vector_gep_v2p1_index_v2i64_constant(<2 x i32 addrspace(1)*> %ptr, <2 x i64> %idx) { ; CHECK-LABEL: name: vector_gep_v2p1_index_v2i64_constant ; CHECK: bb.1 (%ir-block.0): - ; CHECK-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $sgpr30_sgpr31 + ; CHECK-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7 ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 @@ -187,7 +177,6 @@ define <2 x i32 addrspace(1)*> @vector_gep_v2p1_index_v2i64_constant(<2 x i32 ad ; CHECK-NEXT: [[MV2:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY4]](s32), [[COPY5]](s32) ; CHECK-NEXT: [[MV3:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY6]](s32), [[COPY7]](s32) ; CHECK-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s64>) = G_BUILD_VECTOR [[MV2]](s64), [[MV3]](s64) - ; CHECK-NEXT: [[COPY8:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 ; CHECK-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 1 ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 2 ; CHECK-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x s64>) = G_BUILD_VECTOR [[C]](s64), [[C1]](s64) @@ -196,14 +185,13 @@ define <2 x i32 addrspace(1)*> @vector_gep_v2p1_index_v2i64_constant(<2 x i32 ad ; CHECK-NEXT: [[C3:%[0-9]+]]:_(s64) = G_CONSTANT i64 8 ; CHECK-NEXT: [[BUILD_VECTOR4:%[0-9]+]]:_(<2 x s64>) = G_BUILD_VECTOR [[C2]](s64), [[C3]](s64) ; CHECK-NEXT: [[PTR_ADD:%[0-9]+]]:_(<2 x p1>) = G_PTR_ADD [[BUILD_VECTOR]], [[BUILD_VECTOR4]](<2 x s64>) - ; CHECK-NEXT: [[COPY9:%[0-9]+]]:_(<2 x p1>) = COPY [[PTR_ADD]](<2 x p1>) - ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY9]](<2 x p1>) + ; CHECK-NEXT: [[COPY8:%[0-9]+]]:_(<2 x p1>) = COPY [[PTR_ADD]](<2 x p1>) + ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY8]](<2 x p1>) ; CHECK-NEXT: $vgpr0 = COPY [[UV]](s32) ; CHECK-NEXT: $vgpr1 = COPY [[UV1]](s32) ; CHECK-NEXT: $vgpr2 = COPY [[UV2]](s32) ; CHECK-NEXT: $vgpr3 = COPY [[UV3]](s32) - ; CHECK-NEXT: [[COPY10:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY8]] - ; CHECK-NEXT: S_SETPC_B64_return [[COPY10]], implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3 + ; CHECK-NEXT: SI_RETURN implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3 %gep = getelementptr i32, <2 x i32 addrspace(1)*> %ptr, <2 x i64> ret <2 x i32 addrspace(1)*> %gep } diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-indirect-call.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-indirect-call.ll index 
cc2c95324c2e..f0383259abb7 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-indirect-call.ll +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-indirect-call.ll @@ -57,19 +57,17 @@ define amdgpu_kernel void @test_indirect_call_sgpr_ptr(void()* %fptr) { define amdgpu_gfx void @test_gfx_indirect_call_sgpr_ptr(void()* %fptr) { ; CHECK-LABEL: name: test_gfx_indirect_call_sgpr_ptr ; CHECK: bb.1 (%ir-block.0): - ; CHECK-NEXT: liveins: $vgpr0, $vgpr1, $sgpr30_sgpr31 + ; CHECK-NEXT: liveins: $vgpr0, $vgpr1 ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 ; CHECK-NEXT: [[MV:%[0-9]+]]:_(p0) = G_MERGE_VALUES [[COPY]](s32), [[COPY1]](s32) - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 ; CHECK-NEXT: ADJCALLSTACKUP 0, 0, implicit-def $scc - ; CHECK-NEXT: [[COPY3:%[0-9]+]]:_(<4 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3 - ; CHECK-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY3]](<4 x s32>) + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(<4 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3 + ; CHECK-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY2]](<4 x s32>) ; CHECK-NEXT: $sgpr30_sgpr31 = G_SI_CALL [[MV]](p0), 0, csr_amdgpu_si_gfx, implicit $sgpr0_sgpr1_sgpr2_sgpr3 ; CHECK-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def $scc - ; CHECK-NEXT: [[COPY4:%[0-9]+]]:gfx_ccr_sgpr_64 = COPY [[COPY2]] - ; CHECK-NEXT: S_SETPC_B64_return_gfx [[COPY4]] + ; CHECK-NEXT: SI_RETURN call amdgpu_gfx void %fptr() ret void } diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-inline-asm.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-inline-asm.ll index d31e73e91b60..b5d9b33122ba 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-inline-asm.ll +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-inline-asm.ll @@ -51,16 +51,12 @@ define amdgpu_kernel void @asm_simple_agpr_clobber() { define i32 @asm_vgpr_early_clobber() { ; CHECK-LABEL: name: asm_vgpr_early_clobber ; CHECK: bb.1 (%ir-block.0): - ; CHECK-NEXT: liveins: $sgpr30_sgpr31 - ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 - ; CHECK-NEXT: INLINEASM &"v_mov_b32 $0, 7; v_mov_b32 $1, 7", 1 /* sideeffect attdialect */, 1835019 /* regdef-ec:VGPR_32 */, def early-clobber %1, 1835019 /* regdef-ec:VGPR_32 */, def early-clobber %2, !0 + ; CHECK-NEXT: INLINEASM &"v_mov_b32 $0, 7; v_mov_b32 $1, 7", 1 /* sideeffect attdialect */, 1835019 /* regdef-ec:VGPR_32 */, def early-clobber %0, 1835019 /* regdef-ec:VGPR_32 */, def early-clobber %1, !0 + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY %0 ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY %1 - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY %2 - ; CHECK-NEXT: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[COPY1]], [[COPY2]] + ; CHECK-NEXT: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[COPY]], [[COPY1]] ; CHECK-NEXT: $vgpr0 = COPY [[ADD]](s32) - ; CHECK-NEXT: [[COPY3:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY]] - ; CHECK-NEXT: S_SETPC_B64_return [[COPY3]], implicit $vgpr0 + ; CHECK-NEXT: SI_RETURN implicit $vgpr0 call { i32, i32 } asm sideeffect "v_mov_b32 $0, 7; v_mov_b32 $1, 7", "=&v,=&v"(), !srcloc !0 %asmresult = extractvalue { i32, i32 } %1, 0 %asmresult1 = extractvalue { i32, i32 } %1, 1 @@ -71,14 +67,10 @@ define i32 @asm_vgpr_early_clobber() { define i32 @test_specific_vgpr_output() nounwind { ; CHECK-LABEL: name: test_specific_vgpr_output ; CHECK: bb.1.entry: - ; CHECK-NEXT: liveins: $sgpr30_sgpr31 - ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 ; CHECK-NEXT: INLINEASM &"v_mov_b32 v1, 
7", 0 /* attdialect */, 10 /* regdef */, implicit-def $vgpr1 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; CHECK-NEXT: $vgpr0 = COPY [[COPY1]](s32) - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY]] - ; CHECK-NEXT: S_SETPC_B64_return [[COPY2]], implicit $vgpr0 + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; CHECK-NEXT: $vgpr0 = COPY [[COPY]](s32) + ; CHECK-NEXT: SI_RETURN implicit $vgpr0 entry: %0 = tail call i32 asm "v_mov_b32 v1, 7", "={v1}"() nounwind ret i32 %0 @@ -87,14 +79,10 @@ entry: define i32 @test_single_vgpr_output() nounwind { ; CHECK-LABEL: name: test_single_vgpr_output ; CHECK: bb.1.entry: - ; CHECK-NEXT: liveins: $sgpr30_sgpr31 - ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 - ; CHECK-NEXT: INLINEASM &"v_mov_b32 $0, 7", 0 /* attdialect */, 1835018 /* regdef:VGPR_32 */, def %1 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY %1 - ; CHECK-NEXT: $vgpr0 = COPY [[COPY1]](s32) - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY]] - ; CHECK-NEXT: S_SETPC_B64_return [[COPY2]], implicit $vgpr0 + ; CHECK-NEXT: INLINEASM &"v_mov_b32 $0, 7", 0 /* attdialect */, 1835018 /* regdef:VGPR_32 */, def %0 + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY %0 + ; CHECK-NEXT: $vgpr0 = COPY [[COPY]](s32) + ; CHECK-NEXT: SI_RETURN implicit $vgpr0 entry: %0 = tail call i32 asm "v_mov_b32 $0, 7", "=v"() nounwind ret i32 %0 @@ -103,14 +91,10 @@ entry: define i32 @test_single_sgpr_output_s32() nounwind { ; CHECK-LABEL: name: test_single_sgpr_output_s32 ; CHECK: bb.1.entry: - ; CHECK-NEXT: liveins: $sgpr30_sgpr31 - ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 - ; CHECK-NEXT: INLINEASM &"s_mov_b32 $0, 7", 0 /* attdialect */, 1966090 /* regdef:SReg_32 */, def %1 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY %1 - ; CHECK-NEXT: $vgpr0 = COPY [[COPY1]](s32) - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY]] - ; CHECK-NEXT: S_SETPC_B64_return [[COPY2]], implicit $vgpr0 + ; CHECK-NEXT: INLINEASM &"s_mov_b32 $0, 7", 0 /* attdialect */, 1966090 /* regdef:SReg_32 */, def %0 + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY %0 + ; CHECK-NEXT: $vgpr0 = COPY [[COPY]](s32) + ; CHECK-NEXT: SI_RETURN implicit $vgpr0 entry: %0 = tail call i32 asm "s_mov_b32 $0, 7", "=s"() nounwind ret i32 %0 @@ -120,16 +104,12 @@ entry: define float @test_multiple_register_outputs_same() #0 { ; CHECK-LABEL: name: test_multiple_register_outputs_same ; CHECK: bb.1 (%ir-block.0): - ; CHECK-NEXT: liveins: $sgpr30_sgpr31 - ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 - ; CHECK-NEXT: INLINEASM &"v_mov_b32 $0, 0; v_mov_b32 $1, 1", 0 /* attdialect */, 1835018 /* regdef:VGPR_32 */, def %1, 1835018 /* regdef:VGPR_32 */, def %2 + ; CHECK-NEXT: INLINEASM &"v_mov_b32 $0, 0; v_mov_b32 $1, 1", 0 /* attdialect */, 1835018 /* regdef:VGPR_32 */, def %0, 1835018 /* regdef:VGPR_32 */, def %1 + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY %0 ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY %1 - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY %2 - ; CHECK-NEXT: [[FADD:%[0-9]+]]:_(s32) = G_FADD [[COPY1]], [[COPY2]] + ; CHECK-NEXT: [[FADD:%[0-9]+]]:_(s32) = G_FADD [[COPY]], [[COPY1]] ; CHECK-NEXT: $vgpr0 = COPY [[FADD]](s32) - ; CHECK-NEXT: [[COPY3:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY]] - ; CHECK-NEXT: S_SETPC_B64_return [[COPY3]], implicit $vgpr0 + ; CHECK-NEXT: SI_RETURN implicit $vgpr0 %1 = call { float, float } asm "v_mov_b32 $0, 0; v_mov_b32 $1, 1", "=v,=v"() %asmresult = extractvalue { float, float 
} %1, 0 %asmresult1 = extractvalue { float, float } %1, 1 @@ -141,17 +121,13 @@ define float @test_multiple_register_outputs_same() #0 { define double @test_multiple_register_outputs_mixed() #0 { ; CHECK-LABEL: name: test_multiple_register_outputs_mixed ; CHECK: bb.1 (%ir-block.0): - ; CHECK-NEXT: liveins: $sgpr30_sgpr31 - ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 - ; CHECK-NEXT: INLINEASM &"v_mov_b32 $0, 0; v_add_f64 $1, 0, 0", 0 /* attdialect */, 1835018 /* regdef:VGPR_32 */, def %1, 2949130 /* regdef:VReg_64 */, def %2 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY %1 - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(s64) = COPY %2 - ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY2]](s64) + ; CHECK-NEXT: INLINEASM &"v_mov_b32 $0, 0; v_add_f64 $1, 0, 0", 0 /* attdialect */, 1835018 /* regdef:VGPR_32 */, def %0, 2949130 /* regdef:VReg_64 */, def %1 + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY %0 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s64) = COPY %1 + ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](s64) ; CHECK-NEXT: $vgpr0 = COPY [[UV]](s32) ; CHECK-NEXT: $vgpr1 = COPY [[UV1]](s32) - ; CHECK-NEXT: [[COPY3:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY]] - ; CHECK-NEXT: S_SETPC_B64_return [[COPY3]], implicit $vgpr0, implicit $vgpr1 + ; CHECK-NEXT: SI_RETURN implicit $vgpr0, implicit $vgpr1 %1 = call { float, double } asm "v_mov_b32 $0, 0; v_add_f64 $1, 0, 0", "=v,=v"() %asmresult = extractvalue { float, double } %1, 1 ret double %asmresult @@ -161,16 +137,12 @@ define double @test_multiple_register_outputs_mixed() #0 { define float @test_vector_output() nounwind { ; CHECK-LABEL: name: test_vector_output ; CHECK: bb.1 (%ir-block.0): - ; CHECK-NEXT: liveins: $sgpr30_sgpr31 - ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 ; CHECK-NEXT: INLINEASM &"v_add_f64 $0, 0, 0", 1 /* sideeffect attdialect */, 10 /* regdef */, implicit-def $vgpr14_vgpr15 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr14_vgpr15 - ; CHECK-NEXT: [[EVEC:%[0-9]+]]:_(s32) = G_EXTRACT_VECTOR_ELT [[COPY1]](<2 x s32>), [[C]](s32) + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr14_vgpr15 + ; CHECK-NEXT: [[EVEC:%[0-9]+]]:_(s32) = G_EXTRACT_VECTOR_ELT [[COPY]](<2 x s32>), [[C]](s32) ; CHECK-NEXT: $vgpr0 = COPY [[EVEC]](s32) - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY]] - ; CHECK-NEXT: S_SETPC_B64_return [[COPY2]], implicit $vgpr0 + ; CHECK-NEXT: SI_RETURN implicit $vgpr0 %1 = tail call <2 x float> asm sideeffect "v_add_f64 $0, 0, 0", "={v[14:15]}"() nounwind %2 = extractelement <2 x float> %1, i32 0 ret float %2 @@ -212,16 +184,14 @@ define amdgpu_kernel void @test_input_imm() { define float @test_input_vgpr(i32 %src) nounwind { ; CHECK-LABEL: name: test_input_vgpr ; CHECK: bb.1.entry: - ; CHECK-NEXT: liveins: $vgpr0, $sgpr30_sgpr31 + ; CHECK-NEXT: liveins: $vgpr0 ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[COPY]](s32) - ; CHECK-NEXT: INLINEASM &"v_add_f32 $0, 1.0, $1", 0 /* attdialect */, 1835018 /* regdef:VGPR_32 */, def %2, 1835017 /* reguse:VGPR_32 */, [[COPY2]] - ; CHECK-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY %2 - ; CHECK-NEXT: $vgpr0 = COPY [[COPY3]](s32) - ; CHECK-NEXT: [[COPY4:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY1]] - ; CHECK-NEXT: S_SETPC_B64_return [[COPY4]], 
implicit $vgpr0 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY [[COPY]](s32) + ; CHECK-NEXT: INLINEASM &"v_add_f32 $0, 1.0, $1", 0 /* attdialect */, 1835018 /* regdef:VGPR_32 */, def %1, 1835017 /* reguse:VGPR_32 */, [[COPY1]] + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY %1 + ; CHECK-NEXT: $vgpr0 = COPY [[COPY2]](s32) + ; CHECK-NEXT: SI_RETURN implicit $vgpr0 entry: %0 = tail call float asm "v_add_f32 $0, 1.0, $1", "=v,v"(i32 %src) nounwind ret float %0 @@ -230,15 +200,13 @@ entry: define i32 @test_memory_constraint(i32 addrspace(3)* %a) nounwind { ; CHECK-LABEL: name: test_memory_constraint ; CHECK: bb.1 (%ir-block.0): - ; CHECK-NEXT: liveins: $vgpr0, $sgpr30_sgpr31 + ; CHECK-NEXT: liveins: $vgpr0 ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 - ; CHECK-NEXT: INLINEASM &"ds_read_b32 $0, $1", 8 /* mayload attdialect */, 1835018 /* regdef:VGPR_32 */, def %2, 196622 /* mem:m */, [[COPY]](p3) - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY %2 - ; CHECK-NEXT: $vgpr0 = COPY [[COPY2]](s32) - ; CHECK-NEXT: [[COPY3:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY1]] - ; CHECK-NEXT: S_SETPC_B64_return [[COPY3]], implicit $vgpr0 + ; CHECK-NEXT: INLINEASM &"ds_read_b32 $0, $1", 8 /* mayload attdialect */, 1835018 /* regdef:VGPR_32 */, def %1, 196622 /* mem:m */, [[COPY]](p3) + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY %1 + ; CHECK-NEXT: $vgpr0 = COPY [[COPY1]](s32) + ; CHECK-NEXT: SI_RETURN implicit $vgpr0 %1 = tail call i32 asm "ds_read_b32 $0, $1", "=v,*m"(i32 addrspace(3)* %a) ret i32 %1 } @@ -246,18 +214,16 @@ define i32 @test_memory_constraint(i32 addrspace(3)* %a) nounwind { define i32 @test_vgpr_matching_constraint(i32 %a) nounwind { ; CHECK-LABEL: name: test_vgpr_matching_constraint ; CHECK: bb.1 (%ir-block.0): - ; CHECK-NEXT: liveins: $vgpr0, $sgpr30_sgpr31 + ; CHECK-NEXT: liveins: $vgpr0 ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 ; CHECK-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY]], [[C]] - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[AND]](s32) - ; CHECK-NEXT: INLINEASM &";", 1 /* sideeffect attdialect */, 1835018 /* regdef:VGPR_32 */, def %4, 2147483657 /* reguse tiedto:$0 */, [[COPY2]](tied-def 3) - ; CHECK-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY %4 - ; CHECK-NEXT: $vgpr0 = COPY [[COPY3]](s32) - ; CHECK-NEXT: [[COPY4:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY1]] - ; CHECK-NEXT: S_SETPC_B64_return [[COPY4]], implicit $vgpr0 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY [[AND]](s32) + ; CHECK-NEXT: INLINEASM &";", 1 /* sideeffect attdialect */, 1835018 /* regdef:VGPR_32 */, def %3, 2147483657 /* reguse tiedto:$0 */, [[COPY1]](tied-def 3) + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY %3 + ; CHECK-NEXT: $vgpr0 = COPY [[COPY2]](s32) + ; CHECK-NEXT: SI_RETURN implicit $vgpr0 %and = and i32 %a, 1 %asm = call i32 asm sideeffect ";", "=v,0"(i32 %and) ret i32 %asm @@ -266,20 +232,16 @@ define i32 @test_vgpr_matching_constraint(i32 %a) nounwind { define i32 @test_sgpr_matching_constraint() nounwind { ; CHECK-LABEL: name: test_sgpr_matching_constraint ; CHECK: bb.1.entry: - ; CHECK-NEXT: liveins: $sgpr30_sgpr31 - ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 - ; CHECK-NEXT: INLINEASM &"s_mov_b32 $0, 7", 0 /* attdialect */, 1966090 /* regdef:SReg_32 */, def %1 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY %1 - ; CHECK-NEXT: 
INLINEASM &"s_mov_b32 $0, 8", 0 /* attdialect */, 1966090 /* regdef:SReg_32 */, def %3 - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY %3 + ; CHECK-NEXT: INLINEASM &"s_mov_b32 $0, 7", 0 /* attdialect */, 1966090 /* regdef:SReg_32 */, def %0 + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY %0 + ; CHECK-NEXT: INLINEASM &"s_mov_b32 $0, 8", 0 /* attdialect */, 1966090 /* regdef:SReg_32 */, def %2 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY %2 + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY [[COPY]](s32) ; CHECK-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY [[COPY1]](s32) - ; CHECK-NEXT: [[COPY4:%[0-9]+]]:sreg_32 = COPY [[COPY2]](s32) - ; CHECK-NEXT: INLINEASM &"s_add_u32 $0, $1, $2", 0 /* attdialect */, 1966090 /* regdef:SReg_32 */, def %5, 1966089 /* reguse:SReg_32 */, [[COPY3]], 2147483657 /* reguse tiedto:$0 */, [[COPY4]](tied-def 3) - ; CHECK-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY %5 - ; CHECK-NEXT: $vgpr0 = COPY [[COPY5]](s32) - ; CHECK-NEXT: [[COPY6:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY]] - ; CHECK-NEXT: S_SETPC_B64_return [[COPY6]], implicit $vgpr0 + ; CHECK-NEXT: INLINEASM &"s_add_u32 $0, $1, $2", 0 /* attdialect */, 1966090 /* regdef:SReg_32 */, def %4, 1966089 /* reguse:SReg_32 */, [[COPY2]], 2147483657 /* reguse tiedto:$0 */, [[COPY3]](tied-def 3) + ; CHECK-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY %4 + ; CHECK-NEXT: $vgpr0 = COPY [[COPY4]](s32) + ; CHECK-NEXT: SI_RETURN implicit $vgpr0 entry: %asm0 = tail call i32 asm "s_mov_b32 $0, 7", "=s"() nounwind %asm1 = tail call i32 asm "s_mov_b32 $0, 8", "=s"() nounwind @@ -290,25 +252,23 @@ entry: define void @test_many_matching_constraints(i32 %a, i32 %b, i32 %c) nounwind { ; CHECK-LABEL: name: test_many_matching_constraints ; CHECK: bb.1 (%ir-block.0): - ; CHECK-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2, $sgpr30_sgpr31 + ; CHECK-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2 ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; CHECK-NEXT: [[COPY3:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF - ; CHECK-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[COPY2]](s32) - ; CHECK-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY [[COPY]](s32) - ; CHECK-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY [[COPY1]](s32) - ; CHECK-NEXT: INLINEASM &"; ", 1 /* sideeffect attdialect */, 1835018 /* regdef:VGPR_32 */, def %4, 1835018 /* regdef:VGPR_32 */, def %5, 1835018 /* regdef:VGPR_32 */, def %6, 2147483657 /* reguse tiedto:$0 */, [[COPY4]](tied-def 3), 2147614729 /* reguse tiedto:$2 */, [[COPY5]](tied-def 7), 2147549193 /* reguse tiedto:$1 */, [[COPY6]](tied-def 5) + ; CHECK-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY [[COPY2]](s32) + ; CHECK-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[COPY]](s32) + ; CHECK-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY [[COPY1]](s32) + ; CHECK-NEXT: INLINEASM &"; ", 1 /* sideeffect attdialect */, 1835018 /* regdef:VGPR_32 */, def %3, 1835018 /* regdef:VGPR_32 */, def %4, 1835018 /* regdef:VGPR_32 */, def %5, 2147483657 /* reguse tiedto:$0 */, [[COPY3]](tied-def 3), 2147614729 /* reguse tiedto:$2 */, [[COPY4]](tied-def 7), 2147549193 /* reguse tiedto:$1 */, [[COPY5]](tied-def 5) + ; CHECK-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY %3 ; CHECK-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY %4 ; CHECK-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY %5 - ; CHECK-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY %6 + ; CHECK-NEXT: G_STORE [[COPY6]](s32), [[DEF]](p1) :: (store (s32) into `i32 addrspace(1)* undef`, addrspace 1) ; CHECK-NEXT: G_STORE 
[[COPY7]](s32), [[DEF]](p1) :: (store (s32) into `i32 addrspace(1)* undef`, addrspace 1) ; CHECK-NEXT: G_STORE [[COPY8]](s32), [[DEF]](p1) :: (store (s32) into `i32 addrspace(1)* undef`, addrspace 1) - ; CHECK-NEXT: G_STORE [[COPY9]](s32), [[DEF]](p1) :: (store (s32) into `i32 addrspace(1)* undef`, addrspace 1) - ; CHECK-NEXT: [[COPY10:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY3]] - ; CHECK-NEXT: S_SETPC_B64_return [[COPY10]] + ; CHECK-NEXT: SI_RETURN %asm = call {i32, i32, i32} asm sideeffect "; ", "=v,=v,=v,0,2,1"(i32 %c, i32 %a, i32 %b) %asmresult0 = extractvalue {i32, i32, i32} %asm, 0 store i32 %asmresult0, i32 addrspace(1)* undef @@ -322,17 +282,13 @@ define void @test_many_matching_constraints(i32 %a, i32 %b, i32 %c) nounwind { define i32 @test_sgpr_to_vgpr_move_matching_constraint() nounwind { ; CHECK-LABEL: name: test_sgpr_to_vgpr_move_matching_constraint ; CHECK: bb.1.entry: - ; CHECK-NEXT: liveins: $sgpr30_sgpr31 - ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 - ; CHECK-NEXT: INLINEASM &"s_mov_b32 $0, 7", 0 /* attdialect */, 1966090 /* regdef:SReg_32 */, def %1 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY %1 - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[COPY1]](s32) - ; CHECK-NEXT: INLINEASM &"v_mov_b32 $0, $1", 0 /* attdialect */, 1835018 /* regdef:VGPR_32 */, def %3, 2147483657 /* reguse tiedto:$0 */, [[COPY2]](tied-def 3) - ; CHECK-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY %3 - ; CHECK-NEXT: $vgpr0 = COPY [[COPY3]](s32) - ; CHECK-NEXT: [[COPY4:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY]] - ; CHECK-NEXT: S_SETPC_B64_return [[COPY4]], implicit $vgpr0 + ; CHECK-NEXT: INLINEASM &"s_mov_b32 $0, 7", 0 /* attdialect */, 1966090 /* regdef:SReg_32 */, def %0 + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY %0 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY [[COPY]](s32) + ; CHECK-NEXT: INLINEASM &"v_mov_b32 $0, $1", 0 /* attdialect */, 1835018 /* regdef:VGPR_32 */, def %2, 2147483657 /* reguse tiedto:$0 */, [[COPY1]](tied-def 3) + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY %2 + ; CHECK-NEXT: $vgpr0 = COPY [[COPY2]](s32) + ; CHECK-NEXT: SI_RETURN implicit $vgpr0 entry: %asm0 = tail call i32 asm "s_mov_b32 $0, 7", "=s"() nounwind %asm1 = tail call i32 asm "v_mov_b32 $0, $1", "=v,0"(i32 %asm0) nounwind diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-memory-intrinsics.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-memory-intrinsics.ll index ec903d346288..8e90bd593a10 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-memory-intrinsics.ll +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-memory-intrinsics.ll @@ -6,18 +6,16 @@ define void @test_memcpy_p1_p3_i64(i8 addrspace(1)* %dst, i8 addrspace(3)* %src) { ; CHECK-LABEL: name: test_memcpy_p1_p3_i64 ; CHECK: bb.1 (%ir-block.0): - ; CHECK-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2, $sgpr30_sgpr31 + ; CHECK-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2 ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 ; CHECK-NEXT: [[MV:%[0-9]+]]:_(p1) = G_MERGE_VALUES [[COPY]](s32), [[COPY1]](s32) ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(p3) = COPY $vgpr2 - ; CHECK-NEXT: [[COPY3:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 ; CHECK-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 256 ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(s32) = G_TRUNC [[C]](s64) ; CHECK-NEXT: G_MEMCPY [[MV]](p1), [[COPY2]](p3), [[TRUNC]](s32), 0 :: (store (s8) into %ir.dst, addrspace 1), (load (s8) from %ir.src, addrspace 3) - ; CHECK-NEXT: [[COPY4:%[0-9]+]]:ccr_sgpr_64 = COPY 
[[COPY3]] - ; CHECK-NEXT: S_SETPC_B64_return [[COPY4]] + ; CHECK-NEXT: SI_RETURN call void @llvm.memcpy.p1i8.p3i8.i64(i8 addrspace(1)* %dst, i8 addrspace(3)* %src, i64 256, i1 false) ret void } @@ -25,17 +23,15 @@ define void @test_memcpy_p1_p3_i64(i8 addrspace(1)* %dst, i8 addrspace(3)* %src) define void @test_memcpy_p1_p3_i32(i8 addrspace(1)* %dst, i8 addrspace(3)* %src) { ; CHECK-LABEL: name: test_memcpy_p1_p3_i32 ; CHECK: bb.1 (%ir-block.0): - ; CHECK-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2, $sgpr30_sgpr31 + ; CHECK-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2 ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 ; CHECK-NEXT: [[MV:%[0-9]+]]:_(p1) = G_MERGE_VALUES [[COPY]](s32), [[COPY1]](s32) ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(p3) = COPY $vgpr2 - ; CHECK-NEXT: [[COPY3:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 256 ; CHECK-NEXT: G_MEMCPY [[MV]](p1), [[COPY2]](p3), [[C]](s32), 0 :: (store (s8) into %ir.dst, addrspace 1), (load (s8) from %ir.src, addrspace 3) - ; CHECK-NEXT: [[COPY4:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY3]] - ; CHECK-NEXT: S_SETPC_B64_return [[COPY4]] + ; CHECK-NEXT: SI_RETURN call void @llvm.memcpy.p1i8.p3i8.i32(i8 addrspace(1)* %dst, i8 addrspace(3)* %src, i32 256, i1 false) ret void } @@ -43,18 +39,16 @@ define void @test_memcpy_p1_p3_i32(i8 addrspace(1)* %dst, i8 addrspace(3)* %src) define void @test_memcpy_p1_p3_i16(i8 addrspace(1)* %dst, i8 addrspace(3)* %src) { ; CHECK-LABEL: name: test_memcpy_p1_p3_i16 ; CHECK: bb.1 (%ir-block.0): - ; CHECK-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2, $sgpr30_sgpr31 + ; CHECK-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2 ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 ; CHECK-NEXT: [[MV:%[0-9]+]]:_(p1) = G_MERGE_VALUES [[COPY]](s32), [[COPY1]](s32) ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(p3) = COPY $vgpr2 - ; CHECK-NEXT: [[COPY3:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 ; CHECK-NEXT: [[C:%[0-9]+]]:_(s16) = G_CONSTANT i16 256 ; CHECK-NEXT: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[C]](s16) ; CHECK-NEXT: G_MEMCPY [[MV]](p1), [[COPY2]](p3), [[ZEXT]](s32), 0 :: (store (s8) into %ir.dst, addrspace 1), (load (s8) from %ir.src, addrspace 3) - ; CHECK-NEXT: [[COPY4:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY3]] - ; CHECK-NEXT: S_SETPC_B64_return [[COPY4]] + ; CHECK-NEXT: SI_RETURN call void @llvm.memcpy.p1i8.p3i8.i16(i8 addrspace(1)* %dst, i8 addrspace(3)* %src, i16 256, i1 false) ret void } @@ -62,18 +56,16 @@ define void @test_memcpy_p1_p3_i16(i8 addrspace(1)* %dst, i8 addrspace(3)* %src) define void @test_memcpy_p3_p1_i64(i8 addrspace(3)* %dst, i8 addrspace(1)* %src) { ; CHECK-LABEL: name: test_memcpy_p3_p1_i64 ; CHECK: bb.1 (%ir-block.0): - ; CHECK-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2, $sgpr30_sgpr31 + ; CHECK-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2 ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 ; CHECK-NEXT: [[MV:%[0-9]+]]:_(p1) = G_MERGE_VALUES [[COPY1]](s32), [[COPY2]](s32) - ; CHECK-NEXT: [[COPY3:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 ; CHECK-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 256 ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(s32) = G_TRUNC [[C]](s64) ; CHECK-NEXT: G_MEMCPY [[COPY]](p3), [[MV]](p1), [[TRUNC]](s32), 0 :: (store (s8) into %ir.dst, addrspace 3), (load (s8) from %ir.src, addrspace 1) - ; CHECK-NEXT: [[COPY4:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY3]] 
- ; CHECK-NEXT: S_SETPC_B64_return [[COPY4]] + ; CHECK-NEXT: SI_RETURN call void @llvm.memcpy.p3i8.p1i8.i64(i8 addrspace(3)* %dst, i8 addrspace(1)* %src, i64 256, i1 false) ret void } @@ -81,17 +73,15 @@ define void @test_memcpy_p3_p1_i64(i8 addrspace(3)* %dst, i8 addrspace(1)* %src) define void @test_memcpy_p3_p1_i32(i8 addrspace(3)* %dst, i8 addrspace(1)* %src) { ; CHECK-LABEL: name: test_memcpy_p3_p1_i32 ; CHECK: bb.1 (%ir-block.0): - ; CHECK-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2, $sgpr30_sgpr31 + ; CHECK-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2 ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 ; CHECK-NEXT: [[MV:%[0-9]+]]:_(p1) = G_MERGE_VALUES [[COPY1]](s32), [[COPY2]](s32) - ; CHECK-NEXT: [[COPY3:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 256 ; CHECK-NEXT: G_MEMCPY [[COPY]](p3), [[MV]](p1), [[C]](s32), 0 :: (store (s8) into %ir.dst, addrspace 3), (load (s8) from %ir.src, addrspace 1) - ; CHECK-NEXT: [[COPY4:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY3]] - ; CHECK-NEXT: S_SETPC_B64_return [[COPY4]] + ; CHECK-NEXT: SI_RETURN call void @llvm.memcpy.p3i8.p1i8.i32(i8 addrspace(3)* %dst, i8 addrspace(1)* %src, i32 256, i1 false) ret void } @@ -99,18 +89,16 @@ define void @test_memcpy_p3_p1_i32(i8 addrspace(3)* %dst, i8 addrspace(1)* %src) define void @test_memcpy_p3_p1_i16(i8 addrspace(3)* %dst, i8 addrspace(1)* %src) { ; CHECK-LABEL: name: test_memcpy_p3_p1_i16 ; CHECK: bb.1 (%ir-block.0): - ; CHECK-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2, $sgpr30_sgpr31 + ; CHECK-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2 ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 ; CHECK-NEXT: [[MV:%[0-9]+]]:_(p1) = G_MERGE_VALUES [[COPY1]](s32), [[COPY2]](s32) - ; CHECK-NEXT: [[COPY3:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 ; CHECK-NEXT: [[C:%[0-9]+]]:_(s16) = G_CONSTANT i16 256 ; CHECK-NEXT: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[C]](s16) ; CHECK-NEXT: G_MEMCPY [[COPY]](p3), [[MV]](p1), [[ZEXT]](s32), 0 :: (store (s8) into %ir.dst, addrspace 3), (load (s8) from %ir.src, addrspace 1) - ; CHECK-NEXT: [[COPY4:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY3]] - ; CHECK-NEXT: S_SETPC_B64_return [[COPY4]] + ; CHECK-NEXT: SI_RETURN call void @llvm.memcpy.p3i8.p1i8.i16(i8 addrspace(3)* %dst, i8 addrspace(1)* %src, i16 256, i1 false) ret void } @@ -118,18 +106,16 @@ define void @test_memcpy_p3_p1_i16(i8 addrspace(3)* %dst, i8 addrspace(1)* %src) define void @test_memmove_p1_p3_i64(i8 addrspace(1)* %dst, i8 addrspace(3)* %src) { ; CHECK-LABEL: name: test_memmove_p1_p3_i64 ; CHECK: bb.1 (%ir-block.0): - ; CHECK-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2, $sgpr30_sgpr31 + ; CHECK-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2 ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 ; CHECK-NEXT: [[MV:%[0-9]+]]:_(p1) = G_MERGE_VALUES [[COPY]](s32), [[COPY1]](s32) ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(p3) = COPY $vgpr2 - ; CHECK-NEXT: [[COPY3:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 ; CHECK-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 256 ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(s32) = G_TRUNC [[C]](s64) ; CHECK-NEXT: G_MEMMOVE [[MV]](p1), [[COPY2]](p3), [[TRUNC]](s32), 0 :: (store (s8) into %ir.dst, addrspace 1), (load (s8) from %ir.src, addrspace 3) - ; CHECK-NEXT: [[COPY4:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY3]] - ; 
CHECK-NEXT: S_SETPC_B64_return [[COPY4]] + ; CHECK-NEXT: SI_RETURN call void @llvm.memmove.p1i8.p3i8.i64(i8 addrspace(1)* %dst, i8 addrspace(3)* %src, i64 256, i1 false) ret void } @@ -137,17 +123,15 @@ define void @test_memmove_p1_p3_i64(i8 addrspace(1)* %dst, i8 addrspace(3)* %src define void @test_memmove_p1_p3_i32(i8 addrspace(1)* %dst, i8 addrspace(3)* %src) { ; CHECK-LABEL: name: test_memmove_p1_p3_i32 ; CHECK: bb.1 (%ir-block.0): - ; CHECK-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2, $sgpr30_sgpr31 + ; CHECK-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2 ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 ; CHECK-NEXT: [[MV:%[0-9]+]]:_(p1) = G_MERGE_VALUES [[COPY]](s32), [[COPY1]](s32) ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(p3) = COPY $vgpr2 - ; CHECK-NEXT: [[COPY3:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 256 ; CHECK-NEXT: G_MEMMOVE [[MV]](p1), [[COPY2]](p3), [[C]](s32), 0 :: (store (s8) into %ir.dst, addrspace 1), (load (s8) from %ir.src, addrspace 3) - ; CHECK-NEXT: [[COPY4:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY3]] - ; CHECK-NEXT: S_SETPC_B64_return [[COPY4]] + ; CHECK-NEXT: SI_RETURN call void @llvm.memmove.p1i8.p3i8.i32(i8 addrspace(1)* %dst, i8 addrspace(3)* %src, i32 256, i1 false) ret void } @@ -155,18 +139,16 @@ define void @test_memmove_p1_p3_i32(i8 addrspace(1)* %dst, i8 addrspace(3)* %src define void @test_memmove_p1_p3_i16(i8 addrspace(1)* %dst, i8 addrspace(3)* %src) { ; CHECK-LABEL: name: test_memmove_p1_p3_i16 ; CHECK: bb.1 (%ir-block.0): - ; CHECK-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2, $sgpr30_sgpr31 + ; CHECK-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2 ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 ; CHECK-NEXT: [[MV:%[0-9]+]]:_(p1) = G_MERGE_VALUES [[COPY]](s32), [[COPY1]](s32) ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(p3) = COPY $vgpr2 - ; CHECK-NEXT: [[COPY3:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 ; CHECK-NEXT: [[C:%[0-9]+]]:_(s16) = G_CONSTANT i16 256 ; CHECK-NEXT: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[C]](s16) ; CHECK-NEXT: G_MEMMOVE [[MV]](p1), [[COPY2]](p3), [[ZEXT]](s32), 0 :: (store (s8) into %ir.dst, addrspace 1), (load (s8) from %ir.src, addrspace 3) - ; CHECK-NEXT: [[COPY4:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY3]] - ; CHECK-NEXT: S_SETPC_B64_return [[COPY4]] + ; CHECK-NEXT: SI_RETURN call void @llvm.memmove.p1i8.p3i8.i16(i8 addrspace(1)* %dst, i8 addrspace(3)* %src, i16 256, i1 false) ret void } @@ -174,18 +156,16 @@ define void @test_memmove_p1_p3_i16(i8 addrspace(1)* %dst, i8 addrspace(3)* %src define void @test_memset_p1_i64(i8 addrspace(1)* %dst, i8 %val) { ; CHECK-LABEL: name: test_memset_p1_i64 ; CHECK: bb.1 (%ir-block.0): - ; CHECK-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2, $sgpr30_sgpr31 + ; CHECK-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2 ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 ; CHECK-NEXT: [[MV:%[0-9]+]]:_(p1) = G_MERGE_VALUES [[COPY]](s32), [[COPY1]](s32) ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(s8) = G_TRUNC [[COPY2]](s32) - ; CHECK-NEXT: [[COPY3:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 ; CHECK-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 256 ; CHECK-NEXT: G_MEMSET [[MV]](p1), [[TRUNC]](s8), [[C]](s64), 0 :: (store (s8) into %ir.dst, addrspace 1) - ; CHECK-NEXT: [[COPY4:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY3]] - ; CHECK-NEXT: S_SETPC_B64_return [[COPY4]] + ; 
CHECK-NEXT: SI_RETURN call void @llvm.memset.p1i8.i64(i8 addrspace(1)* %dst, i8 %val, i64 256, i1 false) ret void } @@ -193,19 +173,17 @@ define void @test_memset_p1_i64(i8 addrspace(1)* %dst, i8 %val) { define void @test_memset_p1_i32(i8 addrspace(1)* %dst, i8 %val) { ; CHECK-LABEL: name: test_memset_p1_i32 ; CHECK: bb.1 (%ir-block.0): - ; CHECK-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2, $sgpr30_sgpr31 + ; CHECK-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2 ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 ; CHECK-NEXT: [[MV:%[0-9]+]]:_(p1) = G_MERGE_VALUES [[COPY]](s32), [[COPY1]](s32) ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(s8) = G_TRUNC [[COPY2]](s32) - ; CHECK-NEXT: [[COPY3:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 256 ; CHECK-NEXT: [[ZEXT:%[0-9]+]]:_(s64) = G_ZEXT [[C]](s32) ; CHECK-NEXT: G_MEMSET [[MV]](p1), [[TRUNC]](s8), [[ZEXT]](s64), 0 :: (store (s8) into %ir.dst, addrspace 1) - ; CHECK-NEXT: [[COPY4:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY3]] - ; CHECK-NEXT: S_SETPC_B64_return [[COPY4]] + ; CHECK-NEXT: SI_RETURN call void @llvm.memset.p1i8.i32(i8 addrspace(1)* %dst, i8 %val, i32 256, i1 false) ret void } @@ -213,19 +191,17 @@ define void @test_memset_p1_i32(i8 addrspace(1)* %dst, i8 %val) { define void @test_memset_p1_i16(i8 addrspace(1)* %dst, i8 %val) { ; CHECK-LABEL: name: test_memset_p1_i16 ; CHECK: bb.1 (%ir-block.0): - ; CHECK-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2, $sgpr30_sgpr31 + ; CHECK-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2 ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 ; CHECK-NEXT: [[MV:%[0-9]+]]:_(p1) = G_MERGE_VALUES [[COPY]](s32), [[COPY1]](s32) ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(s8) = G_TRUNC [[COPY2]](s32) - ; CHECK-NEXT: [[COPY3:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 ; CHECK-NEXT: [[C:%[0-9]+]]:_(s16) = G_CONSTANT i16 256 ; CHECK-NEXT: [[ZEXT:%[0-9]+]]:_(s64) = G_ZEXT [[C]](s16) ; CHECK-NEXT: G_MEMSET [[MV]](p1), [[TRUNC]](s8), [[ZEXT]](s64), 0 :: (store (s8) into %ir.dst, addrspace 1) - ; CHECK-NEXT: [[COPY4:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY3]] - ; CHECK-NEXT: S_SETPC_B64_return [[COPY4]] + ; CHECK-NEXT: SI_RETURN call void @llvm.memset.p1i8.i16(i8 addrspace(1)* %dst, i8 %val, i16 256, i1 false) ret void } @@ -233,17 +209,15 @@ define void @test_memset_p1_i16(i8 addrspace(1)* %dst, i8 %val) { define void @test_memset_p3_i64(i8 addrspace(3)* %dst, i8 %val) { ; CHECK-LABEL: name: test_memset_p3_i64 ; CHECK: bb.1 (%ir-block.0): - ; CHECK-NEXT: liveins: $vgpr0, $vgpr1, $sgpr30_sgpr31 + ; CHECK-NEXT: liveins: $vgpr0, $vgpr1 ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(s8) = G_TRUNC [[COPY1]](s32) - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 ; CHECK-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 256 ; CHECK-NEXT: [[TRUNC1:%[0-9]+]]:_(s32) = G_TRUNC [[C]](s64) ; CHECK-NEXT: G_MEMSET [[COPY]](p3), [[TRUNC]](s8), [[TRUNC1]](s32), 0 :: (store (s8) into %ir.dst, addrspace 3) - ; CHECK-NEXT: [[COPY3:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY2]] - ; CHECK-NEXT: S_SETPC_B64_return [[COPY3]] + ; CHECK-NEXT: SI_RETURN call void @llvm.memset.p3i8.i64(i8 addrspace(3)* %dst, i8 %val, i64 256, i1 false) ret void } @@ -251,16 +225,14 @@ define void @test_memset_p3_i64(i8 
addrspace(3)* %dst, i8 %val) { define void @test_memset_p3_i32(i8 addrspace(3)* %dst, i8 %val) { ; CHECK-LABEL: name: test_memset_p3_i32 ; CHECK: bb.1 (%ir-block.0): - ; CHECK-NEXT: liveins: $vgpr0, $vgpr1, $sgpr30_sgpr31 + ; CHECK-NEXT: liveins: $vgpr0, $vgpr1 ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(s8) = G_TRUNC [[COPY1]](s32) - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 256 ; CHECK-NEXT: G_MEMSET [[COPY]](p3), [[TRUNC]](s8), [[C]](s32), 0 :: (store (s8) into %ir.dst, addrspace 3) - ; CHECK-NEXT: [[COPY3:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY2]] - ; CHECK-NEXT: S_SETPC_B64_return [[COPY3]] + ; CHECK-NEXT: SI_RETURN call void @llvm.memset.p3i8.i32(i8 addrspace(3)* %dst, i8 %val, i32 256, i1 false) ret void } @@ -268,17 +240,15 @@ define void @test_memset_p3_i32(i8 addrspace(3)* %dst, i8 %val) { define void @test_memset_p3_i16(i8 addrspace(3)* %dst, i8 %val) { ; CHECK-LABEL: name: test_memset_p3_i16 ; CHECK: bb.1 (%ir-block.0): - ; CHECK-NEXT: liveins: $vgpr0, $vgpr1, $sgpr30_sgpr31 + ; CHECK-NEXT: liveins: $vgpr0, $vgpr1 ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(s8) = G_TRUNC [[COPY1]](s32) - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 ; CHECK-NEXT: [[C:%[0-9]+]]:_(s16) = G_CONSTANT i16 256 ; CHECK-NEXT: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[C]](s16) ; CHECK-NEXT: G_MEMSET [[COPY]](p3), [[TRUNC]](s8), [[ZEXT]](s32), 0 :: (store (s8) into %ir.dst, addrspace 3) - ; CHECK-NEXT: [[COPY3:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY2]] - ; CHECK-NEXT: S_SETPC_B64_return [[COPY3]] + ; CHECK-NEXT: SI_RETURN call void @llvm.memset.p3i8.i16(i8 addrspace(3)* %dst, i8 %val, i16 256, i1 false) ret void } diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-metadata.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-metadata.ll index 99a2875e8b16..a1a72c07cb63 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-metadata.ll +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-metadata.ll @@ -5,15 +5,12 @@ define i32 @reloc_constant() { ; CHECK-LABEL: name: reloc_constant ; CHECK: bb.1 (%ir-block.0): - ; CHECK: liveins: $sgpr30_sgpr31 - ; CHECK: [[COPY:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 ; CHECK: [[INT0:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.reloc.constant), !0 ; We cannot have any specific metadata check here as ConstantAsMetadata is printed as ; CHECK: [[INT1:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.reloc.constant), <0x{{[0-9a-f]+}}> ; CHECK: [[SUM:%[0-9]+]]:_(s32) = G_ADD [[INT0]], [[INT1]] ; CHECK: $vgpr0 = COPY [[SUM]](s32) - ; CHECK: [[COPY1:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY]] - ; CHECK: S_SETPC_B64_return [[COPY1]], implicit $vgpr0 + ; CHECK: SI_RETURN implicit $vgpr0 %val0 = call i32 @llvm.amdgcn.reloc.constant(metadata !0) %val1 = call i32 @llvm.amdgcn.reloc.constant(metadata i32 4) %res = add i32 %val0, %val1 diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-ptrmask.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-ptrmask.ll index a88c4224a2fa..af0daceac6c6 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-ptrmask.ll +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-ptrmask.ll @@ -4,7 +4,7 @@ define i8* @ptrmask_flat_i64(i8* %ptr, i64 %mask) { ; CHECK-LABEL: name: ptrmask_flat_i64 ; CHECK: bb.1 
(%ir-block.0): - ; CHECK-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $sgpr30_sgpr31 + ; CHECK-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3 ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 @@ -12,13 +12,11 @@ define i8* @ptrmask_flat_i64(i8* %ptr, i64 %mask) { ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 ; CHECK-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $vgpr3 ; CHECK-NEXT: [[MV1:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY2]](s32), [[COPY3]](s32) - ; CHECK-NEXT: [[COPY4:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 ; CHECK-NEXT: [[PTRMASK:%[0-9]+]]:_(p0) = G_PTRMASK [[MV]], [[MV1]](s64) ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[PTRMASK]](p0) ; CHECK-NEXT: $vgpr0 = COPY [[UV]](s32) ; CHECK-NEXT: $vgpr1 = COPY [[UV1]](s32) - ; CHECK-NEXT: [[COPY5:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY4]] - ; CHECK-NEXT: S_SETPC_B64_return [[COPY5]], implicit $vgpr0, implicit $vgpr1 + ; CHECK-NEXT: SI_RETURN implicit $vgpr0, implicit $vgpr1 %masked = call i8* @llvm.ptrmask.p0i8.i64(i8* %ptr, i64 %mask) ret i8* %masked } @@ -26,19 +24,17 @@ define i8* @ptrmask_flat_i64(i8* %ptr, i64 %mask) { define i8* @ptrmask_flat_i32(i8* %ptr, i32 %mask) { ; CHECK-LABEL: name: ptrmask_flat_i32 ; CHECK: bb.1 (%ir-block.0): - ; CHECK-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2, $sgpr30_sgpr31 + ; CHECK-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2 ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 ; CHECK-NEXT: [[MV:%[0-9]+]]:_(p0) = G_MERGE_VALUES [[COPY]](s32), [[COPY1]](s32) ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; CHECK-NEXT: [[COPY3:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 ; CHECK-NEXT: [[PTRMASK:%[0-9]+]]:_(p0) = G_PTRMASK [[MV]], [[COPY2]](s32) ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[PTRMASK]](p0) ; CHECK-NEXT: $vgpr0 = COPY [[UV]](s32) ; CHECK-NEXT: $vgpr1 = COPY [[UV1]](s32) - ; CHECK-NEXT: [[COPY4:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY3]] - ; CHECK-NEXT: S_SETPC_B64_return [[COPY4]], implicit $vgpr0, implicit $vgpr1 + ; CHECK-NEXT: SI_RETURN implicit $vgpr0, implicit $vgpr1 %masked = call i8* @llvm.ptrmask.p0i8.i32(i8* %ptr, i32 %mask) ret i8* %masked } @@ -46,20 +42,18 @@ define i8* @ptrmask_flat_i32(i8* %ptr, i32 %mask) { define i8* @ptrmask_flat_i16(i8* %ptr, i16 %mask) { ; CHECK-LABEL: name: ptrmask_flat_i16 ; CHECK: bb.1 (%ir-block.0): - ; CHECK-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2, $sgpr30_sgpr31 + ; CHECK-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2 ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 ; CHECK-NEXT: [[MV:%[0-9]+]]:_(p0) = G_MERGE_VALUES [[COPY]](s32), [[COPY1]](s32) ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY2]](s32) - ; CHECK-NEXT: [[COPY3:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 ; CHECK-NEXT: [[PTRMASK:%[0-9]+]]:_(p0) = G_PTRMASK [[MV]], [[TRUNC]](s16) ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[PTRMASK]](p0) ; CHECK-NEXT: $vgpr0 = COPY [[UV]](s32) ; CHECK-NEXT: $vgpr1 = COPY [[UV1]](s32) - ; CHECK-NEXT: [[COPY4:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY3]] - ; CHECK-NEXT: S_SETPC_B64_return [[COPY4]], implicit $vgpr0, implicit $vgpr1 + ; CHECK-NEXT: SI_RETURN implicit $vgpr0, implicit $vgpr1 %masked = call i8* @llvm.ptrmask.p0i8.i16(i8* %ptr, i16 %mask) ret i8* %masked } @@ -67,20 +61,18 @@ define i8* 
@ptrmask_flat_i16(i8* %ptr, i16 %mask) { define i8* @ptrmask_flat_i1(i8* %ptr, i1 %mask) { ; CHECK-LABEL: name: ptrmask_flat_i1 ; CHECK: bb.1 (%ir-block.0): - ; CHECK-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2, $sgpr30_sgpr31 + ; CHECK-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2 ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 ; CHECK-NEXT: [[MV:%[0-9]+]]:_(p0) = G_MERGE_VALUES [[COPY]](s32), [[COPY1]](s32) ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(s1) = G_TRUNC [[COPY2]](s32) - ; CHECK-NEXT: [[COPY3:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 ; CHECK-NEXT: [[PTRMASK:%[0-9]+]]:_(p0) = G_PTRMASK [[MV]], [[TRUNC]](s1) ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[PTRMASK]](p0) ; CHECK-NEXT: $vgpr0 = COPY [[UV]](s32) ; CHECK-NEXT: $vgpr1 = COPY [[UV1]](s32) - ; CHECK-NEXT: [[COPY4:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY3]] - ; CHECK-NEXT: S_SETPC_B64_return [[COPY4]], implicit $vgpr0, implicit $vgpr1 + ; CHECK-NEXT: SI_RETURN implicit $vgpr0, implicit $vgpr1 %masked = call i8* @llvm.ptrmask.p0i8.i1(i8* %ptr, i1 %mask) ret i8* %masked } @@ -88,17 +80,15 @@ define i8* @ptrmask_flat_i1(i8* %ptr, i1 %mask) { define i8 addrspace(3)* @ptrmask_local_i64(i8 addrspace(3)* %ptr, i64 %mask) { ; CHECK-LABEL: name: ptrmask_local_i64 ; CHECK: bb.1 (%ir-block.0): - ; CHECK-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2, $sgpr30_sgpr31 + ; CHECK-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2 ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 ; CHECK-NEXT: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY1]](s32), [[COPY2]](s32) - ; CHECK-NEXT: [[COPY3:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 ; CHECK-NEXT: [[PTRMASK:%[0-9]+]]:_(p3) = G_PTRMASK [[COPY]], [[MV]](s64) ; CHECK-NEXT: $vgpr0 = COPY [[PTRMASK]](p3) - ; CHECK-NEXT: [[COPY4:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY3]] - ; CHECK-NEXT: S_SETPC_B64_return [[COPY4]], implicit $vgpr0 + ; CHECK-NEXT: SI_RETURN implicit $vgpr0 %masked = call i8 addrspace(3)* @llvm.ptrmask.p3i8.i64(i8 addrspace(3)* %ptr, i64 %mask) ret i8 addrspace(3)* %masked } @@ -106,15 +96,13 @@ define i8 addrspace(3)* @ptrmask_local_i64(i8 addrspace(3)* %ptr, i64 %mask) { define i8 addrspace(3)* @ptrmask_local_i32(i8 addrspace(3)* %ptr, i32 %mask) { ; CHECK-LABEL: name: ptrmask_local_i32 ; CHECK: bb.1 (%ir-block.0): - ; CHECK-NEXT: liveins: $vgpr0, $vgpr1, $sgpr30_sgpr31 + ; CHECK-NEXT: liveins: $vgpr0, $vgpr1 ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 ; CHECK-NEXT: [[PTRMASK:%[0-9]+]]:_(p3) = G_PTRMASK [[COPY]], [[COPY1]](s32) ; CHECK-NEXT: $vgpr0 = COPY [[PTRMASK]](p3) - ; CHECK-NEXT: [[COPY3:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY2]] - ; CHECK-NEXT: S_SETPC_B64_return [[COPY3]], implicit $vgpr0 + ; CHECK-NEXT: SI_RETURN implicit $vgpr0 %masked = call i8 addrspace(3)* @llvm.ptrmask.p3i8.i32(i8 addrspace(3)* %ptr, i32 %mask) ret i8 addrspace(3)* %masked } @@ -122,16 +110,14 @@ define i8 addrspace(3)* @ptrmask_local_i32(i8 addrspace(3)* %ptr, i32 %mask) { define i8 addrspace(3)* @ptrmask_local_i16(i8 addrspace(3)* %ptr, i16 %mask) { ; CHECK-LABEL: name: ptrmask_local_i16 ; CHECK: bb.1 (%ir-block.0): - ; CHECK-NEXT: liveins: $vgpr0, $vgpr1, $sgpr30_sgpr31 + ; CHECK-NEXT: liveins: $vgpr0, $vgpr1 ; CHECK-NEXT: {{ $}} ; 
CHECK-NEXT: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY1]](s32) - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 ; CHECK-NEXT: [[PTRMASK:%[0-9]+]]:_(p3) = G_PTRMASK [[COPY]], [[TRUNC]](s16) ; CHECK-NEXT: $vgpr0 = COPY [[PTRMASK]](p3) - ; CHECK-NEXT: [[COPY3:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY2]] - ; CHECK-NEXT: S_SETPC_B64_return [[COPY3]], implicit $vgpr0 + ; CHECK-NEXT: SI_RETURN implicit $vgpr0 %masked = call i8 addrspace(3)* @llvm.ptrmask.p3i8.i16(i8 addrspace(3)* %ptr, i16 %mask) ret i8 addrspace(3)* %masked } @@ -139,16 +125,14 @@ define i8 addrspace(3)* @ptrmask_local_i16(i8 addrspace(3)* %ptr, i16 %mask) { define i8 addrspace(3)* @ptrmask_local_i1(i8 addrspace(3)* %ptr, i1 %mask) { ; CHECK-LABEL: name: ptrmask_local_i1 ; CHECK: bb.1 (%ir-block.0): - ; CHECK-NEXT: liveins: $vgpr0, $vgpr1, $sgpr30_sgpr31 + ; CHECK-NEXT: liveins: $vgpr0, $vgpr1 ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(s1) = G_TRUNC [[COPY1]](s32) - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 ; CHECK-NEXT: [[PTRMASK:%[0-9]+]]:_(p3) = G_PTRMASK [[COPY]], [[TRUNC]](s1) ; CHECK-NEXT: $vgpr0 = COPY [[PTRMASK]](p3) - ; CHECK-NEXT: [[COPY3:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY2]] - ; CHECK-NEXT: S_SETPC_B64_return [[COPY3]], implicit $vgpr0 + ; CHECK-NEXT: SI_RETURN implicit $vgpr0 %masked = call i8 addrspace(3)* @llvm.ptrmask.p3i8.i1(i8 addrspace(3)* %ptr, i1 %mask) ret i8 addrspace(3)* %masked } diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-sat.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-sat.ll index 0db076c6b0fc..4faea44d1582 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-sat.ll +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-sat.ll @@ -4,18 +4,16 @@ define i16 @uaddsat_i16(i16 %lhs, i16 %rhs) { ; CHECK-LABEL: name: uaddsat_i16 ; CHECK: bb.1 (%ir-block.0): - ; CHECK-NEXT: liveins: $vgpr0, $vgpr1, $sgpr30_sgpr31 + ; CHECK-NEXT: liveins: $vgpr0, $vgpr1 ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY]](s32) ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 ; CHECK-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY1]](s32) - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 ; CHECK-NEXT: [[UADDSAT:%[0-9]+]]:_(s16) = G_UADDSAT [[TRUNC]], [[TRUNC1]] ; CHECK-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[UADDSAT]](s16) ; CHECK-NEXT: $vgpr0 = COPY [[ANYEXT]](s32) - ; CHECK-NEXT: [[COPY3:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY2]] - ; CHECK-NEXT: S_SETPC_B64_return [[COPY3]], implicit $vgpr0 + ; CHECK-NEXT: SI_RETURN implicit $vgpr0 %res = call i16 @llvm.uadd.sat.i16(i16 %lhs, i16 %rhs) ret i16 %res } @@ -24,15 +22,13 @@ declare i16 @llvm.uadd.sat.i16(i16, i16) define i32 @uaddsat_i32(i32 %lhs, i32 %rhs) { ; CHECK-LABEL: name: uaddsat_i32 ; CHECK: bb.1 (%ir-block.0): - ; CHECK-NEXT: liveins: $vgpr0, $vgpr1, $sgpr30_sgpr31 + ; CHECK-NEXT: liveins: $vgpr0, $vgpr1 ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 ; CHECK-NEXT: [[UADDSAT:%[0-9]+]]:_(s32) = G_UADDSAT [[COPY]], [[COPY1]] ; CHECK-NEXT: $vgpr0 = COPY [[UADDSAT]](s32) - ; CHECK-NEXT: [[COPY3:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY2]] - ; CHECK-NEXT: 
S_SETPC_B64_return [[COPY3]], implicit $vgpr0 + ; CHECK-NEXT: SI_RETURN implicit $vgpr0 %res = call i32 @llvm.uadd.sat.i32(i32 %lhs, i32 %rhs) ret i32 %res } @@ -41,7 +37,7 @@ declare i32 @llvm.uadd.sat.i32(i32, i32) define i64 @uaddsat_i64(i64 %lhs, i64 %rhs) { ; CHECK-LABEL: name: uaddsat_i64 ; CHECK: bb.1 (%ir-block.0): - ; CHECK-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $sgpr30_sgpr31 + ; CHECK-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3 ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 @@ -49,13 +45,11 @@ define i64 @uaddsat_i64(i64 %lhs, i64 %rhs) { ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 ; CHECK-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $vgpr3 ; CHECK-NEXT: [[MV1:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY2]](s32), [[COPY3]](s32) - ; CHECK-NEXT: [[COPY4:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 ; CHECK-NEXT: [[UADDSAT:%[0-9]+]]:_(s64) = G_UADDSAT [[MV]], [[MV1]] ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[UADDSAT]](s64) ; CHECK-NEXT: $vgpr0 = COPY [[UV]](s32) ; CHECK-NEXT: $vgpr1 = COPY [[UV1]](s32) - ; CHECK-NEXT: [[COPY5:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY4]] - ; CHECK-NEXT: S_SETPC_B64_return [[COPY5]], implicit $vgpr0, implicit $vgpr1 + ; CHECK-NEXT: SI_RETURN implicit $vgpr0, implicit $vgpr1 %res = call i64 @llvm.uadd.sat.i64(i64 %lhs, i64 %rhs) ret i64 %res } @@ -64,7 +58,7 @@ declare i64 @llvm.uadd.sat.i64(i64, i64) define <2 x i32> @uaddsat_v2i32(<2 x i32> %lhs, <2 x i32> %rhs) { ; CHECK-LABEL: name: uaddsat_v2i32 ; CHECK: bb.1 (%ir-block.0): - ; CHECK-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $sgpr30_sgpr31 + ; CHECK-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3 ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 @@ -72,13 +66,11 @@ define <2 x i32> @uaddsat_v2i32(<2 x i32> %lhs, <2 x i32> %rhs) { ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 ; CHECK-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $vgpr3 ; CHECK-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[COPY2]](s32), [[COPY3]](s32) - ; CHECK-NEXT: [[COPY4:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 ; CHECK-NEXT: [[UADDSAT:%[0-9]+]]:_(<2 x s32>) = G_UADDSAT [[BUILD_VECTOR]], [[BUILD_VECTOR1]] ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[UADDSAT]](<2 x s32>) ; CHECK-NEXT: $vgpr0 = COPY [[UV]](s32) ; CHECK-NEXT: $vgpr1 = COPY [[UV1]](s32) - ; CHECK-NEXT: [[COPY5:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY4]] - ; CHECK-NEXT: S_SETPC_B64_return [[COPY5]], implicit $vgpr0, implicit $vgpr1 + ; CHECK-NEXT: SI_RETURN implicit $vgpr0, implicit $vgpr1 %res = call <2 x i32> @llvm.uadd.sat.v2i32(<2 x i32> %lhs, <2 x i32> %rhs) ret <2 x i32> %res } @@ -87,18 +79,16 @@ declare <2 x i32> @llvm.uadd.sat.v2i32(<2 x i32>, <2 x i32>) define i16 @saddsat_i16(i16 %lhs, i16 %rhs) { ; CHECK-LABEL: name: saddsat_i16 ; CHECK: bb.1 (%ir-block.0): - ; CHECK-NEXT: liveins: $vgpr0, $vgpr1, $sgpr30_sgpr31 + ; CHECK-NEXT: liveins: $vgpr0, $vgpr1 ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY]](s32) ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 ; CHECK-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY1]](s32) - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 ; CHECK-NEXT: [[SADDSAT:%[0-9]+]]:_(s16) = G_SADDSAT [[TRUNC]], [[TRUNC1]] ; CHECK-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[SADDSAT]](s16) ; CHECK-NEXT: 
$vgpr0 = COPY [[ANYEXT]](s32) - ; CHECK-NEXT: [[COPY3:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY2]] - ; CHECK-NEXT: S_SETPC_B64_return [[COPY3]], implicit $vgpr0 + ; CHECK-NEXT: SI_RETURN implicit $vgpr0 %res = call i16 @llvm.sadd.sat.i16(i16 %lhs, i16 %rhs) ret i16 %res } @@ -107,15 +97,13 @@ declare i16 @llvm.sadd.sat.i16(i16, i16) define i32 @saddsat_i32(i32 %lhs, i32 %rhs) { ; CHECK-LABEL: name: saddsat_i32 ; CHECK: bb.1 (%ir-block.0): - ; CHECK-NEXT: liveins: $vgpr0, $vgpr1, $sgpr30_sgpr31 + ; CHECK-NEXT: liveins: $vgpr0, $vgpr1 ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 ; CHECK-NEXT: [[SADDSAT:%[0-9]+]]:_(s32) = G_SADDSAT [[COPY]], [[COPY1]] ; CHECK-NEXT: $vgpr0 = COPY [[SADDSAT]](s32) - ; CHECK-NEXT: [[COPY3:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY2]] - ; CHECK-NEXT: S_SETPC_B64_return [[COPY3]], implicit $vgpr0 + ; CHECK-NEXT: SI_RETURN implicit $vgpr0 %res = call i32 @llvm.sadd.sat.i32(i32 %lhs, i32 %rhs) ret i32 %res } @@ -124,7 +112,7 @@ declare i32 @llvm.sadd.sat.i32(i32, i32) define i64 @saddsat_i64(i64 %lhs, i64 %rhs) { ; CHECK-LABEL: name: saddsat_i64 ; CHECK: bb.1 (%ir-block.0): - ; CHECK-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $sgpr30_sgpr31 + ; CHECK-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3 ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 @@ -132,13 +120,11 @@ define i64 @saddsat_i64(i64 %lhs, i64 %rhs) { ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 ; CHECK-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $vgpr3 ; CHECK-NEXT: [[MV1:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY2]](s32), [[COPY3]](s32) - ; CHECK-NEXT: [[COPY4:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 ; CHECK-NEXT: [[SADDSAT:%[0-9]+]]:_(s64) = G_SADDSAT [[MV]], [[MV1]] ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[SADDSAT]](s64) ; CHECK-NEXT: $vgpr0 = COPY [[UV]](s32) ; CHECK-NEXT: $vgpr1 = COPY [[UV1]](s32) - ; CHECK-NEXT: [[COPY5:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY4]] - ; CHECK-NEXT: S_SETPC_B64_return [[COPY5]], implicit $vgpr0, implicit $vgpr1 + ; CHECK-NEXT: SI_RETURN implicit $vgpr0, implicit $vgpr1 %res = call i64 @llvm.sadd.sat.i64(i64 %lhs, i64 %rhs) ret i64 %res } @@ -147,7 +133,7 @@ declare i64 @llvm.sadd.sat.i64(i64, i64) define <2 x i32> @saddsat_v2i32(<2 x i32> %lhs, <2 x i32> %rhs) { ; CHECK-LABEL: name: saddsat_v2i32 ; CHECK: bb.1 (%ir-block.0): - ; CHECK-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $sgpr30_sgpr31 + ; CHECK-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3 ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 @@ -155,13 +141,11 @@ define <2 x i32> @saddsat_v2i32(<2 x i32> %lhs, <2 x i32> %rhs) { ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 ; CHECK-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $vgpr3 ; CHECK-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[COPY2]](s32), [[COPY3]](s32) - ; CHECK-NEXT: [[COPY4:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 ; CHECK-NEXT: [[SADDSAT:%[0-9]+]]:_(<2 x s32>) = G_SADDSAT [[BUILD_VECTOR]], [[BUILD_VECTOR1]] ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[SADDSAT]](<2 x s32>) ; CHECK-NEXT: $vgpr0 = COPY [[UV]](s32) ; CHECK-NEXT: $vgpr1 = COPY [[UV1]](s32) - ; CHECK-NEXT: [[COPY5:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY4]] - ; CHECK-NEXT: S_SETPC_B64_return [[COPY5]], implicit $vgpr0, 
implicit $vgpr1 + ; CHECK-NEXT: SI_RETURN implicit $vgpr0, implicit $vgpr1 %res = call <2 x i32> @llvm.sadd.sat.v2i32(<2 x i32> %lhs, <2 x i32> %rhs) ret <2 x i32> %res } @@ -170,18 +154,16 @@ declare <2 x i32> @llvm.sadd.sat.v2i32(<2 x i32>, <2 x i32>) define i16 @usubsat_i16(i16 %lhs, i16 %rhs) { ; CHECK-LABEL: name: usubsat_i16 ; CHECK: bb.1 (%ir-block.0): - ; CHECK-NEXT: liveins: $vgpr0, $vgpr1, $sgpr30_sgpr31 + ; CHECK-NEXT: liveins: $vgpr0, $vgpr1 ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY]](s32) ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 ; CHECK-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY1]](s32) - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 ; CHECK-NEXT: [[USUBSAT:%[0-9]+]]:_(s16) = G_USUBSAT [[TRUNC]], [[TRUNC1]] ; CHECK-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[USUBSAT]](s16) ; CHECK-NEXT: $vgpr0 = COPY [[ANYEXT]](s32) - ; CHECK-NEXT: [[COPY3:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY2]] - ; CHECK-NEXT: S_SETPC_B64_return [[COPY3]], implicit $vgpr0 + ; CHECK-NEXT: SI_RETURN implicit $vgpr0 %res = call i16 @llvm.usub.sat.i16(i16 %lhs, i16 %rhs) ret i16 %res } @@ -190,15 +172,13 @@ declare i16 @llvm.usub.sat.i16(i16, i16) define i32 @usubsat_i32(i32 %lhs, i32 %rhs) { ; CHECK-LABEL: name: usubsat_i32 ; CHECK: bb.1 (%ir-block.0): - ; CHECK-NEXT: liveins: $vgpr0, $vgpr1, $sgpr30_sgpr31 + ; CHECK-NEXT: liveins: $vgpr0, $vgpr1 ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 ; CHECK-NEXT: [[USUBSAT:%[0-9]+]]:_(s32) = G_USUBSAT [[COPY]], [[COPY1]] ; CHECK-NEXT: $vgpr0 = COPY [[USUBSAT]](s32) - ; CHECK-NEXT: [[COPY3:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY2]] - ; CHECK-NEXT: S_SETPC_B64_return [[COPY3]], implicit $vgpr0 + ; CHECK-NEXT: SI_RETURN implicit $vgpr0 %res = call i32 @llvm.usub.sat.i32(i32 %lhs, i32 %rhs) ret i32 %res } @@ -207,7 +187,7 @@ declare i32 @llvm.usub.sat.i32(i32, i32) define i64 @usubsat_i64(i64 %lhs, i64 %rhs) { ; CHECK-LABEL: name: usubsat_i64 ; CHECK: bb.1 (%ir-block.0): - ; CHECK-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $sgpr30_sgpr31 + ; CHECK-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3 ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 @@ -215,13 +195,11 @@ define i64 @usubsat_i64(i64 %lhs, i64 %rhs) { ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 ; CHECK-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $vgpr3 ; CHECK-NEXT: [[MV1:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY2]](s32), [[COPY3]](s32) - ; CHECK-NEXT: [[COPY4:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 ; CHECK-NEXT: [[USUBSAT:%[0-9]+]]:_(s64) = G_USUBSAT [[MV]], [[MV1]] ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[USUBSAT]](s64) ; CHECK-NEXT: $vgpr0 = COPY [[UV]](s32) ; CHECK-NEXT: $vgpr1 = COPY [[UV1]](s32) - ; CHECK-NEXT: [[COPY5:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY4]] - ; CHECK-NEXT: S_SETPC_B64_return [[COPY5]], implicit $vgpr0, implicit $vgpr1 + ; CHECK-NEXT: SI_RETURN implicit $vgpr0, implicit $vgpr1 %res = call i64 @llvm.usub.sat.i64(i64 %lhs, i64 %rhs) ret i64 %res } @@ -230,7 +208,7 @@ declare i64 @llvm.usub.sat.i64(i64, i64) define <2 x i32> @usubsat_v2i32(<2 x i32> %lhs, <2 x i32> %rhs) { ; CHECK-LABEL: name: usubsat_v2i32 ; CHECK: bb.1 (%ir-block.0): - ; CHECK-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $sgpr30_sgpr31 + ; CHECK-NEXT: 
liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3 ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 @@ -238,13 +216,11 @@ define <2 x i32> @usubsat_v2i32(<2 x i32> %lhs, <2 x i32> %rhs) { ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 ; CHECK-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $vgpr3 ; CHECK-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[COPY2]](s32), [[COPY3]](s32) - ; CHECK-NEXT: [[COPY4:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 ; CHECK-NEXT: [[USUBSAT:%[0-9]+]]:_(<2 x s32>) = G_USUBSAT [[BUILD_VECTOR]], [[BUILD_VECTOR1]] ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[USUBSAT]](<2 x s32>) ; CHECK-NEXT: $vgpr0 = COPY [[UV]](s32) ; CHECK-NEXT: $vgpr1 = COPY [[UV1]](s32) - ; CHECK-NEXT: [[COPY5:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY4]] - ; CHECK-NEXT: S_SETPC_B64_return [[COPY5]], implicit $vgpr0, implicit $vgpr1 + ; CHECK-NEXT: SI_RETURN implicit $vgpr0, implicit $vgpr1 %res = call <2 x i32> @llvm.usub.sat.v2i32(<2 x i32> %lhs, <2 x i32> %rhs) ret <2 x i32> %res } @@ -253,18 +229,16 @@ declare <2 x i32> @llvm.usub.sat.v2i32(<2 x i32>, <2 x i32>) define i16 @ssubsat_i16(i16 %lhs, i16 %rhs) { ; CHECK-LABEL: name: ssubsat_i16 ; CHECK: bb.1 (%ir-block.0): - ; CHECK-NEXT: liveins: $vgpr0, $vgpr1, $sgpr30_sgpr31 + ; CHECK-NEXT: liveins: $vgpr0, $vgpr1 ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY]](s32) ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 ; CHECK-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY1]](s32) - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 ; CHECK-NEXT: [[SSUBSAT:%[0-9]+]]:_(s16) = G_SSUBSAT [[TRUNC]], [[TRUNC1]] ; CHECK-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[SSUBSAT]](s16) ; CHECK-NEXT: $vgpr0 = COPY [[ANYEXT]](s32) - ; CHECK-NEXT: [[COPY3:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY2]] - ; CHECK-NEXT: S_SETPC_B64_return [[COPY3]], implicit $vgpr0 + ; CHECK-NEXT: SI_RETURN implicit $vgpr0 %res = call i16 @llvm.ssub.sat.i16(i16 %lhs, i16 %rhs) ret i16 %res } @@ -273,15 +247,13 @@ declare i16 @llvm.ssub.sat.i16(i16, i16) define i32 @ssubsat_i32(i32 %lhs, i32 %rhs) { ; CHECK-LABEL: name: ssubsat_i32 ; CHECK: bb.1 (%ir-block.0): - ; CHECK-NEXT: liveins: $vgpr0, $vgpr1, $sgpr30_sgpr31 + ; CHECK-NEXT: liveins: $vgpr0, $vgpr1 ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 ; CHECK-NEXT: [[SSUBSAT:%[0-9]+]]:_(s32) = G_SSUBSAT [[COPY]], [[COPY1]] ; CHECK-NEXT: $vgpr0 = COPY [[SSUBSAT]](s32) - ; CHECK-NEXT: [[COPY3:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY2]] - ; CHECK-NEXT: S_SETPC_B64_return [[COPY3]], implicit $vgpr0 + ; CHECK-NEXT: SI_RETURN implicit $vgpr0 %res = call i32 @llvm.ssub.sat.i32(i32 %lhs, i32 %rhs) ret i32 %res } @@ -290,7 +262,7 @@ declare i32 @llvm.ssub.sat.i32(i32, i32) define i64 @ssubsat_i64(i64 %lhs, i64 %rhs) { ; CHECK-LABEL: name: ssubsat_i64 ; CHECK: bb.1 (%ir-block.0): - ; CHECK-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $sgpr30_sgpr31 + ; CHECK-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3 ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 @@ -298,13 +270,11 @@ define i64 @ssubsat_i64(i64 %lhs, i64 %rhs) { ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 ; CHECK-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $vgpr3 ; CHECK-NEXT: 
[[MV1:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY2]](s32), [[COPY3]](s32) - ; CHECK-NEXT: [[COPY4:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 ; CHECK-NEXT: [[SSUBSAT:%[0-9]+]]:_(s64) = G_SSUBSAT [[MV]], [[MV1]] ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[SSUBSAT]](s64) ; CHECK-NEXT: $vgpr0 = COPY [[UV]](s32) ; CHECK-NEXT: $vgpr1 = COPY [[UV1]](s32) - ; CHECK-NEXT: [[COPY5:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY4]] - ; CHECK-NEXT: S_SETPC_B64_return [[COPY5]], implicit $vgpr0, implicit $vgpr1 + ; CHECK-NEXT: SI_RETURN implicit $vgpr0, implicit $vgpr1 %res = call i64 @llvm.ssub.sat.i64(i64 %lhs, i64 %rhs) ret i64 %res } @@ -313,7 +283,7 @@ declare i64 @llvm.ssub.sat.i64(i64, i64) define <2 x i32> @ssubsat_v2i32(<2 x i32> %lhs, <2 x i32> %rhs) { ; CHECK-LABEL: name: ssubsat_v2i32 ; CHECK: bb.1 (%ir-block.0): - ; CHECK-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $sgpr30_sgpr31 + ; CHECK-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3 ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 @@ -321,13 +291,11 @@ define <2 x i32> @ssubsat_v2i32(<2 x i32> %lhs, <2 x i32> %rhs) { ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 ; CHECK-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $vgpr3 ; CHECK-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[COPY2]](s32), [[COPY3]](s32) - ; CHECK-NEXT: [[COPY4:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 ; CHECK-NEXT: [[SSUBSAT:%[0-9]+]]:_(<2 x s32>) = G_SSUBSAT [[BUILD_VECTOR]], [[BUILD_VECTOR1]] ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[SSUBSAT]](<2 x s32>) ; CHECK-NEXT: $vgpr0 = COPY [[UV]](s32) ; CHECK-NEXT: $vgpr1 = COPY [[UV1]](s32) - ; CHECK-NEXT: [[COPY5:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY4]] - ; CHECK-NEXT: S_SETPC_B64_return [[COPY5]], implicit $vgpr0, implicit $vgpr1 + ; CHECK-NEXT: SI_RETURN implicit $vgpr0, implicit $vgpr1 %res = call <2 x i32> @llvm.ssub.sat.v2i32(<2 x i32> %lhs, <2 x i32> %rhs) ret <2 x i32> %res } @@ -336,18 +304,16 @@ declare <2 x i32> @llvm.ssub.sat.v2i32(<2 x i32>, <2 x i32>) define i16 @ushlsat_i16(i16 %lhs, i16 %rhs) { ; CHECK-LABEL: name: ushlsat_i16 ; CHECK: bb.1 (%ir-block.0): - ; CHECK-NEXT: liveins: $vgpr0, $vgpr1, $sgpr30_sgpr31 + ; CHECK-NEXT: liveins: $vgpr0, $vgpr1 ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY]](s32) ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 ; CHECK-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY1]](s32) - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 ; CHECK-NEXT: [[USHLSAT:%[0-9]+]]:_(s16) = G_USHLSAT [[TRUNC]], [[TRUNC1]](s16) ; CHECK-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[USHLSAT]](s16) ; CHECK-NEXT: $vgpr0 = COPY [[ANYEXT]](s32) - ; CHECK-NEXT: [[COPY3:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY2]] - ; CHECK-NEXT: S_SETPC_B64_return [[COPY3]], implicit $vgpr0 + ; CHECK-NEXT: SI_RETURN implicit $vgpr0 %res = call i16 @llvm.ushl.sat.i16(i16 %lhs, i16 %rhs) ret i16 %res } @@ -356,15 +322,13 @@ declare i16 @llvm.ushl.sat.i16(i16, i16) define i32 @ushlsat_i32(i32 %lhs, i32 %rhs) { ; CHECK-LABEL: name: ushlsat_i32 ; CHECK: bb.1 (%ir-block.0): - ; CHECK-NEXT: liveins: $vgpr0, $vgpr1, $sgpr30_sgpr31 + ; CHECK-NEXT: liveins: $vgpr0, $vgpr1 ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 ; CHECK-NEXT: 
[[USHLSAT:%[0-9]+]]:_(s32) = G_USHLSAT [[COPY]], [[COPY1]](s32) ; CHECK-NEXT: $vgpr0 = COPY [[USHLSAT]](s32) - ; CHECK-NEXT: [[COPY3:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY2]] - ; CHECK-NEXT: S_SETPC_B64_return [[COPY3]], implicit $vgpr0 + ; CHECK-NEXT: SI_RETURN implicit $vgpr0 %res = call i32 @llvm.ushl.sat.i32(i32 %lhs, i32 %rhs) ret i32 %res } @@ -373,7 +337,7 @@ declare i32 @llvm.ushl.sat.i32(i32, i32) define i64 @ushlsat_i64(i64 %lhs, i64 %rhs) { ; CHECK-LABEL: name: ushlsat_i64 ; CHECK: bb.1 (%ir-block.0): - ; CHECK-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $sgpr30_sgpr31 + ; CHECK-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3 ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 @@ -381,13 +345,11 @@ define i64 @ushlsat_i64(i64 %lhs, i64 %rhs) { ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 ; CHECK-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $vgpr3 ; CHECK-NEXT: [[MV1:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY2]](s32), [[COPY3]](s32) - ; CHECK-NEXT: [[COPY4:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 ; CHECK-NEXT: [[USHLSAT:%[0-9]+]]:_(s64) = G_USHLSAT [[MV]], [[MV1]](s64) ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[USHLSAT]](s64) ; CHECK-NEXT: $vgpr0 = COPY [[UV]](s32) ; CHECK-NEXT: $vgpr1 = COPY [[UV1]](s32) - ; CHECK-NEXT: [[COPY5:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY4]] - ; CHECK-NEXT: S_SETPC_B64_return [[COPY5]], implicit $vgpr0, implicit $vgpr1 + ; CHECK-NEXT: SI_RETURN implicit $vgpr0, implicit $vgpr1 %res = call i64 @llvm.ushl.sat.i64(i64 %lhs, i64 %rhs) ret i64 %res } @@ -396,7 +358,7 @@ declare i64 @llvm.ushl.sat.i64(i64, i64) define <2 x i32> @ushlsat_v2i32(<2 x i32> %lhs, <2 x i32> %rhs) { ; CHECK-LABEL: name: ushlsat_v2i32 ; CHECK: bb.1 (%ir-block.0): - ; CHECK-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $sgpr30_sgpr31 + ; CHECK-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3 ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 @@ -404,13 +366,11 @@ define <2 x i32> @ushlsat_v2i32(<2 x i32> %lhs, <2 x i32> %rhs) { ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 ; CHECK-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $vgpr3 ; CHECK-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[COPY2]](s32), [[COPY3]](s32) - ; CHECK-NEXT: [[COPY4:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 ; CHECK-NEXT: [[USHLSAT:%[0-9]+]]:_(<2 x s32>) = G_USHLSAT [[BUILD_VECTOR]], [[BUILD_VECTOR1]](<2 x s32>) ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[USHLSAT]](<2 x s32>) ; CHECK-NEXT: $vgpr0 = COPY [[UV]](s32) ; CHECK-NEXT: $vgpr1 = COPY [[UV1]](s32) - ; CHECK-NEXT: [[COPY5:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY4]] - ; CHECK-NEXT: S_SETPC_B64_return [[COPY5]], implicit $vgpr0, implicit $vgpr1 + ; CHECK-NEXT: SI_RETURN implicit $vgpr0, implicit $vgpr1 %res = call <2 x i32> @llvm.ushl.sat.v2i32(<2 x i32> %lhs, <2 x i32> %rhs) ret <2 x i32> %res } @@ -419,18 +379,16 @@ declare <2 x i32> @llvm.ushl.sat.v2i32(<2 x i32>, <2 x i32>) define i16 @sshlsat_i16(i16 %lhs, i16 %rhs) { ; CHECK-LABEL: name: sshlsat_i16 ; CHECK: bb.1 (%ir-block.0): - ; CHECK-NEXT: liveins: $vgpr0, $vgpr1, $sgpr30_sgpr31 + ; CHECK-NEXT: liveins: $vgpr0, $vgpr1 ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY]](s32) ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 ; CHECK-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY1]](s32) - ; 
CHECK-NEXT: [[COPY2:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 ; CHECK-NEXT: [[SSHLSAT:%[0-9]+]]:_(s16) = G_SSHLSAT [[TRUNC]], [[TRUNC1]](s16) ; CHECK-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[SSHLSAT]](s16) ; CHECK-NEXT: $vgpr0 = COPY [[ANYEXT]](s32) - ; CHECK-NEXT: [[COPY3:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY2]] - ; CHECK-NEXT: S_SETPC_B64_return [[COPY3]], implicit $vgpr0 + ; CHECK-NEXT: SI_RETURN implicit $vgpr0 %res = call i16 @llvm.sshl.sat.i16(i16 %lhs, i16 %rhs) ret i16 %res } @@ -439,15 +397,13 @@ declare i16 @llvm.sshl.sat.i16(i16, i16) define i32 @sshlsat_i32(i32 %lhs, i32 %rhs) { ; CHECK-LABEL: name: sshlsat_i32 ; CHECK: bb.1 (%ir-block.0): - ; CHECK-NEXT: liveins: $vgpr0, $vgpr1, $sgpr30_sgpr31 + ; CHECK-NEXT: liveins: $vgpr0, $vgpr1 ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 ; CHECK-NEXT: [[SSHLSAT:%[0-9]+]]:_(s32) = G_SSHLSAT [[COPY]], [[COPY1]](s32) ; CHECK-NEXT: $vgpr0 = COPY [[SSHLSAT]](s32) - ; CHECK-NEXT: [[COPY3:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY2]] - ; CHECK-NEXT: S_SETPC_B64_return [[COPY3]], implicit $vgpr0 + ; CHECK-NEXT: SI_RETURN implicit $vgpr0 %res = call i32 @llvm.sshl.sat.i32(i32 %lhs, i32 %rhs) ret i32 %res } @@ -456,7 +412,7 @@ declare i32 @llvm.sshl.sat.i32(i32, i32) define i64 @sshlsat_i64(i64 %lhs, i64 %rhs) { ; CHECK-LABEL: name: sshlsat_i64 ; CHECK: bb.1 (%ir-block.0): - ; CHECK-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $sgpr30_sgpr31 + ; CHECK-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3 ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 @@ -464,13 +420,11 @@ define i64 @sshlsat_i64(i64 %lhs, i64 %rhs) { ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 ; CHECK-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $vgpr3 ; CHECK-NEXT: [[MV1:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY2]](s32), [[COPY3]](s32) - ; CHECK-NEXT: [[COPY4:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 ; CHECK-NEXT: [[SSHLSAT:%[0-9]+]]:_(s64) = G_SSHLSAT [[MV]], [[MV1]](s64) ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[SSHLSAT]](s64) ; CHECK-NEXT: $vgpr0 = COPY [[UV]](s32) ; CHECK-NEXT: $vgpr1 = COPY [[UV1]](s32) - ; CHECK-NEXT: [[COPY5:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY4]] - ; CHECK-NEXT: S_SETPC_B64_return [[COPY5]], implicit $vgpr0, implicit $vgpr1 + ; CHECK-NEXT: SI_RETURN implicit $vgpr0, implicit $vgpr1 %res = call i64 @llvm.sshl.sat.i64(i64 %lhs, i64 %rhs) ret i64 %res } @@ -479,7 +433,7 @@ declare i64 @llvm.sshl.sat.i64(i64, i64) define <2 x i32> @sshlsat_v2i32(<2 x i32> %lhs, <2 x i32> %rhs) { ; CHECK-LABEL: name: sshlsat_v2i32 ; CHECK: bb.1 (%ir-block.0): - ; CHECK-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $sgpr30_sgpr31 + ; CHECK-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3 ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 @@ -487,13 +441,11 @@ define <2 x i32> @sshlsat_v2i32(<2 x i32> %lhs, <2 x i32> %rhs) { ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 ; CHECK-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $vgpr3 ; CHECK-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[COPY2]](s32), [[COPY3]](s32) - ; CHECK-NEXT: [[COPY4:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 ; CHECK-NEXT: [[SSHLSAT:%[0-9]+]]:_(<2 x s32>) = G_SSHLSAT [[BUILD_VECTOR]], [[BUILD_VECTOR1]](<2 x s32>) ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = 
G_UNMERGE_VALUES [[SSHLSAT]](<2 x s32>) ; CHECK-NEXT: $vgpr0 = COPY [[UV]](s32) ; CHECK-NEXT: $vgpr1 = COPY [[UV1]](s32) - ; CHECK-NEXT: [[COPY5:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY4]] - ; CHECK-NEXT: S_SETPC_B64_return [[COPY5]], implicit $vgpr0, implicit $vgpr1 + ; CHECK-NEXT: SI_RETURN implicit $vgpr0, implicit $vgpr1 %res = call <2 x i32> @llvm.sshl.sat.v2i32(<2 x i32> %lhs, <2 x i32> %rhs) ret <2 x i32> %res } diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-sibling-call.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-sibling-call.ll index d6da4fca2198..34925828c2d1 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-sibling-call.ll +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-sibling-call.ll @@ -5,15 +5,13 @@ define fastcc i32 @i32_fastcc_i32_i32(i32 %arg0, i32 %arg1) #1 { ; GCN-LABEL: name: i32_fastcc_i32_i32 ; GCN: bb.1 (%ir-block.0): - ; GCN-NEXT: liveins: $vgpr0, $vgpr1, $sgpr30_sgpr31 + ; GCN-NEXT: liveins: $vgpr0, $vgpr1 ; GCN-NEXT: {{ $}} ; GCN-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 ; GCN-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GCN-NEXT: [[COPY2:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 ; GCN-NEXT: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[COPY]], [[COPY1]] ; GCN-NEXT: $vgpr0 = COPY [[ADD]](s32) - ; GCN-NEXT: [[COPY3:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY2]] - ; GCN-NEXT: S_SETPC_B64_return [[COPY3]], implicit $vgpr0 + ; GCN-NEXT: SI_RETURN implicit $vgpr0 %add0 = add i32 %arg0, %arg1 ret i32 %add0 } @@ -21,11 +19,10 @@ define fastcc i32 @i32_fastcc_i32_i32(i32 %arg0, i32 %arg1) #1 { define fastcc i32 @i32_fastcc_i32_i32_stack_object(i32 %arg0, i32 %arg1) #1 { ; GCN-LABEL: name: i32_fastcc_i32_i32_stack_object ; GCN: bb.1 (%ir-block.0): - ; GCN-NEXT: liveins: $vgpr0, $vgpr1, $sgpr30_sgpr31 + ; GCN-NEXT: liveins: $vgpr0, $vgpr1 ; GCN-NEXT: {{ $}} ; GCN-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 ; GCN-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GCN-NEXT: [[COPY2:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 ; GCN-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 9 ; GCN-NEXT: [[FRAME_INDEX:%[0-9]+]]:_(p5) = G_FRAME_INDEX %stack.0.alloca ; GCN-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 20 @@ -33,8 +30,7 @@ define fastcc i32 @i32_fastcc_i32_i32_stack_object(i32 %arg0, i32 %arg1) #1 { ; GCN-NEXT: G_STORE [[C]](s32), [[PTR_ADD]](p5) :: (volatile store (s32) into %ir.gep, addrspace 5) ; GCN-NEXT: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[COPY]], [[COPY1]] ; GCN-NEXT: $vgpr0 = COPY [[ADD]](s32) - ; GCN-NEXT: [[COPY3:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY2]] - ; GCN-NEXT: S_SETPC_B64_return [[COPY3]], implicit $vgpr0 + ; GCN-NEXT: SI_RETURN implicit $vgpr0 %alloca = alloca [16 x i32], align 4, addrspace(5) %gep = getelementptr inbounds [16 x i32], [16 x i32] addrspace(5)* %alloca, i32 0, i32 5 store volatile i32 9, i32 addrspace(5)* %gep @@ -45,17 +41,16 @@ define fastcc i32 @i32_fastcc_i32_i32_stack_object(i32 %arg0, i32 %arg1) #1 { define hidden fastcc i32 @sibling_call_i32_fastcc_i32_i32(i32 %a, i32 %b, i32 %c) #1 { ; GCN-LABEL: name: sibling_call_i32_fastcc_i32_i32 ; GCN: bb.1.entry: - ; GCN-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2, $sgpr30_sgpr31 + ; GCN-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2 ; GCN-NEXT: {{ $}} ; GCN-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 ; GCN-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 ; GCN-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; GCN-NEXT: [[COPY3:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 ; GCN-NEXT: [[GV:%[0-9]+]]:sreg_64(p0) = G_GLOBAL_VALUE @i32_fastcc_i32_i32 ; GCN-NEXT: $vgpr0 = COPY [[COPY]](s32) ; GCN-NEXT: $vgpr1 = COPY 
[[COPY1]](s32) - ; GCN-NEXT: [[COPY4:%[0-9]+]]:_(<4 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3 - ; GCN-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY4]](<4 x s32>) + ; GCN-NEXT: [[COPY3:%[0-9]+]]:_(<4 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3 + ; GCN-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY3]](<4 x s32>) ; GCN-NEXT: SI_TCRETURN [[GV]](p0), @i32_fastcc_i32_i32, 0, csr_amdgpu_highregs, implicit $vgpr0, implicit $vgpr1, implicit $sgpr0_sgpr1_sgpr2_sgpr3 entry: %ret = tail call fastcc i32 @i32_fastcc_i32_i32(i32 %a, i32 %b) @@ -65,12 +60,11 @@ entry: define fastcc i32 @sibling_call_i32_fastcc_i32_i32_stack_object(i32 %a, i32 %b, i32 %c) #1 { ; GCN-LABEL: name: sibling_call_i32_fastcc_i32_i32_stack_object ; GCN: bb.1.entry: - ; GCN-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2, $sgpr30_sgpr31 + ; GCN-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2 ; GCN-NEXT: {{ $}} ; GCN-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 ; GCN-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 ; GCN-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; GCN-NEXT: [[COPY3:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 ; GCN-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 9 ; GCN-NEXT: [[FRAME_INDEX:%[0-9]+]]:_(p5) = G_FRAME_INDEX %stack.0.alloca ; GCN-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 20 @@ -79,8 +73,8 @@ define fastcc i32 @sibling_call_i32_fastcc_i32_i32_stack_object(i32 %a, i32 %b, ; GCN-NEXT: [[GV:%[0-9]+]]:sreg_64(p0) = G_GLOBAL_VALUE @i32_fastcc_i32_i32 ; GCN-NEXT: $vgpr0 = COPY [[COPY]](s32) ; GCN-NEXT: $vgpr1 = COPY [[COPY1]](s32) - ; GCN-NEXT: [[COPY4:%[0-9]+]]:_(<4 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3 - ; GCN-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY4]](<4 x s32>) + ; GCN-NEXT: [[COPY3:%[0-9]+]]:_(<4 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3 + ; GCN-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY3]](<4 x s32>) ; GCN-NEXT: SI_TCRETURN [[GV]](p0), @i32_fastcc_i32_i32, 0, csr_amdgpu_highregs, implicit $vgpr0, implicit $vgpr1, implicit $sgpr0_sgpr1_sgpr2_sgpr3 entry: %alloca = alloca [16 x i32], align 4, addrspace(5) @@ -93,12 +87,11 @@ entry: define fastcc i32 @sibling_call_i32_fastcc_i32_i32_callee_stack_object(i32 %a, i32 %b, i32 %c) #1 { ; GCN-LABEL: name: sibling_call_i32_fastcc_i32_i32_callee_stack_object ; GCN: bb.1.entry: - ; GCN-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2, $sgpr30_sgpr31 + ; GCN-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2 ; GCN-NEXT: {{ $}} ; GCN-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 ; GCN-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 ; GCN-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; GCN-NEXT: [[COPY3:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 ; GCN-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 9 ; GCN-NEXT: [[FRAME_INDEX:%[0-9]+]]:_(p5) = G_FRAME_INDEX %stack.0.alloca ; GCN-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 20 @@ -107,8 +100,8 @@ define fastcc i32 @sibling_call_i32_fastcc_i32_i32_callee_stack_object(i32 %a, i ; GCN-NEXT: [[GV:%[0-9]+]]:sreg_64(p0) = G_GLOBAL_VALUE @i32_fastcc_i32_i32_stack_object ; GCN-NEXT: $vgpr0 = COPY [[COPY]](s32) ; GCN-NEXT: $vgpr1 = COPY [[COPY1]](s32) - ; GCN-NEXT: [[COPY4:%[0-9]+]]:_(<4 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3 - ; GCN-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY4]](<4 x s32>) + ; GCN-NEXT: [[COPY3:%[0-9]+]]:_(<4 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3 + ; GCN-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY3]](<4 x s32>) ; GCN-NEXT: SI_TCRETURN [[GV]](p0), @i32_fastcc_i32_i32_stack_object, 0, csr_amdgpu_highregs, implicit $vgpr0, implicit $vgpr1, implicit $sgpr0_sgpr1_sgpr2_sgpr3 entry: %alloca = alloca [16 x i32], align 4, addrspace(5) @@ -121,17 +114,16 @@ entry: define 
fastcc void @sibling_call_i32_fastcc_i32_i32_unused_result(i32 %a, i32 %b, i32 %c) #1 { ; GCN-LABEL: name: sibling_call_i32_fastcc_i32_i32_unused_result ; GCN: bb.1.entry: - ; GCN-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2, $sgpr30_sgpr31 + ; GCN-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2 ; GCN-NEXT: {{ $}} ; GCN-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 ; GCN-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 ; GCN-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; GCN-NEXT: [[COPY3:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 ; GCN-NEXT: [[GV:%[0-9]+]]:sreg_64(p0) = G_GLOBAL_VALUE @i32_fastcc_i32_i32 ; GCN-NEXT: $vgpr0 = COPY [[COPY]](s32) ; GCN-NEXT: $vgpr1 = COPY [[COPY1]](s32) - ; GCN-NEXT: [[COPY4:%[0-9]+]]:_(<4 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3 - ; GCN-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY4]](<4 x s32>) + ; GCN-NEXT: [[COPY3:%[0-9]+]]:_(<4 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3 + ; GCN-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY3]](<4 x s32>) ; GCN-NEXT: SI_TCRETURN [[GV]](p0), @i32_fastcc_i32_i32, 0, csr_amdgpu_highregs, implicit $vgpr0, implicit $vgpr1, implicit $sgpr0_sgpr1_sgpr2_sgpr3 entry: %ret = tail call fastcc i32 @i32_fastcc_i32_i32(i32 %a, i32 %b) @@ -171,17 +163,15 @@ entry: define hidden fastcc i32 @i32_fastcc_i32_byval_i32(i32 %arg0, i32 addrspace(5)* byval(i32) align 4 %arg1) #1 { ; GCN-LABEL: name: i32_fastcc_i32_byval_i32 ; GCN: bb.1 (%ir-block.0): - ; GCN-NEXT: liveins: $vgpr0, $sgpr30_sgpr31 + ; GCN-NEXT: liveins: $vgpr0 ; GCN-NEXT: {{ $}} ; GCN-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 ; GCN-NEXT: [[FRAME_INDEX:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.0 ; GCN-NEXT: [[COPY1:%[0-9]+]]:_(p5) = COPY [[FRAME_INDEX]](p5) - ; GCN-NEXT: [[COPY2:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 ; GCN-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY1]](p5) :: (dereferenceable load (s32) from %ir.arg1, addrspace 5) ; GCN-NEXT: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[COPY]], [[LOAD]] ; GCN-NEXT: $vgpr0 = COPY [[ADD]](s32) - ; GCN-NEXT: [[COPY3:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY2]] - ; GCN-NEXT: S_SETPC_B64_return [[COPY3]], implicit $vgpr0 + ; GCN-NEXT: SI_RETURN implicit $vgpr0 %arg1.load = load i32, i32 addrspace(5)* %arg1, align 4 %add0 = add i32 %arg0, %arg1.load ret i32 %add0 @@ -191,29 +181,27 @@ define hidden fastcc i32 @i32_fastcc_i32_byval_i32(i32 %arg0, i32 addrspace(5)* define fastcc i32 @sibling_call_i32_fastcc_i32_byval_i32_byval_parent(i32 %a, i32 addrspace(5)* byval(i32) %b.byval, i32 %c) #1 { ; GCN-LABEL: name: sibling_call_i32_fastcc_i32_byval_i32_byval_parent ; GCN: bb.1.entry: - ; GCN-NEXT: liveins: $vgpr0, $vgpr1, $sgpr30_sgpr31 + ; GCN-NEXT: liveins: $vgpr0, $vgpr1 ; GCN-NEXT: {{ $}} ; GCN-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 ; GCN-NEXT: [[FRAME_INDEX:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.0 ; GCN-NEXT: [[COPY1:%[0-9]+]]:_(p5) = COPY [[FRAME_INDEX]](p5) ; GCN-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GCN-NEXT: [[COPY3:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 ; GCN-NEXT: ADJCALLSTACKUP 0, 0, implicit-def $scc ; GCN-NEXT: [[GV:%[0-9]+]]:_(p0) = G_GLOBAL_VALUE @i32_fastcc_i32_byval_i32 - ; GCN-NEXT: [[COPY4:%[0-9]+]]:_(p5) = COPY $sgpr32 + ; GCN-NEXT: [[COPY3:%[0-9]+]]:_(p5) = COPY $sgpr32 ; GCN-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 - ; GCN-NEXT: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY4]], [[C]](s32) + ; GCN-NEXT: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY3]], [[C]](s32) ; GCN-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 ; GCN-NEXT: G_MEMCPY [[PTR_ADD]](p5), [[COPY1]](p5), [[C1]](s32), 0 :: (dereferenceable store (s32) into stack, addrspace 
5), (dereferenceable load (s32) from %ir.b.byval, addrspace 5) ; GCN-NEXT: $vgpr0 = COPY [[COPY]](s32) - ; GCN-NEXT: [[COPY5:%[0-9]+]]:_(<4 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3 - ; GCN-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY5]](<4 x s32>) + ; GCN-NEXT: [[COPY4:%[0-9]+]]:_(<4 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3 + ; GCN-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY4]](<4 x s32>) ; GCN-NEXT: $sgpr30_sgpr31 = G_SI_CALL [[GV]](p0), @i32_fastcc_i32_byval_i32, csr_amdgpu_highregs, implicit $vgpr0, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit-def $vgpr0 - ; GCN-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $vgpr0 + ; GCN-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $vgpr0 ; GCN-NEXT: ADJCALLSTACKDOWN 0, 4, implicit-def $scc - ; GCN-NEXT: $vgpr0 = COPY [[COPY6]](s32) - ; GCN-NEXT: [[COPY7:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY3]] - ; GCN-NEXT: S_SETPC_B64_return [[COPY7]], implicit $vgpr0 + ; GCN-NEXT: $vgpr0 = COPY [[COPY5]](s32) + ; GCN-NEXT: SI_RETURN implicit $vgpr0 entry: %ret = tail call fastcc i32 @i32_fastcc_i32_byval_i32(i32 %a, i32 addrspace(5)* byval(i32) %b.byval) ret i32 %ret @@ -225,7 +213,7 @@ entry: define fastcc i32 @sibling_call_i32_fastcc_i32_byval_i32(i32 %a, [32 x i32] %large) #1 { ; GCN-LABEL: name: sibling_call_i32_fastcc_i32_byval_i32 ; GCN: bb.1.entry: - ; GCN-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $vgpr8, $vgpr9, $vgpr10, $vgpr11, $vgpr12, $vgpr13, $vgpr14, $vgpr15, $vgpr16, $vgpr17, $vgpr18, $vgpr19, $vgpr20, $vgpr21, $vgpr22, $vgpr23, $vgpr24, $vgpr25, $vgpr26, $vgpr27, $vgpr28, $vgpr29, $vgpr30, $sgpr30_sgpr31 + ; GCN-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $vgpr8, $vgpr9, $vgpr10, $vgpr11, $vgpr12, $vgpr13, $vgpr14, $vgpr15, $vgpr16, $vgpr17, $vgpr18, $vgpr19, $vgpr20, $vgpr21, $vgpr22, $vgpr23, $vgpr24, $vgpr25, $vgpr26, $vgpr27, $vgpr28, $vgpr29, $vgpr30 ; GCN-NEXT: {{ $}} ; GCN-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 ; GCN-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 @@ -262,7 +250,6 @@ define fastcc i32 @sibling_call_i32_fastcc_i32_byval_i32(i32 %a, [32 x i32] %lar ; GCN-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX]](p5) :: (invariant load (s32) from %fixed-stack.2, align 16, addrspace 5) ; GCN-NEXT: [[FRAME_INDEX1:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.1 ; GCN-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX1]](p5) :: (invariant load (s32) from %fixed-stack.1, addrspace 5) - ; GCN-NEXT: [[COPY31:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 ; GCN-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 ; GCN-NEXT: [[INTTOPTR:%[0-9]+]]:_(p5) = G_INTTOPTR [[C]](s32) ; GCN-NEXT: [[GV:%[0-9]+]]:sreg_64(p0) = G_GLOBAL_VALUE @i32_fastcc_i32_byval_i32 @@ -270,8 +257,8 @@ define fastcc i32 @sibling_call_i32_fastcc_i32_byval_i32(i32 %a, [32 x i32] %lar ; GCN-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 ; GCN-NEXT: G_MEMCPY [[FRAME_INDEX2]](p5), [[INTTOPTR]](p5), [[C1]](s32), 0 :: (dereferenceable store (s32) into %fixed-stack.0, align 16, addrspace 5), (dereferenceable load (s32) from `i32 addrspace(5)* inttoptr (i32 16 to i32 addrspace(5)*)`, align 16, addrspace 5) ; GCN-NEXT: $vgpr0 = COPY [[COPY]](s32) - ; GCN-NEXT: [[COPY32:%[0-9]+]]:_(<4 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3 - ; GCN-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY32]](<4 x s32>) + ; GCN-NEXT: [[COPY31:%[0-9]+]]:_(<4 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3 + ; GCN-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY31]](<4 x s32>) ; GCN-NEXT: SI_TCRETURN [[GV]](p0), @i32_fastcc_i32_byval_i32, 0, csr_amdgpu_highregs, implicit $vgpr0, 
implicit $sgpr0_sgpr1_sgpr2_sgpr3 entry: %ret = tail call fastcc i32 @i32_fastcc_i32_byval_i32(i32 %a, i32 addrspace(5)* byval(i32) inttoptr (i32 16 to i32 addrspace(5)*)) @@ -281,7 +268,7 @@ entry: define fastcc i32 @i32_fastcc_i32_i32_a32i32(i32 %arg0, i32 %arg1, [32 x i32] %large) #1 { ; GCN-LABEL: name: i32_fastcc_i32_i32_a32i32 ; GCN: bb.1 (%ir-block.0): - ; GCN-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $vgpr8, $vgpr9, $vgpr10, $vgpr11, $vgpr12, $vgpr13, $vgpr14, $vgpr15, $vgpr16, $vgpr17, $vgpr18, $vgpr19, $vgpr20, $vgpr21, $vgpr22, $vgpr23, $vgpr24, $vgpr25, $vgpr26, $vgpr27, $vgpr28, $vgpr29, $vgpr30, $sgpr30_sgpr31 + ; GCN-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $vgpr8, $vgpr9, $vgpr10, $vgpr11, $vgpr12, $vgpr13, $vgpr14, $vgpr15, $vgpr16, $vgpr17, $vgpr18, $vgpr19, $vgpr20, $vgpr21, $vgpr22, $vgpr23, $vgpr24, $vgpr25, $vgpr26, $vgpr27, $vgpr28, $vgpr29, $vgpr30 ; GCN-NEXT: {{ $}} ; GCN-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 ; GCN-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 @@ -320,13 +307,11 @@ define fastcc i32 @i32_fastcc_i32_i32_a32i32(i32 %arg0, i32 %arg1, [32 x i32] %l ; GCN-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX1]](p5) :: (invariant load (s32) from %fixed-stack.1, addrspace 5) ; GCN-NEXT: [[FRAME_INDEX2:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.0 ; GCN-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX2]](p5) :: (invariant load (s32) from %fixed-stack.0, align 8, addrspace 5) - ; GCN-NEXT: [[COPY31:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 ; GCN-NEXT: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[COPY]], [[COPY1]] ; GCN-NEXT: [[ADD1:%[0-9]+]]:_(s32) = G_ADD [[ADD]], [[LOAD1]] ; GCN-NEXT: [[ADD2:%[0-9]+]]:_(s32) = G_ADD [[ADD1]], [[LOAD2]] ; GCN-NEXT: $vgpr0 = COPY [[ADD2]](s32) - ; GCN-NEXT: [[COPY32:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY31]] - ; GCN-NEXT: S_SETPC_B64_return [[COPY32]], implicit $vgpr0 + ; GCN-NEXT: SI_RETURN implicit $vgpr0 %val_firststack = extractvalue [32 x i32] %large, 30 %val_laststack = extractvalue [32 x i32] %large, 31 %add0 = add i32 %arg0, %arg1 @@ -338,7 +323,7 @@ define fastcc i32 @i32_fastcc_i32_i32_a32i32(i32 %arg0, i32 %arg1, [32 x i32] %l define fastcc i32 @sibling_call_i32_fastcc_i32_i32_a32i32(i32 %a, i32 %b, [32 x i32] %c) #1 { ; GCN-LABEL: name: sibling_call_i32_fastcc_i32_i32_a32i32 ; GCN: bb.1.entry: - ; GCN-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $vgpr8, $vgpr9, $vgpr10, $vgpr11, $vgpr12, $vgpr13, $vgpr14, $vgpr15, $vgpr16, $vgpr17, $vgpr18, $vgpr19, $vgpr20, $vgpr21, $vgpr22, $vgpr23, $vgpr24, $vgpr25, $vgpr26, $vgpr27, $vgpr28, $vgpr29, $vgpr30, $sgpr30_sgpr31 + ; GCN-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $vgpr8, $vgpr9, $vgpr10, $vgpr11, $vgpr12, $vgpr13, $vgpr14, $vgpr15, $vgpr16, $vgpr17, $vgpr18, $vgpr19, $vgpr20, $vgpr21, $vgpr22, $vgpr23, $vgpr24, $vgpr25, $vgpr26, $vgpr27, $vgpr28, $vgpr29, $vgpr30 ; GCN-NEXT: {{ $}} ; GCN-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 ; GCN-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 @@ -377,7 +362,6 @@ define fastcc i32 @sibling_call_i32_fastcc_i32_i32_a32i32(i32 %a, i32 %b, [32 x ; GCN-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX1]](p5) :: (invariant load (s32) from %fixed-stack.4, addrspace 5) ; GCN-NEXT: [[FRAME_INDEX2:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.3 ; GCN-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX2]](p5) :: (invariant load (s32) from %fixed-stack.3, align 8, addrspace 5) - ; GCN-NEXT: 
[[COPY31:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 ; GCN-NEXT: [[GV:%[0-9]+]]:sreg_64(p0) = G_GLOBAL_VALUE @i32_fastcc_i32_i32_a32i32 ; GCN-NEXT: [[FRAME_INDEX3:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.2 ; GCN-NEXT: G_STORE [[LOAD]](s32), [[FRAME_INDEX3]](p5) :: (store (s32) into %fixed-stack.2, align 16, addrspace 5) @@ -416,8 +400,8 @@ define fastcc i32 @sibling_call_i32_fastcc_i32_i32_a32i32(i32 %a, i32 %b, [32 x ; GCN-NEXT: $vgpr28 = COPY [[COPY28]](s32) ; GCN-NEXT: $vgpr29 = COPY [[COPY29]](s32) ; GCN-NEXT: $vgpr30 = COPY [[COPY30]](s32) - ; GCN-NEXT: [[COPY32:%[0-9]+]]:_(<4 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3 - ; GCN-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY32]](<4 x s32>) + ; GCN-NEXT: [[COPY31:%[0-9]+]]:_(<4 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3 + ; GCN-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY31]](<4 x s32>) ; GCN-NEXT: SI_TCRETURN [[GV]](p0), @i32_fastcc_i32_i32_a32i32, 0, csr_amdgpu_highregs, implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7, implicit $vgpr8, implicit $vgpr9, implicit $vgpr10, implicit $vgpr11, implicit $vgpr12, implicit $vgpr13, implicit $vgpr14, implicit $vgpr15, implicit $vgpr16, implicit $vgpr17, implicit $vgpr18, implicit $vgpr19, implicit $vgpr20, implicit $vgpr21, implicit $vgpr22, implicit $vgpr23, implicit $vgpr24, implicit $vgpr25, implicit $vgpr26, implicit $vgpr27, implicit $vgpr28, implicit $vgpr29, implicit $vgpr30, implicit $sgpr0_sgpr1_sgpr2_sgpr3 entry: %ret = tail call fastcc i32 @i32_fastcc_i32_i32_a32i32(i32 %a, i32 %b, [32 x i32] %c) @@ -427,7 +411,7 @@ entry: define fastcc i32 @sibling_call_i32_fastcc_i32_i32_a32i32_stack_object(i32 %a, i32 %b, [32 x i32] %c) #1 { ; GCN-LABEL: name: sibling_call_i32_fastcc_i32_i32_a32i32_stack_object ; GCN: bb.1.entry: - ; GCN-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $vgpr8, $vgpr9, $vgpr10, $vgpr11, $vgpr12, $vgpr13, $vgpr14, $vgpr15, $vgpr16, $vgpr17, $vgpr18, $vgpr19, $vgpr20, $vgpr21, $vgpr22, $vgpr23, $vgpr24, $vgpr25, $vgpr26, $vgpr27, $vgpr28, $vgpr29, $vgpr30, $sgpr30_sgpr31 + ; GCN-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $vgpr8, $vgpr9, $vgpr10, $vgpr11, $vgpr12, $vgpr13, $vgpr14, $vgpr15, $vgpr16, $vgpr17, $vgpr18, $vgpr19, $vgpr20, $vgpr21, $vgpr22, $vgpr23, $vgpr24, $vgpr25, $vgpr26, $vgpr27, $vgpr28, $vgpr29, $vgpr30 ; GCN-NEXT: {{ $}} ; GCN-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 ; GCN-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 @@ -466,7 +450,6 @@ define fastcc i32 @sibling_call_i32_fastcc_i32_i32_a32i32_stack_object(i32 %a, i ; GCN-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX1]](p5) :: (invariant load (s32) from %fixed-stack.4, addrspace 5) ; GCN-NEXT: [[FRAME_INDEX2:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.3 ; GCN-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX2]](p5) :: (invariant load (s32) from %fixed-stack.3, align 8, addrspace 5) - ; GCN-NEXT: [[COPY31:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 ; GCN-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 9 ; GCN-NEXT: [[FRAME_INDEX3:%[0-9]+]]:_(p5) = G_FRAME_INDEX %stack.0.alloca ; GCN-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 20 @@ -510,8 +493,8 @@ define fastcc i32 @sibling_call_i32_fastcc_i32_i32_a32i32_stack_object(i32 %a, i ; GCN-NEXT: $vgpr28 = COPY [[COPY28]](s32) ; GCN-NEXT: $vgpr29 = COPY [[COPY29]](s32) ; GCN-NEXT: $vgpr30 = COPY [[COPY30]](s32) - ; GCN-NEXT: [[COPY32:%[0-9]+]]:_(<4 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3 - ; GCN-NEXT: 
$sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY32]](<4 x s32>) + ; GCN-NEXT: [[COPY31:%[0-9]+]]:_(<4 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3 + ; GCN-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY31]](<4 x s32>) ; GCN-NEXT: SI_TCRETURN [[GV]](p0), @i32_fastcc_i32_i32_a32i32, 0, csr_amdgpu_highregs, implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7, implicit $vgpr8, implicit $vgpr9, implicit $vgpr10, implicit $vgpr11, implicit $vgpr12, implicit $vgpr13, implicit $vgpr14, implicit $vgpr15, implicit $vgpr16, implicit $vgpr17, implicit $vgpr18, implicit $vgpr19, implicit $vgpr20, implicit $vgpr21, implicit $vgpr22, implicit $vgpr23, implicit $vgpr24, implicit $vgpr25, implicit $vgpr26, implicit $vgpr27, implicit $vgpr28, implicit $vgpr29, implicit $vgpr30, implicit $sgpr0_sgpr1_sgpr2_sgpr3 entry: %alloca = alloca [16 x i32], align 4, addrspace(5) @@ -527,23 +510,22 @@ entry: define fastcc i32 @no_sibling_call_callee_more_stack_space(i32 %a, i32 %b) #1 { ; GCN-LABEL: name: no_sibling_call_callee_more_stack_space ; GCN: bb.1.entry: - ; GCN-NEXT: liveins: $vgpr0, $vgpr1, $sgpr30_sgpr31 + ; GCN-NEXT: liveins: $vgpr0, $vgpr1 ; GCN-NEXT: {{ $}} ; GCN-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 ; GCN-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GCN-NEXT: [[COPY2:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 ; GCN-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 ; GCN-NEXT: ADJCALLSTACKUP 0, 0, implicit-def $scc ; GCN-NEXT: [[GV:%[0-9]+]]:_(p0) = G_GLOBAL_VALUE @i32_fastcc_i32_i32_a32i32 - ; GCN-NEXT: [[COPY3:%[0-9]+]]:_(p5) = COPY $sgpr32 + ; GCN-NEXT: [[COPY2:%[0-9]+]]:_(p5) = COPY $sgpr32 ; GCN-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 - ; GCN-NEXT: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY3]], [[C1]](s32) + ; GCN-NEXT: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY2]], [[C1]](s32) ; GCN-NEXT: G_STORE [[C]](s32), [[PTR_ADD]](p5) :: (store (s32) into stack, align 16, addrspace 5) ; GCN-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 - ; GCN-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY3]], [[C2]](s32) + ; GCN-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY2]], [[C2]](s32) ; GCN-NEXT: G_STORE [[C]](s32), [[PTR_ADD1]](p5) :: (store (s32) into stack + 4, addrspace 5) ; GCN-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 - ; GCN-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY3]], [[C3]](s32) + ; GCN-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY2]], [[C3]](s32) ; GCN-NEXT: G_STORE [[C]](s32), [[PTR_ADD2]](p5) :: (store (s32) into stack + 8, align 8, addrspace 5) ; GCN-NEXT: $vgpr0 = COPY [[COPY]](s32) ; GCN-NEXT: $vgpr1 = COPY [[COPY1]](s32) @@ -576,14 +558,13 @@ define fastcc i32 @no_sibling_call_callee_more_stack_space(i32 %a, i32 %b) #1 { ; GCN-NEXT: $vgpr28 = COPY [[C]](s32) ; GCN-NEXT: $vgpr29 = COPY [[C]](s32) ; GCN-NEXT: $vgpr30 = COPY [[C]](s32) - ; GCN-NEXT: [[COPY4:%[0-9]+]]:_(<4 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3 - ; GCN-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY4]](<4 x s32>) + ; GCN-NEXT: [[COPY3:%[0-9]+]]:_(<4 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3 + ; GCN-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY3]](<4 x s32>) ; GCN-NEXT: $sgpr30_sgpr31 = G_SI_CALL [[GV]](p0), @i32_fastcc_i32_i32_a32i32, csr_amdgpu_highregs, implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7, implicit $vgpr8, implicit $vgpr9, implicit $vgpr10, implicit $vgpr11, implicit $vgpr12, implicit $vgpr13, implicit $vgpr14, implicit $vgpr15, 
implicit $vgpr16, implicit $vgpr17, implicit $vgpr18, implicit $vgpr19, implicit $vgpr20, implicit $vgpr21, implicit $vgpr22, implicit $vgpr23, implicit $vgpr24, implicit $vgpr25, implicit $vgpr26, implicit $vgpr27, implicit $vgpr28, implicit $vgpr29, implicit $vgpr30, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit-def $vgpr0 - ; GCN-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $vgpr0 + ; GCN-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $vgpr0 ; GCN-NEXT: ADJCALLSTACKDOWN 0, 12, implicit-def $scc - ; GCN-NEXT: $vgpr0 = COPY [[COPY5]](s32) - ; GCN-NEXT: [[COPY6:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY2]] - ; GCN-NEXT: S_SETPC_B64_return [[COPY6]], implicit $vgpr0 + ; GCN-NEXT: $vgpr0 = COPY [[COPY4]](s32) + ; GCN-NEXT: SI_RETURN implicit $vgpr0 entry: %ret = tail call fastcc i32 @i32_fastcc_i32_i32_a32i32(i32 %a, i32 %b, [32 x i32] zeroinitializer) ret i32 %ret @@ -593,27 +574,26 @@ entry: define fastcc i32 @sibling_call_i32_fastcc_i32_i32_other_call(i32 %a, i32 %b, i32 %c) #1 { ; GCN-LABEL: name: sibling_call_i32_fastcc_i32_i32_other_call ; GCN: bb.1.entry: - ; GCN-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2, $sgpr30_sgpr31 + ; GCN-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2 ; GCN-NEXT: {{ $}} ; GCN-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 ; GCN-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 ; GCN-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; GCN-NEXT: [[COPY3:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 ; GCN-NEXT: ADJCALLSTACKUP 0, 0, implicit-def $scc ; GCN-NEXT: [[GV:%[0-9]+]]:_(p0) = G_GLOBAL_VALUE @i32_fastcc_i32_i32 ; GCN-NEXT: $vgpr0 = COPY [[COPY]](s32) ; GCN-NEXT: $vgpr1 = COPY [[COPY1]](s32) - ; GCN-NEXT: [[COPY4:%[0-9]+]]:_(<4 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3 - ; GCN-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY4]](<4 x s32>) + ; GCN-NEXT: [[COPY3:%[0-9]+]]:_(<4 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3 + ; GCN-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY3]](<4 x s32>) ; GCN-NEXT: $sgpr30_sgpr31 = G_SI_CALL [[GV]](p0), @i32_fastcc_i32_i32, csr_amdgpu_highregs, implicit $vgpr0, implicit $vgpr1, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit-def $vgpr0 - ; GCN-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $vgpr0 + ; GCN-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $vgpr0 ; GCN-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def $scc ; GCN-NEXT: [[GV1:%[0-9]+]]:sreg_64(p0) = G_GLOBAL_VALUE @sibling_call_i32_fastcc_i32_i32 ; GCN-NEXT: $vgpr0 = COPY [[COPY]](s32) ; GCN-NEXT: $vgpr1 = COPY [[COPY1]](s32) - ; GCN-NEXT: $vgpr2 = COPY [[COPY5]](s32) - ; GCN-NEXT: [[COPY6:%[0-9]+]]:_(<4 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3 - ; GCN-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY6]](<4 x s32>) + ; GCN-NEXT: $vgpr2 = COPY [[COPY4]](s32) + ; GCN-NEXT: [[COPY5:%[0-9]+]]:_(<4 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3 + ; GCN-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY5]](<4 x s32>) ; GCN-NEXT: SI_TCRETURN [[GV1]](p0), @sibling_call_i32_fastcc_i32_i32, 0, csr_amdgpu_highregs, implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $sgpr0_sgpr1_sgpr2_sgpr3 entry: %other.call = tail call fastcc i32 @i32_fastcc_i32_i32(i32 %a, i32 %b) @@ -626,7 +606,7 @@ entry: define fastcc i32 @sibling_call_stack_objecti32_fastcc_i32_i32_a32i32(i32 %a, i32 %b, [32 x i32] %c) #1 { ; GCN-LABEL: name: sibling_call_stack_objecti32_fastcc_i32_i32_a32i32 ; GCN: bb.1.entry: - ; GCN-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $vgpr8, $vgpr9, $vgpr10, $vgpr11, $vgpr12, $vgpr13, $vgpr14, $vgpr15, $vgpr16, $vgpr17, $vgpr18, $vgpr19, $vgpr20, $vgpr21, $vgpr22, $vgpr23, $vgpr24, $vgpr25, $vgpr26, $vgpr27, $vgpr28, $vgpr29, $vgpr30, 
$sgpr30_sgpr31 + ; GCN-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $vgpr8, $vgpr9, $vgpr10, $vgpr11, $vgpr12, $vgpr13, $vgpr14, $vgpr15, $vgpr16, $vgpr17, $vgpr18, $vgpr19, $vgpr20, $vgpr21, $vgpr22, $vgpr23, $vgpr24, $vgpr25, $vgpr26, $vgpr27, $vgpr28, $vgpr29, $vgpr30 ; GCN-NEXT: {{ $}} ; GCN-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 ; GCN-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 @@ -665,7 +645,6 @@ define fastcc i32 @sibling_call_stack_objecti32_fastcc_i32_i32_a32i32(i32 %a, i3 ; GCN-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX1]](p5) :: (invariant load (s32) from %fixed-stack.4, addrspace 5) ; GCN-NEXT: [[FRAME_INDEX2:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.3 ; GCN-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX2]](p5) :: (invariant load (s32) from %fixed-stack.3, align 8, addrspace 5) - ; GCN-NEXT: [[COPY31:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 ; GCN-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 9 ; GCN-NEXT: [[FRAME_INDEX3:%[0-9]+]]:_(p5) = G_FRAME_INDEX %stack.0.alloca ; GCN-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 20 @@ -709,8 +688,8 @@ define fastcc i32 @sibling_call_stack_objecti32_fastcc_i32_i32_a32i32(i32 %a, i3 ; GCN-NEXT: $vgpr28 = COPY [[COPY28]](s32) ; GCN-NEXT: $vgpr29 = COPY [[COPY29]](s32) ; GCN-NEXT: $vgpr30 = COPY [[COPY30]](s32) - ; GCN-NEXT: [[COPY32:%[0-9]+]]:_(<4 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3 - ; GCN-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY32]](<4 x s32>) + ; GCN-NEXT: [[COPY31:%[0-9]+]]:_(<4 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3 + ; GCN-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY31]](<4 x s32>) ; GCN-NEXT: SI_TCRETURN [[GV]](p0), @i32_fastcc_i32_i32_a32i32, 0, csr_amdgpu_highregs, implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7, implicit $vgpr8, implicit $vgpr9, implicit $vgpr10, implicit $vgpr11, implicit $vgpr12, implicit $vgpr13, implicit $vgpr14, implicit $vgpr15, implicit $vgpr16, implicit $vgpr17, implicit $vgpr18, implicit $vgpr19, implicit $vgpr20, implicit $vgpr21, implicit $vgpr22, implicit $vgpr23, implicit $vgpr24, implicit $vgpr25, implicit $vgpr26, implicit $vgpr27, implicit $vgpr28, implicit $vgpr29, implicit $vgpr30, implicit $sgpr0_sgpr1_sgpr2_sgpr3 entry: %alloca = alloca [16 x i32], align 4, addrspace(5) @@ -723,7 +702,7 @@ entry: define fastcc i32 @sibling_call_stack_objecti32_fastcc_i32_i32_a32i32_larger_arg_area(i32 %a, i32 %b, [36 x i32] %c) #1 { ; GCN-LABEL: name: sibling_call_stack_objecti32_fastcc_i32_i32_a32i32_larger_arg_area ; GCN: bb.1.entry: - ; GCN-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $vgpr8, $vgpr9, $vgpr10, $vgpr11, $vgpr12, $vgpr13, $vgpr14, $vgpr15, $vgpr16, $vgpr17, $vgpr18, $vgpr19, $vgpr20, $vgpr21, $vgpr22, $vgpr23, $vgpr24, $vgpr25, $vgpr26, $vgpr27, $vgpr28, $vgpr29, $vgpr30, $sgpr30_sgpr31 + ; GCN-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $vgpr8, $vgpr9, $vgpr10, $vgpr11, $vgpr12, $vgpr13, $vgpr14, $vgpr15, $vgpr16, $vgpr17, $vgpr18, $vgpr19, $vgpr20, $vgpr21, $vgpr22, $vgpr23, $vgpr24, $vgpr25, $vgpr26, $vgpr27, $vgpr28, $vgpr29, $vgpr30 ; GCN-NEXT: {{ $}} ; GCN-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 ; GCN-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 @@ -770,7 +749,6 @@ define fastcc i32 @sibling_call_stack_objecti32_fastcc_i32_i32_a32i32_larger_arg ; GCN-NEXT: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX5]](p5) :: (invariant load (s32) from %fixed-stack.4, addrspace 5) 
; GCN-NEXT: [[FRAME_INDEX6:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.3 ; GCN-NEXT: [[LOAD6:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX6]](p5) :: (invariant load (s32) from %fixed-stack.3, align 8, addrspace 5) - ; GCN-NEXT: [[COPY31:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 ; GCN-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 9 ; GCN-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 ; GCN-NEXT: [[FRAME_INDEX7:%[0-9]+]]:_(p5) = G_FRAME_INDEX %stack.0.alloca @@ -815,8 +793,8 @@ define fastcc i32 @sibling_call_stack_objecti32_fastcc_i32_i32_a32i32_larger_arg ; GCN-NEXT: $vgpr28 = COPY [[C1]](s32) ; GCN-NEXT: $vgpr29 = COPY [[C1]](s32) ; GCN-NEXT: $vgpr30 = COPY [[C1]](s32) - ; GCN-NEXT: [[COPY32:%[0-9]+]]:_(<4 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3 - ; GCN-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY32]](<4 x s32>) + ; GCN-NEXT: [[COPY31:%[0-9]+]]:_(<4 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3 + ; GCN-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY31]](<4 x s32>) ; GCN-NEXT: SI_TCRETURN [[GV]](p0), @i32_fastcc_i32_i32_a32i32, 0, csr_amdgpu_highregs, implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7, implicit $vgpr8, implicit $vgpr9, implicit $vgpr10, implicit $vgpr11, implicit $vgpr12, implicit $vgpr13, implicit $vgpr14, implicit $vgpr15, implicit $vgpr16, implicit $vgpr17, implicit $vgpr18, implicit $vgpr19, implicit $vgpr20, implicit $vgpr21, implicit $vgpr22, implicit $vgpr23, implicit $vgpr24, implicit $vgpr25, implicit $vgpr26, implicit $vgpr27, implicit $vgpr28, implicit $vgpr29, implicit $vgpr30, implicit $sgpr0_sgpr1_sgpr2_sgpr3 entry: %alloca = alloca [16 x i32], align 4, addrspace(5) @@ -831,7 +809,7 @@ declare hidden void @void_fastcc_multi_byval(i32 %a, [3 x i32] addrspace(5)* byv define fastcc void @sibling_call_fastcc_multi_byval(i32 %a, [64 x i32]) #1 { ; GCN-LABEL: name: sibling_call_fastcc_multi_byval ; GCN: bb.1.entry: - ; GCN-NEXT: liveins: $sgpr12, $sgpr13, $sgpr14, $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $vgpr8, $vgpr9, $vgpr10, $vgpr11, $vgpr12, $vgpr13, $vgpr14, $vgpr15, $vgpr16, $vgpr17, $vgpr18, $vgpr19, $vgpr20, $vgpr21, $vgpr22, $vgpr23, $vgpr24, $vgpr25, $vgpr26, $vgpr27, $vgpr28, $vgpr29, $vgpr30, $vgpr31, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9, $sgpr10_sgpr11, $sgpr30_sgpr31 + ; GCN-NEXT: liveins: $sgpr12, $sgpr13, $sgpr14, $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $vgpr8, $vgpr9, $vgpr10, $vgpr11, $vgpr12, $vgpr13, $vgpr14, $vgpr15, $vgpr16, $vgpr17, $vgpr18, $vgpr19, $vgpr20, $vgpr21, $vgpr22, $vgpr23, $vgpr24, $vgpr25, $vgpr26, $vgpr27, $vgpr28, $vgpr29, $vgpr30, $vgpr31, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9, $sgpr10_sgpr11 ; GCN-NEXT: {{ $}} ; GCN-NEXT: [[COPY:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr31 ; GCN-NEXT: [[COPY1:%[0-9]+]]:sgpr_32 = COPY $sgpr14 @@ -940,7 +918,6 @@ define fastcc void @sibling_call_fastcc_multi_byval(i32 %a, [64 x i32]) #1 { ; GCN-NEXT: [[LOAD32:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX32]](p5) :: (invariant load (s32) from %fixed-stack.3, align 16, addrspace 5) ; GCN-NEXT: [[FRAME_INDEX33:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.2 ; GCN-NEXT: [[LOAD33:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX33]](p5) :: (invariant load (s32) from %fixed-stack.2, addrspace 5) - ; GCN-NEXT: [[COPY39:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 ; GCN-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 9 ; GCN-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 ; GCN-NEXT: [[FRAME_INDEX34:%[0-9]+]]:_(p5) = G_FRAME_INDEX %stack.0.alloca0 @@ -956,14 +933,14 
@@ define fastcc void @sibling_call_fastcc_multi_byval(i32 %a, [64 x i32]) #1 { ; GCN-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p5) = G_PTR_ADD [[FRAME_INDEX35]], [[C3]](s32) ; GCN-NEXT: G_STORE [[C1]](s64), [[PTR_ADD2]](p5) :: (store (s64) into %ir.alloca1 + 8, addrspace 5) ; GCN-NEXT: [[GV:%[0-9]+]]:sreg_64(p0) = G_GLOBAL_VALUE @void_fastcc_multi_byval - ; GCN-NEXT: [[COPY40:%[0-9]+]]:_(p4) = COPY [[COPY7]] - ; GCN-NEXT: [[COPY41:%[0-9]+]]:_(p4) = COPY [[COPY6]] - ; GCN-NEXT: [[COPY42:%[0-9]+]]:_(p4) = COPY [[COPY5]] - ; GCN-NEXT: [[COPY43:%[0-9]+]]:_(s64) = COPY [[COPY4]] - ; GCN-NEXT: [[COPY44:%[0-9]+]]:_(s32) = COPY [[COPY3]] - ; GCN-NEXT: [[COPY45:%[0-9]+]]:_(s32) = COPY [[COPY2]] - ; GCN-NEXT: [[COPY46:%[0-9]+]]:_(s32) = COPY [[COPY1]] - ; GCN-NEXT: [[COPY47:%[0-9]+]]:_(s32) = COPY [[COPY]](s32) + ; GCN-NEXT: [[COPY39:%[0-9]+]]:_(p4) = COPY [[COPY7]] + ; GCN-NEXT: [[COPY40:%[0-9]+]]:_(p4) = COPY [[COPY6]] + ; GCN-NEXT: [[COPY41:%[0-9]+]]:_(p4) = COPY [[COPY5]] + ; GCN-NEXT: [[COPY42:%[0-9]+]]:_(s64) = COPY [[COPY4]] + ; GCN-NEXT: [[COPY43:%[0-9]+]]:_(s32) = COPY [[COPY3]] + ; GCN-NEXT: [[COPY44:%[0-9]+]]:_(s32) = COPY [[COPY2]] + ; GCN-NEXT: [[COPY45:%[0-9]+]]:_(s32) = COPY [[COPY1]] + ; GCN-NEXT: [[COPY46:%[0-9]+]]:_(s32) = COPY [[COPY]](s32) ; GCN-NEXT: [[FRAME_INDEX36:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.1 ; GCN-NEXT: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 12 ; GCN-NEXT: G_MEMCPY [[FRAME_INDEX36]](p5), [[FRAME_INDEX34]](p5), [[C4]](s32), 0 :: (dereferenceable store (s96) into %fixed-stack.1, align 16, addrspace 5), (dereferenceable load (s96) from %ir.alloca0, align 16, addrspace 5) @@ -971,16 +948,16 @@ define fastcc void @sibling_call_fastcc_multi_byval(i32 %a, [64 x i32]) #1 { ; GCN-NEXT: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 ; GCN-NEXT: G_MEMCPY [[FRAME_INDEX37]](p5), [[FRAME_INDEX35]](p5), [[C5]](s32), 0 :: (dereferenceable store (s128) into %fixed-stack.0, addrspace 5), (dereferenceable load (s128) from %ir.alloca1, align 8, addrspace 5) ; GCN-NEXT: $vgpr0 = COPY [[COPY8]](s32) - ; GCN-NEXT: [[COPY48:%[0-9]+]]:_(<4 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3 - ; GCN-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY48]](<4 x s32>) - ; GCN-NEXT: $sgpr4_sgpr5 = COPY [[COPY40]](p4) - ; GCN-NEXT: $sgpr6_sgpr7 = COPY [[COPY41]](p4) - ; GCN-NEXT: $sgpr8_sgpr9 = COPY [[COPY42]](p4) - ; GCN-NEXT: $sgpr10_sgpr11 = COPY [[COPY43]](s64) - ; GCN-NEXT: $sgpr12 = COPY [[COPY44]](s32) - ; GCN-NEXT: $sgpr13 = COPY [[COPY45]](s32) - ; GCN-NEXT: $sgpr14 = COPY [[COPY46]](s32) - ; GCN-NEXT: $vgpr31 = COPY [[COPY47]](s32) + ; GCN-NEXT: [[COPY47:%[0-9]+]]:_(<4 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3 + ; GCN-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY47]](<4 x s32>) + ; GCN-NEXT: $sgpr4_sgpr5 = COPY [[COPY39]](p4) + ; GCN-NEXT: $sgpr6_sgpr7 = COPY [[COPY40]](p4) + ; GCN-NEXT: $sgpr8_sgpr9 = COPY [[COPY41]](p4) + ; GCN-NEXT: $sgpr10_sgpr11 = COPY [[COPY42]](s64) + ; GCN-NEXT: $sgpr12 = COPY [[COPY43]](s32) + ; GCN-NEXT: $sgpr13 = COPY [[COPY44]](s32) + ; GCN-NEXT: $sgpr14 = COPY [[COPY45]](s32) + ; GCN-NEXT: $vgpr31 = COPY [[COPY46]](s32) ; GCN-NEXT: SI_TCRETURN [[GV]](p0), @void_fastcc_multi_byval, 0, csr_amdgpu_highregs, implicit $vgpr0, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $vgpr31 entry: %alloca0 = alloca [3 x i32], align 16, addrspace(5) @@ -997,7 +974,7 @@ declare hidden void @void_fastcc_byval_and_stack_passed([3 x i32] addrspace(5)* define fastcc 
void @sibling_call_byval_and_stack_passed(i32 %stack.out.arg, [64 x i32]) #1 { ; GCN-LABEL: name: sibling_call_byval_and_stack_passed ; GCN: bb.1.entry: - ; GCN-NEXT: liveins: $sgpr12, $sgpr13, $sgpr14, $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $vgpr8, $vgpr9, $vgpr10, $vgpr11, $vgpr12, $vgpr13, $vgpr14, $vgpr15, $vgpr16, $vgpr17, $vgpr18, $vgpr19, $vgpr20, $vgpr21, $vgpr22, $vgpr23, $vgpr24, $vgpr25, $vgpr26, $vgpr27, $vgpr28, $vgpr29, $vgpr30, $vgpr31, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9, $sgpr10_sgpr11, $sgpr30_sgpr31 + ; GCN-NEXT: liveins: $sgpr12, $sgpr13, $sgpr14, $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $vgpr8, $vgpr9, $vgpr10, $vgpr11, $vgpr12, $vgpr13, $vgpr14, $vgpr15, $vgpr16, $vgpr17, $vgpr18, $vgpr19, $vgpr20, $vgpr21, $vgpr22, $vgpr23, $vgpr24, $vgpr25, $vgpr26, $vgpr27, $vgpr28, $vgpr29, $vgpr30, $vgpr31, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9, $sgpr10_sgpr11 ; GCN-NEXT: {{ $}} ; GCN-NEXT: [[COPY:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr31 ; GCN-NEXT: [[COPY1:%[0-9]+]]:sgpr_32 = COPY $sgpr14 @@ -1106,7 +1083,6 @@ define fastcc void @sibling_call_byval_and_stack_passed(i32 %stack.out.arg, [64 ; GCN-NEXT: [[LOAD32:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX32]](p5) :: (invariant load (s32) from %fixed-stack.4, align 16, addrspace 5) ; GCN-NEXT: [[FRAME_INDEX33:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.3 ; GCN-NEXT: [[LOAD33:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX33]](p5) :: (invariant load (s32) from %fixed-stack.3, addrspace 5) - ; GCN-NEXT: [[COPY39:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 ; GCN-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 9 ; GCN-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 ; GCN-NEXT: [[FRAME_INDEX34:%[0-9]+]]:_(p5) = G_FRAME_INDEX %stack.0.alloca @@ -1118,14 +1094,14 @@ define fastcc void @sibling_call_byval_and_stack_passed(i32 %stack.out.arg, [64 ; GCN-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p5) = G_PTR_ADD [[FRAME_INDEX34]], [[C3]](s32) ; GCN-NEXT: G_STORE [[C]](s32), [[PTR_ADD1]](p5) :: (store (s32) into %ir.alloca + 8, addrspace 5) ; GCN-NEXT: [[GV:%[0-9]+]]:sreg_64(p0) = G_GLOBAL_VALUE @void_fastcc_byval_and_stack_passed - ; GCN-NEXT: [[COPY40:%[0-9]+]]:_(p4) = COPY [[COPY7]] - ; GCN-NEXT: [[COPY41:%[0-9]+]]:_(p4) = COPY [[COPY6]] - ; GCN-NEXT: [[COPY42:%[0-9]+]]:_(p4) = COPY [[COPY5]] - ; GCN-NEXT: [[COPY43:%[0-9]+]]:_(s64) = COPY [[COPY4]] - ; GCN-NEXT: [[COPY44:%[0-9]+]]:_(s32) = COPY [[COPY3]] - ; GCN-NEXT: [[COPY45:%[0-9]+]]:_(s32) = COPY [[COPY2]] - ; GCN-NEXT: [[COPY46:%[0-9]+]]:_(s32) = COPY [[COPY1]] - ; GCN-NEXT: [[COPY47:%[0-9]+]]:_(s32) = COPY [[COPY]](s32) + ; GCN-NEXT: [[COPY39:%[0-9]+]]:_(p4) = COPY [[COPY7]] + ; GCN-NEXT: [[COPY40:%[0-9]+]]:_(p4) = COPY [[COPY6]] + ; GCN-NEXT: [[COPY41:%[0-9]+]]:_(p4) = COPY [[COPY5]] + ; GCN-NEXT: [[COPY42:%[0-9]+]]:_(s64) = COPY [[COPY4]] + ; GCN-NEXT: [[COPY43:%[0-9]+]]:_(s32) = COPY [[COPY3]] + ; GCN-NEXT: [[COPY44:%[0-9]+]]:_(s32) = COPY [[COPY2]] + ; GCN-NEXT: [[COPY45:%[0-9]+]]:_(s32) = COPY [[COPY1]] + ; GCN-NEXT: [[COPY46:%[0-9]+]]:_(s32) = COPY [[COPY]](s32) ; GCN-NEXT: [[FRAME_INDEX35:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.2 ; GCN-NEXT: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 12 ; GCN-NEXT: G_MEMCPY [[FRAME_INDEX35]](p5), [[FRAME_INDEX34]](p5), [[C4]](s32), 0 :: (dereferenceable store (s96) into %fixed-stack.2, align 16, addrspace 5), (dereferenceable load (s96) from %ir.alloca, align 16, addrspace 5) @@ -1164,16 +1140,16 @@ define fastcc void @sibling_call_byval_and_stack_passed(i32 %stack.out.arg, [64 ; GCN-NEXT: $vgpr28 = COPY [[C1]](s32) ; 
GCN-NEXT: $vgpr29 = COPY [[C1]](s32) ; GCN-NEXT: $vgpr30 = COPY [[C1]](s32) - ; GCN-NEXT: [[COPY48:%[0-9]+]]:_(<4 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3 - ; GCN-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY48]](<4 x s32>) - ; GCN-NEXT: $sgpr4_sgpr5 = COPY [[COPY40]](p4) - ; GCN-NEXT: $sgpr6_sgpr7 = COPY [[COPY41]](p4) - ; GCN-NEXT: $sgpr8_sgpr9 = COPY [[COPY42]](p4) - ; GCN-NEXT: $sgpr10_sgpr11 = COPY [[COPY43]](s64) - ; GCN-NEXT: $sgpr12 = COPY [[COPY44]](s32) - ; GCN-NEXT: $sgpr13 = COPY [[COPY45]](s32) - ; GCN-NEXT: $sgpr14 = COPY [[COPY46]](s32) - ; GCN-NEXT: $vgpr31 = COPY [[COPY47]](s32) + ; GCN-NEXT: [[COPY47:%[0-9]+]]:_(<4 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3 + ; GCN-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY47]](<4 x s32>) + ; GCN-NEXT: $sgpr4_sgpr5 = COPY [[COPY39]](p4) + ; GCN-NEXT: $sgpr6_sgpr7 = COPY [[COPY40]](p4) + ; GCN-NEXT: $sgpr8_sgpr9 = COPY [[COPY41]](p4) + ; GCN-NEXT: $sgpr10_sgpr11 = COPY [[COPY42]](s64) + ; GCN-NEXT: $sgpr12 = COPY [[COPY43]](s32) + ; GCN-NEXT: $sgpr13 = COPY [[COPY44]](s32) + ; GCN-NEXT: $sgpr14 = COPY [[COPY45]](s32) + ; GCN-NEXT: $vgpr31 = COPY [[COPY46]](s32) ; GCN-NEXT: SI_TCRETURN [[GV]](p0), @void_fastcc_byval_and_stack_passed, 0, csr_amdgpu_highregs, implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7, implicit $vgpr8, implicit $vgpr9, implicit $vgpr10, implicit $vgpr11, implicit $vgpr12, implicit $vgpr13, implicit $vgpr14, implicit $vgpr15, implicit $vgpr16, implicit $vgpr17, implicit $vgpr18, implicit $vgpr19, implicit $vgpr20, implicit $vgpr21, implicit $vgpr22, implicit $vgpr23, implicit $vgpr24, implicit $vgpr25, implicit $vgpr26, implicit $vgpr27, implicit $vgpr28, implicit $vgpr29, implicit $vgpr30, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $vgpr31 entry: %alloca = alloca [3 x i32], align 16, addrspace(5) @@ -1187,7 +1163,7 @@ declare hidden fastcc i64 @i64_fastcc_i64(i64 %arg0) define hidden fastcc i64 @sibling_call_i64_fastcc_i64(i64 %a) #1 { ; GCN-LABEL: name: sibling_call_i64_fastcc_i64 ; GCN: bb.1.entry: - ; GCN-NEXT: liveins: $sgpr12, $sgpr13, $sgpr14, $vgpr0, $vgpr1, $vgpr31, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9, $sgpr10_sgpr11, $sgpr30_sgpr31 + ; GCN-NEXT: liveins: $sgpr12, $sgpr13, $sgpr14, $vgpr0, $vgpr1, $vgpr31, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9, $sgpr10_sgpr11 ; GCN-NEXT: {{ $}} ; GCN-NEXT: [[COPY:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr31 ; GCN-NEXT: [[COPY1:%[0-9]+]]:sgpr_32 = COPY $sgpr14 @@ -1200,29 +1176,28 @@ define hidden fastcc i64 @sibling_call_i64_fastcc_i64(i64 %a) #1 { ; GCN-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0 ; GCN-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1 ; GCN-NEXT: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY8]](s32), [[COPY9]](s32) - ; GCN-NEXT: [[COPY10:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 ; GCN-NEXT: [[GV:%[0-9]+]]:sreg_64(p0) = G_GLOBAL_VALUE @i64_fastcc_i64 - ; GCN-NEXT: [[COPY11:%[0-9]+]]:_(p4) = COPY [[COPY7]] - ; GCN-NEXT: [[COPY12:%[0-9]+]]:_(p4) = COPY [[COPY6]] - ; GCN-NEXT: [[COPY13:%[0-9]+]]:_(p4) = COPY [[COPY5]] - ; GCN-NEXT: [[COPY14:%[0-9]+]]:_(s64) = COPY [[COPY4]] - ; GCN-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY3]] - ; GCN-NEXT: [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY2]] - ; GCN-NEXT: [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY1]] - ; GCN-NEXT: [[COPY18:%[0-9]+]]:_(s32) = COPY [[COPY]](s32) + ; GCN-NEXT: [[COPY10:%[0-9]+]]:_(p4) = 
COPY [[COPY7]] + ; GCN-NEXT: [[COPY11:%[0-9]+]]:_(p4) = COPY [[COPY6]] + ; GCN-NEXT: [[COPY12:%[0-9]+]]:_(p4) = COPY [[COPY5]] + ; GCN-NEXT: [[COPY13:%[0-9]+]]:_(s64) = COPY [[COPY4]] + ; GCN-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY3]] + ; GCN-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY2]] + ; GCN-NEXT: [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY1]] + ; GCN-NEXT: [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY]](s32) ; GCN-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[MV]](s64) ; GCN-NEXT: $vgpr0 = COPY [[UV]](s32) ; GCN-NEXT: $vgpr1 = COPY [[UV1]](s32) - ; GCN-NEXT: [[COPY19:%[0-9]+]]:_(<4 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3 - ; GCN-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY19]](<4 x s32>) - ; GCN-NEXT: $sgpr4_sgpr5 = COPY [[COPY11]](p4) - ; GCN-NEXT: $sgpr6_sgpr7 = COPY [[COPY12]](p4) - ; GCN-NEXT: $sgpr8_sgpr9 = COPY [[COPY13]](p4) - ; GCN-NEXT: $sgpr10_sgpr11 = COPY [[COPY14]](s64) - ; GCN-NEXT: $sgpr12 = COPY [[COPY15]](s32) - ; GCN-NEXT: $sgpr13 = COPY [[COPY16]](s32) - ; GCN-NEXT: $sgpr14 = COPY [[COPY17]](s32) - ; GCN-NEXT: $vgpr31 = COPY [[COPY18]](s32) + ; GCN-NEXT: [[COPY18:%[0-9]+]]:_(<4 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3 + ; GCN-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY18]](<4 x s32>) + ; GCN-NEXT: $sgpr4_sgpr5 = COPY [[COPY10]](p4) + ; GCN-NEXT: $sgpr6_sgpr7 = COPY [[COPY11]](p4) + ; GCN-NEXT: $sgpr8_sgpr9 = COPY [[COPY12]](p4) + ; GCN-NEXT: $sgpr10_sgpr11 = COPY [[COPY13]](s64) + ; GCN-NEXT: $sgpr12 = COPY [[COPY14]](s32) + ; GCN-NEXT: $sgpr13 = COPY [[COPY15]](s32) + ; GCN-NEXT: $sgpr14 = COPY [[COPY16]](s32) + ; GCN-NEXT: $vgpr31 = COPY [[COPY17]](s32) ; GCN-NEXT: SI_TCRETURN [[GV]](p0), @i64_fastcc_i64, 0, csr_amdgpu_highregs, implicit $vgpr0, implicit $vgpr1, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $vgpr31 entry: %ret = tail call fastcc i64 @i64_fastcc_i64(i64 %a) @@ -1234,7 +1209,7 @@ declare hidden fastcc i8 addrspace(1)* @p1i8_fastcc_p1i8(i8 addrspace(1)* %arg0) define hidden fastcc i8 addrspace(1)* @sibling_call_p1i8_fastcc_p1i8(i8 addrspace(1)* %a) #1 { ; GCN-LABEL: name: sibling_call_p1i8_fastcc_p1i8 ; GCN: bb.1.entry: - ; GCN-NEXT: liveins: $sgpr12, $sgpr13, $sgpr14, $vgpr0, $vgpr1, $vgpr31, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9, $sgpr10_sgpr11, $sgpr30_sgpr31 + ; GCN-NEXT: liveins: $sgpr12, $sgpr13, $sgpr14, $vgpr0, $vgpr1, $vgpr31, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9, $sgpr10_sgpr11 ; GCN-NEXT: {{ $}} ; GCN-NEXT: [[COPY:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr31 ; GCN-NEXT: [[COPY1:%[0-9]+]]:sgpr_32 = COPY $sgpr14 @@ -1247,29 +1222,28 @@ define hidden fastcc i8 addrspace(1)* @sibling_call_p1i8_fastcc_p1i8(i8 addrspac ; GCN-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0 ; GCN-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1 ; GCN-NEXT: [[MV:%[0-9]+]]:_(p1) = G_MERGE_VALUES [[COPY8]](s32), [[COPY9]](s32) - ; GCN-NEXT: [[COPY10:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 ; GCN-NEXT: [[GV:%[0-9]+]]:sreg_64(p0) = G_GLOBAL_VALUE @p1i8_fastcc_p1i8 - ; GCN-NEXT: [[COPY11:%[0-9]+]]:_(p4) = COPY [[COPY7]] - ; GCN-NEXT: [[COPY12:%[0-9]+]]:_(p4) = COPY [[COPY6]] - ; GCN-NEXT: [[COPY13:%[0-9]+]]:_(p4) = COPY [[COPY5]] - ; GCN-NEXT: [[COPY14:%[0-9]+]]:_(s64) = COPY [[COPY4]] - ; GCN-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY3]] - ; GCN-NEXT: [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY2]] - ; GCN-NEXT: [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY1]] - ; GCN-NEXT: [[COPY18:%[0-9]+]]:_(s32) = COPY 
[[COPY]](s32) + ; GCN-NEXT: [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY7]] + ; GCN-NEXT: [[COPY11:%[0-9]+]]:_(p4) = COPY [[COPY6]] + ; GCN-NEXT: [[COPY12:%[0-9]+]]:_(p4) = COPY [[COPY5]] + ; GCN-NEXT: [[COPY13:%[0-9]+]]:_(s64) = COPY [[COPY4]] + ; GCN-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY3]] + ; GCN-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY2]] + ; GCN-NEXT: [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY1]] + ; GCN-NEXT: [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY]](s32) ; GCN-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[MV]](p1) ; GCN-NEXT: $vgpr0 = COPY [[UV]](s32) ; GCN-NEXT: $vgpr1 = COPY [[UV1]](s32) - ; GCN-NEXT: [[COPY19:%[0-9]+]]:_(<4 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3 - ; GCN-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY19]](<4 x s32>) - ; GCN-NEXT: $sgpr4_sgpr5 = COPY [[COPY11]](p4) - ; GCN-NEXT: $sgpr6_sgpr7 = COPY [[COPY12]](p4) - ; GCN-NEXT: $sgpr8_sgpr9 = COPY [[COPY13]](p4) - ; GCN-NEXT: $sgpr10_sgpr11 = COPY [[COPY14]](s64) - ; GCN-NEXT: $sgpr12 = COPY [[COPY15]](s32) - ; GCN-NEXT: $sgpr13 = COPY [[COPY16]](s32) - ; GCN-NEXT: $sgpr14 = COPY [[COPY17]](s32) - ; GCN-NEXT: $vgpr31 = COPY [[COPY18]](s32) + ; GCN-NEXT: [[COPY18:%[0-9]+]]:_(<4 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3 + ; GCN-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY18]](<4 x s32>) + ; GCN-NEXT: $sgpr4_sgpr5 = COPY [[COPY10]](p4) + ; GCN-NEXT: $sgpr6_sgpr7 = COPY [[COPY11]](p4) + ; GCN-NEXT: $sgpr8_sgpr9 = COPY [[COPY12]](p4) + ; GCN-NEXT: $sgpr10_sgpr11 = COPY [[COPY13]](s64) + ; GCN-NEXT: $sgpr12 = COPY [[COPY14]](s32) + ; GCN-NEXT: $sgpr13 = COPY [[COPY15]](s32) + ; GCN-NEXT: $sgpr14 = COPY [[COPY16]](s32) + ; GCN-NEXT: $vgpr31 = COPY [[COPY17]](s32) ; GCN-NEXT: SI_TCRETURN [[GV]](p0), @p1i8_fastcc_p1i8, 0, csr_amdgpu_highregs, implicit $vgpr0, implicit $vgpr1, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $vgpr31 entry: %ret = tail call fastcc i8 addrspace(1)* @p1i8_fastcc_p1i8(i8 addrspace(1)* %a) @@ -1281,7 +1255,7 @@ declare hidden fastcc i16 @i16_fastcc_i16(i16 %arg0) define hidden fastcc i16 @sibling_call_i16_fastcc_i16(i16 %a) #1 { ; GCN-LABEL: name: sibling_call_i16_fastcc_i16 ; GCN: bb.1.entry: - ; GCN-NEXT: liveins: $sgpr12, $sgpr13, $sgpr14, $vgpr0, $vgpr31, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9, $sgpr10_sgpr11, $sgpr30_sgpr31 + ; GCN-NEXT: liveins: $sgpr12, $sgpr13, $sgpr14, $vgpr0, $vgpr31, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9, $sgpr10_sgpr11 ; GCN-NEXT: {{ $}} ; GCN-NEXT: [[COPY:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr31 ; GCN-NEXT: [[COPY1:%[0-9]+]]:sgpr_32 = COPY $sgpr14 @@ -1293,28 +1267,27 @@ define hidden fastcc i16 @sibling_call_i16_fastcc_i16(i16 %a) #1 { ; GCN-NEXT: [[COPY7:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5 ; GCN-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0 ; GCN-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY8]](s32) - ; GCN-NEXT: [[COPY9:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 ; GCN-NEXT: [[GV:%[0-9]+]]:sreg_64(p0) = G_GLOBAL_VALUE @i16_fastcc_i16 - ; GCN-NEXT: [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY7]] - ; GCN-NEXT: [[COPY11:%[0-9]+]]:_(p4) = COPY [[COPY6]] - ; GCN-NEXT: [[COPY12:%[0-9]+]]:_(p4) = COPY [[COPY5]] - ; GCN-NEXT: [[COPY13:%[0-9]+]]:_(s64) = COPY [[COPY4]] - ; GCN-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY3]] - ; GCN-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY2]] - ; GCN-NEXT: [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY1]] - ; GCN-NEXT: [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY]](s32) + ; GCN-NEXT: 
[[COPY9:%[0-9]+]]:_(p4) = COPY [[COPY7]] + ; GCN-NEXT: [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY6]] + ; GCN-NEXT: [[COPY11:%[0-9]+]]:_(p4) = COPY [[COPY5]] + ; GCN-NEXT: [[COPY12:%[0-9]+]]:_(s64) = COPY [[COPY4]] + ; GCN-NEXT: [[COPY13:%[0-9]+]]:_(s32) = COPY [[COPY3]] + ; GCN-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY2]] + ; GCN-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY1]] + ; GCN-NEXT: [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY]](s32) ; GCN-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[TRUNC]](s16) ; GCN-NEXT: $vgpr0 = COPY [[ANYEXT]](s32) - ; GCN-NEXT: [[COPY18:%[0-9]+]]:_(<4 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3 - ; GCN-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY18]](<4 x s32>) - ; GCN-NEXT: $sgpr4_sgpr5 = COPY [[COPY10]](p4) - ; GCN-NEXT: $sgpr6_sgpr7 = COPY [[COPY11]](p4) - ; GCN-NEXT: $sgpr8_sgpr9 = COPY [[COPY12]](p4) - ; GCN-NEXT: $sgpr10_sgpr11 = COPY [[COPY13]](s64) - ; GCN-NEXT: $sgpr12 = COPY [[COPY14]](s32) - ; GCN-NEXT: $sgpr13 = COPY [[COPY15]](s32) - ; GCN-NEXT: $sgpr14 = COPY [[COPY16]](s32) - ; GCN-NEXT: $vgpr31 = COPY [[COPY17]](s32) + ; GCN-NEXT: [[COPY17:%[0-9]+]]:_(<4 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3 + ; GCN-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY17]](<4 x s32>) + ; GCN-NEXT: $sgpr4_sgpr5 = COPY [[COPY9]](p4) + ; GCN-NEXT: $sgpr6_sgpr7 = COPY [[COPY10]](p4) + ; GCN-NEXT: $sgpr8_sgpr9 = COPY [[COPY11]](p4) + ; GCN-NEXT: $sgpr10_sgpr11 = COPY [[COPY12]](s64) + ; GCN-NEXT: $sgpr12 = COPY [[COPY13]](s32) + ; GCN-NEXT: $sgpr13 = COPY [[COPY14]](s32) + ; GCN-NEXT: $sgpr14 = COPY [[COPY15]](s32) + ; GCN-NEXT: $vgpr31 = COPY [[COPY16]](s32) ; GCN-NEXT: SI_TCRETURN [[GV]](p0), @i16_fastcc_i16, 0, csr_amdgpu_highregs, implicit $vgpr0, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $vgpr31 entry: %ret = tail call fastcc i16 @i16_fastcc_i16(i16 %a) @@ -1326,7 +1299,7 @@ declare hidden fastcc half @f16_fastcc_f16(half %arg0) define hidden fastcc half @sibling_call_f16_fastcc_f16(half %a) #1 { ; GCN-LABEL: name: sibling_call_f16_fastcc_f16 ; GCN: bb.1.entry: - ; GCN-NEXT: liveins: $sgpr12, $sgpr13, $sgpr14, $vgpr0, $vgpr31, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9, $sgpr10_sgpr11, $sgpr30_sgpr31 + ; GCN-NEXT: liveins: $sgpr12, $sgpr13, $sgpr14, $vgpr0, $vgpr31, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9, $sgpr10_sgpr11 ; GCN-NEXT: {{ $}} ; GCN-NEXT: [[COPY:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr31 ; GCN-NEXT: [[COPY1:%[0-9]+]]:sgpr_32 = COPY $sgpr14 @@ -1338,28 +1311,27 @@ define hidden fastcc half @sibling_call_f16_fastcc_f16(half %a) #1 { ; GCN-NEXT: [[COPY7:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5 ; GCN-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0 ; GCN-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY8]](s32) - ; GCN-NEXT: [[COPY9:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 ; GCN-NEXT: [[GV:%[0-9]+]]:sreg_64(p0) = G_GLOBAL_VALUE @f16_fastcc_f16 - ; GCN-NEXT: [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY7]] - ; GCN-NEXT: [[COPY11:%[0-9]+]]:_(p4) = COPY [[COPY6]] - ; GCN-NEXT: [[COPY12:%[0-9]+]]:_(p4) = COPY [[COPY5]] - ; GCN-NEXT: [[COPY13:%[0-9]+]]:_(s64) = COPY [[COPY4]] - ; GCN-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY3]] - ; GCN-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY2]] - ; GCN-NEXT: [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY1]] - ; GCN-NEXT: [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY]](s32) + ; GCN-NEXT: [[COPY9:%[0-9]+]]:_(p4) = COPY [[COPY7]] + ; GCN-NEXT: [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY6]] + ; GCN-NEXT: 
[[COPY11:%[0-9]+]]:_(p4) = COPY [[COPY5]] + ; GCN-NEXT: [[COPY12:%[0-9]+]]:_(s64) = COPY [[COPY4]] + ; GCN-NEXT: [[COPY13:%[0-9]+]]:_(s32) = COPY [[COPY3]] + ; GCN-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY2]] + ; GCN-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY1]] + ; GCN-NEXT: [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY]](s32) ; GCN-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[TRUNC]](s16) ; GCN-NEXT: $vgpr0 = COPY [[ANYEXT]](s32) - ; GCN-NEXT: [[COPY18:%[0-9]+]]:_(<4 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3 - ; GCN-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY18]](<4 x s32>) - ; GCN-NEXT: $sgpr4_sgpr5 = COPY [[COPY10]](p4) - ; GCN-NEXT: $sgpr6_sgpr7 = COPY [[COPY11]](p4) - ; GCN-NEXT: $sgpr8_sgpr9 = COPY [[COPY12]](p4) - ; GCN-NEXT: $sgpr10_sgpr11 = COPY [[COPY13]](s64) - ; GCN-NEXT: $sgpr12 = COPY [[COPY14]](s32) - ; GCN-NEXT: $sgpr13 = COPY [[COPY15]](s32) - ; GCN-NEXT: $sgpr14 = COPY [[COPY16]](s32) - ; GCN-NEXT: $vgpr31 = COPY [[COPY17]](s32) + ; GCN-NEXT: [[COPY17:%[0-9]+]]:_(<4 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3 + ; GCN-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY17]](<4 x s32>) + ; GCN-NEXT: $sgpr4_sgpr5 = COPY [[COPY9]](p4) + ; GCN-NEXT: $sgpr6_sgpr7 = COPY [[COPY10]](p4) + ; GCN-NEXT: $sgpr8_sgpr9 = COPY [[COPY11]](p4) + ; GCN-NEXT: $sgpr10_sgpr11 = COPY [[COPY12]](s64) + ; GCN-NEXT: $sgpr12 = COPY [[COPY13]](s32) + ; GCN-NEXT: $sgpr13 = COPY [[COPY14]](s32) + ; GCN-NEXT: $sgpr14 = COPY [[COPY15]](s32) + ; GCN-NEXT: $vgpr31 = COPY [[COPY16]](s32) ; GCN-NEXT: SI_TCRETURN [[GV]](p0), @f16_fastcc_f16, 0, csr_amdgpu_highregs, implicit $vgpr0, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $vgpr31 entry: %ret = tail call fastcc half @f16_fastcc_f16(half %a) @@ -1371,7 +1343,7 @@ declare hidden fastcc <3 x i16> @v3i16_fastcc_v3i16(<3 x i16> %arg0) define hidden fastcc <3 x i16> @sibling_call_v3i16_fastcc_v3i16(<3 x i16> %a) #1 { ; GCN-LABEL: name: sibling_call_v3i16_fastcc_v3i16 ; GCN: bb.1.entry: - ; GCN-NEXT: liveins: $sgpr12, $sgpr13, $sgpr14, $vgpr0, $vgpr1, $vgpr31, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9, $sgpr10_sgpr11, $sgpr30_sgpr31 + ; GCN-NEXT: liveins: $sgpr12, $sgpr13, $sgpr14, $vgpr0, $vgpr1, $vgpr31, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9, $sgpr10_sgpr11 ; GCN-NEXT: {{ $}} ; GCN-NEXT: [[COPY:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr31 ; GCN-NEXT: [[COPY1:%[0-9]+]]:sgpr_32 = COPY $sgpr14 @@ -1386,31 +1358,30 @@ define hidden fastcc <3 x i16> @sibling_call_v3i16_fastcc_v3i16(<3 x i16> %a) #1 ; GCN-NEXT: [[DEF:%[0-9]+]]:_(<2 x s16>) = G_IMPLICIT_DEF ; GCN-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[COPY8]](<2 x s16>), [[COPY9]](<2 x s16>), [[DEF]](<2 x s16>) ; GCN-NEXT: [[UV:%[0-9]+]]:_(<3 x s16>), [[UV1:%[0-9]+]]:_(<3 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<6 x s16>) - ; GCN-NEXT: [[COPY10:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 ; GCN-NEXT: [[GV:%[0-9]+]]:sreg_64(p0) = G_GLOBAL_VALUE @v3i16_fastcc_v3i16 - ; GCN-NEXT: [[COPY11:%[0-9]+]]:_(p4) = COPY [[COPY7]] - ; GCN-NEXT: [[COPY12:%[0-9]+]]:_(p4) = COPY [[COPY6]] - ; GCN-NEXT: [[COPY13:%[0-9]+]]:_(p4) = COPY [[COPY5]] - ; GCN-NEXT: [[COPY14:%[0-9]+]]:_(s64) = COPY [[COPY4]] - ; GCN-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY3]] - ; GCN-NEXT: [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY2]] - ; GCN-NEXT: [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY1]] - ; GCN-NEXT: [[COPY18:%[0-9]+]]:_(s32) = COPY [[COPY]](s32) + ; GCN-NEXT: [[COPY10:%[0-9]+]]:_(p4) = COPY 
[[COPY7]] + ; GCN-NEXT: [[COPY11:%[0-9]+]]:_(p4) = COPY [[COPY6]] + ; GCN-NEXT: [[COPY12:%[0-9]+]]:_(p4) = COPY [[COPY5]] + ; GCN-NEXT: [[COPY13:%[0-9]+]]:_(s64) = COPY [[COPY4]] + ; GCN-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY3]] + ; GCN-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY2]] + ; GCN-NEXT: [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY1]] + ; GCN-NEXT: [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY]](s32) ; GCN-NEXT: [[DEF1:%[0-9]+]]:_(<3 x s16>) = G_IMPLICIT_DEF ; GCN-NEXT: [[CONCAT_VECTORS1:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[UV]](<3 x s16>), [[DEF1]](<3 x s16>) ; GCN-NEXT: [[UV2:%[0-9]+]]:_(<2 x s16>), [[UV3:%[0-9]+]]:_(<2 x s16>), [[UV4:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS1]](<6 x s16>) ; GCN-NEXT: $vgpr0 = COPY [[UV2]](<2 x s16>) ; GCN-NEXT: $vgpr1 = COPY [[UV3]](<2 x s16>) - ; GCN-NEXT: [[COPY19:%[0-9]+]]:_(<4 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3 - ; GCN-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY19]](<4 x s32>) - ; GCN-NEXT: $sgpr4_sgpr5 = COPY [[COPY11]](p4) - ; GCN-NEXT: $sgpr6_sgpr7 = COPY [[COPY12]](p4) - ; GCN-NEXT: $sgpr8_sgpr9 = COPY [[COPY13]](p4) - ; GCN-NEXT: $sgpr10_sgpr11 = COPY [[COPY14]](s64) - ; GCN-NEXT: $sgpr12 = COPY [[COPY15]](s32) - ; GCN-NEXT: $sgpr13 = COPY [[COPY16]](s32) - ; GCN-NEXT: $sgpr14 = COPY [[COPY17]](s32) - ; GCN-NEXT: $vgpr31 = COPY [[COPY18]](s32) + ; GCN-NEXT: [[COPY18:%[0-9]+]]:_(<4 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3 + ; GCN-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY18]](<4 x s32>) + ; GCN-NEXT: $sgpr4_sgpr5 = COPY [[COPY10]](p4) + ; GCN-NEXT: $sgpr6_sgpr7 = COPY [[COPY11]](p4) + ; GCN-NEXT: $sgpr8_sgpr9 = COPY [[COPY12]](p4) + ; GCN-NEXT: $sgpr10_sgpr11 = COPY [[COPY13]](s64) + ; GCN-NEXT: $sgpr12 = COPY [[COPY14]](s32) + ; GCN-NEXT: $sgpr13 = COPY [[COPY15]](s32) + ; GCN-NEXT: $sgpr14 = COPY [[COPY16]](s32) + ; GCN-NEXT: $vgpr31 = COPY [[COPY17]](s32) ; GCN-NEXT: SI_TCRETURN [[GV]](p0), @v3i16_fastcc_v3i16, 0, csr_amdgpu_highregs, implicit $vgpr0, implicit $vgpr1, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $vgpr31 entry: %ret = tail call fastcc <3 x i16> @v3i16_fastcc_v3i16(<3 x i16> %a) @@ -1422,7 +1393,7 @@ declare hidden fastcc <4 x i16> @v4i16_fastcc_v4i16(<4 x i16> %arg0) define hidden fastcc <4 x i16> @sibling_call_v4i16_fastcc_v4i16(<4 x i16> %a) #1 { ; GCN-LABEL: name: sibling_call_v4i16_fastcc_v4i16 ; GCN: bb.1.entry: - ; GCN-NEXT: liveins: $sgpr12, $sgpr13, $sgpr14, $vgpr0, $vgpr1, $vgpr31, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9, $sgpr10_sgpr11, $sgpr30_sgpr31 + ; GCN-NEXT: liveins: $sgpr12, $sgpr13, $sgpr14, $vgpr0, $vgpr1, $vgpr31, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9, $sgpr10_sgpr11 ; GCN-NEXT: {{ $}} ; GCN-NEXT: [[COPY:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr31 ; GCN-NEXT: [[COPY1:%[0-9]+]]:sgpr_32 = COPY $sgpr14 @@ -1435,29 +1406,28 @@ define hidden fastcc <4 x i16> @sibling_call_v4i16_fastcc_v4i16(<4 x i16> %a) #1 ; GCN-NEXT: [[COPY8:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0 ; GCN-NEXT: [[COPY9:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr1 ; GCN-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[COPY8]](<2 x s16>), [[COPY9]](<2 x s16>) - ; GCN-NEXT: [[COPY10:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 ; GCN-NEXT: [[GV:%[0-9]+]]:sreg_64(p0) = G_GLOBAL_VALUE @v4i16_fastcc_v4i16 - ; GCN-NEXT: [[COPY11:%[0-9]+]]:_(p4) = COPY [[COPY7]] - ; GCN-NEXT: [[COPY12:%[0-9]+]]:_(p4) = COPY [[COPY6]] - ; GCN-NEXT: [[COPY13:%[0-9]+]]:_(p4) = COPY [[COPY5]] 
- ; GCN-NEXT: [[COPY14:%[0-9]+]]:_(s64) = COPY [[COPY4]] - ; GCN-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY3]] - ; GCN-NEXT: [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY2]] - ; GCN-NEXT: [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY1]] - ; GCN-NEXT: [[COPY18:%[0-9]+]]:_(s32) = COPY [[COPY]](s32) + ; GCN-NEXT: [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY7]] + ; GCN-NEXT: [[COPY11:%[0-9]+]]:_(p4) = COPY [[COPY6]] + ; GCN-NEXT: [[COPY12:%[0-9]+]]:_(p4) = COPY [[COPY5]] + ; GCN-NEXT: [[COPY13:%[0-9]+]]:_(s64) = COPY [[COPY4]] + ; GCN-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY3]] + ; GCN-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY2]] + ; GCN-NEXT: [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY1]] + ; GCN-NEXT: [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY]](s32) ; GCN-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<4 x s16>) ; GCN-NEXT: $vgpr0 = COPY [[UV]](<2 x s16>) ; GCN-NEXT: $vgpr1 = COPY [[UV1]](<2 x s16>) - ; GCN-NEXT: [[COPY19:%[0-9]+]]:_(<4 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3 - ; GCN-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY19]](<4 x s32>) - ; GCN-NEXT: $sgpr4_sgpr5 = COPY [[COPY11]](p4) - ; GCN-NEXT: $sgpr6_sgpr7 = COPY [[COPY12]](p4) - ; GCN-NEXT: $sgpr8_sgpr9 = COPY [[COPY13]](p4) - ; GCN-NEXT: $sgpr10_sgpr11 = COPY [[COPY14]](s64) - ; GCN-NEXT: $sgpr12 = COPY [[COPY15]](s32) - ; GCN-NEXT: $sgpr13 = COPY [[COPY16]](s32) - ; GCN-NEXT: $sgpr14 = COPY [[COPY17]](s32) - ; GCN-NEXT: $vgpr31 = COPY [[COPY18]](s32) + ; GCN-NEXT: [[COPY18:%[0-9]+]]:_(<4 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3 + ; GCN-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY18]](<4 x s32>) + ; GCN-NEXT: $sgpr4_sgpr5 = COPY [[COPY10]](p4) + ; GCN-NEXT: $sgpr6_sgpr7 = COPY [[COPY11]](p4) + ; GCN-NEXT: $sgpr8_sgpr9 = COPY [[COPY12]](p4) + ; GCN-NEXT: $sgpr10_sgpr11 = COPY [[COPY13]](s64) + ; GCN-NEXT: $sgpr12 = COPY [[COPY14]](s32) + ; GCN-NEXT: $sgpr13 = COPY [[COPY15]](s32) + ; GCN-NEXT: $sgpr14 = COPY [[COPY16]](s32) + ; GCN-NEXT: $vgpr31 = COPY [[COPY17]](s32) ; GCN-NEXT: SI_TCRETURN [[GV]](p0), @v4i16_fastcc_v4i16, 0, csr_amdgpu_highregs, implicit $vgpr0, implicit $vgpr1, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $vgpr31 entry: %ret = tail call fastcc <4 x i16> @v4i16_fastcc_v4i16(<4 x i16> %a) @@ -1469,7 +1439,7 @@ declare hidden fastcc <2 x i64> @v2i64_fastcc_v2i64(<2 x i64> %arg0) define hidden fastcc <2 x i64> @sibling_call_v2i64_fastcc_v2i64(<2 x i64> %a) #1 { ; GCN-LABEL: name: sibling_call_v2i64_fastcc_v2i64 ; GCN: bb.1.entry: - ; GCN-NEXT: liveins: $sgpr12, $sgpr13, $sgpr14, $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr31, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9, $sgpr10_sgpr11, $sgpr30_sgpr31 + ; GCN-NEXT: liveins: $sgpr12, $sgpr13, $sgpr14, $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr31, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9, $sgpr10_sgpr11 ; GCN-NEXT: {{ $}} ; GCN-NEXT: [[COPY:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr31 ; GCN-NEXT: [[COPY1:%[0-9]+]]:sgpr_32 = COPY $sgpr14 @@ -1486,31 +1456,30 @@ define hidden fastcc <2 x i64> @sibling_call_v2i64_fastcc_v2i64(<2 x i64> %a) #1 ; GCN-NEXT: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY8]](s32), [[COPY9]](s32) ; GCN-NEXT: [[MV1:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY10]](s32), [[COPY11]](s32) ; GCN-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s64>) = G_BUILD_VECTOR [[MV]](s64), [[MV1]](s64) - ; GCN-NEXT: [[COPY12:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 ; GCN-NEXT: [[GV:%[0-9]+]]:sreg_64(p0) = 
G_GLOBAL_VALUE @v2i64_fastcc_v2i64 - ; GCN-NEXT: [[COPY13:%[0-9]+]]:_(p4) = COPY [[COPY7]] - ; GCN-NEXT: [[COPY14:%[0-9]+]]:_(p4) = COPY [[COPY6]] - ; GCN-NEXT: [[COPY15:%[0-9]+]]:_(p4) = COPY [[COPY5]] - ; GCN-NEXT: [[COPY16:%[0-9]+]]:_(s64) = COPY [[COPY4]] - ; GCN-NEXT: [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY3]] - ; GCN-NEXT: [[COPY18:%[0-9]+]]:_(s32) = COPY [[COPY2]] - ; GCN-NEXT: [[COPY19:%[0-9]+]]:_(s32) = COPY [[COPY1]] - ; GCN-NEXT: [[COPY20:%[0-9]+]]:_(s32) = COPY [[COPY]](s32) + ; GCN-NEXT: [[COPY12:%[0-9]+]]:_(p4) = COPY [[COPY7]] + ; GCN-NEXT: [[COPY13:%[0-9]+]]:_(p4) = COPY [[COPY6]] + ; GCN-NEXT: [[COPY14:%[0-9]+]]:_(p4) = COPY [[COPY5]] + ; GCN-NEXT: [[COPY15:%[0-9]+]]:_(s64) = COPY [[COPY4]] + ; GCN-NEXT: [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY3]] + ; GCN-NEXT: [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY2]] + ; GCN-NEXT: [[COPY18:%[0-9]+]]:_(s32) = COPY [[COPY1]] + ; GCN-NEXT: [[COPY19:%[0-9]+]]:_(s32) = COPY [[COPY]](s32) ; GCN-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[BUILD_VECTOR]](<2 x s64>) ; GCN-NEXT: $vgpr0 = COPY [[UV]](s32) ; GCN-NEXT: $vgpr1 = COPY [[UV1]](s32) ; GCN-NEXT: $vgpr2 = COPY [[UV2]](s32) ; GCN-NEXT: $vgpr3 = COPY [[UV3]](s32) - ; GCN-NEXT: [[COPY21:%[0-9]+]]:_(<4 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3 - ; GCN-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY21]](<4 x s32>) - ; GCN-NEXT: $sgpr4_sgpr5 = COPY [[COPY13]](p4) - ; GCN-NEXT: $sgpr6_sgpr7 = COPY [[COPY14]](p4) - ; GCN-NEXT: $sgpr8_sgpr9 = COPY [[COPY15]](p4) - ; GCN-NEXT: $sgpr10_sgpr11 = COPY [[COPY16]](s64) - ; GCN-NEXT: $sgpr12 = COPY [[COPY17]](s32) - ; GCN-NEXT: $sgpr13 = COPY [[COPY18]](s32) - ; GCN-NEXT: $sgpr14 = COPY [[COPY19]](s32) - ; GCN-NEXT: $vgpr31 = COPY [[COPY20]](s32) + ; GCN-NEXT: [[COPY20:%[0-9]+]]:_(<4 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3 + ; GCN-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY20]](<4 x s32>) + ; GCN-NEXT: $sgpr4_sgpr5 = COPY [[COPY12]](p4) + ; GCN-NEXT: $sgpr6_sgpr7 = COPY [[COPY13]](p4) + ; GCN-NEXT: $sgpr8_sgpr9 = COPY [[COPY14]](p4) + ; GCN-NEXT: $sgpr10_sgpr11 = COPY [[COPY15]](s64) + ; GCN-NEXT: $sgpr12 = COPY [[COPY16]](s32) + ; GCN-NEXT: $sgpr13 = COPY [[COPY17]](s32) + ; GCN-NEXT: $sgpr14 = COPY [[COPY18]](s32) + ; GCN-NEXT: $vgpr31 = COPY [[COPY19]](s32) ; GCN-NEXT: SI_TCRETURN [[GV]](p0), @v2i64_fastcc_v2i64, 0, csr_amdgpu_highregs, implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $vgpr31 entry: %ret = tail call fastcc <2 x i64> @v2i64_fastcc_v2i64(<2 x i64> %a) diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-tail-call.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-tail-call.ll index 1147647ec514..87b53c69f612 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-tail-call.ll +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-tail-call.ll @@ -6,7 +6,7 @@ declare hidden void @external_void_func_void() define void @tail_call_void_func_void() { ; CHECK-LABEL: name: tail_call_void_func_void ; CHECK: bb.1 (%ir-block.0): - ; CHECK-NEXT: liveins: $sgpr12, $sgpr13, $sgpr14, $vgpr31, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9, $sgpr10_sgpr11, $sgpr30_sgpr31 + ; CHECK-NEXT: liveins: $sgpr12, $sgpr13, $sgpr14, $vgpr31, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9, $sgpr10_sgpr11 ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr_32(s32) = COPY 
$vgpr31 ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr_32 = COPY $sgpr14 @@ -16,26 +16,25 @@ define void @tail_call_void_func_void() { ; CHECK-NEXT: [[COPY5:%[0-9]+]]:sgpr_64 = COPY $sgpr8_sgpr9 ; CHECK-NEXT: [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr6_sgpr7 ; CHECK-NEXT: [[COPY7:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5 - ; CHECK-NEXT: [[COPY8:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 ; CHECK-NEXT: [[GV:%[0-9]+]]:sreg_64(p0) = G_GLOBAL_VALUE @external_void_func_void - ; CHECK-NEXT: [[COPY9:%[0-9]+]]:_(p4) = COPY [[COPY7]] - ; CHECK-NEXT: [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY6]] - ; CHECK-NEXT: [[COPY11:%[0-9]+]]:_(p4) = COPY [[COPY5]] - ; CHECK-NEXT: [[COPY12:%[0-9]+]]:_(s64) = COPY [[COPY4]] - ; CHECK-NEXT: [[COPY13:%[0-9]+]]:_(s32) = COPY [[COPY3]] - ; CHECK-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY2]] - ; CHECK-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY1]] - ; CHECK-NEXT: [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY]](s32) - ; CHECK-NEXT: [[COPY17:%[0-9]+]]:_(<4 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3 - ; CHECK-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY17]](<4 x s32>) - ; CHECK-NEXT: $sgpr4_sgpr5 = COPY [[COPY9]](p4) - ; CHECK-NEXT: $sgpr6_sgpr7 = COPY [[COPY10]](p4) - ; CHECK-NEXT: $sgpr8_sgpr9 = COPY [[COPY11]](p4) - ; CHECK-NEXT: $sgpr10_sgpr11 = COPY [[COPY12]](s64) - ; CHECK-NEXT: $sgpr12 = COPY [[COPY13]](s32) - ; CHECK-NEXT: $sgpr13 = COPY [[COPY14]](s32) - ; CHECK-NEXT: $sgpr14 = COPY [[COPY15]](s32) - ; CHECK-NEXT: $vgpr31 = COPY [[COPY16]](s32) + ; CHECK-NEXT: [[COPY8:%[0-9]+]]:_(p4) = COPY [[COPY7]] + ; CHECK-NEXT: [[COPY9:%[0-9]+]]:_(p4) = COPY [[COPY6]] + ; CHECK-NEXT: [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY5]] + ; CHECK-NEXT: [[COPY11:%[0-9]+]]:_(s64) = COPY [[COPY4]] + ; CHECK-NEXT: [[COPY12:%[0-9]+]]:_(s32) = COPY [[COPY3]] + ; CHECK-NEXT: [[COPY13:%[0-9]+]]:_(s32) = COPY [[COPY2]] + ; CHECK-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY1]] + ; CHECK-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY]](s32) + ; CHECK-NEXT: [[COPY16:%[0-9]+]]:_(<4 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3 + ; CHECK-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY16]](<4 x s32>) + ; CHECK-NEXT: $sgpr4_sgpr5 = COPY [[COPY8]](p4) + ; CHECK-NEXT: $sgpr6_sgpr7 = COPY [[COPY9]](p4) + ; CHECK-NEXT: $sgpr8_sgpr9 = COPY [[COPY10]](p4) + ; CHECK-NEXT: $sgpr10_sgpr11 = COPY [[COPY11]](s64) + ; CHECK-NEXT: $sgpr12 = COPY [[COPY12]](s32) + ; CHECK-NEXT: $sgpr13 = COPY [[COPY13]](s32) + ; CHECK-NEXT: $sgpr14 = COPY [[COPY14]](s32) + ; CHECK-NEXT: $vgpr31 = COPY [[COPY15]](s32) ; CHECK-NEXT: SI_TCRETURN [[GV]](p0), @external_void_func_void, 0, csr_amdgpu_highregs, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $vgpr31 tail call void @external_void_func_void() ret void diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.ds.fmax.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.ds.fmax.ll index 87ef4479d4bf..815f9b1a07fe 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.ds.fmax.ll +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.ds.fmax.ll @@ -187,25 +187,21 @@ define float @ds_fmax_f32_vv(float addrspace(3)* %ptr, float %val) { ; GFX9-NEXT: s_setpc_b64 s[30:31] ; GFX8-MIR-LABEL: name: ds_fmax_f32_vv ; GFX8-MIR: bb.1 (%ir-block.0): - ; GFX8-MIR: liveins: $vgpr0, $vgpr1, $sgpr30_sgpr31 + ; GFX8-MIR: liveins: $vgpr0, $vgpr1 ; GFX8-MIR: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 ; GFX8-MIR: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; GFX8-MIR: [[COPY2:%[0-9]+]]:sgpr_64 
= COPY $sgpr30_sgpr31 ; GFX8-MIR: $m0 = S_MOV_B32 -1 ; GFX8-MIR: [[DS_MAX_RTN_F32_:%[0-9]+]]:vgpr_32 = DS_MAX_RTN_F32 [[COPY]], [[COPY1]], 0, 0, implicit $m0, implicit $exec :: (load store (s32) on %ir.ptr, addrspace 3) ; GFX8-MIR: $vgpr0 = COPY [[DS_MAX_RTN_F32_]] - ; GFX8-MIR: [[COPY3:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY2]] - ; GFX8-MIR: S_SETPC_B64_return [[COPY3]], implicit $vgpr0 + ; GFX8-MIR: SI_RETURN implicit $vgpr0 ; GFX9-MIR-LABEL: name: ds_fmax_f32_vv ; GFX9-MIR: bb.1 (%ir-block.0): - ; GFX9-MIR: liveins: $vgpr0, $vgpr1, $sgpr30_sgpr31 + ; GFX9-MIR: liveins: $vgpr0, $vgpr1 ; GFX9-MIR: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 ; GFX9-MIR: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; GFX9-MIR: [[COPY2:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 ; GFX9-MIR: [[DS_MAX_RTN_F32_gfx9_:%[0-9]+]]:vgpr_32 = DS_MAX_RTN_F32_gfx9 [[COPY]], [[COPY1]], 0, 0, implicit $exec :: (load store (s32) on %ir.ptr, addrspace 3) ; GFX9-MIR: $vgpr0 = COPY [[DS_MAX_RTN_F32_gfx9_]] - ; GFX9-MIR: [[COPY3:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY2]] - ; GFX9-MIR: S_SETPC_B64_return [[COPY3]], implicit $vgpr0 + ; GFX9-MIR: SI_RETURN implicit $vgpr0 %ret = call float @llvm.amdgcn.ds.fmax(float addrspace(3)* %ptr, float %val, i32 0, i32 0, i1 false) ret float %ret } @@ -227,25 +223,21 @@ define float @ds_fmax_f32_vv_offset(float addrspace(3)* %ptr, float %val) { ; GFX9-NEXT: s_setpc_b64 s[30:31] ; GFX8-MIR-LABEL: name: ds_fmax_f32_vv_offset ; GFX8-MIR: bb.1 (%ir-block.0): - ; GFX8-MIR: liveins: $vgpr0, $vgpr1, $sgpr30_sgpr31 + ; GFX8-MIR: liveins: $vgpr0, $vgpr1 ; GFX8-MIR: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 ; GFX8-MIR: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; GFX8-MIR: [[COPY2:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 ; GFX8-MIR: $m0 = S_MOV_B32 -1 ; GFX8-MIR: [[DS_MAX_RTN_F32_:%[0-9]+]]:vgpr_32 = DS_MAX_RTN_F32 [[COPY]], [[COPY1]], 512, 0, implicit $m0, implicit $exec :: (load store (s32) on %ir.gep, addrspace 3) ; GFX8-MIR: $vgpr0 = COPY [[DS_MAX_RTN_F32_]] - ; GFX8-MIR: [[COPY3:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY2]] - ; GFX8-MIR: S_SETPC_B64_return [[COPY3]], implicit $vgpr0 + ; GFX8-MIR: SI_RETURN implicit $vgpr0 ; GFX9-MIR-LABEL: name: ds_fmax_f32_vv_offset ; GFX9-MIR: bb.1 (%ir-block.0): - ; GFX9-MIR: liveins: $vgpr0, $vgpr1, $sgpr30_sgpr31 + ; GFX9-MIR: liveins: $vgpr0, $vgpr1 ; GFX9-MIR: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 ; GFX9-MIR: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; GFX9-MIR: [[COPY2:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 ; GFX9-MIR: [[DS_MAX_RTN_F32_gfx9_:%[0-9]+]]:vgpr_32 = DS_MAX_RTN_F32_gfx9 [[COPY]], [[COPY1]], 512, 0, implicit $exec :: (load store (s32) on %ir.gep, addrspace 3) ; GFX9-MIR: $vgpr0 = COPY [[DS_MAX_RTN_F32_gfx9_]] - ; GFX9-MIR: [[COPY3:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY2]] - ; GFX9-MIR: S_SETPC_B64_return [[COPY3]], implicit $vgpr0 + ; GFX9-MIR: SI_RETURN implicit $vgpr0 %gep = getelementptr float, float addrspace(3)* %ptr, i32 128 %ret = call float @llvm.amdgcn.ds.fmax(float addrspace(3)* %gep, float %val, i32 0, i32 0, i1 false) ret float %ret @@ -268,23 +260,19 @@ define void @ds_fmax_f32_vv_nortn(float addrspace(3)* %ptr, float %val) { ; GFX9-NEXT: s_setpc_b64 s[30:31] ; GFX8-MIR-LABEL: name: ds_fmax_f32_vv_nortn ; GFX8-MIR: bb.1 (%ir-block.0): - ; GFX8-MIR: liveins: $vgpr0, $vgpr1, $sgpr30_sgpr31 + ; GFX8-MIR: liveins: $vgpr0, $vgpr1 ; GFX8-MIR: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 ; GFX8-MIR: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; GFX8-MIR: [[COPY2:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 ; GFX8-MIR: $m0 = S_MOV_B32 -1 ; GFX8-MIR: [[DS_MAX_RTN_F32_:%[0-9]+]]:vgpr_32 
= DS_MAX_RTN_F32 [[COPY]], [[COPY1]], 0, 0, implicit $m0, implicit $exec :: (load store (s32) on %ir.ptr, addrspace 3) - ; GFX8-MIR: [[COPY3:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY2]] - ; GFX8-MIR: S_SETPC_B64_return [[COPY3]] + ; GFX8-MIR: SI_RETURN ; GFX9-MIR-LABEL: name: ds_fmax_f32_vv_nortn ; GFX9-MIR: bb.1 (%ir-block.0): - ; GFX9-MIR: liveins: $vgpr0, $vgpr1, $sgpr30_sgpr31 + ; GFX9-MIR: liveins: $vgpr0, $vgpr1 ; GFX9-MIR: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 ; GFX9-MIR: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; GFX9-MIR: [[COPY2:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 ; GFX9-MIR: [[DS_MAX_RTN_F32_gfx9_:%[0-9]+]]:vgpr_32 = DS_MAX_RTN_F32_gfx9 [[COPY]], [[COPY1]], 0, 0, implicit $exec :: (load store (s32) on %ir.ptr, addrspace 3) - ; GFX9-MIR: [[COPY3:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY2]] - ; GFX9-MIR: S_SETPC_B64_return [[COPY3]] + ; GFX9-MIR: SI_RETURN %ret = call float @llvm.amdgcn.ds.fmax(float addrspace(3)* %ptr, float %val, i32 0, i32 0, i1 false) ret void } @@ -306,23 +294,19 @@ define void @ds_fmax_f32_vv_offset_nortn(float addrspace(3)* %ptr, float %val) { ; GFX9-NEXT: s_setpc_b64 s[30:31] ; GFX8-MIR-LABEL: name: ds_fmax_f32_vv_offset_nortn ; GFX8-MIR: bb.1 (%ir-block.0): - ; GFX8-MIR: liveins: $vgpr0, $vgpr1, $sgpr30_sgpr31 + ; GFX8-MIR: liveins: $vgpr0, $vgpr1 ; GFX8-MIR: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 ; GFX8-MIR: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; GFX8-MIR: [[COPY2:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 ; GFX8-MIR: $m0 = S_MOV_B32 -1 ; GFX8-MIR: [[DS_MAX_RTN_F32_:%[0-9]+]]:vgpr_32 = DS_MAX_RTN_F32 [[COPY]], [[COPY1]], 512, 0, implicit $m0, implicit $exec :: (load store (s32) on %ir.gep, addrspace 3) - ; GFX8-MIR: [[COPY3:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY2]] - ; GFX8-MIR: S_SETPC_B64_return [[COPY3]] + ; GFX8-MIR: SI_RETURN ; GFX9-MIR-LABEL: name: ds_fmax_f32_vv_offset_nortn ; GFX9-MIR: bb.1 (%ir-block.0): - ; GFX9-MIR: liveins: $vgpr0, $vgpr1, $sgpr30_sgpr31 + ; GFX9-MIR: liveins: $vgpr0, $vgpr1 ; GFX9-MIR: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 ; GFX9-MIR: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; GFX9-MIR: [[COPY2:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 ; GFX9-MIR: [[DS_MAX_RTN_F32_gfx9_:%[0-9]+]]:vgpr_32 = DS_MAX_RTN_F32_gfx9 [[COPY]], [[COPY1]], 512, 0, implicit $exec :: (load store (s32) on %ir.gep, addrspace 3) - ; GFX9-MIR: [[COPY3:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY2]] - ; GFX9-MIR: S_SETPC_B64_return [[COPY3]] + ; GFX9-MIR: SI_RETURN %gep = getelementptr float, float addrspace(3)* %ptr, i32 128 %ret = call float @llvm.amdgcn.ds.fmax(float addrspace(3)* %gep, float %val, i32 0, i32 0, i1 false) ret void @@ -345,25 +329,21 @@ define float @ds_fmax_f32_vv_volatile(float addrspace(3)* %ptr, float %val) { ; GFX9-NEXT: s_setpc_b64 s[30:31] ; GFX8-MIR-LABEL: name: ds_fmax_f32_vv_volatile ; GFX8-MIR: bb.1 (%ir-block.0): - ; GFX8-MIR: liveins: $vgpr0, $vgpr1, $sgpr30_sgpr31 + ; GFX8-MIR: liveins: $vgpr0, $vgpr1 ; GFX8-MIR: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 ; GFX8-MIR: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; GFX8-MIR: [[COPY2:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 ; GFX8-MIR: $m0 = S_MOV_B32 -1 ; GFX8-MIR: [[DS_MAX_RTN_F32_:%[0-9]+]]:vgpr_32 = DS_MAX_RTN_F32 [[COPY]], [[COPY1]], 0, 0, implicit $m0, implicit $exec :: (volatile load store (s32) on %ir.ptr, addrspace 3) ; GFX8-MIR: $vgpr0 = COPY [[DS_MAX_RTN_F32_]] - ; GFX8-MIR: [[COPY3:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY2]] - ; GFX8-MIR: S_SETPC_B64_return [[COPY3]], implicit $vgpr0 + ; GFX8-MIR: SI_RETURN implicit $vgpr0 ; GFX9-MIR-LABEL: name: ds_fmax_f32_vv_volatile ; GFX9-MIR: bb.1 
(%ir-block.0): - ; GFX9-MIR: liveins: $vgpr0, $vgpr1, $sgpr30_sgpr31 + ; GFX9-MIR: liveins: $vgpr0, $vgpr1 ; GFX9-MIR: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 ; GFX9-MIR: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; GFX9-MIR: [[COPY2:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 ; GFX9-MIR: [[DS_MAX_RTN_F32_gfx9_:%[0-9]+]]:vgpr_32 = DS_MAX_RTN_F32_gfx9 [[COPY]], [[COPY1]], 0, 0, implicit $exec :: (volatile load store (s32) on %ir.ptr, addrspace 3) ; GFX9-MIR: $vgpr0 = COPY [[DS_MAX_RTN_F32_gfx9_]] - ; GFX9-MIR: [[COPY3:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY2]] - ; GFX9-MIR: S_SETPC_B64_return [[COPY3]], implicit $vgpr0 + ; GFX9-MIR: SI_RETURN implicit $vgpr0 %ret = call float @llvm.amdgcn.ds.fmax(float addrspace(3)* %ptr, float %val, i32 0, i32 0, i1 true) ret float %ret } diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/load-unaligned.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/load-unaligned.ll index 7d604236777d..b4a7d4f9855a 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/load-unaligned.ll +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/load-unaligned.ll @@ -269,22 +269,22 @@ define void @store_lds_v4i32_align1(<4 x i32> addrspace(3)* %out, <4 x i32> %x) ; GFX10-NEXT: v_lshrrev_b16 v6, 8, v1 ; GFX10-NEXT: ds_write_b8 v0, v1 ; GFX10-NEXT: v_lshrrev_b32_e32 v1, 16, v2 -; GFX10-NEXT: v_lshrrev_b16 v8, 8, v2 -; GFX10-NEXT: v_lshrrev_b16 v7, 8, v5 +; GFX10-NEXT: v_lshrrev_b16 v7, 8, v2 +; GFX10-NEXT: v_lshrrev_b16 v8, 8, v5 ; GFX10-NEXT: ds_write_b8 v0, v2 offset:4 ; GFX10-NEXT: ds_write_b8 v0, v6 offset:1 ; GFX10-NEXT: ds_write_b8 v0, v5 offset:2 -; GFX10-NEXT: ds_write_b8 v0, v7 offset:3 +; GFX10-NEXT: v_lshrrev_b16 v5, 8, v3 ; GFX10-NEXT: v_lshrrev_b16 v2, 8, v1 -; GFX10-NEXT: ds_write_b8 v0, v8 offset:5 +; GFX10-NEXT: ds_write_b8 v0, v8 offset:3 +; GFX10-NEXT: ds_write_b8 v0, v7 offset:5 ; GFX10-NEXT: ds_write_b8 v0, v1 offset:6 ; GFX10-NEXT: v_lshrrev_b32_e32 v1, 16, v3 -; GFX10-NEXT: v_lshrrev_b16 v5, 8, v3 ; GFX10-NEXT: ds_write_b8 v0, v2 offset:7 ; GFX10-NEXT: ds_write_b8 v0, v3 offset:8 ; GFX10-NEXT: v_lshrrev_b32_e32 v2, 16, v4 -; GFX10-NEXT: v_lshrrev_b16 v3, 8, v1 ; GFX10-NEXT: ds_write_b8 v0, v5 offset:9 +; GFX10-NEXT: v_lshrrev_b16 v3, 8, v1 ; GFX10-NEXT: v_lshrrev_b16 v5, 8, v4 ; GFX10-NEXT: ds_write_b8 v0, v1 offset:10 ; GFX10-NEXT: v_lshrrev_b16 v1, 8, v2 @@ -344,21 +344,21 @@ define void @store_lds_v3i32_align1(<3 x i32> addrspace(3)* %out, <3 x i32> %x) ; GFX10-NEXT: ds_write_b8 v0, v1 ; GFX10-NEXT: v_lshrrev_b32_e32 v1, 16, v2 ; GFX10-NEXT: v_lshrrev_b16 v6, 8, v2 -; GFX10-NEXT: v_lshrrev_b16 v7, 8, v4 ; GFX10-NEXT: ds_write_b8 v0, v2 offset:4 -; GFX10-NEXT: v_lshrrev_b32_e32 v2, 16, v3 +; GFX10-NEXT: v_lshrrev_b16 v2, 8, v4 +; GFX10-NEXT: v_lshrrev_b32_e32 v7, 16, v3 ; GFX10-NEXT: ds_write_b8 v0, v5 offset:1 ; GFX10-NEXT: ds_write_b8 v0, v4 offset:2 -; GFX10-NEXT: ds_write_b8 v0, v7 offset:3 ; GFX10-NEXT: v_lshrrev_b16 v4, 8, v1 -; GFX10-NEXT: v_lshrrev_b16 v5, 8, v3 -; GFX10-NEXT: ds_write_b8 v0, v1 offset:6 -; GFX10-NEXT: v_lshrrev_b16 v1, 8, v2 +; GFX10-NEXT: ds_write_b8 v0, v2 offset:3 ; GFX10-NEXT: ds_write_b8 v0, v6 offset:5 +; GFX10-NEXT: v_lshrrev_b16 v2, 8, v3 +; GFX10-NEXT: ds_write_b8 v0, v1 offset:6 +; GFX10-NEXT: v_lshrrev_b16 v1, 8, v7 ; GFX10-NEXT: ds_write_b8 v0, v4 offset:7 ; GFX10-NEXT: ds_write_b8 v0, v3 offset:8 -; GFX10-NEXT: ds_write_b8 v0, v5 offset:9 -; GFX10-NEXT: ds_write_b8 v0, v2 offset:10 +; GFX10-NEXT: ds_write_b8 v0, v2 offset:9 +; GFX10-NEXT: ds_write_b8 v0, v7 offset:10 ; GFX10-NEXT: ds_write_b8 v0, v1 offset:11 ; GFX10-NEXT: s_waitcnt lgkmcnt(0) ; 
GFX10-NEXT: s_setpc_b64 s[30:31] diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankcombiner-smed3.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankcombiner-smed3.mir index da576626e995..ab05e6ef7ead 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankcombiner-smed3.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankcombiner-smed3.mir @@ -9,23 +9,20 @@ regBankSelected: true tracksRegLiveness: true body: | bb.1: - liveins: $vgpr0, $sgpr30_sgpr31 + liveins: $vgpr0 ; CHECK-LABEL: name: test_min_max_ValK0_K1_i32 - ; CHECK: liveins: $vgpr0, $sgpr30_sgpr31 + ; CHECK: liveins: $vgpr0 ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 ; CHECK-NEXT: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 -12 ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY [[C]](s32) ; CHECK-NEXT: [[C1:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 17 ; CHECK-NEXT: [[COPY3:%[0-9]+]]:vgpr(s32) = COPY [[C1]](s32) ; CHECK-NEXT: [[AMDGPU_SMED3_:%[0-9]+]]:vgpr(s32) = G_AMDGPU_SMED3 [[COPY]], [[COPY2]], [[COPY3]] ; CHECK-NEXT: $vgpr0 = COPY [[AMDGPU_SMED3_]](s32) - ; CHECK-NEXT: [[COPY4:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY1]] - ; CHECK-NEXT: S_SETPC_B64_return [[COPY4]], implicit $vgpr0 + ; CHECK-NEXT: S_SETPC_B64_return undef $sgpr30_sgpr31, implicit $vgpr0 %0:vgpr(s32) = COPY $vgpr0 - %1:sgpr_64 = COPY $sgpr30_sgpr31 %2:sgpr(s32) = G_CONSTANT i32 -12 %7:vgpr(s32) = COPY %2(s32) %3:vgpr(s32) = G_SMAX %0, %7 @@ -33,8 +30,7 @@ body: | %8:vgpr(s32) = COPY %4(s32) %5:vgpr(s32) = G_SMIN %3, %8 $vgpr0 = COPY %5(s32) - %6:ccr_sgpr_64 = COPY %1 - S_SETPC_B64_return %6, implicit $vgpr0 + S_SETPC_B64_return undef $sgpr30_sgpr31, implicit $vgpr0 ... --- @@ -44,23 +40,20 @@ regBankSelected: true tracksRegLiveness: true body: | bb.1: - liveins: $vgpr0, $sgpr30_sgpr31 + liveins: $vgpr0 ; CHECK-LABEL: name: min_max_ValK0_K1_i32 - ; CHECK: liveins: $vgpr0, $sgpr30_sgpr31 + ; CHECK: liveins: $vgpr0 ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 ; CHECK-NEXT: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 -12 ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY [[C]](s32) ; CHECK-NEXT: [[C1:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 17 ; CHECK-NEXT: [[COPY3:%[0-9]+]]:vgpr(s32) = COPY [[C1]](s32) ; CHECK-NEXT: [[AMDGPU_SMED3_:%[0-9]+]]:vgpr(s32) = G_AMDGPU_SMED3 [[COPY]], [[COPY2]], [[COPY3]] ; CHECK-NEXT: $vgpr0 = COPY [[AMDGPU_SMED3_]](s32) - ; CHECK-NEXT: [[COPY4:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY1]] - ; CHECK-NEXT: S_SETPC_B64_return [[COPY4]], implicit $vgpr0 + ; CHECK-NEXT: S_SETPC_B64_return undef $sgpr30_sgpr31, implicit $vgpr0 %0:vgpr(s32) = COPY $vgpr0 - %1:sgpr_64 = COPY $sgpr30_sgpr31 %2:sgpr(s32) = G_CONSTANT i32 -12 %7:vgpr(s32) = COPY %2(s32) %3:vgpr(s32) = G_SMAX %7, %0 @@ -68,8 +61,7 @@ body: | %8:vgpr(s32) = COPY %4(s32) %5:vgpr(s32) = G_SMIN %3, %8 $vgpr0 = COPY %5(s32) - %6:ccr_sgpr_64 = COPY %1 - S_SETPC_B64_return %6, implicit $vgpr0 + S_SETPC_B64_return undef $sgpr30_sgpr31, implicit $vgpr0 ... 
--- @@ -79,23 +71,20 @@ regBankSelected: true tracksRegLiveness: true body: | bb.1: - liveins: $vgpr0, $sgpr30_sgpr31 + liveins: $vgpr0 ; CHECK-LABEL: name: test_min_K1max_ValK0__i32 - ; CHECK: liveins: $vgpr0, $sgpr30_sgpr31 + ; CHECK: liveins: $vgpr0 ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 ; CHECK-NEXT: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 -12 ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY [[C]](s32) ; CHECK-NEXT: [[C1:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 17 ; CHECK-NEXT: [[COPY3:%[0-9]+]]:vgpr(s32) = COPY [[C1]](s32) ; CHECK-NEXT: [[AMDGPU_SMED3_:%[0-9]+]]:vgpr(s32) = G_AMDGPU_SMED3 [[COPY]], [[COPY2]], [[COPY3]] ; CHECK-NEXT: $vgpr0 = COPY [[AMDGPU_SMED3_]](s32) - ; CHECK-NEXT: [[COPY4:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY1]] - ; CHECK-NEXT: S_SETPC_B64_return [[COPY4]], implicit $vgpr0 + ; CHECK-NEXT: S_SETPC_B64_return undef $sgpr30_sgpr31, implicit $vgpr0 %0:vgpr(s32) = COPY $vgpr0 - %1:sgpr_64 = COPY $sgpr30_sgpr31 %2:sgpr(s32) = G_CONSTANT i32 -12 %7:vgpr(s32) = COPY %2(s32) %3:vgpr(s32) = G_SMAX %0, %7 @@ -103,8 +92,7 @@ body: | %8:vgpr(s32) = COPY %4(s32) %5:vgpr(s32) = G_SMIN %8, %3 $vgpr0 = COPY %5(s32) - %6:ccr_sgpr_64 = COPY %1 - S_SETPC_B64_return %6, implicit $vgpr0 + S_SETPC_B64_return undef $sgpr30_sgpr31, implicit $vgpr0 ... --- @@ -114,23 +102,20 @@ regBankSelected: true tracksRegLiveness: true body: | bb.1: - liveins: $vgpr0, $sgpr30_sgpr31 + liveins: $vgpr0 ; CHECK-LABEL: name: test_min_K1max_K0Val__i32 - ; CHECK: liveins: $vgpr0, $sgpr30_sgpr31 + ; CHECK: liveins: $vgpr0 ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 ; CHECK-NEXT: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 -12 ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY [[C]](s32) ; CHECK-NEXT: [[C1:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 17 ; CHECK-NEXT: [[COPY3:%[0-9]+]]:vgpr(s32) = COPY [[C1]](s32) ; CHECK-NEXT: [[AMDGPU_SMED3_:%[0-9]+]]:vgpr(s32) = G_AMDGPU_SMED3 [[COPY]], [[COPY2]], [[COPY3]] ; CHECK-NEXT: $vgpr0 = COPY [[AMDGPU_SMED3_]](s32) - ; CHECK-NEXT: [[COPY4:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY1]] - ; CHECK-NEXT: S_SETPC_B64_return [[COPY4]], implicit $vgpr0 + ; CHECK-NEXT: S_SETPC_B64_return undef $sgpr30_sgpr31, implicit $vgpr0 %0:vgpr(s32) = COPY $vgpr0 - %1:sgpr_64 = COPY $sgpr30_sgpr31 %2:sgpr(s32) = G_CONSTANT i32 -12 %7:vgpr(s32) = COPY %2(s32) %3:vgpr(s32) = G_SMAX %7, %0 @@ -138,8 +123,7 @@ body: | %8:vgpr(s32) = COPY %4(s32) %5:vgpr(s32) = G_SMIN %8, %3 $vgpr0 = COPY %5(s32) - %6:ccr_sgpr_64 = COPY %1 - S_SETPC_B64_return %6, implicit $vgpr0 + S_SETPC_B64_return undef $sgpr30_sgpr31, implicit $vgpr0 ... 
--- @@ -149,23 +133,20 @@ regBankSelected: true tracksRegLiveness: true body: | bb.1: - liveins: $vgpr0, $sgpr30_sgpr31 + liveins: $vgpr0 ; CHECK-LABEL: name: test_max_min_ValK1_K0_i32 - ; CHECK: liveins: $vgpr0, $sgpr30_sgpr31 + ; CHECK: liveins: $vgpr0 ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 ; CHECK-NEXT: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 17 ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY [[C]](s32) ; CHECK-NEXT: [[C1:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 -12 ; CHECK-NEXT: [[COPY3:%[0-9]+]]:vgpr(s32) = COPY [[C1]](s32) ; CHECK-NEXT: [[AMDGPU_SMED3_:%[0-9]+]]:vgpr(s32) = G_AMDGPU_SMED3 [[COPY]], [[COPY3]], [[COPY2]] ; CHECK-NEXT: $vgpr0 = COPY [[AMDGPU_SMED3_]](s32) - ; CHECK-NEXT: [[COPY4:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY1]] - ; CHECK-NEXT: S_SETPC_B64_return [[COPY4]], implicit $vgpr0 + ; CHECK-NEXT: S_SETPC_B64_return undef $sgpr30_sgpr31, implicit $vgpr0 %0:vgpr(s32) = COPY $vgpr0 - %1:sgpr_64 = COPY $sgpr30_sgpr31 %2:sgpr(s32) = G_CONSTANT i32 17 %7:vgpr(s32) = COPY %2(s32) %3:vgpr(s32) = G_SMIN %0, %7 @@ -173,8 +154,7 @@ body: | %8:vgpr(s32) = COPY %4(s32) %5:vgpr(s32) = G_SMAX %3, %8 $vgpr0 = COPY %5(s32) - %6:ccr_sgpr_64 = COPY %1 - S_SETPC_B64_return %6, implicit $vgpr0 + S_SETPC_B64_return undef $sgpr30_sgpr31, implicit $vgpr0 ... --- @@ -184,23 +164,20 @@ regBankSelected: true tracksRegLiveness: true body: | bb.1: - liveins: $vgpr0, $sgpr30_sgpr31 + liveins: $vgpr0 ; CHECK-LABEL: name: test_max_min_K1Val_K0_i32 - ; CHECK: liveins: $vgpr0, $sgpr30_sgpr31 + ; CHECK: liveins: $vgpr0 ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 ; CHECK-NEXT: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 17 ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY [[C]](s32) ; CHECK-NEXT: [[C1:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 -12 ; CHECK-NEXT: [[COPY3:%[0-9]+]]:vgpr(s32) = COPY [[C1]](s32) ; CHECK-NEXT: [[AMDGPU_SMED3_:%[0-9]+]]:vgpr(s32) = G_AMDGPU_SMED3 [[COPY]], [[COPY3]], [[COPY2]] ; CHECK-NEXT: $vgpr0 = COPY [[AMDGPU_SMED3_]](s32) - ; CHECK-NEXT: [[COPY4:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY1]] - ; CHECK-NEXT: S_SETPC_B64_return [[COPY4]], implicit $vgpr0 + ; CHECK-NEXT: S_SETPC_B64_return undef $sgpr30_sgpr31, implicit $vgpr0 %0:vgpr(s32) = COPY $vgpr0 - %1:sgpr_64 = COPY $sgpr30_sgpr31 %2:sgpr(s32) = G_CONSTANT i32 17 %7:vgpr(s32) = COPY %2(s32) %3:vgpr(s32) = G_SMIN %7, %0 @@ -208,8 +185,7 @@ body: | %8:vgpr(s32) = COPY %4(s32) %5:vgpr(s32) = G_SMAX %3, %8 $vgpr0 = COPY %5(s32) - %6:ccr_sgpr_64 = COPY %1 - S_SETPC_B64_return %6, implicit $vgpr0 + S_SETPC_B64_return undef $sgpr30_sgpr31, implicit $vgpr0 ... 
--- @@ -219,23 +195,20 @@ regBankSelected: true tracksRegLiveness: true body: | bb.1: - liveins: $vgpr0, $sgpr30_sgpr31 + liveins: $vgpr0 ; CHECK-LABEL: name: test_max_K0min_ValK1__i32 - ; CHECK: liveins: $vgpr0, $sgpr30_sgpr31 + ; CHECK: liveins: $vgpr0 ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 ; CHECK-NEXT: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 17 ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY [[C]](s32) ; CHECK-NEXT: [[C1:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 -12 ; CHECK-NEXT: [[COPY3:%[0-9]+]]:vgpr(s32) = COPY [[C1]](s32) ; CHECK-NEXT: [[AMDGPU_SMED3_:%[0-9]+]]:vgpr(s32) = G_AMDGPU_SMED3 [[COPY]], [[COPY3]], [[COPY2]] ; CHECK-NEXT: $vgpr0 = COPY [[AMDGPU_SMED3_]](s32) - ; CHECK-NEXT: [[COPY4:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY1]] - ; CHECK-NEXT: S_SETPC_B64_return [[COPY4]], implicit $vgpr0 + ; CHECK-NEXT: S_SETPC_B64_return undef $sgpr30_sgpr31, implicit $vgpr0 %0:vgpr(s32) = COPY $vgpr0 - %1:sgpr_64 = COPY $sgpr30_sgpr31 %2:sgpr(s32) = G_CONSTANT i32 17 %7:vgpr(s32) = COPY %2(s32) %3:vgpr(s32) = G_SMIN %0, %7 @@ -243,8 +216,7 @@ body: | %8:vgpr(s32) = COPY %4(s32) %5:vgpr(s32) = G_SMAX %8, %3 $vgpr0 = COPY %5(s32) - %6:ccr_sgpr_64 = COPY %1 - S_SETPC_B64_return %6, implicit $vgpr0 + S_SETPC_B64_return undef $sgpr30_sgpr31, implicit $vgpr0 ... --- @@ -254,23 +226,20 @@ regBankSelected: true tracksRegLiveness: true body: | bb.1: - liveins: $vgpr0, $sgpr30_sgpr31 + liveins: $vgpr0 ; CHECK-LABEL: name: test_max_K0min_K1Val__i32 - ; CHECK: liveins: $vgpr0, $sgpr30_sgpr31 + ; CHECK: liveins: $vgpr0 ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 ; CHECK-NEXT: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 17 ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY [[C]](s32) ; CHECK-NEXT: [[C1:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 -12 ; CHECK-NEXT: [[COPY3:%[0-9]+]]:vgpr(s32) = COPY [[C1]](s32) ; CHECK-NEXT: [[AMDGPU_SMED3_:%[0-9]+]]:vgpr(s32) = G_AMDGPU_SMED3 [[COPY]], [[COPY3]], [[COPY2]] ; CHECK-NEXT: $vgpr0 = COPY [[AMDGPU_SMED3_]](s32) - ; CHECK-NEXT: [[COPY4:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY1]] - ; CHECK-NEXT: S_SETPC_B64_return [[COPY4]], implicit $vgpr0 + ; CHECK-NEXT: S_SETPC_B64_return undef $sgpr30_sgpr31, implicit $vgpr0 %0:vgpr(s32) = COPY $vgpr0 - %1:sgpr_64 = COPY $sgpr30_sgpr31 %2:sgpr(s32) = G_CONSTANT i32 17 %7:vgpr(s32) = COPY %2(s32) %3:vgpr(s32) = G_SMIN %7, %0 @@ -278,8 +247,7 @@ body: | %8:vgpr(s32) = COPY %4(s32) %5:vgpr(s32) = G_SMAX %8, %3 $vgpr0 = COPY %5(s32) - %6:ccr_sgpr_64 = COPY %1 - S_SETPC_B64_return %6, implicit $vgpr0 + S_SETPC_B64_return undef $sgpr30_sgpr31, implicit $vgpr0 ... 
--- @@ -289,13 +257,12 @@ regBankSelected: true tracksRegLiveness: true body: | bb.1: - liveins: $vgpr0, $sgpr30_sgpr31 + liveins: $vgpr0 ; CHECK-LABEL: name: test_max_K0min_K1Val__v2i16 - ; CHECK: liveins: $vgpr0, $sgpr30_sgpr31 + ; CHECK: liveins: $vgpr0 ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr(<2 x s16>) = COPY $vgpr0 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 ; CHECK-NEXT: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 17 ; CHECK-NEXT: [[BUILD_VECTOR_TRUNC:%[0-9]+]]:sgpr(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[C]](s32), [[C]](s32) ; CHECK-NEXT: [[C1:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 -12 @@ -305,10 +272,8 @@ body: | ; CHECK-NEXT: [[COPY3:%[0-9]+]]:vgpr(<2 x s16>) = COPY [[BUILD_VECTOR_TRUNC1]](<2 x s16>) ; CHECK-NEXT: [[SMAX:%[0-9]+]]:vgpr(<2 x s16>) = G_SMAX [[COPY3]], [[SMIN]] ; CHECK-NEXT: $vgpr0 = COPY [[SMAX]](<2 x s16>) - ; CHECK-NEXT: [[COPY4:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY1]] - ; CHECK-NEXT: S_SETPC_B64_return [[COPY4]], implicit $vgpr0 + ; CHECK-NEXT: S_SETPC_B64_return undef $sgpr30_sgpr31, implicit $vgpr0 %0:vgpr(<2 x s16>) = COPY $vgpr0 - %1:sgpr_64 = COPY $sgpr30_sgpr31 %9:sgpr(s32) = G_CONSTANT i32 17 %2:sgpr(<2 x s16>) = G_BUILD_VECTOR_TRUNC %9(s32), %9(s32) %10:sgpr(s32) = G_CONSTANT i32 -12 @@ -318,8 +283,7 @@ body: | %12:vgpr(<2 x s16>) = COPY %5(<2 x s16>) %7:vgpr(<2 x s16>) = G_SMAX %12, %4 $vgpr0 = COPY %7(<2 x s16>) - %8:ccr_sgpr_64 = COPY %1 - S_SETPC_B64_return %8, implicit $vgpr0 + S_SETPC_B64_return undef $sgpr30_sgpr31, implicit $vgpr0 ... --- @@ -361,23 +325,20 @@ regBankSelected: true tracksRegLiveness: true body: | bb.1: - liveins: $vgpr0, $sgpr30_sgpr31 + liveins: $vgpr0 ; CHECK-LABEL: name: test_non_inline_constant_i32 - ; CHECK: liveins: $vgpr0, $sgpr30_sgpr31 + ; CHECK: liveins: $vgpr0 ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 ; CHECK-NEXT: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 -12 ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY [[C]](s32) ; CHECK-NEXT: [[C1:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 65 ; CHECK-NEXT: [[COPY3:%[0-9]+]]:vgpr(s32) = COPY [[C1]](s32) ; CHECK-NEXT: [[AMDGPU_SMED3_:%[0-9]+]]:vgpr(s32) = G_AMDGPU_SMED3 [[COPY]], [[COPY2]], [[COPY3]] ; CHECK-NEXT: $vgpr0 = COPY [[AMDGPU_SMED3_]](s32) - ; CHECK-NEXT: [[COPY4:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY1]] - ; CHECK-NEXT: S_SETPC_B64_return [[COPY4]], implicit $vgpr0 + ; CHECK-NEXT: S_SETPC_B64_return undef $sgpr30_sgpr31, implicit $vgpr0 %0:vgpr(s32) = COPY $vgpr0 - %1:sgpr_64 = COPY $sgpr30_sgpr31 %2:sgpr(s32) = G_CONSTANT i32 -12 %7:vgpr(s32) = COPY %2(s32) %3:vgpr(s32) = G_SMAX %0, %7 @@ -385,6 +346,5 @@ body: | %8:vgpr(s32) = COPY %4(s32) %5:vgpr(s32) = G_SMIN %3, %8 $vgpr0 = COPY %5(s32) - %6:ccr_sgpr_64 = COPY %1 - S_SETPC_B64_return %6, implicit $vgpr0 + S_SETPC_B64_return undef $sgpr30_sgpr31, implicit $vgpr0 ... 
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankcombiner-umed3.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankcombiner-umed3.mir index 8c5d796a60e0..4ecc912fc7e4 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankcombiner-umed3.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankcombiner-umed3.mir @@ -9,23 +9,20 @@ regBankSelected: true tracksRegLiveness: true body: | bb.1: - liveins: $vgpr0, $sgpr30_sgpr31 + liveins: $vgpr0 ; CHECK-LABEL: name: test_min_max_ValK0_K1_u32 - ; CHECK: liveins: $vgpr0, $sgpr30_sgpr31 + ; CHECK: liveins: $vgpr0 ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 ; CHECK-NEXT: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 12 ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY [[C]](s32) ; CHECK-NEXT: [[C1:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 17 ; CHECK-NEXT: [[COPY3:%[0-9]+]]:vgpr(s32) = COPY [[C1]](s32) ; CHECK-NEXT: [[AMDGPU_UMED3_:%[0-9]+]]:vgpr(s32) = G_AMDGPU_UMED3 [[COPY]], [[COPY2]], [[COPY3]] ; CHECK-NEXT: $vgpr0 = COPY [[AMDGPU_UMED3_]](s32) - ; CHECK-NEXT: [[COPY4:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY1]] - ; CHECK-NEXT: S_SETPC_B64_return [[COPY4]], implicit $vgpr0 + ; CHECK-NEXT: S_SETPC_B64_return undef $sgpr30_sgpr31, implicit $vgpr0 %0:vgpr(s32) = COPY $vgpr0 - %1:sgpr_64 = COPY $sgpr30_sgpr31 %2:sgpr(s32) = G_CONSTANT i32 12 %7:vgpr(s32) = COPY %2(s32) %3:vgpr(s32) = G_UMAX %0, %7 @@ -33,8 +30,7 @@ body: | %8:vgpr(s32) = COPY %4(s32) %5:vgpr(s32) = G_UMIN %3, %8 $vgpr0 = COPY %5(s32) - %6:ccr_sgpr_64 = COPY %1 - S_SETPC_B64_return %6, implicit $vgpr0 + S_SETPC_B64_return undef $sgpr30_sgpr31, implicit $vgpr0 ... --- @@ -44,23 +40,20 @@ regBankSelected: true tracksRegLiveness: true body: | bb.1: - liveins: $vgpr0, $sgpr30_sgpr31 + liveins: $vgpr0 ; CHECK-LABEL: name: min_max_ValK0_K1_i32 - ; CHECK: liveins: $vgpr0, $sgpr30_sgpr31 + ; CHECK: liveins: $vgpr0 ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 ; CHECK-NEXT: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 12 ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY [[C]](s32) ; CHECK-NEXT: [[C1:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 17 ; CHECK-NEXT: [[COPY3:%[0-9]+]]:vgpr(s32) = COPY [[C1]](s32) ; CHECK-NEXT: [[AMDGPU_UMED3_:%[0-9]+]]:vgpr(s32) = G_AMDGPU_UMED3 [[COPY]], [[COPY2]], [[COPY3]] ; CHECK-NEXT: $vgpr0 = COPY [[AMDGPU_UMED3_]](s32) - ; CHECK-NEXT: [[COPY4:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY1]] - ; CHECK-NEXT: S_SETPC_B64_return [[COPY4]], implicit $vgpr0 + ; CHECK-NEXT: S_SETPC_B64_return undef $sgpr30_sgpr31, implicit $vgpr0 %0:vgpr(s32) = COPY $vgpr0 - %1:sgpr_64 = COPY $sgpr30_sgpr31 %2:sgpr(s32) = G_CONSTANT i32 12 %7:vgpr(s32) = COPY %2(s32) %3:vgpr(s32) = G_UMAX %7, %0 @@ -68,8 +61,7 @@ body: | %8:vgpr(s32) = COPY %4(s32) %5:vgpr(s32) = G_UMIN %3, %8 $vgpr0 = COPY %5(s32) - %6:ccr_sgpr_64 = COPY %1 - S_SETPC_B64_return %6, implicit $vgpr0 + S_SETPC_B64_return undef $sgpr30_sgpr31, implicit $vgpr0 ... 
--- @@ -79,23 +71,20 @@ regBankSelected: true tracksRegLiveness: true body: | bb.1: - liveins: $vgpr0, $sgpr30_sgpr31 + liveins: $vgpr0 ; CHECK-LABEL: name: test_min_K1max_ValK0__u32 - ; CHECK: liveins: $vgpr0, $sgpr30_sgpr31 + ; CHECK: liveins: $vgpr0 ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 ; CHECK-NEXT: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 12 ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY [[C]](s32) ; CHECK-NEXT: [[C1:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 17 ; CHECK-NEXT: [[COPY3:%[0-9]+]]:vgpr(s32) = COPY [[C1]](s32) ; CHECK-NEXT: [[AMDGPU_UMED3_:%[0-9]+]]:vgpr(s32) = G_AMDGPU_UMED3 [[COPY]], [[COPY2]], [[COPY3]] ; CHECK-NEXT: $vgpr0 = COPY [[AMDGPU_UMED3_]](s32) - ; CHECK-NEXT: [[COPY4:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY1]] - ; CHECK-NEXT: S_SETPC_B64_return [[COPY4]], implicit $vgpr0 + ; CHECK-NEXT: S_SETPC_B64_return undef $sgpr30_sgpr31, implicit $vgpr0 %0:vgpr(s32) = COPY $vgpr0 - %1:sgpr_64 = COPY $sgpr30_sgpr31 %2:sgpr(s32) = G_CONSTANT i32 12 %7:vgpr(s32) = COPY %2(s32) %3:vgpr(s32) = G_UMAX %0, %7 @@ -103,8 +92,7 @@ body: | %8:vgpr(s32) = COPY %4(s32) %5:vgpr(s32) = G_UMIN %8, %3 $vgpr0 = COPY %5(s32) - %6:ccr_sgpr_64 = COPY %1 - S_SETPC_B64_return %6, implicit $vgpr0 + S_SETPC_B64_return undef $sgpr30_sgpr31, implicit $vgpr0 ... --- @@ -114,23 +102,20 @@ regBankSelected: true tracksRegLiveness: true body: | bb.1: - liveins: $vgpr0, $sgpr30_sgpr31 + liveins: $vgpr0 ; CHECK-LABEL: name: test_min_K1max_K0Val__u32 - ; CHECK: liveins: $vgpr0, $sgpr30_sgpr31 + ; CHECK: liveins: $vgpr0 ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 ; CHECK-NEXT: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 12 ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY [[C]](s32) ; CHECK-NEXT: [[C1:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 17 ; CHECK-NEXT: [[COPY3:%[0-9]+]]:vgpr(s32) = COPY [[C1]](s32) ; CHECK-NEXT: [[AMDGPU_UMED3_:%[0-9]+]]:vgpr(s32) = G_AMDGPU_UMED3 [[COPY]], [[COPY2]], [[COPY3]] ; CHECK-NEXT: $vgpr0 = COPY [[AMDGPU_UMED3_]](s32) - ; CHECK-NEXT: [[COPY4:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY1]] - ; CHECK-NEXT: S_SETPC_B64_return [[COPY4]], implicit $vgpr0 + ; CHECK-NEXT: S_SETPC_B64_return undef $sgpr30_sgpr31, implicit $vgpr0 %0:vgpr(s32) = COPY $vgpr0 - %1:sgpr_64 = COPY $sgpr30_sgpr31 %2:sgpr(s32) = G_CONSTANT i32 12 %7:vgpr(s32) = COPY %2(s32) %3:vgpr(s32) = G_UMAX %7, %0 @@ -138,8 +123,7 @@ body: | %8:vgpr(s32) = COPY %4(s32) %5:vgpr(s32) = G_UMIN %8, %3 $vgpr0 = COPY %5(s32) - %6:ccr_sgpr_64 = COPY %1 - S_SETPC_B64_return %6, implicit $vgpr0 + S_SETPC_B64_return undef $sgpr30_sgpr31, implicit $vgpr0 ... 
--- @@ -149,23 +133,20 @@ regBankSelected: true tracksRegLiveness: true body: | bb.1: - liveins: $vgpr0, $sgpr30_sgpr31 + liveins: $vgpr0 ; CHECK-LABEL: name: test_max_min_ValK1_K0_u32 - ; CHECK: liveins: $vgpr0, $sgpr30_sgpr31 + ; CHECK: liveins: $vgpr0 ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 ; CHECK-NEXT: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 17 ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY [[C]](s32) ; CHECK-NEXT: [[C1:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 12 ; CHECK-NEXT: [[COPY3:%[0-9]+]]:vgpr(s32) = COPY [[C1]](s32) ; CHECK-NEXT: [[AMDGPU_UMED3_:%[0-9]+]]:vgpr(s32) = G_AMDGPU_UMED3 [[COPY]], [[COPY3]], [[COPY2]] ; CHECK-NEXT: $vgpr0 = COPY [[AMDGPU_UMED3_]](s32) - ; CHECK-NEXT: [[COPY4:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY1]] - ; CHECK-NEXT: S_SETPC_B64_return [[COPY4]], implicit $vgpr0 + ; CHECK-NEXT: S_SETPC_B64_return undef $sgpr30_sgpr31, implicit $vgpr0 %0:vgpr(s32) = COPY $vgpr0 - %1:sgpr_64 = COPY $sgpr30_sgpr31 %2:sgpr(s32) = G_CONSTANT i32 17 %7:vgpr(s32) = COPY %2(s32) %3:vgpr(s32) = G_UMIN %0, %7 @@ -173,8 +154,7 @@ body: | %8:vgpr(s32) = COPY %4(s32) %5:vgpr(s32) = G_UMAX %3, %8 $vgpr0 = COPY %5(s32) - %6:ccr_sgpr_64 = COPY %1 - S_SETPC_B64_return %6, implicit $vgpr0 + S_SETPC_B64_return undef $sgpr30_sgpr31, implicit $vgpr0 ... --- @@ -184,23 +164,20 @@ regBankSelected: true tracksRegLiveness: true body: | bb.1: - liveins: $vgpr0, $sgpr30_sgpr31 + liveins: $vgpr0 ; CHECK-LABEL: name: test_max_min_K1Val_K0_u32 - ; CHECK: liveins: $vgpr0, $sgpr30_sgpr31 + ; CHECK: liveins: $vgpr0 ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 ; CHECK-NEXT: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 17 ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY [[C]](s32) ; CHECK-NEXT: [[C1:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 12 ; CHECK-NEXT: [[COPY3:%[0-9]+]]:vgpr(s32) = COPY [[C1]](s32) ; CHECK-NEXT: [[AMDGPU_UMED3_:%[0-9]+]]:vgpr(s32) = G_AMDGPU_UMED3 [[COPY]], [[COPY3]], [[COPY2]] ; CHECK-NEXT: $vgpr0 = COPY [[AMDGPU_UMED3_]](s32) - ; CHECK-NEXT: [[COPY4:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY1]] - ; CHECK-NEXT: S_SETPC_B64_return [[COPY4]], implicit $vgpr0 + ; CHECK-NEXT: S_SETPC_B64_return undef $sgpr30_sgpr31, implicit $vgpr0 %0:vgpr(s32) = COPY $vgpr0 - %1:sgpr_64 = COPY $sgpr30_sgpr31 %2:sgpr(s32) = G_CONSTANT i32 17 %7:vgpr(s32) = COPY %2(s32) %3:vgpr(s32) = G_UMIN %7, %0 @@ -208,8 +185,7 @@ body: | %8:vgpr(s32) = COPY %4(s32) %5:vgpr(s32) = G_UMAX %3, %8 $vgpr0 = COPY %5(s32) - %6:ccr_sgpr_64 = COPY %1 - S_SETPC_B64_return %6, implicit $vgpr0 + S_SETPC_B64_return undef $sgpr30_sgpr31, implicit $vgpr0 ... 
--- @@ -219,23 +195,20 @@ regBankSelected: true tracksRegLiveness: true body: | bb.1: - liveins: $vgpr0, $sgpr30_sgpr31 + liveins: $vgpr0 ; CHECK-LABEL: name: test_max_K0min_ValK1__u32 - ; CHECK: liveins: $vgpr0, $sgpr30_sgpr31 + ; CHECK: liveins: $vgpr0 ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 ; CHECK-NEXT: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 17 ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY [[C]](s32) ; CHECK-NEXT: [[C1:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 12 ; CHECK-NEXT: [[COPY3:%[0-9]+]]:vgpr(s32) = COPY [[C1]](s32) ; CHECK-NEXT: [[AMDGPU_UMED3_:%[0-9]+]]:vgpr(s32) = G_AMDGPU_UMED3 [[COPY]], [[COPY3]], [[COPY2]] ; CHECK-NEXT: $vgpr0 = COPY [[AMDGPU_UMED3_]](s32) - ; CHECK-NEXT: [[COPY4:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY1]] - ; CHECK-NEXT: S_SETPC_B64_return [[COPY4]], implicit $vgpr0 + ; CHECK-NEXT: S_SETPC_B64_return undef $sgpr30_sgpr31, implicit $vgpr0 %0:vgpr(s32) = COPY $vgpr0 - %1:sgpr_64 = COPY $sgpr30_sgpr31 %2:sgpr(s32) = G_CONSTANT i32 17 %7:vgpr(s32) = COPY %2(s32) %3:vgpr(s32) = G_UMIN %0, %7 @@ -243,8 +216,7 @@ body: | %8:vgpr(s32) = COPY %4(s32) %5:vgpr(s32) = G_UMAX %8, %3 $vgpr0 = COPY %5(s32) - %6:ccr_sgpr_64 = COPY %1 - S_SETPC_B64_return %6, implicit $vgpr0 + S_SETPC_B64_return undef $sgpr30_sgpr31, implicit $vgpr0 ... --- @@ -254,23 +226,20 @@ regBankSelected: true tracksRegLiveness: true body: | bb.1: - liveins: $vgpr0, $sgpr30_sgpr31 + liveins: $vgpr0 ; CHECK-LABEL: name: test_max_K0min_K1Val__u32 - ; CHECK: liveins: $vgpr0, $sgpr30_sgpr31 + ; CHECK: liveins: $vgpr0 ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 ; CHECK-NEXT: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 17 ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY [[C]](s32) ; CHECK-NEXT: [[C1:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 12 ; CHECK-NEXT: [[COPY3:%[0-9]+]]:vgpr(s32) = COPY [[C1]](s32) ; CHECK-NEXT: [[AMDGPU_UMED3_:%[0-9]+]]:vgpr(s32) = G_AMDGPU_UMED3 [[COPY]], [[COPY3]], [[COPY2]] ; CHECK-NEXT: $vgpr0 = COPY [[AMDGPU_UMED3_]](s32) - ; CHECK-NEXT: [[COPY4:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY1]] - ; CHECK-NEXT: S_SETPC_B64_return [[COPY4]], implicit $vgpr0 + ; CHECK-NEXT: S_SETPC_B64_return undef $sgpr30_sgpr31, implicit $vgpr0 %0:vgpr(s32) = COPY $vgpr0 - %1:sgpr_64 = COPY $sgpr30_sgpr31 %2:sgpr(s32) = G_CONSTANT i32 17 %7:vgpr(s32) = COPY %2(s32) %3:vgpr(s32) = G_UMIN %7, %0 @@ -278,8 +247,7 @@ body: | %8:vgpr(s32) = COPY %4(s32) %5:vgpr(s32) = G_UMAX %8, %3 $vgpr0 = COPY %5(s32) - %6:ccr_sgpr_64 = COPY %1 - S_SETPC_B64_return %6, implicit $vgpr0 + S_SETPC_B64_return undef $sgpr30_sgpr31, implicit $vgpr0 ... 
--- @@ -289,13 +257,12 @@ regBankSelected: true tracksRegLiveness: true body: | bb.1: - liveins: $vgpr0, $sgpr30_sgpr31 + liveins: $vgpr0 ; CHECK-LABEL: name: test_max_K0min_K1Val__v2u16 - ; CHECK: liveins: $vgpr0, $sgpr30_sgpr31 + ; CHECK: liveins: $vgpr0 ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr(<2 x s16>) = COPY $vgpr0 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 ; CHECK-NEXT: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 17 ; CHECK-NEXT: [[BUILD_VECTOR_TRUNC:%[0-9]+]]:sgpr(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[C]](s32), [[C]](s32) ; CHECK-NEXT: [[C1:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 12 @@ -305,10 +272,8 @@ body: | ; CHECK-NEXT: [[COPY3:%[0-9]+]]:vgpr(<2 x s16>) = COPY [[BUILD_VECTOR_TRUNC1]](<2 x s16>) ; CHECK-NEXT: [[UMAX:%[0-9]+]]:vgpr(<2 x s16>) = G_UMAX [[COPY3]], [[UMIN]] ; CHECK-NEXT: $vgpr0 = COPY [[UMAX]](<2 x s16>) - ; CHECK-NEXT: [[COPY4:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY1]] - ; CHECK-NEXT: S_SETPC_B64_return [[COPY4]], implicit $vgpr0 + ; CHECK-NEXT: S_SETPC_B64_return undef $sgpr30_sgpr31, implicit $vgpr0 %0:vgpr(<2 x s16>) = COPY $vgpr0 - %1:sgpr_64 = COPY $sgpr30_sgpr31 %9:sgpr(s32) = G_CONSTANT i32 17 %2:sgpr(<2 x s16>) = G_BUILD_VECTOR_TRUNC %9(s32), %9(s32) %10:sgpr(s32) = G_CONSTANT i32 12 @@ -318,8 +283,7 @@ body: | %12:vgpr(<2 x s16>) = COPY %5(<2 x s16>) %7:vgpr(<2 x s16>) = G_UMAX %12, %4 $vgpr0 = COPY %7(<2 x s16>) - %8:ccr_sgpr_64 = COPY %1 - S_SETPC_B64_return %8, implicit $vgpr0 + S_SETPC_B64_return undef $sgpr30_sgpr31, implicit $vgpr0 ... --- @@ -362,23 +326,20 @@ regBankSelected: true tracksRegLiveness: true body: | bb.1: - liveins: $vgpr0, $sgpr30_sgpr31 + liveins: $vgpr0 ; CHECK-LABEL: name: test_non_inline_constant_i32 - ; CHECK: liveins: $vgpr0, $sgpr30_sgpr31 + ; CHECK: liveins: $vgpr0 ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 ; CHECK-NEXT: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 12 ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY [[C]](s32) ; CHECK-NEXT: [[C1:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 65 ; CHECK-NEXT: [[COPY3:%[0-9]+]]:vgpr(s32) = COPY [[C1]](s32) ; CHECK-NEXT: [[AMDGPU_UMED3_:%[0-9]+]]:vgpr(s32) = G_AMDGPU_UMED3 [[COPY]], [[COPY2]], [[COPY3]] ; CHECK-NEXT: $vgpr0 = COPY [[AMDGPU_UMED3_]](s32) - ; CHECK-NEXT: [[COPY4:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY1]] - ; CHECK-NEXT: S_SETPC_B64_return [[COPY4]], implicit $vgpr0 + ; CHECK-NEXT: S_SETPC_B64_return undef $sgpr30_sgpr31, implicit $vgpr0 %0:vgpr(s32) = COPY $vgpr0 - %1:sgpr_64 = COPY $sgpr30_sgpr31 %2:sgpr(s32) = G_CONSTANT i32 12 %7:vgpr(s32) = COPY %2(s32) %3:vgpr(s32) = G_UMAX %0, %7 @@ -386,6 +347,5 @@ body: | %8:vgpr(s32) = COPY %4(s32) %5:vgpr(s32) = G_UMIN %3, %8 $vgpr0 = COPY %5(s32) - %6:ccr_sgpr_64 = COPY %1 - S_SETPC_B64_return %6, implicit $vgpr0 + S_SETPC_B64_return undef $sgpr30_sgpr31, implicit $vgpr0 ... 
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/roundeven.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/roundeven.ll index d9385251a4dc..641602657005 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/roundeven.ll +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/roundeven.ll @@ -401,13 +401,13 @@ define <4 x half> @v_roundeven_v4f16(<4 x half> %x) { ; GFX10: ; %bb.0: ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 -; GFX10-NEXT: v_rndne_f16_e32 v2, v0 +; GFX10-NEXT: v_mov_b32_e32 v2, 0xffff +; GFX10-NEXT: v_rndne_f16_e32 v3, v0 ; GFX10-NEXT: v_rndne_f16_sdwa v0, v0 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 -; GFX10-NEXT: v_rndne_f16_e32 v3, v1 -; GFX10-NEXT: v_mov_b32_e32 v4, 0xffff +; GFX10-NEXT: v_rndne_f16_e32 v4, v1 ; GFX10-NEXT: v_rndne_f16_sdwa v1, v1 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 -; GFX10-NEXT: v_and_or_b32 v0, v2, v4, v0 -; GFX10-NEXT: v_and_or_b32 v1, v3, v4, v1 +; GFX10-NEXT: v_and_or_b32 v0, v3, v2, v0 +; GFX10-NEXT: v_and_or_b32 v1, v4, v2, v1 ; GFX10-NEXT: s_setpc_b64 s[30:31] %roundeven = call <4 x half> @llvm.roundeven.v4f16(<4 x half> %x) ret <4 x half> %roundeven diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/saddsat.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/saddsat.ll index c118e757ef53..1389e96f592c 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/saddsat.ll +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/saddsat.ll @@ -642,25 +642,25 @@ define i32 @v_saddsat_v4i8(i32 %lhs.arg, i32 %rhs.arg) { ; GFX10-NEXT: v_lshrrev_b32_e32 v4, 24, v0 ; GFX10-NEXT: v_lshrrev_b32_e32 v5, 24, v1 ; GFX10-NEXT: s_mov_b32 s4, 8 -; GFX10-NEXT: v_lshrrev_b32_e32 v3, 16, v0 -; GFX10-NEXT: v_lshrrev_b32_sdwa v2, s4, v0 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD -; GFX10-NEXT: v_lshrrev_b32_sdwa v6, s4, v1 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD -; GFX10-NEXT: v_mov_b32_e32 v7, 0xffff +; GFX10-NEXT: v_mov_b32_e32 v2, 0xffff +; GFX10-NEXT: v_lshrrev_b32_sdwa v3, s4, v0 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD +; GFX10-NEXT: v_lshrrev_b32_e32 v6, 16, v0 +; GFX10-NEXT: v_lshrrev_b32_sdwa v7, s4, v1 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD ; GFX10-NEXT: v_lshrrev_b32_e32 v8, 16, v1 ; GFX10-NEXT: v_lshlrev_b32_e32 v4, 16, v4 ; GFX10-NEXT: v_lshlrev_b32_e32 v5, 16, v5 +; GFX10-NEXT: v_and_or_b32 v0, v0, v2, v3 +; GFX10-NEXT: v_and_or_b32 v1, v1, v2, v7 ; GFX10-NEXT: s_movk_i32 s4, 0xff -; GFX10-NEXT: v_and_or_b32 v0, v0, v7, v2 -; GFX10-NEXT: v_and_or_b32 v1, v1, v7, v6 -; GFX10-NEXT: v_and_or_b32 v2, v3, v7, v4 -; GFX10-NEXT: v_and_or_b32 v3, v8, v7, v5 -; GFX10-NEXT: v_mov_b32_e32 v4, 24 +; GFX10-NEXT: v_and_or_b32 v3, v6, v2, v4 +; GFX10-NEXT: v_and_or_b32 v2, v8, v2, v5 ; GFX10-NEXT: v_pk_lshlrev_b16 v0, 8, v0 op_sel_hi:[0,1] ; GFX10-NEXT: v_pk_lshlrev_b16 v1, 8, v1 op_sel_hi:[0,1] -; GFX10-NEXT: v_pk_lshlrev_b16 v2, 8, v2 op_sel_hi:[0,1] +; GFX10-NEXT: v_mov_b32_e32 v4, 24 ; GFX10-NEXT: v_pk_lshlrev_b16 v3, 8, v3 op_sel_hi:[0,1] +; GFX10-NEXT: v_pk_lshlrev_b16 v2, 8, v2 op_sel_hi:[0,1] ; GFX10-NEXT: v_pk_add_i16 v0, v0, v1 clamp -; GFX10-NEXT: v_pk_add_i16 v1, v2, v3 clamp +; GFX10-NEXT: v_pk_add_i16 v1, v3, v2 clamp ; GFX10-NEXT: v_mov_b32_e32 v2, 8 ; GFX10-NEXT: v_pk_ashrrev_i16 v0, 8, v0 op_sel_hi:[0,1] ; GFX10-NEXT: v_pk_ashrrev_i16 v1, 8, v1 op_sel_hi:[0,1] diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/ssubsat.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/ssubsat.ll index 1f8a322dbdd1..8fee6c91474c 100644 --- 
a/llvm/test/CodeGen/AMDGPU/GlobalISel/ssubsat.ll +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/ssubsat.ll @@ -642,25 +642,25 @@ define i32 @v_ssubsat_v4i8(i32 %lhs.arg, i32 %rhs.arg) { ; GFX10-NEXT: v_lshrrev_b32_e32 v4, 24, v0 ; GFX10-NEXT: v_lshrrev_b32_e32 v5, 24, v1 ; GFX10-NEXT: s_mov_b32 s4, 8 -; GFX10-NEXT: v_lshrrev_b32_e32 v3, 16, v0 -; GFX10-NEXT: v_lshrrev_b32_sdwa v2, s4, v0 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD -; GFX10-NEXT: v_lshrrev_b32_sdwa v6, s4, v1 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD -; GFX10-NEXT: v_mov_b32_e32 v7, 0xffff +; GFX10-NEXT: v_mov_b32_e32 v2, 0xffff +; GFX10-NEXT: v_lshrrev_b32_sdwa v3, s4, v0 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD +; GFX10-NEXT: v_lshrrev_b32_e32 v6, 16, v0 +; GFX10-NEXT: v_lshrrev_b32_sdwa v7, s4, v1 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD ; GFX10-NEXT: v_lshrrev_b32_e32 v8, 16, v1 ; GFX10-NEXT: v_lshlrev_b32_e32 v4, 16, v4 ; GFX10-NEXT: v_lshlrev_b32_e32 v5, 16, v5 +; GFX10-NEXT: v_and_or_b32 v0, v0, v2, v3 +; GFX10-NEXT: v_and_or_b32 v1, v1, v2, v7 ; GFX10-NEXT: s_movk_i32 s4, 0xff -; GFX10-NEXT: v_and_or_b32 v0, v0, v7, v2 -; GFX10-NEXT: v_and_or_b32 v1, v1, v7, v6 -; GFX10-NEXT: v_and_or_b32 v2, v3, v7, v4 -; GFX10-NEXT: v_and_or_b32 v3, v8, v7, v5 -; GFX10-NEXT: v_mov_b32_e32 v4, 24 +; GFX10-NEXT: v_and_or_b32 v3, v6, v2, v4 +; GFX10-NEXT: v_and_or_b32 v2, v8, v2, v5 ; GFX10-NEXT: v_pk_lshlrev_b16 v0, 8, v0 op_sel_hi:[0,1] ; GFX10-NEXT: v_pk_lshlrev_b16 v1, 8, v1 op_sel_hi:[0,1] -; GFX10-NEXT: v_pk_lshlrev_b16 v2, 8, v2 op_sel_hi:[0,1] +; GFX10-NEXT: v_mov_b32_e32 v4, 24 ; GFX10-NEXT: v_pk_lshlrev_b16 v3, 8, v3 op_sel_hi:[0,1] +; GFX10-NEXT: v_pk_lshlrev_b16 v2, 8, v2 op_sel_hi:[0,1] ; GFX10-NEXT: v_pk_sub_i16 v0, v0, v1 clamp -; GFX10-NEXT: v_pk_sub_i16 v1, v2, v3 clamp +; GFX10-NEXT: v_pk_sub_i16 v1, v3, v2 clamp ; GFX10-NEXT: v_mov_b32_e32 v2, 8 ; GFX10-NEXT: v_pk_ashrrev_i16 v0, 8, v0 op_sel_hi:[0,1] ; GFX10-NEXT: v_pk_ashrrev_i16 v1, 8, v1 op_sel_hi:[0,1] diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/uaddsat.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/uaddsat.ll index 503b45d49991..8e0d11434692 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/uaddsat.ll +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/uaddsat.ll @@ -471,25 +471,25 @@ define i32 @v_uaddsat_v4i8(i32 %lhs.arg, i32 %rhs.arg) { ; GFX10-NEXT: v_lshrrev_b32_e32 v4, 24, v0 ; GFX10-NEXT: v_lshrrev_b32_e32 v5, 24, v1 ; GFX10-NEXT: s_mov_b32 s4, 8 -; GFX10-NEXT: v_lshrrev_b32_e32 v3, 16, v0 -; GFX10-NEXT: v_lshrrev_b32_sdwa v2, s4, v0 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD -; GFX10-NEXT: v_lshrrev_b32_sdwa v6, s4, v1 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD -; GFX10-NEXT: v_mov_b32_e32 v7, 0xffff +; GFX10-NEXT: v_mov_b32_e32 v2, 0xffff +; GFX10-NEXT: v_lshrrev_b32_sdwa v3, s4, v0 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD +; GFX10-NEXT: v_lshrrev_b32_e32 v6, 16, v0 +; GFX10-NEXT: v_lshrrev_b32_sdwa v7, s4, v1 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD ; GFX10-NEXT: v_lshrrev_b32_e32 v8, 16, v1 ; GFX10-NEXT: v_lshlrev_b32_e32 v4, 16, v4 ; GFX10-NEXT: v_lshlrev_b32_e32 v5, 16, v5 +; GFX10-NEXT: v_and_or_b32 v0, v0, v2, v3 +; GFX10-NEXT: v_and_or_b32 v1, v1, v2, v7 ; GFX10-NEXT: s_movk_i32 s4, 0xff -; GFX10-NEXT: v_and_or_b32 v0, v0, v7, v2 -; GFX10-NEXT: v_and_or_b32 v1, v1, v7, v6 -; GFX10-NEXT: v_and_or_b32 v2, v3, v7, v4 -; GFX10-NEXT: v_and_or_b32 v3, v8, v7, v5 -; 
GFX10-NEXT: v_mov_b32_e32 v4, 24 +; GFX10-NEXT: v_and_or_b32 v3, v6, v2, v4 +; GFX10-NEXT: v_and_or_b32 v2, v8, v2, v5 ; GFX10-NEXT: v_pk_lshlrev_b16 v0, 8, v0 op_sel_hi:[0,1] ; GFX10-NEXT: v_pk_lshlrev_b16 v1, 8, v1 op_sel_hi:[0,1] -; GFX10-NEXT: v_pk_lshlrev_b16 v2, 8, v2 op_sel_hi:[0,1] +; GFX10-NEXT: v_mov_b32_e32 v4, 24 ; GFX10-NEXT: v_pk_lshlrev_b16 v3, 8, v3 op_sel_hi:[0,1] +; GFX10-NEXT: v_pk_lshlrev_b16 v2, 8, v2 op_sel_hi:[0,1] ; GFX10-NEXT: v_pk_add_u16 v0, v0, v1 clamp -; GFX10-NEXT: v_pk_add_u16 v1, v2, v3 clamp +; GFX10-NEXT: v_pk_add_u16 v1, v3, v2 clamp ; GFX10-NEXT: v_mov_b32_e32 v2, 8 ; GFX10-NEXT: v_pk_lshrrev_b16 v0, 8, v0 op_sel_hi:[0,1] ; GFX10-NEXT: v_pk_lshrrev_b16 v1, 8, v1 op_sel_hi:[0,1] diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/usubsat.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/usubsat.ll index a4da5822dac5..82959c8f22cb 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/usubsat.ll +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/usubsat.ll @@ -459,25 +459,25 @@ define i32 @v_usubsat_v4i8(i32 %lhs.arg, i32 %rhs.arg) { ; GFX10-NEXT: v_lshrrev_b32_e32 v4, 24, v0 ; GFX10-NEXT: v_lshrrev_b32_e32 v5, 24, v1 ; GFX10-NEXT: s_mov_b32 s4, 8 -; GFX10-NEXT: v_lshrrev_b32_e32 v3, 16, v0 -; GFX10-NEXT: v_lshrrev_b32_sdwa v2, s4, v0 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD -; GFX10-NEXT: v_lshrrev_b32_sdwa v6, s4, v1 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD -; GFX10-NEXT: v_mov_b32_e32 v7, 0xffff +; GFX10-NEXT: v_mov_b32_e32 v2, 0xffff +; GFX10-NEXT: v_lshrrev_b32_sdwa v3, s4, v0 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD +; GFX10-NEXT: v_lshrrev_b32_e32 v6, 16, v0 +; GFX10-NEXT: v_lshrrev_b32_sdwa v7, s4, v1 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD ; GFX10-NEXT: v_lshrrev_b32_e32 v8, 16, v1 ; GFX10-NEXT: v_lshlrev_b32_e32 v4, 16, v4 ; GFX10-NEXT: v_lshlrev_b32_e32 v5, 16, v5 +; GFX10-NEXT: v_and_or_b32 v0, v0, v2, v3 +; GFX10-NEXT: v_and_or_b32 v1, v1, v2, v7 ; GFX10-NEXT: s_movk_i32 s4, 0xff -; GFX10-NEXT: v_and_or_b32 v0, v0, v7, v2 -; GFX10-NEXT: v_and_or_b32 v1, v1, v7, v6 -; GFX10-NEXT: v_and_or_b32 v2, v3, v7, v4 -; GFX10-NEXT: v_and_or_b32 v3, v8, v7, v5 -; GFX10-NEXT: v_mov_b32_e32 v4, 24 +; GFX10-NEXT: v_and_or_b32 v3, v6, v2, v4 +; GFX10-NEXT: v_and_or_b32 v2, v8, v2, v5 ; GFX10-NEXT: v_pk_lshlrev_b16 v0, 8, v0 op_sel_hi:[0,1] ; GFX10-NEXT: v_pk_lshlrev_b16 v1, 8, v1 op_sel_hi:[0,1] -; GFX10-NEXT: v_pk_lshlrev_b16 v2, 8, v2 op_sel_hi:[0,1] +; GFX10-NEXT: v_mov_b32_e32 v4, 24 ; GFX10-NEXT: v_pk_lshlrev_b16 v3, 8, v3 op_sel_hi:[0,1] +; GFX10-NEXT: v_pk_lshlrev_b16 v2, 8, v2 op_sel_hi:[0,1] ; GFX10-NEXT: v_pk_sub_u16 v0, v0, v1 clamp -; GFX10-NEXT: v_pk_sub_u16 v1, v2, v3 clamp +; GFX10-NEXT: v_pk_sub_u16 v1, v3, v2 clamp ; GFX10-NEXT: v_mov_b32_e32 v2, 8 ; GFX10-NEXT: v_pk_lshrrev_b16 v0, 8, v0 op_sel_hi:[0,1] ; GFX10-NEXT: v_pk_lshrrev_b16 v1, 8, v1 op_sel_hi:[0,1] diff --git a/llvm/test/CodeGen/AMDGPU/abi-attribute-hints-undefined-behavior.ll b/llvm/test/CodeGen/AMDGPU/abi-attribute-hints-undefined-behavior.ll index 11084620176d..dcde1b181b0c 100644 --- a/llvm/test/CodeGen/AMDGPU/abi-attribute-hints-undefined-behavior.ll +++ b/llvm/test/CodeGen/AMDGPU/abi-attribute-hints-undefined-behavior.ll @@ -29,15 +29,15 @@ define void @parent_func_missing_inputs() #0 { ; FIXEDABI-NEXT: s_add_u32 s16, s16, requires_all_inputs@rel32@lo+4 ; FIXEDABI-NEXT: s_addc_u32 s17, s17, requires_all_inputs@rel32@hi+12 ; FIXEDABI-NEXT: s_swappc_b64 s[30:31], s[16:17] -; FIXEDABI-NEXT: 
v_readlane_b32 s4, v40, 0 -; FIXEDABI-NEXT: v_readlane_b32 s5, v40, 1 +; FIXEDABI-NEXT: v_readlane_b32 s31, v40, 1 +; FIXEDABI-NEXT: v_readlane_b32 s30, v40, 0 ; FIXEDABI-NEXT: s_addk_i32 s32, 0xfc00 ; FIXEDABI-NEXT: v_readlane_b32 s33, v40, 2 -; FIXEDABI-NEXT: s_or_saveexec_b64 s[6:7], -1 +; FIXEDABI-NEXT: s_or_saveexec_b64 s[4:5], -1 ; FIXEDABI-NEXT: buffer_load_dword v40, off, s[0:3], s32 ; 4-byte Folded Reload -; FIXEDABI-NEXT: s_mov_b64 exec, s[6:7] +; FIXEDABI-NEXT: s_mov_b64 exec, s[4:5] ; FIXEDABI-NEXT: s_waitcnt vmcnt(0) -; FIXEDABI-NEXT: s_setpc_b64 s[4:5] +; FIXEDABI-NEXT: s_setpc_b64 s[30:31] call void @requires_all_inputs() ret void } diff --git a/llvm/test/CodeGen/AMDGPU/amdpal-callable.ll b/llvm/test/CodeGen/AMDGPU/amdpal-callable.ll index 546bba7146b4..65f5c623a4e7 100644 --- a/llvm/test/CodeGen/AMDGPU/amdpal-callable.ll +++ b/llvm/test/CodeGen/AMDGPU/amdpal-callable.ll @@ -173,9 +173,9 @@ attributes #0 = { nounwind } ; GCN-NEXT: .vgpr_count: 0x1{{$}} ; GCN-NEXT: no_stack_call: ; GCN-NEXT: .lds_size: 0{{$}} -; GCN-NEXT: .sgpr_count: 0x26{{$}} -; GCN-NEXT: .stack_frame_size_in_bytes: 0{{$}} -; GCN-NEXT: .vgpr_count: 0x2{{$}} +; GCN-NEXT: .sgpr_count: 0x24{{$}} +; GCN-NEXT: .stack_frame_size_in_bytes: 0x10{{$}} +; GCN-NEXT: .vgpr_count: 0x3{{$}} ; GCN-NEXT: no_stack_extern_call: ; GCN-NEXT: .lds_size: 0{{$}} ; GFX8-NEXT: .sgpr_count: 0x28{{$}} @@ -213,9 +213,9 @@ attributes #0 = { nounwind } ; GCN-NEXT: .vgpr_count: 0x2{{$}} ; GCN-NEXT: simple_stack_call: ; GCN-NEXT: .lds_size: 0{{$}} -; GCN-NEXT: .sgpr_count: 0x26{{$}} +; GCN-NEXT: .sgpr_count: 0x24{{$}} ; GCN-NEXT: .stack_frame_size_in_bytes: 0x20{{$}} -; GCN-NEXT: .vgpr_count: 0x3{{$}} +; GCN-NEXT: .vgpr_count: 0x4{{$}} ; GCN-NEXT: simple_stack_extern_call: ; GCN-NEXT: .lds_size: 0{{$}} ; GFX8-NEXT: .sgpr_count: 0x28{{$}} diff --git a/llvm/test/CodeGen/AMDGPU/branch-relax-spill.ll b/llvm/test/CodeGen/AMDGPU/branch-relax-spill.ll index 8ea4dbed7866..0f86d0ee0360 100644 --- a/llvm/test/CodeGen/AMDGPU/branch-relax-spill.ll +++ b/llvm/test/CodeGen/AMDGPU/branch-relax-spill.ll @@ -903,80 +903,80 @@ define void @spill_func(i32 addrspace(1)* %arg) #0 { ; CHECK-NEXT: buffer_store_dword v1, off, s[0:3], s32 offset:4 ; 4-byte Folded Spill ; CHECK-NEXT: s_mov_b64 exec, s[4:5] ; CHECK-NEXT: s_waitcnt expcnt(1) -; CHECK-NEXT: v_writelane_b32 v0, s33, 0 -; CHECK-NEXT: v_writelane_b32 v0, s34, 1 -; CHECK-NEXT: v_writelane_b32 v0, s35, 2 -; CHECK-NEXT: v_writelane_b32 v0, s36, 3 -; CHECK-NEXT: v_writelane_b32 v0, s37, 4 -; CHECK-NEXT: v_writelane_b32 v0, s38, 5 -; CHECK-NEXT: v_writelane_b32 v0, s39, 6 -; CHECK-NEXT: v_writelane_b32 v0, s40, 7 -; CHECK-NEXT: v_writelane_b32 v0, s41, 8 -; CHECK-NEXT: v_writelane_b32 v0, s42, 9 -; CHECK-NEXT: v_writelane_b32 v0, s43, 10 -; CHECK-NEXT: v_writelane_b32 v0, s44, 11 -; CHECK-NEXT: v_writelane_b32 v0, s45, 12 -; CHECK-NEXT: v_writelane_b32 v0, s46, 13 -; CHECK-NEXT: v_writelane_b32 v0, s47, 14 -; CHECK-NEXT: v_writelane_b32 v0, s48, 15 -; CHECK-NEXT: v_writelane_b32 v0, s49, 16 -; CHECK-NEXT: v_writelane_b32 v0, s50, 17 -; CHECK-NEXT: v_writelane_b32 v0, s51, 18 -; CHECK-NEXT: v_writelane_b32 v0, s52, 19 -; CHECK-NEXT: v_writelane_b32 v0, s53, 20 -; CHECK-NEXT: v_writelane_b32 v0, s54, 21 -; CHECK-NEXT: v_writelane_b32 v0, s55, 22 -; CHECK-NEXT: v_writelane_b32 v0, s56, 23 -; CHECK-NEXT: v_writelane_b32 v0, s57, 24 -; CHECK-NEXT: v_writelane_b32 v0, s58, 25 -; CHECK-NEXT: v_writelane_b32 v0, s59, 26 -; CHECK-NEXT: v_writelane_b32 v0, s60, 27 -; CHECK-NEXT: v_writelane_b32 v0, s61, 28 -; 
CHECK-NEXT: v_writelane_b32 v0, s62, 29 -; CHECK-NEXT: v_writelane_b32 v0, s63, 30 -; CHECK-NEXT: v_writelane_b32 v0, s64, 31 -; CHECK-NEXT: v_writelane_b32 v0, s65, 32 -; CHECK-NEXT: v_writelane_b32 v0, s66, 33 -; CHECK-NEXT: v_writelane_b32 v0, s67, 34 -; CHECK-NEXT: v_writelane_b32 v0, s68, 35 -; CHECK-NEXT: v_writelane_b32 v0, s69, 36 -; CHECK-NEXT: v_writelane_b32 v0, s70, 37 -; CHECK-NEXT: v_writelane_b32 v0, s71, 38 -; CHECK-NEXT: v_writelane_b32 v0, s72, 39 -; CHECK-NEXT: v_writelane_b32 v0, s73, 40 -; CHECK-NEXT: v_writelane_b32 v0, s74, 41 -; CHECK-NEXT: v_writelane_b32 v0, s75, 42 -; CHECK-NEXT: v_writelane_b32 v0, s76, 43 -; CHECK-NEXT: v_writelane_b32 v0, s77, 44 -; CHECK-NEXT: v_writelane_b32 v0, s78, 45 -; CHECK-NEXT: v_writelane_b32 v0, s79, 46 -; CHECK-NEXT: v_writelane_b32 v0, s80, 47 -; CHECK-NEXT: v_writelane_b32 v0, s81, 48 -; CHECK-NEXT: v_writelane_b32 v0, s82, 49 -; CHECK-NEXT: v_writelane_b32 v0, s83, 50 -; CHECK-NEXT: v_writelane_b32 v0, s84, 51 -; CHECK-NEXT: v_writelane_b32 v0, s85, 52 -; CHECK-NEXT: v_writelane_b32 v0, s86, 53 -; CHECK-NEXT: v_writelane_b32 v0, s87, 54 -; CHECK-NEXT: v_writelane_b32 v0, s88, 55 -; CHECK-NEXT: v_writelane_b32 v0, s89, 56 -; CHECK-NEXT: v_writelane_b32 v0, s90, 57 +; CHECK-NEXT: v_writelane_b32 v0, s30, 0 +; CHECK-NEXT: v_writelane_b32 v0, s31, 1 +; CHECK-NEXT: v_writelane_b32 v0, s33, 2 +; CHECK-NEXT: v_writelane_b32 v0, s34, 3 +; CHECK-NEXT: v_writelane_b32 v0, s35, 4 +; CHECK-NEXT: v_writelane_b32 v0, s36, 5 +; CHECK-NEXT: v_writelane_b32 v0, s37, 6 +; CHECK-NEXT: v_writelane_b32 v0, s38, 7 +; CHECK-NEXT: v_writelane_b32 v0, s39, 8 +; CHECK-NEXT: v_writelane_b32 v0, s40, 9 +; CHECK-NEXT: v_writelane_b32 v0, s41, 10 +; CHECK-NEXT: v_writelane_b32 v0, s42, 11 +; CHECK-NEXT: v_writelane_b32 v0, s43, 12 +; CHECK-NEXT: v_writelane_b32 v0, s44, 13 +; CHECK-NEXT: v_writelane_b32 v0, s45, 14 +; CHECK-NEXT: v_writelane_b32 v0, s46, 15 +; CHECK-NEXT: v_writelane_b32 v0, s47, 16 +; CHECK-NEXT: v_writelane_b32 v0, s48, 17 +; CHECK-NEXT: v_writelane_b32 v0, s49, 18 +; CHECK-NEXT: v_writelane_b32 v0, s50, 19 +; CHECK-NEXT: v_writelane_b32 v0, s51, 20 +; CHECK-NEXT: v_writelane_b32 v0, s52, 21 +; CHECK-NEXT: v_writelane_b32 v0, s53, 22 +; CHECK-NEXT: v_writelane_b32 v0, s54, 23 +; CHECK-NEXT: v_writelane_b32 v0, s55, 24 +; CHECK-NEXT: v_writelane_b32 v0, s56, 25 +; CHECK-NEXT: v_writelane_b32 v0, s57, 26 +; CHECK-NEXT: v_writelane_b32 v0, s58, 27 +; CHECK-NEXT: v_writelane_b32 v0, s59, 28 +; CHECK-NEXT: v_writelane_b32 v0, s60, 29 +; CHECK-NEXT: v_writelane_b32 v0, s61, 30 +; CHECK-NEXT: v_writelane_b32 v0, s62, 31 +; CHECK-NEXT: v_writelane_b32 v0, s63, 32 +; CHECK-NEXT: v_writelane_b32 v0, s64, 33 +; CHECK-NEXT: v_writelane_b32 v0, s65, 34 +; CHECK-NEXT: v_writelane_b32 v0, s66, 35 +; CHECK-NEXT: v_writelane_b32 v0, s67, 36 +; CHECK-NEXT: v_writelane_b32 v0, s68, 37 +; CHECK-NEXT: v_writelane_b32 v0, s69, 38 +; CHECK-NEXT: v_writelane_b32 v0, s70, 39 +; CHECK-NEXT: v_writelane_b32 v0, s71, 40 +; CHECK-NEXT: v_writelane_b32 v0, s72, 41 +; CHECK-NEXT: v_writelane_b32 v0, s73, 42 +; CHECK-NEXT: v_writelane_b32 v0, s74, 43 +; CHECK-NEXT: v_writelane_b32 v0, s75, 44 +; CHECK-NEXT: v_writelane_b32 v0, s76, 45 +; CHECK-NEXT: v_writelane_b32 v0, s77, 46 +; CHECK-NEXT: v_writelane_b32 v0, s78, 47 +; CHECK-NEXT: v_writelane_b32 v0, s79, 48 +; CHECK-NEXT: v_writelane_b32 v0, s80, 49 +; CHECK-NEXT: v_writelane_b32 v0, s81, 50 +; CHECK-NEXT: v_writelane_b32 v0, s82, 51 +; CHECK-NEXT: v_writelane_b32 v0, s83, 52 +; CHECK-NEXT: v_writelane_b32 v0, s84, 
53 +; CHECK-NEXT: v_writelane_b32 v0, s85, 54 +; CHECK-NEXT: v_writelane_b32 v0, s86, 55 +; CHECK-NEXT: v_writelane_b32 v0, s87, 56 +; CHECK-NEXT: v_writelane_b32 v0, s88, 57 ; CHECK-NEXT: s_waitcnt expcnt(0) -; CHECK-NEXT: v_writelane_b32 v1, s97, 0 -; CHECK-NEXT: v_writelane_b32 v0, s91, 58 -; CHECK-NEXT: v_writelane_b32 v1, s98, 1 -; CHECK-NEXT: v_writelane_b32 v0, s92, 59 -; CHECK-NEXT: v_writelane_b32 v1, s99, 2 -; CHECK-NEXT: v_writelane_b32 v0, s93, 60 -; CHECK-NEXT: v_writelane_b32 v1, s100, 3 -; CHECK-NEXT: v_writelane_b32 v0, s94, 61 -; CHECK-NEXT: v_writelane_b32 v1, s101, 4 -; CHECK-NEXT: v_writelane_b32 v0, s95, 62 -; CHECK-NEXT: v_writelane_b32 v1, s30, 5 -; CHECK-NEXT: s_mov_b32 s29, s12 -; CHECK-NEXT: v_writelane_b32 v0, s96, 63 -; CHECK-NEXT: v_writelane_b32 v1, s31, 6 -; CHECK-NEXT: s_cmp_eq_u32 s29, 0 +; CHECK-NEXT: v_writelane_b32 v1, s95, 0 +; CHECK-NEXT: v_writelane_b32 v0, s89, 58 +; CHECK-NEXT: v_writelane_b32 v1, s96, 1 +; CHECK-NEXT: v_writelane_b32 v0, s90, 59 +; CHECK-NEXT: v_writelane_b32 v1, s97, 2 +; CHECK-NEXT: v_writelane_b32 v0, s91, 60 +; CHECK-NEXT: v_writelane_b32 v1, s98, 3 +; CHECK-NEXT: v_writelane_b32 v0, s92, 61 +; CHECK-NEXT: v_writelane_b32 v1, s99, 4 +; CHECK-NEXT: v_writelane_b32 v0, s93, 62 +; CHECK-NEXT: v_writelane_b32 v1, s100, 5 +; CHECK-NEXT: s_mov_b32 s31, s12 +; CHECK-NEXT: v_writelane_b32 v0, s94, 63 +; CHECK-NEXT: v_writelane_b32 v1, s101, 6 +; CHECK-NEXT: s_cmp_eq_u32 s31, 0 ; CHECK-NEXT: ;;#ASMSTART ; CHECK-NEXT: s_mov_b32 s0, 0 ; CHECK-NEXT: ;;#ASMEND @@ -1336,7 +1336,6 @@ define void @spill_func(i32 addrspace(1)* %arg) #0 { ; CHECK-NEXT: ;;#ASMSTART ; CHECK-NEXT: ; reg use s5 ; CHECK-NEXT: ;;#ASMEND -; CHECK-NEXT: v_readlane_b32 s4, v1, 5 ; CHECK-NEXT: ;;#ASMSTART ; CHECK-NEXT: ; reg use s6 ; CHECK-NEXT: ;;#ASMEND @@ -1631,82 +1630,83 @@ define void @spill_func(i32 addrspace(1)* %arg) #0 { ; CHECK-NEXT: ;;#ASMSTART ; CHECK-NEXT: ; reg use vcc_hi ; CHECK-NEXT: ;;#ASMEND -; CHECK-NEXT: v_readlane_b32 s5, v1, 6 -; CHECK-NEXT: v_readlane_b32 s101, v1, 4 -; CHECK-NEXT: v_readlane_b32 s100, v1, 3 -; CHECK-NEXT: v_readlane_b32 s99, v1, 2 -; CHECK-NEXT: v_readlane_b32 s98, v1, 1 -; CHECK-NEXT: v_readlane_b32 s97, v1, 0 -; CHECK-NEXT: v_readlane_b32 s96, v0, 63 -; CHECK-NEXT: v_readlane_b32 s95, v0, 62 -; CHECK-NEXT: v_readlane_b32 s94, v0, 61 -; CHECK-NEXT: v_readlane_b32 s93, v0, 60 -; CHECK-NEXT: v_readlane_b32 s92, v0, 59 -; CHECK-NEXT: v_readlane_b32 s91, v0, 58 -; CHECK-NEXT: v_readlane_b32 s90, v0, 57 -; CHECK-NEXT: v_readlane_b32 s89, v0, 56 -; CHECK-NEXT: v_readlane_b32 s88, v0, 55 -; CHECK-NEXT: v_readlane_b32 s87, v0, 54 -; CHECK-NEXT: v_readlane_b32 s86, v0, 53 -; CHECK-NEXT: v_readlane_b32 s85, v0, 52 -; CHECK-NEXT: v_readlane_b32 s84, v0, 51 -; CHECK-NEXT: v_readlane_b32 s83, v0, 50 -; CHECK-NEXT: v_readlane_b32 s82, v0, 49 -; CHECK-NEXT: v_readlane_b32 s81, v0, 48 -; CHECK-NEXT: v_readlane_b32 s80, v0, 47 -; CHECK-NEXT: v_readlane_b32 s79, v0, 46 -; CHECK-NEXT: v_readlane_b32 s78, v0, 45 -; CHECK-NEXT: v_readlane_b32 s77, v0, 44 -; CHECK-NEXT: v_readlane_b32 s76, v0, 43 -; CHECK-NEXT: v_readlane_b32 s75, v0, 42 -; CHECK-NEXT: v_readlane_b32 s74, v0, 41 -; CHECK-NEXT: v_readlane_b32 s73, v0, 40 -; CHECK-NEXT: v_readlane_b32 s72, v0, 39 -; CHECK-NEXT: v_readlane_b32 s71, v0, 38 -; CHECK-NEXT: v_readlane_b32 s70, v0, 37 -; CHECK-NEXT: v_readlane_b32 s69, v0, 36 -; CHECK-NEXT: v_readlane_b32 s68, v0, 35 -; CHECK-NEXT: v_readlane_b32 s67, v0, 34 -; CHECK-NEXT: v_readlane_b32 s66, v0, 33 -; CHECK-NEXT: v_readlane_b32 s65, v0, 32 
-; CHECK-NEXT: v_readlane_b32 s64, v0, 31 -; CHECK-NEXT: v_readlane_b32 s63, v0, 30 -; CHECK-NEXT: v_readlane_b32 s62, v0, 29 -; CHECK-NEXT: v_readlane_b32 s61, v0, 28 -; CHECK-NEXT: v_readlane_b32 s60, v0, 27 -; CHECK-NEXT: v_readlane_b32 s59, v0, 26 -; CHECK-NEXT: v_readlane_b32 s58, v0, 25 -; CHECK-NEXT: v_readlane_b32 s57, v0, 24 -; CHECK-NEXT: v_readlane_b32 s56, v0, 23 -; CHECK-NEXT: v_readlane_b32 s55, v0, 22 -; CHECK-NEXT: v_readlane_b32 s54, v0, 21 -; CHECK-NEXT: v_readlane_b32 s53, v0, 20 -; CHECK-NEXT: v_readlane_b32 s52, v0, 19 -; CHECK-NEXT: v_readlane_b32 s51, v0, 18 -; CHECK-NEXT: v_readlane_b32 s50, v0, 17 -; CHECK-NEXT: v_readlane_b32 s49, v0, 16 -; CHECK-NEXT: v_readlane_b32 s48, v0, 15 -; CHECK-NEXT: v_readlane_b32 s47, v0, 14 -; CHECK-NEXT: v_readlane_b32 s46, v0, 13 -; CHECK-NEXT: v_readlane_b32 s45, v0, 12 -; CHECK-NEXT: v_readlane_b32 s44, v0, 11 -; CHECK-NEXT: v_readlane_b32 s43, v0, 10 -; CHECK-NEXT: v_readlane_b32 s42, v0, 9 -; CHECK-NEXT: v_readlane_b32 s41, v0, 8 -; CHECK-NEXT: v_readlane_b32 s40, v0, 7 -; CHECK-NEXT: v_readlane_b32 s39, v0, 6 -; CHECK-NEXT: v_readlane_b32 s38, v0, 5 -; CHECK-NEXT: v_readlane_b32 s37, v0, 4 -; CHECK-NEXT: v_readlane_b32 s36, v0, 3 -; CHECK-NEXT: v_readlane_b32 s35, v0, 2 -; CHECK-NEXT: v_readlane_b32 s34, v0, 1 -; CHECK-NEXT: v_readlane_b32 s33, v0, 0 -; CHECK-NEXT: s_or_saveexec_b64 s[6:7], -1 +; CHECK-NEXT: v_readlane_b32 s101, v1, 6 +; CHECK-NEXT: v_readlane_b32 s100, v1, 5 +; CHECK-NEXT: v_readlane_b32 s99, v1, 4 +; CHECK-NEXT: v_readlane_b32 s98, v1, 3 +; CHECK-NEXT: v_readlane_b32 s97, v1, 2 +; CHECK-NEXT: v_readlane_b32 s96, v1, 1 +; CHECK-NEXT: v_readlane_b32 s95, v1, 0 +; CHECK-NEXT: v_readlane_b32 s94, v0, 63 +; CHECK-NEXT: v_readlane_b32 s93, v0, 62 +; CHECK-NEXT: v_readlane_b32 s92, v0, 61 +; CHECK-NEXT: v_readlane_b32 s91, v0, 60 +; CHECK-NEXT: v_readlane_b32 s90, v0, 59 +; CHECK-NEXT: v_readlane_b32 s89, v0, 58 +; CHECK-NEXT: v_readlane_b32 s88, v0, 57 +; CHECK-NEXT: v_readlane_b32 s87, v0, 56 +; CHECK-NEXT: v_readlane_b32 s86, v0, 55 +; CHECK-NEXT: v_readlane_b32 s85, v0, 54 +; CHECK-NEXT: v_readlane_b32 s84, v0, 53 +; CHECK-NEXT: v_readlane_b32 s83, v0, 52 +; CHECK-NEXT: v_readlane_b32 s82, v0, 51 +; CHECK-NEXT: v_readlane_b32 s81, v0, 50 +; CHECK-NEXT: v_readlane_b32 s80, v0, 49 +; CHECK-NEXT: v_readlane_b32 s79, v0, 48 +; CHECK-NEXT: v_readlane_b32 s78, v0, 47 +; CHECK-NEXT: v_readlane_b32 s77, v0, 46 +; CHECK-NEXT: v_readlane_b32 s76, v0, 45 +; CHECK-NEXT: v_readlane_b32 s75, v0, 44 +; CHECK-NEXT: v_readlane_b32 s74, v0, 43 +; CHECK-NEXT: v_readlane_b32 s73, v0, 42 +; CHECK-NEXT: v_readlane_b32 s72, v0, 41 +; CHECK-NEXT: v_readlane_b32 s71, v0, 40 +; CHECK-NEXT: v_readlane_b32 s70, v0, 39 +; CHECK-NEXT: v_readlane_b32 s69, v0, 38 +; CHECK-NEXT: v_readlane_b32 s68, v0, 37 +; CHECK-NEXT: v_readlane_b32 s67, v0, 36 +; CHECK-NEXT: v_readlane_b32 s66, v0, 35 +; CHECK-NEXT: v_readlane_b32 s65, v0, 34 +; CHECK-NEXT: v_readlane_b32 s64, v0, 33 +; CHECK-NEXT: v_readlane_b32 s63, v0, 32 +; CHECK-NEXT: v_readlane_b32 s62, v0, 31 +; CHECK-NEXT: v_readlane_b32 s61, v0, 30 +; CHECK-NEXT: v_readlane_b32 s60, v0, 29 +; CHECK-NEXT: v_readlane_b32 s59, v0, 28 +; CHECK-NEXT: v_readlane_b32 s58, v0, 27 +; CHECK-NEXT: v_readlane_b32 s57, v0, 26 +; CHECK-NEXT: v_readlane_b32 s56, v0, 25 +; CHECK-NEXT: v_readlane_b32 s55, v0, 24 +; CHECK-NEXT: v_readlane_b32 s54, v0, 23 +; CHECK-NEXT: v_readlane_b32 s53, v0, 22 +; CHECK-NEXT: v_readlane_b32 s52, v0, 21 +; CHECK-NEXT: v_readlane_b32 s51, v0, 20 +; CHECK-NEXT: v_readlane_b32 s50, v0, 
19 +; CHECK-NEXT: v_readlane_b32 s49, v0, 18 +; CHECK-NEXT: v_readlane_b32 s48, v0, 17 +; CHECK-NEXT: v_readlane_b32 s47, v0, 16 +; CHECK-NEXT: v_readlane_b32 s46, v0, 15 +; CHECK-NEXT: v_readlane_b32 s45, v0, 14 +; CHECK-NEXT: v_readlane_b32 s44, v0, 13 +; CHECK-NEXT: v_readlane_b32 s43, v0, 12 +; CHECK-NEXT: v_readlane_b32 s42, v0, 11 +; CHECK-NEXT: v_readlane_b32 s41, v0, 10 +; CHECK-NEXT: v_readlane_b32 s40, v0, 9 +; CHECK-NEXT: v_readlane_b32 s39, v0, 8 +; CHECK-NEXT: v_readlane_b32 s38, v0, 7 +; CHECK-NEXT: v_readlane_b32 s37, v0, 6 +; CHECK-NEXT: v_readlane_b32 s36, v0, 5 +; CHECK-NEXT: v_readlane_b32 s35, v0, 4 +; CHECK-NEXT: v_readlane_b32 s34, v0, 3 +; CHECK-NEXT: v_readlane_b32 s33, v0, 2 +; CHECK-NEXT: v_readlane_b32 s31, v0, 1 +; CHECK-NEXT: v_readlane_b32 s30, v0, 0 +; CHECK-NEXT: s_or_saveexec_b64 s[4:5], -1 ; CHECK-NEXT: buffer_load_dword v0, off, s[0:3], s32 ; 4-byte Folded Reload ; CHECK-NEXT: buffer_load_dword v1, off, s[0:3], s32 offset:4 ; 4-byte Folded Reload -; CHECK-NEXT: s_mov_b64 exec, s[6:7] +; CHECK-NEXT: s_mov_b64 exec, s[4:5] ; CHECK-NEXT: s_waitcnt vmcnt(0) -; CHECK-NEXT: s_setpc_b64 s[4:5] +; CHECK-NEXT: s_setpc_b64 s[30:31] entry: %cnd = tail call i32 @llvm.amdgcn.workgroup.id.x() #0 %sgpr0 = tail call i32 asm sideeffect "s_mov_b32 s0, 0", "={s0}"() #0 diff --git a/llvm/test/CodeGen/AMDGPU/call-graph-register-usage.ll b/llvm/test/CodeGen/AMDGPU/call-graph-register-usage.ll index e91d62c4c3f2..1a9ec6c31555 100644 --- a/llvm/test/CodeGen/AMDGPU/call-graph-register-usage.ll +++ b/llvm/test/CodeGen/AMDGPU/call-graph-register-usage.ll @@ -17,9 +17,10 @@ define void @use_vcc() #1 { ; GCN: v_writelane_b32 v40, s30, 0 ; GCN: v_writelane_b32 v40, s31, 1 ; GCN: s_swappc_b64 -; GCN: v_readlane_b32 s4, v40, 0 -; GCN: v_readlane_b32 s5, v40, 1 +; GCN: v_readlane_b32 s31, v40, 1 +; GCN: v_readlane_b32 s30, v40, 0 ; GCN: v_readlane_b32 s33, v40, 2 +; GCN: s_setpc_b64 s[30:31] ; GCN: ; NumSgprs: 36 ; GCN: ; NumVgprs: 41 define void @indirect_use_vcc() #1 { diff --git a/llvm/test/CodeGen/AMDGPU/call-preserved-registers.ll b/llvm/test/CodeGen/AMDGPU/call-preserved-registers.ll index 82d879d76931..f3c875bd5803 100644 --- a/llvm/test/CodeGen/AMDGPU/call-preserved-registers.ll +++ b/llvm/test/CodeGen/AMDGPU/call-preserved-registers.ll @@ -25,26 +25,26 @@ define amdgpu_kernel void @test_kernel_call_external_void_func_void_clobber_s30_ ; MUBUF: buffer_store_dword ; FLATSCR: scratch_store_dword ; GCN: v_writelane_b32 v40, s33, 4 -; GCN: v_writelane_b32 v40, s34, 0 -; GCN: v_writelane_b32 v40, s35, 1 -; GCN: v_writelane_b32 v40, s30, 2 -; GCN: v_writelane_b32 v40, s31, 3 +; GCN: v_writelane_b32 v40, s30, 0 +; GCN: v_writelane_b32 v40, s31, 1 +; GCN: v_writelane_b32 v40, s34, 2 +; GCN: v_writelane_b32 v40, s35, 3 ; GCN: s_swappc_b64 ; GCN-NEXT: ;;#ASMSTART ; GCN-NEXT: ;;#ASMEND ; GCN-NEXT: s_swappc_b64 -; MUBUF-DAG: v_readlane_b32 s4, v40, 2 -; MUBUF-DAG: v_readlane_b32 s5, v40, 3 -; FLATSCR-DAG: v_readlane_b32 s0, v40, 2 -; FLATSCR-DAG: v_readlane_b32 s1, v40, 3 -; GCN: v_readlane_b32 s35, v40, 1 -; GCN: v_readlane_b32 s34, v40, 0 +; GCN: v_readlane_b32 s35, v40, 3 +; GCN: v_readlane_b32 s34, v40, 2 +; MUBUF-DAG: v_readlane_b32 s31, v40, 1 +; MUBUF-DAG: v_readlane_b32 s30, v40, 0 +; FLATSCR-DAG: v_readlane_b32 s31, v40, 1 +; FLATSCR-DAG: v_readlane_b32 s30, v40, 0 ; GCN: v_readlane_b32 s33, v40, 4 ; MUBUF: buffer_load_dword ; FLATSCR: scratch_load_dword -; GCN: s_setpc_b64 +; GCN: s_setpc_b64 s[30:31] define void 
@test_func_call_external_void_func_void_clobber_s30_s31_call_external_void_func_void() #0 { call void @external_void_func_void() call void asm sideeffect "", ""() #0 @@ -74,11 +74,14 @@ define void @test_func_call_external_void_funcx2() #0 { ; GCN-LABEL: {{^}}void_func_void_clobber_s30_s31: ; GCN: s_waitcnt -; GCN-NEXT: s_mov_b64 [[SAVEPC:s\[[0-9]+:[0-9]+\]]], s[30:31] +; GCN: v_writelane_b32 v0, s30, 0 +; GCN: v_writelane_b32 v0, s31, 1 ; GCN-NEXT: #ASMSTART ; GCN: ; clobber ; GCN-NEXT: #ASMEND -; GCN-NEXT: s_setpc_b64 [[SAVEPC]] +; GCN: v_readlane_b32 s31, v0, 1 +; GCN: v_readlane_b32 s30, v0, 0 +; GCN: s_setpc_b64 s[30:31] define void @void_func_void_clobber_s30_s31() #2 { call void asm sideeffect "; clobber", "~{s[30:31]}"() #0 ret void diff --git a/llvm/test/CodeGen/AMDGPU/callee-frame-setup.ll b/llvm/test/CodeGen/AMDGPU/callee-frame-setup.ll index e357b5adc8d4..e3e5f011ccfc 100644 --- a/llvm/test/CodeGen/AMDGPU/callee-frame-setup.ll +++ b/llvm/test/CodeGen/AMDGPU/callee-frame-setup.ll @@ -102,10 +102,8 @@ define void @callee_with_stack_no_fp_elim_non_leaf() #2 { ; GCN: s_swappc_b64 -; MUBUF-DAG: v_readlane_b32 s5, [[CSR_VGPR]] -; MUBUF-DAG: v_readlane_b32 s4, [[CSR_VGPR]] -; FLATSCR-DAG: v_readlane_b32 s0, [[CSR_VGPR]] -; FLATSCR-DAG: v_readlane_b32 s1, [[CSR_VGPR]] +; GCN-DAG: v_readlane_b32 s30, [[CSR_VGPR]] +; GCN-DAG: v_readlane_b32 s31, [[CSR_VGPR]] ; MUBUF: s_addk_i32 s32, 0xfc00{{$}} ; FLATSCR: s_add_i32 s32, s32, -16{{$}} @@ -116,7 +114,7 @@ define void @callee_with_stack_no_fp_elim_non_leaf() #2 { ; GCN-NEXT: s_mov_b64 exec, [[COPY_EXEC1]] ; GCN-NEXT: s_waitcnt vmcnt(0) -; GCN-NEXT: s_setpc_b64 +; GCN-NEXT: s_setpc_b64 s[30:31] define void @callee_with_stack_and_call() #0 { %alloca = alloca i32, addrspace(5) store volatile i32 0, i32 addrspace(5)* %alloca @@ -144,10 +142,8 @@ define void @callee_with_stack_and_call() #0 { ; GCN-DAG: v_writelane_b32 [[CSR_VGPR]], s31, 1 ; GCN: s_swappc_b64 -; MUBUF-DAG: v_readlane_b32 s4, v40, 0 -; MUBUF-DAG: v_readlane_b32 s5, v40, 1 -; FLATSCR-DAG: v_readlane_b32 s0, v40, 0 -; FLATSCR-DAG: v_readlane_b32 s1, v40, 1 +; GCN-DAG: v_readlane_b32 s30, [[CSR_VGPR]], 0 +; GCN-DAG: v_readlane_b32 s31, [[CSR_VGPR]], 1 ; MUBUF: s_addk_i32 s32, 0xfc00 ; FLATSCR: s_add_i32 s32, s32, -16 @@ -157,7 +153,7 @@ define void @callee_with_stack_and_call() #0 { ; FLATSCR-NEXT: scratch_load_dword [[CSR_VGPR]], off, s32 ; 4-byte Folded Reload ; GCN-NEXT: s_mov_b64 exec, [[COPY_EXEC1]] ; GCN-NEXT: s_waitcnt vmcnt(0) -; GCN-NEXT: s_setpc_b64 +; GCN-NEXT: s_setpc_b64 s[30:31] define void @callee_no_stack_with_call() #0 { call void @external_void_func_void() ret void @@ -393,31 +389,28 @@ define void @realign_stack_no_fp_elim() #1 { ; MUBUF-NEXT: buffer_store_dword [[CSR_VGPR:v[0-9]+]], off, s[0:3], s32 offset:4 ; 4-byte Folded Spill ; FLATSCR-NEXT: scratch_store_dword off, [[CSR_VGPR:v[0-9]+]], s32 offset:4 ; 4-byte Folded Spill ; GCN-NEXT: s_mov_b64 exec, [[COPY_EXEC0]] -; GCN-NEXT: v_writelane_b32 v0, s33, 2 +; GCN-NEXT: v_writelane_b32 [[CSR_VGPR]], s33, 2 ; GCN-NEXT: s_mov_b32 s33, s32 -; GCN-NEXT: v_writelane_b32 v0, s30, 0 +; GCN: v_writelane_b32 [[CSR_VGPR]], s30, 0 ; GCN: v_mov_b32_e32 [[ZERO:v[0-9]+]], 0 -; GCN: v_writelane_b32 v0, s31, 1 +; MUBUF: s_addk_i32 s32, 0x300 +; FLATSCR: s_add_i32 s32, s32, 12 +; GCN: v_writelane_b32 [[CSR_VGPR]], s31, 1 ; MUBUF: buffer_store_dword [[ZERO]], off, s[0:3], s33{{$}} ; FLATSCR: scratch_store_dword off, [[ZERO]], s33{{$}} ; GCN-NEXT: s_waitcnt vmcnt(0) ; GCN: ;;#ASMSTART -; MUBUF: s_addk_i32 s32, 0x300 -; MUBUF-NEXT: 
v_readlane_b32 s4, v0, 0 -; MUBUF-NEXT: v_readlane_b32 s5, v0, 1 -; FLATSCR: s_add_i32 s32, s32, 12 -; FLATSCR-NEXT: v_readlane_b32 s0, v0, 0 -; FLATSCR-NEXT: v_readlane_b32 s1, v0, 1 -; MUBUF-NEXT: s_addk_i32 s32, 0xfd00 -; FLATSCR-NEXT: s_add_i32 s32, s32, -12 -; GCN-NEXT: v_readlane_b32 s33, v0, 2 +; GCN: v_readlane_b32 s31, [[CSR_VGPR]], 1 +; GCN: v_readlane_b32 s30, [[CSR_VGPR]], 0 +; MUBUF: s_addk_i32 s32, 0xfd00 +; FLATSCR: s_add_i32 s32, s32, -12 +; GCN-NEXT: v_readlane_b32 s33, [[CSR_VGPR]], 2 ; GCN-NEXT: s_or_saveexec_b64 [[COPY_EXEC1:s\[[0-9]+:[0-9]+\]]], -1{{$}} ; MUBUF-NEXT: buffer_load_dword [[CSR_VGPR]], off, s[0:3], s32 offset:4 ; 4-byte Folded Reload ; FLATSCR-NEXT: scratch_load_dword [[CSR_VGPR]], off, s32 offset:4 ; 4-byte Folded Reload ; GCN-NEXT: s_mov_b64 exec, [[COPY_EXEC1]] ; GCN-NEXT: s_waitcnt vmcnt(0) -; MUBUF-NEXT: s_setpc_b64 s[4:5] -; FLATSCR-NEXT: s_setpc_b64 s[0:1] +; GCN-NEXT: s_setpc_b64 s[30:31] define void @no_unused_non_csr_sgpr_for_fp() #1 { %alloca = alloca i32, addrspace(5) store volatile i32 0, i32 addrspace(5)* %alloca @@ -441,28 +434,22 @@ define void @no_unused_non_csr_sgpr_for_fp() #1 { ; GCN-NEXT: s_mov_b64 exec, [[COPY_EXEC0]] ; GCN-NEXT: v_writelane_b32 [[CSR_VGPR]], s33, 2 ; GCN-NEXT: s_mov_b32 s33, s32 -; GCN-NEXT: v_writelane_b32 [[CSR_VGPR]], s30, 0 +; MUBUF: s_addk_i32 s32, 0x300{{$}} +; FLATSCR: s_add_i32 s32, s32, 12{{$}} -; GCN-DAG: v_writelane_b32 [[CSR_VGPR]], s31, 1 ; MUBUF-DAG: buffer_store_dword ; FLATSCR-DAG: scratch_store_dword -; MUBUF: s_addk_i32 s32, 0x300{{$}} -; FLATSCR: s_add_i32 s32, s32, 12{{$}} -; MUBUF: v_readlane_b32 s4, [[CSR_VGPR]], 0 -; FLATSCR: v_readlane_b32 s0, [[CSR_VGPR]], 0 ; GCN: ;;#ASMSTART -; MUBUF: v_readlane_b32 s5, [[CSR_VGPR]], 1 -; FLATSCR: v_readlane_b32 s1, [[CSR_VGPR]], 1 -; MUBUF-NEXT: s_addk_i32 s32, 0xfd00{{$}} -; FLATSCR-NEXT: s_add_i32 s32, s32, -12{{$}} +; MUBUF: s_addk_i32 s32, 0xfd00{{$}} +; FLATSCR: s_add_i32 s32, s32, -12{{$}} ; GCN-NEXT: v_readlane_b32 s33, [[CSR_VGPR]], 2 ; GCN-NEXT: s_or_saveexec_b64 [[COPY_EXEC1:s\[[0-9]+:[0-9]+\]]], -1{{$}} ; MUBUF-NEXT: buffer_load_dword [[CSR_VGPR]], off, s[0:3], s32 offset:4 ; 4-byte Folded Reload ; FLATSCR-NEXT: scratch_load_dword [[CSR_VGPR]], off, s32 offset:4 ; 4-byte Folded Reload ; GCN-NEXT: s_mov_b64 exec, [[COPY_EXEC1]] ; GCN-NEXT: s_waitcnt vmcnt(0) -; GCN-NEXT: s_setpc_b64 +; GCN-NEXT: s_setpc_b64 s[30:31] define void @no_unused_non_csr_sgpr_for_fp_no_scratch_vgpr() #1 { %alloca = alloca i32, addrspace(5) store volatile i32 0, i32 addrspace(5)* %alloca @@ -494,21 +481,15 @@ define void @no_unused_non_csr_sgpr_for_fp_no_scratch_vgpr() #1 { ; FLATSCR-NEXT: scratch_store_dword off, [[CSR_VGPR:v[0-9]+]], [[SCRATCH_SGPR]] ; 4-byte Folded Spill ; GCN-NEXT: s_mov_b64 exec, [[COPY_EXEC0]] ; GCN-NEXT: v_writelane_b32 [[CSR_VGPR]], s33, 2 -; GCN-DAG: v_writelane_b32 [[CSR_VGPR]], s30, 0 ; GCN-DAG: s_mov_b32 s33, s32 -; GCN-DAG: v_writelane_b32 [[CSR_VGPR]], s31, 1 ; MUBUF-DAG: s_add_i32 s32, s32, 0x40300{{$}} ; FLATSCR-DAG: s_addk_i32 s32, 0x100c{{$}} ; MUBUF-DAG: buffer_store_dword ; FLATSCR-DAG: scratch_store_dword -; MUBUF: v_readlane_b32 s4, [[CSR_VGPR]], 0 -; FLATSCR: v_readlane_b32 s0, [[CSR_VGPR]], 0 ; GCN: ;;#ASMSTART -; MUBUF: v_readlane_b32 s5, [[CSR_VGPR]], 1 -; FLATSCR: v_readlane_b32 s1, [[CSR_VGPR]], 1 -; MUBUF-NEXT: s_add_i32 s32, s32, 0xfffbfd00{{$}} -; FLATSCR-NEXT: s_addk_i32 s32, 0xeff4{{$}} +; MUBUF: s_add_i32 s32, s32, 0xfffbfd00{{$}} +; FLATSCR: s_addk_i32 s32, 0xeff4{{$}} ; GCN-NEXT: v_readlane_b32 s33, [[CSR_VGPR]], 2 ; 
GCN-NEXT: s_or_saveexec_b64 [[COPY_EXEC1:s\[[0-9]+:[0-9]+\]]], -1{{$}} ; MUBUF-NEXT: s_add_i32 [[SCRATCH_SGPR:s[0-9]+]], s32, 0x40100 @@ -517,7 +498,7 @@ define void @no_unused_non_csr_sgpr_for_fp_no_scratch_vgpr() #1 { ; FLATSCR-NEXT: scratch_load_dword [[CSR_VGPR]], off, [[SCRATCH_SGPR]] ; 4-byte Folded Reload ; GCN-NEXT: s_mov_b64 exec, [[COPY_EXEC1]] ; GCN-NEXT: s_waitcnt vmcnt(0) -; GCN-NEXT: s_setpc_b64 +; GCN-NEXT: s_setpc_b64 s[30:31] define void @scratch_reg_needed_mubuf_offset([4096 x i8] addrspace(5)* byval([4096 x i8]) align 4 %arg) #1 { %alloca = alloca i32, addrspace(5) store volatile i32 0, i32 addrspace(5)* %alloca @@ -549,7 +530,7 @@ define internal void @local_empty_func() #0 { ; An FP is needed, despite not needing any spills ; TODO: Ccould see callee does not use stack and omit FP. ; GCN-LABEL: {{^}}ipra_call_with_stack: -; GCN: s_mov_b32 [[FP_COPY:s[0-9]+]], s33 +; GCN: v_writelane_b32 v0, s33, 2 ; GCN: s_mov_b32 s33, s32 ; MUBUF: s_addk_i32 s32, 0x400 ; FLATSCR: s_add_i32 s32, s32, 16 @@ -558,7 +539,7 @@ define internal void @local_empty_func() #0 { ; GCN: s_swappc_b64 ; MUBUF: s_addk_i32 s32, 0xfc00 ; FLATSCR: s_add_i32 s32, s32, -16 -; GCN: s_mov_b32 s33, [[FP_COPY:s[0-9]+]] +; GCN: v_readlane_b32 s33, v0, 2 define void @ipra_call_with_stack() #0 { %alloca = alloca i32, addrspace(5) store volatile i32 0, i32 addrspace(5)* %alloca diff --git a/llvm/test/CodeGen/AMDGPU/callee-special-input-sgprs-fixed-abi.ll b/llvm/test/CodeGen/AMDGPU/callee-special-input-sgprs-fixed-abi.ll index 351aabf25738..980b0bc631af 100644 --- a/llvm/test/CodeGen/AMDGPU/callee-special-input-sgprs-fixed-abi.ll +++ b/llvm/test/CodeGen/AMDGPU/callee-special-input-sgprs-fixed-abi.ll @@ -317,7 +317,7 @@ define amdgpu_kernel void @kern_indirect_use_workgroup_id_yz() #1 { ; GCN-NOT: s12 ; GCN-NOT: s13 ; GCN-NOT: s14 -; GCN: v_readlane_b32 s4, v40, 0 +; GCN: v_readlane_b32 s30, v40, 0 define hidden void @func_indirect_use_workgroup_id_x() #1 { call void @use_workgroup_id_x() ret void diff --git a/llvm/test/CodeGen/AMDGPU/cross-block-use-is-not-abi-copy.ll b/llvm/test/CodeGen/AMDGPU/cross-block-use-is-not-abi-copy.ll index 82dddcb6a148..29fc098899ee 100644 --- a/llvm/test/CodeGen/AMDGPU/cross-block-use-is-not-abi-copy.ll +++ b/llvm/test/CodeGen/AMDGPU/cross-block-use-is-not-abi-copy.ll @@ -39,15 +39,15 @@ define float @call_split_type_used_outside_block_v2f32() #0 { ; GCN-NEXT: s_add_u32 s16, s16, func_v2f32@rel32@lo+4 ; GCN-NEXT: s_addc_u32 s17, s17, func_v2f32@rel32@hi+12 ; GCN-NEXT: s_swappc_b64 s[30:31], s[16:17] -; GCN-NEXT: v_readlane_b32 s4, v40, 0 -; GCN-NEXT: v_readlane_b32 s5, v40, 1 +; GCN-NEXT: v_readlane_b32 s31, v40, 1 +; GCN-NEXT: v_readlane_b32 s30, v40, 0 ; GCN-NEXT: s_addk_i32 s32, 0xfc00 ; GCN-NEXT: v_readlane_b32 s33, v40, 2 -; GCN-NEXT: s_or_saveexec_b64 s[6:7], -1 +; GCN-NEXT: s_or_saveexec_b64 s[4:5], -1 ; GCN-NEXT: buffer_load_dword v40, off, s[0:3], s32 ; 4-byte Folded Reload -; GCN-NEXT: s_mov_b64 exec, s[6:7] +; GCN-NEXT: s_mov_b64 exec, s[4:5] ; GCN-NEXT: s_waitcnt vmcnt(0) -; GCN-NEXT: s_setpc_b64 s[4:5] +; GCN-NEXT: s_setpc_b64 s[30:31] bb0: %split.ret.type = call <2 x float> @func_v2f32() br label %bb1 @@ -73,15 +73,15 @@ define float @call_split_type_used_outside_block_v3f32() #0 { ; GCN-NEXT: s_add_u32 s16, s16, func_v3f32@rel32@lo+4 ; GCN-NEXT: s_addc_u32 s17, s17, func_v3f32@rel32@hi+12 ; GCN-NEXT: s_swappc_b64 s[30:31], s[16:17] -; GCN-NEXT: v_readlane_b32 s4, v40, 0 -; GCN-NEXT: v_readlane_b32 s5, v40, 1 +; GCN-NEXT: v_readlane_b32 s31, v40, 1 +; GCN-NEXT: 
v_readlane_b32 s30, v40, 0 ; GCN-NEXT: s_addk_i32 s32, 0xfc00 ; GCN-NEXT: v_readlane_b32 s33, v40, 2 -; GCN-NEXT: s_or_saveexec_b64 s[6:7], -1 +; GCN-NEXT: s_or_saveexec_b64 s[4:5], -1 ; GCN-NEXT: buffer_load_dword v40, off, s[0:3], s32 ; 4-byte Folded Reload -; GCN-NEXT: s_mov_b64 exec, s[6:7] +; GCN-NEXT: s_mov_b64 exec, s[4:5] ; GCN-NEXT: s_waitcnt vmcnt(0) -; GCN-NEXT: s_setpc_b64 s[4:5] +; GCN-NEXT: s_setpc_b64 s[30:31] bb0: %split.ret.type = call <3 x float> @func_v3f32() br label %bb1 @@ -107,15 +107,15 @@ define half @call_split_type_used_outside_block_v4f16() #0 { ; GCN-NEXT: s_add_u32 s16, s16, func_v4f16@rel32@lo+4 ; GCN-NEXT: s_addc_u32 s17, s17, func_v4f16@rel32@hi+12 ; GCN-NEXT: s_swappc_b64 s[30:31], s[16:17] -; GCN-NEXT: v_readlane_b32 s4, v40, 0 -; GCN-NEXT: v_readlane_b32 s5, v40, 1 +; GCN-NEXT: v_readlane_b32 s31, v40, 1 +; GCN-NEXT: v_readlane_b32 s30, v40, 0 ; GCN-NEXT: s_addk_i32 s32, 0xfc00 ; GCN-NEXT: v_readlane_b32 s33, v40, 2 -; GCN-NEXT: s_or_saveexec_b64 s[6:7], -1 +; GCN-NEXT: s_or_saveexec_b64 s[4:5], -1 ; GCN-NEXT: buffer_load_dword v40, off, s[0:3], s32 ; 4-byte Folded Reload -; GCN-NEXT: s_mov_b64 exec, s[6:7] +; GCN-NEXT: s_mov_b64 exec, s[4:5] ; GCN-NEXT: s_waitcnt vmcnt(0) -; GCN-NEXT: s_setpc_b64 s[4:5] +; GCN-NEXT: s_setpc_b64 s[30:31] bb0: %split.ret.type = call <4 x half> @func_v4f16() br label %bb1 @@ -141,16 +141,16 @@ define { i32, half } @call_split_type_used_outside_block_struct() #0 { ; GCN-NEXT: s_add_u32 s16, s16, func_struct@rel32@lo+4 ; GCN-NEXT: s_addc_u32 s17, s17, func_struct@rel32@hi+12 ; GCN-NEXT: s_swappc_b64 s[30:31], s[16:17] -; GCN-NEXT: v_readlane_b32 s4, v40, 0 ; GCN-NEXT: v_mov_b32_e32 v1, v4 -; GCN-NEXT: v_readlane_b32 s5, v40, 1 +; GCN-NEXT: v_readlane_b32 s31, v40, 1 +; GCN-NEXT: v_readlane_b32 s30, v40, 0 ; GCN-NEXT: s_addk_i32 s32, 0xfc00 ; GCN-NEXT: v_readlane_b32 s33, v40, 2 -; GCN-NEXT: s_or_saveexec_b64 s[6:7], -1 +; GCN-NEXT: s_or_saveexec_b64 s[4:5], -1 ; GCN-NEXT: buffer_load_dword v40, off, s[0:3], s32 ; 4-byte Folded Reload -; GCN-NEXT: s_mov_b64 exec, s[6:7] +; GCN-NEXT: s_mov_b64 exec, s[4:5] ; GCN-NEXT: s_waitcnt vmcnt(0) -; GCN-NEXT: s_setpc_b64 s[4:5] +; GCN-NEXT: s_setpc_b64 s[30:31] bb0: %split.ret.type = call { <4 x i32>, <4 x half> } @func_struct() br label %bb1 diff --git a/llvm/test/CodeGen/AMDGPU/fdiv-nofpexcept.ll b/llvm/test/CodeGen/AMDGPU/fdiv-nofpexcept.ll index 121d5d35bfe7..cf610f9436ac 100644 --- a/llvm/test/CodeGen/AMDGPU/fdiv-nofpexcept.ll +++ b/llvm/test/CodeGen/AMDGPU/fdiv-nofpexcept.ll @@ -7,32 +7,29 @@ define float @fdiv_f32(float %a, float %b) #0 { ; GCN-LABEL: name: fdiv_f32 ; GCN: bb.0.entry: - ; GCN-NEXT: liveins: $vgpr0, $vgpr1, $sgpr30_sgpr31 + ; GCN-NEXT: liveins: $vgpr0, $vgpr1 ; GCN-NEXT: {{ $}} - ; GCN-NEXT: [[COPY:%[0-9]+]]:sreg_64 = COPY $sgpr30_sgpr31 - ; GCN-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; GCN-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GCN-NEXT: %6:vgpr_32, %7:sreg_64 = nofpexcept V_DIV_SCALE_F32_e64 0, [[COPY2]], 0, [[COPY1]], 0, [[COPY2]], 0, 0, implicit $mode, implicit $exec - ; GCN-NEXT: %8:vgpr_32, %9:sreg_64 = nofpexcept V_DIV_SCALE_F32_e64 0, [[COPY1]], 0, [[COPY1]], 0, [[COPY2]], 0, 0, implicit $mode, implicit $exec - ; GCN-NEXT: %10:vgpr_32 = nofpexcept V_RCP_F32_e64 0, %8, 0, 0, implicit $mode, implicit $exec + ; GCN-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr1 + ; GCN-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr0 + ; GCN-NEXT: %4:vgpr_32, %5:sreg_64 = nofpexcept V_DIV_SCALE_F32_e64 0, [[COPY1]], 0, [[COPY]], 0, [[COPY1]], 0, 0, implicit 
$mode, implicit $exec + ; GCN-NEXT: %6:vgpr_32, %7:sreg_64 = nofpexcept V_DIV_SCALE_F32_e64 0, [[COPY]], 0, [[COPY]], 0, [[COPY1]], 0, 0, implicit $mode, implicit $exec + ; GCN-NEXT: %8:vgpr_32 = nofpexcept V_RCP_F32_e64 0, %6, 0, 0, implicit $mode, implicit $exec ; GCN-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 3 ; GCN-NEXT: [[S_MOV_B32_1:%[0-9]+]]:sgpr_32 = S_MOV_B32 1065353216 ; GCN-NEXT: [[S_MOV_B32_2:%[0-9]+]]:sreg_32 = S_MOV_B32 0 ; GCN-NEXT: S_SETREG_B32_mode killed [[S_MOV_B32_]], 2305, implicit-def $mode, implicit $mode - ; GCN-NEXT: %14:vgpr_32 = nofpexcept V_FMA_F32_e64 1, %8, 0, %10, 0, killed [[S_MOV_B32_1]], 0, 0, implicit $mode, implicit $exec - ; GCN-NEXT: %15:vgpr_32 = nofpexcept V_FMA_F32_e64 0, killed %14, 0, %10, 0, %10, 0, 0, implicit $mode, implicit $exec - ; GCN-NEXT: %16:vgpr_32 = nofpexcept V_MUL_F32_e64 0, %6, 0, %15, 0, 0, implicit $mode, implicit $exec - ; GCN-NEXT: %17:vgpr_32 = nofpexcept V_FMA_F32_e64 1, %8, 0, %16, 0, %6, 0, 0, implicit $mode, implicit $exec - ; GCN-NEXT: %18:vgpr_32 = nofpexcept V_FMA_F32_e64 0, killed %17, 0, %15, 0, %16, 0, 0, implicit $mode, implicit $exec - ; GCN-NEXT: %19:vgpr_32 = nofpexcept V_FMA_F32_e64 1, %8, 0, %18, 0, %6, 0, 0, implicit $mode, implicit $exec + ; GCN-NEXT: %12:vgpr_32 = nofpexcept V_FMA_F32_e64 1, %6, 0, %8, 0, killed [[S_MOV_B32_1]], 0, 0, implicit $mode, implicit $exec + ; GCN-NEXT: %13:vgpr_32 = nofpexcept V_FMA_F32_e64 0, killed %12, 0, %8, 0, %8, 0, 0, implicit $mode, implicit $exec + ; GCN-NEXT: %14:vgpr_32 = nofpexcept V_MUL_F32_e64 0, %4, 0, %13, 0, 0, implicit $mode, implicit $exec + ; GCN-NEXT: %15:vgpr_32 = nofpexcept V_FMA_F32_e64 1, %6, 0, %14, 0, %4, 0, 0, implicit $mode, implicit $exec + ; GCN-NEXT: %16:vgpr_32 = nofpexcept V_FMA_F32_e64 0, killed %15, 0, %13, 0, %14, 0, 0, implicit $mode, implicit $exec + ; GCN-NEXT: %17:vgpr_32 = nofpexcept V_FMA_F32_e64 1, %6, 0, %16, 0, %4, 0, 0, implicit $mode, implicit $exec ; GCN-NEXT: S_SETREG_B32_mode killed [[S_MOV_B32_2]], 2305, implicit-def dead $mode, implicit $mode - ; GCN-NEXT: $vcc = COPY %7 - ; GCN-NEXT: %20:vgpr_32 = nofpexcept V_DIV_FMAS_F32_e64 0, killed %19, 0, %15, 0, %18, 0, 0, implicit $mode, implicit $vcc, implicit $exec - ; GCN-NEXT: %21:vgpr_32 = nofpexcept V_DIV_FIXUP_F32_e64 0, killed %20, 0, [[COPY1]], 0, [[COPY2]], 0, 0, implicit $mode, implicit $exec - ; GCN-NEXT: [[COPY3:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY]] - ; GCN-NEXT: $vgpr0 = COPY %21 - ; GCN-NEXT: [[COPY4:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY3]] - ; GCN-NEXT: S_SETPC_B64_return [[COPY4]], implicit $vgpr0 + ; GCN-NEXT: $vcc = COPY %5 + ; GCN-NEXT: %18:vgpr_32 = nofpexcept V_DIV_FMAS_F32_e64 0, killed %17, 0, %13, 0, %16, 0, 0, implicit $mode, implicit $vcc, implicit $exec + ; GCN-NEXT: %19:vgpr_32 = nofpexcept V_DIV_FIXUP_F32_e64 0, killed %18, 0, [[COPY]], 0, [[COPY1]], 0, 0, implicit $mode, implicit $exec + ; GCN-NEXT: $vgpr0 = COPY %19 + ; GCN-NEXT: SI_RETURN implicit $vgpr0 entry: %fdiv = fdiv float %a, %b ret float %fdiv @@ -41,32 +38,29 @@ entry: define float @fdiv_nnan_f32(float %a, float %b) #0 { ; GCN-LABEL: name: fdiv_nnan_f32 ; GCN: bb.0.entry: - ; GCN-NEXT: liveins: $vgpr0, $vgpr1, $sgpr30_sgpr31 + ; GCN-NEXT: liveins: $vgpr0, $vgpr1 ; GCN-NEXT: {{ $}} - ; GCN-NEXT: [[COPY:%[0-9]+]]:sreg_64 = COPY $sgpr30_sgpr31 - ; GCN-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; GCN-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GCN-NEXT: %6:vgpr_32, %7:sreg_64 = nnan nofpexcept V_DIV_SCALE_F32_e64 0, [[COPY2]], 0, [[COPY1]], 0, [[COPY2]], 0, 0, implicit $mode, implicit 
$exec - ; GCN-NEXT: %8:vgpr_32, %9:sreg_64 = nnan nofpexcept V_DIV_SCALE_F32_e64 0, [[COPY1]], 0, [[COPY1]], 0, [[COPY2]], 0, 0, implicit $mode, implicit $exec - ; GCN-NEXT: %10:vgpr_32 = nnan nofpexcept V_RCP_F32_e64 0, %8, 0, 0, implicit $mode, implicit $exec + ; GCN-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr1 + ; GCN-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr0 + ; GCN-NEXT: %4:vgpr_32, %5:sreg_64 = nnan nofpexcept V_DIV_SCALE_F32_e64 0, [[COPY1]], 0, [[COPY]], 0, [[COPY1]], 0, 0, implicit $mode, implicit $exec + ; GCN-NEXT: %6:vgpr_32, %7:sreg_64 = nnan nofpexcept V_DIV_SCALE_F32_e64 0, [[COPY]], 0, [[COPY]], 0, [[COPY1]], 0, 0, implicit $mode, implicit $exec + ; GCN-NEXT: %8:vgpr_32 = nnan nofpexcept V_RCP_F32_e64 0, %6, 0, 0, implicit $mode, implicit $exec ; GCN-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 3 ; GCN-NEXT: [[S_MOV_B32_1:%[0-9]+]]:sgpr_32 = S_MOV_B32 1065353216 ; GCN-NEXT: [[S_MOV_B32_2:%[0-9]+]]:sreg_32 = S_MOV_B32 0 ; GCN-NEXT: S_SETREG_B32_mode killed [[S_MOV_B32_]], 2305, implicit-def $mode, implicit $mode - ; GCN-NEXT: %14:vgpr_32 = nnan nofpexcept V_FMA_F32_e64 1, %8, 0, %10, 0, killed [[S_MOV_B32_1]], 0, 0, implicit $mode, implicit $exec - ; GCN-NEXT: %15:vgpr_32 = nnan nofpexcept V_FMA_F32_e64 0, killed %14, 0, %10, 0, %10, 0, 0, implicit $mode, implicit $exec - ; GCN-NEXT: %16:vgpr_32 = nnan nofpexcept V_MUL_F32_e64 0, %6, 0, %15, 0, 0, implicit $mode, implicit $exec - ; GCN-NEXT: %17:vgpr_32 = nnan nofpexcept V_FMA_F32_e64 1, %8, 0, %16, 0, %6, 0, 0, implicit $mode, implicit $exec - ; GCN-NEXT: %18:vgpr_32 = nnan nofpexcept V_FMA_F32_e64 0, killed %17, 0, %15, 0, %16, 0, 0, implicit $mode, implicit $exec - ; GCN-NEXT: %19:vgpr_32 = nnan nofpexcept V_FMA_F32_e64 1, %8, 0, %18, 0, %6, 0, 0, implicit $mode, implicit $exec + ; GCN-NEXT: %12:vgpr_32 = nnan nofpexcept V_FMA_F32_e64 1, %6, 0, %8, 0, killed [[S_MOV_B32_1]], 0, 0, implicit $mode, implicit $exec + ; GCN-NEXT: %13:vgpr_32 = nnan nofpexcept V_FMA_F32_e64 0, killed %12, 0, %8, 0, %8, 0, 0, implicit $mode, implicit $exec + ; GCN-NEXT: %14:vgpr_32 = nnan nofpexcept V_MUL_F32_e64 0, %4, 0, %13, 0, 0, implicit $mode, implicit $exec + ; GCN-NEXT: %15:vgpr_32 = nnan nofpexcept V_FMA_F32_e64 1, %6, 0, %14, 0, %4, 0, 0, implicit $mode, implicit $exec + ; GCN-NEXT: %16:vgpr_32 = nnan nofpexcept V_FMA_F32_e64 0, killed %15, 0, %13, 0, %14, 0, 0, implicit $mode, implicit $exec + ; GCN-NEXT: %17:vgpr_32 = nnan nofpexcept V_FMA_F32_e64 1, %6, 0, %16, 0, %4, 0, 0, implicit $mode, implicit $exec ; GCN-NEXT: S_SETREG_B32_mode killed [[S_MOV_B32_2]], 2305, implicit-def dead $mode, implicit $mode - ; GCN-NEXT: $vcc = COPY %7 - ; GCN-NEXT: %20:vgpr_32 = nnan nofpexcept V_DIV_FMAS_F32_e64 0, killed %19, 0, %15, 0, %18, 0, 0, implicit $mode, implicit $vcc, implicit $exec - ; GCN-NEXT: %21:vgpr_32 = nnan nofpexcept V_DIV_FIXUP_F32_e64 0, killed %20, 0, [[COPY1]], 0, [[COPY2]], 0, 0, implicit $mode, implicit $exec - ; GCN-NEXT: [[COPY3:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY]] - ; GCN-NEXT: $vgpr0 = COPY %21 - ; GCN-NEXT: [[COPY4:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY3]] - ; GCN-NEXT: S_SETPC_B64_return [[COPY4]], implicit $vgpr0 + ; GCN-NEXT: $vcc = COPY %5 + ; GCN-NEXT: %18:vgpr_32 = nnan nofpexcept V_DIV_FMAS_F32_e64 0, killed %17, 0, %13, 0, %16, 0, 0, implicit $mode, implicit $vcc, implicit $exec + ; GCN-NEXT: %19:vgpr_32 = nnan nofpexcept V_DIV_FIXUP_F32_e64 0, killed %18, 0, [[COPY]], 0, [[COPY1]], 0, 0, implicit $mode, implicit $exec + ; GCN-NEXT: $vgpr0 = COPY %19 + ; GCN-NEXT: SI_RETURN implicit $vgpr0 entry: %fdiv = 
fdiv nnan float %a, %b ret float %fdiv diff --git a/llvm/test/CodeGen/AMDGPU/fix-frame-ptr-reg-copy-livein.ll b/llvm/test/CodeGen/AMDGPU/fix-frame-ptr-reg-copy-livein.ll index 408e00f1f37d..47287f67b01e 100644 --- a/llvm/test/CodeGen/AMDGPU/fix-frame-ptr-reg-copy-livein.ll +++ b/llvm/test/CodeGen/AMDGPU/fix-frame-ptr-reg-copy-livein.ll @@ -10,17 +10,17 @@ define i32 @fp_save_restore_in_temp_sgpr(%struct.Data addrspace(5)* nocapture readonly byval(%struct.Data) align 4 %arg) #0 { ; GCN-LABEL: name: fp_save_restore_in_temp_sgpr ; GCN: bb.0.begin: - ; GCN: liveins: $sgpr11, $sgpr30_sgpr31 + ; GCN: liveins: $sgpr11 ; GCN: $sgpr11 = frame-setup COPY $sgpr33 ; GCN: $sgpr33 = frame-setup COPY $sgpr32 ; GCN: bb.1.lp_end: - ; GCN: liveins: $sgpr10, $sgpr11, $vgpr1, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9, $sgpr30_sgpr31 + ; GCN: liveins: $sgpr10, $sgpr11, $vgpr1, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9 ; GCN: bb.2.lp_begin: - ; GCN: liveins: $sgpr10, $sgpr11, $vgpr1, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr30_sgpr31 + ; GCN: liveins: $sgpr10, $sgpr11, $vgpr1, $sgpr4_sgpr5, $sgpr6_sgpr7 ; GCN: bb.3.Flow: - ; GCN: liveins: $sgpr10, $sgpr11, $vgpr0, $vgpr1, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9, $sgpr30_sgpr31 + ; GCN: liveins: $sgpr10, $sgpr11, $vgpr0, $vgpr1, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9 ; GCN: bb.4.end: - ; GCN: liveins: $sgpr11, $vgpr0, $sgpr4_sgpr5, $sgpr30_sgpr31 + ; GCN: liveins: $sgpr11, $vgpr0, $sgpr4_sgpr5 ; GCN: $sgpr33 = frame-destroy COPY $sgpr11 begin: br label %lp_begin diff --git a/llvm/test/CodeGen/AMDGPU/fpow.ll b/llvm/test/CodeGen/AMDGPU/fpow.ll index ea461cab629d..bf783cba5ace 100644 --- a/llvm/test/CodeGen/AMDGPU/fpow.ll +++ b/llvm/test/CodeGen/AMDGPU/fpow.ll @@ -202,9 +202,9 @@ define <2 x half> @v_pow_v2f16(<2 x half> %x, <2 x half> %y) { ; GFX8-NEXT: v_log_f32_e32 v0, v0 ; GFX8-NEXT: v_mul_legacy_f32_e32 v2, v3, v2 ; GFX8-NEXT: v_mul_legacy_f32_e32 v0, v1, v0 -; GFX8-NEXT: v_exp_f32_e32 v2, v2 +; GFX8-NEXT: v_exp_f32_e32 v1, v2 ; GFX8-NEXT: v_exp_f32_e32 v0, v0 -; GFX8-NEXT: v_cvt_f16_f32_sdwa v1, v2 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD +; GFX8-NEXT: v_cvt_f16_f32_sdwa v1, v1 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD ; GFX8-NEXT: v_cvt_f16_f32_e32 v0, v0 ; GFX8-NEXT: v_or_b32_e32 v0, v0, v1 ; GFX8-NEXT: s_setpc_b64 s[30:31] @@ -220,9 +220,9 @@ define <2 x half> @v_pow_v2f16(<2 x half> %x, <2 x half> %y) { ; GFX9-NEXT: v_log_f32_e32 v0, v0 ; GFX9-NEXT: v_mul_legacy_f32_e32 v2, v3, v2 ; GFX9-NEXT: v_mul_legacy_f32_e32 v0, v1, v0 -; GFX9-NEXT: v_exp_f32_e32 v2, v2 +; GFX9-NEXT: v_exp_f32_e32 v1, v2 ; GFX9-NEXT: v_exp_f32_e32 v0, v0 -; GFX9-NEXT: v_cvt_f16_f32_e32 v1, v2 +; GFX9-NEXT: v_cvt_f16_f32_e32 v1, v1 ; GFX9-NEXT: v_cvt_f16_f32_e32 v0, v0 ; GFX9-NEXT: v_pack_b32_f16 v0, v0, v1 ; GFX9-NEXT: s_setpc_b64 s[30:31] @@ -238,9 +238,9 @@ define <2 x half> @v_pow_v2f16(<2 x half> %x, <2 x half> %y) { ; GFX90A-NEXT: v_log_f32_e32 v0, v0 ; GFX90A-NEXT: v_mul_legacy_f32 v2, v3, v2 ; GFX90A-NEXT: v_mul_legacy_f32 v0, v1, v0 -; GFX90A-NEXT: v_exp_f32_e32 v2, v2 +; GFX90A-NEXT: v_exp_f32_e32 v1, v2 ; GFX90A-NEXT: v_exp_f32_e32 v0, v0 -; GFX90A-NEXT: v_cvt_f16_f32_e32 v1, v2 +; GFX90A-NEXT: v_cvt_f16_f32_e32 v1, v1 ; GFX90A-NEXT: v_cvt_f16_f32_e32 v0, v0 ; GFX90A-NEXT: v_pack_b32_f16 v0, v0, v1 ; GFX90A-NEXT: s_setpc_b64 s[30:31] @@ -302,9 +302,9 @@ define <2 x half> @v_pow_v2f16_fneg_lhs(<2 x half> %x, <2 x half> %y) { ; GFX8-NEXT: v_log_f32_e32 v0, v0 ; GFX8-NEXT: v_mul_legacy_f32_e32 v2, v3, v2 ; GFX8-NEXT: v_mul_legacy_f32_e32 v0, v1, v0 -; GFX8-NEXT: 
v_exp_f32_e32 v2, v2 +; GFX8-NEXT: v_exp_f32_e32 v1, v2 ; GFX8-NEXT: v_exp_f32_e32 v0, v0 -; GFX8-NEXT: v_cvt_f16_f32_sdwa v1, v2 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD +; GFX8-NEXT: v_cvt_f16_f32_sdwa v1, v1 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD ; GFX8-NEXT: v_cvt_f16_f32_e32 v0, v0 ; GFX8-NEXT: v_or_b32_e32 v0, v0, v1 ; GFX8-NEXT: s_setpc_b64 s[30:31] @@ -320,9 +320,9 @@ define <2 x half> @v_pow_v2f16_fneg_lhs(<2 x half> %x, <2 x half> %y) { ; GFX9-NEXT: v_log_f32_e32 v0, v0 ; GFX9-NEXT: v_mul_legacy_f32_e32 v2, v3, v2 ; GFX9-NEXT: v_mul_legacy_f32_e32 v0, v1, v0 -; GFX9-NEXT: v_exp_f32_e32 v2, v2 +; GFX9-NEXT: v_exp_f32_e32 v1, v2 ; GFX9-NEXT: v_exp_f32_e32 v0, v0 -; GFX9-NEXT: v_cvt_f16_f32_e32 v1, v2 +; GFX9-NEXT: v_cvt_f16_f32_e32 v1, v1 ; GFX9-NEXT: v_cvt_f16_f32_e32 v0, v0 ; GFX9-NEXT: v_pack_b32_f16 v0, v0, v1 ; GFX9-NEXT: s_setpc_b64 s[30:31] @@ -338,9 +338,9 @@ define <2 x half> @v_pow_v2f16_fneg_lhs(<2 x half> %x, <2 x half> %y) { ; GFX90A-NEXT: v_log_f32_e32 v0, v0 ; GFX90A-NEXT: v_mul_legacy_f32 v2, v3, v2 ; GFX90A-NEXT: v_mul_legacy_f32 v0, v1, v0 -; GFX90A-NEXT: v_exp_f32_e32 v2, v2 +; GFX90A-NEXT: v_exp_f32_e32 v1, v2 ; GFX90A-NEXT: v_exp_f32_e32 v0, v0 -; GFX90A-NEXT: v_cvt_f16_f32_e32 v1, v2 +; GFX90A-NEXT: v_cvt_f16_f32_e32 v1, v1 ; GFX90A-NEXT: v_cvt_f16_f32_e32 v0, v0 ; GFX90A-NEXT: v_pack_b32_f16 v0, v0, v1 ; GFX90A-NEXT: s_setpc_b64 s[30:31] @@ -403,9 +403,9 @@ define <2 x half> @v_pow_v2f16_fneg_rhs(<2 x half> %x, <2 x half> %y) { ; GFX8-NEXT: v_log_f32_e32 v0, v0 ; GFX8-NEXT: v_mul_legacy_f32_e32 v2, v3, v2 ; GFX8-NEXT: v_mul_legacy_f32_e32 v0, v1, v0 -; GFX8-NEXT: v_exp_f32_e32 v2, v2 +; GFX8-NEXT: v_exp_f32_e32 v1, v2 ; GFX8-NEXT: v_exp_f32_e32 v0, v0 -; GFX8-NEXT: v_cvt_f16_f32_sdwa v1, v2 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD +; GFX8-NEXT: v_cvt_f16_f32_sdwa v1, v1 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD ; GFX8-NEXT: v_cvt_f16_f32_e32 v0, v0 ; GFX8-NEXT: v_or_b32_e32 v0, v0, v1 ; GFX8-NEXT: s_setpc_b64 s[30:31] @@ -421,9 +421,9 @@ define <2 x half> @v_pow_v2f16_fneg_rhs(<2 x half> %x, <2 x half> %y) { ; GFX9-NEXT: v_log_f32_e32 v0, v0 ; GFX9-NEXT: v_mul_legacy_f32_e32 v2, v3, v2 ; GFX9-NEXT: v_mul_legacy_f32_e32 v0, v1, v0 -; GFX9-NEXT: v_exp_f32_e32 v2, v2 +; GFX9-NEXT: v_exp_f32_e32 v1, v2 ; GFX9-NEXT: v_exp_f32_e32 v0, v0 -; GFX9-NEXT: v_cvt_f16_f32_e32 v1, v2 +; GFX9-NEXT: v_cvt_f16_f32_e32 v1, v1 ; GFX9-NEXT: v_cvt_f16_f32_e32 v0, v0 ; GFX9-NEXT: v_pack_b32_f16 v0, v0, v1 ; GFX9-NEXT: s_setpc_b64 s[30:31] @@ -439,9 +439,9 @@ define <2 x half> @v_pow_v2f16_fneg_rhs(<2 x half> %x, <2 x half> %y) { ; GFX90A-NEXT: v_log_f32_e32 v0, v0 ; GFX90A-NEXT: v_mul_legacy_f32 v2, v3, v2 ; GFX90A-NEXT: v_mul_legacy_f32 v0, v1, v0 -; GFX90A-NEXT: v_exp_f32_e32 v2, v2 +; GFX90A-NEXT: v_exp_f32_e32 v1, v2 ; GFX90A-NEXT: v_exp_f32_e32 v0, v0 -; GFX90A-NEXT: v_cvt_f16_f32_e32 v1, v2 +; GFX90A-NEXT: v_cvt_f16_f32_e32 v1, v1 ; GFX90A-NEXT: v_cvt_f16_f32_e32 v0, v0 ; GFX90A-NEXT: v_pack_b32_f16 v0, v0, v1 ; GFX90A-NEXT: s_setpc_b64 s[30:31] @@ -509,9 +509,9 @@ define <2 x half> @v_pow_v2f16_fneg_lhs_rhs(<2 x half> %x, <2 x half> %y) { ; GFX8-NEXT: v_log_f32_e32 v0, v0 ; GFX8-NEXT: v_mul_legacy_f32_e32 v2, v3, v2 ; GFX8-NEXT: v_mul_legacy_f32_e32 v0, v1, v0 -; GFX8-NEXT: v_exp_f32_e32 v2, v2 +; GFX8-NEXT: v_exp_f32_e32 v1, v2 ; GFX8-NEXT: v_exp_f32_e32 v0, v0 -; GFX8-NEXT: v_cvt_f16_f32_sdwa v1, v2 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD +; GFX8-NEXT: v_cvt_f16_f32_sdwa v1, v1 dst_sel:WORD_1 dst_unused:UNUSED_PAD 
src0_sel:DWORD ; GFX8-NEXT: v_cvt_f16_f32_e32 v0, v0 ; GFX8-NEXT: v_or_b32_e32 v0, v0, v1 ; GFX8-NEXT: s_setpc_b64 s[30:31] @@ -527,9 +527,9 @@ define <2 x half> @v_pow_v2f16_fneg_lhs_rhs(<2 x half> %x, <2 x half> %y) { ; GFX9-NEXT: v_log_f32_e32 v0, v0 ; GFX9-NEXT: v_mul_legacy_f32_e32 v2, v3, v2 ; GFX9-NEXT: v_mul_legacy_f32_e32 v0, v1, v0 -; GFX9-NEXT: v_exp_f32_e32 v2, v2 +; GFX9-NEXT: v_exp_f32_e32 v1, v2 ; GFX9-NEXT: v_exp_f32_e32 v0, v0 -; GFX9-NEXT: v_cvt_f16_f32_e32 v1, v2 +; GFX9-NEXT: v_cvt_f16_f32_e32 v1, v1 ; GFX9-NEXT: v_cvt_f16_f32_e32 v0, v0 ; GFX9-NEXT: v_pack_b32_f16 v0, v0, v1 ; GFX9-NEXT: s_setpc_b64 s[30:31] @@ -545,9 +545,9 @@ define <2 x half> @v_pow_v2f16_fneg_lhs_rhs(<2 x half> %x, <2 x half> %y) { ; GFX90A-NEXT: v_log_f32_e32 v0, v0 ; GFX90A-NEXT: v_mul_legacy_f32 v2, v3, v2 ; GFX90A-NEXT: v_mul_legacy_f32 v0, v1, v0 -; GFX90A-NEXT: v_exp_f32_e32 v2, v2 +; GFX90A-NEXT: v_exp_f32_e32 v1, v2 ; GFX90A-NEXT: v_exp_f32_e32 v0, v0 -; GFX90A-NEXT: v_cvt_f16_f32_e32 v1, v2 +; GFX90A-NEXT: v_cvt_f16_f32_e32 v1, v1 ; GFX90A-NEXT: v_cvt_f16_f32_e32 v0, v0 ; GFX90A-NEXT: v_pack_b32_f16 v0, v0, v1 ; GFX90A-NEXT: s_setpc_b64 s[30:31] diff --git a/llvm/test/CodeGen/AMDGPU/frame-setup-without-sgpr-to-vgpr-spills.ll b/llvm/test/CodeGen/AMDGPU/frame-setup-without-sgpr-to-vgpr-spills.ll index e7121cec90c8..450cae8a4a38 100644 --- a/llvm/test/CodeGen/AMDGPU/frame-setup-without-sgpr-to-vgpr-spills.ll +++ b/llvm/test/CodeGen/AMDGPU/frame-setup-without-sgpr-to-vgpr-spills.ll @@ -25,15 +25,15 @@ define void @callee_with_stack_and_call() #0 { ; SPILL-TO-VGPR-NEXT: s_add_u32 s4, s4, external_void_func_void@rel32@lo+4 ; SPILL-TO-VGPR-NEXT: s_addc_u32 s5, s5, external_void_func_void@rel32@hi+12 ; SPILL-TO-VGPR-NEXT: s_swappc_b64 s[30:31], s[4:5] -; SPILL-TO-VGPR-NEXT: v_readlane_b32 s4, v40, 0 -; SPILL-TO-VGPR-NEXT: v_readlane_b32 s5, v40, 1 +; SPILL-TO-VGPR-NEXT: v_readlane_b32 s31, v40, 1 +; SPILL-TO-VGPR-NEXT: v_readlane_b32 s30, v40, 0 ; SPILL-TO-VGPR-NEXT: s_addk_i32 s32, 0xfc00 ; SPILL-TO-VGPR-NEXT: v_readlane_b32 s33, v40, 2 -; SPILL-TO-VGPR-NEXT: s_or_saveexec_b64 s[6:7], -1 +; SPILL-TO-VGPR-NEXT: s_or_saveexec_b64 s[4:5], -1 ; SPILL-TO-VGPR-NEXT: buffer_load_dword v40, off, s[0:3], s32 offset:4 ; 4-byte Folded Reload -; SPILL-TO-VGPR-NEXT: s_mov_b64 exec, s[6:7] +; SPILL-TO-VGPR-NEXT: s_mov_b64 exec, s[4:5] ; SPILL-TO-VGPR-NEXT: s_waitcnt vmcnt(0) -; SPILL-TO-VGPR-NEXT: s_setpc_b64 s[4:5] +; SPILL-TO-VGPR-NEXT: s_setpc_b64 s[30:31] ; ; NO-SPILL-TO-VGPR-LABEL: callee_with_stack_and_call: ; NO-SPILL-TO-VGPR: ; %bb.0: @@ -43,14 +43,21 @@ define void @callee_with_stack_and_call() #0 { ; NO-SPILL-TO-VGPR-NEXT: s_mov_b32 s33, s32 ; NO-SPILL-TO-VGPR-NEXT: s_addk_i32 s32, 0x800 ; NO-SPILL-TO-VGPR-NEXT: s_mov_b64 s[8:9], exec -; NO-SPILL-TO-VGPR-NEXT: s_mov_b64 exec, 3 +; NO-SPILL-TO-VGPR-NEXT: s_mov_b64 exec, 1 ; NO-SPILL-TO-VGPR-NEXT: buffer_store_dword v1, off, s[0:3], s33 offset:16 ; NO-SPILL-TO-VGPR-NEXT: v_writelane_b32 v1, s30, 0 -; NO-SPILL-TO-VGPR-NEXT: v_writelane_b32 v1, s31, 1 ; NO-SPILL-TO-VGPR-NEXT: buffer_store_dword v1, off, s[0:3], s33 offset:4 ; 4-byte Folded Spill ; NO-SPILL-TO-VGPR-NEXT: buffer_load_dword v1, off, s[0:3], s33 offset:16 ; NO-SPILL-TO-VGPR-NEXT: s_waitcnt vmcnt(0) ; NO-SPILL-TO-VGPR-NEXT: s_mov_b64 exec, s[8:9] +; NO-SPILL-TO-VGPR-NEXT: s_mov_b64 s[8:9], exec +; NO-SPILL-TO-VGPR-NEXT: s_mov_b64 exec, 1 +; NO-SPILL-TO-VGPR-NEXT: buffer_store_dword v1, off, s[0:3], s33 offset:16 +; NO-SPILL-TO-VGPR-NEXT: v_writelane_b32 v1, s31, 0 +; NO-SPILL-TO-VGPR-NEXT: 
buffer_store_dword v1, off, s[0:3], s33 offset:8 ; 4-byte Folded Spill +; NO-SPILL-TO-VGPR-NEXT: buffer_load_dword v1, off, s[0:3], s33 offset:16 +; NO-SPILL-TO-VGPR-NEXT: s_waitcnt vmcnt(0) +; NO-SPILL-TO-VGPR-NEXT: s_mov_b64 exec, s[8:9] ; NO-SPILL-TO-VGPR-NEXT: v_mov_b32_e32 v0, 0 ; NO-SPILL-TO-VGPR-NEXT: buffer_store_dword v0, off, s[0:3], s33 ; NO-SPILL-TO-VGPR-NEXT: s_waitcnt vmcnt(0) @@ -58,21 +65,29 @@ define void @callee_with_stack_and_call() #0 { ; NO-SPILL-TO-VGPR-NEXT: s_add_u32 s4, s4, external_void_func_void@rel32@lo+4 ; NO-SPILL-TO-VGPR-NEXT: s_addc_u32 s5, s5, external_void_func_void@rel32@hi+12 ; NO-SPILL-TO-VGPR-NEXT: s_swappc_b64 s[30:31], s[4:5] -; NO-SPILL-TO-VGPR-NEXT: s_mov_b64 s[6:7], exec -; NO-SPILL-TO-VGPR-NEXT: s_mov_b64 exec, 3 +; NO-SPILL-TO-VGPR-NEXT: s_mov_b64 s[4:5], exec +; NO-SPILL-TO-VGPR-NEXT: s_mov_b64 exec, 1 +; NO-SPILL-TO-VGPR-NEXT: buffer_store_dword v1, off, s[0:3], s33 offset:16 +; NO-SPILL-TO-VGPR-NEXT: buffer_load_dword v1, off, s[0:3], s33 offset:8 ; 4-byte Folded Reload +; NO-SPILL-TO-VGPR-NEXT: s_waitcnt vmcnt(0) +; NO-SPILL-TO-VGPR-NEXT: v_readlane_b32 s31, v1, 0 +; NO-SPILL-TO-VGPR-NEXT: buffer_load_dword v1, off, s[0:3], s33 offset:16 +; NO-SPILL-TO-VGPR-NEXT: s_waitcnt vmcnt(0) +; NO-SPILL-TO-VGPR-NEXT: s_mov_b64 exec, s[4:5] +; NO-SPILL-TO-VGPR-NEXT: s_mov_b64 s[4:5], exec +; NO-SPILL-TO-VGPR-NEXT: s_mov_b64 exec, 1 ; NO-SPILL-TO-VGPR-NEXT: buffer_store_dword v1, off, s[0:3], s33 offset:16 ; NO-SPILL-TO-VGPR-NEXT: buffer_load_dword v1, off, s[0:3], s33 offset:4 ; 4-byte Folded Reload ; NO-SPILL-TO-VGPR-NEXT: s_waitcnt vmcnt(0) -; NO-SPILL-TO-VGPR-NEXT: v_readlane_b32 s4, v1, 0 -; NO-SPILL-TO-VGPR-NEXT: v_readlane_b32 s5, v1, 1 +; NO-SPILL-TO-VGPR-NEXT: v_readlane_b32 s30, v1, 0 ; NO-SPILL-TO-VGPR-NEXT: buffer_load_dword v1, off, s[0:3], s33 offset:16 ; NO-SPILL-TO-VGPR-NEXT: s_waitcnt vmcnt(0) -; NO-SPILL-TO-VGPR-NEXT: s_mov_b64 exec, s[6:7] +; NO-SPILL-TO-VGPR-NEXT: s_mov_b64 exec, s[4:5] ; NO-SPILL-TO-VGPR-NEXT: s_addk_i32 s32, 0xf800 ; NO-SPILL-TO-VGPR-NEXT: buffer_load_dword v0, off, s[0:3], s32 offset:12 ; 4-byte Folded Reload ; NO-SPILL-TO-VGPR-NEXT: s_waitcnt vmcnt(0) ; NO-SPILL-TO-VGPR-NEXT: v_readfirstlane_b32 s33, v0 -; NO-SPILL-TO-VGPR-NEXT: s_setpc_b64 s[4:5] +; NO-SPILL-TO-VGPR-NEXT: s_setpc_b64 s[30:31] %alloca = alloca i32, addrspace(5) store volatile i32 0, i32 addrspace(5)* %alloca call void @external_void_func_void() diff --git a/llvm/test/CodeGen/AMDGPU/fshr.ll b/llvm/test/CodeGen/AMDGPU/fshr.ll index 5214a02b9f74..940614151e2d 100644 --- a/llvm/test/CodeGen/AMDGPU/fshr.ll +++ b/llvm/test/CodeGen/AMDGPU/fshr.ll @@ -949,24 +949,24 @@ define <4 x i16> @v_fshr_v4i16(<4 x i16> %src0, <4 x i16> %src1, <4 x i16> %src2 ; GFX10-NEXT: v_lshrrev_b32_e32 v7, 16, v5 ; GFX10-NEXT: v_lshrrev_b32_e32 v8, 16, v3 ; GFX10-NEXT: v_lshrrev_b32_e32 v11, 16, v4 -; GFX10-NEXT: v_lshlrev_b16 v1, 1, v1 +; GFX10-NEXT: v_lshrrev_b32_e32 v12, 16, v0 ; GFX10-NEXT: v_lshlrev_b16 v6, 1, v6 ; GFX10-NEXT: v_xor_b32_e32 v9, -1, v7 ; GFX10-NEXT: v_lshrrev_b16 v7, v7, v8 -; GFX10-NEXT: v_lshrrev_b32_e32 v8, 16, v0 ; GFX10-NEXT: v_lshlrev_b16 v0, 1, v0 -; GFX10-NEXT: v_xor_b32_e32 v12, -1, v5 +; GFX10-NEXT: v_xor_b32_e32 v8, -1, v4 +; GFX10-NEXT: v_lshlrev_b16 v1, 1, v1 ; GFX10-NEXT: v_lshlrev_b16 v6, v9, v6 -; GFX10-NEXT: v_xor_b32_e32 v9, -1, v4 +; GFX10-NEXT: v_xor_b32_e32 v9, -1, v5 ; GFX10-NEXT: v_lshrrev_b32_e32 v10, 16, v2 -; GFX10-NEXT: v_lshlrev_b16 v8, 1, v8 +; GFX10-NEXT: v_lshlrev_b16 v12, 1, v12 ; GFX10-NEXT: v_xor_b32_e32 v13, -1, v11 +; 
GFX10-NEXT: v_lshlrev_b16 v0, v8, v0 ; GFX10-NEXT: v_lshrrev_b16 v2, v4, v2 -; GFX10-NEXT: v_lshlrev_b16 v0, v9, v0 -; GFX10-NEXT: v_lshlrev_b16 v1, v12, v1 +; GFX10-NEXT: v_lshlrev_b16 v1, v9, v1 ; GFX10-NEXT: v_lshrrev_b16 v3, v5, v3 ; GFX10-NEXT: v_lshrrev_b16 v4, v11, v10 -; GFX10-NEXT: v_lshlrev_b16 v5, v13, v8 +; GFX10-NEXT: v_lshlrev_b16 v5, v13, v12 ; GFX10-NEXT: v_or_b32_e32 v0, v0, v2 ; GFX10-NEXT: v_mov_b32_e32 v2, 0xffff ; GFX10-NEXT: v_or_b32_e32 v1, v1, v3 diff --git a/llvm/test/CodeGen/AMDGPU/gfx-callable-argument-types.ll b/llvm/test/CodeGen/AMDGPU/gfx-callable-argument-types.ll index 989e8bfe7f56..44450ffe44ac 100644 --- a/llvm/test/CodeGen/AMDGPU/gfx-callable-argument-types.ll +++ b/llvm/test/CodeGen/AMDGPU/gfx-callable-argument-types.ll @@ -105,13 +105,13 @@ define amdgpu_gfx void @test_call_external_void_func_i1_imm() #0 { ; GFX9-NEXT: v_writelane_b32 v40, s30, 0 ; GFX9-NEXT: v_mov_b32_e32 v0, 1 ; GFX9-NEXT: v_writelane_b32 v40, s31, 1 -; GFX9-NEXT: s_getpc_b64 s[30:31] -; GFX9-NEXT: s_add_u32 s30, s30, external_void_func_i1@rel32@lo+4 -; GFX9-NEXT: s_addc_u32 s31, s31, external_void_func_i1@rel32@hi+12 +; GFX9-NEXT: s_getpc_b64 s[34:35] +; GFX9-NEXT: s_add_u32 s34, s34, external_void_func_i1@rel32@lo+4 +; GFX9-NEXT: s_addc_u32 s35, s35, external_void_func_i1@rel32@hi+12 ; GFX9-NEXT: buffer_store_byte v0, off, s[0:3], s32 -; GFX9-NEXT: s_swappc_b64 s[30:31], s[30:31] -; GFX9-NEXT: v_readlane_b32 s30, v40, 0 +; GFX9-NEXT: s_swappc_b64 s[30:31], s[34:35] ; GFX9-NEXT: v_readlane_b32 s31, v40, 1 +; GFX9-NEXT: v_readlane_b32 s30, v40, 0 ; GFX9-NEXT: s_addk_i32 s32, 0xfc00 ; GFX9-NEXT: v_readlane_b32 s33, v40, 2 ; GFX9-NEXT: s_or_saveexec_b64 s[34:35], -1 @@ -132,15 +132,15 @@ define amdgpu_gfx void @test_call_external_void_func_i1_imm() #0 { ; GFX10-NEXT: v_mov_b32_e32 v0, 1 ; GFX10-NEXT: s_mov_b32 s33, s32 ; GFX10-NEXT: s_addk_i32 s32, 0x200 +; GFX10-NEXT: s_getpc_b64 s[34:35] +; GFX10-NEXT: s_add_u32 s34, s34, external_void_func_i1@rel32@lo+4 +; GFX10-NEXT: s_addc_u32 s35, s35, external_void_func_i1@rel32@hi+12 ; GFX10-NEXT: v_writelane_b32 v40, s30, 0 ; GFX10-NEXT: buffer_store_byte v0, off, s[0:3], s32 ; GFX10-NEXT: v_writelane_b32 v40, s31, 1 -; GFX10-NEXT: s_getpc_b64 s[30:31] -; GFX10-NEXT: s_add_u32 s30, s30, external_void_func_i1@rel32@lo+4 -; GFX10-NEXT: s_addc_u32 s31, s31, external_void_func_i1@rel32@hi+12 -; GFX10-NEXT: s_swappc_b64 s[30:31], s[30:31] -; GFX10-NEXT: v_readlane_b32 s30, v40, 0 +; GFX10-NEXT: s_swappc_b64 s[30:31], s[34:35] ; GFX10-NEXT: v_readlane_b32 s31, v40, 1 +; GFX10-NEXT: v_readlane_b32 s30, v40, 0 ; GFX10-NEXT: s_addk_i32 s32, 0xfe00 ; GFX10-NEXT: v_readlane_b32 s33, v40, 2 ; GFX10-NEXT: s_or_saveexec_b32 s34, -1 @@ -169,8 +169,8 @@ define amdgpu_gfx void @test_call_external_void_func_i1_imm() #0 { ; GFX10-SCRATCH-NEXT: scratch_store_byte off, v0, s32 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s31, 1 ; GFX10-SCRATCH-NEXT: s_swappc_b64 s[30:31], s[0:1] -; GFX10-SCRATCH-NEXT: v_readlane_b32 s30, v40, 0 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s31, v40, 1 +; GFX10-SCRATCH-NEXT: v_readlane_b32 s30, v40, 0 ; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, -16 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s33, v40, 2 ; GFX10-SCRATCH-NEXT: s_or_saveexec_b32 s0, -1 @@ -197,14 +197,14 @@ define amdgpu_gfx void @test_call_external_void_func_i1_signext(i32) #0 { ; GFX9-NEXT: s_addk_i32 s32, 0x400 ; GFX9-NEXT: v_writelane_b32 v40, s30, 0 ; GFX9-NEXT: v_writelane_b32 v40, s31, 1 -; GFX9-NEXT: s_getpc_b64 s[30:31] -; GFX9-NEXT: s_add_u32 s30, s30, 
external_void_func_i1_signext@rel32@lo+4 -; GFX9-NEXT: s_addc_u32 s31, s31, external_void_func_i1_signext@rel32@hi+12 +; GFX9-NEXT: s_getpc_b64 s[34:35] +; GFX9-NEXT: s_add_u32 s34, s34, external_void_func_i1_signext@rel32@lo+4 +; GFX9-NEXT: s_addc_u32 s35, s35, external_void_func_i1_signext@rel32@hi+12 ; GFX9-NEXT: v_and_b32_e32 v0, 1, v0 ; GFX9-NEXT: buffer_store_byte v0, off, s[0:3], s32 -; GFX9-NEXT: s_swappc_b64 s[30:31], s[30:31] -; GFX9-NEXT: v_readlane_b32 s30, v40, 0 +; GFX9-NEXT: s_swappc_b64 s[30:31], s[34:35] ; GFX9-NEXT: v_readlane_b32 s31, v40, 1 +; GFX9-NEXT: v_readlane_b32 s30, v40, 0 ; GFX9-NEXT: s_addk_i32 s32, 0xfc00 ; GFX9-NEXT: v_readlane_b32 s33, v40, 2 ; GFX9-NEXT: s_or_saveexec_b64 s[34:35], -1 @@ -226,16 +226,16 @@ define amdgpu_gfx void @test_call_external_void_func_i1_signext(i32) #0 { ; GFX10-NEXT: v_writelane_b32 v40, s33, 2 ; GFX10-NEXT: s_mov_b32 s33, s32 ; GFX10-NEXT: s_addk_i32 s32, 0x200 +; GFX10-NEXT: s_getpc_b64 s[34:35] +; GFX10-NEXT: s_add_u32 s34, s34, external_void_func_i1_signext@rel32@lo+4 +; GFX10-NEXT: s_addc_u32 s35, s35, external_void_func_i1_signext@rel32@hi+12 ; GFX10-NEXT: v_writelane_b32 v40, s30, 0 ; GFX10-NEXT: v_writelane_b32 v40, s31, 1 -; GFX10-NEXT: s_getpc_b64 s[30:31] -; GFX10-NEXT: s_add_u32 s30, s30, external_void_func_i1_signext@rel32@lo+4 -; GFX10-NEXT: s_addc_u32 s31, s31, external_void_func_i1_signext@rel32@hi+12 ; GFX10-NEXT: v_and_b32_e32 v0, 1, v0 ; GFX10-NEXT: buffer_store_byte v0, off, s[0:3], s32 -; GFX10-NEXT: s_swappc_b64 s[30:31], s[30:31] -; GFX10-NEXT: v_readlane_b32 s30, v40, 0 +; GFX10-NEXT: s_swappc_b64 s[30:31], s[34:35] ; GFX10-NEXT: v_readlane_b32 s31, v40, 1 +; GFX10-NEXT: v_readlane_b32 s30, v40, 0 ; GFX10-NEXT: s_addk_i32 s32, 0xfe00 ; GFX10-NEXT: v_readlane_b32 s33, v40, 2 ; GFX10-NEXT: s_or_saveexec_b32 s34, -1 @@ -266,8 +266,8 @@ define amdgpu_gfx void @test_call_external_void_func_i1_signext(i32) #0 { ; GFX10-SCRATCH-NEXT: v_and_b32_e32 v0, 1, v0 ; GFX10-SCRATCH-NEXT: scratch_store_byte off, v0, s32 ; GFX10-SCRATCH-NEXT: s_swappc_b64 s[30:31], s[0:1] -; GFX10-SCRATCH-NEXT: v_readlane_b32 s30, v40, 0 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s31, v40, 1 +; GFX10-SCRATCH-NEXT: v_readlane_b32 s30, v40, 0 ; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, -16 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s33, v40, 2 ; GFX10-SCRATCH-NEXT: s_or_saveexec_b32 s0, -1 @@ -295,14 +295,14 @@ define amdgpu_gfx void @test_call_external_void_func_i1_zeroext(i32) #0 { ; GFX9-NEXT: s_addk_i32 s32, 0x400 ; GFX9-NEXT: v_writelane_b32 v40, s30, 0 ; GFX9-NEXT: v_writelane_b32 v40, s31, 1 -; GFX9-NEXT: s_getpc_b64 s[30:31] -; GFX9-NEXT: s_add_u32 s30, s30, external_void_func_i1_zeroext@rel32@lo+4 -; GFX9-NEXT: s_addc_u32 s31, s31, external_void_func_i1_zeroext@rel32@hi+12 +; GFX9-NEXT: s_getpc_b64 s[34:35] +; GFX9-NEXT: s_add_u32 s34, s34, external_void_func_i1_zeroext@rel32@lo+4 +; GFX9-NEXT: s_addc_u32 s35, s35, external_void_func_i1_zeroext@rel32@hi+12 ; GFX9-NEXT: v_and_b32_e32 v0, 1, v0 ; GFX9-NEXT: buffer_store_byte v0, off, s[0:3], s32 -; GFX9-NEXT: s_swappc_b64 s[30:31], s[30:31] -; GFX9-NEXT: v_readlane_b32 s30, v40, 0 +; GFX9-NEXT: s_swappc_b64 s[30:31], s[34:35] ; GFX9-NEXT: v_readlane_b32 s31, v40, 1 +; GFX9-NEXT: v_readlane_b32 s30, v40, 0 ; GFX9-NEXT: s_addk_i32 s32, 0xfc00 ; GFX9-NEXT: v_readlane_b32 s33, v40, 2 ; GFX9-NEXT: s_or_saveexec_b64 s[34:35], -1 @@ -324,16 +324,16 @@ define amdgpu_gfx void @test_call_external_void_func_i1_zeroext(i32) #0 { ; GFX10-NEXT: v_writelane_b32 v40, s33, 2 ; GFX10-NEXT: s_mov_b32 s33, s32 ; 
GFX10-NEXT: s_addk_i32 s32, 0x200 +; GFX10-NEXT: s_getpc_b64 s[34:35] +; GFX10-NEXT: s_add_u32 s34, s34, external_void_func_i1_zeroext@rel32@lo+4 +; GFX10-NEXT: s_addc_u32 s35, s35, external_void_func_i1_zeroext@rel32@hi+12 ; GFX10-NEXT: v_writelane_b32 v40, s30, 0 ; GFX10-NEXT: v_writelane_b32 v40, s31, 1 -; GFX10-NEXT: s_getpc_b64 s[30:31] -; GFX10-NEXT: s_add_u32 s30, s30, external_void_func_i1_zeroext@rel32@lo+4 -; GFX10-NEXT: s_addc_u32 s31, s31, external_void_func_i1_zeroext@rel32@hi+12 ; GFX10-NEXT: v_and_b32_e32 v0, 1, v0 ; GFX10-NEXT: buffer_store_byte v0, off, s[0:3], s32 -; GFX10-NEXT: s_swappc_b64 s[30:31], s[30:31] -; GFX10-NEXT: v_readlane_b32 s30, v40, 0 +; GFX10-NEXT: s_swappc_b64 s[30:31], s[34:35] ; GFX10-NEXT: v_readlane_b32 s31, v40, 1 +; GFX10-NEXT: v_readlane_b32 s30, v40, 0 ; GFX10-NEXT: s_addk_i32 s32, 0xfe00 ; GFX10-NEXT: v_readlane_b32 s33, v40, 2 ; GFX10-NEXT: s_or_saveexec_b32 s34, -1 @@ -364,8 +364,8 @@ define amdgpu_gfx void @test_call_external_void_func_i1_zeroext(i32) #0 { ; GFX10-SCRATCH-NEXT: v_and_b32_e32 v0, 1, v0 ; GFX10-SCRATCH-NEXT: scratch_store_byte off, v0, s32 ; GFX10-SCRATCH-NEXT: s_swappc_b64 s[30:31], s[0:1] -; GFX10-SCRATCH-NEXT: v_readlane_b32 s30, v40, 0 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s31, v40, 1 +; GFX10-SCRATCH-NEXT: v_readlane_b32 s30, v40, 0 ; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, -16 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s33, v40, 2 ; GFX10-SCRATCH-NEXT: s_or_saveexec_b32 s0, -1 @@ -392,12 +392,12 @@ define amdgpu_gfx void @test_call_external_void_func_i8_imm(i32) #0 { ; GFX9-NEXT: v_writelane_b32 v40, s30, 0 ; GFX9-NEXT: v_mov_b32_e32 v0, 0x7b ; GFX9-NEXT: v_writelane_b32 v40, s31, 1 -; GFX9-NEXT: s_getpc_b64 s[30:31] -; GFX9-NEXT: s_add_u32 s30, s30, external_void_func_i8@rel32@lo+4 -; GFX9-NEXT: s_addc_u32 s31, s31, external_void_func_i8@rel32@hi+12 -; GFX9-NEXT: s_swappc_b64 s[30:31], s[30:31] -; GFX9-NEXT: v_readlane_b32 s30, v40, 0 +; GFX9-NEXT: s_getpc_b64 s[34:35] +; GFX9-NEXT: s_add_u32 s34, s34, external_void_func_i8@rel32@lo+4 +; GFX9-NEXT: s_addc_u32 s35, s35, external_void_func_i8@rel32@hi+12 +; GFX9-NEXT: s_swappc_b64 s[30:31], s[34:35] ; GFX9-NEXT: v_readlane_b32 s31, v40, 1 +; GFX9-NEXT: v_readlane_b32 s30, v40, 0 ; GFX9-NEXT: s_addk_i32 s32, 0xfc00 ; GFX9-NEXT: v_readlane_b32 s33, v40, 2 ; GFX9-NEXT: s_or_saveexec_b64 s[34:35], -1 @@ -418,14 +418,14 @@ define amdgpu_gfx void @test_call_external_void_func_i8_imm(i32) #0 { ; GFX10-NEXT: v_mov_b32_e32 v0, 0x7b ; GFX10-NEXT: s_mov_b32 s33, s32 ; GFX10-NEXT: s_addk_i32 s32, 0x200 +; GFX10-NEXT: s_getpc_b64 s[34:35] +; GFX10-NEXT: s_add_u32 s34, s34, external_void_func_i8@rel32@lo+4 +; GFX10-NEXT: s_addc_u32 s35, s35, external_void_func_i8@rel32@hi+12 ; GFX10-NEXT: v_writelane_b32 v40, s30, 0 ; GFX10-NEXT: v_writelane_b32 v40, s31, 1 -; GFX10-NEXT: s_getpc_b64 s[30:31] -; GFX10-NEXT: s_add_u32 s30, s30, external_void_func_i8@rel32@lo+4 -; GFX10-NEXT: s_addc_u32 s31, s31, external_void_func_i8@rel32@hi+12 -; GFX10-NEXT: s_swappc_b64 s[30:31], s[30:31] -; GFX10-NEXT: v_readlane_b32 s30, v40, 0 +; GFX10-NEXT: s_swappc_b64 s[30:31], s[34:35] ; GFX10-NEXT: v_readlane_b32 s31, v40, 1 +; GFX10-NEXT: v_readlane_b32 s30, v40, 0 ; GFX10-NEXT: s_addk_i32 s32, 0xfe00 ; GFX10-NEXT: v_readlane_b32 s33, v40, 2 ; GFX10-NEXT: s_or_saveexec_b32 s34, -1 @@ -453,8 +453,8 @@ define amdgpu_gfx void @test_call_external_void_func_i8_imm(i32) #0 { ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s30, 0 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s31, 1 ; GFX10-SCRATCH-NEXT: s_swappc_b64 s[30:31], 
s[0:1] -; GFX10-SCRATCH-NEXT: v_readlane_b32 s30, v40, 0 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s31, v40, 1 +; GFX10-SCRATCH-NEXT: v_readlane_b32 s30, v40, 0 ; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, -16 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s33, v40, 2 ; GFX10-SCRATCH-NEXT: s_or_saveexec_b32 s0, -1 @@ -481,12 +481,12 @@ define amdgpu_gfx void @test_call_external_void_func_i8_signext(i32) #0 { ; GFX9-NEXT: s_addk_i32 s32, 0x400 ; GFX9-NEXT: v_writelane_b32 v40, s30, 0 ; GFX9-NEXT: v_writelane_b32 v40, s31, 1 -; GFX9-NEXT: s_getpc_b64 s[30:31] -; GFX9-NEXT: s_add_u32 s30, s30, external_void_func_i8_signext@rel32@lo+4 -; GFX9-NEXT: s_addc_u32 s31, s31, external_void_func_i8_signext@rel32@hi+12 -; GFX9-NEXT: s_swappc_b64 s[30:31], s[30:31] -; GFX9-NEXT: v_readlane_b32 s30, v40, 0 +; GFX9-NEXT: s_getpc_b64 s[34:35] +; GFX9-NEXT: s_add_u32 s34, s34, external_void_func_i8_signext@rel32@lo+4 +; GFX9-NEXT: s_addc_u32 s35, s35, external_void_func_i8_signext@rel32@hi+12 +; GFX9-NEXT: s_swappc_b64 s[30:31], s[34:35] ; GFX9-NEXT: v_readlane_b32 s31, v40, 1 +; GFX9-NEXT: v_readlane_b32 s30, v40, 0 ; GFX9-NEXT: s_addk_i32 s32, 0xfc00 ; GFX9-NEXT: v_readlane_b32 s33, v40, 2 ; GFX9-NEXT: s_or_saveexec_b64 s[34:35], -1 @@ -508,14 +508,14 @@ define amdgpu_gfx void @test_call_external_void_func_i8_signext(i32) #0 { ; GFX10-NEXT: v_writelane_b32 v40, s33, 2 ; GFX10-NEXT: s_mov_b32 s33, s32 ; GFX10-NEXT: s_addk_i32 s32, 0x200 +; GFX10-NEXT: s_getpc_b64 s[34:35] +; GFX10-NEXT: s_add_u32 s34, s34, external_void_func_i8_signext@rel32@lo+4 +; GFX10-NEXT: s_addc_u32 s35, s35, external_void_func_i8_signext@rel32@hi+12 ; GFX10-NEXT: v_writelane_b32 v40, s30, 0 ; GFX10-NEXT: v_writelane_b32 v40, s31, 1 -; GFX10-NEXT: s_getpc_b64 s[30:31] -; GFX10-NEXT: s_add_u32 s30, s30, external_void_func_i8_signext@rel32@lo+4 -; GFX10-NEXT: s_addc_u32 s31, s31, external_void_func_i8_signext@rel32@hi+12 -; GFX10-NEXT: s_swappc_b64 s[30:31], s[30:31] -; GFX10-NEXT: v_readlane_b32 s30, v40, 0 +; GFX10-NEXT: s_swappc_b64 s[30:31], s[34:35] ; GFX10-NEXT: v_readlane_b32 s31, v40, 1 +; GFX10-NEXT: v_readlane_b32 s30, v40, 0 ; GFX10-NEXT: s_addk_i32 s32, 0xfe00 ; GFX10-NEXT: v_readlane_b32 s33, v40, 2 ; GFX10-NEXT: s_or_saveexec_b32 s34, -1 @@ -544,8 +544,8 @@ define amdgpu_gfx void @test_call_external_void_func_i8_signext(i32) #0 { ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s30, 0 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s31, 1 ; GFX10-SCRATCH-NEXT: s_swappc_b64 s[30:31], s[0:1] -; GFX10-SCRATCH-NEXT: v_readlane_b32 s30, v40, 0 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s31, v40, 1 +; GFX10-SCRATCH-NEXT: v_readlane_b32 s30, v40, 0 ; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, -16 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s33, v40, 2 ; GFX10-SCRATCH-NEXT: s_or_saveexec_b32 s0, -1 @@ -573,12 +573,12 @@ define amdgpu_gfx void @test_call_external_void_func_i8_zeroext(i32) #0 { ; GFX9-NEXT: s_addk_i32 s32, 0x400 ; GFX9-NEXT: v_writelane_b32 v40, s30, 0 ; GFX9-NEXT: v_writelane_b32 v40, s31, 1 -; GFX9-NEXT: s_getpc_b64 s[30:31] -; GFX9-NEXT: s_add_u32 s30, s30, external_void_func_i8_zeroext@rel32@lo+4 -; GFX9-NEXT: s_addc_u32 s31, s31, external_void_func_i8_zeroext@rel32@hi+12 -; GFX9-NEXT: s_swappc_b64 s[30:31], s[30:31] -; GFX9-NEXT: v_readlane_b32 s30, v40, 0 +; GFX9-NEXT: s_getpc_b64 s[34:35] +; GFX9-NEXT: s_add_u32 s34, s34, external_void_func_i8_zeroext@rel32@lo+4 +; GFX9-NEXT: s_addc_u32 s35, s35, external_void_func_i8_zeroext@rel32@hi+12 +; GFX9-NEXT: s_swappc_b64 s[30:31], s[34:35] ; GFX9-NEXT: v_readlane_b32 s31, v40, 1 +; GFX9-NEXT: v_readlane_b32 
s30, v40, 0 ; GFX9-NEXT: s_addk_i32 s32, 0xfc00 ; GFX9-NEXT: v_readlane_b32 s33, v40, 2 ; GFX9-NEXT: s_or_saveexec_b64 s[34:35], -1 @@ -600,14 +600,14 @@ define amdgpu_gfx void @test_call_external_void_func_i8_zeroext(i32) #0 { ; GFX10-NEXT: v_writelane_b32 v40, s33, 2 ; GFX10-NEXT: s_mov_b32 s33, s32 ; GFX10-NEXT: s_addk_i32 s32, 0x200 +; GFX10-NEXT: s_getpc_b64 s[34:35] +; GFX10-NEXT: s_add_u32 s34, s34, external_void_func_i8_zeroext@rel32@lo+4 +; GFX10-NEXT: s_addc_u32 s35, s35, external_void_func_i8_zeroext@rel32@hi+12 ; GFX10-NEXT: v_writelane_b32 v40, s30, 0 ; GFX10-NEXT: v_writelane_b32 v40, s31, 1 -; GFX10-NEXT: s_getpc_b64 s[30:31] -; GFX10-NEXT: s_add_u32 s30, s30, external_void_func_i8_zeroext@rel32@lo+4 -; GFX10-NEXT: s_addc_u32 s31, s31, external_void_func_i8_zeroext@rel32@hi+12 -; GFX10-NEXT: s_swappc_b64 s[30:31], s[30:31] -; GFX10-NEXT: v_readlane_b32 s30, v40, 0 +; GFX10-NEXT: s_swappc_b64 s[30:31], s[34:35] ; GFX10-NEXT: v_readlane_b32 s31, v40, 1 +; GFX10-NEXT: v_readlane_b32 s30, v40, 0 ; GFX10-NEXT: s_addk_i32 s32, 0xfe00 ; GFX10-NEXT: v_readlane_b32 s33, v40, 2 ; GFX10-NEXT: s_or_saveexec_b32 s34, -1 @@ -636,8 +636,8 @@ define amdgpu_gfx void @test_call_external_void_func_i8_zeroext(i32) #0 { ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s30, 0 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s31, 1 ; GFX10-SCRATCH-NEXT: s_swappc_b64 s[30:31], s[0:1] -; GFX10-SCRATCH-NEXT: v_readlane_b32 s30, v40, 0 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s31, v40, 1 +; GFX10-SCRATCH-NEXT: v_readlane_b32 s30, v40, 0 ; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, -16 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s33, v40, 2 ; GFX10-SCRATCH-NEXT: s_or_saveexec_b32 s0, -1 @@ -664,12 +664,12 @@ define amdgpu_gfx void @test_call_external_void_func_i16_imm() #0 { ; GFX9-NEXT: v_writelane_b32 v40, s30, 0 ; GFX9-NEXT: v_mov_b32_e32 v0, 0x7b ; GFX9-NEXT: v_writelane_b32 v40, s31, 1 -; GFX9-NEXT: s_getpc_b64 s[30:31] -; GFX9-NEXT: s_add_u32 s30, s30, external_void_func_i16@rel32@lo+4 -; GFX9-NEXT: s_addc_u32 s31, s31, external_void_func_i16@rel32@hi+12 -; GFX9-NEXT: s_swappc_b64 s[30:31], s[30:31] -; GFX9-NEXT: v_readlane_b32 s30, v40, 0 +; GFX9-NEXT: s_getpc_b64 s[34:35] +; GFX9-NEXT: s_add_u32 s34, s34, external_void_func_i16@rel32@lo+4 +; GFX9-NEXT: s_addc_u32 s35, s35, external_void_func_i16@rel32@hi+12 +; GFX9-NEXT: s_swappc_b64 s[30:31], s[34:35] ; GFX9-NEXT: v_readlane_b32 s31, v40, 1 +; GFX9-NEXT: v_readlane_b32 s30, v40, 0 ; GFX9-NEXT: s_addk_i32 s32, 0xfc00 ; GFX9-NEXT: v_readlane_b32 s33, v40, 2 ; GFX9-NEXT: s_or_saveexec_b64 s[34:35], -1 @@ -690,14 +690,14 @@ define amdgpu_gfx void @test_call_external_void_func_i16_imm() #0 { ; GFX10-NEXT: v_mov_b32_e32 v0, 0x7b ; GFX10-NEXT: s_mov_b32 s33, s32 ; GFX10-NEXT: s_addk_i32 s32, 0x200 +; GFX10-NEXT: s_getpc_b64 s[34:35] +; GFX10-NEXT: s_add_u32 s34, s34, external_void_func_i16@rel32@lo+4 +; GFX10-NEXT: s_addc_u32 s35, s35, external_void_func_i16@rel32@hi+12 ; GFX10-NEXT: v_writelane_b32 v40, s30, 0 ; GFX10-NEXT: v_writelane_b32 v40, s31, 1 -; GFX10-NEXT: s_getpc_b64 s[30:31] -; GFX10-NEXT: s_add_u32 s30, s30, external_void_func_i16@rel32@lo+4 -; GFX10-NEXT: s_addc_u32 s31, s31, external_void_func_i16@rel32@hi+12 -; GFX10-NEXT: s_swappc_b64 s[30:31], s[30:31] -; GFX10-NEXT: v_readlane_b32 s30, v40, 0 +; GFX10-NEXT: s_swappc_b64 s[30:31], s[34:35] ; GFX10-NEXT: v_readlane_b32 s31, v40, 1 +; GFX10-NEXT: v_readlane_b32 s30, v40, 0 ; GFX10-NEXT: s_addk_i32 s32, 0xfe00 ; GFX10-NEXT: v_readlane_b32 s33, v40, 2 ; GFX10-NEXT: s_or_saveexec_b32 s34, -1 @@ -725,8 +725,8 @@ 
define amdgpu_gfx void @test_call_external_void_func_i16_imm() #0 { ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s30, 0 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s31, 1 ; GFX10-SCRATCH-NEXT: s_swappc_b64 s[30:31], s[0:1] -; GFX10-SCRATCH-NEXT: v_readlane_b32 s30, v40, 0 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s31, v40, 1 +; GFX10-SCRATCH-NEXT: v_readlane_b32 s30, v40, 0 ; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, -16 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s33, v40, 2 ; GFX10-SCRATCH-NEXT: s_or_saveexec_b32 s0, -1 @@ -753,12 +753,12 @@ define amdgpu_gfx void @test_call_external_void_func_i16_signext(i32) #0 { ; GFX9-NEXT: s_addk_i32 s32, 0x400 ; GFX9-NEXT: v_writelane_b32 v40, s30, 0 ; GFX9-NEXT: v_writelane_b32 v40, s31, 1 -; GFX9-NEXT: s_getpc_b64 s[30:31] -; GFX9-NEXT: s_add_u32 s30, s30, external_void_func_i16_signext@rel32@lo+4 -; GFX9-NEXT: s_addc_u32 s31, s31, external_void_func_i16_signext@rel32@hi+12 -; GFX9-NEXT: s_swappc_b64 s[30:31], s[30:31] -; GFX9-NEXT: v_readlane_b32 s30, v40, 0 +; GFX9-NEXT: s_getpc_b64 s[34:35] +; GFX9-NEXT: s_add_u32 s34, s34, external_void_func_i16_signext@rel32@lo+4 +; GFX9-NEXT: s_addc_u32 s35, s35, external_void_func_i16_signext@rel32@hi+12 +; GFX9-NEXT: s_swappc_b64 s[30:31], s[34:35] ; GFX9-NEXT: v_readlane_b32 s31, v40, 1 +; GFX9-NEXT: v_readlane_b32 s30, v40, 0 ; GFX9-NEXT: s_addk_i32 s32, 0xfc00 ; GFX9-NEXT: v_readlane_b32 s33, v40, 2 ; GFX9-NEXT: s_or_saveexec_b64 s[34:35], -1 @@ -780,14 +780,14 @@ define amdgpu_gfx void @test_call_external_void_func_i16_signext(i32) #0 { ; GFX10-NEXT: v_writelane_b32 v40, s33, 2 ; GFX10-NEXT: s_mov_b32 s33, s32 ; GFX10-NEXT: s_addk_i32 s32, 0x200 +; GFX10-NEXT: s_getpc_b64 s[34:35] +; GFX10-NEXT: s_add_u32 s34, s34, external_void_func_i16_signext@rel32@lo+4 +; GFX10-NEXT: s_addc_u32 s35, s35, external_void_func_i16_signext@rel32@hi+12 ; GFX10-NEXT: v_writelane_b32 v40, s30, 0 ; GFX10-NEXT: v_writelane_b32 v40, s31, 1 -; GFX10-NEXT: s_getpc_b64 s[30:31] -; GFX10-NEXT: s_add_u32 s30, s30, external_void_func_i16_signext@rel32@lo+4 -; GFX10-NEXT: s_addc_u32 s31, s31, external_void_func_i16_signext@rel32@hi+12 -; GFX10-NEXT: s_swappc_b64 s[30:31], s[30:31] -; GFX10-NEXT: v_readlane_b32 s30, v40, 0 +; GFX10-NEXT: s_swappc_b64 s[30:31], s[34:35] ; GFX10-NEXT: v_readlane_b32 s31, v40, 1 +; GFX10-NEXT: v_readlane_b32 s30, v40, 0 ; GFX10-NEXT: s_addk_i32 s32, 0xfe00 ; GFX10-NEXT: v_readlane_b32 s33, v40, 2 ; GFX10-NEXT: s_or_saveexec_b32 s34, -1 @@ -816,8 +816,8 @@ define amdgpu_gfx void @test_call_external_void_func_i16_signext(i32) #0 { ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s30, 0 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s31, 1 ; GFX10-SCRATCH-NEXT: s_swappc_b64 s[30:31], s[0:1] -; GFX10-SCRATCH-NEXT: v_readlane_b32 s30, v40, 0 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s31, v40, 1 +; GFX10-SCRATCH-NEXT: v_readlane_b32 s30, v40, 0 ; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, -16 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s33, v40, 2 ; GFX10-SCRATCH-NEXT: s_or_saveexec_b32 s0, -1 @@ -845,12 +845,12 @@ define amdgpu_gfx void @test_call_external_void_func_i16_zeroext(i32) #0 { ; GFX9-NEXT: s_addk_i32 s32, 0x400 ; GFX9-NEXT: v_writelane_b32 v40, s30, 0 ; GFX9-NEXT: v_writelane_b32 v40, s31, 1 -; GFX9-NEXT: s_getpc_b64 s[30:31] -; GFX9-NEXT: s_add_u32 s30, s30, external_void_func_i16_zeroext@rel32@lo+4 -; GFX9-NEXT: s_addc_u32 s31, s31, external_void_func_i16_zeroext@rel32@hi+12 -; GFX9-NEXT: s_swappc_b64 s[30:31], s[30:31] -; GFX9-NEXT: v_readlane_b32 s30, v40, 0 +; GFX9-NEXT: s_getpc_b64 s[34:35] +; GFX9-NEXT: s_add_u32 s34, s34, 
external_void_func_i16_zeroext@rel32@lo+4 +; GFX9-NEXT: s_addc_u32 s35, s35, external_void_func_i16_zeroext@rel32@hi+12 +; GFX9-NEXT: s_swappc_b64 s[30:31], s[34:35] ; GFX9-NEXT: v_readlane_b32 s31, v40, 1 +; GFX9-NEXT: v_readlane_b32 s30, v40, 0 ; GFX9-NEXT: s_addk_i32 s32, 0xfc00 ; GFX9-NEXT: v_readlane_b32 s33, v40, 2 ; GFX9-NEXT: s_or_saveexec_b64 s[34:35], -1 @@ -872,14 +872,14 @@ define amdgpu_gfx void @test_call_external_void_func_i16_zeroext(i32) #0 { ; GFX10-NEXT: v_writelane_b32 v40, s33, 2 ; GFX10-NEXT: s_mov_b32 s33, s32 ; GFX10-NEXT: s_addk_i32 s32, 0x200 +; GFX10-NEXT: s_getpc_b64 s[34:35] +; GFX10-NEXT: s_add_u32 s34, s34, external_void_func_i16_zeroext@rel32@lo+4 +; GFX10-NEXT: s_addc_u32 s35, s35, external_void_func_i16_zeroext@rel32@hi+12 ; GFX10-NEXT: v_writelane_b32 v40, s30, 0 ; GFX10-NEXT: v_writelane_b32 v40, s31, 1 -; GFX10-NEXT: s_getpc_b64 s[30:31] -; GFX10-NEXT: s_add_u32 s30, s30, external_void_func_i16_zeroext@rel32@lo+4 -; GFX10-NEXT: s_addc_u32 s31, s31, external_void_func_i16_zeroext@rel32@hi+12 -; GFX10-NEXT: s_swappc_b64 s[30:31], s[30:31] -; GFX10-NEXT: v_readlane_b32 s30, v40, 0 +; GFX10-NEXT: s_swappc_b64 s[30:31], s[34:35] ; GFX10-NEXT: v_readlane_b32 s31, v40, 1 +; GFX10-NEXT: v_readlane_b32 s30, v40, 0 ; GFX10-NEXT: s_addk_i32 s32, 0xfe00 ; GFX10-NEXT: v_readlane_b32 s33, v40, 2 ; GFX10-NEXT: s_or_saveexec_b32 s34, -1 @@ -908,8 +908,8 @@ define amdgpu_gfx void @test_call_external_void_func_i16_zeroext(i32) #0 { ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s30, 0 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s31, 1 ; GFX10-SCRATCH-NEXT: s_swappc_b64 s[30:31], s[0:1] -; GFX10-SCRATCH-NEXT: v_readlane_b32 s30, v40, 0 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s31, v40, 1 +; GFX10-SCRATCH-NEXT: v_readlane_b32 s30, v40, 0 ; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, -16 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s33, v40, 2 ; GFX10-SCRATCH-NEXT: s_or_saveexec_b32 s0, -1 @@ -936,12 +936,12 @@ define amdgpu_gfx void @test_call_external_void_func_i32_imm(i32) #0 { ; GFX9-NEXT: v_writelane_b32 v40, s30, 0 ; GFX9-NEXT: v_mov_b32_e32 v0, 42 ; GFX9-NEXT: v_writelane_b32 v40, s31, 1 -; GFX9-NEXT: s_getpc_b64 s[30:31] -; GFX9-NEXT: s_add_u32 s30, s30, external_void_func_i32@rel32@lo+4 -; GFX9-NEXT: s_addc_u32 s31, s31, external_void_func_i32@rel32@hi+12 -; GFX9-NEXT: s_swappc_b64 s[30:31], s[30:31] -; GFX9-NEXT: v_readlane_b32 s30, v40, 0 +; GFX9-NEXT: s_getpc_b64 s[34:35] +; GFX9-NEXT: s_add_u32 s34, s34, external_void_func_i32@rel32@lo+4 +; GFX9-NEXT: s_addc_u32 s35, s35, external_void_func_i32@rel32@hi+12 +; GFX9-NEXT: s_swappc_b64 s[30:31], s[34:35] ; GFX9-NEXT: v_readlane_b32 s31, v40, 1 +; GFX9-NEXT: v_readlane_b32 s30, v40, 0 ; GFX9-NEXT: s_addk_i32 s32, 0xfc00 ; GFX9-NEXT: v_readlane_b32 s33, v40, 2 ; GFX9-NEXT: s_or_saveexec_b64 s[34:35], -1 @@ -962,14 +962,14 @@ define amdgpu_gfx void @test_call_external_void_func_i32_imm(i32) #0 { ; GFX10-NEXT: v_mov_b32_e32 v0, 42 ; GFX10-NEXT: s_mov_b32 s33, s32 ; GFX10-NEXT: s_addk_i32 s32, 0x200 +; GFX10-NEXT: s_getpc_b64 s[34:35] +; GFX10-NEXT: s_add_u32 s34, s34, external_void_func_i32@rel32@lo+4 +; GFX10-NEXT: s_addc_u32 s35, s35, external_void_func_i32@rel32@hi+12 ; GFX10-NEXT: v_writelane_b32 v40, s30, 0 ; GFX10-NEXT: v_writelane_b32 v40, s31, 1 -; GFX10-NEXT: s_getpc_b64 s[30:31] -; GFX10-NEXT: s_add_u32 s30, s30, external_void_func_i32@rel32@lo+4 -; GFX10-NEXT: s_addc_u32 s31, s31, external_void_func_i32@rel32@hi+12 -; GFX10-NEXT: s_swappc_b64 s[30:31], s[30:31] -; GFX10-NEXT: v_readlane_b32 s30, v40, 0 +; GFX10-NEXT: 
s_swappc_b64 s[30:31], s[34:35] ; GFX10-NEXT: v_readlane_b32 s31, v40, 1 +; GFX10-NEXT: v_readlane_b32 s30, v40, 0 ; GFX10-NEXT: s_addk_i32 s32, 0xfe00 ; GFX10-NEXT: v_readlane_b32 s33, v40, 2 ; GFX10-NEXT: s_or_saveexec_b32 s34, -1 @@ -997,8 +997,8 @@ define amdgpu_gfx void @test_call_external_void_func_i32_imm(i32) #0 { ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s30, 0 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s31, 1 ; GFX10-SCRATCH-NEXT: s_swappc_b64 s[30:31], s[0:1] -; GFX10-SCRATCH-NEXT: v_readlane_b32 s30, v40, 0 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s31, v40, 1 +; GFX10-SCRATCH-NEXT: v_readlane_b32 s30, v40, 0 ; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, -16 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s33, v40, 2 ; GFX10-SCRATCH-NEXT: s_or_saveexec_b32 s0, -1 @@ -1025,12 +1025,12 @@ define amdgpu_gfx void @test_call_external_void_func_i64_imm() #0 { ; GFX9-NEXT: v_mov_b32_e32 v0, 0x7b ; GFX9-NEXT: v_mov_b32_e32 v1, 0 ; GFX9-NEXT: v_writelane_b32 v40, s31, 1 -; GFX9-NEXT: s_getpc_b64 s[30:31] -; GFX9-NEXT: s_add_u32 s30, s30, external_void_func_i64@rel32@lo+4 -; GFX9-NEXT: s_addc_u32 s31, s31, external_void_func_i64@rel32@hi+12 -; GFX9-NEXT: s_swappc_b64 s[30:31], s[30:31] -; GFX9-NEXT: v_readlane_b32 s30, v40, 0 +; GFX9-NEXT: s_getpc_b64 s[34:35] +; GFX9-NEXT: s_add_u32 s34, s34, external_void_func_i64@rel32@lo+4 +; GFX9-NEXT: s_addc_u32 s35, s35, external_void_func_i64@rel32@hi+12 +; GFX9-NEXT: s_swappc_b64 s[30:31], s[34:35] ; GFX9-NEXT: v_readlane_b32 s31, v40, 1 +; GFX9-NEXT: v_readlane_b32 s30, v40, 0 ; GFX9-NEXT: s_addk_i32 s32, 0xfc00 ; GFX9-NEXT: v_readlane_b32 s33, v40, 2 ; GFX9-NEXT: s_or_saveexec_b64 s[34:35], -1 @@ -1053,13 +1053,13 @@ define amdgpu_gfx void @test_call_external_void_func_i64_imm() #0 { ; GFX10-NEXT: s_mov_b32 s33, s32 ; GFX10-NEXT: s_addk_i32 s32, 0x200 ; GFX10-NEXT: v_writelane_b32 v40, s30, 0 +; GFX10-NEXT: s_getpc_b64 s[34:35] +; GFX10-NEXT: s_add_u32 s34, s34, external_void_func_i64@rel32@lo+4 +; GFX10-NEXT: s_addc_u32 s35, s35, external_void_func_i64@rel32@hi+12 ; GFX10-NEXT: v_writelane_b32 v40, s31, 1 -; GFX10-NEXT: s_getpc_b64 s[30:31] -; GFX10-NEXT: s_add_u32 s30, s30, external_void_func_i64@rel32@lo+4 -; GFX10-NEXT: s_addc_u32 s31, s31, external_void_func_i64@rel32@hi+12 -; GFX10-NEXT: s_swappc_b64 s[30:31], s[30:31] -; GFX10-NEXT: v_readlane_b32 s30, v40, 0 +; GFX10-NEXT: s_swappc_b64 s[30:31], s[34:35] ; GFX10-NEXT: v_readlane_b32 s31, v40, 1 +; GFX10-NEXT: v_readlane_b32 s30, v40, 0 ; GFX10-NEXT: s_addk_i32 s32, 0xfe00 ; GFX10-NEXT: v_readlane_b32 s33, v40, 2 ; GFX10-NEXT: s_or_saveexec_b32 s34, -1 @@ -1088,8 +1088,8 @@ define amdgpu_gfx void @test_call_external_void_func_i64_imm() #0 { ; GFX10-SCRATCH-NEXT: s_addc_u32 s1, s1, external_void_func_i64@rel32@hi+12 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s31, 1 ; GFX10-SCRATCH-NEXT: s_swappc_b64 s[30:31], s[0:1] -; GFX10-SCRATCH-NEXT: v_readlane_b32 s30, v40, 0 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s31, v40, 1 +; GFX10-SCRATCH-NEXT: v_readlane_b32 s30, v40, 0 ; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, -16 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s33, v40, 2 ; GFX10-SCRATCH-NEXT: s_or_saveexec_b32 s0, -1 @@ -1117,12 +1117,12 @@ define amdgpu_gfx void @test_call_external_void_func_v2i64() #0 { ; GFX9-NEXT: s_addk_i32 s32, 0x400 ; GFX9-NEXT: v_writelane_b32 v40, s30, 0 ; GFX9-NEXT: v_writelane_b32 v40, s31, 1 -; GFX9-NEXT: s_getpc_b64 s[30:31] -; GFX9-NEXT: s_add_u32 s30, s30, external_void_func_v2i64@rel32@lo+4 -; GFX9-NEXT: s_addc_u32 s31, s31, external_void_func_v2i64@rel32@hi+12 -; GFX9-NEXT: s_swappc_b64 
s[30:31], s[30:31] -; GFX9-NEXT: v_readlane_b32 s30, v40, 0 +; GFX9-NEXT: s_getpc_b64 s[34:35] +; GFX9-NEXT: s_add_u32 s34, s34, external_void_func_v2i64@rel32@lo+4 +; GFX9-NEXT: s_addc_u32 s35, s35, external_void_func_v2i64@rel32@hi+12 +; GFX9-NEXT: s_swappc_b64 s[30:31], s[34:35] ; GFX9-NEXT: v_readlane_b32 s31, v40, 1 +; GFX9-NEXT: v_readlane_b32 s30, v40, 0 ; GFX9-NEXT: s_addk_i32 s32, 0xfc00 ; GFX9-NEXT: v_readlane_b32 s33, v40, 2 ; GFX9-NEXT: s_or_saveexec_b64 s[34:35], -1 @@ -1144,15 +1144,15 @@ define amdgpu_gfx void @test_call_external_void_func_v2i64() #0 { ; GFX10-NEXT: v_writelane_b32 v40, s33, 2 ; GFX10-NEXT: s_mov_b32 s33, s32 ; GFX10-NEXT: s_addk_i32 s32, 0x200 +; GFX10-NEXT: s_getpc_b64 s[34:35] +; GFX10-NEXT: s_add_u32 s34, s34, external_void_func_v2i64@rel32@lo+4 +; GFX10-NEXT: s_addc_u32 s35, s35, external_void_func_v2i64@rel32@hi+12 ; GFX10-NEXT: global_load_dwordx4 v[0:3], v[0:1], off ; GFX10-NEXT: v_writelane_b32 v40, s30, 0 ; GFX10-NEXT: v_writelane_b32 v40, s31, 1 -; GFX10-NEXT: s_getpc_b64 s[30:31] -; GFX10-NEXT: s_add_u32 s30, s30, external_void_func_v2i64@rel32@lo+4 -; GFX10-NEXT: s_addc_u32 s31, s31, external_void_func_v2i64@rel32@hi+12 -; GFX10-NEXT: s_swappc_b64 s[30:31], s[30:31] -; GFX10-NEXT: v_readlane_b32 s30, v40, 0 +; GFX10-NEXT: s_swappc_b64 s[30:31], s[34:35] ; GFX10-NEXT: v_readlane_b32 s31, v40, 1 +; GFX10-NEXT: v_readlane_b32 s30, v40, 0 ; GFX10-NEXT: s_addk_i32 s32, 0xfe00 ; GFX10-NEXT: v_readlane_b32 s33, v40, 2 ; GFX10-NEXT: s_or_saveexec_b32 s34, -1 @@ -1182,8 +1182,8 @@ define amdgpu_gfx void @test_call_external_void_func_v2i64() #0 { ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s30, 0 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s31, 1 ; GFX10-SCRATCH-NEXT: s_swappc_b64 s[30:31], s[0:1] -; GFX10-SCRATCH-NEXT: v_readlane_b32 s30, v40, 0 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s31, v40, 1 +; GFX10-SCRATCH-NEXT: v_readlane_b32 s30, v40, 0 ; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, -16 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s33, v40, 2 ; GFX10-SCRATCH-NEXT: s_or_saveexec_b32 s0, -1 @@ -1213,12 +1213,12 @@ define amdgpu_gfx void @test_call_external_void_func_v2i64_imm() #0 { ; GFX9-NEXT: v_mov_b32_e32 v2, 3 ; GFX9-NEXT: v_mov_b32_e32 v3, 4 ; GFX9-NEXT: v_writelane_b32 v40, s31, 1 -; GFX9-NEXT: s_getpc_b64 s[30:31] -; GFX9-NEXT: s_add_u32 s30, s30, external_void_func_v2i64@rel32@lo+4 -; GFX9-NEXT: s_addc_u32 s31, s31, external_void_func_v2i64@rel32@hi+12 -; GFX9-NEXT: s_swappc_b64 s[30:31], s[30:31] -; GFX9-NEXT: v_readlane_b32 s30, v40, 0 +; GFX9-NEXT: s_getpc_b64 s[34:35] +; GFX9-NEXT: s_add_u32 s34, s34, external_void_func_v2i64@rel32@lo+4 +; GFX9-NEXT: s_addc_u32 s35, s35, external_void_func_v2i64@rel32@hi+12 +; GFX9-NEXT: s_swappc_b64 s[30:31], s[34:35] ; GFX9-NEXT: v_readlane_b32 s31, v40, 1 +; GFX9-NEXT: v_readlane_b32 s30, v40, 0 ; GFX9-NEXT: s_addk_i32 s32, 0xfc00 ; GFX9-NEXT: v_readlane_b32 s33, v40, 2 ; GFX9-NEXT: s_or_saveexec_b64 s[34:35], -1 @@ -1243,13 +1243,13 @@ define amdgpu_gfx void @test_call_external_void_func_v2i64_imm() #0 { ; GFX10-NEXT: v_writelane_b32 v40, s30, 0 ; GFX10-NEXT: s_mov_b32 s33, s32 ; GFX10-NEXT: s_addk_i32 s32, 0x200 +; GFX10-NEXT: s_getpc_b64 s[34:35] +; GFX10-NEXT: s_add_u32 s34, s34, external_void_func_v2i64@rel32@lo+4 +; GFX10-NEXT: s_addc_u32 s35, s35, external_void_func_v2i64@rel32@hi+12 ; GFX10-NEXT: v_writelane_b32 v40, s31, 1 -; GFX10-NEXT: s_getpc_b64 s[30:31] -; GFX10-NEXT: s_add_u32 s30, s30, external_void_func_v2i64@rel32@lo+4 -; GFX10-NEXT: s_addc_u32 s31, s31, external_void_func_v2i64@rel32@hi+12 -; 
GFX10-NEXT: s_swappc_b64 s[30:31], s[30:31] -; GFX10-NEXT: v_readlane_b32 s30, v40, 0 +; GFX10-NEXT: s_swappc_b64 s[30:31], s[34:35] ; GFX10-NEXT: v_readlane_b32 s31, v40, 1 +; GFX10-NEXT: v_readlane_b32 s30, v40, 0 ; GFX10-NEXT: s_addk_i32 s32, 0xfe00 ; GFX10-NEXT: v_readlane_b32 s33, v40, 2 ; GFX10-NEXT: s_or_saveexec_b32 s34, -1 @@ -1280,8 +1280,8 @@ define amdgpu_gfx void @test_call_external_void_func_v2i64_imm() #0 { ; GFX10-SCRATCH-NEXT: s_addc_u32 s1, s1, external_void_func_v2i64@rel32@hi+12 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s31, 1 ; GFX10-SCRATCH-NEXT: s_swappc_b64 s[30:31], s[0:1] -; GFX10-SCRATCH-NEXT: v_readlane_b32 s30, v40, 0 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s31, v40, 1 +; GFX10-SCRATCH-NEXT: v_readlane_b32 s30, v40, 0 ; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, -16 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s33, v40, 2 ; GFX10-SCRATCH-NEXT: s_or_saveexec_b32 s0, -1 @@ -1311,12 +1311,12 @@ define amdgpu_gfx void @test_call_external_void_func_v3i64() #0 { ; GFX9-NEXT: v_mov_b32_e32 v4, 1 ; GFX9-NEXT: v_mov_b32_e32 v5, 2 ; GFX9-NEXT: v_writelane_b32 v40, s31, 1 -; GFX9-NEXT: s_getpc_b64 s[30:31] -; GFX9-NEXT: s_add_u32 s30, s30, external_void_func_v3i64@rel32@lo+4 -; GFX9-NEXT: s_addc_u32 s31, s31, external_void_func_v3i64@rel32@hi+12 -; GFX9-NEXT: s_swappc_b64 s[30:31], s[30:31] -; GFX9-NEXT: v_readlane_b32 s30, v40, 0 +; GFX9-NEXT: s_getpc_b64 s[34:35] +; GFX9-NEXT: s_add_u32 s34, s34, external_void_func_v3i64@rel32@lo+4 +; GFX9-NEXT: s_addc_u32 s35, s35, external_void_func_v3i64@rel32@hi+12 +; GFX9-NEXT: s_swappc_b64 s[30:31], s[34:35] ; GFX9-NEXT: v_readlane_b32 s31, v40, 1 +; GFX9-NEXT: v_readlane_b32 s30, v40, 0 ; GFX9-NEXT: s_addk_i32 s32, 0xfc00 ; GFX9-NEXT: v_readlane_b32 s33, v40, 2 ; GFX9-NEXT: s_or_saveexec_b64 s[34:35], -1 @@ -1342,13 +1342,13 @@ define amdgpu_gfx void @test_call_external_void_func_v3i64() #0 { ; GFX10-NEXT: global_load_dwordx4 v[0:3], v[0:1], off ; GFX10-NEXT: v_writelane_b32 v40, s30, 0 ; GFX10-NEXT: s_addk_i32 s32, 0x200 +; GFX10-NEXT: s_getpc_b64 s[34:35] +; GFX10-NEXT: s_add_u32 s34, s34, external_void_func_v3i64@rel32@lo+4 +; GFX10-NEXT: s_addc_u32 s35, s35, external_void_func_v3i64@rel32@hi+12 ; GFX10-NEXT: v_writelane_b32 v40, s31, 1 -; GFX10-NEXT: s_getpc_b64 s[30:31] -; GFX10-NEXT: s_add_u32 s30, s30, external_void_func_v3i64@rel32@lo+4 -; GFX10-NEXT: s_addc_u32 s31, s31, external_void_func_v3i64@rel32@hi+12 -; GFX10-NEXT: s_swappc_b64 s[30:31], s[30:31] -; GFX10-NEXT: v_readlane_b32 s30, v40, 0 +; GFX10-NEXT: s_swappc_b64 s[30:31], s[34:35] ; GFX10-NEXT: v_readlane_b32 s31, v40, 1 +; GFX10-NEXT: v_readlane_b32 s30, v40, 0 ; GFX10-NEXT: s_addk_i32 s32, 0xfe00 ; GFX10-NEXT: v_readlane_b32 s33, v40, 2 ; GFX10-NEXT: s_or_saveexec_b32 s34, -1 @@ -1380,8 +1380,8 @@ define amdgpu_gfx void @test_call_external_void_func_v3i64() #0 { ; GFX10-SCRATCH-NEXT: s_addc_u32 s1, s1, external_void_func_v3i64@rel32@hi+12 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s31, 1 ; GFX10-SCRATCH-NEXT: s_swappc_b64 s[30:31], s[0:1] -; GFX10-SCRATCH-NEXT: v_readlane_b32 s30, v40, 0 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s31, v40, 1 +; GFX10-SCRATCH-NEXT: v_readlane_b32 s30, v40, 0 ; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, -16 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s33, v40, 2 ; GFX10-SCRATCH-NEXT: s_or_saveexec_b32 s0, -1 @@ -1416,12 +1416,12 @@ define amdgpu_gfx void @test_call_external_void_func_v4i64() #0 { ; GFX9-NEXT: v_mov_b32_e32 v6, 3 ; GFX9-NEXT: v_mov_b32_e32 v7, 4 ; GFX9-NEXT: v_writelane_b32 v40, s31, 1 -; GFX9-NEXT: s_getpc_b64 s[30:31] -; GFX9-NEXT: 
s_add_u32 s30, s30, external_void_func_v4i64@rel32@lo+4 -; GFX9-NEXT: s_addc_u32 s31, s31, external_void_func_v4i64@rel32@hi+12 -; GFX9-NEXT: s_swappc_b64 s[30:31], s[30:31] -; GFX9-NEXT: v_readlane_b32 s30, v40, 0 +; GFX9-NEXT: s_getpc_b64 s[34:35] +; GFX9-NEXT: s_add_u32 s34, s34, external_void_func_v4i64@rel32@lo+4 +; GFX9-NEXT: s_addc_u32 s35, s35, external_void_func_v4i64@rel32@hi+12 +; GFX9-NEXT: s_swappc_b64 s[30:31], s[34:35] ; GFX9-NEXT: v_readlane_b32 s31, v40, 1 +; GFX9-NEXT: v_readlane_b32 s30, v40, 0 ; GFX9-NEXT: s_addk_i32 s32, 0xfc00 ; GFX9-NEXT: v_readlane_b32 s33, v40, 2 ; GFX9-NEXT: s_or_saveexec_b64 s[34:35], -1 @@ -1449,13 +1449,13 @@ define amdgpu_gfx void @test_call_external_void_func_v4i64() #0 { ; GFX10-NEXT: v_mov_b32_e32 v7, 4 ; GFX10-NEXT: s_mov_b32 s33, s32 ; GFX10-NEXT: s_addk_i32 s32, 0x200 +; GFX10-NEXT: s_getpc_b64 s[34:35] +; GFX10-NEXT: s_add_u32 s34, s34, external_void_func_v4i64@rel32@lo+4 +; GFX10-NEXT: s_addc_u32 s35, s35, external_void_func_v4i64@rel32@hi+12 ; GFX10-NEXT: v_writelane_b32 v40, s31, 1 -; GFX10-NEXT: s_getpc_b64 s[30:31] -; GFX10-NEXT: s_add_u32 s30, s30, external_void_func_v4i64@rel32@lo+4 -; GFX10-NEXT: s_addc_u32 s31, s31, external_void_func_v4i64@rel32@hi+12 -; GFX10-NEXT: s_swappc_b64 s[30:31], s[30:31] -; GFX10-NEXT: v_readlane_b32 s30, v40, 0 +; GFX10-NEXT: s_swappc_b64 s[30:31], s[34:35] ; GFX10-NEXT: v_readlane_b32 s31, v40, 1 +; GFX10-NEXT: v_readlane_b32 s30, v40, 0 ; GFX10-NEXT: s_addk_i32 s32, 0xfe00 ; GFX10-NEXT: v_readlane_b32 s33, v40, 2 ; GFX10-NEXT: s_or_saveexec_b32 s34, -1 @@ -1489,8 +1489,8 @@ define amdgpu_gfx void @test_call_external_void_func_v4i64() #0 { ; GFX10-SCRATCH-NEXT: s_addc_u32 s1, s1, external_void_func_v4i64@rel32@hi+12 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s31, 1 ; GFX10-SCRATCH-NEXT: s_swappc_b64 s[30:31], s[0:1] -; GFX10-SCRATCH-NEXT: v_readlane_b32 s30, v40, 0 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s31, v40, 1 +; GFX10-SCRATCH-NEXT: v_readlane_b32 s30, v40, 0 ; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, -16 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s33, v40, 2 ; GFX10-SCRATCH-NEXT: s_or_saveexec_b32 s0, -1 @@ -1518,12 +1518,12 @@ define amdgpu_gfx void @test_call_external_void_func_f16_imm() #0 { ; GFX9-NEXT: v_writelane_b32 v40, s30, 0 ; GFX9-NEXT: v_mov_b32_e32 v0, 0x4400 ; GFX9-NEXT: v_writelane_b32 v40, s31, 1 -; GFX9-NEXT: s_getpc_b64 s[30:31] -; GFX9-NEXT: s_add_u32 s30, s30, external_void_func_f16@rel32@lo+4 -; GFX9-NEXT: s_addc_u32 s31, s31, external_void_func_f16@rel32@hi+12 -; GFX9-NEXT: s_swappc_b64 s[30:31], s[30:31] -; GFX9-NEXT: v_readlane_b32 s30, v40, 0 +; GFX9-NEXT: s_getpc_b64 s[34:35] +; GFX9-NEXT: s_add_u32 s34, s34, external_void_func_f16@rel32@lo+4 +; GFX9-NEXT: s_addc_u32 s35, s35, external_void_func_f16@rel32@hi+12 +; GFX9-NEXT: s_swappc_b64 s[30:31], s[34:35] ; GFX9-NEXT: v_readlane_b32 s31, v40, 1 +; GFX9-NEXT: v_readlane_b32 s30, v40, 0 ; GFX9-NEXT: s_addk_i32 s32, 0xfc00 ; GFX9-NEXT: v_readlane_b32 s33, v40, 2 ; GFX9-NEXT: s_or_saveexec_b64 s[34:35], -1 @@ -1544,14 +1544,14 @@ define amdgpu_gfx void @test_call_external_void_func_f16_imm() #0 { ; GFX10-NEXT: v_mov_b32_e32 v0, 0x4400 ; GFX10-NEXT: s_mov_b32 s33, s32 ; GFX10-NEXT: s_addk_i32 s32, 0x200 +; GFX10-NEXT: s_getpc_b64 s[34:35] +; GFX10-NEXT: s_add_u32 s34, s34, external_void_func_f16@rel32@lo+4 +; GFX10-NEXT: s_addc_u32 s35, s35, external_void_func_f16@rel32@hi+12 ; GFX10-NEXT: v_writelane_b32 v40, s30, 0 ; GFX10-NEXT: v_writelane_b32 v40, s31, 1 -; GFX10-NEXT: s_getpc_b64 s[30:31] -; GFX10-NEXT: s_add_u32 s30, s30, 
external_void_func_f16@rel32@lo+4 -; GFX10-NEXT: s_addc_u32 s31, s31, external_void_func_f16@rel32@hi+12 -; GFX10-NEXT: s_swappc_b64 s[30:31], s[30:31] -; GFX10-NEXT: v_readlane_b32 s30, v40, 0 +; GFX10-NEXT: s_swappc_b64 s[30:31], s[34:35] ; GFX10-NEXT: v_readlane_b32 s31, v40, 1 +; GFX10-NEXT: v_readlane_b32 s30, v40, 0 ; GFX10-NEXT: s_addk_i32 s32, 0xfe00 ; GFX10-NEXT: v_readlane_b32 s33, v40, 2 ; GFX10-NEXT: s_or_saveexec_b32 s34, -1 @@ -1579,8 +1579,8 @@ define amdgpu_gfx void @test_call_external_void_func_f16_imm() #0 { ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s30, 0 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s31, 1 ; GFX10-SCRATCH-NEXT: s_swappc_b64 s[30:31], s[0:1] -; GFX10-SCRATCH-NEXT: v_readlane_b32 s30, v40, 0 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s31, v40, 1 +; GFX10-SCRATCH-NEXT: v_readlane_b32 s30, v40, 0 ; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, -16 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s33, v40, 2 ; GFX10-SCRATCH-NEXT: s_or_saveexec_b32 s0, -1 @@ -1606,12 +1606,12 @@ define amdgpu_gfx void @test_call_external_void_func_f32_imm() #0 { ; GFX9-NEXT: v_writelane_b32 v40, s30, 0 ; GFX9-NEXT: v_mov_b32_e32 v0, 4.0 ; GFX9-NEXT: v_writelane_b32 v40, s31, 1 -; GFX9-NEXT: s_getpc_b64 s[30:31] -; GFX9-NEXT: s_add_u32 s30, s30, external_void_func_f32@rel32@lo+4 -; GFX9-NEXT: s_addc_u32 s31, s31, external_void_func_f32@rel32@hi+12 -; GFX9-NEXT: s_swappc_b64 s[30:31], s[30:31] -; GFX9-NEXT: v_readlane_b32 s30, v40, 0 +; GFX9-NEXT: s_getpc_b64 s[34:35] +; GFX9-NEXT: s_add_u32 s34, s34, external_void_func_f32@rel32@lo+4 +; GFX9-NEXT: s_addc_u32 s35, s35, external_void_func_f32@rel32@hi+12 +; GFX9-NEXT: s_swappc_b64 s[30:31], s[34:35] ; GFX9-NEXT: v_readlane_b32 s31, v40, 1 +; GFX9-NEXT: v_readlane_b32 s30, v40, 0 ; GFX9-NEXT: s_addk_i32 s32, 0xfc00 ; GFX9-NEXT: v_readlane_b32 s33, v40, 2 ; GFX9-NEXT: s_or_saveexec_b64 s[34:35], -1 @@ -1632,14 +1632,14 @@ define amdgpu_gfx void @test_call_external_void_func_f32_imm() #0 { ; GFX10-NEXT: v_mov_b32_e32 v0, 4.0 ; GFX10-NEXT: s_mov_b32 s33, s32 ; GFX10-NEXT: s_addk_i32 s32, 0x200 +; GFX10-NEXT: s_getpc_b64 s[34:35] +; GFX10-NEXT: s_add_u32 s34, s34, external_void_func_f32@rel32@lo+4 +; GFX10-NEXT: s_addc_u32 s35, s35, external_void_func_f32@rel32@hi+12 ; GFX10-NEXT: v_writelane_b32 v40, s30, 0 ; GFX10-NEXT: v_writelane_b32 v40, s31, 1 -; GFX10-NEXT: s_getpc_b64 s[30:31] -; GFX10-NEXT: s_add_u32 s30, s30, external_void_func_f32@rel32@lo+4 -; GFX10-NEXT: s_addc_u32 s31, s31, external_void_func_f32@rel32@hi+12 -; GFX10-NEXT: s_swappc_b64 s[30:31], s[30:31] -; GFX10-NEXT: v_readlane_b32 s30, v40, 0 +; GFX10-NEXT: s_swappc_b64 s[30:31], s[34:35] ; GFX10-NEXT: v_readlane_b32 s31, v40, 1 +; GFX10-NEXT: v_readlane_b32 s30, v40, 0 ; GFX10-NEXT: s_addk_i32 s32, 0xfe00 ; GFX10-NEXT: v_readlane_b32 s33, v40, 2 ; GFX10-NEXT: s_or_saveexec_b32 s34, -1 @@ -1667,8 +1667,8 @@ define amdgpu_gfx void @test_call_external_void_func_f32_imm() #0 { ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s30, 0 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s31, 1 ; GFX10-SCRATCH-NEXT: s_swappc_b64 s[30:31], s[0:1] -; GFX10-SCRATCH-NEXT: v_readlane_b32 s30, v40, 0 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s31, v40, 1 +; GFX10-SCRATCH-NEXT: v_readlane_b32 s30, v40, 0 ; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, -16 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s33, v40, 2 ; GFX10-SCRATCH-NEXT: s_or_saveexec_b32 s0, -1 @@ -1695,12 +1695,12 @@ define amdgpu_gfx void @test_call_external_void_func_v2f32_imm() #0 { ; GFX9-NEXT: v_mov_b32_e32 v0, 1.0 ; GFX9-NEXT: v_mov_b32_e32 v1, 2.0 ; GFX9-NEXT: 
v_writelane_b32 v40, s31, 1 -; GFX9-NEXT: s_getpc_b64 s[30:31] -; GFX9-NEXT: s_add_u32 s30, s30, external_void_func_v2f32@rel32@lo+4 -; GFX9-NEXT: s_addc_u32 s31, s31, external_void_func_v2f32@rel32@hi+12 -; GFX9-NEXT: s_swappc_b64 s[30:31], s[30:31] -; GFX9-NEXT: v_readlane_b32 s30, v40, 0 +; GFX9-NEXT: s_getpc_b64 s[34:35] +; GFX9-NEXT: s_add_u32 s34, s34, external_void_func_v2f32@rel32@lo+4 +; GFX9-NEXT: s_addc_u32 s35, s35, external_void_func_v2f32@rel32@hi+12 +; GFX9-NEXT: s_swappc_b64 s[30:31], s[34:35] ; GFX9-NEXT: v_readlane_b32 s31, v40, 1 +; GFX9-NEXT: v_readlane_b32 s30, v40, 0 ; GFX9-NEXT: s_addk_i32 s32, 0xfc00 ; GFX9-NEXT: v_readlane_b32 s33, v40, 2 ; GFX9-NEXT: s_or_saveexec_b64 s[34:35], -1 @@ -1723,13 +1723,13 @@ define amdgpu_gfx void @test_call_external_void_func_v2f32_imm() #0 { ; GFX10-NEXT: s_mov_b32 s33, s32 ; GFX10-NEXT: s_addk_i32 s32, 0x200 ; GFX10-NEXT: v_writelane_b32 v40, s30, 0 +; GFX10-NEXT: s_getpc_b64 s[34:35] +; GFX10-NEXT: s_add_u32 s34, s34, external_void_func_v2f32@rel32@lo+4 +; GFX10-NEXT: s_addc_u32 s35, s35, external_void_func_v2f32@rel32@hi+12 ; GFX10-NEXT: v_writelane_b32 v40, s31, 1 -; GFX10-NEXT: s_getpc_b64 s[30:31] -; GFX10-NEXT: s_add_u32 s30, s30, external_void_func_v2f32@rel32@lo+4 -; GFX10-NEXT: s_addc_u32 s31, s31, external_void_func_v2f32@rel32@hi+12 -; GFX10-NEXT: s_swappc_b64 s[30:31], s[30:31] -; GFX10-NEXT: v_readlane_b32 s30, v40, 0 +; GFX10-NEXT: s_swappc_b64 s[30:31], s[34:35] ; GFX10-NEXT: v_readlane_b32 s31, v40, 1 +; GFX10-NEXT: v_readlane_b32 s30, v40, 0 ; GFX10-NEXT: s_addk_i32 s32, 0xfe00 ; GFX10-NEXT: v_readlane_b32 s33, v40, 2 ; GFX10-NEXT: s_or_saveexec_b32 s34, -1 @@ -1758,8 +1758,8 @@ define amdgpu_gfx void @test_call_external_void_func_v2f32_imm() #0 { ; GFX10-SCRATCH-NEXT: s_addc_u32 s1, s1, external_void_func_v2f32@rel32@hi+12 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s31, 1 ; GFX10-SCRATCH-NEXT: s_swappc_b64 s[30:31], s[0:1] -; GFX10-SCRATCH-NEXT: v_readlane_b32 s30, v40, 0 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s31, v40, 1 +; GFX10-SCRATCH-NEXT: v_readlane_b32 s30, v40, 0 ; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, -16 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s33, v40, 2 ; GFX10-SCRATCH-NEXT: s_or_saveexec_b32 s0, -1 @@ -1787,12 +1787,12 @@ define amdgpu_gfx void @test_call_external_void_func_v3f32_imm() #0 { ; GFX9-NEXT: v_mov_b32_e32 v1, 2.0 ; GFX9-NEXT: v_mov_b32_e32 v2, 4.0 ; GFX9-NEXT: v_writelane_b32 v40, s31, 1 -; GFX9-NEXT: s_getpc_b64 s[30:31] -; GFX9-NEXT: s_add_u32 s30, s30, external_void_func_v3f32@rel32@lo+4 -; GFX9-NEXT: s_addc_u32 s31, s31, external_void_func_v3f32@rel32@hi+12 -; GFX9-NEXT: s_swappc_b64 s[30:31], s[30:31] -; GFX9-NEXT: v_readlane_b32 s30, v40, 0 +; GFX9-NEXT: s_getpc_b64 s[34:35] +; GFX9-NEXT: s_add_u32 s34, s34, external_void_func_v3f32@rel32@lo+4 +; GFX9-NEXT: s_addc_u32 s35, s35, external_void_func_v3f32@rel32@hi+12 +; GFX9-NEXT: s_swappc_b64 s[30:31], s[34:35] ; GFX9-NEXT: v_readlane_b32 s31, v40, 1 +; GFX9-NEXT: v_readlane_b32 s30, v40, 0 ; GFX9-NEXT: s_addk_i32 s32, 0xfc00 ; GFX9-NEXT: v_readlane_b32 s33, v40, 2 ; GFX9-NEXT: s_or_saveexec_b64 s[34:35], -1 @@ -1816,13 +1816,13 @@ define amdgpu_gfx void @test_call_external_void_func_v3f32_imm() #0 { ; GFX10-NEXT: s_mov_b32 s33, s32 ; GFX10-NEXT: v_writelane_b32 v40, s30, 0 ; GFX10-NEXT: s_addk_i32 s32, 0x200 +; GFX10-NEXT: s_getpc_b64 s[34:35] +; GFX10-NEXT: s_add_u32 s34, s34, external_void_func_v3f32@rel32@lo+4 +; GFX10-NEXT: s_addc_u32 s35, s35, external_void_func_v3f32@rel32@hi+12 ; GFX10-NEXT: v_writelane_b32 v40, s31, 1 -; 
GFX10-NEXT: s_getpc_b64 s[30:31] -; GFX10-NEXT: s_add_u32 s30, s30, external_void_func_v3f32@rel32@lo+4 -; GFX10-NEXT: s_addc_u32 s31, s31, external_void_func_v3f32@rel32@hi+12 -; GFX10-NEXT: s_swappc_b64 s[30:31], s[30:31] -; GFX10-NEXT: v_readlane_b32 s30, v40, 0 +; GFX10-NEXT: s_swappc_b64 s[30:31], s[34:35] ; GFX10-NEXT: v_readlane_b32 s31, v40, 1 +; GFX10-NEXT: v_readlane_b32 s30, v40, 0 ; GFX10-NEXT: s_addk_i32 s32, 0xfe00 ; GFX10-NEXT: v_readlane_b32 s33, v40, 2 ; GFX10-NEXT: s_or_saveexec_b32 s34, -1 @@ -1852,8 +1852,8 @@ define amdgpu_gfx void @test_call_external_void_func_v3f32_imm() #0 { ; GFX10-SCRATCH-NEXT: s_addc_u32 s1, s1, external_void_func_v3f32@rel32@hi+12 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s31, 1 ; GFX10-SCRATCH-NEXT: s_swappc_b64 s[30:31], s[0:1] -; GFX10-SCRATCH-NEXT: v_readlane_b32 s30, v40, 0 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s31, v40, 1 +; GFX10-SCRATCH-NEXT: v_readlane_b32 s30, v40, 0 ; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, -16 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s33, v40, 2 ; GFX10-SCRATCH-NEXT: s_or_saveexec_b32 s0, -1 @@ -1883,12 +1883,12 @@ define amdgpu_gfx void @test_call_external_void_func_v5f32_imm() #0 { ; GFX9-NEXT: v_mov_b32_e32 v3, -1.0 ; GFX9-NEXT: v_mov_b32_e32 v4, 0.5 ; GFX9-NEXT: v_writelane_b32 v40, s31, 1 -; GFX9-NEXT: s_getpc_b64 s[30:31] -; GFX9-NEXT: s_add_u32 s30, s30, external_void_func_v5f32@rel32@lo+4 -; GFX9-NEXT: s_addc_u32 s31, s31, external_void_func_v5f32@rel32@hi+12 -; GFX9-NEXT: s_swappc_b64 s[30:31], s[30:31] -; GFX9-NEXT: v_readlane_b32 s30, v40, 0 +; GFX9-NEXT: s_getpc_b64 s[34:35] +; GFX9-NEXT: s_add_u32 s34, s34, external_void_func_v5f32@rel32@lo+4 +; GFX9-NEXT: s_addc_u32 s35, s35, external_void_func_v5f32@rel32@hi+12 +; GFX9-NEXT: s_swappc_b64 s[30:31], s[34:35] ; GFX9-NEXT: v_readlane_b32 s31, v40, 1 +; GFX9-NEXT: v_readlane_b32 s30, v40, 0 ; GFX9-NEXT: s_addk_i32 s32, 0xfc00 ; GFX9-NEXT: v_readlane_b32 s33, v40, 2 ; GFX9-NEXT: s_or_saveexec_b64 s[34:35], -1 @@ -1914,13 +1914,13 @@ define amdgpu_gfx void @test_call_external_void_func_v5f32_imm() #0 { ; GFX10-NEXT: v_mov_b32_e32 v4, 0.5 ; GFX10-NEXT: s_mov_b32 s33, s32 ; GFX10-NEXT: s_addk_i32 s32, 0x200 +; GFX10-NEXT: s_getpc_b64 s[34:35] +; GFX10-NEXT: s_add_u32 s34, s34, external_void_func_v5f32@rel32@lo+4 +; GFX10-NEXT: s_addc_u32 s35, s35, external_void_func_v5f32@rel32@hi+12 ; GFX10-NEXT: v_writelane_b32 v40, s31, 1 -; GFX10-NEXT: s_getpc_b64 s[30:31] -; GFX10-NEXT: s_add_u32 s30, s30, external_void_func_v5f32@rel32@lo+4 -; GFX10-NEXT: s_addc_u32 s31, s31, external_void_func_v5f32@rel32@hi+12 -; GFX10-NEXT: s_swappc_b64 s[30:31], s[30:31] -; GFX10-NEXT: v_readlane_b32 s30, v40, 0 +; GFX10-NEXT: s_swappc_b64 s[30:31], s[34:35] ; GFX10-NEXT: v_readlane_b32 s31, v40, 1 +; GFX10-NEXT: v_readlane_b32 s30, v40, 0 ; GFX10-NEXT: s_addk_i32 s32, 0xfe00 ; GFX10-NEXT: v_readlane_b32 s33, v40, 2 ; GFX10-NEXT: s_or_saveexec_b32 s34, -1 @@ -1952,8 +1952,8 @@ define amdgpu_gfx void @test_call_external_void_func_v5f32_imm() #0 { ; GFX10-SCRATCH-NEXT: s_addc_u32 s1, s1, external_void_func_v5f32@rel32@hi+12 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s31, 1 ; GFX10-SCRATCH-NEXT: s_swappc_b64 s[30:31], s[0:1] -; GFX10-SCRATCH-NEXT: v_readlane_b32 s30, v40, 0 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s31, v40, 1 +; GFX10-SCRATCH-NEXT: v_readlane_b32 s30, v40, 0 ; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, -16 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s33, v40, 2 ; GFX10-SCRATCH-NEXT: s_or_saveexec_b32 s0, -1 @@ -1980,12 +1980,12 @@ define amdgpu_gfx void 
@test_call_external_void_func_f64_imm() #0 { ; GFX9-NEXT: v_mov_b32_e32 v0, 0 ; GFX9-NEXT: v_mov_b32_e32 v1, 0x40100000 ; GFX9-NEXT: v_writelane_b32 v40, s31, 1 -; GFX9-NEXT: s_getpc_b64 s[30:31] -; GFX9-NEXT: s_add_u32 s30, s30, external_void_func_f64@rel32@lo+4 -; GFX9-NEXT: s_addc_u32 s31, s31, external_void_func_f64@rel32@hi+12 -; GFX9-NEXT: s_swappc_b64 s[30:31], s[30:31] -; GFX9-NEXT: v_readlane_b32 s30, v40, 0 +; GFX9-NEXT: s_getpc_b64 s[34:35] +; GFX9-NEXT: s_add_u32 s34, s34, external_void_func_f64@rel32@lo+4 +; GFX9-NEXT: s_addc_u32 s35, s35, external_void_func_f64@rel32@hi+12 +; GFX9-NEXT: s_swappc_b64 s[30:31], s[34:35] ; GFX9-NEXT: v_readlane_b32 s31, v40, 1 +; GFX9-NEXT: v_readlane_b32 s30, v40, 0 ; GFX9-NEXT: s_addk_i32 s32, 0xfc00 ; GFX9-NEXT: v_readlane_b32 s33, v40, 2 ; GFX9-NEXT: s_or_saveexec_b64 s[34:35], -1 @@ -2008,13 +2008,13 @@ define amdgpu_gfx void @test_call_external_void_func_f64_imm() #0 { ; GFX10-NEXT: s_mov_b32 s33, s32 ; GFX10-NEXT: s_addk_i32 s32, 0x200 ; GFX10-NEXT: v_writelane_b32 v40, s30, 0 +; GFX10-NEXT: s_getpc_b64 s[34:35] +; GFX10-NEXT: s_add_u32 s34, s34, external_void_func_f64@rel32@lo+4 +; GFX10-NEXT: s_addc_u32 s35, s35, external_void_func_f64@rel32@hi+12 ; GFX10-NEXT: v_writelane_b32 v40, s31, 1 -; GFX10-NEXT: s_getpc_b64 s[30:31] -; GFX10-NEXT: s_add_u32 s30, s30, external_void_func_f64@rel32@lo+4 -; GFX10-NEXT: s_addc_u32 s31, s31, external_void_func_f64@rel32@hi+12 -; GFX10-NEXT: s_swappc_b64 s[30:31], s[30:31] -; GFX10-NEXT: v_readlane_b32 s30, v40, 0 +; GFX10-NEXT: s_swappc_b64 s[30:31], s[34:35] ; GFX10-NEXT: v_readlane_b32 s31, v40, 1 +; GFX10-NEXT: v_readlane_b32 s30, v40, 0 ; GFX10-NEXT: s_addk_i32 s32, 0xfe00 ; GFX10-NEXT: v_readlane_b32 s33, v40, 2 ; GFX10-NEXT: s_or_saveexec_b32 s34, -1 @@ -2043,8 +2043,8 @@ define amdgpu_gfx void @test_call_external_void_func_f64_imm() #0 { ; GFX10-SCRATCH-NEXT: s_addc_u32 s1, s1, external_void_func_f64@rel32@hi+12 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s31, 1 ; GFX10-SCRATCH-NEXT: s_swappc_b64 s[30:31], s[0:1] -; GFX10-SCRATCH-NEXT: v_readlane_b32 s30, v40, 0 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s31, v40, 1 +; GFX10-SCRATCH-NEXT: v_readlane_b32 s30, v40, 0 ; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, -16 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s33, v40, 2 ; GFX10-SCRATCH-NEXT: s_or_saveexec_b32 s0, -1 @@ -2073,12 +2073,12 @@ define amdgpu_gfx void @test_call_external_void_func_v2f64_imm() #0 { ; GFX9-NEXT: v_mov_b32_e32 v2, 0 ; GFX9-NEXT: v_mov_b32_e32 v3, 0x40100000 ; GFX9-NEXT: v_writelane_b32 v40, s31, 1 -; GFX9-NEXT: s_getpc_b64 s[30:31] -; GFX9-NEXT: s_add_u32 s30, s30, external_void_func_v2f64@rel32@lo+4 -; GFX9-NEXT: s_addc_u32 s31, s31, external_void_func_v2f64@rel32@hi+12 -; GFX9-NEXT: s_swappc_b64 s[30:31], s[30:31] -; GFX9-NEXT: v_readlane_b32 s30, v40, 0 +; GFX9-NEXT: s_getpc_b64 s[34:35] +; GFX9-NEXT: s_add_u32 s34, s34, external_void_func_v2f64@rel32@lo+4 +; GFX9-NEXT: s_addc_u32 s35, s35, external_void_func_v2f64@rel32@hi+12 +; GFX9-NEXT: s_swappc_b64 s[30:31], s[34:35] ; GFX9-NEXT: v_readlane_b32 s31, v40, 1 +; GFX9-NEXT: v_readlane_b32 s30, v40, 0 ; GFX9-NEXT: s_addk_i32 s32, 0xfc00 ; GFX9-NEXT: v_readlane_b32 s33, v40, 2 ; GFX9-NEXT: s_or_saveexec_b64 s[34:35], -1 @@ -2103,13 +2103,13 @@ define amdgpu_gfx void @test_call_external_void_func_v2f64_imm() #0 { ; GFX10-NEXT: v_writelane_b32 v40, s30, 0 ; GFX10-NEXT: s_mov_b32 s33, s32 ; GFX10-NEXT: s_addk_i32 s32, 0x200 +; GFX10-NEXT: s_getpc_b64 s[34:35] +; GFX10-NEXT: s_add_u32 s34, s34, external_void_func_v2f64@rel32@lo+4 +; 
GFX10-NEXT: s_addc_u32 s35, s35, external_void_func_v2f64@rel32@hi+12 ; GFX10-NEXT: v_writelane_b32 v40, s31, 1 -; GFX10-NEXT: s_getpc_b64 s[30:31] -; GFX10-NEXT: s_add_u32 s30, s30, external_void_func_v2f64@rel32@lo+4 -; GFX10-NEXT: s_addc_u32 s31, s31, external_void_func_v2f64@rel32@hi+12 -; GFX10-NEXT: s_swappc_b64 s[30:31], s[30:31] -; GFX10-NEXT: v_readlane_b32 s30, v40, 0 +; GFX10-NEXT: s_swappc_b64 s[30:31], s[34:35] ; GFX10-NEXT: v_readlane_b32 s31, v40, 1 +; GFX10-NEXT: v_readlane_b32 s30, v40, 0 ; GFX10-NEXT: s_addk_i32 s32, 0xfe00 ; GFX10-NEXT: v_readlane_b32 s33, v40, 2 ; GFX10-NEXT: s_or_saveexec_b32 s34, -1 @@ -2140,8 +2140,8 @@ define amdgpu_gfx void @test_call_external_void_func_v2f64_imm() #0 { ; GFX10-SCRATCH-NEXT: s_addc_u32 s1, s1, external_void_func_v2f64@rel32@hi+12 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s31, 1 ; GFX10-SCRATCH-NEXT: s_swappc_b64 s[30:31], s[0:1] -; GFX10-SCRATCH-NEXT: v_readlane_b32 s30, v40, 0 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s31, v40, 1 +; GFX10-SCRATCH-NEXT: v_readlane_b32 s30, v40, 0 ; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, -16 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s33, v40, 2 ; GFX10-SCRATCH-NEXT: s_or_saveexec_b32 s0, -1 @@ -2172,12 +2172,12 @@ define amdgpu_gfx void @test_call_external_void_func_v3f64_imm() #0 { ; GFX9-NEXT: v_mov_b32_e32 v4, 0 ; GFX9-NEXT: v_mov_b32_e32 v5, 0x40200000 ; GFX9-NEXT: v_writelane_b32 v40, s31, 1 -; GFX9-NEXT: s_getpc_b64 s[30:31] -; GFX9-NEXT: s_add_u32 s30, s30, external_void_func_v3f64@rel32@lo+4 -; GFX9-NEXT: s_addc_u32 s31, s31, external_void_func_v3f64@rel32@hi+12 -; GFX9-NEXT: s_swappc_b64 s[30:31], s[30:31] -; GFX9-NEXT: v_readlane_b32 s30, v40, 0 +; GFX9-NEXT: s_getpc_b64 s[34:35] +; GFX9-NEXT: s_add_u32 s34, s34, external_void_func_v3f64@rel32@lo+4 +; GFX9-NEXT: s_addc_u32 s35, s35, external_void_func_v3f64@rel32@hi+12 +; GFX9-NEXT: s_swappc_b64 s[30:31], s[34:35] ; GFX9-NEXT: v_readlane_b32 s31, v40, 1 +; GFX9-NEXT: v_readlane_b32 s30, v40, 0 ; GFX9-NEXT: s_addk_i32 s32, 0xfc00 ; GFX9-NEXT: v_readlane_b32 s33, v40, 2 ; GFX9-NEXT: s_or_saveexec_b64 s[34:35], -1 @@ -2205,12 +2205,12 @@ define amdgpu_gfx void @test_call_external_void_func_v3f64_imm() #0 { ; GFX10-NEXT: s_mov_b32 s33, s32 ; GFX10-NEXT: s_addk_i32 s32, 0x200 ; GFX10-NEXT: v_writelane_b32 v40, s31, 1 -; GFX10-NEXT: s_getpc_b64 s[30:31] -; GFX10-NEXT: s_add_u32 s30, s30, external_void_func_v3f64@rel32@lo+4 -; GFX10-NEXT: s_addc_u32 s31, s31, external_void_func_v3f64@rel32@hi+12 -; GFX10-NEXT: s_swappc_b64 s[30:31], s[30:31] -; GFX10-NEXT: v_readlane_b32 s30, v40, 0 +; GFX10-NEXT: s_getpc_b64 s[34:35] +; GFX10-NEXT: s_add_u32 s34, s34, external_void_func_v3f64@rel32@lo+4 +; GFX10-NEXT: s_addc_u32 s35, s35, external_void_func_v3f64@rel32@hi+12 +; GFX10-NEXT: s_swappc_b64 s[30:31], s[34:35] ; GFX10-NEXT: v_readlane_b32 s31, v40, 1 +; GFX10-NEXT: v_readlane_b32 s30, v40, 0 ; GFX10-NEXT: s_addk_i32 s32, 0xfe00 ; GFX10-NEXT: v_readlane_b32 s33, v40, 2 ; GFX10-NEXT: s_or_saveexec_b32 s34, -1 @@ -2243,8 +2243,8 @@ define amdgpu_gfx void @test_call_external_void_func_v3f64_imm() #0 { ; GFX10-SCRATCH-NEXT: s_add_u32 s0, s0, external_void_func_v3f64@rel32@lo+4 ; GFX10-SCRATCH-NEXT: s_addc_u32 s1, s1, external_void_func_v3f64@rel32@hi+12 ; GFX10-SCRATCH-NEXT: s_swappc_b64 s[30:31], s[0:1] -; GFX10-SCRATCH-NEXT: v_readlane_b32 s30, v40, 0 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s31, v40, 1 +; GFX10-SCRATCH-NEXT: v_readlane_b32 s30, v40, 0 ; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, -16 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s33, v40, 2 ; 
GFX10-SCRATCH-NEXT: s_or_saveexec_b32 s0, -1 @@ -2270,12 +2270,12 @@ define amdgpu_gfx void @test_call_external_void_func_v2i16() #0 { ; GFX9-NEXT: s_addk_i32 s32, 0x400 ; GFX9-NEXT: v_writelane_b32 v40, s30, 0 ; GFX9-NEXT: v_writelane_b32 v40, s31, 1 -; GFX9-NEXT: s_getpc_b64 s[30:31] -; GFX9-NEXT: s_add_u32 s30, s30, external_void_func_v2i16@rel32@lo+4 -; GFX9-NEXT: s_addc_u32 s31, s31, external_void_func_v2i16@rel32@hi+12 -; GFX9-NEXT: s_swappc_b64 s[30:31], s[30:31] -; GFX9-NEXT: v_readlane_b32 s30, v40, 0 +; GFX9-NEXT: s_getpc_b64 s[34:35] +; GFX9-NEXT: s_add_u32 s34, s34, external_void_func_v2i16@rel32@lo+4 +; GFX9-NEXT: s_addc_u32 s35, s35, external_void_func_v2i16@rel32@hi+12 +; GFX9-NEXT: s_swappc_b64 s[30:31], s[34:35] ; GFX9-NEXT: v_readlane_b32 s31, v40, 1 +; GFX9-NEXT: v_readlane_b32 s30, v40, 0 ; GFX9-NEXT: s_addk_i32 s32, 0xfc00 ; GFX9-NEXT: v_readlane_b32 s33, v40, 2 ; GFX9-NEXT: s_or_saveexec_b64 s[34:35], -1 @@ -2296,14 +2296,14 @@ define amdgpu_gfx void @test_call_external_void_func_v2i16() #0 { ; GFX10-NEXT: v_writelane_b32 v40, s33, 2 ; GFX10-NEXT: s_mov_b32 s33, s32 ; GFX10-NEXT: s_addk_i32 s32, 0x200 +; GFX10-NEXT: s_getpc_b64 s[34:35] +; GFX10-NEXT: s_add_u32 s34, s34, external_void_func_v2i16@rel32@lo+4 +; GFX10-NEXT: s_addc_u32 s35, s35, external_void_func_v2i16@rel32@hi+12 ; GFX10-NEXT: v_writelane_b32 v40, s30, 0 ; GFX10-NEXT: v_writelane_b32 v40, s31, 1 -; GFX10-NEXT: s_getpc_b64 s[30:31] -; GFX10-NEXT: s_add_u32 s30, s30, external_void_func_v2i16@rel32@lo+4 -; GFX10-NEXT: s_addc_u32 s31, s31, external_void_func_v2i16@rel32@hi+12 -; GFX10-NEXT: s_swappc_b64 s[30:31], s[30:31] -; GFX10-NEXT: v_readlane_b32 s30, v40, 0 +; GFX10-NEXT: s_swappc_b64 s[30:31], s[34:35] ; GFX10-NEXT: v_readlane_b32 s31, v40, 1 +; GFX10-NEXT: v_readlane_b32 s30, v40, 0 ; GFX10-NEXT: s_addk_i32 s32, 0xfe00 ; GFX10-NEXT: v_readlane_b32 s33, v40, 2 ; GFX10-NEXT: s_or_saveexec_b32 s34, -1 @@ -2331,8 +2331,8 @@ define amdgpu_gfx void @test_call_external_void_func_v2i16() #0 { ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s30, 0 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s31, 1 ; GFX10-SCRATCH-NEXT: s_swappc_b64 s[30:31], s[0:1] -; GFX10-SCRATCH-NEXT: v_readlane_b32 s30, v40, 0 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s31, v40, 1 +; GFX10-SCRATCH-NEXT: v_readlane_b32 s30, v40, 0 ; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, -16 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s33, v40, 2 ; GFX10-SCRATCH-NEXT: s_or_saveexec_b32 s0, -1 @@ -2359,12 +2359,12 @@ define amdgpu_gfx void @test_call_external_void_func_v3i16() #0 { ; GFX9-NEXT: s_addk_i32 s32, 0x400 ; GFX9-NEXT: v_writelane_b32 v40, s30, 0 ; GFX9-NEXT: v_writelane_b32 v40, s31, 1 -; GFX9-NEXT: s_getpc_b64 s[30:31] -; GFX9-NEXT: s_add_u32 s30, s30, external_void_func_v3i16@rel32@lo+4 -; GFX9-NEXT: s_addc_u32 s31, s31, external_void_func_v3i16@rel32@hi+12 -; GFX9-NEXT: s_swappc_b64 s[30:31], s[30:31] -; GFX9-NEXT: v_readlane_b32 s30, v40, 0 +; GFX9-NEXT: s_getpc_b64 s[34:35] +; GFX9-NEXT: s_add_u32 s34, s34, external_void_func_v3i16@rel32@lo+4 +; GFX9-NEXT: s_addc_u32 s35, s35, external_void_func_v3i16@rel32@hi+12 +; GFX9-NEXT: s_swappc_b64 s[30:31], s[34:35] ; GFX9-NEXT: v_readlane_b32 s31, v40, 1 +; GFX9-NEXT: v_readlane_b32 s30, v40, 0 ; GFX9-NEXT: s_addk_i32 s32, 0xfc00 ; GFX9-NEXT: v_readlane_b32 s33, v40, 2 ; GFX9-NEXT: s_or_saveexec_b64 s[34:35], -1 @@ -2385,14 +2385,14 @@ define amdgpu_gfx void @test_call_external_void_func_v3i16() #0 { ; GFX10-NEXT: v_writelane_b32 v40, s33, 2 ; GFX10-NEXT: s_mov_b32 s33, s32 ; GFX10-NEXT: s_addk_i32 s32, 0x200 +; 
GFX10-NEXT: s_getpc_b64 s[34:35] +; GFX10-NEXT: s_add_u32 s34, s34, external_void_func_v3i16@rel32@lo+4 +; GFX10-NEXT: s_addc_u32 s35, s35, external_void_func_v3i16@rel32@hi+12 ; GFX10-NEXT: v_writelane_b32 v40, s30, 0 ; GFX10-NEXT: v_writelane_b32 v40, s31, 1 -; GFX10-NEXT: s_getpc_b64 s[30:31] -; GFX10-NEXT: s_add_u32 s30, s30, external_void_func_v3i16@rel32@lo+4 -; GFX10-NEXT: s_addc_u32 s31, s31, external_void_func_v3i16@rel32@hi+12 -; GFX10-NEXT: s_swappc_b64 s[30:31], s[30:31] -; GFX10-NEXT: v_readlane_b32 s30, v40, 0 +; GFX10-NEXT: s_swappc_b64 s[30:31], s[34:35] ; GFX10-NEXT: v_readlane_b32 s31, v40, 1 +; GFX10-NEXT: v_readlane_b32 s30, v40, 0 ; GFX10-NEXT: s_addk_i32 s32, 0xfe00 ; GFX10-NEXT: v_readlane_b32 s33, v40, 2 ; GFX10-NEXT: s_or_saveexec_b32 s34, -1 @@ -2420,8 +2420,8 @@ define amdgpu_gfx void @test_call_external_void_func_v3i16() #0 { ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s30, 0 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s31, 1 ; GFX10-SCRATCH-NEXT: s_swappc_b64 s[30:31], s[0:1] -; GFX10-SCRATCH-NEXT: v_readlane_b32 s30, v40, 0 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s31, v40, 1 +; GFX10-SCRATCH-NEXT: v_readlane_b32 s30, v40, 0 ; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, -16 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s33, v40, 2 ; GFX10-SCRATCH-NEXT: s_or_saveexec_b32 s0, -1 @@ -2448,12 +2448,12 @@ define amdgpu_gfx void @test_call_external_void_func_v3f16() #0 { ; GFX9-NEXT: s_addk_i32 s32, 0x400 ; GFX9-NEXT: v_writelane_b32 v40, s30, 0 ; GFX9-NEXT: v_writelane_b32 v40, s31, 1 -; GFX9-NEXT: s_getpc_b64 s[30:31] -; GFX9-NEXT: s_add_u32 s30, s30, external_void_func_v3f16@rel32@lo+4 -; GFX9-NEXT: s_addc_u32 s31, s31, external_void_func_v3f16@rel32@hi+12 -; GFX9-NEXT: s_swappc_b64 s[30:31], s[30:31] -; GFX9-NEXT: v_readlane_b32 s30, v40, 0 +; GFX9-NEXT: s_getpc_b64 s[34:35] +; GFX9-NEXT: s_add_u32 s34, s34, external_void_func_v3f16@rel32@lo+4 +; GFX9-NEXT: s_addc_u32 s35, s35, external_void_func_v3f16@rel32@hi+12 +; GFX9-NEXT: s_swappc_b64 s[30:31], s[34:35] ; GFX9-NEXT: v_readlane_b32 s31, v40, 1 +; GFX9-NEXT: v_readlane_b32 s30, v40, 0 ; GFX9-NEXT: s_addk_i32 s32, 0xfc00 ; GFX9-NEXT: v_readlane_b32 s33, v40, 2 ; GFX9-NEXT: s_or_saveexec_b64 s[34:35], -1 @@ -2474,14 +2474,14 @@ define amdgpu_gfx void @test_call_external_void_func_v3f16() #0 { ; GFX10-NEXT: v_writelane_b32 v40, s33, 2 ; GFX10-NEXT: s_mov_b32 s33, s32 ; GFX10-NEXT: s_addk_i32 s32, 0x200 +; GFX10-NEXT: s_getpc_b64 s[34:35] +; GFX10-NEXT: s_add_u32 s34, s34, external_void_func_v3f16@rel32@lo+4 +; GFX10-NEXT: s_addc_u32 s35, s35, external_void_func_v3f16@rel32@hi+12 ; GFX10-NEXT: v_writelane_b32 v40, s30, 0 ; GFX10-NEXT: v_writelane_b32 v40, s31, 1 -; GFX10-NEXT: s_getpc_b64 s[30:31] -; GFX10-NEXT: s_add_u32 s30, s30, external_void_func_v3f16@rel32@lo+4 -; GFX10-NEXT: s_addc_u32 s31, s31, external_void_func_v3f16@rel32@hi+12 -; GFX10-NEXT: s_swappc_b64 s[30:31], s[30:31] -; GFX10-NEXT: v_readlane_b32 s30, v40, 0 +; GFX10-NEXT: s_swappc_b64 s[30:31], s[34:35] ; GFX10-NEXT: v_readlane_b32 s31, v40, 1 +; GFX10-NEXT: v_readlane_b32 s30, v40, 0 ; GFX10-NEXT: s_addk_i32 s32, 0xfe00 ; GFX10-NEXT: v_readlane_b32 s33, v40, 2 ; GFX10-NEXT: s_or_saveexec_b32 s34, -1 @@ -2509,8 +2509,8 @@ define amdgpu_gfx void @test_call_external_void_func_v3f16() #0 { ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s30, 0 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s31, 1 ; GFX10-SCRATCH-NEXT: s_swappc_b64 s[30:31], s[0:1] -; GFX10-SCRATCH-NEXT: v_readlane_b32 s30, v40, 0 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s31, v40, 1 +; GFX10-SCRATCH-NEXT: 
v_readlane_b32 s30, v40, 0 ; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, -16 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s33, v40, 2 ; GFX10-SCRATCH-NEXT: s_or_saveexec_b32 s0, -1 @@ -2538,12 +2538,12 @@ define amdgpu_gfx void @test_call_external_void_func_v3i16_imm() #0 { ; GFX9-NEXT: v_mov_b32_e32 v0, 0x20001 ; GFX9-NEXT: v_mov_b32_e32 v1, 3 ; GFX9-NEXT: v_writelane_b32 v40, s31, 1 -; GFX9-NEXT: s_getpc_b64 s[30:31] -; GFX9-NEXT: s_add_u32 s30, s30, external_void_func_v3i16@rel32@lo+4 -; GFX9-NEXT: s_addc_u32 s31, s31, external_void_func_v3i16@rel32@hi+12 -; GFX9-NEXT: s_swappc_b64 s[30:31], s[30:31] -; GFX9-NEXT: v_readlane_b32 s30, v40, 0 +; GFX9-NEXT: s_getpc_b64 s[34:35] +; GFX9-NEXT: s_add_u32 s34, s34, external_void_func_v3i16@rel32@lo+4 +; GFX9-NEXT: s_addc_u32 s35, s35, external_void_func_v3i16@rel32@hi+12 +; GFX9-NEXT: s_swappc_b64 s[30:31], s[34:35] ; GFX9-NEXT: v_readlane_b32 s31, v40, 1 +; GFX9-NEXT: v_readlane_b32 s30, v40, 0 ; GFX9-NEXT: s_addk_i32 s32, 0xfc00 ; GFX9-NEXT: v_readlane_b32 s33, v40, 2 ; GFX9-NEXT: s_or_saveexec_b64 s[34:35], -1 @@ -2566,13 +2566,13 @@ define amdgpu_gfx void @test_call_external_void_func_v3i16_imm() #0 { ; GFX10-NEXT: s_mov_b32 s33, s32 ; GFX10-NEXT: s_addk_i32 s32, 0x200 ; GFX10-NEXT: v_writelane_b32 v40, s30, 0 +; GFX10-NEXT: s_getpc_b64 s[34:35] +; GFX10-NEXT: s_add_u32 s34, s34, external_void_func_v3i16@rel32@lo+4 +; GFX10-NEXT: s_addc_u32 s35, s35, external_void_func_v3i16@rel32@hi+12 ; GFX10-NEXT: v_writelane_b32 v40, s31, 1 -; GFX10-NEXT: s_getpc_b64 s[30:31] -; GFX10-NEXT: s_add_u32 s30, s30, external_void_func_v3i16@rel32@lo+4 -; GFX10-NEXT: s_addc_u32 s31, s31, external_void_func_v3i16@rel32@hi+12 -; GFX10-NEXT: s_swappc_b64 s[30:31], s[30:31] -; GFX10-NEXT: v_readlane_b32 s30, v40, 0 +; GFX10-NEXT: s_swappc_b64 s[30:31], s[34:35] ; GFX10-NEXT: v_readlane_b32 s31, v40, 1 +; GFX10-NEXT: v_readlane_b32 s30, v40, 0 ; GFX10-NEXT: s_addk_i32 s32, 0xfe00 ; GFX10-NEXT: v_readlane_b32 s33, v40, 2 ; GFX10-NEXT: s_or_saveexec_b32 s34, -1 @@ -2601,8 +2601,8 @@ define amdgpu_gfx void @test_call_external_void_func_v3i16_imm() #0 { ; GFX10-SCRATCH-NEXT: s_addc_u32 s1, s1, external_void_func_v3i16@rel32@hi+12 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s31, 1 ; GFX10-SCRATCH-NEXT: s_swappc_b64 s[30:31], s[0:1] -; GFX10-SCRATCH-NEXT: v_readlane_b32 s30, v40, 0 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s31, v40, 1 +; GFX10-SCRATCH-NEXT: v_readlane_b32 s30, v40, 0 ; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, -16 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s33, v40, 2 ; GFX10-SCRATCH-NEXT: s_or_saveexec_b32 s0, -1 @@ -2629,12 +2629,12 @@ define amdgpu_gfx void @test_call_external_void_func_v3f16_imm() #0 { ; GFX9-NEXT: v_mov_b32_e32 v0, 0x40003c00 ; GFX9-NEXT: v_mov_b32_e32 v1, 0x4400 ; GFX9-NEXT: v_writelane_b32 v40, s31, 1 -; GFX9-NEXT: s_getpc_b64 s[30:31] -; GFX9-NEXT: s_add_u32 s30, s30, external_void_func_v3f16@rel32@lo+4 -; GFX9-NEXT: s_addc_u32 s31, s31, external_void_func_v3f16@rel32@hi+12 -; GFX9-NEXT: s_swappc_b64 s[30:31], s[30:31] -; GFX9-NEXT: v_readlane_b32 s30, v40, 0 +; GFX9-NEXT: s_getpc_b64 s[34:35] +; GFX9-NEXT: s_add_u32 s34, s34, external_void_func_v3f16@rel32@lo+4 +; GFX9-NEXT: s_addc_u32 s35, s35, external_void_func_v3f16@rel32@hi+12 +; GFX9-NEXT: s_swappc_b64 s[30:31], s[34:35] ; GFX9-NEXT: v_readlane_b32 s31, v40, 1 +; GFX9-NEXT: v_readlane_b32 s30, v40, 0 ; GFX9-NEXT: s_addk_i32 s32, 0xfc00 ; GFX9-NEXT: v_readlane_b32 s33, v40, 2 ; GFX9-NEXT: s_or_saveexec_b64 s[34:35], -1 @@ -2657,13 +2657,13 @@ define amdgpu_gfx void 
@test_call_external_void_func_v3f16_imm() #0 { ; GFX10-NEXT: s_mov_b32 s33, s32 ; GFX10-NEXT: s_addk_i32 s32, 0x200 ; GFX10-NEXT: v_writelane_b32 v40, s30, 0 +; GFX10-NEXT: s_getpc_b64 s[34:35] +; GFX10-NEXT: s_add_u32 s34, s34, external_void_func_v3f16@rel32@lo+4 +; GFX10-NEXT: s_addc_u32 s35, s35, external_void_func_v3f16@rel32@hi+12 ; GFX10-NEXT: v_writelane_b32 v40, s31, 1 -; GFX10-NEXT: s_getpc_b64 s[30:31] -; GFX10-NEXT: s_add_u32 s30, s30, external_void_func_v3f16@rel32@lo+4 -; GFX10-NEXT: s_addc_u32 s31, s31, external_void_func_v3f16@rel32@hi+12 -; GFX10-NEXT: s_swappc_b64 s[30:31], s[30:31] -; GFX10-NEXT: v_readlane_b32 s30, v40, 0 +; GFX10-NEXT: s_swappc_b64 s[30:31], s[34:35] ; GFX10-NEXT: v_readlane_b32 s31, v40, 1 +; GFX10-NEXT: v_readlane_b32 s30, v40, 0 ; GFX10-NEXT: s_addk_i32 s32, 0xfe00 ; GFX10-NEXT: v_readlane_b32 s33, v40, 2 ; GFX10-NEXT: s_or_saveexec_b32 s34, -1 @@ -2692,8 +2692,8 @@ define amdgpu_gfx void @test_call_external_void_func_v3f16_imm() #0 { ; GFX10-SCRATCH-NEXT: s_addc_u32 s1, s1, external_void_func_v3f16@rel32@hi+12 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s31, 1 ; GFX10-SCRATCH-NEXT: s_swappc_b64 s[30:31], s[0:1] -; GFX10-SCRATCH-NEXT: v_readlane_b32 s30, v40, 0 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s31, v40, 1 +; GFX10-SCRATCH-NEXT: v_readlane_b32 s30, v40, 0 ; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, -16 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s33, v40, 2 ; GFX10-SCRATCH-NEXT: s_or_saveexec_b32 s0, -1 @@ -2719,12 +2719,12 @@ define amdgpu_gfx void @test_call_external_void_func_v4i16() #0 { ; GFX9-NEXT: s_addk_i32 s32, 0x400 ; GFX9-NEXT: v_writelane_b32 v40, s30, 0 ; GFX9-NEXT: v_writelane_b32 v40, s31, 1 -; GFX9-NEXT: s_getpc_b64 s[30:31] -; GFX9-NEXT: s_add_u32 s30, s30, external_void_func_v4i16@rel32@lo+4 -; GFX9-NEXT: s_addc_u32 s31, s31, external_void_func_v4i16@rel32@hi+12 -; GFX9-NEXT: s_swappc_b64 s[30:31], s[30:31] -; GFX9-NEXT: v_readlane_b32 s30, v40, 0 +; GFX9-NEXT: s_getpc_b64 s[34:35] +; GFX9-NEXT: s_add_u32 s34, s34, external_void_func_v4i16@rel32@lo+4 +; GFX9-NEXT: s_addc_u32 s35, s35, external_void_func_v4i16@rel32@hi+12 +; GFX9-NEXT: s_swappc_b64 s[30:31], s[34:35] ; GFX9-NEXT: v_readlane_b32 s31, v40, 1 +; GFX9-NEXT: v_readlane_b32 s30, v40, 0 ; GFX9-NEXT: s_addk_i32 s32, 0xfc00 ; GFX9-NEXT: v_readlane_b32 s33, v40, 2 ; GFX9-NEXT: s_or_saveexec_b64 s[34:35], -1 @@ -2745,14 +2745,14 @@ define amdgpu_gfx void @test_call_external_void_func_v4i16() #0 { ; GFX10-NEXT: v_writelane_b32 v40, s33, 2 ; GFX10-NEXT: s_mov_b32 s33, s32 ; GFX10-NEXT: s_addk_i32 s32, 0x200 +; GFX10-NEXT: s_getpc_b64 s[34:35] +; GFX10-NEXT: s_add_u32 s34, s34, external_void_func_v4i16@rel32@lo+4 +; GFX10-NEXT: s_addc_u32 s35, s35, external_void_func_v4i16@rel32@hi+12 ; GFX10-NEXT: v_writelane_b32 v40, s30, 0 ; GFX10-NEXT: v_writelane_b32 v40, s31, 1 -; GFX10-NEXT: s_getpc_b64 s[30:31] -; GFX10-NEXT: s_add_u32 s30, s30, external_void_func_v4i16@rel32@lo+4 -; GFX10-NEXT: s_addc_u32 s31, s31, external_void_func_v4i16@rel32@hi+12 -; GFX10-NEXT: s_swappc_b64 s[30:31], s[30:31] -; GFX10-NEXT: v_readlane_b32 s30, v40, 0 +; GFX10-NEXT: s_swappc_b64 s[30:31], s[34:35] ; GFX10-NEXT: v_readlane_b32 s31, v40, 1 +; GFX10-NEXT: v_readlane_b32 s30, v40, 0 ; GFX10-NEXT: s_addk_i32 s32, 0xfe00 ; GFX10-NEXT: v_readlane_b32 s33, v40, 2 ; GFX10-NEXT: s_or_saveexec_b32 s34, -1 @@ -2780,8 +2780,8 @@ define amdgpu_gfx void @test_call_external_void_func_v4i16() #0 { ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s30, 0 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s31, 1 ; GFX10-SCRATCH-NEXT: 
s_swappc_b64 s[30:31], s[0:1] -; GFX10-SCRATCH-NEXT: v_readlane_b32 s30, v40, 0 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s31, v40, 1 +; GFX10-SCRATCH-NEXT: v_readlane_b32 s30, v40, 0 ; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, -16 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s33, v40, 2 ; GFX10-SCRATCH-NEXT: s_or_saveexec_b32 s0, -1 @@ -2809,12 +2809,12 @@ define amdgpu_gfx void @test_call_external_void_func_v4i16_imm() #0 { ; GFX9-NEXT: v_mov_b32_e32 v0, 0x20001 ; GFX9-NEXT: v_mov_b32_e32 v1, 0x40003 ; GFX9-NEXT: v_writelane_b32 v40, s31, 1 -; GFX9-NEXT: s_getpc_b64 s[30:31] -; GFX9-NEXT: s_add_u32 s30, s30, external_void_func_v4i16@rel32@lo+4 -; GFX9-NEXT: s_addc_u32 s31, s31, external_void_func_v4i16@rel32@hi+12 -; GFX9-NEXT: s_swappc_b64 s[30:31], s[30:31] -; GFX9-NEXT: v_readlane_b32 s30, v40, 0 +; GFX9-NEXT: s_getpc_b64 s[34:35] +; GFX9-NEXT: s_add_u32 s34, s34, external_void_func_v4i16@rel32@lo+4 +; GFX9-NEXT: s_addc_u32 s35, s35, external_void_func_v4i16@rel32@hi+12 +; GFX9-NEXT: s_swappc_b64 s[30:31], s[34:35] ; GFX9-NEXT: v_readlane_b32 s31, v40, 1 +; GFX9-NEXT: v_readlane_b32 s30, v40, 0 ; GFX9-NEXT: s_addk_i32 s32, 0xfc00 ; GFX9-NEXT: v_readlane_b32 s33, v40, 2 ; GFX9-NEXT: s_or_saveexec_b64 s[34:35], -1 @@ -2837,13 +2837,13 @@ define amdgpu_gfx void @test_call_external_void_func_v4i16_imm() #0 { ; GFX10-NEXT: s_mov_b32 s33, s32 ; GFX10-NEXT: s_addk_i32 s32, 0x200 ; GFX10-NEXT: v_writelane_b32 v40, s30, 0 +; GFX10-NEXT: s_getpc_b64 s[34:35] +; GFX10-NEXT: s_add_u32 s34, s34, external_void_func_v4i16@rel32@lo+4 +; GFX10-NEXT: s_addc_u32 s35, s35, external_void_func_v4i16@rel32@hi+12 ; GFX10-NEXT: v_writelane_b32 v40, s31, 1 -; GFX10-NEXT: s_getpc_b64 s[30:31] -; GFX10-NEXT: s_add_u32 s30, s30, external_void_func_v4i16@rel32@lo+4 -; GFX10-NEXT: s_addc_u32 s31, s31, external_void_func_v4i16@rel32@hi+12 -; GFX10-NEXT: s_swappc_b64 s[30:31], s[30:31] -; GFX10-NEXT: v_readlane_b32 s30, v40, 0 +; GFX10-NEXT: s_swappc_b64 s[30:31], s[34:35] ; GFX10-NEXT: v_readlane_b32 s31, v40, 1 +; GFX10-NEXT: v_readlane_b32 s30, v40, 0 ; GFX10-NEXT: s_addk_i32 s32, 0xfe00 ; GFX10-NEXT: v_readlane_b32 s33, v40, 2 ; GFX10-NEXT: s_or_saveexec_b32 s34, -1 @@ -2872,8 +2872,8 @@ define amdgpu_gfx void @test_call_external_void_func_v4i16_imm() #0 { ; GFX10-SCRATCH-NEXT: s_addc_u32 s1, s1, external_void_func_v4i16@rel32@hi+12 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s31, 1 ; GFX10-SCRATCH-NEXT: s_swappc_b64 s[30:31], s[0:1] -; GFX10-SCRATCH-NEXT: v_readlane_b32 s30, v40, 0 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s31, v40, 1 +; GFX10-SCRATCH-NEXT: v_readlane_b32 s30, v40, 0 ; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, -16 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s33, v40, 2 ; GFX10-SCRATCH-NEXT: s_or_saveexec_b32 s0, -1 @@ -2899,12 +2899,12 @@ define amdgpu_gfx void @test_call_external_void_func_v2f16() #0 { ; GFX9-NEXT: s_addk_i32 s32, 0x400 ; GFX9-NEXT: v_writelane_b32 v40, s30, 0 ; GFX9-NEXT: v_writelane_b32 v40, s31, 1 -; GFX9-NEXT: s_getpc_b64 s[30:31] -; GFX9-NEXT: s_add_u32 s30, s30, external_void_func_v2f16@rel32@lo+4 -; GFX9-NEXT: s_addc_u32 s31, s31, external_void_func_v2f16@rel32@hi+12 -; GFX9-NEXT: s_swappc_b64 s[30:31], s[30:31] -; GFX9-NEXT: v_readlane_b32 s30, v40, 0 +; GFX9-NEXT: s_getpc_b64 s[34:35] +; GFX9-NEXT: s_add_u32 s34, s34, external_void_func_v2f16@rel32@lo+4 +; GFX9-NEXT: s_addc_u32 s35, s35, external_void_func_v2f16@rel32@hi+12 +; GFX9-NEXT: s_swappc_b64 s[30:31], s[34:35] ; GFX9-NEXT: v_readlane_b32 s31, v40, 1 +; GFX9-NEXT: v_readlane_b32 s30, v40, 0 ; GFX9-NEXT: s_addk_i32 s32, 0xfc00 ; GFX9-NEXT: 
v_readlane_b32 s33, v40, 2 ; GFX9-NEXT: s_or_saveexec_b64 s[34:35], -1 @@ -2925,14 +2925,14 @@ define amdgpu_gfx void @test_call_external_void_func_v2f16() #0 { ; GFX10-NEXT: v_writelane_b32 v40, s33, 2 ; GFX10-NEXT: s_mov_b32 s33, s32 ; GFX10-NEXT: s_addk_i32 s32, 0x200 +; GFX10-NEXT: s_getpc_b64 s[34:35] +; GFX10-NEXT: s_add_u32 s34, s34, external_void_func_v2f16@rel32@lo+4 +; GFX10-NEXT: s_addc_u32 s35, s35, external_void_func_v2f16@rel32@hi+12 ; GFX10-NEXT: v_writelane_b32 v40, s30, 0 ; GFX10-NEXT: v_writelane_b32 v40, s31, 1 -; GFX10-NEXT: s_getpc_b64 s[30:31] -; GFX10-NEXT: s_add_u32 s30, s30, external_void_func_v2f16@rel32@lo+4 -; GFX10-NEXT: s_addc_u32 s31, s31, external_void_func_v2f16@rel32@hi+12 -; GFX10-NEXT: s_swappc_b64 s[30:31], s[30:31] -; GFX10-NEXT: v_readlane_b32 s30, v40, 0 +; GFX10-NEXT: s_swappc_b64 s[30:31], s[34:35] ; GFX10-NEXT: v_readlane_b32 s31, v40, 1 +; GFX10-NEXT: v_readlane_b32 s30, v40, 0 ; GFX10-NEXT: s_addk_i32 s32, 0xfe00 ; GFX10-NEXT: v_readlane_b32 s33, v40, 2 ; GFX10-NEXT: s_or_saveexec_b32 s34, -1 @@ -2960,8 +2960,8 @@ define amdgpu_gfx void @test_call_external_void_func_v2f16() #0 { ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s30, 0 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s31, 1 ; GFX10-SCRATCH-NEXT: s_swappc_b64 s[30:31], s[0:1] -; GFX10-SCRATCH-NEXT: v_readlane_b32 s30, v40, 0 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s31, v40, 1 +; GFX10-SCRATCH-NEXT: v_readlane_b32 s30, v40, 0 ; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, -16 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s33, v40, 2 ; GFX10-SCRATCH-NEXT: s_or_saveexec_b32 s0, -1 @@ -2988,12 +2988,12 @@ define amdgpu_gfx void @test_call_external_void_func_v2i32() #0 { ; GFX9-NEXT: s_addk_i32 s32, 0x400 ; GFX9-NEXT: v_writelane_b32 v40, s30, 0 ; GFX9-NEXT: v_writelane_b32 v40, s31, 1 -; GFX9-NEXT: s_getpc_b64 s[30:31] -; GFX9-NEXT: s_add_u32 s30, s30, external_void_func_v2i32@rel32@lo+4 -; GFX9-NEXT: s_addc_u32 s31, s31, external_void_func_v2i32@rel32@hi+12 -; GFX9-NEXT: s_swappc_b64 s[30:31], s[30:31] -; GFX9-NEXT: v_readlane_b32 s30, v40, 0 +; GFX9-NEXT: s_getpc_b64 s[34:35] +; GFX9-NEXT: s_add_u32 s34, s34, external_void_func_v2i32@rel32@lo+4 +; GFX9-NEXT: s_addc_u32 s35, s35, external_void_func_v2i32@rel32@hi+12 +; GFX9-NEXT: s_swappc_b64 s[30:31], s[34:35] ; GFX9-NEXT: v_readlane_b32 s31, v40, 1 +; GFX9-NEXT: v_readlane_b32 s30, v40, 0 ; GFX9-NEXT: s_addk_i32 s32, 0xfc00 ; GFX9-NEXT: v_readlane_b32 s33, v40, 2 ; GFX9-NEXT: s_or_saveexec_b64 s[34:35], -1 @@ -3014,14 +3014,14 @@ define amdgpu_gfx void @test_call_external_void_func_v2i32() #0 { ; GFX10-NEXT: v_writelane_b32 v40, s33, 2 ; GFX10-NEXT: s_mov_b32 s33, s32 ; GFX10-NEXT: s_addk_i32 s32, 0x200 +; GFX10-NEXT: s_getpc_b64 s[34:35] +; GFX10-NEXT: s_add_u32 s34, s34, external_void_func_v2i32@rel32@lo+4 +; GFX10-NEXT: s_addc_u32 s35, s35, external_void_func_v2i32@rel32@hi+12 ; GFX10-NEXT: v_writelane_b32 v40, s30, 0 ; GFX10-NEXT: v_writelane_b32 v40, s31, 1 -; GFX10-NEXT: s_getpc_b64 s[30:31] -; GFX10-NEXT: s_add_u32 s30, s30, external_void_func_v2i32@rel32@lo+4 -; GFX10-NEXT: s_addc_u32 s31, s31, external_void_func_v2i32@rel32@hi+12 -; GFX10-NEXT: s_swappc_b64 s[30:31], s[30:31] -; GFX10-NEXT: v_readlane_b32 s30, v40, 0 +; GFX10-NEXT: s_swappc_b64 s[30:31], s[34:35] ; GFX10-NEXT: v_readlane_b32 s31, v40, 1 +; GFX10-NEXT: v_readlane_b32 s30, v40, 0 ; GFX10-NEXT: s_addk_i32 s32, 0xfe00 ; GFX10-NEXT: v_readlane_b32 s33, v40, 2 ; GFX10-NEXT: s_or_saveexec_b32 s34, -1 @@ -3049,8 +3049,8 @@ define amdgpu_gfx void @test_call_external_void_func_v2i32() #0 { ; 
GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s30, 0 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s31, 1 ; GFX10-SCRATCH-NEXT: s_swappc_b64 s[30:31], s[0:1] -; GFX10-SCRATCH-NEXT: v_readlane_b32 s30, v40, 0 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s31, v40, 1 +; GFX10-SCRATCH-NEXT: v_readlane_b32 s30, v40, 0 ; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, -16 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s33, v40, 2 ; GFX10-SCRATCH-NEXT: s_or_saveexec_b32 s0, -1 @@ -3078,12 +3078,12 @@ define amdgpu_gfx void @test_call_external_void_func_v2i32_imm() #0 { ; GFX9-NEXT: v_mov_b32_e32 v0, 1 ; GFX9-NEXT: v_mov_b32_e32 v1, 2 ; GFX9-NEXT: v_writelane_b32 v40, s31, 1 -; GFX9-NEXT: s_getpc_b64 s[30:31] -; GFX9-NEXT: s_add_u32 s30, s30, external_void_func_v2i32@rel32@lo+4 -; GFX9-NEXT: s_addc_u32 s31, s31, external_void_func_v2i32@rel32@hi+12 -; GFX9-NEXT: s_swappc_b64 s[30:31], s[30:31] -; GFX9-NEXT: v_readlane_b32 s30, v40, 0 +; GFX9-NEXT: s_getpc_b64 s[34:35] +; GFX9-NEXT: s_add_u32 s34, s34, external_void_func_v2i32@rel32@lo+4 +; GFX9-NEXT: s_addc_u32 s35, s35, external_void_func_v2i32@rel32@hi+12 +; GFX9-NEXT: s_swappc_b64 s[30:31], s[34:35] ; GFX9-NEXT: v_readlane_b32 s31, v40, 1 +; GFX9-NEXT: v_readlane_b32 s30, v40, 0 ; GFX9-NEXT: s_addk_i32 s32, 0xfc00 ; GFX9-NEXT: v_readlane_b32 s33, v40, 2 ; GFX9-NEXT: s_or_saveexec_b64 s[34:35], -1 @@ -3106,13 +3106,13 @@ define amdgpu_gfx void @test_call_external_void_func_v2i32_imm() #0 { ; GFX10-NEXT: s_mov_b32 s33, s32 ; GFX10-NEXT: s_addk_i32 s32, 0x200 ; GFX10-NEXT: v_writelane_b32 v40, s30, 0 +; GFX10-NEXT: s_getpc_b64 s[34:35] +; GFX10-NEXT: s_add_u32 s34, s34, external_void_func_v2i32@rel32@lo+4 +; GFX10-NEXT: s_addc_u32 s35, s35, external_void_func_v2i32@rel32@hi+12 ; GFX10-NEXT: v_writelane_b32 v40, s31, 1 -; GFX10-NEXT: s_getpc_b64 s[30:31] -; GFX10-NEXT: s_add_u32 s30, s30, external_void_func_v2i32@rel32@lo+4 -; GFX10-NEXT: s_addc_u32 s31, s31, external_void_func_v2i32@rel32@hi+12 -; GFX10-NEXT: s_swappc_b64 s[30:31], s[30:31] -; GFX10-NEXT: v_readlane_b32 s30, v40, 0 +; GFX10-NEXT: s_swappc_b64 s[30:31], s[34:35] ; GFX10-NEXT: v_readlane_b32 s31, v40, 1 +; GFX10-NEXT: v_readlane_b32 s30, v40, 0 ; GFX10-NEXT: s_addk_i32 s32, 0xfe00 ; GFX10-NEXT: v_readlane_b32 s33, v40, 2 ; GFX10-NEXT: s_or_saveexec_b32 s34, -1 @@ -3141,8 +3141,8 @@ define amdgpu_gfx void @test_call_external_void_func_v2i32_imm() #0 { ; GFX10-SCRATCH-NEXT: s_addc_u32 s1, s1, external_void_func_v2i32@rel32@hi+12 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s31, 1 ; GFX10-SCRATCH-NEXT: s_swappc_b64 s[30:31], s[0:1] -; GFX10-SCRATCH-NEXT: v_readlane_b32 s30, v40, 0 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s31, v40, 1 +; GFX10-SCRATCH-NEXT: v_readlane_b32 s30, v40, 0 ; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, -16 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s33, v40, 2 ; GFX10-SCRATCH-NEXT: s_or_saveexec_b32 s0, -1 @@ -3170,12 +3170,12 @@ define amdgpu_gfx void @test_call_external_void_func_v3i32_imm(i32) #0 { ; GFX9-NEXT: v_mov_b32_e32 v1, 4 ; GFX9-NEXT: v_mov_b32_e32 v2, 5 ; GFX9-NEXT: v_writelane_b32 v40, s31, 1 -; GFX9-NEXT: s_getpc_b64 s[30:31] -; GFX9-NEXT: s_add_u32 s30, s30, external_void_func_v3i32@rel32@lo+4 -; GFX9-NEXT: s_addc_u32 s31, s31, external_void_func_v3i32@rel32@hi+12 -; GFX9-NEXT: s_swappc_b64 s[30:31], s[30:31] -; GFX9-NEXT: v_readlane_b32 s30, v40, 0 +; GFX9-NEXT: s_getpc_b64 s[34:35] +; GFX9-NEXT: s_add_u32 s34, s34, external_void_func_v3i32@rel32@lo+4 +; GFX9-NEXT: s_addc_u32 s35, s35, external_void_func_v3i32@rel32@hi+12 +; GFX9-NEXT: s_swappc_b64 s[30:31], s[34:35] ; GFX9-NEXT: 
v_readlane_b32 s31, v40, 1 +; GFX9-NEXT: v_readlane_b32 s30, v40, 0 ; GFX9-NEXT: s_addk_i32 s32, 0xfc00 ; GFX9-NEXT: v_readlane_b32 s33, v40, 2 ; GFX9-NEXT: s_or_saveexec_b64 s[34:35], -1 @@ -3199,13 +3199,13 @@ define amdgpu_gfx void @test_call_external_void_func_v3i32_imm(i32) #0 { ; GFX10-NEXT: s_mov_b32 s33, s32 ; GFX10-NEXT: v_writelane_b32 v40, s30, 0 ; GFX10-NEXT: s_addk_i32 s32, 0x200 +; GFX10-NEXT: s_getpc_b64 s[34:35] +; GFX10-NEXT: s_add_u32 s34, s34, external_void_func_v3i32@rel32@lo+4 +; GFX10-NEXT: s_addc_u32 s35, s35, external_void_func_v3i32@rel32@hi+12 ; GFX10-NEXT: v_writelane_b32 v40, s31, 1 -; GFX10-NEXT: s_getpc_b64 s[30:31] -; GFX10-NEXT: s_add_u32 s30, s30, external_void_func_v3i32@rel32@lo+4 -; GFX10-NEXT: s_addc_u32 s31, s31, external_void_func_v3i32@rel32@hi+12 -; GFX10-NEXT: s_swappc_b64 s[30:31], s[30:31] -; GFX10-NEXT: v_readlane_b32 s30, v40, 0 +; GFX10-NEXT: s_swappc_b64 s[30:31], s[34:35] ; GFX10-NEXT: v_readlane_b32 s31, v40, 1 +; GFX10-NEXT: v_readlane_b32 s30, v40, 0 ; GFX10-NEXT: s_addk_i32 s32, 0xfe00 ; GFX10-NEXT: v_readlane_b32 s33, v40, 2 ; GFX10-NEXT: s_or_saveexec_b32 s34, -1 @@ -3235,8 +3235,8 @@ define amdgpu_gfx void @test_call_external_void_func_v3i32_imm(i32) #0 { ; GFX10-SCRATCH-NEXT: s_addc_u32 s1, s1, external_void_func_v3i32@rel32@hi+12 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s31, 1 ; GFX10-SCRATCH-NEXT: s_swappc_b64 s[30:31], s[0:1] -; GFX10-SCRATCH-NEXT: v_readlane_b32 s30, v40, 0 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s31, v40, 1 +; GFX10-SCRATCH-NEXT: v_readlane_b32 s30, v40, 0 ; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, -16 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s33, v40, 2 ; GFX10-SCRATCH-NEXT: s_or_saveexec_b32 s0, -1 @@ -3265,12 +3265,12 @@ define amdgpu_gfx void @test_call_external_void_func_v3i32_i32(i32) #0 { ; GFX9-NEXT: v_mov_b32_e32 v2, 5 ; GFX9-NEXT: v_mov_b32_e32 v3, 6 ; GFX9-NEXT: v_writelane_b32 v40, s31, 1 -; GFX9-NEXT: s_getpc_b64 s[30:31] -; GFX9-NEXT: s_add_u32 s30, s30, external_void_func_v3i32_i32@rel32@lo+4 -; GFX9-NEXT: s_addc_u32 s31, s31, external_void_func_v3i32_i32@rel32@hi+12 -; GFX9-NEXT: s_swappc_b64 s[30:31], s[30:31] -; GFX9-NEXT: v_readlane_b32 s30, v40, 0 +; GFX9-NEXT: s_getpc_b64 s[34:35] +; GFX9-NEXT: s_add_u32 s34, s34, external_void_func_v3i32_i32@rel32@lo+4 +; GFX9-NEXT: s_addc_u32 s35, s35, external_void_func_v3i32_i32@rel32@hi+12 +; GFX9-NEXT: s_swappc_b64 s[30:31], s[34:35] ; GFX9-NEXT: v_readlane_b32 s31, v40, 1 +; GFX9-NEXT: v_readlane_b32 s30, v40, 0 ; GFX9-NEXT: s_addk_i32 s32, 0xfc00 ; GFX9-NEXT: v_readlane_b32 s33, v40, 2 ; GFX9-NEXT: s_or_saveexec_b64 s[34:35], -1 @@ -3295,13 +3295,13 @@ define amdgpu_gfx void @test_call_external_void_func_v3i32_i32(i32) #0 { ; GFX10-NEXT: v_writelane_b32 v40, s30, 0 ; GFX10-NEXT: s_mov_b32 s33, s32 ; GFX10-NEXT: s_addk_i32 s32, 0x200 +; GFX10-NEXT: s_getpc_b64 s[34:35] +; GFX10-NEXT: s_add_u32 s34, s34, external_void_func_v3i32_i32@rel32@lo+4 +; GFX10-NEXT: s_addc_u32 s35, s35, external_void_func_v3i32_i32@rel32@hi+12 ; GFX10-NEXT: v_writelane_b32 v40, s31, 1 -; GFX10-NEXT: s_getpc_b64 s[30:31] -; GFX10-NEXT: s_add_u32 s30, s30, external_void_func_v3i32_i32@rel32@lo+4 -; GFX10-NEXT: s_addc_u32 s31, s31, external_void_func_v3i32_i32@rel32@hi+12 -; GFX10-NEXT: s_swappc_b64 s[30:31], s[30:31] -; GFX10-NEXT: v_readlane_b32 s30, v40, 0 +; GFX10-NEXT: s_swappc_b64 s[30:31], s[34:35] ; GFX10-NEXT: v_readlane_b32 s31, v40, 1 +; GFX10-NEXT: v_readlane_b32 s30, v40, 0 ; GFX10-NEXT: s_addk_i32 s32, 0xfe00 ; GFX10-NEXT: v_readlane_b32 s33, v40, 2 ; GFX10-NEXT: 
s_or_saveexec_b32 s34, -1 @@ -3332,8 +3332,8 @@ define amdgpu_gfx void @test_call_external_void_func_v3i32_i32(i32) #0 { ; GFX10-SCRATCH-NEXT: s_addc_u32 s1, s1, external_void_func_v3i32_i32@rel32@hi+12 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s31, 1 ; GFX10-SCRATCH-NEXT: s_swappc_b64 s[30:31], s[0:1] -; GFX10-SCRATCH-NEXT: v_readlane_b32 s30, v40, 0 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s31, v40, 1 +; GFX10-SCRATCH-NEXT: v_readlane_b32 s30, v40, 0 ; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, -16 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s33, v40, 2 ; GFX10-SCRATCH-NEXT: s_or_saveexec_b32 s0, -1 @@ -3359,12 +3359,12 @@ define amdgpu_gfx void @test_call_external_void_func_v4i32() #0 { ; GFX9-NEXT: s_addk_i32 s32, 0x400 ; GFX9-NEXT: v_writelane_b32 v40, s30, 0 ; GFX9-NEXT: v_writelane_b32 v40, s31, 1 -; GFX9-NEXT: s_getpc_b64 s[30:31] -; GFX9-NEXT: s_add_u32 s30, s30, external_void_func_v4i32@rel32@lo+4 -; GFX9-NEXT: s_addc_u32 s31, s31, external_void_func_v4i32@rel32@hi+12 -; GFX9-NEXT: s_swappc_b64 s[30:31], s[30:31] -; GFX9-NEXT: v_readlane_b32 s30, v40, 0 +; GFX9-NEXT: s_getpc_b64 s[34:35] +; GFX9-NEXT: s_add_u32 s34, s34, external_void_func_v4i32@rel32@lo+4 +; GFX9-NEXT: s_addc_u32 s35, s35, external_void_func_v4i32@rel32@hi+12 +; GFX9-NEXT: s_swappc_b64 s[30:31], s[34:35] ; GFX9-NEXT: v_readlane_b32 s31, v40, 1 +; GFX9-NEXT: v_readlane_b32 s30, v40, 0 ; GFX9-NEXT: s_addk_i32 s32, 0xfc00 ; GFX9-NEXT: v_readlane_b32 s33, v40, 2 ; GFX9-NEXT: s_or_saveexec_b64 s[34:35], -1 @@ -3385,14 +3385,14 @@ define amdgpu_gfx void @test_call_external_void_func_v4i32() #0 { ; GFX10-NEXT: v_writelane_b32 v40, s33, 2 ; GFX10-NEXT: s_mov_b32 s33, s32 ; GFX10-NEXT: s_addk_i32 s32, 0x200 +; GFX10-NEXT: s_getpc_b64 s[34:35] +; GFX10-NEXT: s_add_u32 s34, s34, external_void_func_v4i32@rel32@lo+4 +; GFX10-NEXT: s_addc_u32 s35, s35, external_void_func_v4i32@rel32@hi+12 ; GFX10-NEXT: v_writelane_b32 v40, s30, 0 ; GFX10-NEXT: v_writelane_b32 v40, s31, 1 -; GFX10-NEXT: s_getpc_b64 s[30:31] -; GFX10-NEXT: s_add_u32 s30, s30, external_void_func_v4i32@rel32@lo+4 -; GFX10-NEXT: s_addc_u32 s31, s31, external_void_func_v4i32@rel32@hi+12 -; GFX10-NEXT: s_swappc_b64 s[30:31], s[30:31] -; GFX10-NEXT: v_readlane_b32 s30, v40, 0 +; GFX10-NEXT: s_swappc_b64 s[30:31], s[34:35] ; GFX10-NEXT: v_readlane_b32 s31, v40, 1 +; GFX10-NEXT: v_readlane_b32 s30, v40, 0 ; GFX10-NEXT: s_addk_i32 s32, 0xfe00 ; GFX10-NEXT: v_readlane_b32 s33, v40, 2 ; GFX10-NEXT: s_or_saveexec_b32 s34, -1 @@ -3420,8 +3420,8 @@ define amdgpu_gfx void @test_call_external_void_func_v4i32() #0 { ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s30, 0 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s31, 1 ; GFX10-SCRATCH-NEXT: s_swappc_b64 s[30:31], s[0:1] -; GFX10-SCRATCH-NEXT: v_readlane_b32 s30, v40, 0 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s31, v40, 1 +; GFX10-SCRATCH-NEXT: v_readlane_b32 s30, v40, 0 ; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, -16 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s33, v40, 2 ; GFX10-SCRATCH-NEXT: s_or_saveexec_b32 s0, -1 @@ -3451,12 +3451,12 @@ define amdgpu_gfx void @test_call_external_void_func_v4i32_imm() #0 { ; GFX9-NEXT: v_mov_b32_e32 v2, 3 ; GFX9-NEXT: v_mov_b32_e32 v3, 4 ; GFX9-NEXT: v_writelane_b32 v40, s31, 1 -; GFX9-NEXT: s_getpc_b64 s[30:31] -; GFX9-NEXT: s_add_u32 s30, s30, external_void_func_v4i32@rel32@lo+4 -; GFX9-NEXT: s_addc_u32 s31, s31, external_void_func_v4i32@rel32@hi+12 -; GFX9-NEXT: s_swappc_b64 s[30:31], s[30:31] -; GFX9-NEXT: v_readlane_b32 s30, v40, 0 +; GFX9-NEXT: s_getpc_b64 s[34:35] +; GFX9-NEXT: s_add_u32 s34, s34, 
external_void_func_v4i32@rel32@lo+4 +; GFX9-NEXT: s_addc_u32 s35, s35, external_void_func_v4i32@rel32@hi+12 +; GFX9-NEXT: s_swappc_b64 s[30:31], s[34:35] ; GFX9-NEXT: v_readlane_b32 s31, v40, 1 +; GFX9-NEXT: v_readlane_b32 s30, v40, 0 ; GFX9-NEXT: s_addk_i32 s32, 0xfc00 ; GFX9-NEXT: v_readlane_b32 s33, v40, 2 ; GFX9-NEXT: s_or_saveexec_b64 s[34:35], -1 @@ -3481,13 +3481,13 @@ define amdgpu_gfx void @test_call_external_void_func_v4i32_imm() #0 { ; GFX10-NEXT: v_writelane_b32 v40, s30, 0 ; GFX10-NEXT: s_mov_b32 s33, s32 ; GFX10-NEXT: s_addk_i32 s32, 0x200 +; GFX10-NEXT: s_getpc_b64 s[34:35] +; GFX10-NEXT: s_add_u32 s34, s34, external_void_func_v4i32@rel32@lo+4 +; GFX10-NEXT: s_addc_u32 s35, s35, external_void_func_v4i32@rel32@hi+12 ; GFX10-NEXT: v_writelane_b32 v40, s31, 1 -; GFX10-NEXT: s_getpc_b64 s[30:31] -; GFX10-NEXT: s_add_u32 s30, s30, external_void_func_v4i32@rel32@lo+4 -; GFX10-NEXT: s_addc_u32 s31, s31, external_void_func_v4i32@rel32@hi+12 -; GFX10-NEXT: s_swappc_b64 s[30:31], s[30:31] -; GFX10-NEXT: v_readlane_b32 s30, v40, 0 +; GFX10-NEXT: s_swappc_b64 s[30:31], s[34:35] ; GFX10-NEXT: v_readlane_b32 s31, v40, 1 +; GFX10-NEXT: v_readlane_b32 s30, v40, 0 ; GFX10-NEXT: s_addk_i32 s32, 0xfe00 ; GFX10-NEXT: v_readlane_b32 s33, v40, 2 ; GFX10-NEXT: s_or_saveexec_b32 s34, -1 @@ -3518,8 +3518,8 @@ define amdgpu_gfx void @test_call_external_void_func_v4i32_imm() #0 { ; GFX10-SCRATCH-NEXT: s_addc_u32 s1, s1, external_void_func_v4i32@rel32@hi+12 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s31, 1 ; GFX10-SCRATCH-NEXT: s_swappc_b64 s[30:31], s[0:1] -; GFX10-SCRATCH-NEXT: v_readlane_b32 s30, v40, 0 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s31, v40, 1 +; GFX10-SCRATCH-NEXT: v_readlane_b32 s30, v40, 0 ; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, -16 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s33, v40, 2 ; GFX10-SCRATCH-NEXT: s_or_saveexec_b32 s0, -1 @@ -3549,12 +3549,12 @@ define amdgpu_gfx void @test_call_external_void_func_v5i32_imm() #0 { ; GFX9-NEXT: v_mov_b32_e32 v3, 4 ; GFX9-NEXT: v_mov_b32_e32 v4, 5 ; GFX9-NEXT: v_writelane_b32 v40, s31, 1 -; GFX9-NEXT: s_getpc_b64 s[30:31] -; GFX9-NEXT: s_add_u32 s30, s30, external_void_func_v5i32@rel32@lo+4 -; GFX9-NEXT: s_addc_u32 s31, s31, external_void_func_v5i32@rel32@hi+12 -; GFX9-NEXT: s_swappc_b64 s[30:31], s[30:31] -; GFX9-NEXT: v_readlane_b32 s30, v40, 0 +; GFX9-NEXT: s_getpc_b64 s[34:35] +; GFX9-NEXT: s_add_u32 s34, s34, external_void_func_v5i32@rel32@lo+4 +; GFX9-NEXT: s_addc_u32 s35, s35, external_void_func_v5i32@rel32@hi+12 +; GFX9-NEXT: s_swappc_b64 s[30:31], s[34:35] ; GFX9-NEXT: v_readlane_b32 s31, v40, 1 +; GFX9-NEXT: v_readlane_b32 s30, v40, 0 ; GFX9-NEXT: s_addk_i32 s32, 0xfc00 ; GFX9-NEXT: v_readlane_b32 s33, v40, 2 ; GFX9-NEXT: s_or_saveexec_b64 s[34:35], -1 @@ -3580,13 +3580,13 @@ define amdgpu_gfx void @test_call_external_void_func_v5i32_imm() #0 { ; GFX10-NEXT: v_mov_b32_e32 v4, 5 ; GFX10-NEXT: s_mov_b32 s33, s32 ; GFX10-NEXT: s_addk_i32 s32, 0x200 +; GFX10-NEXT: s_getpc_b64 s[34:35] +; GFX10-NEXT: s_add_u32 s34, s34, external_void_func_v5i32@rel32@lo+4 +; GFX10-NEXT: s_addc_u32 s35, s35, external_void_func_v5i32@rel32@hi+12 ; GFX10-NEXT: v_writelane_b32 v40, s31, 1 -; GFX10-NEXT: s_getpc_b64 s[30:31] -; GFX10-NEXT: s_add_u32 s30, s30, external_void_func_v5i32@rel32@lo+4 -; GFX10-NEXT: s_addc_u32 s31, s31, external_void_func_v5i32@rel32@hi+12 -; GFX10-NEXT: s_swappc_b64 s[30:31], s[30:31] -; GFX10-NEXT: v_readlane_b32 s30, v40, 0 +; GFX10-NEXT: s_swappc_b64 s[30:31], s[34:35] ; GFX10-NEXT: v_readlane_b32 s31, v40, 1 +; GFX10-NEXT: 
v_readlane_b32 s30, v40, 0 ; GFX10-NEXT: s_addk_i32 s32, 0xfe00 ; GFX10-NEXT: v_readlane_b32 s33, v40, 2 ; GFX10-NEXT: s_or_saveexec_b32 s34, -1 @@ -3618,8 +3618,8 @@ define amdgpu_gfx void @test_call_external_void_func_v5i32_imm() #0 { ; GFX10-SCRATCH-NEXT: s_addc_u32 s1, s1, external_void_func_v5i32@rel32@hi+12 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s31, 1 ; GFX10-SCRATCH-NEXT: s_swappc_b64 s[30:31], s[0:1] -; GFX10-SCRATCH-NEXT: v_readlane_b32 s30, v40, 0 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s31, v40, 1 +; GFX10-SCRATCH-NEXT: v_readlane_b32 s30, v40, 0 ; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, -16 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s33, v40, 2 ; GFX10-SCRATCH-NEXT: s_or_saveexec_b32 s0, -1 @@ -3639,22 +3639,22 @@ define amdgpu_gfx void @test_call_external_void_func_v8i32() #0 { ; GFX9-NEXT: s_or_saveexec_b64 s[34:35], -1 ; GFX9-NEXT: buffer_store_dword v40, off, s[0:3], s32 ; 4-byte Folded Spill ; GFX9-NEXT: s_mov_b64 exec, s[34:35] -; GFX9-NEXT: v_writelane_b32 v40, s33, 2 -; GFX9-NEXT: v_writelane_b32 v40, s30, 0 -; GFX9-NEXT: v_writelane_b32 v40, s31, 1 -; GFX9-NEXT: s_load_dwordx2 s[30:31], s[30:31], 0x0 +; GFX9-NEXT: s_load_dwordx2 s[34:35], s[34:35], 0x0 ; GFX9-NEXT: v_mov_b32_e32 v8, 0 +; GFX9-NEXT: v_writelane_b32 v40, s33, 2 ; GFX9-NEXT: s_mov_b32 s33, s32 ; GFX9-NEXT: s_addk_i32 s32, 0x400 ; GFX9-NEXT: s_waitcnt lgkmcnt(0) -; GFX9-NEXT: global_load_dwordx4 v[0:3], v8, s[30:31] -; GFX9-NEXT: global_load_dwordx4 v[4:7], v8, s[30:31] offset:16 -; GFX9-NEXT: s_getpc_b64 s[30:31] -; GFX9-NEXT: s_add_u32 s30, s30, external_void_func_v8i32@rel32@lo+4 -; GFX9-NEXT: s_addc_u32 s31, s31, external_void_func_v8i32@rel32@hi+12 -; GFX9-NEXT: s_swappc_b64 s[30:31], s[30:31] -; GFX9-NEXT: v_readlane_b32 s30, v40, 0 +; GFX9-NEXT: global_load_dwordx4 v[0:3], v8, s[34:35] +; GFX9-NEXT: global_load_dwordx4 v[4:7], v8, s[34:35] offset:16 +; GFX9-NEXT: v_writelane_b32 v40, s30, 0 +; GFX9-NEXT: v_writelane_b32 v40, s31, 1 +; GFX9-NEXT: s_getpc_b64 s[34:35] +; GFX9-NEXT: s_add_u32 s34, s34, external_void_func_v8i32@rel32@lo+4 +; GFX9-NEXT: s_addc_u32 s35, s35, external_void_func_v8i32@rel32@hi+12 +; GFX9-NEXT: s_swappc_b64 s[30:31], s[34:35] ; GFX9-NEXT: v_readlane_b32 s31, v40, 1 +; GFX9-NEXT: v_readlane_b32 s30, v40, 0 ; GFX9-NEXT: s_addk_i32 s32, 0xfc00 ; GFX9-NEXT: v_readlane_b32 s33, v40, 2 ; GFX9-NEXT: s_or_saveexec_b64 s[34:35], -1 @@ -3671,24 +3671,23 @@ define amdgpu_gfx void @test_call_external_void_func_v8i32() #0 { ; GFX10-NEXT: buffer_store_dword v40, off, s[0:3], s32 ; 4-byte Folded Spill ; GFX10-NEXT: s_waitcnt_depctr 0xffe3 ; GFX10-NEXT: s_mov_b32 exec_lo, s34 -; GFX10-NEXT: v_writelane_b32 v40, s33, 2 +; GFX10-NEXT: s_load_dwordx2 s[34:35], s[34:35], 0x0 ; GFX10-NEXT: v_mov_b32_e32 v8, 0 +; GFX10-NEXT: v_writelane_b32 v40, s33, 2 ; GFX10-NEXT: s_mov_b32 s33, s32 ; GFX10-NEXT: s_addk_i32 s32, 0x200 -; GFX10-NEXT: v_writelane_b32 v40, s30, 0 -; GFX10-NEXT: v_writelane_b32 v40, s31, 1 -; GFX10-NEXT: s_load_dwordx2 s[30:31], s[30:31], 0x0 ; GFX10-NEXT: s_waitcnt lgkmcnt(0) ; GFX10-NEXT: s_clause 0x1 -; GFX10-NEXT: global_load_dwordx4 v[0:3], v8, s[30:31] -; GFX10-NEXT: global_load_dwordx4 v[4:7], v8, s[30:31] offset:16 -; GFX10-NEXT: s_waitcnt_depctr 0xffe3 -; GFX10-NEXT: s_getpc_b64 s[30:31] -; GFX10-NEXT: s_add_u32 s30, s30, external_void_func_v8i32@rel32@lo+4 -; GFX10-NEXT: s_addc_u32 s31, s31, external_void_func_v8i32@rel32@hi+12 -; GFX10-NEXT: s_swappc_b64 s[30:31], s[30:31] -; GFX10-NEXT: v_readlane_b32 s30, v40, 0 +; GFX10-NEXT: global_load_dwordx4 v[0:3], v8, s[34:35] +; 
GFX10-NEXT: global_load_dwordx4 v[4:7], v8, s[34:35] offset:16 +; GFX10-NEXT: v_writelane_b32 v40, s30, 0 +; GFX10-NEXT: s_getpc_b64 s[34:35] +; GFX10-NEXT: s_add_u32 s34, s34, external_void_func_v8i32@rel32@lo+4 +; GFX10-NEXT: s_addc_u32 s35, s35, external_void_func_v8i32@rel32@hi+12 +; GFX10-NEXT: v_writelane_b32 v40, s31, 1 +; GFX10-NEXT: s_swappc_b64 s[30:31], s[34:35] ; GFX10-NEXT: v_readlane_b32 s31, v40, 1 +; GFX10-NEXT: v_readlane_b32 s30, v40, 0 ; GFX10-NEXT: s_addk_i32 s32, 0xfe00 ; GFX10-NEXT: v_readlane_b32 s33, v40, 2 ; GFX10-NEXT: s_or_saveexec_b32 s34, -1 @@ -3721,8 +3720,8 @@ define amdgpu_gfx void @test_call_external_void_func_v8i32() #0 { ; GFX10-SCRATCH-NEXT: s_addc_u32 s1, s1, external_void_func_v8i32@rel32@hi+12 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s31, 1 ; GFX10-SCRATCH-NEXT: s_swappc_b64 s[30:31], s[0:1] -; GFX10-SCRATCH-NEXT: v_readlane_b32 s30, v40, 0 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s31, v40, 1 +; GFX10-SCRATCH-NEXT: v_readlane_b32 s30, v40, 0 ; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, -16 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s33, v40, 2 ; GFX10-SCRATCH-NEXT: s_or_saveexec_b32 s0, -1 @@ -3757,12 +3756,12 @@ define amdgpu_gfx void @test_call_external_void_func_v8i32_imm() #0 { ; GFX9-NEXT: v_mov_b32_e32 v6, 7 ; GFX9-NEXT: v_mov_b32_e32 v7, 8 ; GFX9-NEXT: v_writelane_b32 v40, s31, 1 -; GFX9-NEXT: s_getpc_b64 s[30:31] -; GFX9-NEXT: s_add_u32 s30, s30, external_void_func_v8i32@rel32@lo+4 -; GFX9-NEXT: s_addc_u32 s31, s31, external_void_func_v8i32@rel32@hi+12 -; GFX9-NEXT: s_swappc_b64 s[30:31], s[30:31] -; GFX9-NEXT: v_readlane_b32 s30, v40, 0 +; GFX9-NEXT: s_getpc_b64 s[34:35] +; GFX9-NEXT: s_add_u32 s34, s34, external_void_func_v8i32@rel32@lo+4 +; GFX9-NEXT: s_addc_u32 s35, s35, external_void_func_v8i32@rel32@hi+12 +; GFX9-NEXT: s_swappc_b64 s[30:31], s[34:35] ; GFX9-NEXT: v_readlane_b32 s31, v40, 1 +; GFX9-NEXT: v_readlane_b32 s30, v40, 0 ; GFX9-NEXT: s_addk_i32 s32, 0xfc00 ; GFX9-NEXT: v_readlane_b32 s33, v40, 2 ; GFX9-NEXT: s_or_saveexec_b64 s[34:35], -1 @@ -3792,12 +3791,12 @@ define amdgpu_gfx void @test_call_external_void_func_v8i32_imm() #0 { ; GFX10-NEXT: s_mov_b32 s33, s32 ; GFX10-NEXT: s_addk_i32 s32, 0x200 ; GFX10-NEXT: v_writelane_b32 v40, s31, 1 -; GFX10-NEXT: s_getpc_b64 s[30:31] -; GFX10-NEXT: s_add_u32 s30, s30, external_void_func_v8i32@rel32@lo+4 -; GFX10-NEXT: s_addc_u32 s31, s31, external_void_func_v8i32@rel32@hi+12 -; GFX10-NEXT: s_swappc_b64 s[30:31], s[30:31] -; GFX10-NEXT: v_readlane_b32 s30, v40, 0 +; GFX10-NEXT: s_getpc_b64 s[34:35] +; GFX10-NEXT: s_add_u32 s34, s34, external_void_func_v8i32@rel32@lo+4 +; GFX10-NEXT: s_addc_u32 s35, s35, external_void_func_v8i32@rel32@hi+12 +; GFX10-NEXT: s_swappc_b64 s[30:31], s[34:35] ; GFX10-NEXT: v_readlane_b32 s31, v40, 1 +; GFX10-NEXT: v_readlane_b32 s30, v40, 0 ; GFX10-NEXT: s_addk_i32 s32, 0xfe00 ; GFX10-NEXT: v_readlane_b32 s33, v40, 2 ; GFX10-NEXT: s_or_saveexec_b32 s34, -1 @@ -3832,8 +3831,8 @@ define amdgpu_gfx void @test_call_external_void_func_v8i32_imm() #0 { ; GFX10-SCRATCH-NEXT: s_add_u32 s0, s0, external_void_func_v8i32@rel32@lo+4 ; GFX10-SCRATCH-NEXT: s_addc_u32 s1, s1, external_void_func_v8i32@rel32@hi+12 ; GFX10-SCRATCH-NEXT: s_swappc_b64 s[30:31], s[0:1] -; GFX10-SCRATCH-NEXT: v_readlane_b32 s30, v40, 0 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s31, v40, 1 +; GFX10-SCRATCH-NEXT: v_readlane_b32 s30, v40, 0 ; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, -16 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s33, v40, 2 ; GFX10-SCRATCH-NEXT: s_or_saveexec_b32 s0, -1 @@ -3853,24 +3852,24 @@ define 
amdgpu_gfx void @test_call_external_void_func_v16i32() #0 { ; GFX9-NEXT: s_or_saveexec_b64 s[34:35], -1 ; GFX9-NEXT: buffer_store_dword v40, off, s[0:3], s32 ; 4-byte Folded Spill ; GFX9-NEXT: s_mov_b64 exec, s[34:35] -; GFX9-NEXT: v_writelane_b32 v40, s33, 2 -; GFX9-NEXT: v_writelane_b32 v40, s30, 0 -; GFX9-NEXT: v_writelane_b32 v40, s31, 1 -; GFX9-NEXT: s_load_dwordx2 s[30:31], s[30:31], 0x0 +; GFX9-NEXT: s_load_dwordx2 s[34:35], s[34:35], 0x0 ; GFX9-NEXT: v_mov_b32_e32 v16, 0 +; GFX9-NEXT: v_writelane_b32 v40, s33, 2 ; GFX9-NEXT: s_mov_b32 s33, s32 ; GFX9-NEXT: s_addk_i32 s32, 0x400 ; GFX9-NEXT: s_waitcnt lgkmcnt(0) -; GFX9-NEXT: global_load_dwordx4 v[0:3], v16, s[30:31] -; GFX9-NEXT: global_load_dwordx4 v[4:7], v16, s[30:31] offset:16 -; GFX9-NEXT: global_load_dwordx4 v[8:11], v16, s[30:31] offset:32 -; GFX9-NEXT: global_load_dwordx4 v[12:15], v16, s[30:31] offset:48 -; GFX9-NEXT: s_getpc_b64 s[30:31] -; GFX9-NEXT: s_add_u32 s30, s30, external_void_func_v16i32@rel32@lo+4 -; GFX9-NEXT: s_addc_u32 s31, s31, external_void_func_v16i32@rel32@hi+12 -; GFX9-NEXT: s_swappc_b64 s[30:31], s[30:31] -; GFX9-NEXT: v_readlane_b32 s30, v40, 0 +; GFX9-NEXT: global_load_dwordx4 v[0:3], v16, s[34:35] +; GFX9-NEXT: global_load_dwordx4 v[4:7], v16, s[34:35] offset:16 +; GFX9-NEXT: global_load_dwordx4 v[8:11], v16, s[34:35] offset:32 +; GFX9-NEXT: global_load_dwordx4 v[12:15], v16, s[34:35] offset:48 +; GFX9-NEXT: v_writelane_b32 v40, s30, 0 +; GFX9-NEXT: v_writelane_b32 v40, s31, 1 +; GFX9-NEXT: s_getpc_b64 s[34:35] +; GFX9-NEXT: s_add_u32 s34, s34, external_void_func_v16i32@rel32@lo+4 +; GFX9-NEXT: s_addc_u32 s35, s35, external_void_func_v16i32@rel32@hi+12 +; GFX9-NEXT: s_swappc_b64 s[30:31], s[34:35] ; GFX9-NEXT: v_readlane_b32 s31, v40, 1 +; GFX9-NEXT: v_readlane_b32 s30, v40, 0 ; GFX9-NEXT: s_addk_i32 s32, 0xfc00 ; GFX9-NEXT: v_readlane_b32 s33, v40, 2 ; GFX9-NEXT: s_or_saveexec_b64 s[34:35], -1 @@ -3887,26 +3886,25 @@ define amdgpu_gfx void @test_call_external_void_func_v16i32() #0 { ; GFX10-NEXT: buffer_store_dword v40, off, s[0:3], s32 ; 4-byte Folded Spill ; GFX10-NEXT: s_waitcnt_depctr 0xffe3 ; GFX10-NEXT: s_mov_b32 exec_lo, s34 -; GFX10-NEXT: v_writelane_b32 v40, s33, 2 +; GFX10-NEXT: s_load_dwordx2 s[34:35], s[34:35], 0x0 ; GFX10-NEXT: v_mov_b32_e32 v16, 0 +; GFX10-NEXT: v_writelane_b32 v40, s33, 2 ; GFX10-NEXT: s_mov_b32 s33, s32 ; GFX10-NEXT: s_addk_i32 s32, 0x200 -; GFX10-NEXT: v_writelane_b32 v40, s30, 0 -; GFX10-NEXT: v_writelane_b32 v40, s31, 1 -; GFX10-NEXT: s_load_dwordx2 s[30:31], s[30:31], 0x0 ; GFX10-NEXT: s_waitcnt lgkmcnt(0) ; GFX10-NEXT: s_clause 0x3 -; GFX10-NEXT: global_load_dwordx4 v[0:3], v16, s[30:31] -; GFX10-NEXT: global_load_dwordx4 v[4:7], v16, s[30:31] offset:16 -; GFX10-NEXT: global_load_dwordx4 v[8:11], v16, s[30:31] offset:32 -; GFX10-NEXT: global_load_dwordx4 v[12:15], v16, s[30:31] offset:48 -; GFX10-NEXT: s_waitcnt_depctr 0xffe3 -; GFX10-NEXT: s_getpc_b64 s[30:31] -; GFX10-NEXT: s_add_u32 s30, s30, external_void_func_v16i32@rel32@lo+4 -; GFX10-NEXT: s_addc_u32 s31, s31, external_void_func_v16i32@rel32@hi+12 -; GFX10-NEXT: s_swappc_b64 s[30:31], s[30:31] -; GFX10-NEXT: v_readlane_b32 s30, v40, 0 +; GFX10-NEXT: global_load_dwordx4 v[0:3], v16, s[34:35] +; GFX10-NEXT: global_load_dwordx4 v[4:7], v16, s[34:35] offset:16 +; GFX10-NEXT: global_load_dwordx4 v[8:11], v16, s[34:35] offset:32 +; GFX10-NEXT: global_load_dwordx4 v[12:15], v16, s[34:35] offset:48 +; GFX10-NEXT: v_writelane_b32 v40, s30, 0 +; GFX10-NEXT: s_getpc_b64 s[34:35] +; GFX10-NEXT: s_add_u32 s34, s34, 
external_void_func_v16i32@rel32@lo+4 +; GFX10-NEXT: s_addc_u32 s35, s35, external_void_func_v16i32@rel32@hi+12 +; GFX10-NEXT: v_writelane_b32 v40, s31, 1 +; GFX10-NEXT: s_swappc_b64 s[30:31], s[34:35] ; GFX10-NEXT: v_readlane_b32 s31, v40, 1 +; GFX10-NEXT: v_readlane_b32 s30, v40, 0 ; GFX10-NEXT: s_addk_i32 s32, 0xfe00 ; GFX10-NEXT: v_readlane_b32 s33, v40, 2 ; GFX10-NEXT: s_or_saveexec_b32 s34, -1 @@ -3941,8 +3939,8 @@ define amdgpu_gfx void @test_call_external_void_func_v16i32() #0 { ; GFX10-SCRATCH-NEXT: s_addc_u32 s1, s1, external_void_func_v16i32@rel32@hi+12 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s31, 1 ; GFX10-SCRATCH-NEXT: s_swappc_b64 s[30:31], s[0:1] -; GFX10-SCRATCH-NEXT: v_readlane_b32 s30, v40, 0 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s31, v40, 1 +; GFX10-SCRATCH-NEXT: v_readlane_b32 s30, v40, 0 ; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, -16 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s33, v40, 2 ; GFX10-SCRATCH-NEXT: s_or_saveexec_b32 s0, -1 @@ -3964,29 +3962,29 @@ define amdgpu_gfx void @test_call_external_void_func_v32i32() #0 { ; GFX9-NEXT: s_or_saveexec_b64 s[34:35], -1 ; GFX9-NEXT: buffer_store_dword v40, off, s[0:3], s32 ; 4-byte Folded Spill ; GFX9-NEXT: s_mov_b64 exec, s[34:35] -; GFX9-NEXT: v_writelane_b32 v40, s33, 2 -; GFX9-NEXT: v_writelane_b32 v40, s30, 0 -; GFX9-NEXT: v_writelane_b32 v40, s31, 1 -; GFX9-NEXT: s_load_dwordx2 s[30:31], s[30:31], 0x0 +; GFX9-NEXT: s_load_dwordx2 s[34:35], s[34:35], 0x0 ; GFX9-NEXT: v_mov_b32_e32 v28, 0 +; GFX9-NEXT: v_writelane_b32 v40, s33, 2 ; GFX9-NEXT: s_mov_b32 s33, s32 ; GFX9-NEXT: s_addk_i32 s32, 0x400 ; GFX9-NEXT: s_waitcnt lgkmcnt(0) -; GFX9-NEXT: global_load_dwordx4 v[0:3], v28, s[30:31] -; GFX9-NEXT: global_load_dwordx4 v[4:7], v28, s[30:31] offset:16 -; GFX9-NEXT: global_load_dwordx4 v[8:11], v28, s[30:31] offset:32 -; GFX9-NEXT: global_load_dwordx4 v[12:15], v28, s[30:31] offset:48 -; GFX9-NEXT: global_load_dwordx4 v[16:19], v28, s[30:31] offset:64 -; GFX9-NEXT: global_load_dwordx4 v[20:23], v28, s[30:31] offset:80 -; GFX9-NEXT: global_load_dwordx4 v[24:27], v28, s[30:31] offset:96 +; GFX9-NEXT: global_load_dwordx4 v[0:3], v28, s[34:35] +; GFX9-NEXT: global_load_dwordx4 v[4:7], v28, s[34:35] offset:16 +; GFX9-NEXT: global_load_dwordx4 v[8:11], v28, s[34:35] offset:32 +; GFX9-NEXT: global_load_dwordx4 v[12:15], v28, s[34:35] offset:48 +; GFX9-NEXT: global_load_dwordx4 v[16:19], v28, s[34:35] offset:64 +; GFX9-NEXT: global_load_dwordx4 v[20:23], v28, s[34:35] offset:80 +; GFX9-NEXT: global_load_dwordx4 v[24:27], v28, s[34:35] offset:96 ; GFX9-NEXT: s_nop 0 -; GFX9-NEXT: global_load_dwordx4 v[28:31], v28, s[30:31] offset:112 -; GFX9-NEXT: s_getpc_b64 s[30:31] -; GFX9-NEXT: s_add_u32 s30, s30, external_void_func_v32i32@rel32@lo+4 -; GFX9-NEXT: s_addc_u32 s31, s31, external_void_func_v32i32@rel32@hi+12 -; GFX9-NEXT: s_swappc_b64 s[30:31], s[30:31] -; GFX9-NEXT: v_readlane_b32 s30, v40, 0 +; GFX9-NEXT: global_load_dwordx4 v[28:31], v28, s[34:35] offset:112 +; GFX9-NEXT: v_writelane_b32 v40, s30, 0 +; GFX9-NEXT: v_writelane_b32 v40, s31, 1 +; GFX9-NEXT: s_getpc_b64 s[34:35] +; GFX9-NEXT: s_add_u32 s34, s34, external_void_func_v32i32@rel32@lo+4 +; GFX9-NEXT: s_addc_u32 s35, s35, external_void_func_v32i32@rel32@hi+12 +; GFX9-NEXT: s_swappc_b64 s[30:31], s[34:35] ; GFX9-NEXT: v_readlane_b32 s31, v40, 1 +; GFX9-NEXT: v_readlane_b32 s30, v40, 0 ; GFX9-NEXT: s_addk_i32 s32, 0xfc00 ; GFX9-NEXT: v_readlane_b32 s33, v40, 2 ; GFX9-NEXT: s_or_saveexec_b64 s[34:35], -1 @@ -4003,30 +4001,29 @@ define amdgpu_gfx void 
@test_call_external_void_func_v32i32() #0 { ; GFX10-NEXT: buffer_store_dword v40, off, s[0:3], s32 ; 4-byte Folded Spill ; GFX10-NEXT: s_waitcnt_depctr 0xffe3 ; GFX10-NEXT: s_mov_b32 exec_lo, s34 -; GFX10-NEXT: v_writelane_b32 v40, s33, 2 +; GFX10-NEXT: s_load_dwordx2 s[34:35], s[34:35], 0x0 ; GFX10-NEXT: v_mov_b32_e32 v32, 0 +; GFX10-NEXT: v_writelane_b32 v40, s33, 2 ; GFX10-NEXT: s_mov_b32 s33, s32 ; GFX10-NEXT: s_addk_i32 s32, 0x200 -; GFX10-NEXT: v_writelane_b32 v40, s30, 0 -; GFX10-NEXT: v_writelane_b32 v40, s31, 1 -; GFX10-NEXT: s_load_dwordx2 s[30:31], s[30:31], 0x0 ; GFX10-NEXT: s_waitcnt lgkmcnt(0) ; GFX10-NEXT: s_clause 0x7 -; GFX10-NEXT: global_load_dwordx4 v[0:3], v32, s[30:31] -; GFX10-NEXT: global_load_dwordx4 v[4:7], v32, s[30:31] offset:16 -; GFX10-NEXT: global_load_dwordx4 v[8:11], v32, s[30:31] offset:32 -; GFX10-NEXT: global_load_dwordx4 v[12:15], v32, s[30:31] offset:48 -; GFX10-NEXT: global_load_dwordx4 v[16:19], v32, s[30:31] offset:64 -; GFX10-NEXT: global_load_dwordx4 v[20:23], v32, s[30:31] offset:80 -; GFX10-NEXT: global_load_dwordx4 v[24:27], v32, s[30:31] offset:96 -; GFX10-NEXT: global_load_dwordx4 v[28:31], v32, s[30:31] offset:112 -; GFX10-NEXT: s_waitcnt_depctr 0xffe3 -; GFX10-NEXT: s_getpc_b64 s[30:31] -; GFX10-NEXT: s_add_u32 s30, s30, external_void_func_v32i32@rel32@lo+4 -; GFX10-NEXT: s_addc_u32 s31, s31, external_void_func_v32i32@rel32@hi+12 -; GFX10-NEXT: s_swappc_b64 s[30:31], s[30:31] -; GFX10-NEXT: v_readlane_b32 s30, v40, 0 +; GFX10-NEXT: global_load_dwordx4 v[0:3], v32, s[34:35] +; GFX10-NEXT: global_load_dwordx4 v[4:7], v32, s[34:35] offset:16 +; GFX10-NEXT: global_load_dwordx4 v[8:11], v32, s[34:35] offset:32 +; GFX10-NEXT: global_load_dwordx4 v[12:15], v32, s[34:35] offset:48 +; GFX10-NEXT: global_load_dwordx4 v[16:19], v32, s[34:35] offset:64 +; GFX10-NEXT: global_load_dwordx4 v[20:23], v32, s[34:35] offset:80 +; GFX10-NEXT: global_load_dwordx4 v[24:27], v32, s[34:35] offset:96 +; GFX10-NEXT: global_load_dwordx4 v[28:31], v32, s[34:35] offset:112 +; GFX10-NEXT: v_writelane_b32 v40, s30, 0 +; GFX10-NEXT: s_getpc_b64 s[34:35] +; GFX10-NEXT: s_add_u32 s34, s34, external_void_func_v32i32@rel32@lo+4 +; GFX10-NEXT: s_addc_u32 s35, s35, external_void_func_v32i32@rel32@hi+12 +; GFX10-NEXT: v_writelane_b32 v40, s31, 1 +; GFX10-NEXT: s_swappc_b64 s[30:31], s[34:35] ; GFX10-NEXT: v_readlane_b32 s31, v40, 1 +; GFX10-NEXT: v_readlane_b32 s30, v40, 0 ; GFX10-NEXT: s_addk_i32 s32, 0xfe00 ; GFX10-NEXT: v_readlane_b32 s33, v40, 2 ; GFX10-NEXT: s_or_saveexec_b32 s34, -1 @@ -4065,8 +4062,8 @@ define amdgpu_gfx void @test_call_external_void_func_v32i32() #0 { ; GFX10-SCRATCH-NEXT: s_addc_u32 s1, s1, external_void_func_v32i32@rel32@hi+12 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s31, 1 ; GFX10-SCRATCH-NEXT: s_swappc_b64 s[30:31], s[0:1] -; GFX10-SCRATCH-NEXT: v_readlane_b32 s30, v40, 0 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s31, v40, 1 +; GFX10-SCRATCH-NEXT: v_readlane_b32 s30, v40, 0 ; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, -16 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s33, v40, 2 ; GFX10-SCRATCH-NEXT: s_or_saveexec_b32 s0, -1 @@ -4088,32 +4085,32 @@ define amdgpu_gfx void @test_call_external_void_func_v32i32_i32(i32) #0 { ; GFX9-NEXT: s_or_saveexec_b64 s[34:35], -1 ; GFX9-NEXT: buffer_store_dword v40, off, s[0:3], s32 ; 4-byte Folded Spill ; GFX9-NEXT: s_mov_b64 exec, s[34:35] -; GFX9-NEXT: v_writelane_b32 v40, s33, 2 -; GFX9-NEXT: v_writelane_b32 v40, s30, 0 -; GFX9-NEXT: v_writelane_b32 v40, s31, 1 -; GFX9-NEXT: s_load_dwordx2 s[30:31], s[30:31], 0x0 +; GFX9-NEXT: 
s_load_dwordx2 s[34:35], s[34:35], 0x0 ; GFX9-NEXT: v_mov_b32_e32 v28, 0 ; GFX9-NEXT: global_load_dword v32, v[0:1], off +; GFX9-NEXT: v_writelane_b32 v40, s33, 2 ; GFX9-NEXT: s_mov_b32 s33, s32 ; GFX9-NEXT: s_waitcnt lgkmcnt(0) -; GFX9-NEXT: global_load_dwordx4 v[0:3], v28, s[30:31] -; GFX9-NEXT: global_load_dwordx4 v[4:7], v28, s[30:31] offset:16 -; GFX9-NEXT: global_load_dwordx4 v[8:11], v28, s[30:31] offset:32 -; GFX9-NEXT: global_load_dwordx4 v[12:15], v28, s[30:31] offset:48 -; GFX9-NEXT: global_load_dwordx4 v[16:19], v28, s[30:31] offset:64 -; GFX9-NEXT: global_load_dwordx4 v[20:23], v28, s[30:31] offset:80 -; GFX9-NEXT: global_load_dwordx4 v[24:27], v28, s[30:31] offset:96 +; GFX9-NEXT: global_load_dwordx4 v[0:3], v28, s[34:35] +; GFX9-NEXT: global_load_dwordx4 v[4:7], v28, s[34:35] offset:16 +; GFX9-NEXT: global_load_dwordx4 v[8:11], v28, s[34:35] offset:32 +; GFX9-NEXT: global_load_dwordx4 v[12:15], v28, s[34:35] offset:48 +; GFX9-NEXT: global_load_dwordx4 v[16:19], v28, s[34:35] offset:64 +; GFX9-NEXT: global_load_dwordx4 v[20:23], v28, s[34:35] offset:80 +; GFX9-NEXT: global_load_dwordx4 v[24:27], v28, s[34:35] offset:96 ; GFX9-NEXT: s_nop 0 -; GFX9-NEXT: global_load_dwordx4 v[28:31], v28, s[30:31] offset:112 +; GFX9-NEXT: global_load_dwordx4 v[28:31], v28, s[34:35] offset:112 ; GFX9-NEXT: s_addk_i32 s32, 0x400 -; GFX9-NEXT: s_getpc_b64 s[30:31] -; GFX9-NEXT: s_add_u32 s30, s30, external_void_func_v32i32_i32@rel32@lo+4 -; GFX9-NEXT: s_addc_u32 s31, s31, external_void_func_v32i32_i32@rel32@hi+12 +; GFX9-NEXT: v_writelane_b32 v40, s30, 0 +; GFX9-NEXT: v_writelane_b32 v40, s31, 1 +; GFX9-NEXT: s_getpc_b64 s[34:35] +; GFX9-NEXT: s_add_u32 s34, s34, external_void_func_v32i32_i32@rel32@lo+4 +; GFX9-NEXT: s_addc_u32 s35, s35, external_void_func_v32i32_i32@rel32@hi+12 ; GFX9-NEXT: s_waitcnt vmcnt(8) ; GFX9-NEXT: buffer_store_dword v32, off, s[0:3], s32 -; GFX9-NEXT: s_swappc_b64 s[30:31], s[30:31] -; GFX9-NEXT: v_readlane_b32 s30, v40, 0 +; GFX9-NEXT: s_swappc_b64 s[30:31], s[34:35] ; GFX9-NEXT: v_readlane_b32 s31, v40, 1 +; GFX9-NEXT: v_readlane_b32 s30, v40, 0 ; GFX9-NEXT: s_addk_i32 s32, 0xfc00 ; GFX9-NEXT: v_readlane_b32 s33, v40, 2 ; GFX9-NEXT: s_or_saveexec_b64 s[34:35], -1 @@ -4130,33 +4127,32 @@ define amdgpu_gfx void @test_call_external_void_func_v32i32_i32(i32) #0 { ; GFX10-NEXT: buffer_store_dword v40, off, s[0:3], s32 ; 4-byte Folded Spill ; GFX10-NEXT: s_waitcnt_depctr 0xffe3 ; GFX10-NEXT: s_mov_b32 exec_lo, s34 -; GFX10-NEXT: v_writelane_b32 v40, s33, 2 +; GFX10-NEXT: s_load_dwordx2 s[34:35], s[34:35], 0x0 ; GFX10-NEXT: v_mov_b32_e32 v32, 0 +; GFX10-NEXT: v_writelane_b32 v40, s33, 2 ; GFX10-NEXT: s_mov_b32 s33, s32 ; GFX10-NEXT: s_addk_i32 s32, 0x200 -; GFX10-NEXT: v_writelane_b32 v40, s30, 0 -; GFX10-NEXT: v_writelane_b32 v40, s31, 1 -; GFX10-NEXT: s_load_dwordx2 s[30:31], s[30:31], 0x0 ; GFX10-NEXT: global_load_dword v33, v[0:1], off ; GFX10-NEXT: s_waitcnt lgkmcnt(0) ; GFX10-NEXT: s_clause 0x7 -; GFX10-NEXT: global_load_dwordx4 v[0:3], v32, s[30:31] -; GFX10-NEXT: global_load_dwordx4 v[4:7], v32, s[30:31] offset:16 -; GFX10-NEXT: global_load_dwordx4 v[8:11], v32, s[30:31] offset:32 -; GFX10-NEXT: global_load_dwordx4 v[12:15], v32, s[30:31] offset:48 -; GFX10-NEXT: global_load_dwordx4 v[16:19], v32, s[30:31] offset:64 -; GFX10-NEXT: global_load_dwordx4 v[20:23], v32, s[30:31] offset:80 -; GFX10-NEXT: global_load_dwordx4 v[24:27], v32, s[30:31] offset:96 -; GFX10-NEXT: global_load_dwordx4 v[28:31], v32, s[30:31] offset:112 -; GFX10-NEXT: s_waitcnt_depctr 0xffe3 -; 
GFX10-NEXT: s_getpc_b64 s[30:31] -; GFX10-NEXT: s_add_u32 s30, s30, external_void_func_v32i32_i32@rel32@lo+4 -; GFX10-NEXT: s_addc_u32 s31, s31, external_void_func_v32i32_i32@rel32@hi+12 +; GFX10-NEXT: global_load_dwordx4 v[0:3], v32, s[34:35] +; GFX10-NEXT: global_load_dwordx4 v[4:7], v32, s[34:35] offset:16 +; GFX10-NEXT: global_load_dwordx4 v[8:11], v32, s[34:35] offset:32 +; GFX10-NEXT: global_load_dwordx4 v[12:15], v32, s[34:35] offset:48 +; GFX10-NEXT: global_load_dwordx4 v[16:19], v32, s[34:35] offset:64 +; GFX10-NEXT: global_load_dwordx4 v[20:23], v32, s[34:35] offset:80 +; GFX10-NEXT: global_load_dwordx4 v[24:27], v32, s[34:35] offset:96 +; GFX10-NEXT: global_load_dwordx4 v[28:31], v32, s[34:35] offset:112 +; GFX10-NEXT: v_writelane_b32 v40, s30, 0 +; GFX10-NEXT: s_getpc_b64 s[34:35] +; GFX10-NEXT: s_add_u32 s34, s34, external_void_func_v32i32_i32@rel32@lo+4 +; GFX10-NEXT: s_addc_u32 s35, s35, external_void_func_v32i32_i32@rel32@hi+12 +; GFX10-NEXT: v_writelane_b32 v40, s31, 1 ; GFX10-NEXT: s_waitcnt vmcnt(8) ; GFX10-NEXT: buffer_store_dword v33, off, s[0:3], s32 -; GFX10-NEXT: s_swappc_b64 s[30:31], s[30:31] -; GFX10-NEXT: v_readlane_b32 s30, v40, 0 +; GFX10-NEXT: s_swappc_b64 s[30:31], s[34:35] ; GFX10-NEXT: v_readlane_b32 s31, v40, 1 +; GFX10-NEXT: v_readlane_b32 s30, v40, 0 ; GFX10-NEXT: s_addk_i32 s32, 0xfe00 ; GFX10-NEXT: v_readlane_b32 s33, v40, 2 ; GFX10-NEXT: s_or_saveexec_b32 s34, -1 @@ -4198,8 +4194,8 @@ define amdgpu_gfx void @test_call_external_void_func_v32i32_i32(i32) #0 { ; GFX10-SCRATCH-NEXT: s_waitcnt vmcnt(8) ; GFX10-SCRATCH-NEXT: scratch_store_dword off, v33, s32 ; GFX10-SCRATCH-NEXT: s_swappc_b64 s[30:31], s[0:1] -; GFX10-SCRATCH-NEXT: v_readlane_b32 s30, v40, 0 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s31, v40, 1 +; GFX10-SCRATCH-NEXT: v_readlane_b32 s30, v40, 0 ; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, -16 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s33, v40, 2 ; GFX10-SCRATCH-NEXT: s_or_saveexec_b32 s0, -1 @@ -4232,16 +4228,16 @@ define amdgpu_gfx void @test_call_external_i32_func_i32_imm(i32 addrspace(1)* %o ; GFX9-NEXT: v_mov_b32_e32 v0, 42 ; GFX9-NEXT: v_writelane_b32 v40, s31, 1 ; GFX9-NEXT: v_mov_b32_e32 v42, v1 -; GFX9-NEXT: s_getpc_b64 s[30:31] -; GFX9-NEXT: s_add_u32 s30, s30, external_i32_func_i32@rel32@lo+4 -; GFX9-NEXT: s_addc_u32 s31, s31, external_i32_func_i32@rel32@hi+12 -; GFX9-NEXT: s_swappc_b64 s[30:31], s[30:31] +; GFX9-NEXT: s_getpc_b64 s[34:35] +; GFX9-NEXT: s_add_u32 s34, s34, external_i32_func_i32@rel32@lo+4 +; GFX9-NEXT: s_addc_u32 s35, s35, external_i32_func_i32@rel32@hi+12 +; GFX9-NEXT: s_swappc_b64 s[30:31], s[34:35] ; GFX9-NEXT: global_store_dword v[41:42], v0, off ; GFX9-NEXT: s_waitcnt vmcnt(0) ; GFX9-NEXT: buffer_load_dword v42, off, s[0:3], s33 ; 4-byte Folded Reload ; GFX9-NEXT: buffer_load_dword v41, off, s[0:3], s33 offset:4 ; 4-byte Folded Reload -; GFX9-NEXT: v_readlane_b32 s30, v40, 0 ; GFX9-NEXT: v_readlane_b32 s31, v40, 1 +; GFX9-NEXT: v_readlane_b32 s30, v40, 0 ; GFX9-NEXT: s_addk_i32 s32, 0xfc00 ; GFX9-NEXT: v_readlane_b32 s33, v40, 2 ; GFX9-NEXT: s_or_saveexec_b64 s[34:35], -1 @@ -4267,18 +4263,18 @@ define amdgpu_gfx void @test_call_external_i32_func_i32_imm(i32 addrspace(1)* %o ; GFX10-NEXT: v_mov_b32_e32 v0, 42 ; GFX10-NEXT: s_addk_i32 s32, 0x200 ; GFX10-NEXT: v_mov_b32_e32 v42, v1 +; GFX10-NEXT: s_getpc_b64 s[34:35] +; GFX10-NEXT: s_add_u32 s34, s34, external_i32_func_i32@rel32@lo+4 +; GFX10-NEXT: s_addc_u32 s35, s35, external_i32_func_i32@rel32@hi+12 ; GFX10-NEXT: v_writelane_b32 v40, s31, 1 -; GFX10-NEXT: s_getpc_b64 
s[30:31] -; GFX10-NEXT: s_add_u32 s30, s30, external_i32_func_i32@rel32@lo+4 -; GFX10-NEXT: s_addc_u32 s31, s31, external_i32_func_i32@rel32@hi+12 -; GFX10-NEXT: s_swappc_b64 s[30:31], s[30:31] +; GFX10-NEXT: s_swappc_b64 s[30:31], s[34:35] ; GFX10-NEXT: global_store_dword v[41:42], v0, off ; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-NEXT: s_clause 0x1 ; GFX10-NEXT: buffer_load_dword v42, off, s[0:3], s33 ; GFX10-NEXT: buffer_load_dword v41, off, s[0:3], s33 offset:4 -; GFX10-NEXT: v_readlane_b32 s30, v40, 0 ; GFX10-NEXT: v_readlane_b32 s31, v40, 1 +; GFX10-NEXT: v_readlane_b32 s30, v40, 0 ; GFX10-NEXT: s_addk_i32 s32, 0xfe00 ; GFX10-NEXT: v_readlane_b32 s33, v40, 2 ; GFX10-NEXT: s_or_saveexec_b32 s34, -1 @@ -4315,8 +4311,8 @@ define amdgpu_gfx void @test_call_external_i32_func_i32_imm(i32 addrspace(1)* %o ; GFX10-SCRATCH-NEXT: s_clause 0x1 ; GFX10-SCRATCH-NEXT: scratch_load_dword v42, off, s33 ; GFX10-SCRATCH-NEXT: scratch_load_dword v41, off, s33 offset:4 -; GFX10-SCRATCH-NEXT: v_readlane_b32 s30, v40, 0 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s31, v40, 1 +; GFX10-SCRATCH-NEXT: v_readlane_b32 s30, v40, 0 ; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, -16 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s33, v40, 2 ; GFX10-SCRATCH-NEXT: s_or_saveexec_b32 s0, -1 @@ -4337,22 +4333,22 @@ define amdgpu_gfx void @test_call_external_void_func_struct_i8_i32() #0 { ; GFX9-NEXT: s_or_saveexec_b64 s[34:35], -1 ; GFX9-NEXT: buffer_store_dword v40, off, s[0:3], s32 ; 4-byte Folded Spill ; GFX9-NEXT: s_mov_b64 exec, s[34:35] -; GFX9-NEXT: v_writelane_b32 v40, s33, 2 -; GFX9-NEXT: v_writelane_b32 v40, s30, 0 -; GFX9-NEXT: v_writelane_b32 v40, s31, 1 -; GFX9-NEXT: s_load_dwordx2 s[30:31], s[30:31], 0x0 +; GFX9-NEXT: s_load_dwordx2 s[34:35], s[34:35], 0x0 ; GFX9-NEXT: v_mov_b32_e32 v2, 0 +; GFX9-NEXT: v_writelane_b32 v40, s33, 2 ; GFX9-NEXT: s_mov_b32 s33, s32 ; GFX9-NEXT: s_addk_i32 s32, 0x400 ; GFX9-NEXT: s_waitcnt lgkmcnt(0) -; GFX9-NEXT: global_load_ubyte v0, v2, s[30:31] -; GFX9-NEXT: global_load_dword v1, v2, s[30:31] offset:4 -; GFX9-NEXT: s_getpc_b64 s[30:31] -; GFX9-NEXT: s_add_u32 s30, s30, external_void_func_struct_i8_i32@rel32@lo+4 -; GFX9-NEXT: s_addc_u32 s31, s31, external_void_func_struct_i8_i32@rel32@hi+12 -; GFX9-NEXT: s_swappc_b64 s[30:31], s[30:31] -; GFX9-NEXT: v_readlane_b32 s30, v40, 0 +; GFX9-NEXT: global_load_ubyte v0, v2, s[34:35] +; GFX9-NEXT: global_load_dword v1, v2, s[34:35] offset:4 +; GFX9-NEXT: v_writelane_b32 v40, s30, 0 +; GFX9-NEXT: v_writelane_b32 v40, s31, 1 +; GFX9-NEXT: s_getpc_b64 s[34:35] +; GFX9-NEXT: s_add_u32 s34, s34, external_void_func_struct_i8_i32@rel32@lo+4 +; GFX9-NEXT: s_addc_u32 s35, s35, external_void_func_struct_i8_i32@rel32@hi+12 +; GFX9-NEXT: s_swappc_b64 s[30:31], s[34:35] ; GFX9-NEXT: v_readlane_b32 s31, v40, 1 +; GFX9-NEXT: v_readlane_b32 s30, v40, 0 ; GFX9-NEXT: s_addk_i32 s32, 0xfc00 ; GFX9-NEXT: v_readlane_b32 s33, v40, 2 ; GFX9-NEXT: s_or_saveexec_b64 s[34:35], -1 @@ -4369,24 +4365,23 @@ define amdgpu_gfx void @test_call_external_void_func_struct_i8_i32() #0 { ; GFX10-NEXT: buffer_store_dword v40, off, s[0:3], s32 ; 4-byte Folded Spill ; GFX10-NEXT: s_waitcnt_depctr 0xffe3 ; GFX10-NEXT: s_mov_b32 exec_lo, s34 -; GFX10-NEXT: v_writelane_b32 v40, s33, 2 +; GFX10-NEXT: s_load_dwordx2 s[34:35], s[34:35], 0x0 ; GFX10-NEXT: v_mov_b32_e32 v2, 0 +; GFX10-NEXT: v_writelane_b32 v40, s33, 2 ; GFX10-NEXT: s_mov_b32 s33, s32 ; GFX10-NEXT: s_addk_i32 s32, 0x200 -; GFX10-NEXT: v_writelane_b32 v40, s30, 0 -; GFX10-NEXT: v_writelane_b32 v40, s31, 1 -; GFX10-NEXT: 
s_load_dwordx2 s[30:31], s[30:31], 0x0 ; GFX10-NEXT: s_waitcnt lgkmcnt(0) ; GFX10-NEXT: s_clause 0x1 -; GFX10-NEXT: global_load_ubyte v0, v2, s[30:31] -; GFX10-NEXT: global_load_dword v1, v2, s[30:31] offset:4 -; GFX10-NEXT: s_waitcnt_depctr 0xffe3 -; GFX10-NEXT: s_getpc_b64 s[30:31] -; GFX10-NEXT: s_add_u32 s30, s30, external_void_func_struct_i8_i32@rel32@lo+4 -; GFX10-NEXT: s_addc_u32 s31, s31, external_void_func_struct_i8_i32@rel32@hi+12 -; GFX10-NEXT: s_swappc_b64 s[30:31], s[30:31] -; GFX10-NEXT: v_readlane_b32 s30, v40, 0 +; GFX10-NEXT: global_load_ubyte v0, v2, s[34:35] +; GFX10-NEXT: global_load_dword v1, v2, s[34:35] offset:4 +; GFX10-NEXT: v_writelane_b32 v40, s30, 0 +; GFX10-NEXT: s_getpc_b64 s[34:35] +; GFX10-NEXT: s_add_u32 s34, s34, external_void_func_struct_i8_i32@rel32@lo+4 +; GFX10-NEXT: s_addc_u32 s35, s35, external_void_func_struct_i8_i32@rel32@hi+12 +; GFX10-NEXT: v_writelane_b32 v40, s31, 1 +; GFX10-NEXT: s_swappc_b64 s[30:31], s[34:35] ; GFX10-NEXT: v_readlane_b32 s31, v40, 1 +; GFX10-NEXT: v_readlane_b32 s30, v40, 0 ; GFX10-NEXT: s_addk_i32 s32, 0xfe00 ; GFX10-NEXT: v_readlane_b32 s33, v40, 2 ; GFX10-NEXT: s_or_saveexec_b32 s34, -1 @@ -4419,8 +4414,8 @@ define amdgpu_gfx void @test_call_external_void_func_struct_i8_i32() #0 { ; GFX10-SCRATCH-NEXT: s_addc_u32 s1, s1, external_void_func_struct_i8_i32@rel32@hi+12 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s31, 1 ; GFX10-SCRATCH-NEXT: s_swappc_b64 s[30:31], s[0:1] -; GFX10-SCRATCH-NEXT: v_readlane_b32 s30, v40, 0 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s31, v40, 1 +; GFX10-SCRATCH-NEXT: v_readlane_b32 s30, v40, 0 ; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, -16 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s33, v40, 2 ; GFX10-SCRATCH-NEXT: s_or_saveexec_b32 s0, -1 @@ -4452,12 +4447,12 @@ define amdgpu_gfx void @test_call_external_void_func_byval_struct_i8_i32() #0 { ; GFX9-NEXT: buffer_store_dword v0, off, s[0:3], s33 offset:4 ; GFX9-NEXT: v_lshrrev_b32_e64 v0, 6, s33 ; GFX9-NEXT: v_writelane_b32 v40, s31, 1 -; GFX9-NEXT: s_getpc_b64 s[30:31] -; GFX9-NEXT: s_add_u32 s30, s30, external_void_func_byval_struct_i8_i32@rel32@lo+4 -; GFX9-NEXT: s_addc_u32 s31, s31, external_void_func_byval_struct_i8_i32@rel32@hi+12 -; GFX9-NEXT: s_swappc_b64 s[30:31], s[30:31] -; GFX9-NEXT: v_readlane_b32 s30, v40, 0 +; GFX9-NEXT: s_getpc_b64 s[34:35] +; GFX9-NEXT: s_add_u32 s34, s34, external_void_func_byval_struct_i8_i32@rel32@lo+4 +; GFX9-NEXT: s_addc_u32 s35, s35, external_void_func_byval_struct_i8_i32@rel32@hi+12 +; GFX9-NEXT: s_swappc_b64 s[30:31], s[34:35] ; GFX9-NEXT: v_readlane_b32 s31, v40, 1 +; GFX9-NEXT: v_readlane_b32 s30, v40, 0 ; GFX9-NEXT: s_addk_i32 s32, 0xfc00 ; GFX9-NEXT: v_readlane_b32 s33, v40, 2 ; GFX9-NEXT: s_or_saveexec_b64 s[34:35], -1 @@ -4483,13 +4478,13 @@ define amdgpu_gfx void @test_call_external_void_func_byval_struct_i8_i32() #0 { ; GFX10-NEXT: buffer_store_byte v0, off, s[0:3], s33 ; GFX10-NEXT: buffer_store_dword v1, off, s[0:3], s33 offset:4 ; GFX10-NEXT: v_lshrrev_b32_e64 v0, 5, s33 +; GFX10-NEXT: s_getpc_b64 s[34:35] +; GFX10-NEXT: s_add_u32 s34, s34, external_void_func_byval_struct_i8_i32@rel32@lo+4 +; GFX10-NEXT: s_addc_u32 s35, s35, external_void_func_byval_struct_i8_i32@rel32@hi+12 ; GFX10-NEXT: v_writelane_b32 v40, s31, 1 -; GFX10-NEXT: s_getpc_b64 s[30:31] -; GFX10-NEXT: s_add_u32 s30, s30, external_void_func_byval_struct_i8_i32@rel32@lo+4 -; GFX10-NEXT: s_addc_u32 s31, s31, external_void_func_byval_struct_i8_i32@rel32@hi+12 -; GFX10-NEXT: s_swappc_b64 s[30:31], s[30:31] -; GFX10-NEXT: v_readlane_b32 s30, v40, 0 +; 
GFX10-NEXT: s_swappc_b64 s[30:31], s[34:35] ; GFX10-NEXT: v_readlane_b32 s31, v40, 1 +; GFX10-NEXT: v_readlane_b32 s30, v40, 0 ; GFX10-NEXT: s_addk_i32 s32, 0xfe00 ; GFX10-NEXT: v_readlane_b32 s33, v40, 2 ; GFX10-NEXT: s_or_saveexec_b32 s34, -1 @@ -4521,8 +4516,8 @@ define amdgpu_gfx void @test_call_external_void_func_byval_struct_i8_i32() #0 { ; GFX10-SCRATCH-NEXT: s_addc_u32 s1, s1, external_void_func_byval_struct_i8_i32@rel32@hi+12 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s31, 1 ; GFX10-SCRATCH-NEXT: s_swappc_b64 s[30:31], s[0:1] -; GFX10-SCRATCH-NEXT: v_readlane_b32 s30, v40, 0 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s31, v40, 1 +; GFX10-SCRATCH-NEXT: v_readlane_b32 s30, v40, 0 ; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, -16 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s33, v40, 2 ; GFX10-SCRATCH-NEXT: s_or_saveexec_b32 s0, -1 @@ -4559,14 +4554,14 @@ define amdgpu_gfx void @test_call_external_void_func_sret_struct_i8_i32_byval_st ; GFX9-NEXT: v_add_u32_e32 v0, 8, v0 ; GFX9-NEXT: v_lshrrev_b32_e64 v1, 6, s33 ; GFX9-NEXT: v_writelane_b32 v40, s31, 1 -; GFX9-NEXT: s_getpc_b64 s[30:31] -; GFX9-NEXT: s_add_u32 s30, s30, external_void_func_sret_struct_i8_i32_byval_struct_i8_i32@rel32@lo+4 -; GFX9-NEXT: s_addc_u32 s31, s31, external_void_func_sret_struct_i8_i32_byval_struct_i8_i32@rel32@hi+12 -; GFX9-NEXT: s_swappc_b64 s[30:31], s[30:31] +; GFX9-NEXT: s_getpc_b64 s[34:35] +; GFX9-NEXT: s_add_u32 s34, s34, external_void_func_sret_struct_i8_i32_byval_struct_i8_i32@rel32@lo+4 +; GFX9-NEXT: s_addc_u32 s35, s35, external_void_func_sret_struct_i8_i32_byval_struct_i8_i32@rel32@hi+12 +; GFX9-NEXT: s_swappc_b64 s[30:31], s[34:35] ; GFX9-NEXT: buffer_load_ubyte v0, off, s[0:3], s33 offset:8 ; GFX9-NEXT: buffer_load_dword v1, off, s[0:3], s33 offset:12 -; GFX9-NEXT: v_readlane_b32 s30, v40, 0 ; GFX9-NEXT: v_readlane_b32 s31, v40, 1 +; GFX9-NEXT: v_readlane_b32 s30, v40, 0 ; GFX9-NEXT: s_addk_i32 s32, 0xf800 ; GFX9-NEXT: v_readlane_b32 s33, v40, 2 ; GFX9-NEXT: s_waitcnt vmcnt(0) @@ -4598,17 +4593,17 @@ define amdgpu_gfx void @test_call_external_void_func_sret_struct_i8_i32_byval_st ; GFX10-NEXT: v_lshrrev_b32_e64 v0, 5, s33 ; GFX10-NEXT: v_writelane_b32 v40, s30, 0 ; GFX10-NEXT: v_lshrrev_b32_e64 v1, 5, s33 +; GFX10-NEXT: s_getpc_b64 s[34:35] +; GFX10-NEXT: s_add_u32 s34, s34, external_void_func_sret_struct_i8_i32_byval_struct_i8_i32@rel32@lo+4 +; GFX10-NEXT: s_addc_u32 s35, s35, external_void_func_sret_struct_i8_i32_byval_struct_i8_i32@rel32@hi+12 ; GFX10-NEXT: v_add_nc_u32_e32 v0, 8, v0 ; GFX10-NEXT: v_writelane_b32 v40, s31, 1 -; GFX10-NEXT: s_getpc_b64 s[30:31] -; GFX10-NEXT: s_add_u32 s30, s30, external_void_func_sret_struct_i8_i32_byval_struct_i8_i32@rel32@lo+4 -; GFX10-NEXT: s_addc_u32 s31, s31, external_void_func_sret_struct_i8_i32_byval_struct_i8_i32@rel32@hi+12 -; GFX10-NEXT: s_swappc_b64 s[30:31], s[30:31] +; GFX10-NEXT: s_swappc_b64 s[30:31], s[34:35] ; GFX10-NEXT: s_clause 0x1 ; GFX10-NEXT: buffer_load_ubyte v0, off, s[0:3], s33 offset:8 ; GFX10-NEXT: buffer_load_dword v1, off, s[0:3], s33 offset:12 -; GFX10-NEXT: v_readlane_b32 s30, v40, 0 ; GFX10-NEXT: v_readlane_b32 s31, v40, 1 +; GFX10-NEXT: v_readlane_b32 s30, v40, 0 ; GFX10-NEXT: s_addk_i32 s32, 0xfc00 ; GFX10-NEXT: v_readlane_b32 s33, v40, 2 ; GFX10-NEXT: s_waitcnt vmcnt(0) @@ -4650,8 +4645,8 @@ define amdgpu_gfx void @test_call_external_void_func_sret_struct_i8_i32_byval_st ; GFX10-SCRATCH-NEXT: s_clause 0x1 ; GFX10-SCRATCH-NEXT: scratch_load_ubyte v0, off, s33 offset:8 ; GFX10-SCRATCH-NEXT: scratch_load_dword v1, off, s33 offset:12 -; 
GFX10-SCRATCH-NEXT: v_readlane_b32 s30, v40, 0 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s31, v40, 1 +; GFX10-SCRATCH-NEXT: v_readlane_b32 s30, v40, 0 ; GFX10-SCRATCH-NEXT: s_addk_i32 s32, 0xffe0 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s33, v40, 2 ; GFX10-SCRATCH-NEXT: s_waitcnt vmcnt(0) @@ -4689,18 +4684,18 @@ define amdgpu_gfx void @test_call_external_void_func_v16i8() #0 { ; GFX9-NEXT: s_or_saveexec_b64 s[34:35], -1 ; GFX9-NEXT: buffer_store_dword v40, off, s[0:3], s32 ; 4-byte Folded Spill ; GFX9-NEXT: s_mov_b64 exec, s[34:35] -; GFX9-NEXT: v_writelane_b32 v40, s33, 2 -; GFX9-NEXT: v_writelane_b32 v40, s30, 0 -; GFX9-NEXT: v_writelane_b32 v40, s31, 1 -; GFX9-NEXT: s_load_dwordx2 s[30:31], s[30:31], 0x0 +; GFX9-NEXT: s_load_dwordx2 s[34:35], s[34:35], 0x0 ; GFX9-NEXT: v_mov_b32_e32 v0, 0 +; GFX9-NEXT: v_writelane_b32 v40, s33, 2 ; GFX9-NEXT: s_mov_b32 s33, s32 ; GFX9-NEXT: s_addk_i32 s32, 0x400 ; GFX9-NEXT: s_waitcnt lgkmcnt(0) -; GFX9-NEXT: global_load_dwordx4 v[0:3], v0, s[30:31] -; GFX9-NEXT: s_getpc_b64 s[30:31] -; GFX9-NEXT: s_add_u32 s30, s30, external_void_func_v16i8@rel32@lo+4 -; GFX9-NEXT: s_addc_u32 s31, s31, external_void_func_v16i8@rel32@hi+12 +; GFX9-NEXT: global_load_dwordx4 v[0:3], v0, s[34:35] +; GFX9-NEXT: v_writelane_b32 v40, s30, 0 +; GFX9-NEXT: v_writelane_b32 v40, s31, 1 +; GFX9-NEXT: s_getpc_b64 s[34:35] +; GFX9-NEXT: s_add_u32 s34, s34, external_void_func_v16i8@rel32@lo+4 +; GFX9-NEXT: s_addc_u32 s35, s35, external_void_func_v16i8@rel32@hi+12 ; GFX9-NEXT: s_waitcnt vmcnt(0) ; GFX9-NEXT: v_lshrrev_b32_e32 v16, 8, v0 ; GFX9-NEXT: v_lshrrev_b32_e32 v17, 16, v0 @@ -4720,9 +4715,9 @@ define amdgpu_gfx void @test_call_external_void_func_v16i8() #0 { ; GFX9-NEXT: v_mov_b32_e32 v1, v16 ; GFX9-NEXT: v_mov_b32_e32 v2, v17 ; GFX9-NEXT: v_mov_b32_e32 v3, v18 -; GFX9-NEXT: s_swappc_b64 s[30:31], s[30:31] -; GFX9-NEXT: v_readlane_b32 s30, v40, 0 +; GFX9-NEXT: s_swappc_b64 s[30:31], s[34:35] ; GFX9-NEXT: v_readlane_b32 s31, v40, 1 +; GFX9-NEXT: v_readlane_b32 s30, v40, 0 ; GFX9-NEXT: s_addk_i32 s32, 0xfc00 ; GFX9-NEXT: v_readlane_b32 s33, v40, 2 ; GFX9-NEXT: s_or_saveexec_b64 s[34:35], -1 @@ -4739,19 +4734,19 @@ define amdgpu_gfx void @test_call_external_void_func_v16i8() #0 { ; GFX10-NEXT: buffer_store_dword v40, off, s[0:3], s32 ; 4-byte Folded Spill ; GFX10-NEXT: s_waitcnt_depctr 0xffe3 ; GFX10-NEXT: s_mov_b32 exec_lo, s34 -; GFX10-NEXT: v_writelane_b32 v40, s33, 2 +; GFX10-NEXT: s_load_dwordx2 s[34:35], s[34:35], 0x0 ; GFX10-NEXT: v_mov_b32_e32 v0, 0 +; GFX10-NEXT: v_writelane_b32 v40, s33, 2 ; GFX10-NEXT: s_mov_b32 s33, s32 ; GFX10-NEXT: s_addk_i32 s32, 0x200 ; GFX10-NEXT: v_writelane_b32 v40, s30, 0 ; GFX10-NEXT: v_writelane_b32 v40, s31, 1 -; GFX10-NEXT: s_load_dwordx2 s[30:31], s[30:31], 0x0 ; GFX10-NEXT: s_waitcnt lgkmcnt(0) -; GFX10-NEXT: global_load_dwordx4 v[0:3], v0, s[30:31] +; GFX10-NEXT: global_load_dwordx4 v[0:3], v0, s[34:35] ; GFX10-NEXT: s_waitcnt_depctr 0xffe3 -; GFX10-NEXT: s_getpc_b64 s[30:31] -; GFX10-NEXT: s_add_u32 s30, s30, external_void_func_v16i8@rel32@lo+4 -; GFX10-NEXT: s_addc_u32 s31, s31, external_void_func_v16i8@rel32@hi+12 +; GFX10-NEXT: s_getpc_b64 s[34:35] +; GFX10-NEXT: s_add_u32 s34, s34, external_void_func_v16i8@rel32@lo+4 +; GFX10-NEXT: s_addc_u32 s35, s35, external_void_func_v16i8@rel32@hi+12 ; GFX10-NEXT: s_waitcnt vmcnt(0) ; GFX10-NEXT: v_lshrrev_b32_e32 v16, 8, v0 ; GFX10-NEXT: v_lshrrev_b32_e32 v17, 16, v0 @@ -4771,9 +4766,9 @@ define amdgpu_gfx void @test_call_external_void_func_v16i8() #0 { ; GFX10-NEXT: v_mov_b32_e32 v1, v16 ; GFX10-NEXT: 
v_mov_b32_e32 v2, v17 ; GFX10-NEXT: v_mov_b32_e32 v3, v18 -; GFX10-NEXT: s_swappc_b64 s[30:31], s[30:31] -; GFX10-NEXT: v_readlane_b32 s30, v40, 0 +; GFX10-NEXT: s_swappc_b64 s[30:31], s[34:35] ; GFX10-NEXT: v_readlane_b32 s31, v40, 1 +; GFX10-NEXT: v_readlane_b32 s30, v40, 0 ; GFX10-NEXT: s_addk_i32 s32, 0xfe00 ; GFX10-NEXT: v_readlane_b32 s33, v40, 2 ; GFX10-NEXT: s_or_saveexec_b32 s34, -1 @@ -4824,8 +4819,8 @@ define amdgpu_gfx void @test_call_external_void_func_v16i8() #0 { ; GFX10-SCRATCH-NEXT: v_mov_b32_e32 v2, v17 ; GFX10-SCRATCH-NEXT: v_mov_b32_e32 v3, v18 ; GFX10-SCRATCH-NEXT: s_swappc_b64 s[30:31], s[0:1] -; GFX10-SCRATCH-NEXT: v_readlane_b32 s30, v40, 0 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s31, v40, 1 +; GFX10-SCRATCH-NEXT: v_readlane_b32 s30, v40, 0 ; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, -16 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s33, v40, 2 ; GFX10-SCRATCH-NEXT: s_or_saveexec_b32 s0, -1 @@ -4847,88 +4842,91 @@ define void @tail_call_byval_align16(<32 x i32> %val, double %tmp) #0 { ; GFX9-NEXT: s_or_saveexec_b64 s[4:5], -1 ; GFX9-NEXT: buffer_store_dword v40, off, s[0:3], s32 offset:24 ; 4-byte Folded Spill ; GFX9-NEXT: s_mov_b64 exec, s[4:5] -; GFX9-NEXT: v_writelane_b32 v40, s33, 30 +; GFX9-NEXT: v_writelane_b32 v40, s33, 32 ; GFX9-NEXT: s_mov_b32 s33, s32 ; GFX9-NEXT: buffer_load_dword v32, off, s[0:3], s33 offset:20 ; GFX9-NEXT: buffer_load_dword v33, off, s[0:3], s33 offset:16 ; GFX9-NEXT: buffer_load_dword v31, off, s[0:3], s33 -; GFX9-NEXT: v_writelane_b32 v40, s34, 0 -; GFX9-NEXT: v_writelane_b32 v40, s35, 1 -; GFX9-NEXT: v_writelane_b32 v40, s36, 2 -; GFX9-NEXT: v_writelane_b32 v40, s37, 3 -; GFX9-NEXT: v_writelane_b32 v40, s38, 4 -; GFX9-NEXT: v_writelane_b32 v40, s39, 5 -; GFX9-NEXT: v_writelane_b32 v40, s40, 6 -; GFX9-NEXT: v_writelane_b32 v40, s41, 7 -; GFX9-NEXT: v_writelane_b32 v40, s42, 8 -; GFX9-NEXT: v_writelane_b32 v40, s43, 9 -; GFX9-NEXT: v_writelane_b32 v40, s44, 10 -; GFX9-NEXT: v_writelane_b32 v40, s45, 11 -; GFX9-NEXT: v_writelane_b32 v40, s46, 12 -; GFX9-NEXT: v_writelane_b32 v40, s47, 13 -; GFX9-NEXT: v_writelane_b32 v40, s48, 14 -; GFX9-NEXT: v_writelane_b32 v40, s49, 15 -; GFX9-NEXT: v_writelane_b32 v40, s50, 16 -; GFX9-NEXT: v_writelane_b32 v40, s51, 17 -; GFX9-NEXT: v_writelane_b32 v40, s52, 18 -; GFX9-NEXT: v_writelane_b32 v40, s53, 19 -; GFX9-NEXT: v_writelane_b32 v40, s54, 20 -; GFX9-NEXT: v_writelane_b32 v40, s55, 21 -; GFX9-NEXT: v_writelane_b32 v40, s56, 22 -; GFX9-NEXT: v_writelane_b32 v40, s57, 23 -; GFX9-NEXT: v_writelane_b32 v40, s58, 24 -; GFX9-NEXT: v_writelane_b32 v40, s59, 25 -; GFX9-NEXT: v_writelane_b32 v40, s60, 26 -; GFX9-NEXT: v_writelane_b32 v40, s61, 27 +; GFX9-NEXT: v_writelane_b32 v40, s30, 0 +; GFX9-NEXT: v_writelane_b32 v40, s31, 1 +; GFX9-NEXT: v_writelane_b32 v40, s34, 2 +; GFX9-NEXT: v_writelane_b32 v40, s35, 3 +; GFX9-NEXT: v_writelane_b32 v40, s36, 4 +; GFX9-NEXT: v_writelane_b32 v40, s37, 5 +; GFX9-NEXT: v_writelane_b32 v40, s38, 6 +; GFX9-NEXT: v_writelane_b32 v40, s39, 7 +; GFX9-NEXT: v_writelane_b32 v40, s40, 8 +; GFX9-NEXT: v_writelane_b32 v40, s41, 9 +; GFX9-NEXT: v_writelane_b32 v40, s42, 10 +; GFX9-NEXT: v_writelane_b32 v40, s43, 11 +; GFX9-NEXT: v_writelane_b32 v40, s44, 12 +; GFX9-NEXT: v_writelane_b32 v40, s45, 13 +; GFX9-NEXT: v_writelane_b32 v40, s46, 14 +; GFX9-NEXT: v_writelane_b32 v40, s47, 15 +; GFX9-NEXT: v_writelane_b32 v40, s48, 16 +; GFX9-NEXT: v_writelane_b32 v40, s49, 17 +; GFX9-NEXT: v_writelane_b32 v40, s50, 18 +; GFX9-NEXT: v_writelane_b32 v40, s51, 19 +; GFX9-NEXT: v_writelane_b32 v40, s52, 
20 +; GFX9-NEXT: v_writelane_b32 v40, s53, 21 +; GFX9-NEXT: v_writelane_b32 v40, s54, 22 +; GFX9-NEXT: v_writelane_b32 v40, s55, 23 +; GFX9-NEXT: v_writelane_b32 v40, s56, 24 +; GFX9-NEXT: v_writelane_b32 v40, s57, 25 +; GFX9-NEXT: v_writelane_b32 v40, s58, 26 +; GFX9-NEXT: v_writelane_b32 v40, s59, 27 +; GFX9-NEXT: v_writelane_b32 v40, s60, 28 +; GFX9-NEXT: v_writelane_b32 v40, s61, 29 ; GFX9-NEXT: s_addk_i32 s32, 0x800 -; GFX9-NEXT: v_writelane_b32 v40, s62, 28 -; GFX9-NEXT: v_writelane_b32 v40, s63, 29 -; GFX9-NEXT: s_mov_b64 s[4:5], s[30:31] -; GFX9-NEXT: s_getpc_b64 s[6:7] -; GFX9-NEXT: s_add_u32 s6, s6, byval_align16_f64_arg@rel32@lo+4 -; GFX9-NEXT: s_addc_u32 s7, s7, byval_align16_f64_arg@rel32@hi+12 +; GFX9-NEXT: v_writelane_b32 v40, s62, 30 +; GFX9-NEXT: v_writelane_b32 v40, s63, 31 +; GFX9-NEXT: s_getpc_b64 s[4:5] +; GFX9-NEXT: s_add_u32 s4, s4, byval_align16_f64_arg@rel32@lo+4 +; GFX9-NEXT: s_addc_u32 s5, s5, byval_align16_f64_arg@rel32@hi+12 ; GFX9-NEXT: s_waitcnt vmcnt(2) ; GFX9-NEXT: buffer_store_dword v32, off, s[0:3], s32 offset:4 ; GFX9-NEXT: s_waitcnt vmcnt(2) ; GFX9-NEXT: buffer_store_dword v33, off, s[0:3], s32 -; GFX9-NEXT: s_swappc_b64 s[30:31], s[6:7] -; GFX9-NEXT: v_readlane_b32 s63, v40, 29 -; GFX9-NEXT: v_readlane_b32 s62, v40, 28 -; GFX9-NEXT: v_readlane_b32 s61, v40, 27 -; GFX9-NEXT: v_readlane_b32 s60, v40, 26 -; GFX9-NEXT: v_readlane_b32 s59, v40, 25 -; GFX9-NEXT: v_readlane_b32 s58, v40, 24 -; GFX9-NEXT: v_readlane_b32 s57, v40, 23 -; GFX9-NEXT: v_readlane_b32 s56, v40, 22 -; GFX9-NEXT: v_readlane_b32 s55, v40, 21 -; GFX9-NEXT: v_readlane_b32 s54, v40, 20 -; GFX9-NEXT: v_readlane_b32 s53, v40, 19 -; GFX9-NEXT: v_readlane_b32 s52, v40, 18 -; GFX9-NEXT: v_readlane_b32 s51, v40, 17 -; GFX9-NEXT: v_readlane_b32 s50, v40, 16 -; GFX9-NEXT: v_readlane_b32 s49, v40, 15 -; GFX9-NEXT: v_readlane_b32 s48, v40, 14 -; GFX9-NEXT: v_readlane_b32 s47, v40, 13 -; GFX9-NEXT: v_readlane_b32 s46, v40, 12 -; GFX9-NEXT: v_readlane_b32 s45, v40, 11 -; GFX9-NEXT: v_readlane_b32 s44, v40, 10 -; GFX9-NEXT: v_readlane_b32 s43, v40, 9 -; GFX9-NEXT: v_readlane_b32 s42, v40, 8 -; GFX9-NEXT: v_readlane_b32 s41, v40, 7 -; GFX9-NEXT: v_readlane_b32 s40, v40, 6 -; GFX9-NEXT: v_readlane_b32 s39, v40, 5 -; GFX9-NEXT: v_readlane_b32 s38, v40, 4 -; GFX9-NEXT: v_readlane_b32 s37, v40, 3 -; GFX9-NEXT: v_readlane_b32 s36, v40, 2 -; GFX9-NEXT: v_readlane_b32 s35, v40, 1 -; GFX9-NEXT: v_readlane_b32 s34, v40, 0 +; GFX9-NEXT: s_swappc_b64 s[30:31], s[4:5] +; GFX9-NEXT: v_readlane_b32 s63, v40, 31 +; GFX9-NEXT: v_readlane_b32 s62, v40, 30 +; GFX9-NEXT: v_readlane_b32 s61, v40, 29 +; GFX9-NEXT: v_readlane_b32 s60, v40, 28 +; GFX9-NEXT: v_readlane_b32 s59, v40, 27 +; GFX9-NEXT: v_readlane_b32 s58, v40, 26 +; GFX9-NEXT: v_readlane_b32 s57, v40, 25 +; GFX9-NEXT: v_readlane_b32 s56, v40, 24 +; GFX9-NEXT: v_readlane_b32 s55, v40, 23 +; GFX9-NEXT: v_readlane_b32 s54, v40, 22 +; GFX9-NEXT: v_readlane_b32 s53, v40, 21 +; GFX9-NEXT: v_readlane_b32 s52, v40, 20 +; GFX9-NEXT: v_readlane_b32 s51, v40, 19 +; GFX9-NEXT: v_readlane_b32 s50, v40, 18 +; GFX9-NEXT: v_readlane_b32 s49, v40, 17 +; GFX9-NEXT: v_readlane_b32 s48, v40, 16 +; GFX9-NEXT: v_readlane_b32 s47, v40, 15 +; GFX9-NEXT: v_readlane_b32 s46, v40, 14 +; GFX9-NEXT: v_readlane_b32 s45, v40, 13 +; GFX9-NEXT: v_readlane_b32 s44, v40, 12 +; GFX9-NEXT: v_readlane_b32 s43, v40, 11 +; GFX9-NEXT: v_readlane_b32 s42, v40, 10 +; GFX9-NEXT: v_readlane_b32 s41, v40, 9 +; GFX9-NEXT: v_readlane_b32 s40, v40, 8 +; GFX9-NEXT: v_readlane_b32 s39, v40, 7 +; GFX9-NEXT: 
v_readlane_b32 s38, v40, 6 +; GFX9-NEXT: v_readlane_b32 s37, v40, 5 +; GFX9-NEXT: v_readlane_b32 s36, v40, 4 +; GFX9-NEXT: v_readlane_b32 s35, v40, 3 +; GFX9-NEXT: v_readlane_b32 s34, v40, 2 +; GFX9-NEXT: v_readlane_b32 s31, v40, 1 +; GFX9-NEXT: v_readlane_b32 s30, v40, 0 ; GFX9-NEXT: s_addk_i32 s32, 0xf800 -; GFX9-NEXT: v_readlane_b32 s33, v40, 30 -; GFX9-NEXT: s_or_saveexec_b64 s[6:7], -1 +; GFX9-NEXT: v_readlane_b32 s33, v40, 32 +; GFX9-NEXT: s_or_saveexec_b64 s[4:5], -1 ; GFX9-NEXT: buffer_load_dword v40, off, s[0:3], s32 offset:24 ; 4-byte Folded Reload -; GFX9-NEXT: s_mov_b64 exec, s[6:7] +; GFX9-NEXT: s_mov_b64 exec, s[4:5] ; GFX9-NEXT: s_waitcnt vmcnt(0) -; GFX9-NEXT: s_setpc_b64 s[4:5] +; GFX9-NEXT: s_setpc_b64 s[30:31] ; ; GFX10-LABEL: tail_call_byval_align16: ; GFX10: ; %bb.0: ; %entry @@ -4938,90 +4936,93 @@ define void @tail_call_byval_align16(<32 x i32> %val, double %tmp) #0 { ; GFX10-NEXT: buffer_store_dword v40, off, s[0:3], s32 offset:24 ; 4-byte Folded Spill ; GFX10-NEXT: s_waitcnt_depctr 0xffe3 ; GFX10-NEXT: s_mov_b32 exec_lo, s4 -; GFX10-NEXT: v_writelane_b32 v40, s33, 30 +; GFX10-NEXT: s_mov_b32 s6, s33 ; GFX10-NEXT: s_mov_b32 s33, s32 ; GFX10-NEXT: s_clause 0x2 ; GFX10-NEXT: buffer_load_dword v32, off, s[0:3], s33 offset:20 ; GFX10-NEXT: buffer_load_dword v33, off, s[0:3], s33 offset:16 ; GFX10-NEXT: buffer_load_dword v31, off, s[0:3], s33 +; GFX10-NEXT: v_writelane_b32 v40, s30, 0 ; GFX10-NEXT: s_addk_i32 s32, 0x400 -; GFX10-NEXT: v_writelane_b32 v40, s34, 0 -; GFX10-NEXT: s_mov_b64 s[4:5], s[30:31] -; GFX10-NEXT: s_getpc_b64 s[6:7] -; GFX10-NEXT: s_add_u32 s6, s6, byval_align16_f64_arg@rel32@lo+4 -; GFX10-NEXT: s_addc_u32 s7, s7, byval_align16_f64_arg@rel32@hi+12 +; GFX10-NEXT: s_getpc_b64 s[4:5] +; GFX10-NEXT: s_add_u32 s4, s4, byval_align16_f64_arg@rel32@lo+4 +; GFX10-NEXT: s_addc_u32 s5, s5, byval_align16_f64_arg@rel32@hi+12 ; GFX10-NEXT: s_waitcnt vmcnt(2) ; GFX10-NEXT: buffer_store_dword v32, off, s[0:3], s32 offset:4 ; GFX10-NEXT: s_waitcnt vmcnt(1) ; GFX10-NEXT: buffer_store_dword v33, off, s[0:3], s32 -; GFX10-NEXT: v_writelane_b32 v40, s35, 1 -; GFX10-NEXT: v_writelane_b32 v40, s36, 2 -; GFX10-NEXT: v_writelane_b32 v40, s37, 3 -; GFX10-NEXT: v_writelane_b32 v40, s38, 4 -; GFX10-NEXT: v_writelane_b32 v40, s39, 5 -; GFX10-NEXT: v_writelane_b32 v40, s40, 6 -; GFX10-NEXT: v_writelane_b32 v40, s41, 7 -; GFX10-NEXT: v_writelane_b32 v40, s42, 8 -; GFX10-NEXT: v_writelane_b32 v40, s43, 9 -; GFX10-NEXT: v_writelane_b32 v40, s44, 10 -; GFX10-NEXT: v_writelane_b32 v40, s45, 11 -; GFX10-NEXT: v_writelane_b32 v40, s46, 12 -; GFX10-NEXT: v_writelane_b32 v40, s47, 13 -; GFX10-NEXT: v_writelane_b32 v40, s48, 14 -; GFX10-NEXT: v_writelane_b32 v40, s49, 15 -; GFX10-NEXT: v_writelane_b32 v40, s50, 16 -; GFX10-NEXT: v_writelane_b32 v40, s51, 17 -; GFX10-NEXT: v_writelane_b32 v40, s52, 18 -; GFX10-NEXT: v_writelane_b32 v40, s53, 19 -; GFX10-NEXT: v_writelane_b32 v40, s54, 20 -; GFX10-NEXT: v_writelane_b32 v40, s55, 21 -; GFX10-NEXT: v_writelane_b32 v40, s56, 22 -; GFX10-NEXT: v_writelane_b32 v40, s57, 23 -; GFX10-NEXT: v_writelane_b32 v40, s58, 24 -; GFX10-NEXT: v_writelane_b32 v40, s59, 25 -; GFX10-NEXT: v_writelane_b32 v40, s60, 26 -; GFX10-NEXT: v_writelane_b32 v40, s61, 27 -; GFX10-NEXT: v_writelane_b32 v40, s62, 28 -; GFX10-NEXT: v_writelane_b32 v40, s63, 29 -; GFX10-NEXT: s_swappc_b64 s[30:31], s[6:7] -; GFX10-NEXT: v_readlane_b32 s63, v40, 29 -; GFX10-NEXT: v_readlane_b32 s62, v40, 28 -; GFX10-NEXT: v_readlane_b32 s61, v40, 27 -; GFX10-NEXT: v_readlane_b32 s60, v40, 26 
-; GFX10-NEXT: v_readlane_b32 s59, v40, 25 -; GFX10-NEXT: v_readlane_b32 s58, v40, 24 -; GFX10-NEXT: v_readlane_b32 s57, v40, 23 -; GFX10-NEXT: v_readlane_b32 s56, v40, 22 -; GFX10-NEXT: v_readlane_b32 s55, v40, 21 -; GFX10-NEXT: v_readlane_b32 s54, v40, 20 -; GFX10-NEXT: v_readlane_b32 s53, v40, 19 -; GFX10-NEXT: v_readlane_b32 s52, v40, 18 -; GFX10-NEXT: v_readlane_b32 s51, v40, 17 -; GFX10-NEXT: v_readlane_b32 s50, v40, 16 -; GFX10-NEXT: v_readlane_b32 s49, v40, 15 -; GFX10-NEXT: v_readlane_b32 s48, v40, 14 -; GFX10-NEXT: v_readlane_b32 s47, v40, 13 -; GFX10-NEXT: v_readlane_b32 s46, v40, 12 -; GFX10-NEXT: v_readlane_b32 s45, v40, 11 -; GFX10-NEXT: v_readlane_b32 s44, v40, 10 -; GFX10-NEXT: v_readlane_b32 s43, v40, 9 -; GFX10-NEXT: v_readlane_b32 s42, v40, 8 -; GFX10-NEXT: v_readlane_b32 s41, v40, 7 -; GFX10-NEXT: v_readlane_b32 s40, v40, 6 -; GFX10-NEXT: v_readlane_b32 s39, v40, 5 -; GFX10-NEXT: v_readlane_b32 s38, v40, 4 -; GFX10-NEXT: v_readlane_b32 s37, v40, 3 -; GFX10-NEXT: v_readlane_b32 s36, v40, 2 -; GFX10-NEXT: v_readlane_b32 s35, v40, 1 -; GFX10-NEXT: v_readlane_b32 s34, v40, 0 +; GFX10-NEXT: v_writelane_b32 v40, s31, 1 +; GFX10-NEXT: v_writelane_b32 v40, s34, 2 +; GFX10-NEXT: v_writelane_b32 v40, s35, 3 +; GFX10-NEXT: v_writelane_b32 v40, s36, 4 +; GFX10-NEXT: v_writelane_b32 v40, s37, 5 +; GFX10-NEXT: v_writelane_b32 v40, s38, 6 +; GFX10-NEXT: v_writelane_b32 v40, s39, 7 +; GFX10-NEXT: v_writelane_b32 v40, s40, 8 +; GFX10-NEXT: v_writelane_b32 v40, s41, 9 +; GFX10-NEXT: v_writelane_b32 v40, s42, 10 +; GFX10-NEXT: v_writelane_b32 v40, s43, 11 +; GFX10-NEXT: v_writelane_b32 v40, s44, 12 +; GFX10-NEXT: v_writelane_b32 v40, s45, 13 +; GFX10-NEXT: v_writelane_b32 v40, s46, 14 +; GFX10-NEXT: v_writelane_b32 v40, s47, 15 +; GFX10-NEXT: v_writelane_b32 v40, s48, 16 +; GFX10-NEXT: v_writelane_b32 v40, s49, 17 +; GFX10-NEXT: v_writelane_b32 v40, s50, 18 +; GFX10-NEXT: v_writelane_b32 v40, s51, 19 +; GFX10-NEXT: v_writelane_b32 v40, s52, 20 +; GFX10-NEXT: v_writelane_b32 v40, s53, 21 +; GFX10-NEXT: v_writelane_b32 v40, s54, 22 +; GFX10-NEXT: v_writelane_b32 v40, s55, 23 +; GFX10-NEXT: v_writelane_b32 v40, s56, 24 +; GFX10-NEXT: v_writelane_b32 v40, s57, 25 +; GFX10-NEXT: v_writelane_b32 v40, s58, 26 +; GFX10-NEXT: v_writelane_b32 v40, s59, 27 +; GFX10-NEXT: v_writelane_b32 v40, s60, 28 +; GFX10-NEXT: v_writelane_b32 v40, s61, 29 +; GFX10-NEXT: v_writelane_b32 v40, s62, 30 +; GFX10-NEXT: v_writelane_b32 v40, s63, 31 +; GFX10-NEXT: s_swappc_b64 s[30:31], s[4:5] +; GFX10-NEXT: v_readlane_b32 s63, v40, 31 +; GFX10-NEXT: v_readlane_b32 s62, v40, 30 +; GFX10-NEXT: v_readlane_b32 s61, v40, 29 +; GFX10-NEXT: v_readlane_b32 s60, v40, 28 +; GFX10-NEXT: v_readlane_b32 s59, v40, 27 +; GFX10-NEXT: v_readlane_b32 s58, v40, 26 +; GFX10-NEXT: v_readlane_b32 s57, v40, 25 +; GFX10-NEXT: v_readlane_b32 s56, v40, 24 +; GFX10-NEXT: v_readlane_b32 s55, v40, 23 +; GFX10-NEXT: v_readlane_b32 s54, v40, 22 +; GFX10-NEXT: v_readlane_b32 s53, v40, 21 +; GFX10-NEXT: v_readlane_b32 s52, v40, 20 +; GFX10-NEXT: v_readlane_b32 s51, v40, 19 +; GFX10-NEXT: v_readlane_b32 s50, v40, 18 +; GFX10-NEXT: v_readlane_b32 s49, v40, 17 +; GFX10-NEXT: v_readlane_b32 s48, v40, 16 +; GFX10-NEXT: v_readlane_b32 s47, v40, 15 +; GFX10-NEXT: v_readlane_b32 s46, v40, 14 +; GFX10-NEXT: v_readlane_b32 s45, v40, 13 +; GFX10-NEXT: v_readlane_b32 s44, v40, 12 +; GFX10-NEXT: v_readlane_b32 s43, v40, 11 +; GFX10-NEXT: v_readlane_b32 s42, v40, 10 +; GFX10-NEXT: v_readlane_b32 s41, v40, 9 +; GFX10-NEXT: v_readlane_b32 s40, v40, 8 +; GFX10-NEXT: 
v_readlane_b32 s39, v40, 7 +; GFX10-NEXT: v_readlane_b32 s38, v40, 6 +; GFX10-NEXT: v_readlane_b32 s37, v40, 5 +; GFX10-NEXT: v_readlane_b32 s36, v40, 4 +; GFX10-NEXT: v_readlane_b32 s35, v40, 3 +; GFX10-NEXT: v_readlane_b32 s34, v40, 2 +; GFX10-NEXT: v_readlane_b32 s31, v40, 1 +; GFX10-NEXT: v_readlane_b32 s30, v40, 0 ; GFX10-NEXT: s_addk_i32 s32, 0xfc00 -; GFX10-NEXT: v_readlane_b32 s33, v40, 30 -; GFX10-NEXT: s_or_saveexec_b32 s6, -1 +; GFX10-NEXT: s_mov_b32 s33, s6 +; GFX10-NEXT: s_or_saveexec_b32 s4, -1 ; GFX10-NEXT: buffer_load_dword v40, off, s[0:3], s32 offset:24 ; 4-byte Folded Reload ; GFX10-NEXT: s_waitcnt_depctr 0xffe3 -; GFX10-NEXT: s_mov_b32 exec_lo, s6 +; GFX10-NEXT: s_mov_b32 exec_lo, s4 ; GFX10-NEXT: s_waitcnt vmcnt(0) -; GFX10-NEXT: s_setpc_b64 s[4:5] +; GFX10-NEXT: s_setpc_b64 s[30:31] ; ; GFX10-SCRATCH-LABEL: tail_call_byval_align16: ; GFX10-SCRATCH: ; %bb.0: ; %entry @@ -5031,87 +5032,90 @@ define void @tail_call_byval_align16(<32 x i32> %val, double %tmp) #0 { ; GFX10-SCRATCH-NEXT: scratch_store_dword off, v40, s32 offset:24 ; 4-byte Folded Spill ; GFX10-SCRATCH-NEXT: s_waitcnt_depctr 0xffe3 ; GFX10-SCRATCH-NEXT: s_mov_b32 exec_lo, s0 -; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s33, 30 +; GFX10-SCRATCH-NEXT: s_mov_b32 s4, s33 ; GFX10-SCRATCH-NEXT: s_mov_b32 s33, s32 ; GFX10-SCRATCH-NEXT: s_clause 0x1 ; GFX10-SCRATCH-NEXT: scratch_load_dwordx2 v[32:33], off, s33 offset:16 ; GFX10-SCRATCH-NEXT: scratch_load_dword v31, off, s33 +; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s30, 0 ; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, 32 -; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s34, 0 -; GFX10-SCRATCH-NEXT: s_mov_b64 s[4:5], s[30:31] ; GFX10-SCRATCH-NEXT: s_getpc_b64 s[0:1] ; GFX10-SCRATCH-NEXT: s_add_u32 s0, s0, byval_align16_f64_arg@rel32@lo+4 ; GFX10-SCRATCH-NEXT: s_addc_u32 s1, s1, byval_align16_f64_arg@rel32@hi+12 -; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s35, 1 -; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s36, 2 -; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s37, 3 -; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s38, 4 -; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s39, 5 -; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s40, 6 -; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s41, 7 -; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s42, 8 -; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s43, 9 -; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s44, 10 -; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s45, 11 -; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s46, 12 -; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s47, 13 -; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s48, 14 -; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s49, 15 -; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s50, 16 -; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s51, 17 -; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s52, 18 -; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s53, 19 -; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s54, 20 -; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s55, 21 -; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s56, 22 -; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s57, 23 -; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s58, 24 -; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s59, 25 -; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s60, 26 -; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s61, 27 -; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s62, 28 -; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s63, 29 +; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s31, 1 +; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s34, 2 +; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s35, 3 +; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, 
s36, 4 +; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s37, 5 +; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s38, 6 +; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s39, 7 +; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s40, 8 +; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s41, 9 +; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s42, 10 +; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s43, 11 +; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s44, 12 +; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s45, 13 +; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s46, 14 +; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s47, 15 +; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s48, 16 +; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s49, 17 +; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s50, 18 +; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s51, 19 +; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s52, 20 +; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s53, 21 +; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s54, 22 +; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s55, 23 +; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s56, 24 +; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s57, 25 +; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s58, 26 +; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s59, 27 +; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s60, 28 +; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s61, 29 +; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s62, 30 +; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s63, 31 ; GFX10-SCRATCH-NEXT: s_waitcnt vmcnt(1) ; GFX10-SCRATCH-NEXT: scratch_store_dwordx2 off, v[32:33], s32 ; GFX10-SCRATCH-NEXT: s_swappc_b64 s[30:31], s[0:1] -; GFX10-SCRATCH-NEXT: v_readlane_b32 s63, v40, 29 -; GFX10-SCRATCH-NEXT: v_readlane_b32 s62, v40, 28 -; GFX10-SCRATCH-NEXT: v_readlane_b32 s61, v40, 27 -; GFX10-SCRATCH-NEXT: v_readlane_b32 s60, v40, 26 -; GFX10-SCRATCH-NEXT: v_readlane_b32 s59, v40, 25 -; GFX10-SCRATCH-NEXT: v_readlane_b32 s58, v40, 24 -; GFX10-SCRATCH-NEXT: v_readlane_b32 s57, v40, 23 -; GFX10-SCRATCH-NEXT: v_readlane_b32 s56, v40, 22 -; GFX10-SCRATCH-NEXT: v_readlane_b32 s55, v40, 21 -; GFX10-SCRATCH-NEXT: v_readlane_b32 s54, v40, 20 -; GFX10-SCRATCH-NEXT: v_readlane_b32 s53, v40, 19 -; GFX10-SCRATCH-NEXT: v_readlane_b32 s52, v40, 18 -; GFX10-SCRATCH-NEXT: v_readlane_b32 s51, v40, 17 -; GFX10-SCRATCH-NEXT: v_readlane_b32 s50, v40, 16 -; GFX10-SCRATCH-NEXT: v_readlane_b32 s49, v40, 15 -; GFX10-SCRATCH-NEXT: v_readlane_b32 s48, v40, 14 -; GFX10-SCRATCH-NEXT: v_readlane_b32 s47, v40, 13 -; GFX10-SCRATCH-NEXT: v_readlane_b32 s46, v40, 12 -; GFX10-SCRATCH-NEXT: v_readlane_b32 s45, v40, 11 -; GFX10-SCRATCH-NEXT: v_readlane_b32 s44, v40, 10 -; GFX10-SCRATCH-NEXT: v_readlane_b32 s43, v40, 9 -; GFX10-SCRATCH-NEXT: v_readlane_b32 s42, v40, 8 -; GFX10-SCRATCH-NEXT: v_readlane_b32 s41, v40, 7 -; GFX10-SCRATCH-NEXT: v_readlane_b32 s40, v40, 6 -; GFX10-SCRATCH-NEXT: v_readlane_b32 s39, v40, 5 -; GFX10-SCRATCH-NEXT: v_readlane_b32 s38, v40, 4 -; GFX10-SCRATCH-NEXT: v_readlane_b32 s37, v40, 3 -; GFX10-SCRATCH-NEXT: v_readlane_b32 s36, v40, 2 -; GFX10-SCRATCH-NEXT: v_readlane_b32 s35, v40, 1 -; GFX10-SCRATCH-NEXT: v_readlane_b32 s34, v40, 0 +; GFX10-SCRATCH-NEXT: v_readlane_b32 s63, v40, 31 +; GFX10-SCRATCH-NEXT: v_readlane_b32 s62, v40, 30 +; GFX10-SCRATCH-NEXT: v_readlane_b32 s61, v40, 29 +; GFX10-SCRATCH-NEXT: v_readlane_b32 s60, v40, 28 +; GFX10-SCRATCH-NEXT: v_readlane_b32 s59, v40, 27 +; GFX10-SCRATCH-NEXT: v_readlane_b32 s58, v40, 26 +; GFX10-SCRATCH-NEXT: v_readlane_b32 s57, v40, 25 +; GFX10-SCRATCH-NEXT: v_readlane_b32 s56, v40, 24 +; GFX10-SCRATCH-NEXT: v_readlane_b32 s55, v40, 23 +; 
GFX10-SCRATCH-NEXT: v_readlane_b32 s54, v40, 22 +; GFX10-SCRATCH-NEXT: v_readlane_b32 s53, v40, 21 +; GFX10-SCRATCH-NEXT: v_readlane_b32 s52, v40, 20 +; GFX10-SCRATCH-NEXT: v_readlane_b32 s51, v40, 19 +; GFX10-SCRATCH-NEXT: v_readlane_b32 s50, v40, 18 +; GFX10-SCRATCH-NEXT: v_readlane_b32 s49, v40, 17 +; GFX10-SCRATCH-NEXT: v_readlane_b32 s48, v40, 16 +; GFX10-SCRATCH-NEXT: v_readlane_b32 s47, v40, 15 +; GFX10-SCRATCH-NEXT: v_readlane_b32 s46, v40, 14 +; GFX10-SCRATCH-NEXT: v_readlane_b32 s45, v40, 13 +; GFX10-SCRATCH-NEXT: v_readlane_b32 s44, v40, 12 +; GFX10-SCRATCH-NEXT: v_readlane_b32 s43, v40, 11 +; GFX10-SCRATCH-NEXT: v_readlane_b32 s42, v40, 10 +; GFX10-SCRATCH-NEXT: v_readlane_b32 s41, v40, 9 +; GFX10-SCRATCH-NEXT: v_readlane_b32 s40, v40, 8 +; GFX10-SCRATCH-NEXT: v_readlane_b32 s39, v40, 7 +; GFX10-SCRATCH-NEXT: v_readlane_b32 s38, v40, 6 +; GFX10-SCRATCH-NEXT: v_readlane_b32 s37, v40, 5 +; GFX10-SCRATCH-NEXT: v_readlane_b32 s36, v40, 4 +; GFX10-SCRATCH-NEXT: v_readlane_b32 s35, v40, 3 +; GFX10-SCRATCH-NEXT: v_readlane_b32 s34, v40, 2 +; GFX10-SCRATCH-NEXT: v_readlane_b32 s31, v40, 1 +; GFX10-SCRATCH-NEXT: v_readlane_b32 s30, v40, 0 ; GFX10-SCRATCH-NEXT: s_addk_i32 s32, 0xffe0 -; GFX10-SCRATCH-NEXT: v_readlane_b32 s33, v40, 30 +; GFX10-SCRATCH-NEXT: s_mov_b32 s33, s4 ; GFX10-SCRATCH-NEXT: s_or_saveexec_b32 s0, -1 ; GFX10-SCRATCH-NEXT: scratch_load_dword v40, off, s32 offset:24 ; 4-byte Folded Reload ; GFX10-SCRATCH-NEXT: s_waitcnt_depctr 0xffe3 ; GFX10-SCRATCH-NEXT: s_mov_b32 exec_lo, s0 ; GFX10-SCRATCH-NEXT: s_waitcnt vmcnt(0) -; GFX10-SCRATCH-NEXT: s_setpc_b64 s[4:5] +; GFX10-SCRATCH-NEXT: s_setpc_b64 s[30:31] entry: %alloca = alloca double, align 8, addrspace(5) tail call amdgpu_gfx void @byval_align16_f64_arg(<32 x i32> %val, double addrspace(5)* byval(double) align 16 %alloca) @@ -5132,13 +5136,13 @@ define amdgpu_gfx void @test_call_external_void_func_i1_imm_inreg() #0 { ; GFX9-NEXT: v_writelane_b32 v40, s30, 0 ; GFX9-NEXT: v_mov_b32_e32 v0, 1 ; GFX9-NEXT: v_writelane_b32 v40, s31, 1 -; GFX9-NEXT: s_getpc_b64 s[30:31] -; GFX9-NEXT: s_add_u32 s30, s30, external_void_func_i1_inreg@rel32@lo+4 -; GFX9-NEXT: s_addc_u32 s31, s31, external_void_func_i1_inreg@rel32@hi+12 +; GFX9-NEXT: s_getpc_b64 s[34:35] +; GFX9-NEXT: s_add_u32 s34, s34, external_void_func_i1_inreg@rel32@lo+4 +; GFX9-NEXT: s_addc_u32 s35, s35, external_void_func_i1_inreg@rel32@hi+12 ; GFX9-NEXT: buffer_store_byte v0, off, s[0:3], s32 -; GFX9-NEXT: s_swappc_b64 s[30:31], s[30:31] -; GFX9-NEXT: v_readlane_b32 s30, v40, 0 +; GFX9-NEXT: s_swappc_b64 s[30:31], s[34:35] ; GFX9-NEXT: v_readlane_b32 s31, v40, 1 +; GFX9-NEXT: v_readlane_b32 s30, v40, 0 ; GFX9-NEXT: s_addk_i32 s32, 0xfc00 ; GFX9-NEXT: v_readlane_b32 s33, v40, 2 ; GFX9-NEXT: s_or_saveexec_b64 s[34:35], -1 @@ -5159,15 +5163,15 @@ define amdgpu_gfx void @test_call_external_void_func_i1_imm_inreg() #0 { ; GFX10-NEXT: v_mov_b32_e32 v0, 1 ; GFX10-NEXT: s_mov_b32 s33, s32 ; GFX10-NEXT: s_addk_i32 s32, 0x200 +; GFX10-NEXT: s_getpc_b64 s[34:35] +; GFX10-NEXT: s_add_u32 s34, s34, external_void_func_i1_inreg@rel32@lo+4 +; GFX10-NEXT: s_addc_u32 s35, s35, external_void_func_i1_inreg@rel32@hi+12 ; GFX10-NEXT: v_writelane_b32 v40, s30, 0 ; GFX10-NEXT: buffer_store_byte v0, off, s[0:3], s32 ; GFX10-NEXT: v_writelane_b32 v40, s31, 1 -; GFX10-NEXT: s_getpc_b64 s[30:31] -; GFX10-NEXT: s_add_u32 s30, s30, external_void_func_i1_inreg@rel32@lo+4 -; GFX10-NEXT: s_addc_u32 s31, s31, external_void_func_i1_inreg@rel32@hi+12 -; GFX10-NEXT: s_swappc_b64 s[30:31], s[30:31] -; 
GFX10-NEXT: v_readlane_b32 s30, v40, 0 +; GFX10-NEXT: s_swappc_b64 s[30:31], s[34:35] ; GFX10-NEXT: v_readlane_b32 s31, v40, 1 +; GFX10-NEXT: v_readlane_b32 s30, v40, 0 ; GFX10-NEXT: s_addk_i32 s32, 0xfe00 ; GFX10-NEXT: v_readlane_b32 s33, v40, 2 ; GFX10-NEXT: s_or_saveexec_b32 s34, -1 @@ -5196,8 +5200,8 @@ define amdgpu_gfx void @test_call_external_void_func_i1_imm_inreg() #0 { ; GFX10-SCRATCH-NEXT: scratch_store_byte off, v0, s32 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s31, 1 ; GFX10-SCRATCH-NEXT: s_swappc_b64 s[30:31], s[0:1] -; GFX10-SCRATCH-NEXT: v_readlane_b32 s30, v40, 0 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s31, v40, 1 +; GFX10-SCRATCH-NEXT: v_readlane_b32 s30, v40, 0 ; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, -16 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s33, v40, 2 ; GFX10-SCRATCH-NEXT: s_or_saveexec_b32 s0, -1 @@ -5224,12 +5228,12 @@ define amdgpu_gfx void @test_call_external_void_func_i8_imm_inreg(i32) #0 { ; GFX9-NEXT: v_writelane_b32 v40, s30, 1 ; GFX9-NEXT: s_movk_i32 s4, 0x7b ; GFX9-NEXT: v_writelane_b32 v40, s31, 2 -; GFX9-NEXT: s_getpc_b64 s[30:31] -; GFX9-NEXT: s_add_u32 s30, s30, external_void_func_i8_inreg@rel32@lo+4 -; GFX9-NEXT: s_addc_u32 s31, s31, external_void_func_i8_inreg@rel32@hi+12 -; GFX9-NEXT: s_swappc_b64 s[30:31], s[30:31] -; GFX9-NEXT: v_readlane_b32 s30, v40, 1 +; GFX9-NEXT: s_getpc_b64 s[34:35] +; GFX9-NEXT: s_add_u32 s34, s34, external_void_func_i8_inreg@rel32@lo+4 +; GFX9-NEXT: s_addc_u32 s35, s35, external_void_func_i8_inreg@rel32@hi+12 +; GFX9-NEXT: s_swappc_b64 s[30:31], s[34:35] ; GFX9-NEXT: v_readlane_b32 s31, v40, 2 +; GFX9-NEXT: v_readlane_b32 s30, v40, 1 ; GFX9-NEXT: v_readlane_b32 s4, v40, 0 ; GFX9-NEXT: s_addk_i32 s32, 0xfc00 ; GFX9-NEXT: v_readlane_b32 s33, v40, 3 @@ -5250,16 +5254,16 @@ define amdgpu_gfx void @test_call_external_void_func_i8_imm_inreg(i32) #0 { ; GFX10-NEXT: v_writelane_b32 v40, s33, 3 ; GFX10-NEXT: s_mov_b32 s33, s32 ; GFX10-NEXT: s_addk_i32 s32, 0x200 +; GFX10-NEXT: s_getpc_b64 s[34:35] +; GFX10-NEXT: s_add_u32 s34, s34, external_void_func_i8_inreg@rel32@lo+4 +; GFX10-NEXT: s_addc_u32 s35, s35, external_void_func_i8_inreg@rel32@hi+12 ; GFX10-NEXT: v_writelane_b32 v40, s4, 0 ; GFX10-NEXT: s_movk_i32 s4, 0x7b ; GFX10-NEXT: v_writelane_b32 v40, s30, 1 ; GFX10-NEXT: v_writelane_b32 v40, s31, 2 -; GFX10-NEXT: s_getpc_b64 s[30:31] -; GFX10-NEXT: s_add_u32 s30, s30, external_void_func_i8_inreg@rel32@lo+4 -; GFX10-NEXT: s_addc_u32 s31, s31, external_void_func_i8_inreg@rel32@hi+12 -; GFX10-NEXT: s_swappc_b64 s[30:31], s[30:31] -; GFX10-NEXT: v_readlane_b32 s30, v40, 1 +; GFX10-NEXT: s_swappc_b64 s[30:31], s[34:35] ; GFX10-NEXT: v_readlane_b32 s31, v40, 2 +; GFX10-NEXT: v_readlane_b32 s30, v40, 1 ; GFX10-NEXT: v_readlane_b32 s4, v40, 0 ; GFX10-NEXT: s_addk_i32 s32, 0xfe00 ; GFX10-NEXT: v_readlane_b32 s33, v40, 3 @@ -5289,8 +5293,8 @@ define amdgpu_gfx void @test_call_external_void_func_i8_imm_inreg(i32) #0 { ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s30, 1 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s31, 2 ; GFX10-SCRATCH-NEXT: s_swappc_b64 s[30:31], s[0:1] -; GFX10-SCRATCH-NEXT: v_readlane_b32 s30, v40, 1 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s31, v40, 2 +; GFX10-SCRATCH-NEXT: v_readlane_b32 s30, v40, 1 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s4, v40, 0 ; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, -16 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s33, v40, 3 @@ -5318,12 +5322,12 @@ define amdgpu_gfx void @test_call_external_void_func_i16_imm_inreg() #0 { ; GFX9-NEXT: v_writelane_b32 v40, s30, 1 ; GFX9-NEXT: s_movk_i32 s4, 0x7b ; GFX9-NEXT: 
v_writelane_b32 v40, s31, 2 -; GFX9-NEXT: s_getpc_b64 s[30:31] -; GFX9-NEXT: s_add_u32 s30, s30, external_void_func_i16_inreg@rel32@lo+4 -; GFX9-NEXT: s_addc_u32 s31, s31, external_void_func_i16_inreg@rel32@hi+12 -; GFX9-NEXT: s_swappc_b64 s[30:31], s[30:31] -; GFX9-NEXT: v_readlane_b32 s30, v40, 1 +; GFX9-NEXT: s_getpc_b64 s[34:35] +; GFX9-NEXT: s_add_u32 s34, s34, external_void_func_i16_inreg@rel32@lo+4 +; GFX9-NEXT: s_addc_u32 s35, s35, external_void_func_i16_inreg@rel32@hi+12 +; GFX9-NEXT: s_swappc_b64 s[30:31], s[34:35] ; GFX9-NEXT: v_readlane_b32 s31, v40, 2 +; GFX9-NEXT: v_readlane_b32 s30, v40, 1 ; GFX9-NEXT: v_readlane_b32 s4, v40, 0 ; GFX9-NEXT: s_addk_i32 s32, 0xfc00 ; GFX9-NEXT: v_readlane_b32 s33, v40, 3 @@ -5344,16 +5348,16 @@ define amdgpu_gfx void @test_call_external_void_func_i16_imm_inreg() #0 { ; GFX10-NEXT: v_writelane_b32 v40, s33, 3 ; GFX10-NEXT: s_mov_b32 s33, s32 ; GFX10-NEXT: s_addk_i32 s32, 0x200 +; GFX10-NEXT: s_getpc_b64 s[34:35] +; GFX10-NEXT: s_add_u32 s34, s34, external_void_func_i16_inreg@rel32@lo+4 +; GFX10-NEXT: s_addc_u32 s35, s35, external_void_func_i16_inreg@rel32@hi+12 ; GFX10-NEXT: v_writelane_b32 v40, s4, 0 ; GFX10-NEXT: s_movk_i32 s4, 0x7b ; GFX10-NEXT: v_writelane_b32 v40, s30, 1 ; GFX10-NEXT: v_writelane_b32 v40, s31, 2 -; GFX10-NEXT: s_getpc_b64 s[30:31] -; GFX10-NEXT: s_add_u32 s30, s30, external_void_func_i16_inreg@rel32@lo+4 -; GFX10-NEXT: s_addc_u32 s31, s31, external_void_func_i16_inreg@rel32@hi+12 -; GFX10-NEXT: s_swappc_b64 s[30:31], s[30:31] -; GFX10-NEXT: v_readlane_b32 s30, v40, 1 +; GFX10-NEXT: s_swappc_b64 s[30:31], s[34:35] ; GFX10-NEXT: v_readlane_b32 s31, v40, 2 +; GFX10-NEXT: v_readlane_b32 s30, v40, 1 ; GFX10-NEXT: v_readlane_b32 s4, v40, 0 ; GFX10-NEXT: s_addk_i32 s32, 0xfe00 ; GFX10-NEXT: v_readlane_b32 s33, v40, 3 @@ -5383,8 +5387,8 @@ define amdgpu_gfx void @test_call_external_void_func_i16_imm_inreg() #0 { ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s30, 1 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s31, 2 ; GFX10-SCRATCH-NEXT: s_swappc_b64 s[30:31], s[0:1] -; GFX10-SCRATCH-NEXT: v_readlane_b32 s30, v40, 1 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s31, v40, 2 +; GFX10-SCRATCH-NEXT: v_readlane_b32 s30, v40, 1 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s4, v40, 0 ; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, -16 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s33, v40, 3 @@ -5412,12 +5416,12 @@ define amdgpu_gfx void @test_call_external_void_func_i32_imm_inreg(i32) #0 { ; GFX9-NEXT: v_writelane_b32 v40, s30, 1 ; GFX9-NEXT: s_mov_b32 s4, 42 ; GFX9-NEXT: v_writelane_b32 v40, s31, 2 -; GFX9-NEXT: s_getpc_b64 s[30:31] -; GFX9-NEXT: s_add_u32 s30, s30, external_void_func_i32_inreg@rel32@lo+4 -; GFX9-NEXT: s_addc_u32 s31, s31, external_void_func_i32_inreg@rel32@hi+12 -; GFX9-NEXT: s_swappc_b64 s[30:31], s[30:31] -; GFX9-NEXT: v_readlane_b32 s30, v40, 1 +; GFX9-NEXT: s_getpc_b64 s[34:35] +; GFX9-NEXT: s_add_u32 s34, s34, external_void_func_i32_inreg@rel32@lo+4 +; GFX9-NEXT: s_addc_u32 s35, s35, external_void_func_i32_inreg@rel32@hi+12 +; GFX9-NEXT: s_swappc_b64 s[30:31], s[34:35] ; GFX9-NEXT: v_readlane_b32 s31, v40, 2 +; GFX9-NEXT: v_readlane_b32 s30, v40, 1 ; GFX9-NEXT: v_readlane_b32 s4, v40, 0 ; GFX9-NEXT: s_addk_i32 s32, 0xfc00 ; GFX9-NEXT: v_readlane_b32 s33, v40, 3 @@ -5438,16 +5442,16 @@ define amdgpu_gfx void @test_call_external_void_func_i32_imm_inreg(i32) #0 { ; GFX10-NEXT: v_writelane_b32 v40, s33, 3 ; GFX10-NEXT: s_mov_b32 s33, s32 ; GFX10-NEXT: s_addk_i32 s32, 0x200 +; GFX10-NEXT: s_getpc_b64 s[34:35] +; GFX10-NEXT: s_add_u32 s34, s34, 
external_void_func_i32_inreg@rel32@lo+4 +; GFX10-NEXT: s_addc_u32 s35, s35, external_void_func_i32_inreg@rel32@hi+12 ; GFX10-NEXT: v_writelane_b32 v40, s4, 0 ; GFX10-NEXT: s_mov_b32 s4, 42 ; GFX10-NEXT: v_writelane_b32 v40, s30, 1 ; GFX10-NEXT: v_writelane_b32 v40, s31, 2 -; GFX10-NEXT: s_getpc_b64 s[30:31] -; GFX10-NEXT: s_add_u32 s30, s30, external_void_func_i32_inreg@rel32@lo+4 -; GFX10-NEXT: s_addc_u32 s31, s31, external_void_func_i32_inreg@rel32@hi+12 -; GFX10-NEXT: s_swappc_b64 s[30:31], s[30:31] -; GFX10-NEXT: v_readlane_b32 s30, v40, 1 +; GFX10-NEXT: s_swappc_b64 s[30:31], s[34:35] ; GFX10-NEXT: v_readlane_b32 s31, v40, 2 +; GFX10-NEXT: v_readlane_b32 s30, v40, 1 ; GFX10-NEXT: v_readlane_b32 s4, v40, 0 ; GFX10-NEXT: s_addk_i32 s32, 0xfe00 ; GFX10-NEXT: v_readlane_b32 s33, v40, 3 @@ -5477,8 +5481,8 @@ define amdgpu_gfx void @test_call_external_void_func_i32_imm_inreg(i32) #0 { ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s30, 1 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s31, 2 ; GFX10-SCRATCH-NEXT: s_swappc_b64 s[30:31], s[0:1] -; GFX10-SCRATCH-NEXT: v_readlane_b32 s30, v40, 1 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s31, v40, 2 +; GFX10-SCRATCH-NEXT: v_readlane_b32 s30, v40, 1 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s4, v40, 0 ; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, -16 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s33, v40, 3 @@ -5508,12 +5512,12 @@ define amdgpu_gfx void @test_call_external_void_func_i64_imm_inreg() #0 { ; GFX9-NEXT: s_movk_i32 s4, 0x7b ; GFX9-NEXT: s_mov_b32 s5, 0 ; GFX9-NEXT: v_writelane_b32 v40, s31, 3 -; GFX9-NEXT: s_getpc_b64 s[30:31] -; GFX9-NEXT: s_add_u32 s30, s30, external_void_func_i64_inreg@rel32@lo+4 -; GFX9-NEXT: s_addc_u32 s31, s31, external_void_func_i64_inreg@rel32@hi+12 -; GFX9-NEXT: s_swappc_b64 s[30:31], s[30:31] -; GFX9-NEXT: v_readlane_b32 s30, v40, 2 +; GFX9-NEXT: s_getpc_b64 s[34:35] +; GFX9-NEXT: s_add_u32 s34, s34, external_void_func_i64_inreg@rel32@lo+4 +; GFX9-NEXT: s_addc_u32 s35, s35, external_void_func_i64_inreg@rel32@hi+12 +; GFX9-NEXT: s_swappc_b64 s[30:31], s[34:35] ; GFX9-NEXT: v_readlane_b32 s31, v40, 3 +; GFX9-NEXT: v_readlane_b32 s30, v40, 2 ; GFX9-NEXT: v_readlane_b32 s5, v40, 1 ; GFX9-NEXT: v_readlane_b32 s4, v40, 0 ; GFX9-NEXT: s_addk_i32 s32, 0xfc00 @@ -5535,18 +5539,18 @@ define amdgpu_gfx void @test_call_external_void_func_i64_imm_inreg() #0 { ; GFX10-NEXT: v_writelane_b32 v40, s33, 4 ; GFX10-NEXT: s_mov_b32 s33, s32 ; GFX10-NEXT: s_addk_i32 s32, 0x200 +; GFX10-NEXT: s_getpc_b64 s[34:35] +; GFX10-NEXT: s_add_u32 s34, s34, external_void_func_i64_inreg@rel32@lo+4 +; GFX10-NEXT: s_addc_u32 s35, s35, external_void_func_i64_inreg@rel32@hi+12 ; GFX10-NEXT: v_writelane_b32 v40, s4, 0 ; GFX10-NEXT: s_movk_i32 s4, 0x7b ; GFX10-NEXT: v_writelane_b32 v40, s5, 1 ; GFX10-NEXT: s_mov_b32 s5, 0 ; GFX10-NEXT: v_writelane_b32 v40, s30, 2 ; GFX10-NEXT: v_writelane_b32 v40, s31, 3 -; GFX10-NEXT: s_getpc_b64 s[30:31] -; GFX10-NEXT: s_add_u32 s30, s30, external_void_func_i64_inreg@rel32@lo+4 -; GFX10-NEXT: s_addc_u32 s31, s31, external_void_func_i64_inreg@rel32@hi+12 -; GFX10-NEXT: s_swappc_b64 s[30:31], s[30:31] -; GFX10-NEXT: v_readlane_b32 s30, v40, 2 +; GFX10-NEXT: s_swappc_b64 s[30:31], s[34:35] ; GFX10-NEXT: v_readlane_b32 s31, v40, 3 +; GFX10-NEXT: v_readlane_b32 s30, v40, 2 ; GFX10-NEXT: v_readlane_b32 s5, v40, 1 ; GFX10-NEXT: v_readlane_b32 s4, v40, 0 ; GFX10-NEXT: s_addk_i32 s32, 0xfe00 @@ -5579,8 +5583,8 @@ define amdgpu_gfx void @test_call_external_void_func_i64_imm_inreg() #0 { ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s30, 2 ; 
GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s31, 3 ; GFX10-SCRATCH-NEXT: s_swappc_b64 s[30:31], s[0:1] -; GFX10-SCRATCH-NEXT: v_readlane_b32 s30, v40, 2 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s31, v40, 3 +; GFX10-SCRATCH-NEXT: v_readlane_b32 s30, v40, 2 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s5, v40, 1 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s4, v40, 0 ; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, -16 @@ -5606,19 +5610,19 @@ define amdgpu_gfx void @test_call_external_void_func_v2i64_inreg() #0 { ; GFX9-NEXT: v_writelane_b32 v40, s4, 0 ; GFX9-NEXT: v_writelane_b32 v40, s5, 1 ; GFX9-NEXT: v_writelane_b32 v40, s6, 2 +; GFX9-NEXT: s_mov_b64 s[34:35], 0 ; GFX9-NEXT: v_writelane_b32 v40, s7, 3 -; GFX9-NEXT: v_writelane_b32 v40, s30, 4 -; GFX9-NEXT: v_writelane_b32 v40, s31, 5 -; GFX9-NEXT: s_mov_b64 s[30:31], 0 -; GFX9-NEXT: s_load_dwordx4 s[4:7], s[30:31], 0x0 +; GFX9-NEXT: s_load_dwordx4 s[4:7], s[34:35], 0x0 ; GFX9-NEXT: s_mov_b32 s33, s32 ; GFX9-NEXT: s_addk_i32 s32, 0x400 -; GFX9-NEXT: s_getpc_b64 s[30:31] -; GFX9-NEXT: s_add_u32 s30, s30, external_void_func_v2i64_inreg@rel32@lo+4 -; GFX9-NEXT: s_addc_u32 s31, s31, external_void_func_v2i64_inreg@rel32@hi+12 -; GFX9-NEXT: s_swappc_b64 s[30:31], s[30:31] -; GFX9-NEXT: v_readlane_b32 s30, v40, 4 +; GFX9-NEXT: v_writelane_b32 v40, s30, 4 +; GFX9-NEXT: v_writelane_b32 v40, s31, 5 +; GFX9-NEXT: s_getpc_b64 s[34:35] +; GFX9-NEXT: s_add_u32 s34, s34, external_void_func_v2i64_inreg@rel32@lo+4 +; GFX9-NEXT: s_addc_u32 s35, s35, external_void_func_v2i64_inreg@rel32@hi+12 +; GFX9-NEXT: s_swappc_b64 s[30:31], s[34:35] ; GFX9-NEXT: v_readlane_b32 s31, v40, 5 +; GFX9-NEXT: v_readlane_b32 s30, v40, 4 ; GFX9-NEXT: v_readlane_b32 s7, v40, 3 ; GFX9-NEXT: v_readlane_b32 s6, v40, 2 ; GFX9-NEXT: v_readlane_b32 s5, v40, 1 @@ -5640,22 +5644,22 @@ define amdgpu_gfx void @test_call_external_void_func_v2i64_inreg() #0 { ; GFX10-NEXT: s_waitcnt_depctr 0xffe3 ; GFX10-NEXT: s_mov_b32 exec_lo, s34 ; GFX10-NEXT: v_writelane_b32 v40, s33, 6 +; GFX10-NEXT: s_mov_b64 s[34:35], 0 ; GFX10-NEXT: s_mov_b32 s33, s32 ; GFX10-NEXT: s_addk_i32 s32, 0x200 ; GFX10-NEXT: v_writelane_b32 v40, s4, 0 ; GFX10-NEXT: v_writelane_b32 v40, s5, 1 ; GFX10-NEXT: v_writelane_b32 v40, s6, 2 ; GFX10-NEXT: v_writelane_b32 v40, s7, 3 +; GFX10-NEXT: s_load_dwordx4 s[4:7], s[34:35], 0x0 +; GFX10-NEXT: s_getpc_b64 s[34:35] +; GFX10-NEXT: s_add_u32 s34, s34, external_void_func_v2i64_inreg@rel32@lo+4 +; GFX10-NEXT: s_addc_u32 s35, s35, external_void_func_v2i64_inreg@rel32@hi+12 ; GFX10-NEXT: v_writelane_b32 v40, s30, 4 ; GFX10-NEXT: v_writelane_b32 v40, s31, 5 -; GFX10-NEXT: s_mov_b64 s[30:31], 0 -; GFX10-NEXT: s_load_dwordx4 s[4:7], s[30:31], 0x0 -; GFX10-NEXT: s_getpc_b64 s[30:31] -; GFX10-NEXT: s_add_u32 s30, s30, external_void_func_v2i64_inreg@rel32@lo+4 -; GFX10-NEXT: s_addc_u32 s31, s31, external_void_func_v2i64_inreg@rel32@hi+12 -; GFX10-NEXT: s_swappc_b64 s[30:31], s[30:31] -; GFX10-NEXT: v_readlane_b32 s30, v40, 4 +; GFX10-NEXT: s_swappc_b64 s[30:31], s[34:35] ; GFX10-NEXT: v_readlane_b32 s31, v40, 5 +; GFX10-NEXT: v_readlane_b32 s30, v40, 4 ; GFX10-NEXT: v_readlane_b32 s7, v40, 3 ; GFX10-NEXT: v_readlane_b32 s6, v40, 2 ; GFX10-NEXT: v_readlane_b32 s5, v40, 1 @@ -5692,8 +5696,8 @@ define amdgpu_gfx void @test_call_external_void_func_v2i64_inreg() #0 { ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s30, 4 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s31, 5 ; GFX10-SCRATCH-NEXT: s_swappc_b64 s[30:31], s[0:1] -; GFX10-SCRATCH-NEXT: v_readlane_b32 s30, v40, 4 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s31, v40, 5 +; 
GFX10-SCRATCH-NEXT: v_readlane_b32 s30, v40, 4 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s7, v40, 3 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s6, v40, 2 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s5, v40, 1 @@ -5731,12 +5735,12 @@ define amdgpu_gfx void @test_call_external_void_func_v2i64_imm_inreg() #0 { ; GFX9-NEXT: s_mov_b32 s6, 3 ; GFX9-NEXT: s_mov_b32 s7, 4 ; GFX9-NEXT: v_writelane_b32 v40, s31, 5 -; GFX9-NEXT: s_getpc_b64 s[30:31] -; GFX9-NEXT: s_add_u32 s30, s30, external_void_func_v2i64_inreg@rel32@lo+4 -; GFX9-NEXT: s_addc_u32 s31, s31, external_void_func_v2i64_inreg@rel32@hi+12 -; GFX9-NEXT: s_swappc_b64 s[30:31], s[30:31] -; GFX9-NEXT: v_readlane_b32 s30, v40, 4 +; GFX9-NEXT: s_getpc_b64 s[34:35] +; GFX9-NEXT: s_add_u32 s34, s34, external_void_func_v2i64_inreg@rel32@lo+4 +; GFX9-NEXT: s_addc_u32 s35, s35, external_void_func_v2i64_inreg@rel32@hi+12 +; GFX9-NEXT: s_swappc_b64 s[30:31], s[34:35] ; GFX9-NEXT: v_readlane_b32 s31, v40, 5 +; GFX9-NEXT: v_readlane_b32 s30, v40, 4 ; GFX9-NEXT: v_readlane_b32 s7, v40, 3 ; GFX9-NEXT: v_readlane_b32 s6, v40, 2 ; GFX9-NEXT: v_readlane_b32 s5, v40, 1 @@ -5760,6 +5764,9 @@ define amdgpu_gfx void @test_call_external_void_func_v2i64_imm_inreg() #0 { ; GFX10-NEXT: v_writelane_b32 v40, s33, 6 ; GFX10-NEXT: s_mov_b32 s33, s32 ; GFX10-NEXT: s_addk_i32 s32, 0x200 +; GFX10-NEXT: s_getpc_b64 s[34:35] +; GFX10-NEXT: s_add_u32 s34, s34, external_void_func_v2i64_inreg@rel32@lo+4 +; GFX10-NEXT: s_addc_u32 s35, s35, external_void_func_v2i64_inreg@rel32@hi+12 ; GFX10-NEXT: v_writelane_b32 v40, s4, 0 ; GFX10-NEXT: s_mov_b32 s4, 1 ; GFX10-NEXT: v_writelane_b32 v40, s5, 1 @@ -5770,12 +5777,9 @@ define amdgpu_gfx void @test_call_external_void_func_v2i64_imm_inreg() #0 { ; GFX10-NEXT: s_mov_b32 s7, 4 ; GFX10-NEXT: v_writelane_b32 v40, s30, 4 ; GFX10-NEXT: v_writelane_b32 v40, s31, 5 -; GFX10-NEXT: s_getpc_b64 s[30:31] -; GFX10-NEXT: s_add_u32 s30, s30, external_void_func_v2i64_inreg@rel32@lo+4 -; GFX10-NEXT: s_addc_u32 s31, s31, external_void_func_v2i64_inreg@rel32@hi+12 -; GFX10-NEXT: s_swappc_b64 s[30:31], s[30:31] -; GFX10-NEXT: v_readlane_b32 s30, v40, 4 +; GFX10-NEXT: s_swappc_b64 s[30:31], s[34:35] ; GFX10-NEXT: v_readlane_b32 s31, v40, 5 +; GFX10-NEXT: v_readlane_b32 s30, v40, 4 ; GFX10-NEXT: v_readlane_b32 s7, v40, 3 ; GFX10-NEXT: v_readlane_b32 s6, v40, 2 ; GFX10-NEXT: v_readlane_b32 s5, v40, 1 @@ -5814,8 +5818,8 @@ define amdgpu_gfx void @test_call_external_void_func_v2i64_imm_inreg() #0 { ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s30, 4 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s31, 5 ; GFX10-SCRATCH-NEXT: s_swappc_b64 s[30:31], s[0:1] -; GFX10-SCRATCH-NEXT: v_readlane_b32 s30, v40, 4 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s31, v40, 5 +; GFX10-SCRATCH-NEXT: v_readlane_b32 s30, v40, 4 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s7, v40, 3 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s6, v40, 2 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s5, v40, 1 @@ -5843,23 +5847,23 @@ define amdgpu_gfx void @test_call_external_void_func_v3i64_inreg() #0 { ; GFX9-NEXT: v_writelane_b32 v40, s4, 0 ; GFX9-NEXT: v_writelane_b32 v40, s5, 1 ; GFX9-NEXT: v_writelane_b32 v40, s6, 2 +; GFX9-NEXT: s_mov_b64 s[34:35], 0 ; GFX9-NEXT: v_writelane_b32 v40, s7, 3 +; GFX9-NEXT: s_load_dwordx4 s[4:7], s[34:35], 0x0 ; GFX9-NEXT: v_writelane_b32 v40, s8, 4 ; GFX9-NEXT: v_writelane_b32 v40, s9, 5 -; GFX9-NEXT: v_writelane_b32 v40, s30, 6 -; GFX9-NEXT: v_writelane_b32 v40, s31, 7 -; GFX9-NEXT: s_mov_b64 s[30:31], 0 -; GFX9-NEXT: s_load_dwordx4 s[4:7], s[30:31], 0x0 ; GFX9-NEXT: s_mov_b32 s33, s32 ; GFX9-NEXT: s_addk_i32 s32, 
0x400 +; GFX9-NEXT: v_writelane_b32 v40, s30, 6 ; GFX9-NEXT: s_mov_b32 s8, 1 ; GFX9-NEXT: s_mov_b32 s9, 2 -; GFX9-NEXT: s_getpc_b64 s[30:31] -; GFX9-NEXT: s_add_u32 s30, s30, external_void_func_v3i64_inreg@rel32@lo+4 -; GFX9-NEXT: s_addc_u32 s31, s31, external_void_func_v3i64_inreg@rel32@hi+12 -; GFX9-NEXT: s_swappc_b64 s[30:31], s[30:31] -; GFX9-NEXT: v_readlane_b32 s30, v40, 6 +; GFX9-NEXT: v_writelane_b32 v40, s31, 7 +; GFX9-NEXT: s_getpc_b64 s[34:35] +; GFX9-NEXT: s_add_u32 s34, s34, external_void_func_v3i64_inreg@rel32@lo+4 +; GFX9-NEXT: s_addc_u32 s35, s35, external_void_func_v3i64_inreg@rel32@hi+12 +; GFX9-NEXT: s_swappc_b64 s[30:31], s[34:35] ; GFX9-NEXT: v_readlane_b32 s31, v40, 7 +; GFX9-NEXT: v_readlane_b32 s30, v40, 6 ; GFX9-NEXT: v_readlane_b32 s9, v40, 5 ; GFX9-NEXT: v_readlane_b32 s8, v40, 4 ; GFX9-NEXT: v_readlane_b32 s7, v40, 3 @@ -5883,26 +5887,26 @@ define amdgpu_gfx void @test_call_external_void_func_v3i64_inreg() #0 { ; GFX10-NEXT: s_waitcnt_depctr 0xffe3 ; GFX10-NEXT: s_mov_b32 exec_lo, s34 ; GFX10-NEXT: v_writelane_b32 v40, s33, 8 +; GFX10-NEXT: s_mov_b64 s[34:35], 0 ; GFX10-NEXT: s_mov_b32 s33, s32 ; GFX10-NEXT: s_addk_i32 s32, 0x200 ; GFX10-NEXT: v_writelane_b32 v40, s4, 0 ; GFX10-NEXT: v_writelane_b32 v40, s5, 1 ; GFX10-NEXT: v_writelane_b32 v40, s6, 2 ; GFX10-NEXT: v_writelane_b32 v40, s7, 3 +; GFX10-NEXT: s_load_dwordx4 s[4:7], s[34:35], 0x0 +; GFX10-NEXT: s_getpc_b64 s[34:35] +; GFX10-NEXT: s_add_u32 s34, s34, external_void_func_v3i64_inreg@rel32@lo+4 +; GFX10-NEXT: s_addc_u32 s35, s35, external_void_func_v3i64_inreg@rel32@hi+12 ; GFX10-NEXT: v_writelane_b32 v40, s8, 4 ; GFX10-NEXT: s_mov_b32 s8, 1 ; GFX10-NEXT: v_writelane_b32 v40, s9, 5 ; GFX10-NEXT: s_mov_b32 s9, 2 ; GFX10-NEXT: v_writelane_b32 v40, s30, 6 ; GFX10-NEXT: v_writelane_b32 v40, s31, 7 -; GFX10-NEXT: s_mov_b64 s[30:31], 0 -; GFX10-NEXT: s_load_dwordx4 s[4:7], s[30:31], 0x0 -; GFX10-NEXT: s_getpc_b64 s[30:31] -; GFX10-NEXT: s_add_u32 s30, s30, external_void_func_v3i64_inreg@rel32@lo+4 -; GFX10-NEXT: s_addc_u32 s31, s31, external_void_func_v3i64_inreg@rel32@hi+12 -; GFX10-NEXT: s_swappc_b64 s[30:31], s[30:31] -; GFX10-NEXT: v_readlane_b32 s30, v40, 6 +; GFX10-NEXT: s_swappc_b64 s[30:31], s[34:35] ; GFX10-NEXT: v_readlane_b32 s31, v40, 7 +; GFX10-NEXT: v_readlane_b32 s30, v40, 6 ; GFX10-NEXT: v_readlane_b32 s9, v40, 5 ; GFX10-NEXT: v_readlane_b32 s8, v40, 4 ; GFX10-NEXT: v_readlane_b32 s7, v40, 3 @@ -5945,8 +5949,8 @@ define amdgpu_gfx void @test_call_external_void_func_v3i64_inreg() #0 { ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s30, 6 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s31, 7 ; GFX10-SCRATCH-NEXT: s_swappc_b64 s[30:31], s[0:1] -; GFX10-SCRATCH-NEXT: v_readlane_b32 s30, v40, 6 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s31, v40, 7 +; GFX10-SCRATCH-NEXT: v_readlane_b32 s30, v40, 6 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s9, v40, 5 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s8, v40, 4 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s7, v40, 3 @@ -5980,26 +5984,26 @@ define amdgpu_gfx void @test_call_external_void_func_v4i64_inreg() #0 { ; GFX9-NEXT: v_writelane_b32 v40, s5, 1 ; GFX9-NEXT: v_writelane_b32 v40, s6, 2 ; GFX9-NEXT: v_writelane_b32 v40, s7, 3 +; GFX9-NEXT: s_mov_b64 s[34:35], 0 ; GFX9-NEXT: v_writelane_b32 v40, s8, 4 +; GFX9-NEXT: s_load_dwordx4 s[4:7], s[34:35], 0x0 ; GFX9-NEXT: v_writelane_b32 v40, s9, 5 ; GFX9-NEXT: v_writelane_b32 v40, s10, 6 ; GFX9-NEXT: v_writelane_b32 v40, s11, 7 -; GFX9-NEXT: v_writelane_b32 v40, s30, 8 -; GFX9-NEXT: v_writelane_b32 v40, s31, 9 -; GFX9-NEXT: s_mov_b64 s[30:31], 0 
-; GFX9-NEXT: s_load_dwordx4 s[4:7], s[30:31], 0x0 ; GFX9-NEXT: s_mov_b32 s33, s32 ; GFX9-NEXT: s_addk_i32 s32, 0x400 +; GFX9-NEXT: v_writelane_b32 v40, s30, 8 ; GFX9-NEXT: s_mov_b32 s8, 1 ; GFX9-NEXT: s_mov_b32 s9, 2 ; GFX9-NEXT: s_mov_b32 s10, 3 ; GFX9-NEXT: s_mov_b32 s11, 4 -; GFX9-NEXT: s_getpc_b64 s[30:31] -; GFX9-NEXT: s_add_u32 s30, s30, external_void_func_v4i64_inreg@rel32@lo+4 -; GFX9-NEXT: s_addc_u32 s31, s31, external_void_func_v4i64_inreg@rel32@hi+12 -; GFX9-NEXT: s_swappc_b64 s[30:31], s[30:31] -; GFX9-NEXT: v_readlane_b32 s30, v40, 8 +; GFX9-NEXT: v_writelane_b32 v40, s31, 9 +; GFX9-NEXT: s_getpc_b64 s[34:35] +; GFX9-NEXT: s_add_u32 s34, s34, external_void_func_v4i64_inreg@rel32@lo+4 +; GFX9-NEXT: s_addc_u32 s35, s35, external_void_func_v4i64_inreg@rel32@hi+12 +; GFX9-NEXT: s_swappc_b64 s[30:31], s[34:35] ; GFX9-NEXT: v_readlane_b32 s31, v40, 9 +; GFX9-NEXT: v_readlane_b32 s30, v40, 8 ; GFX9-NEXT: v_readlane_b32 s11, v40, 7 ; GFX9-NEXT: v_readlane_b32 s10, v40, 6 ; GFX9-NEXT: v_readlane_b32 s9, v40, 5 @@ -6025,12 +6029,17 @@ define amdgpu_gfx void @test_call_external_void_func_v4i64_inreg() #0 { ; GFX10-NEXT: s_waitcnt_depctr 0xffe3 ; GFX10-NEXT: s_mov_b32 exec_lo, s34 ; GFX10-NEXT: v_writelane_b32 v40, s33, 10 +; GFX10-NEXT: s_mov_b64 s[34:35], 0 ; GFX10-NEXT: s_mov_b32 s33, s32 ; GFX10-NEXT: s_addk_i32 s32, 0x200 ; GFX10-NEXT: v_writelane_b32 v40, s4, 0 ; GFX10-NEXT: v_writelane_b32 v40, s5, 1 ; GFX10-NEXT: v_writelane_b32 v40, s6, 2 ; GFX10-NEXT: v_writelane_b32 v40, s7, 3 +; GFX10-NEXT: s_load_dwordx4 s[4:7], s[34:35], 0x0 +; GFX10-NEXT: s_getpc_b64 s[34:35] +; GFX10-NEXT: s_add_u32 s34, s34, external_void_func_v4i64_inreg@rel32@lo+4 +; GFX10-NEXT: s_addc_u32 s35, s35, external_void_func_v4i64_inreg@rel32@hi+12 ; GFX10-NEXT: v_writelane_b32 v40, s8, 4 ; GFX10-NEXT: s_mov_b32 s8, 1 ; GFX10-NEXT: v_writelane_b32 v40, s9, 5 @@ -6041,14 +6050,9 @@ define amdgpu_gfx void @test_call_external_void_func_v4i64_inreg() #0 { ; GFX10-NEXT: s_mov_b32 s11, 4 ; GFX10-NEXT: v_writelane_b32 v40, s30, 8 ; GFX10-NEXT: v_writelane_b32 v40, s31, 9 -; GFX10-NEXT: s_mov_b64 s[30:31], 0 -; GFX10-NEXT: s_load_dwordx4 s[4:7], s[30:31], 0x0 -; GFX10-NEXT: s_getpc_b64 s[30:31] -; GFX10-NEXT: s_add_u32 s30, s30, external_void_func_v4i64_inreg@rel32@lo+4 -; GFX10-NEXT: s_addc_u32 s31, s31, external_void_func_v4i64_inreg@rel32@hi+12 -; GFX10-NEXT: s_swappc_b64 s[30:31], s[30:31] -; GFX10-NEXT: v_readlane_b32 s30, v40, 8 +; GFX10-NEXT: s_swappc_b64 s[30:31], s[34:35] ; GFX10-NEXT: v_readlane_b32 s31, v40, 9 +; GFX10-NEXT: v_readlane_b32 s30, v40, 8 ; GFX10-NEXT: v_readlane_b32 s11, v40, 7 ; GFX10-NEXT: v_readlane_b32 s10, v40, 6 ; GFX10-NEXT: v_readlane_b32 s9, v40, 5 @@ -6097,8 +6101,8 @@ define amdgpu_gfx void @test_call_external_void_func_v4i64_inreg() #0 { ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s30, 8 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s31, 9 ; GFX10-SCRATCH-NEXT: s_swappc_b64 s[30:31], s[0:1] -; GFX10-SCRATCH-NEXT: v_readlane_b32 s30, v40, 8 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s31, v40, 9 +; GFX10-SCRATCH-NEXT: v_readlane_b32 s30, v40, 8 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s11, v40, 7 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s10, v40, 6 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s9, v40, 5 @@ -6135,12 +6139,12 @@ define amdgpu_gfx void @test_call_external_void_func_f16_imm_inreg() #0 { ; GFX9-NEXT: v_writelane_b32 v40, s30, 1 ; GFX9-NEXT: s_movk_i32 s4, 0x4400 ; GFX9-NEXT: v_writelane_b32 v40, s31, 2 -; GFX9-NEXT: s_getpc_b64 s[30:31] -; GFX9-NEXT: s_add_u32 s30, s30, 
external_void_func_f16_inreg@rel32@lo+4 -; GFX9-NEXT: s_addc_u32 s31, s31, external_void_func_f16_inreg@rel32@hi+12 -; GFX9-NEXT: s_swappc_b64 s[30:31], s[30:31] -; GFX9-NEXT: v_readlane_b32 s30, v40, 1 +; GFX9-NEXT: s_getpc_b64 s[34:35] +; GFX9-NEXT: s_add_u32 s34, s34, external_void_func_f16_inreg@rel32@lo+4 +; GFX9-NEXT: s_addc_u32 s35, s35, external_void_func_f16_inreg@rel32@hi+12 +; GFX9-NEXT: s_swappc_b64 s[30:31], s[34:35] ; GFX9-NEXT: v_readlane_b32 s31, v40, 2 +; GFX9-NEXT: v_readlane_b32 s30, v40, 1 ; GFX9-NEXT: v_readlane_b32 s4, v40, 0 ; GFX9-NEXT: s_addk_i32 s32, 0xfc00 ; GFX9-NEXT: v_readlane_b32 s33, v40, 3 @@ -6161,16 +6165,16 @@ define amdgpu_gfx void @test_call_external_void_func_f16_imm_inreg() #0 { ; GFX10-NEXT: v_writelane_b32 v40, s33, 3 ; GFX10-NEXT: s_mov_b32 s33, s32 ; GFX10-NEXT: s_addk_i32 s32, 0x200 +; GFX10-NEXT: s_getpc_b64 s[34:35] +; GFX10-NEXT: s_add_u32 s34, s34, external_void_func_f16_inreg@rel32@lo+4 +; GFX10-NEXT: s_addc_u32 s35, s35, external_void_func_f16_inreg@rel32@hi+12 ; GFX10-NEXT: v_writelane_b32 v40, s4, 0 ; GFX10-NEXT: s_movk_i32 s4, 0x4400 ; GFX10-NEXT: v_writelane_b32 v40, s30, 1 ; GFX10-NEXT: v_writelane_b32 v40, s31, 2 -; GFX10-NEXT: s_getpc_b64 s[30:31] -; GFX10-NEXT: s_add_u32 s30, s30, external_void_func_f16_inreg@rel32@lo+4 -; GFX10-NEXT: s_addc_u32 s31, s31, external_void_func_f16_inreg@rel32@hi+12 -; GFX10-NEXT: s_swappc_b64 s[30:31], s[30:31] -; GFX10-NEXT: v_readlane_b32 s30, v40, 1 +; GFX10-NEXT: s_swappc_b64 s[30:31], s[34:35] ; GFX10-NEXT: v_readlane_b32 s31, v40, 2 +; GFX10-NEXT: v_readlane_b32 s30, v40, 1 ; GFX10-NEXT: v_readlane_b32 s4, v40, 0 ; GFX10-NEXT: s_addk_i32 s32, 0xfe00 ; GFX10-NEXT: v_readlane_b32 s33, v40, 3 @@ -6200,8 +6204,8 @@ define amdgpu_gfx void @test_call_external_void_func_f16_imm_inreg() #0 { ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s30, 1 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s31, 2 ; GFX10-SCRATCH-NEXT: s_swappc_b64 s[30:31], s[0:1] -; GFX10-SCRATCH-NEXT: v_readlane_b32 s30, v40, 1 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s31, v40, 2 +; GFX10-SCRATCH-NEXT: v_readlane_b32 s30, v40, 1 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s4, v40, 0 ; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, -16 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s33, v40, 3 @@ -6229,12 +6233,12 @@ define amdgpu_gfx void @test_call_external_void_func_f32_imm_inreg() #0 { ; GFX9-NEXT: v_writelane_b32 v40, s30, 1 ; GFX9-NEXT: s_mov_b32 s4, 4.0 ; GFX9-NEXT: v_writelane_b32 v40, s31, 2 -; GFX9-NEXT: s_getpc_b64 s[30:31] -; GFX9-NEXT: s_add_u32 s30, s30, external_void_func_f32_inreg@rel32@lo+4 -; GFX9-NEXT: s_addc_u32 s31, s31, external_void_func_f32_inreg@rel32@hi+12 -; GFX9-NEXT: s_swappc_b64 s[30:31], s[30:31] -; GFX9-NEXT: v_readlane_b32 s30, v40, 1 +; GFX9-NEXT: s_getpc_b64 s[34:35] +; GFX9-NEXT: s_add_u32 s34, s34, external_void_func_f32_inreg@rel32@lo+4 +; GFX9-NEXT: s_addc_u32 s35, s35, external_void_func_f32_inreg@rel32@hi+12 +; GFX9-NEXT: s_swappc_b64 s[30:31], s[34:35] ; GFX9-NEXT: v_readlane_b32 s31, v40, 2 +; GFX9-NEXT: v_readlane_b32 s30, v40, 1 ; GFX9-NEXT: v_readlane_b32 s4, v40, 0 ; GFX9-NEXT: s_addk_i32 s32, 0xfc00 ; GFX9-NEXT: v_readlane_b32 s33, v40, 3 @@ -6255,16 +6259,16 @@ define amdgpu_gfx void @test_call_external_void_func_f32_imm_inreg() #0 { ; GFX10-NEXT: v_writelane_b32 v40, s33, 3 ; GFX10-NEXT: s_mov_b32 s33, s32 ; GFX10-NEXT: s_addk_i32 s32, 0x200 +; GFX10-NEXT: s_getpc_b64 s[34:35] +; GFX10-NEXT: s_add_u32 s34, s34, external_void_func_f32_inreg@rel32@lo+4 +; GFX10-NEXT: s_addc_u32 s35, s35, 
external_void_func_f32_inreg@rel32@hi+12 ; GFX10-NEXT: v_writelane_b32 v40, s4, 0 ; GFX10-NEXT: s_mov_b32 s4, 4.0 ; GFX10-NEXT: v_writelane_b32 v40, s30, 1 ; GFX10-NEXT: v_writelane_b32 v40, s31, 2 -; GFX10-NEXT: s_getpc_b64 s[30:31] -; GFX10-NEXT: s_add_u32 s30, s30, external_void_func_f32_inreg@rel32@lo+4 -; GFX10-NEXT: s_addc_u32 s31, s31, external_void_func_f32_inreg@rel32@hi+12 -; GFX10-NEXT: s_swappc_b64 s[30:31], s[30:31] -; GFX10-NEXT: v_readlane_b32 s30, v40, 1 +; GFX10-NEXT: s_swappc_b64 s[30:31], s[34:35] ; GFX10-NEXT: v_readlane_b32 s31, v40, 2 +; GFX10-NEXT: v_readlane_b32 s30, v40, 1 ; GFX10-NEXT: v_readlane_b32 s4, v40, 0 ; GFX10-NEXT: s_addk_i32 s32, 0xfe00 ; GFX10-NEXT: v_readlane_b32 s33, v40, 3 @@ -6294,8 +6298,8 @@ define amdgpu_gfx void @test_call_external_void_func_f32_imm_inreg() #0 { ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s30, 1 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s31, 2 ; GFX10-SCRATCH-NEXT: s_swappc_b64 s[30:31], s[0:1] -; GFX10-SCRATCH-NEXT: v_readlane_b32 s30, v40, 1 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s31, v40, 2 +; GFX10-SCRATCH-NEXT: v_readlane_b32 s30, v40, 1 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s4, v40, 0 ; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, -16 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s33, v40, 3 @@ -6325,12 +6329,12 @@ define amdgpu_gfx void @test_call_external_void_func_v2f32_imm_inreg() #0 { ; GFX9-NEXT: s_mov_b32 s4, 1.0 ; GFX9-NEXT: s_mov_b32 s5, 2.0 ; GFX9-NEXT: v_writelane_b32 v40, s31, 3 -; GFX9-NEXT: s_getpc_b64 s[30:31] -; GFX9-NEXT: s_add_u32 s30, s30, external_void_func_v2f32_inreg@rel32@lo+4 -; GFX9-NEXT: s_addc_u32 s31, s31, external_void_func_v2f32_inreg@rel32@hi+12 -; GFX9-NEXT: s_swappc_b64 s[30:31], s[30:31] -; GFX9-NEXT: v_readlane_b32 s30, v40, 2 +; GFX9-NEXT: s_getpc_b64 s[34:35] +; GFX9-NEXT: s_add_u32 s34, s34, external_void_func_v2f32_inreg@rel32@lo+4 +; GFX9-NEXT: s_addc_u32 s35, s35, external_void_func_v2f32_inreg@rel32@hi+12 +; GFX9-NEXT: s_swappc_b64 s[30:31], s[34:35] ; GFX9-NEXT: v_readlane_b32 s31, v40, 3 +; GFX9-NEXT: v_readlane_b32 s30, v40, 2 ; GFX9-NEXT: v_readlane_b32 s5, v40, 1 ; GFX9-NEXT: v_readlane_b32 s4, v40, 0 ; GFX9-NEXT: s_addk_i32 s32, 0xfc00 @@ -6352,18 +6356,18 @@ define amdgpu_gfx void @test_call_external_void_func_v2f32_imm_inreg() #0 { ; GFX10-NEXT: v_writelane_b32 v40, s33, 4 ; GFX10-NEXT: s_mov_b32 s33, s32 ; GFX10-NEXT: s_addk_i32 s32, 0x200 +; GFX10-NEXT: s_getpc_b64 s[34:35] +; GFX10-NEXT: s_add_u32 s34, s34, external_void_func_v2f32_inreg@rel32@lo+4 +; GFX10-NEXT: s_addc_u32 s35, s35, external_void_func_v2f32_inreg@rel32@hi+12 ; GFX10-NEXT: v_writelane_b32 v40, s4, 0 ; GFX10-NEXT: s_mov_b32 s4, 1.0 ; GFX10-NEXT: v_writelane_b32 v40, s5, 1 ; GFX10-NEXT: s_mov_b32 s5, 2.0 ; GFX10-NEXT: v_writelane_b32 v40, s30, 2 ; GFX10-NEXT: v_writelane_b32 v40, s31, 3 -; GFX10-NEXT: s_getpc_b64 s[30:31] -; GFX10-NEXT: s_add_u32 s30, s30, external_void_func_v2f32_inreg@rel32@lo+4 -; GFX10-NEXT: s_addc_u32 s31, s31, external_void_func_v2f32_inreg@rel32@hi+12 -; GFX10-NEXT: s_swappc_b64 s[30:31], s[30:31] -; GFX10-NEXT: v_readlane_b32 s30, v40, 2 +; GFX10-NEXT: s_swappc_b64 s[30:31], s[34:35] ; GFX10-NEXT: v_readlane_b32 s31, v40, 3 +; GFX10-NEXT: v_readlane_b32 s30, v40, 2 ; GFX10-NEXT: v_readlane_b32 s5, v40, 1 ; GFX10-NEXT: v_readlane_b32 s4, v40, 0 ; GFX10-NEXT: s_addk_i32 s32, 0xfe00 @@ -6396,8 +6400,8 @@ define amdgpu_gfx void @test_call_external_void_func_v2f32_imm_inreg() #0 { ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s30, 2 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s31, 3 ; GFX10-SCRATCH-NEXT: 
s_swappc_b64 s[30:31], s[0:1] -; GFX10-SCRATCH-NEXT: v_readlane_b32 s30, v40, 2 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s31, v40, 3 +; GFX10-SCRATCH-NEXT: v_readlane_b32 s30, v40, 2 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s5, v40, 1 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s4, v40, 0 ; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, -16 @@ -6430,12 +6434,12 @@ define amdgpu_gfx void @test_call_external_void_func_v3f32_imm_inreg() #0 { ; GFX9-NEXT: s_mov_b32 s5, 2.0 ; GFX9-NEXT: s_mov_b32 s6, 4.0 ; GFX9-NEXT: v_writelane_b32 v40, s31, 4 -; GFX9-NEXT: s_getpc_b64 s[30:31] -; GFX9-NEXT: s_add_u32 s30, s30, external_void_func_v3f32_inreg@rel32@lo+4 -; GFX9-NEXT: s_addc_u32 s31, s31, external_void_func_v3f32_inreg@rel32@hi+12 -; GFX9-NEXT: s_swappc_b64 s[30:31], s[30:31] -; GFX9-NEXT: v_readlane_b32 s30, v40, 3 +; GFX9-NEXT: s_getpc_b64 s[34:35] +; GFX9-NEXT: s_add_u32 s34, s34, external_void_func_v3f32_inreg@rel32@lo+4 +; GFX9-NEXT: s_addc_u32 s35, s35, external_void_func_v3f32_inreg@rel32@hi+12 +; GFX9-NEXT: s_swappc_b64 s[30:31], s[34:35] ; GFX9-NEXT: v_readlane_b32 s31, v40, 4 +; GFX9-NEXT: v_readlane_b32 s30, v40, 3 ; GFX9-NEXT: v_readlane_b32 s6, v40, 2 ; GFX9-NEXT: v_readlane_b32 s5, v40, 1 ; GFX9-NEXT: v_readlane_b32 s4, v40, 0 @@ -6458,6 +6462,9 @@ define amdgpu_gfx void @test_call_external_void_func_v3f32_imm_inreg() #0 { ; GFX10-NEXT: v_writelane_b32 v40, s33, 5 ; GFX10-NEXT: s_mov_b32 s33, s32 ; GFX10-NEXT: s_addk_i32 s32, 0x200 +; GFX10-NEXT: s_getpc_b64 s[34:35] +; GFX10-NEXT: s_add_u32 s34, s34, external_void_func_v3f32_inreg@rel32@lo+4 +; GFX10-NEXT: s_addc_u32 s35, s35, external_void_func_v3f32_inreg@rel32@hi+12 ; GFX10-NEXT: v_writelane_b32 v40, s4, 0 ; GFX10-NEXT: s_mov_b32 s4, 1.0 ; GFX10-NEXT: v_writelane_b32 v40, s5, 1 @@ -6466,12 +6473,9 @@ define amdgpu_gfx void @test_call_external_void_func_v3f32_imm_inreg() #0 { ; GFX10-NEXT: s_mov_b32 s6, 4.0 ; GFX10-NEXT: v_writelane_b32 v40, s30, 3 ; GFX10-NEXT: v_writelane_b32 v40, s31, 4 -; GFX10-NEXT: s_getpc_b64 s[30:31] -; GFX10-NEXT: s_add_u32 s30, s30, external_void_func_v3f32_inreg@rel32@lo+4 -; GFX10-NEXT: s_addc_u32 s31, s31, external_void_func_v3f32_inreg@rel32@hi+12 -; GFX10-NEXT: s_swappc_b64 s[30:31], s[30:31] -; GFX10-NEXT: v_readlane_b32 s30, v40, 3 +; GFX10-NEXT: s_swappc_b64 s[30:31], s[34:35] ; GFX10-NEXT: v_readlane_b32 s31, v40, 4 +; GFX10-NEXT: v_readlane_b32 s30, v40, 3 ; GFX10-NEXT: v_readlane_b32 s6, v40, 2 ; GFX10-NEXT: v_readlane_b32 s5, v40, 1 ; GFX10-NEXT: v_readlane_b32 s4, v40, 0 @@ -6507,8 +6511,8 @@ define amdgpu_gfx void @test_call_external_void_func_v3f32_imm_inreg() #0 { ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s30, 3 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s31, 4 ; GFX10-SCRATCH-NEXT: s_swappc_b64 s[30:31], s[0:1] -; GFX10-SCRATCH-NEXT: v_readlane_b32 s30, v40, 3 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s31, v40, 4 +; GFX10-SCRATCH-NEXT: v_readlane_b32 s30, v40, 3 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s6, v40, 2 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s5, v40, 1 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s4, v40, 0 @@ -6546,12 +6550,12 @@ define amdgpu_gfx void @test_call_external_void_func_v5f32_imm_inreg() #0 { ; GFX9-NEXT: s_mov_b32 s7, -1.0 ; GFX9-NEXT: s_mov_b32 s8, 0.5 ; GFX9-NEXT: v_writelane_b32 v40, s31, 6 -; GFX9-NEXT: s_getpc_b64 s[30:31] -; GFX9-NEXT: s_add_u32 s30, s30, external_void_func_v5f32_inreg@rel32@lo+4 -; GFX9-NEXT: s_addc_u32 s31, s31, external_void_func_v5f32_inreg@rel32@hi+12 -; GFX9-NEXT: s_swappc_b64 s[30:31], s[30:31] -; GFX9-NEXT: v_readlane_b32 s30, v40, 5 +; GFX9-NEXT: s_getpc_b64 s[34:35] +; 
GFX9-NEXT: s_add_u32 s34, s34, external_void_func_v5f32_inreg@rel32@lo+4 +; GFX9-NEXT: s_addc_u32 s35, s35, external_void_func_v5f32_inreg@rel32@hi+12 +; GFX9-NEXT: s_swappc_b64 s[30:31], s[34:35] ; GFX9-NEXT: v_readlane_b32 s31, v40, 6 +; GFX9-NEXT: v_readlane_b32 s30, v40, 5 ; GFX9-NEXT: v_readlane_b32 s8, v40, 4 ; GFX9-NEXT: v_readlane_b32 s7, v40, 3 ; GFX9-NEXT: v_readlane_b32 s6, v40, 2 @@ -6576,6 +6580,9 @@ define amdgpu_gfx void @test_call_external_void_func_v5f32_imm_inreg() #0 { ; GFX10-NEXT: v_writelane_b32 v40, s33, 7 ; GFX10-NEXT: s_mov_b32 s33, s32 ; GFX10-NEXT: s_addk_i32 s32, 0x200 +; GFX10-NEXT: s_getpc_b64 s[34:35] +; GFX10-NEXT: s_add_u32 s34, s34, external_void_func_v5f32_inreg@rel32@lo+4 +; GFX10-NEXT: s_addc_u32 s35, s35, external_void_func_v5f32_inreg@rel32@hi+12 ; GFX10-NEXT: v_writelane_b32 v40, s4, 0 ; GFX10-NEXT: s_mov_b32 s4, 1.0 ; GFX10-NEXT: v_writelane_b32 v40, s5, 1 @@ -6588,12 +6595,9 @@ define amdgpu_gfx void @test_call_external_void_func_v5f32_imm_inreg() #0 { ; GFX10-NEXT: s_mov_b32 s8, 0.5 ; GFX10-NEXT: v_writelane_b32 v40, s30, 5 ; GFX10-NEXT: v_writelane_b32 v40, s31, 6 -; GFX10-NEXT: s_getpc_b64 s[30:31] -; GFX10-NEXT: s_add_u32 s30, s30, external_void_func_v5f32_inreg@rel32@lo+4 -; GFX10-NEXT: s_addc_u32 s31, s31, external_void_func_v5f32_inreg@rel32@hi+12 -; GFX10-NEXT: s_swappc_b64 s[30:31], s[30:31] -; GFX10-NEXT: v_readlane_b32 s30, v40, 5 +; GFX10-NEXT: s_swappc_b64 s[30:31], s[34:35] ; GFX10-NEXT: v_readlane_b32 s31, v40, 6 +; GFX10-NEXT: v_readlane_b32 s30, v40, 5 ; GFX10-NEXT: v_readlane_b32 s8, v40, 4 ; GFX10-NEXT: v_readlane_b32 s7, v40, 3 ; GFX10-NEXT: v_readlane_b32 s6, v40, 2 @@ -6635,8 +6639,8 @@ define amdgpu_gfx void @test_call_external_void_func_v5f32_imm_inreg() #0 { ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s30, 5 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s31, 6 ; GFX10-SCRATCH-NEXT: s_swappc_b64 s[30:31], s[0:1] -; GFX10-SCRATCH-NEXT: v_readlane_b32 s30, v40, 5 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s31, v40, 6 +; GFX10-SCRATCH-NEXT: v_readlane_b32 s30, v40, 5 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s8, v40, 4 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s7, v40, 3 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s6, v40, 2 @@ -6670,12 +6674,12 @@ define amdgpu_gfx void @test_call_external_void_func_f64_imm_inreg() #0 { ; GFX9-NEXT: s_mov_b32 s4, 0 ; GFX9-NEXT: s_mov_b32 s5, 0x40100000 ; GFX9-NEXT: v_writelane_b32 v40, s31, 3 -; GFX9-NEXT: s_getpc_b64 s[30:31] -; GFX9-NEXT: s_add_u32 s30, s30, external_void_func_f64_inreg@rel32@lo+4 -; GFX9-NEXT: s_addc_u32 s31, s31, external_void_func_f64_inreg@rel32@hi+12 -; GFX9-NEXT: s_swappc_b64 s[30:31], s[30:31] -; GFX9-NEXT: v_readlane_b32 s30, v40, 2 +; GFX9-NEXT: s_getpc_b64 s[34:35] +; GFX9-NEXT: s_add_u32 s34, s34, external_void_func_f64_inreg@rel32@lo+4 +; GFX9-NEXT: s_addc_u32 s35, s35, external_void_func_f64_inreg@rel32@hi+12 +; GFX9-NEXT: s_swappc_b64 s[30:31], s[34:35] ; GFX9-NEXT: v_readlane_b32 s31, v40, 3 +; GFX9-NEXT: v_readlane_b32 s30, v40, 2 ; GFX9-NEXT: v_readlane_b32 s5, v40, 1 ; GFX9-NEXT: v_readlane_b32 s4, v40, 0 ; GFX9-NEXT: s_addk_i32 s32, 0xfc00 @@ -6697,18 +6701,18 @@ define amdgpu_gfx void @test_call_external_void_func_f64_imm_inreg() #0 { ; GFX10-NEXT: v_writelane_b32 v40, s33, 4 ; GFX10-NEXT: s_mov_b32 s33, s32 ; GFX10-NEXT: s_addk_i32 s32, 0x200 +; GFX10-NEXT: s_getpc_b64 s[34:35] +; GFX10-NEXT: s_add_u32 s34, s34, external_void_func_f64_inreg@rel32@lo+4 +; GFX10-NEXT: s_addc_u32 s35, s35, external_void_func_f64_inreg@rel32@hi+12 ; GFX10-NEXT: v_writelane_b32 v40, s4, 0 ; GFX10-NEXT: 
s_mov_b32 s4, 0 ; GFX10-NEXT: v_writelane_b32 v40, s5, 1 ; GFX10-NEXT: s_mov_b32 s5, 0x40100000 ; GFX10-NEXT: v_writelane_b32 v40, s30, 2 ; GFX10-NEXT: v_writelane_b32 v40, s31, 3 -; GFX10-NEXT: s_getpc_b64 s[30:31] -; GFX10-NEXT: s_add_u32 s30, s30, external_void_func_f64_inreg@rel32@lo+4 -; GFX10-NEXT: s_addc_u32 s31, s31, external_void_func_f64_inreg@rel32@hi+12 -; GFX10-NEXT: s_swappc_b64 s[30:31], s[30:31] -; GFX10-NEXT: v_readlane_b32 s30, v40, 2 +; GFX10-NEXT: s_swappc_b64 s[30:31], s[34:35] ; GFX10-NEXT: v_readlane_b32 s31, v40, 3 +; GFX10-NEXT: v_readlane_b32 s30, v40, 2 ; GFX10-NEXT: v_readlane_b32 s5, v40, 1 ; GFX10-NEXT: v_readlane_b32 s4, v40, 0 ; GFX10-NEXT: s_addk_i32 s32, 0xfe00 @@ -6741,8 +6745,8 @@ define amdgpu_gfx void @test_call_external_void_func_f64_imm_inreg() #0 { ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s30, 2 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s31, 3 ; GFX10-SCRATCH-NEXT: s_swappc_b64 s[30:31], s[0:1] -; GFX10-SCRATCH-NEXT: v_readlane_b32 s30, v40, 2 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s31, v40, 3 +; GFX10-SCRATCH-NEXT: v_readlane_b32 s30, v40, 2 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s5, v40, 1 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s4, v40, 0 ; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, -16 @@ -6777,12 +6781,12 @@ define amdgpu_gfx void @test_call_external_void_func_v2f64_imm_inreg() #0 { ; GFX9-NEXT: s_mov_b32 s6, 0 ; GFX9-NEXT: s_mov_b32 s7, 0x40100000 ; GFX9-NEXT: v_writelane_b32 v40, s31, 5 -; GFX9-NEXT: s_getpc_b64 s[30:31] -; GFX9-NEXT: s_add_u32 s30, s30, external_void_func_v2f64_inreg@rel32@lo+4 -; GFX9-NEXT: s_addc_u32 s31, s31, external_void_func_v2f64_inreg@rel32@hi+12 -; GFX9-NEXT: s_swappc_b64 s[30:31], s[30:31] -; GFX9-NEXT: v_readlane_b32 s30, v40, 4 +; GFX9-NEXT: s_getpc_b64 s[34:35] +; GFX9-NEXT: s_add_u32 s34, s34, external_void_func_v2f64_inreg@rel32@lo+4 +; GFX9-NEXT: s_addc_u32 s35, s35, external_void_func_v2f64_inreg@rel32@hi+12 +; GFX9-NEXT: s_swappc_b64 s[30:31], s[34:35] ; GFX9-NEXT: v_readlane_b32 s31, v40, 5 +; GFX9-NEXT: v_readlane_b32 s30, v40, 4 ; GFX9-NEXT: v_readlane_b32 s7, v40, 3 ; GFX9-NEXT: v_readlane_b32 s6, v40, 2 ; GFX9-NEXT: v_readlane_b32 s5, v40, 1 @@ -6806,6 +6810,9 @@ define amdgpu_gfx void @test_call_external_void_func_v2f64_imm_inreg() #0 { ; GFX10-NEXT: v_writelane_b32 v40, s33, 6 ; GFX10-NEXT: s_mov_b32 s33, s32 ; GFX10-NEXT: s_addk_i32 s32, 0x200 +; GFX10-NEXT: s_getpc_b64 s[34:35] +; GFX10-NEXT: s_add_u32 s34, s34, external_void_func_v2f64_inreg@rel32@lo+4 +; GFX10-NEXT: s_addc_u32 s35, s35, external_void_func_v2f64_inreg@rel32@hi+12 ; GFX10-NEXT: v_writelane_b32 v40, s4, 0 ; GFX10-NEXT: s_mov_b32 s4, 0 ; GFX10-NEXT: v_writelane_b32 v40, s5, 1 @@ -6816,12 +6823,9 @@ define amdgpu_gfx void @test_call_external_void_func_v2f64_imm_inreg() #0 { ; GFX10-NEXT: s_mov_b32 s7, 0x40100000 ; GFX10-NEXT: v_writelane_b32 v40, s30, 4 ; GFX10-NEXT: v_writelane_b32 v40, s31, 5 -; GFX10-NEXT: s_getpc_b64 s[30:31] -; GFX10-NEXT: s_add_u32 s30, s30, external_void_func_v2f64_inreg@rel32@lo+4 -; GFX10-NEXT: s_addc_u32 s31, s31, external_void_func_v2f64_inreg@rel32@hi+12 -; GFX10-NEXT: s_swappc_b64 s[30:31], s[30:31] -; GFX10-NEXT: v_readlane_b32 s30, v40, 4 +; GFX10-NEXT: s_swappc_b64 s[30:31], s[34:35] ; GFX10-NEXT: v_readlane_b32 s31, v40, 5 +; GFX10-NEXT: v_readlane_b32 s30, v40, 4 ; GFX10-NEXT: v_readlane_b32 s7, v40, 3 ; GFX10-NEXT: v_readlane_b32 s6, v40, 2 ; GFX10-NEXT: v_readlane_b32 s5, v40, 1 @@ -6860,8 +6864,8 @@ define amdgpu_gfx void @test_call_external_void_func_v2f64_imm_inreg() #0 { ; GFX10-SCRATCH-NEXT: 
v_writelane_b32 v40, s30, 4 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s31, 5 ; GFX10-SCRATCH-NEXT: s_swappc_b64 s[30:31], s[0:1] -; GFX10-SCRATCH-NEXT: v_readlane_b32 s30, v40, 4 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s31, v40, 5 +; GFX10-SCRATCH-NEXT: v_readlane_b32 s30, v40, 4 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s7, v40, 3 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s6, v40, 2 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s5, v40, 1 @@ -6902,12 +6906,12 @@ define amdgpu_gfx void @test_call_external_void_func_v3f64_imm_inreg() #0 { ; GFX9-NEXT: s_mov_b32 s8, 0 ; GFX9-NEXT: s_mov_b32 s9, 0x40200000 ; GFX9-NEXT: v_writelane_b32 v40, s31, 7 -; GFX9-NEXT: s_getpc_b64 s[30:31] -; GFX9-NEXT: s_add_u32 s30, s30, external_void_func_v3f64_inreg@rel32@lo+4 -; GFX9-NEXT: s_addc_u32 s31, s31, external_void_func_v3f64_inreg@rel32@hi+12 -; GFX9-NEXT: s_swappc_b64 s[30:31], s[30:31] -; GFX9-NEXT: v_readlane_b32 s30, v40, 6 +; GFX9-NEXT: s_getpc_b64 s[34:35] +; GFX9-NEXT: s_add_u32 s34, s34, external_void_func_v3f64_inreg@rel32@lo+4 +; GFX9-NEXT: s_addc_u32 s35, s35, external_void_func_v3f64_inreg@rel32@hi+12 +; GFX9-NEXT: s_swappc_b64 s[30:31], s[34:35] ; GFX9-NEXT: v_readlane_b32 s31, v40, 7 +; GFX9-NEXT: v_readlane_b32 s30, v40, 6 ; GFX9-NEXT: v_readlane_b32 s9, v40, 5 ; GFX9-NEXT: v_readlane_b32 s8, v40, 4 ; GFX9-NEXT: v_readlane_b32 s7, v40, 3 @@ -6933,6 +6937,9 @@ define amdgpu_gfx void @test_call_external_void_func_v3f64_imm_inreg() #0 { ; GFX10-NEXT: v_writelane_b32 v40, s33, 8 ; GFX10-NEXT: s_mov_b32 s33, s32 ; GFX10-NEXT: s_addk_i32 s32, 0x200 +; GFX10-NEXT: s_getpc_b64 s[34:35] +; GFX10-NEXT: s_add_u32 s34, s34, external_void_func_v3f64_inreg@rel32@lo+4 +; GFX10-NEXT: s_addc_u32 s35, s35, external_void_func_v3f64_inreg@rel32@hi+12 ; GFX10-NEXT: v_writelane_b32 v40, s4, 0 ; GFX10-NEXT: s_mov_b32 s4, 0 ; GFX10-NEXT: v_writelane_b32 v40, s5, 1 @@ -6947,12 +6954,9 @@ define amdgpu_gfx void @test_call_external_void_func_v3f64_imm_inreg() #0 { ; GFX10-NEXT: s_mov_b32 s9, 0x40200000 ; GFX10-NEXT: v_writelane_b32 v40, s30, 6 ; GFX10-NEXT: v_writelane_b32 v40, s31, 7 -; GFX10-NEXT: s_getpc_b64 s[30:31] -; GFX10-NEXT: s_add_u32 s30, s30, external_void_func_v3f64_inreg@rel32@lo+4 -; GFX10-NEXT: s_addc_u32 s31, s31, external_void_func_v3f64_inreg@rel32@hi+12 -; GFX10-NEXT: s_swappc_b64 s[30:31], s[30:31] -; GFX10-NEXT: v_readlane_b32 s30, v40, 6 +; GFX10-NEXT: s_swappc_b64 s[30:31], s[34:35] ; GFX10-NEXT: v_readlane_b32 s31, v40, 7 +; GFX10-NEXT: v_readlane_b32 s30, v40, 6 ; GFX10-NEXT: v_readlane_b32 s9, v40, 5 ; GFX10-NEXT: v_readlane_b32 s8, v40, 4 ; GFX10-NEXT: v_readlane_b32 s7, v40, 3 @@ -6997,8 +7001,8 @@ define amdgpu_gfx void @test_call_external_void_func_v3f64_imm_inreg() #0 { ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s30, 6 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s31, 7 ; GFX10-SCRATCH-NEXT: s_swappc_b64 s[30:31], s[0:1] -; GFX10-SCRATCH-NEXT: v_readlane_b32 s30, v40, 6 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s31, v40, 7 +; GFX10-SCRATCH-NEXT: v_readlane_b32 s30, v40, 6 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s9, v40, 5 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s8, v40, 4 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s7, v40, 3 @@ -7026,17 +7030,17 @@ define amdgpu_gfx void @test_call_external_void_func_v2i16_inreg() #0 { ; GFX9-NEXT: s_mov_b64 exec, s[34:35] ; GFX9-NEXT: v_writelane_b32 v40, s33, 3 ; GFX9-NEXT: v_writelane_b32 v40, s4, 0 -; GFX9-NEXT: v_writelane_b32 v40, s30, 1 -; GFX9-NEXT: s_load_dword s4, s[30:31], 0x0 +; GFX9-NEXT: s_load_dword s4, s[34:35], 0x0 ; GFX9-NEXT: s_mov_b32 s33, s32 ; GFX9-NEXT: s_addk_i32 
s32, 0x400 +; GFX9-NEXT: v_writelane_b32 v40, s30, 1 ; GFX9-NEXT: v_writelane_b32 v40, s31, 2 -; GFX9-NEXT: s_getpc_b64 s[30:31] -; GFX9-NEXT: s_add_u32 s30, s30, external_void_func_v2i16_inreg@rel32@lo+4 -; GFX9-NEXT: s_addc_u32 s31, s31, external_void_func_v2i16_inreg@rel32@hi+12 -; GFX9-NEXT: s_swappc_b64 s[30:31], s[30:31] -; GFX9-NEXT: v_readlane_b32 s30, v40, 1 +; GFX9-NEXT: s_getpc_b64 s[34:35] +; GFX9-NEXT: s_add_u32 s34, s34, external_void_func_v2i16_inreg@rel32@lo+4 +; GFX9-NEXT: s_addc_u32 s35, s35, external_void_func_v2i16_inreg@rel32@hi+12 +; GFX9-NEXT: s_swappc_b64 s[30:31], s[34:35] ; GFX9-NEXT: v_readlane_b32 s31, v40, 2 +; GFX9-NEXT: v_readlane_b32 s30, v40, 1 ; GFX9-NEXT: v_readlane_b32 s4, v40, 0 ; GFX9-NEXT: s_addk_i32 s32, 0xfc00 ; GFX9-NEXT: v_readlane_b32 s33, v40, 3 @@ -7058,15 +7062,15 @@ define amdgpu_gfx void @test_call_external_void_func_v2i16_inreg() #0 { ; GFX10-NEXT: s_mov_b32 s33, s32 ; GFX10-NEXT: s_addk_i32 s32, 0x200 ; GFX10-NEXT: v_writelane_b32 v40, s4, 0 +; GFX10-NEXT: s_load_dword s4, s[34:35], 0x0 +; GFX10-NEXT: s_getpc_b64 s[34:35] +; GFX10-NEXT: s_add_u32 s34, s34, external_void_func_v2i16_inreg@rel32@lo+4 +; GFX10-NEXT: s_addc_u32 s35, s35, external_void_func_v2i16_inreg@rel32@hi+12 ; GFX10-NEXT: v_writelane_b32 v40, s30, 1 -; GFX10-NEXT: s_load_dword s4, s[30:31], 0x0 ; GFX10-NEXT: v_writelane_b32 v40, s31, 2 -; GFX10-NEXT: s_getpc_b64 s[30:31] -; GFX10-NEXT: s_add_u32 s30, s30, external_void_func_v2i16_inreg@rel32@lo+4 -; GFX10-NEXT: s_addc_u32 s31, s31, external_void_func_v2i16_inreg@rel32@hi+12 -; GFX10-NEXT: s_swappc_b64 s[30:31], s[30:31] -; GFX10-NEXT: v_readlane_b32 s30, v40, 1 +; GFX10-NEXT: s_swappc_b64 s[30:31], s[34:35] ; GFX10-NEXT: v_readlane_b32 s31, v40, 2 +; GFX10-NEXT: v_readlane_b32 s30, v40, 1 ; GFX10-NEXT: v_readlane_b32 s4, v40, 0 ; GFX10-NEXT: s_addk_i32 s32, 0xfe00 ; GFX10-NEXT: v_readlane_b32 s33, v40, 3 @@ -7096,8 +7100,8 @@ define amdgpu_gfx void @test_call_external_void_func_v2i16_inreg() #0 { ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s30, 1 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s31, 2 ; GFX10-SCRATCH-NEXT: s_swappc_b64 s[30:31], s[0:1] -; GFX10-SCRATCH-NEXT: v_readlane_b32 s30, v40, 1 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s31, v40, 2 +; GFX10-SCRATCH-NEXT: v_readlane_b32 s30, v40, 1 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s4, v40, 0 ; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, -16 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s33, v40, 3 @@ -7122,17 +7126,17 @@ define amdgpu_gfx void @test_call_external_void_func_v3i16_inreg() #0 { ; GFX9-NEXT: v_writelane_b32 v40, s33, 4 ; GFX9-NEXT: v_writelane_b32 v40, s4, 0 ; GFX9-NEXT: v_writelane_b32 v40, s5, 1 -; GFX9-NEXT: v_writelane_b32 v40, s30, 2 -; GFX9-NEXT: s_load_dwordx2 s[4:5], s[30:31], 0x0 +; GFX9-NEXT: s_load_dwordx2 s[4:5], s[34:35], 0x0 ; GFX9-NEXT: s_mov_b32 s33, s32 ; GFX9-NEXT: s_addk_i32 s32, 0x400 +; GFX9-NEXT: v_writelane_b32 v40, s30, 2 ; GFX9-NEXT: v_writelane_b32 v40, s31, 3 -; GFX9-NEXT: s_getpc_b64 s[30:31] -; GFX9-NEXT: s_add_u32 s30, s30, external_void_func_v3i16_inreg@rel32@lo+4 -; GFX9-NEXT: s_addc_u32 s31, s31, external_void_func_v3i16_inreg@rel32@hi+12 -; GFX9-NEXT: s_swappc_b64 s[30:31], s[30:31] -; GFX9-NEXT: v_readlane_b32 s30, v40, 2 +; GFX9-NEXT: s_getpc_b64 s[34:35] +; GFX9-NEXT: s_add_u32 s34, s34, external_void_func_v3i16_inreg@rel32@lo+4 +; GFX9-NEXT: s_addc_u32 s35, s35, external_void_func_v3i16_inreg@rel32@hi+12 +; GFX9-NEXT: s_swappc_b64 s[30:31], s[34:35] ; GFX9-NEXT: v_readlane_b32 s31, v40, 3 +; GFX9-NEXT: v_readlane_b32 s30, v40, 2 ; 
GFX9-NEXT: v_readlane_b32 s5, v40, 1 ; GFX9-NEXT: v_readlane_b32 s4, v40, 0 ; GFX9-NEXT: s_addk_i32 s32, 0xfc00 @@ -7156,15 +7160,15 @@ define amdgpu_gfx void @test_call_external_void_func_v3i16_inreg() #0 { ; GFX10-NEXT: s_addk_i32 s32, 0x200 ; GFX10-NEXT: v_writelane_b32 v40, s4, 0 ; GFX10-NEXT: v_writelane_b32 v40, s5, 1 +; GFX10-NEXT: s_load_dwordx2 s[4:5], s[34:35], 0x0 +; GFX10-NEXT: s_getpc_b64 s[34:35] +; GFX10-NEXT: s_add_u32 s34, s34, external_void_func_v3i16_inreg@rel32@lo+4 +; GFX10-NEXT: s_addc_u32 s35, s35, external_void_func_v3i16_inreg@rel32@hi+12 ; GFX10-NEXT: v_writelane_b32 v40, s30, 2 -; GFX10-NEXT: s_load_dwordx2 s[4:5], s[30:31], 0x0 ; GFX10-NEXT: v_writelane_b32 v40, s31, 3 -; GFX10-NEXT: s_getpc_b64 s[30:31] -; GFX10-NEXT: s_add_u32 s30, s30, external_void_func_v3i16_inreg@rel32@lo+4 -; GFX10-NEXT: s_addc_u32 s31, s31, external_void_func_v3i16_inreg@rel32@hi+12 -; GFX10-NEXT: s_swappc_b64 s[30:31], s[30:31] -; GFX10-NEXT: v_readlane_b32 s30, v40, 2 +; GFX10-NEXT: s_swappc_b64 s[30:31], s[34:35] ; GFX10-NEXT: v_readlane_b32 s31, v40, 3 +; GFX10-NEXT: v_readlane_b32 s30, v40, 2 ; GFX10-NEXT: v_readlane_b32 s5, v40, 1 ; GFX10-NEXT: v_readlane_b32 s4, v40, 0 ; GFX10-NEXT: s_addk_i32 s32, 0xfe00 @@ -7196,8 +7200,8 @@ define amdgpu_gfx void @test_call_external_void_func_v3i16_inreg() #0 { ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s30, 2 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s31, 3 ; GFX10-SCRATCH-NEXT: s_swappc_b64 s[30:31], s[0:1] -; GFX10-SCRATCH-NEXT: v_readlane_b32 s30, v40, 2 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s31, v40, 3 +; GFX10-SCRATCH-NEXT: v_readlane_b32 s30, v40, 2 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s5, v40, 1 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s4, v40, 0 ; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, -16 @@ -7223,17 +7227,17 @@ define amdgpu_gfx void @test_call_external_void_func_v3f16_inreg() #0 { ; GFX9-NEXT: v_writelane_b32 v40, s33, 4 ; GFX9-NEXT: v_writelane_b32 v40, s4, 0 ; GFX9-NEXT: v_writelane_b32 v40, s5, 1 -; GFX9-NEXT: v_writelane_b32 v40, s30, 2 -; GFX9-NEXT: s_load_dwordx2 s[4:5], s[30:31], 0x0 +; GFX9-NEXT: s_load_dwordx2 s[4:5], s[34:35], 0x0 ; GFX9-NEXT: s_mov_b32 s33, s32 ; GFX9-NEXT: s_addk_i32 s32, 0x400 +; GFX9-NEXT: v_writelane_b32 v40, s30, 2 ; GFX9-NEXT: v_writelane_b32 v40, s31, 3 -; GFX9-NEXT: s_getpc_b64 s[30:31] -; GFX9-NEXT: s_add_u32 s30, s30, external_void_func_v3f16_inreg@rel32@lo+4 -; GFX9-NEXT: s_addc_u32 s31, s31, external_void_func_v3f16_inreg@rel32@hi+12 -; GFX9-NEXT: s_swappc_b64 s[30:31], s[30:31] -; GFX9-NEXT: v_readlane_b32 s30, v40, 2 +; GFX9-NEXT: s_getpc_b64 s[34:35] +; GFX9-NEXT: s_add_u32 s34, s34, external_void_func_v3f16_inreg@rel32@lo+4 +; GFX9-NEXT: s_addc_u32 s35, s35, external_void_func_v3f16_inreg@rel32@hi+12 +; GFX9-NEXT: s_swappc_b64 s[30:31], s[34:35] ; GFX9-NEXT: v_readlane_b32 s31, v40, 3 +; GFX9-NEXT: v_readlane_b32 s30, v40, 2 ; GFX9-NEXT: v_readlane_b32 s5, v40, 1 ; GFX9-NEXT: v_readlane_b32 s4, v40, 0 ; GFX9-NEXT: s_addk_i32 s32, 0xfc00 @@ -7257,15 +7261,15 @@ define amdgpu_gfx void @test_call_external_void_func_v3f16_inreg() #0 { ; GFX10-NEXT: s_addk_i32 s32, 0x200 ; GFX10-NEXT: v_writelane_b32 v40, s4, 0 ; GFX10-NEXT: v_writelane_b32 v40, s5, 1 +; GFX10-NEXT: s_load_dwordx2 s[4:5], s[34:35], 0x0 +; GFX10-NEXT: s_getpc_b64 s[34:35] +; GFX10-NEXT: s_add_u32 s34, s34, external_void_func_v3f16_inreg@rel32@lo+4 +; GFX10-NEXT: s_addc_u32 s35, s35, external_void_func_v3f16_inreg@rel32@hi+12 ; GFX10-NEXT: v_writelane_b32 v40, s30, 2 -; GFX10-NEXT: s_load_dwordx2 s[4:5], s[30:31], 0x0 ; GFX10-NEXT: 
v_writelane_b32 v40, s31, 3 -; GFX10-NEXT: s_getpc_b64 s[30:31] -; GFX10-NEXT: s_add_u32 s30, s30, external_void_func_v3f16_inreg@rel32@lo+4 -; GFX10-NEXT: s_addc_u32 s31, s31, external_void_func_v3f16_inreg@rel32@hi+12 -; GFX10-NEXT: s_swappc_b64 s[30:31], s[30:31] -; GFX10-NEXT: v_readlane_b32 s30, v40, 2 +; GFX10-NEXT: s_swappc_b64 s[30:31], s[34:35] ; GFX10-NEXT: v_readlane_b32 s31, v40, 3 +; GFX10-NEXT: v_readlane_b32 s30, v40, 2 ; GFX10-NEXT: v_readlane_b32 s5, v40, 1 ; GFX10-NEXT: v_readlane_b32 s4, v40, 0 ; GFX10-NEXT: s_addk_i32 s32, 0xfe00 @@ -7297,8 +7301,8 @@ define amdgpu_gfx void @test_call_external_void_func_v3f16_inreg() #0 { ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s30, 2 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s31, 3 ; GFX10-SCRATCH-NEXT: s_swappc_b64 s[30:31], s[0:1] -; GFX10-SCRATCH-NEXT: v_readlane_b32 s30, v40, 2 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s31, v40, 3 +; GFX10-SCRATCH-NEXT: v_readlane_b32 s30, v40, 2 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s5, v40, 1 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s4, v40, 0 ; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, -16 @@ -7330,12 +7334,12 @@ define amdgpu_gfx void @test_call_external_void_func_v3i16_imm_inreg() #0 { ; GFX9-NEXT: s_mov_b32 s4, 0x20001 ; GFX9-NEXT: s_mov_b32 s5, 3 ; GFX9-NEXT: v_writelane_b32 v40, s31, 3 -; GFX9-NEXT: s_getpc_b64 s[30:31] -; GFX9-NEXT: s_add_u32 s30, s30, external_void_func_v3i16_inreg@rel32@lo+4 -; GFX9-NEXT: s_addc_u32 s31, s31, external_void_func_v3i16_inreg@rel32@hi+12 -; GFX9-NEXT: s_swappc_b64 s[30:31], s[30:31] -; GFX9-NEXT: v_readlane_b32 s30, v40, 2 +; GFX9-NEXT: s_getpc_b64 s[34:35] +; GFX9-NEXT: s_add_u32 s34, s34, external_void_func_v3i16_inreg@rel32@lo+4 +; GFX9-NEXT: s_addc_u32 s35, s35, external_void_func_v3i16_inreg@rel32@hi+12 +; GFX9-NEXT: s_swappc_b64 s[30:31], s[34:35] ; GFX9-NEXT: v_readlane_b32 s31, v40, 3 +; GFX9-NEXT: v_readlane_b32 s30, v40, 2 ; GFX9-NEXT: v_readlane_b32 s5, v40, 1 ; GFX9-NEXT: v_readlane_b32 s4, v40, 0 ; GFX9-NEXT: s_addk_i32 s32, 0xfc00 @@ -7357,18 +7361,18 @@ define amdgpu_gfx void @test_call_external_void_func_v3i16_imm_inreg() #0 { ; GFX10-NEXT: v_writelane_b32 v40, s33, 4 ; GFX10-NEXT: s_mov_b32 s33, s32 ; GFX10-NEXT: s_addk_i32 s32, 0x200 +; GFX10-NEXT: s_getpc_b64 s[34:35] +; GFX10-NEXT: s_add_u32 s34, s34, external_void_func_v3i16_inreg@rel32@lo+4 +; GFX10-NEXT: s_addc_u32 s35, s35, external_void_func_v3i16_inreg@rel32@hi+12 ; GFX10-NEXT: v_writelane_b32 v40, s4, 0 ; GFX10-NEXT: s_mov_b32 s4, 0x20001 ; GFX10-NEXT: v_writelane_b32 v40, s5, 1 ; GFX10-NEXT: s_mov_b32 s5, 3 ; GFX10-NEXT: v_writelane_b32 v40, s30, 2 ; GFX10-NEXT: v_writelane_b32 v40, s31, 3 -; GFX10-NEXT: s_getpc_b64 s[30:31] -; GFX10-NEXT: s_add_u32 s30, s30, external_void_func_v3i16_inreg@rel32@lo+4 -; GFX10-NEXT: s_addc_u32 s31, s31, external_void_func_v3i16_inreg@rel32@hi+12 -; GFX10-NEXT: s_swappc_b64 s[30:31], s[30:31] -; GFX10-NEXT: v_readlane_b32 s30, v40, 2 +; GFX10-NEXT: s_swappc_b64 s[30:31], s[34:35] ; GFX10-NEXT: v_readlane_b32 s31, v40, 3 +; GFX10-NEXT: v_readlane_b32 s30, v40, 2 ; GFX10-NEXT: v_readlane_b32 s5, v40, 1 ; GFX10-NEXT: v_readlane_b32 s4, v40, 0 ; GFX10-NEXT: s_addk_i32 s32, 0xfe00 @@ -7401,8 +7405,8 @@ define amdgpu_gfx void @test_call_external_void_func_v3i16_imm_inreg() #0 { ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s30, 2 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s31, 3 ; GFX10-SCRATCH-NEXT: s_swappc_b64 s[30:31], s[0:1] -; GFX10-SCRATCH-NEXT: v_readlane_b32 s30, v40, 2 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s31, v40, 3 +; GFX10-SCRATCH-NEXT: v_readlane_b32 
s30, v40, 2 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s5, v40, 1 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s4, v40, 0 ; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, -16 @@ -7433,12 +7437,12 @@ define amdgpu_gfx void @test_call_external_void_func_v3f16_imm_inreg() #0 { ; GFX9-NEXT: s_mov_b32 s4, 0x40003c00 ; GFX9-NEXT: s_movk_i32 s5, 0x4400 ; GFX9-NEXT: v_writelane_b32 v40, s31, 3 -; GFX9-NEXT: s_getpc_b64 s[30:31] -; GFX9-NEXT: s_add_u32 s30, s30, external_void_func_v3f16_inreg@rel32@lo+4 -; GFX9-NEXT: s_addc_u32 s31, s31, external_void_func_v3f16_inreg@rel32@hi+12 -; GFX9-NEXT: s_swappc_b64 s[30:31], s[30:31] -; GFX9-NEXT: v_readlane_b32 s30, v40, 2 +; GFX9-NEXT: s_getpc_b64 s[34:35] +; GFX9-NEXT: s_add_u32 s34, s34, external_void_func_v3f16_inreg@rel32@lo+4 +; GFX9-NEXT: s_addc_u32 s35, s35, external_void_func_v3f16_inreg@rel32@hi+12 +; GFX9-NEXT: s_swappc_b64 s[30:31], s[34:35] ; GFX9-NEXT: v_readlane_b32 s31, v40, 3 +; GFX9-NEXT: v_readlane_b32 s30, v40, 2 ; GFX9-NEXT: v_readlane_b32 s5, v40, 1 ; GFX9-NEXT: v_readlane_b32 s4, v40, 0 ; GFX9-NEXT: s_addk_i32 s32, 0xfc00 @@ -7460,18 +7464,18 @@ define amdgpu_gfx void @test_call_external_void_func_v3f16_imm_inreg() #0 { ; GFX10-NEXT: v_writelane_b32 v40, s33, 4 ; GFX10-NEXT: s_mov_b32 s33, s32 ; GFX10-NEXT: s_addk_i32 s32, 0x200 +; GFX10-NEXT: s_getpc_b64 s[34:35] +; GFX10-NEXT: s_add_u32 s34, s34, external_void_func_v3f16_inreg@rel32@lo+4 +; GFX10-NEXT: s_addc_u32 s35, s35, external_void_func_v3f16_inreg@rel32@hi+12 ; GFX10-NEXT: v_writelane_b32 v40, s4, 0 ; GFX10-NEXT: s_mov_b32 s4, 0x40003c00 ; GFX10-NEXT: v_writelane_b32 v40, s5, 1 ; GFX10-NEXT: s_movk_i32 s5, 0x4400 ; GFX10-NEXT: v_writelane_b32 v40, s30, 2 ; GFX10-NEXT: v_writelane_b32 v40, s31, 3 -; GFX10-NEXT: s_getpc_b64 s[30:31] -; GFX10-NEXT: s_add_u32 s30, s30, external_void_func_v3f16_inreg@rel32@lo+4 -; GFX10-NEXT: s_addc_u32 s31, s31, external_void_func_v3f16_inreg@rel32@hi+12 -; GFX10-NEXT: s_swappc_b64 s[30:31], s[30:31] -; GFX10-NEXT: v_readlane_b32 s30, v40, 2 +; GFX10-NEXT: s_swappc_b64 s[30:31], s[34:35] ; GFX10-NEXT: v_readlane_b32 s31, v40, 3 +; GFX10-NEXT: v_readlane_b32 s30, v40, 2 ; GFX10-NEXT: v_readlane_b32 s5, v40, 1 ; GFX10-NEXT: v_readlane_b32 s4, v40, 0 ; GFX10-NEXT: s_addk_i32 s32, 0xfe00 @@ -7504,8 +7508,8 @@ define amdgpu_gfx void @test_call_external_void_func_v3f16_imm_inreg() #0 { ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s30, 2 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s31, 3 ; GFX10-SCRATCH-NEXT: s_swappc_b64 s[30:31], s[0:1] -; GFX10-SCRATCH-NEXT: v_readlane_b32 s30, v40, 2 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s31, v40, 3 +; GFX10-SCRATCH-NEXT: v_readlane_b32 s30, v40, 2 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s5, v40, 1 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s4, v40, 0 ; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, -16 @@ -7530,17 +7534,17 @@ define amdgpu_gfx void @test_call_external_void_func_v4i16_inreg() #0 { ; GFX9-NEXT: v_writelane_b32 v40, s33, 4 ; GFX9-NEXT: v_writelane_b32 v40, s4, 0 ; GFX9-NEXT: v_writelane_b32 v40, s5, 1 -; GFX9-NEXT: v_writelane_b32 v40, s30, 2 -; GFX9-NEXT: s_load_dwordx2 s[4:5], s[30:31], 0x0 +; GFX9-NEXT: s_load_dwordx2 s[4:5], s[34:35], 0x0 ; GFX9-NEXT: s_mov_b32 s33, s32 ; GFX9-NEXT: s_addk_i32 s32, 0x400 +; GFX9-NEXT: v_writelane_b32 v40, s30, 2 ; GFX9-NEXT: v_writelane_b32 v40, s31, 3 -; GFX9-NEXT: s_getpc_b64 s[30:31] -; GFX9-NEXT: s_add_u32 s30, s30, external_void_func_v4i16_inreg@rel32@lo+4 -; GFX9-NEXT: s_addc_u32 s31, s31, external_void_func_v4i16_inreg@rel32@hi+12 -; GFX9-NEXT: s_swappc_b64 s[30:31], s[30:31] -; GFX9-NEXT: 
v_readlane_b32 s30, v40, 2 +; GFX9-NEXT: s_getpc_b64 s[34:35] +; GFX9-NEXT: s_add_u32 s34, s34, external_void_func_v4i16_inreg@rel32@lo+4 +; GFX9-NEXT: s_addc_u32 s35, s35, external_void_func_v4i16_inreg@rel32@hi+12 +; GFX9-NEXT: s_swappc_b64 s[30:31], s[34:35] ; GFX9-NEXT: v_readlane_b32 s31, v40, 3 +; GFX9-NEXT: v_readlane_b32 s30, v40, 2 ; GFX9-NEXT: v_readlane_b32 s5, v40, 1 ; GFX9-NEXT: v_readlane_b32 s4, v40, 0 ; GFX9-NEXT: s_addk_i32 s32, 0xfc00 @@ -7564,15 +7568,15 @@ define amdgpu_gfx void @test_call_external_void_func_v4i16_inreg() #0 { ; GFX10-NEXT: s_addk_i32 s32, 0x200 ; GFX10-NEXT: v_writelane_b32 v40, s4, 0 ; GFX10-NEXT: v_writelane_b32 v40, s5, 1 +; GFX10-NEXT: s_load_dwordx2 s[4:5], s[34:35], 0x0 +; GFX10-NEXT: s_getpc_b64 s[34:35] +; GFX10-NEXT: s_add_u32 s34, s34, external_void_func_v4i16_inreg@rel32@lo+4 +; GFX10-NEXT: s_addc_u32 s35, s35, external_void_func_v4i16_inreg@rel32@hi+12 ; GFX10-NEXT: v_writelane_b32 v40, s30, 2 -; GFX10-NEXT: s_load_dwordx2 s[4:5], s[30:31], 0x0 ; GFX10-NEXT: v_writelane_b32 v40, s31, 3 -; GFX10-NEXT: s_getpc_b64 s[30:31] -; GFX10-NEXT: s_add_u32 s30, s30, external_void_func_v4i16_inreg@rel32@lo+4 -; GFX10-NEXT: s_addc_u32 s31, s31, external_void_func_v4i16_inreg@rel32@hi+12 -; GFX10-NEXT: s_swappc_b64 s[30:31], s[30:31] -; GFX10-NEXT: v_readlane_b32 s30, v40, 2 +; GFX10-NEXT: s_swappc_b64 s[30:31], s[34:35] ; GFX10-NEXT: v_readlane_b32 s31, v40, 3 +; GFX10-NEXT: v_readlane_b32 s30, v40, 2 ; GFX10-NEXT: v_readlane_b32 s5, v40, 1 ; GFX10-NEXT: v_readlane_b32 s4, v40, 0 ; GFX10-NEXT: s_addk_i32 s32, 0xfe00 @@ -7604,8 +7608,8 @@ define amdgpu_gfx void @test_call_external_void_func_v4i16_inreg() #0 { ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s30, 2 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s31, 3 ; GFX10-SCRATCH-NEXT: s_swappc_b64 s[30:31], s[0:1] -; GFX10-SCRATCH-NEXT: v_readlane_b32 s30, v40, 2 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s31, v40, 3 +; GFX10-SCRATCH-NEXT: v_readlane_b32 s30, v40, 2 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s5, v40, 1 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s4, v40, 0 ; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, -16 @@ -7637,12 +7641,12 @@ define amdgpu_gfx void @test_call_external_void_func_v4i16_imm_inreg() #0 { ; GFX9-NEXT: s_mov_b32 s4, 0x20001 ; GFX9-NEXT: s_mov_b32 s5, 0x40003 ; GFX9-NEXT: v_writelane_b32 v40, s31, 3 -; GFX9-NEXT: s_getpc_b64 s[30:31] -; GFX9-NEXT: s_add_u32 s30, s30, external_void_func_v4i16_inreg@rel32@lo+4 -; GFX9-NEXT: s_addc_u32 s31, s31, external_void_func_v4i16_inreg@rel32@hi+12 -; GFX9-NEXT: s_swappc_b64 s[30:31], s[30:31] -; GFX9-NEXT: v_readlane_b32 s30, v40, 2 +; GFX9-NEXT: s_getpc_b64 s[34:35] +; GFX9-NEXT: s_add_u32 s34, s34, external_void_func_v4i16_inreg@rel32@lo+4 +; GFX9-NEXT: s_addc_u32 s35, s35, external_void_func_v4i16_inreg@rel32@hi+12 +; GFX9-NEXT: s_swappc_b64 s[30:31], s[34:35] ; GFX9-NEXT: v_readlane_b32 s31, v40, 3 +; GFX9-NEXT: v_readlane_b32 s30, v40, 2 ; GFX9-NEXT: v_readlane_b32 s5, v40, 1 ; GFX9-NEXT: v_readlane_b32 s4, v40, 0 ; GFX9-NEXT: s_addk_i32 s32, 0xfc00 @@ -7664,18 +7668,18 @@ define amdgpu_gfx void @test_call_external_void_func_v4i16_imm_inreg() #0 { ; GFX10-NEXT: v_writelane_b32 v40, s33, 4 ; GFX10-NEXT: s_mov_b32 s33, s32 ; GFX10-NEXT: s_addk_i32 s32, 0x200 +; GFX10-NEXT: s_getpc_b64 s[34:35] +; GFX10-NEXT: s_add_u32 s34, s34, external_void_func_v4i16_inreg@rel32@lo+4 +; GFX10-NEXT: s_addc_u32 s35, s35, external_void_func_v4i16_inreg@rel32@hi+12 ; GFX10-NEXT: v_writelane_b32 v40, s4, 0 ; GFX10-NEXT: s_mov_b32 s4, 0x20001 ; GFX10-NEXT: v_writelane_b32 v40, s5, 1 
; GFX10-NEXT: s_mov_b32 s5, 0x40003 ; GFX10-NEXT: v_writelane_b32 v40, s30, 2 ; GFX10-NEXT: v_writelane_b32 v40, s31, 3 -; GFX10-NEXT: s_getpc_b64 s[30:31] -; GFX10-NEXT: s_add_u32 s30, s30, external_void_func_v4i16_inreg@rel32@lo+4 -; GFX10-NEXT: s_addc_u32 s31, s31, external_void_func_v4i16_inreg@rel32@hi+12 -; GFX10-NEXT: s_swappc_b64 s[30:31], s[30:31] -; GFX10-NEXT: v_readlane_b32 s30, v40, 2 +; GFX10-NEXT: s_swappc_b64 s[30:31], s[34:35] ; GFX10-NEXT: v_readlane_b32 s31, v40, 3 +; GFX10-NEXT: v_readlane_b32 s30, v40, 2 ; GFX10-NEXT: v_readlane_b32 s5, v40, 1 ; GFX10-NEXT: v_readlane_b32 s4, v40, 0 ; GFX10-NEXT: s_addk_i32 s32, 0xfe00 @@ -7708,8 +7712,8 @@ define amdgpu_gfx void @test_call_external_void_func_v4i16_imm_inreg() #0 { ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s30, 2 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s31, 3 ; GFX10-SCRATCH-NEXT: s_swappc_b64 s[30:31], s[0:1] -; GFX10-SCRATCH-NEXT: v_readlane_b32 s30, v40, 2 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s31, v40, 3 +; GFX10-SCRATCH-NEXT: v_readlane_b32 s30, v40, 2 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s5, v40, 1 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s4, v40, 0 ; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, -16 @@ -7733,17 +7737,17 @@ define amdgpu_gfx void @test_call_external_void_func_v2f16_inreg() #0 { ; GFX9-NEXT: s_mov_b64 exec, s[34:35] ; GFX9-NEXT: v_writelane_b32 v40, s33, 3 ; GFX9-NEXT: v_writelane_b32 v40, s4, 0 -; GFX9-NEXT: v_writelane_b32 v40, s30, 1 -; GFX9-NEXT: s_load_dword s4, s[30:31], 0x0 +; GFX9-NEXT: s_load_dword s4, s[34:35], 0x0 ; GFX9-NEXT: s_mov_b32 s33, s32 ; GFX9-NEXT: s_addk_i32 s32, 0x400 +; GFX9-NEXT: v_writelane_b32 v40, s30, 1 ; GFX9-NEXT: v_writelane_b32 v40, s31, 2 -; GFX9-NEXT: s_getpc_b64 s[30:31] -; GFX9-NEXT: s_add_u32 s30, s30, external_void_func_v2f16_inreg@rel32@lo+4 -; GFX9-NEXT: s_addc_u32 s31, s31, external_void_func_v2f16_inreg@rel32@hi+12 -; GFX9-NEXT: s_swappc_b64 s[30:31], s[30:31] -; GFX9-NEXT: v_readlane_b32 s30, v40, 1 +; GFX9-NEXT: s_getpc_b64 s[34:35] +; GFX9-NEXT: s_add_u32 s34, s34, external_void_func_v2f16_inreg@rel32@lo+4 +; GFX9-NEXT: s_addc_u32 s35, s35, external_void_func_v2f16_inreg@rel32@hi+12 +; GFX9-NEXT: s_swappc_b64 s[30:31], s[34:35] ; GFX9-NEXT: v_readlane_b32 s31, v40, 2 +; GFX9-NEXT: v_readlane_b32 s30, v40, 1 ; GFX9-NEXT: v_readlane_b32 s4, v40, 0 ; GFX9-NEXT: s_addk_i32 s32, 0xfc00 ; GFX9-NEXT: v_readlane_b32 s33, v40, 3 @@ -7765,15 +7769,15 @@ define amdgpu_gfx void @test_call_external_void_func_v2f16_inreg() #0 { ; GFX10-NEXT: s_mov_b32 s33, s32 ; GFX10-NEXT: s_addk_i32 s32, 0x200 ; GFX10-NEXT: v_writelane_b32 v40, s4, 0 +; GFX10-NEXT: s_load_dword s4, s[34:35], 0x0 +; GFX10-NEXT: s_getpc_b64 s[34:35] +; GFX10-NEXT: s_add_u32 s34, s34, external_void_func_v2f16_inreg@rel32@lo+4 +; GFX10-NEXT: s_addc_u32 s35, s35, external_void_func_v2f16_inreg@rel32@hi+12 ; GFX10-NEXT: v_writelane_b32 v40, s30, 1 -; GFX10-NEXT: s_load_dword s4, s[30:31], 0x0 ; GFX10-NEXT: v_writelane_b32 v40, s31, 2 -; GFX10-NEXT: s_getpc_b64 s[30:31] -; GFX10-NEXT: s_add_u32 s30, s30, external_void_func_v2f16_inreg@rel32@lo+4 -; GFX10-NEXT: s_addc_u32 s31, s31, external_void_func_v2f16_inreg@rel32@hi+12 -; GFX10-NEXT: s_swappc_b64 s[30:31], s[30:31] -; GFX10-NEXT: v_readlane_b32 s30, v40, 1 +; GFX10-NEXT: s_swappc_b64 s[30:31], s[34:35] ; GFX10-NEXT: v_readlane_b32 s31, v40, 2 +; GFX10-NEXT: v_readlane_b32 s30, v40, 1 ; GFX10-NEXT: v_readlane_b32 s4, v40, 0 ; GFX10-NEXT: s_addk_i32 s32, 0xfe00 ; GFX10-NEXT: v_readlane_b32 s33, v40, 3 @@ -7803,8 +7807,8 @@ define amdgpu_gfx void 
@test_call_external_void_func_v2f16_inreg() #0 { ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s30, 1 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s31, 2 ; GFX10-SCRATCH-NEXT: s_swappc_b64 s[30:31], s[0:1] -; GFX10-SCRATCH-NEXT: v_readlane_b32 s30, v40, 1 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s31, v40, 2 +; GFX10-SCRATCH-NEXT: v_readlane_b32 s30, v40, 1 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s4, v40, 0 ; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, -16 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s33, v40, 3 @@ -7829,17 +7833,17 @@ define amdgpu_gfx void @test_call_external_void_func_v2i32_inreg() #0 { ; GFX9-NEXT: v_writelane_b32 v40, s33, 4 ; GFX9-NEXT: v_writelane_b32 v40, s4, 0 ; GFX9-NEXT: v_writelane_b32 v40, s5, 1 -; GFX9-NEXT: v_writelane_b32 v40, s30, 2 -; GFX9-NEXT: s_load_dwordx2 s[4:5], s[30:31], 0x0 +; GFX9-NEXT: s_load_dwordx2 s[4:5], s[34:35], 0x0 ; GFX9-NEXT: s_mov_b32 s33, s32 ; GFX9-NEXT: s_addk_i32 s32, 0x400 +; GFX9-NEXT: v_writelane_b32 v40, s30, 2 ; GFX9-NEXT: v_writelane_b32 v40, s31, 3 -; GFX9-NEXT: s_getpc_b64 s[30:31] -; GFX9-NEXT: s_add_u32 s30, s30, external_void_func_v2i32_inreg@rel32@lo+4 -; GFX9-NEXT: s_addc_u32 s31, s31, external_void_func_v2i32_inreg@rel32@hi+12 -; GFX9-NEXT: s_swappc_b64 s[30:31], s[30:31] -; GFX9-NEXT: v_readlane_b32 s30, v40, 2 +; GFX9-NEXT: s_getpc_b64 s[34:35] +; GFX9-NEXT: s_add_u32 s34, s34, external_void_func_v2i32_inreg@rel32@lo+4 +; GFX9-NEXT: s_addc_u32 s35, s35, external_void_func_v2i32_inreg@rel32@hi+12 +; GFX9-NEXT: s_swappc_b64 s[30:31], s[34:35] ; GFX9-NEXT: v_readlane_b32 s31, v40, 3 +; GFX9-NEXT: v_readlane_b32 s30, v40, 2 ; GFX9-NEXT: v_readlane_b32 s5, v40, 1 ; GFX9-NEXT: v_readlane_b32 s4, v40, 0 ; GFX9-NEXT: s_addk_i32 s32, 0xfc00 @@ -7863,15 +7867,15 @@ define amdgpu_gfx void @test_call_external_void_func_v2i32_inreg() #0 { ; GFX10-NEXT: s_addk_i32 s32, 0x200 ; GFX10-NEXT: v_writelane_b32 v40, s4, 0 ; GFX10-NEXT: v_writelane_b32 v40, s5, 1 +; GFX10-NEXT: s_load_dwordx2 s[4:5], s[34:35], 0x0 +; GFX10-NEXT: s_getpc_b64 s[34:35] +; GFX10-NEXT: s_add_u32 s34, s34, external_void_func_v2i32_inreg@rel32@lo+4 +; GFX10-NEXT: s_addc_u32 s35, s35, external_void_func_v2i32_inreg@rel32@hi+12 ; GFX10-NEXT: v_writelane_b32 v40, s30, 2 -; GFX10-NEXT: s_load_dwordx2 s[4:5], s[30:31], 0x0 ; GFX10-NEXT: v_writelane_b32 v40, s31, 3 -; GFX10-NEXT: s_getpc_b64 s[30:31] -; GFX10-NEXT: s_add_u32 s30, s30, external_void_func_v2i32_inreg@rel32@lo+4 -; GFX10-NEXT: s_addc_u32 s31, s31, external_void_func_v2i32_inreg@rel32@hi+12 -; GFX10-NEXT: s_swappc_b64 s[30:31], s[30:31] -; GFX10-NEXT: v_readlane_b32 s30, v40, 2 +; GFX10-NEXT: s_swappc_b64 s[30:31], s[34:35] ; GFX10-NEXT: v_readlane_b32 s31, v40, 3 +; GFX10-NEXT: v_readlane_b32 s30, v40, 2 ; GFX10-NEXT: v_readlane_b32 s5, v40, 1 ; GFX10-NEXT: v_readlane_b32 s4, v40, 0 ; GFX10-NEXT: s_addk_i32 s32, 0xfe00 @@ -7903,8 +7907,8 @@ define amdgpu_gfx void @test_call_external_void_func_v2i32_inreg() #0 { ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s30, 2 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s31, 3 ; GFX10-SCRATCH-NEXT: s_swappc_b64 s[30:31], s[0:1] -; GFX10-SCRATCH-NEXT: v_readlane_b32 s30, v40, 2 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s31, v40, 3 +; GFX10-SCRATCH-NEXT: v_readlane_b32 s30, v40, 2 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s5, v40, 1 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s4, v40, 0 ; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, -16 @@ -7936,12 +7940,12 @@ define amdgpu_gfx void @test_call_external_void_func_v2i32_imm_inreg() #0 { ; GFX9-NEXT: s_mov_b32 s4, 1 ; GFX9-NEXT: s_mov_b32 s5, 2 ; GFX9-NEXT: 
v_writelane_b32 v40, s31, 3 -; GFX9-NEXT: s_getpc_b64 s[30:31] -; GFX9-NEXT: s_add_u32 s30, s30, external_void_func_v2i32_inreg@rel32@lo+4 -; GFX9-NEXT: s_addc_u32 s31, s31, external_void_func_v2i32_inreg@rel32@hi+12 -; GFX9-NEXT: s_swappc_b64 s[30:31], s[30:31] -; GFX9-NEXT: v_readlane_b32 s30, v40, 2 +; GFX9-NEXT: s_getpc_b64 s[34:35] +; GFX9-NEXT: s_add_u32 s34, s34, external_void_func_v2i32_inreg@rel32@lo+4 +; GFX9-NEXT: s_addc_u32 s35, s35, external_void_func_v2i32_inreg@rel32@hi+12 +; GFX9-NEXT: s_swappc_b64 s[30:31], s[34:35] ; GFX9-NEXT: v_readlane_b32 s31, v40, 3 +; GFX9-NEXT: v_readlane_b32 s30, v40, 2 ; GFX9-NEXT: v_readlane_b32 s5, v40, 1 ; GFX9-NEXT: v_readlane_b32 s4, v40, 0 ; GFX9-NEXT: s_addk_i32 s32, 0xfc00 @@ -7963,18 +7967,18 @@ define amdgpu_gfx void @test_call_external_void_func_v2i32_imm_inreg() #0 { ; GFX10-NEXT: v_writelane_b32 v40, s33, 4 ; GFX10-NEXT: s_mov_b32 s33, s32 ; GFX10-NEXT: s_addk_i32 s32, 0x200 +; GFX10-NEXT: s_getpc_b64 s[34:35] +; GFX10-NEXT: s_add_u32 s34, s34, external_void_func_v2i32_inreg@rel32@lo+4 +; GFX10-NEXT: s_addc_u32 s35, s35, external_void_func_v2i32_inreg@rel32@hi+12 ; GFX10-NEXT: v_writelane_b32 v40, s4, 0 ; GFX10-NEXT: s_mov_b32 s4, 1 ; GFX10-NEXT: v_writelane_b32 v40, s5, 1 ; GFX10-NEXT: s_mov_b32 s5, 2 ; GFX10-NEXT: v_writelane_b32 v40, s30, 2 ; GFX10-NEXT: v_writelane_b32 v40, s31, 3 -; GFX10-NEXT: s_getpc_b64 s[30:31] -; GFX10-NEXT: s_add_u32 s30, s30, external_void_func_v2i32_inreg@rel32@lo+4 -; GFX10-NEXT: s_addc_u32 s31, s31, external_void_func_v2i32_inreg@rel32@hi+12 -; GFX10-NEXT: s_swappc_b64 s[30:31], s[30:31] -; GFX10-NEXT: v_readlane_b32 s30, v40, 2 +; GFX10-NEXT: s_swappc_b64 s[30:31], s[34:35] ; GFX10-NEXT: v_readlane_b32 s31, v40, 3 +; GFX10-NEXT: v_readlane_b32 s30, v40, 2 ; GFX10-NEXT: v_readlane_b32 s5, v40, 1 ; GFX10-NEXT: v_readlane_b32 s4, v40, 0 ; GFX10-NEXT: s_addk_i32 s32, 0xfe00 @@ -8007,8 +8011,8 @@ define amdgpu_gfx void @test_call_external_void_func_v2i32_imm_inreg() #0 { ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s30, 2 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s31, 3 ; GFX10-SCRATCH-NEXT: s_swappc_b64 s[30:31], s[0:1] -; GFX10-SCRATCH-NEXT: v_readlane_b32 s30, v40, 2 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s31, v40, 3 +; GFX10-SCRATCH-NEXT: v_readlane_b32 s30, v40, 2 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s5, v40, 1 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s4, v40, 0 ; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, -16 @@ -8041,12 +8045,12 @@ define amdgpu_gfx void @test_call_external_void_func_v3i32_imm_inreg(i32) #0 { ; GFX9-NEXT: s_mov_b32 s5, 4 ; GFX9-NEXT: s_mov_b32 s6, 5 ; GFX9-NEXT: v_writelane_b32 v40, s31, 4 -; GFX9-NEXT: s_getpc_b64 s[30:31] -; GFX9-NEXT: s_add_u32 s30, s30, external_void_func_v3i32_inreg@rel32@lo+4 -; GFX9-NEXT: s_addc_u32 s31, s31, external_void_func_v3i32_inreg@rel32@hi+12 -; GFX9-NEXT: s_swappc_b64 s[30:31], s[30:31] -; GFX9-NEXT: v_readlane_b32 s30, v40, 3 +; GFX9-NEXT: s_getpc_b64 s[34:35] +; GFX9-NEXT: s_add_u32 s34, s34, external_void_func_v3i32_inreg@rel32@lo+4 +; GFX9-NEXT: s_addc_u32 s35, s35, external_void_func_v3i32_inreg@rel32@hi+12 +; GFX9-NEXT: s_swappc_b64 s[30:31], s[34:35] ; GFX9-NEXT: v_readlane_b32 s31, v40, 4 +; GFX9-NEXT: v_readlane_b32 s30, v40, 3 ; GFX9-NEXT: v_readlane_b32 s6, v40, 2 ; GFX9-NEXT: v_readlane_b32 s5, v40, 1 ; GFX9-NEXT: v_readlane_b32 s4, v40, 0 @@ -8069,6 +8073,9 @@ define amdgpu_gfx void @test_call_external_void_func_v3i32_imm_inreg(i32) #0 { ; GFX10-NEXT: v_writelane_b32 v40, s33, 5 ; GFX10-NEXT: s_mov_b32 s33, s32 ; GFX10-NEXT: s_addk_i32 s32, 0x200 
+; GFX10-NEXT: s_getpc_b64 s[34:35] +; GFX10-NEXT: s_add_u32 s34, s34, external_void_func_v3i32_inreg@rel32@lo+4 +; GFX10-NEXT: s_addc_u32 s35, s35, external_void_func_v3i32_inreg@rel32@hi+12 ; GFX10-NEXT: v_writelane_b32 v40, s4, 0 ; GFX10-NEXT: s_mov_b32 s4, 3 ; GFX10-NEXT: v_writelane_b32 v40, s5, 1 @@ -8077,12 +8084,9 @@ define amdgpu_gfx void @test_call_external_void_func_v3i32_imm_inreg(i32) #0 { ; GFX10-NEXT: s_mov_b32 s6, 5 ; GFX10-NEXT: v_writelane_b32 v40, s30, 3 ; GFX10-NEXT: v_writelane_b32 v40, s31, 4 -; GFX10-NEXT: s_getpc_b64 s[30:31] -; GFX10-NEXT: s_add_u32 s30, s30, external_void_func_v3i32_inreg@rel32@lo+4 -; GFX10-NEXT: s_addc_u32 s31, s31, external_void_func_v3i32_inreg@rel32@hi+12 -; GFX10-NEXT: s_swappc_b64 s[30:31], s[30:31] -; GFX10-NEXT: v_readlane_b32 s30, v40, 3 +; GFX10-NEXT: s_swappc_b64 s[30:31], s[34:35] ; GFX10-NEXT: v_readlane_b32 s31, v40, 4 +; GFX10-NEXT: v_readlane_b32 s30, v40, 3 ; GFX10-NEXT: v_readlane_b32 s6, v40, 2 ; GFX10-NEXT: v_readlane_b32 s5, v40, 1 ; GFX10-NEXT: v_readlane_b32 s4, v40, 0 @@ -8118,8 +8122,8 @@ define amdgpu_gfx void @test_call_external_void_func_v3i32_imm_inreg(i32) #0 { ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s30, 3 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s31, 4 ; GFX10-SCRATCH-NEXT: s_swappc_b64 s[30:31], s[0:1] -; GFX10-SCRATCH-NEXT: v_readlane_b32 s30, v40, 3 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s31, v40, 4 +; GFX10-SCRATCH-NEXT: v_readlane_b32 s30, v40, 3 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s6, v40, 2 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s5, v40, 1 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s4, v40, 0 @@ -8155,12 +8159,12 @@ define amdgpu_gfx void @test_call_external_void_func_v3i32_i32_inreg(i32) #0 { ; GFX9-NEXT: s_mov_b32 s6, 5 ; GFX9-NEXT: s_mov_b32 s7, 6 ; GFX9-NEXT: v_writelane_b32 v40, s31, 5 -; GFX9-NEXT: s_getpc_b64 s[30:31] -; GFX9-NEXT: s_add_u32 s30, s30, external_void_func_v3i32_i32_inreg@rel32@lo+4 -; GFX9-NEXT: s_addc_u32 s31, s31, external_void_func_v3i32_i32_inreg@rel32@hi+12 -; GFX9-NEXT: s_swappc_b64 s[30:31], s[30:31] -; GFX9-NEXT: v_readlane_b32 s30, v40, 4 +; GFX9-NEXT: s_getpc_b64 s[34:35] +; GFX9-NEXT: s_add_u32 s34, s34, external_void_func_v3i32_i32_inreg@rel32@lo+4 +; GFX9-NEXT: s_addc_u32 s35, s35, external_void_func_v3i32_i32_inreg@rel32@hi+12 +; GFX9-NEXT: s_swappc_b64 s[30:31], s[34:35] ; GFX9-NEXT: v_readlane_b32 s31, v40, 5 +; GFX9-NEXT: v_readlane_b32 s30, v40, 4 ; GFX9-NEXT: v_readlane_b32 s7, v40, 3 ; GFX9-NEXT: v_readlane_b32 s6, v40, 2 ; GFX9-NEXT: v_readlane_b32 s5, v40, 1 @@ -8184,6 +8188,9 @@ define amdgpu_gfx void @test_call_external_void_func_v3i32_i32_inreg(i32) #0 { ; GFX10-NEXT: v_writelane_b32 v40, s33, 6 ; GFX10-NEXT: s_mov_b32 s33, s32 ; GFX10-NEXT: s_addk_i32 s32, 0x200 +; GFX10-NEXT: s_getpc_b64 s[34:35] +; GFX10-NEXT: s_add_u32 s34, s34, external_void_func_v3i32_i32_inreg@rel32@lo+4 +; GFX10-NEXT: s_addc_u32 s35, s35, external_void_func_v3i32_i32_inreg@rel32@hi+12 ; GFX10-NEXT: v_writelane_b32 v40, s4, 0 ; GFX10-NEXT: s_mov_b32 s4, 3 ; GFX10-NEXT: v_writelane_b32 v40, s5, 1 @@ -8194,12 +8201,9 @@ define amdgpu_gfx void @test_call_external_void_func_v3i32_i32_inreg(i32) #0 { ; GFX10-NEXT: s_mov_b32 s7, 6 ; GFX10-NEXT: v_writelane_b32 v40, s30, 4 ; GFX10-NEXT: v_writelane_b32 v40, s31, 5 -; GFX10-NEXT: s_getpc_b64 s[30:31] -; GFX10-NEXT: s_add_u32 s30, s30, external_void_func_v3i32_i32_inreg@rel32@lo+4 -; GFX10-NEXT: s_addc_u32 s31, s31, external_void_func_v3i32_i32_inreg@rel32@hi+12 -; GFX10-NEXT: s_swappc_b64 s[30:31], s[30:31] -; GFX10-NEXT: v_readlane_b32 s30, v40, 4 
+; GFX10-NEXT: s_swappc_b64 s[30:31], s[34:35] ; GFX10-NEXT: v_readlane_b32 s31, v40, 5 +; GFX10-NEXT: v_readlane_b32 s30, v40, 4 ; GFX10-NEXT: v_readlane_b32 s7, v40, 3 ; GFX10-NEXT: v_readlane_b32 s6, v40, 2 ; GFX10-NEXT: v_readlane_b32 s5, v40, 1 @@ -8238,8 +8242,8 @@ define amdgpu_gfx void @test_call_external_void_func_v3i32_i32_inreg(i32) #0 { ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s30, 4 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s31, 5 ; GFX10-SCRATCH-NEXT: s_swappc_b64 s[30:31], s[0:1] -; GFX10-SCRATCH-NEXT: v_readlane_b32 s30, v40, 4 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s31, v40, 5 +; GFX10-SCRATCH-NEXT: v_readlane_b32 s30, v40, 4 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s7, v40, 3 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s6, v40, 2 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s5, v40, 1 @@ -8268,17 +8272,17 @@ define amdgpu_gfx void @test_call_external_void_func_v4i32_inreg() #0 { ; GFX9-NEXT: v_writelane_b32 v40, s5, 1 ; GFX9-NEXT: v_writelane_b32 v40, s6, 2 ; GFX9-NEXT: v_writelane_b32 v40, s7, 3 -; GFX9-NEXT: v_writelane_b32 v40, s30, 4 -; GFX9-NEXT: s_load_dwordx4 s[4:7], s[30:31], 0x0 +; GFX9-NEXT: s_load_dwordx4 s[4:7], s[34:35], 0x0 ; GFX9-NEXT: s_mov_b32 s33, s32 ; GFX9-NEXT: s_addk_i32 s32, 0x400 +; GFX9-NEXT: v_writelane_b32 v40, s30, 4 ; GFX9-NEXT: v_writelane_b32 v40, s31, 5 -; GFX9-NEXT: s_getpc_b64 s[30:31] -; GFX9-NEXT: s_add_u32 s30, s30, external_void_func_v4i32_inreg@rel32@lo+4 -; GFX9-NEXT: s_addc_u32 s31, s31, external_void_func_v4i32_inreg@rel32@hi+12 -; GFX9-NEXT: s_swappc_b64 s[30:31], s[30:31] -; GFX9-NEXT: v_readlane_b32 s30, v40, 4 +; GFX9-NEXT: s_getpc_b64 s[34:35] +; GFX9-NEXT: s_add_u32 s34, s34, external_void_func_v4i32_inreg@rel32@lo+4 +; GFX9-NEXT: s_addc_u32 s35, s35, external_void_func_v4i32_inreg@rel32@hi+12 +; GFX9-NEXT: s_swappc_b64 s[30:31], s[34:35] ; GFX9-NEXT: v_readlane_b32 s31, v40, 5 +; GFX9-NEXT: v_readlane_b32 s30, v40, 4 ; GFX9-NEXT: v_readlane_b32 s7, v40, 3 ; GFX9-NEXT: v_readlane_b32 s6, v40, 2 ; GFX9-NEXT: v_readlane_b32 s5, v40, 1 @@ -8306,15 +8310,15 @@ define amdgpu_gfx void @test_call_external_void_func_v4i32_inreg() #0 { ; GFX10-NEXT: v_writelane_b32 v40, s5, 1 ; GFX10-NEXT: v_writelane_b32 v40, s6, 2 ; GFX10-NEXT: v_writelane_b32 v40, s7, 3 +; GFX10-NEXT: s_load_dwordx4 s[4:7], s[34:35], 0x0 +; GFX10-NEXT: s_getpc_b64 s[34:35] +; GFX10-NEXT: s_add_u32 s34, s34, external_void_func_v4i32_inreg@rel32@lo+4 +; GFX10-NEXT: s_addc_u32 s35, s35, external_void_func_v4i32_inreg@rel32@hi+12 ; GFX10-NEXT: v_writelane_b32 v40, s30, 4 -; GFX10-NEXT: s_load_dwordx4 s[4:7], s[30:31], 0x0 ; GFX10-NEXT: v_writelane_b32 v40, s31, 5 -; GFX10-NEXT: s_getpc_b64 s[30:31] -; GFX10-NEXT: s_add_u32 s30, s30, external_void_func_v4i32_inreg@rel32@lo+4 -; GFX10-NEXT: s_addc_u32 s31, s31, external_void_func_v4i32_inreg@rel32@hi+12 -; GFX10-NEXT: s_swappc_b64 s[30:31], s[30:31] -; GFX10-NEXT: v_readlane_b32 s30, v40, 4 +; GFX10-NEXT: s_swappc_b64 s[30:31], s[34:35] ; GFX10-NEXT: v_readlane_b32 s31, v40, 5 +; GFX10-NEXT: v_readlane_b32 s30, v40, 4 ; GFX10-NEXT: v_readlane_b32 s7, v40, 3 ; GFX10-NEXT: v_readlane_b32 s6, v40, 2 ; GFX10-NEXT: v_readlane_b32 s5, v40, 1 @@ -8350,8 +8354,8 @@ define amdgpu_gfx void @test_call_external_void_func_v4i32_inreg() #0 { ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s30, 4 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s31, 5 ; GFX10-SCRATCH-NEXT: s_swappc_b64 s[30:31], s[0:1] -; GFX10-SCRATCH-NEXT: v_readlane_b32 s30, v40, 4 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s31, v40, 5 +; GFX10-SCRATCH-NEXT: v_readlane_b32 s30, v40, 4 ; 
GFX10-SCRATCH-NEXT: v_readlane_b32 s7, v40, 3 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s6, v40, 2 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s5, v40, 1 @@ -8389,12 +8393,12 @@ define amdgpu_gfx void @test_call_external_void_func_v4i32_imm_inreg() #0 { ; GFX9-NEXT: s_mov_b32 s6, 3 ; GFX9-NEXT: s_mov_b32 s7, 4 ; GFX9-NEXT: v_writelane_b32 v40, s31, 5 -; GFX9-NEXT: s_getpc_b64 s[30:31] -; GFX9-NEXT: s_add_u32 s30, s30, external_void_func_v4i32_inreg@rel32@lo+4 -; GFX9-NEXT: s_addc_u32 s31, s31, external_void_func_v4i32_inreg@rel32@hi+12 -; GFX9-NEXT: s_swappc_b64 s[30:31], s[30:31] -; GFX9-NEXT: v_readlane_b32 s30, v40, 4 +; GFX9-NEXT: s_getpc_b64 s[34:35] +; GFX9-NEXT: s_add_u32 s34, s34, external_void_func_v4i32_inreg@rel32@lo+4 +; GFX9-NEXT: s_addc_u32 s35, s35, external_void_func_v4i32_inreg@rel32@hi+12 +; GFX9-NEXT: s_swappc_b64 s[30:31], s[34:35] ; GFX9-NEXT: v_readlane_b32 s31, v40, 5 +; GFX9-NEXT: v_readlane_b32 s30, v40, 4 ; GFX9-NEXT: v_readlane_b32 s7, v40, 3 ; GFX9-NEXT: v_readlane_b32 s6, v40, 2 ; GFX9-NEXT: v_readlane_b32 s5, v40, 1 @@ -8418,6 +8422,9 @@ define amdgpu_gfx void @test_call_external_void_func_v4i32_imm_inreg() #0 { ; GFX10-NEXT: v_writelane_b32 v40, s33, 6 ; GFX10-NEXT: s_mov_b32 s33, s32 ; GFX10-NEXT: s_addk_i32 s32, 0x200 +; GFX10-NEXT: s_getpc_b64 s[34:35] +; GFX10-NEXT: s_add_u32 s34, s34, external_void_func_v4i32_inreg@rel32@lo+4 +; GFX10-NEXT: s_addc_u32 s35, s35, external_void_func_v4i32_inreg@rel32@hi+12 ; GFX10-NEXT: v_writelane_b32 v40, s4, 0 ; GFX10-NEXT: s_mov_b32 s4, 1 ; GFX10-NEXT: v_writelane_b32 v40, s5, 1 @@ -8428,12 +8435,9 @@ define amdgpu_gfx void @test_call_external_void_func_v4i32_imm_inreg() #0 { ; GFX10-NEXT: s_mov_b32 s7, 4 ; GFX10-NEXT: v_writelane_b32 v40, s30, 4 ; GFX10-NEXT: v_writelane_b32 v40, s31, 5 -; GFX10-NEXT: s_getpc_b64 s[30:31] -; GFX10-NEXT: s_add_u32 s30, s30, external_void_func_v4i32_inreg@rel32@lo+4 -; GFX10-NEXT: s_addc_u32 s31, s31, external_void_func_v4i32_inreg@rel32@hi+12 -; GFX10-NEXT: s_swappc_b64 s[30:31], s[30:31] -; GFX10-NEXT: v_readlane_b32 s30, v40, 4 +; GFX10-NEXT: s_swappc_b64 s[30:31], s[34:35] ; GFX10-NEXT: v_readlane_b32 s31, v40, 5 +; GFX10-NEXT: v_readlane_b32 s30, v40, 4 ; GFX10-NEXT: v_readlane_b32 s7, v40, 3 ; GFX10-NEXT: v_readlane_b32 s6, v40, 2 ; GFX10-NEXT: v_readlane_b32 s5, v40, 1 @@ -8472,8 +8476,8 @@ define amdgpu_gfx void @test_call_external_void_func_v4i32_imm_inreg() #0 { ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s30, 4 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s31, 5 ; GFX10-SCRATCH-NEXT: s_swappc_b64 s[30:31], s[0:1] -; GFX10-SCRATCH-NEXT: v_readlane_b32 s30, v40, 4 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s31, v40, 5 +; GFX10-SCRATCH-NEXT: v_readlane_b32 s30, v40, 4 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s7, v40, 3 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s6, v40, 2 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s5, v40, 1 @@ -8512,12 +8516,12 @@ define amdgpu_gfx void @test_call_external_void_func_v5i32_imm_inreg() #0 { ; GFX9-NEXT: s_mov_b32 s7, 4 ; GFX9-NEXT: s_mov_b32 s8, 5 ; GFX9-NEXT: v_writelane_b32 v40, s31, 6 -; GFX9-NEXT: s_getpc_b64 s[30:31] -; GFX9-NEXT: s_add_u32 s30, s30, external_void_func_v5i32_inreg@rel32@lo+4 -; GFX9-NEXT: s_addc_u32 s31, s31, external_void_func_v5i32_inreg@rel32@hi+12 -; GFX9-NEXT: s_swappc_b64 s[30:31], s[30:31] -; GFX9-NEXT: v_readlane_b32 s30, v40, 5 +; GFX9-NEXT: s_getpc_b64 s[34:35] +; GFX9-NEXT: s_add_u32 s34, s34, external_void_func_v5i32_inreg@rel32@lo+4 +; GFX9-NEXT: s_addc_u32 s35, s35, external_void_func_v5i32_inreg@rel32@hi+12 +; GFX9-NEXT: s_swappc_b64 s[30:31], 
s[34:35] ; GFX9-NEXT: v_readlane_b32 s31, v40, 6 +; GFX9-NEXT: v_readlane_b32 s30, v40, 5 ; GFX9-NEXT: v_readlane_b32 s8, v40, 4 ; GFX9-NEXT: v_readlane_b32 s7, v40, 3 ; GFX9-NEXT: v_readlane_b32 s6, v40, 2 @@ -8542,6 +8546,9 @@ define amdgpu_gfx void @test_call_external_void_func_v5i32_imm_inreg() #0 { ; GFX10-NEXT: v_writelane_b32 v40, s33, 7 ; GFX10-NEXT: s_mov_b32 s33, s32 ; GFX10-NEXT: s_addk_i32 s32, 0x200 +; GFX10-NEXT: s_getpc_b64 s[34:35] +; GFX10-NEXT: s_add_u32 s34, s34, external_void_func_v5i32_inreg@rel32@lo+4 +; GFX10-NEXT: s_addc_u32 s35, s35, external_void_func_v5i32_inreg@rel32@hi+12 ; GFX10-NEXT: v_writelane_b32 v40, s4, 0 ; GFX10-NEXT: s_mov_b32 s4, 1 ; GFX10-NEXT: v_writelane_b32 v40, s5, 1 @@ -8554,12 +8561,9 @@ define amdgpu_gfx void @test_call_external_void_func_v5i32_imm_inreg() #0 { ; GFX10-NEXT: s_mov_b32 s8, 5 ; GFX10-NEXT: v_writelane_b32 v40, s30, 5 ; GFX10-NEXT: v_writelane_b32 v40, s31, 6 -; GFX10-NEXT: s_getpc_b64 s[30:31] -; GFX10-NEXT: s_add_u32 s30, s30, external_void_func_v5i32_inreg@rel32@lo+4 -; GFX10-NEXT: s_addc_u32 s31, s31, external_void_func_v5i32_inreg@rel32@hi+12 -; GFX10-NEXT: s_swappc_b64 s[30:31], s[30:31] -; GFX10-NEXT: v_readlane_b32 s30, v40, 5 +; GFX10-NEXT: s_swappc_b64 s[30:31], s[34:35] ; GFX10-NEXT: v_readlane_b32 s31, v40, 6 +; GFX10-NEXT: v_readlane_b32 s30, v40, 5 ; GFX10-NEXT: v_readlane_b32 s8, v40, 4 ; GFX10-NEXT: v_readlane_b32 s7, v40, 3 ; GFX10-NEXT: v_readlane_b32 s6, v40, 2 @@ -8601,8 +8605,8 @@ define amdgpu_gfx void @test_call_external_void_func_v5i32_imm_inreg() #0 { ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s30, 5 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s31, 6 ; GFX10-SCRATCH-NEXT: s_swappc_b64 s[30:31], s[0:1] -; GFX10-SCRATCH-NEXT: v_readlane_b32 s30, v40, 5 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s31, v40, 6 +; GFX10-SCRATCH-NEXT: v_readlane_b32 s30, v40, 5 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s8, v40, 4 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s7, v40, 3 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s6, v40, 2 @@ -8631,24 +8635,24 @@ define amdgpu_gfx void @test_call_external_void_func_v8i32_inreg() #0 { ; GFX9-NEXT: v_writelane_b32 v40, s4, 0 ; GFX9-NEXT: v_writelane_b32 v40, s5, 1 ; GFX9-NEXT: v_writelane_b32 v40, s6, 2 +; GFX9-NEXT: s_load_dwordx2 s[34:35], s[34:35], 0x0 ; GFX9-NEXT: v_writelane_b32 v40, s7, 3 ; GFX9-NEXT: v_writelane_b32 v40, s8, 4 ; GFX9-NEXT: v_writelane_b32 v40, s9, 5 ; GFX9-NEXT: v_writelane_b32 v40, s10, 6 ; GFX9-NEXT: v_writelane_b32 v40, s11, 7 -; GFX9-NEXT: v_writelane_b32 v40, s30, 8 -; GFX9-NEXT: v_writelane_b32 v40, s31, 9 -; GFX9-NEXT: s_load_dwordx2 s[30:31], s[30:31], 0x0 +; GFX9-NEXT: s_waitcnt lgkmcnt(0) +; GFX9-NEXT: s_load_dwordx8 s[4:11], s[34:35], 0x0 ; GFX9-NEXT: s_mov_b32 s33, s32 ; GFX9-NEXT: s_addk_i32 s32, 0x400 -; GFX9-NEXT: s_waitcnt lgkmcnt(0) -; GFX9-NEXT: s_load_dwordx8 s[4:11], s[30:31], 0x0 -; GFX9-NEXT: s_getpc_b64 s[30:31] -; GFX9-NEXT: s_add_u32 s30, s30, external_void_func_v8i32_inreg@rel32@lo+4 -; GFX9-NEXT: s_addc_u32 s31, s31, external_void_func_v8i32_inreg@rel32@hi+12 -; GFX9-NEXT: s_swappc_b64 s[30:31], s[30:31] -; GFX9-NEXT: v_readlane_b32 s30, v40, 8 +; GFX9-NEXT: v_writelane_b32 v40, s30, 8 +; GFX9-NEXT: v_writelane_b32 v40, s31, 9 +; GFX9-NEXT: s_getpc_b64 s[34:35] +; GFX9-NEXT: s_add_u32 s34, s34, external_void_func_v8i32_inreg@rel32@lo+4 +; GFX9-NEXT: s_addc_u32 s35, s35, external_void_func_v8i32_inreg@rel32@hi+12 +; GFX9-NEXT: s_swappc_b64 s[30:31], s[34:35] ; GFX9-NEXT: v_readlane_b32 s31, v40, 9 +; GFX9-NEXT: v_readlane_b32 s30, v40, 8 ; GFX9-NEXT: 
v_readlane_b32 s11, v40, 7 ; GFX9-NEXT: v_readlane_b32 s10, v40, 6 ; GFX9-NEXT: v_readlane_b32 s9, v40, 5 @@ -8674,6 +8678,7 @@ define amdgpu_gfx void @test_call_external_void_func_v8i32_inreg() #0 { ; GFX10-NEXT: s_waitcnt_depctr 0xffe3 ; GFX10-NEXT: s_mov_b32 exec_lo, s34 ; GFX10-NEXT: v_writelane_b32 v40, s33, 10 +; GFX10-NEXT: s_load_dwordx2 s[34:35], s[34:35], 0x0 ; GFX10-NEXT: s_mov_b32 s33, s32 ; GFX10-NEXT: s_addk_i32 s32, 0x200 ; GFX10-NEXT: v_writelane_b32 v40, s4, 0 @@ -8684,17 +8689,16 @@ define amdgpu_gfx void @test_call_external_void_func_v8i32_inreg() #0 { ; GFX10-NEXT: v_writelane_b32 v40, s9, 5 ; GFX10-NEXT: v_writelane_b32 v40, s10, 6 ; GFX10-NEXT: v_writelane_b32 v40, s11, 7 +; GFX10-NEXT: s_waitcnt lgkmcnt(0) +; GFX10-NEXT: s_load_dwordx8 s[4:11], s[34:35], 0x0 +; GFX10-NEXT: s_getpc_b64 s[34:35] +; GFX10-NEXT: s_add_u32 s34, s34, external_void_func_v8i32_inreg@rel32@lo+4 +; GFX10-NEXT: s_addc_u32 s35, s35, external_void_func_v8i32_inreg@rel32@hi+12 ; GFX10-NEXT: v_writelane_b32 v40, s30, 8 ; GFX10-NEXT: v_writelane_b32 v40, s31, 9 -; GFX10-NEXT: s_load_dwordx2 s[30:31], s[30:31], 0x0 -; GFX10-NEXT: s_waitcnt lgkmcnt(0) -; GFX10-NEXT: s_load_dwordx8 s[4:11], s[30:31], 0x0 -; GFX10-NEXT: s_getpc_b64 s[30:31] -; GFX10-NEXT: s_add_u32 s30, s30, external_void_func_v8i32_inreg@rel32@lo+4 -; GFX10-NEXT: s_addc_u32 s31, s31, external_void_func_v8i32_inreg@rel32@hi+12 -; GFX10-NEXT: s_swappc_b64 s[30:31], s[30:31] -; GFX10-NEXT: v_readlane_b32 s30, v40, 8 +; GFX10-NEXT: s_swappc_b64 s[30:31], s[34:35] ; GFX10-NEXT: v_readlane_b32 s31, v40, 9 +; GFX10-NEXT: v_readlane_b32 s30, v40, 8 ; GFX10-NEXT: v_readlane_b32 s11, v40, 7 ; GFX10-NEXT: v_readlane_b32 s10, v40, 6 ; GFX10-NEXT: v_readlane_b32 s9, v40, 5 @@ -8740,8 +8744,8 @@ define amdgpu_gfx void @test_call_external_void_func_v8i32_inreg() #0 { ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s30, 8 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s31, 9 ; GFX10-SCRATCH-NEXT: s_swappc_b64 s[30:31], s[0:1] -; GFX10-SCRATCH-NEXT: v_readlane_b32 s30, v40, 8 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s31, v40, 9 +; GFX10-SCRATCH-NEXT: v_readlane_b32 s30, v40, 8 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s11, v40, 7 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s10, v40, 6 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s9, v40, 5 @@ -8792,12 +8796,12 @@ define amdgpu_gfx void @test_call_external_void_func_v8i32_imm_inreg() #0 { ; GFX9-NEXT: s_mov_b32 s10, 7 ; GFX9-NEXT: s_mov_b32 s11, 8 ; GFX9-NEXT: v_writelane_b32 v40, s31, 9 -; GFX9-NEXT: s_getpc_b64 s[30:31] -; GFX9-NEXT: s_add_u32 s30, s30, external_void_func_v8i32_inreg@rel32@lo+4 -; GFX9-NEXT: s_addc_u32 s31, s31, external_void_func_v8i32_inreg@rel32@hi+12 -; GFX9-NEXT: s_swappc_b64 s[30:31], s[30:31] -; GFX9-NEXT: v_readlane_b32 s30, v40, 8 +; GFX9-NEXT: s_getpc_b64 s[34:35] +; GFX9-NEXT: s_add_u32 s34, s34, external_void_func_v8i32_inreg@rel32@lo+4 +; GFX9-NEXT: s_addc_u32 s35, s35, external_void_func_v8i32_inreg@rel32@hi+12 +; GFX9-NEXT: s_swappc_b64 s[30:31], s[34:35] ; GFX9-NEXT: v_readlane_b32 s31, v40, 9 +; GFX9-NEXT: v_readlane_b32 s30, v40, 8 ; GFX9-NEXT: v_readlane_b32 s11, v40, 7 ; GFX9-NEXT: v_readlane_b32 s10, v40, 6 ; GFX9-NEXT: v_readlane_b32 s9, v40, 5 @@ -8825,6 +8829,9 @@ define amdgpu_gfx void @test_call_external_void_func_v8i32_imm_inreg() #0 { ; GFX10-NEXT: v_writelane_b32 v40, s33, 10 ; GFX10-NEXT: s_mov_b32 s33, s32 ; GFX10-NEXT: s_addk_i32 s32, 0x200 +; GFX10-NEXT: s_getpc_b64 s[34:35] +; GFX10-NEXT: s_add_u32 s34, s34, external_void_func_v8i32_inreg@rel32@lo+4 +; GFX10-NEXT: s_addc_u32 s35, 
s35, external_void_func_v8i32_inreg@rel32@hi+12 ; GFX10-NEXT: v_writelane_b32 v40, s4, 0 ; GFX10-NEXT: s_mov_b32 s4, 1 ; GFX10-NEXT: v_writelane_b32 v40, s5, 1 @@ -8843,12 +8850,9 @@ define amdgpu_gfx void @test_call_external_void_func_v8i32_imm_inreg() #0 { ; GFX10-NEXT: s_mov_b32 s11, 8 ; GFX10-NEXT: v_writelane_b32 v40, s30, 8 ; GFX10-NEXT: v_writelane_b32 v40, s31, 9 -; GFX10-NEXT: s_getpc_b64 s[30:31] -; GFX10-NEXT: s_add_u32 s30, s30, external_void_func_v8i32_inreg@rel32@lo+4 -; GFX10-NEXT: s_addc_u32 s31, s31, external_void_func_v8i32_inreg@rel32@hi+12 -; GFX10-NEXT: s_swappc_b64 s[30:31], s[30:31] -; GFX10-NEXT: v_readlane_b32 s30, v40, 8 +; GFX10-NEXT: s_swappc_b64 s[30:31], s[34:35] ; GFX10-NEXT: v_readlane_b32 s31, v40, 9 +; GFX10-NEXT: v_readlane_b32 s30, v40, 8 ; GFX10-NEXT: v_readlane_b32 s11, v40, 7 ; GFX10-NEXT: v_readlane_b32 s10, v40, 6 ; GFX10-NEXT: v_readlane_b32 s9, v40, 5 @@ -8899,8 +8903,8 @@ define amdgpu_gfx void @test_call_external_void_func_v8i32_imm_inreg() #0 { ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s30, 8 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s31, 9 ; GFX10-SCRATCH-NEXT: s_swappc_b64 s[30:31], s[0:1] -; GFX10-SCRATCH-NEXT: v_readlane_b32 s30, v40, 8 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s31, v40, 9 +; GFX10-SCRATCH-NEXT: v_readlane_b32 s30, v40, 8 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s11, v40, 7 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s10, v40, 6 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s9, v40, 5 @@ -8940,24 +8944,24 @@ define amdgpu_gfx void @test_call_external_void_func_v16i32_inreg() #0 { ; GFX9-NEXT: v_writelane_b32 v40, s12, 8 ; GFX9-NEXT: v_writelane_b32 v40, s13, 9 ; GFX9-NEXT: v_writelane_b32 v40, s14, 10 +; GFX9-NEXT: s_load_dwordx2 s[34:35], s[34:35], 0x0 ; GFX9-NEXT: v_writelane_b32 v40, s15, 11 ; GFX9-NEXT: v_writelane_b32 v40, s16, 12 ; GFX9-NEXT: v_writelane_b32 v40, s17, 13 ; GFX9-NEXT: v_writelane_b32 v40, s18, 14 ; GFX9-NEXT: v_writelane_b32 v40, s19, 15 -; GFX9-NEXT: v_writelane_b32 v40, s30, 16 -; GFX9-NEXT: v_writelane_b32 v40, s31, 17 -; GFX9-NEXT: s_load_dwordx2 s[30:31], s[30:31], 0x0 +; GFX9-NEXT: s_waitcnt lgkmcnt(0) +; GFX9-NEXT: s_load_dwordx16 s[4:19], s[34:35], 0x0 ; GFX9-NEXT: s_mov_b32 s33, s32 ; GFX9-NEXT: s_addk_i32 s32, 0x400 -; GFX9-NEXT: s_waitcnt lgkmcnt(0) -; GFX9-NEXT: s_load_dwordx16 s[4:19], s[30:31], 0x0 -; GFX9-NEXT: s_getpc_b64 s[30:31] -; GFX9-NEXT: s_add_u32 s30, s30, external_void_func_v16i32_inreg@rel32@lo+4 -; GFX9-NEXT: s_addc_u32 s31, s31, external_void_func_v16i32_inreg@rel32@hi+12 -; GFX9-NEXT: s_swappc_b64 s[30:31], s[30:31] -; GFX9-NEXT: v_readlane_b32 s30, v40, 16 +; GFX9-NEXT: v_writelane_b32 v40, s30, 16 +; GFX9-NEXT: v_writelane_b32 v40, s31, 17 +; GFX9-NEXT: s_getpc_b64 s[34:35] +; GFX9-NEXT: s_add_u32 s34, s34, external_void_func_v16i32_inreg@rel32@lo+4 +; GFX9-NEXT: s_addc_u32 s35, s35, external_void_func_v16i32_inreg@rel32@hi+12 +; GFX9-NEXT: s_swappc_b64 s[30:31], s[34:35] ; GFX9-NEXT: v_readlane_b32 s31, v40, 17 +; GFX9-NEXT: v_readlane_b32 s30, v40, 16 ; GFX9-NEXT: v_readlane_b32 s19, v40, 15 ; GFX9-NEXT: v_readlane_b32 s18, v40, 14 ; GFX9-NEXT: v_readlane_b32 s17, v40, 13 @@ -8991,6 +8995,7 @@ define amdgpu_gfx void @test_call_external_void_func_v16i32_inreg() #0 { ; GFX10-NEXT: s_waitcnt_depctr 0xffe3 ; GFX10-NEXT: s_mov_b32 exec_lo, s34 ; GFX10-NEXT: v_writelane_b32 v40, s33, 18 +; GFX10-NEXT: s_load_dwordx2 s[34:35], s[34:35], 0x0 ; GFX10-NEXT: s_mov_b32 s33, s32 ; GFX10-NEXT: s_addk_i32 s32, 0x200 ; GFX10-NEXT: v_writelane_b32 v40, s4, 0 @@ -9009,17 +9014,16 @@ define amdgpu_gfx void 
@test_call_external_void_func_v16i32_inreg() #0 { ; GFX10-NEXT: v_writelane_b32 v40, s17, 13 ; GFX10-NEXT: v_writelane_b32 v40, s18, 14 ; GFX10-NEXT: v_writelane_b32 v40, s19, 15 +; GFX10-NEXT: s_waitcnt lgkmcnt(0) +; GFX10-NEXT: s_load_dwordx16 s[4:19], s[34:35], 0x0 +; GFX10-NEXT: s_getpc_b64 s[34:35] +; GFX10-NEXT: s_add_u32 s34, s34, external_void_func_v16i32_inreg@rel32@lo+4 +; GFX10-NEXT: s_addc_u32 s35, s35, external_void_func_v16i32_inreg@rel32@hi+12 ; GFX10-NEXT: v_writelane_b32 v40, s30, 16 ; GFX10-NEXT: v_writelane_b32 v40, s31, 17 -; GFX10-NEXT: s_load_dwordx2 s[30:31], s[30:31], 0x0 -; GFX10-NEXT: s_waitcnt lgkmcnt(0) -; GFX10-NEXT: s_load_dwordx16 s[4:19], s[30:31], 0x0 -; GFX10-NEXT: s_getpc_b64 s[30:31] -; GFX10-NEXT: s_add_u32 s30, s30, external_void_func_v16i32_inreg@rel32@lo+4 -; GFX10-NEXT: s_addc_u32 s31, s31, external_void_func_v16i32_inreg@rel32@hi+12 -; GFX10-NEXT: s_swappc_b64 s[30:31], s[30:31] -; GFX10-NEXT: v_readlane_b32 s30, v40, 16 +; GFX10-NEXT: s_swappc_b64 s[30:31], s[34:35] ; GFX10-NEXT: v_readlane_b32 s31, v40, 17 +; GFX10-NEXT: v_readlane_b32 s30, v40, 16 ; GFX10-NEXT: v_readlane_b32 s19, v40, 15 ; GFX10-NEXT: v_readlane_b32 s18, v40, 14 ; GFX10-NEXT: v_readlane_b32 s17, v40, 13 @@ -9081,8 +9085,8 @@ define amdgpu_gfx void @test_call_external_void_func_v16i32_inreg() #0 { ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s30, 16 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s31, 17 ; GFX10-SCRATCH-NEXT: s_swappc_b64 s[30:31], s[0:1] -; GFX10-SCRATCH-NEXT: v_readlane_b32 s30, v40, 16 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s31, v40, 17 +; GFX10-SCRATCH-NEXT: v_readlane_b32 s30, v40, 16 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s19, v40, 15 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s18, v40, 14 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s17, v40, 13 @@ -9137,29 +9141,24 @@ define amdgpu_gfx void @test_call_external_void_func_v32i32_inreg() #0 { ; GFX9-NEXT: v_writelane_b32 v40, s17, 13 ; GFX9-NEXT: v_writelane_b32 v40, s18, 14 ; GFX9-NEXT: v_writelane_b32 v40, s19, 15 +; GFX9-NEXT: s_load_dwordx2 s[34:35], s[34:35], 0x0 ; GFX9-NEXT: v_writelane_b32 v40, s20, 16 ; GFX9-NEXT: v_writelane_b32 v40, s21, 17 ; GFX9-NEXT: v_writelane_b32 v40, s22, 18 ; GFX9-NEXT: v_writelane_b32 v40, s23, 19 ; GFX9-NEXT: v_writelane_b32 v40, s24, 20 +; GFX9-NEXT: s_waitcnt lgkmcnt(0) +; GFX9-NEXT: s_load_dwordx16 s[36:51], s[34:35], 0x40 +; GFX9-NEXT: s_load_dwordx16 s[4:19], s[34:35], 0x0 ; GFX9-NEXT: v_writelane_b32 v40, s25, 21 ; GFX9-NEXT: v_writelane_b32 v40, s26, 22 ; GFX9-NEXT: v_writelane_b32 v40, s27, 23 -; GFX9-NEXT: v_writelane_b32 v40, s28, 24 -; GFX9-NEXT: v_writelane_b32 v40, s29, 25 -; GFX9-NEXT: v_writelane_b32 v40, s30, 26 -; GFX9-NEXT: v_writelane_b32 v40, s31, 27 -; GFX9-NEXT: s_load_dwordx2 s[30:31], s[30:31], 0x0 ; GFX9-NEXT: s_mov_b32 s33, s32 ; GFX9-NEXT: s_addk_i32 s32, 0x400 -; GFX9-NEXT: s_waitcnt lgkmcnt(0) -; GFX9-NEXT: s_load_dwordx16 s[36:51], s[30:31], 0x40 -; GFX9-NEXT: s_load_dwordx16 s[4:19], s[30:31], 0x0 -; GFX9-NEXT: s_getpc_b64 s[30:31] -; GFX9-NEXT: s_add_u32 s30, s30, external_void_func_v32i32_inreg@rel32@lo+4 -; GFX9-NEXT: s_addc_u32 s31, s31, external_void_func_v32i32_inreg@rel32@hi+12 +; GFX9-NEXT: v_writelane_b32 v40, s28, 24 ; GFX9-NEXT: s_waitcnt lgkmcnt(0) ; GFX9-NEXT: v_mov_b32_e32 v0, s46 +; GFX9-NEXT: v_writelane_b32 v40, s29, 25 ; GFX9-NEXT: v_mov_b32_e32 v1, s47 ; GFX9-NEXT: v_mov_b32_e32 v2, s48 ; GFX9-NEXT: v_mov_b32_e32 v3, s49 @@ -9168,6 +9167,7 @@ define amdgpu_gfx void @test_call_external_void_func_v32i32_inreg() #0 { ; GFX9-NEXT: buffer_store_dword 
v2, off, s[0:3], s32 offset:8 ; GFX9-NEXT: buffer_store_dword v3, off, s[0:3], s32 offset:12 ; GFX9-NEXT: v_mov_b32_e32 v0, s50 +; GFX9-NEXT: v_writelane_b32 v40, s30, 26 ; GFX9-NEXT: buffer_store_dword v0, off, s[0:3], s32 offset:16 ; GFX9-NEXT: v_mov_b32_e32 v0, s51 ; GFX9-NEXT: s_mov_b32 s20, s36 @@ -9180,10 +9180,14 @@ define amdgpu_gfx void @test_call_external_void_func_v32i32_inreg() #0 { ; GFX9-NEXT: s_mov_b32 s27, s43 ; GFX9-NEXT: s_mov_b32 s28, s44 ; GFX9-NEXT: s_mov_b32 s29, s45 +; GFX9-NEXT: v_writelane_b32 v40, s31, 27 ; GFX9-NEXT: buffer_store_dword v0, off, s[0:3], s32 offset:20 -; GFX9-NEXT: s_swappc_b64 s[30:31], s[30:31] -; GFX9-NEXT: v_readlane_b32 s30, v40, 26 +; GFX9-NEXT: s_getpc_b64 s[34:35] +; GFX9-NEXT: s_add_u32 s34, s34, external_void_func_v32i32_inreg@rel32@lo+4 +; GFX9-NEXT: s_addc_u32 s35, s35, external_void_func_v32i32_inreg@rel32@hi+12 +; GFX9-NEXT: s_swappc_b64 s[30:31], s[34:35] ; GFX9-NEXT: v_readlane_b32 s31, v40, 27 +; GFX9-NEXT: v_readlane_b32 s30, v40, 26 ; GFX9-NEXT: v_readlane_b32 s29, v40, 25 ; GFX9-NEXT: v_readlane_b32 s28, v40, 24 ; GFX9-NEXT: v_readlane_b32 s27, v40, 23 @@ -9227,6 +9231,7 @@ define amdgpu_gfx void @test_call_external_void_func_v32i32_inreg() #0 { ; GFX10-NEXT: s_waitcnt_depctr 0xffe3 ; GFX10-NEXT: s_mov_b32 exec_lo, s34 ; GFX10-NEXT: v_writelane_b32 v40, s33, 28 +; GFX10-NEXT: s_load_dwordx2 s[34:35], s[34:35], 0x0 ; GFX10-NEXT: s_mov_b32 s33, s32 ; GFX10-NEXT: s_addk_i32 s32, 0x200 ; GFX10-NEXT: v_writelane_b32 v40, s4, 0 @@ -9245,41 +9250,30 @@ define amdgpu_gfx void @test_call_external_void_func_v32i32_inreg() #0 { ; GFX10-NEXT: v_writelane_b32 v40, s17, 13 ; GFX10-NEXT: v_writelane_b32 v40, s18, 14 ; GFX10-NEXT: v_writelane_b32 v40, s19, 15 +; GFX10-NEXT: s_waitcnt lgkmcnt(0) +; GFX10-NEXT: s_clause 0x1 +; GFX10-NEXT: s_load_dwordx16 s[36:51], s[34:35], 0x40 +; GFX10-NEXT: s_load_dwordx16 s[4:19], s[34:35], 0x0 +; GFX10-NEXT: s_getpc_b64 s[34:35] +; GFX10-NEXT: s_add_u32 s34, s34, external_void_func_v32i32_inreg@rel32@lo+4 +; GFX10-NEXT: s_addc_u32 s35, s35, external_void_func_v32i32_inreg@rel32@hi+12 ; GFX10-NEXT: v_writelane_b32 v40, s20, 16 ; GFX10-NEXT: v_writelane_b32 v40, s21, 17 ; GFX10-NEXT: v_writelane_b32 v40, s22, 18 -; GFX10-NEXT: v_writelane_b32 v40, s23, 19 -; GFX10-NEXT: v_writelane_b32 v40, s24, 20 -; GFX10-NEXT: v_writelane_b32 v40, s25, 21 -; GFX10-NEXT: v_writelane_b32 v40, s26, 22 -; GFX10-NEXT: v_writelane_b32 v40, s27, 23 -; GFX10-NEXT: v_writelane_b32 v40, s28, 24 -; GFX10-NEXT: v_writelane_b32 v40, s29, 25 -; GFX10-NEXT: v_writelane_b32 v40, s30, 26 -; GFX10-NEXT: v_writelane_b32 v40, s31, 27 -; GFX10-NEXT: s_load_dwordx2 s[30:31], s[30:31], 0x0 -; GFX10-NEXT: s_waitcnt lgkmcnt(0) -; GFX10-NEXT: s_clause 0x1 -; GFX10-NEXT: s_load_dwordx16 s[36:51], s[30:31], 0x40 -; GFX10-NEXT: s_load_dwordx16 s[4:19], s[30:31], 0x0 -; GFX10-NEXT: s_getpc_b64 s[30:31] -; GFX10-NEXT: s_add_u32 s30, s30, external_void_func_v32i32_inreg@rel32@lo+4 -; GFX10-NEXT: s_addc_u32 s31, s31, external_void_func_v32i32_inreg@rel32@hi+12 ; GFX10-NEXT: s_waitcnt lgkmcnt(0) ; GFX10-NEXT: v_mov_b32_e32 v0, s46 +; GFX10-NEXT: v_writelane_b32 v40, s23, 19 ; GFX10-NEXT: v_mov_b32_e32 v1, s47 ; GFX10-NEXT: v_mov_b32_e32 v2, s48 ; GFX10-NEXT: v_mov_b32_e32 v3, s49 ; GFX10-NEXT: s_mov_b32 s20, s36 +; GFX10-NEXT: v_writelane_b32 v40, s24, 20 ; GFX10-NEXT: s_mov_b32 s21, s37 ; GFX10-NEXT: s_mov_b32 s22, s38 ; GFX10-NEXT: s_mov_b32 s23, s39 ; GFX10-NEXT: s_mov_b32 s24, s40 +; GFX10-NEXT: v_writelane_b32 v40, s25, 21 ; GFX10-NEXT: s_mov_b32 s25, 
s41 -; GFX10-NEXT: s_mov_b32 s26, s42 -; GFX10-NEXT: s_mov_b32 s27, s43 -; GFX10-NEXT: s_mov_b32 s28, s44 -; GFX10-NEXT: s_mov_b32 s29, s45 ; GFX10-NEXT: v_mov_b32_e32 v4, s50 ; GFX10-NEXT: v_mov_b32_e32 v5, s51 ; GFX10-NEXT: buffer_store_dword v0, off, s[0:3], s32 @@ -9288,9 +9282,19 @@ define amdgpu_gfx void @test_call_external_void_func_v32i32_inreg() #0 { ; GFX10-NEXT: buffer_store_dword v3, off, s[0:3], s32 offset:12 ; GFX10-NEXT: buffer_store_dword v4, off, s[0:3], s32 offset:16 ; GFX10-NEXT: buffer_store_dword v5, off, s[0:3], s32 offset:20 -; GFX10-NEXT: s_swappc_b64 s[30:31], s[30:31] -; GFX10-NEXT: v_readlane_b32 s30, v40, 26 +; GFX10-NEXT: v_writelane_b32 v40, s26, 22 +; GFX10-NEXT: s_mov_b32 s26, s42 +; GFX10-NEXT: v_writelane_b32 v40, s27, 23 +; GFX10-NEXT: s_mov_b32 s27, s43 +; GFX10-NEXT: v_writelane_b32 v40, s28, 24 +; GFX10-NEXT: s_mov_b32 s28, s44 +; GFX10-NEXT: v_writelane_b32 v40, s29, 25 +; GFX10-NEXT: s_mov_b32 s29, s45 +; GFX10-NEXT: v_writelane_b32 v40, s30, 26 +; GFX10-NEXT: v_writelane_b32 v40, s31, 27 +; GFX10-NEXT: s_swappc_b64 s[30:31], s[34:35] ; GFX10-NEXT: v_readlane_b32 s31, v40, 27 +; GFX10-NEXT: v_readlane_b32 s30, v40, 26 ; GFX10-NEXT: v_readlane_b32 s29, v40, 25 ; GFX10-NEXT: v_readlane_b32 s28, v40, 24 ; GFX10-NEXT: v_readlane_b32 s27, v40, 23 @@ -9393,8 +9397,8 @@ define amdgpu_gfx void @test_call_external_void_func_v32i32_inreg() #0 { ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s30, 26 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s31, 27 ; GFX10-SCRATCH-NEXT: s_swappc_b64 s[30:31], s[0:1] -; GFX10-SCRATCH-NEXT: v_readlane_b32 s30, v40, 26 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s31, v40, 27 +; GFX10-SCRATCH-NEXT: v_readlane_b32 s30, v40, 26 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s29, v40, 25 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s28, v40, 24 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s27, v40, 23 @@ -9458,43 +9462,39 @@ define amdgpu_gfx void @test_call_external_void_func_v32i32_i32_inreg(i32) #0 { ; GFX9-NEXT: v_writelane_b32 v40, s16, 12 ; GFX9-NEXT: v_writelane_b32 v40, s17, 13 ; GFX9-NEXT: v_writelane_b32 v40, s18, 14 +; GFX9-NEXT: s_load_dwordx2 s[34:35], s[34:35], 0x0 ; GFX9-NEXT: v_writelane_b32 v40, s19, 15 ; GFX9-NEXT: v_writelane_b32 v40, s20, 16 ; GFX9-NEXT: v_writelane_b32 v40, s21, 17 ; GFX9-NEXT: v_writelane_b32 v40, s22, 18 ; GFX9-NEXT: v_writelane_b32 v40, s23, 19 +; GFX9-NEXT: s_waitcnt lgkmcnt(0) +; GFX9-NEXT: s_load_dword s52, s[34:35], 0x0 +; GFX9-NEXT: ; kill: killed $sgpr34_sgpr35 +; GFX9-NEXT: ; kill: killed $sgpr34_sgpr35 +; GFX9-NEXT: s_load_dwordx16 s[36:51], s[34:35], 0x40 +; GFX9-NEXT: s_load_dwordx16 s[4:19], s[34:35], 0x0 ; GFX9-NEXT: v_writelane_b32 v40, s24, 20 ; GFX9-NEXT: v_writelane_b32 v40, s25, 21 -; GFX9-NEXT: v_writelane_b32 v40, s26, 22 -; GFX9-NEXT: v_writelane_b32 v40, s27, 23 -; GFX9-NEXT: v_writelane_b32 v40, s28, 24 -; GFX9-NEXT: v_writelane_b32 v40, s29, 25 -; GFX9-NEXT: v_writelane_b32 v40, s30, 26 -; GFX9-NEXT: v_writelane_b32 v40, s31, 27 -; GFX9-NEXT: s_load_dwordx2 s[30:31], s[30:31], 0x0 ; GFX9-NEXT: s_mov_b32 s33, s32 ; GFX9-NEXT: s_addk_i32 s32, 0x400 +; GFX9-NEXT: v_writelane_b32 v40, s26, 22 ; GFX9-NEXT: s_waitcnt lgkmcnt(0) -; GFX9-NEXT: s_load_dword s34, s[30:31], 0x0 -; GFX9-NEXT: ; kill: killed $sgpr30_sgpr31 -; GFX9-NEXT: ; kill: killed $sgpr30_sgpr31 -; GFX9-NEXT: s_load_dwordx16 s[36:51], s[30:31], 0x40 -; GFX9-NEXT: s_load_dwordx16 s[4:19], s[30:31], 0x0 -; GFX9-NEXT: s_waitcnt lgkmcnt(0) -; GFX9-NEXT: v_mov_b32_e32 v0, s34 +; GFX9-NEXT: v_mov_b32_e32 v0, s52 +; GFX9-NEXT: v_writelane_b32 v40, s27, 23 
; GFX9-NEXT: buffer_store_dword v0, off, s[0:3], s32 offset:24 -; GFX9-NEXT: s_getpc_b64 s[30:31] -; GFX9-NEXT: s_add_u32 s30, s30, external_void_func_v32i32_i32_inreg@rel32@lo+4 -; GFX9-NEXT: s_addc_u32 s31, s31, external_void_func_v32i32_i32_inreg@rel32@hi+12 ; GFX9-NEXT: v_mov_b32_e32 v0, s46 +; GFX9-NEXT: v_writelane_b32 v40, s28, 24 ; GFX9-NEXT: v_mov_b32_e32 v1, s47 +; GFX9-NEXT: v_mov_b32_e32 v2, s48 ; GFX9-NEXT: buffer_store_dword v0, off, s[0:3], s32 ; GFX9-NEXT: buffer_store_dword v1, off, s[0:3], s32 offset:4 -; GFX9-NEXT: v_mov_b32_e32 v0, s48 -; GFX9-NEXT: buffer_store_dword v0, off, s[0:3], s32 offset:8 +; GFX9-NEXT: buffer_store_dword v2, off, s[0:3], s32 offset:8 ; GFX9-NEXT: v_mov_b32_e32 v0, s49 +; GFX9-NEXT: v_writelane_b32 v40, s29, 25 ; GFX9-NEXT: buffer_store_dword v0, off, s[0:3], s32 offset:12 ; GFX9-NEXT: v_mov_b32_e32 v0, s50 +; GFX9-NEXT: v_writelane_b32 v40, s30, 26 ; GFX9-NEXT: buffer_store_dword v0, off, s[0:3], s32 offset:16 ; GFX9-NEXT: v_mov_b32_e32 v0, s51 ; GFX9-NEXT: s_mov_b32 s20, s36 @@ -9507,10 +9507,14 @@ define amdgpu_gfx void @test_call_external_void_func_v32i32_i32_inreg(i32) #0 { ; GFX9-NEXT: s_mov_b32 s27, s43 ; GFX9-NEXT: s_mov_b32 s28, s44 ; GFX9-NEXT: s_mov_b32 s29, s45 +; GFX9-NEXT: v_writelane_b32 v40, s31, 27 ; GFX9-NEXT: buffer_store_dword v0, off, s[0:3], s32 offset:20 -; GFX9-NEXT: s_swappc_b64 s[30:31], s[30:31] -; GFX9-NEXT: v_readlane_b32 s30, v40, 26 +; GFX9-NEXT: s_getpc_b64 s[34:35] +; GFX9-NEXT: s_add_u32 s34, s34, external_void_func_v32i32_i32_inreg@rel32@lo+4 +; GFX9-NEXT: s_addc_u32 s35, s35, external_void_func_v32i32_i32_inreg@rel32@hi+12 +; GFX9-NEXT: s_swappc_b64 s[30:31], s[34:35] ; GFX9-NEXT: v_readlane_b32 s31, v40, 27 +; GFX9-NEXT: v_readlane_b32 s30, v40, 26 ; GFX9-NEXT: v_readlane_b32 s29, v40, 25 ; GFX9-NEXT: v_readlane_b32 s28, v40, 24 ; GFX9-NEXT: v_readlane_b32 s27, v40, 23 @@ -9554,6 +9558,7 @@ define amdgpu_gfx void @test_call_external_void_func_v32i32_i32_inreg(i32) #0 { ; GFX10-NEXT: s_waitcnt_depctr 0xffe3 ; GFX10-NEXT: s_mov_b32 exec_lo, s34 ; GFX10-NEXT: v_writelane_b32 v40, s33, 28 +; GFX10-NEXT: s_load_dwordx2 s[34:35], s[34:35], 0x0 ; GFX10-NEXT: s_mov_b32 s33, s32 ; GFX10-NEXT: s_addk_i32 s32, 0x200 ; GFX10-NEXT: v_writelane_b32 v40, s4, 0 @@ -9572,57 +9577,56 @@ define amdgpu_gfx void @test_call_external_void_func_v32i32_i32_inreg(i32) #0 { ; GFX10-NEXT: v_writelane_b32 v40, s17, 13 ; GFX10-NEXT: v_writelane_b32 v40, s18, 14 ; GFX10-NEXT: v_writelane_b32 v40, s19, 15 -; GFX10-NEXT: v_writelane_b32 v40, s20, 16 -; GFX10-NEXT: v_writelane_b32 v40, s21, 17 -; GFX10-NEXT: v_writelane_b32 v40, s22, 18 -; GFX10-NEXT: v_writelane_b32 v40, s23, 19 -; GFX10-NEXT: v_writelane_b32 v40, s24, 20 -; GFX10-NEXT: v_writelane_b32 v40, s25, 21 -; GFX10-NEXT: v_writelane_b32 v40, s26, 22 -; GFX10-NEXT: v_writelane_b32 v40, s27, 23 -; GFX10-NEXT: v_writelane_b32 v40, s28, 24 -; GFX10-NEXT: v_writelane_b32 v40, s29, 25 -; GFX10-NEXT: v_writelane_b32 v40, s30, 26 -; GFX10-NEXT: v_writelane_b32 v40, s31, 27 -; GFX10-NEXT: s_load_dwordx2 s[30:31], s[30:31], 0x0 ; GFX10-NEXT: s_waitcnt lgkmcnt(0) ; GFX10-NEXT: s_clause 0x2 -; GFX10-NEXT: s_load_dword s34, s[30:31], 0x0 +; GFX10-NEXT: s_load_dword s52, s[34:35], 0x0 ; GFX10-NEXT: ; meta instruction ; GFX10-NEXT: ; meta instruction -; GFX10-NEXT: s_load_dwordx16 s[36:51], s[30:31], 0x40 -; GFX10-NEXT: s_load_dwordx16 s[4:19], s[30:31], 0x0 -; GFX10-NEXT: s_getpc_b64 s[30:31] -; GFX10-NEXT: s_add_u32 s30, s30, external_void_func_v32i32_i32_inreg@rel32@lo+4 -; GFX10-NEXT: 
s_addc_u32 s31, s31, external_void_func_v32i32_i32_inreg@rel32@hi+12 +; GFX10-NEXT: s_load_dwordx16 s[36:51], s[34:35], 0x40 +; GFX10-NEXT: s_load_dwordx16 s[4:19], s[34:35], 0x0 +; GFX10-NEXT: s_getpc_b64 s[34:35] +; GFX10-NEXT: s_add_u32 s34, s34, external_void_func_v32i32_i32_inreg@rel32@lo+4 +; GFX10-NEXT: s_addc_u32 s35, s35, external_void_func_v32i32_i32_inreg@rel32@hi+12 +; GFX10-NEXT: v_writelane_b32 v40, s20, 16 +; GFX10-NEXT: v_writelane_b32 v40, s21, 17 +; GFX10-NEXT: v_writelane_b32 v40, s22, 18 ; GFX10-NEXT: s_waitcnt lgkmcnt(0) -; GFX10-NEXT: v_mov_b32_e32 v0, s34 +; GFX10-NEXT: v_mov_b32_e32 v0, s52 ; GFX10-NEXT: v_mov_b32_e32 v1, s47 -; GFX10-NEXT: v_mov_b32_e32 v2, s48 +; GFX10-NEXT: v_writelane_b32 v40, s23, 19 ; GFX10-NEXT: buffer_store_dword v0, off, s[0:3], s32 offset:24 ; GFX10-NEXT: v_mov_b32_e32 v0, s46 +; GFX10-NEXT: v_mov_b32_e32 v2, s48 ; GFX10-NEXT: v_mov_b32_e32 v3, s49 +; GFX10-NEXT: v_writelane_b32 v40, s24, 20 ; GFX10-NEXT: s_mov_b32 s20, s36 ; GFX10-NEXT: s_mov_b32 s21, s37 ; GFX10-NEXT: s_mov_b32 s22, s38 ; GFX10-NEXT: s_mov_b32 s23, s39 +; GFX10-NEXT: v_writelane_b32 v40, s25, 21 ; GFX10-NEXT: s_mov_b32 s24, s40 ; GFX10-NEXT: s_mov_b32 s25, s41 -; GFX10-NEXT: s_mov_b32 s26, s42 -; GFX10-NEXT: s_mov_b32 s27, s43 -; GFX10-NEXT: s_mov_b32 s28, s44 -; GFX10-NEXT: s_mov_b32 s29, s45 ; GFX10-NEXT: v_mov_b32_e32 v4, s50 ; GFX10-NEXT: v_mov_b32_e32 v5, s51 +; GFX10-NEXT: v_writelane_b32 v40, s26, 22 +; GFX10-NEXT: s_mov_b32 s26, s42 ; GFX10-NEXT: buffer_store_dword v0, off, s[0:3], s32 ; GFX10-NEXT: buffer_store_dword v1, off, s[0:3], s32 offset:4 ; GFX10-NEXT: buffer_store_dword v2, off, s[0:3], s32 offset:8 ; GFX10-NEXT: buffer_store_dword v3, off, s[0:3], s32 offset:12 ; GFX10-NEXT: buffer_store_dword v4, off, s[0:3], s32 offset:16 ; GFX10-NEXT: buffer_store_dword v5, off, s[0:3], s32 offset:20 -; GFX10-NEXT: s_swappc_b64 s[30:31], s[30:31] -; GFX10-NEXT: v_readlane_b32 s30, v40, 26 +; GFX10-NEXT: v_writelane_b32 v40, s27, 23 +; GFX10-NEXT: s_mov_b32 s27, s43 +; GFX10-NEXT: v_writelane_b32 v40, s28, 24 +; GFX10-NEXT: s_mov_b32 s28, s44 +; GFX10-NEXT: v_writelane_b32 v40, s29, 25 +; GFX10-NEXT: s_mov_b32 s29, s45 +; GFX10-NEXT: v_writelane_b32 v40, s30, 26 +; GFX10-NEXT: v_writelane_b32 v40, s31, 27 +; GFX10-NEXT: s_swappc_b64 s[30:31], s[34:35] ; GFX10-NEXT: v_readlane_b32 s31, v40, 27 +; GFX10-NEXT: v_readlane_b32 s30, v40, 26 ; GFX10-NEXT: v_readlane_b32 s29, v40, 25 ; GFX10-NEXT: v_readlane_b32 s28, v40, 24 ; GFX10-NEXT: v_readlane_b32 s27, v40, 23 @@ -9730,8 +9734,8 @@ define amdgpu_gfx void @test_call_external_void_func_v32i32_i32_inreg(i32) #0 { ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s30, 26 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s31, 27 ; GFX10-SCRATCH-NEXT: s_swappc_b64 s[30:31], s[0:1] -; GFX10-SCRATCH-NEXT: v_readlane_b32 s30, v40, 26 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s31, v40, 27 +; GFX10-SCRATCH-NEXT: v_readlane_b32 s30, v40, 26 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s29, v40, 25 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s28, v40, 24 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s27, v40, 23 @@ -9787,16 +9791,16 @@ define amdgpu_gfx void @stack_passed_arg_alignment_v32i32_f64(<32 x i32> %val, d ; GFX9-NEXT: s_addk_i32 s32, 0x400 ; GFX9-NEXT: v_writelane_b32 v40, s30, 0 ; GFX9-NEXT: v_writelane_b32 v40, s31, 1 -; GFX9-NEXT: s_getpc_b64 s[30:31] -; GFX9-NEXT: s_add_u32 s30, s30, stack_passed_f64_arg@rel32@lo+4 -; GFX9-NEXT: s_addc_u32 s31, s31, stack_passed_f64_arg@rel32@hi+12 +; GFX9-NEXT: s_getpc_b64 s[34:35] +; GFX9-NEXT: s_add_u32 s34, s34, 
stack_passed_f64_arg@rel32@lo+4 +; GFX9-NEXT: s_addc_u32 s35, s35, stack_passed_f64_arg@rel32@hi+12 ; GFX9-NEXT: s_waitcnt vmcnt(1) ; GFX9-NEXT: buffer_store_dword v32, off, s[0:3], s32 ; GFX9-NEXT: s_waitcnt vmcnt(1) ; GFX9-NEXT: buffer_store_dword v33, off, s[0:3], s32 offset:4 -; GFX9-NEXT: s_swappc_b64 s[30:31], s[30:31] -; GFX9-NEXT: v_readlane_b32 s30, v40, 0 +; GFX9-NEXT: s_swappc_b64 s[30:31], s[34:35] ; GFX9-NEXT: v_readlane_b32 s31, v40, 1 +; GFX9-NEXT: v_readlane_b32 s30, v40, 0 ; GFX9-NEXT: s_addk_i32 s32, 0xfc00 ; GFX9-NEXT: v_readlane_b32 s33, v40, 2 ; GFX9-NEXT: s_or_saveexec_b64 s[34:35], -1 @@ -9820,17 +9824,17 @@ define amdgpu_gfx void @stack_passed_arg_alignment_v32i32_f64(<32 x i32> %val, d ; GFX10-NEXT: buffer_load_dword v33, off, s[0:3], s33 offset:4 ; GFX10-NEXT: s_addk_i32 s32, 0x200 ; GFX10-NEXT: v_writelane_b32 v40, s30, 0 +; GFX10-NEXT: s_getpc_b64 s[34:35] +; GFX10-NEXT: s_add_u32 s34, s34, stack_passed_f64_arg@rel32@lo+4 +; GFX10-NEXT: s_addc_u32 s35, s35, stack_passed_f64_arg@rel32@hi+12 ; GFX10-NEXT: s_waitcnt vmcnt(1) ; GFX10-NEXT: buffer_store_dword v32, off, s[0:3], s32 ; GFX10-NEXT: s_waitcnt vmcnt(0) ; GFX10-NEXT: buffer_store_dword v33, off, s[0:3], s32 offset:4 ; GFX10-NEXT: v_writelane_b32 v40, s31, 1 -; GFX10-NEXT: s_getpc_b64 s[30:31] -; GFX10-NEXT: s_add_u32 s30, s30, stack_passed_f64_arg@rel32@lo+4 -; GFX10-NEXT: s_addc_u32 s31, s31, stack_passed_f64_arg@rel32@hi+12 -; GFX10-NEXT: s_swappc_b64 s[30:31], s[30:31] -; GFX10-NEXT: v_readlane_b32 s30, v40, 0 +; GFX10-NEXT: s_swappc_b64 s[30:31], s[34:35] ; GFX10-NEXT: v_readlane_b32 s31, v40, 1 +; GFX10-NEXT: v_readlane_b32 s30, v40, 0 ; GFX10-NEXT: s_addk_i32 s32, 0xfe00 ; GFX10-NEXT: v_readlane_b32 s33, v40, 2 ; GFX10-NEXT: s_or_saveexec_b32 s34, -1 @@ -9860,8 +9864,8 @@ define amdgpu_gfx void @stack_passed_arg_alignment_v32i32_f64(<32 x i32> %val, d ; GFX10-SCRATCH-NEXT: s_waitcnt vmcnt(0) ; GFX10-SCRATCH-NEXT: scratch_store_dwordx2 off, v[32:33], s32 ; GFX10-SCRATCH-NEXT: s_swappc_b64 s[30:31], s[0:1] -; GFX10-SCRATCH-NEXT: v_readlane_b32 s30, v40, 0 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s31, v40, 1 +; GFX10-SCRATCH-NEXT: v_readlane_b32 s30, v40, 0 ; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, -16 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s33, v40, 2 ; GFX10-SCRATCH-NEXT: s_or_saveexec_b32 s0, -1 @@ -9927,12 +9931,12 @@ define amdgpu_gfx void @stack_12xv3i32() #0 { ; GFX9-NEXT: v_mov_b32_e32 v30, 10 ; GFX9-NEXT: v_mov_b32_e32 v31, 11 ; GFX9-NEXT: v_writelane_b32 v40, s31, 1 -; GFX9-NEXT: s_getpc_b64 s[30:31] -; GFX9-NEXT: s_add_u32 s30, s30, external_void_func_12xv3i32@rel32@lo+4 -; GFX9-NEXT: s_addc_u32 s31, s31, external_void_func_12xv3i32@rel32@hi+12 -; GFX9-NEXT: s_swappc_b64 s[30:31], s[30:31] -; GFX9-NEXT: v_readlane_b32 s30, v40, 0 +; GFX9-NEXT: s_getpc_b64 s[34:35] +; GFX9-NEXT: s_add_u32 s34, s34, external_void_func_12xv3i32@rel32@lo+4 +; GFX9-NEXT: s_addc_u32 s35, s35, external_void_func_12xv3i32@rel32@hi+12 +; GFX9-NEXT: s_swappc_b64 s[30:31], s[34:35] ; GFX9-NEXT: v_readlane_b32 s31, v40, 1 +; GFX9-NEXT: v_readlane_b32 s30, v40, 0 ; GFX9-NEXT: s_addk_i32 s32, 0xfc00 ; GFX9-NEXT: v_readlane_b32 s33, v40, 2 ; GFX9-NEXT: s_or_saveexec_b64 s[34:35], -1 @@ -9994,12 +9998,12 @@ define amdgpu_gfx void @stack_12xv3i32() #0 { ; GFX10-NEXT: v_mov_b32_e32 v30, 10 ; GFX10-NEXT: v_mov_b32_e32 v31, 11 ; GFX10-NEXT: v_writelane_b32 v40, s31, 1 -; GFX10-NEXT: s_getpc_b64 s[30:31] -; GFX10-NEXT: s_add_u32 s30, s30, external_void_func_12xv3i32@rel32@lo+4 -; GFX10-NEXT: s_addc_u32 s31, s31, 
external_void_func_12xv3i32@rel32@hi+12 -; GFX10-NEXT: s_swappc_b64 s[30:31], s[30:31] -; GFX10-NEXT: v_readlane_b32 s30, v40, 0 +; GFX10-NEXT: s_getpc_b64 s[34:35] +; GFX10-NEXT: s_add_u32 s34, s34, external_void_func_12xv3i32@rel32@lo+4 +; GFX10-NEXT: s_addc_u32 s35, s35, external_void_func_12xv3i32@rel32@hi+12 +; GFX10-NEXT: s_swappc_b64 s[30:31], s[34:35] ; GFX10-NEXT: v_readlane_b32 s31, v40, 1 +; GFX10-NEXT: v_readlane_b32 s30, v40, 0 ; GFX10-NEXT: s_addk_i32 s32, 0xfe00 ; GFX10-NEXT: v_readlane_b32 s33, v40, 2 ; GFX10-NEXT: s_or_saveexec_b32 s34, -1 @@ -10063,8 +10067,8 @@ define amdgpu_gfx void @stack_12xv3i32() #0 { ; GFX10-SCRATCH-NEXT: s_add_u32 s0, s0, external_void_func_12xv3i32@rel32@lo+4 ; GFX10-SCRATCH-NEXT: s_addc_u32 s1, s1, external_void_func_12xv3i32@rel32@hi+12 ; GFX10-SCRATCH-NEXT: s_swappc_b64 s[30:31], s[0:1] -; GFX10-SCRATCH-NEXT: v_readlane_b32 s30, v40, 0 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s31, v40, 1 +; GFX10-SCRATCH-NEXT: v_readlane_b32 s30, v40, 0 ; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, -16 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s33, v40, 2 ; GFX10-SCRATCH-NEXT: s_or_saveexec_b32 s0, -1 @@ -10150,12 +10154,12 @@ define amdgpu_gfx void @stack_8xv5i32() #0 { ; GFX9-NEXT: v_mov_b32_e32 v30, 6 ; GFX9-NEXT: v_mov_b32_e32 v31, 7 ; GFX9-NEXT: v_writelane_b32 v40, s31, 1 -; GFX9-NEXT: s_getpc_b64 s[30:31] -; GFX9-NEXT: s_add_u32 s30, s30, external_void_func_8xv5i32@rel32@lo+4 -; GFX9-NEXT: s_addc_u32 s31, s31, external_void_func_8xv5i32@rel32@hi+12 -; GFX9-NEXT: s_swappc_b64 s[30:31], s[30:31] -; GFX9-NEXT: v_readlane_b32 s30, v40, 0 +; GFX9-NEXT: s_getpc_b64 s[34:35] +; GFX9-NEXT: s_add_u32 s34, s34, external_void_func_8xv5i32@rel32@lo+4 +; GFX9-NEXT: s_addc_u32 s35, s35, external_void_func_8xv5i32@rel32@hi+12 +; GFX9-NEXT: s_swappc_b64 s[30:31], s[34:35] ; GFX9-NEXT: v_readlane_b32 s31, v40, 1 +; GFX9-NEXT: v_readlane_b32 s30, v40, 0 ; GFX9-NEXT: s_addk_i32 s32, 0xfc00 ; GFX9-NEXT: v_readlane_b32 s33, v40, 2 ; GFX9-NEXT: s_or_saveexec_b64 s[34:35], -1 @@ -10225,12 +10229,12 @@ define amdgpu_gfx void @stack_8xv5i32() #0 { ; GFX10-NEXT: v_mov_b32_e32 v30, 6 ; GFX10-NEXT: v_mov_b32_e32 v31, 7 ; GFX10-NEXT: v_writelane_b32 v40, s31, 1 -; GFX10-NEXT: s_getpc_b64 s[30:31] -; GFX10-NEXT: s_add_u32 s30, s30, external_void_func_8xv5i32@rel32@lo+4 -; GFX10-NEXT: s_addc_u32 s31, s31, external_void_func_8xv5i32@rel32@hi+12 -; GFX10-NEXT: s_swappc_b64 s[30:31], s[30:31] -; GFX10-NEXT: v_readlane_b32 s30, v40, 0 +; GFX10-NEXT: s_getpc_b64 s[34:35] +; GFX10-NEXT: s_add_u32 s34, s34, external_void_func_8xv5i32@rel32@lo+4 +; GFX10-NEXT: s_addc_u32 s35, s35, external_void_func_8xv5i32@rel32@hi+12 +; GFX10-NEXT: s_swappc_b64 s[30:31], s[34:35] ; GFX10-NEXT: v_readlane_b32 s31, v40, 1 +; GFX10-NEXT: v_readlane_b32 s30, v40, 0 ; GFX10-NEXT: s_addk_i32 s32, 0xfe00 ; GFX10-NEXT: v_readlane_b32 s33, v40, 2 ; GFX10-NEXT: s_or_saveexec_b32 s34, -1 @@ -10299,8 +10303,8 @@ define amdgpu_gfx void @stack_8xv5i32() #0 { ; GFX10-SCRATCH-NEXT: s_add_u32 s0, s0, external_void_func_8xv5i32@rel32@lo+4 ; GFX10-SCRATCH-NEXT: s_addc_u32 s1, s1, external_void_func_8xv5i32@rel32@hi+12 ; GFX10-SCRATCH-NEXT: s_swappc_b64 s[30:31], s[0:1] -; GFX10-SCRATCH-NEXT: v_readlane_b32 s30, v40, 0 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s31, v40, 1 +; GFX10-SCRATCH-NEXT: v_readlane_b32 s30, v40, 0 ; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, -16 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s33, v40, 2 ; GFX10-SCRATCH-NEXT: s_or_saveexec_b32 s0, -1 @@ -10382,12 +10386,12 @@ define amdgpu_gfx void @stack_8xv5f32() #0 { ; GFX9-NEXT: 
v_mov_b32_e32 v30, 0x40c00000 ; GFX9-NEXT: v_mov_b32_e32 v31, 0x40e00000 ; GFX9-NEXT: v_writelane_b32 v40, s31, 1 -; GFX9-NEXT: s_getpc_b64 s[30:31] -; GFX9-NEXT: s_add_u32 s30, s30, external_void_func_8xv5f32@rel32@lo+4 -; GFX9-NEXT: s_addc_u32 s31, s31, external_void_func_8xv5f32@rel32@hi+12 -; GFX9-NEXT: s_swappc_b64 s[30:31], s[30:31] -; GFX9-NEXT: v_readlane_b32 s30, v40, 0 +; GFX9-NEXT: s_getpc_b64 s[34:35] +; GFX9-NEXT: s_add_u32 s34, s34, external_void_func_8xv5f32@rel32@lo+4 +; GFX9-NEXT: s_addc_u32 s35, s35, external_void_func_8xv5f32@rel32@hi+12 +; GFX9-NEXT: s_swappc_b64 s[30:31], s[34:35] ; GFX9-NEXT: v_readlane_b32 s31, v40, 1 +; GFX9-NEXT: v_readlane_b32 s30, v40, 0 ; GFX9-NEXT: s_addk_i32 s32, 0xfc00 ; GFX9-NEXT: v_readlane_b32 s33, v40, 2 ; GFX9-NEXT: s_or_saveexec_b64 s[34:35], -1 @@ -10457,12 +10461,12 @@ define amdgpu_gfx void @stack_8xv5f32() #0 { ; GFX10-NEXT: v_mov_b32_e32 v30, 0x40c00000 ; GFX10-NEXT: v_mov_b32_e32 v31, 0x40e00000 ; GFX10-NEXT: v_writelane_b32 v40, s31, 1 -; GFX10-NEXT: s_getpc_b64 s[30:31] -; GFX10-NEXT: s_add_u32 s30, s30, external_void_func_8xv5f32@rel32@lo+4 -; GFX10-NEXT: s_addc_u32 s31, s31, external_void_func_8xv5f32@rel32@hi+12 -; GFX10-NEXT: s_swappc_b64 s[30:31], s[30:31] -; GFX10-NEXT: v_readlane_b32 s30, v40, 0 +; GFX10-NEXT: s_getpc_b64 s[34:35] +; GFX10-NEXT: s_add_u32 s34, s34, external_void_func_8xv5f32@rel32@lo+4 +; GFX10-NEXT: s_addc_u32 s35, s35, external_void_func_8xv5f32@rel32@hi+12 +; GFX10-NEXT: s_swappc_b64 s[30:31], s[34:35] ; GFX10-NEXT: v_readlane_b32 s31, v40, 1 +; GFX10-NEXT: v_readlane_b32 s30, v40, 0 ; GFX10-NEXT: s_addk_i32 s32, 0xfe00 ; GFX10-NEXT: v_readlane_b32 s33, v40, 2 ; GFX10-NEXT: s_or_saveexec_b32 s34, -1 @@ -10531,8 +10535,8 @@ define amdgpu_gfx void @stack_8xv5f32() #0 { ; GFX10-SCRATCH-NEXT: s_add_u32 s0, s0, external_void_func_8xv5f32@rel32@lo+4 ; GFX10-SCRATCH-NEXT: s_addc_u32 s1, s1, external_void_func_8xv5f32@rel32@hi+12 ; GFX10-SCRATCH-NEXT: s_swappc_b64 s[30:31], s[0:1] -; GFX10-SCRATCH-NEXT: v_readlane_b32 s30, v40, 0 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s31, v40, 1 +; GFX10-SCRATCH-NEXT: v_readlane_b32 s30, v40, 0 ; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, -16 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s33, v40, 2 ; GFX10-SCRATCH-NEXT: s_or_saveexec_b32 s0, -1 diff --git a/llvm/test/CodeGen/AMDGPU/gfx-callable-preserved-registers.ll b/llvm/test/CodeGen/AMDGPU/gfx-callable-preserved-registers.ll index fa8f51cf412f..26bc62d0a99b 100644 --- a/llvm/test/CodeGen/AMDGPU/gfx-callable-preserved-registers.ll +++ b/llvm/test/CodeGen/AMDGPU/gfx-callable-preserved-registers.ll @@ -25,8 +25,8 @@ define amdgpu_gfx void @test_call_external_void_func_void_clobber_s30_s31_call_e ; GFX9-NEXT: ;;#ASMSTART ; GFX9-NEXT: ;;#ASMEND ; GFX9-NEXT: s_swappc_b64 s[30:31], s[4:5] -; GFX9-NEXT: v_readlane_b32 s30, v40, 2 ; GFX9-NEXT: v_readlane_b32 s31, v40, 3 +; GFX9-NEXT: v_readlane_b32 s30, v40, 2 ; GFX9-NEXT: v_readlane_b32 s5, v40, 1 ; GFX9-NEXT: v_readlane_b32 s4, v40, 0 ; GFX9-NEXT: s_addk_i32 s32, 0xfc00 @@ -59,8 +59,8 @@ define amdgpu_gfx void @test_call_external_void_func_void_clobber_s30_s31_call_e ; GFX10-NEXT: ;;#ASMSTART ; GFX10-NEXT: ;;#ASMEND ; GFX10-NEXT: s_swappc_b64 s[30:31], s[4:5] -; GFX10-NEXT: v_readlane_b32 s30, v40, 2 ; GFX10-NEXT: v_readlane_b32 s31, v40, 3 +; GFX10-NEXT: v_readlane_b32 s30, v40, 2 ; GFX10-NEXT: v_readlane_b32 s5, v40, 1 ; GFX10-NEXT: v_readlane_b32 s4, v40, 0 ; GFX10-NEXT: s_addk_i32 s32, 0xfe00 @@ -77,26 +77,54 @@ define amdgpu_gfx void 
@test_call_external_void_func_void_clobber_s30_s31_call_e ret void } -define amdgpu_gfx void @void_func_void_clobber_s30_s31() #1 { -; GFX9-LABEL: void_func_void_clobber_s30_s31: +define amdgpu_gfx void @void_func_void_clobber_s28_s29() #1 { +; GFX9-LABEL: void_func_void_clobber_s28_s29: ; GFX9: ; %bb.0: ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX9-NEXT: s_mov_b64 s[36:37], s[30:31] +; GFX9-NEXT: s_or_saveexec_b64 s[34:35], -1 +; GFX9-NEXT: buffer_store_dword v0, off, s[0:3], s32 ; 4-byte Folded Spill +; GFX9-NEXT: s_mov_b64 exec, s[34:35] +; GFX9-NEXT: v_writelane_b32 v0, s28, 0 +; GFX9-NEXT: v_writelane_b32 v0, s29, 1 ; GFX9-NEXT: ;;#ASMSTART ; GFX9-NEXT: ; clobber ; GFX9-NEXT: ;;#ASMEND -; GFX9-NEXT: s_setpc_b64 s[36:37] +; GFX9-NEXT: v_readlane_b32 s29, v0, 1 +; GFX9-NEXT: v_readlane_b32 s28, v0, 0 +; GFX9-NEXT: s_or_saveexec_b64 s[34:35], -1 +; GFX9-NEXT: buffer_load_dword v0, off, s[0:3], s32 ; 4-byte Folded Reload +; GFX9-NEXT: s_mov_b64 exec, s[34:35] +; GFX9-NEXT: s_waitcnt vmcnt(0) +; GFX9-NEXT: s_setpc_b64 s[30:31] ; -; GFX10-LABEL: void_func_void_clobber_s30_s31: +; GFX10-LABEL: void_func_void_clobber_s28_s29: ; GFX10: ; %bb.0: ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 -; GFX10-NEXT: s_mov_b64 s[36:37], s[30:31] +; GFX10-NEXT: s_or_saveexec_b32 s34, -1 +; GFX10-NEXT: buffer_store_dword v0, off, s[0:3], s32 ; 4-byte Folded Spill +; GFX10-NEXT: s_waitcnt_depctr 0xffe3 +; GFX10-NEXT: s_mov_b32 exec_lo, s34 +; GFX10-NEXT: v_writelane_b32 v0, s28, 0 +; GFX10-NEXT: v_writelane_b32 v0, s29, 1 ; GFX10-NEXT: ;;#ASMSTART ; GFX10-NEXT: ; clobber ; GFX10-NEXT: ;;#ASMEND -; GFX10-NEXT: s_setpc_b64 s[36:37] - call void asm sideeffect "; clobber", "~{s[30:31]}"() #0 +; GFX10-NEXT: v_readlane_b32 s29, v0, 1 +; GFX10-NEXT: v_readlane_b32 s28, v0, 0 +; GFX10-NEXT: s_or_saveexec_b32 s34, -1 +; GFX10-NEXT: buffer_load_dword v0, off, s[0:3], s32 ; 4-byte Folded Reload +; GFX10-NEXT: s_waitcnt_depctr 0xffe3 +; GFX10-NEXT: s_mov_b32 exec_lo, s34 +; GFX10-NEXT: s_waitcnt vmcnt(0) +; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 +; GFX10-NEXT: s_setpc_b64 s[30:31] +; GCN: v_writelane_b32 v0, s28, 0 +; GCN: v_writelane_b32 v0, s29, 1 + +; GCN: v_readlane_b32 s28, v0, 0 +; GCN: v_readlane_b32 s29, v0, 1 + call void asm sideeffect "; clobber", "~{s[28:29]}"() #0 ret void } @@ -109,24 +137,24 @@ define amdgpu_gfx void @test_call_void_func_void_mayclobber_s31(i32 addrspace(1) ; GFX9-NEXT: s_mov_b64 exec, s[34:35] ; GFX9-NEXT: v_writelane_b32 v40, s33, 3 ; GFX9-NEXT: v_writelane_b32 v40, s4, 0 -; GFX9-NEXT: v_writelane_b32 v40, s30, 1 ; GFX9-NEXT: s_mov_b32 s33, s32 ; GFX9-NEXT: s_addk_i32 s32, 0x400 +; GFX9-NEXT: v_writelane_b32 v40, s30, 1 ; GFX9-NEXT: v_writelane_b32 v40, s31, 2 ; GFX9-NEXT: ;;#ASMSTART ; GFX9-NEXT: ; def s31 ; GFX9-NEXT: ;;#ASMEND ; GFX9-NEXT: s_mov_b32 s4, s31 -; GFX9-NEXT: s_getpc_b64 s[30:31] -; GFX9-NEXT: s_add_u32 s30, s30, external_void_func_void@rel32@lo+4 -; GFX9-NEXT: s_addc_u32 s31, s31, external_void_func_void@rel32@hi+12 -; GFX9-NEXT: s_swappc_b64 s[30:31], s[30:31] +; GFX9-NEXT: s_getpc_b64 s[34:35] +; GFX9-NEXT: s_add_u32 s34, s34, external_void_func_void@rel32@lo+4 +; GFX9-NEXT: s_addc_u32 s35, s35, external_void_func_void@rel32@hi+12 +; GFX9-NEXT: s_swappc_b64 s[30:31], s[34:35] ; GFX9-NEXT: s_mov_b32 s31, s4 ; GFX9-NEXT: ;;#ASMSTART ; GFX9-NEXT: ; use s31 ; GFX9-NEXT: ;;#ASMEND -; GFX9-NEXT: v_readlane_b32 s30, v40, 1 ; GFX9-NEXT: v_readlane_b32 s31, v40, 2 +; GFX9-NEXT: v_readlane_b32 s30, v40, 1 ; GFX9-NEXT: 
v_readlane_b32 s4, v40, 0 ; GFX9-NEXT: s_addk_i32 s32, 0xfc00 ; GFX9-NEXT: v_readlane_b32 s33, v40, 3 @@ -162,8 +190,8 @@ define amdgpu_gfx void @test_call_void_func_void_mayclobber_s31(i32 addrspace(1) ; GFX10-NEXT: ;;#ASMSTART ; GFX10-NEXT: ; use s31 ; GFX10-NEXT: ;;#ASMEND -; GFX10-NEXT: v_readlane_b32 s30, v40, 1 ; GFX10-NEXT: v_readlane_b32 s31, v40, 2 +; GFX10-NEXT: v_readlane_b32 s30, v40, 1 ; GFX10-NEXT: v_readlane_b32 s4, v40, 0 ; GFX10-NEXT: s_addk_i32 s32, 0xfe00 ; GFX10-NEXT: v_readlane_b32 s33, v40, 3 @@ -196,17 +224,17 @@ define amdgpu_gfx void @test_call_void_func_void_mayclobber_v31(i32 addrspace(1) ; GFX9-NEXT: ; def v31 ; GFX9-NEXT: ;;#ASMEND ; GFX9-NEXT: v_mov_b32_e32 v41, v31 -; GFX9-NEXT: s_getpc_b64 s[30:31] -; GFX9-NEXT: s_add_u32 s30, s30, external_void_func_void@rel32@lo+4 -; GFX9-NEXT: s_addc_u32 s31, s31, external_void_func_void@rel32@hi+12 -; GFX9-NEXT: s_swappc_b64 s[30:31], s[30:31] +; GFX9-NEXT: s_getpc_b64 s[34:35] +; GFX9-NEXT: s_add_u32 s34, s34, external_void_func_void@rel32@lo+4 +; GFX9-NEXT: s_addc_u32 s35, s35, external_void_func_void@rel32@hi+12 +; GFX9-NEXT: s_swappc_b64 s[30:31], s[34:35] ; GFX9-NEXT: v_mov_b32_e32 v31, v41 ; GFX9-NEXT: ;;#ASMSTART ; GFX9-NEXT: ; use v31 ; GFX9-NEXT: ;;#ASMEND ; GFX9-NEXT: buffer_load_dword v41, off, s[0:3], s33 ; 4-byte Folded Reload -; GFX9-NEXT: v_readlane_b32 s30, v40, 0 ; GFX9-NEXT: v_readlane_b32 s31, v40, 1 +; GFX9-NEXT: v_readlane_b32 s30, v40, 0 ; GFX9-NEXT: s_addk_i32 s32, 0xfc00 ; GFX9-NEXT: v_readlane_b32 s33, v40, 2 ; GFX9-NEXT: s_or_saveexec_b64 s[34:35], -1 @@ -232,18 +260,18 @@ define amdgpu_gfx void @test_call_void_func_void_mayclobber_v31(i32 addrspace(1) ; GFX10-NEXT: ;;#ASMEND ; GFX10-NEXT: v_writelane_b32 v40, s30, 0 ; GFX10-NEXT: v_mov_b32_e32 v41, v31 +; GFX10-NEXT: s_getpc_b64 s[34:35] +; GFX10-NEXT: s_add_u32 s34, s34, external_void_func_void@rel32@lo+4 +; GFX10-NEXT: s_addc_u32 s35, s35, external_void_func_void@rel32@hi+12 ; GFX10-NEXT: v_writelane_b32 v40, s31, 1 -; GFX10-NEXT: s_getpc_b64 s[30:31] -; GFX10-NEXT: s_add_u32 s30, s30, external_void_func_void@rel32@lo+4 -; GFX10-NEXT: s_addc_u32 s31, s31, external_void_func_void@rel32@hi+12 -; GFX10-NEXT: s_swappc_b64 s[30:31], s[30:31] +; GFX10-NEXT: s_swappc_b64 s[30:31], s[34:35] ; GFX10-NEXT: v_mov_b32_e32 v31, v41 ; GFX10-NEXT: ;;#ASMSTART ; GFX10-NEXT: ; use v31 ; GFX10-NEXT: ;;#ASMEND ; GFX10-NEXT: buffer_load_dword v41, off, s[0:3], s33 ; 4-byte Folded Reload -; GFX10-NEXT: v_readlane_b32 s30, v40, 0 ; GFX10-NEXT: v_readlane_b32 s31, v40, 1 +; GFX10-NEXT: v_readlane_b32 s30, v40, 0 ; GFX10-NEXT: s_addk_i32 s32, 0xfe00 ; GFX10-NEXT: v_readlane_b32 s33, v40, 2 ; GFX10-NEXT: s_or_saveexec_b32 s34, -1 @@ -276,16 +304,16 @@ define amdgpu_gfx void @test_call_void_func_void_preserves_s33(i32 addrspace(1)* ; GFX9-NEXT: ; def s33 ; GFX9-NEXT: ;;#ASMEND ; GFX9-NEXT: s_mov_b32 s4, s33 -; GFX9-NEXT: s_getpc_b64 s[30:31] -; GFX9-NEXT: s_add_u32 s30, s30, external_void_func_void@rel32@lo+4 -; GFX9-NEXT: s_addc_u32 s31, s31, external_void_func_void@rel32@hi+12 -; GFX9-NEXT: s_swappc_b64 s[30:31], s[30:31] +; GFX9-NEXT: s_getpc_b64 s[34:35] +; GFX9-NEXT: s_add_u32 s34, s34, external_void_func_void@rel32@lo+4 +; GFX9-NEXT: s_addc_u32 s35, s35, external_void_func_void@rel32@hi+12 +; GFX9-NEXT: s_swappc_b64 s[30:31], s[34:35] ; GFX9-NEXT: s_mov_b32 s33, s4 -; GFX9-NEXT: v_readlane_b32 s30, v40, 1 ; GFX9-NEXT: ;;#ASMSTART ; GFX9-NEXT: ; use s33 ; GFX9-NEXT: ;;#ASMEND ; GFX9-NEXT: v_readlane_b32 s31, v40, 2 +; GFX9-NEXT: v_readlane_b32 s30, v40, 1 ; 
GFX9-NEXT: v_readlane_b32 s4, v40, 0 ; GFX9-NEXT: s_addk_i32 s32, 0xfc00 ; GFX9-NEXT: v_readlane_b32 s33, v40, 3 @@ -306,6 +334,9 @@ define amdgpu_gfx void @test_call_void_func_void_preserves_s33(i32 addrspace(1)* ; GFX10-NEXT: v_writelane_b32 v40, s33, 3 ; GFX10-NEXT: s_mov_b32 s33, s32 ; GFX10-NEXT: s_addk_i32 s32, 0x200 +; GFX10-NEXT: s_getpc_b64 s[34:35] +; GFX10-NEXT: s_add_u32 s34, s34, external_void_func_void@rel32@lo+4 +; GFX10-NEXT: s_addc_u32 s35, s35, external_void_func_void@rel32@hi+12 ; GFX10-NEXT: ;;#ASMSTART ; GFX10-NEXT: ; def s33 ; GFX10-NEXT: ;;#ASMEND @@ -313,16 +344,13 @@ define amdgpu_gfx void @test_call_void_func_void_preserves_s33(i32 addrspace(1)* ; GFX10-NEXT: s_mov_b32 s4, s33 ; GFX10-NEXT: v_writelane_b32 v40, s30, 1 ; GFX10-NEXT: v_writelane_b32 v40, s31, 2 -; GFX10-NEXT: s_getpc_b64 s[30:31] -; GFX10-NEXT: s_add_u32 s30, s30, external_void_func_void@rel32@lo+4 -; GFX10-NEXT: s_addc_u32 s31, s31, external_void_func_void@rel32@hi+12 -; GFX10-NEXT: s_swappc_b64 s[30:31], s[30:31] +; GFX10-NEXT: s_swappc_b64 s[30:31], s[34:35] ; GFX10-NEXT: s_mov_b32 s33, s4 -; GFX10-NEXT: v_readlane_b32 s30, v40, 1 ; GFX10-NEXT: ;;#ASMSTART ; GFX10-NEXT: ; use s33 ; GFX10-NEXT: ;;#ASMEND ; GFX10-NEXT: v_readlane_b32 s31, v40, 2 +; GFX10-NEXT: v_readlane_b32 s30, v40, 1 ; GFX10-NEXT: v_readlane_b32 s4, v40, 0 ; GFX10-NEXT: s_addk_i32 s32, 0xfe00 ; GFX10-NEXT: v_readlane_b32 s33, v40, 3 @@ -350,21 +378,21 @@ define amdgpu_gfx void @test_call_void_func_void_preserves_s34(i32 addrspace(1)* ; GFX9-NEXT: s_mov_b32 s33, s32 ; GFX9-NEXT: s_addk_i32 s32, 0x400 ; GFX9-NEXT: v_writelane_b32 v40, s30, 1 -; GFX9-NEXT: v_writelane_b32 v40, s31, 2 ; GFX9-NEXT: ;;#ASMSTART ; GFX9-NEXT: ; def s34 ; GFX9-NEXT: ;;#ASMEND +; GFX9-NEXT: v_writelane_b32 v40, s31, 2 ; GFX9-NEXT: s_mov_b32 s4, s34 -; GFX9-NEXT: s_getpc_b64 s[30:31] -; GFX9-NEXT: s_add_u32 s30, s30, external_void_func_void@rel32@lo+4 -; GFX9-NEXT: s_addc_u32 s31, s31, external_void_func_void@rel32@hi+12 -; GFX9-NEXT: s_swappc_b64 s[30:31], s[30:31] -; GFX9-NEXT: v_readlane_b32 s30, v40, 1 +; GFX9-NEXT: s_getpc_b64 s[34:35] +; GFX9-NEXT: s_add_u32 s34, s34, external_void_func_void@rel32@lo+4 +; GFX9-NEXT: s_addc_u32 s35, s35, external_void_func_void@rel32@hi+12 +; GFX9-NEXT: s_swappc_b64 s[30:31], s[34:35] ; GFX9-NEXT: s_mov_b32 s34, s4 ; GFX9-NEXT: ;;#ASMSTART ; GFX9-NEXT: ; use s34 ; GFX9-NEXT: ;;#ASMEND ; GFX9-NEXT: v_readlane_b32 s31, v40, 2 +; GFX9-NEXT: v_readlane_b32 s30, v40, 1 ; GFX9-NEXT: v_readlane_b32 s4, v40, 0 ; GFX9-NEXT: s_addk_i32 s32, 0xfc00 ; GFX9-NEXT: v_readlane_b32 s33, v40, 3 @@ -385,6 +413,9 @@ define amdgpu_gfx void @test_call_void_func_void_preserves_s34(i32 addrspace(1)* ; GFX10-NEXT: v_writelane_b32 v40, s33, 3 ; GFX10-NEXT: s_mov_b32 s33, s32 ; GFX10-NEXT: s_addk_i32 s32, 0x200 +; GFX10-NEXT: s_getpc_b64 s[36:37] +; GFX10-NEXT: s_add_u32 s36, s36, external_void_func_void@rel32@lo+4 +; GFX10-NEXT: s_addc_u32 s37, s37, external_void_func_void@rel32@hi+12 ; GFX10-NEXT: ;;#ASMSTART ; GFX10-NEXT: ; def s34 ; GFX10-NEXT: ;;#ASMEND @@ -392,16 +423,13 @@ define amdgpu_gfx void @test_call_void_func_void_preserves_s34(i32 addrspace(1)* ; GFX10-NEXT: s_mov_b32 s4, s34 ; GFX10-NEXT: v_writelane_b32 v40, s30, 1 ; GFX10-NEXT: v_writelane_b32 v40, s31, 2 -; GFX10-NEXT: s_getpc_b64 s[30:31] -; GFX10-NEXT: s_add_u32 s30, s30, external_void_func_void@rel32@lo+4 -; GFX10-NEXT: s_addc_u32 s31, s31, external_void_func_void@rel32@hi+12 -; GFX10-NEXT: s_swappc_b64 s[30:31], s[30:31] -; GFX10-NEXT: v_readlane_b32 s30, v40, 1 +; 
GFX10-NEXT: s_swappc_b64 s[30:31], s[36:37] ; GFX10-NEXT: s_mov_b32 s34, s4 ; GFX10-NEXT: ;;#ASMSTART ; GFX10-NEXT: ; use s34 ; GFX10-NEXT: ;;#ASMEND ; GFX10-NEXT: v_readlane_b32 s31, v40, 2 +; GFX10-NEXT: v_readlane_b32 s30, v40, 1 ; GFX10-NEXT: v_readlane_b32 s4, v40, 0 ; GFX10-NEXT: s_addk_i32 s32, 0xfe00 ; GFX10-NEXT: v_readlane_b32 s33, v40, 3 @@ -433,16 +461,16 @@ define amdgpu_gfx void @test_call_void_func_void_preserves_v40(i32 addrspace(1)* ; GFX9-NEXT: ;;#ASMSTART ; GFX9-NEXT: ; def v40 ; GFX9-NEXT: ;;#ASMEND -; GFX9-NEXT: s_getpc_b64 s[30:31] -; GFX9-NEXT: s_add_u32 s30, s30, external_void_func_void@rel32@lo+4 -; GFX9-NEXT: s_addc_u32 s31, s31, external_void_func_void@rel32@hi+12 -; GFX9-NEXT: s_swappc_b64 s[30:31], s[30:31] +; GFX9-NEXT: s_getpc_b64 s[34:35] +; GFX9-NEXT: s_add_u32 s34, s34, external_void_func_void@rel32@lo+4 +; GFX9-NEXT: s_addc_u32 s35, s35, external_void_func_void@rel32@hi+12 +; GFX9-NEXT: s_swappc_b64 s[30:31], s[34:35] ; GFX9-NEXT: ;;#ASMSTART ; GFX9-NEXT: ; use v40 ; GFX9-NEXT: ;;#ASMEND ; GFX9-NEXT: buffer_load_dword v40, off, s[0:3], s33 ; 4-byte Folded Reload -; GFX9-NEXT: v_readlane_b32 s30, v41, 0 ; GFX9-NEXT: v_readlane_b32 s31, v41, 1 +; GFX9-NEXT: v_readlane_b32 s30, v41, 0 ; GFX9-NEXT: s_addk_i32 s32, 0xfc00 ; GFX9-NEXT: v_readlane_b32 s33, v41, 2 ; GFX9-NEXT: s_or_saveexec_b64 s[34:35], -1 @@ -467,17 +495,17 @@ define amdgpu_gfx void @test_call_void_func_void_preserves_v40(i32 addrspace(1)* ; GFX10-NEXT: ; def v40 ; GFX10-NEXT: ;;#ASMEND ; GFX10-NEXT: v_writelane_b32 v41, s30, 0 +; GFX10-NEXT: s_getpc_b64 s[34:35] +; GFX10-NEXT: s_add_u32 s34, s34, external_void_func_void@rel32@lo+4 +; GFX10-NEXT: s_addc_u32 s35, s35, external_void_func_void@rel32@hi+12 ; GFX10-NEXT: v_writelane_b32 v41, s31, 1 -; GFX10-NEXT: s_getpc_b64 s[30:31] -; GFX10-NEXT: s_add_u32 s30, s30, external_void_func_void@rel32@lo+4 -; GFX10-NEXT: s_addc_u32 s31, s31, external_void_func_void@rel32@hi+12 -; GFX10-NEXT: s_swappc_b64 s[30:31], s[30:31] +; GFX10-NEXT: s_swappc_b64 s[30:31], s[34:35] ; GFX10-NEXT: ;;#ASMSTART ; GFX10-NEXT: ; use v40 ; GFX10-NEXT: ;;#ASMEND ; GFX10-NEXT: buffer_load_dword v40, off, s[0:3], s33 ; 4-byte Folded Reload -; GFX10-NEXT: v_readlane_b32 s30, v41, 0 ; GFX10-NEXT: v_readlane_b32 s31, v41, 1 +; GFX10-NEXT: v_readlane_b32 s30, v41, 0 ; GFX10-NEXT: s_addk_i32 s32, 0xfe00 ; GFX10-NEXT: v_readlane_b32 s33, v41, 2 ; GFX10-NEXT: s_or_saveexec_b32 s34, -1 @@ -588,12 +616,12 @@ define amdgpu_gfx void @test_call_void_func_void_clobber_s33() #0 { ; GFX9-NEXT: s_addk_i32 s32, 0x400 ; GFX9-NEXT: v_writelane_b32 v40, s30, 0 ; GFX9-NEXT: v_writelane_b32 v40, s31, 1 -; GFX9-NEXT: s_getpc_b64 s[30:31] -; GFX9-NEXT: s_add_u32 s30, s30, void_func_void_clobber_s33@rel32@lo+4 -; GFX9-NEXT: s_addc_u32 s31, s31, void_func_void_clobber_s33@rel32@hi+12 -; GFX9-NEXT: s_swappc_b64 s[30:31], s[30:31] -; GFX9-NEXT: v_readlane_b32 s30, v40, 0 +; GFX9-NEXT: s_getpc_b64 s[34:35] +; GFX9-NEXT: s_add_u32 s34, s34, void_func_void_clobber_s33@rel32@lo+4 +; GFX9-NEXT: s_addc_u32 s35, s35, void_func_void_clobber_s33@rel32@hi+12 +; GFX9-NEXT: s_swappc_b64 s[30:31], s[34:35] ; GFX9-NEXT: v_readlane_b32 s31, v40, 1 +; GFX9-NEXT: v_readlane_b32 s30, v40, 0 ; GFX9-NEXT: s_addk_i32 s32, 0xfc00 ; GFX9-NEXT: v_readlane_b32 s33, v40, 2 ; GFX9-NEXT: s_or_saveexec_b64 s[34:35], -1 @@ -613,14 +641,14 @@ define amdgpu_gfx void @test_call_void_func_void_clobber_s33() #0 { ; GFX10-NEXT: v_writelane_b32 v40, s33, 2 ; GFX10-NEXT: s_mov_b32 s33, s32 ; GFX10-NEXT: s_addk_i32 s32, 0x200 +; GFX10-NEXT: 
s_getpc_b64 s[34:35] +; GFX10-NEXT: s_add_u32 s34, s34, void_func_void_clobber_s33@rel32@lo+4 +; GFX10-NEXT: s_addc_u32 s35, s35, void_func_void_clobber_s33@rel32@hi+12 ; GFX10-NEXT: v_writelane_b32 v40, s30, 0 ; GFX10-NEXT: v_writelane_b32 v40, s31, 1 -; GFX10-NEXT: s_getpc_b64 s[30:31] -; GFX10-NEXT: s_add_u32 s30, s30, void_func_void_clobber_s33@rel32@lo+4 -; GFX10-NEXT: s_addc_u32 s31, s31, void_func_void_clobber_s33@rel32@hi+12 -; GFX10-NEXT: s_swappc_b64 s[30:31], s[30:31] -; GFX10-NEXT: v_readlane_b32 s30, v40, 0 +; GFX10-NEXT: s_swappc_b64 s[30:31], s[34:35] ; GFX10-NEXT: v_readlane_b32 s31, v40, 1 +; GFX10-NEXT: v_readlane_b32 s30, v40, 0 ; GFX10-NEXT: s_addk_i32 s32, 0xfe00 ; GFX10-NEXT: v_readlane_b32 s33, v40, 2 ; GFX10-NEXT: s_or_saveexec_b32 s34, -1 @@ -645,12 +673,12 @@ define amdgpu_gfx void @test_call_void_func_void_clobber_s34() #0 { ; GFX9-NEXT: s_addk_i32 s32, 0x400 ; GFX9-NEXT: v_writelane_b32 v40, s30, 0 ; GFX9-NEXT: v_writelane_b32 v40, s31, 1 -; GFX9-NEXT: s_getpc_b64 s[30:31] -; GFX9-NEXT: s_add_u32 s30, s30, void_func_void_clobber_s34@rel32@lo+4 -; GFX9-NEXT: s_addc_u32 s31, s31, void_func_void_clobber_s34@rel32@hi+12 -; GFX9-NEXT: s_swappc_b64 s[30:31], s[30:31] -; GFX9-NEXT: v_readlane_b32 s30, v40, 0 +; GFX9-NEXT: s_getpc_b64 s[34:35] +; GFX9-NEXT: s_add_u32 s34, s34, void_func_void_clobber_s34@rel32@lo+4 +; GFX9-NEXT: s_addc_u32 s35, s35, void_func_void_clobber_s34@rel32@hi+12 +; GFX9-NEXT: s_swappc_b64 s[30:31], s[34:35] ; GFX9-NEXT: v_readlane_b32 s31, v40, 1 +; GFX9-NEXT: v_readlane_b32 s30, v40, 0 ; GFX9-NEXT: s_addk_i32 s32, 0xfc00 ; GFX9-NEXT: v_readlane_b32 s33, v40, 2 ; GFX9-NEXT: s_or_saveexec_b64 s[34:35], -1 @@ -670,14 +698,14 @@ define amdgpu_gfx void @test_call_void_func_void_clobber_s34() #0 { ; GFX10-NEXT: v_writelane_b32 v40, s33, 2 ; GFX10-NEXT: s_mov_b32 s33, s32 ; GFX10-NEXT: s_addk_i32 s32, 0x200 +; GFX10-NEXT: s_getpc_b64 s[34:35] +; GFX10-NEXT: s_add_u32 s34, s34, void_func_void_clobber_s34@rel32@lo+4 +; GFX10-NEXT: s_addc_u32 s35, s35, void_func_void_clobber_s34@rel32@hi+12 ; GFX10-NEXT: v_writelane_b32 v40, s30, 0 ; GFX10-NEXT: v_writelane_b32 v40, s31, 1 -; GFX10-NEXT: s_getpc_b64 s[30:31] -; GFX10-NEXT: s_add_u32 s30, s30, void_func_void_clobber_s34@rel32@lo+4 -; GFX10-NEXT: s_addc_u32 s31, s31, void_func_void_clobber_s34@rel32@hi+12 -; GFX10-NEXT: s_swappc_b64 s[30:31], s[30:31] -; GFX10-NEXT: v_readlane_b32 s30, v40, 0 +; GFX10-NEXT: s_swappc_b64 s[30:31], s[34:35] ; GFX10-NEXT: v_readlane_b32 s31, v40, 1 +; GFX10-NEXT: v_readlane_b32 s30, v40, 0 ; GFX10-NEXT: s_addk_i32 s32, 0xfe00 ; GFX10-NEXT: v_readlane_b32 s33, v40, 2 ; GFX10-NEXT: s_or_saveexec_b32 s34, -1 @@ -707,15 +735,15 @@ define amdgpu_gfx void @callee_saved_sgpr_kernel() #1 { ; GFX9-NEXT: ; def s40 ; GFX9-NEXT: ;;#ASMEND ; GFX9-NEXT: s_mov_b32 s4, s40 -; GFX9-NEXT: s_getpc_b64 s[30:31] -; GFX9-NEXT: s_add_u32 s30, s30, external_void_func_void@rel32@lo+4 -; GFX9-NEXT: s_addc_u32 s31, s31, external_void_func_void@rel32@hi+12 -; GFX9-NEXT: s_swappc_b64 s[30:31], s[30:31] -; GFX9-NEXT: v_readlane_b32 s30, v40, 1 +; GFX9-NEXT: s_getpc_b64 s[34:35] +; GFX9-NEXT: s_add_u32 s34, s34, external_void_func_void@rel32@lo+4 +; GFX9-NEXT: s_addc_u32 s35, s35, external_void_func_void@rel32@hi+12 +; GFX9-NEXT: s_swappc_b64 s[30:31], s[34:35] ; GFX9-NEXT: ;;#ASMSTART ; GFX9-NEXT: ; use s4 ; GFX9-NEXT: ;;#ASMEND ; GFX9-NEXT: v_readlane_b32 s31, v40, 2 +; GFX9-NEXT: v_readlane_b32 s30, v40, 1 ; GFX9-NEXT: v_readlane_b32 s4, v40, 0 ; GFX9-NEXT: s_addk_i32 s32, 0xfc00 ; GFX9-NEXT: 
v_readlane_b32 s33, v40, 3 @@ -736,6 +764,9 @@ define amdgpu_gfx void @callee_saved_sgpr_kernel() #1 { ; GFX10-NEXT: v_writelane_b32 v40, s33, 3 ; GFX10-NEXT: s_mov_b32 s33, s32 ; GFX10-NEXT: s_addk_i32 s32, 0x200 +; GFX10-NEXT: s_getpc_b64 s[34:35] +; GFX10-NEXT: s_add_u32 s34, s34, external_void_func_void@rel32@lo+4 +; GFX10-NEXT: s_addc_u32 s35, s35, external_void_func_void@rel32@hi+12 ; GFX10-NEXT: ;;#ASMSTART ; GFX10-NEXT: ; def s40 ; GFX10-NEXT: ;;#ASMEND @@ -743,15 +774,12 @@ define amdgpu_gfx void @callee_saved_sgpr_kernel() #1 { ; GFX10-NEXT: s_mov_b32 s4, s40 ; GFX10-NEXT: v_writelane_b32 v40, s30, 1 ; GFX10-NEXT: v_writelane_b32 v40, s31, 2 -; GFX10-NEXT: s_getpc_b64 s[30:31] -; GFX10-NEXT: s_add_u32 s30, s30, external_void_func_void@rel32@lo+4 -; GFX10-NEXT: s_addc_u32 s31, s31, external_void_func_void@rel32@hi+12 -; GFX10-NEXT: s_swappc_b64 s[30:31], s[30:31] -; GFX10-NEXT: v_readlane_b32 s30, v40, 1 +; GFX10-NEXT: s_swappc_b64 s[30:31], s[34:35] ; GFX10-NEXT: ;;#ASMSTART ; GFX10-NEXT: ; use s4 ; GFX10-NEXT: ;;#ASMEND ; GFX10-NEXT: v_readlane_b32 s31, v40, 2 +; GFX10-NEXT: v_readlane_b32 s30, v40, 1 ; GFX10-NEXT: v_readlane_b32 s4, v40, 0 ; GFX10-NEXT: s_addk_i32 s32, 0xfe00 ; GFX10-NEXT: v_readlane_b32 s33, v40, 3 @@ -789,10 +817,10 @@ define amdgpu_gfx void @callee_saved_sgpr_vgpr_kernel() #1 { ; GFX9-NEXT: ; def v32 ; GFX9-NEXT: ;;#ASMEND ; GFX9-NEXT: v_mov_b32_e32 v41, v32 -; GFX9-NEXT: s_getpc_b64 s[30:31] -; GFX9-NEXT: s_add_u32 s30, s30, external_void_func_void@rel32@lo+4 -; GFX9-NEXT: s_addc_u32 s31, s31, external_void_func_void@rel32@hi+12 -; GFX9-NEXT: s_swappc_b64 s[30:31], s[30:31] +; GFX9-NEXT: s_getpc_b64 s[34:35] +; GFX9-NEXT: s_add_u32 s34, s34, external_void_func_void@rel32@lo+4 +; GFX9-NEXT: s_addc_u32 s35, s35, external_void_func_void@rel32@hi+12 +; GFX9-NEXT: s_swappc_b64 s[30:31], s[34:35] ; GFX9-NEXT: ;;#ASMSTART ; GFX9-NEXT: ; use s4 ; GFX9-NEXT: ;;#ASMEND @@ -800,8 +828,8 @@ define amdgpu_gfx void @callee_saved_sgpr_vgpr_kernel() #1 { ; GFX9-NEXT: ; use v41 ; GFX9-NEXT: ;;#ASMEND ; GFX9-NEXT: buffer_load_dword v41, off, s[0:3], s33 ; 4-byte Folded Reload -; GFX9-NEXT: v_readlane_b32 s30, v40, 1 ; GFX9-NEXT: v_readlane_b32 s31, v40, 2 +; GFX9-NEXT: v_readlane_b32 s30, v40, 1 ; GFX9-NEXT: v_readlane_b32 s4, v40, 0 ; GFX9-NEXT: s_addk_i32 s32, 0xfc00 ; GFX9-NEXT: v_readlane_b32 s33, v40, 3 @@ -832,12 +860,12 @@ define amdgpu_gfx void @callee_saved_sgpr_vgpr_kernel() #1 { ; GFX10-NEXT: ; def v32 ; GFX10-NEXT: ;;#ASMEND ; GFX10-NEXT: v_mov_b32_e32 v41, v32 +; GFX10-NEXT: s_getpc_b64 s[34:35] +; GFX10-NEXT: s_add_u32 s34, s34, external_void_func_void@rel32@lo+4 +; GFX10-NEXT: s_addc_u32 s35, s35, external_void_func_void@rel32@hi+12 ; GFX10-NEXT: v_writelane_b32 v40, s30, 1 ; GFX10-NEXT: v_writelane_b32 v40, s31, 2 -; GFX10-NEXT: s_getpc_b64 s[30:31] -; GFX10-NEXT: s_add_u32 s30, s30, external_void_func_void@rel32@lo+4 -; GFX10-NEXT: s_addc_u32 s31, s31, external_void_func_void@rel32@hi+12 -; GFX10-NEXT: s_swappc_b64 s[30:31], s[30:31] +; GFX10-NEXT: s_swappc_b64 s[30:31], s[34:35] ; GFX10-NEXT: ;;#ASMSTART ; GFX10-NEXT: ; use s4 ; GFX10-NEXT: ;;#ASMEND @@ -845,8 +873,8 @@ define amdgpu_gfx void @callee_saved_sgpr_vgpr_kernel() #1 { ; GFX10-NEXT: ; use v41 ; GFX10-NEXT: ;;#ASMEND ; GFX10-NEXT: buffer_load_dword v41, off, s[0:3], s33 ; 4-byte Folded Reload -; GFX10-NEXT: v_readlane_b32 s30, v40, 1 ; GFX10-NEXT: v_readlane_b32 s31, v40, 2 +; GFX10-NEXT: v_readlane_b32 s30, v40, 1 ; GFX10-NEXT: v_readlane_b32 s4, v40, 0 ; GFX10-NEXT: s_addk_i32 s32, 0xfe00 ; 
GFX10-NEXT: v_readlane_b32 s33, v40, 3 diff --git a/llvm/test/CodeGen/AMDGPU/gfx-callable-return-types.ll b/llvm/test/CodeGen/AMDGPU/gfx-callable-return-types.ll index 1b84b0a45458..1f2243116812 100644 --- a/llvm/test/CodeGen/AMDGPU/gfx-callable-return-types.ll +++ b/llvm/test/CodeGen/AMDGPU/gfx-callable-return-types.ll @@ -23,27 +23,59 @@ define amdgpu_gfx void @call_i1() #0 { ; GFX9-LABEL: call_i1: ; GFX9: ; %bb.0: ; %entry ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX9-NEXT: s_mov_b64 s[36:37], s[30:31] -; GFX9-NEXT: s_getpc_b64 s[30:31] -; GFX9-NEXT: s_add_u32 s30, s30, return_i1@gotpcrel32@lo+4 -; GFX9-NEXT: s_addc_u32 s31, s31, return_i1@gotpcrel32@hi+12 -; GFX9-NEXT: s_load_dwordx2 s[30:31], s[30:31], 0x0 +; GFX9-NEXT: s_or_saveexec_b64 s[34:35], -1 +; GFX9-NEXT: buffer_store_dword v1, off, s[0:3], s32 ; 4-byte Folded Spill +; GFX9-NEXT: s_mov_b64 exec, s[34:35] +; GFX9-NEXT: v_writelane_b32 v1, s33, 2 +; GFX9-NEXT: s_mov_b32 s33, s32 +; GFX9-NEXT: s_addk_i32 s32, 0x400 +; GFX9-NEXT: s_getpc_b64 s[34:35] +; GFX9-NEXT: s_add_u32 s34, s34, return_i1@gotpcrel32@lo+4 +; GFX9-NEXT: s_addc_u32 s35, s35, return_i1@gotpcrel32@hi+12 +; GFX9-NEXT: s_load_dwordx2 s[34:35], s[34:35], 0x0 +; GFX9-NEXT: v_writelane_b32 v1, s30, 0 +; GFX9-NEXT: v_writelane_b32 v1, s31, 1 ; GFX9-NEXT: s_waitcnt lgkmcnt(0) -; GFX9-NEXT: s_swappc_b64 s[30:31], s[30:31] -; GFX9-NEXT: s_setpc_b64 s[36:37] +; GFX9-NEXT: s_swappc_b64 s[30:31], s[34:35] +; GFX9-NEXT: v_readlane_b32 s31, v1, 1 +; GFX9-NEXT: v_readlane_b32 s30, v1, 0 +; GFX9-NEXT: s_addk_i32 s32, 0xfc00 +; GFX9-NEXT: v_readlane_b32 s33, v1, 2 +; GFX9-NEXT: s_or_saveexec_b64 s[34:35], -1 +; GFX9-NEXT: buffer_load_dword v1, off, s[0:3], s32 ; 4-byte Folded Reload +; GFX9-NEXT: s_mov_b64 exec, s[34:35] +; GFX9-NEXT: s_waitcnt vmcnt(0) +; GFX9-NEXT: s_setpc_b64 s[30:31] ; ; GFX10-LABEL: call_i1: ; GFX10: ; %bb.0: ; %entry ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 -; GFX10-NEXT: s_mov_b64 s[36:37], s[30:31] -; GFX10-NEXT: s_getpc_b64 s[30:31] -; GFX10-NEXT: s_add_u32 s30, s30, return_i1@gotpcrel32@lo+4 -; GFX10-NEXT: s_addc_u32 s31, s31, return_i1@gotpcrel32@hi+12 -; GFX10-NEXT: s_load_dwordx2 s[30:31], s[30:31], 0x0 +; GFX10-NEXT: s_or_saveexec_b32 s34, -1 +; GFX10-NEXT: buffer_store_dword v1, off, s[0:3], s32 ; 4-byte Folded Spill +; GFX10-NEXT: s_waitcnt_depctr 0xffe3 +; GFX10-NEXT: s_mov_b32 exec_lo, s34 +; GFX10-NEXT: v_writelane_b32 v1, s33, 2 +; GFX10-NEXT: s_mov_b32 s33, s32 +; GFX10-NEXT: s_addk_i32 s32, 0x200 +; GFX10-NEXT: s_getpc_b64 s[34:35] +; GFX10-NEXT: s_add_u32 s34, s34, return_i1@gotpcrel32@lo+4 +; GFX10-NEXT: s_addc_u32 s35, s35, return_i1@gotpcrel32@hi+12 +; GFX10-NEXT: s_load_dwordx2 s[34:35], s[34:35], 0x0 +; GFX10-NEXT: v_writelane_b32 v1, s30, 0 +; GFX10-NEXT: v_writelane_b32 v1, s31, 1 ; GFX10-NEXT: s_waitcnt lgkmcnt(0) -; GFX10-NEXT: s_swappc_b64 s[30:31], s[30:31] -; GFX10-NEXT: s_setpc_b64 s[36:37] +; GFX10-NEXT: s_swappc_b64 s[30:31], s[34:35] +; GFX10-NEXT: v_readlane_b32 s31, v1, 1 +; GFX10-NEXT: v_readlane_b32 s30, v1, 0 +; GFX10-NEXT: s_addk_i32 s32, 0xfe00 +; GFX10-NEXT: v_readlane_b32 s33, v1, 2 +; GFX10-NEXT: s_or_saveexec_b32 s34, -1 +; GFX10-NEXT: buffer_load_dword v1, off, s[0:3], s32 ; 4-byte Folded Reload +; GFX10-NEXT: s_waitcnt_depctr 0xffe3 +; GFX10-NEXT: s_mov_b32 exec_lo, s34 +; GFX10-NEXT: s_waitcnt vmcnt(0) +; GFX10-NEXT: s_setpc_b64 s[30:31] entry: call amdgpu_gfx i1 @return_i1() ret void @@ -70,27 +102,59 @@ define amdgpu_gfx void @call_i16() #0 { 
; GFX9-LABEL: call_i16: ; GFX9: ; %bb.0: ; %entry ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX9-NEXT: s_mov_b64 s[36:37], s[30:31] -; GFX9-NEXT: s_getpc_b64 s[30:31] -; GFX9-NEXT: s_add_u32 s30, s30, return_i16@gotpcrel32@lo+4 -; GFX9-NEXT: s_addc_u32 s31, s31, return_i16@gotpcrel32@hi+12 -; GFX9-NEXT: s_load_dwordx2 s[30:31], s[30:31], 0x0 +; GFX9-NEXT: s_or_saveexec_b64 s[34:35], -1 +; GFX9-NEXT: buffer_store_dword v1, off, s[0:3], s32 ; 4-byte Folded Spill +; GFX9-NEXT: s_mov_b64 exec, s[34:35] +; GFX9-NEXT: v_writelane_b32 v1, s33, 2 +; GFX9-NEXT: s_mov_b32 s33, s32 +; GFX9-NEXT: s_addk_i32 s32, 0x400 +; GFX9-NEXT: s_getpc_b64 s[34:35] +; GFX9-NEXT: s_add_u32 s34, s34, return_i16@gotpcrel32@lo+4 +; GFX9-NEXT: s_addc_u32 s35, s35, return_i16@gotpcrel32@hi+12 +; GFX9-NEXT: s_load_dwordx2 s[34:35], s[34:35], 0x0 +; GFX9-NEXT: v_writelane_b32 v1, s30, 0 +; GFX9-NEXT: v_writelane_b32 v1, s31, 1 ; GFX9-NEXT: s_waitcnt lgkmcnt(0) -; GFX9-NEXT: s_swappc_b64 s[30:31], s[30:31] -; GFX9-NEXT: s_setpc_b64 s[36:37] +; GFX9-NEXT: s_swappc_b64 s[30:31], s[34:35] +; GFX9-NEXT: v_readlane_b32 s31, v1, 1 +; GFX9-NEXT: v_readlane_b32 s30, v1, 0 +; GFX9-NEXT: s_addk_i32 s32, 0xfc00 +; GFX9-NEXT: v_readlane_b32 s33, v1, 2 +; GFX9-NEXT: s_or_saveexec_b64 s[34:35], -1 +; GFX9-NEXT: buffer_load_dword v1, off, s[0:3], s32 ; 4-byte Folded Reload +; GFX9-NEXT: s_mov_b64 exec, s[34:35] +; GFX9-NEXT: s_waitcnt vmcnt(0) +; GFX9-NEXT: s_setpc_b64 s[30:31] ; ; GFX10-LABEL: call_i16: ; GFX10: ; %bb.0: ; %entry ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 -; GFX10-NEXT: s_mov_b64 s[36:37], s[30:31] -; GFX10-NEXT: s_getpc_b64 s[30:31] -; GFX10-NEXT: s_add_u32 s30, s30, return_i16@gotpcrel32@lo+4 -; GFX10-NEXT: s_addc_u32 s31, s31, return_i16@gotpcrel32@hi+12 -; GFX10-NEXT: s_load_dwordx2 s[30:31], s[30:31], 0x0 +; GFX10-NEXT: s_or_saveexec_b32 s34, -1 +; GFX10-NEXT: buffer_store_dword v1, off, s[0:3], s32 ; 4-byte Folded Spill +; GFX10-NEXT: s_waitcnt_depctr 0xffe3 +; GFX10-NEXT: s_mov_b32 exec_lo, s34 +; GFX10-NEXT: v_writelane_b32 v1, s33, 2 +; GFX10-NEXT: s_mov_b32 s33, s32 +; GFX10-NEXT: s_addk_i32 s32, 0x200 +; GFX10-NEXT: s_getpc_b64 s[34:35] +; GFX10-NEXT: s_add_u32 s34, s34, return_i16@gotpcrel32@lo+4 +; GFX10-NEXT: s_addc_u32 s35, s35, return_i16@gotpcrel32@hi+12 +; GFX10-NEXT: s_load_dwordx2 s[34:35], s[34:35], 0x0 +; GFX10-NEXT: v_writelane_b32 v1, s30, 0 +; GFX10-NEXT: v_writelane_b32 v1, s31, 1 ; GFX10-NEXT: s_waitcnt lgkmcnt(0) -; GFX10-NEXT: s_swappc_b64 s[30:31], s[30:31] -; GFX10-NEXT: s_setpc_b64 s[36:37] +; GFX10-NEXT: s_swappc_b64 s[30:31], s[34:35] +; GFX10-NEXT: v_readlane_b32 s31, v1, 1 +; GFX10-NEXT: v_readlane_b32 s30, v1, 0 +; GFX10-NEXT: s_addk_i32 s32, 0xfe00 +; GFX10-NEXT: v_readlane_b32 s33, v1, 2 +; GFX10-NEXT: s_or_saveexec_b32 s34, -1 +; GFX10-NEXT: buffer_load_dword v1, off, s[0:3], s32 ; 4-byte Folded Reload +; GFX10-NEXT: s_waitcnt_depctr 0xffe3 +; GFX10-NEXT: s_mov_b32 exec_lo, s34 +; GFX10-NEXT: s_waitcnt vmcnt(0) +; GFX10-NEXT: s_setpc_b64 s[30:31] entry: call amdgpu_gfx i16 @return_i16() ret void @@ -117,27 +181,59 @@ define amdgpu_gfx void @call_2xi16() #0 { ; GFX9-LABEL: call_2xi16: ; GFX9: ; %bb.0: ; %entry ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX9-NEXT: s_mov_b64 s[36:37], s[30:31] -; GFX9-NEXT: s_getpc_b64 s[30:31] -; GFX9-NEXT: s_add_u32 s30, s30, return_2xi16@gotpcrel32@lo+4 -; GFX9-NEXT: s_addc_u32 s31, s31, return_2xi16@gotpcrel32@hi+12 -; GFX9-NEXT: s_load_dwordx2 s[30:31], s[30:31], 0x0 
+; GFX9-NEXT: s_or_saveexec_b64 s[34:35], -1 +; GFX9-NEXT: buffer_store_dword v1, off, s[0:3], s32 ; 4-byte Folded Spill +; GFX9-NEXT: s_mov_b64 exec, s[34:35] +; GFX9-NEXT: v_writelane_b32 v1, s33, 2 +; GFX9-NEXT: s_mov_b32 s33, s32 +; GFX9-NEXT: s_addk_i32 s32, 0x400 +; GFX9-NEXT: s_getpc_b64 s[34:35] +; GFX9-NEXT: s_add_u32 s34, s34, return_2xi16@gotpcrel32@lo+4 +; GFX9-NEXT: s_addc_u32 s35, s35, return_2xi16@gotpcrel32@hi+12 +; GFX9-NEXT: s_load_dwordx2 s[34:35], s[34:35], 0x0 +; GFX9-NEXT: v_writelane_b32 v1, s30, 0 +; GFX9-NEXT: v_writelane_b32 v1, s31, 1 ; GFX9-NEXT: s_waitcnt lgkmcnt(0) -; GFX9-NEXT: s_swappc_b64 s[30:31], s[30:31] -; GFX9-NEXT: s_setpc_b64 s[36:37] +; GFX9-NEXT: s_swappc_b64 s[30:31], s[34:35] +; GFX9-NEXT: v_readlane_b32 s31, v1, 1 +; GFX9-NEXT: v_readlane_b32 s30, v1, 0 +; GFX9-NEXT: s_addk_i32 s32, 0xfc00 +; GFX9-NEXT: v_readlane_b32 s33, v1, 2 +; GFX9-NEXT: s_or_saveexec_b64 s[34:35], -1 +; GFX9-NEXT: buffer_load_dword v1, off, s[0:3], s32 ; 4-byte Folded Reload +; GFX9-NEXT: s_mov_b64 exec, s[34:35] +; GFX9-NEXT: s_waitcnt vmcnt(0) +; GFX9-NEXT: s_setpc_b64 s[30:31] ; ; GFX10-LABEL: call_2xi16: ; GFX10: ; %bb.0: ; %entry ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 -; GFX10-NEXT: s_mov_b64 s[36:37], s[30:31] -; GFX10-NEXT: s_getpc_b64 s[30:31] -; GFX10-NEXT: s_add_u32 s30, s30, return_2xi16@gotpcrel32@lo+4 -; GFX10-NEXT: s_addc_u32 s31, s31, return_2xi16@gotpcrel32@hi+12 -; GFX10-NEXT: s_load_dwordx2 s[30:31], s[30:31], 0x0 +; GFX10-NEXT: s_or_saveexec_b32 s34, -1 +; GFX10-NEXT: buffer_store_dword v1, off, s[0:3], s32 ; 4-byte Folded Spill +; GFX10-NEXT: s_waitcnt_depctr 0xffe3 +; GFX10-NEXT: s_mov_b32 exec_lo, s34 +; GFX10-NEXT: v_writelane_b32 v1, s33, 2 +; GFX10-NEXT: s_mov_b32 s33, s32 +; GFX10-NEXT: s_addk_i32 s32, 0x200 +; GFX10-NEXT: s_getpc_b64 s[34:35] +; GFX10-NEXT: s_add_u32 s34, s34, return_2xi16@gotpcrel32@lo+4 +; GFX10-NEXT: s_addc_u32 s35, s35, return_2xi16@gotpcrel32@hi+12 +; GFX10-NEXT: s_load_dwordx2 s[34:35], s[34:35], 0x0 +; GFX10-NEXT: v_writelane_b32 v1, s30, 0 +; GFX10-NEXT: v_writelane_b32 v1, s31, 1 ; GFX10-NEXT: s_waitcnt lgkmcnt(0) -; GFX10-NEXT: s_swappc_b64 s[30:31], s[30:31] -; GFX10-NEXT: s_setpc_b64 s[36:37] +; GFX10-NEXT: s_swappc_b64 s[30:31], s[34:35] +; GFX10-NEXT: v_readlane_b32 s31, v1, 1 +; GFX10-NEXT: v_readlane_b32 s30, v1, 0 +; GFX10-NEXT: s_addk_i32 s32, 0xfe00 +; GFX10-NEXT: v_readlane_b32 s33, v1, 2 +; GFX10-NEXT: s_or_saveexec_b32 s34, -1 +; GFX10-NEXT: buffer_load_dword v1, off, s[0:3], s32 ; 4-byte Folded Reload +; GFX10-NEXT: s_waitcnt_depctr 0xffe3 +; GFX10-NEXT: s_mov_b32 exec_lo, s34 +; GFX10-NEXT: s_waitcnt vmcnt(0) +; GFX10-NEXT: s_setpc_b64 s[30:31] entry: call amdgpu_gfx <2 x i16> @return_2xi16() ret void @@ -166,27 +262,59 @@ define amdgpu_gfx void @call_3xi16() #0 { ; GFX9-LABEL: call_3xi16: ; GFX9: ; %bb.0: ; %entry ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX9-NEXT: s_mov_b64 s[36:37], s[30:31] -; GFX9-NEXT: s_getpc_b64 s[30:31] -; GFX9-NEXT: s_add_u32 s30, s30, return_3xi16@gotpcrel32@lo+4 -; GFX9-NEXT: s_addc_u32 s31, s31, return_3xi16@gotpcrel32@hi+12 -; GFX9-NEXT: s_load_dwordx2 s[30:31], s[30:31], 0x0 +; GFX9-NEXT: s_or_saveexec_b64 s[34:35], -1 +; GFX9-NEXT: buffer_store_dword v2, off, s[0:3], s32 ; 4-byte Folded Spill +; GFX9-NEXT: s_mov_b64 exec, s[34:35] +; GFX9-NEXT: v_writelane_b32 v2, s33, 2 +; GFX9-NEXT: s_mov_b32 s33, s32 +; GFX9-NEXT: s_addk_i32 s32, 0x400 +; GFX9-NEXT: s_getpc_b64 s[34:35] +; GFX9-NEXT: s_add_u32 s34, 
s34, return_3xi16@gotpcrel32@lo+4 +; GFX9-NEXT: s_addc_u32 s35, s35, return_3xi16@gotpcrel32@hi+12 +; GFX9-NEXT: s_load_dwordx2 s[34:35], s[34:35], 0x0 +; GFX9-NEXT: v_writelane_b32 v2, s30, 0 +; GFX9-NEXT: v_writelane_b32 v2, s31, 1 ; GFX9-NEXT: s_waitcnt lgkmcnt(0) -; GFX9-NEXT: s_swappc_b64 s[30:31], s[30:31] -; GFX9-NEXT: s_setpc_b64 s[36:37] +; GFX9-NEXT: s_swappc_b64 s[30:31], s[34:35] +; GFX9-NEXT: v_readlane_b32 s31, v2, 1 +; GFX9-NEXT: v_readlane_b32 s30, v2, 0 +; GFX9-NEXT: s_addk_i32 s32, 0xfc00 +; GFX9-NEXT: v_readlane_b32 s33, v2, 2 +; GFX9-NEXT: s_or_saveexec_b64 s[34:35], -1 +; GFX9-NEXT: buffer_load_dword v2, off, s[0:3], s32 ; 4-byte Folded Reload +; GFX9-NEXT: s_mov_b64 exec, s[34:35] +; GFX9-NEXT: s_waitcnt vmcnt(0) +; GFX9-NEXT: s_setpc_b64 s[30:31] ; ; GFX10-LABEL: call_3xi16: ; GFX10: ; %bb.0: ; %entry ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 -; GFX10-NEXT: s_mov_b64 s[36:37], s[30:31] -; GFX10-NEXT: s_getpc_b64 s[30:31] -; GFX10-NEXT: s_add_u32 s30, s30, return_3xi16@gotpcrel32@lo+4 -; GFX10-NEXT: s_addc_u32 s31, s31, return_3xi16@gotpcrel32@hi+12 -; GFX10-NEXT: s_load_dwordx2 s[30:31], s[30:31], 0x0 +; GFX10-NEXT: s_or_saveexec_b32 s34, -1 +; GFX10-NEXT: buffer_store_dword v2, off, s[0:3], s32 ; 4-byte Folded Spill +; GFX10-NEXT: s_waitcnt_depctr 0xffe3 +; GFX10-NEXT: s_mov_b32 exec_lo, s34 +; GFX10-NEXT: v_writelane_b32 v2, s33, 2 +; GFX10-NEXT: s_mov_b32 s33, s32 +; GFX10-NEXT: s_addk_i32 s32, 0x200 +; GFX10-NEXT: s_getpc_b64 s[34:35] +; GFX10-NEXT: s_add_u32 s34, s34, return_3xi16@gotpcrel32@lo+4 +; GFX10-NEXT: s_addc_u32 s35, s35, return_3xi16@gotpcrel32@hi+12 +; GFX10-NEXT: s_load_dwordx2 s[34:35], s[34:35], 0x0 +; GFX10-NEXT: v_writelane_b32 v2, s30, 0 +; GFX10-NEXT: v_writelane_b32 v2, s31, 1 ; GFX10-NEXT: s_waitcnt lgkmcnt(0) -; GFX10-NEXT: s_swappc_b64 s[30:31], s[30:31] -; GFX10-NEXT: s_setpc_b64 s[36:37] +; GFX10-NEXT: s_swappc_b64 s[30:31], s[34:35] +; GFX10-NEXT: v_readlane_b32 s31, v2, 1 +; GFX10-NEXT: v_readlane_b32 s30, v2, 0 +; GFX10-NEXT: s_addk_i32 s32, 0xfe00 +; GFX10-NEXT: v_readlane_b32 s33, v2, 2 +; GFX10-NEXT: s_or_saveexec_b32 s34, -1 +; GFX10-NEXT: buffer_load_dword v2, off, s[0:3], s32 ; 4-byte Folded Reload +; GFX10-NEXT: s_waitcnt_depctr 0xffe3 +; GFX10-NEXT: s_mov_b32 exec_lo, s34 +; GFX10-NEXT: s_waitcnt vmcnt(0) +; GFX10-NEXT: s_setpc_b64 s[30:31] entry: call amdgpu_gfx <3 x i16> @return_3xi16() ret void @@ -1241,41 +1369,63 @@ define amdgpu_gfx void @call_512xi32() #0 { ; GFX9-LABEL: call_512xi32: ; GFX9: ; %bb.0: ; %entry ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX9-NEXT: s_mov_b32 s34, s33 +; GFX9-NEXT: s_or_saveexec_b64 s[34:35], -1 +; GFX9-NEXT: buffer_store_dword v2, off, s[0:3], s32 offset:2048 ; 4-byte Folded Spill +; GFX9-NEXT: s_mov_b64 exec, s[34:35] +; GFX9-NEXT: v_writelane_b32 v2, s33, 2 ; GFX9-NEXT: s_add_i32 s33, s32, 0x1ffc0 ; GFX9-NEXT: s_and_b32 s33, s33, 0xfffe0000 ; GFX9-NEXT: s_add_i32 s32, s32, 0x60000 -; GFX9-NEXT: s_mov_b64 s[36:37], s[30:31] -; GFX9-NEXT: s_getpc_b64 s[30:31] -; GFX9-NEXT: s_add_u32 s30, s30, return_512xi32@gotpcrel32@lo+4 -; GFX9-NEXT: s_addc_u32 s31, s31, return_512xi32@gotpcrel32@hi+12 -; GFX9-NEXT: s_load_dwordx2 s[30:31], s[30:31], 0x0 +; GFX9-NEXT: s_getpc_b64 s[34:35] +; GFX9-NEXT: s_add_u32 s34, s34, return_512xi32@gotpcrel32@lo+4 +; GFX9-NEXT: s_addc_u32 s35, s35, return_512xi32@gotpcrel32@hi+12 +; GFX9-NEXT: s_load_dwordx2 s[34:35], s[34:35], 0x0 +; GFX9-NEXT: v_writelane_b32 v2, s30, 0 ; GFX9-NEXT: 
v_lshrrev_b32_e64 v0, 6, s33 +; GFX9-NEXT: v_writelane_b32 v2, s31, 1 ; GFX9-NEXT: s_waitcnt lgkmcnt(0) -; GFX9-NEXT: s_swappc_b64 s[30:31], s[30:31] +; GFX9-NEXT: s_swappc_b64 s[30:31], s[34:35] +; GFX9-NEXT: v_readlane_b32 s31, v2, 1 +; GFX9-NEXT: v_readlane_b32 s30, v2, 0 ; GFX9-NEXT: s_add_i32 s32, s32, 0xfffa0000 -; GFX9-NEXT: s_mov_b32 s33, s34 -; GFX9-NEXT: s_setpc_b64 s[36:37] +; GFX9-NEXT: v_readlane_b32 s33, v2, 2 +; GFX9-NEXT: s_or_saveexec_b64 s[34:35], -1 +; GFX9-NEXT: buffer_load_dword v2, off, s[0:3], s32 offset:2048 ; 4-byte Folded Reload +; GFX9-NEXT: s_mov_b64 exec, s[34:35] +; GFX9-NEXT: s_waitcnt vmcnt(0) +; GFX9-NEXT: s_setpc_b64 s[30:31] ; ; GFX10-LABEL: call_512xi32: ; GFX10: ; %bb.0: ; %entry ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 -; GFX10-NEXT: s_mov_b32 s34, s33 +; GFX10-NEXT: s_or_saveexec_b32 s34, -1 +; GFX10-NEXT: buffer_store_dword v2, off, s[0:3], s32 offset:2048 ; 4-byte Folded Spill +; GFX10-NEXT: s_waitcnt_depctr 0xffe3 +; GFX10-NEXT: s_mov_b32 exec_lo, s34 +; GFX10-NEXT: v_writelane_b32 v2, s33, 2 ; GFX10-NEXT: s_add_i32 s33, s32, 0xffe0 ; GFX10-NEXT: s_add_i32 s32, s32, 0x30000 ; GFX10-NEXT: s_and_b32 s33, s33, 0xffff0000 -; GFX10-NEXT: s_mov_b64 s[36:37], s[30:31] -; GFX10-NEXT: s_getpc_b64 s[30:31] -; GFX10-NEXT: s_add_u32 s30, s30, return_512xi32@gotpcrel32@lo+4 -; GFX10-NEXT: s_addc_u32 s31, s31, return_512xi32@gotpcrel32@hi+12 +; GFX10-NEXT: s_getpc_b64 s[34:35] +; GFX10-NEXT: s_add_u32 s34, s34, return_512xi32@gotpcrel32@lo+4 +; GFX10-NEXT: s_addc_u32 s35, s35, return_512xi32@gotpcrel32@hi+12 +; GFX10-NEXT: v_writelane_b32 v2, s30, 0 +; GFX10-NEXT: s_load_dwordx2 s[34:35], s[34:35], 0x0 ; GFX10-NEXT: v_lshrrev_b32_e64 v0, 5, s33 -; GFX10-NEXT: s_load_dwordx2 s[30:31], s[30:31], 0x0 +; GFX10-NEXT: v_writelane_b32 v2, s31, 1 ; GFX10-NEXT: s_waitcnt lgkmcnt(0) -; GFX10-NEXT: s_swappc_b64 s[30:31], s[30:31] +; GFX10-NEXT: s_swappc_b64 s[30:31], s[34:35] +; GFX10-NEXT: v_readlane_b32 s31, v2, 1 +; GFX10-NEXT: v_readlane_b32 s30, v2, 0 ; GFX10-NEXT: s_add_i32 s32, s32, 0xfffd0000 -; GFX10-NEXT: s_mov_b32 s33, s34 -; GFX10-NEXT: s_setpc_b64 s[36:37] +; GFX10-NEXT: v_readlane_b32 s33, v2, 2 +; GFX10-NEXT: s_or_saveexec_b32 s34, -1 +; GFX10-NEXT: buffer_load_dword v2, off, s[0:3], s32 offset:2048 ; 4-byte Folded Reload +; GFX10-NEXT: s_waitcnt_depctr 0xffe3 +; GFX10-NEXT: s_mov_b32 exec_lo, s34 +; GFX10-NEXT: s_waitcnt vmcnt(0) +; GFX10-NEXT: s_setpc_b64 s[30:31] entry: call amdgpu_gfx <512 x i32> @return_512xi32() ret void diff --git a/llvm/test/CodeGen/AMDGPU/indirect-call.ll b/llvm/test/CodeGen/AMDGPU/indirect-call.ll index a47606e14745..a1a6a805f367 100644 --- a/llvm/test/CodeGen/AMDGPU/indirect-call.ll +++ b/llvm/test/CodeGen/AMDGPU/indirect-call.ll @@ -399,23 +399,23 @@ define void @test_indirect_call_vgpr_ptr(void()* %fptr) { ; GCN-NEXT: v_writelane_b32 v40, s33, 17 ; GCN-NEXT: s_mov_b32 s33, s32 ; GCN-NEXT: s_addk_i32 s32, 0x400 -; GCN-NEXT: v_writelane_b32 v40, s34, 0 -; GCN-NEXT: v_writelane_b32 v40, s35, 1 -; GCN-NEXT: v_writelane_b32 v40, s36, 2 -; GCN-NEXT: v_writelane_b32 v40, s37, 3 -; GCN-NEXT: v_writelane_b32 v40, s38, 4 -; GCN-NEXT: v_writelane_b32 v40, s39, 5 -; GCN-NEXT: v_writelane_b32 v40, s40, 6 -; GCN-NEXT: v_writelane_b32 v40, s41, 7 -; GCN-NEXT: v_writelane_b32 v40, s42, 8 -; GCN-NEXT: v_writelane_b32 v40, s43, 9 -; GCN-NEXT: v_writelane_b32 v40, s44, 10 -; GCN-NEXT: v_writelane_b32 v40, s46, 11 -; GCN-NEXT: v_writelane_b32 v40, s47, 12 -; GCN-NEXT: v_writelane_b32 v40, s48, 13 
-; GCN-NEXT: v_writelane_b32 v40, s49, 14 -; GCN-NEXT: v_writelane_b32 v40, s30, 15 -; GCN-NEXT: v_writelane_b32 v40, s31, 16 +; GCN-NEXT: v_writelane_b32 v40, s30, 0 +; GCN-NEXT: v_writelane_b32 v40, s31, 1 +; GCN-NEXT: v_writelane_b32 v40, s34, 2 +; GCN-NEXT: v_writelane_b32 v40, s35, 3 +; GCN-NEXT: v_writelane_b32 v40, s36, 4 +; GCN-NEXT: v_writelane_b32 v40, s37, 5 +; GCN-NEXT: v_writelane_b32 v40, s38, 6 +; GCN-NEXT: v_writelane_b32 v40, s39, 7 +; GCN-NEXT: v_writelane_b32 v40, s40, 8 +; GCN-NEXT: v_writelane_b32 v40, s41, 9 +; GCN-NEXT: v_writelane_b32 v40, s42, 10 +; GCN-NEXT: v_writelane_b32 v40, s43, 11 +; GCN-NEXT: v_writelane_b32 v40, s44, 12 +; GCN-NEXT: v_writelane_b32 v40, s46, 13 +; GCN-NEXT: v_writelane_b32 v40, s47, 14 +; GCN-NEXT: v_writelane_b32 v40, s48, 15 +; GCN-NEXT: v_writelane_b32 v40, s49, 16 ; GCN-NEXT: s_mov_b32 s42, s14 ; GCN-NEXT: s_mov_b32 s43, s13 ; GCN-NEXT: s_mov_b32 s44, s12 @@ -443,30 +443,30 @@ define void @test_indirect_call_vgpr_ptr(void()* %fptr) { ; GCN-NEXT: s_cbranch_execnz .LBB2_1 ; GCN-NEXT: ; %bb.2: ; GCN-NEXT: s_mov_b64 exec, s[46:47] -; GCN-NEXT: v_readlane_b32 s4, v40, 15 -; GCN-NEXT: v_readlane_b32 s5, v40, 16 -; GCN-NEXT: v_readlane_b32 s49, v40, 14 -; GCN-NEXT: v_readlane_b32 s48, v40, 13 -; GCN-NEXT: v_readlane_b32 s47, v40, 12 -; GCN-NEXT: v_readlane_b32 s46, v40, 11 -; GCN-NEXT: v_readlane_b32 s44, v40, 10 -; GCN-NEXT: v_readlane_b32 s43, v40, 9 -; GCN-NEXT: v_readlane_b32 s42, v40, 8 -; GCN-NEXT: v_readlane_b32 s41, v40, 7 -; GCN-NEXT: v_readlane_b32 s40, v40, 6 -; GCN-NEXT: v_readlane_b32 s39, v40, 5 -; GCN-NEXT: v_readlane_b32 s38, v40, 4 -; GCN-NEXT: v_readlane_b32 s37, v40, 3 -; GCN-NEXT: v_readlane_b32 s36, v40, 2 -; GCN-NEXT: v_readlane_b32 s35, v40, 1 -; GCN-NEXT: v_readlane_b32 s34, v40, 0 +; GCN-NEXT: v_readlane_b32 s49, v40, 16 +; GCN-NEXT: v_readlane_b32 s48, v40, 15 +; GCN-NEXT: v_readlane_b32 s47, v40, 14 +; GCN-NEXT: v_readlane_b32 s46, v40, 13 +; GCN-NEXT: v_readlane_b32 s44, v40, 12 +; GCN-NEXT: v_readlane_b32 s43, v40, 11 +; GCN-NEXT: v_readlane_b32 s42, v40, 10 +; GCN-NEXT: v_readlane_b32 s41, v40, 9 +; GCN-NEXT: v_readlane_b32 s40, v40, 8 +; GCN-NEXT: v_readlane_b32 s39, v40, 7 +; GCN-NEXT: v_readlane_b32 s38, v40, 6 +; GCN-NEXT: v_readlane_b32 s37, v40, 5 +; GCN-NEXT: v_readlane_b32 s36, v40, 4 +; GCN-NEXT: v_readlane_b32 s35, v40, 3 +; GCN-NEXT: v_readlane_b32 s34, v40, 2 +; GCN-NEXT: v_readlane_b32 s31, v40, 1 +; GCN-NEXT: v_readlane_b32 s30, v40, 0 ; GCN-NEXT: s_addk_i32 s32, 0xfc00 ; GCN-NEXT: v_readlane_b32 s33, v40, 17 -; GCN-NEXT: s_or_saveexec_b64 s[6:7], -1 +; GCN-NEXT: s_or_saveexec_b64 s[4:5], -1 ; GCN-NEXT: buffer_load_dword v40, off, s[0:3], s32 ; 4-byte Folded Reload -; GCN-NEXT: s_mov_b64 exec, s[6:7] +; GCN-NEXT: s_mov_b64 exec, s[4:5] ; GCN-NEXT: s_waitcnt vmcnt(0) -; GCN-NEXT: s_setpc_b64 s[4:5] +; GCN-NEXT: s_setpc_b64 s[30:31] ; ; GISEL-LABEL: test_indirect_call_vgpr_ptr: ; GISEL: ; %bb.0: @@ -477,21 +477,23 @@ define void @test_indirect_call_vgpr_ptr(void()* %fptr) { ; GISEL-NEXT: v_writelane_b32 v40, s33, 17 ; GISEL-NEXT: s_mov_b32 s33, s32 ; GISEL-NEXT: s_addk_i32 s32, 0x400 -; GISEL-NEXT: v_writelane_b32 v40, s34, 0 -; GISEL-NEXT: v_writelane_b32 v40, s35, 1 -; GISEL-NEXT: v_writelane_b32 v40, s36, 2 -; GISEL-NEXT: v_writelane_b32 v40, s37, 3 -; GISEL-NEXT: v_writelane_b32 v40, s38, 4 -; GISEL-NEXT: v_writelane_b32 v40, s39, 5 -; GISEL-NEXT: v_writelane_b32 v40, s40, 6 -; GISEL-NEXT: v_writelane_b32 v40, s41, 7 -; GISEL-NEXT: v_writelane_b32 v40, s42, 8 -; GISEL-NEXT: v_writelane_b32 v40, 
s43, 9 -; GISEL-NEXT: v_writelane_b32 v40, s44, 10 -; GISEL-NEXT: v_writelane_b32 v40, s46, 11 -; GISEL-NEXT: v_writelane_b32 v40, s47, 12 -; GISEL-NEXT: v_writelane_b32 v40, s48, 13 -; GISEL-NEXT: v_writelane_b32 v40, s49, 14 +; GISEL-NEXT: v_writelane_b32 v40, s30, 0 +; GISEL-NEXT: v_writelane_b32 v40, s31, 1 +; GISEL-NEXT: v_writelane_b32 v40, s34, 2 +; GISEL-NEXT: v_writelane_b32 v40, s35, 3 +; GISEL-NEXT: v_writelane_b32 v40, s36, 4 +; GISEL-NEXT: v_writelane_b32 v40, s37, 5 +; GISEL-NEXT: v_writelane_b32 v40, s38, 6 +; GISEL-NEXT: v_writelane_b32 v40, s39, 7 +; GISEL-NEXT: v_writelane_b32 v40, s40, 8 +; GISEL-NEXT: v_writelane_b32 v40, s41, 9 +; GISEL-NEXT: v_writelane_b32 v40, s42, 10 +; GISEL-NEXT: v_writelane_b32 v40, s43, 11 +; GISEL-NEXT: v_writelane_b32 v40, s44, 12 +; GISEL-NEXT: v_writelane_b32 v40, s46, 13 +; GISEL-NEXT: v_writelane_b32 v40, s47, 14 +; GISEL-NEXT: v_writelane_b32 v40, s48, 15 +; GISEL-NEXT: v_writelane_b32 v40, s49, 16 ; GISEL-NEXT: s_mov_b32 s42, s14 ; GISEL-NEXT: s_mov_b32 s43, s13 ; GISEL-NEXT: s_mov_b32 s44, s12 @@ -499,8 +501,6 @@ define void @test_indirect_call_vgpr_ptr(void()* %fptr) { ; GISEL-NEXT: s_mov_b64 s[36:37], s[8:9] ; GISEL-NEXT: s_mov_b64 s[38:39], s[6:7] ; GISEL-NEXT: s_mov_b64 s[40:41], s[4:5] -; GISEL-NEXT: v_writelane_b32 v40, s30, 15 -; GISEL-NEXT: v_writelane_b32 v40, s31, 16 ; GISEL-NEXT: s_mov_b64 s[46:47], exec ; GISEL-NEXT: .LBB2_1: ; =>This Inner Loop Header: Depth=1 ; GISEL-NEXT: v_readfirstlane_b32 s16, v0 @@ -521,30 +521,30 @@ define void @test_indirect_call_vgpr_ptr(void()* %fptr) { ; GISEL-NEXT: s_cbranch_execnz .LBB2_1 ; GISEL-NEXT: ; %bb.2: ; GISEL-NEXT: s_mov_b64 exec, s[46:47] -; GISEL-NEXT: v_readlane_b32 s4, v40, 15 -; GISEL-NEXT: v_readlane_b32 s5, v40, 16 -; GISEL-NEXT: v_readlane_b32 s49, v40, 14 -; GISEL-NEXT: v_readlane_b32 s48, v40, 13 -; GISEL-NEXT: v_readlane_b32 s47, v40, 12 -; GISEL-NEXT: v_readlane_b32 s46, v40, 11 -; GISEL-NEXT: v_readlane_b32 s44, v40, 10 -; GISEL-NEXT: v_readlane_b32 s43, v40, 9 -; GISEL-NEXT: v_readlane_b32 s42, v40, 8 -; GISEL-NEXT: v_readlane_b32 s41, v40, 7 -; GISEL-NEXT: v_readlane_b32 s40, v40, 6 -; GISEL-NEXT: v_readlane_b32 s39, v40, 5 -; GISEL-NEXT: v_readlane_b32 s38, v40, 4 -; GISEL-NEXT: v_readlane_b32 s37, v40, 3 -; GISEL-NEXT: v_readlane_b32 s36, v40, 2 -; GISEL-NEXT: v_readlane_b32 s35, v40, 1 -; GISEL-NEXT: v_readlane_b32 s34, v40, 0 +; GISEL-NEXT: v_readlane_b32 s49, v40, 16 +; GISEL-NEXT: v_readlane_b32 s48, v40, 15 +; GISEL-NEXT: v_readlane_b32 s47, v40, 14 +; GISEL-NEXT: v_readlane_b32 s46, v40, 13 +; GISEL-NEXT: v_readlane_b32 s44, v40, 12 +; GISEL-NEXT: v_readlane_b32 s43, v40, 11 +; GISEL-NEXT: v_readlane_b32 s42, v40, 10 +; GISEL-NEXT: v_readlane_b32 s41, v40, 9 +; GISEL-NEXT: v_readlane_b32 s40, v40, 8 +; GISEL-NEXT: v_readlane_b32 s39, v40, 7 +; GISEL-NEXT: v_readlane_b32 s38, v40, 6 +; GISEL-NEXT: v_readlane_b32 s37, v40, 5 +; GISEL-NEXT: v_readlane_b32 s36, v40, 4 +; GISEL-NEXT: v_readlane_b32 s35, v40, 3 +; GISEL-NEXT: v_readlane_b32 s34, v40, 2 +; GISEL-NEXT: v_readlane_b32 s31, v40, 1 +; GISEL-NEXT: v_readlane_b32 s30, v40, 0 ; GISEL-NEXT: s_addk_i32 s32, 0xfc00 ; GISEL-NEXT: v_readlane_b32 s33, v40, 17 -; GISEL-NEXT: s_or_saveexec_b64 s[6:7], -1 +; GISEL-NEXT: s_or_saveexec_b64 s[4:5], -1 ; GISEL-NEXT: buffer_load_dword v40, off, s[0:3], s32 ; 4-byte Folded Reload -; GISEL-NEXT: s_mov_b64 exec, s[6:7] +; GISEL-NEXT: s_mov_b64 exec, s[4:5] ; GISEL-NEXT: s_waitcnt vmcnt(0) -; GISEL-NEXT: s_setpc_b64 s[4:5] +; GISEL-NEXT: s_setpc_b64 s[30:31] call void %fptr() 
ret void } @@ -559,23 +559,23 @@ define void @test_indirect_call_vgpr_ptr_arg(void(i32)* %fptr) { ; GCN-NEXT: v_writelane_b32 v40, s33, 17 ; GCN-NEXT: s_mov_b32 s33, s32 ; GCN-NEXT: s_addk_i32 s32, 0x400 -; GCN-NEXT: v_writelane_b32 v40, s34, 0 -; GCN-NEXT: v_writelane_b32 v40, s35, 1 -; GCN-NEXT: v_writelane_b32 v40, s36, 2 -; GCN-NEXT: v_writelane_b32 v40, s37, 3 -; GCN-NEXT: v_writelane_b32 v40, s38, 4 -; GCN-NEXT: v_writelane_b32 v40, s39, 5 -; GCN-NEXT: v_writelane_b32 v40, s40, 6 -; GCN-NEXT: v_writelane_b32 v40, s41, 7 -; GCN-NEXT: v_writelane_b32 v40, s42, 8 -; GCN-NEXT: v_writelane_b32 v40, s43, 9 -; GCN-NEXT: v_writelane_b32 v40, s44, 10 -; GCN-NEXT: v_writelane_b32 v40, s46, 11 -; GCN-NEXT: v_writelane_b32 v40, s47, 12 -; GCN-NEXT: v_writelane_b32 v40, s48, 13 -; GCN-NEXT: v_writelane_b32 v40, s49, 14 -; GCN-NEXT: v_writelane_b32 v40, s30, 15 -; GCN-NEXT: v_writelane_b32 v40, s31, 16 +; GCN-NEXT: v_writelane_b32 v40, s30, 0 +; GCN-NEXT: v_writelane_b32 v40, s31, 1 +; GCN-NEXT: v_writelane_b32 v40, s34, 2 +; GCN-NEXT: v_writelane_b32 v40, s35, 3 +; GCN-NEXT: v_writelane_b32 v40, s36, 4 +; GCN-NEXT: v_writelane_b32 v40, s37, 5 +; GCN-NEXT: v_writelane_b32 v40, s38, 6 +; GCN-NEXT: v_writelane_b32 v40, s39, 7 +; GCN-NEXT: v_writelane_b32 v40, s40, 8 +; GCN-NEXT: v_writelane_b32 v40, s41, 9 +; GCN-NEXT: v_writelane_b32 v40, s42, 10 +; GCN-NEXT: v_writelane_b32 v40, s43, 11 +; GCN-NEXT: v_writelane_b32 v40, s44, 12 +; GCN-NEXT: v_writelane_b32 v40, s46, 13 +; GCN-NEXT: v_writelane_b32 v40, s47, 14 +; GCN-NEXT: v_writelane_b32 v40, s48, 15 +; GCN-NEXT: v_writelane_b32 v40, s49, 16 ; GCN-NEXT: s_mov_b32 s42, s14 ; GCN-NEXT: s_mov_b32 s43, s13 ; GCN-NEXT: s_mov_b32 s44, s12 @@ -606,30 +606,30 @@ define void @test_indirect_call_vgpr_ptr_arg(void(i32)* %fptr) { ; GCN-NEXT: s_cbranch_execnz .LBB3_1 ; GCN-NEXT: ; %bb.2: ; GCN-NEXT: s_mov_b64 exec, s[46:47] -; GCN-NEXT: v_readlane_b32 s4, v40, 15 -; GCN-NEXT: v_readlane_b32 s5, v40, 16 -; GCN-NEXT: v_readlane_b32 s49, v40, 14 -; GCN-NEXT: v_readlane_b32 s48, v40, 13 -; GCN-NEXT: v_readlane_b32 s47, v40, 12 -; GCN-NEXT: v_readlane_b32 s46, v40, 11 -; GCN-NEXT: v_readlane_b32 s44, v40, 10 -; GCN-NEXT: v_readlane_b32 s43, v40, 9 -; GCN-NEXT: v_readlane_b32 s42, v40, 8 -; GCN-NEXT: v_readlane_b32 s41, v40, 7 -; GCN-NEXT: v_readlane_b32 s40, v40, 6 -; GCN-NEXT: v_readlane_b32 s39, v40, 5 -; GCN-NEXT: v_readlane_b32 s38, v40, 4 -; GCN-NEXT: v_readlane_b32 s37, v40, 3 -; GCN-NEXT: v_readlane_b32 s36, v40, 2 -; GCN-NEXT: v_readlane_b32 s35, v40, 1 -; GCN-NEXT: v_readlane_b32 s34, v40, 0 +; GCN-NEXT: v_readlane_b32 s49, v40, 16 +; GCN-NEXT: v_readlane_b32 s48, v40, 15 +; GCN-NEXT: v_readlane_b32 s47, v40, 14 +; GCN-NEXT: v_readlane_b32 s46, v40, 13 +; GCN-NEXT: v_readlane_b32 s44, v40, 12 +; GCN-NEXT: v_readlane_b32 s43, v40, 11 +; GCN-NEXT: v_readlane_b32 s42, v40, 10 +; GCN-NEXT: v_readlane_b32 s41, v40, 9 +; GCN-NEXT: v_readlane_b32 s40, v40, 8 +; GCN-NEXT: v_readlane_b32 s39, v40, 7 +; GCN-NEXT: v_readlane_b32 s38, v40, 6 +; GCN-NEXT: v_readlane_b32 s37, v40, 5 +; GCN-NEXT: v_readlane_b32 s36, v40, 4 +; GCN-NEXT: v_readlane_b32 s35, v40, 3 +; GCN-NEXT: v_readlane_b32 s34, v40, 2 +; GCN-NEXT: v_readlane_b32 s31, v40, 1 +; GCN-NEXT: v_readlane_b32 s30, v40, 0 ; GCN-NEXT: s_addk_i32 s32, 0xfc00 ; GCN-NEXT: v_readlane_b32 s33, v40, 17 -; GCN-NEXT: s_or_saveexec_b64 s[6:7], -1 +; GCN-NEXT: s_or_saveexec_b64 s[4:5], -1 ; GCN-NEXT: buffer_load_dword v40, off, s[0:3], s32 ; 4-byte Folded Reload -; GCN-NEXT: s_mov_b64 exec, s[6:7] +; GCN-NEXT: s_mov_b64 
exec, s[4:5] ; GCN-NEXT: s_waitcnt vmcnt(0) -; GCN-NEXT: s_setpc_b64 s[4:5] +; GCN-NEXT: s_setpc_b64 s[30:31] ; ; GISEL-LABEL: test_indirect_call_vgpr_ptr_arg: ; GISEL: ; %bb.0: @@ -640,21 +640,23 @@ define void @test_indirect_call_vgpr_ptr_arg(void(i32)* %fptr) { ; GISEL-NEXT: v_writelane_b32 v40, s33, 17 ; GISEL-NEXT: s_mov_b32 s33, s32 ; GISEL-NEXT: s_addk_i32 s32, 0x400 -; GISEL-NEXT: v_writelane_b32 v40, s34, 0 -; GISEL-NEXT: v_writelane_b32 v40, s35, 1 -; GISEL-NEXT: v_writelane_b32 v40, s36, 2 -; GISEL-NEXT: v_writelane_b32 v40, s37, 3 -; GISEL-NEXT: v_writelane_b32 v40, s38, 4 -; GISEL-NEXT: v_writelane_b32 v40, s39, 5 -; GISEL-NEXT: v_writelane_b32 v40, s40, 6 -; GISEL-NEXT: v_writelane_b32 v40, s41, 7 -; GISEL-NEXT: v_writelane_b32 v40, s42, 8 -; GISEL-NEXT: v_writelane_b32 v40, s43, 9 -; GISEL-NEXT: v_writelane_b32 v40, s44, 10 -; GISEL-NEXT: v_writelane_b32 v40, s46, 11 -; GISEL-NEXT: v_writelane_b32 v40, s47, 12 -; GISEL-NEXT: v_writelane_b32 v40, s48, 13 -; GISEL-NEXT: v_writelane_b32 v40, s49, 14 +; GISEL-NEXT: v_writelane_b32 v40, s30, 0 +; GISEL-NEXT: v_writelane_b32 v40, s31, 1 +; GISEL-NEXT: v_writelane_b32 v40, s34, 2 +; GISEL-NEXT: v_writelane_b32 v40, s35, 3 +; GISEL-NEXT: v_writelane_b32 v40, s36, 4 +; GISEL-NEXT: v_writelane_b32 v40, s37, 5 +; GISEL-NEXT: v_writelane_b32 v40, s38, 6 +; GISEL-NEXT: v_writelane_b32 v40, s39, 7 +; GISEL-NEXT: v_writelane_b32 v40, s40, 8 +; GISEL-NEXT: v_writelane_b32 v40, s41, 9 +; GISEL-NEXT: v_writelane_b32 v40, s42, 10 +; GISEL-NEXT: v_writelane_b32 v40, s43, 11 +; GISEL-NEXT: v_writelane_b32 v40, s44, 12 +; GISEL-NEXT: v_writelane_b32 v40, s46, 13 +; GISEL-NEXT: v_writelane_b32 v40, s47, 14 +; GISEL-NEXT: v_writelane_b32 v40, s48, 15 +; GISEL-NEXT: v_writelane_b32 v40, s49, 16 ; GISEL-NEXT: s_mov_b32 s42, s14 ; GISEL-NEXT: s_mov_b32 s43, s13 ; GISEL-NEXT: s_mov_b32 s44, s12 @@ -662,8 +664,6 @@ define void @test_indirect_call_vgpr_ptr_arg(void(i32)* %fptr) { ; GISEL-NEXT: s_mov_b64 s[36:37], s[8:9] ; GISEL-NEXT: s_mov_b64 s[38:39], s[6:7] ; GISEL-NEXT: s_mov_b64 s[40:41], s[4:5] -; GISEL-NEXT: v_writelane_b32 v40, s30, 15 -; GISEL-NEXT: v_writelane_b32 v40, s31, 16 ; GISEL-NEXT: s_mov_b64 s[46:47], exec ; GISEL-NEXT: .LBB3_1: ; =>This Inner Loop Header: Depth=1 ; GISEL-NEXT: v_readfirstlane_b32 s16, v0 @@ -685,30 +685,30 @@ define void @test_indirect_call_vgpr_ptr_arg(void(i32)* %fptr) { ; GISEL-NEXT: s_cbranch_execnz .LBB3_1 ; GISEL-NEXT: ; %bb.2: ; GISEL-NEXT: s_mov_b64 exec, s[46:47] -; GISEL-NEXT: v_readlane_b32 s4, v40, 15 -; GISEL-NEXT: v_readlane_b32 s5, v40, 16 -; GISEL-NEXT: v_readlane_b32 s49, v40, 14 -; GISEL-NEXT: v_readlane_b32 s48, v40, 13 -; GISEL-NEXT: v_readlane_b32 s47, v40, 12 -; GISEL-NEXT: v_readlane_b32 s46, v40, 11 -; GISEL-NEXT: v_readlane_b32 s44, v40, 10 -; GISEL-NEXT: v_readlane_b32 s43, v40, 9 -; GISEL-NEXT: v_readlane_b32 s42, v40, 8 -; GISEL-NEXT: v_readlane_b32 s41, v40, 7 -; GISEL-NEXT: v_readlane_b32 s40, v40, 6 -; GISEL-NEXT: v_readlane_b32 s39, v40, 5 -; GISEL-NEXT: v_readlane_b32 s38, v40, 4 -; GISEL-NEXT: v_readlane_b32 s37, v40, 3 -; GISEL-NEXT: v_readlane_b32 s36, v40, 2 -; GISEL-NEXT: v_readlane_b32 s35, v40, 1 -; GISEL-NEXT: v_readlane_b32 s34, v40, 0 +; GISEL-NEXT: v_readlane_b32 s49, v40, 16 +; GISEL-NEXT: v_readlane_b32 s48, v40, 15 +; GISEL-NEXT: v_readlane_b32 s47, v40, 14 +; GISEL-NEXT: v_readlane_b32 s46, v40, 13 +; GISEL-NEXT: v_readlane_b32 s44, v40, 12 +; GISEL-NEXT: v_readlane_b32 s43, v40, 11 +; GISEL-NEXT: v_readlane_b32 s42, v40, 10 +; GISEL-NEXT: v_readlane_b32 s41, v40, 9 +; 
GISEL-NEXT: v_readlane_b32 s40, v40, 8 +; GISEL-NEXT: v_readlane_b32 s39, v40, 7 +; GISEL-NEXT: v_readlane_b32 s38, v40, 6 +; GISEL-NEXT: v_readlane_b32 s37, v40, 5 +; GISEL-NEXT: v_readlane_b32 s36, v40, 4 +; GISEL-NEXT: v_readlane_b32 s35, v40, 3 +; GISEL-NEXT: v_readlane_b32 s34, v40, 2 +; GISEL-NEXT: v_readlane_b32 s31, v40, 1 +; GISEL-NEXT: v_readlane_b32 s30, v40, 0 ; GISEL-NEXT: s_addk_i32 s32, 0xfc00 ; GISEL-NEXT: v_readlane_b32 s33, v40, 17 -; GISEL-NEXT: s_or_saveexec_b64 s[6:7], -1 +; GISEL-NEXT: s_or_saveexec_b64 s[4:5], -1 ; GISEL-NEXT: buffer_load_dword v40, off, s[0:3], s32 ; 4-byte Folded Reload -; GISEL-NEXT: s_mov_b64 exec, s[6:7] +; GISEL-NEXT: s_mov_b64 exec, s[4:5] ; GISEL-NEXT: s_waitcnt vmcnt(0) -; GISEL-NEXT: s_setpc_b64 s[4:5] +; GISEL-NEXT: s_setpc_b64 s[30:31] call void %fptr(i32 123) ret void } @@ -723,23 +723,23 @@ define i32 @test_indirect_call_vgpr_ptr_ret(i32()* %fptr) { ; GCN-NEXT: v_writelane_b32 v40, s33, 17 ; GCN-NEXT: s_mov_b32 s33, s32 ; GCN-NEXT: s_addk_i32 s32, 0x400 -; GCN-NEXT: v_writelane_b32 v40, s34, 0 -; GCN-NEXT: v_writelane_b32 v40, s35, 1 -; GCN-NEXT: v_writelane_b32 v40, s36, 2 -; GCN-NEXT: v_writelane_b32 v40, s37, 3 -; GCN-NEXT: v_writelane_b32 v40, s38, 4 -; GCN-NEXT: v_writelane_b32 v40, s39, 5 -; GCN-NEXT: v_writelane_b32 v40, s40, 6 -; GCN-NEXT: v_writelane_b32 v40, s41, 7 -; GCN-NEXT: v_writelane_b32 v40, s42, 8 -; GCN-NEXT: v_writelane_b32 v40, s43, 9 -; GCN-NEXT: v_writelane_b32 v40, s44, 10 -; GCN-NEXT: v_writelane_b32 v40, s46, 11 -; GCN-NEXT: v_writelane_b32 v40, s47, 12 -; GCN-NEXT: v_writelane_b32 v40, s48, 13 -; GCN-NEXT: v_writelane_b32 v40, s49, 14 -; GCN-NEXT: v_writelane_b32 v40, s30, 15 -; GCN-NEXT: v_writelane_b32 v40, s31, 16 +; GCN-NEXT: v_writelane_b32 v40, s30, 0 +; GCN-NEXT: v_writelane_b32 v40, s31, 1 +; GCN-NEXT: v_writelane_b32 v40, s34, 2 +; GCN-NEXT: v_writelane_b32 v40, s35, 3 +; GCN-NEXT: v_writelane_b32 v40, s36, 4 +; GCN-NEXT: v_writelane_b32 v40, s37, 5 +; GCN-NEXT: v_writelane_b32 v40, s38, 6 +; GCN-NEXT: v_writelane_b32 v40, s39, 7 +; GCN-NEXT: v_writelane_b32 v40, s40, 8 +; GCN-NEXT: v_writelane_b32 v40, s41, 9 +; GCN-NEXT: v_writelane_b32 v40, s42, 10 +; GCN-NEXT: v_writelane_b32 v40, s43, 11 +; GCN-NEXT: v_writelane_b32 v40, s44, 12 +; GCN-NEXT: v_writelane_b32 v40, s46, 13 +; GCN-NEXT: v_writelane_b32 v40, s47, 14 +; GCN-NEXT: v_writelane_b32 v40, s48, 15 +; GCN-NEXT: v_writelane_b32 v40, s49, 16 ; GCN-NEXT: s_mov_b32 s42, s14 ; GCN-NEXT: s_mov_b32 s43, s13 ; GCN-NEXT: s_mov_b32 s44, s12 @@ -769,30 +769,30 @@ define i32 @test_indirect_call_vgpr_ptr_ret(i32()* %fptr) { ; GCN-NEXT: ; %bb.2: ; GCN-NEXT: s_mov_b64 exec, s[46:47] ; GCN-NEXT: v_add_i32_e32 v0, vcc, 1, v2 -; GCN-NEXT: v_readlane_b32 s4, v40, 15 -; GCN-NEXT: v_readlane_b32 s5, v40, 16 -; GCN-NEXT: v_readlane_b32 s49, v40, 14 -; GCN-NEXT: v_readlane_b32 s48, v40, 13 -; GCN-NEXT: v_readlane_b32 s47, v40, 12 -; GCN-NEXT: v_readlane_b32 s46, v40, 11 -; GCN-NEXT: v_readlane_b32 s44, v40, 10 -; GCN-NEXT: v_readlane_b32 s43, v40, 9 -; GCN-NEXT: v_readlane_b32 s42, v40, 8 -; GCN-NEXT: v_readlane_b32 s41, v40, 7 -; GCN-NEXT: v_readlane_b32 s40, v40, 6 -; GCN-NEXT: v_readlane_b32 s39, v40, 5 -; GCN-NEXT: v_readlane_b32 s38, v40, 4 -; GCN-NEXT: v_readlane_b32 s37, v40, 3 -; GCN-NEXT: v_readlane_b32 s36, v40, 2 -; GCN-NEXT: v_readlane_b32 s35, v40, 1 -; GCN-NEXT: v_readlane_b32 s34, v40, 0 +; GCN-NEXT: v_readlane_b32 s49, v40, 16 +; GCN-NEXT: v_readlane_b32 s48, v40, 15 +; GCN-NEXT: v_readlane_b32 s47, v40, 14 +; GCN-NEXT: v_readlane_b32 s46, v40, 13 
+; GCN-NEXT: v_readlane_b32 s44, v40, 12 +; GCN-NEXT: v_readlane_b32 s43, v40, 11 +; GCN-NEXT: v_readlane_b32 s42, v40, 10 +; GCN-NEXT: v_readlane_b32 s41, v40, 9 +; GCN-NEXT: v_readlane_b32 s40, v40, 8 +; GCN-NEXT: v_readlane_b32 s39, v40, 7 +; GCN-NEXT: v_readlane_b32 s38, v40, 6 +; GCN-NEXT: v_readlane_b32 s37, v40, 5 +; GCN-NEXT: v_readlane_b32 s36, v40, 4 +; GCN-NEXT: v_readlane_b32 s35, v40, 3 +; GCN-NEXT: v_readlane_b32 s34, v40, 2 +; GCN-NEXT: v_readlane_b32 s31, v40, 1 +; GCN-NEXT: v_readlane_b32 s30, v40, 0 ; GCN-NEXT: s_addk_i32 s32, 0xfc00 ; GCN-NEXT: v_readlane_b32 s33, v40, 17 -; GCN-NEXT: s_or_saveexec_b64 s[6:7], -1 +; GCN-NEXT: s_or_saveexec_b64 s[4:5], -1 ; GCN-NEXT: buffer_load_dword v40, off, s[0:3], s32 ; 4-byte Folded Reload -; GCN-NEXT: s_mov_b64 exec, s[6:7] +; GCN-NEXT: s_mov_b64 exec, s[4:5] ; GCN-NEXT: s_waitcnt vmcnt(0) -; GCN-NEXT: s_setpc_b64 s[4:5] +; GCN-NEXT: s_setpc_b64 s[30:31] ; ; GISEL-LABEL: test_indirect_call_vgpr_ptr_ret: ; GISEL: ; %bb.0: @@ -803,21 +803,23 @@ define i32 @test_indirect_call_vgpr_ptr_ret(i32()* %fptr) { ; GISEL-NEXT: v_writelane_b32 v40, s33, 17 ; GISEL-NEXT: s_mov_b32 s33, s32 ; GISEL-NEXT: s_addk_i32 s32, 0x400 -; GISEL-NEXT: v_writelane_b32 v40, s34, 0 -; GISEL-NEXT: v_writelane_b32 v40, s35, 1 -; GISEL-NEXT: v_writelane_b32 v40, s36, 2 -; GISEL-NEXT: v_writelane_b32 v40, s37, 3 -; GISEL-NEXT: v_writelane_b32 v40, s38, 4 -; GISEL-NEXT: v_writelane_b32 v40, s39, 5 -; GISEL-NEXT: v_writelane_b32 v40, s40, 6 -; GISEL-NEXT: v_writelane_b32 v40, s41, 7 -; GISEL-NEXT: v_writelane_b32 v40, s42, 8 -; GISEL-NEXT: v_writelane_b32 v40, s43, 9 -; GISEL-NEXT: v_writelane_b32 v40, s44, 10 -; GISEL-NEXT: v_writelane_b32 v40, s46, 11 -; GISEL-NEXT: v_writelane_b32 v40, s47, 12 -; GISEL-NEXT: v_writelane_b32 v40, s48, 13 -; GISEL-NEXT: v_writelane_b32 v40, s49, 14 +; GISEL-NEXT: v_writelane_b32 v40, s30, 0 +; GISEL-NEXT: v_writelane_b32 v40, s31, 1 +; GISEL-NEXT: v_writelane_b32 v40, s34, 2 +; GISEL-NEXT: v_writelane_b32 v40, s35, 3 +; GISEL-NEXT: v_writelane_b32 v40, s36, 4 +; GISEL-NEXT: v_writelane_b32 v40, s37, 5 +; GISEL-NEXT: v_writelane_b32 v40, s38, 6 +; GISEL-NEXT: v_writelane_b32 v40, s39, 7 +; GISEL-NEXT: v_writelane_b32 v40, s40, 8 +; GISEL-NEXT: v_writelane_b32 v40, s41, 9 +; GISEL-NEXT: v_writelane_b32 v40, s42, 10 +; GISEL-NEXT: v_writelane_b32 v40, s43, 11 +; GISEL-NEXT: v_writelane_b32 v40, s44, 12 +; GISEL-NEXT: v_writelane_b32 v40, s46, 13 +; GISEL-NEXT: v_writelane_b32 v40, s47, 14 +; GISEL-NEXT: v_writelane_b32 v40, s48, 15 +; GISEL-NEXT: v_writelane_b32 v40, s49, 16 ; GISEL-NEXT: s_mov_b32 s42, s14 ; GISEL-NEXT: s_mov_b32 s43, s13 ; GISEL-NEXT: s_mov_b32 s44, s12 @@ -825,8 +827,6 @@ define i32 @test_indirect_call_vgpr_ptr_ret(i32()* %fptr) { ; GISEL-NEXT: s_mov_b64 s[36:37], s[8:9] ; GISEL-NEXT: s_mov_b64 s[38:39], s[6:7] ; GISEL-NEXT: s_mov_b64 s[40:41], s[4:5] -; GISEL-NEXT: v_writelane_b32 v40, s30, 15 -; GISEL-NEXT: v_writelane_b32 v40, s31, 16 ; GISEL-NEXT: s_mov_b64 s[46:47], exec ; GISEL-NEXT: .LBB4_1: ; =>This Inner Loop Header: Depth=1 ; GISEL-NEXT: v_readfirstlane_b32 s16, v0 @@ -849,30 +849,30 @@ define i32 @test_indirect_call_vgpr_ptr_ret(i32()* %fptr) { ; GISEL-NEXT: ; %bb.2: ; GISEL-NEXT: s_mov_b64 exec, s[46:47] ; GISEL-NEXT: v_add_i32_e32 v0, vcc, 1, v2 -; GISEL-NEXT: v_readlane_b32 s4, v40, 15 -; GISEL-NEXT: v_readlane_b32 s5, v40, 16 -; GISEL-NEXT: v_readlane_b32 s49, v40, 14 -; GISEL-NEXT: v_readlane_b32 s48, v40, 13 -; GISEL-NEXT: v_readlane_b32 s47, v40, 12 -; GISEL-NEXT: v_readlane_b32 s46, v40, 11 -; 
GISEL-NEXT: v_readlane_b32 s44, v40, 10 -; GISEL-NEXT: v_readlane_b32 s43, v40, 9 -; GISEL-NEXT: v_readlane_b32 s42, v40, 8 -; GISEL-NEXT: v_readlane_b32 s41, v40, 7 -; GISEL-NEXT: v_readlane_b32 s40, v40, 6 -; GISEL-NEXT: v_readlane_b32 s39, v40, 5 -; GISEL-NEXT: v_readlane_b32 s38, v40, 4 -; GISEL-NEXT: v_readlane_b32 s37, v40, 3 -; GISEL-NEXT: v_readlane_b32 s36, v40, 2 -; GISEL-NEXT: v_readlane_b32 s35, v40, 1 -; GISEL-NEXT: v_readlane_b32 s34, v40, 0 +; GISEL-NEXT: v_readlane_b32 s49, v40, 16 +; GISEL-NEXT: v_readlane_b32 s48, v40, 15 +; GISEL-NEXT: v_readlane_b32 s47, v40, 14 +; GISEL-NEXT: v_readlane_b32 s46, v40, 13 +; GISEL-NEXT: v_readlane_b32 s44, v40, 12 +; GISEL-NEXT: v_readlane_b32 s43, v40, 11 +; GISEL-NEXT: v_readlane_b32 s42, v40, 10 +; GISEL-NEXT: v_readlane_b32 s41, v40, 9 +; GISEL-NEXT: v_readlane_b32 s40, v40, 8 +; GISEL-NEXT: v_readlane_b32 s39, v40, 7 +; GISEL-NEXT: v_readlane_b32 s38, v40, 6 +; GISEL-NEXT: v_readlane_b32 s37, v40, 5 +; GISEL-NEXT: v_readlane_b32 s36, v40, 4 +; GISEL-NEXT: v_readlane_b32 s35, v40, 3 +; GISEL-NEXT: v_readlane_b32 s34, v40, 2 +; GISEL-NEXT: v_readlane_b32 s31, v40, 1 +; GISEL-NEXT: v_readlane_b32 s30, v40, 0 ; GISEL-NEXT: s_addk_i32 s32, 0xfc00 ; GISEL-NEXT: v_readlane_b32 s33, v40, 17 -; GISEL-NEXT: s_or_saveexec_b64 s[6:7], -1 +; GISEL-NEXT: s_or_saveexec_b64 s[4:5], -1 ; GISEL-NEXT: buffer_load_dword v40, off, s[0:3], s32 ; 4-byte Folded Reload -; GISEL-NEXT: s_mov_b64 exec, s[6:7] +; GISEL-NEXT: s_mov_b64 exec, s[4:5] ; GISEL-NEXT: s_waitcnt vmcnt(0) -; GISEL-NEXT: s_setpc_b64 s[4:5] +; GISEL-NEXT: s_setpc_b64 s[30:31] %a = call i32 %fptr() %b = add i32 %a, 1 ret i32 %b @@ -888,23 +888,25 @@ define void @test_indirect_call_vgpr_ptr_in_branch(void()* %fptr, i1 %cond) { ; GCN-NEXT: v_writelane_b32 v40, s33, 19 ; GCN-NEXT: s_mov_b32 s33, s32 ; GCN-NEXT: s_addk_i32 s32, 0x400 -; GCN-NEXT: v_writelane_b32 v40, s34, 0 -; GCN-NEXT: v_writelane_b32 v40, s35, 1 -; GCN-NEXT: v_writelane_b32 v40, s36, 2 -; GCN-NEXT: v_writelane_b32 v40, s37, 3 -; GCN-NEXT: v_writelane_b32 v40, s38, 4 -; GCN-NEXT: v_writelane_b32 v40, s39, 5 -; GCN-NEXT: v_writelane_b32 v40, s40, 6 -; GCN-NEXT: v_writelane_b32 v40, s41, 7 -; GCN-NEXT: v_writelane_b32 v40, s42, 8 -; GCN-NEXT: v_writelane_b32 v40, s43, 9 -; GCN-NEXT: v_writelane_b32 v40, s44, 10 -; GCN-NEXT: v_writelane_b32 v40, s46, 11 -; GCN-NEXT: v_writelane_b32 v40, s47, 12 -; GCN-NEXT: v_writelane_b32 v40, s48, 13 -; GCN-NEXT: v_writelane_b32 v40, s49, 14 -; GCN-NEXT: v_writelane_b32 v40, s50, 15 -; GCN-NEXT: v_writelane_b32 v40, s51, 16 +; GCN-NEXT: v_writelane_b32 v40, s30, 0 +; GCN-NEXT: v_writelane_b32 v40, s31, 1 +; GCN-NEXT: v_writelane_b32 v40, s34, 2 +; GCN-NEXT: v_writelane_b32 v40, s35, 3 +; GCN-NEXT: v_writelane_b32 v40, s36, 4 +; GCN-NEXT: v_writelane_b32 v40, s37, 5 +; GCN-NEXT: v_writelane_b32 v40, s38, 6 +; GCN-NEXT: v_writelane_b32 v40, s39, 7 +; GCN-NEXT: v_writelane_b32 v40, s40, 8 +; GCN-NEXT: v_writelane_b32 v40, s41, 9 +; GCN-NEXT: v_writelane_b32 v40, s42, 10 +; GCN-NEXT: v_writelane_b32 v40, s43, 11 +; GCN-NEXT: v_writelane_b32 v40, s44, 12 +; GCN-NEXT: v_writelane_b32 v40, s46, 13 +; GCN-NEXT: v_writelane_b32 v40, s47, 14 +; GCN-NEXT: v_writelane_b32 v40, s48, 15 +; GCN-NEXT: v_writelane_b32 v40, s49, 16 +; GCN-NEXT: v_writelane_b32 v40, s50, 17 +; GCN-NEXT: v_writelane_b32 v40, s51, 18 ; GCN-NEXT: s_mov_b32 s42, s14 ; GCN-NEXT: s_mov_b32 s43, s13 ; GCN-NEXT: s_mov_b32 s44, s12 @@ -917,8 +919,6 @@ define void @test_indirect_call_vgpr_ptr_in_branch(void()* %fptr, i1 %cond) { ; 
GCN-NEXT: s_and_saveexec_b64 s[46:47], vcc ; GCN-NEXT: s_cbranch_execz .LBB5_4 ; GCN-NEXT: ; %bb.1: ; %bb1 -; GCN-NEXT: v_writelane_b32 v40, s30, 17 -; GCN-NEXT: v_writelane_b32 v40, s31, 18 ; GCN-NEXT: s_mov_b64 s[48:49], exec ; GCN-NEXT: .LBB5_2: ; =>This Inner Loop Header: Depth=1 ; GCN-NEXT: v_readfirstlane_b32 s16, v0 @@ -939,27 +939,27 @@ define void @test_indirect_call_vgpr_ptr_in_branch(void()* %fptr, i1 %cond) { ; GCN-NEXT: s_cbranch_execnz .LBB5_2 ; GCN-NEXT: ; %bb.3: ; GCN-NEXT: s_mov_b64 exec, s[48:49] -; GCN-NEXT: v_readlane_b32 s30, v40, 17 -; GCN-NEXT: v_readlane_b32 s31, v40, 18 ; GCN-NEXT: .LBB5_4: ; %bb2 ; GCN-NEXT: s_or_b64 exec, exec, s[46:47] -; GCN-NEXT: v_readlane_b32 s51, v40, 16 -; GCN-NEXT: v_readlane_b32 s50, v40, 15 -; GCN-NEXT: v_readlane_b32 s49, v40, 14 -; GCN-NEXT: v_readlane_b32 s48, v40, 13 -; GCN-NEXT: v_readlane_b32 s47, v40, 12 -; GCN-NEXT: v_readlane_b32 s46, v40, 11 -; GCN-NEXT: v_readlane_b32 s44, v40, 10 -; GCN-NEXT: v_readlane_b32 s43, v40, 9 -; GCN-NEXT: v_readlane_b32 s42, v40, 8 -; GCN-NEXT: v_readlane_b32 s41, v40, 7 -; GCN-NEXT: v_readlane_b32 s40, v40, 6 -; GCN-NEXT: v_readlane_b32 s39, v40, 5 -; GCN-NEXT: v_readlane_b32 s38, v40, 4 -; GCN-NEXT: v_readlane_b32 s37, v40, 3 -; GCN-NEXT: v_readlane_b32 s36, v40, 2 -; GCN-NEXT: v_readlane_b32 s35, v40, 1 -; GCN-NEXT: v_readlane_b32 s34, v40, 0 +; GCN-NEXT: v_readlane_b32 s51, v40, 18 +; GCN-NEXT: v_readlane_b32 s50, v40, 17 +; GCN-NEXT: v_readlane_b32 s49, v40, 16 +; GCN-NEXT: v_readlane_b32 s48, v40, 15 +; GCN-NEXT: v_readlane_b32 s47, v40, 14 +; GCN-NEXT: v_readlane_b32 s46, v40, 13 +; GCN-NEXT: v_readlane_b32 s44, v40, 12 +; GCN-NEXT: v_readlane_b32 s43, v40, 11 +; GCN-NEXT: v_readlane_b32 s42, v40, 10 +; GCN-NEXT: v_readlane_b32 s41, v40, 9 +; GCN-NEXT: v_readlane_b32 s40, v40, 8 +; GCN-NEXT: v_readlane_b32 s39, v40, 7 +; GCN-NEXT: v_readlane_b32 s38, v40, 6 +; GCN-NEXT: v_readlane_b32 s37, v40, 5 +; GCN-NEXT: v_readlane_b32 s36, v40, 4 +; GCN-NEXT: v_readlane_b32 s35, v40, 3 +; GCN-NEXT: v_readlane_b32 s34, v40, 2 +; GCN-NEXT: v_readlane_b32 s31, v40, 1 +; GCN-NEXT: v_readlane_b32 s30, v40, 0 ; GCN-NEXT: s_addk_i32 s32, 0xfc00 ; GCN-NEXT: v_readlane_b32 s33, v40, 19 ; GCN-NEXT: s_or_saveexec_b64 s[4:5], -1 @@ -977,23 +977,25 @@ define void @test_indirect_call_vgpr_ptr_in_branch(void()* %fptr, i1 %cond) { ; GISEL-NEXT: v_writelane_b32 v40, s33, 19 ; GISEL-NEXT: s_mov_b32 s33, s32 ; GISEL-NEXT: s_addk_i32 s32, 0x400 -; GISEL-NEXT: v_writelane_b32 v40, s34, 0 -; GISEL-NEXT: v_writelane_b32 v40, s35, 1 -; GISEL-NEXT: v_writelane_b32 v40, s36, 2 -; GISEL-NEXT: v_writelane_b32 v40, s37, 3 -; GISEL-NEXT: v_writelane_b32 v40, s38, 4 -; GISEL-NEXT: v_writelane_b32 v40, s39, 5 -; GISEL-NEXT: v_writelane_b32 v40, s40, 6 -; GISEL-NEXT: v_writelane_b32 v40, s41, 7 -; GISEL-NEXT: v_writelane_b32 v40, s42, 8 -; GISEL-NEXT: v_writelane_b32 v40, s43, 9 -; GISEL-NEXT: v_writelane_b32 v40, s44, 10 -; GISEL-NEXT: v_writelane_b32 v40, s46, 11 -; GISEL-NEXT: v_writelane_b32 v40, s47, 12 -; GISEL-NEXT: v_writelane_b32 v40, s48, 13 -; GISEL-NEXT: v_writelane_b32 v40, s49, 14 -; GISEL-NEXT: v_writelane_b32 v40, s50, 15 -; GISEL-NEXT: v_writelane_b32 v40, s51, 16 +; GISEL-NEXT: v_writelane_b32 v40, s30, 0 +; GISEL-NEXT: v_writelane_b32 v40, s31, 1 +; GISEL-NEXT: v_writelane_b32 v40, s34, 2 +; GISEL-NEXT: v_writelane_b32 v40, s35, 3 +; GISEL-NEXT: v_writelane_b32 v40, s36, 4 +; GISEL-NEXT: v_writelane_b32 v40, s37, 5 +; GISEL-NEXT: v_writelane_b32 v40, s38, 6 +; GISEL-NEXT: v_writelane_b32 v40, s39, 7 +; GISEL-NEXT: 
v_writelane_b32 v40, s40, 8 +; GISEL-NEXT: v_writelane_b32 v40, s41, 9 +; GISEL-NEXT: v_writelane_b32 v40, s42, 10 +; GISEL-NEXT: v_writelane_b32 v40, s43, 11 +; GISEL-NEXT: v_writelane_b32 v40, s44, 12 +; GISEL-NEXT: v_writelane_b32 v40, s46, 13 +; GISEL-NEXT: v_writelane_b32 v40, s47, 14 +; GISEL-NEXT: v_writelane_b32 v40, s48, 15 +; GISEL-NEXT: v_writelane_b32 v40, s49, 16 +; GISEL-NEXT: v_writelane_b32 v40, s50, 17 +; GISEL-NEXT: v_writelane_b32 v40, s51, 18 ; GISEL-NEXT: s_mov_b32 s42, s14 ; GISEL-NEXT: s_mov_b32 s43, s13 ; GISEL-NEXT: s_mov_b32 s44, s12 @@ -1006,8 +1008,6 @@ define void @test_indirect_call_vgpr_ptr_in_branch(void()* %fptr, i1 %cond) { ; GISEL-NEXT: s_and_saveexec_b64 s[46:47], vcc ; GISEL-NEXT: s_cbranch_execz .LBB5_4 ; GISEL-NEXT: ; %bb.1: ; %bb1 -; GISEL-NEXT: v_writelane_b32 v40, s30, 17 -; GISEL-NEXT: v_writelane_b32 v40, s31, 18 ; GISEL-NEXT: s_mov_b64 s[48:49], exec ; GISEL-NEXT: .LBB5_2: ; =>This Inner Loop Header: Depth=1 ; GISEL-NEXT: v_readfirstlane_b32 s16, v0 @@ -1028,27 +1028,27 @@ define void @test_indirect_call_vgpr_ptr_in_branch(void()* %fptr, i1 %cond) { ; GISEL-NEXT: s_cbranch_execnz .LBB5_2 ; GISEL-NEXT: ; %bb.3: ; GISEL-NEXT: s_mov_b64 exec, s[48:49] -; GISEL-NEXT: v_readlane_b32 s30, v40, 17 -; GISEL-NEXT: v_readlane_b32 s31, v40, 18 ; GISEL-NEXT: .LBB5_4: ; %bb2 ; GISEL-NEXT: s_or_b64 exec, exec, s[46:47] -; GISEL-NEXT: v_readlane_b32 s51, v40, 16 -; GISEL-NEXT: v_readlane_b32 s50, v40, 15 -; GISEL-NEXT: v_readlane_b32 s49, v40, 14 -; GISEL-NEXT: v_readlane_b32 s48, v40, 13 -; GISEL-NEXT: v_readlane_b32 s47, v40, 12 -; GISEL-NEXT: v_readlane_b32 s46, v40, 11 -; GISEL-NEXT: v_readlane_b32 s44, v40, 10 -; GISEL-NEXT: v_readlane_b32 s43, v40, 9 -; GISEL-NEXT: v_readlane_b32 s42, v40, 8 -; GISEL-NEXT: v_readlane_b32 s41, v40, 7 -; GISEL-NEXT: v_readlane_b32 s40, v40, 6 -; GISEL-NEXT: v_readlane_b32 s39, v40, 5 -; GISEL-NEXT: v_readlane_b32 s38, v40, 4 -; GISEL-NEXT: v_readlane_b32 s37, v40, 3 -; GISEL-NEXT: v_readlane_b32 s36, v40, 2 -; GISEL-NEXT: v_readlane_b32 s35, v40, 1 -; GISEL-NEXT: v_readlane_b32 s34, v40, 0 +; GISEL-NEXT: v_readlane_b32 s51, v40, 18 +; GISEL-NEXT: v_readlane_b32 s50, v40, 17 +; GISEL-NEXT: v_readlane_b32 s49, v40, 16 +; GISEL-NEXT: v_readlane_b32 s48, v40, 15 +; GISEL-NEXT: v_readlane_b32 s47, v40, 14 +; GISEL-NEXT: v_readlane_b32 s46, v40, 13 +; GISEL-NEXT: v_readlane_b32 s44, v40, 12 +; GISEL-NEXT: v_readlane_b32 s43, v40, 11 +; GISEL-NEXT: v_readlane_b32 s42, v40, 10 +; GISEL-NEXT: v_readlane_b32 s41, v40, 9 +; GISEL-NEXT: v_readlane_b32 s40, v40, 8 +; GISEL-NEXT: v_readlane_b32 s39, v40, 7 +; GISEL-NEXT: v_readlane_b32 s38, v40, 6 +; GISEL-NEXT: v_readlane_b32 s37, v40, 5 +; GISEL-NEXT: v_readlane_b32 s36, v40, 4 +; GISEL-NEXT: v_readlane_b32 s35, v40, 3 +; GISEL-NEXT: v_readlane_b32 s34, v40, 2 +; GISEL-NEXT: v_readlane_b32 s31, v40, 1 +; GISEL-NEXT: v_readlane_b32 s30, v40, 0 ; GISEL-NEXT: s_addk_i32 s32, 0xfc00 ; GISEL-NEXT: v_readlane_b32 s33, v40, 19 ; GISEL-NEXT: s_or_saveexec_b64 s[4:5], -1 @@ -1074,90 +1074,93 @@ define void @test_indirect_call_vgpr_ptr_inreg_arg(void(i32)* %fptr) { ; GCN-NEXT: s_or_saveexec_b64 s[4:5], -1 ; GCN-NEXT: buffer_store_dword v40, off, s[0:3], s32 ; 4-byte Folded Spill ; GCN-NEXT: s_mov_b64 exec, s[4:5] -; GCN-NEXT: v_writelane_b32 v40, s33, 30 +; GCN-NEXT: v_writelane_b32 v40, s33, 32 ; GCN-NEXT: s_mov_b32 s33, s32 ; GCN-NEXT: s_addk_i32 s32, 0x400 -; GCN-NEXT: v_writelane_b32 v40, s34, 0 -; GCN-NEXT: v_writelane_b32 v40, s35, 1 -; GCN-NEXT: v_writelane_b32 v40, s36, 2 -; GCN-NEXT: 
v_writelane_b32 v40, s37, 3 -; GCN-NEXT: v_writelane_b32 v40, s38, 4 -; GCN-NEXT: v_writelane_b32 v40, s39, 5 -; GCN-NEXT: v_writelane_b32 v40, s40, 6 -; GCN-NEXT: v_writelane_b32 v40, s41, 7 -; GCN-NEXT: v_writelane_b32 v40, s42, 8 -; GCN-NEXT: v_writelane_b32 v40, s43, 9 -; GCN-NEXT: v_writelane_b32 v40, s44, 10 -; GCN-NEXT: v_writelane_b32 v40, s45, 11 -; GCN-NEXT: v_writelane_b32 v40, s46, 12 -; GCN-NEXT: v_writelane_b32 v40, s47, 13 -; GCN-NEXT: v_writelane_b32 v40, s48, 14 -; GCN-NEXT: v_writelane_b32 v40, s49, 15 -; GCN-NEXT: v_writelane_b32 v40, s50, 16 -; GCN-NEXT: v_writelane_b32 v40, s51, 17 -; GCN-NEXT: v_writelane_b32 v40, s52, 18 -; GCN-NEXT: v_writelane_b32 v40, s53, 19 -; GCN-NEXT: v_writelane_b32 v40, s54, 20 -; GCN-NEXT: v_writelane_b32 v40, s55, 21 -; GCN-NEXT: v_writelane_b32 v40, s56, 22 -; GCN-NEXT: v_writelane_b32 v40, s57, 23 -; GCN-NEXT: v_writelane_b32 v40, s58, 24 -; GCN-NEXT: v_writelane_b32 v40, s59, 25 -; GCN-NEXT: v_writelane_b32 v40, s60, 26 -; GCN-NEXT: v_writelane_b32 v40, s61, 27 -; GCN-NEXT: v_writelane_b32 v40, s62, 28 -; GCN-NEXT: v_writelane_b32 v40, s63, 29 -; GCN-NEXT: s_mov_b64 s[6:7], s[30:31] -; GCN-NEXT: s_mov_b64 s[8:9], exec +; GCN-NEXT: v_writelane_b32 v40, s30, 0 +; GCN-NEXT: v_writelane_b32 v40, s31, 1 +; GCN-NEXT: v_writelane_b32 v40, s34, 2 +; GCN-NEXT: v_writelane_b32 v40, s35, 3 +; GCN-NEXT: v_writelane_b32 v40, s36, 4 +; GCN-NEXT: v_writelane_b32 v40, s37, 5 +; GCN-NEXT: v_writelane_b32 v40, s38, 6 +; GCN-NEXT: v_writelane_b32 v40, s39, 7 +; GCN-NEXT: v_writelane_b32 v40, s40, 8 +; GCN-NEXT: v_writelane_b32 v40, s41, 9 +; GCN-NEXT: v_writelane_b32 v40, s42, 10 +; GCN-NEXT: v_writelane_b32 v40, s43, 11 +; GCN-NEXT: v_writelane_b32 v40, s44, 12 +; GCN-NEXT: v_writelane_b32 v40, s45, 13 +; GCN-NEXT: v_writelane_b32 v40, s46, 14 +; GCN-NEXT: v_writelane_b32 v40, s47, 15 +; GCN-NEXT: v_writelane_b32 v40, s48, 16 +; GCN-NEXT: v_writelane_b32 v40, s49, 17 +; GCN-NEXT: v_writelane_b32 v40, s50, 18 +; GCN-NEXT: v_writelane_b32 v40, s51, 19 +; GCN-NEXT: v_writelane_b32 v40, s52, 20 +; GCN-NEXT: v_writelane_b32 v40, s53, 21 +; GCN-NEXT: v_writelane_b32 v40, s54, 22 +; GCN-NEXT: v_writelane_b32 v40, s55, 23 +; GCN-NEXT: v_writelane_b32 v40, s56, 24 +; GCN-NEXT: v_writelane_b32 v40, s57, 25 +; GCN-NEXT: v_writelane_b32 v40, s58, 26 +; GCN-NEXT: v_writelane_b32 v40, s59, 27 +; GCN-NEXT: v_writelane_b32 v40, s60, 28 +; GCN-NEXT: v_writelane_b32 v40, s61, 29 +; GCN-NEXT: v_writelane_b32 v40, s62, 30 +; GCN-NEXT: v_writelane_b32 v40, s63, 31 +; GCN-NEXT: s_mov_b64 s[6:7], exec ; GCN-NEXT: s_movk_i32 s4, 0x7b ; GCN-NEXT: .LBB6_1: ; =>This Inner Loop Header: Depth=1 -; GCN-NEXT: v_readfirstlane_b32 s12, v0 -; GCN-NEXT: v_readfirstlane_b32 s13, v1 -; GCN-NEXT: v_cmp_eq_u64_e32 vcc, s[12:13], v[0:1] -; GCN-NEXT: s_and_saveexec_b64 s[10:11], vcc -; GCN-NEXT: s_swappc_b64 s[30:31], s[12:13] +; GCN-NEXT: v_readfirstlane_b32 s10, v0 +; GCN-NEXT: v_readfirstlane_b32 s11, v1 +; GCN-NEXT: v_cmp_eq_u64_e32 vcc, s[10:11], v[0:1] +; GCN-NEXT: s_and_saveexec_b64 s[8:9], vcc +; GCN-NEXT: s_swappc_b64 s[30:31], s[10:11] ; GCN-NEXT: ; implicit-def: $vgpr0_vgpr1 -; GCN-NEXT: s_xor_b64 exec, exec, s[10:11] +; GCN-NEXT: s_xor_b64 exec, exec, s[8:9] ; GCN-NEXT: s_cbranch_execnz .LBB6_1 ; GCN-NEXT: ; %bb.2: -; GCN-NEXT: s_mov_b64 exec, s[8:9] -; GCN-NEXT: v_readlane_b32 s63, v40, 29 -; GCN-NEXT: v_readlane_b32 s62, v40, 28 -; GCN-NEXT: v_readlane_b32 s61, v40, 27 -; GCN-NEXT: v_readlane_b32 s60, v40, 26 -; GCN-NEXT: v_readlane_b32 s59, v40, 25 -; GCN-NEXT: v_readlane_b32 
s58, v40, 24 -; GCN-NEXT: v_readlane_b32 s57, v40, 23 -; GCN-NEXT: v_readlane_b32 s56, v40, 22 -; GCN-NEXT: v_readlane_b32 s55, v40, 21 -; GCN-NEXT: v_readlane_b32 s54, v40, 20 -; GCN-NEXT: v_readlane_b32 s53, v40, 19 -; GCN-NEXT: v_readlane_b32 s52, v40, 18 -; GCN-NEXT: v_readlane_b32 s51, v40, 17 -; GCN-NEXT: v_readlane_b32 s50, v40, 16 -; GCN-NEXT: v_readlane_b32 s49, v40, 15 -; GCN-NEXT: v_readlane_b32 s48, v40, 14 -; GCN-NEXT: v_readlane_b32 s47, v40, 13 -; GCN-NEXT: v_readlane_b32 s46, v40, 12 -; GCN-NEXT: v_readlane_b32 s45, v40, 11 -; GCN-NEXT: v_readlane_b32 s44, v40, 10 -; GCN-NEXT: v_readlane_b32 s43, v40, 9 -; GCN-NEXT: v_readlane_b32 s42, v40, 8 -; GCN-NEXT: v_readlane_b32 s41, v40, 7 -; GCN-NEXT: v_readlane_b32 s40, v40, 6 -; GCN-NEXT: v_readlane_b32 s39, v40, 5 -; GCN-NEXT: v_readlane_b32 s38, v40, 4 -; GCN-NEXT: v_readlane_b32 s37, v40, 3 -; GCN-NEXT: v_readlane_b32 s36, v40, 2 -; GCN-NEXT: v_readlane_b32 s35, v40, 1 -; GCN-NEXT: v_readlane_b32 s34, v40, 0 +; GCN-NEXT: s_mov_b64 exec, s[6:7] +; GCN-NEXT: v_readlane_b32 s63, v40, 31 +; GCN-NEXT: v_readlane_b32 s62, v40, 30 +; GCN-NEXT: v_readlane_b32 s61, v40, 29 +; GCN-NEXT: v_readlane_b32 s60, v40, 28 +; GCN-NEXT: v_readlane_b32 s59, v40, 27 +; GCN-NEXT: v_readlane_b32 s58, v40, 26 +; GCN-NEXT: v_readlane_b32 s57, v40, 25 +; GCN-NEXT: v_readlane_b32 s56, v40, 24 +; GCN-NEXT: v_readlane_b32 s55, v40, 23 +; GCN-NEXT: v_readlane_b32 s54, v40, 22 +; GCN-NEXT: v_readlane_b32 s53, v40, 21 +; GCN-NEXT: v_readlane_b32 s52, v40, 20 +; GCN-NEXT: v_readlane_b32 s51, v40, 19 +; GCN-NEXT: v_readlane_b32 s50, v40, 18 +; GCN-NEXT: v_readlane_b32 s49, v40, 17 +; GCN-NEXT: v_readlane_b32 s48, v40, 16 +; GCN-NEXT: v_readlane_b32 s47, v40, 15 +; GCN-NEXT: v_readlane_b32 s46, v40, 14 +; GCN-NEXT: v_readlane_b32 s45, v40, 13 +; GCN-NEXT: v_readlane_b32 s44, v40, 12 +; GCN-NEXT: v_readlane_b32 s43, v40, 11 +; GCN-NEXT: v_readlane_b32 s42, v40, 10 +; GCN-NEXT: v_readlane_b32 s41, v40, 9 +; GCN-NEXT: v_readlane_b32 s40, v40, 8 +; GCN-NEXT: v_readlane_b32 s39, v40, 7 +; GCN-NEXT: v_readlane_b32 s38, v40, 6 +; GCN-NEXT: v_readlane_b32 s37, v40, 5 +; GCN-NEXT: v_readlane_b32 s36, v40, 4 +; GCN-NEXT: v_readlane_b32 s35, v40, 3 +; GCN-NEXT: v_readlane_b32 s34, v40, 2 +; GCN-NEXT: v_readlane_b32 s31, v40, 1 +; GCN-NEXT: v_readlane_b32 s30, v40, 0 ; GCN-NEXT: s_addk_i32 s32, 0xfc00 -; GCN-NEXT: v_readlane_b32 s33, v40, 30 +; GCN-NEXT: v_readlane_b32 s33, v40, 32 ; GCN-NEXT: s_or_saveexec_b64 s[4:5], -1 ; GCN-NEXT: buffer_load_dword v40, off, s[0:3], s32 ; 4-byte Folded Reload ; GCN-NEXT: s_mov_b64 exec, s[4:5] ; GCN-NEXT: s_waitcnt vmcnt(0) -; GCN-NEXT: s_setpc_b64 s[6:7] +; GCN-NEXT: s_setpc_b64 s[30:31] ; ; GISEL-LABEL: test_indirect_call_vgpr_ptr_inreg_arg: ; GISEL: ; %bb.0: @@ -1165,90 +1168,93 @@ define void @test_indirect_call_vgpr_ptr_inreg_arg(void(i32)* %fptr) { ; GISEL-NEXT: s_or_saveexec_b64 s[4:5], -1 ; GISEL-NEXT: buffer_store_dword v40, off, s[0:3], s32 ; 4-byte Folded Spill ; GISEL-NEXT: s_mov_b64 exec, s[4:5] -; GISEL-NEXT: v_writelane_b32 v40, s33, 30 +; GISEL-NEXT: v_writelane_b32 v40, s33, 32 ; GISEL-NEXT: s_mov_b32 s33, s32 ; GISEL-NEXT: s_addk_i32 s32, 0x400 -; GISEL-NEXT: v_writelane_b32 v40, s34, 0 -; GISEL-NEXT: v_writelane_b32 v40, s35, 1 -; GISEL-NEXT: v_writelane_b32 v40, s36, 2 -; GISEL-NEXT: v_writelane_b32 v40, s37, 3 -; GISEL-NEXT: v_writelane_b32 v40, s38, 4 -; GISEL-NEXT: v_writelane_b32 v40, s39, 5 -; GISEL-NEXT: v_writelane_b32 v40, s40, 6 -; GISEL-NEXT: v_writelane_b32 v40, s41, 7 -; GISEL-NEXT: v_writelane_b32 v40, 
s42, 8 -; GISEL-NEXT: v_writelane_b32 v40, s43, 9 -; GISEL-NEXT: v_writelane_b32 v40, s44, 10 -; GISEL-NEXT: v_writelane_b32 v40, s45, 11 -; GISEL-NEXT: v_writelane_b32 v40, s46, 12 -; GISEL-NEXT: v_writelane_b32 v40, s47, 13 -; GISEL-NEXT: v_writelane_b32 v40, s48, 14 -; GISEL-NEXT: v_writelane_b32 v40, s49, 15 -; GISEL-NEXT: v_writelane_b32 v40, s50, 16 -; GISEL-NEXT: v_writelane_b32 v40, s51, 17 -; GISEL-NEXT: v_writelane_b32 v40, s52, 18 -; GISEL-NEXT: v_writelane_b32 v40, s53, 19 -; GISEL-NEXT: v_writelane_b32 v40, s54, 20 -; GISEL-NEXT: v_writelane_b32 v40, s55, 21 -; GISEL-NEXT: v_writelane_b32 v40, s56, 22 -; GISEL-NEXT: v_writelane_b32 v40, s57, 23 -; GISEL-NEXT: v_writelane_b32 v40, s58, 24 -; GISEL-NEXT: v_writelane_b32 v40, s59, 25 -; GISEL-NEXT: v_writelane_b32 v40, s60, 26 -; GISEL-NEXT: v_writelane_b32 v40, s61, 27 -; GISEL-NEXT: v_writelane_b32 v40, s62, 28 -; GISEL-NEXT: v_writelane_b32 v40, s63, 29 -; GISEL-NEXT: s_mov_b64 s[6:7], s[30:31] +; GISEL-NEXT: v_writelane_b32 v40, s30, 0 +; GISEL-NEXT: v_writelane_b32 v40, s31, 1 +; GISEL-NEXT: v_writelane_b32 v40, s34, 2 +; GISEL-NEXT: v_writelane_b32 v40, s35, 3 +; GISEL-NEXT: v_writelane_b32 v40, s36, 4 +; GISEL-NEXT: v_writelane_b32 v40, s37, 5 +; GISEL-NEXT: v_writelane_b32 v40, s38, 6 +; GISEL-NEXT: v_writelane_b32 v40, s39, 7 +; GISEL-NEXT: v_writelane_b32 v40, s40, 8 +; GISEL-NEXT: v_writelane_b32 v40, s41, 9 +; GISEL-NEXT: v_writelane_b32 v40, s42, 10 +; GISEL-NEXT: v_writelane_b32 v40, s43, 11 +; GISEL-NEXT: v_writelane_b32 v40, s44, 12 +; GISEL-NEXT: v_writelane_b32 v40, s45, 13 +; GISEL-NEXT: v_writelane_b32 v40, s46, 14 +; GISEL-NEXT: v_writelane_b32 v40, s47, 15 +; GISEL-NEXT: v_writelane_b32 v40, s48, 16 +; GISEL-NEXT: v_writelane_b32 v40, s49, 17 +; GISEL-NEXT: v_writelane_b32 v40, s50, 18 +; GISEL-NEXT: v_writelane_b32 v40, s51, 19 +; GISEL-NEXT: v_writelane_b32 v40, s52, 20 +; GISEL-NEXT: v_writelane_b32 v40, s53, 21 +; GISEL-NEXT: v_writelane_b32 v40, s54, 22 +; GISEL-NEXT: v_writelane_b32 v40, s55, 23 +; GISEL-NEXT: v_writelane_b32 v40, s56, 24 +; GISEL-NEXT: v_writelane_b32 v40, s57, 25 +; GISEL-NEXT: v_writelane_b32 v40, s58, 26 +; GISEL-NEXT: v_writelane_b32 v40, s59, 27 +; GISEL-NEXT: v_writelane_b32 v40, s60, 28 +; GISEL-NEXT: v_writelane_b32 v40, s61, 29 +; GISEL-NEXT: v_writelane_b32 v40, s62, 30 +; GISEL-NEXT: v_writelane_b32 v40, s63, 31 ; GISEL-NEXT: s_movk_i32 s4, 0x7b -; GISEL-NEXT: s_mov_b64 s[8:9], exec +; GISEL-NEXT: s_mov_b64 s[6:7], exec ; GISEL-NEXT: .LBB6_1: ; =>This Inner Loop Header: Depth=1 -; GISEL-NEXT: v_readfirstlane_b32 s10, v0 -; GISEL-NEXT: v_readfirstlane_b32 s11, v1 -; GISEL-NEXT: v_cmp_eq_u64_e32 vcc, s[10:11], v[0:1] -; GISEL-NEXT: s_and_saveexec_b64 s[12:13], vcc -; GISEL-NEXT: s_swappc_b64 s[30:31], s[10:11] +; GISEL-NEXT: v_readfirstlane_b32 s8, v0 +; GISEL-NEXT: v_readfirstlane_b32 s9, v1 +; GISEL-NEXT: v_cmp_eq_u64_e32 vcc, s[8:9], v[0:1] +; GISEL-NEXT: s_and_saveexec_b64 s[10:11], vcc +; GISEL-NEXT: s_swappc_b64 s[30:31], s[8:9] ; GISEL-NEXT: ; implicit-def: $vgpr0_vgpr1 -; GISEL-NEXT: s_xor_b64 exec, exec, s[12:13] +; GISEL-NEXT: s_xor_b64 exec, exec, s[10:11] ; GISEL-NEXT: s_cbranch_execnz .LBB6_1 ; GISEL-NEXT: ; %bb.2: -; GISEL-NEXT: s_mov_b64 exec, s[8:9] -; GISEL-NEXT: v_readlane_b32 s63, v40, 29 -; GISEL-NEXT: v_readlane_b32 s62, v40, 28 -; GISEL-NEXT: v_readlane_b32 s61, v40, 27 -; GISEL-NEXT: v_readlane_b32 s60, v40, 26 -; GISEL-NEXT: v_readlane_b32 s59, v40, 25 -; GISEL-NEXT: v_readlane_b32 s58, v40, 24 -; GISEL-NEXT: v_readlane_b32 s57, v40, 23 -; GISEL-NEXT: 
v_readlane_b32 s56, v40, 22 -; GISEL-NEXT: v_readlane_b32 s55, v40, 21 -; GISEL-NEXT: v_readlane_b32 s54, v40, 20 -; GISEL-NEXT: v_readlane_b32 s53, v40, 19 -; GISEL-NEXT: v_readlane_b32 s52, v40, 18 -; GISEL-NEXT: v_readlane_b32 s51, v40, 17 -; GISEL-NEXT: v_readlane_b32 s50, v40, 16 -; GISEL-NEXT: v_readlane_b32 s49, v40, 15 -; GISEL-NEXT: v_readlane_b32 s48, v40, 14 -; GISEL-NEXT: v_readlane_b32 s47, v40, 13 -; GISEL-NEXT: v_readlane_b32 s46, v40, 12 -; GISEL-NEXT: v_readlane_b32 s45, v40, 11 -; GISEL-NEXT: v_readlane_b32 s44, v40, 10 -; GISEL-NEXT: v_readlane_b32 s43, v40, 9 -; GISEL-NEXT: v_readlane_b32 s42, v40, 8 -; GISEL-NEXT: v_readlane_b32 s41, v40, 7 -; GISEL-NEXT: v_readlane_b32 s40, v40, 6 -; GISEL-NEXT: v_readlane_b32 s39, v40, 5 -; GISEL-NEXT: v_readlane_b32 s38, v40, 4 -; GISEL-NEXT: v_readlane_b32 s37, v40, 3 -; GISEL-NEXT: v_readlane_b32 s36, v40, 2 -; GISEL-NEXT: v_readlane_b32 s35, v40, 1 -; GISEL-NEXT: v_readlane_b32 s34, v40, 0 +; GISEL-NEXT: s_mov_b64 exec, s[6:7] +; GISEL-NEXT: v_readlane_b32 s63, v40, 31 +; GISEL-NEXT: v_readlane_b32 s62, v40, 30 +; GISEL-NEXT: v_readlane_b32 s61, v40, 29 +; GISEL-NEXT: v_readlane_b32 s60, v40, 28 +; GISEL-NEXT: v_readlane_b32 s59, v40, 27 +; GISEL-NEXT: v_readlane_b32 s58, v40, 26 +; GISEL-NEXT: v_readlane_b32 s57, v40, 25 +; GISEL-NEXT: v_readlane_b32 s56, v40, 24 +; GISEL-NEXT: v_readlane_b32 s55, v40, 23 +; GISEL-NEXT: v_readlane_b32 s54, v40, 22 +; GISEL-NEXT: v_readlane_b32 s53, v40, 21 +; GISEL-NEXT: v_readlane_b32 s52, v40, 20 +; GISEL-NEXT: v_readlane_b32 s51, v40, 19 +; GISEL-NEXT: v_readlane_b32 s50, v40, 18 +; GISEL-NEXT: v_readlane_b32 s49, v40, 17 +; GISEL-NEXT: v_readlane_b32 s48, v40, 16 +; GISEL-NEXT: v_readlane_b32 s47, v40, 15 +; GISEL-NEXT: v_readlane_b32 s46, v40, 14 +; GISEL-NEXT: v_readlane_b32 s45, v40, 13 +; GISEL-NEXT: v_readlane_b32 s44, v40, 12 +; GISEL-NEXT: v_readlane_b32 s43, v40, 11 +; GISEL-NEXT: v_readlane_b32 s42, v40, 10 +; GISEL-NEXT: v_readlane_b32 s41, v40, 9 +; GISEL-NEXT: v_readlane_b32 s40, v40, 8 +; GISEL-NEXT: v_readlane_b32 s39, v40, 7 +; GISEL-NEXT: v_readlane_b32 s38, v40, 6 +; GISEL-NEXT: v_readlane_b32 s37, v40, 5 +; GISEL-NEXT: v_readlane_b32 s36, v40, 4 +; GISEL-NEXT: v_readlane_b32 s35, v40, 3 +; GISEL-NEXT: v_readlane_b32 s34, v40, 2 +; GISEL-NEXT: v_readlane_b32 s31, v40, 1 +; GISEL-NEXT: v_readlane_b32 s30, v40, 0 ; GISEL-NEXT: s_addk_i32 s32, 0xfc00 -; GISEL-NEXT: v_readlane_b32 s33, v40, 30 +; GISEL-NEXT: v_readlane_b32 s33, v40, 32 ; GISEL-NEXT: s_or_saveexec_b64 s[4:5], -1 ; GISEL-NEXT: buffer_load_dword v40, off, s[0:3], s32 ; 4-byte Folded Reload ; GISEL-NEXT: s_mov_b64 exec, s[4:5] ; GISEL-NEXT: s_waitcnt vmcnt(0) -; GISEL-NEXT: s_setpc_b64 s[6:7] +; GISEL-NEXT: s_setpc_b64 s[30:31] call amdgpu_gfx void %fptr(i32 inreg 123) ret void } @@ -1260,94 +1266,97 @@ define i32 @test_indirect_call_vgpr_ptr_arg_and_reuse(i32 %i, void(i32)* %fptr) ; GCN-NEXT: s_or_saveexec_b64 s[4:5], -1 ; GCN-NEXT: buffer_store_dword v40, off, s[0:3], s32 offset:4 ; 4-byte Folded Spill ; GCN-NEXT: s_mov_b64 exec, s[4:5] -; GCN-NEXT: v_writelane_b32 v40, s33, 30 +; GCN-NEXT: v_writelane_b32 v40, s33, 32 ; GCN-NEXT: s_mov_b32 s33, s32 ; GCN-NEXT: s_addk_i32 s32, 0x400 ; GCN-NEXT: buffer_store_dword v41, off, s[0:3], s33 ; 4-byte Folded Spill -; GCN-NEXT: v_writelane_b32 v40, s34, 0 -; GCN-NEXT: v_writelane_b32 v40, s35, 1 -; GCN-NEXT: v_writelane_b32 v40, s36, 2 -; GCN-NEXT: v_writelane_b32 v40, s37, 3 -; GCN-NEXT: v_writelane_b32 v40, s38, 4 -; GCN-NEXT: v_writelane_b32 v40, s39, 5 -; GCN-NEXT: 
v_writelane_b32 v40, s40, 6 -; GCN-NEXT: v_writelane_b32 v40, s41, 7 -; GCN-NEXT: v_writelane_b32 v40, s42, 8 -; GCN-NEXT: v_writelane_b32 v40, s43, 9 -; GCN-NEXT: v_writelane_b32 v40, s44, 10 -; GCN-NEXT: v_writelane_b32 v40, s45, 11 -; GCN-NEXT: v_writelane_b32 v40, s46, 12 -; GCN-NEXT: v_writelane_b32 v40, s47, 13 -; GCN-NEXT: v_writelane_b32 v40, s48, 14 -; GCN-NEXT: v_writelane_b32 v40, s49, 15 -; GCN-NEXT: v_writelane_b32 v40, s50, 16 -; GCN-NEXT: v_writelane_b32 v40, s51, 17 -; GCN-NEXT: v_writelane_b32 v40, s52, 18 -; GCN-NEXT: v_writelane_b32 v40, s53, 19 -; GCN-NEXT: v_writelane_b32 v40, s54, 20 -; GCN-NEXT: v_writelane_b32 v40, s55, 21 -; GCN-NEXT: v_writelane_b32 v40, s56, 22 -; GCN-NEXT: v_writelane_b32 v40, s57, 23 -; GCN-NEXT: v_writelane_b32 v40, s58, 24 -; GCN-NEXT: v_writelane_b32 v40, s59, 25 -; GCN-NEXT: v_writelane_b32 v40, s60, 26 -; GCN-NEXT: v_writelane_b32 v40, s61, 27 -; GCN-NEXT: v_writelane_b32 v40, s62, 28 -; GCN-NEXT: v_writelane_b32 v40, s63, 29 -; GCN-NEXT: s_mov_b64 s[4:5], s[30:31] +; GCN-NEXT: v_writelane_b32 v40, s30, 0 +; GCN-NEXT: v_writelane_b32 v40, s31, 1 +; GCN-NEXT: v_writelane_b32 v40, s34, 2 +; GCN-NEXT: v_writelane_b32 v40, s35, 3 +; GCN-NEXT: v_writelane_b32 v40, s36, 4 +; GCN-NEXT: v_writelane_b32 v40, s37, 5 +; GCN-NEXT: v_writelane_b32 v40, s38, 6 +; GCN-NEXT: v_writelane_b32 v40, s39, 7 +; GCN-NEXT: v_writelane_b32 v40, s40, 8 +; GCN-NEXT: v_writelane_b32 v40, s41, 9 +; GCN-NEXT: v_writelane_b32 v40, s42, 10 +; GCN-NEXT: v_writelane_b32 v40, s43, 11 +; GCN-NEXT: v_writelane_b32 v40, s44, 12 +; GCN-NEXT: v_writelane_b32 v40, s45, 13 +; GCN-NEXT: v_writelane_b32 v40, s46, 14 +; GCN-NEXT: v_writelane_b32 v40, s47, 15 +; GCN-NEXT: v_writelane_b32 v40, s48, 16 +; GCN-NEXT: v_writelane_b32 v40, s49, 17 +; GCN-NEXT: v_writelane_b32 v40, s50, 18 +; GCN-NEXT: v_writelane_b32 v40, s51, 19 +; GCN-NEXT: v_writelane_b32 v40, s52, 20 +; GCN-NEXT: v_writelane_b32 v40, s53, 21 +; GCN-NEXT: v_writelane_b32 v40, s54, 22 +; GCN-NEXT: v_writelane_b32 v40, s55, 23 +; GCN-NEXT: v_writelane_b32 v40, s56, 24 +; GCN-NEXT: v_writelane_b32 v40, s57, 25 +; GCN-NEXT: v_writelane_b32 v40, s58, 26 +; GCN-NEXT: v_writelane_b32 v40, s59, 27 +; GCN-NEXT: v_writelane_b32 v40, s60, 28 +; GCN-NEXT: v_writelane_b32 v40, s61, 29 +; GCN-NEXT: v_writelane_b32 v40, s62, 30 +; GCN-NEXT: v_writelane_b32 v40, s63, 31 ; GCN-NEXT: v_mov_b32_e32 v41, v0 -; GCN-NEXT: s_mov_b64 s[6:7], exec +; GCN-NEXT: s_mov_b64 s[4:5], exec ; GCN-NEXT: .LBB7_1: ; =>This Inner Loop Header: Depth=1 -; GCN-NEXT: v_readfirstlane_b32 s10, v1 -; GCN-NEXT: v_readfirstlane_b32 s11, v2 -; GCN-NEXT: v_cmp_eq_u64_e32 vcc, s[10:11], v[1:2] -; GCN-NEXT: s_and_saveexec_b64 s[8:9], vcc +; GCN-NEXT: v_readfirstlane_b32 s8, v1 +; GCN-NEXT: v_readfirstlane_b32 s9, v2 +; GCN-NEXT: v_cmp_eq_u64_e32 vcc, s[8:9], v[1:2] +; GCN-NEXT: s_and_saveexec_b64 s[6:7], vcc ; GCN-NEXT: v_mov_b32_e32 v0, v41 -; GCN-NEXT: s_swappc_b64 s[30:31], s[10:11] +; GCN-NEXT: s_swappc_b64 s[30:31], s[8:9] ; GCN-NEXT: ; implicit-def: $vgpr1_vgpr2 -; GCN-NEXT: s_xor_b64 exec, exec, s[8:9] +; GCN-NEXT: s_xor_b64 exec, exec, s[6:7] ; GCN-NEXT: s_cbranch_execnz .LBB7_1 ; GCN-NEXT: ; %bb.2: -; GCN-NEXT: s_mov_b64 exec, s[6:7] +; GCN-NEXT: s_mov_b64 exec, s[4:5] ; GCN-NEXT: v_mov_b32_e32 v0, v41 -; GCN-NEXT: v_readlane_b32 s63, v40, 29 -; GCN-NEXT: v_readlane_b32 s62, v40, 28 -; GCN-NEXT: v_readlane_b32 s61, v40, 27 -; GCN-NEXT: v_readlane_b32 s60, v40, 26 -; GCN-NEXT: v_readlane_b32 s59, v40, 25 -; GCN-NEXT: v_readlane_b32 s58, v40, 24 -; GCN-NEXT: 
v_readlane_b32 s57, v40, 23 -; GCN-NEXT: v_readlane_b32 s56, v40, 22 -; GCN-NEXT: v_readlane_b32 s55, v40, 21 -; GCN-NEXT: v_readlane_b32 s54, v40, 20 -; GCN-NEXT: v_readlane_b32 s53, v40, 19 -; GCN-NEXT: v_readlane_b32 s52, v40, 18 -; GCN-NEXT: v_readlane_b32 s51, v40, 17 -; GCN-NEXT: v_readlane_b32 s50, v40, 16 -; GCN-NEXT: v_readlane_b32 s49, v40, 15 -; GCN-NEXT: v_readlane_b32 s48, v40, 14 -; GCN-NEXT: v_readlane_b32 s47, v40, 13 -; GCN-NEXT: v_readlane_b32 s46, v40, 12 -; GCN-NEXT: v_readlane_b32 s45, v40, 11 -; GCN-NEXT: v_readlane_b32 s44, v40, 10 -; GCN-NEXT: v_readlane_b32 s43, v40, 9 -; GCN-NEXT: v_readlane_b32 s42, v40, 8 -; GCN-NEXT: v_readlane_b32 s41, v40, 7 -; GCN-NEXT: v_readlane_b32 s40, v40, 6 -; GCN-NEXT: v_readlane_b32 s39, v40, 5 -; GCN-NEXT: v_readlane_b32 s38, v40, 4 -; GCN-NEXT: v_readlane_b32 s37, v40, 3 -; GCN-NEXT: v_readlane_b32 s36, v40, 2 -; GCN-NEXT: v_readlane_b32 s35, v40, 1 -; GCN-NEXT: v_readlane_b32 s34, v40, 0 +; GCN-NEXT: v_readlane_b32 s63, v40, 31 +; GCN-NEXT: v_readlane_b32 s62, v40, 30 +; GCN-NEXT: v_readlane_b32 s61, v40, 29 +; GCN-NEXT: v_readlane_b32 s60, v40, 28 +; GCN-NEXT: v_readlane_b32 s59, v40, 27 +; GCN-NEXT: v_readlane_b32 s58, v40, 26 +; GCN-NEXT: v_readlane_b32 s57, v40, 25 +; GCN-NEXT: v_readlane_b32 s56, v40, 24 +; GCN-NEXT: v_readlane_b32 s55, v40, 23 +; GCN-NEXT: v_readlane_b32 s54, v40, 22 +; GCN-NEXT: v_readlane_b32 s53, v40, 21 +; GCN-NEXT: v_readlane_b32 s52, v40, 20 +; GCN-NEXT: v_readlane_b32 s51, v40, 19 +; GCN-NEXT: v_readlane_b32 s50, v40, 18 +; GCN-NEXT: v_readlane_b32 s49, v40, 17 +; GCN-NEXT: v_readlane_b32 s48, v40, 16 +; GCN-NEXT: v_readlane_b32 s47, v40, 15 +; GCN-NEXT: v_readlane_b32 s46, v40, 14 +; GCN-NEXT: v_readlane_b32 s45, v40, 13 +; GCN-NEXT: v_readlane_b32 s44, v40, 12 +; GCN-NEXT: v_readlane_b32 s43, v40, 11 +; GCN-NEXT: v_readlane_b32 s42, v40, 10 +; GCN-NEXT: v_readlane_b32 s41, v40, 9 +; GCN-NEXT: v_readlane_b32 s40, v40, 8 +; GCN-NEXT: v_readlane_b32 s39, v40, 7 +; GCN-NEXT: v_readlane_b32 s38, v40, 6 +; GCN-NEXT: v_readlane_b32 s37, v40, 5 +; GCN-NEXT: v_readlane_b32 s36, v40, 4 +; GCN-NEXT: v_readlane_b32 s35, v40, 3 +; GCN-NEXT: v_readlane_b32 s34, v40, 2 +; GCN-NEXT: v_readlane_b32 s31, v40, 1 +; GCN-NEXT: v_readlane_b32 s30, v40, 0 ; GCN-NEXT: buffer_load_dword v41, off, s[0:3], s33 ; 4-byte Folded Reload ; GCN-NEXT: s_addk_i32 s32, 0xfc00 -; GCN-NEXT: v_readlane_b32 s33, v40, 30 -; GCN-NEXT: s_or_saveexec_b64 s[6:7], -1 +; GCN-NEXT: v_readlane_b32 s33, v40, 32 +; GCN-NEXT: s_or_saveexec_b64 s[4:5], -1 ; GCN-NEXT: buffer_load_dword v40, off, s[0:3], s32 offset:4 ; 4-byte Folded Reload -; GCN-NEXT: s_mov_b64 exec, s[6:7] +; GCN-NEXT: s_mov_b64 exec, s[4:5] ; GCN-NEXT: s_waitcnt vmcnt(0) -; GCN-NEXT: s_setpc_b64 s[4:5] +; GCN-NEXT: s_setpc_b64 s[30:31] ; ; GISEL-LABEL: test_indirect_call_vgpr_ptr_arg_and_reuse: ; GISEL: ; %bb.0: @@ -1355,94 +1364,97 @@ define i32 @test_indirect_call_vgpr_ptr_arg_and_reuse(i32 %i, void(i32)* %fptr) ; GISEL-NEXT: s_or_saveexec_b64 s[4:5], -1 ; GISEL-NEXT: buffer_store_dword v40, off, s[0:3], s32 offset:4 ; 4-byte Folded Spill ; GISEL-NEXT: s_mov_b64 exec, s[4:5] -; GISEL-NEXT: v_writelane_b32 v40, s33, 30 +; GISEL-NEXT: v_writelane_b32 v40, s33, 32 ; GISEL-NEXT: s_mov_b32 s33, s32 ; GISEL-NEXT: s_addk_i32 s32, 0x400 ; GISEL-NEXT: buffer_store_dword v41, off, s[0:3], s33 ; 4-byte Folded Spill -; GISEL-NEXT: v_writelane_b32 v40, s34, 0 -; GISEL-NEXT: v_writelane_b32 v40, s35, 1 -; GISEL-NEXT: v_writelane_b32 v40, s36, 2 -; GISEL-NEXT: v_writelane_b32 v40, s37, 3 -; 
GISEL-NEXT: v_writelane_b32 v40, s38, 4 -; GISEL-NEXT: v_writelane_b32 v40, s39, 5 -; GISEL-NEXT: v_writelane_b32 v40, s40, 6 -; GISEL-NEXT: v_writelane_b32 v40, s41, 7 -; GISEL-NEXT: v_writelane_b32 v40, s42, 8 -; GISEL-NEXT: v_writelane_b32 v40, s43, 9 -; GISEL-NEXT: v_writelane_b32 v40, s44, 10 -; GISEL-NEXT: v_writelane_b32 v40, s45, 11 -; GISEL-NEXT: v_writelane_b32 v40, s46, 12 -; GISEL-NEXT: v_writelane_b32 v40, s47, 13 -; GISEL-NEXT: v_writelane_b32 v40, s48, 14 -; GISEL-NEXT: v_writelane_b32 v40, s49, 15 -; GISEL-NEXT: v_writelane_b32 v40, s50, 16 -; GISEL-NEXT: v_writelane_b32 v40, s51, 17 -; GISEL-NEXT: v_writelane_b32 v40, s52, 18 -; GISEL-NEXT: v_writelane_b32 v40, s53, 19 -; GISEL-NEXT: v_writelane_b32 v40, s54, 20 -; GISEL-NEXT: v_writelane_b32 v40, s55, 21 -; GISEL-NEXT: v_writelane_b32 v40, s56, 22 -; GISEL-NEXT: v_writelane_b32 v40, s57, 23 -; GISEL-NEXT: v_writelane_b32 v40, s58, 24 -; GISEL-NEXT: v_writelane_b32 v40, s59, 25 -; GISEL-NEXT: v_writelane_b32 v40, s60, 26 -; GISEL-NEXT: v_writelane_b32 v40, s61, 27 -; GISEL-NEXT: v_writelane_b32 v40, s62, 28 -; GISEL-NEXT: v_writelane_b32 v40, s63, 29 +; GISEL-NEXT: v_writelane_b32 v40, s30, 0 +; GISEL-NEXT: v_writelane_b32 v40, s31, 1 +; GISEL-NEXT: v_writelane_b32 v40, s34, 2 +; GISEL-NEXT: v_writelane_b32 v40, s35, 3 +; GISEL-NEXT: v_writelane_b32 v40, s36, 4 +; GISEL-NEXT: v_writelane_b32 v40, s37, 5 +; GISEL-NEXT: v_writelane_b32 v40, s38, 6 +; GISEL-NEXT: v_writelane_b32 v40, s39, 7 +; GISEL-NEXT: v_writelane_b32 v40, s40, 8 +; GISEL-NEXT: v_writelane_b32 v40, s41, 9 +; GISEL-NEXT: v_writelane_b32 v40, s42, 10 +; GISEL-NEXT: v_writelane_b32 v40, s43, 11 +; GISEL-NEXT: v_writelane_b32 v40, s44, 12 +; GISEL-NEXT: v_writelane_b32 v40, s45, 13 +; GISEL-NEXT: v_writelane_b32 v40, s46, 14 +; GISEL-NEXT: v_writelane_b32 v40, s47, 15 +; GISEL-NEXT: v_writelane_b32 v40, s48, 16 +; GISEL-NEXT: v_writelane_b32 v40, s49, 17 +; GISEL-NEXT: v_writelane_b32 v40, s50, 18 +; GISEL-NEXT: v_writelane_b32 v40, s51, 19 +; GISEL-NEXT: v_writelane_b32 v40, s52, 20 +; GISEL-NEXT: v_writelane_b32 v40, s53, 21 +; GISEL-NEXT: v_writelane_b32 v40, s54, 22 +; GISEL-NEXT: v_writelane_b32 v40, s55, 23 +; GISEL-NEXT: v_writelane_b32 v40, s56, 24 +; GISEL-NEXT: v_writelane_b32 v40, s57, 25 +; GISEL-NEXT: v_writelane_b32 v40, s58, 26 +; GISEL-NEXT: v_writelane_b32 v40, s59, 27 +; GISEL-NEXT: v_writelane_b32 v40, s60, 28 +; GISEL-NEXT: v_writelane_b32 v40, s61, 29 +; GISEL-NEXT: v_writelane_b32 v40, s62, 30 +; GISEL-NEXT: v_writelane_b32 v40, s63, 31 ; GISEL-NEXT: v_mov_b32_e32 v41, v0 -; GISEL-NEXT: s_mov_b64 s[4:5], s[30:31] -; GISEL-NEXT: s_mov_b64 s[6:7], exec +; GISEL-NEXT: s_mov_b64 s[4:5], exec ; GISEL-NEXT: .LBB7_1: ; =>This Inner Loop Header: Depth=1 -; GISEL-NEXT: v_readfirstlane_b32 s8, v1 -; GISEL-NEXT: v_readfirstlane_b32 s9, v2 -; GISEL-NEXT: v_cmp_eq_u64_e32 vcc, s[8:9], v[1:2] -; GISEL-NEXT: s_and_saveexec_b64 s[10:11], vcc +; GISEL-NEXT: v_readfirstlane_b32 s6, v1 +; GISEL-NEXT: v_readfirstlane_b32 s7, v2 +; GISEL-NEXT: v_cmp_eq_u64_e32 vcc, s[6:7], v[1:2] +; GISEL-NEXT: s_and_saveexec_b64 s[8:9], vcc ; GISEL-NEXT: v_mov_b32_e32 v0, v41 -; GISEL-NEXT: s_swappc_b64 s[30:31], s[8:9] +; GISEL-NEXT: s_swappc_b64 s[30:31], s[6:7] ; GISEL-NEXT: ; implicit-def: $vgpr1_vgpr2 -; GISEL-NEXT: s_xor_b64 exec, exec, s[10:11] +; GISEL-NEXT: s_xor_b64 exec, exec, s[8:9] ; GISEL-NEXT: s_cbranch_execnz .LBB7_1 ; GISEL-NEXT: ; %bb.2: -; GISEL-NEXT: s_mov_b64 exec, s[6:7] +; GISEL-NEXT: s_mov_b64 exec, s[4:5] ; GISEL-NEXT: v_mov_b32_e32 v0, v41 -; 
GISEL-NEXT: v_readlane_b32 s63, v40, 29 -; GISEL-NEXT: v_readlane_b32 s62, v40, 28 -; GISEL-NEXT: v_readlane_b32 s61, v40, 27 -; GISEL-NEXT: v_readlane_b32 s60, v40, 26 -; GISEL-NEXT: v_readlane_b32 s59, v40, 25 -; GISEL-NEXT: v_readlane_b32 s58, v40, 24 -; GISEL-NEXT: v_readlane_b32 s57, v40, 23 -; GISEL-NEXT: v_readlane_b32 s56, v40, 22 -; GISEL-NEXT: v_readlane_b32 s55, v40, 21 -; GISEL-NEXT: v_readlane_b32 s54, v40, 20 -; GISEL-NEXT: v_readlane_b32 s53, v40, 19 -; GISEL-NEXT: v_readlane_b32 s52, v40, 18 -; GISEL-NEXT: v_readlane_b32 s51, v40, 17 -; GISEL-NEXT: v_readlane_b32 s50, v40, 16 -; GISEL-NEXT: v_readlane_b32 s49, v40, 15 -; GISEL-NEXT: v_readlane_b32 s48, v40, 14 -; GISEL-NEXT: v_readlane_b32 s47, v40, 13 -; GISEL-NEXT: v_readlane_b32 s46, v40, 12 -; GISEL-NEXT: v_readlane_b32 s45, v40, 11 -; GISEL-NEXT: v_readlane_b32 s44, v40, 10 -; GISEL-NEXT: v_readlane_b32 s43, v40, 9 -; GISEL-NEXT: v_readlane_b32 s42, v40, 8 -; GISEL-NEXT: v_readlane_b32 s41, v40, 7 -; GISEL-NEXT: v_readlane_b32 s40, v40, 6 -; GISEL-NEXT: v_readlane_b32 s39, v40, 5 -; GISEL-NEXT: v_readlane_b32 s38, v40, 4 -; GISEL-NEXT: v_readlane_b32 s37, v40, 3 -; GISEL-NEXT: v_readlane_b32 s36, v40, 2 -; GISEL-NEXT: v_readlane_b32 s35, v40, 1 -; GISEL-NEXT: v_readlane_b32 s34, v40, 0 +; GISEL-NEXT: v_readlane_b32 s63, v40, 31 +; GISEL-NEXT: v_readlane_b32 s62, v40, 30 +; GISEL-NEXT: v_readlane_b32 s61, v40, 29 +; GISEL-NEXT: v_readlane_b32 s60, v40, 28 +; GISEL-NEXT: v_readlane_b32 s59, v40, 27 +; GISEL-NEXT: v_readlane_b32 s58, v40, 26 +; GISEL-NEXT: v_readlane_b32 s57, v40, 25 +; GISEL-NEXT: v_readlane_b32 s56, v40, 24 +; GISEL-NEXT: v_readlane_b32 s55, v40, 23 +; GISEL-NEXT: v_readlane_b32 s54, v40, 22 +; GISEL-NEXT: v_readlane_b32 s53, v40, 21 +; GISEL-NEXT: v_readlane_b32 s52, v40, 20 +; GISEL-NEXT: v_readlane_b32 s51, v40, 19 +; GISEL-NEXT: v_readlane_b32 s50, v40, 18 +; GISEL-NEXT: v_readlane_b32 s49, v40, 17 +; GISEL-NEXT: v_readlane_b32 s48, v40, 16 +; GISEL-NEXT: v_readlane_b32 s47, v40, 15 +; GISEL-NEXT: v_readlane_b32 s46, v40, 14 +; GISEL-NEXT: v_readlane_b32 s45, v40, 13 +; GISEL-NEXT: v_readlane_b32 s44, v40, 12 +; GISEL-NEXT: v_readlane_b32 s43, v40, 11 +; GISEL-NEXT: v_readlane_b32 s42, v40, 10 +; GISEL-NEXT: v_readlane_b32 s41, v40, 9 +; GISEL-NEXT: v_readlane_b32 s40, v40, 8 +; GISEL-NEXT: v_readlane_b32 s39, v40, 7 +; GISEL-NEXT: v_readlane_b32 s38, v40, 6 +; GISEL-NEXT: v_readlane_b32 s37, v40, 5 +; GISEL-NEXT: v_readlane_b32 s36, v40, 4 +; GISEL-NEXT: v_readlane_b32 s35, v40, 3 +; GISEL-NEXT: v_readlane_b32 s34, v40, 2 +; GISEL-NEXT: v_readlane_b32 s31, v40, 1 +; GISEL-NEXT: v_readlane_b32 s30, v40, 0 ; GISEL-NEXT: buffer_load_dword v41, off, s[0:3], s33 ; 4-byte Folded Reload ; GISEL-NEXT: s_addk_i32 s32, 0xfc00 -; GISEL-NEXT: v_readlane_b32 s33, v40, 30 -; GISEL-NEXT: s_or_saveexec_b64 s[6:7], -1 +; GISEL-NEXT: v_readlane_b32 s33, v40, 32 +; GISEL-NEXT: s_or_saveexec_b64 s[4:5], -1 ; GISEL-NEXT: buffer_load_dword v40, off, s[0:3], s32 offset:4 ; 4-byte Folded Reload -; GISEL-NEXT: s_mov_b64 exec, s[6:7] +; GISEL-NEXT: s_mov_b64 exec, s[4:5] ; GISEL-NEXT: s_waitcnt vmcnt(0) -; GISEL-NEXT: s_setpc_b64 s[4:5] +; GISEL-NEXT: s_setpc_b64 s[30:31] call amdgpu_gfx void %fptr(i32 %i) ret i32 %i } @@ -1458,92 +1470,95 @@ define i32 @test_indirect_call_vgpr_ptr_arg_and_return(i32 %i, i32(i32)* %fptr) ; GCN-NEXT: s_or_saveexec_b64 s[4:5], -1 ; GCN-NEXT: buffer_store_dword v40, off, s[0:3], s32 ; 4-byte Folded Spill ; GCN-NEXT: s_mov_b64 exec, s[4:5] -; GCN-NEXT: v_writelane_b32 v40, s33, 30 +; GCN-NEXT: 
v_writelane_b32 v40, s33, 32 ; GCN-NEXT: s_mov_b32 s33, s32 ; GCN-NEXT: s_addk_i32 s32, 0x400 -; GCN-NEXT: v_writelane_b32 v40, s34, 0 -; GCN-NEXT: v_writelane_b32 v40, s35, 1 -; GCN-NEXT: v_writelane_b32 v40, s36, 2 -; GCN-NEXT: v_writelane_b32 v40, s37, 3 -; GCN-NEXT: v_writelane_b32 v40, s38, 4 -; GCN-NEXT: v_writelane_b32 v40, s39, 5 -; GCN-NEXT: v_writelane_b32 v40, s40, 6 -; GCN-NEXT: v_writelane_b32 v40, s41, 7 -; GCN-NEXT: v_writelane_b32 v40, s42, 8 -; GCN-NEXT: v_writelane_b32 v40, s43, 9 -; GCN-NEXT: v_writelane_b32 v40, s44, 10 -; GCN-NEXT: v_writelane_b32 v40, s45, 11 -; GCN-NEXT: v_writelane_b32 v40, s46, 12 -; GCN-NEXT: v_writelane_b32 v40, s47, 13 -; GCN-NEXT: v_writelane_b32 v40, s48, 14 -; GCN-NEXT: v_writelane_b32 v40, s49, 15 -; GCN-NEXT: v_writelane_b32 v40, s50, 16 -; GCN-NEXT: v_writelane_b32 v40, s51, 17 -; GCN-NEXT: v_writelane_b32 v40, s52, 18 -; GCN-NEXT: v_writelane_b32 v40, s53, 19 -; GCN-NEXT: v_writelane_b32 v40, s54, 20 -; GCN-NEXT: v_writelane_b32 v40, s55, 21 -; GCN-NEXT: v_writelane_b32 v40, s56, 22 -; GCN-NEXT: v_writelane_b32 v40, s57, 23 -; GCN-NEXT: v_writelane_b32 v40, s58, 24 -; GCN-NEXT: v_writelane_b32 v40, s59, 25 -; GCN-NEXT: v_writelane_b32 v40, s60, 26 -; GCN-NEXT: v_writelane_b32 v40, s61, 27 -; GCN-NEXT: v_writelane_b32 v40, s62, 28 -; GCN-NEXT: v_writelane_b32 v40, s63, 29 -; GCN-NEXT: s_mov_b64 s[4:5], s[30:31] -; GCN-NEXT: s_mov_b64 s[6:7], exec +; GCN-NEXT: v_writelane_b32 v40, s30, 0 +; GCN-NEXT: v_writelane_b32 v40, s31, 1 +; GCN-NEXT: v_writelane_b32 v40, s34, 2 +; GCN-NEXT: v_writelane_b32 v40, s35, 3 +; GCN-NEXT: v_writelane_b32 v40, s36, 4 +; GCN-NEXT: v_writelane_b32 v40, s37, 5 +; GCN-NEXT: v_writelane_b32 v40, s38, 6 +; GCN-NEXT: v_writelane_b32 v40, s39, 7 +; GCN-NEXT: v_writelane_b32 v40, s40, 8 +; GCN-NEXT: v_writelane_b32 v40, s41, 9 +; GCN-NEXT: v_writelane_b32 v40, s42, 10 +; GCN-NEXT: v_writelane_b32 v40, s43, 11 +; GCN-NEXT: v_writelane_b32 v40, s44, 12 +; GCN-NEXT: v_writelane_b32 v40, s45, 13 +; GCN-NEXT: v_writelane_b32 v40, s46, 14 +; GCN-NEXT: v_writelane_b32 v40, s47, 15 +; GCN-NEXT: v_writelane_b32 v40, s48, 16 +; GCN-NEXT: v_writelane_b32 v40, s49, 17 +; GCN-NEXT: v_writelane_b32 v40, s50, 18 +; GCN-NEXT: v_writelane_b32 v40, s51, 19 +; GCN-NEXT: v_writelane_b32 v40, s52, 20 +; GCN-NEXT: v_writelane_b32 v40, s53, 21 +; GCN-NEXT: v_writelane_b32 v40, s54, 22 +; GCN-NEXT: v_writelane_b32 v40, s55, 23 +; GCN-NEXT: v_writelane_b32 v40, s56, 24 +; GCN-NEXT: v_writelane_b32 v40, s57, 25 +; GCN-NEXT: v_writelane_b32 v40, s58, 26 +; GCN-NEXT: v_writelane_b32 v40, s59, 27 +; GCN-NEXT: v_writelane_b32 v40, s60, 28 +; GCN-NEXT: v_writelane_b32 v40, s61, 29 +; GCN-NEXT: v_writelane_b32 v40, s62, 30 +; GCN-NEXT: v_writelane_b32 v40, s63, 31 +; GCN-NEXT: s_mov_b64 s[4:5], exec ; GCN-NEXT: .LBB8_1: ; =>This Inner Loop Header: Depth=1 -; GCN-NEXT: v_readfirstlane_b32 s10, v1 -; GCN-NEXT: v_readfirstlane_b32 s11, v2 -; GCN-NEXT: v_cmp_eq_u64_e32 vcc, s[10:11], v[1:2] -; GCN-NEXT: s_and_saveexec_b64 s[8:9], vcc -; GCN-NEXT: s_swappc_b64 s[30:31], s[10:11] +; GCN-NEXT: v_readfirstlane_b32 s8, v1 +; GCN-NEXT: v_readfirstlane_b32 s9, v2 +; GCN-NEXT: v_cmp_eq_u64_e32 vcc, s[8:9], v[1:2] +; GCN-NEXT: s_and_saveexec_b64 s[6:7], vcc +; GCN-NEXT: s_swappc_b64 s[30:31], s[8:9] ; GCN-NEXT: v_mov_b32_e32 v3, v0 ; GCN-NEXT: ; implicit-def: $vgpr1_vgpr2 ; GCN-NEXT: ; implicit-def: $vgpr0 -; GCN-NEXT: s_xor_b64 exec, exec, s[8:9] +; GCN-NEXT: s_xor_b64 exec, exec, s[6:7] ; GCN-NEXT: s_cbranch_execnz .LBB8_1 ; GCN-NEXT: ; %bb.2: -; GCN-NEXT: 
s_mov_b64 exec, s[6:7] +; GCN-NEXT: s_mov_b64 exec, s[4:5] ; GCN-NEXT: v_mov_b32_e32 v0, v3 -; GCN-NEXT: v_readlane_b32 s63, v40, 29 -; GCN-NEXT: v_readlane_b32 s62, v40, 28 -; GCN-NEXT: v_readlane_b32 s61, v40, 27 -; GCN-NEXT: v_readlane_b32 s60, v40, 26 -; GCN-NEXT: v_readlane_b32 s59, v40, 25 -; GCN-NEXT: v_readlane_b32 s58, v40, 24 -; GCN-NEXT: v_readlane_b32 s57, v40, 23 -; GCN-NEXT: v_readlane_b32 s56, v40, 22 -; GCN-NEXT: v_readlane_b32 s55, v40, 21 -; GCN-NEXT: v_readlane_b32 s54, v40, 20 -; GCN-NEXT: v_readlane_b32 s53, v40, 19 -; GCN-NEXT: v_readlane_b32 s52, v40, 18 -; GCN-NEXT: v_readlane_b32 s51, v40, 17 -; GCN-NEXT: v_readlane_b32 s50, v40, 16 -; GCN-NEXT: v_readlane_b32 s49, v40, 15 -; GCN-NEXT: v_readlane_b32 s48, v40, 14 -; GCN-NEXT: v_readlane_b32 s47, v40, 13 -; GCN-NEXT: v_readlane_b32 s46, v40, 12 -; GCN-NEXT: v_readlane_b32 s45, v40, 11 -; GCN-NEXT: v_readlane_b32 s44, v40, 10 -; GCN-NEXT: v_readlane_b32 s43, v40, 9 -; GCN-NEXT: v_readlane_b32 s42, v40, 8 -; GCN-NEXT: v_readlane_b32 s41, v40, 7 -; GCN-NEXT: v_readlane_b32 s40, v40, 6 -; GCN-NEXT: v_readlane_b32 s39, v40, 5 -; GCN-NEXT: v_readlane_b32 s38, v40, 4 -; GCN-NEXT: v_readlane_b32 s37, v40, 3 -; GCN-NEXT: v_readlane_b32 s36, v40, 2 -; GCN-NEXT: v_readlane_b32 s35, v40, 1 -; GCN-NEXT: v_readlane_b32 s34, v40, 0 +; GCN-NEXT: v_readlane_b32 s63, v40, 31 +; GCN-NEXT: v_readlane_b32 s62, v40, 30 +; GCN-NEXT: v_readlane_b32 s61, v40, 29 +; GCN-NEXT: v_readlane_b32 s60, v40, 28 +; GCN-NEXT: v_readlane_b32 s59, v40, 27 +; GCN-NEXT: v_readlane_b32 s58, v40, 26 +; GCN-NEXT: v_readlane_b32 s57, v40, 25 +; GCN-NEXT: v_readlane_b32 s56, v40, 24 +; GCN-NEXT: v_readlane_b32 s55, v40, 23 +; GCN-NEXT: v_readlane_b32 s54, v40, 22 +; GCN-NEXT: v_readlane_b32 s53, v40, 21 +; GCN-NEXT: v_readlane_b32 s52, v40, 20 +; GCN-NEXT: v_readlane_b32 s51, v40, 19 +; GCN-NEXT: v_readlane_b32 s50, v40, 18 +; GCN-NEXT: v_readlane_b32 s49, v40, 17 +; GCN-NEXT: v_readlane_b32 s48, v40, 16 +; GCN-NEXT: v_readlane_b32 s47, v40, 15 +; GCN-NEXT: v_readlane_b32 s46, v40, 14 +; GCN-NEXT: v_readlane_b32 s45, v40, 13 +; GCN-NEXT: v_readlane_b32 s44, v40, 12 +; GCN-NEXT: v_readlane_b32 s43, v40, 11 +; GCN-NEXT: v_readlane_b32 s42, v40, 10 +; GCN-NEXT: v_readlane_b32 s41, v40, 9 +; GCN-NEXT: v_readlane_b32 s40, v40, 8 +; GCN-NEXT: v_readlane_b32 s39, v40, 7 +; GCN-NEXT: v_readlane_b32 s38, v40, 6 +; GCN-NEXT: v_readlane_b32 s37, v40, 5 +; GCN-NEXT: v_readlane_b32 s36, v40, 4 +; GCN-NEXT: v_readlane_b32 s35, v40, 3 +; GCN-NEXT: v_readlane_b32 s34, v40, 2 +; GCN-NEXT: v_readlane_b32 s31, v40, 1 +; GCN-NEXT: v_readlane_b32 s30, v40, 0 ; GCN-NEXT: s_addk_i32 s32, 0xfc00 -; GCN-NEXT: v_readlane_b32 s33, v40, 30 -; GCN-NEXT: s_or_saveexec_b64 s[6:7], -1 +; GCN-NEXT: v_readlane_b32 s33, v40, 32 +; GCN-NEXT: s_or_saveexec_b64 s[4:5], -1 ; GCN-NEXT: buffer_load_dword v40, off, s[0:3], s32 ; 4-byte Folded Reload -; GCN-NEXT: s_mov_b64 exec, s[6:7] +; GCN-NEXT: s_mov_b64 exec, s[4:5] ; GCN-NEXT: s_waitcnt vmcnt(0) -; GCN-NEXT: s_setpc_b64 s[4:5] +; GCN-NEXT: s_setpc_b64 s[30:31] ; ; GISEL-LABEL: test_indirect_call_vgpr_ptr_arg_and_return: ; GISEL: ; %bb.0: @@ -1551,92 +1566,95 @@ define i32 @test_indirect_call_vgpr_ptr_arg_and_return(i32 %i, i32(i32)* %fptr) ; GISEL-NEXT: s_or_saveexec_b64 s[4:5], -1 ; GISEL-NEXT: buffer_store_dword v40, off, s[0:3], s32 ; 4-byte Folded Spill ; GISEL-NEXT: s_mov_b64 exec, s[4:5] -; GISEL-NEXT: v_writelane_b32 v40, s33, 30 +; GISEL-NEXT: v_writelane_b32 v40, s33, 32 ; GISEL-NEXT: s_mov_b32 s33, s32 ; GISEL-NEXT: s_addk_i32 s32, 
0x400 -; GISEL-NEXT: v_writelane_b32 v40, s34, 0 -; GISEL-NEXT: v_writelane_b32 v40, s35, 1 -; GISEL-NEXT: v_writelane_b32 v40, s36, 2 -; GISEL-NEXT: v_writelane_b32 v40, s37, 3 -; GISEL-NEXT: v_writelane_b32 v40, s38, 4 -; GISEL-NEXT: v_writelane_b32 v40, s39, 5 -; GISEL-NEXT: v_writelane_b32 v40, s40, 6 -; GISEL-NEXT: v_writelane_b32 v40, s41, 7 -; GISEL-NEXT: v_writelane_b32 v40, s42, 8 -; GISEL-NEXT: v_writelane_b32 v40, s43, 9 -; GISEL-NEXT: v_writelane_b32 v40, s44, 10 -; GISEL-NEXT: v_writelane_b32 v40, s45, 11 -; GISEL-NEXT: v_writelane_b32 v40, s46, 12 -; GISEL-NEXT: v_writelane_b32 v40, s47, 13 -; GISEL-NEXT: v_writelane_b32 v40, s48, 14 -; GISEL-NEXT: v_writelane_b32 v40, s49, 15 -; GISEL-NEXT: v_writelane_b32 v40, s50, 16 -; GISEL-NEXT: v_writelane_b32 v40, s51, 17 -; GISEL-NEXT: v_writelane_b32 v40, s52, 18 -; GISEL-NEXT: v_writelane_b32 v40, s53, 19 -; GISEL-NEXT: v_writelane_b32 v40, s54, 20 -; GISEL-NEXT: v_writelane_b32 v40, s55, 21 -; GISEL-NEXT: v_writelane_b32 v40, s56, 22 -; GISEL-NEXT: v_writelane_b32 v40, s57, 23 -; GISEL-NEXT: v_writelane_b32 v40, s58, 24 -; GISEL-NEXT: v_writelane_b32 v40, s59, 25 -; GISEL-NEXT: v_writelane_b32 v40, s60, 26 -; GISEL-NEXT: v_writelane_b32 v40, s61, 27 -; GISEL-NEXT: v_writelane_b32 v40, s62, 28 -; GISEL-NEXT: v_writelane_b32 v40, s63, 29 -; GISEL-NEXT: s_mov_b64 s[4:5], s[30:31] -; GISEL-NEXT: s_mov_b64 s[6:7], exec +; GISEL-NEXT: v_writelane_b32 v40, s30, 0 +; GISEL-NEXT: v_writelane_b32 v40, s31, 1 +; GISEL-NEXT: v_writelane_b32 v40, s34, 2 +; GISEL-NEXT: v_writelane_b32 v40, s35, 3 +; GISEL-NEXT: v_writelane_b32 v40, s36, 4 +; GISEL-NEXT: v_writelane_b32 v40, s37, 5 +; GISEL-NEXT: v_writelane_b32 v40, s38, 6 +; GISEL-NEXT: v_writelane_b32 v40, s39, 7 +; GISEL-NEXT: v_writelane_b32 v40, s40, 8 +; GISEL-NEXT: v_writelane_b32 v40, s41, 9 +; GISEL-NEXT: v_writelane_b32 v40, s42, 10 +; GISEL-NEXT: v_writelane_b32 v40, s43, 11 +; GISEL-NEXT: v_writelane_b32 v40, s44, 12 +; GISEL-NEXT: v_writelane_b32 v40, s45, 13 +; GISEL-NEXT: v_writelane_b32 v40, s46, 14 +; GISEL-NEXT: v_writelane_b32 v40, s47, 15 +; GISEL-NEXT: v_writelane_b32 v40, s48, 16 +; GISEL-NEXT: v_writelane_b32 v40, s49, 17 +; GISEL-NEXT: v_writelane_b32 v40, s50, 18 +; GISEL-NEXT: v_writelane_b32 v40, s51, 19 +; GISEL-NEXT: v_writelane_b32 v40, s52, 20 +; GISEL-NEXT: v_writelane_b32 v40, s53, 21 +; GISEL-NEXT: v_writelane_b32 v40, s54, 22 +; GISEL-NEXT: v_writelane_b32 v40, s55, 23 +; GISEL-NEXT: v_writelane_b32 v40, s56, 24 +; GISEL-NEXT: v_writelane_b32 v40, s57, 25 +; GISEL-NEXT: v_writelane_b32 v40, s58, 26 +; GISEL-NEXT: v_writelane_b32 v40, s59, 27 +; GISEL-NEXT: v_writelane_b32 v40, s60, 28 +; GISEL-NEXT: v_writelane_b32 v40, s61, 29 +; GISEL-NEXT: v_writelane_b32 v40, s62, 30 +; GISEL-NEXT: v_writelane_b32 v40, s63, 31 +; GISEL-NEXT: s_mov_b64 s[4:5], exec ; GISEL-NEXT: .LBB8_1: ; =>This Inner Loop Header: Depth=1 -; GISEL-NEXT: v_readfirstlane_b32 s8, v1 -; GISEL-NEXT: v_readfirstlane_b32 s9, v2 -; GISEL-NEXT: v_cmp_eq_u64_e32 vcc, s[8:9], v[1:2] -; GISEL-NEXT: s_and_saveexec_b64 s[10:11], vcc -; GISEL-NEXT: s_swappc_b64 s[30:31], s[8:9] +; GISEL-NEXT: v_readfirstlane_b32 s6, v1 +; GISEL-NEXT: v_readfirstlane_b32 s7, v2 +; GISEL-NEXT: v_cmp_eq_u64_e32 vcc, s[6:7], v[1:2] +; GISEL-NEXT: s_and_saveexec_b64 s[8:9], vcc +; GISEL-NEXT: s_swappc_b64 s[30:31], s[6:7] ; GISEL-NEXT: v_mov_b32_e32 v3, v0 ; GISEL-NEXT: ; implicit-def: $vgpr1_vgpr2 ; GISEL-NEXT: ; implicit-def: $vgpr0 -; GISEL-NEXT: s_xor_b64 exec, exec, s[10:11] +; GISEL-NEXT: s_xor_b64 exec, exec, s[8:9] ; 
GISEL-NEXT: s_cbranch_execnz .LBB8_1 ; GISEL-NEXT: ; %bb.2: -; GISEL-NEXT: s_mov_b64 exec, s[6:7] +; GISEL-NEXT: s_mov_b64 exec, s[4:5] ; GISEL-NEXT: v_mov_b32_e32 v0, v3 -; GISEL-NEXT: v_readlane_b32 s63, v40, 29 -; GISEL-NEXT: v_readlane_b32 s62, v40, 28 -; GISEL-NEXT: v_readlane_b32 s61, v40, 27 -; GISEL-NEXT: v_readlane_b32 s60, v40, 26 -; GISEL-NEXT: v_readlane_b32 s59, v40, 25 -; GISEL-NEXT: v_readlane_b32 s58, v40, 24 -; GISEL-NEXT: v_readlane_b32 s57, v40, 23 -; GISEL-NEXT: v_readlane_b32 s56, v40, 22 -; GISEL-NEXT: v_readlane_b32 s55, v40, 21 -; GISEL-NEXT: v_readlane_b32 s54, v40, 20 -; GISEL-NEXT: v_readlane_b32 s53, v40, 19 -; GISEL-NEXT: v_readlane_b32 s52, v40, 18 -; GISEL-NEXT: v_readlane_b32 s51, v40, 17 -; GISEL-NEXT: v_readlane_b32 s50, v40, 16 -; GISEL-NEXT: v_readlane_b32 s49, v40, 15 -; GISEL-NEXT: v_readlane_b32 s48, v40, 14 -; GISEL-NEXT: v_readlane_b32 s47, v40, 13 -; GISEL-NEXT: v_readlane_b32 s46, v40, 12 -; GISEL-NEXT: v_readlane_b32 s45, v40, 11 -; GISEL-NEXT: v_readlane_b32 s44, v40, 10 -; GISEL-NEXT: v_readlane_b32 s43, v40, 9 -; GISEL-NEXT: v_readlane_b32 s42, v40, 8 -; GISEL-NEXT: v_readlane_b32 s41, v40, 7 -; GISEL-NEXT: v_readlane_b32 s40, v40, 6 -; GISEL-NEXT: v_readlane_b32 s39, v40, 5 -; GISEL-NEXT: v_readlane_b32 s38, v40, 4 -; GISEL-NEXT: v_readlane_b32 s37, v40, 3 -; GISEL-NEXT: v_readlane_b32 s36, v40, 2 -; GISEL-NEXT: v_readlane_b32 s35, v40, 1 -; GISEL-NEXT: v_readlane_b32 s34, v40, 0 +; GISEL-NEXT: v_readlane_b32 s63, v40, 31 +; GISEL-NEXT: v_readlane_b32 s62, v40, 30 +; GISEL-NEXT: v_readlane_b32 s61, v40, 29 +; GISEL-NEXT: v_readlane_b32 s60, v40, 28 +; GISEL-NEXT: v_readlane_b32 s59, v40, 27 +; GISEL-NEXT: v_readlane_b32 s58, v40, 26 +; GISEL-NEXT: v_readlane_b32 s57, v40, 25 +; GISEL-NEXT: v_readlane_b32 s56, v40, 24 +; GISEL-NEXT: v_readlane_b32 s55, v40, 23 +; GISEL-NEXT: v_readlane_b32 s54, v40, 22 +; GISEL-NEXT: v_readlane_b32 s53, v40, 21 +; GISEL-NEXT: v_readlane_b32 s52, v40, 20 +; GISEL-NEXT: v_readlane_b32 s51, v40, 19 +; GISEL-NEXT: v_readlane_b32 s50, v40, 18 +; GISEL-NEXT: v_readlane_b32 s49, v40, 17 +; GISEL-NEXT: v_readlane_b32 s48, v40, 16 +; GISEL-NEXT: v_readlane_b32 s47, v40, 15 +; GISEL-NEXT: v_readlane_b32 s46, v40, 14 +; GISEL-NEXT: v_readlane_b32 s45, v40, 13 +; GISEL-NEXT: v_readlane_b32 s44, v40, 12 +; GISEL-NEXT: v_readlane_b32 s43, v40, 11 +; GISEL-NEXT: v_readlane_b32 s42, v40, 10 +; GISEL-NEXT: v_readlane_b32 s41, v40, 9 +; GISEL-NEXT: v_readlane_b32 s40, v40, 8 +; GISEL-NEXT: v_readlane_b32 s39, v40, 7 +; GISEL-NEXT: v_readlane_b32 s38, v40, 6 +; GISEL-NEXT: v_readlane_b32 s37, v40, 5 +; GISEL-NEXT: v_readlane_b32 s36, v40, 4 +; GISEL-NEXT: v_readlane_b32 s35, v40, 3 +; GISEL-NEXT: v_readlane_b32 s34, v40, 2 +; GISEL-NEXT: v_readlane_b32 s31, v40, 1 +; GISEL-NEXT: v_readlane_b32 s30, v40, 0 ; GISEL-NEXT: s_addk_i32 s32, 0xfc00 -; GISEL-NEXT: v_readlane_b32 s33, v40, 30 -; GISEL-NEXT: s_or_saveexec_b64 s[6:7], -1 +; GISEL-NEXT: v_readlane_b32 s33, v40, 32 +; GISEL-NEXT: s_or_saveexec_b64 s[4:5], -1 ; GISEL-NEXT: buffer_load_dword v40, off, s[0:3], s32 ; 4-byte Folded Reload -; GISEL-NEXT: s_mov_b64 exec, s[6:7] +; GISEL-NEXT: s_mov_b64 exec, s[4:5] ; GISEL-NEXT: s_waitcnt vmcnt(0) -; GISEL-NEXT: s_setpc_b64 s[4:5] +; GISEL-NEXT: s_setpc_b64 s[30:31] %ret = call amdgpu_gfx i32 %fptr(i32 %i) ret i32 %ret } @@ -1649,89 +1667,92 @@ define void @test_indirect_tail_call_vgpr_ptr(void()* %fptr) { ; GCN-NEXT: s_or_saveexec_b64 s[4:5], -1 ; GCN-NEXT: buffer_store_dword v40, off, s[0:3], s32 ; 4-byte Folded Spill ; GCN-NEXT: 
s_mov_b64 exec, s[4:5] -; GCN-NEXT: v_writelane_b32 v40, s33, 30 +; GCN-NEXT: v_writelane_b32 v40, s33, 32 ; GCN-NEXT: s_mov_b32 s33, s32 ; GCN-NEXT: s_addk_i32 s32, 0x400 -; GCN-NEXT: v_writelane_b32 v40, s34, 0 -; GCN-NEXT: v_writelane_b32 v40, s35, 1 -; GCN-NEXT: v_writelane_b32 v40, s36, 2 -; GCN-NEXT: v_writelane_b32 v40, s37, 3 -; GCN-NEXT: v_writelane_b32 v40, s38, 4 -; GCN-NEXT: v_writelane_b32 v40, s39, 5 -; GCN-NEXT: v_writelane_b32 v40, s40, 6 -; GCN-NEXT: v_writelane_b32 v40, s41, 7 -; GCN-NEXT: v_writelane_b32 v40, s42, 8 -; GCN-NEXT: v_writelane_b32 v40, s43, 9 -; GCN-NEXT: v_writelane_b32 v40, s44, 10 -; GCN-NEXT: v_writelane_b32 v40, s45, 11 -; GCN-NEXT: v_writelane_b32 v40, s46, 12 -; GCN-NEXT: v_writelane_b32 v40, s47, 13 -; GCN-NEXT: v_writelane_b32 v40, s48, 14 -; GCN-NEXT: v_writelane_b32 v40, s49, 15 -; GCN-NEXT: v_writelane_b32 v40, s50, 16 -; GCN-NEXT: v_writelane_b32 v40, s51, 17 -; GCN-NEXT: v_writelane_b32 v40, s52, 18 -; GCN-NEXT: v_writelane_b32 v40, s53, 19 -; GCN-NEXT: v_writelane_b32 v40, s54, 20 -; GCN-NEXT: v_writelane_b32 v40, s55, 21 -; GCN-NEXT: v_writelane_b32 v40, s56, 22 -; GCN-NEXT: v_writelane_b32 v40, s57, 23 -; GCN-NEXT: v_writelane_b32 v40, s58, 24 -; GCN-NEXT: v_writelane_b32 v40, s59, 25 -; GCN-NEXT: v_writelane_b32 v40, s60, 26 -; GCN-NEXT: v_writelane_b32 v40, s61, 27 -; GCN-NEXT: v_writelane_b32 v40, s62, 28 -; GCN-NEXT: v_writelane_b32 v40, s63, 29 -; GCN-NEXT: s_mov_b64 s[4:5], s[30:31] -; GCN-NEXT: s_mov_b64 s[6:7], exec +; GCN-NEXT: v_writelane_b32 v40, s30, 0 +; GCN-NEXT: v_writelane_b32 v40, s31, 1 +; GCN-NEXT: v_writelane_b32 v40, s34, 2 +; GCN-NEXT: v_writelane_b32 v40, s35, 3 +; GCN-NEXT: v_writelane_b32 v40, s36, 4 +; GCN-NEXT: v_writelane_b32 v40, s37, 5 +; GCN-NEXT: v_writelane_b32 v40, s38, 6 +; GCN-NEXT: v_writelane_b32 v40, s39, 7 +; GCN-NEXT: v_writelane_b32 v40, s40, 8 +; GCN-NEXT: v_writelane_b32 v40, s41, 9 +; GCN-NEXT: v_writelane_b32 v40, s42, 10 +; GCN-NEXT: v_writelane_b32 v40, s43, 11 +; GCN-NEXT: v_writelane_b32 v40, s44, 12 +; GCN-NEXT: v_writelane_b32 v40, s45, 13 +; GCN-NEXT: v_writelane_b32 v40, s46, 14 +; GCN-NEXT: v_writelane_b32 v40, s47, 15 +; GCN-NEXT: v_writelane_b32 v40, s48, 16 +; GCN-NEXT: v_writelane_b32 v40, s49, 17 +; GCN-NEXT: v_writelane_b32 v40, s50, 18 +; GCN-NEXT: v_writelane_b32 v40, s51, 19 +; GCN-NEXT: v_writelane_b32 v40, s52, 20 +; GCN-NEXT: v_writelane_b32 v40, s53, 21 +; GCN-NEXT: v_writelane_b32 v40, s54, 22 +; GCN-NEXT: v_writelane_b32 v40, s55, 23 +; GCN-NEXT: v_writelane_b32 v40, s56, 24 +; GCN-NEXT: v_writelane_b32 v40, s57, 25 +; GCN-NEXT: v_writelane_b32 v40, s58, 26 +; GCN-NEXT: v_writelane_b32 v40, s59, 27 +; GCN-NEXT: v_writelane_b32 v40, s60, 28 +; GCN-NEXT: v_writelane_b32 v40, s61, 29 +; GCN-NEXT: v_writelane_b32 v40, s62, 30 +; GCN-NEXT: v_writelane_b32 v40, s63, 31 +; GCN-NEXT: s_mov_b64 s[4:5], exec ; GCN-NEXT: .LBB9_1: ; =>This Inner Loop Header: Depth=1 -; GCN-NEXT: v_readfirstlane_b32 s10, v0 -; GCN-NEXT: v_readfirstlane_b32 s11, v1 -; GCN-NEXT: v_cmp_eq_u64_e32 vcc, s[10:11], v[0:1] -; GCN-NEXT: s_and_saveexec_b64 s[8:9], vcc -; GCN-NEXT: s_swappc_b64 s[30:31], s[10:11] +; GCN-NEXT: v_readfirstlane_b32 s8, v0 +; GCN-NEXT: v_readfirstlane_b32 s9, v1 +; GCN-NEXT: v_cmp_eq_u64_e32 vcc, s[8:9], v[0:1] +; GCN-NEXT: s_and_saveexec_b64 s[6:7], vcc +; GCN-NEXT: s_swappc_b64 s[30:31], s[8:9] ; GCN-NEXT: ; implicit-def: $vgpr0_vgpr1 -; GCN-NEXT: s_xor_b64 exec, exec, s[8:9] +; GCN-NEXT: s_xor_b64 exec, exec, s[6:7] ; GCN-NEXT: s_cbranch_execnz .LBB9_1 ; GCN-NEXT: ; %bb.2: -; 
GCN-NEXT: s_mov_b64 exec, s[6:7] -; GCN-NEXT: v_readlane_b32 s63, v40, 29 -; GCN-NEXT: v_readlane_b32 s62, v40, 28 -; GCN-NEXT: v_readlane_b32 s61, v40, 27 -; GCN-NEXT: v_readlane_b32 s60, v40, 26 -; GCN-NEXT: v_readlane_b32 s59, v40, 25 -; GCN-NEXT: v_readlane_b32 s58, v40, 24 -; GCN-NEXT: v_readlane_b32 s57, v40, 23 -; GCN-NEXT: v_readlane_b32 s56, v40, 22 -; GCN-NEXT: v_readlane_b32 s55, v40, 21 -; GCN-NEXT: v_readlane_b32 s54, v40, 20 -; GCN-NEXT: v_readlane_b32 s53, v40, 19 -; GCN-NEXT: v_readlane_b32 s52, v40, 18 -; GCN-NEXT: v_readlane_b32 s51, v40, 17 -; GCN-NEXT: v_readlane_b32 s50, v40, 16 -; GCN-NEXT: v_readlane_b32 s49, v40, 15 -; GCN-NEXT: v_readlane_b32 s48, v40, 14 -; GCN-NEXT: v_readlane_b32 s47, v40, 13 -; GCN-NEXT: v_readlane_b32 s46, v40, 12 -; GCN-NEXT: v_readlane_b32 s45, v40, 11 -; GCN-NEXT: v_readlane_b32 s44, v40, 10 -; GCN-NEXT: v_readlane_b32 s43, v40, 9 -; GCN-NEXT: v_readlane_b32 s42, v40, 8 -; GCN-NEXT: v_readlane_b32 s41, v40, 7 -; GCN-NEXT: v_readlane_b32 s40, v40, 6 -; GCN-NEXT: v_readlane_b32 s39, v40, 5 -; GCN-NEXT: v_readlane_b32 s38, v40, 4 -; GCN-NEXT: v_readlane_b32 s37, v40, 3 -; GCN-NEXT: v_readlane_b32 s36, v40, 2 -; GCN-NEXT: v_readlane_b32 s35, v40, 1 -; GCN-NEXT: v_readlane_b32 s34, v40, 0 +; GCN-NEXT: s_mov_b64 exec, s[4:5] +; GCN-NEXT: v_readlane_b32 s63, v40, 31 +; GCN-NEXT: v_readlane_b32 s62, v40, 30 +; GCN-NEXT: v_readlane_b32 s61, v40, 29 +; GCN-NEXT: v_readlane_b32 s60, v40, 28 +; GCN-NEXT: v_readlane_b32 s59, v40, 27 +; GCN-NEXT: v_readlane_b32 s58, v40, 26 +; GCN-NEXT: v_readlane_b32 s57, v40, 25 +; GCN-NEXT: v_readlane_b32 s56, v40, 24 +; GCN-NEXT: v_readlane_b32 s55, v40, 23 +; GCN-NEXT: v_readlane_b32 s54, v40, 22 +; GCN-NEXT: v_readlane_b32 s53, v40, 21 +; GCN-NEXT: v_readlane_b32 s52, v40, 20 +; GCN-NEXT: v_readlane_b32 s51, v40, 19 +; GCN-NEXT: v_readlane_b32 s50, v40, 18 +; GCN-NEXT: v_readlane_b32 s49, v40, 17 +; GCN-NEXT: v_readlane_b32 s48, v40, 16 +; GCN-NEXT: v_readlane_b32 s47, v40, 15 +; GCN-NEXT: v_readlane_b32 s46, v40, 14 +; GCN-NEXT: v_readlane_b32 s45, v40, 13 +; GCN-NEXT: v_readlane_b32 s44, v40, 12 +; GCN-NEXT: v_readlane_b32 s43, v40, 11 +; GCN-NEXT: v_readlane_b32 s42, v40, 10 +; GCN-NEXT: v_readlane_b32 s41, v40, 9 +; GCN-NEXT: v_readlane_b32 s40, v40, 8 +; GCN-NEXT: v_readlane_b32 s39, v40, 7 +; GCN-NEXT: v_readlane_b32 s38, v40, 6 +; GCN-NEXT: v_readlane_b32 s37, v40, 5 +; GCN-NEXT: v_readlane_b32 s36, v40, 4 +; GCN-NEXT: v_readlane_b32 s35, v40, 3 +; GCN-NEXT: v_readlane_b32 s34, v40, 2 +; GCN-NEXT: v_readlane_b32 s31, v40, 1 +; GCN-NEXT: v_readlane_b32 s30, v40, 0 ; GCN-NEXT: s_addk_i32 s32, 0xfc00 -; GCN-NEXT: v_readlane_b32 s33, v40, 30 -; GCN-NEXT: s_or_saveexec_b64 s[6:7], -1 +; GCN-NEXT: v_readlane_b32 s33, v40, 32 +; GCN-NEXT: s_or_saveexec_b64 s[4:5], -1 ; GCN-NEXT: buffer_load_dword v40, off, s[0:3], s32 ; 4-byte Folded Reload -; GCN-NEXT: s_mov_b64 exec, s[6:7] +; GCN-NEXT: s_mov_b64 exec, s[4:5] ; GCN-NEXT: s_waitcnt vmcnt(0) -; GCN-NEXT: s_setpc_b64 s[4:5] +; GCN-NEXT: s_setpc_b64 s[30:31] ; ; GISEL-LABEL: test_indirect_tail_call_vgpr_ptr: ; GISEL: ; %bb.0: @@ -1739,89 +1760,92 @@ define void @test_indirect_tail_call_vgpr_ptr(void()* %fptr) { ; GISEL-NEXT: s_or_saveexec_b64 s[4:5], -1 ; GISEL-NEXT: buffer_store_dword v40, off, s[0:3], s32 ; 4-byte Folded Spill ; GISEL-NEXT: s_mov_b64 exec, s[4:5] -; GISEL-NEXT: v_writelane_b32 v40, s33, 30 +; GISEL-NEXT: v_writelane_b32 v40, s33, 32 ; GISEL-NEXT: s_mov_b32 s33, s32 ; GISEL-NEXT: s_addk_i32 s32, 0x400 -; GISEL-NEXT: v_writelane_b32 v40, s34, 0 -; 
GISEL-NEXT: v_writelane_b32 v40, s35, 1 -; GISEL-NEXT: v_writelane_b32 v40, s36, 2 -; GISEL-NEXT: v_writelane_b32 v40, s37, 3 -; GISEL-NEXT: v_writelane_b32 v40, s38, 4 -; GISEL-NEXT: v_writelane_b32 v40, s39, 5 -; GISEL-NEXT: v_writelane_b32 v40, s40, 6 -; GISEL-NEXT: v_writelane_b32 v40, s41, 7 -; GISEL-NEXT: v_writelane_b32 v40, s42, 8 -; GISEL-NEXT: v_writelane_b32 v40, s43, 9 -; GISEL-NEXT: v_writelane_b32 v40, s44, 10 -; GISEL-NEXT: v_writelane_b32 v40, s45, 11 -; GISEL-NEXT: v_writelane_b32 v40, s46, 12 -; GISEL-NEXT: v_writelane_b32 v40, s47, 13 -; GISEL-NEXT: v_writelane_b32 v40, s48, 14 -; GISEL-NEXT: v_writelane_b32 v40, s49, 15 -; GISEL-NEXT: v_writelane_b32 v40, s50, 16 -; GISEL-NEXT: v_writelane_b32 v40, s51, 17 -; GISEL-NEXT: v_writelane_b32 v40, s52, 18 -; GISEL-NEXT: v_writelane_b32 v40, s53, 19 -; GISEL-NEXT: v_writelane_b32 v40, s54, 20 -; GISEL-NEXT: v_writelane_b32 v40, s55, 21 -; GISEL-NEXT: v_writelane_b32 v40, s56, 22 -; GISEL-NEXT: v_writelane_b32 v40, s57, 23 -; GISEL-NEXT: v_writelane_b32 v40, s58, 24 -; GISEL-NEXT: v_writelane_b32 v40, s59, 25 -; GISEL-NEXT: v_writelane_b32 v40, s60, 26 -; GISEL-NEXT: v_writelane_b32 v40, s61, 27 -; GISEL-NEXT: v_writelane_b32 v40, s62, 28 -; GISEL-NEXT: v_writelane_b32 v40, s63, 29 -; GISEL-NEXT: s_mov_b64 s[4:5], s[30:31] -; GISEL-NEXT: s_mov_b64 s[6:7], exec +; GISEL-NEXT: v_writelane_b32 v40, s30, 0 +; GISEL-NEXT: v_writelane_b32 v40, s31, 1 +; GISEL-NEXT: v_writelane_b32 v40, s34, 2 +; GISEL-NEXT: v_writelane_b32 v40, s35, 3 +; GISEL-NEXT: v_writelane_b32 v40, s36, 4 +; GISEL-NEXT: v_writelane_b32 v40, s37, 5 +; GISEL-NEXT: v_writelane_b32 v40, s38, 6 +; GISEL-NEXT: v_writelane_b32 v40, s39, 7 +; GISEL-NEXT: v_writelane_b32 v40, s40, 8 +; GISEL-NEXT: v_writelane_b32 v40, s41, 9 +; GISEL-NEXT: v_writelane_b32 v40, s42, 10 +; GISEL-NEXT: v_writelane_b32 v40, s43, 11 +; GISEL-NEXT: v_writelane_b32 v40, s44, 12 +; GISEL-NEXT: v_writelane_b32 v40, s45, 13 +; GISEL-NEXT: v_writelane_b32 v40, s46, 14 +; GISEL-NEXT: v_writelane_b32 v40, s47, 15 +; GISEL-NEXT: v_writelane_b32 v40, s48, 16 +; GISEL-NEXT: v_writelane_b32 v40, s49, 17 +; GISEL-NEXT: v_writelane_b32 v40, s50, 18 +; GISEL-NEXT: v_writelane_b32 v40, s51, 19 +; GISEL-NEXT: v_writelane_b32 v40, s52, 20 +; GISEL-NEXT: v_writelane_b32 v40, s53, 21 +; GISEL-NEXT: v_writelane_b32 v40, s54, 22 +; GISEL-NEXT: v_writelane_b32 v40, s55, 23 +; GISEL-NEXT: v_writelane_b32 v40, s56, 24 +; GISEL-NEXT: v_writelane_b32 v40, s57, 25 +; GISEL-NEXT: v_writelane_b32 v40, s58, 26 +; GISEL-NEXT: v_writelane_b32 v40, s59, 27 +; GISEL-NEXT: v_writelane_b32 v40, s60, 28 +; GISEL-NEXT: v_writelane_b32 v40, s61, 29 +; GISEL-NEXT: v_writelane_b32 v40, s62, 30 +; GISEL-NEXT: v_writelane_b32 v40, s63, 31 +; GISEL-NEXT: s_mov_b64 s[4:5], exec ; GISEL-NEXT: .LBB9_1: ; =>This Inner Loop Header: Depth=1 -; GISEL-NEXT: v_readfirstlane_b32 s8, v0 -; GISEL-NEXT: v_readfirstlane_b32 s9, v1 -; GISEL-NEXT: v_cmp_eq_u64_e32 vcc, s[8:9], v[0:1] -; GISEL-NEXT: s_and_saveexec_b64 s[10:11], vcc -; GISEL-NEXT: s_swappc_b64 s[30:31], s[8:9] +; GISEL-NEXT: v_readfirstlane_b32 s6, v0 +; GISEL-NEXT: v_readfirstlane_b32 s7, v1 +; GISEL-NEXT: v_cmp_eq_u64_e32 vcc, s[6:7], v[0:1] +; GISEL-NEXT: s_and_saveexec_b64 s[8:9], vcc +; GISEL-NEXT: s_swappc_b64 s[30:31], s[6:7] ; GISEL-NEXT: ; implicit-def: $vgpr0_vgpr1 -; GISEL-NEXT: s_xor_b64 exec, exec, s[10:11] +; GISEL-NEXT: s_xor_b64 exec, exec, s[8:9] ; GISEL-NEXT: s_cbranch_execnz .LBB9_1 ; GISEL-NEXT: ; %bb.2: -; GISEL-NEXT: s_mov_b64 exec, s[6:7] -; GISEL-NEXT: 
v_readlane_b32 s63, v40, 29 -; GISEL-NEXT: v_readlane_b32 s62, v40, 28 -; GISEL-NEXT: v_readlane_b32 s61, v40, 27 -; GISEL-NEXT: v_readlane_b32 s60, v40, 26 -; GISEL-NEXT: v_readlane_b32 s59, v40, 25 -; GISEL-NEXT: v_readlane_b32 s58, v40, 24 -; GISEL-NEXT: v_readlane_b32 s57, v40, 23 -; GISEL-NEXT: v_readlane_b32 s56, v40, 22 -; GISEL-NEXT: v_readlane_b32 s55, v40, 21 -; GISEL-NEXT: v_readlane_b32 s54, v40, 20 -; GISEL-NEXT: v_readlane_b32 s53, v40, 19 -; GISEL-NEXT: v_readlane_b32 s52, v40, 18 -; GISEL-NEXT: v_readlane_b32 s51, v40, 17 -; GISEL-NEXT: v_readlane_b32 s50, v40, 16 -; GISEL-NEXT: v_readlane_b32 s49, v40, 15 -; GISEL-NEXT: v_readlane_b32 s48, v40, 14 -; GISEL-NEXT: v_readlane_b32 s47, v40, 13 -; GISEL-NEXT: v_readlane_b32 s46, v40, 12 -; GISEL-NEXT: v_readlane_b32 s45, v40, 11 -; GISEL-NEXT: v_readlane_b32 s44, v40, 10 -; GISEL-NEXT: v_readlane_b32 s43, v40, 9 -; GISEL-NEXT: v_readlane_b32 s42, v40, 8 -; GISEL-NEXT: v_readlane_b32 s41, v40, 7 -; GISEL-NEXT: v_readlane_b32 s40, v40, 6 -; GISEL-NEXT: v_readlane_b32 s39, v40, 5 -; GISEL-NEXT: v_readlane_b32 s38, v40, 4 -; GISEL-NEXT: v_readlane_b32 s37, v40, 3 -; GISEL-NEXT: v_readlane_b32 s36, v40, 2 -; GISEL-NEXT: v_readlane_b32 s35, v40, 1 -; GISEL-NEXT: v_readlane_b32 s34, v40, 0 +; GISEL-NEXT: s_mov_b64 exec, s[4:5] +; GISEL-NEXT: v_readlane_b32 s63, v40, 31 +; GISEL-NEXT: v_readlane_b32 s62, v40, 30 +; GISEL-NEXT: v_readlane_b32 s61, v40, 29 +; GISEL-NEXT: v_readlane_b32 s60, v40, 28 +; GISEL-NEXT: v_readlane_b32 s59, v40, 27 +; GISEL-NEXT: v_readlane_b32 s58, v40, 26 +; GISEL-NEXT: v_readlane_b32 s57, v40, 25 +; GISEL-NEXT: v_readlane_b32 s56, v40, 24 +; GISEL-NEXT: v_readlane_b32 s55, v40, 23 +; GISEL-NEXT: v_readlane_b32 s54, v40, 22 +; GISEL-NEXT: v_readlane_b32 s53, v40, 21 +; GISEL-NEXT: v_readlane_b32 s52, v40, 20 +; GISEL-NEXT: v_readlane_b32 s51, v40, 19 +; GISEL-NEXT: v_readlane_b32 s50, v40, 18 +; GISEL-NEXT: v_readlane_b32 s49, v40, 17 +; GISEL-NEXT: v_readlane_b32 s48, v40, 16 +; GISEL-NEXT: v_readlane_b32 s47, v40, 15 +; GISEL-NEXT: v_readlane_b32 s46, v40, 14 +; GISEL-NEXT: v_readlane_b32 s45, v40, 13 +; GISEL-NEXT: v_readlane_b32 s44, v40, 12 +; GISEL-NEXT: v_readlane_b32 s43, v40, 11 +; GISEL-NEXT: v_readlane_b32 s42, v40, 10 +; GISEL-NEXT: v_readlane_b32 s41, v40, 9 +; GISEL-NEXT: v_readlane_b32 s40, v40, 8 +; GISEL-NEXT: v_readlane_b32 s39, v40, 7 +; GISEL-NEXT: v_readlane_b32 s38, v40, 6 +; GISEL-NEXT: v_readlane_b32 s37, v40, 5 +; GISEL-NEXT: v_readlane_b32 s36, v40, 4 +; GISEL-NEXT: v_readlane_b32 s35, v40, 3 +; GISEL-NEXT: v_readlane_b32 s34, v40, 2 +; GISEL-NEXT: v_readlane_b32 s31, v40, 1 +; GISEL-NEXT: v_readlane_b32 s30, v40, 0 ; GISEL-NEXT: s_addk_i32 s32, 0xfc00 -; GISEL-NEXT: v_readlane_b32 s33, v40, 30 -; GISEL-NEXT: s_or_saveexec_b64 s[6:7], -1 +; GISEL-NEXT: v_readlane_b32 s33, v40, 32 +; GISEL-NEXT: s_or_saveexec_b64 s[4:5], -1 ; GISEL-NEXT: buffer_load_dword v40, off, s[0:3], s32 ; 4-byte Folded Reload -; GISEL-NEXT: s_mov_b64 exec, s[6:7] +; GISEL-NEXT: s_mov_b64 exec, s[4:5] ; GISEL-NEXT: s_waitcnt vmcnt(0) -; GISEL-NEXT: s_setpc_b64 s[4:5] +; GISEL-NEXT: s_setpc_b64 s[30:31] tail call amdgpu_gfx void %fptr() ret void } diff --git a/llvm/test/CodeGen/AMDGPU/inline-asm.i128.ll b/llvm/test/CodeGen/AMDGPU/inline-asm.i128.ll index b303e69064a0..d3e18dbaaa6d 100644 --- a/llvm/test/CodeGen/AMDGPU/inline-asm.i128.ll +++ b/llvm/test/CodeGen/AMDGPU/inline-asm.i128.ll @@ -8,15 +8,15 @@ define amdgpu_kernel void @s_input_output_i128() { ; GFX908-LABEL: name: s_input_output_i128 ; GFX908: bb.0 
(%ir-block.0): - ; GFX908-NEXT: INLINEASM &"; def $0", 1 /* sideeffect attdialect */, 5767178 /* regdef:SGPR_128 */, def %4 + ; GFX908-NEXT: INLINEASM &"; def $0", 1 /* sideeffect attdialect */, 5242890 /* regdef:SGPR_128 */, def %4 ; GFX908-NEXT: [[COPY:%[0-9]+]]:sgpr_128 = COPY %4 - ; GFX908-NEXT: INLINEASM &"; use $0", 1 /* sideeffect attdialect */, 5767177 /* reguse:SGPR_128 */, [[COPY]] + ; GFX908-NEXT: INLINEASM &"; use $0", 1 /* sideeffect attdialect */, 5242889 /* reguse:SGPR_128 */, [[COPY]] ; GFX908-NEXT: S_ENDPGM 0 ; GFX90A-LABEL: name: s_input_output_i128 ; GFX90A: bb.0 (%ir-block.0): - ; GFX90A-NEXT: INLINEASM &"; def $0", 1 /* sideeffect attdialect */, 5767178 /* regdef:SGPR_128 */, def %4 + ; GFX90A-NEXT: INLINEASM &"; def $0", 1 /* sideeffect attdialect */, 5242890 /* regdef:SGPR_128 */, def %4 ; GFX90A-NEXT: [[COPY:%[0-9]+]]:sgpr_128 = COPY %4 - ; GFX90A-NEXT: INLINEASM &"; use $0", 1 /* sideeffect attdialect */, 5767177 /* reguse:SGPR_128 */, [[COPY]] + ; GFX90A-NEXT: INLINEASM &"; use $0", 1 /* sideeffect attdialect */, 5242889 /* reguse:SGPR_128 */, [[COPY]] ; GFX90A-NEXT: S_ENDPGM 0 %val = tail call i128 asm sideeffect "; def $0", "=s"() call void asm sideeffect "; use $0", "s"(i128 %val) @@ -26,15 +26,15 @@ define amdgpu_kernel void @s_input_output_i128() { define amdgpu_kernel void @v_input_output_i128() { ; GFX908-LABEL: name: v_input_output_i128 ; GFX908: bb.0 (%ir-block.0): - ; GFX908-NEXT: INLINEASM &"; def $0", 1 /* sideeffect attdialect */, 5308426 /* regdef:VReg_128 */, def %4 + ; GFX908-NEXT: INLINEASM &"; def $0", 1 /* sideeffect attdialect */, 4784138 /* regdef:VReg_128 */, def %4 ; GFX908-NEXT: [[COPY:%[0-9]+]]:vreg_128 = COPY %4 - ; GFX908-NEXT: INLINEASM &"; use $0", 1 /* sideeffect attdialect */, 5308425 /* reguse:VReg_128 */, [[COPY]] + ; GFX908-NEXT: INLINEASM &"; use $0", 1 /* sideeffect attdialect */, 4784137 /* reguse:VReg_128 */, [[COPY]] ; GFX908-NEXT: S_ENDPGM 0 ; GFX90A-LABEL: name: v_input_output_i128 ; GFX90A: bb.0 (%ir-block.0): - ; GFX90A-NEXT: INLINEASM &"; def $0", 1 /* sideeffect attdialect */, 5505034 /* regdef:VReg_128_Align2 */, def %4 + ; GFX90A-NEXT: INLINEASM &"; def $0", 1 /* sideeffect attdialect */, 4980746 /* regdef:VReg_128_Align2 */, def %4 ; GFX90A-NEXT: [[COPY:%[0-9]+]]:vreg_128_align2 = COPY %4 - ; GFX90A-NEXT: INLINEASM &"; use $0", 1 /* sideeffect attdialect */, 5505033 /* reguse:VReg_128_Align2 */, [[COPY]] + ; GFX90A-NEXT: INLINEASM &"; use $0", 1 /* sideeffect attdialect */, 4980745 /* reguse:VReg_128_Align2 */, [[COPY]] ; GFX90A-NEXT: S_ENDPGM 0 %val = tail call i128 asm sideeffect "; def $0", "=v"() call void asm sideeffect "; use $0", "v"(i128 %val) @@ -44,15 +44,15 @@ define amdgpu_kernel void @v_input_output_i128() { define amdgpu_kernel void @a_input_output_i128() { ; GFX908-LABEL: name: a_input_output_i128 ; GFX908: bb.0 (%ir-block.0): - ; GFX908-NEXT: INLINEASM &"; def $0", 1 /* sideeffect attdialect */, 5242890 /* regdef:AReg_128 */, def %4 + ; GFX908-NEXT: INLINEASM &"; def $0", 1 /* sideeffect attdialect */, 4718602 /* regdef:AReg_128 */, def %4 ; GFX908-NEXT: [[COPY:%[0-9]+]]:areg_128 = COPY %4 - ; GFX908-NEXT: INLINEASM &"; use $0", 1 /* sideeffect attdialect */, 5242889 /* reguse:AReg_128 */, [[COPY]] + ; GFX908-NEXT: INLINEASM &"; use $0", 1 /* sideeffect attdialect */, 4718601 /* reguse:AReg_128 */, [[COPY]] ; GFX908-NEXT: S_ENDPGM 0 ; GFX90A-LABEL: name: a_input_output_i128 ; GFX90A: bb.0 (%ir-block.0): - ; GFX90A-NEXT: INLINEASM &"; def $0", 1 /* sideeffect attdialect */, 5439498 /* 
regdef:AReg_128_Align2 */, def %4 + ; GFX90A-NEXT: INLINEASM &"; def $0", 1 /* sideeffect attdialect */, 4915210 /* regdef:AReg_128_Align2 */, def %4 ; GFX90A-NEXT: [[COPY:%[0-9]+]]:areg_128_align2 = COPY %4 - ; GFX90A-NEXT: INLINEASM &"; use $0", 1 /* sideeffect attdialect */, 5439497 /* reguse:AReg_128_Align2 */, [[COPY]] + ; GFX90A-NEXT: INLINEASM &"; use $0", 1 /* sideeffect attdialect */, 4915209 /* reguse:AReg_128_Align2 */, [[COPY]] ; GFX90A-NEXT: S_ENDPGM 0 %val = call i128 asm sideeffect "; def $0", "=a"() call void asm sideeffect "; use $0", "a"(i128 %val) diff --git a/llvm/test/CodeGen/AMDGPU/ipra.ll b/llvm/test/CodeGen/AMDGPU/ipra.ll index 1466d3b76bec..6e4752b05110 100644 --- a/llvm/test/CodeGen/AMDGPU/ipra.ll +++ b/llvm/test/CodeGen/AMDGPU/ipra.ll @@ -40,20 +40,16 @@ define amdgpu_kernel void @kernel_call() #0 { } ; GCN-LABEL: {{^}}func_regular_call: -; GCN-NOT: buffer_store ; GCN-NOT: buffer_load ; GCN-NOT: readlane -; GCN-NOT: writelane -; GCN: flat_load_dword v8 +; GCN: flat_load_dword v9 ; GCN: s_swappc_b64 -; GCN-NOT: buffer_store ; GCN-NOT: buffer_load ; GCN-NOT: readlane -; GCN-NOT: writelane -; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, v8 +; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, v9 -; GCN: ; NumSgprs: 32 -; GCN: ; NumVgprs: 9 +; GCN: ; NumSgprs: 34 +; GCN: ; NumVgprs: 10 define void @func_regular_call() #1 { %vgpr = load volatile i32, i32 addrspace(1)* undef tail call void @func() @@ -76,13 +72,13 @@ define void @func_tail_call() #1 { } ; GCN-LABEL: {{^}}func_call_tail_call: -; GCN: flat_load_dword v8 +; GCN: flat_load_dword v9 ; GCN: s_swappc_b64 -; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, v8 +; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, v9 ; GCN: s_setpc_b64 -; GCN: ; NumSgprs: 32 -; GCN: ; NumVgprs: 9 +; GCN: ; NumSgprs: 34 +; GCN: ; NumVgprs: 10 define void @func_call_tail_call() #1 { %vgpr = load volatile i32, i32 addrspace(1)* undef tail call void @func() @@ -97,8 +93,11 @@ define void @void_func_void() noinline { ; Make sure we don't get save/restore of FP between calls. 
; GCN-LABEL: {{^}}test_funcx2: -; GCN-NOT: s5 +; GCN: s_getpc_b64 ; GCN-NOT: s32 +; GCN: s_swappc_b64 +; GCN-NOT: s32 +; GCN: s_swappc_b64 define void @test_funcx2() #0 { call void @void_func_void() call void @void_func_void() diff --git a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.implicitarg.ptr.ll b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.implicitarg.ptr.ll index 9b6143d7a952..581bb7233e50 100644 --- a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.implicitarg.ptr.ll +++ b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.implicitarg.ptr.ll @@ -197,6 +197,8 @@ define amdgpu_kernel void @opencl_kernel_call_implicitarg_ptr_func([112 x i8]) # ; GCN-NOT: s8 ; GCN-NOT: s9 ; GCN-NOT: s[8:9] +; GCN: s_swappc_b64 +; GCN: s_setpc_b64 s[30:31] define void @func_call_implicitarg_ptr_func() #0 { call void @func_implicitarg_ptr() ret void @@ -206,6 +208,8 @@ define void @func_call_implicitarg_ptr_func() #0 { ; GCN-NOT: s8 ; GCN-NOT: s9 ; GCN-NOT: s[8:9] +; GCN: s_swappc_b64 +; GCN: s_setpc_b64 s[30:31] define void @opencl_func_call_implicitarg_ptr_func() #0 { call void @func_implicitarg_ptr() ret void diff --git a/llvm/test/CodeGen/AMDGPU/llvm.powi.ll b/llvm/test/CodeGen/AMDGPU/llvm.powi.ll index 7838d5ab8def..a95df6268034 100644 --- a/llvm/test/CodeGen/AMDGPU/llvm.powi.ll +++ b/llvm/test/CodeGen/AMDGPU/llvm.powi.ll @@ -57,9 +57,9 @@ define float @v_powi_neg1_f32(float %l) { ; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX7-NEXT: v_div_scale_f32 v1, s[4:5], v0, v0, 1.0 ; GFX7-NEXT: v_rcp_f32_e32 v2, v1 -; GFX7-NEXT: v_fma_f32 v3, -v1, v2, 1.0 -; GFX7-NEXT: v_fma_f32 v2, v3, v2, v2 ; GFX7-NEXT: v_div_scale_f32 v3, vcc, 1.0, v0, 1.0 +; GFX7-NEXT: v_fma_f32 v4, -v1, v2, 1.0 +; GFX7-NEXT: v_fma_f32 v2, v4, v2, v2 ; GFX7-NEXT: v_mul_f32_e32 v4, v3, v2 ; GFX7-NEXT: v_fma_f32 v5, -v1, v4, v3 ; GFX7-NEXT: v_fma_f32 v4, v5, v2, v4 @@ -104,9 +104,9 @@ define float @v_powi_neg2_f32(float %l) { ; GFX7-NEXT: v_mul_f32_e32 v0, v0, v0 ; GFX7-NEXT: v_div_scale_f32 v1, s[4:5], v0, v0, 1.0 ; GFX7-NEXT: v_rcp_f32_e32 v2, v1 -; GFX7-NEXT: v_fma_f32 v3, -v1, v2, 1.0 -; GFX7-NEXT: v_fma_f32 v2, v3, v2, v2 ; GFX7-NEXT: v_div_scale_f32 v3, vcc, 1.0, v0, 1.0 +; GFX7-NEXT: v_fma_f32 v4, -v1, v2, 1.0 +; GFX7-NEXT: v_fma_f32 v2, v4, v2, v2 ; GFX7-NEXT: v_mul_f32_e32 v4, v3, v2 ; GFX7-NEXT: v_fma_f32 v5, -v1, v4, v3 ; GFX7-NEXT: v_fma_f32 v4, v5, v2, v4 @@ -200,9 +200,9 @@ define float @v_powi_neg128_f32(float %l) { ; GFX7-NEXT: v_mul_f32_e32 v0, v0, v0 ; GFX7-NEXT: v_div_scale_f32 v1, s[4:5], v0, v0, 1.0 ; GFX7-NEXT: v_rcp_f32_e32 v2, v1 -; GFX7-NEXT: v_fma_f32 v3, -v1, v2, 1.0 -; GFX7-NEXT: v_fma_f32 v2, v3, v2, v2 ; GFX7-NEXT: v_div_scale_f32 v3, vcc, 1.0, v0, 1.0 +; GFX7-NEXT: v_fma_f32 v4, -v1, v2, 1.0 +; GFX7-NEXT: v_fma_f32 v2, v4, v2, v2 ; GFX7-NEXT: v_mul_f32_e32 v4, v3, v2 ; GFX7-NEXT: v_fma_f32 v5, -v1, v4, v3 ; GFX7-NEXT: v_fma_f32 v4, v5, v2, v4 diff --git a/llvm/test/CodeGen/AMDGPU/mul24-pass-ordering.ll b/llvm/test/CodeGen/AMDGPU/mul24-pass-ordering.ll index 2fac70d96bc3..7ae67bad68ae 100644 --- a/llvm/test/CodeGen/AMDGPU/mul24-pass-ordering.ll +++ b/llvm/test/CodeGen/AMDGPU/mul24-pass-ordering.ll @@ -193,22 +193,22 @@ define void @slsr1_1(i32 %b.arg, i32 %s.arg) #0 { ; GFX9-NEXT: buffer_store_dword v40, off, s[0:3], s32 offset:12 ; 4-byte Folded Spill ; GFX9-NEXT: s_mov_b64 exec, s[4:5] ; GFX9-NEXT: v_writelane_b32 v40, s33, 4 +; GFX9-NEXT: v_writelane_b32 v40, s30, 0 ; GFX9-NEXT: s_mov_b32 s33, s32 ; GFX9-NEXT: s_addk_i32 s32, 0x800 -; GFX9-NEXT: v_writelane_b32 v40, s34, 0 +; GFX9-NEXT: v_writelane_b32 v40, s31, 1 +; GFX9-NEXT: 
v_writelane_b32 v40, s34, 2 ; GFX9-NEXT: s_getpc_b64 s[4:5] ; GFX9-NEXT: s_add_u32 s4, s4, foo@gotpcrel32@lo+4 ; GFX9-NEXT: s_addc_u32 s5, s5, foo@gotpcrel32@hi+12 -; GFX9-NEXT: v_writelane_b32 v40, s35, 1 +; GFX9-NEXT: v_writelane_b32 v40, s35, 3 ; GFX9-NEXT: s_load_dwordx2 s[34:35], s[4:5], 0x0 ; GFX9-NEXT: buffer_store_dword v41, off, s[0:3], s33 offset:8 ; 4-byte Folded Spill ; GFX9-NEXT: buffer_store_dword v42, off, s[0:3], s33 offset:4 ; 4-byte Folded Spill ; GFX9-NEXT: buffer_store_dword v43, off, s[0:3], s33 ; 4-byte Folded Spill ; GFX9-NEXT: v_mov_b32_e32 v41, v1 ; GFX9-NEXT: v_mov_b32_e32 v42, v0 -; GFX9-NEXT: v_writelane_b32 v40, s30, 2 ; GFX9-NEXT: v_mul_u32_u24_e32 v0, v42, v41 -; GFX9-NEXT: v_writelane_b32 v40, s31, 3 ; GFX9-NEXT: v_and_b32_e32 v43, 0xffffff, v41 ; GFX9-NEXT: s_waitcnt lgkmcnt(0) ; GFX9-NEXT: s_swappc_b64 s[30:31], s[34:35] @@ -220,17 +220,17 @@ define void @slsr1_1(i32 %b.arg, i32 %s.arg) #0 { ; GFX9-NEXT: buffer_load_dword v43, off, s[0:3], s33 ; 4-byte Folded Reload ; GFX9-NEXT: buffer_load_dword v42, off, s[0:3], s33 offset:4 ; 4-byte Folded Reload ; GFX9-NEXT: buffer_load_dword v41, off, s[0:3], s33 offset:8 ; 4-byte Folded Reload -; GFX9-NEXT: v_readlane_b32 s4, v40, 2 -; GFX9-NEXT: v_readlane_b32 s5, v40, 3 -; GFX9-NEXT: v_readlane_b32 s35, v40, 1 -; GFX9-NEXT: v_readlane_b32 s34, v40, 0 +; GFX9-NEXT: v_readlane_b32 s35, v40, 3 +; GFX9-NEXT: v_readlane_b32 s34, v40, 2 +; GFX9-NEXT: v_readlane_b32 s31, v40, 1 +; GFX9-NEXT: v_readlane_b32 s30, v40, 0 ; GFX9-NEXT: s_addk_i32 s32, 0xf800 ; GFX9-NEXT: v_readlane_b32 s33, v40, 4 -; GFX9-NEXT: s_or_saveexec_b64 s[6:7], -1 +; GFX9-NEXT: s_or_saveexec_b64 s[4:5], -1 ; GFX9-NEXT: buffer_load_dword v40, off, s[0:3], s32 offset:12 ; 4-byte Folded Reload -; GFX9-NEXT: s_mov_b64 exec, s[6:7] +; GFX9-NEXT: s_mov_b64 exec, s[4:5] ; GFX9-NEXT: s_waitcnt vmcnt(0) -; GFX9-NEXT: s_setpc_b64 s[4:5] +; GFX9-NEXT: s_setpc_b64 s[30:31] %b = and i32 %b.arg, 16777215 %s = and i32 %s.arg, 16777215 diff --git a/llvm/test/CodeGen/AMDGPU/need-fp-from-vgpr-spills.ll b/llvm/test/CodeGen/AMDGPU/need-fp-from-vgpr-spills.ll index 68646ad229a7..109163bf7373 100644 --- a/llvm/test/CodeGen/AMDGPU/need-fp-from-vgpr-spills.ll +++ b/llvm/test/CodeGen/AMDGPU/need-fp-from-vgpr-spills.ll @@ -44,11 +44,11 @@ define internal fastcc void @csr_vgpr_spill_fp_callee() #0 { ; CHECK-NEXT: s_mov_b64 s[0:1], s[8:9] ; CHECK-NEXT: s_mov_b64 s[2:3], s[10:11] ; CHECK-NEXT: s_swappc_b64 s[30:31], s[4:5] -; CHECK-NEXT: v_readlane_b32 s30, v1, 0 -; CHECK-NEXT: v_readlane_b32 s31, v1, 1 ; CHECK-NEXT: ;;#ASMSTART ; CHECK-NEXT: ; clobber csr v40 ; CHECK-NEXT: ;;#ASMEND +; CHECK-NEXT: v_readlane_b32 s31, v1, 1 +; CHECK-NEXT: v_readlane_b32 s30, v1, 0 ; CHECK-NEXT: buffer_load_dword v40, off, s[0:3], s33 ; 4-byte Folded Reload ; CHECK-NEXT: s_add_i32 s32, s32, 0xfffffc00 ; CHECK-NEXT: v_readlane_b32 s33, v1, 2 @@ -168,8 +168,8 @@ define hidden i32 @caller_save_vgpr_spill_fp_tail_call() #0 { ; CHECK-NEXT: s_mov_b64 s[0:1], s[8:9] ; CHECK-NEXT: s_mov_b64 s[2:3], s[10:11] ; CHECK-NEXT: s_swappc_b64 s[30:31], s[4:5] -; CHECK-NEXT: v_readlane_b32 s30, v1, 0 ; CHECK-NEXT: v_readlane_b32 s31, v1, 1 +; CHECK-NEXT: v_readlane_b32 s30, v1, 0 ; CHECK-NEXT: s_add_i32 s32, s32, 0xfffffc00 ; CHECK-NEXT: v_readlane_b32 s33, v1, 2 ; CHECK-NEXT: s_or_saveexec_b64 s[4:5], -1 @@ -202,8 +202,8 @@ define hidden i32 @caller_save_vgpr_spill_fp() #0 { ; CHECK-NEXT: s_mov_b64 s[0:1], s[8:9] ; CHECK-NEXT: s_mov_b64 s[2:3], s[10:11] ; CHECK-NEXT: s_swappc_b64 s[30:31], s[4:5] -; CHECK-NEXT: 
v_readlane_b32 s30, v2, 0 ; CHECK-NEXT: v_readlane_b32 s31, v2, 1 +; CHECK-NEXT: v_readlane_b32 s30, v2, 0 ; CHECK-NEXT: s_add_i32 s32, s32, 0xfffffc00 ; CHECK-NEXT: v_readlane_b32 s33, v2, 2 ; CHECK-NEXT: s_or_saveexec_b64 s[4:5], -1 diff --git a/llvm/test/CodeGen/AMDGPU/nested-calls.ll b/llvm/test/CodeGen/AMDGPU/nested-calls.ll index 327136fdfe1e..2ae8530e1ed1 100644 --- a/llvm/test/CodeGen/AMDGPU/nested-calls.ll +++ b/llvm/test/CodeGen/AMDGPU/nested-calls.ll @@ -22,8 +22,8 @@ declare void @external_void_func_i32(i32) #0 ; GCN: s_swappc_b64 -; GCN: v_readlane_b32 s4, v40, 0 -; GCN: v_readlane_b32 s5, v40, 1 +; GCN: v_readlane_b32 s31, v40, 1 +; GCN: v_readlane_b32 s30, v40, 0 ; GCN-NEXT: s_addk_i32 s32, 0xfc00 ; GCN-NEXT: v_readlane_b32 s33, v40, 2 @@ -31,7 +31,7 @@ declare void @external_void_func_i32(i32) #0 ; GCN-NEXT: buffer_load_dword v40, off, s[0:3], s32 ; 4-byte Folded Reload ; GCN-NEXT: s_mov_b64 exec, [[COPY_EXEC1]] ; GCN-NEXT: s_waitcnt vmcnt(0) -; GCN-NEXT: s_setpc_b64 s[4:5] +; GCN-NEXT: s_setpc_b64 s[30:31] define void @test_func_call_external_void_func_i32_imm() #0 { call void @external_void_func_i32(i32 42) ret void diff --git a/llvm/test/CodeGen/AMDGPU/no-remat-indirect-mov.mir b/llvm/test/CodeGen/AMDGPU/no-remat-indirect-mov.mir index 4f4bdcd6f5b4..d5abbc017260 100644 --- a/llvm/test/CodeGen/AMDGPU/no-remat-indirect-mov.mir +++ b/llvm/test/CodeGen/AMDGPU/no-remat-indirect-mov.mir @@ -30,8 +30,7 @@ body: | ; GFX9-LABEL: name: index_vgpr_waterfall_loop ; GFX9: bb.0: ; GFX9: successors: %bb.1(0x80000000) - ; GFX9: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $vgpr8, $vgpr9, $vgpr10, $vgpr11, $vgpr12, $vgpr13, $vgpr14, $vgpr15, $vgpr16, $sgpr30_sgpr31 - ; GFX9: [[COPY:%[0-9]+]]:sreg_64 = COPY $sgpr30_sgpr31 + ; GFX9: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $vgpr8, $vgpr9, $vgpr10, $vgpr11, $vgpr12, $vgpr13, $vgpr14, $vgpr15, $vgpr16 ; GFX9: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr16 ; GFX9: undef %18.sub15:vreg_512 = COPY $vgpr15 ; GFX9: %18.sub14:vreg_512 = COPY $vgpr14 @@ -63,7 +62,6 @@ body: | ; GFX9: S_CBRANCH_EXECNZ %bb.1, implicit $exec ; GFX9: bb.2: ; GFX9: $exec = S_MOV_B64 [[S_MOV_B64_]] - ; GFX9: $sgpr30_sgpr31 = COPY [[COPY]] ; GFX9: $vgpr0 = COPY [[V_MOV_B32_e32_]] ; GFX9: S_SETPC_B64_return $sgpr30_sgpr31, implicit $vgpr0, implicit undef $vgpr1, implicit undef $vgpr2, implicit undef $vgpr3 bb.0: diff --git a/llvm/test/CodeGen/AMDGPU/no-source-locations-in-prologue.ll b/llvm/test/CodeGen/AMDGPU/no-source-locations-in-prologue.ll index 84643f945ac5..1e424ecde23d 100644 --- a/llvm/test/CodeGen/AMDGPU/no-source-locations-in-prologue.ll +++ b/llvm/test/CodeGen/AMDGPU/no-source-locations-in-prologue.ll @@ -19,10 +19,10 @@ define hidden void @_ZL3barv() #0 !dbg !1644 { ; CHECK-NEXT: v_writelane_b32 v40, s33, 2 ; CHECK-NEXT: s_mov_b32 s33, s32 ; CHECK-NEXT: s_add_i32 s32, s32, 0x400 -; CHECK-NEXT: v_writelane_b32 v40, s30, 0 -; CHECK-NEXT: v_writelane_b32 v40, s31, 1 ; CHECK-NEXT: .Ltmp0: ; CHECK-NEXT: .loc 0 31 3 prologue_end ; lane-info.cpp:31:3 +; CHECK-NEXT: v_writelane_b32 v40, s30, 0 +; CHECK-NEXT: v_writelane_b32 v40, s31, 1 ; CHECK-NEXT: s_getpc_b64 s[16:17] ; CHECK-NEXT: s_add_u32 s16, s16, _ZL13sleep_foreverv@gotpcrel32@lo+4 ; CHECK-NEXT: s_addc_u32 s17, s17, _ZL13sleep_foreverv@gotpcrel32@hi+12 @@ -34,14 +34,14 @@ define hidden void @_ZL3barv() #0 !dbg !1644 { ; CHECK-NEXT: s_waitcnt lgkmcnt(0) ; CHECK-NEXT: s_swappc_b64 s[30:31], s[16:17] ; CHECK-NEXT: .Ltmp1: -; CHECK-NEXT: v_readlane_b32 s30, v40, 
0 +; CHECK-NEXT: .loc 0 32 1 ; lane-info.cpp:32:1 ; CHECK-NEXT: v_readlane_b32 s31, v40, 1 +; CHECK-NEXT: v_readlane_b32 s30, v40, 0 ; CHECK-NEXT: s_add_i32 s32, s32, 0xfffffc00 ; CHECK-NEXT: v_readlane_b32 s33, v40, 2 ; CHECK-NEXT: s_or_saveexec_b64 s[4:5], -1 ; CHECK-NEXT: buffer_load_dword v40, off, s[0:3], s32 ; 4-byte Folded Reload ; CHECK-NEXT: s_mov_b64 exec, s[4:5] -; CHECK-NEXT: .loc 0 32 1 ; lane-info.cpp:32:1 ; CHECK-NEXT: s_waitcnt vmcnt(0) ; CHECK-NEXT: s_setpc_b64 s[30:31] ; CHECK-NEXT: .Ltmp2: diff --git a/llvm/test/CodeGen/AMDGPU/reserve-vgpr-for-sgpr-spill.ll b/llvm/test/CodeGen/AMDGPU/reserve-vgpr-for-sgpr-spill.ll index e0e211243771..d474ea525799 100644 --- a/llvm/test/CodeGen/AMDGPU/reserve-vgpr-for-sgpr-spill.ll +++ b/llvm/test/CodeGen/AMDGPU/reserve-vgpr-for-sgpr-spill.ll @@ -11,8 +11,8 @@ define void @child_function() #0 { ; GCN: v_writelane_b32 v255, s30, 0 ; GCN: v_writelane_b32 v255, s31, 1 ; GCN: s_swappc_b64 s[30:31], s[4:5] -; GCN: v_readlane_b32 s30, v255, 0 ; GCN: v_readlane_b32 s31, v255, 1 +; GCN: v_readlane_b32 s30, v255, 0 ; GCN: v_readlane_b32 s33, v255, 2 ; GCN: ; NumVgprs: 256 @@ -57,8 +57,8 @@ define void @reserve_vgpr_with_no_lower_vgpr_available() #0 { ; GCN: v_writelane_b32 v254, s30, 0 ; GCN: v_writelane_b32 v254, s31, 1 ; GCN: s_swappc_b64 s[30:31], s[4:5] -; GCN: v_readlane_b32 s30, v254, 0 ; GCN: v_readlane_b32 s31, v254, 1 +; GCN: v_readlane_b32 s30, v254, 0 ; GCN: v_readlane_b32 s33, v254, 2 define void @reserve_lowest_available_vgpr() #0 { @@ -99,8 +99,8 @@ define void @reserve_lowest_available_vgpr() #0 { ; GCN-LABEL: {{^}}reserve_vgpr_with_sgpr_spills: ; GCN-NOT: buffer_store_dword v255, off, s[0:3], s32 ; GCN: ; def s4 -; GCN: v_writelane_b32 v254, s4, 2 -; GCN: v_readlane_b32 s4, v254, 2 +; GCN: v_writelane_b32 v254, s4, 0 +; GCN: v_readlane_b32 s4, v254, 0 ; GCN: ; use s4 define void @reserve_vgpr_with_sgpr_spills() #0 { diff --git a/llvm/test/CodeGen/AMDGPU/save-fp.ll b/llvm/test/CodeGen/AMDGPU/save-fp.ll index 6bed1dbf901e..ec56f41aa1a0 100644 --- a/llvm/test/CodeGen/AMDGPU/save-fp.ll +++ b/llvm/test/CodeGen/AMDGPU/save-fp.ll @@ -11,16 +11,14 @@ bb: ; GCN-LABEL: {{^}}caller: -; GFX900: s_mov_b32 [[SAVED_FP:s[0-9]+]], s33 -; GFX900: s_mov_b32 s33, s32 -; GFX908-NOT: s_mov_b32 s33, s32 +; GCN: v_writelane_b32 v2, s33, 2 +; GCN: s_mov_b32 s33, s32 ; GFX900: buffer_store_dword -; GFX908-DAG: s_mov_b32 [[SAVED_FP:s[0-9]+]], s33 ; GFX908-DAG: v_accvgpr_write_b32 ; GCN: s_swappc_b64 ; GFX900: buffer_load_dword ; GFX908: v_accvgpr_read_b32 -; GCN: s_mov_b32 s33, [[SAVED_FP]] +; GCN: v_readlane_b32 s33, v2, 2 define i64 @caller() { bb: call void asm sideeffect "", "~{v40}" () diff --git a/llvm/test/CodeGen/AMDGPU/sibling-call.ll b/llvm/test/CodeGen/AMDGPU/sibling-call.ll index fccfb8fe2f21..054d4ef3651e 100644 --- a/llvm/test/CodeGen/AMDGPU/sibling-call.ll +++ b/llvm/test/CodeGen/AMDGPU/sibling-call.ll @@ -201,18 +201,18 @@ entry: ; GCN: s_or_saveexec_b64 s{{\[[0-9]+:[0-9]+\]}}, -1 ; GCN-NEXT: buffer_store_dword [[CSRV:v[0-9]+]], off, s[0:3], s32 offset:8 ; 4-byte Folded Spill ; GCN-NEXT: s_mov_b64 exec -; GCN: s_mov_b32 s33, s32 +; GCN: v_writelane_b32 [[CSRV]], s33, 2 ; GCN-DAG: s_addk_i32 s32, 0x400 -; GCN-DAG: buffer_store_dword v41, off, s[0:3], s33 offset:4 ; 4-byte Folded Spill -; GCN-DAG: buffer_store_dword v42, off, s[0:3], s33 ; 4-byte Folded Spill -; GCN-DAG: v_writelane_b32 [[CSRV]], s34, 0 -; GCN-DAG: v_writelane_b32 [[CSRV]], s35, 1 - ; GCN-DAG: s_getpc_b64 s[4:5] ; GCN-DAG: s_add_u32 s4, s4, i32_fastcc_i32_i32@gotpcrel32@lo+4 ; 
GCN-DAG: s_addc_u32 s5, s5, i32_fastcc_i32_i32@gotpcrel32@hi+12 +; GCN-DAG: v_writelane_b32 [[CSRV]], s30, 0 +; GCN-DAG: buffer_store_dword v41, off, s[0:3], s33 offset:4 ; 4-byte Folded Spill +; GCN-DAG: buffer_store_dword v42, off, s[0:3], s33 ; 4-byte Folded Spill +; GCN-DAG: v_writelane_b32 [[CSRV]], s31, 1 + ; GCN: s_swappc_b64 @@ -223,8 +223,8 @@ entry: ; GCN-NEXT: s_add_u32 s4, s4, sibling_call_i32_fastcc_i32_i32@rel32@lo+4 ; GCN-NEXT: s_addc_u32 s5, s5, sibling_call_i32_fastcc_i32_i32@rel32@hi+12 -; GCN-DAG: v_readlane_b32 s34, [[CSRV]], 0 -; GCN-DAG: v_readlane_b32 s35, [[CSRV]], 1 +; GCN-DAG: v_readlane_b32 s30, [[CSRV]], 0 +; GCN-DAG: v_readlane_b32 s31, [[CSRV]], 1 ; GCN: s_addk_i32 s32, 0xfc00 ; GCN-NEXT: v_readlane_b32 s33, diff --git a/llvm/test/CodeGen/AMDGPU/splitkit-copy-bundle.mir b/llvm/test/CodeGen/AMDGPU/splitkit-copy-bundle.mir index 23714f0323ef..d7e368f4d6d9 100644 --- a/llvm/test/CodeGen/AMDGPU/splitkit-copy-bundle.mir +++ b/llvm/test/CodeGen/AMDGPU/splitkit-copy-bundle.mir @@ -89,11 +89,11 @@ body: | ; VR: renamable $sgpr37 = S_MOV_B32 -1 ; VR: renamable $sgpr36 = S_MOV_B32 -1 ; VR: renamable $sgpr68 = S_MOV_B32 0 + ; VR: renamable $sgpr30_sgpr31 = IMPLICIT_DEF ; VR: renamable $sgpr34_sgpr35 = IMPLICIT_DEF - ; VR: renamable $sgpr66_sgpr67 = IMPLICIT_DEF ; VR: bb.1: ; VR: successors: %bb.2(0x80000000) - ; VR: liveins: $sgpr36_sgpr37_sgpr38_sgpr39_sgpr40_sgpr41_sgpr42_sgpr43_sgpr44_sgpr45_sgpr46_sgpr47_sgpr48_sgpr49_sgpr50_sgpr51_sgpr52_sgpr53_sgpr54_sgpr55_sgpr56_sgpr57_sgpr58_sgpr59_sgpr60_sgpr61_sgpr62_sgpr63_sgpr64_sgpr65_sgpr66_sgpr67:0x000000000000000F, $sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75_sgpr76_sgpr77_sgpr78_sgpr79_sgpr80_sgpr81_sgpr82_sgpr83_sgpr84_sgpr85_sgpr86_sgpr87_sgpr88_sgpr89_sgpr90_sgpr91_sgpr92_sgpr93_sgpr94_sgpr95_sgpr96_sgpr97_sgpr98_sgpr99:0x0000000000000003, $sgpr34_sgpr35, $sgpr66_sgpr67 + ; VR: liveins: $sgpr36_sgpr37_sgpr38_sgpr39_sgpr40_sgpr41_sgpr42_sgpr43_sgpr44_sgpr45_sgpr46_sgpr47_sgpr48_sgpr49_sgpr50_sgpr51_sgpr52_sgpr53_sgpr54_sgpr55_sgpr56_sgpr57_sgpr58_sgpr59_sgpr60_sgpr61_sgpr62_sgpr63_sgpr64_sgpr65_sgpr66_sgpr67:0x000000000000000F, $sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75_sgpr76_sgpr77_sgpr78_sgpr79_sgpr80_sgpr81_sgpr82_sgpr83_sgpr84_sgpr85_sgpr86_sgpr87_sgpr88_sgpr89_sgpr90_sgpr91_sgpr92_sgpr93_sgpr94_sgpr95_sgpr96_sgpr97_sgpr98_sgpr99:0x0000000000000003, $sgpr30_sgpr31, $sgpr34_sgpr35 ; VR: renamable $sgpr38 = COPY renamable $sgpr36 ; VR: renamable $sgpr39 = COPY renamable $sgpr37 ; VR: renamable $sgpr40 = COPY renamable $sgpr36 @@ -155,8 +155,8 @@ body: | ; VR: renamable $sgpr99 = COPY renamable $sgpr68 ; VR: bb.2: ; VR: successors: %bb.1(0x40000000), %bb.2(0x40000000) - ; VR: liveins: $sgpr36_sgpr37_sgpr38_sgpr39_sgpr40_sgpr41_sgpr42_sgpr43_sgpr44_sgpr45_sgpr46_sgpr47_sgpr48_sgpr49_sgpr50_sgpr51_sgpr52_sgpr53_sgpr54_sgpr55_sgpr56_sgpr57_sgpr58_sgpr59_sgpr60_sgpr61_sgpr62_sgpr63_sgpr64_sgpr65_sgpr66_sgpr67:0x000000000000000F, $sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75_sgpr76_sgpr77_sgpr78_sgpr79_sgpr80_sgpr81_sgpr82_sgpr83_sgpr84_sgpr85_sgpr86_sgpr87_sgpr88_sgpr89_sgpr90_sgpr91_sgpr92_sgpr93_sgpr94_sgpr95_sgpr96_sgpr97_sgpr98_sgpr99:0x0000000000000003, $sgpr34_sgpr35, $sgpr66_sgpr67 - ; VR: S_NOP 0, csr_amdgpu_highregs, implicit renamable $sgpr34_sgpr35, implicit renamable $sgpr66_sgpr67 + ; VR: liveins: 
$sgpr36_sgpr37_sgpr38_sgpr39_sgpr40_sgpr41_sgpr42_sgpr43_sgpr44_sgpr45_sgpr46_sgpr47_sgpr48_sgpr49_sgpr50_sgpr51_sgpr52_sgpr53_sgpr54_sgpr55_sgpr56_sgpr57_sgpr58_sgpr59_sgpr60_sgpr61_sgpr62_sgpr63_sgpr64_sgpr65_sgpr66_sgpr67:0x000000000000000F, $sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75_sgpr76_sgpr77_sgpr78_sgpr79_sgpr80_sgpr81_sgpr82_sgpr83_sgpr84_sgpr85_sgpr86_sgpr87_sgpr88_sgpr89_sgpr90_sgpr91_sgpr92_sgpr93_sgpr94_sgpr95_sgpr96_sgpr97_sgpr98_sgpr99:0x0000000000000003, $sgpr30_sgpr31, $sgpr34_sgpr35 + ; VR: S_NOP 0, csr_amdgpu_highregs, implicit renamable $sgpr30_sgpr31, implicit renamable $sgpr34_sgpr35 ; VR: S_CBRANCH_VCCNZ %bb.1, implicit undef $vcc ; VR: S_BRANCH %bb.2 bb.0: diff --git a/llvm/test/CodeGen/AMDGPU/stack-realign.ll b/llvm/test/CodeGen/AMDGPU/stack-realign.ll index c7031a892150..37df92e9700a 100644 --- a/llvm/test/CodeGen/AMDGPU/stack-realign.ll +++ b/llvm/test/CodeGen/AMDGPU/stack-realign.ll @@ -171,12 +171,15 @@ define void @func_call_align1024_bp_gets_vgpr_spill(<32 x i32> %a, i32 %b) #0 { ; GCN: buffer_store_dword v{{[0-9]+}}, off, s[0:3], s32 ; GCN: s_swappc_b64 s[30:31], +; GCN: v_readlane_b32 s31, [[VGPR_REG]], 1 +; GCN: v_readlane_b32 s30, [[VGPR_REG]], 0 ; GCN: s_add_i32 s32, s32, 0xfffd0000 ; GCN-NEXT: v_readlane_b32 s33, [[VGPR_REG]], 2 ; GCN-NEXT: v_readlane_b32 s34, [[VGPR_REG]], 3 -; GCN-NEXT: s_or_saveexec_b64 s[6:7], -1 +; GCN-NEXT: s_or_saveexec_b64 s[4:5], -1 ; GCN-NEXT: buffer_load_dword [[VGPR_REG]], off, s[0:3], s32 offset:1028 ; 4-byte Folded Reload -; GCN-NEXT: s_mov_b64 exec, s[6:7] +; GCN-NEXT: s_mov_b64 exec, s[4:5] +; GCN: s_setpc_b64 s[30:31] %temp = alloca i32, align 1024, addrspace(5) store volatile i32 0, i32 addrspace(5)* %temp, align 1024 call void @extern_func(<32 x i32> %a, i32 %b) diff --git a/llvm/test/CodeGen/AMDGPU/strict_fadd.f16.ll b/llvm/test/CodeGen/AMDGPU/strict_fadd.f16.ll index 884b56e7a530..bece8aab1e3d 100644 --- a/llvm/test/CodeGen/AMDGPU/strict_fadd.f16.ll +++ b/llvm/test/CodeGen/AMDGPU/strict_fadd.f16.ll @@ -186,13 +186,13 @@ define <4 x half> @v_constained_fadd_v4f16_fpexcept_strict(<4 x half> %x, <4 x h ; GFX10: ; %bb.0: ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 -; GFX10-NEXT: v_add_f16_e32 v4, v0, v2 -; GFX10-NEXT: v_mov_b32_e32 v5, 0xffff +; GFX10-NEXT: v_mov_b32_e32 v4, 0xffff +; GFX10-NEXT: v_add_f16_e32 v5, v0, v2 ; GFX10-NEXT: v_add_f16_e32 v6, v1, v3 ; GFX10-NEXT: v_add_f16_sdwa v1, v1, v3 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1 ; GFX10-NEXT: v_add_f16_sdwa v0, v0, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1 -; GFX10-NEXT: v_and_b32_e32 v2, v5, v4 -; GFX10-NEXT: v_and_b32_e32 v3, v5, v6 +; GFX10-NEXT: v_and_b32_e32 v2, v4, v5 +; GFX10-NEXT: v_and_b32_e32 v3, v4, v6 ; GFX10-NEXT: v_lshl_or_b32 v0, v0, 16, v2 ; GFX10-NEXT: v_lshl_or_b32 v1, v1, 16, v3 ; GFX10-NEXT: s_setpc_b64 s[30:31] diff --git a/llvm/test/CodeGen/AMDGPU/strict_fmul.f16.ll b/llvm/test/CodeGen/AMDGPU/strict_fmul.f16.ll index 34c5e908ac31..00aa96fea128 100644 --- a/llvm/test/CodeGen/AMDGPU/strict_fmul.f16.ll +++ b/llvm/test/CodeGen/AMDGPU/strict_fmul.f16.ll @@ -186,13 +186,13 @@ define <4 x half> @v_constained_fmul_v4f16_fpexcept_strict(<4 x half> %x, <4 x h ; GFX10: ; %bb.0: ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 -; GFX10-NEXT: v_mul_f16_e32 v4, v0, v2 -; GFX10-NEXT: v_mov_b32_e32 v5, 0xffff +; GFX10-NEXT: v_mov_b32_e32 v4, 0xffff +; GFX10-NEXT: v_mul_f16_e32 v5, v0, v2 ; 
GFX10-NEXT: v_mul_f16_e32 v6, v1, v3 ; GFX10-NEXT: v_mul_f16_sdwa v1, v1, v3 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1 ; GFX10-NEXT: v_mul_f16_sdwa v0, v0, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1 -; GFX10-NEXT: v_and_b32_e32 v2, v5, v4 -; GFX10-NEXT: v_and_b32_e32 v3, v5, v6 +; GFX10-NEXT: v_and_b32_e32 v2, v4, v5 +; GFX10-NEXT: v_and_b32_e32 v3, v4, v6 ; GFX10-NEXT: v_lshl_or_b32 v0, v0, 16, v2 ; GFX10-NEXT: v_lshl_or_b32 v1, v1, 16, v3 ; GFX10-NEXT: s_setpc_b64 s[30:31] diff --git a/llvm/test/CodeGen/AMDGPU/strict_fsub.f16.ll b/llvm/test/CodeGen/AMDGPU/strict_fsub.f16.ll index 4cf75f68a7a1..802c08fe64a2 100644 --- a/llvm/test/CodeGen/AMDGPU/strict_fsub.f16.ll +++ b/llvm/test/CodeGen/AMDGPU/strict_fsub.f16.ll @@ -206,13 +206,13 @@ define <4 x half> @v_constained_fsub_v4f16_fpexcept_strict(<4 x half> %x, <4 x h ; GFX10: ; %bb.0: ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 -; GFX10-NEXT: v_sub_f16_e32 v4, v0, v2 -; GFX10-NEXT: v_mov_b32_e32 v5, 0xffff +; GFX10-NEXT: v_mov_b32_e32 v4, 0xffff +; GFX10-NEXT: v_sub_f16_e32 v5, v0, v2 ; GFX10-NEXT: v_sub_f16_e32 v6, v1, v3 ; GFX10-NEXT: v_sub_f16_sdwa v1, v1, v3 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1 ; GFX10-NEXT: v_sub_f16_sdwa v0, v0, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1 -; GFX10-NEXT: v_and_b32_e32 v2, v5, v4 -; GFX10-NEXT: v_and_b32_e32 v3, v5, v6 +; GFX10-NEXT: v_and_b32_e32 v2, v4, v5 +; GFX10-NEXT: v_and_b32_e32 v3, v4, v6 ; GFX10-NEXT: v_lshl_or_b32 v0, v0, 16, v2 ; GFX10-NEXT: v_lshl_or_b32 v1, v1, 16, v3 ; GFX10-NEXT: s_setpc_b64 s[30:31] diff --git a/llvm/test/CodeGen/AMDGPU/tail-call-amdgpu-gfx.ll b/llvm/test/CodeGen/AMDGPU/tail-call-amdgpu-gfx.ll index 4894eeca5825..fba11bf0124e 100644 --- a/llvm/test/CodeGen/AMDGPU/tail-call-amdgpu-gfx.ll +++ b/llvm/test/CodeGen/AMDGPU/tail-call-amdgpu-gfx.ll @@ -20,25 +20,28 @@ define amdgpu_gfx float @caller(float %arg0) { ; GCN-NEXT: s_or_saveexec_b64 s[34:35], -1 ; GCN-NEXT: buffer_store_dword v1, off, s[0:3], s32 ; 4-byte Folded Spill ; GCN-NEXT: s_mov_b64 exec, s[34:35] -; GCN-NEXT: v_writelane_b32 v1, s33, 1 +; GCN-NEXT: v_writelane_b32 v1, s33, 3 +; GCN-NEXT: v_writelane_b32 v1, s4, 0 ; GCN-NEXT: s_mov_b32 s33, s32 ; GCN-NEXT: s_addk_i32 s32, 0x400 -; GCN-NEXT: v_writelane_b32 v1, s4, 0 +; GCN-NEXT: v_writelane_b32 v1, s30, 1 ; GCN-NEXT: v_add_f32_e32 v0, 1.0, v0 ; GCN-NEXT: s_mov_b32 s4, 2.0 -; GCN-NEXT: s_mov_b64 s[36:37], s[30:31] -; GCN-NEXT: s_getpc_b64 s[30:31] -; GCN-NEXT: s_add_u32 s30, s30, callee@rel32@lo+4 -; GCN-NEXT: s_addc_u32 s31, s31, callee@rel32@hi+12 -; GCN-NEXT: s_swappc_b64 s[30:31], s[30:31] +; GCN-NEXT: v_writelane_b32 v1, s31, 2 +; GCN-NEXT: s_getpc_b64 s[34:35] +; GCN-NEXT: s_add_u32 s34, s34, callee@rel32@lo+4 +; GCN-NEXT: s_addc_u32 s35, s35, callee@rel32@hi+12 +; GCN-NEXT: s_swappc_b64 s[30:31], s[34:35] +; GCN-NEXT: v_readlane_b32 s31, v1, 2 +; GCN-NEXT: v_readlane_b32 s30, v1, 1 ; GCN-NEXT: v_readlane_b32 s4, v1, 0 ; GCN-NEXT: s_addk_i32 s32, 0xfc00 -; GCN-NEXT: v_readlane_b32 s33, v1, 1 -; GCN-NEXT: s_or_saveexec_b64 s[30:31], -1 +; GCN-NEXT: v_readlane_b32 s33, v1, 3 +; GCN-NEXT: s_or_saveexec_b64 s[34:35], -1 ; GCN-NEXT: buffer_load_dword v1, off, s[0:3], s32 ; 4-byte Folded Reload -; GCN-NEXT: s_mov_b64 exec, s[30:31] +; GCN-NEXT: s_mov_b64 exec, s[34:35] ; GCN-NEXT: s_waitcnt vmcnt(0) -; GCN-NEXT: s_setpc_b64 s[36:37] +; GCN-NEXT: s_setpc_b64 s[30:31] %add = fadd float %arg0, 1.0 %call = 
tail call amdgpu_gfx float @callee(float %add, float inreg 2.0) ret float %call diff --git a/llvm/test/CodeGen/AMDGPU/udiv.ll b/llvm/test/CodeGen/AMDGPU/udiv.ll index d678b64f54b6..b991d6cd5149 100644 --- a/llvm/test/CodeGen/AMDGPU/udiv.ll +++ b/llvm/test/CodeGen/AMDGPU/udiv.ll @@ -2500,7 +2500,7 @@ define i64 @v_test_udiv64_mulhi_fold(i64 %arg) { ; SI-NEXT: v_madak_f32 v2, 0, v2, 0x47c35000 ; SI-NEXT: v_rcp_f32_e32 v2, v2 ; SI-NEXT: s_mov_b32 s4, 0xfffe7960 -; SI-NEXT: v_mov_b32_e32 v9, 0 +; SI-NEXT: v_mov_b32_e32 v8, 0 ; SI-NEXT: v_mul_f32_e32 v2, 0x5f7ffffc, v2 ; SI-NEXT: v_mul_f32_e32 v3, 0x2f800000, v2 ; SI-NEXT: v_trunc_f32_e32 v3, v3 @@ -2508,22 +2508,22 @@ define i64 @v_test_udiv64_mulhi_fold(i64 %arg) { ; SI-NEXT: v_cvt_u32_f32_e32 v2, v2 ; SI-NEXT: v_cvt_u32_f32_e32 v3, v3 ; SI-NEXT: v_mul_hi_u32 v4, v2, s4 -; SI-NEXT: v_mul_lo_u32 v5, v3, s4 -; SI-NEXT: v_mul_lo_u32 v6, v2, s4 +; SI-NEXT: v_mul_lo_u32 v6, v3, s4 +; SI-NEXT: v_mul_lo_u32 v5, v2, s4 ; SI-NEXT: v_subrev_i32_e32 v4, vcc, v2, v4 -; SI-NEXT: v_add_i32_e32 v4, vcc, v5, v4 -; SI-NEXT: v_mul_lo_u32 v5, v2, v4 -; SI-NEXT: v_mul_hi_u32 v7, v2, v6 -; SI-NEXT: v_mul_hi_u32 v8, v2, v4 +; SI-NEXT: v_add_i32_e32 v4, vcc, v6, v4 +; SI-NEXT: v_mul_hi_u32 v7, v2, v5 +; SI-NEXT: v_mul_lo_u32 v6, v2, v4 +; SI-NEXT: v_mul_hi_u32 v9, v2, v4 ; SI-NEXT: v_mul_hi_u32 v10, v3, v4 ; SI-NEXT: v_mul_lo_u32 v4, v3, v4 -; SI-NEXT: v_add_i32_e32 v5, vcc, v7, v5 -; SI-NEXT: v_addc_u32_e32 v7, vcc, 0, v8, vcc -; SI-NEXT: v_mul_lo_u32 v8, v3, v6 -; SI-NEXT: v_mul_hi_u32 v6, v3, v6 -; SI-NEXT: v_add_i32_e32 v5, vcc, v5, v8 -; SI-NEXT: v_addc_u32_e32 v5, vcc, v7, v6, vcc -; SI-NEXT: v_addc_u32_e32 v6, vcc, v10, v9, vcc +; SI-NEXT: v_add_i32_e32 v6, vcc, v7, v6 +; SI-NEXT: v_addc_u32_e32 v7, vcc, 0, v9, vcc +; SI-NEXT: v_mul_lo_u32 v9, v3, v5 +; SI-NEXT: v_mul_hi_u32 v5, v3, v5 +; SI-NEXT: v_add_i32_e32 v6, vcc, v6, v9 +; SI-NEXT: v_addc_u32_e32 v5, vcc, v7, v5, vcc +; SI-NEXT: v_addc_u32_e32 v6, vcc, v10, v8, vcc ; SI-NEXT: v_add_i32_e32 v4, vcc, v5, v4 ; SI-NEXT: v_addc_u32_e32 v5, vcc, 0, v6, vcc ; SI-NEXT: v_add_i32_e32 v2, vcc, v2, v4 @@ -2536,16 +2536,16 @@ define i64 @v_test_udiv64_mulhi_fold(i64 %arg) { ; SI-NEXT: v_add_i32_e32 v4, vcc, v4, v5 ; SI-NEXT: v_mul_lo_u32 v5, v2, v4 ; SI-NEXT: v_mul_hi_u32 v7, v2, v6 -; SI-NEXT: v_mul_hi_u32 v8, v2, v4 +; SI-NEXT: v_mul_hi_u32 v9, v2, v4 ; SI-NEXT: v_mul_hi_u32 v10, v3, v4 ; SI-NEXT: v_mul_lo_u32 v4, v3, v4 ; SI-NEXT: v_add_i32_e32 v5, vcc, v7, v5 -; SI-NEXT: v_addc_u32_e32 v7, vcc, 0, v8, vcc -; SI-NEXT: v_mul_lo_u32 v8, v3, v6 +; SI-NEXT: v_addc_u32_e32 v7, vcc, 0, v9, vcc +; SI-NEXT: v_mul_lo_u32 v9, v3, v6 ; SI-NEXT: v_mul_hi_u32 v6, v3, v6 -; SI-NEXT: v_add_i32_e32 v5, vcc, v5, v8 +; SI-NEXT: v_add_i32_e32 v5, vcc, v5, v9 ; SI-NEXT: v_addc_u32_e32 v5, vcc, v7, v6, vcc -; SI-NEXT: v_addc_u32_e32 v6, vcc, v10, v9, vcc +; SI-NEXT: v_addc_u32_e32 v6, vcc, v10, v8, vcc ; SI-NEXT: v_add_i32_e32 v4, vcc, v5, v4 ; SI-NEXT: v_addc_u32_e32 v5, vcc, 0, v6, vcc ; SI-NEXT: v_add_i32_e32 v2, vcc, v2, v4 @@ -2561,7 +2561,7 @@ define i64 @v_test_udiv64_mulhi_fold(i64 %arg) { ; SI-NEXT: v_mul_hi_u32 v2, v1, v2 ; SI-NEXT: v_add_i32_e32 v4, vcc, v4, v6 ; SI-NEXT: v_addc_u32_e32 v2, vcc, v5, v2, vcc -; SI-NEXT: v_addc_u32_e32 v4, vcc, v7, v9, vcc +; SI-NEXT: v_addc_u32_e32 v4, vcc, v7, v8, vcc ; SI-NEXT: v_add_i32_e32 v2, vcc, v2, v3 ; SI-NEXT: v_addc_u32_e32 v3, vcc, 0, v4, vcc ; SI-NEXT: v_mul_lo_u32 v4, v3, s4 @@ -2610,15 +2610,15 @@ define i64 @v_test_udiv64_mulhi_fold(i64 %arg) { ; VI-NEXT: v_mad_u64_u32 
v[2:3], s[4:5], v6, s6, 0 ; VI-NEXT: v_mul_lo_u32 v4, v7, s6 ; VI-NEXT: v_subrev_u32_e32 v3, vcc, v6, v3 -; VI-NEXT: v_add_u32_e32 v5, vcc, v4, v3 -; VI-NEXT: v_mad_u64_u32 v[3:4], s[4:5], v6, v5, 0 -; VI-NEXT: v_mul_hi_u32 v8, v6, v2 -; VI-NEXT: v_add_u32_e32 v8, vcc, v8, v3 +; VI-NEXT: v_add_u32_e32 v8, vcc, v4, v3 +; VI-NEXT: v_mul_hi_u32 v5, v6, v2 +; VI-NEXT: v_mad_u64_u32 v[3:4], s[4:5], v6, v8, 0 +; VI-NEXT: v_add_u32_e32 v10, vcc, v5, v3 ; VI-NEXT: v_mad_u64_u32 v[2:3], s[4:5], v7, v2, 0 -; VI-NEXT: v_addc_u32_e32 v10, vcc, 0, v4, vcc -; VI-NEXT: v_mad_u64_u32 v[4:5], s[4:5], v7, v5, 0 -; VI-NEXT: v_add_u32_e32 v2, vcc, v8, v2 -; VI-NEXT: v_addc_u32_e32 v2, vcc, v10, v3, vcc +; VI-NEXT: v_addc_u32_e32 v11, vcc, 0, v4, vcc +; VI-NEXT: v_mad_u64_u32 v[4:5], s[4:5], v7, v8, 0 +; VI-NEXT: v_add_u32_e32 v2, vcc, v10, v2 +; VI-NEXT: v_addc_u32_e32 v2, vcc, v11, v3, vcc ; VI-NEXT: v_addc_u32_e32 v3, vcc, v5, v9, vcc ; VI-NEXT: v_add_u32_e32 v2, vcc, v2, v4 ; VI-NEXT: v_addc_u32_e32 v3, vcc, 0, v3, vcc @@ -2698,15 +2698,15 @@ define i64 @v_test_udiv64_mulhi_fold(i64 %arg) { ; GCN-NEXT: v_mad_u64_u32 v[2:3], s[4:5], v6, s6, 0 ; GCN-NEXT: v_mul_lo_u32 v4, v7, s6 ; GCN-NEXT: v_subrev_u32_e32 v3, vcc, v6, v3 -; GCN-NEXT: v_add_u32_e32 v5, vcc, v4, v3 -; GCN-NEXT: v_mad_u64_u32 v[3:4], s[4:5], v6, v5, 0 -; GCN-NEXT: v_mul_hi_u32 v8, v6, v2 -; GCN-NEXT: v_add_u32_e32 v8, vcc, v8, v3 +; GCN-NEXT: v_add_u32_e32 v8, vcc, v4, v3 +; GCN-NEXT: v_mul_hi_u32 v5, v6, v2 +; GCN-NEXT: v_mad_u64_u32 v[3:4], s[4:5], v6, v8, 0 +; GCN-NEXT: v_add_u32_e32 v10, vcc, v5, v3 ; GCN-NEXT: v_mad_u64_u32 v[2:3], s[4:5], v7, v2, 0 -; GCN-NEXT: v_addc_u32_e32 v10, vcc, 0, v4, vcc -; GCN-NEXT: v_mad_u64_u32 v[4:5], s[4:5], v7, v5, 0 -; GCN-NEXT: v_add_u32_e32 v2, vcc, v8, v2 -; GCN-NEXT: v_addc_u32_e32 v2, vcc, v10, v3, vcc +; GCN-NEXT: v_addc_u32_e32 v11, vcc, 0, v4, vcc +; GCN-NEXT: v_mad_u64_u32 v[4:5], s[4:5], v7, v8, 0 +; GCN-NEXT: v_add_u32_e32 v2, vcc, v10, v2 +; GCN-NEXT: v_addc_u32_e32 v2, vcc, v11, v3, vcc ; GCN-NEXT: v_addc_u32_e32 v3, vcc, v5, v9, vcc ; GCN-NEXT: v_add_u32_e32 v2, vcc, v2, v4 ; GCN-NEXT: v_addc_u32_e32 v3, vcc, 0, v3, vcc diff --git a/llvm/test/CodeGen/AMDGPU/udiv64.ll b/llvm/test/CodeGen/AMDGPU/udiv64.ll index c124446beed9..1f9b7437e8ba 100644 --- a/llvm/test/CodeGen/AMDGPU/udiv64.ll +++ b/llvm/test/CodeGen/AMDGPU/udiv64.ll @@ -230,10 +230,10 @@ define i64 @v_test_udiv_i64(i64 %x, i64 %y) { ; GCN-NEXT: v_mac_f32_e32 v4, 0xcf800000, v5 ; GCN-NEXT: v_cvt_u32_f32_e32 v5, v5 ; GCN-NEXT: v_cvt_u32_f32_e32 v4, v4 -; GCN-NEXT: v_mul_lo_u32 v9, v6, v5 -; GCN-NEXT: v_mul_hi_u32 v8, v6, v4 +; GCN-NEXT: v_mul_lo_u32 v8, v6, v5 +; GCN-NEXT: v_mul_hi_u32 v9, v6, v4 ; GCN-NEXT: v_mul_lo_u32 v10, v7, v4 -; GCN-NEXT: v_add_i32_e32 v8, vcc, v8, v9 +; GCN-NEXT: v_add_i32_e32 v8, vcc, v9, v8 ; GCN-NEXT: v_mul_lo_u32 v9, v6, v4 ; GCN-NEXT: v_add_i32_e32 v8, vcc, v8, v10 ; GCN-NEXT: v_mul_lo_u32 v10, v4, v8 @@ -1558,7 +1558,7 @@ define i64 @v_test_udiv_k_den_i64(i64 %x) { ; GCN-NEXT: v_madak_f32 v2, 0, v2, 0x41c00000 ; GCN-NEXT: v_rcp_f32_e32 v2, v2 ; GCN-NEXT: s_movk_i32 s4, 0xffe8 -; GCN-NEXT: v_mov_b32_e32 v9, 0 +; GCN-NEXT: v_mov_b32_e32 v8, 0 ; GCN-NEXT: v_mul_f32_e32 v2, 0x5f7ffffc, v2 ; GCN-NEXT: v_mul_f32_e32 v3, 0x2f800000, v2 ; GCN-NEXT: v_trunc_f32_e32 v3, v3 @@ -1566,22 +1566,22 @@ define i64 @v_test_udiv_k_den_i64(i64 %x) { ; GCN-NEXT: v_cvt_u32_f32_e32 v2, v2 ; GCN-NEXT: v_cvt_u32_f32_e32 v3, v3 ; GCN-NEXT: v_mul_hi_u32 v4, v2, s4 -; GCN-NEXT: v_mul_lo_u32 v5, v3, s4 -; GCN-NEXT: v_mul_lo_u32 v6, 
v2, s4 +; GCN-NEXT: v_mul_lo_u32 v6, v3, s4 +; GCN-NEXT: v_mul_lo_u32 v5, v2, s4 ; GCN-NEXT: v_subrev_i32_e32 v4, vcc, v2, v4 -; GCN-NEXT: v_add_i32_e32 v4, vcc, v5, v4 -; GCN-NEXT: v_mul_lo_u32 v5, v2, v4 -; GCN-NEXT: v_mul_hi_u32 v7, v2, v6 -; GCN-NEXT: v_mul_hi_u32 v8, v2, v4 +; GCN-NEXT: v_add_i32_e32 v4, vcc, v6, v4 +; GCN-NEXT: v_mul_hi_u32 v7, v2, v5 +; GCN-NEXT: v_mul_lo_u32 v6, v2, v4 +; GCN-NEXT: v_mul_hi_u32 v9, v2, v4 ; GCN-NEXT: v_mul_hi_u32 v10, v3, v4 ; GCN-NEXT: v_mul_lo_u32 v4, v3, v4 -; GCN-NEXT: v_add_i32_e32 v5, vcc, v7, v5 -; GCN-NEXT: v_addc_u32_e32 v7, vcc, 0, v8, vcc -; GCN-NEXT: v_mul_lo_u32 v8, v3, v6 -; GCN-NEXT: v_mul_hi_u32 v6, v3, v6 -; GCN-NEXT: v_add_i32_e32 v5, vcc, v5, v8 -; GCN-NEXT: v_addc_u32_e32 v5, vcc, v7, v6, vcc -; GCN-NEXT: v_addc_u32_e32 v6, vcc, v10, v9, vcc +; GCN-NEXT: v_add_i32_e32 v6, vcc, v7, v6 +; GCN-NEXT: v_addc_u32_e32 v7, vcc, 0, v9, vcc +; GCN-NEXT: v_mul_lo_u32 v9, v3, v5 +; GCN-NEXT: v_mul_hi_u32 v5, v3, v5 +; GCN-NEXT: v_add_i32_e32 v6, vcc, v6, v9 +; GCN-NEXT: v_addc_u32_e32 v5, vcc, v7, v5, vcc +; GCN-NEXT: v_addc_u32_e32 v6, vcc, v10, v8, vcc ; GCN-NEXT: v_add_i32_e32 v4, vcc, v5, v4 ; GCN-NEXT: v_addc_u32_e32 v5, vcc, 0, v6, vcc ; GCN-NEXT: v_add_i32_e32 v2, vcc, v2, v4 @@ -1593,16 +1593,16 @@ define i64 @v_test_udiv_k_den_i64(i64 %x) { ; GCN-NEXT: v_add_i32_e32 v4, vcc, v4, v5 ; GCN-NEXT: v_mul_lo_u32 v5, v2, v4 ; GCN-NEXT: v_mul_hi_u32 v7, v2, v6 -; GCN-NEXT: v_mul_hi_u32 v8, v2, v4 +; GCN-NEXT: v_mul_hi_u32 v9, v2, v4 ; GCN-NEXT: v_mul_hi_u32 v10, v3, v4 ; GCN-NEXT: v_mul_lo_u32 v4, v3, v4 ; GCN-NEXT: v_add_i32_e32 v5, vcc, v7, v5 -; GCN-NEXT: v_addc_u32_e32 v7, vcc, 0, v8, vcc -; GCN-NEXT: v_mul_lo_u32 v8, v3, v6 +; GCN-NEXT: v_addc_u32_e32 v7, vcc, 0, v9, vcc +; GCN-NEXT: v_mul_lo_u32 v9, v3, v6 ; GCN-NEXT: v_mul_hi_u32 v6, v3, v6 -; GCN-NEXT: v_add_i32_e32 v5, vcc, v5, v8 +; GCN-NEXT: v_add_i32_e32 v5, vcc, v5, v9 ; GCN-NEXT: v_addc_u32_e32 v5, vcc, v7, v6, vcc -; GCN-NEXT: v_addc_u32_e32 v6, vcc, v10, v9, vcc +; GCN-NEXT: v_addc_u32_e32 v6, vcc, v10, v8, vcc ; GCN-NEXT: v_add_i32_e32 v4, vcc, v5, v4 ; GCN-NEXT: v_addc_u32_e32 v5, vcc, 0, v6, vcc ; GCN-NEXT: v_add_i32_e32 v2, vcc, v2, v4 @@ -1618,7 +1618,7 @@ define i64 @v_test_udiv_k_den_i64(i64 %x) { ; GCN-NEXT: v_mul_hi_u32 v2, v1, v2 ; GCN-NEXT: v_add_i32_e32 v4, vcc, v4, v6 ; GCN-NEXT: v_addc_u32_e32 v2, vcc, v5, v2, vcc -; GCN-NEXT: v_addc_u32_e32 v4, vcc, v7, v9, vcc +; GCN-NEXT: v_addc_u32_e32 v4, vcc, v7, v8, vcc ; GCN-NEXT: v_add_i32_e32 v2, vcc, v2, v3 ; GCN-NEXT: v_addc_u32_e32 v3, vcc, 0, v4, vcc ; GCN-NEXT: v_mul_lo_u32 v4, v3, 24 diff --git a/llvm/test/CodeGen/AMDGPU/unstructured-cfg-def-use-issue.ll b/llvm/test/CodeGen/AMDGPU/unstructured-cfg-def-use-issue.ll index 347b84513a53..f20504019f1f 100644 --- a/llvm/test/CodeGen/AMDGPU/unstructured-cfg-def-use-issue.ll +++ b/llvm/test/CodeGen/AMDGPU/unstructured-cfg-def-use-issue.ll @@ -51,15 +51,15 @@ define hidden void @widget() { ; GCN-NEXT: v_mov_b32_e32 v1, 0 ; GCN-NEXT: flat_store_dword v[0:1], v2 ; GCN-NEXT: .LBB0_7: ; %UnifiedReturnBlock -; GCN-NEXT: v_readlane_b32 s4, v40, 0 -; GCN-NEXT: v_readlane_b32 s5, v40, 1 +; GCN-NEXT: v_readlane_b32 s31, v40, 1 +; GCN-NEXT: v_readlane_b32 s30, v40, 0 ; GCN-NEXT: s_addk_i32 s32, 0xfc00 ; GCN-NEXT: v_readlane_b32 s33, v40, 2 -; GCN-NEXT: s_or_saveexec_b64 s[6:7], -1 +; GCN-NEXT: s_or_saveexec_b64 s[4:5], -1 ; GCN-NEXT: buffer_load_dword v40, off, s[0:3], s32 ; 4-byte Folded Reload -; GCN-NEXT: s_mov_b64 exec, s[6:7] +; GCN-NEXT: s_mov_b64 exec, s[4:5] ; GCN-NEXT: 
s_waitcnt vmcnt(0) -; GCN-NEXT: s_setpc_b64 s[4:5] +; GCN-NEXT: s_setpc_b64 s[30:31] ; SI-OPT-LABEL: @widget( ; SI-OPT-NEXT: bb: ; SI-OPT-NEXT: [[TMP:%.*]] = load i32, i32 addrspace(1)* null, align 16 @@ -188,7 +188,7 @@ define hidden void @blam() { ; GCN-NEXT: s_or_saveexec_b64 s[16:17], -1 ; GCN-NEXT: buffer_store_dword v40, off, s[0:3], s32 offset:20 ; 4-byte Folded Spill ; GCN-NEXT: s_mov_b64 exec, s[16:17] -; GCN-NEXT: v_writelane_b32 v40, s33, 15 +; GCN-NEXT: v_writelane_b32 v40, s33, 17 ; GCN-NEXT: s_mov_b32 s33, s32 ; GCN-NEXT: s_addk_i32 s32, 0x800 ; GCN-NEXT: buffer_store_dword v41, off, s[0:3], s33 offset:16 ; 4-byte Folded Spill @@ -196,21 +196,23 @@ define hidden void @blam() { ; GCN-NEXT: buffer_store_dword v43, off, s[0:3], s33 offset:8 ; 4-byte Folded Spill ; GCN-NEXT: buffer_store_dword v44, off, s[0:3], s33 offset:4 ; 4-byte Folded Spill ; GCN-NEXT: buffer_store_dword v45, off, s[0:3], s33 ; 4-byte Folded Spill -; GCN-NEXT: v_writelane_b32 v40, s34, 0 -; GCN-NEXT: v_writelane_b32 v40, s35, 1 -; GCN-NEXT: v_writelane_b32 v40, s36, 2 -; GCN-NEXT: v_writelane_b32 v40, s37, 3 -; GCN-NEXT: v_writelane_b32 v40, s38, 4 -; GCN-NEXT: v_writelane_b32 v40, s39, 5 -; GCN-NEXT: v_writelane_b32 v40, s40, 6 -; GCN-NEXT: v_writelane_b32 v40, s41, 7 -; GCN-NEXT: v_writelane_b32 v40, s42, 8 -; GCN-NEXT: v_writelane_b32 v40, s43, 9 -; GCN-NEXT: v_writelane_b32 v40, s44, 10 -; GCN-NEXT: v_writelane_b32 v40, s45, 11 -; GCN-NEXT: v_writelane_b32 v40, s46, 12 -; GCN-NEXT: v_writelane_b32 v40, s48, 13 -; GCN-NEXT: v_writelane_b32 v40, s49, 14 +; GCN-NEXT: v_writelane_b32 v40, s30, 0 +; GCN-NEXT: v_writelane_b32 v40, s31, 1 +; GCN-NEXT: v_writelane_b32 v40, s34, 2 +; GCN-NEXT: v_writelane_b32 v40, s35, 3 +; GCN-NEXT: v_writelane_b32 v40, s36, 4 +; GCN-NEXT: v_writelane_b32 v40, s37, 5 +; GCN-NEXT: v_writelane_b32 v40, s38, 6 +; GCN-NEXT: v_writelane_b32 v40, s39, 7 +; GCN-NEXT: v_writelane_b32 v40, s40, 8 +; GCN-NEXT: v_writelane_b32 v40, s41, 9 +; GCN-NEXT: v_writelane_b32 v40, s42, 10 +; GCN-NEXT: v_writelane_b32 v40, s43, 11 +; GCN-NEXT: v_writelane_b32 v40, s44, 12 +; GCN-NEXT: v_writelane_b32 v40, s45, 13 +; GCN-NEXT: v_writelane_b32 v40, s46, 14 +; GCN-NEXT: v_writelane_b32 v40, s48, 15 +; GCN-NEXT: v_writelane_b32 v40, s49, 16 ; GCN-NEXT: v_mov_b32_e32 v41, v31 ; GCN-NEXT: s_mov_b32 s44, s14 ; GCN-NEXT: s_mov_b32 s45, s13 diff --git a/llvm/test/CodeGen/AMDGPU/urem-seteq-illegal-types.ll b/llvm/test/CodeGen/AMDGPU/urem-seteq-illegal-types.ll index 79723603f1c1..ba3d120be9ba 100644 --- a/llvm/test/CodeGen/AMDGPU/urem-seteq-illegal-types.ll +++ b/llvm/test/CodeGen/AMDGPU/urem-seteq-illegal-types.ll @@ -76,22 +76,21 @@ define <3 x i1> @test_urem_vec(<3 x i11> %X) nounwind { ; CHECK-NEXT: s_mov_b32 s5, 0x8311eb33 ; CHECK-NEXT: s_mov_b32 s6, 0x20140c ; CHECK-NEXT: s_mov_b32 s7, 0xb6db6db7 -; CHECK-NEXT: s_mov_b32 s8, 0x49249249 -; CHECK-NEXT: s_mov_b32 s9, 0x24924924 -; CHECK-NEXT: s_mov_b32 s10, 0xaaaaaaab -; CHECK-NEXT: s_mov_b32 s11, 0x2aaaaaaa +; CHECK-NEXT: s_mov_b32 s8, 0x24924924 +; CHECK-NEXT: s_mov_b32 s9, 0xaaaaaaab +; CHECK-NEXT: s_mov_b32 s10, 0x2aaaaaaa ; CHECK-NEXT: v_and_b32_e32 v0, s4, v0 ; CHECK-NEXT: v_and_b32_e32 v1, s4, v1 ; CHECK-NEXT: v_and_b32_e32 v2, s4, v2 ; CHECK-NEXT: v_mul_lo_u32 v2, v2, s5 ; CHECK-NEXT: v_mul_lo_u32 v1, v1, s7 -; CHECK-NEXT: v_mul_lo_u32 v0, v0, s10 +; CHECK-NEXT: v_mul_lo_u32 v0, v0, s9 ; CHECK-NEXT: v_add_i32_e32 v2, vcc, 0xf9dc299a, v2 -; CHECK-NEXT: v_add_i32_e32 v1, vcc, s8, v1 +; CHECK-NEXT: v_add_i32_e32 v1, vcc, 0x49249249, v1 ; CHECK-NEXT: 
v_alignbit_b32 v0, v0, v0, 1 -; CHECK-NEXT: v_cmp_lt_u32_e32 vcc, s11, v0 +; CHECK-NEXT: v_cmp_lt_u32_e32 vcc, s10, v0 ; CHECK-NEXT: v_cndmask_b32_e64 v0, 0, 1, vcc -; CHECK-NEXT: v_cmp_lt_u32_e32 vcc, s9, v1 +; CHECK-NEXT: v_cmp_lt_u32_e32 vcc, s8, v1 ; CHECK-NEXT: v_cndmask_b32_e64 v1, 0, 1, vcc ; CHECK-NEXT: v_cmp_lt_u32_e32 vcc, s6, v2 ; CHECK-NEXT: v_cndmask_b32_e64 v2, 0, 1, vcc diff --git a/llvm/test/CodeGen/AMDGPU/urem64.ll b/llvm/test/CodeGen/AMDGPU/urem64.ll index 00bae7afa10f..b14825871cea 100644 --- a/llvm/test/CodeGen/AMDGPU/urem64.ll +++ b/llvm/test/CodeGen/AMDGPU/urem64.ll @@ -240,10 +240,10 @@ define i64 @v_test_urem_i64(i64 %x, i64 %y) { ; GCN-NEXT: v_mac_f32_e32 v4, 0xcf800000, v5 ; GCN-NEXT: v_cvt_u32_f32_e32 v5, v5 ; GCN-NEXT: v_cvt_u32_f32_e32 v4, v4 -; GCN-NEXT: v_mul_lo_u32 v9, v6, v5 -; GCN-NEXT: v_mul_hi_u32 v8, v6, v4 +; GCN-NEXT: v_mul_lo_u32 v8, v6, v5 +; GCN-NEXT: v_mul_hi_u32 v9, v6, v4 ; GCN-NEXT: v_mul_lo_u32 v10, v7, v4 -; GCN-NEXT: v_add_i32_e32 v8, vcc, v8, v9 +; GCN-NEXT: v_add_i32_e32 v8, vcc, v9, v8 ; GCN-NEXT: v_mul_lo_u32 v9, v6, v4 ; GCN-NEXT: v_add_i32_e32 v8, vcc, v8, v10 ; GCN-NEXT: v_mul_lo_u32 v10, v4, v8 diff --git a/llvm/test/CodeGen/AMDGPU/vgpr-tuple-allocation.ll b/llvm/test/CodeGen/AMDGPU/vgpr-tuple-allocation.ll index 9b4bc71eed41..46ee737d3125 100644 --- a/llvm/test/CodeGen/AMDGPU/vgpr-tuple-allocation.ll +++ b/llvm/test/CodeGen/AMDGPU/vgpr-tuple-allocation.ll @@ -29,7 +29,8 @@ define <4 x float> @non_preserved_vgpr_tuple8(<8 x i32> %rsrc, <4 x i32> %samp, ; GFX9-NEXT: s_add_u32 s4, s4, extern_func@gotpcrel32@lo+4 ; GFX9-NEXT: s_addc_u32 s5, s5, extern_func@gotpcrel32@hi+12 ; GFX9-NEXT: s_load_dwordx2 s[4:5], s[4:5], 0x0 -; GFX9-NEXT: v_writelane_b32 v40, s30, 0 +; GFX9: v_writelane_b32 v40, s30, 0 +; GFX9: v_writelane_b32 v40, s31, 1 ; GFX9: s_waitcnt lgkmcnt(0) ; GFX9-NEXT: s_swappc_b64 s[30:31], s[4:5] @@ -37,8 +38,10 @@ define <4 x float> @non_preserved_vgpr_tuple8(<8 x i32> %rsrc, <4 x i32> %samp, ; GFX9-NEXT: buffer_load_dword v43, off, s[0:3], s33 offset:4 ; 4-byte Folded Reload ; GFX9-NEXT: buffer_load_dword v42, off, s[0:3], s33 offset:8 ; 4-byte Folded Reload ; GFX9-NEXT: buffer_load_dword v41, off, s[0:3], s33 offset:12 ; 4-byte Folded Reload +; GFX9: v_readlane_b32 s31, v40, 1 +; GFX9: v_readlane_b32 s30, v40, 0 ; GFX9: buffer_load_dword v40, off, s[0:3], s32 offset:16 ; 4-byte Folded Reload -; GFX9: s_setpc_b64 s[4:5] +; GFX9: s_setpc_b64 s[30:31] ; ; GFX10-LABEL: non_preserved_vgpr_tuple8: ; GFX10: buffer_store_dword v40, off, s[0:3], s32 offset:16 ; 4-byte Folded Spill @@ -63,7 +66,9 @@ define <4 x float> @non_preserved_vgpr_tuple8(<8 x i32> %rsrc, <4 x i32> %samp, ; GFX10-NEXT: s_getpc_b64 s[4:5] ; GFX10-NEXT: s_add_u32 s4, s4, extern_func@gotpcrel32@lo+4 ; GFX10-NEXT: s_addc_u32 s5, s5, extern_func@gotpcrel32@hi+12 +; GFX10: v_writelane_b32 v40, s30, 0 ; GFX10: s_load_dwordx2 s[4:5], s[4:5], 0x0 +; GFX10: v_writelane_b32 v40, s31, 1 ; GFX10: s_waitcnt lgkmcnt(0) ; GFX10-NEXT: s_swappc_b64 s[30:31], s[4:5] @@ -72,8 +77,10 @@ define <4 x float> @non_preserved_vgpr_tuple8(<8 x i32> %rsrc, <4 x i32> %samp, ; GFX10-NEXT: buffer_load_dword v42, off, s[0:3], s33 offset:8 ; GFX10-NEXT: buffer_load_dword v41, off, s[0:3], s33 offset:12 +; GFX10: v_readlane_b32 s31, v40, 1 +; GFX10: v_readlane_b32 s30, v40, 0 ; GFX10: buffer_load_dword v40, off, s[0:3], s32 offset:16 ; 4-byte Folded Reload -; GFX10: s_setpc_b64 s[4:5] +; GFX10: s_setpc_b64 s[30:31] main_body: call void asm sideeffect "", 
"~{v0},~{v1},~{v2},~{v3},~{v4},~{v5},~{v6},~{v7}"() #0 call void asm sideeffect "", "~{v8},~{v9},~{v10},~{v11},~{v12},~{v13},~{v14},~{v15}"() #0 @@ -92,6 +99,8 @@ define <4 x float> @call_preserved_vgpr_tuple8(<8 x i32> %rsrc, <4 x i32> %samp, ; GFX9-LABEL: call_preserved_vgpr_tuple8: ; GFX9: buffer_store_dword v40, off, s[0:3], s32 offset:20 ; 4-byte Folded Spill +; GFX9: v_writelane_b32 v40, s30, 0 +; GFX9: v_writelane_b32 v40, s31, 1 ; GFX9: buffer_store_dword v41, off, s[0:3], s33 offset:16 ; 4-byte Folded Spill ; GFX9-NEXT: buffer_store_dword v42, off, s[0:3], s33 offset:12 ; 4-byte Folded Spill ; GFX9-NEXT: buffer_store_dword v43, off, s[0:3], s33 offset:8 ; 4-byte Folded Spill @@ -110,7 +119,7 @@ define <4 x float> @call_preserved_vgpr_tuple8(<8 x i32> %rsrc, <4 x i32> %samp, ; GFX9-NEXT: s_add_u32 s4, s4, extern_func@gotpcrel32@lo+4 ; GFX9-NEXT: s_addc_u32 s5, s5, extern_func@gotpcrel32@hi+12 ; GFX9-NEXT: s_load_dwordx2 s[4:5], s[4:5], 0x0 -; GFX9: s_waitcnt vmcnt(0) +; GFX9-NEXT: s_waitcnt vmcnt(0) ; GFX9-NEXT: global_store_dwordx4 v[0:1], v[0:3], off ; GFX9-NEXT: s_waitcnt lgkmcnt(0) ; GFX9-NEXT: s_swappc_b64 s[30:31], s[4:5] @@ -122,8 +131,10 @@ define <4 x float> @call_preserved_vgpr_tuple8(<8 x i32> %rsrc, <4 x i32> %samp, ; GFX9-NEXT: buffer_load_dword v42, off, s[0:3], s33 offset:12 ; 4-byte Folded Reload ; GFX9-NEXT: buffer_load_dword v41, off, s[0:3], s33 offset:16 ; 4-byte Folded Reload +; GFX9: v_readlane_b32 s31, v40, 1 +; GFX9-NEXT: v_readlane_b32 s30, v40, 0 ; GFX9: buffer_load_dword v40, off, s[0:3], s32 offset:20 ; 4-byte Folded Reload -; GFX9: s_setpc_b64 s[4:5] +; GFX9: s_setpc_b64 s[30:31] ; ; GFX10-LABEL: call_preserved_vgpr_tuple8: ; GFX10: buffer_store_dword v40, off, s[0:3], s32 offset:20 ; 4-byte Folded Spill @@ -133,15 +144,20 @@ define <4 x float> @call_preserved_vgpr_tuple8(<8 x i32> %rsrc, <4 x i32> %samp, ; GFX10-NEXT: buffer_store_dword v44, off, s[0:3], s33 offset:4 ; 4-byte Folded Spill ; GFX10-NEXT: buffer_store_dword v45, off, s[0:3], s33 ; 4-byte Folded Spill +; GFX10: v_writelane_b32 v40, s30, 0 +; GFX10: v_mov_b32_e32 v41, v16 +; GFX10: v_mov_b32_e32 v42, v15 +; GFX10: v_mov_b32_e32 v43, v14 +; GFX10: v_mov_b32_e32 v44, v13 +; GFX10: v_writelane_b32 v40, s31, 1 +; GFX10: v_mov_b32_e32 v45, v12 ; GFX10: image_gather4_c_b_cl v[0:3], v[12:16], s[36:43], s[4:7] dmask:0x1 dim:SQ_RSRC_IMG_2D ; GFX10-NEXT: s_waitcnt_depctr 0xffe3 ; GFX10-NEXT: s_getpc_b64 s[4:5] ; GFX10-NEXT: s_add_u32 s4, s4, extern_func@gotpcrel32@lo+4 ; GFX10-NEXT: s_addc_u32 s5, s5, extern_func@gotpcrel32@hi+12 -; GFX10-NEXT: v_writelane_b32 v40, s30, 8 ; GFX10-NEXT: s_load_dwordx2 s[4:5], s[4:5], 0x0 -; GFX10-NEXT: v_writelane_b32 v40, s31, 9 ; GFX10-NEXT: s_waitcnt vmcnt(0) ; GFX10-NEXT: global_store_dwordx4 v[0:1], v[0:3], off ; GFX10-NEXT: s_waitcnt lgkmcnt(0) @@ -153,8 +169,10 @@ define <4 x float> @call_preserved_vgpr_tuple8(<8 x i32> %rsrc, <4 x i32> %samp, ; GFX10-NEXT: buffer_load_dword v43, off, s[0:3], s33 offset:8 ; GFX10-NEXT: buffer_load_dword v42, off, s[0:3], s33 offset:12 ; GFX10-NEXT: buffer_load_dword v41, off, s[0:3], s33 offset:16 +; GFX10: v_readlane_b32 s31, v40, 1 +; GFX10: v_readlane_b32 s30, v40, 0 ; GFX10: buffer_load_dword v40, off, s[0:3], s32 offset:20 -; GFX10: s_setpc_b64 s[4:5] +; GFX10: s_setpc_b64 s[30:31] main_body: %v = call <4 x float> @llvm.amdgcn.image.gather4.c.b.cl.2d.v4f32.f32.f32(i32 1, float %bias, float %zcompare, float %s, float %t, float %clamp, <8 x i32> undef, <4 x i32> undef, i1 false, i32 0, i32 0) store <4 x float> %v, <4 x 
float> addrspace(1)* undef diff --git a/llvm/test/CodeGen/AMDGPU/wave32.ll b/llvm/test/CodeGen/AMDGPU/wave32.ll index fcdbeea76fa2..eecff44dec3c 100644 --- a/llvm/test/CodeGen/AMDGPU/wave32.ll +++ b/llvm/test/CodeGen/AMDGPU/wave32.ll @@ -1136,8 +1136,8 @@ declare void @external_void_func_void() #1 ; GCN-DAG: v_writelane_b32 v40, s30, 0 ; GCN-DAG: v_writelane_b32 v40, s31, 1 ; GCN: s_swappc_b64 -; GCN-DAG: v_readlane_b32 s4, v40, 0 -; GCN-DAG: v_readlane_b32 s5, v40, 1 +; GCN-DAG: v_readlane_b32 s30, v40, 0 +; GCN-DAG: v_readlane_b32 s31, v40, 1 ; GFX1064: s_addk_i32 s32, 0xfc00 diff --git a/llvm/test/CodeGen/AMDGPU/wwm-reserved-spill.ll b/llvm/test/CodeGen/AMDGPU/wwm-reserved-spill.ll index 6a079bc4e52d..c01485cc3e8e 100644 --- a/llvm/test/CodeGen/AMDGPU/wwm-reserved-spill.ll +++ b/llvm/test/CodeGen/AMDGPU/wwm-reserved-spill.ll @@ -138,8 +138,6 @@ define amdgpu_gfx void @strict_wwm_cfg(<4 x i32> inreg %tmp14, i32 %arg) { ; GFX9-O0-NEXT: buffer_store_dword v1, off, s[0:3], s32 ; 4-byte Folded Spill ; GFX9-O0-NEXT: buffer_store_dword v2, off, s[0:3], s32 offset:4 ; 4-byte Folded Spill ; GFX9-O0-NEXT: s_mov_b64 exec, s[34:35] -; GFX9-O0-NEXT: v_writelane_b32 v5, s30, 0 -; GFX9-O0-NEXT: v_writelane_b32 v5, s31, 1 ; GFX9-O0-NEXT: s_mov_b32 s36, s4 ; GFX9-O0-NEXT: ; kill: def $sgpr36 killed $sgpr36 def $sgpr36_sgpr37_sgpr38_sgpr39 ; GFX9-O0-NEXT: s_mov_b32 s37, s5 @@ -147,37 +145,37 @@ define amdgpu_gfx void @strict_wwm_cfg(<4 x i32> inreg %tmp14, i32 %arg) { ; GFX9-O0-NEXT: s_mov_b32 s39, s7 ; GFX9-O0-NEXT: s_mov_b64 s[42:43], s[38:39] ; GFX9-O0-NEXT: s_mov_b64 s[40:41], s[36:37] -; GFX9-O0-NEXT: v_writelane_b32 v5, s40, 2 -; GFX9-O0-NEXT: v_writelane_b32 v5, s41, 3 -; GFX9-O0-NEXT: v_writelane_b32 v5, s42, 4 -; GFX9-O0-NEXT: v_writelane_b32 v5, s43, 5 -; GFX9-O0-NEXT: s_mov_b32 s30, 0 -; GFX9-O0-NEXT: buffer_load_dwordx2 v[3:4], off, s[36:39], s30 +; GFX9-O0-NEXT: v_writelane_b32 v5, s40, 0 +; GFX9-O0-NEXT: v_writelane_b32 v5, s41, 1 +; GFX9-O0-NEXT: v_writelane_b32 v5, s42, 2 +; GFX9-O0-NEXT: v_writelane_b32 v5, s43, 3 +; GFX9-O0-NEXT: s_mov_b32 s34, 0 +; GFX9-O0-NEXT: buffer_load_dwordx2 v[3:4], off, s[36:39], s34 ; GFX9-O0-NEXT: s_waitcnt vmcnt(0) ; GFX9-O0-NEXT: buffer_store_dword v3, off, s[0:3], s32 offset:16 ; 4-byte Folded Spill ; GFX9-O0-NEXT: s_waitcnt vmcnt(0) ; GFX9-O0-NEXT: buffer_store_dword v4, off, s[0:3], s32 offset:20 ; 4-byte Folded Spill -; GFX9-O0-NEXT: ; implicit-def: $sgpr34_sgpr35 +; GFX9-O0-NEXT: ; implicit-def: $sgpr36_sgpr37 ; GFX9-O0-NEXT: v_mov_b32_e32 v1, v3 ; GFX9-O0-NEXT: s_not_b64 exec, exec -; GFX9-O0-NEXT: v_mov_b32_e32 v1, s30 +; GFX9-O0-NEXT: v_mov_b32_e32 v1, s34 ; GFX9-O0-NEXT: s_not_b64 exec, exec -; GFX9-O0-NEXT: s_or_saveexec_b64 s[34:35], -1 -; GFX9-O0-NEXT: v_mov_b32_e32 v2, s30 +; GFX9-O0-NEXT: s_or_saveexec_b64 s[36:37], -1 +; GFX9-O0-NEXT: v_mov_b32_e32 v2, s34 ; GFX9-O0-NEXT: s_nop 1 ; GFX9-O0-NEXT: v_mov_b32_dpp v2, v1 row_bcast:31 row_mask:0xc bank_mask:0xf ; GFX9-O0-NEXT: v_add_u32_e64 v1, v1, v2 -; GFX9-O0-NEXT: s_mov_b64 exec, s[34:35] +; GFX9-O0-NEXT: s_mov_b64 exec, s[36:37] ; GFX9-O0-NEXT: v_mov_b32_e32 v3, v1 ; GFX9-O0-NEXT: buffer_store_dword v3, off, s[0:3], s32 offset:12 ; 4-byte Folded Spill -; GFX9-O0-NEXT: v_cmp_eq_u32_e64 s[34:35], v0, s30 -; GFX9-O0-NEXT: v_mov_b32_e32 v0, s30 +; GFX9-O0-NEXT: v_cmp_eq_u32_e64 s[36:37], v0, s34 +; GFX9-O0-NEXT: v_mov_b32_e32 v0, s34 ; GFX9-O0-NEXT: buffer_store_dword v0, off, s[0:3], s32 offset:8 ; 4-byte Folded Spill -; GFX9-O0-NEXT: s_mov_b64 s[30:31], exec -; GFX9-O0-NEXT: v_writelane_b32 v5, 
s30, 6 -; GFX9-O0-NEXT: v_writelane_b32 v5, s31, 7 -; GFX9-O0-NEXT: s_and_b64 s[30:31], s[30:31], s[34:35] -; GFX9-O0-NEXT: s_mov_b64 exec, s[30:31] +; GFX9-O0-NEXT: s_mov_b64 s[34:35], exec +; GFX9-O0-NEXT: v_writelane_b32 v5, s34, 4 +; GFX9-O0-NEXT: v_writelane_b32 v5, s35, 5 +; GFX9-O0-NEXT: s_and_b64 s[34:35], s[34:35], s[36:37] +; GFX9-O0-NEXT: s_mov_b64 exec, s[34:35] ; GFX9-O0-NEXT: s_cbranch_execz .LBB1_2 ; GFX9-O0-NEXT: ; %bb.1: ; %if ; GFX9-O0-NEXT: buffer_load_dword v3, off, s[0:3], s32 offset:16 ; 4-byte Folded Reload @@ -185,29 +183,27 @@ define amdgpu_gfx void @strict_wwm_cfg(<4 x i32> inreg %tmp14, i32 %arg) { ; GFX9-O0-NEXT: buffer_load_dword v4, off, s[0:3], s32 offset:20 ; 4-byte Folded Reload ; GFX9-O0-NEXT: s_waitcnt vmcnt(0) ; GFX9-O0-NEXT: v_mov_b32_e32 v0, v4 -; GFX9-O0-NEXT: s_or_saveexec_b64 s[30:31], -1 +; GFX9-O0-NEXT: s_or_saveexec_b64 s[34:35], -1 ; GFX9-O0-NEXT: v_mov_b32_e32 v1, 0 -; GFX9-O0-NEXT: s_mov_b64 exec, s[30:31] +; GFX9-O0-NEXT: s_mov_b64 exec, s[34:35] ; GFX9-O0-NEXT: v_mov_b32_e32 v2, v0 ; GFX9-O0-NEXT: s_not_b64 exec, exec ; GFX9-O0-NEXT: v_mov_b32_e32 v2, v1 ; GFX9-O0-NEXT: s_not_b64 exec, exec -; GFX9-O0-NEXT: s_or_saveexec_b64 s[30:31], -1 +; GFX9-O0-NEXT: s_or_saveexec_b64 s[34:35], -1 ; GFX9-O0-NEXT: v_mov_b32_dpp v1, v2 row_bcast:31 row_mask:0xc bank_mask:0xf ; GFX9-O0-NEXT: v_add_u32_e64 v1, v2, v1 -; GFX9-O0-NEXT: s_mov_b64 exec, s[30:31] +; GFX9-O0-NEXT: s_mov_b64 exec, s[34:35] ; GFX9-O0-NEXT: v_mov_b32_e32 v0, v1 ; GFX9-O0-NEXT: buffer_store_dword v0, off, s[0:3], s32 offset:8 ; 4-byte Folded Spill ; GFX9-O0-NEXT: .LBB1_2: ; %merge -; GFX9-O0-NEXT: v_readlane_b32 s34, v5, 6 -; GFX9-O0-NEXT: v_readlane_b32 s35, v5, 7 +; GFX9-O0-NEXT: v_readlane_b32 s34, v5, 4 +; GFX9-O0-NEXT: v_readlane_b32 s35, v5, 5 ; GFX9-O0-NEXT: s_or_b64 exec, exec, s[34:35] -; GFX9-O0-NEXT: v_readlane_b32 s30, v5, 0 -; GFX9-O0-NEXT: v_readlane_b32 s31, v5, 1 -; GFX9-O0-NEXT: v_readlane_b32 s36, v5, 2 -; GFX9-O0-NEXT: v_readlane_b32 s37, v5, 3 -; GFX9-O0-NEXT: v_readlane_b32 s38, v5, 4 -; GFX9-O0-NEXT: v_readlane_b32 s39, v5, 5 +; GFX9-O0-NEXT: v_readlane_b32 s36, v5, 0 +; GFX9-O0-NEXT: v_readlane_b32 s37, v5, 1 +; GFX9-O0-NEXT: v_readlane_b32 s38, v5, 2 +; GFX9-O0-NEXT: v_readlane_b32 s39, v5, 3 ; GFX9-O0-NEXT: buffer_load_dword v0, off, s[0:3], s32 offset:12 ; 4-byte Folded Reload ; GFX9-O0-NEXT: buffer_load_dword v3, off, s[0:3], s32 offset:8 ; 4-byte Folded Reload ; GFX9-O0-NEXT: s_waitcnt vmcnt(0) @@ -361,22 +357,22 @@ define amdgpu_gfx void @strict_wwm_call(<4 x i32> inreg %tmp14, i32 inreg %arg) ; GFX9-O0-NEXT: v_mov_b32_e32 v2, s34 ; GFX9-O0-NEXT: s_not_b64 exec, exec ; GFX9-O0-NEXT: s_or_saveexec_b64 s[40:41], -1 -; GFX9-O0-NEXT: s_getpc_b64 s[30:31] -; GFX9-O0-NEXT: s_add_u32 s30, s30, strict_wwm_called@rel32@lo+4 -; GFX9-O0-NEXT: s_addc_u32 s31, s31, strict_wwm_called@rel32@hi+12 +; GFX9-O0-NEXT: s_getpc_b64 s[42:43] +; GFX9-O0-NEXT: s_add_u32 s42, s42, strict_wwm_called@rel32@lo+4 +; GFX9-O0-NEXT: s_addc_u32 s43, s43, strict_wwm_called@rel32@hi+12 ; GFX9-O0-NEXT: s_mov_b64 s[46:47], s[2:3] ; GFX9-O0-NEXT: s_mov_b64 s[44:45], s[0:1] ; GFX9-O0-NEXT: s_mov_b64 s[0:1], s[44:45] ; GFX9-O0-NEXT: s_mov_b64 s[2:3], s[46:47] ; GFX9-O0-NEXT: v_mov_b32_e32 v0, v2 -; GFX9-O0-NEXT: s_swappc_b64 s[30:31], s[30:31] -; GFX9-O0-NEXT: v_readlane_b32 s30, v3, 0 -; GFX9-O0-NEXT: v_readlane_b32 s31, v3, 1 +; GFX9-O0-NEXT: s_swappc_b64 s[30:31], s[42:43] ; GFX9-O0-NEXT: v_mov_b32_e32 v1, v0 ; GFX9-O0-NEXT: v_add_u32_e64 v1, v1, v2 ; GFX9-O0-NEXT: s_mov_b64 exec, s[40:41] ; 
GFX9-O0-NEXT: v_mov_b32_e32 v0, v1 ; GFX9-O0-NEXT: buffer_store_dword v0, off, s[36:39], s34 offset:4 +; GFX9-O0-NEXT: v_readlane_b32 s31, v3, 1 +; GFX9-O0-NEXT: v_readlane_b32 s30, v3, 0 ; GFX9-O0-NEXT: s_add_i32 s32, s32, 0xfffffc00 ; GFX9-O0-NEXT: v_readlane_b32 s33, v3, 2 ; GFX9-O0-NEXT: s_or_saveexec_b64 s[34:35], -1 @@ -391,36 +387,41 @@ define amdgpu_gfx void @strict_wwm_call(<4 x i32> inreg %tmp14, i32 inreg %arg) ; GFX9-O3: ; %bb.0: ; GFX9-O3-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX9-O3-NEXT: s_or_saveexec_b64 s[34:35], -1 +; GFX9-O3-NEXT: buffer_store_dword v3, off, s[0:3], s32 offset:8 ; 4-byte Folded Spill ; GFX9-O3-NEXT: buffer_store_dword v2, off, s[0:3], s32 ; 4-byte Folded Spill ; GFX9-O3-NEXT: buffer_store_dword v1, off, s[0:3], s32 offset:4 ; 4-byte Folded Spill ; GFX9-O3-NEXT: s_mov_b64 exec, s[34:35] -; GFX9-O3-NEXT: s_mov_b32 s38, s33 +; GFX9-O3-NEXT: v_writelane_b32 v3, s33, 2 +; GFX9-O3-NEXT: v_writelane_b32 v3, s30, 0 ; GFX9-O3-NEXT: s_mov_b32 s33, s32 ; GFX9-O3-NEXT: s_addk_i32 s32, 0x400 -; GFX9-O3-NEXT: s_mov_b64 s[36:37], s[30:31] +; GFX9-O3-NEXT: v_writelane_b32 v3, s31, 1 ; GFX9-O3-NEXT: v_mov_b32_e32 v2, s8 ; GFX9-O3-NEXT: s_not_b64 exec, exec ; GFX9-O3-NEXT: v_mov_b32_e32 v2, 0 ; GFX9-O3-NEXT: s_not_b64 exec, exec ; GFX9-O3-NEXT: s_or_saveexec_b64 s[34:35], -1 ; GFX9-O3-NEXT: v_mov_b32_e32 v0, v2 -; GFX9-O3-NEXT: s_getpc_b64 s[30:31] -; GFX9-O3-NEXT: s_add_u32 s30, s30, strict_wwm_called@rel32@lo+4 -; GFX9-O3-NEXT: s_addc_u32 s31, s31, strict_wwm_called@rel32@hi+12 -; GFX9-O3-NEXT: s_swappc_b64 s[30:31], s[30:31] +; GFX9-O3-NEXT: s_getpc_b64 s[36:37] +; GFX9-O3-NEXT: s_add_u32 s36, s36, strict_wwm_called@rel32@lo+4 +; GFX9-O3-NEXT: s_addc_u32 s37, s37, strict_wwm_called@rel32@hi+12 +; GFX9-O3-NEXT: s_swappc_b64 s[30:31], s[36:37] ; GFX9-O3-NEXT: v_mov_b32_e32 v1, v0 ; GFX9-O3-NEXT: v_add_u32_e32 v1, v1, v2 ; GFX9-O3-NEXT: s_mov_b64 exec, s[34:35] ; GFX9-O3-NEXT: v_mov_b32_e32 v0, v1 ; GFX9-O3-NEXT: buffer_store_dword v0, off, s[4:7], 0 offset:4 +; GFX9-O3-NEXT: v_readlane_b32 s31, v3, 1 +; GFX9-O3-NEXT: v_readlane_b32 s30, v3, 0 ; GFX9-O3-NEXT: s_addk_i32 s32, 0xfc00 -; GFX9-O3-NEXT: s_mov_b32 s33, s38 -; GFX9-O3-NEXT: s_or_saveexec_b64 s[30:31], -1 +; GFX9-O3-NEXT: v_readlane_b32 s33, v3, 2 +; GFX9-O3-NEXT: s_or_saveexec_b64 s[34:35], -1 +; GFX9-O3-NEXT: buffer_load_dword v3, off, s[0:3], s32 offset:8 ; 4-byte Folded Reload ; GFX9-O3-NEXT: buffer_load_dword v2, off, s[0:3], s32 ; 4-byte Folded Reload ; GFX9-O3-NEXT: buffer_load_dword v1, off, s[0:3], s32 offset:4 ; 4-byte Folded Reload -; GFX9-O3-NEXT: s_mov_b64 exec, s[30:31] +; GFX9-O3-NEXT: s_mov_b64 exec, s[34:35] ; GFX9-O3-NEXT: s_waitcnt vmcnt(0) -; GFX9-O3-NEXT: s_setpc_b64 s[36:37] +; GFX9-O3-NEXT: s_setpc_b64 s[30:31] %tmp107 = tail call i32 @llvm.amdgcn.set.inactive.i32(i32 %arg, i32 0) %tmp134 = call amdgpu_gfx i32 @strict_wwm_called(i32 %tmp107) %tmp136 = add i32 %tmp134, %tmp107 @@ -534,39 +535,39 @@ define amdgpu_gfx void @strict_wwm_call_i64(<4 x i32> inreg %tmp14, i64 inreg %a ; GFX9-O0-NEXT: s_add_i32 s32, s32, 0xc00 ; GFX9-O0-NEXT: v_writelane_b32 v10, s30, 0 ; GFX9-O0-NEXT: v_writelane_b32 v10, s31, 1 -; GFX9-O0-NEXT: s_mov_b32 s34, s8 -; GFX9-O0-NEXT: s_mov_b32 s36, s4 -; GFX9-O0-NEXT: ; kill: def $sgpr36 killed $sgpr36 def $sgpr36_sgpr37_sgpr38_sgpr39 -; GFX9-O0-NEXT: s_mov_b32 s37, s5 -; GFX9-O0-NEXT: s_mov_b32 s38, s6 -; GFX9-O0-NEXT: s_mov_b32 s39, s7 -; GFX9-O0-NEXT: v_writelane_b32 v10, s36, 2 -; GFX9-O0-NEXT: v_writelane_b32 v10, s37, 3 -; GFX9-O0-NEXT: v_writelane_b32 
v10, s38, 4 -; GFX9-O0-NEXT: v_writelane_b32 v10, s39, 5 -; GFX9-O0-NEXT: ; kill: def $sgpr34 killed $sgpr34 def $sgpr34_sgpr35 -; GFX9-O0-NEXT: s_mov_b32 s35, s9 -; GFX9-O0-NEXT: ; kill: def $sgpr30_sgpr31 killed $sgpr34_sgpr35 -; GFX9-O0-NEXT: s_mov_b64 s[30:31], 0 -; GFX9-O0-NEXT: v_mov_b32_e32 v0, s34 -; GFX9-O0-NEXT: v_mov_b32_e32 v1, s35 +; GFX9-O0-NEXT: s_mov_b32 s36, s8 +; GFX9-O0-NEXT: s_mov_b32 s40, s4 +; GFX9-O0-NEXT: ; kill: def $sgpr40 killed $sgpr40 def $sgpr40_sgpr41_sgpr42_sgpr43 +; GFX9-O0-NEXT: s_mov_b32 s41, s5 +; GFX9-O0-NEXT: s_mov_b32 s42, s6 +; GFX9-O0-NEXT: s_mov_b32 s43, s7 +; GFX9-O0-NEXT: v_writelane_b32 v10, s40, 2 +; GFX9-O0-NEXT: v_writelane_b32 v10, s41, 3 +; GFX9-O0-NEXT: v_writelane_b32 v10, s42, 4 +; GFX9-O0-NEXT: v_writelane_b32 v10, s43, 5 +; GFX9-O0-NEXT: ; kill: def $sgpr36 killed $sgpr36 def $sgpr36_sgpr37 +; GFX9-O0-NEXT: s_mov_b32 s37, s9 +; GFX9-O0-NEXT: ; kill: def $sgpr34_sgpr35 killed $sgpr36_sgpr37 +; GFX9-O0-NEXT: s_mov_b64 s[34:35], 0 +; GFX9-O0-NEXT: v_mov_b32_e32 v0, s36 +; GFX9-O0-NEXT: v_mov_b32_e32 v1, s37 ; GFX9-O0-NEXT: v_mov_b32_e32 v9, v1 ; GFX9-O0-NEXT: v_mov_b32_e32 v8, v0 ; GFX9-O0-NEXT: s_not_b64 exec, exec -; GFX9-O0-NEXT: v_mov_b32_e32 v8, s30 -; GFX9-O0-NEXT: v_mov_b32_e32 v9, s31 +; GFX9-O0-NEXT: v_mov_b32_e32 v8, s34 +; GFX9-O0-NEXT: v_mov_b32_e32 v9, s35 ; GFX9-O0-NEXT: s_not_b64 exec, exec -; GFX9-O0-NEXT: s_or_saveexec_b64 s[30:31], -1 -; GFX9-O0-NEXT: v_writelane_b32 v10, s30, 6 -; GFX9-O0-NEXT: v_writelane_b32 v10, s31, 7 +; GFX9-O0-NEXT: s_or_saveexec_b64 s[34:35], -1 +; GFX9-O0-NEXT: v_writelane_b32 v10, s34, 6 +; GFX9-O0-NEXT: v_writelane_b32 v10, s35, 7 ; GFX9-O0-NEXT: v_mov_b32_e32 v2, v8 -; GFX9-O0-NEXT: s_mov_b32 s30, 32 -; GFX9-O0-NEXT: ; implicit-def: $sgpr34_sgpr35 -; GFX9-O0-NEXT: v_lshrrev_b64 v[3:4], s30, v[8:9] -; GFX9-O0-NEXT: s_getpc_b64 s[30:31] -; GFX9-O0-NEXT: s_add_u32 s30, s30, strict_wwm_called_i64@gotpcrel32@lo+4 -; GFX9-O0-NEXT: s_addc_u32 s31, s31, strict_wwm_called_i64@gotpcrel32@hi+12 -; GFX9-O0-NEXT: s_load_dwordx2 s[30:31], s[30:31], 0x0 +; GFX9-O0-NEXT: s_mov_b32 s34, 32 +; GFX9-O0-NEXT: ; implicit-def: $sgpr36_sgpr37 +; GFX9-O0-NEXT: v_lshrrev_b64 v[3:4], s34, v[8:9] +; GFX9-O0-NEXT: s_getpc_b64 s[34:35] +; GFX9-O0-NEXT: s_add_u32 s34, s34, strict_wwm_called_i64@gotpcrel32@lo+4 +; GFX9-O0-NEXT: s_addc_u32 s35, s35, strict_wwm_called_i64@gotpcrel32@hi+12 +; GFX9-O0-NEXT: s_load_dwordx2 s[34:35], s[34:35], 0x0 ; GFX9-O0-NEXT: s_mov_b64 s[38:39], s[2:3] ; GFX9-O0-NEXT: s_mov_b64 s[36:37], s[0:1] ; GFX9-O0-NEXT: s_mov_b64 s[0:1], s[36:37] @@ -574,15 +575,13 @@ define amdgpu_gfx void @strict_wwm_call_i64(<4 x i32> inreg %tmp14, i64 inreg %a ; GFX9-O0-NEXT: v_mov_b32_e32 v0, v2 ; GFX9-O0-NEXT: v_mov_b32_e32 v1, v3 ; GFX9-O0-NEXT: s_waitcnt lgkmcnt(0) -; GFX9-O0-NEXT: s_swappc_b64 s[30:31], s[30:31] +; GFX9-O0-NEXT: s_swappc_b64 s[30:31], s[34:35] ; GFX9-O0-NEXT: v_readlane_b32 s34, v10, 6 ; GFX9-O0-NEXT: v_readlane_b32 s35, v10, 7 ; GFX9-O0-NEXT: v_readlane_b32 s36, v10, 2 ; GFX9-O0-NEXT: v_readlane_b32 s37, v10, 3 ; GFX9-O0-NEXT: v_readlane_b32 s38, v10, 4 ; GFX9-O0-NEXT: v_readlane_b32 s39, v10, 5 -; GFX9-O0-NEXT: v_readlane_b32 s30, v10, 0 -; GFX9-O0-NEXT: v_readlane_b32 s31, v10, 1 ; GFX9-O0-NEXT: v_mov_b32_e32 v2, v0 ; GFX9-O0-NEXT: v_mov_b32_e32 v3, v1 ; GFX9-O0-NEXT: v_mov_b32_e32 v4, v8 @@ -594,6 +593,8 @@ define amdgpu_gfx void @strict_wwm_call_i64(<4 x i32> inreg %tmp14, i64 inreg %a ; GFX9-O0-NEXT: v_mov_b32_e32 v1, v3 ; GFX9-O0-NEXT: s_mov_b32 s34, 0 ; GFX9-O0-NEXT: 
buffer_store_dwordx2 v[0:1], off, s[36:39], s34 offset:4 +; GFX9-O0-NEXT: v_readlane_b32 s31, v10, 1 +; GFX9-O0-NEXT: v_readlane_b32 s30, v10, 0 ; GFX9-O0-NEXT: s_add_i32 s32, s32, 0xfffff400 ; GFX9-O0-NEXT: v_readlane_b32 s33, v10, 8 ; GFX9-O0-NEXT: s_or_saveexec_b64 s[34:35], -1 @@ -625,6 +626,7 @@ define amdgpu_gfx void @strict_wwm_call_i64(<4 x i32> inreg %tmp14, i64 inreg %a ; GFX9-O3: ; %bb.0: ; GFX9-O3-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX9-O3-NEXT: s_or_saveexec_b64 s[34:35], -1 +; GFX9-O3-NEXT: buffer_store_dword v8, off, s[0:3], s32 offset:24 ; 4-byte Folded Spill ; GFX9-O3-NEXT: buffer_store_dword v6, off, s[0:3], s32 ; 4-byte Folded Spill ; GFX9-O3-NEXT: s_waitcnt vmcnt(0) ; GFX9-O3-NEXT: buffer_store_dword v7, off, s[0:3], s32 offset:4 ; 4-byte Folded Spill @@ -634,18 +636,19 @@ define amdgpu_gfx void @strict_wwm_call_i64(<4 x i32> inreg %tmp14, i64 inreg %a ; GFX9-O3-NEXT: s_waitcnt vmcnt(0) ; GFX9-O3-NEXT: buffer_store_dword v3, off, s[0:3], s32 offset:20 ; 4-byte Folded Spill ; GFX9-O3-NEXT: s_mov_b64 exec, s[34:35] -; GFX9-O3-NEXT: s_mov_b32 s40, s33 +; GFX9-O3-NEXT: v_writelane_b32 v8, s33, 2 +; GFX9-O3-NEXT: v_writelane_b32 v8, s30, 0 ; GFX9-O3-NEXT: s_mov_b32 s33, s32 ; GFX9-O3-NEXT: s_addk_i32 s32, 0x800 -; GFX9-O3-NEXT: s_mov_b64 s[36:37], s[30:31] +; GFX9-O3-NEXT: v_writelane_b32 v8, s31, 1 ; GFX9-O3-NEXT: v_mov_b32_e32 v6, s8 ; GFX9-O3-NEXT: v_mov_b32_e32 v7, s9 -; GFX9-O3-NEXT: s_or_saveexec_b64 s[30:31], -1 -; GFX9-O3-NEXT: s_getpc_b64 s[34:35] -; GFX9-O3-NEXT: s_add_u32 s34, s34, strict_wwm_called_i64@gotpcrel32@lo+4 -; GFX9-O3-NEXT: s_addc_u32 s35, s35, strict_wwm_called_i64@gotpcrel32@hi+12 -; GFX9-O3-NEXT: s_load_dwordx2 s[34:35], s[34:35], 0x0 -; GFX9-O3-NEXT: s_mov_b64 exec, s[30:31] +; GFX9-O3-NEXT: s_or_saveexec_b64 s[34:35], -1 +; GFX9-O3-NEXT: s_getpc_b64 s[36:37] +; GFX9-O3-NEXT: s_add_u32 s36, s36, strict_wwm_called_i64@gotpcrel32@lo+4 +; GFX9-O3-NEXT: s_addc_u32 s37, s37, strict_wwm_called_i64@gotpcrel32@hi+12 +; GFX9-O3-NEXT: s_load_dwordx2 s[36:37], s[36:37], 0x0 +; GFX9-O3-NEXT: s_mov_b64 exec, s[34:35] ; GFX9-O3-NEXT: s_not_b64 exec, exec ; GFX9-O3-NEXT: v_mov_b32_e32 v6, 0 ; GFX9-O3-NEXT: v_mov_b32_e32 v7, 0 @@ -654,7 +657,7 @@ define amdgpu_gfx void @strict_wwm_call_i64(<4 x i32> inreg %tmp14, i64 inreg %a ; GFX9-O3-NEXT: v_mov_b32_e32 v0, v6 ; GFX9-O3-NEXT: v_mov_b32_e32 v1, v7 ; GFX9-O3-NEXT: s_waitcnt lgkmcnt(0) -; GFX9-O3-NEXT: s_swappc_b64 s[30:31], s[34:35] +; GFX9-O3-NEXT: s_swappc_b64 s[30:31], s[36:37] ; GFX9-O3-NEXT: v_mov_b32_e32 v2, v0 ; GFX9-O3-NEXT: v_mov_b32_e32 v3, v1 ; GFX9-O3-NEXT: v_add_co_u32_e32 v2, vcc, v2, v6 @@ -663,9 +666,13 @@ define amdgpu_gfx void @strict_wwm_call_i64(<4 x i32> inreg %tmp14, i64 inreg %a ; GFX9-O3-NEXT: v_mov_b32_e32 v0, v2 ; GFX9-O3-NEXT: v_mov_b32_e32 v1, v3 ; GFX9-O3-NEXT: buffer_store_dwordx2 v[0:1], off, s[4:7], 0 offset:4 +; GFX9-O3-NEXT: v_readlane_b32 s31, v8, 1 +; GFX9-O3-NEXT: v_readlane_b32 s30, v8, 0 ; GFX9-O3-NEXT: s_addk_i32 s32, 0xf800 -; GFX9-O3-NEXT: s_mov_b32 s33, s40 -; GFX9-O3-NEXT: s_or_saveexec_b64 s[30:31], -1 +; GFX9-O3-NEXT: v_readlane_b32 s33, v8, 2 +; GFX9-O3-NEXT: s_or_saveexec_b64 s[34:35], -1 +; GFX9-O3-NEXT: buffer_load_dword v8, off, s[0:3], s32 offset:24 ; 4-byte Folded Reload +; GFX9-O3-NEXT: s_nop 0 ; GFX9-O3-NEXT: buffer_load_dword v6, off, s[0:3], s32 ; 4-byte Folded Reload ; GFX9-O3-NEXT: s_nop 0 ; GFX9-O3-NEXT: buffer_load_dword v7, off, s[0:3], s32 offset:4 ; 4-byte Folded Reload @@ -676,9 +683,9 @@ define amdgpu_gfx void @strict_wwm_call_i64(<4 x 
i32> inreg %tmp14, i64 inreg %a ; GFX9-O3-NEXT: buffer_load_dword v2, off, s[0:3], s32 offset:16 ; 4-byte Folded Reload ; GFX9-O3-NEXT: s_nop 0 ; GFX9-O3-NEXT: buffer_load_dword v3, off, s[0:3], s32 offset:20 ; 4-byte Folded Reload -; GFX9-O3-NEXT: s_mov_b64 exec, s[30:31] +; GFX9-O3-NEXT: s_mov_b64 exec, s[34:35] ; GFX9-O3-NEXT: s_waitcnt vmcnt(0) -; GFX9-O3-NEXT: s_setpc_b64 s[36:37] +; GFX9-O3-NEXT: s_setpc_b64 s[30:31] %tmp107 = tail call i64 @llvm.amdgcn.set.inactive.i64(i64 %arg, i64 0) %tmp134 = call amdgpu_gfx i64 @strict_wwm_called_i64(i64 %tmp107) %tmp136 = add i64 %tmp134, %tmp107 diff --git a/llvm/test/CodeGen/MIR/AMDGPU/machine-metadata.mir b/llvm/test/CodeGen/MIR/AMDGPU/machine-metadata.mir index a9fbe15b244e..0ef574c2b3f7 100644 --- a/llvm/test/CodeGen/MIR/AMDGPU/machine-metadata.mir +++ b/llvm/test/CodeGen/MIR/AMDGPU/machine-metadata.mir @@ -77,10 +77,9 @@ machineMetadataNodes: - '!7 = distinct !{!7, !"MemcpyLoweringDomain"}' body: | bb.0 (%ir-block.0): - liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $sgpr30_sgpr31 + liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3 ; CHECK-LABEL: name: test_memcpy - ; CHECK: [[COPY:%[0-9]+]]:sreg_64 = COPY $sgpr30_sgpr31 ; CHECK: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr3 ; CHECK: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2 ; CHECK: [[COPY3:%[0-9]+]]:vgpr_32 = COPY $vgpr1 @@ -96,11 +95,8 @@ body: | ; CHECK: [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[GLOBAL_LOAD_DWORDX2_]].sub0 ; CHECK: [[COPY9:%[0-9]+]]:vgpr_32 = COPY [[GLOBAL_LOAD_DWORDX2_]].sub1 ; CHECK: [[V_ADD_U32_e64_:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 killed [[COPY8]], killed [[COPY9]], 0, implicit $exec - ; CHECK: [[COPY10:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY]] ; CHECK: $vgpr0 = COPY [[V_ADD_U32_e64_]] - ; CHECK: [[COPY11:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY10]] - ; CHECK: S_SETPC_B64_return [[COPY11]], implicit $vgpr0 - %4:sreg_64 = COPY $sgpr30_sgpr31 + ; CHECK: S_SETPC_B64_return $sgpr30_sgpr31, implicit $vgpr0 %3:vgpr_32 = COPY $vgpr3 %2:vgpr_32 = COPY $vgpr2 %1:vgpr_32 = COPY $vgpr1 @@ -116,10 +112,8 @@ body: | %13:vgpr_32 = COPY %11.sub0 %14:vgpr_32 = COPY %11.sub1 %15:vgpr_32 = V_ADD_U32_e64 killed %13, killed %14, 0, implicit $exec - %5:ccr_sgpr_64 = COPY %4 $vgpr0 = COPY %15 - %16:ccr_sgpr_64 = COPY %5 - S_SETPC_B64_return %16, implicit $vgpr0 + S_SETPC_B64_return $sgpr30_sgpr31, implicit $vgpr0 ... 
--- @@ -134,10 +128,9 @@ machineMetadataNodes: - '!10 = !{!1, !9}' body: | bb.0 (%ir-block.0): - liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $sgpr30_sgpr31 + liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3 ; CHECK-LABEL: name: test_memcpy_inline - ; CHECK: [[COPY:%[0-9]+]]:sreg_64 = COPY $sgpr30_sgpr31 ; CHECK: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr3 ; CHECK: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2 ; CHECK: [[COPY3:%[0-9]+]]:vgpr_32 = COPY $vgpr1 @@ -153,11 +146,8 @@ body: | ; CHECK: [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[GLOBAL_LOAD_DWORDX2_]].sub0 ; CHECK: [[COPY9:%[0-9]+]]:vgpr_32 = COPY [[GLOBAL_LOAD_DWORDX2_]].sub1 ; CHECK: [[V_ADD_U32_e64_:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 killed [[COPY8]], killed [[COPY9]], 0, implicit $exec - ; CHECK: [[COPY10:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY]] ; CHECK: $vgpr0 = COPY [[V_ADD_U32_e64_]] - ; CHECK: [[COPY11:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY10]] - ; CHECK: S_SETPC_B64_return [[COPY11]], implicit $vgpr0 - %4:sreg_64 = COPY $sgpr30_sgpr31 + ; CHECK: S_SETPC_B64_return $sgpr30_sgpr31, implicit $vgpr0 %3:vgpr_32 = COPY $vgpr3 %2:vgpr_32 = COPY $vgpr2 %1:vgpr_32 = COPY $vgpr1 @@ -173,9 +163,7 @@ body: | %13:vgpr_32 = COPY %11.sub0 %14:vgpr_32 = COPY %11.sub1 %15:vgpr_32 = V_ADD_U32_e64 killed %13, killed %14, 0, implicit $exec - %5:ccr_sgpr_64 = COPY %4 $vgpr0 = COPY %15 - %16:ccr_sgpr_64 = COPY %5 - S_SETPC_B64_return %16, implicit $vgpr0 + S_SETPC_B64_return $sgpr30_sgpr31, implicit $vgpr0 ... diff --git a/llvm/test/CodeGen/MIR/AMDGPU/stack-id-assert.mir b/llvm/test/CodeGen/MIR/AMDGPU/stack-id-assert.mir index 7b2ef70da2d6..6e9f4dacc56c 100644 --- a/llvm/test/CodeGen/MIR/AMDGPU/stack-id-assert.mir +++ b/llvm/test/CodeGen/MIR/AMDGPU/stack-id-assert.mir @@ -17,11 +17,9 @@ --- name: foo -liveins: - - { reg: '$sgpr30_sgpr31', virtual-reg: '' } body: | bb.0: - S_SETPC_B64_return killed renamable $sgpr30_sgpr31 + SI_RETURN ... --- @@ -31,7 +29,6 @@ liveins: - { reg: '$vgpr0', virtual-reg: '' } - { reg: '$vgpr1', virtual-reg: '' } - { reg: '$vgpr2', virtual-reg: '' } - - { reg: '$sgpr30_sgpr31', virtual-reg: '' } stack: - { id: 0, name: '', type: spill-slot, offset: 0, size: 8, alignment: 4, stack-id: sgpr-spill, callee-saved-register: '', callee-saved-restored: true, @@ -41,7 +38,7 @@ machineFunctionInfo: stackPtrOffsetReg: '$sgpr32' body: | bb.0: - liveins: $vgpr0, $vgpr1, $vgpr2, $sgpr30_sgpr31 + liveins: $vgpr0, $vgpr1, $vgpr2 renamable $vgpr41 = COPY $vgpr2, implicit $exec renamable $vgpr40 = COPY $vgpr1, implicit $exec @@ -52,12 +49,10 @@ body: | ADJCALLSTACKUP 0, 0, implicit-def dead $scc, implicit-def $sgpr32, implicit $sgpr32 renamable $sgpr4_sgpr5 = SI_PC_ADD_REL_OFFSET target-flags(amdgpu-gotprel32-lo) @foo + 4, target-flags(amdgpu-gotprel32-hi) @foo + 12, implicit-def dead $scc renamable $sgpr4_sgpr5 = S_LOAD_DWORDX2_IMM killed renamable $sgpr4_sgpr5, 0, 0 - SI_SPILL_S64_SAVE killed renamable $sgpr30_sgpr31, %stack.0, implicit $exec, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr32 dead $sgpr30_sgpr31 = SI_CALL killed renamable $sgpr4_sgpr5, @foo, csr_amdgpu_highregs, implicit $sgpr0_sgpr1_sgpr2_sgpr3 - renamable $sgpr30_sgpr31 = SI_SPILL_S64_RESTORE %stack.0, implicit $exec, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr32 ADJCALLSTACKDOWN 0, 0, implicit-def dead $scc, implicit-def $sgpr32, implicit $sgpr32 - S_SETPC_B64_return killed renamable $sgpr30_sgpr31 + SI_RETURN ... 
From 8ad364ad2123af98f24050417710f975b8816a90 Mon Sep 17 00:00:00 2001 From: Arthur O'Dwyer Date: Wed, 15 Dec 2021 22:10:34 -0500 Subject: [PATCH 054/293] [libc++] [ranges] Remove the static_assert from ranges::begin and ranges::end. As discussed with ldionne. The problem with this static_assert is that it makes ranges::begin a pitfall for anyone ever to use inside a constraint or decltype. Many Ranges things, such as ranges::size, are specified as "Does X if X is well-formed, or else Y if Y is well-formed, or else `ranges::end(t) - ranges::begin(t)` if that is well-formed, or else..." And if there's a static_assert hidden inside `ranges::begin(t)`, then you get a hard error as soon as you ask the question -- even if the answer would have been "no, that's not well-formed"! Constraining on `requires { t + 0; }` or `requires { t + N; }` is verboten because of https://gcc.gnu.org/bugzilla/show_bug.cgi?id=103700 . For ranges::begin, we can just decay to a pointer even in the incomplete-type case. For ranges::end, we can safely constrain on `sizeof(*t)`. Yes, this means that an array of incomplete type has a `ranges::begin` but no `ranges::end`... just like an unbounded array of complete type. This is a valid manifestation of IFNDR. All of the new libcxx/test/std/ cases are mandatory behavior, as far as I'm aware. Tests for the IFNDR cases in ranges::begin and ranges::end remain in `libcxx/test/libcxx/`. The similar tests for ranges::empty and ranges::data were simply wrong, AFAIK. Differential Revision: https://reviews.llvm.org/D115838 --- libcxx/include/__ranges/access.h | 32 ++------ .../range.access/begin.incomplete_type.sh.cpp | 74 +++++++++++++++++++ .../range.access/end.incomplete_type.pass.cpp | 46 ++++++++++++ .../range.access.begin/incomplete.verify.cpp | 36 --------- .../range.access.cbegin/incomplete.verify.cpp | 32 -------- .../range.access.cend/incomplete.verify.cpp | 38 ---------- .../range.access.end/incomplete.verify.cpp | 38 ---------- .../range.prim/data.incomplete.verify.cpp | 56 -------------- .../range.prim/empty.incomplete.verify.cpp | 53 ------------- .../range.access.begin/begin.pass.cpp | 8 +- .../range.access.end/end.pass.cpp | 6 ++ .../range.access/range.prim/empty.pass.cpp | 11 +++ .../range.access/range.prim/size.pass.cpp | 11 +++ 13 files changed, 162 insertions(+), 279 deletions(-) create mode 100644 libcxx/test/libcxx/ranges/range.access/begin.incomplete_type.sh.cpp create mode 100644 libcxx/test/libcxx/ranges/range.access/end.incomplete_type.pass.cpp delete mode 100644 libcxx/test/libcxx/ranges/range.access/range.access.begin/incomplete.verify.cpp delete mode 100644 libcxx/test/libcxx/ranges/range.access/range.access.cbegin/incomplete.verify.cpp delete mode 100644 libcxx/test/libcxx/ranges/range.access/range.access.cend/incomplete.verify.cpp delete mode 100644 libcxx/test/libcxx/ranges/range.access/range.access.end/incomplete.verify.cpp delete mode 100644 libcxx/test/libcxx/ranges/range.access/range.prim/data.incomplete.verify.cpp delete mode 100644 libcxx/test/libcxx/ranges/range.access/range.prim/empty.incomplete.verify.cpp diff --git a/libcxx/include/__ranges/access.h b/libcxx/include/__ranges/access.h index b0b89c0eeea3..d54b3314e435 100644 --- a/libcxx/include/__ranges/access.h +++ b/libcxx/include/__ranges/access.h @@ -27,15 +27,10 @@ _LIBCPP_BEGIN_NAMESPACE_STD #if !defined(_LIBCPP_HAS_NO_RANGES) -// clang-format off - namespace ranges { template concept __can_borrow = is_lvalue_reference_v<_Tp> || enable_borrowed_range >; - - template - concept 
__is_complete = requires { sizeof(_Tp); }; } // namespace ranges // [range.access.begin] @@ -61,15 +56,10 @@ namespace ranges::__begin { struct __fn { template - requires is_array_v> - [[nodiscard]] _LIBCPP_HIDE_FROM_ABI constexpr auto operator()(_Tp& __t) const noexcept { - constexpr bool __complete = __is_complete >; - if constexpr (__complete) { // used to disable cryptic diagnostic - return __t + 0; - } - else { - static_assert(__complete, "`std::ranges::begin` is SFINAE-unfriendly on arrays of an incomplete type."); - } + requires is_array_v> + [[nodiscard]] _LIBCPP_HIDE_FROM_ABI constexpr auto operator()(_Tp& __t) const noexcept + { + return __t; } template @@ -127,14 +117,10 @@ namespace ranges::__end { class __fn { public: template - [[nodiscard]] _LIBCPP_HIDE_FROM_ABI constexpr auto operator()(_Tp (&__t)[_Np]) const noexcept { - constexpr bool __complete = __is_complete >; - if constexpr (__complete) { // used to disable cryptic diagnostic - return __t + _Np; - } - else { - static_assert(__complete, "`std::ranges::end` is SFINAE-unfriendly on arrays of an incomplete type."); - } + [[nodiscard]] _LIBCPP_HIDE_FROM_ABI constexpr auto operator()(_Tp (&__t)[_Np]) const noexcept + requires (sizeof(*__t) != 0) // Disallow incomplete element types. + { + return __t + _Np; } template @@ -209,8 +195,6 @@ namespace ranges::inline __cpo { inline constexpr auto cend = __cend::__fn{}; } // namespace ranges::__cpo -// clang-format off - #endif // !defined(_LIBCPP_HAS_NO_RANGES) _LIBCPP_END_NAMESPACE_STD diff --git a/libcxx/test/libcxx/ranges/range.access/begin.incomplete_type.sh.cpp b/libcxx/test/libcxx/ranges/range.access/begin.incomplete_type.sh.cpp new file mode 100644 index 000000000000..0bf359900145 --- /dev/null +++ b/libcxx/test/libcxx/ranges/range.access/begin.incomplete_type.sh.cpp @@ -0,0 +1,74 @@ +//===----------------------------------------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +// RUN: %{cxx} %{flags} %{compile_flags} -c %s -o %t.tu1.o -DTU1 +// RUN: %{cxx} %{flags} %{compile_flags} -c %s -o %t.tu2.o -DTU2 +// RUN: %{cxx} %t.tu1.o %t.tu2.o %{flags} %{link_flags} -o %t.exe +// RUN: %{exec} %t.exe + +// UNSUPPORTED: c++03, c++11, c++14, c++17 +// UNSUPPORTED: libcpp-no-concepts +// UNSUPPORTED: libcpp-has-no-incomplete-ranges + +// Test the libc++-specific behavior that we handle the IFNDR case for ranges::begin +// by returning the beginning of the array-of-incomplete-type. +// Use two translation units so that `Incomplete` really is never completed +// at any point within TU2, but the array `bounded` is still given a definition +// (in TU1) to avoid an "undefined reference" error from the linker. +// All of the actually interesting stuff takes place within TU2. 
+ +#include +#include + +#include "test_macros.h" + +#if defined(TU1) + +struct Incomplete {}; +Incomplete bounded[10]; +Incomplete unbounded[10]; + +#else // defined(TU1) + +struct Incomplete; + +constexpr bool test() +{ + { + extern Incomplete bounded[10]; + assert(std::ranges::begin(bounded) == bounded); + assert(std::ranges::cbegin(bounded) == bounded); + assert(std::ranges::begin(std::as_const(bounded)) == bounded); + assert(std::ranges::cbegin(std::as_const(bounded)) == bounded); + ASSERT_SAME_TYPE(decltype(std::ranges::begin(bounded)), Incomplete*); + ASSERT_SAME_TYPE(decltype(std::ranges::cbegin(bounded)), const Incomplete*); + ASSERT_SAME_TYPE(decltype(std::ranges::begin(std::as_const(bounded))), const Incomplete*); + ASSERT_SAME_TYPE(decltype(std::ranges::cbegin(std::as_const(bounded))), const Incomplete*); + } + { + extern Incomplete unbounded[]; + assert(std::ranges::begin(unbounded) == unbounded); + assert(std::ranges::cbegin(unbounded) == unbounded); + assert(std::ranges::begin(std::as_const(unbounded)) == unbounded); + assert(std::ranges::cbegin(std::as_const(unbounded)) == unbounded); + ASSERT_SAME_TYPE(decltype(std::ranges::begin(unbounded)), Incomplete*); + ASSERT_SAME_TYPE(decltype(std::ranges::cbegin(unbounded)), const Incomplete*); + ASSERT_SAME_TYPE(decltype(std::ranges::begin(std::as_const(unbounded))), const Incomplete*); + ASSERT_SAME_TYPE(decltype(std::ranges::cbegin(std::as_const(unbounded))), const Incomplete*); + } + + return true; +} + +int main(int, char**) +{ + test(); + static_assert(test()); +} + +#endif // defined(TU1) diff --git a/libcxx/test/libcxx/ranges/range.access/end.incomplete_type.pass.cpp b/libcxx/test/libcxx/ranges/range.access/end.incomplete_type.pass.cpp new file mode 100644 index 000000000000..4b83ca8945a3 --- /dev/null +++ b/libcxx/test/libcxx/ranges/range.access/end.incomplete_type.pass.cpp @@ -0,0 +1,46 @@ +//===----------------------------------------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +// UNSUPPORTED: c++03, c++11, c++14, c++17 +// UNSUPPORTED: libcpp-no-concepts +// UNSUPPORTED: libcpp-has-no-incomplete-ranges + +// Test the libc++-specific behavior that we handle the IFNDR case for ranges::end +// by being SFINAE-friendly. + +#include +#include +#include + +struct Incomplete; + +constexpr bool test() +{ + { + extern Incomplete bounded[10]; + assert((!std::is_invocable_v)); + assert((!std::is_invocable_v)); + assert((!std::is_invocable_v)); + assert((!std::is_invocable_v)); + } + { + extern Incomplete unbounded[]; + assert((!std::is_invocable_v)); + assert((!std::is_invocable_v)); + assert((!std::is_invocable_v)); + assert((!std::is_invocable_v)); + } + + return true; +} + +int main(int, char**) +{ + test(); + static_assert(test()); +} diff --git a/libcxx/test/libcxx/ranges/range.access/range.access.begin/incomplete.verify.cpp b/libcxx/test/libcxx/ranges/range.access/range.access.begin/incomplete.verify.cpp deleted file mode 100644 index 41322529792e..000000000000 --- a/libcxx/test/libcxx/ranges/range.access/range.access.begin/incomplete.verify.cpp +++ /dev/null @@ -1,36 +0,0 @@ -//===----------------------------------------------------------------------===// -// -// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 
-// See https://llvm.org/LICENSE.txt for license information. -// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -// -//===----------------------------------------------------------------------===// - -// UNSUPPORTED: c++03, c++11, c++14, c++17 -// UNSUPPORTED: libcpp-no-concepts -// UNSUPPORTED: libcpp-has-no-incomplete-ranges - -// Test the libc++ specific behavior that we provide a better diagnostic when calling -// std::ranges::begin on an array of incomplete type. - -#include - -#include - -using begin_t = decltype(std::ranges::begin); - -template void f() requires std::invocable { } -template void f() { } - -void test() { - struct incomplete; - f(); - // expected-error@*:* {{"`std::ranges::begin` is SFINAE-unfriendly on arrays of an incomplete type."}} - f(); - // expected-error@*:* {{"`std::ranges::begin` is SFINAE-unfriendly on arrays of an incomplete type."}} - f(); - // expected-error@*:* {{"`std::ranges::begin` is SFINAE-unfriendly on arrays of an incomplete type."}} - - // This is okay because calling `std::ranges::begin` on any rvalue is ill-formed. - f(); -} diff --git a/libcxx/test/libcxx/ranges/range.access/range.access.cbegin/incomplete.verify.cpp b/libcxx/test/libcxx/ranges/range.access/range.access.cbegin/incomplete.verify.cpp deleted file mode 100644 index bd0ff5e85dd2..000000000000 --- a/libcxx/test/libcxx/ranges/range.access/range.access.cbegin/incomplete.verify.cpp +++ /dev/null @@ -1,32 +0,0 @@ -//===----------------------------------------------------------------------===// -// -// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. -// See https://llvm.org/LICENSE.txt for license information. -// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -// -//===----------------------------------------------------------------------===// - -// UNSUPPORTED: c++03, c++11, c++14, c++17 -// UNSUPPORTED: libcpp-no-concepts -// UNSUPPORTED: libcpp-has-no-incomplete-ranges - -// Test the libc++ specific behavior that we provide a better diagnostic when calling -// std::ranges::cbegin on an array of incomplete type. - -#include - -#include - -using cbegin_t = decltype(std::ranges::cbegin); - -template void f() requires std::invocable { } -template void f() { } - -void test() { - struct incomplete; - f(); - // expected-error@*:* {{"`std::ranges::begin` is SFINAE-unfriendly on arrays of an incomplete type."}} - - // This is okay because calling `std::ranges::end` on any rvalue is ill-formed. - f(); -} diff --git a/libcxx/test/libcxx/ranges/range.access/range.access.cend/incomplete.verify.cpp b/libcxx/test/libcxx/ranges/range.access/range.access.cend/incomplete.verify.cpp deleted file mode 100644 index b4d6729c9547..000000000000 --- a/libcxx/test/libcxx/ranges/range.access/range.access.cend/incomplete.verify.cpp +++ /dev/null @@ -1,38 +0,0 @@ -//===----------------------------------------------------------------------===// -// -// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. -// See https://llvm.org/LICENSE.txt for license information. -// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -// -//===----------------------------------------------------------------------===// - -// UNSUPPORTED: c++03, c++11, c++14, c++17 -// UNSUPPORTED: libcpp-no-concepts -// UNSUPPORTED: libcpp-has-no-incomplete-ranges - -// Test the libc++ specific behavior that we provide a better diagnostic when calling -// std::ranges::cend on an array of incomplete type. 
- -#include - -#include - -using cend_t = decltype(std::ranges::cend); - -template void f() requires std::invocable { } -template void f() { } - -void test() { - struct incomplete; - f(); - // expected-error@*:* {{"`std::ranges::begin` is SFINAE-unfriendly on arrays of an incomplete type."}} - // expected-error@*:* {{"`std::ranges::end` is SFINAE-unfriendly on arrays of an incomplete type."}} - f(); - // expected-error@*:* {{"`std::ranges::begin` is SFINAE-unfriendly on arrays of an incomplete type."}} - // expected-error@*:* {{"`std::ranges::end` is SFINAE-unfriendly on arrays of an incomplete type."}} - f(); - // expected-error@*:* {{"`std::ranges::begin` is SFINAE-unfriendly on arrays of an incomplete type."}} - - // This is okay because calling `std::ranges::end` on any rvalue is ill-formed. - f(); -} diff --git a/libcxx/test/libcxx/ranges/range.access/range.access.end/incomplete.verify.cpp b/libcxx/test/libcxx/ranges/range.access/range.access.end/incomplete.verify.cpp deleted file mode 100644 index ecfe1c4e5a5a..000000000000 --- a/libcxx/test/libcxx/ranges/range.access/range.access.end/incomplete.verify.cpp +++ /dev/null @@ -1,38 +0,0 @@ -//===----------------------------------------------------------------------===// -// -// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. -// See https://llvm.org/LICENSE.txt for license information. -// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -// -//===----------------------------------------------------------------------===// - -// UNSUPPORTED: c++03, c++11, c++14, c++17 -// UNSUPPORTED: libcpp-no-concepts -// UNSUPPORTED: libcpp-has-no-incomplete-ranges - -// Test the libc++ specific behavior that we provide a better diagnostic when calling -// std::ranges::end on an array of incomplete type. - -#include - -#include - -using end_t = decltype(std::ranges::end); - -template void f() requires std::invocable { } -template void f() { } - -void test() { - struct incomplete; - f(); - // expected-error@*:* {{"`std::ranges::begin` is SFINAE-unfriendly on arrays of an incomplete type."}} - // expected-error@*:* {{"`std::ranges::end` is SFINAE-unfriendly on arrays of an incomplete type."}} - f(); - // expected-error@*:* {{"`std::ranges::begin` is SFINAE-unfriendly on arrays of an incomplete type."}} - // expected-error@*:* {{"`std::ranges::end` is SFINAE-unfriendly on arrays of an incomplete type."}} - f(); - // expected-error@*:* {{"`std::ranges::begin` is SFINAE-unfriendly on arrays of an incomplete type."}} - - // This is okay because calling `std::ranges::end` on any rvalue is ill-formed. - f(); -} diff --git a/libcxx/test/libcxx/ranges/range.access/range.prim/data.incomplete.verify.cpp b/libcxx/test/libcxx/ranges/range.access/range.prim/data.incomplete.verify.cpp deleted file mode 100644 index f67328084c6b..000000000000 --- a/libcxx/test/libcxx/ranges/range.access/range.prim/data.incomplete.verify.cpp +++ /dev/null @@ -1,56 +0,0 @@ -//===----------------------------------------------------------------------===// -// -// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. -// See https://llvm.org/LICENSE.txt for license information. 
-// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -// -//===----------------------------------------------------------------------===// - -// UNSUPPORTED: c++03, c++11, c++14, c++17 -// UNSUPPORTED: libcpp-no-concepts -// UNSUPPORTED: libcpp-has-no-incomplete-ranges - -// Test the libc++ specific behavior that we provide a better diagnostic when calling -// std::ranges::data on an array of incomplete type. - -#include - -struct Incomplete; - -void f(Incomplete arr[]) { - // expected-error@*:* {{is SFINAE-unfriendly on arrays of an incomplete type.}} - // expected-error@*:* {{no matching function for call}} - std::ranges::data(arr); -} - -void f(Incomplete(&arr)[]) { - // expected-error@*:* {{is SFINAE-unfriendly on arrays of an incomplete type.}} - // expected-error@*:* {{no matching function for call}} - std::ranges::data(arr); -} - -void f(Incomplete(&&arr)[]) { - // expected-error@*:* {{is SFINAE-unfriendly on arrays of an incomplete type.}} - // expected-error@*:* {{no matching function for call}} - std::ranges::data(arr); -} - -void f2(Incomplete arr[2]) { - // expected-error@*:* {{no matching function for call}} - std::ranges::data(arr); -} - -void f(Incomplete(&arr)[2]) { - // expected-error@*:* {{no matching function for call}} - std::ranges::data(arr); -} - -void f(Incomplete(&&arr)[2]) { - // expected-error@*:* {{no matching function for call}} - std::ranges::data(arr); -} - -void f(Incomplete(&arr)[2][2]) { - // expected-error@*:* {{no matching function for call}} - std::ranges::data(arr); -} diff --git a/libcxx/test/libcxx/ranges/range.access/range.prim/empty.incomplete.verify.cpp b/libcxx/test/libcxx/ranges/range.access/range.prim/empty.incomplete.verify.cpp deleted file mode 100644 index 3b5f79a5bdf5..000000000000 --- a/libcxx/test/libcxx/ranges/range.access/range.prim/empty.incomplete.verify.cpp +++ /dev/null @@ -1,53 +0,0 @@ -//===----------------------------------------------------------------------===// -// -// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. -// See https://llvm.org/LICENSE.txt for license information. -// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -// -//===----------------------------------------------------------------------===// - -// UNSUPPORTED: c++03, c++11, c++14, c++17 -// UNSUPPORTED: libcpp-no-concepts -// UNSUPPORTED: libcpp-has-no-incomplete-ranges - -// Test the libc++ specific behavior that we provide a better diagnostic when calling -// std::ranges::empty on an array of incomplete type. 
- -#include - -struct Incomplete; - -void f(Incomplete arr[]) { - // expected-error@*:* {{is SFINAE-unfriendly on arrays of an incomplete type.}} - // expected-error@*:* {{call to deleted function call operator in type}} - // expected-error@*:* {{attempt to use a deleted function}} - std::ranges::begin(arr); -} - -void f(Incomplete(&arr)[]) { - // expected-error@*:* {{is SFINAE-unfriendly on arrays of an incomplete type.}} - std::ranges::begin(arr); -} - -void f(Incomplete(&&arr)[]) { - // expected-error@*:* {{is SFINAE-unfriendly on arrays of an incomplete type.}} - std::ranges::begin(arr); -} - -void f2(Incomplete arr[2]) { - // expected-error@*:* {{call to deleted function call operator in type}} - // expected-error@*:* {{attempt to use a deleted function}} - std::ranges::begin(arr); -} - -void f(Incomplete(&arr)[2]) { - std::ranges::begin(arr); -} - -void f(Incomplete(&&arr)[2]) { - std::ranges::begin(arr); -} - -void f(Incomplete(&arr)[2][2]) { - std::ranges::begin(arr); -} diff --git a/libcxx/test/std/ranges/range.access/range.access.begin/begin.pass.cpp b/libcxx/test/std/ranges/range.access/range.access.begin/begin.pass.cpp index 3b712a1cc137..05e6211f7a54 100644 --- a/libcxx/test/std/ranges/range.access/range.access.begin/begin.pass.cpp +++ b/libcxx/test/std/ranges/range.access/range.access.begin/begin.pass.cpp @@ -23,13 +23,17 @@ using RangeCBeginT = decltype(std::ranges::cbegin)&; static int globalBuff[8]; -struct Incomplete; - static_assert(!std::is_invocable_v); static_assert( std::is_invocable_v); static_assert(!std::is_invocable_v); static_assert( std::is_invocable_v); +struct Incomplete; +static_assert(!std::is_invocable_v); +static_assert(!std::is_invocable_v); +static_assert(!std::is_invocable_v); +static_assert(!std::is_invocable_v); + struct BeginMember { int x; constexpr const int *begin() const { return &x; } diff --git a/libcxx/test/std/ranges/range.access/range.access.end/end.pass.cpp b/libcxx/test/std/ranges/range.access/range.access.end/end.pass.cpp index dd5da2ebf594..1e4fc0213fc4 100644 --- a/libcxx/test/std/ranges/range.access/range.access.end/end.pass.cpp +++ b/libcxx/test/std/ranges/range.access/range.access.end/end.pass.cpp @@ -28,6 +28,12 @@ static_assert(!std::is_invocable_v); static_assert(!std::is_invocable_v); static_assert( std::is_invocable_v); +struct Incomplete; +static_assert(!std::is_invocable_v); +static_assert(!std::is_invocable_v); +static_assert(!std::is_invocable_v); +static_assert(!std::is_invocable_v); + struct EndMember { int x; constexpr const int *begin() const { return nullptr; } diff --git a/libcxx/test/std/ranges/range.access/range.prim/empty.pass.cpp b/libcxx/test/std/ranges/range.access/range.prim/empty.pass.cpp index 5a06fa88348a..9be523eb42b7 100644 --- a/libcxx/test/std/ranges/range.access/range.prim/empty.pass.cpp +++ b/libcxx/test/std/ranges/range.access/range.prim/empty.pass.cpp @@ -30,6 +30,17 @@ static_assert( std::is_invocable_v); static_assert( std::is_invocable_v); static_assert( std::is_invocable_v); +struct Incomplete; +static_assert(!std::is_invocable_v); +static_assert(!std::is_invocable_v); +static_assert(!std::is_invocable_v); + +extern Incomplete array_of_incomplete[42]; +static_assert(!std::ranges::empty(array_of_incomplete)); +static_assert(!std::ranges::empty(std::move(array_of_incomplete))); +static_assert(!std::ranges::empty(std::as_const(array_of_incomplete))); +static_assert(!std::ranges::empty(static_cast(array_of_incomplete))); + struct NonConstSizeAndEmpty { int size(); bool empty(); diff --git 
a/libcxx/test/std/ranges/range.access/range.prim/size.pass.cpp b/libcxx/test/std/ranges/range.access/range.prim/size.pass.cpp index 17449744cd51..92023cc6a287 100644 --- a/libcxx/test/std/ranges/range.access/range.prim/size.pass.cpp +++ b/libcxx/test/std/ranges/range.access/range.prim/size.pass.cpp @@ -25,6 +25,17 @@ static_assert( std::is_invocable_v); static_assert( std::is_invocable_v); static_assert( std::is_invocable_v); +struct Incomplete; +static_assert(!std::is_invocable_v); +static_assert(!std::is_invocable_v); +static_assert(!std::is_invocable_v); + +extern Incomplete array_of_incomplete[42]; +static_assert(std::ranges::size(array_of_incomplete) == 42); +static_assert(std::ranges::size(std::move(array_of_incomplete)) == 42); +static_assert(std::ranges::size(std::as_const(array_of_incomplete)) == 42); +static_assert(std::ranges::size(static_cast(array_of_incomplete)) == 42); + static_assert(std::semiregular>); struct SizeMember { From fd8fc5e8d93849f4a2c8dea087690b1a0e6ea7df Mon Sep 17 00:00:00 2001 From: Ivan Gerasimov Date: Wed, 22 Dec 2021 16:33:09 +0100 Subject: [PATCH 055/293] [clang-tidy] abseil-string-find-startswith: detect `s.rfind(z, 0) == 0` Suggest converting `std::string::rfind()` calls to `absl::StartsWith()` where possible. --- .../abseil/StringFindStartswithCheck.cpp | 35 +++++++++++++--- .../checks/abseil-string-find-startswith.rst | 8 ++-- .../abseil-string-find-startswith.cpp | 40 +++++++++++++++++++ 3 files changed, 75 insertions(+), 8 deletions(-) diff --git a/clang-tools-extra/clang-tidy/abseil/StringFindStartswithCheck.cpp b/clang-tools-extra/clang-tidy/abseil/StringFindStartswithCheck.cpp index 5741c0d505d5..e834c8a12459 100644 --- a/clang-tools-extra/clang-tidy/abseil/StringFindStartswithCheck.cpp +++ b/clang-tools-extra/clang-tidy/abseil/StringFindStartswithCheck.cpp @@ -40,7 +40,7 @@ void StringFindStartswithCheck::registerMatchers(MatchFinder *Finder) { auto StringFind = cxxMemberCallExpr( // .find()-call on a string... - callee(cxxMethodDecl(hasName("find"))), + callee(cxxMethodDecl(hasName("find")).bind("findfun")), on(hasType(StringType)), // ... with some search expression ... hasArgument(0, expr().bind("needle")), @@ -55,6 +55,25 @@ void StringFindStartswithCheck::registerMatchers(MatchFinder *Finder) { ignoringParenImpCasts(StringFind.bind("findexpr")))) .bind("expr"), this); + + auto StringRFind = cxxMemberCallExpr( + // .rfind()-call on a string... + callee(cxxMethodDecl(hasName("rfind")).bind("findfun")), + on(hasType(StringType)), + // ... with some search expression ... + hasArgument(0, expr().bind("needle")), + // ... and "0" as second argument. + hasArgument(1, ZeroLiteral)); + + Finder->addMatcher( + // Match [=!]= with either a zero or npos on one side and a string.rfind + // on the other. 
+ binaryOperator( + hasAnyOperatorName("==", "!="), + hasOperands(ignoringParenImpCasts(ZeroLiteral), + ignoringParenImpCasts(StringRFind.bind("findexpr")))) + .bind("expr"), + this); } void StringFindStartswithCheck::check(const MatchFinder::MatchResult &Result) { @@ -69,6 +88,11 @@ void StringFindStartswithCheck::check(const MatchFinder::MatchResult &Result) { const Expr *Haystack = Result.Nodes.getNodeAs("findexpr") ->getImplicitObjectArgument(); assert(Haystack != nullptr); + const CXXMethodDecl *FindFun = + Result.Nodes.getNodeAs("findfun"); + assert(FindFun != nullptr); + + bool Rev = FindFun->getName().contains("rfind"); if (ComparisonExpr->getBeginLoc().isMacroID()) return; @@ -86,10 +110,11 @@ void StringFindStartswithCheck::check(const MatchFinder::MatchResult &Result) { bool Neg = ComparisonExpr->getOpcode() == BO_NE; // Create the warning message and a FixIt hint replacing the original expr. - auto Diagnostic = diag(ComparisonExpr->getBeginLoc(), - "use %select{absl::StartsWith|!absl::StartsWith}0 " - "instead of find() %select{==|!=}0 0") - << Neg; + auto Diagnostic = + diag(ComparisonExpr->getBeginLoc(), + "use %select{absl::StartsWith|!absl::StartsWith}0 " + "instead of %select{find()|rfind()}1 %select{==|!=}0 0") + << Neg << Rev; Diagnostic << FixItHint::CreateReplacement( ComparisonExpr->getSourceRange(), diff --git a/clang-tools-extra/docs/clang-tidy/checks/abseil-string-find-startswith.rst b/clang-tools-extra/docs/clang-tidy/checks/abseil-string-find-startswith.rst index 43f35ac66c8a..8224c37a087b 100644 --- a/clang-tools-extra/docs/clang-tidy/checks/abseil-string-find-startswith.rst +++ b/clang-tools-extra/docs/clang-tidy/checks/abseil-string-find-startswith.rst @@ -3,14 +3,15 @@ abseil-string-find-startswith ============================= -Checks whether a ``std::string::find()`` result is compared with 0, and -suggests replacing with ``absl::StartsWith()``. This is both a readability and -performance issue. +Checks whether a ``std::string::find()`` or ``std::string::rfind()`` result is +compared with 0, and suggests replacing with ``absl::StartsWith()``. This is +both a readability and performance issue. .. 
code-block:: c++ string s = "..."; if (s.find("Hello World") == 0) { /* do something */ } + if (s.rfind("Hello World", 0) == 0) { /* do something */ } becomes @@ -19,6 +20,7 @@ becomes string s = "..."; if (absl::StartsWith(s, "Hello World")) { /* do something */ } + if (absl::StartsWith(s, "Hello World")) { /* do something */ } Options diff --git a/clang-tools-extra/test/clang-tidy/checkers/abseil-string-find-startswith.cpp b/clang-tools-extra/test/clang-tidy/checkers/abseil-string-find-startswith.cpp index 89365bfa1f81..7a1fd8233d83 100644 --- a/clang-tools-extra/test/clang-tidy/checkers/abseil-string-find-startswith.cpp +++ b/clang-tools-extra/test/clang-tidy/checkers/abseil-string-find-startswith.cpp @@ -1,6 +1,8 @@ // RUN: %check_clang_tidy %s abseil-string-find-startswith %t -- \ // RUN: -config="{CheckOptions: [{key: 'abseil-string-find-startswith.StringLikeClasses', value: '::std::basic_string;::basic_string'}]}" +using size_t = decltype(sizeof(int)); + namespace std { template class allocator {}; template class char_traits {}; @@ -13,12 +15,17 @@ struct basic_string { ~basic_string(); int find(basic_string s, int pos = 0); int find(const char *s, int pos = 0); + int rfind(basic_string s, int pos = npos); + int rfind(const char *s, int pos = npos); + static constexpr size_t npos = -1; }; typedef basic_string string; typedef basic_string wstring; struct cxx_string { int find(const char *s, int pos = 0); + int rfind(const char *s, int pos = npos); + static constexpr size_t npos = -1; }; } // namespace std @@ -61,9 +68,42 @@ void tests(std::string s, global_string s2) { // CHECK-MESSAGES: :[[@LINE-1]]:3: warning: use absl::StartsWith // CHECK-FIXES: {{^[[:space:]]*}}absl::StartsWith(s2, "a");{{$}} + s.rfind("a", 0) == 0; + // CHECK-MESSAGES: :[[@LINE-1]]:3: warning: use absl::StartsWith instead of rfind() == 0 [abseil-string-find-startswith] + // CHECK-FIXES: {{^[[:space:]]*}}absl::StartsWith(s, "a");{{$}} + + s.rfind(s, 0) == 0; + // CHECK-MESSAGES: :[[@LINE-1]]:3: warning: use absl::StartsWith + // CHECK-FIXES: {{^[[:space:]]*}}absl::StartsWith(s, s);{{$}} + + s.rfind("aaa", 0) != 0; + // CHECK-MESSAGES: :[[@LINE-1]]:3: warning: use !absl::StartsWith + // CHECK-FIXES: {{^[[:space:]]*}}!absl::StartsWith(s, "aaa");{{$}} + + s.rfind(foo(foo(bar())), 0) != 0; + // CHECK-MESSAGES: :[[@LINE-1]]:3: warning: use !absl::StartsWith + // CHECK-FIXES: {{^[[:space:]]*}}!absl::StartsWith(s, foo(foo(bar())));{{$}} + + if (s.rfind("....", 0) == 0) { /* do something */ } + // CHECK-MESSAGES: :[[@LINE-1]]:7: warning: use absl::StartsWith + // CHECK-FIXES: {{^[[:space:]]*}}if (absl::StartsWith(s, "....")) { /* do something */ }{{$}} + + 0 != s.rfind("a", 0); + // CHECK-MESSAGES: :[[@LINE-1]]:3: warning: use !absl::StartsWith + // CHECK-FIXES: {{^[[:space:]]*}}!absl::StartsWith(s, "a");{{$}} + + s2.rfind("a", 0) == 0; + // CHECK-MESSAGES: :[[@LINE-1]]:3: warning: use absl::StartsWith + // CHECK-FIXES: {{^[[:space:]]*}}absl::StartsWith(s2, "a");{{$}} + // expressions that don't trigger the check are here. A_MACRO(s.find("a"), 0); + A_MACRO(s.rfind("a", 0), 0); s.find("a", 1) == 0; s.find("a", 1) == 1; s.find("a") == 1; + s.rfind("a", 1) == 0; + s.rfind("a", 1) == 1; + s.rfind("a") == 0; + s.rfind("a") == 1; } From da007a33c95a37c1331a14bc4f377982598fe68a Mon Sep 17 00:00:00 2001 From: Nikita Popov Date: Wed, 22 Dec 2021 16:55:56 +0100 Subject: [PATCH 056/293] [JSONNodeDumper] Regenerate test checks (NFC) gen_ast_dump_json_test.py adds these lines of whitespace. 
Precommit it to avoid spurious diffs in future changes. --- clang/test/AST/ast-dump-decl-json.c | 31 +++++++++++++++++++ clang/test/AST/ast-dump-decl-json.m | 15 +++++++++ clang/test/AST/ast-dump-expr-json.c | 7 +++++ clang/test/AST/ast-dump-expr-json.cpp | 10 ++++++ clang/test/AST/ast-dump-expr-json.m | 9 ++++++ clang/test/AST/ast-dump-file-line-json.c | 5 +-- clang/test/AST/ast-dump-funcs-json.cpp | 24 ++++++++++++++ clang/test/AST/ast-dump-stmt-json.c | 13 ++++++++ clang/test/AST/ast-dump-stmt-json.cpp | 25 +++++++++++++++ clang/test/AST/ast-dump-stmt-json.m | 3 ++ .../test/AST/ast-dump-template-decls-json.cpp | 1 + 11 files changed, 141 insertions(+), 2 deletions(-) diff --git a/clang/test/AST/ast-dump-decl-json.c b/clang/test/AST/ast-dump-decl-json.c index 9264590d67b8..7bc37a76b800 100644 --- a/clang/test/AST/ast-dump-decl-json.c +++ b/clang/test/AST/ast-dump-decl-json.c @@ -91,6 +91,7 @@ void testParmVarDecl(int TestParmVarDecl); // NOTE: CHECK lines have been autogenerated by gen_ast_dump_json_test.py + // CHECK-NOT: {{^}}Dumping // CHECK: "kind": "TypedefDecl", // CHECK-NEXT: "loc": { @@ -137,6 +138,7 @@ void testParmVarDecl(int TestParmVarDecl); // CHECK-NEXT: ] // CHECK-NEXT: } + // CHECK-NOT: {{^}}Dumping // CHECK: "kind": "VarDecl", // CHECK-NEXT: "loc": { @@ -165,6 +167,7 @@ void testParmVarDecl(int TestParmVarDecl); // CHECK-NEXT: } // CHECK-NEXT: } + // CHECK-NOT: {{^}}Dumping // CHECK: "kind": "VarDecl", // CHECK-NEXT: "loc": { @@ -195,6 +198,7 @@ void testParmVarDecl(int TestParmVarDecl); // CHECK-NEXT: } // CHECK-NEXT: } + // CHECK-NOT: {{^}}Dumping // CHECK: "kind": "RecordDecl", // CHECK-NEXT: "loc": { @@ -250,6 +254,7 @@ void testParmVarDecl(int TestParmVarDecl); // CHECK-NEXT: ] // CHECK-NEXT: } + // CHECK-NOT: {{^}}Dumping // CHECK: "kind": "RecordDecl", // CHECK-NEXT: "loc": { @@ -360,6 +365,7 @@ void testParmVarDecl(int TestParmVarDecl); // CHECK-NEXT: ] // CHECK-NEXT: } + // CHECK-NOT: {{^}}Dumping // CHECK: "kind": "LabelDecl", // CHECK-NEXT: "loc": { @@ -385,6 +391,7 @@ void testParmVarDecl(int TestParmVarDecl); // CHECK-NEXT: "name": "TestLabelDecl" // CHECK-NEXT: } + // CHECK-NOT: {{^}}Dumping // CHECK: "kind": "TypedefDecl", // CHECK-NEXT: "loc": { @@ -421,6 +428,7 @@ void testParmVarDecl(int TestParmVarDecl); // CHECK-NEXT: ] // CHECK-NEXT: } + // CHECK-NOT: {{^}}Dumping // CHECK: "kind": "EnumDecl", // CHECK-NEXT: "loc": { @@ -474,6 +482,7 @@ void testParmVarDecl(int TestParmVarDecl); // CHECK-NEXT: ] // CHECK-NEXT: } + // CHECK-NOT: {{^}}Dumping // CHECK: "kind": "RecordDecl", // CHECK-NEXT: "loc": { @@ -583,6 +592,7 @@ void testParmVarDecl(int TestParmVarDecl); // CHECK-NEXT: ] // CHECK-NEXT: } + // CHECK-NOT: {{^}}Dumping // CHECK: "kind": "EnumDecl", // CHECK-NEXT: "loc": { @@ -607,6 +617,7 @@ void testParmVarDecl(int TestParmVarDecl); // CHECK-NEXT: "name": "TestEnumDeclForward" // CHECK-NEXT: } + // CHECK-NOT: {{^}}Dumping // CHECK: "kind": "RecordDecl", // CHECK-NEXT: "loc": { @@ -662,6 +673,7 @@ void testParmVarDecl(int TestParmVarDecl); // CHECK-NEXT: ] // CHECK-NEXT: } + // CHECK-NOT: {{^}}Dumping // CHECK: "kind": "RecordDecl", // CHECK-NEXT: "loc": { @@ -689,6 +701,7 @@ void testParmVarDecl(int TestParmVarDecl); // CHECK-NEXT: "completeDefinition": true // CHECK-NEXT: } + // CHECK-NOT: {{^}}Dumping // CHECK: "kind": "RecordDecl", // CHECK-NEXT: "loc": { @@ -771,6 +784,7 @@ void testParmVarDecl(int TestParmVarDecl); // CHECK-NEXT: ] // CHECK-NEXT: } + // CHECK-NOT: {{^}}Dumping // CHECK: "kind": "RecordDecl", // CHECK-NEXT: "loc": { @@ -851,6 
+865,7 @@ void testParmVarDecl(int TestParmVarDecl); // CHECK-NEXT: ] // CHECK-NEXT: } + // CHECK-NOT: {{^}}Dumping // CHECK: "kind": "RecordDecl", // CHECK-NEXT: "loc": { @@ -876,6 +891,7 @@ void testParmVarDecl(int TestParmVarDecl); // CHECK-NEXT: "tagUsed": "struct" // CHECK-NEXT: } + // CHECK-NOT: {{^}}Dumping // CHECK: "kind": "EnumConstantDecl", // CHECK-NEXT: "loc": { @@ -903,6 +919,7 @@ void testParmVarDecl(int TestParmVarDecl); // CHECK-NEXT: } // CHECK-NEXT: } + // CHECK-NOT: {{^}}Dumping // CHECK: "kind": "EnumConstantDecl", // CHECK-NEXT: "loc": { @@ -976,6 +993,7 @@ void testParmVarDecl(int TestParmVarDecl); // CHECK-NEXT: ] // CHECK-NEXT: } + // CHECK-NOT: {{^}}Dumping // CHECK: "kind": "RecordDecl", // CHECK-NEXT: "loc": { @@ -1108,6 +1126,7 @@ void testParmVarDecl(int TestParmVarDecl); // CHECK-NEXT: ] // CHECK-NEXT: } + // CHECK-NOT: {{^}}Dumping // CHECK: "kind": "FunctionDecl", // CHECK-NEXT: "loc": { @@ -1283,6 +1302,7 @@ void testParmVarDecl(int TestParmVarDecl); // CHECK-NEXT: ] // CHECK-NEXT: } + // CHECK-NOT: {{^}}Dumping // CHECK: "kind": "FunctionDecl", // CHECK-NEXT: "loc": { @@ -1454,6 +1474,7 @@ void testParmVarDecl(int TestParmVarDecl); // CHECK-NEXT: ] // CHECK-NEXT: } + // CHECK-NOT: {{^}}Dumping // CHECK: "kind": "FunctionDecl", // CHECK-NEXT: "loc": { @@ -1510,6 +1531,7 @@ void testParmVarDecl(int TestParmVarDecl); // CHECK-NEXT: ] // CHECK-NEXT: } + // CHECK-NOT: {{^}}Dumping // CHECK: "kind": "FunctionDecl", // CHECK-NEXT: "loc": { @@ -1538,6 +1560,7 @@ void testParmVarDecl(int TestParmVarDecl); // CHECK-NEXT: } // CHECK-NEXT: } + // CHECK-NOT: {{^}}Dumping // CHECK: "kind": "FunctionDecl", // CHECK-NEXT: "loc": { @@ -1567,6 +1590,7 @@ void testParmVarDecl(int TestParmVarDecl); // CHECK-NEXT: "storageClass": "extern" // CHECK-NEXT: } + // CHECK-NOT: {{^}}Dumping // CHECK: "kind": "FunctionDecl", // CHECK-NEXT: "loc": { @@ -1596,6 +1620,7 @@ void testParmVarDecl(int TestParmVarDecl); // CHECK-NEXT: "inline": true // CHECK-NEXT: } + // CHECK-NOT: {{^}}Dumping // CHECK: "kind": "FieldDecl", // CHECK-NEXT: "loc": { @@ -1623,6 +1648,7 @@ void testParmVarDecl(int TestParmVarDecl); // CHECK-NEXT: } // CHECK-NEXT: } + // CHECK-NOT: {{^}}Dumping // CHECK: "kind": "FieldDecl", // CHECK-NEXT: "loc": { @@ -1697,6 +1723,7 @@ void testParmVarDecl(int TestParmVarDecl); // CHECK-NEXT: ] // CHECK-NEXT: } + // CHECK-NOT: {{^}}Dumping // CHECK: "kind": "VarDecl", // CHECK-NEXT: "loc": { @@ -1725,6 +1752,7 @@ void testParmVarDecl(int TestParmVarDecl); // CHECK-NEXT: } // CHECK-NEXT: } + // CHECK-NOT: {{^}}Dumping // CHECK: "kind": "VarDecl", // CHECK-NEXT: "loc": { @@ -1754,6 +1782,7 @@ void testParmVarDecl(int TestParmVarDecl); // CHECK-NEXT: "storageClass": "extern" // CHECK-NEXT: } + // CHECK-NOT: {{^}}Dumping // CHECK: "kind": "VarDecl", // CHECK-NEXT: "loc": { @@ -1783,6 +1812,7 @@ void testParmVarDecl(int TestParmVarDecl); // CHECK-NEXT: "tls": "static" // CHECK-NEXT: } + // CHECK-NOT: {{^}}Dumping // CHECK: "kind": "VarDecl", // CHECK-NEXT: "loc": { @@ -1835,6 +1865,7 @@ void testParmVarDecl(int TestParmVarDecl); // CHECK-NEXT: ] // CHECK-NEXT: } + // CHECK-NOT: {{^}}Dumping // CHECK: "kind": "ParmVarDecl", // CHECK-NEXT: "loc": { diff --git a/clang/test/AST/ast-dump-decl-json.m b/clang/test/AST/ast-dump-decl-json.m index b970d73e906b..9778ff5bb6ac 100644 --- a/clang/test/AST/ast-dump-decl-json.m +++ b/clang/test/AST/ast-dump-decl-json.m @@ -85,6 +85,7 @@ void f() { // NOTE: CHECK lines have been autogenerated by gen_ast_dump_json_test.py + // CHECK-NOT: {{^}}Dumping 
// CHECK: "kind": "ObjCInterfaceDecl", // CHECK-NEXT: "loc": { @@ -121,6 +122,7 @@ void f() { // CHECK-NEXT: } // CHECK-NEXT: } + // CHECK-NOT: {{^}}Dumping // CHECK: "kind": "ObjCImplementationDecl", // CHECK-NEXT: "loc": { @@ -291,6 +293,7 @@ void f() { // CHECK-NEXT: ] // CHECK-NEXT: } + // CHECK-NOT: {{^}}Dumping // CHECK: "kind": "ObjCMethodDecl", // CHECK-NEXT: "loc": { @@ -349,6 +352,7 @@ void f() { // CHECK-NEXT: ] // CHECK-NEXT: } + // CHECK-NOT: {{^}}Dumping // CHECK: "kind": "ObjCMethodDecl", // CHECK-NEXT: "loc": { @@ -500,6 +504,7 @@ void f() { // CHECK-NEXT: ] // CHECK-NEXT: } + // CHECK-NOT: {{^}}Dumping // CHECK: "kind": "ObjCProtocolDecl", // CHECK-NEXT: "loc": { @@ -555,6 +560,7 @@ void f() { // CHECK-NEXT: ] // CHECK-NEXT: } + // CHECK-NOT: {{^}}Dumping // CHECK: "kind": "ObjCInterfaceDecl", // CHECK-NEXT: "loc": { @@ -628,6 +634,7 @@ void f() { // CHECK-NEXT: ] // CHECK-NEXT: } + // CHECK-NOT: {{^}}Dumping // CHECK: "kind": "ObjCImplementationDecl", // CHECK-NEXT: "loc": { @@ -773,6 +780,7 @@ void f() { // CHECK-NEXT: ] // CHECK-NEXT: } + // CHECK-NOT: {{^}}Dumping // CHECK: "kind": "ObjCCategoryDecl", // CHECK-NEXT: "loc": { @@ -845,6 +853,7 @@ void f() { // CHECK-NEXT: ] // CHECK-NEXT: } + // CHECK-NOT: {{^}}Dumping // CHECK: "kind": "ObjCInterfaceDecl", // CHECK-NEXT: "loc": { @@ -916,6 +925,7 @@ void f() { // CHECK-NEXT: ] // CHECK-NEXT: } + // CHECK-NOT: {{^}}Dumping // CHECK: "kind": "ObjCCategoryImplDecl", // CHECK-NEXT: "loc": { @@ -1034,6 +1044,7 @@ void f() { // CHECK-NEXT: ] // CHECK-NEXT: } + // CHECK-NOT: {{^}}Dumping // CHECK: "kind": "ObjCCompatibleAliasDecl", // CHECK-NEXT: "loc": { @@ -1063,6 +1074,7 @@ void f() { // CHECK-NEXT: } // CHECK-NEXT: } + // CHECK-NOT: {{^}}Dumping // CHECK: "kind": "ObjCInterfaceDecl", // CHECK-NEXT: "loc": { @@ -1341,6 +1353,7 @@ void f() { // CHECK-NEXT: ] // CHECK-NEXT: } + // CHECK-NOT: {{^}}Dumping // CHECK: "kind": "ObjCImplementationDecl", // CHECK-NEXT: "loc": { @@ -1676,6 +1689,7 @@ void f() { // CHECK-NEXT: ] // CHECK-NEXT: } + // CHECK-NOT: {{^}}Dumping // CHECK: "kind": "FunctionDecl", // CHECK-NEXT: "loc": { @@ -1962,6 +1976,7 @@ void f() { // CHECK-NEXT: ] // CHECK-NEXT: } + // CHECK-NOT: {{^}}Dumping // CHECK: "kind": "VarDecl", // CHECK-NEXT: "loc": { diff --git a/clang/test/AST/ast-dump-expr-json.c b/clang/test/AST/ast-dump-expr-json.c index c322625cb2b2..a018066ac514 100644 --- a/clang/test/AST/ast-dump-expr-json.c +++ b/clang/test/AST/ast-dump-expr-json.c @@ -109,6 +109,7 @@ void PrimaryExpressions(int a) { // NOTE: CHECK lines have been autogenerated by gen_ast_dump_json_test.py // using --filters=FunctionDecl + // CHECK-NOT: {{^}}Dumping // CHECK: "kind": "FunctionDecl", // CHECK-NEXT: "loc": { @@ -270,6 +271,7 @@ void PrimaryExpressions(int a) { // CHECK-NEXT: ] // CHECK-NEXT: } + // CHECK-NOT: {{^}}Dumping // CHECK: "kind": "FunctionDecl", // CHECK-NEXT: "loc": { @@ -529,6 +531,7 @@ void PrimaryExpressions(int a) { // CHECK-NEXT: ] // CHECK-NEXT: } + // CHECK-NOT: {{^}}Dumping // CHECK: "kind": "FunctionDecl", // CHECK-NEXT: "loc": { @@ -963,6 +966,7 @@ void PrimaryExpressions(int a) { // CHECK-NEXT: ] // CHECK-NEXT: } + // CHECK-NOT: {{^}}Dumping // CHECK: "kind": "FunctionDecl", // CHECK-NEXT: "loc": { @@ -3335,6 +3339,7 @@ void PrimaryExpressions(int a) { // CHECK-NEXT: ] // CHECK-NEXT: } + // CHECK-NOT: {{^}}Dumping // CHECK: "kind": "FunctionDecl", // CHECK-NEXT: "loc": { @@ -4003,6 +4008,7 @@ void PrimaryExpressions(int a) { // CHECK-NEXT: ] // CHECK-NEXT: } + // CHECK-NOT: {{^}}Dumping // CHECK: 
"kind": "FunctionDecl", // CHECK-NEXT: "loc": { @@ -4900,6 +4906,7 @@ void PrimaryExpressions(int a) { // CHECK-NEXT: ] // CHECK-NEXT: } + // CHECK-NOT: {{^}}Dumping // CHECK: "kind": "FunctionDecl", // CHECK-NEXT: "loc": { diff --git a/clang/test/AST/ast-dump-expr-json.cpp b/clang/test/AST/ast-dump-expr-json.cpp index 3ef2f78551b6..110dfd70ebad 100644 --- a/clang/test/AST/ast-dump-expr-json.cpp +++ b/clang/test/AST/ast-dump-expr-json.cpp @@ -155,6 +155,7 @@ void TestNonADLCall3() { // NOTE: CHECK lines have been autogenerated by gen_ast_dump_json_test.py + // CHECK-NOT: {{^}}Dumping // CHECK: "kind": "FunctionDecl", // CHECK-NEXT: "loc": { @@ -271,6 +272,7 @@ void TestNonADLCall3() { // CHECK-NEXT: ] // CHECK-NEXT: } + // CHECK-NOT: {{^}}Dumping // CHECK: "kind": "FunctionDecl", // CHECK-NEXT: "loc": { @@ -1018,6 +1020,7 @@ void TestNonADLCall3() { // CHECK-NEXT: ] // CHECK-NEXT: } + // CHECK-NOT: {{^}}Dumping // CHECK: "kind": "FunctionDecl", // CHECK-NEXT: "loc": { @@ -1405,6 +1408,7 @@ void TestNonADLCall3() { // CHECK-NEXT: ] // CHECK-NEXT: } + // CHECK-NOT: {{^}}Dumping // CHECK: "kind": "FunctionTemplateDecl", // CHECK-NEXT: "loc": { @@ -2483,6 +2487,7 @@ void TestNonADLCall3() { // CHECK-NEXT: ] // CHECK-NEXT: } + // CHECK-NOT: {{^}}Dumping // CHECK: "kind": "FunctionDecl", // CHECK-NEXT: "loc": { @@ -3560,6 +3565,7 @@ void TestNonADLCall3() { // CHECK-NEXT: ] // CHECK-NEXT: } + // CHECK-NOT: {{^}}Dumping // CHECK: "kind": "FunctionTemplateDecl", // CHECK-NEXT: "loc": { @@ -7759,6 +7765,7 @@ void TestNonADLCall3() { // CHECK-NEXT: ] // CHECK-NEXT: } + // CHECK-NOT: {{^}}Dumping // CHECK: "kind": "FunctionDecl", // CHECK-NEXT: "loc": { @@ -8197,6 +8204,7 @@ void TestNonADLCall3() { // CHECK-NEXT: ] // CHECK-NEXT: } + // CHECK-NOT: {{^}}Dumping // CHECK: "kind": "FunctionDecl", // CHECK-NEXT: "loc": { @@ -8478,6 +8486,7 @@ void TestNonADLCall3() { // CHECK-NEXT: ] // CHECK-NEXT: } + // CHECK-NOT: {{^}}Dumping // CHECK: "kind": "FunctionDecl", // CHECK-NEXT: "loc": { @@ -8961,6 +8970,7 @@ void TestNonADLCall3() { // CHECK-NEXT: ] // CHECK-NEXT: } + // CHECK-NOT: {{^}}Dumping // CHECK: "kind": "FunctionDecl", // CHECK-NEXT: "loc": { diff --git a/clang/test/AST/ast-dump-expr-json.m b/clang/test/AST/ast-dump-expr-json.m index 062f6c06d1aa..aff2d279916b 100644 --- a/clang/test/AST/ast-dump-expr-json.m +++ b/clang/test/AST/ast-dump-expr-json.m @@ -97,6 +97,7 @@ void TestObjCBoolLiteral() { // NOTE: CHECK lines have been autogenerated by gen_ast_dump_json_test.py + // CHECK-NOT: {{^}}Dumping // CHECK: "kind": "FunctionDecl", // CHECK-NEXT: "loc": { @@ -243,6 +244,7 @@ void TestObjCBoolLiteral() { // CHECK-NEXT: ] // CHECK-NEXT: } + // CHECK-NOT: {{^}}Dumping // CHECK: "kind": "FunctionDecl", // CHECK-NEXT: "loc": { @@ -423,6 +425,7 @@ void TestObjCBoolLiteral() { // CHECK-NEXT: ] // CHECK-NEXT: } + // CHECK-NOT: {{^}}Dumping // CHECK: "kind": "FunctionDecl", // CHECK-NEXT: "loc": { @@ -585,6 +588,7 @@ void TestObjCBoolLiteral() { // CHECK-NEXT: ] // CHECK-NEXT: } + // CHECK-NOT: {{^}}Dumping // CHECK: "kind": "FunctionDecl", // CHECK-NEXT: "loc": { @@ -709,6 +713,7 @@ void TestObjCBoolLiteral() { // CHECK-NEXT: ] // CHECK-NEXT: } + // CHECK-NOT: {{^}}Dumping // CHECK: "kind": "FunctionDecl", // CHECK-NEXT: "loc": { @@ -896,6 +901,7 @@ void TestObjCBoolLiteral() { // CHECK-NEXT: ] // CHECK-NEXT: } + // CHECK-NOT: {{^}}Dumping // CHECK: "kind": "FunctionDecl", // CHECK-NEXT: "loc": { @@ -1919,6 +1925,7 @@ void TestObjCBoolLiteral() { // CHECK-NEXT: ] // CHECK-NEXT: } + // CHECK-NOT: 
{{^}}Dumping // CHECK: "kind": "FunctionDecl", // CHECK-NEXT: "loc": { @@ -4767,6 +4774,7 @@ void TestObjCBoolLiteral() { // CHECK-NEXT: ] // CHECK-NEXT: } + // CHECK-NOT: {{^}}Dumping // CHECK: "kind": "FunctionDecl", // CHECK-NEXT: "loc": { @@ -4972,6 +4980,7 @@ void TestObjCBoolLiteral() { // CHECK-NEXT: ] // CHECK-NEXT: } + // CHECK-NOT: {{^}}Dumping // CHECK: "kind": "FunctionDecl", // CHECK-NEXT: "loc": { diff --git a/clang/test/AST/ast-dump-file-line-json.c b/clang/test/AST/ast-dump-file-line-json.c index d4899d8fbbe9..b42107d415b7 100644 --- a/clang/test/AST/ast-dump-file-line-json.c +++ b/clang/test/AST/ast-dump-file-line-json.c @@ -12,6 +12,7 @@ int d; int e; // NOTE: CHECK lines have been autogenerated by gen_ast_dump_json_test.py + // CHECK-NOT: {{^}}Dumping // CHECK: "kind": "TranslationUnitDecl", // CHECK-NEXT: "loc": {}, @@ -171,7 +172,7 @@ int e; // CHECK-NEXT: "offset": 105, // CHECK-NEXT: "file": "{{.*}}", // CHECK-NEXT: "line": 4, -// CHECK-NEXT: "presumedFile": "test.c", +// CHECK-NEXT: "presumedFile": "{{.*}}", // CHECK-NEXT: "col": 5, // CHECK-NEXT: "tokLen": 1 // CHECK-NEXT: }, @@ -256,7 +257,7 @@ int e; // CHECK-NEXT: "loc": { // CHECK-NEXT: "offset": 163, // CHECK-NEXT: "line": 11, -// CHECK-NEXT: "presumedFile": "test.c", +// CHECK-NEXT: "presumedFile": "{{.*}}", // CHECK-NEXT: "col": 5, // CHECK-NEXT: "tokLen": 1 // CHECK-NEXT: }, diff --git a/clang/test/AST/ast-dump-funcs-json.cpp b/clang/test/AST/ast-dump-funcs-json.cpp index aeaf3540a987..ecc2701c400d 100644 --- a/clang/test/AST/ast-dump-funcs-json.cpp +++ b/clang/test/AST/ast-dump-funcs-json.cpp @@ -43,6 +43,7 @@ int main() { // NOTE: CHECK lines have been autogenerated by gen_ast_dump_json_test.py + // CHECK-NOT: {{^}}Dumping // CHECK: "kind": "CXXMethodDecl", // CHECK-NEXT: "loc": { @@ -71,6 +72,7 @@ int main() { // CHECK-NEXT: } // CHECK-NEXT: } + // CHECK-NOT: {{^}}Dumping // CHECK: "kind": "CXXMethodDecl", // CHECK-NEXT: "loc": { @@ -99,6 +101,7 @@ int main() { // CHECK-NEXT: } // CHECK-NEXT: } + // CHECK-NOT: {{^}}Dumping // CHECK: "kind": "CXXMethodDecl", // CHECK-NEXT: "loc": { @@ -127,6 +130,7 @@ int main() { // CHECK-NEXT: } // CHECK-NEXT: } + // CHECK-NOT: {{^}}Dumping // CHECK: "kind": "CXXMethodDecl", // CHECK-NEXT: "loc": { @@ -155,6 +159,7 @@ int main() { // CHECK-NEXT: } // CHECK-NEXT: } + // CHECK-NOT: {{^}}Dumping // CHECK: "kind": "CXXMethodDecl", // CHECK-NEXT: "loc": { @@ -183,6 +188,7 @@ int main() { // CHECK-NEXT: } // CHECK-NEXT: } + // CHECK-NOT: {{^}}Dumping // CHECK: "kind": "CXXMethodDecl", // CHECK-NEXT: "loc": { @@ -286,6 +292,7 @@ int main() { // CHECK-NEXT: ] // CHECK-NEXT: } + // CHECK-NOT: {{^}}Dumping // CHECK: "kind": "CXXMethodDecl", // CHECK-NEXT: "loc": { @@ -316,6 +323,7 @@ int main() { // CHECK-NEXT: "pure": true // CHECK-NEXT: } + // CHECK-NOT: {{^}}Dumping // CHECK: "kind": "CXXMethodDecl", // CHECK-NEXT: "loc": { @@ -434,6 +442,7 @@ int main() { // CHECK-NEXT: ] // CHECK-NEXT: } + // CHECK-NOT: {{^}}Dumping // CHECK: "kind": "CXXMethodDecl", // CHECK-NEXT: "loc": { @@ -462,6 +471,7 @@ int main() { // CHECK-NEXT: } // CHECK-NEXT: } + // CHECK-NOT: {{^}}Dumping // CHECK: "kind": "CXXMethodDecl", // CHECK-NEXT: "loc": { @@ -510,6 +520,7 @@ int main() { // CHECK-NEXT: ] // CHECK-NEXT: } + // CHECK-NOT: {{^}}Dumping // CHECK: "kind": "FunctionDecl", // CHECK-NEXT: "loc": { @@ -539,6 +550,7 @@ int main() { // CHECK-NEXT: } // CHECK-NEXT: } + // CHECK-NOT: {{^}}Dumping // CHECK: "kind": "FunctionDecl", // CHECK-NEXT: "loc": { @@ -567,6 +579,7 @@ int main() { // CHECK-NEXT: 
} // CHECK-NEXT: } + // CHECK-NOT: {{^}}Dumping // CHECK: "kind": "FunctionDecl", // CHECK-NEXT: "loc": { @@ -649,6 +662,7 @@ int main() { // CHECK-NEXT: ] // CHECK-NEXT: } + // CHECK-NOT: {{^}}Dumping // CHECK: "kind": "FunctionDecl", // CHECK-NEXT: "loc": { @@ -755,6 +769,7 @@ int main() { // CHECK-NEXT: ] // CHECK-NEXT: } + // CHECK-NOT: {{^}}Dumping // CHECK: "kind": "FunctionDecl", // CHECK-NEXT: "loc": { @@ -784,6 +799,7 @@ int main() { // CHECK-NEXT: "constexpr": true // CHECK-NEXT: } + // CHECK-NOT: {{^}}Dumping // CHECK: "kind": "FunctionDecl", // CHECK-NEXT: "loc": { @@ -813,6 +829,7 @@ int main() { // CHECK-NEXT: "storageClass": "static" // CHECK-NEXT: } + // CHECK-NOT: {{^}}Dumping // CHECK: "kind": "FunctionDecl", // CHECK-NEXT: "loc": { @@ -842,6 +859,7 @@ int main() { // CHECK-NEXT: "storageClass": "extern" // CHECK-NEXT: } + // CHECK-NOT: {{^}}Dumping // CHECK: "kind": "FunctionDecl", // CHECK-NEXT: "loc": { @@ -871,6 +889,7 @@ int main() { // CHECK-NEXT: "inline": true // CHECK-NEXT: } + // CHECK-NOT: {{^}}Dumping // CHECK: "kind": "FunctionDecl", // CHECK-NEXT: "loc": { @@ -899,6 +918,7 @@ int main() { // CHECK-NEXT: } // CHECK-NEXT: } + // CHECK-NOT: {{^}}Dumping // CHECK: "kind": "FunctionDecl", // CHECK-NEXT: "loc": { @@ -927,6 +947,7 @@ int main() { // CHECK-NEXT: } // CHECK-NEXT: } + // CHECK-NOT: {{^}}Dumping // CHECK: "kind": "FunctionDecl", // CHECK-NEXT: "loc": { @@ -955,6 +976,7 @@ int main() { // CHECK-NEXT: } // CHECK-NEXT: } + // CHECK-NOT: {{^}}Dumping // CHECK: "kind": "FunctionTemplateDecl", // CHECK-NEXT: "loc": { @@ -1062,6 +1084,7 @@ int main() { // CHECK-NEXT: ] // CHECK-NEXT: } + // CHECK-NOT: {{^}}Dumping // CHECK: "kind": "FunctionDecl", // CHECK-NEXT: "loc": { @@ -1132,6 +1155,7 @@ int main() { // CHECK-NEXT: ] // CHECK-NEXT: } + // CHECK-NOT: {{^}}Dumping // CHECK: "kind": "FunctionDecl", // CHECK-NEXT: "loc": { diff --git a/clang/test/AST/ast-dump-stmt-json.c b/clang/test/AST/ast-dump-stmt-json.c index fe3ec558112a..2990cfa64079 100644 --- a/clang/test/AST/ast-dump-stmt-json.c +++ b/clang/test/AST/ast-dump-stmt-json.c @@ -150,6 +150,7 @@ void TestLineNumbers(void) { // NOTE: CHECK lines have been autogenerated by gen_ast_dump_json_test.py // using --filters=VarDecl,CompoundStmt + // CHECK-NOT: {{^}}Dumping // CHECK: "kind": "VarDecl", // CHECK-NEXT: "loc": { @@ -202,6 +203,7 @@ void TestLineNumbers(void) { // CHECK-NEXT: ] // CHECK-NEXT: } + // CHECK-NOT: {{^}}Dumping // CHECK: "kind": "VarDecl", // CHECK-NEXT: "loc": { @@ -319,6 +321,7 @@ void TestLineNumbers(void) { // CHECK-NEXT: ] // CHECK-NEXT: } + // CHECK-NOT: {{^}}Dumping // CHECK: "kind": "CompoundStmt", // CHECK-NEXT: "range": { @@ -479,6 +482,7 @@ void TestLineNumbers(void) { // CHECK-NEXT: ] // CHECK-NEXT: } + // CHECK-NOT: {{^}}Dumping // CHECK: "kind": "VarDecl", // CHECK-NEXT: "loc": { @@ -659,6 +663,7 @@ void TestLineNumbers(void) { // CHECK-NEXT: ] // CHECK-NEXT: } + // CHECK-NOT: {{^}}Dumping // CHECK: "kind": "CompoundStmt", // CHECK-NEXT: "range": { @@ -1302,6 +1307,7 @@ void TestLineNumbers(void) { // CHECK-NEXT: ] // CHECK-NEXT: } + // CHECK-NOT: {{^}}Dumping // CHECK: "kind": "CompoundStmt", // CHECK-NEXT: "range": { @@ -2059,6 +2065,7 @@ void TestLineNumbers(void) { // CHECK-NEXT: ] // CHECK-NEXT: } + // CHECK-NOT: {{^}}Dumping // CHECK: "kind": "CompoundStmt", // CHECK-NEXT: "range": { @@ -2342,6 +2349,7 @@ void TestLineNumbers(void) { // CHECK-NEXT: ] // CHECK-NEXT: } + // CHECK-NOT: {{^}}Dumping // CHECK: "kind": "CompoundStmt", // CHECK-NEXT: "range": { @@ -2839,6 
+2847,7 @@ void TestLineNumbers(void) { // CHECK-NEXT: ] // CHECK-NEXT: } + // CHECK-NOT: {{^}}Dumping // CHECK: "kind": "CompoundStmt", // CHECK-NEXT: "range": { @@ -3622,6 +3631,7 @@ void TestLineNumbers(void) { // CHECK-NEXT: ] // CHECK-NEXT: } + // CHECK-NOT: {{^}}Dumping // CHECK: "kind": "CompoundStmt", // CHECK-NEXT: "range": { @@ -4830,6 +4840,7 @@ void TestLineNumbers(void) { // CHECK-NEXT: ] // CHECK-NEXT: } + // CHECK-NOT: {{^}}Dumping // CHECK: "kind": "CompoundStmt", // CHECK-NEXT: "range": { @@ -5075,6 +5086,7 @@ void TestLineNumbers(void) { // CHECK-NEXT: ] // CHECK-NEXT: } + // CHECK-NOT: {{^}}Dumping // CHECK: "kind": "CompoundStmt", // CHECK-NEXT: "range": { @@ -5256,6 +5268,7 @@ void TestLineNumbers(void) { // CHECK-NEXT: ] // CHECK-NEXT: } + // CHECK-NOT: {{^}}Dumping // CHECK: "kind": "CompoundStmt", // CHECK-NEXT: "range": { diff --git a/clang/test/AST/ast-dump-stmt-json.cpp b/clang/test/AST/ast-dump-stmt-json.cpp index b8988cc16d43..2283f38eebd5 100644 --- a/clang/test/AST/ast-dump-stmt-json.cpp +++ b/clang/test/AST/ast-dump-stmt-json.cpp @@ -125,6 +125,7 @@ void TestDependentGenericSelectionExpr(Ty T) { // NOTE: CHECK lines have been autogenerated by gen_ast_dump_json_test.py // using --filters=FunctionDecl,FunctionTemplateDecl,UsingDecl,UsingShadowDecl + // CHECK-NOT: {{^}}Dumping // CHECK: "kind": "FunctionDecl", // CHECK-NEXT: "loc": { @@ -171,6 +172,7 @@ void TestDependentGenericSelectionExpr(Ty T) { // CHECK-NEXT: ] // CHECK-NEXT: } + // CHECK-NOT: {{^}}Dumping // CHECK: "kind": "UsingDecl", // CHECK-NEXT: "loc": { @@ -194,6 +196,7 @@ void TestDependentGenericSelectionExpr(Ty T) { // CHECK-NEXT: "name": "n::function" // CHECK-NEXT: } + // CHECK-NOT: {{^}}Dumping // CHECK: "kind": "UsingShadowDecl", // CHECK-NEXT: "loc": { @@ -224,6 +227,7 @@ void TestDependentGenericSelectionExpr(Ty T) { // CHECK-NEXT: } // CHECK-NEXT: } + // CHECK-NOT: {{^}}Dumping // CHECK: "kind": "UsingDecl", // CHECK-NEXT: "loc": { @@ -247,6 +251,7 @@ void TestDependentGenericSelectionExpr(Ty T) { // CHECK-NEXT: "name": "n::Variable" // CHECK-NEXT: } + // CHECK-NOT: {{^}}Dumping // CHECK: "kind": "UsingShadowDecl", // CHECK-NEXT: "loc": { @@ -277,6 +282,7 @@ void TestDependentGenericSelectionExpr(Ty T) { // CHECK-NEXT: } // CHECK-NEXT: } + // CHECK-NOT: {{^}}Dumping // CHECK: "kind": "FunctionDecl", // CHECK-NEXT: "loc": { @@ -511,6 +517,7 @@ void TestDependentGenericSelectionExpr(Ty T) { // CHECK-NEXT: ] // CHECK-NEXT: } + // CHECK-NOT: {{^}}Dumping // CHECK: "kind": "FunctionDecl", // CHECK-NEXT: "loc": { @@ -663,6 +670,7 @@ void TestDependentGenericSelectionExpr(Ty T) { // CHECK-NEXT: ] // CHECK-NEXT: } + // CHECK-NOT: {{^}}Dumping // CHECK: "kind": "FunctionDecl", // CHECK-NEXT: "loc": { @@ -792,6 +800,7 @@ void TestDependentGenericSelectionExpr(Ty T) { // CHECK-NEXT: ] // CHECK-NEXT: } + // CHECK-NOT: {{^}}Dumping // CHECK: "kind": "FunctionDecl", // CHECK-NEXT: "loc": { @@ -1425,6 +1434,7 @@ void TestDependentGenericSelectionExpr(Ty T) { // CHECK-NEXT: ] // CHECK-NEXT: } + // CHECK-NOT: {{^}}Dumping // CHECK: "kind": "FunctionDecl", // CHECK-NEXT: "loc": {}, @@ -1483,6 +1493,7 @@ void TestDependentGenericSelectionExpr(Ty T) { // CHECK-NEXT: ] // CHECK-NEXT: } + // CHECK-NOT: {{^}}Dumping // CHECK: "kind": "FunctionDecl", // CHECK-NEXT: "loc": {}, @@ -1562,6 +1573,7 @@ void TestDependentGenericSelectionExpr(Ty T) { // CHECK-NEXT: ] // CHECK-NEXT: } + // CHECK-NOT: {{^}}Dumping // CHECK: "kind": "FunctionDecl", // CHECK-NEXT: "loc": {}, @@ -1620,6 +1632,7 @@ void 
TestDependentGenericSelectionExpr(Ty T) { // CHECK-NEXT: ] // CHECK-NEXT: } + // CHECK-NOT: {{^}}Dumping // CHECK: "kind": "FunctionDecl", // CHECK-NEXT: "loc": {}, @@ -1699,6 +1712,7 @@ void TestDependentGenericSelectionExpr(Ty T) { // CHECK-NEXT: ] // CHECK-NEXT: } + // CHECK-NOT: {{^}}Dumping // CHECK: "kind": "FunctionDecl", // CHECK-NEXT: "loc": {}, @@ -1739,6 +1753,7 @@ void TestDependentGenericSelectionExpr(Ty T) { // CHECK-NEXT: ] // CHECK-NEXT: } + // CHECK-NOT: {{^}}Dumping // CHECK: "kind": "FunctionDecl", // CHECK-NEXT: "loc": {}, @@ -1791,6 +1806,7 @@ void TestDependentGenericSelectionExpr(Ty T) { // CHECK-NEXT: ] // CHECK-NEXT: } + // CHECK-NOT: {{^}}Dumping // CHECK: "kind": "FunctionDecl", // CHECK-NEXT: "loc": {}, @@ -1831,6 +1847,7 @@ void TestDependentGenericSelectionExpr(Ty T) { // CHECK-NEXT: ] // CHECK-NEXT: } + // CHECK-NOT: {{^}}Dumping // CHECK: "kind": "FunctionDecl", // CHECK-NEXT: "loc": {}, @@ -1883,6 +1900,7 @@ void TestDependentGenericSelectionExpr(Ty T) { // CHECK-NEXT: ] // CHECK-NEXT: } + // CHECK-NOT: {{^}}Dumping // CHECK: "kind": "FunctionTemplateDecl", // CHECK-NEXT: "loc": { @@ -2106,6 +2124,7 @@ void TestDependentGenericSelectionExpr(Ty T) { // CHECK-NEXT: ] // CHECK-NEXT: } + // CHECK-NOT: {{^}}Dumping // CHECK: "kind": "FunctionTemplateDecl", // CHECK-NEXT: "loc": { @@ -2613,6 +2632,7 @@ void TestDependentGenericSelectionExpr(Ty T) { // CHECK-NEXT: ] // CHECK-NEXT: } + // CHECK-NOT: {{^}}Dumping // CHECK: "kind": "FunctionTemplateDecl", // CHECK-NEXT: "loc": { @@ -2880,6 +2900,7 @@ void TestDependentGenericSelectionExpr(Ty T) { // CHECK-NEXT: ] // CHECK-NEXT: } + // CHECK-NOT: {{^}}Dumping // CHECK: "kind": "FunctionDecl", // CHECK-NEXT: "loc": { @@ -3079,6 +3100,7 @@ void TestDependentGenericSelectionExpr(Ty T) { // CHECK-NEXT: ] // CHECK-NEXT: } + // CHECK-NOT: {{^}}Dumping // CHECK: "kind": "FunctionDecl", // CHECK-NEXT: "loc": { @@ -3290,6 +3312,7 @@ void TestDependentGenericSelectionExpr(Ty T) { // CHECK-NEXT: ] // CHECK-NEXT: } + // CHECK-NOT: {{^}}Dumping // CHECK: "kind": "FunctionDecl", // CHECK-NEXT: "loc": { @@ -3974,6 +3997,7 @@ void TestDependentGenericSelectionExpr(Ty T) { // CHECK-NEXT: ] // CHECK-NEXT: } + // CHECK-NOT: {{^}}Dumping // CHECK: "kind": "FunctionDecl", // CHECK-NEXT: "loc": { @@ -6660,6 +6684,7 @@ void TestDependentGenericSelectionExpr(Ty T) { // CHECK-NEXT: ] // CHECK-NEXT: } + // CHECK-NOT: {{^}}Dumping // CHECK: "kind": "FunctionTemplateDecl", // CHECK-NEXT: "loc": { diff --git a/clang/test/AST/ast-dump-stmt-json.m b/clang/test/AST/ast-dump-stmt-json.m index 44935d832674..333bdd529f97 100644 --- a/clang/test/AST/ast-dump-stmt-json.m +++ b/clang/test/AST/ast-dump-stmt-json.m @@ -21,6 +21,7 @@ void TestObjCAtCatchStmt() { // NOTE: CHECK lines have been autogenerated by gen_ast_dump_json_test.py + // CHECK-NOT: {{^}}Dumping // CHECK: "kind": "FunctionDecl", // CHECK-NEXT: "loc": { @@ -254,6 +255,7 @@ void TestObjCAtCatchStmt() { // CHECK-NEXT: ] // CHECK-NEXT: } + // CHECK-NOT: {{^}}Dumping // CHECK: "kind": "FunctionDecl", // CHECK-NEXT: "loc": { @@ -487,6 +489,7 @@ void TestObjCAtCatchStmt() { // CHECK-NEXT: ] // CHECK-NEXT: } + // CHECK-NOT: {{^}}Dumping // CHECK: "kind": "FunctionDecl", // CHECK-NEXT: "loc": { diff --git a/clang/test/AST/ast-dump-template-decls-json.cpp b/clang/test/AST/ast-dump-template-decls-json.cpp index 5e7a547e2161..9871f6dd9b6e 100644 --- a/clang/test/AST/ast-dump-template-decls-json.cpp +++ b/clang/test/AST/ast-dump-template-decls-json.cpp @@ -60,6 +60,7 @@ void i(); // NOTE: CHECK lines 
have been autogenerated by gen_ast_dump_json_test.py + // CHECK-NOT: {{^}}Dumping // CHECK: "kind": "TranslationUnitDecl", // CHECK-NEXT: "loc": {}, From 0fe1ccc70f05d1a64ae8fd9a800829072bbe47e7 Mon Sep 17 00:00:00 2001 From: Nikita Popov Date: Wed, 22 Dec 2021 17:11:48 +0100 Subject: [PATCH 057/293] [OpenMP] Regenerate test checks (NFC) Regenerate test checks to reduce diff for an upcoming patch. --- .../test/OpenMP/debug-info-complex-byval.cpp | 47 +- ...bute_parallel_for_firstprivate_codegen.cpp | 1 - ...parallel_for_simd_firstprivate_codegen.cpp | 748 ++--- ...istribute_parallel_for_simd_if_codegen.cpp | 2832 ++++++++-------- .../OpenMP/parallel_firstprivate_codegen.cpp | 1 - clang/test/OpenMP/parallel_for_codegen.cpp | 2 +- clang/test/OpenMP/parallel_master_codegen.cpp | 1 - clang/test/OpenMP/single_codegen.cpp | 135 +- .../OpenMP/target_parallel_debug_codegen.cpp | 8 +- .../OpenMP/target_parallel_for_codegen.cpp | 64 +- .../target_parallel_for_debug_codegen.cpp | 8 +- ...rget_teams_distribute_collapse_codegen.cpp | 1 - ..._teams_distribute_firstprivate_codegen.cpp | 1 - ...t_teams_distribute_lastprivate_codegen.cpp | 1 - ...stribute_parallel_for_collapse_codegen.cpp | 1 - ...ute_parallel_for_dist_schedule_codegen.cpp | 1 - ...bute_parallel_for_firstprivate_codegen.cpp | 1 - ...ibute_parallel_for_lastprivate_codegen.cpp | 1 - ...s_distribute_parallel_for_simd_codegen.cpp | 416 +-- ...ute_parallel_for_simd_collapse_codegen.cpp | 600 ++-- ...arallel_for_simd_dist_schedule_codegen.cpp | 1680 +++++----- ...parallel_for_simd_firstprivate_codegen.cpp | 1106 +++---- ...istribute_parallel_for_simd_if_codegen.cpp | 2912 ++++++++--------- ..._parallel_for_simd_lastprivate_codegen.cpp | 728 ++--- .../OpenMP/target_teams_num_teams_codegen.cpp | 1 - .../target_teams_thread_limit_codegen.cpp | 1 - clang/test/OpenMP/teams_codegen.cpp | 1 - .../test/OpenMP/teams_distribute_codegen.cpp | 1 - .../teams_distribute_firstprivate_codegen.cpp | 1 - ...ute_parallel_for_dist_schedule_codegen.cpp | 1 - ...bute_parallel_for_firstprivate_codegen.cpp | 1 - ...ibute_parallel_for_num_threads_codegen.cpp | 1 - ...s_distribute_parallel_for_simd_codegen.cpp | 352 +- ...arallel_for_simd_dist_schedule_codegen.cpp | 1608 ++++----- ...parallel_for_simd_firstprivate_codegen.cpp | 588 ++-- ...istribute_parallel_for_simd_if_codegen.cpp | 2832 ++++++++-------- ..._parallel_for_simd_num_threads_codegen.cpp | 928 +++--- ...s_distribute_simd_firstprivate_codegen.cpp | 288 +- .../OpenMP/teams_firstprivate_codegen.cpp | 1 - 39 files changed, 8940 insertions(+), 8961 deletions(-) diff --git a/clang/test/OpenMP/debug-info-complex-byval.cpp b/clang/test/OpenMP/debug-info-complex-byval.cpp index cd09f80d5580..cc39d65bc414 100644 --- a/clang/test/OpenMP/debug-info-complex-byval.cpp +++ b/clang/test/OpenMP/debug-info-complex-byval.cpp @@ -16,17 +16,17 @@ void a() { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[B:%.*]] = alloca { float, float }, align 4 // CHECK1-NEXT: [[B_CASTED:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: call void @llvm.dbg.declare(metadata { float, float }* [[B]], metadata [[META10:![0-9]+]], metadata !DIExpression()), !dbg [[DBG12:![0-9]+]] -// CHECK1-NEXT: [[TMP0:%.*]] = load { float, float }, { float, float }* [[B]], align 4, !dbg [[DBG13:![0-9]+]] -// CHECK1-NEXT: [[CONV:%.*]] = bitcast i64* [[B_CASTED]] to { float, float }*, !dbg [[DBG13]] -// CHECK1-NEXT: store { float, float } [[TMP0]], { float, float }* [[CONV]], align 4, !dbg [[DBG13]] -// CHECK1-NEXT: [[TMP1:%.*]] = load i64, i64* [[B_CASTED]], align 8, !dbg 
[[DBG13]] -// CHECK1-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB1:[0-9]+]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64)* @.omp_outlined. to void (i32*, i32*, ...)*), i64 [[TMP1]]), !dbg [[DBG13]] -// CHECK1-NEXT: ret void, !dbg [[DBG14:![0-9]+]] +// CHECK1-NEXT: call void @llvm.dbg.declare(metadata { float, float }* [[B]], metadata [[META11:![0-9]+]], metadata !DIExpression()), !dbg [[DBG13:![0-9]+]] +// CHECK1-NEXT: [[TMP0:%.*]] = load { float, float }, { float, float }* [[B]], align 4, !dbg [[DBG14:![0-9]+]] +// CHECK1-NEXT: [[CONV:%.*]] = bitcast i64* [[B_CASTED]] to { float, float }*, !dbg [[DBG14]] +// CHECK1-NEXT: store { float, float } [[TMP0]], { float, float }* [[CONV]], align 4, !dbg [[DBG14]] +// CHECK1-NEXT: [[TMP1:%.*]] = load i64, i64* [[B_CASTED]], align 8, !dbg [[DBG14]] +// CHECK1-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB1:[0-9]+]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64)* @.omp_outlined. to void (i32*, i32*, ...)*), i64 [[TMP1]]), !dbg [[DBG14]] +// CHECK1-NEXT: ret void, !dbg [[DBG15:![0-9]+]] // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined._debug__ -// CHECK1-SAME: (i32* noalias [[DOTGLOBAL_TID_:%.*]], i32* noalias [[DOTBOUND_TID_:%.*]], <2 x float> [[B_COERCE:%.*]]) #[[ATTR2:[0-9]+]] !dbg [[DBG15:![0-9]+]] { +// CHECK1-SAME: (i32* noalias [[DOTGLOBAL_TID_:%.*]], i32* noalias [[DOTBOUND_TID_:%.*]], <2 x float> [[B_COERCE:%.*]]) #[[ATTR2:[0-9]+]] !dbg [[DBG16:![0-9]+]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[B:%.*]] = alloca { float, float }, align 4 // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 @@ -34,31 +34,30 @@ void a() { // CHECK1-NEXT: [[TMP0:%.*]] = bitcast { float, float }* [[B]] to <2 x float>* // CHECK1-NEXT: store <2 x float> [[B_COERCE]], <2 x float>* [[TMP0]], align 4 // CHECK1-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: call void @llvm.dbg.declare(metadata i32** [[DOTGLOBAL_TID__ADDR]], metadata [[META23:![0-9]+]], metadata !DIExpression()), !dbg [[DBG24:![0-9]+]] +// CHECK1-NEXT: call void @llvm.dbg.declare(metadata i32** [[DOTGLOBAL_TID__ADDR]], metadata [[META24:![0-9]+]], metadata !DIExpression()), !dbg [[DBG25:![0-9]+]] // CHECK1-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 -// CHECK1-NEXT: call void @llvm.dbg.declare(metadata i32** [[DOTBOUND_TID__ADDR]], metadata [[META25:![0-9]+]], metadata !DIExpression()), !dbg [[DBG24]] -// CHECK1-NEXT: call void @llvm.dbg.declare(metadata { float, float }* [[B]], metadata [[META26:![0-9]+]], metadata !DIExpression()), !dbg [[DBG27:![0-9]+]] -// CHECK1-NEXT: ret void, !dbg [[DBG28:![0-9]+]] +// CHECK1-NEXT: call void @llvm.dbg.declare(metadata i32** [[DOTBOUND_TID__ADDR]], metadata [[META26:![0-9]+]], metadata !DIExpression()), !dbg [[DBG25]] +// CHECK1-NEXT: call void @llvm.dbg.declare(metadata { float, float }* [[B]], metadata [[META27:![0-9]+]], metadata !DIExpression()), !dbg [[DBG28:![0-9]+]] +// CHECK1-NEXT: ret void, !dbg [[DBG29:![0-9]+]] // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined. 
-// CHECK1-SAME: (i32* noalias [[DOTGLOBAL_TID_:%.*]], i32* noalias [[DOTBOUND_TID_:%.*]], i64 [[B:%.*]]) #[[ATTR3:[0-9]+]] !dbg [[DBG29:![0-9]+]] { +// CHECK1-SAME: (i32* noalias [[DOTGLOBAL_TID_:%.*]], i32* noalias [[DOTBOUND_TID_:%.*]], i64 [[B:%.*]]) #[[ATTR3:[0-9]+]] !dbg [[DBG30:![0-9]+]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 // CHECK1-NEXT: [[B_ADDR:%.*]] = alloca i64, align 8 // CHECK1-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: call void @llvm.dbg.declare(metadata i32** [[DOTGLOBAL_TID__ADDR]], metadata [[META33:![0-9]+]], metadata !DIExpression()), !dbg [[DBG34:![0-9]+]] +// CHECK1-NEXT: call void @llvm.dbg.declare(metadata i32** [[DOTGLOBAL_TID__ADDR]], metadata [[META34:![0-9]+]], metadata !DIExpression()), !dbg [[DBG35:![0-9]+]] // CHECK1-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 -// CHECK1-NEXT: call void @llvm.dbg.declare(metadata i32** [[DOTBOUND_TID__ADDR]], metadata [[META35:![0-9]+]], metadata !DIExpression()), !dbg [[DBG34]] +// CHECK1-NEXT: call void @llvm.dbg.declare(metadata i32** [[DOTBOUND_TID__ADDR]], metadata [[META36:![0-9]+]], metadata !DIExpression()), !dbg [[DBG35]] // CHECK1-NEXT: store i64 [[B]], i64* [[B_ADDR]], align 8 -// CHECK1-NEXT: call void @llvm.dbg.declare(metadata i64* [[B_ADDR]], metadata [[META36:![0-9]+]], metadata !DIExpression()), !dbg [[DBG34]] -// CHECK1-NEXT: [[CONV:%.*]] = bitcast i64* [[B_ADDR]] to { float, float }*, !dbg [[DBG37:![0-9]+]] -// CHECK1-NEXT: [[TMP0:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8, !dbg [[DBG37]] -// CHECK1-NEXT: [[TMP1:%.*]] = load i32*, i32** [[DOTBOUND_TID__ADDR]], align 8, !dbg [[DBG37]] -// CHECK1-NEXT: [[TMP2:%.*]] = bitcast { float, float }* [[CONV]] to <2 x float>*, !dbg [[DBG37]] -// CHECK1-NEXT: [[TMP3:%.*]] = load <2 x float>, <2 x float>* [[TMP2]], align 8, !dbg [[DBG37]] -// CHECK1-NEXT: call void @.omp_outlined._debug__(i32* [[TMP0]], i32* [[TMP1]], <2 x float> [[TMP3]]) #[[ATTR4:[0-9]+]], !dbg [[DBG37]] -// CHECK1-NEXT: ret void, !dbg [[DBG37]] +// CHECK1-NEXT: call void @llvm.dbg.declare(metadata i64* [[B_ADDR]], metadata [[META37:![0-9]+]], metadata !DIExpression()), !dbg [[DBG35]] +// CHECK1-NEXT: [[CONV:%.*]] = bitcast i64* [[B_ADDR]] to { float, float }*, !dbg [[DBG38:![0-9]+]] +// CHECK1-NEXT: [[TMP0:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8, !dbg [[DBG38]] +// CHECK1-NEXT: [[TMP1:%.*]] = load i32*, i32** [[DOTBOUND_TID__ADDR]], align 8, !dbg [[DBG38]] +// CHECK1-NEXT: [[TMP2:%.*]] = bitcast { float, float }* [[CONV]] to <2 x float>*, !dbg [[DBG38]] +// CHECK1-NEXT: [[TMP3:%.*]] = load <2 x float>, <2 x float>* [[TMP2]], align 8, !dbg [[DBG38]] +// CHECK1-NEXT: call void @.omp_outlined._debug__(i32* [[TMP0]], i32* [[TMP1]], <2 x float> [[TMP3]]) #[[ATTR4:[0-9]+]], !dbg [[DBG38]] +// CHECK1-NEXT: ret void, !dbg [[DBG38]] // -// \ No newline at end of file diff --git a/clang/test/OpenMP/distribute_parallel_for_firstprivate_codegen.cpp b/clang/test/OpenMP/distribute_parallel_for_firstprivate_codegen.cpp index ee085daa60a5..9982bb790447 100644 --- a/clang/test/OpenMP/distribute_parallel_for_firstprivate_codegen.cpp +++ b/clang/test/OpenMP/distribute_parallel_for_firstprivate_codegen.cpp @@ -5006,4 +5006,3 @@ int main() { // CHECK11-NEXT: call void @__tgt_register_requires(i64 1) // CHECK11-NEXT: ret void // -// \ No newline at end of file diff --git 
a/clang/test/OpenMP/distribute_parallel_for_simd_firstprivate_codegen.cpp b/clang/test/OpenMP/distribute_parallel_for_simd_firstprivate_codegen.cpp index ba48754a6b48..56640954d514 100644 --- a/clang/test/OpenMP/distribute_parallel_for_simd_firstprivate_codegen.cpp +++ b/clang/test/OpenMP/distribute_parallel_for_simd_firstprivate_codegen.cpp @@ -374,40 +374,40 @@ int main() { // CHECK1-NEXT: store i32 [[TMP14]], i32* [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !4 +// CHECK1-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !4 // CHECK1-NEXT: [[CMP8:%.*]] = icmp sle i32 [[TMP15]], [[TMP16]] // CHECK1-NEXT: br i1 [[CMP8]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK1-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !4 // CHECK1-NEXT: [[TMP18:%.*]] = zext i32 [[TMP17]] to i64 -// CHECK1-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !4 // CHECK1-NEXT: [[TMP20:%.*]] = zext i32 [[TMP19]] to i64 -// CHECK1-NEXT: [[TMP21:%.*]] = load double, double* [[G3]], align 8 +// CHECK1-NEXT: [[TMP21:%.*]] = load double, double* [[G3]], align 8, !llvm.access.group !4 // CHECK1-NEXT: [[CONV:%.*]] = bitcast i64* [[G_CASTED]] to double* -// CHECK1-NEXT: store double [[TMP21]], double* [[CONV]], align 8 -// CHECK1-NEXT: [[TMP22:%.*]] = load i64, i64* [[G_CASTED]], align 8 -// CHECK1-NEXT: [[TMP23:%.*]] = load double*, double** [[_TMP5]], align 8 -// CHECK1-NEXT: [[TMP24:%.*]] = load volatile double, double* [[TMP23]], align 8 +// CHECK1-NEXT: store double [[TMP21]], double* [[CONV]], align 8, !llvm.access.group !4 +// CHECK1-NEXT: [[TMP22:%.*]] = load i64, i64* [[G_CASTED]], align 8, !llvm.access.group !4 +// CHECK1-NEXT: [[TMP23:%.*]] = load double*, double** [[_TMP5]], align 8, !llvm.access.group !4 +// CHECK1-NEXT: [[TMP24:%.*]] = load volatile double, double* [[TMP23]], align 8, !llvm.access.group !4 // CHECK1-NEXT: [[CONV9:%.*]] = bitcast i64* [[G1_CASTED]] to double* -// CHECK1-NEXT: store double [[TMP24]], double* [[CONV9]], align 8 -// CHECK1-NEXT: [[TMP25:%.*]] = load i64, i64* [[G1_CASTED]], align 8 -// CHECK1-NEXT: [[TMP26:%.*]] = load i32, i32* [[SVAR6]], align 4 +// CHECK1-NEXT: store double [[TMP24]], double* [[CONV9]], align 8, !llvm.access.group !4 +// CHECK1-NEXT: [[TMP25:%.*]] = load i64, i64* [[G1_CASTED]], align 8, !llvm.access.group !4 +// CHECK1-NEXT: [[TMP26:%.*]] = load i32, i32* [[SVAR6]], align 4, !llvm.access.group !4 // CHECK1-NEXT: [[CONV10:%.*]] = bitcast i64* [[SVAR_CASTED]] to i32* -// CHECK1-NEXT: store i32 [[TMP26]], i32* [[CONV10]], align 4 -// CHECK1-NEXT: [[TMP27:%.*]] = load i64, i64* [[SVAR_CASTED]], align 8 -// CHECK1-NEXT: [[TMP28:%.*]] = load float, float* [[SFVAR7]], align 4 +// CHECK1-NEXT: store i32 [[TMP26]], i32* [[CONV10]], align 4, !llvm.access.group !4 +// CHECK1-NEXT: [[TMP27:%.*]] = load i64, i64* [[SVAR_CASTED]], align 8, !llvm.access.group !4 +// CHECK1-NEXT: [[TMP28:%.*]] = load float, float* [[SFVAR7]], align 4, !llvm.access.group !4 // 
CHECK1-NEXT: [[CONV11:%.*]] = bitcast i64* [[SFVAR_CASTED]] to float* -// CHECK1-NEXT: store float [[TMP28]], float* [[CONV11]], align 4 -// CHECK1-NEXT: [[TMP29:%.*]] = load i64, i64* [[SFVAR_CASTED]], align 8 -// CHECK1-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 6, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64, i64, i64, i64, i64)* @.omp_outlined..1 to void (i32*, i32*, ...)*), i64 [[TMP18]], i64 [[TMP20]], i64 [[TMP22]], i64 [[TMP25]], i64 [[TMP27]], i64 [[TMP29]]) +// CHECK1-NEXT: store float [[TMP28]], float* [[CONV11]], align 4, !llvm.access.group !4 +// CHECK1-NEXT: [[TMP29:%.*]] = load i64, i64* [[SFVAR_CASTED]], align 8, !llvm.access.group !4 +// CHECK1-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 6, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64, i64, i64, i64, i64)* @.omp_outlined..1 to void (i32*, i32*, ...)*), i64 [[TMP18]], i64 [[TMP20]], i64 [[TMP22]], i64 [[TMP25]], i64 [[TMP27]], i64 [[TMP29]]), !llvm.access.group !4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP30:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[TMP31:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK1-NEXT: [[TMP30:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !4 +// CHECK1-NEXT: [[TMP31:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !4 // CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP30]], [[TMP31]] -// CHECK1-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP3:![0-9]+]] +// CHECK1-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !4 +// CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP5:![0-9]+]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK1: omp.loop.exit: @@ -483,38 +483,38 @@ int main() { // CHECK1-NEXT: store i32 [[TMP6]], i32* [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !8 +// CHECK1-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !8 // CHECK1-NEXT: [[CMP7:%.*]] = icmp sle i32 [[TMP7]], [[TMP8]] // CHECK1-NEXT: br i1 [[CMP7]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !8 // CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP9]], 1 // CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK1-NEXT: store i32 [[ADD]], i32* [[I]], align 4 -// CHECK1-NEXT: store double 1.000000e+00, double* [[CONV]], align 8 -// CHECK1-NEXT: [[TMP10:%.*]] = load double*, double** [[TMP]], align 8 -// CHECK1-NEXT: store volatile double 1.000000e+00, double* [[TMP10]], align 8 -// CHECK1-NEXT: store i32 3, i32* [[CONV2]], align 8 -// CHECK1-NEXT: store float 4.000000e+00, float* [[CONV3]], align 8 +// CHECK1-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !8 +// CHECK1-NEXT: store double 1.000000e+00, double* [[CONV]], align 8, 
!llvm.access.group !8 +// CHECK1-NEXT: [[TMP10:%.*]] = load double*, double** [[TMP]], align 8, !llvm.access.group !8 +// CHECK1-NEXT: store volatile double 1.000000e+00, double* [[TMP10]], align 8, !llvm.access.group !8 +// CHECK1-NEXT: store i32 3, i32* [[CONV2]], align 8, !llvm.access.group !8 +// CHECK1-NEXT: store float 4.000000e+00, float* [[CONV3]], align 8, !llvm.access.group !8 // CHECK1-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[CLASS_ANON_0]], %class.anon.0* [[REF_TMP]], i32 0, i32 0 -// CHECK1-NEXT: store double* [[CONV]], double** [[TMP11]], align 8 +// CHECK1-NEXT: store double* [[CONV]], double** [[TMP11]], align 8, !llvm.access.group !8 // CHECK1-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[CLASS_ANON_0]], %class.anon.0* [[REF_TMP]], i32 0, i32 1 -// CHECK1-NEXT: [[TMP13:%.*]] = load double*, double** [[TMP]], align 8 -// CHECK1-NEXT: store double* [[TMP13]], double** [[TMP12]], align 8 +// CHECK1-NEXT: [[TMP13:%.*]] = load double*, double** [[TMP]], align 8, !llvm.access.group !8 +// CHECK1-NEXT: store double* [[TMP13]], double** [[TMP12]], align 8, !llvm.access.group !8 // CHECK1-NEXT: [[TMP14:%.*]] = getelementptr inbounds [[CLASS_ANON_0]], %class.anon.0* [[REF_TMP]], i32 0, i32 2 -// CHECK1-NEXT: store i32* [[CONV2]], i32** [[TMP14]], align 8 +// CHECK1-NEXT: store i32* [[CONV2]], i32** [[TMP14]], align 8, !llvm.access.group !8 // CHECK1-NEXT: [[TMP15:%.*]] = getelementptr inbounds [[CLASS_ANON_0]], %class.anon.0* [[REF_TMP]], i32 0, i32 3 -// CHECK1-NEXT: store float* [[CONV3]], float** [[TMP15]], align 8 -// CHECK1-NEXT: call void @"_ZZZ4mainENK3$_0clEvENKUlvE_clEv"(%class.anon.0* nonnull align 8 dereferenceable(32) [[REF_TMP]]) +// CHECK1-NEXT: store float* [[CONV3]], float** [[TMP15]], align 8, !llvm.access.group !8 +// CHECK1-NEXT: call void @"_ZZZ4mainENK3$_0clEvENKUlvE_clEv"(%class.anon.0* nonnull align 8 dereferenceable(32) [[REF_TMP]]), !llvm.access.group !8 // CHECK1-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK1: omp.body.continue: // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !8 // CHECK1-NEXT: [[ADD8:%.*]] = add nsw i32 [[TMP16]], 1 -// CHECK1-NEXT: store i32 [[ADD8]], i32* [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP5:![0-9]+]] +// CHECK1-NEXT: store i32 [[ADD8]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !8 +// CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP9:![0-9]+]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK1: omp.loop.exit: @@ -648,40 +648,40 @@ int main() { // CHECK2-NEXT: store i32 [[TMP14]], i32* [[DOTOMP_IV]], align 4 // CHECK2-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK2: omp.inner.for.cond: -// CHECK2-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK2-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK2-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !4 +// CHECK2-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !4 // CHECK2-NEXT: [[CMP8:%.*]] = icmp sle i32 [[TMP15]], [[TMP16]] // CHECK2-NEXT: br i1 [[CMP8]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK2: omp.inner.for.body: -// CHECK2-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK2-NEXT: [[TMP17:%.*]] = load 
i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !4 // CHECK2-NEXT: [[TMP18:%.*]] = zext i32 [[TMP17]] to i64 -// CHECK2-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK2-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !4 // CHECK2-NEXT: [[TMP20:%.*]] = zext i32 [[TMP19]] to i64 -// CHECK2-NEXT: [[TMP21:%.*]] = load double, double* [[G3]], align 8 +// CHECK2-NEXT: [[TMP21:%.*]] = load double, double* [[G3]], align 8, !llvm.access.group !4 // CHECK2-NEXT: [[CONV:%.*]] = bitcast i64* [[G_CASTED]] to double* -// CHECK2-NEXT: store double [[TMP21]], double* [[CONV]], align 8 -// CHECK2-NEXT: [[TMP22:%.*]] = load i64, i64* [[G_CASTED]], align 8 -// CHECK2-NEXT: [[TMP23:%.*]] = load double*, double** [[_TMP5]], align 8 -// CHECK2-NEXT: [[TMP24:%.*]] = load volatile double, double* [[TMP23]], align 8 +// CHECK2-NEXT: store double [[TMP21]], double* [[CONV]], align 8, !llvm.access.group !4 +// CHECK2-NEXT: [[TMP22:%.*]] = load i64, i64* [[G_CASTED]], align 8, !llvm.access.group !4 +// CHECK2-NEXT: [[TMP23:%.*]] = load double*, double** [[_TMP5]], align 8, !llvm.access.group !4 +// CHECK2-NEXT: [[TMP24:%.*]] = load volatile double, double* [[TMP23]], align 8, !llvm.access.group !4 // CHECK2-NEXT: [[CONV9:%.*]] = bitcast i64* [[G1_CASTED]] to double* -// CHECK2-NEXT: store double [[TMP24]], double* [[CONV9]], align 8 -// CHECK2-NEXT: [[TMP25:%.*]] = load i64, i64* [[G1_CASTED]], align 8 -// CHECK2-NEXT: [[TMP26:%.*]] = load i32, i32* [[SVAR6]], align 4 +// CHECK2-NEXT: store double [[TMP24]], double* [[CONV9]], align 8, !llvm.access.group !4 +// CHECK2-NEXT: [[TMP25:%.*]] = load i64, i64* [[G1_CASTED]], align 8, !llvm.access.group !4 +// CHECK2-NEXT: [[TMP26:%.*]] = load i32, i32* [[SVAR6]], align 4, !llvm.access.group !4 // CHECK2-NEXT: [[CONV10:%.*]] = bitcast i64* [[SVAR_CASTED]] to i32* -// CHECK2-NEXT: store i32 [[TMP26]], i32* [[CONV10]], align 4 -// CHECK2-NEXT: [[TMP27:%.*]] = load i64, i64* [[SVAR_CASTED]], align 8 -// CHECK2-NEXT: [[TMP28:%.*]] = load float, float* [[SFVAR7]], align 4 +// CHECK2-NEXT: store i32 [[TMP26]], i32* [[CONV10]], align 4, !llvm.access.group !4 +// CHECK2-NEXT: [[TMP27:%.*]] = load i64, i64* [[SVAR_CASTED]], align 8, !llvm.access.group !4 +// CHECK2-NEXT: [[TMP28:%.*]] = load float, float* [[SFVAR7]], align 4, !llvm.access.group !4 // CHECK2-NEXT: [[CONV11:%.*]] = bitcast i64* [[SFVAR_CASTED]] to float* -// CHECK2-NEXT: store float [[TMP28]], float* [[CONV11]], align 4 -// CHECK2-NEXT: [[TMP29:%.*]] = load i64, i64* [[SFVAR_CASTED]], align 8 -// CHECK2-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 6, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64, i64, i64, i64, i64)* @.omp_outlined..1 to void (i32*, i32*, ...)*), i64 [[TMP18]], i64 [[TMP20]], i64 [[TMP22]], i64 [[TMP25]], i64 [[TMP27]], i64 [[TMP29]]) +// CHECK2-NEXT: store float [[TMP28]], float* [[CONV11]], align 4, !llvm.access.group !4 +// CHECK2-NEXT: [[TMP29:%.*]] = load i64, i64* [[SFVAR_CASTED]], align 8, !llvm.access.group !4 +// CHECK2-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) 
@__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 6, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64, i64, i64, i64, i64)* @.omp_outlined..1 to void (i32*, i32*, ...)*), i64 [[TMP18]], i64 [[TMP20]], i64 [[TMP22]], i64 [[TMP25]], i64 [[TMP27]], i64 [[TMP29]]), !llvm.access.group !4 // CHECK2-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK2: omp.inner.for.inc: -// CHECK2-NEXT: [[TMP30:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK2-NEXT: [[TMP31:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK2-NEXT: [[TMP30:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !4 +// CHECK2-NEXT: [[TMP31:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !4 // CHECK2-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP30]], [[TMP31]] -// CHECK2-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 -// CHECK2-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP3:![0-9]+]] +// CHECK2-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !4 +// CHECK2-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP5:![0-9]+]] // CHECK2: omp.inner.for.end: // CHECK2-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK2: omp.loop.exit: @@ -757,38 +757,38 @@ int main() { // CHECK2-NEXT: store i32 [[TMP6]], i32* [[DOTOMP_IV]], align 4 // CHECK2-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK2: omp.inner.for.cond: -// CHECK2-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK2-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK2-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !8 +// CHECK2-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !8 // CHECK2-NEXT: [[CMP7:%.*]] = icmp sle i32 [[TMP7]], [[TMP8]] // CHECK2-NEXT: br i1 [[CMP7]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK2: omp.inner.for.body: -// CHECK2-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK2-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !8 // CHECK2-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP9]], 1 // CHECK2-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK2-NEXT: store i32 [[ADD]], i32* [[I]], align 4 -// CHECK2-NEXT: store double 1.000000e+00, double* [[CONV]], align 8 -// CHECK2-NEXT: [[TMP10:%.*]] = load double*, double** [[TMP]], align 8 -// CHECK2-NEXT: store volatile double 1.000000e+00, double* [[TMP10]], align 8 -// CHECK2-NEXT: store i32 3, i32* [[CONV2]], align 8 -// CHECK2-NEXT: store float 4.000000e+00, float* [[CONV3]], align 8 +// CHECK2-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !8 +// CHECK2-NEXT: store double 1.000000e+00, double* [[CONV]], align 8, !llvm.access.group !8 +// CHECK2-NEXT: [[TMP10:%.*]] = load double*, double** [[TMP]], align 8, !llvm.access.group !8 +// CHECK2-NEXT: store volatile double 1.000000e+00, double* [[TMP10]], align 8, !llvm.access.group !8 +// CHECK2-NEXT: store i32 3, i32* [[CONV2]], align 8, !llvm.access.group !8 +// CHECK2-NEXT: store float 4.000000e+00, float* [[CONV3]], align 8, !llvm.access.group !8 // CHECK2-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[CLASS_ANON_0]], %class.anon.0* [[REF_TMP]], i32 0, i32 0 -// CHECK2-NEXT: store double* [[CONV]], double** [[TMP11]], align 8 +// CHECK2-NEXT: store double* [[CONV]], double** [[TMP11]], align 8, !llvm.access.group !8 // CHECK2-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[CLASS_ANON_0]], %class.anon.0* [[REF_TMP]], i32 0, i32 1 -// CHECK2-NEXT: [[TMP13:%.*]] = load double*, double** 
[[TMP]], align 8 -// CHECK2-NEXT: store double* [[TMP13]], double** [[TMP12]], align 8 +// CHECK2-NEXT: [[TMP13:%.*]] = load double*, double** [[TMP]], align 8, !llvm.access.group !8 +// CHECK2-NEXT: store double* [[TMP13]], double** [[TMP12]], align 8, !llvm.access.group !8 // CHECK2-NEXT: [[TMP14:%.*]] = getelementptr inbounds [[CLASS_ANON_0]], %class.anon.0* [[REF_TMP]], i32 0, i32 2 -// CHECK2-NEXT: store i32* [[CONV2]], i32** [[TMP14]], align 8 +// CHECK2-NEXT: store i32* [[CONV2]], i32** [[TMP14]], align 8, !llvm.access.group !8 // CHECK2-NEXT: [[TMP15:%.*]] = getelementptr inbounds [[CLASS_ANON_0]], %class.anon.0* [[REF_TMP]], i32 0, i32 3 -// CHECK2-NEXT: store float* [[CONV3]], float** [[TMP15]], align 8 -// CHECK2-NEXT: call void @"_ZZZ4mainENK3$_0clEvENKUlvE_clEv"(%class.anon.0* nonnull align 8 dereferenceable(32) [[REF_TMP]]) +// CHECK2-NEXT: store float* [[CONV3]], float** [[TMP15]], align 8, !llvm.access.group !8 +// CHECK2-NEXT: call void @"_ZZZ4mainENK3$_0clEvENKUlvE_clEv"(%class.anon.0* nonnull align 8 dereferenceable(32) [[REF_TMP]]), !llvm.access.group !8 // CHECK2-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK2: omp.body.continue: // CHECK2-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK2: omp.inner.for.inc: -// CHECK2-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK2-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !8 // CHECK2-NEXT: [[ADD8:%.*]] = add nsw i32 [[TMP16]], 1 -// CHECK2-NEXT: store i32 [[ADD8]], i32* [[DOTOMP_IV]], align 4 -// CHECK2-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP5:![0-9]+]] +// CHECK2-NEXT: store i32 [[ADD8]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !8 +// CHECK2-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP9:![0-9]+]] // CHECK2: omp.inner.for.end: // CHECK2-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK2: omp.loop.exit: @@ -929,33 +929,33 @@ int main() { // CHECK3-NEXT: store i32 [[TMP14]], i32* [[DOTOMP_IV]], align 4 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK3: omp.inner.for.cond: -// CHECK3-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK3-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !5 +// CHECK3-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !5 // CHECK3-NEXT: [[CMP8:%.*]] = icmp sle i32 [[TMP15]], [[TMP16]] // CHECK3-NEXT: br i1 [[CMP8]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK3: omp.inner.for.body: -// CHECK3-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK3-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK3-NEXT: [[TMP19:%.*]] = load double*, double** [[_TMP5]], align 4 -// CHECK3-NEXT: [[TMP20:%.*]] = load volatile double, double* [[TMP19]], align 4 +// CHECK3-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !5 +// CHECK3-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !5 +// CHECK3-NEXT: [[TMP19:%.*]] = load double*, double** [[_TMP5]], align 4, !llvm.access.group !5 +// CHECK3-NEXT: [[TMP20:%.*]] = load volatile double, double* [[TMP19]], align 4, !llvm.access.group !5 // CHECK3-NEXT: [[CONV:%.*]] = bitcast i32* [[G1_CASTED]] to double* -// CHECK3-NEXT: store double [[TMP20]], double* [[CONV]], align 4 -// CHECK3-NEXT: [[TMP21:%.*]] = load i32, i32* [[G1_CASTED]], align 4 -// CHECK3-NEXT: [[TMP22:%.*]] = 
load i32, i32* [[SVAR6]], align 4 -// CHECK3-NEXT: store i32 [[TMP22]], i32* [[SVAR_CASTED]], align 4 -// CHECK3-NEXT: [[TMP23:%.*]] = load i32, i32* [[SVAR_CASTED]], align 4 -// CHECK3-NEXT: [[TMP24:%.*]] = load float, float* [[SFVAR7]], align 4 +// CHECK3-NEXT: store double [[TMP20]], double* [[CONV]], align 4, !llvm.access.group !5 +// CHECK3-NEXT: [[TMP21:%.*]] = load i32, i32* [[G1_CASTED]], align 4, !llvm.access.group !5 +// CHECK3-NEXT: [[TMP22:%.*]] = load i32, i32* [[SVAR6]], align 4, !llvm.access.group !5 +// CHECK3-NEXT: store i32 [[TMP22]], i32* [[SVAR_CASTED]], align 4, !llvm.access.group !5 +// CHECK3-NEXT: [[TMP23:%.*]] = load i32, i32* [[SVAR_CASTED]], align 4, !llvm.access.group !5 +// CHECK3-NEXT: [[TMP24:%.*]] = load float, float* [[SFVAR7]], align 4, !llvm.access.group !5 // CHECK3-NEXT: [[CONV9:%.*]] = bitcast i32* [[SFVAR_CASTED]] to float* -// CHECK3-NEXT: store float [[TMP24]], float* [[CONV9]], align 4 -// CHECK3-NEXT: [[TMP25:%.*]] = load i32, i32* [[SFVAR_CASTED]], align 4 -// CHECK3-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 6, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32, i32, double*, i32, i32, i32)* @.omp_outlined..1 to void (i32*, i32*, ...)*), i32 [[TMP17]], i32 [[TMP18]], double* [[G3]], i32 [[TMP21]], i32 [[TMP23]], i32 [[TMP25]]) +// CHECK3-NEXT: store float [[TMP24]], float* [[CONV9]], align 4, !llvm.access.group !5 +// CHECK3-NEXT: [[TMP25:%.*]] = load i32, i32* [[SFVAR_CASTED]], align 4, !llvm.access.group !5 +// CHECK3-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 6, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32, i32, double*, i32, i32, i32)* @.omp_outlined..1 to void (i32*, i32*, ...)*), i32 [[TMP17]], i32 [[TMP18]], double* [[G3]], i32 [[TMP21]], i32 [[TMP23]], i32 [[TMP25]]), !llvm.access.group !5 // CHECK3-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK3: omp.inner.for.inc: -// CHECK3-NEXT: [[TMP26:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[TMP27:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK3-NEXT: [[TMP26:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !5 +// CHECK3-NEXT: [[TMP27:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !5 // CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP26]], [[TMP27]] -// CHECK3-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP4:![0-9]+]] +// CHECK3-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !5 +// CHECK3-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP6:![0-9]+]] // CHECK3: omp.inner.for.end: // CHECK3-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK3: omp.loop.exit: @@ -1031,38 +1031,38 @@ int main() { // CHECK3-NEXT: store i32 [[TMP8]], i32* [[DOTOMP_IV]], align 4 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK3: omp.inner.for.cond: -// CHECK3-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !9 +// CHECK3-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !9 // CHECK3-NEXT: [[CMP4:%.*]] = icmp sle i32 [[TMP9]], [[TMP10]] // CHECK3-NEXT: br i1 [[CMP4]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK3: omp.inner.for.body: -// 
CHECK3-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !9 // CHECK3-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP11]], 1 // CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK3-NEXT: store i32 [[ADD]], i32* [[I]], align 4 -// CHECK3-NEXT: store double 1.000000e+00, double* [[G3]], align 8 -// CHECK3-NEXT: [[TMP12:%.*]] = load double*, double** [[TMP]], align 4 -// CHECK3-NEXT: store volatile double 1.000000e+00, double* [[TMP12]], align 4 -// CHECK3-NEXT: store i32 3, i32* [[SVAR_ADDR]], align 4 -// CHECK3-NEXT: store float 4.000000e+00, float* [[CONV1]], align 4 +// CHECK3-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !9 +// CHECK3-NEXT: store double 1.000000e+00, double* [[G3]], align 8, !llvm.access.group !9 +// CHECK3-NEXT: [[TMP12:%.*]] = load double*, double** [[TMP]], align 4, !llvm.access.group !9 +// CHECK3-NEXT: store volatile double 1.000000e+00, double* [[TMP12]], align 4, !llvm.access.group !9 +// CHECK3-NEXT: store i32 3, i32* [[SVAR_ADDR]], align 4, !llvm.access.group !9 +// CHECK3-NEXT: store float 4.000000e+00, float* [[CONV1]], align 4, !llvm.access.group !9 // CHECK3-NEXT: [[TMP13:%.*]] = getelementptr inbounds [[CLASS_ANON_0]], %class.anon.0* [[REF_TMP]], i32 0, i32 0 -// CHECK3-NEXT: store double* [[G3]], double** [[TMP13]], align 4 +// CHECK3-NEXT: store double* [[G3]], double** [[TMP13]], align 4, !llvm.access.group !9 // CHECK3-NEXT: [[TMP14:%.*]] = getelementptr inbounds [[CLASS_ANON_0]], %class.anon.0* [[REF_TMP]], i32 0, i32 1 -// CHECK3-NEXT: [[TMP15:%.*]] = load double*, double** [[TMP]], align 4 -// CHECK3-NEXT: store double* [[TMP15]], double** [[TMP14]], align 4 +// CHECK3-NEXT: [[TMP15:%.*]] = load double*, double** [[TMP]], align 4, !llvm.access.group !9 +// CHECK3-NEXT: store double* [[TMP15]], double** [[TMP14]], align 4, !llvm.access.group !9 // CHECK3-NEXT: [[TMP16:%.*]] = getelementptr inbounds [[CLASS_ANON_0]], %class.anon.0* [[REF_TMP]], i32 0, i32 2 -// CHECK3-NEXT: store i32* [[SVAR_ADDR]], i32** [[TMP16]], align 4 +// CHECK3-NEXT: store i32* [[SVAR_ADDR]], i32** [[TMP16]], align 4, !llvm.access.group !9 // CHECK3-NEXT: [[TMP17:%.*]] = getelementptr inbounds [[CLASS_ANON_0]], %class.anon.0* [[REF_TMP]], i32 0, i32 3 -// CHECK3-NEXT: store float* [[CONV1]], float** [[TMP17]], align 4 -// CHECK3-NEXT: call void @"_ZZZ4mainENK3$_0clEvENKUlvE_clEv"(%class.anon.0* nonnull align 4 dereferenceable(16) [[REF_TMP]]) +// CHECK3-NEXT: store float* [[CONV1]], float** [[TMP17]], align 4, !llvm.access.group !9 +// CHECK3-NEXT: call void @"_ZZZ4mainENK3$_0clEvENKUlvE_clEv"(%class.anon.0* nonnull align 4 dereferenceable(16) [[REF_TMP]]), !llvm.access.group !9 // CHECK3-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK3: omp.body.continue: // CHECK3-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK3: omp.inner.for.inc: -// CHECK3-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !9 // CHECK3-NEXT: [[ADD5:%.*]] = add nsw i32 [[TMP18]], 1 -// CHECK3-NEXT: store i32 [[ADD5]], i32* [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP6:![0-9]+]] +// CHECK3-NEXT: store i32 [[ADD5]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !9 +// CHECK3-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP10:![0-9]+]] // CHECK3: omp.inner.for.end: // CHECK3-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK3: omp.loop.exit: 
@@ -1203,33 +1203,33 @@ int main() { // CHECK4-NEXT: store i32 [[TMP14]], i32* [[DOTOMP_IV]], align 4 // CHECK4-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK4: omp.inner.for.cond: -// CHECK4-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK4-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK4-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !5 +// CHECK4-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !5 // CHECK4-NEXT: [[CMP8:%.*]] = icmp sle i32 [[TMP15]], [[TMP16]] // CHECK4-NEXT: br i1 [[CMP8]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK4: omp.inner.for.body: -// CHECK4-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK4-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK4-NEXT: [[TMP19:%.*]] = load double*, double** [[_TMP5]], align 4 -// CHECK4-NEXT: [[TMP20:%.*]] = load volatile double, double* [[TMP19]], align 4 +// CHECK4-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !5 +// CHECK4-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !5 +// CHECK4-NEXT: [[TMP19:%.*]] = load double*, double** [[_TMP5]], align 4, !llvm.access.group !5 +// CHECK4-NEXT: [[TMP20:%.*]] = load volatile double, double* [[TMP19]], align 4, !llvm.access.group !5 // CHECK4-NEXT: [[CONV:%.*]] = bitcast i32* [[G1_CASTED]] to double* -// CHECK4-NEXT: store double [[TMP20]], double* [[CONV]], align 4 -// CHECK4-NEXT: [[TMP21:%.*]] = load i32, i32* [[G1_CASTED]], align 4 -// CHECK4-NEXT: [[TMP22:%.*]] = load i32, i32* [[SVAR6]], align 4 -// CHECK4-NEXT: store i32 [[TMP22]], i32* [[SVAR_CASTED]], align 4 -// CHECK4-NEXT: [[TMP23:%.*]] = load i32, i32* [[SVAR_CASTED]], align 4 -// CHECK4-NEXT: [[TMP24:%.*]] = load float, float* [[SFVAR7]], align 4 +// CHECK4-NEXT: store double [[TMP20]], double* [[CONV]], align 4, !llvm.access.group !5 +// CHECK4-NEXT: [[TMP21:%.*]] = load i32, i32* [[G1_CASTED]], align 4, !llvm.access.group !5 +// CHECK4-NEXT: [[TMP22:%.*]] = load i32, i32* [[SVAR6]], align 4, !llvm.access.group !5 +// CHECK4-NEXT: store i32 [[TMP22]], i32* [[SVAR_CASTED]], align 4, !llvm.access.group !5 +// CHECK4-NEXT: [[TMP23:%.*]] = load i32, i32* [[SVAR_CASTED]], align 4, !llvm.access.group !5 +// CHECK4-NEXT: [[TMP24:%.*]] = load float, float* [[SFVAR7]], align 4, !llvm.access.group !5 // CHECK4-NEXT: [[CONV9:%.*]] = bitcast i32* [[SFVAR_CASTED]] to float* -// CHECK4-NEXT: store float [[TMP24]], float* [[CONV9]], align 4 -// CHECK4-NEXT: [[TMP25:%.*]] = load i32, i32* [[SFVAR_CASTED]], align 4 -// CHECK4-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 6, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32, i32, double*, i32, i32, i32)* @.omp_outlined..1 to void (i32*, i32*, ...)*), i32 [[TMP17]], i32 [[TMP18]], double* [[G3]], i32 [[TMP21]], i32 [[TMP23]], i32 [[TMP25]]) +// CHECK4-NEXT: store float [[TMP24]], float* [[CONV9]], align 4, !llvm.access.group !5 +// CHECK4-NEXT: [[TMP25:%.*]] = load i32, i32* [[SFVAR_CASTED]], align 4, !llvm.access.group !5 +// CHECK4-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) 
@__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 6, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32, i32, double*, i32, i32, i32)* @.omp_outlined..1 to void (i32*, i32*, ...)*), i32 [[TMP17]], i32 [[TMP18]], double* [[G3]], i32 [[TMP21]], i32 [[TMP23]], i32 [[TMP25]]), !llvm.access.group !5 // CHECK4-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK4: omp.inner.for.inc: -// CHECK4-NEXT: [[TMP26:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK4-NEXT: [[TMP27:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK4-NEXT: [[TMP26:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !5 +// CHECK4-NEXT: [[TMP27:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !5 // CHECK4-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP26]], [[TMP27]] -// CHECK4-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 -// CHECK4-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP4:![0-9]+]] +// CHECK4-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !5 +// CHECK4-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP6:![0-9]+]] // CHECK4: omp.inner.for.end: // CHECK4-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK4: omp.loop.exit: @@ -1305,38 +1305,38 @@ int main() { // CHECK4-NEXT: store i32 [[TMP8]], i32* [[DOTOMP_IV]], align 4 // CHECK4-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK4: omp.inner.for.cond: -// CHECK4-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK4-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK4-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !9 +// CHECK4-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !9 // CHECK4-NEXT: [[CMP4:%.*]] = icmp sle i32 [[TMP9]], [[TMP10]] // CHECK4-NEXT: br i1 [[CMP4]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK4: omp.inner.for.body: -// CHECK4-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK4-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !9 // CHECK4-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP11]], 1 // CHECK4-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK4-NEXT: store i32 [[ADD]], i32* [[I]], align 4 -// CHECK4-NEXT: store double 1.000000e+00, double* [[G3]], align 8 -// CHECK4-NEXT: [[TMP12:%.*]] = load double*, double** [[TMP]], align 4 -// CHECK4-NEXT: store volatile double 1.000000e+00, double* [[TMP12]], align 4 -// CHECK4-NEXT: store i32 3, i32* [[SVAR_ADDR]], align 4 -// CHECK4-NEXT: store float 4.000000e+00, float* [[CONV1]], align 4 +// CHECK4-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !9 +// CHECK4-NEXT: store double 1.000000e+00, double* [[G3]], align 8, !llvm.access.group !9 +// CHECK4-NEXT: [[TMP12:%.*]] = load double*, double** [[TMP]], align 4, !llvm.access.group !9 +// CHECK4-NEXT: store volatile double 1.000000e+00, double* [[TMP12]], align 4, !llvm.access.group !9 +// CHECK4-NEXT: store i32 3, i32* [[SVAR_ADDR]], align 4, !llvm.access.group !9 +// CHECK4-NEXT: store float 4.000000e+00, float* [[CONV1]], align 4, !llvm.access.group !9 // CHECK4-NEXT: [[TMP13:%.*]] = getelementptr inbounds [[CLASS_ANON_0]], %class.anon.0* [[REF_TMP]], i32 0, i32 0 -// CHECK4-NEXT: store double* [[G3]], double** [[TMP13]], align 4 +// CHECK4-NEXT: store double* [[G3]], double** [[TMP13]], align 4, !llvm.access.group !9 // CHECK4-NEXT: [[TMP14:%.*]] = getelementptr inbounds [[CLASS_ANON_0]], %class.anon.0* [[REF_TMP]], i32 0, i32 1 -// CHECK4-NEXT: [[TMP15:%.*]] = load double*, 
double** [[TMP]], align 4 -// CHECK4-NEXT: store double* [[TMP15]], double** [[TMP14]], align 4 +// CHECK4-NEXT: [[TMP15:%.*]] = load double*, double** [[TMP]], align 4, !llvm.access.group !9 +// CHECK4-NEXT: store double* [[TMP15]], double** [[TMP14]], align 4, !llvm.access.group !9 // CHECK4-NEXT: [[TMP16:%.*]] = getelementptr inbounds [[CLASS_ANON_0]], %class.anon.0* [[REF_TMP]], i32 0, i32 2 -// CHECK4-NEXT: store i32* [[SVAR_ADDR]], i32** [[TMP16]], align 4 +// CHECK4-NEXT: store i32* [[SVAR_ADDR]], i32** [[TMP16]], align 4, !llvm.access.group !9 // CHECK4-NEXT: [[TMP17:%.*]] = getelementptr inbounds [[CLASS_ANON_0]], %class.anon.0* [[REF_TMP]], i32 0, i32 3 -// CHECK4-NEXT: store float* [[CONV1]], float** [[TMP17]], align 4 -// CHECK4-NEXT: call void @"_ZZZ4mainENK3$_0clEvENKUlvE_clEv"(%class.anon.0* nonnull align 4 dereferenceable(16) [[REF_TMP]]) +// CHECK4-NEXT: store float* [[CONV1]], float** [[TMP17]], align 4, !llvm.access.group !9 +// CHECK4-NEXT: call void @"_ZZZ4mainENK3$_0clEvENKUlvE_clEv"(%class.anon.0* nonnull align 4 dereferenceable(16) [[REF_TMP]]), !llvm.access.group !9 // CHECK4-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK4: omp.body.continue: // CHECK4-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK4: omp.inner.for.inc: -// CHECK4-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK4-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !9 // CHECK4-NEXT: [[ADD5:%.*]] = add nsw i32 [[TMP18]], 1 -// CHECK4-NEXT: store i32 [[ADD5]], i32* [[DOTOMP_IV]], align 4 -// CHECK4-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP6:![0-9]+]] +// CHECK4-NEXT: store i32 [[ADD5]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !9 +// CHECK4-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP10:![0-9]+]] // CHECK4: omp.inner.for.end: // CHECK4-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK4: omp.loop.exit: @@ -1661,34 +1661,34 @@ int main() { // CHECK8-NEXT: store i32 [[TMP21]], i32* [[DOTOMP_IV]], align 4 // CHECK8-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK8: omp.inner.for.cond: -// CHECK8-NEXT: [[TMP22:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK8-NEXT: [[TMP23:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK8-NEXT: [[TMP22:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !5 +// CHECK8-NEXT: [[TMP23:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !5 // CHECK8-NEXT: [[CMP10:%.*]] = icmp sle i32 [[TMP22]], [[TMP23]] // CHECK8-NEXT: br i1 [[CMP10]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_COND_CLEANUP:%.*]] // CHECK8: omp.inner.for.cond.cleanup: // CHECK8-NEXT: br label [[OMP_INNER_FOR_END:%.*]] // CHECK8: omp.inner.for.body: -// CHECK8-NEXT: [[TMP24:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK8-NEXT: [[TMP24:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !5 // CHECK8-NEXT: [[TMP25:%.*]] = zext i32 [[TMP24]] to i64 -// CHECK8-NEXT: [[TMP26:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK8-NEXT: [[TMP26:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !5 // CHECK8-NEXT: [[TMP27:%.*]] = zext i32 [[TMP26]] to i64 -// CHECK8-NEXT: [[TMP28:%.*]] = load i32, i32* [[T_VAR3]], align 4 +// CHECK8-NEXT: [[TMP28:%.*]] = load i32, i32* [[T_VAR3]], align 4, !llvm.access.group !5 // CHECK8-NEXT: [[CONV:%.*]] = bitcast i64* [[T_VAR_CASTED]] to i32* -// CHECK8-NEXT: store i32 [[TMP28]], i32* [[CONV]], align 4 -// CHECK8-NEXT: [[TMP29:%.*]] = load i64, i64* [[T_VAR_CASTED]], align 8 
-// CHECK8-NEXT: [[TMP30:%.*]] = load %struct.S*, %struct.S** [[_TMP8]], align 8 -// CHECK8-NEXT: [[TMP31:%.*]] = load i32, i32* [[SVAR9]], align 4 +// CHECK8-NEXT: store i32 [[TMP28]], i32* [[CONV]], align 4, !llvm.access.group !5 +// CHECK8-NEXT: [[TMP29:%.*]] = load i64, i64* [[T_VAR_CASTED]], align 8, !llvm.access.group !5 +// CHECK8-NEXT: [[TMP30:%.*]] = load %struct.S*, %struct.S** [[_TMP8]], align 8, !llvm.access.group !5 +// CHECK8-NEXT: [[TMP31:%.*]] = load i32, i32* [[SVAR9]], align 4, !llvm.access.group !5 // CHECK8-NEXT: [[CONV11:%.*]] = bitcast i64* [[SVAR_CASTED]] to i32* -// CHECK8-NEXT: store i32 [[TMP31]], i32* [[CONV11]], align 4 -// CHECK8-NEXT: [[TMP32:%.*]] = load i64, i64* [[SVAR_CASTED]], align 8 -// CHECK8-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 7, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64, [2 x i32]*, i64, [2 x %struct.S]*, %struct.S*, i64)* @.omp_outlined..1 to void (i32*, i32*, ...)*), i64 [[TMP25]], i64 [[TMP27]], [2 x i32]* [[VEC4]], i64 [[TMP29]], [2 x %struct.S]* [[S_ARR5]], %struct.S* [[TMP30]], i64 [[TMP32]]) +// CHECK8-NEXT: store i32 [[TMP31]], i32* [[CONV11]], align 4, !llvm.access.group !5 +// CHECK8-NEXT: [[TMP32:%.*]] = load i64, i64* [[SVAR_CASTED]], align 8, !llvm.access.group !5 +// CHECK8-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 7, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64, [2 x i32]*, i64, [2 x %struct.S]*, %struct.S*, i64)* @.omp_outlined..1 to void (i32*, i32*, ...)*), i64 [[TMP25]], i64 [[TMP27]], [2 x i32]* [[VEC4]], i64 [[TMP29]], [2 x %struct.S]* [[S_ARR5]], %struct.S* [[TMP30]], i64 [[TMP32]]), !llvm.access.group !5 // CHECK8-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK8: omp.inner.for.inc: -// CHECK8-NEXT: [[TMP33:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK8-NEXT: [[TMP34:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK8-NEXT: [[TMP33:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !5 +// CHECK8-NEXT: [[TMP34:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !5 // CHECK8-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP33]], [[TMP34]] -// CHECK8-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 -// CHECK8-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP4:![0-9]+]] +// CHECK8-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !5 +// CHECK8-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP6:![0-9]+]] // CHECK8: omp.inner.for.end: // CHECK8-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK8: omp.loop.exit: @@ -1807,37 +1807,37 @@ int main() { // CHECK8-NEXT: store i32 [[TMP18]], i32* [[DOTOMP_IV]], align 4 // CHECK8-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK8: omp.inner.for.cond: -// CHECK8-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK8-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK8-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !9 +// CHECK8-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !9 // CHECK8-NEXT: [[CMP10:%.*]] = icmp sle i32 [[TMP19]], [[TMP20]] // CHECK8-NEXT: br i1 [[CMP10]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_COND_CLEANUP:%.*]] // CHECK8: omp.inner.for.cond.cleanup: // CHECK8-NEXT: br label [[OMP_INNER_FOR_END:%.*]] // CHECK8: omp.inner.for.body: -// CHECK8-NEXT: [[TMP21:%.*]] = load i32, i32* 
[[DOTOMP_IV]], align 4 +// CHECK8-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !9 // CHECK8-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP21]], 1 // CHECK8-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK8-NEXT: store i32 [[ADD]], i32* [[I]], align 4 -// CHECK8-NEXT: [[TMP22:%.*]] = load i32, i32* [[CONV]], align 8 -// CHECK8-NEXT: [[TMP23:%.*]] = load i32, i32* [[I]], align 4 +// CHECK8-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !9 +// CHECK8-NEXT: [[TMP22:%.*]] = load i32, i32* [[CONV]], align 8, !llvm.access.group !9 +// CHECK8-NEXT: [[TMP23:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !9 // CHECK8-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP23]] to i64 // CHECK8-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x i32], [2 x i32]* [[VEC5]], i64 0, i64 [[IDXPROM]] -// CHECK8-NEXT: store i32 [[TMP22]], i32* [[ARRAYIDX]], align 4 -// CHECK8-NEXT: [[TMP24:%.*]] = load %struct.S*, %struct.S** [[_TMP9]], align 8 -// CHECK8-NEXT: [[TMP25:%.*]] = load i32, i32* [[I]], align 4 +// CHECK8-NEXT: store i32 [[TMP22]], i32* [[ARRAYIDX]], align 4, !llvm.access.group !9 +// CHECK8-NEXT: [[TMP24:%.*]] = load %struct.S*, %struct.S** [[_TMP9]], align 8, !llvm.access.group !9 +// CHECK8-NEXT: [[TMP25:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !9 // CHECK8-NEXT: [[IDXPROM11:%.*]] = sext i32 [[TMP25]] to i64 // CHECK8-NEXT: [[ARRAYIDX12:%.*]] = getelementptr inbounds [2 x %struct.S], [2 x %struct.S]* [[S_ARR6]], i64 0, i64 [[IDXPROM11]] // CHECK8-NEXT: [[TMP26:%.*]] = bitcast %struct.S* [[ARRAYIDX12]] to i8* // CHECK8-NEXT: [[TMP27:%.*]] = bitcast %struct.S* [[TMP24]] to i8* -// CHECK8-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 [[TMP26]], i8* align 4 [[TMP27]], i64 4, i1 false) +// CHECK8-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 [[TMP26]], i8* align 4 [[TMP27]], i64 4, i1 false), !llvm.access.group !9 // CHECK8-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK8: omp.body.continue: // CHECK8-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK8: omp.inner.for.inc: -// CHECK8-NEXT: [[TMP28:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK8-NEXT: [[TMP28:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !9 // CHECK8-NEXT: [[ADD13:%.*]] = add nsw i32 [[TMP28]], 1 -// CHECK8-NEXT: store i32 [[ADD13]], i32* [[DOTOMP_IV]], align 4 -// CHECK8-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP6:![0-9]+]] +// CHECK8-NEXT: store i32 [[ADD13]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !9 +// CHECK8-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP10:![0-9]+]] // CHECK8: omp.inner.for.end: // CHECK8-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK8: omp.loop.exit: @@ -2130,30 +2130,30 @@ int main() { // CHECK8-NEXT: store i32 [[TMP19]], i32* [[DOTOMP_IV]], align 4 // CHECK8-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK8: omp.inner.for.cond: -// CHECK8-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK8-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK8-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !14 +// CHECK8-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !14 // CHECK8-NEXT: [[CMP9:%.*]] = icmp sle i32 [[TMP20]], [[TMP21]] // CHECK8-NEXT: br i1 [[CMP9]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_COND_CLEANUP:%.*]] // CHECK8: omp.inner.for.cond.cleanup: // CHECK8-NEXT: br label [[OMP_INNER_FOR_END:%.*]] // CHECK8: omp.inner.for.body: -// CHECK8-NEXT: 
[[TMP22:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK8-NEXT: [[TMP22:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !14 // CHECK8-NEXT: [[TMP23:%.*]] = zext i32 [[TMP22]] to i64 -// CHECK8-NEXT: [[TMP24:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK8-NEXT: [[TMP24:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !14 // CHECK8-NEXT: [[TMP25:%.*]] = zext i32 [[TMP24]] to i64 -// CHECK8-NEXT: [[TMP26:%.*]] = load i32, i32* [[T_VAR3]], align 4 +// CHECK8-NEXT: [[TMP26:%.*]] = load i32, i32* [[T_VAR3]], align 4, !llvm.access.group !14 // CHECK8-NEXT: [[CONV:%.*]] = bitcast i64* [[T_VAR_CASTED]] to i32* -// CHECK8-NEXT: store i32 [[TMP26]], i32* [[CONV]], align 4 -// CHECK8-NEXT: [[TMP27:%.*]] = load i64, i64* [[T_VAR_CASTED]], align 8 -// CHECK8-NEXT: [[TMP28:%.*]] = load %struct.S.0*, %struct.S.0** [[_TMP8]], align 8 -// CHECK8-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 6, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64, [2 x i32]*, i64, [2 x %struct.S.0]*, %struct.S.0*)* @.omp_outlined..3 to void (i32*, i32*, ...)*), i64 [[TMP23]], i64 [[TMP25]], [2 x i32]* [[VEC4]], i64 [[TMP27]], [2 x %struct.S.0]* [[S_ARR5]], %struct.S.0* [[TMP28]]) +// CHECK8-NEXT: store i32 [[TMP26]], i32* [[CONV]], align 4, !llvm.access.group !14 +// CHECK8-NEXT: [[TMP27:%.*]] = load i64, i64* [[T_VAR_CASTED]], align 8, !llvm.access.group !14 +// CHECK8-NEXT: [[TMP28:%.*]] = load %struct.S.0*, %struct.S.0** [[_TMP8]], align 8, !llvm.access.group !14 +// CHECK8-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 6, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64, [2 x i32]*, i64, [2 x %struct.S.0]*, %struct.S.0*)* @.omp_outlined..3 to void (i32*, i32*, ...)*), i64 [[TMP23]], i64 [[TMP25]], [2 x i32]* [[VEC4]], i64 [[TMP27]], [2 x %struct.S.0]* [[S_ARR5]], %struct.S.0* [[TMP28]]), !llvm.access.group !14 // CHECK8-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK8: omp.inner.for.inc: -// CHECK8-NEXT: [[TMP29:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK8-NEXT: [[TMP30:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK8-NEXT: [[TMP29:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !14 +// CHECK8-NEXT: [[TMP30:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !14 // CHECK8-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP29]], [[TMP30]] -// CHECK8-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 -// CHECK8-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP9:![0-9]+]] +// CHECK8-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !14 +// CHECK8-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP15:![0-9]+]] // CHECK8: omp.inner.for.end: // CHECK8-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK8: omp.loop.exit: @@ -2269,37 +2269,37 @@ int main() { // CHECK8-NEXT: store i32 [[TMP18]], i32* [[DOTOMP_IV]], align 4 // CHECK8-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK8: omp.inner.for.cond: -// CHECK8-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK8-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK8-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !17 +// CHECK8-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !17 // CHECK8-NEXT: [[CMP9:%.*]] = icmp sle i32 [[TMP19]], [[TMP20]] // CHECK8-NEXT: br i1 
[[CMP9]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_COND_CLEANUP:%.*]] // CHECK8: omp.inner.for.cond.cleanup: // CHECK8-NEXT: br label [[OMP_INNER_FOR_END:%.*]] // CHECK8: omp.inner.for.body: -// CHECK8-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK8-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !17 // CHECK8-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP21]], 1 // CHECK8-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK8-NEXT: store i32 [[ADD]], i32* [[I]], align 4 -// CHECK8-NEXT: [[TMP22:%.*]] = load i32, i32* [[CONV]], align 8 -// CHECK8-NEXT: [[TMP23:%.*]] = load i32, i32* [[I]], align 4 +// CHECK8-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !17 +// CHECK8-NEXT: [[TMP22:%.*]] = load i32, i32* [[CONV]], align 8, !llvm.access.group !17 +// CHECK8-NEXT: [[TMP23:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !17 // CHECK8-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP23]] to i64 // CHECK8-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x i32], [2 x i32]* [[VEC4]], i64 0, i64 [[IDXPROM]] -// CHECK8-NEXT: store i32 [[TMP22]], i32* [[ARRAYIDX]], align 4 -// CHECK8-NEXT: [[TMP24:%.*]] = load %struct.S.0*, %struct.S.0** [[_TMP8]], align 8 -// CHECK8-NEXT: [[TMP25:%.*]] = load i32, i32* [[I]], align 4 +// CHECK8-NEXT: store i32 [[TMP22]], i32* [[ARRAYIDX]], align 4, !llvm.access.group !17 +// CHECK8-NEXT: [[TMP24:%.*]] = load %struct.S.0*, %struct.S.0** [[_TMP8]], align 8, !llvm.access.group !17 +// CHECK8-NEXT: [[TMP25:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !17 // CHECK8-NEXT: [[IDXPROM10:%.*]] = sext i32 [[TMP25]] to i64 // CHECK8-NEXT: [[ARRAYIDX11:%.*]] = getelementptr inbounds [2 x %struct.S.0], [2 x %struct.S.0]* [[S_ARR5]], i64 0, i64 [[IDXPROM10]] // CHECK8-NEXT: [[TMP26:%.*]] = bitcast %struct.S.0* [[ARRAYIDX11]] to i8* // CHECK8-NEXT: [[TMP27:%.*]] = bitcast %struct.S.0* [[TMP24]] to i8* -// CHECK8-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 [[TMP26]], i8* align 4 [[TMP27]], i64 4, i1 false) +// CHECK8-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 [[TMP26]], i8* align 4 [[TMP27]], i64 4, i1 false), !llvm.access.group !17 // CHECK8-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK8: omp.body.continue: // CHECK8-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK8: omp.inner.for.inc: -// CHECK8-NEXT: [[TMP28:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK8-NEXT: [[TMP28:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !17 // CHECK8-NEXT: [[ADD12:%.*]] = add nsw i32 [[TMP28]], 1 -// CHECK8-NEXT: store i32 [[ADD12]], i32* [[DOTOMP_IV]], align 4 -// CHECK8-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP10:![0-9]+]] +// CHECK8-NEXT: store i32 [[ADD12]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !17 +// CHECK8-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP18:![0-9]+]] // CHECK8: omp.inner.for.end: // CHECK8-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK8: omp.loop.exit: @@ -2627,34 +2627,34 @@ int main() { // CHECK9-NEXT: store i32 [[TMP21]], i32* [[DOTOMP_IV]], align 4 // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK9: omp.inner.for.cond: -// CHECK9-NEXT: [[TMP22:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK9-NEXT: [[TMP23:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK9-NEXT: [[TMP22:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !5 +// CHECK9-NEXT: [[TMP23:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !5 // CHECK9-NEXT: [[CMP10:%.*]] = 
icmp sle i32 [[TMP22]], [[TMP23]] // CHECK9-NEXT: br i1 [[CMP10]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_COND_CLEANUP:%.*]] // CHECK9: omp.inner.for.cond.cleanup: // CHECK9-NEXT: br label [[OMP_INNER_FOR_END:%.*]] // CHECK9: omp.inner.for.body: -// CHECK9-NEXT: [[TMP24:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK9-NEXT: [[TMP24:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !5 // CHECK9-NEXT: [[TMP25:%.*]] = zext i32 [[TMP24]] to i64 -// CHECK9-NEXT: [[TMP26:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK9-NEXT: [[TMP26:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !5 // CHECK9-NEXT: [[TMP27:%.*]] = zext i32 [[TMP26]] to i64 -// CHECK9-NEXT: [[TMP28:%.*]] = load i32, i32* [[T_VAR3]], align 4 +// CHECK9-NEXT: [[TMP28:%.*]] = load i32, i32* [[T_VAR3]], align 4, !llvm.access.group !5 // CHECK9-NEXT: [[CONV:%.*]] = bitcast i64* [[T_VAR_CASTED]] to i32* -// CHECK9-NEXT: store i32 [[TMP28]], i32* [[CONV]], align 4 -// CHECK9-NEXT: [[TMP29:%.*]] = load i64, i64* [[T_VAR_CASTED]], align 8 -// CHECK9-NEXT: [[TMP30:%.*]] = load %struct.S*, %struct.S** [[_TMP8]], align 8 -// CHECK9-NEXT: [[TMP31:%.*]] = load i32, i32* [[SVAR9]], align 4 +// CHECK9-NEXT: store i32 [[TMP28]], i32* [[CONV]], align 4, !llvm.access.group !5 +// CHECK9-NEXT: [[TMP29:%.*]] = load i64, i64* [[T_VAR_CASTED]], align 8, !llvm.access.group !5 +// CHECK9-NEXT: [[TMP30:%.*]] = load %struct.S*, %struct.S** [[_TMP8]], align 8, !llvm.access.group !5 +// CHECK9-NEXT: [[TMP31:%.*]] = load i32, i32* [[SVAR9]], align 4, !llvm.access.group !5 // CHECK9-NEXT: [[CONV11:%.*]] = bitcast i64* [[SVAR_CASTED]] to i32* -// CHECK9-NEXT: store i32 [[TMP31]], i32* [[CONV11]], align 4 -// CHECK9-NEXT: [[TMP32:%.*]] = load i64, i64* [[SVAR_CASTED]], align 8 -// CHECK9-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 7, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64, [2 x i32]*, i64, [2 x %struct.S]*, %struct.S*, i64)* @.omp_outlined..1 to void (i32*, i32*, ...)*), i64 [[TMP25]], i64 [[TMP27]], [2 x i32]* [[VEC4]], i64 [[TMP29]], [2 x %struct.S]* [[S_ARR5]], %struct.S* [[TMP30]], i64 [[TMP32]]) +// CHECK9-NEXT: store i32 [[TMP31]], i32* [[CONV11]], align 4, !llvm.access.group !5 +// CHECK9-NEXT: [[TMP32:%.*]] = load i64, i64* [[SVAR_CASTED]], align 8, !llvm.access.group !5 +// CHECK9-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) 
@__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 7, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64, [2 x i32]*, i64, [2 x %struct.S]*, %struct.S*, i64)* @.omp_outlined..1 to void (i32*, i32*, ...)*), i64 [[TMP25]], i64 [[TMP27]], [2 x i32]* [[VEC4]], i64 [[TMP29]], [2 x %struct.S]* [[S_ARR5]], %struct.S* [[TMP30]], i64 [[TMP32]]), !llvm.access.group !5 // CHECK9-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK9: omp.inner.for.inc: -// CHECK9-NEXT: [[TMP33:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK9-NEXT: [[TMP34:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK9-NEXT: [[TMP33:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !5 +// CHECK9-NEXT: [[TMP34:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !5 // CHECK9-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP33]], [[TMP34]] -// CHECK9-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 -// CHECK9-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP4:![0-9]+]] +// CHECK9-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !5 +// CHECK9-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP6:![0-9]+]] // CHECK9: omp.inner.for.end: // CHECK9-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK9: omp.loop.exit: @@ -2773,37 +2773,37 @@ int main() { // CHECK9-NEXT: store i32 [[TMP18]], i32* [[DOTOMP_IV]], align 4 // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK9: omp.inner.for.cond: -// CHECK9-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK9-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK9-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !9 +// CHECK9-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !9 // CHECK9-NEXT: [[CMP10:%.*]] = icmp sle i32 [[TMP19]], [[TMP20]] // CHECK9-NEXT: br i1 [[CMP10]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_COND_CLEANUP:%.*]] // CHECK9: omp.inner.for.cond.cleanup: // CHECK9-NEXT: br label [[OMP_INNER_FOR_END:%.*]] // CHECK9: omp.inner.for.body: -// CHECK9-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !9 // CHECK9-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP21]], 1 // CHECK9-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK9-NEXT: store i32 [[ADD]], i32* [[I]], align 4 -// CHECK9-NEXT: [[TMP22:%.*]] = load i32, i32* [[CONV]], align 8 -// CHECK9-NEXT: [[TMP23:%.*]] = load i32, i32* [[I]], align 4 +// CHECK9-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !9 +// CHECK9-NEXT: [[TMP22:%.*]] = load i32, i32* [[CONV]], align 8, !llvm.access.group !9 +// CHECK9-NEXT: [[TMP23:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !9 // CHECK9-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP23]] to i64 // CHECK9-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x i32], [2 x i32]* [[VEC5]], i64 0, i64 [[IDXPROM]] -// CHECK9-NEXT: store i32 [[TMP22]], i32* [[ARRAYIDX]], align 4 -// CHECK9-NEXT: [[TMP24:%.*]] = load %struct.S*, %struct.S** [[_TMP9]], align 8 -// CHECK9-NEXT: [[TMP25:%.*]] = load i32, i32* [[I]], align 4 +// CHECK9-NEXT: store i32 [[TMP22]], i32* [[ARRAYIDX]], align 4, !llvm.access.group !9 +// CHECK9-NEXT: [[TMP24:%.*]] = load %struct.S*, %struct.S** [[_TMP9]], align 8, !llvm.access.group !9 +// CHECK9-NEXT: [[TMP25:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !9 // CHECK9-NEXT: [[IDXPROM11:%.*]] = sext i32 [[TMP25]] to i64 // CHECK9-NEXT: [[ARRAYIDX12:%.*]] = 
getelementptr inbounds [2 x %struct.S], [2 x %struct.S]* [[S_ARR6]], i64 0, i64 [[IDXPROM11]] // CHECK9-NEXT: [[TMP26:%.*]] = bitcast %struct.S* [[ARRAYIDX12]] to i8* // CHECK9-NEXT: [[TMP27:%.*]] = bitcast %struct.S* [[TMP24]] to i8* -// CHECK9-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 [[TMP26]], i8* align 4 [[TMP27]], i64 4, i1 false) +// CHECK9-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 [[TMP26]], i8* align 4 [[TMP27]], i64 4, i1 false), !llvm.access.group !9 // CHECK9-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK9: omp.body.continue: // CHECK9-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK9: omp.inner.for.inc: -// CHECK9-NEXT: [[TMP28:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[TMP28:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !9 // CHECK9-NEXT: [[ADD13:%.*]] = add nsw i32 [[TMP28]], 1 -// CHECK9-NEXT: store i32 [[ADD13]], i32* [[DOTOMP_IV]], align 4 -// CHECK9-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP6:![0-9]+]] +// CHECK9-NEXT: store i32 [[ADD13]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !9 +// CHECK9-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP10:![0-9]+]] // CHECK9: omp.inner.for.end: // CHECK9-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK9: omp.loop.exit: @@ -3096,30 +3096,30 @@ int main() { // CHECK9-NEXT: store i32 [[TMP19]], i32* [[DOTOMP_IV]], align 4 // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK9: omp.inner.for.cond: -// CHECK9-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK9-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK9-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !14 +// CHECK9-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !14 // CHECK9-NEXT: [[CMP9:%.*]] = icmp sle i32 [[TMP20]], [[TMP21]] // CHECK9-NEXT: br i1 [[CMP9]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_COND_CLEANUP:%.*]] // CHECK9: omp.inner.for.cond.cleanup: // CHECK9-NEXT: br label [[OMP_INNER_FOR_END:%.*]] // CHECK9: omp.inner.for.body: -// CHECK9-NEXT: [[TMP22:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK9-NEXT: [[TMP22:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !14 // CHECK9-NEXT: [[TMP23:%.*]] = zext i32 [[TMP22]] to i64 -// CHECK9-NEXT: [[TMP24:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK9-NEXT: [[TMP24:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !14 // CHECK9-NEXT: [[TMP25:%.*]] = zext i32 [[TMP24]] to i64 -// CHECK9-NEXT: [[TMP26:%.*]] = load i32, i32* [[T_VAR3]], align 4 +// CHECK9-NEXT: [[TMP26:%.*]] = load i32, i32* [[T_VAR3]], align 4, !llvm.access.group !14 // CHECK9-NEXT: [[CONV:%.*]] = bitcast i64* [[T_VAR_CASTED]] to i32* -// CHECK9-NEXT: store i32 [[TMP26]], i32* [[CONV]], align 4 -// CHECK9-NEXT: [[TMP27:%.*]] = load i64, i64* [[T_VAR_CASTED]], align 8 -// CHECK9-NEXT: [[TMP28:%.*]] = load %struct.S.0*, %struct.S.0** [[_TMP8]], align 8 -// CHECK9-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) 
@__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 6, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64, [2 x i32]*, i64, [2 x %struct.S.0]*, %struct.S.0*)* @.omp_outlined..3 to void (i32*, i32*, ...)*), i64 [[TMP23]], i64 [[TMP25]], [2 x i32]* [[VEC4]], i64 [[TMP27]], [2 x %struct.S.0]* [[S_ARR5]], %struct.S.0* [[TMP28]]) +// CHECK9-NEXT: store i32 [[TMP26]], i32* [[CONV]], align 4, !llvm.access.group !14 +// CHECK9-NEXT: [[TMP27:%.*]] = load i64, i64* [[T_VAR_CASTED]], align 8, !llvm.access.group !14 +// CHECK9-NEXT: [[TMP28:%.*]] = load %struct.S.0*, %struct.S.0** [[_TMP8]], align 8, !llvm.access.group !14 +// CHECK9-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 6, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64, [2 x i32]*, i64, [2 x %struct.S.0]*, %struct.S.0*)* @.omp_outlined..3 to void (i32*, i32*, ...)*), i64 [[TMP23]], i64 [[TMP25]], [2 x i32]* [[VEC4]], i64 [[TMP27]], [2 x %struct.S.0]* [[S_ARR5]], %struct.S.0* [[TMP28]]), !llvm.access.group !14 // CHECK9-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK9: omp.inner.for.inc: -// CHECK9-NEXT: [[TMP29:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK9-NEXT: [[TMP30:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK9-NEXT: [[TMP29:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !14 +// CHECK9-NEXT: [[TMP30:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !14 // CHECK9-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP29]], [[TMP30]] -// CHECK9-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 -// CHECK9-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP9:![0-9]+]] +// CHECK9-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !14 +// CHECK9-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP15:![0-9]+]] // CHECK9: omp.inner.for.end: // CHECK9-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK9: omp.loop.exit: @@ -3235,37 +3235,37 @@ int main() { // CHECK9-NEXT: store i32 [[TMP18]], i32* [[DOTOMP_IV]], align 4 // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK9: omp.inner.for.cond: -// CHECK9-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK9-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK9-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !17 +// CHECK9-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !17 // CHECK9-NEXT: [[CMP9:%.*]] = icmp sle i32 [[TMP19]], [[TMP20]] // CHECK9-NEXT: br i1 [[CMP9]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_COND_CLEANUP:%.*]] // CHECK9: omp.inner.for.cond.cleanup: // CHECK9-NEXT: br label [[OMP_INNER_FOR_END:%.*]] // CHECK9: omp.inner.for.body: -// CHECK9-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !17 // CHECK9-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP21]], 1 // CHECK9-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK9-NEXT: store i32 [[ADD]], i32* [[I]], align 4 -// CHECK9-NEXT: [[TMP22:%.*]] = load i32, i32* [[CONV]], align 8 -// CHECK9-NEXT: [[TMP23:%.*]] = load i32, i32* [[I]], align 4 +// CHECK9-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !17 +// CHECK9-NEXT: [[TMP22:%.*]] = load i32, i32* [[CONV]], align 8, !llvm.access.group !17 +// CHECK9-NEXT: [[TMP23:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !17 // CHECK9-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP23]] to i64 // 
CHECK9-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x i32], [2 x i32]* [[VEC4]], i64 0, i64 [[IDXPROM]] -// CHECK9-NEXT: store i32 [[TMP22]], i32* [[ARRAYIDX]], align 4 -// CHECK9-NEXT: [[TMP24:%.*]] = load %struct.S.0*, %struct.S.0** [[_TMP8]], align 8 -// CHECK9-NEXT: [[TMP25:%.*]] = load i32, i32* [[I]], align 4 +// CHECK9-NEXT: store i32 [[TMP22]], i32* [[ARRAYIDX]], align 4, !llvm.access.group !17 +// CHECK9-NEXT: [[TMP24:%.*]] = load %struct.S.0*, %struct.S.0** [[_TMP8]], align 8, !llvm.access.group !17 +// CHECK9-NEXT: [[TMP25:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !17 // CHECK9-NEXT: [[IDXPROM10:%.*]] = sext i32 [[TMP25]] to i64 // CHECK9-NEXT: [[ARRAYIDX11:%.*]] = getelementptr inbounds [2 x %struct.S.0], [2 x %struct.S.0]* [[S_ARR5]], i64 0, i64 [[IDXPROM10]] // CHECK9-NEXT: [[TMP26:%.*]] = bitcast %struct.S.0* [[ARRAYIDX11]] to i8* // CHECK9-NEXT: [[TMP27:%.*]] = bitcast %struct.S.0* [[TMP24]] to i8* -// CHECK9-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 [[TMP26]], i8* align 4 [[TMP27]], i64 4, i1 false) +// CHECK9-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 [[TMP26]], i8* align 4 [[TMP27]], i64 4, i1 false), !llvm.access.group !17 // CHECK9-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK9: omp.body.continue: // CHECK9-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK9: omp.inner.for.inc: -// CHECK9-NEXT: [[TMP28:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[TMP28:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !17 // CHECK9-NEXT: [[ADD12:%.*]] = add nsw i32 [[TMP28]], 1 -// CHECK9-NEXT: store i32 [[ADD12]], i32* [[DOTOMP_IV]], align 4 -// CHECK9-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP10:![0-9]+]] +// CHECK9-NEXT: store i32 [[ADD12]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !17 +// CHECK9-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP18:![0-9]+]] // CHECK9: omp.inner.for.end: // CHECK9-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK9: omp.loop.exit: @@ -3589,30 +3589,30 @@ int main() { // CHECK10-NEXT: store i32 [[TMP21]], i32* [[DOTOMP_IV]], align 4 // CHECK10-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK10: omp.inner.for.cond: -// CHECK10-NEXT: [[TMP22:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK10-NEXT: [[TMP23:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK10-NEXT: [[TMP22:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !6 +// CHECK10-NEXT: [[TMP23:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !6 // CHECK10-NEXT: [[CMP10:%.*]] = icmp sle i32 [[TMP22]], [[TMP23]] // CHECK10-NEXT: br i1 [[CMP10]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_COND_CLEANUP:%.*]] // CHECK10: omp.inner.for.cond.cleanup: // CHECK10-NEXT: br label [[OMP_INNER_FOR_END:%.*]] // CHECK10: omp.inner.for.body: -// CHECK10-NEXT: [[TMP24:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK10-NEXT: [[TMP25:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK10-NEXT: [[TMP26:%.*]] = load i32, i32* [[T_VAR3]], align 4 -// CHECK10-NEXT: store i32 [[TMP26]], i32* [[T_VAR_CASTED]], align 4 -// CHECK10-NEXT: [[TMP27:%.*]] = load i32, i32* [[T_VAR_CASTED]], align 4 -// CHECK10-NEXT: [[TMP28:%.*]] = load %struct.S*, %struct.S** [[_TMP8]], align 4 -// CHECK10-NEXT: [[TMP29:%.*]] = load i32, i32* [[SVAR9]], align 4 -// CHECK10-NEXT: store i32 [[TMP29]], i32* [[SVAR_CASTED]], align 4 -// CHECK10-NEXT: [[TMP30:%.*]] = load i32, i32* [[SVAR_CASTED]], align 4 -// CHECK10-NEXT: call void (%struct.ident_t*, 
i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 7, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32, i32, [2 x i32]*, i32, [2 x %struct.S]*, %struct.S*, i32)* @.omp_outlined..1 to void (i32*, i32*, ...)*), i32 [[TMP24]], i32 [[TMP25]], [2 x i32]* [[VEC4]], i32 [[TMP27]], [2 x %struct.S]* [[S_ARR5]], %struct.S* [[TMP28]], i32 [[TMP30]]) +// CHECK10-NEXT: [[TMP24:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !6 +// CHECK10-NEXT: [[TMP25:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !6 +// CHECK10-NEXT: [[TMP26:%.*]] = load i32, i32* [[T_VAR3]], align 4, !llvm.access.group !6 +// CHECK10-NEXT: store i32 [[TMP26]], i32* [[T_VAR_CASTED]], align 4, !llvm.access.group !6 +// CHECK10-NEXT: [[TMP27:%.*]] = load i32, i32* [[T_VAR_CASTED]], align 4, !llvm.access.group !6 +// CHECK10-NEXT: [[TMP28:%.*]] = load %struct.S*, %struct.S** [[_TMP8]], align 4, !llvm.access.group !6 +// CHECK10-NEXT: [[TMP29:%.*]] = load i32, i32* [[SVAR9]], align 4, !llvm.access.group !6 +// CHECK10-NEXT: store i32 [[TMP29]], i32* [[SVAR_CASTED]], align 4, !llvm.access.group !6 +// CHECK10-NEXT: [[TMP30:%.*]] = load i32, i32* [[SVAR_CASTED]], align 4, !llvm.access.group !6 +// CHECK10-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 7, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32, i32, [2 x i32]*, i32, [2 x %struct.S]*, %struct.S*, i32)* @.omp_outlined..1 to void (i32*, i32*, ...)*), i32 [[TMP24]], i32 [[TMP25]], [2 x i32]* [[VEC4]], i32 [[TMP27]], [2 x %struct.S]* [[S_ARR5]], %struct.S* [[TMP28]], i32 [[TMP30]]), !llvm.access.group !6 // CHECK10-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK10: omp.inner.for.inc: -// CHECK10-NEXT: [[TMP31:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK10-NEXT: [[TMP32:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK10-NEXT: [[TMP31:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !6 +// CHECK10-NEXT: [[TMP32:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !6 // CHECK10-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP31]], [[TMP32]] -// CHECK10-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 -// CHECK10-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP5:![0-9]+]] +// CHECK10-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !6 +// CHECK10-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP7:![0-9]+]] // CHECK10: omp.inner.for.end: // CHECK10-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK10: omp.loop.exit: @@ -3727,35 +3727,35 @@ int main() { // CHECK10-NEXT: store i32 [[TMP18]], i32* [[DOTOMP_IV]], align 4 // CHECK10-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK10: omp.inner.for.cond: -// CHECK10-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK10-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK10-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !10 +// CHECK10-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !10 // CHECK10-NEXT: [[CMP7:%.*]] = icmp sle i32 [[TMP19]], [[TMP20]] // CHECK10-NEXT: br i1 [[CMP7]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_COND_CLEANUP:%.*]] // CHECK10: omp.inner.for.cond.cleanup: // CHECK10-NEXT: br label [[OMP_INNER_FOR_END:%.*]] // CHECK10: omp.inner.for.body: -// CHECK10-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK10-NEXT: [[TMP21:%.*]] = load i32, i32* 
[[DOTOMP_IV]], align 4, !llvm.access.group !10 // CHECK10-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP21]], 1 // CHECK10-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK10-NEXT: store i32 [[ADD]], i32* [[I]], align 4 -// CHECK10-NEXT: [[TMP22:%.*]] = load i32, i32* [[T_VAR_ADDR]], align 4 -// CHECK10-NEXT: [[TMP23:%.*]] = load i32, i32* [[I]], align 4 +// CHECK10-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !10 +// CHECK10-NEXT: [[TMP22:%.*]] = load i32, i32* [[T_VAR_ADDR]], align 4, !llvm.access.group !10 +// CHECK10-NEXT: [[TMP23:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !10 // CHECK10-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x i32], [2 x i32]* [[VEC2]], i32 0, i32 [[TMP23]] -// CHECK10-NEXT: store i32 [[TMP22]], i32* [[ARRAYIDX]], align 4 -// CHECK10-NEXT: [[TMP24:%.*]] = load %struct.S*, %struct.S** [[_TMP6]], align 4 -// CHECK10-NEXT: [[TMP25:%.*]] = load i32, i32* [[I]], align 4 +// CHECK10-NEXT: store i32 [[TMP22]], i32* [[ARRAYIDX]], align 4, !llvm.access.group !10 +// CHECK10-NEXT: [[TMP24:%.*]] = load %struct.S*, %struct.S** [[_TMP6]], align 4, !llvm.access.group !10 +// CHECK10-NEXT: [[TMP25:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !10 // CHECK10-NEXT: [[ARRAYIDX8:%.*]] = getelementptr inbounds [2 x %struct.S], [2 x %struct.S]* [[S_ARR3]], i32 0, i32 [[TMP25]] // CHECK10-NEXT: [[TMP26:%.*]] = bitcast %struct.S* [[ARRAYIDX8]] to i8* // CHECK10-NEXT: [[TMP27:%.*]] = bitcast %struct.S* [[TMP24]] to i8* -// CHECK10-NEXT: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 4 [[TMP26]], i8* align 4 [[TMP27]], i32 4, i1 false) +// CHECK10-NEXT: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 4 [[TMP26]], i8* align 4 [[TMP27]], i32 4, i1 false), !llvm.access.group !10 // CHECK10-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK10: omp.body.continue: // CHECK10-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK10: omp.inner.for.inc: -// CHECK10-NEXT: [[TMP28:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK10-NEXT: [[TMP28:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !10 // CHECK10-NEXT: [[ADD9:%.*]] = add nsw i32 [[TMP28]], 1 -// CHECK10-NEXT: store i32 [[ADD9]], i32* [[DOTOMP_IV]], align 4 -// CHECK10-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP7:![0-9]+]] +// CHECK10-NEXT: store i32 [[ADD9]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !10 +// CHECK10-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP11:![0-9]+]] // CHECK10: omp.inner.for.end: // CHECK10-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK10: omp.loop.exit: @@ -4046,27 +4046,27 @@ int main() { // CHECK10-NEXT: store i32 [[TMP19]], i32* [[DOTOMP_IV]], align 4 // CHECK10-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK10: omp.inner.for.cond: -// CHECK10-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK10-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK10-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !15 +// CHECK10-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !15 // CHECK10-NEXT: [[CMP9:%.*]] = icmp sle i32 [[TMP20]], [[TMP21]] // CHECK10-NEXT: br i1 [[CMP9]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_COND_CLEANUP:%.*]] // CHECK10: omp.inner.for.cond.cleanup: // CHECK10-NEXT: br label [[OMP_INNER_FOR_END:%.*]] // CHECK10: omp.inner.for.body: -// CHECK10-NEXT: [[TMP22:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK10-NEXT: [[TMP23:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 
-// CHECK10-NEXT: [[TMP24:%.*]] = load i32, i32* [[T_VAR3]], align 4 -// CHECK10-NEXT: store i32 [[TMP24]], i32* [[T_VAR_CASTED]], align 4 -// CHECK10-NEXT: [[TMP25:%.*]] = load i32, i32* [[T_VAR_CASTED]], align 4 -// CHECK10-NEXT: [[TMP26:%.*]] = load %struct.S.0*, %struct.S.0** [[_TMP8]], align 4 -// CHECK10-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 6, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32, i32, [2 x i32]*, i32, [2 x %struct.S.0]*, %struct.S.0*)* @.omp_outlined..3 to void (i32*, i32*, ...)*), i32 [[TMP22]], i32 [[TMP23]], [2 x i32]* [[VEC4]], i32 [[TMP25]], [2 x %struct.S.0]* [[S_ARR5]], %struct.S.0* [[TMP26]]) +// CHECK10-NEXT: [[TMP22:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !15 +// CHECK10-NEXT: [[TMP23:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !15 +// CHECK10-NEXT: [[TMP24:%.*]] = load i32, i32* [[T_VAR3]], align 4, !llvm.access.group !15 +// CHECK10-NEXT: store i32 [[TMP24]], i32* [[T_VAR_CASTED]], align 4, !llvm.access.group !15 +// CHECK10-NEXT: [[TMP25:%.*]] = load i32, i32* [[T_VAR_CASTED]], align 4, !llvm.access.group !15 +// CHECK10-NEXT: [[TMP26:%.*]] = load %struct.S.0*, %struct.S.0** [[_TMP8]], align 4, !llvm.access.group !15 +// CHECK10-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 6, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32, i32, [2 x i32]*, i32, [2 x %struct.S.0]*, %struct.S.0*)* @.omp_outlined..3 to void (i32*, i32*, ...)*), i32 [[TMP22]], i32 [[TMP23]], [2 x i32]* [[VEC4]], i32 [[TMP25]], [2 x %struct.S.0]* [[S_ARR5]], %struct.S.0* [[TMP26]]), !llvm.access.group !15 // CHECK10-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK10: omp.inner.for.inc: -// CHECK10-NEXT: [[TMP27:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK10-NEXT: [[TMP28:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK10-NEXT: [[TMP27:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !15 +// CHECK10-NEXT: [[TMP28:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !15 // CHECK10-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP27]], [[TMP28]] -// CHECK10-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 -// CHECK10-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP10:![0-9]+]] +// CHECK10-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !15 +// CHECK10-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP16:![0-9]+]] // CHECK10: omp.inner.for.end: // CHECK10-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK10: omp.loop.exit: @@ -4179,35 +4179,35 @@ int main() { // CHECK10-NEXT: store i32 [[TMP18]], i32* [[DOTOMP_IV]], align 4 // CHECK10-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK10: omp.inner.for.cond: -// CHECK10-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK10-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK10-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !18 +// CHECK10-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !18 // CHECK10-NEXT: [[CMP7:%.*]] = icmp sle i32 [[TMP19]], [[TMP20]] // CHECK10-NEXT: br i1 [[CMP7]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_COND_CLEANUP:%.*]] // CHECK10: omp.inner.for.cond.cleanup: // CHECK10-NEXT: br label [[OMP_INNER_FOR_END:%.*]] // CHECK10: omp.inner.for.body: -// CHECK10-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_IV]], 
align 4 +// CHECK10-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !18 // CHECK10-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP21]], 1 // CHECK10-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK10-NEXT: store i32 [[ADD]], i32* [[I]], align 4 -// CHECK10-NEXT: [[TMP22:%.*]] = load i32, i32* [[T_VAR_ADDR]], align 4 -// CHECK10-NEXT: [[TMP23:%.*]] = load i32, i32* [[I]], align 4 +// CHECK10-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !18 +// CHECK10-NEXT: [[TMP22:%.*]] = load i32, i32* [[T_VAR_ADDR]], align 4, !llvm.access.group !18 +// CHECK10-NEXT: [[TMP23:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !18 // CHECK10-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x i32], [2 x i32]* [[VEC2]], i32 0, i32 [[TMP23]] -// CHECK10-NEXT: store i32 [[TMP22]], i32* [[ARRAYIDX]], align 4 -// CHECK10-NEXT: [[TMP24:%.*]] = load %struct.S.0*, %struct.S.0** [[_TMP6]], align 4 -// CHECK10-NEXT: [[TMP25:%.*]] = load i32, i32* [[I]], align 4 +// CHECK10-NEXT: store i32 [[TMP22]], i32* [[ARRAYIDX]], align 4, !llvm.access.group !18 +// CHECK10-NEXT: [[TMP24:%.*]] = load %struct.S.0*, %struct.S.0** [[_TMP6]], align 4, !llvm.access.group !18 +// CHECK10-NEXT: [[TMP25:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !18 // CHECK10-NEXT: [[ARRAYIDX8:%.*]] = getelementptr inbounds [2 x %struct.S.0], [2 x %struct.S.0]* [[S_ARR3]], i32 0, i32 [[TMP25]] // CHECK10-NEXT: [[TMP26:%.*]] = bitcast %struct.S.0* [[ARRAYIDX8]] to i8* // CHECK10-NEXT: [[TMP27:%.*]] = bitcast %struct.S.0* [[TMP24]] to i8* -// CHECK10-NEXT: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 4 [[TMP26]], i8* align 4 [[TMP27]], i32 4, i1 false) +// CHECK10-NEXT: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 4 [[TMP26]], i8* align 4 [[TMP27]], i32 4, i1 false), !llvm.access.group !18 // CHECK10-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK10: omp.body.continue: // CHECK10-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK10: omp.inner.for.inc: -// CHECK10-NEXT: [[TMP28:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK10-NEXT: [[TMP28:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !18 // CHECK10-NEXT: [[ADD9:%.*]] = add nsw i32 [[TMP28]], 1 -// CHECK10-NEXT: store i32 [[ADD9]], i32* [[DOTOMP_IV]], align 4 -// CHECK10-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP11:![0-9]+]] +// CHECK10-NEXT: store i32 [[ADD9]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !18 +// CHECK10-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP19:![0-9]+]] // CHECK10: omp.inner.for.end: // CHECK10-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK10: omp.loop.exit: @@ -4531,30 +4531,30 @@ int main() { // CHECK11-NEXT: store i32 [[TMP21]], i32* [[DOTOMP_IV]], align 4 // CHECK11-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK11: omp.inner.for.cond: -// CHECK11-NEXT: [[TMP22:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK11-NEXT: [[TMP23:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK11-NEXT: [[TMP22:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !6 +// CHECK11-NEXT: [[TMP23:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !6 // CHECK11-NEXT: [[CMP10:%.*]] = icmp sle i32 [[TMP22]], [[TMP23]] // CHECK11-NEXT: br i1 [[CMP10]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_COND_CLEANUP:%.*]] // CHECK11: omp.inner.for.cond.cleanup: // CHECK11-NEXT: br label [[OMP_INNER_FOR_END:%.*]] // CHECK11: omp.inner.for.body: -// CHECK11-NEXT: [[TMP24:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// 
CHECK11-NEXT: [[TMP25:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK11-NEXT: [[TMP26:%.*]] = load i32, i32* [[T_VAR3]], align 4 -// CHECK11-NEXT: store i32 [[TMP26]], i32* [[T_VAR_CASTED]], align 4 -// CHECK11-NEXT: [[TMP27:%.*]] = load i32, i32* [[T_VAR_CASTED]], align 4 -// CHECK11-NEXT: [[TMP28:%.*]] = load %struct.S*, %struct.S** [[_TMP8]], align 4 -// CHECK11-NEXT: [[TMP29:%.*]] = load i32, i32* [[SVAR9]], align 4 -// CHECK11-NEXT: store i32 [[TMP29]], i32* [[SVAR_CASTED]], align 4 -// CHECK11-NEXT: [[TMP30:%.*]] = load i32, i32* [[SVAR_CASTED]], align 4 -// CHECK11-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 7, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32, i32, [2 x i32]*, i32, [2 x %struct.S]*, %struct.S*, i32)* @.omp_outlined..1 to void (i32*, i32*, ...)*), i32 [[TMP24]], i32 [[TMP25]], [2 x i32]* [[VEC4]], i32 [[TMP27]], [2 x %struct.S]* [[S_ARR5]], %struct.S* [[TMP28]], i32 [[TMP30]]) +// CHECK11-NEXT: [[TMP24:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !6 +// CHECK11-NEXT: [[TMP25:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !6 +// CHECK11-NEXT: [[TMP26:%.*]] = load i32, i32* [[T_VAR3]], align 4, !llvm.access.group !6 +// CHECK11-NEXT: store i32 [[TMP26]], i32* [[T_VAR_CASTED]], align 4, !llvm.access.group !6 +// CHECK11-NEXT: [[TMP27:%.*]] = load i32, i32* [[T_VAR_CASTED]], align 4, !llvm.access.group !6 +// CHECK11-NEXT: [[TMP28:%.*]] = load %struct.S*, %struct.S** [[_TMP8]], align 4, !llvm.access.group !6 +// CHECK11-NEXT: [[TMP29:%.*]] = load i32, i32* [[SVAR9]], align 4, !llvm.access.group !6 +// CHECK11-NEXT: store i32 [[TMP29]], i32* [[SVAR_CASTED]], align 4, !llvm.access.group !6 +// CHECK11-NEXT: [[TMP30:%.*]] = load i32, i32* [[SVAR_CASTED]], align 4, !llvm.access.group !6 +// CHECK11-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) 
@__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 7, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32, i32, [2 x i32]*, i32, [2 x %struct.S]*, %struct.S*, i32)* @.omp_outlined..1 to void (i32*, i32*, ...)*), i32 [[TMP24]], i32 [[TMP25]], [2 x i32]* [[VEC4]], i32 [[TMP27]], [2 x %struct.S]* [[S_ARR5]], %struct.S* [[TMP28]], i32 [[TMP30]]), !llvm.access.group !6 // CHECK11-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK11: omp.inner.for.inc: -// CHECK11-NEXT: [[TMP31:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK11-NEXT: [[TMP32:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK11-NEXT: [[TMP31:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !6 +// CHECK11-NEXT: [[TMP32:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !6 // CHECK11-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP31]], [[TMP32]] -// CHECK11-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 -// CHECK11-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP5:![0-9]+]] +// CHECK11-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !6 +// CHECK11-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP7:![0-9]+]] // CHECK11: omp.inner.for.end: // CHECK11-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK11: omp.loop.exit: @@ -4669,35 +4669,35 @@ int main() { // CHECK11-NEXT: store i32 [[TMP18]], i32* [[DOTOMP_IV]], align 4 // CHECK11-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK11: omp.inner.for.cond: -// CHECK11-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK11-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK11-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !10 +// CHECK11-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !10 // CHECK11-NEXT: [[CMP7:%.*]] = icmp sle i32 [[TMP19]], [[TMP20]] // CHECK11-NEXT: br i1 [[CMP7]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_COND_CLEANUP:%.*]] // CHECK11: omp.inner.for.cond.cleanup: // CHECK11-NEXT: br label [[OMP_INNER_FOR_END:%.*]] // CHECK11: omp.inner.for.body: -// CHECK11-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !10 // CHECK11-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP21]], 1 // CHECK11-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK11-NEXT: store i32 [[ADD]], i32* [[I]], align 4 -// CHECK11-NEXT: [[TMP22:%.*]] = load i32, i32* [[T_VAR_ADDR]], align 4 -// CHECK11-NEXT: [[TMP23:%.*]] = load i32, i32* [[I]], align 4 +// CHECK11-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !10 +// CHECK11-NEXT: [[TMP22:%.*]] = load i32, i32* [[T_VAR_ADDR]], align 4, !llvm.access.group !10 +// CHECK11-NEXT: [[TMP23:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !10 // CHECK11-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x i32], [2 x i32]* [[VEC2]], i32 0, i32 [[TMP23]] -// CHECK11-NEXT: store i32 [[TMP22]], i32* [[ARRAYIDX]], align 4 -// CHECK11-NEXT: [[TMP24:%.*]] = load %struct.S*, %struct.S** [[_TMP6]], align 4 -// CHECK11-NEXT: [[TMP25:%.*]] = load i32, i32* [[I]], align 4 +// CHECK11-NEXT: store i32 [[TMP22]], i32* [[ARRAYIDX]], align 4, !llvm.access.group !10 +// CHECK11-NEXT: [[TMP24:%.*]] = load %struct.S*, %struct.S** [[_TMP6]], align 4, !llvm.access.group !10 +// CHECK11-NEXT: [[TMP25:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !10 // CHECK11-NEXT: [[ARRAYIDX8:%.*]] = getelementptr inbounds [2 x %struct.S], [2 x %struct.S]* [[S_ARR3]], 
i32 0, i32 [[TMP25]] // CHECK11-NEXT: [[TMP26:%.*]] = bitcast %struct.S* [[ARRAYIDX8]] to i8* // CHECK11-NEXT: [[TMP27:%.*]] = bitcast %struct.S* [[TMP24]] to i8* -// CHECK11-NEXT: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 4 [[TMP26]], i8* align 4 [[TMP27]], i32 4, i1 false) +// CHECK11-NEXT: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 4 [[TMP26]], i8* align 4 [[TMP27]], i32 4, i1 false), !llvm.access.group !10 // CHECK11-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK11: omp.body.continue: // CHECK11-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK11: omp.inner.for.inc: -// CHECK11-NEXT: [[TMP28:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[TMP28:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !10 // CHECK11-NEXT: [[ADD9:%.*]] = add nsw i32 [[TMP28]], 1 -// CHECK11-NEXT: store i32 [[ADD9]], i32* [[DOTOMP_IV]], align 4 -// CHECK11-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP7:![0-9]+]] +// CHECK11-NEXT: store i32 [[ADD9]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !10 +// CHECK11-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP11:![0-9]+]] // CHECK11: omp.inner.for.end: // CHECK11-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK11: omp.loop.exit: @@ -4988,27 +4988,27 @@ int main() { // CHECK11-NEXT: store i32 [[TMP19]], i32* [[DOTOMP_IV]], align 4 // CHECK11-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK11: omp.inner.for.cond: -// CHECK11-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK11-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK11-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !15 +// CHECK11-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !15 // CHECK11-NEXT: [[CMP9:%.*]] = icmp sle i32 [[TMP20]], [[TMP21]] // CHECK11-NEXT: br i1 [[CMP9]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_COND_CLEANUP:%.*]] // CHECK11: omp.inner.for.cond.cleanup: // CHECK11-NEXT: br label [[OMP_INNER_FOR_END:%.*]] // CHECK11: omp.inner.for.body: -// CHECK11-NEXT: [[TMP22:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK11-NEXT: [[TMP23:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK11-NEXT: [[TMP24:%.*]] = load i32, i32* [[T_VAR3]], align 4 -// CHECK11-NEXT: store i32 [[TMP24]], i32* [[T_VAR_CASTED]], align 4 -// CHECK11-NEXT: [[TMP25:%.*]] = load i32, i32* [[T_VAR_CASTED]], align 4 -// CHECK11-NEXT: [[TMP26:%.*]] = load %struct.S.0*, %struct.S.0** [[_TMP8]], align 4 -// CHECK11-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) 
@__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 6, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32, i32, [2 x i32]*, i32, [2 x %struct.S.0]*, %struct.S.0*)* @.omp_outlined..3 to void (i32*, i32*, ...)*), i32 [[TMP22]], i32 [[TMP23]], [2 x i32]* [[VEC4]], i32 [[TMP25]], [2 x %struct.S.0]* [[S_ARR5]], %struct.S.0* [[TMP26]]) +// CHECK11-NEXT: [[TMP22:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !15 +// CHECK11-NEXT: [[TMP23:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !15 +// CHECK11-NEXT: [[TMP24:%.*]] = load i32, i32* [[T_VAR3]], align 4, !llvm.access.group !15 +// CHECK11-NEXT: store i32 [[TMP24]], i32* [[T_VAR_CASTED]], align 4, !llvm.access.group !15 +// CHECK11-NEXT: [[TMP25:%.*]] = load i32, i32* [[T_VAR_CASTED]], align 4, !llvm.access.group !15 +// CHECK11-NEXT: [[TMP26:%.*]] = load %struct.S.0*, %struct.S.0** [[_TMP8]], align 4, !llvm.access.group !15 +// CHECK11-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 6, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32, i32, [2 x i32]*, i32, [2 x %struct.S.0]*, %struct.S.0*)* @.omp_outlined..3 to void (i32*, i32*, ...)*), i32 [[TMP22]], i32 [[TMP23]], [2 x i32]* [[VEC4]], i32 [[TMP25]], [2 x %struct.S.0]* [[S_ARR5]], %struct.S.0* [[TMP26]]), !llvm.access.group !15 // CHECK11-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK11: omp.inner.for.inc: -// CHECK11-NEXT: [[TMP27:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK11-NEXT: [[TMP28:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK11-NEXT: [[TMP27:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !15 +// CHECK11-NEXT: [[TMP28:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !15 // CHECK11-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP27]], [[TMP28]] -// CHECK11-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 -// CHECK11-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP10:![0-9]+]] +// CHECK11-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !15 +// CHECK11-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP16:![0-9]+]] // CHECK11: omp.inner.for.end: // CHECK11-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK11: omp.loop.exit: @@ -5121,35 +5121,35 @@ int main() { // CHECK11-NEXT: store i32 [[TMP18]], i32* [[DOTOMP_IV]], align 4 // CHECK11-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK11: omp.inner.for.cond: -// CHECK11-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK11-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK11-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !18 +// CHECK11-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !18 // CHECK11-NEXT: [[CMP7:%.*]] = icmp sle i32 [[TMP19]], [[TMP20]] // CHECK11-NEXT: br i1 [[CMP7]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_COND_CLEANUP:%.*]] // CHECK11: omp.inner.for.cond.cleanup: // CHECK11-NEXT: br label [[OMP_INNER_FOR_END:%.*]] // CHECK11: omp.inner.for.body: -// CHECK11-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !18 // CHECK11-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP21]], 1 // CHECK11-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK11-NEXT: store i32 [[ADD]], i32* [[I]], align 4 -// CHECK11-NEXT: [[TMP22:%.*]] = load i32, i32* [[T_VAR_ADDR]], align 4 -// CHECK11-NEXT: [[TMP23:%.*]] = 
load i32, i32* [[I]], align 4 +// CHECK11-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !18 +// CHECK11-NEXT: [[TMP22:%.*]] = load i32, i32* [[T_VAR_ADDR]], align 4, !llvm.access.group !18 +// CHECK11-NEXT: [[TMP23:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !18 // CHECK11-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x i32], [2 x i32]* [[VEC2]], i32 0, i32 [[TMP23]] -// CHECK11-NEXT: store i32 [[TMP22]], i32* [[ARRAYIDX]], align 4 -// CHECK11-NEXT: [[TMP24:%.*]] = load %struct.S.0*, %struct.S.0** [[_TMP6]], align 4 -// CHECK11-NEXT: [[TMP25:%.*]] = load i32, i32* [[I]], align 4 +// CHECK11-NEXT: store i32 [[TMP22]], i32* [[ARRAYIDX]], align 4, !llvm.access.group !18 +// CHECK11-NEXT: [[TMP24:%.*]] = load %struct.S.0*, %struct.S.0** [[_TMP6]], align 4, !llvm.access.group !18 +// CHECK11-NEXT: [[TMP25:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !18 // CHECK11-NEXT: [[ARRAYIDX8:%.*]] = getelementptr inbounds [2 x %struct.S.0], [2 x %struct.S.0]* [[S_ARR3]], i32 0, i32 [[TMP25]] // CHECK11-NEXT: [[TMP26:%.*]] = bitcast %struct.S.0* [[ARRAYIDX8]] to i8* // CHECK11-NEXT: [[TMP27:%.*]] = bitcast %struct.S.0* [[TMP24]] to i8* -// CHECK11-NEXT: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 4 [[TMP26]], i8* align 4 [[TMP27]], i32 4, i1 false) +// CHECK11-NEXT: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 4 [[TMP26]], i8* align 4 [[TMP27]], i32 4, i1 false), !llvm.access.group !18 // CHECK11-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK11: omp.body.continue: // CHECK11-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK11: omp.inner.for.inc: -// CHECK11-NEXT: [[TMP28:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[TMP28:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !18 // CHECK11-NEXT: [[ADD9:%.*]] = add nsw i32 [[TMP28]], 1 -// CHECK11-NEXT: store i32 [[ADD9]], i32* [[DOTOMP_IV]], align 4 -// CHECK11-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP11:![0-9]+]] +// CHECK11-NEXT: store i32 [[ADD9]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !18 +// CHECK11-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP19:![0-9]+]] // CHECK11: omp.inner.for.end: // CHECK11-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK11: omp.loop.exit: diff --git a/clang/test/OpenMP/distribute_parallel_for_simd_if_codegen.cpp b/clang/test/OpenMP/distribute_parallel_for_simd_if_codegen.cpp index 67a1d863a6fe..52d2c6cc7768 100644 --- a/clang/test/OpenMP/distribute_parallel_for_simd_if_codegen.cpp +++ b/clang/test/OpenMP/distribute_parallel_for_simd_if_codegen.cpp @@ -184,22 +184,22 @@ int main() { // CHECK1-NEXT: store i32 [[TMP4]], i32* [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !11 +// CHECK1-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !11 // CHECK1-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] // CHECK1-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK1-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !11 // CHECK1-NEXT: [[TMP8:%.*]] = zext i32 [[TMP7]] to i64 -// CHECK1-NEXT: [[TMP9:%.*]] = 
load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !11 // CHECK1-NEXT: [[TMP10:%.*]] = zext i32 [[TMP9]] to i64 -// CHECK1-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 2, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64)* @.omp_outlined..1 to void (i32*, i32*, ...)*), i64 [[TMP8]], i64 [[TMP10]]) +// CHECK1-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 2, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64)* @.omp_outlined..1 to void (i32*, i32*, ...)*), i64 [[TMP8]], i64 [[TMP10]]), !llvm.access.group !11 // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK1-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !11 +// CHECK1-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !11 // CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP11]], [[TMP12]] -// CHECK1-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !11 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP12:![0-9]+]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] @@ -261,22 +261,22 @@ int main() { // CHECK1-NEXT: store i32 [[TMP6]], i32* [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !15 +// CHECK1-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !15 // CHECK1-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP7]], [[TMP8]] // CHECK1-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !15 // CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP9]], 1 // CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK1-NEXT: store i32 [[ADD]], i32* [[I]], align 4 +// CHECK1-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !15 // CHECK1-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK1: omp.body.continue: // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !15 // CHECK1-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP10]], 1 -// CHECK1-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !15 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP16:![0-9]+]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] @@ -336,26 +336,26 @@ int main() { // CHECK1-NEXT: store i32 [[TMP4]], i32* [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// 
CHECK1-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !20 +// CHECK1-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !20 // CHECK1-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] // CHECK1-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK1-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !20 // CHECK1-NEXT: [[TMP8:%.*]] = zext i32 [[TMP7]] to i64 -// CHECK1-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !20 // CHECK1-NEXT: [[TMP10:%.*]] = zext i32 [[TMP9]] to i64 -// CHECK1-NEXT: call void @__kmpc_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]) -// CHECK1-NEXT: [[TMP11:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4 -// CHECK1-NEXT: call void @.omp_outlined..3(i32* [[TMP11]], i32* [[DOTBOUND_ZERO_ADDR]], i64 [[TMP8]], i64 [[TMP10]]) #[[ATTR2]] -// CHECK1-NEXT: call void @__kmpc_end_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]) +// CHECK1-NEXT: call void @__kmpc_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]), !llvm.access.group !20 +// CHECK1-NEXT: [[TMP11:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8, !llvm.access.group !20 +// CHECK1-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4, !llvm.access.group !20 +// CHECK1-NEXT: call void @.omp_outlined..3(i32* [[TMP11]], i32* [[DOTBOUND_ZERO_ADDR]], i64 [[TMP8]], i64 [[TMP10]]) #[[ATTR2]], !llvm.access.group !20 +// CHECK1-NEXT: call void @__kmpc_end_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]), !llvm.access.group !20 // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK1-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !20 +// CHECK1-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !20 // CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP12]], [[TMP13]] -// CHECK1-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !20 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP21:![0-9]+]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] @@ -417,23 +417,23 @@ int main() { // CHECK1-NEXT: store i32 [[TMP6]], i32* [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !23 +// CHECK1-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !23 // CHECK1-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP7]], [[TMP8]] // CHECK1-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK1: 
omp.inner.for.body: -// CHECK1-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !23 // CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP9]], 1 // CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK1-NEXT: store i32 [[ADD]], i32* [[I]], align 4 -// CHECK1-NEXT: call void @_Z9gtid_testv() +// CHECK1-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !23 +// CHECK1-NEXT: call void @_Z9gtid_testv(), !llvm.access.group !23 // CHECK1-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK1: omp.body.continue: // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !23 // CHECK1-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP10]], 1 -// CHECK1-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !23 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP24:![0-9]+]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] @@ -547,22 +547,22 @@ int main() { // CHECK1-NEXT: store i32 [[TMP4]], i32* [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !26 +// CHECK1-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !26 // CHECK1-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] // CHECK1-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK1-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !26 // CHECK1-NEXT: [[TMP8:%.*]] = zext i32 [[TMP7]] to i64 -// CHECK1-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !26 // CHECK1-NEXT: [[TMP10:%.*]] = zext i32 [[TMP9]] to i64 -// CHECK1-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 2, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64)* @.omp_outlined..5 to void (i32*, i32*, ...)*), i64 [[TMP8]], i64 [[TMP10]]) +// CHECK1-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) 
@__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 2, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64)* @.omp_outlined..5 to void (i32*, i32*, ...)*), i64 [[TMP8]], i64 [[TMP10]]), !llvm.access.group !26 // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK1-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !26 +// CHECK1-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !26 // CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP11]], [[TMP12]] -// CHECK1-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !26 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP27:![0-9]+]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] @@ -624,23 +624,23 @@ int main() { // CHECK1-NEXT: store i32 [[TMP6]], i32* [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !29 +// CHECK1-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !29 // CHECK1-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP7]], [[TMP8]] // CHECK1-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !29 // CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP9]], 1 // CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK1-NEXT: store i32 [[ADD]], i32* [[I]], align 4 -// CHECK1-NEXT: call void @_Z3fn4v() +// CHECK1-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !29 +// CHECK1-NEXT: call void @_Z3fn4v(), !llvm.access.group !29 // CHECK1-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK1: omp.body.continue: // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !29 // CHECK1-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP10]], 1 -// CHECK1-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !29 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP30:![0-9]+]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] @@ -700,26 +700,26 @@ int main() { // CHECK1-NEXT: store i32 [[TMP4]], i32* [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !32 +// CHECK1-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !32 // CHECK1-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] // CHECK1-NEXT: br i1 [[CMP1]], label 
[[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK1-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !32 // CHECK1-NEXT: [[TMP8:%.*]] = zext i32 [[TMP7]] to i64 -// CHECK1-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !32 // CHECK1-NEXT: [[TMP10:%.*]] = zext i32 [[TMP9]] to i64 -// CHECK1-NEXT: call void @__kmpc_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]) -// CHECK1-NEXT: [[TMP11:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4 -// CHECK1-NEXT: call void @.omp_outlined..7(i32* [[TMP11]], i32* [[DOTBOUND_ZERO_ADDR]], i64 [[TMP8]], i64 [[TMP10]]) #[[ATTR2]] -// CHECK1-NEXT: call void @__kmpc_end_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]) +// CHECK1-NEXT: call void @__kmpc_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]), !llvm.access.group !32 +// CHECK1-NEXT: [[TMP11:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8, !llvm.access.group !32 +// CHECK1-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4, !llvm.access.group !32 +// CHECK1-NEXT: call void @.omp_outlined..7(i32* [[TMP11]], i32* [[DOTBOUND_ZERO_ADDR]], i64 [[TMP8]], i64 [[TMP10]]) #[[ATTR2]], !llvm.access.group !32 +// CHECK1-NEXT: call void @__kmpc_end_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]), !llvm.access.group !32 // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK1-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !32 +// CHECK1-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !32 // CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP12]], [[TMP13]] -// CHECK1-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !32 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP33:![0-9]+]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] @@ -781,23 +781,23 @@ int main() { // CHECK1-NEXT: store i32 [[TMP6]], i32* [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !35 +// CHECK1-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !35 // CHECK1-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP7]], [[TMP8]] // CHECK1-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !35 // CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP9]], 1 // CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK1-NEXT: store i32 [[ADD]], i32* [[I]], align 4 -// CHECK1-NEXT: call void @_Z3fn5v() +// CHECK1-NEXT: store i32 [[ADD]], i32* [[I]], align 4, 
!llvm.access.group !35 +// CHECK1-NEXT: call void @_Z3fn5v(), !llvm.access.group !35 // CHECK1-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK1: omp.body.continue: // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !35 // CHECK1-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP10]], 1 -// CHECK1-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !35 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP36:![0-9]+]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] @@ -863,35 +863,35 @@ int main() { // CHECK1-NEXT: store i32 [[TMP5]], i32* [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !38 +// CHECK1-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !38 // CHECK1-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] // CHECK1-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK1-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !38 // CHECK1-NEXT: [[TMP9:%.*]] = zext i32 [[TMP8]] to i64 -// CHECK1-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !38 // CHECK1-NEXT: [[TMP11:%.*]] = zext i32 [[TMP10]] to i64 -// CHECK1-NEXT: [[TMP12:%.*]] = load i32, i32* [[TMP0]], align 4 +// CHECK1-NEXT: [[TMP12:%.*]] = load i32, i32* [[TMP0]], align 4, !llvm.access.group !38 // CHECK1-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[TMP12]], 0 // CHECK1-NEXT: br i1 [[TOBOOL]], label [[OMP_IF_THEN:%.*]], label [[OMP_IF_ELSE:%.*]] // CHECK1: omp_if.then: -// CHECK1-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 2, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64)* @.omp_outlined..9 to void (i32*, i32*, ...)*), i64 [[TMP9]], i64 [[TMP11]]) +// CHECK1-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) 
@__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 2, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64)* @.omp_outlined..9 to void (i32*, i32*, ...)*), i64 [[TMP9]], i64 [[TMP11]]), !llvm.access.group !38 // CHECK1-NEXT: br label [[OMP_IF_END:%.*]] // CHECK1: omp_if.else: -// CHECK1-NEXT: call void @__kmpc_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP2]]) -// CHECK1-NEXT: [[TMP13:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4 -// CHECK1-NEXT: call void @.omp_outlined..9(i32* [[TMP13]], i32* [[DOTBOUND_ZERO_ADDR]], i64 [[TMP9]], i64 [[TMP11]]) #[[ATTR2]] -// CHECK1-NEXT: call void @__kmpc_end_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP2]]) +// CHECK1-NEXT: call void @__kmpc_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP2]]), !llvm.access.group !38 +// CHECK1-NEXT: [[TMP13:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8, !llvm.access.group !38 +// CHECK1-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4, !llvm.access.group !38 +// CHECK1-NEXT: call void @.omp_outlined..9(i32* [[TMP13]], i32* [[DOTBOUND_ZERO_ADDR]], i64 [[TMP9]], i64 [[TMP11]]) #[[ATTR2]], !llvm.access.group !38 +// CHECK1-NEXT: call void @__kmpc_end_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP2]]), !llvm.access.group !38 // CHECK1-NEXT: br label [[OMP_IF_END]] // CHECK1: omp_if.end: // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK1-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !38 +// CHECK1-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !38 // CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP14]], [[TMP15]] -// CHECK1-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !38 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP39:![0-9]+]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] @@ -953,23 +953,23 @@ int main() { // CHECK1-NEXT: store i32 [[TMP6]], i32* [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !41 +// CHECK1-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !41 // CHECK1-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP7]], [[TMP8]] // CHECK1-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !41 // CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP9]], 1 // CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK1-NEXT: store i32 [[ADD]], i32* [[I]], align 4 -// CHECK1-NEXT: call void @_Z3fn6v() +// CHECK1-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !41 +// CHECK1-NEXT: call void @_Z3fn6v(), !llvm.access.group !41 // CHECK1-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK1: omp.body.continue: // CHECK1-NEXT: br label 
[[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !41 // CHECK1-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP10]], 1 -// CHECK1-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !41 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP42:![0-9]+]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] @@ -1081,22 +1081,22 @@ int main() { // CHECK1-NEXT: store i32 [[TMP4]], i32* [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !44 +// CHECK1-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !44 // CHECK1-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] // CHECK1-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK1-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !44 // CHECK1-NEXT: [[TMP8:%.*]] = zext i32 [[TMP7]] to i64 -// CHECK1-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !44 // CHECK1-NEXT: [[TMP10:%.*]] = zext i32 [[TMP9]] to i64 -// CHECK1-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 2, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64)* @.omp_outlined..11 to void (i32*, i32*, ...)*), i64 [[TMP8]], i64 [[TMP10]]) +// CHECK1-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) 
@__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 2, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64)* @.omp_outlined..11 to void (i32*, i32*, ...)*), i64 [[TMP8]], i64 [[TMP10]]), !llvm.access.group !44 // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK1-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !44 +// CHECK1-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !44 // CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP11]], [[TMP12]] -// CHECK1-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !44 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP45:![0-9]+]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] @@ -1158,23 +1158,23 @@ int main() { // CHECK1-NEXT: store i32 [[TMP6]], i32* [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !47 +// CHECK1-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !47 // CHECK1-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP7]], [[TMP8]] // CHECK1-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !47 // CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP9]], 1 // CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK1-NEXT: store i32 [[ADD]], i32* [[I]], align 4 -// CHECK1-NEXT: call void @_Z3fn1v() +// CHECK1-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !47 +// CHECK1-NEXT: call void @_Z3fn1v(), !llvm.access.group !47 // CHECK1-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK1: omp.body.continue: // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !47 // CHECK1-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP10]], 1 -// CHECK1-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !47 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP48:![0-9]+]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] @@ -1234,26 +1234,26 @@ int main() { // CHECK1-NEXT: store i32 [[TMP4]], i32* [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !50 +// CHECK1-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !50 // CHECK1-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] // CHECK1-NEXT: br i1 [[CMP1]], label 
[[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK1-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !50 // CHECK1-NEXT: [[TMP8:%.*]] = zext i32 [[TMP7]] to i64 -// CHECK1-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !50 // CHECK1-NEXT: [[TMP10:%.*]] = zext i32 [[TMP9]] to i64 -// CHECK1-NEXT: call void @__kmpc_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]) -// CHECK1-NEXT: [[TMP11:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4 -// CHECK1-NEXT: call void @.omp_outlined..13(i32* [[TMP11]], i32* [[DOTBOUND_ZERO_ADDR]], i64 [[TMP8]], i64 [[TMP10]]) #[[ATTR2]] -// CHECK1-NEXT: call void @__kmpc_end_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]) +// CHECK1-NEXT: call void @__kmpc_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]), !llvm.access.group !50 +// CHECK1-NEXT: [[TMP11:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8, !llvm.access.group !50 +// CHECK1-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4, !llvm.access.group !50 +// CHECK1-NEXT: call void @.omp_outlined..13(i32* [[TMP11]], i32* [[DOTBOUND_ZERO_ADDR]], i64 [[TMP8]], i64 [[TMP10]]) #[[ATTR2]], !llvm.access.group !50 +// CHECK1-NEXT: call void @__kmpc_end_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]), !llvm.access.group !50 // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK1-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !50 +// CHECK1-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !50 // CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP12]], [[TMP13]] -// CHECK1-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !50 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP51:![0-9]+]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] @@ -1315,23 +1315,23 @@ int main() { // CHECK1-NEXT: store i32 [[TMP6]], i32* [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !53 +// CHECK1-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !53 // CHECK1-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP7]], [[TMP8]] // CHECK1-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !53 // CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP9]], 1 // CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK1-NEXT: store i32 [[ADD]], i32* [[I]], align 4 -// CHECK1-NEXT: call void @_Z3fn2v() +// CHECK1-NEXT: store i32 [[ADD]], i32* [[I]], align 4, 
!llvm.access.group !53 +// CHECK1-NEXT: call void @_Z3fn2v(), !llvm.access.group !53 // CHECK1-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK1: omp.body.continue: // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !53 // CHECK1-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP10]], 1 -// CHECK1-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !53 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP54:![0-9]+]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] @@ -1397,35 +1397,35 @@ int main() { // CHECK1-NEXT: store i32 [[TMP5]], i32* [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !56 +// CHECK1-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !56 // CHECK1-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] // CHECK1-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK1-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !56 // CHECK1-NEXT: [[TMP9:%.*]] = zext i32 [[TMP8]] to i64 -// CHECK1-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !56 // CHECK1-NEXT: [[TMP11:%.*]] = zext i32 [[TMP10]] to i64 -// CHECK1-NEXT: [[TMP12:%.*]] = load i32, i32* [[TMP0]], align 4 +// CHECK1-NEXT: [[TMP12:%.*]] = load i32, i32* [[TMP0]], align 4, !llvm.access.group !56 // CHECK1-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[TMP12]], 0 // CHECK1-NEXT: br i1 [[TOBOOL]], label [[OMP_IF_THEN:%.*]], label [[OMP_IF_ELSE:%.*]] // CHECK1: omp_if.then: -// CHECK1-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 2, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64)* @.omp_outlined..15 to void (i32*, i32*, ...)*), i64 [[TMP9]], i64 [[TMP11]]) +// CHECK1-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) 
@__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 2, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64)* @.omp_outlined..15 to void (i32*, i32*, ...)*), i64 [[TMP9]], i64 [[TMP11]]), !llvm.access.group !56 // CHECK1-NEXT: br label [[OMP_IF_END:%.*]] // CHECK1: omp_if.else: -// CHECK1-NEXT: call void @__kmpc_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP2]]) -// CHECK1-NEXT: [[TMP13:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4 -// CHECK1-NEXT: call void @.omp_outlined..15(i32* [[TMP13]], i32* [[DOTBOUND_ZERO_ADDR]], i64 [[TMP9]], i64 [[TMP11]]) #[[ATTR2]] -// CHECK1-NEXT: call void @__kmpc_end_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP2]]) +// CHECK1-NEXT: call void @__kmpc_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP2]]), !llvm.access.group !56 +// CHECK1-NEXT: [[TMP13:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8, !llvm.access.group !56 +// CHECK1-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4, !llvm.access.group !56 +// CHECK1-NEXT: call void @.omp_outlined..15(i32* [[TMP13]], i32* [[DOTBOUND_ZERO_ADDR]], i64 [[TMP9]], i64 [[TMP11]]) #[[ATTR2]], !llvm.access.group !56 +// CHECK1-NEXT: call void @__kmpc_end_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP2]]), !llvm.access.group !56 // CHECK1-NEXT: br label [[OMP_IF_END]] // CHECK1: omp_if.end: // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK1-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !56 +// CHECK1-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !56 // CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP14]], [[TMP15]] -// CHECK1-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !56 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP57:![0-9]+]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] @@ -1487,23 +1487,23 @@ int main() { // CHECK1-NEXT: store i32 [[TMP6]], i32* [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !59 +// CHECK1-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !59 // CHECK1-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP7]], [[TMP8]] // CHECK1-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !59 // CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP9]], 1 // CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK1-NEXT: store i32 [[ADD]], i32* [[I]], align 4 -// CHECK1-NEXT: call void @_Z3fn3v() +// CHECK1-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !59 +// CHECK1-NEXT: call void @_Z3fn3v(), !llvm.access.group !59 // CHECK1-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK1: omp.body.continue: // CHECK1-NEXT: br label 
[[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !59 // CHECK1-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP10]], 1 -// CHECK1-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !59 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP60:![0-9]+]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] @@ -1593,22 +1593,22 @@ int main() { // CHECK2-NEXT: store i32 [[TMP4]], i32* [[DOTOMP_IV]], align 4 // CHECK2-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK2: omp.inner.for.cond: -// CHECK2-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK2-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK2-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !11 +// CHECK2-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !11 // CHECK2-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] // CHECK2-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK2: omp.inner.for.body: -// CHECK2-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK2-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !11 // CHECK2-NEXT: [[TMP8:%.*]] = zext i32 [[TMP7]] to i64 -// CHECK2-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK2-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !11 // CHECK2-NEXT: [[TMP10:%.*]] = zext i32 [[TMP9]] to i64 -// CHECK2-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 2, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64)* @.omp_outlined..1 to void (i32*, i32*, ...)*), i64 [[TMP8]], i64 [[TMP10]]) +// CHECK2-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) 
@__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 2, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64)* @.omp_outlined..1 to void (i32*, i32*, ...)*), i64 [[TMP8]], i64 [[TMP10]]), !llvm.access.group !11 // CHECK2-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK2: omp.inner.for.inc: -// CHECK2-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK2-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK2-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !11 +// CHECK2-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !11 // CHECK2-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP11]], [[TMP12]] -// CHECK2-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 +// CHECK2-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !11 // CHECK2-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP12:![0-9]+]] // CHECK2: omp.inner.for.end: // CHECK2-NEXT: br label [[OMP_LOOP_EXIT:%.*]] @@ -1670,22 +1670,22 @@ int main() { // CHECK2-NEXT: store i32 [[TMP6]], i32* [[DOTOMP_IV]], align 4 // CHECK2-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK2: omp.inner.for.cond: -// CHECK2-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK2-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK2-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !15 +// CHECK2-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !15 // CHECK2-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP7]], [[TMP8]] // CHECK2-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK2: omp.inner.for.body: -// CHECK2-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK2-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !15 // CHECK2-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP9]], 1 // CHECK2-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK2-NEXT: store i32 [[ADD]], i32* [[I]], align 4 +// CHECK2-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !15 // CHECK2-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK2: omp.body.continue: // CHECK2-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK2: omp.inner.for.inc: -// CHECK2-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK2-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !15 // CHECK2-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP10]], 1 -// CHECK2-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4 +// CHECK2-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !15 // CHECK2-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP16:![0-9]+]] // CHECK2: omp.inner.for.end: // CHECK2-NEXT: br label [[OMP_LOOP_EXIT:%.*]] @@ -1745,26 +1745,26 @@ int main() { // CHECK2-NEXT: store i32 [[TMP4]], i32* [[DOTOMP_IV]], align 4 // CHECK2-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK2: omp.inner.for.cond: -// CHECK2-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK2-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK2-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !20 +// CHECK2-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !20 // CHECK2-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] // CHECK2-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK2: omp.inner.for.body: -// CHECK2-NEXT: 
[[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK2-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !20 // CHECK2-NEXT: [[TMP8:%.*]] = zext i32 [[TMP7]] to i64 -// CHECK2-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK2-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !20 // CHECK2-NEXT: [[TMP10:%.*]] = zext i32 [[TMP9]] to i64 -// CHECK2-NEXT: call void @__kmpc_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]) -// CHECK2-NEXT: [[TMP11:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK2-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4 -// CHECK2-NEXT: call void @.omp_outlined..3(i32* [[TMP11]], i32* [[DOTBOUND_ZERO_ADDR]], i64 [[TMP8]], i64 [[TMP10]]) #[[ATTR2]] -// CHECK2-NEXT: call void @__kmpc_end_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]) +// CHECK2-NEXT: call void @__kmpc_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]), !llvm.access.group !20 +// CHECK2-NEXT: [[TMP11:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8, !llvm.access.group !20 +// CHECK2-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4, !llvm.access.group !20 +// CHECK2-NEXT: call void @.omp_outlined..3(i32* [[TMP11]], i32* [[DOTBOUND_ZERO_ADDR]], i64 [[TMP8]], i64 [[TMP10]]) #[[ATTR2]], !llvm.access.group !20 +// CHECK2-NEXT: call void @__kmpc_end_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]), !llvm.access.group !20 // CHECK2-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK2: omp.inner.for.inc: -// CHECK2-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK2-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK2-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !20 +// CHECK2-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !20 // CHECK2-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP12]], [[TMP13]] -// CHECK2-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 +// CHECK2-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !20 // CHECK2-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP21:![0-9]+]] // CHECK2: omp.inner.for.end: // CHECK2-NEXT: br label [[OMP_LOOP_EXIT:%.*]] @@ -1826,23 +1826,23 @@ int main() { // CHECK2-NEXT: store i32 [[TMP6]], i32* [[DOTOMP_IV]], align 4 // CHECK2-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK2: omp.inner.for.cond: -// CHECK2-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK2-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK2-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !23 +// CHECK2-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !23 // CHECK2-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP7]], [[TMP8]] // CHECK2-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK2: omp.inner.for.body: -// CHECK2-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK2-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !23 // CHECK2-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP9]], 1 // CHECK2-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK2-NEXT: store i32 [[ADD]], i32* [[I]], align 4 -// CHECK2-NEXT: call void @_Z9gtid_testv() +// CHECK2-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !23 +// CHECK2-NEXT: call void @_Z9gtid_testv(), !llvm.access.group !23 // CHECK2-NEXT: 
br label [[OMP_BODY_CONTINUE:%.*]] // CHECK2: omp.body.continue: // CHECK2-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK2: omp.inner.for.inc: -// CHECK2-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK2-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !23 // CHECK2-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP10]], 1 -// CHECK2-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4 +// CHECK2-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !23 // CHECK2-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP24:![0-9]+]] // CHECK2: omp.inner.for.end: // CHECK2-NEXT: br label [[OMP_LOOP_EXIT:%.*]] @@ -1956,22 +1956,22 @@ int main() { // CHECK2-NEXT: store i32 [[TMP4]], i32* [[DOTOMP_IV]], align 4 // CHECK2-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK2: omp.inner.for.cond: -// CHECK2-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK2-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK2-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !26 +// CHECK2-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !26 // CHECK2-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] // CHECK2-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK2: omp.inner.for.body: -// CHECK2-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK2-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !26 // CHECK2-NEXT: [[TMP8:%.*]] = zext i32 [[TMP7]] to i64 -// CHECK2-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK2-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !26 // CHECK2-NEXT: [[TMP10:%.*]] = zext i32 [[TMP9]] to i64 -// CHECK2-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 2, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64)* @.omp_outlined..5 to void (i32*, i32*, ...)*), i64 [[TMP8]], i64 [[TMP10]]) +// CHECK2-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) 
@__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 2, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64)* @.omp_outlined..5 to void (i32*, i32*, ...)*), i64 [[TMP8]], i64 [[TMP10]]), !llvm.access.group !26 // CHECK2-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK2: omp.inner.for.inc: -// CHECK2-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK2-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK2-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !26 +// CHECK2-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !26 // CHECK2-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP11]], [[TMP12]] -// CHECK2-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 +// CHECK2-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !26 // CHECK2-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP27:![0-9]+]] // CHECK2: omp.inner.for.end: // CHECK2-NEXT: br label [[OMP_LOOP_EXIT:%.*]] @@ -2033,23 +2033,23 @@ int main() { // CHECK2-NEXT: store i32 [[TMP6]], i32* [[DOTOMP_IV]], align 4 // CHECK2-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK2: omp.inner.for.cond: -// CHECK2-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK2-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK2-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !29 +// CHECK2-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !29 // CHECK2-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP7]], [[TMP8]] // CHECK2-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK2: omp.inner.for.body: -// CHECK2-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK2-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !29 // CHECK2-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP9]], 1 // CHECK2-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK2-NEXT: store i32 [[ADD]], i32* [[I]], align 4 -// CHECK2-NEXT: call void @_Z3fn4v() +// CHECK2-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !29 +// CHECK2-NEXT: call void @_Z3fn4v(), !llvm.access.group !29 // CHECK2-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK2: omp.body.continue: // CHECK2-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK2: omp.inner.for.inc: -// CHECK2-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK2-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !29 // CHECK2-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP10]], 1 -// CHECK2-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4 +// CHECK2-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !29 // CHECK2-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP30:![0-9]+]] // CHECK2: omp.inner.for.end: // CHECK2-NEXT: br label [[OMP_LOOP_EXIT:%.*]] @@ -2109,26 +2109,26 @@ int main() { // CHECK2-NEXT: store i32 [[TMP4]], i32* [[DOTOMP_IV]], align 4 // CHECK2-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK2: omp.inner.for.cond: -// CHECK2-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK2-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK2-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !32 +// CHECK2-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !32 // CHECK2-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] // CHECK2-NEXT: br i1 [[CMP1]], label 
[[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK2: omp.inner.for.body: -// CHECK2-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK2-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !32 // CHECK2-NEXT: [[TMP8:%.*]] = zext i32 [[TMP7]] to i64 -// CHECK2-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK2-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !32 // CHECK2-NEXT: [[TMP10:%.*]] = zext i32 [[TMP9]] to i64 -// CHECK2-NEXT: call void @__kmpc_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]) -// CHECK2-NEXT: [[TMP11:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK2-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4 -// CHECK2-NEXT: call void @.omp_outlined..7(i32* [[TMP11]], i32* [[DOTBOUND_ZERO_ADDR]], i64 [[TMP8]], i64 [[TMP10]]) #[[ATTR2]] -// CHECK2-NEXT: call void @__kmpc_end_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]) +// CHECK2-NEXT: call void @__kmpc_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]), !llvm.access.group !32 +// CHECK2-NEXT: [[TMP11:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8, !llvm.access.group !32 +// CHECK2-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4, !llvm.access.group !32 +// CHECK2-NEXT: call void @.omp_outlined..7(i32* [[TMP11]], i32* [[DOTBOUND_ZERO_ADDR]], i64 [[TMP8]], i64 [[TMP10]]) #[[ATTR2]], !llvm.access.group !32 +// CHECK2-NEXT: call void @__kmpc_end_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]), !llvm.access.group !32 // CHECK2-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK2: omp.inner.for.inc: -// CHECK2-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK2-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK2-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !32 +// CHECK2-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !32 // CHECK2-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP12]], [[TMP13]] -// CHECK2-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 +// CHECK2-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !32 // CHECK2-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP33:![0-9]+]] // CHECK2: omp.inner.for.end: // CHECK2-NEXT: br label [[OMP_LOOP_EXIT:%.*]] @@ -2190,23 +2190,23 @@ int main() { // CHECK2-NEXT: store i32 [[TMP6]], i32* [[DOTOMP_IV]], align 4 // CHECK2-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK2: omp.inner.for.cond: -// CHECK2-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK2-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK2-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !35 +// CHECK2-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !35 // CHECK2-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP7]], [[TMP8]] // CHECK2-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK2: omp.inner.for.body: -// CHECK2-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK2-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !35 // CHECK2-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP9]], 1 // CHECK2-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK2-NEXT: store i32 [[ADD]], i32* [[I]], align 4 -// CHECK2-NEXT: call void @_Z3fn5v() +// CHECK2-NEXT: store i32 [[ADD]], i32* [[I]], align 4, 
!llvm.access.group !35 +// CHECK2-NEXT: call void @_Z3fn5v(), !llvm.access.group !35 // CHECK2-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK2: omp.body.continue: // CHECK2-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK2: omp.inner.for.inc: -// CHECK2-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK2-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !35 // CHECK2-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP10]], 1 -// CHECK2-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4 +// CHECK2-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !35 // CHECK2-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP36:![0-9]+]] // CHECK2: omp.inner.for.end: // CHECK2-NEXT: br label [[OMP_LOOP_EXIT:%.*]] @@ -2272,35 +2272,35 @@ int main() { // CHECK2-NEXT: store i32 [[TMP5]], i32* [[DOTOMP_IV]], align 4 // CHECK2-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK2: omp.inner.for.cond: -// CHECK2-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK2-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK2-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !38 +// CHECK2-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !38 // CHECK2-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] // CHECK2-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK2: omp.inner.for.body: -// CHECK2-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK2-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !38 // CHECK2-NEXT: [[TMP9:%.*]] = zext i32 [[TMP8]] to i64 -// CHECK2-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK2-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !38 // CHECK2-NEXT: [[TMP11:%.*]] = zext i32 [[TMP10]] to i64 -// CHECK2-NEXT: [[TMP12:%.*]] = load i32, i32* [[TMP0]], align 4 +// CHECK2-NEXT: [[TMP12:%.*]] = load i32, i32* [[TMP0]], align 4, !llvm.access.group !38 // CHECK2-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[TMP12]], 0 // CHECK2-NEXT: br i1 [[TOBOOL]], label [[OMP_IF_THEN:%.*]], label [[OMP_IF_ELSE:%.*]] // CHECK2: omp_if.then: -// CHECK2-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 2, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64)* @.omp_outlined..9 to void (i32*, i32*, ...)*), i64 [[TMP9]], i64 [[TMP11]]) +// CHECK2-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) 
@__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 2, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64)* @.omp_outlined..9 to void (i32*, i32*, ...)*), i64 [[TMP9]], i64 [[TMP11]]), !llvm.access.group !38 // CHECK2-NEXT: br label [[OMP_IF_END:%.*]] // CHECK2: omp_if.else: -// CHECK2-NEXT: call void @__kmpc_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP2]]) -// CHECK2-NEXT: [[TMP13:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK2-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4 -// CHECK2-NEXT: call void @.omp_outlined..9(i32* [[TMP13]], i32* [[DOTBOUND_ZERO_ADDR]], i64 [[TMP9]], i64 [[TMP11]]) #[[ATTR2]] -// CHECK2-NEXT: call void @__kmpc_end_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP2]]) +// CHECK2-NEXT: call void @__kmpc_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP2]]), !llvm.access.group !38 +// CHECK2-NEXT: [[TMP13:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8, !llvm.access.group !38 +// CHECK2-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4, !llvm.access.group !38 +// CHECK2-NEXT: call void @.omp_outlined..9(i32* [[TMP13]], i32* [[DOTBOUND_ZERO_ADDR]], i64 [[TMP9]], i64 [[TMP11]]) #[[ATTR2]], !llvm.access.group !38 +// CHECK2-NEXT: call void @__kmpc_end_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP2]]), !llvm.access.group !38 // CHECK2-NEXT: br label [[OMP_IF_END]] // CHECK2: omp_if.end: // CHECK2-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK2: omp.inner.for.inc: -// CHECK2-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK2-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK2-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !38 +// CHECK2-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !38 // CHECK2-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP14]], [[TMP15]] -// CHECK2-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 +// CHECK2-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !38 // CHECK2-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP39:![0-9]+]] // CHECK2: omp.inner.for.end: // CHECK2-NEXT: br label [[OMP_LOOP_EXIT:%.*]] @@ -2362,23 +2362,23 @@ int main() { // CHECK2-NEXT: store i32 [[TMP6]], i32* [[DOTOMP_IV]], align 4 // CHECK2-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK2: omp.inner.for.cond: -// CHECK2-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK2-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK2-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !41 +// CHECK2-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !41 // CHECK2-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP7]], [[TMP8]] // CHECK2-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK2: omp.inner.for.body: -// CHECK2-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK2-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !41 // CHECK2-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP9]], 1 // CHECK2-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK2-NEXT: store i32 [[ADD]], i32* [[I]], align 4 -// CHECK2-NEXT: call void @_Z3fn6v() +// CHECK2-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !41 +// CHECK2-NEXT: call void @_Z3fn6v(), !llvm.access.group !41 // CHECK2-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK2: omp.body.continue: // CHECK2-NEXT: br label 
[[OMP_INNER_FOR_INC:%.*]] // CHECK2: omp.inner.for.inc: -// CHECK2-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK2-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !41 // CHECK2-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP10]], 1 -// CHECK2-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4 +// CHECK2-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !41 // CHECK2-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP42:![0-9]+]] // CHECK2: omp.inner.for.end: // CHECK2-NEXT: br label [[OMP_LOOP_EXIT:%.*]] @@ -2490,22 +2490,22 @@ int main() { // CHECK2-NEXT: store i32 [[TMP4]], i32* [[DOTOMP_IV]], align 4 // CHECK2-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK2: omp.inner.for.cond: -// CHECK2-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK2-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK2-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !44 +// CHECK2-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !44 // CHECK2-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] // CHECK2-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK2: omp.inner.for.body: -// CHECK2-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK2-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !44 // CHECK2-NEXT: [[TMP8:%.*]] = zext i32 [[TMP7]] to i64 -// CHECK2-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK2-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !44 // CHECK2-NEXT: [[TMP10:%.*]] = zext i32 [[TMP9]] to i64 -// CHECK2-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 2, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64)* @.omp_outlined..11 to void (i32*, i32*, ...)*), i64 [[TMP8]], i64 [[TMP10]]) +// CHECK2-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) 
@__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 2, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64)* @.omp_outlined..11 to void (i32*, i32*, ...)*), i64 [[TMP8]], i64 [[TMP10]]), !llvm.access.group !44 // CHECK2-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK2: omp.inner.for.inc: -// CHECK2-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK2-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK2-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !44 +// CHECK2-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !44 // CHECK2-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP11]], [[TMP12]] -// CHECK2-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 +// CHECK2-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !44 // CHECK2-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP45:![0-9]+]] // CHECK2: omp.inner.for.end: // CHECK2-NEXT: br label [[OMP_LOOP_EXIT:%.*]] @@ -2567,23 +2567,23 @@ int main() { // CHECK2-NEXT: store i32 [[TMP6]], i32* [[DOTOMP_IV]], align 4 // CHECK2-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK2: omp.inner.for.cond: -// CHECK2-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK2-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK2-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !47 +// CHECK2-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !47 // CHECK2-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP7]], [[TMP8]] // CHECK2-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK2: omp.inner.for.body: -// CHECK2-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK2-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !47 // CHECK2-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP9]], 1 // CHECK2-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK2-NEXT: store i32 [[ADD]], i32* [[I]], align 4 -// CHECK2-NEXT: call void @_Z3fn1v() +// CHECK2-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !47 +// CHECK2-NEXT: call void @_Z3fn1v(), !llvm.access.group !47 // CHECK2-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK2: omp.body.continue: // CHECK2-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK2: omp.inner.for.inc: -// CHECK2-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK2-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !47 // CHECK2-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP10]], 1 -// CHECK2-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4 +// CHECK2-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !47 // CHECK2-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP48:![0-9]+]] // CHECK2: omp.inner.for.end: // CHECK2-NEXT: br label [[OMP_LOOP_EXIT:%.*]] @@ -2643,26 +2643,26 @@ int main() { // CHECK2-NEXT: store i32 [[TMP4]], i32* [[DOTOMP_IV]], align 4 // CHECK2-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK2: omp.inner.for.cond: -// CHECK2-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK2-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK2-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !50 +// CHECK2-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !50 // CHECK2-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] // CHECK2-NEXT: br i1 [[CMP1]], label 
[[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK2: omp.inner.for.body: -// CHECK2-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK2-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !50 // CHECK2-NEXT: [[TMP8:%.*]] = zext i32 [[TMP7]] to i64 -// CHECK2-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK2-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !50 // CHECK2-NEXT: [[TMP10:%.*]] = zext i32 [[TMP9]] to i64 -// CHECK2-NEXT: call void @__kmpc_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]) -// CHECK2-NEXT: [[TMP11:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK2-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4 -// CHECK2-NEXT: call void @.omp_outlined..13(i32* [[TMP11]], i32* [[DOTBOUND_ZERO_ADDR]], i64 [[TMP8]], i64 [[TMP10]]) #[[ATTR2]] -// CHECK2-NEXT: call void @__kmpc_end_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]) +// CHECK2-NEXT: call void @__kmpc_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]), !llvm.access.group !50 +// CHECK2-NEXT: [[TMP11:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8, !llvm.access.group !50 +// CHECK2-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4, !llvm.access.group !50 +// CHECK2-NEXT: call void @.omp_outlined..13(i32* [[TMP11]], i32* [[DOTBOUND_ZERO_ADDR]], i64 [[TMP8]], i64 [[TMP10]]) #[[ATTR2]], !llvm.access.group !50 +// CHECK2-NEXT: call void @__kmpc_end_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]), !llvm.access.group !50 // CHECK2-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK2: omp.inner.for.inc: -// CHECK2-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK2-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK2-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !50 +// CHECK2-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !50 // CHECK2-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP12]], [[TMP13]] -// CHECK2-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 +// CHECK2-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !50 // CHECK2-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP51:![0-9]+]] // CHECK2: omp.inner.for.end: // CHECK2-NEXT: br label [[OMP_LOOP_EXIT:%.*]] @@ -2724,23 +2724,23 @@ int main() { // CHECK2-NEXT: store i32 [[TMP6]], i32* [[DOTOMP_IV]], align 4 // CHECK2-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK2: omp.inner.for.cond: -// CHECK2-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK2-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK2-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !53 +// CHECK2-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !53 // CHECK2-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP7]], [[TMP8]] // CHECK2-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK2: omp.inner.for.body: -// CHECK2-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK2-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !53 // CHECK2-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP9]], 1 // CHECK2-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK2-NEXT: store i32 [[ADD]], i32* [[I]], align 4 -// CHECK2-NEXT: call void @_Z3fn2v() +// CHECK2-NEXT: store i32 [[ADD]], i32* [[I]], align 4, 
!llvm.access.group !53 +// CHECK2-NEXT: call void @_Z3fn2v(), !llvm.access.group !53 // CHECK2-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK2: omp.body.continue: // CHECK2-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK2: omp.inner.for.inc: -// CHECK2-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK2-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !53 // CHECK2-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP10]], 1 -// CHECK2-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4 +// CHECK2-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !53 // CHECK2-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP54:![0-9]+]] // CHECK2: omp.inner.for.end: // CHECK2-NEXT: br label [[OMP_LOOP_EXIT:%.*]] @@ -2806,35 +2806,35 @@ int main() { // CHECK2-NEXT: store i32 [[TMP5]], i32* [[DOTOMP_IV]], align 4 // CHECK2-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK2: omp.inner.for.cond: -// CHECK2-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK2-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK2-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !56 +// CHECK2-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !56 // CHECK2-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] // CHECK2-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK2: omp.inner.for.body: -// CHECK2-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK2-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !56 // CHECK2-NEXT: [[TMP9:%.*]] = zext i32 [[TMP8]] to i64 -// CHECK2-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK2-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !56 // CHECK2-NEXT: [[TMP11:%.*]] = zext i32 [[TMP10]] to i64 -// CHECK2-NEXT: [[TMP12:%.*]] = load i32, i32* [[TMP0]], align 4 +// CHECK2-NEXT: [[TMP12:%.*]] = load i32, i32* [[TMP0]], align 4, !llvm.access.group !56 // CHECK2-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[TMP12]], 0 // CHECK2-NEXT: br i1 [[TOBOOL]], label [[OMP_IF_THEN:%.*]], label [[OMP_IF_ELSE:%.*]] // CHECK2: omp_if.then: -// CHECK2-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 2, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64)* @.omp_outlined..15 to void (i32*, i32*, ...)*), i64 [[TMP9]], i64 [[TMP11]]) +// CHECK2-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) 
@__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 2, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64)* @.omp_outlined..15 to void (i32*, i32*, ...)*), i64 [[TMP9]], i64 [[TMP11]]), !llvm.access.group !56 // CHECK2-NEXT: br label [[OMP_IF_END:%.*]] // CHECK2: omp_if.else: -// CHECK2-NEXT: call void @__kmpc_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP2]]) -// CHECK2-NEXT: [[TMP13:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK2-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4 -// CHECK2-NEXT: call void @.omp_outlined..15(i32* [[TMP13]], i32* [[DOTBOUND_ZERO_ADDR]], i64 [[TMP9]], i64 [[TMP11]]) #[[ATTR2]] -// CHECK2-NEXT: call void @__kmpc_end_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP2]]) +// CHECK2-NEXT: call void @__kmpc_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP2]]), !llvm.access.group !56 +// CHECK2-NEXT: [[TMP13:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8, !llvm.access.group !56 +// CHECK2-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4, !llvm.access.group !56 +// CHECK2-NEXT: call void @.omp_outlined..15(i32* [[TMP13]], i32* [[DOTBOUND_ZERO_ADDR]], i64 [[TMP9]], i64 [[TMP11]]) #[[ATTR2]], !llvm.access.group !56 +// CHECK2-NEXT: call void @__kmpc_end_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP2]]), !llvm.access.group !56 // CHECK2-NEXT: br label [[OMP_IF_END]] // CHECK2: omp_if.end: // CHECK2-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK2: omp.inner.for.inc: -// CHECK2-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK2-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK2-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !56 +// CHECK2-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !56 // CHECK2-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP14]], [[TMP15]] -// CHECK2-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 +// CHECK2-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !56 // CHECK2-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP57:![0-9]+]] // CHECK2: omp.inner.for.end: // CHECK2-NEXT: br label [[OMP_LOOP_EXIT:%.*]] @@ -2896,23 +2896,23 @@ int main() { // CHECK2-NEXT: store i32 [[TMP6]], i32* [[DOTOMP_IV]], align 4 // CHECK2-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK2: omp.inner.for.cond: -// CHECK2-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK2-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK2-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !59 +// CHECK2-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !59 // CHECK2-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP7]], [[TMP8]] // CHECK2-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK2: omp.inner.for.body: -// CHECK2-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK2-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !59 // CHECK2-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP9]], 1 // CHECK2-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK2-NEXT: store i32 [[ADD]], i32* [[I]], align 4 -// CHECK2-NEXT: call void @_Z3fn3v() +// CHECK2-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !59 +// CHECK2-NEXT: call void @_Z3fn3v(), !llvm.access.group !59 // CHECK2-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK2: omp.body.continue: // CHECK2-NEXT: br label 
[[OMP_INNER_FOR_INC:%.*]] // CHECK2: omp.inner.for.inc: -// CHECK2-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK2-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !59 // CHECK2-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP10]], 1 -// CHECK2-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4 +// CHECK2-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !59 // CHECK2-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP60:![0-9]+]] // CHECK2: omp.inner.for.end: // CHECK2-NEXT: br label [[OMP_LOOP_EXIT:%.*]] @@ -3002,22 +3002,22 @@ int main() { // CHECK3-NEXT: store i32 [[TMP4]], i32* [[DOTOMP_IV]], align 4 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK3: omp.inner.for.cond: -// CHECK3-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK3-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !11 +// CHECK3-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !11 // CHECK3-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] // CHECK3-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK3: omp.inner.for.body: -// CHECK3-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK3-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !11 // CHECK3-NEXT: [[TMP8:%.*]] = zext i32 [[TMP7]] to i64 -// CHECK3-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK3-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !11 // CHECK3-NEXT: [[TMP10:%.*]] = zext i32 [[TMP9]] to i64 -// CHECK3-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 2, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64)* @.omp_outlined..1 to void (i32*, i32*, ...)*), i64 [[TMP8]], i64 [[TMP10]]) +// CHECK3-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) 
@__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 2, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64)* @.omp_outlined..1 to void (i32*, i32*, ...)*), i64 [[TMP8]], i64 [[TMP10]]), !llvm.access.group !11 // CHECK3-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK3: omp.inner.for.inc: -// CHECK3-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK3-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !11 +// CHECK3-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !11 // CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP11]], [[TMP12]] -// CHECK3-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !11 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP12:![0-9]+]] // CHECK3: omp.inner.for.end: // CHECK3-NEXT: br label [[OMP_LOOP_EXIT:%.*]] @@ -3079,22 +3079,22 @@ int main() { // CHECK3-NEXT: store i32 [[TMP6]], i32* [[DOTOMP_IV]], align 4 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK3: omp.inner.for.cond: -// CHECK3-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !15 +// CHECK3-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !15 // CHECK3-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP7]], [[TMP8]] // CHECK3-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK3: omp.inner.for.body: -// CHECK3-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !15 // CHECK3-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP9]], 1 // CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK3-NEXT: store i32 [[ADD]], i32* [[I]], align 4 +// CHECK3-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !15 // CHECK3-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK3: omp.body.continue: // CHECK3-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK3: omp.inner.for.inc: -// CHECK3-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !15 // CHECK3-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP10]], 1 -// CHECK3-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !15 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP16:![0-9]+]] // CHECK3: omp.inner.for.end: // CHECK3-NEXT: br label [[OMP_LOOP_EXIT:%.*]] @@ -3154,26 +3154,26 @@ int main() { // CHECK3-NEXT: store i32 [[TMP4]], i32* [[DOTOMP_IV]], align 4 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK3: omp.inner.for.cond: -// CHECK3-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK3-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !20 +// CHECK3-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !20 // CHECK3-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] // CHECK3-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK3: omp.inner.for.body: -// CHECK3-NEXT: 
[[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK3-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !20 // CHECK3-NEXT: [[TMP8:%.*]] = zext i32 [[TMP7]] to i64 -// CHECK3-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK3-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !20 // CHECK3-NEXT: [[TMP10:%.*]] = zext i32 [[TMP9]] to i64 -// CHECK3-NEXT: call void @__kmpc_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]) -// CHECK3-NEXT: [[TMP11:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK3-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4 -// CHECK3-NEXT: call void @.omp_outlined..3(i32* [[TMP11]], i32* [[DOTBOUND_ZERO_ADDR]], i64 [[TMP8]], i64 [[TMP10]]) #[[ATTR2]] -// CHECK3-NEXT: call void @__kmpc_end_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]) +// CHECK3-NEXT: call void @__kmpc_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]), !llvm.access.group !20 +// CHECK3-NEXT: [[TMP11:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8, !llvm.access.group !20 +// CHECK3-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4, !llvm.access.group !20 +// CHECK3-NEXT: call void @.omp_outlined..3(i32* [[TMP11]], i32* [[DOTBOUND_ZERO_ADDR]], i64 [[TMP8]], i64 [[TMP10]]) #[[ATTR2]], !llvm.access.group !20 +// CHECK3-NEXT: call void @__kmpc_end_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]), !llvm.access.group !20 // CHECK3-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK3: omp.inner.for.inc: -// CHECK3-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK3-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !20 +// CHECK3-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !20 // CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP12]], [[TMP13]] -// CHECK3-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !20 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP21:![0-9]+]] // CHECK3: omp.inner.for.end: // CHECK3-NEXT: br label [[OMP_LOOP_EXIT:%.*]] @@ -3235,23 +3235,23 @@ int main() { // CHECK3-NEXT: store i32 [[TMP6]], i32* [[DOTOMP_IV]], align 4 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK3: omp.inner.for.cond: -// CHECK3-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !23 +// CHECK3-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !23 // CHECK3-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP7]], [[TMP8]] // CHECK3-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK3: omp.inner.for.body: -// CHECK3-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !23 // CHECK3-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP9]], 1 // CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK3-NEXT: store i32 [[ADD]], i32* [[I]], align 4 -// CHECK3-NEXT: call void @_Z9gtid_testv() +// CHECK3-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !23 +// CHECK3-NEXT: call void @_Z9gtid_testv(), !llvm.access.group !23 // CHECK3-NEXT: 
br label [[OMP_BODY_CONTINUE:%.*]] // CHECK3: omp.body.continue: // CHECK3-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK3: omp.inner.for.inc: -// CHECK3-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !23 // CHECK3-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP10]], 1 -// CHECK3-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !23 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP24:![0-9]+]] // CHECK3: omp.inner.for.end: // CHECK3-NEXT: br label [[OMP_LOOP_EXIT:%.*]] @@ -3365,22 +3365,22 @@ int main() { // CHECK3-NEXT: store i32 [[TMP4]], i32* [[DOTOMP_IV]], align 4 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK3: omp.inner.for.cond: -// CHECK3-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK3-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !26 +// CHECK3-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !26 // CHECK3-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] // CHECK3-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK3: omp.inner.for.body: -// CHECK3-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK3-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !26 // CHECK3-NEXT: [[TMP8:%.*]] = zext i32 [[TMP7]] to i64 -// CHECK3-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK3-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !26 // CHECK3-NEXT: [[TMP10:%.*]] = zext i32 [[TMP9]] to i64 -// CHECK3-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 2, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64)* @.omp_outlined..5 to void (i32*, i32*, ...)*), i64 [[TMP8]], i64 [[TMP10]]) +// CHECK3-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) 
@__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 2, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64)* @.omp_outlined..5 to void (i32*, i32*, ...)*), i64 [[TMP8]], i64 [[TMP10]]), !llvm.access.group !26 // CHECK3-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK3: omp.inner.for.inc: -// CHECK3-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK3-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !26 +// CHECK3-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !26 // CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP11]], [[TMP12]] -// CHECK3-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !26 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP27:![0-9]+]] // CHECK3: omp.inner.for.end: // CHECK3-NEXT: br label [[OMP_LOOP_EXIT:%.*]] @@ -3442,23 +3442,23 @@ int main() { // CHECK3-NEXT: store i32 [[TMP6]], i32* [[DOTOMP_IV]], align 4 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK3: omp.inner.for.cond: -// CHECK3-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !29 +// CHECK3-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !29 // CHECK3-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP7]], [[TMP8]] // CHECK3-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK3: omp.inner.for.body: -// CHECK3-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !29 // CHECK3-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP9]], 1 // CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK3-NEXT: store i32 [[ADD]], i32* [[I]], align 4 -// CHECK3-NEXT: call void @_Z3fn4v() +// CHECK3-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !29 +// CHECK3-NEXT: call void @_Z3fn4v(), !llvm.access.group !29 // CHECK3-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK3: omp.body.continue: // CHECK3-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK3: omp.inner.for.inc: -// CHECK3-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !29 // CHECK3-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP10]], 1 -// CHECK3-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !29 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP30:![0-9]+]] // CHECK3: omp.inner.for.end: // CHECK3-NEXT: br label [[OMP_LOOP_EXIT:%.*]] @@ -3693,41 +3693,41 @@ int main() { // CHECK3: omp_if.then: // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK3: omp.inner.for.cond: -// CHECK3-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK3-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !35 +// CHECK3-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !35 // CHECK3-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] // CHECK3-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label 
[[OMP_INNER_FOR_END:%.*]] // CHECK3: omp.inner.for.body: -// CHECK3-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK3-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !35 // CHECK3-NEXT: [[TMP11:%.*]] = zext i32 [[TMP10]] to i64 -// CHECK3-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK3-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !35 // CHECK3-NEXT: [[TMP13:%.*]] = zext i32 [[TMP12]] to i64 -// CHECK3-NEXT: [[TMP14:%.*]] = load i8, i8* [[DOTCAPTURE_EXPR_]], align 1 +// CHECK3-NEXT: [[TMP14:%.*]] = load i8, i8* [[DOTCAPTURE_EXPR_]], align 1, !llvm.access.group !35 // CHECK3-NEXT: [[TOBOOL3:%.*]] = trunc i8 [[TMP14]] to i1 // CHECK3-NEXT: [[CONV:%.*]] = bitcast i64* [[DOTCAPTURE_EXPR__CASTED]] to i8* // CHECK3-NEXT: [[FROMBOOL4:%.*]] = zext i1 [[TOBOOL3]] to i8 -// CHECK3-NEXT: store i8 [[FROMBOOL4]], i8* [[CONV]], align 1 -// CHECK3-NEXT: [[TMP15:%.*]] = load i64, i64* [[DOTCAPTURE_EXPR__CASTED]], align 8 -// CHECK3-NEXT: [[TMP16:%.*]] = load i8, i8* [[DOTCAPTURE_EXPR_]], align 1 +// CHECK3-NEXT: store i8 [[FROMBOOL4]], i8* [[CONV]], align 1, !llvm.access.group !35 +// CHECK3-NEXT: [[TMP15:%.*]] = load i64, i64* [[DOTCAPTURE_EXPR__CASTED]], align 8, !llvm.access.group !35 +// CHECK3-NEXT: [[TMP16:%.*]] = load i8, i8* [[DOTCAPTURE_EXPR_]], align 1, !llvm.access.group !35 // CHECK3-NEXT: [[TOBOOL5:%.*]] = trunc i8 [[TMP16]] to i1 // CHECK3-NEXT: br i1 [[TOBOOL5]], label [[OMP_IF_THEN6:%.*]], label [[OMP_IF_ELSE:%.*]] // CHECK3: omp_if.then6: -// CHECK3-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 3, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64, i64)* @.omp_outlined..9 to void (i32*, i32*, ...)*), i64 [[TMP11]], i64 [[TMP13]], i64 [[TMP15]]) +// CHECK3-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) 
@__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 3, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64, i64)* @.omp_outlined..9 to void (i32*, i32*, ...)*), i64 [[TMP11]], i64 [[TMP13]], i64 [[TMP15]]), !llvm.access.group !35 // CHECK3-NEXT: br label [[OMP_IF_END:%.*]] // CHECK3: omp_if.else: -// CHECK3-NEXT: call void @__kmpc_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP3]]) -// CHECK3-NEXT: [[TMP17:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK3-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4 -// CHECK3-NEXT: call void @.omp_outlined..9(i32* [[TMP17]], i32* [[DOTBOUND_ZERO_ADDR]], i64 [[TMP11]], i64 [[TMP13]], i64 [[TMP15]]) #[[ATTR2]] -// CHECK3-NEXT: call void @__kmpc_end_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP3]]) +// CHECK3-NEXT: call void @__kmpc_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP3]]), !llvm.access.group !35 +// CHECK3-NEXT: [[TMP17:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8, !llvm.access.group !35 +// CHECK3-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4, !llvm.access.group !35 +// CHECK3-NEXT: call void @.omp_outlined..9(i32* [[TMP17]], i32* [[DOTBOUND_ZERO_ADDR]], i64 [[TMP11]], i64 [[TMP13]], i64 [[TMP15]]) #[[ATTR2]], !llvm.access.group !35 +// CHECK3-NEXT: call void @__kmpc_end_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP3]]), !llvm.access.group !35 // CHECK3-NEXT: br label [[OMP_IF_END]] // CHECK3: omp_if.end: // CHECK3-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK3: omp.inner.for.inc: -// CHECK3-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK3-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !35 +// CHECK3-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !35 // CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP18]], [[TMP19]] -// CHECK3-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !35 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP36:![0-9]+]] // CHECK3: omp.inner.for.end: // CHECK3-NEXT: br label [[OMP_IF_END23:%.*]] @@ -3839,23 +3839,23 @@ int main() { // CHECK3-NEXT: store i32 [[TMP7]], i32* [[DOTOMP_IV]], align 4 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK3: omp.inner.for.cond: -// CHECK3-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !39 +// CHECK3-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !39 // CHECK3-NEXT: [[CMP3:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] // CHECK3-NEXT: br i1 [[CMP3]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK3: omp.inner.for.body: -// CHECK3-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !39 // CHECK3-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP10]], 1 // CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK3-NEXT: store i32 [[ADD]], i32* [[I]], align 4 -// CHECK3-NEXT: call void @_Z3fn6v() +// CHECK3-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !39 +// CHECK3-NEXT: call void @_Z3fn6v(), !llvm.access.group !39 // CHECK3-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK3: 
omp.body.continue: // CHECK3-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK3: omp.inner.for.inc: -// CHECK3-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !39 // CHECK3-NEXT: [[ADD4:%.*]] = add nsw i32 [[TMP11]], 1 -// CHECK3-NEXT: store i32 [[ADD4]], i32* [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: store i32 [[ADD4]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !39 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP40:![0-9]+]] // CHECK3: omp.inner.for.end: // CHECK3-NEXT: br label [[OMP_IF_END:%.*]] @@ -3967,23 +3967,23 @@ int main() { // CHECK3-NEXT: store i32 [[TMP7]], i32* [[DOTOMP_IV]], align 4 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK3: omp.inner.for.cond: -// CHECK3-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !43 +// CHECK3-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !43 // CHECK3-NEXT: [[CMP3:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] // CHECK3-NEXT: br i1 [[CMP3]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK3: omp.inner.for.body: -// CHECK3-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !43 // CHECK3-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP10]], 1 // CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK3-NEXT: store i32 [[ADD]], i32* [[I]], align 4 -// CHECK3-NEXT: call void @_Z3fn6v() +// CHECK3-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !43 +// CHECK3-NEXT: call void @_Z3fn6v(), !llvm.access.group !43 // CHECK3-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK3: omp.body.continue: // CHECK3-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK3: omp.inner.for.inc: -// CHECK3-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !43 // CHECK3-NEXT: [[ADD4:%.*]] = add nsw i32 [[TMP11]], 1 -// CHECK3-NEXT: store i32 [[ADD4]], i32* [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: store i32 [[ADD4]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !43 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP44:![0-9]+]] // CHECK3: omp.inner.for.end: // CHECK3-NEXT: br label [[OMP_IF_END:%.*]] @@ -4138,22 +4138,22 @@ int main() { // CHECK3-NEXT: store i32 [[TMP4]], i32* [[DOTOMP_IV]], align 4 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK3: omp.inner.for.cond: -// CHECK3-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK3-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !47 +// CHECK3-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !47 // CHECK3-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] // CHECK3-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK3: omp.inner.for.body: -// CHECK3-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK3-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !47 // CHECK3-NEXT: [[TMP8:%.*]] = zext i32 [[TMP7]] to i64 -// CHECK3-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 
+// CHECK3-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !47 // CHECK3-NEXT: [[TMP10:%.*]] = zext i32 [[TMP9]] to i64 -// CHECK3-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 2, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64)* @.omp_outlined..12 to void (i32*, i32*, ...)*), i64 [[TMP8]], i64 [[TMP10]]) +// CHECK3-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 2, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64)* @.omp_outlined..12 to void (i32*, i32*, ...)*), i64 [[TMP8]], i64 [[TMP10]]), !llvm.access.group !47 // CHECK3-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK3: omp.inner.for.inc: -// CHECK3-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK3-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !47 +// CHECK3-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !47 // CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP11]], [[TMP12]] -// CHECK3-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !47 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP48:![0-9]+]] // CHECK3: omp.inner.for.end: // CHECK3-NEXT: br label [[OMP_LOOP_EXIT:%.*]] @@ -4215,23 +4215,23 @@ int main() { // CHECK3-NEXT: store i32 [[TMP6]], i32* [[DOTOMP_IV]], align 4 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK3: omp.inner.for.cond: -// CHECK3-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !50 +// CHECK3-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !50 // CHECK3-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP7]], [[TMP8]] // CHECK3-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK3: omp.inner.for.body: -// CHECK3-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !50 // CHECK3-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP9]], 1 // CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK3-NEXT: store i32 [[ADD]], i32* [[I]], align 4 -// CHECK3-NEXT: call void @_Z3fn1v() +// CHECK3-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !50 +// CHECK3-NEXT: call void @_Z3fn1v(), !llvm.access.group !50 // CHECK3-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK3: omp.body.continue: // CHECK3-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK3: omp.inner.for.inc: -// CHECK3-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !50 // CHECK3-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP10]], 1 -// CHECK3-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !50 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP51:![0-9]+]] // CHECK3: omp.inner.for.end: // CHECK3-NEXT: br label [[OMP_LOOP_EXIT:%.*]] @@ -4454,35 +4454,35 @@ int main() { // CHECK3-NEXT: store i32 [[TMP5]], i32* [[DOTOMP_IV]], align 4 // CHECK3-NEXT: br label 
[[OMP_INNER_FOR_COND:%.*]] // CHECK3: omp.inner.for.cond: -// CHECK3-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK3-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !55 +// CHECK3-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !55 // CHECK3-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] // CHECK3-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK3: omp.inner.for.body: -// CHECK3-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK3-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !55 // CHECK3-NEXT: [[TMP9:%.*]] = zext i32 [[TMP8]] to i64 -// CHECK3-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK3-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !55 // CHECK3-NEXT: [[TMP11:%.*]] = zext i32 [[TMP10]] to i64 -// CHECK3-NEXT: [[TMP12:%.*]] = load i32, i32* [[TMP0]], align 4 +// CHECK3-NEXT: [[TMP12:%.*]] = load i32, i32* [[TMP0]], align 4, !llvm.access.group !55 // CHECK3-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[TMP12]], 0 // CHECK3-NEXT: br i1 [[TOBOOL]], label [[OMP_IF_THEN:%.*]], label [[OMP_IF_ELSE:%.*]] // CHECK3: omp_if.then: -// CHECK3-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 2, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64)* @.omp_outlined..16 to void (i32*, i32*, ...)*), i64 [[TMP9]], i64 [[TMP11]]) +// CHECK3-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 2, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64)* @.omp_outlined..16 to void (i32*, i32*, ...)*), i64 [[TMP9]], i64 [[TMP11]]), !llvm.access.group !55 // CHECK3-NEXT: br label [[OMP_IF_END:%.*]] // CHECK3: omp_if.else: -// CHECK3-NEXT: call void @__kmpc_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP2]]) -// CHECK3-NEXT: [[TMP13:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK3-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4 -// CHECK3-NEXT: call void @.omp_outlined..16(i32* [[TMP13]], i32* [[DOTBOUND_ZERO_ADDR]], i64 [[TMP9]], i64 [[TMP11]]) #[[ATTR2]] -// CHECK3-NEXT: call void @__kmpc_end_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP2]]) +// CHECK3-NEXT: call void @__kmpc_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP2]]), !llvm.access.group !55 +// CHECK3-NEXT: [[TMP13:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8, !llvm.access.group !55 +// CHECK3-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4, !llvm.access.group !55 +// CHECK3-NEXT: call void @.omp_outlined..16(i32* [[TMP13]], i32* [[DOTBOUND_ZERO_ADDR]], i64 [[TMP9]], i64 [[TMP11]]) #[[ATTR2]], !llvm.access.group !55 +// CHECK3-NEXT: call void @__kmpc_end_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP2]]), !llvm.access.group !55 // CHECK3-NEXT: br label [[OMP_IF_END]] // CHECK3: omp_if.end: // CHECK3-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK3: omp.inner.for.inc: -// CHECK3-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK3-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !55 +// CHECK3-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], 
align 4, !llvm.access.group !55 // CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP14]], [[TMP15]] -// CHECK3-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !55 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP56:![0-9]+]] // CHECK3: omp.inner.for.end: // CHECK3-NEXT: br label [[OMP_LOOP_EXIT:%.*]] @@ -4544,23 +4544,23 @@ int main() { // CHECK3-NEXT: store i32 [[TMP6]], i32* [[DOTOMP_IV]], align 4 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK3: omp.inner.for.cond: -// CHECK3-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !58 +// CHECK3-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !58 // CHECK3-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP7]], [[TMP8]] // CHECK3-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK3: omp.inner.for.body: -// CHECK3-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !58 // CHECK3-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP9]], 1 // CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK3-NEXT: store i32 [[ADD]], i32* [[I]], align 4 -// CHECK3-NEXT: call void @_Z3fn3v() +// CHECK3-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !58 +// CHECK3-NEXT: call void @_Z3fn3v(), !llvm.access.group !58 // CHECK3-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK3: omp.body.continue: // CHECK3-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK3: omp.inner.for.inc: -// CHECK3-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !58 // CHECK3-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP10]], 1 -// CHECK3-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !58 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP59:![0-9]+]] // CHECK3: omp.inner.for.end: // CHECK3-NEXT: br label [[OMP_LOOP_EXIT:%.*]] @@ -4650,22 +4650,22 @@ int main() { // CHECK4-NEXT: store i32 [[TMP4]], i32* [[DOTOMP_IV]], align 4 // CHECK4-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK4: omp.inner.for.cond: -// CHECK4-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK4-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK4-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !11 +// CHECK4-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !11 // CHECK4-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] // CHECK4-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK4: omp.inner.for.body: -// CHECK4-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK4-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !11 // CHECK4-NEXT: [[TMP8:%.*]] = zext i32 [[TMP7]] to i64 -// CHECK4-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK4-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !11 // CHECK4-NEXT: [[TMP10:%.*]] = zext i32 [[TMP9]] to i64 -// CHECK4-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, 
...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 2, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64)* @.omp_outlined..1 to void (i32*, i32*, ...)*), i64 [[TMP8]], i64 [[TMP10]]) +// CHECK4-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 2, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64)* @.omp_outlined..1 to void (i32*, i32*, ...)*), i64 [[TMP8]], i64 [[TMP10]]), !llvm.access.group !11 // CHECK4-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK4: omp.inner.for.inc: -// CHECK4-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK4-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK4-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !11 +// CHECK4-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !11 // CHECK4-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP11]], [[TMP12]] -// CHECK4-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 +// CHECK4-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !11 // CHECK4-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP12:![0-9]+]] // CHECK4: omp.inner.for.end: // CHECK4-NEXT: br label [[OMP_LOOP_EXIT:%.*]] @@ -4727,22 +4727,22 @@ int main() { // CHECK4-NEXT: store i32 [[TMP6]], i32* [[DOTOMP_IV]], align 4 // CHECK4-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK4: omp.inner.for.cond: -// CHECK4-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK4-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK4-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !15 +// CHECK4-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !15 // CHECK4-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP7]], [[TMP8]] // CHECK4-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK4: omp.inner.for.body: -// CHECK4-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK4-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !15 // CHECK4-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP9]], 1 // CHECK4-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK4-NEXT: store i32 [[ADD]], i32* [[I]], align 4 +// CHECK4-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !15 // CHECK4-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK4: omp.body.continue: // CHECK4-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK4: omp.inner.for.inc: -// CHECK4-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK4-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !15 // CHECK4-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP10]], 1 -// CHECK4-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4 +// CHECK4-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !15 // CHECK4-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP16:![0-9]+]] // CHECK4: omp.inner.for.end: // CHECK4-NEXT: br label [[OMP_LOOP_EXIT:%.*]] @@ -4802,26 +4802,26 @@ int main() { // CHECK4-NEXT: store i32 [[TMP4]], i32* [[DOTOMP_IV]], align 4 // CHECK4-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK4: omp.inner.for.cond: -// CHECK4-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK4-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK4-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !20 +// CHECK4-NEXT: [[TMP6:%.*]] = 
load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !20 // CHECK4-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] // CHECK4-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK4: omp.inner.for.body: -// CHECK4-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK4-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !20 // CHECK4-NEXT: [[TMP8:%.*]] = zext i32 [[TMP7]] to i64 -// CHECK4-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK4-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !20 // CHECK4-NEXT: [[TMP10:%.*]] = zext i32 [[TMP9]] to i64 -// CHECK4-NEXT: call void @__kmpc_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]) -// CHECK4-NEXT: [[TMP11:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK4-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4 -// CHECK4-NEXT: call void @.omp_outlined..3(i32* [[TMP11]], i32* [[DOTBOUND_ZERO_ADDR]], i64 [[TMP8]], i64 [[TMP10]]) #[[ATTR2]] -// CHECK4-NEXT: call void @__kmpc_end_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]) +// CHECK4-NEXT: call void @__kmpc_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]), !llvm.access.group !20 +// CHECK4-NEXT: [[TMP11:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8, !llvm.access.group !20 +// CHECK4-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4, !llvm.access.group !20 +// CHECK4-NEXT: call void @.omp_outlined..3(i32* [[TMP11]], i32* [[DOTBOUND_ZERO_ADDR]], i64 [[TMP8]], i64 [[TMP10]]) #[[ATTR2]], !llvm.access.group !20 +// CHECK4-NEXT: call void @__kmpc_end_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]), !llvm.access.group !20 // CHECK4-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK4: omp.inner.for.inc: -// CHECK4-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK4-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK4-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !20 +// CHECK4-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !20 // CHECK4-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP12]], [[TMP13]] -// CHECK4-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 +// CHECK4-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !20 // CHECK4-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP21:![0-9]+]] // CHECK4: omp.inner.for.end: // CHECK4-NEXT: br label [[OMP_LOOP_EXIT:%.*]] @@ -4883,23 +4883,23 @@ int main() { // CHECK4-NEXT: store i32 [[TMP6]], i32* [[DOTOMP_IV]], align 4 // CHECK4-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK4: omp.inner.for.cond: -// CHECK4-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK4-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK4-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !23 +// CHECK4-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !23 // CHECK4-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP7]], [[TMP8]] // CHECK4-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK4: omp.inner.for.body: -// CHECK4-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK4-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !23 // CHECK4-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP9]], 1 // CHECK4-NEXT: [[ADD:%.*]] = add nsw 
i32 0, [[MUL]] -// CHECK4-NEXT: store i32 [[ADD]], i32* [[I]], align 4 -// CHECK4-NEXT: call void @_Z9gtid_testv() +// CHECK4-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !23 +// CHECK4-NEXT: call void @_Z9gtid_testv(), !llvm.access.group !23 // CHECK4-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK4: omp.body.continue: // CHECK4-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK4: omp.inner.for.inc: -// CHECK4-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK4-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !23 // CHECK4-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP10]], 1 -// CHECK4-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4 +// CHECK4-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !23 // CHECK4-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP24:![0-9]+]] // CHECK4: omp.inner.for.end: // CHECK4-NEXT: br label [[OMP_LOOP_EXIT:%.*]] @@ -5013,22 +5013,22 @@ int main() { // CHECK4-NEXT: store i32 [[TMP4]], i32* [[DOTOMP_IV]], align 4 // CHECK4-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK4: omp.inner.for.cond: -// CHECK4-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK4-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK4-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !26 +// CHECK4-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !26 // CHECK4-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] // CHECK4-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK4: omp.inner.for.body: -// CHECK4-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK4-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !26 // CHECK4-NEXT: [[TMP8:%.*]] = zext i32 [[TMP7]] to i64 -// CHECK4-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK4-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !26 // CHECK4-NEXT: [[TMP10:%.*]] = zext i32 [[TMP9]] to i64 -// CHECK4-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 2, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64)* @.omp_outlined..5 to void (i32*, i32*, ...)*), i64 [[TMP8]], i64 [[TMP10]]) +// CHECK4-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) 
@__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 2, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64)* @.omp_outlined..5 to void (i32*, i32*, ...)*), i64 [[TMP8]], i64 [[TMP10]]), !llvm.access.group !26 // CHECK4-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK4: omp.inner.for.inc: -// CHECK4-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK4-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK4-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !26 +// CHECK4-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !26 // CHECK4-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP11]], [[TMP12]] -// CHECK4-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 +// CHECK4-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !26 // CHECK4-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP27:![0-9]+]] // CHECK4: omp.inner.for.end: // CHECK4-NEXT: br label [[OMP_LOOP_EXIT:%.*]] @@ -5090,23 +5090,23 @@ int main() { // CHECK4-NEXT: store i32 [[TMP6]], i32* [[DOTOMP_IV]], align 4 // CHECK4-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK4: omp.inner.for.cond: -// CHECK4-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK4-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK4-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !29 +// CHECK4-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !29 // CHECK4-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP7]], [[TMP8]] // CHECK4-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK4: omp.inner.for.body: -// CHECK4-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK4-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !29 // CHECK4-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP9]], 1 // CHECK4-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK4-NEXT: store i32 [[ADD]], i32* [[I]], align 4 -// CHECK4-NEXT: call void @_Z3fn4v() +// CHECK4-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !29 +// CHECK4-NEXT: call void @_Z3fn4v(), !llvm.access.group !29 // CHECK4-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK4: omp.body.continue: // CHECK4-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK4: omp.inner.for.inc: -// CHECK4-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK4-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !29 // CHECK4-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP10]], 1 -// CHECK4-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4 +// CHECK4-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !29 // CHECK4-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP30:![0-9]+]] // CHECK4: omp.inner.for.end: // CHECK4-NEXT: br label [[OMP_LOOP_EXIT:%.*]] @@ -5341,41 +5341,41 @@ int main() { // CHECK4: omp_if.then: // CHECK4-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK4: omp.inner.for.cond: -// CHECK4-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK4-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK4-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !35 +// CHECK4-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !35 // CHECK4-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] // CHECK4-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label 
[[OMP_INNER_FOR_END:%.*]] // CHECK4: omp.inner.for.body: -// CHECK4-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK4-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !35 // CHECK4-NEXT: [[TMP11:%.*]] = zext i32 [[TMP10]] to i64 -// CHECK4-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK4-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !35 // CHECK4-NEXT: [[TMP13:%.*]] = zext i32 [[TMP12]] to i64 -// CHECK4-NEXT: [[TMP14:%.*]] = load i8, i8* [[DOTCAPTURE_EXPR_]], align 1 +// CHECK4-NEXT: [[TMP14:%.*]] = load i8, i8* [[DOTCAPTURE_EXPR_]], align 1, !llvm.access.group !35 // CHECK4-NEXT: [[TOBOOL3:%.*]] = trunc i8 [[TMP14]] to i1 // CHECK4-NEXT: [[CONV:%.*]] = bitcast i64* [[DOTCAPTURE_EXPR__CASTED]] to i8* // CHECK4-NEXT: [[FROMBOOL4:%.*]] = zext i1 [[TOBOOL3]] to i8 -// CHECK4-NEXT: store i8 [[FROMBOOL4]], i8* [[CONV]], align 1 -// CHECK4-NEXT: [[TMP15:%.*]] = load i64, i64* [[DOTCAPTURE_EXPR__CASTED]], align 8 -// CHECK4-NEXT: [[TMP16:%.*]] = load i8, i8* [[DOTCAPTURE_EXPR_]], align 1 +// CHECK4-NEXT: store i8 [[FROMBOOL4]], i8* [[CONV]], align 1, !llvm.access.group !35 +// CHECK4-NEXT: [[TMP15:%.*]] = load i64, i64* [[DOTCAPTURE_EXPR__CASTED]], align 8, !llvm.access.group !35 +// CHECK4-NEXT: [[TMP16:%.*]] = load i8, i8* [[DOTCAPTURE_EXPR_]], align 1, !llvm.access.group !35 // CHECK4-NEXT: [[TOBOOL5:%.*]] = trunc i8 [[TMP16]] to i1 // CHECK4-NEXT: br i1 [[TOBOOL5]], label [[OMP_IF_THEN6:%.*]], label [[OMP_IF_ELSE:%.*]] // CHECK4: omp_if.then6: -// CHECK4-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 3, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64, i64)* @.omp_outlined..9 to void (i32*, i32*, ...)*), i64 [[TMP11]], i64 [[TMP13]], i64 [[TMP15]]) +// CHECK4-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) 
@__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 3, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64, i64)* @.omp_outlined..9 to void (i32*, i32*, ...)*), i64 [[TMP11]], i64 [[TMP13]], i64 [[TMP15]]), !llvm.access.group !35 // CHECK4-NEXT: br label [[OMP_IF_END:%.*]] // CHECK4: omp_if.else: -// CHECK4-NEXT: call void @__kmpc_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP3]]) -// CHECK4-NEXT: [[TMP17:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK4-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4 -// CHECK4-NEXT: call void @.omp_outlined..9(i32* [[TMP17]], i32* [[DOTBOUND_ZERO_ADDR]], i64 [[TMP11]], i64 [[TMP13]], i64 [[TMP15]]) #[[ATTR2]] -// CHECK4-NEXT: call void @__kmpc_end_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP3]]) +// CHECK4-NEXT: call void @__kmpc_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP3]]), !llvm.access.group !35 +// CHECK4-NEXT: [[TMP17:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8, !llvm.access.group !35 +// CHECK4-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4, !llvm.access.group !35 +// CHECK4-NEXT: call void @.omp_outlined..9(i32* [[TMP17]], i32* [[DOTBOUND_ZERO_ADDR]], i64 [[TMP11]], i64 [[TMP13]], i64 [[TMP15]]) #[[ATTR2]], !llvm.access.group !35 +// CHECK4-NEXT: call void @__kmpc_end_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP3]]), !llvm.access.group !35 // CHECK4-NEXT: br label [[OMP_IF_END]] // CHECK4: omp_if.end: // CHECK4-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK4: omp.inner.for.inc: -// CHECK4-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK4-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK4-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !35 +// CHECK4-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !35 // CHECK4-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP18]], [[TMP19]] -// CHECK4-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 +// CHECK4-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !35 // CHECK4-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP36:![0-9]+]] // CHECK4: omp.inner.for.end: // CHECK4-NEXT: br label [[OMP_IF_END23:%.*]] @@ -5487,23 +5487,23 @@ int main() { // CHECK4-NEXT: store i32 [[TMP7]], i32* [[DOTOMP_IV]], align 4 // CHECK4-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK4: omp.inner.for.cond: -// CHECK4-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK4-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK4-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !39 +// CHECK4-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !39 // CHECK4-NEXT: [[CMP3:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] // CHECK4-NEXT: br i1 [[CMP3]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK4: omp.inner.for.body: -// CHECK4-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK4-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !39 // CHECK4-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP10]], 1 // CHECK4-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK4-NEXT: store i32 [[ADD]], i32* [[I]], align 4 -// CHECK4-NEXT: call void @_Z3fn6v() +// CHECK4-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !39 +// CHECK4-NEXT: call void @_Z3fn6v(), !llvm.access.group !39 // CHECK4-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK4: 
omp.body.continue: // CHECK4-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK4: omp.inner.for.inc: -// CHECK4-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK4-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !39 // CHECK4-NEXT: [[ADD4:%.*]] = add nsw i32 [[TMP11]], 1 -// CHECK4-NEXT: store i32 [[ADD4]], i32* [[DOTOMP_IV]], align 4 +// CHECK4-NEXT: store i32 [[ADD4]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !39 // CHECK4-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP40:![0-9]+]] // CHECK4: omp.inner.for.end: // CHECK4-NEXT: br label [[OMP_IF_END:%.*]] @@ -5615,23 +5615,23 @@ int main() { // CHECK4-NEXT: store i32 [[TMP7]], i32* [[DOTOMP_IV]], align 4 // CHECK4-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK4: omp.inner.for.cond: -// CHECK4-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK4-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK4-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !43 +// CHECK4-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !43 // CHECK4-NEXT: [[CMP3:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] // CHECK4-NEXT: br i1 [[CMP3]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK4: omp.inner.for.body: -// CHECK4-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK4-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !43 // CHECK4-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP10]], 1 // CHECK4-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK4-NEXT: store i32 [[ADD]], i32* [[I]], align 4 -// CHECK4-NEXT: call void @_Z3fn6v() +// CHECK4-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !43 +// CHECK4-NEXT: call void @_Z3fn6v(), !llvm.access.group !43 // CHECK4-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK4: omp.body.continue: // CHECK4-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK4: omp.inner.for.inc: -// CHECK4-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK4-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !43 // CHECK4-NEXT: [[ADD4:%.*]] = add nsw i32 [[TMP11]], 1 -// CHECK4-NEXT: store i32 [[ADD4]], i32* [[DOTOMP_IV]], align 4 +// CHECK4-NEXT: store i32 [[ADD4]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !43 // CHECK4-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP44:![0-9]+]] // CHECK4: omp.inner.for.end: // CHECK4-NEXT: br label [[OMP_IF_END:%.*]] @@ -5786,22 +5786,22 @@ int main() { // CHECK4-NEXT: store i32 [[TMP4]], i32* [[DOTOMP_IV]], align 4 // CHECK4-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK4: omp.inner.for.cond: -// CHECK4-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK4-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK4-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !47 +// CHECK4-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !47 // CHECK4-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] // CHECK4-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK4: omp.inner.for.body: -// CHECK4-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK4-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !47 // CHECK4-NEXT: [[TMP8:%.*]] = zext i32 [[TMP7]] to i64 -// CHECK4-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 
+// CHECK4-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !47 // CHECK4-NEXT: [[TMP10:%.*]] = zext i32 [[TMP9]] to i64 -// CHECK4-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 2, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64)* @.omp_outlined..12 to void (i32*, i32*, ...)*), i64 [[TMP8]], i64 [[TMP10]]) +// CHECK4-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 2, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64)* @.omp_outlined..12 to void (i32*, i32*, ...)*), i64 [[TMP8]], i64 [[TMP10]]), !llvm.access.group !47 // CHECK4-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK4: omp.inner.for.inc: -// CHECK4-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK4-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK4-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !47 +// CHECK4-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !47 // CHECK4-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP11]], [[TMP12]] -// CHECK4-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 +// CHECK4-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !47 // CHECK4-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP48:![0-9]+]] // CHECK4: omp.inner.for.end: // CHECK4-NEXT: br label [[OMP_LOOP_EXIT:%.*]] @@ -5863,23 +5863,23 @@ int main() { // CHECK4-NEXT: store i32 [[TMP6]], i32* [[DOTOMP_IV]], align 4 // CHECK4-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK4: omp.inner.for.cond: -// CHECK4-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK4-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK4-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !50 +// CHECK4-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !50 // CHECK4-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP7]], [[TMP8]] // CHECK4-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK4: omp.inner.for.body: -// CHECK4-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK4-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !50 // CHECK4-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP9]], 1 // CHECK4-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK4-NEXT: store i32 [[ADD]], i32* [[I]], align 4 -// CHECK4-NEXT: call void @_Z3fn1v() +// CHECK4-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !50 +// CHECK4-NEXT: call void @_Z3fn1v(), !llvm.access.group !50 // CHECK4-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK4: omp.body.continue: // CHECK4-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK4: omp.inner.for.inc: -// CHECK4-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK4-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !50 // CHECK4-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP10]], 1 -// CHECK4-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4 +// CHECK4-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !50 // CHECK4-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP51:![0-9]+]] // CHECK4: omp.inner.for.end: // CHECK4-NEXT: br label [[OMP_LOOP_EXIT:%.*]] @@ -6102,35 +6102,35 @@ int main() { // CHECK4-NEXT: store i32 [[TMP5]], i32* [[DOTOMP_IV]], align 4 // CHECK4-NEXT: br label 
[[OMP_INNER_FOR_COND:%.*]] // CHECK4: omp.inner.for.cond: -// CHECK4-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK4-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK4-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !55 +// CHECK4-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !55 // CHECK4-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] // CHECK4-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK4: omp.inner.for.body: -// CHECK4-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK4-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !55 // CHECK4-NEXT: [[TMP9:%.*]] = zext i32 [[TMP8]] to i64 -// CHECK4-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK4-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !55 // CHECK4-NEXT: [[TMP11:%.*]] = zext i32 [[TMP10]] to i64 -// CHECK4-NEXT: [[TMP12:%.*]] = load i32, i32* [[TMP0]], align 4 +// CHECK4-NEXT: [[TMP12:%.*]] = load i32, i32* [[TMP0]], align 4, !llvm.access.group !55 // CHECK4-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[TMP12]], 0 // CHECK4-NEXT: br i1 [[TOBOOL]], label [[OMP_IF_THEN:%.*]], label [[OMP_IF_ELSE:%.*]] // CHECK4: omp_if.then: -// CHECK4-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 2, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64)* @.omp_outlined..16 to void (i32*, i32*, ...)*), i64 [[TMP9]], i64 [[TMP11]]) +// CHECK4-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 2, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64)* @.omp_outlined..16 to void (i32*, i32*, ...)*), i64 [[TMP9]], i64 [[TMP11]]), !llvm.access.group !55 // CHECK4-NEXT: br label [[OMP_IF_END:%.*]] // CHECK4: omp_if.else: -// CHECK4-NEXT: call void @__kmpc_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP2]]) -// CHECK4-NEXT: [[TMP13:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK4-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4 -// CHECK4-NEXT: call void @.omp_outlined..16(i32* [[TMP13]], i32* [[DOTBOUND_ZERO_ADDR]], i64 [[TMP9]], i64 [[TMP11]]) #[[ATTR2]] -// CHECK4-NEXT: call void @__kmpc_end_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP2]]) +// CHECK4-NEXT: call void @__kmpc_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP2]]), !llvm.access.group !55 +// CHECK4-NEXT: [[TMP13:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8, !llvm.access.group !55 +// CHECK4-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4, !llvm.access.group !55 +// CHECK4-NEXT: call void @.omp_outlined..16(i32* [[TMP13]], i32* [[DOTBOUND_ZERO_ADDR]], i64 [[TMP9]], i64 [[TMP11]]) #[[ATTR2]], !llvm.access.group !55 +// CHECK4-NEXT: call void @__kmpc_end_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP2]]), !llvm.access.group !55 // CHECK4-NEXT: br label [[OMP_IF_END]] // CHECK4: omp_if.end: // CHECK4-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK4: omp.inner.for.inc: -// CHECK4-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK4-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK4-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !55 +// CHECK4-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], 
align 4, !llvm.access.group !55 // CHECK4-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP14]], [[TMP15]] -// CHECK4-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 +// CHECK4-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !55 // CHECK4-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP56:![0-9]+]] // CHECK4: omp.inner.for.end: // CHECK4-NEXT: br label [[OMP_LOOP_EXIT:%.*]] @@ -6192,23 +6192,23 @@ int main() { // CHECK4-NEXT: store i32 [[TMP6]], i32* [[DOTOMP_IV]], align 4 // CHECK4-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK4: omp.inner.for.cond: -// CHECK4-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK4-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK4-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !58 +// CHECK4-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !58 // CHECK4-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP7]], [[TMP8]] // CHECK4-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK4: omp.inner.for.body: -// CHECK4-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK4-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !58 // CHECK4-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP9]], 1 // CHECK4-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK4-NEXT: store i32 [[ADD]], i32* [[I]], align 4 -// CHECK4-NEXT: call void @_Z3fn3v() +// CHECK4-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !58 +// CHECK4-NEXT: call void @_Z3fn3v(), !llvm.access.group !58 // CHECK4-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK4: omp.body.continue: // CHECK4-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK4: omp.inner.for.inc: -// CHECK4-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK4-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !58 // CHECK4-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP10]], 1 -// CHECK4-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4 +// CHECK4-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !58 // CHECK4-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP59:![0-9]+]] // CHECK4: omp.inner.for.end: // CHECK4-NEXT: br label [[OMP_LOOP_EXIT:%.*]] @@ -6250,22 +6250,22 @@ int main() { // CHECK5-NEXT: store i32 [[TMP0]], i32* [[DOTOMP_IV]], align 4 // CHECK5-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK5: omp.inner.for.cond: -// CHECK5-NEXT: [[TMP1:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK5-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK5-NEXT: [[TMP1:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !2 +// CHECK5-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !2 // CHECK5-NEXT: [[CMP:%.*]] = icmp sle i32 [[TMP1]], [[TMP2]] // CHECK5-NEXT: br i1 [[CMP]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK5: omp.inner.for.body: -// CHECK5-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK5-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !2 // CHECK5-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP3]], 1 // CHECK5-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK5-NEXT: store i32 [[ADD]], i32* [[I]], align 4 +// CHECK5-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !2 // CHECK5-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK5: omp.body.continue: // CHECK5-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // 
CHECK5: omp.inner.for.inc: -// CHECK5-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK5-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !2 // CHECK5-NEXT: [[ADD1:%.*]] = add nsw i32 [[TMP4]], 1 -// CHECK5-NEXT: store i32 [[ADD1]], i32* [[DOTOMP_IV]], align 4 +// CHECK5-NEXT: store i32 [[ADD1]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !2 // CHECK5-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP3:![0-9]+]] // CHECK5: omp.inner.for.end: // CHECK5-NEXT: store i32 100, i32* [[I]], align 4 @@ -6275,23 +6275,23 @@ int main() { // CHECK5-NEXT: store i32 [[TMP5]], i32* [[DOTOMP_IV5]], align 4 // CHECK5-NEXT: br label [[OMP_INNER_FOR_COND7:%.*]] // CHECK5: omp.inner.for.cond7: -// CHECK5-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV5]], align 4 -// CHECK5-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_UB4]], align 4 +// CHECK5-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV5]], align 4, !llvm.access.group !6 +// CHECK5-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_UB4]], align 4, !llvm.access.group !6 // CHECK5-NEXT: [[CMP8:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] // CHECK5-NEXT: br i1 [[CMP8]], label [[OMP_INNER_FOR_BODY9:%.*]], label [[OMP_INNER_FOR_END15:%.*]] // CHECK5: omp.inner.for.body9: -// CHECK5-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV5]], align 4 +// CHECK5-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV5]], align 4, !llvm.access.group !6 // CHECK5-NEXT: [[MUL10:%.*]] = mul nsw i32 [[TMP8]], 1 // CHECK5-NEXT: [[ADD11:%.*]] = add nsw i32 0, [[MUL10]] -// CHECK5-NEXT: store i32 [[ADD11]], i32* [[I6]], align 4 -// CHECK5-NEXT: call void @_Z9gtid_testv() +// CHECK5-NEXT: store i32 [[ADD11]], i32* [[I6]], align 4, !llvm.access.group !6 +// CHECK5-NEXT: call void @_Z9gtid_testv(), !llvm.access.group !6 // CHECK5-NEXT: br label [[OMP_BODY_CONTINUE12:%.*]] // CHECK5: omp.body.continue12: // CHECK5-NEXT: br label [[OMP_INNER_FOR_INC13:%.*]] // CHECK5: omp.inner.for.inc13: -// CHECK5-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV5]], align 4 +// CHECK5-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV5]], align 4, !llvm.access.group !6 // CHECK5-NEXT: [[ADD14:%.*]] = add nsw i32 [[TMP9]], 1 -// CHECK5-NEXT: store i32 [[ADD14]], i32* [[DOTOMP_IV5]], align 4 +// CHECK5-NEXT: store i32 [[ADD14]], i32* [[DOTOMP_IV5]], align 4, !llvm.access.group !6 // CHECK5-NEXT: br label [[OMP_INNER_FOR_COND7]], !llvm.loop [[LOOP7:![0-9]+]] // CHECK5: omp.inner.for.end15: // CHECK5-NEXT: store i32 100, i32* [[I6]], align 4 @@ -6324,23 +6324,23 @@ int main() { // CHECK5-NEXT: store i32 [[TMP0]], i32* [[DOTOMP_IV]], align 4 // CHECK5-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK5: omp.inner.for.cond: -// CHECK5-NEXT: [[TMP1:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK5-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK5-NEXT: [[TMP1:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !9 +// CHECK5-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !9 // CHECK5-NEXT: [[CMP:%.*]] = icmp sle i32 [[TMP1]], [[TMP2]] // CHECK5-NEXT: br i1 [[CMP]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK5: omp.inner.for.body: -// CHECK5-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK5-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !9 // CHECK5-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP3]], 1 // CHECK5-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK5-NEXT: store i32 [[ADD]], i32* [[I]], align 4 -// CHECK5-NEXT: 
call void @_Z3fn4v() +// CHECK5-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !9 +// CHECK5-NEXT: call void @_Z3fn4v(), !llvm.access.group !9 // CHECK5-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK5: omp.body.continue: // CHECK5-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK5: omp.inner.for.inc: -// CHECK5-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK5-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !9 // CHECK5-NEXT: [[ADD1:%.*]] = add nsw i32 [[TMP4]], 1 -// CHECK5-NEXT: store i32 [[ADD1]], i32* [[DOTOMP_IV]], align 4 +// CHECK5-NEXT: store i32 [[ADD1]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !9 // CHECK5-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP10:![0-9]+]] // CHECK5: omp.inner.for.end: // CHECK5-NEXT: store i32 100, i32* [[I]], align 4 @@ -6350,23 +6350,23 @@ int main() { // CHECK5-NEXT: store i32 [[TMP5]], i32* [[DOTOMP_IV5]], align 4 // CHECK5-NEXT: br label [[OMP_INNER_FOR_COND7:%.*]] // CHECK5: omp.inner.for.cond7: -// CHECK5-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV5]], align 4 -// CHECK5-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_UB4]], align 4 +// CHECK5-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV5]], align 4, !llvm.access.group !12 +// CHECK5-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_UB4]], align 4, !llvm.access.group !12 // CHECK5-NEXT: [[CMP8:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] // CHECK5-NEXT: br i1 [[CMP8]], label [[OMP_INNER_FOR_BODY9:%.*]], label [[OMP_INNER_FOR_END15:%.*]] // CHECK5: omp.inner.for.body9: -// CHECK5-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV5]], align 4 +// CHECK5-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV5]], align 4, !llvm.access.group !12 // CHECK5-NEXT: [[MUL10:%.*]] = mul nsw i32 [[TMP8]], 1 // CHECK5-NEXT: [[ADD11:%.*]] = add nsw i32 0, [[MUL10]] -// CHECK5-NEXT: store i32 [[ADD11]], i32* [[I6]], align 4 -// CHECK5-NEXT: call void @_Z3fn5v() +// CHECK5-NEXT: store i32 [[ADD11]], i32* [[I6]], align 4, !llvm.access.group !12 +// CHECK5-NEXT: call void @_Z3fn5v(), !llvm.access.group !12 // CHECK5-NEXT: br label [[OMP_BODY_CONTINUE12:%.*]] // CHECK5: omp.body.continue12: // CHECK5-NEXT: br label [[OMP_INNER_FOR_INC13:%.*]] // CHECK5: omp.inner.for.inc13: -// CHECK5-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV5]], align 4 +// CHECK5-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV5]], align 4, !llvm.access.group !12 // CHECK5-NEXT: [[ADD14:%.*]] = add nsw i32 [[TMP9]], 1 -// CHECK5-NEXT: store i32 [[ADD14]], i32* [[DOTOMP_IV5]], align 4 +// CHECK5-NEXT: store i32 [[ADD14]], i32* [[DOTOMP_IV5]], align 4, !llvm.access.group !12 // CHECK5-NEXT: br label [[OMP_INNER_FOR_COND7]], !llvm.loop [[LOOP13:![0-9]+]] // CHECK5: omp.inner.for.end15: // CHECK5-NEXT: store i32 100, i32* [[I6]], align 4 @@ -6376,23 +6376,23 @@ int main() { // CHECK5-NEXT: store i32 [[TMP10]], i32* [[DOTOMP_IV19]], align 4 // CHECK5-NEXT: br label [[OMP_INNER_FOR_COND21:%.*]] // CHECK5: omp.inner.for.cond21: -// CHECK5-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV19]], align 4 -// CHECK5-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_UB18]], align 4 +// CHECK5-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV19]], align 4, !llvm.access.group !15 +// CHECK5-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_UB18]], align 4, !llvm.access.group !15 // CHECK5-NEXT: [[CMP22:%.*]] = icmp sle i32 [[TMP11]], [[TMP12]] // CHECK5-NEXT: br i1 [[CMP22]], label [[OMP_INNER_FOR_BODY23:%.*]], label [[OMP_INNER_FOR_END29:%.*]] // CHECK5: omp.inner.for.body23: -// CHECK5-NEXT: 
[[TMP13:%.*]] = load i32, i32* [[DOTOMP_IV19]], align 4 +// CHECK5-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_IV19]], align 4, !llvm.access.group !15 // CHECK5-NEXT: [[MUL24:%.*]] = mul nsw i32 [[TMP13]], 1 // CHECK5-NEXT: [[ADD25:%.*]] = add nsw i32 0, [[MUL24]] -// CHECK5-NEXT: store i32 [[ADD25]], i32* [[I20]], align 4 -// CHECK5-NEXT: call void @_Z3fn6v() +// CHECK5-NEXT: store i32 [[ADD25]], i32* [[I20]], align 4, !llvm.access.group !15 +// CHECK5-NEXT: call void @_Z3fn6v(), !llvm.access.group !15 // CHECK5-NEXT: br label [[OMP_BODY_CONTINUE26:%.*]] // CHECK5: omp.body.continue26: // CHECK5-NEXT: br label [[OMP_INNER_FOR_INC27:%.*]] // CHECK5: omp.inner.for.inc27: -// CHECK5-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IV19]], align 4 +// CHECK5-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IV19]], align 4, !llvm.access.group !15 // CHECK5-NEXT: [[ADD28:%.*]] = add nsw i32 [[TMP14]], 1 -// CHECK5-NEXT: store i32 [[ADD28]], i32* [[DOTOMP_IV19]], align 4 +// CHECK5-NEXT: store i32 [[ADD28]], i32* [[DOTOMP_IV19]], align 4, !llvm.access.group !15 // CHECK5-NEXT: br label [[OMP_INNER_FOR_COND21]], !llvm.loop [[LOOP16:![0-9]+]] // CHECK5: omp.inner.for.end29: // CHECK5-NEXT: store i32 100, i32* [[I20]], align 4 @@ -6427,23 +6427,23 @@ int main() { // CHECK5-NEXT: store i32 [[TMP0]], i32* [[DOTOMP_IV]], align 4 // CHECK5-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK5: omp.inner.for.cond: -// CHECK5-NEXT: [[TMP1:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK5-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK5-NEXT: [[TMP1:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !18 +// CHECK5-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !18 // CHECK5-NEXT: [[CMP:%.*]] = icmp sle i32 [[TMP1]], [[TMP2]] // CHECK5-NEXT: br i1 [[CMP]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK5: omp.inner.for.body: -// CHECK5-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK5-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !18 // CHECK5-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP3]], 1 // CHECK5-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK5-NEXT: store i32 [[ADD]], i32* [[I]], align 4 -// CHECK5-NEXT: call void @_Z3fn1v() +// CHECK5-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !18 +// CHECK5-NEXT: call void @_Z3fn1v(), !llvm.access.group !18 // CHECK5-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK5: omp.body.continue: // CHECK5-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK5: omp.inner.for.inc: -// CHECK5-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK5-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !18 // CHECK5-NEXT: [[ADD1:%.*]] = add nsw i32 [[TMP4]], 1 -// CHECK5-NEXT: store i32 [[ADD1]], i32* [[DOTOMP_IV]], align 4 +// CHECK5-NEXT: store i32 [[ADD1]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !18 // CHECK5-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP19:![0-9]+]] // CHECK5: omp.inner.for.end: // CHECK5-NEXT: store i32 100, i32* [[I]], align 4 @@ -6453,23 +6453,23 @@ int main() { // CHECK5-NEXT: store i32 [[TMP5]], i32* [[DOTOMP_IV5]], align 4 // CHECK5-NEXT: br label [[OMP_INNER_FOR_COND7:%.*]] // CHECK5: omp.inner.for.cond7: -// CHECK5-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV5]], align 4 -// CHECK5-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_UB4]], align 4 +// CHECK5-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV5]], align 4, 
!llvm.access.group !21 +// CHECK5-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_UB4]], align 4, !llvm.access.group !21 // CHECK5-NEXT: [[CMP8:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] // CHECK5-NEXT: br i1 [[CMP8]], label [[OMP_INNER_FOR_BODY9:%.*]], label [[OMP_INNER_FOR_END15:%.*]] // CHECK5: omp.inner.for.body9: -// CHECK5-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV5]], align 4 +// CHECK5-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV5]], align 4, !llvm.access.group !21 // CHECK5-NEXT: [[MUL10:%.*]] = mul nsw i32 [[TMP8]], 1 // CHECK5-NEXT: [[ADD11:%.*]] = add nsw i32 0, [[MUL10]] -// CHECK5-NEXT: store i32 [[ADD11]], i32* [[I6]], align 4 -// CHECK5-NEXT: call void @_Z3fn2v() +// CHECK5-NEXT: store i32 [[ADD11]], i32* [[I6]], align 4, !llvm.access.group !21 +// CHECK5-NEXT: call void @_Z3fn2v(), !llvm.access.group !21 // CHECK5-NEXT: br label [[OMP_BODY_CONTINUE12:%.*]] // CHECK5: omp.body.continue12: // CHECK5-NEXT: br label [[OMP_INNER_FOR_INC13:%.*]] // CHECK5: omp.inner.for.inc13: -// CHECK5-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV5]], align 4 +// CHECK5-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV5]], align 4, !llvm.access.group !21 // CHECK5-NEXT: [[ADD14:%.*]] = add nsw i32 [[TMP9]], 1 -// CHECK5-NEXT: store i32 [[ADD14]], i32* [[DOTOMP_IV5]], align 4 +// CHECK5-NEXT: store i32 [[ADD14]], i32* [[DOTOMP_IV5]], align 4, !llvm.access.group !21 // CHECK5-NEXT: br label [[OMP_INNER_FOR_COND7]], !llvm.loop [[LOOP22:![0-9]+]] // CHECK5: omp.inner.for.end15: // CHECK5-NEXT: store i32 100, i32* [[I6]], align 4 @@ -6479,23 +6479,23 @@ int main() { // CHECK5-NEXT: store i32 [[TMP10]], i32* [[DOTOMP_IV19]], align 4 // CHECK5-NEXT: br label [[OMP_INNER_FOR_COND21:%.*]] // CHECK5: omp.inner.for.cond21: -// CHECK5-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV19]], align 4 -// CHECK5-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_UB18]], align 4 +// CHECK5-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV19]], align 4, !llvm.access.group !24 +// CHECK5-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_UB18]], align 4, !llvm.access.group !24 // CHECK5-NEXT: [[CMP22:%.*]] = icmp sle i32 [[TMP11]], [[TMP12]] // CHECK5-NEXT: br i1 [[CMP22]], label [[OMP_INNER_FOR_BODY23:%.*]], label [[OMP_INNER_FOR_END29:%.*]] // CHECK5: omp.inner.for.body23: -// CHECK5-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_IV19]], align 4 +// CHECK5-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_IV19]], align 4, !llvm.access.group !24 // CHECK5-NEXT: [[MUL24:%.*]] = mul nsw i32 [[TMP13]], 1 // CHECK5-NEXT: [[ADD25:%.*]] = add nsw i32 0, [[MUL24]] -// CHECK5-NEXT: store i32 [[ADD25]], i32* [[I20]], align 4 -// CHECK5-NEXT: call void @_Z3fn3v() +// CHECK5-NEXT: store i32 [[ADD25]], i32* [[I20]], align 4, !llvm.access.group !24 +// CHECK5-NEXT: call void @_Z3fn3v(), !llvm.access.group !24 // CHECK5-NEXT: br label [[OMP_BODY_CONTINUE26:%.*]] // CHECK5: omp.body.continue26: // CHECK5-NEXT: br label [[OMP_INNER_FOR_INC27:%.*]] // CHECK5: omp.inner.for.inc27: -// CHECK5-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IV19]], align 4 +// CHECK5-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IV19]], align 4, !llvm.access.group !24 // CHECK5-NEXT: [[ADD28:%.*]] = add nsw i32 [[TMP14]], 1 -// CHECK5-NEXT: store i32 [[ADD28]], i32* [[DOTOMP_IV19]], align 4 +// CHECK5-NEXT: store i32 [[ADD28]], i32* [[DOTOMP_IV19]], align 4, !llvm.access.group !24 // CHECK5-NEXT: br label [[OMP_INNER_FOR_COND21]], !llvm.loop [[LOOP25:![0-9]+]] // CHECK5: omp.inner.for.end29: // CHECK5-NEXT: store i32 100, i32* [[I20]], align 4 @@ -6521,22 +6521,22 
@@ int main() { // CHECK6-NEXT: store i32 [[TMP0]], i32* [[DOTOMP_IV]], align 4 // CHECK6-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK6: omp.inner.for.cond: -// CHECK6-NEXT: [[TMP1:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK6-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK6-NEXT: [[TMP1:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !2 +// CHECK6-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !2 // CHECK6-NEXT: [[CMP:%.*]] = icmp sle i32 [[TMP1]], [[TMP2]] // CHECK6-NEXT: br i1 [[CMP]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK6: omp.inner.for.body: -// CHECK6-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK6-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !2 // CHECK6-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP3]], 1 // CHECK6-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK6-NEXT: store i32 [[ADD]], i32* [[I]], align 4 +// CHECK6-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !2 // CHECK6-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK6: omp.body.continue: // CHECK6-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK6: omp.inner.for.inc: -// CHECK6-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK6-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !2 // CHECK6-NEXT: [[ADD1:%.*]] = add nsw i32 [[TMP4]], 1 -// CHECK6-NEXT: store i32 [[ADD1]], i32* [[DOTOMP_IV]], align 4 +// CHECK6-NEXT: store i32 [[ADD1]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !2 // CHECK6-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP3:![0-9]+]] // CHECK6: omp.inner.for.end: // CHECK6-NEXT: store i32 100, i32* [[I]], align 4 @@ -6546,23 +6546,23 @@ int main() { // CHECK6-NEXT: store i32 [[TMP5]], i32* [[DOTOMP_IV5]], align 4 // CHECK6-NEXT: br label [[OMP_INNER_FOR_COND7:%.*]] // CHECK6: omp.inner.for.cond7: -// CHECK6-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV5]], align 4 -// CHECK6-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_UB4]], align 4 +// CHECK6-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV5]], align 4, !llvm.access.group !6 +// CHECK6-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_UB4]], align 4, !llvm.access.group !6 // CHECK6-NEXT: [[CMP8:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] // CHECK6-NEXT: br i1 [[CMP8]], label [[OMP_INNER_FOR_BODY9:%.*]], label [[OMP_INNER_FOR_END15:%.*]] // CHECK6: omp.inner.for.body9: -// CHECK6-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV5]], align 4 +// CHECK6-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV5]], align 4, !llvm.access.group !6 // CHECK6-NEXT: [[MUL10:%.*]] = mul nsw i32 [[TMP8]], 1 // CHECK6-NEXT: [[ADD11:%.*]] = add nsw i32 0, [[MUL10]] -// CHECK6-NEXT: store i32 [[ADD11]], i32* [[I6]], align 4 -// CHECK6-NEXT: call void @_Z9gtid_testv() +// CHECK6-NEXT: store i32 [[ADD11]], i32* [[I6]], align 4, !llvm.access.group !6 +// CHECK6-NEXT: call void @_Z9gtid_testv(), !llvm.access.group !6 // CHECK6-NEXT: br label [[OMP_BODY_CONTINUE12:%.*]] // CHECK6: omp.body.continue12: // CHECK6-NEXT: br label [[OMP_INNER_FOR_INC13:%.*]] // CHECK6: omp.inner.for.inc13: -// CHECK6-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV5]], align 4 +// CHECK6-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV5]], align 4, !llvm.access.group !6 // CHECK6-NEXT: [[ADD14:%.*]] = add nsw i32 [[TMP9]], 1 -// CHECK6-NEXT: store i32 [[ADD14]], i32* [[DOTOMP_IV5]], align 4 +// CHECK6-NEXT: store i32 [[ADD14]], i32* [[DOTOMP_IV5]], align 4, 
!llvm.access.group !6 // CHECK6-NEXT: br label [[OMP_INNER_FOR_COND7]], !llvm.loop [[LOOP7:![0-9]+]] // CHECK6: omp.inner.for.end15: // CHECK6-NEXT: store i32 100, i32* [[I6]], align 4 @@ -6595,23 +6595,23 @@ int main() { // CHECK6-NEXT: store i32 [[TMP0]], i32* [[DOTOMP_IV]], align 4 // CHECK6-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK6: omp.inner.for.cond: -// CHECK6-NEXT: [[TMP1:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK6-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK6-NEXT: [[TMP1:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !9 +// CHECK6-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !9 // CHECK6-NEXT: [[CMP:%.*]] = icmp sle i32 [[TMP1]], [[TMP2]] // CHECK6-NEXT: br i1 [[CMP]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK6: omp.inner.for.body: -// CHECK6-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK6-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !9 // CHECK6-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP3]], 1 // CHECK6-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK6-NEXT: store i32 [[ADD]], i32* [[I]], align 4 -// CHECK6-NEXT: call void @_Z3fn4v() +// CHECK6-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !9 +// CHECK6-NEXT: call void @_Z3fn4v(), !llvm.access.group !9 // CHECK6-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK6: omp.body.continue: // CHECK6-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK6: omp.inner.for.inc: -// CHECK6-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK6-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !9 // CHECK6-NEXT: [[ADD1:%.*]] = add nsw i32 [[TMP4]], 1 -// CHECK6-NEXT: store i32 [[ADD1]], i32* [[DOTOMP_IV]], align 4 +// CHECK6-NEXT: store i32 [[ADD1]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !9 // CHECK6-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP10:![0-9]+]] // CHECK6: omp.inner.for.end: // CHECK6-NEXT: store i32 100, i32* [[I]], align 4 @@ -6621,23 +6621,23 @@ int main() { // CHECK6-NEXT: store i32 [[TMP5]], i32* [[DOTOMP_IV5]], align 4 // CHECK6-NEXT: br label [[OMP_INNER_FOR_COND7:%.*]] // CHECK6: omp.inner.for.cond7: -// CHECK6-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV5]], align 4 -// CHECK6-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_UB4]], align 4 +// CHECK6-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV5]], align 4, !llvm.access.group !12 +// CHECK6-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_UB4]], align 4, !llvm.access.group !12 // CHECK6-NEXT: [[CMP8:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] // CHECK6-NEXT: br i1 [[CMP8]], label [[OMP_INNER_FOR_BODY9:%.*]], label [[OMP_INNER_FOR_END15:%.*]] // CHECK6: omp.inner.for.body9: -// CHECK6-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV5]], align 4 +// CHECK6-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV5]], align 4, !llvm.access.group !12 // CHECK6-NEXT: [[MUL10:%.*]] = mul nsw i32 [[TMP8]], 1 // CHECK6-NEXT: [[ADD11:%.*]] = add nsw i32 0, [[MUL10]] -// CHECK6-NEXT: store i32 [[ADD11]], i32* [[I6]], align 4 -// CHECK6-NEXT: call void @_Z3fn5v() +// CHECK6-NEXT: store i32 [[ADD11]], i32* [[I6]], align 4, !llvm.access.group !12 +// CHECK6-NEXT: call void @_Z3fn5v(), !llvm.access.group !12 // CHECK6-NEXT: br label [[OMP_BODY_CONTINUE12:%.*]] // CHECK6: omp.body.continue12: // CHECK6-NEXT: br label [[OMP_INNER_FOR_INC13:%.*]] // CHECK6: omp.inner.for.inc13: -// CHECK6-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV5]], 
align 4 +// CHECK6-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV5]], align 4, !llvm.access.group !12 // CHECK6-NEXT: [[ADD14:%.*]] = add nsw i32 [[TMP9]], 1 -// CHECK6-NEXT: store i32 [[ADD14]], i32* [[DOTOMP_IV5]], align 4 +// CHECK6-NEXT: store i32 [[ADD14]], i32* [[DOTOMP_IV5]], align 4, !llvm.access.group !12 // CHECK6-NEXT: br label [[OMP_INNER_FOR_COND7]], !llvm.loop [[LOOP13:![0-9]+]] // CHECK6: omp.inner.for.end15: // CHECK6-NEXT: store i32 100, i32* [[I6]], align 4 @@ -6647,23 +6647,23 @@ int main() { // CHECK6-NEXT: store i32 [[TMP10]], i32* [[DOTOMP_IV19]], align 4 // CHECK6-NEXT: br label [[OMP_INNER_FOR_COND21:%.*]] // CHECK6: omp.inner.for.cond21: -// CHECK6-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV19]], align 4 -// CHECK6-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_UB18]], align 4 +// CHECK6-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV19]], align 4, !llvm.access.group !15 +// CHECK6-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_UB18]], align 4, !llvm.access.group !15 // CHECK6-NEXT: [[CMP22:%.*]] = icmp sle i32 [[TMP11]], [[TMP12]] // CHECK6-NEXT: br i1 [[CMP22]], label [[OMP_INNER_FOR_BODY23:%.*]], label [[OMP_INNER_FOR_END29:%.*]] // CHECK6: omp.inner.for.body23: -// CHECK6-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_IV19]], align 4 +// CHECK6-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_IV19]], align 4, !llvm.access.group !15 // CHECK6-NEXT: [[MUL24:%.*]] = mul nsw i32 [[TMP13]], 1 // CHECK6-NEXT: [[ADD25:%.*]] = add nsw i32 0, [[MUL24]] -// CHECK6-NEXT: store i32 [[ADD25]], i32* [[I20]], align 4 -// CHECK6-NEXT: call void @_Z3fn6v() +// CHECK6-NEXT: store i32 [[ADD25]], i32* [[I20]], align 4, !llvm.access.group !15 +// CHECK6-NEXT: call void @_Z3fn6v(), !llvm.access.group !15 // CHECK6-NEXT: br label [[OMP_BODY_CONTINUE26:%.*]] // CHECK6: omp.body.continue26: // CHECK6-NEXT: br label [[OMP_INNER_FOR_INC27:%.*]] // CHECK6: omp.inner.for.inc27: -// CHECK6-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IV19]], align 4 +// CHECK6-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IV19]], align 4, !llvm.access.group !15 // CHECK6-NEXT: [[ADD28:%.*]] = add nsw i32 [[TMP14]], 1 -// CHECK6-NEXT: store i32 [[ADD28]], i32* [[DOTOMP_IV19]], align 4 +// CHECK6-NEXT: store i32 [[ADD28]], i32* [[DOTOMP_IV19]], align 4, !llvm.access.group !15 // CHECK6-NEXT: br label [[OMP_INNER_FOR_COND21]], !llvm.loop [[LOOP16:![0-9]+]] // CHECK6: omp.inner.for.end29: // CHECK6-NEXT: store i32 100, i32* [[I20]], align 4 @@ -6698,23 +6698,23 @@ int main() { // CHECK6-NEXT: store i32 [[TMP0]], i32* [[DOTOMP_IV]], align 4 // CHECK6-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK6: omp.inner.for.cond: -// CHECK6-NEXT: [[TMP1:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK6-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK6-NEXT: [[TMP1:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !18 +// CHECK6-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !18 // CHECK6-NEXT: [[CMP:%.*]] = icmp sle i32 [[TMP1]], [[TMP2]] // CHECK6-NEXT: br i1 [[CMP]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK6: omp.inner.for.body: -// CHECK6-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK6-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !18 // CHECK6-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP3]], 1 // CHECK6-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK6-NEXT: store i32 [[ADD]], i32* [[I]], align 4 -// CHECK6-NEXT: call void @_Z3fn1v() +// CHECK6-NEXT: store 
i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !18 +// CHECK6-NEXT: call void @_Z3fn1v(), !llvm.access.group !18 // CHECK6-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK6: omp.body.continue: // CHECK6-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK6: omp.inner.for.inc: -// CHECK6-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK6-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !18 // CHECK6-NEXT: [[ADD1:%.*]] = add nsw i32 [[TMP4]], 1 -// CHECK6-NEXT: store i32 [[ADD1]], i32* [[DOTOMP_IV]], align 4 +// CHECK6-NEXT: store i32 [[ADD1]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !18 // CHECK6-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP19:![0-9]+]] // CHECK6: omp.inner.for.end: // CHECK6-NEXT: store i32 100, i32* [[I]], align 4 @@ -6724,23 +6724,23 @@ int main() { // CHECK6-NEXT: store i32 [[TMP5]], i32* [[DOTOMP_IV5]], align 4 // CHECK6-NEXT: br label [[OMP_INNER_FOR_COND7:%.*]] // CHECK6: omp.inner.for.cond7: -// CHECK6-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV5]], align 4 -// CHECK6-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_UB4]], align 4 +// CHECK6-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV5]], align 4, !llvm.access.group !21 +// CHECK6-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_UB4]], align 4, !llvm.access.group !21 // CHECK6-NEXT: [[CMP8:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] // CHECK6-NEXT: br i1 [[CMP8]], label [[OMP_INNER_FOR_BODY9:%.*]], label [[OMP_INNER_FOR_END15:%.*]] // CHECK6: omp.inner.for.body9: -// CHECK6-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV5]], align 4 +// CHECK6-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV5]], align 4, !llvm.access.group !21 // CHECK6-NEXT: [[MUL10:%.*]] = mul nsw i32 [[TMP8]], 1 // CHECK6-NEXT: [[ADD11:%.*]] = add nsw i32 0, [[MUL10]] -// CHECK6-NEXT: store i32 [[ADD11]], i32* [[I6]], align 4 -// CHECK6-NEXT: call void @_Z3fn2v() +// CHECK6-NEXT: store i32 [[ADD11]], i32* [[I6]], align 4, !llvm.access.group !21 +// CHECK6-NEXT: call void @_Z3fn2v(), !llvm.access.group !21 // CHECK6-NEXT: br label [[OMP_BODY_CONTINUE12:%.*]] // CHECK6: omp.body.continue12: // CHECK6-NEXT: br label [[OMP_INNER_FOR_INC13:%.*]] // CHECK6: omp.inner.for.inc13: -// CHECK6-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV5]], align 4 +// CHECK6-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV5]], align 4, !llvm.access.group !21 // CHECK6-NEXT: [[ADD14:%.*]] = add nsw i32 [[TMP9]], 1 -// CHECK6-NEXT: store i32 [[ADD14]], i32* [[DOTOMP_IV5]], align 4 +// CHECK6-NEXT: store i32 [[ADD14]], i32* [[DOTOMP_IV5]], align 4, !llvm.access.group !21 // CHECK6-NEXT: br label [[OMP_INNER_FOR_COND7]], !llvm.loop [[LOOP22:![0-9]+]] // CHECK6: omp.inner.for.end15: // CHECK6-NEXT: store i32 100, i32* [[I6]], align 4 @@ -6750,23 +6750,23 @@ int main() { // CHECK6-NEXT: store i32 [[TMP10]], i32* [[DOTOMP_IV19]], align 4 // CHECK6-NEXT: br label [[OMP_INNER_FOR_COND21:%.*]] // CHECK6: omp.inner.for.cond21: -// CHECK6-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV19]], align 4 -// CHECK6-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_UB18]], align 4 +// CHECK6-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV19]], align 4, !llvm.access.group !24 +// CHECK6-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_UB18]], align 4, !llvm.access.group !24 // CHECK6-NEXT: [[CMP22:%.*]] = icmp sle i32 [[TMP11]], [[TMP12]] // CHECK6-NEXT: br i1 [[CMP22]], label [[OMP_INNER_FOR_BODY23:%.*]], label [[OMP_INNER_FOR_END29:%.*]] // CHECK6: omp.inner.for.body23: -// CHECK6-NEXT: [[TMP13:%.*]] = load i32, i32* 
[[DOTOMP_IV19]], align 4 +// CHECK6-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_IV19]], align 4, !llvm.access.group !24 // CHECK6-NEXT: [[MUL24:%.*]] = mul nsw i32 [[TMP13]], 1 // CHECK6-NEXT: [[ADD25:%.*]] = add nsw i32 0, [[MUL24]] -// CHECK6-NEXT: store i32 [[ADD25]], i32* [[I20]], align 4 -// CHECK6-NEXT: call void @_Z3fn3v() +// CHECK6-NEXT: store i32 [[ADD25]], i32* [[I20]], align 4, !llvm.access.group !24 +// CHECK6-NEXT: call void @_Z3fn3v(), !llvm.access.group !24 // CHECK6-NEXT: br label [[OMP_BODY_CONTINUE26:%.*]] // CHECK6: omp.body.continue26: // CHECK6-NEXT: br label [[OMP_INNER_FOR_INC27:%.*]] // CHECK6: omp.inner.for.inc27: -// CHECK6-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IV19]], align 4 +// CHECK6-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IV19]], align 4, !llvm.access.group !24 // CHECK6-NEXT: [[ADD28:%.*]] = add nsw i32 [[TMP14]], 1 -// CHECK6-NEXT: store i32 [[ADD28]], i32* [[DOTOMP_IV19]], align 4 +// CHECK6-NEXT: store i32 [[ADD28]], i32* [[DOTOMP_IV19]], align 4, !llvm.access.group !24 // CHECK6-NEXT: br label [[OMP_INNER_FOR_COND21]], !llvm.loop [[LOOP25:![0-9]+]] // CHECK6: omp.inner.for.end29: // CHECK6-NEXT: store i32 100, i32* [[I20]], align 4 @@ -6792,22 +6792,22 @@ int main() { // CHECK7-NEXT: store i32 [[TMP0]], i32* [[DOTOMP_IV]], align 4 // CHECK7-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK7: omp.inner.for.cond: -// CHECK7-NEXT: [[TMP1:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK7-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK7-NEXT: [[TMP1:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !2 +// CHECK7-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !2 // CHECK7-NEXT: [[CMP:%.*]] = icmp sle i32 [[TMP1]], [[TMP2]] // CHECK7-NEXT: br i1 [[CMP]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK7: omp.inner.for.body: -// CHECK7-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK7-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !2 // CHECK7-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP3]], 1 // CHECK7-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK7-NEXT: store i32 [[ADD]], i32* [[I]], align 4 +// CHECK7-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !2 // CHECK7-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK7: omp.body.continue: // CHECK7-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK7: omp.inner.for.inc: -// CHECK7-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK7-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !2 // CHECK7-NEXT: [[ADD1:%.*]] = add nsw i32 [[TMP4]], 1 -// CHECK7-NEXT: store i32 [[ADD1]], i32* [[DOTOMP_IV]], align 4 +// CHECK7-NEXT: store i32 [[ADD1]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !2 // CHECK7-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP3:![0-9]+]] // CHECK7: omp.inner.for.end: // CHECK7-NEXT: store i32 100, i32* [[I]], align 4 @@ -6817,23 +6817,23 @@ int main() { // CHECK7-NEXT: store i32 [[TMP5]], i32* [[DOTOMP_IV5]], align 4 // CHECK7-NEXT: br label [[OMP_INNER_FOR_COND7:%.*]] // CHECK7: omp.inner.for.cond7: -// CHECK7-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV5]], align 4 -// CHECK7-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_UB4]], align 4 +// CHECK7-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV5]], align 4, !llvm.access.group !6 +// CHECK7-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_UB4]], align 4, !llvm.access.group !6 // CHECK7-NEXT: [[CMP8:%.*]] = icmp 
sle i32 [[TMP6]], [[TMP7]] // CHECK7-NEXT: br i1 [[CMP8]], label [[OMP_INNER_FOR_BODY9:%.*]], label [[OMP_INNER_FOR_END15:%.*]] // CHECK7: omp.inner.for.body9: -// CHECK7-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV5]], align 4 +// CHECK7-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV5]], align 4, !llvm.access.group !6 // CHECK7-NEXT: [[MUL10:%.*]] = mul nsw i32 [[TMP8]], 1 // CHECK7-NEXT: [[ADD11:%.*]] = add nsw i32 0, [[MUL10]] -// CHECK7-NEXT: store i32 [[ADD11]], i32* [[I6]], align 4 -// CHECK7-NEXT: call void @_Z9gtid_testv() +// CHECK7-NEXT: store i32 [[ADD11]], i32* [[I6]], align 4, !llvm.access.group !6 +// CHECK7-NEXT: call void @_Z9gtid_testv(), !llvm.access.group !6 // CHECK7-NEXT: br label [[OMP_BODY_CONTINUE12:%.*]] // CHECK7: omp.body.continue12: // CHECK7-NEXT: br label [[OMP_INNER_FOR_INC13:%.*]] // CHECK7: omp.inner.for.inc13: -// CHECK7-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV5]], align 4 +// CHECK7-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV5]], align 4, !llvm.access.group !6 // CHECK7-NEXT: [[ADD14:%.*]] = add nsw i32 [[TMP9]], 1 -// CHECK7-NEXT: store i32 [[ADD14]], i32* [[DOTOMP_IV5]], align 4 +// CHECK7-NEXT: store i32 [[ADD14]], i32* [[DOTOMP_IV5]], align 4, !llvm.access.group !6 // CHECK7-NEXT: br label [[OMP_INNER_FOR_COND7]], !llvm.loop [[LOOP7:![0-9]+]] // CHECK7: omp.inner.for.end15: // CHECK7-NEXT: store i32 100, i32* [[I6]], align 4 @@ -6867,23 +6867,23 @@ int main() { // CHECK7-NEXT: store i32 [[TMP0]], i32* [[DOTOMP_IV]], align 4 // CHECK7-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK7: omp.inner.for.cond: -// CHECK7-NEXT: [[TMP1:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK7-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK7-NEXT: [[TMP1:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !9 +// CHECK7-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !9 // CHECK7-NEXT: [[CMP:%.*]] = icmp sle i32 [[TMP1]], [[TMP2]] // CHECK7-NEXT: br i1 [[CMP]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK7: omp.inner.for.body: -// CHECK7-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK7-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !9 // CHECK7-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP3]], 1 // CHECK7-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK7-NEXT: store i32 [[ADD]], i32* [[I]], align 4 -// CHECK7-NEXT: call void @_Z3fn4v() +// CHECK7-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !9 +// CHECK7-NEXT: call void @_Z3fn4v(), !llvm.access.group !9 // CHECK7-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK7: omp.body.continue: // CHECK7-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK7: omp.inner.for.inc: -// CHECK7-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK7-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !9 // CHECK7-NEXT: [[ADD1:%.*]] = add nsw i32 [[TMP4]], 1 -// CHECK7-NEXT: store i32 [[ADD1]], i32* [[DOTOMP_IV]], align 4 +// CHECK7-NEXT: store i32 [[ADD1]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !9 // CHECK7-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP10:![0-9]+]] // CHECK7: omp.inner.for.end: // CHECK7-NEXT: store i32 100, i32* [[I]], align 4 @@ -6927,23 +6927,23 @@ int main() { // CHECK7: omp_if.then: // CHECK7-NEXT: br label [[OMP_INNER_FOR_COND22:%.*]] // CHECK7: omp.inner.for.cond22: -// CHECK7-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_IV19]], align 4 -// CHECK7-NEXT: [[TMP14:%.*]] = 
load i32, i32* [[DOTOMP_UB18]], align 4 +// CHECK7-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_IV19]], align 4, !llvm.access.group !14 +// CHECK7-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_UB18]], align 4, !llvm.access.group !14 // CHECK7-NEXT: [[CMP23:%.*]] = icmp sle i32 [[TMP13]], [[TMP14]] // CHECK7-NEXT: br i1 [[CMP23]], label [[OMP_INNER_FOR_BODY24:%.*]], label [[OMP_INNER_FOR_END30:%.*]] // CHECK7: omp.inner.for.body24: -// CHECK7-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_IV19]], align 4 +// CHECK7-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_IV19]], align 4, !llvm.access.group !14 // CHECK7-NEXT: [[MUL25:%.*]] = mul nsw i32 [[TMP15]], 1 // CHECK7-NEXT: [[ADD26:%.*]] = add nsw i32 0, [[MUL25]] -// CHECK7-NEXT: store i32 [[ADD26]], i32* [[I20]], align 4 -// CHECK7-NEXT: call void @_Z3fn6v() +// CHECK7-NEXT: store i32 [[ADD26]], i32* [[I20]], align 4, !llvm.access.group !14 +// CHECK7-NEXT: call void @_Z3fn6v(), !llvm.access.group !14 // CHECK7-NEXT: br label [[OMP_BODY_CONTINUE27:%.*]] // CHECK7: omp.body.continue27: // CHECK7-NEXT: br label [[OMP_INNER_FOR_INC28:%.*]] // CHECK7: omp.inner.for.inc28: -// CHECK7-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_IV19]], align 4 +// CHECK7-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_IV19]], align 4, !llvm.access.group !14 // CHECK7-NEXT: [[ADD29:%.*]] = add nsw i32 [[TMP16]], 1 -// CHECK7-NEXT: store i32 [[ADD29]], i32* [[DOTOMP_IV19]], align 4 +// CHECK7-NEXT: store i32 [[ADD29]], i32* [[DOTOMP_IV19]], align 4, !llvm.access.group !14 // CHECK7-NEXT: br label [[OMP_INNER_FOR_COND22]], !llvm.loop [[LOOP15:![0-9]+]] // CHECK7: omp.inner.for.end30: // CHECK7-NEXT: br label [[OMP_IF_END:%.*]] @@ -7003,23 +7003,23 @@ int main() { // CHECK7-NEXT: store i32 [[TMP0]], i32* [[DOTOMP_IV]], align 4 // CHECK7-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK7: omp.inner.for.cond: -// CHECK7-NEXT: [[TMP1:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK7-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK7-NEXT: [[TMP1:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !18 +// CHECK7-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !18 // CHECK7-NEXT: [[CMP:%.*]] = icmp sle i32 [[TMP1]], [[TMP2]] // CHECK7-NEXT: br i1 [[CMP]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK7: omp.inner.for.body: -// CHECK7-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK7-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !18 // CHECK7-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP3]], 1 // CHECK7-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK7-NEXT: store i32 [[ADD]], i32* [[I]], align 4 -// CHECK7-NEXT: call void @_Z3fn1v() +// CHECK7-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !18 +// CHECK7-NEXT: call void @_Z3fn1v(), !llvm.access.group !18 // CHECK7-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK7: omp.body.continue: // CHECK7-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK7: omp.inner.for.inc: -// CHECK7-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK7-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !18 // CHECK7-NEXT: [[ADD1:%.*]] = add nsw i32 [[TMP4]], 1 -// CHECK7-NEXT: store i32 [[ADD1]], i32* [[DOTOMP_IV]], align 4 +// CHECK7-NEXT: store i32 [[ADD1]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !18 // CHECK7-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP19:![0-9]+]] // CHECK7: omp.inner.for.end: // CHECK7-NEXT: store 
i32 100, i32* [[I]], align 4 @@ -7055,23 +7055,23 @@ int main() { // CHECK7-NEXT: store i32 [[TMP10]], i32* [[DOTOMP_IV19]], align 4 // CHECK7-NEXT: br label [[OMP_INNER_FOR_COND21:%.*]] // CHECK7: omp.inner.for.cond21: -// CHECK7-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV19]], align 4 -// CHECK7-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_UB18]], align 4 +// CHECK7-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV19]], align 4, !llvm.access.group !22 +// CHECK7-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_UB18]], align 4, !llvm.access.group !22 // CHECK7-NEXT: [[CMP22:%.*]] = icmp sle i32 [[TMP11]], [[TMP12]] // CHECK7-NEXT: br i1 [[CMP22]], label [[OMP_INNER_FOR_BODY23:%.*]], label [[OMP_INNER_FOR_END29:%.*]] // CHECK7: omp.inner.for.body23: -// CHECK7-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_IV19]], align 4 +// CHECK7-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_IV19]], align 4, !llvm.access.group !22 // CHECK7-NEXT: [[MUL24:%.*]] = mul nsw i32 [[TMP13]], 1 // CHECK7-NEXT: [[ADD25:%.*]] = add nsw i32 0, [[MUL24]] -// CHECK7-NEXT: store i32 [[ADD25]], i32* [[I20]], align 4 -// CHECK7-NEXT: call void @_Z3fn3v() +// CHECK7-NEXT: store i32 [[ADD25]], i32* [[I20]], align 4, !llvm.access.group !22 +// CHECK7-NEXT: call void @_Z3fn3v(), !llvm.access.group !22 // CHECK7-NEXT: br label [[OMP_BODY_CONTINUE26:%.*]] // CHECK7: omp.body.continue26: // CHECK7-NEXT: br label [[OMP_INNER_FOR_INC27:%.*]] // CHECK7: omp.inner.for.inc27: -// CHECK7-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IV19]], align 4 +// CHECK7-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IV19]], align 4, !llvm.access.group !22 // CHECK7-NEXT: [[ADD28:%.*]] = add nsw i32 [[TMP14]], 1 -// CHECK7-NEXT: store i32 [[ADD28]], i32* [[DOTOMP_IV19]], align 4 +// CHECK7-NEXT: store i32 [[ADD28]], i32* [[DOTOMP_IV19]], align 4, !llvm.access.group !22 // CHECK7-NEXT: br label [[OMP_INNER_FOR_COND21]], !llvm.loop [[LOOP23:![0-9]+]] // CHECK7: omp.inner.for.end29: // CHECK7-NEXT: store i32 100, i32* [[I20]], align 4 @@ -7097,22 +7097,22 @@ int main() { // CHECK8-NEXT: store i32 [[TMP0]], i32* [[DOTOMP_IV]], align 4 // CHECK8-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK8: omp.inner.for.cond: -// CHECK8-NEXT: [[TMP1:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK8-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK8-NEXT: [[TMP1:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !2 +// CHECK8-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !2 // CHECK8-NEXT: [[CMP:%.*]] = icmp sle i32 [[TMP1]], [[TMP2]] // CHECK8-NEXT: br i1 [[CMP]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK8: omp.inner.for.body: -// CHECK8-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK8-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !2 // CHECK8-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP3]], 1 // CHECK8-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK8-NEXT: store i32 [[ADD]], i32* [[I]], align 4 +// CHECK8-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !2 // CHECK8-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK8: omp.body.continue: // CHECK8-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK8: omp.inner.for.inc: -// CHECK8-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK8-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !2 // CHECK8-NEXT: [[ADD1:%.*]] = add nsw i32 [[TMP4]], 1 -// CHECK8-NEXT: store i32 [[ADD1]], i32* [[DOTOMP_IV]], 
align 4 +// CHECK8-NEXT: store i32 [[ADD1]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !2 // CHECK8-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP3:![0-9]+]] // CHECK8: omp.inner.for.end: // CHECK8-NEXT: store i32 100, i32* [[I]], align 4 @@ -7122,23 +7122,23 @@ int main() { // CHECK8-NEXT: store i32 [[TMP5]], i32* [[DOTOMP_IV5]], align 4 // CHECK8-NEXT: br label [[OMP_INNER_FOR_COND7:%.*]] // CHECK8: omp.inner.for.cond7: -// CHECK8-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV5]], align 4 -// CHECK8-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_UB4]], align 4 +// CHECK8-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV5]], align 4, !llvm.access.group !6 +// CHECK8-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_UB4]], align 4, !llvm.access.group !6 // CHECK8-NEXT: [[CMP8:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] // CHECK8-NEXT: br i1 [[CMP8]], label [[OMP_INNER_FOR_BODY9:%.*]], label [[OMP_INNER_FOR_END15:%.*]] // CHECK8: omp.inner.for.body9: -// CHECK8-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV5]], align 4 +// CHECK8-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV5]], align 4, !llvm.access.group !6 // CHECK8-NEXT: [[MUL10:%.*]] = mul nsw i32 [[TMP8]], 1 // CHECK8-NEXT: [[ADD11:%.*]] = add nsw i32 0, [[MUL10]] -// CHECK8-NEXT: store i32 [[ADD11]], i32* [[I6]], align 4 -// CHECK8-NEXT: call void @_Z9gtid_testv() +// CHECK8-NEXT: store i32 [[ADD11]], i32* [[I6]], align 4, !llvm.access.group !6 +// CHECK8-NEXT: call void @_Z9gtid_testv(), !llvm.access.group !6 // CHECK8-NEXT: br label [[OMP_BODY_CONTINUE12:%.*]] // CHECK8: omp.body.continue12: // CHECK8-NEXT: br label [[OMP_INNER_FOR_INC13:%.*]] // CHECK8: omp.inner.for.inc13: -// CHECK8-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV5]], align 4 +// CHECK8-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV5]], align 4, !llvm.access.group !6 // CHECK8-NEXT: [[ADD14:%.*]] = add nsw i32 [[TMP9]], 1 -// CHECK8-NEXT: store i32 [[ADD14]], i32* [[DOTOMP_IV5]], align 4 +// CHECK8-NEXT: store i32 [[ADD14]], i32* [[DOTOMP_IV5]], align 4, !llvm.access.group !6 // CHECK8-NEXT: br label [[OMP_INNER_FOR_COND7]], !llvm.loop [[LOOP7:![0-9]+]] // CHECK8: omp.inner.for.end15: // CHECK8-NEXT: store i32 100, i32* [[I6]], align 4 @@ -7172,23 +7172,23 @@ int main() { // CHECK8-NEXT: store i32 [[TMP0]], i32* [[DOTOMP_IV]], align 4 // CHECK8-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK8: omp.inner.for.cond: -// CHECK8-NEXT: [[TMP1:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK8-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK8-NEXT: [[TMP1:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !9 +// CHECK8-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !9 // CHECK8-NEXT: [[CMP:%.*]] = icmp sle i32 [[TMP1]], [[TMP2]] // CHECK8-NEXT: br i1 [[CMP]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK8: omp.inner.for.body: -// CHECK8-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK8-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !9 // CHECK8-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP3]], 1 // CHECK8-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK8-NEXT: store i32 [[ADD]], i32* [[I]], align 4 -// CHECK8-NEXT: call void @_Z3fn4v() +// CHECK8-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !9 +// CHECK8-NEXT: call void @_Z3fn4v(), !llvm.access.group !9 // CHECK8-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK8: omp.body.continue: // CHECK8-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // 
CHECK8: omp.inner.for.inc: -// CHECK8-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK8-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !9 // CHECK8-NEXT: [[ADD1:%.*]] = add nsw i32 [[TMP4]], 1 -// CHECK8-NEXT: store i32 [[ADD1]], i32* [[DOTOMP_IV]], align 4 +// CHECK8-NEXT: store i32 [[ADD1]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !9 // CHECK8-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP10:![0-9]+]] // CHECK8: omp.inner.for.end: // CHECK8-NEXT: store i32 100, i32* [[I]], align 4 @@ -7232,23 +7232,23 @@ int main() { // CHECK8: omp_if.then: // CHECK8-NEXT: br label [[OMP_INNER_FOR_COND22:%.*]] // CHECK8: omp.inner.for.cond22: -// CHECK8-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_IV19]], align 4 -// CHECK8-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_UB18]], align 4 +// CHECK8-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_IV19]], align 4, !llvm.access.group !14 +// CHECK8-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_UB18]], align 4, !llvm.access.group !14 // CHECK8-NEXT: [[CMP23:%.*]] = icmp sle i32 [[TMP13]], [[TMP14]] // CHECK8-NEXT: br i1 [[CMP23]], label [[OMP_INNER_FOR_BODY24:%.*]], label [[OMP_INNER_FOR_END30:%.*]] // CHECK8: omp.inner.for.body24: -// CHECK8-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_IV19]], align 4 +// CHECK8-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_IV19]], align 4, !llvm.access.group !14 // CHECK8-NEXT: [[MUL25:%.*]] = mul nsw i32 [[TMP15]], 1 // CHECK8-NEXT: [[ADD26:%.*]] = add nsw i32 0, [[MUL25]] -// CHECK8-NEXT: store i32 [[ADD26]], i32* [[I20]], align 4 -// CHECK8-NEXT: call void @_Z3fn6v() +// CHECK8-NEXT: store i32 [[ADD26]], i32* [[I20]], align 4, !llvm.access.group !14 +// CHECK8-NEXT: call void @_Z3fn6v(), !llvm.access.group !14 // CHECK8-NEXT: br label [[OMP_BODY_CONTINUE27:%.*]] // CHECK8: omp.body.continue27: // CHECK8-NEXT: br label [[OMP_INNER_FOR_INC28:%.*]] // CHECK8: omp.inner.for.inc28: -// CHECK8-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_IV19]], align 4 +// CHECK8-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_IV19]], align 4, !llvm.access.group !14 // CHECK8-NEXT: [[ADD29:%.*]] = add nsw i32 [[TMP16]], 1 -// CHECK8-NEXT: store i32 [[ADD29]], i32* [[DOTOMP_IV19]], align 4 +// CHECK8-NEXT: store i32 [[ADD29]], i32* [[DOTOMP_IV19]], align 4, !llvm.access.group !14 // CHECK8-NEXT: br label [[OMP_INNER_FOR_COND22]], !llvm.loop [[LOOP15:![0-9]+]] // CHECK8: omp.inner.for.end30: // CHECK8-NEXT: br label [[OMP_IF_END:%.*]] @@ -7308,23 +7308,23 @@ int main() { // CHECK8-NEXT: store i32 [[TMP0]], i32* [[DOTOMP_IV]], align 4 // CHECK8-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK8: omp.inner.for.cond: -// CHECK8-NEXT: [[TMP1:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK8-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK8-NEXT: [[TMP1:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !18 +// CHECK8-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !18 // CHECK8-NEXT: [[CMP:%.*]] = icmp sle i32 [[TMP1]], [[TMP2]] // CHECK8-NEXT: br i1 [[CMP]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK8: omp.inner.for.body: -// CHECK8-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK8-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !18 // CHECK8-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP3]], 1 // CHECK8-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK8-NEXT: store i32 [[ADD]], i32* [[I]], align 4 -// CHECK8-NEXT: call void 
@_Z3fn1v() +// CHECK8-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !18 +// CHECK8-NEXT: call void @_Z3fn1v(), !llvm.access.group !18 // CHECK8-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK8: omp.body.continue: // CHECK8-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK8: omp.inner.for.inc: -// CHECK8-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK8-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !18 // CHECK8-NEXT: [[ADD1:%.*]] = add nsw i32 [[TMP4]], 1 -// CHECK8-NEXT: store i32 [[ADD1]], i32* [[DOTOMP_IV]], align 4 +// CHECK8-NEXT: store i32 [[ADD1]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !18 // CHECK8-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP19:![0-9]+]] // CHECK8: omp.inner.for.end: // CHECK8-NEXT: store i32 100, i32* [[I]], align 4 @@ -7360,23 +7360,23 @@ int main() { // CHECK8-NEXT: store i32 [[TMP10]], i32* [[DOTOMP_IV19]], align 4 // CHECK8-NEXT: br label [[OMP_INNER_FOR_COND21:%.*]] // CHECK8: omp.inner.for.cond21: -// CHECK8-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV19]], align 4 -// CHECK8-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_UB18]], align 4 +// CHECK8-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV19]], align 4, !llvm.access.group !22 +// CHECK8-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_UB18]], align 4, !llvm.access.group !22 // CHECK8-NEXT: [[CMP22:%.*]] = icmp sle i32 [[TMP11]], [[TMP12]] // CHECK8-NEXT: br i1 [[CMP22]], label [[OMP_INNER_FOR_BODY23:%.*]], label [[OMP_INNER_FOR_END29:%.*]] // CHECK8: omp.inner.for.body23: -// CHECK8-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_IV19]], align 4 +// CHECK8-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_IV19]], align 4, !llvm.access.group !22 // CHECK8-NEXT: [[MUL24:%.*]] = mul nsw i32 [[TMP13]], 1 // CHECK8-NEXT: [[ADD25:%.*]] = add nsw i32 0, [[MUL24]] -// CHECK8-NEXT: store i32 [[ADD25]], i32* [[I20]], align 4 -// CHECK8-NEXT: call void @_Z3fn3v() +// CHECK8-NEXT: store i32 [[ADD25]], i32* [[I20]], align 4, !llvm.access.group !22 +// CHECK8-NEXT: call void @_Z3fn3v(), !llvm.access.group !22 // CHECK8-NEXT: br label [[OMP_BODY_CONTINUE26:%.*]] // CHECK8: omp.body.continue26: // CHECK8-NEXT: br label [[OMP_INNER_FOR_INC27:%.*]] // CHECK8: omp.inner.for.inc27: -// CHECK8-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IV19]], align 4 +// CHECK8-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IV19]], align 4, !llvm.access.group !22 // CHECK8-NEXT: [[ADD28:%.*]] = add nsw i32 [[TMP14]], 1 -// CHECK8-NEXT: store i32 [[ADD28]], i32* [[DOTOMP_IV19]], align 4 +// CHECK8-NEXT: store i32 [[ADD28]], i32* [[DOTOMP_IV19]], align 4, !llvm.access.group !22 // CHECK8-NEXT: br label [[OMP_INNER_FOR_COND21]], !llvm.loop [[LOOP23:![0-9]+]] // CHECK8: omp.inner.for.end29: // CHECK8-NEXT: store i32 100, i32* [[I20]], align 4 @@ -7450,22 +7450,22 @@ int main() { // CHECK9-NEXT: store i32 [[TMP4]], i32* [[DOTOMP_IV]], align 4 // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK9: omp.inner.for.cond: -// CHECK9-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK9-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK9-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !15 +// CHECK9-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !15 // CHECK9-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] // CHECK9-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK9: omp.inner.for.body: -// 
CHECK9-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK9-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !15 // CHECK9-NEXT: [[TMP8:%.*]] = zext i32 [[TMP7]] to i64 -// CHECK9-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK9-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !15 // CHECK9-NEXT: [[TMP10:%.*]] = zext i32 [[TMP9]] to i64 -// CHECK9-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 2, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64)* @.omp_outlined..1 to void (i32*, i32*, ...)*), i64 [[TMP8]], i64 [[TMP10]]) +// CHECK9-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 2, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64)* @.omp_outlined..1 to void (i32*, i32*, ...)*), i64 [[TMP8]], i64 [[TMP10]]), !llvm.access.group !15 // CHECK9-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK9: omp.inner.for.inc: -// CHECK9-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK9-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK9-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !15 +// CHECK9-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !15 // CHECK9-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP11]], [[TMP12]] -// CHECK9-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !15 // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP16:![0-9]+]] // CHECK9: omp.inner.for.end: // CHECK9-NEXT: br label [[OMP_LOOP_EXIT:%.*]] @@ -7527,22 +7527,22 @@ int main() { // CHECK9-NEXT: store i32 [[TMP6]], i32* [[DOTOMP_IV]], align 4 // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK9: omp.inner.for.cond: -// CHECK9-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK9-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK9-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !19 +// CHECK9-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !19 // CHECK9-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP7]], [[TMP8]] // CHECK9-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK9: omp.inner.for.body: -// CHECK9-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !19 // CHECK9-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP9]], 1 // CHECK9-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK9-NEXT: store i32 [[ADD]], i32* [[I]], align 4 +// CHECK9-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !19 // CHECK9-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK9: omp.body.continue: // CHECK9-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK9: omp.inner.for.inc: -// CHECK9-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !19 // CHECK9-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP10]], 1 -// CHECK9-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !19 // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP20:![0-9]+]] // 
CHECK9: omp.inner.for.end: // CHECK9-NEXT: br label [[OMP_LOOP_EXIT:%.*]] @@ -7602,26 +7602,26 @@ int main() { // CHECK9-NEXT: store i32 [[TMP4]], i32* [[DOTOMP_IV]], align 4 // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK9: omp.inner.for.cond: -// CHECK9-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK9-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK9-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !24 +// CHECK9-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !24 // CHECK9-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] // CHECK9-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK9: omp.inner.for.body: -// CHECK9-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK9-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !24 // CHECK9-NEXT: [[TMP8:%.*]] = zext i32 [[TMP7]] to i64 -// CHECK9-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK9-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !24 // CHECK9-NEXT: [[TMP10:%.*]] = zext i32 [[TMP9]] to i64 -// CHECK9-NEXT: call void @__kmpc_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]) -// CHECK9-NEXT: [[TMP11:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK9-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4 -// CHECK9-NEXT: call void @.omp_outlined..3(i32* [[TMP11]], i32* [[DOTBOUND_ZERO_ADDR]], i64 [[TMP8]], i64 [[TMP10]]) #[[ATTR2]] -// CHECK9-NEXT: call void @__kmpc_end_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]) +// CHECK9-NEXT: call void @__kmpc_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]), !llvm.access.group !24 +// CHECK9-NEXT: [[TMP11:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8, !llvm.access.group !24 +// CHECK9-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4, !llvm.access.group !24 +// CHECK9-NEXT: call void @.omp_outlined..3(i32* [[TMP11]], i32* [[DOTBOUND_ZERO_ADDR]], i64 [[TMP8]], i64 [[TMP10]]) #[[ATTR2]], !llvm.access.group !24 +// CHECK9-NEXT: call void @__kmpc_end_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]), !llvm.access.group !24 // CHECK9-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK9: omp.inner.for.inc: -// CHECK9-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK9-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK9-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !24 +// CHECK9-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !24 // CHECK9-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP12]], [[TMP13]] -// CHECK9-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !24 // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP25:![0-9]+]] // CHECK9: omp.inner.for.end: // CHECK9-NEXT: br label [[OMP_LOOP_EXIT:%.*]] @@ -7683,23 +7683,23 @@ int main() { // CHECK9-NEXT: store i32 [[TMP6]], i32* [[DOTOMP_IV]], align 4 // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK9: omp.inner.for.cond: -// CHECK9-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK9-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK9-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !27 +// 
CHECK9-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !27 // CHECK9-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP7]], [[TMP8]] // CHECK9-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK9: omp.inner.for.body: -// CHECK9-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !27 // CHECK9-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP9]], 1 // CHECK9-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK9-NEXT: store i32 [[ADD]], i32* [[I]], align 4 -// CHECK9-NEXT: call void @_Z9gtid_testv() +// CHECK9-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !27 +// CHECK9-NEXT: call void @_Z9gtid_testv(), !llvm.access.group !27 // CHECK9-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK9: omp.body.continue: // CHECK9-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK9: omp.inner.for.inc: -// CHECK9-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !27 // CHECK9-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP10]], 1 -// CHECK9-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !27 // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP28:![0-9]+]] // CHECK9: omp.inner.for.end: // CHECK9-NEXT: br label [[OMP_LOOP_EXIT:%.*]] @@ -7813,22 +7813,22 @@ int main() { // CHECK9-NEXT: store i32 [[TMP4]], i32* [[DOTOMP_IV]], align 4 // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK9: omp.inner.for.cond: -// CHECK9-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK9-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK9-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !30 +// CHECK9-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !30 // CHECK9-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] // CHECK9-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK9: omp.inner.for.body: -// CHECK9-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK9-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !30 // CHECK9-NEXT: [[TMP8:%.*]] = zext i32 [[TMP7]] to i64 -// CHECK9-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK9-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !30 // CHECK9-NEXT: [[TMP10:%.*]] = zext i32 [[TMP9]] to i64 -// CHECK9-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 2, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64)* @.omp_outlined..5 to void (i32*, i32*, ...)*), i64 [[TMP8]], i64 [[TMP10]]) +// CHECK9-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) 
@__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 2, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64)* @.omp_outlined..5 to void (i32*, i32*, ...)*), i64 [[TMP8]], i64 [[TMP10]]), !llvm.access.group !30 // CHECK9-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK9: omp.inner.for.inc: -// CHECK9-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK9-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK9-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !30 +// CHECK9-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !30 // CHECK9-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP11]], [[TMP12]] -// CHECK9-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !30 // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP31:![0-9]+]] // CHECK9: omp.inner.for.end: // CHECK9-NEXT: br label [[OMP_LOOP_EXIT:%.*]] @@ -7890,23 +7890,23 @@ int main() { // CHECK9-NEXT: store i32 [[TMP6]], i32* [[DOTOMP_IV]], align 4 // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK9: omp.inner.for.cond: -// CHECK9-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK9-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK9-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !33 +// CHECK9-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !33 // CHECK9-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP7]], [[TMP8]] // CHECK9-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK9: omp.inner.for.body: -// CHECK9-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !33 // CHECK9-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP9]], 1 // CHECK9-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK9-NEXT: store i32 [[ADD]], i32* [[I]], align 4 -// CHECK9-NEXT: call void @_Z3fn4v() +// CHECK9-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !33 +// CHECK9-NEXT: call void @_Z3fn4v(), !llvm.access.group !33 // CHECK9-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK9: omp.body.continue: // CHECK9-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK9: omp.inner.for.inc: -// CHECK9-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !33 // CHECK9-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP10]], 1 -// CHECK9-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !33 // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP34:![0-9]+]] // CHECK9: omp.inner.for.end: // CHECK9-NEXT: br label [[OMP_LOOP_EXIT:%.*]] @@ -7966,26 +7966,26 @@ int main() { // CHECK9-NEXT: store i32 [[TMP4]], i32* [[DOTOMP_IV]], align 4 // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK9: omp.inner.for.cond: -// CHECK9-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK9-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK9-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !36 +// CHECK9-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !36 // CHECK9-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] // CHECK9-NEXT: br i1 [[CMP1]], label 
[[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK9: omp.inner.for.body: -// CHECK9-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK9-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !36 // CHECK9-NEXT: [[TMP8:%.*]] = zext i32 [[TMP7]] to i64 -// CHECK9-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK9-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !36 // CHECK9-NEXT: [[TMP10:%.*]] = zext i32 [[TMP9]] to i64 -// CHECK9-NEXT: call void @__kmpc_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]) -// CHECK9-NEXT: [[TMP11:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK9-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4 -// CHECK9-NEXT: call void @.omp_outlined..7(i32* [[TMP11]], i32* [[DOTBOUND_ZERO_ADDR]], i64 [[TMP8]], i64 [[TMP10]]) #[[ATTR2]] -// CHECK9-NEXT: call void @__kmpc_end_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]) +// CHECK9-NEXT: call void @__kmpc_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]), !llvm.access.group !36 +// CHECK9-NEXT: [[TMP11:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8, !llvm.access.group !36 +// CHECK9-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4, !llvm.access.group !36 +// CHECK9-NEXT: call void @.omp_outlined..7(i32* [[TMP11]], i32* [[DOTBOUND_ZERO_ADDR]], i64 [[TMP8]], i64 [[TMP10]]) #[[ATTR2]], !llvm.access.group !36 +// CHECK9-NEXT: call void @__kmpc_end_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]), !llvm.access.group !36 // CHECK9-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK9: omp.inner.for.inc: -// CHECK9-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK9-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK9-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !36 +// CHECK9-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !36 // CHECK9-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP12]], [[TMP13]] -// CHECK9-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !36 // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP37:![0-9]+]] // CHECK9: omp.inner.for.end: // CHECK9-NEXT: br label [[OMP_LOOP_EXIT:%.*]] @@ -8047,23 +8047,23 @@ int main() { // CHECK9-NEXT: store i32 [[TMP6]], i32* [[DOTOMP_IV]], align 4 // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK9: omp.inner.for.cond: -// CHECK9-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK9-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK9-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !39 +// CHECK9-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !39 // CHECK9-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP7]], [[TMP8]] // CHECK9-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK9: omp.inner.for.body: -// CHECK9-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !39 // CHECK9-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP9]], 1 // CHECK9-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK9-NEXT: store i32 [[ADD]], i32* [[I]], align 4 -// CHECK9-NEXT: call void @_Z3fn5v() +// CHECK9-NEXT: store i32 [[ADD]], i32* [[I]], align 4, 
!llvm.access.group !39 +// CHECK9-NEXT: call void @_Z3fn5v(), !llvm.access.group !39 // CHECK9-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK9: omp.body.continue: // CHECK9-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK9: omp.inner.for.inc: -// CHECK9-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !39 // CHECK9-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP10]], 1 -// CHECK9-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !39 // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP40:![0-9]+]] // CHECK9: omp.inner.for.end: // CHECK9-NEXT: br label [[OMP_LOOP_EXIT:%.*]] @@ -8129,35 +8129,35 @@ int main() { // CHECK9-NEXT: store i32 [[TMP5]], i32* [[DOTOMP_IV]], align 4 // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK9: omp.inner.for.cond: -// CHECK9-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK9-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK9-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !42 +// CHECK9-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !42 // CHECK9-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] // CHECK9-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK9: omp.inner.for.body: -// CHECK9-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK9-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !42 // CHECK9-NEXT: [[TMP9:%.*]] = zext i32 [[TMP8]] to i64 -// CHECK9-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK9-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !42 // CHECK9-NEXT: [[TMP11:%.*]] = zext i32 [[TMP10]] to i64 -// CHECK9-NEXT: [[TMP12:%.*]] = load i32, i32* [[TMP0]], align 4 +// CHECK9-NEXT: [[TMP12:%.*]] = load i32, i32* [[TMP0]], align 4, !llvm.access.group !42 // CHECK9-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[TMP12]], 0 // CHECK9-NEXT: br i1 [[TOBOOL]], label [[OMP_IF_THEN:%.*]], label [[OMP_IF_ELSE:%.*]] // CHECK9: omp_if.then: -// CHECK9-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 2, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64)* @.omp_outlined..9 to void (i32*, i32*, ...)*), i64 [[TMP9]], i64 [[TMP11]]) +// CHECK9-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) 
@__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 2, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64)* @.omp_outlined..9 to void (i32*, i32*, ...)*), i64 [[TMP9]], i64 [[TMP11]]), !llvm.access.group !42 // CHECK9-NEXT: br label [[OMP_IF_END:%.*]] // CHECK9: omp_if.else: -// CHECK9-NEXT: call void @__kmpc_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP2]]) -// CHECK9-NEXT: [[TMP13:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK9-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4 -// CHECK9-NEXT: call void @.omp_outlined..9(i32* [[TMP13]], i32* [[DOTBOUND_ZERO_ADDR]], i64 [[TMP9]], i64 [[TMP11]]) #[[ATTR2]] -// CHECK9-NEXT: call void @__kmpc_end_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP2]]) +// CHECK9-NEXT: call void @__kmpc_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP2]]), !llvm.access.group !42 +// CHECK9-NEXT: [[TMP13:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8, !llvm.access.group !42 +// CHECK9-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4, !llvm.access.group !42 +// CHECK9-NEXT: call void @.omp_outlined..9(i32* [[TMP13]], i32* [[DOTBOUND_ZERO_ADDR]], i64 [[TMP9]], i64 [[TMP11]]) #[[ATTR2]], !llvm.access.group !42 +// CHECK9-NEXT: call void @__kmpc_end_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP2]]), !llvm.access.group !42 // CHECK9-NEXT: br label [[OMP_IF_END]] // CHECK9: omp_if.end: // CHECK9-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK9: omp.inner.for.inc: -// CHECK9-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK9-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK9-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !42 +// CHECK9-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !42 // CHECK9-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP14]], [[TMP15]] -// CHECK9-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !42 // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP43:![0-9]+]] // CHECK9: omp.inner.for.end: // CHECK9-NEXT: br label [[OMP_LOOP_EXIT:%.*]] @@ -8219,23 +8219,23 @@ int main() { // CHECK9-NEXT: store i32 [[TMP6]], i32* [[DOTOMP_IV]], align 4 // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK9: omp.inner.for.cond: -// CHECK9-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK9-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK9-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !45 +// CHECK9-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !45 // CHECK9-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP7]], [[TMP8]] // CHECK9-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK9: omp.inner.for.body: -// CHECK9-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !45 // CHECK9-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP9]], 1 // CHECK9-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK9-NEXT: store i32 [[ADD]], i32* [[I]], align 4 -// CHECK9-NEXT: call void @_Z3fn6v() +// CHECK9-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !45 +// CHECK9-NEXT: call void @_Z3fn6v(), !llvm.access.group !45 // CHECK9-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK9: omp.body.continue: // CHECK9-NEXT: br label 
[[OMP_INNER_FOR_INC:%.*]] // CHECK9: omp.inner.for.inc: -// CHECK9-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !45 // CHECK9-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP10]], 1 -// CHECK9-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !45 // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP46:![0-9]+]] // CHECK9: omp.inner.for.end: // CHECK9-NEXT: br label [[OMP_LOOP_EXIT:%.*]] @@ -8347,22 +8347,22 @@ int main() { // CHECK9-NEXT: store i32 [[TMP4]], i32* [[DOTOMP_IV]], align 4 // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK9: omp.inner.for.cond: -// CHECK9-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK9-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK9-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !48 +// CHECK9-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !48 // CHECK9-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] // CHECK9-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK9: omp.inner.for.body: -// CHECK9-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK9-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !48 // CHECK9-NEXT: [[TMP8:%.*]] = zext i32 [[TMP7]] to i64 -// CHECK9-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK9-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !48 // CHECK9-NEXT: [[TMP10:%.*]] = zext i32 [[TMP9]] to i64 -// CHECK9-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 2, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64)* @.omp_outlined..11 to void (i32*, i32*, ...)*), i64 [[TMP8]], i64 [[TMP10]]) +// CHECK9-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) 
@__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 2, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64)* @.omp_outlined..11 to void (i32*, i32*, ...)*), i64 [[TMP8]], i64 [[TMP10]]), !llvm.access.group !48 // CHECK9-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK9: omp.inner.for.inc: -// CHECK9-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK9-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK9-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !48 +// CHECK9-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !48 // CHECK9-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP11]], [[TMP12]] -// CHECK9-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !48 // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP49:![0-9]+]] // CHECK9: omp.inner.for.end: // CHECK9-NEXT: br label [[OMP_LOOP_EXIT:%.*]] @@ -8424,23 +8424,23 @@ int main() { // CHECK9-NEXT: store i32 [[TMP6]], i32* [[DOTOMP_IV]], align 4 // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK9: omp.inner.for.cond: -// CHECK9-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK9-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK9-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !51 +// CHECK9-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !51 // CHECK9-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP7]], [[TMP8]] // CHECK9-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK9: omp.inner.for.body: -// CHECK9-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !51 // CHECK9-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP9]], 1 // CHECK9-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK9-NEXT: store i32 [[ADD]], i32* [[I]], align 4 -// CHECK9-NEXT: call void @_Z3fn1v() +// CHECK9-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !51 +// CHECK9-NEXT: call void @_Z3fn1v(), !llvm.access.group !51 // CHECK9-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK9: omp.body.continue: // CHECK9-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK9: omp.inner.for.inc: -// CHECK9-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !51 // CHECK9-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP10]], 1 -// CHECK9-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !51 // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP52:![0-9]+]] // CHECK9: omp.inner.for.end: // CHECK9-NEXT: br label [[OMP_LOOP_EXIT:%.*]] @@ -8500,26 +8500,26 @@ int main() { // CHECK9-NEXT: store i32 [[TMP4]], i32* [[DOTOMP_IV]], align 4 // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK9: omp.inner.for.cond: -// CHECK9-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK9-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK9-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !54 +// CHECK9-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !54 // CHECK9-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] // CHECK9-NEXT: br i1 [[CMP1]], label 
[[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK9: omp.inner.for.body: -// CHECK9-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK9-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !54 // CHECK9-NEXT: [[TMP8:%.*]] = zext i32 [[TMP7]] to i64 -// CHECK9-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK9-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !54 // CHECK9-NEXT: [[TMP10:%.*]] = zext i32 [[TMP9]] to i64 -// CHECK9-NEXT: call void @__kmpc_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]) -// CHECK9-NEXT: [[TMP11:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK9-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4 -// CHECK9-NEXT: call void @.omp_outlined..13(i32* [[TMP11]], i32* [[DOTBOUND_ZERO_ADDR]], i64 [[TMP8]], i64 [[TMP10]]) #[[ATTR2]] -// CHECK9-NEXT: call void @__kmpc_end_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]) +// CHECK9-NEXT: call void @__kmpc_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]), !llvm.access.group !54 +// CHECK9-NEXT: [[TMP11:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8, !llvm.access.group !54 +// CHECK9-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4, !llvm.access.group !54 +// CHECK9-NEXT: call void @.omp_outlined..13(i32* [[TMP11]], i32* [[DOTBOUND_ZERO_ADDR]], i64 [[TMP8]], i64 [[TMP10]]) #[[ATTR2]], !llvm.access.group !54 +// CHECK9-NEXT: call void @__kmpc_end_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]), !llvm.access.group !54 // CHECK9-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK9: omp.inner.for.inc: -// CHECK9-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK9-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK9-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !54 +// CHECK9-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !54 // CHECK9-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP12]], [[TMP13]] -// CHECK9-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !54 // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP55:![0-9]+]] // CHECK9: omp.inner.for.end: // CHECK9-NEXT: br label [[OMP_LOOP_EXIT:%.*]] @@ -8581,23 +8581,23 @@ int main() { // CHECK9-NEXT: store i32 [[TMP6]], i32* [[DOTOMP_IV]], align 4 // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK9: omp.inner.for.cond: -// CHECK9-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK9-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK9-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !57 +// CHECK9-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !57 // CHECK9-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP7]], [[TMP8]] // CHECK9-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK9: omp.inner.for.body: -// CHECK9-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !57 // CHECK9-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP9]], 1 // CHECK9-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK9-NEXT: store i32 [[ADD]], i32* [[I]], align 4 -// CHECK9-NEXT: call void @_Z3fn2v() +// CHECK9-NEXT: store i32 [[ADD]], i32* [[I]], align 4, 
!llvm.access.group !57 +// CHECK9-NEXT: call void @_Z3fn2v(), !llvm.access.group !57 // CHECK9-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK9: omp.body.continue: // CHECK9-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK9: omp.inner.for.inc: -// CHECK9-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !57 // CHECK9-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP10]], 1 -// CHECK9-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !57 // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP58:![0-9]+]] // CHECK9: omp.inner.for.end: // CHECK9-NEXT: br label [[OMP_LOOP_EXIT:%.*]] @@ -8663,35 +8663,35 @@ int main() { // CHECK9-NEXT: store i32 [[TMP5]], i32* [[DOTOMP_IV]], align 4 // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK9: omp.inner.for.cond: -// CHECK9-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK9-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK9-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !60 +// CHECK9-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !60 // CHECK9-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] // CHECK9-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK9: omp.inner.for.body: -// CHECK9-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK9-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !60 // CHECK9-NEXT: [[TMP9:%.*]] = zext i32 [[TMP8]] to i64 -// CHECK9-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK9-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !60 // CHECK9-NEXT: [[TMP11:%.*]] = zext i32 [[TMP10]] to i64 -// CHECK9-NEXT: [[TMP12:%.*]] = load i32, i32* [[TMP0]], align 4 +// CHECK9-NEXT: [[TMP12:%.*]] = load i32, i32* [[TMP0]], align 4, !llvm.access.group !60 // CHECK9-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[TMP12]], 0 // CHECK9-NEXT: br i1 [[TOBOOL]], label [[OMP_IF_THEN:%.*]], label [[OMP_IF_ELSE:%.*]] // CHECK9: omp_if.then: -// CHECK9-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 2, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64)* @.omp_outlined..15 to void (i32*, i32*, ...)*), i64 [[TMP9]], i64 [[TMP11]]) +// CHECK9-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) 
@__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 2, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64)* @.omp_outlined..15 to void (i32*, i32*, ...)*), i64 [[TMP9]], i64 [[TMP11]]), !llvm.access.group !60 // CHECK9-NEXT: br label [[OMP_IF_END:%.*]] // CHECK9: omp_if.else: -// CHECK9-NEXT: call void @__kmpc_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP2]]) -// CHECK9-NEXT: [[TMP13:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK9-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4 -// CHECK9-NEXT: call void @.omp_outlined..15(i32* [[TMP13]], i32* [[DOTBOUND_ZERO_ADDR]], i64 [[TMP9]], i64 [[TMP11]]) #[[ATTR2]] -// CHECK9-NEXT: call void @__kmpc_end_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP2]]) +// CHECK9-NEXT: call void @__kmpc_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP2]]), !llvm.access.group !60 +// CHECK9-NEXT: [[TMP13:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8, !llvm.access.group !60 +// CHECK9-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4, !llvm.access.group !60 +// CHECK9-NEXT: call void @.omp_outlined..15(i32* [[TMP13]], i32* [[DOTBOUND_ZERO_ADDR]], i64 [[TMP9]], i64 [[TMP11]]) #[[ATTR2]], !llvm.access.group !60 +// CHECK9-NEXT: call void @__kmpc_end_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP2]]), !llvm.access.group !60 // CHECK9-NEXT: br label [[OMP_IF_END]] // CHECK9: omp_if.end: // CHECK9-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK9: omp.inner.for.inc: -// CHECK9-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK9-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK9-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !60 +// CHECK9-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !60 // CHECK9-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP14]], [[TMP15]] -// CHECK9-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !60 // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP61:![0-9]+]] // CHECK9: omp.inner.for.end: // CHECK9-NEXT: br label [[OMP_LOOP_EXIT:%.*]] @@ -8753,23 +8753,23 @@ int main() { // CHECK9-NEXT: store i32 [[TMP6]], i32* [[DOTOMP_IV]], align 4 // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK9: omp.inner.for.cond: -// CHECK9-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK9-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK9-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !63 +// CHECK9-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !63 // CHECK9-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP7]], [[TMP8]] // CHECK9-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK9: omp.inner.for.body: -// CHECK9-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !63 // CHECK9-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP9]], 1 // CHECK9-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK9-NEXT: store i32 [[ADD]], i32* [[I]], align 4 -// CHECK9-NEXT: call void @_Z3fn3v() +// CHECK9-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !63 +// CHECK9-NEXT: call void @_Z3fn3v(), !llvm.access.group !63 // CHECK9-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK9: omp.body.continue: // CHECK9-NEXT: br label 
[[OMP_INNER_FOR_INC:%.*]] // CHECK9: omp.inner.for.inc: -// CHECK9-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !63 // CHECK9-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP10]], 1 -// CHECK9-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !63 // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP64:![0-9]+]] // CHECK9: omp.inner.for.end: // CHECK9-NEXT: br label [[OMP_LOOP_EXIT:%.*]] @@ -8859,22 +8859,22 @@ int main() { // CHECK10-NEXT: store i32 [[TMP4]], i32* [[DOTOMP_IV]], align 4 // CHECK10-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK10: omp.inner.for.cond: -// CHECK10-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK10-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK10-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !15 +// CHECK10-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !15 // CHECK10-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] // CHECK10-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK10: omp.inner.for.body: -// CHECK10-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK10-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !15 // CHECK10-NEXT: [[TMP8:%.*]] = zext i32 [[TMP7]] to i64 -// CHECK10-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK10-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !15 // CHECK10-NEXT: [[TMP10:%.*]] = zext i32 [[TMP9]] to i64 -// CHECK10-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 2, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64)* @.omp_outlined..1 to void (i32*, i32*, ...)*), i64 [[TMP8]], i64 [[TMP10]]) +// CHECK10-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) 
@__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 2, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64)* @.omp_outlined..1 to void (i32*, i32*, ...)*), i64 [[TMP8]], i64 [[TMP10]]), !llvm.access.group !15 // CHECK10-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK10: omp.inner.for.inc: -// CHECK10-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK10-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK10-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !15 +// CHECK10-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !15 // CHECK10-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP11]], [[TMP12]] -// CHECK10-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 +// CHECK10-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !15 // CHECK10-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP16:![0-9]+]] // CHECK10: omp.inner.for.end: // CHECK10-NEXT: br label [[OMP_LOOP_EXIT:%.*]] @@ -8936,22 +8936,22 @@ int main() { // CHECK10-NEXT: store i32 [[TMP6]], i32* [[DOTOMP_IV]], align 4 // CHECK10-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK10: omp.inner.for.cond: -// CHECK10-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK10-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK10-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !19 +// CHECK10-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !19 // CHECK10-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP7]], [[TMP8]] // CHECK10-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK10: omp.inner.for.body: -// CHECK10-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK10-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !19 // CHECK10-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP9]], 1 // CHECK10-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK10-NEXT: store i32 [[ADD]], i32* [[I]], align 4 +// CHECK10-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !19 // CHECK10-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK10: omp.body.continue: // CHECK10-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK10: omp.inner.for.inc: -// CHECK10-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK10-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !19 // CHECK10-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP10]], 1 -// CHECK10-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4 +// CHECK10-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !19 // CHECK10-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP20:![0-9]+]] // CHECK10: omp.inner.for.end: // CHECK10-NEXT: br label [[OMP_LOOP_EXIT:%.*]] @@ -9011,26 +9011,26 @@ int main() { // CHECK10-NEXT: store i32 [[TMP4]], i32* [[DOTOMP_IV]], align 4 // CHECK10-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK10: omp.inner.for.cond: -// CHECK10-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK10-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK10-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !24 +// CHECK10-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !24 // CHECK10-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] // CHECK10-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // 
CHECK10: omp.inner.for.body: -// CHECK10-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK10-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !24 // CHECK10-NEXT: [[TMP8:%.*]] = zext i32 [[TMP7]] to i64 -// CHECK10-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK10-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !24 // CHECK10-NEXT: [[TMP10:%.*]] = zext i32 [[TMP9]] to i64 -// CHECK10-NEXT: call void @__kmpc_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]) -// CHECK10-NEXT: [[TMP11:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK10-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4 -// CHECK10-NEXT: call void @.omp_outlined..3(i32* [[TMP11]], i32* [[DOTBOUND_ZERO_ADDR]], i64 [[TMP8]], i64 [[TMP10]]) #[[ATTR2]] -// CHECK10-NEXT: call void @__kmpc_end_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]) +// CHECK10-NEXT: call void @__kmpc_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]), !llvm.access.group !24 +// CHECK10-NEXT: [[TMP11:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8, !llvm.access.group !24 +// CHECK10-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4, !llvm.access.group !24 +// CHECK10-NEXT: call void @.omp_outlined..3(i32* [[TMP11]], i32* [[DOTBOUND_ZERO_ADDR]], i64 [[TMP8]], i64 [[TMP10]]) #[[ATTR2]], !llvm.access.group !24 +// CHECK10-NEXT: call void @__kmpc_end_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]), !llvm.access.group !24 // CHECK10-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK10: omp.inner.for.inc: -// CHECK10-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK10-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK10-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !24 +// CHECK10-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !24 // CHECK10-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP12]], [[TMP13]] -// CHECK10-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 +// CHECK10-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !24 // CHECK10-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP25:![0-9]+]] // CHECK10: omp.inner.for.end: // CHECK10-NEXT: br label [[OMP_LOOP_EXIT:%.*]] @@ -9092,23 +9092,23 @@ int main() { // CHECK10-NEXT: store i32 [[TMP6]], i32* [[DOTOMP_IV]], align 4 // CHECK10-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK10: omp.inner.for.cond: -// CHECK10-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK10-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK10-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !27 +// CHECK10-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !27 // CHECK10-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP7]], [[TMP8]] // CHECK10-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK10: omp.inner.for.body: -// CHECK10-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK10-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !27 // CHECK10-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP9]], 1 // CHECK10-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK10-NEXT: store i32 [[ADD]], i32* [[I]], align 4 -// CHECK10-NEXT: call void @_Z9gtid_testv() +// CHECK10-NEXT: store i32 [[ADD]], i32* [[I]], align 4, 
!llvm.access.group !27 +// CHECK10-NEXT: call void @_Z9gtid_testv(), !llvm.access.group !27 // CHECK10-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK10: omp.body.continue: // CHECK10-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK10: omp.inner.for.inc: -// CHECK10-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK10-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !27 // CHECK10-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP10]], 1 -// CHECK10-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4 +// CHECK10-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !27 // CHECK10-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP28:![0-9]+]] // CHECK10: omp.inner.for.end: // CHECK10-NEXT: br label [[OMP_LOOP_EXIT:%.*]] @@ -9222,22 +9222,22 @@ int main() { // CHECK10-NEXT: store i32 [[TMP4]], i32* [[DOTOMP_IV]], align 4 // CHECK10-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK10: omp.inner.for.cond: -// CHECK10-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK10-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK10-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !30 +// CHECK10-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !30 // CHECK10-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] // CHECK10-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK10: omp.inner.for.body: -// CHECK10-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK10-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !30 // CHECK10-NEXT: [[TMP8:%.*]] = zext i32 [[TMP7]] to i64 -// CHECK10-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK10-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !30 // CHECK10-NEXT: [[TMP10:%.*]] = zext i32 [[TMP9]] to i64 -// CHECK10-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 2, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64)* @.omp_outlined..5 to void (i32*, i32*, ...)*), i64 [[TMP8]], i64 [[TMP10]]) +// CHECK10-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) 
@__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 2, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64)* @.omp_outlined..5 to void (i32*, i32*, ...)*), i64 [[TMP8]], i64 [[TMP10]]), !llvm.access.group !30 // CHECK10-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK10: omp.inner.for.inc: -// CHECK10-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK10-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK10-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !30 +// CHECK10-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !30 // CHECK10-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP11]], [[TMP12]] -// CHECK10-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 +// CHECK10-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !30 // CHECK10-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP31:![0-9]+]] // CHECK10: omp.inner.for.end: // CHECK10-NEXT: br label [[OMP_LOOP_EXIT:%.*]] @@ -9299,23 +9299,23 @@ int main() { // CHECK10-NEXT: store i32 [[TMP6]], i32* [[DOTOMP_IV]], align 4 // CHECK10-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK10: omp.inner.for.cond: -// CHECK10-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK10-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK10-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !33 +// CHECK10-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !33 // CHECK10-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP7]], [[TMP8]] // CHECK10-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK10: omp.inner.for.body: -// CHECK10-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK10-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !33 // CHECK10-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP9]], 1 // CHECK10-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK10-NEXT: store i32 [[ADD]], i32* [[I]], align 4 -// CHECK10-NEXT: call void @_Z3fn4v() +// CHECK10-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !33 +// CHECK10-NEXT: call void @_Z3fn4v(), !llvm.access.group !33 // CHECK10-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK10: omp.body.continue: // CHECK10-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK10: omp.inner.for.inc: -// CHECK10-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK10-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !33 // CHECK10-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP10]], 1 -// CHECK10-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4 +// CHECK10-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !33 // CHECK10-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP34:![0-9]+]] // CHECK10: omp.inner.for.end: // CHECK10-NEXT: br label [[OMP_LOOP_EXIT:%.*]] @@ -9375,26 +9375,26 @@ int main() { // CHECK10-NEXT: store i32 [[TMP4]], i32* [[DOTOMP_IV]], align 4 // CHECK10-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK10: omp.inner.for.cond: -// CHECK10-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK10-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK10-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !36 +// CHECK10-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !36 // CHECK10-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] // 
CHECK10-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK10: omp.inner.for.body: -// CHECK10-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK10-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !36 // CHECK10-NEXT: [[TMP8:%.*]] = zext i32 [[TMP7]] to i64 -// CHECK10-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK10-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !36 // CHECK10-NEXT: [[TMP10:%.*]] = zext i32 [[TMP9]] to i64 -// CHECK10-NEXT: call void @__kmpc_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]) -// CHECK10-NEXT: [[TMP11:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK10-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4 -// CHECK10-NEXT: call void @.omp_outlined..7(i32* [[TMP11]], i32* [[DOTBOUND_ZERO_ADDR]], i64 [[TMP8]], i64 [[TMP10]]) #[[ATTR2]] -// CHECK10-NEXT: call void @__kmpc_end_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]) +// CHECK10-NEXT: call void @__kmpc_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]), !llvm.access.group !36 +// CHECK10-NEXT: [[TMP11:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8, !llvm.access.group !36 +// CHECK10-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4, !llvm.access.group !36 +// CHECK10-NEXT: call void @.omp_outlined..7(i32* [[TMP11]], i32* [[DOTBOUND_ZERO_ADDR]], i64 [[TMP8]], i64 [[TMP10]]) #[[ATTR2]], !llvm.access.group !36 +// CHECK10-NEXT: call void @__kmpc_end_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]), !llvm.access.group !36 // CHECK10-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK10: omp.inner.for.inc: -// CHECK10-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK10-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK10-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !36 +// CHECK10-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !36 // CHECK10-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP12]], [[TMP13]] -// CHECK10-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 +// CHECK10-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !36 // CHECK10-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP37:![0-9]+]] // CHECK10: omp.inner.for.end: // CHECK10-NEXT: br label [[OMP_LOOP_EXIT:%.*]] @@ -9456,23 +9456,23 @@ int main() { // CHECK10-NEXT: store i32 [[TMP6]], i32* [[DOTOMP_IV]], align 4 // CHECK10-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK10: omp.inner.for.cond: -// CHECK10-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK10-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK10-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !39 +// CHECK10-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !39 // CHECK10-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP7]], [[TMP8]] // CHECK10-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK10: omp.inner.for.body: -// CHECK10-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK10-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !39 // CHECK10-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP9]], 1 // CHECK10-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK10-NEXT: store i32 [[ADD]], i32* [[I]], align 4 -// CHECK10-NEXT: 
call void @_Z3fn5v() +// CHECK10-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !39 +// CHECK10-NEXT: call void @_Z3fn5v(), !llvm.access.group !39 // CHECK10-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK10: omp.body.continue: // CHECK10-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK10: omp.inner.for.inc: -// CHECK10-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK10-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !39 // CHECK10-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP10]], 1 -// CHECK10-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4 +// CHECK10-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !39 // CHECK10-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP40:![0-9]+]] // CHECK10: omp.inner.for.end: // CHECK10-NEXT: br label [[OMP_LOOP_EXIT:%.*]] @@ -9538,35 +9538,35 @@ int main() { // CHECK10-NEXT: store i32 [[TMP5]], i32* [[DOTOMP_IV]], align 4 // CHECK10-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK10: omp.inner.for.cond: -// CHECK10-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK10-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK10-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !42 +// CHECK10-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !42 // CHECK10-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] // CHECK10-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK10: omp.inner.for.body: -// CHECK10-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK10-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !42 // CHECK10-NEXT: [[TMP9:%.*]] = zext i32 [[TMP8]] to i64 -// CHECK10-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK10-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !42 // CHECK10-NEXT: [[TMP11:%.*]] = zext i32 [[TMP10]] to i64 -// CHECK10-NEXT: [[TMP12:%.*]] = load i32, i32* [[TMP0]], align 4 +// CHECK10-NEXT: [[TMP12:%.*]] = load i32, i32* [[TMP0]], align 4, !llvm.access.group !42 // CHECK10-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[TMP12]], 0 // CHECK10-NEXT: br i1 [[TOBOOL]], label [[OMP_IF_THEN:%.*]], label [[OMP_IF_ELSE:%.*]] // CHECK10: omp_if.then: -// CHECK10-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 2, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64)* @.omp_outlined..9 to void (i32*, i32*, ...)*), i64 [[TMP9]], i64 [[TMP11]]) +// CHECK10-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) 
@__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 2, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64)* @.omp_outlined..9 to void (i32*, i32*, ...)*), i64 [[TMP9]], i64 [[TMP11]]), !llvm.access.group !42 // CHECK10-NEXT: br label [[OMP_IF_END:%.*]] // CHECK10: omp_if.else: -// CHECK10-NEXT: call void @__kmpc_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP2]]) -// CHECK10-NEXT: [[TMP13:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK10-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4 -// CHECK10-NEXT: call void @.omp_outlined..9(i32* [[TMP13]], i32* [[DOTBOUND_ZERO_ADDR]], i64 [[TMP9]], i64 [[TMP11]]) #[[ATTR2]] -// CHECK10-NEXT: call void @__kmpc_end_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP2]]) +// CHECK10-NEXT: call void @__kmpc_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP2]]), !llvm.access.group !42 +// CHECK10-NEXT: [[TMP13:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8, !llvm.access.group !42 +// CHECK10-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4, !llvm.access.group !42 +// CHECK10-NEXT: call void @.omp_outlined..9(i32* [[TMP13]], i32* [[DOTBOUND_ZERO_ADDR]], i64 [[TMP9]], i64 [[TMP11]]) #[[ATTR2]], !llvm.access.group !42 +// CHECK10-NEXT: call void @__kmpc_end_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP2]]), !llvm.access.group !42 // CHECK10-NEXT: br label [[OMP_IF_END]] // CHECK10: omp_if.end: // CHECK10-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK10: omp.inner.for.inc: -// CHECK10-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK10-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK10-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !42 +// CHECK10-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !42 // CHECK10-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP14]], [[TMP15]] -// CHECK10-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 +// CHECK10-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !42 // CHECK10-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP43:![0-9]+]] // CHECK10: omp.inner.for.end: // CHECK10-NEXT: br label [[OMP_LOOP_EXIT:%.*]] @@ -9628,23 +9628,23 @@ int main() { // CHECK10-NEXT: store i32 [[TMP6]], i32* [[DOTOMP_IV]], align 4 // CHECK10-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK10: omp.inner.for.cond: -// CHECK10-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK10-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK10-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !45 +// CHECK10-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !45 // CHECK10-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP7]], [[TMP8]] // CHECK10-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK10: omp.inner.for.body: -// CHECK10-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK10-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !45 // CHECK10-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP9]], 1 // CHECK10-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK10-NEXT: store i32 [[ADD]], i32* [[I]], align 4 -// CHECK10-NEXT: call void @_Z3fn6v() +// CHECK10-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !45 +// CHECK10-NEXT: call void @_Z3fn6v(), !llvm.access.group !45 // CHECK10-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK10: 
omp.body.continue: // CHECK10-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK10: omp.inner.for.inc: -// CHECK10-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK10-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !45 // CHECK10-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP10]], 1 -// CHECK10-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4 +// CHECK10-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !45 // CHECK10-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP46:![0-9]+]] // CHECK10: omp.inner.for.end: // CHECK10-NEXT: br label [[OMP_LOOP_EXIT:%.*]] @@ -9756,22 +9756,22 @@ int main() { // CHECK10-NEXT: store i32 [[TMP4]], i32* [[DOTOMP_IV]], align 4 // CHECK10-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK10: omp.inner.for.cond: -// CHECK10-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK10-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK10-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !48 +// CHECK10-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !48 // CHECK10-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] // CHECK10-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK10: omp.inner.for.body: -// CHECK10-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK10-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !48 // CHECK10-NEXT: [[TMP8:%.*]] = zext i32 [[TMP7]] to i64 -// CHECK10-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK10-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !48 // CHECK10-NEXT: [[TMP10:%.*]] = zext i32 [[TMP9]] to i64 -// CHECK10-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 2, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64)* @.omp_outlined..11 to void (i32*, i32*, ...)*), i64 [[TMP8]], i64 [[TMP10]]) +// CHECK10-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) 
@__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 2, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64)* @.omp_outlined..11 to void (i32*, i32*, ...)*), i64 [[TMP8]], i64 [[TMP10]]), !llvm.access.group !48 // CHECK10-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK10: omp.inner.for.inc: -// CHECK10-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK10-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK10-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !48 +// CHECK10-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !48 // CHECK10-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP11]], [[TMP12]] -// CHECK10-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 +// CHECK10-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !48 // CHECK10-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP49:![0-9]+]] // CHECK10: omp.inner.for.end: // CHECK10-NEXT: br label [[OMP_LOOP_EXIT:%.*]] @@ -9833,23 +9833,23 @@ int main() { // CHECK10-NEXT: store i32 [[TMP6]], i32* [[DOTOMP_IV]], align 4 // CHECK10-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK10: omp.inner.for.cond: -// CHECK10-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK10-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK10-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !51 +// CHECK10-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !51 // CHECK10-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP7]], [[TMP8]] // CHECK10-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK10: omp.inner.for.body: -// CHECK10-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK10-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !51 // CHECK10-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP9]], 1 // CHECK10-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK10-NEXT: store i32 [[ADD]], i32* [[I]], align 4 -// CHECK10-NEXT: call void @_Z3fn1v() +// CHECK10-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !51 +// CHECK10-NEXT: call void @_Z3fn1v(), !llvm.access.group !51 // CHECK10-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK10: omp.body.continue: // CHECK10-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK10: omp.inner.for.inc: -// CHECK10-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK10-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !51 // CHECK10-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP10]], 1 -// CHECK10-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4 +// CHECK10-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !51 // CHECK10-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP52:![0-9]+]] // CHECK10: omp.inner.for.end: // CHECK10-NEXT: br label [[OMP_LOOP_EXIT:%.*]] @@ -9909,26 +9909,26 @@ int main() { // CHECK10-NEXT: store i32 [[TMP4]], i32* [[DOTOMP_IV]], align 4 // CHECK10-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK10: omp.inner.for.cond: -// CHECK10-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK10-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK10-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !54 +// CHECK10-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !54 // CHECK10-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] // 
CHECK10-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK10: omp.inner.for.body: -// CHECK10-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK10-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !54 // CHECK10-NEXT: [[TMP8:%.*]] = zext i32 [[TMP7]] to i64 -// CHECK10-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK10-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !54 // CHECK10-NEXT: [[TMP10:%.*]] = zext i32 [[TMP9]] to i64 -// CHECK10-NEXT: call void @__kmpc_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]) -// CHECK10-NEXT: [[TMP11:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK10-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4 -// CHECK10-NEXT: call void @.omp_outlined..13(i32* [[TMP11]], i32* [[DOTBOUND_ZERO_ADDR]], i64 [[TMP8]], i64 [[TMP10]]) #[[ATTR2]] -// CHECK10-NEXT: call void @__kmpc_end_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]) +// CHECK10-NEXT: call void @__kmpc_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]), !llvm.access.group !54 +// CHECK10-NEXT: [[TMP11:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8, !llvm.access.group !54 +// CHECK10-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4, !llvm.access.group !54 +// CHECK10-NEXT: call void @.omp_outlined..13(i32* [[TMP11]], i32* [[DOTBOUND_ZERO_ADDR]], i64 [[TMP8]], i64 [[TMP10]]) #[[ATTR2]], !llvm.access.group !54 +// CHECK10-NEXT: call void @__kmpc_end_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]), !llvm.access.group !54 // CHECK10-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK10: omp.inner.for.inc: -// CHECK10-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK10-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK10-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !54 +// CHECK10-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !54 // CHECK10-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP12]], [[TMP13]] -// CHECK10-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 +// CHECK10-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !54 // CHECK10-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP55:![0-9]+]] // CHECK10: omp.inner.for.end: // CHECK10-NEXT: br label [[OMP_LOOP_EXIT:%.*]] @@ -9990,23 +9990,23 @@ int main() { // CHECK10-NEXT: store i32 [[TMP6]], i32* [[DOTOMP_IV]], align 4 // CHECK10-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK10: omp.inner.for.cond: -// CHECK10-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK10-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK10-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !57 +// CHECK10-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !57 // CHECK10-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP7]], [[TMP8]] // CHECK10-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK10: omp.inner.for.body: -// CHECK10-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK10-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !57 // CHECK10-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP9]], 1 // CHECK10-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK10-NEXT: store i32 [[ADD]], i32* [[I]], align 4 -// CHECK10-NEXT: 
call void @_Z3fn2v() +// CHECK10-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !57 +// CHECK10-NEXT: call void @_Z3fn2v(), !llvm.access.group !57 // CHECK10-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK10: omp.body.continue: // CHECK10-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK10: omp.inner.for.inc: -// CHECK10-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK10-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !57 // CHECK10-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP10]], 1 -// CHECK10-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4 +// CHECK10-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !57 // CHECK10-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP58:![0-9]+]] // CHECK10: omp.inner.for.end: // CHECK10-NEXT: br label [[OMP_LOOP_EXIT:%.*]] @@ -10072,35 +10072,35 @@ int main() { // CHECK10-NEXT: store i32 [[TMP5]], i32* [[DOTOMP_IV]], align 4 // CHECK10-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK10: omp.inner.for.cond: -// CHECK10-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK10-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK10-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !60 +// CHECK10-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !60 // CHECK10-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] // CHECK10-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK10: omp.inner.for.body: -// CHECK10-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK10-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !60 // CHECK10-NEXT: [[TMP9:%.*]] = zext i32 [[TMP8]] to i64 -// CHECK10-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK10-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !60 // CHECK10-NEXT: [[TMP11:%.*]] = zext i32 [[TMP10]] to i64 -// CHECK10-NEXT: [[TMP12:%.*]] = load i32, i32* [[TMP0]], align 4 +// CHECK10-NEXT: [[TMP12:%.*]] = load i32, i32* [[TMP0]], align 4, !llvm.access.group !60 // CHECK10-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[TMP12]], 0 // CHECK10-NEXT: br i1 [[TOBOOL]], label [[OMP_IF_THEN:%.*]], label [[OMP_IF_ELSE:%.*]] // CHECK10: omp_if.then: -// CHECK10-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 2, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64)* @.omp_outlined..15 to void (i32*, i32*, ...)*), i64 [[TMP9]], i64 [[TMP11]]) +// CHECK10-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) 
@__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 2, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64)* @.omp_outlined..15 to void (i32*, i32*, ...)*), i64 [[TMP9]], i64 [[TMP11]]), !llvm.access.group !60 // CHECK10-NEXT: br label [[OMP_IF_END:%.*]] // CHECK10: omp_if.else: -// CHECK10-NEXT: call void @__kmpc_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP2]]) -// CHECK10-NEXT: [[TMP13:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK10-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4 -// CHECK10-NEXT: call void @.omp_outlined..15(i32* [[TMP13]], i32* [[DOTBOUND_ZERO_ADDR]], i64 [[TMP9]], i64 [[TMP11]]) #[[ATTR2]] -// CHECK10-NEXT: call void @__kmpc_end_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP2]]) +// CHECK10-NEXT: call void @__kmpc_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP2]]), !llvm.access.group !60 +// CHECK10-NEXT: [[TMP13:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8, !llvm.access.group !60 +// CHECK10-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4, !llvm.access.group !60 +// CHECK10-NEXT: call void @.omp_outlined..15(i32* [[TMP13]], i32* [[DOTBOUND_ZERO_ADDR]], i64 [[TMP9]], i64 [[TMP11]]) #[[ATTR2]], !llvm.access.group !60 +// CHECK10-NEXT: call void @__kmpc_end_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP2]]), !llvm.access.group !60 // CHECK10-NEXT: br label [[OMP_IF_END]] // CHECK10: omp_if.end: // CHECK10-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK10: omp.inner.for.inc: -// CHECK10-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK10-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK10-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !60 +// CHECK10-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !60 // CHECK10-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP14]], [[TMP15]] -// CHECK10-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 +// CHECK10-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !60 // CHECK10-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP61:![0-9]+]] // CHECK10: omp.inner.for.end: // CHECK10-NEXT: br label [[OMP_LOOP_EXIT:%.*]] @@ -10162,23 +10162,23 @@ int main() { // CHECK10-NEXT: store i32 [[TMP6]], i32* [[DOTOMP_IV]], align 4 // CHECK10-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK10: omp.inner.for.cond: -// CHECK10-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK10-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK10-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !63 +// CHECK10-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !63 // CHECK10-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP7]], [[TMP8]] // CHECK10-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK10: omp.inner.for.body: -// CHECK10-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK10-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !63 // CHECK10-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP9]], 1 // CHECK10-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK10-NEXT: store i32 [[ADD]], i32* [[I]], align 4 -// CHECK10-NEXT: call void @_Z3fn3v() +// CHECK10-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !63 +// CHECK10-NEXT: call void @_Z3fn3v(), !llvm.access.group !63 // CHECK10-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK10: 
omp.body.continue: // CHECK10-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK10: omp.inner.for.inc: -// CHECK10-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK10-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !63 // CHECK10-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP10]], 1 -// CHECK10-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4 +// CHECK10-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !63 // CHECK10-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP64:![0-9]+]] // CHECK10: omp.inner.for.end: // CHECK10-NEXT: br label [[OMP_LOOP_EXIT:%.*]] @@ -10268,22 +10268,22 @@ int main() { // CHECK11-NEXT: store i32 [[TMP4]], i32* [[DOTOMP_IV]], align 4 // CHECK11-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK11: omp.inner.for.cond: -// CHECK11-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK11-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK11-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !15 +// CHECK11-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !15 // CHECK11-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] // CHECK11-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK11: omp.inner.for.body: -// CHECK11-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK11-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !15 // CHECK11-NEXT: [[TMP8:%.*]] = zext i32 [[TMP7]] to i64 -// CHECK11-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK11-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !15 // CHECK11-NEXT: [[TMP10:%.*]] = zext i32 [[TMP9]] to i64 -// CHECK11-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 2, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64)* @.omp_outlined..1 to void (i32*, i32*, ...)*), i64 [[TMP8]], i64 [[TMP10]]) +// CHECK11-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) 
@__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 2, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64)* @.omp_outlined..1 to void (i32*, i32*, ...)*), i64 [[TMP8]], i64 [[TMP10]]), !llvm.access.group !15 // CHECK11-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK11: omp.inner.for.inc: -// CHECK11-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK11-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK11-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !15 +// CHECK11-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !15 // CHECK11-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP11]], [[TMP12]] -// CHECK11-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !15 // CHECK11-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP16:![0-9]+]] // CHECK11: omp.inner.for.end: // CHECK11-NEXT: br label [[OMP_LOOP_EXIT:%.*]] @@ -10345,22 +10345,22 @@ int main() { // CHECK11-NEXT: store i32 [[TMP6]], i32* [[DOTOMP_IV]], align 4 // CHECK11-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK11: omp.inner.for.cond: -// CHECK11-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK11-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK11-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !19 +// CHECK11-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !19 // CHECK11-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP7]], [[TMP8]] // CHECK11-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK11: omp.inner.for.body: -// CHECK11-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !19 // CHECK11-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP9]], 1 // CHECK11-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK11-NEXT: store i32 [[ADD]], i32* [[I]], align 4 +// CHECK11-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !19 // CHECK11-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK11: omp.body.continue: // CHECK11-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK11: omp.inner.for.inc: -// CHECK11-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !19 // CHECK11-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP10]], 1 -// CHECK11-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !19 // CHECK11-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP20:![0-9]+]] // CHECK11: omp.inner.for.end: // CHECK11-NEXT: br label [[OMP_LOOP_EXIT:%.*]] @@ -10420,26 +10420,26 @@ int main() { // CHECK11-NEXT: store i32 [[TMP4]], i32* [[DOTOMP_IV]], align 4 // CHECK11-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK11: omp.inner.for.cond: -// CHECK11-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK11-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK11-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !24 +// CHECK11-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !24 // CHECK11-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] // CHECK11-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // 
CHECK11: omp.inner.for.body: -// CHECK11-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK11-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !24 // CHECK11-NEXT: [[TMP8:%.*]] = zext i32 [[TMP7]] to i64 -// CHECK11-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK11-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !24 // CHECK11-NEXT: [[TMP10:%.*]] = zext i32 [[TMP9]] to i64 -// CHECK11-NEXT: call void @__kmpc_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]) -// CHECK11-NEXT: [[TMP11:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK11-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4 -// CHECK11-NEXT: call void @.omp_outlined..3(i32* [[TMP11]], i32* [[DOTBOUND_ZERO_ADDR]], i64 [[TMP8]], i64 [[TMP10]]) #[[ATTR2]] -// CHECK11-NEXT: call void @__kmpc_end_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]) +// CHECK11-NEXT: call void @__kmpc_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]), !llvm.access.group !24 +// CHECK11-NEXT: [[TMP11:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8, !llvm.access.group !24 +// CHECK11-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4, !llvm.access.group !24 +// CHECK11-NEXT: call void @.omp_outlined..3(i32* [[TMP11]], i32* [[DOTBOUND_ZERO_ADDR]], i64 [[TMP8]], i64 [[TMP10]]) #[[ATTR2]], !llvm.access.group !24 +// CHECK11-NEXT: call void @__kmpc_end_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]), !llvm.access.group !24 // CHECK11-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK11: omp.inner.for.inc: -// CHECK11-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK11-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK11-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !24 +// CHECK11-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !24 // CHECK11-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP12]], [[TMP13]] -// CHECK11-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !24 // CHECK11-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP25:![0-9]+]] // CHECK11: omp.inner.for.end: // CHECK11-NEXT: br label [[OMP_LOOP_EXIT:%.*]] @@ -10501,23 +10501,23 @@ int main() { // CHECK11-NEXT: store i32 [[TMP6]], i32* [[DOTOMP_IV]], align 4 // CHECK11-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK11: omp.inner.for.cond: -// CHECK11-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK11-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK11-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !27 +// CHECK11-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !27 // CHECK11-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP7]], [[TMP8]] // CHECK11-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK11: omp.inner.for.body: -// CHECK11-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !27 // CHECK11-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP9]], 1 // CHECK11-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK11-NEXT: store i32 [[ADD]], i32* [[I]], align 4 -// CHECK11-NEXT: call void @_Z9gtid_testv() +// CHECK11-NEXT: store i32 [[ADD]], i32* [[I]], align 4, 
!llvm.access.group !27 +// CHECK11-NEXT: call void @_Z9gtid_testv(), !llvm.access.group !27 // CHECK11-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK11: omp.body.continue: // CHECK11-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK11: omp.inner.for.inc: -// CHECK11-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !27 // CHECK11-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP10]], 1 -// CHECK11-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !27 // CHECK11-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP28:![0-9]+]] // CHECK11: omp.inner.for.end: // CHECK11-NEXT: br label [[OMP_LOOP_EXIT:%.*]] @@ -10631,22 +10631,22 @@ int main() { // CHECK11-NEXT: store i32 [[TMP4]], i32* [[DOTOMP_IV]], align 4 // CHECK11-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK11: omp.inner.for.cond: -// CHECK11-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK11-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK11-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !30 +// CHECK11-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !30 // CHECK11-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] // CHECK11-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK11: omp.inner.for.body: -// CHECK11-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK11-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !30 // CHECK11-NEXT: [[TMP8:%.*]] = zext i32 [[TMP7]] to i64 -// CHECK11-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK11-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !30 // CHECK11-NEXT: [[TMP10:%.*]] = zext i32 [[TMP9]] to i64 -// CHECK11-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 2, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64)* @.omp_outlined..5 to void (i32*, i32*, ...)*), i64 [[TMP8]], i64 [[TMP10]]) +// CHECK11-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) 
@__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 2, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64)* @.omp_outlined..5 to void (i32*, i32*, ...)*), i64 [[TMP8]], i64 [[TMP10]]), !llvm.access.group !30 // CHECK11-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK11: omp.inner.for.inc: -// CHECK11-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK11-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK11-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !30 +// CHECK11-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !30 // CHECK11-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP11]], [[TMP12]] -// CHECK11-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !30 // CHECK11-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP31:![0-9]+]] // CHECK11: omp.inner.for.end: // CHECK11-NEXT: br label [[OMP_LOOP_EXIT:%.*]] @@ -10708,23 +10708,23 @@ int main() { // CHECK11-NEXT: store i32 [[TMP6]], i32* [[DOTOMP_IV]], align 4 // CHECK11-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK11: omp.inner.for.cond: -// CHECK11-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK11-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK11-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !33 +// CHECK11-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !33 // CHECK11-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP7]], [[TMP8]] // CHECK11-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK11: omp.inner.for.body: -// CHECK11-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !33 // CHECK11-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP9]], 1 // CHECK11-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK11-NEXT: store i32 [[ADD]], i32* [[I]], align 4 -// CHECK11-NEXT: call void @_Z3fn4v() +// CHECK11-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !33 +// CHECK11-NEXT: call void @_Z3fn4v(), !llvm.access.group !33 // CHECK11-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK11: omp.body.continue: // CHECK11-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK11: omp.inner.for.inc: -// CHECK11-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !33 // CHECK11-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP10]], 1 -// CHECK11-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !33 // CHECK11-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP34:![0-9]+]] // CHECK11: omp.inner.for.end: // CHECK11-NEXT: br label [[OMP_LOOP_EXIT:%.*]] @@ -10959,41 +10959,41 @@ int main() { // CHECK11: omp_if.then: // CHECK11-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK11: omp.inner.for.cond: -// CHECK11-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK11-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK11-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !39 +// CHECK11-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !39 // CHECK11-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] // CHECK11-NEXT: br i1 [[CMP2]], label 
[[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK11: omp.inner.for.body: -// CHECK11-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK11-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !39 // CHECK11-NEXT: [[TMP11:%.*]] = zext i32 [[TMP10]] to i64 -// CHECK11-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK11-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !39 // CHECK11-NEXT: [[TMP13:%.*]] = zext i32 [[TMP12]] to i64 -// CHECK11-NEXT: [[TMP14:%.*]] = load i8, i8* [[DOTCAPTURE_EXPR_]], align 1 +// CHECK11-NEXT: [[TMP14:%.*]] = load i8, i8* [[DOTCAPTURE_EXPR_]], align 1, !llvm.access.group !39 // CHECK11-NEXT: [[TOBOOL3:%.*]] = trunc i8 [[TMP14]] to i1 // CHECK11-NEXT: [[CONV:%.*]] = bitcast i64* [[DOTCAPTURE_EXPR__CASTED]] to i8* // CHECK11-NEXT: [[FROMBOOL4:%.*]] = zext i1 [[TOBOOL3]] to i8 -// CHECK11-NEXT: store i8 [[FROMBOOL4]], i8* [[CONV]], align 1 -// CHECK11-NEXT: [[TMP15:%.*]] = load i64, i64* [[DOTCAPTURE_EXPR__CASTED]], align 8 -// CHECK11-NEXT: [[TMP16:%.*]] = load i8, i8* [[DOTCAPTURE_EXPR_]], align 1 +// CHECK11-NEXT: store i8 [[FROMBOOL4]], i8* [[CONV]], align 1, !llvm.access.group !39 +// CHECK11-NEXT: [[TMP15:%.*]] = load i64, i64* [[DOTCAPTURE_EXPR__CASTED]], align 8, !llvm.access.group !39 +// CHECK11-NEXT: [[TMP16:%.*]] = load i8, i8* [[DOTCAPTURE_EXPR_]], align 1, !llvm.access.group !39 // CHECK11-NEXT: [[TOBOOL5:%.*]] = trunc i8 [[TMP16]] to i1 // CHECK11-NEXT: br i1 [[TOBOOL5]], label [[OMP_IF_THEN6:%.*]], label [[OMP_IF_ELSE:%.*]] // CHECK11: omp_if.then6: -// CHECK11-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 3, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64, i64)* @.omp_outlined..9 to void (i32*, i32*, ...)*), i64 [[TMP11]], i64 [[TMP13]], i64 [[TMP15]]) +// CHECK11-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) 
@__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 3, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64, i64)* @.omp_outlined..9 to void (i32*, i32*, ...)*), i64 [[TMP11]], i64 [[TMP13]], i64 [[TMP15]]), !llvm.access.group !39 // CHECK11-NEXT: br label [[OMP_IF_END:%.*]] // CHECK11: omp_if.else: -// CHECK11-NEXT: call void @__kmpc_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP3]]) -// CHECK11-NEXT: [[TMP17:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK11-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4 -// CHECK11-NEXT: call void @.omp_outlined..9(i32* [[TMP17]], i32* [[DOTBOUND_ZERO_ADDR]], i64 [[TMP11]], i64 [[TMP13]], i64 [[TMP15]]) #[[ATTR2]] -// CHECK11-NEXT: call void @__kmpc_end_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP3]]) +// CHECK11-NEXT: call void @__kmpc_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP3]]), !llvm.access.group !39 +// CHECK11-NEXT: [[TMP17:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8, !llvm.access.group !39 +// CHECK11-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4, !llvm.access.group !39 +// CHECK11-NEXT: call void @.omp_outlined..9(i32* [[TMP17]], i32* [[DOTBOUND_ZERO_ADDR]], i64 [[TMP11]], i64 [[TMP13]], i64 [[TMP15]]) #[[ATTR2]], !llvm.access.group !39 +// CHECK11-NEXT: call void @__kmpc_end_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP3]]), !llvm.access.group !39 // CHECK11-NEXT: br label [[OMP_IF_END]] // CHECK11: omp_if.end: // CHECK11-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK11: omp.inner.for.inc: -// CHECK11-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK11-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK11-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !39 +// CHECK11-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !39 // CHECK11-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP18]], [[TMP19]] -// CHECK11-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !39 // CHECK11-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP40:![0-9]+]] // CHECK11: omp.inner.for.end: // CHECK11-NEXT: br label [[OMP_IF_END23:%.*]] @@ -11105,23 +11105,23 @@ int main() { // CHECK11-NEXT: store i32 [[TMP7]], i32* [[DOTOMP_IV]], align 4 // CHECK11-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK11: omp.inner.for.cond: -// CHECK11-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK11-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK11-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !43 +// CHECK11-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !43 // CHECK11-NEXT: [[CMP3:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] // CHECK11-NEXT: br i1 [[CMP3]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK11: omp.inner.for.body: -// CHECK11-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !43 // CHECK11-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP10]], 1 // CHECK11-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK11-NEXT: store i32 [[ADD]], i32* [[I]], align 4 -// CHECK11-NEXT: call void @_Z3fn6v() +// CHECK11-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !43 +// CHECK11-NEXT: call void @_Z3fn6v(), !llvm.access.group !43 // CHECK11-NEXT: br 
label [[OMP_BODY_CONTINUE:%.*]] // CHECK11: omp.body.continue: // CHECK11-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK11: omp.inner.for.inc: -// CHECK11-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !43 // CHECK11-NEXT: [[ADD4:%.*]] = add nsw i32 [[TMP11]], 1 -// CHECK11-NEXT: store i32 [[ADD4]], i32* [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: store i32 [[ADD4]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !43 // CHECK11-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP44:![0-9]+]] // CHECK11: omp.inner.for.end: // CHECK11-NEXT: br label [[OMP_IF_END:%.*]] @@ -11233,23 +11233,23 @@ int main() { // CHECK11-NEXT: store i32 [[TMP7]], i32* [[DOTOMP_IV]], align 4 // CHECK11-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK11: omp.inner.for.cond: -// CHECK11-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK11-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK11-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !47 +// CHECK11-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !47 // CHECK11-NEXT: [[CMP3:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] // CHECK11-NEXT: br i1 [[CMP3]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK11: omp.inner.for.body: -// CHECK11-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !47 // CHECK11-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP10]], 1 // CHECK11-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK11-NEXT: store i32 [[ADD]], i32* [[I]], align 4 -// CHECK11-NEXT: call void @_Z3fn6v() +// CHECK11-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !47 +// CHECK11-NEXT: call void @_Z3fn6v(), !llvm.access.group !47 // CHECK11-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK11: omp.body.continue: // CHECK11-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK11: omp.inner.for.inc: -// CHECK11-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !47 // CHECK11-NEXT: [[ADD4:%.*]] = add nsw i32 [[TMP11]], 1 -// CHECK11-NEXT: store i32 [[ADD4]], i32* [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: store i32 [[ADD4]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !47 // CHECK11-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP48:![0-9]+]] // CHECK11: omp.inner.for.end: // CHECK11-NEXT: br label [[OMP_IF_END:%.*]] @@ -11404,22 +11404,22 @@ int main() { // CHECK11-NEXT: store i32 [[TMP4]], i32* [[DOTOMP_IV]], align 4 // CHECK11-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK11: omp.inner.for.cond: -// CHECK11-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK11-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK11-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !51 +// CHECK11-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !51 // CHECK11-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] // CHECK11-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK11: omp.inner.for.body: -// CHECK11-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK11-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !51 // CHECK11-NEXT: [[TMP8:%.*]] = 
zext i32 [[TMP7]] to i64 -// CHECK11-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK11-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !51 // CHECK11-NEXT: [[TMP10:%.*]] = zext i32 [[TMP9]] to i64 -// CHECK11-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 2, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64)* @.omp_outlined..12 to void (i32*, i32*, ...)*), i64 [[TMP8]], i64 [[TMP10]]) +// CHECK11-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 2, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64)* @.omp_outlined..12 to void (i32*, i32*, ...)*), i64 [[TMP8]], i64 [[TMP10]]), !llvm.access.group !51 // CHECK11-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK11: omp.inner.for.inc: -// CHECK11-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK11-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK11-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !51 +// CHECK11-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !51 // CHECK11-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP11]], [[TMP12]] -// CHECK11-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !51 // CHECK11-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP52:![0-9]+]] // CHECK11: omp.inner.for.end: // CHECK11-NEXT: br label [[OMP_LOOP_EXIT:%.*]] @@ -11481,23 +11481,23 @@ int main() { // CHECK11-NEXT: store i32 [[TMP6]], i32* [[DOTOMP_IV]], align 4 // CHECK11-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK11: omp.inner.for.cond: -// CHECK11-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK11-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK11-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !54 +// CHECK11-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !54 // CHECK11-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP7]], [[TMP8]] // CHECK11-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK11: omp.inner.for.body: -// CHECK11-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !54 // CHECK11-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP9]], 1 // CHECK11-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK11-NEXT: store i32 [[ADD]], i32* [[I]], align 4 -// CHECK11-NEXT: call void @_Z3fn1v() +// CHECK11-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !54 +// CHECK11-NEXT: call void @_Z3fn1v(), !llvm.access.group !54 // CHECK11-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK11: omp.body.continue: // CHECK11-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK11: omp.inner.for.inc: -// CHECK11-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !54 // CHECK11-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP10]], 1 -// CHECK11-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !54 // CHECK11-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP55:![0-9]+]] // CHECK11: omp.inner.for.end: // CHECK11-NEXT: br label 
[[OMP_LOOP_EXIT:%.*]] @@ -11720,35 +11720,35 @@ int main() { // CHECK11-NEXT: store i32 [[TMP5]], i32* [[DOTOMP_IV]], align 4 // CHECK11-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK11: omp.inner.for.cond: -// CHECK11-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK11-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK11-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !59 +// CHECK11-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !59 // CHECK11-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] // CHECK11-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK11: omp.inner.for.body: -// CHECK11-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK11-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !59 // CHECK11-NEXT: [[TMP9:%.*]] = zext i32 [[TMP8]] to i64 -// CHECK11-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK11-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !59 // CHECK11-NEXT: [[TMP11:%.*]] = zext i32 [[TMP10]] to i64 -// CHECK11-NEXT: [[TMP12:%.*]] = load i32, i32* [[TMP0]], align 4 +// CHECK11-NEXT: [[TMP12:%.*]] = load i32, i32* [[TMP0]], align 4, !llvm.access.group !59 // CHECK11-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[TMP12]], 0 // CHECK11-NEXT: br i1 [[TOBOOL]], label [[OMP_IF_THEN:%.*]], label [[OMP_IF_ELSE:%.*]] // CHECK11: omp_if.then: -// CHECK11-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 2, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64)* @.omp_outlined..16 to void (i32*, i32*, ...)*), i64 [[TMP9]], i64 [[TMP11]]) +// CHECK11-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) 
@__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 2, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64)* @.omp_outlined..16 to void (i32*, i32*, ...)*), i64 [[TMP9]], i64 [[TMP11]]), !llvm.access.group !59 // CHECK11-NEXT: br label [[OMP_IF_END:%.*]] // CHECK11: omp_if.else: -// CHECK11-NEXT: call void @__kmpc_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP2]]) -// CHECK11-NEXT: [[TMP13:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK11-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4 -// CHECK11-NEXT: call void @.omp_outlined..16(i32* [[TMP13]], i32* [[DOTBOUND_ZERO_ADDR]], i64 [[TMP9]], i64 [[TMP11]]) #[[ATTR2]] -// CHECK11-NEXT: call void @__kmpc_end_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP2]]) +// CHECK11-NEXT: call void @__kmpc_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP2]]), !llvm.access.group !59 +// CHECK11-NEXT: [[TMP13:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8, !llvm.access.group !59 +// CHECK11-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4, !llvm.access.group !59 +// CHECK11-NEXT: call void @.omp_outlined..16(i32* [[TMP13]], i32* [[DOTBOUND_ZERO_ADDR]], i64 [[TMP9]], i64 [[TMP11]]) #[[ATTR2]], !llvm.access.group !59 +// CHECK11-NEXT: call void @__kmpc_end_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP2]]), !llvm.access.group !59 // CHECK11-NEXT: br label [[OMP_IF_END]] // CHECK11: omp_if.end: // CHECK11-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK11: omp.inner.for.inc: -// CHECK11-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK11-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK11-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !59 +// CHECK11-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !59 // CHECK11-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP14]], [[TMP15]] -// CHECK11-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !59 // CHECK11-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP60:![0-9]+]] // CHECK11: omp.inner.for.end: // CHECK11-NEXT: br label [[OMP_LOOP_EXIT:%.*]] @@ -11810,23 +11810,23 @@ int main() { // CHECK11-NEXT: store i32 [[TMP6]], i32* [[DOTOMP_IV]], align 4 // CHECK11-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK11: omp.inner.for.cond: -// CHECK11-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK11-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK11-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !62 +// CHECK11-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !62 // CHECK11-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP7]], [[TMP8]] // CHECK11-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK11: omp.inner.for.body: -// CHECK11-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !62 // CHECK11-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP9]], 1 // CHECK11-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK11-NEXT: store i32 [[ADD]], i32* [[I]], align 4 -// CHECK11-NEXT: call void @_Z3fn3v() +// CHECK11-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !62 +// CHECK11-NEXT: call void @_Z3fn3v(), !llvm.access.group !62 // CHECK11-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK11: 
omp.body.continue: // CHECK11-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK11: omp.inner.for.inc: -// CHECK11-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !62 // CHECK11-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP10]], 1 -// CHECK11-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !62 // CHECK11-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP63:![0-9]+]] // CHECK11: omp.inner.for.end: // CHECK11-NEXT: br label [[OMP_LOOP_EXIT:%.*]] @@ -11916,22 +11916,22 @@ int main() { // CHECK12-NEXT: store i32 [[TMP4]], i32* [[DOTOMP_IV]], align 4 // CHECK12-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK12: omp.inner.for.cond: -// CHECK12-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK12-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK12-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !15 +// CHECK12-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !15 // CHECK12-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] // CHECK12-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK12: omp.inner.for.body: -// CHECK12-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK12-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !15 // CHECK12-NEXT: [[TMP8:%.*]] = zext i32 [[TMP7]] to i64 -// CHECK12-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK12-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !15 // CHECK12-NEXT: [[TMP10:%.*]] = zext i32 [[TMP9]] to i64 -// CHECK12-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 2, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64)* @.omp_outlined..1 to void (i32*, i32*, ...)*), i64 [[TMP8]], i64 [[TMP10]]) +// CHECK12-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) 
@__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 2, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64)* @.omp_outlined..1 to void (i32*, i32*, ...)*), i64 [[TMP8]], i64 [[TMP10]]), !llvm.access.group !15 // CHECK12-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK12: omp.inner.for.inc: -// CHECK12-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK12-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK12-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !15 +// CHECK12-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !15 // CHECK12-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP11]], [[TMP12]] -// CHECK12-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 +// CHECK12-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !15 // CHECK12-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP16:![0-9]+]] // CHECK12: omp.inner.for.end: // CHECK12-NEXT: br label [[OMP_LOOP_EXIT:%.*]] @@ -11993,22 +11993,22 @@ int main() { // CHECK12-NEXT: store i32 [[TMP6]], i32* [[DOTOMP_IV]], align 4 // CHECK12-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK12: omp.inner.for.cond: -// CHECK12-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK12-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK12-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !19 +// CHECK12-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !19 // CHECK12-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP7]], [[TMP8]] // CHECK12-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK12: omp.inner.for.body: -// CHECK12-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK12-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !19 // CHECK12-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP9]], 1 // CHECK12-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK12-NEXT: store i32 [[ADD]], i32* [[I]], align 4 +// CHECK12-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !19 // CHECK12-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK12: omp.body.continue: // CHECK12-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK12: omp.inner.for.inc: -// CHECK12-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK12-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !19 // CHECK12-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP10]], 1 -// CHECK12-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4 +// CHECK12-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !19 // CHECK12-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP20:![0-9]+]] // CHECK12: omp.inner.for.end: // CHECK12-NEXT: br label [[OMP_LOOP_EXIT:%.*]] @@ -12068,26 +12068,26 @@ int main() { // CHECK12-NEXT: store i32 [[TMP4]], i32* [[DOTOMP_IV]], align 4 // CHECK12-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK12: omp.inner.for.cond: -// CHECK12-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK12-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK12-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !24 +// CHECK12-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !24 // CHECK12-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] // CHECK12-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // 
CHECK12: omp.inner.for.body: -// CHECK12-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK12-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !24 // CHECK12-NEXT: [[TMP8:%.*]] = zext i32 [[TMP7]] to i64 -// CHECK12-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK12-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !24 // CHECK12-NEXT: [[TMP10:%.*]] = zext i32 [[TMP9]] to i64 -// CHECK12-NEXT: call void @__kmpc_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]) -// CHECK12-NEXT: [[TMP11:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK12-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4 -// CHECK12-NEXT: call void @.omp_outlined..3(i32* [[TMP11]], i32* [[DOTBOUND_ZERO_ADDR]], i64 [[TMP8]], i64 [[TMP10]]) #[[ATTR2]] -// CHECK12-NEXT: call void @__kmpc_end_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]) +// CHECK12-NEXT: call void @__kmpc_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]), !llvm.access.group !24 +// CHECK12-NEXT: [[TMP11:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8, !llvm.access.group !24 +// CHECK12-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4, !llvm.access.group !24 +// CHECK12-NEXT: call void @.omp_outlined..3(i32* [[TMP11]], i32* [[DOTBOUND_ZERO_ADDR]], i64 [[TMP8]], i64 [[TMP10]]) #[[ATTR2]], !llvm.access.group !24 +// CHECK12-NEXT: call void @__kmpc_end_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]), !llvm.access.group !24 // CHECK12-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK12: omp.inner.for.inc: -// CHECK12-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK12-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK12-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !24 +// CHECK12-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !24 // CHECK12-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP12]], [[TMP13]] -// CHECK12-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 +// CHECK12-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !24 // CHECK12-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP25:![0-9]+]] // CHECK12: omp.inner.for.end: // CHECK12-NEXT: br label [[OMP_LOOP_EXIT:%.*]] @@ -12149,23 +12149,23 @@ int main() { // CHECK12-NEXT: store i32 [[TMP6]], i32* [[DOTOMP_IV]], align 4 // CHECK12-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK12: omp.inner.for.cond: -// CHECK12-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK12-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK12-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !27 +// CHECK12-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !27 // CHECK12-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP7]], [[TMP8]] // CHECK12-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK12: omp.inner.for.body: -// CHECK12-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK12-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !27 // CHECK12-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP9]], 1 // CHECK12-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK12-NEXT: store i32 [[ADD]], i32* [[I]], align 4 -// CHECK12-NEXT: call void @_Z9gtid_testv() +// CHECK12-NEXT: store i32 [[ADD]], i32* [[I]], align 4, 
!llvm.access.group !27 +// CHECK12-NEXT: call void @_Z9gtid_testv(), !llvm.access.group !27 // CHECK12-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK12: omp.body.continue: // CHECK12-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK12: omp.inner.for.inc: -// CHECK12-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK12-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !27 // CHECK12-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP10]], 1 -// CHECK12-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4 +// CHECK12-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !27 // CHECK12-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP28:![0-9]+]] // CHECK12: omp.inner.for.end: // CHECK12-NEXT: br label [[OMP_LOOP_EXIT:%.*]] @@ -12279,22 +12279,22 @@ int main() { // CHECK12-NEXT: store i32 [[TMP4]], i32* [[DOTOMP_IV]], align 4 // CHECK12-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK12: omp.inner.for.cond: -// CHECK12-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK12-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK12-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !30 +// CHECK12-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !30 // CHECK12-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] // CHECK12-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK12: omp.inner.for.body: -// CHECK12-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK12-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !30 // CHECK12-NEXT: [[TMP8:%.*]] = zext i32 [[TMP7]] to i64 -// CHECK12-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK12-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !30 // CHECK12-NEXT: [[TMP10:%.*]] = zext i32 [[TMP9]] to i64 -// CHECK12-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 2, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64)* @.omp_outlined..5 to void (i32*, i32*, ...)*), i64 [[TMP8]], i64 [[TMP10]]) +// CHECK12-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) 
@__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 2, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64)* @.omp_outlined..5 to void (i32*, i32*, ...)*), i64 [[TMP8]], i64 [[TMP10]]), !llvm.access.group !30 // CHECK12-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK12: omp.inner.for.inc: -// CHECK12-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK12-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK12-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !30 +// CHECK12-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !30 // CHECK12-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP11]], [[TMP12]] -// CHECK12-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 +// CHECK12-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !30 // CHECK12-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP31:![0-9]+]] // CHECK12: omp.inner.for.end: // CHECK12-NEXT: br label [[OMP_LOOP_EXIT:%.*]] @@ -12356,23 +12356,23 @@ int main() { // CHECK12-NEXT: store i32 [[TMP6]], i32* [[DOTOMP_IV]], align 4 // CHECK12-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK12: omp.inner.for.cond: -// CHECK12-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK12-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK12-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !33 +// CHECK12-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !33 // CHECK12-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP7]], [[TMP8]] // CHECK12-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK12: omp.inner.for.body: -// CHECK12-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK12-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !33 // CHECK12-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP9]], 1 // CHECK12-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK12-NEXT: store i32 [[ADD]], i32* [[I]], align 4 -// CHECK12-NEXT: call void @_Z3fn4v() +// CHECK12-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !33 +// CHECK12-NEXT: call void @_Z3fn4v(), !llvm.access.group !33 // CHECK12-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK12: omp.body.continue: // CHECK12-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK12: omp.inner.for.inc: -// CHECK12-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK12-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !33 // CHECK12-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP10]], 1 -// CHECK12-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4 +// CHECK12-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !33 // CHECK12-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP34:![0-9]+]] // CHECK12: omp.inner.for.end: // CHECK12-NEXT: br label [[OMP_LOOP_EXIT:%.*]] @@ -12607,41 +12607,41 @@ int main() { // CHECK12: omp_if.then: // CHECK12-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK12: omp.inner.for.cond: -// CHECK12-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK12-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK12-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !39 +// CHECK12-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !39 // CHECK12-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] // CHECK12-NEXT: br i1 [[CMP2]], label 
[[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK12: omp.inner.for.body: -// CHECK12-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK12-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !39 // CHECK12-NEXT: [[TMP11:%.*]] = zext i32 [[TMP10]] to i64 -// CHECK12-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK12-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !39 // CHECK12-NEXT: [[TMP13:%.*]] = zext i32 [[TMP12]] to i64 -// CHECK12-NEXT: [[TMP14:%.*]] = load i8, i8* [[DOTCAPTURE_EXPR_]], align 1 +// CHECK12-NEXT: [[TMP14:%.*]] = load i8, i8* [[DOTCAPTURE_EXPR_]], align 1, !llvm.access.group !39 // CHECK12-NEXT: [[TOBOOL3:%.*]] = trunc i8 [[TMP14]] to i1 // CHECK12-NEXT: [[CONV:%.*]] = bitcast i64* [[DOTCAPTURE_EXPR__CASTED]] to i8* // CHECK12-NEXT: [[FROMBOOL4:%.*]] = zext i1 [[TOBOOL3]] to i8 -// CHECK12-NEXT: store i8 [[FROMBOOL4]], i8* [[CONV]], align 1 -// CHECK12-NEXT: [[TMP15:%.*]] = load i64, i64* [[DOTCAPTURE_EXPR__CASTED]], align 8 -// CHECK12-NEXT: [[TMP16:%.*]] = load i8, i8* [[DOTCAPTURE_EXPR_]], align 1 +// CHECK12-NEXT: store i8 [[FROMBOOL4]], i8* [[CONV]], align 1, !llvm.access.group !39 +// CHECK12-NEXT: [[TMP15:%.*]] = load i64, i64* [[DOTCAPTURE_EXPR__CASTED]], align 8, !llvm.access.group !39 +// CHECK12-NEXT: [[TMP16:%.*]] = load i8, i8* [[DOTCAPTURE_EXPR_]], align 1, !llvm.access.group !39 // CHECK12-NEXT: [[TOBOOL5:%.*]] = trunc i8 [[TMP16]] to i1 // CHECK12-NEXT: br i1 [[TOBOOL5]], label [[OMP_IF_THEN6:%.*]], label [[OMP_IF_ELSE:%.*]] // CHECK12: omp_if.then6: -// CHECK12-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 3, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64, i64)* @.omp_outlined..9 to void (i32*, i32*, ...)*), i64 [[TMP11]], i64 [[TMP13]], i64 [[TMP15]]) +// CHECK12-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) 
@__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 3, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64, i64)* @.omp_outlined..9 to void (i32*, i32*, ...)*), i64 [[TMP11]], i64 [[TMP13]], i64 [[TMP15]]), !llvm.access.group !39 // CHECK12-NEXT: br label [[OMP_IF_END:%.*]] // CHECK12: omp_if.else: -// CHECK12-NEXT: call void @__kmpc_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP3]]) -// CHECK12-NEXT: [[TMP17:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK12-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4 -// CHECK12-NEXT: call void @.omp_outlined..9(i32* [[TMP17]], i32* [[DOTBOUND_ZERO_ADDR]], i64 [[TMP11]], i64 [[TMP13]], i64 [[TMP15]]) #[[ATTR2]] -// CHECK12-NEXT: call void @__kmpc_end_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP3]]) +// CHECK12-NEXT: call void @__kmpc_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP3]]), !llvm.access.group !39 +// CHECK12-NEXT: [[TMP17:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8, !llvm.access.group !39 +// CHECK12-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4, !llvm.access.group !39 +// CHECK12-NEXT: call void @.omp_outlined..9(i32* [[TMP17]], i32* [[DOTBOUND_ZERO_ADDR]], i64 [[TMP11]], i64 [[TMP13]], i64 [[TMP15]]) #[[ATTR2]], !llvm.access.group !39 +// CHECK12-NEXT: call void @__kmpc_end_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP3]]), !llvm.access.group !39 // CHECK12-NEXT: br label [[OMP_IF_END]] // CHECK12: omp_if.end: // CHECK12-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK12: omp.inner.for.inc: -// CHECK12-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK12-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK12-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !39 +// CHECK12-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !39 // CHECK12-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP18]], [[TMP19]] -// CHECK12-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 +// CHECK12-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !39 // CHECK12-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP40:![0-9]+]] // CHECK12: omp.inner.for.end: // CHECK12-NEXT: br label [[OMP_IF_END23:%.*]] @@ -12753,23 +12753,23 @@ int main() { // CHECK12-NEXT: store i32 [[TMP7]], i32* [[DOTOMP_IV]], align 4 // CHECK12-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK12: omp.inner.for.cond: -// CHECK12-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK12-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK12-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !43 +// CHECK12-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !43 // CHECK12-NEXT: [[CMP3:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] // CHECK12-NEXT: br i1 [[CMP3]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK12: omp.inner.for.body: -// CHECK12-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK12-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !43 // CHECK12-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP10]], 1 // CHECK12-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK12-NEXT: store i32 [[ADD]], i32* [[I]], align 4 -// CHECK12-NEXT: call void @_Z3fn6v() +// CHECK12-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !43 +// CHECK12-NEXT: call void @_Z3fn6v(), !llvm.access.group !43 // CHECK12-NEXT: br 
label [[OMP_BODY_CONTINUE:%.*]] // CHECK12: omp.body.continue: // CHECK12-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK12: omp.inner.for.inc: -// CHECK12-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK12-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !43 // CHECK12-NEXT: [[ADD4:%.*]] = add nsw i32 [[TMP11]], 1 -// CHECK12-NEXT: store i32 [[ADD4]], i32* [[DOTOMP_IV]], align 4 +// CHECK12-NEXT: store i32 [[ADD4]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !43 // CHECK12-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP44:![0-9]+]] // CHECK12: omp.inner.for.end: // CHECK12-NEXT: br label [[OMP_IF_END:%.*]] @@ -12881,23 +12881,23 @@ int main() { // CHECK12-NEXT: store i32 [[TMP7]], i32* [[DOTOMP_IV]], align 4 // CHECK12-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK12: omp.inner.for.cond: -// CHECK12-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK12-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK12-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !47 +// CHECK12-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !47 // CHECK12-NEXT: [[CMP3:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] // CHECK12-NEXT: br i1 [[CMP3]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK12: omp.inner.for.body: -// CHECK12-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK12-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !47 // CHECK12-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP10]], 1 // CHECK12-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK12-NEXT: store i32 [[ADD]], i32* [[I]], align 4 -// CHECK12-NEXT: call void @_Z3fn6v() +// CHECK12-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !47 +// CHECK12-NEXT: call void @_Z3fn6v(), !llvm.access.group !47 // CHECK12-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK12: omp.body.continue: // CHECK12-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK12: omp.inner.for.inc: -// CHECK12-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK12-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !47 // CHECK12-NEXT: [[ADD4:%.*]] = add nsw i32 [[TMP11]], 1 -// CHECK12-NEXT: store i32 [[ADD4]], i32* [[DOTOMP_IV]], align 4 +// CHECK12-NEXT: store i32 [[ADD4]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !47 // CHECK12-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP48:![0-9]+]] // CHECK12: omp.inner.for.end: // CHECK12-NEXT: br label [[OMP_IF_END:%.*]] @@ -13052,22 +13052,22 @@ int main() { // CHECK12-NEXT: store i32 [[TMP4]], i32* [[DOTOMP_IV]], align 4 // CHECK12-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK12: omp.inner.for.cond: -// CHECK12-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK12-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK12-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !51 +// CHECK12-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !51 // CHECK12-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] // CHECK12-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK12: omp.inner.for.body: -// CHECK12-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK12-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !51 // CHECK12-NEXT: [[TMP8:%.*]] = 
zext i32 [[TMP7]] to i64 -// CHECK12-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK12-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !51 // CHECK12-NEXT: [[TMP10:%.*]] = zext i32 [[TMP9]] to i64 -// CHECK12-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 2, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64)* @.omp_outlined..12 to void (i32*, i32*, ...)*), i64 [[TMP8]], i64 [[TMP10]]) +// CHECK12-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 2, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64)* @.omp_outlined..12 to void (i32*, i32*, ...)*), i64 [[TMP8]], i64 [[TMP10]]), !llvm.access.group !51 // CHECK12-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK12: omp.inner.for.inc: -// CHECK12-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK12-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK12-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !51 +// CHECK12-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !51 // CHECK12-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP11]], [[TMP12]] -// CHECK12-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 +// CHECK12-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !51 // CHECK12-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP52:![0-9]+]] // CHECK12: omp.inner.for.end: // CHECK12-NEXT: br label [[OMP_LOOP_EXIT:%.*]] @@ -13129,23 +13129,23 @@ int main() { // CHECK12-NEXT: store i32 [[TMP6]], i32* [[DOTOMP_IV]], align 4 // CHECK12-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK12: omp.inner.for.cond: -// CHECK12-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK12-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK12-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !54 +// CHECK12-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !54 // CHECK12-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP7]], [[TMP8]] // CHECK12-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK12: omp.inner.for.body: -// CHECK12-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK12-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !54 // CHECK12-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP9]], 1 // CHECK12-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK12-NEXT: store i32 [[ADD]], i32* [[I]], align 4 -// CHECK12-NEXT: call void @_Z3fn1v() +// CHECK12-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !54 +// CHECK12-NEXT: call void @_Z3fn1v(), !llvm.access.group !54 // CHECK12-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK12: omp.body.continue: // CHECK12-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK12: omp.inner.for.inc: -// CHECK12-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK12-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !54 // CHECK12-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP10]], 1 -// CHECK12-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4 +// CHECK12-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !54 // CHECK12-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP55:![0-9]+]] // CHECK12: omp.inner.for.end: // CHECK12-NEXT: br label 
[[OMP_LOOP_EXIT:%.*]] @@ -13368,35 +13368,35 @@ int main() { // CHECK12-NEXT: store i32 [[TMP5]], i32* [[DOTOMP_IV]], align 4 // CHECK12-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK12: omp.inner.for.cond: -// CHECK12-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK12-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK12-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !59 +// CHECK12-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !59 // CHECK12-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] // CHECK12-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK12: omp.inner.for.body: -// CHECK12-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK12-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !59 // CHECK12-NEXT: [[TMP9:%.*]] = zext i32 [[TMP8]] to i64 -// CHECK12-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK12-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !59 // CHECK12-NEXT: [[TMP11:%.*]] = zext i32 [[TMP10]] to i64 -// CHECK12-NEXT: [[TMP12:%.*]] = load i32, i32* [[TMP0]], align 4 +// CHECK12-NEXT: [[TMP12:%.*]] = load i32, i32* [[TMP0]], align 4, !llvm.access.group !59 // CHECK12-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[TMP12]], 0 // CHECK12-NEXT: br i1 [[TOBOOL]], label [[OMP_IF_THEN:%.*]], label [[OMP_IF_ELSE:%.*]] // CHECK12: omp_if.then: -// CHECK12-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 2, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64)* @.omp_outlined..16 to void (i32*, i32*, ...)*), i64 [[TMP9]], i64 [[TMP11]]) +// CHECK12-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) 
@__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 2, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64)* @.omp_outlined..16 to void (i32*, i32*, ...)*), i64 [[TMP9]], i64 [[TMP11]]), !llvm.access.group !59 // CHECK12-NEXT: br label [[OMP_IF_END:%.*]] // CHECK12: omp_if.else: -// CHECK12-NEXT: call void @__kmpc_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP2]]) -// CHECK12-NEXT: [[TMP13:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK12-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4 -// CHECK12-NEXT: call void @.omp_outlined..16(i32* [[TMP13]], i32* [[DOTBOUND_ZERO_ADDR]], i64 [[TMP9]], i64 [[TMP11]]) #[[ATTR2]] -// CHECK12-NEXT: call void @__kmpc_end_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP2]]) +// CHECK12-NEXT: call void @__kmpc_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP2]]), !llvm.access.group !59 +// CHECK12-NEXT: [[TMP13:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8, !llvm.access.group !59 +// CHECK12-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4, !llvm.access.group !59 +// CHECK12-NEXT: call void @.omp_outlined..16(i32* [[TMP13]], i32* [[DOTBOUND_ZERO_ADDR]], i64 [[TMP9]], i64 [[TMP11]]) #[[ATTR2]], !llvm.access.group !59 +// CHECK12-NEXT: call void @__kmpc_end_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP2]]), !llvm.access.group !59 // CHECK12-NEXT: br label [[OMP_IF_END]] // CHECK12: omp_if.end: // CHECK12-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK12: omp.inner.for.inc: -// CHECK12-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK12-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK12-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !59 +// CHECK12-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !59 // CHECK12-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP14]], [[TMP15]] -// CHECK12-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 +// CHECK12-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !59 // CHECK12-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP60:![0-9]+]] // CHECK12: omp.inner.for.end: // CHECK12-NEXT: br label [[OMP_LOOP_EXIT:%.*]] @@ -13458,23 +13458,23 @@ int main() { // CHECK12-NEXT: store i32 [[TMP6]], i32* [[DOTOMP_IV]], align 4 // CHECK12-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK12: omp.inner.for.cond: -// CHECK12-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK12-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK12-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !62 +// CHECK12-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !62 // CHECK12-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP7]], [[TMP8]] // CHECK12-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK12: omp.inner.for.body: -// CHECK12-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK12-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !62 // CHECK12-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP9]], 1 // CHECK12-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK12-NEXT: store i32 [[ADD]], i32* [[I]], align 4 -// CHECK12-NEXT: call void @_Z3fn3v() +// CHECK12-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !62 +// CHECK12-NEXT: call void @_Z3fn3v(), !llvm.access.group !62 // CHECK12-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK12: 
omp.body.continue: // CHECK12-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK12: omp.inner.for.inc: -// CHECK12-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK12-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !62 // CHECK12-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP10]], 1 -// CHECK12-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4 +// CHECK12-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !62 // CHECK12-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP63:![0-9]+]] // CHECK12: omp.inner.for.end: // CHECK12-NEXT: br label [[OMP_LOOP_EXIT:%.*]] @@ -13516,22 +13516,22 @@ int main() { // CHECK13-NEXT: store i32 [[TMP0]], i32* [[DOTOMP_IV]], align 4 // CHECK13-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK13: omp.inner.for.cond: -// CHECK13-NEXT: [[TMP1:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK13-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK13-NEXT: [[TMP1:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !6 +// CHECK13-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !6 // CHECK13-NEXT: [[CMP:%.*]] = icmp sle i32 [[TMP1]], [[TMP2]] // CHECK13-NEXT: br i1 [[CMP]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK13: omp.inner.for.body: -// CHECK13-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK13-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !6 // CHECK13-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP3]], 1 // CHECK13-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK13-NEXT: store i32 [[ADD]], i32* [[I]], align 4 +// CHECK13-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !6 // CHECK13-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK13: omp.body.continue: // CHECK13-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK13: omp.inner.for.inc: -// CHECK13-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK13-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !6 // CHECK13-NEXT: [[ADD1:%.*]] = add nsw i32 [[TMP4]], 1 -// CHECK13-NEXT: store i32 [[ADD1]], i32* [[DOTOMP_IV]], align 4 +// CHECK13-NEXT: store i32 [[ADD1]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !6 // CHECK13-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP7:![0-9]+]] // CHECK13: omp.inner.for.end: // CHECK13-NEXT: store i32 100, i32* [[I]], align 4 @@ -13541,23 +13541,23 @@ int main() { // CHECK13-NEXT: store i32 [[TMP5]], i32* [[DOTOMP_IV5]], align 4 // CHECK13-NEXT: br label [[OMP_INNER_FOR_COND7:%.*]] // CHECK13: omp.inner.for.cond7: -// CHECK13-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV5]], align 4 -// CHECK13-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_UB4]], align 4 +// CHECK13-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV5]], align 4, !llvm.access.group !10 +// CHECK13-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_UB4]], align 4, !llvm.access.group !10 // CHECK13-NEXT: [[CMP8:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] // CHECK13-NEXT: br i1 [[CMP8]], label [[OMP_INNER_FOR_BODY9:%.*]], label [[OMP_INNER_FOR_END15:%.*]] // CHECK13: omp.inner.for.body9: -// CHECK13-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV5]], align 4 +// CHECK13-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV5]], align 4, !llvm.access.group !10 // CHECK13-NEXT: [[MUL10:%.*]] = mul nsw i32 [[TMP8]], 1 // CHECK13-NEXT: [[ADD11:%.*]] = add nsw i32 0, [[MUL10]] -// CHECK13-NEXT: store i32 [[ADD11]], i32* [[I6]], align 4 -// 
CHECK13-NEXT: call void @_Z9gtid_testv() +// CHECK13-NEXT: store i32 [[ADD11]], i32* [[I6]], align 4, !llvm.access.group !10 +// CHECK13-NEXT: call void @_Z9gtid_testv(), !llvm.access.group !10 // CHECK13-NEXT: br label [[OMP_BODY_CONTINUE12:%.*]] // CHECK13: omp.body.continue12: // CHECK13-NEXT: br label [[OMP_INNER_FOR_INC13:%.*]] // CHECK13: omp.inner.for.inc13: -// CHECK13-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV5]], align 4 +// CHECK13-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV5]], align 4, !llvm.access.group !10 // CHECK13-NEXT: [[ADD14:%.*]] = add nsw i32 [[TMP9]], 1 -// CHECK13-NEXT: store i32 [[ADD14]], i32* [[DOTOMP_IV5]], align 4 +// CHECK13-NEXT: store i32 [[ADD14]], i32* [[DOTOMP_IV5]], align 4, !llvm.access.group !10 // CHECK13-NEXT: br label [[OMP_INNER_FOR_COND7]], !llvm.loop [[LOOP11:![0-9]+]] // CHECK13: omp.inner.for.end15: // CHECK13-NEXT: store i32 100, i32* [[I6]], align 4 @@ -13590,23 +13590,23 @@ int main() { // CHECK13-NEXT: store i32 [[TMP0]], i32* [[DOTOMP_IV]], align 4 // CHECK13-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK13: omp.inner.for.cond: -// CHECK13-NEXT: [[TMP1:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK13-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK13-NEXT: [[TMP1:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !13 +// CHECK13-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !13 // CHECK13-NEXT: [[CMP:%.*]] = icmp sle i32 [[TMP1]], [[TMP2]] // CHECK13-NEXT: br i1 [[CMP]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK13: omp.inner.for.body: -// CHECK13-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK13-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !13 // CHECK13-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP3]], 1 // CHECK13-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK13-NEXT: store i32 [[ADD]], i32* [[I]], align 4 -// CHECK13-NEXT: call void @_Z3fn4v() +// CHECK13-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !13 +// CHECK13-NEXT: call void @_Z3fn4v(), !llvm.access.group !13 // CHECK13-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK13: omp.body.continue: // CHECK13-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK13: omp.inner.for.inc: -// CHECK13-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK13-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !13 // CHECK13-NEXT: [[ADD1:%.*]] = add nsw i32 [[TMP4]], 1 -// CHECK13-NEXT: store i32 [[ADD1]], i32* [[DOTOMP_IV]], align 4 +// CHECK13-NEXT: store i32 [[ADD1]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !13 // CHECK13-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP14:![0-9]+]] // CHECK13: omp.inner.for.end: // CHECK13-NEXT: store i32 100, i32* [[I]], align 4 @@ -13616,23 +13616,23 @@ int main() { // CHECK13-NEXT: store i32 [[TMP5]], i32* [[DOTOMP_IV5]], align 4 // CHECK13-NEXT: br label [[OMP_INNER_FOR_COND7:%.*]] // CHECK13: omp.inner.for.cond7: -// CHECK13-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV5]], align 4 -// CHECK13-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_UB4]], align 4 +// CHECK13-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV5]], align 4, !llvm.access.group !16 +// CHECK13-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_UB4]], align 4, !llvm.access.group !16 // CHECK13-NEXT: [[CMP8:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] // CHECK13-NEXT: br i1 [[CMP8]], label [[OMP_INNER_FOR_BODY9:%.*]], label [[OMP_INNER_FOR_END15:%.*]] // 
CHECK13: omp.inner.for.body9: -// CHECK13-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV5]], align 4 +// CHECK13-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV5]], align 4, !llvm.access.group !16 // CHECK13-NEXT: [[MUL10:%.*]] = mul nsw i32 [[TMP8]], 1 // CHECK13-NEXT: [[ADD11:%.*]] = add nsw i32 0, [[MUL10]] -// CHECK13-NEXT: store i32 [[ADD11]], i32* [[I6]], align 4 -// CHECK13-NEXT: call void @_Z3fn5v() +// CHECK13-NEXT: store i32 [[ADD11]], i32* [[I6]], align 4, !llvm.access.group !16 +// CHECK13-NEXT: call void @_Z3fn5v(), !llvm.access.group !16 // CHECK13-NEXT: br label [[OMP_BODY_CONTINUE12:%.*]] // CHECK13: omp.body.continue12: // CHECK13-NEXT: br label [[OMP_INNER_FOR_INC13:%.*]] // CHECK13: omp.inner.for.inc13: -// CHECK13-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV5]], align 4 +// CHECK13-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV5]], align 4, !llvm.access.group !16 // CHECK13-NEXT: [[ADD14:%.*]] = add nsw i32 [[TMP9]], 1 -// CHECK13-NEXT: store i32 [[ADD14]], i32* [[DOTOMP_IV5]], align 4 +// CHECK13-NEXT: store i32 [[ADD14]], i32* [[DOTOMP_IV5]], align 4, !llvm.access.group !16 // CHECK13-NEXT: br label [[OMP_INNER_FOR_COND7]], !llvm.loop [[LOOP17:![0-9]+]] // CHECK13: omp.inner.for.end15: // CHECK13-NEXT: store i32 100, i32* [[I6]], align 4 @@ -13642,23 +13642,23 @@ int main() { // CHECK13-NEXT: store i32 [[TMP10]], i32* [[DOTOMP_IV19]], align 4 // CHECK13-NEXT: br label [[OMP_INNER_FOR_COND21:%.*]] // CHECK13: omp.inner.for.cond21: -// CHECK13-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV19]], align 4 -// CHECK13-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_UB18]], align 4 +// CHECK13-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV19]], align 4, !llvm.access.group !19 +// CHECK13-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_UB18]], align 4, !llvm.access.group !19 // CHECK13-NEXT: [[CMP22:%.*]] = icmp sle i32 [[TMP11]], [[TMP12]] // CHECK13-NEXT: br i1 [[CMP22]], label [[OMP_INNER_FOR_BODY23:%.*]], label [[OMP_INNER_FOR_END29:%.*]] // CHECK13: omp.inner.for.body23: -// CHECK13-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_IV19]], align 4 +// CHECK13-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_IV19]], align 4, !llvm.access.group !19 // CHECK13-NEXT: [[MUL24:%.*]] = mul nsw i32 [[TMP13]], 1 // CHECK13-NEXT: [[ADD25:%.*]] = add nsw i32 0, [[MUL24]] -// CHECK13-NEXT: store i32 [[ADD25]], i32* [[I20]], align 4 -// CHECK13-NEXT: call void @_Z3fn6v() +// CHECK13-NEXT: store i32 [[ADD25]], i32* [[I20]], align 4, !llvm.access.group !19 +// CHECK13-NEXT: call void @_Z3fn6v(), !llvm.access.group !19 // CHECK13-NEXT: br label [[OMP_BODY_CONTINUE26:%.*]] // CHECK13: omp.body.continue26: // CHECK13-NEXT: br label [[OMP_INNER_FOR_INC27:%.*]] // CHECK13: omp.inner.for.inc27: -// CHECK13-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IV19]], align 4 +// CHECK13-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IV19]], align 4, !llvm.access.group !19 // CHECK13-NEXT: [[ADD28:%.*]] = add nsw i32 [[TMP14]], 1 -// CHECK13-NEXT: store i32 [[ADD28]], i32* [[DOTOMP_IV19]], align 4 +// CHECK13-NEXT: store i32 [[ADD28]], i32* [[DOTOMP_IV19]], align 4, !llvm.access.group !19 // CHECK13-NEXT: br label [[OMP_INNER_FOR_COND21]], !llvm.loop [[LOOP20:![0-9]+]] // CHECK13: omp.inner.for.end29: // CHECK13-NEXT: store i32 100, i32* [[I20]], align 4 @@ -13693,23 +13693,23 @@ int main() { // CHECK13-NEXT: store i32 [[TMP0]], i32* [[DOTOMP_IV]], align 4 // CHECK13-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK13: omp.inner.for.cond: -// CHECK13-NEXT: [[TMP1:%.*]] = load i32, i32* [[DOTOMP_IV]], 
align 4 -// CHECK13-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK13-NEXT: [[TMP1:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !22 +// CHECK13-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !22 // CHECK13-NEXT: [[CMP:%.*]] = icmp sle i32 [[TMP1]], [[TMP2]] // CHECK13-NEXT: br i1 [[CMP]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK13: omp.inner.for.body: -// CHECK13-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK13-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !22 // CHECK13-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP3]], 1 // CHECK13-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK13-NEXT: store i32 [[ADD]], i32* [[I]], align 4 -// CHECK13-NEXT: call void @_Z3fn1v() +// CHECK13-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !22 +// CHECK13-NEXT: call void @_Z3fn1v(), !llvm.access.group !22 // CHECK13-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK13: omp.body.continue: // CHECK13-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK13: omp.inner.for.inc: -// CHECK13-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK13-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !22 // CHECK13-NEXT: [[ADD1:%.*]] = add nsw i32 [[TMP4]], 1 -// CHECK13-NEXT: store i32 [[ADD1]], i32* [[DOTOMP_IV]], align 4 +// CHECK13-NEXT: store i32 [[ADD1]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !22 // CHECK13-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP23:![0-9]+]] // CHECK13: omp.inner.for.end: // CHECK13-NEXT: store i32 100, i32* [[I]], align 4 @@ -13719,23 +13719,23 @@ int main() { // CHECK13-NEXT: store i32 [[TMP5]], i32* [[DOTOMP_IV5]], align 4 // CHECK13-NEXT: br label [[OMP_INNER_FOR_COND7:%.*]] // CHECK13: omp.inner.for.cond7: -// CHECK13-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV5]], align 4 -// CHECK13-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_UB4]], align 4 +// CHECK13-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV5]], align 4, !llvm.access.group !25 +// CHECK13-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_UB4]], align 4, !llvm.access.group !25 // CHECK13-NEXT: [[CMP8:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] // CHECK13-NEXT: br i1 [[CMP8]], label [[OMP_INNER_FOR_BODY9:%.*]], label [[OMP_INNER_FOR_END15:%.*]] // CHECK13: omp.inner.for.body9: -// CHECK13-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV5]], align 4 +// CHECK13-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV5]], align 4, !llvm.access.group !25 // CHECK13-NEXT: [[MUL10:%.*]] = mul nsw i32 [[TMP8]], 1 // CHECK13-NEXT: [[ADD11:%.*]] = add nsw i32 0, [[MUL10]] -// CHECK13-NEXT: store i32 [[ADD11]], i32* [[I6]], align 4 -// CHECK13-NEXT: call void @_Z3fn2v() +// CHECK13-NEXT: store i32 [[ADD11]], i32* [[I6]], align 4, !llvm.access.group !25 +// CHECK13-NEXT: call void @_Z3fn2v(), !llvm.access.group !25 // CHECK13-NEXT: br label [[OMP_BODY_CONTINUE12:%.*]] // CHECK13: omp.body.continue12: // CHECK13-NEXT: br label [[OMP_INNER_FOR_INC13:%.*]] // CHECK13: omp.inner.for.inc13: -// CHECK13-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV5]], align 4 +// CHECK13-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV5]], align 4, !llvm.access.group !25 // CHECK13-NEXT: [[ADD14:%.*]] = add nsw i32 [[TMP9]], 1 -// CHECK13-NEXT: store i32 [[ADD14]], i32* [[DOTOMP_IV5]], align 4 +// CHECK13-NEXT: store i32 [[ADD14]], i32* [[DOTOMP_IV5]], align 4, !llvm.access.group !25 // CHECK13-NEXT: br label [[OMP_INNER_FOR_COND7]], 
!llvm.loop [[LOOP26:![0-9]+]] // CHECK13: omp.inner.for.end15: // CHECK13-NEXT: store i32 100, i32* [[I6]], align 4 @@ -13745,23 +13745,23 @@ int main() { // CHECK13-NEXT: store i32 [[TMP10]], i32* [[DOTOMP_IV19]], align 4 // CHECK13-NEXT: br label [[OMP_INNER_FOR_COND21:%.*]] // CHECK13: omp.inner.for.cond21: -// CHECK13-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV19]], align 4 -// CHECK13-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_UB18]], align 4 +// CHECK13-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV19]], align 4, !llvm.access.group !28 +// CHECK13-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_UB18]], align 4, !llvm.access.group !28 // CHECK13-NEXT: [[CMP22:%.*]] = icmp sle i32 [[TMP11]], [[TMP12]] // CHECK13-NEXT: br i1 [[CMP22]], label [[OMP_INNER_FOR_BODY23:%.*]], label [[OMP_INNER_FOR_END29:%.*]] // CHECK13: omp.inner.for.body23: -// CHECK13-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_IV19]], align 4 +// CHECK13-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_IV19]], align 4, !llvm.access.group !28 // CHECK13-NEXT: [[MUL24:%.*]] = mul nsw i32 [[TMP13]], 1 // CHECK13-NEXT: [[ADD25:%.*]] = add nsw i32 0, [[MUL24]] -// CHECK13-NEXT: store i32 [[ADD25]], i32* [[I20]], align 4 -// CHECK13-NEXT: call void @_Z3fn3v() +// CHECK13-NEXT: store i32 [[ADD25]], i32* [[I20]], align 4, !llvm.access.group !28 +// CHECK13-NEXT: call void @_Z3fn3v(), !llvm.access.group !28 // CHECK13-NEXT: br label [[OMP_BODY_CONTINUE26:%.*]] // CHECK13: omp.body.continue26: // CHECK13-NEXT: br label [[OMP_INNER_FOR_INC27:%.*]] // CHECK13: omp.inner.for.inc27: -// CHECK13-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IV19]], align 4 +// CHECK13-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IV19]], align 4, !llvm.access.group !28 // CHECK13-NEXT: [[ADD28:%.*]] = add nsw i32 [[TMP14]], 1 -// CHECK13-NEXT: store i32 [[ADD28]], i32* [[DOTOMP_IV19]], align 4 +// CHECK13-NEXT: store i32 [[ADD28]], i32* [[DOTOMP_IV19]], align 4, !llvm.access.group !28 // CHECK13-NEXT: br label [[OMP_INNER_FOR_COND21]], !llvm.loop [[LOOP29:![0-9]+]] // CHECK13: omp.inner.for.end29: // CHECK13-NEXT: store i32 100, i32* [[I20]], align 4 @@ -13787,22 +13787,22 @@ int main() { // CHECK14-NEXT: store i32 [[TMP0]], i32* [[DOTOMP_IV]], align 4 // CHECK14-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK14: omp.inner.for.cond: -// CHECK14-NEXT: [[TMP1:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK14-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK14-NEXT: [[TMP1:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !6 +// CHECK14-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !6 // CHECK14-NEXT: [[CMP:%.*]] = icmp sle i32 [[TMP1]], [[TMP2]] // CHECK14-NEXT: br i1 [[CMP]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK14: omp.inner.for.body: -// CHECK14-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK14-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !6 // CHECK14-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP3]], 1 // CHECK14-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK14-NEXT: store i32 [[ADD]], i32* [[I]], align 4 +// CHECK14-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !6 // CHECK14-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK14: omp.body.continue: // CHECK14-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK14: omp.inner.for.inc: -// CHECK14-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK14-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_IV]], 
align 4, !llvm.access.group !6 // CHECK14-NEXT: [[ADD1:%.*]] = add nsw i32 [[TMP4]], 1 -// CHECK14-NEXT: store i32 [[ADD1]], i32* [[DOTOMP_IV]], align 4 +// CHECK14-NEXT: store i32 [[ADD1]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !6 // CHECK14-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP7:![0-9]+]] // CHECK14: omp.inner.for.end: // CHECK14-NEXT: store i32 100, i32* [[I]], align 4 @@ -13812,23 +13812,23 @@ int main() { // CHECK14-NEXT: store i32 [[TMP5]], i32* [[DOTOMP_IV5]], align 4 // CHECK14-NEXT: br label [[OMP_INNER_FOR_COND7:%.*]] // CHECK14: omp.inner.for.cond7: -// CHECK14-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV5]], align 4 -// CHECK14-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_UB4]], align 4 +// CHECK14-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV5]], align 4, !llvm.access.group !10 +// CHECK14-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_UB4]], align 4, !llvm.access.group !10 // CHECK14-NEXT: [[CMP8:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] // CHECK14-NEXT: br i1 [[CMP8]], label [[OMP_INNER_FOR_BODY9:%.*]], label [[OMP_INNER_FOR_END15:%.*]] // CHECK14: omp.inner.for.body9: -// CHECK14-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV5]], align 4 +// CHECK14-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV5]], align 4, !llvm.access.group !10 // CHECK14-NEXT: [[MUL10:%.*]] = mul nsw i32 [[TMP8]], 1 // CHECK14-NEXT: [[ADD11:%.*]] = add nsw i32 0, [[MUL10]] -// CHECK14-NEXT: store i32 [[ADD11]], i32* [[I6]], align 4 -// CHECK14-NEXT: call void @_Z9gtid_testv() +// CHECK14-NEXT: store i32 [[ADD11]], i32* [[I6]], align 4, !llvm.access.group !10 +// CHECK14-NEXT: call void @_Z9gtid_testv(), !llvm.access.group !10 // CHECK14-NEXT: br label [[OMP_BODY_CONTINUE12:%.*]] // CHECK14: omp.body.continue12: // CHECK14-NEXT: br label [[OMP_INNER_FOR_INC13:%.*]] // CHECK14: omp.inner.for.inc13: -// CHECK14-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV5]], align 4 +// CHECK14-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV5]], align 4, !llvm.access.group !10 // CHECK14-NEXT: [[ADD14:%.*]] = add nsw i32 [[TMP9]], 1 -// CHECK14-NEXT: store i32 [[ADD14]], i32* [[DOTOMP_IV5]], align 4 +// CHECK14-NEXT: store i32 [[ADD14]], i32* [[DOTOMP_IV5]], align 4, !llvm.access.group !10 // CHECK14-NEXT: br label [[OMP_INNER_FOR_COND7]], !llvm.loop [[LOOP11:![0-9]+]] // CHECK14: omp.inner.for.end15: // CHECK14-NEXT: store i32 100, i32* [[I6]], align 4 @@ -13861,23 +13861,23 @@ int main() { // CHECK14-NEXT: store i32 [[TMP0]], i32* [[DOTOMP_IV]], align 4 // CHECK14-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK14: omp.inner.for.cond: -// CHECK14-NEXT: [[TMP1:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK14-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK14-NEXT: [[TMP1:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !13 +// CHECK14-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !13 // CHECK14-NEXT: [[CMP:%.*]] = icmp sle i32 [[TMP1]], [[TMP2]] // CHECK14-NEXT: br i1 [[CMP]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK14: omp.inner.for.body: -// CHECK14-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK14-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !13 // CHECK14-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP3]], 1 // CHECK14-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK14-NEXT: store i32 [[ADD]], i32* [[I]], align 4 -// CHECK14-NEXT: call void @_Z3fn4v() +// CHECK14-NEXT: store i32 [[ADD]], i32* [[I]], align 4, 
!llvm.access.group !13 +// CHECK14-NEXT: call void @_Z3fn4v(), !llvm.access.group !13 // CHECK14-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK14: omp.body.continue: // CHECK14-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK14: omp.inner.for.inc: -// CHECK14-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK14-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !13 // CHECK14-NEXT: [[ADD1:%.*]] = add nsw i32 [[TMP4]], 1 -// CHECK14-NEXT: store i32 [[ADD1]], i32* [[DOTOMP_IV]], align 4 +// CHECK14-NEXT: store i32 [[ADD1]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !13 // CHECK14-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP14:![0-9]+]] // CHECK14: omp.inner.for.end: // CHECK14-NEXT: store i32 100, i32* [[I]], align 4 @@ -13887,23 +13887,23 @@ int main() { // CHECK14-NEXT: store i32 [[TMP5]], i32* [[DOTOMP_IV5]], align 4 // CHECK14-NEXT: br label [[OMP_INNER_FOR_COND7:%.*]] // CHECK14: omp.inner.for.cond7: -// CHECK14-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV5]], align 4 -// CHECK14-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_UB4]], align 4 +// CHECK14-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV5]], align 4, !llvm.access.group !16 +// CHECK14-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_UB4]], align 4, !llvm.access.group !16 // CHECK14-NEXT: [[CMP8:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] // CHECK14-NEXT: br i1 [[CMP8]], label [[OMP_INNER_FOR_BODY9:%.*]], label [[OMP_INNER_FOR_END15:%.*]] // CHECK14: omp.inner.for.body9: -// CHECK14-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV5]], align 4 +// CHECK14-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV5]], align 4, !llvm.access.group !16 // CHECK14-NEXT: [[MUL10:%.*]] = mul nsw i32 [[TMP8]], 1 // CHECK14-NEXT: [[ADD11:%.*]] = add nsw i32 0, [[MUL10]] -// CHECK14-NEXT: store i32 [[ADD11]], i32* [[I6]], align 4 -// CHECK14-NEXT: call void @_Z3fn5v() +// CHECK14-NEXT: store i32 [[ADD11]], i32* [[I6]], align 4, !llvm.access.group !16 +// CHECK14-NEXT: call void @_Z3fn5v(), !llvm.access.group !16 // CHECK14-NEXT: br label [[OMP_BODY_CONTINUE12:%.*]] // CHECK14: omp.body.continue12: // CHECK14-NEXT: br label [[OMP_INNER_FOR_INC13:%.*]] // CHECK14: omp.inner.for.inc13: -// CHECK14-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV5]], align 4 +// CHECK14-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV5]], align 4, !llvm.access.group !16 // CHECK14-NEXT: [[ADD14:%.*]] = add nsw i32 [[TMP9]], 1 -// CHECK14-NEXT: store i32 [[ADD14]], i32* [[DOTOMP_IV5]], align 4 +// CHECK14-NEXT: store i32 [[ADD14]], i32* [[DOTOMP_IV5]], align 4, !llvm.access.group !16 // CHECK14-NEXT: br label [[OMP_INNER_FOR_COND7]], !llvm.loop [[LOOP17:![0-9]+]] // CHECK14: omp.inner.for.end15: // CHECK14-NEXT: store i32 100, i32* [[I6]], align 4 @@ -13913,23 +13913,23 @@ int main() { // CHECK14-NEXT: store i32 [[TMP10]], i32* [[DOTOMP_IV19]], align 4 // CHECK14-NEXT: br label [[OMP_INNER_FOR_COND21:%.*]] // CHECK14: omp.inner.for.cond21: -// CHECK14-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV19]], align 4 -// CHECK14-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_UB18]], align 4 +// CHECK14-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV19]], align 4, !llvm.access.group !19 +// CHECK14-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_UB18]], align 4, !llvm.access.group !19 // CHECK14-NEXT: [[CMP22:%.*]] = icmp sle i32 [[TMP11]], [[TMP12]] // CHECK14-NEXT: br i1 [[CMP22]], label [[OMP_INNER_FOR_BODY23:%.*]], label [[OMP_INNER_FOR_END29:%.*]] // CHECK14: omp.inner.for.body23: -// CHECK14-NEXT: [[TMP13:%.*]] = 
load i32, i32* [[DOTOMP_IV19]], align 4 +// CHECK14-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_IV19]], align 4, !llvm.access.group !19 // CHECK14-NEXT: [[MUL24:%.*]] = mul nsw i32 [[TMP13]], 1 // CHECK14-NEXT: [[ADD25:%.*]] = add nsw i32 0, [[MUL24]] -// CHECK14-NEXT: store i32 [[ADD25]], i32* [[I20]], align 4 -// CHECK14-NEXT: call void @_Z3fn6v() +// CHECK14-NEXT: store i32 [[ADD25]], i32* [[I20]], align 4, !llvm.access.group !19 +// CHECK14-NEXT: call void @_Z3fn6v(), !llvm.access.group !19 // CHECK14-NEXT: br label [[OMP_BODY_CONTINUE26:%.*]] // CHECK14: omp.body.continue26: // CHECK14-NEXT: br label [[OMP_INNER_FOR_INC27:%.*]] // CHECK14: omp.inner.for.inc27: -// CHECK14-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IV19]], align 4 +// CHECK14-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IV19]], align 4, !llvm.access.group !19 // CHECK14-NEXT: [[ADD28:%.*]] = add nsw i32 [[TMP14]], 1 -// CHECK14-NEXT: store i32 [[ADD28]], i32* [[DOTOMP_IV19]], align 4 +// CHECK14-NEXT: store i32 [[ADD28]], i32* [[DOTOMP_IV19]], align 4, !llvm.access.group !19 // CHECK14-NEXT: br label [[OMP_INNER_FOR_COND21]], !llvm.loop [[LOOP20:![0-9]+]] // CHECK14: omp.inner.for.end29: // CHECK14-NEXT: store i32 100, i32* [[I20]], align 4 @@ -13964,23 +13964,23 @@ int main() { // CHECK14-NEXT: store i32 [[TMP0]], i32* [[DOTOMP_IV]], align 4 // CHECK14-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK14: omp.inner.for.cond: -// CHECK14-NEXT: [[TMP1:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK14-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK14-NEXT: [[TMP1:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !22 +// CHECK14-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !22 // CHECK14-NEXT: [[CMP:%.*]] = icmp sle i32 [[TMP1]], [[TMP2]] // CHECK14-NEXT: br i1 [[CMP]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK14: omp.inner.for.body: -// CHECK14-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK14-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !22 // CHECK14-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP3]], 1 // CHECK14-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK14-NEXT: store i32 [[ADD]], i32* [[I]], align 4 -// CHECK14-NEXT: call void @_Z3fn1v() +// CHECK14-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !22 +// CHECK14-NEXT: call void @_Z3fn1v(), !llvm.access.group !22 // CHECK14-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK14: omp.body.continue: // CHECK14-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK14: omp.inner.for.inc: -// CHECK14-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK14-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !22 // CHECK14-NEXT: [[ADD1:%.*]] = add nsw i32 [[TMP4]], 1 -// CHECK14-NEXT: store i32 [[ADD1]], i32* [[DOTOMP_IV]], align 4 +// CHECK14-NEXT: store i32 [[ADD1]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !22 // CHECK14-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP23:![0-9]+]] // CHECK14: omp.inner.for.end: // CHECK14-NEXT: store i32 100, i32* [[I]], align 4 @@ -13990,23 +13990,23 @@ int main() { // CHECK14-NEXT: store i32 [[TMP5]], i32* [[DOTOMP_IV5]], align 4 // CHECK14-NEXT: br label [[OMP_INNER_FOR_COND7:%.*]] // CHECK14: omp.inner.for.cond7: -// CHECK14-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV5]], align 4 -// CHECK14-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_UB4]], align 4 +// CHECK14-NEXT: [[TMP6:%.*]] = load i32, 
i32* [[DOTOMP_IV5]], align 4, !llvm.access.group !25 +// CHECK14-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_UB4]], align 4, !llvm.access.group !25 // CHECK14-NEXT: [[CMP8:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] // CHECK14-NEXT: br i1 [[CMP8]], label [[OMP_INNER_FOR_BODY9:%.*]], label [[OMP_INNER_FOR_END15:%.*]] // CHECK14: omp.inner.for.body9: -// CHECK14-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV5]], align 4 +// CHECK14-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV5]], align 4, !llvm.access.group !25 // CHECK14-NEXT: [[MUL10:%.*]] = mul nsw i32 [[TMP8]], 1 // CHECK14-NEXT: [[ADD11:%.*]] = add nsw i32 0, [[MUL10]] -// CHECK14-NEXT: store i32 [[ADD11]], i32* [[I6]], align 4 -// CHECK14-NEXT: call void @_Z3fn2v() +// CHECK14-NEXT: store i32 [[ADD11]], i32* [[I6]], align 4, !llvm.access.group !25 +// CHECK14-NEXT: call void @_Z3fn2v(), !llvm.access.group !25 // CHECK14-NEXT: br label [[OMP_BODY_CONTINUE12:%.*]] // CHECK14: omp.body.continue12: // CHECK14-NEXT: br label [[OMP_INNER_FOR_INC13:%.*]] // CHECK14: omp.inner.for.inc13: -// CHECK14-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV5]], align 4 +// CHECK14-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV5]], align 4, !llvm.access.group !25 // CHECK14-NEXT: [[ADD14:%.*]] = add nsw i32 [[TMP9]], 1 -// CHECK14-NEXT: store i32 [[ADD14]], i32* [[DOTOMP_IV5]], align 4 +// CHECK14-NEXT: store i32 [[ADD14]], i32* [[DOTOMP_IV5]], align 4, !llvm.access.group !25 // CHECK14-NEXT: br label [[OMP_INNER_FOR_COND7]], !llvm.loop [[LOOP26:![0-9]+]] // CHECK14: omp.inner.for.end15: // CHECK14-NEXT: store i32 100, i32* [[I6]], align 4 @@ -14016,23 +14016,23 @@ int main() { // CHECK14-NEXT: store i32 [[TMP10]], i32* [[DOTOMP_IV19]], align 4 // CHECK14-NEXT: br label [[OMP_INNER_FOR_COND21:%.*]] // CHECK14: omp.inner.for.cond21: -// CHECK14-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV19]], align 4 -// CHECK14-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_UB18]], align 4 +// CHECK14-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV19]], align 4, !llvm.access.group !28 +// CHECK14-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_UB18]], align 4, !llvm.access.group !28 // CHECK14-NEXT: [[CMP22:%.*]] = icmp sle i32 [[TMP11]], [[TMP12]] // CHECK14-NEXT: br i1 [[CMP22]], label [[OMP_INNER_FOR_BODY23:%.*]], label [[OMP_INNER_FOR_END29:%.*]] // CHECK14: omp.inner.for.body23: -// CHECK14-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_IV19]], align 4 +// CHECK14-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_IV19]], align 4, !llvm.access.group !28 // CHECK14-NEXT: [[MUL24:%.*]] = mul nsw i32 [[TMP13]], 1 // CHECK14-NEXT: [[ADD25:%.*]] = add nsw i32 0, [[MUL24]] -// CHECK14-NEXT: store i32 [[ADD25]], i32* [[I20]], align 4 -// CHECK14-NEXT: call void @_Z3fn3v() +// CHECK14-NEXT: store i32 [[ADD25]], i32* [[I20]], align 4, !llvm.access.group !28 +// CHECK14-NEXT: call void @_Z3fn3v(), !llvm.access.group !28 // CHECK14-NEXT: br label [[OMP_BODY_CONTINUE26:%.*]] // CHECK14: omp.body.continue26: // CHECK14-NEXT: br label [[OMP_INNER_FOR_INC27:%.*]] // CHECK14: omp.inner.for.inc27: -// CHECK14-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IV19]], align 4 +// CHECK14-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IV19]], align 4, !llvm.access.group !28 // CHECK14-NEXT: [[ADD28:%.*]] = add nsw i32 [[TMP14]], 1 -// CHECK14-NEXT: store i32 [[ADD28]], i32* [[DOTOMP_IV19]], align 4 +// CHECK14-NEXT: store i32 [[ADD28]], i32* [[DOTOMP_IV19]], align 4, !llvm.access.group !28 // CHECK14-NEXT: br label [[OMP_INNER_FOR_COND21]], !llvm.loop [[LOOP29:![0-9]+]] // CHECK14: 
omp.inner.for.end29: // CHECK14-NEXT: store i32 100, i32* [[I20]], align 4 @@ -14058,22 +14058,22 @@ int main() { // CHECK15-NEXT: store i32 [[TMP0]], i32* [[DOTOMP_IV]], align 4 // CHECK15-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK15: omp.inner.for.cond: -// CHECK15-NEXT: [[TMP1:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK15-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK15-NEXT: [[TMP1:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !6 +// CHECK15-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !6 // CHECK15-NEXT: [[CMP:%.*]] = icmp sle i32 [[TMP1]], [[TMP2]] // CHECK15-NEXT: br i1 [[CMP]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK15: omp.inner.for.body: -// CHECK15-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK15-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !6 // CHECK15-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP3]], 1 // CHECK15-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK15-NEXT: store i32 [[ADD]], i32* [[I]], align 4 +// CHECK15-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !6 // CHECK15-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK15: omp.body.continue: // CHECK15-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK15: omp.inner.for.inc: -// CHECK15-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK15-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !6 // CHECK15-NEXT: [[ADD1:%.*]] = add nsw i32 [[TMP4]], 1 -// CHECK15-NEXT: store i32 [[ADD1]], i32* [[DOTOMP_IV]], align 4 +// CHECK15-NEXT: store i32 [[ADD1]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !6 // CHECK15-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP7:![0-9]+]] // CHECK15: omp.inner.for.end: // CHECK15-NEXT: store i32 100, i32* [[I]], align 4 @@ -14083,23 +14083,23 @@ int main() { // CHECK15-NEXT: store i32 [[TMP5]], i32* [[DOTOMP_IV5]], align 4 // CHECK15-NEXT: br label [[OMP_INNER_FOR_COND7:%.*]] // CHECK15: omp.inner.for.cond7: -// CHECK15-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV5]], align 4 -// CHECK15-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_UB4]], align 4 +// CHECK15-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV5]], align 4, !llvm.access.group !10 +// CHECK15-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_UB4]], align 4, !llvm.access.group !10 // CHECK15-NEXT: [[CMP8:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] // CHECK15-NEXT: br i1 [[CMP8]], label [[OMP_INNER_FOR_BODY9:%.*]], label [[OMP_INNER_FOR_END15:%.*]] // CHECK15: omp.inner.for.body9: -// CHECK15-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV5]], align 4 +// CHECK15-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV5]], align 4, !llvm.access.group !10 // CHECK15-NEXT: [[MUL10:%.*]] = mul nsw i32 [[TMP8]], 1 // CHECK15-NEXT: [[ADD11:%.*]] = add nsw i32 0, [[MUL10]] -// CHECK15-NEXT: store i32 [[ADD11]], i32* [[I6]], align 4 -// CHECK15-NEXT: call void @_Z9gtid_testv() +// CHECK15-NEXT: store i32 [[ADD11]], i32* [[I6]], align 4, !llvm.access.group !10 +// CHECK15-NEXT: call void @_Z9gtid_testv(), !llvm.access.group !10 // CHECK15-NEXT: br label [[OMP_BODY_CONTINUE12:%.*]] // CHECK15: omp.body.continue12: // CHECK15-NEXT: br label [[OMP_INNER_FOR_INC13:%.*]] // CHECK15: omp.inner.for.inc13: -// CHECK15-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV5]], align 4 +// CHECK15-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV5]], align 4, !llvm.access.group !10 // CHECK15-NEXT: [[ADD14:%.*]] = add nsw 
i32 [[TMP9]], 1 -// CHECK15-NEXT: store i32 [[ADD14]], i32* [[DOTOMP_IV5]], align 4 +// CHECK15-NEXT: store i32 [[ADD14]], i32* [[DOTOMP_IV5]], align 4, !llvm.access.group !10 // CHECK15-NEXT: br label [[OMP_INNER_FOR_COND7]], !llvm.loop [[LOOP11:![0-9]+]] // CHECK15: omp.inner.for.end15: // CHECK15-NEXT: store i32 100, i32* [[I6]], align 4 @@ -14133,23 +14133,23 @@ int main() { // CHECK15-NEXT: store i32 [[TMP0]], i32* [[DOTOMP_IV]], align 4 // CHECK15-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK15: omp.inner.for.cond: -// CHECK15-NEXT: [[TMP1:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK15-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK15-NEXT: [[TMP1:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !13 +// CHECK15-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !13 // CHECK15-NEXT: [[CMP:%.*]] = icmp sle i32 [[TMP1]], [[TMP2]] // CHECK15-NEXT: br i1 [[CMP]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK15: omp.inner.for.body: -// CHECK15-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK15-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !13 // CHECK15-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP3]], 1 // CHECK15-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK15-NEXT: store i32 [[ADD]], i32* [[I]], align 4 -// CHECK15-NEXT: call void @_Z3fn4v() +// CHECK15-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !13 +// CHECK15-NEXT: call void @_Z3fn4v(), !llvm.access.group !13 // CHECK15-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK15: omp.body.continue: // CHECK15-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK15: omp.inner.for.inc: -// CHECK15-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK15-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !13 // CHECK15-NEXT: [[ADD1:%.*]] = add nsw i32 [[TMP4]], 1 -// CHECK15-NEXT: store i32 [[ADD1]], i32* [[DOTOMP_IV]], align 4 +// CHECK15-NEXT: store i32 [[ADD1]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !13 // CHECK15-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP14:![0-9]+]] // CHECK15: omp.inner.for.end: // CHECK15-NEXT: store i32 100, i32* [[I]], align 4 @@ -14193,23 +14193,23 @@ int main() { // CHECK15: omp_if.then: // CHECK15-NEXT: br label [[OMP_INNER_FOR_COND22:%.*]] // CHECK15: omp.inner.for.cond22: -// CHECK15-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_IV19]], align 4 -// CHECK15-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_UB18]], align 4 +// CHECK15-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_IV19]], align 4, !llvm.access.group !18 +// CHECK15-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_UB18]], align 4, !llvm.access.group !18 // CHECK15-NEXT: [[CMP23:%.*]] = icmp sle i32 [[TMP13]], [[TMP14]] // CHECK15-NEXT: br i1 [[CMP23]], label [[OMP_INNER_FOR_BODY24:%.*]], label [[OMP_INNER_FOR_END30:%.*]] // CHECK15: omp.inner.for.body24: -// CHECK15-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_IV19]], align 4 +// CHECK15-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_IV19]], align 4, !llvm.access.group !18 // CHECK15-NEXT: [[MUL25:%.*]] = mul nsw i32 [[TMP15]], 1 // CHECK15-NEXT: [[ADD26:%.*]] = add nsw i32 0, [[MUL25]] -// CHECK15-NEXT: store i32 [[ADD26]], i32* [[I20]], align 4 -// CHECK15-NEXT: call void @_Z3fn6v() +// CHECK15-NEXT: store i32 [[ADD26]], i32* [[I20]], align 4, !llvm.access.group !18 +// CHECK15-NEXT: call void @_Z3fn6v(), !llvm.access.group !18 // CHECK15-NEXT: br label 
[[OMP_BODY_CONTINUE27:%.*]] // CHECK15: omp.body.continue27: // CHECK15-NEXT: br label [[OMP_INNER_FOR_INC28:%.*]] // CHECK15: omp.inner.for.inc28: -// CHECK15-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_IV19]], align 4 +// CHECK15-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_IV19]], align 4, !llvm.access.group !18 // CHECK15-NEXT: [[ADD29:%.*]] = add nsw i32 [[TMP16]], 1 -// CHECK15-NEXT: store i32 [[ADD29]], i32* [[DOTOMP_IV19]], align 4 +// CHECK15-NEXT: store i32 [[ADD29]], i32* [[DOTOMP_IV19]], align 4, !llvm.access.group !18 // CHECK15-NEXT: br label [[OMP_INNER_FOR_COND22]], !llvm.loop [[LOOP19:![0-9]+]] // CHECK15: omp.inner.for.end30: // CHECK15-NEXT: br label [[OMP_IF_END:%.*]] @@ -14269,23 +14269,23 @@ int main() { // CHECK15-NEXT: store i32 [[TMP0]], i32* [[DOTOMP_IV]], align 4 // CHECK15-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK15: omp.inner.for.cond: -// CHECK15-NEXT: [[TMP1:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK15-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK15-NEXT: [[TMP1:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !22 +// CHECK15-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !22 // CHECK15-NEXT: [[CMP:%.*]] = icmp sle i32 [[TMP1]], [[TMP2]] // CHECK15-NEXT: br i1 [[CMP]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK15: omp.inner.for.body: -// CHECK15-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK15-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !22 // CHECK15-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP3]], 1 // CHECK15-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK15-NEXT: store i32 [[ADD]], i32* [[I]], align 4 -// CHECK15-NEXT: call void @_Z3fn1v() +// CHECK15-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !22 +// CHECK15-NEXT: call void @_Z3fn1v(), !llvm.access.group !22 // CHECK15-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK15: omp.body.continue: // CHECK15-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK15: omp.inner.for.inc: -// CHECK15-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK15-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !22 // CHECK15-NEXT: [[ADD1:%.*]] = add nsw i32 [[TMP4]], 1 -// CHECK15-NEXT: store i32 [[ADD1]], i32* [[DOTOMP_IV]], align 4 +// CHECK15-NEXT: store i32 [[ADD1]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !22 // CHECK15-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP23:![0-9]+]] // CHECK15: omp.inner.for.end: // CHECK15-NEXT: store i32 100, i32* [[I]], align 4 @@ -14321,23 +14321,23 @@ int main() { // CHECK15-NEXT: store i32 [[TMP10]], i32* [[DOTOMP_IV19]], align 4 // CHECK15-NEXT: br label [[OMP_INNER_FOR_COND21:%.*]] // CHECK15: omp.inner.for.cond21: -// CHECK15-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV19]], align 4 -// CHECK15-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_UB18]], align 4 +// CHECK15-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV19]], align 4, !llvm.access.group !26 +// CHECK15-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_UB18]], align 4, !llvm.access.group !26 // CHECK15-NEXT: [[CMP22:%.*]] = icmp sle i32 [[TMP11]], [[TMP12]] // CHECK15-NEXT: br i1 [[CMP22]], label [[OMP_INNER_FOR_BODY23:%.*]], label [[OMP_INNER_FOR_END29:%.*]] // CHECK15: omp.inner.for.body23: -// CHECK15-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_IV19]], align 4 +// CHECK15-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_IV19]], align 4, !llvm.access.group !26 
// CHECK15-NEXT: [[MUL24:%.*]] = mul nsw i32 [[TMP13]], 1 // CHECK15-NEXT: [[ADD25:%.*]] = add nsw i32 0, [[MUL24]] -// CHECK15-NEXT: store i32 [[ADD25]], i32* [[I20]], align 4 -// CHECK15-NEXT: call void @_Z3fn3v() +// CHECK15-NEXT: store i32 [[ADD25]], i32* [[I20]], align 4, !llvm.access.group !26 +// CHECK15-NEXT: call void @_Z3fn3v(), !llvm.access.group !26 // CHECK15-NEXT: br label [[OMP_BODY_CONTINUE26:%.*]] // CHECK15: omp.body.continue26: // CHECK15-NEXT: br label [[OMP_INNER_FOR_INC27:%.*]] // CHECK15: omp.inner.for.inc27: -// CHECK15-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IV19]], align 4 +// CHECK15-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IV19]], align 4, !llvm.access.group !26 // CHECK15-NEXT: [[ADD28:%.*]] = add nsw i32 [[TMP14]], 1 -// CHECK15-NEXT: store i32 [[ADD28]], i32* [[DOTOMP_IV19]], align 4 +// CHECK15-NEXT: store i32 [[ADD28]], i32* [[DOTOMP_IV19]], align 4, !llvm.access.group !26 // CHECK15-NEXT: br label [[OMP_INNER_FOR_COND21]], !llvm.loop [[LOOP27:![0-9]+]] // CHECK15: omp.inner.for.end29: // CHECK15-NEXT: store i32 100, i32* [[I20]], align 4 @@ -14363,22 +14363,22 @@ int main() { // CHECK16-NEXT: store i32 [[TMP0]], i32* [[DOTOMP_IV]], align 4 // CHECK16-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK16: omp.inner.for.cond: -// CHECK16-NEXT: [[TMP1:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK16-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK16-NEXT: [[TMP1:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !6 +// CHECK16-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !6 // CHECK16-NEXT: [[CMP:%.*]] = icmp sle i32 [[TMP1]], [[TMP2]] // CHECK16-NEXT: br i1 [[CMP]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK16: omp.inner.for.body: -// CHECK16-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK16-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !6 // CHECK16-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP3]], 1 // CHECK16-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK16-NEXT: store i32 [[ADD]], i32* [[I]], align 4 +// CHECK16-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !6 // CHECK16-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK16: omp.body.continue: // CHECK16-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK16: omp.inner.for.inc: -// CHECK16-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK16-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !6 // CHECK16-NEXT: [[ADD1:%.*]] = add nsw i32 [[TMP4]], 1 -// CHECK16-NEXT: store i32 [[ADD1]], i32* [[DOTOMP_IV]], align 4 +// CHECK16-NEXT: store i32 [[ADD1]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !6 // CHECK16-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP7:![0-9]+]] // CHECK16: omp.inner.for.end: // CHECK16-NEXT: store i32 100, i32* [[I]], align 4 @@ -14388,23 +14388,23 @@ int main() { // CHECK16-NEXT: store i32 [[TMP5]], i32* [[DOTOMP_IV5]], align 4 // CHECK16-NEXT: br label [[OMP_INNER_FOR_COND7:%.*]] // CHECK16: omp.inner.for.cond7: -// CHECK16-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV5]], align 4 -// CHECK16-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_UB4]], align 4 +// CHECK16-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV5]], align 4, !llvm.access.group !10 +// CHECK16-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_UB4]], align 4, !llvm.access.group !10 // CHECK16-NEXT: [[CMP8:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] // CHECK16-NEXT: br i1 [[CMP8]], 
label [[OMP_INNER_FOR_BODY9:%.*]], label [[OMP_INNER_FOR_END15:%.*]] // CHECK16: omp.inner.for.body9: -// CHECK16-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV5]], align 4 +// CHECK16-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV5]], align 4, !llvm.access.group !10 // CHECK16-NEXT: [[MUL10:%.*]] = mul nsw i32 [[TMP8]], 1 // CHECK16-NEXT: [[ADD11:%.*]] = add nsw i32 0, [[MUL10]] -// CHECK16-NEXT: store i32 [[ADD11]], i32* [[I6]], align 4 -// CHECK16-NEXT: call void @_Z9gtid_testv() +// CHECK16-NEXT: store i32 [[ADD11]], i32* [[I6]], align 4, !llvm.access.group !10 +// CHECK16-NEXT: call void @_Z9gtid_testv(), !llvm.access.group !10 // CHECK16-NEXT: br label [[OMP_BODY_CONTINUE12:%.*]] // CHECK16: omp.body.continue12: // CHECK16-NEXT: br label [[OMP_INNER_FOR_INC13:%.*]] // CHECK16: omp.inner.for.inc13: -// CHECK16-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV5]], align 4 +// CHECK16-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV5]], align 4, !llvm.access.group !10 // CHECK16-NEXT: [[ADD14:%.*]] = add nsw i32 [[TMP9]], 1 -// CHECK16-NEXT: store i32 [[ADD14]], i32* [[DOTOMP_IV5]], align 4 +// CHECK16-NEXT: store i32 [[ADD14]], i32* [[DOTOMP_IV5]], align 4, !llvm.access.group !10 // CHECK16-NEXT: br label [[OMP_INNER_FOR_COND7]], !llvm.loop [[LOOP11:![0-9]+]] // CHECK16: omp.inner.for.end15: // CHECK16-NEXT: store i32 100, i32* [[I6]], align 4 @@ -14438,23 +14438,23 @@ int main() { // CHECK16-NEXT: store i32 [[TMP0]], i32* [[DOTOMP_IV]], align 4 // CHECK16-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK16: omp.inner.for.cond: -// CHECK16-NEXT: [[TMP1:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK16-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK16-NEXT: [[TMP1:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !13 +// CHECK16-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !13 // CHECK16-NEXT: [[CMP:%.*]] = icmp sle i32 [[TMP1]], [[TMP2]] // CHECK16-NEXT: br i1 [[CMP]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK16: omp.inner.for.body: -// CHECK16-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK16-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !13 // CHECK16-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP3]], 1 // CHECK16-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK16-NEXT: store i32 [[ADD]], i32* [[I]], align 4 -// CHECK16-NEXT: call void @_Z3fn4v() +// CHECK16-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !13 +// CHECK16-NEXT: call void @_Z3fn4v(), !llvm.access.group !13 // CHECK16-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK16: omp.body.continue: // CHECK16-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK16: omp.inner.for.inc: -// CHECK16-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK16-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !13 // CHECK16-NEXT: [[ADD1:%.*]] = add nsw i32 [[TMP4]], 1 -// CHECK16-NEXT: store i32 [[ADD1]], i32* [[DOTOMP_IV]], align 4 +// CHECK16-NEXT: store i32 [[ADD1]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !13 // CHECK16-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP14:![0-9]+]] // CHECK16: omp.inner.for.end: // CHECK16-NEXT: store i32 100, i32* [[I]], align 4 @@ -14498,23 +14498,23 @@ int main() { // CHECK16: omp_if.then: // CHECK16-NEXT: br label [[OMP_INNER_FOR_COND22:%.*]] // CHECK16: omp.inner.for.cond22: -// CHECK16-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_IV19]], align 4 -// CHECK16-NEXT: 
[[TMP14:%.*]] = load i32, i32* [[DOTOMP_UB18]], align 4 +// CHECK16-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_IV19]], align 4, !llvm.access.group !18 +// CHECK16-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_UB18]], align 4, !llvm.access.group !18 // CHECK16-NEXT: [[CMP23:%.*]] = icmp sle i32 [[TMP13]], [[TMP14]] // CHECK16-NEXT: br i1 [[CMP23]], label [[OMP_INNER_FOR_BODY24:%.*]], label [[OMP_INNER_FOR_END30:%.*]] // CHECK16: omp.inner.for.body24: -// CHECK16-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_IV19]], align 4 +// CHECK16-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_IV19]], align 4, !llvm.access.group !18 // CHECK16-NEXT: [[MUL25:%.*]] = mul nsw i32 [[TMP15]], 1 // CHECK16-NEXT: [[ADD26:%.*]] = add nsw i32 0, [[MUL25]] -// CHECK16-NEXT: store i32 [[ADD26]], i32* [[I20]], align 4 -// CHECK16-NEXT: call void @_Z3fn6v() +// CHECK16-NEXT: store i32 [[ADD26]], i32* [[I20]], align 4, !llvm.access.group !18 +// CHECK16-NEXT: call void @_Z3fn6v(), !llvm.access.group !18 // CHECK16-NEXT: br label [[OMP_BODY_CONTINUE27:%.*]] // CHECK16: omp.body.continue27: // CHECK16-NEXT: br label [[OMP_INNER_FOR_INC28:%.*]] // CHECK16: omp.inner.for.inc28: -// CHECK16-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_IV19]], align 4 +// CHECK16-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_IV19]], align 4, !llvm.access.group !18 // CHECK16-NEXT: [[ADD29:%.*]] = add nsw i32 [[TMP16]], 1 -// CHECK16-NEXT: store i32 [[ADD29]], i32* [[DOTOMP_IV19]], align 4 +// CHECK16-NEXT: store i32 [[ADD29]], i32* [[DOTOMP_IV19]], align 4, !llvm.access.group !18 // CHECK16-NEXT: br label [[OMP_INNER_FOR_COND22]], !llvm.loop [[LOOP19:![0-9]+]] // CHECK16: omp.inner.for.end30: // CHECK16-NEXT: br label [[OMP_IF_END:%.*]] @@ -14574,23 +14574,23 @@ int main() { // CHECK16-NEXT: store i32 [[TMP0]], i32* [[DOTOMP_IV]], align 4 // CHECK16-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK16: omp.inner.for.cond: -// CHECK16-NEXT: [[TMP1:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK16-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK16-NEXT: [[TMP1:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !22 +// CHECK16-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !22 // CHECK16-NEXT: [[CMP:%.*]] = icmp sle i32 [[TMP1]], [[TMP2]] // CHECK16-NEXT: br i1 [[CMP]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK16: omp.inner.for.body: -// CHECK16-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK16-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !22 // CHECK16-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP3]], 1 // CHECK16-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK16-NEXT: store i32 [[ADD]], i32* [[I]], align 4 -// CHECK16-NEXT: call void @_Z3fn1v() +// CHECK16-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !22 +// CHECK16-NEXT: call void @_Z3fn1v(), !llvm.access.group !22 // CHECK16-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK16: omp.body.continue: // CHECK16-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK16: omp.inner.for.inc: -// CHECK16-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK16-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !22 // CHECK16-NEXT: [[ADD1:%.*]] = add nsw i32 [[TMP4]], 1 -// CHECK16-NEXT: store i32 [[ADD1]], i32* [[DOTOMP_IV]], align 4 +// CHECK16-NEXT: store i32 [[ADD1]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !22 // CHECK16-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop 
[[LOOP23:![0-9]+]] // CHECK16: omp.inner.for.end: // CHECK16-NEXT: store i32 100, i32* [[I]], align 4 @@ -14626,23 +14626,23 @@ int main() { // CHECK16-NEXT: store i32 [[TMP10]], i32* [[DOTOMP_IV19]], align 4 // CHECK16-NEXT: br label [[OMP_INNER_FOR_COND21:%.*]] // CHECK16: omp.inner.for.cond21: -// CHECK16-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV19]], align 4 -// CHECK16-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_UB18]], align 4 +// CHECK16-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV19]], align 4, !llvm.access.group !26 +// CHECK16-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_UB18]], align 4, !llvm.access.group !26 // CHECK16-NEXT: [[CMP22:%.*]] = icmp sle i32 [[TMP11]], [[TMP12]] // CHECK16-NEXT: br i1 [[CMP22]], label [[OMP_INNER_FOR_BODY23:%.*]], label [[OMP_INNER_FOR_END29:%.*]] // CHECK16: omp.inner.for.body23: -// CHECK16-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_IV19]], align 4 +// CHECK16-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_IV19]], align 4, !llvm.access.group !26 // CHECK16-NEXT: [[MUL24:%.*]] = mul nsw i32 [[TMP13]], 1 // CHECK16-NEXT: [[ADD25:%.*]] = add nsw i32 0, [[MUL24]] -// CHECK16-NEXT: store i32 [[ADD25]], i32* [[I20]], align 4 -// CHECK16-NEXT: call void @_Z3fn3v() +// CHECK16-NEXT: store i32 [[ADD25]], i32* [[I20]], align 4, !llvm.access.group !26 +// CHECK16-NEXT: call void @_Z3fn3v(), !llvm.access.group !26 // CHECK16-NEXT: br label [[OMP_BODY_CONTINUE26:%.*]] // CHECK16: omp.body.continue26: // CHECK16-NEXT: br label [[OMP_INNER_FOR_INC27:%.*]] // CHECK16: omp.inner.for.inc27: -// CHECK16-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IV19]], align 4 +// CHECK16-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IV19]], align 4, !llvm.access.group !26 // CHECK16-NEXT: [[ADD28:%.*]] = add nsw i32 [[TMP14]], 1 -// CHECK16-NEXT: store i32 [[ADD28]], i32* [[DOTOMP_IV19]], align 4 +// CHECK16-NEXT: store i32 [[ADD28]], i32* [[DOTOMP_IV19]], align 4, !llvm.access.group !26 // CHECK16-NEXT: br label [[OMP_INNER_FOR_COND21]], !llvm.loop [[LOOP27:![0-9]+]] // CHECK16: omp.inner.for.end29: // CHECK16-NEXT: store i32 100, i32* [[I20]], align 4 diff --git a/clang/test/OpenMP/parallel_firstprivate_codegen.cpp b/clang/test/OpenMP/parallel_firstprivate_codegen.cpp index 35eb8d05123e..7542169b574b 100644 --- a/clang/test/OpenMP/parallel_firstprivate_codegen.cpp +++ b/clang/test/OpenMP/parallel_firstprivate_codegen.cpp @@ -4322,4 +4322,3 @@ void array_func(float a[3], St s[2], int n, long double vla1[n]) { // CHECK17-NEXT: call void @__kmpc_free(i32 [[TMP12]], i8* [[TMP22]], i8* inttoptr (i64 8 to i8*)) // CHECK17-NEXT: ret void // -// \ No newline at end of file diff --git a/clang/test/OpenMP/parallel_for_codegen.cpp b/clang/test/OpenMP/parallel_for_codegen.cpp index 3550e76f21c4..7f17f8c1d855 100644 --- a/clang/test/OpenMP/parallel_for_codegen.cpp +++ b/clang/test/OpenMP/parallel_for_codegen.cpp @@ -4697,7 +4697,7 @@ void range_for_collapsed() { // // // CHECK5-LABEL: define {{[^@]+}}@_Z17with_var_schedulev -// CHECK5-SAME: () #[[ATTR0:[0-9]+]] !dbg [[DBG7:![0-9]+]] { +// CHECK5-SAME: () #[[ATTR0:[0-9]+]] !dbg [[DBG6:![0-9]+]] { // CHECK5-NEXT: entry: // CHECK5-NEXT: [[A:%.*]] = alloca double, align 8 // CHECK5-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i8, align 1 diff --git a/clang/test/OpenMP/parallel_master_codegen.cpp b/clang/test/OpenMP/parallel_master_codegen.cpp index dc8343941a67..46df40533cfc 100644 --- a/clang/test/OpenMP/parallel_master_codegen.cpp +++ b/clang/test/OpenMP/parallel_master_codegen.cpp @@ -1005,4 +1005,3 @@ void 
parallel_master_allocate() { // CHECK29-SAME: () #[[ATTR4:[0-9]+]] comdat { // CHECK29-NEXT: ret i32* @a // -// \ No newline at end of file diff --git a/clang/test/OpenMP/single_codegen.cpp b/clang/test/OpenMP/single_codegen.cpp index 4f7777cad610..a590aac9125d 100644 --- a/clang/test/OpenMP/single_codegen.cpp +++ b/clang/test/OpenMP/single_codegen.cpp @@ -4170,129 +4170,129 @@ void array_func(int n, int a[n], St s[2]) { // CHECK5-NEXT: entry: // CHECK5-NEXT: [[DOTADDR:%.*]] = alloca i8*, align 8 // CHECK5-NEXT: store i8* [[TMP0]], i8** [[DOTADDR]], align 8 -// CHECK5-NEXT: [[TMP1:%.*]] = load i8*, i8** [[DOTADDR]], align 8, !dbg [[DBG9:![0-9]+]] -// CHECK5-NEXT: [[TMP2:%.*]] = bitcast i8* [[TMP1]] to %class.TestClass*, !dbg [[DBG9]] -// CHECK5-NEXT: call void @_ZN9TestClassC1Ev(%class.TestClass* nonnull align 4 dereferenceable(4) [[TMP2]]), !dbg [[DBG10:![0-9]+]] -// CHECK5-NEXT: [[TMP3:%.*]] = load i8*, i8** [[DOTADDR]], align 8, !dbg [[DBG9]] -// CHECK5-NEXT: ret i8* [[TMP3]], !dbg [[DBG9]] +// CHECK5-NEXT: [[TMP1:%.*]] = load i8*, i8** [[DOTADDR]], align 8, !dbg [[DBG10:![0-9]+]] +// CHECK5-NEXT: [[TMP2:%.*]] = bitcast i8* [[TMP1]] to %class.TestClass*, !dbg [[DBG10]] +// CHECK5-NEXT: call void @_ZN9TestClassC1Ev(%class.TestClass* nonnull align 4 dereferenceable(4) [[TMP2]]), !dbg [[DBG11:![0-9]+]] +// CHECK5-NEXT: [[TMP3:%.*]] = load i8*, i8** [[DOTADDR]], align 8, !dbg [[DBG10]] +// CHECK5-NEXT: ret i8* [[TMP3]], !dbg [[DBG10]] // // // CHECK5-LABEL: define {{[^@]+}}@_ZN9TestClassC1Ev -// CHECK5-SAME: (%class.TestClass* nonnull align 4 dereferenceable(4) [[THIS:%.*]]) unnamed_addr #[[ATTR1:[0-9]+]] align 2 !dbg [[DBG11:![0-9]+]] { +// CHECK5-SAME: (%class.TestClass* nonnull align 4 dereferenceable(4) [[THIS:%.*]]) unnamed_addr #[[ATTR1:[0-9]+]] align 2 !dbg [[DBG12:![0-9]+]] { // CHECK5-NEXT: entry: // CHECK5-NEXT: [[THIS_ADDR:%.*]] = alloca %class.TestClass*, align 8 // CHECK5-NEXT: store %class.TestClass* [[THIS]], %class.TestClass** [[THIS_ADDR]], align 8 // CHECK5-NEXT: [[THIS1:%.*]] = load %class.TestClass*, %class.TestClass** [[THIS_ADDR]], align 8 -// CHECK5-NEXT: call void @_ZN9TestClassC2Ev(%class.TestClass* nonnull align 4 dereferenceable(4) [[THIS1]]), !dbg [[DBG12:![0-9]+]] -// CHECK5-NEXT: ret void, !dbg [[DBG13:![0-9]+]] +// CHECK5-NEXT: call void @_ZN9TestClassC2Ev(%class.TestClass* nonnull align 4 dereferenceable(4) [[THIS1]]), !dbg [[DBG13:![0-9]+]] +// CHECK5-NEXT: ret void, !dbg [[DBG14:![0-9]+]] // // // CHECK5-LABEL: define {{[^@]+}}@.__kmpc_global_dtor_. 
-// CHECK5-SAME: (i8* [[TMP0:%.*]]) #[[ATTR0]] section "__TEXT,__StaticInit,regular,pure_instructions" !dbg [[DBG14:![0-9]+]] { +// CHECK5-SAME: (i8* [[TMP0:%.*]]) #[[ATTR0]] section "__TEXT,__StaticInit,regular,pure_instructions" !dbg [[DBG15:![0-9]+]] { // CHECK5-NEXT: entry: // CHECK5-NEXT: [[DOTADDR:%.*]] = alloca i8*, align 8 // CHECK5-NEXT: store i8* [[TMP0]], i8** [[DOTADDR]], align 8 -// CHECK5-NEXT: [[TMP1:%.*]] = load i8*, i8** [[DOTADDR]], align 8, !dbg [[DBG15:![0-9]+]] -// CHECK5-NEXT: [[TMP2:%.*]] = bitcast i8* [[TMP1]] to %class.TestClass*, !dbg [[DBG15]] -// CHECK5-NEXT: call void @_ZN9TestClassD1Ev(%class.TestClass* nonnull align 4 dereferenceable(4) [[TMP2]]) #[[ATTR3:[0-9]+]], !dbg [[DBG15]] -// CHECK5-NEXT: ret void, !dbg [[DBG16:![0-9]+]] +// CHECK5-NEXT: [[TMP1:%.*]] = load i8*, i8** [[DOTADDR]], align 8, !dbg [[DBG16:![0-9]+]] +// CHECK5-NEXT: [[TMP2:%.*]] = bitcast i8* [[TMP1]] to %class.TestClass*, !dbg [[DBG16]] +// CHECK5-NEXT: call void @_ZN9TestClassD1Ev(%class.TestClass* nonnull align 4 dereferenceable(4) [[TMP2]]) #[[ATTR3:[0-9]+]], !dbg [[DBG16]] +// CHECK5-NEXT: ret void, !dbg [[DBG17:![0-9]+]] // // // CHECK5-LABEL: define {{[^@]+}}@_ZN9TestClassD1Ev -// CHECK5-SAME: (%class.TestClass* nonnull align 4 dereferenceable(4) [[THIS:%.*]]) unnamed_addr #[[ATTR2:[0-9]+]] align 2 !dbg [[DBG17:![0-9]+]] { +// CHECK5-SAME: (%class.TestClass* nonnull align 4 dereferenceable(4) [[THIS:%.*]]) unnamed_addr #[[ATTR2:[0-9]+]] align 2 !dbg [[DBG18:![0-9]+]] { // CHECK5-NEXT: entry: // CHECK5-NEXT: [[THIS_ADDR:%.*]] = alloca %class.TestClass*, align 8 // CHECK5-NEXT: store %class.TestClass* [[THIS]], %class.TestClass** [[THIS_ADDR]], align 8 // CHECK5-NEXT: [[THIS1:%.*]] = load %class.TestClass*, %class.TestClass** [[THIS_ADDR]], align 8 -// CHECK5-NEXT: call void @_ZN9TestClassD2Ev(%class.TestClass* nonnull align 4 dereferenceable(4) [[THIS1]]) #[[ATTR3]], !dbg [[DBG18:![0-9]+]] -// CHECK5-NEXT: ret void, !dbg [[DBG19:![0-9]+]] +// CHECK5-NEXT: call void @_ZN9TestClassD2Ev(%class.TestClass* nonnull align 4 dereferenceable(4) [[THIS1]]) #[[ATTR3]], !dbg [[DBG19:![0-9]+]] +// CHECK5-NEXT: ret void, !dbg [[DBG20:![0-9]+]] // // // CHECK5-LABEL: define {{[^@]+}}@.__omp_threadprivate_init_. 
-// CHECK5-SAME: () #[[ATTR0]] section "__TEXT,__StaticInit,regular,pure_instructions" !dbg [[DBG20:![0-9]+]] { +// CHECK5-SAME: () #[[ATTR0]] section "__TEXT,__StaticInit,regular,pure_instructions" !dbg [[DBG21:![0-9]+]] { // CHECK5-NEXT: entry: -// CHECK5-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1:[0-9]+]]), !dbg [[DBG21:![0-9]+]] -// CHECK5-NEXT: call void @__kmpc_threadprivate_register(%struct.ident_t* @[[GLOB1]], i8* bitcast (%class.TestClass* @tc to i8*), i8* (i8*)* @.__kmpc_global_ctor_., i8* (i8*, i8*)* null, void (i8*)* @.__kmpc_global_dtor_.), !dbg [[DBG21]] -// CHECK5-NEXT: ret void, !dbg [[DBG21]] +// CHECK5-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1:[0-9]+]]), !dbg [[DBG22:![0-9]+]] +// CHECK5-NEXT: call void @__kmpc_threadprivate_register(%struct.ident_t* @[[GLOB1]], i8* bitcast (%class.TestClass* @tc to i8*), i8* (i8*)* @.__kmpc_global_ctor_., i8* (i8*, i8*)* null, void (i8*)* @.__kmpc_global_dtor_.), !dbg [[DBG22]] +// CHECK5-NEXT: ret void, !dbg [[DBG22]] // // // CHECK5-LABEL: define {{[^@]+}}@.__kmpc_global_ctor_..1 -// CHECK5-SAME: (i8* [[TMP0:%.*]]) #[[ATTR0]] section "__TEXT,__StaticInit,regular,pure_instructions" personality i8* bitcast (i32 (...)* @__gxx_personality_v0 to i8*) !dbg [[DBG22:![0-9]+]] { +// CHECK5-SAME: (i8* [[TMP0:%.*]]) #[[ATTR0]] section "__TEXT,__StaticInit,regular,pure_instructions" personality i8* bitcast (i32 (...)* @__gxx_personality_v0 to i8*) !dbg [[DBG23:![0-9]+]] { // CHECK5-NEXT: entry: // CHECK5-NEXT: [[DOTADDR:%.*]] = alloca i8*, align 8 // CHECK5-NEXT: [[EXN_SLOT:%.*]] = alloca i8*, align 8 // CHECK5-NEXT: [[EHSELECTOR_SLOT:%.*]] = alloca i32, align 4 // CHECK5-NEXT: store i8* [[TMP0]], i8** [[DOTADDR]], align 8 -// CHECK5-NEXT: [[TMP1:%.*]] = load i8*, i8** [[DOTADDR]], align 8, !dbg [[DBG23:![0-9]+]] -// CHECK5-NEXT: [[TMP2:%.*]] = bitcast i8* [[TMP1]] to [2 x %class.TestClass]*, !dbg [[DBG23]] -// CHECK5-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x %class.TestClass], [2 x %class.TestClass]* [[TMP2]], i32 0, i32 0, !dbg [[DBG24:![0-9]+]] -// CHECK5-NEXT: [[ARRAYCTOR_END:%.*]] = getelementptr inbounds [[CLASS_TESTCLASS:%.*]], %class.TestClass* [[ARRAY_BEGIN]], i64 2, !dbg [[DBG24]] -// CHECK5-NEXT: br label [[ARRAYCTOR_LOOP:%.*]], !dbg [[DBG24]] +// CHECK5-NEXT: [[TMP1:%.*]] = load i8*, i8** [[DOTADDR]], align 8, !dbg [[DBG24:![0-9]+]] +// CHECK5-NEXT: [[TMP2:%.*]] = bitcast i8* [[TMP1]] to [2 x %class.TestClass]*, !dbg [[DBG24]] +// CHECK5-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x %class.TestClass], [2 x %class.TestClass]* [[TMP2]], i32 0, i32 0, !dbg [[DBG25:![0-9]+]] +// CHECK5-NEXT: [[ARRAYCTOR_END:%.*]] = getelementptr inbounds [[CLASS_TESTCLASS:%.*]], %class.TestClass* [[ARRAY_BEGIN]], i64 2, !dbg [[DBG25]] +// CHECK5-NEXT: br label [[ARRAYCTOR_LOOP:%.*]], !dbg [[DBG25]] // CHECK5: arrayctor.loop: -// CHECK5-NEXT: [[ARRAYCTOR_CUR:%.*]] = phi %class.TestClass* [ [[ARRAY_BEGIN]], [[ENTRY:%.*]] ], [ [[ARRAYCTOR_NEXT:%.*]], [[INVOKE_CONT:%.*]] ], !dbg [[DBG24]] +// CHECK5-NEXT: [[ARRAYCTOR_CUR:%.*]] = phi %class.TestClass* [ [[ARRAY_BEGIN]], [[ENTRY:%.*]] ], [ [[ARRAYCTOR_NEXT:%.*]], [[INVOKE_CONT:%.*]] ], !dbg [[DBG25]] // CHECK5-NEXT: invoke void @_ZN9TestClassC1Ev(%class.TestClass* nonnull align 4 dereferenceable(4) [[ARRAYCTOR_CUR]]) -// CHECK5-NEXT: to label [[INVOKE_CONT]] unwind label [[LPAD:%.*]], !dbg [[DBG24]] +// CHECK5-NEXT: to label [[INVOKE_CONT]] unwind label [[LPAD:%.*]], !dbg [[DBG25]] // CHECK5: invoke.cont: -// 
CHECK5-NEXT: [[ARRAYCTOR_NEXT]] = getelementptr inbounds [[CLASS_TESTCLASS]], %class.TestClass* [[ARRAYCTOR_CUR]], i64 1, !dbg [[DBG24]] -// CHECK5-NEXT: [[ARRAYCTOR_DONE:%.*]] = icmp eq %class.TestClass* [[ARRAYCTOR_NEXT]], [[ARRAYCTOR_END]], !dbg [[DBG24]] -// CHECK5-NEXT: br i1 [[ARRAYCTOR_DONE]], label [[ARRAYCTOR_CONT:%.*]], label [[ARRAYCTOR_LOOP]], !dbg [[DBG24]] +// CHECK5-NEXT: [[ARRAYCTOR_NEXT]] = getelementptr inbounds [[CLASS_TESTCLASS]], %class.TestClass* [[ARRAYCTOR_CUR]], i64 1, !dbg [[DBG25]] +// CHECK5-NEXT: [[ARRAYCTOR_DONE:%.*]] = icmp eq %class.TestClass* [[ARRAYCTOR_NEXT]], [[ARRAYCTOR_END]], !dbg [[DBG25]] +// CHECK5-NEXT: br i1 [[ARRAYCTOR_DONE]], label [[ARRAYCTOR_CONT:%.*]], label [[ARRAYCTOR_LOOP]], !dbg [[DBG25]] // CHECK5: arrayctor.cont: -// CHECK5-NEXT: [[TMP3:%.*]] = load i8*, i8** [[DOTADDR]], align 8, !dbg [[DBG23]] -// CHECK5-NEXT: ret i8* [[TMP3]], !dbg [[DBG23]] +// CHECK5-NEXT: [[TMP3:%.*]] = load i8*, i8** [[DOTADDR]], align 8, !dbg [[DBG24]] +// CHECK5-NEXT: ret i8* [[TMP3]], !dbg [[DBG24]] // CHECK5: lpad: // CHECK5-NEXT: [[TMP4:%.*]] = landingpad { i8*, i32 } -// CHECK5-NEXT: cleanup, !dbg [[DBG25:![0-9]+]] -// CHECK5-NEXT: [[TMP5:%.*]] = extractvalue { i8*, i32 } [[TMP4]], 0, !dbg [[DBG25]] -// CHECK5-NEXT: store i8* [[TMP5]], i8** [[EXN_SLOT]], align 8, !dbg [[DBG25]] -// CHECK5-NEXT: [[TMP6:%.*]] = extractvalue { i8*, i32 } [[TMP4]], 1, !dbg [[DBG25]] -// CHECK5-NEXT: store i32 [[TMP6]], i32* [[EHSELECTOR_SLOT]], align 4, !dbg [[DBG25]] -// CHECK5-NEXT: [[ARRAYDESTROY_ISEMPTY:%.*]] = icmp eq %class.TestClass* [[ARRAY_BEGIN]], [[ARRAYCTOR_CUR]], !dbg [[DBG24]] -// CHECK5-NEXT: br i1 [[ARRAYDESTROY_ISEMPTY]], label [[ARRAYDESTROY_DONE1:%.*]], label [[ARRAYDESTROY_BODY:%.*]], !dbg [[DBG24]] +// CHECK5-NEXT: cleanup, !dbg [[DBG26:![0-9]+]] +// CHECK5-NEXT: [[TMP5:%.*]] = extractvalue { i8*, i32 } [[TMP4]], 0, !dbg [[DBG26]] +// CHECK5-NEXT: store i8* [[TMP5]], i8** [[EXN_SLOT]], align 8, !dbg [[DBG26]] +// CHECK5-NEXT: [[TMP6:%.*]] = extractvalue { i8*, i32 } [[TMP4]], 1, !dbg [[DBG26]] +// CHECK5-NEXT: store i32 [[TMP6]], i32* [[EHSELECTOR_SLOT]], align 4, !dbg [[DBG26]] +// CHECK5-NEXT: [[ARRAYDESTROY_ISEMPTY:%.*]] = icmp eq %class.TestClass* [[ARRAY_BEGIN]], [[ARRAYCTOR_CUR]], !dbg [[DBG25]] +// CHECK5-NEXT: br i1 [[ARRAYDESTROY_ISEMPTY]], label [[ARRAYDESTROY_DONE1:%.*]], label [[ARRAYDESTROY_BODY:%.*]], !dbg [[DBG25]] // CHECK5: arraydestroy.body: -// CHECK5-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi %class.TestClass* [ [[ARRAYCTOR_CUR]], [[LPAD]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ], !dbg [[DBG24]] -// CHECK5-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[CLASS_TESTCLASS]], %class.TestClass* [[ARRAYDESTROY_ELEMENTPAST]], i64 -1, !dbg [[DBG24]] -// CHECK5-NEXT: call void @_ZN9TestClassD1Ev(%class.TestClass* nonnull align 4 dereferenceable(4) [[ARRAYDESTROY_ELEMENT]]) #[[ATTR3]], !dbg [[DBG24]] -// CHECK5-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq %class.TestClass* [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN]], !dbg [[DBG24]] -// CHECK5-NEXT: br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE1]], label [[ARRAYDESTROY_BODY]], !dbg [[DBG24]] +// CHECK5-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi %class.TestClass* [ [[ARRAYCTOR_CUR]], [[LPAD]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ], !dbg [[DBG25]] +// CHECK5-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[CLASS_TESTCLASS]], %class.TestClass* [[ARRAYDESTROY_ELEMENTPAST]], i64 -1, !dbg [[DBG25]] +// CHECK5-NEXT: call void 
@_ZN9TestClassD1Ev(%class.TestClass* nonnull align 4 dereferenceable(4) [[ARRAYDESTROY_ELEMENT]]) #[[ATTR3]], !dbg [[DBG25]] +// CHECK5-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq %class.TestClass* [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN]], !dbg [[DBG25]] +// CHECK5-NEXT: br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE1]], label [[ARRAYDESTROY_BODY]], !dbg [[DBG25]] // CHECK5: arraydestroy.done1: -// CHECK5-NEXT: br label [[EH_RESUME:%.*]], !dbg [[DBG24]] +// CHECK5-NEXT: br label [[EH_RESUME:%.*]], !dbg [[DBG25]] // CHECK5: eh.resume: -// CHECK5-NEXT: [[EXN:%.*]] = load i8*, i8** [[EXN_SLOT]], align 8, !dbg [[DBG24]] -// CHECK5-NEXT: [[SEL:%.*]] = load i32, i32* [[EHSELECTOR_SLOT]], align 4, !dbg [[DBG24]] -// CHECK5-NEXT: [[LPAD_VAL:%.*]] = insertvalue { i8*, i32 } undef, i8* [[EXN]], 0, !dbg [[DBG24]] -// CHECK5-NEXT: [[LPAD_VAL2:%.*]] = insertvalue { i8*, i32 } [[LPAD_VAL]], i32 [[SEL]], 1, !dbg [[DBG24]] -// CHECK5-NEXT: resume { i8*, i32 } [[LPAD_VAL2]], !dbg [[DBG24]] +// CHECK5-NEXT: [[EXN:%.*]] = load i8*, i8** [[EXN_SLOT]], align 8, !dbg [[DBG25]] +// CHECK5-NEXT: [[SEL:%.*]] = load i32, i32* [[EHSELECTOR_SLOT]], align 4, !dbg [[DBG25]] +// CHECK5-NEXT: [[LPAD_VAL:%.*]] = insertvalue { i8*, i32 } undef, i8* [[EXN]], 0, !dbg [[DBG25]] +// CHECK5-NEXT: [[LPAD_VAL2:%.*]] = insertvalue { i8*, i32 } [[LPAD_VAL]], i32 [[SEL]], 1, !dbg [[DBG25]] +// CHECK5-NEXT: resume { i8*, i32 } [[LPAD_VAL2]], !dbg [[DBG25]] // // // CHECK5-LABEL: define {{[^@]+}}@.__kmpc_global_dtor_..2 -// CHECK5-SAME: (i8* [[TMP0:%.*]]) #[[ATTR0]] section "__TEXT,__StaticInit,regular,pure_instructions" !dbg [[DBG26:![0-9]+]] { +// CHECK5-SAME: (i8* [[TMP0:%.*]]) #[[ATTR0]] section "__TEXT,__StaticInit,regular,pure_instructions" !dbg [[DBG27:![0-9]+]] { // CHECK5-NEXT: entry: // CHECK5-NEXT: [[DOTADDR:%.*]] = alloca i8*, align 8 // CHECK5-NEXT: store i8* [[TMP0]], i8** [[DOTADDR]], align 8 -// CHECK5-NEXT: [[TMP1:%.*]] = load i8*, i8** [[DOTADDR]], align 8, !dbg [[DBG27:![0-9]+]] -// CHECK5-NEXT: [[ARRAY_BEGIN:%.*]] = bitcast i8* [[TMP1]] to %class.TestClass*, !dbg [[DBG27]] -// CHECK5-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[CLASS_TESTCLASS:%.*]], %class.TestClass* [[ARRAY_BEGIN]], i64 2, !dbg [[DBG27]] -// CHECK5-NEXT: br label [[ARRAYDESTROY_BODY:%.*]], !dbg [[DBG27]] +// CHECK5-NEXT: [[TMP1:%.*]] = load i8*, i8** [[DOTADDR]], align 8, !dbg [[DBG28:![0-9]+]] +// CHECK5-NEXT: [[ARRAY_BEGIN:%.*]] = bitcast i8* [[TMP1]] to %class.TestClass*, !dbg [[DBG28]] +// CHECK5-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[CLASS_TESTCLASS:%.*]], %class.TestClass* [[ARRAY_BEGIN]], i64 2, !dbg [[DBG28]] +// CHECK5-NEXT: br label [[ARRAYDESTROY_BODY:%.*]], !dbg [[DBG28]] // CHECK5: arraydestroy.body: -// CHECK5-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi %class.TestClass* [ [[TMP2]], [[ENTRY:%.*]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ], !dbg [[DBG27]] -// CHECK5-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[CLASS_TESTCLASS]], %class.TestClass* [[ARRAYDESTROY_ELEMENTPAST]], i64 -1, !dbg [[DBG27]] -// CHECK5-NEXT: call void @_ZN9TestClassD1Ev(%class.TestClass* nonnull align 4 dereferenceable(4) [[ARRAYDESTROY_ELEMENT]]) #[[ATTR3]], !dbg [[DBG27]] -// CHECK5-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq %class.TestClass* [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN]], !dbg [[DBG27]] -// CHECK5-NEXT: br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE1:%.*]], label [[ARRAYDESTROY_BODY]], !dbg [[DBG27]] +// CHECK5-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi %class.TestClass* [ [[TMP2]], 
[[ENTRY:%.*]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ], !dbg [[DBG28]] +// CHECK5-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[CLASS_TESTCLASS]], %class.TestClass* [[ARRAYDESTROY_ELEMENTPAST]], i64 -1, !dbg [[DBG28]] +// CHECK5-NEXT: call void @_ZN9TestClassD1Ev(%class.TestClass* nonnull align 4 dereferenceable(4) [[ARRAYDESTROY_ELEMENT]]) #[[ATTR3]], !dbg [[DBG28]] +// CHECK5-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq %class.TestClass* [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN]], !dbg [[DBG28]] +// CHECK5-NEXT: br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE1:%.*]], label [[ARRAYDESTROY_BODY]], !dbg [[DBG28]] // CHECK5: arraydestroy.done1: -// CHECK5-NEXT: ret void, !dbg [[DBG28:![0-9]+]] +// CHECK5-NEXT: ret void, !dbg [[DBG29:![0-9]+]] // // // CHECK5-LABEL: define {{[^@]+}}@.__omp_threadprivate_init_..3 -// CHECK5-SAME: () #[[ATTR0]] section "__TEXT,__StaticInit,regular,pure_instructions" !dbg [[DBG29:![0-9]+]] { +// CHECK5-SAME: () #[[ATTR0]] section "__TEXT,__StaticInit,regular,pure_instructions" !dbg [[DBG30:![0-9]+]] { // CHECK5-NEXT: entry: -// CHECK5-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB3:[0-9]+]]), !dbg [[DBG30:![0-9]+]] -// CHECK5-NEXT: call void @__kmpc_threadprivate_register(%struct.ident_t* @[[GLOB3]], i8* bitcast ([2 x %class.TestClass]* @tc2 to i8*), i8* (i8*)* @.__kmpc_global_ctor_..1, i8* (i8*, i8*)* null, void (i8*)* @.__kmpc_global_dtor_..2), !dbg [[DBG30]] -// CHECK5-NEXT: ret void, !dbg [[DBG30]] +// CHECK5-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB3:[0-9]+]]), !dbg [[DBG31:![0-9]+]] +// CHECK5-NEXT: call void @__kmpc_threadprivate_register(%struct.ident_t* @[[GLOB3]], i8* bitcast ([2 x %class.TestClass]* @tc2 to i8*), i8* (i8*)* @.__kmpc_global_ctor_..1, i8* (i8*, i8*)* null, void (i8*)* @.__kmpc_global_dtor_..2), !dbg [[DBG31]] +// CHECK5-NEXT: ret void, !dbg [[DBG31]] // // // CHECK5-LABEL: define {{[^@]+}}@__cxx_global_var_init -// CHECK5-SAME: () #[[ATTR0]] section "__TEXT,__StaticInit,regular,pure_instructions" !dbg [[DBG31:![0-9]+]] { +// CHECK5-SAME: () #[[ATTR0]] section "__TEXT,__StaticInit,regular,pure_instructions" !dbg [[DBG32:![0-9]+]] { // CHECK5-NEXT: entry: // CHECK5-NEXT: call void @_ZN9TestClassC1Ev(%class.TestClass* nonnull align 4 dereferenceable(4) @tc), !dbg [[DBG33:![0-9]+]] // CHECK5-NEXT: [[TMP0:%.*]] = call i32 @__cxa_atexit(void (i8*)* bitcast (void (%class.TestClass*)* @_ZN9TestClassD1Ev to void (i8*)*), i8* bitcast (%class.TestClass* @tc to i8*), i8* @__dso_handle) #[[ATTR3]], !dbg [[DBG35:![0-9]+]] @@ -5238,4 +5238,3 @@ void array_func(int n, int a[n], St s[2]) { // CHECK6-NEXT: store %struct.St* [[TMP19]], %struct.St** [[TMP15]], align 8 // CHECK6-NEXT: ret void // -// \ No newline at end of file diff --git a/clang/test/OpenMP/target_parallel_debug_codegen.cpp b/clang/test/OpenMP/target_parallel_debug_codegen.cpp index 81d1b14750c8..0ac82cb0106c 100644 --- a/clang/test/OpenMP/target_parallel_debug_codegen.cpp +++ b/clang/test/OpenMP/target_parallel_debug_codegen.cpp @@ -65,7 +65,7 @@ int main() { return 0; } // CHECK1-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l23_debug__ -// CHECK1-SAME: ([10 x [10 x [10 x i32]]] addrspace(1)* noalias [[C:%.*]], i32 [[A:%.*]], [10 x [10 x i32]]* noalias [[B:%.*]], i8 addrspace(1)* noalias [[BB:%.*]]) #[[ATTR0:[0-9]+]] !dbg [[DBG24:![0-9]+]] { +// CHECK1-SAME: ([10 x [10 x [10 x i32]]] addrspace(1)* noalias [[C:%.*]], i32 [[A:%.*]], [10 x [10 x i32]]* noalias 
[[B:%.*]], i8 addrspace(1)* noalias [[BB:%.*]]) #[[ATTR0:[0-9]+]] !dbg [[DBG23:![0-9]+]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[C_ADDR:%.*]] = alloca [10 x [10 x [10 x i32]]] addrspace(1)*, align 8 // CHECK1-NEXT: [[A_ADDR:%.*]] = alloca i32, align 4 @@ -256,7 +256,7 @@ int main() { // // // CHECK1-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l23 -// CHECK1-SAME: ([10 x [10 x [10 x i32]]]* nonnull align 4 dereferenceable(4000) [[C:%.*]], i64 [[A:%.*]], [10 x [10 x i32]]* nonnull align 4 dereferenceable(400) [[B:%.*]], i8* nonnull align 1 dereferenceable(1) [[BB:%.*]]) #[[ATTR4:[0-9]+]] !dbg [[DBG98:![0-9]+]] { +// CHECK1-SAME: ([10 x [10 x [10 x i32]]]* nonnull align 4 dereferenceable(4000) [[C:%.*]], i64 [[A:%.*]], [10 x [10 x i32]]* nonnull align 4 dereferenceable(400) [[B:%.*]], i8* nonnull align 1 dereferenceable(1) [[BB:%.*]]) #[[ATTR5:[0-9]+]] !dbg [[DBG115:![0-9]+]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[C_ADDR:%.*]] = alloca [10 x [10 x [10 x i32]]]*, align 8 // CHECK1-NEXT: [[A_ADDR:%.*]] = alloca i64, align 8 @@ -466,7 +466,7 @@ int main() { // // // CHECK1-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l37 -// CHECK1-SAME: ([10 x [10 x [10 x i32]]]* nonnull align 4 dereferenceable(4000) [[C:%.*]], i64 [[A:%.*]], [10 x [10 x i32]]* nonnull align 4 dereferenceable(400) [[B:%.*]], i8* nonnull align 1 dereferenceable(1) [[BB:%.*]]) #[[ATTR4]] !dbg [[DBG168:![0-9]+]] { +// CHECK1-SAME: ([10 x [10 x [10 x i32]]]* nonnull align 4 dereferenceable(4000) [[C:%.*]], i64 [[A:%.*]], [10 x [10 x i32]]* nonnull align 4 dereferenceable(400) [[B:%.*]], i8* nonnull align 1 dereferenceable(1) [[BB:%.*]]) #[[ATTR5]] !dbg [[DBG192:![0-9]+]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[C_ADDR:%.*]] = alloca [10 x [10 x [10 x i32]]]*, align 8 // CHECK1-NEXT: [[A_ADDR:%.*]] = alloca i64, align 8 @@ -687,7 +687,7 @@ int main() { // // // CHECK1-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l51 -// CHECK1-SAME: ([10 x [10 x [10 x i32]]]* nonnull align 4 dereferenceable(4000) [[C:%.*]], i32* nonnull align 4 dereferenceable(4) [[A:%.*]], [10 x [10 x i32]]* nonnull align 4 dereferenceable(400) [[B:%.*]], i8* nonnull align 1 dereferenceable(1) [[BB:%.*]]) #[[ATTR4]] !dbg [[DBG232:![0-9]+]] { +// CHECK1-SAME: ([10 x [10 x [10 x i32]]]* nonnull align 4 dereferenceable(4000) [[C:%.*]], i32* nonnull align 4 dereferenceable(4) [[A:%.*]], [10 x [10 x i32]]* nonnull align 4 dereferenceable(400) [[B:%.*]], i8* nonnull align 1 dereferenceable(1) [[BB:%.*]]) #[[ATTR5]] !dbg [[DBG270:![0-9]+]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[C_ADDR:%.*]] = alloca [10 x [10 x [10 x i32]]]*, align 8 // CHECK1-NEXT: [[A_ADDR:%.*]] = alloca i32*, align 8 diff --git a/clang/test/OpenMP/target_parallel_for_codegen.cpp b/clang/test/OpenMP/target_parallel_for_codegen.cpp index 0fc92b4262ce..585ecf61cf35 100644 --- a/clang/test/OpenMP/target_parallel_for_codegen.cpp +++ b/clang/test/OpenMP/target_parallel_for_codegen.cpp @@ -965,8 +965,8 @@ int bar(int n){ // CHECK1: .omp.linear.pu: // CHECK1-NEXT: [[TMP20:%.*]] = load i32, i32* [[LIN4]], align 4 // CHECK1-NEXT: store i32 [[TMP20]], i32* [[CONV1]], align 8 -// CHECK1-NEXT: [[TMP22:%.*]] = load i32, i32* [[A5]], align 4 -// CHECK1-NEXT: store i32 [[TMP22]], i32* [[CONV2]], align 8 +// CHECK1-NEXT: [[TMP21:%.*]] = load i32, i32* [[A5]], align 4 +// CHECK1-NEXT: store i32 [[TMP21]], i32* [[CONV2]], align 8 // CHECK1-NEXT: br label [[DOTOMP_LINEAR_PU_DONE]] // CHECK1: .omp.linear.pu.done: // CHECK1-NEXT: ret 
void @@ -2603,8 +2603,8 @@ int bar(int n){ // CHECK2: .omp.linear.pu: // CHECK2-NEXT: [[TMP20:%.*]] = load i32, i32* [[LIN4]], align 4 // CHECK2-NEXT: store i32 [[TMP20]], i32* [[CONV1]], align 8 -// CHECK2-NEXT: [[TMP22:%.*]] = load i32, i32* [[A5]], align 4 -// CHECK2-NEXT: store i32 [[TMP22]], i32* [[CONV2]], align 8 +// CHECK2-NEXT: [[TMP21:%.*]] = load i32, i32* [[A5]], align 4 +// CHECK2-NEXT: store i32 [[TMP21]], i32* [[CONV2]], align 8 // CHECK2-NEXT: br label [[DOTOMP_LINEAR_PU_DONE]] // CHECK2: .omp.linear.pu.done: // CHECK2-NEXT: ret void @@ -4220,8 +4220,8 @@ int bar(int n){ // CHECK3: .omp.linear.pu: // CHECK3-NEXT: [[TMP20:%.*]] = load i32, i32* [[LIN2]], align 4 // CHECK3-NEXT: store i32 [[TMP20]], i32* [[LIN_ADDR]], align 4 -// CHECK3-NEXT: [[TMP22:%.*]] = load i32, i32* [[A3]], align 4 -// CHECK3-NEXT: store i32 [[TMP22]], i32* [[A_ADDR]], align 4 +// CHECK3-NEXT: [[TMP21:%.*]] = load i32, i32* [[A3]], align 4 +// CHECK3-NEXT: store i32 [[TMP21]], i32* [[A_ADDR]], align 4 // CHECK3-NEXT: br label [[DOTOMP_LINEAR_PU_DONE]] // CHECK3: .omp.linear.pu.done: // CHECK3-NEXT: ret void @@ -5814,8 +5814,8 @@ int bar(int n){ // CHECK4: .omp.linear.pu: // CHECK4-NEXT: [[TMP20:%.*]] = load i32, i32* [[LIN2]], align 4 // CHECK4-NEXT: store i32 [[TMP20]], i32* [[LIN_ADDR]], align 4 -// CHECK4-NEXT: [[TMP22:%.*]] = load i32, i32* [[A3]], align 4 -// CHECK4-NEXT: store i32 [[TMP22]], i32* [[A_ADDR]], align 4 +// CHECK4-NEXT: [[TMP21:%.*]] = load i32, i32* [[A3]], align 4 +// CHECK4-NEXT: store i32 [[TMP21]], i32* [[A_ADDR]], align 4 // CHECK4-NEXT: br label [[DOTOMP_LINEAR_PU_DONE]] // CHECK4: .omp.linear.pu.done: // CHECK4-NEXT: ret void @@ -7000,8 +7000,8 @@ int bar(int n){ // CHECK9: .omp.linear.pu: // CHECK9-NEXT: [[TMP20:%.*]] = load i32, i32* [[LIN4]], align 4 // CHECK9-NEXT: store i32 [[TMP20]], i32* [[CONV1]], align 8 -// CHECK9-NEXT: [[TMP22:%.*]] = load i32, i32* [[A5]], align 4 -// CHECK9-NEXT: store i32 [[TMP22]], i32* [[CONV2]], align 8 +// CHECK9-NEXT: [[TMP21:%.*]] = load i32, i32* [[A5]], align 4 +// CHECK9-NEXT: store i32 [[TMP21]], i32* [[CONV2]], align 8 // CHECK9-NEXT: br label [[DOTOMP_LINEAR_PU_DONE]] // CHECK9: .omp.linear.pu.done: // CHECK9-NEXT: ret void @@ -7804,8 +7804,8 @@ int bar(int n){ // CHECK10: .omp.linear.pu: // CHECK10-NEXT: [[TMP20:%.*]] = load i32, i32* [[LIN4]], align 4 // CHECK10-NEXT: store i32 [[TMP20]], i32* [[CONV1]], align 8 -// CHECK10-NEXT: [[TMP22:%.*]] = load i32, i32* [[A5]], align 4 -// CHECK10-NEXT: store i32 [[TMP22]], i32* [[CONV2]], align 8 +// CHECK10-NEXT: [[TMP21:%.*]] = load i32, i32* [[A5]], align 4 +// CHECK10-NEXT: store i32 [[TMP21]], i32* [[CONV2]], align 8 // CHECK10-NEXT: br label [[DOTOMP_LINEAR_PU_DONE]] // CHECK10: .omp.linear.pu.done: // CHECK10-NEXT: ret void @@ -8602,8 +8602,8 @@ int bar(int n){ // CHECK11: .omp.linear.pu: // CHECK11-NEXT: [[TMP20:%.*]] = load i32, i32* [[LIN2]], align 4 // CHECK11-NEXT: store i32 [[TMP20]], i32* [[LIN_ADDR]], align 4 -// CHECK11-NEXT: [[TMP22:%.*]] = load i32, i32* [[A3]], align 4 -// CHECK11-NEXT: store i32 [[TMP22]], i32* [[A_ADDR]], align 4 +// CHECK11-NEXT: [[TMP21:%.*]] = load i32, i32* [[A3]], align 4 +// CHECK11-NEXT: store i32 [[TMP21]], i32* [[A_ADDR]], align 4 // CHECK11-NEXT: br label [[DOTOMP_LINEAR_PU_DONE]] // CHECK11: .omp.linear.pu.done: // CHECK11-NEXT: ret void @@ -9382,8 +9382,8 @@ int bar(int n){ // CHECK12: .omp.linear.pu: // CHECK12-NEXT: [[TMP20:%.*]] = load i32, i32* [[LIN2]], align 4 // CHECK12-NEXT: store i32 [[TMP20]], i32* [[LIN_ADDR]], align 4 -// 
CHECK12-NEXT: [[TMP22:%.*]] = load i32, i32* [[A3]], align 4 -// CHECK12-NEXT: store i32 [[TMP22]], i32* [[A_ADDR]], align 4 +// CHECK12-NEXT: [[TMP21:%.*]] = load i32, i32* [[A3]], align 4 +// CHECK12-NEXT: store i32 [[TMP21]], i32* [[A_ADDR]], align 4 // CHECK12-NEXT: br label [[DOTOMP_LINEAR_PU_DONE]] // CHECK12: .omp.linear.pu.done: // CHECK12-NEXT: ret void @@ -10597,8 +10597,8 @@ int bar(int n){ // CHECK17: .omp.linear.pu: // CHECK17-NEXT: [[TMP20:%.*]] = load i32, i32* [[LIN4]], align 4 // CHECK17-NEXT: store i32 [[TMP20]], i32* [[CONV1]], align 8 -// CHECK17-NEXT: [[TMP22:%.*]] = load i32, i32* [[A5]], align 4 -// CHECK17-NEXT: store i32 [[TMP22]], i32* [[CONV2]], align 8 +// CHECK17-NEXT: [[TMP21:%.*]] = load i32, i32* [[A5]], align 4 +// CHECK17-NEXT: store i32 [[TMP21]], i32* [[CONV2]], align 8 // CHECK17-NEXT: br label [[DOTOMP_LINEAR_PU_DONE]] // CHECK17: .omp.linear.pu.done: // CHECK17-NEXT: ret void @@ -12235,8 +12235,8 @@ int bar(int n){ // CHECK18: .omp.linear.pu: // CHECK18-NEXT: [[TMP20:%.*]] = load i32, i32* [[LIN4]], align 4 // CHECK18-NEXT: store i32 [[TMP20]], i32* [[CONV1]], align 8 -// CHECK18-NEXT: [[TMP22:%.*]] = load i32, i32* [[A5]], align 4 -// CHECK18-NEXT: store i32 [[TMP22]], i32* [[CONV2]], align 8 +// CHECK18-NEXT: [[TMP21:%.*]] = load i32, i32* [[A5]], align 4 +// CHECK18-NEXT: store i32 [[TMP21]], i32* [[CONV2]], align 8 // CHECK18-NEXT: br label [[DOTOMP_LINEAR_PU_DONE]] // CHECK18: .omp.linear.pu.done: // CHECK18-NEXT: ret void @@ -13852,8 +13852,8 @@ int bar(int n){ // CHECK19: .omp.linear.pu: // CHECK19-NEXT: [[TMP20:%.*]] = load i32, i32* [[LIN2]], align 4 // CHECK19-NEXT: store i32 [[TMP20]], i32* [[LIN_ADDR]], align 4 -// CHECK19-NEXT: [[TMP22:%.*]] = load i32, i32* [[A3]], align 4 -// CHECK19-NEXT: store i32 [[TMP22]], i32* [[A_ADDR]], align 4 +// CHECK19-NEXT: [[TMP21:%.*]] = load i32, i32* [[A3]], align 4 +// CHECK19-NEXT: store i32 [[TMP21]], i32* [[A_ADDR]], align 4 // CHECK19-NEXT: br label [[DOTOMP_LINEAR_PU_DONE]] // CHECK19: .omp.linear.pu.done: // CHECK19-NEXT: ret void @@ -15446,8 +15446,8 @@ int bar(int n){ // CHECK20: .omp.linear.pu: // CHECK20-NEXT: [[TMP20:%.*]] = load i32, i32* [[LIN2]], align 4 // CHECK20-NEXT: store i32 [[TMP20]], i32* [[LIN_ADDR]], align 4 -// CHECK20-NEXT: [[TMP22:%.*]] = load i32, i32* [[A3]], align 4 -// CHECK20-NEXT: store i32 [[TMP22]], i32* [[A_ADDR]], align 4 +// CHECK20-NEXT: [[TMP21:%.*]] = load i32, i32* [[A3]], align 4 +// CHECK20-NEXT: store i32 [[TMP21]], i32* [[A_ADDR]], align 4 // CHECK20-NEXT: br label [[DOTOMP_LINEAR_PU_DONE]] // CHECK20: .omp.linear.pu.done: // CHECK20-NEXT: ret void @@ -16632,8 +16632,8 @@ int bar(int n){ // CHECK25: .omp.linear.pu: // CHECK25-NEXT: [[TMP20:%.*]] = load i32, i32* [[LIN4]], align 4 // CHECK25-NEXT: store i32 [[TMP20]], i32* [[CONV1]], align 8 -// CHECK25-NEXT: [[TMP22:%.*]] = load i32, i32* [[A5]], align 4 -// CHECK25-NEXT: store i32 [[TMP22]], i32* [[CONV2]], align 8 +// CHECK25-NEXT: [[TMP21:%.*]] = load i32, i32* [[A5]], align 4 +// CHECK25-NEXT: store i32 [[TMP21]], i32* [[CONV2]], align 8 // CHECK25-NEXT: br label [[DOTOMP_LINEAR_PU_DONE]] // CHECK25: .omp.linear.pu.done: // CHECK25-NEXT: ret void @@ -17436,8 +17436,8 @@ int bar(int n){ // CHECK26: .omp.linear.pu: // CHECK26-NEXT: [[TMP20:%.*]] = load i32, i32* [[LIN4]], align 4 // CHECK26-NEXT: store i32 [[TMP20]], i32* [[CONV1]], align 8 -// CHECK26-NEXT: [[TMP22:%.*]] = load i32, i32* [[A5]], align 4 -// CHECK26-NEXT: store i32 [[TMP22]], i32* [[CONV2]], align 8 +// CHECK26-NEXT: [[TMP21:%.*]] = load 
i32, i32* [[A5]], align 4 +// CHECK26-NEXT: store i32 [[TMP21]], i32* [[CONV2]], align 8 // CHECK26-NEXT: br label [[DOTOMP_LINEAR_PU_DONE]] // CHECK26: .omp.linear.pu.done: // CHECK26-NEXT: ret void @@ -18234,8 +18234,8 @@ int bar(int n){ // CHECK27: .omp.linear.pu: // CHECK27-NEXT: [[TMP20:%.*]] = load i32, i32* [[LIN2]], align 4 // CHECK27-NEXT: store i32 [[TMP20]], i32* [[LIN_ADDR]], align 4 -// CHECK27-NEXT: [[TMP22:%.*]] = load i32, i32* [[A3]], align 4 -// CHECK27-NEXT: store i32 [[TMP22]], i32* [[A_ADDR]], align 4 +// CHECK27-NEXT: [[TMP21:%.*]] = load i32, i32* [[A3]], align 4 +// CHECK27-NEXT: store i32 [[TMP21]], i32* [[A_ADDR]], align 4 // CHECK27-NEXT: br label [[DOTOMP_LINEAR_PU_DONE]] // CHECK27: .omp.linear.pu.done: // CHECK27-NEXT: ret void @@ -19014,8 +19014,8 @@ int bar(int n){ // CHECK28: .omp.linear.pu: // CHECK28-NEXT: [[TMP20:%.*]] = load i32, i32* [[LIN2]], align 4 // CHECK28-NEXT: store i32 [[TMP20]], i32* [[LIN_ADDR]], align 4 -// CHECK28-NEXT: [[TMP22:%.*]] = load i32, i32* [[A3]], align 4 -// CHECK28-NEXT: store i32 [[TMP22]], i32* [[A_ADDR]], align 4 +// CHECK28-NEXT: [[TMP21:%.*]] = load i32, i32* [[A3]], align 4 +// CHECK28-NEXT: store i32 [[TMP21]], i32* [[A_ADDR]], align 4 // CHECK28-NEXT: br label [[DOTOMP_LINEAR_PU_DONE]] // CHECK28: .omp.linear.pu.done: // CHECK28-NEXT: ret void diff --git a/clang/test/OpenMP/target_parallel_for_debug_codegen.cpp b/clang/test/OpenMP/target_parallel_for_debug_codegen.cpp index 87d1dfdd170f..7f9ba2c9e773 100644 --- a/clang/test/OpenMP/target_parallel_for_debug_codegen.cpp +++ b/clang/test/OpenMP/target_parallel_for_debug_codegen.cpp @@ -55,7 +55,7 @@ int main() { return 0; } // CHECK1-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l13_debug__ -// CHECK1-SAME: ([10 x [10 x [10 x i32]]] addrspace(1)* noalias [[C:%.*]], i32 [[A:%.*]], [10 x [10 x i32]]* noalias [[B:%.*]], i8 addrspace(1)* noalias [[BB:%.*]], i1 zeroext [[DOTCAPTURE_EXPR_:%.*]]) #[[ATTR0:[0-9]+]] !dbg [[DBG14:![0-9]+]] { +// CHECK1-SAME: ([10 x [10 x [10 x i32]]] addrspace(1)* noalias [[C:%.*]], i32 [[A:%.*]], [10 x [10 x i32]]* noalias [[B:%.*]], i8 addrspace(1)* noalias [[BB:%.*]], i1 zeroext [[DOTCAPTURE_EXPR_:%.*]]) #[[ATTR0:[0-9]+]] !dbg [[DBG13:![0-9]+]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[C_ADDR:%.*]] = alloca [10 x [10 x [10 x i32]]] addrspace(1)*, align 8 // CHECK1-NEXT: [[A_ADDR:%.*]] = alloca i32, align 4 @@ -326,7 +326,7 @@ int main() { // // // CHECK1-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l13 -// CHECK1-SAME: ([10 x [10 x [10 x i32]]]* nonnull align 4 dereferenceable(4000) [[C:%.*]], i64 [[A:%.*]], [10 x [10 x i32]]* nonnull align 4 dereferenceable(400) [[B:%.*]], i8* nonnull align 1 dereferenceable(1) [[BB:%.*]], i64 [[DOTCAPTURE_EXPR_:%.*]]) #[[ATTR4:[0-9]+]] !dbg [[DBG106:![0-9]+]] { +// CHECK1-SAME: ([10 x [10 x [10 x i32]]]* nonnull align 4 dereferenceable(4000) [[C:%.*]], i64 [[A:%.*]], [10 x [10 x i32]]* nonnull align 4 dereferenceable(400) [[B:%.*]], i8* nonnull align 1 dereferenceable(1) [[BB:%.*]], i64 [[DOTCAPTURE_EXPR_:%.*]]) #[[ATTR5:[0-9]+]] !dbg [[DBG123:![0-9]+]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[C_ADDR:%.*]] = alloca [10 x [10 x [10 x i32]]]*, align 8 // CHECK1-NEXT: [[A_ADDR:%.*]] = alloca i64, align 8 @@ -615,7 +615,7 @@ int main() { // // // CHECK1-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l27 -// CHECK1-SAME: ([10 x [10 x [10 x i32]]]* nonnull align 4 dereferenceable(4000) [[C:%.*]], i64 [[A:%.*]], [10 x [10 x i32]]* nonnull 
align 4 dereferenceable(400) [[B:%.*]], i8* nonnull align 1 dereferenceable(1) [[BB:%.*]]) #[[ATTR4]] !dbg [[DBG190:![0-9]+]] { +// CHECK1-SAME: ([10 x [10 x [10 x i32]]]* nonnull align 4 dereferenceable(4000) [[C:%.*]], i64 [[A:%.*]], [10 x [10 x i32]]* nonnull align 4 dereferenceable(400) [[B:%.*]], i8* nonnull align 1 dereferenceable(1) [[BB:%.*]]) #[[ATTR5]] !dbg [[DBG214:![0-9]+]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[C_ADDR:%.*]] = alloca [10 x [10 x [10 x i32]]]*, align 8 // CHECK1-NEXT: [[A_ADDR:%.*]] = alloca i64, align 8 @@ -909,7 +909,7 @@ int main() { // // // CHECK1-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l41 -// CHECK1-SAME: ([10 x [10 x [10 x i32]]]* nonnull align 4 dereferenceable(4000) [[C:%.*]], i32* nonnull align 4 dereferenceable(4) [[A:%.*]], [10 x [10 x i32]]* nonnull align 4 dereferenceable(400) [[B:%.*]], i8* nonnull align 1 dereferenceable(1) [[BB:%.*]]) #[[ATTR4]] !dbg [[DBG269:![0-9]+]] { +// CHECK1-SAME: ([10 x [10 x [10 x i32]]]* nonnull align 4 dereferenceable(4000) [[C:%.*]], i32* nonnull align 4 dereferenceable(4) [[A:%.*]], [10 x [10 x i32]]* nonnull align 4 dereferenceable(400) [[B:%.*]], i8* nonnull align 1 dereferenceable(1) [[BB:%.*]]) #[[ATTR5]] !dbg [[DBG307:![0-9]+]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[C_ADDR:%.*]] = alloca [10 x [10 x [10 x i32]]]*, align 8 // CHECK1-NEXT: [[A_ADDR:%.*]] = alloca i32*, align 8 diff --git a/clang/test/OpenMP/target_teams_distribute_collapse_codegen.cpp b/clang/test/OpenMP/target_teams_distribute_collapse_codegen.cpp index 10a62a2c6f17..fcb73beed6ed 100644 --- a/clang/test/OpenMP/target_teams_distribute_collapse_codegen.cpp +++ b/clang/test/OpenMP/target_teams_distribute_collapse_codegen.cpp @@ -2415,4 +2415,3 @@ int main (int argc, char **argv) { // CHECK12-NEXT: call void @__tgt_register_requires(i64 1) // CHECK12-NEXT: ret void // -// \ No newline at end of file diff --git a/clang/test/OpenMP/target_teams_distribute_firstprivate_codegen.cpp b/clang/test/OpenMP/target_teams_distribute_firstprivate_codegen.cpp index 778dd0a0c3b0..106500b00271 100644 --- a/clang/test/OpenMP/target_teams_distribute_firstprivate_codegen.cpp +++ b/clang/test/OpenMP/target_teams_distribute_firstprivate_codegen.cpp @@ -3780,4 +3780,3 @@ int main() { // CHECK10-NEXT: call void @__tgt_register_requires(i64 1) // CHECK10-NEXT: ret void // -// \ No newline at end of file diff --git a/clang/test/OpenMP/target_teams_distribute_lastprivate_codegen.cpp b/clang/test/OpenMP/target_teams_distribute_lastprivate_codegen.cpp index 20008403c7c1..a1087d5a9843 100644 --- a/clang/test/OpenMP/target_teams_distribute_lastprivate_codegen.cpp +++ b/clang/test/OpenMP/target_teams_distribute_lastprivate_codegen.cpp @@ -3615,4 +3615,3 @@ int main() { // CHECK12-NEXT: call void @__tgt_register_requires(i64 1) // CHECK12-NEXT: ret void // -// \ No newline at end of file diff --git a/clang/test/OpenMP/target_teams_distribute_parallel_for_collapse_codegen.cpp b/clang/test/OpenMP/target_teams_distribute_parallel_for_collapse_codegen.cpp index 7ed17efc5ec4..f2b302b1f9f1 100644 --- a/clang/test/OpenMP/target_teams_distribute_parallel_for_collapse_codegen.cpp +++ b/clang/test/OpenMP/target_teams_distribute_parallel_for_collapse_codegen.cpp @@ -3524,4 +3524,3 @@ int main (int argc, char **argv) { // CHECK12-NEXT: call void @__tgt_register_requires(i64 1) // CHECK12-NEXT: ret void // -// \ No newline at end of file diff --git a/clang/test/OpenMP/target_teams_distribute_parallel_for_dist_schedule_codegen.cpp 
b/clang/test/OpenMP/target_teams_distribute_parallel_for_dist_schedule_codegen.cpp index c5517aacd570..48bdd180c93d 100644 --- a/clang/test/OpenMP/target_teams_distribute_parallel_for_dist_schedule_codegen.cpp +++ b/clang/test/OpenMP/target_teams_distribute_parallel_for_dist_schedule_codegen.cpp @@ -8423,4 +8423,3 @@ int main (int argc, char **argv) { // CHECK12-NEXT: call void @__tgt_register_requires(i64 1) // CHECK12-NEXT: ret void // -// \ No newline at end of file diff --git a/clang/test/OpenMP/target_teams_distribute_parallel_for_firstprivate_codegen.cpp b/clang/test/OpenMP/target_teams_distribute_parallel_for_firstprivate_codegen.cpp index 3bac42768739..1eb2cdd9018e 100644 --- a/clang/test/OpenMP/target_teams_distribute_parallel_for_firstprivate_codegen.cpp +++ b/clang/test/OpenMP/target_teams_distribute_parallel_for_firstprivate_codegen.cpp @@ -8212,4 +8212,3 @@ int main() { // CHECK17-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP3]]) // CHECK17-NEXT: ret void // -// \ No newline at end of file diff --git a/clang/test/OpenMP/target_teams_distribute_parallel_for_lastprivate_codegen.cpp b/clang/test/OpenMP/target_teams_distribute_parallel_for_lastprivate_codegen.cpp index 7a368c75c244..470a213961f6 100644 --- a/clang/test/OpenMP/target_teams_distribute_parallel_for_lastprivate_codegen.cpp +++ b/clang/test/OpenMP/target_teams_distribute_parallel_for_lastprivate_codegen.cpp @@ -5370,4 +5370,3 @@ int main() { // CHECK8-NEXT: call void @__tgt_register_requires(i64 1) // CHECK8-NEXT: ret void // -// \ No newline at end of file diff --git a/clang/test/OpenMP/target_teams_distribute_parallel_for_simd_codegen.cpp b/clang/test/OpenMP/target_teams_distribute_parallel_for_simd_codegen.cpp index 7b289aa7a04a..ab2d571b7d60 100644 --- a/clang/test/OpenMP/target_teams_distribute_parallel_for_simd_codegen.cpp +++ b/clang/test/OpenMP/target_teams_distribute_parallel_for_simd_codegen.cpp @@ -367,7 +367,7 @@ int target_teams_fun(int *g){ // CHECK1-NEXT: [[TMP23:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 // CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP22]], [[TMP23]] // CHECK1-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP4:![0-9]+]] +// CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP5:![0-9]+]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK1: omp.loop.exit: @@ -496,7 +496,7 @@ int target_teams_fun(int *g){ // CHECK1-NEXT: [[TMP22:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 // CHECK1-NEXT: [[ADD11:%.*]] = add nsw i32 [[TMP22]], 1 // CHECK1-NEXT: store i32 [[ADD11]], i32* [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP7:![0-9]+]] +// CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP8:![0-9]+]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK1: omp.loop.exit: @@ -610,28 +610,28 @@ int target_teams_fun(int *g){ // CHECK1-NEXT: store i32 [[TMP11]], i32* [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !11 +// CHECK1-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !11 // CHECK1-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP12]], 
[[TMP13]] // CHECK1-NEXT: br i1 [[CMP5]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK1-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !11 // CHECK1-NEXT: [[TMP15:%.*]] = zext i32 [[TMP14]] to i64 -// CHECK1-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !11 // CHECK1-NEXT: [[TMP17:%.*]] = zext i32 [[TMP16]] to i64 -// CHECK1-NEXT: [[TMP18:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK1-NEXT: [[TMP18:%.*]] = load i32, i32* [[CONV]], align 8, !llvm.access.group !11 // CHECK1-NEXT: [[CONV6:%.*]] = bitcast i64* [[N_CASTED]] to i32* -// CHECK1-NEXT: store i32 [[TMP18]], i32* [[CONV6]], align 4 -// CHECK1-NEXT: [[TMP19:%.*]] = load i64, i64* [[N_CASTED]], align 8 -// CHECK1-NEXT: [[TMP20:%.*]] = load i32*, i32** [[G_ADDR]], align 8 -// CHECK1-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB4]], i32 5, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64, i64, [1000 x i32]*, i32*)* @.omp_outlined..3 to void (i32*, i32*, ...)*), i64 [[TMP15]], i64 [[TMP17]], i64 [[TMP19]], [1000 x i32]* [[TMP0]], i32* [[TMP20]]) +// CHECK1-NEXT: store i32 [[TMP18]], i32* [[CONV6]], align 4, !llvm.access.group !11 +// CHECK1-NEXT: [[TMP19:%.*]] = load i64, i64* [[N_CASTED]], align 8, !llvm.access.group !11 +// CHECK1-NEXT: [[TMP20:%.*]] = load i32*, i32** [[G_ADDR]], align 8, !llvm.access.group !11 +// CHECK1-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB4]], i32 5, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64, i64, [1000 x i32]*, i32*)* @.omp_outlined..3 to void (i32*, i32*, ...)*), i64 [[TMP15]], i64 [[TMP17]], i64 [[TMP19]], [1000 x i32]* [[TMP0]], i32* [[TMP20]]), !llvm.access.group !11 // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[TMP22:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK1-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !11 +// CHECK1-NEXT: [[TMP22:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !11 // CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP21]], [[TMP22]] -// CHECK1-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP10:![0-9]+]] +// CHECK1-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !11 +// CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP12:![0-9]+]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK1: omp.loop.exit: @@ -727,30 +727,30 @@ int target_teams_fun(int *g){ // CHECK1-NEXT: store i32 [[TMP13]], i32* [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !15 +// CHECK1-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !15 // CHECK1-NEXT: [[CMP7:%.*]] = icmp sle i32 [[TMP14]], [[TMP15]] // CHECK1-NEXT: br 
i1 [[CMP7]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !15 // CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP16]], 1 // CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK1-NEXT: store i32 [[ADD]], i32* [[I5]], align 4 -// CHECK1-NEXT: [[TMP17:%.*]] = load i32*, i32** [[G_ADDR]], align 8 +// CHECK1-NEXT: store i32 [[ADD]], i32* [[I5]], align 4, !llvm.access.group !15 +// CHECK1-NEXT: [[TMP17:%.*]] = load i32*, i32** [[G_ADDR]], align 8, !llvm.access.group !15 // CHECK1-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, i32* [[TMP17]], i64 0 -// CHECK1-NEXT: [[TMP18:%.*]] = load i32, i32* [[ARRAYIDX]], align 4 -// CHECK1-NEXT: [[TMP19:%.*]] = load i32, i32* [[I5]], align 4 +// CHECK1-NEXT: [[TMP18:%.*]] = load i32, i32* [[ARRAYIDX]], align 4, !llvm.access.group !15 +// CHECK1-NEXT: [[TMP19:%.*]] = load i32, i32* [[I5]], align 4, !llvm.access.group !15 // CHECK1-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP19]] to i64 // CHECK1-NEXT: [[ARRAYIDX8:%.*]] = getelementptr inbounds [1000 x i32], [1000 x i32]* [[TMP0]], i64 0, i64 [[IDXPROM]] -// CHECK1-NEXT: store i32 [[TMP18]], i32* [[ARRAYIDX8]], align 4 +// CHECK1-NEXT: store i32 [[TMP18]], i32* [[ARRAYIDX8]], align 4, !llvm.access.group !15 // CHECK1-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK1: omp.body.continue: // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !15 // CHECK1-NEXT: [[ADD9:%.*]] = add nsw i32 [[TMP20]], 1 -// CHECK1-NEXT: store i32 [[ADD9]], i32* [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP12:![0-9]+]] +// CHECK1-NEXT: store i32 [[ADD9]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !15 +// CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP16:![0-9]+]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK1: omp.loop.exit: @@ -1078,7 +1078,7 @@ int target_teams_fun(int *g){ // CHECK2-NEXT: [[TMP23:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 // CHECK2-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP22]], [[TMP23]] // CHECK2-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 -// CHECK2-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP4:![0-9]+]] +// CHECK2-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP5:![0-9]+]] // CHECK2: omp.inner.for.end: // CHECK2-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK2: omp.loop.exit: @@ -1207,7 +1207,7 @@ int target_teams_fun(int *g){ // CHECK2-NEXT: [[TMP22:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 // CHECK2-NEXT: [[ADD11:%.*]] = add nsw i32 [[TMP22]], 1 // CHECK2-NEXT: store i32 [[ADD11]], i32* [[DOTOMP_IV]], align 4 -// CHECK2-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP7:![0-9]+]] +// CHECK2-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP8:![0-9]+]] // CHECK2: omp.inner.for.end: // CHECK2-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK2: omp.loop.exit: @@ -1321,28 +1321,28 @@ int target_teams_fun(int *g){ // CHECK2-NEXT: store i32 [[TMP11]], i32* [[DOTOMP_IV]], align 4 // CHECK2-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK2: omp.inner.for.cond: -// CHECK2-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK2-NEXT: [[TMP13:%.*]] = load 
i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK2-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !11 +// CHECK2-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !11 // CHECK2-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP12]], [[TMP13]] // CHECK2-NEXT: br i1 [[CMP5]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK2: omp.inner.for.body: -// CHECK2-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK2-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !11 // CHECK2-NEXT: [[TMP15:%.*]] = zext i32 [[TMP14]] to i64 -// CHECK2-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK2-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !11 // CHECK2-NEXT: [[TMP17:%.*]] = zext i32 [[TMP16]] to i64 -// CHECK2-NEXT: [[TMP18:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK2-NEXT: [[TMP18:%.*]] = load i32, i32* [[CONV]], align 8, !llvm.access.group !11 // CHECK2-NEXT: [[CONV6:%.*]] = bitcast i64* [[N_CASTED]] to i32* -// CHECK2-NEXT: store i32 [[TMP18]], i32* [[CONV6]], align 4 -// CHECK2-NEXT: [[TMP19:%.*]] = load i64, i64* [[N_CASTED]], align 8 -// CHECK2-NEXT: [[TMP20:%.*]] = load i32*, i32** [[G_ADDR]], align 8 -// CHECK2-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB4]], i32 5, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64, i64, [1000 x i32]*, i32*)* @.omp_outlined..3 to void (i32*, i32*, ...)*), i64 [[TMP15]], i64 [[TMP17]], i64 [[TMP19]], [1000 x i32]* [[TMP0]], i32* [[TMP20]]) +// CHECK2-NEXT: store i32 [[TMP18]], i32* [[CONV6]], align 4, !llvm.access.group !11 +// CHECK2-NEXT: [[TMP19:%.*]] = load i64, i64* [[N_CASTED]], align 8, !llvm.access.group !11 +// CHECK2-NEXT: [[TMP20:%.*]] = load i32*, i32** [[G_ADDR]], align 8, !llvm.access.group !11 +// CHECK2-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) 
@__kmpc_fork_call(%struct.ident_t* @[[GLOB4]], i32 5, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64, i64, [1000 x i32]*, i32*)* @.omp_outlined..3 to void (i32*, i32*, ...)*), i64 [[TMP15]], i64 [[TMP17]], i64 [[TMP19]], [1000 x i32]* [[TMP0]], i32* [[TMP20]]), !llvm.access.group !11 // CHECK2-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK2: omp.inner.for.inc: -// CHECK2-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK2-NEXT: [[TMP22:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK2-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !11 +// CHECK2-NEXT: [[TMP22:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !11 // CHECK2-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP21]], [[TMP22]] -// CHECK2-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 -// CHECK2-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP10:![0-9]+]] +// CHECK2-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !11 +// CHECK2-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP12:![0-9]+]] // CHECK2: omp.inner.for.end: // CHECK2-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK2: omp.loop.exit: @@ -1438,30 +1438,30 @@ int target_teams_fun(int *g){ // CHECK2-NEXT: store i32 [[TMP13]], i32* [[DOTOMP_IV]], align 4 // CHECK2-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK2: omp.inner.for.cond: -// CHECK2-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK2-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK2-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !15 +// CHECK2-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !15 // CHECK2-NEXT: [[CMP7:%.*]] = icmp sle i32 [[TMP14]], [[TMP15]] // CHECK2-NEXT: br i1 [[CMP7]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK2: omp.inner.for.body: -// CHECK2-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK2-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !15 // CHECK2-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP16]], 1 // CHECK2-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK2-NEXT: store i32 [[ADD]], i32* [[I5]], align 4 -// CHECK2-NEXT: [[TMP17:%.*]] = load i32*, i32** [[G_ADDR]], align 8 +// CHECK2-NEXT: store i32 [[ADD]], i32* [[I5]], align 4, !llvm.access.group !15 +// CHECK2-NEXT: [[TMP17:%.*]] = load i32*, i32** [[G_ADDR]], align 8, !llvm.access.group !15 // CHECK2-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, i32* [[TMP17]], i64 0 -// CHECK2-NEXT: [[TMP18:%.*]] = load i32, i32* [[ARRAYIDX]], align 4 -// CHECK2-NEXT: [[TMP19:%.*]] = load i32, i32* [[I5]], align 4 +// CHECK2-NEXT: [[TMP18:%.*]] = load i32, i32* [[ARRAYIDX]], align 4, !llvm.access.group !15 +// CHECK2-NEXT: [[TMP19:%.*]] = load i32, i32* [[I5]], align 4, !llvm.access.group !15 // CHECK2-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP19]] to i64 // CHECK2-NEXT: [[ARRAYIDX8:%.*]] = getelementptr inbounds [1000 x i32], [1000 x i32]* [[TMP0]], i64 0, i64 [[IDXPROM]] -// CHECK2-NEXT: store i32 [[TMP18]], i32* [[ARRAYIDX8]], align 4 +// CHECK2-NEXT: store i32 [[TMP18]], i32* [[ARRAYIDX8]], align 4, !llvm.access.group !15 // CHECK2-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK2: omp.body.continue: // CHECK2-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK2: omp.inner.for.inc: -// CHECK2-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK2-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, 
!llvm.access.group !15 // CHECK2-NEXT: [[ADD9:%.*]] = add nsw i32 [[TMP20]], 1 -// CHECK2-NEXT: store i32 [[ADD9]], i32* [[DOTOMP_IV]], align 4 -// CHECK2-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP12:![0-9]+]] +// CHECK2-NEXT: store i32 [[ADD9]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !15 +// CHECK2-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP16:![0-9]+]] // CHECK2: omp.inner.for.end: // CHECK2-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK2: omp.loop.exit: @@ -1772,7 +1772,7 @@ int target_teams_fun(int *g){ // CHECK3-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 // CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP20]], [[TMP21]] // CHECK3-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP5:![0-9]+]] +// CHECK3-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP6:![0-9]+]] // CHECK3: omp.inner.for.end: // CHECK3-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK3: omp.loop.exit: @@ -1896,7 +1896,7 @@ int target_teams_fun(int *g){ // CHECK3-NEXT: [[TMP22:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 // CHECK3-NEXT: [[ADD8:%.*]] = add nsw i32 [[TMP22]], 1 // CHECK3-NEXT: store i32 [[ADD8]], i32* [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP8:![0-9]+]] +// CHECK3-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP9:![0-9]+]] // CHECK3: omp.inner.for.end: // CHECK3-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK3: omp.loop.exit: @@ -2007,25 +2007,25 @@ int target_teams_fun(int *g){ // CHECK3-NEXT: store i32 [[TMP11]], i32* [[DOTOMP_IV]], align 4 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK3: omp.inner.for.cond: -// CHECK3-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK3-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !12 +// CHECK3-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !12 // CHECK3-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP12]], [[TMP13]] // CHECK3-NEXT: br i1 [[CMP5]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK3: omp.inner.for.body: -// CHECK3-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK3-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK3-NEXT: [[TMP16:%.*]] = load i32, i32* [[N_ADDR]], align 4 -// CHECK3-NEXT: store i32 [[TMP16]], i32* [[N_CASTED]], align 4 -// CHECK3-NEXT: [[TMP17:%.*]] = load i32, i32* [[N_CASTED]], align 4 -// CHECK3-NEXT: [[TMP18:%.*]] = load i32*, i32** [[G_ADDR]], align 4 -// CHECK3-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) 
@__kmpc_fork_call(%struct.ident_t* @[[GLOB4]], i32 5, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32, i32, i32, [1000 x i32]*, i32*)* @.omp_outlined..3 to void (i32*, i32*, ...)*), i32 [[TMP14]], i32 [[TMP15]], i32 [[TMP17]], [1000 x i32]* [[TMP0]], i32* [[TMP18]]) +// CHECK3-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !12 +// CHECK3-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !12 +// CHECK3-NEXT: [[TMP16:%.*]] = load i32, i32* [[N_ADDR]], align 4, !llvm.access.group !12 +// CHECK3-NEXT: store i32 [[TMP16]], i32* [[N_CASTED]], align 4, !llvm.access.group !12 +// CHECK3-NEXT: [[TMP17:%.*]] = load i32, i32* [[N_CASTED]], align 4, !llvm.access.group !12 +// CHECK3-NEXT: [[TMP18:%.*]] = load i32*, i32** [[G_ADDR]], align 4, !llvm.access.group !12 +// CHECK3-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB4]], i32 5, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32, i32, i32, [1000 x i32]*, i32*)* @.omp_outlined..3 to void (i32*, i32*, ...)*), i32 [[TMP14]], i32 [[TMP15]], i32 [[TMP17]], [1000 x i32]* [[TMP0]], i32* [[TMP18]]), !llvm.access.group !12 // CHECK3-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK3: omp.inner.for.inc: -// CHECK3-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK3-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !12 +// CHECK3-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !12 // CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP19]], [[TMP20]] -// CHECK3-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP11:![0-9]+]] +// CHECK3-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !12 +// CHECK3-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP13:![0-9]+]] // CHECK3: omp.inner.for.end: // CHECK3-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK3: omp.loop.exit: @@ -2118,29 +2118,29 @@ int target_teams_fun(int *g){ // CHECK3-NEXT: store i32 [[TMP13]], i32* [[DOTOMP_IV]], align 4 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK3: omp.inner.for.cond: -// CHECK3-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !16 +// CHECK3-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !16 // CHECK3-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP14]], [[TMP15]] // CHECK3-NEXT: br i1 [[CMP5]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK3: omp.inner.for.body: -// CHECK3-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !16 // CHECK3-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP16]], 1 // CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK3-NEXT: store i32 [[ADD]], i32* [[I3]], align 4 -// CHECK3-NEXT: [[TMP17:%.*]] = load i32*, i32** [[G_ADDR]], align 4 +// CHECK3-NEXT: store i32 [[ADD]], i32* [[I3]], align 4, !llvm.access.group !16 +// CHECK3-NEXT: [[TMP17:%.*]] = load i32*, i32** [[G_ADDR]], align 4, !llvm.access.group !16 // CHECK3-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, i32* [[TMP17]], i32 0 -// CHECK3-NEXT: [[TMP18:%.*]] = load i32, 
i32* [[ARRAYIDX]], align 4 -// CHECK3-NEXT: [[TMP19:%.*]] = load i32, i32* [[I3]], align 4 +// CHECK3-NEXT: [[TMP18:%.*]] = load i32, i32* [[ARRAYIDX]], align 4, !llvm.access.group !16 +// CHECK3-NEXT: [[TMP19:%.*]] = load i32, i32* [[I3]], align 4, !llvm.access.group !16 // CHECK3-NEXT: [[ARRAYIDX6:%.*]] = getelementptr inbounds [1000 x i32], [1000 x i32]* [[TMP0]], i32 0, i32 [[TMP19]] -// CHECK3-NEXT: store i32 [[TMP18]], i32* [[ARRAYIDX6]], align 4 +// CHECK3-NEXT: store i32 [[TMP18]], i32* [[ARRAYIDX6]], align 4, !llvm.access.group !16 // CHECK3-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK3: omp.body.continue: // CHECK3-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK3: omp.inner.for.inc: -// CHECK3-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !16 // CHECK3-NEXT: [[ADD7:%.*]] = add nsw i32 [[TMP20]], 1 -// CHECK3-NEXT: store i32 [[ADD7]], i32* [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP13:![0-9]+]] +// CHECK3-NEXT: store i32 [[ADD7]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !16 +// CHECK3-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP17:![0-9]+]] // CHECK3: omp.inner.for.end: // CHECK3-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK3: omp.loop.exit: @@ -2451,7 +2451,7 @@ int target_teams_fun(int *g){ // CHECK4-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 // CHECK4-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP20]], [[TMP21]] // CHECK4-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 -// CHECK4-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP5:![0-9]+]] +// CHECK4-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP6:![0-9]+]] // CHECK4: omp.inner.for.end: // CHECK4-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK4: omp.loop.exit: @@ -2575,7 +2575,7 @@ int target_teams_fun(int *g){ // CHECK4-NEXT: [[TMP22:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 // CHECK4-NEXT: [[ADD8:%.*]] = add nsw i32 [[TMP22]], 1 // CHECK4-NEXT: store i32 [[ADD8]], i32* [[DOTOMP_IV]], align 4 -// CHECK4-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP8:![0-9]+]] +// CHECK4-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP9:![0-9]+]] // CHECK4: omp.inner.for.end: // CHECK4-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK4: omp.loop.exit: @@ -2686,25 +2686,25 @@ int target_teams_fun(int *g){ // CHECK4-NEXT: store i32 [[TMP11]], i32* [[DOTOMP_IV]], align 4 // CHECK4-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK4: omp.inner.for.cond: -// CHECK4-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK4-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK4-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !12 +// CHECK4-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !12 // CHECK4-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP12]], [[TMP13]] // CHECK4-NEXT: br i1 [[CMP5]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK4: omp.inner.for.body: -// CHECK4-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK4-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK4-NEXT: [[TMP16:%.*]] = load i32, i32* [[N_ADDR]], align 4 -// CHECK4-NEXT: store i32 [[TMP16]], i32* [[N_CASTED]], align 4 -// CHECK4-NEXT: [[TMP17:%.*]] = load i32, i32* [[N_CASTED]], align 4 -// CHECK4-NEXT: [[TMP18:%.*]] = load i32*, i32** [[G_ADDR]], align 4 -// CHECK4-NEXT: call void 
(%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB4]], i32 5, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32, i32, i32, [1000 x i32]*, i32*)* @.omp_outlined..3 to void (i32*, i32*, ...)*), i32 [[TMP14]], i32 [[TMP15]], i32 [[TMP17]], [1000 x i32]* [[TMP0]], i32* [[TMP18]]) +// CHECK4-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !12 +// CHECK4-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !12 +// CHECK4-NEXT: [[TMP16:%.*]] = load i32, i32* [[N_ADDR]], align 4, !llvm.access.group !12 +// CHECK4-NEXT: store i32 [[TMP16]], i32* [[N_CASTED]], align 4, !llvm.access.group !12 +// CHECK4-NEXT: [[TMP17:%.*]] = load i32, i32* [[N_CASTED]], align 4, !llvm.access.group !12 +// CHECK4-NEXT: [[TMP18:%.*]] = load i32*, i32** [[G_ADDR]], align 4, !llvm.access.group !12 +// CHECK4-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB4]], i32 5, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32, i32, i32, [1000 x i32]*, i32*)* @.omp_outlined..3 to void (i32*, i32*, ...)*), i32 [[TMP14]], i32 [[TMP15]], i32 [[TMP17]], [1000 x i32]* [[TMP0]], i32* [[TMP18]]), !llvm.access.group !12 // CHECK4-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK4: omp.inner.for.inc: -// CHECK4-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK4-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK4-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !12 +// CHECK4-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !12 // CHECK4-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP19]], [[TMP20]] -// CHECK4-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 -// CHECK4-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP11:![0-9]+]] +// CHECK4-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !12 +// CHECK4-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP13:![0-9]+]] // CHECK4: omp.inner.for.end: // CHECK4-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK4: omp.loop.exit: @@ -2797,29 +2797,29 @@ int target_teams_fun(int *g){ // CHECK4-NEXT: store i32 [[TMP13]], i32* [[DOTOMP_IV]], align 4 // CHECK4-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK4: omp.inner.for.cond: -// CHECK4-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK4-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK4-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !16 +// CHECK4-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !16 // CHECK4-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP14]], [[TMP15]] // CHECK4-NEXT: br i1 [[CMP5]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK4: omp.inner.for.body: -// CHECK4-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK4-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !16 // CHECK4-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP16]], 1 // CHECK4-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK4-NEXT: store i32 [[ADD]], i32* [[I3]], align 4 -// CHECK4-NEXT: [[TMP17:%.*]] = load i32*, i32** [[G_ADDR]], align 4 +// CHECK4-NEXT: store i32 [[ADD]], i32* [[I3]], align 4, !llvm.access.group !16 +// CHECK4-NEXT: [[TMP17:%.*]] = load i32*, i32** [[G_ADDR]], align 4, !llvm.access.group !16 // CHECK4-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, i32* 
[[TMP17]], i32 0 -// CHECK4-NEXT: [[TMP18:%.*]] = load i32, i32* [[ARRAYIDX]], align 4 -// CHECK4-NEXT: [[TMP19:%.*]] = load i32, i32* [[I3]], align 4 +// CHECK4-NEXT: [[TMP18:%.*]] = load i32, i32* [[ARRAYIDX]], align 4, !llvm.access.group !16 +// CHECK4-NEXT: [[TMP19:%.*]] = load i32, i32* [[I3]], align 4, !llvm.access.group !16 // CHECK4-NEXT: [[ARRAYIDX6:%.*]] = getelementptr inbounds [1000 x i32], [1000 x i32]* [[TMP0]], i32 0, i32 [[TMP19]] -// CHECK4-NEXT: store i32 [[TMP18]], i32* [[ARRAYIDX6]], align 4 +// CHECK4-NEXT: store i32 [[TMP18]], i32* [[ARRAYIDX6]], align 4, !llvm.access.group !16 // CHECK4-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK4: omp.body.continue: // CHECK4-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK4: omp.inner.for.inc: -// CHECK4-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK4-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !16 // CHECK4-NEXT: [[ADD7:%.*]] = add nsw i32 [[TMP20]], 1 -// CHECK4-NEXT: store i32 [[ADD7]], i32* [[DOTOMP_IV]], align 4 -// CHECK4-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP13:![0-9]+]] +// CHECK4-NEXT: store i32 [[ADD7]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !16 +// CHECK4-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP17:![0-9]+]] // CHECK4: omp.inner.for.end: // CHECK4-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK4: omp.loop.exit: @@ -3581,7 +3581,7 @@ int target_teams_fun(int *g){ // CHECK9-NEXT: [[TMP23:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 // CHECK9-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP22]], [[TMP23]] // CHECK9-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 -// CHECK9-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP4:![0-9]+]] +// CHECK9-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP6:![0-9]+]] // CHECK9: omp.inner.for.end: // CHECK9-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK9: omp.loop.exit: @@ -3710,7 +3710,7 @@ int target_teams_fun(int *g){ // CHECK9-NEXT: [[TMP22:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 // CHECK9-NEXT: [[ADD11:%.*]] = add nsw i32 [[TMP22]], 1 // CHECK9-NEXT: store i32 [[ADD11]], i32* [[DOTOMP_IV]], align 4 -// CHECK9-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP7:![0-9]+]] +// CHECK9-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP9:![0-9]+]] // CHECK9: omp.inner.for.end: // CHECK9-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK9: omp.loop.exit: @@ -3824,28 +3824,28 @@ int target_teams_fun(int *g){ // CHECK9-NEXT: store i32 [[TMP11]], i32* [[DOTOMP_IV]], align 4 // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK9: omp.inner.for.cond: -// CHECK9-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK9-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK9-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !12 +// CHECK9-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !12 // CHECK9-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP12]], [[TMP13]] // CHECK9-NEXT: br i1 [[CMP5]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK9: omp.inner.for.body: -// CHECK9-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK9-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !12 // CHECK9-NEXT: [[TMP15:%.*]] = zext i32 [[TMP14]] to i64 -// CHECK9-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK9-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, 
!llvm.access.group !12 // CHECK9-NEXT: [[TMP17:%.*]] = zext i32 [[TMP16]] to i64 -// CHECK9-NEXT: [[TMP18:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK9-NEXT: [[TMP18:%.*]] = load i32, i32* [[CONV]], align 8, !llvm.access.group !12 // CHECK9-NEXT: [[CONV6:%.*]] = bitcast i64* [[N_CASTED]] to i32* -// CHECK9-NEXT: store i32 [[TMP18]], i32* [[CONV6]], align 4 -// CHECK9-NEXT: [[TMP19:%.*]] = load i64, i64* [[N_CASTED]], align 8 -// CHECK9-NEXT: [[TMP20:%.*]] = load i32*, i32** [[G_ADDR]], align 8 -// CHECK9-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB4]], i32 5, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64, i64, [1000 x i32]*, i32*)* @.omp_outlined..3 to void (i32*, i32*, ...)*), i64 [[TMP15]], i64 [[TMP17]], i64 [[TMP19]], [1000 x i32]* [[TMP0]], i32* [[TMP20]]) +// CHECK9-NEXT: store i32 [[TMP18]], i32* [[CONV6]], align 4, !llvm.access.group !12 +// CHECK9-NEXT: [[TMP19:%.*]] = load i64, i64* [[N_CASTED]], align 8, !llvm.access.group !12 +// CHECK9-NEXT: [[TMP20:%.*]] = load i32*, i32** [[G_ADDR]], align 8, !llvm.access.group !12 +// CHECK9-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB4]], i32 5, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64, i64, [1000 x i32]*, i32*)* @.omp_outlined..3 to void (i32*, i32*, ...)*), i64 [[TMP15]], i64 [[TMP17]], i64 [[TMP19]], [1000 x i32]* [[TMP0]], i32* [[TMP20]]), !llvm.access.group !12 // CHECK9-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK9: omp.inner.for.inc: -// CHECK9-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK9-NEXT: [[TMP22:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK9-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !12 +// CHECK9-NEXT: [[TMP22:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !12 // CHECK9-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP21]], [[TMP22]] -// CHECK9-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 -// CHECK9-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP10:![0-9]+]] +// CHECK9-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !12 +// CHECK9-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP13:![0-9]+]] // CHECK9: omp.inner.for.end: // CHECK9-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK9: omp.loop.exit: @@ -3941,30 +3941,30 @@ int target_teams_fun(int *g){ // CHECK9-NEXT: store i32 [[TMP13]], i32* [[DOTOMP_IV]], align 4 // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK9: omp.inner.for.cond: -// CHECK9-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK9-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK9-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !16 +// CHECK9-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !16 // CHECK9-NEXT: [[CMP7:%.*]] = icmp sle i32 [[TMP14]], [[TMP15]] // CHECK9-NEXT: br i1 [[CMP7]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK9: omp.inner.for.body: -// CHECK9-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !16 // CHECK9-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP16]], 1 // CHECK9-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK9-NEXT: store i32 [[ADD]], i32* [[I5]], align 4 -// CHECK9-NEXT: [[TMP17:%.*]] = load i32*, i32** [[G_ADDR]], align 8 +// 
CHECK9-NEXT: store i32 [[ADD]], i32* [[I5]], align 4, !llvm.access.group !16 +// CHECK9-NEXT: [[TMP17:%.*]] = load i32*, i32** [[G_ADDR]], align 8, !llvm.access.group !16 // CHECK9-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, i32* [[TMP17]], i64 0 -// CHECK9-NEXT: [[TMP18:%.*]] = load i32, i32* [[ARRAYIDX]], align 4 -// CHECK9-NEXT: [[TMP19:%.*]] = load i32, i32* [[I5]], align 4 +// CHECK9-NEXT: [[TMP18:%.*]] = load i32, i32* [[ARRAYIDX]], align 4, !llvm.access.group !16 +// CHECK9-NEXT: [[TMP19:%.*]] = load i32, i32* [[I5]], align 4, !llvm.access.group !16 // CHECK9-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP19]] to i64 // CHECK9-NEXT: [[ARRAYIDX8:%.*]] = getelementptr inbounds [1000 x i32], [1000 x i32]* [[TMP0]], i64 0, i64 [[IDXPROM]] -// CHECK9-NEXT: store i32 [[TMP18]], i32* [[ARRAYIDX8]], align 4 +// CHECK9-NEXT: store i32 [[TMP18]], i32* [[ARRAYIDX8]], align 4, !llvm.access.group !16 // CHECK9-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK9: omp.body.continue: // CHECK9-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK9: omp.inner.for.inc: -// CHECK9-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !16 // CHECK9-NEXT: [[ADD9:%.*]] = add nsw i32 [[TMP20]], 1 -// CHECK9-NEXT: store i32 [[ADD9]], i32* [[DOTOMP_IV]], align 4 -// CHECK9-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP12:![0-9]+]] +// CHECK9-NEXT: store i32 [[ADD9]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !16 +// CHECK9-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP17:![0-9]+]] // CHECK9: omp.inner.for.end: // CHECK9-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK9: omp.loop.exit: @@ -4115,7 +4115,7 @@ int target_teams_fun(int *g){ // CHECK10-NEXT: [[TMP23:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 // CHECK10-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP22]], [[TMP23]] // CHECK10-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 -// CHECK10-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP4:![0-9]+]] +// CHECK10-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP6:![0-9]+]] // CHECK10: omp.inner.for.end: // CHECK10-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK10: omp.loop.exit: @@ -4244,7 +4244,7 @@ int target_teams_fun(int *g){ // CHECK10-NEXT: [[TMP22:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 // CHECK10-NEXT: [[ADD11:%.*]] = add nsw i32 [[TMP22]], 1 // CHECK10-NEXT: store i32 [[ADD11]], i32* [[DOTOMP_IV]], align 4 -// CHECK10-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP7:![0-9]+]] +// CHECK10-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP9:![0-9]+]] // CHECK10: omp.inner.for.end: // CHECK10-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK10: omp.loop.exit: @@ -4358,28 +4358,28 @@ int target_teams_fun(int *g){ // CHECK10-NEXT: store i32 [[TMP11]], i32* [[DOTOMP_IV]], align 4 // CHECK10-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK10: omp.inner.for.cond: -// CHECK10-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK10-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK10-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !12 +// CHECK10-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !12 // CHECK10-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP12]], [[TMP13]] // CHECK10-NEXT: br i1 [[CMP5]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK10: omp.inner.for.body: -// CHECK10-NEXT: [[TMP14:%.*]] = load i32, i32* 
[[DOTOMP_COMB_LB]], align 4 +// CHECK10-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !12 // CHECK10-NEXT: [[TMP15:%.*]] = zext i32 [[TMP14]] to i64 -// CHECK10-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK10-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !12 // CHECK10-NEXT: [[TMP17:%.*]] = zext i32 [[TMP16]] to i64 -// CHECK10-NEXT: [[TMP18:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK10-NEXT: [[TMP18:%.*]] = load i32, i32* [[CONV]], align 8, !llvm.access.group !12 // CHECK10-NEXT: [[CONV6:%.*]] = bitcast i64* [[N_CASTED]] to i32* -// CHECK10-NEXT: store i32 [[TMP18]], i32* [[CONV6]], align 4 -// CHECK10-NEXT: [[TMP19:%.*]] = load i64, i64* [[N_CASTED]], align 8 -// CHECK10-NEXT: [[TMP20:%.*]] = load i32*, i32** [[G_ADDR]], align 8 -// CHECK10-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB4]], i32 5, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64, i64, [1000 x i32]*, i32*)* @.omp_outlined..3 to void (i32*, i32*, ...)*), i64 [[TMP15]], i64 [[TMP17]], i64 [[TMP19]], [1000 x i32]* [[TMP0]], i32* [[TMP20]]) +// CHECK10-NEXT: store i32 [[TMP18]], i32* [[CONV6]], align 4, !llvm.access.group !12 +// CHECK10-NEXT: [[TMP19:%.*]] = load i64, i64* [[N_CASTED]], align 8, !llvm.access.group !12 +// CHECK10-NEXT: [[TMP20:%.*]] = load i32*, i32** [[G_ADDR]], align 8, !llvm.access.group !12 +// CHECK10-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB4]], i32 5, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64, i64, [1000 x i32]*, i32*)* @.omp_outlined..3 to void (i32*, i32*, ...)*), i64 [[TMP15]], i64 [[TMP17]], i64 [[TMP19]], [1000 x i32]* [[TMP0]], i32* [[TMP20]]), !llvm.access.group !12 // CHECK10-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK10: omp.inner.for.inc: -// CHECK10-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK10-NEXT: [[TMP22:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK10-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !12 +// CHECK10-NEXT: [[TMP22:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !12 // CHECK10-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP21]], [[TMP22]] -// CHECK10-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 -// CHECK10-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP10:![0-9]+]] +// CHECK10-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !12 +// CHECK10-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP13:![0-9]+]] // CHECK10: omp.inner.for.end: // CHECK10-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK10: omp.loop.exit: @@ -4475,30 +4475,30 @@ int target_teams_fun(int *g){ // CHECK10-NEXT: store i32 [[TMP13]], i32* [[DOTOMP_IV]], align 4 // CHECK10-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK10: omp.inner.for.cond: -// CHECK10-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK10-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK10-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !16 +// CHECK10-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !16 // CHECK10-NEXT: [[CMP7:%.*]] = icmp sle i32 [[TMP14]], [[TMP15]] // CHECK10-NEXT: br i1 [[CMP7]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK10: omp.inner.for.body: -// CHECK10-NEXT: [[TMP16:%.*]] = 
load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK10-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !16 // CHECK10-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP16]], 1 // CHECK10-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK10-NEXT: store i32 [[ADD]], i32* [[I5]], align 4 -// CHECK10-NEXT: [[TMP17:%.*]] = load i32*, i32** [[G_ADDR]], align 8 +// CHECK10-NEXT: store i32 [[ADD]], i32* [[I5]], align 4, !llvm.access.group !16 +// CHECK10-NEXT: [[TMP17:%.*]] = load i32*, i32** [[G_ADDR]], align 8, !llvm.access.group !16 // CHECK10-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, i32* [[TMP17]], i64 0 -// CHECK10-NEXT: [[TMP18:%.*]] = load i32, i32* [[ARRAYIDX]], align 4 -// CHECK10-NEXT: [[TMP19:%.*]] = load i32, i32* [[I5]], align 4 +// CHECK10-NEXT: [[TMP18:%.*]] = load i32, i32* [[ARRAYIDX]], align 4, !llvm.access.group !16 +// CHECK10-NEXT: [[TMP19:%.*]] = load i32, i32* [[I5]], align 4, !llvm.access.group !16 // CHECK10-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP19]] to i64 // CHECK10-NEXT: [[ARRAYIDX8:%.*]] = getelementptr inbounds [1000 x i32], [1000 x i32]* [[TMP0]], i64 0, i64 [[IDXPROM]] -// CHECK10-NEXT: store i32 [[TMP18]], i32* [[ARRAYIDX8]], align 4 +// CHECK10-NEXT: store i32 [[TMP18]], i32* [[ARRAYIDX8]], align 4, !llvm.access.group !16 // CHECK10-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK10: omp.body.continue: // CHECK10-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK10: omp.inner.for.inc: -// CHECK10-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK10-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !16 // CHECK10-NEXT: [[ADD9:%.*]] = add nsw i32 [[TMP20]], 1 -// CHECK10-NEXT: store i32 [[ADD9]], i32* [[DOTOMP_IV]], align 4 -// CHECK10-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP12:![0-9]+]] +// CHECK10-NEXT: store i32 [[ADD9]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !16 +// CHECK10-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP17:![0-9]+]] // CHECK10: omp.inner.for.end: // CHECK10-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK10: omp.loop.exit: @@ -4637,7 +4637,7 @@ int target_teams_fun(int *g){ // CHECK11-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 // CHECK11-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP20]], [[TMP21]] // CHECK11-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 -// CHECK11-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP5:![0-9]+]] +// CHECK11-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP7:![0-9]+]] // CHECK11: omp.inner.for.end: // CHECK11-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK11: omp.loop.exit: @@ -4761,7 +4761,7 @@ int target_teams_fun(int *g){ // CHECK11-NEXT: [[TMP22:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 // CHECK11-NEXT: [[ADD8:%.*]] = add nsw i32 [[TMP22]], 1 // CHECK11-NEXT: store i32 [[ADD8]], i32* [[DOTOMP_IV]], align 4 -// CHECK11-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP8:![0-9]+]] +// CHECK11-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP10:![0-9]+]] // CHECK11: omp.inner.for.end: // CHECK11-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK11: omp.loop.exit: @@ -4872,25 +4872,25 @@ int target_teams_fun(int *g){ // CHECK11-NEXT: store i32 [[TMP11]], i32* [[DOTOMP_IV]], align 4 // CHECK11-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK11: omp.inner.for.cond: -// CHECK11-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK11-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK11-NEXT: [[TMP12:%.*]] = load i32, i32* 
[[DOTOMP_IV]], align 4, !llvm.access.group !13 +// CHECK11-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !13 // CHECK11-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP12]], [[TMP13]] // CHECK11-NEXT: br i1 [[CMP5]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK11: omp.inner.for.body: -// CHECK11-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK11-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK11-NEXT: [[TMP16:%.*]] = load i32, i32* [[N_ADDR]], align 4 -// CHECK11-NEXT: store i32 [[TMP16]], i32* [[N_CASTED]], align 4 -// CHECK11-NEXT: [[TMP17:%.*]] = load i32, i32* [[N_CASTED]], align 4 -// CHECK11-NEXT: [[TMP18:%.*]] = load i32*, i32** [[G_ADDR]], align 4 -// CHECK11-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB4]], i32 5, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32, i32, i32, [1000 x i32]*, i32*)* @.omp_outlined..3 to void (i32*, i32*, ...)*), i32 [[TMP14]], i32 [[TMP15]], i32 [[TMP17]], [1000 x i32]* [[TMP0]], i32* [[TMP18]]) +// CHECK11-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !13 +// CHECK11-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !13 +// CHECK11-NEXT: [[TMP16:%.*]] = load i32, i32* [[N_ADDR]], align 4, !llvm.access.group !13 +// CHECK11-NEXT: store i32 [[TMP16]], i32* [[N_CASTED]], align 4, !llvm.access.group !13 +// CHECK11-NEXT: [[TMP17:%.*]] = load i32, i32* [[N_CASTED]], align 4, !llvm.access.group !13 +// CHECK11-NEXT: [[TMP18:%.*]] = load i32*, i32** [[G_ADDR]], align 4, !llvm.access.group !13 +// CHECK11-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) 
@__kmpc_fork_call(%struct.ident_t* @[[GLOB4]], i32 5, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32, i32, i32, [1000 x i32]*, i32*)* @.omp_outlined..3 to void (i32*, i32*, ...)*), i32 [[TMP14]], i32 [[TMP15]], i32 [[TMP17]], [1000 x i32]* [[TMP0]], i32* [[TMP18]]), !llvm.access.group !13 // CHECK11-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK11: omp.inner.for.inc: -// CHECK11-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK11-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK11-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !13 +// CHECK11-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !13 // CHECK11-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP19]], [[TMP20]] -// CHECK11-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 -// CHECK11-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP11:![0-9]+]] +// CHECK11-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !13 +// CHECK11-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP14:![0-9]+]] // CHECK11: omp.inner.for.end: // CHECK11-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK11: omp.loop.exit: @@ -4983,29 +4983,29 @@ int target_teams_fun(int *g){ // CHECK11-NEXT: store i32 [[TMP13]], i32* [[DOTOMP_IV]], align 4 // CHECK11-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK11: omp.inner.for.cond: -// CHECK11-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK11-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK11-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !17 +// CHECK11-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !17 // CHECK11-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP14]], [[TMP15]] // CHECK11-NEXT: br i1 [[CMP5]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK11: omp.inner.for.body: -// CHECK11-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !17 // CHECK11-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP16]], 1 // CHECK11-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK11-NEXT: store i32 [[ADD]], i32* [[I3]], align 4 -// CHECK11-NEXT: [[TMP17:%.*]] = load i32*, i32** [[G_ADDR]], align 4 +// CHECK11-NEXT: store i32 [[ADD]], i32* [[I3]], align 4, !llvm.access.group !17 +// CHECK11-NEXT: [[TMP17:%.*]] = load i32*, i32** [[G_ADDR]], align 4, !llvm.access.group !17 // CHECK11-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, i32* [[TMP17]], i32 0 -// CHECK11-NEXT: [[TMP18:%.*]] = load i32, i32* [[ARRAYIDX]], align 4 -// CHECK11-NEXT: [[TMP19:%.*]] = load i32, i32* [[I3]], align 4 +// CHECK11-NEXT: [[TMP18:%.*]] = load i32, i32* [[ARRAYIDX]], align 4, !llvm.access.group !17 +// CHECK11-NEXT: [[TMP19:%.*]] = load i32, i32* [[I3]], align 4, !llvm.access.group !17 // CHECK11-NEXT: [[ARRAYIDX6:%.*]] = getelementptr inbounds [1000 x i32], [1000 x i32]* [[TMP0]], i32 0, i32 [[TMP19]] -// CHECK11-NEXT: store i32 [[TMP18]], i32* [[ARRAYIDX6]], align 4 +// CHECK11-NEXT: store i32 [[TMP18]], i32* [[ARRAYIDX6]], align 4, !llvm.access.group !17 // CHECK11-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK11: omp.body.continue: // CHECK11-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK11: omp.inner.for.inc: -// CHECK11-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !17 // 
CHECK11-NEXT: [[ADD7:%.*]] = add nsw i32 [[TMP20]], 1 -// CHECK11-NEXT: store i32 [[ADD7]], i32* [[DOTOMP_IV]], align 4 -// CHECK11-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP13:![0-9]+]] +// CHECK11-NEXT: store i32 [[ADD7]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !17 +// CHECK11-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP18:![0-9]+]] // CHECK11: omp.inner.for.end: // CHECK11-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK11: omp.loop.exit: @@ -5144,7 +5144,7 @@ int target_teams_fun(int *g){ // CHECK12-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 // CHECK12-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP20]], [[TMP21]] // CHECK12-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 -// CHECK12-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP5:![0-9]+]] +// CHECK12-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP7:![0-9]+]] // CHECK12: omp.inner.for.end: // CHECK12-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK12: omp.loop.exit: @@ -5268,7 +5268,7 @@ int target_teams_fun(int *g){ // CHECK12-NEXT: [[TMP22:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 // CHECK12-NEXT: [[ADD8:%.*]] = add nsw i32 [[TMP22]], 1 // CHECK12-NEXT: store i32 [[ADD8]], i32* [[DOTOMP_IV]], align 4 -// CHECK12-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP8:![0-9]+]] +// CHECK12-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP10:![0-9]+]] // CHECK12: omp.inner.for.end: // CHECK12-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK12: omp.loop.exit: @@ -5379,25 +5379,25 @@ int target_teams_fun(int *g){ // CHECK12-NEXT: store i32 [[TMP11]], i32* [[DOTOMP_IV]], align 4 // CHECK12-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK12: omp.inner.for.cond: -// CHECK12-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK12-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK12-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !13 +// CHECK12-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !13 // CHECK12-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP12]], [[TMP13]] // CHECK12-NEXT: br i1 [[CMP5]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK12: omp.inner.for.body: -// CHECK12-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK12-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK12-NEXT: [[TMP16:%.*]] = load i32, i32* [[N_ADDR]], align 4 -// CHECK12-NEXT: store i32 [[TMP16]], i32* [[N_CASTED]], align 4 -// CHECK12-NEXT: [[TMP17:%.*]] = load i32, i32* [[N_CASTED]], align 4 -// CHECK12-NEXT: [[TMP18:%.*]] = load i32*, i32** [[G_ADDR]], align 4 -// CHECK12-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) 
@__kmpc_fork_call(%struct.ident_t* @[[GLOB4]], i32 5, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32, i32, i32, [1000 x i32]*, i32*)* @.omp_outlined..3 to void (i32*, i32*, ...)*), i32 [[TMP14]], i32 [[TMP15]], i32 [[TMP17]], [1000 x i32]* [[TMP0]], i32* [[TMP18]]) +// CHECK12-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !13 +// CHECK12-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !13 +// CHECK12-NEXT: [[TMP16:%.*]] = load i32, i32* [[N_ADDR]], align 4, !llvm.access.group !13 +// CHECK12-NEXT: store i32 [[TMP16]], i32* [[N_CASTED]], align 4, !llvm.access.group !13 +// CHECK12-NEXT: [[TMP17:%.*]] = load i32, i32* [[N_CASTED]], align 4, !llvm.access.group !13 +// CHECK12-NEXT: [[TMP18:%.*]] = load i32*, i32** [[G_ADDR]], align 4, !llvm.access.group !13 +// CHECK12-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB4]], i32 5, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32, i32, i32, [1000 x i32]*, i32*)* @.omp_outlined..3 to void (i32*, i32*, ...)*), i32 [[TMP14]], i32 [[TMP15]], i32 [[TMP17]], [1000 x i32]* [[TMP0]], i32* [[TMP18]]), !llvm.access.group !13 // CHECK12-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK12: omp.inner.for.inc: -// CHECK12-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK12-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK12-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !13 +// CHECK12-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !13 // CHECK12-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP19]], [[TMP20]] -// CHECK12-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 -// CHECK12-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP11:![0-9]+]] +// CHECK12-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !13 +// CHECK12-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP14:![0-9]+]] // CHECK12: omp.inner.for.end: // CHECK12-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK12: omp.loop.exit: @@ -5490,29 +5490,29 @@ int target_teams_fun(int *g){ // CHECK12-NEXT: store i32 [[TMP13]], i32* [[DOTOMP_IV]], align 4 // CHECK12-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK12: omp.inner.for.cond: -// CHECK12-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK12-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK12-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !17 +// CHECK12-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !17 // CHECK12-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP14]], [[TMP15]] // CHECK12-NEXT: br i1 [[CMP5]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK12: omp.inner.for.body: -// CHECK12-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK12-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !17 // CHECK12-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP16]], 1 // CHECK12-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK12-NEXT: store i32 [[ADD]], i32* [[I3]], align 4 -// CHECK12-NEXT: [[TMP17:%.*]] = load i32*, i32** [[G_ADDR]], align 4 +// CHECK12-NEXT: store i32 [[ADD]], i32* [[I3]], align 4, !llvm.access.group !17 +// CHECK12-NEXT: [[TMP17:%.*]] = load i32*, i32** [[G_ADDR]], align 4, !llvm.access.group !17 // CHECK12-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, i32* [[TMP17]], i32 0 -// 
CHECK12-NEXT: [[TMP18:%.*]] = load i32, i32* [[ARRAYIDX]], align 4 -// CHECK12-NEXT: [[TMP19:%.*]] = load i32, i32* [[I3]], align 4 +// CHECK12-NEXT: [[TMP18:%.*]] = load i32, i32* [[ARRAYIDX]], align 4, !llvm.access.group !17 +// CHECK12-NEXT: [[TMP19:%.*]] = load i32, i32* [[I3]], align 4, !llvm.access.group !17 // CHECK12-NEXT: [[ARRAYIDX6:%.*]] = getelementptr inbounds [1000 x i32], [1000 x i32]* [[TMP0]], i32 0, i32 [[TMP19]] -// CHECK12-NEXT: store i32 [[TMP18]], i32* [[ARRAYIDX6]], align 4 +// CHECK12-NEXT: store i32 [[TMP18]], i32* [[ARRAYIDX6]], align 4, !llvm.access.group !17 // CHECK12-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK12: omp.body.continue: // CHECK12-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK12: omp.inner.for.inc: -// CHECK12-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK12-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !17 // CHECK12-NEXT: [[ADD7:%.*]] = add nsw i32 [[TMP20]], 1 -// CHECK12-NEXT: store i32 [[ADD7]], i32* [[DOTOMP_IV]], align 4 -// CHECK12-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP13:![0-9]+]] +// CHECK12-NEXT: store i32 [[ADD7]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !17 +// CHECK12-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP18:![0-9]+]] // CHECK12: omp.inner.for.end: // CHECK12-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK12: omp.loop.exit: diff --git a/clang/test/OpenMP/target_teams_distribute_parallel_for_simd_collapse_codegen.cpp b/clang/test/OpenMP/target_teams_distribute_parallel_for_simd_collapse_codegen.cpp index 79b93d237433..b7b088652fdf 100644 --- a/clang/test/OpenMP/target_teams_distribute_parallel_for_simd_collapse_codegen.cpp +++ b/clang/test/OpenMP/target_teams_distribute_parallel_for_simd_collapse_codegen.cpp @@ -195,23 +195,23 @@ int main (int argc, char **argv) { // CHECK1-NEXT: store i32 [[TMP5]], i32* [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !4 +// CHECK1-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !4 // CHECK1-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] // CHECK1-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK1-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !4 // CHECK1-NEXT: [[TMP9:%.*]] = zext i32 [[TMP8]] to i64 -// CHECK1-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !4 // CHECK1-NEXT: [[TMP11:%.*]] = zext i32 [[TMP10]] to i64 -// CHECK1-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 3, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64, %struct.SS*)* @.omp_outlined..1 to void (i32*, i32*, ...)*), i64 [[TMP9]], i64 [[TMP11]], %struct.SS* [[TMP0]]) +// CHECK1-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) 
@__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 3, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64, %struct.SS*)* @.omp_outlined..1 to void (i32*, i32*, ...)*), i64 [[TMP9]], i64 [[TMP11]], %struct.SS* [[TMP0]]), !llvm.access.group !4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK1-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !4 +// CHECK1-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !4 // CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP12]], [[TMP13]] -// CHECK1-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP3:![0-9]+]] +// CHECK1-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !4 +// CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP5:![0-9]+]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK1: omp.loop.exit: @@ -278,40 +278,40 @@ int main (int argc, char **argv) { // CHECK1-NEXT: store i32 [[TMP7]], i32* [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !8 +// CHECK1-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !8 // CHECK1-NEXT: [[CMP3:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] // CHECK1-NEXT: br i1 [[CMP3]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !8 // CHECK1-NEXT: [[DIV:%.*]] = sdiv i32 [[TMP10]], 456 // CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[DIV]], 1 // CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK1-NEXT: store i32 [[ADD]], i32* [[I]], align 4 -// CHECK1-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !8 +// CHECK1-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !8 +// CHECK1-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !8 // CHECK1-NEXT: [[DIV4:%.*]] = sdiv i32 [[TMP12]], 456 // CHECK1-NEXT: [[MUL5:%.*]] = mul nsw i32 [[DIV4]], 456 // CHECK1-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP11]], [[MUL5]] // CHECK1-NEXT: [[MUL6:%.*]] = mul nsw i32 [[SUB]], 1 // CHECK1-NEXT: [[ADD7:%.*]] = add nsw i32 0, [[MUL6]] -// CHECK1-NEXT: store i32 [[ADD7]], i32* [[J]], align 4 +// CHECK1-NEXT: store i32 [[ADD7]], i32* [[J]], align 4, !llvm.access.group !8 // CHECK1-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_SS:%.*]], %struct.SS* [[TMP0]], i32 0, i32 0 -// CHECK1-NEXT: [[TMP13:%.*]] = load i32, i32* [[I]], align 4 +// CHECK1-NEXT: [[TMP13:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !8 // CHECK1-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP13]] to i64 // CHECK1-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [123 x [456 x i32]], [123 x [456 x i32]]* [[A]], i64 0, i64 [[IDXPROM]] -// CHECK1-NEXT: [[TMP14:%.*]] = 
load i32, i32* [[J]], align 4 +// CHECK1-NEXT: [[TMP14:%.*]] = load i32, i32* [[J]], align 4, !llvm.access.group !8 // CHECK1-NEXT: [[IDXPROM8:%.*]] = sext i32 [[TMP14]] to i64 // CHECK1-NEXT: [[ARRAYIDX9:%.*]] = getelementptr inbounds [456 x i32], [456 x i32]* [[ARRAYIDX]], i64 0, i64 [[IDXPROM8]] -// CHECK1-NEXT: store i32 0, i32* [[ARRAYIDX9]], align 4 +// CHECK1-NEXT: store i32 0, i32* [[ARRAYIDX9]], align 4, !llvm.access.group !8 // CHECK1-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK1: omp.body.continue: // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !8 // CHECK1-NEXT: [[ADD10:%.*]] = add nsw i32 [[TMP15]], 1 -// CHECK1-NEXT: store i32 [[ADD10]], i32* [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP5:![0-9]+]] +// CHECK1-NEXT: store i32 [[ADD10]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !8 +// CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP9:![0-9]+]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK1: omp.loop.exit: @@ -430,23 +430,23 @@ int main (int argc, char **argv) { // CHECK2-NEXT: store i32 [[TMP5]], i32* [[DOTOMP_IV]], align 4 // CHECK2-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK2: omp.inner.for.cond: -// CHECK2-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK2-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK2-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !4 +// CHECK2-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !4 // CHECK2-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] // CHECK2-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK2: omp.inner.for.body: -// CHECK2-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK2-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !4 // CHECK2-NEXT: [[TMP9:%.*]] = zext i32 [[TMP8]] to i64 -// CHECK2-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK2-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !4 // CHECK2-NEXT: [[TMP11:%.*]] = zext i32 [[TMP10]] to i64 -// CHECK2-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 3, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64, %struct.SS*)* @.omp_outlined..1 to void (i32*, i32*, ...)*), i64 [[TMP9]], i64 [[TMP11]], %struct.SS* [[TMP0]]) +// CHECK2-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) 
@__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 3, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64, %struct.SS*)* @.omp_outlined..1 to void (i32*, i32*, ...)*), i64 [[TMP9]], i64 [[TMP11]], %struct.SS* [[TMP0]]), !llvm.access.group !4 // CHECK2-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK2: omp.inner.for.inc: -// CHECK2-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK2-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK2-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !4 +// CHECK2-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !4 // CHECK2-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP12]], [[TMP13]] -// CHECK2-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 -// CHECK2-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP3:![0-9]+]] +// CHECK2-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !4 +// CHECK2-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP5:![0-9]+]] // CHECK2: omp.inner.for.end: // CHECK2-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK2: omp.loop.exit: @@ -513,40 +513,40 @@ int main (int argc, char **argv) { // CHECK2-NEXT: store i32 [[TMP7]], i32* [[DOTOMP_IV]], align 4 // CHECK2-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK2: omp.inner.for.cond: -// CHECK2-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK2-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK2-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !8 +// CHECK2-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !8 // CHECK2-NEXT: [[CMP3:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] // CHECK2-NEXT: br i1 [[CMP3]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK2: omp.inner.for.body: -// CHECK2-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK2-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !8 // CHECK2-NEXT: [[DIV:%.*]] = sdiv i32 [[TMP10]], 456 // CHECK2-NEXT: [[MUL:%.*]] = mul nsw i32 [[DIV]], 1 // CHECK2-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK2-NEXT: store i32 [[ADD]], i32* [[I]], align 4 -// CHECK2-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK2-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK2-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !8 +// CHECK2-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !8 +// CHECK2-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !8 // CHECK2-NEXT: [[DIV4:%.*]] = sdiv i32 [[TMP12]], 456 // CHECK2-NEXT: [[MUL5:%.*]] = mul nsw i32 [[DIV4]], 456 // CHECK2-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP11]], [[MUL5]] // CHECK2-NEXT: [[MUL6:%.*]] = mul nsw i32 [[SUB]], 1 // CHECK2-NEXT: [[ADD7:%.*]] = add nsw i32 0, [[MUL6]] -// CHECK2-NEXT: store i32 [[ADD7]], i32* [[J]], align 4 +// CHECK2-NEXT: store i32 [[ADD7]], i32* [[J]], align 4, !llvm.access.group !8 // CHECK2-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_SS:%.*]], %struct.SS* [[TMP0]], i32 0, i32 0 -// CHECK2-NEXT: [[TMP13:%.*]] = load i32, i32* [[I]], align 4 +// CHECK2-NEXT: [[TMP13:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !8 // CHECK2-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP13]] to i64 // CHECK2-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [123 x [456 x i32]], [123 x [456 x i32]]* [[A]], i64 0, i64 [[IDXPROM]] -// CHECK2-NEXT: [[TMP14:%.*]] = 
load i32, i32* [[J]], align 4 +// CHECK2-NEXT: [[TMP14:%.*]] = load i32, i32* [[J]], align 4, !llvm.access.group !8 // CHECK2-NEXT: [[IDXPROM8:%.*]] = sext i32 [[TMP14]] to i64 // CHECK2-NEXT: [[ARRAYIDX9:%.*]] = getelementptr inbounds [456 x i32], [456 x i32]* [[ARRAYIDX]], i64 0, i64 [[IDXPROM8]] -// CHECK2-NEXT: store i32 0, i32* [[ARRAYIDX9]], align 4 +// CHECK2-NEXT: store i32 0, i32* [[ARRAYIDX9]], align 4, !llvm.access.group !8 // CHECK2-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK2: omp.body.continue: // CHECK2-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK2: omp.inner.for.inc: -// CHECK2-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK2-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !8 // CHECK2-NEXT: [[ADD10:%.*]] = add nsw i32 [[TMP15]], 1 -// CHECK2-NEXT: store i32 [[ADD10]], i32* [[DOTOMP_IV]], align 4 -// CHECK2-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP5:![0-9]+]] +// CHECK2-NEXT: store i32 [[ADD10]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !8 +// CHECK2-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP9:![0-9]+]] // CHECK2: omp.inner.for.end: // CHECK2-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK2: omp.loop.exit: @@ -665,21 +665,21 @@ int main (int argc, char **argv) { // CHECK3-NEXT: store i32 [[TMP5]], i32* [[DOTOMP_IV]], align 4 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK3: omp.inner.for.cond: -// CHECK3-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK3-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !5 +// CHECK3-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !5 // CHECK3-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] // CHECK3-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK3: omp.inner.for.body: -// CHECK3-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK3-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK3-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 3, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32, i32, %struct.SS*)* @.omp_outlined..1 to void (i32*, i32*, ...)*), i32 [[TMP8]], i32 [[TMP9]], %struct.SS* [[TMP0]]) +// CHECK3-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !5 +// CHECK3-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !5 +// CHECK3-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) 
@__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 3, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32, i32, %struct.SS*)* @.omp_outlined..1 to void (i32*, i32*, ...)*), i32 [[TMP8]], i32 [[TMP9]], %struct.SS* [[TMP0]]), !llvm.access.group !5 // CHECK3-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK3: omp.inner.for.inc: -// CHECK3-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK3-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !5 +// CHECK3-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !5 // CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP10]], [[TMP11]] -// CHECK3-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP4:![0-9]+]] +// CHECK3-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !5 +// CHECK3-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP6:![0-9]+]] // CHECK3: omp.inner.for.end: // CHECK3-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK3: omp.loop.exit: @@ -744,38 +744,38 @@ int main (int argc, char **argv) { // CHECK3-NEXT: store i32 [[TMP7]], i32* [[DOTOMP_IV]], align 4 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK3: omp.inner.for.cond: -// CHECK3-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !9 +// CHECK3-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !9 // CHECK3-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] // CHECK3-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK3: omp.inner.for.body: -// CHECK3-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !9 // CHECK3-NEXT: [[DIV:%.*]] = sdiv i32 [[TMP10]], 456 // CHECK3-NEXT: [[MUL:%.*]] = mul nsw i32 [[DIV]], 1 // CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK3-NEXT: store i32 [[ADD]], i32* [[I]], align 4 -// CHECK3-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !9 +// CHECK3-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !9 +// CHECK3-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !9 // CHECK3-NEXT: [[DIV3:%.*]] = sdiv i32 [[TMP12]], 456 // CHECK3-NEXT: [[MUL4:%.*]] = mul nsw i32 [[DIV3]], 456 // CHECK3-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP11]], [[MUL4]] // CHECK3-NEXT: [[MUL5:%.*]] = mul nsw i32 [[SUB]], 1 // CHECK3-NEXT: [[ADD6:%.*]] = add nsw i32 0, [[MUL5]] -// CHECK3-NEXT: store i32 [[ADD6]], i32* [[J]], align 4 +// CHECK3-NEXT: store i32 [[ADD6]], i32* [[J]], align 4, !llvm.access.group !9 // CHECK3-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_SS:%.*]], %struct.SS* [[TMP0]], i32 0, i32 0 -// CHECK3-NEXT: [[TMP13:%.*]] = load i32, i32* [[I]], align 4 +// CHECK3-NEXT: [[TMP13:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !9 // CHECK3-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [123 x [456 x i32]], [123 x [456 x i32]]* [[A]], i32 0, i32 [[TMP13]] -// CHECK3-NEXT: [[TMP14:%.*]] = load i32, i32* [[J]], align 4 +// CHECK3-NEXT: [[TMP14:%.*]] = 
load i32, i32* [[J]], align 4, !llvm.access.group !9 // CHECK3-NEXT: [[ARRAYIDX7:%.*]] = getelementptr inbounds [456 x i32], [456 x i32]* [[ARRAYIDX]], i32 0, i32 [[TMP14]] -// CHECK3-NEXT: store i32 0, i32* [[ARRAYIDX7]], align 4 +// CHECK3-NEXT: store i32 0, i32* [[ARRAYIDX7]], align 4, !llvm.access.group !9 // CHECK3-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK3: omp.body.continue: // CHECK3-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK3: omp.inner.for.inc: -// CHECK3-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !9 // CHECK3-NEXT: [[ADD8:%.*]] = add nsw i32 [[TMP15]], 1 -// CHECK3-NEXT: store i32 [[ADD8]], i32* [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP6:![0-9]+]] +// CHECK3-NEXT: store i32 [[ADD8]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !9 +// CHECK3-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP10:![0-9]+]] // CHECK3: omp.inner.for.end: // CHECK3-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK3: omp.loop.exit: @@ -894,21 +894,21 @@ int main (int argc, char **argv) { // CHECK4-NEXT: store i32 [[TMP5]], i32* [[DOTOMP_IV]], align 4 // CHECK4-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK4: omp.inner.for.cond: -// CHECK4-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK4-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK4-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !5 +// CHECK4-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !5 // CHECK4-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] // CHECK4-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK4: omp.inner.for.body: -// CHECK4-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK4-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK4-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 3, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32, i32, %struct.SS*)* @.omp_outlined..1 to void (i32*, i32*, ...)*), i32 [[TMP8]], i32 [[TMP9]], %struct.SS* [[TMP0]]) +// CHECK4-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !5 +// CHECK4-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !5 +// CHECK4-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) 
@__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 3, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32, i32, %struct.SS*)* @.omp_outlined..1 to void (i32*, i32*, ...)*), i32 [[TMP8]], i32 [[TMP9]], %struct.SS* [[TMP0]]), !llvm.access.group !5 // CHECK4-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK4: omp.inner.for.inc: -// CHECK4-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK4-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK4-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !5 +// CHECK4-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !5 // CHECK4-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP10]], [[TMP11]] -// CHECK4-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 -// CHECK4-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP4:![0-9]+]] +// CHECK4-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !5 +// CHECK4-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP6:![0-9]+]] // CHECK4: omp.inner.for.end: // CHECK4-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK4: omp.loop.exit: @@ -973,38 +973,38 @@ int main (int argc, char **argv) { // CHECK4-NEXT: store i32 [[TMP7]], i32* [[DOTOMP_IV]], align 4 // CHECK4-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK4: omp.inner.for.cond: -// CHECK4-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK4-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK4-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !9 +// CHECK4-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !9 // CHECK4-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] // CHECK4-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK4: omp.inner.for.body: -// CHECK4-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK4-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !9 // CHECK4-NEXT: [[DIV:%.*]] = sdiv i32 [[TMP10]], 456 // CHECK4-NEXT: [[MUL:%.*]] = mul nsw i32 [[DIV]], 1 // CHECK4-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK4-NEXT: store i32 [[ADD]], i32* [[I]], align 4 -// CHECK4-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK4-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK4-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !9 +// CHECK4-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !9 +// CHECK4-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !9 // CHECK4-NEXT: [[DIV3:%.*]] = sdiv i32 [[TMP12]], 456 // CHECK4-NEXT: [[MUL4:%.*]] = mul nsw i32 [[DIV3]], 456 // CHECK4-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP11]], [[MUL4]] // CHECK4-NEXT: [[MUL5:%.*]] = mul nsw i32 [[SUB]], 1 // CHECK4-NEXT: [[ADD6:%.*]] = add nsw i32 0, [[MUL5]] -// CHECK4-NEXT: store i32 [[ADD6]], i32* [[J]], align 4 +// CHECK4-NEXT: store i32 [[ADD6]], i32* [[J]], align 4, !llvm.access.group !9 // CHECK4-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_SS:%.*]], %struct.SS* [[TMP0]], i32 0, i32 0 -// CHECK4-NEXT: [[TMP13:%.*]] = load i32, i32* [[I]], align 4 +// CHECK4-NEXT: [[TMP13:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !9 // CHECK4-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [123 x [456 x i32]], [123 x [456 x i32]]* [[A]], i32 0, i32 [[TMP13]] -// CHECK4-NEXT: [[TMP14:%.*]] = load i32, i32* [[J]], align 4 +// CHECK4-NEXT: [[TMP14:%.*]] = 
load i32, i32* [[J]], align 4, !llvm.access.group !9 // CHECK4-NEXT: [[ARRAYIDX7:%.*]] = getelementptr inbounds [456 x i32], [456 x i32]* [[ARRAYIDX]], i32 0, i32 [[TMP14]] -// CHECK4-NEXT: store i32 0, i32* [[ARRAYIDX7]], align 4 +// CHECK4-NEXT: store i32 0, i32* [[ARRAYIDX7]], align 4, !llvm.access.group !9 // CHECK4-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK4: omp.body.continue: // CHECK4-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK4: omp.inner.for.inc: -// CHECK4-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK4-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !9 // CHECK4-NEXT: [[ADD8:%.*]] = add nsw i32 [[TMP15]], 1 -// CHECK4-NEXT: store i32 [[ADD8]], i32* [[DOTOMP_IV]], align 4 -// CHECK4-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP6:![0-9]+]] +// CHECK4-NEXT: store i32 [[ADD8]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !9 +// CHECK4-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP10:![0-9]+]] // CHECK4: omp.inner.for.end: // CHECK4-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK4: omp.loop.exit: @@ -1561,29 +1561,29 @@ int main (int argc, char **argv) { // CHECK9-NEXT: store i64 [[TMP16]], i64* [[DOTOMP_IV]], align 8 // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK9: omp.inner.for.cond: -// CHECK9-NEXT: [[TMP17:%.*]] = load i64, i64* [[DOTOMP_IV]], align 8 -// CHECK9-NEXT: [[TMP18:%.*]] = load i64, i64* [[DOTOMP_COMB_UB]], align 8 +// CHECK9-NEXT: [[TMP17:%.*]] = load i64, i64* [[DOTOMP_IV]], align 8, !llvm.access.group !5 +// CHECK9-NEXT: [[TMP18:%.*]] = load i64, i64* [[DOTOMP_COMB_UB]], align 8, !llvm.access.group !5 // CHECK9-NEXT: [[CMP16:%.*]] = icmp sle i64 [[TMP17]], [[TMP18]] // CHECK9-NEXT: br i1 [[CMP16]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK9: omp.inner.for.body: -// CHECK9-NEXT: [[TMP19:%.*]] = load i64, i64* [[DOTOMP_COMB_LB]], align 8 -// CHECK9-NEXT: [[TMP20:%.*]] = load i64, i64* [[DOTOMP_COMB_UB]], align 8 -// CHECK9-NEXT: [[TMP21:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK9-NEXT: [[TMP19:%.*]] = load i64, i64* [[DOTOMP_COMB_LB]], align 8, !llvm.access.group !5 +// CHECK9-NEXT: [[TMP20:%.*]] = load i64, i64* [[DOTOMP_COMB_UB]], align 8, !llvm.access.group !5 +// CHECK9-NEXT: [[TMP21:%.*]] = load i32, i32* [[CONV]], align 8, !llvm.access.group !5 // CHECK9-NEXT: [[CONV17:%.*]] = bitcast i64* [[N_CASTED]] to i32* -// CHECK9-NEXT: store i32 [[TMP21]], i32* [[CONV17]], align 4 -// CHECK9-NEXT: [[TMP22:%.*]] = load i64, i64* [[N_CASTED]], align 8 -// CHECK9-NEXT: [[TMP23:%.*]] = load i32, i32* [[CONV3]], align 8 +// CHECK9-NEXT: store i32 [[TMP21]], i32* [[CONV17]], align 4, !llvm.access.group !5 +// CHECK9-NEXT: [[TMP22:%.*]] = load i64, i64* [[N_CASTED]], align 8, !llvm.access.group !5 +// CHECK9-NEXT: [[TMP23:%.*]] = load i32, i32* [[CONV3]], align 8, !llvm.access.group !5 // CHECK9-NEXT: [[CONV18:%.*]] = bitcast i64* [[M_CASTED]] to i32* -// CHECK9-NEXT: store i32 [[TMP23]], i32* [[CONV18]], align 4 -// CHECK9-NEXT: [[TMP24:%.*]] = load i64, i64* [[M_CASTED]], align 8 -// CHECK9-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) 
@__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 7, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64, i64, i64, i64, i64, i32*)* @.omp_outlined..1 to void (i32*, i32*, ...)*), i64 [[TMP19]], i64 [[TMP20]], i64 [[TMP22]], i64 [[TMP24]], i64 [[TMP0]], i64 [[TMP1]], i32* [[TMP2]]) +// CHECK9-NEXT: store i32 [[TMP23]], i32* [[CONV18]], align 4, !llvm.access.group !5 +// CHECK9-NEXT: [[TMP24:%.*]] = load i64, i64* [[M_CASTED]], align 8, !llvm.access.group !5 +// CHECK9-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 7, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64, i64, i64, i64, i64, i32*)* @.omp_outlined..1 to void (i32*, i32*, ...)*), i64 [[TMP19]], i64 [[TMP20]], i64 [[TMP22]], i64 [[TMP24]], i64 [[TMP0]], i64 [[TMP1]], i32* [[TMP2]]), !llvm.access.group !5 // CHECK9-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK9: omp.inner.for.inc: -// CHECK9-NEXT: [[TMP25:%.*]] = load i64, i64* [[DOTOMP_IV]], align 8 -// CHECK9-NEXT: [[TMP26:%.*]] = load i64, i64* [[DOTOMP_STRIDE]], align 8 +// CHECK9-NEXT: [[TMP25:%.*]] = load i64, i64* [[DOTOMP_IV]], align 8, !llvm.access.group !5 +// CHECK9-NEXT: [[TMP26:%.*]] = load i64, i64* [[DOTOMP_STRIDE]], align 8, !llvm.access.group !5 // CHECK9-NEXT: [[ADD:%.*]] = add nsw i64 [[TMP25]], [[TMP26]] -// CHECK9-NEXT: store i64 [[ADD]], i64* [[DOTOMP_IV]], align 8 -// CHECK9-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP4:![0-9]+]] +// CHECK9-NEXT: store i64 [[ADD]], i64* [[DOTOMP_IV]], align 8, !llvm.access.group !5 +// CHECK9-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP6:![0-9]+]] // CHECK9: omp.inner.for.end: // CHECK9-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK9: omp.loop.exit: @@ -1707,13 +1707,13 @@ int main (int argc, char **argv) { // CHECK9-NEXT: store i64 [[TMP18]], i64* [[DOTOMP_IV]], align 8 // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK9: omp.inner.for.cond: -// CHECK9-NEXT: [[TMP19:%.*]] = load i64, i64* [[DOTOMP_IV]], align 8 -// CHECK9-NEXT: [[TMP20:%.*]] = load i64, i64* [[DOTOMP_UB]], align 8 +// CHECK9-NEXT: [[TMP19:%.*]] = load i64, i64* [[DOTOMP_IV]], align 8, !llvm.access.group !9 +// CHECK9-NEXT: [[TMP20:%.*]] = load i64, i64* [[DOTOMP_UB]], align 8, !llvm.access.group !9 // CHECK9-NEXT: [[CMP16:%.*]] = icmp sle i64 [[TMP19]], [[TMP20]] // CHECK9-NEXT: br i1 [[CMP16]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK9: omp.inner.for.body: -// CHECK9-NEXT: [[TMP21:%.*]] = load i64, i64* [[DOTOMP_IV]], align 8 -// CHECK9-NEXT: [[TMP22:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_5]], align 4 +// CHECK9-NEXT: [[TMP21:%.*]] = load i64, i64* [[DOTOMP_IV]], align 8, !llvm.access.group !9 +// CHECK9-NEXT: [[TMP22:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_5]], align 4, !llvm.access.group !9 // CHECK9-NEXT: [[SUB17:%.*]] = sub nsw i32 [[TMP22]], 0 // CHECK9-NEXT: [[DIV18:%.*]] = sdiv i32 [[SUB17]], 1 // CHECK9-NEXT: [[MUL19:%.*]] = mul nsw i32 1, [[DIV18]] @@ -1722,16 +1722,16 @@ int main (int argc, char **argv) { // CHECK9-NEXT: [[MUL22:%.*]] = mul nsw i64 [[DIV21]], 1 // CHECK9-NEXT: [[ADD:%.*]] = add nsw i64 0, [[MUL22]] // CHECK9-NEXT: [[CONV23:%.*]] = trunc i64 [[ADD]] to i32 -// CHECK9-NEXT: store i32 [[CONV23]], i32* [[I13]], align 4 -// CHECK9-NEXT: [[TMP23:%.*]] = load i64, i64* [[DOTOMP_IV]], align 8 -// CHECK9-NEXT: [[TMP24:%.*]] = load i64, i64* [[DOTOMP_IV]], align 8 -// CHECK9-NEXT: [[TMP25:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_5]], align 4 +// CHECK9-NEXT: store i32 
[[CONV23]], i32* [[I13]], align 4, !llvm.access.group !9 +// CHECK9-NEXT: [[TMP23:%.*]] = load i64, i64* [[DOTOMP_IV]], align 8, !llvm.access.group !9 +// CHECK9-NEXT: [[TMP24:%.*]] = load i64, i64* [[DOTOMP_IV]], align 8, !llvm.access.group !9 +// CHECK9-NEXT: [[TMP25:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_5]], align 4, !llvm.access.group !9 // CHECK9-NEXT: [[SUB24:%.*]] = sub nsw i32 [[TMP25]], 0 // CHECK9-NEXT: [[DIV25:%.*]] = sdiv i32 [[SUB24]], 1 // CHECK9-NEXT: [[MUL26:%.*]] = mul nsw i32 1, [[DIV25]] // CHECK9-NEXT: [[CONV27:%.*]] = sext i32 [[MUL26]] to i64 // CHECK9-NEXT: [[DIV28:%.*]] = sdiv i64 [[TMP24]], [[CONV27]] -// CHECK9-NEXT: [[TMP26:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_5]], align 4 +// CHECK9-NEXT: [[TMP26:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_5]], align 4, !llvm.access.group !9 // CHECK9-NEXT: [[SUB29:%.*]] = sub nsw i32 [[TMP26]], 0 // CHECK9-NEXT: [[DIV30:%.*]] = sdiv i32 [[SUB29]], 1 // CHECK9-NEXT: [[MUL31:%.*]] = mul nsw i32 1, [[DIV30]] @@ -1741,23 +1741,23 @@ int main (int argc, char **argv) { // CHECK9-NEXT: [[MUL35:%.*]] = mul nsw i64 [[SUB34]], 1 // CHECK9-NEXT: [[ADD36:%.*]] = add nsw i64 0, [[MUL35]] // CHECK9-NEXT: [[CONV37:%.*]] = trunc i64 [[ADD36]] to i32 -// CHECK9-NEXT: store i32 [[CONV37]], i32* [[J14]], align 4 -// CHECK9-NEXT: [[TMP27:%.*]] = load i32, i32* [[I13]], align 4 +// CHECK9-NEXT: store i32 [[CONV37]], i32* [[J14]], align 4, !llvm.access.group !9 +// CHECK9-NEXT: [[TMP27:%.*]] = load i32, i32* [[I13]], align 4, !llvm.access.group !9 // CHECK9-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP27]] to i64 // CHECK9-NEXT: [[TMP28:%.*]] = mul nsw i64 [[IDXPROM]], [[TMP1]] // CHECK9-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, i32* [[TMP2]], i64 [[TMP28]] -// CHECK9-NEXT: [[TMP29:%.*]] = load i32, i32* [[J14]], align 4 +// CHECK9-NEXT: [[TMP29:%.*]] = load i32, i32* [[J14]], align 4, !llvm.access.group !9 // CHECK9-NEXT: [[IDXPROM38:%.*]] = sext i32 [[TMP29]] to i64 // CHECK9-NEXT: [[ARRAYIDX39:%.*]] = getelementptr inbounds i32, i32* [[ARRAYIDX]], i64 [[IDXPROM38]] -// CHECK9-NEXT: store i32 0, i32* [[ARRAYIDX39]], align 4 +// CHECK9-NEXT: store i32 0, i32* [[ARRAYIDX39]], align 4, !llvm.access.group !9 // CHECK9-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK9: omp.body.continue: // CHECK9-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK9: omp.inner.for.inc: -// CHECK9-NEXT: [[TMP30:%.*]] = load i64, i64* [[DOTOMP_IV]], align 8 +// CHECK9-NEXT: [[TMP30:%.*]] = load i64, i64* [[DOTOMP_IV]], align 8, !llvm.access.group !9 // CHECK9-NEXT: [[ADD40:%.*]] = add nsw i64 [[TMP30]], 1 -// CHECK9-NEXT: store i64 [[ADD40]], i64* [[DOTOMP_IV]], align 8 -// CHECK9-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP6:![0-9]+]] +// CHECK9-NEXT: store i64 [[ADD40]], i64* [[DOTOMP_IV]], align 8, !llvm.access.group !9 +// CHECK9-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP10:![0-9]+]] // CHECK9: omp.inner.for.end: // CHECK9-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK9: omp.loop.exit: @@ -1870,23 +1870,23 @@ int main (int argc, char **argv) { // CHECK9-NEXT: store i32 [[TMP5]], i32* [[DOTOMP_IV]], align 4 // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK9: omp.inner.for.cond: -// CHECK9-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK9-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK9-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !14 +// CHECK9-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !14 // CHECK9-NEXT: 
[[CMP2:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] // CHECK9-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK9: omp.inner.for.body: -// CHECK9-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK9-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !14 // CHECK9-NEXT: [[TMP9:%.*]] = zext i32 [[TMP8]] to i64 -// CHECK9-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK9-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !14 // CHECK9-NEXT: [[TMP11:%.*]] = zext i32 [[TMP10]] to i64 -// CHECK9-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 3, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64, [10 x [2 x i32]]*)* @.omp_outlined..3 to void (i32*, i32*, ...)*), i64 [[TMP9]], i64 [[TMP11]], [10 x [2 x i32]]* [[TMP0]]) +// CHECK9-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 3, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64, [10 x [2 x i32]]*)* @.omp_outlined..3 to void (i32*, i32*, ...)*), i64 [[TMP9]], i64 [[TMP11]], [10 x [2 x i32]]* [[TMP0]]), !llvm.access.group !14 // CHECK9-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK9: omp.inner.for.inc: -// CHECK9-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK9-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK9-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !14 +// CHECK9-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !14 // CHECK9-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP12]], [[TMP13]] -// CHECK9-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 -// CHECK9-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP9:![0-9]+]] +// CHECK9-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !14 +// CHECK9-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP15:![0-9]+]] // CHECK9: omp.inner.for.end: // CHECK9-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK9: omp.loop.exit: @@ -1953,39 +1953,39 @@ int main (int argc, char **argv) { // CHECK9-NEXT: store i32 [[TMP7]], i32* [[DOTOMP_IV]], align 4 // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK9: omp.inner.for.cond: -// CHECK9-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK9-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK9-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !17 +// CHECK9-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !17 // CHECK9-NEXT: [[CMP3:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] // CHECK9-NEXT: br i1 [[CMP3]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK9: omp.inner.for.body: -// CHECK9-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !17 // CHECK9-NEXT: [[DIV:%.*]] = sdiv i32 [[TMP10]], 2 // CHECK9-NEXT: [[MUL:%.*]] = mul nsw i32 [[DIV]], 1 // CHECK9-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK9-NEXT: store i32 [[ADD]], i32* [[I]], align 4 -// CHECK9-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK9-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !17 +// CHECK9-NEXT: 
[[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !17 +// CHECK9-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !17 // CHECK9-NEXT: [[DIV4:%.*]] = sdiv i32 [[TMP12]], 2 // CHECK9-NEXT: [[MUL5:%.*]] = mul nsw i32 [[DIV4]], 2 // CHECK9-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP11]], [[MUL5]] // CHECK9-NEXT: [[MUL6:%.*]] = mul nsw i32 [[SUB]], 1 // CHECK9-NEXT: [[ADD7:%.*]] = add nsw i32 0, [[MUL6]] -// CHECK9-NEXT: store i32 [[ADD7]], i32* [[J]], align 4 -// CHECK9-NEXT: [[TMP13:%.*]] = load i32, i32* [[I]], align 4 +// CHECK9-NEXT: store i32 [[ADD7]], i32* [[J]], align 4, !llvm.access.group !17 +// CHECK9-NEXT: [[TMP13:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !17 // CHECK9-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP13]] to i64 // CHECK9-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x [2 x i32]], [10 x [2 x i32]]* [[TMP0]], i64 0, i64 [[IDXPROM]] -// CHECK9-NEXT: [[TMP14:%.*]] = load i32, i32* [[J]], align 4 +// CHECK9-NEXT: [[TMP14:%.*]] = load i32, i32* [[J]], align 4, !llvm.access.group !17 // CHECK9-NEXT: [[IDXPROM8:%.*]] = sext i32 [[TMP14]] to i64 // CHECK9-NEXT: [[ARRAYIDX9:%.*]] = getelementptr inbounds [2 x i32], [2 x i32]* [[ARRAYIDX]], i64 0, i64 [[IDXPROM8]] -// CHECK9-NEXT: store i32 0, i32* [[ARRAYIDX9]], align 4 +// CHECK9-NEXT: store i32 0, i32* [[ARRAYIDX9]], align 4, !llvm.access.group !17 // CHECK9-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK9: omp.body.continue: // CHECK9-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK9: omp.inner.for.inc: -// CHECK9-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !17 // CHECK9-NEXT: [[ADD10:%.*]] = add nsw i32 [[TMP15]], 1 -// CHECK9-NEXT: store i32 [[ADD10]], i32* [[DOTOMP_IV]], align 4 -// CHECK9-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP10:![0-9]+]] +// CHECK9-NEXT: store i32 [[ADD10]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !17 +// CHECK9-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP18:![0-9]+]] // CHECK9: omp.inner.for.end: // CHECK9-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK9: omp.loop.exit: @@ -2262,29 +2262,29 @@ int main (int argc, char **argv) { // CHECK10-NEXT: store i64 [[TMP16]], i64* [[DOTOMP_IV]], align 8 // CHECK10-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK10: omp.inner.for.cond: -// CHECK10-NEXT: [[TMP17:%.*]] = load i64, i64* [[DOTOMP_IV]], align 8 -// CHECK10-NEXT: [[TMP18:%.*]] = load i64, i64* [[DOTOMP_COMB_UB]], align 8 +// CHECK10-NEXT: [[TMP17:%.*]] = load i64, i64* [[DOTOMP_IV]], align 8, !llvm.access.group !5 +// CHECK10-NEXT: [[TMP18:%.*]] = load i64, i64* [[DOTOMP_COMB_UB]], align 8, !llvm.access.group !5 // CHECK10-NEXT: [[CMP16:%.*]] = icmp sle i64 [[TMP17]], [[TMP18]] // CHECK10-NEXT: br i1 [[CMP16]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK10: omp.inner.for.body: -// CHECK10-NEXT: [[TMP19:%.*]] = load i64, i64* [[DOTOMP_COMB_LB]], align 8 -// CHECK10-NEXT: [[TMP20:%.*]] = load i64, i64* [[DOTOMP_COMB_UB]], align 8 -// CHECK10-NEXT: [[TMP21:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK10-NEXT: [[TMP19:%.*]] = load i64, i64* [[DOTOMP_COMB_LB]], align 8, !llvm.access.group !5 +// CHECK10-NEXT: [[TMP20:%.*]] = load i64, i64* [[DOTOMP_COMB_UB]], align 8, !llvm.access.group !5 +// CHECK10-NEXT: [[TMP21:%.*]] = load i32, i32* [[CONV]], align 8, !llvm.access.group !5 // CHECK10-NEXT: [[CONV17:%.*]] = bitcast i64* [[N_CASTED]] to i32* -// CHECK10-NEXT: 
store i32 [[TMP21]], i32* [[CONV17]], align 4 -// CHECK10-NEXT: [[TMP22:%.*]] = load i64, i64* [[N_CASTED]], align 8 -// CHECK10-NEXT: [[TMP23:%.*]] = load i32, i32* [[CONV3]], align 8 +// CHECK10-NEXT: store i32 [[TMP21]], i32* [[CONV17]], align 4, !llvm.access.group !5 +// CHECK10-NEXT: [[TMP22:%.*]] = load i64, i64* [[N_CASTED]], align 8, !llvm.access.group !5 +// CHECK10-NEXT: [[TMP23:%.*]] = load i32, i32* [[CONV3]], align 8, !llvm.access.group !5 // CHECK10-NEXT: [[CONV18:%.*]] = bitcast i64* [[M_CASTED]] to i32* -// CHECK10-NEXT: store i32 [[TMP23]], i32* [[CONV18]], align 4 -// CHECK10-NEXT: [[TMP24:%.*]] = load i64, i64* [[M_CASTED]], align 8 -// CHECK10-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 7, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64, i64, i64, i64, i64, i32*)* @.omp_outlined..1 to void (i32*, i32*, ...)*), i64 [[TMP19]], i64 [[TMP20]], i64 [[TMP22]], i64 [[TMP24]], i64 [[TMP0]], i64 [[TMP1]], i32* [[TMP2]]) +// CHECK10-NEXT: store i32 [[TMP23]], i32* [[CONV18]], align 4, !llvm.access.group !5 +// CHECK10-NEXT: [[TMP24:%.*]] = load i64, i64* [[M_CASTED]], align 8, !llvm.access.group !5 +// CHECK10-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 7, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64, i64, i64, i64, i64, i32*)* @.omp_outlined..1 to void (i32*, i32*, ...)*), i64 [[TMP19]], i64 [[TMP20]], i64 [[TMP22]], i64 [[TMP24]], i64 [[TMP0]], i64 [[TMP1]], i32* [[TMP2]]), !llvm.access.group !5 // CHECK10-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK10: omp.inner.for.inc: -// CHECK10-NEXT: [[TMP25:%.*]] = load i64, i64* [[DOTOMP_IV]], align 8 -// CHECK10-NEXT: [[TMP26:%.*]] = load i64, i64* [[DOTOMP_STRIDE]], align 8 +// CHECK10-NEXT: [[TMP25:%.*]] = load i64, i64* [[DOTOMP_IV]], align 8, !llvm.access.group !5 +// CHECK10-NEXT: [[TMP26:%.*]] = load i64, i64* [[DOTOMP_STRIDE]], align 8, !llvm.access.group !5 // CHECK10-NEXT: [[ADD:%.*]] = add nsw i64 [[TMP25]], [[TMP26]] -// CHECK10-NEXT: store i64 [[ADD]], i64* [[DOTOMP_IV]], align 8 -// CHECK10-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP4:![0-9]+]] +// CHECK10-NEXT: store i64 [[ADD]], i64* [[DOTOMP_IV]], align 8, !llvm.access.group !5 +// CHECK10-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP6:![0-9]+]] // CHECK10: omp.inner.for.end: // CHECK10-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK10: omp.loop.exit: @@ -2408,13 +2408,13 @@ int main (int argc, char **argv) { // CHECK10-NEXT: store i64 [[TMP18]], i64* [[DOTOMP_IV]], align 8 // CHECK10-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK10: omp.inner.for.cond: -// CHECK10-NEXT: [[TMP19:%.*]] = load i64, i64* [[DOTOMP_IV]], align 8 -// CHECK10-NEXT: [[TMP20:%.*]] = load i64, i64* [[DOTOMP_UB]], align 8 +// CHECK10-NEXT: [[TMP19:%.*]] = load i64, i64* [[DOTOMP_IV]], align 8, !llvm.access.group !9 +// CHECK10-NEXT: [[TMP20:%.*]] = load i64, i64* [[DOTOMP_UB]], align 8, !llvm.access.group !9 // CHECK10-NEXT: [[CMP16:%.*]] = icmp sle i64 [[TMP19]], [[TMP20]] // CHECK10-NEXT: br i1 [[CMP16]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK10: omp.inner.for.body: -// CHECK10-NEXT: [[TMP21:%.*]] = load i64, i64* [[DOTOMP_IV]], align 8 -// CHECK10-NEXT: [[TMP22:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_5]], align 4 +// CHECK10-NEXT: [[TMP21:%.*]] = load i64, i64* [[DOTOMP_IV]], align 8, !llvm.access.group !9 +// CHECK10-NEXT: [[TMP22:%.*]] = load i32, 
i32* [[DOTCAPTURE_EXPR_5]], align 4, !llvm.access.group !9 // CHECK10-NEXT: [[SUB17:%.*]] = sub nsw i32 [[TMP22]], 0 // CHECK10-NEXT: [[DIV18:%.*]] = sdiv i32 [[SUB17]], 1 // CHECK10-NEXT: [[MUL19:%.*]] = mul nsw i32 1, [[DIV18]] @@ -2423,16 +2423,16 @@ int main (int argc, char **argv) { // CHECK10-NEXT: [[MUL22:%.*]] = mul nsw i64 [[DIV21]], 1 // CHECK10-NEXT: [[ADD:%.*]] = add nsw i64 0, [[MUL22]] // CHECK10-NEXT: [[CONV23:%.*]] = trunc i64 [[ADD]] to i32 -// CHECK10-NEXT: store i32 [[CONV23]], i32* [[I13]], align 4 -// CHECK10-NEXT: [[TMP23:%.*]] = load i64, i64* [[DOTOMP_IV]], align 8 -// CHECK10-NEXT: [[TMP24:%.*]] = load i64, i64* [[DOTOMP_IV]], align 8 -// CHECK10-NEXT: [[TMP25:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_5]], align 4 +// CHECK10-NEXT: store i32 [[CONV23]], i32* [[I13]], align 4, !llvm.access.group !9 +// CHECK10-NEXT: [[TMP23:%.*]] = load i64, i64* [[DOTOMP_IV]], align 8, !llvm.access.group !9 +// CHECK10-NEXT: [[TMP24:%.*]] = load i64, i64* [[DOTOMP_IV]], align 8, !llvm.access.group !9 +// CHECK10-NEXT: [[TMP25:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_5]], align 4, !llvm.access.group !9 // CHECK10-NEXT: [[SUB24:%.*]] = sub nsw i32 [[TMP25]], 0 // CHECK10-NEXT: [[DIV25:%.*]] = sdiv i32 [[SUB24]], 1 // CHECK10-NEXT: [[MUL26:%.*]] = mul nsw i32 1, [[DIV25]] // CHECK10-NEXT: [[CONV27:%.*]] = sext i32 [[MUL26]] to i64 // CHECK10-NEXT: [[DIV28:%.*]] = sdiv i64 [[TMP24]], [[CONV27]] -// CHECK10-NEXT: [[TMP26:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_5]], align 4 +// CHECK10-NEXT: [[TMP26:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_5]], align 4, !llvm.access.group !9 // CHECK10-NEXT: [[SUB29:%.*]] = sub nsw i32 [[TMP26]], 0 // CHECK10-NEXT: [[DIV30:%.*]] = sdiv i32 [[SUB29]], 1 // CHECK10-NEXT: [[MUL31:%.*]] = mul nsw i32 1, [[DIV30]] @@ -2442,23 +2442,23 @@ int main (int argc, char **argv) { // CHECK10-NEXT: [[MUL35:%.*]] = mul nsw i64 [[SUB34]], 1 // CHECK10-NEXT: [[ADD36:%.*]] = add nsw i64 0, [[MUL35]] // CHECK10-NEXT: [[CONV37:%.*]] = trunc i64 [[ADD36]] to i32 -// CHECK10-NEXT: store i32 [[CONV37]], i32* [[J14]], align 4 -// CHECK10-NEXT: [[TMP27:%.*]] = load i32, i32* [[I13]], align 4 +// CHECK10-NEXT: store i32 [[CONV37]], i32* [[J14]], align 4, !llvm.access.group !9 +// CHECK10-NEXT: [[TMP27:%.*]] = load i32, i32* [[I13]], align 4, !llvm.access.group !9 // CHECK10-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP27]] to i64 // CHECK10-NEXT: [[TMP28:%.*]] = mul nsw i64 [[IDXPROM]], [[TMP1]] // CHECK10-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, i32* [[TMP2]], i64 [[TMP28]] -// CHECK10-NEXT: [[TMP29:%.*]] = load i32, i32* [[J14]], align 4 +// CHECK10-NEXT: [[TMP29:%.*]] = load i32, i32* [[J14]], align 4, !llvm.access.group !9 // CHECK10-NEXT: [[IDXPROM38:%.*]] = sext i32 [[TMP29]] to i64 // CHECK10-NEXT: [[ARRAYIDX39:%.*]] = getelementptr inbounds i32, i32* [[ARRAYIDX]], i64 [[IDXPROM38]] -// CHECK10-NEXT: store i32 0, i32* [[ARRAYIDX39]], align 4 +// CHECK10-NEXT: store i32 0, i32* [[ARRAYIDX39]], align 4, !llvm.access.group !9 // CHECK10-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK10: omp.body.continue: // CHECK10-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK10: omp.inner.for.inc: -// CHECK10-NEXT: [[TMP30:%.*]] = load i64, i64* [[DOTOMP_IV]], align 8 +// CHECK10-NEXT: [[TMP30:%.*]] = load i64, i64* [[DOTOMP_IV]], align 8, !llvm.access.group !9 // CHECK10-NEXT: [[ADD40:%.*]] = add nsw i64 [[TMP30]], 1 -// CHECK10-NEXT: store i64 [[ADD40]], i64* [[DOTOMP_IV]], align 8 -// CHECK10-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP6:![0-9]+]] +// CHECK10-NEXT: 
store i64 [[ADD40]], i64* [[DOTOMP_IV]], align 8, !llvm.access.group !9 +// CHECK10-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP10:![0-9]+]] // CHECK10: omp.inner.for.end: // CHECK10-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK10: omp.loop.exit: @@ -2571,23 +2571,23 @@ int main (int argc, char **argv) { // CHECK10-NEXT: store i32 [[TMP5]], i32* [[DOTOMP_IV]], align 4 // CHECK10-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK10: omp.inner.for.cond: -// CHECK10-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK10-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK10-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !14 +// CHECK10-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !14 // CHECK10-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] // CHECK10-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK10: omp.inner.for.body: -// CHECK10-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK10-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !14 // CHECK10-NEXT: [[TMP9:%.*]] = zext i32 [[TMP8]] to i64 -// CHECK10-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK10-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !14 // CHECK10-NEXT: [[TMP11:%.*]] = zext i32 [[TMP10]] to i64 -// CHECK10-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 3, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64, [10 x [2 x i32]]*)* @.omp_outlined..3 to void (i32*, i32*, ...)*), i64 [[TMP9]], i64 [[TMP11]], [10 x [2 x i32]]* [[TMP0]]) +// CHECK10-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) 
@__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 3, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64, [10 x [2 x i32]]*)* @.omp_outlined..3 to void (i32*, i32*, ...)*), i64 [[TMP9]], i64 [[TMP11]], [10 x [2 x i32]]* [[TMP0]]), !llvm.access.group !14 // CHECK10-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK10: omp.inner.for.inc: -// CHECK10-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK10-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK10-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !14 +// CHECK10-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !14 // CHECK10-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP12]], [[TMP13]] -// CHECK10-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 -// CHECK10-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP9:![0-9]+]] +// CHECK10-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !14 +// CHECK10-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP15:![0-9]+]] // CHECK10: omp.inner.for.end: // CHECK10-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK10: omp.loop.exit: @@ -2654,39 +2654,39 @@ int main (int argc, char **argv) { // CHECK10-NEXT: store i32 [[TMP7]], i32* [[DOTOMP_IV]], align 4 // CHECK10-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK10: omp.inner.for.cond: -// CHECK10-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK10-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK10-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !17 +// CHECK10-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !17 // CHECK10-NEXT: [[CMP3:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] // CHECK10-NEXT: br i1 [[CMP3]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK10: omp.inner.for.body: -// CHECK10-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK10-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !17 // CHECK10-NEXT: [[DIV:%.*]] = sdiv i32 [[TMP10]], 2 // CHECK10-NEXT: [[MUL:%.*]] = mul nsw i32 [[DIV]], 1 // CHECK10-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK10-NEXT: store i32 [[ADD]], i32* [[I]], align 4 -// CHECK10-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK10-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK10-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !17 +// CHECK10-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !17 +// CHECK10-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !17 // CHECK10-NEXT: [[DIV4:%.*]] = sdiv i32 [[TMP12]], 2 // CHECK10-NEXT: [[MUL5:%.*]] = mul nsw i32 [[DIV4]], 2 // CHECK10-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP11]], [[MUL5]] // CHECK10-NEXT: [[MUL6:%.*]] = mul nsw i32 [[SUB]], 1 // CHECK10-NEXT: [[ADD7:%.*]] = add nsw i32 0, [[MUL6]] -// CHECK10-NEXT: store i32 [[ADD7]], i32* [[J]], align 4 -// CHECK10-NEXT: [[TMP13:%.*]] = load i32, i32* [[I]], align 4 +// CHECK10-NEXT: store i32 [[ADD7]], i32* [[J]], align 4, !llvm.access.group !17 +// CHECK10-NEXT: [[TMP13:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !17 // CHECK10-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP13]] to i64 // CHECK10-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x [2 x i32]], [10 x [2 x i32]]* [[TMP0]], i64 0, i64 [[IDXPROM]] -// CHECK10-NEXT: [[TMP14:%.*]] = load i32, i32* [[J]], align 4 +// 
CHECK10-NEXT: [[TMP14:%.*]] = load i32, i32* [[J]], align 4, !llvm.access.group !17 // CHECK10-NEXT: [[IDXPROM8:%.*]] = sext i32 [[TMP14]] to i64 // CHECK10-NEXT: [[ARRAYIDX9:%.*]] = getelementptr inbounds [2 x i32], [2 x i32]* [[ARRAYIDX]], i64 0, i64 [[IDXPROM8]] -// CHECK10-NEXT: store i32 0, i32* [[ARRAYIDX9]], align 4 +// CHECK10-NEXT: store i32 0, i32* [[ARRAYIDX9]], align 4, !llvm.access.group !17 // CHECK10-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK10: omp.body.continue: // CHECK10-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK10: omp.inner.for.inc: -// CHECK10-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK10-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !17 // CHECK10-NEXT: [[ADD10:%.*]] = add nsw i32 [[TMP15]], 1 -// CHECK10-NEXT: store i32 [[ADD10]], i32* [[DOTOMP_IV]], align 4 -// CHECK10-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP10:![0-9]+]] +// CHECK10-NEXT: store i32 [[ADD10]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !17 +// CHECK10-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP18:![0-9]+]] // CHECK10: omp.inner.for.end: // CHECK10-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK10: omp.loop.exit: @@ -2954,29 +2954,29 @@ int main (int argc, char **argv) { // CHECK11-NEXT: store i64 [[TMP16]], i64* [[DOTOMP_IV]], align 8 // CHECK11-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK11: omp.inner.for.cond: -// CHECK11-NEXT: [[TMP17:%.*]] = load i64, i64* [[DOTOMP_IV]], align 8 -// CHECK11-NEXT: [[TMP18:%.*]] = load i64, i64* [[DOTOMP_COMB_UB]], align 8 +// CHECK11-NEXT: [[TMP17:%.*]] = load i64, i64* [[DOTOMP_IV]], align 8, !llvm.access.group !6 +// CHECK11-NEXT: [[TMP18:%.*]] = load i64, i64* [[DOTOMP_COMB_UB]], align 8, !llvm.access.group !6 // CHECK11-NEXT: [[CMP14:%.*]] = icmp sle i64 [[TMP17]], [[TMP18]] // CHECK11-NEXT: br i1 [[CMP14]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK11: omp.inner.for.body: -// CHECK11-NEXT: [[TMP19:%.*]] = load i64, i64* [[DOTOMP_COMB_LB]], align 8 +// CHECK11-NEXT: [[TMP19:%.*]] = load i64, i64* [[DOTOMP_COMB_LB]], align 8, !llvm.access.group !6 // CHECK11-NEXT: [[TMP20:%.*]] = trunc i64 [[TMP19]] to i32 -// CHECK11-NEXT: [[TMP21:%.*]] = load i64, i64* [[DOTOMP_COMB_UB]], align 8 +// CHECK11-NEXT: [[TMP21:%.*]] = load i64, i64* [[DOTOMP_COMB_UB]], align 8, !llvm.access.group !6 // CHECK11-NEXT: [[TMP22:%.*]] = trunc i64 [[TMP21]] to i32 -// CHECK11-NEXT: [[TMP23:%.*]] = load i32, i32* [[N_ADDR]], align 4 -// CHECK11-NEXT: store i32 [[TMP23]], i32* [[N_CASTED]], align 4 -// CHECK11-NEXT: [[TMP24:%.*]] = load i32, i32* [[N_CASTED]], align 4 -// CHECK11-NEXT: [[TMP25:%.*]] = load i32, i32* [[M_ADDR]], align 4 -// CHECK11-NEXT: store i32 [[TMP25]], i32* [[M_CASTED]], align 4 -// CHECK11-NEXT: [[TMP26:%.*]] = load i32, i32* [[M_CASTED]], align 4 -// CHECK11-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) 
@__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 7, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32, i32, i32, i32, i32, i32, i32*)* @.omp_outlined..1 to void (i32*, i32*, ...)*), i32 [[TMP20]], i32 [[TMP22]], i32 [[TMP24]], i32 [[TMP26]], i32 [[TMP0]], i32 [[TMP1]], i32* [[TMP2]]) +// CHECK11-NEXT: [[TMP23:%.*]] = load i32, i32* [[N_ADDR]], align 4, !llvm.access.group !6 +// CHECK11-NEXT: store i32 [[TMP23]], i32* [[N_CASTED]], align 4, !llvm.access.group !6 +// CHECK11-NEXT: [[TMP24:%.*]] = load i32, i32* [[N_CASTED]], align 4, !llvm.access.group !6 +// CHECK11-NEXT: [[TMP25:%.*]] = load i32, i32* [[M_ADDR]], align 4, !llvm.access.group !6 +// CHECK11-NEXT: store i32 [[TMP25]], i32* [[M_CASTED]], align 4, !llvm.access.group !6 +// CHECK11-NEXT: [[TMP26:%.*]] = load i32, i32* [[M_CASTED]], align 4, !llvm.access.group !6 +// CHECK11-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 7, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32, i32, i32, i32, i32, i32, i32*)* @.omp_outlined..1 to void (i32*, i32*, ...)*), i32 [[TMP20]], i32 [[TMP22]], i32 [[TMP24]], i32 [[TMP26]], i32 [[TMP0]], i32 [[TMP1]], i32* [[TMP2]]), !llvm.access.group !6 // CHECK11-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK11: omp.inner.for.inc: -// CHECK11-NEXT: [[TMP27:%.*]] = load i64, i64* [[DOTOMP_IV]], align 8 -// CHECK11-NEXT: [[TMP28:%.*]] = load i64, i64* [[DOTOMP_STRIDE]], align 8 +// CHECK11-NEXT: [[TMP27:%.*]] = load i64, i64* [[DOTOMP_IV]], align 8, !llvm.access.group !6 +// CHECK11-NEXT: [[TMP28:%.*]] = load i64, i64* [[DOTOMP_STRIDE]], align 8, !llvm.access.group !6 // CHECK11-NEXT: [[ADD:%.*]] = add nsw i64 [[TMP27]], [[TMP28]] -// CHECK11-NEXT: store i64 [[ADD]], i64* [[DOTOMP_IV]], align 8 -// CHECK11-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP5:![0-9]+]] +// CHECK11-NEXT: store i64 [[ADD]], i64* [[DOTOMP_IV]], align 8, !llvm.access.group !6 +// CHECK11-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP7:![0-9]+]] // CHECK11: omp.inner.for.end: // CHECK11-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK11: omp.loop.exit: @@ -3100,13 +3100,13 @@ int main (int argc, char **argv) { // CHECK11-NEXT: store i64 [[TMP18]], i64* [[DOTOMP_IV]], align 8 // CHECK11-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK11: omp.inner.for.cond: -// CHECK11-NEXT: [[TMP19:%.*]] = load i64, i64* [[DOTOMP_IV]], align 8 -// CHECK11-NEXT: [[TMP20:%.*]] = load i64, i64* [[DOTOMP_UB]], align 8 +// CHECK11-NEXT: [[TMP19:%.*]] = load i64, i64* [[DOTOMP_IV]], align 8, !llvm.access.group !10 +// CHECK11-NEXT: [[TMP20:%.*]] = load i64, i64* [[DOTOMP_UB]], align 8, !llvm.access.group !10 // CHECK11-NEXT: [[CMP16:%.*]] = icmp sle i64 [[TMP19]], [[TMP20]] // CHECK11-NEXT: br i1 [[CMP16]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK11: omp.inner.for.body: -// CHECK11-NEXT: [[TMP21:%.*]] = load i64, i64* [[DOTOMP_IV]], align 8 -// CHECK11-NEXT: [[TMP22:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_4]], align 4 +// CHECK11-NEXT: [[TMP21:%.*]] = load i64, i64* [[DOTOMP_IV]], align 8, !llvm.access.group !10 +// CHECK11-NEXT: [[TMP22:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_4]], align 4, !llvm.access.group !10 // CHECK11-NEXT: [[SUB17:%.*]] = sub nsw i32 [[TMP22]], 0 // CHECK11-NEXT: [[DIV18:%.*]] = sdiv i32 [[SUB17]], 1 // CHECK11-NEXT: [[MUL19:%.*]] = mul nsw i32 1, [[DIV18]] @@ -3115,16 +3115,16 @@ int main (int argc, char **argv) { // CHECK11-NEXT: [[MUL22:%.*]] = mul nsw i64 [[DIV21]], 1 // CHECK11-NEXT: 
[[ADD:%.*]] = add nsw i64 0, [[MUL22]] // CHECK11-NEXT: [[CONV23:%.*]] = trunc i64 [[ADD]] to i32 -// CHECK11-NEXT: store i32 [[CONV23]], i32* [[I13]], align 4 -// CHECK11-NEXT: [[TMP23:%.*]] = load i64, i64* [[DOTOMP_IV]], align 8 -// CHECK11-NEXT: [[TMP24:%.*]] = load i64, i64* [[DOTOMP_IV]], align 8 -// CHECK11-NEXT: [[TMP25:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_4]], align 4 +// CHECK11-NEXT: store i32 [[CONV23]], i32* [[I13]], align 4, !llvm.access.group !10 +// CHECK11-NEXT: [[TMP23:%.*]] = load i64, i64* [[DOTOMP_IV]], align 8, !llvm.access.group !10 +// CHECK11-NEXT: [[TMP24:%.*]] = load i64, i64* [[DOTOMP_IV]], align 8, !llvm.access.group !10 +// CHECK11-NEXT: [[TMP25:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_4]], align 4, !llvm.access.group !10 // CHECK11-NEXT: [[SUB24:%.*]] = sub nsw i32 [[TMP25]], 0 // CHECK11-NEXT: [[DIV25:%.*]] = sdiv i32 [[SUB24]], 1 // CHECK11-NEXT: [[MUL26:%.*]] = mul nsw i32 1, [[DIV25]] // CHECK11-NEXT: [[CONV27:%.*]] = sext i32 [[MUL26]] to i64 // CHECK11-NEXT: [[DIV28:%.*]] = sdiv i64 [[TMP24]], [[CONV27]] -// CHECK11-NEXT: [[TMP26:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_4]], align 4 +// CHECK11-NEXT: [[TMP26:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_4]], align 4, !llvm.access.group !10 // CHECK11-NEXT: [[SUB29:%.*]] = sub nsw i32 [[TMP26]], 0 // CHECK11-NEXT: [[DIV30:%.*]] = sdiv i32 [[SUB29]], 1 // CHECK11-NEXT: [[MUL31:%.*]] = mul nsw i32 1, [[DIV30]] @@ -3134,21 +3134,21 @@ int main (int argc, char **argv) { // CHECK11-NEXT: [[MUL35:%.*]] = mul nsw i64 [[SUB34]], 1 // CHECK11-NEXT: [[ADD36:%.*]] = add nsw i64 0, [[MUL35]] // CHECK11-NEXT: [[CONV37:%.*]] = trunc i64 [[ADD36]] to i32 -// CHECK11-NEXT: store i32 [[CONV37]], i32* [[J14]], align 4 -// CHECK11-NEXT: [[TMP27:%.*]] = load i32, i32* [[I13]], align 4 +// CHECK11-NEXT: store i32 [[CONV37]], i32* [[J14]], align 4, !llvm.access.group !10 +// CHECK11-NEXT: [[TMP27:%.*]] = load i32, i32* [[I13]], align 4, !llvm.access.group !10 // CHECK11-NEXT: [[TMP28:%.*]] = mul nsw i32 [[TMP27]], [[TMP1]] // CHECK11-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, i32* [[TMP2]], i32 [[TMP28]] -// CHECK11-NEXT: [[TMP29:%.*]] = load i32, i32* [[J14]], align 4 +// CHECK11-NEXT: [[TMP29:%.*]] = load i32, i32* [[J14]], align 4, !llvm.access.group !10 // CHECK11-NEXT: [[ARRAYIDX38:%.*]] = getelementptr inbounds i32, i32* [[ARRAYIDX]], i32 [[TMP29]] -// CHECK11-NEXT: store i32 0, i32* [[ARRAYIDX38]], align 4 +// CHECK11-NEXT: store i32 0, i32* [[ARRAYIDX38]], align 4, !llvm.access.group !10 // CHECK11-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK11: omp.body.continue: // CHECK11-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK11: omp.inner.for.inc: -// CHECK11-NEXT: [[TMP30:%.*]] = load i64, i64* [[DOTOMP_IV]], align 8 +// CHECK11-NEXT: [[TMP30:%.*]] = load i64, i64* [[DOTOMP_IV]], align 8, !llvm.access.group !10 // CHECK11-NEXT: [[ADD39:%.*]] = add nsw i64 [[TMP30]], 1 -// CHECK11-NEXT: store i64 [[ADD39]], i64* [[DOTOMP_IV]], align 8 -// CHECK11-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP7:![0-9]+]] +// CHECK11-NEXT: store i64 [[ADD39]], i64* [[DOTOMP_IV]], align 8, !llvm.access.group !10 +// CHECK11-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP11:![0-9]+]] // CHECK11: omp.inner.for.end: // CHECK11-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK11: omp.loop.exit: @@ -3261,21 +3261,21 @@ int main (int argc, char **argv) { // CHECK11-NEXT: store i32 [[TMP5]], i32* [[DOTOMP_IV]], align 4 // CHECK11-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK11: omp.inner.for.cond: -// 
CHECK11-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK11-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK11-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !15 +// CHECK11-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !15 // CHECK11-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] // CHECK11-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK11: omp.inner.for.body: -// CHECK11-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK11-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK11-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 3, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32, i32, [10 x [2 x i32]]*)* @.omp_outlined..3 to void (i32*, i32*, ...)*), i32 [[TMP8]], i32 [[TMP9]], [10 x [2 x i32]]* [[TMP0]]) +// CHECK11-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !15 +// CHECK11-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !15 +// CHECK11-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 3, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32, i32, [10 x [2 x i32]]*)* @.omp_outlined..3 to void (i32*, i32*, ...)*), i32 [[TMP8]], i32 [[TMP9]], [10 x [2 x i32]]* [[TMP0]]), !llvm.access.group !15 // CHECK11-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK11: omp.inner.for.inc: -// CHECK11-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK11-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK11-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !15 +// CHECK11-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !15 // CHECK11-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP10]], [[TMP11]] -// CHECK11-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 -// CHECK11-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP10:![0-9]+]] +// CHECK11-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !15 +// CHECK11-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP16:![0-9]+]] // CHECK11: omp.inner.for.end: // CHECK11-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK11: omp.loop.exit: @@ -3340,37 +3340,37 @@ int main (int argc, char **argv) { // CHECK11-NEXT: store i32 [[TMP7]], i32* [[DOTOMP_IV]], align 4 // CHECK11-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK11: omp.inner.for.cond: -// CHECK11-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK11-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK11-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !18 +// CHECK11-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !18 // CHECK11-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] // CHECK11-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK11: omp.inner.for.body: -// CHECK11-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !18 // CHECK11-NEXT: [[DIV:%.*]] = sdiv i32 [[TMP10]], 2 // CHECK11-NEXT: [[MUL:%.*]] = mul nsw i32 [[DIV]], 1 // CHECK11-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK11-NEXT: store i32 
[[ADD]], i32* [[I]], align 4 -// CHECK11-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK11-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !18 +// CHECK11-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !18 +// CHECK11-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !18 // CHECK11-NEXT: [[DIV3:%.*]] = sdiv i32 [[TMP12]], 2 // CHECK11-NEXT: [[MUL4:%.*]] = mul nsw i32 [[DIV3]], 2 // CHECK11-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP11]], [[MUL4]] // CHECK11-NEXT: [[MUL5:%.*]] = mul nsw i32 [[SUB]], 1 // CHECK11-NEXT: [[ADD6:%.*]] = add nsw i32 0, [[MUL5]] -// CHECK11-NEXT: store i32 [[ADD6]], i32* [[J]], align 4 -// CHECK11-NEXT: [[TMP13:%.*]] = load i32, i32* [[I]], align 4 +// CHECK11-NEXT: store i32 [[ADD6]], i32* [[J]], align 4, !llvm.access.group !18 +// CHECK11-NEXT: [[TMP13:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !18 // CHECK11-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x [2 x i32]], [10 x [2 x i32]]* [[TMP0]], i32 0, i32 [[TMP13]] -// CHECK11-NEXT: [[TMP14:%.*]] = load i32, i32* [[J]], align 4 +// CHECK11-NEXT: [[TMP14:%.*]] = load i32, i32* [[J]], align 4, !llvm.access.group !18 // CHECK11-NEXT: [[ARRAYIDX7:%.*]] = getelementptr inbounds [2 x i32], [2 x i32]* [[ARRAYIDX]], i32 0, i32 [[TMP14]] -// CHECK11-NEXT: store i32 0, i32* [[ARRAYIDX7]], align 4 +// CHECK11-NEXT: store i32 0, i32* [[ARRAYIDX7]], align 4, !llvm.access.group !18 // CHECK11-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK11: omp.body.continue: // CHECK11-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK11: omp.inner.for.inc: -// CHECK11-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !18 // CHECK11-NEXT: [[ADD8:%.*]] = add nsw i32 [[TMP15]], 1 -// CHECK11-NEXT: store i32 [[ADD8]], i32* [[DOTOMP_IV]], align 4 -// CHECK11-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP11:![0-9]+]] +// CHECK11-NEXT: store i32 [[ADD8]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !18 +// CHECK11-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP19:![0-9]+]] // CHECK11: omp.inner.for.end: // CHECK11-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK11: omp.loop.exit: @@ -3638,29 +3638,29 @@ int main (int argc, char **argv) { // CHECK12-NEXT: store i64 [[TMP16]], i64* [[DOTOMP_IV]], align 8 // CHECK12-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK12: omp.inner.for.cond: -// CHECK12-NEXT: [[TMP17:%.*]] = load i64, i64* [[DOTOMP_IV]], align 8 -// CHECK12-NEXT: [[TMP18:%.*]] = load i64, i64* [[DOTOMP_COMB_UB]], align 8 +// CHECK12-NEXT: [[TMP17:%.*]] = load i64, i64* [[DOTOMP_IV]], align 8, !llvm.access.group !6 +// CHECK12-NEXT: [[TMP18:%.*]] = load i64, i64* [[DOTOMP_COMB_UB]], align 8, !llvm.access.group !6 // CHECK12-NEXT: [[CMP14:%.*]] = icmp sle i64 [[TMP17]], [[TMP18]] // CHECK12-NEXT: br i1 [[CMP14]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK12: omp.inner.for.body: -// CHECK12-NEXT: [[TMP19:%.*]] = load i64, i64* [[DOTOMP_COMB_LB]], align 8 +// CHECK12-NEXT: [[TMP19:%.*]] = load i64, i64* [[DOTOMP_COMB_LB]], align 8, !llvm.access.group !6 // CHECK12-NEXT: [[TMP20:%.*]] = trunc i64 [[TMP19]] to i32 -// CHECK12-NEXT: [[TMP21:%.*]] = load i64, i64* [[DOTOMP_COMB_UB]], align 8 +// CHECK12-NEXT: [[TMP21:%.*]] = load i64, i64* [[DOTOMP_COMB_UB]], align 8, !llvm.access.group !6 // 
CHECK12-NEXT: [[TMP22:%.*]] = trunc i64 [[TMP21]] to i32 -// CHECK12-NEXT: [[TMP23:%.*]] = load i32, i32* [[N_ADDR]], align 4 -// CHECK12-NEXT: store i32 [[TMP23]], i32* [[N_CASTED]], align 4 -// CHECK12-NEXT: [[TMP24:%.*]] = load i32, i32* [[N_CASTED]], align 4 -// CHECK12-NEXT: [[TMP25:%.*]] = load i32, i32* [[M_ADDR]], align 4 -// CHECK12-NEXT: store i32 [[TMP25]], i32* [[M_CASTED]], align 4 -// CHECK12-NEXT: [[TMP26:%.*]] = load i32, i32* [[M_CASTED]], align 4 -// CHECK12-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 7, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32, i32, i32, i32, i32, i32, i32*)* @.omp_outlined..1 to void (i32*, i32*, ...)*), i32 [[TMP20]], i32 [[TMP22]], i32 [[TMP24]], i32 [[TMP26]], i32 [[TMP0]], i32 [[TMP1]], i32* [[TMP2]]) +// CHECK12-NEXT: [[TMP23:%.*]] = load i32, i32* [[N_ADDR]], align 4, !llvm.access.group !6 +// CHECK12-NEXT: store i32 [[TMP23]], i32* [[N_CASTED]], align 4, !llvm.access.group !6 +// CHECK12-NEXT: [[TMP24:%.*]] = load i32, i32* [[N_CASTED]], align 4, !llvm.access.group !6 +// CHECK12-NEXT: [[TMP25:%.*]] = load i32, i32* [[M_ADDR]], align 4, !llvm.access.group !6 +// CHECK12-NEXT: store i32 [[TMP25]], i32* [[M_CASTED]], align 4, !llvm.access.group !6 +// CHECK12-NEXT: [[TMP26:%.*]] = load i32, i32* [[M_CASTED]], align 4, !llvm.access.group !6 +// CHECK12-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 7, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32, i32, i32, i32, i32, i32, i32*)* @.omp_outlined..1 to void (i32*, i32*, ...)*), i32 [[TMP20]], i32 [[TMP22]], i32 [[TMP24]], i32 [[TMP26]], i32 [[TMP0]], i32 [[TMP1]], i32* [[TMP2]]), !llvm.access.group !6 // CHECK12-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK12: omp.inner.for.inc: -// CHECK12-NEXT: [[TMP27:%.*]] = load i64, i64* [[DOTOMP_IV]], align 8 -// CHECK12-NEXT: [[TMP28:%.*]] = load i64, i64* [[DOTOMP_STRIDE]], align 8 +// CHECK12-NEXT: [[TMP27:%.*]] = load i64, i64* [[DOTOMP_IV]], align 8, !llvm.access.group !6 +// CHECK12-NEXT: [[TMP28:%.*]] = load i64, i64* [[DOTOMP_STRIDE]], align 8, !llvm.access.group !6 // CHECK12-NEXT: [[ADD:%.*]] = add nsw i64 [[TMP27]], [[TMP28]] -// CHECK12-NEXT: store i64 [[ADD]], i64* [[DOTOMP_IV]], align 8 -// CHECK12-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP5:![0-9]+]] +// CHECK12-NEXT: store i64 [[ADD]], i64* [[DOTOMP_IV]], align 8, !llvm.access.group !6 +// CHECK12-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP7:![0-9]+]] // CHECK12: omp.inner.for.end: // CHECK12-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK12: omp.loop.exit: @@ -3784,13 +3784,13 @@ int main (int argc, char **argv) { // CHECK12-NEXT: store i64 [[TMP18]], i64* [[DOTOMP_IV]], align 8 // CHECK12-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK12: omp.inner.for.cond: -// CHECK12-NEXT: [[TMP19:%.*]] = load i64, i64* [[DOTOMP_IV]], align 8 -// CHECK12-NEXT: [[TMP20:%.*]] = load i64, i64* [[DOTOMP_UB]], align 8 +// CHECK12-NEXT: [[TMP19:%.*]] = load i64, i64* [[DOTOMP_IV]], align 8, !llvm.access.group !10 +// CHECK12-NEXT: [[TMP20:%.*]] = load i64, i64* [[DOTOMP_UB]], align 8, !llvm.access.group !10 // CHECK12-NEXT: [[CMP16:%.*]] = icmp sle i64 [[TMP19]], [[TMP20]] // CHECK12-NEXT: br i1 [[CMP16]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK12: omp.inner.for.body: -// CHECK12-NEXT: [[TMP21:%.*]] = load i64, i64* [[DOTOMP_IV]], align 8 -// CHECK12-NEXT: [[TMP22:%.*]] = load 
i32, i32* [[DOTCAPTURE_EXPR_4]], align 4 +// CHECK12-NEXT: [[TMP21:%.*]] = load i64, i64* [[DOTOMP_IV]], align 8, !llvm.access.group !10 +// CHECK12-NEXT: [[TMP22:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_4]], align 4, !llvm.access.group !10 // CHECK12-NEXT: [[SUB17:%.*]] = sub nsw i32 [[TMP22]], 0 // CHECK12-NEXT: [[DIV18:%.*]] = sdiv i32 [[SUB17]], 1 // CHECK12-NEXT: [[MUL19:%.*]] = mul nsw i32 1, [[DIV18]] @@ -3799,16 +3799,16 @@ int main (int argc, char **argv) { // CHECK12-NEXT: [[MUL22:%.*]] = mul nsw i64 [[DIV21]], 1 // CHECK12-NEXT: [[ADD:%.*]] = add nsw i64 0, [[MUL22]] // CHECK12-NEXT: [[CONV23:%.*]] = trunc i64 [[ADD]] to i32 -// CHECK12-NEXT: store i32 [[CONV23]], i32* [[I13]], align 4 -// CHECK12-NEXT: [[TMP23:%.*]] = load i64, i64* [[DOTOMP_IV]], align 8 -// CHECK12-NEXT: [[TMP24:%.*]] = load i64, i64* [[DOTOMP_IV]], align 8 -// CHECK12-NEXT: [[TMP25:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_4]], align 4 +// CHECK12-NEXT: store i32 [[CONV23]], i32* [[I13]], align 4, !llvm.access.group !10 +// CHECK12-NEXT: [[TMP23:%.*]] = load i64, i64* [[DOTOMP_IV]], align 8, !llvm.access.group !10 +// CHECK12-NEXT: [[TMP24:%.*]] = load i64, i64* [[DOTOMP_IV]], align 8, !llvm.access.group !10 +// CHECK12-NEXT: [[TMP25:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_4]], align 4, !llvm.access.group !10 // CHECK12-NEXT: [[SUB24:%.*]] = sub nsw i32 [[TMP25]], 0 // CHECK12-NEXT: [[DIV25:%.*]] = sdiv i32 [[SUB24]], 1 // CHECK12-NEXT: [[MUL26:%.*]] = mul nsw i32 1, [[DIV25]] // CHECK12-NEXT: [[CONV27:%.*]] = sext i32 [[MUL26]] to i64 // CHECK12-NEXT: [[DIV28:%.*]] = sdiv i64 [[TMP24]], [[CONV27]] -// CHECK12-NEXT: [[TMP26:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_4]], align 4 +// CHECK12-NEXT: [[TMP26:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_4]], align 4, !llvm.access.group !10 // CHECK12-NEXT: [[SUB29:%.*]] = sub nsw i32 [[TMP26]], 0 // CHECK12-NEXT: [[DIV30:%.*]] = sdiv i32 [[SUB29]], 1 // CHECK12-NEXT: [[MUL31:%.*]] = mul nsw i32 1, [[DIV30]] @@ -3818,21 +3818,21 @@ int main (int argc, char **argv) { // CHECK12-NEXT: [[MUL35:%.*]] = mul nsw i64 [[SUB34]], 1 // CHECK12-NEXT: [[ADD36:%.*]] = add nsw i64 0, [[MUL35]] // CHECK12-NEXT: [[CONV37:%.*]] = trunc i64 [[ADD36]] to i32 -// CHECK12-NEXT: store i32 [[CONV37]], i32* [[J14]], align 4 -// CHECK12-NEXT: [[TMP27:%.*]] = load i32, i32* [[I13]], align 4 +// CHECK12-NEXT: store i32 [[CONV37]], i32* [[J14]], align 4, !llvm.access.group !10 +// CHECK12-NEXT: [[TMP27:%.*]] = load i32, i32* [[I13]], align 4, !llvm.access.group !10 // CHECK12-NEXT: [[TMP28:%.*]] = mul nsw i32 [[TMP27]], [[TMP1]] // CHECK12-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, i32* [[TMP2]], i32 [[TMP28]] -// CHECK12-NEXT: [[TMP29:%.*]] = load i32, i32* [[J14]], align 4 +// CHECK12-NEXT: [[TMP29:%.*]] = load i32, i32* [[J14]], align 4, !llvm.access.group !10 // CHECK12-NEXT: [[ARRAYIDX38:%.*]] = getelementptr inbounds i32, i32* [[ARRAYIDX]], i32 [[TMP29]] -// CHECK12-NEXT: store i32 0, i32* [[ARRAYIDX38]], align 4 +// CHECK12-NEXT: store i32 0, i32* [[ARRAYIDX38]], align 4, !llvm.access.group !10 // CHECK12-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK12: omp.body.continue: // CHECK12-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK12: omp.inner.for.inc: -// CHECK12-NEXT: [[TMP30:%.*]] = load i64, i64* [[DOTOMP_IV]], align 8 +// CHECK12-NEXT: [[TMP30:%.*]] = load i64, i64* [[DOTOMP_IV]], align 8, !llvm.access.group !10 // CHECK12-NEXT: [[ADD39:%.*]] = add nsw i64 [[TMP30]], 1 -// CHECK12-NEXT: store i64 [[ADD39]], i64* [[DOTOMP_IV]], align 8 -// CHECK12-NEXT: br label 
[[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP7:![0-9]+]] +// CHECK12-NEXT: store i64 [[ADD39]], i64* [[DOTOMP_IV]], align 8, !llvm.access.group !10 +// CHECK12-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP11:![0-9]+]] // CHECK12: omp.inner.for.end: // CHECK12-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK12: omp.loop.exit: @@ -3945,21 +3945,21 @@ int main (int argc, char **argv) { // CHECK12-NEXT: store i32 [[TMP5]], i32* [[DOTOMP_IV]], align 4 // CHECK12-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK12: omp.inner.for.cond: -// CHECK12-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK12-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK12-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !15 +// CHECK12-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !15 // CHECK12-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] // CHECK12-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK12: omp.inner.for.body: -// CHECK12-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK12-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK12-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 3, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32, i32, [10 x [2 x i32]]*)* @.omp_outlined..3 to void (i32*, i32*, ...)*), i32 [[TMP8]], i32 [[TMP9]], [10 x [2 x i32]]* [[TMP0]]) +// CHECK12-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !15 +// CHECK12-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !15 +// CHECK12-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) 
@__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 3, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32, i32, [10 x [2 x i32]]*)* @.omp_outlined..3 to void (i32*, i32*, ...)*), i32 [[TMP8]], i32 [[TMP9]], [10 x [2 x i32]]* [[TMP0]]), !llvm.access.group !15 // CHECK12-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK12: omp.inner.for.inc: -// CHECK12-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK12-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK12-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !15 +// CHECK12-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !15 // CHECK12-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP10]], [[TMP11]] -// CHECK12-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 -// CHECK12-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP10:![0-9]+]] +// CHECK12-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !15 +// CHECK12-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP16:![0-9]+]] // CHECK12: omp.inner.for.end: // CHECK12-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK12: omp.loop.exit: @@ -4024,37 +4024,37 @@ int main (int argc, char **argv) { // CHECK12-NEXT: store i32 [[TMP7]], i32* [[DOTOMP_IV]], align 4 // CHECK12-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK12: omp.inner.for.cond: -// CHECK12-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK12-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK12-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !18 +// CHECK12-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !18 // CHECK12-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] // CHECK12-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK12: omp.inner.for.body: -// CHECK12-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK12-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !18 // CHECK12-NEXT: [[DIV:%.*]] = sdiv i32 [[TMP10]], 2 // CHECK12-NEXT: [[MUL:%.*]] = mul nsw i32 [[DIV]], 1 // CHECK12-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK12-NEXT: store i32 [[ADD]], i32* [[I]], align 4 -// CHECK12-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK12-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK12-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !18 +// CHECK12-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !18 +// CHECK12-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !18 // CHECK12-NEXT: [[DIV3:%.*]] = sdiv i32 [[TMP12]], 2 // CHECK12-NEXT: [[MUL4:%.*]] = mul nsw i32 [[DIV3]], 2 // CHECK12-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP11]], [[MUL4]] // CHECK12-NEXT: [[MUL5:%.*]] = mul nsw i32 [[SUB]], 1 // CHECK12-NEXT: [[ADD6:%.*]] = add nsw i32 0, [[MUL5]] -// CHECK12-NEXT: store i32 [[ADD6]], i32* [[J]], align 4 -// CHECK12-NEXT: [[TMP13:%.*]] = load i32, i32* [[I]], align 4 +// CHECK12-NEXT: store i32 [[ADD6]], i32* [[J]], align 4, !llvm.access.group !18 +// CHECK12-NEXT: [[TMP13:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !18 // CHECK12-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x [2 x i32]], [10 x [2 x i32]]* [[TMP0]], i32 0, i32 [[TMP13]] -// CHECK12-NEXT: [[TMP14:%.*]] = load i32, i32* [[J]], align 4 +// CHECK12-NEXT: [[TMP14:%.*]] = load i32, i32* [[J]], align 4, 
!llvm.access.group !18 // CHECK12-NEXT: [[ARRAYIDX7:%.*]] = getelementptr inbounds [2 x i32], [2 x i32]* [[ARRAYIDX]], i32 0, i32 [[TMP14]] -// CHECK12-NEXT: store i32 0, i32* [[ARRAYIDX7]], align 4 +// CHECK12-NEXT: store i32 0, i32* [[ARRAYIDX7]], align 4, !llvm.access.group !18 // CHECK12-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK12: omp.body.continue: // CHECK12-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK12: omp.inner.for.inc: -// CHECK12-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK12-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !18 // CHECK12-NEXT: [[ADD8:%.*]] = add nsw i32 [[TMP15]], 1 -// CHECK12-NEXT: store i32 [[ADD8]], i32* [[DOTOMP_IV]], align 4 -// CHECK12-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP11:![0-9]+]] +// CHECK12-NEXT: store i32 [[ADD8]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !18 +// CHECK12-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP19:![0-9]+]] // CHECK12: omp.inner.for.end: // CHECK12-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK12: omp.loop.exit: diff --git a/clang/test/OpenMP/target_teams_distribute_parallel_for_simd_dist_schedule_codegen.cpp b/clang/test/OpenMP/target_teams_distribute_parallel_for_simd_dist_schedule_codegen.cpp index 60737285e51b..73d2762a6c12 100644 --- a/clang/test/OpenMP/target_teams_distribute_parallel_for_simd_dist_schedule_codegen.cpp +++ b/clang/test/OpenMP/target_teams_distribute_parallel_for_simd_dist_schedule_codegen.cpp @@ -272,23 +272,23 @@ int main (int argc, char **argv) { // CHECK1-NEXT: store i32 [[TMP5]], i32* [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !6 +// CHECK1-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !6 // CHECK1-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] // CHECK1-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK1-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !6 // CHECK1-NEXT: [[TMP9:%.*]] = zext i32 [[TMP8]] to i64 -// CHECK1-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !6 // CHECK1-NEXT: [[TMP11:%.*]] = zext i32 [[TMP10]] to i64 -// CHECK1-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 3, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64, %struct.SS*)* @.omp_outlined..1 to void (i32*, i32*, ...)*), i64 [[TMP9]], i64 [[TMP11]], %struct.SS* [[TMP0]]) +// CHECK1-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) 
@__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 3, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64, %struct.SS*)* @.omp_outlined..1 to void (i32*, i32*, ...)*), i64 [[TMP9]], i64 [[TMP11]], %struct.SS* [[TMP0]]), !llvm.access.group !6 // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK1-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !6 +// CHECK1-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !6 // CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP12]], [[TMP13]] -// CHECK1-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP5:![0-9]+]] +// CHECK1-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !6 +// CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP7:![0-9]+]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK1: omp.loop.exit: @@ -352,28 +352,28 @@ int main (int argc, char **argv) { // CHECK1-NEXT: store i32 [[TMP7]], i32* [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !10 +// CHECK1-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !10 // CHECK1-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] // CHECK1-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !10 // CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP10]], 1 // CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK1-NEXT: store i32 [[ADD]], i32* [[I]], align 4 +// CHECK1-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !10 // CHECK1-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_SS:%.*]], %struct.SS* [[TMP0]], i32 0, i32 0 -// CHECK1-NEXT: [[TMP11:%.*]] = load i32, i32* [[I]], align 4 +// CHECK1-NEXT: [[TMP11:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !10 // CHECK1-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP11]] to i64 // CHECK1-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [123 x i32], [123 x i32]* [[A]], i64 0, i64 [[IDXPROM]] -// CHECK1-NEXT: store i32 0, i32* [[ARRAYIDX]], align 4 +// CHECK1-NEXT: store i32 0, i32* [[ARRAYIDX]], align 4, !llvm.access.group !10 // CHECK1-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK1: omp.body.continue: // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !10 // CHECK1-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP12]], 1 -// CHECK1-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP7:![0-9]+]] +// CHECK1-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !10 +// CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP11:![0-9]+]] // CHECK1: 
omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK1: omp.loop.exit: @@ -437,23 +437,23 @@ int main (int argc, char **argv) { // CHECK1-NEXT: store i32 [[TMP5]], i32* [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !15 +// CHECK1-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !15 // CHECK1-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] // CHECK1-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK1-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !15 // CHECK1-NEXT: [[TMP9:%.*]] = zext i32 [[TMP8]] to i64 -// CHECK1-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !15 // CHECK1-NEXT: [[TMP11:%.*]] = zext i32 [[TMP10]] to i64 -// CHECK1-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 3, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64, %struct.SS*)* @.omp_outlined..3 to void (i32*, i32*, ...)*), i64 [[TMP9]], i64 [[TMP11]], %struct.SS* [[TMP0]]) +// CHECK1-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 3, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64, %struct.SS*)* @.omp_outlined..3 to void (i32*, i32*, ...)*), i64 [[TMP9]], i64 [[TMP11]], %struct.SS* [[TMP0]]), !llvm.access.group !15 // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK1-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !15 +// CHECK1-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !15 // CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP12]], [[TMP13]] -// CHECK1-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP10:![0-9]+]] +// CHECK1-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !15 +// CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP16:![0-9]+]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK1: omp.loop.exit: @@ -517,28 +517,28 @@ int main (int argc, char **argv) { // CHECK1-NEXT: store i32 [[TMP7]], i32* [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !18 +// CHECK1-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !18 // CHECK1-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] // CHECK1-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: 
-// CHECK1-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !18 // CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP10]], 1 // CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK1-NEXT: store i32 [[ADD]], i32* [[I]], align 4 +// CHECK1-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !18 // CHECK1-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_SS:%.*]], %struct.SS* [[TMP0]], i32 0, i32 0 -// CHECK1-NEXT: [[TMP11:%.*]] = load i32, i32* [[I]], align 4 +// CHECK1-NEXT: [[TMP11:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !18 // CHECK1-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP11]] to i64 // CHECK1-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [123 x i32], [123 x i32]* [[A]], i64 0, i64 [[IDXPROM]] -// CHECK1-NEXT: store i32 0, i32* [[ARRAYIDX]], align 4 +// CHECK1-NEXT: store i32 0, i32* [[ARRAYIDX]], align 4, !llvm.access.group !18 // CHECK1-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK1: omp.body.continue: // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !18 // CHECK1-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP12]], 1 -// CHECK1-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP11:![0-9]+]] +// CHECK1-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !18 +// CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP19:![0-9]+]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK1: omp.loop.exit: @@ -602,43 +602,43 @@ int main (int argc, char **argv) { // CHECK1-NEXT: store i32 [[TMP5]], i32* [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !21 // CHECK1-NEXT: [[CMP1:%.*]] = icmp slt i32 [[TMP6]], 123 // CHECK1-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK1-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !21 // CHECK1-NEXT: [[TMP8:%.*]] = zext i32 [[TMP7]] to i64 -// CHECK1-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !21 // CHECK1-NEXT: [[TMP10:%.*]] = zext i32 [[TMP9]] to i64 -// CHECK1-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 3, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64, %struct.SS*)* @.omp_outlined..7 to void (i32*, i32*, ...)*), i64 [[TMP8]], i64 [[TMP10]], %struct.SS* [[TMP0]]) +// CHECK1-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) 
@__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 3, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64, %struct.SS*)* @.omp_outlined..7 to void (i32*, i32*, ...)*), i64 [[TMP8]], i64 [[TMP10]], %struct.SS* [[TMP0]]), !llvm.access.group !21 // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK1-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !21 +// CHECK1-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !21 // CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP11]], [[TMP12]] -// CHECK1-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK1-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK1-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !21 +// CHECK1-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !21 +// CHECK1-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !21 // CHECK1-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP13]], [[TMP14]] -// CHECK1-NEXT: store i32 [[ADD2]], i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK1-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK1-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK1-NEXT: store i32 [[ADD2]], i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !21 +// CHECK1-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !21 +// CHECK1-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !21 // CHECK1-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP15]], [[TMP16]] -// CHECK1-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK1-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !21 +// CHECK1-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !21 // CHECK1-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP17]], 122 // CHECK1-NEXT: br i1 [[CMP4]], label [[COND_TRUE5:%.*]], label [[COND_FALSE6:%.*]] // CHECK1: cond.true5: // CHECK1-NEXT: br label [[COND_END7:%.*]] // CHECK1: cond.false6: -// CHECK1-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !21 // CHECK1-NEXT: br label [[COND_END7]] // CHECK1: cond.end7: // CHECK1-NEXT: [[COND8:%.*]] = phi i32 [ 122, [[COND_TRUE5]] ], [ [[TMP18]], [[COND_FALSE6]] ] -// CHECK1-NEXT: store i32 [[COND8]], i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK1-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK1-NEXT: store i32 [[TMP19]], i32* [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP12:![0-9]+]] +// CHECK1-NEXT: store i32 [[COND8]], i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !21 +// CHECK1-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !21 +// CHECK1-NEXT: store i32 [[TMP19]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !21 +// CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP22:![0-9]+]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK1: omp.loop.exit: @@ 
-702,28 +702,28 @@ int main (int argc, char **argv) { // CHECK1-NEXT: store i32 [[TMP7]], i32* [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !24 +// CHECK1-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !24 // CHECK1-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] // CHECK1-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !24 // CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP10]], 1 // CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK1-NEXT: store i32 [[ADD]], i32* [[I]], align 4 +// CHECK1-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !24 // CHECK1-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_SS:%.*]], %struct.SS* [[TMP0]], i32 0, i32 0 -// CHECK1-NEXT: [[TMP11:%.*]] = load i32, i32* [[I]], align 4 +// CHECK1-NEXT: [[TMP11:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !24 // CHECK1-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP11]] to i64 // CHECK1-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [123 x i32], [123 x i32]* [[A]], i64 0, i64 [[IDXPROM]] -// CHECK1-NEXT: store i32 0, i32* [[ARRAYIDX]], align 4 +// CHECK1-NEXT: store i32 0, i32* [[ARRAYIDX]], align 4, !llvm.access.group !24 // CHECK1-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK1: omp.body.continue: // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !24 // CHECK1-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP12]], 1 -// CHECK1-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP13:![0-9]+]] +// CHECK1-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !24 +// CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP25:![0-9]+]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK1: omp.loop.exit: @@ -883,23 +883,23 @@ int main (int argc, char **argv) { // CHECK2-NEXT: store i32 [[TMP5]], i32* [[DOTOMP_IV]], align 4 // CHECK2-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK2: omp.inner.for.cond: -// CHECK2-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK2-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK2-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !6 +// CHECK2-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !6 // CHECK2-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] // CHECK2-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK2: omp.inner.for.body: -// CHECK2-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK2-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !6 // CHECK2-NEXT: [[TMP9:%.*]] = zext i32 [[TMP8]] to i64 -// CHECK2-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK2-NEXT: 
[[TMP10:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !6 // CHECK2-NEXT: [[TMP11:%.*]] = zext i32 [[TMP10]] to i64 -// CHECK2-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 3, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64, %struct.SS*)* @.omp_outlined..1 to void (i32*, i32*, ...)*), i64 [[TMP9]], i64 [[TMP11]], %struct.SS* [[TMP0]]) +// CHECK2-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 3, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64, %struct.SS*)* @.omp_outlined..1 to void (i32*, i32*, ...)*), i64 [[TMP9]], i64 [[TMP11]], %struct.SS* [[TMP0]]), !llvm.access.group !6 // CHECK2-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK2: omp.inner.for.inc: -// CHECK2-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK2-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK2-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !6 +// CHECK2-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !6 // CHECK2-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP12]], [[TMP13]] -// CHECK2-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 -// CHECK2-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP5:![0-9]+]] +// CHECK2-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !6 +// CHECK2-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP7:![0-9]+]] // CHECK2: omp.inner.for.end: // CHECK2-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK2: omp.loop.exit: @@ -963,28 +963,28 @@ int main (int argc, char **argv) { // CHECK2-NEXT: store i32 [[TMP7]], i32* [[DOTOMP_IV]], align 4 // CHECK2-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK2: omp.inner.for.cond: -// CHECK2-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK2-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK2-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !10 +// CHECK2-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !10 // CHECK2-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] // CHECK2-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK2: omp.inner.for.body: -// CHECK2-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK2-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !10 // CHECK2-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP10]], 1 // CHECK2-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK2-NEXT: store i32 [[ADD]], i32* [[I]], align 4 +// CHECK2-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !10 // CHECK2-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_SS:%.*]], %struct.SS* [[TMP0]], i32 0, i32 0 -// CHECK2-NEXT: [[TMP11:%.*]] = load i32, i32* [[I]], align 4 +// CHECK2-NEXT: [[TMP11:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !10 // CHECK2-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP11]] to i64 // CHECK2-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [123 x i32], [123 x i32]* [[A]], i64 0, i64 [[IDXPROM]] -// CHECK2-NEXT: store i32 0, i32* [[ARRAYIDX]], align 4 +// CHECK2-NEXT: store i32 0, i32* [[ARRAYIDX]], align 4, !llvm.access.group !10 // CHECK2-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK2: omp.body.continue: // CHECK2-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK2: omp.inner.for.inc: -// CHECK2-NEXT: 
[[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK2-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !10 // CHECK2-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP12]], 1 -// CHECK2-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4 -// CHECK2-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP7:![0-9]+]] +// CHECK2-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !10 +// CHECK2-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP11:![0-9]+]] // CHECK2: omp.inner.for.end: // CHECK2-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK2: omp.loop.exit: @@ -1048,23 +1048,23 @@ int main (int argc, char **argv) { // CHECK2-NEXT: store i32 [[TMP5]], i32* [[DOTOMP_IV]], align 4 // CHECK2-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK2: omp.inner.for.cond: -// CHECK2-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK2-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK2-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !15 +// CHECK2-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !15 // CHECK2-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] // CHECK2-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK2: omp.inner.for.body: -// CHECK2-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK2-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !15 // CHECK2-NEXT: [[TMP9:%.*]] = zext i32 [[TMP8]] to i64 -// CHECK2-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK2-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !15 // CHECK2-NEXT: [[TMP11:%.*]] = zext i32 [[TMP10]] to i64 -// CHECK2-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 3, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64, %struct.SS*)* @.omp_outlined..3 to void (i32*, i32*, ...)*), i64 [[TMP9]], i64 [[TMP11]], %struct.SS* [[TMP0]]) +// CHECK2-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) 
@__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 3, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64, %struct.SS*)* @.omp_outlined..3 to void (i32*, i32*, ...)*), i64 [[TMP9]], i64 [[TMP11]], %struct.SS* [[TMP0]]), !llvm.access.group !15 // CHECK2-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK2: omp.inner.for.inc: -// CHECK2-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK2-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK2-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !15 +// CHECK2-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !15 // CHECK2-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP12]], [[TMP13]] -// CHECK2-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 -// CHECK2-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP10:![0-9]+]] +// CHECK2-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !15 +// CHECK2-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP16:![0-9]+]] // CHECK2: omp.inner.for.end: // CHECK2-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK2: omp.loop.exit: @@ -1128,28 +1128,28 @@ int main (int argc, char **argv) { // CHECK2-NEXT: store i32 [[TMP7]], i32* [[DOTOMP_IV]], align 4 // CHECK2-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK2: omp.inner.for.cond: -// CHECK2-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK2-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK2-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !18 +// CHECK2-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !18 // CHECK2-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] // CHECK2-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK2: omp.inner.for.body: -// CHECK2-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK2-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !18 // CHECK2-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP10]], 1 // CHECK2-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK2-NEXT: store i32 [[ADD]], i32* [[I]], align 4 +// CHECK2-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !18 // CHECK2-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_SS:%.*]], %struct.SS* [[TMP0]], i32 0, i32 0 -// CHECK2-NEXT: [[TMP11:%.*]] = load i32, i32* [[I]], align 4 +// CHECK2-NEXT: [[TMP11:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !18 // CHECK2-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP11]] to i64 // CHECK2-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [123 x i32], [123 x i32]* [[A]], i64 0, i64 [[IDXPROM]] -// CHECK2-NEXT: store i32 0, i32* [[ARRAYIDX]], align 4 +// CHECK2-NEXT: store i32 0, i32* [[ARRAYIDX]], align 4, !llvm.access.group !18 // CHECK2-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK2: omp.body.continue: // CHECK2-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK2: omp.inner.for.inc: -// CHECK2-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK2-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !18 // CHECK2-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP12]], 1 -// CHECK2-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4 -// CHECK2-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP11:![0-9]+]] +// CHECK2-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !18 +// CHECK2-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP19:![0-9]+]] // 
CHECK2: omp.inner.for.end: // CHECK2-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK2: omp.loop.exit: @@ -1213,43 +1213,43 @@ int main (int argc, char **argv) { // CHECK2-NEXT: store i32 [[TMP5]], i32* [[DOTOMP_IV]], align 4 // CHECK2-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK2: omp.inner.for.cond: -// CHECK2-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK2-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !21 // CHECK2-NEXT: [[CMP1:%.*]] = icmp slt i32 [[TMP6]], 123 // CHECK2-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK2: omp.inner.for.body: -// CHECK2-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK2-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !21 // CHECK2-NEXT: [[TMP8:%.*]] = zext i32 [[TMP7]] to i64 -// CHECK2-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK2-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !21 // CHECK2-NEXT: [[TMP10:%.*]] = zext i32 [[TMP9]] to i64 -// CHECK2-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 3, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64, %struct.SS*)* @.omp_outlined..7 to void (i32*, i32*, ...)*), i64 [[TMP8]], i64 [[TMP10]], %struct.SS* [[TMP0]]) +// CHECK2-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 3, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64, %struct.SS*)* @.omp_outlined..7 to void (i32*, i32*, ...)*), i64 [[TMP8]], i64 [[TMP10]], %struct.SS* [[TMP0]]), !llvm.access.group !21 // CHECK2-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK2: omp.inner.for.inc: -// CHECK2-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK2-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK2-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !21 +// CHECK2-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !21 // CHECK2-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP11]], [[TMP12]] -// CHECK2-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 -// CHECK2-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK2-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK2-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !21 +// CHECK2-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !21 +// CHECK2-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !21 // CHECK2-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP13]], [[TMP14]] -// CHECK2-NEXT: store i32 [[ADD2]], i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK2-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK2-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK2-NEXT: store i32 [[ADD2]], i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !21 +// CHECK2-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !21 +// CHECK2-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !21 // CHECK2-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP15]], [[TMP16]] -// CHECK2-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK2-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK2-NEXT: store 
i32 [[ADD3]], i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !21 +// CHECK2-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !21 // CHECK2-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP17]], 122 // CHECK2-NEXT: br i1 [[CMP4]], label [[COND_TRUE5:%.*]], label [[COND_FALSE6:%.*]] // CHECK2: cond.true5: // CHECK2-NEXT: br label [[COND_END7:%.*]] // CHECK2: cond.false6: -// CHECK2-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK2-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !21 // CHECK2-NEXT: br label [[COND_END7]] // CHECK2: cond.end7: // CHECK2-NEXT: [[COND8:%.*]] = phi i32 [ 122, [[COND_TRUE5]] ], [ [[TMP18]], [[COND_FALSE6]] ] -// CHECK2-NEXT: store i32 [[COND8]], i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK2-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK2-NEXT: store i32 [[TMP19]], i32* [[DOTOMP_IV]], align 4 -// CHECK2-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP12:![0-9]+]] +// CHECK2-NEXT: store i32 [[COND8]], i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !21 +// CHECK2-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !21 +// CHECK2-NEXT: store i32 [[TMP19]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !21 +// CHECK2-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP22:![0-9]+]] // CHECK2: omp.inner.for.end: // CHECK2-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK2: omp.loop.exit: @@ -1313,28 +1313,28 @@ int main (int argc, char **argv) { // CHECK2-NEXT: store i32 [[TMP7]], i32* [[DOTOMP_IV]], align 4 // CHECK2-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK2: omp.inner.for.cond: -// CHECK2-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK2-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK2-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !24 +// CHECK2-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !24 // CHECK2-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] // CHECK2-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK2: omp.inner.for.body: -// CHECK2-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK2-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !24 // CHECK2-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP10]], 1 // CHECK2-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK2-NEXT: store i32 [[ADD]], i32* [[I]], align 4 +// CHECK2-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !24 // CHECK2-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_SS:%.*]], %struct.SS* [[TMP0]], i32 0, i32 0 -// CHECK2-NEXT: [[TMP11:%.*]] = load i32, i32* [[I]], align 4 +// CHECK2-NEXT: [[TMP11:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !24 // CHECK2-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP11]] to i64 // CHECK2-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [123 x i32], [123 x i32]* [[A]], i64 0, i64 [[IDXPROM]] -// CHECK2-NEXT: store i32 0, i32* [[ARRAYIDX]], align 4 +// CHECK2-NEXT: store i32 0, i32* [[ARRAYIDX]], align 4, !llvm.access.group !24 // CHECK2-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK2: omp.body.continue: // CHECK2-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK2: omp.inner.for.inc: -// CHECK2-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK2-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !24 // CHECK2-NEXT: 
[[ADD3:%.*]] = add nsw i32 [[TMP12]], 1 -// CHECK2-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4 -// CHECK2-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP13:![0-9]+]] +// CHECK2-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !24 +// CHECK2-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP25:![0-9]+]] // CHECK2: omp.inner.for.end: // CHECK2-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK2: omp.loop.exit: @@ -1494,21 +1494,21 @@ int main (int argc, char **argv) { // CHECK3-NEXT: store i32 [[TMP5]], i32* [[DOTOMP_IV]], align 4 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK3: omp.inner.for.cond: -// CHECK3-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK3-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !7 +// CHECK3-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !7 // CHECK3-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] // CHECK3-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK3: omp.inner.for.body: -// CHECK3-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK3-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK3-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 3, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32, i32, %struct.SS*)* @.omp_outlined..1 to void (i32*, i32*, ...)*), i32 [[TMP8]], i32 [[TMP9]], %struct.SS* [[TMP0]]) +// CHECK3-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !7 +// CHECK3-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !7 +// CHECK3-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) 
@__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 3, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32, i32, %struct.SS*)* @.omp_outlined..1 to void (i32*, i32*, ...)*), i32 [[TMP8]], i32 [[TMP9]], %struct.SS* [[TMP0]]), !llvm.access.group !7 // CHECK3-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK3: omp.inner.for.inc: -// CHECK3-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK3-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !7 +// CHECK3-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !7 // CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP10]], [[TMP11]] -// CHECK3-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP6:![0-9]+]] +// CHECK3-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !7 +// CHECK3-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP8:![0-9]+]] // CHECK3: omp.inner.for.end: // CHECK3-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK3: omp.loop.exit: @@ -1570,27 +1570,27 @@ int main (int argc, char **argv) { // CHECK3-NEXT: store i32 [[TMP7]], i32* [[DOTOMP_IV]], align 4 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK3: omp.inner.for.cond: -// CHECK3-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !11 +// CHECK3-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !11 // CHECK3-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] // CHECK3-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK3: omp.inner.for.body: -// CHECK3-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !11 // CHECK3-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP10]], 1 // CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK3-NEXT: store i32 [[ADD]], i32* [[I]], align 4 +// CHECK3-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !11 // CHECK3-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_SS:%.*]], %struct.SS* [[TMP0]], i32 0, i32 0 -// CHECK3-NEXT: [[TMP11:%.*]] = load i32, i32* [[I]], align 4 +// CHECK3-NEXT: [[TMP11:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !11 // CHECK3-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [123 x i32], [123 x i32]* [[A]], i32 0, i32 [[TMP11]] -// CHECK3-NEXT: store i32 0, i32* [[ARRAYIDX]], align 4 +// CHECK3-NEXT: store i32 0, i32* [[ARRAYIDX]], align 4, !llvm.access.group !11 // CHECK3-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK3: omp.body.continue: // CHECK3-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK3: omp.inner.for.inc: -// CHECK3-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !11 // CHECK3-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP12]], 1 -// CHECK3-NEXT: store i32 [[ADD2]], i32* [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP8:![0-9]+]] +// CHECK3-NEXT: store i32 [[ADD2]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !11 +// CHECK3-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP12:![0-9]+]] // CHECK3: omp.inner.for.end: // CHECK3-NEXT: br label 
[[OMP_LOOP_EXIT:%.*]] // CHECK3: omp.loop.exit: @@ -1654,21 +1654,21 @@ int main (int argc, char **argv) { // CHECK3-NEXT: store i32 [[TMP5]], i32* [[DOTOMP_IV]], align 4 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK3: omp.inner.for.cond: -// CHECK3-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK3-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !16 +// CHECK3-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !16 // CHECK3-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] // CHECK3-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK3: omp.inner.for.body: -// CHECK3-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK3-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK3-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 3, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32, i32, %struct.SS*)* @.omp_outlined..3 to void (i32*, i32*, ...)*), i32 [[TMP8]], i32 [[TMP9]], %struct.SS* [[TMP0]]) +// CHECK3-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !16 +// CHECK3-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !16 +// CHECK3-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 3, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32, i32, %struct.SS*)* @.omp_outlined..3 to void (i32*, i32*, ...)*), i32 [[TMP8]], i32 [[TMP9]], %struct.SS* [[TMP0]]), !llvm.access.group !16 // CHECK3-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK3: omp.inner.for.inc: -// CHECK3-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK3-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !16 +// CHECK3-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !16 // CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP10]], [[TMP11]] -// CHECK3-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP11:![0-9]+]] +// CHECK3-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !16 +// CHECK3-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP17:![0-9]+]] // CHECK3: omp.inner.for.end: // CHECK3-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK3: omp.loop.exit: @@ -1730,27 +1730,27 @@ int main (int argc, char **argv) { // CHECK3-NEXT: store i32 [[TMP7]], i32* [[DOTOMP_IV]], align 4 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK3: omp.inner.for.cond: -// CHECK3-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !19 +// CHECK3-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !19 // CHECK3-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] // CHECK3-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK3: omp.inner.for.body: -// CHECK3-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, 
!llvm.access.group !19 // CHECK3-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP10]], 1 // CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK3-NEXT: store i32 [[ADD]], i32* [[I]], align 4 +// CHECK3-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !19 // CHECK3-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_SS:%.*]], %struct.SS* [[TMP0]], i32 0, i32 0 -// CHECK3-NEXT: [[TMP11:%.*]] = load i32, i32* [[I]], align 4 +// CHECK3-NEXT: [[TMP11:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !19 // CHECK3-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [123 x i32], [123 x i32]* [[A]], i32 0, i32 [[TMP11]] -// CHECK3-NEXT: store i32 0, i32* [[ARRAYIDX]], align 4 +// CHECK3-NEXT: store i32 0, i32* [[ARRAYIDX]], align 4, !llvm.access.group !19 // CHECK3-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK3: omp.body.continue: // CHECK3-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK3: omp.inner.for.inc: -// CHECK3-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !19 // CHECK3-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP12]], 1 -// CHECK3-NEXT: store i32 [[ADD2]], i32* [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP12:![0-9]+]] +// CHECK3-NEXT: store i32 [[ADD2]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !19 +// CHECK3-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP20:![0-9]+]] // CHECK3: omp.inner.for.end: // CHECK3-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK3: omp.loop.exit: @@ -1814,41 +1814,41 @@ int main (int argc, char **argv) { // CHECK3-NEXT: store i32 [[TMP5]], i32* [[DOTOMP_IV]], align 4 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK3: omp.inner.for.cond: -// CHECK3-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !22 // CHECK3-NEXT: [[CMP1:%.*]] = icmp slt i32 [[TMP6]], 123 // CHECK3-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK3: omp.inner.for.body: -// CHECK3-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK3-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK3-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 3, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32, i32, %struct.SS*)* @.omp_outlined..7 to void (i32*, i32*, ...)*), i32 [[TMP7]], i32 [[TMP8]], %struct.SS* [[TMP0]]) +// CHECK3-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !22 +// CHECK3-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !22 +// CHECK3-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) 
@__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 3, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32, i32, %struct.SS*)* @.omp_outlined..7 to void (i32*, i32*, ...)*), i32 [[TMP7]], i32 [[TMP8]], %struct.SS* [[TMP0]]), !llvm.access.group !22 // CHECK3-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK3: omp.inner.for.inc: -// CHECK3-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK3-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !22 +// CHECK3-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !22 // CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP9]], [[TMP10]] -// CHECK3-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK3-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK3-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !22 +// CHECK3-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !22 +// CHECK3-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !22 // CHECK3-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP11]], [[TMP12]] -// CHECK3-NEXT: store i32 [[ADD2]], i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK3-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK3-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK3-NEXT: store i32 [[ADD2]], i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !22 +// CHECK3-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !22 +// CHECK3-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !22 // CHECK3-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP13]], [[TMP14]] -// CHECK3-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK3-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK3-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !22 +// CHECK3-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !22 // CHECK3-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP15]], 122 // CHECK3-NEXT: br i1 [[CMP4]], label [[COND_TRUE5:%.*]], label [[COND_FALSE6:%.*]] // CHECK3: cond.true5: // CHECK3-NEXT: br label [[COND_END7:%.*]] // CHECK3: cond.false6: -// CHECK3-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK3-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !22 // CHECK3-NEXT: br label [[COND_END7]] // CHECK3: cond.end7: // CHECK3-NEXT: [[COND8:%.*]] = phi i32 [ 122, [[COND_TRUE5]] ], [ [[TMP16]], [[COND_FALSE6]] ] -// CHECK3-NEXT: store i32 [[COND8]], i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK3-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK3-NEXT: store i32 [[TMP17]], i32* [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP13:![0-9]+]] +// CHECK3-NEXT: store i32 [[COND8]], i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !22 +// CHECK3-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !22 +// CHECK3-NEXT: store i32 [[TMP17]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !22 +// CHECK3-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP23:![0-9]+]] // CHECK3: omp.inner.for.end: // CHECK3-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK3: omp.loop.exit: @@ -1910,27 
+1910,27 @@ int main (int argc, char **argv) { // CHECK3-NEXT: store i32 [[TMP7]], i32* [[DOTOMP_IV]], align 4 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK3: omp.inner.for.cond: -// CHECK3-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !25 +// CHECK3-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !25 // CHECK3-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] // CHECK3-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK3: omp.inner.for.body: -// CHECK3-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !25 // CHECK3-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP10]], 1 // CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK3-NEXT: store i32 [[ADD]], i32* [[I]], align 4 +// CHECK3-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !25 // CHECK3-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_SS:%.*]], %struct.SS* [[TMP0]], i32 0, i32 0 -// CHECK3-NEXT: [[TMP11:%.*]] = load i32, i32* [[I]], align 4 +// CHECK3-NEXT: [[TMP11:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !25 // CHECK3-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [123 x i32], [123 x i32]* [[A]], i32 0, i32 [[TMP11]] -// CHECK3-NEXT: store i32 0, i32* [[ARRAYIDX]], align 4 +// CHECK3-NEXT: store i32 0, i32* [[ARRAYIDX]], align 4, !llvm.access.group !25 // CHECK3-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK3: omp.body.continue: // CHECK3-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK3: omp.inner.for.inc: -// CHECK3-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !25 // CHECK3-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP12]], 1 -// CHECK3-NEXT: store i32 [[ADD2]], i32* [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP14:![0-9]+]] +// CHECK3-NEXT: store i32 [[ADD2]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !25 +// CHECK3-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP26:![0-9]+]] // CHECK3: omp.inner.for.end: // CHECK3-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK3: omp.loop.exit: @@ -2090,21 +2090,21 @@ int main (int argc, char **argv) { // CHECK4-NEXT: store i32 [[TMP5]], i32* [[DOTOMP_IV]], align 4 // CHECK4-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK4: omp.inner.for.cond: -// CHECK4-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK4-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK4-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !7 +// CHECK4-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !7 // CHECK4-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] // CHECK4-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK4: omp.inner.for.body: -// CHECK4-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK4-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK4-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) 
@__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 3, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32, i32, %struct.SS*)* @.omp_outlined..1 to void (i32*, i32*, ...)*), i32 [[TMP8]], i32 [[TMP9]], %struct.SS* [[TMP0]]) +// CHECK4-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !7 +// CHECK4-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !7 +// CHECK4-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 3, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32, i32, %struct.SS*)* @.omp_outlined..1 to void (i32*, i32*, ...)*), i32 [[TMP8]], i32 [[TMP9]], %struct.SS* [[TMP0]]), !llvm.access.group !7 // CHECK4-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK4: omp.inner.for.inc: -// CHECK4-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK4-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK4-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !7 +// CHECK4-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !7 // CHECK4-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP10]], [[TMP11]] -// CHECK4-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 -// CHECK4-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP6:![0-9]+]] +// CHECK4-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !7 +// CHECK4-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP8:![0-9]+]] // CHECK4: omp.inner.for.end: // CHECK4-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK4: omp.loop.exit: @@ -2166,27 +2166,27 @@ int main (int argc, char **argv) { // CHECK4-NEXT: store i32 [[TMP7]], i32* [[DOTOMP_IV]], align 4 // CHECK4-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK4: omp.inner.for.cond: -// CHECK4-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK4-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK4-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !11 +// CHECK4-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !11 // CHECK4-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] // CHECK4-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK4: omp.inner.for.body: -// CHECK4-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK4-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !11 // CHECK4-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP10]], 1 // CHECK4-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK4-NEXT: store i32 [[ADD]], i32* [[I]], align 4 +// CHECK4-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !11 // CHECK4-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_SS:%.*]], %struct.SS* [[TMP0]], i32 0, i32 0 -// CHECK4-NEXT: [[TMP11:%.*]] = load i32, i32* [[I]], align 4 +// CHECK4-NEXT: [[TMP11:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !11 // CHECK4-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [123 x i32], [123 x i32]* [[A]], i32 0, i32 [[TMP11]] -// CHECK4-NEXT: store i32 0, i32* [[ARRAYIDX]], align 4 +// CHECK4-NEXT: store i32 0, i32* [[ARRAYIDX]], align 4, !llvm.access.group !11 // CHECK4-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK4: omp.body.continue: // CHECK4-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK4: omp.inner.for.inc: -// CHECK4-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK4-NEXT: [[TMP12:%.*]] = 
load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !11 // CHECK4-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP12]], 1 -// CHECK4-NEXT: store i32 [[ADD2]], i32* [[DOTOMP_IV]], align 4 -// CHECK4-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP8:![0-9]+]] +// CHECK4-NEXT: store i32 [[ADD2]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !11 +// CHECK4-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP12:![0-9]+]] // CHECK4: omp.inner.for.end: // CHECK4-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK4: omp.loop.exit: @@ -2250,21 +2250,21 @@ int main (int argc, char **argv) { // CHECK4-NEXT: store i32 [[TMP5]], i32* [[DOTOMP_IV]], align 4 // CHECK4-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK4: omp.inner.for.cond: -// CHECK4-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK4-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK4-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !16 +// CHECK4-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !16 // CHECK4-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] // CHECK4-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK4: omp.inner.for.body: -// CHECK4-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK4-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK4-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 3, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32, i32, %struct.SS*)* @.omp_outlined..3 to void (i32*, i32*, ...)*), i32 [[TMP8]], i32 [[TMP9]], %struct.SS* [[TMP0]]) +// CHECK4-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !16 +// CHECK4-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !16 +// CHECK4-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) 
@__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 3, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32, i32, %struct.SS*)* @.omp_outlined..3 to void (i32*, i32*, ...)*), i32 [[TMP8]], i32 [[TMP9]], %struct.SS* [[TMP0]]), !llvm.access.group !16 // CHECK4-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK4: omp.inner.for.inc: -// CHECK4-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK4-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK4-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !16 +// CHECK4-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !16 // CHECK4-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP10]], [[TMP11]] -// CHECK4-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 -// CHECK4-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP11:![0-9]+]] +// CHECK4-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !16 +// CHECK4-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP17:![0-9]+]] // CHECK4: omp.inner.for.end: // CHECK4-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK4: omp.loop.exit: @@ -2326,27 +2326,27 @@ int main (int argc, char **argv) { // CHECK4-NEXT: store i32 [[TMP7]], i32* [[DOTOMP_IV]], align 4 // CHECK4-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK4: omp.inner.for.cond: -// CHECK4-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK4-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK4-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !19 +// CHECK4-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !19 // CHECK4-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] // CHECK4-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK4: omp.inner.for.body: -// CHECK4-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK4-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !19 // CHECK4-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP10]], 1 // CHECK4-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK4-NEXT: store i32 [[ADD]], i32* [[I]], align 4 +// CHECK4-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !19 // CHECK4-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_SS:%.*]], %struct.SS* [[TMP0]], i32 0, i32 0 -// CHECK4-NEXT: [[TMP11:%.*]] = load i32, i32* [[I]], align 4 +// CHECK4-NEXT: [[TMP11:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !19 // CHECK4-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [123 x i32], [123 x i32]* [[A]], i32 0, i32 [[TMP11]] -// CHECK4-NEXT: store i32 0, i32* [[ARRAYIDX]], align 4 +// CHECK4-NEXT: store i32 0, i32* [[ARRAYIDX]], align 4, !llvm.access.group !19 // CHECK4-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK4: omp.body.continue: // CHECK4-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK4: omp.inner.for.inc: -// CHECK4-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK4-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !19 // CHECK4-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP12]], 1 -// CHECK4-NEXT: store i32 [[ADD2]], i32* [[DOTOMP_IV]], align 4 -// CHECK4-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP12:![0-9]+]] +// CHECK4-NEXT: store i32 [[ADD2]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !19 +// CHECK4-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP20:![0-9]+]] // CHECK4: omp.inner.for.end: // CHECK4-NEXT: br label 
[[OMP_LOOP_EXIT:%.*]] // CHECK4: omp.loop.exit: @@ -2410,41 +2410,41 @@ int main (int argc, char **argv) { // CHECK4-NEXT: store i32 [[TMP5]], i32* [[DOTOMP_IV]], align 4 // CHECK4-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK4: omp.inner.for.cond: -// CHECK4-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK4-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !22 // CHECK4-NEXT: [[CMP1:%.*]] = icmp slt i32 [[TMP6]], 123 // CHECK4-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK4: omp.inner.for.body: -// CHECK4-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK4-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK4-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 3, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32, i32, %struct.SS*)* @.omp_outlined..7 to void (i32*, i32*, ...)*), i32 [[TMP7]], i32 [[TMP8]], %struct.SS* [[TMP0]]) +// CHECK4-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !22 +// CHECK4-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !22 +// CHECK4-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 3, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32, i32, %struct.SS*)* @.omp_outlined..7 to void (i32*, i32*, ...)*), i32 [[TMP7]], i32 [[TMP8]], %struct.SS* [[TMP0]]), !llvm.access.group !22 // CHECK4-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK4: omp.inner.for.inc: -// CHECK4-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK4-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK4-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !22 +// CHECK4-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !22 // CHECK4-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP9]], [[TMP10]] -// CHECK4-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 -// CHECK4-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK4-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK4-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !22 +// CHECK4-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !22 +// CHECK4-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !22 // CHECK4-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP11]], [[TMP12]] -// CHECK4-NEXT: store i32 [[ADD2]], i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK4-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK4-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK4-NEXT: store i32 [[ADD2]], i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !22 +// CHECK4-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !22 +// CHECK4-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !22 // CHECK4-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP13]], [[TMP14]] -// CHECK4-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK4-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK4-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !22 +// CHECK4-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group 
!22 // CHECK4-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP15]], 122 // CHECK4-NEXT: br i1 [[CMP4]], label [[COND_TRUE5:%.*]], label [[COND_FALSE6:%.*]] // CHECK4: cond.true5: // CHECK4-NEXT: br label [[COND_END7:%.*]] // CHECK4: cond.false6: -// CHECK4-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK4-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !22 // CHECK4-NEXT: br label [[COND_END7]] // CHECK4: cond.end7: // CHECK4-NEXT: [[COND8:%.*]] = phi i32 [ 122, [[COND_TRUE5]] ], [ [[TMP16]], [[COND_FALSE6]] ] -// CHECK4-NEXT: store i32 [[COND8]], i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK4-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK4-NEXT: store i32 [[TMP17]], i32* [[DOTOMP_IV]], align 4 -// CHECK4-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP13:![0-9]+]] +// CHECK4-NEXT: store i32 [[COND8]], i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !22 +// CHECK4-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !22 +// CHECK4-NEXT: store i32 [[TMP17]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !22 +// CHECK4-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP23:![0-9]+]] // CHECK4: omp.inner.for.end: // CHECK4-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK4: omp.loop.exit: @@ -2506,27 +2506,27 @@ int main (int argc, char **argv) { // CHECK4-NEXT: store i32 [[TMP7]], i32* [[DOTOMP_IV]], align 4 // CHECK4-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK4: omp.inner.for.cond: -// CHECK4-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK4-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK4-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !25 +// CHECK4-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !25 // CHECK4-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] // CHECK4-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK4: omp.inner.for.body: -// CHECK4-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK4-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !25 // CHECK4-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP10]], 1 // CHECK4-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK4-NEXT: store i32 [[ADD]], i32* [[I]], align 4 +// CHECK4-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !25 // CHECK4-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_SS:%.*]], %struct.SS* [[TMP0]], i32 0, i32 0 -// CHECK4-NEXT: [[TMP11:%.*]] = load i32, i32* [[I]], align 4 +// CHECK4-NEXT: [[TMP11:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !25 // CHECK4-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [123 x i32], [123 x i32]* [[A]], i32 0, i32 [[TMP11]] -// CHECK4-NEXT: store i32 0, i32* [[ARRAYIDX]], align 4 +// CHECK4-NEXT: store i32 0, i32* [[ARRAYIDX]], align 4, !llvm.access.group !25 // CHECK4-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK4: omp.body.continue: // CHECK4-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK4: omp.inner.for.inc: -// CHECK4-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK4-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !25 // CHECK4-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP12]], 1 -// CHECK4-NEXT: store i32 [[ADD2]], i32* [[DOTOMP_IV]], align 4 -// CHECK4-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP14:![0-9]+]] +// CHECK4-NEXT: store i32 [[ADD2]], i32* 
[[DOTOMP_IV]], align 4, !llvm.access.group !25 +// CHECK4-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP26:![0-9]+]] // CHECK4: omp.inner.for.end: // CHECK4-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK4: omp.loop.exit: @@ -3367,27 +3367,27 @@ int main (int argc, char **argv) { // CHECK9-NEXT: store i32 [[TMP12]], i32* [[DOTOMP_IV]], align 4 // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK9: omp.inner.for.cond: -// CHECK9-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK9-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK9-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !9 +// CHECK9-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !9 // CHECK9-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP13]], [[TMP14]] // CHECK9-NEXT: br i1 [[CMP5]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK9: omp.inner.for.body: -// CHECK9-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK9-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !9 // CHECK9-NEXT: [[TMP16:%.*]] = zext i32 [[TMP15]] to i64 -// CHECK9-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK9-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !9 // CHECK9-NEXT: [[TMP18:%.*]] = zext i32 [[TMP17]] to i64 -// CHECK9-NEXT: [[TMP19:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK9-NEXT: [[TMP19:%.*]] = load i32, i32* [[CONV]], align 8, !llvm.access.group !9 // CHECK9-NEXT: [[CONV6:%.*]] = bitcast i64* [[N_CASTED]] to i32* -// CHECK9-NEXT: store i32 [[TMP19]], i32* [[CONV6]], align 4 -// CHECK9-NEXT: [[TMP20:%.*]] = load i64, i64* [[N_CASTED]], align 8 -// CHECK9-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 5, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64, i64, i64, i32*)* @.omp_outlined..1 to void (i32*, i32*, ...)*), i64 [[TMP16]], i64 [[TMP18]], i64 [[TMP20]], i64 [[TMP0]], i32* [[TMP1]]) +// CHECK9-NEXT: store i32 [[TMP19]], i32* [[CONV6]], align 4, !llvm.access.group !9 +// CHECK9-NEXT: [[TMP20:%.*]] = load i64, i64* [[N_CASTED]], align 8, !llvm.access.group !9 +// CHECK9-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) 
@__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 5, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64, i64, i64, i32*)* @.omp_outlined..1 to void (i32*, i32*, ...)*), i64 [[TMP16]], i64 [[TMP18]], i64 [[TMP20]], i64 [[TMP0]], i32* [[TMP1]]), !llvm.access.group !9 // CHECK9-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK9: omp.inner.for.inc: -// CHECK9-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK9-NEXT: [[TMP22:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK9-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !9 +// CHECK9-NEXT: [[TMP22:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !9 // CHECK9-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP21]], [[TMP22]] -// CHECK9-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 -// CHECK9-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP8:![0-9]+]] +// CHECK9-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !9 +// CHECK9-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP10:![0-9]+]] // CHECK9: omp.inner.for.end: // CHECK9-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK9: omp.loop.exit: @@ -3484,27 +3484,27 @@ int main (int argc, char **argv) { // CHECK9-NEXT: store i32 [[TMP14]], i32* [[DOTOMP_IV]], align 4 // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK9: omp.inner.for.cond: -// CHECK9-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK9-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK9-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !13 +// CHECK9-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !13 // CHECK9-NEXT: [[CMP7:%.*]] = icmp sle i32 [[TMP15]], [[TMP16]] // CHECK9-NEXT: br i1 [[CMP7]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK9: omp.inner.for.body: -// CHECK9-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !13 // CHECK9-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP17]], 1 // CHECK9-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK9-NEXT: store i32 [[ADD]], i32* [[I5]], align 4 -// CHECK9-NEXT: [[TMP18:%.*]] = load i32, i32* [[I5]], align 4 +// CHECK9-NEXT: store i32 [[ADD]], i32* [[I5]], align 4, !llvm.access.group !13 +// CHECK9-NEXT: [[TMP18:%.*]] = load i32, i32* [[I5]], align 4, !llvm.access.group !13 // CHECK9-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP18]] to i64 // CHECK9-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, i32* [[TMP1]], i64 [[IDXPROM]] -// CHECK9-NEXT: store i32 0, i32* [[ARRAYIDX]], align 4 +// CHECK9-NEXT: store i32 0, i32* [[ARRAYIDX]], align 4, !llvm.access.group !13 // CHECK9-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK9: omp.body.continue: // CHECK9-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK9: omp.inner.for.inc: -// CHECK9-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !13 // CHECK9-NEXT: [[ADD8:%.*]] = add nsw i32 [[TMP19]], 1 -// CHECK9-NEXT: store i32 [[ADD8]], i32* [[DOTOMP_IV]], align 4 -// CHECK9-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP10:![0-9]+]] +// CHECK9-NEXT: store i32 [[ADD8]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !13 +// CHECK9-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP14:![0-9]+]] // CHECK9: omp.inner.for.end: // CHECK9-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK9: 
omp.loop.exit: @@ -3613,27 +3613,27 @@ int main (int argc, char **argv) { // CHECK9-NEXT: store i32 [[TMP12]], i32* [[DOTOMP_IV]], align 4 // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK9: omp.inner.for.cond: -// CHECK9-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK9-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK9-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !18 +// CHECK9-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !18 // CHECK9-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP13]], [[TMP14]] // CHECK9-NEXT: br i1 [[CMP5]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK9: omp.inner.for.body: -// CHECK9-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK9-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !18 // CHECK9-NEXT: [[TMP16:%.*]] = zext i32 [[TMP15]] to i64 -// CHECK9-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK9-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !18 // CHECK9-NEXT: [[TMP18:%.*]] = zext i32 [[TMP17]] to i64 -// CHECK9-NEXT: [[TMP19:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK9-NEXT: [[TMP19:%.*]] = load i32, i32* [[CONV]], align 8, !llvm.access.group !18 // CHECK9-NEXT: [[CONV6:%.*]] = bitcast i64* [[N_CASTED]] to i32* -// CHECK9-NEXT: store i32 [[TMP19]], i32* [[CONV6]], align 4 -// CHECK9-NEXT: [[TMP20:%.*]] = load i64, i64* [[N_CASTED]], align 8 -// CHECK9-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 5, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64, i64, i64, i32*)* @.omp_outlined..3 to void (i32*, i32*, ...)*), i64 [[TMP16]], i64 [[TMP18]], i64 [[TMP20]], i64 [[TMP0]], i32* [[TMP1]]) +// CHECK9-NEXT: store i32 [[TMP19]], i32* [[CONV6]], align 4, !llvm.access.group !18 +// CHECK9-NEXT: [[TMP20:%.*]] = load i64, i64* [[N_CASTED]], align 8, !llvm.access.group !18 +// CHECK9-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) 
@__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 5, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64, i64, i64, i32*)* @.omp_outlined..3 to void (i32*, i32*, ...)*), i64 [[TMP16]], i64 [[TMP18]], i64 [[TMP20]], i64 [[TMP0]], i32* [[TMP1]]), !llvm.access.group !18 // CHECK9-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK9: omp.inner.for.inc: -// CHECK9-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK9-NEXT: [[TMP22:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK9-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !18 +// CHECK9-NEXT: [[TMP22:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !18 // CHECK9-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP21]], [[TMP22]] -// CHECK9-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 -// CHECK9-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP13:![0-9]+]] +// CHECK9-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !18 +// CHECK9-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP19:![0-9]+]] // CHECK9: omp.inner.for.end: // CHECK9-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK9: omp.loop.exit: @@ -3730,27 +3730,27 @@ int main (int argc, char **argv) { // CHECK9-NEXT: store i32 [[TMP14]], i32* [[DOTOMP_IV]], align 4 // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK9: omp.inner.for.cond: -// CHECK9-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK9-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK9-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !21 +// CHECK9-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !21 // CHECK9-NEXT: [[CMP7:%.*]] = icmp sle i32 [[TMP15]], [[TMP16]] // CHECK9-NEXT: br i1 [[CMP7]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK9: omp.inner.for.body: -// CHECK9-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !21 // CHECK9-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP17]], 1 // CHECK9-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK9-NEXT: store i32 [[ADD]], i32* [[I5]], align 4 -// CHECK9-NEXT: [[TMP18:%.*]] = load i32, i32* [[I5]], align 4 +// CHECK9-NEXT: store i32 [[ADD]], i32* [[I5]], align 4, !llvm.access.group !21 +// CHECK9-NEXT: [[TMP18:%.*]] = load i32, i32* [[I5]], align 4, !llvm.access.group !21 // CHECK9-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP18]] to i64 // CHECK9-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, i32* [[TMP1]], i64 [[IDXPROM]] -// CHECK9-NEXT: store i32 0, i32* [[ARRAYIDX]], align 4 +// CHECK9-NEXT: store i32 0, i32* [[ARRAYIDX]], align 4, !llvm.access.group !21 // CHECK9-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK9: omp.body.continue: // CHECK9-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK9: omp.inner.for.inc: -// CHECK9-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !21 // CHECK9-NEXT: [[ADD8:%.*]] = add nsw i32 [[TMP19]], 1 -// CHECK9-NEXT: store i32 [[ADD8]], i32* [[DOTOMP_IV]], align 4 -// CHECK9-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP14:![0-9]+]] +// CHECK9-NEXT: store i32 [[ADD8]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !21 +// CHECK9-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP22:![0-9]+]] // CHECK9: omp.inner.for.end: // CHECK9-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK9: 
omp.loop.exit: @@ -3872,55 +3872,55 @@ int main (int argc, char **argv) { // CHECK9-NEXT: store i32 [[TMP13]], i32* [[DOTOMP_IV]], align 4 // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK9: omp.inner.for.cond: -// CHECK9-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK9-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_3]], align 4 +// CHECK9-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !24 +// CHECK9-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_3]], align 4, !llvm.access.group !24 // CHECK9-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP15]], 1 // CHECK9-NEXT: [[CMP7:%.*]] = icmp slt i32 [[TMP14]], [[ADD]] // CHECK9-NEXT: br i1 [[CMP7]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK9: omp.inner.for.body: -// CHECK9-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK9-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !24 // CHECK9-NEXT: [[TMP17:%.*]] = zext i32 [[TMP16]] to i64 -// CHECK9-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK9-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !24 // CHECK9-NEXT: [[TMP19:%.*]] = zext i32 [[TMP18]] to i64 -// CHECK9-NEXT: [[TMP20:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK9-NEXT: [[TMP20:%.*]] = load i32, i32* [[CONV]], align 8, !llvm.access.group !24 // CHECK9-NEXT: [[CONV8:%.*]] = bitcast i64* [[N_CASTED]] to i32* -// CHECK9-NEXT: store i32 [[TMP20]], i32* [[CONV8]], align 4 -// CHECK9-NEXT: [[TMP21:%.*]] = load i64, i64* [[N_CASTED]], align 8 -// CHECK9-NEXT: [[TMP22:%.*]] = load i32, i32* [[CONV1]], align 8 +// CHECK9-NEXT: store i32 [[TMP20]], i32* [[CONV8]], align 4, !llvm.access.group !24 +// CHECK9-NEXT: [[TMP21:%.*]] = load i64, i64* [[N_CASTED]], align 8, !llvm.access.group !24 +// CHECK9-NEXT: [[TMP22:%.*]] = load i32, i32* [[CONV1]], align 8, !llvm.access.group !24 // CHECK9-NEXT: [[CONV9:%.*]] = bitcast i64* [[DOTCAPTURE_EXPR__CASTED]] to i32* -// CHECK9-NEXT: store i32 [[TMP22]], i32* [[CONV9]], align 4 -// CHECK9-NEXT: [[TMP23:%.*]] = load i64, i64* [[DOTCAPTURE_EXPR__CASTED]], align 8 -// CHECK9-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 6, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64, i64, i64, i32*, i64)* @.omp_outlined..6 to void (i32*, i32*, ...)*), i64 [[TMP17]], i64 [[TMP19]], i64 [[TMP21]], i64 [[TMP0]], i32* [[TMP1]], i64 [[TMP23]]) +// CHECK9-NEXT: store i32 [[TMP22]], i32* [[CONV9]], align 4, !llvm.access.group !24 +// CHECK9-NEXT: [[TMP23:%.*]] = load i64, i64* [[DOTCAPTURE_EXPR__CASTED]], align 8, !llvm.access.group !24 +// CHECK9-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) 
@__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 6, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64, i64, i64, i32*, i64)* @.omp_outlined..6 to void (i32*, i32*, ...)*), i64 [[TMP17]], i64 [[TMP19]], i64 [[TMP21]], i64 [[TMP0]], i32* [[TMP1]], i64 [[TMP23]]), !llvm.access.group !24 // CHECK9-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK9: omp.inner.for.inc: -// CHECK9-NEXT: [[TMP24:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK9-NEXT: [[TMP25:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK9-NEXT: [[TMP24:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !24 +// CHECK9-NEXT: [[TMP25:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !24 // CHECK9-NEXT: [[ADD10:%.*]] = add nsw i32 [[TMP24]], [[TMP25]] -// CHECK9-NEXT: store i32 [[ADD10]], i32* [[DOTOMP_IV]], align 4 -// CHECK9-NEXT: [[TMP26:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK9-NEXT: [[TMP27:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK9-NEXT: store i32 [[ADD10]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !24 +// CHECK9-NEXT: [[TMP26:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !24 +// CHECK9-NEXT: [[TMP27:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !24 // CHECK9-NEXT: [[ADD11:%.*]] = add nsw i32 [[TMP26]], [[TMP27]] -// CHECK9-NEXT: store i32 [[ADD11]], i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK9-NEXT: [[TMP28:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK9-NEXT: [[TMP29:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK9-NEXT: store i32 [[ADD11]], i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !24 +// CHECK9-NEXT: [[TMP28:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !24 +// CHECK9-NEXT: [[TMP29:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !24 // CHECK9-NEXT: [[ADD12:%.*]] = add nsw i32 [[TMP28]], [[TMP29]] -// CHECK9-NEXT: store i32 [[ADD12]], i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK9-NEXT: [[TMP30:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK9-NEXT: [[TMP31:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_3]], align 4 +// CHECK9-NEXT: store i32 [[ADD12]], i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !24 +// CHECK9-NEXT: [[TMP30:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !24 +// CHECK9-NEXT: [[TMP31:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_3]], align 4, !llvm.access.group !24 // CHECK9-NEXT: [[CMP13:%.*]] = icmp sgt i32 [[TMP30]], [[TMP31]] // CHECK9-NEXT: br i1 [[CMP13]], label [[COND_TRUE14:%.*]], label [[COND_FALSE15:%.*]] // CHECK9: cond.true14: -// CHECK9-NEXT: [[TMP32:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_3]], align 4 +// CHECK9-NEXT: [[TMP32:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_3]], align 4, !llvm.access.group !24 // CHECK9-NEXT: br label [[COND_END16:%.*]] // CHECK9: cond.false15: -// CHECK9-NEXT: [[TMP33:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK9-NEXT: [[TMP33:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !24 // CHECK9-NEXT: br label [[COND_END16]] // CHECK9: cond.end16: // CHECK9-NEXT: [[COND17:%.*]] = phi i32 [ [[TMP32]], [[COND_TRUE14]] ], [ [[TMP33]], [[COND_FALSE15]] ] -// CHECK9-NEXT: store i32 [[COND17]], i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK9-NEXT: [[TMP34:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK9-NEXT: store i32 [[TMP34]], i32* [[DOTOMP_IV]], align 4 -// CHECK9-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP15:![0-9]+]] +// CHECK9-NEXT: store i32 
[[COND17]], i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !24 +// CHECK9-NEXT: [[TMP34:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !24 +// CHECK9-NEXT: store i32 [[TMP34]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !24 +// CHECK9-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP25:![0-9]+]] // CHECK9: omp.inner.for.end: // CHECK9-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK9: omp.loop.exit: @@ -4020,27 +4020,27 @@ int main (int argc, char **argv) { // CHECK9-NEXT: store i32 [[TMP14]], i32* [[DOTOMP_IV]], align 4 // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK9: omp.inner.for.cond: -// CHECK9-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK9-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK9-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !27 +// CHECK9-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !27 // CHECK9-NEXT: [[CMP9:%.*]] = icmp sle i32 [[TMP15]], [[TMP16]] // CHECK9-NEXT: br i1 [[CMP9]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK9: omp.inner.for.body: -// CHECK9-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !27 // CHECK9-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP17]], 1 // CHECK9-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK9-NEXT: store i32 [[ADD]], i32* [[I7]], align 4 -// CHECK9-NEXT: [[TMP18:%.*]] = load i32, i32* [[I7]], align 4 +// CHECK9-NEXT: store i32 [[ADD]], i32* [[I7]], align 4, !llvm.access.group !27 +// CHECK9-NEXT: [[TMP18:%.*]] = load i32, i32* [[I7]], align 4, !llvm.access.group !27 // CHECK9-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP18]] to i64 // CHECK9-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, i32* [[TMP1]], i64 [[IDXPROM]] -// CHECK9-NEXT: store i32 0, i32* [[ARRAYIDX]], align 4 +// CHECK9-NEXT: store i32 0, i32* [[ARRAYIDX]], align 4, !llvm.access.group !27 // CHECK9-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK9: omp.body.continue: // CHECK9-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK9: omp.inner.for.inc: -// CHECK9-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !27 // CHECK9-NEXT: [[ADD10:%.*]] = add nsw i32 [[TMP19]], 1 -// CHECK9-NEXT: store i32 [[ADD10]], i32* [[DOTOMP_IV]], align 4 -// CHECK9-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP16:![0-9]+]] +// CHECK9-NEXT: store i32 [[ADD10]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !27 +// CHECK9-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP28:![0-9]+]] // CHECK9: omp.inner.for.end: // CHECK9-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK9: omp.loop.exit: @@ -4206,23 +4206,23 @@ int main (int argc, char **argv) { // CHECK9-NEXT: store i32 [[TMP5]], i32* [[DOTOMP_IV]], align 4 // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK9: omp.inner.for.cond: -// CHECK9-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK9-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK9-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !30 +// CHECK9-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !30 // CHECK9-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] // CHECK9-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK9: 
omp.inner.for.body: -// CHECK9-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK9-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !30 // CHECK9-NEXT: [[TMP9:%.*]] = zext i32 [[TMP8]] to i64 -// CHECK9-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK9-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !30 // CHECK9-NEXT: [[TMP11:%.*]] = zext i32 [[TMP10]] to i64 -// CHECK9-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 3, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64, [10 x i32]*)* @.omp_outlined..9 to void (i32*, i32*, ...)*), i64 [[TMP9]], i64 [[TMP11]], [10 x i32]* [[TMP0]]) +// CHECK9-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 3, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64, [10 x i32]*)* @.omp_outlined..9 to void (i32*, i32*, ...)*), i64 [[TMP9]], i64 [[TMP11]], [10 x i32]* [[TMP0]]), !llvm.access.group !30 // CHECK9-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK9: omp.inner.for.inc: -// CHECK9-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK9-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK9-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !30 +// CHECK9-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !30 // CHECK9-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP12]], [[TMP13]] -// CHECK9-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 -// CHECK9-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP17:![0-9]+]] +// CHECK9-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !30 +// CHECK9-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP31:![0-9]+]] // CHECK9: omp.inner.for.end: // CHECK9-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK9: omp.loop.exit: @@ -4286,27 +4286,27 @@ int main (int argc, char **argv) { // CHECK9-NEXT: store i32 [[TMP7]], i32* [[DOTOMP_IV]], align 4 // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK9: omp.inner.for.cond: -// CHECK9-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK9-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK9-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !33 +// CHECK9-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !33 // CHECK9-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] // CHECK9-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK9: omp.inner.for.body: -// CHECK9-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !33 // CHECK9-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP10]], 1 // CHECK9-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK9-NEXT: store i32 [[ADD]], i32* [[I]], align 4 -// CHECK9-NEXT: [[TMP11:%.*]] = load i32, i32* [[I]], align 4 +// CHECK9-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !33 +// CHECK9-NEXT: [[TMP11:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !33 // CHECK9-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP11]] to i64 // CHECK9-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], [10 x i32]* [[TMP0]], i64 0, i64 [[IDXPROM]] -// CHECK9-NEXT: store i32 0, i32* [[ARRAYIDX]], align 4 +// 
CHECK9-NEXT: store i32 0, i32* [[ARRAYIDX]], align 4, !llvm.access.group !33 // CHECK9-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK9: omp.body.continue: // CHECK9-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK9: omp.inner.for.inc: -// CHECK9-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !33 // CHECK9-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP12]], 1 -// CHECK9-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4 -// CHECK9-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP18:![0-9]+]] +// CHECK9-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !33 +// CHECK9-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP34:![0-9]+]] // CHECK9: omp.inner.for.end: // CHECK9-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK9: omp.loop.exit: @@ -4370,23 +4370,23 @@ int main (int argc, char **argv) { // CHECK9-NEXT: store i32 [[TMP5]], i32* [[DOTOMP_IV]], align 4 // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK9: omp.inner.for.cond: -// CHECK9-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK9-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK9-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !36 +// CHECK9-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !36 // CHECK9-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] // CHECK9-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK9: omp.inner.for.body: -// CHECK9-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK9-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !36 // CHECK9-NEXT: [[TMP9:%.*]] = zext i32 [[TMP8]] to i64 -// CHECK9-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK9-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !36 // CHECK9-NEXT: [[TMP11:%.*]] = zext i32 [[TMP10]] to i64 -// CHECK9-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 3, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64, [10 x i32]*)* @.omp_outlined..12 to void (i32*, i32*, ...)*), i64 [[TMP9]], i64 [[TMP11]], [10 x i32]* [[TMP0]]) +// CHECK9-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) 
@__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 3, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64, [10 x i32]*)* @.omp_outlined..12 to void (i32*, i32*, ...)*), i64 [[TMP9]], i64 [[TMP11]], [10 x i32]* [[TMP0]]), !llvm.access.group !36 // CHECK9-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK9: omp.inner.for.inc: -// CHECK9-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK9-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK9-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !36 +// CHECK9-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !36 // CHECK9-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP12]], [[TMP13]] -// CHECK9-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 -// CHECK9-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP19:![0-9]+]] +// CHECK9-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !36 +// CHECK9-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP37:![0-9]+]] // CHECK9: omp.inner.for.end: // CHECK9-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK9: omp.loop.exit: @@ -4450,27 +4450,27 @@ int main (int argc, char **argv) { // CHECK9-NEXT: store i32 [[TMP7]], i32* [[DOTOMP_IV]], align 4 // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK9: omp.inner.for.cond: -// CHECK9-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK9-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK9-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !39 +// CHECK9-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !39 // CHECK9-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] // CHECK9-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK9: omp.inner.for.body: -// CHECK9-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !39 // CHECK9-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP10]], 1 // CHECK9-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK9-NEXT: store i32 [[ADD]], i32* [[I]], align 4 -// CHECK9-NEXT: [[TMP11:%.*]] = load i32, i32* [[I]], align 4 +// CHECK9-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !39 +// CHECK9-NEXT: [[TMP11:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !39 // CHECK9-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP11]] to i64 // CHECK9-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], [10 x i32]* [[TMP0]], i64 0, i64 [[IDXPROM]] -// CHECK9-NEXT: store i32 0, i32* [[ARRAYIDX]], align 4 +// CHECK9-NEXT: store i32 0, i32* [[ARRAYIDX]], align 4, !llvm.access.group !39 // CHECK9-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK9: omp.body.continue: // CHECK9-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK9: omp.inner.for.inc: -// CHECK9-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !39 // CHECK9-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP12]], 1 -// CHECK9-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4 -// CHECK9-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP20:![0-9]+]] +// CHECK9-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !39 +// CHECK9-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP40:![0-9]+]] // CHECK9: omp.inner.for.end: // CHECK9-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK9: omp.loop.exit: @@ 
-4547,47 +4547,47 @@ int main (int argc, char **argv) { // CHECK9-NEXT: store i32 [[TMP6]], i32* [[DOTOMP_IV]], align 4 // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK9: omp.inner.for.cond: -// CHECK9-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !42 // CHECK9-NEXT: [[CMP1:%.*]] = icmp slt i32 [[TMP7]], 10 // CHECK9-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK9: omp.inner.for.body: -// CHECK9-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK9-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !42 // CHECK9-NEXT: [[TMP9:%.*]] = zext i32 [[TMP8]] to i64 -// CHECK9-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK9-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !42 // CHECK9-NEXT: [[TMP11:%.*]] = zext i32 [[TMP10]] to i64 -// CHECK9-NEXT: [[TMP12:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK9-NEXT: [[TMP12:%.*]] = load i32, i32* [[CONV]], align 8, !llvm.access.group !42 // CHECK9-NEXT: [[CONV2:%.*]] = bitcast i64* [[DOTCAPTURE_EXPR__CASTED]] to i32* -// CHECK9-NEXT: store i32 [[TMP12]], i32* [[CONV2]], align 4 -// CHECK9-NEXT: [[TMP13:%.*]] = load i64, i64* [[DOTCAPTURE_EXPR__CASTED]], align 8 -// CHECK9-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 4, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64, [10 x i32]*, i64)* @.omp_outlined..16 to void (i32*, i32*, ...)*), i64 [[TMP9]], i64 [[TMP11]], [10 x i32]* [[TMP0]], i64 [[TMP13]]) +// CHECK9-NEXT: store i32 [[TMP12]], i32* [[CONV2]], align 4, !llvm.access.group !42 +// CHECK9-NEXT: [[TMP13:%.*]] = load i64, i64* [[DOTCAPTURE_EXPR__CASTED]], align 8, !llvm.access.group !42 +// CHECK9-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) 
@__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 4, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64, [10 x i32]*, i64)* @.omp_outlined..16 to void (i32*, i32*, ...)*), i64 [[TMP9]], i64 [[TMP11]], [10 x i32]* [[TMP0]], i64 [[TMP13]]), !llvm.access.group !42 // CHECK9-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK9: omp.inner.for.inc: -// CHECK9-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK9-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK9-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !42 +// CHECK9-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !42 // CHECK9-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP14]], [[TMP15]] -// CHECK9-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 -// CHECK9-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK9-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK9-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !42 +// CHECK9-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !42 +// CHECK9-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !42 // CHECK9-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP16]], [[TMP17]] -// CHECK9-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK9-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK9-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK9-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !42 +// CHECK9-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !42 +// CHECK9-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !42 // CHECK9-NEXT: [[ADD4:%.*]] = add nsw i32 [[TMP18]], [[TMP19]] -// CHECK9-NEXT: store i32 [[ADD4]], i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK9-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK9-NEXT: store i32 [[ADD4]], i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !42 +// CHECK9-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !42 // CHECK9-NEXT: [[CMP5:%.*]] = icmp sgt i32 [[TMP20]], 9 // CHECK9-NEXT: br i1 [[CMP5]], label [[COND_TRUE6:%.*]], label [[COND_FALSE7:%.*]] // CHECK9: cond.true6: // CHECK9-NEXT: br label [[COND_END8:%.*]] // CHECK9: cond.false7: -// CHECK9-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK9-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !42 // CHECK9-NEXT: br label [[COND_END8]] // CHECK9: cond.end8: // CHECK9-NEXT: [[COND9:%.*]] = phi i32 [ 9, [[COND_TRUE6]] ], [ [[TMP21]], [[COND_FALSE7]] ] -// CHECK9-NEXT: store i32 [[COND9]], i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK9-NEXT: [[TMP22:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK9-NEXT: store i32 [[TMP22]], i32* [[DOTOMP_IV]], align 4 -// CHECK9-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP21:![0-9]+]] +// CHECK9-NEXT: store i32 [[COND9]], i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !42 +// CHECK9-NEXT: [[TMP22:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !42 +// CHECK9-NEXT: store i32 [[TMP22]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !42 +// CHECK9-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP43:![0-9]+]] // CHECK9: omp.inner.for.end: // CHECK9-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK9: 
omp.loop.exit: @@ -4654,27 +4654,27 @@ int main (int argc, char **argv) { // CHECK9-NEXT: store i32 [[TMP7]], i32* [[DOTOMP_IV]], align 4 // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK9: omp.inner.for.cond: -// CHECK9-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK9-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK9-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !45 +// CHECK9-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !45 // CHECK9-NEXT: [[CMP3:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] // CHECK9-NEXT: br i1 [[CMP3]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK9: omp.inner.for.body: -// CHECK9-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !45 // CHECK9-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP10]], 1 // CHECK9-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK9-NEXT: store i32 [[ADD]], i32* [[I]], align 4 -// CHECK9-NEXT: [[TMP11:%.*]] = load i32, i32* [[I]], align 4 +// CHECK9-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !45 +// CHECK9-NEXT: [[TMP11:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !45 // CHECK9-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP11]] to i64 // CHECK9-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], [10 x i32]* [[TMP0]], i64 0, i64 [[IDXPROM]] -// CHECK9-NEXT: store i32 0, i32* [[ARRAYIDX]], align 4 +// CHECK9-NEXT: store i32 0, i32* [[ARRAYIDX]], align 4, !llvm.access.group !45 // CHECK9-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK9: omp.body.continue: // CHECK9-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK9: omp.inner.for.inc: -// CHECK9-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !45 // CHECK9-NEXT: [[ADD4:%.*]] = add nsw i32 [[TMP12]], 1 -// CHECK9-NEXT: store i32 [[ADD4]], i32* [[DOTOMP_IV]], align 4 -// CHECK9-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP22:![0-9]+]] +// CHECK9-NEXT: store i32 [[ADD4]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !45 +// CHECK9-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP46:![0-9]+]] // CHECK9: omp.inner.for.end: // CHECK9-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK9: omp.loop.exit: @@ -5021,27 +5021,27 @@ int main (int argc, char **argv) { // CHECK10-NEXT: store i32 [[TMP12]], i32* [[DOTOMP_IV]], align 4 // CHECK10-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK10: omp.inner.for.cond: -// CHECK10-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK10-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK10-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !9 +// CHECK10-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !9 // CHECK10-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP13]], [[TMP14]] // CHECK10-NEXT: br i1 [[CMP5]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK10: omp.inner.for.body: -// CHECK10-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK10-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !9 // CHECK10-NEXT: [[TMP16:%.*]] = zext i32 [[TMP15]] to i64 -// CHECK10-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK10-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 
4, !llvm.access.group !9 // CHECK10-NEXT: [[TMP18:%.*]] = zext i32 [[TMP17]] to i64 -// CHECK10-NEXT: [[TMP19:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK10-NEXT: [[TMP19:%.*]] = load i32, i32* [[CONV]], align 8, !llvm.access.group !9 // CHECK10-NEXT: [[CONV6:%.*]] = bitcast i64* [[N_CASTED]] to i32* -// CHECK10-NEXT: store i32 [[TMP19]], i32* [[CONV6]], align 4 -// CHECK10-NEXT: [[TMP20:%.*]] = load i64, i64* [[N_CASTED]], align 8 -// CHECK10-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 5, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64, i64, i64, i32*)* @.omp_outlined..1 to void (i32*, i32*, ...)*), i64 [[TMP16]], i64 [[TMP18]], i64 [[TMP20]], i64 [[TMP0]], i32* [[TMP1]]) +// CHECK10-NEXT: store i32 [[TMP19]], i32* [[CONV6]], align 4, !llvm.access.group !9 +// CHECK10-NEXT: [[TMP20:%.*]] = load i64, i64* [[N_CASTED]], align 8, !llvm.access.group !9 +// CHECK10-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 5, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64, i64, i64, i32*)* @.omp_outlined..1 to void (i32*, i32*, ...)*), i64 [[TMP16]], i64 [[TMP18]], i64 [[TMP20]], i64 [[TMP0]], i32* [[TMP1]]), !llvm.access.group !9 // CHECK10-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK10: omp.inner.for.inc: -// CHECK10-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK10-NEXT: [[TMP22:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK10-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !9 +// CHECK10-NEXT: [[TMP22:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !9 // CHECK10-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP21]], [[TMP22]] -// CHECK10-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 -// CHECK10-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP8:![0-9]+]] +// CHECK10-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !9 +// CHECK10-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP10:![0-9]+]] // CHECK10: omp.inner.for.end: // CHECK10-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK10: omp.loop.exit: @@ -5138,27 +5138,27 @@ int main (int argc, char **argv) { // CHECK10-NEXT: store i32 [[TMP14]], i32* [[DOTOMP_IV]], align 4 // CHECK10-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK10: omp.inner.for.cond: -// CHECK10-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK10-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK10-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !13 +// CHECK10-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !13 // CHECK10-NEXT: [[CMP7:%.*]] = icmp sle i32 [[TMP15]], [[TMP16]] // CHECK10-NEXT: br i1 [[CMP7]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK10: omp.inner.for.body: -// CHECK10-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK10-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !13 // CHECK10-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP17]], 1 // CHECK10-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK10-NEXT: store i32 [[ADD]], i32* [[I5]], align 4 -// CHECK10-NEXT: [[TMP18:%.*]] = load i32, i32* [[I5]], align 4 +// CHECK10-NEXT: store i32 [[ADD]], i32* [[I5]], align 4, !llvm.access.group !13 +// CHECK10-NEXT: [[TMP18:%.*]] = load i32, i32* [[I5]], align 4, !llvm.access.group !13 // 
CHECK10-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP18]] to i64 // CHECK10-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, i32* [[TMP1]], i64 [[IDXPROM]] -// CHECK10-NEXT: store i32 0, i32* [[ARRAYIDX]], align 4 +// CHECK10-NEXT: store i32 0, i32* [[ARRAYIDX]], align 4, !llvm.access.group !13 // CHECK10-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK10: omp.body.continue: // CHECK10-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK10: omp.inner.for.inc: -// CHECK10-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK10-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !13 // CHECK10-NEXT: [[ADD8:%.*]] = add nsw i32 [[TMP19]], 1 -// CHECK10-NEXT: store i32 [[ADD8]], i32* [[DOTOMP_IV]], align 4 -// CHECK10-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP10:![0-9]+]] +// CHECK10-NEXT: store i32 [[ADD8]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !13 +// CHECK10-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP14:![0-9]+]] // CHECK10: omp.inner.for.end: // CHECK10-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK10: omp.loop.exit: @@ -5267,27 +5267,27 @@ int main (int argc, char **argv) { // CHECK10-NEXT: store i32 [[TMP12]], i32* [[DOTOMP_IV]], align 4 // CHECK10-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK10: omp.inner.for.cond: -// CHECK10-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK10-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK10-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !18 +// CHECK10-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !18 // CHECK10-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP13]], [[TMP14]] // CHECK10-NEXT: br i1 [[CMP5]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK10: omp.inner.for.body: -// CHECK10-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK10-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !18 // CHECK10-NEXT: [[TMP16:%.*]] = zext i32 [[TMP15]] to i64 -// CHECK10-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK10-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !18 // CHECK10-NEXT: [[TMP18:%.*]] = zext i32 [[TMP17]] to i64 -// CHECK10-NEXT: [[TMP19:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK10-NEXT: [[TMP19:%.*]] = load i32, i32* [[CONV]], align 8, !llvm.access.group !18 // CHECK10-NEXT: [[CONV6:%.*]] = bitcast i64* [[N_CASTED]] to i32* -// CHECK10-NEXT: store i32 [[TMP19]], i32* [[CONV6]], align 4 -// CHECK10-NEXT: [[TMP20:%.*]] = load i64, i64* [[N_CASTED]], align 8 -// CHECK10-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 5, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64, i64, i64, i32*)* @.omp_outlined..3 to void (i32*, i32*, ...)*), i64 [[TMP16]], i64 [[TMP18]], i64 [[TMP20]], i64 [[TMP0]], i32* [[TMP1]]) +// CHECK10-NEXT: store i32 [[TMP19]], i32* [[CONV6]], align 4, !llvm.access.group !18 +// CHECK10-NEXT: [[TMP20:%.*]] = load i64, i64* [[N_CASTED]], align 8, !llvm.access.group !18 +// CHECK10-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) 
@__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 5, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64, i64, i64, i32*)* @.omp_outlined..3 to void (i32*, i32*, ...)*), i64 [[TMP16]], i64 [[TMP18]], i64 [[TMP20]], i64 [[TMP0]], i32* [[TMP1]]), !llvm.access.group !18 // CHECK10-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK10: omp.inner.for.inc: -// CHECK10-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK10-NEXT: [[TMP22:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK10-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !18 +// CHECK10-NEXT: [[TMP22:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !18 // CHECK10-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP21]], [[TMP22]] -// CHECK10-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 -// CHECK10-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP13:![0-9]+]] +// CHECK10-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !18 +// CHECK10-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP19:![0-9]+]] // CHECK10: omp.inner.for.end: // CHECK10-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK10: omp.loop.exit: @@ -5384,27 +5384,27 @@ int main (int argc, char **argv) { // CHECK10-NEXT: store i32 [[TMP14]], i32* [[DOTOMP_IV]], align 4 // CHECK10-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK10: omp.inner.for.cond: -// CHECK10-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK10-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK10-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !21 +// CHECK10-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !21 // CHECK10-NEXT: [[CMP7:%.*]] = icmp sle i32 [[TMP15]], [[TMP16]] // CHECK10-NEXT: br i1 [[CMP7]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK10: omp.inner.for.body: -// CHECK10-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK10-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !21 // CHECK10-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP17]], 1 // CHECK10-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK10-NEXT: store i32 [[ADD]], i32* [[I5]], align 4 -// CHECK10-NEXT: [[TMP18:%.*]] = load i32, i32* [[I5]], align 4 +// CHECK10-NEXT: store i32 [[ADD]], i32* [[I5]], align 4, !llvm.access.group !21 +// CHECK10-NEXT: [[TMP18:%.*]] = load i32, i32* [[I5]], align 4, !llvm.access.group !21 // CHECK10-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP18]] to i64 // CHECK10-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, i32* [[TMP1]], i64 [[IDXPROM]] -// CHECK10-NEXT: store i32 0, i32* [[ARRAYIDX]], align 4 +// CHECK10-NEXT: store i32 0, i32* [[ARRAYIDX]], align 4, !llvm.access.group !21 // CHECK10-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK10: omp.body.continue: // CHECK10-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK10: omp.inner.for.inc: -// CHECK10-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK10-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !21 // CHECK10-NEXT: [[ADD8:%.*]] = add nsw i32 [[TMP19]], 1 -// CHECK10-NEXT: store i32 [[ADD8]], i32* [[DOTOMP_IV]], align 4 -// CHECK10-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP14:![0-9]+]] +// CHECK10-NEXT: store i32 [[ADD8]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !21 +// CHECK10-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP22:![0-9]+]] // CHECK10: omp.inner.for.end: // 
CHECK10-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK10: omp.loop.exit: @@ -5526,55 +5526,55 @@ int main (int argc, char **argv) { // CHECK10-NEXT: store i32 [[TMP13]], i32* [[DOTOMP_IV]], align 4 // CHECK10-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK10: omp.inner.for.cond: -// CHECK10-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK10-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_3]], align 4 +// CHECK10-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !24 +// CHECK10-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_3]], align 4, !llvm.access.group !24 // CHECK10-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP15]], 1 // CHECK10-NEXT: [[CMP7:%.*]] = icmp slt i32 [[TMP14]], [[ADD]] // CHECK10-NEXT: br i1 [[CMP7]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK10: omp.inner.for.body: -// CHECK10-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK10-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !24 // CHECK10-NEXT: [[TMP17:%.*]] = zext i32 [[TMP16]] to i64 -// CHECK10-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK10-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !24 // CHECK10-NEXT: [[TMP19:%.*]] = zext i32 [[TMP18]] to i64 -// CHECK10-NEXT: [[TMP20:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK10-NEXT: [[TMP20:%.*]] = load i32, i32* [[CONV]], align 8, !llvm.access.group !24 // CHECK10-NEXT: [[CONV8:%.*]] = bitcast i64* [[N_CASTED]] to i32* -// CHECK10-NEXT: store i32 [[TMP20]], i32* [[CONV8]], align 4 -// CHECK10-NEXT: [[TMP21:%.*]] = load i64, i64* [[N_CASTED]], align 8 -// CHECK10-NEXT: [[TMP22:%.*]] = load i32, i32* [[CONV1]], align 8 +// CHECK10-NEXT: store i32 [[TMP20]], i32* [[CONV8]], align 4, !llvm.access.group !24 +// CHECK10-NEXT: [[TMP21:%.*]] = load i64, i64* [[N_CASTED]], align 8, !llvm.access.group !24 +// CHECK10-NEXT: [[TMP22:%.*]] = load i32, i32* [[CONV1]], align 8, !llvm.access.group !24 // CHECK10-NEXT: [[CONV9:%.*]] = bitcast i64* [[DOTCAPTURE_EXPR__CASTED]] to i32* -// CHECK10-NEXT: store i32 [[TMP22]], i32* [[CONV9]], align 4 -// CHECK10-NEXT: [[TMP23:%.*]] = load i64, i64* [[DOTCAPTURE_EXPR__CASTED]], align 8 -// CHECK10-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 6, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64, i64, i64, i32*, i64)* @.omp_outlined..6 to void (i32*, i32*, ...)*), i64 [[TMP17]], i64 [[TMP19]], i64 [[TMP21]], i64 [[TMP0]], i32* [[TMP1]], i64 [[TMP23]]) +// CHECK10-NEXT: store i32 [[TMP22]], i32* [[CONV9]], align 4, !llvm.access.group !24 +// CHECK10-NEXT: [[TMP23:%.*]] = load i64, i64* [[DOTCAPTURE_EXPR__CASTED]], align 8, !llvm.access.group !24 +// CHECK10-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) 
@__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 6, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64, i64, i64, i32*, i64)* @.omp_outlined..6 to void (i32*, i32*, ...)*), i64 [[TMP17]], i64 [[TMP19]], i64 [[TMP21]], i64 [[TMP0]], i32* [[TMP1]], i64 [[TMP23]]), !llvm.access.group !24 // CHECK10-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK10: omp.inner.for.inc: -// CHECK10-NEXT: [[TMP24:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK10-NEXT: [[TMP25:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK10-NEXT: [[TMP24:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !24 +// CHECK10-NEXT: [[TMP25:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !24 // CHECK10-NEXT: [[ADD10:%.*]] = add nsw i32 [[TMP24]], [[TMP25]] -// CHECK10-NEXT: store i32 [[ADD10]], i32* [[DOTOMP_IV]], align 4 -// CHECK10-NEXT: [[TMP26:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK10-NEXT: [[TMP27:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK10-NEXT: store i32 [[ADD10]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !24 +// CHECK10-NEXT: [[TMP26:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !24 +// CHECK10-NEXT: [[TMP27:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !24 // CHECK10-NEXT: [[ADD11:%.*]] = add nsw i32 [[TMP26]], [[TMP27]] -// CHECK10-NEXT: store i32 [[ADD11]], i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK10-NEXT: [[TMP28:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK10-NEXT: [[TMP29:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK10-NEXT: store i32 [[ADD11]], i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !24 +// CHECK10-NEXT: [[TMP28:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !24 +// CHECK10-NEXT: [[TMP29:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !24 // CHECK10-NEXT: [[ADD12:%.*]] = add nsw i32 [[TMP28]], [[TMP29]] -// CHECK10-NEXT: store i32 [[ADD12]], i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK10-NEXT: [[TMP30:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK10-NEXT: [[TMP31:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_3]], align 4 +// CHECK10-NEXT: store i32 [[ADD12]], i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !24 +// CHECK10-NEXT: [[TMP30:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !24 +// CHECK10-NEXT: [[TMP31:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_3]], align 4, !llvm.access.group !24 // CHECK10-NEXT: [[CMP13:%.*]] = icmp sgt i32 [[TMP30]], [[TMP31]] // CHECK10-NEXT: br i1 [[CMP13]], label [[COND_TRUE14:%.*]], label [[COND_FALSE15:%.*]] // CHECK10: cond.true14: -// CHECK10-NEXT: [[TMP32:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_3]], align 4 +// CHECK10-NEXT: [[TMP32:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_3]], align 4, !llvm.access.group !24 // CHECK10-NEXT: br label [[COND_END16:%.*]] // CHECK10: cond.false15: -// CHECK10-NEXT: [[TMP33:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK10-NEXT: [[TMP33:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !24 // CHECK10-NEXT: br label [[COND_END16]] // CHECK10: cond.end16: // CHECK10-NEXT: [[COND17:%.*]] = phi i32 [ [[TMP32]], [[COND_TRUE14]] ], [ [[TMP33]], [[COND_FALSE15]] ] -// CHECK10-NEXT: store i32 [[COND17]], i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK10-NEXT: [[TMP34:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK10-NEXT: store i32 [[TMP34]], i32* [[DOTOMP_IV]], align 4 -// CHECK10-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop 
[[LOOP15:![0-9]+]] +// CHECK10-NEXT: store i32 [[COND17]], i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !24 +// CHECK10-NEXT: [[TMP34:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !24 +// CHECK10-NEXT: store i32 [[TMP34]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !24 +// CHECK10-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP25:![0-9]+]] // CHECK10: omp.inner.for.end: // CHECK10-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK10: omp.loop.exit: @@ -5674,27 +5674,27 @@ int main (int argc, char **argv) { // CHECK10-NEXT: store i32 [[TMP14]], i32* [[DOTOMP_IV]], align 4 // CHECK10-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK10: omp.inner.for.cond: -// CHECK10-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK10-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK10-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !27 +// CHECK10-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !27 // CHECK10-NEXT: [[CMP9:%.*]] = icmp sle i32 [[TMP15]], [[TMP16]] // CHECK10-NEXT: br i1 [[CMP9]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK10: omp.inner.for.body: -// CHECK10-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK10-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !27 // CHECK10-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP17]], 1 // CHECK10-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK10-NEXT: store i32 [[ADD]], i32* [[I7]], align 4 -// CHECK10-NEXT: [[TMP18:%.*]] = load i32, i32* [[I7]], align 4 +// CHECK10-NEXT: store i32 [[ADD]], i32* [[I7]], align 4, !llvm.access.group !27 +// CHECK10-NEXT: [[TMP18:%.*]] = load i32, i32* [[I7]], align 4, !llvm.access.group !27 // CHECK10-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP18]] to i64 // CHECK10-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, i32* [[TMP1]], i64 [[IDXPROM]] -// CHECK10-NEXT: store i32 0, i32* [[ARRAYIDX]], align 4 +// CHECK10-NEXT: store i32 0, i32* [[ARRAYIDX]], align 4, !llvm.access.group !27 // CHECK10-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK10: omp.body.continue: // CHECK10-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK10: omp.inner.for.inc: -// CHECK10-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK10-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !27 // CHECK10-NEXT: [[ADD10:%.*]] = add nsw i32 [[TMP19]], 1 -// CHECK10-NEXT: store i32 [[ADD10]], i32* [[DOTOMP_IV]], align 4 -// CHECK10-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP16:![0-9]+]] +// CHECK10-NEXT: store i32 [[ADD10]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !27 +// CHECK10-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP28:![0-9]+]] // CHECK10: omp.inner.for.end: // CHECK10-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK10: omp.loop.exit: @@ -5860,23 +5860,23 @@ int main (int argc, char **argv) { // CHECK10-NEXT: store i32 [[TMP5]], i32* [[DOTOMP_IV]], align 4 // CHECK10-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK10: omp.inner.for.cond: -// CHECK10-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK10-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK10-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !30 +// CHECK10-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !30 // CHECK10-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] // 
CHECK10-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK10: omp.inner.for.body: -// CHECK10-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK10-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !30 // CHECK10-NEXT: [[TMP9:%.*]] = zext i32 [[TMP8]] to i64 -// CHECK10-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK10-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !30 // CHECK10-NEXT: [[TMP11:%.*]] = zext i32 [[TMP10]] to i64 -// CHECK10-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 3, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64, [10 x i32]*)* @.omp_outlined..9 to void (i32*, i32*, ...)*), i64 [[TMP9]], i64 [[TMP11]], [10 x i32]* [[TMP0]]) +// CHECK10-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 3, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64, [10 x i32]*)* @.omp_outlined..9 to void (i32*, i32*, ...)*), i64 [[TMP9]], i64 [[TMP11]], [10 x i32]* [[TMP0]]), !llvm.access.group !30 // CHECK10-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK10: omp.inner.for.inc: -// CHECK10-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK10-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK10-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !30 +// CHECK10-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !30 // CHECK10-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP12]], [[TMP13]] -// CHECK10-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 -// CHECK10-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP17:![0-9]+]] +// CHECK10-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !30 +// CHECK10-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP31:![0-9]+]] // CHECK10: omp.inner.for.end: // CHECK10-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK10: omp.loop.exit: @@ -5940,27 +5940,27 @@ int main (int argc, char **argv) { // CHECK10-NEXT: store i32 [[TMP7]], i32* [[DOTOMP_IV]], align 4 // CHECK10-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK10: omp.inner.for.cond: -// CHECK10-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK10-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK10-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !33 +// CHECK10-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !33 // CHECK10-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] // CHECK10-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK10: omp.inner.for.body: -// CHECK10-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK10-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !33 // CHECK10-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP10]], 1 // CHECK10-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK10-NEXT: store i32 [[ADD]], i32* [[I]], align 4 -// CHECK10-NEXT: [[TMP11:%.*]] = load i32, i32* [[I]], align 4 +// CHECK10-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !33 +// CHECK10-NEXT: [[TMP11:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !33 // CHECK10-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP11]] to i64 // CHECK10-NEXT: [[ARRAYIDX:%.*]] = 
getelementptr inbounds [10 x i32], [10 x i32]* [[TMP0]], i64 0, i64 [[IDXPROM]] -// CHECK10-NEXT: store i32 0, i32* [[ARRAYIDX]], align 4 +// CHECK10-NEXT: store i32 0, i32* [[ARRAYIDX]], align 4, !llvm.access.group !33 // CHECK10-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK10: omp.body.continue: // CHECK10-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK10: omp.inner.for.inc: -// CHECK10-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK10-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !33 // CHECK10-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP12]], 1 -// CHECK10-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4 -// CHECK10-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP18:![0-9]+]] +// CHECK10-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !33 +// CHECK10-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP34:![0-9]+]] // CHECK10: omp.inner.for.end: // CHECK10-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK10: omp.loop.exit: @@ -6024,23 +6024,23 @@ int main (int argc, char **argv) { // CHECK10-NEXT: store i32 [[TMP5]], i32* [[DOTOMP_IV]], align 4 // CHECK10-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK10: omp.inner.for.cond: -// CHECK10-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK10-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK10-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !36 +// CHECK10-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !36 // CHECK10-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] // CHECK10-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK10: omp.inner.for.body: -// CHECK10-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK10-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !36 // CHECK10-NEXT: [[TMP9:%.*]] = zext i32 [[TMP8]] to i64 -// CHECK10-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK10-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !36 // CHECK10-NEXT: [[TMP11:%.*]] = zext i32 [[TMP10]] to i64 -// CHECK10-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 3, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64, [10 x i32]*)* @.omp_outlined..12 to void (i32*, i32*, ...)*), i64 [[TMP9]], i64 [[TMP11]], [10 x i32]* [[TMP0]]) +// CHECK10-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) 
@__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 3, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64, [10 x i32]*)* @.omp_outlined..12 to void (i32*, i32*, ...)*), i64 [[TMP9]], i64 [[TMP11]], [10 x i32]* [[TMP0]]), !llvm.access.group !36 // CHECK10-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK10: omp.inner.for.inc: -// CHECK10-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK10-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK10-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !36 +// CHECK10-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !36 // CHECK10-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP12]], [[TMP13]] -// CHECK10-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 -// CHECK10-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP19:![0-9]+]] +// CHECK10-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !36 +// CHECK10-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP37:![0-9]+]] // CHECK10: omp.inner.for.end: // CHECK10-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK10: omp.loop.exit: @@ -6104,27 +6104,27 @@ int main (int argc, char **argv) { // CHECK10-NEXT: store i32 [[TMP7]], i32* [[DOTOMP_IV]], align 4 // CHECK10-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK10: omp.inner.for.cond: -// CHECK10-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK10-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK10-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !39 +// CHECK10-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !39 // CHECK10-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] // CHECK10-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK10: omp.inner.for.body: -// CHECK10-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK10-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !39 // CHECK10-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP10]], 1 // CHECK10-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK10-NEXT: store i32 [[ADD]], i32* [[I]], align 4 -// CHECK10-NEXT: [[TMP11:%.*]] = load i32, i32* [[I]], align 4 +// CHECK10-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !39 +// CHECK10-NEXT: [[TMP11:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !39 // CHECK10-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP11]] to i64 // CHECK10-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], [10 x i32]* [[TMP0]], i64 0, i64 [[IDXPROM]] -// CHECK10-NEXT: store i32 0, i32* [[ARRAYIDX]], align 4 +// CHECK10-NEXT: store i32 0, i32* [[ARRAYIDX]], align 4, !llvm.access.group !39 // CHECK10-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK10: omp.body.continue: // CHECK10-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK10: omp.inner.for.inc: -// CHECK10-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK10-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !39 // CHECK10-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP12]], 1 -// CHECK10-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4 -// CHECK10-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP20:![0-9]+]] +// CHECK10-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !39 +// CHECK10-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP40:![0-9]+]] // CHECK10: omp.inner.for.end: // CHECK10-NEXT: br label 
[[OMP_LOOP_EXIT:%.*]] // CHECK10: omp.loop.exit: @@ -6201,47 +6201,47 @@ int main (int argc, char **argv) { // CHECK10-NEXT: store i32 [[TMP6]], i32* [[DOTOMP_IV]], align 4 // CHECK10-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK10: omp.inner.for.cond: -// CHECK10-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK10-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !42 // CHECK10-NEXT: [[CMP1:%.*]] = icmp slt i32 [[TMP7]], 10 // CHECK10-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK10: omp.inner.for.body: -// CHECK10-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK10-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !42 // CHECK10-NEXT: [[TMP9:%.*]] = zext i32 [[TMP8]] to i64 -// CHECK10-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK10-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !42 // CHECK10-NEXT: [[TMP11:%.*]] = zext i32 [[TMP10]] to i64 -// CHECK10-NEXT: [[TMP12:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK10-NEXT: [[TMP12:%.*]] = load i32, i32* [[CONV]], align 8, !llvm.access.group !42 // CHECK10-NEXT: [[CONV2:%.*]] = bitcast i64* [[DOTCAPTURE_EXPR__CASTED]] to i32* -// CHECK10-NEXT: store i32 [[TMP12]], i32* [[CONV2]], align 4 -// CHECK10-NEXT: [[TMP13:%.*]] = load i64, i64* [[DOTCAPTURE_EXPR__CASTED]], align 8 -// CHECK10-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 4, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64, [10 x i32]*, i64)* @.omp_outlined..16 to void (i32*, i32*, ...)*), i64 [[TMP9]], i64 [[TMP11]], [10 x i32]* [[TMP0]], i64 [[TMP13]]) +// CHECK10-NEXT: store i32 [[TMP12]], i32* [[CONV2]], align 4, !llvm.access.group !42 +// CHECK10-NEXT: [[TMP13:%.*]] = load i64, i64* [[DOTCAPTURE_EXPR__CASTED]], align 8, !llvm.access.group !42 +// CHECK10-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) 
@__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 4, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64, [10 x i32]*, i64)* @.omp_outlined..16 to void (i32*, i32*, ...)*), i64 [[TMP9]], i64 [[TMP11]], [10 x i32]* [[TMP0]], i64 [[TMP13]]), !llvm.access.group !42 // CHECK10-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK10: omp.inner.for.inc: -// CHECK10-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK10-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK10-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !42 +// CHECK10-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !42 // CHECK10-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP14]], [[TMP15]] -// CHECK10-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 -// CHECK10-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK10-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK10-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !42 +// CHECK10-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !42 +// CHECK10-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !42 // CHECK10-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP16]], [[TMP17]] -// CHECK10-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK10-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK10-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK10-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !42 +// CHECK10-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !42 +// CHECK10-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !42 // CHECK10-NEXT: [[ADD4:%.*]] = add nsw i32 [[TMP18]], [[TMP19]] -// CHECK10-NEXT: store i32 [[ADD4]], i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK10-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK10-NEXT: store i32 [[ADD4]], i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !42 +// CHECK10-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !42 // CHECK10-NEXT: [[CMP5:%.*]] = icmp sgt i32 [[TMP20]], 9 // CHECK10-NEXT: br i1 [[CMP5]], label [[COND_TRUE6:%.*]], label [[COND_FALSE7:%.*]] // CHECK10: cond.true6: // CHECK10-NEXT: br label [[COND_END8:%.*]] // CHECK10: cond.false7: -// CHECK10-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK10-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !42 // CHECK10-NEXT: br label [[COND_END8]] // CHECK10: cond.end8: // CHECK10-NEXT: [[COND9:%.*]] = phi i32 [ 9, [[COND_TRUE6]] ], [ [[TMP21]], [[COND_FALSE7]] ] -// CHECK10-NEXT: store i32 [[COND9]], i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK10-NEXT: [[TMP22:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK10-NEXT: store i32 [[TMP22]], i32* [[DOTOMP_IV]], align 4 -// CHECK10-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP21:![0-9]+]] +// CHECK10-NEXT: store i32 [[COND9]], i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !42 +// CHECK10-NEXT: [[TMP22:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !42 +// CHECK10-NEXT: store i32 [[TMP22]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !42 +// CHECK10-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP43:![0-9]+]] // CHECK10: omp.inner.for.end: // CHECK10-NEXT: br 
label [[OMP_LOOP_EXIT:%.*]] // CHECK10: omp.loop.exit: @@ -6308,27 +6308,27 @@ int main (int argc, char **argv) { // CHECK10-NEXT: store i32 [[TMP7]], i32* [[DOTOMP_IV]], align 4 // CHECK10-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK10: omp.inner.for.cond: -// CHECK10-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK10-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK10-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !45 +// CHECK10-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !45 // CHECK10-NEXT: [[CMP3:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] // CHECK10-NEXT: br i1 [[CMP3]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK10: omp.inner.for.body: -// CHECK10-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK10-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !45 // CHECK10-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP10]], 1 // CHECK10-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK10-NEXT: store i32 [[ADD]], i32* [[I]], align 4 -// CHECK10-NEXT: [[TMP11:%.*]] = load i32, i32* [[I]], align 4 +// CHECK10-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !45 +// CHECK10-NEXT: [[TMP11:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !45 // CHECK10-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP11]] to i64 // CHECK10-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], [10 x i32]* [[TMP0]], i64 0, i64 [[IDXPROM]] -// CHECK10-NEXT: store i32 0, i32* [[ARRAYIDX]], align 4 +// CHECK10-NEXT: store i32 0, i32* [[ARRAYIDX]], align 4, !llvm.access.group !45 // CHECK10-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK10: omp.body.continue: // CHECK10-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK10: omp.inner.for.inc: -// CHECK10-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK10-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !45 // CHECK10-NEXT: [[ADD4:%.*]] = add nsw i32 [[TMP12]], 1 -// CHECK10-NEXT: store i32 [[ADD4]], i32* [[DOTOMP_IV]], align 4 -// CHECK10-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP22:![0-9]+]] +// CHECK10-NEXT: store i32 [[ADD4]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !45 +// CHECK10-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP46:![0-9]+]] // CHECK10: omp.inner.for.end: // CHECK10-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK10: omp.loop.exit: @@ -6670,24 +6670,24 @@ int main (int argc, char **argv) { // CHECK11-NEXT: store i32 [[TMP12]], i32* [[DOTOMP_IV]], align 4 // CHECK11-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK11: omp.inner.for.cond: -// CHECK11-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK11-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK11-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !10 +// CHECK11-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !10 // CHECK11-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP13]], [[TMP14]] // CHECK11-NEXT: br i1 [[CMP5]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK11: omp.inner.for.body: -// CHECK11-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK11-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK11-NEXT: [[TMP17:%.*]] = load i32, i32* [[N_ADDR]], align 4 -// CHECK11-NEXT: store i32 [[TMP17]], i32* [[N_CASTED]], align 4 -// CHECK11-NEXT: 
[[TMP18:%.*]] = load i32, i32* [[N_CASTED]], align 4 -// CHECK11-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 5, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32, i32, i32, i32, i32*)* @.omp_outlined..1 to void (i32*, i32*, ...)*), i32 [[TMP15]], i32 [[TMP16]], i32 [[TMP18]], i32 [[TMP0]], i32* [[TMP1]]) +// CHECK11-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !10 +// CHECK11-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !10 +// CHECK11-NEXT: [[TMP17:%.*]] = load i32, i32* [[N_ADDR]], align 4, !llvm.access.group !10 +// CHECK11-NEXT: store i32 [[TMP17]], i32* [[N_CASTED]], align 4, !llvm.access.group !10 +// CHECK11-NEXT: [[TMP18:%.*]] = load i32, i32* [[N_CASTED]], align 4, !llvm.access.group !10 +// CHECK11-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 5, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32, i32, i32, i32, i32*)* @.omp_outlined..1 to void (i32*, i32*, ...)*), i32 [[TMP15]], i32 [[TMP16]], i32 [[TMP18]], i32 [[TMP0]], i32* [[TMP1]]), !llvm.access.group !10 // CHECK11-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK11: omp.inner.for.inc: -// CHECK11-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK11-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK11-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !10 +// CHECK11-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !10 // CHECK11-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP19]], [[TMP20]] -// CHECK11-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 -// CHECK11-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP9:![0-9]+]] +// CHECK11-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !10 +// CHECK11-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP11:![0-9]+]] // CHECK11: omp.inner.for.end: // CHECK11-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK11: omp.loop.exit: @@ -6781,26 +6781,26 @@ int main (int argc, char **argv) { // CHECK11-NEXT: store i32 [[TMP14]], i32* [[DOTOMP_IV]], align 4 // CHECK11-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK11: omp.inner.for.cond: -// CHECK11-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK11-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK11-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !14 +// CHECK11-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !14 // CHECK11-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP15]], [[TMP16]] // CHECK11-NEXT: br i1 [[CMP5]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK11: omp.inner.for.body: -// CHECK11-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !14 // CHECK11-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP17]], 1 // CHECK11-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK11-NEXT: store i32 [[ADD]], i32* [[I3]], align 4 -// CHECK11-NEXT: [[TMP18:%.*]] = load i32, i32* [[I3]], align 4 +// CHECK11-NEXT: store i32 [[ADD]], i32* [[I3]], align 4, !llvm.access.group !14 +// CHECK11-NEXT: [[TMP18:%.*]] = load i32, i32* [[I3]], align 4, !llvm.access.group !14 // CHECK11-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, i32* [[TMP1]], i32 [[TMP18]] -// 
CHECK11-NEXT: store i32 0, i32* [[ARRAYIDX]], align 4 +// CHECK11-NEXT: store i32 0, i32* [[ARRAYIDX]], align 4, !llvm.access.group !14 // CHECK11-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK11: omp.body.continue: // CHECK11-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK11: omp.inner.for.inc: -// CHECK11-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !14 // CHECK11-NEXT: [[ADD6:%.*]] = add nsw i32 [[TMP19]], 1 -// CHECK11-NEXT: store i32 [[ADD6]], i32* [[DOTOMP_IV]], align 4 -// CHECK11-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP11:![0-9]+]] +// CHECK11-NEXT: store i32 [[ADD6]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !14 +// CHECK11-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP15:![0-9]+]] // CHECK11: omp.inner.for.end: // CHECK11-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK11: omp.loop.exit: @@ -6906,24 +6906,24 @@ int main (int argc, char **argv) { // CHECK11-NEXT: store i32 [[TMP12]], i32* [[DOTOMP_IV]], align 4 // CHECK11-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK11: omp.inner.for.cond: -// CHECK11-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK11-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK11-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !19 +// CHECK11-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !19 // CHECK11-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP13]], [[TMP14]] // CHECK11-NEXT: br i1 [[CMP5]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK11: omp.inner.for.body: -// CHECK11-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK11-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK11-NEXT: [[TMP17:%.*]] = load i32, i32* [[N_ADDR]], align 4 -// CHECK11-NEXT: store i32 [[TMP17]], i32* [[N_CASTED]], align 4 -// CHECK11-NEXT: [[TMP18:%.*]] = load i32, i32* [[N_CASTED]], align 4 -// CHECK11-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 5, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32, i32, i32, i32, i32*)* @.omp_outlined..3 to void (i32*, i32*, ...)*), i32 [[TMP15]], i32 [[TMP16]], i32 [[TMP18]], i32 [[TMP0]], i32* [[TMP1]]) +// CHECK11-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !19 +// CHECK11-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !19 +// CHECK11-NEXT: [[TMP17:%.*]] = load i32, i32* [[N_ADDR]], align 4, !llvm.access.group !19 +// CHECK11-NEXT: store i32 [[TMP17]], i32* [[N_CASTED]], align 4, !llvm.access.group !19 +// CHECK11-NEXT: [[TMP18:%.*]] = load i32, i32* [[N_CASTED]], align 4, !llvm.access.group !19 +// CHECK11-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) 
@__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 5, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32, i32, i32, i32, i32*)* @.omp_outlined..3 to void (i32*, i32*, ...)*), i32 [[TMP15]], i32 [[TMP16]], i32 [[TMP18]], i32 [[TMP0]], i32* [[TMP1]]), !llvm.access.group !19 // CHECK11-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK11: omp.inner.for.inc: -// CHECK11-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK11-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK11-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !19 +// CHECK11-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !19 // CHECK11-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP19]], [[TMP20]] -// CHECK11-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 -// CHECK11-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP14:![0-9]+]] +// CHECK11-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !19 +// CHECK11-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP20:![0-9]+]] // CHECK11: omp.inner.for.end: // CHECK11-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK11: omp.loop.exit: @@ -7017,26 +7017,26 @@ int main (int argc, char **argv) { // CHECK11-NEXT: store i32 [[TMP14]], i32* [[DOTOMP_IV]], align 4 // CHECK11-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK11: omp.inner.for.cond: -// CHECK11-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK11-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK11-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !22 +// CHECK11-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !22 // CHECK11-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP15]], [[TMP16]] // CHECK11-NEXT: br i1 [[CMP5]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK11: omp.inner.for.body: -// CHECK11-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !22 // CHECK11-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP17]], 1 // CHECK11-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK11-NEXT: store i32 [[ADD]], i32* [[I3]], align 4 -// CHECK11-NEXT: [[TMP18:%.*]] = load i32, i32* [[I3]], align 4 +// CHECK11-NEXT: store i32 [[ADD]], i32* [[I3]], align 4, !llvm.access.group !22 +// CHECK11-NEXT: [[TMP18:%.*]] = load i32, i32* [[I3]], align 4, !llvm.access.group !22 // CHECK11-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, i32* [[TMP1]], i32 [[TMP18]] -// CHECK11-NEXT: store i32 0, i32* [[ARRAYIDX]], align 4 +// CHECK11-NEXT: store i32 0, i32* [[ARRAYIDX]], align 4, !llvm.access.group !22 // CHECK11-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK11: omp.body.continue: // CHECK11-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK11: omp.inner.for.inc: -// CHECK11-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !22 // CHECK11-NEXT: [[ADD6:%.*]] = add nsw i32 [[TMP19]], 1 -// CHECK11-NEXT: store i32 [[ADD6]], i32* [[DOTOMP_IV]], align 4 -// CHECK11-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP15:![0-9]+]] +// CHECK11-NEXT: store i32 [[ADD6]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !22 +// CHECK11-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP23:![0-9]+]] // CHECK11: omp.inner.for.end: // CHECK11-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK11: omp.loop.exit: 
@@ -7152,51 +7152,51 @@ int main (int argc, char **argv) { // CHECK11-NEXT: store i32 [[TMP13]], i32* [[DOTOMP_IV]], align 4 // CHECK11-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK11: omp.inner.for.cond: -// CHECK11-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK11-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK11-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !25 +// CHECK11-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_2]], align 4, !llvm.access.group !25 // CHECK11-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP15]], 1 // CHECK11-NEXT: [[CMP6:%.*]] = icmp slt i32 [[TMP14]], [[ADD]] // CHECK11-NEXT: br i1 [[CMP6]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK11: omp.inner.for.body: -// CHECK11-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK11-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK11-NEXT: [[TMP18:%.*]] = load i32, i32* [[N_ADDR]], align 4 -// CHECK11-NEXT: store i32 [[TMP18]], i32* [[N_CASTED]], align 4 -// CHECK11-NEXT: [[TMP19:%.*]] = load i32, i32* [[N_CASTED]], align 4 -// CHECK11-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR__ADDR]], align 4 -// CHECK11-NEXT: store i32 [[TMP20]], i32* [[DOTCAPTURE_EXPR__CASTED]], align 4 -// CHECK11-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR__CASTED]], align 4 -// CHECK11-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 6, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32, i32, i32, i32, i32*, i32)* @.omp_outlined..6 to void (i32*, i32*, ...)*), i32 [[TMP16]], i32 [[TMP17]], i32 [[TMP19]], i32 [[TMP0]], i32* [[TMP1]], i32 [[TMP21]]) +// CHECK11-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !25 +// CHECK11-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !25 +// CHECK11-NEXT: [[TMP18:%.*]] = load i32, i32* [[N_ADDR]], align 4, !llvm.access.group !25 +// CHECK11-NEXT: store i32 [[TMP18]], i32* [[N_CASTED]], align 4, !llvm.access.group !25 +// CHECK11-NEXT: [[TMP19:%.*]] = load i32, i32* [[N_CASTED]], align 4, !llvm.access.group !25 +// CHECK11-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR__ADDR]], align 4, !llvm.access.group !25 +// CHECK11-NEXT: store i32 [[TMP20]], i32* [[DOTCAPTURE_EXPR__CASTED]], align 4, !llvm.access.group !25 +// CHECK11-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR__CASTED]], align 4, !llvm.access.group !25 +// CHECK11-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) 
@__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 6, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32, i32, i32, i32, i32*, i32)* @.omp_outlined..6 to void (i32*, i32*, ...)*), i32 [[TMP16]], i32 [[TMP17]], i32 [[TMP19]], i32 [[TMP0]], i32* [[TMP1]], i32 [[TMP21]]), !llvm.access.group !25 // CHECK11-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK11: omp.inner.for.inc: -// CHECK11-NEXT: [[TMP22:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK11-NEXT: [[TMP23:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK11-NEXT: [[TMP22:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !25 +// CHECK11-NEXT: [[TMP23:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !25 // CHECK11-NEXT: [[ADD7:%.*]] = add nsw i32 [[TMP22]], [[TMP23]] -// CHECK11-NEXT: store i32 [[ADD7]], i32* [[DOTOMP_IV]], align 4 -// CHECK11-NEXT: [[TMP24:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK11-NEXT: [[TMP25:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK11-NEXT: store i32 [[ADD7]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !25 +// CHECK11-NEXT: [[TMP24:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !25 +// CHECK11-NEXT: [[TMP25:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !25 // CHECK11-NEXT: [[ADD8:%.*]] = add nsw i32 [[TMP24]], [[TMP25]] -// CHECK11-NEXT: store i32 [[ADD8]], i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK11-NEXT: [[TMP26:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK11-NEXT: [[TMP27:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK11-NEXT: store i32 [[ADD8]], i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !25 +// CHECK11-NEXT: [[TMP26:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !25 +// CHECK11-NEXT: [[TMP27:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !25 // CHECK11-NEXT: [[ADD9:%.*]] = add nsw i32 [[TMP26]], [[TMP27]] -// CHECK11-NEXT: store i32 [[ADD9]], i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK11-NEXT: [[TMP28:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK11-NEXT: [[TMP29:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK11-NEXT: store i32 [[ADD9]], i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !25 +// CHECK11-NEXT: [[TMP28:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !25 +// CHECK11-NEXT: [[TMP29:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_2]], align 4, !llvm.access.group !25 // CHECK11-NEXT: [[CMP10:%.*]] = icmp sgt i32 [[TMP28]], [[TMP29]] // CHECK11-NEXT: br i1 [[CMP10]], label [[COND_TRUE11:%.*]], label [[COND_FALSE12:%.*]] // CHECK11: cond.true11: -// CHECK11-NEXT: [[TMP30:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK11-NEXT: [[TMP30:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_2]], align 4, !llvm.access.group !25 // CHECK11-NEXT: br label [[COND_END13:%.*]] // CHECK11: cond.false12: -// CHECK11-NEXT: [[TMP31:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK11-NEXT: [[TMP31:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !25 // CHECK11-NEXT: br label [[COND_END13]] // CHECK11: cond.end13: // CHECK11-NEXT: [[COND14:%.*]] = phi i32 [ [[TMP30]], [[COND_TRUE11]] ], [ [[TMP31]], [[COND_FALSE12]] ] -// CHECK11-NEXT: store i32 [[COND14]], i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK11-NEXT: [[TMP32:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK11-NEXT: store i32 [[TMP32]], i32* [[DOTOMP_IV]], align 4 -// CHECK11-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop 
[[LOOP16:![0-9]+]] +// CHECK11-NEXT: store i32 [[COND14]], i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !25 +// CHECK11-NEXT: [[TMP32:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !25 +// CHECK11-NEXT: store i32 [[TMP32]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !25 +// CHECK11-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP26:![0-9]+]] // CHECK11: omp.inner.for.end: // CHECK11-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK11: omp.loop.exit: @@ -7292,26 +7292,26 @@ int main (int argc, char **argv) { // CHECK11-NEXT: store i32 [[TMP14]], i32* [[DOTOMP_IV]], align 4 // CHECK11-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK11: omp.inner.for.cond: -// CHECK11-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK11-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK11-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !28 +// CHECK11-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !28 // CHECK11-NEXT: [[CMP6:%.*]] = icmp sle i32 [[TMP15]], [[TMP16]] // CHECK11-NEXT: br i1 [[CMP6]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK11: omp.inner.for.body: -// CHECK11-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !28 // CHECK11-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP17]], 1 // CHECK11-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK11-NEXT: store i32 [[ADD]], i32* [[I4]], align 4 -// CHECK11-NEXT: [[TMP18:%.*]] = load i32, i32* [[I4]], align 4 +// CHECK11-NEXT: store i32 [[ADD]], i32* [[I4]], align 4, !llvm.access.group !28 +// CHECK11-NEXT: [[TMP18:%.*]] = load i32, i32* [[I4]], align 4, !llvm.access.group !28 // CHECK11-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, i32* [[TMP1]], i32 [[TMP18]] -// CHECK11-NEXT: store i32 0, i32* [[ARRAYIDX]], align 4 +// CHECK11-NEXT: store i32 0, i32* [[ARRAYIDX]], align 4, !llvm.access.group !28 // CHECK11-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK11: omp.body.continue: // CHECK11-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK11: omp.inner.for.inc: -// CHECK11-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !28 // CHECK11-NEXT: [[ADD7:%.*]] = add nsw i32 [[TMP19]], 1 -// CHECK11-NEXT: store i32 [[ADD7]], i32* [[DOTOMP_IV]], align 4 -// CHECK11-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP17:![0-9]+]] +// CHECK11-NEXT: store i32 [[ADD7]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !28 +// CHECK11-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP29:![0-9]+]] // CHECK11: omp.inner.for.end: // CHECK11-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK11: omp.loop.exit: @@ -7476,21 +7476,21 @@ int main (int argc, char **argv) { // CHECK11-NEXT: store i32 [[TMP5]], i32* [[DOTOMP_IV]], align 4 // CHECK11-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK11: omp.inner.for.cond: -// CHECK11-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK11-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK11-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !31 +// CHECK11-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !31 // CHECK11-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] // CHECK11-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label 
[[OMP_INNER_FOR_END:%.*]] // CHECK11: omp.inner.for.body: -// CHECK11-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK11-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK11-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 3, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32, i32, [10 x i32]*)* @.omp_outlined..9 to void (i32*, i32*, ...)*), i32 [[TMP8]], i32 [[TMP9]], [10 x i32]* [[TMP0]]) +// CHECK11-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !31 +// CHECK11-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !31 +// CHECK11-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 3, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32, i32, [10 x i32]*)* @.omp_outlined..9 to void (i32*, i32*, ...)*), i32 [[TMP8]], i32 [[TMP9]], [10 x i32]* [[TMP0]]), !llvm.access.group !31 // CHECK11-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK11: omp.inner.for.inc: -// CHECK11-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK11-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK11-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !31 +// CHECK11-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !31 // CHECK11-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP10]], [[TMP11]] -// CHECK11-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 -// CHECK11-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP18:![0-9]+]] +// CHECK11-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !31 +// CHECK11-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP32:![0-9]+]] // CHECK11: omp.inner.for.end: // CHECK11-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK11: omp.loop.exit: @@ -7552,26 +7552,26 @@ int main (int argc, char **argv) { // CHECK11-NEXT: store i32 [[TMP7]], i32* [[DOTOMP_IV]], align 4 // CHECK11-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK11: omp.inner.for.cond: -// CHECK11-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK11-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK11-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !34 +// CHECK11-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !34 // CHECK11-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] // CHECK11-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK11: omp.inner.for.body: -// CHECK11-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !34 // CHECK11-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP10]], 1 // CHECK11-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK11-NEXT: store i32 [[ADD]], i32* [[I]], align 4 -// CHECK11-NEXT: [[TMP11:%.*]] = load i32, i32* [[I]], align 4 +// CHECK11-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !34 +// CHECK11-NEXT: [[TMP11:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !34 // CHECK11-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], [10 x i32]* [[TMP0]], i32 0, i32 [[TMP11]] -// CHECK11-NEXT: store i32 0, i32* [[ARRAYIDX]], align 4 +// CHECK11-NEXT: store i32 0, i32* [[ARRAYIDX]], align 4, !llvm.access.group !34 // CHECK11-NEXT: br label 
[[OMP_BODY_CONTINUE:%.*]] // CHECK11: omp.body.continue: // CHECK11-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK11: omp.inner.for.inc: -// CHECK11-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !34 // CHECK11-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP12]], 1 -// CHECK11-NEXT: store i32 [[ADD2]], i32* [[DOTOMP_IV]], align 4 -// CHECK11-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP19:![0-9]+]] +// CHECK11-NEXT: store i32 [[ADD2]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !34 +// CHECK11-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP35:![0-9]+]] // CHECK11: omp.inner.for.end: // CHECK11-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK11: omp.loop.exit: @@ -7635,21 +7635,21 @@ int main (int argc, char **argv) { // CHECK11-NEXT: store i32 [[TMP5]], i32* [[DOTOMP_IV]], align 4 // CHECK11-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK11: omp.inner.for.cond: -// CHECK11-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK11-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK11-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !37 +// CHECK11-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !37 // CHECK11-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] // CHECK11-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK11: omp.inner.for.body: -// CHECK11-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK11-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK11-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 3, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32, i32, [10 x i32]*)* @.omp_outlined..12 to void (i32*, i32*, ...)*), i32 [[TMP8]], i32 [[TMP9]], [10 x i32]* [[TMP0]]) +// CHECK11-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !37 +// CHECK11-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !37 +// CHECK11-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) 
@__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 3, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32, i32, [10 x i32]*)* @.omp_outlined..12 to void (i32*, i32*, ...)*), i32 [[TMP8]], i32 [[TMP9]], [10 x i32]* [[TMP0]]), !llvm.access.group !37 // CHECK11-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK11: omp.inner.for.inc: -// CHECK11-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK11-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK11-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !37 +// CHECK11-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !37 // CHECK11-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP10]], [[TMP11]] -// CHECK11-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 -// CHECK11-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP20:![0-9]+]] +// CHECK11-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !37 +// CHECK11-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP38:![0-9]+]] // CHECK11: omp.inner.for.end: // CHECK11-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK11: omp.loop.exit: @@ -7711,26 +7711,26 @@ int main (int argc, char **argv) { // CHECK11-NEXT: store i32 [[TMP7]], i32* [[DOTOMP_IV]], align 4 // CHECK11-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK11: omp.inner.for.cond: -// CHECK11-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK11-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK11-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !40 +// CHECK11-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !40 // CHECK11-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] // CHECK11-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK11: omp.inner.for.body: -// CHECK11-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !40 // CHECK11-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP10]], 1 // CHECK11-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK11-NEXT: store i32 [[ADD]], i32* [[I]], align 4 -// CHECK11-NEXT: [[TMP11:%.*]] = load i32, i32* [[I]], align 4 +// CHECK11-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !40 +// CHECK11-NEXT: [[TMP11:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !40 // CHECK11-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], [10 x i32]* [[TMP0]], i32 0, i32 [[TMP11]] -// CHECK11-NEXT: store i32 0, i32* [[ARRAYIDX]], align 4 +// CHECK11-NEXT: store i32 0, i32* [[ARRAYIDX]], align 4, !llvm.access.group !40 // CHECK11-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK11: omp.body.continue: // CHECK11-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK11: omp.inner.for.inc: -// CHECK11-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !40 // CHECK11-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP12]], 1 -// CHECK11-NEXT: store i32 [[ADD2]], i32* [[DOTOMP_IV]], align 4 -// CHECK11-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP21:![0-9]+]] +// CHECK11-NEXT: store i32 [[ADD2]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !40 +// CHECK11-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP41:![0-9]+]] // CHECK11: omp.inner.for.end: // CHECK11-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK11: omp.loop.exit: @@ -7804,44 
+7804,44 @@ int main (int argc, char **argv) { // CHECK11-NEXT: store i32 [[TMP6]], i32* [[DOTOMP_IV]], align 4 // CHECK11-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK11: omp.inner.for.cond: -// CHECK11-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !43 // CHECK11-NEXT: [[CMP1:%.*]] = icmp slt i32 [[TMP7]], 10 // CHECK11-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK11: omp.inner.for.body: -// CHECK11-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK11-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK11-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR__ADDR]], align 4 -// CHECK11-NEXT: store i32 [[TMP10]], i32* [[DOTCAPTURE_EXPR__CASTED]], align 4 -// CHECK11-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR__CASTED]], align 4 -// CHECK11-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 4, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32, i32, [10 x i32]*, i32)* @.omp_outlined..16 to void (i32*, i32*, ...)*), i32 [[TMP8]], i32 [[TMP9]], [10 x i32]* [[TMP0]], i32 [[TMP11]]) +// CHECK11-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !43 +// CHECK11-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !43 +// CHECK11-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR__ADDR]], align 4, !llvm.access.group !43 +// CHECK11-NEXT: store i32 [[TMP10]], i32* [[DOTCAPTURE_EXPR__CASTED]], align 4, !llvm.access.group !43 +// CHECK11-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR__CASTED]], align 4, !llvm.access.group !43 +// CHECK11-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) 
@__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 4, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32, i32, [10 x i32]*, i32)* @.omp_outlined..16 to void (i32*, i32*, ...)*), i32 [[TMP8]], i32 [[TMP9]], [10 x i32]* [[TMP0]], i32 [[TMP11]]), !llvm.access.group !43 // CHECK11-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK11: omp.inner.for.inc: -// CHECK11-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK11-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK11-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !43 +// CHECK11-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !43 // CHECK11-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP12]], [[TMP13]] -// CHECK11-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 -// CHECK11-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK11-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK11-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !43 +// CHECK11-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !43 +// CHECK11-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !43 // CHECK11-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP14]], [[TMP15]] -// CHECK11-NEXT: store i32 [[ADD2]], i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK11-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK11-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK11-NEXT: store i32 [[ADD2]], i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !43 +// CHECK11-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !43 +// CHECK11-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !43 // CHECK11-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP16]], [[TMP17]] -// CHECK11-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK11-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK11-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !43 +// CHECK11-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !43 // CHECK11-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP18]], 9 // CHECK11-NEXT: br i1 [[CMP4]], label [[COND_TRUE5:%.*]], label [[COND_FALSE6:%.*]] // CHECK11: cond.true5: // CHECK11-NEXT: br label [[COND_END7:%.*]] // CHECK11: cond.false6: -// CHECK11-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK11-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !43 // CHECK11-NEXT: br label [[COND_END7]] // CHECK11: cond.end7: // CHECK11-NEXT: [[COND8:%.*]] = phi i32 [ 9, [[COND_TRUE5]] ], [ [[TMP19]], [[COND_FALSE6]] ] -// CHECK11-NEXT: store i32 [[COND8]], i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK11-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK11-NEXT: store i32 [[TMP20]], i32* [[DOTOMP_IV]], align 4 -// CHECK11-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP22:![0-9]+]] +// CHECK11-NEXT: store i32 [[COND8]], i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !43 +// CHECK11-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !43 +// CHECK11-NEXT: store i32 [[TMP20]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !43 +// CHECK11-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP44:![0-9]+]] // CHECK11: omp.inner.for.end: // CHECK11-NEXT: br 
label [[OMP_LOOP_EXIT:%.*]] // CHECK11: omp.loop.exit: @@ -7905,26 +7905,26 @@ int main (int argc, char **argv) { // CHECK11-NEXT: store i32 [[TMP7]], i32* [[DOTOMP_IV]], align 4 // CHECK11-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK11: omp.inner.for.cond: -// CHECK11-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK11-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK11-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !46 +// CHECK11-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !46 // CHECK11-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] // CHECK11-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK11: omp.inner.for.body: -// CHECK11-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !46 // CHECK11-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP10]], 1 // CHECK11-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK11-NEXT: store i32 [[ADD]], i32* [[I]], align 4 -// CHECK11-NEXT: [[TMP11:%.*]] = load i32, i32* [[I]], align 4 +// CHECK11-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !46 +// CHECK11-NEXT: [[TMP11:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !46 // CHECK11-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], [10 x i32]* [[TMP0]], i32 0, i32 [[TMP11]] -// CHECK11-NEXT: store i32 0, i32* [[ARRAYIDX]], align 4 +// CHECK11-NEXT: store i32 0, i32* [[ARRAYIDX]], align 4, !llvm.access.group !46 // CHECK11-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK11: omp.body.continue: // CHECK11-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK11: omp.inner.for.inc: -// CHECK11-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !46 // CHECK11-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP12]], 1 -// CHECK11-NEXT: store i32 [[ADD2]], i32* [[DOTOMP_IV]], align 4 -// CHECK11-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP23:![0-9]+]] +// CHECK11-NEXT: store i32 [[ADD2]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !46 +// CHECK11-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP47:![0-9]+]] // CHECK11: omp.inner.for.end: // CHECK11-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK11: omp.loop.exit: @@ -8266,24 +8266,24 @@ int main (int argc, char **argv) { // CHECK12-NEXT: store i32 [[TMP12]], i32* [[DOTOMP_IV]], align 4 // CHECK12-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK12: omp.inner.for.cond: -// CHECK12-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK12-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK12-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !10 +// CHECK12-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !10 // CHECK12-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP13]], [[TMP14]] // CHECK12-NEXT: br i1 [[CMP5]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK12: omp.inner.for.body: -// CHECK12-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK12-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK12-NEXT: [[TMP17:%.*]] = load i32, i32* [[N_ADDR]], align 4 -// CHECK12-NEXT: store i32 [[TMP17]], i32* [[N_CASTED]], align 4 -// CHECK12-NEXT: [[TMP18:%.*]] = load i32, i32* [[N_CASTED]], align 4 -// 
CHECK12-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 5, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32, i32, i32, i32, i32*)* @.omp_outlined..1 to void (i32*, i32*, ...)*), i32 [[TMP15]], i32 [[TMP16]], i32 [[TMP18]], i32 [[TMP0]], i32* [[TMP1]]) +// CHECK12-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !10 +// CHECK12-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !10 +// CHECK12-NEXT: [[TMP17:%.*]] = load i32, i32* [[N_ADDR]], align 4, !llvm.access.group !10 +// CHECK12-NEXT: store i32 [[TMP17]], i32* [[N_CASTED]], align 4, !llvm.access.group !10 +// CHECK12-NEXT: [[TMP18:%.*]] = load i32, i32* [[N_CASTED]], align 4, !llvm.access.group !10 +// CHECK12-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 5, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32, i32, i32, i32, i32*)* @.omp_outlined..1 to void (i32*, i32*, ...)*), i32 [[TMP15]], i32 [[TMP16]], i32 [[TMP18]], i32 [[TMP0]], i32* [[TMP1]]), !llvm.access.group !10 // CHECK12-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK12: omp.inner.for.inc: -// CHECK12-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK12-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK12-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !10 +// CHECK12-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !10 // CHECK12-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP19]], [[TMP20]] -// CHECK12-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 -// CHECK12-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP9:![0-9]+]] +// CHECK12-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !10 +// CHECK12-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP11:![0-9]+]] // CHECK12: omp.inner.for.end: // CHECK12-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK12: omp.loop.exit: @@ -8377,26 +8377,26 @@ int main (int argc, char **argv) { // CHECK12-NEXT: store i32 [[TMP14]], i32* [[DOTOMP_IV]], align 4 // CHECK12-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK12: omp.inner.for.cond: -// CHECK12-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK12-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK12-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !14 +// CHECK12-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !14 // CHECK12-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP15]], [[TMP16]] // CHECK12-NEXT: br i1 [[CMP5]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK12: omp.inner.for.body: -// CHECK12-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK12-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !14 // CHECK12-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP17]], 1 // CHECK12-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK12-NEXT: store i32 [[ADD]], i32* [[I3]], align 4 -// CHECK12-NEXT: [[TMP18:%.*]] = load i32, i32* [[I3]], align 4 +// CHECK12-NEXT: store i32 [[ADD]], i32* [[I3]], align 4, !llvm.access.group !14 +// CHECK12-NEXT: [[TMP18:%.*]] = load i32, i32* [[I3]], align 4, !llvm.access.group !14 // CHECK12-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, i32* [[TMP1]], i32 [[TMP18]] -// CHECK12-NEXT: store i32 0, i32* [[ARRAYIDX]], align 4 +// 
CHECK12-NEXT: store i32 0, i32* [[ARRAYIDX]], align 4, !llvm.access.group !14 // CHECK12-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK12: omp.body.continue: // CHECK12-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK12: omp.inner.for.inc: -// CHECK12-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK12-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !14 // CHECK12-NEXT: [[ADD6:%.*]] = add nsw i32 [[TMP19]], 1 -// CHECK12-NEXT: store i32 [[ADD6]], i32* [[DOTOMP_IV]], align 4 -// CHECK12-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP11:![0-9]+]] +// CHECK12-NEXT: store i32 [[ADD6]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !14 +// CHECK12-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP15:![0-9]+]] // CHECK12: omp.inner.for.end: // CHECK12-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK12: omp.loop.exit: @@ -8502,24 +8502,24 @@ int main (int argc, char **argv) { // CHECK12-NEXT: store i32 [[TMP12]], i32* [[DOTOMP_IV]], align 4 // CHECK12-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK12: omp.inner.for.cond: -// CHECK12-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK12-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK12-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !19 +// CHECK12-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !19 // CHECK12-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP13]], [[TMP14]] // CHECK12-NEXT: br i1 [[CMP5]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK12: omp.inner.for.body: -// CHECK12-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK12-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK12-NEXT: [[TMP17:%.*]] = load i32, i32* [[N_ADDR]], align 4 -// CHECK12-NEXT: store i32 [[TMP17]], i32* [[N_CASTED]], align 4 -// CHECK12-NEXT: [[TMP18:%.*]] = load i32, i32* [[N_CASTED]], align 4 -// CHECK12-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 5, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32, i32, i32, i32, i32*)* @.omp_outlined..3 to void (i32*, i32*, ...)*), i32 [[TMP15]], i32 [[TMP16]], i32 [[TMP18]], i32 [[TMP0]], i32* [[TMP1]]) +// CHECK12-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !19 +// CHECK12-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !19 +// CHECK12-NEXT: [[TMP17:%.*]] = load i32, i32* [[N_ADDR]], align 4, !llvm.access.group !19 +// CHECK12-NEXT: store i32 [[TMP17]], i32* [[N_CASTED]], align 4, !llvm.access.group !19 +// CHECK12-NEXT: [[TMP18:%.*]] = load i32, i32* [[N_CASTED]], align 4, !llvm.access.group !19 +// CHECK12-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) 
@__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 5, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32, i32, i32, i32, i32*)* @.omp_outlined..3 to void (i32*, i32*, ...)*), i32 [[TMP15]], i32 [[TMP16]], i32 [[TMP18]], i32 [[TMP0]], i32* [[TMP1]]), !llvm.access.group !19 // CHECK12-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK12: omp.inner.for.inc: -// CHECK12-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK12-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK12-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !19 +// CHECK12-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !19 // CHECK12-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP19]], [[TMP20]] -// CHECK12-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 -// CHECK12-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP14:![0-9]+]] +// CHECK12-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !19 +// CHECK12-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP20:![0-9]+]] // CHECK12: omp.inner.for.end: // CHECK12-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK12: omp.loop.exit: @@ -8613,26 +8613,26 @@ int main (int argc, char **argv) { // CHECK12-NEXT: store i32 [[TMP14]], i32* [[DOTOMP_IV]], align 4 // CHECK12-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK12: omp.inner.for.cond: -// CHECK12-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK12-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK12-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !22 +// CHECK12-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !22 // CHECK12-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP15]], [[TMP16]] // CHECK12-NEXT: br i1 [[CMP5]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK12: omp.inner.for.body: -// CHECK12-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK12-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !22 // CHECK12-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP17]], 1 // CHECK12-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK12-NEXT: store i32 [[ADD]], i32* [[I3]], align 4 -// CHECK12-NEXT: [[TMP18:%.*]] = load i32, i32* [[I3]], align 4 +// CHECK12-NEXT: store i32 [[ADD]], i32* [[I3]], align 4, !llvm.access.group !22 +// CHECK12-NEXT: [[TMP18:%.*]] = load i32, i32* [[I3]], align 4, !llvm.access.group !22 // CHECK12-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, i32* [[TMP1]], i32 [[TMP18]] -// CHECK12-NEXT: store i32 0, i32* [[ARRAYIDX]], align 4 +// CHECK12-NEXT: store i32 0, i32* [[ARRAYIDX]], align 4, !llvm.access.group !22 // CHECK12-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK12: omp.body.continue: // CHECK12-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK12: omp.inner.for.inc: -// CHECK12-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK12-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !22 // CHECK12-NEXT: [[ADD6:%.*]] = add nsw i32 [[TMP19]], 1 -// CHECK12-NEXT: store i32 [[ADD6]], i32* [[DOTOMP_IV]], align 4 -// CHECK12-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP15:![0-9]+]] +// CHECK12-NEXT: store i32 [[ADD6]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !22 +// CHECK12-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP23:![0-9]+]] // CHECK12: omp.inner.for.end: // CHECK12-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK12: omp.loop.exit: 
@@ -8748,51 +8748,51 @@ int main (int argc, char **argv) { // CHECK12-NEXT: store i32 [[TMP13]], i32* [[DOTOMP_IV]], align 4 // CHECK12-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK12: omp.inner.for.cond: -// CHECK12-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK12-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK12-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !25 +// CHECK12-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_2]], align 4, !llvm.access.group !25 // CHECK12-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP15]], 1 // CHECK12-NEXT: [[CMP6:%.*]] = icmp slt i32 [[TMP14]], [[ADD]] // CHECK12-NEXT: br i1 [[CMP6]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK12: omp.inner.for.body: -// CHECK12-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK12-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK12-NEXT: [[TMP18:%.*]] = load i32, i32* [[N_ADDR]], align 4 -// CHECK12-NEXT: store i32 [[TMP18]], i32* [[N_CASTED]], align 4 -// CHECK12-NEXT: [[TMP19:%.*]] = load i32, i32* [[N_CASTED]], align 4 -// CHECK12-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR__ADDR]], align 4 -// CHECK12-NEXT: store i32 [[TMP20]], i32* [[DOTCAPTURE_EXPR__CASTED]], align 4 -// CHECK12-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR__CASTED]], align 4 -// CHECK12-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 6, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32, i32, i32, i32, i32*, i32)* @.omp_outlined..6 to void (i32*, i32*, ...)*), i32 [[TMP16]], i32 [[TMP17]], i32 [[TMP19]], i32 [[TMP0]], i32* [[TMP1]], i32 [[TMP21]]) +// CHECK12-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !25 +// CHECK12-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !25 +// CHECK12-NEXT: [[TMP18:%.*]] = load i32, i32* [[N_ADDR]], align 4, !llvm.access.group !25 +// CHECK12-NEXT: store i32 [[TMP18]], i32* [[N_CASTED]], align 4, !llvm.access.group !25 +// CHECK12-NEXT: [[TMP19:%.*]] = load i32, i32* [[N_CASTED]], align 4, !llvm.access.group !25 +// CHECK12-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR__ADDR]], align 4, !llvm.access.group !25 +// CHECK12-NEXT: store i32 [[TMP20]], i32* [[DOTCAPTURE_EXPR__CASTED]], align 4, !llvm.access.group !25 +// CHECK12-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR__CASTED]], align 4, !llvm.access.group !25 +// CHECK12-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) 
@__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 6, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32, i32, i32, i32, i32*, i32)* @.omp_outlined..6 to void (i32*, i32*, ...)*), i32 [[TMP16]], i32 [[TMP17]], i32 [[TMP19]], i32 [[TMP0]], i32* [[TMP1]], i32 [[TMP21]]), !llvm.access.group !25 // CHECK12-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK12: omp.inner.for.inc: -// CHECK12-NEXT: [[TMP22:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK12-NEXT: [[TMP23:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK12-NEXT: [[TMP22:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !25 +// CHECK12-NEXT: [[TMP23:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !25 // CHECK12-NEXT: [[ADD7:%.*]] = add nsw i32 [[TMP22]], [[TMP23]] -// CHECK12-NEXT: store i32 [[ADD7]], i32* [[DOTOMP_IV]], align 4 -// CHECK12-NEXT: [[TMP24:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK12-NEXT: [[TMP25:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK12-NEXT: store i32 [[ADD7]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !25 +// CHECK12-NEXT: [[TMP24:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !25 +// CHECK12-NEXT: [[TMP25:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !25 // CHECK12-NEXT: [[ADD8:%.*]] = add nsw i32 [[TMP24]], [[TMP25]] -// CHECK12-NEXT: store i32 [[ADD8]], i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK12-NEXT: [[TMP26:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK12-NEXT: [[TMP27:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK12-NEXT: store i32 [[ADD8]], i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !25 +// CHECK12-NEXT: [[TMP26:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !25 +// CHECK12-NEXT: [[TMP27:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !25 // CHECK12-NEXT: [[ADD9:%.*]] = add nsw i32 [[TMP26]], [[TMP27]] -// CHECK12-NEXT: store i32 [[ADD9]], i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK12-NEXT: [[TMP28:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK12-NEXT: [[TMP29:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK12-NEXT: store i32 [[ADD9]], i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !25 +// CHECK12-NEXT: [[TMP28:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !25 +// CHECK12-NEXT: [[TMP29:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_2]], align 4, !llvm.access.group !25 // CHECK12-NEXT: [[CMP10:%.*]] = icmp sgt i32 [[TMP28]], [[TMP29]] // CHECK12-NEXT: br i1 [[CMP10]], label [[COND_TRUE11:%.*]], label [[COND_FALSE12:%.*]] // CHECK12: cond.true11: -// CHECK12-NEXT: [[TMP30:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK12-NEXT: [[TMP30:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_2]], align 4, !llvm.access.group !25 // CHECK12-NEXT: br label [[COND_END13:%.*]] // CHECK12: cond.false12: -// CHECK12-NEXT: [[TMP31:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK12-NEXT: [[TMP31:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !25 // CHECK12-NEXT: br label [[COND_END13]] // CHECK12: cond.end13: // CHECK12-NEXT: [[COND14:%.*]] = phi i32 [ [[TMP30]], [[COND_TRUE11]] ], [ [[TMP31]], [[COND_FALSE12]] ] -// CHECK12-NEXT: store i32 [[COND14]], i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK12-NEXT: [[TMP32:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK12-NEXT: store i32 [[TMP32]], i32* [[DOTOMP_IV]], align 4 -// CHECK12-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop 
[[LOOP16:![0-9]+]] +// CHECK12-NEXT: store i32 [[COND14]], i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !25 +// CHECK12-NEXT: [[TMP32:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !25 +// CHECK12-NEXT: store i32 [[TMP32]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !25 +// CHECK12-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP26:![0-9]+]] // CHECK12: omp.inner.for.end: // CHECK12-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK12: omp.loop.exit: @@ -8888,26 +8888,26 @@ int main (int argc, char **argv) { // CHECK12-NEXT: store i32 [[TMP14]], i32* [[DOTOMP_IV]], align 4 // CHECK12-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK12: omp.inner.for.cond: -// CHECK12-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK12-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK12-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !28 +// CHECK12-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !28 // CHECK12-NEXT: [[CMP6:%.*]] = icmp sle i32 [[TMP15]], [[TMP16]] // CHECK12-NEXT: br i1 [[CMP6]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK12: omp.inner.for.body: -// CHECK12-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK12-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !28 // CHECK12-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP17]], 1 // CHECK12-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK12-NEXT: store i32 [[ADD]], i32* [[I4]], align 4 -// CHECK12-NEXT: [[TMP18:%.*]] = load i32, i32* [[I4]], align 4 +// CHECK12-NEXT: store i32 [[ADD]], i32* [[I4]], align 4, !llvm.access.group !28 +// CHECK12-NEXT: [[TMP18:%.*]] = load i32, i32* [[I4]], align 4, !llvm.access.group !28 // CHECK12-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, i32* [[TMP1]], i32 [[TMP18]] -// CHECK12-NEXT: store i32 0, i32* [[ARRAYIDX]], align 4 +// CHECK12-NEXT: store i32 0, i32* [[ARRAYIDX]], align 4, !llvm.access.group !28 // CHECK12-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK12: omp.body.continue: // CHECK12-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK12: omp.inner.for.inc: -// CHECK12-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK12-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !28 // CHECK12-NEXT: [[ADD7:%.*]] = add nsw i32 [[TMP19]], 1 -// CHECK12-NEXT: store i32 [[ADD7]], i32* [[DOTOMP_IV]], align 4 -// CHECK12-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP17:![0-9]+]] +// CHECK12-NEXT: store i32 [[ADD7]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !28 +// CHECK12-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP29:![0-9]+]] // CHECK12: omp.inner.for.end: // CHECK12-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK12: omp.loop.exit: @@ -9072,21 +9072,21 @@ int main (int argc, char **argv) { // CHECK12-NEXT: store i32 [[TMP5]], i32* [[DOTOMP_IV]], align 4 // CHECK12-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK12: omp.inner.for.cond: -// CHECK12-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK12-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK12-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !31 +// CHECK12-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !31 // CHECK12-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] // CHECK12-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label 
[[OMP_INNER_FOR_END:%.*]] // CHECK12: omp.inner.for.body: -// CHECK12-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK12-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK12-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 3, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32, i32, [10 x i32]*)* @.omp_outlined..9 to void (i32*, i32*, ...)*), i32 [[TMP8]], i32 [[TMP9]], [10 x i32]* [[TMP0]]) +// CHECK12-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !31 +// CHECK12-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !31 +// CHECK12-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 3, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32, i32, [10 x i32]*)* @.omp_outlined..9 to void (i32*, i32*, ...)*), i32 [[TMP8]], i32 [[TMP9]], [10 x i32]* [[TMP0]]), !llvm.access.group !31 // CHECK12-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK12: omp.inner.for.inc: -// CHECK12-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK12-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK12-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !31 +// CHECK12-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !31 // CHECK12-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP10]], [[TMP11]] -// CHECK12-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 -// CHECK12-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP18:![0-9]+]] +// CHECK12-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !31 +// CHECK12-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP32:![0-9]+]] // CHECK12: omp.inner.for.end: // CHECK12-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK12: omp.loop.exit: @@ -9148,26 +9148,26 @@ int main (int argc, char **argv) { // CHECK12-NEXT: store i32 [[TMP7]], i32* [[DOTOMP_IV]], align 4 // CHECK12-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK12: omp.inner.for.cond: -// CHECK12-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK12-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK12-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !34 +// CHECK12-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !34 // CHECK12-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] // CHECK12-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK12: omp.inner.for.body: -// CHECK12-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK12-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !34 // CHECK12-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP10]], 1 // CHECK12-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK12-NEXT: store i32 [[ADD]], i32* [[I]], align 4 -// CHECK12-NEXT: [[TMP11:%.*]] = load i32, i32* [[I]], align 4 +// CHECK12-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !34 +// CHECK12-NEXT: [[TMP11:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !34 // CHECK12-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], [10 x i32]* [[TMP0]], i32 0, i32 [[TMP11]] -// CHECK12-NEXT: store i32 0, i32* [[ARRAYIDX]], align 4 +// CHECK12-NEXT: store i32 0, i32* [[ARRAYIDX]], align 4, !llvm.access.group !34 // CHECK12-NEXT: br label 
[[OMP_BODY_CONTINUE:%.*]] // CHECK12: omp.body.continue: // CHECK12-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK12: omp.inner.for.inc: -// CHECK12-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK12-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !34 // CHECK12-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP12]], 1 -// CHECK12-NEXT: store i32 [[ADD2]], i32* [[DOTOMP_IV]], align 4 -// CHECK12-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP19:![0-9]+]] +// CHECK12-NEXT: store i32 [[ADD2]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !34 +// CHECK12-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP35:![0-9]+]] // CHECK12: omp.inner.for.end: // CHECK12-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK12: omp.loop.exit: @@ -9231,21 +9231,21 @@ int main (int argc, char **argv) { // CHECK12-NEXT: store i32 [[TMP5]], i32* [[DOTOMP_IV]], align 4 // CHECK12-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK12: omp.inner.for.cond: -// CHECK12-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK12-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK12-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !37 +// CHECK12-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !37 // CHECK12-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] // CHECK12-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK12: omp.inner.for.body: -// CHECK12-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK12-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK12-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 3, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32, i32, [10 x i32]*)* @.omp_outlined..12 to void (i32*, i32*, ...)*), i32 [[TMP8]], i32 [[TMP9]], [10 x i32]* [[TMP0]]) +// CHECK12-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !37 +// CHECK12-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !37 +// CHECK12-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) 
@__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 3, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32, i32, [10 x i32]*)* @.omp_outlined..12 to void (i32*, i32*, ...)*), i32 [[TMP8]], i32 [[TMP9]], [10 x i32]* [[TMP0]]), !llvm.access.group !37 // CHECK12-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK12: omp.inner.for.inc: -// CHECK12-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK12-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK12-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !37 +// CHECK12-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !37 // CHECK12-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP10]], [[TMP11]] -// CHECK12-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 -// CHECK12-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP20:![0-9]+]] +// CHECK12-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !37 +// CHECK12-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP38:![0-9]+]] // CHECK12: omp.inner.for.end: // CHECK12-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK12: omp.loop.exit: @@ -9307,26 +9307,26 @@ int main (int argc, char **argv) { // CHECK12-NEXT: store i32 [[TMP7]], i32* [[DOTOMP_IV]], align 4 // CHECK12-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK12: omp.inner.for.cond: -// CHECK12-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK12-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK12-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !40 +// CHECK12-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !40 // CHECK12-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] // CHECK12-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK12: omp.inner.for.body: -// CHECK12-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK12-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !40 // CHECK12-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP10]], 1 // CHECK12-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK12-NEXT: store i32 [[ADD]], i32* [[I]], align 4 -// CHECK12-NEXT: [[TMP11:%.*]] = load i32, i32* [[I]], align 4 +// CHECK12-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !40 +// CHECK12-NEXT: [[TMP11:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !40 // CHECK12-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], [10 x i32]* [[TMP0]], i32 0, i32 [[TMP11]] -// CHECK12-NEXT: store i32 0, i32* [[ARRAYIDX]], align 4 +// CHECK12-NEXT: store i32 0, i32* [[ARRAYIDX]], align 4, !llvm.access.group !40 // CHECK12-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK12: omp.body.continue: // CHECK12-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK12: omp.inner.for.inc: -// CHECK12-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK12-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !40 // CHECK12-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP12]], 1 -// CHECK12-NEXT: store i32 [[ADD2]], i32* [[DOTOMP_IV]], align 4 -// CHECK12-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP21:![0-9]+]] +// CHECK12-NEXT: store i32 [[ADD2]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !40 +// CHECK12-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP41:![0-9]+]] // CHECK12: omp.inner.for.end: // CHECK12-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK12: omp.loop.exit: @@ -9400,44 
+9400,44 @@ int main (int argc, char **argv) { // CHECK12-NEXT: store i32 [[TMP6]], i32* [[DOTOMP_IV]], align 4 // CHECK12-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK12: omp.inner.for.cond: -// CHECK12-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK12-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !43 // CHECK12-NEXT: [[CMP1:%.*]] = icmp slt i32 [[TMP7]], 10 // CHECK12-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK12: omp.inner.for.body: -// CHECK12-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK12-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK12-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR__ADDR]], align 4 -// CHECK12-NEXT: store i32 [[TMP10]], i32* [[DOTCAPTURE_EXPR__CASTED]], align 4 -// CHECK12-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR__CASTED]], align 4 -// CHECK12-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 4, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32, i32, [10 x i32]*, i32)* @.omp_outlined..16 to void (i32*, i32*, ...)*), i32 [[TMP8]], i32 [[TMP9]], [10 x i32]* [[TMP0]], i32 [[TMP11]]) +// CHECK12-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !43 +// CHECK12-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !43 +// CHECK12-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR__ADDR]], align 4, !llvm.access.group !43 +// CHECK12-NEXT: store i32 [[TMP10]], i32* [[DOTCAPTURE_EXPR__CASTED]], align 4, !llvm.access.group !43 +// CHECK12-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR__CASTED]], align 4, !llvm.access.group !43 +// CHECK12-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) 
@__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 4, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32, i32, [10 x i32]*, i32)* @.omp_outlined..16 to void (i32*, i32*, ...)*), i32 [[TMP8]], i32 [[TMP9]], [10 x i32]* [[TMP0]], i32 [[TMP11]]), !llvm.access.group !43 // CHECK12-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK12: omp.inner.for.inc: -// CHECK12-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK12-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK12-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !43 +// CHECK12-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !43 // CHECK12-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP12]], [[TMP13]] -// CHECK12-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 -// CHECK12-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK12-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK12-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !43 +// CHECK12-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !43 +// CHECK12-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !43 // CHECK12-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP14]], [[TMP15]] -// CHECK12-NEXT: store i32 [[ADD2]], i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK12-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK12-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK12-NEXT: store i32 [[ADD2]], i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !43 +// CHECK12-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !43 +// CHECK12-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !43 // CHECK12-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP16]], [[TMP17]] -// CHECK12-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK12-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK12-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !43 +// CHECK12-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !43 // CHECK12-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP18]], 9 // CHECK12-NEXT: br i1 [[CMP4]], label [[COND_TRUE5:%.*]], label [[COND_FALSE6:%.*]] // CHECK12: cond.true5: // CHECK12-NEXT: br label [[COND_END7:%.*]] // CHECK12: cond.false6: -// CHECK12-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK12-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !43 // CHECK12-NEXT: br label [[COND_END7]] // CHECK12: cond.end7: // CHECK12-NEXT: [[COND8:%.*]] = phi i32 [ 9, [[COND_TRUE5]] ], [ [[TMP19]], [[COND_FALSE6]] ] -// CHECK12-NEXT: store i32 [[COND8]], i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK12-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK12-NEXT: store i32 [[TMP20]], i32* [[DOTOMP_IV]], align 4 -// CHECK12-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP22:![0-9]+]] +// CHECK12-NEXT: store i32 [[COND8]], i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !43 +// CHECK12-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !43 +// CHECK12-NEXT: store i32 [[TMP20]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !43 +// CHECK12-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP44:![0-9]+]] // CHECK12: omp.inner.for.end: // CHECK12-NEXT: br 
label [[OMP_LOOP_EXIT:%.*]] // CHECK12: omp.loop.exit: @@ -9501,26 +9501,26 @@ int main (int argc, char **argv) { // CHECK12-NEXT: store i32 [[TMP7]], i32* [[DOTOMP_IV]], align 4 // CHECK12-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK12: omp.inner.for.cond: -// CHECK12-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK12-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK12-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !46 +// CHECK12-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !46 // CHECK12-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] // CHECK12-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK12: omp.inner.for.body: -// CHECK12-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK12-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !46 // CHECK12-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP10]], 1 // CHECK12-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK12-NEXT: store i32 [[ADD]], i32* [[I]], align 4 -// CHECK12-NEXT: [[TMP11:%.*]] = load i32, i32* [[I]], align 4 +// CHECK12-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !46 +// CHECK12-NEXT: [[TMP11:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !46 // CHECK12-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], [10 x i32]* [[TMP0]], i32 0, i32 [[TMP11]] -// CHECK12-NEXT: store i32 0, i32* [[ARRAYIDX]], align 4 +// CHECK12-NEXT: store i32 0, i32* [[ARRAYIDX]], align 4, !llvm.access.group !46 // CHECK12-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK12: omp.body.continue: // CHECK12-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK12: omp.inner.for.inc: -// CHECK12-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK12-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !46 // CHECK12-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP12]], 1 -// CHECK12-NEXT: store i32 [[ADD2]], i32* [[DOTOMP_IV]], align 4 -// CHECK12-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP23:![0-9]+]] +// CHECK12-NEXT: store i32 [[ADD2]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !46 +// CHECK12-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP47:![0-9]+]] // CHECK12: omp.inner.for.end: // CHECK12-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK12: omp.loop.exit: diff --git a/clang/test/OpenMP/target_teams_distribute_parallel_for_simd_firstprivate_codegen.cpp b/clang/test/OpenMP/target_teams_distribute_parallel_for_simd_firstprivate_codegen.cpp index 501b5b289eb2..f1f72eee7f86 100644 --- a/clang/test/OpenMP/target_teams_distribute_parallel_for_simd_firstprivate_codegen.cpp +++ b/clang/test/OpenMP/target_teams_distribute_parallel_for_simd_firstprivate_codegen.cpp @@ -496,33 +496,33 @@ int main() { // CHECK1-NEXT: store i32 [[TMP11]], i32* [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !5 +// CHECK1-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !5 // CHECK1-NEXT: [[CMP7:%.*]] = icmp sle i32 [[TMP12]], [[TMP13]] // CHECK1-NEXT: br i1 [[CMP7]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_COND_CLEANUP:%.*]] // CHECK1: 
omp.inner.for.cond.cleanup: // CHECK1-NEXT: br label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK1-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !5 // CHECK1-NEXT: [[TMP15:%.*]] = zext i32 [[TMP14]] to i64 -// CHECK1-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !5 // CHECK1-NEXT: [[TMP17:%.*]] = zext i32 [[TMP16]] to i64 -// CHECK1-NEXT: [[TMP18:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK1-NEXT: [[TMP18:%.*]] = load i32, i32* [[CONV]], align 8, !llvm.access.group !5 // CHECK1-NEXT: [[CONV8:%.*]] = bitcast i64* [[T_VAR_CASTED]] to i32* -// CHECK1-NEXT: store i32 [[TMP18]], i32* [[CONV8]], align 4 -// CHECK1-NEXT: [[TMP19:%.*]] = load i64, i64* [[T_VAR_CASTED]], align 8 -// CHECK1-NEXT: [[TMP20:%.*]] = load i32, i32* [[CONV1]], align 8 +// CHECK1-NEXT: store i32 [[TMP18]], i32* [[CONV8]], align 4, !llvm.access.group !5 +// CHECK1-NEXT: [[TMP19:%.*]] = load i64, i64* [[T_VAR_CASTED]], align 8, !llvm.access.group !5 +// CHECK1-NEXT: [[TMP20:%.*]] = load i32, i32* [[CONV1]], align 8, !llvm.access.group !5 // CHECK1-NEXT: [[CONV9:%.*]] = bitcast i64* [[SIVAR_CASTED]] to i32* -// CHECK1-NEXT: store i32 [[TMP20]], i32* [[CONV9]], align 4 -// CHECK1-NEXT: [[TMP21:%.*]] = load i64, i64* [[SIVAR_CASTED]], align 8 -// CHECK1-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 7, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64, [2 x i32]*, i64, [2 x %struct.S]*, %struct.S*, i64)* @.omp_outlined..3 to void (i32*, i32*, ...)*), i64 [[TMP15]], i64 [[TMP17]], [2 x i32]* [[VEC2]], i64 [[TMP19]], [2 x %struct.S]* [[S_ARR3]], %struct.S* [[VAR5]], i64 [[TMP21]]) +// CHECK1-NEXT: store i32 [[TMP20]], i32* [[CONV9]], align 4, !llvm.access.group !5 +// CHECK1-NEXT: [[TMP21:%.*]] = load i64, i64* [[SIVAR_CASTED]], align 8, !llvm.access.group !5 +// CHECK1-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) 
@__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 7, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64, [2 x i32]*, i64, [2 x %struct.S]*, %struct.S*, i64)* @.omp_outlined..3 to void (i32*, i32*, ...)*), i64 [[TMP15]], i64 [[TMP17]], [2 x i32]* [[VEC2]], i64 [[TMP19]], [2 x %struct.S]* [[S_ARR3]], %struct.S* [[VAR5]], i64 [[TMP21]]), !llvm.access.group !5 // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP22:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[TMP23:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK1-NEXT: [[TMP22:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !5 +// CHECK1-NEXT: [[TMP23:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !5 // CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP22]], [[TMP23]] -// CHECK1-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP4:![0-9]+]] +// CHECK1-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !5 +// CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP6:![0-9]+]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK1: omp.loop.exit: @@ -671,40 +671,40 @@ int main() { // CHECK1-NEXT: store i32 [[TMP13]], i32* [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !9 +// CHECK1-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !9 // CHECK1-NEXT: [[CMP9:%.*]] = icmp sle i32 [[TMP14]], [[TMP15]] // CHECK1-NEXT: br i1 [[CMP9]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_COND_CLEANUP:%.*]] // CHECK1: omp.inner.for.cond.cleanup: // CHECK1-NEXT: br label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !9 // CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP16]], 1 // CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK1-NEXT: store i32 [[ADD]], i32* [[I]], align 4 -// CHECK1-NEXT: [[TMP17:%.*]] = load i32, i32* [[CONV]], align 8 -// CHECK1-NEXT: [[TMP18:%.*]] = load i32, i32* [[I]], align 4 +// CHECK1-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !9 +// CHECK1-NEXT: [[TMP17:%.*]] = load i32, i32* [[CONV]], align 8, !llvm.access.group !9 +// CHECK1-NEXT: [[TMP18:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !9 // CHECK1-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP18]] to i64 // CHECK1-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x i32], [2 x i32]* [[VEC4]], i64 0, i64 [[IDXPROM]] -// CHECK1-NEXT: store i32 [[TMP17]], i32* [[ARRAYIDX]], align 4 -// CHECK1-NEXT: [[TMP19:%.*]] = load i32, i32* [[I]], align 4 +// CHECK1-NEXT: store i32 [[TMP17]], i32* [[ARRAYIDX]], align 4, !llvm.access.group !9 +// CHECK1-NEXT: [[TMP19:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !9 // CHECK1-NEXT: [[IDXPROM10:%.*]] = sext i32 [[TMP19]] to i64 // CHECK1-NEXT: [[ARRAYIDX11:%.*]] = getelementptr inbounds [2 x %struct.S], [2 x %struct.S]* [[S_ARR5]], i64 0, i64 [[IDXPROM10]] // CHECK1-NEXT: [[TMP20:%.*]] = bitcast %struct.S* [[ARRAYIDX11]] to i8* // CHECK1-NEXT: [[TMP21:%.*]] = 
bitcast %struct.S* [[VAR7]] to i8* -// CHECK1-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 [[TMP20]], i8* align 4 [[TMP21]], i64 4, i1 false) -// CHECK1-NEXT: [[TMP22:%.*]] = load i32, i32* [[I]], align 4 -// CHECK1-NEXT: [[TMP23:%.*]] = load i32, i32* [[CONV1]], align 8 +// CHECK1-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 [[TMP20]], i8* align 4 [[TMP21]], i64 4, i1 false), !llvm.access.group !9 +// CHECK1-NEXT: [[TMP22:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !9 +// CHECK1-NEXT: [[TMP23:%.*]] = load i32, i32* [[CONV1]], align 8, !llvm.access.group !9 // CHECK1-NEXT: [[ADD12:%.*]] = add nsw i32 [[TMP23]], [[TMP22]] -// CHECK1-NEXT: store i32 [[ADD12]], i32* [[CONV1]], align 8 +// CHECK1-NEXT: store i32 [[ADD12]], i32* [[CONV1]], align 8, !llvm.access.group !9 // CHECK1-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK1: omp.body.continue: // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP24:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP24:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !9 // CHECK1-NEXT: [[ADD13:%.*]] = add nsw i32 [[TMP24]], 1 -// CHECK1-NEXT: store i32 [[ADD13]], i32* [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP6:![0-9]+]] +// CHECK1-NEXT: store i32 [[ADD13]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !9 +// CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP10:![0-9]+]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK1: omp.loop.exit: @@ -994,30 +994,30 @@ int main() { // CHECK1-NEXT: store i32 [[TMP12]], i32* [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !14 +// CHECK1-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !14 // CHECK1-NEXT: [[CMP8:%.*]] = icmp sle i32 [[TMP13]], [[TMP14]] // CHECK1-NEXT: br i1 [[CMP8]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_COND_CLEANUP:%.*]] // CHECK1: omp.inner.for.cond.cleanup: // CHECK1-NEXT: br label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK1-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !14 // CHECK1-NEXT: [[TMP16:%.*]] = zext i32 [[TMP15]] to i64 -// CHECK1-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !14 // CHECK1-NEXT: [[TMP18:%.*]] = zext i32 [[TMP17]] to i64 -// CHECK1-NEXT: [[TMP19:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK1-NEXT: [[TMP19:%.*]] = load i32, i32* [[CONV]], align 8, !llvm.access.group !14 // CHECK1-NEXT: [[CONV9:%.*]] = bitcast i64* [[T_VAR_CASTED]] to i32* -// CHECK1-NEXT: store i32 [[TMP19]], i32* [[CONV9]], align 4 -// CHECK1-NEXT: [[TMP20:%.*]] = load i64, i64* [[T_VAR_CASTED]], align 8 -// CHECK1-NEXT: [[TMP21:%.*]] = load %struct.S.0*, %struct.S.0** [[_TMP7]], align 8 -// CHECK1-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) 
@__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 6, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64, [2 x i32]*, i64, [2 x %struct.S.0]*, %struct.S.0*)* @.omp_outlined..5 to void (i32*, i32*, ...)*), i64 [[TMP16]], i64 [[TMP18]], [2 x i32]* [[VEC2]], i64 [[TMP20]], [2 x %struct.S.0]* [[S_ARR3]], %struct.S.0* [[TMP21]]) +// CHECK1-NEXT: store i32 [[TMP19]], i32* [[CONV9]], align 4, !llvm.access.group !14 +// CHECK1-NEXT: [[TMP20:%.*]] = load i64, i64* [[T_VAR_CASTED]], align 8, !llvm.access.group !14 +// CHECK1-NEXT: [[TMP21:%.*]] = load %struct.S.0*, %struct.S.0** [[_TMP7]], align 8, !llvm.access.group !14 +// CHECK1-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 6, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64, [2 x i32]*, i64, [2 x %struct.S.0]*, %struct.S.0*)* @.omp_outlined..5 to void (i32*, i32*, ...)*), i64 [[TMP16]], i64 [[TMP18]], [2 x i32]* [[VEC2]], i64 [[TMP20]], [2 x %struct.S.0]* [[S_ARR3]], %struct.S.0* [[TMP21]]), !llvm.access.group !14 // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP22:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[TMP23:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK1-NEXT: [[TMP22:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !14 +// CHECK1-NEXT: [[TMP23:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !14 // CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP22]], [[TMP23]] -// CHECK1-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP9:![0-9]+]] +// CHECK1-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !14 +// CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP15:![0-9]+]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK1: omp.loop.exit: @@ -1148,37 +1148,37 @@ int main() { // CHECK1-NEXT: store i32 [[TMP14]], i32* [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !17 +// CHECK1-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !17 // CHECK1-NEXT: [[CMP10:%.*]] = icmp sle i32 [[TMP15]], [[TMP16]] // CHECK1-NEXT: br i1 [[CMP10]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_COND_CLEANUP:%.*]] // CHECK1: omp.inner.for.cond.cleanup: // CHECK1-NEXT: br label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !17 // CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP17]], 1 // CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK1-NEXT: store i32 [[ADD]], i32* [[I]], align 4 -// CHECK1-NEXT: [[TMP18:%.*]] = load i32, i32* [[CONV]], align 8 -// CHECK1-NEXT: [[TMP19:%.*]] = load i32, i32* [[I]], align 4 +// CHECK1-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !17 +// CHECK1-NEXT: [[TMP18:%.*]] = load i32, i32* [[CONV]], align 8, !llvm.access.group !17 +// CHECK1-NEXT: [[TMP19:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !17 // CHECK1-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP19]] to i64 
// CHECK1-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x i32], [2 x i32]* [[VEC4]], i64 0, i64 [[IDXPROM]] -// CHECK1-NEXT: store i32 [[TMP18]], i32* [[ARRAYIDX]], align 4 -// CHECK1-NEXT: [[TMP20:%.*]] = load %struct.S.0*, %struct.S.0** [[_TMP9]], align 8 -// CHECK1-NEXT: [[TMP21:%.*]] = load i32, i32* [[I]], align 4 +// CHECK1-NEXT: store i32 [[TMP18]], i32* [[ARRAYIDX]], align 4, !llvm.access.group !17 +// CHECK1-NEXT: [[TMP20:%.*]] = load %struct.S.0*, %struct.S.0** [[_TMP9]], align 8, !llvm.access.group !17 +// CHECK1-NEXT: [[TMP21:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !17 // CHECK1-NEXT: [[IDXPROM11:%.*]] = sext i32 [[TMP21]] to i64 // CHECK1-NEXT: [[ARRAYIDX12:%.*]] = getelementptr inbounds [2 x %struct.S.0], [2 x %struct.S.0]* [[S_ARR5]], i64 0, i64 [[IDXPROM11]] // CHECK1-NEXT: [[TMP22:%.*]] = bitcast %struct.S.0* [[ARRAYIDX12]] to i8* // CHECK1-NEXT: [[TMP23:%.*]] = bitcast %struct.S.0* [[TMP20]] to i8* -// CHECK1-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 [[TMP22]], i8* align 4 [[TMP23]], i64 4, i1 false) +// CHECK1-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 [[TMP22]], i8* align 4 [[TMP23]], i64 4, i1 false), !llvm.access.group !17 // CHECK1-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK1: omp.body.continue: // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP24:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP24:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !17 // CHECK1-NEXT: [[ADD13:%.*]] = add nsw i32 [[TMP24]], 1 -// CHECK1-NEXT: store i32 [[ADD13]], i32* [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP10:![0-9]+]] +// CHECK1-NEXT: store i32 [[ADD13]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !17 +// CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP18:![0-9]+]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK1: omp.loop.exit: @@ -1586,33 +1586,33 @@ int main() { // CHECK2-NEXT: store i32 [[TMP11]], i32* [[DOTOMP_IV]], align 4 // CHECK2-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK2: omp.inner.for.cond: -// CHECK2-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK2-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK2-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !5 +// CHECK2-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !5 // CHECK2-NEXT: [[CMP7:%.*]] = icmp sle i32 [[TMP12]], [[TMP13]] // CHECK2-NEXT: br i1 [[CMP7]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_COND_CLEANUP:%.*]] // CHECK2: omp.inner.for.cond.cleanup: // CHECK2-NEXT: br label [[OMP_INNER_FOR_END:%.*]] // CHECK2: omp.inner.for.body: -// CHECK2-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK2-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !5 // CHECK2-NEXT: [[TMP15:%.*]] = zext i32 [[TMP14]] to i64 -// CHECK2-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK2-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !5 // CHECK2-NEXT: [[TMP17:%.*]] = zext i32 [[TMP16]] to i64 -// CHECK2-NEXT: [[TMP18:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK2-NEXT: [[TMP18:%.*]] = load i32, i32* [[CONV]], align 8, !llvm.access.group !5 // CHECK2-NEXT: [[CONV8:%.*]] = bitcast i64* [[T_VAR_CASTED]] to i32* -// CHECK2-NEXT: store i32 
[[TMP18]], i32* [[CONV8]], align 4 -// CHECK2-NEXT: [[TMP19:%.*]] = load i64, i64* [[T_VAR_CASTED]], align 8 -// CHECK2-NEXT: [[TMP20:%.*]] = load i32, i32* [[CONV1]], align 8 +// CHECK2-NEXT: store i32 [[TMP18]], i32* [[CONV8]], align 4, !llvm.access.group !5 +// CHECK2-NEXT: [[TMP19:%.*]] = load i64, i64* [[T_VAR_CASTED]], align 8, !llvm.access.group !5 +// CHECK2-NEXT: [[TMP20:%.*]] = load i32, i32* [[CONV1]], align 8, !llvm.access.group !5 // CHECK2-NEXT: [[CONV9:%.*]] = bitcast i64* [[SIVAR_CASTED]] to i32* -// CHECK2-NEXT: store i32 [[TMP20]], i32* [[CONV9]], align 4 -// CHECK2-NEXT: [[TMP21:%.*]] = load i64, i64* [[SIVAR_CASTED]], align 8 -// CHECK2-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 7, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64, [2 x i32]*, i64, [2 x %struct.S]*, %struct.S*, i64)* @.omp_outlined..3 to void (i32*, i32*, ...)*), i64 [[TMP15]], i64 [[TMP17]], [2 x i32]* [[VEC2]], i64 [[TMP19]], [2 x %struct.S]* [[S_ARR3]], %struct.S* [[VAR5]], i64 [[TMP21]]) +// CHECK2-NEXT: store i32 [[TMP20]], i32* [[CONV9]], align 4, !llvm.access.group !5 +// CHECK2-NEXT: [[TMP21:%.*]] = load i64, i64* [[SIVAR_CASTED]], align 8, !llvm.access.group !5 +// CHECK2-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 7, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64, [2 x i32]*, i64, [2 x %struct.S]*, %struct.S*, i64)* @.omp_outlined..3 to void (i32*, i32*, ...)*), i64 [[TMP15]], i64 [[TMP17]], [2 x i32]* [[VEC2]], i64 [[TMP19]], [2 x %struct.S]* [[S_ARR3]], %struct.S* [[VAR5]], i64 [[TMP21]]), !llvm.access.group !5 // CHECK2-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK2: omp.inner.for.inc: -// CHECK2-NEXT: [[TMP22:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK2-NEXT: [[TMP23:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK2-NEXT: [[TMP22:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !5 +// CHECK2-NEXT: [[TMP23:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !5 // CHECK2-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP22]], [[TMP23]] -// CHECK2-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 -// CHECK2-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP4:![0-9]+]] +// CHECK2-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !5 +// CHECK2-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP6:![0-9]+]] // CHECK2: omp.inner.for.end: // CHECK2-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK2: omp.loop.exit: @@ -1761,40 +1761,40 @@ int main() { // CHECK2-NEXT: store i32 [[TMP13]], i32* [[DOTOMP_IV]], align 4 // CHECK2-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK2: omp.inner.for.cond: -// CHECK2-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK2-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK2-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !9 +// CHECK2-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !9 // CHECK2-NEXT: [[CMP9:%.*]] = icmp sle i32 [[TMP14]], [[TMP15]] // CHECK2-NEXT: br i1 [[CMP9]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_COND_CLEANUP:%.*]] // CHECK2: omp.inner.for.cond.cleanup: // CHECK2-NEXT: br label [[OMP_INNER_FOR_END:%.*]] // CHECK2: omp.inner.for.body: -// CHECK2-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK2-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_IV]], align 
4, !llvm.access.group !9 // CHECK2-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP16]], 1 // CHECK2-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK2-NEXT: store i32 [[ADD]], i32* [[I]], align 4 -// CHECK2-NEXT: [[TMP17:%.*]] = load i32, i32* [[CONV]], align 8 -// CHECK2-NEXT: [[TMP18:%.*]] = load i32, i32* [[I]], align 4 +// CHECK2-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !9 +// CHECK2-NEXT: [[TMP17:%.*]] = load i32, i32* [[CONV]], align 8, !llvm.access.group !9 +// CHECK2-NEXT: [[TMP18:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !9 // CHECK2-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP18]] to i64 // CHECK2-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x i32], [2 x i32]* [[VEC4]], i64 0, i64 [[IDXPROM]] -// CHECK2-NEXT: store i32 [[TMP17]], i32* [[ARRAYIDX]], align 4 -// CHECK2-NEXT: [[TMP19:%.*]] = load i32, i32* [[I]], align 4 +// CHECK2-NEXT: store i32 [[TMP17]], i32* [[ARRAYIDX]], align 4, !llvm.access.group !9 +// CHECK2-NEXT: [[TMP19:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !9 // CHECK2-NEXT: [[IDXPROM10:%.*]] = sext i32 [[TMP19]] to i64 // CHECK2-NEXT: [[ARRAYIDX11:%.*]] = getelementptr inbounds [2 x %struct.S], [2 x %struct.S]* [[S_ARR5]], i64 0, i64 [[IDXPROM10]] // CHECK2-NEXT: [[TMP20:%.*]] = bitcast %struct.S* [[ARRAYIDX11]] to i8* // CHECK2-NEXT: [[TMP21:%.*]] = bitcast %struct.S* [[VAR7]] to i8* -// CHECK2-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 [[TMP20]], i8* align 4 [[TMP21]], i64 4, i1 false) -// CHECK2-NEXT: [[TMP22:%.*]] = load i32, i32* [[I]], align 4 -// CHECK2-NEXT: [[TMP23:%.*]] = load i32, i32* [[CONV1]], align 8 +// CHECK2-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 [[TMP20]], i8* align 4 [[TMP21]], i64 4, i1 false), !llvm.access.group !9 +// CHECK2-NEXT: [[TMP22:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !9 +// CHECK2-NEXT: [[TMP23:%.*]] = load i32, i32* [[CONV1]], align 8, !llvm.access.group !9 // CHECK2-NEXT: [[ADD12:%.*]] = add nsw i32 [[TMP23]], [[TMP22]] -// CHECK2-NEXT: store i32 [[ADD12]], i32* [[CONV1]], align 8 +// CHECK2-NEXT: store i32 [[ADD12]], i32* [[CONV1]], align 8, !llvm.access.group !9 // CHECK2-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK2: omp.body.continue: // CHECK2-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK2: omp.inner.for.inc: -// CHECK2-NEXT: [[TMP24:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK2-NEXT: [[TMP24:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !9 // CHECK2-NEXT: [[ADD13:%.*]] = add nsw i32 [[TMP24]], 1 -// CHECK2-NEXT: store i32 [[ADD13]], i32* [[DOTOMP_IV]], align 4 -// CHECK2-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP6:![0-9]+]] +// CHECK2-NEXT: store i32 [[ADD13]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !9 +// CHECK2-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP10:![0-9]+]] // CHECK2: omp.inner.for.end: // CHECK2-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK2: omp.loop.exit: @@ -2084,30 +2084,30 @@ int main() { // CHECK2-NEXT: store i32 [[TMP12]], i32* [[DOTOMP_IV]], align 4 // CHECK2-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK2: omp.inner.for.cond: -// CHECK2-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK2-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK2-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !14 +// CHECK2-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !14 // CHECK2-NEXT: [[CMP8:%.*]] = icmp sle i32 [[TMP13]], [[TMP14]] // 
CHECK2-NEXT: br i1 [[CMP8]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_COND_CLEANUP:%.*]] // CHECK2: omp.inner.for.cond.cleanup: // CHECK2-NEXT: br label [[OMP_INNER_FOR_END:%.*]] // CHECK2: omp.inner.for.body: -// CHECK2-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK2-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !14 // CHECK2-NEXT: [[TMP16:%.*]] = zext i32 [[TMP15]] to i64 -// CHECK2-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK2-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !14 // CHECK2-NEXT: [[TMP18:%.*]] = zext i32 [[TMP17]] to i64 -// CHECK2-NEXT: [[TMP19:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK2-NEXT: [[TMP19:%.*]] = load i32, i32* [[CONV]], align 8, !llvm.access.group !14 // CHECK2-NEXT: [[CONV9:%.*]] = bitcast i64* [[T_VAR_CASTED]] to i32* -// CHECK2-NEXT: store i32 [[TMP19]], i32* [[CONV9]], align 4 -// CHECK2-NEXT: [[TMP20:%.*]] = load i64, i64* [[T_VAR_CASTED]], align 8 -// CHECK2-NEXT: [[TMP21:%.*]] = load %struct.S.0*, %struct.S.0** [[_TMP7]], align 8 -// CHECK2-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 6, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64, [2 x i32]*, i64, [2 x %struct.S.0]*, %struct.S.0*)* @.omp_outlined..5 to void (i32*, i32*, ...)*), i64 [[TMP16]], i64 [[TMP18]], [2 x i32]* [[VEC2]], i64 [[TMP20]], [2 x %struct.S.0]* [[S_ARR3]], %struct.S.0* [[TMP21]]) +// CHECK2-NEXT: store i32 [[TMP19]], i32* [[CONV9]], align 4, !llvm.access.group !14 +// CHECK2-NEXT: [[TMP20:%.*]] = load i64, i64* [[T_VAR_CASTED]], align 8, !llvm.access.group !14 +// CHECK2-NEXT: [[TMP21:%.*]] = load %struct.S.0*, %struct.S.0** [[_TMP7]], align 8, !llvm.access.group !14 +// CHECK2-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) 
@__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 6, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64, [2 x i32]*, i64, [2 x %struct.S.0]*, %struct.S.0*)* @.omp_outlined..5 to void (i32*, i32*, ...)*), i64 [[TMP16]], i64 [[TMP18]], [2 x i32]* [[VEC2]], i64 [[TMP20]], [2 x %struct.S.0]* [[S_ARR3]], %struct.S.0* [[TMP21]]), !llvm.access.group !14 // CHECK2-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK2: omp.inner.for.inc: -// CHECK2-NEXT: [[TMP22:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK2-NEXT: [[TMP23:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK2-NEXT: [[TMP22:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !14 +// CHECK2-NEXT: [[TMP23:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !14 // CHECK2-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP22]], [[TMP23]] -// CHECK2-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 -// CHECK2-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP9:![0-9]+]] +// CHECK2-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !14 +// CHECK2-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP15:![0-9]+]] // CHECK2: omp.inner.for.end: // CHECK2-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK2: omp.loop.exit: @@ -2238,37 +2238,37 @@ int main() { // CHECK2-NEXT: store i32 [[TMP14]], i32* [[DOTOMP_IV]], align 4 // CHECK2-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK2: omp.inner.for.cond: -// CHECK2-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK2-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK2-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !17 +// CHECK2-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !17 // CHECK2-NEXT: [[CMP10:%.*]] = icmp sle i32 [[TMP15]], [[TMP16]] // CHECK2-NEXT: br i1 [[CMP10]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_COND_CLEANUP:%.*]] // CHECK2: omp.inner.for.cond.cleanup: // CHECK2-NEXT: br label [[OMP_INNER_FOR_END:%.*]] // CHECK2: omp.inner.for.body: -// CHECK2-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK2-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !17 // CHECK2-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP17]], 1 // CHECK2-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK2-NEXT: store i32 [[ADD]], i32* [[I]], align 4 -// CHECK2-NEXT: [[TMP18:%.*]] = load i32, i32* [[CONV]], align 8 -// CHECK2-NEXT: [[TMP19:%.*]] = load i32, i32* [[I]], align 4 +// CHECK2-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !17 +// CHECK2-NEXT: [[TMP18:%.*]] = load i32, i32* [[CONV]], align 8, !llvm.access.group !17 +// CHECK2-NEXT: [[TMP19:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !17 // CHECK2-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP19]] to i64 // CHECK2-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x i32], [2 x i32]* [[VEC4]], i64 0, i64 [[IDXPROM]] -// CHECK2-NEXT: store i32 [[TMP18]], i32* [[ARRAYIDX]], align 4 -// CHECK2-NEXT: [[TMP20:%.*]] = load %struct.S.0*, %struct.S.0** [[_TMP9]], align 8 -// CHECK2-NEXT: [[TMP21:%.*]] = load i32, i32* [[I]], align 4 +// CHECK2-NEXT: store i32 [[TMP18]], i32* [[ARRAYIDX]], align 4, !llvm.access.group !17 +// CHECK2-NEXT: [[TMP20:%.*]] = load %struct.S.0*, %struct.S.0** [[_TMP9]], align 8, !llvm.access.group !17 +// CHECK2-NEXT: [[TMP21:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !17 // CHECK2-NEXT: [[IDXPROM11:%.*]] = sext i32 [[TMP21]] to i64 // CHECK2-NEXT: [[ARRAYIDX12:%.*]] = 
getelementptr inbounds [2 x %struct.S.0], [2 x %struct.S.0]* [[S_ARR5]], i64 0, i64 [[IDXPROM11]] // CHECK2-NEXT: [[TMP22:%.*]] = bitcast %struct.S.0* [[ARRAYIDX12]] to i8* // CHECK2-NEXT: [[TMP23:%.*]] = bitcast %struct.S.0* [[TMP20]] to i8* -// CHECK2-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 [[TMP22]], i8* align 4 [[TMP23]], i64 4, i1 false) +// CHECK2-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 [[TMP22]], i8* align 4 [[TMP23]], i64 4, i1 false), !llvm.access.group !17 // CHECK2-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK2: omp.body.continue: // CHECK2-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK2: omp.inner.for.inc: -// CHECK2-NEXT: [[TMP24:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK2-NEXT: [[TMP24:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !17 // CHECK2-NEXT: [[ADD13:%.*]] = add nsw i32 [[TMP24]], 1 -// CHECK2-NEXT: store i32 [[ADD13]], i32* [[DOTOMP_IV]], align 4 -// CHECK2-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP10:![0-9]+]] +// CHECK2-NEXT: store i32 [[ADD13]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !17 +// CHECK2-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP18:![0-9]+]] // CHECK2: omp.inner.for.end: // CHECK2-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK2: omp.loop.exit: @@ -2668,29 +2668,29 @@ int main() { // CHECK3-NEXT: store i32 [[TMP11]], i32* [[DOTOMP_IV]], align 4 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK3: omp.inner.for.cond: -// CHECK3-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK3-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !6 +// CHECK3-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !6 // CHECK3-NEXT: [[CMP6:%.*]] = icmp sle i32 [[TMP12]], [[TMP13]] // CHECK3-NEXT: br i1 [[CMP6]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_COND_CLEANUP:%.*]] // CHECK3: omp.inner.for.cond.cleanup: // CHECK3-NEXT: br label [[OMP_INNER_FOR_END:%.*]] // CHECK3: omp.inner.for.body: -// CHECK3-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK3-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK3-NEXT: [[TMP16:%.*]] = load i32, i32* [[T_VAR_ADDR]], align 4 -// CHECK3-NEXT: store i32 [[TMP16]], i32* [[T_VAR_CASTED]], align 4 -// CHECK3-NEXT: [[TMP17:%.*]] = load i32, i32* [[T_VAR_CASTED]], align 4 -// CHECK3-NEXT: [[TMP18:%.*]] = load i32, i32* [[SIVAR_ADDR]], align 4 -// CHECK3-NEXT: store i32 [[TMP18]], i32* [[SIVAR_CASTED]], align 4 -// CHECK3-NEXT: [[TMP19:%.*]] = load i32, i32* [[SIVAR_CASTED]], align 4 -// CHECK3-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) 
@__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 7, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32, i32, [2 x i32]*, i32, [2 x %struct.S]*, %struct.S*, i32)* @.omp_outlined..3 to void (i32*, i32*, ...)*), i32 [[TMP14]], i32 [[TMP15]], [2 x i32]* [[VEC1]], i32 [[TMP17]], [2 x %struct.S]* [[S_ARR2]], %struct.S* [[VAR4]], i32 [[TMP19]]) +// CHECK3-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !6 +// CHECK3-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !6 +// CHECK3-NEXT: [[TMP16:%.*]] = load i32, i32* [[T_VAR_ADDR]], align 4, !llvm.access.group !6 +// CHECK3-NEXT: store i32 [[TMP16]], i32* [[T_VAR_CASTED]], align 4, !llvm.access.group !6 +// CHECK3-NEXT: [[TMP17:%.*]] = load i32, i32* [[T_VAR_CASTED]], align 4, !llvm.access.group !6 +// CHECK3-NEXT: [[TMP18:%.*]] = load i32, i32* [[SIVAR_ADDR]], align 4, !llvm.access.group !6 +// CHECK3-NEXT: store i32 [[TMP18]], i32* [[SIVAR_CASTED]], align 4, !llvm.access.group !6 +// CHECK3-NEXT: [[TMP19:%.*]] = load i32, i32* [[SIVAR_CASTED]], align 4, !llvm.access.group !6 +// CHECK3-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 7, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32, i32, [2 x i32]*, i32, [2 x %struct.S]*, %struct.S*, i32)* @.omp_outlined..3 to void (i32*, i32*, ...)*), i32 [[TMP14]], i32 [[TMP15]], [2 x i32]* [[VEC1]], i32 [[TMP17]], [2 x %struct.S]* [[S_ARR2]], %struct.S* [[VAR4]], i32 [[TMP19]]), !llvm.access.group !6 // CHECK3-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK3: omp.inner.for.inc: -// CHECK3-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK3-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !6 +// CHECK3-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !6 // CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP20]], [[TMP21]] -// CHECK3-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP5:![0-9]+]] +// CHECK3-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !6 +// CHECK3-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP7:![0-9]+]] // CHECK3: omp.inner.for.end: // CHECK3-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK3: omp.loop.exit: @@ -2835,38 +2835,38 @@ int main() { // CHECK3-NEXT: store i32 [[TMP13]], i32* [[DOTOMP_IV]], align 4 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK3: omp.inner.for.cond: -// CHECK3-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !10 +// CHECK3-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !10 // CHECK3-NEXT: [[CMP6:%.*]] = icmp sle i32 [[TMP14]], [[TMP15]] // CHECK3-NEXT: br i1 [[CMP6]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_COND_CLEANUP:%.*]] // CHECK3: omp.inner.for.cond.cleanup: // CHECK3-NEXT: br label [[OMP_INNER_FOR_END:%.*]] // CHECK3: omp.inner.for.body: -// CHECK3-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !10 // CHECK3-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP16]], 1 // CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// 
CHECK3-NEXT: store i32 [[ADD]], i32* [[I]], align 4 -// CHECK3-NEXT: [[TMP17:%.*]] = load i32, i32* [[T_VAR_ADDR]], align 4 -// CHECK3-NEXT: [[TMP18:%.*]] = load i32, i32* [[I]], align 4 +// CHECK3-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !10 +// CHECK3-NEXT: [[TMP17:%.*]] = load i32, i32* [[T_VAR_ADDR]], align 4, !llvm.access.group !10 +// CHECK3-NEXT: [[TMP18:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !10 // CHECK3-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x i32], [2 x i32]* [[VEC1]], i32 0, i32 [[TMP18]] -// CHECK3-NEXT: store i32 [[TMP17]], i32* [[ARRAYIDX]], align 4 -// CHECK3-NEXT: [[TMP19:%.*]] = load i32, i32* [[I]], align 4 +// CHECK3-NEXT: store i32 [[TMP17]], i32* [[ARRAYIDX]], align 4, !llvm.access.group !10 +// CHECK3-NEXT: [[TMP19:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !10 // CHECK3-NEXT: [[ARRAYIDX7:%.*]] = getelementptr inbounds [2 x %struct.S], [2 x %struct.S]* [[S_ARR2]], i32 0, i32 [[TMP19]] // CHECK3-NEXT: [[TMP20:%.*]] = bitcast %struct.S* [[ARRAYIDX7]] to i8* // CHECK3-NEXT: [[TMP21:%.*]] = bitcast %struct.S* [[VAR4]] to i8* -// CHECK3-NEXT: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 4 [[TMP20]], i8* align 4 [[TMP21]], i32 4, i1 false) -// CHECK3-NEXT: [[TMP22:%.*]] = load i32, i32* [[I]], align 4 -// CHECK3-NEXT: [[TMP23:%.*]] = load i32, i32* [[SIVAR_ADDR]], align 4 +// CHECK3-NEXT: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 4 [[TMP20]], i8* align 4 [[TMP21]], i32 4, i1 false), !llvm.access.group !10 +// CHECK3-NEXT: [[TMP22:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !10 +// CHECK3-NEXT: [[TMP23:%.*]] = load i32, i32* [[SIVAR_ADDR]], align 4, !llvm.access.group !10 // CHECK3-NEXT: [[ADD8:%.*]] = add nsw i32 [[TMP23]], [[TMP22]] -// CHECK3-NEXT: store i32 [[ADD8]], i32* [[SIVAR_ADDR]], align 4 +// CHECK3-NEXT: store i32 [[ADD8]], i32* [[SIVAR_ADDR]], align 4, !llvm.access.group !10 // CHECK3-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK3: omp.body.continue: // CHECK3-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK3: omp.inner.for.inc: -// CHECK3-NEXT: [[TMP24:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP24:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !10 // CHECK3-NEXT: [[ADD9:%.*]] = add nsw i32 [[TMP24]], 1 -// CHECK3-NEXT: store i32 [[ADD9]], i32* [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP7:![0-9]+]] +// CHECK3-NEXT: store i32 [[ADD9]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !10 +// CHECK3-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP11:![0-9]+]] // CHECK3: omp.inner.for.end: // CHECK3-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK3: omp.loop.exit: @@ -3152,27 +3152,27 @@ int main() { // CHECK3-NEXT: store i32 [[TMP12]], i32* [[DOTOMP_IV]], align 4 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK3: omp.inner.for.cond: -// CHECK3-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK3-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !15 +// CHECK3-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !15 // CHECK3-NEXT: [[CMP8:%.*]] = icmp sle i32 [[TMP13]], [[TMP14]] // CHECK3-NEXT: br i1 [[CMP8]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_COND_CLEANUP:%.*]] // CHECK3: omp.inner.for.cond.cleanup: // CHECK3-NEXT: br label [[OMP_INNER_FOR_END:%.*]] // CHECK3: omp.inner.for.body: -// 
CHECK3-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK3-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK3-NEXT: [[TMP17:%.*]] = load i32, i32* [[T_VAR_ADDR]], align 4 -// CHECK3-NEXT: store i32 [[TMP17]], i32* [[T_VAR_CASTED]], align 4 -// CHECK3-NEXT: [[TMP18:%.*]] = load i32, i32* [[T_VAR_CASTED]], align 4 -// CHECK3-NEXT: [[TMP19:%.*]] = load %struct.S.0*, %struct.S.0** [[_TMP7]], align 4 -// CHECK3-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 6, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32, i32, [2 x i32]*, i32, [2 x %struct.S.0]*, %struct.S.0*)* @.omp_outlined..5 to void (i32*, i32*, ...)*), i32 [[TMP15]], i32 [[TMP16]], [2 x i32]* [[VEC2]], i32 [[TMP18]], [2 x %struct.S.0]* [[S_ARR3]], %struct.S.0* [[TMP19]]) +// CHECK3-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !15 +// CHECK3-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !15 +// CHECK3-NEXT: [[TMP17:%.*]] = load i32, i32* [[T_VAR_ADDR]], align 4, !llvm.access.group !15 +// CHECK3-NEXT: store i32 [[TMP17]], i32* [[T_VAR_CASTED]], align 4, !llvm.access.group !15 +// CHECK3-NEXT: [[TMP18:%.*]] = load i32, i32* [[T_VAR_CASTED]], align 4, !llvm.access.group !15 +// CHECK3-NEXT: [[TMP19:%.*]] = load %struct.S.0*, %struct.S.0** [[_TMP7]], align 4, !llvm.access.group !15 +// CHECK3-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 6, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32, i32, [2 x i32]*, i32, [2 x %struct.S.0]*, %struct.S.0*)* @.omp_outlined..5 to void (i32*, i32*, ...)*), i32 [[TMP15]], i32 [[TMP16]], [2 x i32]* [[VEC2]], i32 [[TMP18]], [2 x %struct.S.0]* [[S_ARR3]], %struct.S.0* [[TMP19]]), !llvm.access.group !15 // CHECK3-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK3: omp.inner.for.inc: -// CHECK3-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK3-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !15 +// CHECK3-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !15 // CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP20]], [[TMP21]] -// CHECK3-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP10:![0-9]+]] +// CHECK3-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !15 +// CHECK3-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP16:![0-9]+]] // CHECK3: omp.inner.for.end: // CHECK3-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK3: omp.loop.exit: @@ -3300,35 +3300,35 @@ int main() { // CHECK3-NEXT: store i32 [[TMP14]], i32* [[DOTOMP_IV]], align 4 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK3: omp.inner.for.cond: -// CHECK3-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !18 +// CHECK3-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !18 // CHECK3-NEXT: [[CMP8:%.*]] = icmp sle i32 [[TMP15]], [[TMP16]] // CHECK3-NEXT: br i1 [[CMP8]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_COND_CLEANUP:%.*]] // CHECK3: omp.inner.for.cond.cleanup: // CHECK3-NEXT: br label 
[[OMP_INNER_FOR_END:%.*]] // CHECK3: omp.inner.for.body: -// CHECK3-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !18 // CHECK3-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP17]], 1 // CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK3-NEXT: store i32 [[ADD]], i32* [[I]], align 4 -// CHECK3-NEXT: [[TMP18:%.*]] = load i32, i32* [[T_VAR_ADDR]], align 4 -// CHECK3-NEXT: [[TMP19:%.*]] = load i32, i32* [[I]], align 4 +// CHECK3-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !18 +// CHECK3-NEXT: [[TMP18:%.*]] = load i32, i32* [[T_VAR_ADDR]], align 4, !llvm.access.group !18 +// CHECK3-NEXT: [[TMP19:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !18 // CHECK3-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x i32], [2 x i32]* [[VEC2]], i32 0, i32 [[TMP19]] -// CHECK3-NEXT: store i32 [[TMP18]], i32* [[ARRAYIDX]], align 4 -// CHECK3-NEXT: [[TMP20:%.*]] = load %struct.S.0*, %struct.S.0** [[_TMP7]], align 4 -// CHECK3-NEXT: [[TMP21:%.*]] = load i32, i32* [[I]], align 4 +// CHECK3-NEXT: store i32 [[TMP18]], i32* [[ARRAYIDX]], align 4, !llvm.access.group !18 +// CHECK3-NEXT: [[TMP20:%.*]] = load %struct.S.0*, %struct.S.0** [[_TMP7]], align 4, !llvm.access.group !18 +// CHECK3-NEXT: [[TMP21:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !18 // CHECK3-NEXT: [[ARRAYIDX9:%.*]] = getelementptr inbounds [2 x %struct.S.0], [2 x %struct.S.0]* [[S_ARR3]], i32 0, i32 [[TMP21]] // CHECK3-NEXT: [[TMP22:%.*]] = bitcast %struct.S.0* [[ARRAYIDX9]] to i8* // CHECK3-NEXT: [[TMP23:%.*]] = bitcast %struct.S.0* [[TMP20]] to i8* -// CHECK3-NEXT: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 4 [[TMP22]], i8* align 4 [[TMP23]], i32 4, i1 false) +// CHECK3-NEXT: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 4 [[TMP22]], i8* align 4 [[TMP23]], i32 4, i1 false), !llvm.access.group !18 // CHECK3-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK3: omp.body.continue: // CHECK3-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK3: omp.inner.for.inc: -// CHECK3-NEXT: [[TMP24:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP24:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !18 // CHECK3-NEXT: [[ADD10:%.*]] = add nsw i32 [[TMP24]], 1 -// CHECK3-NEXT: store i32 [[ADD10]], i32* [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP11:![0-9]+]] +// CHECK3-NEXT: store i32 [[ADD10]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !18 +// CHECK3-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP19:![0-9]+]] // CHECK3: omp.inner.for.end: // CHECK3-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK3: omp.loop.exit: @@ -3728,29 +3728,29 @@ int main() { // CHECK4-NEXT: store i32 [[TMP11]], i32* [[DOTOMP_IV]], align 4 // CHECK4-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK4: omp.inner.for.cond: -// CHECK4-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK4-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK4-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !6 +// CHECK4-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !6 // CHECK4-NEXT: [[CMP6:%.*]] = icmp sle i32 [[TMP12]], [[TMP13]] // CHECK4-NEXT: br i1 [[CMP6]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_COND_CLEANUP:%.*]] // CHECK4: omp.inner.for.cond.cleanup: // CHECK4-NEXT: br label [[OMP_INNER_FOR_END:%.*]] // CHECK4: omp.inner.for.body: -// 
CHECK4-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK4-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK4-NEXT: [[TMP16:%.*]] = load i32, i32* [[T_VAR_ADDR]], align 4 -// CHECK4-NEXT: store i32 [[TMP16]], i32* [[T_VAR_CASTED]], align 4 -// CHECK4-NEXT: [[TMP17:%.*]] = load i32, i32* [[T_VAR_CASTED]], align 4 -// CHECK4-NEXT: [[TMP18:%.*]] = load i32, i32* [[SIVAR_ADDR]], align 4 -// CHECK4-NEXT: store i32 [[TMP18]], i32* [[SIVAR_CASTED]], align 4 -// CHECK4-NEXT: [[TMP19:%.*]] = load i32, i32* [[SIVAR_CASTED]], align 4 -// CHECK4-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 7, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32, i32, [2 x i32]*, i32, [2 x %struct.S]*, %struct.S*, i32)* @.omp_outlined..3 to void (i32*, i32*, ...)*), i32 [[TMP14]], i32 [[TMP15]], [2 x i32]* [[VEC1]], i32 [[TMP17]], [2 x %struct.S]* [[S_ARR2]], %struct.S* [[VAR4]], i32 [[TMP19]]) +// CHECK4-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !6 +// CHECK4-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !6 +// CHECK4-NEXT: [[TMP16:%.*]] = load i32, i32* [[T_VAR_ADDR]], align 4, !llvm.access.group !6 +// CHECK4-NEXT: store i32 [[TMP16]], i32* [[T_VAR_CASTED]], align 4, !llvm.access.group !6 +// CHECK4-NEXT: [[TMP17:%.*]] = load i32, i32* [[T_VAR_CASTED]], align 4, !llvm.access.group !6 +// CHECK4-NEXT: [[TMP18:%.*]] = load i32, i32* [[SIVAR_ADDR]], align 4, !llvm.access.group !6 +// CHECK4-NEXT: store i32 [[TMP18]], i32* [[SIVAR_CASTED]], align 4, !llvm.access.group !6 +// CHECK4-NEXT: [[TMP19:%.*]] = load i32, i32* [[SIVAR_CASTED]], align 4, !llvm.access.group !6 +// CHECK4-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) 
@__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 7, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32, i32, [2 x i32]*, i32, [2 x %struct.S]*, %struct.S*, i32)* @.omp_outlined..3 to void (i32*, i32*, ...)*), i32 [[TMP14]], i32 [[TMP15]], [2 x i32]* [[VEC1]], i32 [[TMP17]], [2 x %struct.S]* [[S_ARR2]], %struct.S* [[VAR4]], i32 [[TMP19]]), !llvm.access.group !6 // CHECK4-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK4: omp.inner.for.inc: -// CHECK4-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK4-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK4-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !6 +// CHECK4-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !6 // CHECK4-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP20]], [[TMP21]] -// CHECK4-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 -// CHECK4-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP5:![0-9]+]] +// CHECK4-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !6 +// CHECK4-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP7:![0-9]+]] // CHECK4: omp.inner.for.end: // CHECK4-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK4: omp.loop.exit: @@ -3895,38 +3895,38 @@ int main() { // CHECK4-NEXT: store i32 [[TMP13]], i32* [[DOTOMP_IV]], align 4 // CHECK4-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK4: omp.inner.for.cond: -// CHECK4-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK4-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK4-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !10 +// CHECK4-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !10 // CHECK4-NEXT: [[CMP6:%.*]] = icmp sle i32 [[TMP14]], [[TMP15]] // CHECK4-NEXT: br i1 [[CMP6]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_COND_CLEANUP:%.*]] // CHECK4: omp.inner.for.cond.cleanup: // CHECK4-NEXT: br label [[OMP_INNER_FOR_END:%.*]] // CHECK4: omp.inner.for.body: -// CHECK4-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK4-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !10 // CHECK4-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP16]], 1 // CHECK4-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK4-NEXT: store i32 [[ADD]], i32* [[I]], align 4 -// CHECK4-NEXT: [[TMP17:%.*]] = load i32, i32* [[T_VAR_ADDR]], align 4 -// CHECK4-NEXT: [[TMP18:%.*]] = load i32, i32* [[I]], align 4 +// CHECK4-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !10 +// CHECK4-NEXT: [[TMP17:%.*]] = load i32, i32* [[T_VAR_ADDR]], align 4, !llvm.access.group !10 +// CHECK4-NEXT: [[TMP18:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !10 // CHECK4-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x i32], [2 x i32]* [[VEC1]], i32 0, i32 [[TMP18]] -// CHECK4-NEXT: store i32 [[TMP17]], i32* [[ARRAYIDX]], align 4 -// CHECK4-NEXT: [[TMP19:%.*]] = load i32, i32* [[I]], align 4 +// CHECK4-NEXT: store i32 [[TMP17]], i32* [[ARRAYIDX]], align 4, !llvm.access.group !10 +// CHECK4-NEXT: [[TMP19:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !10 // CHECK4-NEXT: [[ARRAYIDX7:%.*]] = getelementptr inbounds [2 x %struct.S], [2 x %struct.S]* [[S_ARR2]], i32 0, i32 [[TMP19]] // CHECK4-NEXT: [[TMP20:%.*]] = bitcast %struct.S* [[ARRAYIDX7]] to i8* // CHECK4-NEXT: [[TMP21:%.*]] = bitcast %struct.S* [[VAR4]] to i8* -// CHECK4-NEXT: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 4 
[[TMP20]], i8* align 4 [[TMP21]], i32 4, i1 false) -// CHECK4-NEXT: [[TMP22:%.*]] = load i32, i32* [[I]], align 4 -// CHECK4-NEXT: [[TMP23:%.*]] = load i32, i32* [[SIVAR_ADDR]], align 4 +// CHECK4-NEXT: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 4 [[TMP20]], i8* align 4 [[TMP21]], i32 4, i1 false), !llvm.access.group !10 +// CHECK4-NEXT: [[TMP22:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !10 +// CHECK4-NEXT: [[TMP23:%.*]] = load i32, i32* [[SIVAR_ADDR]], align 4, !llvm.access.group !10 // CHECK4-NEXT: [[ADD8:%.*]] = add nsw i32 [[TMP23]], [[TMP22]] -// CHECK4-NEXT: store i32 [[ADD8]], i32* [[SIVAR_ADDR]], align 4 +// CHECK4-NEXT: store i32 [[ADD8]], i32* [[SIVAR_ADDR]], align 4, !llvm.access.group !10 // CHECK4-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK4: omp.body.continue: // CHECK4-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK4: omp.inner.for.inc: -// CHECK4-NEXT: [[TMP24:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK4-NEXT: [[TMP24:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !10 // CHECK4-NEXT: [[ADD9:%.*]] = add nsw i32 [[TMP24]], 1 -// CHECK4-NEXT: store i32 [[ADD9]], i32* [[DOTOMP_IV]], align 4 -// CHECK4-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP7:![0-9]+]] +// CHECK4-NEXT: store i32 [[ADD9]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !10 +// CHECK4-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP11:![0-9]+]] // CHECK4: omp.inner.for.end: // CHECK4-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK4: omp.loop.exit: @@ -4212,27 +4212,27 @@ int main() { // CHECK4-NEXT: store i32 [[TMP12]], i32* [[DOTOMP_IV]], align 4 // CHECK4-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK4: omp.inner.for.cond: -// CHECK4-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK4-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK4-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !15 +// CHECK4-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !15 // CHECK4-NEXT: [[CMP8:%.*]] = icmp sle i32 [[TMP13]], [[TMP14]] // CHECK4-NEXT: br i1 [[CMP8]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_COND_CLEANUP:%.*]] // CHECK4: omp.inner.for.cond.cleanup: // CHECK4-NEXT: br label [[OMP_INNER_FOR_END:%.*]] // CHECK4: omp.inner.for.body: -// CHECK4-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK4-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK4-NEXT: [[TMP17:%.*]] = load i32, i32* [[T_VAR_ADDR]], align 4 -// CHECK4-NEXT: store i32 [[TMP17]], i32* [[T_VAR_CASTED]], align 4 -// CHECK4-NEXT: [[TMP18:%.*]] = load i32, i32* [[T_VAR_CASTED]], align 4 -// CHECK4-NEXT: [[TMP19:%.*]] = load %struct.S.0*, %struct.S.0** [[_TMP7]], align 4 -// CHECK4-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) 
@__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 6, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32, i32, [2 x i32]*, i32, [2 x %struct.S.0]*, %struct.S.0*)* @.omp_outlined..5 to void (i32*, i32*, ...)*), i32 [[TMP15]], i32 [[TMP16]], [2 x i32]* [[VEC2]], i32 [[TMP18]], [2 x %struct.S.0]* [[S_ARR3]], %struct.S.0* [[TMP19]]) +// CHECK4-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !15 +// CHECK4-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !15 +// CHECK4-NEXT: [[TMP17:%.*]] = load i32, i32* [[T_VAR_ADDR]], align 4, !llvm.access.group !15 +// CHECK4-NEXT: store i32 [[TMP17]], i32* [[T_VAR_CASTED]], align 4, !llvm.access.group !15 +// CHECK4-NEXT: [[TMP18:%.*]] = load i32, i32* [[T_VAR_CASTED]], align 4, !llvm.access.group !15 +// CHECK4-NEXT: [[TMP19:%.*]] = load %struct.S.0*, %struct.S.0** [[_TMP7]], align 4, !llvm.access.group !15 +// CHECK4-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 6, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32, i32, [2 x i32]*, i32, [2 x %struct.S.0]*, %struct.S.0*)* @.omp_outlined..5 to void (i32*, i32*, ...)*), i32 [[TMP15]], i32 [[TMP16]], [2 x i32]* [[VEC2]], i32 [[TMP18]], [2 x %struct.S.0]* [[S_ARR3]], %struct.S.0* [[TMP19]]), !llvm.access.group !15 // CHECK4-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK4: omp.inner.for.inc: -// CHECK4-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK4-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK4-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !15 +// CHECK4-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !15 // CHECK4-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP20]], [[TMP21]] -// CHECK4-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 -// CHECK4-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP10:![0-9]+]] +// CHECK4-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !15 +// CHECK4-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP16:![0-9]+]] // CHECK4: omp.inner.for.end: // CHECK4-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK4: omp.loop.exit: @@ -4360,35 +4360,35 @@ int main() { // CHECK4-NEXT: store i32 [[TMP14]], i32* [[DOTOMP_IV]], align 4 // CHECK4-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK4: omp.inner.for.cond: -// CHECK4-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK4-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK4-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !18 +// CHECK4-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !18 // CHECK4-NEXT: [[CMP8:%.*]] = icmp sle i32 [[TMP15]], [[TMP16]] // CHECK4-NEXT: br i1 [[CMP8]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_COND_CLEANUP:%.*]] // CHECK4: omp.inner.for.cond.cleanup: // CHECK4-NEXT: br label [[OMP_INNER_FOR_END:%.*]] // CHECK4: omp.inner.for.body: -// CHECK4-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK4-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !18 // CHECK4-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP17]], 1 // CHECK4-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK4-NEXT: store i32 [[ADD]], i32* [[I]], align 4 -// CHECK4-NEXT: [[TMP18:%.*]] = load i32, i32* [[T_VAR_ADDR]], align 4 -// CHECK4-NEXT: [[TMP19:%.*]] = load i32, i32* [[I]], align 4 +// 
CHECK4-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !18 +// CHECK4-NEXT: [[TMP18:%.*]] = load i32, i32* [[T_VAR_ADDR]], align 4, !llvm.access.group !18 +// CHECK4-NEXT: [[TMP19:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !18 // CHECK4-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x i32], [2 x i32]* [[VEC2]], i32 0, i32 [[TMP19]] -// CHECK4-NEXT: store i32 [[TMP18]], i32* [[ARRAYIDX]], align 4 -// CHECK4-NEXT: [[TMP20:%.*]] = load %struct.S.0*, %struct.S.0** [[_TMP7]], align 4 -// CHECK4-NEXT: [[TMP21:%.*]] = load i32, i32* [[I]], align 4 +// CHECK4-NEXT: store i32 [[TMP18]], i32* [[ARRAYIDX]], align 4, !llvm.access.group !18 +// CHECK4-NEXT: [[TMP20:%.*]] = load %struct.S.0*, %struct.S.0** [[_TMP7]], align 4, !llvm.access.group !18 +// CHECK4-NEXT: [[TMP21:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !18 // CHECK4-NEXT: [[ARRAYIDX9:%.*]] = getelementptr inbounds [2 x %struct.S.0], [2 x %struct.S.0]* [[S_ARR3]], i32 0, i32 [[TMP21]] // CHECK4-NEXT: [[TMP22:%.*]] = bitcast %struct.S.0* [[ARRAYIDX9]] to i8* // CHECK4-NEXT: [[TMP23:%.*]] = bitcast %struct.S.0* [[TMP20]] to i8* -// CHECK4-NEXT: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 4 [[TMP22]], i8* align 4 [[TMP23]], i32 4, i1 false) +// CHECK4-NEXT: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 4 [[TMP22]], i8* align 4 [[TMP23]], i32 4, i1 false), !llvm.access.group !18 // CHECK4-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK4: omp.body.continue: // CHECK4-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK4: omp.inner.for.inc: -// CHECK4-NEXT: [[TMP24:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK4-NEXT: [[TMP24:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !18 // CHECK4-NEXT: [[ADD10:%.*]] = add nsw i32 [[TMP24]], 1 -// CHECK4-NEXT: store i32 [[ADD10]], i32* [[DOTOMP_IV]], align 4 -// CHECK4-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP11:![0-9]+]] +// CHECK4-NEXT: store i32 [[ADD10]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !18 +// CHECK4-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP19:![0-9]+]] // CHECK4: omp.inner.for.end: // CHECK4-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK4: omp.loop.exit: @@ -4705,36 +4705,36 @@ int main() { // CHECK5-NEXT: store i32 [[TMP4]], i32* [[DOTOMP_IV]], align 4 // CHECK5-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK5: omp.inner.for.cond: -// CHECK5-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK5-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK5-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !4 +// CHECK5-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !4 // CHECK5-NEXT: [[CMP4:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] // CHECK5-NEXT: br i1 [[CMP4]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK5: omp.inner.for.body: -// CHECK5-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK5-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !4 // CHECK5-NEXT: [[TMP8:%.*]] = zext i32 [[TMP7]] to i64 -// CHECK5-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK5-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !4 // CHECK5-NEXT: [[TMP10:%.*]] = zext i32 [[TMP9]] to i64 -// CHECK5-NEXT: [[TMP11:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK5-NEXT: [[TMP11:%.*]] = load i32, i32* [[CONV]], align 8, !llvm.access.group !4 // CHECK5-NEXT: 
[[CONV5:%.*]] = bitcast i64* [[G_CASTED]] to i32* -// CHECK5-NEXT: store i32 [[TMP11]], i32* [[CONV5]], align 4 -// CHECK5-NEXT: [[TMP12:%.*]] = load i64, i64* [[G_CASTED]], align 8 -// CHECK5-NEXT: [[TMP13:%.*]] = load i32*, i32** [[TMP]], align 8 -// CHECK5-NEXT: [[TMP14:%.*]] = load volatile i32, i32* [[TMP13]], align 4 +// CHECK5-NEXT: store i32 [[TMP11]], i32* [[CONV5]], align 4, !llvm.access.group !4 +// CHECK5-NEXT: [[TMP12:%.*]] = load i64, i64* [[G_CASTED]], align 8, !llvm.access.group !4 +// CHECK5-NEXT: [[TMP13:%.*]] = load i32*, i32** [[TMP]], align 8, !llvm.access.group !4 +// CHECK5-NEXT: [[TMP14:%.*]] = load volatile i32, i32* [[TMP13]], align 4, !llvm.access.group !4 // CHECK5-NEXT: [[CONV6:%.*]] = bitcast i64* [[G1_CASTED]] to i32* -// CHECK5-NEXT: store i32 [[TMP14]], i32* [[CONV6]], align 4 -// CHECK5-NEXT: [[TMP15:%.*]] = load i64, i64* [[G1_CASTED]], align 8 -// CHECK5-NEXT: [[TMP16:%.*]] = load i32, i32* [[CONV2]], align 8 +// CHECK5-NEXT: store i32 [[TMP14]], i32* [[CONV6]], align 4, !llvm.access.group !4 +// CHECK5-NEXT: [[TMP15:%.*]] = load i64, i64* [[G1_CASTED]], align 8, !llvm.access.group !4 +// CHECK5-NEXT: [[TMP16:%.*]] = load i32, i32* [[CONV2]], align 8, !llvm.access.group !4 // CHECK5-NEXT: [[CONV7:%.*]] = bitcast i64* [[SIVAR_CASTED]] to i32* -// CHECK5-NEXT: store i32 [[TMP16]], i32* [[CONV7]], align 4 -// CHECK5-NEXT: [[TMP17:%.*]] = load i64, i64* [[SIVAR_CASTED]], align 8 -// CHECK5-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 5, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64, i64, i64, i64)* @.omp_outlined..3 to void (i32*, i32*, ...)*), i64 [[TMP8]], i64 [[TMP10]], i64 [[TMP12]], i64 [[TMP15]], i64 [[TMP17]]) +// CHECK5-NEXT: store i32 [[TMP16]], i32* [[CONV7]], align 4, !llvm.access.group !4 +// CHECK5-NEXT: [[TMP17:%.*]] = load i64, i64* [[SIVAR_CASTED]], align 8, !llvm.access.group !4 +// CHECK5-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) 
@__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 5, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64, i64, i64, i64)* @.omp_outlined..3 to void (i32*, i32*, ...)*), i64 [[TMP8]], i64 [[TMP10]], i64 [[TMP12]], i64 [[TMP15]], i64 [[TMP17]]), !llvm.access.group !4 // CHECK5-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK5: omp.inner.for.inc: -// CHECK5-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK5-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK5-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !4 +// CHECK5-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !4 // CHECK5-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP18]], [[TMP19]] -// CHECK5-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 -// CHECK5-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP3:![0-9]+]] +// CHECK5-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !4 +// CHECK5-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP5:![0-9]+]] // CHECK5: omp.inner.for.end: // CHECK5-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK5: omp.loop.exit: @@ -4807,35 +4807,35 @@ int main() { // CHECK5-NEXT: store i32 [[TMP6]], i32* [[DOTOMP_IV]], align 4 // CHECK5-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK5: omp.inner.for.cond: -// CHECK5-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK5-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK5-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !8 +// CHECK5-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !8 // CHECK5-NEXT: [[CMP6:%.*]] = icmp sle i32 [[TMP7]], [[TMP8]] // CHECK5-NEXT: br i1 [[CMP6]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK5: omp.inner.for.body: -// CHECK5-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK5-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !8 // CHECK5-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP9]], 1 // CHECK5-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK5-NEXT: store i32 [[ADD]], i32* [[I]], align 4 -// CHECK5-NEXT: store i32 1, i32* [[CONV]], align 8 -// CHECK5-NEXT: [[TMP10:%.*]] = load i32*, i32** [[TMP]], align 8 -// CHECK5-NEXT: store volatile i32 1, i32* [[TMP10]], align 4 -// CHECK5-NEXT: store i32 2, i32* [[CONV2]], align 8 +// CHECK5-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !8 +// CHECK5-NEXT: store i32 1, i32* [[CONV]], align 8, !llvm.access.group !8 +// CHECK5-NEXT: [[TMP10:%.*]] = load i32*, i32** [[TMP]], align 8, !llvm.access.group !8 +// CHECK5-NEXT: store volatile i32 1, i32* [[TMP10]], align 4, !llvm.access.group !8 +// CHECK5-NEXT: store i32 2, i32* [[CONV2]], align 8, !llvm.access.group !8 // CHECK5-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[CLASS_ANON_0]], %class.anon.0* [[REF_TMP]], i32 0, i32 0 -// CHECK5-NEXT: store i32* [[CONV]], i32** [[TMP11]], align 8 +// CHECK5-NEXT: store i32* [[CONV]], i32** [[TMP11]], align 8, !llvm.access.group !8 // CHECK5-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[CLASS_ANON_0]], %class.anon.0* [[REF_TMP]], i32 0, i32 1 -// CHECK5-NEXT: [[TMP13:%.*]] = load i32*, i32** [[TMP]], align 8 -// CHECK5-NEXT: store i32* [[TMP13]], i32** [[TMP12]], align 8 +// CHECK5-NEXT: [[TMP13:%.*]] = load i32*, i32** [[TMP]], align 8, !llvm.access.group !8 +// CHECK5-NEXT: store i32* [[TMP13]], i32** [[TMP12]], align 8, !llvm.access.group !8 // CHECK5-NEXT: 
[[TMP14:%.*]] = getelementptr inbounds [[CLASS_ANON_0]], %class.anon.0* [[REF_TMP]], i32 0, i32 2 -// CHECK5-NEXT: store i32* [[CONV2]], i32** [[TMP14]], align 8 -// CHECK5-NEXT: call void @"_ZZZ4mainENK3$_0clEvENKUlvE_clEv"(%class.anon.0* nonnull align 8 dereferenceable(24) [[REF_TMP]]) +// CHECK5-NEXT: store i32* [[CONV2]], i32** [[TMP14]], align 8, !llvm.access.group !8 +// CHECK5-NEXT: call void @"_ZZZ4mainENK3$_0clEvENKUlvE_clEv"(%class.anon.0* nonnull align 8 dereferenceable(24) [[REF_TMP]]), !llvm.access.group !8 // CHECK5-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK5: omp.body.continue: // CHECK5-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK5: omp.inner.for.inc: -// CHECK5-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK5-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !8 // CHECK5-NEXT: [[ADD7:%.*]] = add nsw i32 [[TMP15]], 1 -// CHECK5-NEXT: store i32 [[ADD7]], i32* [[DOTOMP_IV]], align 4 -// CHECK5-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP5:![0-9]+]] +// CHECK5-NEXT: store i32 [[ADD7]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !8 +// CHECK5-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP9:![0-9]+]] // CHECK5: omp.inner.for.end: // CHECK5-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK5: omp.loop.exit: @@ -5073,36 +5073,36 @@ int main() { // CHECK6-NEXT: store i32 [[TMP4]], i32* [[DOTOMP_IV]], align 4 // CHECK6-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK6: omp.inner.for.cond: -// CHECK6-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK6-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK6-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !4 +// CHECK6-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !4 // CHECK6-NEXT: [[CMP4:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] // CHECK6-NEXT: br i1 [[CMP4]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK6: omp.inner.for.body: -// CHECK6-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK6-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !4 // CHECK6-NEXT: [[TMP8:%.*]] = zext i32 [[TMP7]] to i64 -// CHECK6-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK6-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !4 // CHECK6-NEXT: [[TMP10:%.*]] = zext i32 [[TMP9]] to i64 -// CHECK6-NEXT: [[TMP11:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK6-NEXT: [[TMP11:%.*]] = load i32, i32* [[CONV]], align 8, !llvm.access.group !4 // CHECK6-NEXT: [[CONV5:%.*]] = bitcast i64* [[G_CASTED]] to i32* -// CHECK6-NEXT: store i32 [[TMP11]], i32* [[CONV5]], align 4 -// CHECK6-NEXT: [[TMP12:%.*]] = load i64, i64* [[G_CASTED]], align 8 -// CHECK6-NEXT: [[TMP13:%.*]] = load i32*, i32** [[TMP]], align 8 -// CHECK6-NEXT: [[TMP14:%.*]] = load volatile i32, i32* [[TMP13]], align 4 +// CHECK6-NEXT: store i32 [[TMP11]], i32* [[CONV5]], align 4, !llvm.access.group !4 +// CHECK6-NEXT: [[TMP12:%.*]] = load i64, i64* [[G_CASTED]], align 8, !llvm.access.group !4 +// CHECK6-NEXT: [[TMP13:%.*]] = load i32*, i32** [[TMP]], align 8, !llvm.access.group !4 +// CHECK6-NEXT: [[TMP14:%.*]] = load volatile i32, i32* [[TMP13]], align 4, !llvm.access.group !4 // CHECK6-NEXT: [[CONV6:%.*]] = bitcast i64* [[G1_CASTED]] to i32* -// CHECK6-NEXT: store i32 [[TMP14]], i32* [[CONV6]], align 4 -// CHECK6-NEXT: [[TMP15:%.*]] = load i64, i64* [[G1_CASTED]], align 
8 -// CHECK6-NEXT: [[TMP16:%.*]] = load i32, i32* [[CONV2]], align 8 +// CHECK6-NEXT: store i32 [[TMP14]], i32* [[CONV6]], align 4, !llvm.access.group !4 +// CHECK6-NEXT: [[TMP15:%.*]] = load i64, i64* [[G1_CASTED]], align 8, !llvm.access.group !4 +// CHECK6-NEXT: [[TMP16:%.*]] = load i32, i32* [[CONV2]], align 8, !llvm.access.group !4 // CHECK6-NEXT: [[CONV7:%.*]] = bitcast i64* [[SIVAR_CASTED]] to i32* -// CHECK6-NEXT: store i32 [[TMP16]], i32* [[CONV7]], align 4 -// CHECK6-NEXT: [[TMP17:%.*]] = load i64, i64* [[SIVAR_CASTED]], align 8 -// CHECK6-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 5, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64, i64, i64, i64)* @.omp_outlined..3 to void (i32*, i32*, ...)*), i64 [[TMP8]], i64 [[TMP10]], i64 [[TMP12]], i64 [[TMP15]], i64 [[TMP17]]) +// CHECK6-NEXT: store i32 [[TMP16]], i32* [[CONV7]], align 4, !llvm.access.group !4 +// CHECK6-NEXT: [[TMP17:%.*]] = load i64, i64* [[SIVAR_CASTED]], align 8, !llvm.access.group !4 +// CHECK6-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 5, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64, i64, i64, i64)* @.omp_outlined..3 to void (i32*, i32*, ...)*), i64 [[TMP8]], i64 [[TMP10]], i64 [[TMP12]], i64 [[TMP15]], i64 [[TMP17]]), !llvm.access.group !4 // CHECK6-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK6: omp.inner.for.inc: -// CHECK6-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK6-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK6-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !4 +// CHECK6-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !4 // CHECK6-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP18]], [[TMP19]] -// CHECK6-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 -// CHECK6-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP3:![0-9]+]] +// CHECK6-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !4 +// CHECK6-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP5:![0-9]+]] // CHECK6: omp.inner.for.end: // CHECK6-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK6: omp.loop.exit: @@ -5175,35 +5175,35 @@ int main() { // CHECK6-NEXT: store i32 [[TMP6]], i32* [[DOTOMP_IV]], align 4 // CHECK6-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK6: omp.inner.for.cond: -// CHECK6-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK6-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK6-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !8 +// CHECK6-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !8 // CHECK6-NEXT: [[CMP6:%.*]] = icmp sle i32 [[TMP7]], [[TMP8]] // CHECK6-NEXT: br i1 [[CMP6]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK6: omp.inner.for.body: -// CHECK6-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK6-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !8 // CHECK6-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP9]], 1 // CHECK6-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK6-NEXT: store i32 [[ADD]], i32* [[I]], align 4 -// CHECK6-NEXT: store i32 1, i32* [[CONV]], align 8 -// CHECK6-NEXT: [[TMP10:%.*]] = load i32*, i32** [[TMP]], align 8 -// CHECK6-NEXT: store volatile i32 1, i32* [[TMP10]], align 4 -// CHECK6-NEXT: store i32 2, i32* 
[[CONV2]], align 8 +// CHECK6-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !8 +// CHECK6-NEXT: store i32 1, i32* [[CONV]], align 8, !llvm.access.group !8 +// CHECK6-NEXT: [[TMP10:%.*]] = load i32*, i32** [[TMP]], align 8, !llvm.access.group !8 +// CHECK6-NEXT: store volatile i32 1, i32* [[TMP10]], align 4, !llvm.access.group !8 +// CHECK6-NEXT: store i32 2, i32* [[CONV2]], align 8, !llvm.access.group !8 // CHECK6-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[CLASS_ANON_0]], %class.anon.0* [[REF_TMP]], i32 0, i32 0 -// CHECK6-NEXT: store i32* [[CONV]], i32** [[TMP11]], align 8 +// CHECK6-NEXT: store i32* [[CONV]], i32** [[TMP11]], align 8, !llvm.access.group !8 // CHECK6-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[CLASS_ANON_0]], %class.anon.0* [[REF_TMP]], i32 0, i32 1 -// CHECK6-NEXT: [[TMP13:%.*]] = load i32*, i32** [[TMP]], align 8 -// CHECK6-NEXT: store i32* [[TMP13]], i32** [[TMP12]], align 8 +// CHECK6-NEXT: [[TMP13:%.*]] = load i32*, i32** [[TMP]], align 8, !llvm.access.group !8 +// CHECK6-NEXT: store i32* [[TMP13]], i32** [[TMP12]], align 8, !llvm.access.group !8 // CHECK6-NEXT: [[TMP14:%.*]] = getelementptr inbounds [[CLASS_ANON_0]], %class.anon.0* [[REF_TMP]], i32 0, i32 2 -// CHECK6-NEXT: store i32* [[CONV2]], i32** [[TMP14]], align 8 -// CHECK6-NEXT: call void @"_ZZZ4mainENK3$_0clEvENKUlvE_clEv"(%class.anon.0* nonnull align 8 dereferenceable(24) [[REF_TMP]]) +// CHECK6-NEXT: store i32* [[CONV2]], i32** [[TMP14]], align 8, !llvm.access.group !8 +// CHECK6-NEXT: call void @"_ZZZ4mainENK3$_0clEvENKUlvE_clEv"(%class.anon.0* nonnull align 8 dereferenceable(24) [[REF_TMP]]), !llvm.access.group !8 // CHECK6-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK6: omp.body.continue: // CHECK6-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK6: omp.inner.for.inc: -// CHECK6-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK6-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !8 // CHECK6-NEXT: [[ADD7:%.*]] = add nsw i32 [[TMP15]], 1 -// CHECK6-NEXT: store i32 [[ADD7]], i32* [[DOTOMP_IV]], align 4 -// CHECK6-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP5:![0-9]+]] +// CHECK6-NEXT: store i32 [[ADD7]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !8 +// CHECK6-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP9:![0-9]+]] // CHECK6: omp.inner.for.end: // CHECK6-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK6: omp.loop.exit: @@ -6910,33 +6910,33 @@ int main() { // CHECK13-NEXT: store i32 [[TMP11]], i32* [[DOTOMP_IV]], align 4 // CHECK13-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK13: omp.inner.for.cond: -// CHECK13-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK13-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK13-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !6 +// CHECK13-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !6 // CHECK13-NEXT: [[CMP7:%.*]] = icmp sle i32 [[TMP12]], [[TMP13]] // CHECK13-NEXT: br i1 [[CMP7]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_COND_CLEANUP:%.*]] // CHECK13: omp.inner.for.cond.cleanup: // CHECK13-NEXT: br label [[OMP_INNER_FOR_END:%.*]] // CHECK13: omp.inner.for.body: -// CHECK13-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK13-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !6 // CHECK13-NEXT: [[TMP15:%.*]] = zext i32 [[TMP14]] to i64 -// CHECK13-NEXT: [[TMP16:%.*]] = 
load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK13-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !6 // CHECK13-NEXT: [[TMP17:%.*]] = zext i32 [[TMP16]] to i64 -// CHECK13-NEXT: [[TMP18:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK13-NEXT: [[TMP18:%.*]] = load i32, i32* [[CONV]], align 8, !llvm.access.group !6 // CHECK13-NEXT: [[CONV8:%.*]] = bitcast i64* [[T_VAR_CASTED]] to i32* -// CHECK13-NEXT: store i32 [[TMP18]], i32* [[CONV8]], align 4 -// CHECK13-NEXT: [[TMP19:%.*]] = load i64, i64* [[T_VAR_CASTED]], align 8 -// CHECK13-NEXT: [[TMP20:%.*]] = load i32, i32* [[CONV1]], align 8 +// CHECK13-NEXT: store i32 [[TMP18]], i32* [[CONV8]], align 4, !llvm.access.group !6 +// CHECK13-NEXT: [[TMP19:%.*]] = load i64, i64* [[T_VAR_CASTED]], align 8, !llvm.access.group !6 +// CHECK13-NEXT: [[TMP20:%.*]] = load i32, i32* [[CONV1]], align 8, !llvm.access.group !6 // CHECK13-NEXT: [[CONV9:%.*]] = bitcast i64* [[SIVAR_CASTED]] to i32* -// CHECK13-NEXT: store i32 [[TMP20]], i32* [[CONV9]], align 4 -// CHECK13-NEXT: [[TMP21:%.*]] = load i64, i64* [[SIVAR_CASTED]], align 8 -// CHECK13-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 7, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64, [2 x i32]*, i64, [2 x %struct.S]*, %struct.S*, i64)* @.omp_outlined..1 to void (i32*, i32*, ...)*), i64 [[TMP15]], i64 [[TMP17]], [2 x i32]* [[VEC2]], i64 [[TMP19]], [2 x %struct.S]* [[S_ARR3]], %struct.S* [[VAR5]], i64 [[TMP21]]) +// CHECK13-NEXT: store i32 [[TMP20]], i32* [[CONV9]], align 4, !llvm.access.group !6 +// CHECK13-NEXT: [[TMP21:%.*]] = load i64, i64* [[SIVAR_CASTED]], align 8, !llvm.access.group !6 +// CHECK13-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) 
@__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 7, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64, [2 x i32]*, i64, [2 x %struct.S]*, %struct.S*, i64)* @.omp_outlined..1 to void (i32*, i32*, ...)*), i64 [[TMP15]], i64 [[TMP17]], [2 x i32]* [[VEC2]], i64 [[TMP19]], [2 x %struct.S]* [[S_ARR3]], %struct.S* [[VAR5]], i64 [[TMP21]]), !llvm.access.group !6 // CHECK13-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK13: omp.inner.for.inc: -// CHECK13-NEXT: [[TMP22:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK13-NEXT: [[TMP23:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK13-NEXT: [[TMP22:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !6 +// CHECK13-NEXT: [[TMP23:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !6 // CHECK13-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP22]], [[TMP23]] -// CHECK13-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 -// CHECK13-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP4:![0-9]+]] +// CHECK13-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !6 +// CHECK13-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP7:![0-9]+]] // CHECK13: omp.inner.for.end: // CHECK13-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK13: omp.loop.exit: @@ -7085,40 +7085,40 @@ int main() { // CHECK13-NEXT: store i32 [[TMP13]], i32* [[DOTOMP_IV]], align 4 // CHECK13-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK13: omp.inner.for.cond: -// CHECK13-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK13-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK13-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !10 +// CHECK13-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !10 // CHECK13-NEXT: [[CMP9:%.*]] = icmp sle i32 [[TMP14]], [[TMP15]] // CHECK13-NEXT: br i1 [[CMP9]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_COND_CLEANUP:%.*]] // CHECK13: omp.inner.for.cond.cleanup: // CHECK13-NEXT: br label [[OMP_INNER_FOR_END:%.*]] // CHECK13: omp.inner.for.body: -// CHECK13-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK13-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !10 // CHECK13-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP16]], 1 // CHECK13-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK13-NEXT: store i32 [[ADD]], i32* [[I]], align 4 -// CHECK13-NEXT: [[TMP17:%.*]] = load i32, i32* [[CONV]], align 8 -// CHECK13-NEXT: [[TMP18:%.*]] = load i32, i32* [[I]], align 4 +// CHECK13-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !10 +// CHECK13-NEXT: [[TMP17:%.*]] = load i32, i32* [[CONV]], align 8, !llvm.access.group !10 +// CHECK13-NEXT: [[TMP18:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !10 // CHECK13-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP18]] to i64 // CHECK13-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x i32], [2 x i32]* [[VEC4]], i64 0, i64 [[IDXPROM]] -// CHECK13-NEXT: store i32 [[TMP17]], i32* [[ARRAYIDX]], align 4 -// CHECK13-NEXT: [[TMP19:%.*]] = load i32, i32* [[I]], align 4 +// CHECK13-NEXT: store i32 [[TMP17]], i32* [[ARRAYIDX]], align 4, !llvm.access.group !10 +// CHECK13-NEXT: [[TMP19:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !10 // CHECK13-NEXT: [[IDXPROM10:%.*]] = sext i32 [[TMP19]] to i64 // CHECK13-NEXT: [[ARRAYIDX11:%.*]] = getelementptr inbounds [2 x %struct.S], [2 x %struct.S]* [[S_ARR5]], i64 0, i64 [[IDXPROM10]] // CHECK13-NEXT: [[TMP20:%.*]] = bitcast %struct.S* 
[[ARRAYIDX11]] to i8* // CHECK13-NEXT: [[TMP21:%.*]] = bitcast %struct.S* [[VAR7]] to i8* -// CHECK13-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 [[TMP20]], i8* align 4 [[TMP21]], i64 4, i1 false) -// CHECK13-NEXT: [[TMP22:%.*]] = load i32, i32* [[I]], align 4 -// CHECK13-NEXT: [[TMP23:%.*]] = load i32, i32* [[CONV1]], align 8 +// CHECK13-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 [[TMP20]], i8* align 4 [[TMP21]], i64 4, i1 false), !llvm.access.group !10 +// CHECK13-NEXT: [[TMP22:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !10 +// CHECK13-NEXT: [[TMP23:%.*]] = load i32, i32* [[CONV1]], align 8, !llvm.access.group !10 // CHECK13-NEXT: [[ADD12:%.*]] = add nsw i32 [[TMP23]], [[TMP22]] -// CHECK13-NEXT: store i32 [[ADD12]], i32* [[CONV1]], align 8 +// CHECK13-NEXT: store i32 [[ADD12]], i32* [[CONV1]], align 8, !llvm.access.group !10 // CHECK13-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK13: omp.body.continue: // CHECK13-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK13: omp.inner.for.inc: -// CHECK13-NEXT: [[TMP24:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK13-NEXT: [[TMP24:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !10 // CHECK13-NEXT: [[ADD13:%.*]] = add nsw i32 [[TMP24]], 1 -// CHECK13-NEXT: store i32 [[ADD13]], i32* [[DOTOMP_IV]], align 4 -// CHECK13-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP6:![0-9]+]] +// CHECK13-NEXT: store i32 [[ADD13]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !10 +// CHECK13-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP11:![0-9]+]] // CHECK13: omp.inner.for.end: // CHECK13-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK13: omp.loop.exit: @@ -7264,30 +7264,30 @@ int main() { // CHECK13-NEXT: store i32 [[TMP12]], i32* [[DOTOMP_IV]], align 4 // CHECK13-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK13: omp.inner.for.cond: -// CHECK13-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK13-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK13-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !15 +// CHECK13-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !15 // CHECK13-NEXT: [[CMP8:%.*]] = icmp sle i32 [[TMP13]], [[TMP14]] // CHECK13-NEXT: br i1 [[CMP8]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_COND_CLEANUP:%.*]] // CHECK13: omp.inner.for.cond.cleanup: // CHECK13-NEXT: br label [[OMP_INNER_FOR_END:%.*]] // CHECK13: omp.inner.for.body: -// CHECK13-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK13-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !15 // CHECK13-NEXT: [[TMP16:%.*]] = zext i32 [[TMP15]] to i64 -// CHECK13-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK13-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !15 // CHECK13-NEXT: [[TMP18:%.*]] = zext i32 [[TMP17]] to i64 -// CHECK13-NEXT: [[TMP19:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK13-NEXT: [[TMP19:%.*]] = load i32, i32* [[CONV]], align 8, !llvm.access.group !15 // CHECK13-NEXT: [[CONV9:%.*]] = bitcast i64* [[T_VAR_CASTED]] to i32* -// CHECK13-NEXT: store i32 [[TMP19]], i32* [[CONV9]], align 4 -// CHECK13-NEXT: [[TMP20:%.*]] = load i64, i64* [[T_VAR_CASTED]], align 8 -// CHECK13-NEXT: [[TMP21:%.*]] = load %struct.S.0*, %struct.S.0** [[_TMP7]], align 8 -// CHECK13-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) 
@__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 6, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64, [2 x i32]*, i64, [2 x %struct.S.0]*, %struct.S.0*)* @.omp_outlined..3 to void (i32*, i32*, ...)*), i64 [[TMP16]], i64 [[TMP18]], [2 x i32]* [[VEC2]], i64 [[TMP20]], [2 x %struct.S.0]* [[S_ARR3]], %struct.S.0* [[TMP21]]) +// CHECK13-NEXT: store i32 [[TMP19]], i32* [[CONV9]], align 4, !llvm.access.group !15 +// CHECK13-NEXT: [[TMP20:%.*]] = load i64, i64* [[T_VAR_CASTED]], align 8, !llvm.access.group !15 +// CHECK13-NEXT: [[TMP21:%.*]] = load %struct.S.0*, %struct.S.0** [[_TMP7]], align 8, !llvm.access.group !15 +// CHECK13-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 6, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64, [2 x i32]*, i64, [2 x %struct.S.0]*, %struct.S.0*)* @.omp_outlined..3 to void (i32*, i32*, ...)*), i64 [[TMP16]], i64 [[TMP18]], [2 x i32]* [[VEC2]], i64 [[TMP20]], [2 x %struct.S.0]* [[S_ARR3]], %struct.S.0* [[TMP21]]), !llvm.access.group !15 // CHECK13-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK13: omp.inner.for.inc: -// CHECK13-NEXT: [[TMP22:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK13-NEXT: [[TMP23:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK13-NEXT: [[TMP22:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !15 +// CHECK13-NEXT: [[TMP23:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !15 // CHECK13-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP22]], [[TMP23]] -// CHECK13-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 -// CHECK13-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP9:![0-9]+]] +// CHECK13-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !15 +// CHECK13-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP16:![0-9]+]] // CHECK13: omp.inner.for.end: // CHECK13-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK13: omp.loop.exit: @@ -7418,37 +7418,37 @@ int main() { // CHECK13-NEXT: store i32 [[TMP14]], i32* [[DOTOMP_IV]], align 4 // CHECK13-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK13: omp.inner.for.cond: -// CHECK13-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK13-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK13-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !18 +// CHECK13-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !18 // CHECK13-NEXT: [[CMP10:%.*]] = icmp sle i32 [[TMP15]], [[TMP16]] // CHECK13-NEXT: br i1 [[CMP10]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_COND_CLEANUP:%.*]] // CHECK13: omp.inner.for.cond.cleanup: // CHECK13-NEXT: br label [[OMP_INNER_FOR_END:%.*]] // CHECK13: omp.inner.for.body: -// CHECK13-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK13-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !18 // CHECK13-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP17]], 1 // CHECK13-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK13-NEXT: store i32 [[ADD]], i32* [[I]], align 4 -// CHECK13-NEXT: [[TMP18:%.*]] = load i32, i32* [[CONV]], align 8 -// CHECK13-NEXT: [[TMP19:%.*]] = load i32, i32* [[I]], align 4 +// CHECK13-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !18 +// CHECK13-NEXT: [[TMP18:%.*]] = load i32, i32* [[CONV]], align 8, !llvm.access.group !18 +// CHECK13-NEXT: [[TMP19:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !18 // CHECK13-NEXT: 
[[IDXPROM:%.*]] = sext i32 [[TMP19]] to i64 // CHECK13-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x i32], [2 x i32]* [[VEC4]], i64 0, i64 [[IDXPROM]] -// CHECK13-NEXT: store i32 [[TMP18]], i32* [[ARRAYIDX]], align 4 -// CHECK13-NEXT: [[TMP20:%.*]] = load %struct.S.0*, %struct.S.0** [[_TMP9]], align 8 -// CHECK13-NEXT: [[TMP21:%.*]] = load i32, i32* [[I]], align 4 +// CHECK13-NEXT: store i32 [[TMP18]], i32* [[ARRAYIDX]], align 4, !llvm.access.group !18 +// CHECK13-NEXT: [[TMP20:%.*]] = load %struct.S.0*, %struct.S.0** [[_TMP9]], align 8, !llvm.access.group !18 +// CHECK13-NEXT: [[TMP21:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !18 // CHECK13-NEXT: [[IDXPROM11:%.*]] = sext i32 [[TMP21]] to i64 // CHECK13-NEXT: [[ARRAYIDX12:%.*]] = getelementptr inbounds [2 x %struct.S.0], [2 x %struct.S.0]* [[S_ARR5]], i64 0, i64 [[IDXPROM11]] // CHECK13-NEXT: [[TMP22:%.*]] = bitcast %struct.S.0* [[ARRAYIDX12]] to i8* // CHECK13-NEXT: [[TMP23:%.*]] = bitcast %struct.S.0* [[TMP20]] to i8* -// CHECK13-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 [[TMP22]], i8* align 4 [[TMP23]], i64 4, i1 false) +// CHECK13-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 [[TMP22]], i8* align 4 [[TMP23]], i64 4, i1 false), !llvm.access.group !18 // CHECK13-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK13: omp.body.continue: // CHECK13-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK13: omp.inner.for.inc: -// CHECK13-NEXT: [[TMP24:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK13-NEXT: [[TMP24:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !18 // CHECK13-NEXT: [[ADD13:%.*]] = add nsw i32 [[TMP24]], 1 -// CHECK13-NEXT: store i32 [[ADD13]], i32* [[DOTOMP_IV]], align 4 -// CHECK13-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP10:![0-9]+]] +// CHECK13-NEXT: store i32 [[ADD13]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !18 +// CHECK13-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP19:![0-9]+]] // CHECK13: omp.inner.for.end: // CHECK13-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK13: omp.loop.exit: @@ -7677,33 +7677,33 @@ int main() { // CHECK14-NEXT: store i32 [[TMP11]], i32* [[DOTOMP_IV]], align 4 // CHECK14-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK14: omp.inner.for.cond: -// CHECK14-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK14-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK14-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !6 +// CHECK14-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !6 // CHECK14-NEXT: [[CMP7:%.*]] = icmp sle i32 [[TMP12]], [[TMP13]] // CHECK14-NEXT: br i1 [[CMP7]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_COND_CLEANUP:%.*]] // CHECK14: omp.inner.for.cond.cleanup: // CHECK14-NEXT: br label [[OMP_INNER_FOR_END:%.*]] // CHECK14: omp.inner.for.body: -// CHECK14-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK14-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !6 // CHECK14-NEXT: [[TMP15:%.*]] = zext i32 [[TMP14]] to i64 -// CHECK14-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK14-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !6 // CHECK14-NEXT: [[TMP17:%.*]] = zext i32 [[TMP16]] to i64 -// CHECK14-NEXT: [[TMP18:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK14-NEXT: [[TMP18:%.*]] = load i32, i32* [[CONV]], align 8, !llvm.access.group !6 // 
CHECK14-NEXT: [[CONV8:%.*]] = bitcast i64* [[T_VAR_CASTED]] to i32* -// CHECK14-NEXT: store i32 [[TMP18]], i32* [[CONV8]], align 4 -// CHECK14-NEXT: [[TMP19:%.*]] = load i64, i64* [[T_VAR_CASTED]], align 8 -// CHECK14-NEXT: [[TMP20:%.*]] = load i32, i32* [[CONV1]], align 8 +// CHECK14-NEXT: store i32 [[TMP18]], i32* [[CONV8]], align 4, !llvm.access.group !6 +// CHECK14-NEXT: [[TMP19:%.*]] = load i64, i64* [[T_VAR_CASTED]], align 8, !llvm.access.group !6 +// CHECK14-NEXT: [[TMP20:%.*]] = load i32, i32* [[CONV1]], align 8, !llvm.access.group !6 // CHECK14-NEXT: [[CONV9:%.*]] = bitcast i64* [[SIVAR_CASTED]] to i32* -// CHECK14-NEXT: store i32 [[TMP20]], i32* [[CONV9]], align 4 -// CHECK14-NEXT: [[TMP21:%.*]] = load i64, i64* [[SIVAR_CASTED]], align 8 -// CHECK14-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 7, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64, [2 x i32]*, i64, [2 x %struct.S]*, %struct.S*, i64)* @.omp_outlined..1 to void (i32*, i32*, ...)*), i64 [[TMP15]], i64 [[TMP17]], [2 x i32]* [[VEC2]], i64 [[TMP19]], [2 x %struct.S]* [[S_ARR3]], %struct.S* [[VAR5]], i64 [[TMP21]]) +// CHECK14-NEXT: store i32 [[TMP20]], i32* [[CONV9]], align 4, !llvm.access.group !6 +// CHECK14-NEXT: [[TMP21:%.*]] = load i64, i64* [[SIVAR_CASTED]], align 8, !llvm.access.group !6 +// CHECK14-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 7, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64, [2 x i32]*, i64, [2 x %struct.S]*, %struct.S*, i64)* @.omp_outlined..1 to void (i32*, i32*, ...)*), i64 [[TMP15]], i64 [[TMP17]], [2 x i32]* [[VEC2]], i64 [[TMP19]], [2 x %struct.S]* [[S_ARR3]], %struct.S* [[VAR5]], i64 [[TMP21]]), !llvm.access.group !6 // CHECK14-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK14: omp.inner.for.inc: -// CHECK14-NEXT: [[TMP22:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK14-NEXT: [[TMP23:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK14-NEXT: [[TMP22:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !6 +// CHECK14-NEXT: [[TMP23:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !6 // CHECK14-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP22]], [[TMP23]] -// CHECK14-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 -// CHECK14-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP4:![0-9]+]] +// CHECK14-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !6 +// CHECK14-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP7:![0-9]+]] // CHECK14: omp.inner.for.end: // CHECK14-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK14: omp.loop.exit: @@ -7852,40 +7852,40 @@ int main() { // CHECK14-NEXT: store i32 [[TMP13]], i32* [[DOTOMP_IV]], align 4 // CHECK14-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK14: omp.inner.for.cond: -// CHECK14-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK14-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK14-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !10 +// CHECK14-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !10 // CHECK14-NEXT: [[CMP9:%.*]] = icmp sle i32 [[TMP14]], [[TMP15]] // CHECK14-NEXT: br i1 [[CMP9]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_COND_CLEANUP:%.*]] // CHECK14: omp.inner.for.cond.cleanup: // CHECK14-NEXT: br label [[OMP_INNER_FOR_END:%.*]] // CHECK14: omp.inner.for.body: -// 
CHECK14-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK14-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !10 // CHECK14-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP16]], 1 // CHECK14-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK14-NEXT: store i32 [[ADD]], i32* [[I]], align 4 -// CHECK14-NEXT: [[TMP17:%.*]] = load i32, i32* [[CONV]], align 8 -// CHECK14-NEXT: [[TMP18:%.*]] = load i32, i32* [[I]], align 4 +// CHECK14-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !10 +// CHECK14-NEXT: [[TMP17:%.*]] = load i32, i32* [[CONV]], align 8, !llvm.access.group !10 +// CHECK14-NEXT: [[TMP18:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !10 // CHECK14-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP18]] to i64 // CHECK14-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x i32], [2 x i32]* [[VEC4]], i64 0, i64 [[IDXPROM]] -// CHECK14-NEXT: store i32 [[TMP17]], i32* [[ARRAYIDX]], align 4 -// CHECK14-NEXT: [[TMP19:%.*]] = load i32, i32* [[I]], align 4 +// CHECK14-NEXT: store i32 [[TMP17]], i32* [[ARRAYIDX]], align 4, !llvm.access.group !10 +// CHECK14-NEXT: [[TMP19:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !10 // CHECK14-NEXT: [[IDXPROM10:%.*]] = sext i32 [[TMP19]] to i64 // CHECK14-NEXT: [[ARRAYIDX11:%.*]] = getelementptr inbounds [2 x %struct.S], [2 x %struct.S]* [[S_ARR5]], i64 0, i64 [[IDXPROM10]] // CHECK14-NEXT: [[TMP20:%.*]] = bitcast %struct.S* [[ARRAYIDX11]] to i8* // CHECK14-NEXT: [[TMP21:%.*]] = bitcast %struct.S* [[VAR7]] to i8* -// CHECK14-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 [[TMP20]], i8* align 4 [[TMP21]], i64 4, i1 false) -// CHECK14-NEXT: [[TMP22:%.*]] = load i32, i32* [[I]], align 4 -// CHECK14-NEXT: [[TMP23:%.*]] = load i32, i32* [[CONV1]], align 8 +// CHECK14-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 [[TMP20]], i8* align 4 [[TMP21]], i64 4, i1 false), !llvm.access.group !10 +// CHECK14-NEXT: [[TMP22:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !10 +// CHECK14-NEXT: [[TMP23:%.*]] = load i32, i32* [[CONV1]], align 8, !llvm.access.group !10 // CHECK14-NEXT: [[ADD12:%.*]] = add nsw i32 [[TMP23]], [[TMP22]] -// CHECK14-NEXT: store i32 [[ADD12]], i32* [[CONV1]], align 8 +// CHECK14-NEXT: store i32 [[ADD12]], i32* [[CONV1]], align 8, !llvm.access.group !10 // CHECK14-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK14: omp.body.continue: // CHECK14-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK14: omp.inner.for.inc: -// CHECK14-NEXT: [[TMP24:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK14-NEXT: [[TMP24:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !10 // CHECK14-NEXT: [[ADD13:%.*]] = add nsw i32 [[TMP24]], 1 -// CHECK14-NEXT: store i32 [[ADD13]], i32* [[DOTOMP_IV]], align 4 -// CHECK14-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP6:![0-9]+]] +// CHECK14-NEXT: store i32 [[ADD13]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !10 +// CHECK14-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP11:![0-9]+]] // CHECK14: omp.inner.for.end: // CHECK14-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK14: omp.loop.exit: @@ -8031,30 +8031,30 @@ int main() { // CHECK14-NEXT: store i32 [[TMP12]], i32* [[DOTOMP_IV]], align 4 // CHECK14-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK14: omp.inner.for.cond: -// CHECK14-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK14-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK14-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_IV]], align 
4, !llvm.access.group !15 +// CHECK14-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !15 // CHECK14-NEXT: [[CMP8:%.*]] = icmp sle i32 [[TMP13]], [[TMP14]] // CHECK14-NEXT: br i1 [[CMP8]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_COND_CLEANUP:%.*]] // CHECK14: omp.inner.for.cond.cleanup: // CHECK14-NEXT: br label [[OMP_INNER_FOR_END:%.*]] // CHECK14: omp.inner.for.body: -// CHECK14-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK14-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !15 // CHECK14-NEXT: [[TMP16:%.*]] = zext i32 [[TMP15]] to i64 -// CHECK14-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK14-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !15 // CHECK14-NEXT: [[TMP18:%.*]] = zext i32 [[TMP17]] to i64 -// CHECK14-NEXT: [[TMP19:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK14-NEXT: [[TMP19:%.*]] = load i32, i32* [[CONV]], align 8, !llvm.access.group !15 // CHECK14-NEXT: [[CONV9:%.*]] = bitcast i64* [[T_VAR_CASTED]] to i32* -// CHECK14-NEXT: store i32 [[TMP19]], i32* [[CONV9]], align 4 -// CHECK14-NEXT: [[TMP20:%.*]] = load i64, i64* [[T_VAR_CASTED]], align 8 -// CHECK14-NEXT: [[TMP21:%.*]] = load %struct.S.0*, %struct.S.0** [[_TMP7]], align 8 -// CHECK14-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 6, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64, [2 x i32]*, i64, [2 x %struct.S.0]*, %struct.S.0*)* @.omp_outlined..3 to void (i32*, i32*, ...)*), i64 [[TMP16]], i64 [[TMP18]], [2 x i32]* [[VEC2]], i64 [[TMP20]], [2 x %struct.S.0]* [[S_ARR3]], %struct.S.0* [[TMP21]]) +// CHECK14-NEXT: store i32 [[TMP19]], i32* [[CONV9]], align 4, !llvm.access.group !15 +// CHECK14-NEXT: [[TMP20:%.*]] = load i64, i64* [[T_VAR_CASTED]], align 8, !llvm.access.group !15 +// CHECK14-NEXT: [[TMP21:%.*]] = load %struct.S.0*, %struct.S.0** [[_TMP7]], align 8, !llvm.access.group !15 +// CHECK14-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) 
@__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 6, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64, [2 x i32]*, i64, [2 x %struct.S.0]*, %struct.S.0*)* @.omp_outlined..3 to void (i32*, i32*, ...)*), i64 [[TMP16]], i64 [[TMP18]], [2 x i32]* [[VEC2]], i64 [[TMP20]], [2 x %struct.S.0]* [[S_ARR3]], %struct.S.0* [[TMP21]]), !llvm.access.group !15 // CHECK14-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK14: omp.inner.for.inc: -// CHECK14-NEXT: [[TMP22:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK14-NEXT: [[TMP23:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK14-NEXT: [[TMP22:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !15 +// CHECK14-NEXT: [[TMP23:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !15 // CHECK14-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP22]], [[TMP23]] -// CHECK14-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 -// CHECK14-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP9:![0-9]+]] +// CHECK14-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !15 +// CHECK14-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP16:![0-9]+]] // CHECK14: omp.inner.for.end: // CHECK14-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK14: omp.loop.exit: @@ -8185,37 +8185,37 @@ int main() { // CHECK14-NEXT: store i32 [[TMP14]], i32* [[DOTOMP_IV]], align 4 // CHECK14-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK14: omp.inner.for.cond: -// CHECK14-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK14-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK14-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !18 +// CHECK14-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !18 // CHECK14-NEXT: [[CMP10:%.*]] = icmp sle i32 [[TMP15]], [[TMP16]] // CHECK14-NEXT: br i1 [[CMP10]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_COND_CLEANUP:%.*]] // CHECK14: omp.inner.for.cond.cleanup: // CHECK14-NEXT: br label [[OMP_INNER_FOR_END:%.*]] // CHECK14: omp.inner.for.body: -// CHECK14-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK14-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !18 // CHECK14-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP17]], 1 // CHECK14-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK14-NEXT: store i32 [[ADD]], i32* [[I]], align 4 -// CHECK14-NEXT: [[TMP18:%.*]] = load i32, i32* [[CONV]], align 8 -// CHECK14-NEXT: [[TMP19:%.*]] = load i32, i32* [[I]], align 4 +// CHECK14-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !18 +// CHECK14-NEXT: [[TMP18:%.*]] = load i32, i32* [[CONV]], align 8, !llvm.access.group !18 +// CHECK14-NEXT: [[TMP19:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !18 // CHECK14-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP19]] to i64 // CHECK14-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x i32], [2 x i32]* [[VEC4]], i64 0, i64 [[IDXPROM]] -// CHECK14-NEXT: store i32 [[TMP18]], i32* [[ARRAYIDX]], align 4 -// CHECK14-NEXT: [[TMP20:%.*]] = load %struct.S.0*, %struct.S.0** [[_TMP9]], align 8 -// CHECK14-NEXT: [[TMP21:%.*]] = load i32, i32* [[I]], align 4 +// CHECK14-NEXT: store i32 [[TMP18]], i32* [[ARRAYIDX]], align 4, !llvm.access.group !18 +// CHECK14-NEXT: [[TMP20:%.*]] = load %struct.S.0*, %struct.S.0** [[_TMP9]], align 8, !llvm.access.group !18 +// CHECK14-NEXT: [[TMP21:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !18 // CHECK14-NEXT: [[IDXPROM11:%.*]] = sext i32 
[[TMP21]] to i64 // CHECK14-NEXT: [[ARRAYIDX12:%.*]] = getelementptr inbounds [2 x %struct.S.0], [2 x %struct.S.0]* [[S_ARR5]], i64 0, i64 [[IDXPROM11]] // CHECK14-NEXT: [[TMP22:%.*]] = bitcast %struct.S.0* [[ARRAYIDX12]] to i8* // CHECK14-NEXT: [[TMP23:%.*]] = bitcast %struct.S.0* [[TMP20]] to i8* -// CHECK14-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 [[TMP22]], i8* align 4 [[TMP23]], i64 4, i1 false) +// CHECK14-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 [[TMP22]], i8* align 4 [[TMP23]], i64 4, i1 false), !llvm.access.group !18 // CHECK14-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK14: omp.body.continue: // CHECK14-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK14: omp.inner.for.inc: -// CHECK14-NEXT: [[TMP24:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK14-NEXT: [[TMP24:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !18 // CHECK14-NEXT: [[ADD13:%.*]] = add nsw i32 [[TMP24]], 1 -// CHECK14-NEXT: store i32 [[ADD13]], i32* [[DOTOMP_IV]], align 4 -// CHECK14-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP10:![0-9]+]] +// CHECK14-NEXT: store i32 [[ADD13]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !18 +// CHECK14-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP19:![0-9]+]] // CHECK14: omp.inner.for.end: // CHECK14-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK14: omp.loop.exit: @@ -8438,29 +8438,29 @@ int main() { // CHECK15-NEXT: store i32 [[TMP11]], i32* [[DOTOMP_IV]], align 4 // CHECK15-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK15: omp.inner.for.cond: -// CHECK15-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK15-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK15-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !7 +// CHECK15-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !7 // CHECK15-NEXT: [[CMP6:%.*]] = icmp sle i32 [[TMP12]], [[TMP13]] // CHECK15-NEXT: br i1 [[CMP6]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_COND_CLEANUP:%.*]] // CHECK15: omp.inner.for.cond.cleanup: // CHECK15-NEXT: br label [[OMP_INNER_FOR_END:%.*]] // CHECK15: omp.inner.for.body: -// CHECK15-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK15-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK15-NEXT: [[TMP16:%.*]] = load i32, i32* [[T_VAR_ADDR]], align 4 -// CHECK15-NEXT: store i32 [[TMP16]], i32* [[T_VAR_CASTED]], align 4 -// CHECK15-NEXT: [[TMP17:%.*]] = load i32, i32* [[T_VAR_CASTED]], align 4 -// CHECK15-NEXT: [[TMP18:%.*]] = load i32, i32* [[SIVAR_ADDR]], align 4 -// CHECK15-NEXT: store i32 [[TMP18]], i32* [[SIVAR_CASTED]], align 4 -// CHECK15-NEXT: [[TMP19:%.*]] = load i32, i32* [[SIVAR_CASTED]], align 4 -// CHECK15-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) 
@__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 7, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32, i32, [2 x i32]*, i32, [2 x %struct.S]*, %struct.S*, i32)* @.omp_outlined..1 to void (i32*, i32*, ...)*), i32 [[TMP14]], i32 [[TMP15]], [2 x i32]* [[VEC1]], i32 [[TMP17]], [2 x %struct.S]* [[S_ARR2]], %struct.S* [[VAR4]], i32 [[TMP19]]) +// CHECK15-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !7 +// CHECK15-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !7 +// CHECK15-NEXT: [[TMP16:%.*]] = load i32, i32* [[T_VAR_ADDR]], align 4, !llvm.access.group !7 +// CHECK15-NEXT: store i32 [[TMP16]], i32* [[T_VAR_CASTED]], align 4, !llvm.access.group !7 +// CHECK15-NEXT: [[TMP17:%.*]] = load i32, i32* [[T_VAR_CASTED]], align 4, !llvm.access.group !7 +// CHECK15-NEXT: [[TMP18:%.*]] = load i32, i32* [[SIVAR_ADDR]], align 4, !llvm.access.group !7 +// CHECK15-NEXT: store i32 [[TMP18]], i32* [[SIVAR_CASTED]], align 4, !llvm.access.group !7 +// CHECK15-NEXT: [[TMP19:%.*]] = load i32, i32* [[SIVAR_CASTED]], align 4, !llvm.access.group !7 +// CHECK15-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 7, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32, i32, [2 x i32]*, i32, [2 x %struct.S]*, %struct.S*, i32)* @.omp_outlined..1 to void (i32*, i32*, ...)*), i32 [[TMP14]], i32 [[TMP15]], [2 x i32]* [[VEC1]], i32 [[TMP17]], [2 x %struct.S]* [[S_ARR2]], %struct.S* [[VAR4]], i32 [[TMP19]]), !llvm.access.group !7 // CHECK15-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK15: omp.inner.for.inc: -// CHECK15-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK15-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK15-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !7 +// CHECK15-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !7 // CHECK15-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP20]], [[TMP21]] -// CHECK15-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 -// CHECK15-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP5:![0-9]+]] +// CHECK15-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !7 +// CHECK15-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP8:![0-9]+]] // CHECK15: omp.inner.for.end: // CHECK15-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK15: omp.loop.exit: @@ -8605,38 +8605,38 @@ int main() { // CHECK15-NEXT: store i32 [[TMP13]], i32* [[DOTOMP_IV]], align 4 // CHECK15-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK15: omp.inner.for.cond: -// CHECK15-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK15-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK15-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !11 +// CHECK15-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !11 // CHECK15-NEXT: [[CMP6:%.*]] = icmp sle i32 [[TMP14]], [[TMP15]] // CHECK15-NEXT: br i1 [[CMP6]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_COND_CLEANUP:%.*]] // CHECK15: omp.inner.for.cond.cleanup: // CHECK15-NEXT: br label [[OMP_INNER_FOR_END:%.*]] // CHECK15: omp.inner.for.body: -// CHECK15-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK15-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !11 // CHECK15-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP16]], 1 // CHECK15-NEXT: [[ADD:%.*]] 
= add nsw i32 0, [[MUL]] -// CHECK15-NEXT: store i32 [[ADD]], i32* [[I]], align 4 -// CHECK15-NEXT: [[TMP17:%.*]] = load i32, i32* [[T_VAR_ADDR]], align 4 -// CHECK15-NEXT: [[TMP18:%.*]] = load i32, i32* [[I]], align 4 +// CHECK15-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !11 +// CHECK15-NEXT: [[TMP17:%.*]] = load i32, i32* [[T_VAR_ADDR]], align 4, !llvm.access.group !11 +// CHECK15-NEXT: [[TMP18:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !11 // CHECK15-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x i32], [2 x i32]* [[VEC1]], i32 0, i32 [[TMP18]] -// CHECK15-NEXT: store i32 [[TMP17]], i32* [[ARRAYIDX]], align 4 -// CHECK15-NEXT: [[TMP19:%.*]] = load i32, i32* [[I]], align 4 +// CHECK15-NEXT: store i32 [[TMP17]], i32* [[ARRAYIDX]], align 4, !llvm.access.group !11 +// CHECK15-NEXT: [[TMP19:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !11 // CHECK15-NEXT: [[ARRAYIDX7:%.*]] = getelementptr inbounds [2 x %struct.S], [2 x %struct.S]* [[S_ARR2]], i32 0, i32 [[TMP19]] // CHECK15-NEXT: [[TMP20:%.*]] = bitcast %struct.S* [[ARRAYIDX7]] to i8* // CHECK15-NEXT: [[TMP21:%.*]] = bitcast %struct.S* [[VAR4]] to i8* -// CHECK15-NEXT: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 4 [[TMP20]], i8* align 4 [[TMP21]], i32 4, i1 false) -// CHECK15-NEXT: [[TMP22:%.*]] = load i32, i32* [[I]], align 4 -// CHECK15-NEXT: [[TMP23:%.*]] = load i32, i32* [[SIVAR_ADDR]], align 4 +// CHECK15-NEXT: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 4 [[TMP20]], i8* align 4 [[TMP21]], i32 4, i1 false), !llvm.access.group !11 +// CHECK15-NEXT: [[TMP22:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !11 +// CHECK15-NEXT: [[TMP23:%.*]] = load i32, i32* [[SIVAR_ADDR]], align 4, !llvm.access.group !11 // CHECK15-NEXT: [[ADD8:%.*]] = add nsw i32 [[TMP23]], [[TMP22]] -// CHECK15-NEXT: store i32 [[ADD8]], i32* [[SIVAR_ADDR]], align 4 +// CHECK15-NEXT: store i32 [[ADD8]], i32* [[SIVAR_ADDR]], align 4, !llvm.access.group !11 // CHECK15-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK15: omp.body.continue: // CHECK15-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK15: omp.inner.for.inc: -// CHECK15-NEXT: [[TMP24:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK15-NEXT: [[TMP24:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !11 // CHECK15-NEXT: [[ADD9:%.*]] = add nsw i32 [[TMP24]], 1 -// CHECK15-NEXT: store i32 [[ADD9]], i32* [[DOTOMP_IV]], align 4 -// CHECK15-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP7:![0-9]+]] +// CHECK15-NEXT: store i32 [[ADD9]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !11 +// CHECK15-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP12:![0-9]+]] // CHECK15: omp.inner.for.end: // CHECK15-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK15: omp.loop.exit: @@ -8779,27 +8779,27 @@ int main() { // CHECK15-NEXT: store i32 [[TMP12]], i32* [[DOTOMP_IV]], align 4 // CHECK15-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK15: omp.inner.for.cond: -// CHECK15-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK15-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK15-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !16 +// CHECK15-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !16 // CHECK15-NEXT: [[CMP8:%.*]] = icmp sle i32 [[TMP13]], [[TMP14]] // CHECK15-NEXT: br i1 [[CMP8]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_COND_CLEANUP:%.*]] // CHECK15: omp.inner.for.cond.cleanup: // 
CHECK15-NEXT: br label [[OMP_INNER_FOR_END:%.*]] // CHECK15: omp.inner.for.body: -// CHECK15-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK15-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK15-NEXT: [[TMP17:%.*]] = load i32, i32* [[T_VAR_ADDR]], align 4 -// CHECK15-NEXT: store i32 [[TMP17]], i32* [[T_VAR_CASTED]], align 4 -// CHECK15-NEXT: [[TMP18:%.*]] = load i32, i32* [[T_VAR_CASTED]], align 4 -// CHECK15-NEXT: [[TMP19:%.*]] = load %struct.S.0*, %struct.S.0** [[_TMP7]], align 4 -// CHECK15-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 6, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32, i32, [2 x i32]*, i32, [2 x %struct.S.0]*, %struct.S.0*)* @.omp_outlined..3 to void (i32*, i32*, ...)*), i32 [[TMP15]], i32 [[TMP16]], [2 x i32]* [[VEC2]], i32 [[TMP18]], [2 x %struct.S.0]* [[S_ARR3]], %struct.S.0* [[TMP19]]) +// CHECK15-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !16 +// CHECK15-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !16 +// CHECK15-NEXT: [[TMP17:%.*]] = load i32, i32* [[T_VAR_ADDR]], align 4, !llvm.access.group !16 +// CHECK15-NEXT: store i32 [[TMP17]], i32* [[T_VAR_CASTED]], align 4, !llvm.access.group !16 +// CHECK15-NEXT: [[TMP18:%.*]] = load i32, i32* [[T_VAR_CASTED]], align 4, !llvm.access.group !16 +// CHECK15-NEXT: [[TMP19:%.*]] = load %struct.S.0*, %struct.S.0** [[_TMP7]], align 4, !llvm.access.group !16 +// CHECK15-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 6, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32, i32, [2 x i32]*, i32, [2 x %struct.S.0]*, %struct.S.0*)* @.omp_outlined..3 to void (i32*, i32*, ...)*), i32 [[TMP15]], i32 [[TMP16]], [2 x i32]* [[VEC2]], i32 [[TMP18]], [2 x %struct.S.0]* [[S_ARR3]], %struct.S.0* [[TMP19]]), !llvm.access.group !16 // CHECK15-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK15: omp.inner.for.inc: -// CHECK15-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK15-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK15-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !16 +// CHECK15-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !16 // CHECK15-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP20]], [[TMP21]] -// CHECK15-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 -// CHECK15-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP10:![0-9]+]] +// CHECK15-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !16 +// CHECK15-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP17:![0-9]+]] // CHECK15: omp.inner.for.end: // CHECK15-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK15: omp.loop.exit: @@ -8927,35 +8927,35 @@ int main() { // CHECK15-NEXT: store i32 [[TMP14]], i32* [[DOTOMP_IV]], align 4 // CHECK15-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK15: omp.inner.for.cond: -// CHECK15-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK15-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK15-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !19 +// CHECK15-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !19 // CHECK15-NEXT: [[CMP8:%.*]] = icmp sle i32 [[TMP15]], [[TMP16]] // CHECK15-NEXT: br i1 [[CMP8]], label 
[[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_COND_CLEANUP:%.*]] // CHECK15: omp.inner.for.cond.cleanup: // CHECK15-NEXT: br label [[OMP_INNER_FOR_END:%.*]] // CHECK15: omp.inner.for.body: -// CHECK15-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK15-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !19 // CHECK15-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP17]], 1 // CHECK15-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK15-NEXT: store i32 [[ADD]], i32* [[I]], align 4 -// CHECK15-NEXT: [[TMP18:%.*]] = load i32, i32* [[T_VAR_ADDR]], align 4 -// CHECK15-NEXT: [[TMP19:%.*]] = load i32, i32* [[I]], align 4 +// CHECK15-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !19 +// CHECK15-NEXT: [[TMP18:%.*]] = load i32, i32* [[T_VAR_ADDR]], align 4, !llvm.access.group !19 +// CHECK15-NEXT: [[TMP19:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !19 // CHECK15-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x i32], [2 x i32]* [[VEC2]], i32 0, i32 [[TMP19]] -// CHECK15-NEXT: store i32 [[TMP18]], i32* [[ARRAYIDX]], align 4 -// CHECK15-NEXT: [[TMP20:%.*]] = load %struct.S.0*, %struct.S.0** [[_TMP7]], align 4 -// CHECK15-NEXT: [[TMP21:%.*]] = load i32, i32* [[I]], align 4 +// CHECK15-NEXT: store i32 [[TMP18]], i32* [[ARRAYIDX]], align 4, !llvm.access.group !19 +// CHECK15-NEXT: [[TMP20:%.*]] = load %struct.S.0*, %struct.S.0** [[_TMP7]], align 4, !llvm.access.group !19 +// CHECK15-NEXT: [[TMP21:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !19 // CHECK15-NEXT: [[ARRAYIDX9:%.*]] = getelementptr inbounds [2 x %struct.S.0], [2 x %struct.S.0]* [[S_ARR3]], i32 0, i32 [[TMP21]] // CHECK15-NEXT: [[TMP22:%.*]] = bitcast %struct.S.0* [[ARRAYIDX9]] to i8* // CHECK15-NEXT: [[TMP23:%.*]] = bitcast %struct.S.0* [[TMP20]] to i8* -// CHECK15-NEXT: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 4 [[TMP22]], i8* align 4 [[TMP23]], i32 4, i1 false) +// CHECK15-NEXT: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 4 [[TMP22]], i8* align 4 [[TMP23]], i32 4, i1 false), !llvm.access.group !19 // CHECK15-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK15: omp.body.continue: // CHECK15-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK15: omp.inner.for.inc: -// CHECK15-NEXT: [[TMP24:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK15-NEXT: [[TMP24:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !19 // CHECK15-NEXT: [[ADD10:%.*]] = add nsw i32 [[TMP24]], 1 -// CHECK15-NEXT: store i32 [[ADD10]], i32* [[DOTOMP_IV]], align 4 -// CHECK15-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP11:![0-9]+]] +// CHECK15-NEXT: store i32 [[ADD10]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !19 +// CHECK15-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP20:![0-9]+]] // CHECK15: omp.inner.for.end: // CHECK15-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK15: omp.loop.exit: @@ -9178,29 +9178,29 @@ int main() { // CHECK16-NEXT: store i32 [[TMP11]], i32* [[DOTOMP_IV]], align 4 // CHECK16-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK16: omp.inner.for.cond: -// CHECK16-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK16-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK16-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !7 +// CHECK16-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !7 // CHECK16-NEXT: [[CMP6:%.*]] = icmp sle i32 [[TMP12]], [[TMP13]] // CHECK16-NEXT: br i1 [[CMP6]], label 
[[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_COND_CLEANUP:%.*]] // CHECK16: omp.inner.for.cond.cleanup: // CHECK16-NEXT: br label [[OMP_INNER_FOR_END:%.*]] // CHECK16: omp.inner.for.body: -// CHECK16-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK16-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK16-NEXT: [[TMP16:%.*]] = load i32, i32* [[T_VAR_ADDR]], align 4 -// CHECK16-NEXT: store i32 [[TMP16]], i32* [[T_VAR_CASTED]], align 4 -// CHECK16-NEXT: [[TMP17:%.*]] = load i32, i32* [[T_VAR_CASTED]], align 4 -// CHECK16-NEXT: [[TMP18:%.*]] = load i32, i32* [[SIVAR_ADDR]], align 4 -// CHECK16-NEXT: store i32 [[TMP18]], i32* [[SIVAR_CASTED]], align 4 -// CHECK16-NEXT: [[TMP19:%.*]] = load i32, i32* [[SIVAR_CASTED]], align 4 -// CHECK16-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 7, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32, i32, [2 x i32]*, i32, [2 x %struct.S]*, %struct.S*, i32)* @.omp_outlined..1 to void (i32*, i32*, ...)*), i32 [[TMP14]], i32 [[TMP15]], [2 x i32]* [[VEC1]], i32 [[TMP17]], [2 x %struct.S]* [[S_ARR2]], %struct.S* [[VAR4]], i32 [[TMP19]]) +// CHECK16-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !7 +// CHECK16-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !7 +// CHECK16-NEXT: [[TMP16:%.*]] = load i32, i32* [[T_VAR_ADDR]], align 4, !llvm.access.group !7 +// CHECK16-NEXT: store i32 [[TMP16]], i32* [[T_VAR_CASTED]], align 4, !llvm.access.group !7 +// CHECK16-NEXT: [[TMP17:%.*]] = load i32, i32* [[T_VAR_CASTED]], align 4, !llvm.access.group !7 +// CHECK16-NEXT: [[TMP18:%.*]] = load i32, i32* [[SIVAR_ADDR]], align 4, !llvm.access.group !7 +// CHECK16-NEXT: store i32 [[TMP18]], i32* [[SIVAR_CASTED]], align 4, !llvm.access.group !7 +// CHECK16-NEXT: [[TMP19:%.*]] = load i32, i32* [[SIVAR_CASTED]], align 4, !llvm.access.group !7 +// CHECK16-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) 
@__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 7, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32, i32, [2 x i32]*, i32, [2 x %struct.S]*, %struct.S*, i32)* @.omp_outlined..1 to void (i32*, i32*, ...)*), i32 [[TMP14]], i32 [[TMP15]], [2 x i32]* [[VEC1]], i32 [[TMP17]], [2 x %struct.S]* [[S_ARR2]], %struct.S* [[VAR4]], i32 [[TMP19]]), !llvm.access.group !7 // CHECK16-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK16: omp.inner.for.inc: -// CHECK16-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK16-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK16-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !7 +// CHECK16-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !7 // CHECK16-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP20]], [[TMP21]] -// CHECK16-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 -// CHECK16-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP5:![0-9]+]] +// CHECK16-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !7 +// CHECK16-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP8:![0-9]+]] // CHECK16: omp.inner.for.end: // CHECK16-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK16: omp.loop.exit: @@ -9345,38 +9345,38 @@ int main() { // CHECK16-NEXT: store i32 [[TMP13]], i32* [[DOTOMP_IV]], align 4 // CHECK16-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK16: omp.inner.for.cond: -// CHECK16-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK16-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK16-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !11 +// CHECK16-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !11 // CHECK16-NEXT: [[CMP6:%.*]] = icmp sle i32 [[TMP14]], [[TMP15]] // CHECK16-NEXT: br i1 [[CMP6]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_COND_CLEANUP:%.*]] // CHECK16: omp.inner.for.cond.cleanup: // CHECK16-NEXT: br label [[OMP_INNER_FOR_END:%.*]] // CHECK16: omp.inner.for.body: -// CHECK16-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK16-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !11 // CHECK16-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP16]], 1 // CHECK16-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK16-NEXT: store i32 [[ADD]], i32* [[I]], align 4 -// CHECK16-NEXT: [[TMP17:%.*]] = load i32, i32* [[T_VAR_ADDR]], align 4 -// CHECK16-NEXT: [[TMP18:%.*]] = load i32, i32* [[I]], align 4 +// CHECK16-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !11 +// CHECK16-NEXT: [[TMP17:%.*]] = load i32, i32* [[T_VAR_ADDR]], align 4, !llvm.access.group !11 +// CHECK16-NEXT: [[TMP18:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !11 // CHECK16-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x i32], [2 x i32]* [[VEC1]], i32 0, i32 [[TMP18]] -// CHECK16-NEXT: store i32 [[TMP17]], i32* [[ARRAYIDX]], align 4 -// CHECK16-NEXT: [[TMP19:%.*]] = load i32, i32* [[I]], align 4 +// CHECK16-NEXT: store i32 [[TMP17]], i32* [[ARRAYIDX]], align 4, !llvm.access.group !11 +// CHECK16-NEXT: [[TMP19:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !11 // CHECK16-NEXT: [[ARRAYIDX7:%.*]] = getelementptr inbounds [2 x %struct.S], [2 x %struct.S]* [[S_ARR2]], i32 0, i32 [[TMP19]] // CHECK16-NEXT: [[TMP20:%.*]] = bitcast %struct.S* [[ARRAYIDX7]] to i8* // CHECK16-NEXT: [[TMP21:%.*]] = bitcast %struct.S* [[VAR4]] to i8* -// CHECK16-NEXT: call void 
@llvm.memcpy.p0i8.p0i8.i32(i8* align 4 [[TMP20]], i8* align 4 [[TMP21]], i32 4, i1 false) -// CHECK16-NEXT: [[TMP22:%.*]] = load i32, i32* [[I]], align 4 -// CHECK16-NEXT: [[TMP23:%.*]] = load i32, i32* [[SIVAR_ADDR]], align 4 +// CHECK16-NEXT: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 4 [[TMP20]], i8* align 4 [[TMP21]], i32 4, i1 false), !llvm.access.group !11 +// CHECK16-NEXT: [[TMP22:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !11 +// CHECK16-NEXT: [[TMP23:%.*]] = load i32, i32* [[SIVAR_ADDR]], align 4, !llvm.access.group !11 // CHECK16-NEXT: [[ADD8:%.*]] = add nsw i32 [[TMP23]], [[TMP22]] -// CHECK16-NEXT: store i32 [[ADD8]], i32* [[SIVAR_ADDR]], align 4 +// CHECK16-NEXT: store i32 [[ADD8]], i32* [[SIVAR_ADDR]], align 4, !llvm.access.group !11 // CHECK16-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK16: omp.body.continue: // CHECK16-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK16: omp.inner.for.inc: -// CHECK16-NEXT: [[TMP24:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK16-NEXT: [[TMP24:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !11 // CHECK16-NEXT: [[ADD9:%.*]] = add nsw i32 [[TMP24]], 1 -// CHECK16-NEXT: store i32 [[ADD9]], i32* [[DOTOMP_IV]], align 4 -// CHECK16-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP7:![0-9]+]] +// CHECK16-NEXT: store i32 [[ADD9]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !11 +// CHECK16-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP12:![0-9]+]] // CHECK16: omp.inner.for.end: // CHECK16-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK16: omp.loop.exit: @@ -9519,27 +9519,27 @@ int main() { // CHECK16-NEXT: store i32 [[TMP12]], i32* [[DOTOMP_IV]], align 4 // CHECK16-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK16: omp.inner.for.cond: -// CHECK16-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK16-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK16-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !16 +// CHECK16-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !16 // CHECK16-NEXT: [[CMP8:%.*]] = icmp sle i32 [[TMP13]], [[TMP14]] // CHECK16-NEXT: br i1 [[CMP8]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_COND_CLEANUP:%.*]] // CHECK16: omp.inner.for.cond.cleanup: // CHECK16-NEXT: br label [[OMP_INNER_FOR_END:%.*]] // CHECK16: omp.inner.for.body: -// CHECK16-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK16-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK16-NEXT: [[TMP17:%.*]] = load i32, i32* [[T_VAR_ADDR]], align 4 -// CHECK16-NEXT: store i32 [[TMP17]], i32* [[T_VAR_CASTED]], align 4 -// CHECK16-NEXT: [[TMP18:%.*]] = load i32, i32* [[T_VAR_CASTED]], align 4 -// CHECK16-NEXT: [[TMP19:%.*]] = load %struct.S.0*, %struct.S.0** [[_TMP7]], align 4 -// CHECK16-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) 
@__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 6, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32, i32, [2 x i32]*, i32, [2 x %struct.S.0]*, %struct.S.0*)* @.omp_outlined..3 to void (i32*, i32*, ...)*), i32 [[TMP15]], i32 [[TMP16]], [2 x i32]* [[VEC2]], i32 [[TMP18]], [2 x %struct.S.0]* [[S_ARR3]], %struct.S.0* [[TMP19]]) +// CHECK16-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !16 +// CHECK16-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !16 +// CHECK16-NEXT: [[TMP17:%.*]] = load i32, i32* [[T_VAR_ADDR]], align 4, !llvm.access.group !16 +// CHECK16-NEXT: store i32 [[TMP17]], i32* [[T_VAR_CASTED]], align 4, !llvm.access.group !16 +// CHECK16-NEXT: [[TMP18:%.*]] = load i32, i32* [[T_VAR_CASTED]], align 4, !llvm.access.group !16 +// CHECK16-NEXT: [[TMP19:%.*]] = load %struct.S.0*, %struct.S.0** [[_TMP7]], align 4, !llvm.access.group !16 +// CHECK16-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 6, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32, i32, [2 x i32]*, i32, [2 x %struct.S.0]*, %struct.S.0*)* @.omp_outlined..3 to void (i32*, i32*, ...)*), i32 [[TMP15]], i32 [[TMP16]], [2 x i32]* [[VEC2]], i32 [[TMP18]], [2 x %struct.S.0]* [[S_ARR3]], %struct.S.0* [[TMP19]]), !llvm.access.group !16 // CHECK16-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK16: omp.inner.for.inc: -// CHECK16-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK16-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK16-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !16 +// CHECK16-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !16 // CHECK16-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP20]], [[TMP21]] -// CHECK16-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 -// CHECK16-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP10:![0-9]+]] +// CHECK16-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !16 +// CHECK16-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP17:![0-9]+]] // CHECK16: omp.inner.for.end: // CHECK16-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK16: omp.loop.exit: @@ -9667,35 +9667,35 @@ int main() { // CHECK16-NEXT: store i32 [[TMP14]], i32* [[DOTOMP_IV]], align 4 // CHECK16-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK16: omp.inner.for.cond: -// CHECK16-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK16-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK16-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !19 +// CHECK16-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !19 // CHECK16-NEXT: [[CMP8:%.*]] = icmp sle i32 [[TMP15]], [[TMP16]] // CHECK16-NEXT: br i1 [[CMP8]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_COND_CLEANUP:%.*]] // CHECK16: omp.inner.for.cond.cleanup: // CHECK16-NEXT: br label [[OMP_INNER_FOR_END:%.*]] // CHECK16: omp.inner.for.body: -// CHECK16-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK16-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !19 // CHECK16-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP17]], 1 // CHECK16-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK16-NEXT: store i32 [[ADD]], i32* [[I]], align 4 -// CHECK16-NEXT: [[TMP18:%.*]] = load i32, i32* [[T_VAR_ADDR]], align 4 -// CHECK16-NEXT: [[TMP19:%.*]] = 
load i32, i32* [[I]], align 4 +// CHECK16-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !19 +// CHECK16-NEXT: [[TMP18:%.*]] = load i32, i32* [[T_VAR_ADDR]], align 4, !llvm.access.group !19 +// CHECK16-NEXT: [[TMP19:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !19 // CHECK16-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x i32], [2 x i32]* [[VEC2]], i32 0, i32 [[TMP19]] -// CHECK16-NEXT: store i32 [[TMP18]], i32* [[ARRAYIDX]], align 4 -// CHECK16-NEXT: [[TMP20:%.*]] = load %struct.S.0*, %struct.S.0** [[_TMP7]], align 4 -// CHECK16-NEXT: [[TMP21:%.*]] = load i32, i32* [[I]], align 4 +// CHECK16-NEXT: store i32 [[TMP18]], i32* [[ARRAYIDX]], align 4, !llvm.access.group !19 +// CHECK16-NEXT: [[TMP20:%.*]] = load %struct.S.0*, %struct.S.0** [[_TMP7]], align 4, !llvm.access.group !19 +// CHECK16-NEXT: [[TMP21:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !19 // CHECK16-NEXT: [[ARRAYIDX9:%.*]] = getelementptr inbounds [2 x %struct.S.0], [2 x %struct.S.0]* [[S_ARR3]], i32 0, i32 [[TMP21]] // CHECK16-NEXT: [[TMP22:%.*]] = bitcast %struct.S.0* [[ARRAYIDX9]] to i8* // CHECK16-NEXT: [[TMP23:%.*]] = bitcast %struct.S.0* [[TMP20]] to i8* -// CHECK16-NEXT: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 4 [[TMP22]], i8* align 4 [[TMP23]], i32 4, i1 false) +// CHECK16-NEXT: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 4 [[TMP22]], i8* align 4 [[TMP23]], i32 4, i1 false), !llvm.access.group !19 // CHECK16-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK16: omp.body.continue: // CHECK16-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK16: omp.inner.for.inc: -// CHECK16-NEXT: [[TMP24:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK16-NEXT: [[TMP24:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !19 // CHECK16-NEXT: [[ADD10:%.*]] = add nsw i32 [[TMP24]], 1 -// CHECK16-NEXT: store i32 [[ADD10]], i32* [[DOTOMP_IV]], align 4 -// CHECK16-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP11:![0-9]+]] +// CHECK16-NEXT: store i32 [[ADD10]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !19 +// CHECK16-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP20:![0-9]+]] // CHECK16: omp.inner.for.end: // CHECK16-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK16: omp.loop.exit: @@ -9896,36 +9896,36 @@ int main() { // CHECK17-NEXT: store i32 [[TMP4]], i32* [[DOTOMP_IV]], align 4 // CHECK17-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK17: omp.inner.for.cond: -// CHECK17-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK17-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK17-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !5 +// CHECK17-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !5 // CHECK17-NEXT: [[CMP4:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] // CHECK17-NEXT: br i1 [[CMP4]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK17: omp.inner.for.body: -// CHECK17-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK17-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !5 // CHECK17-NEXT: [[TMP8:%.*]] = zext i32 [[TMP7]] to i64 -// CHECK17-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK17-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !5 // CHECK17-NEXT: [[TMP10:%.*]] = zext i32 [[TMP9]] to i64 -// CHECK17-NEXT: [[TMP11:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK17-NEXT: [[TMP11:%.*]] = 
load i32, i32* [[CONV]], align 8, !llvm.access.group !5 // CHECK17-NEXT: [[CONV5:%.*]] = bitcast i64* [[G_CASTED]] to i32* -// CHECK17-NEXT: store i32 [[TMP11]], i32* [[CONV5]], align 4 -// CHECK17-NEXT: [[TMP12:%.*]] = load i64, i64* [[G_CASTED]], align 8 -// CHECK17-NEXT: [[TMP13:%.*]] = load i32*, i32** [[TMP]], align 8 -// CHECK17-NEXT: [[TMP14:%.*]] = load volatile i32, i32* [[TMP13]], align 4 +// CHECK17-NEXT: store i32 [[TMP11]], i32* [[CONV5]], align 4, !llvm.access.group !5 +// CHECK17-NEXT: [[TMP12:%.*]] = load i64, i64* [[G_CASTED]], align 8, !llvm.access.group !5 +// CHECK17-NEXT: [[TMP13:%.*]] = load i32*, i32** [[TMP]], align 8, !llvm.access.group !5 +// CHECK17-NEXT: [[TMP14:%.*]] = load volatile i32, i32* [[TMP13]], align 4, !llvm.access.group !5 // CHECK17-NEXT: [[CONV6:%.*]] = bitcast i64* [[G1_CASTED]] to i32* -// CHECK17-NEXT: store i32 [[TMP14]], i32* [[CONV6]], align 4 -// CHECK17-NEXT: [[TMP15:%.*]] = load i64, i64* [[G1_CASTED]], align 8 -// CHECK17-NEXT: [[TMP16:%.*]] = load i32, i32* [[CONV2]], align 8 +// CHECK17-NEXT: store i32 [[TMP14]], i32* [[CONV6]], align 4, !llvm.access.group !5 +// CHECK17-NEXT: [[TMP15:%.*]] = load i64, i64* [[G1_CASTED]], align 8, !llvm.access.group !5 +// CHECK17-NEXT: [[TMP16:%.*]] = load i32, i32* [[CONV2]], align 8, !llvm.access.group !5 // CHECK17-NEXT: [[CONV7:%.*]] = bitcast i64* [[SIVAR_CASTED]] to i32* -// CHECK17-NEXT: store i32 [[TMP16]], i32* [[CONV7]], align 4 -// CHECK17-NEXT: [[TMP17:%.*]] = load i64, i64* [[SIVAR_CASTED]], align 8 -// CHECK17-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 5, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64, i64, i64, i64)* @.omp_outlined..1 to void (i32*, i32*, ...)*), i64 [[TMP8]], i64 [[TMP10]], i64 [[TMP12]], i64 [[TMP15]], i64 [[TMP17]]) +// CHECK17-NEXT: store i32 [[TMP16]], i32* [[CONV7]], align 4, !llvm.access.group !5 +// CHECK17-NEXT: [[TMP17:%.*]] = load i64, i64* [[SIVAR_CASTED]], align 8, !llvm.access.group !5 +// CHECK17-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) 
@__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 5, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64, i64, i64, i64)* @.omp_outlined..1 to void (i32*, i32*, ...)*), i64 [[TMP8]], i64 [[TMP10]], i64 [[TMP12]], i64 [[TMP15]], i64 [[TMP17]]), !llvm.access.group !5 // CHECK17-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK17: omp.inner.for.inc: -// CHECK17-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK17-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK17-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !5 +// CHECK17-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !5 // CHECK17-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP18]], [[TMP19]] -// CHECK17-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 -// CHECK17-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP3:![0-9]+]] +// CHECK17-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !5 +// CHECK17-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP6:![0-9]+]] // CHECK17: omp.inner.for.end: // CHECK17-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK17: omp.loop.exit: @@ -9998,35 +9998,35 @@ int main() { // CHECK17-NEXT: store i32 [[TMP6]], i32* [[DOTOMP_IV]], align 4 // CHECK17-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK17: omp.inner.for.cond: -// CHECK17-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK17-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK17-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !9 +// CHECK17-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !9 // CHECK17-NEXT: [[CMP6:%.*]] = icmp sle i32 [[TMP7]], [[TMP8]] // CHECK17-NEXT: br i1 [[CMP6]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK17: omp.inner.for.body: -// CHECK17-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK17-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !9 // CHECK17-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP9]], 1 // CHECK17-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK17-NEXT: store i32 [[ADD]], i32* [[I]], align 4 -// CHECK17-NEXT: store i32 1, i32* [[CONV]], align 8 -// CHECK17-NEXT: [[TMP10:%.*]] = load i32*, i32** [[TMP]], align 8 -// CHECK17-NEXT: store volatile i32 1, i32* [[TMP10]], align 4 -// CHECK17-NEXT: store i32 2, i32* [[CONV2]], align 8 +// CHECK17-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !9 +// CHECK17-NEXT: store i32 1, i32* [[CONV]], align 8, !llvm.access.group !9 +// CHECK17-NEXT: [[TMP10:%.*]] = load i32*, i32** [[TMP]], align 8, !llvm.access.group !9 +// CHECK17-NEXT: store volatile i32 1, i32* [[TMP10]], align 4, !llvm.access.group !9 +// CHECK17-NEXT: store i32 2, i32* [[CONV2]], align 8, !llvm.access.group !9 // CHECK17-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[CLASS_ANON]], %class.anon* [[REF_TMP]], i32 0, i32 0 -// CHECK17-NEXT: store i32* [[CONV]], i32** [[TMP11]], align 8 +// CHECK17-NEXT: store i32* [[CONV]], i32** [[TMP11]], align 8, !llvm.access.group !9 // CHECK17-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[CLASS_ANON]], %class.anon* [[REF_TMP]], i32 0, i32 1 -// CHECK17-NEXT: [[TMP13:%.*]] = load i32*, i32** [[TMP]], align 8 -// CHECK17-NEXT: store i32* [[TMP13]], i32** [[TMP12]], align 8 +// CHECK17-NEXT: [[TMP13:%.*]] = load i32*, i32** [[TMP]], align 8, !llvm.access.group !9 +// CHECK17-NEXT: store i32* [[TMP13]], i32** [[TMP12]], align 8, 
!llvm.access.group !9 // CHECK17-NEXT: [[TMP14:%.*]] = getelementptr inbounds [[CLASS_ANON]], %class.anon* [[REF_TMP]], i32 0, i32 2 -// CHECK17-NEXT: store i32* [[CONV2]], i32** [[TMP14]], align 8 -// CHECK17-NEXT: call void @"_ZZZ4mainENK3$_0clEvENKUlvE_clEv"(%class.anon* nonnull align 8 dereferenceable(24) [[REF_TMP]]) #[[ATTR3:[0-9]+]] +// CHECK17-NEXT: store i32* [[CONV2]], i32** [[TMP14]], align 8, !llvm.access.group !9 +// CHECK17-NEXT: call void @"_ZZZ4mainENK3$_0clEvENKUlvE_clEv"(%class.anon* nonnull align 8 dereferenceable(24) [[REF_TMP]]) #[[ATTR3:[0-9]+]], !llvm.access.group !9 // CHECK17-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK17: omp.body.continue: // CHECK17-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK17: omp.inner.for.inc: -// CHECK17-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK17-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !9 // CHECK17-NEXT: [[ADD7:%.*]] = add nsw i32 [[TMP15]], 1 -// CHECK17-NEXT: store i32 [[ADD7]], i32* [[DOTOMP_IV]], align 4 -// CHECK17-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP5:![0-9]+]] +// CHECK17-NEXT: store i32 [[ADD7]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !9 +// CHECK17-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP10:![0-9]+]] // CHECK17: omp.inner.for.end: // CHECK17-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK17: omp.loop.exit: diff --git a/clang/test/OpenMP/target_teams_distribute_parallel_for_simd_if_codegen.cpp b/clang/test/OpenMP/target_teams_distribute_parallel_for_simd_if_codegen.cpp index f21b6fa5ae21..ca9521864102 100644 --- a/clang/test/OpenMP/target_teams_distribute_parallel_for_simd_if_codegen.cpp +++ b/clang/test/OpenMP/target_teams_distribute_parallel_for_simd_if_codegen.cpp @@ -205,26 +205,26 @@ int main() { // CHECK1-NEXT: store i32 [[TMP4]], i32* [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !9 +// CHECK1-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !9 // CHECK1-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] // CHECK1-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK1-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !9 // CHECK1-NEXT: [[TMP8:%.*]] = zext i32 [[TMP7]] to i64 -// CHECK1-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !9 // CHECK1-NEXT: [[TMP10:%.*]] = zext i32 [[TMP9]] to i64 -// CHECK1-NEXT: [[TMP11:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK1-NEXT: [[TMP11:%.*]] = load i32, i32* [[CONV]], align 8, !llvm.access.group !9 // CHECK1-NEXT: [[CONV2:%.*]] = bitcast i64* [[ARG_CASTED]] to i32* -// CHECK1-NEXT: store i32 [[TMP11]], i32* [[CONV2]], align 4 -// CHECK1-NEXT: [[TMP12:%.*]] = load i64, i64* [[ARG_CASTED]], align 8 -// CHECK1-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) 
@__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 3, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64, i64)* @.omp_outlined..1 to void (i32*, i32*, ...)*), i64 [[TMP8]], i64 [[TMP10]], i64 [[TMP12]]) +// CHECK1-NEXT: store i32 [[TMP11]], i32* [[CONV2]], align 4, !llvm.access.group !9 +// CHECK1-NEXT: [[TMP12:%.*]] = load i64, i64* [[ARG_CASTED]], align 8, !llvm.access.group !9 +// CHECK1-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 3, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64, i64)* @.omp_outlined..1 to void (i32*, i32*, ...)*), i64 [[TMP8]], i64 [[TMP10]], i64 [[TMP12]]), !llvm.access.group !9 // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK1-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !9 +// CHECK1-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !9 // CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP13]], [[TMP14]] -// CHECK1-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !9 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP10:![0-9]+]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] @@ -289,23 +289,23 @@ int main() { // CHECK1-NEXT: store i32 [[TMP6]], i32* [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !13 +// CHECK1-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !13 // CHECK1-NEXT: [[CMP3:%.*]] = icmp sle i32 [[TMP7]], [[TMP8]] // CHECK1-NEXT: br i1 [[CMP3]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !13 // CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP9]], 1 // CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK1-NEXT: store i32 [[ADD]], i32* [[I]], align 4 -// CHECK1-NEXT: store i32 0, i32* [[CONV]], align 8 +// CHECK1-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !13 +// CHECK1-NEXT: store i32 0, i32* [[CONV]], align 8, !llvm.access.group !13 // CHECK1-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK1: omp.body.continue: // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !13 // CHECK1-NEXT: [[ADD4:%.*]] = add nsw i32 [[TMP10]], 1 -// CHECK1-NEXT: store i32 [[ADD4]], i32* [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: store i32 [[ADD4]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !13 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP14:![0-9]+]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] @@ -365,26 +365,26 @@ int main() { // CHECK1-NEXT: store i32 [[TMP4]], i32* [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label 
[[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !18 +// CHECK1-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !18 // CHECK1-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] // CHECK1-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK1-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !18 // CHECK1-NEXT: [[TMP8:%.*]] = zext i32 [[TMP7]] to i64 -// CHECK1-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !18 // CHECK1-NEXT: [[TMP10:%.*]] = zext i32 [[TMP9]] to i64 -// CHECK1-NEXT: call void @__kmpc_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]) -// CHECK1-NEXT: [[TMP11:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4 -// CHECK1-NEXT: call void @.omp_outlined..3(i32* [[TMP11]], i32* [[DOTBOUND_ZERO_ADDR]], i64 [[TMP8]], i64 [[TMP10]]) #[[ATTR2]] -// CHECK1-NEXT: call void @__kmpc_end_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]) +// CHECK1-NEXT: call void @__kmpc_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]), !llvm.access.group !18 +// CHECK1-NEXT: [[TMP11:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8, !llvm.access.group !18 +// CHECK1-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4, !llvm.access.group !18 +// CHECK1-NEXT: call void @.omp_outlined..3(i32* [[TMP11]], i32* [[DOTBOUND_ZERO_ADDR]], i64 [[TMP8]], i64 [[TMP10]]) #[[ATTR2]], !llvm.access.group !18 +// CHECK1-NEXT: call void @__kmpc_end_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]), !llvm.access.group !18 // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK1-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !18 +// CHECK1-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !18 // CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP12]], [[TMP13]] -// CHECK1-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !18 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP19:![0-9]+]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] @@ -446,23 +446,23 @@ int main() { // CHECK1-NEXT: store i32 [[TMP6]], i32* [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !21 +// CHECK1-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !21 // CHECK1-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP7]], [[TMP8]] // CHECK1-NEXT: br i1 [[CMP2]], label 
[[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !21 // CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP9]], 1 // CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK1-NEXT: store i32 [[ADD]], i32* [[I]], align 4 -// CHECK1-NEXT: call void @_Z9gtid_testv() +// CHECK1-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !21 +// CHECK1-NEXT: call void @_Z9gtid_testv(), !llvm.access.group !21 // CHECK1-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK1: omp.body.continue: // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !21 // CHECK1-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP10]], 1 -// CHECK1-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !21 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP22:![0-9]+]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] @@ -587,22 +587,22 @@ int main() { // CHECK1-NEXT: store i32 [[TMP4]], i32* [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !24 +// CHECK1-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !24 // CHECK1-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] // CHECK1-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK1-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !24 // CHECK1-NEXT: [[TMP8:%.*]] = zext i32 [[TMP7]] to i64 -// CHECK1-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !24 // CHECK1-NEXT: [[TMP10:%.*]] = zext i32 [[TMP9]] to i64 -// CHECK1-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 2, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64)* @.omp_outlined..5 to void (i32*, i32*, ...)*), i64 [[TMP8]], i64 [[TMP10]]) +// CHECK1-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) 
@__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 2, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64)* @.omp_outlined..5 to void (i32*, i32*, ...)*), i64 [[TMP8]], i64 [[TMP10]]), !llvm.access.group !24 // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK1-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !24 +// CHECK1-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !24 // CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP11]], [[TMP12]] -// CHECK1-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !24 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP25:![0-9]+]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] @@ -664,23 +664,23 @@ int main() { // CHECK1-NEXT: store i32 [[TMP6]], i32* [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !27 +// CHECK1-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !27 // CHECK1-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP7]], [[TMP8]] // CHECK1-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !27 // CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP9]], 1 // CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK1-NEXT: store i32 [[ADD]], i32* [[I]], align 4 -// CHECK1-NEXT: call void @_Z3fn4v() +// CHECK1-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !27 +// CHECK1-NEXT: call void @_Z3fn4v(), !llvm.access.group !27 // CHECK1-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK1: omp.body.continue: // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !27 // CHECK1-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP10]], 1 -// CHECK1-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !27 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP28:![0-9]+]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] @@ -740,26 +740,26 @@ int main() { // CHECK1-NEXT: store i32 [[TMP4]], i32* [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !30 +// CHECK1-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !30 // CHECK1-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] // CHECK1-NEXT: br i1 [[CMP1]], label 
[[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK1-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !30 // CHECK1-NEXT: [[TMP8:%.*]] = zext i32 [[TMP7]] to i64 -// CHECK1-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !30 // CHECK1-NEXT: [[TMP10:%.*]] = zext i32 [[TMP9]] to i64 -// CHECK1-NEXT: call void @__kmpc_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]) -// CHECK1-NEXT: [[TMP11:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4 -// CHECK1-NEXT: call void @.omp_outlined..7(i32* [[TMP11]], i32* [[DOTBOUND_ZERO_ADDR]], i64 [[TMP8]], i64 [[TMP10]]) #[[ATTR2]] -// CHECK1-NEXT: call void @__kmpc_end_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]) +// CHECK1-NEXT: call void @__kmpc_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]), !llvm.access.group !30 +// CHECK1-NEXT: [[TMP11:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8, !llvm.access.group !30 +// CHECK1-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4, !llvm.access.group !30 +// CHECK1-NEXT: call void @.omp_outlined..7(i32* [[TMP11]], i32* [[DOTBOUND_ZERO_ADDR]], i64 [[TMP8]], i64 [[TMP10]]) #[[ATTR2]], !llvm.access.group !30 +// CHECK1-NEXT: call void @__kmpc_end_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]), !llvm.access.group !30 // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK1-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !30 +// CHECK1-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !30 // CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP12]], [[TMP13]] -// CHECK1-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !30 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP31:![0-9]+]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] @@ -821,23 +821,23 @@ int main() { // CHECK1-NEXT: store i32 [[TMP6]], i32* [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !33 +// CHECK1-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !33 // CHECK1-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP7]], [[TMP8]] // CHECK1-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !33 // CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP9]], 1 // CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK1-NEXT: store i32 [[ADD]], i32* [[I]], align 4 -// CHECK1-NEXT: call void @_Z3fn5v() +// CHECK1-NEXT: store i32 [[ADD]], i32* [[I]], align 4, 
!llvm.access.group !33 +// CHECK1-NEXT: call void @_Z3fn5v(), !llvm.access.group !33 // CHECK1-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK1: omp.body.continue: // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !33 // CHECK1-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP10]], 1 -// CHECK1-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !33 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP34:![0-9]+]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] @@ -910,35 +910,35 @@ int main() { // CHECK1-NEXT: store i32 [[TMP4]], i32* [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !36 +// CHECK1-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !36 // CHECK1-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] // CHECK1-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK1-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !36 // CHECK1-NEXT: [[TMP8:%.*]] = zext i32 [[TMP7]] to i64 -// CHECK1-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !36 // CHECK1-NEXT: [[TMP10:%.*]] = zext i32 [[TMP9]] to i64 -// CHECK1-NEXT: [[TMP11:%.*]] = load i8, i8* [[CONV]], align 8 +// CHECK1-NEXT: [[TMP11:%.*]] = load i8, i8* [[CONV]], align 8, !llvm.access.group !36 // CHECK1-NEXT: [[TOBOOL:%.*]] = trunc i8 [[TMP11]] to i1 // CHECK1-NEXT: br i1 [[TOBOOL]], label [[OMP_IF_THEN:%.*]], label [[OMP_IF_ELSE:%.*]] // CHECK1: omp_if.then: -// CHECK1-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 2, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64)* @.omp_outlined..9 to void (i32*, i32*, ...)*), i64 [[TMP8]], i64 [[TMP10]]) +// CHECK1-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) 
@__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 2, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64)* @.omp_outlined..9 to void (i32*, i32*, ...)*), i64 [[TMP8]], i64 [[TMP10]]), !llvm.access.group !36 // CHECK1-NEXT: br label [[OMP_IF_END:%.*]] // CHECK1: omp_if.else: -// CHECK1-NEXT: call void @__kmpc_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]) -// CHECK1-NEXT: [[TMP12:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4 -// CHECK1-NEXT: call void @.omp_outlined..9(i32* [[TMP12]], i32* [[DOTBOUND_ZERO_ADDR]], i64 [[TMP8]], i64 [[TMP10]]) #[[ATTR2]] -// CHECK1-NEXT: call void @__kmpc_end_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]) +// CHECK1-NEXT: call void @__kmpc_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]), !llvm.access.group !36 +// CHECK1-NEXT: [[TMP12:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8, !llvm.access.group !36 +// CHECK1-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4, !llvm.access.group !36 +// CHECK1-NEXT: call void @.omp_outlined..9(i32* [[TMP12]], i32* [[DOTBOUND_ZERO_ADDR]], i64 [[TMP8]], i64 [[TMP10]]) #[[ATTR2]], !llvm.access.group !36 +// CHECK1-NEXT: call void @__kmpc_end_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]), !llvm.access.group !36 // CHECK1-NEXT: br label [[OMP_IF_END]] // CHECK1: omp_if.end: // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK1-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !36 +// CHECK1-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !36 // CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP13]], [[TMP14]] -// CHECK1-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !36 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP37:![0-9]+]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] @@ -1000,23 +1000,23 @@ int main() { // CHECK1-NEXT: store i32 [[TMP6]], i32* [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !39 +// CHECK1-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !39 // CHECK1-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP7]], [[TMP8]] // CHECK1-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !39 // CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP9]], 1 // CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK1-NEXT: store i32 [[ADD]], i32* [[I]], align 4 -// CHECK1-NEXT: call void @_Z3fn6v() +// CHECK1-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !39 +// CHECK1-NEXT: call void @_Z3fn6v(), !llvm.access.group !39 // CHECK1-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK1: omp.body.continue: // CHECK1-NEXT: br label 
[[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !39 // CHECK1-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP10]], 1 -// CHECK1-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !39 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP40:![0-9]+]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] @@ -1130,22 +1130,22 @@ int main() { // CHECK1-NEXT: store i32 [[TMP4]], i32* [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !42 +// CHECK1-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !42 // CHECK1-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] // CHECK1-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK1-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !42 // CHECK1-NEXT: [[TMP8:%.*]] = zext i32 [[TMP7]] to i64 -// CHECK1-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !42 // CHECK1-NEXT: [[TMP10:%.*]] = zext i32 [[TMP9]] to i64 -// CHECK1-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 2, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64)* @.omp_outlined..13 to void (i32*, i32*, ...)*), i64 [[TMP8]], i64 [[TMP10]]) +// CHECK1-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) 
@__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 2, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64)* @.omp_outlined..13 to void (i32*, i32*, ...)*), i64 [[TMP8]], i64 [[TMP10]]), !llvm.access.group !42 // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK1-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !42 +// CHECK1-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !42 // CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP11]], [[TMP12]] -// CHECK1-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !42 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP43:![0-9]+]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] @@ -1207,23 +1207,23 @@ int main() { // CHECK1-NEXT: store i32 [[TMP6]], i32* [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !45 +// CHECK1-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !45 // CHECK1-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP7]], [[TMP8]] // CHECK1-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !45 // CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP9]], 1 // CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK1-NEXT: store i32 [[ADD]], i32* [[I]], align 4 -// CHECK1-NEXT: call void @_Z3fn1v() +// CHECK1-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !45 +// CHECK1-NEXT: call void @_Z3fn1v(), !llvm.access.group !45 // CHECK1-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK1: omp.body.continue: // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !45 // CHECK1-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP10]], 1 -// CHECK1-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !45 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP46:![0-9]+]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] @@ -1283,26 +1283,26 @@ int main() { // CHECK1-NEXT: store i32 [[TMP4]], i32* [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !48 +// CHECK1-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !48 // CHECK1-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] // CHECK1-NEXT: br i1 [[CMP1]], label 
[[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK1-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !48 // CHECK1-NEXT: [[TMP8:%.*]] = zext i32 [[TMP7]] to i64 -// CHECK1-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !48 // CHECK1-NEXT: [[TMP10:%.*]] = zext i32 [[TMP9]] to i64 -// CHECK1-NEXT: call void @__kmpc_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]) -// CHECK1-NEXT: [[TMP11:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4 -// CHECK1-NEXT: call void @.omp_outlined..15(i32* [[TMP11]], i32* [[DOTBOUND_ZERO_ADDR]], i64 [[TMP8]], i64 [[TMP10]]) #[[ATTR2]] -// CHECK1-NEXT: call void @__kmpc_end_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]) +// CHECK1-NEXT: call void @__kmpc_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]), !llvm.access.group !48 +// CHECK1-NEXT: [[TMP11:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8, !llvm.access.group !48 +// CHECK1-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4, !llvm.access.group !48 +// CHECK1-NEXT: call void @.omp_outlined..15(i32* [[TMP11]], i32* [[DOTBOUND_ZERO_ADDR]], i64 [[TMP8]], i64 [[TMP10]]) #[[ATTR2]], !llvm.access.group !48 +// CHECK1-NEXT: call void @__kmpc_end_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]), !llvm.access.group !48 // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK1-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !48 +// CHECK1-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !48 // CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP12]], [[TMP13]] -// CHECK1-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !48 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP49:![0-9]+]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] @@ -1364,23 +1364,23 @@ int main() { // CHECK1-NEXT: store i32 [[TMP6]], i32* [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !51 +// CHECK1-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !51 // CHECK1-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP7]], [[TMP8]] // CHECK1-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !51 // CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP9]], 1 // CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK1-NEXT: store i32 [[ADD]], i32* [[I]], align 4 -// CHECK1-NEXT: call void @_Z3fn2v() +// CHECK1-NEXT: store i32 [[ADD]], i32* [[I]], align 4, 
!llvm.access.group !51 +// CHECK1-NEXT: call void @_Z3fn2v(), !llvm.access.group !51 // CHECK1-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK1: omp.body.continue: // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !51 // CHECK1-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP10]], 1 -// CHECK1-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !51 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP52:![0-9]+]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] @@ -1453,35 +1453,35 @@ int main() { // CHECK1-NEXT: store i32 [[TMP4]], i32* [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !54 +// CHECK1-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !54 // CHECK1-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] // CHECK1-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK1-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !54 // CHECK1-NEXT: [[TMP8:%.*]] = zext i32 [[TMP7]] to i64 -// CHECK1-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !54 // CHECK1-NEXT: [[TMP10:%.*]] = zext i32 [[TMP9]] to i64 -// CHECK1-NEXT: [[TMP11:%.*]] = load i8, i8* [[CONV]], align 8 +// CHECK1-NEXT: [[TMP11:%.*]] = load i8, i8* [[CONV]], align 8, !llvm.access.group !54 // CHECK1-NEXT: [[TOBOOL:%.*]] = trunc i8 [[TMP11]] to i1 // CHECK1-NEXT: br i1 [[TOBOOL]], label [[OMP_IF_THEN:%.*]], label [[OMP_IF_ELSE:%.*]] // CHECK1: omp_if.then: -// CHECK1-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 2, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64)* @.omp_outlined..17 to void (i32*, i32*, ...)*), i64 [[TMP8]], i64 [[TMP10]]) +// CHECK1-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) 
@__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 2, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64)* @.omp_outlined..17 to void (i32*, i32*, ...)*), i64 [[TMP8]], i64 [[TMP10]]), !llvm.access.group !54 // CHECK1-NEXT: br label [[OMP_IF_END:%.*]] // CHECK1: omp_if.else: -// CHECK1-NEXT: call void @__kmpc_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]) -// CHECK1-NEXT: [[TMP12:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4 -// CHECK1-NEXT: call void @.omp_outlined..17(i32* [[TMP12]], i32* [[DOTBOUND_ZERO_ADDR]], i64 [[TMP8]], i64 [[TMP10]]) #[[ATTR2]] -// CHECK1-NEXT: call void @__kmpc_end_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]) +// CHECK1-NEXT: call void @__kmpc_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]), !llvm.access.group !54 +// CHECK1-NEXT: [[TMP12:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8, !llvm.access.group !54 +// CHECK1-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4, !llvm.access.group !54 +// CHECK1-NEXT: call void @.omp_outlined..17(i32* [[TMP12]], i32* [[DOTBOUND_ZERO_ADDR]], i64 [[TMP8]], i64 [[TMP10]]) #[[ATTR2]], !llvm.access.group !54 +// CHECK1-NEXT: call void @__kmpc_end_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]), !llvm.access.group !54 // CHECK1-NEXT: br label [[OMP_IF_END]] // CHECK1: omp_if.end: // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK1-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !54 +// CHECK1-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !54 // CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP13]], [[TMP14]] -// CHECK1-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !54 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP55:![0-9]+]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] @@ -1543,23 +1543,23 @@ int main() { // CHECK1-NEXT: store i32 [[TMP6]], i32* [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !57 +// CHECK1-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !57 // CHECK1-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP7]], [[TMP8]] // CHECK1-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !57 // CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP9]], 1 // CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK1-NEXT: store i32 [[ADD]], i32* [[I]], align 4 -// CHECK1-NEXT: call void @_Z3fn3v() +// CHECK1-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !57 +// CHECK1-NEXT: call void @_Z3fn3v(), !llvm.access.group !57 // CHECK1-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK1: omp.body.continue: // CHECK1-NEXT: br label 
[[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !57 // CHECK1-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP10]], 1 -// CHECK1-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !57 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP58:![0-9]+]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] @@ -1679,26 +1679,26 @@ int main() { // CHECK2-NEXT: store i32 [[TMP4]], i32* [[DOTOMP_IV]], align 4 // CHECK2-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK2: omp.inner.for.cond: -// CHECK2-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK2-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK2-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !9 +// CHECK2-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !9 // CHECK2-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] // CHECK2-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK2: omp.inner.for.body: -// CHECK2-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK2-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !9 // CHECK2-NEXT: [[TMP8:%.*]] = zext i32 [[TMP7]] to i64 -// CHECK2-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK2-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !9 // CHECK2-NEXT: [[TMP10:%.*]] = zext i32 [[TMP9]] to i64 -// CHECK2-NEXT: [[TMP11:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK2-NEXT: [[TMP11:%.*]] = load i32, i32* [[CONV]], align 8, !llvm.access.group !9 // CHECK2-NEXT: [[CONV2:%.*]] = bitcast i64* [[ARG_CASTED]] to i32* -// CHECK2-NEXT: store i32 [[TMP11]], i32* [[CONV2]], align 4 -// CHECK2-NEXT: [[TMP12:%.*]] = load i64, i64* [[ARG_CASTED]], align 8 -// CHECK2-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 3, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64, i64)* @.omp_outlined..1 to void (i32*, i32*, ...)*), i64 [[TMP8]], i64 [[TMP10]], i64 [[TMP12]]) +// CHECK2-NEXT: store i32 [[TMP11]], i32* [[CONV2]], align 4, !llvm.access.group !9 +// CHECK2-NEXT: [[TMP12:%.*]] = load i64, i64* [[ARG_CASTED]], align 8, !llvm.access.group !9 +// CHECK2-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) 
@__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 3, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64, i64)* @.omp_outlined..1 to void (i32*, i32*, ...)*), i64 [[TMP8]], i64 [[TMP10]], i64 [[TMP12]]), !llvm.access.group !9 // CHECK2-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK2: omp.inner.for.inc: -// CHECK2-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK2-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK2-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !9 +// CHECK2-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !9 // CHECK2-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP13]], [[TMP14]] -// CHECK2-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 +// CHECK2-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !9 // CHECK2-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP10:![0-9]+]] // CHECK2: omp.inner.for.end: // CHECK2-NEXT: br label [[OMP_LOOP_EXIT:%.*]] @@ -1763,23 +1763,23 @@ int main() { // CHECK2-NEXT: store i32 [[TMP6]], i32* [[DOTOMP_IV]], align 4 // CHECK2-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK2: omp.inner.for.cond: -// CHECK2-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK2-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK2-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !13 +// CHECK2-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !13 // CHECK2-NEXT: [[CMP3:%.*]] = icmp sle i32 [[TMP7]], [[TMP8]] // CHECK2-NEXT: br i1 [[CMP3]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK2: omp.inner.for.body: -// CHECK2-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK2-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !13 // CHECK2-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP9]], 1 // CHECK2-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK2-NEXT: store i32 [[ADD]], i32* [[I]], align 4 -// CHECK2-NEXT: store i32 0, i32* [[CONV]], align 8 +// CHECK2-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !13 +// CHECK2-NEXT: store i32 0, i32* [[CONV]], align 8, !llvm.access.group !13 // CHECK2-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK2: omp.body.continue: // CHECK2-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK2: omp.inner.for.inc: -// CHECK2-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK2-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !13 // CHECK2-NEXT: [[ADD4:%.*]] = add nsw i32 [[TMP10]], 1 -// CHECK2-NEXT: store i32 [[ADD4]], i32* [[DOTOMP_IV]], align 4 +// CHECK2-NEXT: store i32 [[ADD4]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !13 // CHECK2-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP14:![0-9]+]] // CHECK2: omp.inner.for.end: // CHECK2-NEXT: br label [[OMP_LOOP_EXIT:%.*]] @@ -1839,26 +1839,26 @@ int main() { // CHECK2-NEXT: store i32 [[TMP4]], i32* [[DOTOMP_IV]], align 4 // CHECK2-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK2: omp.inner.for.cond: -// CHECK2-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK2-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK2-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !18 +// CHECK2-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !18 // CHECK2-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] // 
CHECK2-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK2: omp.inner.for.body: -// CHECK2-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK2-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !18 // CHECK2-NEXT: [[TMP8:%.*]] = zext i32 [[TMP7]] to i64 -// CHECK2-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK2-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !18 // CHECK2-NEXT: [[TMP10:%.*]] = zext i32 [[TMP9]] to i64 -// CHECK2-NEXT: call void @__kmpc_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]) -// CHECK2-NEXT: [[TMP11:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK2-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4 -// CHECK2-NEXT: call void @.omp_outlined..3(i32* [[TMP11]], i32* [[DOTBOUND_ZERO_ADDR]], i64 [[TMP8]], i64 [[TMP10]]) #[[ATTR2]] -// CHECK2-NEXT: call void @__kmpc_end_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]) +// CHECK2-NEXT: call void @__kmpc_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]), !llvm.access.group !18 +// CHECK2-NEXT: [[TMP11:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8, !llvm.access.group !18 +// CHECK2-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4, !llvm.access.group !18 +// CHECK2-NEXT: call void @.omp_outlined..3(i32* [[TMP11]], i32* [[DOTBOUND_ZERO_ADDR]], i64 [[TMP8]], i64 [[TMP10]]) #[[ATTR2]], !llvm.access.group !18 +// CHECK2-NEXT: call void @__kmpc_end_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]), !llvm.access.group !18 // CHECK2-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK2: omp.inner.for.inc: -// CHECK2-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK2-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK2-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !18 +// CHECK2-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !18 // CHECK2-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP12]], [[TMP13]] -// CHECK2-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 +// CHECK2-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !18 // CHECK2-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP19:![0-9]+]] // CHECK2: omp.inner.for.end: // CHECK2-NEXT: br label [[OMP_LOOP_EXIT:%.*]] @@ -1920,23 +1920,23 @@ int main() { // CHECK2-NEXT: store i32 [[TMP6]], i32* [[DOTOMP_IV]], align 4 // CHECK2-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK2: omp.inner.for.cond: -// CHECK2-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK2-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK2-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !21 +// CHECK2-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !21 // CHECK2-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP7]], [[TMP8]] // CHECK2-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK2: omp.inner.for.body: -// CHECK2-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK2-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !21 // CHECK2-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP9]], 1 // CHECK2-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK2-NEXT: store i32 [[ADD]], i32* [[I]], align 4 -// CHECK2-NEXT: call void @_Z9gtid_testv() +// CHECK2-NEXT: 
store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !21 +// CHECK2-NEXT: call void @_Z9gtid_testv(), !llvm.access.group !21 // CHECK2-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK2: omp.body.continue: // CHECK2-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK2: omp.inner.for.inc: -// CHECK2-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK2-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !21 // CHECK2-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP10]], 1 -// CHECK2-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4 +// CHECK2-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !21 // CHECK2-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP22:![0-9]+]] // CHECK2: omp.inner.for.end: // CHECK2-NEXT: br label [[OMP_LOOP_EXIT:%.*]] @@ -2061,22 +2061,22 @@ int main() { // CHECK2-NEXT: store i32 [[TMP4]], i32* [[DOTOMP_IV]], align 4 // CHECK2-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK2: omp.inner.for.cond: -// CHECK2-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK2-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK2-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !24 +// CHECK2-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !24 // CHECK2-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] // CHECK2-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK2: omp.inner.for.body: -// CHECK2-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK2-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !24 // CHECK2-NEXT: [[TMP8:%.*]] = zext i32 [[TMP7]] to i64 -// CHECK2-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK2-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !24 // CHECK2-NEXT: [[TMP10:%.*]] = zext i32 [[TMP9]] to i64 -// CHECK2-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 2, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64)* @.omp_outlined..5 to void (i32*, i32*, ...)*), i64 [[TMP8]], i64 [[TMP10]]) +// CHECK2-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) 
@__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 2, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64)* @.omp_outlined..5 to void (i32*, i32*, ...)*), i64 [[TMP8]], i64 [[TMP10]]), !llvm.access.group !24 // CHECK2-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK2: omp.inner.for.inc: -// CHECK2-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK2-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK2-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !24 +// CHECK2-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !24 // CHECK2-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP11]], [[TMP12]] -// CHECK2-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 +// CHECK2-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !24 // CHECK2-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP25:![0-9]+]] // CHECK2: omp.inner.for.end: // CHECK2-NEXT: br label [[OMP_LOOP_EXIT:%.*]] @@ -2138,23 +2138,23 @@ int main() { // CHECK2-NEXT: store i32 [[TMP6]], i32* [[DOTOMP_IV]], align 4 // CHECK2-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK2: omp.inner.for.cond: -// CHECK2-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK2-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK2-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !27 +// CHECK2-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !27 // CHECK2-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP7]], [[TMP8]] // CHECK2-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK2: omp.inner.for.body: -// CHECK2-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK2-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !27 // CHECK2-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP9]], 1 // CHECK2-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK2-NEXT: store i32 [[ADD]], i32* [[I]], align 4 -// CHECK2-NEXT: call void @_Z3fn4v() +// CHECK2-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !27 +// CHECK2-NEXT: call void @_Z3fn4v(), !llvm.access.group !27 // CHECK2-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK2: omp.body.continue: // CHECK2-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK2: omp.inner.for.inc: -// CHECK2-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK2-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !27 // CHECK2-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP10]], 1 -// CHECK2-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4 +// CHECK2-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !27 // CHECK2-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP28:![0-9]+]] // CHECK2: omp.inner.for.end: // CHECK2-NEXT: br label [[OMP_LOOP_EXIT:%.*]] @@ -2214,26 +2214,26 @@ int main() { // CHECK2-NEXT: store i32 [[TMP4]], i32* [[DOTOMP_IV]], align 4 // CHECK2-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK2: omp.inner.for.cond: -// CHECK2-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK2-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK2-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !30 +// CHECK2-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !30 // CHECK2-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] // CHECK2-NEXT: br i1 [[CMP1]], label 
[[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK2: omp.inner.for.body: -// CHECK2-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK2-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !30 // CHECK2-NEXT: [[TMP8:%.*]] = zext i32 [[TMP7]] to i64 -// CHECK2-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK2-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !30 // CHECK2-NEXT: [[TMP10:%.*]] = zext i32 [[TMP9]] to i64 -// CHECK2-NEXT: call void @__kmpc_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]) -// CHECK2-NEXT: [[TMP11:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK2-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4 -// CHECK2-NEXT: call void @.omp_outlined..7(i32* [[TMP11]], i32* [[DOTBOUND_ZERO_ADDR]], i64 [[TMP8]], i64 [[TMP10]]) #[[ATTR2]] -// CHECK2-NEXT: call void @__kmpc_end_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]) +// CHECK2-NEXT: call void @__kmpc_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]), !llvm.access.group !30 +// CHECK2-NEXT: [[TMP11:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8, !llvm.access.group !30 +// CHECK2-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4, !llvm.access.group !30 +// CHECK2-NEXT: call void @.omp_outlined..7(i32* [[TMP11]], i32* [[DOTBOUND_ZERO_ADDR]], i64 [[TMP8]], i64 [[TMP10]]) #[[ATTR2]], !llvm.access.group !30 +// CHECK2-NEXT: call void @__kmpc_end_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]), !llvm.access.group !30 // CHECK2-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK2: omp.inner.for.inc: -// CHECK2-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK2-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK2-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !30 +// CHECK2-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !30 // CHECK2-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP12]], [[TMP13]] -// CHECK2-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 +// CHECK2-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !30 // CHECK2-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP31:![0-9]+]] // CHECK2: omp.inner.for.end: // CHECK2-NEXT: br label [[OMP_LOOP_EXIT:%.*]] @@ -2295,23 +2295,23 @@ int main() { // CHECK2-NEXT: store i32 [[TMP6]], i32* [[DOTOMP_IV]], align 4 // CHECK2-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK2: omp.inner.for.cond: -// CHECK2-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK2-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK2-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !33 +// CHECK2-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !33 // CHECK2-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP7]], [[TMP8]] // CHECK2-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK2: omp.inner.for.body: -// CHECK2-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK2-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !33 // CHECK2-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP9]], 1 // CHECK2-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK2-NEXT: store i32 [[ADD]], i32* [[I]], align 4 -// CHECK2-NEXT: call void @_Z3fn5v() +// CHECK2-NEXT: store i32 [[ADD]], i32* [[I]], align 4, 
!llvm.access.group !33 +// CHECK2-NEXT: call void @_Z3fn5v(), !llvm.access.group !33 // CHECK2-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK2: omp.body.continue: // CHECK2-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK2: omp.inner.for.inc: -// CHECK2-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK2-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !33 // CHECK2-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP10]], 1 -// CHECK2-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4 +// CHECK2-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !33 // CHECK2-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP34:![0-9]+]] // CHECK2: omp.inner.for.end: // CHECK2-NEXT: br label [[OMP_LOOP_EXIT:%.*]] @@ -2384,35 +2384,35 @@ int main() { // CHECK2-NEXT: store i32 [[TMP4]], i32* [[DOTOMP_IV]], align 4 // CHECK2-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK2: omp.inner.for.cond: -// CHECK2-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK2-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK2-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !36 +// CHECK2-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !36 // CHECK2-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] // CHECK2-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK2: omp.inner.for.body: -// CHECK2-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK2-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !36 // CHECK2-NEXT: [[TMP8:%.*]] = zext i32 [[TMP7]] to i64 -// CHECK2-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK2-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !36 // CHECK2-NEXT: [[TMP10:%.*]] = zext i32 [[TMP9]] to i64 -// CHECK2-NEXT: [[TMP11:%.*]] = load i8, i8* [[CONV]], align 8 +// CHECK2-NEXT: [[TMP11:%.*]] = load i8, i8* [[CONV]], align 8, !llvm.access.group !36 // CHECK2-NEXT: [[TOBOOL:%.*]] = trunc i8 [[TMP11]] to i1 // CHECK2-NEXT: br i1 [[TOBOOL]], label [[OMP_IF_THEN:%.*]], label [[OMP_IF_ELSE:%.*]] // CHECK2: omp_if.then: -// CHECK2-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 2, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64)* @.omp_outlined..9 to void (i32*, i32*, ...)*), i64 [[TMP8]], i64 [[TMP10]]) +// CHECK2-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) 
@__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 2, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64)* @.omp_outlined..9 to void (i32*, i32*, ...)*), i64 [[TMP8]], i64 [[TMP10]]), !llvm.access.group !36 // CHECK2-NEXT: br label [[OMP_IF_END:%.*]] // CHECK2: omp_if.else: -// CHECK2-NEXT: call void @__kmpc_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]) -// CHECK2-NEXT: [[TMP12:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK2-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4 -// CHECK2-NEXT: call void @.omp_outlined..9(i32* [[TMP12]], i32* [[DOTBOUND_ZERO_ADDR]], i64 [[TMP8]], i64 [[TMP10]]) #[[ATTR2]] -// CHECK2-NEXT: call void @__kmpc_end_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]) +// CHECK2-NEXT: call void @__kmpc_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]), !llvm.access.group !36 +// CHECK2-NEXT: [[TMP12:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8, !llvm.access.group !36 +// CHECK2-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4, !llvm.access.group !36 +// CHECK2-NEXT: call void @.omp_outlined..9(i32* [[TMP12]], i32* [[DOTBOUND_ZERO_ADDR]], i64 [[TMP8]], i64 [[TMP10]]) #[[ATTR2]], !llvm.access.group !36 +// CHECK2-NEXT: call void @__kmpc_end_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]), !llvm.access.group !36 // CHECK2-NEXT: br label [[OMP_IF_END]] // CHECK2: omp_if.end: // CHECK2-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK2: omp.inner.for.inc: -// CHECK2-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK2-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK2-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !36 +// CHECK2-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !36 // CHECK2-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP13]], [[TMP14]] -// CHECK2-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 +// CHECK2-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !36 // CHECK2-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP37:![0-9]+]] // CHECK2: omp.inner.for.end: // CHECK2-NEXT: br label [[OMP_LOOP_EXIT:%.*]] @@ -2474,23 +2474,23 @@ int main() { // CHECK2-NEXT: store i32 [[TMP6]], i32* [[DOTOMP_IV]], align 4 // CHECK2-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK2: omp.inner.for.cond: -// CHECK2-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK2-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK2-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !39 +// CHECK2-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !39 // CHECK2-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP7]], [[TMP8]] // CHECK2-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK2: omp.inner.for.body: -// CHECK2-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK2-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !39 // CHECK2-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP9]], 1 // CHECK2-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK2-NEXT: store i32 [[ADD]], i32* [[I]], align 4 -// CHECK2-NEXT: call void @_Z3fn6v() +// CHECK2-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !39 +// CHECK2-NEXT: call void @_Z3fn6v(), !llvm.access.group !39 // CHECK2-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK2: omp.body.continue: // CHECK2-NEXT: br label 
[[OMP_INNER_FOR_INC:%.*]] // CHECK2: omp.inner.for.inc: -// CHECK2-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK2-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !39 // CHECK2-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP10]], 1 -// CHECK2-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4 +// CHECK2-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !39 // CHECK2-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP40:![0-9]+]] // CHECK2: omp.inner.for.end: // CHECK2-NEXT: br label [[OMP_LOOP_EXIT:%.*]] @@ -2604,22 +2604,22 @@ int main() { // CHECK2-NEXT: store i32 [[TMP4]], i32* [[DOTOMP_IV]], align 4 // CHECK2-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK2: omp.inner.for.cond: -// CHECK2-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK2-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK2-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !42 +// CHECK2-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !42 // CHECK2-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] // CHECK2-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK2: omp.inner.for.body: -// CHECK2-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK2-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !42 // CHECK2-NEXT: [[TMP8:%.*]] = zext i32 [[TMP7]] to i64 -// CHECK2-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK2-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !42 // CHECK2-NEXT: [[TMP10:%.*]] = zext i32 [[TMP9]] to i64 -// CHECK2-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 2, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64)* @.omp_outlined..13 to void (i32*, i32*, ...)*), i64 [[TMP8]], i64 [[TMP10]]) +// CHECK2-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) 
@__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 2, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64)* @.omp_outlined..13 to void (i32*, i32*, ...)*), i64 [[TMP8]], i64 [[TMP10]]), !llvm.access.group !42 // CHECK2-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK2: omp.inner.for.inc: -// CHECK2-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK2-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK2-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !42 +// CHECK2-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !42 // CHECK2-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP11]], [[TMP12]] -// CHECK2-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 +// CHECK2-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !42 // CHECK2-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP43:![0-9]+]] // CHECK2: omp.inner.for.end: // CHECK2-NEXT: br label [[OMP_LOOP_EXIT:%.*]] @@ -2681,23 +2681,23 @@ int main() { // CHECK2-NEXT: store i32 [[TMP6]], i32* [[DOTOMP_IV]], align 4 // CHECK2-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK2: omp.inner.for.cond: -// CHECK2-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK2-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK2-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !45 +// CHECK2-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !45 // CHECK2-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP7]], [[TMP8]] // CHECK2-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK2: omp.inner.for.body: -// CHECK2-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK2-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !45 // CHECK2-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP9]], 1 // CHECK2-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK2-NEXT: store i32 [[ADD]], i32* [[I]], align 4 -// CHECK2-NEXT: call void @_Z3fn1v() +// CHECK2-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !45 +// CHECK2-NEXT: call void @_Z3fn1v(), !llvm.access.group !45 // CHECK2-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK2: omp.body.continue: // CHECK2-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK2: omp.inner.for.inc: -// CHECK2-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK2-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !45 // CHECK2-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP10]], 1 -// CHECK2-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4 +// CHECK2-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !45 // CHECK2-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP46:![0-9]+]] // CHECK2: omp.inner.for.end: // CHECK2-NEXT: br label [[OMP_LOOP_EXIT:%.*]] @@ -2757,26 +2757,26 @@ int main() { // CHECK2-NEXT: store i32 [[TMP4]], i32* [[DOTOMP_IV]], align 4 // CHECK2-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK2: omp.inner.for.cond: -// CHECK2-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK2-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK2-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !48 +// CHECK2-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !48 // CHECK2-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] // CHECK2-NEXT: br i1 [[CMP1]], label 
[[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK2: omp.inner.for.body: -// CHECK2-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK2-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !48 // CHECK2-NEXT: [[TMP8:%.*]] = zext i32 [[TMP7]] to i64 -// CHECK2-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK2-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !48 // CHECK2-NEXT: [[TMP10:%.*]] = zext i32 [[TMP9]] to i64 -// CHECK2-NEXT: call void @__kmpc_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]) -// CHECK2-NEXT: [[TMP11:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK2-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4 -// CHECK2-NEXT: call void @.omp_outlined..15(i32* [[TMP11]], i32* [[DOTBOUND_ZERO_ADDR]], i64 [[TMP8]], i64 [[TMP10]]) #[[ATTR2]] -// CHECK2-NEXT: call void @__kmpc_end_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]) +// CHECK2-NEXT: call void @__kmpc_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]), !llvm.access.group !48 +// CHECK2-NEXT: [[TMP11:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8, !llvm.access.group !48 +// CHECK2-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4, !llvm.access.group !48 +// CHECK2-NEXT: call void @.omp_outlined..15(i32* [[TMP11]], i32* [[DOTBOUND_ZERO_ADDR]], i64 [[TMP8]], i64 [[TMP10]]) #[[ATTR2]], !llvm.access.group !48 +// CHECK2-NEXT: call void @__kmpc_end_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]), !llvm.access.group !48 // CHECK2-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK2: omp.inner.for.inc: -// CHECK2-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK2-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK2-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !48 +// CHECK2-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !48 // CHECK2-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP12]], [[TMP13]] -// CHECK2-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 +// CHECK2-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !48 // CHECK2-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP49:![0-9]+]] // CHECK2: omp.inner.for.end: // CHECK2-NEXT: br label [[OMP_LOOP_EXIT:%.*]] @@ -2838,23 +2838,23 @@ int main() { // CHECK2-NEXT: store i32 [[TMP6]], i32* [[DOTOMP_IV]], align 4 // CHECK2-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK2: omp.inner.for.cond: -// CHECK2-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK2-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK2-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !51 +// CHECK2-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !51 // CHECK2-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP7]], [[TMP8]] // CHECK2-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK2: omp.inner.for.body: -// CHECK2-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK2-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !51 // CHECK2-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP9]], 1 // CHECK2-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK2-NEXT: store i32 [[ADD]], i32* [[I]], align 4 -// CHECK2-NEXT: call void @_Z3fn2v() +// CHECK2-NEXT: store i32 [[ADD]], i32* [[I]], align 4, 
!llvm.access.group !51 +// CHECK2-NEXT: call void @_Z3fn2v(), !llvm.access.group !51 // CHECK2-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK2: omp.body.continue: // CHECK2-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK2: omp.inner.for.inc: -// CHECK2-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK2-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !51 // CHECK2-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP10]], 1 -// CHECK2-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4 +// CHECK2-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !51 // CHECK2-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP52:![0-9]+]] // CHECK2: omp.inner.for.end: // CHECK2-NEXT: br label [[OMP_LOOP_EXIT:%.*]] @@ -2927,35 +2927,35 @@ int main() { // CHECK2-NEXT: store i32 [[TMP4]], i32* [[DOTOMP_IV]], align 4 // CHECK2-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK2: omp.inner.for.cond: -// CHECK2-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK2-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK2-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !54 +// CHECK2-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !54 // CHECK2-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] // CHECK2-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK2: omp.inner.for.body: -// CHECK2-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK2-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !54 // CHECK2-NEXT: [[TMP8:%.*]] = zext i32 [[TMP7]] to i64 -// CHECK2-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK2-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !54 // CHECK2-NEXT: [[TMP10:%.*]] = zext i32 [[TMP9]] to i64 -// CHECK2-NEXT: [[TMP11:%.*]] = load i8, i8* [[CONV]], align 8 +// CHECK2-NEXT: [[TMP11:%.*]] = load i8, i8* [[CONV]], align 8, !llvm.access.group !54 // CHECK2-NEXT: [[TOBOOL:%.*]] = trunc i8 [[TMP11]] to i1 // CHECK2-NEXT: br i1 [[TOBOOL]], label [[OMP_IF_THEN:%.*]], label [[OMP_IF_ELSE:%.*]] // CHECK2: omp_if.then: -// CHECK2-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 2, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64)* @.omp_outlined..17 to void (i32*, i32*, ...)*), i64 [[TMP8]], i64 [[TMP10]]) +// CHECK2-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) 
@__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 2, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64)* @.omp_outlined..17 to void (i32*, i32*, ...)*), i64 [[TMP8]], i64 [[TMP10]]), !llvm.access.group !54 // CHECK2-NEXT: br label [[OMP_IF_END:%.*]] // CHECK2: omp_if.else: -// CHECK2-NEXT: call void @__kmpc_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]) -// CHECK2-NEXT: [[TMP12:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK2-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4 -// CHECK2-NEXT: call void @.omp_outlined..17(i32* [[TMP12]], i32* [[DOTBOUND_ZERO_ADDR]], i64 [[TMP8]], i64 [[TMP10]]) #[[ATTR2]] -// CHECK2-NEXT: call void @__kmpc_end_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]) +// CHECK2-NEXT: call void @__kmpc_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]), !llvm.access.group !54 +// CHECK2-NEXT: [[TMP12:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8, !llvm.access.group !54 +// CHECK2-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4, !llvm.access.group !54 +// CHECK2-NEXT: call void @.omp_outlined..17(i32* [[TMP12]], i32* [[DOTBOUND_ZERO_ADDR]], i64 [[TMP8]], i64 [[TMP10]]) #[[ATTR2]], !llvm.access.group !54 +// CHECK2-NEXT: call void @__kmpc_end_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]), !llvm.access.group !54 // CHECK2-NEXT: br label [[OMP_IF_END]] // CHECK2: omp_if.end: // CHECK2-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK2: omp.inner.for.inc: -// CHECK2-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK2-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK2-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !54 +// CHECK2-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !54 // CHECK2-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP13]], [[TMP14]] -// CHECK2-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 +// CHECK2-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !54 // CHECK2-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP55:![0-9]+]] // CHECK2: omp.inner.for.end: // CHECK2-NEXT: br label [[OMP_LOOP_EXIT:%.*]] @@ -3017,23 +3017,23 @@ int main() { // CHECK2-NEXT: store i32 [[TMP6]], i32* [[DOTOMP_IV]], align 4 // CHECK2-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK2: omp.inner.for.cond: -// CHECK2-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK2-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK2-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !57 +// CHECK2-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !57 // CHECK2-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP7]], [[TMP8]] // CHECK2-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK2: omp.inner.for.body: -// CHECK2-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK2-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !57 // CHECK2-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP9]], 1 // CHECK2-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK2-NEXT: store i32 [[ADD]], i32* [[I]], align 4 -// CHECK2-NEXT: call void @_Z3fn3v() +// CHECK2-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !57 +// CHECK2-NEXT: call void @_Z3fn3v(), !llvm.access.group !57 // CHECK2-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK2: omp.body.continue: // CHECK2-NEXT: br label 
[[OMP_INNER_FOR_INC:%.*]] // CHECK2: omp.inner.for.inc: -// CHECK2-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK2-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !57 // CHECK2-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP10]], 1 -// CHECK2-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4 +// CHECK2-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !57 // CHECK2-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP58:![0-9]+]] // CHECK2: omp.inner.for.end: // CHECK2-NEXT: br label [[OMP_LOOP_EXIT:%.*]] @@ -3153,26 +3153,26 @@ int main() { // CHECK3-NEXT: store i32 [[TMP4]], i32* [[DOTOMP_IV]], align 4 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK3: omp.inner.for.cond: -// CHECK3-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK3-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !9 +// CHECK3-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !9 // CHECK3-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] // CHECK3-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK3: omp.inner.for.body: -// CHECK3-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK3-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !9 // CHECK3-NEXT: [[TMP8:%.*]] = zext i32 [[TMP7]] to i64 -// CHECK3-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK3-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !9 // CHECK3-NEXT: [[TMP10:%.*]] = zext i32 [[TMP9]] to i64 -// CHECK3-NEXT: [[TMP11:%.*]] = load i32, i32* [[CONV]], align 8, !nontemporal !10 +// CHECK3-NEXT: [[TMP11:%.*]] = load i32, i32* [[CONV]], align 8, !nontemporal !10, !llvm.access.group !9 // CHECK3-NEXT: [[CONV2:%.*]] = bitcast i64* [[ARG_CASTED]] to i32* -// CHECK3-NEXT: store i32 [[TMP11]], i32* [[CONV2]], align 4 -// CHECK3-NEXT: [[TMP12:%.*]] = load i64, i64* [[ARG_CASTED]], align 8 -// CHECK3-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 3, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64, i64)* @.omp_outlined..1 to void (i32*, i32*, ...)*), i64 [[TMP8]], i64 [[TMP10]], i64 [[TMP12]]) +// CHECK3-NEXT: store i32 [[TMP11]], i32* [[CONV2]], align 4, !llvm.access.group !9 +// CHECK3-NEXT: [[TMP12:%.*]] = load i64, i64* [[ARG_CASTED]], align 8, !llvm.access.group !9 +// CHECK3-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) 
@__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 3, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64, i64)* @.omp_outlined..1 to void (i32*, i32*, ...)*), i64 [[TMP8]], i64 [[TMP10]], i64 [[TMP12]]), !llvm.access.group !9 // CHECK3-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK3: omp.inner.for.inc: -// CHECK3-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK3-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !9 +// CHECK3-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !9 // CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP13]], [[TMP14]] -// CHECK3-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !9 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP11:![0-9]+]] // CHECK3: omp.inner.for.end: // CHECK3-NEXT: br label [[OMP_LOOP_EXIT:%.*]] @@ -3237,23 +3237,23 @@ int main() { // CHECK3-NEXT: store i32 [[TMP6]], i32* [[DOTOMP_IV]], align 4 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK3: omp.inner.for.cond: -// CHECK3-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !14 +// CHECK3-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !14 // CHECK3-NEXT: [[CMP3:%.*]] = icmp sle i32 [[TMP7]], [[TMP8]] // CHECK3-NEXT: br i1 [[CMP3]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK3: omp.inner.for.body: -// CHECK3-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !14 // CHECK3-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP9]], 1 // CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK3-NEXT: store i32 [[ADD]], i32* [[I]], align 4 -// CHECK3-NEXT: store i32 0, i32* [[CONV]], align 8, !nontemporal !10 +// CHECK3-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !14 +// CHECK3-NEXT: store i32 0, i32* [[CONV]], align 8, !nontemporal !10, !llvm.access.group !14 // CHECK3-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK3: omp.body.continue: // CHECK3-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK3: omp.inner.for.inc: -// CHECK3-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !14 // CHECK3-NEXT: [[ADD4:%.*]] = add nsw i32 [[TMP10]], 1 -// CHECK3-NEXT: store i32 [[ADD4]], i32* [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: store i32 [[ADD4]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !14 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP15:![0-9]+]] // CHECK3: omp.inner.for.end: // CHECK3-NEXT: br label [[OMP_LOOP_EXIT:%.*]] @@ -3313,26 +3313,26 @@ int main() { // CHECK3-NEXT: store i32 [[TMP4]], i32* [[DOTOMP_IV]], align 4 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK3: omp.inner.for.cond: -// CHECK3-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK3-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !19 +// CHECK3-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !19 // CHECK3-NEXT: [[CMP1:%.*]] = icmp 
sle i32 [[TMP5]], [[TMP6]] // CHECK3-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK3: omp.inner.for.body: -// CHECK3-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK3-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !19 // CHECK3-NEXT: [[TMP8:%.*]] = zext i32 [[TMP7]] to i64 -// CHECK3-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK3-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !19 // CHECK3-NEXT: [[TMP10:%.*]] = zext i32 [[TMP9]] to i64 -// CHECK3-NEXT: call void @__kmpc_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]) -// CHECK3-NEXT: [[TMP11:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK3-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4 -// CHECK3-NEXT: call void @.omp_outlined..3(i32* [[TMP11]], i32* [[DOTBOUND_ZERO_ADDR]], i64 [[TMP8]], i64 [[TMP10]]) #[[ATTR2]] -// CHECK3-NEXT: call void @__kmpc_end_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]) +// CHECK3-NEXT: call void @__kmpc_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]), !llvm.access.group !19 +// CHECK3-NEXT: [[TMP11:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8, !llvm.access.group !19 +// CHECK3-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4, !llvm.access.group !19 +// CHECK3-NEXT: call void @.omp_outlined..3(i32* [[TMP11]], i32* [[DOTBOUND_ZERO_ADDR]], i64 [[TMP8]], i64 [[TMP10]]) #[[ATTR2]], !llvm.access.group !19 +// CHECK3-NEXT: call void @__kmpc_end_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]), !llvm.access.group !19 // CHECK3-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK3: omp.inner.for.inc: -// CHECK3-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK3-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !19 +// CHECK3-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !19 // CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP12]], [[TMP13]] -// CHECK3-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !19 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP20:![0-9]+]] // CHECK3: omp.inner.for.end: // CHECK3-NEXT: br label [[OMP_LOOP_EXIT:%.*]] @@ -3394,23 +3394,23 @@ int main() { // CHECK3-NEXT: store i32 [[TMP6]], i32* [[DOTOMP_IV]], align 4 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK3: omp.inner.for.cond: -// CHECK3-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !22 +// CHECK3-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !22 // CHECK3-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP7]], [[TMP8]] // CHECK3-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK3: omp.inner.for.body: -// CHECK3-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !22 // CHECK3-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP9]], 1 // CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK3-NEXT: store i32 [[ADD]], i32* [[I]], align 4 -// CHECK3-NEXT: call void 
@_Z9gtid_testv() +// CHECK3-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !22 +// CHECK3-NEXT: call void @_Z9gtid_testv(), !llvm.access.group !22 // CHECK3-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK3: omp.body.continue: // CHECK3-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK3: omp.inner.for.inc: -// CHECK3-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !22 // CHECK3-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP10]], 1 -// CHECK3-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !22 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP23:![0-9]+]] // CHECK3: omp.inner.for.end: // CHECK3-NEXT: br label [[OMP_LOOP_EXIT:%.*]] @@ -3535,22 +3535,22 @@ int main() { // CHECK3-NEXT: store i32 [[TMP4]], i32* [[DOTOMP_IV]], align 4 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK3: omp.inner.for.cond: -// CHECK3-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK3-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !25 +// CHECK3-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !25 // CHECK3-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] // CHECK3-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK3: omp.inner.for.body: -// CHECK3-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK3-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !25 // CHECK3-NEXT: [[TMP8:%.*]] = zext i32 [[TMP7]] to i64 -// CHECK3-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK3-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !25 // CHECK3-NEXT: [[TMP10:%.*]] = zext i32 [[TMP9]] to i64 -// CHECK3-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 2, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64)* @.omp_outlined..5 to void (i32*, i32*, ...)*), i64 [[TMP8]], i64 [[TMP10]]) +// CHECK3-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) 
@__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 2, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64)* @.omp_outlined..5 to void (i32*, i32*, ...)*), i64 [[TMP8]], i64 [[TMP10]]), !llvm.access.group !25 // CHECK3-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK3: omp.inner.for.inc: -// CHECK3-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK3-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !25 +// CHECK3-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !25 // CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP11]], [[TMP12]] -// CHECK3-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !25 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP26:![0-9]+]] // CHECK3: omp.inner.for.end: // CHECK3-NEXT: br label [[OMP_LOOP_EXIT:%.*]] @@ -3612,23 +3612,23 @@ int main() { // CHECK3-NEXT: store i32 [[TMP6]], i32* [[DOTOMP_IV]], align 4 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK3: omp.inner.for.cond: -// CHECK3-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !28 +// CHECK3-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !28 // CHECK3-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP7]], [[TMP8]] // CHECK3-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK3: omp.inner.for.body: -// CHECK3-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !28 // CHECK3-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP9]], 1 // CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK3-NEXT: store i32 [[ADD]], i32* [[I]], align 4 -// CHECK3-NEXT: call void @_Z3fn4v() +// CHECK3-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !28 +// CHECK3-NEXT: call void @_Z3fn4v(), !llvm.access.group !28 // CHECK3-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK3: omp.body.continue: // CHECK3-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK3: omp.inner.for.inc: -// CHECK3-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !28 // CHECK3-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP10]], 1 -// CHECK3-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !28 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP29:![0-9]+]] // CHECK3: omp.inner.for.end: // CHECK3-NEXT: br label [[OMP_LOOP_EXIT:%.*]] @@ -3865,41 +3865,41 @@ int main() { // CHECK3: omp_if.then: // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK3: omp.inner.for.cond: -// CHECK3-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK3-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !34 +// CHECK3-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !34 // CHECK3-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] // CHECK3-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label 
[[OMP_INNER_FOR_END:%.*]] // CHECK3: omp.inner.for.body: -// CHECK3-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK3-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !34 // CHECK3-NEXT: [[TMP9:%.*]] = zext i32 [[TMP8]] to i64 -// CHECK3-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK3-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !34 // CHECK3-NEXT: [[TMP11:%.*]] = zext i32 [[TMP10]] to i64 -// CHECK3-NEXT: [[TMP12:%.*]] = load i8, i8* [[CONV]], align 8 +// CHECK3-NEXT: [[TMP12:%.*]] = load i8, i8* [[CONV]], align 8, !llvm.access.group !34 // CHECK3-NEXT: [[TOBOOL2:%.*]] = trunc i8 [[TMP12]] to i1 // CHECK3-NEXT: [[CONV3:%.*]] = bitcast i64* [[DOTCAPTURE_EXPR__CASTED]] to i8* // CHECK3-NEXT: [[FROMBOOL:%.*]] = zext i1 [[TOBOOL2]] to i8 -// CHECK3-NEXT: store i8 [[FROMBOOL]], i8* [[CONV3]], align 1 -// CHECK3-NEXT: [[TMP13:%.*]] = load i64, i64* [[DOTCAPTURE_EXPR__CASTED]], align 8 -// CHECK3-NEXT: [[TMP14:%.*]] = load i8, i8* [[CONV]], align 8 +// CHECK3-NEXT: store i8 [[FROMBOOL]], i8* [[CONV3]], align 1, !llvm.access.group !34 +// CHECK3-NEXT: [[TMP13:%.*]] = load i64, i64* [[DOTCAPTURE_EXPR__CASTED]], align 8, !llvm.access.group !34 +// CHECK3-NEXT: [[TMP14:%.*]] = load i8, i8* [[CONV]], align 8, !llvm.access.group !34 // CHECK3-NEXT: [[TOBOOL4:%.*]] = trunc i8 [[TMP14]] to i1 // CHECK3-NEXT: br i1 [[TOBOOL4]], label [[OMP_IF_THEN5:%.*]], label [[OMP_IF_ELSE:%.*]] // CHECK3: omp_if.then5: -// CHECK3-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 3, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64, i64)* @.omp_outlined..9 to void (i32*, i32*, ...)*), i64 [[TMP9]], i64 [[TMP11]], i64 [[TMP13]]) +// CHECK3-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) 
@__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 3, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64, i64)* @.omp_outlined..9 to void (i32*, i32*, ...)*), i64 [[TMP9]], i64 [[TMP11]], i64 [[TMP13]]), !llvm.access.group !34 // CHECK3-NEXT: br label [[OMP_IF_END:%.*]] // CHECK3: omp_if.else: -// CHECK3-NEXT: call void @__kmpc_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]) -// CHECK3-NEXT: [[TMP15:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK3-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4 -// CHECK3-NEXT: call void @.omp_outlined..9(i32* [[TMP15]], i32* [[DOTBOUND_ZERO_ADDR]], i64 [[TMP9]], i64 [[TMP11]], i64 [[TMP13]]) #[[ATTR2]] -// CHECK3-NEXT: call void @__kmpc_end_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]) +// CHECK3-NEXT: call void @__kmpc_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]), !llvm.access.group !34 +// CHECK3-NEXT: [[TMP15:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8, !llvm.access.group !34 +// CHECK3-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4, !llvm.access.group !34 +// CHECK3-NEXT: call void @.omp_outlined..9(i32* [[TMP15]], i32* [[DOTBOUND_ZERO_ADDR]], i64 [[TMP9]], i64 [[TMP11]], i64 [[TMP13]]) #[[ATTR2]], !llvm.access.group !34 +// CHECK3-NEXT: call void @__kmpc_end_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]), !llvm.access.group !34 // CHECK3-NEXT: br label [[OMP_IF_END]] // CHECK3: omp_if.end: // CHECK3-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK3: omp.inner.for.inc: -// CHECK3-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK3-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !34 +// CHECK3-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !34 // CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP16]], [[TMP17]] -// CHECK3-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !34 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP35:![0-9]+]] // CHECK3: omp.inner.for.end: // CHECK3-NEXT: br label [[OMP_IF_END22:%.*]] @@ -4011,23 +4011,23 @@ int main() { // CHECK3-NEXT: store i32 [[TMP7]], i32* [[DOTOMP_IV]], align 4 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK3: omp.inner.for.cond: -// CHECK3-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !38 +// CHECK3-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !38 // CHECK3-NEXT: [[CMP3:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] // CHECK3-NEXT: br i1 [[CMP3]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK3: omp.inner.for.body: -// CHECK3-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !38 // CHECK3-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP10]], 1 // CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK3-NEXT: store i32 [[ADD]], i32* [[I]], align 4 -// CHECK3-NEXT: call void @_Z3fn6v() +// CHECK3-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !38 +// CHECK3-NEXT: call void @_Z3fn6v(), !llvm.access.group !38 // CHECK3-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK3: 
omp.body.continue: // CHECK3-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK3: omp.inner.for.inc: -// CHECK3-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !38 // CHECK3-NEXT: [[ADD4:%.*]] = add nsw i32 [[TMP11]], 1 -// CHECK3-NEXT: store i32 [[ADD4]], i32* [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: store i32 [[ADD4]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !38 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP39:![0-9]+]] // CHECK3: omp.inner.for.end: // CHECK3-NEXT: br label [[OMP_IF_END:%.*]] @@ -4139,23 +4139,23 @@ int main() { // CHECK3-NEXT: store i32 [[TMP7]], i32* [[DOTOMP_IV]], align 4 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK3: omp.inner.for.cond: -// CHECK3-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !42 +// CHECK3-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !42 // CHECK3-NEXT: [[CMP3:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] // CHECK3-NEXT: br i1 [[CMP3]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK3: omp.inner.for.body: -// CHECK3-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !42 // CHECK3-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP10]], 1 // CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK3-NEXT: store i32 [[ADD]], i32* [[I]], align 4 -// CHECK3-NEXT: call void @_Z3fn6v() +// CHECK3-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !42 +// CHECK3-NEXT: call void @_Z3fn6v(), !llvm.access.group !42 // CHECK3-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK3: omp.body.continue: // CHECK3-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK3: omp.inner.for.inc: -// CHECK3-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !42 // CHECK3-NEXT: [[ADD4:%.*]] = add nsw i32 [[TMP11]], 1 -// CHECK3-NEXT: store i32 [[ADD4]], i32* [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: store i32 [[ADD4]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !42 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP43:![0-9]+]] // CHECK3: omp.inner.for.end: // CHECK3-NEXT: br label [[OMP_IF_END:%.*]] @@ -4312,22 +4312,22 @@ int main() { // CHECK3-NEXT: store i32 [[TMP4]], i32* [[DOTOMP_IV]], align 4 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK3: omp.inner.for.cond: -// CHECK3-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK3-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !46 +// CHECK3-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !46 // CHECK3-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] // CHECK3-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK3: omp.inner.for.body: -// CHECK3-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK3-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !46 // CHECK3-NEXT: [[TMP8:%.*]] = zext i32 [[TMP7]] to i64 -// CHECK3-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 
+// CHECK3-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !46 // CHECK3-NEXT: [[TMP10:%.*]] = zext i32 [[TMP9]] to i64 -// CHECK3-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 2, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64)* @.omp_outlined..14 to void (i32*, i32*, ...)*), i64 [[TMP8]], i64 [[TMP10]]) +// CHECK3-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 2, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64)* @.omp_outlined..14 to void (i32*, i32*, ...)*), i64 [[TMP8]], i64 [[TMP10]]), !llvm.access.group !46 // CHECK3-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK3: omp.inner.for.inc: -// CHECK3-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK3-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !46 +// CHECK3-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !46 // CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP11]], [[TMP12]] -// CHECK3-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !46 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP47:![0-9]+]] // CHECK3: omp.inner.for.end: // CHECK3-NEXT: br label [[OMP_LOOP_EXIT:%.*]] @@ -4389,23 +4389,23 @@ int main() { // CHECK3-NEXT: store i32 [[TMP6]], i32* [[DOTOMP_IV]], align 4 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK3: omp.inner.for.cond: -// CHECK3-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !49 +// CHECK3-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !49 // CHECK3-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP7]], [[TMP8]] // CHECK3-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK3: omp.inner.for.body: -// CHECK3-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !49 // CHECK3-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP9]], 1 // CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK3-NEXT: store i32 [[ADD]], i32* [[I]], align 4 -// CHECK3-NEXT: call void @_Z3fn1v() +// CHECK3-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !49 +// CHECK3-NEXT: call void @_Z3fn1v(), !llvm.access.group !49 // CHECK3-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK3: omp.body.continue: // CHECK3-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK3: omp.inner.for.inc: -// CHECK3-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !49 // CHECK3-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP10]], 1 -// CHECK3-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !49 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP50:![0-9]+]] // CHECK3: omp.inner.for.end: // CHECK3-NEXT: br label [[OMP_LOOP_EXIT:%.*]] @@ -4635,35 +4635,35 @@ int main() { // CHECK3-NEXT: store i32 [[TMP4]], i32* [[DOTOMP_IV]], align 4 // CHECK3-NEXT: br label 
[[OMP_INNER_FOR_COND:%.*]] // CHECK3: omp.inner.for.cond: -// CHECK3-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK3-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !54 +// CHECK3-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !54 // CHECK3-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] // CHECK3-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK3: omp.inner.for.body: -// CHECK3-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK3-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !54 // CHECK3-NEXT: [[TMP8:%.*]] = zext i32 [[TMP7]] to i64 -// CHECK3-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK3-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !54 // CHECK3-NEXT: [[TMP10:%.*]] = zext i32 [[TMP9]] to i64 -// CHECK3-NEXT: [[TMP11:%.*]] = load i8, i8* [[CONV]], align 8 +// CHECK3-NEXT: [[TMP11:%.*]] = load i8, i8* [[CONV]], align 8, !llvm.access.group !54 // CHECK3-NEXT: [[TOBOOL:%.*]] = trunc i8 [[TMP11]] to i1 // CHECK3-NEXT: br i1 [[TOBOOL]], label [[OMP_IF_THEN:%.*]], label [[OMP_IF_ELSE:%.*]] // CHECK3: omp_if.then: -// CHECK3-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 2, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64)* @.omp_outlined..18 to void (i32*, i32*, ...)*), i64 [[TMP8]], i64 [[TMP10]]) +// CHECK3-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 2, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64)* @.omp_outlined..18 to void (i32*, i32*, ...)*), i64 [[TMP8]], i64 [[TMP10]]), !llvm.access.group !54 // CHECK3-NEXT: br label [[OMP_IF_END:%.*]] // CHECK3: omp_if.else: -// CHECK3-NEXT: call void @__kmpc_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]) -// CHECK3-NEXT: [[TMP12:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK3-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4 -// CHECK3-NEXT: call void @.omp_outlined..18(i32* [[TMP12]], i32* [[DOTBOUND_ZERO_ADDR]], i64 [[TMP8]], i64 [[TMP10]]) #[[ATTR2]] -// CHECK3-NEXT: call void @__kmpc_end_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]) +// CHECK3-NEXT: call void @__kmpc_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]), !llvm.access.group !54 +// CHECK3-NEXT: [[TMP12:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8, !llvm.access.group !54 +// CHECK3-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4, !llvm.access.group !54 +// CHECK3-NEXT: call void @.omp_outlined..18(i32* [[TMP12]], i32* [[DOTBOUND_ZERO_ADDR]], i64 [[TMP8]], i64 [[TMP10]]) #[[ATTR2]], !llvm.access.group !54 +// CHECK3-NEXT: call void @__kmpc_end_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]), !llvm.access.group !54 // CHECK3-NEXT: br label [[OMP_IF_END]] // CHECK3: omp_if.end: // CHECK3-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK3: omp.inner.for.inc: -// CHECK3-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK3-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !54 +// CHECK3-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, 
!llvm.access.group !54 // CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP13]], [[TMP14]] -// CHECK3-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !54 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP55:![0-9]+]] // CHECK3: omp.inner.for.end: // CHECK3-NEXT: br label [[OMP_LOOP_EXIT:%.*]] @@ -4725,23 +4725,23 @@ int main() { // CHECK3-NEXT: store i32 [[TMP6]], i32* [[DOTOMP_IV]], align 4 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK3: omp.inner.for.cond: -// CHECK3-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !57 +// CHECK3-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !57 // CHECK3-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP7]], [[TMP8]] // CHECK3-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK3: omp.inner.for.body: -// CHECK3-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !57 // CHECK3-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP9]], 1 // CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK3-NEXT: store i32 [[ADD]], i32* [[I]], align 4 -// CHECK3-NEXT: call void @_Z3fn3v() +// CHECK3-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !57 +// CHECK3-NEXT: call void @_Z3fn3v(), !llvm.access.group !57 // CHECK3-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK3: omp.body.continue: // CHECK3-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK3: omp.inner.for.inc: -// CHECK3-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !57 // CHECK3-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP10]], 1 -// CHECK3-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !57 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP58:![0-9]+]] // CHECK3: omp.inner.for.end: // CHECK3-NEXT: br label [[OMP_LOOP_EXIT:%.*]] @@ -4861,26 +4861,26 @@ int main() { // CHECK4-NEXT: store i32 [[TMP4]], i32* [[DOTOMP_IV]], align 4 // CHECK4-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK4: omp.inner.for.cond: -// CHECK4-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK4-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK4-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !9 +// CHECK4-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !9 // CHECK4-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] // CHECK4-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK4: omp.inner.for.body: -// CHECK4-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK4-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !9 // CHECK4-NEXT: [[TMP8:%.*]] = zext i32 [[TMP7]] to i64 -// CHECK4-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK4-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !9 // CHECK4-NEXT: [[TMP10:%.*]] = zext i32 [[TMP9]] to i64 -// CHECK4-NEXT: [[TMP11:%.*]] = load i32, i32* [[CONV]], align 8, !nontemporal !10 
+// CHECK4-NEXT: [[TMP11:%.*]] = load i32, i32* [[CONV]], align 8, !nontemporal !10, !llvm.access.group !9 // CHECK4-NEXT: [[CONV2:%.*]] = bitcast i64* [[ARG_CASTED]] to i32* -// CHECK4-NEXT: store i32 [[TMP11]], i32* [[CONV2]], align 4 -// CHECK4-NEXT: [[TMP12:%.*]] = load i64, i64* [[ARG_CASTED]], align 8 -// CHECK4-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 3, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64, i64)* @.omp_outlined..1 to void (i32*, i32*, ...)*), i64 [[TMP8]], i64 [[TMP10]], i64 [[TMP12]]) +// CHECK4-NEXT: store i32 [[TMP11]], i32* [[CONV2]], align 4, !llvm.access.group !9 +// CHECK4-NEXT: [[TMP12:%.*]] = load i64, i64* [[ARG_CASTED]], align 8, !llvm.access.group !9 +// CHECK4-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 3, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64, i64)* @.omp_outlined..1 to void (i32*, i32*, ...)*), i64 [[TMP8]], i64 [[TMP10]], i64 [[TMP12]]), !llvm.access.group !9 // CHECK4-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK4: omp.inner.for.inc: -// CHECK4-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK4-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK4-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !9 +// CHECK4-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !9 // CHECK4-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP13]], [[TMP14]] -// CHECK4-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 +// CHECK4-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !9 // CHECK4-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP11:![0-9]+]] // CHECK4: omp.inner.for.end: // CHECK4-NEXT: br label [[OMP_LOOP_EXIT:%.*]] @@ -4945,23 +4945,23 @@ int main() { // CHECK4-NEXT: store i32 [[TMP6]], i32* [[DOTOMP_IV]], align 4 // CHECK4-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK4: omp.inner.for.cond: -// CHECK4-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK4-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK4-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !14 +// CHECK4-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !14 // CHECK4-NEXT: [[CMP3:%.*]] = icmp sle i32 [[TMP7]], [[TMP8]] // CHECK4-NEXT: br i1 [[CMP3]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK4: omp.inner.for.body: -// CHECK4-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK4-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !14 // CHECK4-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP9]], 1 // CHECK4-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK4-NEXT: store i32 [[ADD]], i32* [[I]], align 4 -// CHECK4-NEXT: store i32 0, i32* [[CONV]], align 8, !nontemporal !10 +// CHECK4-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !14 +// CHECK4-NEXT: store i32 0, i32* [[CONV]], align 8, !nontemporal !10, !llvm.access.group !14 // CHECK4-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK4: omp.body.continue: // CHECK4-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK4: omp.inner.for.inc: -// CHECK4-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK4-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !14 // CHECK4-NEXT: [[ADD4:%.*]] = add nsw i32 [[TMP10]], 1 -// 
CHECK4-NEXT: store i32 [[ADD4]], i32* [[DOTOMP_IV]], align 4 +// CHECK4-NEXT: store i32 [[ADD4]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !14 // CHECK4-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP15:![0-9]+]] // CHECK4: omp.inner.for.end: // CHECK4-NEXT: br label [[OMP_LOOP_EXIT:%.*]] @@ -5021,26 +5021,26 @@ int main() { // CHECK4-NEXT: store i32 [[TMP4]], i32* [[DOTOMP_IV]], align 4 // CHECK4-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK4: omp.inner.for.cond: -// CHECK4-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK4-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK4-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !19 +// CHECK4-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !19 // CHECK4-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] // CHECK4-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK4: omp.inner.for.body: -// CHECK4-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK4-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !19 // CHECK4-NEXT: [[TMP8:%.*]] = zext i32 [[TMP7]] to i64 -// CHECK4-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK4-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !19 // CHECK4-NEXT: [[TMP10:%.*]] = zext i32 [[TMP9]] to i64 -// CHECK4-NEXT: call void @__kmpc_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]) -// CHECK4-NEXT: [[TMP11:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK4-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4 -// CHECK4-NEXT: call void @.omp_outlined..3(i32* [[TMP11]], i32* [[DOTBOUND_ZERO_ADDR]], i64 [[TMP8]], i64 [[TMP10]]) #[[ATTR2]] -// CHECK4-NEXT: call void @__kmpc_end_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]) +// CHECK4-NEXT: call void @__kmpc_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]), !llvm.access.group !19 +// CHECK4-NEXT: [[TMP11:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8, !llvm.access.group !19 +// CHECK4-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4, !llvm.access.group !19 +// CHECK4-NEXT: call void @.omp_outlined..3(i32* [[TMP11]], i32* [[DOTBOUND_ZERO_ADDR]], i64 [[TMP8]], i64 [[TMP10]]) #[[ATTR2]], !llvm.access.group !19 +// CHECK4-NEXT: call void @__kmpc_end_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]), !llvm.access.group !19 // CHECK4-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK4: omp.inner.for.inc: -// CHECK4-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK4-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK4-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !19 +// CHECK4-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !19 // CHECK4-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP12]], [[TMP13]] -// CHECK4-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 +// CHECK4-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !19 // CHECK4-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP20:![0-9]+]] // CHECK4: omp.inner.for.end: // CHECK4-NEXT: br label [[OMP_LOOP_EXIT:%.*]] @@ -5102,23 +5102,23 @@ int main() { // CHECK4-NEXT: store i32 [[TMP6]], i32* [[DOTOMP_IV]], align 4 // CHECK4-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK4: omp.inner.for.cond: -// CHECK4-NEXT: 
[[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK4-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK4-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !22 +// CHECK4-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !22 // CHECK4-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP7]], [[TMP8]] // CHECK4-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK4: omp.inner.for.body: -// CHECK4-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK4-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !22 // CHECK4-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP9]], 1 // CHECK4-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK4-NEXT: store i32 [[ADD]], i32* [[I]], align 4 -// CHECK4-NEXT: call void @_Z9gtid_testv() +// CHECK4-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !22 +// CHECK4-NEXT: call void @_Z9gtid_testv(), !llvm.access.group !22 // CHECK4-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK4: omp.body.continue: // CHECK4-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK4: omp.inner.for.inc: -// CHECK4-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK4-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !22 // CHECK4-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP10]], 1 -// CHECK4-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4 +// CHECK4-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !22 // CHECK4-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP23:![0-9]+]] // CHECK4: omp.inner.for.end: // CHECK4-NEXT: br label [[OMP_LOOP_EXIT:%.*]] @@ -5243,22 +5243,22 @@ int main() { // CHECK4-NEXT: store i32 [[TMP4]], i32* [[DOTOMP_IV]], align 4 // CHECK4-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK4: omp.inner.for.cond: -// CHECK4-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK4-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK4-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !25 +// CHECK4-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !25 // CHECK4-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] // CHECK4-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK4: omp.inner.for.body: -// CHECK4-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK4-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !25 // CHECK4-NEXT: [[TMP8:%.*]] = zext i32 [[TMP7]] to i64 -// CHECK4-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK4-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !25 // CHECK4-NEXT: [[TMP10:%.*]] = zext i32 [[TMP9]] to i64 -// CHECK4-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 2, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64)* @.omp_outlined..5 to void (i32*, i32*, ...)*), i64 [[TMP8]], i64 [[TMP10]]) +// CHECK4-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) 
@__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 2, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64)* @.omp_outlined..5 to void (i32*, i32*, ...)*), i64 [[TMP8]], i64 [[TMP10]]), !llvm.access.group !25 // CHECK4-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK4: omp.inner.for.inc: -// CHECK4-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK4-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK4-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !25 +// CHECK4-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !25 // CHECK4-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP11]], [[TMP12]] -// CHECK4-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 +// CHECK4-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !25 // CHECK4-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP26:![0-9]+]] // CHECK4: omp.inner.for.end: // CHECK4-NEXT: br label [[OMP_LOOP_EXIT:%.*]] @@ -5320,23 +5320,23 @@ int main() { // CHECK4-NEXT: store i32 [[TMP6]], i32* [[DOTOMP_IV]], align 4 // CHECK4-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK4: omp.inner.for.cond: -// CHECK4-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK4-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK4-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !28 +// CHECK4-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !28 // CHECK4-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP7]], [[TMP8]] // CHECK4-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK4: omp.inner.for.body: -// CHECK4-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK4-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !28 // CHECK4-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP9]], 1 // CHECK4-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK4-NEXT: store i32 [[ADD]], i32* [[I]], align 4 -// CHECK4-NEXT: call void @_Z3fn4v() +// CHECK4-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !28 +// CHECK4-NEXT: call void @_Z3fn4v(), !llvm.access.group !28 // CHECK4-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK4: omp.body.continue: // CHECK4-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK4: omp.inner.for.inc: -// CHECK4-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK4-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !28 // CHECK4-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP10]], 1 -// CHECK4-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4 +// CHECK4-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !28 // CHECK4-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP29:![0-9]+]] // CHECK4: omp.inner.for.end: // CHECK4-NEXT: br label [[OMP_LOOP_EXIT:%.*]] @@ -5573,41 +5573,41 @@ int main() { // CHECK4: omp_if.then: // CHECK4-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK4: omp.inner.for.cond: -// CHECK4-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK4-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK4-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !34 +// CHECK4-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !34 // CHECK4-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] // CHECK4-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label 
[[OMP_INNER_FOR_END:%.*]] // CHECK4: omp.inner.for.body: -// CHECK4-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK4-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !34 // CHECK4-NEXT: [[TMP9:%.*]] = zext i32 [[TMP8]] to i64 -// CHECK4-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK4-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !34 // CHECK4-NEXT: [[TMP11:%.*]] = zext i32 [[TMP10]] to i64 -// CHECK4-NEXT: [[TMP12:%.*]] = load i8, i8* [[CONV]], align 8 +// CHECK4-NEXT: [[TMP12:%.*]] = load i8, i8* [[CONV]], align 8, !llvm.access.group !34 // CHECK4-NEXT: [[TOBOOL2:%.*]] = trunc i8 [[TMP12]] to i1 // CHECK4-NEXT: [[CONV3:%.*]] = bitcast i64* [[DOTCAPTURE_EXPR__CASTED]] to i8* // CHECK4-NEXT: [[FROMBOOL:%.*]] = zext i1 [[TOBOOL2]] to i8 -// CHECK4-NEXT: store i8 [[FROMBOOL]], i8* [[CONV3]], align 1 -// CHECK4-NEXT: [[TMP13:%.*]] = load i64, i64* [[DOTCAPTURE_EXPR__CASTED]], align 8 -// CHECK4-NEXT: [[TMP14:%.*]] = load i8, i8* [[CONV]], align 8 +// CHECK4-NEXT: store i8 [[FROMBOOL]], i8* [[CONV3]], align 1, !llvm.access.group !34 +// CHECK4-NEXT: [[TMP13:%.*]] = load i64, i64* [[DOTCAPTURE_EXPR__CASTED]], align 8, !llvm.access.group !34 +// CHECK4-NEXT: [[TMP14:%.*]] = load i8, i8* [[CONV]], align 8, !llvm.access.group !34 // CHECK4-NEXT: [[TOBOOL4:%.*]] = trunc i8 [[TMP14]] to i1 // CHECK4-NEXT: br i1 [[TOBOOL4]], label [[OMP_IF_THEN5:%.*]], label [[OMP_IF_ELSE:%.*]] // CHECK4: omp_if.then5: -// CHECK4-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 3, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64, i64)* @.omp_outlined..9 to void (i32*, i32*, ...)*), i64 [[TMP9]], i64 [[TMP11]], i64 [[TMP13]]) +// CHECK4-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) 
@__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 3, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64, i64)* @.omp_outlined..9 to void (i32*, i32*, ...)*), i64 [[TMP9]], i64 [[TMP11]], i64 [[TMP13]]), !llvm.access.group !34 // CHECK4-NEXT: br label [[OMP_IF_END:%.*]] // CHECK4: omp_if.else: -// CHECK4-NEXT: call void @__kmpc_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]) -// CHECK4-NEXT: [[TMP15:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK4-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4 -// CHECK4-NEXT: call void @.omp_outlined..9(i32* [[TMP15]], i32* [[DOTBOUND_ZERO_ADDR]], i64 [[TMP9]], i64 [[TMP11]], i64 [[TMP13]]) #[[ATTR2]] -// CHECK4-NEXT: call void @__kmpc_end_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]) +// CHECK4-NEXT: call void @__kmpc_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]), !llvm.access.group !34 +// CHECK4-NEXT: [[TMP15:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8, !llvm.access.group !34 +// CHECK4-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4, !llvm.access.group !34 +// CHECK4-NEXT: call void @.omp_outlined..9(i32* [[TMP15]], i32* [[DOTBOUND_ZERO_ADDR]], i64 [[TMP9]], i64 [[TMP11]], i64 [[TMP13]]) #[[ATTR2]], !llvm.access.group !34 +// CHECK4-NEXT: call void @__kmpc_end_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]), !llvm.access.group !34 // CHECK4-NEXT: br label [[OMP_IF_END]] // CHECK4: omp_if.end: // CHECK4-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK4: omp.inner.for.inc: -// CHECK4-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK4-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK4-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !34 +// CHECK4-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !34 // CHECK4-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP16]], [[TMP17]] -// CHECK4-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 +// CHECK4-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !34 // CHECK4-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP35:![0-9]+]] // CHECK4: omp.inner.for.end: // CHECK4-NEXT: br label [[OMP_IF_END22:%.*]] @@ -5719,23 +5719,23 @@ int main() { // CHECK4-NEXT: store i32 [[TMP7]], i32* [[DOTOMP_IV]], align 4 // CHECK4-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK4: omp.inner.for.cond: -// CHECK4-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK4-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK4-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !38 +// CHECK4-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !38 // CHECK4-NEXT: [[CMP3:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] // CHECK4-NEXT: br i1 [[CMP3]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK4: omp.inner.for.body: -// CHECK4-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK4-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !38 // CHECK4-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP10]], 1 // CHECK4-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK4-NEXT: store i32 [[ADD]], i32* [[I]], align 4 -// CHECK4-NEXT: call void @_Z3fn6v() +// CHECK4-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !38 +// CHECK4-NEXT: call void @_Z3fn6v(), !llvm.access.group !38 // CHECK4-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK4: 
omp.body.continue: // CHECK4-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK4: omp.inner.for.inc: -// CHECK4-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK4-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !38 // CHECK4-NEXT: [[ADD4:%.*]] = add nsw i32 [[TMP11]], 1 -// CHECK4-NEXT: store i32 [[ADD4]], i32* [[DOTOMP_IV]], align 4 +// CHECK4-NEXT: store i32 [[ADD4]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !38 // CHECK4-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP39:![0-9]+]] // CHECK4: omp.inner.for.end: // CHECK4-NEXT: br label [[OMP_IF_END:%.*]] @@ -5847,23 +5847,23 @@ int main() { // CHECK4-NEXT: store i32 [[TMP7]], i32* [[DOTOMP_IV]], align 4 // CHECK4-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK4: omp.inner.for.cond: -// CHECK4-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK4-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK4-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !42 +// CHECK4-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !42 // CHECK4-NEXT: [[CMP3:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] // CHECK4-NEXT: br i1 [[CMP3]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK4: omp.inner.for.body: -// CHECK4-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK4-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !42 // CHECK4-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP10]], 1 // CHECK4-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK4-NEXT: store i32 [[ADD]], i32* [[I]], align 4 -// CHECK4-NEXT: call void @_Z3fn6v() +// CHECK4-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !42 +// CHECK4-NEXT: call void @_Z3fn6v(), !llvm.access.group !42 // CHECK4-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK4: omp.body.continue: // CHECK4-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK4: omp.inner.for.inc: -// CHECK4-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK4-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !42 // CHECK4-NEXT: [[ADD4:%.*]] = add nsw i32 [[TMP11]], 1 -// CHECK4-NEXT: store i32 [[ADD4]], i32* [[DOTOMP_IV]], align 4 +// CHECK4-NEXT: store i32 [[ADD4]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !42 // CHECK4-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP43:![0-9]+]] // CHECK4: omp.inner.for.end: // CHECK4-NEXT: br label [[OMP_IF_END:%.*]] @@ -6020,22 +6020,22 @@ int main() { // CHECK4-NEXT: store i32 [[TMP4]], i32* [[DOTOMP_IV]], align 4 // CHECK4-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK4: omp.inner.for.cond: -// CHECK4-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK4-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK4-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !46 +// CHECK4-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !46 // CHECK4-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] // CHECK4-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK4: omp.inner.for.body: -// CHECK4-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK4-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !46 // CHECK4-NEXT: [[TMP8:%.*]] = zext i32 [[TMP7]] to i64 -// CHECK4-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 
+// CHECK4-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !46 // CHECK4-NEXT: [[TMP10:%.*]] = zext i32 [[TMP9]] to i64 -// CHECK4-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 2, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64)* @.omp_outlined..14 to void (i32*, i32*, ...)*), i64 [[TMP8]], i64 [[TMP10]]) +// CHECK4-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 2, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64)* @.omp_outlined..14 to void (i32*, i32*, ...)*), i64 [[TMP8]], i64 [[TMP10]]), !llvm.access.group !46 // CHECK4-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK4: omp.inner.for.inc: -// CHECK4-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK4-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK4-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !46 +// CHECK4-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !46 // CHECK4-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP11]], [[TMP12]] -// CHECK4-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 +// CHECK4-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !46 // CHECK4-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP47:![0-9]+]] // CHECK4: omp.inner.for.end: // CHECK4-NEXT: br label [[OMP_LOOP_EXIT:%.*]] @@ -6097,23 +6097,23 @@ int main() { // CHECK4-NEXT: store i32 [[TMP6]], i32* [[DOTOMP_IV]], align 4 // CHECK4-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK4: omp.inner.for.cond: -// CHECK4-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK4-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK4-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !49 +// CHECK4-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !49 // CHECK4-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP7]], [[TMP8]] // CHECK4-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK4: omp.inner.for.body: -// CHECK4-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK4-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !49 // CHECK4-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP9]], 1 // CHECK4-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK4-NEXT: store i32 [[ADD]], i32* [[I]], align 4 -// CHECK4-NEXT: call void @_Z3fn1v() +// CHECK4-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !49 +// CHECK4-NEXT: call void @_Z3fn1v(), !llvm.access.group !49 // CHECK4-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK4: omp.body.continue: // CHECK4-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK4: omp.inner.for.inc: -// CHECK4-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK4-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !49 // CHECK4-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP10]], 1 -// CHECK4-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4 +// CHECK4-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !49 // CHECK4-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP50:![0-9]+]] // CHECK4: omp.inner.for.end: // CHECK4-NEXT: br label [[OMP_LOOP_EXIT:%.*]] @@ -6343,35 +6343,35 @@ int main() { // CHECK4-NEXT: store i32 [[TMP4]], i32* [[DOTOMP_IV]], align 4 // CHECK4-NEXT: br label 
[[OMP_INNER_FOR_COND:%.*]] // CHECK4: omp.inner.for.cond: -// CHECK4-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK4-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK4-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !54 +// CHECK4-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !54 // CHECK4-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] // CHECK4-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK4: omp.inner.for.body: -// CHECK4-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK4-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !54 // CHECK4-NEXT: [[TMP8:%.*]] = zext i32 [[TMP7]] to i64 -// CHECK4-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK4-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !54 // CHECK4-NEXT: [[TMP10:%.*]] = zext i32 [[TMP9]] to i64 -// CHECK4-NEXT: [[TMP11:%.*]] = load i8, i8* [[CONV]], align 8 +// CHECK4-NEXT: [[TMP11:%.*]] = load i8, i8* [[CONV]], align 8, !llvm.access.group !54 // CHECK4-NEXT: [[TOBOOL:%.*]] = trunc i8 [[TMP11]] to i1 // CHECK4-NEXT: br i1 [[TOBOOL]], label [[OMP_IF_THEN:%.*]], label [[OMP_IF_ELSE:%.*]] // CHECK4: omp_if.then: -// CHECK4-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 2, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64)* @.omp_outlined..18 to void (i32*, i32*, ...)*), i64 [[TMP8]], i64 [[TMP10]]) +// CHECK4-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 2, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64)* @.omp_outlined..18 to void (i32*, i32*, ...)*), i64 [[TMP8]], i64 [[TMP10]]), !llvm.access.group !54 // CHECK4-NEXT: br label [[OMP_IF_END:%.*]] // CHECK4: omp_if.else: -// CHECK4-NEXT: call void @__kmpc_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]) -// CHECK4-NEXT: [[TMP12:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK4-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4 -// CHECK4-NEXT: call void @.omp_outlined..18(i32* [[TMP12]], i32* [[DOTBOUND_ZERO_ADDR]], i64 [[TMP8]], i64 [[TMP10]]) #[[ATTR2]] -// CHECK4-NEXT: call void @__kmpc_end_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]) +// CHECK4-NEXT: call void @__kmpc_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]), !llvm.access.group !54 +// CHECK4-NEXT: [[TMP12:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8, !llvm.access.group !54 +// CHECK4-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4, !llvm.access.group !54 +// CHECK4-NEXT: call void @.omp_outlined..18(i32* [[TMP12]], i32* [[DOTBOUND_ZERO_ADDR]], i64 [[TMP8]], i64 [[TMP10]]) #[[ATTR2]], !llvm.access.group !54 +// CHECK4-NEXT: call void @__kmpc_end_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]), !llvm.access.group !54 // CHECK4-NEXT: br label [[OMP_IF_END]] // CHECK4: omp_if.end: // CHECK4-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK4: omp.inner.for.inc: -// CHECK4-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK4-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK4-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !54 +// CHECK4-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, 
!llvm.access.group !54 // CHECK4-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP13]], [[TMP14]] -// CHECK4-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 +// CHECK4-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !54 // CHECK4-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP55:![0-9]+]] // CHECK4: omp.inner.for.end: // CHECK4-NEXT: br label [[OMP_LOOP_EXIT:%.*]] @@ -6433,23 +6433,23 @@ int main() { // CHECK4-NEXT: store i32 [[TMP6]], i32* [[DOTOMP_IV]], align 4 // CHECK4-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK4: omp.inner.for.cond: -// CHECK4-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK4-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK4-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !57 +// CHECK4-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !57 // CHECK4-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP7]], [[TMP8]] // CHECK4-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK4: omp.inner.for.body: -// CHECK4-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK4-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !57 // CHECK4-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP9]], 1 // CHECK4-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK4-NEXT: store i32 [[ADD]], i32* [[I]], align 4 -// CHECK4-NEXT: call void @_Z3fn3v() +// CHECK4-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !57 +// CHECK4-NEXT: call void @_Z3fn3v(), !llvm.access.group !57 // CHECK4-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK4: omp.body.continue: // CHECK4-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK4: omp.inner.for.inc: -// CHECK4-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK4-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !57 // CHECK4-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP10]], 1 -// CHECK4-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4 +// CHECK4-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !57 // CHECK4-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP58:![0-9]+]] // CHECK4: omp.inner.for.end: // CHECK4-NEXT: br label [[OMP_LOOP_EXIT:%.*]] @@ -6491,23 +6491,23 @@ int main() { // CHECK5-NEXT: store i32 [[TMP0]], i32* [[DOTOMP_IV]], align 4 // CHECK5-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK5: omp.inner.for.cond: -// CHECK5-NEXT: [[TMP1:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK5-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK5-NEXT: [[TMP1:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !2 +// CHECK5-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !2 // CHECK5-NEXT: [[CMP:%.*]] = icmp sle i32 [[TMP1]], [[TMP2]] // CHECK5-NEXT: br i1 [[CMP]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK5: omp.inner.for.body: -// CHECK5-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK5-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !2 // CHECK5-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP3]], 1 // CHECK5-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK5-NEXT: store i32 [[ADD]], i32* [[I]], align 4 -// CHECK5-NEXT: store i32 0, i32* @Arg, align 4 +// CHECK5-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !2 +// CHECK5-NEXT: store i32 0, i32* @Arg, align 4, !llvm.access.group !2 // CHECK5-NEXT: br label 
[[OMP_BODY_CONTINUE:%.*]] // CHECK5: omp.body.continue: // CHECK5-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK5: omp.inner.for.inc: -// CHECK5-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK5-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !2 // CHECK5-NEXT: [[ADD1:%.*]] = add nsw i32 [[TMP4]], 1 -// CHECK5-NEXT: store i32 [[ADD1]], i32* [[DOTOMP_IV]], align 4 +// CHECK5-NEXT: store i32 [[ADD1]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !2 // CHECK5-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP3:![0-9]+]] // CHECK5: omp.inner.for.end: // CHECK5-NEXT: store i32 100, i32* [[I]], align 4 @@ -6517,23 +6517,23 @@ int main() { // CHECK5-NEXT: store i32 [[TMP5]], i32* [[DOTOMP_IV5]], align 4 // CHECK5-NEXT: br label [[OMP_INNER_FOR_COND7:%.*]] // CHECK5: omp.inner.for.cond7: -// CHECK5-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV5]], align 4 -// CHECK5-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_UB4]], align 4 +// CHECK5-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV5]], align 4, !llvm.access.group !6 +// CHECK5-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_UB4]], align 4, !llvm.access.group !6 // CHECK5-NEXT: [[CMP8:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] // CHECK5-NEXT: br i1 [[CMP8]], label [[OMP_INNER_FOR_BODY9:%.*]], label [[OMP_INNER_FOR_END15:%.*]] // CHECK5: omp.inner.for.body9: -// CHECK5-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV5]], align 4 +// CHECK5-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV5]], align 4, !llvm.access.group !6 // CHECK5-NEXT: [[MUL10:%.*]] = mul nsw i32 [[TMP8]], 1 // CHECK5-NEXT: [[ADD11:%.*]] = add nsw i32 0, [[MUL10]] -// CHECK5-NEXT: store i32 [[ADD11]], i32* [[I6]], align 4 -// CHECK5-NEXT: call void @_Z9gtid_testv() +// CHECK5-NEXT: store i32 [[ADD11]], i32* [[I6]], align 4, !llvm.access.group !6 +// CHECK5-NEXT: call void @_Z9gtid_testv(), !llvm.access.group !6 // CHECK5-NEXT: br label [[OMP_BODY_CONTINUE12:%.*]] // CHECK5: omp.body.continue12: // CHECK5-NEXT: br label [[OMP_INNER_FOR_INC13:%.*]] // CHECK5: omp.inner.for.inc13: -// CHECK5-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV5]], align 4 +// CHECK5-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV5]], align 4, !llvm.access.group !6 // CHECK5-NEXT: [[ADD14:%.*]] = add nsw i32 [[TMP9]], 1 -// CHECK5-NEXT: store i32 [[ADD14]], i32* [[DOTOMP_IV5]], align 4 +// CHECK5-NEXT: store i32 [[ADD14]], i32* [[DOTOMP_IV5]], align 4, !llvm.access.group !6 // CHECK5-NEXT: br label [[OMP_INNER_FOR_COND7]], !llvm.loop [[LOOP7:![0-9]+]] // CHECK5: omp.inner.for.end15: // CHECK5-NEXT: store i32 100, i32* [[I6]], align 4 @@ -6567,23 +6567,23 @@ int main() { // CHECK5-NEXT: store i32 [[TMP0]], i32* [[DOTOMP_IV]], align 4 // CHECK5-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK5: omp.inner.for.cond: -// CHECK5-NEXT: [[TMP1:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK5-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK5-NEXT: [[TMP1:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !9 +// CHECK5-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !9 // CHECK5-NEXT: [[CMP:%.*]] = icmp sle i32 [[TMP1]], [[TMP2]] // CHECK5-NEXT: br i1 [[CMP]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK5: omp.inner.for.body: -// CHECK5-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK5-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !9 // CHECK5-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP3]], 1 // CHECK5-NEXT: 
[[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK5-NEXT: store i32 [[ADD]], i32* [[I]], align 4 -// CHECK5-NEXT: call void @_Z3fn4v() +// CHECK5-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !9 +// CHECK5-NEXT: call void @_Z3fn4v(), !llvm.access.group !9 // CHECK5-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK5: omp.body.continue: // CHECK5-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK5: omp.inner.for.inc: -// CHECK5-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK5-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !9 // CHECK5-NEXT: [[ADD1:%.*]] = add nsw i32 [[TMP4]], 1 -// CHECK5-NEXT: store i32 [[ADD1]], i32* [[DOTOMP_IV]], align 4 +// CHECK5-NEXT: store i32 [[ADD1]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !9 // CHECK5-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP10:![0-9]+]] // CHECK5: omp.inner.for.end: // CHECK5-NEXT: store i32 100, i32* [[I]], align 4 @@ -6593,23 +6593,23 @@ int main() { // CHECK5-NEXT: store i32 [[TMP5]], i32* [[DOTOMP_IV5]], align 4 // CHECK5-NEXT: br label [[OMP_INNER_FOR_COND7:%.*]] // CHECK5: omp.inner.for.cond7: -// CHECK5-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV5]], align 4 -// CHECK5-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_UB4]], align 4 +// CHECK5-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV5]], align 4, !llvm.access.group !12 +// CHECK5-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_UB4]], align 4, !llvm.access.group !12 // CHECK5-NEXT: [[CMP8:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] // CHECK5-NEXT: br i1 [[CMP8]], label [[OMP_INNER_FOR_BODY9:%.*]], label [[OMP_INNER_FOR_END15:%.*]] // CHECK5: omp.inner.for.body9: -// CHECK5-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV5]], align 4 +// CHECK5-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV5]], align 4, !llvm.access.group !12 // CHECK5-NEXT: [[MUL10:%.*]] = mul nsw i32 [[TMP8]], 1 // CHECK5-NEXT: [[ADD11:%.*]] = add nsw i32 0, [[MUL10]] -// CHECK5-NEXT: store i32 [[ADD11]], i32* [[I6]], align 4 -// CHECK5-NEXT: call void @_Z3fn5v() +// CHECK5-NEXT: store i32 [[ADD11]], i32* [[I6]], align 4, !llvm.access.group !12 +// CHECK5-NEXT: call void @_Z3fn5v(), !llvm.access.group !12 // CHECK5-NEXT: br label [[OMP_BODY_CONTINUE12:%.*]] // CHECK5: omp.body.continue12: // CHECK5-NEXT: br label [[OMP_INNER_FOR_INC13:%.*]] // CHECK5: omp.inner.for.inc13: -// CHECK5-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV5]], align 4 +// CHECK5-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV5]], align 4, !llvm.access.group !12 // CHECK5-NEXT: [[ADD14:%.*]] = add nsw i32 [[TMP9]], 1 -// CHECK5-NEXT: store i32 [[ADD14]], i32* [[DOTOMP_IV5]], align 4 +// CHECK5-NEXT: store i32 [[ADD14]], i32* [[DOTOMP_IV5]], align 4, !llvm.access.group !12 // CHECK5-NEXT: br label [[OMP_INNER_FOR_COND7]], !llvm.loop [[LOOP13:![0-9]+]] // CHECK5: omp.inner.for.end15: // CHECK5-NEXT: store i32 100, i32* [[I6]], align 4 @@ -6623,23 +6623,23 @@ int main() { // CHECK5-NEXT: store i32 [[TMP11]], i32* [[DOTOMP_IV19]], align 4 // CHECK5-NEXT: br label [[OMP_INNER_FOR_COND21:%.*]] // CHECK5: omp.inner.for.cond21: -// CHECK5-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV19]], align 4 -// CHECK5-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_UB18]], align 4 +// CHECK5-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV19]], align 4, !llvm.access.group !15 +// CHECK5-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_UB18]], align 4, !llvm.access.group !15 // CHECK5-NEXT: [[CMP22:%.*]] = icmp sle i32 [[TMP12]], [[TMP13]] // CHECK5-NEXT: br i1 [[CMP22]], label 
[[OMP_INNER_FOR_BODY23:%.*]], label [[OMP_INNER_FOR_END29:%.*]] // CHECK5: omp.inner.for.body23: -// CHECK5-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IV19]], align 4 +// CHECK5-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IV19]], align 4, !llvm.access.group !15 // CHECK5-NEXT: [[MUL24:%.*]] = mul nsw i32 [[TMP14]], 1 // CHECK5-NEXT: [[ADD25:%.*]] = add nsw i32 0, [[MUL24]] -// CHECK5-NEXT: store i32 [[ADD25]], i32* [[I20]], align 4 -// CHECK5-NEXT: call void @_Z3fn6v() +// CHECK5-NEXT: store i32 [[ADD25]], i32* [[I20]], align 4, !llvm.access.group !15 +// CHECK5-NEXT: call void @_Z3fn6v(), !llvm.access.group !15 // CHECK5-NEXT: br label [[OMP_BODY_CONTINUE26:%.*]] // CHECK5: omp.body.continue26: // CHECK5-NEXT: br label [[OMP_INNER_FOR_INC27:%.*]] // CHECK5: omp.inner.for.inc27: -// CHECK5-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_IV19]], align 4 +// CHECK5-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_IV19]], align 4, !llvm.access.group !15 // CHECK5-NEXT: [[ADD28:%.*]] = add nsw i32 [[TMP15]], 1 -// CHECK5-NEXT: store i32 [[ADD28]], i32* [[DOTOMP_IV19]], align 4 +// CHECK5-NEXT: store i32 [[ADD28]], i32* [[DOTOMP_IV19]], align 4, !llvm.access.group !15 // CHECK5-NEXT: br label [[OMP_INNER_FOR_COND21]], !llvm.loop [[LOOP16:![0-9]+]] // CHECK5: omp.inner.for.end29: // CHECK5-NEXT: store i32 100, i32* [[I20]], align 4 @@ -6675,23 +6675,23 @@ int main() { // CHECK5-NEXT: store i32 [[TMP0]], i32* [[DOTOMP_IV]], align 4 // CHECK5-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK5: omp.inner.for.cond: -// CHECK5-NEXT: [[TMP1:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK5-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK5-NEXT: [[TMP1:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !18 +// CHECK5-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !18 // CHECK5-NEXT: [[CMP:%.*]] = icmp sle i32 [[TMP1]], [[TMP2]] // CHECK5-NEXT: br i1 [[CMP]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK5: omp.inner.for.body: -// CHECK5-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK5-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !18 // CHECK5-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP3]], 1 // CHECK5-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK5-NEXT: store i32 [[ADD]], i32* [[I]], align 4 -// CHECK5-NEXT: call void @_Z3fn1v() +// CHECK5-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !18 +// CHECK5-NEXT: call void @_Z3fn1v(), !llvm.access.group !18 // CHECK5-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK5: omp.body.continue: // CHECK5-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK5: omp.inner.for.inc: -// CHECK5-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK5-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !18 // CHECK5-NEXT: [[ADD1:%.*]] = add nsw i32 [[TMP4]], 1 -// CHECK5-NEXT: store i32 [[ADD1]], i32* [[DOTOMP_IV]], align 4 +// CHECK5-NEXT: store i32 [[ADD1]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !18 // CHECK5-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP19:![0-9]+]] // CHECK5: omp.inner.for.end: // CHECK5-NEXT: store i32 100, i32* [[I]], align 4 @@ -6701,23 +6701,23 @@ int main() { // CHECK5-NEXT: store i32 [[TMP5]], i32* [[DOTOMP_IV5]], align 4 // CHECK5-NEXT: br label [[OMP_INNER_FOR_COND7:%.*]] // CHECK5: omp.inner.for.cond7: -// CHECK5-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV5]], align 4 -// CHECK5-NEXT: [[TMP7:%.*]] = load i32, 
i32* [[DOTOMP_UB4]], align 4 +// CHECK5-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV5]], align 4, !llvm.access.group !21 +// CHECK5-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_UB4]], align 4, !llvm.access.group !21 // CHECK5-NEXT: [[CMP8:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] // CHECK5-NEXT: br i1 [[CMP8]], label [[OMP_INNER_FOR_BODY9:%.*]], label [[OMP_INNER_FOR_END15:%.*]] // CHECK5: omp.inner.for.body9: -// CHECK5-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV5]], align 4 +// CHECK5-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV5]], align 4, !llvm.access.group !21 // CHECK5-NEXT: [[MUL10:%.*]] = mul nsw i32 [[TMP8]], 1 // CHECK5-NEXT: [[ADD11:%.*]] = add nsw i32 0, [[MUL10]] -// CHECK5-NEXT: store i32 [[ADD11]], i32* [[I6]], align 4 -// CHECK5-NEXT: call void @_Z3fn2v() +// CHECK5-NEXT: store i32 [[ADD11]], i32* [[I6]], align 4, !llvm.access.group !21 +// CHECK5-NEXT: call void @_Z3fn2v(), !llvm.access.group !21 // CHECK5-NEXT: br label [[OMP_BODY_CONTINUE12:%.*]] // CHECK5: omp.body.continue12: // CHECK5-NEXT: br label [[OMP_INNER_FOR_INC13:%.*]] // CHECK5: omp.inner.for.inc13: -// CHECK5-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV5]], align 4 +// CHECK5-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV5]], align 4, !llvm.access.group !21 // CHECK5-NEXT: [[ADD14:%.*]] = add nsw i32 [[TMP9]], 1 -// CHECK5-NEXT: store i32 [[ADD14]], i32* [[DOTOMP_IV5]], align 4 +// CHECK5-NEXT: store i32 [[ADD14]], i32* [[DOTOMP_IV5]], align 4, !llvm.access.group !21 // CHECK5-NEXT: br label [[OMP_INNER_FOR_COND7]], !llvm.loop [[LOOP22:![0-9]+]] // CHECK5: omp.inner.for.end15: // CHECK5-NEXT: store i32 100, i32* [[I6]], align 4 @@ -6731,23 +6731,23 @@ int main() { // CHECK5-NEXT: store i32 [[TMP11]], i32* [[DOTOMP_IV19]], align 4 // CHECK5-NEXT: br label [[OMP_INNER_FOR_COND21:%.*]] // CHECK5: omp.inner.for.cond21: -// CHECK5-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV19]], align 4 -// CHECK5-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_UB18]], align 4 +// CHECK5-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV19]], align 4, !llvm.access.group !24 +// CHECK5-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_UB18]], align 4, !llvm.access.group !24 // CHECK5-NEXT: [[CMP22:%.*]] = icmp sle i32 [[TMP12]], [[TMP13]] // CHECK5-NEXT: br i1 [[CMP22]], label [[OMP_INNER_FOR_BODY23:%.*]], label [[OMP_INNER_FOR_END29:%.*]] // CHECK5: omp.inner.for.body23: -// CHECK5-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IV19]], align 4 +// CHECK5-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IV19]], align 4, !llvm.access.group !24 // CHECK5-NEXT: [[MUL24:%.*]] = mul nsw i32 [[TMP14]], 1 // CHECK5-NEXT: [[ADD25:%.*]] = add nsw i32 0, [[MUL24]] -// CHECK5-NEXT: store i32 [[ADD25]], i32* [[I20]], align 4 -// CHECK5-NEXT: call void @_Z3fn3v() +// CHECK5-NEXT: store i32 [[ADD25]], i32* [[I20]], align 4, !llvm.access.group !24 +// CHECK5-NEXT: call void @_Z3fn3v(), !llvm.access.group !24 // CHECK5-NEXT: br label [[OMP_BODY_CONTINUE26:%.*]] // CHECK5: omp.body.continue26: // CHECK5-NEXT: br label [[OMP_INNER_FOR_INC27:%.*]] // CHECK5: omp.inner.for.inc27: -// CHECK5-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_IV19]], align 4 +// CHECK5-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_IV19]], align 4, !llvm.access.group !24 // CHECK5-NEXT: [[ADD28:%.*]] = add nsw i32 [[TMP15]], 1 -// CHECK5-NEXT: store i32 [[ADD28]], i32* [[DOTOMP_IV19]], align 4 +// CHECK5-NEXT: store i32 [[ADD28]], i32* [[DOTOMP_IV19]], align 4, !llvm.access.group !24 // CHECK5-NEXT: br label [[OMP_INNER_FOR_COND21]], !llvm.loop [[LOOP25:![0-9]+]] // 
CHECK5: omp.inner.for.end29: // CHECK5-NEXT: store i32 100, i32* [[I20]], align 4 @@ -6773,23 +6773,23 @@ int main() { // CHECK6-NEXT: store i32 [[TMP0]], i32* [[DOTOMP_IV]], align 4 // CHECK6-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK6: omp.inner.for.cond: -// CHECK6-NEXT: [[TMP1:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK6-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK6-NEXT: [[TMP1:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !2 +// CHECK6-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !2 // CHECK6-NEXT: [[CMP:%.*]] = icmp sle i32 [[TMP1]], [[TMP2]] // CHECK6-NEXT: br i1 [[CMP]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK6: omp.inner.for.body: -// CHECK6-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK6-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !2 // CHECK6-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP3]], 1 // CHECK6-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK6-NEXT: store i32 [[ADD]], i32* [[I]], align 4 -// CHECK6-NEXT: store i32 0, i32* @Arg, align 4 +// CHECK6-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !2 +// CHECK6-NEXT: store i32 0, i32* @Arg, align 4, !llvm.access.group !2 // CHECK6-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK6: omp.body.continue: // CHECK6-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK6: omp.inner.for.inc: -// CHECK6-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK6-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !2 // CHECK6-NEXT: [[ADD1:%.*]] = add nsw i32 [[TMP4]], 1 -// CHECK6-NEXT: store i32 [[ADD1]], i32* [[DOTOMP_IV]], align 4 +// CHECK6-NEXT: store i32 [[ADD1]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !2 // CHECK6-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP3:![0-9]+]] // CHECK6: omp.inner.for.end: // CHECK6-NEXT: store i32 100, i32* [[I]], align 4 @@ -6799,23 +6799,23 @@ int main() { // CHECK6-NEXT: store i32 [[TMP5]], i32* [[DOTOMP_IV5]], align 4 // CHECK6-NEXT: br label [[OMP_INNER_FOR_COND7:%.*]] // CHECK6: omp.inner.for.cond7: -// CHECK6-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV5]], align 4 -// CHECK6-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_UB4]], align 4 +// CHECK6-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV5]], align 4, !llvm.access.group !6 +// CHECK6-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_UB4]], align 4, !llvm.access.group !6 // CHECK6-NEXT: [[CMP8:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] // CHECK6-NEXT: br i1 [[CMP8]], label [[OMP_INNER_FOR_BODY9:%.*]], label [[OMP_INNER_FOR_END15:%.*]] // CHECK6: omp.inner.for.body9: -// CHECK6-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV5]], align 4 +// CHECK6-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV5]], align 4, !llvm.access.group !6 // CHECK6-NEXT: [[MUL10:%.*]] = mul nsw i32 [[TMP8]], 1 // CHECK6-NEXT: [[ADD11:%.*]] = add nsw i32 0, [[MUL10]] -// CHECK6-NEXT: store i32 [[ADD11]], i32* [[I6]], align 4 -// CHECK6-NEXT: call void @_Z9gtid_testv() +// CHECK6-NEXT: store i32 [[ADD11]], i32* [[I6]], align 4, !llvm.access.group !6 +// CHECK6-NEXT: call void @_Z9gtid_testv(), !llvm.access.group !6 // CHECK6-NEXT: br label [[OMP_BODY_CONTINUE12:%.*]] // CHECK6: omp.body.continue12: // CHECK6-NEXT: br label [[OMP_INNER_FOR_INC13:%.*]] // CHECK6: omp.inner.for.inc13: -// CHECK6-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV5]], align 4 +// CHECK6-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV5]], align 
4, !llvm.access.group !6 // CHECK6-NEXT: [[ADD14:%.*]] = add nsw i32 [[TMP9]], 1 -// CHECK6-NEXT: store i32 [[ADD14]], i32* [[DOTOMP_IV5]], align 4 +// CHECK6-NEXT: store i32 [[ADD14]], i32* [[DOTOMP_IV5]], align 4, !llvm.access.group !6 // CHECK6-NEXT: br label [[OMP_INNER_FOR_COND7]], !llvm.loop [[LOOP7:![0-9]+]] // CHECK6: omp.inner.for.end15: // CHECK6-NEXT: store i32 100, i32* [[I6]], align 4 @@ -6849,23 +6849,23 @@ int main() { // CHECK6-NEXT: store i32 [[TMP0]], i32* [[DOTOMP_IV]], align 4 // CHECK6-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK6: omp.inner.for.cond: -// CHECK6-NEXT: [[TMP1:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK6-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK6-NEXT: [[TMP1:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !9 +// CHECK6-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !9 // CHECK6-NEXT: [[CMP:%.*]] = icmp sle i32 [[TMP1]], [[TMP2]] // CHECK6-NEXT: br i1 [[CMP]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK6: omp.inner.for.body: -// CHECK6-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK6-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !9 // CHECK6-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP3]], 1 // CHECK6-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK6-NEXT: store i32 [[ADD]], i32* [[I]], align 4 -// CHECK6-NEXT: call void @_Z3fn4v() +// CHECK6-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !9 +// CHECK6-NEXT: call void @_Z3fn4v(), !llvm.access.group !9 // CHECK6-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK6: omp.body.continue: // CHECK6-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK6: omp.inner.for.inc: -// CHECK6-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK6-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !9 // CHECK6-NEXT: [[ADD1:%.*]] = add nsw i32 [[TMP4]], 1 -// CHECK6-NEXT: store i32 [[ADD1]], i32* [[DOTOMP_IV]], align 4 +// CHECK6-NEXT: store i32 [[ADD1]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !9 // CHECK6-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP10:![0-9]+]] // CHECK6: omp.inner.for.end: // CHECK6-NEXT: store i32 100, i32* [[I]], align 4 @@ -6875,23 +6875,23 @@ int main() { // CHECK6-NEXT: store i32 [[TMP5]], i32* [[DOTOMP_IV5]], align 4 // CHECK6-NEXT: br label [[OMP_INNER_FOR_COND7:%.*]] // CHECK6: omp.inner.for.cond7: -// CHECK6-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV5]], align 4 -// CHECK6-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_UB4]], align 4 +// CHECK6-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV5]], align 4, !llvm.access.group !12 +// CHECK6-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_UB4]], align 4, !llvm.access.group !12 // CHECK6-NEXT: [[CMP8:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] // CHECK6-NEXT: br i1 [[CMP8]], label [[OMP_INNER_FOR_BODY9:%.*]], label [[OMP_INNER_FOR_END15:%.*]] // CHECK6: omp.inner.for.body9: -// CHECK6-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV5]], align 4 +// CHECK6-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV5]], align 4, !llvm.access.group !12 // CHECK6-NEXT: [[MUL10:%.*]] = mul nsw i32 [[TMP8]], 1 // CHECK6-NEXT: [[ADD11:%.*]] = add nsw i32 0, [[MUL10]] -// CHECK6-NEXT: store i32 [[ADD11]], i32* [[I6]], align 4 -// CHECK6-NEXT: call void @_Z3fn5v() +// CHECK6-NEXT: store i32 [[ADD11]], i32* [[I6]], align 4, !llvm.access.group !12 +// CHECK6-NEXT: call void @_Z3fn5v(), !llvm.access.group !12 // CHECK6-NEXT: br 
label [[OMP_BODY_CONTINUE12:%.*]] // CHECK6: omp.body.continue12: // CHECK6-NEXT: br label [[OMP_INNER_FOR_INC13:%.*]] // CHECK6: omp.inner.for.inc13: -// CHECK6-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV5]], align 4 +// CHECK6-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV5]], align 4, !llvm.access.group !12 // CHECK6-NEXT: [[ADD14:%.*]] = add nsw i32 [[TMP9]], 1 -// CHECK6-NEXT: store i32 [[ADD14]], i32* [[DOTOMP_IV5]], align 4 +// CHECK6-NEXT: store i32 [[ADD14]], i32* [[DOTOMP_IV5]], align 4, !llvm.access.group !12 // CHECK6-NEXT: br label [[OMP_INNER_FOR_COND7]], !llvm.loop [[LOOP13:![0-9]+]] // CHECK6: omp.inner.for.end15: // CHECK6-NEXT: store i32 100, i32* [[I6]], align 4 @@ -6905,23 +6905,23 @@ int main() { // CHECK6-NEXT: store i32 [[TMP11]], i32* [[DOTOMP_IV19]], align 4 // CHECK6-NEXT: br label [[OMP_INNER_FOR_COND21:%.*]] // CHECK6: omp.inner.for.cond21: -// CHECK6-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV19]], align 4 -// CHECK6-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_UB18]], align 4 +// CHECK6-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV19]], align 4, !llvm.access.group !15 +// CHECK6-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_UB18]], align 4, !llvm.access.group !15 // CHECK6-NEXT: [[CMP22:%.*]] = icmp sle i32 [[TMP12]], [[TMP13]] // CHECK6-NEXT: br i1 [[CMP22]], label [[OMP_INNER_FOR_BODY23:%.*]], label [[OMP_INNER_FOR_END29:%.*]] // CHECK6: omp.inner.for.body23: -// CHECK6-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IV19]], align 4 +// CHECK6-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IV19]], align 4, !llvm.access.group !15 // CHECK6-NEXT: [[MUL24:%.*]] = mul nsw i32 [[TMP14]], 1 // CHECK6-NEXT: [[ADD25:%.*]] = add nsw i32 0, [[MUL24]] -// CHECK6-NEXT: store i32 [[ADD25]], i32* [[I20]], align 4 -// CHECK6-NEXT: call void @_Z3fn6v() +// CHECK6-NEXT: store i32 [[ADD25]], i32* [[I20]], align 4, !llvm.access.group !15 +// CHECK6-NEXT: call void @_Z3fn6v(), !llvm.access.group !15 // CHECK6-NEXT: br label [[OMP_BODY_CONTINUE26:%.*]] // CHECK6: omp.body.continue26: // CHECK6-NEXT: br label [[OMP_INNER_FOR_INC27:%.*]] // CHECK6: omp.inner.for.inc27: -// CHECK6-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_IV19]], align 4 +// CHECK6-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_IV19]], align 4, !llvm.access.group !15 // CHECK6-NEXT: [[ADD28:%.*]] = add nsw i32 [[TMP15]], 1 -// CHECK6-NEXT: store i32 [[ADD28]], i32* [[DOTOMP_IV19]], align 4 +// CHECK6-NEXT: store i32 [[ADD28]], i32* [[DOTOMP_IV19]], align 4, !llvm.access.group !15 // CHECK6-NEXT: br label [[OMP_INNER_FOR_COND21]], !llvm.loop [[LOOP16:![0-9]+]] // CHECK6: omp.inner.for.end29: // CHECK6-NEXT: store i32 100, i32* [[I20]], align 4 @@ -6957,23 +6957,23 @@ int main() { // CHECK6-NEXT: store i32 [[TMP0]], i32* [[DOTOMP_IV]], align 4 // CHECK6-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK6: omp.inner.for.cond: -// CHECK6-NEXT: [[TMP1:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK6-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK6-NEXT: [[TMP1:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !18 +// CHECK6-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !18 // CHECK6-NEXT: [[CMP:%.*]] = icmp sle i32 [[TMP1]], [[TMP2]] // CHECK6-NEXT: br i1 [[CMP]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK6: omp.inner.for.body: -// CHECK6-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK6-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !18 // 
CHECK6-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP3]], 1 // CHECK6-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK6-NEXT: store i32 [[ADD]], i32* [[I]], align 4 -// CHECK6-NEXT: call void @_Z3fn1v() +// CHECK6-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !18 +// CHECK6-NEXT: call void @_Z3fn1v(), !llvm.access.group !18 // CHECK6-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK6: omp.body.continue: // CHECK6-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK6: omp.inner.for.inc: -// CHECK6-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK6-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !18 // CHECK6-NEXT: [[ADD1:%.*]] = add nsw i32 [[TMP4]], 1 -// CHECK6-NEXT: store i32 [[ADD1]], i32* [[DOTOMP_IV]], align 4 +// CHECK6-NEXT: store i32 [[ADD1]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !18 // CHECK6-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP19:![0-9]+]] // CHECK6: omp.inner.for.end: // CHECK6-NEXT: store i32 100, i32* [[I]], align 4 @@ -6983,23 +6983,23 @@ int main() { // CHECK6-NEXT: store i32 [[TMP5]], i32* [[DOTOMP_IV5]], align 4 // CHECK6-NEXT: br label [[OMP_INNER_FOR_COND7:%.*]] // CHECK6: omp.inner.for.cond7: -// CHECK6-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV5]], align 4 -// CHECK6-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_UB4]], align 4 +// CHECK6-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV5]], align 4, !llvm.access.group !21 +// CHECK6-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_UB4]], align 4, !llvm.access.group !21 // CHECK6-NEXT: [[CMP8:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] // CHECK6-NEXT: br i1 [[CMP8]], label [[OMP_INNER_FOR_BODY9:%.*]], label [[OMP_INNER_FOR_END15:%.*]] // CHECK6: omp.inner.for.body9: -// CHECK6-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV5]], align 4 +// CHECK6-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV5]], align 4, !llvm.access.group !21 // CHECK6-NEXT: [[MUL10:%.*]] = mul nsw i32 [[TMP8]], 1 // CHECK6-NEXT: [[ADD11:%.*]] = add nsw i32 0, [[MUL10]] -// CHECK6-NEXT: store i32 [[ADD11]], i32* [[I6]], align 4 -// CHECK6-NEXT: call void @_Z3fn2v() +// CHECK6-NEXT: store i32 [[ADD11]], i32* [[I6]], align 4, !llvm.access.group !21 +// CHECK6-NEXT: call void @_Z3fn2v(), !llvm.access.group !21 // CHECK6-NEXT: br label [[OMP_BODY_CONTINUE12:%.*]] // CHECK6: omp.body.continue12: // CHECK6-NEXT: br label [[OMP_INNER_FOR_INC13:%.*]] // CHECK6: omp.inner.for.inc13: -// CHECK6-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV5]], align 4 +// CHECK6-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV5]], align 4, !llvm.access.group !21 // CHECK6-NEXT: [[ADD14:%.*]] = add nsw i32 [[TMP9]], 1 -// CHECK6-NEXT: store i32 [[ADD14]], i32* [[DOTOMP_IV5]], align 4 +// CHECK6-NEXT: store i32 [[ADD14]], i32* [[DOTOMP_IV5]], align 4, !llvm.access.group !21 // CHECK6-NEXT: br label [[OMP_INNER_FOR_COND7]], !llvm.loop [[LOOP22:![0-9]+]] // CHECK6: omp.inner.for.end15: // CHECK6-NEXT: store i32 100, i32* [[I6]], align 4 @@ -7013,23 +7013,23 @@ int main() { // CHECK6-NEXT: store i32 [[TMP11]], i32* [[DOTOMP_IV19]], align 4 // CHECK6-NEXT: br label [[OMP_INNER_FOR_COND21:%.*]] // CHECK6: omp.inner.for.cond21: -// CHECK6-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV19]], align 4 -// CHECK6-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_UB18]], align 4 +// CHECK6-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV19]], align 4, !llvm.access.group !24 +// CHECK6-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_UB18]], align 4, !llvm.access.group !24 // CHECK6-NEXT: [[CMP22:%.*]] = icmp 
sle i32 [[TMP12]], [[TMP13]] // CHECK6-NEXT: br i1 [[CMP22]], label [[OMP_INNER_FOR_BODY23:%.*]], label [[OMP_INNER_FOR_END29:%.*]] // CHECK6: omp.inner.for.body23: -// CHECK6-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IV19]], align 4 +// CHECK6-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IV19]], align 4, !llvm.access.group !24 // CHECK6-NEXT: [[MUL24:%.*]] = mul nsw i32 [[TMP14]], 1 // CHECK6-NEXT: [[ADD25:%.*]] = add nsw i32 0, [[MUL24]] -// CHECK6-NEXT: store i32 [[ADD25]], i32* [[I20]], align 4 -// CHECK6-NEXT: call void @_Z3fn3v() +// CHECK6-NEXT: store i32 [[ADD25]], i32* [[I20]], align 4, !llvm.access.group !24 +// CHECK6-NEXT: call void @_Z3fn3v(), !llvm.access.group !24 // CHECK6-NEXT: br label [[OMP_BODY_CONTINUE26:%.*]] // CHECK6: omp.body.continue26: // CHECK6-NEXT: br label [[OMP_INNER_FOR_INC27:%.*]] // CHECK6: omp.inner.for.inc27: -// CHECK6-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_IV19]], align 4 +// CHECK6-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_IV19]], align 4, !llvm.access.group !24 // CHECK6-NEXT: [[ADD28:%.*]] = add nsw i32 [[TMP15]], 1 -// CHECK6-NEXT: store i32 [[ADD28]], i32* [[DOTOMP_IV19]], align 4 +// CHECK6-NEXT: store i32 [[ADD28]], i32* [[DOTOMP_IV19]], align 4, !llvm.access.group !24 // CHECK6-NEXT: br label [[OMP_INNER_FOR_COND21]], !llvm.loop [[LOOP25:![0-9]+]] // CHECK6: omp.inner.for.end29: // CHECK6-NEXT: store i32 100, i32* [[I20]], align 4 @@ -7055,23 +7055,23 @@ int main() { // CHECK7-NEXT: store i32 [[TMP0]], i32* [[DOTOMP_IV]], align 4 // CHECK7-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK7: omp.inner.for.cond: -// CHECK7-NEXT: [[TMP1:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK7-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK7-NEXT: [[TMP1:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !2 +// CHECK7-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !2 // CHECK7-NEXT: [[CMP:%.*]] = icmp sle i32 [[TMP1]], [[TMP2]] // CHECK7-NEXT: br i1 [[CMP]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK7: omp.inner.for.body: -// CHECK7-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK7-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !2 // CHECK7-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP3]], 1 // CHECK7-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK7-NEXT: store i32 [[ADD]], i32* [[I]], align 4 -// CHECK7-NEXT: store i32 0, i32* @Arg, align 4, !nontemporal !3 +// CHECK7-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !2 +// CHECK7-NEXT: store i32 0, i32* @Arg, align 4, !nontemporal !3, !llvm.access.group !2 // CHECK7-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK7: omp.body.continue: // CHECK7-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK7: omp.inner.for.inc: -// CHECK7-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK7-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !2 // CHECK7-NEXT: [[ADD1:%.*]] = add nsw i32 [[TMP4]], 1 -// CHECK7-NEXT: store i32 [[ADD1]], i32* [[DOTOMP_IV]], align 4 +// CHECK7-NEXT: store i32 [[ADD1]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !2 // CHECK7-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP4:![0-9]+]] // CHECK7: omp.inner.for.end: // CHECK7-NEXT: store i32 100, i32* [[I]], align 4 @@ -7081,23 +7081,23 @@ int main() { // CHECK7-NEXT: store i32 [[TMP5]], i32* [[DOTOMP_IV5]], align 4 // CHECK7-NEXT: br label [[OMP_INNER_FOR_COND7:%.*]] // CHECK7: omp.inner.for.cond7: 
-// CHECK7-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV5]], align 4 -// CHECK7-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_UB4]], align 4 +// CHECK7-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV5]], align 4, !llvm.access.group !7 +// CHECK7-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_UB4]], align 4, !llvm.access.group !7 // CHECK7-NEXT: [[CMP8:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] // CHECK7-NEXT: br i1 [[CMP8]], label [[OMP_INNER_FOR_BODY9:%.*]], label [[OMP_INNER_FOR_END15:%.*]] // CHECK7: omp.inner.for.body9: -// CHECK7-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV5]], align 4 +// CHECK7-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV5]], align 4, !llvm.access.group !7 // CHECK7-NEXT: [[MUL10:%.*]] = mul nsw i32 [[TMP8]], 1 // CHECK7-NEXT: [[ADD11:%.*]] = add nsw i32 0, [[MUL10]] -// CHECK7-NEXT: store i32 [[ADD11]], i32* [[I6]], align 4 -// CHECK7-NEXT: call void @_Z9gtid_testv() +// CHECK7-NEXT: store i32 [[ADD11]], i32* [[I6]], align 4, !llvm.access.group !7 +// CHECK7-NEXT: call void @_Z9gtid_testv(), !llvm.access.group !7 // CHECK7-NEXT: br label [[OMP_BODY_CONTINUE12:%.*]] // CHECK7: omp.body.continue12: // CHECK7-NEXT: br label [[OMP_INNER_FOR_INC13:%.*]] // CHECK7: omp.inner.for.inc13: -// CHECK7-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV5]], align 4 +// CHECK7-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV5]], align 4, !llvm.access.group !7 // CHECK7-NEXT: [[ADD14:%.*]] = add nsw i32 [[TMP9]], 1 -// CHECK7-NEXT: store i32 [[ADD14]], i32* [[DOTOMP_IV5]], align 4 +// CHECK7-NEXT: store i32 [[ADD14]], i32* [[DOTOMP_IV5]], align 4, !llvm.access.group !7 // CHECK7-NEXT: br label [[OMP_INNER_FOR_COND7]], !llvm.loop [[LOOP8:![0-9]+]] // CHECK7: omp.inner.for.end15: // CHECK7-NEXT: store i32 100, i32* [[I6]], align 4 @@ -7131,23 +7131,23 @@ int main() { // CHECK7-NEXT: store i32 [[TMP0]], i32* [[DOTOMP_IV]], align 4 // CHECK7-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK7: omp.inner.for.cond: -// CHECK7-NEXT: [[TMP1:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK7-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK7-NEXT: [[TMP1:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !10 +// CHECK7-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !10 // CHECK7-NEXT: [[CMP:%.*]] = icmp sle i32 [[TMP1]], [[TMP2]] // CHECK7-NEXT: br i1 [[CMP]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK7: omp.inner.for.body: -// CHECK7-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK7-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !10 // CHECK7-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP3]], 1 // CHECK7-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK7-NEXT: store i32 [[ADD]], i32* [[I]], align 4 -// CHECK7-NEXT: call void @_Z3fn4v() +// CHECK7-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !10 +// CHECK7-NEXT: call void @_Z3fn4v(), !llvm.access.group !10 // CHECK7-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK7: omp.body.continue: // CHECK7-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK7: omp.inner.for.inc: -// CHECK7-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK7-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !10 // CHECK7-NEXT: [[ADD1:%.*]] = add nsw i32 [[TMP4]], 1 -// CHECK7-NEXT: store i32 [[ADD1]], i32* [[DOTOMP_IV]], align 4 +// CHECK7-NEXT: store i32 [[ADD1]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !10 // CHECK7-NEXT: br label 
[[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP11:![0-9]+]] // CHECK7: omp.inner.for.end: // CHECK7-NEXT: store i32 100, i32* [[I]], align 4 @@ -7191,23 +7191,23 @@ int main() { // CHECK7: omp_if.then: // CHECK7-NEXT: br label [[OMP_INNER_FOR_COND22:%.*]] // CHECK7: omp.inner.for.cond22: -// CHECK7-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_IV19]], align 4 -// CHECK7-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_UB18]], align 4 +// CHECK7-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_IV19]], align 4, !llvm.access.group !15 +// CHECK7-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_UB18]], align 4, !llvm.access.group !15 // CHECK7-NEXT: [[CMP23:%.*]] = icmp sle i32 [[TMP13]], [[TMP14]] // CHECK7-NEXT: br i1 [[CMP23]], label [[OMP_INNER_FOR_BODY24:%.*]], label [[OMP_INNER_FOR_END30:%.*]] // CHECK7: omp.inner.for.body24: -// CHECK7-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_IV19]], align 4 +// CHECK7-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_IV19]], align 4, !llvm.access.group !15 // CHECK7-NEXT: [[MUL25:%.*]] = mul nsw i32 [[TMP15]], 1 // CHECK7-NEXT: [[ADD26:%.*]] = add nsw i32 0, [[MUL25]] -// CHECK7-NEXT: store i32 [[ADD26]], i32* [[I20]], align 4 -// CHECK7-NEXT: call void @_Z3fn6v() +// CHECK7-NEXT: store i32 [[ADD26]], i32* [[I20]], align 4, !llvm.access.group !15 +// CHECK7-NEXT: call void @_Z3fn6v(), !llvm.access.group !15 // CHECK7-NEXT: br label [[OMP_BODY_CONTINUE27:%.*]] // CHECK7: omp.body.continue27: // CHECK7-NEXT: br label [[OMP_INNER_FOR_INC28:%.*]] // CHECK7: omp.inner.for.inc28: -// CHECK7-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_IV19]], align 4 +// CHECK7-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_IV19]], align 4, !llvm.access.group !15 // CHECK7-NEXT: [[ADD29:%.*]] = add nsw i32 [[TMP16]], 1 -// CHECK7-NEXT: store i32 [[ADD29]], i32* [[DOTOMP_IV19]], align 4 +// CHECK7-NEXT: store i32 [[ADD29]], i32* [[DOTOMP_IV19]], align 4, !llvm.access.group !15 // CHECK7-NEXT: br label [[OMP_INNER_FOR_COND22]], !llvm.loop [[LOOP16:![0-9]+]] // CHECK7: omp.inner.for.end30: // CHECK7-NEXT: br label [[OMP_IF_END:%.*]] @@ -7268,23 +7268,23 @@ int main() { // CHECK7-NEXT: store i32 [[TMP0]], i32* [[DOTOMP_IV]], align 4 // CHECK7-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK7: omp.inner.for.cond: -// CHECK7-NEXT: [[TMP1:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK7-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK7-NEXT: [[TMP1:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !19 +// CHECK7-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !19 // CHECK7-NEXT: [[CMP:%.*]] = icmp sle i32 [[TMP1]], [[TMP2]] // CHECK7-NEXT: br i1 [[CMP]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK7: omp.inner.for.body: -// CHECK7-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK7-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !19 // CHECK7-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP3]], 1 // CHECK7-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK7-NEXT: store i32 [[ADD]], i32* [[I]], align 4 -// CHECK7-NEXT: call void @_Z3fn1v() +// CHECK7-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !19 +// CHECK7-NEXT: call void @_Z3fn1v(), !llvm.access.group !19 // CHECK7-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK7: omp.body.continue: // CHECK7-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK7: omp.inner.for.inc: -// CHECK7-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK7-NEXT: [[TMP4:%.*]] = load i32, i32* 
[[DOTOMP_IV]], align 4, !llvm.access.group !19 // CHECK7-NEXT: [[ADD1:%.*]] = add nsw i32 [[TMP4]], 1 -// CHECK7-NEXT: store i32 [[ADD1]], i32* [[DOTOMP_IV]], align 4 +// CHECK7-NEXT: store i32 [[ADD1]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !19 // CHECK7-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP20:![0-9]+]] // CHECK7: omp.inner.for.end: // CHECK7-NEXT: store i32 100, i32* [[I]], align 4 @@ -7324,23 +7324,23 @@ int main() { // CHECK7-NEXT: store i32 [[TMP11]], i32* [[DOTOMP_IV19]], align 4 // CHECK7-NEXT: br label [[OMP_INNER_FOR_COND21:%.*]] // CHECK7: omp.inner.for.cond21: -// CHECK7-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV19]], align 4 -// CHECK7-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_UB18]], align 4 +// CHECK7-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV19]], align 4, !llvm.access.group !23 +// CHECK7-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_UB18]], align 4, !llvm.access.group !23 // CHECK7-NEXT: [[CMP22:%.*]] = icmp sle i32 [[TMP12]], [[TMP13]] // CHECK7-NEXT: br i1 [[CMP22]], label [[OMP_INNER_FOR_BODY23:%.*]], label [[OMP_INNER_FOR_END29:%.*]] // CHECK7: omp.inner.for.body23: -// CHECK7-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IV19]], align 4 +// CHECK7-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IV19]], align 4, !llvm.access.group !23 // CHECK7-NEXT: [[MUL24:%.*]] = mul nsw i32 [[TMP14]], 1 // CHECK7-NEXT: [[ADD25:%.*]] = add nsw i32 0, [[MUL24]] -// CHECK7-NEXT: store i32 [[ADD25]], i32* [[I20]], align 4 -// CHECK7-NEXT: call void @_Z3fn3v() +// CHECK7-NEXT: store i32 [[ADD25]], i32* [[I20]], align 4, !llvm.access.group !23 +// CHECK7-NEXT: call void @_Z3fn3v(), !llvm.access.group !23 // CHECK7-NEXT: br label [[OMP_BODY_CONTINUE26:%.*]] // CHECK7: omp.body.continue26: // CHECK7-NEXT: br label [[OMP_INNER_FOR_INC27:%.*]] // CHECK7: omp.inner.for.inc27: -// CHECK7-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_IV19]], align 4 +// CHECK7-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_IV19]], align 4, !llvm.access.group !23 // CHECK7-NEXT: [[ADD28:%.*]] = add nsw i32 [[TMP15]], 1 -// CHECK7-NEXT: store i32 [[ADD28]], i32* [[DOTOMP_IV19]], align 4 +// CHECK7-NEXT: store i32 [[ADD28]], i32* [[DOTOMP_IV19]], align 4, !llvm.access.group !23 // CHECK7-NEXT: br label [[OMP_INNER_FOR_COND21]], !llvm.loop [[LOOP24:![0-9]+]] // CHECK7: omp.inner.for.end29: // CHECK7-NEXT: store i32 100, i32* [[I20]], align 4 @@ -7366,23 +7366,23 @@ int main() { // CHECK8-NEXT: store i32 [[TMP0]], i32* [[DOTOMP_IV]], align 4 // CHECK8-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK8: omp.inner.for.cond: -// CHECK8-NEXT: [[TMP1:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK8-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK8-NEXT: [[TMP1:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !2 +// CHECK8-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !2 // CHECK8-NEXT: [[CMP:%.*]] = icmp sle i32 [[TMP1]], [[TMP2]] // CHECK8-NEXT: br i1 [[CMP]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK8: omp.inner.for.body: -// CHECK8-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK8-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !2 // CHECK8-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP3]], 1 // CHECK8-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK8-NEXT: store i32 [[ADD]], i32* [[I]], align 4 -// CHECK8-NEXT: store i32 0, i32* @Arg, align 4, !nontemporal !3 +// CHECK8-NEXT: store i32 [[ADD]], i32* [[I]], align 4, 
!llvm.access.group !2 +// CHECK8-NEXT: store i32 0, i32* @Arg, align 4, !nontemporal !3, !llvm.access.group !2 // CHECK8-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK8: omp.body.continue: // CHECK8-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK8: omp.inner.for.inc: -// CHECK8-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK8-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !2 // CHECK8-NEXT: [[ADD1:%.*]] = add nsw i32 [[TMP4]], 1 -// CHECK8-NEXT: store i32 [[ADD1]], i32* [[DOTOMP_IV]], align 4 +// CHECK8-NEXT: store i32 [[ADD1]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !2 // CHECK8-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP4:![0-9]+]] // CHECK8: omp.inner.for.end: // CHECK8-NEXT: store i32 100, i32* [[I]], align 4 @@ -7392,23 +7392,23 @@ int main() { // CHECK8-NEXT: store i32 [[TMP5]], i32* [[DOTOMP_IV5]], align 4 // CHECK8-NEXT: br label [[OMP_INNER_FOR_COND7:%.*]] // CHECK8: omp.inner.for.cond7: -// CHECK8-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV5]], align 4 -// CHECK8-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_UB4]], align 4 +// CHECK8-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV5]], align 4, !llvm.access.group !7 +// CHECK8-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_UB4]], align 4, !llvm.access.group !7 // CHECK8-NEXT: [[CMP8:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] // CHECK8-NEXT: br i1 [[CMP8]], label [[OMP_INNER_FOR_BODY9:%.*]], label [[OMP_INNER_FOR_END15:%.*]] // CHECK8: omp.inner.for.body9: -// CHECK8-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV5]], align 4 +// CHECK8-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV5]], align 4, !llvm.access.group !7 // CHECK8-NEXT: [[MUL10:%.*]] = mul nsw i32 [[TMP8]], 1 // CHECK8-NEXT: [[ADD11:%.*]] = add nsw i32 0, [[MUL10]] -// CHECK8-NEXT: store i32 [[ADD11]], i32* [[I6]], align 4 -// CHECK8-NEXT: call void @_Z9gtid_testv() +// CHECK8-NEXT: store i32 [[ADD11]], i32* [[I6]], align 4, !llvm.access.group !7 +// CHECK8-NEXT: call void @_Z9gtid_testv(), !llvm.access.group !7 // CHECK8-NEXT: br label [[OMP_BODY_CONTINUE12:%.*]] // CHECK8: omp.body.continue12: // CHECK8-NEXT: br label [[OMP_INNER_FOR_INC13:%.*]] // CHECK8: omp.inner.for.inc13: -// CHECK8-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV5]], align 4 +// CHECK8-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV5]], align 4, !llvm.access.group !7 // CHECK8-NEXT: [[ADD14:%.*]] = add nsw i32 [[TMP9]], 1 -// CHECK8-NEXT: store i32 [[ADD14]], i32* [[DOTOMP_IV5]], align 4 +// CHECK8-NEXT: store i32 [[ADD14]], i32* [[DOTOMP_IV5]], align 4, !llvm.access.group !7 // CHECK8-NEXT: br label [[OMP_INNER_FOR_COND7]], !llvm.loop [[LOOP8:![0-9]+]] // CHECK8: omp.inner.for.end15: // CHECK8-NEXT: store i32 100, i32* [[I6]], align 4 @@ -7442,23 +7442,23 @@ int main() { // CHECK8-NEXT: store i32 [[TMP0]], i32* [[DOTOMP_IV]], align 4 // CHECK8-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK8: omp.inner.for.cond: -// CHECK8-NEXT: [[TMP1:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK8-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK8-NEXT: [[TMP1:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !10 +// CHECK8-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !10 // CHECK8-NEXT: [[CMP:%.*]] = icmp sle i32 [[TMP1]], [[TMP2]] // CHECK8-NEXT: br i1 [[CMP]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK8: omp.inner.for.body: -// CHECK8-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK8-NEXT: 
[[TMP3:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !10 // CHECK8-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP3]], 1 // CHECK8-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK8-NEXT: store i32 [[ADD]], i32* [[I]], align 4 -// CHECK8-NEXT: call void @_Z3fn4v() +// CHECK8-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !10 +// CHECK8-NEXT: call void @_Z3fn4v(), !llvm.access.group !10 // CHECK8-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK8: omp.body.continue: // CHECK8-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK8: omp.inner.for.inc: -// CHECK8-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK8-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !10 // CHECK8-NEXT: [[ADD1:%.*]] = add nsw i32 [[TMP4]], 1 -// CHECK8-NEXT: store i32 [[ADD1]], i32* [[DOTOMP_IV]], align 4 +// CHECK8-NEXT: store i32 [[ADD1]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !10 // CHECK8-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP11:![0-9]+]] // CHECK8: omp.inner.for.end: // CHECK8-NEXT: store i32 100, i32* [[I]], align 4 @@ -7502,23 +7502,23 @@ int main() { // CHECK8: omp_if.then: // CHECK8-NEXT: br label [[OMP_INNER_FOR_COND22:%.*]] // CHECK8: omp.inner.for.cond22: -// CHECK8-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_IV19]], align 4 -// CHECK8-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_UB18]], align 4 +// CHECK8-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_IV19]], align 4, !llvm.access.group !15 +// CHECK8-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_UB18]], align 4, !llvm.access.group !15 // CHECK8-NEXT: [[CMP23:%.*]] = icmp sle i32 [[TMP13]], [[TMP14]] // CHECK8-NEXT: br i1 [[CMP23]], label [[OMP_INNER_FOR_BODY24:%.*]], label [[OMP_INNER_FOR_END30:%.*]] // CHECK8: omp.inner.for.body24: -// CHECK8-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_IV19]], align 4 +// CHECK8-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_IV19]], align 4, !llvm.access.group !15 // CHECK8-NEXT: [[MUL25:%.*]] = mul nsw i32 [[TMP15]], 1 // CHECK8-NEXT: [[ADD26:%.*]] = add nsw i32 0, [[MUL25]] -// CHECK8-NEXT: store i32 [[ADD26]], i32* [[I20]], align 4 -// CHECK8-NEXT: call void @_Z3fn6v() +// CHECK8-NEXT: store i32 [[ADD26]], i32* [[I20]], align 4, !llvm.access.group !15 +// CHECK8-NEXT: call void @_Z3fn6v(), !llvm.access.group !15 // CHECK8-NEXT: br label [[OMP_BODY_CONTINUE27:%.*]] // CHECK8: omp.body.continue27: // CHECK8-NEXT: br label [[OMP_INNER_FOR_INC28:%.*]] // CHECK8: omp.inner.for.inc28: -// CHECK8-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_IV19]], align 4 +// CHECK8-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_IV19]], align 4, !llvm.access.group !15 // CHECK8-NEXT: [[ADD29:%.*]] = add nsw i32 [[TMP16]], 1 -// CHECK8-NEXT: store i32 [[ADD29]], i32* [[DOTOMP_IV19]], align 4 +// CHECK8-NEXT: store i32 [[ADD29]], i32* [[DOTOMP_IV19]], align 4, !llvm.access.group !15 // CHECK8-NEXT: br label [[OMP_INNER_FOR_COND22]], !llvm.loop [[LOOP16:![0-9]+]] // CHECK8: omp.inner.for.end30: // CHECK8-NEXT: br label [[OMP_IF_END:%.*]] @@ -7579,23 +7579,23 @@ int main() { // CHECK8-NEXT: store i32 [[TMP0]], i32* [[DOTOMP_IV]], align 4 // CHECK8-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK8: omp.inner.for.cond: -// CHECK8-NEXT: [[TMP1:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK8-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK8-NEXT: [[TMP1:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !19 +// CHECK8-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group 
!19 // CHECK8-NEXT: [[CMP:%.*]] = icmp sle i32 [[TMP1]], [[TMP2]] // CHECK8-NEXT: br i1 [[CMP]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK8: omp.inner.for.body: -// CHECK8-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK8-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !19 // CHECK8-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP3]], 1 // CHECK8-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK8-NEXT: store i32 [[ADD]], i32* [[I]], align 4 -// CHECK8-NEXT: call void @_Z3fn1v() +// CHECK8-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !19 +// CHECK8-NEXT: call void @_Z3fn1v(), !llvm.access.group !19 // CHECK8-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK8: omp.body.continue: // CHECK8-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK8: omp.inner.for.inc: -// CHECK8-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK8-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !19 // CHECK8-NEXT: [[ADD1:%.*]] = add nsw i32 [[TMP4]], 1 -// CHECK8-NEXT: store i32 [[ADD1]], i32* [[DOTOMP_IV]], align 4 +// CHECK8-NEXT: store i32 [[ADD1]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !19 // CHECK8-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP20:![0-9]+]] // CHECK8: omp.inner.for.end: // CHECK8-NEXT: store i32 100, i32* [[I]], align 4 @@ -7635,23 +7635,23 @@ int main() { // CHECK8-NEXT: store i32 [[TMP11]], i32* [[DOTOMP_IV19]], align 4 // CHECK8-NEXT: br label [[OMP_INNER_FOR_COND21:%.*]] // CHECK8: omp.inner.for.cond21: -// CHECK8-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV19]], align 4 -// CHECK8-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_UB18]], align 4 +// CHECK8-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV19]], align 4, !llvm.access.group !23 +// CHECK8-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_UB18]], align 4, !llvm.access.group !23 // CHECK8-NEXT: [[CMP22:%.*]] = icmp sle i32 [[TMP12]], [[TMP13]] // CHECK8-NEXT: br i1 [[CMP22]], label [[OMP_INNER_FOR_BODY23:%.*]], label [[OMP_INNER_FOR_END29:%.*]] // CHECK8: omp.inner.for.body23: -// CHECK8-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IV19]], align 4 +// CHECK8-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IV19]], align 4, !llvm.access.group !23 // CHECK8-NEXT: [[MUL24:%.*]] = mul nsw i32 [[TMP14]], 1 // CHECK8-NEXT: [[ADD25:%.*]] = add nsw i32 0, [[MUL24]] -// CHECK8-NEXT: store i32 [[ADD25]], i32* [[I20]], align 4 -// CHECK8-NEXT: call void @_Z3fn3v() +// CHECK8-NEXT: store i32 [[ADD25]], i32* [[I20]], align 4, !llvm.access.group !23 +// CHECK8-NEXT: call void @_Z3fn3v(), !llvm.access.group !23 // CHECK8-NEXT: br label [[OMP_BODY_CONTINUE26:%.*]] // CHECK8: omp.body.continue26: // CHECK8-NEXT: br label [[OMP_INNER_FOR_INC27:%.*]] // CHECK8: omp.inner.for.inc27: -// CHECK8-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_IV19]], align 4 +// CHECK8-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_IV19]], align 4, !llvm.access.group !23 // CHECK8-NEXT: [[ADD28:%.*]] = add nsw i32 [[TMP15]], 1 -// CHECK8-NEXT: store i32 [[ADD28]], i32* [[DOTOMP_IV19]], align 4 +// CHECK8-NEXT: store i32 [[ADD28]], i32* [[DOTOMP_IV19]], align 4, !llvm.access.group !23 // CHECK8-NEXT: br label [[OMP_INNER_FOR_COND21]], !llvm.loop [[LOOP24:![0-9]+]] // CHECK8: omp.inner.for.end29: // CHECK8-NEXT: store i32 100, i32* [[I20]], align 4 @@ -7755,26 +7755,26 @@ int main() { // CHECK9-NEXT: store i32 [[TMP4]], i32* [[DOTOMP_IV]], align 4 // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK9: 
omp.inner.for.cond: -// CHECK9-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK9-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK9-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !13 +// CHECK9-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !13 // CHECK9-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] // CHECK9-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK9: omp.inner.for.body: -// CHECK9-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK9-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !13 // CHECK9-NEXT: [[TMP8:%.*]] = zext i32 [[TMP7]] to i64 -// CHECK9-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK9-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !13 // CHECK9-NEXT: [[TMP10:%.*]] = zext i32 [[TMP9]] to i64 -// CHECK9-NEXT: [[TMP11:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK9-NEXT: [[TMP11:%.*]] = load i32, i32* [[CONV]], align 8, !llvm.access.group !13 // CHECK9-NEXT: [[CONV2:%.*]] = bitcast i64* [[ARG_CASTED]] to i32* -// CHECK9-NEXT: store i32 [[TMP11]], i32* [[CONV2]], align 4 -// CHECK9-NEXT: [[TMP12:%.*]] = load i64, i64* [[ARG_CASTED]], align 8 -// CHECK9-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 3, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64, i64)* @.omp_outlined..1 to void (i32*, i32*, ...)*), i64 [[TMP8]], i64 [[TMP10]], i64 [[TMP12]]) +// CHECK9-NEXT: store i32 [[TMP11]], i32* [[CONV2]], align 4, !llvm.access.group !13 +// CHECK9-NEXT: [[TMP12:%.*]] = load i64, i64* [[ARG_CASTED]], align 8, !llvm.access.group !13 +// CHECK9-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) 
@__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 3, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64, i64)* @.omp_outlined..1 to void (i32*, i32*, ...)*), i64 [[TMP8]], i64 [[TMP10]], i64 [[TMP12]]), !llvm.access.group !13 // CHECK9-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK9: omp.inner.for.inc: -// CHECK9-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK9-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK9-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !13 +// CHECK9-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !13 // CHECK9-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP13]], [[TMP14]] -// CHECK9-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !13 // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP14:![0-9]+]] // CHECK9: omp.inner.for.end: // CHECK9-NEXT: br label [[OMP_LOOP_EXIT:%.*]] @@ -7839,23 +7839,23 @@ int main() { // CHECK9-NEXT: store i32 [[TMP6]], i32* [[DOTOMP_IV]], align 4 // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK9: omp.inner.for.cond: -// CHECK9-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK9-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK9-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !17 +// CHECK9-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !17 // CHECK9-NEXT: [[CMP3:%.*]] = icmp sle i32 [[TMP7]], [[TMP8]] // CHECK9-NEXT: br i1 [[CMP3]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK9: omp.inner.for.body: -// CHECK9-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !17 // CHECK9-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP9]], 1 // CHECK9-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK9-NEXT: store i32 [[ADD]], i32* [[I]], align 4 -// CHECK9-NEXT: store i32 0, i32* [[CONV]], align 8 +// CHECK9-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !17 +// CHECK9-NEXT: store i32 0, i32* [[CONV]], align 8, !llvm.access.group !17 // CHECK9-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK9: omp.body.continue: // CHECK9-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK9: omp.inner.for.inc: -// CHECK9-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !17 // CHECK9-NEXT: [[ADD4:%.*]] = add nsw i32 [[TMP10]], 1 -// CHECK9-NEXT: store i32 [[ADD4]], i32* [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: store i32 [[ADD4]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !17 // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP18:![0-9]+]] // CHECK9: omp.inner.for.end: // CHECK9-NEXT: br label [[OMP_LOOP_EXIT:%.*]] @@ -7915,26 +7915,26 @@ int main() { // CHECK9-NEXT: store i32 [[TMP4]], i32* [[DOTOMP_IV]], align 4 // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK9: omp.inner.for.cond: -// CHECK9-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK9-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK9-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !22 +// CHECK9-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !22 // CHECK9-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] // 
CHECK9-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK9: omp.inner.for.body: -// CHECK9-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK9-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !22 // CHECK9-NEXT: [[TMP8:%.*]] = zext i32 [[TMP7]] to i64 -// CHECK9-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK9-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !22 // CHECK9-NEXT: [[TMP10:%.*]] = zext i32 [[TMP9]] to i64 -// CHECK9-NEXT: call void @__kmpc_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]) -// CHECK9-NEXT: [[TMP11:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK9-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4 -// CHECK9-NEXT: call void @.omp_outlined..3(i32* [[TMP11]], i32* [[DOTBOUND_ZERO_ADDR]], i64 [[TMP8]], i64 [[TMP10]]) #[[ATTR2]] -// CHECK9-NEXT: call void @__kmpc_end_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]) +// CHECK9-NEXT: call void @__kmpc_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]), !llvm.access.group !22 +// CHECK9-NEXT: [[TMP11:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8, !llvm.access.group !22 +// CHECK9-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4, !llvm.access.group !22 +// CHECK9-NEXT: call void @.omp_outlined..3(i32* [[TMP11]], i32* [[DOTBOUND_ZERO_ADDR]], i64 [[TMP8]], i64 [[TMP10]]) #[[ATTR2]], !llvm.access.group !22 +// CHECK9-NEXT: call void @__kmpc_end_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]), !llvm.access.group !22 // CHECK9-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK9: omp.inner.for.inc: -// CHECK9-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK9-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK9-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !22 +// CHECK9-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !22 // CHECK9-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP12]], [[TMP13]] -// CHECK9-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !22 // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP23:![0-9]+]] // CHECK9: omp.inner.for.end: // CHECK9-NEXT: br label [[OMP_LOOP_EXIT:%.*]] @@ -7996,23 +7996,23 @@ int main() { // CHECK9-NEXT: store i32 [[TMP6]], i32* [[DOTOMP_IV]], align 4 // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK9: omp.inner.for.cond: -// CHECK9-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK9-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK9-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !25 +// CHECK9-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !25 // CHECK9-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP7]], [[TMP8]] // CHECK9-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK9: omp.inner.for.body: -// CHECK9-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !25 // CHECK9-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP9]], 1 // CHECK9-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK9-NEXT: store i32 [[ADD]], i32* [[I]], align 4 -// CHECK9-NEXT: call void @_Z9gtid_testv() +// CHECK9-NEXT: 
store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !25 +// CHECK9-NEXT: call void @_Z9gtid_testv(), !llvm.access.group !25 // CHECK9-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK9: omp.body.continue: // CHECK9-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK9: omp.inner.for.inc: -// CHECK9-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !25 // CHECK9-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP10]], 1 -// CHECK9-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !25 // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP26:![0-9]+]] // CHECK9: omp.inner.for.end: // CHECK9-NEXT: br label [[OMP_LOOP_EXIT:%.*]] @@ -8137,22 +8137,22 @@ int main() { // CHECK9-NEXT: store i32 [[TMP4]], i32* [[DOTOMP_IV]], align 4 // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK9: omp.inner.for.cond: -// CHECK9-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK9-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK9-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !28 +// CHECK9-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !28 // CHECK9-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] // CHECK9-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK9: omp.inner.for.body: -// CHECK9-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK9-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !28 // CHECK9-NEXT: [[TMP8:%.*]] = zext i32 [[TMP7]] to i64 -// CHECK9-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK9-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !28 // CHECK9-NEXT: [[TMP10:%.*]] = zext i32 [[TMP9]] to i64 -// CHECK9-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 2, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64)* @.omp_outlined..5 to void (i32*, i32*, ...)*), i64 [[TMP8]], i64 [[TMP10]]) +// CHECK9-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) 
@__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 2, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64)* @.omp_outlined..5 to void (i32*, i32*, ...)*), i64 [[TMP8]], i64 [[TMP10]]), !llvm.access.group !28 // CHECK9-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK9: omp.inner.for.inc: -// CHECK9-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK9-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK9-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !28 +// CHECK9-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !28 // CHECK9-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP11]], [[TMP12]] -// CHECK9-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !28 // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP29:![0-9]+]] // CHECK9: omp.inner.for.end: // CHECK9-NEXT: br label [[OMP_LOOP_EXIT:%.*]] @@ -8214,23 +8214,23 @@ int main() { // CHECK9-NEXT: store i32 [[TMP6]], i32* [[DOTOMP_IV]], align 4 // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK9: omp.inner.for.cond: -// CHECK9-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK9-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK9-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !31 +// CHECK9-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !31 // CHECK9-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP7]], [[TMP8]] // CHECK9-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK9: omp.inner.for.body: -// CHECK9-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !31 // CHECK9-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP9]], 1 // CHECK9-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK9-NEXT: store i32 [[ADD]], i32* [[I]], align 4 -// CHECK9-NEXT: call void @_Z3fn4v() +// CHECK9-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !31 +// CHECK9-NEXT: call void @_Z3fn4v(), !llvm.access.group !31 // CHECK9-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK9: omp.body.continue: // CHECK9-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK9: omp.inner.for.inc: -// CHECK9-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !31 // CHECK9-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP10]], 1 -// CHECK9-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !31 // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP32:![0-9]+]] // CHECK9: omp.inner.for.end: // CHECK9-NEXT: br label [[OMP_LOOP_EXIT:%.*]] @@ -8290,26 +8290,26 @@ int main() { // CHECK9-NEXT: store i32 [[TMP4]], i32* [[DOTOMP_IV]], align 4 // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK9: omp.inner.for.cond: -// CHECK9-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK9-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK9-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !34 +// CHECK9-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !34 // CHECK9-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] // CHECK9-NEXT: br i1 [[CMP1]], label 
[[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK9: omp.inner.for.body: -// CHECK9-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK9-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !34 // CHECK9-NEXT: [[TMP8:%.*]] = zext i32 [[TMP7]] to i64 -// CHECK9-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK9-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !34 // CHECK9-NEXT: [[TMP10:%.*]] = zext i32 [[TMP9]] to i64 -// CHECK9-NEXT: call void @__kmpc_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]) -// CHECK9-NEXT: [[TMP11:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK9-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4 -// CHECK9-NEXT: call void @.omp_outlined..7(i32* [[TMP11]], i32* [[DOTBOUND_ZERO_ADDR]], i64 [[TMP8]], i64 [[TMP10]]) #[[ATTR2]] -// CHECK9-NEXT: call void @__kmpc_end_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]) +// CHECK9-NEXT: call void @__kmpc_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]), !llvm.access.group !34 +// CHECK9-NEXT: [[TMP11:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8, !llvm.access.group !34 +// CHECK9-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4, !llvm.access.group !34 +// CHECK9-NEXT: call void @.omp_outlined..7(i32* [[TMP11]], i32* [[DOTBOUND_ZERO_ADDR]], i64 [[TMP8]], i64 [[TMP10]]) #[[ATTR2]], !llvm.access.group !34 +// CHECK9-NEXT: call void @__kmpc_end_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]), !llvm.access.group !34 // CHECK9-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK9: omp.inner.for.inc: -// CHECK9-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK9-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK9-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !34 +// CHECK9-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !34 // CHECK9-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP12]], [[TMP13]] -// CHECK9-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !34 // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP35:![0-9]+]] // CHECK9: omp.inner.for.end: // CHECK9-NEXT: br label [[OMP_LOOP_EXIT:%.*]] @@ -8371,23 +8371,23 @@ int main() { // CHECK9-NEXT: store i32 [[TMP6]], i32* [[DOTOMP_IV]], align 4 // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK9: omp.inner.for.cond: -// CHECK9-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK9-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK9-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !37 +// CHECK9-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !37 // CHECK9-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP7]], [[TMP8]] // CHECK9-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK9: omp.inner.for.body: -// CHECK9-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !37 // CHECK9-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP9]], 1 // CHECK9-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK9-NEXT: store i32 [[ADD]], i32* [[I]], align 4 -// CHECK9-NEXT: call void @_Z3fn5v() +// CHECK9-NEXT: store i32 [[ADD]], i32* [[I]], align 4, 
!llvm.access.group !37 +// CHECK9-NEXT: call void @_Z3fn5v(), !llvm.access.group !37 // CHECK9-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK9: omp.body.continue: // CHECK9-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK9: omp.inner.for.inc: -// CHECK9-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !37 // CHECK9-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP10]], 1 -// CHECK9-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !37 // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP38:![0-9]+]] // CHECK9: omp.inner.for.end: // CHECK9-NEXT: br label [[OMP_LOOP_EXIT:%.*]] @@ -8460,35 +8460,35 @@ int main() { // CHECK9-NEXT: store i32 [[TMP4]], i32* [[DOTOMP_IV]], align 4 // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK9: omp.inner.for.cond: -// CHECK9-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK9-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK9-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !40 +// CHECK9-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !40 // CHECK9-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] // CHECK9-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK9: omp.inner.for.body: -// CHECK9-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK9-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !40 // CHECK9-NEXT: [[TMP8:%.*]] = zext i32 [[TMP7]] to i64 -// CHECK9-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK9-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !40 // CHECK9-NEXT: [[TMP10:%.*]] = zext i32 [[TMP9]] to i64 -// CHECK9-NEXT: [[TMP11:%.*]] = load i8, i8* [[CONV]], align 8 +// CHECK9-NEXT: [[TMP11:%.*]] = load i8, i8* [[CONV]], align 8, !llvm.access.group !40 // CHECK9-NEXT: [[TOBOOL:%.*]] = trunc i8 [[TMP11]] to i1 // CHECK9-NEXT: br i1 [[TOBOOL]], label [[OMP_IF_THEN:%.*]], label [[OMP_IF_ELSE:%.*]] // CHECK9: omp_if.then: -// CHECK9-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 2, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64)* @.omp_outlined..9 to void (i32*, i32*, ...)*), i64 [[TMP8]], i64 [[TMP10]]) +// CHECK9-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) 
@__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 2, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64)* @.omp_outlined..9 to void (i32*, i32*, ...)*), i64 [[TMP8]], i64 [[TMP10]]), !llvm.access.group !40 // CHECK9-NEXT: br label [[OMP_IF_END:%.*]] // CHECK9: omp_if.else: -// CHECK9-NEXT: call void @__kmpc_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]) -// CHECK9-NEXT: [[TMP12:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK9-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4 -// CHECK9-NEXT: call void @.omp_outlined..9(i32* [[TMP12]], i32* [[DOTBOUND_ZERO_ADDR]], i64 [[TMP8]], i64 [[TMP10]]) #[[ATTR2]] -// CHECK9-NEXT: call void @__kmpc_end_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]) +// CHECK9-NEXT: call void @__kmpc_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]), !llvm.access.group !40 +// CHECK9-NEXT: [[TMP12:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8, !llvm.access.group !40 +// CHECK9-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4, !llvm.access.group !40 +// CHECK9-NEXT: call void @.omp_outlined..9(i32* [[TMP12]], i32* [[DOTBOUND_ZERO_ADDR]], i64 [[TMP8]], i64 [[TMP10]]) #[[ATTR2]], !llvm.access.group !40 +// CHECK9-NEXT: call void @__kmpc_end_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]), !llvm.access.group !40 // CHECK9-NEXT: br label [[OMP_IF_END]] // CHECK9: omp_if.end: // CHECK9-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK9: omp.inner.for.inc: -// CHECK9-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK9-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK9-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !40 +// CHECK9-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !40 // CHECK9-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP13]], [[TMP14]] -// CHECK9-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !40 // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP41:![0-9]+]] // CHECK9: omp.inner.for.end: // CHECK9-NEXT: br label [[OMP_LOOP_EXIT:%.*]] @@ -8550,23 +8550,23 @@ int main() { // CHECK9-NEXT: store i32 [[TMP6]], i32* [[DOTOMP_IV]], align 4 // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK9: omp.inner.for.cond: -// CHECK9-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK9-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK9-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !43 +// CHECK9-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !43 // CHECK9-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP7]], [[TMP8]] // CHECK9-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK9: omp.inner.for.body: -// CHECK9-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !43 // CHECK9-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP9]], 1 // CHECK9-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK9-NEXT: store i32 [[ADD]], i32* [[I]], align 4 -// CHECK9-NEXT: call void @_Z3fn6v() +// CHECK9-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !43 +// CHECK9-NEXT: call void @_Z3fn6v(), !llvm.access.group !43 // CHECK9-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK9: omp.body.continue: // CHECK9-NEXT: br label 
[[OMP_INNER_FOR_INC:%.*]] // CHECK9: omp.inner.for.inc: -// CHECK9-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !43 // CHECK9-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP10]], 1 -// CHECK9-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !43 // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP44:![0-9]+]] // CHECK9: omp.inner.for.end: // CHECK9-NEXT: br label [[OMP_LOOP_EXIT:%.*]] @@ -8680,22 +8680,22 @@ int main() { // CHECK9-NEXT: store i32 [[TMP4]], i32* [[DOTOMP_IV]], align 4 // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK9: omp.inner.for.cond: -// CHECK9-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK9-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK9-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !46 +// CHECK9-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !46 // CHECK9-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] // CHECK9-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK9: omp.inner.for.body: -// CHECK9-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK9-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !46 // CHECK9-NEXT: [[TMP8:%.*]] = zext i32 [[TMP7]] to i64 -// CHECK9-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK9-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !46 // CHECK9-NEXT: [[TMP10:%.*]] = zext i32 [[TMP9]] to i64 -// CHECK9-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 2, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64)* @.omp_outlined..13 to void (i32*, i32*, ...)*), i64 [[TMP8]], i64 [[TMP10]]) +// CHECK9-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) 
@__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 2, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64)* @.omp_outlined..13 to void (i32*, i32*, ...)*), i64 [[TMP8]], i64 [[TMP10]]), !llvm.access.group !46 // CHECK9-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK9: omp.inner.for.inc: -// CHECK9-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK9-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK9-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !46 +// CHECK9-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !46 // CHECK9-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP11]], [[TMP12]] -// CHECK9-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !46 // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP47:![0-9]+]] // CHECK9: omp.inner.for.end: // CHECK9-NEXT: br label [[OMP_LOOP_EXIT:%.*]] @@ -8757,23 +8757,23 @@ int main() { // CHECK9-NEXT: store i32 [[TMP6]], i32* [[DOTOMP_IV]], align 4 // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK9: omp.inner.for.cond: -// CHECK9-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK9-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK9-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !49 +// CHECK9-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !49 // CHECK9-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP7]], [[TMP8]] // CHECK9-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK9: omp.inner.for.body: -// CHECK9-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !49 // CHECK9-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP9]], 1 // CHECK9-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK9-NEXT: store i32 [[ADD]], i32* [[I]], align 4 -// CHECK9-NEXT: call void @_Z3fn1v() +// CHECK9-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !49 +// CHECK9-NEXT: call void @_Z3fn1v(), !llvm.access.group !49 // CHECK9-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK9: omp.body.continue: // CHECK9-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK9: omp.inner.for.inc: -// CHECK9-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !49 // CHECK9-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP10]], 1 -// CHECK9-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !49 // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP50:![0-9]+]] // CHECK9: omp.inner.for.end: // CHECK9-NEXT: br label [[OMP_LOOP_EXIT:%.*]] @@ -8833,26 +8833,26 @@ int main() { // CHECK9-NEXT: store i32 [[TMP4]], i32* [[DOTOMP_IV]], align 4 // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK9: omp.inner.for.cond: -// CHECK9-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK9-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK9-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !52 +// CHECK9-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !52 // CHECK9-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] // CHECK9-NEXT: br i1 [[CMP1]], label 
[[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK9: omp.inner.for.body: -// CHECK9-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK9-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !52 // CHECK9-NEXT: [[TMP8:%.*]] = zext i32 [[TMP7]] to i64 -// CHECK9-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK9-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !52 // CHECK9-NEXT: [[TMP10:%.*]] = zext i32 [[TMP9]] to i64 -// CHECK9-NEXT: call void @__kmpc_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]) -// CHECK9-NEXT: [[TMP11:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK9-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4 -// CHECK9-NEXT: call void @.omp_outlined..15(i32* [[TMP11]], i32* [[DOTBOUND_ZERO_ADDR]], i64 [[TMP8]], i64 [[TMP10]]) #[[ATTR2]] -// CHECK9-NEXT: call void @__kmpc_end_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]) +// CHECK9-NEXT: call void @__kmpc_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]), !llvm.access.group !52 +// CHECK9-NEXT: [[TMP11:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8, !llvm.access.group !52 +// CHECK9-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4, !llvm.access.group !52 +// CHECK9-NEXT: call void @.omp_outlined..15(i32* [[TMP11]], i32* [[DOTBOUND_ZERO_ADDR]], i64 [[TMP8]], i64 [[TMP10]]) #[[ATTR2]], !llvm.access.group !52 +// CHECK9-NEXT: call void @__kmpc_end_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]), !llvm.access.group !52 // CHECK9-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK9: omp.inner.for.inc: -// CHECK9-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK9-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK9-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !52 +// CHECK9-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !52 // CHECK9-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP12]], [[TMP13]] -// CHECK9-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !52 // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP53:![0-9]+]] // CHECK9: omp.inner.for.end: // CHECK9-NEXT: br label [[OMP_LOOP_EXIT:%.*]] @@ -8914,23 +8914,23 @@ int main() { // CHECK9-NEXT: store i32 [[TMP6]], i32* [[DOTOMP_IV]], align 4 // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK9: omp.inner.for.cond: -// CHECK9-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK9-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK9-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !55 +// CHECK9-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !55 // CHECK9-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP7]], [[TMP8]] // CHECK9-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK9: omp.inner.for.body: -// CHECK9-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !55 // CHECK9-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP9]], 1 // CHECK9-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK9-NEXT: store i32 [[ADD]], i32* [[I]], align 4 -// CHECK9-NEXT: call void @_Z3fn2v() +// CHECK9-NEXT: store i32 [[ADD]], i32* [[I]], align 4, 
!llvm.access.group !55 +// CHECK9-NEXT: call void @_Z3fn2v(), !llvm.access.group !55 // CHECK9-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK9: omp.body.continue: // CHECK9-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK9: omp.inner.for.inc: -// CHECK9-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !55 // CHECK9-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP10]], 1 -// CHECK9-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !55 // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP56:![0-9]+]] // CHECK9: omp.inner.for.end: // CHECK9-NEXT: br label [[OMP_LOOP_EXIT:%.*]] @@ -9003,35 +9003,35 @@ int main() { // CHECK9-NEXT: store i32 [[TMP4]], i32* [[DOTOMP_IV]], align 4 // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK9: omp.inner.for.cond: -// CHECK9-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK9-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK9-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !58 +// CHECK9-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !58 // CHECK9-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] // CHECK9-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK9: omp.inner.for.body: -// CHECK9-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK9-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !58 // CHECK9-NEXT: [[TMP8:%.*]] = zext i32 [[TMP7]] to i64 -// CHECK9-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK9-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !58 // CHECK9-NEXT: [[TMP10:%.*]] = zext i32 [[TMP9]] to i64 -// CHECK9-NEXT: [[TMP11:%.*]] = load i8, i8* [[CONV]], align 8 +// CHECK9-NEXT: [[TMP11:%.*]] = load i8, i8* [[CONV]], align 8, !llvm.access.group !58 // CHECK9-NEXT: [[TOBOOL:%.*]] = trunc i8 [[TMP11]] to i1 // CHECK9-NEXT: br i1 [[TOBOOL]], label [[OMP_IF_THEN:%.*]], label [[OMP_IF_ELSE:%.*]] // CHECK9: omp_if.then: -// CHECK9-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 2, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64)* @.omp_outlined..17 to void (i32*, i32*, ...)*), i64 [[TMP8]], i64 [[TMP10]]) +// CHECK9-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) 
@__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 2, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64)* @.omp_outlined..17 to void (i32*, i32*, ...)*), i64 [[TMP8]], i64 [[TMP10]]), !llvm.access.group !58 // CHECK9-NEXT: br label [[OMP_IF_END:%.*]] // CHECK9: omp_if.else: -// CHECK9-NEXT: call void @__kmpc_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]) -// CHECK9-NEXT: [[TMP12:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK9-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4 -// CHECK9-NEXT: call void @.omp_outlined..17(i32* [[TMP12]], i32* [[DOTBOUND_ZERO_ADDR]], i64 [[TMP8]], i64 [[TMP10]]) #[[ATTR2]] -// CHECK9-NEXT: call void @__kmpc_end_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]) +// CHECK9-NEXT: call void @__kmpc_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]), !llvm.access.group !58 +// CHECK9-NEXT: [[TMP12:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8, !llvm.access.group !58 +// CHECK9-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4, !llvm.access.group !58 +// CHECK9-NEXT: call void @.omp_outlined..17(i32* [[TMP12]], i32* [[DOTBOUND_ZERO_ADDR]], i64 [[TMP8]], i64 [[TMP10]]) #[[ATTR2]], !llvm.access.group !58 +// CHECK9-NEXT: call void @__kmpc_end_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]), !llvm.access.group !58 // CHECK9-NEXT: br label [[OMP_IF_END]] // CHECK9: omp_if.end: // CHECK9-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK9: omp.inner.for.inc: -// CHECK9-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK9-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK9-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !58 +// CHECK9-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !58 // CHECK9-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP13]], [[TMP14]] -// CHECK9-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !58 // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP59:![0-9]+]] // CHECK9: omp.inner.for.end: // CHECK9-NEXT: br label [[OMP_LOOP_EXIT:%.*]] @@ -9093,23 +9093,23 @@ int main() { // CHECK9-NEXT: store i32 [[TMP6]], i32* [[DOTOMP_IV]], align 4 // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK9: omp.inner.for.cond: -// CHECK9-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK9-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK9-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !61 +// CHECK9-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !61 // CHECK9-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP7]], [[TMP8]] // CHECK9-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK9: omp.inner.for.body: -// CHECK9-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !61 // CHECK9-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP9]], 1 // CHECK9-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK9-NEXT: store i32 [[ADD]], i32* [[I]], align 4 -// CHECK9-NEXT: call void @_Z3fn3v() +// CHECK9-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !61 +// CHECK9-NEXT: call void @_Z3fn3v(), !llvm.access.group !61 // CHECK9-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK9: omp.body.continue: // CHECK9-NEXT: br label 
[[OMP_INNER_FOR_INC:%.*]] // CHECK9: omp.inner.for.inc: -// CHECK9-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !61 // CHECK9-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP10]], 1 -// CHECK9-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !61 // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP62:![0-9]+]] // CHECK9: omp.inner.for.end: // CHECK9-NEXT: br label [[OMP_LOOP_EXIT:%.*]] @@ -9229,26 +9229,26 @@ int main() { // CHECK10-NEXT: store i32 [[TMP4]], i32* [[DOTOMP_IV]], align 4 // CHECK10-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK10: omp.inner.for.cond: -// CHECK10-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK10-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK10-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !13 +// CHECK10-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !13 // CHECK10-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] // CHECK10-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK10: omp.inner.for.body: -// CHECK10-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK10-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !13 // CHECK10-NEXT: [[TMP8:%.*]] = zext i32 [[TMP7]] to i64 -// CHECK10-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK10-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !13 // CHECK10-NEXT: [[TMP10:%.*]] = zext i32 [[TMP9]] to i64 -// CHECK10-NEXT: [[TMP11:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK10-NEXT: [[TMP11:%.*]] = load i32, i32* [[CONV]], align 8, !llvm.access.group !13 // CHECK10-NEXT: [[CONV2:%.*]] = bitcast i64* [[ARG_CASTED]] to i32* -// CHECK10-NEXT: store i32 [[TMP11]], i32* [[CONV2]], align 4 -// CHECK10-NEXT: [[TMP12:%.*]] = load i64, i64* [[ARG_CASTED]], align 8 -// CHECK10-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 3, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64, i64)* @.omp_outlined..1 to void (i32*, i32*, ...)*), i64 [[TMP8]], i64 [[TMP10]], i64 [[TMP12]]) +// CHECK10-NEXT: store i32 [[TMP11]], i32* [[CONV2]], align 4, !llvm.access.group !13 +// CHECK10-NEXT: [[TMP12:%.*]] = load i64, i64* [[ARG_CASTED]], align 8, !llvm.access.group !13 +// CHECK10-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) 
@__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 3, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64, i64)* @.omp_outlined..1 to void (i32*, i32*, ...)*), i64 [[TMP8]], i64 [[TMP10]], i64 [[TMP12]]), !llvm.access.group !13 // CHECK10-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK10: omp.inner.for.inc: -// CHECK10-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK10-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK10-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !13 +// CHECK10-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !13 // CHECK10-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP13]], [[TMP14]] -// CHECK10-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 +// CHECK10-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !13 // CHECK10-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP14:![0-9]+]] // CHECK10: omp.inner.for.end: // CHECK10-NEXT: br label [[OMP_LOOP_EXIT:%.*]] @@ -9313,23 +9313,23 @@ int main() { // CHECK10-NEXT: store i32 [[TMP6]], i32* [[DOTOMP_IV]], align 4 // CHECK10-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK10: omp.inner.for.cond: -// CHECK10-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK10-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK10-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !17 +// CHECK10-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !17 // CHECK10-NEXT: [[CMP3:%.*]] = icmp sle i32 [[TMP7]], [[TMP8]] // CHECK10-NEXT: br i1 [[CMP3]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK10: omp.inner.for.body: -// CHECK10-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK10-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !17 // CHECK10-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP9]], 1 // CHECK10-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK10-NEXT: store i32 [[ADD]], i32* [[I]], align 4 -// CHECK10-NEXT: store i32 0, i32* [[CONV]], align 8 +// CHECK10-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !17 +// CHECK10-NEXT: store i32 0, i32* [[CONV]], align 8, !llvm.access.group !17 // CHECK10-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK10: omp.body.continue: // CHECK10-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK10: omp.inner.for.inc: -// CHECK10-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK10-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !17 // CHECK10-NEXT: [[ADD4:%.*]] = add nsw i32 [[TMP10]], 1 -// CHECK10-NEXT: store i32 [[ADD4]], i32* [[DOTOMP_IV]], align 4 +// CHECK10-NEXT: store i32 [[ADD4]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !17 // CHECK10-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP18:![0-9]+]] // CHECK10: omp.inner.for.end: // CHECK10-NEXT: br label [[OMP_LOOP_EXIT:%.*]] @@ -9389,26 +9389,26 @@ int main() { // CHECK10-NEXT: store i32 [[TMP4]], i32* [[DOTOMP_IV]], align 4 // CHECK10-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK10: omp.inner.for.cond: -// CHECK10-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK10-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK10-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !22 +// CHECK10-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !22 // CHECK10-NEXT: 
[[CMP1:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] // CHECK10-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK10: omp.inner.for.body: -// CHECK10-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK10-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !22 // CHECK10-NEXT: [[TMP8:%.*]] = zext i32 [[TMP7]] to i64 -// CHECK10-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK10-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !22 // CHECK10-NEXT: [[TMP10:%.*]] = zext i32 [[TMP9]] to i64 -// CHECK10-NEXT: call void @__kmpc_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]) -// CHECK10-NEXT: [[TMP11:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK10-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4 -// CHECK10-NEXT: call void @.omp_outlined..3(i32* [[TMP11]], i32* [[DOTBOUND_ZERO_ADDR]], i64 [[TMP8]], i64 [[TMP10]]) #[[ATTR2]] -// CHECK10-NEXT: call void @__kmpc_end_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]) +// CHECK10-NEXT: call void @__kmpc_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]), !llvm.access.group !22 +// CHECK10-NEXT: [[TMP11:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8, !llvm.access.group !22 +// CHECK10-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4, !llvm.access.group !22 +// CHECK10-NEXT: call void @.omp_outlined..3(i32* [[TMP11]], i32* [[DOTBOUND_ZERO_ADDR]], i64 [[TMP8]], i64 [[TMP10]]) #[[ATTR2]], !llvm.access.group !22 +// CHECK10-NEXT: call void @__kmpc_end_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]), !llvm.access.group !22 // CHECK10-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK10: omp.inner.for.inc: -// CHECK10-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK10-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK10-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !22 +// CHECK10-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !22 // CHECK10-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP12]], [[TMP13]] -// CHECK10-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 +// CHECK10-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !22 // CHECK10-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP23:![0-9]+]] // CHECK10: omp.inner.for.end: // CHECK10-NEXT: br label [[OMP_LOOP_EXIT:%.*]] @@ -9470,23 +9470,23 @@ int main() { // CHECK10-NEXT: store i32 [[TMP6]], i32* [[DOTOMP_IV]], align 4 // CHECK10-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK10: omp.inner.for.cond: -// CHECK10-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK10-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK10-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !25 +// CHECK10-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !25 // CHECK10-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP7]], [[TMP8]] // CHECK10-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK10: omp.inner.for.body: -// CHECK10-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK10-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !25 // CHECK10-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP9]], 1 // CHECK10-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK10-NEXT: store 
i32 [[ADD]], i32* [[I]], align 4 -// CHECK10-NEXT: call void @_Z9gtid_testv() +// CHECK10-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !25 +// CHECK10-NEXT: call void @_Z9gtid_testv(), !llvm.access.group !25 // CHECK10-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK10: omp.body.continue: // CHECK10-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK10: omp.inner.for.inc: -// CHECK10-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK10-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !25 // CHECK10-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP10]], 1 -// CHECK10-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4 +// CHECK10-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !25 // CHECK10-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP26:![0-9]+]] // CHECK10: omp.inner.for.end: // CHECK10-NEXT: br label [[OMP_LOOP_EXIT:%.*]] @@ -9611,22 +9611,22 @@ int main() { // CHECK10-NEXT: store i32 [[TMP4]], i32* [[DOTOMP_IV]], align 4 // CHECK10-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK10: omp.inner.for.cond: -// CHECK10-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK10-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK10-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !28 +// CHECK10-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !28 // CHECK10-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] // CHECK10-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK10: omp.inner.for.body: -// CHECK10-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK10-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !28 // CHECK10-NEXT: [[TMP8:%.*]] = zext i32 [[TMP7]] to i64 -// CHECK10-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK10-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !28 // CHECK10-NEXT: [[TMP10:%.*]] = zext i32 [[TMP9]] to i64 -// CHECK10-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 2, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64)* @.omp_outlined..5 to void (i32*, i32*, ...)*), i64 [[TMP8]], i64 [[TMP10]]) +// CHECK10-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) 
@__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 2, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64)* @.omp_outlined..5 to void (i32*, i32*, ...)*), i64 [[TMP8]], i64 [[TMP10]]), !llvm.access.group !28 // CHECK10-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK10: omp.inner.for.inc: -// CHECK10-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK10-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK10-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !28 +// CHECK10-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !28 // CHECK10-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP11]], [[TMP12]] -// CHECK10-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 +// CHECK10-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !28 // CHECK10-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP29:![0-9]+]] // CHECK10: omp.inner.for.end: // CHECK10-NEXT: br label [[OMP_LOOP_EXIT:%.*]] @@ -9688,23 +9688,23 @@ int main() { // CHECK10-NEXT: store i32 [[TMP6]], i32* [[DOTOMP_IV]], align 4 // CHECK10-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK10: omp.inner.for.cond: -// CHECK10-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK10-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK10-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !31 +// CHECK10-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !31 // CHECK10-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP7]], [[TMP8]] // CHECK10-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK10: omp.inner.for.body: -// CHECK10-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK10-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !31 // CHECK10-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP9]], 1 // CHECK10-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK10-NEXT: store i32 [[ADD]], i32* [[I]], align 4 -// CHECK10-NEXT: call void @_Z3fn4v() +// CHECK10-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !31 +// CHECK10-NEXT: call void @_Z3fn4v(), !llvm.access.group !31 // CHECK10-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK10: omp.body.continue: // CHECK10-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK10: omp.inner.for.inc: -// CHECK10-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK10-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !31 // CHECK10-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP10]], 1 -// CHECK10-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4 +// CHECK10-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !31 // CHECK10-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP32:![0-9]+]] // CHECK10: omp.inner.for.end: // CHECK10-NEXT: br label [[OMP_LOOP_EXIT:%.*]] @@ -9764,26 +9764,26 @@ int main() { // CHECK10-NEXT: store i32 [[TMP4]], i32* [[DOTOMP_IV]], align 4 // CHECK10-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK10: omp.inner.for.cond: -// CHECK10-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK10-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK10-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !34 +// CHECK10-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !34 // CHECK10-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] // 
CHECK10-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK10: omp.inner.for.body: -// CHECK10-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK10-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !34 // CHECK10-NEXT: [[TMP8:%.*]] = zext i32 [[TMP7]] to i64 -// CHECK10-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK10-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !34 // CHECK10-NEXT: [[TMP10:%.*]] = zext i32 [[TMP9]] to i64 -// CHECK10-NEXT: call void @__kmpc_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]) -// CHECK10-NEXT: [[TMP11:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK10-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4 -// CHECK10-NEXT: call void @.omp_outlined..7(i32* [[TMP11]], i32* [[DOTBOUND_ZERO_ADDR]], i64 [[TMP8]], i64 [[TMP10]]) #[[ATTR2]] -// CHECK10-NEXT: call void @__kmpc_end_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]) +// CHECK10-NEXT: call void @__kmpc_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]), !llvm.access.group !34 +// CHECK10-NEXT: [[TMP11:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8, !llvm.access.group !34 +// CHECK10-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4, !llvm.access.group !34 +// CHECK10-NEXT: call void @.omp_outlined..7(i32* [[TMP11]], i32* [[DOTBOUND_ZERO_ADDR]], i64 [[TMP8]], i64 [[TMP10]]) #[[ATTR2]], !llvm.access.group !34 +// CHECK10-NEXT: call void @__kmpc_end_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]), !llvm.access.group !34 // CHECK10-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK10: omp.inner.for.inc: -// CHECK10-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK10-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK10-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !34 +// CHECK10-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !34 // CHECK10-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP12]], [[TMP13]] -// CHECK10-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 +// CHECK10-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !34 // CHECK10-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP35:![0-9]+]] // CHECK10: omp.inner.for.end: // CHECK10-NEXT: br label [[OMP_LOOP_EXIT:%.*]] @@ -9845,23 +9845,23 @@ int main() { // CHECK10-NEXT: store i32 [[TMP6]], i32* [[DOTOMP_IV]], align 4 // CHECK10-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK10: omp.inner.for.cond: -// CHECK10-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK10-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK10-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !37 +// CHECK10-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !37 // CHECK10-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP7]], [[TMP8]] // CHECK10-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK10: omp.inner.for.body: -// CHECK10-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK10-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !37 // CHECK10-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP9]], 1 // CHECK10-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK10-NEXT: store i32 [[ADD]], i32* [[I]], align 4 -// CHECK10-NEXT: 
call void @_Z3fn5v() +// CHECK10-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !37 +// CHECK10-NEXT: call void @_Z3fn5v(), !llvm.access.group !37 // CHECK10-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK10: omp.body.continue: // CHECK10-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK10: omp.inner.for.inc: -// CHECK10-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK10-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !37 // CHECK10-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP10]], 1 -// CHECK10-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4 +// CHECK10-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !37 // CHECK10-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP38:![0-9]+]] // CHECK10: omp.inner.for.end: // CHECK10-NEXT: br label [[OMP_LOOP_EXIT:%.*]] @@ -9934,35 +9934,35 @@ int main() { // CHECK10-NEXT: store i32 [[TMP4]], i32* [[DOTOMP_IV]], align 4 // CHECK10-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK10: omp.inner.for.cond: -// CHECK10-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK10-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK10-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !40 +// CHECK10-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !40 // CHECK10-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] // CHECK10-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK10: omp.inner.for.body: -// CHECK10-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK10-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !40 // CHECK10-NEXT: [[TMP8:%.*]] = zext i32 [[TMP7]] to i64 -// CHECK10-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK10-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !40 // CHECK10-NEXT: [[TMP10:%.*]] = zext i32 [[TMP9]] to i64 -// CHECK10-NEXT: [[TMP11:%.*]] = load i8, i8* [[CONV]], align 8 +// CHECK10-NEXT: [[TMP11:%.*]] = load i8, i8* [[CONV]], align 8, !llvm.access.group !40 // CHECK10-NEXT: [[TOBOOL:%.*]] = trunc i8 [[TMP11]] to i1 // CHECK10-NEXT: br i1 [[TOBOOL]], label [[OMP_IF_THEN:%.*]], label [[OMP_IF_ELSE:%.*]] // CHECK10: omp_if.then: -// CHECK10-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 2, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64)* @.omp_outlined..9 to void (i32*, i32*, ...)*), i64 [[TMP8]], i64 [[TMP10]]) +// CHECK10-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) 
@__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 2, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64)* @.omp_outlined..9 to void (i32*, i32*, ...)*), i64 [[TMP8]], i64 [[TMP10]]), !llvm.access.group !40 // CHECK10-NEXT: br label [[OMP_IF_END:%.*]] // CHECK10: omp_if.else: -// CHECK10-NEXT: call void @__kmpc_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]) -// CHECK10-NEXT: [[TMP12:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK10-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4 -// CHECK10-NEXT: call void @.omp_outlined..9(i32* [[TMP12]], i32* [[DOTBOUND_ZERO_ADDR]], i64 [[TMP8]], i64 [[TMP10]]) #[[ATTR2]] -// CHECK10-NEXT: call void @__kmpc_end_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]) +// CHECK10-NEXT: call void @__kmpc_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]), !llvm.access.group !40 +// CHECK10-NEXT: [[TMP12:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8, !llvm.access.group !40 +// CHECK10-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4, !llvm.access.group !40 +// CHECK10-NEXT: call void @.omp_outlined..9(i32* [[TMP12]], i32* [[DOTBOUND_ZERO_ADDR]], i64 [[TMP8]], i64 [[TMP10]]) #[[ATTR2]], !llvm.access.group !40 +// CHECK10-NEXT: call void @__kmpc_end_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]), !llvm.access.group !40 // CHECK10-NEXT: br label [[OMP_IF_END]] // CHECK10: omp_if.end: // CHECK10-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK10: omp.inner.for.inc: -// CHECK10-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK10-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK10-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !40 +// CHECK10-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !40 // CHECK10-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP13]], [[TMP14]] -// CHECK10-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 +// CHECK10-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !40 // CHECK10-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP41:![0-9]+]] // CHECK10: omp.inner.for.end: // CHECK10-NEXT: br label [[OMP_LOOP_EXIT:%.*]] @@ -10024,23 +10024,23 @@ int main() { // CHECK10-NEXT: store i32 [[TMP6]], i32* [[DOTOMP_IV]], align 4 // CHECK10-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK10: omp.inner.for.cond: -// CHECK10-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK10-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK10-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !43 +// CHECK10-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !43 // CHECK10-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP7]], [[TMP8]] // CHECK10-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK10: omp.inner.for.body: -// CHECK10-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK10-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !43 // CHECK10-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP9]], 1 // CHECK10-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK10-NEXT: store i32 [[ADD]], i32* [[I]], align 4 -// CHECK10-NEXT: call void @_Z3fn6v() +// CHECK10-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !43 +// CHECK10-NEXT: call void @_Z3fn6v(), !llvm.access.group !43 // CHECK10-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK10: 
omp.body.continue: // CHECK10-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK10: omp.inner.for.inc: -// CHECK10-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK10-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !43 // CHECK10-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP10]], 1 -// CHECK10-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4 +// CHECK10-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !43 // CHECK10-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP44:![0-9]+]] // CHECK10: omp.inner.for.end: // CHECK10-NEXT: br label [[OMP_LOOP_EXIT:%.*]] @@ -10154,22 +10154,22 @@ int main() { // CHECK10-NEXT: store i32 [[TMP4]], i32* [[DOTOMP_IV]], align 4 // CHECK10-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK10: omp.inner.for.cond: -// CHECK10-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK10-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK10-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !46 +// CHECK10-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !46 // CHECK10-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] // CHECK10-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK10: omp.inner.for.body: -// CHECK10-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK10-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !46 // CHECK10-NEXT: [[TMP8:%.*]] = zext i32 [[TMP7]] to i64 -// CHECK10-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK10-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !46 // CHECK10-NEXT: [[TMP10:%.*]] = zext i32 [[TMP9]] to i64 -// CHECK10-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 2, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64)* @.omp_outlined..13 to void (i32*, i32*, ...)*), i64 [[TMP8]], i64 [[TMP10]]) +// CHECK10-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) 
@__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 2, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64)* @.omp_outlined..13 to void (i32*, i32*, ...)*), i64 [[TMP8]], i64 [[TMP10]]), !llvm.access.group !46 // CHECK10-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK10: omp.inner.for.inc: -// CHECK10-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK10-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK10-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !46 +// CHECK10-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !46 // CHECK10-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP11]], [[TMP12]] -// CHECK10-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 +// CHECK10-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !46 // CHECK10-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP47:![0-9]+]] // CHECK10: omp.inner.for.end: // CHECK10-NEXT: br label [[OMP_LOOP_EXIT:%.*]] @@ -10231,23 +10231,23 @@ int main() { // CHECK10-NEXT: store i32 [[TMP6]], i32* [[DOTOMP_IV]], align 4 // CHECK10-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK10: omp.inner.for.cond: -// CHECK10-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK10-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK10-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !49 +// CHECK10-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !49 // CHECK10-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP7]], [[TMP8]] // CHECK10-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK10: omp.inner.for.body: -// CHECK10-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK10-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !49 // CHECK10-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP9]], 1 // CHECK10-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK10-NEXT: store i32 [[ADD]], i32* [[I]], align 4 -// CHECK10-NEXT: call void @_Z3fn1v() +// CHECK10-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !49 +// CHECK10-NEXT: call void @_Z3fn1v(), !llvm.access.group !49 // CHECK10-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK10: omp.body.continue: // CHECK10-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK10: omp.inner.for.inc: -// CHECK10-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK10-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !49 // CHECK10-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP10]], 1 -// CHECK10-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4 +// CHECK10-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !49 // CHECK10-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP50:![0-9]+]] // CHECK10: omp.inner.for.end: // CHECK10-NEXT: br label [[OMP_LOOP_EXIT:%.*]] @@ -10307,26 +10307,26 @@ int main() { // CHECK10-NEXT: store i32 [[TMP4]], i32* [[DOTOMP_IV]], align 4 // CHECK10-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK10: omp.inner.for.cond: -// CHECK10-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK10-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK10-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !52 +// CHECK10-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !52 // CHECK10-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] 
// CHECK10-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK10: omp.inner.for.body: -// CHECK10-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK10-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !52 // CHECK10-NEXT: [[TMP8:%.*]] = zext i32 [[TMP7]] to i64 -// CHECK10-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK10-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !52 // CHECK10-NEXT: [[TMP10:%.*]] = zext i32 [[TMP9]] to i64 -// CHECK10-NEXT: call void @__kmpc_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]) -// CHECK10-NEXT: [[TMP11:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK10-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4 -// CHECK10-NEXT: call void @.omp_outlined..15(i32* [[TMP11]], i32* [[DOTBOUND_ZERO_ADDR]], i64 [[TMP8]], i64 [[TMP10]]) #[[ATTR2]] -// CHECK10-NEXT: call void @__kmpc_end_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]) +// CHECK10-NEXT: call void @__kmpc_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]), !llvm.access.group !52 +// CHECK10-NEXT: [[TMP11:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8, !llvm.access.group !52 +// CHECK10-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4, !llvm.access.group !52 +// CHECK10-NEXT: call void @.omp_outlined..15(i32* [[TMP11]], i32* [[DOTBOUND_ZERO_ADDR]], i64 [[TMP8]], i64 [[TMP10]]) #[[ATTR2]], !llvm.access.group !52 +// CHECK10-NEXT: call void @__kmpc_end_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]), !llvm.access.group !52 // CHECK10-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK10: omp.inner.for.inc: -// CHECK10-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK10-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK10-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !52 +// CHECK10-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !52 // CHECK10-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP12]], [[TMP13]] -// CHECK10-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 +// CHECK10-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !52 // CHECK10-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP53:![0-9]+]] // CHECK10: omp.inner.for.end: // CHECK10-NEXT: br label [[OMP_LOOP_EXIT:%.*]] @@ -10388,23 +10388,23 @@ int main() { // CHECK10-NEXT: store i32 [[TMP6]], i32* [[DOTOMP_IV]], align 4 // CHECK10-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK10: omp.inner.for.cond: -// CHECK10-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK10-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK10-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !55 +// CHECK10-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !55 // CHECK10-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP7]], [[TMP8]] // CHECK10-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK10: omp.inner.for.body: -// CHECK10-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK10-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !55 // CHECK10-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP9]], 1 // CHECK10-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK10-NEXT: store i32 [[ADD]], i32* [[I]], align 4 -// 
CHECK10-NEXT: call void @_Z3fn2v() +// CHECK10-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !55 +// CHECK10-NEXT: call void @_Z3fn2v(), !llvm.access.group !55 // CHECK10-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK10: omp.body.continue: // CHECK10-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK10: omp.inner.for.inc: -// CHECK10-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK10-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !55 // CHECK10-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP10]], 1 -// CHECK10-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4 +// CHECK10-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !55 // CHECK10-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP56:![0-9]+]] // CHECK10: omp.inner.for.end: // CHECK10-NEXT: br label [[OMP_LOOP_EXIT:%.*]] @@ -10477,35 +10477,35 @@ int main() { // CHECK10-NEXT: store i32 [[TMP4]], i32* [[DOTOMP_IV]], align 4 // CHECK10-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK10: omp.inner.for.cond: -// CHECK10-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK10-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK10-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !58 +// CHECK10-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !58 // CHECK10-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] // CHECK10-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK10: omp.inner.for.body: -// CHECK10-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK10-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !58 // CHECK10-NEXT: [[TMP8:%.*]] = zext i32 [[TMP7]] to i64 -// CHECK10-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK10-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !58 // CHECK10-NEXT: [[TMP10:%.*]] = zext i32 [[TMP9]] to i64 -// CHECK10-NEXT: [[TMP11:%.*]] = load i8, i8* [[CONV]], align 8 +// CHECK10-NEXT: [[TMP11:%.*]] = load i8, i8* [[CONV]], align 8, !llvm.access.group !58 // CHECK10-NEXT: [[TOBOOL:%.*]] = trunc i8 [[TMP11]] to i1 // CHECK10-NEXT: br i1 [[TOBOOL]], label [[OMP_IF_THEN:%.*]], label [[OMP_IF_ELSE:%.*]] // CHECK10: omp_if.then: -// CHECK10-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 2, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64)* @.omp_outlined..17 to void (i32*, i32*, ...)*), i64 [[TMP8]], i64 [[TMP10]]) +// CHECK10-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) 
@__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 2, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64)* @.omp_outlined..17 to void (i32*, i32*, ...)*), i64 [[TMP8]], i64 [[TMP10]]), !llvm.access.group !58 // CHECK10-NEXT: br label [[OMP_IF_END:%.*]] // CHECK10: omp_if.else: -// CHECK10-NEXT: call void @__kmpc_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]) -// CHECK10-NEXT: [[TMP12:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK10-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4 -// CHECK10-NEXT: call void @.omp_outlined..17(i32* [[TMP12]], i32* [[DOTBOUND_ZERO_ADDR]], i64 [[TMP8]], i64 [[TMP10]]) #[[ATTR2]] -// CHECK10-NEXT: call void @__kmpc_end_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]) +// CHECK10-NEXT: call void @__kmpc_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]), !llvm.access.group !58 +// CHECK10-NEXT: [[TMP12:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8, !llvm.access.group !58 +// CHECK10-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4, !llvm.access.group !58 +// CHECK10-NEXT: call void @.omp_outlined..17(i32* [[TMP12]], i32* [[DOTBOUND_ZERO_ADDR]], i64 [[TMP8]], i64 [[TMP10]]) #[[ATTR2]], !llvm.access.group !58 +// CHECK10-NEXT: call void @__kmpc_end_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]), !llvm.access.group !58 // CHECK10-NEXT: br label [[OMP_IF_END]] // CHECK10: omp_if.end: // CHECK10-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK10: omp.inner.for.inc: -// CHECK10-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK10-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK10-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !58 +// CHECK10-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !58 // CHECK10-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP13]], [[TMP14]] -// CHECK10-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 +// CHECK10-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !58 // CHECK10-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP59:![0-9]+]] // CHECK10: omp.inner.for.end: // CHECK10-NEXT: br label [[OMP_LOOP_EXIT:%.*]] @@ -10567,23 +10567,23 @@ int main() { // CHECK10-NEXT: store i32 [[TMP6]], i32* [[DOTOMP_IV]], align 4 // CHECK10-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK10: omp.inner.for.cond: -// CHECK10-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK10-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK10-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !61 +// CHECK10-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !61 // CHECK10-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP7]], [[TMP8]] // CHECK10-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK10: omp.inner.for.body: -// CHECK10-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK10-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !61 // CHECK10-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP9]], 1 // CHECK10-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK10-NEXT: store i32 [[ADD]], i32* [[I]], align 4 -// CHECK10-NEXT: call void @_Z3fn3v() +// CHECK10-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !61 +// CHECK10-NEXT: call void @_Z3fn3v(), !llvm.access.group !61 // CHECK10-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK10: 
omp.body.continue: // CHECK10-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK10: omp.inner.for.inc: -// CHECK10-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK10-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !61 // CHECK10-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP10]], 1 -// CHECK10-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4 +// CHECK10-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !61 // CHECK10-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP62:![0-9]+]] // CHECK10: omp.inner.for.end: // CHECK10-NEXT: br label [[OMP_LOOP_EXIT:%.*]] @@ -10703,26 +10703,26 @@ int main() { // CHECK11-NEXT: store i32 [[TMP4]], i32* [[DOTOMP_IV]], align 4 // CHECK11-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK11: omp.inner.for.cond: -// CHECK11-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK11-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK11-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !13 +// CHECK11-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !13 // CHECK11-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] // CHECK11-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK11: omp.inner.for.body: -// CHECK11-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK11-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !13 // CHECK11-NEXT: [[TMP8:%.*]] = zext i32 [[TMP7]] to i64 -// CHECK11-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK11-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !13 // CHECK11-NEXT: [[TMP10:%.*]] = zext i32 [[TMP9]] to i64 -// CHECK11-NEXT: [[TMP11:%.*]] = load i32, i32* [[CONV]], align 8, !nontemporal !14 +// CHECK11-NEXT: [[TMP11:%.*]] = load i32, i32* [[CONV]], align 8, !nontemporal !14, !llvm.access.group !13 // CHECK11-NEXT: [[CONV2:%.*]] = bitcast i64* [[ARG_CASTED]] to i32* -// CHECK11-NEXT: store i32 [[TMP11]], i32* [[CONV2]], align 4 -// CHECK11-NEXT: [[TMP12:%.*]] = load i64, i64* [[ARG_CASTED]], align 8 -// CHECK11-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 3, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64, i64)* @.omp_outlined..1 to void (i32*, i32*, ...)*), i64 [[TMP8]], i64 [[TMP10]], i64 [[TMP12]]) +// CHECK11-NEXT: store i32 [[TMP11]], i32* [[CONV2]], align 4, !llvm.access.group !13 +// CHECK11-NEXT: [[TMP12:%.*]] = load i64, i64* [[ARG_CASTED]], align 8, !llvm.access.group !13 +// CHECK11-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) 
@__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 3, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64, i64)* @.omp_outlined..1 to void (i32*, i32*, ...)*), i64 [[TMP8]], i64 [[TMP10]], i64 [[TMP12]]), !llvm.access.group !13 // CHECK11-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK11: omp.inner.for.inc: -// CHECK11-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK11-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK11-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !13 +// CHECK11-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !13 // CHECK11-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP13]], [[TMP14]] -// CHECK11-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !13 // CHECK11-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP15:![0-9]+]] // CHECK11: omp.inner.for.end: // CHECK11-NEXT: br label [[OMP_LOOP_EXIT:%.*]] @@ -10787,23 +10787,23 @@ int main() { // CHECK11-NEXT: store i32 [[TMP6]], i32* [[DOTOMP_IV]], align 4 // CHECK11-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK11: omp.inner.for.cond: -// CHECK11-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK11-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK11-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !18 +// CHECK11-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !18 // CHECK11-NEXT: [[CMP3:%.*]] = icmp sle i32 [[TMP7]], [[TMP8]] // CHECK11-NEXT: br i1 [[CMP3]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK11: omp.inner.for.body: -// CHECK11-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !18 // CHECK11-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP9]], 1 // CHECK11-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK11-NEXT: store i32 [[ADD]], i32* [[I]], align 4 -// CHECK11-NEXT: store i32 0, i32* [[CONV]], align 8, !nontemporal !14 +// CHECK11-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !18 +// CHECK11-NEXT: store i32 0, i32* [[CONV]], align 8, !nontemporal !14, !llvm.access.group !18 // CHECK11-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK11: omp.body.continue: // CHECK11-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK11: omp.inner.for.inc: -// CHECK11-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !18 // CHECK11-NEXT: [[ADD4:%.*]] = add nsw i32 [[TMP10]], 1 -// CHECK11-NEXT: store i32 [[ADD4]], i32* [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: store i32 [[ADD4]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !18 // CHECK11-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP19:![0-9]+]] // CHECK11: omp.inner.for.end: // CHECK11-NEXT: br label [[OMP_LOOP_EXIT:%.*]] @@ -10863,26 +10863,26 @@ int main() { // CHECK11-NEXT: store i32 [[TMP4]], i32* [[DOTOMP_IV]], align 4 // CHECK11-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK11: omp.inner.for.cond: -// CHECK11-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK11-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK11-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !23 +// CHECK11-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, 
!llvm.access.group !23 // CHECK11-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] // CHECK11-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK11: omp.inner.for.body: -// CHECK11-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK11-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !23 // CHECK11-NEXT: [[TMP8:%.*]] = zext i32 [[TMP7]] to i64 -// CHECK11-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK11-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !23 // CHECK11-NEXT: [[TMP10:%.*]] = zext i32 [[TMP9]] to i64 -// CHECK11-NEXT: call void @__kmpc_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]) -// CHECK11-NEXT: [[TMP11:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK11-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4 -// CHECK11-NEXT: call void @.omp_outlined..3(i32* [[TMP11]], i32* [[DOTBOUND_ZERO_ADDR]], i64 [[TMP8]], i64 [[TMP10]]) #[[ATTR2]] -// CHECK11-NEXT: call void @__kmpc_end_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]) +// CHECK11-NEXT: call void @__kmpc_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]), !llvm.access.group !23 +// CHECK11-NEXT: [[TMP11:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8, !llvm.access.group !23 +// CHECK11-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4, !llvm.access.group !23 +// CHECK11-NEXT: call void @.omp_outlined..3(i32* [[TMP11]], i32* [[DOTBOUND_ZERO_ADDR]], i64 [[TMP8]], i64 [[TMP10]]) #[[ATTR2]], !llvm.access.group !23 +// CHECK11-NEXT: call void @__kmpc_end_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]), !llvm.access.group !23 // CHECK11-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK11: omp.inner.for.inc: -// CHECK11-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK11-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK11-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !23 +// CHECK11-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !23 // CHECK11-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP12]], [[TMP13]] -// CHECK11-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !23 // CHECK11-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP24:![0-9]+]] // CHECK11: omp.inner.for.end: // CHECK11-NEXT: br label [[OMP_LOOP_EXIT:%.*]] @@ -10944,23 +10944,23 @@ int main() { // CHECK11-NEXT: store i32 [[TMP6]], i32* [[DOTOMP_IV]], align 4 // CHECK11-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK11: omp.inner.for.cond: -// CHECK11-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK11-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK11-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !26 +// CHECK11-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !26 // CHECK11-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP7]], [[TMP8]] // CHECK11-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK11: omp.inner.for.body: -// CHECK11-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !26 // CHECK11-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP9]], 1 // CHECK11-NEXT: [[ADD:%.*]] = add nsw 
i32 0, [[MUL]] -// CHECK11-NEXT: store i32 [[ADD]], i32* [[I]], align 4 -// CHECK11-NEXT: call void @_Z9gtid_testv() +// CHECK11-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !26 +// CHECK11-NEXT: call void @_Z9gtid_testv(), !llvm.access.group !26 // CHECK11-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK11: omp.body.continue: // CHECK11-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK11: omp.inner.for.inc: -// CHECK11-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !26 // CHECK11-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP10]], 1 -// CHECK11-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !26 // CHECK11-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP27:![0-9]+]] // CHECK11: omp.inner.for.end: // CHECK11-NEXT: br label [[OMP_LOOP_EXIT:%.*]] @@ -11085,22 +11085,22 @@ int main() { // CHECK11-NEXT: store i32 [[TMP4]], i32* [[DOTOMP_IV]], align 4 // CHECK11-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK11: omp.inner.for.cond: -// CHECK11-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK11-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK11-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !29 +// CHECK11-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !29 // CHECK11-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] // CHECK11-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK11: omp.inner.for.body: -// CHECK11-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK11-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !29 // CHECK11-NEXT: [[TMP8:%.*]] = zext i32 [[TMP7]] to i64 -// CHECK11-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK11-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !29 // CHECK11-NEXT: [[TMP10:%.*]] = zext i32 [[TMP9]] to i64 -// CHECK11-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 2, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64)* @.omp_outlined..5 to void (i32*, i32*, ...)*), i64 [[TMP8]], i64 [[TMP10]]) +// CHECK11-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) 
@__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 2, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64)* @.omp_outlined..5 to void (i32*, i32*, ...)*), i64 [[TMP8]], i64 [[TMP10]]), !llvm.access.group !29 // CHECK11-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK11: omp.inner.for.inc: -// CHECK11-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK11-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK11-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !29 +// CHECK11-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !29 // CHECK11-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP11]], [[TMP12]] -// CHECK11-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !29 // CHECK11-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP30:![0-9]+]] // CHECK11: omp.inner.for.end: // CHECK11-NEXT: br label [[OMP_LOOP_EXIT:%.*]] @@ -11162,23 +11162,23 @@ int main() { // CHECK11-NEXT: store i32 [[TMP6]], i32* [[DOTOMP_IV]], align 4 // CHECK11-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK11: omp.inner.for.cond: -// CHECK11-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK11-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK11-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !32 +// CHECK11-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !32 // CHECK11-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP7]], [[TMP8]] // CHECK11-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK11: omp.inner.for.body: -// CHECK11-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !32 // CHECK11-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP9]], 1 // CHECK11-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK11-NEXT: store i32 [[ADD]], i32* [[I]], align 4 -// CHECK11-NEXT: call void @_Z3fn4v() +// CHECK11-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !32 +// CHECK11-NEXT: call void @_Z3fn4v(), !llvm.access.group !32 // CHECK11-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK11: omp.body.continue: // CHECK11-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK11: omp.inner.for.inc: -// CHECK11-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !32 // CHECK11-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP10]], 1 -// CHECK11-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !32 // CHECK11-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP33:![0-9]+]] // CHECK11: omp.inner.for.end: // CHECK11-NEXT: br label [[OMP_LOOP_EXIT:%.*]] @@ -11415,41 +11415,41 @@ int main() { // CHECK11: omp_if.then: // CHECK11-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK11: omp.inner.for.cond: -// CHECK11-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK11-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK11-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !38 +// CHECK11-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !38 // CHECK11-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] // CHECK11-NEXT: br i1 [[CMP1]], label 
[[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK11: omp.inner.for.body: -// CHECK11-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK11-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !38 // CHECK11-NEXT: [[TMP9:%.*]] = zext i32 [[TMP8]] to i64 -// CHECK11-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK11-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !38 // CHECK11-NEXT: [[TMP11:%.*]] = zext i32 [[TMP10]] to i64 -// CHECK11-NEXT: [[TMP12:%.*]] = load i8, i8* [[CONV]], align 8 +// CHECK11-NEXT: [[TMP12:%.*]] = load i8, i8* [[CONV]], align 8, !llvm.access.group !38 // CHECK11-NEXT: [[TOBOOL2:%.*]] = trunc i8 [[TMP12]] to i1 // CHECK11-NEXT: [[CONV3:%.*]] = bitcast i64* [[DOTCAPTURE_EXPR__CASTED]] to i8* // CHECK11-NEXT: [[FROMBOOL:%.*]] = zext i1 [[TOBOOL2]] to i8 -// CHECK11-NEXT: store i8 [[FROMBOOL]], i8* [[CONV3]], align 1 -// CHECK11-NEXT: [[TMP13:%.*]] = load i64, i64* [[DOTCAPTURE_EXPR__CASTED]], align 8 -// CHECK11-NEXT: [[TMP14:%.*]] = load i8, i8* [[CONV]], align 8 +// CHECK11-NEXT: store i8 [[FROMBOOL]], i8* [[CONV3]], align 1, !llvm.access.group !38 +// CHECK11-NEXT: [[TMP13:%.*]] = load i64, i64* [[DOTCAPTURE_EXPR__CASTED]], align 8, !llvm.access.group !38 +// CHECK11-NEXT: [[TMP14:%.*]] = load i8, i8* [[CONV]], align 8, !llvm.access.group !38 // CHECK11-NEXT: [[TOBOOL4:%.*]] = trunc i8 [[TMP14]] to i1 // CHECK11-NEXT: br i1 [[TOBOOL4]], label [[OMP_IF_THEN5:%.*]], label [[OMP_IF_ELSE:%.*]] // CHECK11: omp_if.then5: -// CHECK11-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 3, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64, i64)* @.omp_outlined..9 to void (i32*, i32*, ...)*), i64 [[TMP9]], i64 [[TMP11]], i64 [[TMP13]]) +// CHECK11-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) 
@__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 3, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64, i64)* @.omp_outlined..9 to void (i32*, i32*, ...)*), i64 [[TMP9]], i64 [[TMP11]], i64 [[TMP13]]), !llvm.access.group !38 // CHECK11-NEXT: br label [[OMP_IF_END:%.*]] // CHECK11: omp_if.else: -// CHECK11-NEXT: call void @__kmpc_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]) -// CHECK11-NEXT: [[TMP15:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK11-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4 -// CHECK11-NEXT: call void @.omp_outlined..9(i32* [[TMP15]], i32* [[DOTBOUND_ZERO_ADDR]], i64 [[TMP9]], i64 [[TMP11]], i64 [[TMP13]]) #[[ATTR2]] -// CHECK11-NEXT: call void @__kmpc_end_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]) +// CHECK11-NEXT: call void @__kmpc_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]), !llvm.access.group !38 +// CHECK11-NEXT: [[TMP15:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8, !llvm.access.group !38 +// CHECK11-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4, !llvm.access.group !38 +// CHECK11-NEXT: call void @.omp_outlined..9(i32* [[TMP15]], i32* [[DOTBOUND_ZERO_ADDR]], i64 [[TMP9]], i64 [[TMP11]], i64 [[TMP13]]) #[[ATTR2]], !llvm.access.group !38 +// CHECK11-NEXT: call void @__kmpc_end_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]), !llvm.access.group !38 // CHECK11-NEXT: br label [[OMP_IF_END]] // CHECK11: omp_if.end: // CHECK11-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK11: omp.inner.for.inc: -// CHECK11-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK11-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK11-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !38 +// CHECK11-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !38 // CHECK11-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP16]], [[TMP17]] -// CHECK11-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !38 // CHECK11-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP39:![0-9]+]] // CHECK11: omp.inner.for.end: // CHECK11-NEXT: br label [[OMP_IF_END22:%.*]] @@ -11561,23 +11561,23 @@ int main() { // CHECK11-NEXT: store i32 [[TMP7]], i32* [[DOTOMP_IV]], align 4 // CHECK11-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK11: omp.inner.for.cond: -// CHECK11-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK11-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK11-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !42 +// CHECK11-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !42 // CHECK11-NEXT: [[CMP3:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] // CHECK11-NEXT: br i1 [[CMP3]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK11: omp.inner.for.body: -// CHECK11-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !42 // CHECK11-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP10]], 1 // CHECK11-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK11-NEXT: store i32 [[ADD]], i32* [[I]], align 4 -// CHECK11-NEXT: call void @_Z3fn6v() +// CHECK11-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !42 +// CHECK11-NEXT: call void @_Z3fn6v(), !llvm.access.group !42 // CHECK11-NEXT: br label 
[[OMP_BODY_CONTINUE:%.*]] // CHECK11: omp.body.continue: // CHECK11-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK11: omp.inner.for.inc: -// CHECK11-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !42 // CHECK11-NEXT: [[ADD4:%.*]] = add nsw i32 [[TMP11]], 1 -// CHECK11-NEXT: store i32 [[ADD4]], i32* [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: store i32 [[ADD4]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !42 // CHECK11-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP43:![0-9]+]] // CHECK11: omp.inner.for.end: // CHECK11-NEXT: br label [[OMP_IF_END:%.*]] @@ -11689,23 +11689,23 @@ int main() { // CHECK11-NEXT: store i32 [[TMP7]], i32* [[DOTOMP_IV]], align 4 // CHECK11-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK11: omp.inner.for.cond: -// CHECK11-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK11-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK11-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !46 +// CHECK11-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !46 // CHECK11-NEXT: [[CMP3:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] // CHECK11-NEXT: br i1 [[CMP3]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK11: omp.inner.for.body: -// CHECK11-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !46 // CHECK11-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP10]], 1 // CHECK11-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK11-NEXT: store i32 [[ADD]], i32* [[I]], align 4 -// CHECK11-NEXT: call void @_Z3fn6v() +// CHECK11-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !46 +// CHECK11-NEXT: call void @_Z3fn6v(), !llvm.access.group !46 // CHECK11-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK11: omp.body.continue: // CHECK11-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK11: omp.inner.for.inc: -// CHECK11-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !46 // CHECK11-NEXT: [[ADD4:%.*]] = add nsw i32 [[TMP11]], 1 -// CHECK11-NEXT: store i32 [[ADD4]], i32* [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: store i32 [[ADD4]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !46 // CHECK11-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP47:![0-9]+]] // CHECK11: omp.inner.for.end: // CHECK11-NEXT: br label [[OMP_IF_END:%.*]] @@ -11862,22 +11862,22 @@ int main() { // CHECK11-NEXT: store i32 [[TMP4]], i32* [[DOTOMP_IV]], align 4 // CHECK11-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK11: omp.inner.for.cond: -// CHECK11-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK11-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK11-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !50 +// CHECK11-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !50 // CHECK11-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] // CHECK11-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK11: omp.inner.for.body: -// CHECK11-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK11-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !50 // CHECK11-NEXT: [[TMP8:%.*]] = zext 
i32 [[TMP7]] to i64 -// CHECK11-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK11-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !50 // CHECK11-NEXT: [[TMP10:%.*]] = zext i32 [[TMP9]] to i64 -// CHECK11-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 2, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64)* @.omp_outlined..14 to void (i32*, i32*, ...)*), i64 [[TMP8]], i64 [[TMP10]]) +// CHECK11-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 2, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64)* @.omp_outlined..14 to void (i32*, i32*, ...)*), i64 [[TMP8]], i64 [[TMP10]]), !llvm.access.group !50 // CHECK11-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK11: omp.inner.for.inc: -// CHECK11-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK11-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK11-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !50 +// CHECK11-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !50 // CHECK11-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP11]], [[TMP12]] -// CHECK11-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !50 // CHECK11-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP51:![0-9]+]] // CHECK11: omp.inner.for.end: // CHECK11-NEXT: br label [[OMP_LOOP_EXIT:%.*]] @@ -11939,23 +11939,23 @@ int main() { // CHECK11-NEXT: store i32 [[TMP6]], i32* [[DOTOMP_IV]], align 4 // CHECK11-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK11: omp.inner.for.cond: -// CHECK11-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK11-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK11-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !53 +// CHECK11-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !53 // CHECK11-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP7]], [[TMP8]] // CHECK11-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK11: omp.inner.for.body: -// CHECK11-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !53 // CHECK11-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP9]], 1 // CHECK11-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK11-NEXT: store i32 [[ADD]], i32* [[I]], align 4 -// CHECK11-NEXT: call void @_Z3fn1v() +// CHECK11-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !53 +// CHECK11-NEXT: call void @_Z3fn1v(), !llvm.access.group !53 // CHECK11-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK11: omp.body.continue: // CHECK11-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK11: omp.inner.for.inc: -// CHECK11-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !53 // CHECK11-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP10]], 1 -// CHECK11-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !53 // CHECK11-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP54:![0-9]+]] // CHECK11: omp.inner.for.end: // CHECK11-NEXT: br label 
[[OMP_LOOP_EXIT:%.*]] @@ -12185,35 +12185,35 @@ int main() { // CHECK11-NEXT: store i32 [[TMP4]], i32* [[DOTOMP_IV]], align 4 // CHECK11-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK11: omp.inner.for.cond: -// CHECK11-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK11-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK11-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !58 +// CHECK11-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !58 // CHECK11-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] // CHECK11-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK11: omp.inner.for.body: -// CHECK11-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK11-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !58 // CHECK11-NEXT: [[TMP8:%.*]] = zext i32 [[TMP7]] to i64 -// CHECK11-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK11-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !58 // CHECK11-NEXT: [[TMP10:%.*]] = zext i32 [[TMP9]] to i64 -// CHECK11-NEXT: [[TMP11:%.*]] = load i8, i8* [[CONV]], align 8 +// CHECK11-NEXT: [[TMP11:%.*]] = load i8, i8* [[CONV]], align 8, !llvm.access.group !58 // CHECK11-NEXT: [[TOBOOL:%.*]] = trunc i8 [[TMP11]] to i1 // CHECK11-NEXT: br i1 [[TOBOOL]], label [[OMP_IF_THEN:%.*]], label [[OMP_IF_ELSE:%.*]] // CHECK11: omp_if.then: -// CHECK11-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 2, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64)* @.omp_outlined..18 to void (i32*, i32*, ...)*), i64 [[TMP8]], i64 [[TMP10]]) +// CHECK11-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) 
@__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 2, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64)* @.omp_outlined..18 to void (i32*, i32*, ...)*), i64 [[TMP8]], i64 [[TMP10]]), !llvm.access.group !58 // CHECK11-NEXT: br label [[OMP_IF_END:%.*]] // CHECK11: omp_if.else: -// CHECK11-NEXT: call void @__kmpc_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]) -// CHECK11-NEXT: [[TMP12:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK11-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4 -// CHECK11-NEXT: call void @.omp_outlined..18(i32* [[TMP12]], i32* [[DOTBOUND_ZERO_ADDR]], i64 [[TMP8]], i64 [[TMP10]]) #[[ATTR2]] -// CHECK11-NEXT: call void @__kmpc_end_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]) +// CHECK11-NEXT: call void @__kmpc_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]), !llvm.access.group !58 +// CHECK11-NEXT: [[TMP12:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8, !llvm.access.group !58 +// CHECK11-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4, !llvm.access.group !58 +// CHECK11-NEXT: call void @.omp_outlined..18(i32* [[TMP12]], i32* [[DOTBOUND_ZERO_ADDR]], i64 [[TMP8]], i64 [[TMP10]]) #[[ATTR2]], !llvm.access.group !58 +// CHECK11-NEXT: call void @__kmpc_end_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]), !llvm.access.group !58 // CHECK11-NEXT: br label [[OMP_IF_END]] // CHECK11: omp_if.end: // CHECK11-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK11: omp.inner.for.inc: -// CHECK11-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK11-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK11-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !58 +// CHECK11-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !58 // CHECK11-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP13]], [[TMP14]] -// CHECK11-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !58 // CHECK11-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP59:![0-9]+]] // CHECK11: omp.inner.for.end: // CHECK11-NEXT: br label [[OMP_LOOP_EXIT:%.*]] @@ -12275,23 +12275,23 @@ int main() { // CHECK11-NEXT: store i32 [[TMP6]], i32* [[DOTOMP_IV]], align 4 // CHECK11-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK11: omp.inner.for.cond: -// CHECK11-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK11-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK11-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !61 +// CHECK11-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !61 // CHECK11-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP7]], [[TMP8]] // CHECK11-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK11: omp.inner.for.body: -// CHECK11-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !61 // CHECK11-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP9]], 1 // CHECK11-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK11-NEXT: store i32 [[ADD]], i32* [[I]], align 4 -// CHECK11-NEXT: call void @_Z3fn3v() +// CHECK11-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !61 +// CHECK11-NEXT: call void @_Z3fn3v(), !llvm.access.group !61 // CHECK11-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK11: 
omp.body.continue: // CHECK11-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK11: omp.inner.for.inc: -// CHECK11-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !61 // CHECK11-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP10]], 1 -// CHECK11-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !61 // CHECK11-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP62:![0-9]+]] // CHECK11: omp.inner.for.end: // CHECK11-NEXT: br label [[OMP_LOOP_EXIT:%.*]] @@ -12411,26 +12411,26 @@ int main() { // CHECK12-NEXT: store i32 [[TMP4]], i32* [[DOTOMP_IV]], align 4 // CHECK12-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK12: omp.inner.for.cond: -// CHECK12-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK12-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK12-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !13 +// CHECK12-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !13 // CHECK12-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] // CHECK12-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK12: omp.inner.for.body: -// CHECK12-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK12-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !13 // CHECK12-NEXT: [[TMP8:%.*]] = zext i32 [[TMP7]] to i64 -// CHECK12-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK12-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !13 // CHECK12-NEXT: [[TMP10:%.*]] = zext i32 [[TMP9]] to i64 -// CHECK12-NEXT: [[TMP11:%.*]] = load i32, i32* [[CONV]], align 8, !nontemporal !14 +// CHECK12-NEXT: [[TMP11:%.*]] = load i32, i32* [[CONV]], align 8, !nontemporal !14, !llvm.access.group !13 // CHECK12-NEXT: [[CONV2:%.*]] = bitcast i64* [[ARG_CASTED]] to i32* -// CHECK12-NEXT: store i32 [[TMP11]], i32* [[CONV2]], align 4 -// CHECK12-NEXT: [[TMP12:%.*]] = load i64, i64* [[ARG_CASTED]], align 8 -// CHECK12-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 3, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64, i64)* @.omp_outlined..1 to void (i32*, i32*, ...)*), i64 [[TMP8]], i64 [[TMP10]], i64 [[TMP12]]) +// CHECK12-NEXT: store i32 [[TMP11]], i32* [[CONV2]], align 4, !llvm.access.group !13 +// CHECK12-NEXT: [[TMP12:%.*]] = load i64, i64* [[ARG_CASTED]], align 8, !llvm.access.group !13 +// CHECK12-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) 
@__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 3, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64, i64)* @.omp_outlined..1 to void (i32*, i32*, ...)*), i64 [[TMP8]], i64 [[TMP10]], i64 [[TMP12]]), !llvm.access.group !13 // CHECK12-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK12: omp.inner.for.inc: -// CHECK12-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK12-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK12-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !13 +// CHECK12-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !13 // CHECK12-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP13]], [[TMP14]] -// CHECK12-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 +// CHECK12-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !13 // CHECK12-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP15:![0-9]+]] // CHECK12: omp.inner.for.end: // CHECK12-NEXT: br label [[OMP_LOOP_EXIT:%.*]] @@ -12495,23 +12495,23 @@ int main() { // CHECK12-NEXT: store i32 [[TMP6]], i32* [[DOTOMP_IV]], align 4 // CHECK12-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK12: omp.inner.for.cond: -// CHECK12-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK12-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK12-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !18 +// CHECK12-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !18 // CHECK12-NEXT: [[CMP3:%.*]] = icmp sle i32 [[TMP7]], [[TMP8]] // CHECK12-NEXT: br i1 [[CMP3]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK12: omp.inner.for.body: -// CHECK12-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK12-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !18 // CHECK12-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP9]], 1 // CHECK12-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK12-NEXT: store i32 [[ADD]], i32* [[I]], align 4 -// CHECK12-NEXT: store i32 0, i32* [[CONV]], align 8, !nontemporal !14 +// CHECK12-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !18 +// CHECK12-NEXT: store i32 0, i32* [[CONV]], align 8, !nontemporal !14, !llvm.access.group !18 // CHECK12-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK12: omp.body.continue: // CHECK12-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK12: omp.inner.for.inc: -// CHECK12-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK12-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !18 // CHECK12-NEXT: [[ADD4:%.*]] = add nsw i32 [[TMP10]], 1 -// CHECK12-NEXT: store i32 [[ADD4]], i32* [[DOTOMP_IV]], align 4 +// CHECK12-NEXT: store i32 [[ADD4]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !18 // CHECK12-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP19:![0-9]+]] // CHECK12: omp.inner.for.end: // CHECK12-NEXT: br label [[OMP_LOOP_EXIT:%.*]] @@ -12571,26 +12571,26 @@ int main() { // CHECK12-NEXT: store i32 [[TMP4]], i32* [[DOTOMP_IV]], align 4 // CHECK12-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK12: omp.inner.for.cond: -// CHECK12-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK12-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK12-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !23 +// CHECK12-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, 
!llvm.access.group !23 // CHECK12-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] // CHECK12-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK12: omp.inner.for.body: -// CHECK12-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK12-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !23 // CHECK12-NEXT: [[TMP8:%.*]] = zext i32 [[TMP7]] to i64 -// CHECK12-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK12-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !23 // CHECK12-NEXT: [[TMP10:%.*]] = zext i32 [[TMP9]] to i64 -// CHECK12-NEXT: call void @__kmpc_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]) -// CHECK12-NEXT: [[TMP11:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK12-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4 -// CHECK12-NEXT: call void @.omp_outlined..3(i32* [[TMP11]], i32* [[DOTBOUND_ZERO_ADDR]], i64 [[TMP8]], i64 [[TMP10]]) #[[ATTR2]] -// CHECK12-NEXT: call void @__kmpc_end_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]) +// CHECK12-NEXT: call void @__kmpc_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]), !llvm.access.group !23 +// CHECK12-NEXT: [[TMP11:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8, !llvm.access.group !23 +// CHECK12-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4, !llvm.access.group !23 +// CHECK12-NEXT: call void @.omp_outlined..3(i32* [[TMP11]], i32* [[DOTBOUND_ZERO_ADDR]], i64 [[TMP8]], i64 [[TMP10]]) #[[ATTR2]], !llvm.access.group !23 +// CHECK12-NEXT: call void @__kmpc_end_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]), !llvm.access.group !23 // CHECK12-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK12: omp.inner.for.inc: -// CHECK12-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK12-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK12-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !23 +// CHECK12-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !23 // CHECK12-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP12]], [[TMP13]] -// CHECK12-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 +// CHECK12-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !23 // CHECK12-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP24:![0-9]+]] // CHECK12: omp.inner.for.end: // CHECK12-NEXT: br label [[OMP_LOOP_EXIT:%.*]] @@ -12652,23 +12652,23 @@ int main() { // CHECK12-NEXT: store i32 [[TMP6]], i32* [[DOTOMP_IV]], align 4 // CHECK12-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK12: omp.inner.for.cond: -// CHECK12-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK12-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK12-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !26 +// CHECK12-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !26 // CHECK12-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP7]], [[TMP8]] // CHECK12-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK12: omp.inner.for.body: -// CHECK12-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK12-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !26 // CHECK12-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP9]], 1 // CHECK12-NEXT: [[ADD:%.*]] = add nsw 
i32 0, [[MUL]] -// CHECK12-NEXT: store i32 [[ADD]], i32* [[I]], align 4 -// CHECK12-NEXT: call void @_Z9gtid_testv() +// CHECK12-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !26 +// CHECK12-NEXT: call void @_Z9gtid_testv(), !llvm.access.group !26 // CHECK12-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK12: omp.body.continue: // CHECK12-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK12: omp.inner.for.inc: -// CHECK12-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK12-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !26 // CHECK12-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP10]], 1 -// CHECK12-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4 +// CHECK12-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !26 // CHECK12-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP27:![0-9]+]] // CHECK12: omp.inner.for.end: // CHECK12-NEXT: br label [[OMP_LOOP_EXIT:%.*]] @@ -12793,22 +12793,22 @@ int main() { // CHECK12-NEXT: store i32 [[TMP4]], i32* [[DOTOMP_IV]], align 4 // CHECK12-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK12: omp.inner.for.cond: -// CHECK12-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK12-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK12-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !29 +// CHECK12-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !29 // CHECK12-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] // CHECK12-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK12: omp.inner.for.body: -// CHECK12-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK12-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !29 // CHECK12-NEXT: [[TMP8:%.*]] = zext i32 [[TMP7]] to i64 -// CHECK12-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK12-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !29 // CHECK12-NEXT: [[TMP10:%.*]] = zext i32 [[TMP9]] to i64 -// CHECK12-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 2, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64)* @.omp_outlined..5 to void (i32*, i32*, ...)*), i64 [[TMP8]], i64 [[TMP10]]) +// CHECK12-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) 
@__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 2, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64)* @.omp_outlined..5 to void (i32*, i32*, ...)*), i64 [[TMP8]], i64 [[TMP10]]), !llvm.access.group !29 // CHECK12-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK12: omp.inner.for.inc: -// CHECK12-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK12-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK12-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !29 +// CHECK12-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !29 // CHECK12-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP11]], [[TMP12]] -// CHECK12-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 +// CHECK12-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !29 // CHECK12-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP30:![0-9]+]] // CHECK12: omp.inner.for.end: // CHECK12-NEXT: br label [[OMP_LOOP_EXIT:%.*]] @@ -12870,23 +12870,23 @@ int main() { // CHECK12-NEXT: store i32 [[TMP6]], i32* [[DOTOMP_IV]], align 4 // CHECK12-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK12: omp.inner.for.cond: -// CHECK12-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK12-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK12-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !32 +// CHECK12-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !32 // CHECK12-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP7]], [[TMP8]] // CHECK12-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK12: omp.inner.for.body: -// CHECK12-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK12-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !32 // CHECK12-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP9]], 1 // CHECK12-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK12-NEXT: store i32 [[ADD]], i32* [[I]], align 4 -// CHECK12-NEXT: call void @_Z3fn4v() +// CHECK12-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !32 +// CHECK12-NEXT: call void @_Z3fn4v(), !llvm.access.group !32 // CHECK12-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK12: omp.body.continue: // CHECK12-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK12: omp.inner.for.inc: -// CHECK12-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK12-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !32 // CHECK12-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP10]], 1 -// CHECK12-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4 +// CHECK12-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !32 // CHECK12-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP33:![0-9]+]] // CHECK12: omp.inner.for.end: // CHECK12-NEXT: br label [[OMP_LOOP_EXIT:%.*]] @@ -13123,41 +13123,41 @@ int main() { // CHECK12: omp_if.then: // CHECK12-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK12: omp.inner.for.cond: -// CHECK12-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK12-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK12-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !38 +// CHECK12-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !38 // CHECK12-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] // CHECK12-NEXT: br i1 [[CMP1]], label 
[[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK12: omp.inner.for.body: -// CHECK12-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK12-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !38 // CHECK12-NEXT: [[TMP9:%.*]] = zext i32 [[TMP8]] to i64 -// CHECK12-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK12-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !38 // CHECK12-NEXT: [[TMP11:%.*]] = zext i32 [[TMP10]] to i64 -// CHECK12-NEXT: [[TMP12:%.*]] = load i8, i8* [[CONV]], align 8 +// CHECK12-NEXT: [[TMP12:%.*]] = load i8, i8* [[CONV]], align 8, !llvm.access.group !38 // CHECK12-NEXT: [[TOBOOL2:%.*]] = trunc i8 [[TMP12]] to i1 // CHECK12-NEXT: [[CONV3:%.*]] = bitcast i64* [[DOTCAPTURE_EXPR__CASTED]] to i8* // CHECK12-NEXT: [[FROMBOOL:%.*]] = zext i1 [[TOBOOL2]] to i8 -// CHECK12-NEXT: store i8 [[FROMBOOL]], i8* [[CONV3]], align 1 -// CHECK12-NEXT: [[TMP13:%.*]] = load i64, i64* [[DOTCAPTURE_EXPR__CASTED]], align 8 -// CHECK12-NEXT: [[TMP14:%.*]] = load i8, i8* [[CONV]], align 8 +// CHECK12-NEXT: store i8 [[FROMBOOL]], i8* [[CONV3]], align 1, !llvm.access.group !38 +// CHECK12-NEXT: [[TMP13:%.*]] = load i64, i64* [[DOTCAPTURE_EXPR__CASTED]], align 8, !llvm.access.group !38 +// CHECK12-NEXT: [[TMP14:%.*]] = load i8, i8* [[CONV]], align 8, !llvm.access.group !38 // CHECK12-NEXT: [[TOBOOL4:%.*]] = trunc i8 [[TMP14]] to i1 // CHECK12-NEXT: br i1 [[TOBOOL4]], label [[OMP_IF_THEN5:%.*]], label [[OMP_IF_ELSE:%.*]] // CHECK12: omp_if.then5: -// CHECK12-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 3, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64, i64)* @.omp_outlined..9 to void (i32*, i32*, ...)*), i64 [[TMP9]], i64 [[TMP11]], i64 [[TMP13]]) +// CHECK12-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) 
@__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 3, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64, i64)* @.omp_outlined..9 to void (i32*, i32*, ...)*), i64 [[TMP9]], i64 [[TMP11]], i64 [[TMP13]]), !llvm.access.group !38 // CHECK12-NEXT: br label [[OMP_IF_END:%.*]] // CHECK12: omp_if.else: -// CHECK12-NEXT: call void @__kmpc_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]) -// CHECK12-NEXT: [[TMP15:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK12-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4 -// CHECK12-NEXT: call void @.omp_outlined..9(i32* [[TMP15]], i32* [[DOTBOUND_ZERO_ADDR]], i64 [[TMP9]], i64 [[TMP11]], i64 [[TMP13]]) #[[ATTR2]] -// CHECK12-NEXT: call void @__kmpc_end_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]) +// CHECK12-NEXT: call void @__kmpc_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]), !llvm.access.group !38 +// CHECK12-NEXT: [[TMP15:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8, !llvm.access.group !38 +// CHECK12-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4, !llvm.access.group !38 +// CHECK12-NEXT: call void @.omp_outlined..9(i32* [[TMP15]], i32* [[DOTBOUND_ZERO_ADDR]], i64 [[TMP9]], i64 [[TMP11]], i64 [[TMP13]]) #[[ATTR2]], !llvm.access.group !38 +// CHECK12-NEXT: call void @__kmpc_end_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]), !llvm.access.group !38 // CHECK12-NEXT: br label [[OMP_IF_END]] // CHECK12: omp_if.end: // CHECK12-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK12: omp.inner.for.inc: -// CHECK12-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK12-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK12-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !38 +// CHECK12-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !38 // CHECK12-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP16]], [[TMP17]] -// CHECK12-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 +// CHECK12-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !38 // CHECK12-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP39:![0-9]+]] // CHECK12: omp.inner.for.end: // CHECK12-NEXT: br label [[OMP_IF_END22:%.*]] @@ -13269,23 +13269,23 @@ int main() { // CHECK12-NEXT: store i32 [[TMP7]], i32* [[DOTOMP_IV]], align 4 // CHECK12-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK12: omp.inner.for.cond: -// CHECK12-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK12-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK12-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !42 +// CHECK12-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !42 // CHECK12-NEXT: [[CMP3:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] // CHECK12-NEXT: br i1 [[CMP3]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK12: omp.inner.for.body: -// CHECK12-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK12-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !42 // CHECK12-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP10]], 1 // CHECK12-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK12-NEXT: store i32 [[ADD]], i32* [[I]], align 4 -// CHECK12-NEXT: call void @_Z3fn6v() +// CHECK12-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !42 +// CHECK12-NEXT: call void @_Z3fn6v(), !llvm.access.group !42 // CHECK12-NEXT: br label 
[[OMP_BODY_CONTINUE:%.*]] // CHECK12: omp.body.continue: // CHECK12-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK12: omp.inner.for.inc: -// CHECK12-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK12-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !42 // CHECK12-NEXT: [[ADD4:%.*]] = add nsw i32 [[TMP11]], 1 -// CHECK12-NEXT: store i32 [[ADD4]], i32* [[DOTOMP_IV]], align 4 +// CHECK12-NEXT: store i32 [[ADD4]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !42 // CHECK12-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP43:![0-9]+]] // CHECK12: omp.inner.for.end: // CHECK12-NEXT: br label [[OMP_IF_END:%.*]] @@ -13397,23 +13397,23 @@ int main() { // CHECK12-NEXT: store i32 [[TMP7]], i32* [[DOTOMP_IV]], align 4 // CHECK12-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK12: omp.inner.for.cond: -// CHECK12-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK12-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK12-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !46 +// CHECK12-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !46 // CHECK12-NEXT: [[CMP3:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] // CHECK12-NEXT: br i1 [[CMP3]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK12: omp.inner.for.body: -// CHECK12-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK12-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !46 // CHECK12-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP10]], 1 // CHECK12-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK12-NEXT: store i32 [[ADD]], i32* [[I]], align 4 -// CHECK12-NEXT: call void @_Z3fn6v() +// CHECK12-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !46 +// CHECK12-NEXT: call void @_Z3fn6v(), !llvm.access.group !46 // CHECK12-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK12: omp.body.continue: // CHECK12-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK12: omp.inner.for.inc: -// CHECK12-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK12-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !46 // CHECK12-NEXT: [[ADD4:%.*]] = add nsw i32 [[TMP11]], 1 -// CHECK12-NEXT: store i32 [[ADD4]], i32* [[DOTOMP_IV]], align 4 +// CHECK12-NEXT: store i32 [[ADD4]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !46 // CHECK12-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP47:![0-9]+]] // CHECK12: omp.inner.for.end: // CHECK12-NEXT: br label [[OMP_IF_END:%.*]] @@ -13570,22 +13570,22 @@ int main() { // CHECK12-NEXT: store i32 [[TMP4]], i32* [[DOTOMP_IV]], align 4 // CHECK12-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK12: omp.inner.for.cond: -// CHECK12-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK12-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK12-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !50 +// CHECK12-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !50 // CHECK12-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] // CHECK12-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK12: omp.inner.for.body: -// CHECK12-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK12-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !50 // CHECK12-NEXT: [[TMP8:%.*]] = zext 
i32 [[TMP7]] to i64 -// CHECK12-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK12-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !50 // CHECK12-NEXT: [[TMP10:%.*]] = zext i32 [[TMP9]] to i64 -// CHECK12-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 2, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64)* @.omp_outlined..14 to void (i32*, i32*, ...)*), i64 [[TMP8]], i64 [[TMP10]]) +// CHECK12-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 2, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64)* @.omp_outlined..14 to void (i32*, i32*, ...)*), i64 [[TMP8]], i64 [[TMP10]]), !llvm.access.group !50 // CHECK12-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK12: omp.inner.for.inc: -// CHECK12-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK12-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK12-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !50 +// CHECK12-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !50 // CHECK12-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP11]], [[TMP12]] -// CHECK12-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 +// CHECK12-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !50 // CHECK12-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP51:![0-9]+]] // CHECK12: omp.inner.for.end: // CHECK12-NEXT: br label [[OMP_LOOP_EXIT:%.*]] @@ -13647,23 +13647,23 @@ int main() { // CHECK12-NEXT: store i32 [[TMP6]], i32* [[DOTOMP_IV]], align 4 // CHECK12-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK12: omp.inner.for.cond: -// CHECK12-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK12-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK12-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !53 +// CHECK12-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !53 // CHECK12-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP7]], [[TMP8]] // CHECK12-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK12: omp.inner.for.body: -// CHECK12-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK12-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !53 // CHECK12-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP9]], 1 // CHECK12-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK12-NEXT: store i32 [[ADD]], i32* [[I]], align 4 -// CHECK12-NEXT: call void @_Z3fn1v() +// CHECK12-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !53 +// CHECK12-NEXT: call void @_Z3fn1v(), !llvm.access.group !53 // CHECK12-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK12: omp.body.continue: // CHECK12-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK12: omp.inner.for.inc: -// CHECK12-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK12-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !53 // CHECK12-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP10]], 1 -// CHECK12-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4 +// CHECK12-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !53 // CHECK12-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP54:![0-9]+]] // CHECK12: omp.inner.for.end: // CHECK12-NEXT: br label 
[[OMP_LOOP_EXIT:%.*]] @@ -13893,35 +13893,35 @@ int main() { // CHECK12-NEXT: store i32 [[TMP4]], i32* [[DOTOMP_IV]], align 4 // CHECK12-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK12: omp.inner.for.cond: -// CHECK12-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK12-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK12-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !58 +// CHECK12-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !58 // CHECK12-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] // CHECK12-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK12: omp.inner.for.body: -// CHECK12-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK12-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !58 // CHECK12-NEXT: [[TMP8:%.*]] = zext i32 [[TMP7]] to i64 -// CHECK12-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK12-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !58 // CHECK12-NEXT: [[TMP10:%.*]] = zext i32 [[TMP9]] to i64 -// CHECK12-NEXT: [[TMP11:%.*]] = load i8, i8* [[CONV]], align 8 +// CHECK12-NEXT: [[TMP11:%.*]] = load i8, i8* [[CONV]], align 8, !llvm.access.group !58 // CHECK12-NEXT: [[TOBOOL:%.*]] = trunc i8 [[TMP11]] to i1 // CHECK12-NEXT: br i1 [[TOBOOL]], label [[OMP_IF_THEN:%.*]], label [[OMP_IF_ELSE:%.*]] // CHECK12: omp_if.then: -// CHECK12-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 2, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64)* @.omp_outlined..18 to void (i32*, i32*, ...)*), i64 [[TMP8]], i64 [[TMP10]]) +// CHECK12-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) 
@__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 2, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64)* @.omp_outlined..18 to void (i32*, i32*, ...)*), i64 [[TMP8]], i64 [[TMP10]]), !llvm.access.group !58 // CHECK12-NEXT: br label [[OMP_IF_END:%.*]] // CHECK12: omp_if.else: -// CHECK12-NEXT: call void @__kmpc_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]) -// CHECK12-NEXT: [[TMP12:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK12-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4 -// CHECK12-NEXT: call void @.omp_outlined..18(i32* [[TMP12]], i32* [[DOTBOUND_ZERO_ADDR]], i64 [[TMP8]], i64 [[TMP10]]) #[[ATTR2]] -// CHECK12-NEXT: call void @__kmpc_end_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]) +// CHECK12-NEXT: call void @__kmpc_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]), !llvm.access.group !58 +// CHECK12-NEXT: [[TMP12:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8, !llvm.access.group !58 +// CHECK12-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4, !llvm.access.group !58 +// CHECK12-NEXT: call void @.omp_outlined..18(i32* [[TMP12]], i32* [[DOTBOUND_ZERO_ADDR]], i64 [[TMP8]], i64 [[TMP10]]) #[[ATTR2]], !llvm.access.group !58 +// CHECK12-NEXT: call void @__kmpc_end_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]), !llvm.access.group !58 // CHECK12-NEXT: br label [[OMP_IF_END]] // CHECK12: omp_if.end: // CHECK12-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK12: omp.inner.for.inc: -// CHECK12-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK12-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK12-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !58 +// CHECK12-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !58 // CHECK12-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP13]], [[TMP14]] -// CHECK12-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 +// CHECK12-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !58 // CHECK12-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP59:![0-9]+]] // CHECK12: omp.inner.for.end: // CHECK12-NEXT: br label [[OMP_LOOP_EXIT:%.*]] @@ -13983,23 +13983,23 @@ int main() { // CHECK12-NEXT: store i32 [[TMP6]], i32* [[DOTOMP_IV]], align 4 // CHECK12-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK12: omp.inner.for.cond: -// CHECK12-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK12-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK12-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !61 +// CHECK12-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !61 // CHECK12-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP7]], [[TMP8]] // CHECK12-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK12: omp.inner.for.body: -// CHECK12-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK12-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !61 // CHECK12-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP9]], 1 // CHECK12-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK12-NEXT: store i32 [[ADD]], i32* [[I]], align 4 -// CHECK12-NEXT: call void @_Z3fn3v() +// CHECK12-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !61 +// CHECK12-NEXT: call void @_Z3fn3v(), !llvm.access.group !61 // CHECK12-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK12: 
omp.body.continue: // CHECK12-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK12: omp.inner.for.inc: -// CHECK12-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK12-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !61 // CHECK12-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP10]], 1 -// CHECK12-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4 +// CHECK12-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !61 // CHECK12-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP62:![0-9]+]] // CHECK12: omp.inner.for.end: // CHECK12-NEXT: br label [[OMP_LOOP_EXIT:%.*]] @@ -14041,23 +14041,23 @@ int main() { // CHECK13-NEXT: store i32 [[TMP0]], i32* [[DOTOMP_IV]], align 4 // CHECK13-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK13: omp.inner.for.cond: -// CHECK13-NEXT: [[TMP1:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK13-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK13-NEXT: [[TMP1:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !6 +// CHECK13-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !6 // CHECK13-NEXT: [[CMP:%.*]] = icmp sle i32 [[TMP1]], [[TMP2]] // CHECK13-NEXT: br i1 [[CMP]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK13: omp.inner.for.body: -// CHECK13-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK13-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !6 // CHECK13-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP3]], 1 // CHECK13-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK13-NEXT: store i32 [[ADD]], i32* [[I]], align 4 -// CHECK13-NEXT: store i32 0, i32* @Arg, align 4 +// CHECK13-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !6 +// CHECK13-NEXT: store i32 0, i32* @Arg, align 4, !llvm.access.group !6 // CHECK13-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK13: omp.body.continue: // CHECK13-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK13: omp.inner.for.inc: -// CHECK13-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK13-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !6 // CHECK13-NEXT: [[ADD1:%.*]] = add nsw i32 [[TMP4]], 1 -// CHECK13-NEXT: store i32 [[ADD1]], i32* [[DOTOMP_IV]], align 4 +// CHECK13-NEXT: store i32 [[ADD1]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !6 // CHECK13-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP7:![0-9]+]] // CHECK13: omp.inner.for.end: // CHECK13-NEXT: store i32 100, i32* [[I]], align 4 @@ -14067,23 +14067,23 @@ int main() { // CHECK13-NEXT: store i32 [[TMP5]], i32* [[DOTOMP_IV5]], align 4 // CHECK13-NEXT: br label [[OMP_INNER_FOR_COND7:%.*]] // CHECK13: omp.inner.for.cond7: -// CHECK13-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV5]], align 4 -// CHECK13-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_UB4]], align 4 +// CHECK13-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV5]], align 4, !llvm.access.group !10 +// CHECK13-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_UB4]], align 4, !llvm.access.group !10 // CHECK13-NEXT: [[CMP8:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] // CHECK13-NEXT: br i1 [[CMP8]], label [[OMP_INNER_FOR_BODY9:%.*]], label [[OMP_INNER_FOR_END15:%.*]] // CHECK13: omp.inner.for.body9: -// CHECK13-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV5]], align 4 +// CHECK13-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV5]], align 4, !llvm.access.group !10 // CHECK13-NEXT: [[MUL10:%.*]] = mul nsw i32 [[TMP8]], 1 // 
CHECK13-NEXT: [[ADD11:%.*]] = add nsw i32 0, [[MUL10]] -// CHECK13-NEXT: store i32 [[ADD11]], i32* [[I6]], align 4 -// CHECK13-NEXT: call void @_Z9gtid_testv() +// CHECK13-NEXT: store i32 [[ADD11]], i32* [[I6]], align 4, !llvm.access.group !10 +// CHECK13-NEXT: call void @_Z9gtid_testv(), !llvm.access.group !10 // CHECK13-NEXT: br label [[OMP_BODY_CONTINUE12:%.*]] // CHECK13: omp.body.continue12: // CHECK13-NEXT: br label [[OMP_INNER_FOR_INC13:%.*]] // CHECK13: omp.inner.for.inc13: -// CHECK13-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV5]], align 4 +// CHECK13-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV5]], align 4, !llvm.access.group !10 // CHECK13-NEXT: [[ADD14:%.*]] = add nsw i32 [[TMP9]], 1 -// CHECK13-NEXT: store i32 [[ADD14]], i32* [[DOTOMP_IV5]], align 4 +// CHECK13-NEXT: store i32 [[ADD14]], i32* [[DOTOMP_IV5]], align 4, !llvm.access.group !10 // CHECK13-NEXT: br label [[OMP_INNER_FOR_COND7]], !llvm.loop [[LOOP11:![0-9]+]] // CHECK13: omp.inner.for.end15: // CHECK13-NEXT: store i32 100, i32* [[I6]], align 4 @@ -14117,23 +14117,23 @@ int main() { // CHECK13-NEXT: store i32 [[TMP0]], i32* [[DOTOMP_IV]], align 4 // CHECK13-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK13: omp.inner.for.cond: -// CHECK13-NEXT: [[TMP1:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK13-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK13-NEXT: [[TMP1:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !13 +// CHECK13-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !13 // CHECK13-NEXT: [[CMP:%.*]] = icmp sle i32 [[TMP1]], [[TMP2]] // CHECK13-NEXT: br i1 [[CMP]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK13: omp.inner.for.body: -// CHECK13-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK13-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !13 // CHECK13-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP3]], 1 // CHECK13-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK13-NEXT: store i32 [[ADD]], i32* [[I]], align 4 -// CHECK13-NEXT: call void @_Z3fn4v() +// CHECK13-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !13 +// CHECK13-NEXT: call void @_Z3fn4v(), !llvm.access.group !13 // CHECK13-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK13: omp.body.continue: // CHECK13-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK13: omp.inner.for.inc: -// CHECK13-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK13-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !13 // CHECK13-NEXT: [[ADD1:%.*]] = add nsw i32 [[TMP4]], 1 -// CHECK13-NEXT: store i32 [[ADD1]], i32* [[DOTOMP_IV]], align 4 +// CHECK13-NEXT: store i32 [[ADD1]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !13 // CHECK13-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP14:![0-9]+]] // CHECK13: omp.inner.for.end: // CHECK13-NEXT: store i32 100, i32* [[I]], align 4 @@ -14143,23 +14143,23 @@ int main() { // CHECK13-NEXT: store i32 [[TMP5]], i32* [[DOTOMP_IV5]], align 4 // CHECK13-NEXT: br label [[OMP_INNER_FOR_COND7:%.*]] // CHECK13: omp.inner.for.cond7: -// CHECK13-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV5]], align 4 -// CHECK13-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_UB4]], align 4 +// CHECK13-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV5]], align 4, !llvm.access.group !16 +// CHECK13-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_UB4]], align 4, !llvm.access.group !16 // CHECK13-NEXT: [[CMP8:%.*]] = icmp sle i32 
[[TMP6]], [[TMP7]] // CHECK13-NEXT: br i1 [[CMP8]], label [[OMP_INNER_FOR_BODY9:%.*]], label [[OMP_INNER_FOR_END15:%.*]] // CHECK13: omp.inner.for.body9: -// CHECK13-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV5]], align 4 +// CHECK13-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV5]], align 4, !llvm.access.group !16 // CHECK13-NEXT: [[MUL10:%.*]] = mul nsw i32 [[TMP8]], 1 // CHECK13-NEXT: [[ADD11:%.*]] = add nsw i32 0, [[MUL10]] -// CHECK13-NEXT: store i32 [[ADD11]], i32* [[I6]], align 4 -// CHECK13-NEXT: call void @_Z3fn5v() +// CHECK13-NEXT: store i32 [[ADD11]], i32* [[I6]], align 4, !llvm.access.group !16 +// CHECK13-NEXT: call void @_Z3fn5v(), !llvm.access.group !16 // CHECK13-NEXT: br label [[OMP_BODY_CONTINUE12:%.*]] // CHECK13: omp.body.continue12: // CHECK13-NEXT: br label [[OMP_INNER_FOR_INC13:%.*]] // CHECK13: omp.inner.for.inc13: -// CHECK13-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV5]], align 4 +// CHECK13-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV5]], align 4, !llvm.access.group !16 // CHECK13-NEXT: [[ADD14:%.*]] = add nsw i32 [[TMP9]], 1 -// CHECK13-NEXT: store i32 [[ADD14]], i32* [[DOTOMP_IV5]], align 4 +// CHECK13-NEXT: store i32 [[ADD14]], i32* [[DOTOMP_IV5]], align 4, !llvm.access.group !16 // CHECK13-NEXT: br label [[OMP_INNER_FOR_COND7]], !llvm.loop [[LOOP17:![0-9]+]] // CHECK13: omp.inner.for.end15: // CHECK13-NEXT: store i32 100, i32* [[I6]], align 4 @@ -14173,23 +14173,23 @@ int main() { // CHECK13-NEXT: store i32 [[TMP11]], i32* [[DOTOMP_IV19]], align 4 // CHECK13-NEXT: br label [[OMP_INNER_FOR_COND21:%.*]] // CHECK13: omp.inner.for.cond21: -// CHECK13-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV19]], align 4 -// CHECK13-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_UB18]], align 4 +// CHECK13-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV19]], align 4, !llvm.access.group !19 +// CHECK13-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_UB18]], align 4, !llvm.access.group !19 // CHECK13-NEXT: [[CMP22:%.*]] = icmp sle i32 [[TMP12]], [[TMP13]] // CHECK13-NEXT: br i1 [[CMP22]], label [[OMP_INNER_FOR_BODY23:%.*]], label [[OMP_INNER_FOR_END29:%.*]] // CHECK13: omp.inner.for.body23: -// CHECK13-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IV19]], align 4 +// CHECK13-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IV19]], align 4, !llvm.access.group !19 // CHECK13-NEXT: [[MUL24:%.*]] = mul nsw i32 [[TMP14]], 1 // CHECK13-NEXT: [[ADD25:%.*]] = add nsw i32 0, [[MUL24]] -// CHECK13-NEXT: store i32 [[ADD25]], i32* [[I20]], align 4 -// CHECK13-NEXT: call void @_Z3fn6v() +// CHECK13-NEXT: store i32 [[ADD25]], i32* [[I20]], align 4, !llvm.access.group !19 +// CHECK13-NEXT: call void @_Z3fn6v(), !llvm.access.group !19 // CHECK13-NEXT: br label [[OMP_BODY_CONTINUE26:%.*]] // CHECK13: omp.body.continue26: // CHECK13-NEXT: br label [[OMP_INNER_FOR_INC27:%.*]] // CHECK13: omp.inner.for.inc27: -// CHECK13-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_IV19]], align 4 +// CHECK13-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_IV19]], align 4, !llvm.access.group !19 // CHECK13-NEXT: [[ADD28:%.*]] = add nsw i32 [[TMP15]], 1 -// CHECK13-NEXT: store i32 [[ADD28]], i32* [[DOTOMP_IV19]], align 4 +// CHECK13-NEXT: store i32 [[ADD28]], i32* [[DOTOMP_IV19]], align 4, !llvm.access.group !19 // CHECK13-NEXT: br label [[OMP_INNER_FOR_COND21]], !llvm.loop [[LOOP20:![0-9]+]] // CHECK13: omp.inner.for.end29: // CHECK13-NEXT: store i32 100, i32* [[I20]], align 4 @@ -14225,23 +14225,23 @@ int main() { // CHECK13-NEXT: store i32 [[TMP0]], i32* [[DOTOMP_IV]], align 4 // CHECK13-NEXT: br label 
[[OMP_INNER_FOR_COND:%.*]] // CHECK13: omp.inner.for.cond: -// CHECK13-NEXT: [[TMP1:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK13-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK13-NEXT: [[TMP1:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !22 +// CHECK13-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !22 // CHECK13-NEXT: [[CMP:%.*]] = icmp sle i32 [[TMP1]], [[TMP2]] // CHECK13-NEXT: br i1 [[CMP]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK13: omp.inner.for.body: -// CHECK13-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK13-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !22 // CHECK13-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP3]], 1 // CHECK13-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK13-NEXT: store i32 [[ADD]], i32* [[I]], align 4 -// CHECK13-NEXT: call void @_Z3fn1v() +// CHECK13-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !22 +// CHECK13-NEXT: call void @_Z3fn1v(), !llvm.access.group !22 // CHECK13-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK13: omp.body.continue: // CHECK13-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK13: omp.inner.for.inc: -// CHECK13-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK13-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !22 // CHECK13-NEXT: [[ADD1:%.*]] = add nsw i32 [[TMP4]], 1 -// CHECK13-NEXT: store i32 [[ADD1]], i32* [[DOTOMP_IV]], align 4 +// CHECK13-NEXT: store i32 [[ADD1]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !22 // CHECK13-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP23:![0-9]+]] // CHECK13: omp.inner.for.end: // CHECK13-NEXT: store i32 100, i32* [[I]], align 4 @@ -14251,23 +14251,23 @@ int main() { // CHECK13-NEXT: store i32 [[TMP5]], i32* [[DOTOMP_IV5]], align 4 // CHECK13-NEXT: br label [[OMP_INNER_FOR_COND7:%.*]] // CHECK13: omp.inner.for.cond7: -// CHECK13-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV5]], align 4 -// CHECK13-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_UB4]], align 4 +// CHECK13-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV5]], align 4, !llvm.access.group !25 +// CHECK13-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_UB4]], align 4, !llvm.access.group !25 // CHECK13-NEXT: [[CMP8:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] // CHECK13-NEXT: br i1 [[CMP8]], label [[OMP_INNER_FOR_BODY9:%.*]], label [[OMP_INNER_FOR_END15:%.*]] // CHECK13: omp.inner.for.body9: -// CHECK13-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV5]], align 4 +// CHECK13-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV5]], align 4, !llvm.access.group !25 // CHECK13-NEXT: [[MUL10:%.*]] = mul nsw i32 [[TMP8]], 1 // CHECK13-NEXT: [[ADD11:%.*]] = add nsw i32 0, [[MUL10]] -// CHECK13-NEXT: store i32 [[ADD11]], i32* [[I6]], align 4 -// CHECK13-NEXT: call void @_Z3fn2v() +// CHECK13-NEXT: store i32 [[ADD11]], i32* [[I6]], align 4, !llvm.access.group !25 +// CHECK13-NEXT: call void @_Z3fn2v(), !llvm.access.group !25 // CHECK13-NEXT: br label [[OMP_BODY_CONTINUE12:%.*]] // CHECK13: omp.body.continue12: // CHECK13-NEXT: br label [[OMP_INNER_FOR_INC13:%.*]] // CHECK13: omp.inner.for.inc13: -// CHECK13-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV5]], align 4 +// CHECK13-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV5]], align 4, !llvm.access.group !25 // CHECK13-NEXT: [[ADD14:%.*]] = add nsw i32 [[TMP9]], 1 -// CHECK13-NEXT: store i32 [[ADD14]], i32* [[DOTOMP_IV5]], align 4 +// CHECK13-NEXT: store 
i32 [[ADD14]], i32* [[DOTOMP_IV5]], align 4, !llvm.access.group !25 // CHECK13-NEXT: br label [[OMP_INNER_FOR_COND7]], !llvm.loop [[LOOP26:![0-9]+]] // CHECK13: omp.inner.for.end15: // CHECK13-NEXT: store i32 100, i32* [[I6]], align 4 @@ -14281,23 +14281,23 @@ int main() { // CHECK13-NEXT: store i32 [[TMP11]], i32* [[DOTOMP_IV19]], align 4 // CHECK13-NEXT: br label [[OMP_INNER_FOR_COND21:%.*]] // CHECK13: omp.inner.for.cond21: -// CHECK13-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV19]], align 4 -// CHECK13-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_UB18]], align 4 +// CHECK13-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV19]], align 4, !llvm.access.group !28 +// CHECK13-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_UB18]], align 4, !llvm.access.group !28 // CHECK13-NEXT: [[CMP22:%.*]] = icmp sle i32 [[TMP12]], [[TMP13]] // CHECK13-NEXT: br i1 [[CMP22]], label [[OMP_INNER_FOR_BODY23:%.*]], label [[OMP_INNER_FOR_END29:%.*]] // CHECK13: omp.inner.for.body23: -// CHECK13-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IV19]], align 4 +// CHECK13-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IV19]], align 4, !llvm.access.group !28 // CHECK13-NEXT: [[MUL24:%.*]] = mul nsw i32 [[TMP14]], 1 // CHECK13-NEXT: [[ADD25:%.*]] = add nsw i32 0, [[MUL24]] -// CHECK13-NEXT: store i32 [[ADD25]], i32* [[I20]], align 4 -// CHECK13-NEXT: call void @_Z3fn3v() +// CHECK13-NEXT: store i32 [[ADD25]], i32* [[I20]], align 4, !llvm.access.group !28 +// CHECK13-NEXT: call void @_Z3fn3v(), !llvm.access.group !28 // CHECK13-NEXT: br label [[OMP_BODY_CONTINUE26:%.*]] // CHECK13: omp.body.continue26: // CHECK13-NEXT: br label [[OMP_INNER_FOR_INC27:%.*]] // CHECK13: omp.inner.for.inc27: -// CHECK13-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_IV19]], align 4 +// CHECK13-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_IV19]], align 4, !llvm.access.group !28 // CHECK13-NEXT: [[ADD28:%.*]] = add nsw i32 [[TMP15]], 1 -// CHECK13-NEXT: store i32 [[ADD28]], i32* [[DOTOMP_IV19]], align 4 +// CHECK13-NEXT: store i32 [[ADD28]], i32* [[DOTOMP_IV19]], align 4, !llvm.access.group !28 // CHECK13-NEXT: br label [[OMP_INNER_FOR_COND21]], !llvm.loop [[LOOP29:![0-9]+]] // CHECK13: omp.inner.for.end29: // CHECK13-NEXT: store i32 100, i32* [[I20]], align 4 @@ -14323,23 +14323,23 @@ int main() { // CHECK14-NEXT: store i32 [[TMP0]], i32* [[DOTOMP_IV]], align 4 // CHECK14-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK14: omp.inner.for.cond: -// CHECK14-NEXT: [[TMP1:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK14-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK14-NEXT: [[TMP1:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !6 +// CHECK14-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !6 // CHECK14-NEXT: [[CMP:%.*]] = icmp sle i32 [[TMP1]], [[TMP2]] // CHECK14-NEXT: br i1 [[CMP]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK14: omp.inner.for.body: -// CHECK14-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK14-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !6 // CHECK14-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP3]], 1 // CHECK14-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK14-NEXT: store i32 [[ADD]], i32* [[I]], align 4 -// CHECK14-NEXT: store i32 0, i32* @Arg, align 4 +// CHECK14-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !6 +// CHECK14-NEXT: store i32 0, i32* @Arg, align 4, !llvm.access.group !6 // CHECK14-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // 
CHECK14: omp.body.continue: // CHECK14-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK14: omp.inner.for.inc: -// CHECK14-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK14-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !6 // CHECK14-NEXT: [[ADD1:%.*]] = add nsw i32 [[TMP4]], 1 -// CHECK14-NEXT: store i32 [[ADD1]], i32* [[DOTOMP_IV]], align 4 +// CHECK14-NEXT: store i32 [[ADD1]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !6 // CHECK14-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP7:![0-9]+]] // CHECK14: omp.inner.for.end: // CHECK14-NEXT: store i32 100, i32* [[I]], align 4 @@ -14349,23 +14349,23 @@ int main() { // CHECK14-NEXT: store i32 [[TMP5]], i32* [[DOTOMP_IV5]], align 4 // CHECK14-NEXT: br label [[OMP_INNER_FOR_COND7:%.*]] // CHECK14: omp.inner.for.cond7: -// CHECK14-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV5]], align 4 -// CHECK14-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_UB4]], align 4 +// CHECK14-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV5]], align 4, !llvm.access.group !10 +// CHECK14-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_UB4]], align 4, !llvm.access.group !10 // CHECK14-NEXT: [[CMP8:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] // CHECK14-NEXT: br i1 [[CMP8]], label [[OMP_INNER_FOR_BODY9:%.*]], label [[OMP_INNER_FOR_END15:%.*]] // CHECK14: omp.inner.for.body9: -// CHECK14-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV5]], align 4 +// CHECK14-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV5]], align 4, !llvm.access.group !10 // CHECK14-NEXT: [[MUL10:%.*]] = mul nsw i32 [[TMP8]], 1 // CHECK14-NEXT: [[ADD11:%.*]] = add nsw i32 0, [[MUL10]] -// CHECK14-NEXT: store i32 [[ADD11]], i32* [[I6]], align 4 -// CHECK14-NEXT: call void @_Z9gtid_testv() +// CHECK14-NEXT: store i32 [[ADD11]], i32* [[I6]], align 4, !llvm.access.group !10 +// CHECK14-NEXT: call void @_Z9gtid_testv(), !llvm.access.group !10 // CHECK14-NEXT: br label [[OMP_BODY_CONTINUE12:%.*]] // CHECK14: omp.body.continue12: // CHECK14-NEXT: br label [[OMP_INNER_FOR_INC13:%.*]] // CHECK14: omp.inner.for.inc13: -// CHECK14-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV5]], align 4 +// CHECK14-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV5]], align 4, !llvm.access.group !10 // CHECK14-NEXT: [[ADD14:%.*]] = add nsw i32 [[TMP9]], 1 -// CHECK14-NEXT: store i32 [[ADD14]], i32* [[DOTOMP_IV5]], align 4 +// CHECK14-NEXT: store i32 [[ADD14]], i32* [[DOTOMP_IV5]], align 4, !llvm.access.group !10 // CHECK14-NEXT: br label [[OMP_INNER_FOR_COND7]], !llvm.loop [[LOOP11:![0-9]+]] // CHECK14: omp.inner.for.end15: // CHECK14-NEXT: store i32 100, i32* [[I6]], align 4 @@ -14399,23 +14399,23 @@ int main() { // CHECK14-NEXT: store i32 [[TMP0]], i32* [[DOTOMP_IV]], align 4 // CHECK14-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK14: omp.inner.for.cond: -// CHECK14-NEXT: [[TMP1:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK14-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK14-NEXT: [[TMP1:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !13 +// CHECK14-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !13 // CHECK14-NEXT: [[CMP:%.*]] = icmp sle i32 [[TMP1]], [[TMP2]] // CHECK14-NEXT: br i1 [[CMP]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK14: omp.inner.for.body: -// CHECK14-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK14-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !13 // CHECK14-NEXT: [[MUL:%.*]] = 
mul nsw i32 [[TMP3]], 1 // CHECK14-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK14-NEXT: store i32 [[ADD]], i32* [[I]], align 4 -// CHECK14-NEXT: call void @_Z3fn4v() +// CHECK14-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !13 +// CHECK14-NEXT: call void @_Z3fn4v(), !llvm.access.group !13 // CHECK14-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK14: omp.body.continue: // CHECK14-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK14: omp.inner.for.inc: -// CHECK14-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK14-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !13 // CHECK14-NEXT: [[ADD1:%.*]] = add nsw i32 [[TMP4]], 1 -// CHECK14-NEXT: store i32 [[ADD1]], i32* [[DOTOMP_IV]], align 4 +// CHECK14-NEXT: store i32 [[ADD1]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !13 // CHECK14-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP14:![0-9]+]] // CHECK14: omp.inner.for.end: // CHECK14-NEXT: store i32 100, i32* [[I]], align 4 @@ -14425,23 +14425,23 @@ int main() { // CHECK14-NEXT: store i32 [[TMP5]], i32* [[DOTOMP_IV5]], align 4 // CHECK14-NEXT: br label [[OMP_INNER_FOR_COND7:%.*]] // CHECK14: omp.inner.for.cond7: -// CHECK14-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV5]], align 4 -// CHECK14-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_UB4]], align 4 +// CHECK14-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV5]], align 4, !llvm.access.group !16 +// CHECK14-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_UB4]], align 4, !llvm.access.group !16 // CHECK14-NEXT: [[CMP8:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] // CHECK14-NEXT: br i1 [[CMP8]], label [[OMP_INNER_FOR_BODY9:%.*]], label [[OMP_INNER_FOR_END15:%.*]] // CHECK14: omp.inner.for.body9: -// CHECK14-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV5]], align 4 +// CHECK14-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV5]], align 4, !llvm.access.group !16 // CHECK14-NEXT: [[MUL10:%.*]] = mul nsw i32 [[TMP8]], 1 // CHECK14-NEXT: [[ADD11:%.*]] = add nsw i32 0, [[MUL10]] -// CHECK14-NEXT: store i32 [[ADD11]], i32* [[I6]], align 4 -// CHECK14-NEXT: call void @_Z3fn5v() +// CHECK14-NEXT: store i32 [[ADD11]], i32* [[I6]], align 4, !llvm.access.group !16 +// CHECK14-NEXT: call void @_Z3fn5v(), !llvm.access.group !16 // CHECK14-NEXT: br label [[OMP_BODY_CONTINUE12:%.*]] // CHECK14: omp.body.continue12: // CHECK14-NEXT: br label [[OMP_INNER_FOR_INC13:%.*]] // CHECK14: omp.inner.for.inc13: -// CHECK14-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV5]], align 4 +// CHECK14-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV5]], align 4, !llvm.access.group !16 // CHECK14-NEXT: [[ADD14:%.*]] = add nsw i32 [[TMP9]], 1 -// CHECK14-NEXT: store i32 [[ADD14]], i32* [[DOTOMP_IV5]], align 4 +// CHECK14-NEXT: store i32 [[ADD14]], i32* [[DOTOMP_IV5]], align 4, !llvm.access.group !16 // CHECK14-NEXT: br label [[OMP_INNER_FOR_COND7]], !llvm.loop [[LOOP17:![0-9]+]] // CHECK14: omp.inner.for.end15: // CHECK14-NEXT: store i32 100, i32* [[I6]], align 4 @@ -14455,23 +14455,23 @@ int main() { // CHECK14-NEXT: store i32 [[TMP11]], i32* [[DOTOMP_IV19]], align 4 // CHECK14-NEXT: br label [[OMP_INNER_FOR_COND21:%.*]] // CHECK14: omp.inner.for.cond21: -// CHECK14-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV19]], align 4 -// CHECK14-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_UB18]], align 4 +// CHECK14-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV19]], align 4, !llvm.access.group !19 +// CHECK14-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_UB18]], align 4, !llvm.access.group !19 // 
CHECK14-NEXT: [[CMP22:%.*]] = icmp sle i32 [[TMP12]], [[TMP13]] // CHECK14-NEXT: br i1 [[CMP22]], label [[OMP_INNER_FOR_BODY23:%.*]], label [[OMP_INNER_FOR_END29:%.*]] // CHECK14: omp.inner.for.body23: -// CHECK14-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IV19]], align 4 +// CHECK14-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IV19]], align 4, !llvm.access.group !19 // CHECK14-NEXT: [[MUL24:%.*]] = mul nsw i32 [[TMP14]], 1 // CHECK14-NEXT: [[ADD25:%.*]] = add nsw i32 0, [[MUL24]] -// CHECK14-NEXT: store i32 [[ADD25]], i32* [[I20]], align 4 -// CHECK14-NEXT: call void @_Z3fn6v() +// CHECK14-NEXT: store i32 [[ADD25]], i32* [[I20]], align 4, !llvm.access.group !19 +// CHECK14-NEXT: call void @_Z3fn6v(), !llvm.access.group !19 // CHECK14-NEXT: br label [[OMP_BODY_CONTINUE26:%.*]] // CHECK14: omp.body.continue26: // CHECK14-NEXT: br label [[OMP_INNER_FOR_INC27:%.*]] // CHECK14: omp.inner.for.inc27: -// CHECK14-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_IV19]], align 4 +// CHECK14-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_IV19]], align 4, !llvm.access.group !19 // CHECK14-NEXT: [[ADD28:%.*]] = add nsw i32 [[TMP15]], 1 -// CHECK14-NEXT: store i32 [[ADD28]], i32* [[DOTOMP_IV19]], align 4 +// CHECK14-NEXT: store i32 [[ADD28]], i32* [[DOTOMP_IV19]], align 4, !llvm.access.group !19 // CHECK14-NEXT: br label [[OMP_INNER_FOR_COND21]], !llvm.loop [[LOOP20:![0-9]+]] // CHECK14: omp.inner.for.end29: // CHECK14-NEXT: store i32 100, i32* [[I20]], align 4 @@ -14507,23 +14507,23 @@ int main() { // CHECK14-NEXT: store i32 [[TMP0]], i32* [[DOTOMP_IV]], align 4 // CHECK14-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK14: omp.inner.for.cond: -// CHECK14-NEXT: [[TMP1:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK14-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK14-NEXT: [[TMP1:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !22 +// CHECK14-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !22 // CHECK14-NEXT: [[CMP:%.*]] = icmp sle i32 [[TMP1]], [[TMP2]] // CHECK14-NEXT: br i1 [[CMP]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK14: omp.inner.for.body: -// CHECK14-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK14-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !22 // CHECK14-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP3]], 1 // CHECK14-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK14-NEXT: store i32 [[ADD]], i32* [[I]], align 4 -// CHECK14-NEXT: call void @_Z3fn1v() +// CHECK14-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !22 +// CHECK14-NEXT: call void @_Z3fn1v(), !llvm.access.group !22 // CHECK14-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK14: omp.body.continue: // CHECK14-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK14: omp.inner.for.inc: -// CHECK14-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK14-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !22 // CHECK14-NEXT: [[ADD1:%.*]] = add nsw i32 [[TMP4]], 1 -// CHECK14-NEXT: store i32 [[ADD1]], i32* [[DOTOMP_IV]], align 4 +// CHECK14-NEXT: store i32 [[ADD1]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !22 // CHECK14-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP23:![0-9]+]] // CHECK14: omp.inner.for.end: // CHECK14-NEXT: store i32 100, i32* [[I]], align 4 @@ -14533,23 +14533,23 @@ int main() { // CHECK14-NEXT: store i32 [[TMP5]], i32* [[DOTOMP_IV5]], align 4 // CHECK14-NEXT: br label 
[[OMP_INNER_FOR_COND7:%.*]] // CHECK14: omp.inner.for.cond7: -// CHECK14-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV5]], align 4 -// CHECK14-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_UB4]], align 4 +// CHECK14-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV5]], align 4, !llvm.access.group !25 +// CHECK14-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_UB4]], align 4, !llvm.access.group !25 // CHECK14-NEXT: [[CMP8:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] // CHECK14-NEXT: br i1 [[CMP8]], label [[OMP_INNER_FOR_BODY9:%.*]], label [[OMP_INNER_FOR_END15:%.*]] // CHECK14: omp.inner.for.body9: -// CHECK14-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV5]], align 4 +// CHECK14-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV5]], align 4, !llvm.access.group !25 // CHECK14-NEXT: [[MUL10:%.*]] = mul nsw i32 [[TMP8]], 1 // CHECK14-NEXT: [[ADD11:%.*]] = add nsw i32 0, [[MUL10]] -// CHECK14-NEXT: store i32 [[ADD11]], i32* [[I6]], align 4 -// CHECK14-NEXT: call void @_Z3fn2v() +// CHECK14-NEXT: store i32 [[ADD11]], i32* [[I6]], align 4, !llvm.access.group !25 +// CHECK14-NEXT: call void @_Z3fn2v(), !llvm.access.group !25 // CHECK14-NEXT: br label [[OMP_BODY_CONTINUE12:%.*]] // CHECK14: omp.body.continue12: // CHECK14-NEXT: br label [[OMP_INNER_FOR_INC13:%.*]] // CHECK14: omp.inner.for.inc13: -// CHECK14-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV5]], align 4 +// CHECK14-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV5]], align 4, !llvm.access.group !25 // CHECK14-NEXT: [[ADD14:%.*]] = add nsw i32 [[TMP9]], 1 -// CHECK14-NEXT: store i32 [[ADD14]], i32* [[DOTOMP_IV5]], align 4 +// CHECK14-NEXT: store i32 [[ADD14]], i32* [[DOTOMP_IV5]], align 4, !llvm.access.group !25 // CHECK14-NEXT: br label [[OMP_INNER_FOR_COND7]], !llvm.loop [[LOOP26:![0-9]+]] // CHECK14: omp.inner.for.end15: // CHECK14-NEXT: store i32 100, i32* [[I6]], align 4 @@ -14563,23 +14563,23 @@ int main() { // CHECK14-NEXT: store i32 [[TMP11]], i32* [[DOTOMP_IV19]], align 4 // CHECK14-NEXT: br label [[OMP_INNER_FOR_COND21:%.*]] // CHECK14: omp.inner.for.cond21: -// CHECK14-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV19]], align 4 -// CHECK14-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_UB18]], align 4 +// CHECK14-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV19]], align 4, !llvm.access.group !28 +// CHECK14-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_UB18]], align 4, !llvm.access.group !28 // CHECK14-NEXT: [[CMP22:%.*]] = icmp sle i32 [[TMP12]], [[TMP13]] // CHECK14-NEXT: br i1 [[CMP22]], label [[OMP_INNER_FOR_BODY23:%.*]], label [[OMP_INNER_FOR_END29:%.*]] // CHECK14: omp.inner.for.body23: -// CHECK14-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IV19]], align 4 +// CHECK14-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IV19]], align 4, !llvm.access.group !28 // CHECK14-NEXT: [[MUL24:%.*]] = mul nsw i32 [[TMP14]], 1 // CHECK14-NEXT: [[ADD25:%.*]] = add nsw i32 0, [[MUL24]] -// CHECK14-NEXT: store i32 [[ADD25]], i32* [[I20]], align 4 -// CHECK14-NEXT: call void @_Z3fn3v() +// CHECK14-NEXT: store i32 [[ADD25]], i32* [[I20]], align 4, !llvm.access.group !28 +// CHECK14-NEXT: call void @_Z3fn3v(), !llvm.access.group !28 // CHECK14-NEXT: br label [[OMP_BODY_CONTINUE26:%.*]] // CHECK14: omp.body.continue26: // CHECK14-NEXT: br label [[OMP_INNER_FOR_INC27:%.*]] // CHECK14: omp.inner.for.inc27: -// CHECK14-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_IV19]], align 4 +// CHECK14-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_IV19]], align 4, !llvm.access.group !28 // CHECK14-NEXT: [[ADD28:%.*]] = add nsw i32 [[TMP15]], 1 -// CHECK14-NEXT: 
store i32 [[ADD28]], i32* [[DOTOMP_IV19]], align 4 +// CHECK14-NEXT: store i32 [[ADD28]], i32* [[DOTOMP_IV19]], align 4, !llvm.access.group !28 // CHECK14-NEXT: br label [[OMP_INNER_FOR_COND21]], !llvm.loop [[LOOP29:![0-9]+]] // CHECK14: omp.inner.for.end29: // CHECK14-NEXT: store i32 100, i32* [[I20]], align 4 @@ -14605,23 +14605,23 @@ int main() { // CHECK15-NEXT: store i32 [[TMP0]], i32* [[DOTOMP_IV]], align 4 // CHECK15-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK15: omp.inner.for.cond: -// CHECK15-NEXT: [[TMP1:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK15-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK15-NEXT: [[TMP1:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !6 +// CHECK15-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !6 // CHECK15-NEXT: [[CMP:%.*]] = icmp sle i32 [[TMP1]], [[TMP2]] // CHECK15-NEXT: br i1 [[CMP]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK15: omp.inner.for.body: -// CHECK15-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK15-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !6 // CHECK15-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP3]], 1 // CHECK15-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK15-NEXT: store i32 [[ADD]], i32* [[I]], align 4 -// CHECK15-NEXT: store i32 0, i32* @Arg, align 4, !nontemporal !7 +// CHECK15-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !6 +// CHECK15-NEXT: store i32 0, i32* @Arg, align 4, !nontemporal !7, !llvm.access.group !6 // CHECK15-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK15: omp.body.continue: // CHECK15-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK15: omp.inner.for.inc: -// CHECK15-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK15-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !6 // CHECK15-NEXT: [[ADD1:%.*]] = add nsw i32 [[TMP4]], 1 -// CHECK15-NEXT: store i32 [[ADD1]], i32* [[DOTOMP_IV]], align 4 +// CHECK15-NEXT: store i32 [[ADD1]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !6 // CHECK15-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP8:![0-9]+]] // CHECK15: omp.inner.for.end: // CHECK15-NEXT: store i32 100, i32* [[I]], align 4 @@ -14631,23 +14631,23 @@ int main() { // CHECK15-NEXT: store i32 [[TMP5]], i32* [[DOTOMP_IV5]], align 4 // CHECK15-NEXT: br label [[OMP_INNER_FOR_COND7:%.*]] // CHECK15: omp.inner.for.cond7: -// CHECK15-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV5]], align 4 -// CHECK15-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_UB4]], align 4 +// CHECK15-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV5]], align 4, !llvm.access.group !11 +// CHECK15-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_UB4]], align 4, !llvm.access.group !11 // CHECK15-NEXT: [[CMP8:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] // CHECK15-NEXT: br i1 [[CMP8]], label [[OMP_INNER_FOR_BODY9:%.*]], label [[OMP_INNER_FOR_END15:%.*]] // CHECK15: omp.inner.for.body9: -// CHECK15-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV5]], align 4 +// CHECK15-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV5]], align 4, !llvm.access.group !11 // CHECK15-NEXT: [[MUL10:%.*]] = mul nsw i32 [[TMP8]], 1 // CHECK15-NEXT: [[ADD11:%.*]] = add nsw i32 0, [[MUL10]] -// CHECK15-NEXT: store i32 [[ADD11]], i32* [[I6]], align 4 -// CHECK15-NEXT: call void @_Z9gtid_testv() +// CHECK15-NEXT: store i32 [[ADD11]], i32* [[I6]], align 4, !llvm.access.group !11 +// CHECK15-NEXT: call void @_Z9gtid_testv(), 
!llvm.access.group !11 // CHECK15-NEXT: br label [[OMP_BODY_CONTINUE12:%.*]] // CHECK15: omp.body.continue12: // CHECK15-NEXT: br label [[OMP_INNER_FOR_INC13:%.*]] // CHECK15: omp.inner.for.inc13: -// CHECK15-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV5]], align 4 +// CHECK15-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV5]], align 4, !llvm.access.group !11 // CHECK15-NEXT: [[ADD14:%.*]] = add nsw i32 [[TMP9]], 1 -// CHECK15-NEXT: store i32 [[ADD14]], i32* [[DOTOMP_IV5]], align 4 +// CHECK15-NEXT: store i32 [[ADD14]], i32* [[DOTOMP_IV5]], align 4, !llvm.access.group !11 // CHECK15-NEXT: br label [[OMP_INNER_FOR_COND7]], !llvm.loop [[LOOP12:![0-9]+]] // CHECK15: omp.inner.for.end15: // CHECK15-NEXT: store i32 100, i32* [[I6]], align 4 @@ -14681,23 +14681,23 @@ int main() { // CHECK15-NEXT: store i32 [[TMP0]], i32* [[DOTOMP_IV]], align 4 // CHECK15-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK15: omp.inner.for.cond: -// CHECK15-NEXT: [[TMP1:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK15-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK15-NEXT: [[TMP1:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !14 +// CHECK15-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !14 // CHECK15-NEXT: [[CMP:%.*]] = icmp sle i32 [[TMP1]], [[TMP2]] // CHECK15-NEXT: br i1 [[CMP]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK15: omp.inner.for.body: -// CHECK15-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK15-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !14 // CHECK15-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP3]], 1 // CHECK15-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK15-NEXT: store i32 [[ADD]], i32* [[I]], align 4 -// CHECK15-NEXT: call void @_Z3fn4v() +// CHECK15-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !14 +// CHECK15-NEXT: call void @_Z3fn4v(), !llvm.access.group !14 // CHECK15-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK15: omp.body.continue: // CHECK15-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK15: omp.inner.for.inc: -// CHECK15-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK15-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !14 // CHECK15-NEXT: [[ADD1:%.*]] = add nsw i32 [[TMP4]], 1 -// CHECK15-NEXT: store i32 [[ADD1]], i32* [[DOTOMP_IV]], align 4 +// CHECK15-NEXT: store i32 [[ADD1]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !14 // CHECK15-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP15:![0-9]+]] // CHECK15: omp.inner.for.end: // CHECK15-NEXT: store i32 100, i32* [[I]], align 4 @@ -14741,23 +14741,23 @@ int main() { // CHECK15: omp_if.then: // CHECK15-NEXT: br label [[OMP_INNER_FOR_COND22:%.*]] // CHECK15: omp.inner.for.cond22: -// CHECK15-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_IV19]], align 4 -// CHECK15-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_UB18]], align 4 +// CHECK15-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_IV19]], align 4, !llvm.access.group !19 +// CHECK15-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_UB18]], align 4, !llvm.access.group !19 // CHECK15-NEXT: [[CMP23:%.*]] = icmp sle i32 [[TMP13]], [[TMP14]] // CHECK15-NEXT: br i1 [[CMP23]], label [[OMP_INNER_FOR_BODY24:%.*]], label [[OMP_INNER_FOR_END30:%.*]] // CHECK15: omp.inner.for.body24: -// CHECK15-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_IV19]], align 4 +// CHECK15-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_IV19]], align 4, !llvm.access.group 
!19 // CHECK15-NEXT: [[MUL25:%.*]] = mul nsw i32 [[TMP15]], 1 // CHECK15-NEXT: [[ADD26:%.*]] = add nsw i32 0, [[MUL25]] -// CHECK15-NEXT: store i32 [[ADD26]], i32* [[I20]], align 4 -// CHECK15-NEXT: call void @_Z3fn6v() +// CHECK15-NEXT: store i32 [[ADD26]], i32* [[I20]], align 4, !llvm.access.group !19 +// CHECK15-NEXT: call void @_Z3fn6v(), !llvm.access.group !19 // CHECK15-NEXT: br label [[OMP_BODY_CONTINUE27:%.*]] // CHECK15: omp.body.continue27: // CHECK15-NEXT: br label [[OMP_INNER_FOR_INC28:%.*]] // CHECK15: omp.inner.for.inc28: -// CHECK15-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_IV19]], align 4 +// CHECK15-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_IV19]], align 4, !llvm.access.group !19 // CHECK15-NEXT: [[ADD29:%.*]] = add nsw i32 [[TMP16]], 1 -// CHECK15-NEXT: store i32 [[ADD29]], i32* [[DOTOMP_IV19]], align 4 +// CHECK15-NEXT: store i32 [[ADD29]], i32* [[DOTOMP_IV19]], align 4, !llvm.access.group !19 // CHECK15-NEXT: br label [[OMP_INNER_FOR_COND22]], !llvm.loop [[LOOP20:![0-9]+]] // CHECK15: omp.inner.for.end30: // CHECK15-NEXT: br label [[OMP_IF_END:%.*]] @@ -14818,23 +14818,23 @@ int main() { // CHECK15-NEXT: store i32 [[TMP0]], i32* [[DOTOMP_IV]], align 4 // CHECK15-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK15: omp.inner.for.cond: -// CHECK15-NEXT: [[TMP1:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK15-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK15-NEXT: [[TMP1:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !23 +// CHECK15-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !23 // CHECK15-NEXT: [[CMP:%.*]] = icmp sle i32 [[TMP1]], [[TMP2]] // CHECK15-NEXT: br i1 [[CMP]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK15: omp.inner.for.body: -// CHECK15-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK15-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !23 // CHECK15-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP3]], 1 // CHECK15-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK15-NEXT: store i32 [[ADD]], i32* [[I]], align 4 -// CHECK15-NEXT: call void @_Z3fn1v() +// CHECK15-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !23 +// CHECK15-NEXT: call void @_Z3fn1v(), !llvm.access.group !23 // CHECK15-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK15: omp.body.continue: // CHECK15-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK15: omp.inner.for.inc: -// CHECK15-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK15-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !23 // CHECK15-NEXT: [[ADD1:%.*]] = add nsw i32 [[TMP4]], 1 -// CHECK15-NEXT: store i32 [[ADD1]], i32* [[DOTOMP_IV]], align 4 +// CHECK15-NEXT: store i32 [[ADD1]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !23 // CHECK15-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP24:![0-9]+]] // CHECK15: omp.inner.for.end: // CHECK15-NEXT: store i32 100, i32* [[I]], align 4 @@ -14874,23 +14874,23 @@ int main() { // CHECK15-NEXT: store i32 [[TMP11]], i32* [[DOTOMP_IV19]], align 4 // CHECK15-NEXT: br label [[OMP_INNER_FOR_COND21:%.*]] // CHECK15: omp.inner.for.cond21: -// CHECK15-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV19]], align 4 -// CHECK15-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_UB18]], align 4 +// CHECK15-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV19]], align 4, !llvm.access.group !27 +// CHECK15-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_UB18]], align 4, 
!llvm.access.group !27 // CHECK15-NEXT: [[CMP22:%.*]] = icmp sle i32 [[TMP12]], [[TMP13]] // CHECK15-NEXT: br i1 [[CMP22]], label [[OMP_INNER_FOR_BODY23:%.*]], label [[OMP_INNER_FOR_END29:%.*]] // CHECK15: omp.inner.for.body23: -// CHECK15-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IV19]], align 4 +// CHECK15-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IV19]], align 4, !llvm.access.group !27 // CHECK15-NEXT: [[MUL24:%.*]] = mul nsw i32 [[TMP14]], 1 // CHECK15-NEXT: [[ADD25:%.*]] = add nsw i32 0, [[MUL24]] -// CHECK15-NEXT: store i32 [[ADD25]], i32* [[I20]], align 4 -// CHECK15-NEXT: call void @_Z3fn3v() +// CHECK15-NEXT: store i32 [[ADD25]], i32* [[I20]], align 4, !llvm.access.group !27 +// CHECK15-NEXT: call void @_Z3fn3v(), !llvm.access.group !27 // CHECK15-NEXT: br label [[OMP_BODY_CONTINUE26:%.*]] // CHECK15: omp.body.continue26: // CHECK15-NEXT: br label [[OMP_INNER_FOR_INC27:%.*]] // CHECK15: omp.inner.for.inc27: -// CHECK15-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_IV19]], align 4 +// CHECK15-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_IV19]], align 4, !llvm.access.group !27 // CHECK15-NEXT: [[ADD28:%.*]] = add nsw i32 [[TMP15]], 1 -// CHECK15-NEXT: store i32 [[ADD28]], i32* [[DOTOMP_IV19]], align 4 +// CHECK15-NEXT: store i32 [[ADD28]], i32* [[DOTOMP_IV19]], align 4, !llvm.access.group !27 // CHECK15-NEXT: br label [[OMP_INNER_FOR_COND21]], !llvm.loop [[LOOP28:![0-9]+]] // CHECK15: omp.inner.for.end29: // CHECK15-NEXT: store i32 100, i32* [[I20]], align 4 @@ -14916,23 +14916,23 @@ int main() { // CHECK16-NEXT: store i32 [[TMP0]], i32* [[DOTOMP_IV]], align 4 // CHECK16-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK16: omp.inner.for.cond: -// CHECK16-NEXT: [[TMP1:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK16-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK16-NEXT: [[TMP1:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !6 +// CHECK16-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !6 // CHECK16-NEXT: [[CMP:%.*]] = icmp sle i32 [[TMP1]], [[TMP2]] // CHECK16-NEXT: br i1 [[CMP]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK16: omp.inner.for.body: -// CHECK16-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK16-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !6 // CHECK16-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP3]], 1 // CHECK16-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK16-NEXT: store i32 [[ADD]], i32* [[I]], align 4 -// CHECK16-NEXT: store i32 0, i32* @Arg, align 4, !nontemporal !7 +// CHECK16-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !6 +// CHECK16-NEXT: store i32 0, i32* @Arg, align 4, !nontemporal !7, !llvm.access.group !6 // CHECK16-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK16: omp.body.continue: // CHECK16-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK16: omp.inner.for.inc: -// CHECK16-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK16-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !6 // CHECK16-NEXT: [[ADD1:%.*]] = add nsw i32 [[TMP4]], 1 -// CHECK16-NEXT: store i32 [[ADD1]], i32* [[DOTOMP_IV]], align 4 +// CHECK16-NEXT: store i32 [[ADD1]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !6 // CHECK16-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP8:![0-9]+]] // CHECK16: omp.inner.for.end: // CHECK16-NEXT: store i32 100, i32* [[I]], align 4 @@ -14942,23 +14942,23 @@ int main() { // CHECK16-NEXT: store i32 
[[TMP5]], i32* [[DOTOMP_IV5]], align 4 // CHECK16-NEXT: br label [[OMP_INNER_FOR_COND7:%.*]] // CHECK16: omp.inner.for.cond7: -// CHECK16-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV5]], align 4 -// CHECK16-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_UB4]], align 4 +// CHECK16-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV5]], align 4, !llvm.access.group !11 +// CHECK16-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_UB4]], align 4, !llvm.access.group !11 // CHECK16-NEXT: [[CMP8:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] // CHECK16-NEXT: br i1 [[CMP8]], label [[OMP_INNER_FOR_BODY9:%.*]], label [[OMP_INNER_FOR_END15:%.*]] // CHECK16: omp.inner.for.body9: -// CHECK16-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV5]], align 4 +// CHECK16-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV5]], align 4, !llvm.access.group !11 // CHECK16-NEXT: [[MUL10:%.*]] = mul nsw i32 [[TMP8]], 1 // CHECK16-NEXT: [[ADD11:%.*]] = add nsw i32 0, [[MUL10]] -// CHECK16-NEXT: store i32 [[ADD11]], i32* [[I6]], align 4 -// CHECK16-NEXT: call void @_Z9gtid_testv() +// CHECK16-NEXT: store i32 [[ADD11]], i32* [[I6]], align 4, !llvm.access.group !11 +// CHECK16-NEXT: call void @_Z9gtid_testv(), !llvm.access.group !11 // CHECK16-NEXT: br label [[OMP_BODY_CONTINUE12:%.*]] // CHECK16: omp.body.continue12: // CHECK16-NEXT: br label [[OMP_INNER_FOR_INC13:%.*]] // CHECK16: omp.inner.for.inc13: -// CHECK16-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV5]], align 4 +// CHECK16-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV5]], align 4, !llvm.access.group !11 // CHECK16-NEXT: [[ADD14:%.*]] = add nsw i32 [[TMP9]], 1 -// CHECK16-NEXT: store i32 [[ADD14]], i32* [[DOTOMP_IV5]], align 4 +// CHECK16-NEXT: store i32 [[ADD14]], i32* [[DOTOMP_IV5]], align 4, !llvm.access.group !11 // CHECK16-NEXT: br label [[OMP_INNER_FOR_COND7]], !llvm.loop [[LOOP12:![0-9]+]] // CHECK16: omp.inner.for.end15: // CHECK16-NEXT: store i32 100, i32* [[I6]], align 4 @@ -14992,23 +14992,23 @@ int main() { // CHECK16-NEXT: store i32 [[TMP0]], i32* [[DOTOMP_IV]], align 4 // CHECK16-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK16: omp.inner.for.cond: -// CHECK16-NEXT: [[TMP1:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK16-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK16-NEXT: [[TMP1:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !14 +// CHECK16-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !14 // CHECK16-NEXT: [[CMP:%.*]] = icmp sle i32 [[TMP1]], [[TMP2]] // CHECK16-NEXT: br i1 [[CMP]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK16: omp.inner.for.body: -// CHECK16-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK16-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !14 // CHECK16-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP3]], 1 // CHECK16-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK16-NEXT: store i32 [[ADD]], i32* [[I]], align 4 -// CHECK16-NEXT: call void @_Z3fn4v() +// CHECK16-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !14 +// CHECK16-NEXT: call void @_Z3fn4v(), !llvm.access.group !14 // CHECK16-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK16: omp.body.continue: // CHECK16-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK16: omp.inner.for.inc: -// CHECK16-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK16-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !14 // CHECK16-NEXT: [[ADD1:%.*]] = add nsw i32 [[TMP4]], 1 -// 
CHECK16-NEXT: store i32 [[ADD1]], i32* [[DOTOMP_IV]], align 4 +// CHECK16-NEXT: store i32 [[ADD1]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !14 // CHECK16-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP15:![0-9]+]] // CHECK16: omp.inner.for.end: // CHECK16-NEXT: store i32 100, i32* [[I]], align 4 @@ -15052,23 +15052,23 @@ int main() { // CHECK16: omp_if.then: // CHECK16-NEXT: br label [[OMP_INNER_FOR_COND22:%.*]] // CHECK16: omp.inner.for.cond22: -// CHECK16-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_IV19]], align 4 -// CHECK16-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_UB18]], align 4 +// CHECK16-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_IV19]], align 4, !llvm.access.group !19 +// CHECK16-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_UB18]], align 4, !llvm.access.group !19 // CHECK16-NEXT: [[CMP23:%.*]] = icmp sle i32 [[TMP13]], [[TMP14]] // CHECK16-NEXT: br i1 [[CMP23]], label [[OMP_INNER_FOR_BODY24:%.*]], label [[OMP_INNER_FOR_END30:%.*]] // CHECK16: omp.inner.for.body24: -// CHECK16-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_IV19]], align 4 +// CHECK16-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_IV19]], align 4, !llvm.access.group !19 // CHECK16-NEXT: [[MUL25:%.*]] = mul nsw i32 [[TMP15]], 1 // CHECK16-NEXT: [[ADD26:%.*]] = add nsw i32 0, [[MUL25]] -// CHECK16-NEXT: store i32 [[ADD26]], i32* [[I20]], align 4 -// CHECK16-NEXT: call void @_Z3fn6v() +// CHECK16-NEXT: store i32 [[ADD26]], i32* [[I20]], align 4, !llvm.access.group !19 +// CHECK16-NEXT: call void @_Z3fn6v(), !llvm.access.group !19 // CHECK16-NEXT: br label [[OMP_BODY_CONTINUE27:%.*]] // CHECK16: omp.body.continue27: // CHECK16-NEXT: br label [[OMP_INNER_FOR_INC28:%.*]] // CHECK16: omp.inner.for.inc28: -// CHECK16-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_IV19]], align 4 +// CHECK16-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_IV19]], align 4, !llvm.access.group !19 // CHECK16-NEXT: [[ADD29:%.*]] = add nsw i32 [[TMP16]], 1 -// CHECK16-NEXT: store i32 [[ADD29]], i32* [[DOTOMP_IV19]], align 4 +// CHECK16-NEXT: store i32 [[ADD29]], i32* [[DOTOMP_IV19]], align 4, !llvm.access.group !19 // CHECK16-NEXT: br label [[OMP_INNER_FOR_COND22]], !llvm.loop [[LOOP20:![0-9]+]] // CHECK16: omp.inner.for.end30: // CHECK16-NEXT: br label [[OMP_IF_END:%.*]] @@ -15129,23 +15129,23 @@ int main() { // CHECK16-NEXT: store i32 [[TMP0]], i32* [[DOTOMP_IV]], align 4 // CHECK16-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK16: omp.inner.for.cond: -// CHECK16-NEXT: [[TMP1:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK16-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK16-NEXT: [[TMP1:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !23 +// CHECK16-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !23 // CHECK16-NEXT: [[CMP:%.*]] = icmp sle i32 [[TMP1]], [[TMP2]] // CHECK16-NEXT: br i1 [[CMP]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK16: omp.inner.for.body: -// CHECK16-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK16-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !23 // CHECK16-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP3]], 1 // CHECK16-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK16-NEXT: store i32 [[ADD]], i32* [[I]], align 4 -// CHECK16-NEXT: call void @_Z3fn1v() +// CHECK16-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !23 +// CHECK16-NEXT: call void @_Z3fn1v(), !llvm.access.group !23 // CHECK16-NEXT: br label 
[[OMP_BODY_CONTINUE:%.*]] // CHECK16: omp.body.continue: // CHECK16-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK16: omp.inner.for.inc: -// CHECK16-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK16-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !23 // CHECK16-NEXT: [[ADD1:%.*]] = add nsw i32 [[TMP4]], 1 -// CHECK16-NEXT: store i32 [[ADD1]], i32* [[DOTOMP_IV]], align 4 +// CHECK16-NEXT: store i32 [[ADD1]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !23 // CHECK16-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP24:![0-9]+]] // CHECK16: omp.inner.for.end: // CHECK16-NEXT: store i32 100, i32* [[I]], align 4 @@ -15185,23 +15185,23 @@ int main() { // CHECK16-NEXT: store i32 [[TMP11]], i32* [[DOTOMP_IV19]], align 4 // CHECK16-NEXT: br label [[OMP_INNER_FOR_COND21:%.*]] // CHECK16: omp.inner.for.cond21: -// CHECK16-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV19]], align 4 -// CHECK16-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_UB18]], align 4 +// CHECK16-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV19]], align 4, !llvm.access.group !27 +// CHECK16-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_UB18]], align 4, !llvm.access.group !27 // CHECK16-NEXT: [[CMP22:%.*]] = icmp sle i32 [[TMP12]], [[TMP13]] // CHECK16-NEXT: br i1 [[CMP22]], label [[OMP_INNER_FOR_BODY23:%.*]], label [[OMP_INNER_FOR_END29:%.*]] // CHECK16: omp.inner.for.body23: -// CHECK16-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IV19]], align 4 +// CHECK16-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IV19]], align 4, !llvm.access.group !27 // CHECK16-NEXT: [[MUL24:%.*]] = mul nsw i32 [[TMP14]], 1 // CHECK16-NEXT: [[ADD25:%.*]] = add nsw i32 0, [[MUL24]] -// CHECK16-NEXT: store i32 [[ADD25]], i32* [[I20]], align 4 -// CHECK16-NEXT: call void @_Z3fn3v() +// CHECK16-NEXT: store i32 [[ADD25]], i32* [[I20]], align 4, !llvm.access.group !27 +// CHECK16-NEXT: call void @_Z3fn3v(), !llvm.access.group !27 // CHECK16-NEXT: br label [[OMP_BODY_CONTINUE26:%.*]] // CHECK16: omp.body.continue26: // CHECK16-NEXT: br label [[OMP_INNER_FOR_INC27:%.*]] // CHECK16: omp.inner.for.inc27: -// CHECK16-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_IV19]], align 4 +// CHECK16-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_IV19]], align 4, !llvm.access.group !27 // CHECK16-NEXT: [[ADD28:%.*]] = add nsw i32 [[TMP15]], 1 -// CHECK16-NEXT: store i32 [[ADD28]], i32* [[DOTOMP_IV19]], align 4 +// CHECK16-NEXT: store i32 [[ADD28]], i32* [[DOTOMP_IV19]], align 4, !llvm.access.group !27 // CHECK16-NEXT: br label [[OMP_INNER_FOR_COND21]], !llvm.loop [[LOOP28:![0-9]+]] // CHECK16: omp.inner.for.end29: // CHECK16-NEXT: store i32 100, i32* [[I20]], align 4 diff --git a/clang/test/OpenMP/target_teams_distribute_parallel_for_simd_lastprivate_codegen.cpp b/clang/test/OpenMP/target_teams_distribute_parallel_for_simd_lastprivate_codegen.cpp index 07d47f3a78f4..1887c26c0d8c 100644 --- a/clang/test/OpenMP/target_teams_distribute_parallel_for_simd_lastprivate_codegen.cpp +++ b/clang/test/OpenMP/target_teams_distribute_parallel_for_simd_lastprivate_codegen.cpp @@ -290,40 +290,40 @@ int main() { // CHECK1-NEXT: store i32 [[TMP5]], i32* [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !4 +// CHECK1-NEXT: [[TMP7:%.*]] = 
load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !4 // CHECK1-NEXT: [[CMP10:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] // CHECK1-NEXT: br i1 [[CMP10]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK1-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !4 // CHECK1-NEXT: [[TMP9:%.*]] = zext i32 [[TMP8]] to i64 -// CHECK1-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !4 // CHECK1-NEXT: [[TMP11:%.*]] = zext i32 [[TMP10]] to i64 -// CHECK1-NEXT: [[TMP12:%.*]] = load double*, double** [[_TMP7]], align 8 -// CHECK1-NEXT: [[TMP13:%.*]] = load volatile double, double* [[TMP12]], align 8 +// CHECK1-NEXT: [[TMP12:%.*]] = load double*, double** [[_TMP7]], align 8, !llvm.access.group !4 +// CHECK1-NEXT: [[TMP13:%.*]] = load volatile double, double* [[TMP12]], align 8, !llvm.access.group !4 // CHECK1-NEXT: [[CONV11:%.*]] = bitcast i64* [[G1_CASTED]] to double* -// CHECK1-NEXT: store double [[TMP13]], double* [[CONV11]], align 8 -// CHECK1-NEXT: [[TMP14:%.*]] = load i64, i64* [[G1_CASTED]], align 8 -// CHECK1-NEXT: [[TMP15:%.*]] = load i32, i32* [[SVAR8]], align 4 +// CHECK1-NEXT: store double [[TMP13]], double* [[CONV11]], align 8, !llvm.access.group !4 +// CHECK1-NEXT: [[TMP14:%.*]] = load i64, i64* [[G1_CASTED]], align 8, !llvm.access.group !4 +// CHECK1-NEXT: [[TMP15:%.*]] = load i32, i32* [[SVAR8]], align 4, !llvm.access.group !4 // CHECK1-NEXT: [[CONV12:%.*]] = bitcast i64* [[SVAR_CASTED]] to i32* -// CHECK1-NEXT: store i32 [[TMP15]], i32* [[CONV12]], align 4 -// CHECK1-NEXT: [[TMP16:%.*]] = load i64, i64* [[SVAR_CASTED]], align 8 -// CHECK1-NEXT: [[TMP17:%.*]] = load float, float* [[SFVAR9]], align 4 +// CHECK1-NEXT: store i32 [[TMP15]], i32* [[CONV12]], align 4, !llvm.access.group !4 +// CHECK1-NEXT: [[TMP16:%.*]] = load i64, i64* [[SVAR_CASTED]], align 8, !llvm.access.group !4 +// CHECK1-NEXT: [[TMP17:%.*]] = load float, float* [[SFVAR9]], align 4, !llvm.access.group !4 // CHECK1-NEXT: [[CONV13:%.*]] = bitcast i64* [[SFVAR_CASTED]] to float* -// CHECK1-NEXT: store float [[TMP17]], float* [[CONV13]], align 4 -// CHECK1-NEXT: [[TMP18:%.*]] = load i64, i64* [[SFVAR_CASTED]], align 8 -// CHECK1-NEXT: [[TMP19:%.*]] = load double, double* [[G5]], align 8 +// CHECK1-NEXT: store float [[TMP17]], float* [[CONV13]], align 4, !llvm.access.group !4 +// CHECK1-NEXT: [[TMP18:%.*]] = load i64, i64* [[SFVAR_CASTED]], align 8, !llvm.access.group !4 +// CHECK1-NEXT: [[TMP19:%.*]] = load double, double* [[G5]], align 8, !llvm.access.group !4 // CHECK1-NEXT: [[CONV14:%.*]] = bitcast i64* [[G_CASTED]] to double* -// CHECK1-NEXT: store double [[TMP19]], double* [[CONV14]], align 8 -// CHECK1-NEXT: [[TMP20:%.*]] = load i64, i64* [[G_CASTED]], align 8 -// CHECK1-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) 
@__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 6, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64, i64, i64, i64, i64)* @.omp_outlined..1 to void (i32*, i32*, ...)*), i64 [[TMP9]], i64 [[TMP11]], i64 [[TMP14]], i64 [[TMP16]], i64 [[TMP18]], i64 [[TMP20]]) +// CHECK1-NEXT: store double [[TMP19]], double* [[CONV14]], align 8, !llvm.access.group !4 +// CHECK1-NEXT: [[TMP20:%.*]] = load i64, i64* [[G_CASTED]], align 8, !llvm.access.group !4 +// CHECK1-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 6, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64, i64, i64, i64, i64)* @.omp_outlined..1 to void (i32*, i32*, ...)*), i64 [[TMP9]], i64 [[TMP11]], i64 [[TMP14]], i64 [[TMP16]], i64 [[TMP18]], i64 [[TMP20]]), !llvm.access.group !4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[TMP22:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK1-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !4 +// CHECK1-NEXT: [[TMP22:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !4 // CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP21]], [[TMP22]] -// CHECK1-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP3:![0-9]+]] +// CHECK1-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !4 +// CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP5:![0-9]+]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK1: omp.loop.exit: @@ -421,37 +421,37 @@ int main() { // CHECK1-NEXT: store i32 [[TMP7]], i32* [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !8 +// CHECK1-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !8 // CHECK1-NEXT: [[CMP12:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] // CHECK1-NEXT: br i1 [[CMP12]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !8 // CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP10]], 1 // CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK1-NEXT: store i32 [[ADD]], i32* [[I]], align 4 -// CHECK1-NEXT: [[TMP11:%.*]] = load double*, double** [[_TMP9]], align 8 -// CHECK1-NEXT: store volatile double 1.000000e+00, double* [[TMP11]], align 8 -// CHECK1-NEXT: store i32 3, i32* [[SVAR10]], align 4 -// CHECK1-NEXT: store float 4.000000e+00, float* [[SFVAR11]], align 4 +// CHECK1-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !8 +// CHECK1-NEXT: [[TMP11:%.*]] = load double*, double** [[_TMP9]], align 8, !llvm.access.group !8 +// CHECK1-NEXT: store volatile double 1.000000e+00, double* [[TMP11]], align 8, !llvm.access.group !8 +// CHECK1-NEXT: store i32 3, i32* [[SVAR10]], align 4, !llvm.access.group !8 +// CHECK1-NEXT: store float 4.000000e+00, float* [[SFVAR11]], align 4, !llvm.access.group !8 // CHECK1-NEXT: [[TMP12:%.*]] = getelementptr inbounds 
[[CLASS_ANON_0]], %class.anon.0* [[REF_TMP]], i32 0, i32 0 -// CHECK1-NEXT: store double* [[G7]], double** [[TMP12]], align 8 +// CHECK1-NEXT: store double* [[G7]], double** [[TMP12]], align 8, !llvm.access.group !8 // CHECK1-NEXT: [[TMP13:%.*]] = getelementptr inbounds [[CLASS_ANON_0]], %class.anon.0* [[REF_TMP]], i32 0, i32 1 -// CHECK1-NEXT: [[TMP14:%.*]] = load double*, double** [[_TMP9]], align 8 -// CHECK1-NEXT: store double* [[TMP14]], double** [[TMP13]], align 8 +// CHECK1-NEXT: [[TMP14:%.*]] = load double*, double** [[_TMP9]], align 8, !llvm.access.group !8 +// CHECK1-NEXT: store double* [[TMP14]], double** [[TMP13]], align 8, !llvm.access.group !8 // CHECK1-NEXT: [[TMP15:%.*]] = getelementptr inbounds [[CLASS_ANON_0]], %class.anon.0* [[REF_TMP]], i32 0, i32 2 -// CHECK1-NEXT: store i32* [[SVAR10]], i32** [[TMP15]], align 8 +// CHECK1-NEXT: store i32* [[SVAR10]], i32** [[TMP15]], align 8, !llvm.access.group !8 // CHECK1-NEXT: [[TMP16:%.*]] = getelementptr inbounds [[CLASS_ANON_0]], %class.anon.0* [[REF_TMP]], i32 0, i32 3 -// CHECK1-NEXT: store float* [[SFVAR11]], float** [[TMP16]], align 8 -// CHECK1-NEXT: call void @"_ZZZ4mainENK3$_0clEvENKUlvE_clEv"(%class.anon.0* nonnull align 8 dereferenceable(32) [[REF_TMP]]) +// CHECK1-NEXT: store float* [[SFVAR11]], float** [[TMP16]], align 8, !llvm.access.group !8 +// CHECK1-NEXT: call void @"_ZZZ4mainENK3$_0clEvENKUlvE_clEv"(%class.anon.0* nonnull align 8 dereferenceable(32) [[REF_TMP]]), !llvm.access.group !8 // CHECK1-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK1: omp.body.continue: // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !8 // CHECK1-NEXT: [[ADD13:%.*]] = add nsw i32 [[TMP17]], 1 -// CHECK1-NEXT: store i32 [[ADD13]], i32* [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP5:![0-9]+]] +// CHECK1-NEXT: store i32 [[ADD13]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !8 +// CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP9:![0-9]+]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK1: omp.loop.exit: @@ -609,40 +609,40 @@ int main() { // CHECK2-NEXT: store i32 [[TMP5]], i32* [[DOTOMP_IV]], align 4 // CHECK2-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK2: omp.inner.for.cond: -// CHECK2-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK2-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK2-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !4 +// CHECK2-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !4 // CHECK2-NEXT: [[CMP10:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] // CHECK2-NEXT: br i1 [[CMP10]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK2: omp.inner.for.body: -// CHECK2-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK2-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !4 // CHECK2-NEXT: [[TMP9:%.*]] = zext i32 [[TMP8]] to i64 -// CHECK2-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK2-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !4 // CHECK2-NEXT: [[TMP11:%.*]] = zext i32 [[TMP10]] to i64 -// CHECK2-NEXT: [[TMP12:%.*]] = load double*, double** [[_TMP7]], align 8 -// CHECK2-NEXT: 
[[TMP13:%.*]] = load volatile double, double* [[TMP12]], align 8 +// CHECK2-NEXT: [[TMP12:%.*]] = load double*, double** [[_TMP7]], align 8, !llvm.access.group !4 +// CHECK2-NEXT: [[TMP13:%.*]] = load volatile double, double* [[TMP12]], align 8, !llvm.access.group !4 // CHECK2-NEXT: [[CONV11:%.*]] = bitcast i64* [[G1_CASTED]] to double* -// CHECK2-NEXT: store double [[TMP13]], double* [[CONV11]], align 8 -// CHECK2-NEXT: [[TMP14:%.*]] = load i64, i64* [[G1_CASTED]], align 8 -// CHECK2-NEXT: [[TMP15:%.*]] = load i32, i32* [[SVAR8]], align 4 +// CHECK2-NEXT: store double [[TMP13]], double* [[CONV11]], align 8, !llvm.access.group !4 +// CHECK2-NEXT: [[TMP14:%.*]] = load i64, i64* [[G1_CASTED]], align 8, !llvm.access.group !4 +// CHECK2-NEXT: [[TMP15:%.*]] = load i32, i32* [[SVAR8]], align 4, !llvm.access.group !4 // CHECK2-NEXT: [[CONV12:%.*]] = bitcast i64* [[SVAR_CASTED]] to i32* -// CHECK2-NEXT: store i32 [[TMP15]], i32* [[CONV12]], align 4 -// CHECK2-NEXT: [[TMP16:%.*]] = load i64, i64* [[SVAR_CASTED]], align 8 -// CHECK2-NEXT: [[TMP17:%.*]] = load float, float* [[SFVAR9]], align 4 +// CHECK2-NEXT: store i32 [[TMP15]], i32* [[CONV12]], align 4, !llvm.access.group !4 +// CHECK2-NEXT: [[TMP16:%.*]] = load i64, i64* [[SVAR_CASTED]], align 8, !llvm.access.group !4 +// CHECK2-NEXT: [[TMP17:%.*]] = load float, float* [[SFVAR9]], align 4, !llvm.access.group !4 // CHECK2-NEXT: [[CONV13:%.*]] = bitcast i64* [[SFVAR_CASTED]] to float* -// CHECK2-NEXT: store float [[TMP17]], float* [[CONV13]], align 4 -// CHECK2-NEXT: [[TMP18:%.*]] = load i64, i64* [[SFVAR_CASTED]], align 8 -// CHECK2-NEXT: [[TMP19:%.*]] = load double, double* [[G5]], align 8 +// CHECK2-NEXT: store float [[TMP17]], float* [[CONV13]], align 4, !llvm.access.group !4 +// CHECK2-NEXT: [[TMP18:%.*]] = load i64, i64* [[SFVAR_CASTED]], align 8, !llvm.access.group !4 +// CHECK2-NEXT: [[TMP19:%.*]] = load double, double* [[G5]], align 8, !llvm.access.group !4 // CHECK2-NEXT: [[CONV14:%.*]] = bitcast i64* [[G_CASTED]] to double* -// CHECK2-NEXT: store double [[TMP19]], double* [[CONV14]], align 8 -// CHECK2-NEXT: [[TMP20:%.*]] = load i64, i64* [[G_CASTED]], align 8 -// CHECK2-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 6, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64, i64, i64, i64, i64)* @.omp_outlined..1 to void (i32*, i32*, ...)*), i64 [[TMP9]], i64 [[TMP11]], i64 [[TMP14]], i64 [[TMP16]], i64 [[TMP18]], i64 [[TMP20]]) +// CHECK2-NEXT: store double [[TMP19]], double* [[CONV14]], align 8, !llvm.access.group !4 +// CHECK2-NEXT: [[TMP20:%.*]] = load i64, i64* [[G_CASTED]], align 8, !llvm.access.group !4 +// CHECK2-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) 
@__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 6, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64, i64, i64, i64, i64)* @.omp_outlined..1 to void (i32*, i32*, ...)*), i64 [[TMP9]], i64 [[TMP11]], i64 [[TMP14]], i64 [[TMP16]], i64 [[TMP18]], i64 [[TMP20]]), !llvm.access.group !4 // CHECK2-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK2: omp.inner.for.inc: -// CHECK2-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK2-NEXT: [[TMP22:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK2-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !4 +// CHECK2-NEXT: [[TMP22:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !4 // CHECK2-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP21]], [[TMP22]] -// CHECK2-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 -// CHECK2-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP3:![0-9]+]] +// CHECK2-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !4 +// CHECK2-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP5:![0-9]+]] // CHECK2: omp.inner.for.end: // CHECK2-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK2: omp.loop.exit: @@ -740,37 +740,37 @@ int main() { // CHECK2-NEXT: store i32 [[TMP7]], i32* [[DOTOMP_IV]], align 4 // CHECK2-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK2: omp.inner.for.cond: -// CHECK2-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK2-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK2-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !8 +// CHECK2-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !8 // CHECK2-NEXT: [[CMP12:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] // CHECK2-NEXT: br i1 [[CMP12]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK2: omp.inner.for.body: -// CHECK2-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK2-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !8 // CHECK2-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP10]], 1 // CHECK2-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK2-NEXT: store i32 [[ADD]], i32* [[I]], align 4 -// CHECK2-NEXT: [[TMP11:%.*]] = load double*, double** [[_TMP9]], align 8 -// CHECK2-NEXT: store volatile double 1.000000e+00, double* [[TMP11]], align 8 -// CHECK2-NEXT: store i32 3, i32* [[SVAR10]], align 4 -// CHECK2-NEXT: store float 4.000000e+00, float* [[SFVAR11]], align 4 +// CHECK2-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !8 +// CHECK2-NEXT: [[TMP11:%.*]] = load double*, double** [[_TMP9]], align 8, !llvm.access.group !8 +// CHECK2-NEXT: store volatile double 1.000000e+00, double* [[TMP11]], align 8, !llvm.access.group !8 +// CHECK2-NEXT: store i32 3, i32* [[SVAR10]], align 4, !llvm.access.group !8 +// CHECK2-NEXT: store float 4.000000e+00, float* [[SFVAR11]], align 4, !llvm.access.group !8 // CHECK2-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[CLASS_ANON_0]], %class.anon.0* [[REF_TMP]], i32 0, i32 0 -// CHECK2-NEXT: store double* [[G7]], double** [[TMP12]], align 8 +// CHECK2-NEXT: store double* [[G7]], double** [[TMP12]], align 8, !llvm.access.group !8 // CHECK2-NEXT: [[TMP13:%.*]] = getelementptr inbounds [[CLASS_ANON_0]], %class.anon.0* [[REF_TMP]], i32 0, i32 1 -// CHECK2-NEXT: [[TMP14:%.*]] = load double*, double** [[_TMP9]], align 8 -// CHECK2-NEXT: store double* [[TMP14]], double** [[TMP13]], align 8 +// CHECK2-NEXT: [[TMP14:%.*]] = load double*, double** [[_TMP9]], 
align 8, !llvm.access.group !8 +// CHECK2-NEXT: store double* [[TMP14]], double** [[TMP13]], align 8, !llvm.access.group !8 // CHECK2-NEXT: [[TMP15:%.*]] = getelementptr inbounds [[CLASS_ANON_0]], %class.anon.0* [[REF_TMP]], i32 0, i32 2 -// CHECK2-NEXT: store i32* [[SVAR10]], i32** [[TMP15]], align 8 +// CHECK2-NEXT: store i32* [[SVAR10]], i32** [[TMP15]], align 8, !llvm.access.group !8 // CHECK2-NEXT: [[TMP16:%.*]] = getelementptr inbounds [[CLASS_ANON_0]], %class.anon.0* [[REF_TMP]], i32 0, i32 3 -// CHECK2-NEXT: store float* [[SFVAR11]], float** [[TMP16]], align 8 -// CHECK2-NEXT: call void @"_ZZZ4mainENK3$_0clEvENKUlvE_clEv"(%class.anon.0* nonnull align 8 dereferenceable(32) [[REF_TMP]]) +// CHECK2-NEXT: store float* [[SFVAR11]], float** [[TMP16]], align 8, !llvm.access.group !8 +// CHECK2-NEXT: call void @"_ZZZ4mainENK3$_0clEvENKUlvE_clEv"(%class.anon.0* nonnull align 8 dereferenceable(32) [[REF_TMP]]), !llvm.access.group !8 // CHECK2-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK2: omp.body.continue: // CHECK2-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK2: omp.inner.for.inc: -// CHECK2-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK2-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !8 // CHECK2-NEXT: [[ADD13:%.*]] = add nsw i32 [[TMP17]], 1 -// CHECK2-NEXT: store i32 [[ADD13]], i32* [[DOTOMP_IV]], align 4 -// CHECK2-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP5:![0-9]+]] +// CHECK2-NEXT: store i32 [[ADD13]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !8 +// CHECK2-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP9:![0-9]+]] // CHECK2: omp.inner.for.end: // CHECK2-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK2: omp.loop.exit: @@ -913,29 +913,29 @@ int main() { // CHECK3-NEXT: store i32 [[TMP7]], i32* [[DOTOMP_IV]], align 4 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK3: omp.inner.for.cond: -// CHECK3-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK3-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !5 +// CHECK3-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !5 // CHECK3-NEXT: [[CMP7:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] // CHECK3-NEXT: br i1 [[CMP7]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK3: omp.inner.for.body: -// CHECK3-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK3-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK3-NEXT: [[TMP12:%.*]] = load double*, double** [[_TMP4]], align 4 -// CHECK3-NEXT: [[TMP13:%.*]] = load i32, i32* [[SVAR5]], align 4 -// CHECK3-NEXT: store i32 [[TMP13]], i32* [[SVAR_CASTED]], align 4 -// CHECK3-NEXT: [[TMP14:%.*]] = load i32, i32* [[SVAR_CASTED]], align 4 -// CHECK3-NEXT: [[TMP15:%.*]] = load float, float* [[SFVAR6]], align 4 +// CHECK3-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !5 +// CHECK3-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !5 +// CHECK3-NEXT: [[TMP12:%.*]] = load double*, double** [[_TMP4]], align 4, !llvm.access.group !5 +// CHECK3-NEXT: [[TMP13:%.*]] = load i32, i32* [[SVAR5]], align 4, !llvm.access.group !5 +// CHECK3-NEXT: store i32 [[TMP13]], i32* [[SVAR_CASTED]], align 4, !llvm.access.group !5 +// CHECK3-NEXT: [[TMP14:%.*]] = load i32, i32* [[SVAR_CASTED]], align 4, !llvm.access.group !5 +// CHECK3-NEXT: 
[[TMP15:%.*]] = load float, float* [[SFVAR6]], align 4, !llvm.access.group !5 // CHECK3-NEXT: [[CONV8:%.*]] = bitcast i32* [[SFVAR_CASTED]] to float* -// CHECK3-NEXT: store float [[TMP15]], float* [[CONV8]], align 4 -// CHECK3-NEXT: [[TMP16:%.*]] = load i32, i32* [[SFVAR_CASTED]], align 4 -// CHECK3-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 6, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32, i32, double*, i32, i32, double*)* @.omp_outlined..1 to void (i32*, i32*, ...)*), i32 [[TMP10]], i32 [[TMP11]], double* [[TMP12]], i32 [[TMP14]], i32 [[TMP16]], double* [[G2]]) +// CHECK3-NEXT: store float [[TMP15]], float* [[CONV8]], align 4, !llvm.access.group !5 +// CHECK3-NEXT: [[TMP16:%.*]] = load i32, i32* [[SFVAR_CASTED]], align 4, !llvm.access.group !5 +// CHECK3-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 6, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32, i32, double*, i32, i32, double*)* @.omp_outlined..1 to void (i32*, i32*, ...)*), i32 [[TMP10]], i32 [[TMP11]], double* [[TMP12]], i32 [[TMP14]], i32 [[TMP16]], double* [[G2]]), !llvm.access.group !5 // CHECK3-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK3: omp.inner.for.inc: -// CHECK3-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK3-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !5 +// CHECK3-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !5 // CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP17]], [[TMP18]] -// CHECK3-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP4:![0-9]+]] +// CHECK3-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !5 +// CHECK3-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP6:![0-9]+]] // CHECK3: omp.inner.for.end: // CHECK3-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK3: omp.loop.exit: @@ -1030,37 +1030,37 @@ int main() { // CHECK3-NEXT: store i32 [[TMP9]], i32* [[DOTOMP_IV]], align 4 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK3: omp.inner.for.cond: -// CHECK3-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !9 +// CHECK3-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !9 // CHECK3-NEXT: [[CMP7:%.*]] = icmp sle i32 [[TMP10]], [[TMP11]] // CHECK3-NEXT: br i1 [[CMP7]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK3: omp.inner.for.body: -// CHECK3-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !9 // CHECK3-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP12]], 1 // CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK3-NEXT: store i32 [[ADD]], i32* [[I]], align 4 -// CHECK3-NEXT: [[TMP13:%.*]] = load double*, double** [[_TMP4]], align 4 -// CHECK3-NEXT: store volatile double 1.000000e+00, double* [[TMP13]], align 4 -// CHECK3-NEXT: store i32 3, i32* [[SVAR5]], align 4 -// CHECK3-NEXT: store float 4.000000e+00, float* [[SFVAR6]], align 4 +// CHECK3-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !9 +// CHECK3-NEXT: [[TMP13:%.*]] = 
load double*, double** [[_TMP4]], align 4, !llvm.access.group !9 +// CHECK3-NEXT: store volatile double 1.000000e+00, double* [[TMP13]], align 4, !llvm.access.group !9 +// CHECK3-NEXT: store i32 3, i32* [[SVAR5]], align 4, !llvm.access.group !9 +// CHECK3-NEXT: store float 4.000000e+00, float* [[SFVAR6]], align 4, !llvm.access.group !9 // CHECK3-NEXT: [[TMP14:%.*]] = getelementptr inbounds [[CLASS_ANON_0]], %class.anon.0* [[REF_TMP]], i32 0, i32 0 -// CHECK3-NEXT: store double* [[G2]], double** [[TMP14]], align 4 +// CHECK3-NEXT: store double* [[G2]], double** [[TMP14]], align 4, !llvm.access.group !9 // CHECK3-NEXT: [[TMP15:%.*]] = getelementptr inbounds [[CLASS_ANON_0]], %class.anon.0* [[REF_TMP]], i32 0, i32 1 -// CHECK3-NEXT: [[TMP16:%.*]] = load double*, double** [[_TMP4]], align 4 -// CHECK3-NEXT: store double* [[TMP16]], double** [[TMP15]], align 4 +// CHECK3-NEXT: [[TMP16:%.*]] = load double*, double** [[_TMP4]], align 4, !llvm.access.group !9 +// CHECK3-NEXT: store double* [[TMP16]], double** [[TMP15]], align 4, !llvm.access.group !9 // CHECK3-NEXT: [[TMP17:%.*]] = getelementptr inbounds [[CLASS_ANON_0]], %class.anon.0* [[REF_TMP]], i32 0, i32 2 -// CHECK3-NEXT: store i32* [[SVAR5]], i32** [[TMP17]], align 4 +// CHECK3-NEXT: store i32* [[SVAR5]], i32** [[TMP17]], align 4, !llvm.access.group !9 // CHECK3-NEXT: [[TMP18:%.*]] = getelementptr inbounds [[CLASS_ANON_0]], %class.anon.0* [[REF_TMP]], i32 0, i32 3 -// CHECK3-NEXT: store float* [[SFVAR6]], float** [[TMP18]], align 4 -// CHECK3-NEXT: call void @"_ZZZ4mainENK3$_0clEvENKUlvE_clEv"(%class.anon.0* nonnull align 4 dereferenceable(16) [[REF_TMP]]) +// CHECK3-NEXT: store float* [[SFVAR6]], float** [[TMP18]], align 4, !llvm.access.group !9 +// CHECK3-NEXT: call void @"_ZZZ4mainENK3$_0clEvENKUlvE_clEv"(%class.anon.0* nonnull align 4 dereferenceable(16) [[REF_TMP]]), !llvm.access.group !9 // CHECK3-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK3: omp.body.continue: // CHECK3-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK3: omp.inner.for.inc: -// CHECK3-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !9 // CHECK3-NEXT: [[ADD8:%.*]] = add nsw i32 [[TMP19]], 1 -// CHECK3-NEXT: store i32 [[ADD8]], i32* [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP6:![0-9]+]] +// CHECK3-NEXT: store i32 [[ADD8]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !9 +// CHECK3-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP10:![0-9]+]] // CHECK3: omp.inner.for.end: // CHECK3-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK3: omp.loop.exit: @@ -1203,29 +1203,29 @@ int main() { // CHECK4-NEXT: store i32 [[TMP7]], i32* [[DOTOMP_IV]], align 4 // CHECK4-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK4: omp.inner.for.cond: -// CHECK4-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK4-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK4-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !5 +// CHECK4-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !5 // CHECK4-NEXT: [[CMP7:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] // CHECK4-NEXT: br i1 [[CMP7]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK4: omp.inner.for.body: -// CHECK4-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK4-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK4-NEXT: 
[[TMP12:%.*]] = load double*, double** [[_TMP4]], align 4 -// CHECK4-NEXT: [[TMP13:%.*]] = load i32, i32* [[SVAR5]], align 4 -// CHECK4-NEXT: store i32 [[TMP13]], i32* [[SVAR_CASTED]], align 4 -// CHECK4-NEXT: [[TMP14:%.*]] = load i32, i32* [[SVAR_CASTED]], align 4 -// CHECK4-NEXT: [[TMP15:%.*]] = load float, float* [[SFVAR6]], align 4 +// CHECK4-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !5 +// CHECK4-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !5 +// CHECK4-NEXT: [[TMP12:%.*]] = load double*, double** [[_TMP4]], align 4, !llvm.access.group !5 +// CHECK4-NEXT: [[TMP13:%.*]] = load i32, i32* [[SVAR5]], align 4, !llvm.access.group !5 +// CHECK4-NEXT: store i32 [[TMP13]], i32* [[SVAR_CASTED]], align 4, !llvm.access.group !5 +// CHECK4-NEXT: [[TMP14:%.*]] = load i32, i32* [[SVAR_CASTED]], align 4, !llvm.access.group !5 +// CHECK4-NEXT: [[TMP15:%.*]] = load float, float* [[SFVAR6]], align 4, !llvm.access.group !5 // CHECK4-NEXT: [[CONV8:%.*]] = bitcast i32* [[SFVAR_CASTED]] to float* -// CHECK4-NEXT: store float [[TMP15]], float* [[CONV8]], align 4 -// CHECK4-NEXT: [[TMP16:%.*]] = load i32, i32* [[SFVAR_CASTED]], align 4 -// CHECK4-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 6, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32, i32, double*, i32, i32, double*)* @.omp_outlined..1 to void (i32*, i32*, ...)*), i32 [[TMP10]], i32 [[TMP11]], double* [[TMP12]], i32 [[TMP14]], i32 [[TMP16]], double* [[G2]]) +// CHECK4-NEXT: store float [[TMP15]], float* [[CONV8]], align 4, !llvm.access.group !5 +// CHECK4-NEXT: [[TMP16:%.*]] = load i32, i32* [[SFVAR_CASTED]], align 4, !llvm.access.group !5 +// CHECK4-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) 
@__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 6, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32, i32, double*, i32, i32, double*)* @.omp_outlined..1 to void (i32*, i32*, ...)*), i32 [[TMP10]], i32 [[TMP11]], double* [[TMP12]], i32 [[TMP14]], i32 [[TMP16]], double* [[G2]]), !llvm.access.group !5 // CHECK4-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK4: omp.inner.for.inc: -// CHECK4-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK4-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK4-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !5 +// CHECK4-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !5 // CHECK4-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP17]], [[TMP18]] -// CHECK4-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 -// CHECK4-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP4:![0-9]+]] +// CHECK4-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !5 +// CHECK4-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP6:![0-9]+]] // CHECK4: omp.inner.for.end: // CHECK4-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK4: omp.loop.exit: @@ -1320,37 +1320,37 @@ int main() { // CHECK4-NEXT: store i32 [[TMP9]], i32* [[DOTOMP_IV]], align 4 // CHECK4-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK4: omp.inner.for.cond: -// CHECK4-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK4-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK4-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !9 +// CHECK4-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !9 // CHECK4-NEXT: [[CMP7:%.*]] = icmp sle i32 [[TMP10]], [[TMP11]] // CHECK4-NEXT: br i1 [[CMP7]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK4: omp.inner.for.body: -// CHECK4-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK4-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !9 // CHECK4-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP12]], 1 // CHECK4-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK4-NEXT: store i32 [[ADD]], i32* [[I]], align 4 -// CHECK4-NEXT: [[TMP13:%.*]] = load double*, double** [[_TMP4]], align 4 -// CHECK4-NEXT: store volatile double 1.000000e+00, double* [[TMP13]], align 4 -// CHECK4-NEXT: store i32 3, i32* [[SVAR5]], align 4 -// CHECK4-NEXT: store float 4.000000e+00, float* [[SFVAR6]], align 4 +// CHECK4-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !9 +// CHECK4-NEXT: [[TMP13:%.*]] = load double*, double** [[_TMP4]], align 4, !llvm.access.group !9 +// CHECK4-NEXT: store volatile double 1.000000e+00, double* [[TMP13]], align 4, !llvm.access.group !9 +// CHECK4-NEXT: store i32 3, i32* [[SVAR5]], align 4, !llvm.access.group !9 +// CHECK4-NEXT: store float 4.000000e+00, float* [[SFVAR6]], align 4, !llvm.access.group !9 // CHECK4-NEXT: [[TMP14:%.*]] = getelementptr inbounds [[CLASS_ANON_0]], %class.anon.0* [[REF_TMP]], i32 0, i32 0 -// CHECK4-NEXT: store double* [[G2]], double** [[TMP14]], align 4 +// CHECK4-NEXT: store double* [[G2]], double** [[TMP14]], align 4, !llvm.access.group !9 // CHECK4-NEXT: [[TMP15:%.*]] = getelementptr inbounds [[CLASS_ANON_0]], %class.anon.0* [[REF_TMP]], i32 0, i32 1 -// CHECK4-NEXT: [[TMP16:%.*]] = load double*, double** [[_TMP4]], align 4 -// CHECK4-NEXT: store double* [[TMP16]], double** [[TMP15]], align 4 +// CHECK4-NEXT: [[TMP16:%.*]] = load double*, double** 
[[_TMP4]], align 4, !llvm.access.group !9 +// CHECK4-NEXT: store double* [[TMP16]], double** [[TMP15]], align 4, !llvm.access.group !9 // CHECK4-NEXT: [[TMP17:%.*]] = getelementptr inbounds [[CLASS_ANON_0]], %class.anon.0* [[REF_TMP]], i32 0, i32 2 -// CHECK4-NEXT: store i32* [[SVAR5]], i32** [[TMP17]], align 4 +// CHECK4-NEXT: store i32* [[SVAR5]], i32** [[TMP17]], align 4, !llvm.access.group !9 // CHECK4-NEXT: [[TMP18:%.*]] = getelementptr inbounds [[CLASS_ANON_0]], %class.anon.0* [[REF_TMP]], i32 0, i32 3 -// CHECK4-NEXT: store float* [[SFVAR6]], float** [[TMP18]], align 4 -// CHECK4-NEXT: call void @"_ZZZ4mainENK3$_0clEvENKUlvE_clEv"(%class.anon.0* nonnull align 4 dereferenceable(16) [[REF_TMP]]) +// CHECK4-NEXT: store float* [[SFVAR6]], float** [[TMP18]], align 4, !llvm.access.group !9 +// CHECK4-NEXT: call void @"_ZZZ4mainENK3$_0clEvENKUlvE_clEv"(%class.anon.0* nonnull align 4 dereferenceable(16) [[REF_TMP]]), !llvm.access.group !9 // CHECK4-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK4: omp.body.continue: // CHECK4-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK4: omp.inner.for.inc: -// CHECK4-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK4-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !9 // CHECK4-NEXT: [[ADD8:%.*]] = add nsw i32 [[TMP19]], 1 -// CHECK4-NEXT: store i32 [[ADD8]], i32* [[DOTOMP_IV]], align 4 -// CHECK4-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP6:![0-9]+]] +// CHECK4-NEXT: store i32 [[ADD8]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !9 +// CHECK4-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP10:![0-9]+]] // CHECK4: omp.inner.for.end: // CHECK4-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK4: omp.loop.exit: @@ -1627,34 +1627,34 @@ int main() { // CHECK5-NEXT: store i32 [[TMP8]], i32* [[DOTOMP_IV]], align 4 // CHECK5-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK5: omp.inner.for.cond: -// CHECK5-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK5-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK5-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !5 +// CHECK5-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !5 // CHECK5-NEXT: [[CMP9:%.*]] = icmp sle i32 [[TMP9]], [[TMP10]] // CHECK5-NEXT: br i1 [[CMP9]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_COND_CLEANUP:%.*]] // CHECK5: omp.inner.for.cond.cleanup: // CHECK5-NEXT: br label [[OMP_INNER_FOR_END:%.*]] // CHECK5: omp.inner.for.body: -// CHECK5-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK5-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !5 // CHECK5-NEXT: [[TMP12:%.*]] = zext i32 [[TMP11]] to i64 -// CHECK5-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK5-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !5 // CHECK5-NEXT: [[TMP14:%.*]] = zext i32 [[TMP13]] to i64 -// CHECK5-NEXT: [[TMP15:%.*]] = load i32, i32* [[T_VAR3]], align 4 +// CHECK5-NEXT: [[TMP15:%.*]] = load i32, i32* [[T_VAR3]], align 4, !llvm.access.group !5 // CHECK5-NEXT: [[CONV10:%.*]] = bitcast i64* [[T_VAR_CASTED]] to i32* -// CHECK5-NEXT: store i32 [[TMP15]], i32* [[CONV10]], align 4 -// CHECK5-NEXT: [[TMP16:%.*]] = load i64, i64* [[T_VAR_CASTED]], align 8 -// CHECK5-NEXT: [[TMP17:%.*]] = load %struct.S*, %struct.S** [[_TMP7]], align 8 -// CHECK5-NEXT: [[TMP18:%.*]] = load i32, i32* [[SVAR8]], align 4 +// 
CHECK5-NEXT: store i32 [[TMP15]], i32* [[CONV10]], align 4, !llvm.access.group !5 +// CHECK5-NEXT: [[TMP16:%.*]] = load i64, i64* [[T_VAR_CASTED]], align 8, !llvm.access.group !5 +// CHECK5-NEXT: [[TMP17:%.*]] = load %struct.S*, %struct.S** [[_TMP7]], align 8, !llvm.access.group !5 +// CHECK5-NEXT: [[TMP18:%.*]] = load i32, i32* [[SVAR8]], align 4, !llvm.access.group !5 // CHECK5-NEXT: [[CONV11:%.*]] = bitcast i64* [[SVAR_CASTED]] to i32* -// CHECK5-NEXT: store i32 [[TMP18]], i32* [[CONV11]], align 4 -// CHECK5-NEXT: [[TMP19:%.*]] = load i64, i64* [[SVAR_CASTED]], align 8 -// CHECK5-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 7, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64, [2 x i32]*, i64, [2 x %struct.S]*, %struct.S*, i64)* @.omp_outlined..1 to void (i32*, i32*, ...)*), i64 [[TMP12]], i64 [[TMP14]], [2 x i32]* [[VEC4]], i64 [[TMP16]], [2 x %struct.S]* [[S_ARR5]], %struct.S* [[TMP17]], i64 [[TMP19]]) +// CHECK5-NEXT: store i32 [[TMP18]], i32* [[CONV11]], align 4, !llvm.access.group !5 +// CHECK5-NEXT: [[TMP19:%.*]] = load i64, i64* [[SVAR_CASTED]], align 8, !llvm.access.group !5 +// CHECK5-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 7, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64, [2 x i32]*, i64, [2 x %struct.S]*, %struct.S*, i64)* @.omp_outlined..1 to void (i32*, i32*, ...)*), i64 [[TMP12]], i64 [[TMP14]], [2 x i32]* [[VEC4]], i64 [[TMP16]], [2 x %struct.S]* [[S_ARR5]], %struct.S* [[TMP17]], i64 [[TMP19]]), !llvm.access.group !5 // CHECK5-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK5: omp.inner.for.inc: -// CHECK5-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK5-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK5-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !5 +// CHECK5-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !5 // CHECK5-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP20]], [[TMP21]] -// CHECK5-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 -// CHECK5-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP4:![0-9]+]] +// CHECK5-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !5 +// CHECK5-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP6:![0-9]+]] // CHECK5: omp.inner.for.end: // CHECK5-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK5: omp.loop.exit: @@ -1797,37 +1797,37 @@ int main() { // CHECK5-NEXT: store i32 [[TMP10]], i32* [[DOTOMP_IV]], align 4 // CHECK5-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK5: omp.inner.for.cond: -// CHECK5-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK5-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK5-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !9 +// CHECK5-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !9 // CHECK5-NEXT: [[CMP11:%.*]] = icmp sle i32 [[TMP11]], [[TMP12]] // CHECK5-NEXT: br i1 [[CMP11]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_COND_CLEANUP:%.*]] // CHECK5: omp.inner.for.cond.cleanup: // CHECK5-NEXT: br label [[OMP_INNER_FOR_END:%.*]] // CHECK5: omp.inner.for.body: -// CHECK5-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK5-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !9 // CHECK5-NEXT: [[MUL:%.*]] = mul nsw i32 
[[TMP13]], 1 // CHECK5-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK5-NEXT: store i32 [[ADD]], i32* [[I]], align 4 -// CHECK5-NEXT: [[TMP14:%.*]] = load i32, i32* [[T_VAR5]], align 4 -// CHECK5-NEXT: [[TMP15:%.*]] = load i32, i32* [[I]], align 4 +// CHECK5-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !9 +// CHECK5-NEXT: [[TMP14:%.*]] = load i32, i32* [[T_VAR5]], align 4, !llvm.access.group !9 +// CHECK5-NEXT: [[TMP15:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !9 // CHECK5-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP15]] to i64 // CHECK5-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x i32], [2 x i32]* [[VEC6]], i64 0, i64 [[IDXPROM]] -// CHECK5-NEXT: store i32 [[TMP14]], i32* [[ARRAYIDX]], align 4 -// CHECK5-NEXT: [[TMP16:%.*]] = load %struct.S*, %struct.S** [[_TMP9]], align 8 -// CHECK5-NEXT: [[TMP17:%.*]] = load i32, i32* [[I]], align 4 +// CHECK5-NEXT: store i32 [[TMP14]], i32* [[ARRAYIDX]], align 4, !llvm.access.group !9 +// CHECK5-NEXT: [[TMP16:%.*]] = load %struct.S*, %struct.S** [[_TMP9]], align 8, !llvm.access.group !9 +// CHECK5-NEXT: [[TMP17:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !9 // CHECK5-NEXT: [[IDXPROM12:%.*]] = sext i32 [[TMP17]] to i64 // CHECK5-NEXT: [[ARRAYIDX13:%.*]] = getelementptr inbounds [2 x %struct.S], [2 x %struct.S]* [[S_ARR7]], i64 0, i64 [[IDXPROM12]] // CHECK5-NEXT: [[TMP18:%.*]] = bitcast %struct.S* [[ARRAYIDX13]] to i8* // CHECK5-NEXT: [[TMP19:%.*]] = bitcast %struct.S* [[TMP16]] to i8* -// CHECK5-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 [[TMP18]], i8* align 4 [[TMP19]], i64 4, i1 false) +// CHECK5-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 [[TMP18]], i8* align 4 [[TMP19]], i64 4, i1 false), !llvm.access.group !9 // CHECK5-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK5: omp.body.continue: // CHECK5-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK5: omp.inner.for.inc: -// CHECK5-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK5-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !9 // CHECK5-NEXT: [[ADD14:%.*]] = add nsw i32 [[TMP20]], 1 -// CHECK5-NEXT: store i32 [[ADD14]], i32* [[DOTOMP_IV]], align 4 -// CHECK5-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP6:![0-9]+]] +// CHECK5-NEXT: store i32 [[ADD14]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !9 +// CHECK5-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP10:![0-9]+]] // CHECK5: omp.inner.for.end: // CHECK5-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK5: omp.loop.exit: @@ -2140,30 +2140,30 @@ int main() { // CHECK5-NEXT: store i32 [[TMP8]], i32* [[DOTOMP_IV]], align 4 // CHECK5-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK5: omp.inner.for.cond: -// CHECK5-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK5-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK5-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !14 +// CHECK5-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !14 // CHECK5-NEXT: [[CMP7:%.*]] = icmp sle i32 [[TMP9]], [[TMP10]] // CHECK5-NEXT: br i1 [[CMP7]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_COND_CLEANUP:%.*]] // CHECK5: omp.inner.for.cond.cleanup: // CHECK5-NEXT: br label [[OMP_INNER_FOR_END:%.*]] // CHECK5: omp.inner.for.body: -// CHECK5-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK5-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !14 
// CHECK5-NEXT: [[TMP12:%.*]] = zext i32 [[TMP11]] to i64 -// CHECK5-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK5-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !14 // CHECK5-NEXT: [[TMP14:%.*]] = zext i32 [[TMP13]] to i64 -// CHECK5-NEXT: [[TMP15:%.*]] = load i32, i32* [[T_VAR2]], align 4 +// CHECK5-NEXT: [[TMP15:%.*]] = load i32, i32* [[T_VAR2]], align 4, !llvm.access.group !14 // CHECK5-NEXT: [[CONV8:%.*]] = bitcast i64* [[T_VAR_CASTED]] to i32* -// CHECK5-NEXT: store i32 [[TMP15]], i32* [[CONV8]], align 4 -// CHECK5-NEXT: [[TMP16:%.*]] = load i64, i64* [[T_VAR_CASTED]], align 8 -// CHECK5-NEXT: [[TMP17:%.*]] = load %struct.S.0*, %struct.S.0** [[_TMP6]], align 8 -// CHECK5-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 6, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64, [2 x i32]*, i64, [2 x %struct.S.0]*, %struct.S.0*)* @.omp_outlined..3 to void (i32*, i32*, ...)*), i64 [[TMP12]], i64 [[TMP14]], [2 x i32]* [[VEC3]], i64 [[TMP16]], [2 x %struct.S.0]* [[S_ARR4]], %struct.S.0* [[TMP17]]) +// CHECK5-NEXT: store i32 [[TMP15]], i32* [[CONV8]], align 4, !llvm.access.group !14 +// CHECK5-NEXT: [[TMP16:%.*]] = load i64, i64* [[T_VAR_CASTED]], align 8, !llvm.access.group !14 +// CHECK5-NEXT: [[TMP17:%.*]] = load %struct.S.0*, %struct.S.0** [[_TMP6]], align 8, !llvm.access.group !14 +// CHECK5-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 6, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64, [2 x i32]*, i64, [2 x %struct.S.0]*, %struct.S.0*)* @.omp_outlined..3 to void (i32*, i32*, ...)*), i64 [[TMP12]], i64 [[TMP14]], [2 x i32]* [[VEC3]], i64 [[TMP16]], [2 x %struct.S.0]* [[S_ARR4]], %struct.S.0* [[TMP17]]), !llvm.access.group !14 // CHECK5-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK5: omp.inner.for.inc: -// CHECK5-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK5-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK5-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !14 +// CHECK5-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !14 // CHECK5-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP18]], [[TMP19]] -// CHECK5-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 -// CHECK5-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP9:![0-9]+]] +// CHECK5-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !14 +// CHECK5-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP15:![0-9]+]] // CHECK5: omp.inner.for.end: // CHECK5-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK5: omp.loop.exit: @@ -2300,37 +2300,37 @@ int main() { // CHECK5-NEXT: store i32 [[TMP10]], i32* [[DOTOMP_IV]], align 4 // CHECK5-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK5: omp.inner.for.cond: -// CHECK5-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK5-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK5-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !17 +// CHECK5-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !17 // CHECK5-NEXT: [[CMP9:%.*]] = icmp sle i32 [[TMP11]], [[TMP12]] // CHECK5-NEXT: br i1 [[CMP9]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_COND_CLEANUP:%.*]] // CHECK5: omp.inner.for.cond.cleanup: // CHECK5-NEXT: br label 
[[OMP_INNER_FOR_END:%.*]] // CHECK5: omp.inner.for.body: -// CHECK5-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK5-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !17 // CHECK5-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP13]], 1 // CHECK5-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK5-NEXT: store i32 [[ADD]], i32* [[I]], align 4 -// CHECK5-NEXT: [[TMP14:%.*]] = load i32, i32* [[T_VAR4]], align 4 -// CHECK5-NEXT: [[TMP15:%.*]] = load i32, i32* [[I]], align 4 +// CHECK5-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !17 +// CHECK5-NEXT: [[TMP14:%.*]] = load i32, i32* [[T_VAR4]], align 4, !llvm.access.group !17 +// CHECK5-NEXT: [[TMP15:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !17 // CHECK5-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP15]] to i64 // CHECK5-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x i32], [2 x i32]* [[VEC5]], i64 0, i64 [[IDXPROM]] -// CHECK5-NEXT: store i32 [[TMP14]], i32* [[ARRAYIDX]], align 4 -// CHECK5-NEXT: [[TMP16:%.*]] = load %struct.S.0*, %struct.S.0** [[_TMP8]], align 8 -// CHECK5-NEXT: [[TMP17:%.*]] = load i32, i32* [[I]], align 4 +// CHECK5-NEXT: store i32 [[TMP14]], i32* [[ARRAYIDX]], align 4, !llvm.access.group !17 +// CHECK5-NEXT: [[TMP16:%.*]] = load %struct.S.0*, %struct.S.0** [[_TMP8]], align 8, !llvm.access.group !17 +// CHECK5-NEXT: [[TMP17:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !17 // CHECK5-NEXT: [[IDXPROM10:%.*]] = sext i32 [[TMP17]] to i64 // CHECK5-NEXT: [[ARRAYIDX11:%.*]] = getelementptr inbounds [2 x %struct.S.0], [2 x %struct.S.0]* [[S_ARR6]], i64 0, i64 [[IDXPROM10]] // CHECK5-NEXT: [[TMP18:%.*]] = bitcast %struct.S.0* [[ARRAYIDX11]] to i8* // CHECK5-NEXT: [[TMP19:%.*]] = bitcast %struct.S.0* [[TMP16]] to i8* -// CHECK5-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 [[TMP18]], i8* align 4 [[TMP19]], i64 4, i1 false) +// CHECK5-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 [[TMP18]], i8* align 4 [[TMP19]], i64 4, i1 false), !llvm.access.group !17 // CHECK5-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK5: omp.body.continue: // CHECK5-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK5: omp.inner.for.inc: -// CHECK5-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK5-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !17 // CHECK5-NEXT: [[ADD12:%.*]] = add nsw i32 [[TMP20]], 1 -// CHECK5-NEXT: store i32 [[ADD12]], i32* [[DOTOMP_IV]], align 4 -// CHECK5-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP10:![0-9]+]] +// CHECK5-NEXT: store i32 [[ADD12]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !17 +// CHECK5-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP18:![0-9]+]] // CHECK5: omp.inner.for.end: // CHECK5-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK5: omp.loop.exit: @@ -2680,34 +2680,34 @@ int main() { // CHECK6-NEXT: store i32 [[TMP8]], i32* [[DOTOMP_IV]], align 4 // CHECK6-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK6: omp.inner.for.cond: -// CHECK6-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK6-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK6-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !5 +// CHECK6-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !5 // CHECK6-NEXT: [[CMP9:%.*]] = icmp sle i32 [[TMP9]], [[TMP10]] // CHECK6-NEXT: br i1 [[CMP9]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_COND_CLEANUP:%.*]] // 
CHECK6: omp.inner.for.cond.cleanup: // CHECK6-NEXT: br label [[OMP_INNER_FOR_END:%.*]] // CHECK6: omp.inner.for.body: -// CHECK6-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK6-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !5 // CHECK6-NEXT: [[TMP12:%.*]] = zext i32 [[TMP11]] to i64 -// CHECK6-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK6-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !5 // CHECK6-NEXT: [[TMP14:%.*]] = zext i32 [[TMP13]] to i64 -// CHECK6-NEXT: [[TMP15:%.*]] = load i32, i32* [[T_VAR3]], align 4 +// CHECK6-NEXT: [[TMP15:%.*]] = load i32, i32* [[T_VAR3]], align 4, !llvm.access.group !5 // CHECK6-NEXT: [[CONV10:%.*]] = bitcast i64* [[T_VAR_CASTED]] to i32* -// CHECK6-NEXT: store i32 [[TMP15]], i32* [[CONV10]], align 4 -// CHECK6-NEXT: [[TMP16:%.*]] = load i64, i64* [[T_VAR_CASTED]], align 8 -// CHECK6-NEXT: [[TMP17:%.*]] = load %struct.S*, %struct.S** [[_TMP7]], align 8 -// CHECK6-NEXT: [[TMP18:%.*]] = load i32, i32* [[SVAR8]], align 4 +// CHECK6-NEXT: store i32 [[TMP15]], i32* [[CONV10]], align 4, !llvm.access.group !5 +// CHECK6-NEXT: [[TMP16:%.*]] = load i64, i64* [[T_VAR_CASTED]], align 8, !llvm.access.group !5 +// CHECK6-NEXT: [[TMP17:%.*]] = load %struct.S*, %struct.S** [[_TMP7]], align 8, !llvm.access.group !5 +// CHECK6-NEXT: [[TMP18:%.*]] = load i32, i32* [[SVAR8]], align 4, !llvm.access.group !5 // CHECK6-NEXT: [[CONV11:%.*]] = bitcast i64* [[SVAR_CASTED]] to i32* -// CHECK6-NEXT: store i32 [[TMP18]], i32* [[CONV11]], align 4 -// CHECK6-NEXT: [[TMP19:%.*]] = load i64, i64* [[SVAR_CASTED]], align 8 -// CHECK6-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 7, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64, [2 x i32]*, i64, [2 x %struct.S]*, %struct.S*, i64)* @.omp_outlined..1 to void (i32*, i32*, ...)*), i64 [[TMP12]], i64 [[TMP14]], [2 x i32]* [[VEC4]], i64 [[TMP16]], [2 x %struct.S]* [[S_ARR5]], %struct.S* [[TMP17]], i64 [[TMP19]]) +// CHECK6-NEXT: store i32 [[TMP18]], i32* [[CONV11]], align 4, !llvm.access.group !5 +// CHECK6-NEXT: [[TMP19:%.*]] = load i64, i64* [[SVAR_CASTED]], align 8, !llvm.access.group !5 +// CHECK6-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) 
@__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 7, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64, [2 x i32]*, i64, [2 x %struct.S]*, %struct.S*, i64)* @.omp_outlined..1 to void (i32*, i32*, ...)*), i64 [[TMP12]], i64 [[TMP14]], [2 x i32]* [[VEC4]], i64 [[TMP16]], [2 x %struct.S]* [[S_ARR5]], %struct.S* [[TMP17]], i64 [[TMP19]]), !llvm.access.group !5 // CHECK6-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK6: omp.inner.for.inc: -// CHECK6-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK6-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK6-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !5 +// CHECK6-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !5 // CHECK6-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP20]], [[TMP21]] -// CHECK6-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 -// CHECK6-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP4:![0-9]+]] +// CHECK6-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !5 +// CHECK6-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP6:![0-9]+]] // CHECK6: omp.inner.for.end: // CHECK6-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK6: omp.loop.exit: @@ -2850,37 +2850,37 @@ int main() { // CHECK6-NEXT: store i32 [[TMP10]], i32* [[DOTOMP_IV]], align 4 // CHECK6-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK6: omp.inner.for.cond: -// CHECK6-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK6-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK6-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !9 +// CHECK6-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !9 // CHECK6-NEXT: [[CMP11:%.*]] = icmp sle i32 [[TMP11]], [[TMP12]] // CHECK6-NEXT: br i1 [[CMP11]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_COND_CLEANUP:%.*]] // CHECK6: omp.inner.for.cond.cleanup: // CHECK6-NEXT: br label [[OMP_INNER_FOR_END:%.*]] // CHECK6: omp.inner.for.body: -// CHECK6-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK6-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !9 // CHECK6-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP13]], 1 // CHECK6-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK6-NEXT: store i32 [[ADD]], i32* [[I]], align 4 -// CHECK6-NEXT: [[TMP14:%.*]] = load i32, i32* [[T_VAR5]], align 4 -// CHECK6-NEXT: [[TMP15:%.*]] = load i32, i32* [[I]], align 4 +// CHECK6-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !9 +// CHECK6-NEXT: [[TMP14:%.*]] = load i32, i32* [[T_VAR5]], align 4, !llvm.access.group !9 +// CHECK6-NEXT: [[TMP15:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !9 // CHECK6-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP15]] to i64 // CHECK6-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x i32], [2 x i32]* [[VEC6]], i64 0, i64 [[IDXPROM]] -// CHECK6-NEXT: store i32 [[TMP14]], i32* [[ARRAYIDX]], align 4 -// CHECK6-NEXT: [[TMP16:%.*]] = load %struct.S*, %struct.S** [[_TMP9]], align 8 -// CHECK6-NEXT: [[TMP17:%.*]] = load i32, i32* [[I]], align 4 +// CHECK6-NEXT: store i32 [[TMP14]], i32* [[ARRAYIDX]], align 4, !llvm.access.group !9 +// CHECK6-NEXT: [[TMP16:%.*]] = load %struct.S*, %struct.S** [[_TMP9]], align 8, !llvm.access.group !9 +// CHECK6-NEXT: [[TMP17:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !9 // CHECK6-NEXT: [[IDXPROM12:%.*]] = sext i32 [[TMP17]] to i64 // CHECK6-NEXT: [[ARRAYIDX13:%.*]] = 
getelementptr inbounds [2 x %struct.S], [2 x %struct.S]* [[S_ARR7]], i64 0, i64 [[IDXPROM12]] // CHECK6-NEXT: [[TMP18:%.*]] = bitcast %struct.S* [[ARRAYIDX13]] to i8* // CHECK6-NEXT: [[TMP19:%.*]] = bitcast %struct.S* [[TMP16]] to i8* -// CHECK6-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 [[TMP18]], i8* align 4 [[TMP19]], i64 4, i1 false) +// CHECK6-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 [[TMP18]], i8* align 4 [[TMP19]], i64 4, i1 false), !llvm.access.group !9 // CHECK6-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK6: omp.body.continue: // CHECK6-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK6: omp.inner.for.inc: -// CHECK6-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK6-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !9 // CHECK6-NEXT: [[ADD14:%.*]] = add nsw i32 [[TMP20]], 1 -// CHECK6-NEXT: store i32 [[ADD14]], i32* [[DOTOMP_IV]], align 4 -// CHECK6-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP6:![0-9]+]] +// CHECK6-NEXT: store i32 [[ADD14]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !9 +// CHECK6-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP10:![0-9]+]] // CHECK6: omp.inner.for.end: // CHECK6-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK6: omp.loop.exit: @@ -3193,30 +3193,30 @@ int main() { // CHECK6-NEXT: store i32 [[TMP8]], i32* [[DOTOMP_IV]], align 4 // CHECK6-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK6: omp.inner.for.cond: -// CHECK6-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK6-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK6-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !14 +// CHECK6-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !14 // CHECK6-NEXT: [[CMP7:%.*]] = icmp sle i32 [[TMP9]], [[TMP10]] // CHECK6-NEXT: br i1 [[CMP7]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_COND_CLEANUP:%.*]] // CHECK6: omp.inner.for.cond.cleanup: // CHECK6-NEXT: br label [[OMP_INNER_FOR_END:%.*]] // CHECK6: omp.inner.for.body: -// CHECK6-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK6-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !14 // CHECK6-NEXT: [[TMP12:%.*]] = zext i32 [[TMP11]] to i64 -// CHECK6-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK6-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !14 // CHECK6-NEXT: [[TMP14:%.*]] = zext i32 [[TMP13]] to i64 -// CHECK6-NEXT: [[TMP15:%.*]] = load i32, i32* [[T_VAR2]], align 4 +// CHECK6-NEXT: [[TMP15:%.*]] = load i32, i32* [[T_VAR2]], align 4, !llvm.access.group !14 // CHECK6-NEXT: [[CONV8:%.*]] = bitcast i64* [[T_VAR_CASTED]] to i32* -// CHECK6-NEXT: store i32 [[TMP15]], i32* [[CONV8]], align 4 -// CHECK6-NEXT: [[TMP16:%.*]] = load i64, i64* [[T_VAR_CASTED]], align 8 -// CHECK6-NEXT: [[TMP17:%.*]] = load %struct.S.0*, %struct.S.0** [[_TMP6]], align 8 -// CHECK6-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) 
@__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 6, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64, [2 x i32]*, i64, [2 x %struct.S.0]*, %struct.S.0*)* @.omp_outlined..3 to void (i32*, i32*, ...)*), i64 [[TMP12]], i64 [[TMP14]], [2 x i32]* [[VEC3]], i64 [[TMP16]], [2 x %struct.S.0]* [[S_ARR4]], %struct.S.0* [[TMP17]]) +// CHECK6-NEXT: store i32 [[TMP15]], i32* [[CONV8]], align 4, !llvm.access.group !14 +// CHECK6-NEXT: [[TMP16:%.*]] = load i64, i64* [[T_VAR_CASTED]], align 8, !llvm.access.group !14 +// CHECK6-NEXT: [[TMP17:%.*]] = load %struct.S.0*, %struct.S.0** [[_TMP6]], align 8, !llvm.access.group !14 +// CHECK6-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 6, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64, [2 x i32]*, i64, [2 x %struct.S.0]*, %struct.S.0*)* @.omp_outlined..3 to void (i32*, i32*, ...)*), i64 [[TMP12]], i64 [[TMP14]], [2 x i32]* [[VEC3]], i64 [[TMP16]], [2 x %struct.S.0]* [[S_ARR4]], %struct.S.0* [[TMP17]]), !llvm.access.group !14 // CHECK6-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK6: omp.inner.for.inc: -// CHECK6-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK6-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK6-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !14 +// CHECK6-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !14 // CHECK6-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP18]], [[TMP19]] -// CHECK6-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 -// CHECK6-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP9:![0-9]+]] +// CHECK6-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !14 +// CHECK6-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP15:![0-9]+]] // CHECK6: omp.inner.for.end: // CHECK6-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK6: omp.loop.exit: @@ -3353,37 +3353,37 @@ int main() { // CHECK6-NEXT: store i32 [[TMP10]], i32* [[DOTOMP_IV]], align 4 // CHECK6-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK6: omp.inner.for.cond: -// CHECK6-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK6-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK6-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !17 +// CHECK6-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !17 // CHECK6-NEXT: [[CMP9:%.*]] = icmp sle i32 [[TMP11]], [[TMP12]] // CHECK6-NEXT: br i1 [[CMP9]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_COND_CLEANUP:%.*]] // CHECK6: omp.inner.for.cond.cleanup: // CHECK6-NEXT: br label [[OMP_INNER_FOR_END:%.*]] // CHECK6: omp.inner.for.body: -// CHECK6-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK6-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !17 // CHECK6-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP13]], 1 // CHECK6-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK6-NEXT: store i32 [[ADD]], i32* [[I]], align 4 -// CHECK6-NEXT: [[TMP14:%.*]] = load i32, i32* [[T_VAR4]], align 4 -// CHECK6-NEXT: [[TMP15:%.*]] = load i32, i32* [[I]], align 4 +// CHECK6-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !17 +// CHECK6-NEXT: [[TMP14:%.*]] = load i32, i32* [[T_VAR4]], align 4, !llvm.access.group !17 +// CHECK6-NEXT: [[TMP15:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !17 // CHECK6-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP15]] to 
i64 // CHECK6-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x i32], [2 x i32]* [[VEC5]], i64 0, i64 [[IDXPROM]] -// CHECK6-NEXT: store i32 [[TMP14]], i32* [[ARRAYIDX]], align 4 -// CHECK6-NEXT: [[TMP16:%.*]] = load %struct.S.0*, %struct.S.0** [[_TMP8]], align 8 -// CHECK6-NEXT: [[TMP17:%.*]] = load i32, i32* [[I]], align 4 +// CHECK6-NEXT: store i32 [[TMP14]], i32* [[ARRAYIDX]], align 4, !llvm.access.group !17 +// CHECK6-NEXT: [[TMP16:%.*]] = load %struct.S.0*, %struct.S.0** [[_TMP8]], align 8, !llvm.access.group !17 +// CHECK6-NEXT: [[TMP17:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !17 // CHECK6-NEXT: [[IDXPROM10:%.*]] = sext i32 [[TMP17]] to i64 // CHECK6-NEXT: [[ARRAYIDX11:%.*]] = getelementptr inbounds [2 x %struct.S.0], [2 x %struct.S.0]* [[S_ARR6]], i64 0, i64 [[IDXPROM10]] // CHECK6-NEXT: [[TMP18:%.*]] = bitcast %struct.S.0* [[ARRAYIDX11]] to i8* // CHECK6-NEXT: [[TMP19:%.*]] = bitcast %struct.S.0* [[TMP16]] to i8* -// CHECK6-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 [[TMP18]], i8* align 4 [[TMP19]], i64 4, i1 false) +// CHECK6-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 [[TMP18]], i8* align 4 [[TMP19]], i64 4, i1 false), !llvm.access.group !17 // CHECK6-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK6: omp.body.continue: // CHECK6-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK6: omp.inner.for.inc: -// CHECK6-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK6-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !17 // CHECK6-NEXT: [[ADD12:%.*]] = add nsw i32 [[TMP20]], 1 -// CHECK6-NEXT: store i32 [[ADD12]], i32* [[DOTOMP_IV]], align 4 -// CHECK6-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP10:![0-9]+]] +// CHECK6-NEXT: store i32 [[ADD12]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !17 +// CHECK6-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP18:![0-9]+]] // CHECK6: omp.inner.for.end: // CHECK6-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK6: omp.loop.exit: @@ -3725,30 +3725,30 @@ int main() { // CHECK7-NEXT: store i32 [[TMP8]], i32* [[DOTOMP_IV]], align 4 // CHECK7-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK7: omp.inner.for.cond: -// CHECK7-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK7-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK7-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !6 +// CHECK7-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !6 // CHECK7-NEXT: [[CMP8:%.*]] = icmp sle i32 [[TMP9]], [[TMP10]] // CHECK7-NEXT: br i1 [[CMP8]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_COND_CLEANUP:%.*]] // CHECK7: omp.inner.for.cond.cleanup: // CHECK7-NEXT: br label [[OMP_INNER_FOR_END:%.*]] // CHECK7: omp.inner.for.body: -// CHECK7-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK7-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK7-NEXT: [[TMP13:%.*]] = load i32, i32* [[T_VAR2]], align 4 -// CHECK7-NEXT: store i32 [[TMP13]], i32* [[T_VAR_CASTED]], align 4 -// CHECK7-NEXT: [[TMP14:%.*]] = load i32, i32* [[T_VAR_CASTED]], align 4 -// CHECK7-NEXT: [[TMP15:%.*]] = load %struct.S*, %struct.S** [[_TMP6]], align 4 -// CHECK7-NEXT: [[TMP16:%.*]] = load i32, i32* [[SVAR7]], align 4 -// CHECK7-NEXT: store i32 [[TMP16]], i32* [[SVAR_CASTED]], align 4 -// CHECK7-NEXT: [[TMP17:%.*]] = load i32, i32* [[SVAR_CASTED]], align 4 -// CHECK7-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, 
...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 7, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32, i32, [2 x i32]*, i32, [2 x %struct.S]*, %struct.S*, i32)* @.omp_outlined..1 to void (i32*, i32*, ...)*), i32 [[TMP11]], i32 [[TMP12]], [2 x i32]* [[VEC3]], i32 [[TMP14]], [2 x %struct.S]* [[S_ARR4]], %struct.S* [[TMP15]], i32 [[TMP17]]) +// CHECK7-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !6 +// CHECK7-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !6 +// CHECK7-NEXT: [[TMP13:%.*]] = load i32, i32* [[T_VAR2]], align 4, !llvm.access.group !6 +// CHECK7-NEXT: store i32 [[TMP13]], i32* [[T_VAR_CASTED]], align 4, !llvm.access.group !6 +// CHECK7-NEXT: [[TMP14:%.*]] = load i32, i32* [[T_VAR_CASTED]], align 4, !llvm.access.group !6 +// CHECK7-NEXT: [[TMP15:%.*]] = load %struct.S*, %struct.S** [[_TMP6]], align 4, !llvm.access.group !6 +// CHECK7-NEXT: [[TMP16:%.*]] = load i32, i32* [[SVAR7]], align 4, !llvm.access.group !6 +// CHECK7-NEXT: store i32 [[TMP16]], i32* [[SVAR_CASTED]], align 4, !llvm.access.group !6 +// CHECK7-NEXT: [[TMP17:%.*]] = load i32, i32* [[SVAR_CASTED]], align 4, !llvm.access.group !6 +// CHECK7-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 7, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32, i32, [2 x i32]*, i32, [2 x %struct.S]*, %struct.S*, i32)* @.omp_outlined..1 to void (i32*, i32*, ...)*), i32 [[TMP11]], i32 [[TMP12]], [2 x i32]* [[VEC3]], i32 [[TMP14]], [2 x %struct.S]* [[S_ARR4]], %struct.S* [[TMP15]], i32 [[TMP17]]), !llvm.access.group !6 // CHECK7-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK7: omp.inner.for.inc: -// CHECK7-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK7-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK7-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !6 +// CHECK7-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !6 // CHECK7-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP18]], [[TMP19]] -// CHECK7-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 -// CHECK7-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP5:![0-9]+]] +// CHECK7-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !6 +// CHECK7-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP7:![0-9]+]] // CHECK7: omp.inner.for.end: // CHECK7-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK7: omp.loop.exit: @@ -3887,35 +3887,35 @@ int main() { // CHECK7-NEXT: store i32 [[TMP10]], i32* [[DOTOMP_IV]], align 4 // CHECK7-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK7: omp.inner.for.cond: -// CHECK7-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK7-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK7-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !10 +// CHECK7-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !10 // CHECK7-NEXT: [[CMP8:%.*]] = icmp sle i32 [[TMP11]], [[TMP12]] // CHECK7-NEXT: br i1 [[CMP8]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_COND_CLEANUP:%.*]] // CHECK7: omp.inner.for.cond.cleanup: // CHECK7-NEXT: br label [[OMP_INNER_FOR_END:%.*]] // CHECK7: omp.inner.for.body: -// CHECK7-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK7-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !10 // CHECK7-NEXT: 
[[MUL:%.*]] = mul nsw i32 [[TMP13]], 1 // CHECK7-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK7-NEXT: store i32 [[ADD]], i32* [[I]], align 4 -// CHECK7-NEXT: [[TMP14:%.*]] = load i32, i32* [[T_VAR2]], align 4 -// CHECK7-NEXT: [[TMP15:%.*]] = load i32, i32* [[I]], align 4 +// CHECK7-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !10 +// CHECK7-NEXT: [[TMP14:%.*]] = load i32, i32* [[T_VAR2]], align 4, !llvm.access.group !10 +// CHECK7-NEXT: [[TMP15:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !10 // CHECK7-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x i32], [2 x i32]* [[VEC3]], i32 0, i32 [[TMP15]] -// CHECK7-NEXT: store i32 [[TMP14]], i32* [[ARRAYIDX]], align 4 -// CHECK7-NEXT: [[TMP16:%.*]] = load %struct.S*, %struct.S** [[_TMP6]], align 4 -// CHECK7-NEXT: [[TMP17:%.*]] = load i32, i32* [[I]], align 4 +// CHECK7-NEXT: store i32 [[TMP14]], i32* [[ARRAYIDX]], align 4, !llvm.access.group !10 +// CHECK7-NEXT: [[TMP16:%.*]] = load %struct.S*, %struct.S** [[_TMP6]], align 4, !llvm.access.group !10 +// CHECK7-NEXT: [[TMP17:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !10 // CHECK7-NEXT: [[ARRAYIDX9:%.*]] = getelementptr inbounds [2 x %struct.S], [2 x %struct.S]* [[S_ARR4]], i32 0, i32 [[TMP17]] // CHECK7-NEXT: [[TMP18:%.*]] = bitcast %struct.S* [[ARRAYIDX9]] to i8* // CHECK7-NEXT: [[TMP19:%.*]] = bitcast %struct.S* [[TMP16]] to i8* -// CHECK7-NEXT: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 4 [[TMP18]], i8* align 4 [[TMP19]], i32 4, i1 false) +// CHECK7-NEXT: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 4 [[TMP18]], i8* align 4 [[TMP19]], i32 4, i1 false), !llvm.access.group !10 // CHECK7-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK7: omp.body.continue: // CHECK7-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK7: omp.inner.for.inc: -// CHECK7-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK7-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !10 // CHECK7-NEXT: [[ADD10:%.*]] = add nsw i32 [[TMP20]], 1 -// CHECK7-NEXT: store i32 [[ADD10]], i32* [[DOTOMP_IV]], align 4 -// CHECK7-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP7:![0-9]+]] +// CHECK7-NEXT: store i32 [[ADD10]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !10 +// CHECK7-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP11:![0-9]+]] // CHECK7: omp.inner.for.end: // CHECK7-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK7: omp.loop.exit: @@ -4224,27 +4224,27 @@ int main() { // CHECK7-NEXT: store i32 [[TMP8]], i32* [[DOTOMP_IV]], align 4 // CHECK7-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK7: omp.inner.for.cond: -// CHECK7-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK7-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK7-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !15 +// CHECK7-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !15 // CHECK7-NEXT: [[CMP7:%.*]] = icmp sle i32 [[TMP9]], [[TMP10]] // CHECK7-NEXT: br i1 [[CMP7]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_COND_CLEANUP:%.*]] // CHECK7: omp.inner.for.cond.cleanup: // CHECK7-NEXT: br label [[OMP_INNER_FOR_END:%.*]] // CHECK7: omp.inner.for.body: -// CHECK7-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK7-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK7-NEXT: [[TMP13:%.*]] = load i32, i32* [[T_VAR2]], align 4 -// CHECK7-NEXT: store i32 [[TMP13]], i32* 
[[T_VAR_CASTED]], align 4 -// CHECK7-NEXT: [[TMP14:%.*]] = load i32, i32* [[T_VAR_CASTED]], align 4 -// CHECK7-NEXT: [[TMP15:%.*]] = load %struct.S.0*, %struct.S.0** [[_TMP6]], align 4 -// CHECK7-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 6, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32, i32, [2 x i32]*, i32, [2 x %struct.S.0]*, %struct.S.0*)* @.omp_outlined..3 to void (i32*, i32*, ...)*), i32 [[TMP11]], i32 [[TMP12]], [2 x i32]* [[VEC3]], i32 [[TMP14]], [2 x %struct.S.0]* [[S_ARR4]], %struct.S.0* [[TMP15]]) +// CHECK7-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !15 +// CHECK7-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !15 +// CHECK7-NEXT: [[TMP13:%.*]] = load i32, i32* [[T_VAR2]], align 4, !llvm.access.group !15 +// CHECK7-NEXT: store i32 [[TMP13]], i32* [[T_VAR_CASTED]], align 4, !llvm.access.group !15 +// CHECK7-NEXT: [[TMP14:%.*]] = load i32, i32* [[T_VAR_CASTED]], align 4, !llvm.access.group !15 +// CHECK7-NEXT: [[TMP15:%.*]] = load %struct.S.0*, %struct.S.0** [[_TMP6]], align 4, !llvm.access.group !15 +// CHECK7-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 6, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32, i32, [2 x i32]*, i32, [2 x %struct.S.0]*, %struct.S.0*)* @.omp_outlined..3 to void (i32*, i32*, ...)*), i32 [[TMP11]], i32 [[TMP12]], [2 x i32]* [[VEC3]], i32 [[TMP14]], [2 x %struct.S.0]* [[S_ARR4]], %struct.S.0* [[TMP15]]), !llvm.access.group !15 // CHECK7-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK7: omp.inner.for.inc: -// CHECK7-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK7-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK7-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !15 +// CHECK7-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !15 // CHECK7-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP16]], [[TMP17]] -// CHECK7-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 -// CHECK7-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP10:![0-9]+]] +// CHECK7-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !15 +// CHECK7-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP16:![0-9]+]] // CHECK7: omp.inner.for.end: // CHECK7-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK7: omp.loop.exit: @@ -4378,35 +4378,35 @@ int main() { // CHECK7-NEXT: store i32 [[TMP10]], i32* [[DOTOMP_IV]], align 4 // CHECK7-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK7: omp.inner.for.cond: -// CHECK7-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK7-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK7-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !18 +// CHECK7-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !18 // CHECK7-NEXT: [[CMP7:%.*]] = icmp sle i32 [[TMP11]], [[TMP12]] // CHECK7-NEXT: br i1 [[CMP7]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_COND_CLEANUP:%.*]] // CHECK7: omp.inner.for.cond.cleanup: // CHECK7-NEXT: br label [[OMP_INNER_FOR_END:%.*]] // CHECK7: omp.inner.for.body: -// CHECK7-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK7-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !18 // CHECK7-NEXT: [[MUL:%.*]] = mul nsw i32 
[[TMP13]], 1 // CHECK7-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK7-NEXT: store i32 [[ADD]], i32* [[I]], align 4 -// CHECK7-NEXT: [[TMP14:%.*]] = load i32, i32* [[T_VAR2]], align 4 -// CHECK7-NEXT: [[TMP15:%.*]] = load i32, i32* [[I]], align 4 +// CHECK7-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !18 +// CHECK7-NEXT: [[TMP14:%.*]] = load i32, i32* [[T_VAR2]], align 4, !llvm.access.group !18 +// CHECK7-NEXT: [[TMP15:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !18 // CHECK7-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x i32], [2 x i32]* [[VEC3]], i32 0, i32 [[TMP15]] -// CHECK7-NEXT: store i32 [[TMP14]], i32* [[ARRAYIDX]], align 4 -// CHECK7-NEXT: [[TMP16:%.*]] = load %struct.S.0*, %struct.S.0** [[_TMP6]], align 4 -// CHECK7-NEXT: [[TMP17:%.*]] = load i32, i32* [[I]], align 4 +// CHECK7-NEXT: store i32 [[TMP14]], i32* [[ARRAYIDX]], align 4, !llvm.access.group !18 +// CHECK7-NEXT: [[TMP16:%.*]] = load %struct.S.0*, %struct.S.0** [[_TMP6]], align 4, !llvm.access.group !18 +// CHECK7-NEXT: [[TMP17:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !18 // CHECK7-NEXT: [[ARRAYIDX8:%.*]] = getelementptr inbounds [2 x %struct.S.0], [2 x %struct.S.0]* [[S_ARR4]], i32 0, i32 [[TMP17]] // CHECK7-NEXT: [[TMP18:%.*]] = bitcast %struct.S.0* [[ARRAYIDX8]] to i8* // CHECK7-NEXT: [[TMP19:%.*]] = bitcast %struct.S.0* [[TMP16]] to i8* -// CHECK7-NEXT: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 4 [[TMP18]], i8* align 4 [[TMP19]], i32 4, i1 false) +// CHECK7-NEXT: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 4 [[TMP18]], i8* align 4 [[TMP19]], i32 4, i1 false), !llvm.access.group !18 // CHECK7-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK7: omp.body.continue: // CHECK7-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK7: omp.inner.for.inc: -// CHECK7-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK7-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !18 // CHECK7-NEXT: [[ADD9:%.*]] = add nsw i32 [[TMP20]], 1 -// CHECK7-NEXT: store i32 [[ADD9]], i32* [[DOTOMP_IV]], align 4 -// CHECK7-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP11:![0-9]+]] +// CHECK7-NEXT: store i32 [[ADD9]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !18 +// CHECK7-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP19:![0-9]+]] // CHECK7: omp.inner.for.end: // CHECK7-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK7: omp.loop.exit: @@ -4748,30 +4748,30 @@ int main() { // CHECK8-NEXT: store i32 [[TMP8]], i32* [[DOTOMP_IV]], align 4 // CHECK8-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK8: omp.inner.for.cond: -// CHECK8-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK8-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK8-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !6 +// CHECK8-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !6 // CHECK8-NEXT: [[CMP8:%.*]] = icmp sle i32 [[TMP9]], [[TMP10]] // CHECK8-NEXT: br i1 [[CMP8]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_COND_CLEANUP:%.*]] // CHECK8: omp.inner.for.cond.cleanup: // CHECK8-NEXT: br label [[OMP_INNER_FOR_END:%.*]] // CHECK8: omp.inner.for.body: -// CHECK8-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK8-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK8-NEXT: [[TMP13:%.*]] = load i32, i32* [[T_VAR2]], align 4 -// CHECK8-NEXT: store i32 [[TMP13]], i32* [[T_VAR_CASTED]], align 
4 -// CHECK8-NEXT: [[TMP14:%.*]] = load i32, i32* [[T_VAR_CASTED]], align 4 -// CHECK8-NEXT: [[TMP15:%.*]] = load %struct.S*, %struct.S** [[_TMP6]], align 4 -// CHECK8-NEXT: [[TMP16:%.*]] = load i32, i32* [[SVAR7]], align 4 -// CHECK8-NEXT: store i32 [[TMP16]], i32* [[SVAR_CASTED]], align 4 -// CHECK8-NEXT: [[TMP17:%.*]] = load i32, i32* [[SVAR_CASTED]], align 4 -// CHECK8-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 7, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32, i32, [2 x i32]*, i32, [2 x %struct.S]*, %struct.S*, i32)* @.omp_outlined..1 to void (i32*, i32*, ...)*), i32 [[TMP11]], i32 [[TMP12]], [2 x i32]* [[VEC3]], i32 [[TMP14]], [2 x %struct.S]* [[S_ARR4]], %struct.S* [[TMP15]], i32 [[TMP17]]) +// CHECK8-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !6 +// CHECK8-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !6 +// CHECK8-NEXT: [[TMP13:%.*]] = load i32, i32* [[T_VAR2]], align 4, !llvm.access.group !6 +// CHECK8-NEXT: store i32 [[TMP13]], i32* [[T_VAR_CASTED]], align 4, !llvm.access.group !6 +// CHECK8-NEXT: [[TMP14:%.*]] = load i32, i32* [[T_VAR_CASTED]], align 4, !llvm.access.group !6 +// CHECK8-NEXT: [[TMP15:%.*]] = load %struct.S*, %struct.S** [[_TMP6]], align 4, !llvm.access.group !6 +// CHECK8-NEXT: [[TMP16:%.*]] = load i32, i32* [[SVAR7]], align 4, !llvm.access.group !6 +// CHECK8-NEXT: store i32 [[TMP16]], i32* [[SVAR_CASTED]], align 4, !llvm.access.group !6 +// CHECK8-NEXT: [[TMP17:%.*]] = load i32, i32* [[SVAR_CASTED]], align 4, !llvm.access.group !6 +// CHECK8-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 7, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32, i32, [2 x i32]*, i32, [2 x %struct.S]*, %struct.S*, i32)* @.omp_outlined..1 to void (i32*, i32*, ...)*), i32 [[TMP11]], i32 [[TMP12]], [2 x i32]* [[VEC3]], i32 [[TMP14]], [2 x %struct.S]* [[S_ARR4]], %struct.S* [[TMP15]], i32 [[TMP17]]), !llvm.access.group !6 // CHECK8-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK8: omp.inner.for.inc: -// CHECK8-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK8-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK8-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !6 +// CHECK8-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !6 // CHECK8-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP18]], [[TMP19]] -// CHECK8-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 -// CHECK8-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP5:![0-9]+]] +// CHECK8-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !6 +// CHECK8-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP7:![0-9]+]] // CHECK8: omp.inner.for.end: // CHECK8-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK8: omp.loop.exit: @@ -4910,35 +4910,35 @@ int main() { // CHECK8-NEXT: store i32 [[TMP10]], i32* [[DOTOMP_IV]], align 4 // CHECK8-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK8: omp.inner.for.cond: -// CHECK8-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK8-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK8-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !10 +// CHECK8-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !10 // CHECK8-NEXT: [[CMP8:%.*]] = icmp sle 
i32 [[TMP11]], [[TMP12]] // CHECK8-NEXT: br i1 [[CMP8]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_COND_CLEANUP:%.*]] // CHECK8: omp.inner.for.cond.cleanup: // CHECK8-NEXT: br label [[OMP_INNER_FOR_END:%.*]] // CHECK8: omp.inner.for.body: -// CHECK8-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK8-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !10 // CHECK8-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP13]], 1 // CHECK8-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK8-NEXT: store i32 [[ADD]], i32* [[I]], align 4 -// CHECK8-NEXT: [[TMP14:%.*]] = load i32, i32* [[T_VAR2]], align 4 -// CHECK8-NEXT: [[TMP15:%.*]] = load i32, i32* [[I]], align 4 +// CHECK8-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !10 +// CHECK8-NEXT: [[TMP14:%.*]] = load i32, i32* [[T_VAR2]], align 4, !llvm.access.group !10 +// CHECK8-NEXT: [[TMP15:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !10 // CHECK8-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x i32], [2 x i32]* [[VEC3]], i32 0, i32 [[TMP15]] -// CHECK8-NEXT: store i32 [[TMP14]], i32* [[ARRAYIDX]], align 4 -// CHECK8-NEXT: [[TMP16:%.*]] = load %struct.S*, %struct.S** [[_TMP6]], align 4 -// CHECK8-NEXT: [[TMP17:%.*]] = load i32, i32* [[I]], align 4 +// CHECK8-NEXT: store i32 [[TMP14]], i32* [[ARRAYIDX]], align 4, !llvm.access.group !10 +// CHECK8-NEXT: [[TMP16:%.*]] = load %struct.S*, %struct.S** [[_TMP6]], align 4, !llvm.access.group !10 +// CHECK8-NEXT: [[TMP17:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !10 // CHECK8-NEXT: [[ARRAYIDX9:%.*]] = getelementptr inbounds [2 x %struct.S], [2 x %struct.S]* [[S_ARR4]], i32 0, i32 [[TMP17]] // CHECK8-NEXT: [[TMP18:%.*]] = bitcast %struct.S* [[ARRAYIDX9]] to i8* // CHECK8-NEXT: [[TMP19:%.*]] = bitcast %struct.S* [[TMP16]] to i8* -// CHECK8-NEXT: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 4 [[TMP18]], i8* align 4 [[TMP19]], i32 4, i1 false) +// CHECK8-NEXT: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 4 [[TMP18]], i8* align 4 [[TMP19]], i32 4, i1 false), !llvm.access.group !10 // CHECK8-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK8: omp.body.continue: // CHECK8-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK8: omp.inner.for.inc: -// CHECK8-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK8-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !10 // CHECK8-NEXT: [[ADD10:%.*]] = add nsw i32 [[TMP20]], 1 -// CHECK8-NEXT: store i32 [[ADD10]], i32* [[DOTOMP_IV]], align 4 -// CHECK8-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP7:![0-9]+]] +// CHECK8-NEXT: store i32 [[ADD10]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !10 +// CHECK8-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP11:![0-9]+]] // CHECK8: omp.inner.for.end: // CHECK8-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK8: omp.loop.exit: @@ -5247,27 +5247,27 @@ int main() { // CHECK8-NEXT: store i32 [[TMP8]], i32* [[DOTOMP_IV]], align 4 // CHECK8-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK8: omp.inner.for.cond: -// CHECK8-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK8-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK8-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !15 +// CHECK8-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !15 // CHECK8-NEXT: [[CMP7:%.*]] = icmp sle i32 [[TMP9]], [[TMP10]] // CHECK8-NEXT: br i1 [[CMP7]], label [[OMP_INNER_FOR_BODY:%.*]], 
label [[OMP_INNER_FOR_COND_CLEANUP:%.*]] // CHECK8: omp.inner.for.cond.cleanup: // CHECK8-NEXT: br label [[OMP_INNER_FOR_END:%.*]] // CHECK8: omp.inner.for.body: -// CHECK8-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK8-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK8-NEXT: [[TMP13:%.*]] = load i32, i32* [[T_VAR2]], align 4 -// CHECK8-NEXT: store i32 [[TMP13]], i32* [[T_VAR_CASTED]], align 4 -// CHECK8-NEXT: [[TMP14:%.*]] = load i32, i32* [[T_VAR_CASTED]], align 4 -// CHECK8-NEXT: [[TMP15:%.*]] = load %struct.S.0*, %struct.S.0** [[_TMP6]], align 4 -// CHECK8-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 6, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32, i32, [2 x i32]*, i32, [2 x %struct.S.0]*, %struct.S.0*)* @.omp_outlined..3 to void (i32*, i32*, ...)*), i32 [[TMP11]], i32 [[TMP12]], [2 x i32]* [[VEC3]], i32 [[TMP14]], [2 x %struct.S.0]* [[S_ARR4]], %struct.S.0* [[TMP15]]) +// CHECK8-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !15 +// CHECK8-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !15 +// CHECK8-NEXT: [[TMP13:%.*]] = load i32, i32* [[T_VAR2]], align 4, !llvm.access.group !15 +// CHECK8-NEXT: store i32 [[TMP13]], i32* [[T_VAR_CASTED]], align 4, !llvm.access.group !15 +// CHECK8-NEXT: [[TMP14:%.*]] = load i32, i32* [[T_VAR_CASTED]], align 4, !llvm.access.group !15 +// CHECK8-NEXT: [[TMP15:%.*]] = load %struct.S.0*, %struct.S.0** [[_TMP6]], align 4, !llvm.access.group !15 +// CHECK8-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 6, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32, i32, [2 x i32]*, i32, [2 x %struct.S.0]*, %struct.S.0*)* @.omp_outlined..3 to void (i32*, i32*, ...)*), i32 [[TMP11]], i32 [[TMP12]], [2 x i32]* [[VEC3]], i32 [[TMP14]], [2 x %struct.S.0]* [[S_ARR4]], %struct.S.0* [[TMP15]]), !llvm.access.group !15 // CHECK8-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK8: omp.inner.for.inc: -// CHECK8-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK8-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK8-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !15 +// CHECK8-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !15 // CHECK8-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP16]], [[TMP17]] -// CHECK8-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 -// CHECK8-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP10:![0-9]+]] +// CHECK8-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !15 +// CHECK8-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP16:![0-9]+]] // CHECK8: omp.inner.for.end: // CHECK8-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK8: omp.loop.exit: @@ -5401,35 +5401,35 @@ int main() { // CHECK8-NEXT: store i32 [[TMP10]], i32* [[DOTOMP_IV]], align 4 // CHECK8-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK8: omp.inner.for.cond: -// CHECK8-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK8-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK8-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !18 +// CHECK8-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !18 // CHECK8-NEXT: [[CMP7:%.*]] = icmp sle i32 [[TMP11]], [[TMP12]] // CHECK8-NEXT: 
br i1 [[CMP7]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_COND_CLEANUP:%.*]] // CHECK8: omp.inner.for.cond.cleanup: // CHECK8-NEXT: br label [[OMP_INNER_FOR_END:%.*]] // CHECK8: omp.inner.for.body: -// CHECK8-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK8-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !18 // CHECK8-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP13]], 1 // CHECK8-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK8-NEXT: store i32 [[ADD]], i32* [[I]], align 4 -// CHECK8-NEXT: [[TMP14:%.*]] = load i32, i32* [[T_VAR2]], align 4 -// CHECK8-NEXT: [[TMP15:%.*]] = load i32, i32* [[I]], align 4 +// CHECK8-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !18 +// CHECK8-NEXT: [[TMP14:%.*]] = load i32, i32* [[T_VAR2]], align 4, !llvm.access.group !18 +// CHECK8-NEXT: [[TMP15:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !18 // CHECK8-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x i32], [2 x i32]* [[VEC3]], i32 0, i32 [[TMP15]] -// CHECK8-NEXT: store i32 [[TMP14]], i32* [[ARRAYIDX]], align 4 -// CHECK8-NEXT: [[TMP16:%.*]] = load %struct.S.0*, %struct.S.0** [[_TMP6]], align 4 -// CHECK8-NEXT: [[TMP17:%.*]] = load i32, i32* [[I]], align 4 +// CHECK8-NEXT: store i32 [[TMP14]], i32* [[ARRAYIDX]], align 4, !llvm.access.group !18 +// CHECK8-NEXT: [[TMP16:%.*]] = load %struct.S.0*, %struct.S.0** [[_TMP6]], align 4, !llvm.access.group !18 +// CHECK8-NEXT: [[TMP17:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !18 // CHECK8-NEXT: [[ARRAYIDX8:%.*]] = getelementptr inbounds [2 x %struct.S.0], [2 x %struct.S.0]* [[S_ARR4]], i32 0, i32 [[TMP17]] // CHECK8-NEXT: [[TMP18:%.*]] = bitcast %struct.S.0* [[ARRAYIDX8]] to i8* // CHECK8-NEXT: [[TMP19:%.*]] = bitcast %struct.S.0* [[TMP16]] to i8* -// CHECK8-NEXT: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 4 [[TMP18]], i8* align 4 [[TMP19]], i32 4, i1 false) +// CHECK8-NEXT: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 4 [[TMP18]], i8* align 4 [[TMP19]], i32 4, i1 false), !llvm.access.group !18 // CHECK8-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK8: omp.body.continue: // CHECK8-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK8: omp.inner.for.inc: -// CHECK8-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK8-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !18 // CHECK8-NEXT: [[ADD9:%.*]] = add nsw i32 [[TMP20]], 1 -// CHECK8-NEXT: store i32 [[ADD9]], i32* [[DOTOMP_IV]], align 4 -// CHECK8-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP11:![0-9]+]] +// CHECK8-NEXT: store i32 [[ADD9]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !18 +// CHECK8-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP19:![0-9]+]] // CHECK8: omp.inner.for.end: // CHECK8-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK8: omp.loop.exit: diff --git a/clang/test/OpenMP/target_teams_num_teams_codegen.cpp b/clang/test/OpenMP/target_teams_num_teams_codegen.cpp index 5122779d1ddb..007685e1d25b 100644 --- a/clang/test/OpenMP/target_teams_num_teams_codegen.cpp +++ b/clang/test/OpenMP/target_teams_num_teams_codegen.cpp @@ -5241,4 +5241,3 @@ int bar(int n){ // CHECK28-NEXT: store i32 [[ADD]], i32* [[A_ADDR]], align 4 // CHECK28-NEXT: ret void // -// \ No newline at end of file diff --git a/clang/test/OpenMP/target_teams_thread_limit_codegen.cpp b/clang/test/OpenMP/target_teams_thread_limit_codegen.cpp index 51663d0feb22..d80366e55beb 100644 --- a/clang/test/OpenMP/target_teams_thread_limit_codegen.cpp +++ 
b/clang/test/OpenMP/target_teams_thread_limit_codegen.cpp @@ -5437,4 +5437,3 @@ int bar(int n){ // CHECK28-NEXT: store i32 [[ADD]], i32* [[A_ADDR]], align 4 // CHECK28-NEXT: ret void // -// \ No newline at end of file diff --git a/clang/test/OpenMP/teams_codegen.cpp b/clang/test/OpenMP/teams_codegen.cpp index 37a87fc25003..efbb8d648c6c 100644 --- a/clang/test/OpenMP/teams_codegen.cpp +++ b/clang/test/OpenMP/teams_codegen.cpp @@ -4161,4 +4161,3 @@ void foo() { // CHECK44-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4 // CHECK44-NEXT: ret void // -// \ No newline at end of file diff --git a/clang/test/OpenMP/teams_distribute_codegen.cpp b/clang/test/OpenMP/teams_distribute_codegen.cpp index d2e99882fd11..6124911226a9 100644 --- a/clang/test/OpenMP/teams_distribute_codegen.cpp +++ b/clang/test/OpenMP/teams_distribute_codegen.cpp @@ -4346,4 +4346,3 @@ int main (int argc, char **argv) { // CHECK28-NEXT: call void @__tgt_register_requires(i64 1) // CHECK28-NEXT: ret void // -// \ No newline at end of file diff --git a/clang/test/OpenMP/teams_distribute_firstprivate_codegen.cpp b/clang/test/OpenMP/teams_distribute_firstprivate_codegen.cpp index 79626ef5333f..f766b6257c24 100644 --- a/clang/test/OpenMP/teams_distribute_firstprivate_codegen.cpp +++ b/clang/test/OpenMP/teams_distribute_firstprivate_codegen.cpp @@ -3791,4 +3791,3 @@ int main() { // CHECK10-NEXT: call void @__tgt_register_requires(i64 1) // CHECK10-NEXT: ret void // -// \ No newline at end of file diff --git a/clang/test/OpenMP/teams_distribute_parallel_for_dist_schedule_codegen.cpp b/clang/test/OpenMP/teams_distribute_parallel_for_dist_schedule_codegen.cpp index 025ea7585edd..965e00ee1d5b 100644 --- a/clang/test/OpenMP/teams_distribute_parallel_for_dist_schedule_codegen.cpp +++ b/clang/test/OpenMP/teams_distribute_parallel_for_dist_schedule_codegen.cpp @@ -8336,4 +8336,3 @@ int main (int argc, char **argv) { // CHECK12-NEXT: call void @__tgt_register_requires(i64 1) // CHECK12-NEXT: ret void // -// \ No newline at end of file diff --git a/clang/test/OpenMP/teams_distribute_parallel_for_firstprivate_codegen.cpp b/clang/test/OpenMP/teams_distribute_parallel_for_firstprivate_codegen.cpp index cb8e73661ba8..22ba0d8197a0 100644 --- a/clang/test/OpenMP/teams_distribute_parallel_for_firstprivate_codegen.cpp +++ b/clang/test/OpenMP/teams_distribute_parallel_for_firstprivate_codegen.cpp @@ -5079,4 +5079,3 @@ int main() { // CHECK10-NEXT: call void @__tgt_register_requires(i64 1) // CHECK10-NEXT: ret void // -// \ No newline at end of file diff --git a/clang/test/OpenMP/teams_distribute_parallel_for_num_threads_codegen.cpp b/clang/test/OpenMP/teams_distribute_parallel_for_num_threads_codegen.cpp index 0b8ebc471a9e..7e35b20b4bf9 100644 --- a/clang/test/OpenMP/teams_distribute_parallel_for_num_threads_codegen.cpp +++ b/clang/test/OpenMP/teams_distribute_parallel_for_num_threads_codegen.cpp @@ -4680,4 +4680,3 @@ int main() { // CHECK6-NEXT: call void @__tgt_register_requires(i64 1) // CHECK6-NEXT: ret void // -// \ No newline at end of file diff --git a/clang/test/OpenMP/teams_distribute_parallel_for_simd_codegen.cpp b/clang/test/OpenMP/teams_distribute_parallel_for_simd_codegen.cpp index 4a89c664740c..b3d27129b22d 100644 --- a/clang/test/OpenMP/teams_distribute_parallel_for_simd_codegen.cpp +++ b/clang/test/OpenMP/teams_distribute_parallel_for_simd_codegen.cpp @@ -414,23 +414,23 @@ int main (int argc, char **argv) { // CHECK1-NEXT: store i32 [[TMP12]], i32* [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label 
[[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !5 +// CHECK1-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !5 // CHECK1-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP13]], [[TMP14]] // CHECK1-NEXT: br i1 [[CMP5]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK1-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !5 // CHECK1-NEXT: [[TMP16:%.*]] = zext i32 [[TMP15]] to i64 -// CHECK1-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !5 // CHECK1-NEXT: [[TMP18:%.*]] = zext i32 [[TMP17]] to i64 -// CHECK1-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 4, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64, i32*, [100 x i32]*)* @.omp_outlined..1 to void (i32*, i32*, ...)*), i64 [[TMP16]], i64 [[TMP18]], i32* [[TMP0]], [100 x i32]* [[TMP1]]) +// CHECK1-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 4, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64, i32*, [100 x i32]*)* @.omp_outlined..1 to void (i32*, i32*, ...)*), i64 [[TMP16]], i64 [[TMP18]], i32* [[TMP0]], [100 x i32]* [[TMP1]]), !llvm.access.group !5 // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK1-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !5 +// CHECK1-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !5 // CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP19]], [[TMP20]] -// CHECK1-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP4:![0-9]+]] +// CHECK1-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !5 +// CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP6:![0-9]+]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK1: omp.loop.exit: @@ -524,27 +524,27 @@ int main (int argc, char **argv) { // CHECK1-NEXT: store i32 [[TMP14]], i32* [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !10 +// CHECK1-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !10 // CHECK1-NEXT: [[CMP6:%.*]] = icmp sle i32 [[TMP15]], [[TMP16]] // CHECK1-NEXT: br i1 [[CMP6]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !10 // CHECK1-NEXT: 
[[MUL:%.*]] = mul nsw i32 [[TMP17]], 1 // CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK1-NEXT: store i32 [[ADD]], i32* [[I4]], align 4 -// CHECK1-NEXT: [[TMP18:%.*]] = load i32, i32* [[I4]], align 4 +// CHECK1-NEXT: store i32 [[ADD]], i32* [[I4]], align 4, !llvm.access.group !10 +// CHECK1-NEXT: [[TMP18:%.*]] = load i32, i32* [[I4]], align 4, !llvm.access.group !10 // CHECK1-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP18]] to i64 // CHECK1-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [100 x i32], [100 x i32]* [[TMP1]], i64 0, i64 [[IDXPROM]] -// CHECK1-NEXT: store i32 0, i32* [[ARRAYIDX]], align 4 +// CHECK1-NEXT: store i32 0, i32* [[ARRAYIDX]], align 4, !llvm.access.group !10 // CHECK1-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK1: omp.body.continue: // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !10 // CHECK1-NEXT: [[ADD7:%.*]] = add nsw i32 [[TMP19]], 1 -// CHECK1-NEXT: store i32 [[ADD7]], i32* [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP7:![0-9]+]] +// CHECK1-NEXT: store i32 [[ADD7]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !10 +// CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP11:![0-9]+]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK1: omp.loop.exit: @@ -665,7 +665,7 @@ int main (int argc, char **argv) { // CHECK1-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 // CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP20]], [[TMP21]] // CHECK1-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP10:![0-9]+]] +// CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP15:![0-9]+]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK1: omp.loop.exit: @@ -791,7 +791,7 @@ int main (int argc, char **argv) { // CHECK1-NEXT: [[TMP23:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 // CHECK1-NEXT: [[ADD9:%.*]] = add nsw i32 [[TMP23]], 1 // CHECK1-NEXT: store i32 [[ADD9]], i32* [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP12:![0-9]+]] +// CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP17:![0-9]+]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK1: omp.loop.exit: @@ -1061,23 +1061,23 @@ int main (int argc, char **argv) { // CHECK2-NEXT: store i32 [[TMP12]], i32* [[DOTOMP_IV]], align 4 // CHECK2-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK2: omp.inner.for.cond: -// CHECK2-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK2-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK2-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !5 +// CHECK2-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !5 // CHECK2-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP13]], [[TMP14]] // CHECK2-NEXT: br i1 [[CMP5]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK2: omp.inner.for.body: -// CHECK2-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK2-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !5 // CHECK2-NEXT: [[TMP16:%.*]] = zext i32 [[TMP15]] to i64 -// CHECK2-NEXT: [[TMP17:%.*]] = load i32, i32* 
[[DOTOMP_COMB_UB]], align 4 +// CHECK2-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !5 // CHECK2-NEXT: [[TMP18:%.*]] = zext i32 [[TMP17]] to i64 -// CHECK2-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 4, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64, i32*, [100 x i32]*)* @.omp_outlined..1 to void (i32*, i32*, ...)*), i64 [[TMP16]], i64 [[TMP18]], i32* [[TMP0]], [100 x i32]* [[TMP1]]) +// CHECK2-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 4, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64, i32*, [100 x i32]*)* @.omp_outlined..1 to void (i32*, i32*, ...)*), i64 [[TMP16]], i64 [[TMP18]], i32* [[TMP0]], [100 x i32]* [[TMP1]]), !llvm.access.group !5 // CHECK2-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK2: omp.inner.for.inc: -// CHECK2-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK2-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK2-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !5 +// CHECK2-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !5 // CHECK2-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP19]], [[TMP20]] -// CHECK2-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 -// CHECK2-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP4:![0-9]+]] +// CHECK2-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !5 +// CHECK2-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP6:![0-9]+]] // CHECK2: omp.inner.for.end: // CHECK2-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK2: omp.loop.exit: @@ -1171,27 +1171,27 @@ int main (int argc, char **argv) { // CHECK2-NEXT: store i32 [[TMP14]], i32* [[DOTOMP_IV]], align 4 // CHECK2-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK2: omp.inner.for.cond: -// CHECK2-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK2-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK2-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !10 +// CHECK2-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !10 // CHECK2-NEXT: [[CMP6:%.*]] = icmp sle i32 [[TMP15]], [[TMP16]] // CHECK2-NEXT: br i1 [[CMP6]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK2: omp.inner.for.body: -// CHECK2-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK2-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !10 // CHECK2-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP17]], 1 // CHECK2-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK2-NEXT: store i32 [[ADD]], i32* [[I4]], align 4 -// CHECK2-NEXT: [[TMP18:%.*]] = load i32, i32* [[I4]], align 4 +// CHECK2-NEXT: store i32 [[ADD]], i32* [[I4]], align 4, !llvm.access.group !10 +// CHECK2-NEXT: [[TMP18:%.*]] = load i32, i32* [[I4]], align 4, !llvm.access.group !10 // CHECK2-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP18]] to i64 // CHECK2-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [100 x i32], [100 x i32]* [[TMP1]], i64 0, i64 [[IDXPROM]] -// CHECK2-NEXT: store i32 0, i32* [[ARRAYIDX]], align 4 +// CHECK2-NEXT: store i32 0, i32* [[ARRAYIDX]], align 4, !llvm.access.group !10 // CHECK2-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK2: omp.body.continue: // CHECK2-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK2: omp.inner.for.inc: -// 
CHECK2-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK2-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !10 // CHECK2-NEXT: [[ADD7:%.*]] = add nsw i32 [[TMP19]], 1 -// CHECK2-NEXT: store i32 [[ADD7]], i32* [[DOTOMP_IV]], align 4 -// CHECK2-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP7:![0-9]+]] +// CHECK2-NEXT: store i32 [[ADD7]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !10 +// CHECK2-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP11:![0-9]+]] // CHECK2: omp.inner.for.end: // CHECK2-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK2: omp.loop.exit: @@ -1312,7 +1312,7 @@ int main (int argc, char **argv) { // CHECK2-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 // CHECK2-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP20]], [[TMP21]] // CHECK2-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 -// CHECK2-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP10:![0-9]+]] +// CHECK2-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP15:![0-9]+]] // CHECK2: omp.inner.for.end: // CHECK2-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK2: omp.loop.exit: @@ -1438,7 +1438,7 @@ int main (int argc, char **argv) { // CHECK2-NEXT: [[TMP23:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 // CHECK2-NEXT: [[ADD9:%.*]] = add nsw i32 [[TMP23]], 1 // CHECK2-NEXT: store i32 [[ADD9]], i32* [[DOTOMP_IV]], align 4 -// CHECK2-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP12:![0-9]+]] +// CHECK2-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP17:![0-9]+]] // CHECK2: omp.inner.for.end: // CHECK2-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK2: omp.loop.exit: @@ -1700,21 +1700,21 @@ int main (int argc, char **argv) { // CHECK3-NEXT: store i32 [[TMP12]], i32* [[DOTOMP_IV]], align 4 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK3: omp.inner.for.cond: -// CHECK3-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK3-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !6 +// CHECK3-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !6 // CHECK3-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP13]], [[TMP14]] // CHECK3-NEXT: br i1 [[CMP5]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK3: omp.inner.for.body: -// CHECK3-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK3-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK3-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 4, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32, i32, i32*, [100 x i32]*)* @.omp_outlined..1 to void (i32*, i32*, ...)*), i32 [[TMP15]], i32 [[TMP16]], i32* [[TMP0]], [100 x i32]* [[TMP1]]) +// CHECK3-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !6 +// CHECK3-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !6 +// CHECK3-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) 
@__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 4, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32, i32, i32*, [100 x i32]*)* @.omp_outlined..1 to void (i32*, i32*, ...)*), i32 [[TMP15]], i32 [[TMP16]], i32* [[TMP0]], [100 x i32]* [[TMP1]]), !llvm.access.group !6 // CHECK3-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK3: omp.inner.for.inc: -// CHECK3-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK3-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !6 +// CHECK3-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !6 // CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP17]], [[TMP18]] -// CHECK3-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP5:![0-9]+]] +// CHECK3-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !6 +// CHECK3-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP7:![0-9]+]] // CHECK3: omp.inner.for.end: // CHECK3-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK3: omp.loop.exit: @@ -1806,26 +1806,26 @@ int main (int argc, char **argv) { // CHECK3-NEXT: store i32 [[TMP14]], i32* [[DOTOMP_IV]], align 4 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK3: omp.inner.for.cond: -// CHECK3-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !11 +// CHECK3-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !11 // CHECK3-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP15]], [[TMP16]] // CHECK3-NEXT: br i1 [[CMP5]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK3: omp.inner.for.body: -// CHECK3-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !11 // CHECK3-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP17]], 1 // CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK3-NEXT: store i32 [[ADD]], i32* [[I3]], align 4 -// CHECK3-NEXT: [[TMP18:%.*]] = load i32, i32* [[I3]], align 4 +// CHECK3-NEXT: store i32 [[ADD]], i32* [[I3]], align 4, !llvm.access.group !11 +// CHECK3-NEXT: [[TMP18:%.*]] = load i32, i32* [[I3]], align 4, !llvm.access.group !11 // CHECK3-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [100 x i32], [100 x i32]* [[TMP1]], i32 0, i32 [[TMP18]] -// CHECK3-NEXT: store i32 0, i32* [[ARRAYIDX]], align 4 +// CHECK3-NEXT: store i32 0, i32* [[ARRAYIDX]], align 4, !llvm.access.group !11 // CHECK3-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK3: omp.body.continue: // CHECK3-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK3: omp.inner.for.inc: -// CHECK3-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !11 // CHECK3-NEXT: [[ADD6:%.*]] = add nsw i32 [[TMP19]], 1 -// CHECK3-NEXT: store i32 [[ADD6]], i32* [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP8:![0-9]+]] +// CHECK3-NEXT: store i32 [[ADD6]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !11 +// CHECK3-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP12:![0-9]+]] // CHECK3: omp.inner.for.end: // CHECK3-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK3: omp.loop.exit: @@ -1942,7 +1942,7 @@ int main (int 
argc, char **argv) { // CHECK3-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 // CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP18]], [[TMP19]] // CHECK3-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP11:![0-9]+]] +// CHECK3-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP16:![0-9]+]] // CHECK3: omp.inner.for.end: // CHECK3-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK3: omp.loop.exit: @@ -2065,7 +2065,7 @@ int main (int argc, char **argv) { // CHECK3-NEXT: [[TMP23:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 // CHECK3-NEXT: [[ADD8:%.*]] = add nsw i32 [[TMP23]], 1 // CHECK3-NEXT: store i32 [[ADD8]], i32* [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP13:![0-9]+]] +// CHECK3-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP18:![0-9]+]] // CHECK3: omp.inner.for.end: // CHECK3-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK3: omp.loop.exit: @@ -2327,21 +2327,21 @@ int main (int argc, char **argv) { // CHECK4-NEXT: store i32 [[TMP12]], i32* [[DOTOMP_IV]], align 4 // CHECK4-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK4: omp.inner.for.cond: -// CHECK4-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK4-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK4-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !6 +// CHECK4-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !6 // CHECK4-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP13]], [[TMP14]] // CHECK4-NEXT: br i1 [[CMP5]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK4: omp.inner.for.body: -// CHECK4-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK4-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK4-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 4, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32, i32, i32*, [100 x i32]*)* @.omp_outlined..1 to void (i32*, i32*, ...)*), i32 [[TMP15]], i32 [[TMP16]], i32* [[TMP0]], [100 x i32]* [[TMP1]]) +// CHECK4-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !6 +// CHECK4-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !6 +// CHECK4-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) 
@__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 4, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32, i32, i32*, [100 x i32]*)* @.omp_outlined..1 to void (i32*, i32*, ...)*), i32 [[TMP15]], i32 [[TMP16]], i32* [[TMP0]], [100 x i32]* [[TMP1]]), !llvm.access.group !6 // CHECK4-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK4: omp.inner.for.inc: -// CHECK4-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK4-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK4-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !6 +// CHECK4-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !6 // CHECK4-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP17]], [[TMP18]] -// CHECK4-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 -// CHECK4-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP5:![0-9]+]] +// CHECK4-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !6 +// CHECK4-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP7:![0-9]+]] // CHECK4: omp.inner.for.end: // CHECK4-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK4: omp.loop.exit: @@ -2433,26 +2433,26 @@ int main (int argc, char **argv) { // CHECK4-NEXT: store i32 [[TMP14]], i32* [[DOTOMP_IV]], align 4 // CHECK4-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK4: omp.inner.for.cond: -// CHECK4-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK4-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK4-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !11 +// CHECK4-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !11 // CHECK4-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP15]], [[TMP16]] // CHECK4-NEXT: br i1 [[CMP5]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK4: omp.inner.for.body: -// CHECK4-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK4-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !11 // CHECK4-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP17]], 1 // CHECK4-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK4-NEXT: store i32 [[ADD]], i32* [[I3]], align 4 -// CHECK4-NEXT: [[TMP18:%.*]] = load i32, i32* [[I3]], align 4 +// CHECK4-NEXT: store i32 [[ADD]], i32* [[I3]], align 4, !llvm.access.group !11 +// CHECK4-NEXT: [[TMP18:%.*]] = load i32, i32* [[I3]], align 4, !llvm.access.group !11 // CHECK4-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [100 x i32], [100 x i32]* [[TMP1]], i32 0, i32 [[TMP18]] -// CHECK4-NEXT: store i32 0, i32* [[ARRAYIDX]], align 4 +// CHECK4-NEXT: store i32 0, i32* [[ARRAYIDX]], align 4, !llvm.access.group !11 // CHECK4-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK4: omp.body.continue: // CHECK4-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK4: omp.inner.for.inc: -// CHECK4-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK4-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !11 // CHECK4-NEXT: [[ADD6:%.*]] = add nsw i32 [[TMP19]], 1 -// CHECK4-NEXT: store i32 [[ADD6]], i32* [[DOTOMP_IV]], align 4 -// CHECK4-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP8:![0-9]+]] +// CHECK4-NEXT: store i32 [[ADD6]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !11 +// CHECK4-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP12:![0-9]+]] // CHECK4: omp.inner.for.end: // CHECK4-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK4: omp.loop.exit: @@ -2569,7 +2569,7 @@ int main (int 
argc, char **argv) { // CHECK4-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 // CHECK4-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP18]], [[TMP19]] // CHECK4-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 -// CHECK4-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP11:![0-9]+]] +// CHECK4-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP16:![0-9]+]] // CHECK4: omp.inner.for.end: // CHECK4-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK4: omp.loop.exit: @@ -2692,7 +2692,7 @@ int main (int argc, char **argv) { // CHECK4-NEXT: [[TMP23:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 // CHECK4-NEXT: [[ADD8:%.*]] = add nsw i32 [[TMP23]], 1 // CHECK4-NEXT: store i32 [[ADD8]], i32* [[DOTOMP_IV]], align 4 -// CHECK4-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP13:![0-9]+]] +// CHECK4-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP18:![0-9]+]] // CHECK4: omp.inner.for.end: // CHECK4-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK4: omp.loop.exit: @@ -3467,7 +3467,7 @@ int main (int argc, char **argv) { // CHECK9-NEXT: [[TMP22:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 // CHECK9-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP21]], [[TMP22]] // CHECK9-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 -// CHECK9-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP3:![0-9]+]] +// CHECK9-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP4:![0-9]+]] // CHECK9: omp.inner.for.end: // CHECK9-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK9: omp.loop.exit: @@ -3595,7 +3595,7 @@ int main (int argc, char **argv) { // CHECK9-NEXT: [[TMP24:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 // CHECK9-NEXT: [[ADD9:%.*]] = add nsw i32 [[TMP24]], 1 // CHECK9-NEXT: store i32 [[ADD9]], i32* [[DOTOMP_IV]], align 4 -// CHECK9-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP6:![0-9]+]] +// CHECK9-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP7:![0-9]+]] // CHECK9: omp.inner.for.end: // CHECK9-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK9: omp.loop.exit: @@ -3834,7 +3834,7 @@ int main (int argc, char **argv) { // CHECK10-NEXT: [[TMP22:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 // CHECK10-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP21]], [[TMP22]] // CHECK10-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 -// CHECK10-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP3:![0-9]+]] +// CHECK10-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP4:![0-9]+]] // CHECK10: omp.inner.for.end: // CHECK10-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK10: omp.loop.exit: @@ -3962,7 +3962,7 @@ int main (int argc, char **argv) { // CHECK10-NEXT: [[TMP24:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 // CHECK10-NEXT: [[ADD9:%.*]] = add nsw i32 [[TMP24]], 1 // CHECK10-NEXT: store i32 [[ADD9]], i32* [[DOTOMP_IV]], align 4 -// CHECK10-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP6:![0-9]+]] +// CHECK10-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP7:![0-9]+]] // CHECK10: omp.inner.for.end: // CHECK10-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK10: omp.loop.exit: @@ -4195,7 +4195,7 @@ int main (int argc, char **argv) { // CHECK11-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 // CHECK11-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP19]], [[TMP20]] // CHECK11-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 -// CHECK11-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP4:![0-9]+]] +// CHECK11-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP5:![0-9]+]] // CHECK11: omp.inner.for.end: // CHECK11-NEXT: br label 
[[OMP_LOOP_EXIT:%.*]] // CHECK11: omp.loop.exit: @@ -4320,7 +4320,7 @@ int main (int argc, char **argv) { // CHECK11-NEXT: [[TMP24:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 // CHECK11-NEXT: [[ADD8:%.*]] = add nsw i32 [[TMP24]], 1 // CHECK11-NEXT: store i32 [[ADD8]], i32* [[DOTOMP_IV]], align 4 -// CHECK11-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP7:![0-9]+]] +// CHECK11-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP8:![0-9]+]] // CHECK11: omp.inner.for.end: // CHECK11-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK11: omp.loop.exit: @@ -4553,7 +4553,7 @@ int main (int argc, char **argv) { // CHECK12-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 // CHECK12-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP19]], [[TMP20]] // CHECK12-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 -// CHECK12-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP4:![0-9]+]] +// CHECK12-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP5:![0-9]+]] // CHECK12: omp.inner.for.end: // CHECK12-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK12: omp.loop.exit: @@ -4678,7 +4678,7 @@ int main (int argc, char **argv) { // CHECK12-NEXT: [[TMP24:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 // CHECK12-NEXT: [[ADD8:%.*]] = add nsw i32 [[TMP24]], 1 // CHECK12-NEXT: store i32 [[ADD8]], i32* [[DOTOMP_IV]], align 4 -// CHECK12-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP7:![0-9]+]] +// CHECK12-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP8:![0-9]+]] // CHECK12: omp.inner.for.end: // CHECK12-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK12: omp.loop.exit: @@ -5174,7 +5174,7 @@ int main (int argc, char **argv) { // CHECK17-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 // CHECK17-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP13]], [[TMP14]] // CHECK17-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 -// CHECK17-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP3:![0-9]+]] +// CHECK17-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP4:![0-9]+]] // CHECK17: omp.inner.for.end: // CHECK17-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK17: omp.loop.exit: @@ -5270,7 +5270,7 @@ int main (int argc, char **argv) { // CHECK17-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 // CHECK17-NEXT: [[ADD6:%.*]] = add nsw i32 [[TMP14]], 1 // CHECK17-NEXT: store i32 [[ADD6]], i32* [[DOTOMP_IV]], align 4 -// CHECK17-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP6:![0-9]+]] +// CHECK17-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP7:![0-9]+]] // CHECK17: omp.inner.for.end: // CHECK17-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK17: omp.loop.exit: @@ -5429,7 +5429,7 @@ int main (int argc, char **argv) { // CHECK18-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 // CHECK18-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP13]], [[TMP14]] // CHECK18-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 -// CHECK18-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP3:![0-9]+]] +// CHECK18-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP4:![0-9]+]] // CHECK18: omp.inner.for.end: // CHECK18-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK18: omp.loop.exit: @@ -5525,7 +5525,7 @@ int main (int argc, char **argv) { // CHECK18-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 // CHECK18-NEXT: [[ADD6:%.*]] = add nsw i32 [[TMP14]], 1 // CHECK18-NEXT: store i32 [[ADD6]], i32* [[DOTOMP_IV]], align 4 -// CHECK18-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP6:![0-9]+]] +// CHECK18-NEXT: br label 
[[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP7:![0-9]+]] // CHECK18: omp.inner.for.end: // CHECK18-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK18: omp.loop.exit: @@ -5680,7 +5680,7 @@ int main (int argc, char **argv) { // CHECK19-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 // CHECK19-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP11]], [[TMP12]] // CHECK19-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 -// CHECK19-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP4:![0-9]+]] +// CHECK19-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP5:![0-9]+]] // CHECK19: omp.inner.for.end: // CHECK19-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK19: omp.loop.exit: @@ -5773,7 +5773,7 @@ int main (int argc, char **argv) { // CHECK19-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 // CHECK19-NEXT: [[ADD5:%.*]] = add nsw i32 [[TMP14]], 1 // CHECK19-NEXT: store i32 [[ADD5]], i32* [[DOTOMP_IV]], align 4 -// CHECK19-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP7:![0-9]+]] +// CHECK19-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP8:![0-9]+]] // CHECK19: omp.inner.for.end: // CHECK19-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK19: omp.loop.exit: @@ -5928,7 +5928,7 @@ int main (int argc, char **argv) { // CHECK20-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 // CHECK20-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP11]], [[TMP12]] // CHECK20-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 -// CHECK20-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP4:![0-9]+]] +// CHECK20-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP5:![0-9]+]] // CHECK20: omp.inner.for.end: // CHECK20-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK20: omp.loop.exit: @@ -6021,7 +6021,7 @@ int main (int argc, char **argv) { // CHECK20-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 // CHECK20-NEXT: [[ADD5:%.*]] = add nsw i32 [[TMP14]], 1 // CHECK20-NEXT: store i32 [[ADD5]], i32* [[DOTOMP_IV]], align 4 -// CHECK20-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP7:![0-9]+]] +// CHECK20-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP8:![0-9]+]] // CHECK20: omp.inner.for.end: // CHECK20-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK20: omp.loop.exit: @@ -6509,7 +6509,7 @@ int main (int argc, char **argv) { // CHECK25-NEXT: [[TMP22:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 // CHECK25-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP21]], [[TMP22]] // CHECK25-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 -// CHECK25-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP4:![0-9]+]] +// CHECK25-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP5:![0-9]+]] // CHECK25: omp.inner.for.end: // CHECK25-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK25: omp.loop.exit: @@ -6637,7 +6637,7 @@ int main (int argc, char **argv) { // CHECK25-NEXT: [[TMP24:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 // CHECK25-NEXT: [[ADD9:%.*]] = add nsw i32 [[TMP24]], 1 // CHECK25-NEXT: store i32 [[ADD9]], i32* [[DOTOMP_IV]], align 4 -// CHECK25-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP7:![0-9]+]] +// CHECK25-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP8:![0-9]+]] // CHECK25: omp.inner.for.end: // CHECK25-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK25: omp.loop.exit: @@ -6788,23 +6788,23 @@ int main (int argc, char **argv) { // CHECK25-NEXT: store i32 [[TMP5]], i32* [[DOTOMP_IV]], align 4 // CHECK25-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK25: omp.inner.for.cond: -// CHECK25-NEXT: [[TMP6:%.*]] = load i32, i32* 
[[DOTOMP_IV]], align 4 -// CHECK25-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK25-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !11 +// CHECK25-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !11 // CHECK25-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] // CHECK25-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK25: omp.inner.for.body: -// CHECK25-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK25-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !11 // CHECK25-NEXT: [[TMP9:%.*]] = zext i32 [[TMP8]] to i64 -// CHECK25-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK25-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !11 // CHECK25-NEXT: [[TMP11:%.*]] = zext i32 [[TMP10]] to i64 -// CHECK25-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB4]], i32 3, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64, [10 x i32]*)* @.omp_outlined..3 to void (i32*, i32*, ...)*), i64 [[TMP9]], i64 [[TMP11]], [10 x i32]* [[TMP0]]) +// CHECK25-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB4]], i32 3, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64, [10 x i32]*)* @.omp_outlined..3 to void (i32*, i32*, ...)*), i64 [[TMP9]], i64 [[TMP11]], [10 x i32]* [[TMP0]]), !llvm.access.group !11 // CHECK25-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK25: omp.inner.for.inc: -// CHECK25-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK25-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK25-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !11 +// CHECK25-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !11 // CHECK25-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP12]], [[TMP13]] -// CHECK25-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 -// CHECK25-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP10:![0-9]+]] +// CHECK25-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !11 +// CHECK25-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP12:![0-9]+]] // CHECK25: omp.inner.for.end: // CHECK25-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK25: omp.loop.exit: @@ -6868,27 +6868,27 @@ int main (int argc, char **argv) { // CHECK25-NEXT: store i32 [[TMP7]], i32* [[DOTOMP_IV]], align 4 // CHECK25-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK25: omp.inner.for.cond: -// CHECK25-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK25-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK25-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !15 +// CHECK25-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !15 // CHECK25-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] // CHECK25-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK25: omp.inner.for.body: -// CHECK25-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK25-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !15 // CHECK25-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP10]], 1 // CHECK25-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK25-NEXT: store 
i32 [[ADD]], i32* [[I]], align 4 -// CHECK25-NEXT: [[TMP11:%.*]] = load i32, i32* [[I]], align 4 +// CHECK25-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !15 +// CHECK25-NEXT: [[TMP11:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !15 // CHECK25-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP11]] to i64 // CHECK25-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], [10 x i32]* [[TMP0]], i64 0, i64 [[IDXPROM]] -// CHECK25-NEXT: store i32 0, i32* [[ARRAYIDX]], align 4 +// CHECK25-NEXT: store i32 0, i32* [[ARRAYIDX]], align 4, !llvm.access.group !15 // CHECK25-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK25: omp.body.continue: // CHECK25-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK25: omp.inner.for.inc: -// CHECK25-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK25-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !15 // CHECK25-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP12]], 1 -// CHECK25-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4 -// CHECK25-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP12:![0-9]+]] +// CHECK25-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !15 +// CHECK25-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP16:![0-9]+]] // CHECK25: omp.inner.for.end: // CHECK25-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK25: omp.loop.exit: @@ -7120,7 +7120,7 @@ int main (int argc, char **argv) { // CHECK26-NEXT: [[TMP22:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 // CHECK26-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP21]], [[TMP22]] // CHECK26-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 -// CHECK26-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP4:![0-9]+]] +// CHECK26-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP5:![0-9]+]] // CHECK26: omp.inner.for.end: // CHECK26-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK26: omp.loop.exit: @@ -7248,7 +7248,7 @@ int main (int argc, char **argv) { // CHECK26-NEXT: [[TMP24:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 // CHECK26-NEXT: [[ADD9:%.*]] = add nsw i32 [[TMP24]], 1 // CHECK26-NEXT: store i32 [[ADD9]], i32* [[DOTOMP_IV]], align 4 -// CHECK26-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP7:![0-9]+]] +// CHECK26-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP8:![0-9]+]] // CHECK26: omp.inner.for.end: // CHECK26-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK26: omp.loop.exit: @@ -7399,23 +7399,23 @@ int main (int argc, char **argv) { // CHECK26-NEXT: store i32 [[TMP5]], i32* [[DOTOMP_IV]], align 4 // CHECK26-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK26: omp.inner.for.cond: -// CHECK26-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK26-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK26-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !11 +// CHECK26-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !11 // CHECK26-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] // CHECK26-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK26: omp.inner.for.body: -// CHECK26-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK26-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !11 // CHECK26-NEXT: [[TMP9:%.*]] = zext i32 [[TMP8]] to i64 -// CHECK26-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK26-NEXT: [[TMP10:%.*]] = load i32, i32* 
[[DOTOMP_COMB_UB]], align 4, !llvm.access.group !11 // CHECK26-NEXT: [[TMP11:%.*]] = zext i32 [[TMP10]] to i64 -// CHECK26-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB4]], i32 3, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64, [10 x i32]*)* @.omp_outlined..3 to void (i32*, i32*, ...)*), i64 [[TMP9]], i64 [[TMP11]], [10 x i32]* [[TMP0]]) +// CHECK26-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB4]], i32 3, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64, [10 x i32]*)* @.omp_outlined..3 to void (i32*, i32*, ...)*), i64 [[TMP9]], i64 [[TMP11]], [10 x i32]* [[TMP0]]), !llvm.access.group !11 // CHECK26-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK26: omp.inner.for.inc: -// CHECK26-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK26-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK26-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !11 +// CHECK26-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !11 // CHECK26-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP12]], [[TMP13]] -// CHECK26-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 -// CHECK26-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP10:![0-9]+]] +// CHECK26-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !11 +// CHECK26-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP12:![0-9]+]] // CHECK26: omp.inner.for.end: // CHECK26-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK26: omp.loop.exit: @@ -7479,27 +7479,27 @@ int main (int argc, char **argv) { // CHECK26-NEXT: store i32 [[TMP7]], i32* [[DOTOMP_IV]], align 4 // CHECK26-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK26: omp.inner.for.cond: -// CHECK26-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK26-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK26-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !15 +// CHECK26-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !15 // CHECK26-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] // CHECK26-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK26: omp.inner.for.body: -// CHECK26-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK26-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !15 // CHECK26-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP10]], 1 // CHECK26-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK26-NEXT: store i32 [[ADD]], i32* [[I]], align 4 -// CHECK26-NEXT: [[TMP11:%.*]] = load i32, i32* [[I]], align 4 +// CHECK26-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !15 +// CHECK26-NEXT: [[TMP11:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !15 // CHECK26-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP11]] to i64 // CHECK26-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], [10 x i32]* [[TMP0]], i64 0, i64 [[IDXPROM]] -// CHECK26-NEXT: store i32 0, i32* [[ARRAYIDX]], align 4 +// CHECK26-NEXT: store i32 0, i32* [[ARRAYIDX]], align 4, !llvm.access.group !15 // CHECK26-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK26: omp.body.continue: // CHECK26-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK26: omp.inner.for.inc: -// CHECK26-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK26-NEXT: 
[[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !15 // CHECK26-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP12]], 1 -// CHECK26-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4 -// CHECK26-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP12:![0-9]+]] +// CHECK26-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !15 +// CHECK26-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP16:![0-9]+]] // CHECK26: omp.inner.for.end: // CHECK26-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK26: omp.loop.exit: @@ -7725,7 +7725,7 @@ int main (int argc, char **argv) { // CHECK27-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 // CHECK27-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP19]], [[TMP20]] // CHECK27-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 -// CHECK27-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP5:![0-9]+]] +// CHECK27-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP6:![0-9]+]] // CHECK27: omp.inner.for.end: // CHECK27-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK27: omp.loop.exit: @@ -7850,7 +7850,7 @@ int main (int argc, char **argv) { // CHECK27-NEXT: [[TMP24:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 // CHECK27-NEXT: [[ADD8:%.*]] = add nsw i32 [[TMP24]], 1 // CHECK27-NEXT: store i32 [[ADD8]], i32* [[DOTOMP_IV]], align 4 -// CHECK27-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP8:![0-9]+]] +// CHECK27-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP9:![0-9]+]] // CHECK27: omp.inner.for.end: // CHECK27-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK27: omp.loop.exit: @@ -7997,21 +7997,21 @@ int main (int argc, char **argv) { // CHECK27-NEXT: store i32 [[TMP5]], i32* [[DOTOMP_IV]], align 4 // CHECK27-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK27: omp.inner.for.cond: -// CHECK27-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK27-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK27-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !12 +// CHECK27-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !12 // CHECK27-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] // CHECK27-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK27: omp.inner.for.body: -// CHECK27-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK27-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK27-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB4]], i32 3, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32, i32, [10 x i32]*)* @.omp_outlined..3 to void (i32*, i32*, ...)*), i32 [[TMP8]], i32 [[TMP9]], [10 x i32]* [[TMP0]]) +// CHECK27-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !12 +// CHECK27-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !12 +// CHECK27-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) 
@__kmpc_fork_call(%struct.ident_t* @[[GLOB4]], i32 3, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32, i32, [10 x i32]*)* @.omp_outlined..3 to void (i32*, i32*, ...)*), i32 [[TMP8]], i32 [[TMP9]], [10 x i32]* [[TMP0]]), !llvm.access.group !12 // CHECK27-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK27: omp.inner.for.inc: -// CHECK27-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK27-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK27-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !12 +// CHECK27-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !12 // CHECK27-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP10]], [[TMP11]] -// CHECK27-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 -// CHECK27-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP11:![0-9]+]] +// CHECK27-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !12 +// CHECK27-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP13:![0-9]+]] // CHECK27: omp.inner.for.end: // CHECK27-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK27: omp.loop.exit: @@ -8073,26 +8073,26 @@ int main (int argc, char **argv) { // CHECK27-NEXT: store i32 [[TMP7]], i32* [[DOTOMP_IV]], align 4 // CHECK27-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK27: omp.inner.for.cond: -// CHECK27-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK27-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK27-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !16 +// CHECK27-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !16 // CHECK27-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] // CHECK27-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK27: omp.inner.for.body: -// CHECK27-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK27-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !16 // CHECK27-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP10]], 1 // CHECK27-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK27-NEXT: store i32 [[ADD]], i32* [[I]], align 4 -// CHECK27-NEXT: [[TMP11:%.*]] = load i32, i32* [[I]], align 4 +// CHECK27-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !16 +// CHECK27-NEXT: [[TMP11:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !16 // CHECK27-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], [10 x i32]* [[TMP0]], i32 0, i32 [[TMP11]] -// CHECK27-NEXT: store i32 0, i32* [[ARRAYIDX]], align 4 +// CHECK27-NEXT: store i32 0, i32* [[ARRAYIDX]], align 4, !llvm.access.group !16 // CHECK27-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK27: omp.body.continue: // CHECK27-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK27: omp.inner.for.inc: -// CHECK27-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK27-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !16 // CHECK27-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP12]], 1 -// CHECK27-NEXT: store i32 [[ADD2]], i32* [[DOTOMP_IV]], align 4 -// CHECK27-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP13:![0-9]+]] +// CHECK27-NEXT: store i32 [[ADD2]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !16 +// CHECK27-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP17:![0-9]+]] // CHECK27: omp.inner.for.end: // CHECK27-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK27: omp.loop.exit: @@ -8318,7 +8318,7 
@@ int main (int argc, char **argv) { // CHECK28-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 // CHECK28-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP19]], [[TMP20]] // CHECK28-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 -// CHECK28-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP5:![0-9]+]] +// CHECK28-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP6:![0-9]+]] // CHECK28: omp.inner.for.end: // CHECK28-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK28: omp.loop.exit: @@ -8443,7 +8443,7 @@ int main (int argc, char **argv) { // CHECK28-NEXT: [[TMP24:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 // CHECK28-NEXT: [[ADD8:%.*]] = add nsw i32 [[TMP24]], 1 // CHECK28-NEXT: store i32 [[ADD8]], i32* [[DOTOMP_IV]], align 4 -// CHECK28-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP8:![0-9]+]] +// CHECK28-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP9:![0-9]+]] // CHECK28: omp.inner.for.end: // CHECK28-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK28: omp.loop.exit: @@ -8590,21 +8590,21 @@ int main (int argc, char **argv) { // CHECK28-NEXT: store i32 [[TMP5]], i32* [[DOTOMP_IV]], align 4 // CHECK28-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK28: omp.inner.for.cond: -// CHECK28-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK28-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK28-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !12 +// CHECK28-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !12 // CHECK28-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] // CHECK28-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK28: omp.inner.for.body: -// CHECK28-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK28-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK28-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB4]], i32 3, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32, i32, [10 x i32]*)* @.omp_outlined..3 to void (i32*, i32*, ...)*), i32 [[TMP8]], i32 [[TMP9]], [10 x i32]* [[TMP0]]) +// CHECK28-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !12 +// CHECK28-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !12 +// CHECK28-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) 
@__kmpc_fork_call(%struct.ident_t* @[[GLOB4]], i32 3, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32, i32, [10 x i32]*)* @.omp_outlined..3 to void (i32*, i32*, ...)*), i32 [[TMP8]], i32 [[TMP9]], [10 x i32]* [[TMP0]]), !llvm.access.group !12 // CHECK28-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK28: omp.inner.for.inc: -// CHECK28-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK28-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK28-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !12 +// CHECK28-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !12 // CHECK28-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP10]], [[TMP11]] -// CHECK28-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 -// CHECK28-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP11:![0-9]+]] +// CHECK28-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !12 +// CHECK28-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP13:![0-9]+]] // CHECK28: omp.inner.for.end: // CHECK28-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK28: omp.loop.exit: @@ -8666,26 +8666,26 @@ int main (int argc, char **argv) { // CHECK28-NEXT: store i32 [[TMP7]], i32* [[DOTOMP_IV]], align 4 // CHECK28-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK28: omp.inner.for.cond: -// CHECK28-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK28-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK28-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !16 +// CHECK28-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !16 // CHECK28-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] // CHECK28-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK28: omp.inner.for.body: -// CHECK28-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK28-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !16 // CHECK28-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP10]], 1 // CHECK28-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK28-NEXT: store i32 [[ADD]], i32* [[I]], align 4 -// CHECK28-NEXT: [[TMP11:%.*]] = load i32, i32* [[I]], align 4 +// CHECK28-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !16 +// CHECK28-NEXT: [[TMP11:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !16 // CHECK28-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], [10 x i32]* [[TMP0]], i32 0, i32 [[TMP11]] -// CHECK28-NEXT: store i32 0, i32* [[ARRAYIDX]], align 4 +// CHECK28-NEXT: store i32 0, i32* [[ARRAYIDX]], align 4, !llvm.access.group !16 // CHECK28-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK28: omp.body.continue: // CHECK28-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK28: omp.inner.for.inc: -// CHECK28-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK28-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !16 // CHECK28-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP12]], 1 -// CHECK28-NEXT: store i32 [[ADD2]], i32* [[DOTOMP_IV]], align 4 -// CHECK28-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP13:![0-9]+]] +// CHECK28-NEXT: store i32 [[ADD2]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !16 +// CHECK28-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP17:![0-9]+]] // CHECK28: omp.inner.for.end: // CHECK28-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK28: omp.loop.exit: diff --git 
a/clang/test/OpenMP/teams_distribute_parallel_for_simd_dist_schedule_codegen.cpp b/clang/test/OpenMP/teams_distribute_parallel_for_simd_dist_schedule_codegen.cpp index 0d8917b3a4e2..0080196f4da0 100644 --- a/clang/test/OpenMP/teams_distribute_parallel_for_simd_dist_schedule_codegen.cpp +++ b/clang/test/OpenMP/teams_distribute_parallel_for_simd_dist_schedule_codegen.cpp @@ -284,23 +284,23 @@ int main (int argc, char **argv) { // CHECK1-NEXT: store i32 [[TMP5]], i32* [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !6 +// CHECK1-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !6 // CHECK1-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] // CHECK1-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK1-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !6 // CHECK1-NEXT: [[TMP9:%.*]] = zext i32 [[TMP8]] to i64 -// CHECK1-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !6 // CHECK1-NEXT: [[TMP11:%.*]] = zext i32 [[TMP10]] to i64 -// CHECK1-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 3, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64, %struct.SS*)* @.omp_outlined..1 to void (i32*, i32*, ...)*), i64 [[TMP9]], i64 [[TMP11]], %struct.SS* [[TMP0]]) +// CHECK1-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) 
@__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 3, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64, %struct.SS*)* @.omp_outlined..1 to void (i32*, i32*, ...)*), i64 [[TMP9]], i64 [[TMP11]], %struct.SS* [[TMP0]]), !llvm.access.group !6 // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK1-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !6 +// CHECK1-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !6 // CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP12]], [[TMP13]] -// CHECK1-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP5:![0-9]+]] +// CHECK1-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !6 +// CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP7:![0-9]+]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK1: omp.loop.exit: @@ -364,28 +364,28 @@ int main (int argc, char **argv) { // CHECK1-NEXT: store i32 [[TMP7]], i32* [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !10 +// CHECK1-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !10 // CHECK1-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] // CHECK1-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !10 // CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP10]], 1 // CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK1-NEXT: store i32 [[ADD]], i32* [[I]], align 4 +// CHECK1-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !10 // CHECK1-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_SS:%.*]], %struct.SS* [[TMP0]], i32 0, i32 0 -// CHECK1-NEXT: [[TMP11:%.*]] = load i32, i32* [[I]], align 4 +// CHECK1-NEXT: [[TMP11:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !10 // CHECK1-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP11]] to i64 // CHECK1-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [123 x i32], [123 x i32]* [[A]], i64 0, i64 [[IDXPROM]] -// CHECK1-NEXT: store i32 0, i32* [[ARRAYIDX]], align 4 +// CHECK1-NEXT: store i32 0, i32* [[ARRAYIDX]], align 4, !llvm.access.group !10 // CHECK1-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK1: omp.body.continue: // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !10 // CHECK1-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP12]], 1 -// CHECK1-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP7:![0-9]+]] +// CHECK1-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !10 +// CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP11:![0-9]+]] // CHECK1: 
omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK1: omp.loop.exit: @@ -449,23 +449,23 @@ int main (int argc, char **argv) { // CHECK1-NEXT: store i32 [[TMP5]], i32* [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !15 +// CHECK1-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !15 // CHECK1-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] // CHECK1-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK1-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !15 // CHECK1-NEXT: [[TMP9:%.*]] = zext i32 [[TMP8]] to i64 -// CHECK1-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !15 // CHECK1-NEXT: [[TMP11:%.*]] = zext i32 [[TMP10]] to i64 -// CHECK1-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 3, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64, %struct.SS*)* @.omp_outlined..3 to void (i32*, i32*, ...)*), i64 [[TMP9]], i64 [[TMP11]], %struct.SS* [[TMP0]]) +// CHECK1-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 3, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64, %struct.SS*)* @.omp_outlined..3 to void (i32*, i32*, ...)*), i64 [[TMP9]], i64 [[TMP11]], %struct.SS* [[TMP0]]), !llvm.access.group !15 // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK1-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !15 +// CHECK1-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !15 // CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP12]], [[TMP13]] -// CHECK1-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP10:![0-9]+]] +// CHECK1-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !15 +// CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP16:![0-9]+]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK1: omp.loop.exit: @@ -529,28 +529,28 @@ int main (int argc, char **argv) { // CHECK1-NEXT: store i32 [[TMP7]], i32* [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !18 +// CHECK1-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !18 // CHECK1-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] // CHECK1-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: 
-// CHECK1-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !18 // CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP10]], 1 // CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK1-NEXT: store i32 [[ADD]], i32* [[I]], align 4 +// CHECK1-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !18 // CHECK1-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_SS:%.*]], %struct.SS* [[TMP0]], i32 0, i32 0 -// CHECK1-NEXT: [[TMP11:%.*]] = load i32, i32* [[I]], align 4 +// CHECK1-NEXT: [[TMP11:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !18 // CHECK1-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP11]] to i64 // CHECK1-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [123 x i32], [123 x i32]* [[A]], i64 0, i64 [[IDXPROM]] -// CHECK1-NEXT: store i32 0, i32* [[ARRAYIDX]], align 4 +// CHECK1-NEXT: store i32 0, i32* [[ARRAYIDX]], align 4, !llvm.access.group !18 // CHECK1-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK1: omp.body.continue: // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !18 // CHECK1-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP12]], 1 -// CHECK1-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP11:![0-9]+]] +// CHECK1-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !18 +// CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP19:![0-9]+]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK1: omp.loop.exit: @@ -614,43 +614,43 @@ int main (int argc, char **argv) { // CHECK1-NEXT: store i32 [[TMP5]], i32* [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !21 // CHECK1-NEXT: [[CMP1:%.*]] = icmp slt i32 [[TMP6]], 123 // CHECK1-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK1-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !21 // CHECK1-NEXT: [[TMP8:%.*]] = zext i32 [[TMP7]] to i64 -// CHECK1-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !21 // CHECK1-NEXT: [[TMP10:%.*]] = zext i32 [[TMP9]] to i64 -// CHECK1-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 3, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64, %struct.SS*)* @.omp_outlined..7 to void (i32*, i32*, ...)*), i64 [[TMP8]], i64 [[TMP10]], %struct.SS* [[TMP0]]) +// CHECK1-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) 
@__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 3, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64, %struct.SS*)* @.omp_outlined..7 to void (i32*, i32*, ...)*), i64 [[TMP8]], i64 [[TMP10]], %struct.SS* [[TMP0]]), !llvm.access.group !21 // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK1-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !21 +// CHECK1-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !21 // CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP11]], [[TMP12]] -// CHECK1-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK1-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK1-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !21 +// CHECK1-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !21 +// CHECK1-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !21 // CHECK1-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP13]], [[TMP14]] -// CHECK1-NEXT: store i32 [[ADD2]], i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK1-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK1-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK1-NEXT: store i32 [[ADD2]], i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !21 +// CHECK1-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !21 +// CHECK1-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !21 // CHECK1-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP15]], [[TMP16]] -// CHECK1-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK1-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !21 +// CHECK1-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !21 // CHECK1-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP17]], 122 // CHECK1-NEXT: br i1 [[CMP4]], label [[COND_TRUE5:%.*]], label [[COND_FALSE6:%.*]] // CHECK1: cond.true5: // CHECK1-NEXT: br label [[COND_END7:%.*]] // CHECK1: cond.false6: -// CHECK1-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !21 // CHECK1-NEXT: br label [[COND_END7]] // CHECK1: cond.end7: // CHECK1-NEXT: [[COND8:%.*]] = phi i32 [ 122, [[COND_TRUE5]] ], [ [[TMP18]], [[COND_FALSE6]] ] -// CHECK1-NEXT: store i32 [[COND8]], i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK1-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK1-NEXT: store i32 [[TMP19]], i32* [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP12:![0-9]+]] +// CHECK1-NEXT: store i32 [[COND8]], i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !21 +// CHECK1-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !21 +// CHECK1-NEXT: store i32 [[TMP19]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !21 +// CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP22:![0-9]+]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK1: omp.loop.exit: @@ 
-714,28 +714,28 @@ int main (int argc, char **argv) { // CHECK1-NEXT: store i32 [[TMP7]], i32* [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !24 +// CHECK1-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !24 // CHECK1-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] // CHECK1-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !24 // CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP10]], 1 // CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK1-NEXT: store i32 [[ADD]], i32* [[I]], align 4 +// CHECK1-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !24 // CHECK1-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_SS:%.*]], %struct.SS* [[TMP0]], i32 0, i32 0 -// CHECK1-NEXT: [[TMP11:%.*]] = load i32, i32* [[I]], align 4 +// CHECK1-NEXT: [[TMP11:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !24 // CHECK1-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP11]] to i64 // CHECK1-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [123 x i32], [123 x i32]* [[A]], i64 0, i64 [[IDXPROM]] -// CHECK1-NEXT: store i32 0, i32* [[ARRAYIDX]], align 4 +// CHECK1-NEXT: store i32 0, i32* [[ARRAYIDX]], align 4, !llvm.access.group !24 // CHECK1-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK1: omp.body.continue: // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !24 // CHECK1-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP12]], 1 -// CHECK1-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP13:![0-9]+]] +// CHECK1-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !24 +// CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP25:![0-9]+]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK1: omp.loop.exit: @@ -895,23 +895,23 @@ int main (int argc, char **argv) { // CHECK2-NEXT: store i32 [[TMP5]], i32* [[DOTOMP_IV]], align 4 // CHECK2-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK2: omp.inner.for.cond: -// CHECK2-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK2-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK2-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !6 +// CHECK2-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !6 // CHECK2-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] // CHECK2-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK2: omp.inner.for.body: -// CHECK2-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK2-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !6 // CHECK2-NEXT: [[TMP9:%.*]] = zext i32 [[TMP8]] to i64 -// CHECK2-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK2-NEXT: 
[[TMP10:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !6 // CHECK2-NEXT: [[TMP11:%.*]] = zext i32 [[TMP10]] to i64 -// CHECK2-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 3, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64, %struct.SS*)* @.omp_outlined..1 to void (i32*, i32*, ...)*), i64 [[TMP9]], i64 [[TMP11]], %struct.SS* [[TMP0]]) +// CHECK2-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 3, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64, %struct.SS*)* @.omp_outlined..1 to void (i32*, i32*, ...)*), i64 [[TMP9]], i64 [[TMP11]], %struct.SS* [[TMP0]]), !llvm.access.group !6 // CHECK2-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK2: omp.inner.for.inc: -// CHECK2-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK2-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK2-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !6 +// CHECK2-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !6 // CHECK2-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP12]], [[TMP13]] -// CHECK2-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 -// CHECK2-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP5:![0-9]+]] +// CHECK2-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !6 +// CHECK2-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP7:![0-9]+]] // CHECK2: omp.inner.for.end: // CHECK2-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK2: omp.loop.exit: @@ -975,28 +975,28 @@ int main (int argc, char **argv) { // CHECK2-NEXT: store i32 [[TMP7]], i32* [[DOTOMP_IV]], align 4 // CHECK2-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK2: omp.inner.for.cond: -// CHECK2-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK2-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK2-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !10 +// CHECK2-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !10 // CHECK2-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] // CHECK2-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK2: omp.inner.for.body: -// CHECK2-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK2-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !10 // CHECK2-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP10]], 1 // CHECK2-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK2-NEXT: store i32 [[ADD]], i32* [[I]], align 4 +// CHECK2-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !10 // CHECK2-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_SS:%.*]], %struct.SS* [[TMP0]], i32 0, i32 0 -// CHECK2-NEXT: [[TMP11:%.*]] = load i32, i32* [[I]], align 4 +// CHECK2-NEXT: [[TMP11:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !10 // CHECK2-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP11]] to i64 // CHECK2-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [123 x i32], [123 x i32]* [[A]], i64 0, i64 [[IDXPROM]] -// CHECK2-NEXT: store i32 0, i32* [[ARRAYIDX]], align 4 +// CHECK2-NEXT: store i32 0, i32* [[ARRAYIDX]], align 4, !llvm.access.group !10 // CHECK2-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK2: omp.body.continue: // CHECK2-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK2: omp.inner.for.inc: -// CHECK2-NEXT: 
[[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK2-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !10 // CHECK2-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP12]], 1 -// CHECK2-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4 -// CHECK2-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP7:![0-9]+]] +// CHECK2-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !10 +// CHECK2-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP11:![0-9]+]] // CHECK2: omp.inner.for.end: // CHECK2-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK2: omp.loop.exit: @@ -1060,23 +1060,23 @@ int main (int argc, char **argv) { // CHECK2-NEXT: store i32 [[TMP5]], i32* [[DOTOMP_IV]], align 4 // CHECK2-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK2: omp.inner.for.cond: -// CHECK2-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK2-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK2-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !15 +// CHECK2-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !15 // CHECK2-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] // CHECK2-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK2: omp.inner.for.body: -// CHECK2-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK2-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !15 // CHECK2-NEXT: [[TMP9:%.*]] = zext i32 [[TMP8]] to i64 -// CHECK2-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK2-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !15 // CHECK2-NEXT: [[TMP11:%.*]] = zext i32 [[TMP10]] to i64 -// CHECK2-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 3, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64, %struct.SS*)* @.omp_outlined..3 to void (i32*, i32*, ...)*), i64 [[TMP9]], i64 [[TMP11]], %struct.SS* [[TMP0]]) +// CHECK2-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) 
@__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 3, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64, %struct.SS*)* @.omp_outlined..3 to void (i32*, i32*, ...)*), i64 [[TMP9]], i64 [[TMP11]], %struct.SS* [[TMP0]]), !llvm.access.group !15 // CHECK2-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK2: omp.inner.for.inc: -// CHECK2-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK2-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK2-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !15 +// CHECK2-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !15 // CHECK2-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP12]], [[TMP13]] -// CHECK2-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 -// CHECK2-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP10:![0-9]+]] +// CHECK2-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !15 +// CHECK2-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP16:![0-9]+]] // CHECK2: omp.inner.for.end: // CHECK2-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK2: omp.loop.exit: @@ -1140,28 +1140,28 @@ int main (int argc, char **argv) { // CHECK2-NEXT: store i32 [[TMP7]], i32* [[DOTOMP_IV]], align 4 // CHECK2-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK2: omp.inner.for.cond: -// CHECK2-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK2-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK2-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !18 +// CHECK2-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !18 // CHECK2-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] // CHECK2-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK2: omp.inner.for.body: -// CHECK2-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK2-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !18 // CHECK2-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP10]], 1 // CHECK2-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK2-NEXT: store i32 [[ADD]], i32* [[I]], align 4 +// CHECK2-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !18 // CHECK2-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_SS:%.*]], %struct.SS* [[TMP0]], i32 0, i32 0 -// CHECK2-NEXT: [[TMP11:%.*]] = load i32, i32* [[I]], align 4 +// CHECK2-NEXT: [[TMP11:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !18 // CHECK2-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP11]] to i64 // CHECK2-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [123 x i32], [123 x i32]* [[A]], i64 0, i64 [[IDXPROM]] -// CHECK2-NEXT: store i32 0, i32* [[ARRAYIDX]], align 4 +// CHECK2-NEXT: store i32 0, i32* [[ARRAYIDX]], align 4, !llvm.access.group !18 // CHECK2-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK2: omp.body.continue: // CHECK2-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK2: omp.inner.for.inc: -// CHECK2-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK2-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !18 // CHECK2-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP12]], 1 -// CHECK2-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4 -// CHECK2-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP11:![0-9]+]] +// CHECK2-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !18 +// CHECK2-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP19:![0-9]+]] // 
CHECK2: omp.inner.for.end: // CHECK2-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK2: omp.loop.exit: @@ -1225,43 +1225,43 @@ int main (int argc, char **argv) { // CHECK2-NEXT: store i32 [[TMP5]], i32* [[DOTOMP_IV]], align 4 // CHECK2-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK2: omp.inner.for.cond: -// CHECK2-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK2-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !21 // CHECK2-NEXT: [[CMP1:%.*]] = icmp slt i32 [[TMP6]], 123 // CHECK2-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK2: omp.inner.for.body: -// CHECK2-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK2-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !21 // CHECK2-NEXT: [[TMP8:%.*]] = zext i32 [[TMP7]] to i64 -// CHECK2-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK2-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !21 // CHECK2-NEXT: [[TMP10:%.*]] = zext i32 [[TMP9]] to i64 -// CHECK2-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 3, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64, %struct.SS*)* @.omp_outlined..7 to void (i32*, i32*, ...)*), i64 [[TMP8]], i64 [[TMP10]], %struct.SS* [[TMP0]]) +// CHECK2-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 3, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64, %struct.SS*)* @.omp_outlined..7 to void (i32*, i32*, ...)*), i64 [[TMP8]], i64 [[TMP10]], %struct.SS* [[TMP0]]), !llvm.access.group !21 // CHECK2-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK2: omp.inner.for.inc: -// CHECK2-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK2-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK2-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !21 +// CHECK2-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !21 // CHECK2-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP11]], [[TMP12]] -// CHECK2-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 -// CHECK2-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK2-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK2-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !21 +// CHECK2-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !21 +// CHECK2-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !21 // CHECK2-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP13]], [[TMP14]] -// CHECK2-NEXT: store i32 [[ADD2]], i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK2-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK2-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK2-NEXT: store i32 [[ADD2]], i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !21 +// CHECK2-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !21 +// CHECK2-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !21 // CHECK2-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP15]], [[TMP16]] -// CHECK2-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK2-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK2-NEXT: store 
i32 [[ADD3]], i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !21 +// CHECK2-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !21 // CHECK2-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP17]], 122 // CHECK2-NEXT: br i1 [[CMP4]], label [[COND_TRUE5:%.*]], label [[COND_FALSE6:%.*]] // CHECK2: cond.true5: // CHECK2-NEXT: br label [[COND_END7:%.*]] // CHECK2: cond.false6: -// CHECK2-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK2-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !21 // CHECK2-NEXT: br label [[COND_END7]] // CHECK2: cond.end7: // CHECK2-NEXT: [[COND8:%.*]] = phi i32 [ 122, [[COND_TRUE5]] ], [ [[TMP18]], [[COND_FALSE6]] ] -// CHECK2-NEXT: store i32 [[COND8]], i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK2-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK2-NEXT: store i32 [[TMP19]], i32* [[DOTOMP_IV]], align 4 -// CHECK2-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP12:![0-9]+]] +// CHECK2-NEXT: store i32 [[COND8]], i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !21 +// CHECK2-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !21 +// CHECK2-NEXT: store i32 [[TMP19]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !21 +// CHECK2-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP22:![0-9]+]] // CHECK2: omp.inner.for.end: // CHECK2-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK2: omp.loop.exit: @@ -1325,28 +1325,28 @@ int main (int argc, char **argv) { // CHECK2-NEXT: store i32 [[TMP7]], i32* [[DOTOMP_IV]], align 4 // CHECK2-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK2: omp.inner.for.cond: -// CHECK2-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK2-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK2-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !24 +// CHECK2-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !24 // CHECK2-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] // CHECK2-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK2: omp.inner.for.body: -// CHECK2-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK2-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !24 // CHECK2-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP10]], 1 // CHECK2-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK2-NEXT: store i32 [[ADD]], i32* [[I]], align 4 +// CHECK2-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !24 // CHECK2-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_SS:%.*]], %struct.SS* [[TMP0]], i32 0, i32 0 -// CHECK2-NEXT: [[TMP11:%.*]] = load i32, i32* [[I]], align 4 +// CHECK2-NEXT: [[TMP11:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !24 // CHECK2-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP11]] to i64 // CHECK2-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [123 x i32], [123 x i32]* [[A]], i64 0, i64 [[IDXPROM]] -// CHECK2-NEXT: store i32 0, i32* [[ARRAYIDX]], align 4 +// CHECK2-NEXT: store i32 0, i32* [[ARRAYIDX]], align 4, !llvm.access.group !24 // CHECK2-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK2: omp.body.continue: // CHECK2-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK2: omp.inner.for.inc: -// CHECK2-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK2-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !24 // CHECK2-NEXT: 
[[ADD3:%.*]] = add nsw i32 [[TMP12]], 1 -// CHECK2-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4 -// CHECK2-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP13:![0-9]+]] +// CHECK2-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !24 +// CHECK2-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP25:![0-9]+]] // CHECK2: omp.inner.for.end: // CHECK2-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK2: omp.loop.exit: @@ -1506,21 +1506,21 @@ int main (int argc, char **argv) { // CHECK3-NEXT: store i32 [[TMP5]], i32* [[DOTOMP_IV]], align 4 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK3: omp.inner.for.cond: -// CHECK3-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK3-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !7 +// CHECK3-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !7 // CHECK3-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] // CHECK3-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK3: omp.inner.for.body: -// CHECK3-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK3-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK3-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 3, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32, i32, %struct.SS*)* @.omp_outlined..1 to void (i32*, i32*, ...)*), i32 [[TMP8]], i32 [[TMP9]], %struct.SS* [[TMP0]]) +// CHECK3-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !7 +// CHECK3-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !7 +// CHECK3-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) 
@__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 3, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32, i32, %struct.SS*)* @.omp_outlined..1 to void (i32*, i32*, ...)*), i32 [[TMP8]], i32 [[TMP9]], %struct.SS* [[TMP0]]), !llvm.access.group !7 // CHECK3-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK3: omp.inner.for.inc: -// CHECK3-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK3-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !7 +// CHECK3-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !7 // CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP10]], [[TMP11]] -// CHECK3-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP6:![0-9]+]] +// CHECK3-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !7 +// CHECK3-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP8:![0-9]+]] // CHECK3: omp.inner.for.end: // CHECK3-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK3: omp.loop.exit: @@ -1582,27 +1582,27 @@ int main (int argc, char **argv) { // CHECK3-NEXT: store i32 [[TMP7]], i32* [[DOTOMP_IV]], align 4 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK3: omp.inner.for.cond: -// CHECK3-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !11 +// CHECK3-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !11 // CHECK3-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] // CHECK3-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK3: omp.inner.for.body: -// CHECK3-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !11 // CHECK3-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP10]], 1 // CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK3-NEXT: store i32 [[ADD]], i32* [[I]], align 4 +// CHECK3-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !11 // CHECK3-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_SS:%.*]], %struct.SS* [[TMP0]], i32 0, i32 0 -// CHECK3-NEXT: [[TMP11:%.*]] = load i32, i32* [[I]], align 4 +// CHECK3-NEXT: [[TMP11:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !11 // CHECK3-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [123 x i32], [123 x i32]* [[A]], i32 0, i32 [[TMP11]] -// CHECK3-NEXT: store i32 0, i32* [[ARRAYIDX]], align 4 +// CHECK3-NEXT: store i32 0, i32* [[ARRAYIDX]], align 4, !llvm.access.group !11 // CHECK3-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK3: omp.body.continue: // CHECK3-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK3: omp.inner.for.inc: -// CHECK3-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !11 // CHECK3-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP12]], 1 -// CHECK3-NEXT: store i32 [[ADD2]], i32* [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP8:![0-9]+]] +// CHECK3-NEXT: store i32 [[ADD2]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !11 +// CHECK3-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP12:![0-9]+]] // CHECK3: omp.inner.for.end: // CHECK3-NEXT: br label 
[[OMP_LOOP_EXIT:%.*]] // CHECK3: omp.loop.exit: @@ -1666,21 +1666,21 @@ int main (int argc, char **argv) { // CHECK3-NEXT: store i32 [[TMP5]], i32* [[DOTOMP_IV]], align 4 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK3: omp.inner.for.cond: -// CHECK3-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK3-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !16 +// CHECK3-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !16 // CHECK3-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] // CHECK3-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK3: omp.inner.for.body: -// CHECK3-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK3-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK3-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 3, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32, i32, %struct.SS*)* @.omp_outlined..3 to void (i32*, i32*, ...)*), i32 [[TMP8]], i32 [[TMP9]], %struct.SS* [[TMP0]]) +// CHECK3-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !16 +// CHECK3-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !16 +// CHECK3-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 3, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32, i32, %struct.SS*)* @.omp_outlined..3 to void (i32*, i32*, ...)*), i32 [[TMP8]], i32 [[TMP9]], %struct.SS* [[TMP0]]), !llvm.access.group !16 // CHECK3-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK3: omp.inner.for.inc: -// CHECK3-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK3-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !16 +// CHECK3-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !16 // CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP10]], [[TMP11]] -// CHECK3-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP11:![0-9]+]] +// CHECK3-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !16 +// CHECK3-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP17:![0-9]+]] // CHECK3: omp.inner.for.end: // CHECK3-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK3: omp.loop.exit: @@ -1742,27 +1742,27 @@ int main (int argc, char **argv) { // CHECK3-NEXT: store i32 [[TMP7]], i32* [[DOTOMP_IV]], align 4 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK3: omp.inner.for.cond: -// CHECK3-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !19 +// CHECK3-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !19 // CHECK3-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] // CHECK3-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK3: omp.inner.for.body: -// CHECK3-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, 
!llvm.access.group !19 // CHECK3-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP10]], 1 // CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK3-NEXT: store i32 [[ADD]], i32* [[I]], align 4 +// CHECK3-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !19 // CHECK3-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_SS:%.*]], %struct.SS* [[TMP0]], i32 0, i32 0 -// CHECK3-NEXT: [[TMP11:%.*]] = load i32, i32* [[I]], align 4 +// CHECK3-NEXT: [[TMP11:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !19 // CHECK3-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [123 x i32], [123 x i32]* [[A]], i32 0, i32 [[TMP11]] -// CHECK3-NEXT: store i32 0, i32* [[ARRAYIDX]], align 4 +// CHECK3-NEXT: store i32 0, i32* [[ARRAYIDX]], align 4, !llvm.access.group !19 // CHECK3-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK3: omp.body.continue: // CHECK3-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK3: omp.inner.for.inc: -// CHECK3-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !19 // CHECK3-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP12]], 1 -// CHECK3-NEXT: store i32 [[ADD2]], i32* [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP12:![0-9]+]] +// CHECK3-NEXT: store i32 [[ADD2]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !19 +// CHECK3-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP20:![0-9]+]] // CHECK3: omp.inner.for.end: // CHECK3-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK3: omp.loop.exit: @@ -1826,41 +1826,41 @@ int main (int argc, char **argv) { // CHECK3-NEXT: store i32 [[TMP5]], i32* [[DOTOMP_IV]], align 4 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK3: omp.inner.for.cond: -// CHECK3-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !22 // CHECK3-NEXT: [[CMP1:%.*]] = icmp slt i32 [[TMP6]], 123 // CHECK3-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK3: omp.inner.for.body: -// CHECK3-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK3-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK3-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 3, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32, i32, %struct.SS*)* @.omp_outlined..7 to void (i32*, i32*, ...)*), i32 [[TMP7]], i32 [[TMP8]], %struct.SS* [[TMP0]]) +// CHECK3-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !22 +// CHECK3-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !22 +// CHECK3-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) 
@__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 3, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32, i32, %struct.SS*)* @.omp_outlined..7 to void (i32*, i32*, ...)*), i32 [[TMP7]], i32 [[TMP8]], %struct.SS* [[TMP0]]), !llvm.access.group !22 // CHECK3-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK3: omp.inner.for.inc: -// CHECK3-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK3-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !22 +// CHECK3-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !22 // CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP9]], [[TMP10]] -// CHECK3-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK3-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK3-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !22 +// CHECK3-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !22 +// CHECK3-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !22 // CHECK3-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP11]], [[TMP12]] -// CHECK3-NEXT: store i32 [[ADD2]], i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK3-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK3-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK3-NEXT: store i32 [[ADD2]], i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !22 +// CHECK3-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !22 +// CHECK3-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !22 // CHECK3-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP13]], [[TMP14]] -// CHECK3-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK3-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK3-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !22 +// CHECK3-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !22 // CHECK3-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP15]], 122 // CHECK3-NEXT: br i1 [[CMP4]], label [[COND_TRUE5:%.*]], label [[COND_FALSE6:%.*]] // CHECK3: cond.true5: // CHECK3-NEXT: br label [[COND_END7:%.*]] // CHECK3: cond.false6: -// CHECK3-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK3-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !22 // CHECK3-NEXT: br label [[COND_END7]] // CHECK3: cond.end7: // CHECK3-NEXT: [[COND8:%.*]] = phi i32 [ 122, [[COND_TRUE5]] ], [ [[TMP16]], [[COND_FALSE6]] ] -// CHECK3-NEXT: store i32 [[COND8]], i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK3-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK3-NEXT: store i32 [[TMP17]], i32* [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP13:![0-9]+]] +// CHECK3-NEXT: store i32 [[COND8]], i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !22 +// CHECK3-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !22 +// CHECK3-NEXT: store i32 [[TMP17]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !22 +// CHECK3-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP23:![0-9]+]] // CHECK3: omp.inner.for.end: // CHECK3-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK3: omp.loop.exit: @@ -1922,27 
+1922,27 @@ int main (int argc, char **argv) { // CHECK3-NEXT: store i32 [[TMP7]], i32* [[DOTOMP_IV]], align 4 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK3: omp.inner.for.cond: -// CHECK3-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !25 +// CHECK3-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !25 // CHECK3-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] // CHECK3-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK3: omp.inner.for.body: -// CHECK3-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !25 // CHECK3-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP10]], 1 // CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK3-NEXT: store i32 [[ADD]], i32* [[I]], align 4 +// CHECK3-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !25 // CHECK3-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_SS:%.*]], %struct.SS* [[TMP0]], i32 0, i32 0 -// CHECK3-NEXT: [[TMP11:%.*]] = load i32, i32* [[I]], align 4 +// CHECK3-NEXT: [[TMP11:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !25 // CHECK3-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [123 x i32], [123 x i32]* [[A]], i32 0, i32 [[TMP11]] -// CHECK3-NEXT: store i32 0, i32* [[ARRAYIDX]], align 4 +// CHECK3-NEXT: store i32 0, i32* [[ARRAYIDX]], align 4, !llvm.access.group !25 // CHECK3-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK3: omp.body.continue: // CHECK3-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK3: omp.inner.for.inc: -// CHECK3-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !25 // CHECK3-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP12]], 1 -// CHECK3-NEXT: store i32 [[ADD2]], i32* [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP14:![0-9]+]] +// CHECK3-NEXT: store i32 [[ADD2]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !25 +// CHECK3-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP26:![0-9]+]] // CHECK3: omp.inner.for.end: // CHECK3-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK3: omp.loop.exit: @@ -2102,21 +2102,21 @@ int main (int argc, char **argv) { // CHECK4-NEXT: store i32 [[TMP5]], i32* [[DOTOMP_IV]], align 4 // CHECK4-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK4: omp.inner.for.cond: -// CHECK4-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK4-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK4-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !7 +// CHECK4-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !7 // CHECK4-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] // CHECK4-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK4: omp.inner.for.body: -// CHECK4-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK4-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK4-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) 
@__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 3, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32, i32, %struct.SS*)* @.omp_outlined..1 to void (i32*, i32*, ...)*), i32 [[TMP8]], i32 [[TMP9]], %struct.SS* [[TMP0]]) +// CHECK4-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !7 +// CHECK4-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !7 +// CHECK4-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 3, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32, i32, %struct.SS*)* @.omp_outlined..1 to void (i32*, i32*, ...)*), i32 [[TMP8]], i32 [[TMP9]], %struct.SS* [[TMP0]]), !llvm.access.group !7 // CHECK4-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK4: omp.inner.for.inc: -// CHECK4-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK4-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK4-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !7 +// CHECK4-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !7 // CHECK4-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP10]], [[TMP11]] -// CHECK4-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 -// CHECK4-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP6:![0-9]+]] +// CHECK4-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !7 +// CHECK4-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP8:![0-9]+]] // CHECK4: omp.inner.for.end: // CHECK4-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK4: omp.loop.exit: @@ -2178,27 +2178,27 @@ int main (int argc, char **argv) { // CHECK4-NEXT: store i32 [[TMP7]], i32* [[DOTOMP_IV]], align 4 // CHECK4-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK4: omp.inner.for.cond: -// CHECK4-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK4-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK4-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !11 +// CHECK4-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !11 // CHECK4-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] // CHECK4-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK4: omp.inner.for.body: -// CHECK4-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK4-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !11 // CHECK4-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP10]], 1 // CHECK4-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK4-NEXT: store i32 [[ADD]], i32* [[I]], align 4 +// CHECK4-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !11 // CHECK4-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_SS:%.*]], %struct.SS* [[TMP0]], i32 0, i32 0 -// CHECK4-NEXT: [[TMP11:%.*]] = load i32, i32* [[I]], align 4 +// CHECK4-NEXT: [[TMP11:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !11 // CHECK4-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [123 x i32], [123 x i32]* [[A]], i32 0, i32 [[TMP11]] -// CHECK4-NEXT: store i32 0, i32* [[ARRAYIDX]], align 4 +// CHECK4-NEXT: store i32 0, i32* [[ARRAYIDX]], align 4, !llvm.access.group !11 // CHECK4-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK4: omp.body.continue: // CHECK4-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK4: omp.inner.for.inc: -// CHECK4-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK4-NEXT: [[TMP12:%.*]] = 
load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !11 // CHECK4-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP12]], 1 -// CHECK4-NEXT: store i32 [[ADD2]], i32* [[DOTOMP_IV]], align 4 -// CHECK4-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP8:![0-9]+]] +// CHECK4-NEXT: store i32 [[ADD2]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !11 +// CHECK4-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP12:![0-9]+]] // CHECK4: omp.inner.for.end: // CHECK4-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK4: omp.loop.exit: @@ -2262,21 +2262,21 @@ int main (int argc, char **argv) { // CHECK4-NEXT: store i32 [[TMP5]], i32* [[DOTOMP_IV]], align 4 // CHECK4-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK4: omp.inner.for.cond: -// CHECK4-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK4-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK4-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !16 +// CHECK4-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !16 // CHECK4-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] // CHECK4-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK4: omp.inner.for.body: -// CHECK4-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK4-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK4-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 3, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32, i32, %struct.SS*)* @.omp_outlined..3 to void (i32*, i32*, ...)*), i32 [[TMP8]], i32 [[TMP9]], %struct.SS* [[TMP0]]) +// CHECK4-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !16 +// CHECK4-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !16 +// CHECK4-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) 
@__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 3, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32, i32, %struct.SS*)* @.omp_outlined..3 to void (i32*, i32*, ...)*), i32 [[TMP8]], i32 [[TMP9]], %struct.SS* [[TMP0]]), !llvm.access.group !16 // CHECK4-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK4: omp.inner.for.inc: -// CHECK4-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK4-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK4-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !16 +// CHECK4-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !16 // CHECK4-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP10]], [[TMP11]] -// CHECK4-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 -// CHECK4-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP11:![0-9]+]] +// CHECK4-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !16 +// CHECK4-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP17:![0-9]+]] // CHECK4: omp.inner.for.end: // CHECK4-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK4: omp.loop.exit: @@ -2338,27 +2338,27 @@ int main (int argc, char **argv) { // CHECK4-NEXT: store i32 [[TMP7]], i32* [[DOTOMP_IV]], align 4 // CHECK4-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK4: omp.inner.for.cond: -// CHECK4-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK4-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK4-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !19 +// CHECK4-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !19 // CHECK4-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] // CHECK4-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK4: omp.inner.for.body: -// CHECK4-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK4-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !19 // CHECK4-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP10]], 1 // CHECK4-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK4-NEXT: store i32 [[ADD]], i32* [[I]], align 4 +// CHECK4-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !19 // CHECK4-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_SS:%.*]], %struct.SS* [[TMP0]], i32 0, i32 0 -// CHECK4-NEXT: [[TMP11:%.*]] = load i32, i32* [[I]], align 4 +// CHECK4-NEXT: [[TMP11:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !19 // CHECK4-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [123 x i32], [123 x i32]* [[A]], i32 0, i32 [[TMP11]] -// CHECK4-NEXT: store i32 0, i32* [[ARRAYIDX]], align 4 +// CHECK4-NEXT: store i32 0, i32* [[ARRAYIDX]], align 4, !llvm.access.group !19 // CHECK4-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK4: omp.body.continue: // CHECK4-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK4: omp.inner.for.inc: -// CHECK4-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK4-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !19 // CHECK4-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP12]], 1 -// CHECK4-NEXT: store i32 [[ADD2]], i32* [[DOTOMP_IV]], align 4 -// CHECK4-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP12:![0-9]+]] +// CHECK4-NEXT: store i32 [[ADD2]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !19 +// CHECK4-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP20:![0-9]+]] // CHECK4: omp.inner.for.end: // CHECK4-NEXT: br label 
[[OMP_LOOP_EXIT:%.*]] // CHECK4: omp.loop.exit: @@ -2422,41 +2422,41 @@ int main (int argc, char **argv) { // CHECK4-NEXT: store i32 [[TMP5]], i32* [[DOTOMP_IV]], align 4 // CHECK4-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK4: omp.inner.for.cond: -// CHECK4-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK4-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !22 // CHECK4-NEXT: [[CMP1:%.*]] = icmp slt i32 [[TMP6]], 123 // CHECK4-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK4: omp.inner.for.body: -// CHECK4-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK4-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK4-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 3, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32, i32, %struct.SS*)* @.omp_outlined..7 to void (i32*, i32*, ...)*), i32 [[TMP7]], i32 [[TMP8]], %struct.SS* [[TMP0]]) +// CHECK4-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !22 +// CHECK4-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !22 +// CHECK4-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 3, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32, i32, %struct.SS*)* @.omp_outlined..7 to void (i32*, i32*, ...)*), i32 [[TMP7]], i32 [[TMP8]], %struct.SS* [[TMP0]]), !llvm.access.group !22 // CHECK4-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK4: omp.inner.for.inc: -// CHECK4-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK4-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK4-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !22 +// CHECK4-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !22 // CHECK4-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP9]], [[TMP10]] -// CHECK4-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 -// CHECK4-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK4-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK4-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !22 +// CHECK4-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !22 +// CHECK4-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !22 // CHECK4-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP11]], [[TMP12]] -// CHECK4-NEXT: store i32 [[ADD2]], i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK4-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK4-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK4-NEXT: store i32 [[ADD2]], i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !22 +// CHECK4-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !22 +// CHECK4-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !22 // CHECK4-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP13]], [[TMP14]] -// CHECK4-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK4-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK4-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !22 +// CHECK4-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group 
!22 // CHECK4-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP15]], 122 // CHECK4-NEXT: br i1 [[CMP4]], label [[COND_TRUE5:%.*]], label [[COND_FALSE6:%.*]] // CHECK4: cond.true5: // CHECK4-NEXT: br label [[COND_END7:%.*]] // CHECK4: cond.false6: -// CHECK4-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK4-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !22 // CHECK4-NEXT: br label [[COND_END7]] // CHECK4: cond.end7: // CHECK4-NEXT: [[COND8:%.*]] = phi i32 [ 122, [[COND_TRUE5]] ], [ [[TMP16]], [[COND_FALSE6]] ] -// CHECK4-NEXT: store i32 [[COND8]], i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK4-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK4-NEXT: store i32 [[TMP17]], i32* [[DOTOMP_IV]], align 4 -// CHECK4-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP13:![0-9]+]] +// CHECK4-NEXT: store i32 [[COND8]], i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !22 +// CHECK4-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !22 +// CHECK4-NEXT: store i32 [[TMP17]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !22 +// CHECK4-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP23:![0-9]+]] // CHECK4: omp.inner.for.end: // CHECK4-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK4: omp.loop.exit: @@ -2518,27 +2518,27 @@ int main (int argc, char **argv) { // CHECK4-NEXT: store i32 [[TMP7]], i32* [[DOTOMP_IV]], align 4 // CHECK4-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK4: omp.inner.for.cond: -// CHECK4-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK4-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK4-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !25 +// CHECK4-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !25 // CHECK4-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] // CHECK4-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK4: omp.inner.for.body: -// CHECK4-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK4-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !25 // CHECK4-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP10]], 1 // CHECK4-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK4-NEXT: store i32 [[ADD]], i32* [[I]], align 4 +// CHECK4-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !25 // CHECK4-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_SS:%.*]], %struct.SS* [[TMP0]], i32 0, i32 0 -// CHECK4-NEXT: [[TMP11:%.*]] = load i32, i32* [[I]], align 4 +// CHECK4-NEXT: [[TMP11:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !25 // CHECK4-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [123 x i32], [123 x i32]* [[A]], i32 0, i32 [[TMP11]] -// CHECK4-NEXT: store i32 0, i32* [[ARRAYIDX]], align 4 +// CHECK4-NEXT: store i32 0, i32* [[ARRAYIDX]], align 4, !llvm.access.group !25 // CHECK4-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK4: omp.body.continue: // CHECK4-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK4: omp.inner.for.inc: -// CHECK4-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK4-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !25 // CHECK4-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP12]], 1 -// CHECK4-NEXT: store i32 [[ADD2]], i32* [[DOTOMP_IV]], align 4 -// CHECK4-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP14:![0-9]+]] +// CHECK4-NEXT: store i32 [[ADD2]], i32* 
[[DOTOMP_IV]], align 4, !llvm.access.group !25 +// CHECK4-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP26:![0-9]+]] // CHECK4: omp.inner.for.end: // CHECK4-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK4: omp.loop.exit: @@ -3370,23 +3370,23 @@ int main (int argc, char **argv) { // CHECK9-NEXT: store i32 [[TMP13]], i32* [[DOTOMP_IV]], align 4 // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK9: omp.inner.for.cond: -// CHECK9-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK9-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK9-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !9 +// CHECK9-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !9 // CHECK9-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP14]], [[TMP15]] // CHECK9-NEXT: br i1 [[CMP5]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK9: omp.inner.for.body: -// CHECK9-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK9-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !9 // CHECK9-NEXT: [[TMP17:%.*]] = zext i32 [[TMP16]] to i64 -// CHECK9-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK9-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !9 // CHECK9-NEXT: [[TMP19:%.*]] = zext i32 [[TMP18]] to i64 -// CHECK9-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 5, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64, i32*, i64, i32*)* @.omp_outlined..1 to void (i32*, i32*, ...)*), i64 [[TMP17]], i64 [[TMP19]], i32* [[TMP0]], i64 [[TMP1]], i32* [[TMP2]]) +// CHECK9-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) 
@__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 5, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64, i32*, i64, i32*)* @.omp_outlined..1 to void (i32*, i32*, ...)*), i64 [[TMP17]], i64 [[TMP19]], i32* [[TMP0]], i64 [[TMP1]], i32* [[TMP2]]), !llvm.access.group !9 // CHECK9-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK9: omp.inner.for.inc: -// CHECK9-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK9-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK9-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !9 +// CHECK9-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !9 // CHECK9-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP20]], [[TMP21]] -// CHECK9-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 -// CHECK9-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP8:![0-9]+]] +// CHECK9-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !9 +// CHECK9-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP10:![0-9]+]] // CHECK9: omp.inner.for.end: // CHECK9-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK9: omp.loop.exit: @@ -3483,27 +3483,27 @@ int main (int argc, char **argv) { // CHECK9-NEXT: store i32 [[TMP15]], i32* [[DOTOMP_IV]], align 4 // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK9: omp.inner.for.cond: -// CHECK9-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK9-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK9-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !13 +// CHECK9-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !13 // CHECK9-NEXT: [[CMP6:%.*]] = icmp sle i32 [[TMP16]], [[TMP17]] // CHECK9-NEXT: br i1 [[CMP6]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK9: omp.inner.for.body: -// CHECK9-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !13 // CHECK9-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP18]], 1 // CHECK9-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK9-NEXT: store i32 [[ADD]], i32* [[I4]], align 4 -// CHECK9-NEXT: [[TMP19:%.*]] = load i32, i32* [[I4]], align 4 +// CHECK9-NEXT: store i32 [[ADD]], i32* [[I4]], align 4, !llvm.access.group !13 +// CHECK9-NEXT: [[TMP19:%.*]] = load i32, i32* [[I4]], align 4, !llvm.access.group !13 // CHECK9-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP19]] to i64 // CHECK9-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, i32* [[TMP2]], i64 [[IDXPROM]] -// CHECK9-NEXT: store i32 0, i32* [[ARRAYIDX]], align 4 +// CHECK9-NEXT: store i32 0, i32* [[ARRAYIDX]], align 4, !llvm.access.group !13 // CHECK9-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK9: omp.body.continue: // CHECK9-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK9: omp.inner.for.inc: -// CHECK9-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !13 // CHECK9-NEXT: [[ADD7:%.*]] = add nsw i32 [[TMP20]], 1 -// CHECK9-NEXT: store i32 [[ADD7]], i32* [[DOTOMP_IV]], align 4 -// CHECK9-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP10:![0-9]+]] +// CHECK9-NEXT: store i32 [[ADD7]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !13 +// CHECK9-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP14:![0-9]+]] // CHECK9: omp.inner.for.end: // CHECK9-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK9: 
omp.loop.exit: @@ -3606,23 +3606,23 @@ int main (int argc, char **argv) { // CHECK9-NEXT: store i32 [[TMP13]], i32* [[DOTOMP_IV]], align 4 // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK9: omp.inner.for.cond: -// CHECK9-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK9-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK9-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !18 +// CHECK9-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !18 // CHECK9-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP14]], [[TMP15]] // CHECK9-NEXT: br i1 [[CMP5]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK9: omp.inner.for.body: -// CHECK9-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK9-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !18 // CHECK9-NEXT: [[TMP17:%.*]] = zext i32 [[TMP16]] to i64 -// CHECK9-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK9-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !18 // CHECK9-NEXT: [[TMP19:%.*]] = zext i32 [[TMP18]] to i64 -// CHECK9-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 5, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64, i32*, i64, i32*)* @.omp_outlined..3 to void (i32*, i32*, ...)*), i64 [[TMP17]], i64 [[TMP19]], i32* [[TMP0]], i64 [[TMP1]], i32* [[TMP2]]) +// CHECK9-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 5, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64, i32*, i64, i32*)* @.omp_outlined..3 to void (i32*, i32*, ...)*), i64 [[TMP17]], i64 [[TMP19]], i32* [[TMP0]], i64 [[TMP1]], i32* [[TMP2]]), !llvm.access.group !18 // CHECK9-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK9: omp.inner.for.inc: -// CHECK9-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK9-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK9-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !18 +// CHECK9-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !18 // CHECK9-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP20]], [[TMP21]] -// CHECK9-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 -// CHECK9-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP13:![0-9]+]] +// CHECK9-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !18 +// CHECK9-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP19:![0-9]+]] // CHECK9: omp.inner.for.end: // CHECK9-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK9: omp.loop.exit: @@ -3719,27 +3719,27 @@ int main (int argc, char **argv) { // CHECK9-NEXT: store i32 [[TMP15]], i32* [[DOTOMP_IV]], align 4 // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK9: omp.inner.for.cond: -// CHECK9-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK9-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK9-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !21 +// CHECK9-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !21 // CHECK9-NEXT: [[CMP6:%.*]] = icmp sle i32 [[TMP16]], [[TMP17]] // CHECK9-NEXT: br i1 [[CMP6]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK9: omp.inner.for.body: 
-// CHECK9-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !21 // CHECK9-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP18]], 1 // CHECK9-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK9-NEXT: store i32 [[ADD]], i32* [[I4]], align 4 -// CHECK9-NEXT: [[TMP19:%.*]] = load i32, i32* [[I4]], align 4 +// CHECK9-NEXT: store i32 [[ADD]], i32* [[I4]], align 4, !llvm.access.group !21 +// CHECK9-NEXT: [[TMP19:%.*]] = load i32, i32* [[I4]], align 4, !llvm.access.group !21 // CHECK9-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP19]] to i64 // CHECK9-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, i32* [[TMP2]], i64 [[IDXPROM]] -// CHECK9-NEXT: store i32 0, i32* [[ARRAYIDX]], align 4 +// CHECK9-NEXT: store i32 0, i32* [[ARRAYIDX]], align 4, !llvm.access.group !21 // CHECK9-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK9: omp.body.continue: // CHECK9-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK9: omp.inner.for.inc: -// CHECK9-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !21 // CHECK9-NEXT: [[ADD7:%.*]] = add nsw i32 [[TMP20]], 1 -// CHECK9-NEXT: store i32 [[ADD7]], i32* [[DOTOMP_IV]], align 4 -// CHECK9-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP14:![0-9]+]] +// CHECK9-NEXT: store i32 [[ADD7]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !21 +// CHECK9-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP22:![0-9]+]] // CHECK9: omp.inner.for.end: // CHECK9-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK9: omp.loop.exit: @@ -3858,51 +3858,51 @@ int main (int argc, char **argv) { // CHECK9-NEXT: store i32 [[TMP14]], i32* [[DOTOMP_IV]], align 4 // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK9: omp.inner.for.cond: -// CHECK9-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK9-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK9-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !24 +// CHECK9-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_2]], align 4, !llvm.access.group !24 // CHECK9-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP16]], 1 // CHECK9-NEXT: [[CMP6:%.*]] = icmp slt i32 [[TMP15]], [[ADD]] // CHECK9-NEXT: br i1 [[CMP6]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK9: omp.inner.for.body: -// CHECK9-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK9-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !24 // CHECK9-NEXT: [[TMP18:%.*]] = zext i32 [[TMP17]] to i64 -// CHECK9-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK9-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !24 // CHECK9-NEXT: [[TMP20:%.*]] = zext i32 [[TMP19]] to i64 -// CHECK9-NEXT: [[TMP21:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK9-NEXT: [[TMP21:%.*]] = load i32, i32* [[CONV]], align 8, !llvm.access.group !24 // CHECK9-NEXT: [[CONV7:%.*]] = bitcast i64* [[DOTCAPTURE_EXPR__CASTED]] to i32* -// CHECK9-NEXT: store i32 [[TMP21]], i32* [[CONV7]], align 4 -// CHECK9-NEXT: [[TMP22:%.*]] = load i64, i64* [[DOTCAPTURE_EXPR__CASTED]], align 8 -// CHECK9-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) 
@__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 6, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64, i32*, i64, i32*, i64)* @.omp_outlined..6 to void (i32*, i32*, ...)*), i64 [[TMP18]], i64 [[TMP20]], i32* [[TMP0]], i64 [[TMP1]], i32* [[TMP2]], i64 [[TMP22]]) +// CHECK9-NEXT: store i32 [[TMP21]], i32* [[CONV7]], align 4, !llvm.access.group !24 +// CHECK9-NEXT: [[TMP22:%.*]] = load i64, i64* [[DOTCAPTURE_EXPR__CASTED]], align 8, !llvm.access.group !24 +// CHECK9-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 6, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64, i32*, i64, i32*, i64)* @.omp_outlined..6 to void (i32*, i32*, ...)*), i64 [[TMP18]], i64 [[TMP20]], i32* [[TMP0]], i64 [[TMP1]], i32* [[TMP2]], i64 [[TMP22]]), !llvm.access.group !24 // CHECK9-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK9: omp.inner.for.inc: -// CHECK9-NEXT: [[TMP23:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK9-NEXT: [[TMP24:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK9-NEXT: [[TMP23:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !24 +// CHECK9-NEXT: [[TMP24:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !24 // CHECK9-NEXT: [[ADD8:%.*]] = add nsw i32 [[TMP23]], [[TMP24]] -// CHECK9-NEXT: store i32 [[ADD8]], i32* [[DOTOMP_IV]], align 4 -// CHECK9-NEXT: [[TMP25:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK9-NEXT: [[TMP26:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK9-NEXT: store i32 [[ADD8]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !24 +// CHECK9-NEXT: [[TMP25:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !24 +// CHECK9-NEXT: [[TMP26:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !24 // CHECK9-NEXT: [[ADD9:%.*]] = add nsw i32 [[TMP25]], [[TMP26]] -// CHECK9-NEXT: store i32 [[ADD9]], i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK9-NEXT: [[TMP27:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK9-NEXT: [[TMP28:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK9-NEXT: store i32 [[ADD9]], i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !24 +// CHECK9-NEXT: [[TMP27:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !24 +// CHECK9-NEXT: [[TMP28:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !24 // CHECK9-NEXT: [[ADD10:%.*]] = add nsw i32 [[TMP27]], [[TMP28]] -// CHECK9-NEXT: store i32 [[ADD10]], i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK9-NEXT: [[TMP29:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK9-NEXT: [[TMP30:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK9-NEXT: store i32 [[ADD10]], i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !24 +// CHECK9-NEXT: [[TMP29:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !24 +// CHECK9-NEXT: [[TMP30:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_2]], align 4, !llvm.access.group !24 // CHECK9-NEXT: [[CMP11:%.*]] = icmp sgt i32 [[TMP29]], [[TMP30]] // CHECK9-NEXT: br i1 [[CMP11]], label [[COND_TRUE12:%.*]], label [[COND_FALSE13:%.*]] // CHECK9: cond.true12: -// CHECK9-NEXT: [[TMP31:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK9-NEXT: [[TMP31:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_2]], align 4, !llvm.access.group !24 // CHECK9-NEXT: br label [[COND_END14:%.*]] // CHECK9: cond.false13: -// CHECK9-NEXT: [[TMP32:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK9-NEXT: [[TMP32:%.*]] = load i32, 
i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !24 // CHECK9-NEXT: br label [[COND_END14]] // CHECK9: cond.end14: // CHECK9-NEXT: [[COND15:%.*]] = phi i32 [ [[TMP31]], [[COND_TRUE12]] ], [ [[TMP32]], [[COND_FALSE13]] ] -// CHECK9-NEXT: store i32 [[COND15]], i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK9-NEXT: [[TMP33:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK9-NEXT: store i32 [[TMP33]], i32* [[DOTOMP_IV]], align 4 -// CHECK9-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP15:![0-9]+]] +// CHECK9-NEXT: store i32 [[COND15]], i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !24 +// CHECK9-NEXT: [[TMP33:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !24 +// CHECK9-NEXT: store i32 [[TMP33]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !24 +// CHECK9-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP25:![0-9]+]] // CHECK9: omp.inner.for.end: // CHECK9-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK9: omp.loop.exit: @@ -4002,27 +4002,27 @@ int main (int argc, char **argv) { // CHECK9-NEXT: store i32 [[TMP15]], i32* [[DOTOMP_IV]], align 4 // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK9: omp.inner.for.cond: -// CHECK9-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK9-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK9-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !27 +// CHECK9-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !27 // CHECK9-NEXT: [[CMP8:%.*]] = icmp sle i32 [[TMP16]], [[TMP17]] // CHECK9-NEXT: br i1 [[CMP8]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK9: omp.inner.for.body: -// CHECK9-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !27 // CHECK9-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP18]], 1 // CHECK9-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK9-NEXT: store i32 [[ADD]], i32* [[I6]], align 4 -// CHECK9-NEXT: [[TMP19:%.*]] = load i32, i32* [[I6]], align 4 +// CHECK9-NEXT: store i32 [[ADD]], i32* [[I6]], align 4, !llvm.access.group !27 +// CHECK9-NEXT: [[TMP19:%.*]] = load i32, i32* [[I6]], align 4, !llvm.access.group !27 // CHECK9-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP19]] to i64 // CHECK9-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, i32* [[TMP2]], i64 [[IDXPROM]] -// CHECK9-NEXT: store i32 0, i32* [[ARRAYIDX]], align 4 +// CHECK9-NEXT: store i32 0, i32* [[ARRAYIDX]], align 4, !llvm.access.group !27 // CHECK9-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK9: omp.body.continue: // CHECK9-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK9: omp.inner.for.inc: -// CHECK9-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !27 // CHECK9-NEXT: [[ADD9:%.*]] = add nsw i32 [[TMP20]], 1 -// CHECK9-NEXT: store i32 [[ADD9]], i32* [[DOTOMP_IV]], align 4 -// CHECK9-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP16:![0-9]+]] +// CHECK9-NEXT: store i32 [[ADD9]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !27 +// CHECK9-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP28:![0-9]+]] // CHECK9: omp.inner.for.end: // CHECK9-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK9: omp.loop.exit: @@ -4185,23 +4185,23 @@ int main (int argc, char **argv) { // CHECK9-NEXT: store i32 [[TMP5]], i32* [[DOTOMP_IV]], align 4 // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] 
// CHECK9: omp.inner.for.cond: -// CHECK9-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK9-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK9-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !30 +// CHECK9-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !30 // CHECK9-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] // CHECK9-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK9: omp.inner.for.body: -// CHECK9-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK9-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !30 // CHECK9-NEXT: [[TMP9:%.*]] = zext i32 [[TMP8]] to i64 -// CHECK9-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK9-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !30 // CHECK9-NEXT: [[TMP11:%.*]] = zext i32 [[TMP10]] to i64 -// CHECK9-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 3, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64, [10 x i32]*)* @.omp_outlined..9 to void (i32*, i32*, ...)*), i64 [[TMP9]], i64 [[TMP11]], [10 x i32]* [[TMP0]]) +// CHECK9-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 3, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64, [10 x i32]*)* @.omp_outlined..9 to void (i32*, i32*, ...)*), i64 [[TMP9]], i64 [[TMP11]], [10 x i32]* [[TMP0]]), !llvm.access.group !30 // CHECK9-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK9: omp.inner.for.inc: -// CHECK9-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK9-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK9-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !30 +// CHECK9-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !30 // CHECK9-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP12]], [[TMP13]] -// CHECK9-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 -// CHECK9-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP17:![0-9]+]] +// CHECK9-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !30 +// CHECK9-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP31:![0-9]+]] // CHECK9: omp.inner.for.end: // CHECK9-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK9: omp.loop.exit: @@ -4265,27 +4265,27 @@ int main (int argc, char **argv) { // CHECK9-NEXT: store i32 [[TMP7]], i32* [[DOTOMP_IV]], align 4 // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK9: omp.inner.for.cond: -// CHECK9-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK9-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK9-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !33 +// CHECK9-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !33 // CHECK9-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] // CHECK9-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK9: omp.inner.for.body: -// CHECK9-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !33 // CHECK9-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP10]], 1 // CHECK9-NEXT: [[ADD:%.*]] = add nsw i32 
0, [[MUL]] -// CHECK9-NEXT: store i32 [[ADD]], i32* [[I]], align 4 -// CHECK9-NEXT: [[TMP11:%.*]] = load i32, i32* [[I]], align 4 +// CHECK9-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !33 +// CHECK9-NEXT: [[TMP11:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !33 // CHECK9-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP11]] to i64 // CHECK9-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], [10 x i32]* [[TMP0]], i64 0, i64 [[IDXPROM]] -// CHECK9-NEXT: store i32 0, i32* [[ARRAYIDX]], align 4 +// CHECK9-NEXT: store i32 0, i32* [[ARRAYIDX]], align 4, !llvm.access.group !33 // CHECK9-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK9: omp.body.continue: // CHECK9-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK9: omp.inner.for.inc: -// CHECK9-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !33 // CHECK9-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP12]], 1 -// CHECK9-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4 -// CHECK9-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP18:![0-9]+]] +// CHECK9-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !33 +// CHECK9-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP34:![0-9]+]] // CHECK9: omp.inner.for.end: // CHECK9-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK9: omp.loop.exit: @@ -4349,23 +4349,23 @@ int main (int argc, char **argv) { // CHECK9-NEXT: store i32 [[TMP5]], i32* [[DOTOMP_IV]], align 4 // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK9: omp.inner.for.cond: -// CHECK9-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK9-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK9-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !36 +// CHECK9-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !36 // CHECK9-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] // CHECK9-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK9: omp.inner.for.body: -// CHECK9-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK9-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !36 // CHECK9-NEXT: [[TMP9:%.*]] = zext i32 [[TMP8]] to i64 -// CHECK9-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK9-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !36 // CHECK9-NEXT: [[TMP11:%.*]] = zext i32 [[TMP10]] to i64 -// CHECK9-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 3, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64, [10 x i32]*)* @.omp_outlined..12 to void (i32*, i32*, ...)*), i64 [[TMP9]], i64 [[TMP11]], [10 x i32]* [[TMP0]]) +// CHECK9-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) 
@__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 3, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64, [10 x i32]*)* @.omp_outlined..12 to void (i32*, i32*, ...)*), i64 [[TMP9]], i64 [[TMP11]], [10 x i32]* [[TMP0]]), !llvm.access.group !36 // CHECK9-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK9: omp.inner.for.inc: -// CHECK9-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK9-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK9-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !36 +// CHECK9-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !36 // CHECK9-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP12]], [[TMP13]] -// CHECK9-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 -// CHECK9-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP19:![0-9]+]] +// CHECK9-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !36 +// CHECK9-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP37:![0-9]+]] // CHECK9: omp.inner.for.end: // CHECK9-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK9: omp.loop.exit: @@ -4429,27 +4429,27 @@ int main (int argc, char **argv) { // CHECK9-NEXT: store i32 [[TMP7]], i32* [[DOTOMP_IV]], align 4 // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK9: omp.inner.for.cond: -// CHECK9-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK9-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK9-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !39 +// CHECK9-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !39 // CHECK9-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] // CHECK9-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK9: omp.inner.for.body: -// CHECK9-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !39 // CHECK9-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP10]], 1 // CHECK9-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK9-NEXT: store i32 [[ADD]], i32* [[I]], align 4 -// CHECK9-NEXT: [[TMP11:%.*]] = load i32, i32* [[I]], align 4 +// CHECK9-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !39 +// CHECK9-NEXT: [[TMP11:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !39 // CHECK9-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP11]] to i64 // CHECK9-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], [10 x i32]* [[TMP0]], i64 0, i64 [[IDXPROM]] -// CHECK9-NEXT: store i32 0, i32* [[ARRAYIDX]], align 4 +// CHECK9-NEXT: store i32 0, i32* [[ARRAYIDX]], align 4, !llvm.access.group !39 // CHECK9-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK9: omp.body.continue: // CHECK9-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK9: omp.inner.for.inc: -// CHECK9-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !39 // CHECK9-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP12]], 1 -// CHECK9-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4 -// CHECK9-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP20:![0-9]+]] +// CHECK9-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !39 +// CHECK9-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP40:![0-9]+]] // CHECK9: omp.inner.for.end: // CHECK9-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK9: omp.loop.exit: @@ 
-4529,47 +4529,47 @@ int main (int argc, char **argv) { // CHECK9-NEXT: store i32 [[TMP6]], i32* [[DOTOMP_IV]], align 4 // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK9: omp.inner.for.cond: -// CHECK9-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !42 // CHECK9-NEXT: [[CMP1:%.*]] = icmp slt i32 [[TMP7]], 10 // CHECK9-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK9: omp.inner.for.body: -// CHECK9-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK9-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !42 // CHECK9-NEXT: [[TMP9:%.*]] = zext i32 [[TMP8]] to i64 -// CHECK9-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK9-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !42 // CHECK9-NEXT: [[TMP11:%.*]] = zext i32 [[TMP10]] to i64 -// CHECK9-NEXT: [[TMP12:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK9-NEXT: [[TMP12:%.*]] = load i32, i32* [[CONV]], align 8, !llvm.access.group !42 // CHECK9-NEXT: [[CONV2:%.*]] = bitcast i64* [[DOTCAPTURE_EXPR__CASTED]] to i32* -// CHECK9-NEXT: store i32 [[TMP12]], i32* [[CONV2]], align 4 -// CHECK9-NEXT: [[TMP13:%.*]] = load i64, i64* [[DOTCAPTURE_EXPR__CASTED]], align 8 -// CHECK9-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 4, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64, [10 x i32]*, i64)* @.omp_outlined..16 to void (i32*, i32*, ...)*), i64 [[TMP9]], i64 [[TMP11]], [10 x i32]* [[TMP0]], i64 [[TMP13]]) +// CHECK9-NEXT: store i32 [[TMP12]], i32* [[CONV2]], align 4, !llvm.access.group !42 +// CHECK9-NEXT: [[TMP13:%.*]] = load i64, i64* [[DOTCAPTURE_EXPR__CASTED]], align 8, !llvm.access.group !42 +// CHECK9-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) 
@__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 4, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64, [10 x i32]*, i64)* @.omp_outlined..16 to void (i32*, i32*, ...)*), i64 [[TMP9]], i64 [[TMP11]], [10 x i32]* [[TMP0]], i64 [[TMP13]]), !llvm.access.group !42 // CHECK9-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK9: omp.inner.for.inc: -// CHECK9-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK9-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK9-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !42 +// CHECK9-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !42 // CHECK9-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP14]], [[TMP15]] -// CHECK9-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 -// CHECK9-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK9-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK9-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !42 +// CHECK9-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !42 +// CHECK9-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !42 // CHECK9-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP16]], [[TMP17]] -// CHECK9-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK9-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK9-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK9-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !42 +// CHECK9-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !42 +// CHECK9-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !42 // CHECK9-NEXT: [[ADD4:%.*]] = add nsw i32 [[TMP18]], [[TMP19]] -// CHECK9-NEXT: store i32 [[ADD4]], i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK9-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK9-NEXT: store i32 [[ADD4]], i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !42 +// CHECK9-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !42 // CHECK9-NEXT: [[CMP5:%.*]] = icmp sgt i32 [[TMP20]], 9 // CHECK9-NEXT: br i1 [[CMP5]], label [[COND_TRUE6:%.*]], label [[COND_FALSE7:%.*]] // CHECK9: cond.true6: // CHECK9-NEXT: br label [[COND_END8:%.*]] // CHECK9: cond.false7: -// CHECK9-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK9-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !42 // CHECK9-NEXT: br label [[COND_END8]] // CHECK9: cond.end8: // CHECK9-NEXT: [[COND9:%.*]] = phi i32 [ 9, [[COND_TRUE6]] ], [ [[TMP21]], [[COND_FALSE7]] ] -// CHECK9-NEXT: store i32 [[COND9]], i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK9-NEXT: [[TMP22:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK9-NEXT: store i32 [[TMP22]], i32* [[DOTOMP_IV]], align 4 -// CHECK9-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP21:![0-9]+]] +// CHECK9-NEXT: store i32 [[COND9]], i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !42 +// CHECK9-NEXT: [[TMP22:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !42 +// CHECK9-NEXT: store i32 [[TMP22]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !42 +// CHECK9-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP43:![0-9]+]] // CHECK9: omp.inner.for.end: // CHECK9-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK9: 
omp.loop.exit: @@ -4636,27 +4636,27 @@ int main (int argc, char **argv) { // CHECK9-NEXT: store i32 [[TMP7]], i32* [[DOTOMP_IV]], align 4 // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK9: omp.inner.for.cond: -// CHECK9-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK9-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK9-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !45 +// CHECK9-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !45 // CHECK9-NEXT: [[CMP3:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] // CHECK9-NEXT: br i1 [[CMP3]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK9: omp.inner.for.body: -// CHECK9-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !45 // CHECK9-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP10]], 1 // CHECK9-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK9-NEXT: store i32 [[ADD]], i32* [[I]], align 4 -// CHECK9-NEXT: [[TMP11:%.*]] = load i32, i32* [[I]], align 4 +// CHECK9-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !45 +// CHECK9-NEXT: [[TMP11:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !45 // CHECK9-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP11]] to i64 // CHECK9-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], [10 x i32]* [[TMP0]], i64 0, i64 [[IDXPROM]] -// CHECK9-NEXT: store i32 0, i32* [[ARRAYIDX]], align 4 +// CHECK9-NEXT: store i32 0, i32* [[ARRAYIDX]], align 4, !llvm.access.group !45 // CHECK9-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK9: omp.body.continue: // CHECK9-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK9: omp.inner.for.inc: -// CHECK9-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !45 // CHECK9-NEXT: [[ADD4:%.*]] = add nsw i32 [[TMP12]], 1 -// CHECK9-NEXT: store i32 [[ADD4]], i32* [[DOTOMP_IV]], align 4 -// CHECK9-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP22:![0-9]+]] +// CHECK9-NEXT: store i32 [[ADD4]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !45 +// CHECK9-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP46:![0-9]+]] // CHECK9: omp.inner.for.end: // CHECK9-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK9: omp.loop.exit: @@ -4994,23 +4994,23 @@ int main (int argc, char **argv) { // CHECK10-NEXT: store i32 [[TMP13]], i32* [[DOTOMP_IV]], align 4 // CHECK10-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK10: omp.inner.for.cond: -// CHECK10-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK10-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK10-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !9 +// CHECK10-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !9 // CHECK10-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP14]], [[TMP15]] // CHECK10-NEXT: br i1 [[CMP5]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK10: omp.inner.for.body: -// CHECK10-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK10-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !9 // CHECK10-NEXT: [[TMP17:%.*]] = zext i32 [[TMP16]] to i64 -// CHECK10-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK10-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 
4, !llvm.access.group !9 // CHECK10-NEXT: [[TMP19:%.*]] = zext i32 [[TMP18]] to i64 -// CHECK10-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 5, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64, i32*, i64, i32*)* @.omp_outlined..1 to void (i32*, i32*, ...)*), i64 [[TMP17]], i64 [[TMP19]], i32* [[TMP0]], i64 [[TMP1]], i32* [[TMP2]]) +// CHECK10-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 5, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64, i32*, i64, i32*)* @.omp_outlined..1 to void (i32*, i32*, ...)*), i64 [[TMP17]], i64 [[TMP19]], i32* [[TMP0]], i64 [[TMP1]], i32* [[TMP2]]), !llvm.access.group !9 // CHECK10-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK10: omp.inner.for.inc: -// CHECK10-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK10-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK10-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !9 +// CHECK10-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !9 // CHECK10-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP20]], [[TMP21]] -// CHECK10-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 -// CHECK10-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP8:![0-9]+]] +// CHECK10-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !9 +// CHECK10-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP10:![0-9]+]] // CHECK10: omp.inner.for.end: // CHECK10-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK10: omp.loop.exit: @@ -5107,27 +5107,27 @@ int main (int argc, char **argv) { // CHECK10-NEXT: store i32 [[TMP15]], i32* [[DOTOMP_IV]], align 4 // CHECK10-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK10: omp.inner.for.cond: -// CHECK10-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK10-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK10-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !13 +// CHECK10-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !13 // CHECK10-NEXT: [[CMP6:%.*]] = icmp sle i32 [[TMP16]], [[TMP17]] // CHECK10-NEXT: br i1 [[CMP6]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK10: omp.inner.for.body: -// CHECK10-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK10-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !13 // CHECK10-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP18]], 1 // CHECK10-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK10-NEXT: store i32 [[ADD]], i32* [[I4]], align 4 -// CHECK10-NEXT: [[TMP19:%.*]] = load i32, i32* [[I4]], align 4 +// CHECK10-NEXT: store i32 [[ADD]], i32* [[I4]], align 4, !llvm.access.group !13 +// CHECK10-NEXT: [[TMP19:%.*]] = load i32, i32* [[I4]], align 4, !llvm.access.group !13 // CHECK10-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP19]] to i64 // CHECK10-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, i32* [[TMP2]], i64 [[IDXPROM]] -// CHECK10-NEXT: store i32 0, i32* [[ARRAYIDX]], align 4 +// CHECK10-NEXT: store i32 0, i32* [[ARRAYIDX]], align 4, !llvm.access.group !13 // CHECK10-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK10: omp.body.continue: // CHECK10-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK10: omp.inner.for.inc: -// CHECK10-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK10-NEXT: 
[[TMP20:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !13 // CHECK10-NEXT: [[ADD7:%.*]] = add nsw i32 [[TMP20]], 1 -// CHECK10-NEXT: store i32 [[ADD7]], i32* [[DOTOMP_IV]], align 4 -// CHECK10-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP10:![0-9]+]] +// CHECK10-NEXT: store i32 [[ADD7]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !13 +// CHECK10-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP14:![0-9]+]] // CHECK10: omp.inner.for.end: // CHECK10-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK10: omp.loop.exit: @@ -5230,23 +5230,23 @@ int main (int argc, char **argv) { // CHECK10-NEXT: store i32 [[TMP13]], i32* [[DOTOMP_IV]], align 4 // CHECK10-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK10: omp.inner.for.cond: -// CHECK10-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK10-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK10-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !18 +// CHECK10-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !18 // CHECK10-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP14]], [[TMP15]] // CHECK10-NEXT: br i1 [[CMP5]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK10: omp.inner.for.body: -// CHECK10-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK10-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !18 // CHECK10-NEXT: [[TMP17:%.*]] = zext i32 [[TMP16]] to i64 -// CHECK10-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK10-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !18 // CHECK10-NEXT: [[TMP19:%.*]] = zext i32 [[TMP18]] to i64 -// CHECK10-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 5, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64, i32*, i64, i32*)* @.omp_outlined..3 to void (i32*, i32*, ...)*), i64 [[TMP17]], i64 [[TMP19]], i32* [[TMP0]], i64 [[TMP1]], i32* [[TMP2]]) +// CHECK10-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) 
@__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 5, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64, i32*, i64, i32*)* @.omp_outlined..3 to void (i32*, i32*, ...)*), i64 [[TMP17]], i64 [[TMP19]], i32* [[TMP0]], i64 [[TMP1]], i32* [[TMP2]]), !llvm.access.group !18 // CHECK10-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK10: omp.inner.for.inc: -// CHECK10-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK10-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK10-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !18 +// CHECK10-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !18 // CHECK10-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP20]], [[TMP21]] -// CHECK10-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 -// CHECK10-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP13:![0-9]+]] +// CHECK10-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !18 +// CHECK10-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP19:![0-9]+]] // CHECK10: omp.inner.for.end: // CHECK10-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK10: omp.loop.exit: @@ -5343,27 +5343,27 @@ int main (int argc, char **argv) { // CHECK10-NEXT: store i32 [[TMP15]], i32* [[DOTOMP_IV]], align 4 // CHECK10-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK10: omp.inner.for.cond: -// CHECK10-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK10-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK10-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !21 +// CHECK10-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !21 // CHECK10-NEXT: [[CMP6:%.*]] = icmp sle i32 [[TMP16]], [[TMP17]] // CHECK10-NEXT: br i1 [[CMP6]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK10: omp.inner.for.body: -// CHECK10-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK10-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !21 // CHECK10-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP18]], 1 // CHECK10-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK10-NEXT: store i32 [[ADD]], i32* [[I4]], align 4 -// CHECK10-NEXT: [[TMP19:%.*]] = load i32, i32* [[I4]], align 4 +// CHECK10-NEXT: store i32 [[ADD]], i32* [[I4]], align 4, !llvm.access.group !21 +// CHECK10-NEXT: [[TMP19:%.*]] = load i32, i32* [[I4]], align 4, !llvm.access.group !21 // CHECK10-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP19]] to i64 // CHECK10-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, i32* [[TMP2]], i64 [[IDXPROM]] -// CHECK10-NEXT: store i32 0, i32* [[ARRAYIDX]], align 4 +// CHECK10-NEXT: store i32 0, i32* [[ARRAYIDX]], align 4, !llvm.access.group !21 // CHECK10-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK10: omp.body.continue: // CHECK10-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK10: omp.inner.for.inc: -// CHECK10-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK10-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !21 // CHECK10-NEXT: [[ADD7:%.*]] = add nsw i32 [[TMP20]], 1 -// CHECK10-NEXT: store i32 [[ADD7]], i32* [[DOTOMP_IV]], align 4 -// CHECK10-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP14:![0-9]+]] +// CHECK10-NEXT: store i32 [[ADD7]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !21 +// CHECK10-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP22:![0-9]+]] // CHECK10: omp.inner.for.end: // 
CHECK10-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK10: omp.loop.exit: @@ -5482,51 +5482,51 @@ int main (int argc, char **argv) { // CHECK10-NEXT: store i32 [[TMP14]], i32* [[DOTOMP_IV]], align 4 // CHECK10-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK10: omp.inner.for.cond: -// CHECK10-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK10-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK10-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !24 +// CHECK10-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_2]], align 4, !llvm.access.group !24 // CHECK10-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP16]], 1 // CHECK10-NEXT: [[CMP6:%.*]] = icmp slt i32 [[TMP15]], [[ADD]] // CHECK10-NEXT: br i1 [[CMP6]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK10: omp.inner.for.body: -// CHECK10-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK10-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !24 // CHECK10-NEXT: [[TMP18:%.*]] = zext i32 [[TMP17]] to i64 -// CHECK10-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK10-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !24 // CHECK10-NEXT: [[TMP20:%.*]] = zext i32 [[TMP19]] to i64 -// CHECK10-NEXT: [[TMP21:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK10-NEXT: [[TMP21:%.*]] = load i32, i32* [[CONV]], align 8, !llvm.access.group !24 // CHECK10-NEXT: [[CONV7:%.*]] = bitcast i64* [[DOTCAPTURE_EXPR__CASTED]] to i32* -// CHECK10-NEXT: store i32 [[TMP21]], i32* [[CONV7]], align 4 -// CHECK10-NEXT: [[TMP22:%.*]] = load i64, i64* [[DOTCAPTURE_EXPR__CASTED]], align 8 -// CHECK10-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 6, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64, i32*, i64, i32*, i64)* @.omp_outlined..6 to void (i32*, i32*, ...)*), i64 [[TMP18]], i64 [[TMP20]], i32* [[TMP0]], i64 [[TMP1]], i32* [[TMP2]], i64 [[TMP22]]) +// CHECK10-NEXT: store i32 [[TMP21]], i32* [[CONV7]], align 4, !llvm.access.group !24 +// CHECK10-NEXT: [[TMP22:%.*]] = load i64, i64* [[DOTCAPTURE_EXPR__CASTED]], align 8, !llvm.access.group !24 +// CHECK10-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) 
@__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 6, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64, i32*, i64, i32*, i64)* @.omp_outlined..6 to void (i32*, i32*, ...)*), i64 [[TMP18]], i64 [[TMP20]], i32* [[TMP0]], i64 [[TMP1]], i32* [[TMP2]], i64 [[TMP22]]), !llvm.access.group !24 // CHECK10-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK10: omp.inner.for.inc: -// CHECK10-NEXT: [[TMP23:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK10-NEXT: [[TMP24:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK10-NEXT: [[TMP23:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !24 +// CHECK10-NEXT: [[TMP24:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !24 // CHECK10-NEXT: [[ADD8:%.*]] = add nsw i32 [[TMP23]], [[TMP24]] -// CHECK10-NEXT: store i32 [[ADD8]], i32* [[DOTOMP_IV]], align 4 -// CHECK10-NEXT: [[TMP25:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK10-NEXT: [[TMP26:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK10-NEXT: store i32 [[ADD8]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !24 +// CHECK10-NEXT: [[TMP25:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !24 +// CHECK10-NEXT: [[TMP26:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !24 // CHECK10-NEXT: [[ADD9:%.*]] = add nsw i32 [[TMP25]], [[TMP26]] -// CHECK10-NEXT: store i32 [[ADD9]], i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK10-NEXT: [[TMP27:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK10-NEXT: [[TMP28:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK10-NEXT: store i32 [[ADD9]], i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !24 +// CHECK10-NEXT: [[TMP27:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !24 +// CHECK10-NEXT: [[TMP28:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !24 // CHECK10-NEXT: [[ADD10:%.*]] = add nsw i32 [[TMP27]], [[TMP28]] -// CHECK10-NEXT: store i32 [[ADD10]], i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK10-NEXT: [[TMP29:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK10-NEXT: [[TMP30:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK10-NEXT: store i32 [[ADD10]], i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !24 +// CHECK10-NEXT: [[TMP29:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !24 +// CHECK10-NEXT: [[TMP30:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_2]], align 4, !llvm.access.group !24 // CHECK10-NEXT: [[CMP11:%.*]] = icmp sgt i32 [[TMP29]], [[TMP30]] // CHECK10-NEXT: br i1 [[CMP11]], label [[COND_TRUE12:%.*]], label [[COND_FALSE13:%.*]] // CHECK10: cond.true12: -// CHECK10-NEXT: [[TMP31:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK10-NEXT: [[TMP31:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_2]], align 4, !llvm.access.group !24 // CHECK10-NEXT: br label [[COND_END14:%.*]] // CHECK10: cond.false13: -// CHECK10-NEXT: [[TMP32:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK10-NEXT: [[TMP32:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !24 // CHECK10-NEXT: br label [[COND_END14]] // CHECK10: cond.end14: // CHECK10-NEXT: [[COND15:%.*]] = phi i32 [ [[TMP31]], [[COND_TRUE12]] ], [ [[TMP32]], [[COND_FALSE13]] ] -// CHECK10-NEXT: store i32 [[COND15]], i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK10-NEXT: [[TMP33:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK10-NEXT: store i32 [[TMP33]], i32* [[DOTOMP_IV]], align 4 -// CHECK10-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop 
[[LOOP15:![0-9]+]] +// CHECK10-NEXT: store i32 [[COND15]], i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !24 +// CHECK10-NEXT: [[TMP33:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !24 +// CHECK10-NEXT: store i32 [[TMP33]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !24 +// CHECK10-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP25:![0-9]+]] // CHECK10: omp.inner.for.end: // CHECK10-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK10: omp.loop.exit: @@ -5626,27 +5626,27 @@ int main (int argc, char **argv) { // CHECK10-NEXT: store i32 [[TMP15]], i32* [[DOTOMP_IV]], align 4 // CHECK10-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK10: omp.inner.for.cond: -// CHECK10-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK10-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK10-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !27 +// CHECK10-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !27 // CHECK10-NEXT: [[CMP8:%.*]] = icmp sle i32 [[TMP16]], [[TMP17]] // CHECK10-NEXT: br i1 [[CMP8]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK10: omp.inner.for.body: -// CHECK10-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK10-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !27 // CHECK10-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP18]], 1 // CHECK10-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK10-NEXT: store i32 [[ADD]], i32* [[I6]], align 4 -// CHECK10-NEXT: [[TMP19:%.*]] = load i32, i32* [[I6]], align 4 +// CHECK10-NEXT: store i32 [[ADD]], i32* [[I6]], align 4, !llvm.access.group !27 +// CHECK10-NEXT: [[TMP19:%.*]] = load i32, i32* [[I6]], align 4, !llvm.access.group !27 // CHECK10-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP19]] to i64 // CHECK10-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, i32* [[TMP2]], i64 [[IDXPROM]] -// CHECK10-NEXT: store i32 0, i32* [[ARRAYIDX]], align 4 +// CHECK10-NEXT: store i32 0, i32* [[ARRAYIDX]], align 4, !llvm.access.group !27 // CHECK10-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK10: omp.body.continue: // CHECK10-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK10: omp.inner.for.inc: -// CHECK10-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK10-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !27 // CHECK10-NEXT: [[ADD9:%.*]] = add nsw i32 [[TMP20]], 1 -// CHECK10-NEXT: store i32 [[ADD9]], i32* [[DOTOMP_IV]], align 4 -// CHECK10-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP16:![0-9]+]] +// CHECK10-NEXT: store i32 [[ADD9]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !27 +// CHECK10-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP28:![0-9]+]] // CHECK10: omp.inner.for.end: // CHECK10-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK10: omp.loop.exit: @@ -5809,23 +5809,23 @@ int main (int argc, char **argv) { // CHECK10-NEXT: store i32 [[TMP5]], i32* [[DOTOMP_IV]], align 4 // CHECK10-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK10: omp.inner.for.cond: -// CHECK10-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK10-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK10-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !30 +// CHECK10-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !30 // CHECK10-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] // CHECK10-NEXT: 
br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK10: omp.inner.for.body: -// CHECK10-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK10-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !30 // CHECK10-NEXT: [[TMP9:%.*]] = zext i32 [[TMP8]] to i64 -// CHECK10-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK10-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !30 // CHECK10-NEXT: [[TMP11:%.*]] = zext i32 [[TMP10]] to i64 -// CHECK10-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 3, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64, [10 x i32]*)* @.omp_outlined..9 to void (i32*, i32*, ...)*), i64 [[TMP9]], i64 [[TMP11]], [10 x i32]* [[TMP0]]) +// CHECK10-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 3, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64, [10 x i32]*)* @.omp_outlined..9 to void (i32*, i32*, ...)*), i64 [[TMP9]], i64 [[TMP11]], [10 x i32]* [[TMP0]]), !llvm.access.group !30 // CHECK10-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK10: omp.inner.for.inc: -// CHECK10-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK10-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK10-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !30 +// CHECK10-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !30 // CHECK10-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP12]], [[TMP13]] -// CHECK10-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 -// CHECK10-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP17:![0-9]+]] +// CHECK10-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !30 +// CHECK10-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP31:![0-9]+]] // CHECK10: omp.inner.for.end: // CHECK10-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK10: omp.loop.exit: @@ -5889,27 +5889,27 @@ int main (int argc, char **argv) { // CHECK10-NEXT: store i32 [[TMP7]], i32* [[DOTOMP_IV]], align 4 // CHECK10-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK10: omp.inner.for.cond: -// CHECK10-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK10-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK10-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !33 +// CHECK10-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !33 // CHECK10-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] // CHECK10-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK10: omp.inner.for.body: -// CHECK10-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK10-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !33 // CHECK10-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP10]], 1 // CHECK10-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK10-NEXT: store i32 [[ADD]], i32* [[I]], align 4 -// CHECK10-NEXT: [[TMP11:%.*]] = load i32, i32* [[I]], align 4 +// CHECK10-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !33 +// CHECK10-NEXT: [[TMP11:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !33 // CHECK10-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP11]] to i64 // CHECK10-NEXT: [[ARRAYIDX:%.*]] = getelementptr 
inbounds [10 x i32], [10 x i32]* [[TMP0]], i64 0, i64 [[IDXPROM]] -// CHECK10-NEXT: store i32 0, i32* [[ARRAYIDX]], align 4 +// CHECK10-NEXT: store i32 0, i32* [[ARRAYIDX]], align 4, !llvm.access.group !33 // CHECK10-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK10: omp.body.continue: // CHECK10-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK10: omp.inner.for.inc: -// CHECK10-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK10-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !33 // CHECK10-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP12]], 1 -// CHECK10-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4 -// CHECK10-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP18:![0-9]+]] +// CHECK10-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !33 +// CHECK10-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP34:![0-9]+]] // CHECK10: omp.inner.for.end: // CHECK10-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK10: omp.loop.exit: @@ -5973,23 +5973,23 @@ int main (int argc, char **argv) { // CHECK10-NEXT: store i32 [[TMP5]], i32* [[DOTOMP_IV]], align 4 // CHECK10-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK10: omp.inner.for.cond: -// CHECK10-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK10-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK10-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !36 +// CHECK10-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !36 // CHECK10-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] // CHECK10-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK10: omp.inner.for.body: -// CHECK10-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK10-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !36 // CHECK10-NEXT: [[TMP9:%.*]] = zext i32 [[TMP8]] to i64 -// CHECK10-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK10-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !36 // CHECK10-NEXT: [[TMP11:%.*]] = zext i32 [[TMP10]] to i64 -// CHECK10-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 3, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64, [10 x i32]*)* @.omp_outlined..12 to void (i32*, i32*, ...)*), i64 [[TMP9]], i64 [[TMP11]], [10 x i32]* [[TMP0]]) +// CHECK10-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) 
@__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 3, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64, [10 x i32]*)* @.omp_outlined..12 to void (i32*, i32*, ...)*), i64 [[TMP9]], i64 [[TMP11]], [10 x i32]* [[TMP0]]), !llvm.access.group !36 // CHECK10-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK10: omp.inner.for.inc: -// CHECK10-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK10-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK10-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !36 +// CHECK10-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !36 // CHECK10-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP12]], [[TMP13]] -// CHECK10-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 -// CHECK10-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP19:![0-9]+]] +// CHECK10-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !36 +// CHECK10-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP37:![0-9]+]] // CHECK10: omp.inner.for.end: // CHECK10-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK10: omp.loop.exit: @@ -6053,27 +6053,27 @@ int main (int argc, char **argv) { // CHECK10-NEXT: store i32 [[TMP7]], i32* [[DOTOMP_IV]], align 4 // CHECK10-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK10: omp.inner.for.cond: -// CHECK10-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK10-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK10-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !39 +// CHECK10-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !39 // CHECK10-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] // CHECK10-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK10: omp.inner.for.body: -// CHECK10-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK10-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !39 // CHECK10-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP10]], 1 // CHECK10-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK10-NEXT: store i32 [[ADD]], i32* [[I]], align 4 -// CHECK10-NEXT: [[TMP11:%.*]] = load i32, i32* [[I]], align 4 +// CHECK10-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !39 +// CHECK10-NEXT: [[TMP11:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !39 // CHECK10-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP11]] to i64 // CHECK10-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], [10 x i32]* [[TMP0]], i64 0, i64 [[IDXPROM]] -// CHECK10-NEXT: store i32 0, i32* [[ARRAYIDX]], align 4 +// CHECK10-NEXT: store i32 0, i32* [[ARRAYIDX]], align 4, !llvm.access.group !39 // CHECK10-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK10: omp.body.continue: // CHECK10-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK10: omp.inner.for.inc: -// CHECK10-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK10-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !39 // CHECK10-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP12]], 1 -// CHECK10-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4 -// CHECK10-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP20:![0-9]+]] +// CHECK10-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !39 +// CHECK10-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP40:![0-9]+]] // CHECK10: omp.inner.for.end: // CHECK10-NEXT: br label 
[[OMP_LOOP_EXIT:%.*]] // CHECK10: omp.loop.exit: @@ -6153,47 +6153,47 @@ int main (int argc, char **argv) { // CHECK10-NEXT: store i32 [[TMP6]], i32* [[DOTOMP_IV]], align 4 // CHECK10-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK10: omp.inner.for.cond: -// CHECK10-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK10-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !42 // CHECK10-NEXT: [[CMP1:%.*]] = icmp slt i32 [[TMP7]], 10 // CHECK10-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK10: omp.inner.for.body: -// CHECK10-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK10-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !42 // CHECK10-NEXT: [[TMP9:%.*]] = zext i32 [[TMP8]] to i64 -// CHECK10-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK10-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !42 // CHECK10-NEXT: [[TMP11:%.*]] = zext i32 [[TMP10]] to i64 -// CHECK10-NEXT: [[TMP12:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK10-NEXT: [[TMP12:%.*]] = load i32, i32* [[CONV]], align 8, !llvm.access.group !42 // CHECK10-NEXT: [[CONV2:%.*]] = bitcast i64* [[DOTCAPTURE_EXPR__CASTED]] to i32* -// CHECK10-NEXT: store i32 [[TMP12]], i32* [[CONV2]], align 4 -// CHECK10-NEXT: [[TMP13:%.*]] = load i64, i64* [[DOTCAPTURE_EXPR__CASTED]], align 8 -// CHECK10-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 4, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64, [10 x i32]*, i64)* @.omp_outlined..16 to void (i32*, i32*, ...)*), i64 [[TMP9]], i64 [[TMP11]], [10 x i32]* [[TMP0]], i64 [[TMP13]]) +// CHECK10-NEXT: store i32 [[TMP12]], i32* [[CONV2]], align 4, !llvm.access.group !42 +// CHECK10-NEXT: [[TMP13:%.*]] = load i64, i64* [[DOTCAPTURE_EXPR__CASTED]], align 8, !llvm.access.group !42 +// CHECK10-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) 
@__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 4, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64, [10 x i32]*, i64)* @.omp_outlined..16 to void (i32*, i32*, ...)*), i64 [[TMP9]], i64 [[TMP11]], [10 x i32]* [[TMP0]], i64 [[TMP13]]), !llvm.access.group !42 // CHECK10-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK10: omp.inner.for.inc: -// CHECK10-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK10-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK10-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !42 +// CHECK10-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !42 // CHECK10-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP14]], [[TMP15]] -// CHECK10-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 -// CHECK10-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK10-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK10-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !42 +// CHECK10-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !42 +// CHECK10-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !42 // CHECK10-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP16]], [[TMP17]] -// CHECK10-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK10-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK10-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK10-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !42 +// CHECK10-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !42 +// CHECK10-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !42 // CHECK10-NEXT: [[ADD4:%.*]] = add nsw i32 [[TMP18]], [[TMP19]] -// CHECK10-NEXT: store i32 [[ADD4]], i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK10-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK10-NEXT: store i32 [[ADD4]], i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !42 +// CHECK10-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !42 // CHECK10-NEXT: [[CMP5:%.*]] = icmp sgt i32 [[TMP20]], 9 // CHECK10-NEXT: br i1 [[CMP5]], label [[COND_TRUE6:%.*]], label [[COND_FALSE7:%.*]] // CHECK10: cond.true6: // CHECK10-NEXT: br label [[COND_END8:%.*]] // CHECK10: cond.false7: -// CHECK10-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK10-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !42 // CHECK10-NEXT: br label [[COND_END8]] // CHECK10: cond.end8: // CHECK10-NEXT: [[COND9:%.*]] = phi i32 [ 9, [[COND_TRUE6]] ], [ [[TMP21]], [[COND_FALSE7]] ] -// CHECK10-NEXT: store i32 [[COND9]], i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK10-NEXT: [[TMP22:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK10-NEXT: store i32 [[TMP22]], i32* [[DOTOMP_IV]], align 4 -// CHECK10-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP21:![0-9]+]] +// CHECK10-NEXT: store i32 [[COND9]], i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !42 +// CHECK10-NEXT: [[TMP22:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !42 +// CHECK10-NEXT: store i32 [[TMP22]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !42 +// CHECK10-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP43:![0-9]+]] // CHECK10: omp.inner.for.end: // CHECK10-NEXT: br 
label [[OMP_LOOP_EXIT:%.*]] // CHECK10: omp.loop.exit: @@ -6260,27 +6260,27 @@ int main (int argc, char **argv) { // CHECK10-NEXT: store i32 [[TMP7]], i32* [[DOTOMP_IV]], align 4 // CHECK10-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK10: omp.inner.for.cond: -// CHECK10-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK10-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK10-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !45 +// CHECK10-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !45 // CHECK10-NEXT: [[CMP3:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] // CHECK10-NEXT: br i1 [[CMP3]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK10: omp.inner.for.body: -// CHECK10-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK10-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !45 // CHECK10-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP10]], 1 // CHECK10-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK10-NEXT: store i32 [[ADD]], i32* [[I]], align 4 -// CHECK10-NEXT: [[TMP11:%.*]] = load i32, i32* [[I]], align 4 +// CHECK10-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !45 +// CHECK10-NEXT: [[TMP11:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !45 // CHECK10-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP11]] to i64 // CHECK10-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], [10 x i32]* [[TMP0]], i64 0, i64 [[IDXPROM]] -// CHECK10-NEXT: store i32 0, i32* [[ARRAYIDX]], align 4 +// CHECK10-NEXT: store i32 0, i32* [[ARRAYIDX]], align 4, !llvm.access.group !45 // CHECK10-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK10: omp.body.continue: // CHECK10-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK10: omp.inner.for.inc: -// CHECK10-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK10-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !45 // CHECK10-NEXT: [[ADD4:%.*]] = add nsw i32 [[TMP12]], 1 -// CHECK10-NEXT: store i32 [[ADD4]], i32* [[DOTOMP_IV]], align 4 -// CHECK10-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP22:![0-9]+]] +// CHECK10-NEXT: store i32 [[ADD4]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !45 +// CHECK10-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP46:![0-9]+]] // CHECK10: omp.inner.for.end: // CHECK10-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK10: omp.loop.exit: @@ -6615,21 +6615,21 @@ int main (int argc, char **argv) { // CHECK11-NEXT: store i32 [[TMP13]], i32* [[DOTOMP_IV]], align 4 // CHECK11-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK11: omp.inner.for.cond: -// CHECK11-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK11-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK11-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !10 +// CHECK11-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !10 // CHECK11-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP14]], [[TMP15]] // CHECK11-NEXT: br i1 [[CMP5]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK11: omp.inner.for.body: -// CHECK11-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK11-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK11-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) 
@__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 5, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32, i32, i32*, i32, i32*)* @.omp_outlined..1 to void (i32*, i32*, ...)*), i32 [[TMP16]], i32 [[TMP17]], i32* [[TMP0]], i32 [[TMP1]], i32* [[TMP2]]) +// CHECK11-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !10 +// CHECK11-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !10 +// CHECK11-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 5, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32, i32, i32*, i32, i32*)* @.omp_outlined..1 to void (i32*, i32*, ...)*), i32 [[TMP16]], i32 [[TMP17]], i32* [[TMP0]], i32 [[TMP1]], i32* [[TMP2]]), !llvm.access.group !10 // CHECK11-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK11: omp.inner.for.inc: -// CHECK11-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK11-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK11-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !10 +// CHECK11-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !10 // CHECK11-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP18]], [[TMP19]] -// CHECK11-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 -// CHECK11-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP9:![0-9]+]] +// CHECK11-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !10 +// CHECK11-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP11:![0-9]+]] // CHECK11: omp.inner.for.end: // CHECK11-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK11: omp.loop.exit: @@ -6724,26 +6724,26 @@ int main (int argc, char **argv) { // CHECK11-NEXT: store i32 [[TMP15]], i32* [[DOTOMP_IV]], align 4 // CHECK11-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK11: omp.inner.for.cond: -// CHECK11-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK11-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK11-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !14 +// CHECK11-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !14 // CHECK11-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP16]], [[TMP17]] // CHECK11-NEXT: br i1 [[CMP5]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK11: omp.inner.for.body: -// CHECK11-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !14 // CHECK11-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP18]], 1 // CHECK11-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK11-NEXT: store i32 [[ADD]], i32* [[I3]], align 4 -// CHECK11-NEXT: [[TMP19:%.*]] = load i32, i32* [[I3]], align 4 +// CHECK11-NEXT: store i32 [[ADD]], i32* [[I3]], align 4, !llvm.access.group !14 +// CHECK11-NEXT: [[TMP19:%.*]] = load i32, i32* [[I3]], align 4, !llvm.access.group !14 // CHECK11-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, i32* [[TMP2]], i32 [[TMP19]] -// CHECK11-NEXT: store i32 0, i32* [[ARRAYIDX]], align 4 +// CHECK11-NEXT: store i32 0, i32* [[ARRAYIDX]], align 4, !llvm.access.group !14 // CHECK11-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK11: omp.body.continue: // CHECK11-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK11: omp.inner.for.inc: -// CHECK11-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[TMP20:%.*]] = load 
i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !14 // CHECK11-NEXT: [[ADD6:%.*]] = add nsw i32 [[TMP20]], 1 -// CHECK11-NEXT: store i32 [[ADD6]], i32* [[DOTOMP_IV]], align 4 -// CHECK11-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP11:![0-9]+]] +// CHECK11-NEXT: store i32 [[ADD6]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !14 +// CHECK11-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP15:![0-9]+]] // CHECK11: omp.inner.for.end: // CHECK11-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK11: omp.loop.exit: @@ -6845,21 +6845,21 @@ int main (int argc, char **argv) { // CHECK11-NEXT: store i32 [[TMP13]], i32* [[DOTOMP_IV]], align 4 // CHECK11-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK11: omp.inner.for.cond: -// CHECK11-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK11-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK11-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !19 +// CHECK11-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !19 // CHECK11-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP14]], [[TMP15]] // CHECK11-NEXT: br i1 [[CMP5]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK11: omp.inner.for.body: -// CHECK11-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK11-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK11-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 5, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32, i32, i32*, i32, i32*)* @.omp_outlined..3 to void (i32*, i32*, ...)*), i32 [[TMP16]], i32 [[TMP17]], i32* [[TMP0]], i32 [[TMP1]], i32* [[TMP2]]) +// CHECK11-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !19 +// CHECK11-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !19 +// CHECK11-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) 
@__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 5, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32, i32, i32*, i32, i32*)* @.omp_outlined..3 to void (i32*, i32*, ...)*), i32 [[TMP16]], i32 [[TMP17]], i32* [[TMP0]], i32 [[TMP1]], i32* [[TMP2]]), !llvm.access.group !19 // CHECK11-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK11: omp.inner.for.inc: -// CHECK11-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK11-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK11-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !19 +// CHECK11-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !19 // CHECK11-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP18]], [[TMP19]] -// CHECK11-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 -// CHECK11-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP14:![0-9]+]] +// CHECK11-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !19 +// CHECK11-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP20:![0-9]+]] // CHECK11: omp.inner.for.end: // CHECK11-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK11: omp.loop.exit: @@ -6954,26 +6954,26 @@ int main (int argc, char **argv) { // CHECK11-NEXT: store i32 [[TMP15]], i32* [[DOTOMP_IV]], align 4 // CHECK11-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK11: omp.inner.for.cond: -// CHECK11-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK11-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK11-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !22 +// CHECK11-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !22 // CHECK11-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP16]], [[TMP17]] // CHECK11-NEXT: br i1 [[CMP5]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK11: omp.inner.for.body: -// CHECK11-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !22 // CHECK11-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP18]], 1 // CHECK11-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK11-NEXT: store i32 [[ADD]], i32* [[I3]], align 4 -// CHECK11-NEXT: [[TMP19:%.*]] = load i32, i32* [[I3]], align 4 +// CHECK11-NEXT: store i32 [[ADD]], i32* [[I3]], align 4, !llvm.access.group !22 +// CHECK11-NEXT: [[TMP19:%.*]] = load i32, i32* [[I3]], align 4, !llvm.access.group !22 // CHECK11-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, i32* [[TMP2]], i32 [[TMP19]] -// CHECK11-NEXT: store i32 0, i32* [[ARRAYIDX]], align 4 +// CHECK11-NEXT: store i32 0, i32* [[ARRAYIDX]], align 4, !llvm.access.group !22 // CHECK11-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK11: omp.body.continue: // CHECK11-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK11: omp.inner.for.inc: -// CHECK11-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !22 // CHECK11-NEXT: [[ADD6:%.*]] = add nsw i32 [[TMP20]], 1 -// CHECK11-NEXT: store i32 [[ADD6]], i32* [[DOTOMP_IV]], align 4 -// CHECK11-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP15:![0-9]+]] +// CHECK11-NEXT: store i32 [[ADD6]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !22 +// CHECK11-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP23:![0-9]+]] // CHECK11: omp.inner.for.end: // CHECK11-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK11: omp.loop.exit: 
@@ -7088,48 +7088,48 @@ int main (int argc, char **argv) { // CHECK11-NEXT: store i32 [[TMP14]], i32* [[DOTOMP_IV]], align 4 // CHECK11-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK11: omp.inner.for.cond: -// CHECK11-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK11-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK11-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !25 +// CHECK11-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_2]], align 4, !llvm.access.group !25 // CHECK11-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP16]], 1 // CHECK11-NEXT: [[CMP6:%.*]] = icmp slt i32 [[TMP15]], [[ADD]] // CHECK11-NEXT: br i1 [[CMP6]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK11: omp.inner.for.body: -// CHECK11-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK11-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK11-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR__ADDR]], align 4 -// CHECK11-NEXT: store i32 [[TMP19]], i32* [[DOTCAPTURE_EXPR__CASTED]], align 4 -// CHECK11-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR__CASTED]], align 4 -// CHECK11-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 6, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32, i32, i32*, i32, i32*, i32)* @.omp_outlined..6 to void (i32*, i32*, ...)*), i32 [[TMP17]], i32 [[TMP18]], i32* [[TMP0]], i32 [[TMP1]], i32* [[TMP2]], i32 [[TMP20]]) +// CHECK11-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !25 +// CHECK11-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !25 +// CHECK11-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR__ADDR]], align 4, !llvm.access.group !25 +// CHECK11-NEXT: store i32 [[TMP19]], i32* [[DOTCAPTURE_EXPR__CASTED]], align 4, !llvm.access.group !25 +// CHECK11-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR__CASTED]], align 4, !llvm.access.group !25 +// CHECK11-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) 
@__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 6, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32, i32, i32*, i32, i32*, i32)* @.omp_outlined..6 to void (i32*, i32*, ...)*), i32 [[TMP17]], i32 [[TMP18]], i32* [[TMP0]], i32 [[TMP1]], i32* [[TMP2]], i32 [[TMP20]]), !llvm.access.group !25 // CHECK11-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK11: omp.inner.for.inc: -// CHECK11-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK11-NEXT: [[TMP22:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK11-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !25 +// CHECK11-NEXT: [[TMP22:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !25 // CHECK11-NEXT: [[ADD7:%.*]] = add nsw i32 [[TMP21]], [[TMP22]] -// CHECK11-NEXT: store i32 [[ADD7]], i32* [[DOTOMP_IV]], align 4 -// CHECK11-NEXT: [[TMP23:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK11-NEXT: [[TMP24:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK11-NEXT: store i32 [[ADD7]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !25 +// CHECK11-NEXT: [[TMP23:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !25 +// CHECK11-NEXT: [[TMP24:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !25 // CHECK11-NEXT: [[ADD8:%.*]] = add nsw i32 [[TMP23]], [[TMP24]] -// CHECK11-NEXT: store i32 [[ADD8]], i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK11-NEXT: [[TMP25:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK11-NEXT: [[TMP26:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK11-NEXT: store i32 [[ADD8]], i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !25 +// CHECK11-NEXT: [[TMP25:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !25 +// CHECK11-NEXT: [[TMP26:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !25 // CHECK11-NEXT: [[ADD9:%.*]] = add nsw i32 [[TMP25]], [[TMP26]] -// CHECK11-NEXT: store i32 [[ADD9]], i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK11-NEXT: [[TMP27:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK11-NEXT: [[TMP28:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK11-NEXT: store i32 [[ADD9]], i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !25 +// CHECK11-NEXT: [[TMP27:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !25 +// CHECK11-NEXT: [[TMP28:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_2]], align 4, !llvm.access.group !25 // CHECK11-NEXT: [[CMP10:%.*]] = icmp sgt i32 [[TMP27]], [[TMP28]] // CHECK11-NEXT: br i1 [[CMP10]], label [[COND_TRUE11:%.*]], label [[COND_FALSE12:%.*]] // CHECK11: cond.true11: -// CHECK11-NEXT: [[TMP29:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK11-NEXT: [[TMP29:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_2]], align 4, !llvm.access.group !25 // CHECK11-NEXT: br label [[COND_END13:%.*]] // CHECK11: cond.false12: -// CHECK11-NEXT: [[TMP30:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK11-NEXT: [[TMP30:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !25 // CHECK11-NEXT: br label [[COND_END13]] // CHECK11: cond.end13: // CHECK11-NEXT: [[COND14:%.*]] = phi i32 [ [[TMP29]], [[COND_TRUE11]] ], [ [[TMP30]], [[COND_FALSE12]] ] -// CHECK11-NEXT: store i32 [[COND14]], i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK11-NEXT: [[TMP31:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK11-NEXT: store i32 [[TMP31]], i32* [[DOTOMP_IV]], align 4 -// CHECK11-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop 
[[LOOP16:![0-9]+]] +// CHECK11-NEXT: store i32 [[COND14]], i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !25 +// CHECK11-NEXT: [[TMP31:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !25 +// CHECK11-NEXT: store i32 [[TMP31]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !25 +// CHECK11-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP26:![0-9]+]] // CHECK11: omp.inner.for.end: // CHECK11-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK11: omp.loop.exit: @@ -7226,26 +7226,26 @@ int main (int argc, char **argv) { // CHECK11-NEXT: store i32 [[TMP15]], i32* [[DOTOMP_IV]], align 4 // CHECK11-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK11: omp.inner.for.cond: -// CHECK11-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK11-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK11-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !28 +// CHECK11-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !28 // CHECK11-NEXT: [[CMP6:%.*]] = icmp sle i32 [[TMP16]], [[TMP17]] // CHECK11-NEXT: br i1 [[CMP6]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK11: omp.inner.for.body: -// CHECK11-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !28 // CHECK11-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP18]], 1 // CHECK11-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK11-NEXT: store i32 [[ADD]], i32* [[I4]], align 4 -// CHECK11-NEXT: [[TMP19:%.*]] = load i32, i32* [[I4]], align 4 +// CHECK11-NEXT: store i32 [[ADD]], i32* [[I4]], align 4, !llvm.access.group !28 +// CHECK11-NEXT: [[TMP19:%.*]] = load i32, i32* [[I4]], align 4, !llvm.access.group !28 // CHECK11-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, i32* [[TMP2]], i32 [[TMP19]] -// CHECK11-NEXT: store i32 0, i32* [[ARRAYIDX]], align 4 +// CHECK11-NEXT: store i32 0, i32* [[ARRAYIDX]], align 4, !llvm.access.group !28 // CHECK11-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK11: omp.body.continue: // CHECK11-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK11: omp.inner.for.inc: -// CHECK11-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !28 // CHECK11-NEXT: [[ADD7:%.*]] = add nsw i32 [[TMP20]], 1 -// CHECK11-NEXT: store i32 [[ADD7]], i32* [[DOTOMP_IV]], align 4 -// CHECK11-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP17:![0-9]+]] +// CHECK11-NEXT: store i32 [[ADD7]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !28 +// CHECK11-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP29:![0-9]+]] // CHECK11: omp.inner.for.end: // CHECK11-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK11: omp.loop.exit: @@ -7407,21 +7407,21 @@ int main (int argc, char **argv) { // CHECK11-NEXT: store i32 [[TMP5]], i32* [[DOTOMP_IV]], align 4 // CHECK11-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK11: omp.inner.for.cond: -// CHECK11-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK11-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK11-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !31 +// CHECK11-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !31 // CHECK11-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] // CHECK11-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label 
[[OMP_INNER_FOR_END:%.*]] // CHECK11: omp.inner.for.body: -// CHECK11-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK11-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK11-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 3, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32, i32, [10 x i32]*)* @.omp_outlined..9 to void (i32*, i32*, ...)*), i32 [[TMP8]], i32 [[TMP9]], [10 x i32]* [[TMP0]]) +// CHECK11-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !31 +// CHECK11-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !31 +// CHECK11-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 3, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32, i32, [10 x i32]*)* @.omp_outlined..9 to void (i32*, i32*, ...)*), i32 [[TMP8]], i32 [[TMP9]], [10 x i32]* [[TMP0]]), !llvm.access.group !31 // CHECK11-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK11: omp.inner.for.inc: -// CHECK11-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK11-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK11-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !31 +// CHECK11-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !31 // CHECK11-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP10]], [[TMP11]] -// CHECK11-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 -// CHECK11-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP18:![0-9]+]] +// CHECK11-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !31 +// CHECK11-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP32:![0-9]+]] // CHECK11: omp.inner.for.end: // CHECK11-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK11: omp.loop.exit: @@ -7483,26 +7483,26 @@ int main (int argc, char **argv) { // CHECK11-NEXT: store i32 [[TMP7]], i32* [[DOTOMP_IV]], align 4 // CHECK11-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK11: omp.inner.for.cond: -// CHECK11-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK11-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK11-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !34 +// CHECK11-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !34 // CHECK11-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] // CHECK11-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK11: omp.inner.for.body: -// CHECK11-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !34 // CHECK11-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP10]], 1 // CHECK11-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK11-NEXT: store i32 [[ADD]], i32* [[I]], align 4 -// CHECK11-NEXT: [[TMP11:%.*]] = load i32, i32* [[I]], align 4 +// CHECK11-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !34 +// CHECK11-NEXT: [[TMP11:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !34 // CHECK11-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], [10 x i32]* [[TMP0]], i32 0, i32 [[TMP11]] -// CHECK11-NEXT: store i32 0, i32* [[ARRAYIDX]], align 4 +// CHECK11-NEXT: store i32 0, i32* [[ARRAYIDX]], align 4, !llvm.access.group !34 // CHECK11-NEXT: br label 
[[OMP_BODY_CONTINUE:%.*]] // CHECK11: omp.body.continue: // CHECK11-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK11: omp.inner.for.inc: -// CHECK11-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !34 // CHECK11-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP12]], 1 -// CHECK11-NEXT: store i32 [[ADD2]], i32* [[DOTOMP_IV]], align 4 -// CHECK11-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP19:![0-9]+]] +// CHECK11-NEXT: store i32 [[ADD2]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !34 +// CHECK11-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP35:![0-9]+]] // CHECK11: omp.inner.for.end: // CHECK11-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK11: omp.loop.exit: @@ -7566,21 +7566,21 @@ int main (int argc, char **argv) { // CHECK11-NEXT: store i32 [[TMP5]], i32* [[DOTOMP_IV]], align 4 // CHECK11-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK11: omp.inner.for.cond: -// CHECK11-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK11-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK11-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !37 +// CHECK11-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !37 // CHECK11-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] // CHECK11-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK11: omp.inner.for.body: -// CHECK11-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK11-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK11-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 3, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32, i32, [10 x i32]*)* @.omp_outlined..12 to void (i32*, i32*, ...)*), i32 [[TMP8]], i32 [[TMP9]], [10 x i32]* [[TMP0]]) +// CHECK11-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !37 +// CHECK11-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !37 +// CHECK11-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) 
@__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 3, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32, i32, [10 x i32]*)* @.omp_outlined..12 to void (i32*, i32*, ...)*), i32 [[TMP8]], i32 [[TMP9]], [10 x i32]* [[TMP0]]), !llvm.access.group !37 // CHECK11-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK11: omp.inner.for.inc: -// CHECK11-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK11-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK11-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !37 +// CHECK11-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !37 // CHECK11-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP10]], [[TMP11]] -// CHECK11-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 -// CHECK11-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP20:![0-9]+]] +// CHECK11-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !37 +// CHECK11-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP38:![0-9]+]] // CHECK11: omp.inner.for.end: // CHECK11-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK11: omp.loop.exit: @@ -7642,26 +7642,26 @@ int main (int argc, char **argv) { // CHECK11-NEXT: store i32 [[TMP7]], i32* [[DOTOMP_IV]], align 4 // CHECK11-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK11: omp.inner.for.cond: -// CHECK11-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK11-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK11-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !40 +// CHECK11-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !40 // CHECK11-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] // CHECK11-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK11: omp.inner.for.body: -// CHECK11-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !40 // CHECK11-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP10]], 1 // CHECK11-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK11-NEXT: store i32 [[ADD]], i32* [[I]], align 4 -// CHECK11-NEXT: [[TMP11:%.*]] = load i32, i32* [[I]], align 4 +// CHECK11-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !40 +// CHECK11-NEXT: [[TMP11:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !40 // CHECK11-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], [10 x i32]* [[TMP0]], i32 0, i32 [[TMP11]] -// CHECK11-NEXT: store i32 0, i32* [[ARRAYIDX]], align 4 +// CHECK11-NEXT: store i32 0, i32* [[ARRAYIDX]], align 4, !llvm.access.group !40 // CHECK11-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK11: omp.body.continue: // CHECK11-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK11: omp.inner.for.inc: -// CHECK11-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !40 // CHECK11-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP12]], 1 -// CHECK11-NEXT: store i32 [[ADD2]], i32* [[DOTOMP_IV]], align 4 -// CHECK11-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP21:![0-9]+]] +// CHECK11-NEXT: store i32 [[ADD2]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !40 +// CHECK11-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP41:![0-9]+]] // CHECK11: omp.inner.for.end: // CHECK11-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK11: omp.loop.exit: @@ -7738,44 
+7738,44 @@ int main (int argc, char **argv) { // CHECK11-NEXT: store i32 [[TMP6]], i32* [[DOTOMP_IV]], align 4 // CHECK11-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK11: omp.inner.for.cond: -// CHECK11-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !43 // CHECK11-NEXT: [[CMP1:%.*]] = icmp slt i32 [[TMP7]], 10 // CHECK11-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK11: omp.inner.for.body: -// CHECK11-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK11-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK11-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR__ADDR]], align 4 -// CHECK11-NEXT: store i32 [[TMP10]], i32* [[DOTCAPTURE_EXPR__CASTED]], align 4 -// CHECK11-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR__CASTED]], align 4 -// CHECK11-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 4, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32, i32, [10 x i32]*, i32)* @.omp_outlined..16 to void (i32*, i32*, ...)*), i32 [[TMP8]], i32 [[TMP9]], [10 x i32]* [[TMP0]], i32 [[TMP11]]) +// CHECK11-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !43 +// CHECK11-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !43 +// CHECK11-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR__ADDR]], align 4, !llvm.access.group !43 +// CHECK11-NEXT: store i32 [[TMP10]], i32* [[DOTCAPTURE_EXPR__CASTED]], align 4, !llvm.access.group !43 +// CHECK11-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR__CASTED]], align 4, !llvm.access.group !43 +// CHECK11-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) 
@__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 4, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32, i32, [10 x i32]*, i32)* @.omp_outlined..16 to void (i32*, i32*, ...)*), i32 [[TMP8]], i32 [[TMP9]], [10 x i32]* [[TMP0]], i32 [[TMP11]]), !llvm.access.group !43 // CHECK11-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK11: omp.inner.for.inc: -// CHECK11-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK11-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK11-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !43 +// CHECK11-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !43 // CHECK11-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP12]], [[TMP13]] -// CHECK11-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 -// CHECK11-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK11-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK11-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !43 +// CHECK11-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !43 +// CHECK11-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !43 // CHECK11-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP14]], [[TMP15]] -// CHECK11-NEXT: store i32 [[ADD2]], i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK11-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK11-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK11-NEXT: store i32 [[ADD2]], i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !43 +// CHECK11-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !43 +// CHECK11-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !43 // CHECK11-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP16]], [[TMP17]] -// CHECK11-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK11-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK11-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !43 +// CHECK11-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !43 // CHECK11-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP18]], 9 // CHECK11-NEXT: br i1 [[CMP4]], label [[COND_TRUE5:%.*]], label [[COND_FALSE6:%.*]] // CHECK11: cond.true5: // CHECK11-NEXT: br label [[COND_END7:%.*]] // CHECK11: cond.false6: -// CHECK11-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK11-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !43 // CHECK11-NEXT: br label [[COND_END7]] // CHECK11: cond.end7: // CHECK11-NEXT: [[COND8:%.*]] = phi i32 [ 9, [[COND_TRUE5]] ], [ [[TMP19]], [[COND_FALSE6]] ] -// CHECK11-NEXT: store i32 [[COND8]], i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK11-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK11-NEXT: store i32 [[TMP20]], i32* [[DOTOMP_IV]], align 4 -// CHECK11-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP22:![0-9]+]] +// CHECK11-NEXT: store i32 [[COND8]], i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !43 +// CHECK11-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !43 +// CHECK11-NEXT: store i32 [[TMP20]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !43 +// CHECK11-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP44:![0-9]+]] // CHECK11: omp.inner.for.end: // CHECK11-NEXT: br 
label [[OMP_LOOP_EXIT:%.*]] // CHECK11: omp.loop.exit: @@ -7839,26 +7839,26 @@ int main (int argc, char **argv) { // CHECK11-NEXT: store i32 [[TMP7]], i32* [[DOTOMP_IV]], align 4 // CHECK11-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK11: omp.inner.for.cond: -// CHECK11-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK11-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK11-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !46 +// CHECK11-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !46 // CHECK11-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] // CHECK11-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK11: omp.inner.for.body: -// CHECK11-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !46 // CHECK11-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP10]], 1 // CHECK11-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK11-NEXT: store i32 [[ADD]], i32* [[I]], align 4 -// CHECK11-NEXT: [[TMP11:%.*]] = load i32, i32* [[I]], align 4 +// CHECK11-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !46 +// CHECK11-NEXT: [[TMP11:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !46 // CHECK11-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], [10 x i32]* [[TMP0]], i32 0, i32 [[TMP11]] -// CHECK11-NEXT: store i32 0, i32* [[ARRAYIDX]], align 4 +// CHECK11-NEXT: store i32 0, i32* [[ARRAYIDX]], align 4, !llvm.access.group !46 // CHECK11-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK11: omp.body.continue: // CHECK11-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK11: omp.inner.for.inc: -// CHECK11-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !46 // CHECK11-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP12]], 1 -// CHECK11-NEXT: store i32 [[ADD2]], i32* [[DOTOMP_IV]], align 4 -// CHECK11-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP23:![0-9]+]] +// CHECK11-NEXT: store i32 [[ADD2]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !46 +// CHECK11-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP47:![0-9]+]] // CHECK11: omp.inner.for.end: // CHECK11-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK11: omp.loop.exit: @@ -8193,21 +8193,21 @@ int main (int argc, char **argv) { // CHECK12-NEXT: store i32 [[TMP13]], i32* [[DOTOMP_IV]], align 4 // CHECK12-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK12: omp.inner.for.cond: -// CHECK12-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK12-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK12-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !10 +// CHECK12-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !10 // CHECK12-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP14]], [[TMP15]] // CHECK12-NEXT: br i1 [[CMP5]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK12: omp.inner.for.body: -// CHECK12-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK12-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK12-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) 
@__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 5, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32, i32, i32*, i32, i32*)* @.omp_outlined..1 to void (i32*, i32*, ...)*), i32 [[TMP16]], i32 [[TMP17]], i32* [[TMP0]], i32 [[TMP1]], i32* [[TMP2]]) +// CHECK12-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !10 +// CHECK12-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !10 +// CHECK12-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 5, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32, i32, i32*, i32, i32*)* @.omp_outlined..1 to void (i32*, i32*, ...)*), i32 [[TMP16]], i32 [[TMP17]], i32* [[TMP0]], i32 [[TMP1]], i32* [[TMP2]]), !llvm.access.group !10 // CHECK12-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK12: omp.inner.for.inc: -// CHECK12-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK12-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK12-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !10 +// CHECK12-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !10 // CHECK12-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP18]], [[TMP19]] -// CHECK12-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 -// CHECK12-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP9:![0-9]+]] +// CHECK12-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !10 +// CHECK12-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP11:![0-9]+]] // CHECK12: omp.inner.for.end: // CHECK12-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK12: omp.loop.exit: @@ -8302,26 +8302,26 @@ int main (int argc, char **argv) { // CHECK12-NEXT: store i32 [[TMP15]], i32* [[DOTOMP_IV]], align 4 // CHECK12-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK12: omp.inner.for.cond: -// CHECK12-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK12-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK12-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !14 +// CHECK12-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !14 // CHECK12-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP16]], [[TMP17]] // CHECK12-NEXT: br i1 [[CMP5]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK12: omp.inner.for.body: -// CHECK12-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK12-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !14 // CHECK12-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP18]], 1 // CHECK12-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK12-NEXT: store i32 [[ADD]], i32* [[I3]], align 4 -// CHECK12-NEXT: [[TMP19:%.*]] = load i32, i32* [[I3]], align 4 +// CHECK12-NEXT: store i32 [[ADD]], i32* [[I3]], align 4, !llvm.access.group !14 +// CHECK12-NEXT: [[TMP19:%.*]] = load i32, i32* [[I3]], align 4, !llvm.access.group !14 // CHECK12-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, i32* [[TMP2]], i32 [[TMP19]] -// CHECK12-NEXT: store i32 0, i32* [[ARRAYIDX]], align 4 +// CHECK12-NEXT: store i32 0, i32* [[ARRAYIDX]], align 4, !llvm.access.group !14 // CHECK12-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK12: omp.body.continue: // CHECK12-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK12: omp.inner.for.inc: -// CHECK12-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK12-NEXT: [[TMP20:%.*]] = load 
i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !14 // CHECK12-NEXT: [[ADD6:%.*]] = add nsw i32 [[TMP20]], 1 -// CHECK12-NEXT: store i32 [[ADD6]], i32* [[DOTOMP_IV]], align 4 -// CHECK12-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP11:![0-9]+]] +// CHECK12-NEXT: store i32 [[ADD6]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !14 +// CHECK12-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP15:![0-9]+]] // CHECK12: omp.inner.for.end: // CHECK12-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK12: omp.loop.exit: @@ -8423,21 +8423,21 @@ int main (int argc, char **argv) { // CHECK12-NEXT: store i32 [[TMP13]], i32* [[DOTOMP_IV]], align 4 // CHECK12-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK12: omp.inner.for.cond: -// CHECK12-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK12-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK12-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !19 +// CHECK12-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !19 // CHECK12-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP14]], [[TMP15]] // CHECK12-NEXT: br i1 [[CMP5]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK12: omp.inner.for.body: -// CHECK12-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK12-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK12-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 5, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32, i32, i32*, i32, i32*)* @.omp_outlined..3 to void (i32*, i32*, ...)*), i32 [[TMP16]], i32 [[TMP17]], i32* [[TMP0]], i32 [[TMP1]], i32* [[TMP2]]) +// CHECK12-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !19 +// CHECK12-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !19 +// CHECK12-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) 
@__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 5, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32, i32, i32*, i32, i32*)* @.omp_outlined..3 to void (i32*, i32*, ...)*), i32 [[TMP16]], i32 [[TMP17]], i32* [[TMP0]], i32 [[TMP1]], i32* [[TMP2]]), !llvm.access.group !19 // CHECK12-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK12: omp.inner.for.inc: -// CHECK12-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK12-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK12-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !19 +// CHECK12-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !19 // CHECK12-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP18]], [[TMP19]] -// CHECK12-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 -// CHECK12-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP14:![0-9]+]] +// CHECK12-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !19 +// CHECK12-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP20:![0-9]+]] // CHECK12: omp.inner.for.end: // CHECK12-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK12: omp.loop.exit: @@ -8532,26 +8532,26 @@ int main (int argc, char **argv) { // CHECK12-NEXT: store i32 [[TMP15]], i32* [[DOTOMP_IV]], align 4 // CHECK12-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK12: omp.inner.for.cond: -// CHECK12-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK12-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK12-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !22 +// CHECK12-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !22 // CHECK12-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP16]], [[TMP17]] // CHECK12-NEXT: br i1 [[CMP5]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK12: omp.inner.for.body: -// CHECK12-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK12-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !22 // CHECK12-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP18]], 1 // CHECK12-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK12-NEXT: store i32 [[ADD]], i32* [[I3]], align 4 -// CHECK12-NEXT: [[TMP19:%.*]] = load i32, i32* [[I3]], align 4 +// CHECK12-NEXT: store i32 [[ADD]], i32* [[I3]], align 4, !llvm.access.group !22 +// CHECK12-NEXT: [[TMP19:%.*]] = load i32, i32* [[I3]], align 4, !llvm.access.group !22 // CHECK12-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, i32* [[TMP2]], i32 [[TMP19]] -// CHECK12-NEXT: store i32 0, i32* [[ARRAYIDX]], align 4 +// CHECK12-NEXT: store i32 0, i32* [[ARRAYIDX]], align 4, !llvm.access.group !22 // CHECK12-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK12: omp.body.continue: // CHECK12-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK12: omp.inner.for.inc: -// CHECK12-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK12-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !22 // CHECK12-NEXT: [[ADD6:%.*]] = add nsw i32 [[TMP20]], 1 -// CHECK12-NEXT: store i32 [[ADD6]], i32* [[DOTOMP_IV]], align 4 -// CHECK12-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP15:![0-9]+]] +// CHECK12-NEXT: store i32 [[ADD6]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !22 +// CHECK12-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP23:![0-9]+]] // CHECK12: omp.inner.for.end: // CHECK12-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK12: omp.loop.exit: 
@@ -8666,48 +8666,48 @@ int main (int argc, char **argv) { // CHECK12-NEXT: store i32 [[TMP14]], i32* [[DOTOMP_IV]], align 4 // CHECK12-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK12: omp.inner.for.cond: -// CHECK12-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK12-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK12-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !25 +// CHECK12-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_2]], align 4, !llvm.access.group !25 // CHECK12-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP16]], 1 // CHECK12-NEXT: [[CMP6:%.*]] = icmp slt i32 [[TMP15]], [[ADD]] // CHECK12-NEXT: br i1 [[CMP6]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK12: omp.inner.for.body: -// CHECK12-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK12-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK12-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR__ADDR]], align 4 -// CHECK12-NEXT: store i32 [[TMP19]], i32* [[DOTCAPTURE_EXPR__CASTED]], align 4 -// CHECK12-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR__CASTED]], align 4 -// CHECK12-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 6, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32, i32, i32*, i32, i32*, i32)* @.omp_outlined..6 to void (i32*, i32*, ...)*), i32 [[TMP17]], i32 [[TMP18]], i32* [[TMP0]], i32 [[TMP1]], i32* [[TMP2]], i32 [[TMP20]]) +// CHECK12-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !25 +// CHECK12-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !25 +// CHECK12-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR__ADDR]], align 4, !llvm.access.group !25 +// CHECK12-NEXT: store i32 [[TMP19]], i32* [[DOTCAPTURE_EXPR__CASTED]], align 4, !llvm.access.group !25 +// CHECK12-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR__CASTED]], align 4, !llvm.access.group !25 +// CHECK12-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) 
@__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 6, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32, i32, i32*, i32, i32*, i32)* @.omp_outlined..6 to void (i32*, i32*, ...)*), i32 [[TMP17]], i32 [[TMP18]], i32* [[TMP0]], i32 [[TMP1]], i32* [[TMP2]], i32 [[TMP20]]), !llvm.access.group !25 // CHECK12-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK12: omp.inner.for.inc: -// CHECK12-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK12-NEXT: [[TMP22:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK12-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !25 +// CHECK12-NEXT: [[TMP22:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !25 // CHECK12-NEXT: [[ADD7:%.*]] = add nsw i32 [[TMP21]], [[TMP22]] -// CHECK12-NEXT: store i32 [[ADD7]], i32* [[DOTOMP_IV]], align 4 -// CHECK12-NEXT: [[TMP23:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK12-NEXT: [[TMP24:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK12-NEXT: store i32 [[ADD7]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !25 +// CHECK12-NEXT: [[TMP23:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !25 +// CHECK12-NEXT: [[TMP24:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !25 // CHECK12-NEXT: [[ADD8:%.*]] = add nsw i32 [[TMP23]], [[TMP24]] -// CHECK12-NEXT: store i32 [[ADD8]], i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK12-NEXT: [[TMP25:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK12-NEXT: [[TMP26:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK12-NEXT: store i32 [[ADD8]], i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !25 +// CHECK12-NEXT: [[TMP25:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !25 +// CHECK12-NEXT: [[TMP26:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !25 // CHECK12-NEXT: [[ADD9:%.*]] = add nsw i32 [[TMP25]], [[TMP26]] -// CHECK12-NEXT: store i32 [[ADD9]], i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK12-NEXT: [[TMP27:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK12-NEXT: [[TMP28:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK12-NEXT: store i32 [[ADD9]], i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !25 +// CHECK12-NEXT: [[TMP27:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !25 +// CHECK12-NEXT: [[TMP28:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_2]], align 4, !llvm.access.group !25 // CHECK12-NEXT: [[CMP10:%.*]] = icmp sgt i32 [[TMP27]], [[TMP28]] // CHECK12-NEXT: br i1 [[CMP10]], label [[COND_TRUE11:%.*]], label [[COND_FALSE12:%.*]] // CHECK12: cond.true11: -// CHECK12-NEXT: [[TMP29:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK12-NEXT: [[TMP29:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_2]], align 4, !llvm.access.group !25 // CHECK12-NEXT: br label [[COND_END13:%.*]] // CHECK12: cond.false12: -// CHECK12-NEXT: [[TMP30:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK12-NEXT: [[TMP30:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !25 // CHECK12-NEXT: br label [[COND_END13]] // CHECK12: cond.end13: // CHECK12-NEXT: [[COND14:%.*]] = phi i32 [ [[TMP29]], [[COND_TRUE11]] ], [ [[TMP30]], [[COND_FALSE12]] ] -// CHECK12-NEXT: store i32 [[COND14]], i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK12-NEXT: [[TMP31:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK12-NEXT: store i32 [[TMP31]], i32* [[DOTOMP_IV]], align 4 -// CHECK12-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop 
[[LOOP16:![0-9]+]] +// CHECK12-NEXT: store i32 [[COND14]], i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !25 +// CHECK12-NEXT: [[TMP31:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !25 +// CHECK12-NEXT: store i32 [[TMP31]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !25 +// CHECK12-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP26:![0-9]+]] // CHECK12: omp.inner.for.end: // CHECK12-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK12: omp.loop.exit: @@ -8804,26 +8804,26 @@ int main (int argc, char **argv) { // CHECK12-NEXT: store i32 [[TMP15]], i32* [[DOTOMP_IV]], align 4 // CHECK12-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK12: omp.inner.for.cond: -// CHECK12-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK12-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK12-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !28 +// CHECK12-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !28 // CHECK12-NEXT: [[CMP6:%.*]] = icmp sle i32 [[TMP16]], [[TMP17]] // CHECK12-NEXT: br i1 [[CMP6]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK12: omp.inner.for.body: -// CHECK12-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK12-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !28 // CHECK12-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP18]], 1 // CHECK12-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK12-NEXT: store i32 [[ADD]], i32* [[I4]], align 4 -// CHECK12-NEXT: [[TMP19:%.*]] = load i32, i32* [[I4]], align 4 +// CHECK12-NEXT: store i32 [[ADD]], i32* [[I4]], align 4, !llvm.access.group !28 +// CHECK12-NEXT: [[TMP19:%.*]] = load i32, i32* [[I4]], align 4, !llvm.access.group !28 // CHECK12-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, i32* [[TMP2]], i32 [[TMP19]] -// CHECK12-NEXT: store i32 0, i32* [[ARRAYIDX]], align 4 +// CHECK12-NEXT: store i32 0, i32* [[ARRAYIDX]], align 4, !llvm.access.group !28 // CHECK12-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK12: omp.body.continue: // CHECK12-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK12: omp.inner.for.inc: -// CHECK12-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK12-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !28 // CHECK12-NEXT: [[ADD7:%.*]] = add nsw i32 [[TMP20]], 1 -// CHECK12-NEXT: store i32 [[ADD7]], i32* [[DOTOMP_IV]], align 4 -// CHECK12-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP17:![0-9]+]] +// CHECK12-NEXT: store i32 [[ADD7]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !28 +// CHECK12-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP29:![0-9]+]] // CHECK12: omp.inner.for.end: // CHECK12-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK12: omp.loop.exit: @@ -8985,21 +8985,21 @@ int main (int argc, char **argv) { // CHECK12-NEXT: store i32 [[TMP5]], i32* [[DOTOMP_IV]], align 4 // CHECK12-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK12: omp.inner.for.cond: -// CHECK12-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK12-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK12-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !31 +// CHECK12-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !31 // CHECK12-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] // CHECK12-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label 
[[OMP_INNER_FOR_END:%.*]] // CHECK12: omp.inner.for.body: -// CHECK12-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK12-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK12-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 3, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32, i32, [10 x i32]*)* @.omp_outlined..9 to void (i32*, i32*, ...)*), i32 [[TMP8]], i32 [[TMP9]], [10 x i32]* [[TMP0]]) +// CHECK12-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !31 +// CHECK12-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !31 +// CHECK12-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 3, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32, i32, [10 x i32]*)* @.omp_outlined..9 to void (i32*, i32*, ...)*), i32 [[TMP8]], i32 [[TMP9]], [10 x i32]* [[TMP0]]), !llvm.access.group !31 // CHECK12-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK12: omp.inner.for.inc: -// CHECK12-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK12-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK12-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !31 +// CHECK12-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !31 // CHECK12-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP10]], [[TMP11]] -// CHECK12-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 -// CHECK12-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP18:![0-9]+]] +// CHECK12-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !31 +// CHECK12-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP32:![0-9]+]] // CHECK12: omp.inner.for.end: // CHECK12-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK12: omp.loop.exit: @@ -9061,26 +9061,26 @@ int main (int argc, char **argv) { // CHECK12-NEXT: store i32 [[TMP7]], i32* [[DOTOMP_IV]], align 4 // CHECK12-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK12: omp.inner.for.cond: -// CHECK12-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK12-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK12-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !34 +// CHECK12-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !34 // CHECK12-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] // CHECK12-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK12: omp.inner.for.body: -// CHECK12-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK12-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !34 // CHECK12-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP10]], 1 // CHECK12-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK12-NEXT: store i32 [[ADD]], i32* [[I]], align 4 -// CHECK12-NEXT: [[TMP11:%.*]] = load i32, i32* [[I]], align 4 +// CHECK12-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !34 +// CHECK12-NEXT: [[TMP11:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !34 // CHECK12-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], [10 x i32]* [[TMP0]], i32 0, i32 [[TMP11]] -// CHECK12-NEXT: store i32 0, i32* [[ARRAYIDX]], align 4 +// CHECK12-NEXT: store i32 0, i32* [[ARRAYIDX]], align 4, !llvm.access.group !34 // CHECK12-NEXT: br label 
[[OMP_BODY_CONTINUE:%.*]] // CHECK12: omp.body.continue: // CHECK12-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK12: omp.inner.for.inc: -// CHECK12-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK12-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !34 // CHECK12-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP12]], 1 -// CHECK12-NEXT: store i32 [[ADD2]], i32* [[DOTOMP_IV]], align 4 -// CHECK12-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP19:![0-9]+]] +// CHECK12-NEXT: store i32 [[ADD2]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !34 +// CHECK12-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP35:![0-9]+]] // CHECK12: omp.inner.for.end: // CHECK12-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK12: omp.loop.exit: @@ -9144,21 +9144,21 @@ int main (int argc, char **argv) { // CHECK12-NEXT: store i32 [[TMP5]], i32* [[DOTOMP_IV]], align 4 // CHECK12-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK12: omp.inner.for.cond: -// CHECK12-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK12-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK12-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !37 +// CHECK12-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !37 // CHECK12-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] // CHECK12-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK12: omp.inner.for.body: -// CHECK12-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK12-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK12-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 3, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32, i32, [10 x i32]*)* @.omp_outlined..12 to void (i32*, i32*, ...)*), i32 [[TMP8]], i32 [[TMP9]], [10 x i32]* [[TMP0]]) +// CHECK12-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !37 +// CHECK12-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !37 +// CHECK12-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) 
@__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 3, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32, i32, [10 x i32]*)* @.omp_outlined..12 to void (i32*, i32*, ...)*), i32 [[TMP8]], i32 [[TMP9]], [10 x i32]* [[TMP0]]), !llvm.access.group !37 // CHECK12-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK12: omp.inner.for.inc: -// CHECK12-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK12-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK12-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !37 +// CHECK12-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !37 // CHECK12-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP10]], [[TMP11]] -// CHECK12-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 -// CHECK12-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP20:![0-9]+]] +// CHECK12-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !37 +// CHECK12-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP38:![0-9]+]] // CHECK12: omp.inner.for.end: // CHECK12-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK12: omp.loop.exit: @@ -9220,26 +9220,26 @@ int main (int argc, char **argv) { // CHECK12-NEXT: store i32 [[TMP7]], i32* [[DOTOMP_IV]], align 4 // CHECK12-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK12: omp.inner.for.cond: -// CHECK12-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK12-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK12-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !40 +// CHECK12-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !40 // CHECK12-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] // CHECK12-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK12: omp.inner.for.body: -// CHECK12-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK12-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !40 // CHECK12-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP10]], 1 // CHECK12-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK12-NEXT: store i32 [[ADD]], i32* [[I]], align 4 -// CHECK12-NEXT: [[TMP11:%.*]] = load i32, i32* [[I]], align 4 +// CHECK12-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !40 +// CHECK12-NEXT: [[TMP11:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !40 // CHECK12-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], [10 x i32]* [[TMP0]], i32 0, i32 [[TMP11]] -// CHECK12-NEXT: store i32 0, i32* [[ARRAYIDX]], align 4 +// CHECK12-NEXT: store i32 0, i32* [[ARRAYIDX]], align 4, !llvm.access.group !40 // CHECK12-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK12: omp.body.continue: // CHECK12-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK12: omp.inner.for.inc: -// CHECK12-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK12-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !40 // CHECK12-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP12]], 1 -// CHECK12-NEXT: store i32 [[ADD2]], i32* [[DOTOMP_IV]], align 4 -// CHECK12-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP21:![0-9]+]] +// CHECK12-NEXT: store i32 [[ADD2]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !40 +// CHECK12-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP41:![0-9]+]] // CHECK12: omp.inner.for.end: // CHECK12-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK12: omp.loop.exit: @@ -9316,44 
+9316,44 @@ int main (int argc, char **argv) { // CHECK12-NEXT: store i32 [[TMP6]], i32* [[DOTOMP_IV]], align 4 // CHECK12-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK12: omp.inner.for.cond: -// CHECK12-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK12-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !43 // CHECK12-NEXT: [[CMP1:%.*]] = icmp slt i32 [[TMP7]], 10 // CHECK12-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK12: omp.inner.for.body: -// CHECK12-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK12-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK12-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR__ADDR]], align 4 -// CHECK12-NEXT: store i32 [[TMP10]], i32* [[DOTCAPTURE_EXPR__CASTED]], align 4 -// CHECK12-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR__CASTED]], align 4 -// CHECK12-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 4, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32, i32, [10 x i32]*, i32)* @.omp_outlined..16 to void (i32*, i32*, ...)*), i32 [[TMP8]], i32 [[TMP9]], [10 x i32]* [[TMP0]], i32 [[TMP11]]) +// CHECK12-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !43 +// CHECK12-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !43 +// CHECK12-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR__ADDR]], align 4, !llvm.access.group !43 +// CHECK12-NEXT: store i32 [[TMP10]], i32* [[DOTCAPTURE_EXPR__CASTED]], align 4, !llvm.access.group !43 +// CHECK12-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR__CASTED]], align 4, !llvm.access.group !43 +// CHECK12-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) 
@__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 4, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32, i32, [10 x i32]*, i32)* @.omp_outlined..16 to void (i32*, i32*, ...)*), i32 [[TMP8]], i32 [[TMP9]], [10 x i32]* [[TMP0]], i32 [[TMP11]]), !llvm.access.group !43 // CHECK12-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK12: omp.inner.for.inc: -// CHECK12-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK12-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK12-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !43 +// CHECK12-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !43 // CHECK12-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP12]], [[TMP13]] -// CHECK12-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 -// CHECK12-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK12-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK12-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !43 +// CHECK12-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !43 +// CHECK12-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !43 // CHECK12-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP14]], [[TMP15]] -// CHECK12-NEXT: store i32 [[ADD2]], i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK12-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK12-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK12-NEXT: store i32 [[ADD2]], i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !43 +// CHECK12-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !43 +// CHECK12-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !43 // CHECK12-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP16]], [[TMP17]] -// CHECK12-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK12-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK12-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !43 +// CHECK12-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !43 // CHECK12-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP18]], 9 // CHECK12-NEXT: br i1 [[CMP4]], label [[COND_TRUE5:%.*]], label [[COND_FALSE6:%.*]] // CHECK12: cond.true5: // CHECK12-NEXT: br label [[COND_END7:%.*]] // CHECK12: cond.false6: -// CHECK12-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK12-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !43 // CHECK12-NEXT: br label [[COND_END7]] // CHECK12: cond.end7: // CHECK12-NEXT: [[COND8:%.*]] = phi i32 [ 9, [[COND_TRUE5]] ], [ [[TMP19]], [[COND_FALSE6]] ] -// CHECK12-NEXT: store i32 [[COND8]], i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK12-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK12-NEXT: store i32 [[TMP20]], i32* [[DOTOMP_IV]], align 4 -// CHECK12-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP22:![0-9]+]] +// CHECK12-NEXT: store i32 [[COND8]], i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !43 +// CHECK12-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !43 +// CHECK12-NEXT: store i32 [[TMP20]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !43 +// CHECK12-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP44:![0-9]+]] // CHECK12: omp.inner.for.end: // CHECK12-NEXT: br 
label [[OMP_LOOP_EXIT:%.*]] // CHECK12: omp.loop.exit: @@ -9417,26 +9417,26 @@ int main (int argc, char **argv) { // CHECK12-NEXT: store i32 [[TMP7]], i32* [[DOTOMP_IV]], align 4 // CHECK12-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK12: omp.inner.for.cond: -// CHECK12-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK12-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK12-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !46 +// CHECK12-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !46 // CHECK12-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] // CHECK12-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK12: omp.inner.for.body: -// CHECK12-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK12-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !46 // CHECK12-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP10]], 1 // CHECK12-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK12-NEXT: store i32 [[ADD]], i32* [[I]], align 4 -// CHECK12-NEXT: [[TMP11:%.*]] = load i32, i32* [[I]], align 4 +// CHECK12-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !46 +// CHECK12-NEXT: [[TMP11:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !46 // CHECK12-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], [10 x i32]* [[TMP0]], i32 0, i32 [[TMP11]] -// CHECK12-NEXT: store i32 0, i32* [[ARRAYIDX]], align 4 +// CHECK12-NEXT: store i32 0, i32* [[ARRAYIDX]], align 4, !llvm.access.group !46 // CHECK12-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK12: omp.body.continue: // CHECK12-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK12: omp.inner.for.inc: -// CHECK12-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK12-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !46 // CHECK12-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP12]], 1 -// CHECK12-NEXT: store i32 [[ADD2]], i32* [[DOTOMP_IV]], align 4 -// CHECK12-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP23:![0-9]+]] +// CHECK12-NEXT: store i32 [[ADD2]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !46 +// CHECK12-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP47:![0-9]+]] // CHECK12: omp.inner.for.end: // CHECK12-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK12: omp.loop.exit: diff --git a/clang/test/OpenMP/teams_distribute_parallel_for_simd_firstprivate_codegen.cpp b/clang/test/OpenMP/teams_distribute_parallel_for_simd_firstprivate_codegen.cpp index 80291b5a3aa6..6eb15b78e96f 100644 --- a/clang/test/OpenMP/teams_distribute_parallel_for_simd_firstprivate_codegen.cpp +++ b/clang/test/OpenMP/teams_distribute_parallel_for_simd_firstprivate_codegen.cpp @@ -477,33 +477,33 @@ int main() { // CHECK1-NEXT: store i32 [[TMP11]], i32* [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !5 +// CHECK1-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !5 // CHECK1-NEXT: [[CMP7:%.*]] = icmp sle i32 [[TMP12]], [[TMP13]] // CHECK1-NEXT: br i1 [[CMP7]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_COND_CLEANUP:%.*]] // CHECK1: omp.inner.for.cond.cleanup: // 
CHECK1-NEXT: br label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK1-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !5 // CHECK1-NEXT: [[TMP15:%.*]] = zext i32 [[TMP14]] to i64 -// CHECK1-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !5 // CHECK1-NEXT: [[TMP17:%.*]] = zext i32 [[TMP16]] to i64 -// CHECK1-NEXT: [[TMP18:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK1-NEXT: [[TMP18:%.*]] = load i32, i32* [[CONV]], align 8, !llvm.access.group !5 // CHECK1-NEXT: [[CONV8:%.*]] = bitcast i64* [[T_VAR_CASTED]] to i32* -// CHECK1-NEXT: store i32 [[TMP18]], i32* [[CONV8]], align 4 -// CHECK1-NEXT: [[TMP19:%.*]] = load i64, i64* [[T_VAR_CASTED]], align 8 -// CHECK1-NEXT: [[TMP20:%.*]] = load i32, i32* [[CONV1]], align 8 +// CHECK1-NEXT: store i32 [[TMP18]], i32* [[CONV8]], align 4, !llvm.access.group !5 +// CHECK1-NEXT: [[TMP19:%.*]] = load i64, i64* [[T_VAR_CASTED]], align 8, !llvm.access.group !5 +// CHECK1-NEXT: [[TMP20:%.*]] = load i32, i32* [[CONV1]], align 8, !llvm.access.group !5 // CHECK1-NEXT: [[CONV9:%.*]] = bitcast i64* [[SIVAR_CASTED]] to i32* -// CHECK1-NEXT: store i32 [[TMP20]], i32* [[CONV9]], align 4 -// CHECK1-NEXT: [[TMP21:%.*]] = load i64, i64* [[SIVAR_CASTED]], align 8 -// CHECK1-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 7, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64, [2 x i32]*, i64, [2 x %struct.S]*, %struct.S*, i64)* @.omp_outlined..3 to void (i32*, i32*, ...)*), i64 [[TMP15]], i64 [[TMP17]], [2 x i32]* [[VEC2]], i64 [[TMP19]], [2 x %struct.S]* [[S_ARR3]], %struct.S* [[VAR5]], i64 [[TMP21]]) +// CHECK1-NEXT: store i32 [[TMP20]], i32* [[CONV9]], align 4, !llvm.access.group !5 +// CHECK1-NEXT: [[TMP21:%.*]] = load i64, i64* [[SIVAR_CASTED]], align 8, !llvm.access.group !5 +// CHECK1-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) 
@__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 7, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64, [2 x i32]*, i64, [2 x %struct.S]*, %struct.S*, i64)* @.omp_outlined..3 to void (i32*, i32*, ...)*), i64 [[TMP15]], i64 [[TMP17]], [2 x i32]* [[VEC2]], i64 [[TMP19]], [2 x %struct.S]* [[S_ARR3]], %struct.S* [[VAR5]], i64 [[TMP21]]), !llvm.access.group !5 // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP22:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[TMP23:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK1-NEXT: [[TMP22:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !5 +// CHECK1-NEXT: [[TMP23:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !5 // CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP22]], [[TMP23]] -// CHECK1-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP4:![0-9]+]] +// CHECK1-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !5 +// CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP6:![0-9]+]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK1: omp.loop.exit: @@ -652,40 +652,40 @@ int main() { // CHECK1-NEXT: store i32 [[TMP13]], i32* [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !9 +// CHECK1-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !9 // CHECK1-NEXT: [[CMP9:%.*]] = icmp sle i32 [[TMP14]], [[TMP15]] // CHECK1-NEXT: br i1 [[CMP9]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_COND_CLEANUP:%.*]] // CHECK1: omp.inner.for.cond.cleanup: // CHECK1-NEXT: br label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !9 // CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP16]], 1 // CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK1-NEXT: store i32 [[ADD]], i32* [[I]], align 4 -// CHECK1-NEXT: [[TMP17:%.*]] = load i32, i32* [[CONV]], align 8 -// CHECK1-NEXT: [[TMP18:%.*]] = load i32, i32* [[I]], align 4 +// CHECK1-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !9 +// CHECK1-NEXT: [[TMP17:%.*]] = load i32, i32* [[CONV]], align 8, !llvm.access.group !9 +// CHECK1-NEXT: [[TMP18:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !9 // CHECK1-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP18]] to i64 // CHECK1-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x i32], [2 x i32]* [[VEC4]], i64 0, i64 [[IDXPROM]] -// CHECK1-NEXT: store i32 [[TMP17]], i32* [[ARRAYIDX]], align 4 -// CHECK1-NEXT: [[TMP19:%.*]] = load i32, i32* [[I]], align 4 +// CHECK1-NEXT: store i32 [[TMP17]], i32* [[ARRAYIDX]], align 4, !llvm.access.group !9 +// CHECK1-NEXT: [[TMP19:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !9 // CHECK1-NEXT: [[IDXPROM10:%.*]] = sext i32 [[TMP19]] to i64 // CHECK1-NEXT: [[ARRAYIDX11:%.*]] = getelementptr inbounds [2 x %struct.S], [2 x %struct.S]* [[S_ARR5]], i64 0, i64 [[IDXPROM10]] // CHECK1-NEXT: [[TMP20:%.*]] = bitcast %struct.S* [[ARRAYIDX11]] to i8* // CHECK1-NEXT: [[TMP21:%.*]] = 
bitcast %struct.S* [[VAR7]] to i8* -// CHECK1-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 [[TMP20]], i8* align 4 [[TMP21]], i64 4, i1 false) -// CHECK1-NEXT: [[TMP22:%.*]] = load i32, i32* [[I]], align 4 -// CHECK1-NEXT: [[TMP23:%.*]] = load i32, i32* [[CONV1]], align 8 +// CHECK1-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 [[TMP20]], i8* align 4 [[TMP21]], i64 4, i1 false), !llvm.access.group !9 +// CHECK1-NEXT: [[TMP22:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !9 +// CHECK1-NEXT: [[TMP23:%.*]] = load i32, i32* [[CONV1]], align 8, !llvm.access.group !9 // CHECK1-NEXT: [[ADD12:%.*]] = add nsw i32 [[TMP23]], [[TMP22]] -// CHECK1-NEXT: store i32 [[ADD12]], i32* [[CONV1]], align 8 +// CHECK1-NEXT: store i32 [[ADD12]], i32* [[CONV1]], align 8, !llvm.access.group !9 // CHECK1-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK1: omp.body.continue: // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP24:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP24:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !9 // CHECK1-NEXT: [[ADD13:%.*]] = add nsw i32 [[TMP24]], 1 -// CHECK1-NEXT: store i32 [[ADD13]], i32* [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP6:![0-9]+]] +// CHECK1-NEXT: store i32 [[ADD13]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !9 +// CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP10:![0-9]+]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK1: omp.loop.exit: @@ -977,30 +977,30 @@ int main() { // CHECK1-NEXT: store i32 [[TMP12]], i32* [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !14 +// CHECK1-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !14 // CHECK1-NEXT: [[CMP8:%.*]] = icmp sle i32 [[TMP13]], [[TMP14]] // CHECK1-NEXT: br i1 [[CMP8]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_COND_CLEANUP:%.*]] // CHECK1: omp.inner.for.cond.cleanup: // CHECK1-NEXT: br label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK1-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !14 // CHECK1-NEXT: [[TMP16:%.*]] = zext i32 [[TMP15]] to i64 -// CHECK1-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !14 // CHECK1-NEXT: [[TMP18:%.*]] = zext i32 [[TMP17]] to i64 -// CHECK1-NEXT: [[TMP19:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK1-NEXT: [[TMP19:%.*]] = load i32, i32* [[CONV]], align 8, !llvm.access.group !14 // CHECK1-NEXT: [[CONV9:%.*]] = bitcast i64* [[T_VAR_CASTED]] to i32* -// CHECK1-NEXT: store i32 [[TMP19]], i32* [[CONV9]], align 4 -// CHECK1-NEXT: [[TMP20:%.*]] = load i64, i64* [[T_VAR_CASTED]], align 8 -// CHECK1-NEXT: [[TMP21:%.*]] = load %struct.S.0*, %struct.S.0** [[_TMP7]], align 8 -// CHECK1-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) 
@__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 6, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64, [2 x i32]*, i64, [2 x %struct.S.0]*, %struct.S.0*)* @.omp_outlined..5 to void (i32*, i32*, ...)*), i64 [[TMP16]], i64 [[TMP18]], [2 x i32]* [[VEC2]], i64 [[TMP20]], [2 x %struct.S.0]* [[S_ARR3]], %struct.S.0* [[TMP21]]) +// CHECK1-NEXT: store i32 [[TMP19]], i32* [[CONV9]], align 4, !llvm.access.group !14 +// CHECK1-NEXT: [[TMP20:%.*]] = load i64, i64* [[T_VAR_CASTED]], align 8, !llvm.access.group !14 +// CHECK1-NEXT: [[TMP21:%.*]] = load %struct.S.0*, %struct.S.0** [[_TMP7]], align 8, !llvm.access.group !14 +// CHECK1-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 6, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64, [2 x i32]*, i64, [2 x %struct.S.0]*, %struct.S.0*)* @.omp_outlined..5 to void (i32*, i32*, ...)*), i64 [[TMP16]], i64 [[TMP18]], [2 x i32]* [[VEC2]], i64 [[TMP20]], [2 x %struct.S.0]* [[S_ARR3]], %struct.S.0* [[TMP21]]), !llvm.access.group !14 // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP22:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[TMP23:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK1-NEXT: [[TMP22:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !14 +// CHECK1-NEXT: [[TMP23:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !14 // CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP22]], [[TMP23]] -// CHECK1-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP9:![0-9]+]] +// CHECK1-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !14 +// CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP15:![0-9]+]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK1: omp.loop.exit: @@ -1131,37 +1131,37 @@ int main() { // CHECK1-NEXT: store i32 [[TMP14]], i32* [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !17 +// CHECK1-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !17 // CHECK1-NEXT: [[CMP10:%.*]] = icmp sle i32 [[TMP15]], [[TMP16]] // CHECK1-NEXT: br i1 [[CMP10]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_COND_CLEANUP:%.*]] // CHECK1: omp.inner.for.cond.cleanup: // CHECK1-NEXT: br label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !17 // CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP17]], 1 // CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK1-NEXT: store i32 [[ADD]], i32* [[I]], align 4 -// CHECK1-NEXT: [[TMP18:%.*]] = load i32, i32* [[CONV]], align 8 -// CHECK1-NEXT: [[TMP19:%.*]] = load i32, i32* [[I]], align 4 +// CHECK1-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !17 +// CHECK1-NEXT: [[TMP18:%.*]] = load i32, i32* [[CONV]], align 8, !llvm.access.group !17 +// CHECK1-NEXT: [[TMP19:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !17 // CHECK1-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP19]] to i64 
// CHECK1-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x i32], [2 x i32]* [[VEC4]], i64 0, i64 [[IDXPROM]] -// CHECK1-NEXT: store i32 [[TMP18]], i32* [[ARRAYIDX]], align 4 -// CHECK1-NEXT: [[TMP20:%.*]] = load %struct.S.0*, %struct.S.0** [[_TMP9]], align 8 -// CHECK1-NEXT: [[TMP21:%.*]] = load i32, i32* [[I]], align 4 +// CHECK1-NEXT: store i32 [[TMP18]], i32* [[ARRAYIDX]], align 4, !llvm.access.group !17 +// CHECK1-NEXT: [[TMP20:%.*]] = load %struct.S.0*, %struct.S.0** [[_TMP9]], align 8, !llvm.access.group !17 +// CHECK1-NEXT: [[TMP21:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !17 // CHECK1-NEXT: [[IDXPROM11:%.*]] = sext i32 [[TMP21]] to i64 // CHECK1-NEXT: [[ARRAYIDX12:%.*]] = getelementptr inbounds [2 x %struct.S.0], [2 x %struct.S.0]* [[S_ARR5]], i64 0, i64 [[IDXPROM11]] // CHECK1-NEXT: [[TMP22:%.*]] = bitcast %struct.S.0* [[ARRAYIDX12]] to i8* // CHECK1-NEXT: [[TMP23:%.*]] = bitcast %struct.S.0* [[TMP20]] to i8* -// CHECK1-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 [[TMP22]], i8* align 4 [[TMP23]], i64 4, i1 false) +// CHECK1-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 [[TMP22]], i8* align 4 [[TMP23]], i64 4, i1 false), !llvm.access.group !17 // CHECK1-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK1: omp.body.continue: // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP24:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP24:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !17 // CHECK1-NEXT: [[ADD13:%.*]] = add nsw i32 [[TMP24]], 1 -// CHECK1-NEXT: store i32 [[ADD13]], i32* [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP10:![0-9]+]] +// CHECK1-NEXT: store i32 [[ADD13]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !17 +// CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP18:![0-9]+]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK1: omp.loop.exit: @@ -1569,33 +1569,33 @@ int main() { // CHECK2-NEXT: store i32 [[TMP11]], i32* [[DOTOMP_IV]], align 4 // CHECK2-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK2: omp.inner.for.cond: -// CHECK2-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK2-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK2-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !5 +// CHECK2-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !5 // CHECK2-NEXT: [[CMP7:%.*]] = icmp sle i32 [[TMP12]], [[TMP13]] // CHECK2-NEXT: br i1 [[CMP7]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_COND_CLEANUP:%.*]] // CHECK2: omp.inner.for.cond.cleanup: // CHECK2-NEXT: br label [[OMP_INNER_FOR_END:%.*]] // CHECK2: omp.inner.for.body: -// CHECK2-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK2-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !5 // CHECK2-NEXT: [[TMP15:%.*]] = zext i32 [[TMP14]] to i64 -// CHECK2-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK2-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !5 // CHECK2-NEXT: [[TMP17:%.*]] = zext i32 [[TMP16]] to i64 -// CHECK2-NEXT: [[TMP18:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK2-NEXT: [[TMP18:%.*]] = load i32, i32* [[CONV]], align 8, !llvm.access.group !5 // CHECK2-NEXT: [[CONV8:%.*]] = bitcast i64* [[T_VAR_CASTED]] to i32* -// CHECK2-NEXT: store i32 
[[TMP18]], i32* [[CONV8]], align 4 -// CHECK2-NEXT: [[TMP19:%.*]] = load i64, i64* [[T_VAR_CASTED]], align 8 -// CHECK2-NEXT: [[TMP20:%.*]] = load i32, i32* [[CONV1]], align 8 +// CHECK2-NEXT: store i32 [[TMP18]], i32* [[CONV8]], align 4, !llvm.access.group !5 +// CHECK2-NEXT: [[TMP19:%.*]] = load i64, i64* [[T_VAR_CASTED]], align 8, !llvm.access.group !5 +// CHECK2-NEXT: [[TMP20:%.*]] = load i32, i32* [[CONV1]], align 8, !llvm.access.group !5 // CHECK2-NEXT: [[CONV9:%.*]] = bitcast i64* [[SIVAR_CASTED]] to i32* -// CHECK2-NEXT: store i32 [[TMP20]], i32* [[CONV9]], align 4 -// CHECK2-NEXT: [[TMP21:%.*]] = load i64, i64* [[SIVAR_CASTED]], align 8 -// CHECK2-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 7, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64, [2 x i32]*, i64, [2 x %struct.S]*, %struct.S*, i64)* @.omp_outlined..3 to void (i32*, i32*, ...)*), i64 [[TMP15]], i64 [[TMP17]], [2 x i32]* [[VEC2]], i64 [[TMP19]], [2 x %struct.S]* [[S_ARR3]], %struct.S* [[VAR5]], i64 [[TMP21]]) +// CHECK2-NEXT: store i32 [[TMP20]], i32* [[CONV9]], align 4, !llvm.access.group !5 +// CHECK2-NEXT: [[TMP21:%.*]] = load i64, i64* [[SIVAR_CASTED]], align 8, !llvm.access.group !5 +// CHECK2-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 7, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64, [2 x i32]*, i64, [2 x %struct.S]*, %struct.S*, i64)* @.omp_outlined..3 to void (i32*, i32*, ...)*), i64 [[TMP15]], i64 [[TMP17]], [2 x i32]* [[VEC2]], i64 [[TMP19]], [2 x %struct.S]* [[S_ARR3]], %struct.S* [[VAR5]], i64 [[TMP21]]), !llvm.access.group !5 // CHECK2-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK2: omp.inner.for.inc: -// CHECK2-NEXT: [[TMP22:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK2-NEXT: [[TMP23:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK2-NEXT: [[TMP22:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !5 +// CHECK2-NEXT: [[TMP23:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !5 // CHECK2-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP22]], [[TMP23]] -// CHECK2-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 -// CHECK2-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP4:![0-9]+]] +// CHECK2-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !5 +// CHECK2-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP6:![0-9]+]] // CHECK2: omp.inner.for.end: // CHECK2-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK2: omp.loop.exit: @@ -1744,40 +1744,40 @@ int main() { // CHECK2-NEXT: store i32 [[TMP13]], i32* [[DOTOMP_IV]], align 4 // CHECK2-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK2: omp.inner.for.cond: -// CHECK2-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK2-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK2-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !9 +// CHECK2-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !9 // CHECK2-NEXT: [[CMP9:%.*]] = icmp sle i32 [[TMP14]], [[TMP15]] // CHECK2-NEXT: br i1 [[CMP9]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_COND_CLEANUP:%.*]] // CHECK2: omp.inner.for.cond.cleanup: // CHECK2-NEXT: br label [[OMP_INNER_FOR_END:%.*]] // CHECK2: omp.inner.for.body: -// CHECK2-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK2-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_IV]], align 
4, !llvm.access.group !9 // CHECK2-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP16]], 1 // CHECK2-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK2-NEXT: store i32 [[ADD]], i32* [[I]], align 4 -// CHECK2-NEXT: [[TMP17:%.*]] = load i32, i32* [[CONV]], align 8 -// CHECK2-NEXT: [[TMP18:%.*]] = load i32, i32* [[I]], align 4 +// CHECK2-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !9 +// CHECK2-NEXT: [[TMP17:%.*]] = load i32, i32* [[CONV]], align 8, !llvm.access.group !9 +// CHECK2-NEXT: [[TMP18:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !9 // CHECK2-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP18]] to i64 // CHECK2-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x i32], [2 x i32]* [[VEC4]], i64 0, i64 [[IDXPROM]] -// CHECK2-NEXT: store i32 [[TMP17]], i32* [[ARRAYIDX]], align 4 -// CHECK2-NEXT: [[TMP19:%.*]] = load i32, i32* [[I]], align 4 +// CHECK2-NEXT: store i32 [[TMP17]], i32* [[ARRAYIDX]], align 4, !llvm.access.group !9 +// CHECK2-NEXT: [[TMP19:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !9 // CHECK2-NEXT: [[IDXPROM10:%.*]] = sext i32 [[TMP19]] to i64 // CHECK2-NEXT: [[ARRAYIDX11:%.*]] = getelementptr inbounds [2 x %struct.S], [2 x %struct.S]* [[S_ARR5]], i64 0, i64 [[IDXPROM10]] // CHECK2-NEXT: [[TMP20:%.*]] = bitcast %struct.S* [[ARRAYIDX11]] to i8* // CHECK2-NEXT: [[TMP21:%.*]] = bitcast %struct.S* [[VAR7]] to i8* -// CHECK2-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 [[TMP20]], i8* align 4 [[TMP21]], i64 4, i1 false) -// CHECK2-NEXT: [[TMP22:%.*]] = load i32, i32* [[I]], align 4 -// CHECK2-NEXT: [[TMP23:%.*]] = load i32, i32* [[CONV1]], align 8 +// CHECK2-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 [[TMP20]], i8* align 4 [[TMP21]], i64 4, i1 false), !llvm.access.group !9 +// CHECK2-NEXT: [[TMP22:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !9 +// CHECK2-NEXT: [[TMP23:%.*]] = load i32, i32* [[CONV1]], align 8, !llvm.access.group !9 // CHECK2-NEXT: [[ADD12:%.*]] = add nsw i32 [[TMP23]], [[TMP22]] -// CHECK2-NEXT: store i32 [[ADD12]], i32* [[CONV1]], align 8 +// CHECK2-NEXT: store i32 [[ADD12]], i32* [[CONV1]], align 8, !llvm.access.group !9 // CHECK2-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK2: omp.body.continue: // CHECK2-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK2: omp.inner.for.inc: -// CHECK2-NEXT: [[TMP24:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK2-NEXT: [[TMP24:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !9 // CHECK2-NEXT: [[ADD13:%.*]] = add nsw i32 [[TMP24]], 1 -// CHECK2-NEXT: store i32 [[ADD13]], i32* [[DOTOMP_IV]], align 4 -// CHECK2-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP6:![0-9]+]] +// CHECK2-NEXT: store i32 [[ADD13]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !9 +// CHECK2-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP10:![0-9]+]] // CHECK2: omp.inner.for.end: // CHECK2-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK2: omp.loop.exit: @@ -2069,30 +2069,30 @@ int main() { // CHECK2-NEXT: store i32 [[TMP12]], i32* [[DOTOMP_IV]], align 4 // CHECK2-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK2: omp.inner.for.cond: -// CHECK2-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK2-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK2-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !14 +// CHECK2-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !14 // CHECK2-NEXT: [[CMP8:%.*]] = icmp sle i32 [[TMP13]], [[TMP14]] // 
CHECK2-NEXT: br i1 [[CMP8]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_COND_CLEANUP:%.*]] // CHECK2: omp.inner.for.cond.cleanup: // CHECK2-NEXT: br label [[OMP_INNER_FOR_END:%.*]] // CHECK2: omp.inner.for.body: -// CHECK2-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK2-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !14 // CHECK2-NEXT: [[TMP16:%.*]] = zext i32 [[TMP15]] to i64 -// CHECK2-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK2-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !14 // CHECK2-NEXT: [[TMP18:%.*]] = zext i32 [[TMP17]] to i64 -// CHECK2-NEXT: [[TMP19:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK2-NEXT: [[TMP19:%.*]] = load i32, i32* [[CONV]], align 8, !llvm.access.group !14 // CHECK2-NEXT: [[CONV9:%.*]] = bitcast i64* [[T_VAR_CASTED]] to i32* -// CHECK2-NEXT: store i32 [[TMP19]], i32* [[CONV9]], align 4 -// CHECK2-NEXT: [[TMP20:%.*]] = load i64, i64* [[T_VAR_CASTED]], align 8 -// CHECK2-NEXT: [[TMP21:%.*]] = load %struct.S.0*, %struct.S.0** [[_TMP7]], align 8 -// CHECK2-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 6, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64, [2 x i32]*, i64, [2 x %struct.S.0]*, %struct.S.0*)* @.omp_outlined..5 to void (i32*, i32*, ...)*), i64 [[TMP16]], i64 [[TMP18]], [2 x i32]* [[VEC2]], i64 [[TMP20]], [2 x %struct.S.0]* [[S_ARR3]], %struct.S.0* [[TMP21]]) +// CHECK2-NEXT: store i32 [[TMP19]], i32* [[CONV9]], align 4, !llvm.access.group !14 +// CHECK2-NEXT: [[TMP20:%.*]] = load i64, i64* [[T_VAR_CASTED]], align 8, !llvm.access.group !14 +// CHECK2-NEXT: [[TMP21:%.*]] = load %struct.S.0*, %struct.S.0** [[_TMP7]], align 8, !llvm.access.group !14 +// CHECK2-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) 
@__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 6, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64, [2 x i32]*, i64, [2 x %struct.S.0]*, %struct.S.0*)* @.omp_outlined..5 to void (i32*, i32*, ...)*), i64 [[TMP16]], i64 [[TMP18]], [2 x i32]* [[VEC2]], i64 [[TMP20]], [2 x %struct.S.0]* [[S_ARR3]], %struct.S.0* [[TMP21]]), !llvm.access.group !14 // CHECK2-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK2: omp.inner.for.inc: -// CHECK2-NEXT: [[TMP22:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK2-NEXT: [[TMP23:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK2-NEXT: [[TMP22:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !14 +// CHECK2-NEXT: [[TMP23:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !14 // CHECK2-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP22]], [[TMP23]] -// CHECK2-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 -// CHECK2-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP9:![0-9]+]] +// CHECK2-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !14 +// CHECK2-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP15:![0-9]+]] // CHECK2: omp.inner.for.end: // CHECK2-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK2: omp.loop.exit: @@ -2223,37 +2223,37 @@ int main() { // CHECK2-NEXT: store i32 [[TMP14]], i32* [[DOTOMP_IV]], align 4 // CHECK2-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK2: omp.inner.for.cond: -// CHECK2-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK2-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK2-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !17 +// CHECK2-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !17 // CHECK2-NEXT: [[CMP10:%.*]] = icmp sle i32 [[TMP15]], [[TMP16]] // CHECK2-NEXT: br i1 [[CMP10]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_COND_CLEANUP:%.*]] // CHECK2: omp.inner.for.cond.cleanup: // CHECK2-NEXT: br label [[OMP_INNER_FOR_END:%.*]] // CHECK2: omp.inner.for.body: -// CHECK2-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK2-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !17 // CHECK2-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP17]], 1 // CHECK2-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK2-NEXT: store i32 [[ADD]], i32* [[I]], align 4 -// CHECK2-NEXT: [[TMP18:%.*]] = load i32, i32* [[CONV]], align 8 -// CHECK2-NEXT: [[TMP19:%.*]] = load i32, i32* [[I]], align 4 +// CHECK2-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !17 +// CHECK2-NEXT: [[TMP18:%.*]] = load i32, i32* [[CONV]], align 8, !llvm.access.group !17 +// CHECK2-NEXT: [[TMP19:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !17 // CHECK2-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP19]] to i64 // CHECK2-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x i32], [2 x i32]* [[VEC4]], i64 0, i64 [[IDXPROM]] -// CHECK2-NEXT: store i32 [[TMP18]], i32* [[ARRAYIDX]], align 4 -// CHECK2-NEXT: [[TMP20:%.*]] = load %struct.S.0*, %struct.S.0** [[_TMP9]], align 8 -// CHECK2-NEXT: [[TMP21:%.*]] = load i32, i32* [[I]], align 4 +// CHECK2-NEXT: store i32 [[TMP18]], i32* [[ARRAYIDX]], align 4, !llvm.access.group !17 +// CHECK2-NEXT: [[TMP20:%.*]] = load %struct.S.0*, %struct.S.0** [[_TMP9]], align 8, !llvm.access.group !17 +// CHECK2-NEXT: [[TMP21:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !17 // CHECK2-NEXT: [[IDXPROM11:%.*]] = sext i32 [[TMP21]] to i64 // CHECK2-NEXT: [[ARRAYIDX12:%.*]] = 
getelementptr inbounds [2 x %struct.S.0], [2 x %struct.S.0]* [[S_ARR5]], i64 0, i64 [[IDXPROM11]] // CHECK2-NEXT: [[TMP22:%.*]] = bitcast %struct.S.0* [[ARRAYIDX12]] to i8* // CHECK2-NEXT: [[TMP23:%.*]] = bitcast %struct.S.0* [[TMP20]] to i8* -// CHECK2-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 [[TMP22]], i8* align 4 [[TMP23]], i64 4, i1 false) +// CHECK2-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 [[TMP22]], i8* align 4 [[TMP23]], i64 4, i1 false), !llvm.access.group !17 // CHECK2-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK2: omp.body.continue: // CHECK2-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK2: omp.inner.for.inc: -// CHECK2-NEXT: [[TMP24:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK2-NEXT: [[TMP24:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !17 // CHECK2-NEXT: [[ADD13:%.*]] = add nsw i32 [[TMP24]], 1 -// CHECK2-NEXT: store i32 [[ADD13]], i32* [[DOTOMP_IV]], align 4 -// CHECK2-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP10:![0-9]+]] +// CHECK2-NEXT: store i32 [[ADD13]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !17 +// CHECK2-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP18:![0-9]+]] // CHECK2: omp.inner.for.end: // CHECK2-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK2: omp.loop.exit: @@ -2653,29 +2653,29 @@ int main() { // CHECK3-NEXT: store i32 [[TMP11]], i32* [[DOTOMP_IV]], align 4 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK3: omp.inner.for.cond: -// CHECK3-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK3-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !6 +// CHECK3-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !6 // CHECK3-NEXT: [[CMP6:%.*]] = icmp sle i32 [[TMP12]], [[TMP13]] // CHECK3-NEXT: br i1 [[CMP6]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_COND_CLEANUP:%.*]] // CHECK3: omp.inner.for.cond.cleanup: // CHECK3-NEXT: br label [[OMP_INNER_FOR_END:%.*]] // CHECK3: omp.inner.for.body: -// CHECK3-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK3-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK3-NEXT: [[TMP16:%.*]] = load i32, i32* [[T_VAR_ADDR]], align 4 -// CHECK3-NEXT: store i32 [[TMP16]], i32* [[T_VAR_CASTED]], align 4 -// CHECK3-NEXT: [[TMP17:%.*]] = load i32, i32* [[T_VAR_CASTED]], align 4 -// CHECK3-NEXT: [[TMP18:%.*]] = load i32, i32* [[SIVAR_ADDR]], align 4 -// CHECK3-NEXT: store i32 [[TMP18]], i32* [[SIVAR_CASTED]], align 4 -// CHECK3-NEXT: [[TMP19:%.*]] = load i32, i32* [[SIVAR_CASTED]], align 4 -// CHECK3-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) 
@__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 7, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32, i32, [2 x i32]*, i32, [2 x %struct.S]*, %struct.S*, i32)* @.omp_outlined..3 to void (i32*, i32*, ...)*), i32 [[TMP14]], i32 [[TMP15]], [2 x i32]* [[VEC1]], i32 [[TMP17]], [2 x %struct.S]* [[S_ARR2]], %struct.S* [[VAR4]], i32 [[TMP19]]) +// CHECK3-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !6 +// CHECK3-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !6 +// CHECK3-NEXT: [[TMP16:%.*]] = load i32, i32* [[T_VAR_ADDR]], align 4, !llvm.access.group !6 +// CHECK3-NEXT: store i32 [[TMP16]], i32* [[T_VAR_CASTED]], align 4, !llvm.access.group !6 +// CHECK3-NEXT: [[TMP17:%.*]] = load i32, i32* [[T_VAR_CASTED]], align 4, !llvm.access.group !6 +// CHECK3-NEXT: [[TMP18:%.*]] = load i32, i32* [[SIVAR_ADDR]], align 4, !llvm.access.group !6 +// CHECK3-NEXT: store i32 [[TMP18]], i32* [[SIVAR_CASTED]], align 4, !llvm.access.group !6 +// CHECK3-NEXT: [[TMP19:%.*]] = load i32, i32* [[SIVAR_CASTED]], align 4, !llvm.access.group !6 +// CHECK3-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 7, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32, i32, [2 x i32]*, i32, [2 x %struct.S]*, %struct.S*, i32)* @.omp_outlined..3 to void (i32*, i32*, ...)*), i32 [[TMP14]], i32 [[TMP15]], [2 x i32]* [[VEC1]], i32 [[TMP17]], [2 x %struct.S]* [[S_ARR2]], %struct.S* [[VAR4]], i32 [[TMP19]]), !llvm.access.group !6 // CHECK3-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK3: omp.inner.for.inc: -// CHECK3-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK3-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !6 +// CHECK3-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !6 // CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP20]], [[TMP21]] -// CHECK3-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP5:![0-9]+]] +// CHECK3-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !6 +// CHECK3-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP7:![0-9]+]] // CHECK3: omp.inner.for.end: // CHECK3-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK3: omp.loop.exit: @@ -2820,38 +2820,38 @@ int main() { // CHECK3-NEXT: store i32 [[TMP13]], i32* [[DOTOMP_IV]], align 4 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK3: omp.inner.for.cond: -// CHECK3-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !10 +// CHECK3-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !10 // CHECK3-NEXT: [[CMP6:%.*]] = icmp sle i32 [[TMP14]], [[TMP15]] // CHECK3-NEXT: br i1 [[CMP6]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_COND_CLEANUP:%.*]] // CHECK3: omp.inner.for.cond.cleanup: // CHECK3-NEXT: br label [[OMP_INNER_FOR_END:%.*]] // CHECK3: omp.inner.for.body: -// CHECK3-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !10 // CHECK3-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP16]], 1 // CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// 
CHECK3-NEXT: store i32 [[ADD]], i32* [[I]], align 4 -// CHECK3-NEXT: [[TMP17:%.*]] = load i32, i32* [[T_VAR_ADDR]], align 4 -// CHECK3-NEXT: [[TMP18:%.*]] = load i32, i32* [[I]], align 4 +// CHECK3-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !10 +// CHECK3-NEXT: [[TMP17:%.*]] = load i32, i32* [[T_VAR_ADDR]], align 4, !llvm.access.group !10 +// CHECK3-NEXT: [[TMP18:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !10 // CHECK3-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x i32], [2 x i32]* [[VEC1]], i32 0, i32 [[TMP18]] -// CHECK3-NEXT: store i32 [[TMP17]], i32* [[ARRAYIDX]], align 4 -// CHECK3-NEXT: [[TMP19:%.*]] = load i32, i32* [[I]], align 4 +// CHECK3-NEXT: store i32 [[TMP17]], i32* [[ARRAYIDX]], align 4, !llvm.access.group !10 +// CHECK3-NEXT: [[TMP19:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !10 // CHECK3-NEXT: [[ARRAYIDX7:%.*]] = getelementptr inbounds [2 x %struct.S], [2 x %struct.S]* [[S_ARR2]], i32 0, i32 [[TMP19]] // CHECK3-NEXT: [[TMP20:%.*]] = bitcast %struct.S* [[ARRAYIDX7]] to i8* // CHECK3-NEXT: [[TMP21:%.*]] = bitcast %struct.S* [[VAR4]] to i8* -// CHECK3-NEXT: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 4 [[TMP20]], i8* align 4 [[TMP21]], i32 4, i1 false) -// CHECK3-NEXT: [[TMP22:%.*]] = load i32, i32* [[I]], align 4 -// CHECK3-NEXT: [[TMP23:%.*]] = load i32, i32* [[SIVAR_ADDR]], align 4 +// CHECK3-NEXT: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 4 [[TMP20]], i8* align 4 [[TMP21]], i32 4, i1 false), !llvm.access.group !10 +// CHECK3-NEXT: [[TMP22:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !10 +// CHECK3-NEXT: [[TMP23:%.*]] = load i32, i32* [[SIVAR_ADDR]], align 4, !llvm.access.group !10 // CHECK3-NEXT: [[ADD8:%.*]] = add nsw i32 [[TMP23]], [[TMP22]] -// CHECK3-NEXT: store i32 [[ADD8]], i32* [[SIVAR_ADDR]], align 4 +// CHECK3-NEXT: store i32 [[ADD8]], i32* [[SIVAR_ADDR]], align 4, !llvm.access.group !10 // CHECK3-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK3: omp.body.continue: // CHECK3-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK3: omp.inner.for.inc: -// CHECK3-NEXT: [[TMP24:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP24:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !10 // CHECK3-NEXT: [[ADD9:%.*]] = add nsw i32 [[TMP24]], 1 -// CHECK3-NEXT: store i32 [[ADD9]], i32* [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP7:![0-9]+]] +// CHECK3-NEXT: store i32 [[ADD9]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !10 +// CHECK3-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP11:![0-9]+]] // CHECK3: omp.inner.for.end: // CHECK3-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK3: omp.loop.exit: @@ -3139,27 +3139,27 @@ int main() { // CHECK3-NEXT: store i32 [[TMP12]], i32* [[DOTOMP_IV]], align 4 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK3: omp.inner.for.cond: -// CHECK3-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK3-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !15 +// CHECK3-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !15 // CHECK3-NEXT: [[CMP8:%.*]] = icmp sle i32 [[TMP13]], [[TMP14]] // CHECK3-NEXT: br i1 [[CMP8]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_COND_CLEANUP:%.*]] // CHECK3: omp.inner.for.cond.cleanup: // CHECK3-NEXT: br label [[OMP_INNER_FOR_END:%.*]] // CHECK3: omp.inner.for.body: -// 
CHECK3-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK3-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK3-NEXT: [[TMP17:%.*]] = load i32, i32* [[T_VAR_ADDR]], align 4 -// CHECK3-NEXT: store i32 [[TMP17]], i32* [[T_VAR_CASTED]], align 4 -// CHECK3-NEXT: [[TMP18:%.*]] = load i32, i32* [[T_VAR_CASTED]], align 4 -// CHECK3-NEXT: [[TMP19:%.*]] = load %struct.S.0*, %struct.S.0** [[_TMP7]], align 4 -// CHECK3-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 6, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32, i32, [2 x i32]*, i32, [2 x %struct.S.0]*, %struct.S.0*)* @.omp_outlined..5 to void (i32*, i32*, ...)*), i32 [[TMP15]], i32 [[TMP16]], [2 x i32]* [[VEC2]], i32 [[TMP18]], [2 x %struct.S.0]* [[S_ARR3]], %struct.S.0* [[TMP19]]) +// CHECK3-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !15 +// CHECK3-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !15 +// CHECK3-NEXT: [[TMP17:%.*]] = load i32, i32* [[T_VAR_ADDR]], align 4, !llvm.access.group !15 +// CHECK3-NEXT: store i32 [[TMP17]], i32* [[T_VAR_CASTED]], align 4, !llvm.access.group !15 +// CHECK3-NEXT: [[TMP18:%.*]] = load i32, i32* [[T_VAR_CASTED]], align 4, !llvm.access.group !15 +// CHECK3-NEXT: [[TMP19:%.*]] = load %struct.S.0*, %struct.S.0** [[_TMP7]], align 4, !llvm.access.group !15 +// CHECK3-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 6, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32, i32, [2 x i32]*, i32, [2 x %struct.S.0]*, %struct.S.0*)* @.omp_outlined..5 to void (i32*, i32*, ...)*), i32 [[TMP15]], i32 [[TMP16]], [2 x i32]* [[VEC2]], i32 [[TMP18]], [2 x %struct.S.0]* [[S_ARR3]], %struct.S.0* [[TMP19]]), !llvm.access.group !15 // CHECK3-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK3: omp.inner.for.inc: -// CHECK3-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK3-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !15 +// CHECK3-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !15 // CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP20]], [[TMP21]] -// CHECK3-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP10:![0-9]+]] +// CHECK3-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !15 +// CHECK3-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP16:![0-9]+]] // CHECK3: omp.inner.for.end: // CHECK3-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK3: omp.loop.exit: @@ -3287,35 +3287,35 @@ int main() { // CHECK3-NEXT: store i32 [[TMP14]], i32* [[DOTOMP_IV]], align 4 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK3: omp.inner.for.cond: -// CHECK3-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !18 +// CHECK3-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !18 // CHECK3-NEXT: [[CMP8:%.*]] = icmp sle i32 [[TMP15]], [[TMP16]] // CHECK3-NEXT: br i1 [[CMP8]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_COND_CLEANUP:%.*]] // CHECK3: omp.inner.for.cond.cleanup: // CHECK3-NEXT: br label 
[[OMP_INNER_FOR_END:%.*]] // CHECK3: omp.inner.for.body: -// CHECK3-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !18 // CHECK3-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP17]], 1 // CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK3-NEXT: store i32 [[ADD]], i32* [[I]], align 4 -// CHECK3-NEXT: [[TMP18:%.*]] = load i32, i32* [[T_VAR_ADDR]], align 4 -// CHECK3-NEXT: [[TMP19:%.*]] = load i32, i32* [[I]], align 4 +// CHECK3-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !18 +// CHECK3-NEXT: [[TMP18:%.*]] = load i32, i32* [[T_VAR_ADDR]], align 4, !llvm.access.group !18 +// CHECK3-NEXT: [[TMP19:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !18 // CHECK3-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x i32], [2 x i32]* [[VEC2]], i32 0, i32 [[TMP19]] -// CHECK3-NEXT: store i32 [[TMP18]], i32* [[ARRAYIDX]], align 4 -// CHECK3-NEXT: [[TMP20:%.*]] = load %struct.S.0*, %struct.S.0** [[_TMP7]], align 4 -// CHECK3-NEXT: [[TMP21:%.*]] = load i32, i32* [[I]], align 4 +// CHECK3-NEXT: store i32 [[TMP18]], i32* [[ARRAYIDX]], align 4, !llvm.access.group !18 +// CHECK3-NEXT: [[TMP20:%.*]] = load %struct.S.0*, %struct.S.0** [[_TMP7]], align 4, !llvm.access.group !18 +// CHECK3-NEXT: [[TMP21:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !18 // CHECK3-NEXT: [[ARRAYIDX9:%.*]] = getelementptr inbounds [2 x %struct.S.0], [2 x %struct.S.0]* [[S_ARR3]], i32 0, i32 [[TMP21]] // CHECK3-NEXT: [[TMP22:%.*]] = bitcast %struct.S.0* [[ARRAYIDX9]] to i8* // CHECK3-NEXT: [[TMP23:%.*]] = bitcast %struct.S.0* [[TMP20]] to i8* -// CHECK3-NEXT: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 4 [[TMP22]], i8* align 4 [[TMP23]], i32 4, i1 false) +// CHECK3-NEXT: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 4 [[TMP22]], i8* align 4 [[TMP23]], i32 4, i1 false), !llvm.access.group !18 // CHECK3-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK3: omp.body.continue: // CHECK3-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK3: omp.inner.for.inc: -// CHECK3-NEXT: [[TMP24:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP24:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !18 // CHECK3-NEXT: [[ADD10:%.*]] = add nsw i32 [[TMP24]], 1 -// CHECK3-NEXT: store i32 [[ADD10]], i32* [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP11:![0-9]+]] +// CHECK3-NEXT: store i32 [[ADD10]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !18 +// CHECK3-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP19:![0-9]+]] // CHECK3: omp.inner.for.end: // CHECK3-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK3: omp.loop.exit: @@ -3715,29 +3715,29 @@ int main() { // CHECK4-NEXT: store i32 [[TMP11]], i32* [[DOTOMP_IV]], align 4 // CHECK4-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK4: omp.inner.for.cond: -// CHECK4-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK4-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK4-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !6 +// CHECK4-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !6 // CHECK4-NEXT: [[CMP6:%.*]] = icmp sle i32 [[TMP12]], [[TMP13]] // CHECK4-NEXT: br i1 [[CMP6]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_COND_CLEANUP:%.*]] // CHECK4: omp.inner.for.cond.cleanup: // CHECK4-NEXT: br label [[OMP_INNER_FOR_END:%.*]] // CHECK4: omp.inner.for.body: -// 
CHECK4-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK4-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK4-NEXT: [[TMP16:%.*]] = load i32, i32* [[T_VAR_ADDR]], align 4 -// CHECK4-NEXT: store i32 [[TMP16]], i32* [[T_VAR_CASTED]], align 4 -// CHECK4-NEXT: [[TMP17:%.*]] = load i32, i32* [[T_VAR_CASTED]], align 4 -// CHECK4-NEXT: [[TMP18:%.*]] = load i32, i32* [[SIVAR_ADDR]], align 4 -// CHECK4-NEXT: store i32 [[TMP18]], i32* [[SIVAR_CASTED]], align 4 -// CHECK4-NEXT: [[TMP19:%.*]] = load i32, i32* [[SIVAR_CASTED]], align 4 -// CHECK4-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 7, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32, i32, [2 x i32]*, i32, [2 x %struct.S]*, %struct.S*, i32)* @.omp_outlined..3 to void (i32*, i32*, ...)*), i32 [[TMP14]], i32 [[TMP15]], [2 x i32]* [[VEC1]], i32 [[TMP17]], [2 x %struct.S]* [[S_ARR2]], %struct.S* [[VAR4]], i32 [[TMP19]]) +// CHECK4-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !6 +// CHECK4-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !6 +// CHECK4-NEXT: [[TMP16:%.*]] = load i32, i32* [[T_VAR_ADDR]], align 4, !llvm.access.group !6 +// CHECK4-NEXT: store i32 [[TMP16]], i32* [[T_VAR_CASTED]], align 4, !llvm.access.group !6 +// CHECK4-NEXT: [[TMP17:%.*]] = load i32, i32* [[T_VAR_CASTED]], align 4, !llvm.access.group !6 +// CHECK4-NEXT: [[TMP18:%.*]] = load i32, i32* [[SIVAR_ADDR]], align 4, !llvm.access.group !6 +// CHECK4-NEXT: store i32 [[TMP18]], i32* [[SIVAR_CASTED]], align 4, !llvm.access.group !6 +// CHECK4-NEXT: [[TMP19:%.*]] = load i32, i32* [[SIVAR_CASTED]], align 4, !llvm.access.group !6 +// CHECK4-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) 
@__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 7, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32, i32, [2 x i32]*, i32, [2 x %struct.S]*, %struct.S*, i32)* @.omp_outlined..3 to void (i32*, i32*, ...)*), i32 [[TMP14]], i32 [[TMP15]], [2 x i32]* [[VEC1]], i32 [[TMP17]], [2 x %struct.S]* [[S_ARR2]], %struct.S* [[VAR4]], i32 [[TMP19]]), !llvm.access.group !6 // CHECK4-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK4: omp.inner.for.inc: -// CHECK4-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK4-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK4-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !6 +// CHECK4-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !6 // CHECK4-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP20]], [[TMP21]] -// CHECK4-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 -// CHECK4-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP5:![0-9]+]] +// CHECK4-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !6 +// CHECK4-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP7:![0-9]+]] // CHECK4: omp.inner.for.end: // CHECK4-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK4: omp.loop.exit: @@ -3882,38 +3882,38 @@ int main() { // CHECK4-NEXT: store i32 [[TMP13]], i32* [[DOTOMP_IV]], align 4 // CHECK4-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK4: omp.inner.for.cond: -// CHECK4-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK4-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK4-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !10 +// CHECK4-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !10 // CHECK4-NEXT: [[CMP6:%.*]] = icmp sle i32 [[TMP14]], [[TMP15]] // CHECK4-NEXT: br i1 [[CMP6]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_COND_CLEANUP:%.*]] // CHECK4: omp.inner.for.cond.cleanup: // CHECK4-NEXT: br label [[OMP_INNER_FOR_END:%.*]] // CHECK4: omp.inner.for.body: -// CHECK4-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK4-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !10 // CHECK4-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP16]], 1 // CHECK4-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK4-NEXT: store i32 [[ADD]], i32* [[I]], align 4 -// CHECK4-NEXT: [[TMP17:%.*]] = load i32, i32* [[T_VAR_ADDR]], align 4 -// CHECK4-NEXT: [[TMP18:%.*]] = load i32, i32* [[I]], align 4 +// CHECK4-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !10 +// CHECK4-NEXT: [[TMP17:%.*]] = load i32, i32* [[T_VAR_ADDR]], align 4, !llvm.access.group !10 +// CHECK4-NEXT: [[TMP18:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !10 // CHECK4-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x i32], [2 x i32]* [[VEC1]], i32 0, i32 [[TMP18]] -// CHECK4-NEXT: store i32 [[TMP17]], i32* [[ARRAYIDX]], align 4 -// CHECK4-NEXT: [[TMP19:%.*]] = load i32, i32* [[I]], align 4 +// CHECK4-NEXT: store i32 [[TMP17]], i32* [[ARRAYIDX]], align 4, !llvm.access.group !10 +// CHECK4-NEXT: [[TMP19:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !10 // CHECK4-NEXT: [[ARRAYIDX7:%.*]] = getelementptr inbounds [2 x %struct.S], [2 x %struct.S]* [[S_ARR2]], i32 0, i32 [[TMP19]] // CHECK4-NEXT: [[TMP20:%.*]] = bitcast %struct.S* [[ARRAYIDX7]] to i8* // CHECK4-NEXT: [[TMP21:%.*]] = bitcast %struct.S* [[VAR4]] to i8* -// CHECK4-NEXT: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 4 
[[TMP20]], i8* align 4 [[TMP21]], i32 4, i1 false) -// CHECK4-NEXT: [[TMP22:%.*]] = load i32, i32* [[I]], align 4 -// CHECK4-NEXT: [[TMP23:%.*]] = load i32, i32* [[SIVAR_ADDR]], align 4 +// CHECK4-NEXT: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 4 [[TMP20]], i8* align 4 [[TMP21]], i32 4, i1 false), !llvm.access.group !10 +// CHECK4-NEXT: [[TMP22:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !10 +// CHECK4-NEXT: [[TMP23:%.*]] = load i32, i32* [[SIVAR_ADDR]], align 4, !llvm.access.group !10 // CHECK4-NEXT: [[ADD8:%.*]] = add nsw i32 [[TMP23]], [[TMP22]] -// CHECK4-NEXT: store i32 [[ADD8]], i32* [[SIVAR_ADDR]], align 4 +// CHECK4-NEXT: store i32 [[ADD8]], i32* [[SIVAR_ADDR]], align 4, !llvm.access.group !10 // CHECK4-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK4: omp.body.continue: // CHECK4-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK4: omp.inner.for.inc: -// CHECK4-NEXT: [[TMP24:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK4-NEXT: [[TMP24:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !10 // CHECK4-NEXT: [[ADD9:%.*]] = add nsw i32 [[TMP24]], 1 -// CHECK4-NEXT: store i32 [[ADD9]], i32* [[DOTOMP_IV]], align 4 -// CHECK4-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP7:![0-9]+]] +// CHECK4-NEXT: store i32 [[ADD9]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !10 +// CHECK4-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP11:![0-9]+]] // CHECK4: omp.inner.for.end: // CHECK4-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK4: omp.loop.exit: @@ -4201,27 +4201,27 @@ int main() { // CHECK4-NEXT: store i32 [[TMP12]], i32* [[DOTOMP_IV]], align 4 // CHECK4-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK4: omp.inner.for.cond: -// CHECK4-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK4-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK4-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !15 +// CHECK4-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !15 // CHECK4-NEXT: [[CMP8:%.*]] = icmp sle i32 [[TMP13]], [[TMP14]] // CHECK4-NEXT: br i1 [[CMP8]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_COND_CLEANUP:%.*]] // CHECK4: omp.inner.for.cond.cleanup: // CHECK4-NEXT: br label [[OMP_INNER_FOR_END:%.*]] // CHECK4: omp.inner.for.body: -// CHECK4-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK4-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK4-NEXT: [[TMP17:%.*]] = load i32, i32* [[T_VAR_ADDR]], align 4 -// CHECK4-NEXT: store i32 [[TMP17]], i32* [[T_VAR_CASTED]], align 4 -// CHECK4-NEXT: [[TMP18:%.*]] = load i32, i32* [[T_VAR_CASTED]], align 4 -// CHECK4-NEXT: [[TMP19:%.*]] = load %struct.S.0*, %struct.S.0** [[_TMP7]], align 4 -// CHECK4-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) 
@__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 6, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32, i32, [2 x i32]*, i32, [2 x %struct.S.0]*, %struct.S.0*)* @.omp_outlined..5 to void (i32*, i32*, ...)*), i32 [[TMP15]], i32 [[TMP16]], [2 x i32]* [[VEC2]], i32 [[TMP18]], [2 x %struct.S.0]* [[S_ARR3]], %struct.S.0* [[TMP19]]) +// CHECK4-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !15 +// CHECK4-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !15 +// CHECK4-NEXT: [[TMP17:%.*]] = load i32, i32* [[T_VAR_ADDR]], align 4, !llvm.access.group !15 +// CHECK4-NEXT: store i32 [[TMP17]], i32* [[T_VAR_CASTED]], align 4, !llvm.access.group !15 +// CHECK4-NEXT: [[TMP18:%.*]] = load i32, i32* [[T_VAR_CASTED]], align 4, !llvm.access.group !15 +// CHECK4-NEXT: [[TMP19:%.*]] = load %struct.S.0*, %struct.S.0** [[_TMP7]], align 4, !llvm.access.group !15 +// CHECK4-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 6, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32, i32, [2 x i32]*, i32, [2 x %struct.S.0]*, %struct.S.0*)* @.omp_outlined..5 to void (i32*, i32*, ...)*), i32 [[TMP15]], i32 [[TMP16]], [2 x i32]* [[VEC2]], i32 [[TMP18]], [2 x %struct.S.0]* [[S_ARR3]], %struct.S.0* [[TMP19]]), !llvm.access.group !15 // CHECK4-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK4: omp.inner.for.inc: -// CHECK4-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK4-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK4-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !15 +// CHECK4-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !15 // CHECK4-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP20]], [[TMP21]] -// CHECK4-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 -// CHECK4-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP10:![0-9]+]] +// CHECK4-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !15 +// CHECK4-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP16:![0-9]+]] // CHECK4: omp.inner.for.end: // CHECK4-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK4: omp.loop.exit: @@ -4349,35 +4349,35 @@ int main() { // CHECK4-NEXT: store i32 [[TMP14]], i32* [[DOTOMP_IV]], align 4 // CHECK4-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK4: omp.inner.for.cond: -// CHECK4-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK4-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK4-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !18 +// CHECK4-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !18 // CHECK4-NEXT: [[CMP8:%.*]] = icmp sle i32 [[TMP15]], [[TMP16]] // CHECK4-NEXT: br i1 [[CMP8]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_COND_CLEANUP:%.*]] // CHECK4: omp.inner.for.cond.cleanup: // CHECK4-NEXT: br label [[OMP_INNER_FOR_END:%.*]] // CHECK4: omp.inner.for.body: -// CHECK4-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK4-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !18 // CHECK4-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP17]], 1 // CHECK4-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK4-NEXT: store i32 [[ADD]], i32* [[I]], align 4 -// CHECK4-NEXT: [[TMP18:%.*]] = load i32, i32* [[T_VAR_ADDR]], align 4 -// CHECK4-NEXT: [[TMP19:%.*]] = load i32, i32* [[I]], align 4 +// 
CHECK4-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !18 +// CHECK4-NEXT: [[TMP18:%.*]] = load i32, i32* [[T_VAR_ADDR]], align 4, !llvm.access.group !18 +// CHECK4-NEXT: [[TMP19:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !18 // CHECK4-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x i32], [2 x i32]* [[VEC2]], i32 0, i32 [[TMP19]] -// CHECK4-NEXT: store i32 [[TMP18]], i32* [[ARRAYIDX]], align 4 -// CHECK4-NEXT: [[TMP20:%.*]] = load %struct.S.0*, %struct.S.0** [[_TMP7]], align 4 -// CHECK4-NEXT: [[TMP21:%.*]] = load i32, i32* [[I]], align 4 +// CHECK4-NEXT: store i32 [[TMP18]], i32* [[ARRAYIDX]], align 4, !llvm.access.group !18 +// CHECK4-NEXT: [[TMP20:%.*]] = load %struct.S.0*, %struct.S.0** [[_TMP7]], align 4, !llvm.access.group !18 +// CHECK4-NEXT: [[TMP21:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !18 // CHECK4-NEXT: [[ARRAYIDX9:%.*]] = getelementptr inbounds [2 x %struct.S.0], [2 x %struct.S.0]* [[S_ARR3]], i32 0, i32 [[TMP21]] // CHECK4-NEXT: [[TMP22:%.*]] = bitcast %struct.S.0* [[ARRAYIDX9]] to i8* // CHECK4-NEXT: [[TMP23:%.*]] = bitcast %struct.S.0* [[TMP20]] to i8* -// CHECK4-NEXT: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 4 [[TMP22]], i8* align 4 [[TMP23]], i32 4, i1 false) +// CHECK4-NEXT: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 4 [[TMP22]], i8* align 4 [[TMP23]], i32 4, i1 false), !llvm.access.group !18 // CHECK4-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK4: omp.body.continue: // CHECK4-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK4: omp.inner.for.inc: -// CHECK4-NEXT: [[TMP24:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK4-NEXT: [[TMP24:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !18 // CHECK4-NEXT: [[ADD10:%.*]] = add nsw i32 [[TMP24]], 1 -// CHECK4-NEXT: store i32 [[ADD10]], i32* [[DOTOMP_IV]], align 4 -// CHECK4-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP11:![0-9]+]] +// CHECK4-NEXT: store i32 [[ADD10]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !18 +// CHECK4-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP19:![0-9]+]] // CHECK4: omp.inner.for.end: // CHECK4-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK4: omp.loop.exit: @@ -6002,36 +6002,36 @@ int main() { // CHECK9-NEXT: store i32 [[TMP4]], i32* [[DOTOMP_IV]], align 4 // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK9: omp.inner.for.cond: -// CHECK9-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK9-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK9-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !4 +// CHECK9-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !4 // CHECK9-NEXT: [[CMP4:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] // CHECK9-NEXT: br i1 [[CMP4]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK9: omp.inner.for.body: -// CHECK9-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK9-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !4 // CHECK9-NEXT: [[TMP8:%.*]] = zext i32 [[TMP7]] to i64 -// CHECK9-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK9-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !4 // CHECK9-NEXT: [[TMP10:%.*]] = zext i32 [[TMP9]] to i64 -// CHECK9-NEXT: [[TMP11:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK9-NEXT: [[TMP11:%.*]] = load i32, i32* [[CONV]], align 8, !llvm.access.group !4 // CHECK9-NEXT: 
[[CONV5:%.*]] = bitcast i64* [[G_CASTED]] to i32* -// CHECK9-NEXT: store i32 [[TMP11]], i32* [[CONV5]], align 4 -// CHECK9-NEXT: [[TMP12:%.*]] = load i64, i64* [[G_CASTED]], align 8 -// CHECK9-NEXT: [[TMP13:%.*]] = load i32*, i32** [[TMP]], align 8 -// CHECK9-NEXT: [[TMP14:%.*]] = load volatile i32, i32* [[TMP13]], align 4 +// CHECK9-NEXT: store i32 [[TMP11]], i32* [[CONV5]], align 4, !llvm.access.group !4 +// CHECK9-NEXT: [[TMP12:%.*]] = load i64, i64* [[G_CASTED]], align 8, !llvm.access.group !4 +// CHECK9-NEXT: [[TMP13:%.*]] = load i32*, i32** [[TMP]], align 8, !llvm.access.group !4 +// CHECK9-NEXT: [[TMP14:%.*]] = load volatile i32, i32* [[TMP13]], align 4, !llvm.access.group !4 // CHECK9-NEXT: [[CONV6:%.*]] = bitcast i64* [[G1_CASTED]] to i32* -// CHECK9-NEXT: store i32 [[TMP14]], i32* [[CONV6]], align 4 -// CHECK9-NEXT: [[TMP15:%.*]] = load i64, i64* [[G1_CASTED]], align 8 -// CHECK9-NEXT: [[TMP16:%.*]] = load i32, i32* [[CONV2]], align 8 +// CHECK9-NEXT: store i32 [[TMP14]], i32* [[CONV6]], align 4, !llvm.access.group !4 +// CHECK9-NEXT: [[TMP15:%.*]] = load i64, i64* [[G1_CASTED]], align 8, !llvm.access.group !4 +// CHECK9-NEXT: [[TMP16:%.*]] = load i32, i32* [[CONV2]], align 8, !llvm.access.group !4 // CHECK9-NEXT: [[CONV7:%.*]] = bitcast i64* [[SIVAR_CASTED]] to i32* -// CHECK9-NEXT: store i32 [[TMP16]], i32* [[CONV7]], align 4 -// CHECK9-NEXT: [[TMP17:%.*]] = load i64, i64* [[SIVAR_CASTED]], align 8 -// CHECK9-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 5, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64, i64, i64, i64)* @.omp_outlined..3 to void (i32*, i32*, ...)*), i64 [[TMP8]], i64 [[TMP10]], i64 [[TMP12]], i64 [[TMP15]], i64 [[TMP17]]) +// CHECK9-NEXT: store i32 [[TMP16]], i32* [[CONV7]], align 4, !llvm.access.group !4 +// CHECK9-NEXT: [[TMP17:%.*]] = load i64, i64* [[SIVAR_CASTED]], align 8, !llvm.access.group !4 +// CHECK9-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) 
@__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 5, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64, i64, i64, i64)* @.omp_outlined..3 to void (i32*, i32*, ...)*), i64 [[TMP8]], i64 [[TMP10]], i64 [[TMP12]], i64 [[TMP15]], i64 [[TMP17]]), !llvm.access.group !4 // CHECK9-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK9: omp.inner.for.inc: -// CHECK9-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK9-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK9-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !4 +// CHECK9-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !4 // CHECK9-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP18]], [[TMP19]] -// CHECK9-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 -// CHECK9-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP3:![0-9]+]] +// CHECK9-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !4 +// CHECK9-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP5:![0-9]+]] // CHECK9: omp.inner.for.end: // CHECK9-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK9: omp.loop.exit: @@ -6104,35 +6104,35 @@ int main() { // CHECK9-NEXT: store i32 [[TMP6]], i32* [[DOTOMP_IV]], align 4 // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK9: omp.inner.for.cond: -// CHECK9-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK9-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK9-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !8 +// CHECK9-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !8 // CHECK9-NEXT: [[CMP6:%.*]] = icmp sle i32 [[TMP7]], [[TMP8]] // CHECK9-NEXT: br i1 [[CMP6]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK9: omp.inner.for.body: -// CHECK9-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !8 // CHECK9-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP9]], 1 // CHECK9-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK9-NEXT: store i32 [[ADD]], i32* [[I]], align 4 -// CHECK9-NEXT: store i32 1, i32* [[CONV]], align 8 -// CHECK9-NEXT: [[TMP10:%.*]] = load i32*, i32** [[TMP]], align 8 -// CHECK9-NEXT: store volatile i32 1, i32* [[TMP10]], align 4 -// CHECK9-NEXT: store i32 2, i32* [[CONV2]], align 8 +// CHECK9-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !8 +// CHECK9-NEXT: store i32 1, i32* [[CONV]], align 8, !llvm.access.group !8 +// CHECK9-NEXT: [[TMP10:%.*]] = load i32*, i32** [[TMP]], align 8, !llvm.access.group !8 +// CHECK9-NEXT: store volatile i32 1, i32* [[TMP10]], align 4, !llvm.access.group !8 +// CHECK9-NEXT: store i32 2, i32* [[CONV2]], align 8, !llvm.access.group !8 // CHECK9-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[CLASS_ANON_0]], %class.anon.0* [[REF_TMP]], i32 0, i32 0 -// CHECK9-NEXT: store i32* [[CONV]], i32** [[TMP11]], align 8 +// CHECK9-NEXT: store i32* [[CONV]], i32** [[TMP11]], align 8, !llvm.access.group !8 // CHECK9-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[CLASS_ANON_0]], %class.anon.0* [[REF_TMP]], i32 0, i32 1 -// CHECK9-NEXT: [[TMP13:%.*]] = load i32*, i32** [[TMP]], align 8 -// CHECK9-NEXT: store i32* [[TMP13]], i32** [[TMP12]], align 8 +// CHECK9-NEXT: [[TMP13:%.*]] = load i32*, i32** [[TMP]], align 8, !llvm.access.group !8 +// CHECK9-NEXT: store i32* [[TMP13]], i32** [[TMP12]], align 8, !llvm.access.group !8 // CHECK9-NEXT: 
[[TMP14:%.*]] = getelementptr inbounds [[CLASS_ANON_0]], %class.anon.0* [[REF_TMP]], i32 0, i32 2 -// CHECK9-NEXT: store i32* [[CONV2]], i32** [[TMP14]], align 8 -// CHECK9-NEXT: call void @"_ZZZ4mainENK3$_0clEvENKUlvE_clEv"(%class.anon.0* nonnull align 8 dereferenceable(24) [[REF_TMP]]) +// CHECK9-NEXT: store i32* [[CONV2]], i32** [[TMP14]], align 8, !llvm.access.group !8 +// CHECK9-NEXT: call void @"_ZZZ4mainENK3$_0clEvENKUlvE_clEv"(%class.anon.0* nonnull align 8 dereferenceable(24) [[REF_TMP]]), !llvm.access.group !8 // CHECK9-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK9: omp.body.continue: // CHECK9-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK9: omp.inner.for.inc: -// CHECK9-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !8 // CHECK9-NEXT: [[ADD7:%.*]] = add nsw i32 [[TMP15]], 1 -// CHECK9-NEXT: store i32 [[ADD7]], i32* [[DOTOMP_IV]], align 4 -// CHECK9-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP5:![0-9]+]] +// CHECK9-NEXT: store i32 [[ADD7]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !8 +// CHECK9-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP9:![0-9]+]] // CHECK9: omp.inner.for.end: // CHECK9-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK9: omp.loop.exit: @@ -6370,36 +6370,36 @@ int main() { // CHECK10-NEXT: store i32 [[TMP4]], i32* [[DOTOMP_IV]], align 4 // CHECK10-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK10: omp.inner.for.cond: -// CHECK10-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK10-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK10-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !4 +// CHECK10-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !4 // CHECK10-NEXT: [[CMP4:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] // CHECK10-NEXT: br i1 [[CMP4]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK10: omp.inner.for.body: -// CHECK10-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK10-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !4 // CHECK10-NEXT: [[TMP8:%.*]] = zext i32 [[TMP7]] to i64 -// CHECK10-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK10-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !4 // CHECK10-NEXT: [[TMP10:%.*]] = zext i32 [[TMP9]] to i64 -// CHECK10-NEXT: [[TMP11:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK10-NEXT: [[TMP11:%.*]] = load i32, i32* [[CONV]], align 8, !llvm.access.group !4 // CHECK10-NEXT: [[CONV5:%.*]] = bitcast i64* [[G_CASTED]] to i32* -// CHECK10-NEXT: store i32 [[TMP11]], i32* [[CONV5]], align 4 -// CHECK10-NEXT: [[TMP12:%.*]] = load i64, i64* [[G_CASTED]], align 8 -// CHECK10-NEXT: [[TMP13:%.*]] = load i32*, i32** [[TMP]], align 8 -// CHECK10-NEXT: [[TMP14:%.*]] = load volatile i32, i32* [[TMP13]], align 4 +// CHECK10-NEXT: store i32 [[TMP11]], i32* [[CONV5]], align 4, !llvm.access.group !4 +// CHECK10-NEXT: [[TMP12:%.*]] = load i64, i64* [[G_CASTED]], align 8, !llvm.access.group !4 +// CHECK10-NEXT: [[TMP13:%.*]] = load i32*, i32** [[TMP]], align 8, !llvm.access.group !4 +// CHECK10-NEXT: [[TMP14:%.*]] = load volatile i32, i32* [[TMP13]], align 4, !llvm.access.group !4 // CHECK10-NEXT: [[CONV6:%.*]] = bitcast i64* [[G1_CASTED]] to i32* -// CHECK10-NEXT: store i32 [[TMP14]], i32* [[CONV6]], align 4 -// CHECK10-NEXT: [[TMP15:%.*]] = load 
i64, i64* [[G1_CASTED]], align 8 -// CHECK10-NEXT: [[TMP16:%.*]] = load i32, i32* [[CONV2]], align 8 +// CHECK10-NEXT: store i32 [[TMP14]], i32* [[CONV6]], align 4, !llvm.access.group !4 +// CHECK10-NEXT: [[TMP15:%.*]] = load i64, i64* [[G1_CASTED]], align 8, !llvm.access.group !4 +// CHECK10-NEXT: [[TMP16:%.*]] = load i32, i32* [[CONV2]], align 8, !llvm.access.group !4 // CHECK10-NEXT: [[CONV7:%.*]] = bitcast i64* [[SIVAR_CASTED]] to i32* -// CHECK10-NEXT: store i32 [[TMP16]], i32* [[CONV7]], align 4 -// CHECK10-NEXT: [[TMP17:%.*]] = load i64, i64* [[SIVAR_CASTED]], align 8 -// CHECK10-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 5, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64, i64, i64, i64)* @.omp_outlined..3 to void (i32*, i32*, ...)*), i64 [[TMP8]], i64 [[TMP10]], i64 [[TMP12]], i64 [[TMP15]], i64 [[TMP17]]) +// CHECK10-NEXT: store i32 [[TMP16]], i32* [[CONV7]], align 4, !llvm.access.group !4 +// CHECK10-NEXT: [[TMP17:%.*]] = load i64, i64* [[SIVAR_CASTED]], align 8, !llvm.access.group !4 +// CHECK10-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 5, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64, i64, i64, i64)* @.omp_outlined..3 to void (i32*, i32*, ...)*), i64 [[TMP8]], i64 [[TMP10]], i64 [[TMP12]], i64 [[TMP15]], i64 [[TMP17]]), !llvm.access.group !4 // CHECK10-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK10: omp.inner.for.inc: -// CHECK10-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK10-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK10-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !4 +// CHECK10-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !4 // CHECK10-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP18]], [[TMP19]] -// CHECK10-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 -// CHECK10-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP3:![0-9]+]] +// CHECK10-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !4 +// CHECK10-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP5:![0-9]+]] // CHECK10: omp.inner.for.end: // CHECK10-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK10: omp.loop.exit: @@ -6472,35 +6472,35 @@ int main() { // CHECK10-NEXT: store i32 [[TMP6]], i32* [[DOTOMP_IV]], align 4 // CHECK10-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK10: omp.inner.for.cond: -// CHECK10-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK10-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK10-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !8 +// CHECK10-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !8 // CHECK10-NEXT: [[CMP6:%.*]] = icmp sle i32 [[TMP7]], [[TMP8]] // CHECK10-NEXT: br i1 [[CMP6]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK10: omp.inner.for.body: -// CHECK10-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK10-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !8 // CHECK10-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP9]], 1 // CHECK10-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK10-NEXT: store i32 [[ADD]], i32* [[I]], align 4 -// CHECK10-NEXT: store i32 1, i32* [[CONV]], align 8 -// CHECK10-NEXT: [[TMP10:%.*]] = load i32*, i32** [[TMP]], align 8 -// CHECK10-NEXT: store 
volatile i32 1, i32* [[TMP10]], align 4 -// CHECK10-NEXT: store i32 2, i32* [[CONV2]], align 8 +// CHECK10-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !8 +// CHECK10-NEXT: store i32 1, i32* [[CONV]], align 8, !llvm.access.group !8 +// CHECK10-NEXT: [[TMP10:%.*]] = load i32*, i32** [[TMP]], align 8, !llvm.access.group !8 +// CHECK10-NEXT: store volatile i32 1, i32* [[TMP10]], align 4, !llvm.access.group !8 +// CHECK10-NEXT: store i32 2, i32* [[CONV2]], align 8, !llvm.access.group !8 // CHECK10-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[CLASS_ANON_0]], %class.anon.0* [[REF_TMP]], i32 0, i32 0 -// CHECK10-NEXT: store i32* [[CONV]], i32** [[TMP11]], align 8 +// CHECK10-NEXT: store i32* [[CONV]], i32** [[TMP11]], align 8, !llvm.access.group !8 // CHECK10-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[CLASS_ANON_0]], %class.anon.0* [[REF_TMP]], i32 0, i32 1 -// CHECK10-NEXT: [[TMP13:%.*]] = load i32*, i32** [[TMP]], align 8 -// CHECK10-NEXT: store i32* [[TMP13]], i32** [[TMP12]], align 8 +// CHECK10-NEXT: [[TMP13:%.*]] = load i32*, i32** [[TMP]], align 8, !llvm.access.group !8 +// CHECK10-NEXT: store i32* [[TMP13]], i32** [[TMP12]], align 8, !llvm.access.group !8 // CHECK10-NEXT: [[TMP14:%.*]] = getelementptr inbounds [[CLASS_ANON_0]], %class.anon.0* [[REF_TMP]], i32 0, i32 2 -// CHECK10-NEXT: store i32* [[CONV2]], i32** [[TMP14]], align 8 -// CHECK10-NEXT: call void @"_ZZZ4mainENK3$_0clEvENKUlvE_clEv"(%class.anon.0* nonnull align 8 dereferenceable(24) [[REF_TMP]]) +// CHECK10-NEXT: store i32* [[CONV2]], i32** [[TMP14]], align 8, !llvm.access.group !8 +// CHECK10-NEXT: call void @"_ZZZ4mainENK3$_0clEvENKUlvE_clEv"(%class.anon.0* nonnull align 8 dereferenceable(24) [[REF_TMP]]), !llvm.access.group !8 // CHECK10-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK10: omp.body.continue: // CHECK10-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK10: omp.inner.for.inc: -// CHECK10-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK10-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !8 // CHECK10-NEXT: [[ADD7:%.*]] = add nsw i32 [[TMP15]], 1 -// CHECK10-NEXT: store i32 [[ADD7]], i32* [[DOTOMP_IV]], align 4 -// CHECK10-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP5:![0-9]+]] +// CHECK10-NEXT: store i32 [[ADD7]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !8 +// CHECK10-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP9:![0-9]+]] // CHECK10: omp.inner.for.end: // CHECK10-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK10: omp.loop.exit: diff --git a/clang/test/OpenMP/teams_distribute_parallel_for_simd_if_codegen.cpp b/clang/test/OpenMP/teams_distribute_parallel_for_simd_if_codegen.cpp index 265e94c21ed3..083654213f5e 100644 --- a/clang/test/OpenMP/teams_distribute_parallel_for_simd_if_codegen.cpp +++ b/clang/test/OpenMP/teams_distribute_parallel_for_simd_if_codegen.cpp @@ -177,22 +177,22 @@ int main() { // CHECK1-NEXT: store i32 [[TMP4]], i32* [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !11 +// CHECK1-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !11 // CHECK1-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] // CHECK1-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label 
[[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK1-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !11 // CHECK1-NEXT: [[TMP8:%.*]] = zext i32 [[TMP7]] to i64 -// CHECK1-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !11 // CHECK1-NEXT: [[TMP10:%.*]] = zext i32 [[TMP9]] to i64 -// CHECK1-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 2, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64)* @.omp_outlined..1 to void (i32*, i32*, ...)*), i64 [[TMP8]], i64 [[TMP10]]) +// CHECK1-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 2, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64)* @.omp_outlined..1 to void (i32*, i32*, ...)*), i64 [[TMP8]], i64 [[TMP10]]), !llvm.access.group !11 // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK1-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !11 +// CHECK1-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !11 // CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP11]], [[TMP12]] -// CHECK1-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !11 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP12:![0-9]+]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] @@ -254,22 +254,22 @@ int main() { // CHECK1-NEXT: store i32 [[TMP6]], i32* [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !15 +// CHECK1-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !15 // CHECK1-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP7]], [[TMP8]] // CHECK1-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !15 // CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP9]], 1 // CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK1-NEXT: store i32 [[ADD]], i32* [[I]], align 4 +// CHECK1-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !15 // CHECK1-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK1: omp.body.continue: // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !15 // CHECK1-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP10]], 1 -// CHECK1-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !15 // CHECK1-NEXT: br label 
[[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP16:![0-9]+]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] @@ -329,26 +329,26 @@ int main() { // CHECK1-NEXT: store i32 [[TMP4]], i32* [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !20 +// CHECK1-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !20 // CHECK1-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] // CHECK1-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK1-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !20 // CHECK1-NEXT: [[TMP8:%.*]] = zext i32 [[TMP7]] to i64 -// CHECK1-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !20 // CHECK1-NEXT: [[TMP10:%.*]] = zext i32 [[TMP9]] to i64 -// CHECK1-NEXT: call void @__kmpc_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]) -// CHECK1-NEXT: [[TMP11:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4 -// CHECK1-NEXT: call void @.omp_outlined..3(i32* [[TMP11]], i32* [[DOTBOUND_ZERO_ADDR]], i64 [[TMP8]], i64 [[TMP10]]) #[[ATTR2]] -// CHECK1-NEXT: call void @__kmpc_end_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]) +// CHECK1-NEXT: call void @__kmpc_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]), !llvm.access.group !20 +// CHECK1-NEXT: [[TMP11:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8, !llvm.access.group !20 +// CHECK1-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4, !llvm.access.group !20 +// CHECK1-NEXT: call void @.omp_outlined..3(i32* [[TMP11]], i32* [[DOTBOUND_ZERO_ADDR]], i64 [[TMP8]], i64 [[TMP10]]) #[[ATTR2]], !llvm.access.group !20 +// CHECK1-NEXT: call void @__kmpc_end_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]), !llvm.access.group !20 // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK1-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !20 +// CHECK1-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !20 // CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP12]], [[TMP13]] -// CHECK1-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !20 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP21:![0-9]+]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] @@ -410,23 +410,23 @@ int main() { // CHECK1-NEXT: store i32 [[TMP6]], i32* [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[TMP7:%.*]] = load i32, i32* 
[[DOTOMP_IV]], align 4, !llvm.access.group !23 +// CHECK1-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !23 // CHECK1-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP7]], [[TMP8]] // CHECK1-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !23 // CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP9]], 1 // CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK1-NEXT: store i32 [[ADD]], i32* [[I]], align 4 -// CHECK1-NEXT: call void @_Z9gtid_testv() +// CHECK1-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !23 +// CHECK1-NEXT: call void @_Z9gtid_testv(), !llvm.access.group !23 // CHECK1-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK1: omp.body.continue: // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !23 // CHECK1-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP10]], 1 -// CHECK1-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !23 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP24:![0-9]+]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] @@ -548,22 +548,22 @@ int main() { // CHECK1-NEXT: store i32 [[TMP4]], i32* [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !26 +// CHECK1-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !26 // CHECK1-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] // CHECK1-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK1-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !26 // CHECK1-NEXT: [[TMP8:%.*]] = zext i32 [[TMP7]] to i64 -// CHECK1-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !26 // CHECK1-NEXT: [[TMP10:%.*]] = zext i32 [[TMP9]] to i64 -// CHECK1-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 2, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64)* @.omp_outlined..5 to void (i32*, i32*, ...)*), i64 [[TMP8]], i64 [[TMP10]]) +// CHECK1-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) 
@__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 2, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64)* @.omp_outlined..5 to void (i32*, i32*, ...)*), i64 [[TMP8]], i64 [[TMP10]]), !llvm.access.group !26 // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK1-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !26 +// CHECK1-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !26 // CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP11]], [[TMP12]] -// CHECK1-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !26 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP27:![0-9]+]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] @@ -625,23 +625,23 @@ int main() { // CHECK1-NEXT: store i32 [[TMP6]], i32* [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !29 +// CHECK1-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !29 // CHECK1-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP7]], [[TMP8]] // CHECK1-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !29 // CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP9]], 1 // CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK1-NEXT: store i32 [[ADD]], i32* [[I]], align 4 -// CHECK1-NEXT: call void @_Z3fn4v() +// CHECK1-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !29 +// CHECK1-NEXT: call void @_Z3fn4v(), !llvm.access.group !29 // CHECK1-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK1: omp.body.continue: // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !29 // CHECK1-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP10]], 1 -// CHECK1-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !29 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP30:![0-9]+]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] @@ -701,26 +701,26 @@ int main() { // CHECK1-NEXT: store i32 [[TMP4]], i32* [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !32 +// CHECK1-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !32 // CHECK1-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] // CHECK1-NEXT: br i1 [[CMP1]], label 
[[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK1-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !32 // CHECK1-NEXT: [[TMP8:%.*]] = zext i32 [[TMP7]] to i64 -// CHECK1-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !32 // CHECK1-NEXT: [[TMP10:%.*]] = zext i32 [[TMP9]] to i64 -// CHECK1-NEXT: call void @__kmpc_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]) -// CHECK1-NEXT: [[TMP11:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4 -// CHECK1-NEXT: call void @.omp_outlined..7(i32* [[TMP11]], i32* [[DOTBOUND_ZERO_ADDR]], i64 [[TMP8]], i64 [[TMP10]]) #[[ATTR2]] -// CHECK1-NEXT: call void @__kmpc_end_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]) +// CHECK1-NEXT: call void @__kmpc_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]), !llvm.access.group !32 +// CHECK1-NEXT: [[TMP11:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8, !llvm.access.group !32 +// CHECK1-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4, !llvm.access.group !32 +// CHECK1-NEXT: call void @.omp_outlined..7(i32* [[TMP11]], i32* [[DOTBOUND_ZERO_ADDR]], i64 [[TMP8]], i64 [[TMP10]]) #[[ATTR2]], !llvm.access.group !32 +// CHECK1-NEXT: call void @__kmpc_end_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]), !llvm.access.group !32 // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK1-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !32 +// CHECK1-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !32 // CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP12]], [[TMP13]] -// CHECK1-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !32 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP33:![0-9]+]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] @@ -782,23 +782,23 @@ int main() { // CHECK1-NEXT: store i32 [[TMP6]], i32* [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !35 +// CHECK1-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !35 // CHECK1-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP7]], [[TMP8]] // CHECK1-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !35 // CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP9]], 1 // CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK1-NEXT: store i32 [[ADD]], i32* [[I]], align 4 -// CHECK1-NEXT: call void @_Z3fn5v() +// CHECK1-NEXT: store i32 [[ADD]], i32* [[I]], align 4, 
!llvm.access.group !35 +// CHECK1-NEXT: call void @_Z3fn5v(), !llvm.access.group !35 // CHECK1-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK1: omp.body.continue: // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !35 // CHECK1-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP10]], 1 -// CHECK1-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !35 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP36:![0-9]+]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] @@ -876,35 +876,35 @@ int main() { // CHECK1-NEXT: store i32 [[TMP4]], i32* [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !38 +// CHECK1-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !38 // CHECK1-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] // CHECK1-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK1-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !38 // CHECK1-NEXT: [[TMP8:%.*]] = zext i32 [[TMP7]] to i64 -// CHECK1-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !38 // CHECK1-NEXT: [[TMP10:%.*]] = zext i32 [[TMP9]] to i64 -// CHECK1-NEXT: [[TMP11:%.*]] = load i8, i8* [[CONV]], align 8 +// CHECK1-NEXT: [[TMP11:%.*]] = load i8, i8* [[CONV]], align 8, !llvm.access.group !38 // CHECK1-NEXT: [[TOBOOL:%.*]] = trunc i8 [[TMP11]] to i1 // CHECK1-NEXT: br i1 [[TOBOOL]], label [[OMP_IF_THEN:%.*]], label [[OMP_IF_ELSE:%.*]] // CHECK1: omp_if.then: -// CHECK1-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 2, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64)* @.omp_outlined..9 to void (i32*, i32*, ...)*), i64 [[TMP8]], i64 [[TMP10]]) +// CHECK1-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) 
@__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 2, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64)* @.omp_outlined..9 to void (i32*, i32*, ...)*), i64 [[TMP8]], i64 [[TMP10]]), !llvm.access.group !38 // CHECK1-NEXT: br label [[OMP_IF_END:%.*]] // CHECK1: omp_if.else: -// CHECK1-NEXT: call void @__kmpc_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]) -// CHECK1-NEXT: [[TMP12:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4 -// CHECK1-NEXT: call void @.omp_outlined..9(i32* [[TMP12]], i32* [[DOTBOUND_ZERO_ADDR]], i64 [[TMP8]], i64 [[TMP10]]) #[[ATTR2]] -// CHECK1-NEXT: call void @__kmpc_end_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]) +// CHECK1-NEXT: call void @__kmpc_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]), !llvm.access.group !38 +// CHECK1-NEXT: [[TMP12:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8, !llvm.access.group !38 +// CHECK1-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4, !llvm.access.group !38 +// CHECK1-NEXT: call void @.omp_outlined..9(i32* [[TMP12]], i32* [[DOTBOUND_ZERO_ADDR]], i64 [[TMP8]], i64 [[TMP10]]) #[[ATTR2]], !llvm.access.group !38 +// CHECK1-NEXT: call void @__kmpc_end_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]), !llvm.access.group !38 // CHECK1-NEXT: br label [[OMP_IF_END]] // CHECK1: omp_if.end: // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK1-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !38 +// CHECK1-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !38 // CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP13]], [[TMP14]] -// CHECK1-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !38 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP39:![0-9]+]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] @@ -966,23 +966,23 @@ int main() { // CHECK1-NEXT: store i32 [[TMP6]], i32* [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !41 +// CHECK1-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !41 // CHECK1-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP7]], [[TMP8]] // CHECK1-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !41 // CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP9]], 1 // CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK1-NEXT: store i32 [[ADD]], i32* [[I]], align 4 -// CHECK1-NEXT: call void @_Z3fn6v() +// CHECK1-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !41 +// CHECK1-NEXT: call void @_Z3fn6v(), !llvm.access.group !41 // CHECK1-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK1: omp.body.continue: // CHECK1-NEXT: br label 
[[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !41 // CHECK1-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP10]], 1 -// CHECK1-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !41 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP42:![0-9]+]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] @@ -1102,22 +1102,22 @@ int main() { // CHECK1-NEXT: store i32 [[TMP4]], i32* [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !44 +// CHECK1-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !44 // CHECK1-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] // CHECK1-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK1-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !44 // CHECK1-NEXT: [[TMP8:%.*]] = zext i32 [[TMP7]] to i64 -// CHECK1-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !44 // CHECK1-NEXT: [[TMP10:%.*]] = zext i32 [[TMP9]] to i64 -// CHECK1-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 2, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64)* @.omp_outlined..11 to void (i32*, i32*, ...)*), i64 [[TMP8]], i64 [[TMP10]]) +// CHECK1-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) 
@__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 2, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64)* @.omp_outlined..11 to void (i32*, i32*, ...)*), i64 [[TMP8]], i64 [[TMP10]]), !llvm.access.group !44 // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK1-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !44 +// CHECK1-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !44 // CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP11]], [[TMP12]] -// CHECK1-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !44 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP45:![0-9]+]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] @@ -1179,23 +1179,23 @@ int main() { // CHECK1-NEXT: store i32 [[TMP6]], i32* [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !47 +// CHECK1-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !47 // CHECK1-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP7]], [[TMP8]] // CHECK1-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !47 // CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP9]], 1 // CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK1-NEXT: store i32 [[ADD]], i32* [[I]], align 4 -// CHECK1-NEXT: call void @_Z3fn1v() +// CHECK1-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !47 +// CHECK1-NEXT: call void @_Z3fn1v(), !llvm.access.group !47 // CHECK1-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK1: omp.body.continue: // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !47 // CHECK1-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP10]], 1 -// CHECK1-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !47 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP48:![0-9]+]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] @@ -1255,26 +1255,26 @@ int main() { // CHECK1-NEXT: store i32 [[TMP4]], i32* [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !50 +// CHECK1-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !50 // CHECK1-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] // CHECK1-NEXT: br i1 [[CMP1]], label 
[[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK1-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !50 // CHECK1-NEXT: [[TMP8:%.*]] = zext i32 [[TMP7]] to i64 -// CHECK1-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !50 // CHECK1-NEXT: [[TMP10:%.*]] = zext i32 [[TMP9]] to i64 -// CHECK1-NEXT: call void @__kmpc_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]) -// CHECK1-NEXT: [[TMP11:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4 -// CHECK1-NEXT: call void @.omp_outlined..13(i32* [[TMP11]], i32* [[DOTBOUND_ZERO_ADDR]], i64 [[TMP8]], i64 [[TMP10]]) #[[ATTR2]] -// CHECK1-NEXT: call void @__kmpc_end_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]) +// CHECK1-NEXT: call void @__kmpc_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]), !llvm.access.group !50 +// CHECK1-NEXT: [[TMP11:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8, !llvm.access.group !50 +// CHECK1-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4, !llvm.access.group !50 +// CHECK1-NEXT: call void @.omp_outlined..13(i32* [[TMP11]], i32* [[DOTBOUND_ZERO_ADDR]], i64 [[TMP8]], i64 [[TMP10]]) #[[ATTR2]], !llvm.access.group !50 +// CHECK1-NEXT: call void @__kmpc_end_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]), !llvm.access.group !50 // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK1-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !50 +// CHECK1-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !50 // CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP12]], [[TMP13]] -// CHECK1-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !50 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP51:![0-9]+]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] @@ -1336,23 +1336,23 @@ int main() { // CHECK1-NEXT: store i32 [[TMP6]], i32* [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !53 +// CHECK1-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !53 // CHECK1-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP7]], [[TMP8]] // CHECK1-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !53 // CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP9]], 1 // CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK1-NEXT: store i32 [[ADD]], i32* [[I]], align 4 -// CHECK1-NEXT: call void @_Z3fn2v() +// CHECK1-NEXT: store i32 [[ADD]], i32* [[I]], align 4, 
!llvm.access.group !53 +// CHECK1-NEXT: call void @_Z3fn2v(), !llvm.access.group !53 // CHECK1-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK1: omp.body.continue: // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !53 // CHECK1-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP10]], 1 -// CHECK1-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !53 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP54:![0-9]+]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] @@ -1430,35 +1430,35 @@ int main() { // CHECK1-NEXT: store i32 [[TMP4]], i32* [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !56 +// CHECK1-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !56 // CHECK1-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] // CHECK1-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK1-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !56 // CHECK1-NEXT: [[TMP8:%.*]] = zext i32 [[TMP7]] to i64 -// CHECK1-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !56 // CHECK1-NEXT: [[TMP10:%.*]] = zext i32 [[TMP9]] to i64 -// CHECK1-NEXT: [[TMP11:%.*]] = load i8, i8* [[CONV]], align 8 +// CHECK1-NEXT: [[TMP11:%.*]] = load i8, i8* [[CONV]], align 8, !llvm.access.group !56 // CHECK1-NEXT: [[TOBOOL:%.*]] = trunc i8 [[TMP11]] to i1 // CHECK1-NEXT: br i1 [[TOBOOL]], label [[OMP_IF_THEN:%.*]], label [[OMP_IF_ELSE:%.*]] // CHECK1: omp_if.then: -// CHECK1-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 2, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64)* @.omp_outlined..15 to void (i32*, i32*, ...)*), i64 [[TMP8]], i64 [[TMP10]]) +// CHECK1-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) 
@__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 2, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64)* @.omp_outlined..15 to void (i32*, i32*, ...)*), i64 [[TMP8]], i64 [[TMP10]]), !llvm.access.group !56 // CHECK1-NEXT: br label [[OMP_IF_END:%.*]] // CHECK1: omp_if.else: -// CHECK1-NEXT: call void @__kmpc_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]) -// CHECK1-NEXT: [[TMP12:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4 -// CHECK1-NEXT: call void @.omp_outlined..15(i32* [[TMP12]], i32* [[DOTBOUND_ZERO_ADDR]], i64 [[TMP8]], i64 [[TMP10]]) #[[ATTR2]] -// CHECK1-NEXT: call void @__kmpc_end_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]) +// CHECK1-NEXT: call void @__kmpc_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]), !llvm.access.group !56 +// CHECK1-NEXT: [[TMP12:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8, !llvm.access.group !56 +// CHECK1-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4, !llvm.access.group !56 +// CHECK1-NEXT: call void @.omp_outlined..15(i32* [[TMP12]], i32* [[DOTBOUND_ZERO_ADDR]], i64 [[TMP8]], i64 [[TMP10]]) #[[ATTR2]], !llvm.access.group !56 +// CHECK1-NEXT: call void @__kmpc_end_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]), !llvm.access.group !56 // CHECK1-NEXT: br label [[OMP_IF_END]] // CHECK1: omp_if.end: // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK1-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !56 +// CHECK1-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !56 // CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP13]], [[TMP14]] -// CHECK1-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !56 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP57:![0-9]+]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] @@ -1520,23 +1520,23 @@ int main() { // CHECK1-NEXT: store i32 [[TMP6]], i32* [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !59 +// CHECK1-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !59 // CHECK1-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP7]], [[TMP8]] // CHECK1-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !59 // CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP9]], 1 // CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK1-NEXT: store i32 [[ADD]], i32* [[I]], align 4 -// CHECK1-NEXT: call void @_Z3fn3v() +// CHECK1-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !59 +// CHECK1-NEXT: call void @_Z3fn3v(), !llvm.access.group !59 // CHECK1-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK1: omp.body.continue: // CHECK1-NEXT: br label 
[[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !59 // CHECK1-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP10]], 1 -// CHECK1-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !59 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP60:![0-9]+]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] @@ -1626,22 +1626,22 @@ int main() { // CHECK2-NEXT: store i32 [[TMP4]], i32* [[DOTOMP_IV]], align 4 // CHECK2-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK2: omp.inner.for.cond: -// CHECK2-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK2-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK2-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !11 +// CHECK2-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !11 // CHECK2-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] // CHECK2-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK2: omp.inner.for.body: -// CHECK2-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK2-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !11 // CHECK2-NEXT: [[TMP8:%.*]] = zext i32 [[TMP7]] to i64 -// CHECK2-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK2-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !11 // CHECK2-NEXT: [[TMP10:%.*]] = zext i32 [[TMP9]] to i64 -// CHECK2-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 2, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64)* @.omp_outlined..1 to void (i32*, i32*, ...)*), i64 [[TMP8]], i64 [[TMP10]]) +// CHECK2-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) 
@__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 2, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64)* @.omp_outlined..1 to void (i32*, i32*, ...)*), i64 [[TMP8]], i64 [[TMP10]]), !llvm.access.group !11 // CHECK2-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK2: omp.inner.for.inc: -// CHECK2-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK2-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK2-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !11 +// CHECK2-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !11 // CHECK2-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP11]], [[TMP12]] -// CHECK2-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 +// CHECK2-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !11 // CHECK2-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP12:![0-9]+]] // CHECK2: omp.inner.for.end: // CHECK2-NEXT: br label [[OMP_LOOP_EXIT:%.*]] @@ -1703,22 +1703,22 @@ int main() { // CHECK2-NEXT: store i32 [[TMP6]], i32* [[DOTOMP_IV]], align 4 // CHECK2-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK2: omp.inner.for.cond: -// CHECK2-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK2-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK2-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !15 +// CHECK2-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !15 // CHECK2-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP7]], [[TMP8]] // CHECK2-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK2: omp.inner.for.body: -// CHECK2-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK2-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !15 // CHECK2-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP9]], 1 // CHECK2-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK2-NEXT: store i32 [[ADD]], i32* [[I]], align 4 +// CHECK2-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !15 // CHECK2-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK2: omp.body.continue: // CHECK2-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK2: omp.inner.for.inc: -// CHECK2-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK2-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !15 // CHECK2-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP10]], 1 -// CHECK2-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4 +// CHECK2-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !15 // CHECK2-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP16:![0-9]+]] // CHECK2: omp.inner.for.end: // CHECK2-NEXT: br label [[OMP_LOOP_EXIT:%.*]] @@ -1778,26 +1778,26 @@ int main() { // CHECK2-NEXT: store i32 [[TMP4]], i32* [[DOTOMP_IV]], align 4 // CHECK2-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK2: omp.inner.for.cond: -// CHECK2-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK2-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK2-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !20 +// CHECK2-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !20 // CHECK2-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] // CHECK2-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK2: omp.inner.for.body: -// CHECK2-NEXT: 
[[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK2-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !20 // CHECK2-NEXT: [[TMP8:%.*]] = zext i32 [[TMP7]] to i64 -// CHECK2-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK2-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !20 // CHECK2-NEXT: [[TMP10:%.*]] = zext i32 [[TMP9]] to i64 -// CHECK2-NEXT: call void @__kmpc_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]) -// CHECK2-NEXT: [[TMP11:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK2-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4 -// CHECK2-NEXT: call void @.omp_outlined..3(i32* [[TMP11]], i32* [[DOTBOUND_ZERO_ADDR]], i64 [[TMP8]], i64 [[TMP10]]) #[[ATTR2]] -// CHECK2-NEXT: call void @__kmpc_end_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]) +// CHECK2-NEXT: call void @__kmpc_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]), !llvm.access.group !20 +// CHECK2-NEXT: [[TMP11:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8, !llvm.access.group !20 +// CHECK2-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4, !llvm.access.group !20 +// CHECK2-NEXT: call void @.omp_outlined..3(i32* [[TMP11]], i32* [[DOTBOUND_ZERO_ADDR]], i64 [[TMP8]], i64 [[TMP10]]) #[[ATTR2]], !llvm.access.group !20 +// CHECK2-NEXT: call void @__kmpc_end_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]), !llvm.access.group !20 // CHECK2-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK2: omp.inner.for.inc: -// CHECK2-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK2-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK2-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !20 +// CHECK2-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !20 // CHECK2-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP12]], [[TMP13]] -// CHECK2-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 +// CHECK2-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !20 // CHECK2-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP21:![0-9]+]] // CHECK2: omp.inner.for.end: // CHECK2-NEXT: br label [[OMP_LOOP_EXIT:%.*]] @@ -1859,23 +1859,23 @@ int main() { // CHECK2-NEXT: store i32 [[TMP6]], i32* [[DOTOMP_IV]], align 4 // CHECK2-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK2: omp.inner.for.cond: -// CHECK2-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK2-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK2-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !23 +// CHECK2-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !23 // CHECK2-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP7]], [[TMP8]] // CHECK2-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK2: omp.inner.for.body: -// CHECK2-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK2-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !23 // CHECK2-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP9]], 1 // CHECK2-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK2-NEXT: store i32 [[ADD]], i32* [[I]], align 4 -// CHECK2-NEXT: call void @_Z9gtid_testv() +// CHECK2-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !23 +// CHECK2-NEXT: call void @_Z9gtid_testv(), !llvm.access.group !23 // CHECK2-NEXT: 
br label [[OMP_BODY_CONTINUE:%.*]] // CHECK2: omp.body.continue: // CHECK2-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK2: omp.inner.for.inc: -// CHECK2-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK2-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !23 // CHECK2-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP10]], 1 -// CHECK2-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4 +// CHECK2-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !23 // CHECK2-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP24:![0-9]+]] // CHECK2: omp.inner.for.end: // CHECK2-NEXT: br label [[OMP_LOOP_EXIT:%.*]] @@ -1997,22 +1997,22 @@ int main() { // CHECK2-NEXT: store i32 [[TMP4]], i32* [[DOTOMP_IV]], align 4 // CHECK2-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK2: omp.inner.for.cond: -// CHECK2-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK2-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK2-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !26 +// CHECK2-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !26 // CHECK2-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] // CHECK2-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK2: omp.inner.for.body: -// CHECK2-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK2-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !26 // CHECK2-NEXT: [[TMP8:%.*]] = zext i32 [[TMP7]] to i64 -// CHECK2-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK2-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !26 // CHECK2-NEXT: [[TMP10:%.*]] = zext i32 [[TMP9]] to i64 -// CHECK2-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 2, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64)* @.omp_outlined..5 to void (i32*, i32*, ...)*), i64 [[TMP8]], i64 [[TMP10]]) +// CHECK2-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) 
@__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 2, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64)* @.omp_outlined..5 to void (i32*, i32*, ...)*), i64 [[TMP8]], i64 [[TMP10]]), !llvm.access.group !26 // CHECK2-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK2: omp.inner.for.inc: -// CHECK2-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK2-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK2-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !26 +// CHECK2-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !26 // CHECK2-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP11]], [[TMP12]] -// CHECK2-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 +// CHECK2-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !26 // CHECK2-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP27:![0-9]+]] // CHECK2: omp.inner.for.end: // CHECK2-NEXT: br label [[OMP_LOOP_EXIT:%.*]] @@ -2074,23 +2074,23 @@ int main() { // CHECK2-NEXT: store i32 [[TMP6]], i32* [[DOTOMP_IV]], align 4 // CHECK2-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK2: omp.inner.for.cond: -// CHECK2-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK2-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK2-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !29 +// CHECK2-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !29 // CHECK2-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP7]], [[TMP8]] // CHECK2-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK2: omp.inner.for.body: -// CHECK2-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK2-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !29 // CHECK2-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP9]], 1 // CHECK2-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK2-NEXT: store i32 [[ADD]], i32* [[I]], align 4 -// CHECK2-NEXT: call void @_Z3fn4v() +// CHECK2-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !29 +// CHECK2-NEXT: call void @_Z3fn4v(), !llvm.access.group !29 // CHECK2-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK2: omp.body.continue: // CHECK2-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK2: omp.inner.for.inc: -// CHECK2-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK2-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !29 // CHECK2-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP10]], 1 -// CHECK2-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4 +// CHECK2-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !29 // CHECK2-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP30:![0-9]+]] // CHECK2: omp.inner.for.end: // CHECK2-NEXT: br label [[OMP_LOOP_EXIT:%.*]] @@ -2150,26 +2150,26 @@ int main() { // CHECK2-NEXT: store i32 [[TMP4]], i32* [[DOTOMP_IV]], align 4 // CHECK2-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK2: omp.inner.for.cond: -// CHECK2-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK2-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK2-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !32 +// CHECK2-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !32 // CHECK2-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] // CHECK2-NEXT: br i1 [[CMP1]], label 
[[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK2: omp.inner.for.body: -// CHECK2-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK2-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !32 // CHECK2-NEXT: [[TMP8:%.*]] = zext i32 [[TMP7]] to i64 -// CHECK2-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK2-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !32 // CHECK2-NEXT: [[TMP10:%.*]] = zext i32 [[TMP9]] to i64 -// CHECK2-NEXT: call void @__kmpc_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]) -// CHECK2-NEXT: [[TMP11:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK2-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4 -// CHECK2-NEXT: call void @.omp_outlined..7(i32* [[TMP11]], i32* [[DOTBOUND_ZERO_ADDR]], i64 [[TMP8]], i64 [[TMP10]]) #[[ATTR2]] -// CHECK2-NEXT: call void @__kmpc_end_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]) +// CHECK2-NEXT: call void @__kmpc_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]), !llvm.access.group !32 +// CHECK2-NEXT: [[TMP11:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8, !llvm.access.group !32 +// CHECK2-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4, !llvm.access.group !32 +// CHECK2-NEXT: call void @.omp_outlined..7(i32* [[TMP11]], i32* [[DOTBOUND_ZERO_ADDR]], i64 [[TMP8]], i64 [[TMP10]]) #[[ATTR2]], !llvm.access.group !32 +// CHECK2-NEXT: call void @__kmpc_end_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]), !llvm.access.group !32 // CHECK2-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK2: omp.inner.for.inc: -// CHECK2-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK2-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK2-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !32 +// CHECK2-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !32 // CHECK2-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP12]], [[TMP13]] -// CHECK2-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 +// CHECK2-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !32 // CHECK2-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP33:![0-9]+]] // CHECK2: omp.inner.for.end: // CHECK2-NEXT: br label [[OMP_LOOP_EXIT:%.*]] @@ -2231,23 +2231,23 @@ int main() { // CHECK2-NEXT: store i32 [[TMP6]], i32* [[DOTOMP_IV]], align 4 // CHECK2-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK2: omp.inner.for.cond: -// CHECK2-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK2-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK2-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !35 +// CHECK2-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !35 // CHECK2-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP7]], [[TMP8]] // CHECK2-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK2: omp.inner.for.body: -// CHECK2-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK2-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !35 // CHECK2-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP9]], 1 // CHECK2-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK2-NEXT: store i32 [[ADD]], i32* [[I]], align 4 -// CHECK2-NEXT: call void @_Z3fn5v() +// CHECK2-NEXT: store i32 [[ADD]], i32* [[I]], align 4, 
!llvm.access.group !35 +// CHECK2-NEXT: call void @_Z3fn5v(), !llvm.access.group !35 // CHECK2-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK2: omp.body.continue: // CHECK2-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK2: omp.inner.for.inc: -// CHECK2-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK2-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !35 // CHECK2-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP10]], 1 -// CHECK2-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4 +// CHECK2-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !35 // CHECK2-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP36:![0-9]+]] // CHECK2: omp.inner.for.end: // CHECK2-NEXT: br label [[OMP_LOOP_EXIT:%.*]] @@ -2325,35 +2325,35 @@ int main() { // CHECK2-NEXT: store i32 [[TMP4]], i32* [[DOTOMP_IV]], align 4 // CHECK2-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK2: omp.inner.for.cond: -// CHECK2-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK2-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK2-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !38 +// CHECK2-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !38 // CHECK2-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] // CHECK2-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK2: omp.inner.for.body: -// CHECK2-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK2-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !38 // CHECK2-NEXT: [[TMP8:%.*]] = zext i32 [[TMP7]] to i64 -// CHECK2-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK2-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !38 // CHECK2-NEXT: [[TMP10:%.*]] = zext i32 [[TMP9]] to i64 -// CHECK2-NEXT: [[TMP11:%.*]] = load i8, i8* [[CONV]], align 8 +// CHECK2-NEXT: [[TMP11:%.*]] = load i8, i8* [[CONV]], align 8, !llvm.access.group !38 // CHECK2-NEXT: [[TOBOOL:%.*]] = trunc i8 [[TMP11]] to i1 // CHECK2-NEXT: br i1 [[TOBOOL]], label [[OMP_IF_THEN:%.*]], label [[OMP_IF_ELSE:%.*]] // CHECK2: omp_if.then: -// CHECK2-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 2, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64)* @.omp_outlined..9 to void (i32*, i32*, ...)*), i64 [[TMP8]], i64 [[TMP10]]) +// CHECK2-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) 
@__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 2, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64)* @.omp_outlined..9 to void (i32*, i32*, ...)*), i64 [[TMP8]], i64 [[TMP10]]), !llvm.access.group !38 // CHECK2-NEXT: br label [[OMP_IF_END:%.*]] // CHECK2: omp_if.else: -// CHECK2-NEXT: call void @__kmpc_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]) -// CHECK2-NEXT: [[TMP12:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK2-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4 -// CHECK2-NEXT: call void @.omp_outlined..9(i32* [[TMP12]], i32* [[DOTBOUND_ZERO_ADDR]], i64 [[TMP8]], i64 [[TMP10]]) #[[ATTR2]] -// CHECK2-NEXT: call void @__kmpc_end_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]) +// CHECK2-NEXT: call void @__kmpc_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]), !llvm.access.group !38 +// CHECK2-NEXT: [[TMP12:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8, !llvm.access.group !38 +// CHECK2-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4, !llvm.access.group !38 +// CHECK2-NEXT: call void @.omp_outlined..9(i32* [[TMP12]], i32* [[DOTBOUND_ZERO_ADDR]], i64 [[TMP8]], i64 [[TMP10]]) #[[ATTR2]], !llvm.access.group !38 +// CHECK2-NEXT: call void @__kmpc_end_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]), !llvm.access.group !38 // CHECK2-NEXT: br label [[OMP_IF_END]] // CHECK2: omp_if.end: // CHECK2-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK2: omp.inner.for.inc: -// CHECK2-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK2-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK2-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !38 +// CHECK2-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !38 // CHECK2-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP13]], [[TMP14]] -// CHECK2-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 +// CHECK2-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !38 // CHECK2-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP39:![0-9]+]] // CHECK2: omp.inner.for.end: // CHECK2-NEXT: br label [[OMP_LOOP_EXIT:%.*]] @@ -2415,23 +2415,23 @@ int main() { // CHECK2-NEXT: store i32 [[TMP6]], i32* [[DOTOMP_IV]], align 4 // CHECK2-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK2: omp.inner.for.cond: -// CHECK2-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK2-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK2-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !41 +// CHECK2-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !41 // CHECK2-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP7]], [[TMP8]] // CHECK2-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK2: omp.inner.for.body: -// CHECK2-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK2-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !41 // CHECK2-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP9]], 1 // CHECK2-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK2-NEXT: store i32 [[ADD]], i32* [[I]], align 4 -// CHECK2-NEXT: call void @_Z3fn6v() +// CHECK2-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !41 +// CHECK2-NEXT: call void @_Z3fn6v(), !llvm.access.group !41 // CHECK2-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK2: omp.body.continue: // CHECK2-NEXT: br label 
[[OMP_INNER_FOR_INC:%.*]] // CHECK2: omp.inner.for.inc: -// CHECK2-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK2-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !41 // CHECK2-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP10]], 1 -// CHECK2-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4 +// CHECK2-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !41 // CHECK2-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP42:![0-9]+]] // CHECK2: omp.inner.for.end: // CHECK2-NEXT: br label [[OMP_LOOP_EXIT:%.*]] @@ -2551,22 +2551,22 @@ int main() { // CHECK2-NEXT: store i32 [[TMP4]], i32* [[DOTOMP_IV]], align 4 // CHECK2-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK2: omp.inner.for.cond: -// CHECK2-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK2-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK2-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !44 +// CHECK2-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !44 // CHECK2-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] // CHECK2-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK2: omp.inner.for.body: -// CHECK2-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK2-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !44 // CHECK2-NEXT: [[TMP8:%.*]] = zext i32 [[TMP7]] to i64 -// CHECK2-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK2-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !44 // CHECK2-NEXT: [[TMP10:%.*]] = zext i32 [[TMP9]] to i64 -// CHECK2-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 2, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64)* @.omp_outlined..11 to void (i32*, i32*, ...)*), i64 [[TMP8]], i64 [[TMP10]]) +// CHECK2-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) 
@__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 2, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64)* @.omp_outlined..11 to void (i32*, i32*, ...)*), i64 [[TMP8]], i64 [[TMP10]]), !llvm.access.group !44 // CHECK2-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK2: omp.inner.for.inc: -// CHECK2-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK2-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK2-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !44 +// CHECK2-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !44 // CHECK2-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP11]], [[TMP12]] -// CHECK2-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 +// CHECK2-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !44 // CHECK2-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP45:![0-9]+]] // CHECK2: omp.inner.for.end: // CHECK2-NEXT: br label [[OMP_LOOP_EXIT:%.*]] @@ -2628,23 +2628,23 @@ int main() { // CHECK2-NEXT: store i32 [[TMP6]], i32* [[DOTOMP_IV]], align 4 // CHECK2-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK2: omp.inner.for.cond: -// CHECK2-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK2-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK2-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !47 +// CHECK2-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !47 // CHECK2-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP7]], [[TMP8]] // CHECK2-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK2: omp.inner.for.body: -// CHECK2-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK2-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !47 // CHECK2-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP9]], 1 // CHECK2-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK2-NEXT: store i32 [[ADD]], i32* [[I]], align 4 -// CHECK2-NEXT: call void @_Z3fn1v() +// CHECK2-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !47 +// CHECK2-NEXT: call void @_Z3fn1v(), !llvm.access.group !47 // CHECK2-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK2: omp.body.continue: // CHECK2-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK2: omp.inner.for.inc: -// CHECK2-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK2-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !47 // CHECK2-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP10]], 1 -// CHECK2-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4 +// CHECK2-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !47 // CHECK2-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP48:![0-9]+]] // CHECK2: omp.inner.for.end: // CHECK2-NEXT: br label [[OMP_LOOP_EXIT:%.*]] @@ -2704,26 +2704,26 @@ int main() { // CHECK2-NEXT: store i32 [[TMP4]], i32* [[DOTOMP_IV]], align 4 // CHECK2-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK2: omp.inner.for.cond: -// CHECK2-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK2-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK2-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !50 +// CHECK2-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !50 // CHECK2-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] // CHECK2-NEXT: br i1 [[CMP1]], label 
[[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK2: omp.inner.for.body: -// CHECK2-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK2-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !50 // CHECK2-NEXT: [[TMP8:%.*]] = zext i32 [[TMP7]] to i64 -// CHECK2-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK2-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !50 // CHECK2-NEXT: [[TMP10:%.*]] = zext i32 [[TMP9]] to i64 -// CHECK2-NEXT: call void @__kmpc_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]) -// CHECK2-NEXT: [[TMP11:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK2-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4 -// CHECK2-NEXT: call void @.omp_outlined..13(i32* [[TMP11]], i32* [[DOTBOUND_ZERO_ADDR]], i64 [[TMP8]], i64 [[TMP10]]) #[[ATTR2]] -// CHECK2-NEXT: call void @__kmpc_end_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]) +// CHECK2-NEXT: call void @__kmpc_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]), !llvm.access.group !50 +// CHECK2-NEXT: [[TMP11:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8, !llvm.access.group !50 +// CHECK2-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4, !llvm.access.group !50 +// CHECK2-NEXT: call void @.omp_outlined..13(i32* [[TMP11]], i32* [[DOTBOUND_ZERO_ADDR]], i64 [[TMP8]], i64 [[TMP10]]) #[[ATTR2]], !llvm.access.group !50 +// CHECK2-NEXT: call void @__kmpc_end_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]), !llvm.access.group !50 // CHECK2-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK2: omp.inner.for.inc: -// CHECK2-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK2-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK2-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !50 +// CHECK2-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !50 // CHECK2-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP12]], [[TMP13]] -// CHECK2-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 +// CHECK2-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !50 // CHECK2-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP51:![0-9]+]] // CHECK2: omp.inner.for.end: // CHECK2-NEXT: br label [[OMP_LOOP_EXIT:%.*]] @@ -2785,23 +2785,23 @@ int main() { // CHECK2-NEXT: store i32 [[TMP6]], i32* [[DOTOMP_IV]], align 4 // CHECK2-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK2: omp.inner.for.cond: -// CHECK2-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK2-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK2-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !53 +// CHECK2-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !53 // CHECK2-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP7]], [[TMP8]] // CHECK2-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK2: omp.inner.for.body: -// CHECK2-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK2-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !53 // CHECK2-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP9]], 1 // CHECK2-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK2-NEXT: store i32 [[ADD]], i32* [[I]], align 4 -// CHECK2-NEXT: call void @_Z3fn2v() +// CHECK2-NEXT: store i32 [[ADD]], i32* [[I]], align 4, 
!llvm.access.group !53 +// CHECK2-NEXT: call void @_Z3fn2v(), !llvm.access.group !53 // CHECK2-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK2: omp.body.continue: // CHECK2-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK2: omp.inner.for.inc: -// CHECK2-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK2-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !53 // CHECK2-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP10]], 1 -// CHECK2-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4 +// CHECK2-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !53 // CHECK2-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP54:![0-9]+]] // CHECK2: omp.inner.for.end: // CHECK2-NEXT: br label [[OMP_LOOP_EXIT:%.*]] @@ -2879,35 +2879,35 @@ int main() { // CHECK2-NEXT: store i32 [[TMP4]], i32* [[DOTOMP_IV]], align 4 // CHECK2-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK2: omp.inner.for.cond: -// CHECK2-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK2-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK2-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !56 +// CHECK2-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !56 // CHECK2-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] // CHECK2-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK2: omp.inner.for.body: -// CHECK2-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK2-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !56 // CHECK2-NEXT: [[TMP8:%.*]] = zext i32 [[TMP7]] to i64 -// CHECK2-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK2-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !56 // CHECK2-NEXT: [[TMP10:%.*]] = zext i32 [[TMP9]] to i64 -// CHECK2-NEXT: [[TMP11:%.*]] = load i8, i8* [[CONV]], align 8 +// CHECK2-NEXT: [[TMP11:%.*]] = load i8, i8* [[CONV]], align 8, !llvm.access.group !56 // CHECK2-NEXT: [[TOBOOL:%.*]] = trunc i8 [[TMP11]] to i1 // CHECK2-NEXT: br i1 [[TOBOOL]], label [[OMP_IF_THEN:%.*]], label [[OMP_IF_ELSE:%.*]] // CHECK2: omp_if.then: -// CHECK2-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 2, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64)* @.omp_outlined..15 to void (i32*, i32*, ...)*), i64 [[TMP8]], i64 [[TMP10]]) +// CHECK2-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) 
@__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 2, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64)* @.omp_outlined..15 to void (i32*, i32*, ...)*), i64 [[TMP8]], i64 [[TMP10]]), !llvm.access.group !56 // CHECK2-NEXT: br label [[OMP_IF_END:%.*]] // CHECK2: omp_if.else: -// CHECK2-NEXT: call void @__kmpc_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]) -// CHECK2-NEXT: [[TMP12:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK2-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4 -// CHECK2-NEXT: call void @.omp_outlined..15(i32* [[TMP12]], i32* [[DOTBOUND_ZERO_ADDR]], i64 [[TMP8]], i64 [[TMP10]]) #[[ATTR2]] -// CHECK2-NEXT: call void @__kmpc_end_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]) +// CHECK2-NEXT: call void @__kmpc_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]), !llvm.access.group !56 +// CHECK2-NEXT: [[TMP12:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8, !llvm.access.group !56 +// CHECK2-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4, !llvm.access.group !56 +// CHECK2-NEXT: call void @.omp_outlined..15(i32* [[TMP12]], i32* [[DOTBOUND_ZERO_ADDR]], i64 [[TMP8]], i64 [[TMP10]]) #[[ATTR2]], !llvm.access.group !56 +// CHECK2-NEXT: call void @__kmpc_end_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]), !llvm.access.group !56 // CHECK2-NEXT: br label [[OMP_IF_END]] // CHECK2: omp_if.end: // CHECK2-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK2: omp.inner.for.inc: -// CHECK2-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK2-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK2-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !56 +// CHECK2-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !56 // CHECK2-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP13]], [[TMP14]] -// CHECK2-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 +// CHECK2-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !56 // CHECK2-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP57:![0-9]+]] // CHECK2: omp.inner.for.end: // CHECK2-NEXT: br label [[OMP_LOOP_EXIT:%.*]] @@ -2969,23 +2969,23 @@ int main() { // CHECK2-NEXT: store i32 [[TMP6]], i32* [[DOTOMP_IV]], align 4 // CHECK2-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK2: omp.inner.for.cond: -// CHECK2-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK2-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK2-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !59 +// CHECK2-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !59 // CHECK2-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP7]], [[TMP8]] // CHECK2-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK2: omp.inner.for.body: -// CHECK2-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK2-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !59 // CHECK2-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP9]], 1 // CHECK2-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK2-NEXT: store i32 [[ADD]], i32* [[I]], align 4 -// CHECK2-NEXT: call void @_Z3fn3v() +// CHECK2-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !59 +// CHECK2-NEXT: call void @_Z3fn3v(), !llvm.access.group !59 // CHECK2-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK2: omp.body.continue: // CHECK2-NEXT: br label 
[[OMP_INNER_FOR_INC:%.*]] // CHECK2: omp.inner.for.inc: -// CHECK2-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK2-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !59 // CHECK2-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP10]], 1 -// CHECK2-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4 +// CHECK2-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !59 // CHECK2-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP60:![0-9]+]] // CHECK2: omp.inner.for.end: // CHECK2-NEXT: br label [[OMP_LOOP_EXIT:%.*]] @@ -3075,22 +3075,22 @@ int main() { // CHECK3-NEXT: store i32 [[TMP4]], i32* [[DOTOMP_IV]], align 4 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK3: omp.inner.for.cond: -// CHECK3-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK3-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !11 +// CHECK3-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !11 // CHECK3-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] // CHECK3-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK3: omp.inner.for.body: -// CHECK3-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK3-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !11 // CHECK3-NEXT: [[TMP8:%.*]] = zext i32 [[TMP7]] to i64 -// CHECK3-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK3-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !11 // CHECK3-NEXT: [[TMP10:%.*]] = zext i32 [[TMP9]] to i64 -// CHECK3-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 2, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64)* @.omp_outlined..1 to void (i32*, i32*, ...)*), i64 [[TMP8]], i64 [[TMP10]]) +// CHECK3-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) 
@__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 2, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64)* @.omp_outlined..1 to void (i32*, i32*, ...)*), i64 [[TMP8]], i64 [[TMP10]]), !llvm.access.group !11 // CHECK3-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK3: omp.inner.for.inc: -// CHECK3-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK3-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !11 +// CHECK3-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !11 // CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP11]], [[TMP12]] -// CHECK3-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !11 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP12:![0-9]+]] // CHECK3: omp.inner.for.end: // CHECK3-NEXT: br label [[OMP_LOOP_EXIT:%.*]] @@ -3152,22 +3152,22 @@ int main() { // CHECK3-NEXT: store i32 [[TMP6]], i32* [[DOTOMP_IV]], align 4 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK3: omp.inner.for.cond: -// CHECK3-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !15 +// CHECK3-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !15 // CHECK3-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP7]], [[TMP8]] // CHECK3-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK3: omp.inner.for.body: -// CHECK3-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !15 // CHECK3-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP9]], 1 // CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK3-NEXT: store i32 [[ADD]], i32* [[I]], align 4 +// CHECK3-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !15 // CHECK3-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK3: omp.body.continue: // CHECK3-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK3: omp.inner.for.inc: -// CHECK3-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !15 // CHECK3-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP10]], 1 -// CHECK3-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !15 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP16:![0-9]+]] // CHECK3: omp.inner.for.end: // CHECK3-NEXT: br label [[OMP_LOOP_EXIT:%.*]] @@ -3227,26 +3227,26 @@ int main() { // CHECK3-NEXT: store i32 [[TMP4]], i32* [[DOTOMP_IV]], align 4 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK3: omp.inner.for.cond: -// CHECK3-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK3-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !20 +// CHECK3-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !20 // CHECK3-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] // CHECK3-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK3: omp.inner.for.body: -// CHECK3-NEXT: 
[[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK3-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !20 // CHECK3-NEXT: [[TMP8:%.*]] = zext i32 [[TMP7]] to i64 -// CHECK3-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK3-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !20 // CHECK3-NEXT: [[TMP10:%.*]] = zext i32 [[TMP9]] to i64 -// CHECK3-NEXT: call void @__kmpc_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]) -// CHECK3-NEXT: [[TMP11:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK3-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4 -// CHECK3-NEXT: call void @.omp_outlined..3(i32* [[TMP11]], i32* [[DOTBOUND_ZERO_ADDR]], i64 [[TMP8]], i64 [[TMP10]]) #[[ATTR2]] -// CHECK3-NEXT: call void @__kmpc_end_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]) +// CHECK3-NEXT: call void @__kmpc_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]), !llvm.access.group !20 +// CHECK3-NEXT: [[TMP11:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8, !llvm.access.group !20 +// CHECK3-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4, !llvm.access.group !20 +// CHECK3-NEXT: call void @.omp_outlined..3(i32* [[TMP11]], i32* [[DOTBOUND_ZERO_ADDR]], i64 [[TMP8]], i64 [[TMP10]]) #[[ATTR2]], !llvm.access.group !20 +// CHECK3-NEXT: call void @__kmpc_end_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]), !llvm.access.group !20 // CHECK3-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK3: omp.inner.for.inc: -// CHECK3-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK3-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !20 +// CHECK3-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !20 // CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP12]], [[TMP13]] -// CHECK3-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !20 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP21:![0-9]+]] // CHECK3: omp.inner.for.end: // CHECK3-NEXT: br label [[OMP_LOOP_EXIT:%.*]] @@ -3308,23 +3308,23 @@ int main() { // CHECK3-NEXT: store i32 [[TMP6]], i32* [[DOTOMP_IV]], align 4 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK3: omp.inner.for.cond: -// CHECK3-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !23 +// CHECK3-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !23 // CHECK3-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP7]], [[TMP8]] // CHECK3-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK3: omp.inner.for.body: -// CHECK3-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !23 // CHECK3-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP9]], 1 // CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK3-NEXT: store i32 [[ADD]], i32* [[I]], align 4 -// CHECK3-NEXT: call void @_Z9gtid_testv() +// CHECK3-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !23 +// CHECK3-NEXT: call void @_Z9gtid_testv(), !llvm.access.group !23 // CHECK3-NEXT: 
br label [[OMP_BODY_CONTINUE:%.*]] // CHECK3: omp.body.continue: // CHECK3-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK3: omp.inner.for.inc: -// CHECK3-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !23 // CHECK3-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP10]], 1 -// CHECK3-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !23 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP24:![0-9]+]] // CHECK3: omp.inner.for.end: // CHECK3-NEXT: br label [[OMP_LOOP_EXIT:%.*]] @@ -3446,22 +3446,22 @@ int main() { // CHECK3-NEXT: store i32 [[TMP4]], i32* [[DOTOMP_IV]], align 4 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK3: omp.inner.for.cond: -// CHECK3-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK3-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !26 +// CHECK3-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !26 // CHECK3-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] // CHECK3-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK3: omp.inner.for.body: -// CHECK3-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK3-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !26 // CHECK3-NEXT: [[TMP8:%.*]] = zext i32 [[TMP7]] to i64 -// CHECK3-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK3-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !26 // CHECK3-NEXT: [[TMP10:%.*]] = zext i32 [[TMP9]] to i64 -// CHECK3-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 2, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64)* @.omp_outlined..5 to void (i32*, i32*, ...)*), i64 [[TMP8]], i64 [[TMP10]]) +// CHECK3-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) 
@__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 2, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64)* @.omp_outlined..5 to void (i32*, i32*, ...)*), i64 [[TMP8]], i64 [[TMP10]]), !llvm.access.group !26 // CHECK3-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK3: omp.inner.for.inc: -// CHECK3-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK3-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !26 +// CHECK3-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !26 // CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP11]], [[TMP12]] -// CHECK3-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !26 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP27:![0-9]+]] // CHECK3: omp.inner.for.end: // CHECK3-NEXT: br label [[OMP_LOOP_EXIT:%.*]] @@ -3523,23 +3523,23 @@ int main() { // CHECK3-NEXT: store i32 [[TMP6]], i32* [[DOTOMP_IV]], align 4 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK3: omp.inner.for.cond: -// CHECK3-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !29 +// CHECK3-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !29 // CHECK3-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP7]], [[TMP8]] // CHECK3-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK3: omp.inner.for.body: -// CHECK3-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !29 // CHECK3-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP9]], 1 // CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK3-NEXT: store i32 [[ADD]], i32* [[I]], align 4 -// CHECK3-NEXT: call void @_Z3fn4v() +// CHECK3-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !29 +// CHECK3-NEXT: call void @_Z3fn4v(), !llvm.access.group !29 // CHECK3-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK3: omp.body.continue: // CHECK3-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK3: omp.inner.for.inc: -// CHECK3-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !29 // CHECK3-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP10]], 1 -// CHECK3-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !29 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP30:![0-9]+]] // CHECK3: omp.inner.for.end: // CHECK3-NEXT: br label [[OMP_LOOP_EXIT:%.*]] @@ -3781,41 +3781,41 @@ int main() { // CHECK3: omp_if.then: // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK3: omp.inner.for.cond: -// CHECK3-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK3-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !35 +// CHECK3-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !35 // CHECK3-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] // CHECK3-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label 
[[OMP_INNER_FOR_END:%.*]] // CHECK3: omp.inner.for.body: -// CHECK3-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK3-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !35 // CHECK3-NEXT: [[TMP9:%.*]] = zext i32 [[TMP8]] to i64 -// CHECK3-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK3-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !35 // CHECK3-NEXT: [[TMP11:%.*]] = zext i32 [[TMP10]] to i64 -// CHECK3-NEXT: [[TMP12:%.*]] = load i8, i8* [[CONV]], align 8 +// CHECK3-NEXT: [[TMP12:%.*]] = load i8, i8* [[CONV]], align 8, !llvm.access.group !35 // CHECK3-NEXT: [[TOBOOL2:%.*]] = trunc i8 [[TMP12]] to i1 // CHECK3-NEXT: [[CONV3:%.*]] = bitcast i64* [[DOTCAPTURE_EXPR__CASTED]] to i8* // CHECK3-NEXT: [[FROMBOOL:%.*]] = zext i1 [[TOBOOL2]] to i8 -// CHECK3-NEXT: store i8 [[FROMBOOL]], i8* [[CONV3]], align 1 -// CHECK3-NEXT: [[TMP13:%.*]] = load i64, i64* [[DOTCAPTURE_EXPR__CASTED]], align 8 -// CHECK3-NEXT: [[TMP14:%.*]] = load i8, i8* [[CONV]], align 8 +// CHECK3-NEXT: store i8 [[FROMBOOL]], i8* [[CONV3]], align 1, !llvm.access.group !35 +// CHECK3-NEXT: [[TMP13:%.*]] = load i64, i64* [[DOTCAPTURE_EXPR__CASTED]], align 8, !llvm.access.group !35 +// CHECK3-NEXT: [[TMP14:%.*]] = load i8, i8* [[CONV]], align 8, !llvm.access.group !35 // CHECK3-NEXT: [[TOBOOL4:%.*]] = trunc i8 [[TMP14]] to i1 // CHECK3-NEXT: br i1 [[TOBOOL4]], label [[OMP_IF_THEN5:%.*]], label [[OMP_IF_ELSE:%.*]] // CHECK3: omp_if.then5: -// CHECK3-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 3, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64, i64)* @.omp_outlined..9 to void (i32*, i32*, ...)*), i64 [[TMP9]], i64 [[TMP11]], i64 [[TMP13]]) +// CHECK3-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) 
@__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 3, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64, i64)* @.omp_outlined..9 to void (i32*, i32*, ...)*), i64 [[TMP9]], i64 [[TMP11]], i64 [[TMP13]]), !llvm.access.group !35 // CHECK3-NEXT: br label [[OMP_IF_END:%.*]] // CHECK3: omp_if.else: -// CHECK3-NEXT: call void @__kmpc_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]) -// CHECK3-NEXT: [[TMP15:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK3-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4 -// CHECK3-NEXT: call void @.omp_outlined..9(i32* [[TMP15]], i32* [[DOTBOUND_ZERO_ADDR]], i64 [[TMP9]], i64 [[TMP11]], i64 [[TMP13]]) #[[ATTR2]] -// CHECK3-NEXT: call void @__kmpc_end_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]) +// CHECK3-NEXT: call void @__kmpc_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]), !llvm.access.group !35 +// CHECK3-NEXT: [[TMP15:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8, !llvm.access.group !35 +// CHECK3-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4, !llvm.access.group !35 +// CHECK3-NEXT: call void @.omp_outlined..9(i32* [[TMP15]], i32* [[DOTBOUND_ZERO_ADDR]], i64 [[TMP9]], i64 [[TMP11]], i64 [[TMP13]]) #[[ATTR2]], !llvm.access.group !35 +// CHECK3-NEXT: call void @__kmpc_end_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]), !llvm.access.group !35 // CHECK3-NEXT: br label [[OMP_IF_END]] // CHECK3: omp_if.end: // CHECK3-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK3: omp.inner.for.inc: -// CHECK3-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK3-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !35 +// CHECK3-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !35 // CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP16]], [[TMP17]] -// CHECK3-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !35 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP36:![0-9]+]] // CHECK3: omp.inner.for.end: // CHECK3-NEXT: br label [[OMP_IF_END22:%.*]] @@ -3927,23 +3927,23 @@ int main() { // CHECK3-NEXT: store i32 [[TMP7]], i32* [[DOTOMP_IV]], align 4 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK3: omp.inner.for.cond: -// CHECK3-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !39 +// CHECK3-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !39 // CHECK3-NEXT: [[CMP3:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] // CHECK3-NEXT: br i1 [[CMP3]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK3: omp.inner.for.body: -// CHECK3-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !39 // CHECK3-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP10]], 1 // CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK3-NEXT: store i32 [[ADD]], i32* [[I]], align 4 -// CHECK3-NEXT: call void @_Z3fn6v() +// CHECK3-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !39 +// CHECK3-NEXT: call void @_Z3fn6v(), !llvm.access.group !39 // CHECK3-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK3: 
omp.body.continue: // CHECK3-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK3: omp.inner.for.inc: -// CHECK3-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !39 // CHECK3-NEXT: [[ADD4:%.*]] = add nsw i32 [[TMP11]], 1 -// CHECK3-NEXT: store i32 [[ADD4]], i32* [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: store i32 [[ADD4]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !39 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP40:![0-9]+]] // CHECK3: omp.inner.for.end: // CHECK3-NEXT: br label [[OMP_IF_END:%.*]] @@ -4055,23 +4055,23 @@ int main() { // CHECK3-NEXT: store i32 [[TMP7]], i32* [[DOTOMP_IV]], align 4 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK3: omp.inner.for.cond: -// CHECK3-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !43 +// CHECK3-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !43 // CHECK3-NEXT: [[CMP3:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] // CHECK3-NEXT: br i1 [[CMP3]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK3: omp.inner.for.body: -// CHECK3-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !43 // CHECK3-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP10]], 1 // CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK3-NEXT: store i32 [[ADD]], i32* [[I]], align 4 -// CHECK3-NEXT: call void @_Z3fn6v() +// CHECK3-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !43 +// CHECK3-NEXT: call void @_Z3fn6v(), !llvm.access.group !43 // CHECK3-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK3: omp.body.continue: // CHECK3-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK3: omp.inner.for.inc: -// CHECK3-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !43 // CHECK3-NEXT: [[ADD4:%.*]] = add nsw i32 [[TMP11]], 1 -// CHECK3-NEXT: store i32 [[ADD4]], i32* [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: store i32 [[ADD4]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !43 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP44:![0-9]+]] // CHECK3: omp.inner.for.end: // CHECK3-NEXT: br label [[OMP_IF_END:%.*]] @@ -4234,22 +4234,22 @@ int main() { // CHECK3-NEXT: store i32 [[TMP4]], i32* [[DOTOMP_IV]], align 4 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK3: omp.inner.for.cond: -// CHECK3-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK3-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !47 +// CHECK3-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !47 // CHECK3-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] // CHECK3-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK3: omp.inner.for.body: -// CHECK3-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK3-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !47 // CHECK3-NEXT: [[TMP8:%.*]] = zext i32 [[TMP7]] to i64 -// CHECK3-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 
+// CHECK3-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !47 // CHECK3-NEXT: [[TMP10:%.*]] = zext i32 [[TMP9]] to i64 -// CHECK3-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 2, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64)* @.omp_outlined..12 to void (i32*, i32*, ...)*), i64 [[TMP8]], i64 [[TMP10]]) +// CHECK3-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 2, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64)* @.omp_outlined..12 to void (i32*, i32*, ...)*), i64 [[TMP8]], i64 [[TMP10]]), !llvm.access.group !47 // CHECK3-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK3: omp.inner.for.inc: -// CHECK3-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK3-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !47 +// CHECK3-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !47 // CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP11]], [[TMP12]] -// CHECK3-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !47 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP48:![0-9]+]] // CHECK3: omp.inner.for.end: // CHECK3-NEXT: br label [[OMP_LOOP_EXIT:%.*]] @@ -4311,23 +4311,23 @@ int main() { // CHECK3-NEXT: store i32 [[TMP6]], i32* [[DOTOMP_IV]], align 4 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK3: omp.inner.for.cond: -// CHECK3-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !50 +// CHECK3-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !50 // CHECK3-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP7]], [[TMP8]] // CHECK3-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK3: omp.inner.for.body: -// CHECK3-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !50 // CHECK3-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP9]], 1 // CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK3-NEXT: store i32 [[ADD]], i32* [[I]], align 4 -// CHECK3-NEXT: call void @_Z3fn1v() +// CHECK3-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !50 +// CHECK3-NEXT: call void @_Z3fn1v(), !llvm.access.group !50 // CHECK3-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK3: omp.body.continue: // CHECK3-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK3: omp.inner.for.inc: -// CHECK3-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !50 // CHECK3-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP10]], 1 -// CHECK3-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !50 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP51:![0-9]+]] // CHECK3: omp.inner.for.end: // CHECK3-NEXT: br label [[OMP_LOOP_EXIT:%.*]] @@ -4562,35 +4562,35 @@ int main() { // CHECK3-NEXT: store i32 [[TMP4]], i32* [[DOTOMP_IV]], align 4 // CHECK3-NEXT: br label 
[[OMP_INNER_FOR_COND:%.*]] // CHECK3: omp.inner.for.cond: -// CHECK3-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK3-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !55 +// CHECK3-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !55 // CHECK3-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] // CHECK3-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK3: omp.inner.for.body: -// CHECK3-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK3-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !55 // CHECK3-NEXT: [[TMP8:%.*]] = zext i32 [[TMP7]] to i64 -// CHECK3-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK3-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !55 // CHECK3-NEXT: [[TMP10:%.*]] = zext i32 [[TMP9]] to i64 -// CHECK3-NEXT: [[TMP11:%.*]] = load i8, i8* [[CONV]], align 8 +// CHECK3-NEXT: [[TMP11:%.*]] = load i8, i8* [[CONV]], align 8, !llvm.access.group !55 // CHECK3-NEXT: [[TOBOOL:%.*]] = trunc i8 [[TMP11]] to i1 // CHECK3-NEXT: br i1 [[TOBOOL]], label [[OMP_IF_THEN:%.*]], label [[OMP_IF_ELSE:%.*]] // CHECK3: omp_if.then: -// CHECK3-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 2, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64)* @.omp_outlined..16 to void (i32*, i32*, ...)*), i64 [[TMP8]], i64 [[TMP10]]) +// CHECK3-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 2, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64)* @.omp_outlined..16 to void (i32*, i32*, ...)*), i64 [[TMP8]], i64 [[TMP10]]), !llvm.access.group !55 // CHECK3-NEXT: br label [[OMP_IF_END:%.*]] // CHECK3: omp_if.else: -// CHECK3-NEXT: call void @__kmpc_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]) -// CHECK3-NEXT: [[TMP12:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK3-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4 -// CHECK3-NEXT: call void @.omp_outlined..16(i32* [[TMP12]], i32* [[DOTBOUND_ZERO_ADDR]], i64 [[TMP8]], i64 [[TMP10]]) #[[ATTR2]] -// CHECK3-NEXT: call void @__kmpc_end_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]) +// CHECK3-NEXT: call void @__kmpc_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]), !llvm.access.group !55 +// CHECK3-NEXT: [[TMP12:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8, !llvm.access.group !55 +// CHECK3-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4, !llvm.access.group !55 +// CHECK3-NEXT: call void @.omp_outlined..16(i32* [[TMP12]], i32* [[DOTBOUND_ZERO_ADDR]], i64 [[TMP8]], i64 [[TMP10]]) #[[ATTR2]], !llvm.access.group !55 +// CHECK3-NEXT: call void @__kmpc_end_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]), !llvm.access.group !55 // CHECK3-NEXT: br label [[OMP_IF_END]] // CHECK3: omp_if.end: // CHECK3-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK3: omp.inner.for.inc: -// CHECK3-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK3-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !55 +// CHECK3-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, 
!llvm.access.group !55 // CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP13]], [[TMP14]] -// CHECK3-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !55 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP56:![0-9]+]] // CHECK3: omp.inner.for.end: // CHECK3-NEXT: br label [[OMP_LOOP_EXIT:%.*]] @@ -4652,23 +4652,23 @@ int main() { // CHECK3-NEXT: store i32 [[TMP6]], i32* [[DOTOMP_IV]], align 4 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK3: omp.inner.for.cond: -// CHECK3-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !58 +// CHECK3-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !58 // CHECK3-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP7]], [[TMP8]] // CHECK3-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK3: omp.inner.for.body: -// CHECK3-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !58 // CHECK3-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP9]], 1 // CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK3-NEXT: store i32 [[ADD]], i32* [[I]], align 4 -// CHECK3-NEXT: call void @_Z3fn3v() +// CHECK3-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !58 +// CHECK3-NEXT: call void @_Z3fn3v(), !llvm.access.group !58 // CHECK3-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK3: omp.body.continue: // CHECK3-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK3: omp.inner.for.inc: -// CHECK3-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !58 // CHECK3-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP10]], 1 -// CHECK3-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !58 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP59:![0-9]+]] // CHECK3: omp.inner.for.end: // CHECK3-NEXT: br label [[OMP_LOOP_EXIT:%.*]] @@ -4758,22 +4758,22 @@ int main() { // CHECK4-NEXT: store i32 [[TMP4]], i32* [[DOTOMP_IV]], align 4 // CHECK4-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK4: omp.inner.for.cond: -// CHECK4-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK4-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK4-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !11 +// CHECK4-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !11 // CHECK4-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] // CHECK4-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK4: omp.inner.for.body: -// CHECK4-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK4-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !11 // CHECK4-NEXT: [[TMP8:%.*]] = zext i32 [[TMP7]] to i64 -// CHECK4-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK4-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !11 // CHECK4-NEXT: [[TMP10:%.*]] = zext i32 [[TMP9]] to i64 -// CHECK4-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) 
@__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 2, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64)* @.omp_outlined..1 to void (i32*, i32*, ...)*), i64 [[TMP8]], i64 [[TMP10]]) +// CHECK4-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 2, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64)* @.omp_outlined..1 to void (i32*, i32*, ...)*), i64 [[TMP8]], i64 [[TMP10]]), !llvm.access.group !11 // CHECK4-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK4: omp.inner.for.inc: -// CHECK4-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK4-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK4-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !11 +// CHECK4-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !11 // CHECK4-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP11]], [[TMP12]] -// CHECK4-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 +// CHECK4-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !11 // CHECK4-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP12:![0-9]+]] // CHECK4: omp.inner.for.end: // CHECK4-NEXT: br label [[OMP_LOOP_EXIT:%.*]] @@ -4835,22 +4835,22 @@ int main() { // CHECK4-NEXT: store i32 [[TMP6]], i32* [[DOTOMP_IV]], align 4 // CHECK4-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK4: omp.inner.for.cond: -// CHECK4-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK4-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK4-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !15 +// CHECK4-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !15 // CHECK4-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP7]], [[TMP8]] // CHECK4-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK4: omp.inner.for.body: -// CHECK4-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK4-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !15 // CHECK4-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP9]], 1 // CHECK4-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK4-NEXT: store i32 [[ADD]], i32* [[I]], align 4 +// CHECK4-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !15 // CHECK4-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK4: omp.body.continue: // CHECK4-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK4: omp.inner.for.inc: -// CHECK4-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK4-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !15 // CHECK4-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP10]], 1 -// CHECK4-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4 +// CHECK4-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !15 // CHECK4-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP16:![0-9]+]] // CHECK4: omp.inner.for.end: // CHECK4-NEXT: br label [[OMP_LOOP_EXIT:%.*]] @@ -4910,26 +4910,26 @@ int main() { // CHECK4-NEXT: store i32 [[TMP4]], i32* [[DOTOMP_IV]], align 4 // CHECK4-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK4: omp.inner.for.cond: -// CHECK4-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK4-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK4-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !20 +// CHECK4-NEXT: [[TMP6:%.*]] = load i32, 
i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !20 // CHECK4-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] // CHECK4-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK4: omp.inner.for.body: -// CHECK4-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK4-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !20 // CHECK4-NEXT: [[TMP8:%.*]] = zext i32 [[TMP7]] to i64 -// CHECK4-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK4-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !20 // CHECK4-NEXT: [[TMP10:%.*]] = zext i32 [[TMP9]] to i64 -// CHECK4-NEXT: call void @__kmpc_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]) -// CHECK4-NEXT: [[TMP11:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK4-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4 -// CHECK4-NEXT: call void @.omp_outlined..3(i32* [[TMP11]], i32* [[DOTBOUND_ZERO_ADDR]], i64 [[TMP8]], i64 [[TMP10]]) #[[ATTR2]] -// CHECK4-NEXT: call void @__kmpc_end_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]) +// CHECK4-NEXT: call void @__kmpc_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]), !llvm.access.group !20 +// CHECK4-NEXT: [[TMP11:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8, !llvm.access.group !20 +// CHECK4-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4, !llvm.access.group !20 +// CHECK4-NEXT: call void @.omp_outlined..3(i32* [[TMP11]], i32* [[DOTBOUND_ZERO_ADDR]], i64 [[TMP8]], i64 [[TMP10]]) #[[ATTR2]], !llvm.access.group !20 +// CHECK4-NEXT: call void @__kmpc_end_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]), !llvm.access.group !20 // CHECK4-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK4: omp.inner.for.inc: -// CHECK4-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK4-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK4-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !20 +// CHECK4-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !20 // CHECK4-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP12]], [[TMP13]] -// CHECK4-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 +// CHECK4-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !20 // CHECK4-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP21:![0-9]+]] // CHECK4: omp.inner.for.end: // CHECK4-NEXT: br label [[OMP_LOOP_EXIT:%.*]] @@ -4991,23 +4991,23 @@ int main() { // CHECK4-NEXT: store i32 [[TMP6]], i32* [[DOTOMP_IV]], align 4 // CHECK4-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK4: omp.inner.for.cond: -// CHECK4-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK4-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK4-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !23 +// CHECK4-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !23 // CHECK4-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP7]], [[TMP8]] // CHECK4-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK4: omp.inner.for.body: -// CHECK4-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK4-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !23 // CHECK4-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP9]], 1 // CHECK4-NEXT: [[ADD:%.*]] = add nsw i32 0, 
[[MUL]] -// CHECK4-NEXT: store i32 [[ADD]], i32* [[I]], align 4 -// CHECK4-NEXT: call void @_Z9gtid_testv() +// CHECK4-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !23 +// CHECK4-NEXT: call void @_Z9gtid_testv(), !llvm.access.group !23 // CHECK4-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK4: omp.body.continue: // CHECK4-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK4: omp.inner.for.inc: -// CHECK4-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK4-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !23 // CHECK4-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP10]], 1 -// CHECK4-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4 +// CHECK4-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !23 // CHECK4-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP24:![0-9]+]] // CHECK4: omp.inner.for.end: // CHECK4-NEXT: br label [[OMP_LOOP_EXIT:%.*]] @@ -5129,22 +5129,22 @@ int main() { // CHECK4-NEXT: store i32 [[TMP4]], i32* [[DOTOMP_IV]], align 4 // CHECK4-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK4: omp.inner.for.cond: -// CHECK4-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK4-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK4-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !26 +// CHECK4-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !26 // CHECK4-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] // CHECK4-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK4: omp.inner.for.body: -// CHECK4-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK4-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !26 // CHECK4-NEXT: [[TMP8:%.*]] = zext i32 [[TMP7]] to i64 -// CHECK4-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK4-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !26 // CHECK4-NEXT: [[TMP10:%.*]] = zext i32 [[TMP9]] to i64 -// CHECK4-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 2, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64)* @.omp_outlined..5 to void (i32*, i32*, ...)*), i64 [[TMP8]], i64 [[TMP10]]) +// CHECK4-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) 
@__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 2, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64)* @.omp_outlined..5 to void (i32*, i32*, ...)*), i64 [[TMP8]], i64 [[TMP10]]), !llvm.access.group !26 // CHECK4-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK4: omp.inner.for.inc: -// CHECK4-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK4-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK4-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !26 +// CHECK4-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !26 // CHECK4-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP11]], [[TMP12]] -// CHECK4-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 +// CHECK4-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !26 // CHECK4-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP27:![0-9]+]] // CHECK4: omp.inner.for.end: // CHECK4-NEXT: br label [[OMP_LOOP_EXIT:%.*]] @@ -5206,23 +5206,23 @@ int main() { // CHECK4-NEXT: store i32 [[TMP6]], i32* [[DOTOMP_IV]], align 4 // CHECK4-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK4: omp.inner.for.cond: -// CHECK4-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK4-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK4-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !29 +// CHECK4-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !29 // CHECK4-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP7]], [[TMP8]] // CHECK4-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK4: omp.inner.for.body: -// CHECK4-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK4-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !29 // CHECK4-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP9]], 1 // CHECK4-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK4-NEXT: store i32 [[ADD]], i32* [[I]], align 4 -// CHECK4-NEXT: call void @_Z3fn4v() +// CHECK4-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !29 +// CHECK4-NEXT: call void @_Z3fn4v(), !llvm.access.group !29 // CHECK4-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK4: omp.body.continue: // CHECK4-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK4: omp.inner.for.inc: -// CHECK4-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK4-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !29 // CHECK4-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP10]], 1 -// CHECK4-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4 +// CHECK4-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !29 // CHECK4-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP30:![0-9]+]] // CHECK4: omp.inner.for.end: // CHECK4-NEXT: br label [[OMP_LOOP_EXIT:%.*]] @@ -5464,41 +5464,41 @@ int main() { // CHECK4: omp_if.then: // CHECK4-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK4: omp.inner.for.cond: -// CHECK4-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK4-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK4-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !35 +// CHECK4-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !35 // CHECK4-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] // CHECK4-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label 
[[OMP_INNER_FOR_END:%.*]] // CHECK4: omp.inner.for.body: -// CHECK4-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK4-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !35 // CHECK4-NEXT: [[TMP9:%.*]] = zext i32 [[TMP8]] to i64 -// CHECK4-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK4-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !35 // CHECK4-NEXT: [[TMP11:%.*]] = zext i32 [[TMP10]] to i64 -// CHECK4-NEXT: [[TMP12:%.*]] = load i8, i8* [[CONV]], align 8 +// CHECK4-NEXT: [[TMP12:%.*]] = load i8, i8* [[CONV]], align 8, !llvm.access.group !35 // CHECK4-NEXT: [[TOBOOL2:%.*]] = trunc i8 [[TMP12]] to i1 // CHECK4-NEXT: [[CONV3:%.*]] = bitcast i64* [[DOTCAPTURE_EXPR__CASTED]] to i8* // CHECK4-NEXT: [[FROMBOOL:%.*]] = zext i1 [[TOBOOL2]] to i8 -// CHECK4-NEXT: store i8 [[FROMBOOL]], i8* [[CONV3]], align 1 -// CHECK4-NEXT: [[TMP13:%.*]] = load i64, i64* [[DOTCAPTURE_EXPR__CASTED]], align 8 -// CHECK4-NEXT: [[TMP14:%.*]] = load i8, i8* [[CONV]], align 8 +// CHECK4-NEXT: store i8 [[FROMBOOL]], i8* [[CONV3]], align 1, !llvm.access.group !35 +// CHECK4-NEXT: [[TMP13:%.*]] = load i64, i64* [[DOTCAPTURE_EXPR__CASTED]], align 8, !llvm.access.group !35 +// CHECK4-NEXT: [[TMP14:%.*]] = load i8, i8* [[CONV]], align 8, !llvm.access.group !35 // CHECK4-NEXT: [[TOBOOL4:%.*]] = trunc i8 [[TMP14]] to i1 // CHECK4-NEXT: br i1 [[TOBOOL4]], label [[OMP_IF_THEN5:%.*]], label [[OMP_IF_ELSE:%.*]] // CHECK4: omp_if.then5: -// CHECK4-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 3, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64, i64)* @.omp_outlined..9 to void (i32*, i32*, ...)*), i64 [[TMP9]], i64 [[TMP11]], i64 [[TMP13]]) +// CHECK4-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) 
@__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 3, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64, i64)* @.omp_outlined..9 to void (i32*, i32*, ...)*), i64 [[TMP9]], i64 [[TMP11]], i64 [[TMP13]]), !llvm.access.group !35 // CHECK4-NEXT: br label [[OMP_IF_END:%.*]] // CHECK4: omp_if.else: -// CHECK4-NEXT: call void @__kmpc_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]) -// CHECK4-NEXT: [[TMP15:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK4-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4 -// CHECK4-NEXT: call void @.omp_outlined..9(i32* [[TMP15]], i32* [[DOTBOUND_ZERO_ADDR]], i64 [[TMP9]], i64 [[TMP11]], i64 [[TMP13]]) #[[ATTR2]] -// CHECK4-NEXT: call void @__kmpc_end_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]) +// CHECK4-NEXT: call void @__kmpc_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]), !llvm.access.group !35 +// CHECK4-NEXT: [[TMP15:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8, !llvm.access.group !35 +// CHECK4-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4, !llvm.access.group !35 +// CHECK4-NEXT: call void @.omp_outlined..9(i32* [[TMP15]], i32* [[DOTBOUND_ZERO_ADDR]], i64 [[TMP9]], i64 [[TMP11]], i64 [[TMP13]]) #[[ATTR2]], !llvm.access.group !35 +// CHECK4-NEXT: call void @__kmpc_end_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]), !llvm.access.group !35 // CHECK4-NEXT: br label [[OMP_IF_END]] // CHECK4: omp_if.end: // CHECK4-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK4: omp.inner.for.inc: -// CHECK4-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK4-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK4-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !35 +// CHECK4-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !35 // CHECK4-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP16]], [[TMP17]] -// CHECK4-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 +// CHECK4-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !35 // CHECK4-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP36:![0-9]+]] // CHECK4: omp.inner.for.end: // CHECK4-NEXT: br label [[OMP_IF_END22:%.*]] @@ -5610,23 +5610,23 @@ int main() { // CHECK4-NEXT: store i32 [[TMP7]], i32* [[DOTOMP_IV]], align 4 // CHECK4-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK4: omp.inner.for.cond: -// CHECK4-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK4-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK4-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !39 +// CHECK4-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !39 // CHECK4-NEXT: [[CMP3:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] // CHECK4-NEXT: br i1 [[CMP3]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK4: omp.inner.for.body: -// CHECK4-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK4-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !39 // CHECK4-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP10]], 1 // CHECK4-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK4-NEXT: store i32 [[ADD]], i32* [[I]], align 4 -// CHECK4-NEXT: call void @_Z3fn6v() +// CHECK4-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !39 +// CHECK4-NEXT: call void @_Z3fn6v(), !llvm.access.group !39 // CHECK4-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK4: 
omp.body.continue: // CHECK4-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK4: omp.inner.for.inc: -// CHECK4-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK4-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !39 // CHECK4-NEXT: [[ADD4:%.*]] = add nsw i32 [[TMP11]], 1 -// CHECK4-NEXT: store i32 [[ADD4]], i32* [[DOTOMP_IV]], align 4 +// CHECK4-NEXT: store i32 [[ADD4]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !39 // CHECK4-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP40:![0-9]+]] // CHECK4: omp.inner.for.end: // CHECK4-NEXT: br label [[OMP_IF_END:%.*]] @@ -5738,23 +5738,23 @@ int main() { // CHECK4-NEXT: store i32 [[TMP7]], i32* [[DOTOMP_IV]], align 4 // CHECK4-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK4: omp.inner.for.cond: -// CHECK4-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK4-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK4-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !43 +// CHECK4-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !43 // CHECK4-NEXT: [[CMP3:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] // CHECK4-NEXT: br i1 [[CMP3]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK4: omp.inner.for.body: -// CHECK4-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK4-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !43 // CHECK4-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP10]], 1 // CHECK4-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK4-NEXT: store i32 [[ADD]], i32* [[I]], align 4 -// CHECK4-NEXT: call void @_Z3fn6v() +// CHECK4-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !43 +// CHECK4-NEXT: call void @_Z3fn6v(), !llvm.access.group !43 // CHECK4-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK4: omp.body.continue: // CHECK4-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK4: omp.inner.for.inc: -// CHECK4-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK4-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !43 // CHECK4-NEXT: [[ADD4:%.*]] = add nsw i32 [[TMP11]], 1 -// CHECK4-NEXT: store i32 [[ADD4]], i32* [[DOTOMP_IV]], align 4 +// CHECK4-NEXT: store i32 [[ADD4]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !43 // CHECK4-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP44:![0-9]+]] // CHECK4: omp.inner.for.end: // CHECK4-NEXT: br label [[OMP_IF_END:%.*]] @@ -5917,22 +5917,22 @@ int main() { // CHECK4-NEXT: store i32 [[TMP4]], i32* [[DOTOMP_IV]], align 4 // CHECK4-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK4: omp.inner.for.cond: -// CHECK4-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK4-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK4-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !47 +// CHECK4-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !47 // CHECK4-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] // CHECK4-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK4: omp.inner.for.body: -// CHECK4-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK4-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !47 // CHECK4-NEXT: [[TMP8:%.*]] = zext i32 [[TMP7]] to i64 -// CHECK4-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 
+// CHECK4-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !47 // CHECK4-NEXT: [[TMP10:%.*]] = zext i32 [[TMP9]] to i64 -// CHECK4-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 2, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64)* @.omp_outlined..12 to void (i32*, i32*, ...)*), i64 [[TMP8]], i64 [[TMP10]]) +// CHECK4-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 2, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64)* @.omp_outlined..12 to void (i32*, i32*, ...)*), i64 [[TMP8]], i64 [[TMP10]]), !llvm.access.group !47 // CHECK4-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK4: omp.inner.for.inc: -// CHECK4-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK4-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK4-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !47 +// CHECK4-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !47 // CHECK4-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP11]], [[TMP12]] -// CHECK4-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 +// CHECK4-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !47 // CHECK4-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP48:![0-9]+]] // CHECK4: omp.inner.for.end: // CHECK4-NEXT: br label [[OMP_LOOP_EXIT:%.*]] @@ -5994,23 +5994,23 @@ int main() { // CHECK4-NEXT: store i32 [[TMP6]], i32* [[DOTOMP_IV]], align 4 // CHECK4-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK4: omp.inner.for.cond: -// CHECK4-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK4-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK4-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !50 +// CHECK4-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !50 // CHECK4-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP7]], [[TMP8]] // CHECK4-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK4: omp.inner.for.body: -// CHECK4-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK4-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !50 // CHECK4-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP9]], 1 // CHECK4-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK4-NEXT: store i32 [[ADD]], i32* [[I]], align 4 -// CHECK4-NEXT: call void @_Z3fn1v() +// CHECK4-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !50 +// CHECK4-NEXT: call void @_Z3fn1v(), !llvm.access.group !50 // CHECK4-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK4: omp.body.continue: // CHECK4-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK4: omp.inner.for.inc: -// CHECK4-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK4-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !50 // CHECK4-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP10]], 1 -// CHECK4-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4 +// CHECK4-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !50 // CHECK4-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP51:![0-9]+]] // CHECK4: omp.inner.for.end: // CHECK4-NEXT: br label [[OMP_LOOP_EXIT:%.*]] @@ -6245,35 +6245,35 @@ int main() { // CHECK4-NEXT: store i32 [[TMP4]], i32* [[DOTOMP_IV]], align 4 // CHECK4-NEXT: br label 
[[OMP_INNER_FOR_COND:%.*]] // CHECK4: omp.inner.for.cond: -// CHECK4-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK4-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK4-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !55 +// CHECK4-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !55 // CHECK4-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] // CHECK4-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK4: omp.inner.for.body: -// CHECK4-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK4-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !55 // CHECK4-NEXT: [[TMP8:%.*]] = zext i32 [[TMP7]] to i64 -// CHECK4-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK4-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !55 // CHECK4-NEXT: [[TMP10:%.*]] = zext i32 [[TMP9]] to i64 -// CHECK4-NEXT: [[TMP11:%.*]] = load i8, i8* [[CONV]], align 8 +// CHECK4-NEXT: [[TMP11:%.*]] = load i8, i8* [[CONV]], align 8, !llvm.access.group !55 // CHECK4-NEXT: [[TOBOOL:%.*]] = trunc i8 [[TMP11]] to i1 // CHECK4-NEXT: br i1 [[TOBOOL]], label [[OMP_IF_THEN:%.*]], label [[OMP_IF_ELSE:%.*]] // CHECK4: omp_if.then: -// CHECK4-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 2, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64)* @.omp_outlined..16 to void (i32*, i32*, ...)*), i64 [[TMP8]], i64 [[TMP10]]) +// CHECK4-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 2, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64)* @.omp_outlined..16 to void (i32*, i32*, ...)*), i64 [[TMP8]], i64 [[TMP10]]), !llvm.access.group !55 // CHECK4-NEXT: br label [[OMP_IF_END:%.*]] // CHECK4: omp_if.else: -// CHECK4-NEXT: call void @__kmpc_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]) -// CHECK4-NEXT: [[TMP12:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK4-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4 -// CHECK4-NEXT: call void @.omp_outlined..16(i32* [[TMP12]], i32* [[DOTBOUND_ZERO_ADDR]], i64 [[TMP8]], i64 [[TMP10]]) #[[ATTR2]] -// CHECK4-NEXT: call void @__kmpc_end_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]) +// CHECK4-NEXT: call void @__kmpc_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]), !llvm.access.group !55 +// CHECK4-NEXT: [[TMP12:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8, !llvm.access.group !55 +// CHECK4-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4, !llvm.access.group !55 +// CHECK4-NEXT: call void @.omp_outlined..16(i32* [[TMP12]], i32* [[DOTBOUND_ZERO_ADDR]], i64 [[TMP8]], i64 [[TMP10]]) #[[ATTR2]], !llvm.access.group !55 +// CHECK4-NEXT: call void @__kmpc_end_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]), !llvm.access.group !55 // CHECK4-NEXT: br label [[OMP_IF_END]] // CHECK4: omp_if.end: // CHECK4-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK4: omp.inner.for.inc: -// CHECK4-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK4-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK4-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !55 +// CHECK4-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, 
!llvm.access.group !55 // CHECK4-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP13]], [[TMP14]] -// CHECK4-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 +// CHECK4-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !55 // CHECK4-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP56:![0-9]+]] // CHECK4: omp.inner.for.end: // CHECK4-NEXT: br label [[OMP_LOOP_EXIT:%.*]] @@ -6335,23 +6335,23 @@ int main() { // CHECK4-NEXT: store i32 [[TMP6]], i32* [[DOTOMP_IV]], align 4 // CHECK4-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK4: omp.inner.for.cond: -// CHECK4-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK4-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK4-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !58 +// CHECK4-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !58 // CHECK4-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP7]], [[TMP8]] // CHECK4-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK4: omp.inner.for.body: -// CHECK4-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK4-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !58 // CHECK4-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP9]], 1 // CHECK4-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK4-NEXT: store i32 [[ADD]], i32* [[I]], align 4 -// CHECK4-NEXT: call void @_Z3fn3v() +// CHECK4-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !58 +// CHECK4-NEXT: call void @_Z3fn3v(), !llvm.access.group !58 // CHECK4-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK4: omp.body.continue: // CHECK4-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK4: omp.inner.for.inc: -// CHECK4-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK4-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !58 // CHECK4-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP10]], 1 -// CHECK4-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4 +// CHECK4-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !58 // CHECK4-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP59:![0-9]+]] // CHECK4: omp.inner.for.end: // CHECK4-NEXT: br label [[OMP_LOOP_EXIT:%.*]] @@ -6393,22 +6393,22 @@ int main() { // CHECK5-NEXT: store i32 [[TMP0]], i32* [[DOTOMP_IV]], align 4 // CHECK5-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK5: omp.inner.for.cond: -// CHECK5-NEXT: [[TMP1:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK5-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK5-NEXT: [[TMP1:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !2 +// CHECK5-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !2 // CHECK5-NEXT: [[CMP:%.*]] = icmp sle i32 [[TMP1]], [[TMP2]] // CHECK5-NEXT: br i1 [[CMP]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK5: omp.inner.for.body: -// CHECK5-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK5-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !2 // CHECK5-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP3]], 1 // CHECK5-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK5-NEXT: store i32 [[ADD]], i32* [[I]], align 4 +// CHECK5-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !2 // CHECK5-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK5: omp.body.continue: // CHECK5-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK5: 
omp.inner.for.inc: -// CHECK5-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK5-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !2 // CHECK5-NEXT: [[ADD1:%.*]] = add nsw i32 [[TMP4]], 1 -// CHECK5-NEXT: store i32 [[ADD1]], i32* [[DOTOMP_IV]], align 4 +// CHECK5-NEXT: store i32 [[ADD1]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !2 // CHECK5-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP3:![0-9]+]] // CHECK5: omp.inner.for.end: // CHECK5-NEXT: store i32 100, i32* [[I]], align 4 @@ -6418,23 +6418,23 @@ int main() { // CHECK5-NEXT: store i32 [[TMP5]], i32* [[DOTOMP_IV5]], align 4 // CHECK5-NEXT: br label [[OMP_INNER_FOR_COND7:%.*]] // CHECK5: omp.inner.for.cond7: -// CHECK5-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV5]], align 4 -// CHECK5-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_UB4]], align 4 +// CHECK5-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV5]], align 4, !llvm.access.group !6 +// CHECK5-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_UB4]], align 4, !llvm.access.group !6 // CHECK5-NEXT: [[CMP8:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] // CHECK5-NEXT: br i1 [[CMP8]], label [[OMP_INNER_FOR_BODY9:%.*]], label [[OMP_INNER_FOR_END15:%.*]] // CHECK5: omp.inner.for.body9: -// CHECK5-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV5]], align 4 +// CHECK5-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV5]], align 4, !llvm.access.group !6 // CHECK5-NEXT: [[MUL10:%.*]] = mul nsw i32 [[TMP8]], 1 // CHECK5-NEXT: [[ADD11:%.*]] = add nsw i32 0, [[MUL10]] -// CHECK5-NEXT: store i32 [[ADD11]], i32* [[I6]], align 4 -// CHECK5-NEXT: call void @_Z9gtid_testv() +// CHECK5-NEXT: store i32 [[ADD11]], i32* [[I6]], align 4, !llvm.access.group !6 +// CHECK5-NEXT: call void @_Z9gtid_testv(), !llvm.access.group !6 // CHECK5-NEXT: br label [[OMP_BODY_CONTINUE12:%.*]] // CHECK5: omp.body.continue12: // CHECK5-NEXT: br label [[OMP_INNER_FOR_INC13:%.*]] // CHECK5: omp.inner.for.inc13: -// CHECK5-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV5]], align 4 +// CHECK5-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV5]], align 4, !llvm.access.group !6 // CHECK5-NEXT: [[ADD14:%.*]] = add nsw i32 [[TMP9]], 1 -// CHECK5-NEXT: store i32 [[ADD14]], i32* [[DOTOMP_IV5]], align 4 +// CHECK5-NEXT: store i32 [[ADD14]], i32* [[DOTOMP_IV5]], align 4, !llvm.access.group !6 // CHECK5-NEXT: br label [[OMP_INNER_FOR_COND7]], !llvm.loop [[LOOP7:![0-9]+]] // CHECK5: omp.inner.for.end15: // CHECK5-NEXT: store i32 100, i32* [[I6]], align 4 @@ -6468,23 +6468,23 @@ int main() { // CHECK5-NEXT: store i32 [[TMP0]], i32* [[DOTOMP_IV]], align 4 // CHECK5-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK5: omp.inner.for.cond: -// CHECK5-NEXT: [[TMP1:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK5-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK5-NEXT: [[TMP1:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !9 +// CHECK5-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !9 // CHECK5-NEXT: [[CMP:%.*]] = icmp sle i32 [[TMP1]], [[TMP2]] // CHECK5-NEXT: br i1 [[CMP]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK5: omp.inner.for.body: -// CHECK5-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK5-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !9 // CHECK5-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP3]], 1 // CHECK5-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK5-NEXT: store i32 [[ADD]], i32* [[I]], align 4 -// CHECK5-NEXT: call 
void @_Z3fn4v() +// CHECK5-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !9 +// CHECK5-NEXT: call void @_Z3fn4v(), !llvm.access.group !9 // CHECK5-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK5: omp.body.continue: // CHECK5-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK5: omp.inner.for.inc: -// CHECK5-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK5-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !9 // CHECK5-NEXT: [[ADD1:%.*]] = add nsw i32 [[TMP4]], 1 -// CHECK5-NEXT: store i32 [[ADD1]], i32* [[DOTOMP_IV]], align 4 +// CHECK5-NEXT: store i32 [[ADD1]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !9 // CHECK5-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP10:![0-9]+]] // CHECK5: omp.inner.for.end: // CHECK5-NEXT: store i32 100, i32* [[I]], align 4 @@ -6494,23 +6494,23 @@ int main() { // CHECK5-NEXT: store i32 [[TMP5]], i32* [[DOTOMP_IV5]], align 4 // CHECK5-NEXT: br label [[OMP_INNER_FOR_COND7:%.*]] // CHECK5: omp.inner.for.cond7: -// CHECK5-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV5]], align 4 -// CHECK5-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_UB4]], align 4 +// CHECK5-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV5]], align 4, !llvm.access.group !12 +// CHECK5-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_UB4]], align 4, !llvm.access.group !12 // CHECK5-NEXT: [[CMP8:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] // CHECK5-NEXT: br i1 [[CMP8]], label [[OMP_INNER_FOR_BODY9:%.*]], label [[OMP_INNER_FOR_END15:%.*]] // CHECK5: omp.inner.for.body9: -// CHECK5-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV5]], align 4 +// CHECK5-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV5]], align 4, !llvm.access.group !12 // CHECK5-NEXT: [[MUL10:%.*]] = mul nsw i32 [[TMP8]], 1 // CHECK5-NEXT: [[ADD11:%.*]] = add nsw i32 0, [[MUL10]] -// CHECK5-NEXT: store i32 [[ADD11]], i32* [[I6]], align 4 -// CHECK5-NEXT: call void @_Z3fn5v() +// CHECK5-NEXT: store i32 [[ADD11]], i32* [[I6]], align 4, !llvm.access.group !12 +// CHECK5-NEXT: call void @_Z3fn5v(), !llvm.access.group !12 // CHECK5-NEXT: br label [[OMP_BODY_CONTINUE12:%.*]] // CHECK5: omp.body.continue12: // CHECK5-NEXT: br label [[OMP_INNER_FOR_INC13:%.*]] // CHECK5: omp.inner.for.inc13: -// CHECK5-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV5]], align 4 +// CHECK5-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV5]], align 4, !llvm.access.group !12 // CHECK5-NEXT: [[ADD14:%.*]] = add nsw i32 [[TMP9]], 1 -// CHECK5-NEXT: store i32 [[ADD14]], i32* [[DOTOMP_IV5]], align 4 +// CHECK5-NEXT: store i32 [[ADD14]], i32* [[DOTOMP_IV5]], align 4, !llvm.access.group !12 // CHECK5-NEXT: br label [[OMP_INNER_FOR_COND7]], !llvm.loop [[LOOP13:![0-9]+]] // CHECK5: omp.inner.for.end15: // CHECK5-NEXT: store i32 100, i32* [[I6]], align 4 @@ -6524,23 +6524,23 @@ int main() { // CHECK5-NEXT: store i32 [[TMP11]], i32* [[DOTOMP_IV19]], align 4 // CHECK5-NEXT: br label [[OMP_INNER_FOR_COND21:%.*]] // CHECK5: omp.inner.for.cond21: -// CHECK5-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV19]], align 4 -// CHECK5-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_UB18]], align 4 +// CHECK5-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV19]], align 4, !llvm.access.group !15 +// CHECK5-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_UB18]], align 4, !llvm.access.group !15 // CHECK5-NEXT: [[CMP22:%.*]] = icmp sle i32 [[TMP12]], [[TMP13]] // CHECK5-NEXT: br i1 [[CMP22]], label [[OMP_INNER_FOR_BODY23:%.*]], label [[OMP_INNER_FOR_END29:%.*]] // CHECK5: omp.inner.for.body23: -// CHECK5-NEXT: 
[[TMP14:%.*]] = load i32, i32* [[DOTOMP_IV19]], align 4 +// CHECK5-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IV19]], align 4, !llvm.access.group !15 // CHECK5-NEXT: [[MUL24:%.*]] = mul nsw i32 [[TMP14]], 1 // CHECK5-NEXT: [[ADD25:%.*]] = add nsw i32 0, [[MUL24]] -// CHECK5-NEXT: store i32 [[ADD25]], i32* [[I20]], align 4 -// CHECK5-NEXT: call void @_Z3fn6v() +// CHECK5-NEXT: store i32 [[ADD25]], i32* [[I20]], align 4, !llvm.access.group !15 +// CHECK5-NEXT: call void @_Z3fn6v(), !llvm.access.group !15 // CHECK5-NEXT: br label [[OMP_BODY_CONTINUE26:%.*]] // CHECK5: omp.body.continue26: // CHECK5-NEXT: br label [[OMP_INNER_FOR_INC27:%.*]] // CHECK5: omp.inner.for.inc27: -// CHECK5-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_IV19]], align 4 +// CHECK5-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_IV19]], align 4, !llvm.access.group !15 // CHECK5-NEXT: [[ADD28:%.*]] = add nsw i32 [[TMP15]], 1 -// CHECK5-NEXT: store i32 [[ADD28]], i32* [[DOTOMP_IV19]], align 4 +// CHECK5-NEXT: store i32 [[ADD28]], i32* [[DOTOMP_IV19]], align 4, !llvm.access.group !15 // CHECK5-NEXT: br label [[OMP_INNER_FOR_COND21]], !llvm.loop [[LOOP16:![0-9]+]] // CHECK5: omp.inner.for.end29: // CHECK5-NEXT: store i32 100, i32* [[I20]], align 4 @@ -6576,23 +6576,23 @@ int main() { // CHECK5-NEXT: store i32 [[TMP0]], i32* [[DOTOMP_IV]], align 4 // CHECK5-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK5: omp.inner.for.cond: -// CHECK5-NEXT: [[TMP1:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK5-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK5-NEXT: [[TMP1:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !18 +// CHECK5-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !18 // CHECK5-NEXT: [[CMP:%.*]] = icmp sle i32 [[TMP1]], [[TMP2]] // CHECK5-NEXT: br i1 [[CMP]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK5: omp.inner.for.body: -// CHECK5-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK5-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !18 // CHECK5-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP3]], 1 // CHECK5-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK5-NEXT: store i32 [[ADD]], i32* [[I]], align 4 -// CHECK5-NEXT: call void @_Z3fn1v() +// CHECK5-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !18 +// CHECK5-NEXT: call void @_Z3fn1v(), !llvm.access.group !18 // CHECK5-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK5: omp.body.continue: // CHECK5-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK5: omp.inner.for.inc: -// CHECK5-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK5-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !18 // CHECK5-NEXT: [[ADD1:%.*]] = add nsw i32 [[TMP4]], 1 -// CHECK5-NEXT: store i32 [[ADD1]], i32* [[DOTOMP_IV]], align 4 +// CHECK5-NEXT: store i32 [[ADD1]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !18 // CHECK5-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP19:![0-9]+]] // CHECK5: omp.inner.for.end: // CHECK5-NEXT: store i32 100, i32* [[I]], align 4 @@ -6602,23 +6602,23 @@ int main() { // CHECK5-NEXT: store i32 [[TMP5]], i32* [[DOTOMP_IV5]], align 4 // CHECK5-NEXT: br label [[OMP_INNER_FOR_COND7:%.*]] // CHECK5: omp.inner.for.cond7: -// CHECK5-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV5]], align 4 -// CHECK5-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_UB4]], align 4 +// CHECK5-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV5]], align 4, 
!llvm.access.group !21 +// CHECK5-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_UB4]], align 4, !llvm.access.group !21 // CHECK5-NEXT: [[CMP8:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] // CHECK5-NEXT: br i1 [[CMP8]], label [[OMP_INNER_FOR_BODY9:%.*]], label [[OMP_INNER_FOR_END15:%.*]] // CHECK5: omp.inner.for.body9: -// CHECK5-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV5]], align 4 +// CHECK5-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV5]], align 4, !llvm.access.group !21 // CHECK5-NEXT: [[MUL10:%.*]] = mul nsw i32 [[TMP8]], 1 // CHECK5-NEXT: [[ADD11:%.*]] = add nsw i32 0, [[MUL10]] -// CHECK5-NEXT: store i32 [[ADD11]], i32* [[I6]], align 4 -// CHECK5-NEXT: call void @_Z3fn2v() +// CHECK5-NEXT: store i32 [[ADD11]], i32* [[I6]], align 4, !llvm.access.group !21 +// CHECK5-NEXT: call void @_Z3fn2v(), !llvm.access.group !21 // CHECK5-NEXT: br label [[OMP_BODY_CONTINUE12:%.*]] // CHECK5: omp.body.continue12: // CHECK5-NEXT: br label [[OMP_INNER_FOR_INC13:%.*]] // CHECK5: omp.inner.for.inc13: -// CHECK5-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV5]], align 4 +// CHECK5-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV5]], align 4, !llvm.access.group !21 // CHECK5-NEXT: [[ADD14:%.*]] = add nsw i32 [[TMP9]], 1 -// CHECK5-NEXT: store i32 [[ADD14]], i32* [[DOTOMP_IV5]], align 4 +// CHECK5-NEXT: store i32 [[ADD14]], i32* [[DOTOMP_IV5]], align 4, !llvm.access.group !21 // CHECK5-NEXT: br label [[OMP_INNER_FOR_COND7]], !llvm.loop [[LOOP22:![0-9]+]] // CHECK5: omp.inner.for.end15: // CHECK5-NEXT: store i32 100, i32* [[I6]], align 4 @@ -6632,23 +6632,23 @@ int main() { // CHECK5-NEXT: store i32 [[TMP11]], i32* [[DOTOMP_IV19]], align 4 // CHECK5-NEXT: br label [[OMP_INNER_FOR_COND21:%.*]] // CHECK5: omp.inner.for.cond21: -// CHECK5-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV19]], align 4 -// CHECK5-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_UB18]], align 4 +// CHECK5-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV19]], align 4, !llvm.access.group !24 +// CHECK5-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_UB18]], align 4, !llvm.access.group !24 // CHECK5-NEXT: [[CMP22:%.*]] = icmp sle i32 [[TMP12]], [[TMP13]] // CHECK5-NEXT: br i1 [[CMP22]], label [[OMP_INNER_FOR_BODY23:%.*]], label [[OMP_INNER_FOR_END29:%.*]] // CHECK5: omp.inner.for.body23: -// CHECK5-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IV19]], align 4 +// CHECK5-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IV19]], align 4, !llvm.access.group !24 // CHECK5-NEXT: [[MUL24:%.*]] = mul nsw i32 [[TMP14]], 1 // CHECK5-NEXT: [[ADD25:%.*]] = add nsw i32 0, [[MUL24]] -// CHECK5-NEXT: store i32 [[ADD25]], i32* [[I20]], align 4 -// CHECK5-NEXT: call void @_Z3fn3v() +// CHECK5-NEXT: store i32 [[ADD25]], i32* [[I20]], align 4, !llvm.access.group !24 +// CHECK5-NEXT: call void @_Z3fn3v(), !llvm.access.group !24 // CHECK5-NEXT: br label [[OMP_BODY_CONTINUE26:%.*]] // CHECK5: omp.body.continue26: // CHECK5-NEXT: br label [[OMP_INNER_FOR_INC27:%.*]] // CHECK5: omp.inner.for.inc27: -// CHECK5-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_IV19]], align 4 +// CHECK5-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_IV19]], align 4, !llvm.access.group !24 // CHECK5-NEXT: [[ADD28:%.*]] = add nsw i32 [[TMP15]], 1 -// CHECK5-NEXT: store i32 [[ADD28]], i32* [[DOTOMP_IV19]], align 4 +// CHECK5-NEXT: store i32 [[ADD28]], i32* [[DOTOMP_IV19]], align 4, !llvm.access.group !24 // CHECK5-NEXT: br label [[OMP_INNER_FOR_COND21]], !llvm.loop [[LOOP25:![0-9]+]] // CHECK5: omp.inner.for.end29: // CHECK5-NEXT: store i32 100, i32* [[I20]], align 4 @@ -6674,22 +6674,22 
@@ int main() { // CHECK6-NEXT: store i32 [[TMP0]], i32* [[DOTOMP_IV]], align 4 // CHECK6-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK6: omp.inner.for.cond: -// CHECK6-NEXT: [[TMP1:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK6-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK6-NEXT: [[TMP1:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !2 +// CHECK6-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !2 // CHECK6-NEXT: [[CMP:%.*]] = icmp sle i32 [[TMP1]], [[TMP2]] // CHECK6-NEXT: br i1 [[CMP]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK6: omp.inner.for.body: -// CHECK6-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK6-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !2 // CHECK6-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP3]], 1 // CHECK6-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK6-NEXT: store i32 [[ADD]], i32* [[I]], align 4 +// CHECK6-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !2 // CHECK6-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK6: omp.body.continue: // CHECK6-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK6: omp.inner.for.inc: -// CHECK6-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK6-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !2 // CHECK6-NEXT: [[ADD1:%.*]] = add nsw i32 [[TMP4]], 1 -// CHECK6-NEXT: store i32 [[ADD1]], i32* [[DOTOMP_IV]], align 4 +// CHECK6-NEXT: store i32 [[ADD1]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !2 // CHECK6-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP3:![0-9]+]] // CHECK6: omp.inner.for.end: // CHECK6-NEXT: store i32 100, i32* [[I]], align 4 @@ -6699,23 +6699,23 @@ int main() { // CHECK6-NEXT: store i32 [[TMP5]], i32* [[DOTOMP_IV5]], align 4 // CHECK6-NEXT: br label [[OMP_INNER_FOR_COND7:%.*]] // CHECK6: omp.inner.for.cond7: -// CHECK6-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV5]], align 4 -// CHECK6-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_UB4]], align 4 +// CHECK6-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV5]], align 4, !llvm.access.group !6 +// CHECK6-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_UB4]], align 4, !llvm.access.group !6 // CHECK6-NEXT: [[CMP8:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] // CHECK6-NEXT: br i1 [[CMP8]], label [[OMP_INNER_FOR_BODY9:%.*]], label [[OMP_INNER_FOR_END15:%.*]] // CHECK6: omp.inner.for.body9: -// CHECK6-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV5]], align 4 +// CHECK6-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV5]], align 4, !llvm.access.group !6 // CHECK6-NEXT: [[MUL10:%.*]] = mul nsw i32 [[TMP8]], 1 // CHECK6-NEXT: [[ADD11:%.*]] = add nsw i32 0, [[MUL10]] -// CHECK6-NEXT: store i32 [[ADD11]], i32* [[I6]], align 4 -// CHECK6-NEXT: call void @_Z9gtid_testv() +// CHECK6-NEXT: store i32 [[ADD11]], i32* [[I6]], align 4, !llvm.access.group !6 +// CHECK6-NEXT: call void @_Z9gtid_testv(), !llvm.access.group !6 // CHECK6-NEXT: br label [[OMP_BODY_CONTINUE12:%.*]] // CHECK6: omp.body.continue12: // CHECK6-NEXT: br label [[OMP_INNER_FOR_INC13:%.*]] // CHECK6: omp.inner.for.inc13: -// CHECK6-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV5]], align 4 +// CHECK6-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV5]], align 4, !llvm.access.group !6 // CHECK6-NEXT: [[ADD14:%.*]] = add nsw i32 [[TMP9]], 1 -// CHECK6-NEXT: store i32 [[ADD14]], i32* [[DOTOMP_IV5]], align 4 +// CHECK6-NEXT: store i32 [[ADD14]], i32* [[DOTOMP_IV5]], align 4, 
!llvm.access.group !6 // CHECK6-NEXT: br label [[OMP_INNER_FOR_COND7]], !llvm.loop [[LOOP7:![0-9]+]] // CHECK6: omp.inner.for.end15: // CHECK6-NEXT: store i32 100, i32* [[I6]], align 4 @@ -6749,23 +6749,23 @@ int main() { // CHECK6-NEXT: store i32 [[TMP0]], i32* [[DOTOMP_IV]], align 4 // CHECK6-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK6: omp.inner.for.cond: -// CHECK6-NEXT: [[TMP1:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK6-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK6-NEXT: [[TMP1:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !9 +// CHECK6-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !9 // CHECK6-NEXT: [[CMP:%.*]] = icmp sle i32 [[TMP1]], [[TMP2]] // CHECK6-NEXT: br i1 [[CMP]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK6: omp.inner.for.body: -// CHECK6-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK6-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !9 // CHECK6-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP3]], 1 // CHECK6-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK6-NEXT: store i32 [[ADD]], i32* [[I]], align 4 -// CHECK6-NEXT: call void @_Z3fn4v() +// CHECK6-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !9 +// CHECK6-NEXT: call void @_Z3fn4v(), !llvm.access.group !9 // CHECK6-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK6: omp.body.continue: // CHECK6-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK6: omp.inner.for.inc: -// CHECK6-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK6-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !9 // CHECK6-NEXT: [[ADD1:%.*]] = add nsw i32 [[TMP4]], 1 -// CHECK6-NEXT: store i32 [[ADD1]], i32* [[DOTOMP_IV]], align 4 +// CHECK6-NEXT: store i32 [[ADD1]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !9 // CHECK6-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP10:![0-9]+]] // CHECK6: omp.inner.for.end: // CHECK6-NEXT: store i32 100, i32* [[I]], align 4 @@ -6775,23 +6775,23 @@ int main() { // CHECK6-NEXT: store i32 [[TMP5]], i32* [[DOTOMP_IV5]], align 4 // CHECK6-NEXT: br label [[OMP_INNER_FOR_COND7:%.*]] // CHECK6: omp.inner.for.cond7: -// CHECK6-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV5]], align 4 -// CHECK6-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_UB4]], align 4 +// CHECK6-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV5]], align 4, !llvm.access.group !12 +// CHECK6-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_UB4]], align 4, !llvm.access.group !12 // CHECK6-NEXT: [[CMP8:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] // CHECK6-NEXT: br i1 [[CMP8]], label [[OMP_INNER_FOR_BODY9:%.*]], label [[OMP_INNER_FOR_END15:%.*]] // CHECK6: omp.inner.for.body9: -// CHECK6-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV5]], align 4 +// CHECK6-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV5]], align 4, !llvm.access.group !12 // CHECK6-NEXT: [[MUL10:%.*]] = mul nsw i32 [[TMP8]], 1 // CHECK6-NEXT: [[ADD11:%.*]] = add nsw i32 0, [[MUL10]] -// CHECK6-NEXT: store i32 [[ADD11]], i32* [[I6]], align 4 -// CHECK6-NEXT: call void @_Z3fn5v() +// CHECK6-NEXT: store i32 [[ADD11]], i32* [[I6]], align 4, !llvm.access.group !12 +// CHECK6-NEXT: call void @_Z3fn5v(), !llvm.access.group !12 // CHECK6-NEXT: br label [[OMP_BODY_CONTINUE12:%.*]] // CHECK6: omp.body.continue12: // CHECK6-NEXT: br label [[OMP_INNER_FOR_INC13:%.*]] // CHECK6: omp.inner.for.inc13: -// CHECK6-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV5]], 
align 4 +// CHECK6-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV5]], align 4, !llvm.access.group !12 // CHECK6-NEXT: [[ADD14:%.*]] = add nsw i32 [[TMP9]], 1 -// CHECK6-NEXT: store i32 [[ADD14]], i32* [[DOTOMP_IV5]], align 4 +// CHECK6-NEXT: store i32 [[ADD14]], i32* [[DOTOMP_IV5]], align 4, !llvm.access.group !12 // CHECK6-NEXT: br label [[OMP_INNER_FOR_COND7]], !llvm.loop [[LOOP13:![0-9]+]] // CHECK6: omp.inner.for.end15: // CHECK6-NEXT: store i32 100, i32* [[I6]], align 4 @@ -6805,23 +6805,23 @@ int main() { // CHECK6-NEXT: store i32 [[TMP11]], i32* [[DOTOMP_IV19]], align 4 // CHECK6-NEXT: br label [[OMP_INNER_FOR_COND21:%.*]] // CHECK6: omp.inner.for.cond21: -// CHECK6-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV19]], align 4 -// CHECK6-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_UB18]], align 4 +// CHECK6-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV19]], align 4, !llvm.access.group !15 +// CHECK6-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_UB18]], align 4, !llvm.access.group !15 // CHECK6-NEXT: [[CMP22:%.*]] = icmp sle i32 [[TMP12]], [[TMP13]] // CHECK6-NEXT: br i1 [[CMP22]], label [[OMP_INNER_FOR_BODY23:%.*]], label [[OMP_INNER_FOR_END29:%.*]] // CHECK6: omp.inner.for.body23: -// CHECK6-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IV19]], align 4 +// CHECK6-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IV19]], align 4, !llvm.access.group !15 // CHECK6-NEXT: [[MUL24:%.*]] = mul nsw i32 [[TMP14]], 1 // CHECK6-NEXT: [[ADD25:%.*]] = add nsw i32 0, [[MUL24]] -// CHECK6-NEXT: store i32 [[ADD25]], i32* [[I20]], align 4 -// CHECK6-NEXT: call void @_Z3fn6v() +// CHECK6-NEXT: store i32 [[ADD25]], i32* [[I20]], align 4, !llvm.access.group !15 +// CHECK6-NEXT: call void @_Z3fn6v(), !llvm.access.group !15 // CHECK6-NEXT: br label [[OMP_BODY_CONTINUE26:%.*]] // CHECK6: omp.body.continue26: // CHECK6-NEXT: br label [[OMP_INNER_FOR_INC27:%.*]] // CHECK6: omp.inner.for.inc27: -// CHECK6-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_IV19]], align 4 +// CHECK6-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_IV19]], align 4, !llvm.access.group !15 // CHECK6-NEXT: [[ADD28:%.*]] = add nsw i32 [[TMP15]], 1 -// CHECK6-NEXT: store i32 [[ADD28]], i32* [[DOTOMP_IV19]], align 4 +// CHECK6-NEXT: store i32 [[ADD28]], i32* [[DOTOMP_IV19]], align 4, !llvm.access.group !15 // CHECK6-NEXT: br label [[OMP_INNER_FOR_COND21]], !llvm.loop [[LOOP16:![0-9]+]] // CHECK6: omp.inner.for.end29: // CHECK6-NEXT: store i32 100, i32* [[I20]], align 4 @@ -6857,23 +6857,23 @@ int main() { // CHECK6-NEXT: store i32 [[TMP0]], i32* [[DOTOMP_IV]], align 4 // CHECK6-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK6: omp.inner.for.cond: -// CHECK6-NEXT: [[TMP1:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK6-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK6-NEXT: [[TMP1:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !18 +// CHECK6-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !18 // CHECK6-NEXT: [[CMP:%.*]] = icmp sle i32 [[TMP1]], [[TMP2]] // CHECK6-NEXT: br i1 [[CMP]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK6: omp.inner.for.body: -// CHECK6-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK6-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !18 // CHECK6-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP3]], 1 // CHECK6-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK6-NEXT: store i32 [[ADD]], i32* [[I]], align 4 -// CHECK6-NEXT: call void @_Z3fn1v() +// CHECK6-NEXT: store 
i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !18 +// CHECK6-NEXT: call void @_Z3fn1v(), !llvm.access.group !18 // CHECK6-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK6: omp.body.continue: // CHECK6-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK6: omp.inner.for.inc: -// CHECK6-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK6-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !18 // CHECK6-NEXT: [[ADD1:%.*]] = add nsw i32 [[TMP4]], 1 -// CHECK6-NEXT: store i32 [[ADD1]], i32* [[DOTOMP_IV]], align 4 +// CHECK6-NEXT: store i32 [[ADD1]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !18 // CHECK6-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP19:![0-9]+]] // CHECK6: omp.inner.for.end: // CHECK6-NEXT: store i32 100, i32* [[I]], align 4 @@ -6883,23 +6883,23 @@ int main() { // CHECK6-NEXT: store i32 [[TMP5]], i32* [[DOTOMP_IV5]], align 4 // CHECK6-NEXT: br label [[OMP_INNER_FOR_COND7:%.*]] // CHECK6: omp.inner.for.cond7: -// CHECK6-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV5]], align 4 -// CHECK6-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_UB4]], align 4 +// CHECK6-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV5]], align 4, !llvm.access.group !21 +// CHECK6-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_UB4]], align 4, !llvm.access.group !21 // CHECK6-NEXT: [[CMP8:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] // CHECK6-NEXT: br i1 [[CMP8]], label [[OMP_INNER_FOR_BODY9:%.*]], label [[OMP_INNER_FOR_END15:%.*]] // CHECK6: omp.inner.for.body9: -// CHECK6-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV5]], align 4 +// CHECK6-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV5]], align 4, !llvm.access.group !21 // CHECK6-NEXT: [[MUL10:%.*]] = mul nsw i32 [[TMP8]], 1 // CHECK6-NEXT: [[ADD11:%.*]] = add nsw i32 0, [[MUL10]] -// CHECK6-NEXT: store i32 [[ADD11]], i32* [[I6]], align 4 -// CHECK6-NEXT: call void @_Z3fn2v() +// CHECK6-NEXT: store i32 [[ADD11]], i32* [[I6]], align 4, !llvm.access.group !21 +// CHECK6-NEXT: call void @_Z3fn2v(), !llvm.access.group !21 // CHECK6-NEXT: br label [[OMP_BODY_CONTINUE12:%.*]] // CHECK6: omp.body.continue12: // CHECK6-NEXT: br label [[OMP_INNER_FOR_INC13:%.*]] // CHECK6: omp.inner.for.inc13: -// CHECK6-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV5]], align 4 +// CHECK6-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV5]], align 4, !llvm.access.group !21 // CHECK6-NEXT: [[ADD14:%.*]] = add nsw i32 [[TMP9]], 1 -// CHECK6-NEXT: store i32 [[ADD14]], i32* [[DOTOMP_IV5]], align 4 +// CHECK6-NEXT: store i32 [[ADD14]], i32* [[DOTOMP_IV5]], align 4, !llvm.access.group !21 // CHECK6-NEXT: br label [[OMP_INNER_FOR_COND7]], !llvm.loop [[LOOP22:![0-9]+]] // CHECK6: omp.inner.for.end15: // CHECK6-NEXT: store i32 100, i32* [[I6]], align 4 @@ -6913,23 +6913,23 @@ int main() { // CHECK6-NEXT: store i32 [[TMP11]], i32* [[DOTOMP_IV19]], align 4 // CHECK6-NEXT: br label [[OMP_INNER_FOR_COND21:%.*]] // CHECK6: omp.inner.for.cond21: -// CHECK6-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV19]], align 4 -// CHECK6-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_UB18]], align 4 +// CHECK6-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV19]], align 4, !llvm.access.group !24 +// CHECK6-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_UB18]], align 4, !llvm.access.group !24 // CHECK6-NEXT: [[CMP22:%.*]] = icmp sle i32 [[TMP12]], [[TMP13]] // CHECK6-NEXT: br i1 [[CMP22]], label [[OMP_INNER_FOR_BODY23:%.*]], label [[OMP_INNER_FOR_END29:%.*]] // CHECK6: omp.inner.for.body23: -// CHECK6-NEXT: [[TMP14:%.*]] = load i32, i32* 
[[DOTOMP_IV19]], align 4 +// CHECK6-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IV19]], align 4, !llvm.access.group !24 // CHECK6-NEXT: [[MUL24:%.*]] = mul nsw i32 [[TMP14]], 1 // CHECK6-NEXT: [[ADD25:%.*]] = add nsw i32 0, [[MUL24]] -// CHECK6-NEXT: store i32 [[ADD25]], i32* [[I20]], align 4 -// CHECK6-NEXT: call void @_Z3fn3v() +// CHECK6-NEXT: store i32 [[ADD25]], i32* [[I20]], align 4, !llvm.access.group !24 +// CHECK6-NEXT: call void @_Z3fn3v(), !llvm.access.group !24 // CHECK6-NEXT: br label [[OMP_BODY_CONTINUE26:%.*]] // CHECK6: omp.body.continue26: // CHECK6-NEXT: br label [[OMP_INNER_FOR_INC27:%.*]] // CHECK6: omp.inner.for.inc27: -// CHECK6-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_IV19]], align 4 +// CHECK6-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_IV19]], align 4, !llvm.access.group !24 // CHECK6-NEXT: [[ADD28:%.*]] = add nsw i32 [[TMP15]], 1 -// CHECK6-NEXT: store i32 [[ADD28]], i32* [[DOTOMP_IV19]], align 4 +// CHECK6-NEXT: store i32 [[ADD28]], i32* [[DOTOMP_IV19]], align 4, !llvm.access.group !24 // CHECK6-NEXT: br label [[OMP_INNER_FOR_COND21]], !llvm.loop [[LOOP25:![0-9]+]] // CHECK6: omp.inner.for.end29: // CHECK6-NEXT: store i32 100, i32* [[I20]], align 4 @@ -6955,22 +6955,22 @@ int main() { // CHECK7-NEXT: store i32 [[TMP0]], i32* [[DOTOMP_IV]], align 4 // CHECK7-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK7: omp.inner.for.cond: -// CHECK7-NEXT: [[TMP1:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK7-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK7-NEXT: [[TMP1:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !2 +// CHECK7-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !2 // CHECK7-NEXT: [[CMP:%.*]] = icmp sle i32 [[TMP1]], [[TMP2]] // CHECK7-NEXT: br i1 [[CMP]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK7: omp.inner.for.body: -// CHECK7-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK7-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !2 // CHECK7-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP3]], 1 // CHECK7-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK7-NEXT: store i32 [[ADD]], i32* [[I]], align 4 +// CHECK7-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !2 // CHECK7-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK7: omp.body.continue: // CHECK7-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK7: omp.inner.for.inc: -// CHECK7-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK7-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !2 // CHECK7-NEXT: [[ADD1:%.*]] = add nsw i32 [[TMP4]], 1 -// CHECK7-NEXT: store i32 [[ADD1]], i32* [[DOTOMP_IV]], align 4 +// CHECK7-NEXT: store i32 [[ADD1]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !2 // CHECK7-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP3:![0-9]+]] // CHECK7: omp.inner.for.end: // CHECK7-NEXT: store i32 100, i32* [[I]], align 4 @@ -6980,23 +6980,23 @@ int main() { // CHECK7-NEXT: store i32 [[TMP5]], i32* [[DOTOMP_IV5]], align 4 // CHECK7-NEXT: br label [[OMP_INNER_FOR_COND7:%.*]] // CHECK7: omp.inner.for.cond7: -// CHECK7-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV5]], align 4 -// CHECK7-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_UB4]], align 4 +// CHECK7-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV5]], align 4, !llvm.access.group !6 +// CHECK7-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_UB4]], align 4, !llvm.access.group !6 // CHECK7-NEXT: [[CMP8:%.*]] = icmp 
sle i32 [[TMP6]], [[TMP7]] // CHECK7-NEXT: br i1 [[CMP8]], label [[OMP_INNER_FOR_BODY9:%.*]], label [[OMP_INNER_FOR_END15:%.*]] // CHECK7: omp.inner.for.body9: -// CHECK7-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV5]], align 4 +// CHECK7-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV5]], align 4, !llvm.access.group !6 // CHECK7-NEXT: [[MUL10:%.*]] = mul nsw i32 [[TMP8]], 1 // CHECK7-NEXT: [[ADD11:%.*]] = add nsw i32 0, [[MUL10]] -// CHECK7-NEXT: store i32 [[ADD11]], i32* [[I6]], align 4 -// CHECK7-NEXT: call void @_Z9gtid_testv() +// CHECK7-NEXT: store i32 [[ADD11]], i32* [[I6]], align 4, !llvm.access.group !6 +// CHECK7-NEXT: call void @_Z9gtid_testv(), !llvm.access.group !6 // CHECK7-NEXT: br label [[OMP_BODY_CONTINUE12:%.*]] // CHECK7: omp.body.continue12: // CHECK7-NEXT: br label [[OMP_INNER_FOR_INC13:%.*]] // CHECK7: omp.inner.for.inc13: -// CHECK7-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV5]], align 4 +// CHECK7-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV5]], align 4, !llvm.access.group !6 // CHECK7-NEXT: [[ADD14:%.*]] = add nsw i32 [[TMP9]], 1 -// CHECK7-NEXT: store i32 [[ADD14]], i32* [[DOTOMP_IV5]], align 4 +// CHECK7-NEXT: store i32 [[ADD14]], i32* [[DOTOMP_IV5]], align 4, !llvm.access.group !6 // CHECK7-NEXT: br label [[OMP_INNER_FOR_COND7]], !llvm.loop [[LOOP7:![0-9]+]] // CHECK7: omp.inner.for.end15: // CHECK7-NEXT: store i32 100, i32* [[I6]], align 4 @@ -7030,23 +7030,23 @@ int main() { // CHECK7-NEXT: store i32 [[TMP0]], i32* [[DOTOMP_IV]], align 4 // CHECK7-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK7: omp.inner.for.cond: -// CHECK7-NEXT: [[TMP1:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK7-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK7-NEXT: [[TMP1:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !9 +// CHECK7-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !9 // CHECK7-NEXT: [[CMP:%.*]] = icmp sle i32 [[TMP1]], [[TMP2]] // CHECK7-NEXT: br i1 [[CMP]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK7: omp.inner.for.body: -// CHECK7-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK7-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !9 // CHECK7-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP3]], 1 // CHECK7-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK7-NEXT: store i32 [[ADD]], i32* [[I]], align 4 -// CHECK7-NEXT: call void @_Z3fn4v() +// CHECK7-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !9 +// CHECK7-NEXT: call void @_Z3fn4v(), !llvm.access.group !9 // CHECK7-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK7: omp.body.continue: // CHECK7-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK7: omp.inner.for.inc: -// CHECK7-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK7-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !9 // CHECK7-NEXT: [[ADD1:%.*]] = add nsw i32 [[TMP4]], 1 -// CHECK7-NEXT: store i32 [[ADD1]], i32* [[DOTOMP_IV]], align 4 +// CHECK7-NEXT: store i32 [[ADD1]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !9 // CHECK7-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP10:![0-9]+]] // CHECK7: omp.inner.for.end: // CHECK7-NEXT: store i32 100, i32* [[I]], align 4 @@ -7090,23 +7090,23 @@ int main() { // CHECK7: omp_if.then: // CHECK7-NEXT: br label [[OMP_INNER_FOR_COND22:%.*]] // CHECK7: omp.inner.for.cond22: -// CHECK7-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_IV19]], align 4 -// CHECK7-NEXT: [[TMP14:%.*]] = 
load i32, i32* [[DOTOMP_UB18]], align 4 +// CHECK7-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_IV19]], align 4, !llvm.access.group !14 +// CHECK7-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_UB18]], align 4, !llvm.access.group !14 // CHECK7-NEXT: [[CMP23:%.*]] = icmp sle i32 [[TMP13]], [[TMP14]] // CHECK7-NEXT: br i1 [[CMP23]], label [[OMP_INNER_FOR_BODY24:%.*]], label [[OMP_INNER_FOR_END30:%.*]] // CHECK7: omp.inner.for.body24: -// CHECK7-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_IV19]], align 4 +// CHECK7-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_IV19]], align 4, !llvm.access.group !14 // CHECK7-NEXT: [[MUL25:%.*]] = mul nsw i32 [[TMP15]], 1 // CHECK7-NEXT: [[ADD26:%.*]] = add nsw i32 0, [[MUL25]] -// CHECK7-NEXT: store i32 [[ADD26]], i32* [[I20]], align 4 -// CHECK7-NEXT: call void @_Z3fn6v() +// CHECK7-NEXT: store i32 [[ADD26]], i32* [[I20]], align 4, !llvm.access.group !14 +// CHECK7-NEXT: call void @_Z3fn6v(), !llvm.access.group !14 // CHECK7-NEXT: br label [[OMP_BODY_CONTINUE27:%.*]] // CHECK7: omp.body.continue27: // CHECK7-NEXT: br label [[OMP_INNER_FOR_INC28:%.*]] // CHECK7: omp.inner.for.inc28: -// CHECK7-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_IV19]], align 4 +// CHECK7-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_IV19]], align 4, !llvm.access.group !14 // CHECK7-NEXT: [[ADD29:%.*]] = add nsw i32 [[TMP16]], 1 -// CHECK7-NEXT: store i32 [[ADD29]], i32* [[DOTOMP_IV19]], align 4 +// CHECK7-NEXT: store i32 [[ADD29]], i32* [[DOTOMP_IV19]], align 4, !llvm.access.group !14 // CHECK7-NEXT: br label [[OMP_INNER_FOR_COND22]], !llvm.loop [[LOOP15:![0-9]+]] // CHECK7: omp.inner.for.end30: // CHECK7-NEXT: br label [[OMP_IF_END:%.*]] @@ -7167,23 +7167,23 @@ int main() { // CHECK7-NEXT: store i32 [[TMP0]], i32* [[DOTOMP_IV]], align 4 // CHECK7-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK7: omp.inner.for.cond: -// CHECK7-NEXT: [[TMP1:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK7-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK7-NEXT: [[TMP1:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !18 +// CHECK7-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !18 // CHECK7-NEXT: [[CMP:%.*]] = icmp sle i32 [[TMP1]], [[TMP2]] // CHECK7-NEXT: br i1 [[CMP]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK7: omp.inner.for.body: -// CHECK7-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK7-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !18 // CHECK7-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP3]], 1 // CHECK7-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK7-NEXT: store i32 [[ADD]], i32* [[I]], align 4 -// CHECK7-NEXT: call void @_Z3fn1v() +// CHECK7-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !18 +// CHECK7-NEXT: call void @_Z3fn1v(), !llvm.access.group !18 // CHECK7-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK7: omp.body.continue: // CHECK7-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK7: omp.inner.for.inc: -// CHECK7-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK7-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !18 // CHECK7-NEXT: [[ADD1:%.*]] = add nsw i32 [[TMP4]], 1 -// CHECK7-NEXT: store i32 [[ADD1]], i32* [[DOTOMP_IV]], align 4 +// CHECK7-NEXT: store i32 [[ADD1]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !18 // CHECK7-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP19:![0-9]+]] // CHECK7: omp.inner.for.end: // CHECK7-NEXT: store 
i32 100, i32* [[I]], align 4 @@ -7223,23 +7223,23 @@ int main() { // CHECK7-NEXT: store i32 [[TMP11]], i32* [[DOTOMP_IV19]], align 4 // CHECK7-NEXT: br label [[OMP_INNER_FOR_COND21:%.*]] // CHECK7: omp.inner.for.cond21: -// CHECK7-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV19]], align 4 -// CHECK7-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_UB18]], align 4 +// CHECK7-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV19]], align 4, !llvm.access.group !22 +// CHECK7-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_UB18]], align 4, !llvm.access.group !22 // CHECK7-NEXT: [[CMP22:%.*]] = icmp sle i32 [[TMP12]], [[TMP13]] // CHECK7-NEXT: br i1 [[CMP22]], label [[OMP_INNER_FOR_BODY23:%.*]], label [[OMP_INNER_FOR_END29:%.*]] // CHECK7: omp.inner.for.body23: -// CHECK7-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IV19]], align 4 +// CHECK7-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IV19]], align 4, !llvm.access.group !22 // CHECK7-NEXT: [[MUL24:%.*]] = mul nsw i32 [[TMP14]], 1 // CHECK7-NEXT: [[ADD25:%.*]] = add nsw i32 0, [[MUL24]] -// CHECK7-NEXT: store i32 [[ADD25]], i32* [[I20]], align 4 -// CHECK7-NEXT: call void @_Z3fn3v() +// CHECK7-NEXT: store i32 [[ADD25]], i32* [[I20]], align 4, !llvm.access.group !22 +// CHECK7-NEXT: call void @_Z3fn3v(), !llvm.access.group !22 // CHECK7-NEXT: br label [[OMP_BODY_CONTINUE26:%.*]] // CHECK7: omp.body.continue26: // CHECK7-NEXT: br label [[OMP_INNER_FOR_INC27:%.*]] // CHECK7: omp.inner.for.inc27: -// CHECK7-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_IV19]], align 4 +// CHECK7-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_IV19]], align 4, !llvm.access.group !22 // CHECK7-NEXT: [[ADD28:%.*]] = add nsw i32 [[TMP15]], 1 -// CHECK7-NEXT: store i32 [[ADD28]], i32* [[DOTOMP_IV19]], align 4 +// CHECK7-NEXT: store i32 [[ADD28]], i32* [[DOTOMP_IV19]], align 4, !llvm.access.group !22 // CHECK7-NEXT: br label [[OMP_INNER_FOR_COND21]], !llvm.loop [[LOOP23:![0-9]+]] // CHECK7: omp.inner.for.end29: // CHECK7-NEXT: store i32 100, i32* [[I20]], align 4 @@ -7265,22 +7265,22 @@ int main() { // CHECK8-NEXT: store i32 [[TMP0]], i32* [[DOTOMP_IV]], align 4 // CHECK8-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK8: omp.inner.for.cond: -// CHECK8-NEXT: [[TMP1:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK8-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK8-NEXT: [[TMP1:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !2 +// CHECK8-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !2 // CHECK8-NEXT: [[CMP:%.*]] = icmp sle i32 [[TMP1]], [[TMP2]] // CHECK8-NEXT: br i1 [[CMP]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK8: omp.inner.for.body: -// CHECK8-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK8-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !2 // CHECK8-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP3]], 1 // CHECK8-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK8-NEXT: store i32 [[ADD]], i32* [[I]], align 4 +// CHECK8-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !2 // CHECK8-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK8: omp.body.continue: // CHECK8-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK8: omp.inner.for.inc: -// CHECK8-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK8-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !2 // CHECK8-NEXT: [[ADD1:%.*]] = add nsw i32 [[TMP4]], 1 -// CHECK8-NEXT: store i32 [[ADD1]], i32* [[DOTOMP_IV]], 
align 4 +// CHECK8-NEXT: store i32 [[ADD1]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !2 // CHECK8-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP3:![0-9]+]] // CHECK8: omp.inner.for.end: // CHECK8-NEXT: store i32 100, i32* [[I]], align 4 @@ -7290,23 +7290,23 @@ int main() { // CHECK8-NEXT: store i32 [[TMP5]], i32* [[DOTOMP_IV5]], align 4 // CHECK8-NEXT: br label [[OMP_INNER_FOR_COND7:%.*]] // CHECK8: omp.inner.for.cond7: -// CHECK8-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV5]], align 4 -// CHECK8-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_UB4]], align 4 +// CHECK8-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV5]], align 4, !llvm.access.group !6 +// CHECK8-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_UB4]], align 4, !llvm.access.group !6 // CHECK8-NEXT: [[CMP8:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] // CHECK8-NEXT: br i1 [[CMP8]], label [[OMP_INNER_FOR_BODY9:%.*]], label [[OMP_INNER_FOR_END15:%.*]] // CHECK8: omp.inner.for.body9: -// CHECK8-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV5]], align 4 +// CHECK8-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV5]], align 4, !llvm.access.group !6 // CHECK8-NEXT: [[MUL10:%.*]] = mul nsw i32 [[TMP8]], 1 // CHECK8-NEXT: [[ADD11:%.*]] = add nsw i32 0, [[MUL10]] -// CHECK8-NEXT: store i32 [[ADD11]], i32* [[I6]], align 4 -// CHECK8-NEXT: call void @_Z9gtid_testv() +// CHECK8-NEXT: store i32 [[ADD11]], i32* [[I6]], align 4, !llvm.access.group !6 +// CHECK8-NEXT: call void @_Z9gtid_testv(), !llvm.access.group !6 // CHECK8-NEXT: br label [[OMP_BODY_CONTINUE12:%.*]] // CHECK8: omp.body.continue12: // CHECK8-NEXT: br label [[OMP_INNER_FOR_INC13:%.*]] // CHECK8: omp.inner.for.inc13: -// CHECK8-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV5]], align 4 +// CHECK8-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV5]], align 4, !llvm.access.group !6 // CHECK8-NEXT: [[ADD14:%.*]] = add nsw i32 [[TMP9]], 1 -// CHECK8-NEXT: store i32 [[ADD14]], i32* [[DOTOMP_IV5]], align 4 +// CHECK8-NEXT: store i32 [[ADD14]], i32* [[DOTOMP_IV5]], align 4, !llvm.access.group !6 // CHECK8-NEXT: br label [[OMP_INNER_FOR_COND7]], !llvm.loop [[LOOP7:![0-9]+]] // CHECK8: omp.inner.for.end15: // CHECK8-NEXT: store i32 100, i32* [[I6]], align 4 @@ -7340,23 +7340,23 @@ int main() { // CHECK8-NEXT: store i32 [[TMP0]], i32* [[DOTOMP_IV]], align 4 // CHECK8-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK8: omp.inner.for.cond: -// CHECK8-NEXT: [[TMP1:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK8-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK8-NEXT: [[TMP1:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !9 +// CHECK8-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !9 // CHECK8-NEXT: [[CMP:%.*]] = icmp sle i32 [[TMP1]], [[TMP2]] // CHECK8-NEXT: br i1 [[CMP]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK8: omp.inner.for.body: -// CHECK8-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK8-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !9 // CHECK8-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP3]], 1 // CHECK8-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK8-NEXT: store i32 [[ADD]], i32* [[I]], align 4 -// CHECK8-NEXT: call void @_Z3fn4v() +// CHECK8-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !9 +// CHECK8-NEXT: call void @_Z3fn4v(), !llvm.access.group !9 // CHECK8-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK8: omp.body.continue: // CHECK8-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // 
CHECK8: omp.inner.for.inc: -// CHECK8-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK8-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !9 // CHECK8-NEXT: [[ADD1:%.*]] = add nsw i32 [[TMP4]], 1 -// CHECK8-NEXT: store i32 [[ADD1]], i32* [[DOTOMP_IV]], align 4 +// CHECK8-NEXT: store i32 [[ADD1]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !9 // CHECK8-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP10:![0-9]+]] // CHECK8: omp.inner.for.end: // CHECK8-NEXT: store i32 100, i32* [[I]], align 4 @@ -7400,23 +7400,23 @@ int main() { // CHECK8: omp_if.then: // CHECK8-NEXT: br label [[OMP_INNER_FOR_COND22:%.*]] // CHECK8: omp.inner.for.cond22: -// CHECK8-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_IV19]], align 4 -// CHECK8-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_UB18]], align 4 +// CHECK8-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_IV19]], align 4, !llvm.access.group !14 +// CHECK8-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_UB18]], align 4, !llvm.access.group !14 // CHECK8-NEXT: [[CMP23:%.*]] = icmp sle i32 [[TMP13]], [[TMP14]] // CHECK8-NEXT: br i1 [[CMP23]], label [[OMP_INNER_FOR_BODY24:%.*]], label [[OMP_INNER_FOR_END30:%.*]] // CHECK8: omp.inner.for.body24: -// CHECK8-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_IV19]], align 4 +// CHECK8-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_IV19]], align 4, !llvm.access.group !14 // CHECK8-NEXT: [[MUL25:%.*]] = mul nsw i32 [[TMP15]], 1 // CHECK8-NEXT: [[ADD26:%.*]] = add nsw i32 0, [[MUL25]] -// CHECK8-NEXT: store i32 [[ADD26]], i32* [[I20]], align 4 -// CHECK8-NEXT: call void @_Z3fn6v() +// CHECK8-NEXT: store i32 [[ADD26]], i32* [[I20]], align 4, !llvm.access.group !14 +// CHECK8-NEXT: call void @_Z3fn6v(), !llvm.access.group !14 // CHECK8-NEXT: br label [[OMP_BODY_CONTINUE27:%.*]] // CHECK8: omp.body.continue27: // CHECK8-NEXT: br label [[OMP_INNER_FOR_INC28:%.*]] // CHECK8: omp.inner.for.inc28: -// CHECK8-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_IV19]], align 4 +// CHECK8-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_IV19]], align 4, !llvm.access.group !14 // CHECK8-NEXT: [[ADD29:%.*]] = add nsw i32 [[TMP16]], 1 -// CHECK8-NEXT: store i32 [[ADD29]], i32* [[DOTOMP_IV19]], align 4 +// CHECK8-NEXT: store i32 [[ADD29]], i32* [[DOTOMP_IV19]], align 4, !llvm.access.group !14 // CHECK8-NEXT: br label [[OMP_INNER_FOR_COND22]], !llvm.loop [[LOOP15:![0-9]+]] // CHECK8: omp.inner.for.end30: // CHECK8-NEXT: br label [[OMP_IF_END:%.*]] @@ -7477,23 +7477,23 @@ int main() { // CHECK8-NEXT: store i32 [[TMP0]], i32* [[DOTOMP_IV]], align 4 // CHECK8-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK8: omp.inner.for.cond: -// CHECK8-NEXT: [[TMP1:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK8-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK8-NEXT: [[TMP1:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !18 +// CHECK8-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !18 // CHECK8-NEXT: [[CMP:%.*]] = icmp sle i32 [[TMP1]], [[TMP2]] // CHECK8-NEXT: br i1 [[CMP]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK8: omp.inner.for.body: -// CHECK8-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK8-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !18 // CHECK8-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP3]], 1 // CHECK8-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK8-NEXT: store i32 [[ADD]], i32* [[I]], align 4 -// CHECK8-NEXT: call void 
@_Z3fn1v() +// CHECK8-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !18 +// CHECK8-NEXT: call void @_Z3fn1v(), !llvm.access.group !18 // CHECK8-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK8: omp.body.continue: // CHECK8-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK8: omp.inner.for.inc: -// CHECK8-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK8-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !18 // CHECK8-NEXT: [[ADD1:%.*]] = add nsw i32 [[TMP4]], 1 -// CHECK8-NEXT: store i32 [[ADD1]], i32* [[DOTOMP_IV]], align 4 +// CHECK8-NEXT: store i32 [[ADD1]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !18 // CHECK8-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP19:![0-9]+]] // CHECK8: omp.inner.for.end: // CHECK8-NEXT: store i32 100, i32* [[I]], align 4 @@ -7533,23 +7533,23 @@ int main() { // CHECK8-NEXT: store i32 [[TMP11]], i32* [[DOTOMP_IV19]], align 4 // CHECK8-NEXT: br label [[OMP_INNER_FOR_COND21:%.*]] // CHECK8: omp.inner.for.cond21: -// CHECK8-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV19]], align 4 -// CHECK8-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_UB18]], align 4 +// CHECK8-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV19]], align 4, !llvm.access.group !22 +// CHECK8-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_UB18]], align 4, !llvm.access.group !22 // CHECK8-NEXT: [[CMP22:%.*]] = icmp sle i32 [[TMP12]], [[TMP13]] // CHECK8-NEXT: br i1 [[CMP22]], label [[OMP_INNER_FOR_BODY23:%.*]], label [[OMP_INNER_FOR_END29:%.*]] // CHECK8: omp.inner.for.body23: -// CHECK8-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IV19]], align 4 +// CHECK8-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IV19]], align 4, !llvm.access.group !22 // CHECK8-NEXT: [[MUL24:%.*]] = mul nsw i32 [[TMP14]], 1 // CHECK8-NEXT: [[ADD25:%.*]] = add nsw i32 0, [[MUL24]] -// CHECK8-NEXT: store i32 [[ADD25]], i32* [[I20]], align 4 -// CHECK8-NEXT: call void @_Z3fn3v() +// CHECK8-NEXT: store i32 [[ADD25]], i32* [[I20]], align 4, !llvm.access.group !22 +// CHECK8-NEXT: call void @_Z3fn3v(), !llvm.access.group !22 // CHECK8-NEXT: br label [[OMP_BODY_CONTINUE26:%.*]] // CHECK8: omp.body.continue26: // CHECK8-NEXT: br label [[OMP_INNER_FOR_INC27:%.*]] // CHECK8: omp.inner.for.inc27: -// CHECK8-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_IV19]], align 4 +// CHECK8-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_IV19]], align 4, !llvm.access.group !22 // CHECK8-NEXT: [[ADD28:%.*]] = add nsw i32 [[TMP15]], 1 -// CHECK8-NEXT: store i32 [[ADD28]], i32* [[DOTOMP_IV19]], align 4 +// CHECK8-NEXT: store i32 [[ADD28]], i32* [[DOTOMP_IV19]], align 4, !llvm.access.group !22 // CHECK8-NEXT: br label [[OMP_INNER_FOR_COND21]], !llvm.loop [[LOOP23:![0-9]+]] // CHECK8: omp.inner.for.end29: // CHECK8-NEXT: store i32 100, i32* [[I20]], align 4 @@ -7623,22 +7623,22 @@ int main() { // CHECK9-NEXT: store i32 [[TMP4]], i32* [[DOTOMP_IV]], align 4 // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK9: omp.inner.for.cond: -// CHECK9-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK9-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK9-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !15 +// CHECK9-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !15 // CHECK9-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] // CHECK9-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK9: omp.inner.for.body: -// 
CHECK9-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK9-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !15 // CHECK9-NEXT: [[TMP8:%.*]] = zext i32 [[TMP7]] to i64 -// CHECK9-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK9-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !15 // CHECK9-NEXT: [[TMP10:%.*]] = zext i32 [[TMP9]] to i64 -// CHECK9-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 2, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64)* @.omp_outlined..1 to void (i32*, i32*, ...)*), i64 [[TMP8]], i64 [[TMP10]]) +// CHECK9-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 2, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64)* @.omp_outlined..1 to void (i32*, i32*, ...)*), i64 [[TMP8]], i64 [[TMP10]]), !llvm.access.group !15 // CHECK9-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK9: omp.inner.for.inc: -// CHECK9-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK9-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK9-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !15 +// CHECK9-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !15 // CHECK9-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP11]], [[TMP12]] -// CHECK9-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !15 // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP16:![0-9]+]] // CHECK9: omp.inner.for.end: // CHECK9-NEXT: br label [[OMP_LOOP_EXIT:%.*]] @@ -7700,22 +7700,22 @@ int main() { // CHECK9-NEXT: store i32 [[TMP6]], i32* [[DOTOMP_IV]], align 4 // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK9: omp.inner.for.cond: -// CHECK9-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK9-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK9-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !19 +// CHECK9-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !19 // CHECK9-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP7]], [[TMP8]] // CHECK9-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK9: omp.inner.for.body: -// CHECK9-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !19 // CHECK9-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP9]], 1 // CHECK9-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK9-NEXT: store i32 [[ADD]], i32* [[I]], align 4 +// CHECK9-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !19 // CHECK9-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK9: omp.body.continue: // CHECK9-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK9: omp.inner.for.inc: -// CHECK9-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !19 // CHECK9-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP10]], 1 -// CHECK9-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !19 // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP20:![0-9]+]] // 
CHECK9: omp.inner.for.end: // CHECK9-NEXT: br label [[OMP_LOOP_EXIT:%.*]] @@ -7775,26 +7775,26 @@ int main() { // CHECK9-NEXT: store i32 [[TMP4]], i32* [[DOTOMP_IV]], align 4 // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK9: omp.inner.for.cond: -// CHECK9-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK9-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK9-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !24 +// CHECK9-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !24 // CHECK9-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] // CHECK9-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK9: omp.inner.for.body: -// CHECK9-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK9-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !24 // CHECK9-NEXT: [[TMP8:%.*]] = zext i32 [[TMP7]] to i64 -// CHECK9-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK9-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !24 // CHECK9-NEXT: [[TMP10:%.*]] = zext i32 [[TMP9]] to i64 -// CHECK9-NEXT: call void @__kmpc_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]) -// CHECK9-NEXT: [[TMP11:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK9-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4 -// CHECK9-NEXT: call void @.omp_outlined..3(i32* [[TMP11]], i32* [[DOTBOUND_ZERO_ADDR]], i64 [[TMP8]], i64 [[TMP10]]) #[[ATTR2]] -// CHECK9-NEXT: call void @__kmpc_end_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]) +// CHECK9-NEXT: call void @__kmpc_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]), !llvm.access.group !24 +// CHECK9-NEXT: [[TMP11:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8, !llvm.access.group !24 +// CHECK9-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4, !llvm.access.group !24 +// CHECK9-NEXT: call void @.omp_outlined..3(i32* [[TMP11]], i32* [[DOTBOUND_ZERO_ADDR]], i64 [[TMP8]], i64 [[TMP10]]) #[[ATTR2]], !llvm.access.group !24 +// CHECK9-NEXT: call void @__kmpc_end_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]), !llvm.access.group !24 // CHECK9-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK9: omp.inner.for.inc: -// CHECK9-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK9-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK9-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !24 +// CHECK9-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !24 // CHECK9-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP12]], [[TMP13]] -// CHECK9-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !24 // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP25:![0-9]+]] // CHECK9: omp.inner.for.end: // CHECK9-NEXT: br label [[OMP_LOOP_EXIT:%.*]] @@ -7856,23 +7856,23 @@ int main() { // CHECK9-NEXT: store i32 [[TMP6]], i32* [[DOTOMP_IV]], align 4 // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK9: omp.inner.for.cond: -// CHECK9-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK9-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK9-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !27 +// 
CHECK9-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !27 // CHECK9-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP7]], [[TMP8]] // CHECK9-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK9: omp.inner.for.body: -// CHECK9-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !27 // CHECK9-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP9]], 1 // CHECK9-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK9-NEXT: store i32 [[ADD]], i32* [[I]], align 4 -// CHECK9-NEXT: call void @_Z9gtid_testv() +// CHECK9-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !27 +// CHECK9-NEXT: call void @_Z9gtid_testv(), !llvm.access.group !27 // CHECK9-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK9: omp.body.continue: // CHECK9-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK9: omp.inner.for.inc: -// CHECK9-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !27 // CHECK9-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP10]], 1 -// CHECK9-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !27 // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP28:![0-9]+]] // CHECK9: omp.inner.for.end: // CHECK9-NEXT: br label [[OMP_LOOP_EXIT:%.*]] @@ -7994,22 +7994,22 @@ int main() { // CHECK9-NEXT: store i32 [[TMP4]], i32* [[DOTOMP_IV]], align 4 // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK9: omp.inner.for.cond: -// CHECK9-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK9-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK9-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !30 +// CHECK9-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !30 // CHECK9-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] // CHECK9-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK9: omp.inner.for.body: -// CHECK9-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK9-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !30 // CHECK9-NEXT: [[TMP8:%.*]] = zext i32 [[TMP7]] to i64 -// CHECK9-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK9-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !30 // CHECK9-NEXT: [[TMP10:%.*]] = zext i32 [[TMP9]] to i64 -// CHECK9-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 2, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64)* @.omp_outlined..5 to void (i32*, i32*, ...)*), i64 [[TMP8]], i64 [[TMP10]]) +// CHECK9-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) 
@__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 2, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64)* @.omp_outlined..5 to void (i32*, i32*, ...)*), i64 [[TMP8]], i64 [[TMP10]]), !llvm.access.group !30 // CHECK9-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK9: omp.inner.for.inc: -// CHECK9-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK9-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK9-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !30 +// CHECK9-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !30 // CHECK9-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP11]], [[TMP12]] -// CHECK9-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !30 // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP31:![0-9]+]] // CHECK9: omp.inner.for.end: // CHECK9-NEXT: br label [[OMP_LOOP_EXIT:%.*]] @@ -8071,23 +8071,23 @@ int main() { // CHECK9-NEXT: store i32 [[TMP6]], i32* [[DOTOMP_IV]], align 4 // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK9: omp.inner.for.cond: -// CHECK9-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK9-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK9-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !33 +// CHECK9-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !33 // CHECK9-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP7]], [[TMP8]] // CHECK9-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK9: omp.inner.for.body: -// CHECK9-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !33 // CHECK9-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP9]], 1 // CHECK9-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK9-NEXT: store i32 [[ADD]], i32* [[I]], align 4 -// CHECK9-NEXT: call void @_Z3fn4v() +// CHECK9-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !33 +// CHECK9-NEXT: call void @_Z3fn4v(), !llvm.access.group !33 // CHECK9-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK9: omp.body.continue: // CHECK9-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK9: omp.inner.for.inc: -// CHECK9-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !33 // CHECK9-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP10]], 1 -// CHECK9-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !33 // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP34:![0-9]+]] // CHECK9: omp.inner.for.end: // CHECK9-NEXT: br label [[OMP_LOOP_EXIT:%.*]] @@ -8147,26 +8147,26 @@ int main() { // CHECK9-NEXT: store i32 [[TMP4]], i32* [[DOTOMP_IV]], align 4 // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK9: omp.inner.for.cond: -// CHECK9-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK9-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK9-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !36 +// CHECK9-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !36 // CHECK9-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] // CHECK9-NEXT: br i1 [[CMP1]], label 
[[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK9: omp.inner.for.body: -// CHECK9-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK9-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !36 // CHECK9-NEXT: [[TMP8:%.*]] = zext i32 [[TMP7]] to i64 -// CHECK9-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK9-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !36 // CHECK9-NEXT: [[TMP10:%.*]] = zext i32 [[TMP9]] to i64 -// CHECK9-NEXT: call void @__kmpc_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]) -// CHECK9-NEXT: [[TMP11:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK9-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4 -// CHECK9-NEXT: call void @.omp_outlined..7(i32* [[TMP11]], i32* [[DOTBOUND_ZERO_ADDR]], i64 [[TMP8]], i64 [[TMP10]]) #[[ATTR2]] -// CHECK9-NEXT: call void @__kmpc_end_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]) +// CHECK9-NEXT: call void @__kmpc_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]), !llvm.access.group !36 +// CHECK9-NEXT: [[TMP11:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8, !llvm.access.group !36 +// CHECK9-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4, !llvm.access.group !36 +// CHECK9-NEXT: call void @.omp_outlined..7(i32* [[TMP11]], i32* [[DOTBOUND_ZERO_ADDR]], i64 [[TMP8]], i64 [[TMP10]]) #[[ATTR2]], !llvm.access.group !36 +// CHECK9-NEXT: call void @__kmpc_end_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]), !llvm.access.group !36 // CHECK9-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK9: omp.inner.for.inc: -// CHECK9-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK9-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK9-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !36 +// CHECK9-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !36 // CHECK9-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP12]], [[TMP13]] -// CHECK9-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !36 // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP37:![0-9]+]] // CHECK9: omp.inner.for.end: // CHECK9-NEXT: br label [[OMP_LOOP_EXIT:%.*]] @@ -8228,23 +8228,23 @@ int main() { // CHECK9-NEXT: store i32 [[TMP6]], i32* [[DOTOMP_IV]], align 4 // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK9: omp.inner.for.cond: -// CHECK9-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK9-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK9-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !39 +// CHECK9-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !39 // CHECK9-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP7]], [[TMP8]] // CHECK9-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK9: omp.inner.for.body: -// CHECK9-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !39 // CHECK9-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP9]], 1 // CHECK9-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK9-NEXT: store i32 [[ADD]], i32* [[I]], align 4 -// CHECK9-NEXT: call void @_Z3fn5v() +// CHECK9-NEXT: store i32 [[ADD]], i32* [[I]], align 4, 
!llvm.access.group !39 +// CHECK9-NEXT: call void @_Z3fn5v(), !llvm.access.group !39 // CHECK9-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK9: omp.body.continue: // CHECK9-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK9: omp.inner.for.inc: -// CHECK9-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !39 // CHECK9-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP10]], 1 -// CHECK9-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !39 // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP40:![0-9]+]] // CHECK9: omp.inner.for.end: // CHECK9-NEXT: br label [[OMP_LOOP_EXIT:%.*]] @@ -8322,35 +8322,35 @@ int main() { // CHECK9-NEXT: store i32 [[TMP4]], i32* [[DOTOMP_IV]], align 4 // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK9: omp.inner.for.cond: -// CHECK9-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK9-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK9-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !42 +// CHECK9-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !42 // CHECK9-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] // CHECK9-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK9: omp.inner.for.body: -// CHECK9-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK9-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !42 // CHECK9-NEXT: [[TMP8:%.*]] = zext i32 [[TMP7]] to i64 -// CHECK9-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK9-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !42 // CHECK9-NEXT: [[TMP10:%.*]] = zext i32 [[TMP9]] to i64 -// CHECK9-NEXT: [[TMP11:%.*]] = load i8, i8* [[CONV]], align 8 +// CHECK9-NEXT: [[TMP11:%.*]] = load i8, i8* [[CONV]], align 8, !llvm.access.group !42 // CHECK9-NEXT: [[TOBOOL:%.*]] = trunc i8 [[TMP11]] to i1 // CHECK9-NEXT: br i1 [[TOBOOL]], label [[OMP_IF_THEN:%.*]], label [[OMP_IF_ELSE:%.*]] // CHECK9: omp_if.then: -// CHECK9-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 2, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64)* @.omp_outlined..9 to void (i32*, i32*, ...)*), i64 [[TMP8]], i64 [[TMP10]]) +// CHECK9-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) 
@__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 2, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64)* @.omp_outlined..9 to void (i32*, i32*, ...)*), i64 [[TMP8]], i64 [[TMP10]]), !llvm.access.group !42 // CHECK9-NEXT: br label [[OMP_IF_END:%.*]] // CHECK9: omp_if.else: -// CHECK9-NEXT: call void @__kmpc_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]) -// CHECK9-NEXT: [[TMP12:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK9-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4 -// CHECK9-NEXT: call void @.omp_outlined..9(i32* [[TMP12]], i32* [[DOTBOUND_ZERO_ADDR]], i64 [[TMP8]], i64 [[TMP10]]) #[[ATTR2]] -// CHECK9-NEXT: call void @__kmpc_end_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]) +// CHECK9-NEXT: call void @__kmpc_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]), !llvm.access.group !42 +// CHECK9-NEXT: [[TMP12:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8, !llvm.access.group !42 +// CHECK9-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4, !llvm.access.group !42 +// CHECK9-NEXT: call void @.omp_outlined..9(i32* [[TMP12]], i32* [[DOTBOUND_ZERO_ADDR]], i64 [[TMP8]], i64 [[TMP10]]) #[[ATTR2]], !llvm.access.group !42 +// CHECK9-NEXT: call void @__kmpc_end_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]), !llvm.access.group !42 // CHECK9-NEXT: br label [[OMP_IF_END]] // CHECK9: omp_if.end: // CHECK9-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK9: omp.inner.for.inc: -// CHECK9-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK9-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK9-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !42 +// CHECK9-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !42 // CHECK9-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP13]], [[TMP14]] -// CHECK9-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !42 // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP43:![0-9]+]] // CHECK9: omp.inner.for.end: // CHECK9-NEXT: br label [[OMP_LOOP_EXIT:%.*]] @@ -8412,23 +8412,23 @@ int main() { // CHECK9-NEXT: store i32 [[TMP6]], i32* [[DOTOMP_IV]], align 4 // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK9: omp.inner.for.cond: -// CHECK9-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK9-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK9-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !45 +// CHECK9-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !45 // CHECK9-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP7]], [[TMP8]] // CHECK9-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK9: omp.inner.for.body: -// CHECK9-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !45 // CHECK9-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP9]], 1 // CHECK9-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK9-NEXT: store i32 [[ADD]], i32* [[I]], align 4 -// CHECK9-NEXT: call void @_Z3fn6v() +// CHECK9-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !45 +// CHECK9-NEXT: call void @_Z3fn6v(), !llvm.access.group !45 // CHECK9-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK9: omp.body.continue: // CHECK9-NEXT: br label 
[[OMP_INNER_FOR_INC:%.*]] // CHECK9: omp.inner.for.inc: -// CHECK9-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !45 // CHECK9-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP10]], 1 -// CHECK9-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !45 // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP46:![0-9]+]] // CHECK9: omp.inner.for.end: // CHECK9-NEXT: br label [[OMP_LOOP_EXIT:%.*]] @@ -8548,22 +8548,22 @@ int main() { // CHECK9-NEXT: store i32 [[TMP4]], i32* [[DOTOMP_IV]], align 4 // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK9: omp.inner.for.cond: -// CHECK9-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK9-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK9-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !48 +// CHECK9-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !48 // CHECK9-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] // CHECK9-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK9: omp.inner.for.body: -// CHECK9-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK9-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !48 // CHECK9-NEXT: [[TMP8:%.*]] = zext i32 [[TMP7]] to i64 -// CHECK9-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK9-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !48 // CHECK9-NEXT: [[TMP10:%.*]] = zext i32 [[TMP9]] to i64 -// CHECK9-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 2, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64)* @.omp_outlined..11 to void (i32*, i32*, ...)*), i64 [[TMP8]], i64 [[TMP10]]) +// CHECK9-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) 
@__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 2, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64)* @.omp_outlined..11 to void (i32*, i32*, ...)*), i64 [[TMP8]], i64 [[TMP10]]), !llvm.access.group !48 // CHECK9-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK9: omp.inner.for.inc: -// CHECK9-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK9-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK9-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !48 +// CHECK9-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !48 // CHECK9-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP11]], [[TMP12]] -// CHECK9-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !48 // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP49:![0-9]+]] // CHECK9: omp.inner.for.end: // CHECK9-NEXT: br label [[OMP_LOOP_EXIT:%.*]] @@ -8625,23 +8625,23 @@ int main() { // CHECK9-NEXT: store i32 [[TMP6]], i32* [[DOTOMP_IV]], align 4 // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK9: omp.inner.for.cond: -// CHECK9-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK9-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK9-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !51 +// CHECK9-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !51 // CHECK9-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP7]], [[TMP8]] // CHECK9-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK9: omp.inner.for.body: -// CHECK9-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !51 // CHECK9-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP9]], 1 // CHECK9-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK9-NEXT: store i32 [[ADD]], i32* [[I]], align 4 -// CHECK9-NEXT: call void @_Z3fn1v() +// CHECK9-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !51 +// CHECK9-NEXT: call void @_Z3fn1v(), !llvm.access.group !51 // CHECK9-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK9: omp.body.continue: // CHECK9-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK9: omp.inner.for.inc: -// CHECK9-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !51 // CHECK9-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP10]], 1 -// CHECK9-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !51 // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP52:![0-9]+]] // CHECK9: omp.inner.for.end: // CHECK9-NEXT: br label [[OMP_LOOP_EXIT:%.*]] @@ -8701,26 +8701,26 @@ int main() { // CHECK9-NEXT: store i32 [[TMP4]], i32* [[DOTOMP_IV]], align 4 // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK9: omp.inner.for.cond: -// CHECK9-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK9-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK9-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !54 +// CHECK9-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !54 // CHECK9-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] // CHECK9-NEXT: br i1 [[CMP1]], label 
[[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK9: omp.inner.for.body: -// CHECK9-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK9-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !54 // CHECK9-NEXT: [[TMP8:%.*]] = zext i32 [[TMP7]] to i64 -// CHECK9-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK9-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !54 // CHECK9-NEXT: [[TMP10:%.*]] = zext i32 [[TMP9]] to i64 -// CHECK9-NEXT: call void @__kmpc_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]) -// CHECK9-NEXT: [[TMP11:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK9-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4 -// CHECK9-NEXT: call void @.omp_outlined..13(i32* [[TMP11]], i32* [[DOTBOUND_ZERO_ADDR]], i64 [[TMP8]], i64 [[TMP10]]) #[[ATTR2]] -// CHECK9-NEXT: call void @__kmpc_end_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]) +// CHECK9-NEXT: call void @__kmpc_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]), !llvm.access.group !54 +// CHECK9-NEXT: [[TMP11:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8, !llvm.access.group !54 +// CHECK9-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4, !llvm.access.group !54 +// CHECK9-NEXT: call void @.omp_outlined..13(i32* [[TMP11]], i32* [[DOTBOUND_ZERO_ADDR]], i64 [[TMP8]], i64 [[TMP10]]) #[[ATTR2]], !llvm.access.group !54 +// CHECK9-NEXT: call void @__kmpc_end_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]), !llvm.access.group !54 // CHECK9-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK9: omp.inner.for.inc: -// CHECK9-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK9-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK9-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !54 +// CHECK9-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !54 // CHECK9-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP12]], [[TMP13]] -// CHECK9-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !54 // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP55:![0-9]+]] // CHECK9: omp.inner.for.end: // CHECK9-NEXT: br label [[OMP_LOOP_EXIT:%.*]] @@ -8782,23 +8782,23 @@ int main() { // CHECK9-NEXT: store i32 [[TMP6]], i32* [[DOTOMP_IV]], align 4 // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK9: omp.inner.for.cond: -// CHECK9-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK9-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK9-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !57 +// CHECK9-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !57 // CHECK9-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP7]], [[TMP8]] // CHECK9-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK9: omp.inner.for.body: -// CHECK9-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !57 // CHECK9-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP9]], 1 // CHECK9-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK9-NEXT: store i32 [[ADD]], i32* [[I]], align 4 -// CHECK9-NEXT: call void @_Z3fn2v() +// CHECK9-NEXT: store i32 [[ADD]], i32* [[I]], align 4, 
!llvm.access.group !57 +// CHECK9-NEXT: call void @_Z3fn2v(), !llvm.access.group !57 // CHECK9-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK9: omp.body.continue: // CHECK9-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK9: omp.inner.for.inc: -// CHECK9-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !57 // CHECK9-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP10]], 1 -// CHECK9-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !57 // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP58:![0-9]+]] // CHECK9: omp.inner.for.end: // CHECK9-NEXT: br label [[OMP_LOOP_EXIT:%.*]] @@ -8876,35 +8876,35 @@ int main() { // CHECK9-NEXT: store i32 [[TMP4]], i32* [[DOTOMP_IV]], align 4 // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK9: omp.inner.for.cond: -// CHECK9-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK9-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK9-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !60 +// CHECK9-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !60 // CHECK9-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] // CHECK9-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK9: omp.inner.for.body: -// CHECK9-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK9-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !60 // CHECK9-NEXT: [[TMP8:%.*]] = zext i32 [[TMP7]] to i64 -// CHECK9-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK9-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !60 // CHECK9-NEXT: [[TMP10:%.*]] = zext i32 [[TMP9]] to i64 -// CHECK9-NEXT: [[TMP11:%.*]] = load i8, i8* [[CONV]], align 8 +// CHECK9-NEXT: [[TMP11:%.*]] = load i8, i8* [[CONV]], align 8, !llvm.access.group !60 // CHECK9-NEXT: [[TOBOOL:%.*]] = trunc i8 [[TMP11]] to i1 // CHECK9-NEXT: br i1 [[TOBOOL]], label [[OMP_IF_THEN:%.*]], label [[OMP_IF_ELSE:%.*]] // CHECK9: omp_if.then: -// CHECK9-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 2, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64)* @.omp_outlined..15 to void (i32*, i32*, ...)*), i64 [[TMP8]], i64 [[TMP10]]) +// CHECK9-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) 
@__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 2, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64)* @.omp_outlined..15 to void (i32*, i32*, ...)*), i64 [[TMP8]], i64 [[TMP10]]), !llvm.access.group !60 // CHECK9-NEXT: br label [[OMP_IF_END:%.*]] // CHECK9: omp_if.else: -// CHECK9-NEXT: call void @__kmpc_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]) -// CHECK9-NEXT: [[TMP12:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK9-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4 -// CHECK9-NEXT: call void @.omp_outlined..15(i32* [[TMP12]], i32* [[DOTBOUND_ZERO_ADDR]], i64 [[TMP8]], i64 [[TMP10]]) #[[ATTR2]] -// CHECK9-NEXT: call void @__kmpc_end_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]) +// CHECK9-NEXT: call void @__kmpc_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]), !llvm.access.group !60 +// CHECK9-NEXT: [[TMP12:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8, !llvm.access.group !60 +// CHECK9-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4, !llvm.access.group !60 +// CHECK9-NEXT: call void @.omp_outlined..15(i32* [[TMP12]], i32* [[DOTBOUND_ZERO_ADDR]], i64 [[TMP8]], i64 [[TMP10]]) #[[ATTR2]], !llvm.access.group !60 +// CHECK9-NEXT: call void @__kmpc_end_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]), !llvm.access.group !60 // CHECK9-NEXT: br label [[OMP_IF_END]] // CHECK9: omp_if.end: // CHECK9-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK9: omp.inner.for.inc: -// CHECK9-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK9-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK9-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !60 +// CHECK9-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !60 // CHECK9-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP13]], [[TMP14]] -// CHECK9-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !60 // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP61:![0-9]+]] // CHECK9: omp.inner.for.end: // CHECK9-NEXT: br label [[OMP_LOOP_EXIT:%.*]] @@ -8966,23 +8966,23 @@ int main() { // CHECK9-NEXT: store i32 [[TMP6]], i32* [[DOTOMP_IV]], align 4 // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK9: omp.inner.for.cond: -// CHECK9-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK9-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK9-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !63 +// CHECK9-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !63 // CHECK9-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP7]], [[TMP8]] // CHECK9-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK9: omp.inner.for.body: -// CHECK9-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !63 // CHECK9-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP9]], 1 // CHECK9-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK9-NEXT: store i32 [[ADD]], i32* [[I]], align 4 -// CHECK9-NEXT: call void @_Z3fn3v() +// CHECK9-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !63 +// CHECK9-NEXT: call void @_Z3fn3v(), !llvm.access.group !63 // CHECK9-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK9: omp.body.continue: // CHECK9-NEXT: br label 
[[OMP_INNER_FOR_INC:%.*]] // CHECK9: omp.inner.for.inc: -// CHECK9-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !63 // CHECK9-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP10]], 1 -// CHECK9-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !63 // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP64:![0-9]+]] // CHECK9: omp.inner.for.end: // CHECK9-NEXT: br label [[OMP_LOOP_EXIT:%.*]] @@ -9072,22 +9072,22 @@ int main() { // CHECK10-NEXT: store i32 [[TMP4]], i32* [[DOTOMP_IV]], align 4 // CHECK10-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK10: omp.inner.for.cond: -// CHECK10-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK10-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK10-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !15 +// CHECK10-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !15 // CHECK10-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] // CHECK10-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK10: omp.inner.for.body: -// CHECK10-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK10-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !15 // CHECK10-NEXT: [[TMP8:%.*]] = zext i32 [[TMP7]] to i64 -// CHECK10-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK10-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !15 // CHECK10-NEXT: [[TMP10:%.*]] = zext i32 [[TMP9]] to i64 -// CHECK10-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 2, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64)* @.omp_outlined..1 to void (i32*, i32*, ...)*), i64 [[TMP8]], i64 [[TMP10]]) +// CHECK10-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) 
@__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 2, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64)* @.omp_outlined..1 to void (i32*, i32*, ...)*), i64 [[TMP8]], i64 [[TMP10]]), !llvm.access.group !15 // CHECK10-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK10: omp.inner.for.inc: -// CHECK10-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK10-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK10-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !15 +// CHECK10-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !15 // CHECK10-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP11]], [[TMP12]] -// CHECK10-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 +// CHECK10-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !15 // CHECK10-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP16:![0-9]+]] // CHECK10: omp.inner.for.end: // CHECK10-NEXT: br label [[OMP_LOOP_EXIT:%.*]] @@ -9149,22 +9149,22 @@ int main() { // CHECK10-NEXT: store i32 [[TMP6]], i32* [[DOTOMP_IV]], align 4 // CHECK10-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK10: omp.inner.for.cond: -// CHECK10-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK10-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK10-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !19 +// CHECK10-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !19 // CHECK10-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP7]], [[TMP8]] // CHECK10-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK10: omp.inner.for.body: -// CHECK10-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK10-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !19 // CHECK10-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP9]], 1 // CHECK10-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK10-NEXT: store i32 [[ADD]], i32* [[I]], align 4 +// CHECK10-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !19 // CHECK10-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK10: omp.body.continue: // CHECK10-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK10: omp.inner.for.inc: -// CHECK10-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK10-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !19 // CHECK10-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP10]], 1 -// CHECK10-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4 +// CHECK10-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !19 // CHECK10-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP20:![0-9]+]] // CHECK10: omp.inner.for.end: // CHECK10-NEXT: br label [[OMP_LOOP_EXIT:%.*]] @@ -9224,26 +9224,26 @@ int main() { // CHECK10-NEXT: store i32 [[TMP4]], i32* [[DOTOMP_IV]], align 4 // CHECK10-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK10: omp.inner.for.cond: -// CHECK10-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK10-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK10-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !24 +// CHECK10-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !24 // CHECK10-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] // CHECK10-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // 
CHECK10: omp.inner.for.body: -// CHECK10-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK10-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !24 // CHECK10-NEXT: [[TMP8:%.*]] = zext i32 [[TMP7]] to i64 -// CHECK10-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK10-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !24 // CHECK10-NEXT: [[TMP10:%.*]] = zext i32 [[TMP9]] to i64 -// CHECK10-NEXT: call void @__kmpc_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]) -// CHECK10-NEXT: [[TMP11:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK10-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4 -// CHECK10-NEXT: call void @.omp_outlined..3(i32* [[TMP11]], i32* [[DOTBOUND_ZERO_ADDR]], i64 [[TMP8]], i64 [[TMP10]]) #[[ATTR2]] -// CHECK10-NEXT: call void @__kmpc_end_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]) +// CHECK10-NEXT: call void @__kmpc_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]), !llvm.access.group !24 +// CHECK10-NEXT: [[TMP11:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8, !llvm.access.group !24 +// CHECK10-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4, !llvm.access.group !24 +// CHECK10-NEXT: call void @.omp_outlined..3(i32* [[TMP11]], i32* [[DOTBOUND_ZERO_ADDR]], i64 [[TMP8]], i64 [[TMP10]]) #[[ATTR2]], !llvm.access.group !24 +// CHECK10-NEXT: call void @__kmpc_end_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]), !llvm.access.group !24 // CHECK10-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK10: omp.inner.for.inc: -// CHECK10-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK10-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK10-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !24 +// CHECK10-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !24 // CHECK10-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP12]], [[TMP13]] -// CHECK10-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 +// CHECK10-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !24 // CHECK10-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP25:![0-9]+]] // CHECK10: omp.inner.for.end: // CHECK10-NEXT: br label [[OMP_LOOP_EXIT:%.*]] @@ -9305,23 +9305,23 @@ int main() { // CHECK10-NEXT: store i32 [[TMP6]], i32* [[DOTOMP_IV]], align 4 // CHECK10-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK10: omp.inner.for.cond: -// CHECK10-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK10-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK10-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !27 +// CHECK10-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !27 // CHECK10-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP7]], [[TMP8]] // CHECK10-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK10: omp.inner.for.body: -// CHECK10-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK10-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !27 // CHECK10-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP9]], 1 // CHECK10-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK10-NEXT: store i32 [[ADD]], i32* [[I]], align 4 -// CHECK10-NEXT: call void @_Z9gtid_testv() +// CHECK10-NEXT: store i32 [[ADD]], i32* [[I]], align 4, 
!llvm.access.group !27 +// CHECK10-NEXT: call void @_Z9gtid_testv(), !llvm.access.group !27 // CHECK10-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK10: omp.body.continue: // CHECK10-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK10: omp.inner.for.inc: -// CHECK10-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK10-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !27 // CHECK10-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP10]], 1 -// CHECK10-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4 +// CHECK10-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !27 // CHECK10-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP28:![0-9]+]] // CHECK10: omp.inner.for.end: // CHECK10-NEXT: br label [[OMP_LOOP_EXIT:%.*]] @@ -9443,22 +9443,22 @@ int main() { // CHECK10-NEXT: store i32 [[TMP4]], i32* [[DOTOMP_IV]], align 4 // CHECK10-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK10: omp.inner.for.cond: -// CHECK10-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK10-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK10-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !30 +// CHECK10-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !30 // CHECK10-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] // CHECK10-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK10: omp.inner.for.body: -// CHECK10-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK10-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !30 // CHECK10-NEXT: [[TMP8:%.*]] = zext i32 [[TMP7]] to i64 -// CHECK10-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK10-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !30 // CHECK10-NEXT: [[TMP10:%.*]] = zext i32 [[TMP9]] to i64 -// CHECK10-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 2, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64)* @.omp_outlined..5 to void (i32*, i32*, ...)*), i64 [[TMP8]], i64 [[TMP10]]) +// CHECK10-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) 
@__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 2, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64)* @.omp_outlined..5 to void (i32*, i32*, ...)*), i64 [[TMP8]], i64 [[TMP10]]), !llvm.access.group !30 // CHECK10-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK10: omp.inner.for.inc: -// CHECK10-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK10-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK10-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !30 +// CHECK10-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !30 // CHECK10-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP11]], [[TMP12]] -// CHECK10-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 +// CHECK10-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !30 // CHECK10-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP31:![0-9]+]] // CHECK10: omp.inner.for.end: // CHECK10-NEXT: br label [[OMP_LOOP_EXIT:%.*]] @@ -9520,23 +9520,23 @@ int main() { // CHECK10-NEXT: store i32 [[TMP6]], i32* [[DOTOMP_IV]], align 4 // CHECK10-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK10: omp.inner.for.cond: -// CHECK10-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK10-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK10-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !33 +// CHECK10-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !33 // CHECK10-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP7]], [[TMP8]] // CHECK10-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK10: omp.inner.for.body: -// CHECK10-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK10-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !33 // CHECK10-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP9]], 1 // CHECK10-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK10-NEXT: store i32 [[ADD]], i32* [[I]], align 4 -// CHECK10-NEXT: call void @_Z3fn4v() +// CHECK10-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !33 +// CHECK10-NEXT: call void @_Z3fn4v(), !llvm.access.group !33 // CHECK10-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK10: omp.body.continue: // CHECK10-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK10: omp.inner.for.inc: -// CHECK10-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK10-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !33 // CHECK10-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP10]], 1 -// CHECK10-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4 +// CHECK10-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !33 // CHECK10-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP34:![0-9]+]] // CHECK10: omp.inner.for.end: // CHECK10-NEXT: br label [[OMP_LOOP_EXIT:%.*]] @@ -9596,26 +9596,26 @@ int main() { // CHECK10-NEXT: store i32 [[TMP4]], i32* [[DOTOMP_IV]], align 4 // CHECK10-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK10: omp.inner.for.cond: -// CHECK10-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK10-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK10-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !36 +// CHECK10-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !36 // CHECK10-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] // 
CHECK10-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK10: omp.inner.for.body: -// CHECK10-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK10-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !36 // CHECK10-NEXT: [[TMP8:%.*]] = zext i32 [[TMP7]] to i64 -// CHECK10-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK10-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !36 // CHECK10-NEXT: [[TMP10:%.*]] = zext i32 [[TMP9]] to i64 -// CHECK10-NEXT: call void @__kmpc_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]) -// CHECK10-NEXT: [[TMP11:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK10-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4 -// CHECK10-NEXT: call void @.omp_outlined..7(i32* [[TMP11]], i32* [[DOTBOUND_ZERO_ADDR]], i64 [[TMP8]], i64 [[TMP10]]) #[[ATTR2]] -// CHECK10-NEXT: call void @__kmpc_end_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]) +// CHECK10-NEXT: call void @__kmpc_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]), !llvm.access.group !36 +// CHECK10-NEXT: [[TMP11:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8, !llvm.access.group !36 +// CHECK10-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4, !llvm.access.group !36 +// CHECK10-NEXT: call void @.omp_outlined..7(i32* [[TMP11]], i32* [[DOTBOUND_ZERO_ADDR]], i64 [[TMP8]], i64 [[TMP10]]) #[[ATTR2]], !llvm.access.group !36 +// CHECK10-NEXT: call void @__kmpc_end_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]), !llvm.access.group !36 // CHECK10-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK10: omp.inner.for.inc: -// CHECK10-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK10-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK10-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !36 +// CHECK10-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !36 // CHECK10-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP12]], [[TMP13]] -// CHECK10-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 +// CHECK10-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !36 // CHECK10-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP37:![0-9]+]] // CHECK10: omp.inner.for.end: // CHECK10-NEXT: br label [[OMP_LOOP_EXIT:%.*]] @@ -9677,23 +9677,23 @@ int main() { // CHECK10-NEXT: store i32 [[TMP6]], i32* [[DOTOMP_IV]], align 4 // CHECK10-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK10: omp.inner.for.cond: -// CHECK10-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK10-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK10-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !39 +// CHECK10-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !39 // CHECK10-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP7]], [[TMP8]] // CHECK10-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK10: omp.inner.for.body: -// CHECK10-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK10-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !39 // CHECK10-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP9]], 1 // CHECK10-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK10-NEXT: store i32 [[ADD]], i32* [[I]], align 4 -// CHECK10-NEXT: 
call void @_Z3fn5v() +// CHECK10-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !39 +// CHECK10-NEXT: call void @_Z3fn5v(), !llvm.access.group !39 // CHECK10-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK10: omp.body.continue: // CHECK10-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK10: omp.inner.for.inc: -// CHECK10-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK10-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !39 // CHECK10-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP10]], 1 -// CHECK10-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4 +// CHECK10-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !39 // CHECK10-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP40:![0-9]+]] // CHECK10: omp.inner.for.end: // CHECK10-NEXT: br label [[OMP_LOOP_EXIT:%.*]] @@ -9771,35 +9771,35 @@ int main() { // CHECK10-NEXT: store i32 [[TMP4]], i32* [[DOTOMP_IV]], align 4 // CHECK10-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK10: omp.inner.for.cond: -// CHECK10-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK10-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK10-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !42 +// CHECK10-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !42 // CHECK10-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] // CHECK10-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK10: omp.inner.for.body: -// CHECK10-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK10-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !42 // CHECK10-NEXT: [[TMP8:%.*]] = zext i32 [[TMP7]] to i64 -// CHECK10-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK10-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !42 // CHECK10-NEXT: [[TMP10:%.*]] = zext i32 [[TMP9]] to i64 -// CHECK10-NEXT: [[TMP11:%.*]] = load i8, i8* [[CONV]], align 8 +// CHECK10-NEXT: [[TMP11:%.*]] = load i8, i8* [[CONV]], align 8, !llvm.access.group !42 // CHECK10-NEXT: [[TOBOOL:%.*]] = trunc i8 [[TMP11]] to i1 // CHECK10-NEXT: br i1 [[TOBOOL]], label [[OMP_IF_THEN:%.*]], label [[OMP_IF_ELSE:%.*]] // CHECK10: omp_if.then: -// CHECK10-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 2, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64)* @.omp_outlined..9 to void (i32*, i32*, ...)*), i64 [[TMP8]], i64 [[TMP10]]) +// CHECK10-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) 
@__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 2, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64)* @.omp_outlined..9 to void (i32*, i32*, ...)*), i64 [[TMP8]], i64 [[TMP10]]), !llvm.access.group !42 // CHECK10-NEXT: br label [[OMP_IF_END:%.*]] // CHECK10: omp_if.else: -// CHECK10-NEXT: call void @__kmpc_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]) -// CHECK10-NEXT: [[TMP12:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK10-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4 -// CHECK10-NEXT: call void @.omp_outlined..9(i32* [[TMP12]], i32* [[DOTBOUND_ZERO_ADDR]], i64 [[TMP8]], i64 [[TMP10]]) #[[ATTR2]] -// CHECK10-NEXT: call void @__kmpc_end_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]) +// CHECK10-NEXT: call void @__kmpc_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]), !llvm.access.group !42 +// CHECK10-NEXT: [[TMP12:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8, !llvm.access.group !42 +// CHECK10-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4, !llvm.access.group !42 +// CHECK10-NEXT: call void @.omp_outlined..9(i32* [[TMP12]], i32* [[DOTBOUND_ZERO_ADDR]], i64 [[TMP8]], i64 [[TMP10]]) #[[ATTR2]], !llvm.access.group !42 +// CHECK10-NEXT: call void @__kmpc_end_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]), !llvm.access.group !42 // CHECK10-NEXT: br label [[OMP_IF_END]] // CHECK10: omp_if.end: // CHECK10-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK10: omp.inner.for.inc: -// CHECK10-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK10-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK10-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !42 +// CHECK10-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !42 // CHECK10-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP13]], [[TMP14]] -// CHECK10-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 +// CHECK10-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !42 // CHECK10-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP43:![0-9]+]] // CHECK10: omp.inner.for.end: // CHECK10-NEXT: br label [[OMP_LOOP_EXIT:%.*]] @@ -9861,23 +9861,23 @@ int main() { // CHECK10-NEXT: store i32 [[TMP6]], i32* [[DOTOMP_IV]], align 4 // CHECK10-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK10: omp.inner.for.cond: -// CHECK10-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK10-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK10-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !45 +// CHECK10-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !45 // CHECK10-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP7]], [[TMP8]] // CHECK10-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK10: omp.inner.for.body: -// CHECK10-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK10-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !45 // CHECK10-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP9]], 1 // CHECK10-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK10-NEXT: store i32 [[ADD]], i32* [[I]], align 4 -// CHECK10-NEXT: call void @_Z3fn6v() +// CHECK10-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !45 +// CHECK10-NEXT: call void @_Z3fn6v(), !llvm.access.group !45 // CHECK10-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK10: 
omp.body.continue: // CHECK10-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK10: omp.inner.for.inc: -// CHECK10-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK10-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !45 // CHECK10-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP10]], 1 -// CHECK10-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4 +// CHECK10-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !45 // CHECK10-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP46:![0-9]+]] // CHECK10: omp.inner.for.end: // CHECK10-NEXT: br label [[OMP_LOOP_EXIT:%.*]] @@ -9997,22 +9997,22 @@ int main() { // CHECK10-NEXT: store i32 [[TMP4]], i32* [[DOTOMP_IV]], align 4 // CHECK10-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK10: omp.inner.for.cond: -// CHECK10-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK10-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK10-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !48 +// CHECK10-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !48 // CHECK10-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] // CHECK10-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK10: omp.inner.for.body: -// CHECK10-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK10-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !48 // CHECK10-NEXT: [[TMP8:%.*]] = zext i32 [[TMP7]] to i64 -// CHECK10-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK10-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !48 // CHECK10-NEXT: [[TMP10:%.*]] = zext i32 [[TMP9]] to i64 -// CHECK10-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 2, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64)* @.omp_outlined..11 to void (i32*, i32*, ...)*), i64 [[TMP8]], i64 [[TMP10]]) +// CHECK10-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) 
@__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 2, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64)* @.omp_outlined..11 to void (i32*, i32*, ...)*), i64 [[TMP8]], i64 [[TMP10]]), !llvm.access.group !48 // CHECK10-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK10: omp.inner.for.inc: -// CHECK10-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK10-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK10-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !48 +// CHECK10-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !48 // CHECK10-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP11]], [[TMP12]] -// CHECK10-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 +// CHECK10-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !48 // CHECK10-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP49:![0-9]+]] // CHECK10: omp.inner.for.end: // CHECK10-NEXT: br label [[OMP_LOOP_EXIT:%.*]] @@ -10074,23 +10074,23 @@ int main() { // CHECK10-NEXT: store i32 [[TMP6]], i32* [[DOTOMP_IV]], align 4 // CHECK10-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK10: omp.inner.for.cond: -// CHECK10-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK10-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK10-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !51 +// CHECK10-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !51 // CHECK10-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP7]], [[TMP8]] // CHECK10-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK10: omp.inner.for.body: -// CHECK10-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK10-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !51 // CHECK10-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP9]], 1 // CHECK10-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK10-NEXT: store i32 [[ADD]], i32* [[I]], align 4 -// CHECK10-NEXT: call void @_Z3fn1v() +// CHECK10-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !51 +// CHECK10-NEXT: call void @_Z3fn1v(), !llvm.access.group !51 // CHECK10-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK10: omp.body.continue: // CHECK10-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK10: omp.inner.for.inc: -// CHECK10-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK10-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !51 // CHECK10-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP10]], 1 -// CHECK10-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4 +// CHECK10-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !51 // CHECK10-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP52:![0-9]+]] // CHECK10: omp.inner.for.end: // CHECK10-NEXT: br label [[OMP_LOOP_EXIT:%.*]] @@ -10150,26 +10150,26 @@ int main() { // CHECK10-NEXT: store i32 [[TMP4]], i32* [[DOTOMP_IV]], align 4 // CHECK10-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK10: omp.inner.for.cond: -// CHECK10-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK10-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK10-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !54 +// CHECK10-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !54 // CHECK10-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] 
// CHECK10-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK10: omp.inner.for.body: -// CHECK10-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK10-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !54 // CHECK10-NEXT: [[TMP8:%.*]] = zext i32 [[TMP7]] to i64 -// CHECK10-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK10-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !54 // CHECK10-NEXT: [[TMP10:%.*]] = zext i32 [[TMP9]] to i64 -// CHECK10-NEXT: call void @__kmpc_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]) -// CHECK10-NEXT: [[TMP11:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK10-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4 -// CHECK10-NEXT: call void @.omp_outlined..13(i32* [[TMP11]], i32* [[DOTBOUND_ZERO_ADDR]], i64 [[TMP8]], i64 [[TMP10]]) #[[ATTR2]] -// CHECK10-NEXT: call void @__kmpc_end_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]) +// CHECK10-NEXT: call void @__kmpc_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]), !llvm.access.group !54 +// CHECK10-NEXT: [[TMP11:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8, !llvm.access.group !54 +// CHECK10-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4, !llvm.access.group !54 +// CHECK10-NEXT: call void @.omp_outlined..13(i32* [[TMP11]], i32* [[DOTBOUND_ZERO_ADDR]], i64 [[TMP8]], i64 [[TMP10]]) #[[ATTR2]], !llvm.access.group !54 +// CHECK10-NEXT: call void @__kmpc_end_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]), !llvm.access.group !54 // CHECK10-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK10: omp.inner.for.inc: -// CHECK10-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK10-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK10-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !54 +// CHECK10-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !54 // CHECK10-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP12]], [[TMP13]] -// CHECK10-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 +// CHECK10-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !54 // CHECK10-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP55:![0-9]+]] // CHECK10: omp.inner.for.end: // CHECK10-NEXT: br label [[OMP_LOOP_EXIT:%.*]] @@ -10231,23 +10231,23 @@ int main() { // CHECK10-NEXT: store i32 [[TMP6]], i32* [[DOTOMP_IV]], align 4 // CHECK10-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK10: omp.inner.for.cond: -// CHECK10-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK10-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK10-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !57 +// CHECK10-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !57 // CHECK10-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP7]], [[TMP8]] // CHECK10-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK10: omp.inner.for.body: -// CHECK10-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK10-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !57 // CHECK10-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP9]], 1 // CHECK10-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK10-NEXT: store i32 [[ADD]], i32* [[I]], align 4 -// 
CHECK10-NEXT: call void @_Z3fn2v() +// CHECK10-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !57 +// CHECK10-NEXT: call void @_Z3fn2v(), !llvm.access.group !57 // CHECK10-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK10: omp.body.continue: // CHECK10-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK10: omp.inner.for.inc: -// CHECK10-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK10-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !57 // CHECK10-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP10]], 1 -// CHECK10-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4 +// CHECK10-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !57 // CHECK10-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP58:![0-9]+]] // CHECK10: omp.inner.for.end: // CHECK10-NEXT: br label [[OMP_LOOP_EXIT:%.*]] @@ -10325,35 +10325,35 @@ int main() { // CHECK10-NEXT: store i32 [[TMP4]], i32* [[DOTOMP_IV]], align 4 // CHECK10-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK10: omp.inner.for.cond: -// CHECK10-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK10-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK10-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !60 +// CHECK10-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !60 // CHECK10-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] // CHECK10-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK10: omp.inner.for.body: -// CHECK10-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK10-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !60 // CHECK10-NEXT: [[TMP8:%.*]] = zext i32 [[TMP7]] to i64 -// CHECK10-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK10-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !60 // CHECK10-NEXT: [[TMP10:%.*]] = zext i32 [[TMP9]] to i64 -// CHECK10-NEXT: [[TMP11:%.*]] = load i8, i8* [[CONV]], align 8 +// CHECK10-NEXT: [[TMP11:%.*]] = load i8, i8* [[CONV]], align 8, !llvm.access.group !60 // CHECK10-NEXT: [[TOBOOL:%.*]] = trunc i8 [[TMP11]] to i1 // CHECK10-NEXT: br i1 [[TOBOOL]], label [[OMP_IF_THEN:%.*]], label [[OMP_IF_ELSE:%.*]] // CHECK10: omp_if.then: -// CHECK10-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 2, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64)* @.omp_outlined..15 to void (i32*, i32*, ...)*), i64 [[TMP8]], i64 [[TMP10]]) +// CHECK10-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) 
@__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 2, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64)* @.omp_outlined..15 to void (i32*, i32*, ...)*), i64 [[TMP8]], i64 [[TMP10]]), !llvm.access.group !60 // CHECK10-NEXT: br label [[OMP_IF_END:%.*]] // CHECK10: omp_if.else: -// CHECK10-NEXT: call void @__kmpc_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]) -// CHECK10-NEXT: [[TMP12:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK10-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4 -// CHECK10-NEXT: call void @.omp_outlined..15(i32* [[TMP12]], i32* [[DOTBOUND_ZERO_ADDR]], i64 [[TMP8]], i64 [[TMP10]]) #[[ATTR2]] -// CHECK10-NEXT: call void @__kmpc_end_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]) +// CHECK10-NEXT: call void @__kmpc_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]), !llvm.access.group !60 +// CHECK10-NEXT: [[TMP12:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8, !llvm.access.group !60 +// CHECK10-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4, !llvm.access.group !60 +// CHECK10-NEXT: call void @.omp_outlined..15(i32* [[TMP12]], i32* [[DOTBOUND_ZERO_ADDR]], i64 [[TMP8]], i64 [[TMP10]]) #[[ATTR2]], !llvm.access.group !60 +// CHECK10-NEXT: call void @__kmpc_end_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]), !llvm.access.group !60 // CHECK10-NEXT: br label [[OMP_IF_END]] // CHECK10: omp_if.end: // CHECK10-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK10: omp.inner.for.inc: -// CHECK10-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK10-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK10-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !60 +// CHECK10-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !60 // CHECK10-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP13]], [[TMP14]] -// CHECK10-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 +// CHECK10-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !60 // CHECK10-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP61:![0-9]+]] // CHECK10: omp.inner.for.end: // CHECK10-NEXT: br label [[OMP_LOOP_EXIT:%.*]] @@ -10415,23 +10415,23 @@ int main() { // CHECK10-NEXT: store i32 [[TMP6]], i32* [[DOTOMP_IV]], align 4 // CHECK10-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK10: omp.inner.for.cond: -// CHECK10-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK10-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK10-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !63 +// CHECK10-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !63 // CHECK10-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP7]], [[TMP8]] // CHECK10-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK10: omp.inner.for.body: -// CHECK10-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK10-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !63 // CHECK10-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP9]], 1 // CHECK10-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK10-NEXT: store i32 [[ADD]], i32* [[I]], align 4 -// CHECK10-NEXT: call void @_Z3fn3v() +// CHECK10-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !63 +// CHECK10-NEXT: call void @_Z3fn3v(), !llvm.access.group !63 // CHECK10-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK10: 
omp.body.continue: // CHECK10-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK10: omp.inner.for.inc: -// CHECK10-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK10-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !63 // CHECK10-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP10]], 1 -// CHECK10-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4 +// CHECK10-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !63 // CHECK10-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP64:![0-9]+]] // CHECK10: omp.inner.for.end: // CHECK10-NEXT: br label [[OMP_LOOP_EXIT:%.*]] @@ -10521,22 +10521,22 @@ int main() { // CHECK11-NEXT: store i32 [[TMP4]], i32* [[DOTOMP_IV]], align 4 // CHECK11-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK11: omp.inner.for.cond: -// CHECK11-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK11-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK11-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !15 +// CHECK11-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !15 // CHECK11-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] // CHECK11-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK11: omp.inner.for.body: -// CHECK11-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK11-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !15 // CHECK11-NEXT: [[TMP8:%.*]] = zext i32 [[TMP7]] to i64 -// CHECK11-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK11-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !15 // CHECK11-NEXT: [[TMP10:%.*]] = zext i32 [[TMP9]] to i64 -// CHECK11-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 2, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64)* @.omp_outlined..1 to void (i32*, i32*, ...)*), i64 [[TMP8]], i64 [[TMP10]]) +// CHECK11-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) 
@__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 2, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64)* @.omp_outlined..1 to void (i32*, i32*, ...)*), i64 [[TMP8]], i64 [[TMP10]]), !llvm.access.group !15 // CHECK11-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK11: omp.inner.for.inc: -// CHECK11-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK11-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK11-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !15 +// CHECK11-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !15 // CHECK11-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP11]], [[TMP12]] -// CHECK11-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !15 // CHECK11-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP16:![0-9]+]] // CHECK11: omp.inner.for.end: // CHECK11-NEXT: br label [[OMP_LOOP_EXIT:%.*]] @@ -10598,22 +10598,22 @@ int main() { // CHECK11-NEXT: store i32 [[TMP6]], i32* [[DOTOMP_IV]], align 4 // CHECK11-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK11: omp.inner.for.cond: -// CHECK11-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK11-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK11-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !19 +// CHECK11-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !19 // CHECK11-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP7]], [[TMP8]] // CHECK11-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK11: omp.inner.for.body: -// CHECK11-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !19 // CHECK11-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP9]], 1 // CHECK11-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK11-NEXT: store i32 [[ADD]], i32* [[I]], align 4 +// CHECK11-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !19 // CHECK11-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK11: omp.body.continue: // CHECK11-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK11: omp.inner.for.inc: -// CHECK11-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !19 // CHECK11-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP10]], 1 -// CHECK11-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !19 // CHECK11-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP20:![0-9]+]] // CHECK11: omp.inner.for.end: // CHECK11-NEXT: br label [[OMP_LOOP_EXIT:%.*]] @@ -10673,26 +10673,26 @@ int main() { // CHECK11-NEXT: store i32 [[TMP4]], i32* [[DOTOMP_IV]], align 4 // CHECK11-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK11: omp.inner.for.cond: -// CHECK11-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK11-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK11-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !24 +// CHECK11-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !24 // CHECK11-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] // CHECK11-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // 
CHECK11: omp.inner.for.body: -// CHECK11-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK11-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !24 // CHECK11-NEXT: [[TMP8:%.*]] = zext i32 [[TMP7]] to i64 -// CHECK11-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK11-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !24 // CHECK11-NEXT: [[TMP10:%.*]] = zext i32 [[TMP9]] to i64 -// CHECK11-NEXT: call void @__kmpc_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]) -// CHECK11-NEXT: [[TMP11:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK11-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4 -// CHECK11-NEXT: call void @.omp_outlined..3(i32* [[TMP11]], i32* [[DOTBOUND_ZERO_ADDR]], i64 [[TMP8]], i64 [[TMP10]]) #[[ATTR2]] -// CHECK11-NEXT: call void @__kmpc_end_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]) +// CHECK11-NEXT: call void @__kmpc_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]), !llvm.access.group !24 +// CHECK11-NEXT: [[TMP11:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8, !llvm.access.group !24 +// CHECK11-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4, !llvm.access.group !24 +// CHECK11-NEXT: call void @.omp_outlined..3(i32* [[TMP11]], i32* [[DOTBOUND_ZERO_ADDR]], i64 [[TMP8]], i64 [[TMP10]]) #[[ATTR2]], !llvm.access.group !24 +// CHECK11-NEXT: call void @__kmpc_end_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]), !llvm.access.group !24 // CHECK11-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK11: omp.inner.for.inc: -// CHECK11-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK11-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK11-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !24 +// CHECK11-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !24 // CHECK11-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP12]], [[TMP13]] -// CHECK11-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !24 // CHECK11-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP25:![0-9]+]] // CHECK11: omp.inner.for.end: // CHECK11-NEXT: br label [[OMP_LOOP_EXIT:%.*]] @@ -10754,23 +10754,23 @@ int main() { // CHECK11-NEXT: store i32 [[TMP6]], i32* [[DOTOMP_IV]], align 4 // CHECK11-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK11: omp.inner.for.cond: -// CHECK11-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK11-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK11-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !27 +// CHECK11-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !27 // CHECK11-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP7]], [[TMP8]] // CHECK11-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK11: omp.inner.for.body: -// CHECK11-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !27 // CHECK11-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP9]], 1 // CHECK11-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK11-NEXT: store i32 [[ADD]], i32* [[I]], align 4 -// CHECK11-NEXT: call void @_Z9gtid_testv() +// CHECK11-NEXT: store i32 [[ADD]], i32* [[I]], align 4, 
!llvm.access.group !27 +// CHECK11-NEXT: call void @_Z9gtid_testv(), !llvm.access.group !27 // CHECK11-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK11: omp.body.continue: // CHECK11-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK11: omp.inner.for.inc: -// CHECK11-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !27 // CHECK11-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP10]], 1 -// CHECK11-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !27 // CHECK11-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP28:![0-9]+]] // CHECK11: omp.inner.for.end: // CHECK11-NEXT: br label [[OMP_LOOP_EXIT:%.*]] @@ -10892,22 +10892,22 @@ int main() { // CHECK11-NEXT: store i32 [[TMP4]], i32* [[DOTOMP_IV]], align 4 // CHECK11-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK11: omp.inner.for.cond: -// CHECK11-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK11-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK11-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !30 +// CHECK11-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !30 // CHECK11-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] // CHECK11-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK11: omp.inner.for.body: -// CHECK11-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK11-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !30 // CHECK11-NEXT: [[TMP8:%.*]] = zext i32 [[TMP7]] to i64 -// CHECK11-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK11-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !30 // CHECK11-NEXT: [[TMP10:%.*]] = zext i32 [[TMP9]] to i64 -// CHECK11-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 2, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64)* @.omp_outlined..5 to void (i32*, i32*, ...)*), i64 [[TMP8]], i64 [[TMP10]]) +// CHECK11-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) 
@__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 2, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64)* @.omp_outlined..5 to void (i32*, i32*, ...)*), i64 [[TMP8]], i64 [[TMP10]]), !llvm.access.group !30 // CHECK11-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK11: omp.inner.for.inc: -// CHECK11-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK11-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK11-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !30 +// CHECK11-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !30 // CHECK11-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP11]], [[TMP12]] -// CHECK11-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !30 // CHECK11-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP31:![0-9]+]] // CHECK11: omp.inner.for.end: // CHECK11-NEXT: br label [[OMP_LOOP_EXIT:%.*]] @@ -10969,23 +10969,23 @@ int main() { // CHECK11-NEXT: store i32 [[TMP6]], i32* [[DOTOMP_IV]], align 4 // CHECK11-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK11: omp.inner.for.cond: -// CHECK11-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK11-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK11-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !33 +// CHECK11-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !33 // CHECK11-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP7]], [[TMP8]] // CHECK11-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK11: omp.inner.for.body: -// CHECK11-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !33 // CHECK11-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP9]], 1 // CHECK11-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK11-NEXT: store i32 [[ADD]], i32* [[I]], align 4 -// CHECK11-NEXT: call void @_Z3fn4v() +// CHECK11-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !33 +// CHECK11-NEXT: call void @_Z3fn4v(), !llvm.access.group !33 // CHECK11-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK11: omp.body.continue: // CHECK11-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK11: omp.inner.for.inc: -// CHECK11-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !33 // CHECK11-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP10]], 1 -// CHECK11-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !33 // CHECK11-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP34:![0-9]+]] // CHECK11: omp.inner.for.end: // CHECK11-NEXT: br label [[OMP_LOOP_EXIT:%.*]] @@ -11227,41 +11227,41 @@ int main() { // CHECK11: omp_if.then: // CHECK11-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK11: omp.inner.for.cond: -// CHECK11-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK11-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK11-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !39 +// CHECK11-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !39 // CHECK11-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] // CHECK11-NEXT: br i1 [[CMP1]], label 
[[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK11: omp.inner.for.body: -// CHECK11-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK11-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !39 // CHECK11-NEXT: [[TMP9:%.*]] = zext i32 [[TMP8]] to i64 -// CHECK11-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK11-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !39 // CHECK11-NEXT: [[TMP11:%.*]] = zext i32 [[TMP10]] to i64 -// CHECK11-NEXT: [[TMP12:%.*]] = load i8, i8* [[CONV]], align 8 +// CHECK11-NEXT: [[TMP12:%.*]] = load i8, i8* [[CONV]], align 8, !llvm.access.group !39 // CHECK11-NEXT: [[TOBOOL2:%.*]] = trunc i8 [[TMP12]] to i1 // CHECK11-NEXT: [[CONV3:%.*]] = bitcast i64* [[DOTCAPTURE_EXPR__CASTED]] to i8* // CHECK11-NEXT: [[FROMBOOL:%.*]] = zext i1 [[TOBOOL2]] to i8 -// CHECK11-NEXT: store i8 [[FROMBOOL]], i8* [[CONV3]], align 1 -// CHECK11-NEXT: [[TMP13:%.*]] = load i64, i64* [[DOTCAPTURE_EXPR__CASTED]], align 8 -// CHECK11-NEXT: [[TMP14:%.*]] = load i8, i8* [[CONV]], align 8 +// CHECK11-NEXT: store i8 [[FROMBOOL]], i8* [[CONV3]], align 1, !llvm.access.group !39 +// CHECK11-NEXT: [[TMP13:%.*]] = load i64, i64* [[DOTCAPTURE_EXPR__CASTED]], align 8, !llvm.access.group !39 +// CHECK11-NEXT: [[TMP14:%.*]] = load i8, i8* [[CONV]], align 8, !llvm.access.group !39 // CHECK11-NEXT: [[TOBOOL4:%.*]] = trunc i8 [[TMP14]] to i1 // CHECK11-NEXT: br i1 [[TOBOOL4]], label [[OMP_IF_THEN5:%.*]], label [[OMP_IF_ELSE:%.*]] // CHECK11: omp_if.then5: -// CHECK11-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 3, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64, i64)* @.omp_outlined..9 to void (i32*, i32*, ...)*), i64 [[TMP9]], i64 [[TMP11]], i64 [[TMP13]]) +// CHECK11-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) 
@__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 3, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64, i64)* @.omp_outlined..9 to void (i32*, i32*, ...)*), i64 [[TMP9]], i64 [[TMP11]], i64 [[TMP13]]), !llvm.access.group !39 // CHECK11-NEXT: br label [[OMP_IF_END:%.*]] // CHECK11: omp_if.else: -// CHECK11-NEXT: call void @__kmpc_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]) -// CHECK11-NEXT: [[TMP15:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK11-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4 -// CHECK11-NEXT: call void @.omp_outlined..9(i32* [[TMP15]], i32* [[DOTBOUND_ZERO_ADDR]], i64 [[TMP9]], i64 [[TMP11]], i64 [[TMP13]]) #[[ATTR2]] -// CHECK11-NEXT: call void @__kmpc_end_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]) +// CHECK11-NEXT: call void @__kmpc_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]), !llvm.access.group !39 +// CHECK11-NEXT: [[TMP15:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8, !llvm.access.group !39 +// CHECK11-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4, !llvm.access.group !39 +// CHECK11-NEXT: call void @.omp_outlined..9(i32* [[TMP15]], i32* [[DOTBOUND_ZERO_ADDR]], i64 [[TMP9]], i64 [[TMP11]], i64 [[TMP13]]) #[[ATTR2]], !llvm.access.group !39 +// CHECK11-NEXT: call void @__kmpc_end_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]), !llvm.access.group !39 // CHECK11-NEXT: br label [[OMP_IF_END]] // CHECK11: omp_if.end: // CHECK11-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK11: omp.inner.for.inc: -// CHECK11-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK11-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK11-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !39 +// CHECK11-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !39 // CHECK11-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP16]], [[TMP17]] -// CHECK11-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !39 // CHECK11-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP40:![0-9]+]] // CHECK11: omp.inner.for.end: // CHECK11-NEXT: br label [[OMP_IF_END22:%.*]] @@ -11373,23 +11373,23 @@ int main() { // CHECK11-NEXT: store i32 [[TMP7]], i32* [[DOTOMP_IV]], align 4 // CHECK11-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK11: omp.inner.for.cond: -// CHECK11-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK11-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK11-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !43 +// CHECK11-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !43 // CHECK11-NEXT: [[CMP3:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] // CHECK11-NEXT: br i1 [[CMP3]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK11: omp.inner.for.body: -// CHECK11-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !43 // CHECK11-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP10]], 1 // CHECK11-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK11-NEXT: store i32 [[ADD]], i32* [[I]], align 4 -// CHECK11-NEXT: call void @_Z3fn6v() +// CHECK11-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !43 +// CHECK11-NEXT: call void @_Z3fn6v(), !llvm.access.group !43 // CHECK11-NEXT: br label 
[[OMP_BODY_CONTINUE:%.*]] // CHECK11: omp.body.continue: // CHECK11-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK11: omp.inner.for.inc: -// CHECK11-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !43 // CHECK11-NEXT: [[ADD4:%.*]] = add nsw i32 [[TMP11]], 1 -// CHECK11-NEXT: store i32 [[ADD4]], i32* [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: store i32 [[ADD4]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !43 // CHECK11-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP44:![0-9]+]] // CHECK11: omp.inner.for.end: // CHECK11-NEXT: br label [[OMP_IF_END:%.*]] @@ -11501,23 +11501,23 @@ int main() { // CHECK11-NEXT: store i32 [[TMP7]], i32* [[DOTOMP_IV]], align 4 // CHECK11-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK11: omp.inner.for.cond: -// CHECK11-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK11-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK11-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !47 +// CHECK11-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !47 // CHECK11-NEXT: [[CMP3:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] // CHECK11-NEXT: br i1 [[CMP3]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK11: omp.inner.for.body: -// CHECK11-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !47 // CHECK11-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP10]], 1 // CHECK11-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK11-NEXT: store i32 [[ADD]], i32* [[I]], align 4 -// CHECK11-NEXT: call void @_Z3fn6v() +// CHECK11-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !47 +// CHECK11-NEXT: call void @_Z3fn6v(), !llvm.access.group !47 // CHECK11-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK11: omp.body.continue: // CHECK11-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK11: omp.inner.for.inc: -// CHECK11-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !47 // CHECK11-NEXT: [[ADD4:%.*]] = add nsw i32 [[TMP11]], 1 -// CHECK11-NEXT: store i32 [[ADD4]], i32* [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: store i32 [[ADD4]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !47 // CHECK11-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP48:![0-9]+]] // CHECK11: omp.inner.for.end: // CHECK11-NEXT: br label [[OMP_IF_END:%.*]] @@ -11680,22 +11680,22 @@ int main() { // CHECK11-NEXT: store i32 [[TMP4]], i32* [[DOTOMP_IV]], align 4 // CHECK11-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK11: omp.inner.for.cond: -// CHECK11-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK11-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK11-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !51 +// CHECK11-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !51 // CHECK11-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] // CHECK11-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK11: omp.inner.for.body: -// CHECK11-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK11-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !51 // CHECK11-NEXT: [[TMP8:%.*]] = zext 
i32 [[TMP7]] to i64 -// CHECK11-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK11-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !51 // CHECK11-NEXT: [[TMP10:%.*]] = zext i32 [[TMP9]] to i64 -// CHECK11-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 2, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64)* @.omp_outlined..12 to void (i32*, i32*, ...)*), i64 [[TMP8]], i64 [[TMP10]]) +// CHECK11-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 2, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64)* @.omp_outlined..12 to void (i32*, i32*, ...)*), i64 [[TMP8]], i64 [[TMP10]]), !llvm.access.group !51 // CHECK11-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK11: omp.inner.for.inc: -// CHECK11-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK11-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK11-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !51 +// CHECK11-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !51 // CHECK11-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP11]], [[TMP12]] -// CHECK11-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !51 // CHECK11-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP52:![0-9]+]] // CHECK11: omp.inner.for.end: // CHECK11-NEXT: br label [[OMP_LOOP_EXIT:%.*]] @@ -11757,23 +11757,23 @@ int main() { // CHECK11-NEXT: store i32 [[TMP6]], i32* [[DOTOMP_IV]], align 4 // CHECK11-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK11: omp.inner.for.cond: -// CHECK11-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK11-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK11-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !54 +// CHECK11-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !54 // CHECK11-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP7]], [[TMP8]] // CHECK11-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK11: omp.inner.for.body: -// CHECK11-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !54 // CHECK11-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP9]], 1 // CHECK11-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK11-NEXT: store i32 [[ADD]], i32* [[I]], align 4 -// CHECK11-NEXT: call void @_Z3fn1v() +// CHECK11-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !54 +// CHECK11-NEXT: call void @_Z3fn1v(), !llvm.access.group !54 // CHECK11-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK11: omp.body.continue: // CHECK11-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK11: omp.inner.for.inc: -// CHECK11-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !54 // CHECK11-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP10]], 1 -// CHECK11-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !54 // CHECK11-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP55:![0-9]+]] // CHECK11: omp.inner.for.end: // CHECK11-NEXT: br label 
[[OMP_LOOP_EXIT:%.*]] @@ -12008,35 +12008,35 @@ int main() { // CHECK11-NEXT: store i32 [[TMP4]], i32* [[DOTOMP_IV]], align 4 // CHECK11-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK11: omp.inner.for.cond: -// CHECK11-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK11-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK11-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !59 +// CHECK11-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !59 // CHECK11-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] // CHECK11-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK11: omp.inner.for.body: -// CHECK11-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK11-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !59 // CHECK11-NEXT: [[TMP8:%.*]] = zext i32 [[TMP7]] to i64 -// CHECK11-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK11-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !59 // CHECK11-NEXT: [[TMP10:%.*]] = zext i32 [[TMP9]] to i64 -// CHECK11-NEXT: [[TMP11:%.*]] = load i8, i8* [[CONV]], align 8 +// CHECK11-NEXT: [[TMP11:%.*]] = load i8, i8* [[CONV]], align 8, !llvm.access.group !59 // CHECK11-NEXT: [[TOBOOL:%.*]] = trunc i8 [[TMP11]] to i1 // CHECK11-NEXT: br i1 [[TOBOOL]], label [[OMP_IF_THEN:%.*]], label [[OMP_IF_ELSE:%.*]] // CHECK11: omp_if.then: -// CHECK11-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 2, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64)* @.omp_outlined..16 to void (i32*, i32*, ...)*), i64 [[TMP8]], i64 [[TMP10]]) +// CHECK11-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) 
@__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 2, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64)* @.omp_outlined..16 to void (i32*, i32*, ...)*), i64 [[TMP8]], i64 [[TMP10]]), !llvm.access.group !59 // CHECK11-NEXT: br label [[OMP_IF_END:%.*]] // CHECK11: omp_if.else: -// CHECK11-NEXT: call void @__kmpc_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]) -// CHECK11-NEXT: [[TMP12:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK11-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4 -// CHECK11-NEXT: call void @.omp_outlined..16(i32* [[TMP12]], i32* [[DOTBOUND_ZERO_ADDR]], i64 [[TMP8]], i64 [[TMP10]]) #[[ATTR2]] -// CHECK11-NEXT: call void @__kmpc_end_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]) +// CHECK11-NEXT: call void @__kmpc_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]), !llvm.access.group !59 +// CHECK11-NEXT: [[TMP12:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8, !llvm.access.group !59 +// CHECK11-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4, !llvm.access.group !59 +// CHECK11-NEXT: call void @.omp_outlined..16(i32* [[TMP12]], i32* [[DOTBOUND_ZERO_ADDR]], i64 [[TMP8]], i64 [[TMP10]]) #[[ATTR2]], !llvm.access.group !59 +// CHECK11-NEXT: call void @__kmpc_end_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]), !llvm.access.group !59 // CHECK11-NEXT: br label [[OMP_IF_END]] // CHECK11: omp_if.end: // CHECK11-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK11: omp.inner.for.inc: -// CHECK11-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK11-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK11-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !59 +// CHECK11-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !59 // CHECK11-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP13]], [[TMP14]] -// CHECK11-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !59 // CHECK11-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP60:![0-9]+]] // CHECK11: omp.inner.for.end: // CHECK11-NEXT: br label [[OMP_LOOP_EXIT:%.*]] @@ -12098,23 +12098,23 @@ int main() { // CHECK11-NEXT: store i32 [[TMP6]], i32* [[DOTOMP_IV]], align 4 // CHECK11-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK11: omp.inner.for.cond: -// CHECK11-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK11-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK11-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !62 +// CHECK11-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !62 // CHECK11-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP7]], [[TMP8]] // CHECK11-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK11: omp.inner.for.body: -// CHECK11-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !62 // CHECK11-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP9]], 1 // CHECK11-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK11-NEXT: store i32 [[ADD]], i32* [[I]], align 4 -// CHECK11-NEXT: call void @_Z3fn3v() +// CHECK11-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !62 +// CHECK11-NEXT: call void @_Z3fn3v(), !llvm.access.group !62 // CHECK11-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK11: 
omp.body.continue: // CHECK11-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK11: omp.inner.for.inc: -// CHECK11-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !62 // CHECK11-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP10]], 1 -// CHECK11-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !62 // CHECK11-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP63:![0-9]+]] // CHECK11: omp.inner.for.end: // CHECK11-NEXT: br label [[OMP_LOOP_EXIT:%.*]] @@ -12204,22 +12204,22 @@ int main() { // CHECK12-NEXT: store i32 [[TMP4]], i32* [[DOTOMP_IV]], align 4 // CHECK12-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK12: omp.inner.for.cond: -// CHECK12-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK12-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK12-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !15 +// CHECK12-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !15 // CHECK12-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] // CHECK12-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK12: omp.inner.for.body: -// CHECK12-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK12-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !15 // CHECK12-NEXT: [[TMP8:%.*]] = zext i32 [[TMP7]] to i64 -// CHECK12-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK12-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !15 // CHECK12-NEXT: [[TMP10:%.*]] = zext i32 [[TMP9]] to i64 -// CHECK12-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 2, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64)* @.omp_outlined..1 to void (i32*, i32*, ...)*), i64 [[TMP8]], i64 [[TMP10]]) +// CHECK12-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) 
@__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 2, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64)* @.omp_outlined..1 to void (i32*, i32*, ...)*), i64 [[TMP8]], i64 [[TMP10]]), !llvm.access.group !15 // CHECK12-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK12: omp.inner.for.inc: -// CHECK12-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK12-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK12-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !15 +// CHECK12-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !15 // CHECK12-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP11]], [[TMP12]] -// CHECK12-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 +// CHECK12-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !15 // CHECK12-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP16:![0-9]+]] // CHECK12: omp.inner.for.end: // CHECK12-NEXT: br label [[OMP_LOOP_EXIT:%.*]] @@ -12281,22 +12281,22 @@ int main() { // CHECK12-NEXT: store i32 [[TMP6]], i32* [[DOTOMP_IV]], align 4 // CHECK12-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK12: omp.inner.for.cond: -// CHECK12-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK12-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK12-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !19 +// CHECK12-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !19 // CHECK12-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP7]], [[TMP8]] // CHECK12-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK12: omp.inner.for.body: -// CHECK12-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK12-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !19 // CHECK12-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP9]], 1 // CHECK12-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK12-NEXT: store i32 [[ADD]], i32* [[I]], align 4 +// CHECK12-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !19 // CHECK12-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK12: omp.body.continue: // CHECK12-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK12: omp.inner.for.inc: -// CHECK12-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK12-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !19 // CHECK12-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP10]], 1 -// CHECK12-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4 +// CHECK12-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !19 // CHECK12-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP20:![0-9]+]] // CHECK12: omp.inner.for.end: // CHECK12-NEXT: br label [[OMP_LOOP_EXIT:%.*]] @@ -12356,26 +12356,26 @@ int main() { // CHECK12-NEXT: store i32 [[TMP4]], i32* [[DOTOMP_IV]], align 4 // CHECK12-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK12: omp.inner.for.cond: -// CHECK12-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK12-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK12-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !24 +// CHECK12-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !24 // CHECK12-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] // CHECK12-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // 
CHECK12: omp.inner.for.body: -// CHECK12-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK12-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !24 // CHECK12-NEXT: [[TMP8:%.*]] = zext i32 [[TMP7]] to i64 -// CHECK12-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK12-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !24 // CHECK12-NEXT: [[TMP10:%.*]] = zext i32 [[TMP9]] to i64 -// CHECK12-NEXT: call void @__kmpc_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]) -// CHECK12-NEXT: [[TMP11:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK12-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4 -// CHECK12-NEXT: call void @.omp_outlined..3(i32* [[TMP11]], i32* [[DOTBOUND_ZERO_ADDR]], i64 [[TMP8]], i64 [[TMP10]]) #[[ATTR2]] -// CHECK12-NEXT: call void @__kmpc_end_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]) +// CHECK12-NEXT: call void @__kmpc_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]), !llvm.access.group !24 +// CHECK12-NEXT: [[TMP11:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8, !llvm.access.group !24 +// CHECK12-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4, !llvm.access.group !24 +// CHECK12-NEXT: call void @.omp_outlined..3(i32* [[TMP11]], i32* [[DOTBOUND_ZERO_ADDR]], i64 [[TMP8]], i64 [[TMP10]]) #[[ATTR2]], !llvm.access.group !24 +// CHECK12-NEXT: call void @__kmpc_end_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]), !llvm.access.group !24 // CHECK12-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK12: omp.inner.for.inc: -// CHECK12-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK12-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK12-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !24 +// CHECK12-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !24 // CHECK12-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP12]], [[TMP13]] -// CHECK12-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 +// CHECK12-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !24 // CHECK12-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP25:![0-9]+]] // CHECK12: omp.inner.for.end: // CHECK12-NEXT: br label [[OMP_LOOP_EXIT:%.*]] @@ -12437,23 +12437,23 @@ int main() { // CHECK12-NEXT: store i32 [[TMP6]], i32* [[DOTOMP_IV]], align 4 // CHECK12-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK12: omp.inner.for.cond: -// CHECK12-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK12-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK12-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !27 +// CHECK12-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !27 // CHECK12-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP7]], [[TMP8]] // CHECK12-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK12: omp.inner.for.body: -// CHECK12-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK12-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !27 // CHECK12-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP9]], 1 // CHECK12-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK12-NEXT: store i32 [[ADD]], i32* [[I]], align 4 -// CHECK12-NEXT: call void @_Z9gtid_testv() +// CHECK12-NEXT: store i32 [[ADD]], i32* [[I]], align 4, 
!llvm.access.group !27 +// CHECK12-NEXT: call void @_Z9gtid_testv(), !llvm.access.group !27 // CHECK12-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK12: omp.body.continue: // CHECK12-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK12: omp.inner.for.inc: -// CHECK12-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK12-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !27 // CHECK12-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP10]], 1 -// CHECK12-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4 +// CHECK12-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !27 // CHECK12-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP28:![0-9]+]] // CHECK12: omp.inner.for.end: // CHECK12-NEXT: br label [[OMP_LOOP_EXIT:%.*]] @@ -12575,22 +12575,22 @@ int main() { // CHECK12-NEXT: store i32 [[TMP4]], i32* [[DOTOMP_IV]], align 4 // CHECK12-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK12: omp.inner.for.cond: -// CHECK12-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK12-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK12-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !30 +// CHECK12-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !30 // CHECK12-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] // CHECK12-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK12: omp.inner.for.body: -// CHECK12-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK12-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !30 // CHECK12-NEXT: [[TMP8:%.*]] = zext i32 [[TMP7]] to i64 -// CHECK12-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK12-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !30 // CHECK12-NEXT: [[TMP10:%.*]] = zext i32 [[TMP9]] to i64 -// CHECK12-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 2, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64)* @.omp_outlined..5 to void (i32*, i32*, ...)*), i64 [[TMP8]], i64 [[TMP10]]) +// CHECK12-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) 
@__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 2, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64)* @.omp_outlined..5 to void (i32*, i32*, ...)*), i64 [[TMP8]], i64 [[TMP10]]), !llvm.access.group !30 // CHECK12-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK12: omp.inner.for.inc: -// CHECK12-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK12-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK12-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !30 +// CHECK12-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !30 // CHECK12-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP11]], [[TMP12]] -// CHECK12-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 +// CHECK12-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !30 // CHECK12-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP31:![0-9]+]] // CHECK12: omp.inner.for.end: // CHECK12-NEXT: br label [[OMP_LOOP_EXIT:%.*]] @@ -12652,23 +12652,23 @@ int main() { // CHECK12-NEXT: store i32 [[TMP6]], i32* [[DOTOMP_IV]], align 4 // CHECK12-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK12: omp.inner.for.cond: -// CHECK12-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK12-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK12-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !33 +// CHECK12-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !33 // CHECK12-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP7]], [[TMP8]] // CHECK12-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK12: omp.inner.for.body: -// CHECK12-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK12-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !33 // CHECK12-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP9]], 1 // CHECK12-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK12-NEXT: store i32 [[ADD]], i32* [[I]], align 4 -// CHECK12-NEXT: call void @_Z3fn4v() +// CHECK12-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !33 +// CHECK12-NEXT: call void @_Z3fn4v(), !llvm.access.group !33 // CHECK12-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK12: omp.body.continue: // CHECK12-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK12: omp.inner.for.inc: -// CHECK12-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK12-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !33 // CHECK12-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP10]], 1 -// CHECK12-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4 +// CHECK12-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !33 // CHECK12-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP34:![0-9]+]] // CHECK12: omp.inner.for.end: // CHECK12-NEXT: br label [[OMP_LOOP_EXIT:%.*]] @@ -12910,41 +12910,41 @@ int main() { // CHECK12: omp_if.then: // CHECK12-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK12: omp.inner.for.cond: -// CHECK12-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK12-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK12-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !39 +// CHECK12-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !39 // CHECK12-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] // CHECK12-NEXT: br i1 [[CMP1]], label 
[[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK12: omp.inner.for.body: -// CHECK12-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK12-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !39 // CHECK12-NEXT: [[TMP9:%.*]] = zext i32 [[TMP8]] to i64 -// CHECK12-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK12-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !39 // CHECK12-NEXT: [[TMP11:%.*]] = zext i32 [[TMP10]] to i64 -// CHECK12-NEXT: [[TMP12:%.*]] = load i8, i8* [[CONV]], align 8 +// CHECK12-NEXT: [[TMP12:%.*]] = load i8, i8* [[CONV]], align 8, !llvm.access.group !39 // CHECK12-NEXT: [[TOBOOL2:%.*]] = trunc i8 [[TMP12]] to i1 // CHECK12-NEXT: [[CONV3:%.*]] = bitcast i64* [[DOTCAPTURE_EXPR__CASTED]] to i8* // CHECK12-NEXT: [[FROMBOOL:%.*]] = zext i1 [[TOBOOL2]] to i8 -// CHECK12-NEXT: store i8 [[FROMBOOL]], i8* [[CONV3]], align 1 -// CHECK12-NEXT: [[TMP13:%.*]] = load i64, i64* [[DOTCAPTURE_EXPR__CASTED]], align 8 -// CHECK12-NEXT: [[TMP14:%.*]] = load i8, i8* [[CONV]], align 8 +// CHECK12-NEXT: store i8 [[FROMBOOL]], i8* [[CONV3]], align 1, !llvm.access.group !39 +// CHECK12-NEXT: [[TMP13:%.*]] = load i64, i64* [[DOTCAPTURE_EXPR__CASTED]], align 8, !llvm.access.group !39 +// CHECK12-NEXT: [[TMP14:%.*]] = load i8, i8* [[CONV]], align 8, !llvm.access.group !39 // CHECK12-NEXT: [[TOBOOL4:%.*]] = trunc i8 [[TMP14]] to i1 // CHECK12-NEXT: br i1 [[TOBOOL4]], label [[OMP_IF_THEN5:%.*]], label [[OMP_IF_ELSE:%.*]] // CHECK12: omp_if.then5: -// CHECK12-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 3, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64, i64)* @.omp_outlined..9 to void (i32*, i32*, ...)*), i64 [[TMP9]], i64 [[TMP11]], i64 [[TMP13]]) +// CHECK12-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) 
@__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 3, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64, i64)* @.omp_outlined..9 to void (i32*, i32*, ...)*), i64 [[TMP9]], i64 [[TMP11]], i64 [[TMP13]]), !llvm.access.group !39 // CHECK12-NEXT: br label [[OMP_IF_END:%.*]] // CHECK12: omp_if.else: -// CHECK12-NEXT: call void @__kmpc_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]) -// CHECK12-NEXT: [[TMP15:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK12-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4 -// CHECK12-NEXT: call void @.omp_outlined..9(i32* [[TMP15]], i32* [[DOTBOUND_ZERO_ADDR]], i64 [[TMP9]], i64 [[TMP11]], i64 [[TMP13]]) #[[ATTR2]] -// CHECK12-NEXT: call void @__kmpc_end_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]) +// CHECK12-NEXT: call void @__kmpc_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]), !llvm.access.group !39 +// CHECK12-NEXT: [[TMP15:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8, !llvm.access.group !39 +// CHECK12-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4, !llvm.access.group !39 +// CHECK12-NEXT: call void @.omp_outlined..9(i32* [[TMP15]], i32* [[DOTBOUND_ZERO_ADDR]], i64 [[TMP9]], i64 [[TMP11]], i64 [[TMP13]]) #[[ATTR2]], !llvm.access.group !39 +// CHECK12-NEXT: call void @__kmpc_end_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]), !llvm.access.group !39 // CHECK12-NEXT: br label [[OMP_IF_END]] // CHECK12: omp_if.end: // CHECK12-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK12: omp.inner.for.inc: -// CHECK12-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK12-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK12-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !39 +// CHECK12-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !39 // CHECK12-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP16]], [[TMP17]] -// CHECK12-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 +// CHECK12-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !39 // CHECK12-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP40:![0-9]+]] // CHECK12: omp.inner.for.end: // CHECK12-NEXT: br label [[OMP_IF_END22:%.*]] @@ -13056,23 +13056,23 @@ int main() { // CHECK12-NEXT: store i32 [[TMP7]], i32* [[DOTOMP_IV]], align 4 // CHECK12-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK12: omp.inner.for.cond: -// CHECK12-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK12-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK12-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !43 +// CHECK12-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !43 // CHECK12-NEXT: [[CMP3:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] // CHECK12-NEXT: br i1 [[CMP3]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK12: omp.inner.for.body: -// CHECK12-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK12-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !43 // CHECK12-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP10]], 1 // CHECK12-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK12-NEXT: store i32 [[ADD]], i32* [[I]], align 4 -// CHECK12-NEXT: call void @_Z3fn6v() +// CHECK12-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !43 +// CHECK12-NEXT: call void @_Z3fn6v(), !llvm.access.group !43 // CHECK12-NEXT: br label 
[[OMP_BODY_CONTINUE:%.*]] // CHECK12: omp.body.continue: // CHECK12-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK12: omp.inner.for.inc: -// CHECK12-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK12-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !43 // CHECK12-NEXT: [[ADD4:%.*]] = add nsw i32 [[TMP11]], 1 -// CHECK12-NEXT: store i32 [[ADD4]], i32* [[DOTOMP_IV]], align 4 +// CHECK12-NEXT: store i32 [[ADD4]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !43 // CHECK12-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP44:![0-9]+]] // CHECK12: omp.inner.for.end: // CHECK12-NEXT: br label [[OMP_IF_END:%.*]] @@ -13184,23 +13184,23 @@ int main() { // CHECK12-NEXT: store i32 [[TMP7]], i32* [[DOTOMP_IV]], align 4 // CHECK12-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK12: omp.inner.for.cond: -// CHECK12-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK12-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK12-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !47 +// CHECK12-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !47 // CHECK12-NEXT: [[CMP3:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] // CHECK12-NEXT: br i1 [[CMP3]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK12: omp.inner.for.body: -// CHECK12-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK12-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !47 // CHECK12-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP10]], 1 // CHECK12-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK12-NEXT: store i32 [[ADD]], i32* [[I]], align 4 -// CHECK12-NEXT: call void @_Z3fn6v() +// CHECK12-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !47 +// CHECK12-NEXT: call void @_Z3fn6v(), !llvm.access.group !47 // CHECK12-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK12: omp.body.continue: // CHECK12-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK12: omp.inner.for.inc: -// CHECK12-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK12-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !47 // CHECK12-NEXT: [[ADD4:%.*]] = add nsw i32 [[TMP11]], 1 -// CHECK12-NEXT: store i32 [[ADD4]], i32* [[DOTOMP_IV]], align 4 +// CHECK12-NEXT: store i32 [[ADD4]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !47 // CHECK12-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP48:![0-9]+]] // CHECK12: omp.inner.for.end: // CHECK12-NEXT: br label [[OMP_IF_END:%.*]] @@ -13363,22 +13363,22 @@ int main() { // CHECK12-NEXT: store i32 [[TMP4]], i32* [[DOTOMP_IV]], align 4 // CHECK12-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK12: omp.inner.for.cond: -// CHECK12-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK12-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK12-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !51 +// CHECK12-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !51 // CHECK12-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] // CHECK12-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK12: omp.inner.for.body: -// CHECK12-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK12-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !51 // CHECK12-NEXT: [[TMP8:%.*]] = zext 
i32 [[TMP7]] to i64 -// CHECK12-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK12-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !51 // CHECK12-NEXT: [[TMP10:%.*]] = zext i32 [[TMP9]] to i64 -// CHECK12-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 2, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64)* @.omp_outlined..12 to void (i32*, i32*, ...)*), i64 [[TMP8]], i64 [[TMP10]]) +// CHECK12-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 2, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64)* @.omp_outlined..12 to void (i32*, i32*, ...)*), i64 [[TMP8]], i64 [[TMP10]]), !llvm.access.group !51 // CHECK12-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK12: omp.inner.for.inc: -// CHECK12-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK12-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK12-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !51 +// CHECK12-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !51 // CHECK12-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP11]], [[TMP12]] -// CHECK12-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 +// CHECK12-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !51 // CHECK12-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP52:![0-9]+]] // CHECK12: omp.inner.for.end: // CHECK12-NEXT: br label [[OMP_LOOP_EXIT:%.*]] @@ -13440,23 +13440,23 @@ int main() { // CHECK12-NEXT: store i32 [[TMP6]], i32* [[DOTOMP_IV]], align 4 // CHECK12-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK12: omp.inner.for.cond: -// CHECK12-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK12-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK12-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !54 +// CHECK12-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !54 // CHECK12-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP7]], [[TMP8]] // CHECK12-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK12: omp.inner.for.body: -// CHECK12-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK12-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !54 // CHECK12-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP9]], 1 // CHECK12-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK12-NEXT: store i32 [[ADD]], i32* [[I]], align 4 -// CHECK12-NEXT: call void @_Z3fn1v() +// CHECK12-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !54 +// CHECK12-NEXT: call void @_Z3fn1v(), !llvm.access.group !54 // CHECK12-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK12: omp.body.continue: // CHECK12-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK12: omp.inner.for.inc: -// CHECK12-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK12-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !54 // CHECK12-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP10]], 1 -// CHECK12-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4 +// CHECK12-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !54 // CHECK12-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP55:![0-9]+]] // CHECK12: omp.inner.for.end: // CHECK12-NEXT: br label 
[[OMP_LOOP_EXIT:%.*]] @@ -13691,35 +13691,35 @@ int main() { // CHECK12-NEXT: store i32 [[TMP4]], i32* [[DOTOMP_IV]], align 4 // CHECK12-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK12: omp.inner.for.cond: -// CHECK12-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK12-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK12-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !59 +// CHECK12-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !59 // CHECK12-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] // CHECK12-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK12: omp.inner.for.body: -// CHECK12-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK12-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !59 // CHECK12-NEXT: [[TMP8:%.*]] = zext i32 [[TMP7]] to i64 -// CHECK12-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK12-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !59 // CHECK12-NEXT: [[TMP10:%.*]] = zext i32 [[TMP9]] to i64 -// CHECK12-NEXT: [[TMP11:%.*]] = load i8, i8* [[CONV]], align 8 +// CHECK12-NEXT: [[TMP11:%.*]] = load i8, i8* [[CONV]], align 8, !llvm.access.group !59 // CHECK12-NEXT: [[TOBOOL:%.*]] = trunc i8 [[TMP11]] to i1 // CHECK12-NEXT: br i1 [[TOBOOL]], label [[OMP_IF_THEN:%.*]], label [[OMP_IF_ELSE:%.*]] // CHECK12: omp_if.then: -// CHECK12-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 2, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64)* @.omp_outlined..16 to void (i32*, i32*, ...)*), i64 [[TMP8]], i64 [[TMP10]]) +// CHECK12-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) 
@__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 2, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64)* @.omp_outlined..16 to void (i32*, i32*, ...)*), i64 [[TMP8]], i64 [[TMP10]]), !llvm.access.group !59 // CHECK12-NEXT: br label [[OMP_IF_END:%.*]] // CHECK12: omp_if.else: -// CHECK12-NEXT: call void @__kmpc_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]) -// CHECK12-NEXT: [[TMP12:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK12-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4 -// CHECK12-NEXT: call void @.omp_outlined..16(i32* [[TMP12]], i32* [[DOTBOUND_ZERO_ADDR]], i64 [[TMP8]], i64 [[TMP10]]) #[[ATTR2]] -// CHECK12-NEXT: call void @__kmpc_end_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]) +// CHECK12-NEXT: call void @__kmpc_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]), !llvm.access.group !59 +// CHECK12-NEXT: [[TMP12:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8, !llvm.access.group !59 +// CHECK12-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4, !llvm.access.group !59 +// CHECK12-NEXT: call void @.omp_outlined..16(i32* [[TMP12]], i32* [[DOTBOUND_ZERO_ADDR]], i64 [[TMP8]], i64 [[TMP10]]) #[[ATTR2]], !llvm.access.group !59 +// CHECK12-NEXT: call void @__kmpc_end_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]), !llvm.access.group !59 // CHECK12-NEXT: br label [[OMP_IF_END]] // CHECK12: omp_if.end: // CHECK12-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK12: omp.inner.for.inc: -// CHECK12-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK12-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK12-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !59 +// CHECK12-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !59 // CHECK12-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP13]], [[TMP14]] -// CHECK12-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 +// CHECK12-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !59 // CHECK12-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP60:![0-9]+]] // CHECK12: omp.inner.for.end: // CHECK12-NEXT: br label [[OMP_LOOP_EXIT:%.*]] @@ -13781,23 +13781,23 @@ int main() { // CHECK12-NEXT: store i32 [[TMP6]], i32* [[DOTOMP_IV]], align 4 // CHECK12-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK12: omp.inner.for.cond: -// CHECK12-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK12-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK12-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !62 +// CHECK12-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !62 // CHECK12-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP7]], [[TMP8]] // CHECK12-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK12: omp.inner.for.body: -// CHECK12-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK12-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !62 // CHECK12-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP9]], 1 // CHECK12-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK12-NEXT: store i32 [[ADD]], i32* [[I]], align 4 -// CHECK12-NEXT: call void @_Z3fn3v() +// CHECK12-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !62 +// CHECK12-NEXT: call void @_Z3fn3v(), !llvm.access.group !62 // CHECK12-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK12: 
omp.body.continue: // CHECK12-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK12: omp.inner.for.inc: -// CHECK12-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK12-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !62 // CHECK12-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP10]], 1 -// CHECK12-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4 +// CHECK12-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !62 // CHECK12-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP63:![0-9]+]] // CHECK12: omp.inner.for.end: // CHECK12-NEXT: br label [[OMP_LOOP_EXIT:%.*]] @@ -13839,22 +13839,22 @@ int main() { // CHECK13-NEXT: store i32 [[TMP0]], i32* [[DOTOMP_IV]], align 4 // CHECK13-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK13: omp.inner.for.cond: -// CHECK13-NEXT: [[TMP1:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK13-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK13-NEXT: [[TMP1:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !6 +// CHECK13-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !6 // CHECK13-NEXT: [[CMP:%.*]] = icmp sle i32 [[TMP1]], [[TMP2]] // CHECK13-NEXT: br i1 [[CMP]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK13: omp.inner.for.body: -// CHECK13-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK13-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !6 // CHECK13-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP3]], 1 // CHECK13-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK13-NEXT: store i32 [[ADD]], i32* [[I]], align 4 +// CHECK13-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !6 // CHECK13-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK13: omp.body.continue: // CHECK13-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK13: omp.inner.for.inc: -// CHECK13-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK13-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !6 // CHECK13-NEXT: [[ADD1:%.*]] = add nsw i32 [[TMP4]], 1 -// CHECK13-NEXT: store i32 [[ADD1]], i32* [[DOTOMP_IV]], align 4 +// CHECK13-NEXT: store i32 [[ADD1]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !6 // CHECK13-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP7:![0-9]+]] // CHECK13: omp.inner.for.end: // CHECK13-NEXT: store i32 100, i32* [[I]], align 4 @@ -13864,23 +13864,23 @@ int main() { // CHECK13-NEXT: store i32 [[TMP5]], i32* [[DOTOMP_IV5]], align 4 // CHECK13-NEXT: br label [[OMP_INNER_FOR_COND7:%.*]] // CHECK13: omp.inner.for.cond7: -// CHECK13-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV5]], align 4 -// CHECK13-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_UB4]], align 4 +// CHECK13-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV5]], align 4, !llvm.access.group !10 +// CHECK13-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_UB4]], align 4, !llvm.access.group !10 // CHECK13-NEXT: [[CMP8:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] // CHECK13-NEXT: br i1 [[CMP8]], label [[OMP_INNER_FOR_BODY9:%.*]], label [[OMP_INNER_FOR_END15:%.*]] // CHECK13: omp.inner.for.body9: -// CHECK13-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV5]], align 4 +// CHECK13-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV5]], align 4, !llvm.access.group !10 // CHECK13-NEXT: [[MUL10:%.*]] = mul nsw i32 [[TMP8]], 1 // CHECK13-NEXT: [[ADD11:%.*]] = add nsw i32 0, [[MUL10]] -// CHECK13-NEXT: store i32 [[ADD11]], i32* [[I6]], align 4 -// 
CHECK13-NEXT: call void @_Z9gtid_testv() +// CHECK13-NEXT: store i32 [[ADD11]], i32* [[I6]], align 4, !llvm.access.group !10 +// CHECK13-NEXT: call void @_Z9gtid_testv(), !llvm.access.group !10 // CHECK13-NEXT: br label [[OMP_BODY_CONTINUE12:%.*]] // CHECK13: omp.body.continue12: // CHECK13-NEXT: br label [[OMP_INNER_FOR_INC13:%.*]] // CHECK13: omp.inner.for.inc13: -// CHECK13-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV5]], align 4 +// CHECK13-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV5]], align 4, !llvm.access.group !10 // CHECK13-NEXT: [[ADD14:%.*]] = add nsw i32 [[TMP9]], 1 -// CHECK13-NEXT: store i32 [[ADD14]], i32* [[DOTOMP_IV5]], align 4 +// CHECK13-NEXT: store i32 [[ADD14]], i32* [[DOTOMP_IV5]], align 4, !llvm.access.group !10 // CHECK13-NEXT: br label [[OMP_INNER_FOR_COND7]], !llvm.loop [[LOOP11:![0-9]+]] // CHECK13: omp.inner.for.end15: // CHECK13-NEXT: store i32 100, i32* [[I6]], align 4 @@ -13914,23 +13914,23 @@ int main() { // CHECK13-NEXT: store i32 [[TMP0]], i32* [[DOTOMP_IV]], align 4 // CHECK13-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK13: omp.inner.for.cond: -// CHECK13-NEXT: [[TMP1:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK13-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK13-NEXT: [[TMP1:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !13 +// CHECK13-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !13 // CHECK13-NEXT: [[CMP:%.*]] = icmp sle i32 [[TMP1]], [[TMP2]] // CHECK13-NEXT: br i1 [[CMP]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK13: omp.inner.for.body: -// CHECK13-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK13-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !13 // CHECK13-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP3]], 1 // CHECK13-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK13-NEXT: store i32 [[ADD]], i32* [[I]], align 4 -// CHECK13-NEXT: call void @_Z3fn4v() +// CHECK13-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !13 +// CHECK13-NEXT: call void @_Z3fn4v(), !llvm.access.group !13 // CHECK13-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK13: omp.body.continue: // CHECK13-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK13: omp.inner.for.inc: -// CHECK13-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK13-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !13 // CHECK13-NEXT: [[ADD1:%.*]] = add nsw i32 [[TMP4]], 1 -// CHECK13-NEXT: store i32 [[ADD1]], i32* [[DOTOMP_IV]], align 4 +// CHECK13-NEXT: store i32 [[ADD1]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !13 // CHECK13-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP14:![0-9]+]] // CHECK13: omp.inner.for.end: // CHECK13-NEXT: store i32 100, i32* [[I]], align 4 @@ -13940,23 +13940,23 @@ int main() { // CHECK13-NEXT: store i32 [[TMP5]], i32* [[DOTOMP_IV5]], align 4 // CHECK13-NEXT: br label [[OMP_INNER_FOR_COND7:%.*]] // CHECK13: omp.inner.for.cond7: -// CHECK13-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV5]], align 4 -// CHECK13-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_UB4]], align 4 +// CHECK13-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV5]], align 4, !llvm.access.group !16 +// CHECK13-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_UB4]], align 4, !llvm.access.group !16 // CHECK13-NEXT: [[CMP8:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] // CHECK13-NEXT: br i1 [[CMP8]], label [[OMP_INNER_FOR_BODY9:%.*]], label [[OMP_INNER_FOR_END15:%.*]] // 
CHECK13: omp.inner.for.body9: -// CHECK13-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV5]], align 4 +// CHECK13-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV5]], align 4, !llvm.access.group !16 // CHECK13-NEXT: [[MUL10:%.*]] = mul nsw i32 [[TMP8]], 1 // CHECK13-NEXT: [[ADD11:%.*]] = add nsw i32 0, [[MUL10]] -// CHECK13-NEXT: store i32 [[ADD11]], i32* [[I6]], align 4 -// CHECK13-NEXT: call void @_Z3fn5v() +// CHECK13-NEXT: store i32 [[ADD11]], i32* [[I6]], align 4, !llvm.access.group !16 +// CHECK13-NEXT: call void @_Z3fn5v(), !llvm.access.group !16 // CHECK13-NEXT: br label [[OMP_BODY_CONTINUE12:%.*]] // CHECK13: omp.body.continue12: // CHECK13-NEXT: br label [[OMP_INNER_FOR_INC13:%.*]] // CHECK13: omp.inner.for.inc13: -// CHECK13-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV5]], align 4 +// CHECK13-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV5]], align 4, !llvm.access.group !16 // CHECK13-NEXT: [[ADD14:%.*]] = add nsw i32 [[TMP9]], 1 -// CHECK13-NEXT: store i32 [[ADD14]], i32* [[DOTOMP_IV5]], align 4 +// CHECK13-NEXT: store i32 [[ADD14]], i32* [[DOTOMP_IV5]], align 4, !llvm.access.group !16 // CHECK13-NEXT: br label [[OMP_INNER_FOR_COND7]], !llvm.loop [[LOOP17:![0-9]+]] // CHECK13: omp.inner.for.end15: // CHECK13-NEXT: store i32 100, i32* [[I6]], align 4 @@ -13970,23 +13970,23 @@ int main() { // CHECK13-NEXT: store i32 [[TMP11]], i32* [[DOTOMP_IV19]], align 4 // CHECK13-NEXT: br label [[OMP_INNER_FOR_COND21:%.*]] // CHECK13: omp.inner.for.cond21: -// CHECK13-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV19]], align 4 -// CHECK13-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_UB18]], align 4 +// CHECK13-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV19]], align 4, !llvm.access.group !19 +// CHECK13-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_UB18]], align 4, !llvm.access.group !19 // CHECK13-NEXT: [[CMP22:%.*]] = icmp sle i32 [[TMP12]], [[TMP13]] // CHECK13-NEXT: br i1 [[CMP22]], label [[OMP_INNER_FOR_BODY23:%.*]], label [[OMP_INNER_FOR_END29:%.*]] // CHECK13: omp.inner.for.body23: -// CHECK13-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IV19]], align 4 +// CHECK13-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IV19]], align 4, !llvm.access.group !19 // CHECK13-NEXT: [[MUL24:%.*]] = mul nsw i32 [[TMP14]], 1 // CHECK13-NEXT: [[ADD25:%.*]] = add nsw i32 0, [[MUL24]] -// CHECK13-NEXT: store i32 [[ADD25]], i32* [[I20]], align 4 -// CHECK13-NEXT: call void @_Z3fn6v() +// CHECK13-NEXT: store i32 [[ADD25]], i32* [[I20]], align 4, !llvm.access.group !19 +// CHECK13-NEXT: call void @_Z3fn6v(), !llvm.access.group !19 // CHECK13-NEXT: br label [[OMP_BODY_CONTINUE26:%.*]] // CHECK13: omp.body.continue26: // CHECK13-NEXT: br label [[OMP_INNER_FOR_INC27:%.*]] // CHECK13: omp.inner.for.inc27: -// CHECK13-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_IV19]], align 4 +// CHECK13-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_IV19]], align 4, !llvm.access.group !19 // CHECK13-NEXT: [[ADD28:%.*]] = add nsw i32 [[TMP15]], 1 -// CHECK13-NEXT: store i32 [[ADD28]], i32* [[DOTOMP_IV19]], align 4 +// CHECK13-NEXT: store i32 [[ADD28]], i32* [[DOTOMP_IV19]], align 4, !llvm.access.group !19 // CHECK13-NEXT: br label [[OMP_INNER_FOR_COND21]], !llvm.loop [[LOOP20:![0-9]+]] // CHECK13: omp.inner.for.end29: // CHECK13-NEXT: store i32 100, i32* [[I20]], align 4 @@ -14022,23 +14022,23 @@ int main() { // CHECK13-NEXT: store i32 [[TMP0]], i32* [[DOTOMP_IV]], align 4 // CHECK13-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK13: omp.inner.for.cond: -// CHECK13-NEXT: [[TMP1:%.*]] = load i32, i32* [[DOTOMP_IV]], 
align 4 -// CHECK13-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK13-NEXT: [[TMP1:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !22 +// CHECK13-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !22 // CHECK13-NEXT: [[CMP:%.*]] = icmp sle i32 [[TMP1]], [[TMP2]] // CHECK13-NEXT: br i1 [[CMP]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK13: omp.inner.for.body: -// CHECK13-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK13-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !22 // CHECK13-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP3]], 1 // CHECK13-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK13-NEXT: store i32 [[ADD]], i32* [[I]], align 4 -// CHECK13-NEXT: call void @_Z3fn1v() +// CHECK13-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !22 +// CHECK13-NEXT: call void @_Z3fn1v(), !llvm.access.group !22 // CHECK13-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK13: omp.body.continue: // CHECK13-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK13: omp.inner.for.inc: -// CHECK13-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK13-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !22 // CHECK13-NEXT: [[ADD1:%.*]] = add nsw i32 [[TMP4]], 1 -// CHECK13-NEXT: store i32 [[ADD1]], i32* [[DOTOMP_IV]], align 4 +// CHECK13-NEXT: store i32 [[ADD1]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !22 // CHECK13-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP23:![0-9]+]] // CHECK13: omp.inner.for.end: // CHECK13-NEXT: store i32 100, i32* [[I]], align 4 @@ -14048,23 +14048,23 @@ int main() { // CHECK13-NEXT: store i32 [[TMP5]], i32* [[DOTOMP_IV5]], align 4 // CHECK13-NEXT: br label [[OMP_INNER_FOR_COND7:%.*]] // CHECK13: omp.inner.for.cond7: -// CHECK13-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV5]], align 4 -// CHECK13-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_UB4]], align 4 +// CHECK13-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV5]], align 4, !llvm.access.group !25 +// CHECK13-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_UB4]], align 4, !llvm.access.group !25 // CHECK13-NEXT: [[CMP8:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] // CHECK13-NEXT: br i1 [[CMP8]], label [[OMP_INNER_FOR_BODY9:%.*]], label [[OMP_INNER_FOR_END15:%.*]] // CHECK13: omp.inner.for.body9: -// CHECK13-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV5]], align 4 +// CHECK13-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV5]], align 4, !llvm.access.group !25 // CHECK13-NEXT: [[MUL10:%.*]] = mul nsw i32 [[TMP8]], 1 // CHECK13-NEXT: [[ADD11:%.*]] = add nsw i32 0, [[MUL10]] -// CHECK13-NEXT: store i32 [[ADD11]], i32* [[I6]], align 4 -// CHECK13-NEXT: call void @_Z3fn2v() +// CHECK13-NEXT: store i32 [[ADD11]], i32* [[I6]], align 4, !llvm.access.group !25 +// CHECK13-NEXT: call void @_Z3fn2v(), !llvm.access.group !25 // CHECK13-NEXT: br label [[OMP_BODY_CONTINUE12:%.*]] // CHECK13: omp.body.continue12: // CHECK13-NEXT: br label [[OMP_INNER_FOR_INC13:%.*]] // CHECK13: omp.inner.for.inc13: -// CHECK13-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV5]], align 4 +// CHECK13-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV5]], align 4, !llvm.access.group !25 // CHECK13-NEXT: [[ADD14:%.*]] = add nsw i32 [[TMP9]], 1 -// CHECK13-NEXT: store i32 [[ADD14]], i32* [[DOTOMP_IV5]], align 4 +// CHECK13-NEXT: store i32 [[ADD14]], i32* [[DOTOMP_IV5]], align 4, !llvm.access.group !25 // CHECK13-NEXT: br label [[OMP_INNER_FOR_COND7]], 
!llvm.loop [[LOOP26:![0-9]+]] // CHECK13: omp.inner.for.end15: // CHECK13-NEXT: store i32 100, i32* [[I6]], align 4 @@ -14078,23 +14078,23 @@ int main() { // CHECK13-NEXT: store i32 [[TMP11]], i32* [[DOTOMP_IV19]], align 4 // CHECK13-NEXT: br label [[OMP_INNER_FOR_COND21:%.*]] // CHECK13: omp.inner.for.cond21: -// CHECK13-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV19]], align 4 -// CHECK13-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_UB18]], align 4 +// CHECK13-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV19]], align 4, !llvm.access.group !28 +// CHECK13-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_UB18]], align 4, !llvm.access.group !28 // CHECK13-NEXT: [[CMP22:%.*]] = icmp sle i32 [[TMP12]], [[TMP13]] // CHECK13-NEXT: br i1 [[CMP22]], label [[OMP_INNER_FOR_BODY23:%.*]], label [[OMP_INNER_FOR_END29:%.*]] // CHECK13: omp.inner.for.body23: -// CHECK13-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IV19]], align 4 +// CHECK13-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IV19]], align 4, !llvm.access.group !28 // CHECK13-NEXT: [[MUL24:%.*]] = mul nsw i32 [[TMP14]], 1 // CHECK13-NEXT: [[ADD25:%.*]] = add nsw i32 0, [[MUL24]] -// CHECK13-NEXT: store i32 [[ADD25]], i32* [[I20]], align 4 -// CHECK13-NEXT: call void @_Z3fn3v() +// CHECK13-NEXT: store i32 [[ADD25]], i32* [[I20]], align 4, !llvm.access.group !28 +// CHECK13-NEXT: call void @_Z3fn3v(), !llvm.access.group !28 // CHECK13-NEXT: br label [[OMP_BODY_CONTINUE26:%.*]] // CHECK13: omp.body.continue26: // CHECK13-NEXT: br label [[OMP_INNER_FOR_INC27:%.*]] // CHECK13: omp.inner.for.inc27: -// CHECK13-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_IV19]], align 4 +// CHECK13-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_IV19]], align 4, !llvm.access.group !28 // CHECK13-NEXT: [[ADD28:%.*]] = add nsw i32 [[TMP15]], 1 -// CHECK13-NEXT: store i32 [[ADD28]], i32* [[DOTOMP_IV19]], align 4 +// CHECK13-NEXT: store i32 [[ADD28]], i32* [[DOTOMP_IV19]], align 4, !llvm.access.group !28 // CHECK13-NEXT: br label [[OMP_INNER_FOR_COND21]], !llvm.loop [[LOOP29:![0-9]+]] // CHECK13: omp.inner.for.end29: // CHECK13-NEXT: store i32 100, i32* [[I20]], align 4 @@ -14120,22 +14120,22 @@ int main() { // CHECK14-NEXT: store i32 [[TMP0]], i32* [[DOTOMP_IV]], align 4 // CHECK14-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK14: omp.inner.for.cond: -// CHECK14-NEXT: [[TMP1:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK14-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK14-NEXT: [[TMP1:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !6 +// CHECK14-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !6 // CHECK14-NEXT: [[CMP:%.*]] = icmp sle i32 [[TMP1]], [[TMP2]] // CHECK14-NEXT: br i1 [[CMP]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK14: omp.inner.for.body: -// CHECK14-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK14-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !6 // CHECK14-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP3]], 1 // CHECK14-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK14-NEXT: store i32 [[ADD]], i32* [[I]], align 4 +// CHECK14-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !6 // CHECK14-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK14: omp.body.continue: // CHECK14-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK14: omp.inner.for.inc: -// CHECK14-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK14-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_IV]], 
align 4, !llvm.access.group !6 // CHECK14-NEXT: [[ADD1:%.*]] = add nsw i32 [[TMP4]], 1 -// CHECK14-NEXT: store i32 [[ADD1]], i32* [[DOTOMP_IV]], align 4 +// CHECK14-NEXT: store i32 [[ADD1]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !6 // CHECK14-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP7:![0-9]+]] // CHECK14: omp.inner.for.end: // CHECK14-NEXT: store i32 100, i32* [[I]], align 4 @@ -14145,23 +14145,23 @@ int main() { // CHECK14-NEXT: store i32 [[TMP5]], i32* [[DOTOMP_IV5]], align 4 // CHECK14-NEXT: br label [[OMP_INNER_FOR_COND7:%.*]] // CHECK14: omp.inner.for.cond7: -// CHECK14-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV5]], align 4 -// CHECK14-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_UB4]], align 4 +// CHECK14-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV5]], align 4, !llvm.access.group !10 +// CHECK14-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_UB4]], align 4, !llvm.access.group !10 // CHECK14-NEXT: [[CMP8:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] // CHECK14-NEXT: br i1 [[CMP8]], label [[OMP_INNER_FOR_BODY9:%.*]], label [[OMP_INNER_FOR_END15:%.*]] // CHECK14: omp.inner.for.body9: -// CHECK14-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV5]], align 4 +// CHECK14-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV5]], align 4, !llvm.access.group !10 // CHECK14-NEXT: [[MUL10:%.*]] = mul nsw i32 [[TMP8]], 1 // CHECK14-NEXT: [[ADD11:%.*]] = add nsw i32 0, [[MUL10]] -// CHECK14-NEXT: store i32 [[ADD11]], i32* [[I6]], align 4 -// CHECK14-NEXT: call void @_Z9gtid_testv() +// CHECK14-NEXT: store i32 [[ADD11]], i32* [[I6]], align 4, !llvm.access.group !10 +// CHECK14-NEXT: call void @_Z9gtid_testv(), !llvm.access.group !10 // CHECK14-NEXT: br label [[OMP_BODY_CONTINUE12:%.*]] // CHECK14: omp.body.continue12: // CHECK14-NEXT: br label [[OMP_INNER_FOR_INC13:%.*]] // CHECK14: omp.inner.for.inc13: -// CHECK14-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV5]], align 4 +// CHECK14-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV5]], align 4, !llvm.access.group !10 // CHECK14-NEXT: [[ADD14:%.*]] = add nsw i32 [[TMP9]], 1 -// CHECK14-NEXT: store i32 [[ADD14]], i32* [[DOTOMP_IV5]], align 4 +// CHECK14-NEXT: store i32 [[ADD14]], i32* [[DOTOMP_IV5]], align 4, !llvm.access.group !10 // CHECK14-NEXT: br label [[OMP_INNER_FOR_COND7]], !llvm.loop [[LOOP11:![0-9]+]] // CHECK14: omp.inner.for.end15: // CHECK14-NEXT: store i32 100, i32* [[I6]], align 4 @@ -14195,23 +14195,23 @@ int main() { // CHECK14-NEXT: store i32 [[TMP0]], i32* [[DOTOMP_IV]], align 4 // CHECK14-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK14: omp.inner.for.cond: -// CHECK14-NEXT: [[TMP1:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK14-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK14-NEXT: [[TMP1:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !13 +// CHECK14-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !13 // CHECK14-NEXT: [[CMP:%.*]] = icmp sle i32 [[TMP1]], [[TMP2]] // CHECK14-NEXT: br i1 [[CMP]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK14: omp.inner.for.body: -// CHECK14-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK14-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !13 // CHECK14-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP3]], 1 // CHECK14-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK14-NEXT: store i32 [[ADD]], i32* [[I]], align 4 -// CHECK14-NEXT: call void @_Z3fn4v() +// CHECK14-NEXT: store i32 [[ADD]], i32* [[I]], align 4, 
!llvm.access.group !13 +// CHECK14-NEXT: call void @_Z3fn4v(), !llvm.access.group !13 // CHECK14-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK14: omp.body.continue: // CHECK14-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK14: omp.inner.for.inc: -// CHECK14-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK14-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !13 // CHECK14-NEXT: [[ADD1:%.*]] = add nsw i32 [[TMP4]], 1 -// CHECK14-NEXT: store i32 [[ADD1]], i32* [[DOTOMP_IV]], align 4 +// CHECK14-NEXT: store i32 [[ADD1]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !13 // CHECK14-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP14:![0-9]+]] // CHECK14: omp.inner.for.end: // CHECK14-NEXT: store i32 100, i32* [[I]], align 4 @@ -14221,23 +14221,23 @@ int main() { // CHECK14-NEXT: store i32 [[TMP5]], i32* [[DOTOMP_IV5]], align 4 // CHECK14-NEXT: br label [[OMP_INNER_FOR_COND7:%.*]] // CHECK14: omp.inner.for.cond7: -// CHECK14-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV5]], align 4 -// CHECK14-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_UB4]], align 4 +// CHECK14-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV5]], align 4, !llvm.access.group !16 +// CHECK14-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_UB4]], align 4, !llvm.access.group !16 // CHECK14-NEXT: [[CMP8:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] // CHECK14-NEXT: br i1 [[CMP8]], label [[OMP_INNER_FOR_BODY9:%.*]], label [[OMP_INNER_FOR_END15:%.*]] // CHECK14: omp.inner.for.body9: -// CHECK14-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV5]], align 4 +// CHECK14-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV5]], align 4, !llvm.access.group !16 // CHECK14-NEXT: [[MUL10:%.*]] = mul nsw i32 [[TMP8]], 1 // CHECK14-NEXT: [[ADD11:%.*]] = add nsw i32 0, [[MUL10]] -// CHECK14-NEXT: store i32 [[ADD11]], i32* [[I6]], align 4 -// CHECK14-NEXT: call void @_Z3fn5v() +// CHECK14-NEXT: store i32 [[ADD11]], i32* [[I6]], align 4, !llvm.access.group !16 +// CHECK14-NEXT: call void @_Z3fn5v(), !llvm.access.group !16 // CHECK14-NEXT: br label [[OMP_BODY_CONTINUE12:%.*]] // CHECK14: omp.body.continue12: // CHECK14-NEXT: br label [[OMP_INNER_FOR_INC13:%.*]] // CHECK14: omp.inner.for.inc13: -// CHECK14-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV5]], align 4 +// CHECK14-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV5]], align 4, !llvm.access.group !16 // CHECK14-NEXT: [[ADD14:%.*]] = add nsw i32 [[TMP9]], 1 -// CHECK14-NEXT: store i32 [[ADD14]], i32* [[DOTOMP_IV5]], align 4 +// CHECK14-NEXT: store i32 [[ADD14]], i32* [[DOTOMP_IV5]], align 4, !llvm.access.group !16 // CHECK14-NEXT: br label [[OMP_INNER_FOR_COND7]], !llvm.loop [[LOOP17:![0-9]+]] // CHECK14: omp.inner.for.end15: // CHECK14-NEXT: store i32 100, i32* [[I6]], align 4 @@ -14251,23 +14251,23 @@ int main() { // CHECK14-NEXT: store i32 [[TMP11]], i32* [[DOTOMP_IV19]], align 4 // CHECK14-NEXT: br label [[OMP_INNER_FOR_COND21:%.*]] // CHECK14: omp.inner.for.cond21: -// CHECK14-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV19]], align 4 -// CHECK14-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_UB18]], align 4 +// CHECK14-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV19]], align 4, !llvm.access.group !19 +// CHECK14-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_UB18]], align 4, !llvm.access.group !19 // CHECK14-NEXT: [[CMP22:%.*]] = icmp sle i32 [[TMP12]], [[TMP13]] // CHECK14-NEXT: br i1 [[CMP22]], label [[OMP_INNER_FOR_BODY23:%.*]], label [[OMP_INNER_FOR_END29:%.*]] // CHECK14: omp.inner.for.body23: -// CHECK14-NEXT: [[TMP14:%.*]] = 
load i32, i32* [[DOTOMP_IV19]], align 4 +// CHECK14-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IV19]], align 4, !llvm.access.group !19 // CHECK14-NEXT: [[MUL24:%.*]] = mul nsw i32 [[TMP14]], 1 // CHECK14-NEXT: [[ADD25:%.*]] = add nsw i32 0, [[MUL24]] -// CHECK14-NEXT: store i32 [[ADD25]], i32* [[I20]], align 4 -// CHECK14-NEXT: call void @_Z3fn6v() +// CHECK14-NEXT: store i32 [[ADD25]], i32* [[I20]], align 4, !llvm.access.group !19 +// CHECK14-NEXT: call void @_Z3fn6v(), !llvm.access.group !19 // CHECK14-NEXT: br label [[OMP_BODY_CONTINUE26:%.*]] // CHECK14: omp.body.continue26: // CHECK14-NEXT: br label [[OMP_INNER_FOR_INC27:%.*]] // CHECK14: omp.inner.for.inc27: -// CHECK14-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_IV19]], align 4 +// CHECK14-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_IV19]], align 4, !llvm.access.group !19 // CHECK14-NEXT: [[ADD28:%.*]] = add nsw i32 [[TMP15]], 1 -// CHECK14-NEXT: store i32 [[ADD28]], i32* [[DOTOMP_IV19]], align 4 +// CHECK14-NEXT: store i32 [[ADD28]], i32* [[DOTOMP_IV19]], align 4, !llvm.access.group !19 // CHECK14-NEXT: br label [[OMP_INNER_FOR_COND21]], !llvm.loop [[LOOP20:![0-9]+]] // CHECK14: omp.inner.for.end29: // CHECK14-NEXT: store i32 100, i32* [[I20]], align 4 @@ -14303,23 +14303,23 @@ int main() { // CHECK14-NEXT: store i32 [[TMP0]], i32* [[DOTOMP_IV]], align 4 // CHECK14-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK14: omp.inner.for.cond: -// CHECK14-NEXT: [[TMP1:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK14-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK14-NEXT: [[TMP1:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !22 +// CHECK14-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !22 // CHECK14-NEXT: [[CMP:%.*]] = icmp sle i32 [[TMP1]], [[TMP2]] // CHECK14-NEXT: br i1 [[CMP]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK14: omp.inner.for.body: -// CHECK14-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK14-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !22 // CHECK14-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP3]], 1 // CHECK14-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK14-NEXT: store i32 [[ADD]], i32* [[I]], align 4 -// CHECK14-NEXT: call void @_Z3fn1v() +// CHECK14-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !22 +// CHECK14-NEXT: call void @_Z3fn1v(), !llvm.access.group !22 // CHECK14-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK14: omp.body.continue: // CHECK14-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK14: omp.inner.for.inc: -// CHECK14-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK14-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !22 // CHECK14-NEXT: [[ADD1:%.*]] = add nsw i32 [[TMP4]], 1 -// CHECK14-NEXT: store i32 [[ADD1]], i32* [[DOTOMP_IV]], align 4 +// CHECK14-NEXT: store i32 [[ADD1]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !22 // CHECK14-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP23:![0-9]+]] // CHECK14: omp.inner.for.end: // CHECK14-NEXT: store i32 100, i32* [[I]], align 4 @@ -14329,23 +14329,23 @@ int main() { // CHECK14-NEXT: store i32 [[TMP5]], i32* [[DOTOMP_IV5]], align 4 // CHECK14-NEXT: br label [[OMP_INNER_FOR_COND7:%.*]] // CHECK14: omp.inner.for.cond7: -// CHECK14-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV5]], align 4 -// CHECK14-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_UB4]], align 4 +// CHECK14-NEXT: [[TMP6:%.*]] = load i32, 
i32* [[DOTOMP_IV5]], align 4, !llvm.access.group !25 +// CHECK14-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_UB4]], align 4, !llvm.access.group !25 // CHECK14-NEXT: [[CMP8:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] // CHECK14-NEXT: br i1 [[CMP8]], label [[OMP_INNER_FOR_BODY9:%.*]], label [[OMP_INNER_FOR_END15:%.*]] // CHECK14: omp.inner.for.body9: -// CHECK14-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV5]], align 4 +// CHECK14-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV5]], align 4, !llvm.access.group !25 // CHECK14-NEXT: [[MUL10:%.*]] = mul nsw i32 [[TMP8]], 1 // CHECK14-NEXT: [[ADD11:%.*]] = add nsw i32 0, [[MUL10]] -// CHECK14-NEXT: store i32 [[ADD11]], i32* [[I6]], align 4 -// CHECK14-NEXT: call void @_Z3fn2v() +// CHECK14-NEXT: store i32 [[ADD11]], i32* [[I6]], align 4, !llvm.access.group !25 +// CHECK14-NEXT: call void @_Z3fn2v(), !llvm.access.group !25 // CHECK14-NEXT: br label [[OMP_BODY_CONTINUE12:%.*]] // CHECK14: omp.body.continue12: // CHECK14-NEXT: br label [[OMP_INNER_FOR_INC13:%.*]] // CHECK14: omp.inner.for.inc13: -// CHECK14-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV5]], align 4 +// CHECK14-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV5]], align 4, !llvm.access.group !25 // CHECK14-NEXT: [[ADD14:%.*]] = add nsw i32 [[TMP9]], 1 -// CHECK14-NEXT: store i32 [[ADD14]], i32* [[DOTOMP_IV5]], align 4 +// CHECK14-NEXT: store i32 [[ADD14]], i32* [[DOTOMP_IV5]], align 4, !llvm.access.group !25 // CHECK14-NEXT: br label [[OMP_INNER_FOR_COND7]], !llvm.loop [[LOOP26:![0-9]+]] // CHECK14: omp.inner.for.end15: // CHECK14-NEXT: store i32 100, i32* [[I6]], align 4 @@ -14359,23 +14359,23 @@ int main() { // CHECK14-NEXT: store i32 [[TMP11]], i32* [[DOTOMP_IV19]], align 4 // CHECK14-NEXT: br label [[OMP_INNER_FOR_COND21:%.*]] // CHECK14: omp.inner.for.cond21: -// CHECK14-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV19]], align 4 -// CHECK14-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_UB18]], align 4 +// CHECK14-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV19]], align 4, !llvm.access.group !28 +// CHECK14-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_UB18]], align 4, !llvm.access.group !28 // CHECK14-NEXT: [[CMP22:%.*]] = icmp sle i32 [[TMP12]], [[TMP13]] // CHECK14-NEXT: br i1 [[CMP22]], label [[OMP_INNER_FOR_BODY23:%.*]], label [[OMP_INNER_FOR_END29:%.*]] // CHECK14: omp.inner.for.body23: -// CHECK14-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IV19]], align 4 +// CHECK14-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IV19]], align 4, !llvm.access.group !28 // CHECK14-NEXT: [[MUL24:%.*]] = mul nsw i32 [[TMP14]], 1 // CHECK14-NEXT: [[ADD25:%.*]] = add nsw i32 0, [[MUL24]] -// CHECK14-NEXT: store i32 [[ADD25]], i32* [[I20]], align 4 -// CHECK14-NEXT: call void @_Z3fn3v() +// CHECK14-NEXT: store i32 [[ADD25]], i32* [[I20]], align 4, !llvm.access.group !28 +// CHECK14-NEXT: call void @_Z3fn3v(), !llvm.access.group !28 // CHECK14-NEXT: br label [[OMP_BODY_CONTINUE26:%.*]] // CHECK14: omp.body.continue26: // CHECK14-NEXT: br label [[OMP_INNER_FOR_INC27:%.*]] // CHECK14: omp.inner.for.inc27: -// CHECK14-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_IV19]], align 4 +// CHECK14-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_IV19]], align 4, !llvm.access.group !28 // CHECK14-NEXT: [[ADD28:%.*]] = add nsw i32 [[TMP15]], 1 -// CHECK14-NEXT: store i32 [[ADD28]], i32* [[DOTOMP_IV19]], align 4 +// CHECK14-NEXT: store i32 [[ADD28]], i32* [[DOTOMP_IV19]], align 4, !llvm.access.group !28 // CHECK14-NEXT: br label [[OMP_INNER_FOR_COND21]], !llvm.loop [[LOOP29:![0-9]+]] // CHECK14: 
omp.inner.for.end29: // CHECK14-NEXT: store i32 100, i32* [[I20]], align 4 @@ -14401,22 +14401,22 @@ int main() { // CHECK15-NEXT: store i32 [[TMP0]], i32* [[DOTOMP_IV]], align 4 // CHECK15-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK15: omp.inner.for.cond: -// CHECK15-NEXT: [[TMP1:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK15-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK15-NEXT: [[TMP1:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !6 +// CHECK15-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !6 // CHECK15-NEXT: [[CMP:%.*]] = icmp sle i32 [[TMP1]], [[TMP2]] // CHECK15-NEXT: br i1 [[CMP]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK15: omp.inner.for.body: -// CHECK15-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK15-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !6 // CHECK15-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP3]], 1 // CHECK15-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK15-NEXT: store i32 [[ADD]], i32* [[I]], align 4 +// CHECK15-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !6 // CHECK15-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK15: omp.body.continue: // CHECK15-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK15: omp.inner.for.inc: -// CHECK15-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK15-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !6 // CHECK15-NEXT: [[ADD1:%.*]] = add nsw i32 [[TMP4]], 1 -// CHECK15-NEXT: store i32 [[ADD1]], i32* [[DOTOMP_IV]], align 4 +// CHECK15-NEXT: store i32 [[ADD1]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !6 // CHECK15-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP7:![0-9]+]] // CHECK15: omp.inner.for.end: // CHECK15-NEXT: store i32 100, i32* [[I]], align 4 @@ -14426,23 +14426,23 @@ int main() { // CHECK15-NEXT: store i32 [[TMP5]], i32* [[DOTOMP_IV5]], align 4 // CHECK15-NEXT: br label [[OMP_INNER_FOR_COND7:%.*]] // CHECK15: omp.inner.for.cond7: -// CHECK15-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV5]], align 4 -// CHECK15-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_UB4]], align 4 +// CHECK15-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV5]], align 4, !llvm.access.group !10 +// CHECK15-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_UB4]], align 4, !llvm.access.group !10 // CHECK15-NEXT: [[CMP8:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] // CHECK15-NEXT: br i1 [[CMP8]], label [[OMP_INNER_FOR_BODY9:%.*]], label [[OMP_INNER_FOR_END15:%.*]] // CHECK15: omp.inner.for.body9: -// CHECK15-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV5]], align 4 +// CHECK15-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV5]], align 4, !llvm.access.group !10 // CHECK15-NEXT: [[MUL10:%.*]] = mul nsw i32 [[TMP8]], 1 // CHECK15-NEXT: [[ADD11:%.*]] = add nsw i32 0, [[MUL10]] -// CHECK15-NEXT: store i32 [[ADD11]], i32* [[I6]], align 4 -// CHECK15-NEXT: call void @_Z9gtid_testv() +// CHECK15-NEXT: store i32 [[ADD11]], i32* [[I6]], align 4, !llvm.access.group !10 +// CHECK15-NEXT: call void @_Z9gtid_testv(), !llvm.access.group !10 // CHECK15-NEXT: br label [[OMP_BODY_CONTINUE12:%.*]] // CHECK15: omp.body.continue12: // CHECK15-NEXT: br label [[OMP_INNER_FOR_INC13:%.*]] // CHECK15: omp.inner.for.inc13: -// CHECK15-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV5]], align 4 +// CHECK15-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV5]], align 4, !llvm.access.group !10 // CHECK15-NEXT: [[ADD14:%.*]] = add nsw 
i32 [[TMP9]], 1 -// CHECK15-NEXT: store i32 [[ADD14]], i32* [[DOTOMP_IV5]], align 4 +// CHECK15-NEXT: store i32 [[ADD14]], i32* [[DOTOMP_IV5]], align 4, !llvm.access.group !10 // CHECK15-NEXT: br label [[OMP_INNER_FOR_COND7]], !llvm.loop [[LOOP11:![0-9]+]] // CHECK15: omp.inner.for.end15: // CHECK15-NEXT: store i32 100, i32* [[I6]], align 4 @@ -14476,23 +14476,23 @@ int main() { // CHECK15-NEXT: store i32 [[TMP0]], i32* [[DOTOMP_IV]], align 4 // CHECK15-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK15: omp.inner.for.cond: -// CHECK15-NEXT: [[TMP1:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK15-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK15-NEXT: [[TMP1:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !13 +// CHECK15-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !13 // CHECK15-NEXT: [[CMP:%.*]] = icmp sle i32 [[TMP1]], [[TMP2]] // CHECK15-NEXT: br i1 [[CMP]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK15: omp.inner.for.body: -// CHECK15-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK15-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !13 // CHECK15-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP3]], 1 // CHECK15-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK15-NEXT: store i32 [[ADD]], i32* [[I]], align 4 -// CHECK15-NEXT: call void @_Z3fn4v() +// CHECK15-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !13 +// CHECK15-NEXT: call void @_Z3fn4v(), !llvm.access.group !13 // CHECK15-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK15: omp.body.continue: // CHECK15-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK15: omp.inner.for.inc: -// CHECK15-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK15-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !13 // CHECK15-NEXT: [[ADD1:%.*]] = add nsw i32 [[TMP4]], 1 -// CHECK15-NEXT: store i32 [[ADD1]], i32* [[DOTOMP_IV]], align 4 +// CHECK15-NEXT: store i32 [[ADD1]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !13 // CHECK15-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP14:![0-9]+]] // CHECK15: omp.inner.for.end: // CHECK15-NEXT: store i32 100, i32* [[I]], align 4 @@ -14536,23 +14536,23 @@ int main() { // CHECK15: omp_if.then: // CHECK15-NEXT: br label [[OMP_INNER_FOR_COND22:%.*]] // CHECK15: omp.inner.for.cond22: -// CHECK15-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_IV19]], align 4 -// CHECK15-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_UB18]], align 4 +// CHECK15-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_IV19]], align 4, !llvm.access.group !18 +// CHECK15-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_UB18]], align 4, !llvm.access.group !18 // CHECK15-NEXT: [[CMP23:%.*]] = icmp sle i32 [[TMP13]], [[TMP14]] // CHECK15-NEXT: br i1 [[CMP23]], label [[OMP_INNER_FOR_BODY24:%.*]], label [[OMP_INNER_FOR_END30:%.*]] // CHECK15: omp.inner.for.body24: -// CHECK15-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_IV19]], align 4 +// CHECK15-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_IV19]], align 4, !llvm.access.group !18 // CHECK15-NEXT: [[MUL25:%.*]] = mul nsw i32 [[TMP15]], 1 // CHECK15-NEXT: [[ADD26:%.*]] = add nsw i32 0, [[MUL25]] -// CHECK15-NEXT: store i32 [[ADD26]], i32* [[I20]], align 4 -// CHECK15-NEXT: call void @_Z3fn6v() +// CHECK15-NEXT: store i32 [[ADD26]], i32* [[I20]], align 4, !llvm.access.group !18 +// CHECK15-NEXT: call void @_Z3fn6v(), !llvm.access.group !18 // CHECK15-NEXT: br label 
[[OMP_BODY_CONTINUE27:%.*]] // CHECK15: omp.body.continue27: // CHECK15-NEXT: br label [[OMP_INNER_FOR_INC28:%.*]] // CHECK15: omp.inner.for.inc28: -// CHECK15-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_IV19]], align 4 +// CHECK15-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_IV19]], align 4, !llvm.access.group !18 // CHECK15-NEXT: [[ADD29:%.*]] = add nsw i32 [[TMP16]], 1 -// CHECK15-NEXT: store i32 [[ADD29]], i32* [[DOTOMP_IV19]], align 4 +// CHECK15-NEXT: store i32 [[ADD29]], i32* [[DOTOMP_IV19]], align 4, !llvm.access.group !18 // CHECK15-NEXT: br label [[OMP_INNER_FOR_COND22]], !llvm.loop [[LOOP19:![0-9]+]] // CHECK15: omp.inner.for.end30: // CHECK15-NEXT: br label [[OMP_IF_END:%.*]] @@ -14613,23 +14613,23 @@ int main() { // CHECK15-NEXT: store i32 [[TMP0]], i32* [[DOTOMP_IV]], align 4 // CHECK15-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK15: omp.inner.for.cond: -// CHECK15-NEXT: [[TMP1:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK15-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK15-NEXT: [[TMP1:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !22 +// CHECK15-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !22 // CHECK15-NEXT: [[CMP:%.*]] = icmp sle i32 [[TMP1]], [[TMP2]] // CHECK15-NEXT: br i1 [[CMP]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK15: omp.inner.for.body: -// CHECK15-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK15-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !22 // CHECK15-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP3]], 1 // CHECK15-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK15-NEXT: store i32 [[ADD]], i32* [[I]], align 4 -// CHECK15-NEXT: call void @_Z3fn1v() +// CHECK15-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !22 +// CHECK15-NEXT: call void @_Z3fn1v(), !llvm.access.group !22 // CHECK15-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK15: omp.body.continue: // CHECK15-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK15: omp.inner.for.inc: -// CHECK15-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK15-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !22 // CHECK15-NEXT: [[ADD1:%.*]] = add nsw i32 [[TMP4]], 1 -// CHECK15-NEXT: store i32 [[ADD1]], i32* [[DOTOMP_IV]], align 4 +// CHECK15-NEXT: store i32 [[ADD1]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !22 // CHECK15-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP23:![0-9]+]] // CHECK15: omp.inner.for.end: // CHECK15-NEXT: store i32 100, i32* [[I]], align 4 @@ -14669,23 +14669,23 @@ int main() { // CHECK15-NEXT: store i32 [[TMP11]], i32* [[DOTOMP_IV19]], align 4 // CHECK15-NEXT: br label [[OMP_INNER_FOR_COND21:%.*]] // CHECK15: omp.inner.for.cond21: -// CHECK15-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV19]], align 4 -// CHECK15-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_UB18]], align 4 +// CHECK15-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV19]], align 4, !llvm.access.group !26 +// CHECK15-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_UB18]], align 4, !llvm.access.group !26 // CHECK15-NEXT: [[CMP22:%.*]] = icmp sle i32 [[TMP12]], [[TMP13]] // CHECK15-NEXT: br i1 [[CMP22]], label [[OMP_INNER_FOR_BODY23:%.*]], label [[OMP_INNER_FOR_END29:%.*]] // CHECK15: omp.inner.for.body23: -// CHECK15-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IV19]], align 4 +// CHECK15-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IV19]], align 4, !llvm.access.group !26 
// CHECK15-NEXT: [[MUL24:%.*]] = mul nsw i32 [[TMP14]], 1 // CHECK15-NEXT: [[ADD25:%.*]] = add nsw i32 0, [[MUL24]] -// CHECK15-NEXT: store i32 [[ADD25]], i32* [[I20]], align 4 -// CHECK15-NEXT: call void @_Z3fn3v() +// CHECK15-NEXT: store i32 [[ADD25]], i32* [[I20]], align 4, !llvm.access.group !26 +// CHECK15-NEXT: call void @_Z3fn3v(), !llvm.access.group !26 // CHECK15-NEXT: br label [[OMP_BODY_CONTINUE26:%.*]] // CHECK15: omp.body.continue26: // CHECK15-NEXT: br label [[OMP_INNER_FOR_INC27:%.*]] // CHECK15: omp.inner.for.inc27: -// CHECK15-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_IV19]], align 4 +// CHECK15-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_IV19]], align 4, !llvm.access.group !26 // CHECK15-NEXT: [[ADD28:%.*]] = add nsw i32 [[TMP15]], 1 -// CHECK15-NEXT: store i32 [[ADD28]], i32* [[DOTOMP_IV19]], align 4 +// CHECK15-NEXT: store i32 [[ADD28]], i32* [[DOTOMP_IV19]], align 4, !llvm.access.group !26 // CHECK15-NEXT: br label [[OMP_INNER_FOR_COND21]], !llvm.loop [[LOOP27:![0-9]+]] // CHECK15: omp.inner.for.end29: // CHECK15-NEXT: store i32 100, i32* [[I20]], align 4 @@ -14711,22 +14711,22 @@ int main() { // CHECK16-NEXT: store i32 [[TMP0]], i32* [[DOTOMP_IV]], align 4 // CHECK16-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK16: omp.inner.for.cond: -// CHECK16-NEXT: [[TMP1:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK16-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK16-NEXT: [[TMP1:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !6 +// CHECK16-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !6 // CHECK16-NEXT: [[CMP:%.*]] = icmp sle i32 [[TMP1]], [[TMP2]] // CHECK16-NEXT: br i1 [[CMP]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK16: omp.inner.for.body: -// CHECK16-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK16-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !6 // CHECK16-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP3]], 1 // CHECK16-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK16-NEXT: store i32 [[ADD]], i32* [[I]], align 4 +// CHECK16-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !6 // CHECK16-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK16: omp.body.continue: // CHECK16-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK16: omp.inner.for.inc: -// CHECK16-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK16-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !6 // CHECK16-NEXT: [[ADD1:%.*]] = add nsw i32 [[TMP4]], 1 -// CHECK16-NEXT: store i32 [[ADD1]], i32* [[DOTOMP_IV]], align 4 +// CHECK16-NEXT: store i32 [[ADD1]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !6 // CHECK16-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP7:![0-9]+]] // CHECK16: omp.inner.for.end: // CHECK16-NEXT: store i32 100, i32* [[I]], align 4 @@ -14736,23 +14736,23 @@ int main() { // CHECK16-NEXT: store i32 [[TMP5]], i32* [[DOTOMP_IV5]], align 4 // CHECK16-NEXT: br label [[OMP_INNER_FOR_COND7:%.*]] // CHECK16: omp.inner.for.cond7: -// CHECK16-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV5]], align 4 -// CHECK16-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_UB4]], align 4 +// CHECK16-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV5]], align 4, !llvm.access.group !10 +// CHECK16-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_UB4]], align 4, !llvm.access.group !10 // CHECK16-NEXT: [[CMP8:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] // CHECK16-NEXT: br i1 [[CMP8]], 
label [[OMP_INNER_FOR_BODY9:%.*]], label [[OMP_INNER_FOR_END15:%.*]] // CHECK16: omp.inner.for.body9: -// CHECK16-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV5]], align 4 +// CHECK16-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV5]], align 4, !llvm.access.group !10 // CHECK16-NEXT: [[MUL10:%.*]] = mul nsw i32 [[TMP8]], 1 // CHECK16-NEXT: [[ADD11:%.*]] = add nsw i32 0, [[MUL10]] -// CHECK16-NEXT: store i32 [[ADD11]], i32* [[I6]], align 4 -// CHECK16-NEXT: call void @_Z9gtid_testv() +// CHECK16-NEXT: store i32 [[ADD11]], i32* [[I6]], align 4, !llvm.access.group !10 +// CHECK16-NEXT: call void @_Z9gtid_testv(), !llvm.access.group !10 // CHECK16-NEXT: br label [[OMP_BODY_CONTINUE12:%.*]] // CHECK16: omp.body.continue12: // CHECK16-NEXT: br label [[OMP_INNER_FOR_INC13:%.*]] // CHECK16: omp.inner.for.inc13: -// CHECK16-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV5]], align 4 +// CHECK16-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV5]], align 4, !llvm.access.group !10 // CHECK16-NEXT: [[ADD14:%.*]] = add nsw i32 [[TMP9]], 1 -// CHECK16-NEXT: store i32 [[ADD14]], i32* [[DOTOMP_IV5]], align 4 +// CHECK16-NEXT: store i32 [[ADD14]], i32* [[DOTOMP_IV5]], align 4, !llvm.access.group !10 // CHECK16-NEXT: br label [[OMP_INNER_FOR_COND7]], !llvm.loop [[LOOP11:![0-9]+]] // CHECK16: omp.inner.for.end15: // CHECK16-NEXT: store i32 100, i32* [[I6]], align 4 @@ -14786,23 +14786,23 @@ int main() { // CHECK16-NEXT: store i32 [[TMP0]], i32* [[DOTOMP_IV]], align 4 // CHECK16-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK16: omp.inner.for.cond: -// CHECK16-NEXT: [[TMP1:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK16-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK16-NEXT: [[TMP1:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !13 +// CHECK16-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !13 // CHECK16-NEXT: [[CMP:%.*]] = icmp sle i32 [[TMP1]], [[TMP2]] // CHECK16-NEXT: br i1 [[CMP]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK16: omp.inner.for.body: -// CHECK16-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK16-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !13 // CHECK16-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP3]], 1 // CHECK16-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK16-NEXT: store i32 [[ADD]], i32* [[I]], align 4 -// CHECK16-NEXT: call void @_Z3fn4v() +// CHECK16-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !13 +// CHECK16-NEXT: call void @_Z3fn4v(), !llvm.access.group !13 // CHECK16-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK16: omp.body.continue: // CHECK16-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK16: omp.inner.for.inc: -// CHECK16-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK16-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !13 // CHECK16-NEXT: [[ADD1:%.*]] = add nsw i32 [[TMP4]], 1 -// CHECK16-NEXT: store i32 [[ADD1]], i32* [[DOTOMP_IV]], align 4 +// CHECK16-NEXT: store i32 [[ADD1]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !13 // CHECK16-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP14:![0-9]+]] // CHECK16: omp.inner.for.end: // CHECK16-NEXT: store i32 100, i32* [[I]], align 4 @@ -14846,23 +14846,23 @@ int main() { // CHECK16: omp_if.then: // CHECK16-NEXT: br label [[OMP_INNER_FOR_COND22:%.*]] // CHECK16: omp.inner.for.cond22: -// CHECK16-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_IV19]], align 4 -// CHECK16-NEXT: 
[[TMP14:%.*]] = load i32, i32* [[DOTOMP_UB18]], align 4 +// CHECK16-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_IV19]], align 4, !llvm.access.group !18 +// CHECK16-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_UB18]], align 4, !llvm.access.group !18 // CHECK16-NEXT: [[CMP23:%.*]] = icmp sle i32 [[TMP13]], [[TMP14]] // CHECK16-NEXT: br i1 [[CMP23]], label [[OMP_INNER_FOR_BODY24:%.*]], label [[OMP_INNER_FOR_END30:%.*]] // CHECK16: omp.inner.for.body24: -// CHECK16-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_IV19]], align 4 +// CHECK16-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_IV19]], align 4, !llvm.access.group !18 // CHECK16-NEXT: [[MUL25:%.*]] = mul nsw i32 [[TMP15]], 1 // CHECK16-NEXT: [[ADD26:%.*]] = add nsw i32 0, [[MUL25]] -// CHECK16-NEXT: store i32 [[ADD26]], i32* [[I20]], align 4 -// CHECK16-NEXT: call void @_Z3fn6v() +// CHECK16-NEXT: store i32 [[ADD26]], i32* [[I20]], align 4, !llvm.access.group !18 +// CHECK16-NEXT: call void @_Z3fn6v(), !llvm.access.group !18 // CHECK16-NEXT: br label [[OMP_BODY_CONTINUE27:%.*]] // CHECK16: omp.body.continue27: // CHECK16-NEXT: br label [[OMP_INNER_FOR_INC28:%.*]] // CHECK16: omp.inner.for.inc28: -// CHECK16-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_IV19]], align 4 +// CHECK16-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_IV19]], align 4, !llvm.access.group !18 // CHECK16-NEXT: [[ADD29:%.*]] = add nsw i32 [[TMP16]], 1 -// CHECK16-NEXT: store i32 [[ADD29]], i32* [[DOTOMP_IV19]], align 4 +// CHECK16-NEXT: store i32 [[ADD29]], i32* [[DOTOMP_IV19]], align 4, !llvm.access.group !18 // CHECK16-NEXT: br label [[OMP_INNER_FOR_COND22]], !llvm.loop [[LOOP19:![0-9]+]] // CHECK16: omp.inner.for.end30: // CHECK16-NEXT: br label [[OMP_IF_END:%.*]] @@ -14923,23 +14923,23 @@ int main() { // CHECK16-NEXT: store i32 [[TMP0]], i32* [[DOTOMP_IV]], align 4 // CHECK16-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK16: omp.inner.for.cond: -// CHECK16-NEXT: [[TMP1:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK16-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK16-NEXT: [[TMP1:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !22 +// CHECK16-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !22 // CHECK16-NEXT: [[CMP:%.*]] = icmp sle i32 [[TMP1]], [[TMP2]] // CHECK16-NEXT: br i1 [[CMP]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK16: omp.inner.for.body: -// CHECK16-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK16-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !22 // CHECK16-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP3]], 1 // CHECK16-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK16-NEXT: store i32 [[ADD]], i32* [[I]], align 4 -// CHECK16-NEXT: call void @_Z3fn1v() +// CHECK16-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !22 +// CHECK16-NEXT: call void @_Z3fn1v(), !llvm.access.group !22 // CHECK16-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK16: omp.body.continue: // CHECK16-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK16: omp.inner.for.inc: -// CHECK16-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK16-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !22 // CHECK16-NEXT: [[ADD1:%.*]] = add nsw i32 [[TMP4]], 1 -// CHECK16-NEXT: store i32 [[ADD1]], i32* [[DOTOMP_IV]], align 4 +// CHECK16-NEXT: store i32 [[ADD1]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !22 // CHECK16-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop 
[[LOOP23:![0-9]+]] // CHECK16: omp.inner.for.end: // CHECK16-NEXT: store i32 100, i32* [[I]], align 4 @@ -14979,23 +14979,23 @@ int main() { // CHECK16-NEXT: store i32 [[TMP11]], i32* [[DOTOMP_IV19]], align 4 // CHECK16-NEXT: br label [[OMP_INNER_FOR_COND21:%.*]] // CHECK16: omp.inner.for.cond21: -// CHECK16-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV19]], align 4 -// CHECK16-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_UB18]], align 4 +// CHECK16-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV19]], align 4, !llvm.access.group !26 +// CHECK16-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_UB18]], align 4, !llvm.access.group !26 // CHECK16-NEXT: [[CMP22:%.*]] = icmp sle i32 [[TMP12]], [[TMP13]] // CHECK16-NEXT: br i1 [[CMP22]], label [[OMP_INNER_FOR_BODY23:%.*]], label [[OMP_INNER_FOR_END29:%.*]] // CHECK16: omp.inner.for.body23: -// CHECK16-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IV19]], align 4 +// CHECK16-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IV19]], align 4, !llvm.access.group !26 // CHECK16-NEXT: [[MUL24:%.*]] = mul nsw i32 [[TMP14]], 1 // CHECK16-NEXT: [[ADD25:%.*]] = add nsw i32 0, [[MUL24]] -// CHECK16-NEXT: store i32 [[ADD25]], i32* [[I20]], align 4 -// CHECK16-NEXT: call void @_Z3fn3v() +// CHECK16-NEXT: store i32 [[ADD25]], i32* [[I20]], align 4, !llvm.access.group !26 +// CHECK16-NEXT: call void @_Z3fn3v(), !llvm.access.group !26 // CHECK16-NEXT: br label [[OMP_BODY_CONTINUE26:%.*]] // CHECK16: omp.body.continue26: // CHECK16-NEXT: br label [[OMP_INNER_FOR_INC27:%.*]] // CHECK16: omp.inner.for.inc27: -// CHECK16-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_IV19]], align 4 +// CHECK16-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_IV19]], align 4, !llvm.access.group !26 // CHECK16-NEXT: [[ADD28:%.*]] = add nsw i32 [[TMP15]], 1 -// CHECK16-NEXT: store i32 [[ADD28]], i32* [[DOTOMP_IV19]], align 4 +// CHECK16-NEXT: store i32 [[ADD28]], i32* [[DOTOMP_IV19]], align 4, !llvm.access.group !26 // CHECK16-NEXT: br label [[OMP_INNER_FOR_COND21]], !llvm.loop [[LOOP27:![0-9]+]] // CHECK16: omp.inner.for.end29: // CHECK16-NEXT: store i32 100, i32* [[I20]], align 4 diff --git a/clang/test/OpenMP/teams_distribute_parallel_for_simd_num_threads_codegen.cpp b/clang/test/OpenMP/teams_distribute_parallel_for_simd_num_threads_codegen.cpp index 36fb55d14161..935fa2fdc3a1 100644 --- a/clang/test/OpenMP/teams_distribute_parallel_for_simd_num_threads_codegen.cpp +++ b/clang/test/OpenMP/teams_distribute_parallel_for_simd_num_threads_codegen.cpp @@ -229,24 +229,24 @@ int main() { // CHECK1-NEXT: store i32 [[TMP4]], i32* [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !9 +// CHECK1-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !9 // CHECK1-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] // CHECK1-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: call void @__kmpc_push_num_threads(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]], i32 2) -// CHECK1-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK1-NEXT: call void @__kmpc_push_num_threads(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]], i32 2), !llvm.access.group !9 +// CHECK1-NEXT: [[TMP7:%.*]] = load i32, i32* 
[[DOTOMP_COMB_LB]], align 4, !llvm.access.group !9 // CHECK1-NEXT: [[TMP8:%.*]] = zext i32 [[TMP7]] to i64 -// CHECK1-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !9 // CHECK1-NEXT: [[TMP10:%.*]] = zext i32 [[TMP9]] to i64 -// CHECK1-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 2, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64)* @.omp_outlined..1 to void (i32*, i32*, ...)*), i64 [[TMP8]], i64 [[TMP10]]) +// CHECK1-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 2, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64)* @.omp_outlined..1 to void (i32*, i32*, ...)*), i64 [[TMP8]], i64 [[TMP10]]), !llvm.access.group !9 // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK1-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !9 +// CHECK1-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !9 // CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP11]], [[TMP12]] -// CHECK1-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP8:![0-9]+]] +// CHECK1-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !9 +// CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP10:![0-9]+]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK1: omp.loop.exit: @@ -307,26 +307,26 @@ int main() { // CHECK1-NEXT: store i32 [[TMP6]], i32* [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !13 +// CHECK1-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !13 // CHECK1-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP7]], [[TMP8]] // CHECK1-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !13 // CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP9]], 1 // CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK1-NEXT: store i32 [[ADD]], i32* [[I]], align 4 +// CHECK1-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !13 // CHECK1-NEXT: invoke void @_Z3foov() -// CHECK1-NEXT: to label [[INVOKE_CONT:%.*]] unwind label [[TERMINATE_LPAD:%.*]] +// CHECK1-NEXT: to label [[INVOKE_CONT:%.*]] unwind label [[TERMINATE_LPAD:%.*]], !llvm.access.group !13 // CHECK1: invoke.cont: // CHECK1-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK1: omp.body.continue: // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !13 // CHECK1-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP10]], 1 -// 
CHECK1-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP10:![0-9]+]] +// CHECK1-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !13 +// CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP14:![0-9]+]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK1: omp.loop.exit: @@ -343,7 +343,7 @@ int main() { // CHECK1-NEXT: [[TMP13:%.*]] = landingpad { i8*, i32 } // CHECK1-NEXT: catch i8* null // CHECK1-NEXT: [[TMP14:%.*]] = extractvalue { i8*, i32 } [[TMP13]], 0 -// CHECK1-NEXT: call void @__clang_call_terminate(i8* [[TMP14]]) #[[ATTR9:[0-9]+]] +// CHECK1-NEXT: call void @__clang_call_terminate(i8* [[TMP14]]) #[[ATTR9:[0-9]+]], !llvm.access.group !13 // CHECK1-NEXT: unreachable // // @@ -411,26 +411,26 @@ int main() { // CHECK1-NEXT: store i32 [[TMP4]], i32* [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !18 +// CHECK1-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !18 // CHECK1-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] // CHECK1-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: [[TMP7:%.*]] = load i8, i8* [[CONV]], align 8 +// CHECK1-NEXT: [[TMP7:%.*]] = load i8, i8* [[CONV]], align 8, !llvm.access.group !18 // CHECK1-NEXT: [[TMP8:%.*]] = sext i8 [[TMP7]] to i32 -// CHECK1-NEXT: call void @__kmpc_push_num_threads(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]], i32 [[TMP8]]) -// CHECK1-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK1-NEXT: call void @__kmpc_push_num_threads(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]], i32 [[TMP8]]), !llvm.access.group !18 +// CHECK1-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !18 // CHECK1-NEXT: [[TMP10:%.*]] = zext i32 [[TMP9]] to i64 -// CHECK1-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !18 // CHECK1-NEXT: [[TMP12:%.*]] = zext i32 [[TMP11]] to i64 -// CHECK1-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 2, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64)* @.omp_outlined..3 to void (i32*, i32*, ...)*), i64 [[TMP10]], i64 [[TMP12]]) +// CHECK1-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) 
@__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 2, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64)* @.omp_outlined..3 to void (i32*, i32*, ...)*), i64 [[TMP10]], i64 [[TMP12]]), !llvm.access.group !18 // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK1-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !18 +// CHECK1-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !18 // CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP13]], [[TMP14]] -// CHECK1-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP13:![0-9]+]] +// CHECK1-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !18 +// CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP19:![0-9]+]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK1: omp.loop.exit: @@ -491,26 +491,26 @@ int main() { // CHECK1-NEXT: store i32 [[TMP6]], i32* [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !21 +// CHECK1-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !21 // CHECK1-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP7]], [[TMP8]] // CHECK1-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !21 // CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP9]], 1 // CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK1-NEXT: store i32 [[ADD]], i32* [[I]], align 4 +// CHECK1-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !21 // CHECK1-NEXT: invoke void @_Z3foov() -// CHECK1-NEXT: to label [[INVOKE_CONT:%.*]] unwind label [[TERMINATE_LPAD:%.*]] +// CHECK1-NEXT: to label [[INVOKE_CONT:%.*]] unwind label [[TERMINATE_LPAD:%.*]], !llvm.access.group !21 // CHECK1: invoke.cont: // CHECK1-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK1: omp.body.continue: // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !21 // CHECK1-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP10]], 1 -// CHECK1-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP14:![0-9]+]] +// CHECK1-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !21 +// CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP22:![0-9]+]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK1: omp.loop.exit: @@ -527,7 +527,7 @@ int main() { // CHECK1-NEXT: [[TMP13:%.*]] = landingpad { i8*, i32 } // CHECK1-NEXT: catch i8* null // CHECK1-NEXT: [[TMP14:%.*]] = extractvalue { i8*, i32 } [[TMP13]], 0 -// CHECK1-NEXT: call void @__clang_call_terminate(i8* [[TMP14]]) #[[ATTR9]] +// 
CHECK1-NEXT: call void @__clang_call_terminate(i8* [[TMP14]]) #[[ATTR9]], !llvm.access.group !21 // CHECK1-NEXT: unreachable // // @@ -671,24 +671,24 @@ int main() { // CHECK1-NEXT: store i32 [[TMP4]], i32* [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !24 +// CHECK1-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !24 // CHECK1-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] // CHECK1-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: call void @__kmpc_push_num_threads(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]], i32 5) -// CHECK1-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK1-NEXT: call void @__kmpc_push_num_threads(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]], i32 5), !llvm.access.group !24 +// CHECK1-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !24 // CHECK1-NEXT: [[TMP8:%.*]] = zext i32 [[TMP7]] to i64 -// CHECK1-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !24 // CHECK1-NEXT: [[TMP10:%.*]] = zext i32 [[TMP9]] to i64 -// CHECK1-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 2, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64)* @.omp_outlined..5 to void (i32*, i32*, ...)*), i64 [[TMP8]], i64 [[TMP10]]) +// CHECK1-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) 
@__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 2, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64)* @.omp_outlined..5 to void (i32*, i32*, ...)*), i64 [[TMP8]], i64 [[TMP10]]), !llvm.access.group !24 // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK1-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !24 +// CHECK1-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !24 // CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP11]], [[TMP12]] -// CHECK1-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP15:![0-9]+]] +// CHECK1-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !24 +// CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP25:![0-9]+]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK1: omp.loop.exit: @@ -749,26 +749,26 @@ int main() { // CHECK1-NEXT: store i32 [[TMP6]], i32* [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !27 +// CHECK1-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !27 // CHECK1-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP7]], [[TMP8]] // CHECK1-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !27 // CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP9]], 1 // CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK1-NEXT: store i32 [[ADD]], i32* [[I]], align 4 +// CHECK1-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !27 // CHECK1-NEXT: invoke void @_Z3foov() -// CHECK1-NEXT: to label [[INVOKE_CONT:%.*]] unwind label [[TERMINATE_LPAD:%.*]] +// CHECK1-NEXT: to label [[INVOKE_CONT:%.*]] unwind label [[TERMINATE_LPAD:%.*]], !llvm.access.group !27 // CHECK1: invoke.cont: // CHECK1-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK1: omp.body.continue: // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !27 // CHECK1-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP10]], 1 -// CHECK1-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP16:![0-9]+]] +// CHECK1-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !27 +// CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP28:![0-9]+]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK1: omp.loop.exit: @@ -785,7 +785,7 @@ int main() { // CHECK1-NEXT: [[TMP13:%.*]] = landingpad { i8*, i32 } // CHECK1-NEXT: catch i8* null // CHECK1-NEXT: [[TMP14:%.*]] = extractvalue { i8*, i32 } [[TMP13]], 0 -// CHECK1-NEXT: call void @__clang_call_terminate(i8* [[TMP14]]) #[[ATTR9]] +// 
CHECK1-NEXT: call void @__clang_call_terminate(i8* [[TMP14]]) #[[ATTR9]], !llvm.access.group !27 // CHECK1-NEXT: unreachable // // @@ -832,24 +832,24 @@ int main() { // CHECK1-NEXT: store i32 [[TMP4]], i32* [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !30 +// CHECK1-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !30 // CHECK1-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] // CHECK1-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: call void @__kmpc_push_num_threads(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]], i32 23) -// CHECK1-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK1-NEXT: call void @__kmpc_push_num_threads(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]], i32 23), !llvm.access.group !30 +// CHECK1-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !30 // CHECK1-NEXT: [[TMP8:%.*]] = zext i32 [[TMP7]] to i64 -// CHECK1-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !30 // CHECK1-NEXT: [[TMP10:%.*]] = zext i32 [[TMP9]] to i64 -// CHECK1-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 2, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64)* @.omp_outlined..7 to void (i32*, i32*, ...)*), i64 [[TMP8]], i64 [[TMP10]]) +// CHECK1-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) 
@__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 2, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64)* @.omp_outlined..7 to void (i32*, i32*, ...)*), i64 [[TMP8]], i64 [[TMP10]]), !llvm.access.group !30 // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK1-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !30 +// CHECK1-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !30 // CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP11]], [[TMP12]] -// CHECK1-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP17:![0-9]+]] +// CHECK1-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !30 +// CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP31:![0-9]+]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK1: omp.loop.exit: @@ -910,26 +910,26 @@ int main() { // CHECK1-NEXT: store i32 [[TMP6]], i32* [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !33 +// CHECK1-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !33 // CHECK1-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP7]], [[TMP8]] // CHECK1-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !33 // CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP9]], 1 // CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK1-NEXT: store i32 [[ADD]], i32* [[I]], align 4 +// CHECK1-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !33 // CHECK1-NEXT: invoke void @_Z3foov() -// CHECK1-NEXT: to label [[INVOKE_CONT:%.*]] unwind label [[TERMINATE_LPAD:%.*]] +// CHECK1-NEXT: to label [[INVOKE_CONT:%.*]] unwind label [[TERMINATE_LPAD:%.*]], !llvm.access.group !33 // CHECK1: invoke.cont: // CHECK1-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK1: omp.body.continue: // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !33 // CHECK1-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP10]], 1 -// CHECK1-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP18:![0-9]+]] +// CHECK1-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !33 +// CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP34:![0-9]+]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK1: omp.loop.exit: @@ -946,7 +946,7 @@ int main() { // CHECK1-NEXT: [[TMP13:%.*]] = landingpad { i8*, i32 } // CHECK1-NEXT: catch i8* null // CHECK1-NEXT: [[TMP14:%.*]] = extractvalue { i8*, i32 } [[TMP13]], 0 -// CHECK1-NEXT: call void @__clang_call_terminate(i8* [[TMP14]]) #[[ATTR9]] +// 
CHECK1-NEXT: call void @__clang_call_terminate(i8* [[TMP14]]) #[[ATTR9]], !llvm.access.group !33 // CHECK1-NEXT: unreachable // // @@ -993,24 +993,24 @@ int main() { // CHECK1-NEXT: store i32 [[TMP4]], i32* [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !36 +// CHECK1-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !36 // CHECK1-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] // CHECK1-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: call void @__kmpc_push_num_threads(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]], i32 1) -// CHECK1-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK1-NEXT: call void @__kmpc_push_num_threads(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]], i32 1), !llvm.access.group !36 +// CHECK1-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !36 // CHECK1-NEXT: [[TMP8:%.*]] = zext i32 [[TMP7]] to i64 -// CHECK1-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !36 // CHECK1-NEXT: [[TMP10:%.*]] = zext i32 [[TMP9]] to i64 -// CHECK1-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 2, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64)* @.omp_outlined..9 to void (i32*, i32*, ...)*), i64 [[TMP8]], i64 [[TMP10]]) +// CHECK1-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) 
@__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 2, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64)* @.omp_outlined..9 to void (i32*, i32*, ...)*), i64 [[TMP8]], i64 [[TMP10]]), !llvm.access.group !36 // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK1-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !36 +// CHECK1-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !36 // CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP11]], [[TMP12]] -// CHECK1-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP19:![0-9]+]] +// CHECK1-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !36 +// CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP37:![0-9]+]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK1: omp.loop.exit: @@ -1071,26 +1071,26 @@ int main() { // CHECK1-NEXT: store i32 [[TMP6]], i32* [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !39 +// CHECK1-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !39 // CHECK1-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP7]], [[TMP8]] // CHECK1-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !39 // CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP9]], 1 // CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK1-NEXT: store i32 [[ADD]], i32* [[I]], align 4 +// CHECK1-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !39 // CHECK1-NEXT: invoke void @_Z3foov() -// CHECK1-NEXT: to label [[INVOKE_CONT:%.*]] unwind label [[TERMINATE_LPAD:%.*]] +// CHECK1-NEXT: to label [[INVOKE_CONT:%.*]] unwind label [[TERMINATE_LPAD:%.*]], !llvm.access.group !39 // CHECK1: invoke.cont: // CHECK1-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK1: omp.body.continue: // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !39 // CHECK1-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP10]], 1 -// CHECK1-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP20:![0-9]+]] +// CHECK1-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !39 +// CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP40:![0-9]+]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK1: omp.loop.exit: @@ -1107,7 +1107,7 @@ int main() { // CHECK1-NEXT: [[TMP13:%.*]] = landingpad { i8*, i32 } // CHECK1-NEXT: catch i8* null // CHECK1-NEXT: [[TMP14:%.*]] = extractvalue { i8*, i32 } [[TMP13]], 0 -// CHECK1-NEXT: call void @__clang_call_terminate(i8* [[TMP14]]) #[[ATTR9]] +// 
CHECK1-NEXT: call void @__clang_call_terminate(i8* [[TMP14]]) #[[ATTR9]], !llvm.access.group !39 // CHECK1-NEXT: unreachable // // @@ -1184,26 +1184,26 @@ int main() { // CHECK1-NEXT: store i32 [[TMP4]], i32* [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !42 +// CHECK1-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !42 // CHECK1-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] // CHECK1-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: [[TMP7:%.*]] = load i8, i8* [[CONV]], align 8 +// CHECK1-NEXT: [[TMP7:%.*]] = load i8, i8* [[CONV]], align 8, !llvm.access.group !42 // CHECK1-NEXT: [[TMP8:%.*]] = sext i8 [[TMP7]] to i32 -// CHECK1-NEXT: call void @__kmpc_push_num_threads(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]], i32 [[TMP8]]) -// CHECK1-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK1-NEXT: call void @__kmpc_push_num_threads(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]], i32 [[TMP8]]), !llvm.access.group !42 +// CHECK1-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !42 // CHECK1-NEXT: [[TMP10:%.*]] = zext i32 [[TMP9]] to i64 -// CHECK1-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !42 // CHECK1-NEXT: [[TMP12:%.*]] = zext i32 [[TMP11]] to i64 -// CHECK1-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 2, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64)* @.omp_outlined..11 to void (i32*, i32*, ...)*), i64 [[TMP10]], i64 [[TMP12]]) +// CHECK1-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) 
@__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 2, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64)* @.omp_outlined..11 to void (i32*, i32*, ...)*), i64 [[TMP10]], i64 [[TMP12]]), !llvm.access.group !42 // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK1-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !42 +// CHECK1-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !42 // CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP13]], [[TMP14]] -// CHECK1-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP21:![0-9]+]] +// CHECK1-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !42 +// CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP43:![0-9]+]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK1: omp.loop.exit: @@ -1264,26 +1264,26 @@ int main() { // CHECK1-NEXT: store i32 [[TMP6]], i32* [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !45 +// CHECK1-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !45 // CHECK1-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP7]], [[TMP8]] // CHECK1-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !45 // CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP9]], 1 // CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK1-NEXT: store i32 [[ADD]], i32* [[I]], align 4 +// CHECK1-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !45 // CHECK1-NEXT: invoke void @_Z3foov() -// CHECK1-NEXT: to label [[INVOKE_CONT:%.*]] unwind label [[TERMINATE_LPAD:%.*]] +// CHECK1-NEXT: to label [[INVOKE_CONT:%.*]] unwind label [[TERMINATE_LPAD:%.*]], !llvm.access.group !45 // CHECK1: invoke.cont: // CHECK1-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK1: omp.body.continue: // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !45 // CHECK1-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP10]], 1 -// CHECK1-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP22:![0-9]+]] +// CHECK1-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !45 +// CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP46:![0-9]+]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK1: omp.loop.exit: @@ -1300,7 +1300,7 @@ int main() { // CHECK1-NEXT: [[TMP13:%.*]] = landingpad { i8*, i32 } // CHECK1-NEXT: catch i8* null // CHECK1-NEXT: [[TMP14:%.*]] = extractvalue { i8*, i32 } [[TMP13]], 0 -// CHECK1-NEXT: call void @__clang_call_terminate(i8* [[TMP14]]) #[[ATTR9]] +// 
CHECK1-NEXT: call void @__clang_call_terminate(i8* [[TMP14]]) #[[ATTR9]], !llvm.access.group !45 // CHECK1-NEXT: unreachable // // @@ -1465,24 +1465,24 @@ int main() { // CHECK2-NEXT: store i32 [[TMP4]], i32* [[DOTOMP_IV]], align 4 // CHECK2-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK2: omp.inner.for.cond: -// CHECK2-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK2-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK2-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !9 +// CHECK2-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !9 // CHECK2-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] // CHECK2-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK2: omp.inner.for.body: -// CHECK2-NEXT: call void @__kmpc_push_num_threads(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]], i32 2) -// CHECK2-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK2-NEXT: call void @__kmpc_push_num_threads(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]], i32 2), !llvm.access.group !9 +// CHECK2-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !9 // CHECK2-NEXT: [[TMP8:%.*]] = zext i32 [[TMP7]] to i64 -// CHECK2-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK2-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !9 // CHECK2-NEXT: [[TMP10:%.*]] = zext i32 [[TMP9]] to i64 -// CHECK2-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 2, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64)* @.omp_outlined..1 to void (i32*, i32*, ...)*), i64 [[TMP8]], i64 [[TMP10]]) +// CHECK2-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) 
@__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 2, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64)* @.omp_outlined..1 to void (i32*, i32*, ...)*), i64 [[TMP8]], i64 [[TMP10]]), !llvm.access.group !9 // CHECK2-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK2: omp.inner.for.inc: -// CHECK2-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK2-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK2-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !9 +// CHECK2-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !9 // CHECK2-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP11]], [[TMP12]] -// CHECK2-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 -// CHECK2-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP8:![0-9]+]] +// CHECK2-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !9 +// CHECK2-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP10:![0-9]+]] // CHECK2: omp.inner.for.end: // CHECK2-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK2: omp.loop.exit: @@ -1543,26 +1543,26 @@ int main() { // CHECK2-NEXT: store i32 [[TMP6]], i32* [[DOTOMP_IV]], align 4 // CHECK2-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK2: omp.inner.for.cond: -// CHECK2-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK2-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK2-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !13 +// CHECK2-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !13 // CHECK2-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP7]], [[TMP8]] // CHECK2-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK2: omp.inner.for.body: -// CHECK2-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK2-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !13 // CHECK2-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP9]], 1 // CHECK2-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK2-NEXT: store i32 [[ADD]], i32* [[I]], align 4 +// CHECK2-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !13 // CHECK2-NEXT: invoke void @_Z3foov() -// CHECK2-NEXT: to label [[INVOKE_CONT:%.*]] unwind label [[TERMINATE_LPAD:%.*]] +// CHECK2-NEXT: to label [[INVOKE_CONT:%.*]] unwind label [[TERMINATE_LPAD:%.*]], !llvm.access.group !13 // CHECK2: invoke.cont: // CHECK2-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK2: omp.body.continue: // CHECK2-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK2: omp.inner.for.inc: -// CHECK2-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK2-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !13 // CHECK2-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP10]], 1 -// CHECK2-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4 -// CHECK2-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP10:![0-9]+]] +// CHECK2-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !13 +// CHECK2-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP14:![0-9]+]] // CHECK2: omp.inner.for.end: // CHECK2-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK2: omp.loop.exit: @@ -1579,7 +1579,7 @@ int main() { // CHECK2-NEXT: [[TMP13:%.*]] = landingpad { i8*, i32 } // CHECK2-NEXT: catch i8* null // CHECK2-NEXT: [[TMP14:%.*]] = extractvalue { i8*, i32 } [[TMP13]], 0 -// CHECK2-NEXT: call void @__clang_call_terminate(i8* [[TMP14]]) #[[ATTR9:[0-9]+]] +// 
CHECK2-NEXT: call void @__clang_call_terminate(i8* [[TMP14]]) #[[ATTR9:[0-9]+]], !llvm.access.group !13 // CHECK2-NEXT: unreachable // // @@ -1647,26 +1647,26 @@ int main() { // CHECK2-NEXT: store i32 [[TMP4]], i32* [[DOTOMP_IV]], align 4 // CHECK2-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK2: omp.inner.for.cond: -// CHECK2-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK2-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK2-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !18 +// CHECK2-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !18 // CHECK2-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] // CHECK2-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK2: omp.inner.for.body: -// CHECK2-NEXT: [[TMP7:%.*]] = load i8, i8* [[CONV]], align 8 +// CHECK2-NEXT: [[TMP7:%.*]] = load i8, i8* [[CONV]], align 8, !llvm.access.group !18 // CHECK2-NEXT: [[TMP8:%.*]] = sext i8 [[TMP7]] to i32 -// CHECK2-NEXT: call void @__kmpc_push_num_threads(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]], i32 [[TMP8]]) -// CHECK2-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK2-NEXT: call void @__kmpc_push_num_threads(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]], i32 [[TMP8]]), !llvm.access.group !18 +// CHECK2-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !18 // CHECK2-NEXT: [[TMP10:%.*]] = zext i32 [[TMP9]] to i64 -// CHECK2-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK2-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !18 // CHECK2-NEXT: [[TMP12:%.*]] = zext i32 [[TMP11]] to i64 -// CHECK2-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 2, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64)* @.omp_outlined..3 to void (i32*, i32*, ...)*), i64 [[TMP10]], i64 [[TMP12]]) +// CHECK2-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) 
@__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 2, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64)* @.omp_outlined..3 to void (i32*, i32*, ...)*), i64 [[TMP10]], i64 [[TMP12]]), !llvm.access.group !18 // CHECK2-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK2: omp.inner.for.inc: -// CHECK2-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK2-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK2-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !18 +// CHECK2-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !18 // CHECK2-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP13]], [[TMP14]] -// CHECK2-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 -// CHECK2-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP13:![0-9]+]] +// CHECK2-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !18 +// CHECK2-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP19:![0-9]+]] // CHECK2: omp.inner.for.end: // CHECK2-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK2: omp.loop.exit: @@ -1727,26 +1727,26 @@ int main() { // CHECK2-NEXT: store i32 [[TMP6]], i32* [[DOTOMP_IV]], align 4 // CHECK2-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK2: omp.inner.for.cond: -// CHECK2-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK2-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK2-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !21 +// CHECK2-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !21 // CHECK2-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP7]], [[TMP8]] // CHECK2-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK2: omp.inner.for.body: -// CHECK2-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK2-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !21 // CHECK2-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP9]], 1 // CHECK2-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK2-NEXT: store i32 [[ADD]], i32* [[I]], align 4 +// CHECK2-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !21 // CHECK2-NEXT: invoke void @_Z3foov() -// CHECK2-NEXT: to label [[INVOKE_CONT:%.*]] unwind label [[TERMINATE_LPAD:%.*]] +// CHECK2-NEXT: to label [[INVOKE_CONT:%.*]] unwind label [[TERMINATE_LPAD:%.*]], !llvm.access.group !21 // CHECK2: invoke.cont: // CHECK2-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK2: omp.body.continue: // CHECK2-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK2: omp.inner.for.inc: -// CHECK2-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK2-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !21 // CHECK2-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP10]], 1 -// CHECK2-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4 -// CHECK2-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP14:![0-9]+]] +// CHECK2-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !21 +// CHECK2-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP22:![0-9]+]] // CHECK2: omp.inner.for.end: // CHECK2-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK2: omp.loop.exit: @@ -1763,7 +1763,7 @@ int main() { // CHECK2-NEXT: [[TMP13:%.*]] = landingpad { i8*, i32 } // CHECK2-NEXT: catch i8* null // CHECK2-NEXT: [[TMP14:%.*]] = extractvalue { i8*, i32 } [[TMP13]], 0 -// CHECK2-NEXT: call void @__clang_call_terminate(i8* [[TMP14]]) #[[ATTR9]] +// 
CHECK2-NEXT: call void @__clang_call_terminate(i8* [[TMP14]]) #[[ATTR9]], !llvm.access.group !21 // CHECK2-NEXT: unreachable // // @@ -1907,24 +1907,24 @@ int main() { // CHECK2-NEXT: store i32 [[TMP4]], i32* [[DOTOMP_IV]], align 4 // CHECK2-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK2: omp.inner.for.cond: -// CHECK2-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK2-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK2-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !24 +// CHECK2-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !24 // CHECK2-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] // CHECK2-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK2: omp.inner.for.body: -// CHECK2-NEXT: call void @__kmpc_push_num_threads(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]], i32 5) -// CHECK2-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK2-NEXT: call void @__kmpc_push_num_threads(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]], i32 5), !llvm.access.group !24 +// CHECK2-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !24 // CHECK2-NEXT: [[TMP8:%.*]] = zext i32 [[TMP7]] to i64 -// CHECK2-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK2-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !24 // CHECK2-NEXT: [[TMP10:%.*]] = zext i32 [[TMP9]] to i64 -// CHECK2-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 2, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64)* @.omp_outlined..5 to void (i32*, i32*, ...)*), i64 [[TMP8]], i64 [[TMP10]]) +// CHECK2-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) 
@__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 2, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64)* @.omp_outlined..5 to void (i32*, i32*, ...)*), i64 [[TMP8]], i64 [[TMP10]]), !llvm.access.group !24 // CHECK2-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK2: omp.inner.for.inc: -// CHECK2-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK2-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK2-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !24 +// CHECK2-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !24 // CHECK2-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP11]], [[TMP12]] -// CHECK2-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 -// CHECK2-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP15:![0-9]+]] +// CHECK2-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !24 +// CHECK2-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP25:![0-9]+]] // CHECK2: omp.inner.for.end: // CHECK2-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK2: omp.loop.exit: @@ -1985,26 +1985,26 @@ int main() { // CHECK2-NEXT: store i32 [[TMP6]], i32* [[DOTOMP_IV]], align 4 // CHECK2-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK2: omp.inner.for.cond: -// CHECK2-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK2-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK2-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !27 +// CHECK2-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !27 // CHECK2-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP7]], [[TMP8]] // CHECK2-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK2: omp.inner.for.body: -// CHECK2-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK2-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !27 // CHECK2-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP9]], 1 // CHECK2-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK2-NEXT: store i32 [[ADD]], i32* [[I]], align 4 +// CHECK2-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !27 // CHECK2-NEXT: invoke void @_Z3foov() -// CHECK2-NEXT: to label [[INVOKE_CONT:%.*]] unwind label [[TERMINATE_LPAD:%.*]] +// CHECK2-NEXT: to label [[INVOKE_CONT:%.*]] unwind label [[TERMINATE_LPAD:%.*]], !llvm.access.group !27 // CHECK2: invoke.cont: // CHECK2-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK2: omp.body.continue: // CHECK2-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK2: omp.inner.for.inc: -// CHECK2-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK2-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !27 // CHECK2-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP10]], 1 -// CHECK2-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4 -// CHECK2-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP16:![0-9]+]] +// CHECK2-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !27 +// CHECK2-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP28:![0-9]+]] // CHECK2: omp.inner.for.end: // CHECK2-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK2: omp.loop.exit: @@ -2021,7 +2021,7 @@ int main() { // CHECK2-NEXT: [[TMP13:%.*]] = landingpad { i8*, i32 } // CHECK2-NEXT: catch i8* null // CHECK2-NEXT: [[TMP14:%.*]] = extractvalue { i8*, i32 } [[TMP13]], 0 -// CHECK2-NEXT: call void @__clang_call_terminate(i8* [[TMP14]]) #[[ATTR9]] +// 
CHECK2-NEXT: call void @__clang_call_terminate(i8* [[TMP14]]) #[[ATTR9]], !llvm.access.group !27 // CHECK2-NEXT: unreachable // // @@ -2068,24 +2068,24 @@ int main() { // CHECK2-NEXT: store i32 [[TMP4]], i32* [[DOTOMP_IV]], align 4 // CHECK2-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK2: omp.inner.for.cond: -// CHECK2-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK2-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK2-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !30 +// CHECK2-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !30 // CHECK2-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] // CHECK2-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK2: omp.inner.for.body: -// CHECK2-NEXT: call void @__kmpc_push_num_threads(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]], i32 23) -// CHECK2-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK2-NEXT: call void @__kmpc_push_num_threads(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]], i32 23), !llvm.access.group !30 +// CHECK2-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !30 // CHECK2-NEXT: [[TMP8:%.*]] = zext i32 [[TMP7]] to i64 -// CHECK2-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK2-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !30 // CHECK2-NEXT: [[TMP10:%.*]] = zext i32 [[TMP9]] to i64 -// CHECK2-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 2, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64)* @.omp_outlined..7 to void (i32*, i32*, ...)*), i64 [[TMP8]], i64 [[TMP10]]) +// CHECK2-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) 
@__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 2, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64)* @.omp_outlined..7 to void (i32*, i32*, ...)*), i64 [[TMP8]], i64 [[TMP10]]), !llvm.access.group !30 // CHECK2-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK2: omp.inner.for.inc: -// CHECK2-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK2-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK2-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !30 +// CHECK2-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !30 // CHECK2-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP11]], [[TMP12]] -// CHECK2-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 -// CHECK2-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP17:![0-9]+]] +// CHECK2-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !30 +// CHECK2-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP31:![0-9]+]] // CHECK2: omp.inner.for.end: // CHECK2-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK2: omp.loop.exit: @@ -2146,26 +2146,26 @@ int main() { // CHECK2-NEXT: store i32 [[TMP6]], i32* [[DOTOMP_IV]], align 4 // CHECK2-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK2: omp.inner.for.cond: -// CHECK2-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK2-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK2-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !33 +// CHECK2-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !33 // CHECK2-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP7]], [[TMP8]] // CHECK2-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK2: omp.inner.for.body: -// CHECK2-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK2-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !33 // CHECK2-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP9]], 1 // CHECK2-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK2-NEXT: store i32 [[ADD]], i32* [[I]], align 4 +// CHECK2-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !33 // CHECK2-NEXT: invoke void @_Z3foov() -// CHECK2-NEXT: to label [[INVOKE_CONT:%.*]] unwind label [[TERMINATE_LPAD:%.*]] +// CHECK2-NEXT: to label [[INVOKE_CONT:%.*]] unwind label [[TERMINATE_LPAD:%.*]], !llvm.access.group !33 // CHECK2: invoke.cont: // CHECK2-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK2: omp.body.continue: // CHECK2-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK2: omp.inner.for.inc: -// CHECK2-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK2-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !33 // CHECK2-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP10]], 1 -// CHECK2-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4 -// CHECK2-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP18:![0-9]+]] +// CHECK2-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !33 +// CHECK2-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP34:![0-9]+]] // CHECK2: omp.inner.for.end: // CHECK2-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK2: omp.loop.exit: @@ -2182,7 +2182,7 @@ int main() { // CHECK2-NEXT: [[TMP13:%.*]] = landingpad { i8*, i32 } // CHECK2-NEXT: catch i8* null // CHECK2-NEXT: [[TMP14:%.*]] = extractvalue { i8*, i32 } [[TMP13]], 0 -// CHECK2-NEXT: call void @__clang_call_terminate(i8* [[TMP14]]) #[[ATTR9]] +// 
CHECK2-NEXT: call void @__clang_call_terminate(i8* [[TMP14]]) #[[ATTR9]], !llvm.access.group !33 // CHECK2-NEXT: unreachable // // @@ -2229,24 +2229,24 @@ int main() { // CHECK2-NEXT: store i32 [[TMP4]], i32* [[DOTOMP_IV]], align 4 // CHECK2-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK2: omp.inner.for.cond: -// CHECK2-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK2-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK2-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !36 +// CHECK2-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !36 // CHECK2-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] // CHECK2-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK2: omp.inner.for.body: -// CHECK2-NEXT: call void @__kmpc_push_num_threads(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]], i32 1) -// CHECK2-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK2-NEXT: call void @__kmpc_push_num_threads(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]], i32 1), !llvm.access.group !36 +// CHECK2-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !36 // CHECK2-NEXT: [[TMP8:%.*]] = zext i32 [[TMP7]] to i64 -// CHECK2-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK2-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !36 // CHECK2-NEXT: [[TMP10:%.*]] = zext i32 [[TMP9]] to i64 -// CHECK2-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 2, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64)* @.omp_outlined..9 to void (i32*, i32*, ...)*), i64 [[TMP8]], i64 [[TMP10]]) +// CHECK2-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) 
@__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 2, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64)* @.omp_outlined..9 to void (i32*, i32*, ...)*), i64 [[TMP8]], i64 [[TMP10]]), !llvm.access.group !36 // CHECK2-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK2: omp.inner.for.inc: -// CHECK2-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK2-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK2-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !36 +// CHECK2-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !36 // CHECK2-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP11]], [[TMP12]] -// CHECK2-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 -// CHECK2-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP19:![0-9]+]] +// CHECK2-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !36 +// CHECK2-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP37:![0-9]+]] // CHECK2: omp.inner.for.end: // CHECK2-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK2: omp.loop.exit: @@ -2307,26 +2307,26 @@ int main() { // CHECK2-NEXT: store i32 [[TMP6]], i32* [[DOTOMP_IV]], align 4 // CHECK2-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK2: omp.inner.for.cond: -// CHECK2-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK2-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK2-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !39 +// CHECK2-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !39 // CHECK2-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP7]], [[TMP8]] // CHECK2-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK2: omp.inner.for.body: -// CHECK2-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK2-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !39 // CHECK2-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP9]], 1 // CHECK2-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK2-NEXT: store i32 [[ADD]], i32* [[I]], align 4 +// CHECK2-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !39 // CHECK2-NEXT: invoke void @_Z3foov() -// CHECK2-NEXT: to label [[INVOKE_CONT:%.*]] unwind label [[TERMINATE_LPAD:%.*]] +// CHECK2-NEXT: to label [[INVOKE_CONT:%.*]] unwind label [[TERMINATE_LPAD:%.*]], !llvm.access.group !39 // CHECK2: invoke.cont: // CHECK2-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK2: omp.body.continue: // CHECK2-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK2: omp.inner.for.inc: -// CHECK2-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK2-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !39 // CHECK2-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP10]], 1 -// CHECK2-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4 -// CHECK2-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP20:![0-9]+]] +// CHECK2-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !39 +// CHECK2-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP40:![0-9]+]] // CHECK2: omp.inner.for.end: // CHECK2-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK2: omp.loop.exit: @@ -2343,7 +2343,7 @@ int main() { // CHECK2-NEXT: [[TMP13:%.*]] = landingpad { i8*, i32 } // CHECK2-NEXT: catch i8* null // CHECK2-NEXT: [[TMP14:%.*]] = extractvalue { i8*, i32 } [[TMP13]], 0 -// CHECK2-NEXT: call void @__clang_call_terminate(i8* [[TMP14]]) #[[ATTR9]] +// 
CHECK2-NEXT: call void @__clang_call_terminate(i8* [[TMP14]]) #[[ATTR9]], !llvm.access.group !39 // CHECK2-NEXT: unreachable // // @@ -2420,26 +2420,26 @@ int main() { // CHECK2-NEXT: store i32 [[TMP4]], i32* [[DOTOMP_IV]], align 4 // CHECK2-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK2: omp.inner.for.cond: -// CHECK2-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK2-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK2-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !42 +// CHECK2-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !42 // CHECK2-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] // CHECK2-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK2: omp.inner.for.body: -// CHECK2-NEXT: [[TMP7:%.*]] = load i8, i8* [[CONV]], align 8 +// CHECK2-NEXT: [[TMP7:%.*]] = load i8, i8* [[CONV]], align 8, !llvm.access.group !42 // CHECK2-NEXT: [[TMP8:%.*]] = sext i8 [[TMP7]] to i32 -// CHECK2-NEXT: call void @__kmpc_push_num_threads(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]], i32 [[TMP8]]) -// CHECK2-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK2-NEXT: call void @__kmpc_push_num_threads(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]], i32 [[TMP8]]), !llvm.access.group !42 +// CHECK2-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !42 // CHECK2-NEXT: [[TMP10:%.*]] = zext i32 [[TMP9]] to i64 -// CHECK2-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK2-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !42 // CHECK2-NEXT: [[TMP12:%.*]] = zext i32 [[TMP11]] to i64 -// CHECK2-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 2, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64)* @.omp_outlined..11 to void (i32*, i32*, ...)*), i64 [[TMP10]], i64 [[TMP12]]) +// CHECK2-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) 
@__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 2, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64)* @.omp_outlined..11 to void (i32*, i32*, ...)*), i64 [[TMP10]], i64 [[TMP12]]), !llvm.access.group !42 // CHECK2-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK2: omp.inner.for.inc: -// CHECK2-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK2-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK2-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !42 +// CHECK2-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !42 // CHECK2-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP13]], [[TMP14]] -// CHECK2-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 -// CHECK2-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP21:![0-9]+]] +// CHECK2-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !42 +// CHECK2-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP43:![0-9]+]] // CHECK2: omp.inner.for.end: // CHECK2-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK2: omp.loop.exit: @@ -2500,26 +2500,26 @@ int main() { // CHECK2-NEXT: store i32 [[TMP6]], i32* [[DOTOMP_IV]], align 4 // CHECK2-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK2: omp.inner.for.cond: -// CHECK2-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK2-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK2-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !45 +// CHECK2-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !45 // CHECK2-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP7]], [[TMP8]] // CHECK2-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK2: omp.inner.for.body: -// CHECK2-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK2-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !45 // CHECK2-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP9]], 1 // CHECK2-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK2-NEXT: store i32 [[ADD]], i32* [[I]], align 4 +// CHECK2-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !45 // CHECK2-NEXT: invoke void @_Z3foov() -// CHECK2-NEXT: to label [[INVOKE_CONT:%.*]] unwind label [[TERMINATE_LPAD:%.*]] +// CHECK2-NEXT: to label [[INVOKE_CONT:%.*]] unwind label [[TERMINATE_LPAD:%.*]], !llvm.access.group !45 // CHECK2: invoke.cont: // CHECK2-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK2: omp.body.continue: // CHECK2-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK2: omp.inner.for.inc: -// CHECK2-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK2-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !45 // CHECK2-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP10]], 1 -// CHECK2-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4 -// CHECK2-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP22:![0-9]+]] +// CHECK2-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !45 +// CHECK2-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP46:![0-9]+]] // CHECK2: omp.inner.for.end: // CHECK2-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK2: omp.loop.exit: @@ -2536,7 +2536,7 @@ int main() { // CHECK2-NEXT: [[TMP13:%.*]] = landingpad { i8*, i32 } // CHECK2-NEXT: catch i8* null // CHECK2-NEXT: [[TMP14:%.*]] = extractvalue { i8*, i32 } [[TMP13]], 0 -// CHECK2-NEXT: call void @__clang_call_terminate(i8* [[TMP14]]) #[[ATTR9]] +// 
CHECK2-NEXT: call void @__clang_call_terminate(i8* [[TMP14]]) #[[ATTR9]], !llvm.access.group !45 // CHECK2-NEXT: unreachable // // @@ -3399,24 +3399,24 @@ int main() { // CHECK5-NEXT: store i32 [[TMP4]], i32* [[DOTOMP_IV]], align 4 // CHECK5-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK5: omp.inner.for.cond: -// CHECK5-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK5-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK5-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !9 +// CHECK5-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !9 // CHECK5-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] // CHECK5-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK5: omp.inner.for.body: -// CHECK5-NEXT: call void @__kmpc_push_num_threads(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]], i32 2) -// CHECK5-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK5-NEXT: call void @__kmpc_push_num_threads(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]], i32 2), !llvm.access.group !9 +// CHECK5-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !9 // CHECK5-NEXT: [[TMP8:%.*]] = zext i32 [[TMP7]] to i64 -// CHECK5-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK5-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !9 // CHECK5-NEXT: [[TMP10:%.*]] = zext i32 [[TMP9]] to i64 -// CHECK5-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 2, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64)* @.omp_outlined..1 to void (i32*, i32*, ...)*), i64 [[TMP8]], i64 [[TMP10]]) +// CHECK5-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) 
@__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 2, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64)* @.omp_outlined..1 to void (i32*, i32*, ...)*), i64 [[TMP8]], i64 [[TMP10]]), !llvm.access.group !9 // CHECK5-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK5: omp.inner.for.inc: -// CHECK5-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK5-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK5-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !9 +// CHECK5-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !9 // CHECK5-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP11]], [[TMP12]] -// CHECK5-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 -// CHECK5-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP8:![0-9]+]] +// CHECK5-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !9 +// CHECK5-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP10:![0-9]+]] // CHECK5: omp.inner.for.end: // CHECK5-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK5: omp.loop.exit: @@ -3477,26 +3477,26 @@ int main() { // CHECK5-NEXT: store i32 [[TMP6]], i32* [[DOTOMP_IV]], align 4 // CHECK5-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK5: omp.inner.for.cond: -// CHECK5-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK5-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK5-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !13 +// CHECK5-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !13 // CHECK5-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP7]], [[TMP8]] // CHECK5-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK5: omp.inner.for.body: -// CHECK5-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK5-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !13 // CHECK5-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP9]], 1 // CHECK5-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK5-NEXT: store i32 [[ADD]], i32* [[I]], align 4 +// CHECK5-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !13 // CHECK5-NEXT: invoke void @_Z3foov() -// CHECK5-NEXT: to label [[INVOKE_CONT:%.*]] unwind label [[TERMINATE_LPAD:%.*]] +// CHECK5-NEXT: to label [[INVOKE_CONT:%.*]] unwind label [[TERMINATE_LPAD:%.*]], !llvm.access.group !13 // CHECK5: invoke.cont: // CHECK5-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK5: omp.body.continue: // CHECK5-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK5: omp.inner.for.inc: -// CHECK5-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK5-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !13 // CHECK5-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP10]], 1 -// CHECK5-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4 -// CHECK5-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP10:![0-9]+]] +// CHECK5-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !13 +// CHECK5-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP14:![0-9]+]] // CHECK5: omp.inner.for.end: // CHECK5-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK5: omp.loop.exit: @@ -3513,7 +3513,7 @@ int main() { // CHECK5-NEXT: [[TMP13:%.*]] = landingpad { i8*, i32 } // CHECK5-NEXT: catch i8* null // CHECK5-NEXT: [[TMP14:%.*]] = extractvalue { i8*, i32 } [[TMP13]], 0 -// CHECK5-NEXT: call void @__clang_call_terminate(i8* [[TMP14]]) #[[ATTR9:[0-9]+]] +// 
CHECK5-NEXT: call void @__clang_call_terminate(i8* [[TMP14]]) #[[ATTR9:[0-9]+]], !llvm.access.group !13 // CHECK5-NEXT: unreachable // // @@ -3581,26 +3581,26 @@ int main() { // CHECK5-NEXT: store i32 [[TMP4]], i32* [[DOTOMP_IV]], align 4 // CHECK5-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK5: omp.inner.for.cond: -// CHECK5-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK5-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK5-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !18 +// CHECK5-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !18 // CHECK5-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] // CHECK5-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK5: omp.inner.for.body: -// CHECK5-NEXT: [[TMP7:%.*]] = load i8, i8* [[CONV]], align 8 +// CHECK5-NEXT: [[TMP7:%.*]] = load i8, i8* [[CONV]], align 8, !llvm.access.group !18 // CHECK5-NEXT: [[TMP8:%.*]] = sext i8 [[TMP7]] to i32 -// CHECK5-NEXT: call void @__kmpc_push_num_threads(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]], i32 [[TMP8]]) -// CHECK5-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK5-NEXT: call void @__kmpc_push_num_threads(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]], i32 [[TMP8]]), !llvm.access.group !18 +// CHECK5-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !18 // CHECK5-NEXT: [[TMP10:%.*]] = zext i32 [[TMP9]] to i64 -// CHECK5-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK5-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !18 // CHECK5-NEXT: [[TMP12:%.*]] = zext i32 [[TMP11]] to i64 -// CHECK5-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 2, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64)* @.omp_outlined..3 to void (i32*, i32*, ...)*), i64 [[TMP10]], i64 [[TMP12]]) +// CHECK5-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) 
@__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 2, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64)* @.omp_outlined..3 to void (i32*, i32*, ...)*), i64 [[TMP10]], i64 [[TMP12]]), !llvm.access.group !18 // CHECK5-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK5: omp.inner.for.inc: -// CHECK5-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK5-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK5-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !18 +// CHECK5-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !18 // CHECK5-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP13]], [[TMP14]] -// CHECK5-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 -// CHECK5-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP13:![0-9]+]] +// CHECK5-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !18 +// CHECK5-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP19:![0-9]+]] // CHECK5: omp.inner.for.end: // CHECK5-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK5: omp.loop.exit: @@ -3661,26 +3661,26 @@ int main() { // CHECK5-NEXT: store i32 [[TMP6]], i32* [[DOTOMP_IV]], align 4 // CHECK5-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK5: omp.inner.for.cond: -// CHECK5-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK5-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK5-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !21 +// CHECK5-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !21 // CHECK5-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP7]], [[TMP8]] // CHECK5-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK5: omp.inner.for.body: -// CHECK5-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK5-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !21 // CHECK5-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP9]], 1 // CHECK5-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK5-NEXT: store i32 [[ADD]], i32* [[I]], align 4 +// CHECK5-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !21 // CHECK5-NEXT: invoke void @_Z3foov() -// CHECK5-NEXT: to label [[INVOKE_CONT:%.*]] unwind label [[TERMINATE_LPAD:%.*]] +// CHECK5-NEXT: to label [[INVOKE_CONT:%.*]] unwind label [[TERMINATE_LPAD:%.*]], !llvm.access.group !21 // CHECK5: invoke.cont: // CHECK5-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK5: omp.body.continue: // CHECK5-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK5: omp.inner.for.inc: -// CHECK5-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK5-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !21 // CHECK5-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP10]], 1 -// CHECK5-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4 -// CHECK5-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP14:![0-9]+]] +// CHECK5-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !21 +// CHECK5-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP22:![0-9]+]] // CHECK5: omp.inner.for.end: // CHECK5-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK5: omp.loop.exit: @@ -3697,7 +3697,7 @@ int main() { // CHECK5-NEXT: [[TMP13:%.*]] = landingpad { i8*, i32 } // CHECK5-NEXT: catch i8* null // CHECK5-NEXT: [[TMP14:%.*]] = extractvalue { i8*, i32 } [[TMP13]], 0 -// CHECK5-NEXT: call void @__clang_call_terminate(i8* [[TMP14]]) #[[ATTR9]] +// 
CHECK5-NEXT: call void @__clang_call_terminate(i8* [[TMP14]]) #[[ATTR9]], !llvm.access.group !21 // CHECK5-NEXT: unreachable // // @@ -3832,24 +3832,24 @@ int main() { // CHECK5-NEXT: store i32 [[TMP4]], i32* [[DOTOMP_IV]], align 4 // CHECK5-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK5: omp.inner.for.cond: -// CHECK5-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK5-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK5-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !24 +// CHECK5-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !24 // CHECK5-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] // CHECK5-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK5: omp.inner.for.body: -// CHECK5-NEXT: call void @__kmpc_push_num_threads(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]], i32 5) -// CHECK5-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK5-NEXT: call void @__kmpc_push_num_threads(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]], i32 5), !llvm.access.group !24 +// CHECK5-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !24 // CHECK5-NEXT: [[TMP8:%.*]] = zext i32 [[TMP7]] to i64 -// CHECK5-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK5-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !24 // CHECK5-NEXT: [[TMP10:%.*]] = zext i32 [[TMP9]] to i64 -// CHECK5-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 2, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64)* @.omp_outlined..5 to void (i32*, i32*, ...)*), i64 [[TMP8]], i64 [[TMP10]]) +// CHECK5-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) 
@__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 2, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64)* @.omp_outlined..5 to void (i32*, i32*, ...)*), i64 [[TMP8]], i64 [[TMP10]]), !llvm.access.group !24 // CHECK5-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK5: omp.inner.for.inc: -// CHECK5-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK5-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK5-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !24 +// CHECK5-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !24 // CHECK5-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP11]], [[TMP12]] -// CHECK5-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 -// CHECK5-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP15:![0-9]+]] +// CHECK5-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !24 +// CHECK5-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP25:![0-9]+]] // CHECK5: omp.inner.for.end: // CHECK5-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK5: omp.loop.exit: @@ -3910,26 +3910,26 @@ int main() { // CHECK5-NEXT: store i32 [[TMP6]], i32* [[DOTOMP_IV]], align 4 // CHECK5-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK5: omp.inner.for.cond: -// CHECK5-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK5-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK5-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !27 +// CHECK5-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !27 // CHECK5-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP7]], [[TMP8]] // CHECK5-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK5: omp.inner.for.body: -// CHECK5-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK5-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !27 // CHECK5-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP9]], 1 // CHECK5-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK5-NEXT: store i32 [[ADD]], i32* [[I]], align 4 +// CHECK5-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !27 // CHECK5-NEXT: invoke void @_Z3foov() -// CHECK5-NEXT: to label [[INVOKE_CONT:%.*]] unwind label [[TERMINATE_LPAD:%.*]] +// CHECK5-NEXT: to label [[INVOKE_CONT:%.*]] unwind label [[TERMINATE_LPAD:%.*]], !llvm.access.group !27 // CHECK5: invoke.cont: // CHECK5-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK5: omp.body.continue: // CHECK5-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK5: omp.inner.for.inc: -// CHECK5-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK5-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !27 // CHECK5-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP10]], 1 -// CHECK5-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4 -// CHECK5-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP16:![0-9]+]] +// CHECK5-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !27 +// CHECK5-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP28:![0-9]+]] // CHECK5: omp.inner.for.end: // CHECK5-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK5: omp.loop.exit: @@ -3946,7 +3946,7 @@ int main() { // CHECK5-NEXT: [[TMP13:%.*]] = landingpad { i8*, i32 } // CHECK5-NEXT: catch i8* null // CHECK5-NEXT: [[TMP14:%.*]] = extractvalue { i8*, i32 } [[TMP13]], 0 -// CHECK5-NEXT: call void @__clang_call_terminate(i8* [[TMP14]]) #[[ATTR9]] +// 
CHECK5-NEXT: call void @__clang_call_terminate(i8* [[TMP14]]) #[[ATTR9]], !llvm.access.group !27 // CHECK5-NEXT: unreachable // // @@ -3993,24 +3993,24 @@ int main() { // CHECK5-NEXT: store i32 [[TMP4]], i32* [[DOTOMP_IV]], align 4 // CHECK5-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK5: omp.inner.for.cond: -// CHECK5-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK5-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK5-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !30 +// CHECK5-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !30 // CHECK5-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] // CHECK5-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK5: omp.inner.for.body: -// CHECK5-NEXT: call void @__kmpc_push_num_threads(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]], i32 23) -// CHECK5-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK5-NEXT: call void @__kmpc_push_num_threads(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]], i32 23), !llvm.access.group !30 +// CHECK5-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !30 // CHECK5-NEXT: [[TMP8:%.*]] = zext i32 [[TMP7]] to i64 -// CHECK5-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK5-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !30 // CHECK5-NEXT: [[TMP10:%.*]] = zext i32 [[TMP9]] to i64 -// CHECK5-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 2, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64)* @.omp_outlined..7 to void (i32*, i32*, ...)*), i64 [[TMP8]], i64 [[TMP10]]) +// CHECK5-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) 
@__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 2, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64)* @.omp_outlined..7 to void (i32*, i32*, ...)*), i64 [[TMP8]], i64 [[TMP10]]), !llvm.access.group !30 // CHECK5-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK5: omp.inner.for.inc: -// CHECK5-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK5-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK5-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !30 +// CHECK5-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !30 // CHECK5-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP11]], [[TMP12]] -// CHECK5-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 -// CHECK5-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP17:![0-9]+]] +// CHECK5-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !30 +// CHECK5-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP31:![0-9]+]] // CHECK5: omp.inner.for.end: // CHECK5-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK5: omp.loop.exit: @@ -4071,26 +4071,26 @@ int main() { // CHECK5-NEXT: store i32 [[TMP6]], i32* [[DOTOMP_IV]], align 4 // CHECK5-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK5: omp.inner.for.cond: -// CHECK5-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK5-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK5-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !33 +// CHECK5-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !33 // CHECK5-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP7]], [[TMP8]] // CHECK5-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK5: omp.inner.for.body: -// CHECK5-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK5-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !33 // CHECK5-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP9]], 1 // CHECK5-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK5-NEXT: store i32 [[ADD]], i32* [[I]], align 4 +// CHECK5-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !33 // CHECK5-NEXT: invoke void @_Z3foov() -// CHECK5-NEXT: to label [[INVOKE_CONT:%.*]] unwind label [[TERMINATE_LPAD:%.*]] +// CHECK5-NEXT: to label [[INVOKE_CONT:%.*]] unwind label [[TERMINATE_LPAD:%.*]], !llvm.access.group !33 // CHECK5: invoke.cont: // CHECK5-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK5: omp.body.continue: // CHECK5-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK5: omp.inner.for.inc: -// CHECK5-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK5-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !33 // CHECK5-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP10]], 1 -// CHECK5-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4 -// CHECK5-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP18:![0-9]+]] +// CHECK5-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !33 +// CHECK5-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP34:![0-9]+]] // CHECK5: omp.inner.for.end: // CHECK5-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK5: omp.loop.exit: @@ -4107,7 +4107,7 @@ int main() { // CHECK5-NEXT: [[TMP13:%.*]] = landingpad { i8*, i32 } // CHECK5-NEXT: catch i8* null // CHECK5-NEXT: [[TMP14:%.*]] = extractvalue { i8*, i32 } [[TMP13]], 0 -// CHECK5-NEXT: call void @__clang_call_terminate(i8* [[TMP14]]) #[[ATTR9]] +// 
CHECK5-NEXT: call void @__clang_call_terminate(i8* [[TMP14]]) #[[ATTR9]], !llvm.access.group !33 // CHECK5-NEXT: unreachable // // @@ -4154,24 +4154,24 @@ int main() { // CHECK5-NEXT: store i32 [[TMP4]], i32* [[DOTOMP_IV]], align 4 // CHECK5-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK5: omp.inner.for.cond: -// CHECK5-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK5-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK5-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !36 +// CHECK5-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !36 // CHECK5-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] // CHECK5-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK5: omp.inner.for.body: -// CHECK5-NEXT: call void @__kmpc_push_num_threads(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]], i32 1) -// CHECK5-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK5-NEXT: call void @__kmpc_push_num_threads(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]], i32 1), !llvm.access.group !36 +// CHECK5-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !36 // CHECK5-NEXT: [[TMP8:%.*]] = zext i32 [[TMP7]] to i64 -// CHECK5-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK5-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !36 // CHECK5-NEXT: [[TMP10:%.*]] = zext i32 [[TMP9]] to i64 -// CHECK5-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 2, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64)* @.omp_outlined..9 to void (i32*, i32*, ...)*), i64 [[TMP8]], i64 [[TMP10]]) +// CHECK5-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) 
@__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 2, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64)* @.omp_outlined..9 to void (i32*, i32*, ...)*), i64 [[TMP8]], i64 [[TMP10]]), !llvm.access.group !36 // CHECK5-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK5: omp.inner.for.inc: -// CHECK5-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK5-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK5-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !36 +// CHECK5-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !36 // CHECK5-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP11]], [[TMP12]] -// CHECK5-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 -// CHECK5-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP19:![0-9]+]] +// CHECK5-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !36 +// CHECK5-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP37:![0-9]+]] // CHECK5: omp.inner.for.end: // CHECK5-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK5: omp.loop.exit: @@ -4232,26 +4232,26 @@ int main() { // CHECK5-NEXT: store i32 [[TMP6]], i32* [[DOTOMP_IV]], align 4 // CHECK5-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK5: omp.inner.for.cond: -// CHECK5-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK5-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK5-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !39 +// CHECK5-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !39 // CHECK5-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP7]], [[TMP8]] // CHECK5-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK5: omp.inner.for.body: -// CHECK5-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK5-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !39 // CHECK5-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP9]], 1 // CHECK5-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK5-NEXT: store i32 [[ADD]], i32* [[I]], align 4 +// CHECK5-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !39 // CHECK5-NEXT: invoke void @_Z3foov() -// CHECK5-NEXT: to label [[INVOKE_CONT:%.*]] unwind label [[TERMINATE_LPAD:%.*]] +// CHECK5-NEXT: to label [[INVOKE_CONT:%.*]] unwind label [[TERMINATE_LPAD:%.*]], !llvm.access.group !39 // CHECK5: invoke.cont: // CHECK5-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK5: omp.body.continue: // CHECK5-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK5: omp.inner.for.inc: -// CHECK5-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK5-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !39 // CHECK5-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP10]], 1 -// CHECK5-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4 -// CHECK5-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP20:![0-9]+]] +// CHECK5-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !39 +// CHECK5-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP40:![0-9]+]] // CHECK5: omp.inner.for.end: // CHECK5-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK5: omp.loop.exit: @@ -4268,7 +4268,7 @@ int main() { // CHECK5-NEXT: [[TMP13:%.*]] = landingpad { i8*, i32 } // CHECK5-NEXT: catch i8* null // CHECK5-NEXT: [[TMP14:%.*]] = extractvalue { i8*, i32 } [[TMP13]], 0 -// CHECK5-NEXT: call void @__clang_call_terminate(i8* [[TMP14]]) #[[ATTR9]] +// 
CHECK5-NEXT: call void @__clang_call_terminate(i8* [[TMP14]]) #[[ATTR9]], !llvm.access.group !39 // CHECK5-NEXT: unreachable // // @@ -4345,26 +4345,26 @@ int main() { // CHECK5-NEXT: store i32 [[TMP4]], i32* [[DOTOMP_IV]], align 4 // CHECK5-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK5: omp.inner.for.cond: -// CHECK5-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK5-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK5-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !42 +// CHECK5-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !42 // CHECK5-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] // CHECK5-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK5: omp.inner.for.body: -// CHECK5-NEXT: [[TMP7:%.*]] = load i8, i8* [[CONV]], align 8 +// CHECK5-NEXT: [[TMP7:%.*]] = load i8, i8* [[CONV]], align 8, !llvm.access.group !42 // CHECK5-NEXT: [[TMP8:%.*]] = sext i8 [[TMP7]] to i32 -// CHECK5-NEXT: call void @__kmpc_push_num_threads(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]], i32 [[TMP8]]) -// CHECK5-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK5-NEXT: call void @__kmpc_push_num_threads(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]], i32 [[TMP8]]), !llvm.access.group !42 +// CHECK5-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !42 // CHECK5-NEXT: [[TMP10:%.*]] = zext i32 [[TMP9]] to i64 -// CHECK5-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK5-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !42 // CHECK5-NEXT: [[TMP12:%.*]] = zext i32 [[TMP11]] to i64 -// CHECK5-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 2, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64)* @.omp_outlined..11 to void (i32*, i32*, ...)*), i64 [[TMP10]], i64 [[TMP12]]) +// CHECK5-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) 
@__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 2, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64)* @.omp_outlined..11 to void (i32*, i32*, ...)*), i64 [[TMP10]], i64 [[TMP12]]), !llvm.access.group !42 // CHECK5-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK5: omp.inner.for.inc: -// CHECK5-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK5-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK5-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !42 +// CHECK5-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !42 // CHECK5-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP13]], [[TMP14]] -// CHECK5-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 -// CHECK5-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP21:![0-9]+]] +// CHECK5-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !42 +// CHECK5-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP43:![0-9]+]] // CHECK5: omp.inner.for.end: // CHECK5-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK5: omp.loop.exit: @@ -4425,26 +4425,26 @@ int main() { // CHECK5-NEXT: store i32 [[TMP6]], i32* [[DOTOMP_IV]], align 4 // CHECK5-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK5: omp.inner.for.cond: -// CHECK5-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK5-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK5-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !45 +// CHECK5-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !45 // CHECK5-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP7]], [[TMP8]] // CHECK5-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK5: omp.inner.for.body: -// CHECK5-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK5-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !45 // CHECK5-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP9]], 1 // CHECK5-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK5-NEXT: store i32 [[ADD]], i32* [[I]], align 4 +// CHECK5-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !45 // CHECK5-NEXT: invoke void @_Z3foov() -// CHECK5-NEXT: to label [[INVOKE_CONT:%.*]] unwind label [[TERMINATE_LPAD:%.*]] +// CHECK5-NEXT: to label [[INVOKE_CONT:%.*]] unwind label [[TERMINATE_LPAD:%.*]], !llvm.access.group !45 // CHECK5: invoke.cont: // CHECK5-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK5: omp.body.continue: // CHECK5-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK5: omp.inner.for.inc: -// CHECK5-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK5-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !45 // CHECK5-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP10]], 1 -// CHECK5-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4 -// CHECK5-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP22:![0-9]+]] +// CHECK5-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !45 +// CHECK5-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP46:![0-9]+]] // CHECK5: omp.inner.for.end: // CHECK5-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK5: omp.loop.exit: @@ -4461,7 +4461,7 @@ int main() { // CHECK5-NEXT: [[TMP13:%.*]] = landingpad { i8*, i32 } // CHECK5-NEXT: catch i8* null // CHECK5-NEXT: [[TMP14:%.*]] = extractvalue { i8*, i32 } [[TMP13]], 0 -// CHECK5-NEXT: call void @__clang_call_terminate(i8* [[TMP14]]) #[[ATTR9]] +// 
CHECK5-NEXT: call void @__clang_call_terminate(i8* [[TMP14]]) #[[ATTR9]], !llvm.access.group !45 // CHECK5-NEXT: unreachable // // @@ -4635,24 +4635,24 @@ int main() { // CHECK6-NEXT: store i32 [[TMP4]], i32* [[DOTOMP_IV]], align 4 // CHECK6-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK6: omp.inner.for.cond: -// CHECK6-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK6-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK6-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !9 +// CHECK6-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !9 // CHECK6-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] // CHECK6-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK6: omp.inner.for.body: -// CHECK6-NEXT: call void @__kmpc_push_num_threads(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]], i32 2) -// CHECK6-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK6-NEXT: call void @__kmpc_push_num_threads(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]], i32 2), !llvm.access.group !9 +// CHECK6-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !9 // CHECK6-NEXT: [[TMP8:%.*]] = zext i32 [[TMP7]] to i64 -// CHECK6-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK6-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !9 // CHECK6-NEXT: [[TMP10:%.*]] = zext i32 [[TMP9]] to i64 -// CHECK6-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 2, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64)* @.omp_outlined..1 to void (i32*, i32*, ...)*), i64 [[TMP8]], i64 [[TMP10]]) +// CHECK6-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) 
@__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 2, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64)* @.omp_outlined..1 to void (i32*, i32*, ...)*), i64 [[TMP8]], i64 [[TMP10]]), !llvm.access.group !9 // CHECK6-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK6: omp.inner.for.inc: -// CHECK6-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK6-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK6-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !9 +// CHECK6-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !9 // CHECK6-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP11]], [[TMP12]] -// CHECK6-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 -// CHECK6-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP8:![0-9]+]] +// CHECK6-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !9 +// CHECK6-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP10:![0-9]+]] // CHECK6: omp.inner.for.end: // CHECK6-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK6: omp.loop.exit: @@ -4713,26 +4713,26 @@ int main() { // CHECK6-NEXT: store i32 [[TMP6]], i32* [[DOTOMP_IV]], align 4 // CHECK6-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK6: omp.inner.for.cond: -// CHECK6-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK6-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK6-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !13 +// CHECK6-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !13 // CHECK6-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP7]], [[TMP8]] // CHECK6-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK6: omp.inner.for.body: -// CHECK6-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK6-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !13 // CHECK6-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP9]], 1 // CHECK6-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK6-NEXT: store i32 [[ADD]], i32* [[I]], align 4 +// CHECK6-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !13 // CHECK6-NEXT: invoke void @_Z3foov() -// CHECK6-NEXT: to label [[INVOKE_CONT:%.*]] unwind label [[TERMINATE_LPAD:%.*]] +// CHECK6-NEXT: to label [[INVOKE_CONT:%.*]] unwind label [[TERMINATE_LPAD:%.*]], !llvm.access.group !13 // CHECK6: invoke.cont: // CHECK6-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK6: omp.body.continue: // CHECK6-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK6: omp.inner.for.inc: -// CHECK6-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK6-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !13 // CHECK6-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP10]], 1 -// CHECK6-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4 -// CHECK6-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP10:![0-9]+]] +// CHECK6-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !13 +// CHECK6-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP14:![0-9]+]] // CHECK6: omp.inner.for.end: // CHECK6-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK6: omp.loop.exit: @@ -4749,7 +4749,7 @@ int main() { // CHECK6-NEXT: [[TMP13:%.*]] = landingpad { i8*, i32 } // CHECK6-NEXT: catch i8* null // CHECK6-NEXT: [[TMP14:%.*]] = extractvalue { i8*, i32 } [[TMP13]], 0 -// CHECK6-NEXT: call void @__clang_call_terminate(i8* [[TMP14]]) #[[ATTR9:[0-9]+]] +// 
CHECK6-NEXT: call void @__clang_call_terminate(i8* [[TMP14]]) #[[ATTR9:[0-9]+]], !llvm.access.group !13 // CHECK6-NEXT: unreachable // // @@ -4817,26 +4817,26 @@ int main() { // CHECK6-NEXT: store i32 [[TMP4]], i32* [[DOTOMP_IV]], align 4 // CHECK6-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK6: omp.inner.for.cond: -// CHECK6-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK6-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK6-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !18 +// CHECK6-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !18 // CHECK6-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] // CHECK6-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK6: omp.inner.for.body: -// CHECK6-NEXT: [[TMP7:%.*]] = load i8, i8* [[CONV]], align 8 +// CHECK6-NEXT: [[TMP7:%.*]] = load i8, i8* [[CONV]], align 8, !llvm.access.group !18 // CHECK6-NEXT: [[TMP8:%.*]] = sext i8 [[TMP7]] to i32 -// CHECK6-NEXT: call void @__kmpc_push_num_threads(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]], i32 [[TMP8]]) -// CHECK6-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK6-NEXT: call void @__kmpc_push_num_threads(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]], i32 [[TMP8]]), !llvm.access.group !18 +// CHECK6-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !18 // CHECK6-NEXT: [[TMP10:%.*]] = zext i32 [[TMP9]] to i64 -// CHECK6-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK6-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !18 // CHECK6-NEXT: [[TMP12:%.*]] = zext i32 [[TMP11]] to i64 -// CHECK6-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 2, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64)* @.omp_outlined..3 to void (i32*, i32*, ...)*), i64 [[TMP10]], i64 [[TMP12]]) +// CHECK6-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) 
@__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 2, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64)* @.omp_outlined..3 to void (i32*, i32*, ...)*), i64 [[TMP10]], i64 [[TMP12]]), !llvm.access.group !18 // CHECK6-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK6: omp.inner.for.inc: -// CHECK6-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK6-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK6-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !18 +// CHECK6-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !18 // CHECK6-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP13]], [[TMP14]] -// CHECK6-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 -// CHECK6-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP13:![0-9]+]] +// CHECK6-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !18 +// CHECK6-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP19:![0-9]+]] // CHECK6: omp.inner.for.end: // CHECK6-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK6: omp.loop.exit: @@ -4897,26 +4897,26 @@ int main() { // CHECK6-NEXT: store i32 [[TMP6]], i32* [[DOTOMP_IV]], align 4 // CHECK6-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK6: omp.inner.for.cond: -// CHECK6-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK6-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK6-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !21 +// CHECK6-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !21 // CHECK6-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP7]], [[TMP8]] // CHECK6-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK6: omp.inner.for.body: -// CHECK6-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK6-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !21 // CHECK6-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP9]], 1 // CHECK6-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK6-NEXT: store i32 [[ADD]], i32* [[I]], align 4 +// CHECK6-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !21 // CHECK6-NEXT: invoke void @_Z3foov() -// CHECK6-NEXT: to label [[INVOKE_CONT:%.*]] unwind label [[TERMINATE_LPAD:%.*]] +// CHECK6-NEXT: to label [[INVOKE_CONT:%.*]] unwind label [[TERMINATE_LPAD:%.*]], !llvm.access.group !21 // CHECK6: invoke.cont: // CHECK6-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK6: omp.body.continue: // CHECK6-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK6: omp.inner.for.inc: -// CHECK6-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK6-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !21 // CHECK6-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP10]], 1 -// CHECK6-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4 -// CHECK6-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP14:![0-9]+]] +// CHECK6-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !21 +// CHECK6-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP22:![0-9]+]] // CHECK6: omp.inner.for.end: // CHECK6-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK6: omp.loop.exit: @@ -4933,7 +4933,7 @@ int main() { // CHECK6-NEXT: [[TMP13:%.*]] = landingpad { i8*, i32 } // CHECK6-NEXT: catch i8* null // CHECK6-NEXT: [[TMP14:%.*]] = extractvalue { i8*, i32 } [[TMP13]], 0 -// CHECK6-NEXT: call void @__clang_call_terminate(i8* [[TMP14]]) #[[ATTR9]] +// 
CHECK6-NEXT: call void @__clang_call_terminate(i8* [[TMP14]]) #[[ATTR9]], !llvm.access.group !21 // CHECK6-NEXT: unreachable // // @@ -5068,24 +5068,24 @@ int main() { // CHECK6-NEXT: store i32 [[TMP4]], i32* [[DOTOMP_IV]], align 4 // CHECK6-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK6: omp.inner.for.cond: -// CHECK6-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK6-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK6-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !24 +// CHECK6-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !24 // CHECK6-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] // CHECK6-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK6: omp.inner.for.body: -// CHECK6-NEXT: call void @__kmpc_push_num_threads(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]], i32 5) -// CHECK6-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK6-NEXT: call void @__kmpc_push_num_threads(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]], i32 5), !llvm.access.group !24 +// CHECK6-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !24 // CHECK6-NEXT: [[TMP8:%.*]] = zext i32 [[TMP7]] to i64 -// CHECK6-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK6-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !24 // CHECK6-NEXT: [[TMP10:%.*]] = zext i32 [[TMP9]] to i64 -// CHECK6-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 2, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64)* @.omp_outlined..5 to void (i32*, i32*, ...)*), i64 [[TMP8]], i64 [[TMP10]]) +// CHECK6-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) 
@__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 2, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64)* @.omp_outlined..5 to void (i32*, i32*, ...)*), i64 [[TMP8]], i64 [[TMP10]]), !llvm.access.group !24 // CHECK6-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK6: omp.inner.for.inc: -// CHECK6-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK6-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK6-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !24 +// CHECK6-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !24 // CHECK6-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP11]], [[TMP12]] -// CHECK6-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 -// CHECK6-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP15:![0-9]+]] +// CHECK6-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !24 +// CHECK6-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP25:![0-9]+]] // CHECK6: omp.inner.for.end: // CHECK6-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK6: omp.loop.exit: @@ -5146,26 +5146,26 @@ int main() { // CHECK6-NEXT: store i32 [[TMP6]], i32* [[DOTOMP_IV]], align 4 // CHECK6-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK6: omp.inner.for.cond: -// CHECK6-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK6-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK6-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !27 +// CHECK6-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !27 // CHECK6-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP7]], [[TMP8]] // CHECK6-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK6: omp.inner.for.body: -// CHECK6-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK6-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !27 // CHECK6-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP9]], 1 // CHECK6-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK6-NEXT: store i32 [[ADD]], i32* [[I]], align 4 +// CHECK6-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !27 // CHECK6-NEXT: invoke void @_Z3foov() -// CHECK6-NEXT: to label [[INVOKE_CONT:%.*]] unwind label [[TERMINATE_LPAD:%.*]] +// CHECK6-NEXT: to label [[INVOKE_CONT:%.*]] unwind label [[TERMINATE_LPAD:%.*]], !llvm.access.group !27 // CHECK6: invoke.cont: // CHECK6-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK6: omp.body.continue: // CHECK6-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK6: omp.inner.for.inc: -// CHECK6-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK6-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !27 // CHECK6-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP10]], 1 -// CHECK6-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4 -// CHECK6-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP16:![0-9]+]] +// CHECK6-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !27 +// CHECK6-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP28:![0-9]+]] // CHECK6: omp.inner.for.end: // CHECK6-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK6: omp.loop.exit: @@ -5182,7 +5182,7 @@ int main() { // CHECK6-NEXT: [[TMP13:%.*]] = landingpad { i8*, i32 } // CHECK6-NEXT: catch i8* null // CHECK6-NEXT: [[TMP14:%.*]] = extractvalue { i8*, i32 } [[TMP13]], 0 -// CHECK6-NEXT: call void @__clang_call_terminate(i8* [[TMP14]]) #[[ATTR9]] +// 
CHECK6-NEXT: call void @__clang_call_terminate(i8* [[TMP14]]) #[[ATTR9]], !llvm.access.group !27 // CHECK6-NEXT: unreachable // // @@ -5229,24 +5229,24 @@ int main() { // CHECK6-NEXT: store i32 [[TMP4]], i32* [[DOTOMP_IV]], align 4 // CHECK6-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK6: omp.inner.for.cond: -// CHECK6-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK6-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK6-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !30 +// CHECK6-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !30 // CHECK6-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] // CHECK6-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK6: omp.inner.for.body: -// CHECK6-NEXT: call void @__kmpc_push_num_threads(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]], i32 23) -// CHECK6-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK6-NEXT: call void @__kmpc_push_num_threads(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]], i32 23), !llvm.access.group !30 +// CHECK6-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !30 // CHECK6-NEXT: [[TMP8:%.*]] = zext i32 [[TMP7]] to i64 -// CHECK6-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK6-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !30 // CHECK6-NEXT: [[TMP10:%.*]] = zext i32 [[TMP9]] to i64 -// CHECK6-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 2, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64)* @.omp_outlined..7 to void (i32*, i32*, ...)*), i64 [[TMP8]], i64 [[TMP10]]) +// CHECK6-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) 
@__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 2, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64)* @.omp_outlined..7 to void (i32*, i32*, ...)*), i64 [[TMP8]], i64 [[TMP10]]), !llvm.access.group !30 // CHECK6-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK6: omp.inner.for.inc: -// CHECK6-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK6-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK6-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !30 +// CHECK6-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !30 // CHECK6-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP11]], [[TMP12]] -// CHECK6-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 -// CHECK6-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP17:![0-9]+]] +// CHECK6-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !30 +// CHECK6-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP31:![0-9]+]] // CHECK6: omp.inner.for.end: // CHECK6-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK6: omp.loop.exit: @@ -5307,26 +5307,26 @@ int main() { // CHECK6-NEXT: store i32 [[TMP6]], i32* [[DOTOMP_IV]], align 4 // CHECK6-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK6: omp.inner.for.cond: -// CHECK6-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK6-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK6-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !33 +// CHECK6-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !33 // CHECK6-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP7]], [[TMP8]] // CHECK6-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK6: omp.inner.for.body: -// CHECK6-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK6-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !33 // CHECK6-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP9]], 1 // CHECK6-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK6-NEXT: store i32 [[ADD]], i32* [[I]], align 4 +// CHECK6-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !33 // CHECK6-NEXT: invoke void @_Z3foov() -// CHECK6-NEXT: to label [[INVOKE_CONT:%.*]] unwind label [[TERMINATE_LPAD:%.*]] +// CHECK6-NEXT: to label [[INVOKE_CONT:%.*]] unwind label [[TERMINATE_LPAD:%.*]], !llvm.access.group !33 // CHECK6: invoke.cont: // CHECK6-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK6: omp.body.continue: // CHECK6-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK6: omp.inner.for.inc: -// CHECK6-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK6-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !33 // CHECK6-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP10]], 1 -// CHECK6-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4 -// CHECK6-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP18:![0-9]+]] +// CHECK6-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !33 +// CHECK6-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP34:![0-9]+]] // CHECK6: omp.inner.for.end: // CHECK6-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK6: omp.loop.exit: @@ -5343,7 +5343,7 @@ int main() { // CHECK6-NEXT: [[TMP13:%.*]] = landingpad { i8*, i32 } // CHECK6-NEXT: catch i8* null // CHECK6-NEXT: [[TMP14:%.*]] = extractvalue { i8*, i32 } [[TMP13]], 0 -// CHECK6-NEXT: call void @__clang_call_terminate(i8* [[TMP14]]) #[[ATTR9]] +// 
CHECK6-NEXT: call void @__clang_call_terminate(i8* [[TMP14]]) #[[ATTR9]], !llvm.access.group !33 // CHECK6-NEXT: unreachable // // @@ -5390,24 +5390,24 @@ int main() { // CHECK6-NEXT: store i32 [[TMP4]], i32* [[DOTOMP_IV]], align 4 // CHECK6-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK6: omp.inner.for.cond: -// CHECK6-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK6-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK6-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !36 +// CHECK6-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !36 // CHECK6-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] // CHECK6-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK6: omp.inner.for.body: -// CHECK6-NEXT: call void @__kmpc_push_num_threads(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]], i32 1) -// CHECK6-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK6-NEXT: call void @__kmpc_push_num_threads(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]], i32 1), !llvm.access.group !36 +// CHECK6-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !36 // CHECK6-NEXT: [[TMP8:%.*]] = zext i32 [[TMP7]] to i64 -// CHECK6-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK6-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !36 // CHECK6-NEXT: [[TMP10:%.*]] = zext i32 [[TMP9]] to i64 -// CHECK6-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 2, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64)* @.omp_outlined..9 to void (i32*, i32*, ...)*), i64 [[TMP8]], i64 [[TMP10]]) +// CHECK6-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) 
@__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 2, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64)* @.omp_outlined..9 to void (i32*, i32*, ...)*), i64 [[TMP8]], i64 [[TMP10]]), !llvm.access.group !36 // CHECK6-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK6: omp.inner.for.inc: -// CHECK6-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK6-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK6-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !36 +// CHECK6-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !36 // CHECK6-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP11]], [[TMP12]] -// CHECK6-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 -// CHECK6-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP19:![0-9]+]] +// CHECK6-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !36 +// CHECK6-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP37:![0-9]+]] // CHECK6: omp.inner.for.end: // CHECK6-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK6: omp.loop.exit: @@ -5468,26 +5468,26 @@ int main() { // CHECK6-NEXT: store i32 [[TMP6]], i32* [[DOTOMP_IV]], align 4 // CHECK6-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK6: omp.inner.for.cond: -// CHECK6-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK6-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK6-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !39 +// CHECK6-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !39 // CHECK6-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP7]], [[TMP8]] // CHECK6-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK6: omp.inner.for.body: -// CHECK6-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK6-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !39 // CHECK6-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP9]], 1 // CHECK6-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK6-NEXT: store i32 [[ADD]], i32* [[I]], align 4 +// CHECK6-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !39 // CHECK6-NEXT: invoke void @_Z3foov() -// CHECK6-NEXT: to label [[INVOKE_CONT:%.*]] unwind label [[TERMINATE_LPAD:%.*]] +// CHECK6-NEXT: to label [[INVOKE_CONT:%.*]] unwind label [[TERMINATE_LPAD:%.*]], !llvm.access.group !39 // CHECK6: invoke.cont: // CHECK6-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK6: omp.body.continue: // CHECK6-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK6: omp.inner.for.inc: -// CHECK6-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK6-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !39 // CHECK6-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP10]], 1 -// CHECK6-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4 -// CHECK6-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP20:![0-9]+]] +// CHECK6-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !39 +// CHECK6-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP40:![0-9]+]] // CHECK6: omp.inner.for.end: // CHECK6-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK6: omp.loop.exit: @@ -5504,7 +5504,7 @@ int main() { // CHECK6-NEXT: [[TMP13:%.*]] = landingpad { i8*, i32 } // CHECK6-NEXT: catch i8* null // CHECK6-NEXT: [[TMP14:%.*]] = extractvalue { i8*, i32 } [[TMP13]], 0 -// CHECK6-NEXT: call void @__clang_call_terminate(i8* [[TMP14]]) #[[ATTR9]] +// 
CHECK6-NEXT: call void @__clang_call_terminate(i8* [[TMP14]]) #[[ATTR9]], !llvm.access.group !39 // CHECK6-NEXT: unreachable // // @@ -5581,26 +5581,26 @@ int main() { // CHECK6-NEXT: store i32 [[TMP4]], i32* [[DOTOMP_IV]], align 4 // CHECK6-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK6: omp.inner.for.cond: -// CHECK6-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK6-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK6-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !42 +// CHECK6-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !42 // CHECK6-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] // CHECK6-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK6: omp.inner.for.body: -// CHECK6-NEXT: [[TMP7:%.*]] = load i8, i8* [[CONV]], align 8 +// CHECK6-NEXT: [[TMP7:%.*]] = load i8, i8* [[CONV]], align 8, !llvm.access.group !42 // CHECK6-NEXT: [[TMP8:%.*]] = sext i8 [[TMP7]] to i32 -// CHECK6-NEXT: call void @__kmpc_push_num_threads(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]], i32 [[TMP8]]) -// CHECK6-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK6-NEXT: call void @__kmpc_push_num_threads(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]], i32 [[TMP8]]), !llvm.access.group !42 +// CHECK6-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !42 // CHECK6-NEXT: [[TMP10:%.*]] = zext i32 [[TMP9]] to i64 -// CHECK6-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK6-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !42 // CHECK6-NEXT: [[TMP12:%.*]] = zext i32 [[TMP11]] to i64 -// CHECK6-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 2, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64)* @.omp_outlined..11 to void (i32*, i32*, ...)*), i64 [[TMP10]], i64 [[TMP12]]) +// CHECK6-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) 
@__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 2, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64)* @.omp_outlined..11 to void (i32*, i32*, ...)*), i64 [[TMP10]], i64 [[TMP12]]), !llvm.access.group !42 // CHECK6-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK6: omp.inner.for.inc: -// CHECK6-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK6-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK6-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !42 +// CHECK6-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !42 // CHECK6-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP13]], [[TMP14]] -// CHECK6-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 -// CHECK6-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP21:![0-9]+]] +// CHECK6-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !42 +// CHECK6-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP43:![0-9]+]] // CHECK6: omp.inner.for.end: // CHECK6-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK6: omp.loop.exit: @@ -5661,26 +5661,26 @@ int main() { // CHECK6-NEXT: store i32 [[TMP6]], i32* [[DOTOMP_IV]], align 4 // CHECK6-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK6: omp.inner.for.cond: -// CHECK6-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK6-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK6-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !45 +// CHECK6-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !45 // CHECK6-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP7]], [[TMP8]] // CHECK6-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK6: omp.inner.for.body: -// CHECK6-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK6-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !45 // CHECK6-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP9]], 1 // CHECK6-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK6-NEXT: store i32 [[ADD]], i32* [[I]], align 4 +// CHECK6-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !45 // CHECK6-NEXT: invoke void @_Z3foov() -// CHECK6-NEXT: to label [[INVOKE_CONT:%.*]] unwind label [[TERMINATE_LPAD:%.*]] +// CHECK6-NEXT: to label [[INVOKE_CONT:%.*]] unwind label [[TERMINATE_LPAD:%.*]], !llvm.access.group !45 // CHECK6: invoke.cont: // CHECK6-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK6: omp.body.continue: // CHECK6-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK6: omp.inner.for.inc: -// CHECK6-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK6-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !45 // CHECK6-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP10]], 1 -// CHECK6-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4 -// CHECK6-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP22:![0-9]+]] +// CHECK6-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !45 +// CHECK6-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP46:![0-9]+]] // CHECK6: omp.inner.for.end: // CHECK6-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK6: omp.loop.exit: @@ -5697,7 +5697,7 @@ int main() { // CHECK6-NEXT: [[TMP13:%.*]] = landingpad { i8*, i32 } // CHECK6-NEXT: catch i8* null // CHECK6-NEXT: [[TMP14:%.*]] = extractvalue { i8*, i32 } [[TMP13]], 0 -// CHECK6-NEXT: call void @__clang_call_terminate(i8* [[TMP14]]) #[[ATTR9]] +// 
CHECK6-NEXT: call void @__clang_call_terminate(i8* [[TMP14]]) #[[ATTR9]], !llvm.access.group !45 // CHECK6-NEXT: unreachable // // diff --git a/clang/test/OpenMP/teams_distribute_simd_firstprivate_codegen.cpp b/clang/test/OpenMP/teams_distribute_simd_firstprivate_codegen.cpp index 9f432be90370..36d65ef6e984 100644 --- a/clang/test/OpenMP/teams_distribute_simd_firstprivate_codegen.cpp +++ b/clang/test/OpenMP/teams_distribute_simd_firstprivate_codegen.cpp @@ -438,40 +438,40 @@ int main() { // CHECK1-NEXT: store i32 [[TMP11]], i32* [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !5 +// CHECK1-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !5 // CHECK1-NEXT: [[CMP7:%.*]] = icmp sle i32 [[TMP12]], [[TMP13]] // CHECK1-NEXT: br i1 [[CMP7]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_COND_CLEANUP:%.*]] // CHECK1: omp.inner.for.cond.cleanup: // CHECK1-NEXT: br label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !5 // CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP14]], 1 // CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK1-NEXT: store i32 [[ADD]], i32* [[I]], align 4 -// CHECK1-NEXT: [[TMP15:%.*]] = load i32, i32* [[CONV]], align 8 -// CHECK1-NEXT: [[TMP16:%.*]] = load i32, i32* [[I]], align 4 +// CHECK1-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !5 +// CHECK1-NEXT: [[TMP15:%.*]] = load i32, i32* [[CONV]], align 8, !llvm.access.group !5 +// CHECK1-NEXT: [[TMP16:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !5 // CHECK1-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP16]] to i64 // CHECK1-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x i32], [2 x i32]* [[VEC2]], i64 0, i64 [[IDXPROM]] -// CHECK1-NEXT: store i32 [[TMP15]], i32* [[ARRAYIDX]], align 4 -// CHECK1-NEXT: [[TMP17:%.*]] = load i32, i32* [[I]], align 4 +// CHECK1-NEXT: store i32 [[TMP15]], i32* [[ARRAYIDX]], align 4, !llvm.access.group !5 +// CHECK1-NEXT: [[TMP17:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !5 // CHECK1-NEXT: [[IDXPROM8:%.*]] = sext i32 [[TMP17]] to i64 // CHECK1-NEXT: [[ARRAYIDX9:%.*]] = getelementptr inbounds [2 x %struct.S], [2 x %struct.S]* [[S_ARR3]], i64 0, i64 [[IDXPROM8]] // CHECK1-NEXT: [[TMP18:%.*]] = bitcast %struct.S* [[ARRAYIDX9]] to i8* // CHECK1-NEXT: [[TMP19:%.*]] = bitcast %struct.S* [[VAR5]] to i8* -// CHECK1-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 [[TMP18]], i8* align 4 [[TMP19]], i64 4, i1 false) -// CHECK1-NEXT: [[TMP20:%.*]] = load i32, i32* [[I]], align 4 -// CHECK1-NEXT: [[TMP21:%.*]] = load i32, i32* [[CONV1]], align 8 +// CHECK1-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 [[TMP18]], i8* align 4 [[TMP19]], i64 4, i1 false), !llvm.access.group !5 +// CHECK1-NEXT: [[TMP20:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !5 +// CHECK1-NEXT: [[TMP21:%.*]] = load i32, i32* [[CONV1]], align 8, !llvm.access.group !5 // CHECK1-NEXT: [[ADD10:%.*]] = add nsw i32 [[TMP21]], [[TMP20]] -// CHECK1-NEXT: store i32 [[ADD10]], i32* [[CONV1]], align 8 +// CHECK1-NEXT: store i32 [[ADD10]], i32* [[CONV1]], align 8, !llvm.access.group !5 
// CHECK1-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK1: omp.body.continue: // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP22:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP22:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !5 // CHECK1-NEXT: [[ADD11:%.*]] = add nsw i32 [[TMP22]], 1 -// CHECK1-NEXT: store i32 [[ADD11]], i32* [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP4:![0-9]+]] +// CHECK1-NEXT: store i32 [[ADD11]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !5 +// CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP6:![0-9]+]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK1: omp.loop.exit: @@ -795,37 +795,37 @@ int main() { // CHECK1-NEXT: store i32 [[TMP12]], i32* [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !11 +// CHECK1-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !11 // CHECK1-NEXT: [[CMP8:%.*]] = icmp sle i32 [[TMP13]], [[TMP14]] // CHECK1-NEXT: br i1 [[CMP8]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_COND_CLEANUP:%.*]] // CHECK1: omp.inner.for.cond.cleanup: // CHECK1-NEXT: br label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !11 // CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP15]], 1 // CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK1-NEXT: store i32 [[ADD]], i32* [[I]], align 4 -// CHECK1-NEXT: [[TMP16:%.*]] = load i32, i32* [[CONV]], align 8 -// CHECK1-NEXT: [[TMP17:%.*]] = load i32, i32* [[I]], align 4 +// CHECK1-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !11 +// CHECK1-NEXT: [[TMP16:%.*]] = load i32, i32* [[CONV]], align 8, !llvm.access.group !11 +// CHECK1-NEXT: [[TMP17:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !11 // CHECK1-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP17]] to i64 // CHECK1-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x i32], [2 x i32]* [[VEC2]], i64 0, i64 [[IDXPROM]] -// CHECK1-NEXT: store i32 [[TMP16]], i32* [[ARRAYIDX]], align 4 -// CHECK1-NEXT: [[TMP18:%.*]] = load %struct.S.0*, %struct.S.0** [[_TMP7]], align 8 -// CHECK1-NEXT: [[TMP19:%.*]] = load i32, i32* [[I]], align 4 +// CHECK1-NEXT: store i32 [[TMP16]], i32* [[ARRAYIDX]], align 4, !llvm.access.group !11 +// CHECK1-NEXT: [[TMP18:%.*]] = load %struct.S.0*, %struct.S.0** [[_TMP7]], align 8, !llvm.access.group !11 +// CHECK1-NEXT: [[TMP19:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !11 // CHECK1-NEXT: [[IDXPROM9:%.*]] = sext i32 [[TMP19]] to i64 // CHECK1-NEXT: [[ARRAYIDX10:%.*]] = getelementptr inbounds [2 x %struct.S.0], [2 x %struct.S.0]* [[S_ARR3]], i64 0, i64 [[IDXPROM9]] // CHECK1-NEXT: [[TMP20:%.*]] = bitcast %struct.S.0* [[ARRAYIDX10]] to i8* // CHECK1-NEXT: [[TMP21:%.*]] = bitcast %struct.S.0* [[TMP18]] to i8* -// CHECK1-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 [[TMP20]], i8* align 4 [[TMP21]], i64 4, i1 false) +// CHECK1-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 [[TMP20]], i8* align 4 [[TMP21]], 
i64 4, i1 false), !llvm.access.group !11 // CHECK1-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK1: omp.body.continue: // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP22:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP22:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !11 // CHECK1-NEXT: [[ADD11:%.*]] = add nsw i32 [[TMP22]], 1 -// CHECK1-NEXT: store i32 [[ADD11]], i32* [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP8:![0-9]+]] +// CHECK1-NEXT: store i32 [[ADD11]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !11 +// CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP12:![0-9]+]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK1: omp.loop.exit: @@ -1244,40 +1244,40 @@ int main() { // CHECK2-NEXT: store i32 [[TMP11]], i32* [[DOTOMP_IV]], align 4 // CHECK2-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK2: omp.inner.for.cond: -// CHECK2-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK2-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK2-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !5 +// CHECK2-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !5 // CHECK2-NEXT: [[CMP7:%.*]] = icmp sle i32 [[TMP12]], [[TMP13]] // CHECK2-NEXT: br i1 [[CMP7]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_COND_CLEANUP:%.*]] // CHECK2: omp.inner.for.cond.cleanup: // CHECK2-NEXT: br label [[OMP_INNER_FOR_END:%.*]] // CHECK2: omp.inner.for.body: -// CHECK2-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK2-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !5 // CHECK2-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP14]], 1 // CHECK2-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK2-NEXT: store i32 [[ADD]], i32* [[I]], align 4 -// CHECK2-NEXT: [[TMP15:%.*]] = load i32, i32* [[CONV]], align 8 -// CHECK2-NEXT: [[TMP16:%.*]] = load i32, i32* [[I]], align 4 +// CHECK2-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !5 +// CHECK2-NEXT: [[TMP15:%.*]] = load i32, i32* [[CONV]], align 8, !llvm.access.group !5 +// CHECK2-NEXT: [[TMP16:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !5 // CHECK2-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP16]] to i64 // CHECK2-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x i32], [2 x i32]* [[VEC2]], i64 0, i64 [[IDXPROM]] -// CHECK2-NEXT: store i32 [[TMP15]], i32* [[ARRAYIDX]], align 4 -// CHECK2-NEXT: [[TMP17:%.*]] = load i32, i32* [[I]], align 4 +// CHECK2-NEXT: store i32 [[TMP15]], i32* [[ARRAYIDX]], align 4, !llvm.access.group !5 +// CHECK2-NEXT: [[TMP17:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !5 // CHECK2-NEXT: [[IDXPROM8:%.*]] = sext i32 [[TMP17]] to i64 // CHECK2-NEXT: [[ARRAYIDX9:%.*]] = getelementptr inbounds [2 x %struct.S], [2 x %struct.S]* [[S_ARR3]], i64 0, i64 [[IDXPROM8]] // CHECK2-NEXT: [[TMP18:%.*]] = bitcast %struct.S* [[ARRAYIDX9]] to i8* // CHECK2-NEXT: [[TMP19:%.*]] = bitcast %struct.S* [[VAR5]] to i8* -// CHECK2-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 [[TMP18]], i8* align 4 [[TMP19]], i64 4, i1 false) -// CHECK2-NEXT: [[TMP20:%.*]] = load i32, i32* [[I]], align 4 -// CHECK2-NEXT: [[TMP21:%.*]] = load i32, i32* [[CONV1]], align 8 +// CHECK2-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 [[TMP18]], i8* align 4 [[TMP19]], i64 4, i1 false), !llvm.access.group !5 
+// CHECK2-NEXT: [[TMP20:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !5 +// CHECK2-NEXT: [[TMP21:%.*]] = load i32, i32* [[CONV1]], align 8, !llvm.access.group !5 // CHECK2-NEXT: [[ADD10:%.*]] = add nsw i32 [[TMP21]], [[TMP20]] -// CHECK2-NEXT: store i32 [[ADD10]], i32* [[CONV1]], align 8 +// CHECK2-NEXT: store i32 [[ADD10]], i32* [[CONV1]], align 8, !llvm.access.group !5 // CHECK2-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK2: omp.body.continue: // CHECK2-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK2: omp.inner.for.inc: -// CHECK2-NEXT: [[TMP22:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK2-NEXT: [[TMP22:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !5 // CHECK2-NEXT: [[ADD11:%.*]] = add nsw i32 [[TMP22]], 1 -// CHECK2-NEXT: store i32 [[ADD11]], i32* [[DOTOMP_IV]], align 4 -// CHECK2-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP4:![0-9]+]] +// CHECK2-NEXT: store i32 [[ADD11]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !5 +// CHECK2-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP6:![0-9]+]] // CHECK2: omp.inner.for.end: // CHECK2-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK2: omp.loop.exit: @@ -1601,37 +1601,37 @@ int main() { // CHECK2-NEXT: store i32 [[TMP12]], i32* [[DOTOMP_IV]], align 4 // CHECK2-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK2: omp.inner.for.cond: -// CHECK2-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK2-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK2-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !11 +// CHECK2-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !11 // CHECK2-NEXT: [[CMP8:%.*]] = icmp sle i32 [[TMP13]], [[TMP14]] // CHECK2-NEXT: br i1 [[CMP8]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_COND_CLEANUP:%.*]] // CHECK2: omp.inner.for.cond.cleanup: // CHECK2-NEXT: br label [[OMP_INNER_FOR_END:%.*]] // CHECK2: omp.inner.for.body: -// CHECK2-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK2-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !11 // CHECK2-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP15]], 1 // CHECK2-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK2-NEXT: store i32 [[ADD]], i32* [[I]], align 4 -// CHECK2-NEXT: [[TMP16:%.*]] = load i32, i32* [[CONV]], align 8 -// CHECK2-NEXT: [[TMP17:%.*]] = load i32, i32* [[I]], align 4 +// CHECK2-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !11 +// CHECK2-NEXT: [[TMP16:%.*]] = load i32, i32* [[CONV]], align 8, !llvm.access.group !11 +// CHECK2-NEXT: [[TMP17:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !11 // CHECK2-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP17]] to i64 // CHECK2-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x i32], [2 x i32]* [[VEC2]], i64 0, i64 [[IDXPROM]] -// CHECK2-NEXT: store i32 [[TMP16]], i32* [[ARRAYIDX]], align 4 -// CHECK2-NEXT: [[TMP18:%.*]] = load %struct.S.0*, %struct.S.0** [[_TMP7]], align 8 -// CHECK2-NEXT: [[TMP19:%.*]] = load i32, i32* [[I]], align 4 +// CHECK2-NEXT: store i32 [[TMP16]], i32* [[ARRAYIDX]], align 4, !llvm.access.group !11 +// CHECK2-NEXT: [[TMP18:%.*]] = load %struct.S.0*, %struct.S.0** [[_TMP7]], align 8, !llvm.access.group !11 +// CHECK2-NEXT: [[TMP19:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !11 // CHECK2-NEXT: [[IDXPROM9:%.*]] = sext i32 [[TMP19]] to i64 // CHECK2-NEXT: [[ARRAYIDX10:%.*]] = getelementptr inbounds [2 x %struct.S.0], [2 x %struct.S.0]* 
[[S_ARR3]], i64 0, i64 [[IDXPROM9]] // CHECK2-NEXT: [[TMP20:%.*]] = bitcast %struct.S.0* [[ARRAYIDX10]] to i8* // CHECK2-NEXT: [[TMP21:%.*]] = bitcast %struct.S.0* [[TMP18]] to i8* -// CHECK2-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 [[TMP20]], i8* align 4 [[TMP21]], i64 4, i1 false) +// CHECK2-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 [[TMP20]], i8* align 4 [[TMP21]], i64 4, i1 false), !llvm.access.group !11 // CHECK2-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK2: omp.body.continue: // CHECK2-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK2: omp.inner.for.inc: -// CHECK2-NEXT: [[TMP22:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK2-NEXT: [[TMP22:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !11 // CHECK2-NEXT: [[ADD11:%.*]] = add nsw i32 [[TMP22]], 1 -// CHECK2-NEXT: store i32 [[ADD11]], i32* [[DOTOMP_IV]], align 4 -// CHECK2-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP8:![0-9]+]] +// CHECK2-NEXT: store i32 [[ADD11]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !11 +// CHECK2-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP12:![0-9]+]] // CHECK2: omp.inner.for.end: // CHECK2-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK2: omp.loop.exit: @@ -2042,38 +2042,38 @@ int main() { // CHECK3-NEXT: store i32 [[TMP11]], i32* [[DOTOMP_IV]], align 4 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK3: omp.inner.for.cond: -// CHECK3-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !6 +// CHECK3-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !6 // CHECK3-NEXT: [[CMP6:%.*]] = icmp sle i32 [[TMP12]], [[TMP13]] // CHECK3-NEXT: br i1 [[CMP6]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_COND_CLEANUP:%.*]] // CHECK3: omp.inner.for.cond.cleanup: // CHECK3-NEXT: br label [[OMP_INNER_FOR_END:%.*]] // CHECK3: omp.inner.for.body: -// CHECK3-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !6 // CHECK3-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP14]], 1 // CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK3-NEXT: store i32 [[ADD]], i32* [[I]], align 4 -// CHECK3-NEXT: [[TMP15:%.*]] = load i32, i32* [[T_VAR_ADDR]], align 4 -// CHECK3-NEXT: [[TMP16:%.*]] = load i32, i32* [[I]], align 4 +// CHECK3-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !6 +// CHECK3-NEXT: [[TMP15:%.*]] = load i32, i32* [[T_VAR_ADDR]], align 4, !llvm.access.group !6 +// CHECK3-NEXT: [[TMP16:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !6 // CHECK3-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x i32], [2 x i32]* [[VEC1]], i32 0, i32 [[TMP16]] -// CHECK3-NEXT: store i32 [[TMP15]], i32* [[ARRAYIDX]], align 4 -// CHECK3-NEXT: [[TMP17:%.*]] = load i32, i32* [[I]], align 4 +// CHECK3-NEXT: store i32 [[TMP15]], i32* [[ARRAYIDX]], align 4, !llvm.access.group !6 +// CHECK3-NEXT: [[TMP17:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !6 // CHECK3-NEXT: [[ARRAYIDX7:%.*]] = getelementptr inbounds [2 x %struct.S], [2 x %struct.S]* [[S_ARR2]], i32 0, i32 [[TMP17]] // CHECK3-NEXT: [[TMP18:%.*]] = bitcast %struct.S* [[ARRAYIDX7]] to i8* // CHECK3-NEXT: [[TMP19:%.*]] = bitcast %struct.S* [[VAR4]] to i8* -// CHECK3-NEXT: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 4 [[TMP18]], i8* align 4 [[TMP19]], 
i32 4, i1 false) -// CHECK3-NEXT: [[TMP20:%.*]] = load i32, i32* [[I]], align 4 -// CHECK3-NEXT: [[TMP21:%.*]] = load i32, i32* [[SIVAR_ADDR]], align 4 +// CHECK3-NEXT: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 4 [[TMP18]], i8* align 4 [[TMP19]], i32 4, i1 false), !llvm.access.group !6 +// CHECK3-NEXT: [[TMP20:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !6 +// CHECK3-NEXT: [[TMP21:%.*]] = load i32, i32* [[SIVAR_ADDR]], align 4, !llvm.access.group !6 // CHECK3-NEXT: [[ADD8:%.*]] = add nsw i32 [[TMP21]], [[TMP20]] -// CHECK3-NEXT: store i32 [[ADD8]], i32* [[SIVAR_ADDR]], align 4 +// CHECK3-NEXT: store i32 [[ADD8]], i32* [[SIVAR_ADDR]], align 4, !llvm.access.group !6 // CHECK3-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK3: omp.body.continue: // CHECK3-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK3: omp.inner.for.inc: -// CHECK3-NEXT: [[TMP22:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP22:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !6 // CHECK3-NEXT: [[ADD9:%.*]] = add nsw i32 [[TMP22]], 1 -// CHECK3-NEXT: store i32 [[ADD9]], i32* [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP5:![0-9]+]] +// CHECK3-NEXT: store i32 [[ADD9]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !6 +// CHECK3-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP7:![0-9]+]] // CHECK3: omp.inner.for.end: // CHECK3-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK3: omp.loop.exit: @@ -2393,35 +2393,35 @@ int main() { // CHECK3-NEXT: store i32 [[TMP12]], i32* [[DOTOMP_IV]], align 4 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK3: omp.inner.for.cond: -// CHECK3-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !12 +// CHECK3-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !12 // CHECK3-NEXT: [[CMP8:%.*]] = icmp sle i32 [[TMP13]], [[TMP14]] // CHECK3-NEXT: br i1 [[CMP8]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_COND_CLEANUP:%.*]] // CHECK3: omp.inner.for.cond.cleanup: // CHECK3-NEXT: br label [[OMP_INNER_FOR_END:%.*]] // CHECK3: omp.inner.for.body: -// CHECK3-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !12 // CHECK3-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP15]], 1 // CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK3-NEXT: store i32 [[ADD]], i32* [[I]], align 4 -// CHECK3-NEXT: [[TMP16:%.*]] = load i32, i32* [[T_VAR_ADDR]], align 4 -// CHECK3-NEXT: [[TMP17:%.*]] = load i32, i32* [[I]], align 4 +// CHECK3-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !12 +// CHECK3-NEXT: [[TMP16:%.*]] = load i32, i32* [[T_VAR_ADDR]], align 4, !llvm.access.group !12 +// CHECK3-NEXT: [[TMP17:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !12 // CHECK3-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x i32], [2 x i32]* [[VEC2]], i32 0, i32 [[TMP17]] -// CHECK3-NEXT: store i32 [[TMP16]], i32* [[ARRAYIDX]], align 4 -// CHECK3-NEXT: [[TMP18:%.*]] = load %struct.S.0*, %struct.S.0** [[_TMP7]], align 4 -// CHECK3-NEXT: [[TMP19:%.*]] = load i32, i32* [[I]], align 4 +// CHECK3-NEXT: store i32 [[TMP16]], i32* [[ARRAYIDX]], align 4, !llvm.access.group !12 +// CHECK3-NEXT: [[TMP18:%.*]] = load %struct.S.0*, %struct.S.0** [[_TMP7]], align 4, !llvm.access.group !12 +// 
CHECK3-NEXT: [[TMP19:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !12 // CHECK3-NEXT: [[ARRAYIDX9:%.*]] = getelementptr inbounds [2 x %struct.S.0], [2 x %struct.S.0]* [[S_ARR3]], i32 0, i32 [[TMP19]] // CHECK3-NEXT: [[TMP20:%.*]] = bitcast %struct.S.0* [[ARRAYIDX9]] to i8* // CHECK3-NEXT: [[TMP21:%.*]] = bitcast %struct.S.0* [[TMP18]] to i8* -// CHECK3-NEXT: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 4 [[TMP20]], i8* align 4 [[TMP21]], i32 4, i1 false) +// CHECK3-NEXT: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 4 [[TMP20]], i8* align 4 [[TMP21]], i32 4, i1 false), !llvm.access.group !12 // CHECK3-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK3: omp.body.continue: // CHECK3-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK3: omp.inner.for.inc: -// CHECK3-NEXT: [[TMP22:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP22:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !12 // CHECK3-NEXT: [[ADD10:%.*]] = add nsw i32 [[TMP22]], 1 -// CHECK3-NEXT: store i32 [[ADD10]], i32* [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP9:![0-9]+]] +// CHECK3-NEXT: store i32 [[ADD10]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !12 +// CHECK3-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP13:![0-9]+]] // CHECK3: omp.inner.for.end: // CHECK3-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK3: omp.loop.exit: @@ -2832,38 +2832,38 @@ int main() { // CHECK4-NEXT: store i32 [[TMP11]], i32* [[DOTOMP_IV]], align 4 // CHECK4-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK4: omp.inner.for.cond: -// CHECK4-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK4-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK4-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !6 +// CHECK4-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !6 // CHECK4-NEXT: [[CMP6:%.*]] = icmp sle i32 [[TMP12]], [[TMP13]] // CHECK4-NEXT: br i1 [[CMP6]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_COND_CLEANUP:%.*]] // CHECK4: omp.inner.for.cond.cleanup: // CHECK4-NEXT: br label [[OMP_INNER_FOR_END:%.*]] // CHECK4: omp.inner.for.body: -// CHECK4-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK4-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !6 // CHECK4-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP14]], 1 // CHECK4-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK4-NEXT: store i32 [[ADD]], i32* [[I]], align 4 -// CHECK4-NEXT: [[TMP15:%.*]] = load i32, i32* [[T_VAR_ADDR]], align 4 -// CHECK4-NEXT: [[TMP16:%.*]] = load i32, i32* [[I]], align 4 +// CHECK4-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !6 +// CHECK4-NEXT: [[TMP15:%.*]] = load i32, i32* [[T_VAR_ADDR]], align 4, !llvm.access.group !6 +// CHECK4-NEXT: [[TMP16:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !6 // CHECK4-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x i32], [2 x i32]* [[VEC1]], i32 0, i32 [[TMP16]] -// CHECK4-NEXT: store i32 [[TMP15]], i32* [[ARRAYIDX]], align 4 -// CHECK4-NEXT: [[TMP17:%.*]] = load i32, i32* [[I]], align 4 +// CHECK4-NEXT: store i32 [[TMP15]], i32* [[ARRAYIDX]], align 4, !llvm.access.group !6 +// CHECK4-NEXT: [[TMP17:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !6 // CHECK4-NEXT: [[ARRAYIDX7:%.*]] = getelementptr inbounds [2 x %struct.S], [2 x %struct.S]* [[S_ARR2]], i32 0, i32 [[TMP17]] // CHECK4-NEXT: [[TMP18:%.*]] = bitcast %struct.S* [[ARRAYIDX7]] 
to i8* // CHECK4-NEXT: [[TMP19:%.*]] = bitcast %struct.S* [[VAR4]] to i8* -// CHECK4-NEXT: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 4 [[TMP18]], i8* align 4 [[TMP19]], i32 4, i1 false) -// CHECK4-NEXT: [[TMP20:%.*]] = load i32, i32* [[I]], align 4 -// CHECK4-NEXT: [[TMP21:%.*]] = load i32, i32* [[SIVAR_ADDR]], align 4 +// CHECK4-NEXT: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 4 [[TMP18]], i8* align 4 [[TMP19]], i32 4, i1 false), !llvm.access.group !6 +// CHECK4-NEXT: [[TMP20:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !6 +// CHECK4-NEXT: [[TMP21:%.*]] = load i32, i32* [[SIVAR_ADDR]], align 4, !llvm.access.group !6 // CHECK4-NEXT: [[ADD8:%.*]] = add nsw i32 [[TMP21]], [[TMP20]] -// CHECK4-NEXT: store i32 [[ADD8]], i32* [[SIVAR_ADDR]], align 4 +// CHECK4-NEXT: store i32 [[ADD8]], i32* [[SIVAR_ADDR]], align 4, !llvm.access.group !6 // CHECK4-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK4: omp.body.continue: // CHECK4-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK4: omp.inner.for.inc: -// CHECK4-NEXT: [[TMP22:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK4-NEXT: [[TMP22:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !6 // CHECK4-NEXT: [[ADD9:%.*]] = add nsw i32 [[TMP22]], 1 -// CHECK4-NEXT: store i32 [[ADD9]], i32* [[DOTOMP_IV]], align 4 -// CHECK4-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP5:![0-9]+]] +// CHECK4-NEXT: store i32 [[ADD9]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !6 +// CHECK4-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP7:![0-9]+]] // CHECK4: omp.inner.for.end: // CHECK4-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK4: omp.loop.exit: @@ -3183,35 +3183,35 @@ int main() { // CHECK4-NEXT: store i32 [[TMP12]], i32* [[DOTOMP_IV]], align 4 // CHECK4-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK4: omp.inner.for.cond: -// CHECK4-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK4-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK4-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !12 +// CHECK4-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !12 // CHECK4-NEXT: [[CMP8:%.*]] = icmp sle i32 [[TMP13]], [[TMP14]] // CHECK4-NEXT: br i1 [[CMP8]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_COND_CLEANUP:%.*]] // CHECK4: omp.inner.for.cond.cleanup: // CHECK4-NEXT: br label [[OMP_INNER_FOR_END:%.*]] // CHECK4: omp.inner.for.body: -// CHECK4-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK4-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !12 // CHECK4-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP15]], 1 // CHECK4-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK4-NEXT: store i32 [[ADD]], i32* [[I]], align 4 -// CHECK4-NEXT: [[TMP16:%.*]] = load i32, i32* [[T_VAR_ADDR]], align 4 -// CHECK4-NEXT: [[TMP17:%.*]] = load i32, i32* [[I]], align 4 +// CHECK4-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !12 +// CHECK4-NEXT: [[TMP16:%.*]] = load i32, i32* [[T_VAR_ADDR]], align 4, !llvm.access.group !12 +// CHECK4-NEXT: [[TMP17:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !12 // CHECK4-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x i32], [2 x i32]* [[VEC2]], i32 0, i32 [[TMP17]] -// CHECK4-NEXT: store i32 [[TMP16]], i32* [[ARRAYIDX]], align 4 -// CHECK4-NEXT: [[TMP18:%.*]] = load %struct.S.0*, %struct.S.0** [[_TMP7]], align 4 -// CHECK4-NEXT: [[TMP19:%.*]] = load i32, i32* [[I]], align 4 +// CHECK4-NEXT: store i32 
[[TMP16]], i32* [[ARRAYIDX]], align 4, !llvm.access.group !12 +// CHECK4-NEXT: [[TMP18:%.*]] = load %struct.S.0*, %struct.S.0** [[_TMP7]], align 4, !llvm.access.group !12 +// CHECK4-NEXT: [[TMP19:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !12 // CHECK4-NEXT: [[ARRAYIDX9:%.*]] = getelementptr inbounds [2 x %struct.S.0], [2 x %struct.S.0]* [[S_ARR3]], i32 0, i32 [[TMP19]] // CHECK4-NEXT: [[TMP20:%.*]] = bitcast %struct.S.0* [[ARRAYIDX9]] to i8* // CHECK4-NEXT: [[TMP21:%.*]] = bitcast %struct.S.0* [[TMP18]] to i8* -// CHECK4-NEXT: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 4 [[TMP20]], i8* align 4 [[TMP21]], i32 4, i1 false) +// CHECK4-NEXT: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 4 [[TMP20]], i8* align 4 [[TMP21]], i32 4, i1 false), !llvm.access.group !12 // CHECK4-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK4: omp.body.continue: // CHECK4-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK4: omp.inner.for.inc: -// CHECK4-NEXT: [[TMP22:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK4-NEXT: [[TMP22:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !12 // CHECK4-NEXT: [[ADD10:%.*]] = add nsw i32 [[TMP22]], 1 -// CHECK4-NEXT: store i32 [[ADD10]], i32* [[DOTOMP_IV]], align 4 -// CHECK4-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP9:![0-9]+]] +// CHECK4-NEXT: store i32 [[ADD10]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !12 +// CHECK4-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP13:![0-9]+]] // CHECK4: omp.inner.for.end: // CHECK4-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK4: omp.loop.exit: @@ -4843,35 +4843,35 @@ int main() { // CHECK9-NEXT: store i32 [[TMP4]], i32* [[DOTOMP_IV]], align 4 // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK9: omp.inner.for.cond: -// CHECK9-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK9-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK9-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !4 +// CHECK9-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !4 // CHECK9-NEXT: [[CMP4:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] // CHECK9-NEXT: br i1 [[CMP4]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK9: omp.inner.for.body: -// CHECK9-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !4 // CHECK9-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP7]], 1 // CHECK9-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK9-NEXT: store i32 [[ADD]], i32* [[I]], align 4 -// CHECK9-NEXT: store i32 1, i32* [[CONV]], align 8 -// CHECK9-NEXT: [[TMP8:%.*]] = load i32*, i32** [[TMP]], align 8 -// CHECK9-NEXT: store volatile i32 1, i32* [[TMP8]], align 4 -// CHECK9-NEXT: store i32 2, i32* [[CONV2]], align 8 +// CHECK9-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !4 +// CHECK9-NEXT: store i32 1, i32* [[CONV]], align 8, !llvm.access.group !4 +// CHECK9-NEXT: [[TMP8:%.*]] = load i32*, i32** [[TMP]], align 8, !llvm.access.group !4 +// CHECK9-NEXT: store volatile i32 1, i32* [[TMP8]], align 4, !llvm.access.group !4 +// CHECK9-NEXT: store i32 2, i32* [[CONV2]], align 8, !llvm.access.group !4 // CHECK9-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[CLASS_ANON_0]], %class.anon.0* [[REF_TMP]], i32 0, i32 0 -// CHECK9-NEXT: store i32* [[CONV]], i32** [[TMP9]], align 8 +// CHECK9-NEXT: store i32* [[CONV]], i32** [[TMP9]], align 8, !llvm.access.group !4 // CHECK9-NEXT: [[TMP10:%.*]] = 
getelementptr inbounds [[CLASS_ANON_0]], %class.anon.0* [[REF_TMP]], i32 0, i32 1 -// CHECK9-NEXT: [[TMP11:%.*]] = load i32*, i32** [[TMP]], align 8 -// CHECK9-NEXT: store i32* [[TMP11]], i32** [[TMP10]], align 8 +// CHECK9-NEXT: [[TMP11:%.*]] = load i32*, i32** [[TMP]], align 8, !llvm.access.group !4 +// CHECK9-NEXT: store i32* [[TMP11]], i32** [[TMP10]], align 8, !llvm.access.group !4 // CHECK9-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[CLASS_ANON_0]], %class.anon.0* [[REF_TMP]], i32 0, i32 2 -// CHECK9-NEXT: store i32* [[CONV2]], i32** [[TMP12]], align 8 -// CHECK9-NEXT: call void @"_ZZZ4mainENK3$_0clEvENKUlvE_clEv"(%class.anon.0* nonnull align 8 dereferenceable(24) [[REF_TMP]]) +// CHECK9-NEXT: store i32* [[CONV2]], i32** [[TMP12]], align 8, !llvm.access.group !4 +// CHECK9-NEXT: call void @"_ZZZ4mainENK3$_0clEvENKUlvE_clEv"(%class.anon.0* nonnull align 8 dereferenceable(24) [[REF_TMP]]), !llvm.access.group !4 // CHECK9-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK9: omp.body.continue: // CHECK9-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK9: omp.inner.for.inc: -// CHECK9-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !4 // CHECK9-NEXT: [[ADD5:%.*]] = add nsw i32 [[TMP13]], 1 -// CHECK9-NEXT: store i32 [[ADD5]], i32* [[DOTOMP_IV]], align 4 -// CHECK9-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP3:![0-9]+]] +// CHECK9-NEXT: store i32 [[ADD5]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !4 +// CHECK9-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP5:![0-9]+]] // CHECK9: omp.inner.for.end: // CHECK9-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK9: omp.loop.exit: @@ -5107,35 +5107,35 @@ int main() { // CHECK10-NEXT: store i32 [[TMP4]], i32* [[DOTOMP_IV]], align 4 // CHECK10-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK10: omp.inner.for.cond: -// CHECK10-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK10-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK10-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !4 +// CHECK10-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !4 // CHECK10-NEXT: [[CMP4:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] // CHECK10-NEXT: br i1 [[CMP4]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK10: omp.inner.for.body: -// CHECK10-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK10-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !4 // CHECK10-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP7]], 1 // CHECK10-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK10-NEXT: store i32 [[ADD]], i32* [[I]], align 4 -// CHECK10-NEXT: store i32 1, i32* [[CONV]], align 8 -// CHECK10-NEXT: [[TMP8:%.*]] = load i32*, i32** [[TMP]], align 8 -// CHECK10-NEXT: store volatile i32 1, i32* [[TMP8]], align 4 -// CHECK10-NEXT: store i32 2, i32* [[CONV2]], align 8 +// CHECK10-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !4 +// CHECK10-NEXT: store i32 1, i32* [[CONV]], align 8, !llvm.access.group !4 +// CHECK10-NEXT: [[TMP8:%.*]] = load i32*, i32** [[TMP]], align 8, !llvm.access.group !4 +// CHECK10-NEXT: store volatile i32 1, i32* [[TMP8]], align 4, !llvm.access.group !4 +// CHECK10-NEXT: store i32 2, i32* [[CONV2]], align 8, !llvm.access.group !4 // CHECK10-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[CLASS_ANON_0]], %class.anon.0* [[REF_TMP]], i32 0, i32 0 -// CHECK10-NEXT: 
store i32* [[CONV]], i32** [[TMP9]], align 8 +// CHECK10-NEXT: store i32* [[CONV]], i32** [[TMP9]], align 8, !llvm.access.group !4 // CHECK10-NEXT: [[TMP10:%.*]] = getelementptr inbounds [[CLASS_ANON_0]], %class.anon.0* [[REF_TMP]], i32 0, i32 1 -// CHECK10-NEXT: [[TMP11:%.*]] = load i32*, i32** [[TMP]], align 8 -// CHECK10-NEXT: store i32* [[TMP11]], i32** [[TMP10]], align 8 +// CHECK10-NEXT: [[TMP11:%.*]] = load i32*, i32** [[TMP]], align 8, !llvm.access.group !4 +// CHECK10-NEXT: store i32* [[TMP11]], i32** [[TMP10]], align 8, !llvm.access.group !4 // CHECK10-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[CLASS_ANON_0]], %class.anon.0* [[REF_TMP]], i32 0, i32 2 -// CHECK10-NEXT: store i32* [[CONV2]], i32** [[TMP12]], align 8 -// CHECK10-NEXT: call void @"_ZZZ4mainENK3$_0clEvENKUlvE_clEv"(%class.anon.0* nonnull align 8 dereferenceable(24) [[REF_TMP]]) +// CHECK10-NEXT: store i32* [[CONV2]], i32** [[TMP12]], align 8, !llvm.access.group !4 +// CHECK10-NEXT: call void @"_ZZZ4mainENK3$_0clEvENKUlvE_clEv"(%class.anon.0* nonnull align 8 dereferenceable(24) [[REF_TMP]]), !llvm.access.group !4 // CHECK10-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK10: omp.body.continue: // CHECK10-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK10: omp.inner.for.inc: -// CHECK10-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK10-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !4 // CHECK10-NEXT: [[ADD5:%.*]] = add nsw i32 [[TMP13]], 1 -// CHECK10-NEXT: store i32 [[ADD5]], i32* [[DOTOMP_IV]], align 4 -// CHECK10-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP3:![0-9]+]] +// CHECK10-NEXT: store i32 [[ADD5]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !4 +// CHECK10-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP5:![0-9]+]] // CHECK10: omp.inner.for.end: // CHECK10-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK10: omp.loop.exit: diff --git a/clang/test/OpenMP/teams_firstprivate_codegen.cpp b/clang/test/OpenMP/teams_firstprivate_codegen.cpp index 1866f264dea7..7ec4418794e3 100644 --- a/clang/test/OpenMP/teams_firstprivate_codegen.cpp +++ b/clang/test/OpenMP/teams_firstprivate_codegen.cpp @@ -5258,4 +5258,3 @@ void array_func(float a[3], St s[2], int n, long double vla1[n]) { // CHECK20-NEXT: call void @__tgt_register_requires(i64 1) // CHECK20-NEXT: ret void // -// \ No newline at end of file From a364e8f6adeb4b42296413b6112a8bdfbd2ea116 Mon Sep 17 00:00:00 2001 From: Shilei Tian Date: Wed, 22 Dec 2021 11:24:06 -0500 Subject: [PATCH 058/293] [NFC][Clang] Move function implementation of `OpenMPAtomicUpdateChecker` into anonymous namespace Just to keep code consistent as `OpenMPAtomicUpdateChecker` is defined in anonymous namespace. 
Reviewed By: ABataev Differential Revision: https://reviews.llvm.org/D116068 --- clang/lib/Sema/SemaOpenMP.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/clang/lib/Sema/SemaOpenMP.cpp b/clang/lib/Sema/SemaOpenMP.cpp index 5ecfe7fca55c..a5962ec3b1d9 100644 --- a/clang/lib/Sema/SemaOpenMP.cpp +++ b/clang/lib/Sema/SemaOpenMP.cpp @@ -10754,7 +10754,6 @@ class OpenMPAtomicUpdateChecker { bool checkBinaryOperation(BinaryOperator *AtomicBinOp, unsigned DiagId = 0, unsigned NoteId = 0); }; -} // namespace bool OpenMPAtomicUpdateChecker::checkBinaryOperation( BinaryOperator *AtomicBinOp, unsigned DiagId, unsigned NoteId) { @@ -10915,6 +10914,7 @@ bool OpenMPAtomicUpdateChecker::checkStatement(Stmt *S, unsigned DiagId, } return ErrorFound != NoError; } +} // namespace StmtResult Sema::ActOnOpenMPAtomicDirective(ArrayRef Clauses, Stmt *AStmt, From ea22fdd120aeb1bbb9ea96670d70193dc02b2c5f Mon Sep 17 00:00:00 2001 From: Jeremy Morse Date: Wed, 22 Dec 2021 16:13:29 +0000 Subject: [PATCH 059/293] [Clang][DebugInfo] Cease turning instruction-referencing off by default Over in D114631 I turned this debug-info feature on by default, for x86_64 only. I'd previously stripped out the clang cc1 option that controlled it in 651122fc4ac, unfortunately that turned out to not be completely effective, and the two things deleted in this patch continued to keep it off-by-default. Oooff. As a follow-up, this patch removes the last few things to do with ValueTrackingVariableLocations from clang, which was the original purpose of D114631. In an ideal world, if this patch causes you trouble you'd revert 3c045070882f instead, which was where this behaviour was supposed to start being the default, although that might not be practical any more. --- clang/include/clang/Basic/CodeGenOptions.def | 3 --- clang/lib/CodeGen/BackendUtil.cpp | 2 -- 2 files changed, 5 deletions(-) diff --git a/clang/include/clang/Basic/CodeGenOptions.def b/clang/include/clang/Basic/CodeGenOptions.def index 94b3003a9c33..723302f108e2 100644 --- a/clang/include/clang/Basic/CodeGenOptions.def +++ b/clang/include/clang/Basic/CodeGenOptions.def @@ -368,9 +368,6 @@ ENUM_CODEGENOPT(DebuggerTuning, llvm::DebuggerKind, 3, /// emitted. VALUE_CODEGENOPT(DwarfVersion, 3, 0) -/// Whether to use experimental new variable location tracking. -CODEGENOPT(ValueTrackingVariableLocations, 1, 0) - /// Whether we should emit CodeView debug information. It's possible to emit /// CodeView and DWARF into the same object. CODEGENOPT(EmitCodeView, 1, 0) diff --git a/clang/lib/CodeGen/BackendUtil.cpp b/clang/lib/CodeGen/BackendUtil.cpp index 5e16d3525b38..bacac0a20d4d 100644 --- a/clang/lib/CodeGen/BackendUtil.cpp +++ b/clang/lib/CodeGen/BackendUtil.cpp @@ -603,8 +603,6 @@ static bool initTargetOptions(DiagnosticsEngine &Diags, Options.ForceDwarfFrameSection = CodeGenOpts.ForceDwarfFrameSection; Options.EmitCallSiteInfo = CodeGenOpts.EmitCallSiteInfo; Options.EnableAIXExtendedAltivecABI = CodeGenOpts.EnableAIXExtendedAltivecABI; - Options.ValueTrackingVariableLocations = - CodeGenOpts.ValueTrackingVariableLocations; Options.XRayOmitFunctionIndex = CodeGenOpts.XRayOmitFunctionIndex; Options.LoopAlignment = CodeGenOpts.LoopAlignment; From 09b53296cf1649d6f953e71d1c3cd970ad74dde8 Mon Sep 17 00:00:00 2001 From: Ron Lieberman Date: Wed, 22 Dec 2021 11:39:28 -0500 Subject: [PATCH 060/293] Revert "[AMDGPU] Move call clobbered return address registers s[30:31] to callee saved range" This reverts commit 9075009d1fd5f2bf9aa6c2f362d2993691a316b3. 
Failed amdgpu runtime buildbot # 3514 --- llvm/lib/Target/AMDGPU/AMDGPUCallLowering.cpp | 38 +- llvm/lib/Target/AMDGPU/AMDGPUCallingConv.td | 8 +- llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp | 1 + llvm/lib/Target/AMDGPU/AMDGPUISelLowering.h | 3 + llvm/lib/Target/AMDGPU/AMDGPUInstrInfo.td | 6 +- llvm/lib/Target/AMDGPU/AMDGPUMCInstLower.cpp | 3 +- llvm/lib/Target/AMDGPU/SIISelLowering.cpp | 46 +- llvm/lib/Target/AMDGPU/SIInsertWaitcnts.cpp | 2 +- llvm/lib/Target/AMDGPU/SIInstrInfo.cpp | 17 - llvm/lib/Target/AMDGPU/SIInstructions.td | 2 +- llvm/lib/Target/AMDGPU/SILowerSGPRSpills.cpp | 13 +- llvm/lib/Target/AMDGPU/SIRegisterInfo.td | 17 + llvm/lib/Target/AMDGPU/SOPInstructions.td | 7 +- .../combine-fma-add-mul-post-legalize.mir | 456 ++- .../combine-fma-add-mul-pre-legalize.mir | 456 ++- .../combine-shift-imm-chain-illegal-types.mir | 22 +- .../CodeGen/AMDGPU/GlobalISel/dummy-target.ll | 92 +- .../CodeGen/AMDGPU/GlobalISel/fdiv.f32.ll | 6 +- .../CodeGen/AMDGPU/GlobalISel/fdiv.f64.ll | 105 +- llvm/test/CodeGen/AMDGPU/GlobalISel/fpow.ll | 14 +- llvm/test/CodeGen/AMDGPU/GlobalISel/fshl.ll | 22 +- llvm/test/CodeGen/AMDGPU/GlobalISel/fshr.ll | 34 +- .../AMDGPU/GlobalISel/function-returns.ll | 1771 ++++++----- .../CodeGen/AMDGPU/GlobalISel/global-value.ll | 40 +- .../GlobalISel/irtranslator-atomicrmw.ll | 16 +- .../irtranslator-call-abi-attribute-hints.ll | 86 +- .../irtranslator-call-implicit-args.ll | 176 +- .../GlobalISel/irtranslator-call-non-fixed.ll | 50 +- .../irtranslator-call-return-values.ll | 80 +- .../AMDGPU/GlobalISel/irtranslator-call.ll | 418 +-- .../GlobalISel/irtranslator-constantexpr.ll | 18 +- .../GlobalISel/irtranslator-constrained-fp.ll | 116 +- ...translator-fixed-function-abi-vgpr-args.ll | 6 +- .../GlobalISel/irtranslator-function-args.ll | 704 +++-- .../GlobalISel/irtranslator-getelementptr.ll | 64 +- .../GlobalISel/irtranslator-indirect-call.ll | 10 +- .../GlobalISel/irtranslator-inline-asm.ll | 180 +- .../irtranslator-memory-intrinsics.ll | 90 +- .../GlobalISel/irtranslator-metadata.ll | 5 +- .../AMDGPU/GlobalISel/irtranslator-ptrmask.ll | 48 +- .../AMDGPU/GlobalISel/irtranslator-sat.ll | 144 +- .../GlobalISel/irtranslator-sibling-call.ll | 493 +-- .../GlobalISel/irtranslator-tail-call.ll | 39 +- .../AMDGPU/GlobalISel/llvm.amdgcn.ds.fmax.ll | 60 +- .../AMDGPU/GlobalISel/load-unaligned.ll | 28 +- .../GlobalISel/regbankcombiner-smed3.mir | 120 +- .../GlobalISel/regbankcombiner-umed3.mir | 120 +- .../CodeGen/AMDGPU/GlobalISel/roundeven.ll | 10 +- .../test/CodeGen/AMDGPU/GlobalISel/saddsat.ll | 22 +- .../test/CodeGen/AMDGPU/GlobalISel/ssubsat.ll | 22 +- .../test/CodeGen/AMDGPU/GlobalISel/uaddsat.ll | 22 +- .../test/CodeGen/AMDGPU/GlobalISel/usubsat.ll | 22 +- .../abi-attribute-hints-undefined-behavior.ll | 10 +- llvm/test/CodeGen/AMDGPU/amdpal-callable.ll | 10 +- .../test/CodeGen/AMDGPU/branch-relax-spill.ll | 294 +- .../AMDGPU/call-graph-register-usage.ll | 5 +- .../AMDGPU/call-preserved-registers.ll | 29 +- .../test/CodeGen/AMDGPU/callee-frame-setup.ll | 73 +- .../callee-special-input-sgprs-fixed-abi.ll | 2 +- .../AMDGPU/cross-block-use-is-not-abi-copy.ll | 40 +- llvm/test/CodeGen/AMDGPU/fdiv-nofpexcept.ll | 74 +- .../AMDGPU/fix-frame-ptr-reg-copy-livein.ll | 10 +- llvm/test/CodeGen/AMDGPU/fpow.ll | 48 +- ...frame-setup-without-sgpr-to-vgpr-spills.ll | 41 +- llvm/test/CodeGen/AMDGPU/fshr.ll | 16 +- .../AMDGPU/gfx-callable-argument-types.ll | 2812 ++++++++--------- .../gfx-callable-preserved-registers.ll | 224 +- .../AMDGPU/gfx-callable-return-types.ll | 298 +- 
llvm/test/CodeGen/AMDGPU/indirect-call.ll | 1796 ++++++----- llvm/test/CodeGen/AMDGPU/inline-asm.i128.ll | 24 +- llvm/test/CodeGen/AMDGPU/ipra.ll | 25 +- .../AMDGPU/llvm.amdgcn.implicitarg.ptr.ll | 4 - llvm/test/CodeGen/AMDGPU/llvm.powi.ll | 12 +- .../CodeGen/AMDGPU/mul24-pass-ordering.ll | 22 +- .../AMDGPU/need-fp-from-vgpr-spills.ll | 8 +- llvm/test/CodeGen/AMDGPU/nested-calls.ll | 6 +- .../CodeGen/AMDGPU/no-remat-indirect-mov.mir | 4 +- .../AMDGPU/no-source-locations-in-prologue.ll | 8 +- .../AMDGPU/reserve-vgpr-for-sgpr-spill.ll | 8 +- llvm/test/CodeGen/AMDGPU/save-fp.ll | 8 +- llvm/test/CodeGen/AMDGPU/sibling-call.ll | 16 +- .../CodeGen/AMDGPU/splitkit-copy-bundle.mir | 8 +- llvm/test/CodeGen/AMDGPU/stack-realign.ll | 7 +- llvm/test/CodeGen/AMDGPU/strict_fadd.f16.ll | 8 +- llvm/test/CodeGen/AMDGPU/strict_fmul.f16.ll | 8 +- llvm/test/CodeGen/AMDGPU/strict_fsub.f16.ll | 8 +- .../CodeGen/AMDGPU/tail-call-amdgpu-gfx.ll | 25 +- llvm/test/CodeGen/AMDGPU/udiv.ll | 72 +- llvm/test/CodeGen/AMDGPU/udiv64.ll | 46 +- .../AMDGPU/unstructured-cfg-def-use-issue.ll | 44 +- .../AMDGPU/urem-seteq-illegal-types.ll | 15 +- llvm/test/CodeGen/AMDGPU/urem64.ll | 6 +- .../CodeGen/AMDGPU/vgpr-tuple-allocation.ll | 34 +- llvm/test/CodeGen/AMDGPU/wave32.ll | 4 +- .../test/CodeGen/AMDGPU/wwm-reserved-spill.ll | 191 +- .../CodeGen/MIR/AMDGPU/machine-metadata.mir | 24 +- .../CodeGen/MIR/AMDGPU/stack-id-assert.mir | 11 +- 97 files changed, 6961 insertions(+), 5827 deletions(-) diff --git a/llvm/lib/Target/AMDGPU/AMDGPUCallLowering.cpp b/llvm/lib/Target/AMDGPU/AMDGPUCallLowering.cpp index 8041e2993c11..2f1e7823f65c 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPUCallLowering.cpp +++ b/llvm/lib/Target/AMDGPU/AMDGPUCallLowering.cpp @@ -337,6 +337,7 @@ bool AMDGPUCallLowering::lowerReturn(MachineIRBuilder &B, const Value *Val, FunctionLoweringInfo &FLI) const { MachineFunction &MF = B.getMF(); + MachineRegisterInfo &MRI = MF.getRegInfo(); SIMachineFunctionInfo *MFI = MF.getInfo(); MFI->setIfReturnsVoid(!Val); @@ -352,15 +353,40 @@ bool AMDGPUCallLowering::lowerReturn(MachineIRBuilder &B, const Value *Val, return true; } - unsigned ReturnOpc = - IsShader ? AMDGPU::SI_RETURN_TO_EPILOG : AMDGPU::SI_RETURN; + auto const &ST = MF.getSubtarget(); + + unsigned ReturnOpc = 0; + if (IsShader) + ReturnOpc = AMDGPU::SI_RETURN_TO_EPILOG; + else if (CC == CallingConv::AMDGPU_Gfx) + ReturnOpc = AMDGPU::S_SETPC_B64_return_gfx; + else + ReturnOpc = AMDGPU::S_SETPC_B64_return; + auto Ret = B.buildInstrNoInsert(ReturnOpc); + Register ReturnAddrVReg; + if (ReturnOpc == AMDGPU::S_SETPC_B64_return) { + ReturnAddrVReg = MRI.createVirtualRegister(&AMDGPU::CCR_SGPR_64RegClass); + Ret.addUse(ReturnAddrVReg); + } else if (ReturnOpc == AMDGPU::S_SETPC_B64_return_gfx) { + ReturnAddrVReg = + MRI.createVirtualRegister(&AMDGPU::Gfx_CCR_SGPR_64RegClass); + Ret.addUse(ReturnAddrVReg); + } if (!FLI.CanLowerReturn) insertSRetStores(B, Val->getType(), VRegs, FLI.DemoteRegister); else if (!lowerReturnVal(B, Val, VRegs, Ret)) return false; + if (ReturnOpc == AMDGPU::S_SETPC_B64_return || + ReturnOpc == AMDGPU::S_SETPC_B64_return_gfx) { + const SIRegisterInfo *TRI = ST.getRegisterInfo(); + Register LiveInReturn = MF.addLiveIn(TRI->getReturnAddressReg(MF), + &AMDGPU::SGPR_64RegClass); + B.buildCopy(ReturnAddrVReg, LiveInReturn); + } + // TODO: Handle CalleeSavedRegsViaCopy. 
B.insertInstr(Ret); @@ -575,6 +601,14 @@ bool AMDGPUCallLowering::lowerFormalArguments( SmallVector ArgLocs; CCState CCInfo(CC, F.isVarArg(), MF, ArgLocs, F.getContext()); + if (!IsEntryFunc) { + Register ReturnAddrReg = TRI->getReturnAddressReg(MF); + Register LiveInReturn = MF.addLiveIn(ReturnAddrReg, + &AMDGPU::SGPR_64RegClass); + MBB.addLiveIn(ReturnAddrReg); + B.buildCopy(LiveInReturn, ReturnAddrReg); + } + if (Info->hasImplicitBufferPtr()) { Register ImplicitBufferPtrReg = Info->addImplicitBufferPtr(*TRI); MF.addLiveIn(ImplicitBufferPtrReg, &AMDGPU::SGPR_64RegClass); diff --git a/llvm/lib/Target/AMDGPU/AMDGPUCallingConv.td b/llvm/lib/Target/AMDGPU/AMDGPUCallingConv.td index dec227777d8f..1682d43ae671 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPUCallingConv.td +++ b/llvm/lib/Target/AMDGPU/AMDGPUCallingConv.td @@ -152,10 +152,6 @@ def CSR_AMDGPU_AGPRs_32_255 : CalleeSavedRegs< (sequence "AGPR%u", 32, 255) >; -def CSR_AMDGPU_SGPRs_30_31 : CalleeSavedRegs< - (sequence "SGPR%u", 30, 31) ->; - def CSR_AMDGPU_SGPRs_32_105 : CalleeSavedRegs< (sequence "SGPR%u", 32, 105) >; @@ -186,7 +182,7 @@ def CSR_AMDGPU_AllAllocatableSRegs : CalleeSavedRegs< >; def CSR_AMDGPU_HighRegs : CalleeSavedRegs< - (add CSR_AMDGPU_VGPRs, CSR_AMDGPU_SGPRs_30_31, CSR_AMDGPU_SGPRs_32_105) + (add CSR_AMDGPU_VGPRs, CSR_AMDGPU_SGPRs_32_105) >; def CSR_AMDGPU_HighRegs_With_AGPRs : CalleeSavedRegs< @@ -194,7 +190,7 @@ def CSR_AMDGPU_HighRegs_With_AGPRs : CalleeSavedRegs< >; def CSR_AMDGPU_SI_Gfx : CalleeSavedRegs< - (add CSR_AMDGPU_VGPRs, CSR_AMDGPU_SI_Gfx_SGPRs_4_29, CSR_AMDGPU_SGPRs_30_31, CSR_AMDGPU_SI_Gfx_SGPRs_64_105) + (add CSR_AMDGPU_VGPRs, CSR_AMDGPU_SI_Gfx_SGPRs_4_29, CSR_AMDGPU_SI_Gfx_SGPRs_64_105) >; def CSR_AMDGPU_SI_Gfx_With_AGPRs : CalleeSavedRegs< diff --git a/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp b/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp index d662bb1d9af9..54177564afbc 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp +++ b/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp @@ -4398,6 +4398,7 @@ const char* AMDGPUTargetLowering::getTargetNodeName(unsigned Opcode) const { NODE_NAME_CASE(TC_RETURN) NODE_NAME_CASE(TRAP) NODE_NAME_CASE(RET_FLAG) + NODE_NAME_CASE(RET_GFX_FLAG) NODE_NAME_CASE(RETURN_TO_EPILOG) NODE_NAME_CASE(ENDPGM) NODE_NAME_CASE(DWORDADDR) diff --git a/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.h b/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.h index 3dee2922501d..daaca8737c5d 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.h +++ b/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.h @@ -367,6 +367,9 @@ enum NodeType : unsigned { // Return with values from a non-entry function. RET_FLAG, + // Return with values from a non-entry function (AMDGPU_Gfx CC). 
+ RET_GFX_FLAG, + DWORDADDR, FRACT, diff --git a/llvm/lib/Target/AMDGPU/AMDGPUInstrInfo.td b/llvm/lib/Target/AMDGPU/AMDGPUInstrInfo.td index 23b8fcf75f16..391dc8428539 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPUInstrInfo.td +++ b/llvm/lib/Target/AMDGPU/AMDGPUInstrInfo.td @@ -355,7 +355,11 @@ def AMDGPUendpgm : SDNode<"AMDGPUISD::ENDPGM", SDTNone, def AMDGPUreturn_to_epilog : SDNode<"AMDGPUISD::RETURN_TO_EPILOG", SDTNone, [SDNPHasChain, SDNPOptInGlue, SDNPVariadic]>; -def AMDGPUret_flag : SDNode<"AMDGPUISD::RET_FLAG", SDTNone, +def AMDGPUret_flag : SDNode<"AMDGPUISD::RET_FLAG", SDTypeProfile<0, 1, [SDTCisPtrTy<0>]>, + [SDNPHasChain, SDNPOptInGlue, SDNPVariadic] +>; + +def AMDGPUret_gfx_flag : SDNode<"AMDGPUISD::RET_GFX_FLAG", SDTypeProfile<0, 1, [SDTCisPtrTy<0>]>, [SDNPHasChain, SDNPOptInGlue, SDNPVariadic] >; diff --git a/llvm/lib/Target/AMDGPU/AMDGPUMCInstLower.cpp b/llvm/lib/Target/AMDGPU/AMDGPUMCInstLower.cpp index d05536ca3894..3fad7e192195 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPUMCInstLower.cpp +++ b/llvm/lib/Target/AMDGPU/AMDGPUMCInstLower.cpp @@ -120,7 +120,8 @@ void AMDGPUMCInstLower::lower(const MachineInstr *MI, MCInst &OutMI) const { // FIXME: Should be able to handle this with emitPseudoExpansionLowering. We // need to select it to the subtarget specific version, and there's no way to // do that with a single pseudo source operation. - if (Opcode == AMDGPU::S_SETPC_B64_return) + if (Opcode == AMDGPU::S_SETPC_B64_return || + Opcode == AMDGPU::S_SETPC_B64_return_gfx) Opcode = AMDGPU::S_SETPC_B64; else if (Opcode == AMDGPU::SI_CALL) { // SI_CALL is just S_SWAPPC_B64 with an additional operand to track the diff --git a/llvm/lib/Target/AMDGPU/SIISelLowering.cpp b/llvm/lib/Target/AMDGPU/SIISelLowering.cpp index 77937c48a9f1..9f138136e6e9 100644 --- a/llvm/lib/Target/AMDGPU/SIISelLowering.cpp +++ b/llvm/lib/Target/AMDGPU/SIISelLowering.cpp @@ -2618,6 +2618,24 @@ SITargetLowering::LowerReturn(SDValue Chain, CallingConv::ID CallConv, SmallVector RetOps; RetOps.push_back(Chain); // Operand #0 = Chain (updated below) + // Add return address for callable functions. + if (!Info->isEntryFunction()) { + const SIRegisterInfo *TRI = getSubtarget()->getRegisterInfo(); + SDValue ReturnAddrReg = CreateLiveInRegister( + DAG, &AMDGPU::SReg_64RegClass, TRI->getReturnAddressReg(MF), MVT::i64); + + SDValue ReturnAddrVirtualReg = + DAG.getRegister(MF.getRegInfo().createVirtualRegister( + CallConv != CallingConv::AMDGPU_Gfx + ? &AMDGPU::CCR_SGPR_64RegClass + : &AMDGPU::Gfx_CCR_SGPR_64RegClass), + MVT::i64); + Chain = + DAG.getCopyToReg(Chain, DL, ReturnAddrVirtualReg, ReturnAddrReg, Flag); + Flag = Chain.getValue(1); + RetOps.push_back(ReturnAddrVirtualReg); + } + // Copy the result values into the output registers. for (unsigned I = 0, RealRVLocIdx = 0, E = RVLocs.size(); I != E; ++I, ++RealRVLocIdx) { @@ -2674,8 +2692,15 @@ SITargetLowering::LowerReturn(SDValue Chain, CallingConv::ID CallConv, RetOps.push_back(Flag); unsigned Opc = AMDGPUISD::ENDPGM; - if (!IsWaveEnd) - Opc = IsShader ? 
AMDGPUISD::RETURN_TO_EPILOG : AMDGPUISD::RET_FLAG; + if (!IsWaveEnd) { + if (IsShader) + Opc = AMDGPUISD::RETURN_TO_EPILOG; + else if (CallConv == CallingConv::AMDGPU_Gfx) + Opc = AMDGPUISD::RET_GFX_FLAG; + else + Opc = AMDGPUISD::RET_FLAG; + } + return DAG.getNode(Opc, DL, MVT::Other, RetOps); } @@ -3243,6 +3268,21 @@ SDValue SITargetLowering::LowerCall(CallLoweringInfo &CLI, } + SDValue PhysReturnAddrReg; + if (IsTailCall) { + // Since the return is being combined with the call, we need to pass on the + // return address. + + const SIRegisterInfo *TRI = getSubtarget()->getRegisterInfo(); + SDValue ReturnAddrReg = CreateLiveInRegister( + DAG, &AMDGPU::SReg_64RegClass, TRI->getReturnAddressReg(MF), MVT::i64); + + PhysReturnAddrReg = DAG.getRegister(TRI->getReturnAddressReg(MF), + MVT::i64); + Chain = DAG.getCopyToReg(Chain, DL, PhysReturnAddrReg, ReturnAddrReg, InFlag); + InFlag = Chain.getValue(1); + } + // We don't usually want to end the call-sequence here because we would tidy // the frame up *after* the call, however in the ABI-changing tail-call case // we've carefully laid out the parameters so that when sp is reset they'll be @@ -3272,6 +3312,8 @@ SDValue SITargetLowering::LowerCall(CallLoweringInfo &CLI, // this information must travel along with the operation for eventual // consumption by emitEpilogue. Ops.push_back(DAG.getTargetConstant(FPDiff, DL, MVT::i32)); + + Ops.push_back(PhysReturnAddrReg); } // Add argument registers to the end of the list so that they are known live diff --git a/llvm/lib/Target/AMDGPU/SIInsertWaitcnts.cpp b/llvm/lib/Target/AMDGPU/SIInsertWaitcnts.cpp index b1771e1ff084..6fbe5d45ce0a 100644 --- a/llvm/lib/Target/AMDGPU/SIInsertWaitcnts.cpp +++ b/llvm/lib/Target/AMDGPU/SIInsertWaitcnts.cpp @@ -955,8 +955,8 @@ bool SIInsertWaitcnts::generateWaitcntInstBefore( // NOTE: this could be improved with knowledge of all call sites or // with knowledge of the called routines. if (MI.getOpcode() == AMDGPU::SI_RETURN_TO_EPILOG || - MI.getOpcode() == AMDGPU::SI_RETURN || MI.getOpcode() == AMDGPU::S_SETPC_B64_return || + MI.getOpcode() == AMDGPU::S_SETPC_B64_return_gfx || (MI.isReturn() && MI.isCall() && !callWaitsOnFunctionEntry(MI))) { Wait = Wait.combined(AMDGPU::Waitcnt::allZero(ST->hasVscnt())); } diff --git a/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp b/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp index 92f6456f70e4..4ce2c8a02194 100644 --- a/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp +++ b/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp @@ -2071,23 +2071,6 @@ bool SIInstrInfo::expandPostRAPseudo(MachineInstr &MI) const { MI.setDesc(get(ST.isWave32() ? AMDGPU::S_MOV_B32 : AMDGPU::S_MOV_B64)); break; } - case AMDGPU::SI_RETURN: { - const MachineFunction *MF = MBB.getParent(); - const GCNSubtarget &ST = MF->getSubtarget(); - const SIRegisterInfo *TRI = ST.getRegisterInfo(); - // Hiding the return address use with SI_RETURN may lead to extra kills in - // the function and missing live-ins. We are fine in practice because callee - // saved register handling ensures the register value is restored before - // RET, but we need the undef flag here to appease the MachineVerifier - // liveness checks. 
- MachineInstrBuilder MIB = - BuildMI(MBB, MI, DL, get(AMDGPU::S_SETPC_B64_return)) - .addReg(TRI->getReturnAddressReg(*MF), RegState::Undef); - - MIB.copyImplicitOps(MI); - MI.eraseFromParent(); - break; - } } return true; } diff --git a/llvm/lib/Target/AMDGPU/SIInstructions.td b/llvm/lib/Target/AMDGPU/SIInstructions.td index 929d7526d49f..2bbce1d8ba93 100644 --- a/llvm/lib/Target/AMDGPU/SIInstructions.td +++ b/llvm/lib/Target/AMDGPU/SIInstructions.td @@ -463,7 +463,7 @@ def SI_RETURN_TO_EPILOG : SPseudoInstSI < // Return for returning function calls. def SI_RETURN : SPseudoInstSI < - (outs), (ins), [(AMDGPUret_flag)], + (outs), (ins), [], "; return"> { let isTerminator = 1; let isBarrier = 1; diff --git a/llvm/lib/Target/AMDGPU/SILowerSGPRSpills.cpp b/llvm/lib/Target/AMDGPU/SILowerSGPRSpills.cpp index ccbdefe034d6..55196fe334e6 100644 --- a/llvm/lib/Target/AMDGPU/SILowerSGPRSpills.cpp +++ b/llvm/lib/Target/AMDGPU/SILowerSGPRSpills.cpp @@ -79,8 +79,6 @@ static void insertCSRSaves(MachineBasicBlock &SaveBlock, const TargetInstrInfo &TII = *MF.getSubtarget().getInstrInfo(); const TargetFrameLowering *TFI = MF.getSubtarget().getFrameLowering(); const TargetRegisterInfo *TRI = MF.getSubtarget().getRegisterInfo(); - const GCNSubtarget &ST = MF.getSubtarget(); - const SIRegisterInfo *RI = ST.getRegisterInfo(); MachineBasicBlock::iterator I = SaveBlock.begin(); if (!TFI->spillCalleeSavedRegisters(SaveBlock, I, CSI, TRI)) { @@ -91,8 +89,8 @@ static void insertCSRSaves(MachineBasicBlock &SaveBlock, MCRegister Reg = CS.getReg(); MachineInstrSpan MIS(I, &SaveBlock); - const TargetRegisterClass *RC = TRI->getMinimalPhysRegClass( - Reg, Reg == RI->getReturnAddressReg(MF) ? MVT::i64 : MVT::i32); + const TargetRegisterClass *RC = + TRI->getMinimalPhysRegClass(Reg, MVT::i32); // If this value was already livein, we probably have a direct use of the // incoming register value, so don't kill at the spill point. This happens @@ -121,8 +119,7 @@ static void insertCSRRestores(MachineBasicBlock &RestoreBlock, const TargetInstrInfo &TII = *MF.getSubtarget().getInstrInfo(); const TargetFrameLowering *TFI = MF.getSubtarget().getFrameLowering(); const TargetRegisterInfo *TRI = MF.getSubtarget().getRegisterInfo(); - const GCNSubtarget &ST = MF.getSubtarget(); - const SIRegisterInfo *RI = ST.getRegisterInfo(); + // Restore all registers immediately before the return and any // terminators that precede it. MachineBasicBlock::iterator I = RestoreBlock.getFirstTerminator(); @@ -131,8 +128,8 @@ static void insertCSRRestores(MachineBasicBlock &RestoreBlock, if (!TFI->restoreCalleeSavedRegisters(RestoreBlock, I, CSI, TRI)) { for (const CalleeSavedInfo &CI : reverse(CSI)) { unsigned Reg = CI.getReg(); - const TargetRegisterClass *RC = TRI->getMinimalPhysRegClass( - Reg, Reg == RI->getReturnAddressReg(MF) ? 
MVT::i64 : MVT::i32); + const TargetRegisterClass *RC = + TRI->getMinimalPhysRegClass(Reg, MVT::i32); TII.loadRegFromStackSlot(RestoreBlock, I, Reg, CI.getFrameIdx(), RC, TRI); assert(I != RestoreBlock.begin() && diff --git a/llvm/lib/Target/AMDGPU/SIRegisterInfo.td b/llvm/lib/Target/AMDGPU/SIRegisterInfo.td index 110cfeb09eb5..340e2b48e5cd 100644 --- a/llvm/lib/Target/AMDGPU/SIRegisterInfo.td +++ b/llvm/lib/Target/AMDGPU/SIRegisterInfo.td @@ -701,6 +701,23 @@ def SGPR_64 : SIRegisterClass<"AMDGPU", [v2i32, i64, v2f32, f64, v4i16, v4f16], let HasSGPR = 1; } +// CCR (call clobbered registers) SGPR 64-bit registers +def CCR_SGPR_64 : SIRegisterClass<"AMDGPU", SGPR_64.RegTypes, 32, + (add (trunc SGPR_64, 16))> { + let CopyCost = SGPR_64.CopyCost; + let AllocationPriority = SGPR_64.AllocationPriority; + let HasSGPR = 1; +} + +// Call clobbered 64-bit SGPRs for AMDGPU_Gfx CC +def Gfx_CCR_SGPR_64 : SIRegisterClass<"AMDGPU", SGPR_64.RegTypes, 32, + (add (trunc (shl SGPR_64, 15), 1), // s[30:31] + (trunc (shl SGPR_64, 18), 14))> { // s[36:37]-s[62:63] + let CopyCost = SGPR_64.CopyCost; + let AllocationPriority = SGPR_64.AllocationPriority; + let HasSGPR = 1; +} + def TTMP_64 : SIRegisterClass<"AMDGPU", [v2i32, i64, f64, v4i16, v4f16], 32, (add TTMP_64Regs)> { let isAllocatable = 0; diff --git a/llvm/lib/Target/AMDGPU/SOPInstructions.td b/llvm/lib/Target/AMDGPU/SOPInstructions.td index 0ca679cd506c..1713586dcf5b 100644 --- a/llvm/lib/Target/AMDGPU/SOPInstructions.td +++ b/llvm/lib/Target/AMDGPU/SOPInstructions.td @@ -152,8 +152,8 @@ class SOP1_64_0 <string opName, list<dag> pattern=[]> : SOP1_Pseudo < } // 64-bit input, no output -class SOP1_1 <string opName, list<dag> pattern=[]> : SOP1_Pseudo < - opName, (outs), (ins SReg_64:$src0), "$src0", pattern> { +class SOP1_1 <string opName, RegisterClass rc = SReg_64, list<dag> pattern=[]> : SOP1_Pseudo < + opName, (outs), (ins rc:$src0), "$src0", pattern> { let has_sdst = 0; } @@ -300,7 +300,8 @@ def S_SETPC_B64 : SOP1_1 <"s_setpc_b64">; let isReturn = 1 in { // Define variant marked as return rather than branch.
-def S_SETPC_B64_return : SOP1_1<"">; +def S_SETPC_B64_return : SOP1_1<"", CCR_SGPR_64, [(AMDGPUret_flag i64:$src0)]>; +def S_SETPC_B64_return_gfx : SOP1_1<"", Gfx_CCR_SGPR_64, [(AMDGPUret_gfx_flag i64:$src0)]>; } } // End isTerminator = 1, isBarrier = 1 diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/combine-fma-add-mul-post-legalize.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/combine-fma-add-mul-post-legalize.mir index 0c48b0e3585d..3c6c6523e100 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/combine-fma-add-mul-post-legalize.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/combine-fma-add-mul-post-legalize.mir @@ -12,157 +12,193 @@ name: test_f32_add_mul body: | bb.1.entry: - liveins: $vgpr0, $vgpr1, $vgpr2 + liveins: $vgpr0, $vgpr1, $vgpr2, $sgpr30_sgpr31 ; GFX9-LABEL: name: test_f32_add_mul ; GFX9: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 ; GFX9: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 ; GFX9: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 + ; GFX9: [[COPY3:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 ; GFX9: [[FMUL:%[0-9]+]]:_(s32) = G_FMUL [[COPY]], [[COPY1]] ; GFX9: [[FADD:%[0-9]+]]:_(s32) = G_FADD [[FMUL]], [[COPY2]] ; GFX9: $vgpr0 = COPY [[FADD]](s32) - ; GFX9: S_SETPC_B64_return $sgpr30_sgpr31, implicit $vgpr0 + ; GFX9: [[COPY4:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY3]] + ; GFX9: S_SETPC_B64_return [[COPY4]], implicit $vgpr0 ; GFX9-CONTRACT-LABEL: name: test_f32_add_mul ; GFX9-CONTRACT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 ; GFX9-CONTRACT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 ; GFX9-CONTRACT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 + ; GFX9-CONTRACT: [[COPY3:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 ; GFX9-CONTRACT: [[FMA:%[0-9]+]]:_(s32) = G_FMA [[COPY]], [[COPY1]], [[COPY2]] ; GFX9-CONTRACT: $vgpr0 = COPY [[FMA]](s32) - ; GFX9-CONTRACT: S_SETPC_B64_return $sgpr30_sgpr31, implicit $vgpr0 + ; GFX9-CONTRACT: [[COPY4:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY3]] + ; GFX9-CONTRACT: S_SETPC_B64_return [[COPY4]], implicit $vgpr0 ; GFX9-DENORM-LABEL: name: test_f32_add_mul ; GFX9-DENORM: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 ; GFX9-DENORM: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 ; GFX9-DENORM: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 + ; GFX9-DENORM: [[COPY3:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 ; GFX9-DENORM: [[FMUL:%[0-9]+]]:_(s32) = G_FMUL [[COPY]], [[COPY1]] ; GFX9-DENORM: [[FADD:%[0-9]+]]:_(s32) = G_FADD [[FMUL]], [[COPY2]] ; GFX9-DENORM: $vgpr0 = COPY [[FADD]](s32) - ; GFX9-DENORM: S_SETPC_B64_return $sgpr30_sgpr31, implicit $vgpr0 + ; GFX9-DENORM: [[COPY4:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY3]] + ; GFX9-DENORM: S_SETPC_B64_return [[COPY4]], implicit $vgpr0 ; GFX9-UNSAFE-LABEL: name: test_f32_add_mul ; GFX9-UNSAFE: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 ; GFX9-UNSAFE: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 ; GFX9-UNSAFE: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 + ; GFX9-UNSAFE: [[COPY3:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 ; GFX9-UNSAFE: [[FMA:%[0-9]+]]:_(s32) = G_FMA [[COPY]], [[COPY1]], [[COPY2]] ; GFX9-UNSAFE: $vgpr0 = COPY [[FMA]](s32) - ; GFX9-UNSAFE: S_SETPC_B64_return $sgpr30_sgpr31, implicit $vgpr0 + ; GFX9-UNSAFE: [[COPY4:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY3]] + ; GFX9-UNSAFE: S_SETPC_B64_return [[COPY4]], implicit $vgpr0 ; GFX10-LABEL: name: test_f32_add_mul ; GFX10: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 ; GFX10: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 ; GFX10: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 + ; GFX10: [[COPY3:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 ; GFX10: [[FMUL:%[0-9]+]]:_(s32) = G_FMUL [[COPY]], [[COPY1]] ; GFX10: [[FADD:%[0-9]+]]:_(s32) = G_FADD [[FMUL]], [[COPY2]] ; GFX10: $vgpr0 = COPY 
[[FADD]](s32) - ; GFX10: S_SETPC_B64_return $sgpr30_sgpr31, implicit $vgpr0 + ; GFX10: [[COPY4:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY3]] + ; GFX10: S_SETPC_B64_return [[COPY4]], implicit $vgpr0 ; GFX10-CONTRACT-LABEL: name: test_f32_add_mul ; GFX10-CONTRACT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 ; GFX10-CONTRACT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 ; GFX10-CONTRACT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 + ; GFX10-CONTRACT: [[COPY3:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 ; GFX10-CONTRACT: [[FMA:%[0-9]+]]:_(s32) = G_FMA [[COPY]], [[COPY1]], [[COPY2]] ; GFX10-CONTRACT: $vgpr0 = COPY [[FMA]](s32) - ; GFX10-CONTRACT: S_SETPC_B64_return $sgpr30_sgpr31, implicit $vgpr0 + ; GFX10-CONTRACT: [[COPY4:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY3]] + ; GFX10-CONTRACT: S_SETPC_B64_return [[COPY4]], implicit $vgpr0 ; GFX10-DENORM-LABEL: name: test_f32_add_mul ; GFX10-DENORM: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 ; GFX10-DENORM: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 ; GFX10-DENORM: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 + ; GFX10-DENORM: [[COPY3:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 ; GFX10-DENORM: [[FMUL:%[0-9]+]]:_(s32) = G_FMUL [[COPY]], [[COPY1]] ; GFX10-DENORM: [[FADD:%[0-9]+]]:_(s32) = G_FADD [[FMUL]], [[COPY2]] ; GFX10-DENORM: $vgpr0 = COPY [[FADD]](s32) - ; GFX10-DENORM: S_SETPC_B64_return $sgpr30_sgpr31, implicit $vgpr0 + ; GFX10-DENORM: [[COPY4:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY3]] + ; GFX10-DENORM: S_SETPC_B64_return [[COPY4]], implicit $vgpr0 ; GFX10-UNSAFE-LABEL: name: test_f32_add_mul ; GFX10-UNSAFE: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 ; GFX10-UNSAFE: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 ; GFX10-UNSAFE: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 + ; GFX10-UNSAFE: [[COPY3:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 ; GFX10-UNSAFE: [[FMA:%[0-9]+]]:_(s32) = G_FMA [[COPY]], [[COPY1]], [[COPY2]] ; GFX10-UNSAFE: $vgpr0 = COPY [[FMA]](s32) - ; GFX10-UNSAFE: S_SETPC_B64_return $sgpr30_sgpr31, implicit $vgpr0 + ; GFX10-UNSAFE: [[COPY4:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY3]] + ; GFX10-UNSAFE: S_SETPC_B64_return [[COPY4]], implicit $vgpr0 %0:_(s32) = COPY $vgpr0 %1:_(s32) = COPY $vgpr1 %2:_(s32) = COPY $vgpr2 + %3:sgpr_64 = COPY $sgpr30_sgpr31 %4:_(s32) = G_FMUL %0, %1 %5:_(s32) = G_FADD %4, %2 $vgpr0 = COPY %5(s32) - S_SETPC_B64_return $sgpr30_sgpr31, implicit $vgpr0 + %6:ccr_sgpr_64 = COPY %3 + S_SETPC_B64_return %6, implicit $vgpr0 ... 
--- name: test_f32_add_mul_rhs body: | bb.1.entry: - liveins: $vgpr0, $vgpr1, $vgpr2 + liveins: $vgpr0, $vgpr1, $vgpr2, $sgpr30_sgpr31 ; GFX9-LABEL: name: test_f32_add_mul_rhs ; GFX9: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 ; GFX9: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 ; GFX9: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 + ; GFX9: [[COPY3:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 ; GFX9: [[FMUL:%[0-9]+]]:_(s32) = G_FMUL [[COPY]], [[COPY1]] ; GFX9: [[FADD:%[0-9]+]]:_(s32) = G_FADD [[COPY2]], [[FMUL]] ; GFX9: $vgpr0 = COPY [[FADD]](s32) - ; GFX9: S_SETPC_B64_return $sgpr30_sgpr31, implicit $vgpr0 + ; GFX9: [[COPY4:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY3]] + ; GFX9: S_SETPC_B64_return [[COPY4]], implicit $vgpr0 ; GFX9-CONTRACT-LABEL: name: test_f32_add_mul_rhs ; GFX9-CONTRACT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 ; GFX9-CONTRACT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 ; GFX9-CONTRACT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 + ; GFX9-CONTRACT: [[COPY3:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 ; GFX9-CONTRACT: [[FMA:%[0-9]+]]:_(s32) = G_FMA [[COPY]], [[COPY1]], [[COPY2]] ; GFX9-CONTRACT: $vgpr0 = COPY [[FMA]](s32) - ; GFX9-CONTRACT: S_SETPC_B64_return $sgpr30_sgpr31, implicit $vgpr0 + ; GFX9-CONTRACT: [[COPY4:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY3]] + ; GFX9-CONTRACT: S_SETPC_B64_return [[COPY4]], implicit $vgpr0 ; GFX9-DENORM-LABEL: name: test_f32_add_mul_rhs ; GFX9-DENORM: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 ; GFX9-DENORM: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 ; GFX9-DENORM: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 + ; GFX9-DENORM: [[COPY3:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 ; GFX9-DENORM: [[FMUL:%[0-9]+]]:_(s32) = G_FMUL [[COPY]], [[COPY1]] ; GFX9-DENORM: [[FADD:%[0-9]+]]:_(s32) = G_FADD [[COPY2]], [[FMUL]] ; GFX9-DENORM: $vgpr0 = COPY [[FADD]](s32) - ; GFX9-DENORM: S_SETPC_B64_return $sgpr30_sgpr31, implicit $vgpr0 + ; GFX9-DENORM: [[COPY4:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY3]] + ; GFX9-DENORM: S_SETPC_B64_return [[COPY4]], implicit $vgpr0 ; GFX9-UNSAFE-LABEL: name: test_f32_add_mul_rhs ; GFX9-UNSAFE: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 ; GFX9-UNSAFE: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 ; GFX9-UNSAFE: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 + ; GFX9-UNSAFE: [[COPY3:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 ; GFX9-UNSAFE: [[FMA:%[0-9]+]]:_(s32) = G_FMA [[COPY]], [[COPY1]], [[COPY2]] ; GFX9-UNSAFE: $vgpr0 = COPY [[FMA]](s32) - ; GFX9-UNSAFE: S_SETPC_B64_return $sgpr30_sgpr31, implicit $vgpr0 + ; GFX9-UNSAFE: [[COPY4:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY3]] + ; GFX9-UNSAFE: S_SETPC_B64_return [[COPY4]], implicit $vgpr0 ; GFX10-LABEL: name: test_f32_add_mul_rhs ; GFX10: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 ; GFX10: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 ; GFX10: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 + ; GFX10: [[COPY3:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 ; GFX10: [[FMUL:%[0-9]+]]:_(s32) = G_FMUL [[COPY]], [[COPY1]] ; GFX10: [[FADD:%[0-9]+]]:_(s32) = G_FADD [[COPY2]], [[FMUL]] ; GFX10: $vgpr0 = COPY [[FADD]](s32) - ; GFX10: S_SETPC_B64_return $sgpr30_sgpr31, implicit $vgpr0 + ; GFX10: [[COPY4:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY3]] + ; GFX10: S_SETPC_B64_return [[COPY4]], implicit $vgpr0 ; GFX10-CONTRACT-LABEL: name: test_f32_add_mul_rhs ; GFX10-CONTRACT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 ; GFX10-CONTRACT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 ; GFX10-CONTRACT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 + ; GFX10-CONTRACT: [[COPY3:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 ; GFX10-CONTRACT: [[FMA:%[0-9]+]]:_(s32) = G_FMA [[COPY]], [[COPY1]], [[COPY2]] ; GFX10-CONTRACT: $vgpr0 = COPY [[FMA]](s32) - ; 
GFX10-CONTRACT: S_SETPC_B64_return $sgpr30_sgpr31, implicit $vgpr0 + ; GFX10-CONTRACT: [[COPY4:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY3]] + ; GFX10-CONTRACT: S_SETPC_B64_return [[COPY4]], implicit $vgpr0 ; GFX10-DENORM-LABEL: name: test_f32_add_mul_rhs ; GFX10-DENORM: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 ; GFX10-DENORM: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 ; GFX10-DENORM: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 + ; GFX10-DENORM: [[COPY3:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 ; GFX10-DENORM: [[FMUL:%[0-9]+]]:_(s32) = G_FMUL [[COPY]], [[COPY1]] ; GFX10-DENORM: [[FADD:%[0-9]+]]:_(s32) = G_FADD [[COPY2]], [[FMUL]] ; GFX10-DENORM: $vgpr0 = COPY [[FADD]](s32) - ; GFX10-DENORM: S_SETPC_B64_return $sgpr30_sgpr31, implicit $vgpr0 + ; GFX10-DENORM: [[COPY4:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY3]] + ; GFX10-DENORM: S_SETPC_B64_return [[COPY4]], implicit $vgpr0 ; GFX10-UNSAFE-LABEL: name: test_f32_add_mul_rhs ; GFX10-UNSAFE: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 ; GFX10-UNSAFE: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 ; GFX10-UNSAFE: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 + ; GFX10-UNSAFE: [[COPY3:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 ; GFX10-UNSAFE: [[FMA:%[0-9]+]]:_(s32) = G_FMA [[COPY]], [[COPY1]], [[COPY2]] ; GFX10-UNSAFE: $vgpr0 = COPY [[FMA]](s32) - ; GFX10-UNSAFE: S_SETPC_B64_return $sgpr30_sgpr31, implicit $vgpr0 + ; GFX10-UNSAFE: [[COPY4:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY3]] + ; GFX10-UNSAFE: S_SETPC_B64_return [[COPY4]], implicit $vgpr0 %0:_(s32) = COPY $vgpr0 %1:_(s32) = COPY $vgpr1 %2:_(s32) = COPY $vgpr2 + %3:sgpr_64 = COPY $sgpr30_sgpr31 %4:_(s32) = G_FMUL %0, %1 %5:_(s32) = G_FADD %2, %4 $vgpr0 = COPY %5(s32) - S_SETPC_B64_return $sgpr30_sgpr31, implicit $vgpr0 + %6:ccr_sgpr_64 = COPY %3 + S_SETPC_B64_return %6, implicit $vgpr0 ... --- name: test_half_add_mul body: | bb.1.entry: - liveins: $vgpr0, $vgpr1, $vgpr2 + liveins: $vgpr0, $vgpr1, $vgpr2, $sgpr30_sgpr31 ; GFX9-LABEL: name: test_half_add_mul ; GFX9: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 @@ -171,11 +207,13 @@ body: | ; GFX9: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY1]](s32) ; GFX9: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 ; GFX9: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[COPY2]](s32) + ; GFX9: [[COPY3:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 ; GFX9: [[FMUL:%[0-9]+]]:_(s16) = G_FMUL [[TRUNC]], [[TRUNC1]] ; GFX9: [[FADD:%[0-9]+]]:_(s16) = G_FADD [[FMUL]], [[TRUNC2]] ; GFX9: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[FADD]](s16) ; GFX9: $vgpr0 = COPY [[ANYEXT]](s32) - ; GFX9: S_SETPC_B64_return $sgpr30_sgpr31, implicit $vgpr0 + ; GFX9: [[COPY4:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY3]] + ; GFX9: S_SETPC_B64_return [[COPY4]], implicit $vgpr0 ; GFX9-CONTRACT-LABEL: name: test_half_add_mul ; GFX9-CONTRACT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 ; GFX9-CONTRACT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY]](s32) @@ -183,10 +221,12 @@ body: | ; GFX9-CONTRACT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY1]](s32) ; GFX9-CONTRACT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 ; GFX9-CONTRACT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[COPY2]](s32) + ; GFX9-CONTRACT: [[COPY3:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 ; GFX9-CONTRACT: [[FMA:%[0-9]+]]:_(s16) = G_FMA [[TRUNC]], [[TRUNC1]], [[TRUNC2]] ; GFX9-CONTRACT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[FMA]](s16) ; GFX9-CONTRACT: $vgpr0 = COPY [[ANYEXT]](s32) - ; GFX9-CONTRACT: S_SETPC_B64_return $sgpr30_sgpr31, implicit $vgpr0 + ; GFX9-CONTRACT: [[COPY4:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY3]] + ; GFX9-CONTRACT: S_SETPC_B64_return [[COPY4]], implicit $vgpr0 ; GFX9-DENORM-LABEL: name: test_half_add_mul ; GFX9-DENORM: 
[[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 ; GFX9-DENORM: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY]](s32) @@ -194,11 +234,13 @@ body: | ; GFX9-DENORM: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY1]](s32) ; GFX9-DENORM: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 ; GFX9-DENORM: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[COPY2]](s32) + ; GFX9-DENORM: [[COPY3:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 ; GFX9-DENORM: [[FMUL:%[0-9]+]]:_(s16) = G_FMUL [[TRUNC]], [[TRUNC1]] ; GFX9-DENORM: [[FADD:%[0-9]+]]:_(s16) = G_FADD [[FMUL]], [[TRUNC2]] ; GFX9-DENORM: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[FADD]](s16) ; GFX9-DENORM: $vgpr0 = COPY [[ANYEXT]](s32) - ; GFX9-DENORM: S_SETPC_B64_return $sgpr30_sgpr31, implicit $vgpr0 + ; GFX9-DENORM: [[COPY4:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY3]] + ; GFX9-DENORM: S_SETPC_B64_return [[COPY4]], implicit $vgpr0 ; GFX9-UNSAFE-LABEL: name: test_half_add_mul ; GFX9-UNSAFE: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 ; GFX9-UNSAFE: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY]](s32) @@ -206,10 +248,12 @@ body: | ; GFX9-UNSAFE: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY1]](s32) ; GFX9-UNSAFE: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 ; GFX9-UNSAFE: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[COPY2]](s32) + ; GFX9-UNSAFE: [[COPY3:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 ; GFX9-UNSAFE: [[FMA:%[0-9]+]]:_(s16) = G_FMA [[TRUNC]], [[TRUNC1]], [[TRUNC2]] ; GFX9-UNSAFE: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[FMA]](s16) ; GFX9-UNSAFE: $vgpr0 = COPY [[ANYEXT]](s32) - ; GFX9-UNSAFE: S_SETPC_B64_return $sgpr30_sgpr31, implicit $vgpr0 + ; GFX9-UNSAFE: [[COPY4:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY3]] + ; GFX9-UNSAFE: S_SETPC_B64_return [[COPY4]], implicit $vgpr0 ; GFX10-LABEL: name: test_half_add_mul ; GFX10: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 ; GFX10: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY]](s32) @@ -217,11 +261,13 @@ body: | ; GFX10: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY1]](s32) ; GFX10: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 ; GFX10: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[COPY2]](s32) + ; GFX10: [[COPY3:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 ; GFX10: [[FMUL:%[0-9]+]]:_(s16) = G_FMUL [[TRUNC]], [[TRUNC1]] ; GFX10: [[FADD:%[0-9]+]]:_(s16) = G_FADD [[FMUL]], [[TRUNC2]] ; GFX10: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[FADD]](s16) ; GFX10: $vgpr0 = COPY [[ANYEXT]](s32) - ; GFX10: S_SETPC_B64_return $sgpr30_sgpr31, implicit $vgpr0 + ; GFX10: [[COPY4:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY3]] + ; GFX10: S_SETPC_B64_return [[COPY4]], implicit $vgpr0 ; GFX10-CONTRACT-LABEL: name: test_half_add_mul ; GFX10-CONTRACT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 ; GFX10-CONTRACT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY]](s32) @@ -229,10 +275,12 @@ body: | ; GFX10-CONTRACT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY1]](s32) ; GFX10-CONTRACT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 ; GFX10-CONTRACT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[COPY2]](s32) + ; GFX10-CONTRACT: [[COPY3:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 ; GFX10-CONTRACT: [[FMA:%[0-9]+]]:_(s16) = G_FMA [[TRUNC]], [[TRUNC1]], [[TRUNC2]] ; GFX10-CONTRACT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[FMA]](s16) ; GFX10-CONTRACT: $vgpr0 = COPY [[ANYEXT]](s32) - ; GFX10-CONTRACT: S_SETPC_B64_return $sgpr30_sgpr31, implicit $vgpr0 + ; GFX10-CONTRACT: [[COPY4:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY3]] + ; GFX10-CONTRACT: S_SETPC_B64_return [[COPY4]], implicit $vgpr0 ; GFX10-DENORM-LABEL: name: test_half_add_mul ; GFX10-DENORM: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 ; GFX10-DENORM: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY]](s32) @@ -240,11 +288,13 @@ body: | ; GFX10-DENORM: 
[[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY1]](s32) ; GFX10-DENORM: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 ; GFX10-DENORM: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[COPY2]](s32) + ; GFX10-DENORM: [[COPY3:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 ; GFX10-DENORM: [[FMUL:%[0-9]+]]:_(s16) = G_FMUL [[TRUNC]], [[TRUNC1]] ; GFX10-DENORM: [[FADD:%[0-9]+]]:_(s16) = G_FADD [[FMUL]], [[TRUNC2]] ; GFX10-DENORM: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[FADD]](s16) ; GFX10-DENORM: $vgpr0 = COPY [[ANYEXT]](s32) - ; GFX10-DENORM: S_SETPC_B64_return $sgpr30_sgpr31, implicit $vgpr0 + ; GFX10-DENORM: [[COPY4:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY3]] + ; GFX10-DENORM: S_SETPC_B64_return [[COPY4]], implicit $vgpr0 ; GFX10-UNSAFE-LABEL: name: test_half_add_mul ; GFX10-UNSAFE: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 ; GFX10-UNSAFE: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY]](s32) @@ -252,28 +302,32 @@ body: | ; GFX10-UNSAFE: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY1]](s32) ; GFX10-UNSAFE: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 ; GFX10-UNSAFE: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[COPY2]](s32) + ; GFX10-UNSAFE: [[COPY3:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 ; GFX10-UNSAFE: [[FMA:%[0-9]+]]:_(s16) = G_FMA [[TRUNC]], [[TRUNC1]], [[TRUNC2]] ; GFX10-UNSAFE: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[FMA]](s16) ; GFX10-UNSAFE: $vgpr0 = COPY [[ANYEXT]](s32) - ; GFX10-UNSAFE: S_SETPC_B64_return $sgpr30_sgpr31, implicit $vgpr0 + ; GFX10-UNSAFE: [[COPY4:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY3]] + ; GFX10-UNSAFE: S_SETPC_B64_return [[COPY4]], implicit $vgpr0 %4:_(s32) = COPY $vgpr0 %0:_(s16) = G_TRUNC %4(s32) %5:_(s32) = COPY $vgpr1 %1:_(s16) = G_TRUNC %5(s32) %6:_(s32) = COPY $vgpr2 %2:_(s16) = G_TRUNC %6(s32) + %3:sgpr_64 = COPY $sgpr30_sgpr31 %7:_(s16) = G_FMUL %0, %1 %8:_(s16) = G_FADD %7, %2 %10:_(s32) = G_ANYEXT %8(s16) $vgpr0 = COPY %10(s32) - S_SETPC_B64_return $sgpr30_sgpr31, implicit $vgpr0 + %9:ccr_sgpr_64 = COPY %3 + S_SETPC_B64_return %9, implicit $vgpr0 ... 
--- name: test_half_add_mul_rhs body: | bb.1.entry: - liveins: $vgpr0, $vgpr1, $vgpr2 + liveins: $vgpr0, $vgpr1, $vgpr2, $sgpr30_sgpr31 ; GFX9-LABEL: name: test_half_add_mul_rhs ; GFX9: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 @@ -282,11 +336,13 @@ body: | ; GFX9: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY1]](s32) ; GFX9: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 ; GFX9: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[COPY2]](s32) + ; GFX9: [[COPY3:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 ; GFX9: [[FMUL:%[0-9]+]]:_(s16) = G_FMUL [[TRUNC]], [[TRUNC1]] ; GFX9: [[FADD:%[0-9]+]]:_(s16) = G_FADD [[TRUNC2]], [[FMUL]] ; GFX9: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[FADD]](s16) ; GFX9: $vgpr0 = COPY [[ANYEXT]](s32) - ; GFX9: S_SETPC_B64_return $sgpr30_sgpr31, implicit $vgpr0 + ; GFX9: [[COPY4:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY3]] + ; GFX9: S_SETPC_B64_return [[COPY4]], implicit $vgpr0 ; GFX9-CONTRACT-LABEL: name: test_half_add_mul_rhs ; GFX9-CONTRACT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 ; GFX9-CONTRACT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY]](s32) @@ -294,10 +350,12 @@ body: | ; GFX9-CONTRACT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY1]](s32) ; GFX9-CONTRACT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 ; GFX9-CONTRACT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[COPY2]](s32) + ; GFX9-CONTRACT: [[COPY3:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 ; GFX9-CONTRACT: [[FMA:%[0-9]+]]:_(s16) = G_FMA [[TRUNC]], [[TRUNC1]], [[TRUNC2]] ; GFX9-CONTRACT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[FMA]](s16) ; GFX9-CONTRACT: $vgpr0 = COPY [[ANYEXT]](s32) - ; GFX9-CONTRACT: S_SETPC_B64_return $sgpr30_sgpr31, implicit $vgpr0 + ; GFX9-CONTRACT: [[COPY4:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY3]] + ; GFX9-CONTRACT: S_SETPC_B64_return [[COPY4]], implicit $vgpr0 ; GFX9-DENORM-LABEL: name: test_half_add_mul_rhs ; GFX9-DENORM: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 ; GFX9-DENORM: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY]](s32) @@ -305,11 +363,13 @@ body: | ; GFX9-DENORM: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY1]](s32) ; GFX9-DENORM: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 ; GFX9-DENORM: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[COPY2]](s32) + ; GFX9-DENORM: [[COPY3:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 ; GFX9-DENORM: [[FMUL:%[0-9]+]]:_(s16) = G_FMUL [[TRUNC]], [[TRUNC1]] ; GFX9-DENORM: [[FADD:%[0-9]+]]:_(s16) = G_FADD [[TRUNC2]], [[FMUL]] ; GFX9-DENORM: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[FADD]](s16) ; GFX9-DENORM: $vgpr0 = COPY [[ANYEXT]](s32) - ; GFX9-DENORM: S_SETPC_B64_return $sgpr30_sgpr31, implicit $vgpr0 + ; GFX9-DENORM: [[COPY4:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY3]] + ; GFX9-DENORM: S_SETPC_B64_return [[COPY4]], implicit $vgpr0 ; GFX9-UNSAFE-LABEL: name: test_half_add_mul_rhs ; GFX9-UNSAFE: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 ; GFX9-UNSAFE: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY]](s32) @@ -317,10 +377,12 @@ body: | ; GFX9-UNSAFE: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY1]](s32) ; GFX9-UNSAFE: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 ; GFX9-UNSAFE: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[COPY2]](s32) + ; GFX9-UNSAFE: [[COPY3:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 ; GFX9-UNSAFE: [[FMA:%[0-9]+]]:_(s16) = G_FMA [[TRUNC]], [[TRUNC1]], [[TRUNC2]] ; GFX9-UNSAFE: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[FMA]](s16) ; GFX9-UNSAFE: $vgpr0 = COPY [[ANYEXT]](s32) - ; GFX9-UNSAFE: S_SETPC_B64_return $sgpr30_sgpr31, implicit $vgpr0 + ; GFX9-UNSAFE: [[COPY4:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY3]] + ; GFX9-UNSAFE: S_SETPC_B64_return [[COPY4]], implicit $vgpr0 ; GFX10-LABEL: name: test_half_add_mul_rhs ; GFX10: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 ; GFX10: 
[[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY]](s32) @@ -328,11 +390,13 @@ body: | ; GFX10: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY1]](s32) ; GFX10: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 ; GFX10: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[COPY2]](s32) + ; GFX10: [[COPY3:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 ; GFX10: [[FMUL:%[0-9]+]]:_(s16) = G_FMUL [[TRUNC]], [[TRUNC1]] ; GFX10: [[FADD:%[0-9]+]]:_(s16) = G_FADD [[TRUNC2]], [[FMUL]] ; GFX10: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[FADD]](s16) ; GFX10: $vgpr0 = COPY [[ANYEXT]](s32) - ; GFX10: S_SETPC_B64_return $sgpr30_sgpr31, implicit $vgpr0 + ; GFX10: [[COPY4:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY3]] + ; GFX10: S_SETPC_B64_return [[COPY4]], implicit $vgpr0 ; GFX10-CONTRACT-LABEL: name: test_half_add_mul_rhs ; GFX10-CONTRACT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 ; GFX10-CONTRACT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY]](s32) @@ -340,10 +404,12 @@ body: | ; GFX10-CONTRACT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY1]](s32) ; GFX10-CONTRACT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 ; GFX10-CONTRACT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[COPY2]](s32) + ; GFX10-CONTRACT: [[COPY3:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 ; GFX10-CONTRACT: [[FMA:%[0-9]+]]:_(s16) = G_FMA [[TRUNC]], [[TRUNC1]], [[TRUNC2]] ; GFX10-CONTRACT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[FMA]](s16) ; GFX10-CONTRACT: $vgpr0 = COPY [[ANYEXT]](s32) - ; GFX10-CONTRACT: S_SETPC_B64_return $sgpr30_sgpr31, implicit $vgpr0 + ; GFX10-CONTRACT: [[COPY4:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY3]] + ; GFX10-CONTRACT: S_SETPC_B64_return [[COPY4]], implicit $vgpr0 ; GFX10-DENORM-LABEL: name: test_half_add_mul_rhs ; GFX10-DENORM: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 ; GFX10-DENORM: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY]](s32) @@ -351,11 +417,13 @@ body: | ; GFX10-DENORM: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY1]](s32) ; GFX10-DENORM: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 ; GFX10-DENORM: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[COPY2]](s32) + ; GFX10-DENORM: [[COPY3:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 ; GFX10-DENORM: [[FMUL:%[0-9]+]]:_(s16) = G_FMUL [[TRUNC]], [[TRUNC1]] ; GFX10-DENORM: [[FADD:%[0-9]+]]:_(s16) = G_FADD [[TRUNC2]], [[FMUL]] ; GFX10-DENORM: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[FADD]](s16) ; GFX10-DENORM: $vgpr0 = COPY [[ANYEXT]](s32) - ; GFX10-DENORM: S_SETPC_B64_return $sgpr30_sgpr31, implicit $vgpr0 + ; GFX10-DENORM: [[COPY4:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY3]] + ; GFX10-DENORM: S_SETPC_B64_return [[COPY4]], implicit $vgpr0 ; GFX10-UNSAFE-LABEL: name: test_half_add_mul_rhs ; GFX10-UNSAFE: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 ; GFX10-UNSAFE: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY]](s32) @@ -363,28 +431,32 @@ body: | ; GFX10-UNSAFE: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY1]](s32) ; GFX10-UNSAFE: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 ; GFX10-UNSAFE: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[COPY2]](s32) + ; GFX10-UNSAFE: [[COPY3:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 ; GFX10-UNSAFE: [[FMA:%[0-9]+]]:_(s16) = G_FMA [[TRUNC]], [[TRUNC1]], [[TRUNC2]] ; GFX10-UNSAFE: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[FMA]](s16) ; GFX10-UNSAFE: $vgpr0 = COPY [[ANYEXT]](s32) - ; GFX10-UNSAFE: S_SETPC_B64_return $sgpr30_sgpr31, implicit $vgpr0 + ; GFX10-UNSAFE: [[COPY4:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY3]] + ; GFX10-UNSAFE: S_SETPC_B64_return [[COPY4]], implicit $vgpr0 %4:_(s32) = COPY $vgpr0 %0:_(s16) = G_TRUNC %4(s32) %5:_(s32) = COPY $vgpr1 %1:_(s16) = G_TRUNC %5(s32) %6:_(s32) = COPY $vgpr2 %2:_(s16) = G_TRUNC %6(s32) + %3:sgpr_64 = COPY $sgpr30_sgpr31 %7:_(s16) = G_FMUL %0, %1 
%8:_(s16) = G_FADD %2, %7 %10:_(s32) = G_ANYEXT %8(s16) $vgpr0 = COPY %10(s32) - S_SETPC_B64_return $sgpr30_sgpr31, implicit $vgpr0 + %9:ccr_sgpr_64 = COPY %3 + S_SETPC_B64_return %9, implicit $vgpr0 ... --- name: test_double_add_mul body: | bb.1.entry: - liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5 + liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $sgpr30_sgpr31 ; GFX9-LABEL: name: test_double_add_mul ; GFX9: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 @@ -396,12 +468,14 @@ body: | ; GFX9: [[COPY4:%[0-9]+]]:_(s32) = COPY $vgpr4 ; GFX9: [[COPY5:%[0-9]+]]:_(s32) = COPY $vgpr5 ; GFX9: [[MV2:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY4]](s32), [[COPY5]](s32) + ; GFX9: [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 ; GFX9: [[FMUL:%[0-9]+]]:_(s64) = G_FMUL [[MV]], [[MV1]] ; GFX9: [[FADD:%[0-9]+]]:_(s64) = G_FADD [[FMUL]], [[MV2]] ; GFX9: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[FADD]](s64) ; GFX9: $vgpr0 = COPY [[UV]](s32) ; GFX9: $vgpr1 = COPY [[UV1]](s32) - ; GFX9: S_SETPC_B64_return $sgpr30_sgpr31, implicit $vgpr0, implicit $vgpr1 + ; GFX9: [[COPY7:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY6]] + ; GFX9: S_SETPC_B64_return [[COPY7]], implicit $vgpr0, implicit $vgpr1 ; GFX9-CONTRACT-LABEL: name: test_double_add_mul ; GFX9-CONTRACT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 ; GFX9-CONTRACT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 @@ -412,11 +486,13 @@ body: | ; GFX9-CONTRACT: [[COPY4:%[0-9]+]]:_(s32) = COPY $vgpr4 ; GFX9-CONTRACT: [[COPY5:%[0-9]+]]:_(s32) = COPY $vgpr5 ; GFX9-CONTRACT: [[MV2:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY4]](s32), [[COPY5]](s32) + ; GFX9-CONTRACT: [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 ; GFX9-CONTRACT: [[FMA:%[0-9]+]]:_(s64) = G_FMA [[MV]], [[MV1]], [[MV2]] ; GFX9-CONTRACT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[FMA]](s64) ; GFX9-CONTRACT: $vgpr0 = COPY [[UV]](s32) ; GFX9-CONTRACT: $vgpr1 = COPY [[UV1]](s32) - ; GFX9-CONTRACT: S_SETPC_B64_return $sgpr30_sgpr31, implicit $vgpr0, implicit $vgpr1 + ; GFX9-CONTRACT: [[COPY7:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY6]] + ; GFX9-CONTRACT: S_SETPC_B64_return [[COPY7]], implicit $vgpr0, implicit $vgpr1 ; GFX9-DENORM-LABEL: name: test_double_add_mul ; GFX9-DENORM: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 ; GFX9-DENORM: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 @@ -427,12 +503,14 @@ body: | ; GFX9-DENORM: [[COPY4:%[0-9]+]]:_(s32) = COPY $vgpr4 ; GFX9-DENORM: [[COPY5:%[0-9]+]]:_(s32) = COPY $vgpr5 ; GFX9-DENORM: [[MV2:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY4]](s32), [[COPY5]](s32) + ; GFX9-DENORM: [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 ; GFX9-DENORM: [[FMUL:%[0-9]+]]:_(s64) = G_FMUL [[MV]], [[MV1]] ; GFX9-DENORM: [[FADD:%[0-9]+]]:_(s64) = G_FADD [[FMUL]], [[MV2]] ; GFX9-DENORM: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[FADD]](s64) ; GFX9-DENORM: $vgpr0 = COPY [[UV]](s32) ; GFX9-DENORM: $vgpr1 = COPY [[UV1]](s32) - ; GFX9-DENORM: S_SETPC_B64_return $sgpr30_sgpr31, implicit $vgpr0, implicit $vgpr1 + ; GFX9-DENORM: [[COPY7:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY6]] + ; GFX9-DENORM: S_SETPC_B64_return [[COPY7]], implicit $vgpr0, implicit $vgpr1 ; GFX9-UNSAFE-LABEL: name: test_double_add_mul ; GFX9-UNSAFE: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 ; GFX9-UNSAFE: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 @@ -443,11 +521,13 @@ body: | ; GFX9-UNSAFE: [[COPY4:%[0-9]+]]:_(s32) = COPY $vgpr4 ; GFX9-UNSAFE: [[COPY5:%[0-9]+]]:_(s32) = COPY $vgpr5 ; GFX9-UNSAFE: [[MV2:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY4]](s32), [[COPY5]](s32) + ; GFX9-UNSAFE: 
[[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 ; GFX9-UNSAFE: [[FMA:%[0-9]+]]:_(s64) = G_FMA [[MV]], [[MV1]], [[MV2]] ; GFX9-UNSAFE: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[FMA]](s64) ; GFX9-UNSAFE: $vgpr0 = COPY [[UV]](s32) ; GFX9-UNSAFE: $vgpr1 = COPY [[UV1]](s32) - ; GFX9-UNSAFE: S_SETPC_B64_return $sgpr30_sgpr31, implicit $vgpr0, implicit $vgpr1 + ; GFX9-UNSAFE: [[COPY7:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY6]] + ; GFX9-UNSAFE: S_SETPC_B64_return [[COPY7]], implicit $vgpr0, implicit $vgpr1 ; GFX10-LABEL: name: test_double_add_mul ; GFX10: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 ; GFX10: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 @@ -458,12 +538,14 @@ body: | ; GFX10: [[COPY4:%[0-9]+]]:_(s32) = COPY $vgpr4 ; GFX10: [[COPY5:%[0-9]+]]:_(s32) = COPY $vgpr5 ; GFX10: [[MV2:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY4]](s32), [[COPY5]](s32) + ; GFX10: [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 ; GFX10: [[FMUL:%[0-9]+]]:_(s64) = G_FMUL [[MV]], [[MV1]] ; GFX10: [[FADD:%[0-9]+]]:_(s64) = G_FADD [[FMUL]], [[MV2]] ; GFX10: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[FADD]](s64) ; GFX10: $vgpr0 = COPY [[UV]](s32) ; GFX10: $vgpr1 = COPY [[UV1]](s32) - ; GFX10: S_SETPC_B64_return $sgpr30_sgpr31, implicit $vgpr0, implicit $vgpr1 + ; GFX10: [[COPY7:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY6]] + ; GFX10: S_SETPC_B64_return [[COPY7]], implicit $vgpr0, implicit $vgpr1 ; GFX10-CONTRACT-LABEL: name: test_double_add_mul ; GFX10-CONTRACT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 ; GFX10-CONTRACT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 @@ -474,11 +556,13 @@ body: | ; GFX10-CONTRACT: [[COPY4:%[0-9]+]]:_(s32) = COPY $vgpr4 ; GFX10-CONTRACT: [[COPY5:%[0-9]+]]:_(s32) = COPY $vgpr5 ; GFX10-CONTRACT: [[MV2:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY4]](s32), [[COPY5]](s32) + ; GFX10-CONTRACT: [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 ; GFX10-CONTRACT: [[FMA:%[0-9]+]]:_(s64) = G_FMA [[MV]], [[MV1]], [[MV2]] ; GFX10-CONTRACT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[FMA]](s64) ; GFX10-CONTRACT: $vgpr0 = COPY [[UV]](s32) ; GFX10-CONTRACT: $vgpr1 = COPY [[UV1]](s32) - ; GFX10-CONTRACT: S_SETPC_B64_return $sgpr30_sgpr31, implicit $vgpr0, implicit $vgpr1 + ; GFX10-CONTRACT: [[COPY7:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY6]] + ; GFX10-CONTRACT: S_SETPC_B64_return [[COPY7]], implicit $vgpr0, implicit $vgpr1 ; GFX10-DENORM-LABEL: name: test_double_add_mul ; GFX10-DENORM: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 ; GFX10-DENORM: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 @@ -489,12 +573,14 @@ body: | ; GFX10-DENORM: [[COPY4:%[0-9]+]]:_(s32) = COPY $vgpr4 ; GFX10-DENORM: [[COPY5:%[0-9]+]]:_(s32) = COPY $vgpr5 ; GFX10-DENORM: [[MV2:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY4]](s32), [[COPY5]](s32) + ; GFX10-DENORM: [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 ; GFX10-DENORM: [[FMUL:%[0-9]+]]:_(s64) = G_FMUL [[MV]], [[MV1]] ; GFX10-DENORM: [[FADD:%[0-9]+]]:_(s64) = G_FADD [[FMUL]], [[MV2]] ; GFX10-DENORM: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[FADD]](s64) ; GFX10-DENORM: $vgpr0 = COPY [[UV]](s32) ; GFX10-DENORM: $vgpr1 = COPY [[UV1]](s32) - ; GFX10-DENORM: S_SETPC_B64_return $sgpr30_sgpr31, implicit $vgpr0, implicit $vgpr1 + ; GFX10-DENORM: [[COPY7:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY6]] + ; GFX10-DENORM: S_SETPC_B64_return [[COPY7]], implicit $vgpr0, implicit $vgpr1 ; GFX10-UNSAFE-LABEL: name: test_double_add_mul ; GFX10-UNSAFE: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 ; GFX10-UNSAFE: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 @@ -505,11 +591,13 @@ body: | 
; GFX10-UNSAFE: [[COPY4:%[0-9]+]]:_(s32) = COPY $vgpr4 ; GFX10-UNSAFE: [[COPY5:%[0-9]+]]:_(s32) = COPY $vgpr5 ; GFX10-UNSAFE: [[MV2:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY4]](s32), [[COPY5]](s32) + ; GFX10-UNSAFE: [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 ; GFX10-UNSAFE: [[FMA:%[0-9]+]]:_(s64) = G_FMA [[MV]], [[MV1]], [[MV2]] ; GFX10-UNSAFE: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[FMA]](s64) ; GFX10-UNSAFE: $vgpr0 = COPY [[UV]](s32) ; GFX10-UNSAFE: $vgpr1 = COPY [[UV1]](s32) - ; GFX10-UNSAFE: S_SETPC_B64_return $sgpr30_sgpr31, implicit $vgpr0, implicit $vgpr1 + ; GFX10-UNSAFE: [[COPY7:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY6]] + ; GFX10-UNSAFE: S_SETPC_B64_return [[COPY7]], implicit $vgpr0, implicit $vgpr1 %4:_(s32) = COPY $vgpr0 %5:_(s32) = COPY $vgpr1 %0:_(s64) = G_MERGE_VALUES %4(s32), %5(s32) @@ -519,19 +607,21 @@ body: | %8:_(s32) = COPY $vgpr4 %9:_(s32) = COPY $vgpr5 %2:_(s64) = G_MERGE_VALUES %8(s32), %9(s32) + %3:sgpr_64 = COPY $sgpr30_sgpr31 %10:_(s64) = G_FMUL %0, %1 %11:_(s64) = G_FADD %10, %2 %13:_(s32), %14:_(s32) = G_UNMERGE_VALUES %11(s64) $vgpr0 = COPY %13(s32) $vgpr1 = COPY %14(s32) - S_SETPC_B64_return $sgpr30_sgpr31, implicit $vgpr0, implicit $vgpr1 + %12:ccr_sgpr_64 = COPY %3 + S_SETPC_B64_return %12, implicit $vgpr0, implicit $vgpr1 ... --- name: test_double_add_mul_rhs body: | bb.1.entry: - liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5 + liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $sgpr30_sgpr31 ; GFX9-LABEL: name: test_double_add_mul_rhs ; GFX9: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 @@ -543,12 +633,14 @@ body: | ; GFX9: [[COPY4:%[0-9]+]]:_(s32) = COPY $vgpr4 ; GFX9: [[COPY5:%[0-9]+]]:_(s32) = COPY $vgpr5 ; GFX9: [[MV2:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY4]](s32), [[COPY5]](s32) + ; GFX9: [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 ; GFX9: [[FMUL:%[0-9]+]]:_(s64) = G_FMUL [[MV]], [[MV1]] ; GFX9: [[FADD:%[0-9]+]]:_(s64) = G_FADD [[MV2]], [[FMUL]] ; GFX9: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[FADD]](s64) ; GFX9: $vgpr0 = COPY [[UV]](s32) ; GFX9: $vgpr1 = COPY [[UV1]](s32) - ; GFX9: S_SETPC_B64_return $sgpr30_sgpr31, implicit $vgpr0, implicit $vgpr1 + ; GFX9: [[COPY7:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY6]] + ; GFX9: S_SETPC_B64_return [[COPY7]], implicit $vgpr0, implicit $vgpr1 ; GFX9-CONTRACT-LABEL: name: test_double_add_mul_rhs ; GFX9-CONTRACT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 ; GFX9-CONTRACT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 @@ -559,11 +651,13 @@ body: | ; GFX9-CONTRACT: [[COPY4:%[0-9]+]]:_(s32) = COPY $vgpr4 ; GFX9-CONTRACT: [[COPY5:%[0-9]+]]:_(s32) = COPY $vgpr5 ; GFX9-CONTRACT: [[MV2:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY4]](s32), [[COPY5]](s32) + ; GFX9-CONTRACT: [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 ; GFX9-CONTRACT: [[FMA:%[0-9]+]]:_(s64) = G_FMA [[MV]], [[MV1]], [[MV2]] ; GFX9-CONTRACT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[FMA]](s64) ; GFX9-CONTRACT: $vgpr0 = COPY [[UV]](s32) ; GFX9-CONTRACT: $vgpr1 = COPY [[UV1]](s32) - ; GFX9-CONTRACT: S_SETPC_B64_return $sgpr30_sgpr31, implicit $vgpr0, implicit $vgpr1 + ; GFX9-CONTRACT: [[COPY7:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY6]] + ; GFX9-CONTRACT: S_SETPC_B64_return [[COPY7]], implicit $vgpr0, implicit $vgpr1 ; GFX9-DENORM-LABEL: name: test_double_add_mul_rhs ; GFX9-DENORM: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 ; GFX9-DENORM: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 @@ -574,12 +668,14 @@ body: | ; GFX9-DENORM: [[COPY4:%[0-9]+]]:_(s32) = COPY $vgpr4 ; GFX9-DENORM: 
[[COPY5:%[0-9]+]]:_(s32) = COPY $vgpr5 ; GFX9-DENORM: [[MV2:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY4]](s32), [[COPY5]](s32) + ; GFX9-DENORM: [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 ; GFX9-DENORM: [[FMUL:%[0-9]+]]:_(s64) = G_FMUL [[MV]], [[MV1]] ; GFX9-DENORM: [[FADD:%[0-9]+]]:_(s64) = G_FADD [[MV2]], [[FMUL]] ; GFX9-DENORM: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[FADD]](s64) ; GFX9-DENORM: $vgpr0 = COPY [[UV]](s32) ; GFX9-DENORM: $vgpr1 = COPY [[UV1]](s32) - ; GFX9-DENORM: S_SETPC_B64_return $sgpr30_sgpr31, implicit $vgpr0, implicit $vgpr1 + ; GFX9-DENORM: [[COPY7:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY6]] + ; GFX9-DENORM: S_SETPC_B64_return [[COPY7]], implicit $vgpr0, implicit $vgpr1 ; GFX9-UNSAFE-LABEL: name: test_double_add_mul_rhs ; GFX9-UNSAFE: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 ; GFX9-UNSAFE: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 @@ -590,11 +686,13 @@ body: | ; GFX9-UNSAFE: [[COPY4:%[0-9]+]]:_(s32) = COPY $vgpr4 ; GFX9-UNSAFE: [[COPY5:%[0-9]+]]:_(s32) = COPY $vgpr5 ; GFX9-UNSAFE: [[MV2:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY4]](s32), [[COPY5]](s32) + ; GFX9-UNSAFE: [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 ; GFX9-UNSAFE: [[FMA:%[0-9]+]]:_(s64) = G_FMA [[MV]], [[MV1]], [[MV2]] ; GFX9-UNSAFE: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[FMA]](s64) ; GFX9-UNSAFE: $vgpr0 = COPY [[UV]](s32) ; GFX9-UNSAFE: $vgpr1 = COPY [[UV1]](s32) - ; GFX9-UNSAFE: S_SETPC_B64_return $sgpr30_sgpr31, implicit $vgpr0, implicit $vgpr1 + ; GFX9-UNSAFE: [[COPY7:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY6]] + ; GFX9-UNSAFE: S_SETPC_B64_return [[COPY7]], implicit $vgpr0, implicit $vgpr1 ; GFX10-LABEL: name: test_double_add_mul_rhs ; GFX10: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 ; GFX10: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 @@ -605,12 +703,14 @@ body: | ; GFX10: [[COPY4:%[0-9]+]]:_(s32) = COPY $vgpr4 ; GFX10: [[COPY5:%[0-9]+]]:_(s32) = COPY $vgpr5 ; GFX10: [[MV2:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY4]](s32), [[COPY5]](s32) + ; GFX10: [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 ; GFX10: [[FMUL:%[0-9]+]]:_(s64) = G_FMUL [[MV]], [[MV1]] ; GFX10: [[FADD:%[0-9]+]]:_(s64) = G_FADD [[MV2]], [[FMUL]] ; GFX10: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[FADD]](s64) ; GFX10: $vgpr0 = COPY [[UV]](s32) ; GFX10: $vgpr1 = COPY [[UV1]](s32) - ; GFX10: S_SETPC_B64_return $sgpr30_sgpr31, implicit $vgpr0, implicit $vgpr1 + ; GFX10: [[COPY7:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY6]] + ; GFX10: S_SETPC_B64_return [[COPY7]], implicit $vgpr0, implicit $vgpr1 ; GFX10-CONTRACT-LABEL: name: test_double_add_mul_rhs ; GFX10-CONTRACT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 ; GFX10-CONTRACT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 @@ -621,11 +721,13 @@ body: | ; GFX10-CONTRACT: [[COPY4:%[0-9]+]]:_(s32) = COPY $vgpr4 ; GFX10-CONTRACT: [[COPY5:%[0-9]+]]:_(s32) = COPY $vgpr5 ; GFX10-CONTRACT: [[MV2:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY4]](s32), [[COPY5]](s32) + ; GFX10-CONTRACT: [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 ; GFX10-CONTRACT: [[FMA:%[0-9]+]]:_(s64) = G_FMA [[MV]], [[MV1]], [[MV2]] ; GFX10-CONTRACT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[FMA]](s64) ; GFX10-CONTRACT: $vgpr0 = COPY [[UV]](s32) ; GFX10-CONTRACT: $vgpr1 = COPY [[UV1]](s32) - ; GFX10-CONTRACT: S_SETPC_B64_return $sgpr30_sgpr31, implicit $vgpr0, implicit $vgpr1 + ; GFX10-CONTRACT: [[COPY7:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY6]] + ; GFX10-CONTRACT: S_SETPC_B64_return [[COPY7]], implicit $vgpr0, implicit $vgpr1 ; GFX10-DENORM-LABEL: name: 
test_double_add_mul_rhs ; GFX10-DENORM: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 ; GFX10-DENORM: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 @@ -636,12 +738,14 @@ body: | ; GFX10-DENORM: [[COPY4:%[0-9]+]]:_(s32) = COPY $vgpr4 ; GFX10-DENORM: [[COPY5:%[0-9]+]]:_(s32) = COPY $vgpr5 ; GFX10-DENORM: [[MV2:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY4]](s32), [[COPY5]](s32) + ; GFX10-DENORM: [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 ; GFX10-DENORM: [[FMUL:%[0-9]+]]:_(s64) = G_FMUL [[MV]], [[MV1]] ; GFX10-DENORM: [[FADD:%[0-9]+]]:_(s64) = G_FADD [[MV2]], [[FMUL]] ; GFX10-DENORM: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[FADD]](s64) ; GFX10-DENORM: $vgpr0 = COPY [[UV]](s32) ; GFX10-DENORM: $vgpr1 = COPY [[UV1]](s32) - ; GFX10-DENORM: S_SETPC_B64_return $sgpr30_sgpr31, implicit $vgpr0, implicit $vgpr1 + ; GFX10-DENORM: [[COPY7:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY6]] + ; GFX10-DENORM: S_SETPC_B64_return [[COPY7]], implicit $vgpr0, implicit $vgpr1 ; GFX10-UNSAFE-LABEL: name: test_double_add_mul_rhs ; GFX10-UNSAFE: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 ; GFX10-UNSAFE: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 @@ -652,11 +756,13 @@ body: | ; GFX10-UNSAFE: [[COPY4:%[0-9]+]]:_(s32) = COPY $vgpr4 ; GFX10-UNSAFE: [[COPY5:%[0-9]+]]:_(s32) = COPY $vgpr5 ; GFX10-UNSAFE: [[MV2:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY4]](s32), [[COPY5]](s32) + ; GFX10-UNSAFE: [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 ; GFX10-UNSAFE: [[FMA:%[0-9]+]]:_(s64) = G_FMA [[MV]], [[MV1]], [[MV2]] ; GFX10-UNSAFE: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[FMA]](s64) ; GFX10-UNSAFE: $vgpr0 = COPY [[UV]](s32) ; GFX10-UNSAFE: $vgpr1 = COPY [[UV1]](s32) - ; GFX10-UNSAFE: S_SETPC_B64_return $sgpr30_sgpr31, implicit $vgpr0, implicit $vgpr1 + ; GFX10-UNSAFE: [[COPY7:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY6]] + ; GFX10-UNSAFE: S_SETPC_B64_return [[COPY7]], implicit $vgpr0, implicit $vgpr1 %4:_(s32) = COPY $vgpr0 %5:_(s32) = COPY $vgpr1 %0:_(s64) = G_MERGE_VALUES %4(s32), %5(s32) @@ -666,19 +772,21 @@ body: | %8:_(s32) = COPY $vgpr4 %9:_(s32) = COPY $vgpr5 %2:_(s64) = G_MERGE_VALUES %8(s32), %9(s32) + %3:sgpr_64 = COPY $sgpr30_sgpr31 %10:_(s64) = G_FMUL %0, %1 %11:_(s64) = G_FADD %2, %10 %13:_(s32), %14:_(s32) = G_UNMERGE_VALUES %11(s64) $vgpr0 = COPY %13(s32) $vgpr1 = COPY %14(s32) - S_SETPC_B64_return $sgpr30_sgpr31, implicit $vgpr0, implicit $vgpr1 + %12:ccr_sgpr_64 = COPY %3 + S_SETPC_B64_return %12, implicit $vgpr0, implicit $vgpr1 ... 
--- name: test_4xfloat_add_mul body: | bb.1.entry: - liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $vgpr8, $vgpr9, $vgpr10 + liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $vgpr8, $vgpr9, $vgpr10, $vgpr11, $sgpr30_sgpr31 ; GFX9-LABEL: name: test_4xfloat_add_mul ; GFX9: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 @@ -696,6 +804,7 @@ body: | ; GFX9: [[COPY10:%[0-9]+]]:_(s32) = COPY $vgpr10 ; GFX9: [[COPY11:%[0-9]+]]:_(s32) = COPY $vgpr11 ; GFX9: [[BUILD_VECTOR2:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32), [[COPY10]](s32), [[COPY11]](s32) + ; GFX9: [[COPY12:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 ; GFX9: [[FMUL:%[0-9]+]]:_(<4 x s32>) = G_FMUL [[BUILD_VECTOR]], [[BUILD_VECTOR1]] ; GFX9: [[FADD:%[0-9]+]]:_(<4 x s32>) = G_FADD [[FMUL]], [[BUILD_VECTOR2]] ; GFX9: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[FADD]](<4 x s32>) @@ -703,7 +812,8 @@ body: | ; GFX9: $vgpr1 = COPY [[UV1]](s32) ; GFX9: $vgpr2 = COPY [[UV2]](s32) ; GFX9: $vgpr3 = COPY [[UV3]](s32) - ; GFX9: S_SETPC_B64_return $sgpr30_sgpr31, implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3 + ; GFX9: [[COPY13:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY12]] + ; GFX9: S_SETPC_B64_return [[COPY13]], implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3 ; GFX9-CONTRACT-LABEL: name: test_4xfloat_add_mul ; GFX9-CONTRACT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 ; GFX9-CONTRACT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 @@ -720,6 +830,7 @@ body: | ; GFX9-CONTRACT: [[COPY10:%[0-9]+]]:_(s32) = COPY $vgpr10 ; GFX9-CONTRACT: [[COPY11:%[0-9]+]]:_(s32) = COPY $vgpr11 ; GFX9-CONTRACT: [[BUILD_VECTOR2:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32), [[COPY10]](s32), [[COPY11]](s32) + ; GFX9-CONTRACT: [[COPY12:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 ; GFX9-CONTRACT: [[FMUL:%[0-9]+]]:_(<4 x s32>) = G_FMUL [[BUILD_VECTOR]], [[BUILD_VECTOR1]] ; GFX9-CONTRACT: [[FADD:%[0-9]+]]:_(<4 x s32>) = G_FADD [[FMUL]], [[BUILD_VECTOR2]] ; GFX9-CONTRACT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[FADD]](<4 x s32>) @@ -727,7 +838,8 @@ body: | ; GFX9-CONTRACT: $vgpr1 = COPY [[UV1]](s32) ; GFX9-CONTRACT: $vgpr2 = COPY [[UV2]](s32) ; GFX9-CONTRACT: $vgpr3 = COPY [[UV3]](s32) - ; GFX9-CONTRACT: S_SETPC_B64_return $sgpr30_sgpr31, implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3 + ; GFX9-CONTRACT: [[COPY13:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY12]] + ; GFX9-CONTRACT: S_SETPC_B64_return [[COPY13]], implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3 ; GFX9-DENORM-LABEL: name: test_4xfloat_add_mul ; GFX9-DENORM: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 ; GFX9-DENORM: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 @@ -744,6 +856,7 @@ body: | ; GFX9-DENORM: [[COPY10:%[0-9]+]]:_(s32) = COPY $vgpr10 ; GFX9-DENORM: [[COPY11:%[0-9]+]]:_(s32) = COPY $vgpr11 ; GFX9-DENORM: [[BUILD_VECTOR2:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32), [[COPY10]](s32), [[COPY11]](s32) + ; GFX9-DENORM: [[COPY12:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 ; GFX9-DENORM: [[FMUL:%[0-9]+]]:_(<4 x s32>) = G_FMUL [[BUILD_VECTOR]], [[BUILD_VECTOR1]] ; GFX9-DENORM: [[FADD:%[0-9]+]]:_(<4 x s32>) = G_FADD [[FMUL]], [[BUILD_VECTOR2]] ; GFX9-DENORM: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[FADD]](<4 x s32>) @@ -751,7 +864,8 @@ body: | ; GFX9-DENORM: 
$vgpr1 = COPY [[UV1]](s32) ; GFX9-DENORM: $vgpr2 = COPY [[UV2]](s32) ; GFX9-DENORM: $vgpr3 = COPY [[UV3]](s32) - ; GFX9-DENORM: S_SETPC_B64_return $sgpr30_sgpr31, implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3 + ; GFX9-DENORM: [[COPY13:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY12]] + ; GFX9-DENORM: S_SETPC_B64_return [[COPY13]], implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3 ; GFX9-UNSAFE-LABEL: name: test_4xfloat_add_mul ; GFX9-UNSAFE: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 ; GFX9-UNSAFE: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 @@ -768,6 +882,7 @@ body: | ; GFX9-UNSAFE: [[COPY10:%[0-9]+]]:_(s32) = COPY $vgpr10 ; GFX9-UNSAFE: [[COPY11:%[0-9]+]]:_(s32) = COPY $vgpr11 ; GFX9-UNSAFE: [[BUILD_VECTOR2:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32), [[COPY10]](s32), [[COPY11]](s32) + ; GFX9-UNSAFE: [[COPY12:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 ; GFX9-UNSAFE: [[FMUL:%[0-9]+]]:_(<4 x s32>) = G_FMUL [[BUILD_VECTOR]], [[BUILD_VECTOR1]] ; GFX9-UNSAFE: [[FADD:%[0-9]+]]:_(<4 x s32>) = G_FADD [[FMUL]], [[BUILD_VECTOR2]] ; GFX9-UNSAFE: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[FADD]](<4 x s32>) @@ -775,7 +890,8 @@ body: | ; GFX9-UNSAFE: $vgpr1 = COPY [[UV1]](s32) ; GFX9-UNSAFE: $vgpr2 = COPY [[UV2]](s32) ; GFX9-UNSAFE: $vgpr3 = COPY [[UV3]](s32) - ; GFX9-UNSAFE: S_SETPC_B64_return $sgpr30_sgpr31, implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3 + ; GFX9-UNSAFE: [[COPY13:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY12]] + ; GFX9-UNSAFE: S_SETPC_B64_return [[COPY13]], implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3 ; GFX10-LABEL: name: test_4xfloat_add_mul ; GFX10: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 ; GFX10: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 @@ -792,6 +908,7 @@ body: | ; GFX10: [[COPY10:%[0-9]+]]:_(s32) = COPY $vgpr10 ; GFX10: [[COPY11:%[0-9]+]]:_(s32) = COPY $vgpr11 ; GFX10: [[BUILD_VECTOR2:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32), [[COPY10]](s32), [[COPY11]](s32) + ; GFX10: [[COPY12:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 ; GFX10: [[FMUL:%[0-9]+]]:_(<4 x s32>) = G_FMUL [[BUILD_VECTOR]], [[BUILD_VECTOR1]] ; GFX10: [[FADD:%[0-9]+]]:_(<4 x s32>) = G_FADD [[FMUL]], [[BUILD_VECTOR2]] ; GFX10: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[FADD]](<4 x s32>) @@ -799,7 +916,8 @@ body: | ; GFX10: $vgpr1 = COPY [[UV1]](s32) ; GFX10: $vgpr2 = COPY [[UV2]](s32) ; GFX10: $vgpr3 = COPY [[UV3]](s32) - ; GFX10: S_SETPC_B64_return $sgpr30_sgpr31, implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3 + ; GFX10: [[COPY13:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY12]] + ; GFX10: S_SETPC_B64_return [[COPY13]], implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3 ; GFX10-CONTRACT-LABEL: name: test_4xfloat_add_mul ; GFX10-CONTRACT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 ; GFX10-CONTRACT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 @@ -816,6 +934,7 @@ body: | ; GFX10-CONTRACT: [[COPY10:%[0-9]+]]:_(s32) = COPY $vgpr10 ; GFX10-CONTRACT: [[COPY11:%[0-9]+]]:_(s32) = COPY $vgpr11 ; GFX10-CONTRACT: [[BUILD_VECTOR2:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32), [[COPY10]](s32), [[COPY11]](s32) + ; GFX10-CONTRACT: [[COPY12:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 ; GFX10-CONTRACT: [[FMUL:%[0-9]+]]:_(<4 x s32>) = G_FMUL [[BUILD_VECTOR]], [[BUILD_VECTOR1]] ; GFX10-CONTRACT: [[FADD:%[0-9]+]]:_(<4 x s32>) = G_FADD [[FMUL]], [[BUILD_VECTOR2]] ; 
GFX10-CONTRACT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[FADD]](<4 x s32>) @@ -823,7 +942,8 @@ body: | ; GFX10-CONTRACT: $vgpr1 = COPY [[UV1]](s32) ; GFX10-CONTRACT: $vgpr2 = COPY [[UV2]](s32) ; GFX10-CONTRACT: $vgpr3 = COPY [[UV3]](s32) - ; GFX10-CONTRACT: S_SETPC_B64_return $sgpr30_sgpr31, implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3 + ; GFX10-CONTRACT: [[COPY13:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY12]] + ; GFX10-CONTRACT: S_SETPC_B64_return [[COPY13]], implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3 ; GFX10-DENORM-LABEL: name: test_4xfloat_add_mul ; GFX10-DENORM: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 ; GFX10-DENORM: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 @@ -840,6 +960,7 @@ body: | ; GFX10-DENORM: [[COPY10:%[0-9]+]]:_(s32) = COPY $vgpr10 ; GFX10-DENORM: [[COPY11:%[0-9]+]]:_(s32) = COPY $vgpr11 ; GFX10-DENORM: [[BUILD_VECTOR2:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32), [[COPY10]](s32), [[COPY11]](s32) + ; GFX10-DENORM: [[COPY12:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 ; GFX10-DENORM: [[FMUL:%[0-9]+]]:_(<4 x s32>) = G_FMUL [[BUILD_VECTOR]], [[BUILD_VECTOR1]] ; GFX10-DENORM: [[FADD:%[0-9]+]]:_(<4 x s32>) = G_FADD [[FMUL]], [[BUILD_VECTOR2]] ; GFX10-DENORM: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[FADD]](<4 x s32>) @@ -847,7 +968,8 @@ body: | ; GFX10-DENORM: $vgpr1 = COPY [[UV1]](s32) ; GFX10-DENORM: $vgpr2 = COPY [[UV2]](s32) ; GFX10-DENORM: $vgpr3 = COPY [[UV3]](s32) - ; GFX10-DENORM: S_SETPC_B64_return $sgpr30_sgpr31, implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3 + ; GFX10-DENORM: [[COPY13:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY12]] + ; GFX10-DENORM: S_SETPC_B64_return [[COPY13]], implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3 ; GFX10-UNSAFE-LABEL: name: test_4xfloat_add_mul ; GFX10-UNSAFE: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 ; GFX10-UNSAFE: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 @@ -864,6 +986,7 @@ body: | ; GFX10-UNSAFE: [[COPY10:%[0-9]+]]:_(s32) = COPY $vgpr10 ; GFX10-UNSAFE: [[COPY11:%[0-9]+]]:_(s32) = COPY $vgpr11 ; GFX10-UNSAFE: [[BUILD_VECTOR2:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32), [[COPY10]](s32), [[COPY11]](s32) + ; GFX10-UNSAFE: [[COPY12:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 ; GFX10-UNSAFE: [[FMUL:%[0-9]+]]:_(<4 x s32>) = G_FMUL [[BUILD_VECTOR]], [[BUILD_VECTOR1]] ; GFX10-UNSAFE: [[FADD:%[0-9]+]]:_(<4 x s32>) = G_FADD [[FMUL]], [[BUILD_VECTOR2]] ; GFX10-UNSAFE: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[FADD]](<4 x s32>) @@ -871,7 +994,8 @@ body: | ; GFX10-UNSAFE: $vgpr1 = COPY [[UV1]](s32) ; GFX10-UNSAFE: $vgpr2 = COPY [[UV2]](s32) ; GFX10-UNSAFE: $vgpr3 = COPY [[UV3]](s32) - ; GFX10-UNSAFE: S_SETPC_B64_return $sgpr30_sgpr31, implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3 + ; GFX10-UNSAFE: [[COPY13:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY12]] + ; GFX10-UNSAFE: S_SETPC_B64_return [[COPY13]], implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3 %4:_(s32) = COPY $vgpr0 %5:_(s32) = COPY $vgpr1 %6:_(s32) = COPY $vgpr2 @@ -887,6 +1011,7 @@ body: | %14:_(s32) = COPY $vgpr10 %15:_(s32) = COPY $vgpr11 %2:_(<4 x s32>) = G_BUILD_VECTOR %12(s32), %13(s32), %14(s32), %15(s32) + %3:sgpr_64 = COPY $sgpr30_sgpr31 %16:_(<4 x s32>) = G_FMUL %0, %1 %17:_(<4 x s32>) = G_FADD %16, %2 %19:_(s32), %20:_(s32), 
%21:_(s32), %22:_(s32) = G_UNMERGE_VALUES %17(<4 x s32>) @@ -894,14 +1019,15 @@ body: | $vgpr1 = COPY %20(s32) $vgpr2 = COPY %21(s32) $vgpr3 = COPY %22(s32) - S_SETPC_B64_return $sgpr30_sgpr31, implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3 + %18:ccr_sgpr_64 = COPY %3 + S_SETPC_B64_return %18, implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3 ... --- name: test_3xfloat_add_mul_rhs body: | bb.1.entry: - liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $vgpr8 + liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $vgpr8, $sgpr30_sgpr31 ; GFX9-LABEL: name: test_3xfloat_add_mul_rhs ; GFX9: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 @@ -916,13 +1042,15 @@ body: | ; GFX9: [[COPY7:%[0-9]+]]:_(s32) = COPY $vgpr7 ; GFX9: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr8 ; GFX9: [[BUILD_VECTOR2:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[COPY6]](s32), [[COPY7]](s32), [[COPY8]](s32) + ; GFX9: [[COPY9:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 ; GFX9: [[FMUL:%[0-9]+]]:_(<3 x s32>) = G_FMUL [[BUILD_VECTOR]], [[BUILD_VECTOR1]] ; GFX9: [[FADD:%[0-9]+]]:_(<3 x s32>) = G_FADD [[BUILD_VECTOR2]], [[FMUL]] ; GFX9: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[FADD]](<3 x s32>) ; GFX9: $vgpr0 = COPY [[UV]](s32) ; GFX9: $vgpr1 = COPY [[UV1]](s32) ; GFX9: $vgpr2 = COPY [[UV2]](s32) - ; GFX9: S_SETPC_B64_return $sgpr30_sgpr31, implicit $vgpr0, implicit $vgpr1, implicit $vgpr2 + ; GFX9: [[COPY10:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY9]] + ; GFX9: S_SETPC_B64_return [[COPY10]], implicit $vgpr0, implicit $vgpr1, implicit $vgpr2 ; GFX9-CONTRACT-LABEL: name: test_3xfloat_add_mul_rhs ; GFX9-CONTRACT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 ; GFX9-CONTRACT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 @@ -936,13 +1064,15 @@ body: | ; GFX9-CONTRACT: [[COPY7:%[0-9]+]]:_(s32) = COPY $vgpr7 ; GFX9-CONTRACT: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr8 ; GFX9-CONTRACT: [[BUILD_VECTOR2:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[COPY6]](s32), [[COPY7]](s32), [[COPY8]](s32) + ; GFX9-CONTRACT: [[COPY9:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 ; GFX9-CONTRACT: [[FMUL:%[0-9]+]]:_(<3 x s32>) = G_FMUL [[BUILD_VECTOR]], [[BUILD_VECTOR1]] ; GFX9-CONTRACT: [[FADD:%[0-9]+]]:_(<3 x s32>) = G_FADD [[BUILD_VECTOR2]], [[FMUL]] ; GFX9-CONTRACT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[FADD]](<3 x s32>) ; GFX9-CONTRACT: $vgpr0 = COPY [[UV]](s32) ; GFX9-CONTRACT: $vgpr1 = COPY [[UV1]](s32) ; GFX9-CONTRACT: $vgpr2 = COPY [[UV2]](s32) - ; GFX9-CONTRACT: S_SETPC_B64_return $sgpr30_sgpr31, implicit $vgpr0, implicit $vgpr1, implicit $vgpr2 + ; GFX9-CONTRACT: [[COPY10:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY9]] + ; GFX9-CONTRACT: S_SETPC_B64_return [[COPY10]], implicit $vgpr0, implicit $vgpr1, implicit $vgpr2 ; GFX9-DENORM-LABEL: name: test_3xfloat_add_mul_rhs ; GFX9-DENORM: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 ; GFX9-DENORM: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 @@ -956,13 +1086,15 @@ body: | ; GFX9-DENORM: [[COPY7:%[0-9]+]]:_(s32) = COPY $vgpr7 ; GFX9-DENORM: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr8 ; GFX9-DENORM: [[BUILD_VECTOR2:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[COPY6]](s32), [[COPY7]](s32), [[COPY8]](s32) + ; GFX9-DENORM: [[COPY9:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 ; GFX9-DENORM: [[FMUL:%[0-9]+]]:_(<3 x s32>) = G_FMUL [[BUILD_VECTOR]], [[BUILD_VECTOR1]] ; GFX9-DENORM: [[FADD:%[0-9]+]]:_(<3 x s32>) = G_FADD [[BUILD_VECTOR2]], [[FMUL]] ; GFX9-DENORM: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), 
[[UV2:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[FADD]](<3 x s32>) ; GFX9-DENORM: $vgpr0 = COPY [[UV]](s32) ; GFX9-DENORM: $vgpr1 = COPY [[UV1]](s32) ; GFX9-DENORM: $vgpr2 = COPY [[UV2]](s32) - ; GFX9-DENORM: S_SETPC_B64_return $sgpr30_sgpr31, implicit $vgpr0, implicit $vgpr1, implicit $vgpr2 + ; GFX9-DENORM: [[COPY10:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY9]] + ; GFX9-DENORM: S_SETPC_B64_return [[COPY10]], implicit $vgpr0, implicit $vgpr1, implicit $vgpr2 ; GFX9-UNSAFE-LABEL: name: test_3xfloat_add_mul_rhs ; GFX9-UNSAFE: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 ; GFX9-UNSAFE: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 @@ -976,13 +1108,15 @@ body: | ; GFX9-UNSAFE: [[COPY7:%[0-9]+]]:_(s32) = COPY $vgpr7 ; GFX9-UNSAFE: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr8 ; GFX9-UNSAFE: [[BUILD_VECTOR2:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[COPY6]](s32), [[COPY7]](s32), [[COPY8]](s32) + ; GFX9-UNSAFE: [[COPY9:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 ; GFX9-UNSAFE: [[FMUL:%[0-9]+]]:_(<3 x s32>) = G_FMUL [[BUILD_VECTOR]], [[BUILD_VECTOR1]] ; GFX9-UNSAFE: [[FADD:%[0-9]+]]:_(<3 x s32>) = G_FADD [[BUILD_VECTOR2]], [[FMUL]] ; GFX9-UNSAFE: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[FADD]](<3 x s32>) ; GFX9-UNSAFE: $vgpr0 = COPY [[UV]](s32) ; GFX9-UNSAFE: $vgpr1 = COPY [[UV1]](s32) ; GFX9-UNSAFE: $vgpr2 = COPY [[UV2]](s32) - ; GFX9-UNSAFE: S_SETPC_B64_return $sgpr30_sgpr31, implicit $vgpr0, implicit $vgpr1, implicit $vgpr2 + ; GFX9-UNSAFE: [[COPY10:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY9]] + ; GFX9-UNSAFE: S_SETPC_B64_return [[COPY10]], implicit $vgpr0, implicit $vgpr1, implicit $vgpr2 ; GFX10-LABEL: name: test_3xfloat_add_mul_rhs ; GFX10: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 ; GFX10: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 @@ -996,13 +1130,15 @@ body: | ; GFX10: [[COPY7:%[0-9]+]]:_(s32) = COPY $vgpr7 ; GFX10: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr8 ; GFX10: [[BUILD_VECTOR2:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[COPY6]](s32), [[COPY7]](s32), [[COPY8]](s32) + ; GFX10: [[COPY9:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 ; GFX10: [[FMUL:%[0-9]+]]:_(<3 x s32>) = G_FMUL [[BUILD_VECTOR]], [[BUILD_VECTOR1]] ; GFX10: [[FADD:%[0-9]+]]:_(<3 x s32>) = G_FADD [[BUILD_VECTOR2]], [[FMUL]] ; GFX10: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[FADD]](<3 x s32>) ; GFX10: $vgpr0 = COPY [[UV]](s32) ; GFX10: $vgpr1 = COPY [[UV1]](s32) ; GFX10: $vgpr2 = COPY [[UV2]](s32) - ; GFX10: S_SETPC_B64_return $sgpr30_sgpr31, implicit $vgpr0, implicit $vgpr1, implicit $vgpr2 + ; GFX10: [[COPY10:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY9]] + ; GFX10: S_SETPC_B64_return [[COPY10]], implicit $vgpr0, implicit $vgpr1, implicit $vgpr2 ; GFX10-CONTRACT-LABEL: name: test_3xfloat_add_mul_rhs ; GFX10-CONTRACT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 ; GFX10-CONTRACT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 @@ -1016,13 +1152,15 @@ body: | ; GFX10-CONTRACT: [[COPY7:%[0-9]+]]:_(s32) = COPY $vgpr7 ; GFX10-CONTRACT: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr8 ; GFX10-CONTRACT: [[BUILD_VECTOR2:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[COPY6]](s32), [[COPY7]](s32), [[COPY8]](s32) + ; GFX10-CONTRACT: [[COPY9:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 ; GFX10-CONTRACT: [[FMUL:%[0-9]+]]:_(<3 x s32>) = G_FMUL [[BUILD_VECTOR]], [[BUILD_VECTOR1]] ; GFX10-CONTRACT: [[FADD:%[0-9]+]]:_(<3 x s32>) = G_FADD [[BUILD_VECTOR2]], [[FMUL]] ; GFX10-CONTRACT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[FADD]](<3 x s32>) ; GFX10-CONTRACT: $vgpr0 = COPY [[UV]](s32) ; 
GFX10-CONTRACT: $vgpr1 = COPY [[UV1]](s32) ; GFX10-CONTRACT: $vgpr2 = COPY [[UV2]](s32) - ; GFX10-CONTRACT: S_SETPC_B64_return $sgpr30_sgpr31, implicit $vgpr0, implicit $vgpr1, implicit $vgpr2 + ; GFX10-CONTRACT: [[COPY10:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY9]] + ; GFX10-CONTRACT: S_SETPC_B64_return [[COPY10]], implicit $vgpr0, implicit $vgpr1, implicit $vgpr2 ; GFX10-DENORM-LABEL: name: test_3xfloat_add_mul_rhs ; GFX10-DENORM: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 ; GFX10-DENORM: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 @@ -1036,13 +1174,15 @@ body: | ; GFX10-DENORM: [[COPY7:%[0-9]+]]:_(s32) = COPY $vgpr7 ; GFX10-DENORM: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr8 ; GFX10-DENORM: [[BUILD_VECTOR2:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[COPY6]](s32), [[COPY7]](s32), [[COPY8]](s32) + ; GFX10-DENORM: [[COPY9:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 ; GFX10-DENORM: [[FMUL:%[0-9]+]]:_(<3 x s32>) = G_FMUL [[BUILD_VECTOR]], [[BUILD_VECTOR1]] ; GFX10-DENORM: [[FADD:%[0-9]+]]:_(<3 x s32>) = G_FADD [[BUILD_VECTOR2]], [[FMUL]] ; GFX10-DENORM: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[FADD]](<3 x s32>) ; GFX10-DENORM: $vgpr0 = COPY [[UV]](s32) ; GFX10-DENORM: $vgpr1 = COPY [[UV1]](s32) ; GFX10-DENORM: $vgpr2 = COPY [[UV2]](s32) - ; GFX10-DENORM: S_SETPC_B64_return $sgpr30_sgpr31, implicit $vgpr0, implicit $vgpr1, implicit $vgpr2 + ; GFX10-DENORM: [[COPY10:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY9]] + ; GFX10-DENORM: S_SETPC_B64_return [[COPY10]], implicit $vgpr0, implicit $vgpr1, implicit $vgpr2 ; GFX10-UNSAFE-LABEL: name: test_3xfloat_add_mul_rhs ; GFX10-UNSAFE: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 ; GFX10-UNSAFE: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 @@ -1056,13 +1196,15 @@ body: | ; GFX10-UNSAFE: [[COPY7:%[0-9]+]]:_(s32) = COPY $vgpr7 ; GFX10-UNSAFE: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr8 ; GFX10-UNSAFE: [[BUILD_VECTOR2:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[COPY6]](s32), [[COPY7]](s32), [[COPY8]](s32) + ; GFX10-UNSAFE: [[COPY9:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 ; GFX10-UNSAFE: [[FMUL:%[0-9]+]]:_(<3 x s32>) = G_FMUL [[BUILD_VECTOR]], [[BUILD_VECTOR1]] ; GFX10-UNSAFE: [[FADD:%[0-9]+]]:_(<3 x s32>) = G_FADD [[BUILD_VECTOR2]], [[FMUL]] ; GFX10-UNSAFE: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[FADD]](<3 x s32>) ; GFX10-UNSAFE: $vgpr0 = COPY [[UV]](s32) ; GFX10-UNSAFE: $vgpr1 = COPY [[UV1]](s32) ; GFX10-UNSAFE: $vgpr2 = COPY [[UV2]](s32) - ; GFX10-UNSAFE: S_SETPC_B64_return $sgpr30_sgpr31, implicit $vgpr0, implicit $vgpr1, implicit $vgpr2 + ; GFX10-UNSAFE: [[COPY10:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY9]] + ; GFX10-UNSAFE: S_SETPC_B64_return [[COPY10]], implicit $vgpr0, implicit $vgpr1, implicit $vgpr2 %4:_(s32) = COPY $vgpr0 %5:_(s32) = COPY $vgpr1 %6:_(s32) = COPY $vgpr2 @@ -1075,20 +1217,22 @@ body: | %11:_(s32) = COPY $vgpr7 %12:_(s32) = COPY $vgpr8 %2:_(<3 x s32>) = G_BUILD_VECTOR %10(s32), %11(s32), %12(s32) + %3:sgpr_64 = COPY $sgpr30_sgpr31 %13:_(<3 x s32>) = G_FMUL %0, %1 %14:_(<3 x s32>) = G_FADD %2, %13 %16:_(s32), %17:_(s32), %18:_(s32) = G_UNMERGE_VALUES %14(<3 x s32>) $vgpr0 = COPY %16(s32) $vgpr1 = COPY %17(s32) $vgpr2 = COPY %18(s32) - S_SETPC_B64_return $sgpr30_sgpr31, implicit $vgpr0, implicit $vgpr1, implicit $vgpr2 + %15:ccr_sgpr_64 = COPY %3 + S_SETPC_B64_return %15, implicit $vgpr0, implicit $vgpr1, implicit $vgpr2 ... 
--- name: test_4xhalf_add_mul body: | bb.1.entry: - liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5 + liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $sgpr30_sgpr31 ; GFX9-LABEL: name: test_4xhalf_add_mul ; GFX9: [[COPY:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0 @@ -1100,12 +1244,14 @@ body: | ; GFX9: [[COPY4:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr4 ; GFX9: [[COPY5:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr5 ; GFX9: [[CONCAT_VECTORS2:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[COPY4]](<2 x s16>), [[COPY5]](<2 x s16>) + ; GFX9: [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 ; GFX9: [[FMUL:%[0-9]+]]:_(<4 x s16>) = G_FMUL [[CONCAT_VECTORS]], [[CONCAT_VECTORS1]] ; GFX9: [[FADD:%[0-9]+]]:_(<4 x s16>) = G_FADD [[FMUL]], [[CONCAT_VECTORS2]] ; GFX9: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[FADD]](<4 x s16>) ; GFX9: $vgpr0 = COPY [[UV]](<2 x s16>) ; GFX9: $vgpr1 = COPY [[UV1]](<2 x s16>) - ; GFX9: S_SETPC_B64_return $sgpr30_sgpr31, implicit $vgpr0, implicit $vgpr1 + ; GFX9: [[COPY7:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY6]] + ; GFX9: S_SETPC_B64_return [[COPY7]], implicit $vgpr0, implicit $vgpr1 ; GFX9-CONTRACT-LABEL: name: test_4xhalf_add_mul ; GFX9-CONTRACT: [[COPY:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0 ; GFX9-CONTRACT: [[COPY1:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr1 @@ -1116,12 +1262,14 @@ body: | ; GFX9-CONTRACT: [[COPY4:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr4 ; GFX9-CONTRACT: [[COPY5:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr5 ; GFX9-CONTRACT: [[CONCAT_VECTORS2:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[COPY4]](<2 x s16>), [[COPY5]](<2 x s16>) + ; GFX9-CONTRACT: [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 ; GFX9-CONTRACT: [[FMUL:%[0-9]+]]:_(<4 x s16>) = G_FMUL [[CONCAT_VECTORS]], [[CONCAT_VECTORS1]] ; GFX9-CONTRACT: [[FADD:%[0-9]+]]:_(<4 x s16>) = G_FADD [[FMUL]], [[CONCAT_VECTORS2]] ; GFX9-CONTRACT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[FADD]](<4 x s16>) ; GFX9-CONTRACT: $vgpr0 = COPY [[UV]](<2 x s16>) ; GFX9-CONTRACT: $vgpr1 = COPY [[UV1]](<2 x s16>) - ; GFX9-CONTRACT: S_SETPC_B64_return $sgpr30_sgpr31, implicit $vgpr0, implicit $vgpr1 + ; GFX9-CONTRACT: [[COPY7:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY6]] + ; GFX9-CONTRACT: S_SETPC_B64_return [[COPY7]], implicit $vgpr0, implicit $vgpr1 ; GFX9-DENORM-LABEL: name: test_4xhalf_add_mul ; GFX9-DENORM: [[COPY:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0 ; GFX9-DENORM: [[COPY1:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr1 @@ -1132,12 +1280,14 @@ body: | ; GFX9-DENORM: [[COPY4:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr4 ; GFX9-DENORM: [[COPY5:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr5 ; GFX9-DENORM: [[CONCAT_VECTORS2:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[COPY4]](<2 x s16>), [[COPY5]](<2 x s16>) + ; GFX9-DENORM: [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 ; GFX9-DENORM: [[FMUL:%[0-9]+]]:_(<4 x s16>) = G_FMUL [[CONCAT_VECTORS]], [[CONCAT_VECTORS1]] ; GFX9-DENORM: [[FADD:%[0-9]+]]:_(<4 x s16>) = G_FADD [[FMUL]], [[CONCAT_VECTORS2]] ; GFX9-DENORM: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[FADD]](<4 x s16>) ; GFX9-DENORM: $vgpr0 = COPY [[UV]](<2 x s16>) ; GFX9-DENORM: $vgpr1 = COPY [[UV1]](<2 x s16>) - ; GFX9-DENORM: S_SETPC_B64_return $sgpr30_sgpr31, implicit $vgpr0, implicit $vgpr1 + ; GFX9-DENORM: [[COPY7:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY6]] + ; GFX9-DENORM: S_SETPC_B64_return [[COPY7]], implicit $vgpr0, implicit $vgpr1 ; GFX9-UNSAFE-LABEL: name: test_4xhalf_add_mul ; GFX9-UNSAFE: [[COPY:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0 ; GFX9-UNSAFE: [[COPY1:%[0-9]+]]:_(<2 x s16>) = 
COPY $vgpr1 @@ -1148,12 +1298,14 @@ body: | ; GFX9-UNSAFE: [[COPY4:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr4 ; GFX9-UNSAFE: [[COPY5:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr5 ; GFX9-UNSAFE: [[CONCAT_VECTORS2:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[COPY4]](<2 x s16>), [[COPY5]](<2 x s16>) + ; GFX9-UNSAFE: [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 ; GFX9-UNSAFE: [[FMUL:%[0-9]+]]:_(<4 x s16>) = G_FMUL [[CONCAT_VECTORS]], [[CONCAT_VECTORS1]] ; GFX9-UNSAFE: [[FADD:%[0-9]+]]:_(<4 x s16>) = G_FADD [[FMUL]], [[CONCAT_VECTORS2]] ; GFX9-UNSAFE: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[FADD]](<4 x s16>) ; GFX9-UNSAFE: $vgpr0 = COPY [[UV]](<2 x s16>) ; GFX9-UNSAFE: $vgpr1 = COPY [[UV1]](<2 x s16>) - ; GFX9-UNSAFE: S_SETPC_B64_return $sgpr30_sgpr31, implicit $vgpr0, implicit $vgpr1 + ; GFX9-UNSAFE: [[COPY7:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY6]] + ; GFX9-UNSAFE: S_SETPC_B64_return [[COPY7]], implicit $vgpr0, implicit $vgpr1 ; GFX10-LABEL: name: test_4xhalf_add_mul ; GFX10: [[COPY:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0 ; GFX10: [[COPY1:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr1 @@ -1164,12 +1316,14 @@ body: | ; GFX10: [[COPY4:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr4 ; GFX10: [[COPY5:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr5 ; GFX10: [[CONCAT_VECTORS2:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[COPY4]](<2 x s16>), [[COPY5]](<2 x s16>) + ; GFX10: [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 ; GFX10: [[FMUL:%[0-9]+]]:_(<4 x s16>) = G_FMUL [[CONCAT_VECTORS]], [[CONCAT_VECTORS1]] ; GFX10: [[FADD:%[0-9]+]]:_(<4 x s16>) = G_FADD [[FMUL]], [[CONCAT_VECTORS2]] ; GFX10: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[FADD]](<4 x s16>) ; GFX10: $vgpr0 = COPY [[UV]](<2 x s16>) ; GFX10: $vgpr1 = COPY [[UV1]](<2 x s16>) - ; GFX10: S_SETPC_B64_return $sgpr30_sgpr31, implicit $vgpr0, implicit $vgpr1 + ; GFX10: [[COPY7:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY6]] + ; GFX10: S_SETPC_B64_return [[COPY7]], implicit $vgpr0, implicit $vgpr1 ; GFX10-CONTRACT-LABEL: name: test_4xhalf_add_mul ; GFX10-CONTRACT: [[COPY:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0 ; GFX10-CONTRACT: [[COPY1:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr1 @@ -1180,12 +1334,14 @@ body: | ; GFX10-CONTRACT: [[COPY4:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr4 ; GFX10-CONTRACT: [[COPY5:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr5 ; GFX10-CONTRACT: [[CONCAT_VECTORS2:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[COPY4]](<2 x s16>), [[COPY5]](<2 x s16>) + ; GFX10-CONTRACT: [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 ; GFX10-CONTRACT: [[FMUL:%[0-9]+]]:_(<4 x s16>) = G_FMUL [[CONCAT_VECTORS]], [[CONCAT_VECTORS1]] ; GFX10-CONTRACT: [[FADD:%[0-9]+]]:_(<4 x s16>) = G_FADD [[FMUL]], [[CONCAT_VECTORS2]] ; GFX10-CONTRACT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[FADD]](<4 x s16>) ; GFX10-CONTRACT: $vgpr0 = COPY [[UV]](<2 x s16>) ; GFX10-CONTRACT: $vgpr1 = COPY [[UV1]](<2 x s16>) - ; GFX10-CONTRACT: S_SETPC_B64_return $sgpr30_sgpr31, implicit $vgpr0, implicit $vgpr1 + ; GFX10-CONTRACT: [[COPY7:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY6]] + ; GFX10-CONTRACT: S_SETPC_B64_return [[COPY7]], implicit $vgpr0, implicit $vgpr1 ; GFX10-DENORM-LABEL: name: test_4xhalf_add_mul ; GFX10-DENORM: [[COPY:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0 ; GFX10-DENORM: [[COPY1:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr1 @@ -1196,12 +1352,14 @@ body: | ; GFX10-DENORM: [[COPY4:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr4 ; GFX10-DENORM: [[COPY5:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr5 ; GFX10-DENORM: [[CONCAT_VECTORS2:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS 
[[COPY4]](<2 x s16>), [[COPY5]](<2 x s16>) + ; GFX10-DENORM: [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 ; GFX10-DENORM: [[FMUL:%[0-9]+]]:_(<4 x s16>) = G_FMUL [[CONCAT_VECTORS]], [[CONCAT_VECTORS1]] ; GFX10-DENORM: [[FADD:%[0-9]+]]:_(<4 x s16>) = G_FADD [[FMUL]], [[CONCAT_VECTORS2]] ; GFX10-DENORM: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[FADD]](<4 x s16>) ; GFX10-DENORM: $vgpr0 = COPY [[UV]](<2 x s16>) ; GFX10-DENORM: $vgpr1 = COPY [[UV1]](<2 x s16>) - ; GFX10-DENORM: S_SETPC_B64_return $sgpr30_sgpr31, implicit $vgpr0, implicit $vgpr1 + ; GFX10-DENORM: [[COPY7:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY6]] + ; GFX10-DENORM: S_SETPC_B64_return [[COPY7]], implicit $vgpr0, implicit $vgpr1 ; GFX10-UNSAFE-LABEL: name: test_4xhalf_add_mul ; GFX10-UNSAFE: [[COPY:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0 ; GFX10-UNSAFE: [[COPY1:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr1 @@ -1212,12 +1370,14 @@ body: | ; GFX10-UNSAFE: [[COPY4:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr4 ; GFX10-UNSAFE: [[COPY5:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr5 ; GFX10-UNSAFE: [[CONCAT_VECTORS2:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[COPY4]](<2 x s16>), [[COPY5]](<2 x s16>) + ; GFX10-UNSAFE: [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 ; GFX10-UNSAFE: [[FMUL:%[0-9]+]]:_(<4 x s16>) = G_FMUL [[CONCAT_VECTORS]], [[CONCAT_VECTORS1]] ; GFX10-UNSAFE: [[FADD:%[0-9]+]]:_(<4 x s16>) = G_FADD [[FMUL]], [[CONCAT_VECTORS2]] ; GFX10-UNSAFE: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[FADD]](<4 x s16>) ; GFX10-UNSAFE: $vgpr0 = COPY [[UV]](<2 x s16>) ; GFX10-UNSAFE: $vgpr1 = COPY [[UV1]](<2 x s16>) - ; GFX10-UNSAFE: S_SETPC_B64_return $sgpr30_sgpr31, implicit $vgpr0, implicit $vgpr1 + ; GFX10-UNSAFE: [[COPY7:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY6]] + ; GFX10-UNSAFE: S_SETPC_B64_return [[COPY7]], implicit $vgpr0, implicit $vgpr1 %4:_(<2 x s16>) = COPY $vgpr0 %5:_(<2 x s16>) = COPY $vgpr1 %0:_(<4 x s16>) = G_CONCAT_VECTORS %4(<2 x s16>), %5(<2 x s16>) @@ -1227,19 +1387,21 @@ body: | %8:_(<2 x s16>) = COPY $vgpr4 %9:_(<2 x s16>) = COPY $vgpr5 %2:_(<4 x s16>) = G_CONCAT_VECTORS %8(<2 x s16>), %9(<2 x s16>) + %3:sgpr_64 = COPY $sgpr30_sgpr31 %10:_(<4 x s16>) = G_FMUL %0, %1 %11:_(<4 x s16>) = G_FADD %10, %2 %13:_(<2 x s16>), %14:_(<2 x s16>) = G_UNMERGE_VALUES %11(<4 x s16>) $vgpr0 = COPY %13(<2 x s16>) $vgpr1 = COPY %14(<2 x s16>) - S_SETPC_B64_return $sgpr30_sgpr31, implicit $vgpr0, implicit $vgpr1 + %12:ccr_sgpr_64 = COPY %3 + S_SETPC_B64_return %12, implicit $vgpr0, implicit $vgpr1 ... 
--- name: test_3xhalf_add_mul_rhs body: | bb.1.entry: - liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5 + liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $sgpr30_sgpr31 ; GFX9-LABEL: name: test_3xhalf_add_mul_rhs ; GFX9: [[COPY:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0 @@ -1261,6 +1423,7 @@ body: | ; GFX9: [[BITCAST4:%[0-9]+]]:_(s96) = G_BITCAST [[CONCAT_VECTORS2]](<6 x s16>) ; GFX9: [[TRUNC2:%[0-9]+]]:_(s48) = G_TRUNC [[BITCAST4]](s96) ; GFX9: [[BITCAST5:%[0-9]+]]:_(<3 x s16>) = G_BITCAST [[TRUNC2]](s48) + ; GFX9: [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 ; GFX9: [[FMUL:%[0-9]+]]:_(<3 x s16>) = G_FMUL [[BITCAST1]], [[BITCAST3]] ; GFX9: [[FADD:%[0-9]+]]:_(<3 x s16>) = G_FADD [[BITCAST5]], [[FMUL]] ; GFX9: [[DEF1:%[0-9]+]]:_(<3 x s16>) = G_IMPLICIT_DEF @@ -1268,7 +1431,8 @@ body: | ; GFX9: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>), [[UV2:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS3]](<6 x s16>) ; GFX9: $vgpr0 = COPY [[UV]](<2 x s16>) ; GFX9: $vgpr1 = COPY [[UV1]](<2 x s16>) - ; GFX9: S_SETPC_B64_return $sgpr30_sgpr31, implicit $vgpr0, implicit $vgpr1 + ; GFX9: [[COPY7:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY6]] + ; GFX9: S_SETPC_B64_return [[COPY7]], implicit $vgpr0, implicit $vgpr1 ; GFX9-CONTRACT-LABEL: name: test_3xhalf_add_mul_rhs ; GFX9-CONTRACT: [[COPY:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0 ; GFX9-CONTRACT: [[COPY1:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr1 @@ -1289,6 +1453,7 @@ body: | ; GFX9-CONTRACT: [[BITCAST4:%[0-9]+]]:_(s96) = G_BITCAST [[CONCAT_VECTORS2]](<6 x s16>) ; GFX9-CONTRACT: [[TRUNC2:%[0-9]+]]:_(s48) = G_TRUNC [[BITCAST4]](s96) ; GFX9-CONTRACT: [[BITCAST5:%[0-9]+]]:_(<3 x s16>) = G_BITCAST [[TRUNC2]](s48) + ; GFX9-CONTRACT: [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 ; GFX9-CONTRACT: [[FMUL:%[0-9]+]]:_(<3 x s16>) = G_FMUL [[BITCAST1]], [[BITCAST3]] ; GFX9-CONTRACT: [[FADD:%[0-9]+]]:_(<3 x s16>) = G_FADD [[BITCAST5]], [[FMUL]] ; GFX9-CONTRACT: [[DEF1:%[0-9]+]]:_(<3 x s16>) = G_IMPLICIT_DEF @@ -1296,7 +1461,8 @@ body: | ; GFX9-CONTRACT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>), [[UV2:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS3]](<6 x s16>) ; GFX9-CONTRACT: $vgpr0 = COPY [[UV]](<2 x s16>) ; GFX9-CONTRACT: $vgpr1 = COPY [[UV1]](<2 x s16>) - ; GFX9-CONTRACT: S_SETPC_B64_return $sgpr30_sgpr31, implicit $vgpr0, implicit $vgpr1 + ; GFX9-CONTRACT: [[COPY7:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY6]] + ; GFX9-CONTRACT: S_SETPC_B64_return [[COPY7]], implicit $vgpr0, implicit $vgpr1 ; GFX9-DENORM-LABEL: name: test_3xhalf_add_mul_rhs ; GFX9-DENORM: [[COPY:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0 ; GFX9-DENORM: [[COPY1:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr1 @@ -1317,6 +1483,7 @@ body: | ; GFX9-DENORM: [[BITCAST4:%[0-9]+]]:_(s96) = G_BITCAST [[CONCAT_VECTORS2]](<6 x s16>) ; GFX9-DENORM: [[TRUNC2:%[0-9]+]]:_(s48) = G_TRUNC [[BITCAST4]](s96) ; GFX9-DENORM: [[BITCAST5:%[0-9]+]]:_(<3 x s16>) = G_BITCAST [[TRUNC2]](s48) + ; GFX9-DENORM: [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 ; GFX9-DENORM: [[FMUL:%[0-9]+]]:_(<3 x s16>) = G_FMUL [[BITCAST1]], [[BITCAST3]] ; GFX9-DENORM: [[FADD:%[0-9]+]]:_(<3 x s16>) = G_FADD [[BITCAST5]], [[FMUL]] ; GFX9-DENORM: [[DEF1:%[0-9]+]]:_(<3 x s16>) = G_IMPLICIT_DEF @@ -1324,7 +1491,8 @@ body: | ; GFX9-DENORM: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>), [[UV2:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS3]](<6 x s16>) ; GFX9-DENORM: $vgpr0 = COPY [[UV]](<2 x s16>) ; GFX9-DENORM: $vgpr1 = COPY [[UV1]](<2 x s16>) - ; GFX9-DENORM: S_SETPC_B64_return $sgpr30_sgpr31, 
implicit $vgpr0, implicit $vgpr1 + ; GFX9-DENORM: [[COPY7:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY6]] + ; GFX9-DENORM: S_SETPC_B64_return [[COPY7]], implicit $vgpr0, implicit $vgpr1 ; GFX9-UNSAFE-LABEL: name: test_3xhalf_add_mul_rhs ; GFX9-UNSAFE: [[COPY:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0 ; GFX9-UNSAFE: [[COPY1:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr1 @@ -1345,6 +1513,7 @@ body: | ; GFX9-UNSAFE: [[BITCAST4:%[0-9]+]]:_(s96) = G_BITCAST [[CONCAT_VECTORS2]](<6 x s16>) ; GFX9-UNSAFE: [[TRUNC2:%[0-9]+]]:_(s48) = G_TRUNC [[BITCAST4]](s96) ; GFX9-UNSAFE: [[BITCAST5:%[0-9]+]]:_(<3 x s16>) = G_BITCAST [[TRUNC2]](s48) + ; GFX9-UNSAFE: [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 ; GFX9-UNSAFE: [[FMUL:%[0-9]+]]:_(<3 x s16>) = G_FMUL [[BITCAST1]], [[BITCAST3]] ; GFX9-UNSAFE: [[FADD:%[0-9]+]]:_(<3 x s16>) = G_FADD [[BITCAST5]], [[FMUL]] ; GFX9-UNSAFE: [[DEF1:%[0-9]+]]:_(<3 x s16>) = G_IMPLICIT_DEF @@ -1352,7 +1521,8 @@ body: | ; GFX9-UNSAFE: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>), [[UV2:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS3]](<6 x s16>) ; GFX9-UNSAFE: $vgpr0 = COPY [[UV]](<2 x s16>) ; GFX9-UNSAFE: $vgpr1 = COPY [[UV1]](<2 x s16>) - ; GFX9-UNSAFE: S_SETPC_B64_return $sgpr30_sgpr31, implicit $vgpr0, implicit $vgpr1 + ; GFX9-UNSAFE: [[COPY7:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY6]] + ; GFX9-UNSAFE: S_SETPC_B64_return [[COPY7]], implicit $vgpr0, implicit $vgpr1 ; GFX10-LABEL: name: test_3xhalf_add_mul_rhs ; GFX10: [[COPY:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0 ; GFX10: [[COPY1:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr1 @@ -1373,6 +1543,7 @@ body: | ; GFX10: [[BITCAST4:%[0-9]+]]:_(s96) = G_BITCAST [[CONCAT_VECTORS2]](<6 x s16>) ; GFX10: [[TRUNC2:%[0-9]+]]:_(s48) = G_TRUNC [[BITCAST4]](s96) ; GFX10: [[BITCAST5:%[0-9]+]]:_(<3 x s16>) = G_BITCAST [[TRUNC2]](s48) + ; GFX10: [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 ; GFX10: [[FMUL:%[0-9]+]]:_(<3 x s16>) = G_FMUL [[BITCAST1]], [[BITCAST3]] ; GFX10: [[FADD:%[0-9]+]]:_(<3 x s16>) = G_FADD [[BITCAST5]], [[FMUL]] ; GFX10: [[DEF1:%[0-9]+]]:_(<3 x s16>) = G_IMPLICIT_DEF @@ -1380,7 +1551,8 @@ body: | ; GFX10: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>), [[UV2:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS3]](<6 x s16>) ; GFX10: $vgpr0 = COPY [[UV]](<2 x s16>) ; GFX10: $vgpr1 = COPY [[UV1]](<2 x s16>) - ; GFX10: S_SETPC_B64_return $sgpr30_sgpr31, implicit $vgpr0, implicit $vgpr1 + ; GFX10: [[COPY7:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY6]] + ; GFX10: S_SETPC_B64_return [[COPY7]], implicit $vgpr0, implicit $vgpr1 ; GFX10-CONTRACT-LABEL: name: test_3xhalf_add_mul_rhs ; GFX10-CONTRACT: [[COPY:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0 ; GFX10-CONTRACT: [[COPY1:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr1 @@ -1401,6 +1573,7 @@ body: | ; GFX10-CONTRACT: [[BITCAST4:%[0-9]+]]:_(s96) = G_BITCAST [[CONCAT_VECTORS2]](<6 x s16>) ; GFX10-CONTRACT: [[TRUNC2:%[0-9]+]]:_(s48) = G_TRUNC [[BITCAST4]](s96) ; GFX10-CONTRACT: [[BITCAST5:%[0-9]+]]:_(<3 x s16>) = G_BITCAST [[TRUNC2]](s48) + ; GFX10-CONTRACT: [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 ; GFX10-CONTRACT: [[FMUL:%[0-9]+]]:_(<3 x s16>) = G_FMUL [[BITCAST1]], [[BITCAST3]] ; GFX10-CONTRACT: [[FADD:%[0-9]+]]:_(<3 x s16>) = G_FADD [[BITCAST5]], [[FMUL]] ; GFX10-CONTRACT: [[DEF1:%[0-9]+]]:_(<3 x s16>) = G_IMPLICIT_DEF @@ -1408,7 +1581,8 @@ body: | ; GFX10-CONTRACT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>), [[UV2:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS3]](<6 x s16>) ; GFX10-CONTRACT: $vgpr0 = COPY [[UV]](<2 x s16>) ; GFX10-CONTRACT: $vgpr1 = COPY [[UV1]](<2 
x s16>) - ; GFX10-CONTRACT: S_SETPC_B64_return $sgpr30_sgpr31, implicit $vgpr0, implicit $vgpr1 + ; GFX10-CONTRACT: [[COPY7:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY6]] + ; GFX10-CONTRACT: S_SETPC_B64_return [[COPY7]], implicit $vgpr0, implicit $vgpr1 ; GFX10-DENORM-LABEL: name: test_3xhalf_add_mul_rhs ; GFX10-DENORM: [[COPY:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0 ; GFX10-DENORM: [[COPY1:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr1 @@ -1429,6 +1603,7 @@ body: | ; GFX10-DENORM: [[BITCAST4:%[0-9]+]]:_(s96) = G_BITCAST [[CONCAT_VECTORS2]](<6 x s16>) ; GFX10-DENORM: [[TRUNC2:%[0-9]+]]:_(s48) = G_TRUNC [[BITCAST4]](s96) ; GFX10-DENORM: [[BITCAST5:%[0-9]+]]:_(<3 x s16>) = G_BITCAST [[TRUNC2]](s48) + ; GFX10-DENORM: [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 ; GFX10-DENORM: [[FMUL:%[0-9]+]]:_(<3 x s16>) = G_FMUL [[BITCAST1]], [[BITCAST3]] ; GFX10-DENORM: [[FADD:%[0-9]+]]:_(<3 x s16>) = G_FADD [[BITCAST5]], [[FMUL]] ; GFX10-DENORM: [[DEF1:%[0-9]+]]:_(<3 x s16>) = G_IMPLICIT_DEF @@ -1436,7 +1611,8 @@ body: | ; GFX10-DENORM: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>), [[UV2:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS3]](<6 x s16>) ; GFX10-DENORM: $vgpr0 = COPY [[UV]](<2 x s16>) ; GFX10-DENORM: $vgpr1 = COPY [[UV1]](<2 x s16>) - ; GFX10-DENORM: S_SETPC_B64_return $sgpr30_sgpr31, implicit $vgpr0, implicit $vgpr1 + ; GFX10-DENORM: [[COPY7:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY6]] + ; GFX10-DENORM: S_SETPC_B64_return [[COPY7]], implicit $vgpr0, implicit $vgpr1 ; GFX10-UNSAFE-LABEL: name: test_3xhalf_add_mul_rhs ; GFX10-UNSAFE: [[COPY:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0 ; GFX10-UNSAFE: [[COPY1:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr1 @@ -1457,6 +1633,7 @@ body: | ; GFX10-UNSAFE: [[BITCAST4:%[0-9]+]]:_(s96) = G_BITCAST [[CONCAT_VECTORS2]](<6 x s16>) ; GFX10-UNSAFE: [[TRUNC2:%[0-9]+]]:_(s48) = G_TRUNC [[BITCAST4]](s96) ; GFX10-UNSAFE: [[BITCAST5:%[0-9]+]]:_(<3 x s16>) = G_BITCAST [[TRUNC2]](s48) + ; GFX10-UNSAFE: [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 ; GFX10-UNSAFE: [[FMUL:%[0-9]+]]:_(<3 x s16>) = G_FMUL [[BITCAST1]], [[BITCAST3]] ; GFX10-UNSAFE: [[FADD:%[0-9]+]]:_(<3 x s16>) = G_FADD [[BITCAST5]], [[FMUL]] ; GFX10-UNSAFE: [[DEF1:%[0-9]+]]:_(<3 x s16>) = G_IMPLICIT_DEF @@ -1464,7 +1641,8 @@ body: | ; GFX10-UNSAFE: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>), [[UV2:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS3]](<6 x s16>) ; GFX10-UNSAFE: $vgpr0 = COPY [[UV]](<2 x s16>) ; GFX10-UNSAFE: $vgpr1 = COPY [[UV1]](<2 x s16>) - ; GFX10-UNSAFE: S_SETPC_B64_return $sgpr30_sgpr31, implicit $vgpr0, implicit $vgpr1 + ; GFX10-UNSAFE: [[COPY7:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY6]] + ; GFX10-UNSAFE: S_SETPC_B64_return [[COPY7]], implicit $vgpr0, implicit $vgpr1 %4:_(<2 x s16>) = COPY $vgpr0 %5:_(<2 x s16>) = COPY $vgpr1 %10:_(<2 x s16>) = G_IMPLICIT_DEF @@ -1478,6 +1656,7 @@ body: | %9:_(<2 x s16>) = COPY $vgpr5 %15:_(<6 x s16>) = G_CONCAT_VECTORS %8(<2 x s16>), %9(<2 x s16>), %10(<2 x s16>) %2:_(<3 x s16>), %16:_(<3 x s16>) = G_UNMERGE_VALUES %15(<6 x s16>) + %3:sgpr_64 = COPY $sgpr30_sgpr31 %17:_(<3 x s16>) = G_FMUL %0, %1 %18:_(<3 x s16>) = G_FADD %2, %17 %22:_(<3 x s16>) = G_IMPLICIT_DEF @@ -1485,14 +1664,15 @@ body: | %20:_(<2 x s16>), %21:_(<2 x s16>), %24:_(<2 x s16>) = G_UNMERGE_VALUES %23(<6 x s16>) $vgpr0 = COPY %20(<2 x s16>) $vgpr1 = COPY %21(<2 x s16>) - S_SETPC_B64_return $sgpr30_sgpr31, implicit $vgpr0, implicit $vgpr1 + %19:ccr_sgpr_64 = COPY %3 + S_SETPC_B64_return %19, implicit $vgpr0, implicit $vgpr1 ... 
--- name: test_4xdouble_add_mul body: | bb.1.entry: - liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $vgpr8, $vgpr9, $vgpr10, $vgpr11, $vgpr12, $vgpr13, $vgpr14, $vgpr15, $vgpr16, $vgpr17, $vgpr18, $vgpr19, $vgpr20, $vgpr21, $vgpr22, $vgpr23 + liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $vgpr8, $vgpr9, $vgpr10, $vgpr11, $vgpr12, $vgpr13, $vgpr14, $vgpr15, $vgpr16, $vgpr17, $vgpr18, $vgpr19, $vgpr20, $vgpr21, $vgpr22, $vgpr23, $sgpr30_sgpr31 ; GFX9-LABEL: name: test_4xdouble_add_mul ; GFX9: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 @@ -1534,6 +1714,7 @@ body: | ; GFX9: [[MV10:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY20]](s32), [[COPY21]](s32) ; GFX9: [[MV11:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY22]](s32), [[COPY23]](s32) ; GFX9: [[BUILD_VECTOR2:%[0-9]+]]:_(<4 x s64>) = G_BUILD_VECTOR [[MV8]](s64), [[MV9]](s64), [[MV10]](s64), [[MV11]](s64) + ; GFX9: [[COPY24:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 ; GFX9: [[FMUL:%[0-9]+]]:_(<4 x s64>) = G_FMUL [[BUILD_VECTOR]], [[BUILD_VECTOR1]] ; GFX9: [[FADD:%[0-9]+]]:_(<4 x s64>) = G_FADD [[FMUL]], [[BUILD_VECTOR2]] ; GFX9: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32), [[UV6:%[0-9]+]]:_(s32), [[UV7:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[FADD]](<4 x s64>) @@ -1545,7 +1726,8 @@ body: | ; GFX9: $vgpr5 = COPY [[UV5]](s32) ; GFX9: $vgpr6 = COPY [[UV6]](s32) ; GFX9: $vgpr7 = COPY [[UV7]](s32) - ; GFX9: S_SETPC_B64_return $sgpr30_sgpr31, implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7 + ; GFX9: [[COPY25:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY24]] + ; GFX9: S_SETPC_B64_return [[COPY25]], implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7 ; GFX9-CONTRACT-LABEL: name: test_4xdouble_add_mul ; GFX9-CONTRACT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 ; GFX9-CONTRACT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 @@ -1586,6 +1768,7 @@ body: | ; GFX9-CONTRACT: [[MV10:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY20]](s32), [[COPY21]](s32) ; GFX9-CONTRACT: [[MV11:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY22]](s32), [[COPY23]](s32) ; GFX9-CONTRACT: [[BUILD_VECTOR2:%[0-9]+]]:_(<4 x s64>) = G_BUILD_VECTOR [[MV8]](s64), [[MV9]](s64), [[MV10]](s64), [[MV11]](s64) + ; GFX9-CONTRACT: [[COPY24:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 ; GFX9-CONTRACT: [[FMUL:%[0-9]+]]:_(<4 x s64>) = G_FMUL [[BUILD_VECTOR]], [[BUILD_VECTOR1]] ; GFX9-CONTRACT: [[FADD:%[0-9]+]]:_(<4 x s64>) = G_FADD [[FMUL]], [[BUILD_VECTOR2]] ; GFX9-CONTRACT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32), [[UV6:%[0-9]+]]:_(s32), [[UV7:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[FADD]](<4 x s64>) @@ -1597,7 +1780,8 @@ body: | ; GFX9-CONTRACT: $vgpr5 = COPY [[UV5]](s32) ; GFX9-CONTRACT: $vgpr6 = COPY [[UV6]](s32) ; GFX9-CONTRACT: $vgpr7 = COPY [[UV7]](s32) - ; GFX9-CONTRACT: S_SETPC_B64_return $sgpr30_sgpr31, implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7 + ; GFX9-CONTRACT: [[COPY25:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY24]] + ; GFX9-CONTRACT: S_SETPC_B64_return [[COPY25]], implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7 ; GFX9-DENORM-LABEL: name: test_4xdouble_add_mul 
; GFX9-DENORM: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 ; GFX9-DENORM: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 @@ -1638,6 +1822,7 @@ body: | ; GFX9-DENORM: [[MV10:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY20]](s32), [[COPY21]](s32) ; GFX9-DENORM: [[MV11:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY22]](s32), [[COPY23]](s32) ; GFX9-DENORM: [[BUILD_VECTOR2:%[0-9]+]]:_(<4 x s64>) = G_BUILD_VECTOR [[MV8]](s64), [[MV9]](s64), [[MV10]](s64), [[MV11]](s64) + ; GFX9-DENORM: [[COPY24:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 ; GFX9-DENORM: [[FMUL:%[0-9]+]]:_(<4 x s64>) = G_FMUL [[BUILD_VECTOR]], [[BUILD_VECTOR1]] ; GFX9-DENORM: [[FADD:%[0-9]+]]:_(<4 x s64>) = G_FADD [[FMUL]], [[BUILD_VECTOR2]] ; GFX9-DENORM: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32), [[UV6:%[0-9]+]]:_(s32), [[UV7:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[FADD]](<4 x s64>) @@ -1649,7 +1834,8 @@ body: | ; GFX9-DENORM: $vgpr5 = COPY [[UV5]](s32) ; GFX9-DENORM: $vgpr6 = COPY [[UV6]](s32) ; GFX9-DENORM: $vgpr7 = COPY [[UV7]](s32) - ; GFX9-DENORM: S_SETPC_B64_return $sgpr30_sgpr31, implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7 + ; GFX9-DENORM: [[COPY25:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY24]] + ; GFX9-DENORM: S_SETPC_B64_return [[COPY25]], implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7 ; GFX9-UNSAFE-LABEL: name: test_4xdouble_add_mul ; GFX9-UNSAFE: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 ; GFX9-UNSAFE: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 @@ -1690,6 +1876,7 @@ body: | ; GFX9-UNSAFE: [[MV10:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY20]](s32), [[COPY21]](s32) ; GFX9-UNSAFE: [[MV11:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY22]](s32), [[COPY23]](s32) ; GFX9-UNSAFE: [[BUILD_VECTOR2:%[0-9]+]]:_(<4 x s64>) = G_BUILD_VECTOR [[MV8]](s64), [[MV9]](s64), [[MV10]](s64), [[MV11]](s64) + ; GFX9-UNSAFE: [[COPY24:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 ; GFX9-UNSAFE: [[FMUL:%[0-9]+]]:_(<4 x s64>) = G_FMUL [[BUILD_VECTOR]], [[BUILD_VECTOR1]] ; GFX9-UNSAFE: [[FADD:%[0-9]+]]:_(<4 x s64>) = G_FADD [[FMUL]], [[BUILD_VECTOR2]] ; GFX9-UNSAFE: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32), [[UV6:%[0-9]+]]:_(s32), [[UV7:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[FADD]](<4 x s64>) @@ -1701,7 +1888,8 @@ body: | ; GFX9-UNSAFE: $vgpr5 = COPY [[UV5]](s32) ; GFX9-UNSAFE: $vgpr6 = COPY [[UV6]](s32) ; GFX9-UNSAFE: $vgpr7 = COPY [[UV7]](s32) - ; GFX9-UNSAFE: S_SETPC_B64_return $sgpr30_sgpr31, implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7 + ; GFX9-UNSAFE: [[COPY25:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY24]] + ; GFX9-UNSAFE: S_SETPC_B64_return [[COPY25]], implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7 ; GFX10-LABEL: name: test_4xdouble_add_mul ; GFX10: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 ; GFX10: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 @@ -1742,6 +1930,7 @@ body: | ; GFX10: [[MV10:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY20]](s32), [[COPY21]](s32) ; GFX10: [[MV11:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY22]](s32), [[COPY23]](s32) ; GFX10: [[BUILD_VECTOR2:%[0-9]+]]:_(<4 x s64>) = G_BUILD_VECTOR [[MV8]](s64), [[MV9]](s64), [[MV10]](s64), [[MV11]](s64) + ; GFX10: 
[[COPY24:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 ; GFX10: [[FMUL:%[0-9]+]]:_(<4 x s64>) = G_FMUL [[BUILD_VECTOR]], [[BUILD_VECTOR1]] ; GFX10: [[FADD:%[0-9]+]]:_(<4 x s64>) = G_FADD [[FMUL]], [[BUILD_VECTOR2]] ; GFX10: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32), [[UV6:%[0-9]+]]:_(s32), [[UV7:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[FADD]](<4 x s64>) @@ -1753,7 +1942,8 @@ body: | ; GFX10: $vgpr5 = COPY [[UV5]](s32) ; GFX10: $vgpr6 = COPY [[UV6]](s32) ; GFX10: $vgpr7 = COPY [[UV7]](s32) - ; GFX10: S_SETPC_B64_return $sgpr30_sgpr31, implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7 + ; GFX10: [[COPY25:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY24]] + ; GFX10: S_SETPC_B64_return [[COPY25]], implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7 ; GFX10-CONTRACT-LABEL: name: test_4xdouble_add_mul ; GFX10-CONTRACT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 ; GFX10-CONTRACT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 @@ -1794,6 +1984,7 @@ body: | ; GFX10-CONTRACT: [[MV10:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY20]](s32), [[COPY21]](s32) ; GFX10-CONTRACT: [[MV11:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY22]](s32), [[COPY23]](s32) ; GFX10-CONTRACT: [[BUILD_VECTOR2:%[0-9]+]]:_(<4 x s64>) = G_BUILD_VECTOR [[MV8]](s64), [[MV9]](s64), [[MV10]](s64), [[MV11]](s64) + ; GFX10-CONTRACT: [[COPY24:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 ; GFX10-CONTRACT: [[FMUL:%[0-9]+]]:_(<4 x s64>) = G_FMUL [[BUILD_VECTOR]], [[BUILD_VECTOR1]] ; GFX10-CONTRACT: [[FADD:%[0-9]+]]:_(<4 x s64>) = G_FADD [[FMUL]], [[BUILD_VECTOR2]] ; GFX10-CONTRACT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32), [[UV6:%[0-9]+]]:_(s32), [[UV7:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[FADD]](<4 x s64>) @@ -1805,7 +1996,8 @@ body: | ; GFX10-CONTRACT: $vgpr5 = COPY [[UV5]](s32) ; GFX10-CONTRACT: $vgpr6 = COPY [[UV6]](s32) ; GFX10-CONTRACT: $vgpr7 = COPY [[UV7]](s32) - ; GFX10-CONTRACT: S_SETPC_B64_return $sgpr30_sgpr31, implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7 + ; GFX10-CONTRACT: [[COPY25:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY24]] + ; GFX10-CONTRACT: S_SETPC_B64_return [[COPY25]], implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7 ; GFX10-DENORM-LABEL: name: test_4xdouble_add_mul ; GFX10-DENORM: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 ; GFX10-DENORM: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 @@ -1846,6 +2038,7 @@ body: | ; GFX10-DENORM: [[MV10:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY20]](s32), [[COPY21]](s32) ; GFX10-DENORM: [[MV11:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY22]](s32), [[COPY23]](s32) ; GFX10-DENORM: [[BUILD_VECTOR2:%[0-9]+]]:_(<4 x s64>) = G_BUILD_VECTOR [[MV8]](s64), [[MV9]](s64), [[MV10]](s64), [[MV11]](s64) + ; GFX10-DENORM: [[COPY24:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 ; GFX10-DENORM: [[FMUL:%[0-9]+]]:_(<4 x s64>) = G_FMUL [[BUILD_VECTOR]], [[BUILD_VECTOR1]] ; GFX10-DENORM: [[FADD:%[0-9]+]]:_(<4 x s64>) = G_FADD [[FMUL]], [[BUILD_VECTOR2]] ; GFX10-DENORM: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32), [[UV6:%[0-9]+]]:_(s32), 
[[UV7:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[FADD]](<4 x s64>) @@ -1857,7 +2050,8 @@ body: | ; GFX10-DENORM: $vgpr5 = COPY [[UV5]](s32) ; GFX10-DENORM: $vgpr6 = COPY [[UV6]](s32) ; GFX10-DENORM: $vgpr7 = COPY [[UV7]](s32) - ; GFX10-DENORM: S_SETPC_B64_return $sgpr30_sgpr31, implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7 + ; GFX10-DENORM: [[COPY25:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY24]] + ; GFX10-DENORM: S_SETPC_B64_return [[COPY25]], implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7 ; GFX10-UNSAFE-LABEL: name: test_4xdouble_add_mul ; GFX10-UNSAFE: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 ; GFX10-UNSAFE: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 @@ -1898,6 +2092,7 @@ body: | ; GFX10-UNSAFE: [[MV10:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY20]](s32), [[COPY21]](s32) ; GFX10-UNSAFE: [[MV11:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY22]](s32), [[COPY23]](s32) ; GFX10-UNSAFE: [[BUILD_VECTOR2:%[0-9]+]]:_(<4 x s64>) = G_BUILD_VECTOR [[MV8]](s64), [[MV9]](s64), [[MV10]](s64), [[MV11]](s64) + ; GFX10-UNSAFE: [[COPY24:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 ; GFX10-UNSAFE: [[FMUL:%[0-9]+]]:_(<4 x s64>) = G_FMUL [[BUILD_VECTOR]], [[BUILD_VECTOR1]] ; GFX10-UNSAFE: [[FADD:%[0-9]+]]:_(<4 x s64>) = G_FADD [[FMUL]], [[BUILD_VECTOR2]] ; GFX10-UNSAFE: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32), [[UV6:%[0-9]+]]:_(s32), [[UV7:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[FADD]](<4 x s64>) @@ -1909,7 +2104,8 @@ body: | ; GFX10-UNSAFE: $vgpr5 = COPY [[UV5]](s32) ; GFX10-UNSAFE: $vgpr6 = COPY [[UV6]](s32) ; GFX10-UNSAFE: $vgpr7 = COPY [[UV7]](s32) - ; GFX10-UNSAFE: S_SETPC_B64_return $sgpr30_sgpr31, implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7 + ; GFX10-UNSAFE: [[COPY25:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY24]] + ; GFX10-UNSAFE: S_SETPC_B64_return [[COPY25]], implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7 %4:_(s32) = COPY $vgpr0 %5:_(s32) = COPY $vgpr1 %6:_(s32) = COPY $vgpr2 @@ -1949,6 +2145,7 @@ body: | %38:_(s64) = G_MERGE_VALUES %24(s32), %25(s32) %39:_(s64) = G_MERGE_VALUES %26(s32), %27(s32) %2:_(<4 x s64>) = G_BUILD_VECTOR %36(s64), %37(s64), %38(s64), %39(s64) + %3:sgpr_64 = COPY $sgpr30_sgpr31 %40:_(<4 x s64>) = G_FMUL %0, %1 %41:_(<4 x s64>) = G_FADD %40, %2 %43:_(s32), %44:_(s32), %45:_(s32), %46:_(s32), %47:_(s32), %48:_(s32), %49:_(s32), %50:_(s32) = G_UNMERGE_VALUES %41(<4 x s64>) @@ -1960,14 +2157,15 @@ body: | $vgpr5 = COPY %48(s32) $vgpr6 = COPY %49(s32) $vgpr7 = COPY %50(s32) - S_SETPC_B64_return $sgpr30_sgpr31, implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7 + %42:ccr_sgpr_64 = COPY %3 + S_SETPC_B64_return %42, implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7 ... 
--- name: test_3xdouble_add_mul_rhs body: | bb.1.entry: - liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $vgpr8, $vgpr9, $vgpr10, $vgpr11, $vgpr12, $vgpr13, $vgpr14, $vgpr15, $vgpr16, $vgpr17 + liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $vgpr8, $vgpr9, $vgpr10, $vgpr11, $vgpr12, $vgpr13, $vgpr14, $vgpr15, $vgpr16, $vgpr17, $sgpr30_sgpr31 ; GFX9-LABEL: name: test_3xdouble_add_mul_rhs ; GFX9: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 @@ -2000,6 +2198,7 @@ body: | ; GFX9: [[MV7:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY14]](s32), [[COPY15]](s32) ; GFX9: [[MV8:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY16]](s32), [[COPY17]](s32) ; GFX9: [[BUILD_VECTOR2:%[0-9]+]]:_(<3 x s64>) = G_BUILD_VECTOR [[MV6]](s64), [[MV7]](s64), [[MV8]](s64) + ; GFX9: [[COPY18:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 ; GFX9: [[FMUL:%[0-9]+]]:_(<3 x s64>) = G_FMUL [[BUILD_VECTOR]], [[BUILD_VECTOR1]] ; GFX9: [[FADD:%[0-9]+]]:_(<3 x s64>) = G_FADD [[BUILD_VECTOR2]], [[FMUL]] ; GFX9: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[FADD]](<3 x s64>) @@ -2009,7 +2208,8 @@ body: | ; GFX9: $vgpr3 = COPY [[UV3]](s32) ; GFX9: $vgpr4 = COPY [[UV4]](s32) ; GFX9: $vgpr5 = COPY [[UV5]](s32) - ; GFX9: S_SETPC_B64_return $sgpr30_sgpr31, implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5 + ; GFX9: [[COPY19:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY18]] + ; GFX9: S_SETPC_B64_return [[COPY19]], implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5 ; GFX9-CONTRACT-LABEL: name: test_3xdouble_add_mul_rhs ; GFX9-CONTRACT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 ; GFX9-CONTRACT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 @@ -2041,6 +2241,7 @@ body: | ; GFX9-CONTRACT: [[MV7:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY14]](s32), [[COPY15]](s32) ; GFX9-CONTRACT: [[MV8:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY16]](s32), [[COPY17]](s32) ; GFX9-CONTRACT: [[BUILD_VECTOR2:%[0-9]+]]:_(<3 x s64>) = G_BUILD_VECTOR [[MV6]](s64), [[MV7]](s64), [[MV8]](s64) + ; GFX9-CONTRACT: [[COPY18:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 ; GFX9-CONTRACT: [[FMUL:%[0-9]+]]:_(<3 x s64>) = G_FMUL [[BUILD_VECTOR]], [[BUILD_VECTOR1]] ; GFX9-CONTRACT: [[FADD:%[0-9]+]]:_(<3 x s64>) = G_FADD [[BUILD_VECTOR2]], [[FMUL]] ; GFX9-CONTRACT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[FADD]](<3 x s64>) @@ -2050,7 +2251,8 @@ body: | ; GFX9-CONTRACT: $vgpr3 = COPY [[UV3]](s32) ; GFX9-CONTRACT: $vgpr4 = COPY [[UV4]](s32) ; GFX9-CONTRACT: $vgpr5 = COPY [[UV5]](s32) - ; GFX9-CONTRACT: S_SETPC_B64_return $sgpr30_sgpr31, implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5 + ; GFX9-CONTRACT: [[COPY19:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY18]] + ; GFX9-CONTRACT: S_SETPC_B64_return [[COPY19]], implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5 ; GFX9-DENORM-LABEL: name: test_3xdouble_add_mul_rhs ; GFX9-DENORM: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 ; GFX9-DENORM: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 @@ -2082,6 +2284,7 @@ body: | ; GFX9-DENORM: [[MV7:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY14]](s32), [[COPY15]](s32) ; GFX9-DENORM: [[MV8:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY16]](s32), [[COPY17]](s32) ; GFX9-DENORM: [[BUILD_VECTOR2:%[0-9]+]]:_(<3 x 
s64>) = G_BUILD_VECTOR [[MV6]](s64), [[MV7]](s64), [[MV8]](s64) + ; GFX9-DENORM: [[COPY18:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 ; GFX9-DENORM: [[FMUL:%[0-9]+]]:_(<3 x s64>) = G_FMUL [[BUILD_VECTOR]], [[BUILD_VECTOR1]] ; GFX9-DENORM: [[FADD:%[0-9]+]]:_(<3 x s64>) = G_FADD [[BUILD_VECTOR2]], [[FMUL]] ; GFX9-DENORM: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[FADD]](<3 x s64>) @@ -2091,7 +2294,8 @@ body: | ; GFX9-DENORM: $vgpr3 = COPY [[UV3]](s32) ; GFX9-DENORM: $vgpr4 = COPY [[UV4]](s32) ; GFX9-DENORM: $vgpr5 = COPY [[UV5]](s32) - ; GFX9-DENORM: S_SETPC_B64_return $sgpr30_sgpr31, implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5 + ; GFX9-DENORM: [[COPY19:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY18]] + ; GFX9-DENORM: S_SETPC_B64_return [[COPY19]], implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5 ; GFX9-UNSAFE-LABEL: name: test_3xdouble_add_mul_rhs ; GFX9-UNSAFE: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 ; GFX9-UNSAFE: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 @@ -2123,6 +2327,7 @@ body: | ; GFX9-UNSAFE: [[MV7:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY14]](s32), [[COPY15]](s32) ; GFX9-UNSAFE: [[MV8:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY16]](s32), [[COPY17]](s32) ; GFX9-UNSAFE: [[BUILD_VECTOR2:%[0-9]+]]:_(<3 x s64>) = G_BUILD_VECTOR [[MV6]](s64), [[MV7]](s64), [[MV8]](s64) + ; GFX9-UNSAFE: [[COPY18:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 ; GFX9-UNSAFE: [[FMUL:%[0-9]+]]:_(<3 x s64>) = G_FMUL [[BUILD_VECTOR]], [[BUILD_VECTOR1]] ; GFX9-UNSAFE: [[FADD:%[0-9]+]]:_(<3 x s64>) = G_FADD [[BUILD_VECTOR2]], [[FMUL]] ; GFX9-UNSAFE: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[FADD]](<3 x s64>) @@ -2132,7 +2337,8 @@ body: | ; GFX9-UNSAFE: $vgpr3 = COPY [[UV3]](s32) ; GFX9-UNSAFE: $vgpr4 = COPY [[UV4]](s32) ; GFX9-UNSAFE: $vgpr5 = COPY [[UV5]](s32) - ; GFX9-UNSAFE: S_SETPC_B64_return $sgpr30_sgpr31, implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5 + ; GFX9-UNSAFE: [[COPY19:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY18]] + ; GFX9-UNSAFE: S_SETPC_B64_return [[COPY19]], implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5 ; GFX10-LABEL: name: test_3xdouble_add_mul_rhs ; GFX10: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 ; GFX10: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 @@ -2164,6 +2370,7 @@ body: | ; GFX10: [[MV7:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY14]](s32), [[COPY15]](s32) ; GFX10: [[MV8:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY16]](s32), [[COPY17]](s32) ; GFX10: [[BUILD_VECTOR2:%[0-9]+]]:_(<3 x s64>) = G_BUILD_VECTOR [[MV6]](s64), [[MV7]](s64), [[MV8]](s64) + ; GFX10: [[COPY18:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 ; GFX10: [[FMUL:%[0-9]+]]:_(<3 x s64>) = G_FMUL [[BUILD_VECTOR]], [[BUILD_VECTOR1]] ; GFX10: [[FADD:%[0-9]+]]:_(<3 x s64>) = G_FADD [[BUILD_VECTOR2]], [[FMUL]] ; GFX10: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[FADD]](<3 x s64>) @@ -2173,7 +2380,8 @@ body: | ; GFX10: $vgpr3 = COPY [[UV3]](s32) ; GFX10: $vgpr4 = COPY [[UV4]](s32) ; GFX10: $vgpr5 = COPY [[UV5]](s32) - ; GFX10: S_SETPC_B64_return $sgpr30_sgpr31, implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit 
$vgpr3, implicit $vgpr4, implicit $vgpr5 + ; GFX10: [[COPY19:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY18]] + ; GFX10: S_SETPC_B64_return [[COPY19]], implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5 ; GFX10-CONTRACT-LABEL: name: test_3xdouble_add_mul_rhs ; GFX10-CONTRACT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 ; GFX10-CONTRACT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 @@ -2205,6 +2413,7 @@ body: | ; GFX10-CONTRACT: [[MV7:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY14]](s32), [[COPY15]](s32) ; GFX10-CONTRACT: [[MV8:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY16]](s32), [[COPY17]](s32) ; GFX10-CONTRACT: [[BUILD_VECTOR2:%[0-9]+]]:_(<3 x s64>) = G_BUILD_VECTOR [[MV6]](s64), [[MV7]](s64), [[MV8]](s64) + ; GFX10-CONTRACT: [[COPY18:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 ; GFX10-CONTRACT: [[FMUL:%[0-9]+]]:_(<3 x s64>) = G_FMUL [[BUILD_VECTOR]], [[BUILD_VECTOR1]] ; GFX10-CONTRACT: [[FADD:%[0-9]+]]:_(<3 x s64>) = G_FADD [[BUILD_VECTOR2]], [[FMUL]] ; GFX10-CONTRACT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[FADD]](<3 x s64>) @@ -2214,7 +2423,8 @@ body: | ; GFX10-CONTRACT: $vgpr3 = COPY [[UV3]](s32) ; GFX10-CONTRACT: $vgpr4 = COPY [[UV4]](s32) ; GFX10-CONTRACT: $vgpr5 = COPY [[UV5]](s32) - ; GFX10-CONTRACT: S_SETPC_B64_return $sgpr30_sgpr31, implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5 + ; GFX10-CONTRACT: [[COPY19:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY18]] + ; GFX10-CONTRACT: S_SETPC_B64_return [[COPY19]], implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5 ; GFX10-DENORM-LABEL: name: test_3xdouble_add_mul_rhs ; GFX10-DENORM: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 ; GFX10-DENORM: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 @@ -2246,6 +2456,7 @@ body: | ; GFX10-DENORM: [[MV7:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY14]](s32), [[COPY15]](s32) ; GFX10-DENORM: [[MV8:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY16]](s32), [[COPY17]](s32) ; GFX10-DENORM: [[BUILD_VECTOR2:%[0-9]+]]:_(<3 x s64>) = G_BUILD_VECTOR [[MV6]](s64), [[MV7]](s64), [[MV8]](s64) + ; GFX10-DENORM: [[COPY18:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 ; GFX10-DENORM: [[FMUL:%[0-9]+]]:_(<3 x s64>) = G_FMUL [[BUILD_VECTOR]], [[BUILD_VECTOR1]] ; GFX10-DENORM: [[FADD:%[0-9]+]]:_(<3 x s64>) = G_FADD [[BUILD_VECTOR2]], [[FMUL]] ; GFX10-DENORM: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[FADD]](<3 x s64>) @@ -2255,7 +2466,8 @@ body: | ; GFX10-DENORM: $vgpr3 = COPY [[UV3]](s32) ; GFX10-DENORM: $vgpr4 = COPY [[UV4]](s32) ; GFX10-DENORM: $vgpr5 = COPY [[UV5]](s32) - ; GFX10-DENORM: S_SETPC_B64_return $sgpr30_sgpr31, implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5 + ; GFX10-DENORM: [[COPY19:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY18]] + ; GFX10-DENORM: S_SETPC_B64_return [[COPY19]], implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5 ; GFX10-UNSAFE-LABEL: name: test_3xdouble_add_mul_rhs ; GFX10-UNSAFE: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 ; GFX10-UNSAFE: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 @@ -2287,6 +2499,7 @@ body: | ; GFX10-UNSAFE: [[MV7:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY14]](s32), [[COPY15]](s32) ; GFX10-UNSAFE: [[MV8:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY16]](s32), [[COPY17]](s32) ; 
GFX10-UNSAFE: [[BUILD_VECTOR2:%[0-9]+]]:_(<3 x s64>) = G_BUILD_VECTOR [[MV6]](s64), [[MV7]](s64), [[MV8]](s64) + ; GFX10-UNSAFE: [[COPY18:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 ; GFX10-UNSAFE: [[FMUL:%[0-9]+]]:_(<3 x s64>) = G_FMUL [[BUILD_VECTOR]], [[BUILD_VECTOR1]] ; GFX10-UNSAFE: [[FADD:%[0-9]+]]:_(<3 x s64>) = G_FADD [[BUILD_VECTOR2]], [[FMUL]] ; GFX10-UNSAFE: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[FADD]](<3 x s64>) @@ -2296,7 +2509,8 @@ body: | ; GFX10-UNSAFE: $vgpr3 = COPY [[UV3]](s32) ; GFX10-UNSAFE: $vgpr4 = COPY [[UV4]](s32) ; GFX10-UNSAFE: $vgpr5 = COPY [[UV5]](s32) - ; GFX10-UNSAFE: S_SETPC_B64_return $sgpr30_sgpr31, implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5 + ; GFX10-UNSAFE: [[COPY19:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY18]] + ; GFX10-UNSAFE: S_SETPC_B64_return [[COPY19]], implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5 %4:_(s32) = COPY $vgpr0 %5:_(s32) = COPY $vgpr1 %6:_(s32) = COPY $vgpr2 @@ -2327,6 +2541,7 @@ body: | %29:_(s64) = G_MERGE_VALUES %18(s32), %19(s32) %30:_(s64) = G_MERGE_VALUES %20(s32), %21(s32) %2:_(<3 x s64>) = G_BUILD_VECTOR %28(s64), %29(s64), %30(s64) + %3:sgpr_64 = COPY $sgpr30_sgpr31 %31:_(<3 x s64>) = G_FMUL %0, %1 %32:_(<3 x s64>) = G_FADD %2, %31 %34:_(s32), %35:_(s32), %36:_(s32), %37:_(s32), %38:_(s32), %39:_(s32) = G_UNMERGE_VALUES %32(<3 x s64>) @@ -2336,5 +2551,6 @@ body: | $vgpr3 = COPY %37(s32) $vgpr4 = COPY %38(s32) $vgpr5 = COPY %39(s32) - S_SETPC_B64_return $sgpr30_sgpr31, implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5 + %33:ccr_sgpr_64 = COPY %3 + S_SETPC_B64_return %33, implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5 ... 
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/combine-fma-add-mul-pre-legalize.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/combine-fma-add-mul-pre-legalize.mir index d6ef87759e9d..67d0fd31db6b 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/combine-fma-add-mul-pre-legalize.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/combine-fma-add-mul-pre-legalize.mir @@ -12,157 +12,193 @@ name: test_f32_add_mul body: | bb.1.entry: - liveins: $vgpr0, $vgpr1, $vgpr2 + liveins: $vgpr0, $vgpr1, $vgpr2, $sgpr30_sgpr31 ; GFX9-LABEL: name: test_f32_add_mul ; GFX9: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 ; GFX9: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 ; GFX9: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 + ; GFX9: [[COPY3:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 ; GFX9: [[FMUL:%[0-9]+]]:_(s32) = reassoc G_FMUL [[COPY]], [[COPY1]] ; GFX9: [[FADD:%[0-9]+]]:_(s32) = reassoc G_FADD [[FMUL]], [[COPY2]] ; GFX9: $vgpr0 = COPY [[FADD]](s32) - ; GFX9: S_SETPC_B64_return $sgpr30_sgpr31, implicit $vgpr0 + ; GFX9: [[COPY4:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY3]] + ; GFX9: S_SETPC_B64_return [[COPY4]], implicit $vgpr0 ; GFX9-CONTRACT-LABEL: name: test_f32_add_mul ; GFX9-CONTRACT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 ; GFX9-CONTRACT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 ; GFX9-CONTRACT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 + ; GFX9-CONTRACT: [[COPY3:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 ; GFX9-CONTRACT: [[FMA:%[0-9]+]]:_(s32) = G_FMA [[COPY]], [[COPY1]], [[COPY2]] ; GFX9-CONTRACT: $vgpr0 = COPY [[FMA]](s32) - ; GFX9-CONTRACT: S_SETPC_B64_return $sgpr30_sgpr31, implicit $vgpr0 + ; GFX9-CONTRACT: [[COPY4:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY3]] + ; GFX9-CONTRACT: S_SETPC_B64_return [[COPY4]], implicit $vgpr0 ; GFX9-DENORM-LABEL: name: test_f32_add_mul ; GFX9-DENORM: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 ; GFX9-DENORM: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 ; GFX9-DENORM: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 + ; GFX9-DENORM: [[COPY3:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 ; GFX9-DENORM: [[FMUL:%[0-9]+]]:_(s32) = reassoc G_FMUL [[COPY]], [[COPY1]] ; GFX9-DENORM: [[FADD:%[0-9]+]]:_(s32) = reassoc G_FADD [[FMUL]], [[COPY2]] ; GFX9-DENORM: $vgpr0 = COPY [[FADD]](s32) - ; GFX9-DENORM: S_SETPC_B64_return $sgpr30_sgpr31, implicit $vgpr0 + ; GFX9-DENORM: [[COPY4:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY3]] + ; GFX9-DENORM: S_SETPC_B64_return [[COPY4]], implicit $vgpr0 ; GFX9-UNSAFE-LABEL: name: test_f32_add_mul ; GFX9-UNSAFE: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 ; GFX9-UNSAFE: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 ; GFX9-UNSAFE: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 + ; GFX9-UNSAFE: [[COPY3:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 ; GFX9-UNSAFE: [[FMA:%[0-9]+]]:_(s32) = G_FMA [[COPY]], [[COPY1]], [[COPY2]] ; GFX9-UNSAFE: $vgpr0 = COPY [[FMA]](s32) - ; GFX9-UNSAFE: S_SETPC_B64_return $sgpr30_sgpr31, implicit $vgpr0 + ; GFX9-UNSAFE: [[COPY4:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY3]] + ; GFX9-UNSAFE: S_SETPC_B64_return [[COPY4]], implicit $vgpr0 ; GFX10-LABEL: name: test_f32_add_mul ; GFX10: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 ; GFX10: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 ; GFX10: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 + ; GFX10: [[COPY3:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 ; GFX10: [[FMUL:%[0-9]+]]:_(s32) = reassoc G_FMUL [[COPY]], [[COPY1]] ; GFX10: [[FADD:%[0-9]+]]:_(s32) = reassoc G_FADD [[FMUL]], [[COPY2]] ; GFX10: $vgpr0 = COPY [[FADD]](s32) - ; GFX10: S_SETPC_B64_return $sgpr30_sgpr31, implicit $vgpr0 + ; GFX10: [[COPY4:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY3]] + ; GFX10: S_SETPC_B64_return [[COPY4]], implicit $vgpr0 ; 
GFX10-CONTRACT-LABEL: name: test_f32_add_mul ; GFX10-CONTRACT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 ; GFX10-CONTRACT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 ; GFX10-CONTRACT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 + ; GFX10-CONTRACT: [[COPY3:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 ; GFX10-CONTRACT: [[FMA:%[0-9]+]]:_(s32) = G_FMA [[COPY]], [[COPY1]], [[COPY2]] ; GFX10-CONTRACT: $vgpr0 = COPY [[FMA]](s32) - ; GFX10-CONTRACT: S_SETPC_B64_return $sgpr30_sgpr31, implicit $vgpr0 + ; GFX10-CONTRACT: [[COPY4:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY3]] + ; GFX10-CONTRACT: S_SETPC_B64_return [[COPY4]], implicit $vgpr0 ; GFX10-DENORM-LABEL: name: test_f32_add_mul ; GFX10-DENORM: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 ; GFX10-DENORM: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 ; GFX10-DENORM: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 + ; GFX10-DENORM: [[COPY3:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 ; GFX10-DENORM: [[FMUL:%[0-9]+]]:_(s32) = reassoc G_FMUL [[COPY]], [[COPY1]] ; GFX10-DENORM: [[FADD:%[0-9]+]]:_(s32) = reassoc G_FADD [[FMUL]], [[COPY2]] ; GFX10-DENORM: $vgpr0 = COPY [[FADD]](s32) - ; GFX10-DENORM: S_SETPC_B64_return $sgpr30_sgpr31, implicit $vgpr0 + ; GFX10-DENORM: [[COPY4:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY3]] + ; GFX10-DENORM: S_SETPC_B64_return [[COPY4]], implicit $vgpr0 ; GFX10-UNSAFE-LABEL: name: test_f32_add_mul ; GFX10-UNSAFE: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 ; GFX10-UNSAFE: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 ; GFX10-UNSAFE: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 + ; GFX10-UNSAFE: [[COPY3:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 ; GFX10-UNSAFE: [[FMA:%[0-9]+]]:_(s32) = G_FMA [[COPY]], [[COPY1]], [[COPY2]] ; GFX10-UNSAFE: $vgpr0 = COPY [[FMA]](s32) - ; GFX10-UNSAFE: S_SETPC_B64_return $sgpr30_sgpr31, implicit $vgpr0 + ; GFX10-UNSAFE: [[COPY4:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY3]] + ; GFX10-UNSAFE: S_SETPC_B64_return [[COPY4]], implicit $vgpr0 %0:_(s32) = COPY $vgpr0 %1:_(s32) = COPY $vgpr1 %2:_(s32) = COPY $vgpr2 + %3:sgpr_64 = COPY $sgpr30_sgpr31 %4:_(s32) = reassoc G_FMUL %0, %1 %5:_(s32) = reassoc G_FADD %4, %2 $vgpr0 = COPY %5(s32) - S_SETPC_B64_return $sgpr30_sgpr31, implicit $vgpr0 + %6:ccr_sgpr_64 = COPY %3 + S_SETPC_B64_return %6, implicit $vgpr0 ... 
--- name: test_f32_add_mul_rhs body: | bb.1.entry: - liveins: $vgpr0, $vgpr1, $vgpr2 + liveins: $vgpr0, $vgpr1, $vgpr2, $sgpr30_sgpr31 ; GFX9-LABEL: name: test_f32_add_mul_rhs ; GFX9: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 ; GFX9: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 ; GFX9: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 + ; GFX9: [[COPY3:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 ; GFX9: [[FMUL:%[0-9]+]]:_(s32) = reassoc G_FMUL [[COPY]], [[COPY1]] ; GFX9: [[FADD:%[0-9]+]]:_(s32) = reassoc G_FADD [[COPY2]], [[FMUL]] ; GFX9: $vgpr0 = COPY [[FADD]](s32) - ; GFX9: S_SETPC_B64_return $sgpr30_sgpr31, implicit $vgpr0 + ; GFX9: [[COPY4:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY3]] + ; GFX9: S_SETPC_B64_return [[COPY4]], implicit $vgpr0 ; GFX9-CONTRACT-LABEL: name: test_f32_add_mul_rhs ; GFX9-CONTRACT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 ; GFX9-CONTRACT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 ; GFX9-CONTRACT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 + ; GFX9-CONTRACT: [[COPY3:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 ; GFX9-CONTRACT: [[FMA:%[0-9]+]]:_(s32) = G_FMA [[COPY]], [[COPY1]], [[COPY2]] ; GFX9-CONTRACT: $vgpr0 = COPY [[FMA]](s32) - ; GFX9-CONTRACT: S_SETPC_B64_return $sgpr30_sgpr31, implicit $vgpr0 + ; GFX9-CONTRACT: [[COPY4:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY3]] + ; GFX9-CONTRACT: S_SETPC_B64_return [[COPY4]], implicit $vgpr0 ; GFX9-DENORM-LABEL: name: test_f32_add_mul_rhs ; GFX9-DENORM: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 ; GFX9-DENORM: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 ; GFX9-DENORM: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 + ; GFX9-DENORM: [[COPY3:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 ; GFX9-DENORM: [[FMUL:%[0-9]+]]:_(s32) = reassoc G_FMUL [[COPY]], [[COPY1]] ; GFX9-DENORM: [[FADD:%[0-9]+]]:_(s32) = reassoc G_FADD [[COPY2]], [[FMUL]] ; GFX9-DENORM: $vgpr0 = COPY [[FADD]](s32) - ; GFX9-DENORM: S_SETPC_B64_return $sgpr30_sgpr31, implicit $vgpr0 + ; GFX9-DENORM: [[COPY4:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY3]] + ; GFX9-DENORM: S_SETPC_B64_return [[COPY4]], implicit $vgpr0 ; GFX9-UNSAFE-LABEL: name: test_f32_add_mul_rhs ; GFX9-UNSAFE: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 ; GFX9-UNSAFE: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 ; GFX9-UNSAFE: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 + ; GFX9-UNSAFE: [[COPY3:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 ; GFX9-UNSAFE: [[FMA:%[0-9]+]]:_(s32) = G_FMA [[COPY]], [[COPY1]], [[COPY2]] ; GFX9-UNSAFE: $vgpr0 = COPY [[FMA]](s32) - ; GFX9-UNSAFE: S_SETPC_B64_return $sgpr30_sgpr31, implicit $vgpr0 + ; GFX9-UNSAFE: [[COPY4:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY3]] + ; GFX9-UNSAFE: S_SETPC_B64_return [[COPY4]], implicit $vgpr0 ; GFX10-LABEL: name: test_f32_add_mul_rhs ; GFX10: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 ; GFX10: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 ; GFX10: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 + ; GFX10: [[COPY3:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 ; GFX10: [[FMUL:%[0-9]+]]:_(s32) = reassoc G_FMUL [[COPY]], [[COPY1]] ; GFX10: [[FADD:%[0-9]+]]:_(s32) = reassoc G_FADD [[COPY2]], [[FMUL]] ; GFX10: $vgpr0 = COPY [[FADD]](s32) - ; GFX10: S_SETPC_B64_return $sgpr30_sgpr31, implicit $vgpr0 + ; GFX10: [[COPY4:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY3]] + ; GFX10: S_SETPC_B64_return [[COPY4]], implicit $vgpr0 ; GFX10-CONTRACT-LABEL: name: test_f32_add_mul_rhs ; GFX10-CONTRACT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 ; GFX10-CONTRACT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 ; GFX10-CONTRACT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 + ; GFX10-CONTRACT: [[COPY3:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 ; GFX10-CONTRACT: [[FMA:%[0-9]+]]:_(s32) = G_FMA [[COPY]], [[COPY1]], [[COPY2]] ; 
GFX10-CONTRACT: $vgpr0 = COPY [[FMA]](s32) - ; GFX10-CONTRACT: S_SETPC_B64_return $sgpr30_sgpr31, implicit $vgpr0 + ; GFX10-CONTRACT: [[COPY4:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY3]] + ; GFX10-CONTRACT: S_SETPC_B64_return [[COPY4]], implicit $vgpr0 ; GFX10-DENORM-LABEL: name: test_f32_add_mul_rhs ; GFX10-DENORM: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 ; GFX10-DENORM: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 ; GFX10-DENORM: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 + ; GFX10-DENORM: [[COPY3:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 ; GFX10-DENORM: [[FMUL:%[0-9]+]]:_(s32) = reassoc G_FMUL [[COPY]], [[COPY1]] ; GFX10-DENORM: [[FADD:%[0-9]+]]:_(s32) = reassoc G_FADD [[COPY2]], [[FMUL]] ; GFX10-DENORM: $vgpr0 = COPY [[FADD]](s32) - ; GFX10-DENORM: S_SETPC_B64_return $sgpr30_sgpr31, implicit $vgpr0 + ; GFX10-DENORM: [[COPY4:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY3]] + ; GFX10-DENORM: S_SETPC_B64_return [[COPY4]], implicit $vgpr0 ; GFX10-UNSAFE-LABEL: name: test_f32_add_mul_rhs ; GFX10-UNSAFE: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 ; GFX10-UNSAFE: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 ; GFX10-UNSAFE: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 + ; GFX10-UNSAFE: [[COPY3:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 ; GFX10-UNSAFE: [[FMA:%[0-9]+]]:_(s32) = G_FMA [[COPY]], [[COPY1]], [[COPY2]] ; GFX10-UNSAFE: $vgpr0 = COPY [[FMA]](s32) - ; GFX10-UNSAFE: S_SETPC_B64_return $sgpr30_sgpr31, implicit $vgpr0 + ; GFX10-UNSAFE: [[COPY4:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY3]] + ; GFX10-UNSAFE: S_SETPC_B64_return [[COPY4]], implicit $vgpr0 %0:_(s32) = COPY $vgpr0 %1:_(s32) = COPY $vgpr1 %2:_(s32) = COPY $vgpr2 + %3:sgpr_64 = COPY $sgpr30_sgpr31 %4:_(s32) = reassoc G_FMUL %0, %1 %5:_(s32) = reassoc G_FADD %2, %4 $vgpr0 = COPY %5(s32) - S_SETPC_B64_return $sgpr30_sgpr31, implicit $vgpr0 + %6:ccr_sgpr_64 = COPY %3 + S_SETPC_B64_return %6, implicit $vgpr0 ... 
--- name: test_half_add_mul body: | bb.1.entry: - liveins: $vgpr0, $vgpr1, $vgpr2 + liveins: $vgpr0, $vgpr1, $vgpr2, $sgpr30_sgpr31 ; GFX9-LABEL: name: test_half_add_mul ; GFX9: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 @@ -171,11 +207,13 @@ body: | ; GFX9: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY1]](s32) ; GFX9: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 ; GFX9: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[COPY2]](s32) + ; GFX9: [[COPY3:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 ; GFX9: [[FMUL:%[0-9]+]]:_(s16) = reassoc G_FMUL [[TRUNC]], [[TRUNC1]] ; GFX9: [[FADD:%[0-9]+]]:_(s16) = reassoc G_FADD [[FMUL]], [[TRUNC2]] ; GFX9: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[FADD]](s16) ; GFX9: $vgpr0 = COPY [[ANYEXT]](s32) - ; GFX9: S_SETPC_B64_return $sgpr30_sgpr31, implicit $vgpr0 + ; GFX9: [[COPY4:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY3]] + ; GFX9: S_SETPC_B64_return [[COPY4]], implicit $vgpr0 ; GFX9-CONTRACT-LABEL: name: test_half_add_mul ; GFX9-CONTRACT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 ; GFX9-CONTRACT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY]](s32) @@ -183,10 +221,12 @@ body: | ; GFX9-CONTRACT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY1]](s32) ; GFX9-CONTRACT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 ; GFX9-CONTRACT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[COPY2]](s32) + ; GFX9-CONTRACT: [[COPY3:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 ; GFX9-CONTRACT: [[FMA:%[0-9]+]]:_(s16) = G_FMA [[TRUNC]], [[TRUNC1]], [[TRUNC2]] ; GFX9-CONTRACT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[FMA]](s16) ; GFX9-CONTRACT: $vgpr0 = COPY [[ANYEXT]](s32) - ; GFX9-CONTRACT: S_SETPC_B64_return $sgpr30_sgpr31, implicit $vgpr0 + ; GFX9-CONTRACT: [[COPY4:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY3]] + ; GFX9-CONTRACT: S_SETPC_B64_return [[COPY4]], implicit $vgpr0 ; GFX9-DENORM-LABEL: name: test_half_add_mul ; GFX9-DENORM: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 ; GFX9-DENORM: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY]](s32) @@ -194,11 +234,13 @@ body: | ; GFX9-DENORM: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY1]](s32) ; GFX9-DENORM: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 ; GFX9-DENORM: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[COPY2]](s32) + ; GFX9-DENORM: [[COPY3:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 ; GFX9-DENORM: [[FMUL:%[0-9]+]]:_(s16) = reassoc G_FMUL [[TRUNC]], [[TRUNC1]] ; GFX9-DENORM: [[FADD:%[0-9]+]]:_(s16) = reassoc G_FADD [[FMUL]], [[TRUNC2]] ; GFX9-DENORM: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[FADD]](s16) ; GFX9-DENORM: $vgpr0 = COPY [[ANYEXT]](s32) - ; GFX9-DENORM: S_SETPC_B64_return $sgpr30_sgpr31, implicit $vgpr0 + ; GFX9-DENORM: [[COPY4:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY3]] + ; GFX9-DENORM: S_SETPC_B64_return [[COPY4]], implicit $vgpr0 ; GFX9-UNSAFE-LABEL: name: test_half_add_mul ; GFX9-UNSAFE: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 ; GFX9-UNSAFE: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY]](s32) @@ -206,10 +248,12 @@ body: | ; GFX9-UNSAFE: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY1]](s32) ; GFX9-UNSAFE: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 ; GFX9-UNSAFE: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[COPY2]](s32) + ; GFX9-UNSAFE: [[COPY3:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 ; GFX9-UNSAFE: [[FMA:%[0-9]+]]:_(s16) = G_FMA [[TRUNC]], [[TRUNC1]], [[TRUNC2]] ; GFX9-UNSAFE: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[FMA]](s16) ; GFX9-UNSAFE: $vgpr0 = COPY [[ANYEXT]](s32) - ; GFX9-UNSAFE: S_SETPC_B64_return $sgpr30_sgpr31, implicit $vgpr0 + ; GFX9-UNSAFE: [[COPY4:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY3]] + ; GFX9-UNSAFE: S_SETPC_B64_return [[COPY4]], implicit $vgpr0 ; GFX10-LABEL: name: test_half_add_mul ; GFX10: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 ; 
GFX10: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY]](s32) @@ -217,11 +261,13 @@ body: | ; GFX10: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY1]](s32) ; GFX10: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 ; GFX10: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[COPY2]](s32) + ; GFX10: [[COPY3:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 ; GFX10: [[FMUL:%[0-9]+]]:_(s16) = reassoc G_FMUL [[TRUNC]], [[TRUNC1]] ; GFX10: [[FADD:%[0-9]+]]:_(s16) = reassoc G_FADD [[FMUL]], [[TRUNC2]] ; GFX10: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[FADD]](s16) ; GFX10: $vgpr0 = COPY [[ANYEXT]](s32) - ; GFX10: S_SETPC_B64_return $sgpr30_sgpr31, implicit $vgpr0 + ; GFX10: [[COPY4:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY3]] + ; GFX10: S_SETPC_B64_return [[COPY4]], implicit $vgpr0 ; GFX10-CONTRACT-LABEL: name: test_half_add_mul ; GFX10-CONTRACT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 ; GFX10-CONTRACT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY]](s32) @@ -229,10 +275,12 @@ body: | ; GFX10-CONTRACT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY1]](s32) ; GFX10-CONTRACT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 ; GFX10-CONTRACT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[COPY2]](s32) + ; GFX10-CONTRACT: [[COPY3:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 ; GFX10-CONTRACT: [[FMA:%[0-9]+]]:_(s16) = G_FMA [[TRUNC]], [[TRUNC1]], [[TRUNC2]] ; GFX10-CONTRACT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[FMA]](s16) ; GFX10-CONTRACT: $vgpr0 = COPY [[ANYEXT]](s32) - ; GFX10-CONTRACT: S_SETPC_B64_return $sgpr30_sgpr31, implicit $vgpr0 + ; GFX10-CONTRACT: [[COPY4:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY3]] + ; GFX10-CONTRACT: S_SETPC_B64_return [[COPY4]], implicit $vgpr0 ; GFX10-DENORM-LABEL: name: test_half_add_mul ; GFX10-DENORM: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 ; GFX10-DENORM: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY]](s32) @@ -240,11 +288,13 @@ body: | ; GFX10-DENORM: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY1]](s32) ; GFX10-DENORM: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 ; GFX10-DENORM: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[COPY2]](s32) + ; GFX10-DENORM: [[COPY3:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 ; GFX10-DENORM: [[FMUL:%[0-9]+]]:_(s16) = reassoc G_FMUL [[TRUNC]], [[TRUNC1]] ; GFX10-DENORM: [[FADD:%[0-9]+]]:_(s16) = reassoc G_FADD [[FMUL]], [[TRUNC2]] ; GFX10-DENORM: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[FADD]](s16) ; GFX10-DENORM: $vgpr0 = COPY [[ANYEXT]](s32) - ; GFX10-DENORM: S_SETPC_B64_return $sgpr30_sgpr31, implicit $vgpr0 + ; GFX10-DENORM: [[COPY4:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY3]] + ; GFX10-DENORM: S_SETPC_B64_return [[COPY4]], implicit $vgpr0 ; GFX10-UNSAFE-LABEL: name: test_half_add_mul ; GFX10-UNSAFE: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 ; GFX10-UNSAFE: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY]](s32) @@ -252,28 +302,32 @@ body: | ; GFX10-UNSAFE: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY1]](s32) ; GFX10-UNSAFE: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 ; GFX10-UNSAFE: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[COPY2]](s32) + ; GFX10-UNSAFE: [[COPY3:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 ; GFX10-UNSAFE: [[FMA:%[0-9]+]]:_(s16) = G_FMA [[TRUNC]], [[TRUNC1]], [[TRUNC2]] ; GFX10-UNSAFE: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[FMA]](s16) ; GFX10-UNSAFE: $vgpr0 = COPY [[ANYEXT]](s32) - ; GFX10-UNSAFE: S_SETPC_B64_return $sgpr30_sgpr31, implicit $vgpr0 + ; GFX10-UNSAFE: [[COPY4:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY3]] + ; GFX10-UNSAFE: S_SETPC_B64_return [[COPY4]], implicit $vgpr0 %4:_(s32) = COPY $vgpr0 %0:_(s16) = G_TRUNC %4(s32) %5:_(s32) = COPY $vgpr1 %1:_(s16) = G_TRUNC %5(s32) %6:_(s32) = COPY $vgpr2 %2:_(s16) = G_TRUNC %6(s32) + %3:sgpr_64 = COPY $sgpr30_sgpr31 
%7:_(s16) = reassoc G_FMUL %0, %1 %8:_(s16) = reassoc G_FADD %7, %2 %10:_(s32) = G_ANYEXT %8(s16) $vgpr0 = COPY %10(s32) - S_SETPC_B64_return $sgpr30_sgpr31, implicit $vgpr0 + %9:ccr_sgpr_64 = COPY %3 + S_SETPC_B64_return %9, implicit $vgpr0 ... --- name: test_half_add_mul_rhs body: | bb.1.entry: - liveins: $vgpr0, $vgpr1, $vgpr2 + liveins: $vgpr0, $vgpr1, $vgpr2, $sgpr30_sgpr31 ; GFX9-LABEL: name: test_half_add_mul_rhs ; GFX9: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 @@ -282,11 +336,13 @@ body: | ; GFX9: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY1]](s32) ; GFX9: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 ; GFX9: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[COPY2]](s32) + ; GFX9: [[COPY3:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 ; GFX9: [[FMUL:%[0-9]+]]:_(s16) = reassoc G_FMUL [[TRUNC]], [[TRUNC1]] ; GFX9: [[FADD:%[0-9]+]]:_(s16) = reassoc G_FADD [[TRUNC2]], [[FMUL]] ; GFX9: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[FADD]](s16) ; GFX9: $vgpr0 = COPY [[ANYEXT]](s32) - ; GFX9: S_SETPC_B64_return $sgpr30_sgpr31, implicit $vgpr0 + ; GFX9: [[COPY4:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY3]] + ; GFX9: S_SETPC_B64_return [[COPY4]], implicit $vgpr0 ; GFX9-CONTRACT-LABEL: name: test_half_add_mul_rhs ; GFX9-CONTRACT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 ; GFX9-CONTRACT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY]](s32) @@ -294,10 +350,12 @@ body: | ; GFX9-CONTRACT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY1]](s32) ; GFX9-CONTRACT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 ; GFX9-CONTRACT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[COPY2]](s32) + ; GFX9-CONTRACT: [[COPY3:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 ; GFX9-CONTRACT: [[FMA:%[0-9]+]]:_(s16) = G_FMA [[TRUNC]], [[TRUNC1]], [[TRUNC2]] ; GFX9-CONTRACT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[FMA]](s16) ; GFX9-CONTRACT: $vgpr0 = COPY [[ANYEXT]](s32) - ; GFX9-CONTRACT: S_SETPC_B64_return $sgpr30_sgpr31, implicit $vgpr0 + ; GFX9-CONTRACT: [[COPY4:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY3]] + ; GFX9-CONTRACT: S_SETPC_B64_return [[COPY4]], implicit $vgpr0 ; GFX9-DENORM-LABEL: name: test_half_add_mul_rhs ; GFX9-DENORM: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 ; GFX9-DENORM: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY]](s32) @@ -305,11 +363,13 @@ body: | ; GFX9-DENORM: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY1]](s32) ; GFX9-DENORM: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 ; GFX9-DENORM: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[COPY2]](s32) + ; GFX9-DENORM: [[COPY3:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 ; GFX9-DENORM: [[FMUL:%[0-9]+]]:_(s16) = reassoc G_FMUL [[TRUNC]], [[TRUNC1]] ; GFX9-DENORM: [[FADD:%[0-9]+]]:_(s16) = reassoc G_FADD [[TRUNC2]], [[FMUL]] ; GFX9-DENORM: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[FADD]](s16) ; GFX9-DENORM: $vgpr0 = COPY [[ANYEXT]](s32) - ; GFX9-DENORM: S_SETPC_B64_return $sgpr30_sgpr31, implicit $vgpr0 + ; GFX9-DENORM: [[COPY4:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY3]] + ; GFX9-DENORM: S_SETPC_B64_return [[COPY4]], implicit $vgpr0 ; GFX9-UNSAFE-LABEL: name: test_half_add_mul_rhs ; GFX9-UNSAFE: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 ; GFX9-UNSAFE: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY]](s32) @@ -317,10 +377,12 @@ body: | ; GFX9-UNSAFE: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY1]](s32) ; GFX9-UNSAFE: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 ; GFX9-UNSAFE: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[COPY2]](s32) + ; GFX9-UNSAFE: [[COPY3:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 ; GFX9-UNSAFE: [[FMA:%[0-9]+]]:_(s16) = G_FMA [[TRUNC]], [[TRUNC1]], [[TRUNC2]] ; GFX9-UNSAFE: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[FMA]](s16) ; GFX9-UNSAFE: $vgpr0 = COPY [[ANYEXT]](s32) - ; GFX9-UNSAFE: 
S_SETPC_B64_return $sgpr30_sgpr31, implicit $vgpr0 + ; GFX9-UNSAFE: [[COPY4:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY3]] + ; GFX9-UNSAFE: S_SETPC_B64_return [[COPY4]], implicit $vgpr0 ; GFX10-LABEL: name: test_half_add_mul_rhs ; GFX10: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 ; GFX10: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY]](s32) @@ -328,11 +390,13 @@ body: | ; GFX10: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY1]](s32) ; GFX10: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 ; GFX10: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[COPY2]](s32) + ; GFX10: [[COPY3:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 ; GFX10: [[FMUL:%[0-9]+]]:_(s16) = reassoc G_FMUL [[TRUNC]], [[TRUNC1]] ; GFX10: [[FADD:%[0-9]+]]:_(s16) = reassoc G_FADD [[TRUNC2]], [[FMUL]] ; GFX10: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[FADD]](s16) ; GFX10: $vgpr0 = COPY [[ANYEXT]](s32) - ; GFX10: S_SETPC_B64_return $sgpr30_sgpr31, implicit $vgpr0 + ; GFX10: [[COPY4:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY3]] + ; GFX10: S_SETPC_B64_return [[COPY4]], implicit $vgpr0 ; GFX10-CONTRACT-LABEL: name: test_half_add_mul_rhs ; GFX10-CONTRACT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 ; GFX10-CONTRACT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY]](s32) @@ -340,10 +404,12 @@ body: | ; GFX10-CONTRACT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY1]](s32) ; GFX10-CONTRACT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 ; GFX10-CONTRACT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[COPY2]](s32) + ; GFX10-CONTRACT: [[COPY3:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 ; GFX10-CONTRACT: [[FMA:%[0-9]+]]:_(s16) = G_FMA [[TRUNC]], [[TRUNC1]], [[TRUNC2]] ; GFX10-CONTRACT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[FMA]](s16) ; GFX10-CONTRACT: $vgpr0 = COPY [[ANYEXT]](s32) - ; GFX10-CONTRACT: S_SETPC_B64_return $sgpr30_sgpr31, implicit $vgpr0 + ; GFX10-CONTRACT: [[COPY4:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY3]] + ; GFX10-CONTRACT: S_SETPC_B64_return [[COPY4]], implicit $vgpr0 ; GFX10-DENORM-LABEL: name: test_half_add_mul_rhs ; GFX10-DENORM: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 ; GFX10-DENORM: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY]](s32) @@ -351,11 +417,13 @@ body: | ; GFX10-DENORM: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY1]](s32) ; GFX10-DENORM: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 ; GFX10-DENORM: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[COPY2]](s32) + ; GFX10-DENORM: [[COPY3:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 ; GFX10-DENORM: [[FMUL:%[0-9]+]]:_(s16) = reassoc G_FMUL [[TRUNC]], [[TRUNC1]] ; GFX10-DENORM: [[FADD:%[0-9]+]]:_(s16) = reassoc G_FADD [[TRUNC2]], [[FMUL]] ; GFX10-DENORM: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[FADD]](s16) ; GFX10-DENORM: $vgpr0 = COPY [[ANYEXT]](s32) - ; GFX10-DENORM: S_SETPC_B64_return $sgpr30_sgpr31, implicit $vgpr0 + ; GFX10-DENORM: [[COPY4:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY3]] + ; GFX10-DENORM: S_SETPC_B64_return [[COPY4]], implicit $vgpr0 ; GFX10-UNSAFE-LABEL: name: test_half_add_mul_rhs ; GFX10-UNSAFE: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 ; GFX10-UNSAFE: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY]](s32) @@ -363,28 +431,32 @@ body: | ; GFX10-UNSAFE: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY1]](s32) ; GFX10-UNSAFE: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 ; GFX10-UNSAFE: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[COPY2]](s32) + ; GFX10-UNSAFE: [[COPY3:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 ; GFX10-UNSAFE: [[FMA:%[0-9]+]]:_(s16) = G_FMA [[TRUNC]], [[TRUNC1]], [[TRUNC2]] ; GFX10-UNSAFE: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[FMA]](s16) ; GFX10-UNSAFE: $vgpr0 = COPY [[ANYEXT]](s32) - ; GFX10-UNSAFE: S_SETPC_B64_return $sgpr30_sgpr31, implicit $vgpr0 + ; GFX10-UNSAFE: 
[[COPY4:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY3]] + ; GFX10-UNSAFE: S_SETPC_B64_return [[COPY4]], implicit $vgpr0 %4:_(s32) = COPY $vgpr0 %0:_(s16) = G_TRUNC %4(s32) %5:_(s32) = COPY $vgpr1 %1:_(s16) = G_TRUNC %5(s32) %6:_(s32) = COPY $vgpr2 %2:_(s16) = G_TRUNC %6(s32) + %3:sgpr_64 = COPY $sgpr30_sgpr31 %7:_(s16) = reassoc G_FMUL %0, %1 %8:_(s16) = reassoc G_FADD %2, %7 %10:_(s32) = G_ANYEXT %8(s16) $vgpr0 = COPY %10(s32) - S_SETPC_B64_return $sgpr30_sgpr31, implicit $vgpr0 + %9:ccr_sgpr_64 = COPY %3 + S_SETPC_B64_return %9, implicit $vgpr0 ... --- name: test_double_add_mul body: | bb.1.entry: - liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5 + liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $sgpr30_sgpr31 ; GFX9-LABEL: name: test_double_add_mul ; GFX9: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 @@ -396,12 +468,14 @@ body: | ; GFX9: [[COPY4:%[0-9]+]]:_(s32) = COPY $vgpr4 ; GFX9: [[COPY5:%[0-9]+]]:_(s32) = COPY $vgpr5 ; GFX9: [[MV2:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY4]](s32), [[COPY5]](s32) + ; GFX9: [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 ; GFX9: [[FMUL:%[0-9]+]]:_(s64) = reassoc G_FMUL [[MV]], [[MV1]] ; GFX9: [[FADD:%[0-9]+]]:_(s64) = reassoc G_FADD [[FMUL]], [[MV2]] ; GFX9: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[FADD]](s64) ; GFX9: $vgpr0 = COPY [[UV]](s32) ; GFX9: $vgpr1 = COPY [[UV1]](s32) - ; GFX9: S_SETPC_B64_return $sgpr30_sgpr31, implicit $vgpr0, implicit $vgpr1 + ; GFX9: [[COPY7:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY6]] + ; GFX9: S_SETPC_B64_return [[COPY7]], implicit $vgpr0, implicit $vgpr1 ; GFX9-CONTRACT-LABEL: name: test_double_add_mul ; GFX9-CONTRACT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 ; GFX9-CONTRACT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 @@ -412,11 +486,13 @@ body: | ; GFX9-CONTRACT: [[COPY4:%[0-9]+]]:_(s32) = COPY $vgpr4 ; GFX9-CONTRACT: [[COPY5:%[0-9]+]]:_(s32) = COPY $vgpr5 ; GFX9-CONTRACT: [[MV2:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY4]](s32), [[COPY5]](s32) + ; GFX9-CONTRACT: [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 ; GFX9-CONTRACT: [[FMA:%[0-9]+]]:_(s64) = G_FMA [[MV]], [[MV1]], [[MV2]] ; GFX9-CONTRACT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[FMA]](s64) ; GFX9-CONTRACT: $vgpr0 = COPY [[UV]](s32) ; GFX9-CONTRACT: $vgpr1 = COPY [[UV1]](s32) - ; GFX9-CONTRACT: S_SETPC_B64_return $sgpr30_sgpr31, implicit $vgpr0, implicit $vgpr1 + ; GFX9-CONTRACT: [[COPY7:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY6]] + ; GFX9-CONTRACT: S_SETPC_B64_return [[COPY7]], implicit $vgpr0, implicit $vgpr1 ; GFX9-DENORM-LABEL: name: test_double_add_mul ; GFX9-DENORM: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 ; GFX9-DENORM: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 @@ -427,12 +503,14 @@ body: | ; GFX9-DENORM: [[COPY4:%[0-9]+]]:_(s32) = COPY $vgpr4 ; GFX9-DENORM: [[COPY5:%[0-9]+]]:_(s32) = COPY $vgpr5 ; GFX9-DENORM: [[MV2:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY4]](s32), [[COPY5]](s32) + ; GFX9-DENORM: [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 ; GFX9-DENORM: [[FMUL:%[0-9]+]]:_(s64) = reassoc G_FMUL [[MV]], [[MV1]] ; GFX9-DENORM: [[FADD:%[0-9]+]]:_(s64) = reassoc G_FADD [[FMUL]], [[MV2]] ; GFX9-DENORM: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[FADD]](s64) ; GFX9-DENORM: $vgpr0 = COPY [[UV]](s32) ; GFX9-DENORM: $vgpr1 = COPY [[UV1]](s32) - ; GFX9-DENORM: S_SETPC_B64_return $sgpr30_sgpr31, implicit $vgpr0, implicit $vgpr1 + ; GFX9-DENORM: [[COPY7:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY6]] + ; GFX9-DENORM: S_SETPC_B64_return [[COPY7]], implicit $vgpr0, implicit $vgpr1 ; GFX9-UNSAFE-LABEL: name: 
test_double_add_mul ; GFX9-UNSAFE: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 ; GFX9-UNSAFE: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 @@ -443,11 +521,13 @@ body: | ; GFX9-UNSAFE: [[COPY4:%[0-9]+]]:_(s32) = COPY $vgpr4 ; GFX9-UNSAFE: [[COPY5:%[0-9]+]]:_(s32) = COPY $vgpr5 ; GFX9-UNSAFE: [[MV2:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY4]](s32), [[COPY5]](s32) + ; GFX9-UNSAFE: [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 ; GFX9-UNSAFE: [[FMA:%[0-9]+]]:_(s64) = G_FMA [[MV]], [[MV1]], [[MV2]] ; GFX9-UNSAFE: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[FMA]](s64) ; GFX9-UNSAFE: $vgpr0 = COPY [[UV]](s32) ; GFX9-UNSAFE: $vgpr1 = COPY [[UV1]](s32) - ; GFX9-UNSAFE: S_SETPC_B64_return $sgpr30_sgpr31, implicit $vgpr0, implicit $vgpr1 + ; GFX9-UNSAFE: [[COPY7:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY6]] + ; GFX9-UNSAFE: S_SETPC_B64_return [[COPY7]], implicit $vgpr0, implicit $vgpr1 ; GFX10-LABEL: name: test_double_add_mul ; GFX10: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 ; GFX10: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 @@ -458,12 +538,14 @@ body: | ; GFX10: [[COPY4:%[0-9]+]]:_(s32) = COPY $vgpr4 ; GFX10: [[COPY5:%[0-9]+]]:_(s32) = COPY $vgpr5 ; GFX10: [[MV2:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY4]](s32), [[COPY5]](s32) + ; GFX10: [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 ; GFX10: [[FMUL:%[0-9]+]]:_(s64) = reassoc G_FMUL [[MV]], [[MV1]] ; GFX10: [[FADD:%[0-9]+]]:_(s64) = reassoc G_FADD [[FMUL]], [[MV2]] ; GFX10: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[FADD]](s64) ; GFX10: $vgpr0 = COPY [[UV]](s32) ; GFX10: $vgpr1 = COPY [[UV1]](s32) - ; GFX10: S_SETPC_B64_return $sgpr30_sgpr31, implicit $vgpr0, implicit $vgpr1 + ; GFX10: [[COPY7:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY6]] + ; GFX10: S_SETPC_B64_return [[COPY7]], implicit $vgpr0, implicit $vgpr1 ; GFX10-CONTRACT-LABEL: name: test_double_add_mul ; GFX10-CONTRACT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 ; GFX10-CONTRACT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 @@ -474,11 +556,13 @@ body: | ; GFX10-CONTRACT: [[COPY4:%[0-9]+]]:_(s32) = COPY $vgpr4 ; GFX10-CONTRACT: [[COPY5:%[0-9]+]]:_(s32) = COPY $vgpr5 ; GFX10-CONTRACT: [[MV2:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY4]](s32), [[COPY5]](s32) + ; GFX10-CONTRACT: [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 ; GFX10-CONTRACT: [[FMA:%[0-9]+]]:_(s64) = G_FMA [[MV]], [[MV1]], [[MV2]] ; GFX10-CONTRACT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[FMA]](s64) ; GFX10-CONTRACT: $vgpr0 = COPY [[UV]](s32) ; GFX10-CONTRACT: $vgpr1 = COPY [[UV1]](s32) - ; GFX10-CONTRACT: S_SETPC_B64_return $sgpr30_sgpr31, implicit $vgpr0, implicit $vgpr1 + ; GFX10-CONTRACT: [[COPY7:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY6]] + ; GFX10-CONTRACT: S_SETPC_B64_return [[COPY7]], implicit $vgpr0, implicit $vgpr1 ; GFX10-DENORM-LABEL: name: test_double_add_mul ; GFX10-DENORM: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 ; GFX10-DENORM: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 @@ -489,12 +573,14 @@ body: | ; GFX10-DENORM: [[COPY4:%[0-9]+]]:_(s32) = COPY $vgpr4 ; GFX10-DENORM: [[COPY5:%[0-9]+]]:_(s32) = COPY $vgpr5 ; GFX10-DENORM: [[MV2:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY4]](s32), [[COPY5]](s32) + ; GFX10-DENORM: [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 ; GFX10-DENORM: [[FMUL:%[0-9]+]]:_(s64) = reassoc G_FMUL [[MV]], [[MV1]] ; GFX10-DENORM: [[FADD:%[0-9]+]]:_(s64) = reassoc G_FADD [[FMUL]], [[MV2]] ; GFX10-DENORM: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[FADD]](s64) ; GFX10-DENORM: $vgpr0 = COPY [[UV]](s32) ; GFX10-DENORM: $vgpr1 = COPY [[UV1]](s32) - ; GFX10-DENORM: 
S_SETPC_B64_return $sgpr30_sgpr31, implicit $vgpr0, implicit $vgpr1 + ; GFX10-DENORM: [[COPY7:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY6]] + ; GFX10-DENORM: S_SETPC_B64_return [[COPY7]], implicit $vgpr0, implicit $vgpr1 ; GFX10-UNSAFE-LABEL: name: test_double_add_mul ; GFX10-UNSAFE: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 ; GFX10-UNSAFE: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 @@ -505,11 +591,13 @@ body: | ; GFX10-UNSAFE: [[COPY4:%[0-9]+]]:_(s32) = COPY $vgpr4 ; GFX10-UNSAFE: [[COPY5:%[0-9]+]]:_(s32) = COPY $vgpr5 ; GFX10-UNSAFE: [[MV2:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY4]](s32), [[COPY5]](s32) + ; GFX10-UNSAFE: [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 ; GFX10-UNSAFE: [[FMA:%[0-9]+]]:_(s64) = G_FMA [[MV]], [[MV1]], [[MV2]] ; GFX10-UNSAFE: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[FMA]](s64) ; GFX10-UNSAFE: $vgpr0 = COPY [[UV]](s32) ; GFX10-UNSAFE: $vgpr1 = COPY [[UV1]](s32) - ; GFX10-UNSAFE: S_SETPC_B64_return $sgpr30_sgpr31, implicit $vgpr0, implicit $vgpr1 + ; GFX10-UNSAFE: [[COPY7:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY6]] + ; GFX10-UNSAFE: S_SETPC_B64_return [[COPY7]], implicit $vgpr0, implicit $vgpr1 %4:_(s32) = COPY $vgpr0 %5:_(s32) = COPY $vgpr1 %0:_(s64) = G_MERGE_VALUES %4(s32), %5(s32) @@ -519,19 +607,21 @@ body: | %8:_(s32) = COPY $vgpr4 %9:_(s32) = COPY $vgpr5 %2:_(s64) = G_MERGE_VALUES %8(s32), %9(s32) + %3:sgpr_64 = COPY $sgpr30_sgpr31 %10:_(s64) = reassoc G_FMUL %0, %1 %11:_(s64) = reassoc G_FADD %10, %2 %13:_(s32), %14:_(s32) = G_UNMERGE_VALUES %11(s64) $vgpr0 = COPY %13(s32) $vgpr1 = COPY %14(s32) - S_SETPC_B64_return $sgpr30_sgpr31, implicit $vgpr0, implicit $vgpr1 + %12:ccr_sgpr_64 = COPY %3 + S_SETPC_B64_return %12, implicit $vgpr0, implicit $vgpr1 ... --- name: test_double_add_mul_rhs body: | bb.1.entry: - liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5 + liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $sgpr30_sgpr31 ; GFX9-LABEL: name: test_double_add_mul_rhs ; GFX9: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 @@ -543,12 +633,14 @@ body: | ; GFX9: [[COPY4:%[0-9]+]]:_(s32) = COPY $vgpr4 ; GFX9: [[COPY5:%[0-9]+]]:_(s32) = COPY $vgpr5 ; GFX9: [[MV2:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY4]](s32), [[COPY5]](s32) + ; GFX9: [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 ; GFX9: [[FMUL:%[0-9]+]]:_(s64) = reassoc G_FMUL [[MV]], [[MV1]] ; GFX9: [[FADD:%[0-9]+]]:_(s64) = reassoc G_FADD [[MV2]], [[FMUL]] ; GFX9: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[FADD]](s64) ; GFX9: $vgpr0 = COPY [[UV]](s32) ; GFX9: $vgpr1 = COPY [[UV1]](s32) - ; GFX9: S_SETPC_B64_return $sgpr30_sgpr31, implicit $vgpr0, implicit $vgpr1 + ; GFX9: [[COPY7:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY6]] + ; GFX9: S_SETPC_B64_return [[COPY7]], implicit $vgpr0, implicit $vgpr1 ; GFX9-CONTRACT-LABEL: name: test_double_add_mul_rhs ; GFX9-CONTRACT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 ; GFX9-CONTRACT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 @@ -559,11 +651,13 @@ body: | ; GFX9-CONTRACT: [[COPY4:%[0-9]+]]:_(s32) = COPY $vgpr4 ; GFX9-CONTRACT: [[COPY5:%[0-9]+]]:_(s32) = COPY $vgpr5 ; GFX9-CONTRACT: [[MV2:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY4]](s32), [[COPY5]](s32) + ; GFX9-CONTRACT: [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 ; GFX9-CONTRACT: [[FMA:%[0-9]+]]:_(s64) = G_FMA [[MV]], [[MV1]], [[MV2]] ; GFX9-CONTRACT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[FMA]](s64) ; GFX9-CONTRACT: $vgpr0 = COPY [[UV]](s32) ; GFX9-CONTRACT: $vgpr1 = COPY [[UV1]](s32) - ; GFX9-CONTRACT: S_SETPC_B64_return $sgpr30_sgpr31, implicit $vgpr0, implicit 
$vgpr1 + ; GFX9-CONTRACT: [[COPY7:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY6]] + ; GFX9-CONTRACT: S_SETPC_B64_return [[COPY7]], implicit $vgpr0, implicit $vgpr1 ; GFX9-DENORM-LABEL: name: test_double_add_mul_rhs ; GFX9-DENORM: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 ; GFX9-DENORM: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 @@ -574,12 +668,14 @@ body: | ; GFX9-DENORM: [[COPY4:%[0-9]+]]:_(s32) = COPY $vgpr4 ; GFX9-DENORM: [[COPY5:%[0-9]+]]:_(s32) = COPY $vgpr5 ; GFX9-DENORM: [[MV2:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY4]](s32), [[COPY5]](s32) + ; GFX9-DENORM: [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 ; GFX9-DENORM: [[FMUL:%[0-9]+]]:_(s64) = reassoc G_FMUL [[MV]], [[MV1]] ; GFX9-DENORM: [[FADD:%[0-9]+]]:_(s64) = reassoc G_FADD [[MV2]], [[FMUL]] ; GFX9-DENORM: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[FADD]](s64) ; GFX9-DENORM: $vgpr0 = COPY [[UV]](s32) ; GFX9-DENORM: $vgpr1 = COPY [[UV1]](s32) - ; GFX9-DENORM: S_SETPC_B64_return $sgpr30_sgpr31, implicit $vgpr0, implicit $vgpr1 + ; GFX9-DENORM: [[COPY7:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY6]] + ; GFX9-DENORM: S_SETPC_B64_return [[COPY7]], implicit $vgpr0, implicit $vgpr1 ; GFX9-UNSAFE-LABEL: name: test_double_add_mul_rhs ; GFX9-UNSAFE: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 ; GFX9-UNSAFE: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 @@ -590,11 +686,13 @@ body: | ; GFX9-UNSAFE: [[COPY4:%[0-9]+]]:_(s32) = COPY $vgpr4 ; GFX9-UNSAFE: [[COPY5:%[0-9]+]]:_(s32) = COPY $vgpr5 ; GFX9-UNSAFE: [[MV2:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY4]](s32), [[COPY5]](s32) + ; GFX9-UNSAFE: [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 ; GFX9-UNSAFE: [[FMA:%[0-9]+]]:_(s64) = G_FMA [[MV]], [[MV1]], [[MV2]] ; GFX9-UNSAFE: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[FMA]](s64) ; GFX9-UNSAFE: $vgpr0 = COPY [[UV]](s32) ; GFX9-UNSAFE: $vgpr1 = COPY [[UV1]](s32) - ; GFX9-UNSAFE: S_SETPC_B64_return $sgpr30_sgpr31, implicit $vgpr0, implicit $vgpr1 + ; GFX9-UNSAFE: [[COPY7:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY6]] + ; GFX9-UNSAFE: S_SETPC_B64_return [[COPY7]], implicit $vgpr0, implicit $vgpr1 ; GFX10-LABEL: name: test_double_add_mul_rhs ; GFX10: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 ; GFX10: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 @@ -605,12 +703,14 @@ body: | ; GFX10: [[COPY4:%[0-9]+]]:_(s32) = COPY $vgpr4 ; GFX10: [[COPY5:%[0-9]+]]:_(s32) = COPY $vgpr5 ; GFX10: [[MV2:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY4]](s32), [[COPY5]](s32) + ; GFX10: [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 ; GFX10: [[FMUL:%[0-9]+]]:_(s64) = reassoc G_FMUL [[MV]], [[MV1]] ; GFX10: [[FADD:%[0-9]+]]:_(s64) = reassoc G_FADD [[MV2]], [[FMUL]] ; GFX10: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[FADD]](s64) ; GFX10: $vgpr0 = COPY [[UV]](s32) ; GFX10: $vgpr1 = COPY [[UV1]](s32) - ; GFX10: S_SETPC_B64_return $sgpr30_sgpr31, implicit $vgpr0, implicit $vgpr1 + ; GFX10: [[COPY7:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY6]] + ; GFX10: S_SETPC_B64_return [[COPY7]], implicit $vgpr0, implicit $vgpr1 ; GFX10-CONTRACT-LABEL: name: test_double_add_mul_rhs ; GFX10-CONTRACT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 ; GFX10-CONTRACT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 @@ -621,11 +721,13 @@ body: | ; GFX10-CONTRACT: [[COPY4:%[0-9]+]]:_(s32) = COPY $vgpr4 ; GFX10-CONTRACT: [[COPY5:%[0-9]+]]:_(s32) = COPY $vgpr5 ; GFX10-CONTRACT: [[MV2:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY4]](s32), [[COPY5]](s32) + ; GFX10-CONTRACT: [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 ; GFX10-CONTRACT: [[FMA:%[0-9]+]]:_(s64) = G_FMA [[MV]], [[MV1]], [[MV2]] ; GFX10-CONTRACT: 
[[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[FMA]](s64) ; GFX10-CONTRACT: $vgpr0 = COPY [[UV]](s32) ; GFX10-CONTRACT: $vgpr1 = COPY [[UV1]](s32) - ; GFX10-CONTRACT: S_SETPC_B64_return $sgpr30_sgpr31, implicit $vgpr0, implicit $vgpr1 + ; GFX10-CONTRACT: [[COPY7:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY6]] + ; GFX10-CONTRACT: S_SETPC_B64_return [[COPY7]], implicit $vgpr0, implicit $vgpr1 ; GFX10-DENORM-LABEL: name: test_double_add_mul_rhs ; GFX10-DENORM: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 ; GFX10-DENORM: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 @@ -636,12 +738,14 @@ body: | ; GFX10-DENORM: [[COPY4:%[0-9]+]]:_(s32) = COPY $vgpr4 ; GFX10-DENORM: [[COPY5:%[0-9]+]]:_(s32) = COPY $vgpr5 ; GFX10-DENORM: [[MV2:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY4]](s32), [[COPY5]](s32) + ; GFX10-DENORM: [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 ; GFX10-DENORM: [[FMUL:%[0-9]+]]:_(s64) = reassoc G_FMUL [[MV]], [[MV1]] ; GFX10-DENORM: [[FADD:%[0-9]+]]:_(s64) = reassoc G_FADD [[MV2]], [[FMUL]] ; GFX10-DENORM: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[FADD]](s64) ; GFX10-DENORM: $vgpr0 = COPY [[UV]](s32) ; GFX10-DENORM: $vgpr1 = COPY [[UV1]](s32) - ; GFX10-DENORM: S_SETPC_B64_return $sgpr30_sgpr31, implicit $vgpr0, implicit $vgpr1 + ; GFX10-DENORM: [[COPY7:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY6]] + ; GFX10-DENORM: S_SETPC_B64_return [[COPY7]], implicit $vgpr0, implicit $vgpr1 ; GFX10-UNSAFE-LABEL: name: test_double_add_mul_rhs ; GFX10-UNSAFE: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 ; GFX10-UNSAFE: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 @@ -652,11 +756,13 @@ body: | ; GFX10-UNSAFE: [[COPY4:%[0-9]+]]:_(s32) = COPY $vgpr4 ; GFX10-UNSAFE: [[COPY5:%[0-9]+]]:_(s32) = COPY $vgpr5 ; GFX10-UNSAFE: [[MV2:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY4]](s32), [[COPY5]](s32) + ; GFX10-UNSAFE: [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 ; GFX10-UNSAFE: [[FMA:%[0-9]+]]:_(s64) = G_FMA [[MV]], [[MV1]], [[MV2]] ; GFX10-UNSAFE: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[FMA]](s64) ; GFX10-UNSAFE: $vgpr0 = COPY [[UV]](s32) ; GFX10-UNSAFE: $vgpr1 = COPY [[UV1]](s32) - ; GFX10-UNSAFE: S_SETPC_B64_return $sgpr30_sgpr31, implicit $vgpr0, implicit $vgpr1 + ; GFX10-UNSAFE: [[COPY7:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY6]] + ; GFX10-UNSAFE: S_SETPC_B64_return [[COPY7]], implicit $vgpr0, implicit $vgpr1 %4:_(s32) = COPY $vgpr0 %5:_(s32) = COPY $vgpr1 %0:_(s64) = G_MERGE_VALUES %4(s32), %5(s32) @@ -666,19 +772,21 @@ body: | %8:_(s32) = COPY $vgpr4 %9:_(s32) = COPY $vgpr5 %2:_(s64) = G_MERGE_VALUES %8(s32), %9(s32) + %3:sgpr_64 = COPY $sgpr30_sgpr31 %10:_(s64) = reassoc G_FMUL %0, %1 %11:_(s64) = reassoc G_FADD %2, %10 %13:_(s32), %14:_(s32) = G_UNMERGE_VALUES %11(s64) $vgpr0 = COPY %13(s32) $vgpr1 = COPY %14(s32) - S_SETPC_B64_return $sgpr30_sgpr31, implicit $vgpr0, implicit $vgpr1 + %12:ccr_sgpr_64 = COPY %3 + S_SETPC_B64_return %12, implicit $vgpr0, implicit $vgpr1 ... 
--- name: test_4xfloat_add_mul body: | bb.1.entry: - liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $vgpr8, $vgpr9, $vgpr10, $vgpr11 + liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $vgpr8, $vgpr9, $vgpr10, $vgpr11, $sgpr30_sgpr31 ; GFX9-LABEL: name: test_4xfloat_add_mul ; GFX9: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 @@ -696,6 +804,7 @@ body: | ; GFX9: [[COPY10:%[0-9]+]]:_(s32) = COPY $vgpr10 ; GFX9: [[COPY11:%[0-9]+]]:_(s32) = COPY $vgpr11 ; GFX9: [[BUILD_VECTOR2:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32), [[COPY10]](s32), [[COPY11]](s32) + ; GFX9: [[COPY12:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 ; GFX9: [[FMUL:%[0-9]+]]:_(<4 x s32>) = reassoc G_FMUL [[BUILD_VECTOR]], [[BUILD_VECTOR1]] ; GFX9: [[FADD:%[0-9]+]]:_(<4 x s32>) = reassoc G_FADD [[FMUL]], [[BUILD_VECTOR2]] ; GFX9: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[FADD]](<4 x s32>) @@ -703,7 +812,8 @@ body: | ; GFX9: $vgpr1 = COPY [[UV1]](s32) ; GFX9: $vgpr2 = COPY [[UV2]](s32) ; GFX9: $vgpr3 = COPY [[UV3]](s32) - ; GFX9: S_SETPC_B64_return $sgpr30_sgpr31, implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3 + ; GFX9: [[COPY13:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY12]] + ; GFX9: S_SETPC_B64_return [[COPY13]], implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3 ; GFX9-CONTRACT-LABEL: name: test_4xfloat_add_mul ; GFX9-CONTRACT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 ; GFX9-CONTRACT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 @@ -720,13 +830,15 @@ body: | ; GFX9-CONTRACT: [[COPY10:%[0-9]+]]:_(s32) = COPY $vgpr10 ; GFX9-CONTRACT: [[COPY11:%[0-9]+]]:_(s32) = COPY $vgpr11 ; GFX9-CONTRACT: [[BUILD_VECTOR2:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32), [[COPY10]](s32), [[COPY11]](s32) + ; GFX9-CONTRACT: [[COPY12:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 ; GFX9-CONTRACT: [[FMA:%[0-9]+]]:_(<4 x s32>) = G_FMA [[BUILD_VECTOR]], [[BUILD_VECTOR1]], [[BUILD_VECTOR2]] ; GFX9-CONTRACT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[FMA]](<4 x s32>) ; GFX9-CONTRACT: $vgpr0 = COPY [[UV]](s32) ; GFX9-CONTRACT: $vgpr1 = COPY [[UV1]](s32) ; GFX9-CONTRACT: $vgpr2 = COPY [[UV2]](s32) ; GFX9-CONTRACT: $vgpr3 = COPY [[UV3]](s32) - ; GFX9-CONTRACT: S_SETPC_B64_return $sgpr30_sgpr31, implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3 + ; GFX9-CONTRACT: [[COPY13:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY12]] + ; GFX9-CONTRACT: S_SETPC_B64_return [[COPY13]], implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3 ; GFX9-DENORM-LABEL: name: test_4xfloat_add_mul ; GFX9-DENORM: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 ; GFX9-DENORM: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 @@ -743,6 +855,7 @@ body: | ; GFX9-DENORM: [[COPY10:%[0-9]+]]:_(s32) = COPY $vgpr10 ; GFX9-DENORM: [[COPY11:%[0-9]+]]:_(s32) = COPY $vgpr11 ; GFX9-DENORM: [[BUILD_VECTOR2:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32), [[COPY10]](s32), [[COPY11]](s32) + ; GFX9-DENORM: [[COPY12:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 ; GFX9-DENORM: [[FMUL:%[0-9]+]]:_(<4 x s32>) = reassoc G_FMUL [[BUILD_VECTOR]], [[BUILD_VECTOR1]] ; GFX9-DENORM: [[FADD:%[0-9]+]]:_(<4 x s32>) = reassoc G_FADD [[FMUL]], [[BUILD_VECTOR2]] ; GFX9-DENORM: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[FADD]](<4 x s32>) @@ -750,7 +863,8 @@ body: | ; GFX9-DENORM: $vgpr1 = COPY 
[[UV1]](s32) ; GFX9-DENORM: $vgpr2 = COPY [[UV2]](s32) ; GFX9-DENORM: $vgpr3 = COPY [[UV3]](s32) - ; GFX9-DENORM: S_SETPC_B64_return $sgpr30_sgpr31, implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3 + ; GFX9-DENORM: [[COPY13:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY12]] + ; GFX9-DENORM: S_SETPC_B64_return [[COPY13]], implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3 ; GFX9-UNSAFE-LABEL: name: test_4xfloat_add_mul ; GFX9-UNSAFE: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 ; GFX9-UNSAFE: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 @@ -767,13 +881,15 @@ body: | ; GFX9-UNSAFE: [[COPY10:%[0-9]+]]:_(s32) = COPY $vgpr10 ; GFX9-UNSAFE: [[COPY11:%[0-9]+]]:_(s32) = COPY $vgpr11 ; GFX9-UNSAFE: [[BUILD_VECTOR2:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32), [[COPY10]](s32), [[COPY11]](s32) + ; GFX9-UNSAFE: [[COPY12:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 ; GFX9-UNSAFE: [[FMA:%[0-9]+]]:_(<4 x s32>) = G_FMA [[BUILD_VECTOR]], [[BUILD_VECTOR1]], [[BUILD_VECTOR2]] ; GFX9-UNSAFE: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[FMA]](<4 x s32>) ; GFX9-UNSAFE: $vgpr0 = COPY [[UV]](s32) ; GFX9-UNSAFE: $vgpr1 = COPY [[UV1]](s32) ; GFX9-UNSAFE: $vgpr2 = COPY [[UV2]](s32) ; GFX9-UNSAFE: $vgpr3 = COPY [[UV3]](s32) - ; GFX9-UNSAFE: S_SETPC_B64_return $sgpr30_sgpr31, implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3 + ; GFX9-UNSAFE: [[COPY13:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY12]] + ; GFX9-UNSAFE: S_SETPC_B64_return [[COPY13]], implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3 ; GFX10-LABEL: name: test_4xfloat_add_mul ; GFX10: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 ; GFX10: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 @@ -790,6 +906,7 @@ body: | ; GFX10: [[COPY10:%[0-9]+]]:_(s32) = COPY $vgpr10 ; GFX10: [[COPY11:%[0-9]+]]:_(s32) = COPY $vgpr11 ; GFX10: [[BUILD_VECTOR2:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32), [[COPY10]](s32), [[COPY11]](s32) + ; GFX10: [[COPY12:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 ; GFX10: [[FMUL:%[0-9]+]]:_(<4 x s32>) = reassoc G_FMUL [[BUILD_VECTOR]], [[BUILD_VECTOR1]] ; GFX10: [[FADD:%[0-9]+]]:_(<4 x s32>) = reassoc G_FADD [[FMUL]], [[BUILD_VECTOR2]] ; GFX10: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[FADD]](<4 x s32>) @@ -797,7 +914,8 @@ body: | ; GFX10: $vgpr1 = COPY [[UV1]](s32) ; GFX10: $vgpr2 = COPY [[UV2]](s32) ; GFX10: $vgpr3 = COPY [[UV3]](s32) - ; GFX10: S_SETPC_B64_return $sgpr30_sgpr31, implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3 + ; GFX10: [[COPY13:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY12]] + ; GFX10: S_SETPC_B64_return [[COPY13]], implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3 ; GFX10-CONTRACT-LABEL: name: test_4xfloat_add_mul ; GFX10-CONTRACT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 ; GFX10-CONTRACT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 @@ -814,13 +932,15 @@ body: | ; GFX10-CONTRACT: [[COPY10:%[0-9]+]]:_(s32) = COPY $vgpr10 ; GFX10-CONTRACT: [[COPY11:%[0-9]+]]:_(s32) = COPY $vgpr11 ; GFX10-CONTRACT: [[BUILD_VECTOR2:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32), [[COPY10]](s32), [[COPY11]](s32) + ; GFX10-CONTRACT: [[COPY12:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 ; GFX10-CONTRACT: [[FMA:%[0-9]+]]:_(<4 x s32>) = G_FMA [[BUILD_VECTOR]], [[BUILD_VECTOR1]], [[BUILD_VECTOR2]] ; GFX10-CONTRACT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = 
G_UNMERGE_VALUES [[FMA]](<4 x s32>) ; GFX10-CONTRACT: $vgpr0 = COPY [[UV]](s32) ; GFX10-CONTRACT: $vgpr1 = COPY [[UV1]](s32) ; GFX10-CONTRACT: $vgpr2 = COPY [[UV2]](s32) ; GFX10-CONTRACT: $vgpr3 = COPY [[UV3]](s32) - ; GFX10-CONTRACT: S_SETPC_B64_return $sgpr30_sgpr31, implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3 + ; GFX10-CONTRACT: [[COPY13:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY12]] + ; GFX10-CONTRACT: S_SETPC_B64_return [[COPY13]], implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3 ; GFX10-DENORM-LABEL: name: test_4xfloat_add_mul ; GFX10-DENORM: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 ; GFX10-DENORM: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 @@ -837,6 +957,7 @@ body: | ; GFX10-DENORM: [[COPY10:%[0-9]+]]:_(s32) = COPY $vgpr10 ; GFX10-DENORM: [[COPY11:%[0-9]+]]:_(s32) = COPY $vgpr11 ; GFX10-DENORM: [[BUILD_VECTOR2:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32), [[COPY10]](s32), [[COPY11]](s32) + ; GFX10-DENORM: [[COPY12:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 ; GFX10-DENORM: [[FMUL:%[0-9]+]]:_(<4 x s32>) = reassoc G_FMUL [[BUILD_VECTOR]], [[BUILD_VECTOR1]] ; GFX10-DENORM: [[FADD:%[0-9]+]]:_(<4 x s32>) = reassoc G_FADD [[FMUL]], [[BUILD_VECTOR2]] ; GFX10-DENORM: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[FADD]](<4 x s32>) @@ -844,7 +965,8 @@ body: | ; GFX10-DENORM: $vgpr1 = COPY [[UV1]](s32) ; GFX10-DENORM: $vgpr2 = COPY [[UV2]](s32) ; GFX10-DENORM: $vgpr3 = COPY [[UV3]](s32) - ; GFX10-DENORM: S_SETPC_B64_return $sgpr30_sgpr31, implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3 + ; GFX10-DENORM: [[COPY13:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY12]] + ; GFX10-DENORM: S_SETPC_B64_return [[COPY13]], implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3 ; GFX10-UNSAFE-LABEL: name: test_4xfloat_add_mul ; GFX10-UNSAFE: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 ; GFX10-UNSAFE: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 @@ -861,13 +983,15 @@ body: | ; GFX10-UNSAFE: [[COPY10:%[0-9]+]]:_(s32) = COPY $vgpr10 ; GFX10-UNSAFE: [[COPY11:%[0-9]+]]:_(s32) = COPY $vgpr11 ; GFX10-UNSAFE: [[BUILD_VECTOR2:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32), [[COPY10]](s32), [[COPY11]](s32) + ; GFX10-UNSAFE: [[COPY12:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 ; GFX10-UNSAFE: [[FMA:%[0-9]+]]:_(<4 x s32>) = G_FMA [[BUILD_VECTOR]], [[BUILD_VECTOR1]], [[BUILD_VECTOR2]] ; GFX10-UNSAFE: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[FMA]](<4 x s32>) ; GFX10-UNSAFE: $vgpr0 = COPY [[UV]](s32) ; GFX10-UNSAFE: $vgpr1 = COPY [[UV1]](s32) ; GFX10-UNSAFE: $vgpr2 = COPY [[UV2]](s32) ; GFX10-UNSAFE: $vgpr3 = COPY [[UV3]](s32) - ; GFX10-UNSAFE: S_SETPC_B64_return $sgpr30_sgpr31, implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3 + ; GFX10-UNSAFE: [[COPY13:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY12]] + ; GFX10-UNSAFE: S_SETPC_B64_return [[COPY13]], implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3 %4:_(s32) = COPY $vgpr0 %5:_(s32) = COPY $vgpr1 %6:_(s32) = COPY $vgpr2 @@ -883,6 +1007,7 @@ body: | %14:_(s32) = COPY $vgpr10 %15:_(s32) = COPY $vgpr11 %2:_(<4 x s32>) = G_BUILD_VECTOR %12(s32), %13(s32), %14(s32), %15(s32) + %3:sgpr_64 = COPY $sgpr30_sgpr31 %16:_(<4 x s32>) = reassoc G_FMUL %0, %1 %17:_(<4 x s32>) = reassoc G_FADD %16, %2 %19:_(s32), %20:_(s32), %21:_(s32), %22:_(s32) = G_UNMERGE_VALUES %17(<4 x s32>) @@ -890,14 +1015,15 @@ body: | $vgpr1 = COPY %20(s32) $vgpr2 = COPY 
%21(s32) $vgpr3 = COPY %22(s32) - S_SETPC_B64_return $sgpr30_sgpr31, implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3 + %18:ccr_sgpr_64 = COPY %3 + S_SETPC_B64_return %18, implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3 ... --- name: test_3xfloat_add_mul_rhs body: | bb.1.entry: - liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $vgpr8 + liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $vgpr8, $sgpr30_sgpr31 ; GFX9-LABEL: name: test_3xfloat_add_mul_rhs ; GFX9: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 @@ -912,13 +1038,15 @@ body: | ; GFX9: [[COPY7:%[0-9]+]]:_(s32) = COPY $vgpr7 ; GFX9: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr8 ; GFX9: [[BUILD_VECTOR2:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[COPY6]](s32), [[COPY7]](s32), [[COPY8]](s32) + ; GFX9: [[COPY9:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 ; GFX9: [[FMUL:%[0-9]+]]:_(<3 x s32>) = reassoc G_FMUL [[BUILD_VECTOR]], [[BUILD_VECTOR1]] ; GFX9: [[FADD:%[0-9]+]]:_(<3 x s32>) = reassoc G_FADD [[BUILD_VECTOR2]], [[FMUL]] ; GFX9: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[FADD]](<3 x s32>) ; GFX9: $vgpr0 = COPY [[UV]](s32) ; GFX9: $vgpr1 = COPY [[UV1]](s32) ; GFX9: $vgpr2 = COPY [[UV2]](s32) - ; GFX9: S_SETPC_B64_return $sgpr30_sgpr31, implicit $vgpr0, implicit $vgpr1, implicit $vgpr2 + ; GFX9: [[COPY10:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY9]] + ; GFX9: S_SETPC_B64_return [[COPY10]], implicit $vgpr0, implicit $vgpr1, implicit $vgpr2 ; GFX9-CONTRACT-LABEL: name: test_3xfloat_add_mul_rhs ; GFX9-CONTRACT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 ; GFX9-CONTRACT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 @@ -932,12 +1060,14 @@ body: | ; GFX9-CONTRACT: [[COPY7:%[0-9]+]]:_(s32) = COPY $vgpr7 ; GFX9-CONTRACT: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr8 ; GFX9-CONTRACT: [[BUILD_VECTOR2:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[COPY6]](s32), [[COPY7]](s32), [[COPY8]](s32) + ; GFX9-CONTRACT: [[COPY9:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 ; GFX9-CONTRACT: [[FMA:%[0-9]+]]:_(<3 x s32>) = G_FMA [[BUILD_VECTOR]], [[BUILD_VECTOR1]], [[BUILD_VECTOR2]] ; GFX9-CONTRACT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[FMA]](<3 x s32>) ; GFX9-CONTRACT: $vgpr0 = COPY [[UV]](s32) ; GFX9-CONTRACT: $vgpr1 = COPY [[UV1]](s32) ; GFX9-CONTRACT: $vgpr2 = COPY [[UV2]](s32) - ; GFX9-CONTRACT: S_SETPC_B64_return $sgpr30_sgpr31, implicit $vgpr0, implicit $vgpr1, implicit $vgpr2 + ; GFX9-CONTRACT: [[COPY10:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY9]] + ; GFX9-CONTRACT: S_SETPC_B64_return [[COPY10]], implicit $vgpr0, implicit $vgpr1, implicit $vgpr2 ; GFX9-DENORM-LABEL: name: test_3xfloat_add_mul_rhs ; GFX9-DENORM: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 ; GFX9-DENORM: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 @@ -951,13 +1081,15 @@ body: | ; GFX9-DENORM: [[COPY7:%[0-9]+]]:_(s32) = COPY $vgpr7 ; GFX9-DENORM: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr8 ; GFX9-DENORM: [[BUILD_VECTOR2:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[COPY6]](s32), [[COPY7]](s32), [[COPY8]](s32) + ; GFX9-DENORM: [[COPY9:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 ; GFX9-DENORM: [[FMUL:%[0-9]+]]:_(<3 x s32>) = reassoc G_FMUL [[BUILD_VECTOR]], [[BUILD_VECTOR1]] ; GFX9-DENORM: [[FADD:%[0-9]+]]:_(<3 x s32>) = reassoc G_FADD [[BUILD_VECTOR2]], [[FMUL]] ; GFX9-DENORM: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[FADD]](<3 x s32>) ; GFX9-DENORM: $vgpr0 = COPY [[UV]](s32) ; GFX9-DENORM: $vgpr1 = COPY [[UV1]](s32) ; GFX9-DENORM: $vgpr2 = 
COPY [[UV2]](s32) - ; GFX9-DENORM: S_SETPC_B64_return $sgpr30_sgpr31, implicit $vgpr0, implicit $vgpr1, implicit $vgpr2 + ; GFX9-DENORM: [[COPY10:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY9]] + ; GFX9-DENORM: S_SETPC_B64_return [[COPY10]], implicit $vgpr0, implicit $vgpr1, implicit $vgpr2 ; GFX9-UNSAFE-LABEL: name: test_3xfloat_add_mul_rhs ; GFX9-UNSAFE: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 ; GFX9-UNSAFE: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 @@ -971,12 +1103,14 @@ body: | ; GFX9-UNSAFE: [[COPY7:%[0-9]+]]:_(s32) = COPY $vgpr7 ; GFX9-UNSAFE: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr8 ; GFX9-UNSAFE: [[BUILD_VECTOR2:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[COPY6]](s32), [[COPY7]](s32), [[COPY8]](s32) + ; GFX9-UNSAFE: [[COPY9:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 ; GFX9-UNSAFE: [[FMA:%[0-9]+]]:_(<3 x s32>) = G_FMA [[BUILD_VECTOR]], [[BUILD_VECTOR1]], [[BUILD_VECTOR2]] ; GFX9-UNSAFE: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[FMA]](<3 x s32>) ; GFX9-UNSAFE: $vgpr0 = COPY [[UV]](s32) ; GFX9-UNSAFE: $vgpr1 = COPY [[UV1]](s32) ; GFX9-UNSAFE: $vgpr2 = COPY [[UV2]](s32) - ; GFX9-UNSAFE: S_SETPC_B64_return $sgpr30_sgpr31, implicit $vgpr0, implicit $vgpr1, implicit $vgpr2 + ; GFX9-UNSAFE: [[COPY10:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY9]] + ; GFX9-UNSAFE: S_SETPC_B64_return [[COPY10]], implicit $vgpr0, implicit $vgpr1, implicit $vgpr2 ; GFX10-LABEL: name: test_3xfloat_add_mul_rhs ; GFX10: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 ; GFX10: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 @@ -990,13 +1124,15 @@ body: | ; GFX10: [[COPY7:%[0-9]+]]:_(s32) = COPY $vgpr7 ; GFX10: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr8 ; GFX10: [[BUILD_VECTOR2:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[COPY6]](s32), [[COPY7]](s32), [[COPY8]](s32) + ; GFX10: [[COPY9:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 ; GFX10: [[FMUL:%[0-9]+]]:_(<3 x s32>) = reassoc G_FMUL [[BUILD_VECTOR]], [[BUILD_VECTOR1]] ; GFX10: [[FADD:%[0-9]+]]:_(<3 x s32>) = reassoc G_FADD [[BUILD_VECTOR2]], [[FMUL]] ; GFX10: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[FADD]](<3 x s32>) ; GFX10: $vgpr0 = COPY [[UV]](s32) ; GFX10: $vgpr1 = COPY [[UV1]](s32) ; GFX10: $vgpr2 = COPY [[UV2]](s32) - ; GFX10: S_SETPC_B64_return $sgpr30_sgpr31, implicit $vgpr0, implicit $vgpr1, implicit $vgpr2 + ; GFX10: [[COPY10:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY9]] + ; GFX10: S_SETPC_B64_return [[COPY10]], implicit $vgpr0, implicit $vgpr1, implicit $vgpr2 ; GFX10-CONTRACT-LABEL: name: test_3xfloat_add_mul_rhs ; GFX10-CONTRACT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 ; GFX10-CONTRACT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 @@ -1010,12 +1146,14 @@ body: | ; GFX10-CONTRACT: [[COPY7:%[0-9]+]]:_(s32) = COPY $vgpr7 ; GFX10-CONTRACT: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr8 ; GFX10-CONTRACT: [[BUILD_VECTOR2:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[COPY6]](s32), [[COPY7]](s32), [[COPY8]](s32) + ; GFX10-CONTRACT: [[COPY9:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 ; GFX10-CONTRACT: [[FMA:%[0-9]+]]:_(<3 x s32>) = G_FMA [[BUILD_VECTOR]], [[BUILD_VECTOR1]], [[BUILD_VECTOR2]] ; GFX10-CONTRACT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[FMA]](<3 x s32>) ; GFX10-CONTRACT: $vgpr0 = COPY [[UV]](s32) ; GFX10-CONTRACT: $vgpr1 = COPY [[UV1]](s32) ; GFX10-CONTRACT: $vgpr2 = COPY [[UV2]](s32) - ; GFX10-CONTRACT: S_SETPC_B64_return $sgpr30_sgpr31, implicit $vgpr0, implicit $vgpr1, implicit $vgpr2 + ; GFX10-CONTRACT: [[COPY10:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY9]] + ; GFX10-CONTRACT: 
S_SETPC_B64_return [[COPY10]], implicit $vgpr0, implicit $vgpr1, implicit $vgpr2 ; GFX10-DENORM-LABEL: name: test_3xfloat_add_mul_rhs ; GFX10-DENORM: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 ; GFX10-DENORM: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 @@ -1029,13 +1167,15 @@ body: | ; GFX10-DENORM: [[COPY7:%[0-9]+]]:_(s32) = COPY $vgpr7 ; GFX10-DENORM: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr8 ; GFX10-DENORM: [[BUILD_VECTOR2:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[COPY6]](s32), [[COPY7]](s32), [[COPY8]](s32) + ; GFX10-DENORM: [[COPY9:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 ; GFX10-DENORM: [[FMUL:%[0-9]+]]:_(<3 x s32>) = reassoc G_FMUL [[BUILD_VECTOR]], [[BUILD_VECTOR1]] ; GFX10-DENORM: [[FADD:%[0-9]+]]:_(<3 x s32>) = reassoc G_FADD [[BUILD_VECTOR2]], [[FMUL]] ; GFX10-DENORM: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[FADD]](<3 x s32>) ; GFX10-DENORM: $vgpr0 = COPY [[UV]](s32) ; GFX10-DENORM: $vgpr1 = COPY [[UV1]](s32) ; GFX10-DENORM: $vgpr2 = COPY [[UV2]](s32) - ; GFX10-DENORM: S_SETPC_B64_return $sgpr30_sgpr31, implicit $vgpr0, implicit $vgpr1, implicit $vgpr2 + ; GFX10-DENORM: [[COPY10:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY9]] + ; GFX10-DENORM: S_SETPC_B64_return [[COPY10]], implicit $vgpr0, implicit $vgpr1, implicit $vgpr2 ; GFX10-UNSAFE-LABEL: name: test_3xfloat_add_mul_rhs ; GFX10-UNSAFE: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 ; GFX10-UNSAFE: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 @@ -1049,12 +1189,14 @@ body: | ; GFX10-UNSAFE: [[COPY7:%[0-9]+]]:_(s32) = COPY $vgpr7 ; GFX10-UNSAFE: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr8 ; GFX10-UNSAFE: [[BUILD_VECTOR2:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[COPY6]](s32), [[COPY7]](s32), [[COPY8]](s32) + ; GFX10-UNSAFE: [[COPY9:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 ; GFX10-UNSAFE: [[FMA:%[0-9]+]]:_(<3 x s32>) = G_FMA [[BUILD_VECTOR]], [[BUILD_VECTOR1]], [[BUILD_VECTOR2]] ; GFX10-UNSAFE: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[FMA]](<3 x s32>) ; GFX10-UNSAFE: $vgpr0 = COPY [[UV]](s32) ; GFX10-UNSAFE: $vgpr1 = COPY [[UV1]](s32) ; GFX10-UNSAFE: $vgpr2 = COPY [[UV2]](s32) - ; GFX10-UNSAFE: S_SETPC_B64_return $sgpr30_sgpr31, implicit $vgpr0, implicit $vgpr1, implicit $vgpr2 + ; GFX10-UNSAFE: [[COPY10:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY9]] + ; GFX10-UNSAFE: S_SETPC_B64_return [[COPY10]], implicit $vgpr0, implicit $vgpr1, implicit $vgpr2 %4:_(s32) = COPY $vgpr0 %5:_(s32) = COPY $vgpr1 %6:_(s32) = COPY $vgpr2 @@ -1067,20 +1209,22 @@ body: | %11:_(s32) = COPY $vgpr7 %12:_(s32) = COPY $vgpr8 %2:_(<3 x s32>) = G_BUILD_VECTOR %10(s32), %11(s32), %12(s32) + %3:sgpr_64 = COPY $sgpr30_sgpr31 %13:_(<3 x s32>) = reassoc G_FMUL %0, %1 %14:_(<3 x s32>) = reassoc G_FADD %2, %13 %16:_(s32), %17:_(s32), %18:_(s32) = G_UNMERGE_VALUES %14(<3 x s32>) $vgpr0 = COPY %16(s32) $vgpr1 = COPY %17(s32) $vgpr2 = COPY %18(s32) - S_SETPC_B64_return $sgpr30_sgpr31, implicit $vgpr0, implicit $vgpr1, implicit $vgpr2 + %15:ccr_sgpr_64 = COPY %3 + S_SETPC_B64_return %15, implicit $vgpr0, implicit $vgpr1, implicit $vgpr2 ... 
--- name: test_4xhalf_add_mul body: | bb.1.entry: - liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5 + liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $sgpr30_sgpr31 ; GFX9-LABEL: name: test_4xhalf_add_mul ; GFX9: [[COPY:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0 @@ -1092,12 +1236,14 @@ body: | ; GFX9: [[COPY4:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr4 ; GFX9: [[COPY5:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr5 ; GFX9: [[CONCAT_VECTORS2:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[COPY4]](<2 x s16>), [[COPY5]](<2 x s16>) + ; GFX9: [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 ; GFX9: [[FMUL:%[0-9]+]]:_(<4 x s16>) = reassoc G_FMUL [[CONCAT_VECTORS]], [[CONCAT_VECTORS1]] ; GFX9: [[FADD:%[0-9]+]]:_(<4 x s16>) = reassoc G_FADD [[FMUL]], [[CONCAT_VECTORS2]] ; GFX9: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[FADD]](<4 x s16>) ; GFX9: $vgpr0 = COPY [[UV]](<2 x s16>) ; GFX9: $vgpr1 = COPY [[UV1]](<2 x s16>) - ; GFX9: S_SETPC_B64_return $sgpr30_sgpr31, implicit $vgpr0, implicit $vgpr1 + ; GFX9: [[COPY7:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY6]] + ; GFX9: S_SETPC_B64_return [[COPY7]], implicit $vgpr0, implicit $vgpr1 ; GFX9-CONTRACT-LABEL: name: test_4xhalf_add_mul ; GFX9-CONTRACT: [[COPY:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0 ; GFX9-CONTRACT: [[COPY1:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr1 @@ -1108,11 +1254,13 @@ body: | ; GFX9-CONTRACT: [[COPY4:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr4 ; GFX9-CONTRACT: [[COPY5:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr5 ; GFX9-CONTRACT: [[CONCAT_VECTORS2:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[COPY4]](<2 x s16>), [[COPY5]](<2 x s16>) + ; GFX9-CONTRACT: [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 ; GFX9-CONTRACT: [[FMA:%[0-9]+]]:_(<4 x s16>) = G_FMA [[CONCAT_VECTORS]], [[CONCAT_VECTORS1]], [[CONCAT_VECTORS2]] ; GFX9-CONTRACT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[FMA]](<4 x s16>) ; GFX9-CONTRACT: $vgpr0 = COPY [[UV]](<2 x s16>) ; GFX9-CONTRACT: $vgpr1 = COPY [[UV1]](<2 x s16>) - ; GFX9-CONTRACT: S_SETPC_B64_return $sgpr30_sgpr31, implicit $vgpr0, implicit $vgpr1 + ; GFX9-CONTRACT: [[COPY7:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY6]] + ; GFX9-CONTRACT: S_SETPC_B64_return [[COPY7]], implicit $vgpr0, implicit $vgpr1 ; GFX9-DENORM-LABEL: name: test_4xhalf_add_mul ; GFX9-DENORM: [[COPY:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0 ; GFX9-DENORM: [[COPY1:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr1 @@ -1123,12 +1271,14 @@ body: | ; GFX9-DENORM: [[COPY4:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr4 ; GFX9-DENORM: [[COPY5:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr5 ; GFX9-DENORM: [[CONCAT_VECTORS2:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[COPY4]](<2 x s16>), [[COPY5]](<2 x s16>) + ; GFX9-DENORM: [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 ; GFX9-DENORM: [[FMUL:%[0-9]+]]:_(<4 x s16>) = reassoc G_FMUL [[CONCAT_VECTORS]], [[CONCAT_VECTORS1]] ; GFX9-DENORM: [[FADD:%[0-9]+]]:_(<4 x s16>) = reassoc G_FADD [[FMUL]], [[CONCAT_VECTORS2]] ; GFX9-DENORM: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[FADD]](<4 x s16>) ; GFX9-DENORM: $vgpr0 = COPY [[UV]](<2 x s16>) ; GFX9-DENORM: $vgpr1 = COPY [[UV1]](<2 x s16>) - ; GFX9-DENORM: S_SETPC_B64_return $sgpr30_sgpr31, implicit $vgpr0, implicit $vgpr1 + ; GFX9-DENORM: [[COPY7:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY6]] + ; GFX9-DENORM: S_SETPC_B64_return [[COPY7]], implicit $vgpr0, implicit $vgpr1 ; GFX9-UNSAFE-LABEL: name: test_4xhalf_add_mul ; GFX9-UNSAFE: [[COPY:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0 ; GFX9-UNSAFE: [[COPY1:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr1 @@ -1139,11 +1289,13 @@ 
body: | ; GFX9-UNSAFE: [[COPY4:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr4 ; GFX9-UNSAFE: [[COPY5:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr5 ; GFX9-UNSAFE: [[CONCAT_VECTORS2:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[COPY4]](<2 x s16>), [[COPY5]](<2 x s16>) + ; GFX9-UNSAFE: [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 ; GFX9-UNSAFE: [[FMA:%[0-9]+]]:_(<4 x s16>) = G_FMA [[CONCAT_VECTORS]], [[CONCAT_VECTORS1]], [[CONCAT_VECTORS2]] ; GFX9-UNSAFE: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[FMA]](<4 x s16>) ; GFX9-UNSAFE: $vgpr0 = COPY [[UV]](<2 x s16>) ; GFX9-UNSAFE: $vgpr1 = COPY [[UV1]](<2 x s16>) - ; GFX9-UNSAFE: S_SETPC_B64_return $sgpr30_sgpr31, implicit $vgpr0, implicit $vgpr1 + ; GFX9-UNSAFE: [[COPY7:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY6]] + ; GFX9-UNSAFE: S_SETPC_B64_return [[COPY7]], implicit $vgpr0, implicit $vgpr1 ; GFX10-LABEL: name: test_4xhalf_add_mul ; GFX10: [[COPY:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0 ; GFX10: [[COPY1:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr1 @@ -1154,12 +1306,14 @@ body: | ; GFX10: [[COPY4:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr4 ; GFX10: [[COPY5:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr5 ; GFX10: [[CONCAT_VECTORS2:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[COPY4]](<2 x s16>), [[COPY5]](<2 x s16>) + ; GFX10: [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 ; GFX10: [[FMUL:%[0-9]+]]:_(<4 x s16>) = reassoc G_FMUL [[CONCAT_VECTORS]], [[CONCAT_VECTORS1]] ; GFX10: [[FADD:%[0-9]+]]:_(<4 x s16>) = reassoc G_FADD [[FMUL]], [[CONCAT_VECTORS2]] ; GFX10: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[FADD]](<4 x s16>) ; GFX10: $vgpr0 = COPY [[UV]](<2 x s16>) ; GFX10: $vgpr1 = COPY [[UV1]](<2 x s16>) - ; GFX10: S_SETPC_B64_return $sgpr30_sgpr31, implicit $vgpr0, implicit $vgpr1 + ; GFX10: [[COPY7:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY6]] + ; GFX10: S_SETPC_B64_return [[COPY7]], implicit $vgpr0, implicit $vgpr1 ; GFX10-CONTRACT-LABEL: name: test_4xhalf_add_mul ; GFX10-CONTRACT: [[COPY:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0 ; GFX10-CONTRACT: [[COPY1:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr1 @@ -1170,11 +1324,13 @@ body: | ; GFX10-CONTRACT: [[COPY4:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr4 ; GFX10-CONTRACT: [[COPY5:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr5 ; GFX10-CONTRACT: [[CONCAT_VECTORS2:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[COPY4]](<2 x s16>), [[COPY5]](<2 x s16>) + ; GFX10-CONTRACT: [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 ; GFX10-CONTRACT: [[FMA:%[0-9]+]]:_(<4 x s16>) = G_FMA [[CONCAT_VECTORS]], [[CONCAT_VECTORS1]], [[CONCAT_VECTORS2]] ; GFX10-CONTRACT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[FMA]](<4 x s16>) ; GFX10-CONTRACT: $vgpr0 = COPY [[UV]](<2 x s16>) ; GFX10-CONTRACT: $vgpr1 = COPY [[UV1]](<2 x s16>) - ; GFX10-CONTRACT: S_SETPC_B64_return $sgpr30_sgpr31, implicit $vgpr0, implicit $vgpr1 + ; GFX10-CONTRACT: [[COPY7:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY6]] + ; GFX10-CONTRACT: S_SETPC_B64_return [[COPY7]], implicit $vgpr0, implicit $vgpr1 ; GFX10-DENORM-LABEL: name: test_4xhalf_add_mul ; GFX10-DENORM: [[COPY:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0 ; GFX10-DENORM: [[COPY1:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr1 @@ -1185,12 +1341,14 @@ body: | ; GFX10-DENORM: [[COPY4:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr4 ; GFX10-DENORM: [[COPY5:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr5 ; GFX10-DENORM: [[CONCAT_VECTORS2:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[COPY4]](<2 x s16>), [[COPY5]](<2 x s16>) + ; GFX10-DENORM: [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 ; GFX10-DENORM: [[FMUL:%[0-9]+]]:_(<4 x s16>) = reassoc 
G_FMUL [[CONCAT_VECTORS]], [[CONCAT_VECTORS1]] ; GFX10-DENORM: [[FADD:%[0-9]+]]:_(<4 x s16>) = reassoc G_FADD [[FMUL]], [[CONCAT_VECTORS2]] ; GFX10-DENORM: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[FADD]](<4 x s16>) ; GFX10-DENORM: $vgpr0 = COPY [[UV]](<2 x s16>) ; GFX10-DENORM: $vgpr1 = COPY [[UV1]](<2 x s16>) - ; GFX10-DENORM: S_SETPC_B64_return $sgpr30_sgpr31, implicit $vgpr0, implicit $vgpr1 + ; GFX10-DENORM: [[COPY7:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY6]] + ; GFX10-DENORM: S_SETPC_B64_return [[COPY7]], implicit $vgpr0, implicit $vgpr1 ; GFX10-UNSAFE-LABEL: name: test_4xhalf_add_mul ; GFX10-UNSAFE: [[COPY:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0 ; GFX10-UNSAFE: [[COPY1:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr1 @@ -1201,11 +1359,13 @@ body: | ; GFX10-UNSAFE: [[COPY4:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr4 ; GFX10-UNSAFE: [[COPY5:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr5 ; GFX10-UNSAFE: [[CONCAT_VECTORS2:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[COPY4]](<2 x s16>), [[COPY5]](<2 x s16>) + ; GFX10-UNSAFE: [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 ; GFX10-UNSAFE: [[FMA:%[0-9]+]]:_(<4 x s16>) = G_FMA [[CONCAT_VECTORS]], [[CONCAT_VECTORS1]], [[CONCAT_VECTORS2]] ; GFX10-UNSAFE: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[FMA]](<4 x s16>) ; GFX10-UNSAFE: $vgpr0 = COPY [[UV]](<2 x s16>) ; GFX10-UNSAFE: $vgpr1 = COPY [[UV1]](<2 x s16>) - ; GFX10-UNSAFE: S_SETPC_B64_return $sgpr30_sgpr31, implicit $vgpr0, implicit $vgpr1 + ; GFX10-UNSAFE: [[COPY7:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY6]] + ; GFX10-UNSAFE: S_SETPC_B64_return [[COPY7]], implicit $vgpr0, implicit $vgpr1 %4:_(<2 x s16>) = COPY $vgpr0 %5:_(<2 x s16>) = COPY $vgpr1 %0:_(<4 x s16>) = G_CONCAT_VECTORS %4(<2 x s16>), %5(<2 x s16>) @@ -1215,19 +1375,21 @@ body: | %8:_(<2 x s16>) = COPY $vgpr4 %9:_(<2 x s16>) = COPY $vgpr5 %2:_(<4 x s16>) = G_CONCAT_VECTORS %8(<2 x s16>), %9(<2 x s16>) + %3:sgpr_64 = COPY $sgpr30_sgpr31 %10:_(<4 x s16>) = reassoc G_FMUL %0, %1 %11:_(<4 x s16>) = reassoc G_FADD %10, %2 %13:_(<2 x s16>), %14:_(<2 x s16>) = G_UNMERGE_VALUES %11(<4 x s16>) $vgpr0 = COPY %13(<2 x s16>) $vgpr1 = COPY %14(<2 x s16>) - S_SETPC_B64_return $sgpr30_sgpr31, implicit $vgpr0, implicit $vgpr1 + %12:ccr_sgpr_64 = COPY %3 + S_SETPC_B64_return %12, implicit $vgpr0, implicit $vgpr1 ... 
--- name: test_3xhalf_add_mul_rhs body: | bb.1.entry: - liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5 + liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $sgpr30_sgpr31 ; GFX9-LABEL: name: test_3xhalf_add_mul_rhs ; GFX9: [[COPY:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0 @@ -1249,6 +1411,7 @@ body: | ; GFX9: [[BITCAST4:%[0-9]+]]:_(s96) = G_BITCAST [[CONCAT_VECTORS2]](<6 x s16>) ; GFX9: [[TRUNC2:%[0-9]+]]:_(s48) = G_TRUNC [[BITCAST4]](s96) ; GFX9: [[BITCAST5:%[0-9]+]]:_(<3 x s16>) = G_BITCAST [[TRUNC2]](s48) + ; GFX9: [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 ; GFX9: [[FMUL:%[0-9]+]]:_(<3 x s16>) = reassoc G_FMUL [[BITCAST1]], [[BITCAST3]] ; GFX9: [[FADD:%[0-9]+]]:_(<3 x s16>) = reassoc G_FADD [[BITCAST5]], [[FMUL]] ; GFX9: [[DEF1:%[0-9]+]]:_(<3 x s16>) = G_IMPLICIT_DEF @@ -1256,7 +1419,8 @@ body: | ; GFX9: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>), [[UV2:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS3]](<6 x s16>) ; GFX9: $vgpr0 = COPY [[UV]](<2 x s16>) ; GFX9: $vgpr1 = COPY [[UV1]](<2 x s16>) - ; GFX9: S_SETPC_B64_return $sgpr30_sgpr31, implicit $vgpr0, implicit $vgpr1 + ; GFX9: [[COPY7:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY6]] + ; GFX9: S_SETPC_B64_return [[COPY7]], implicit $vgpr0, implicit $vgpr1 ; GFX9-CONTRACT-LABEL: name: test_3xhalf_add_mul_rhs ; GFX9-CONTRACT: [[COPY:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0 ; GFX9-CONTRACT: [[COPY1:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr1 @@ -1277,13 +1441,15 @@ body: | ; GFX9-CONTRACT: [[BITCAST4:%[0-9]+]]:_(s96) = G_BITCAST [[CONCAT_VECTORS2]](<6 x s16>) ; GFX9-CONTRACT: [[TRUNC2:%[0-9]+]]:_(s48) = G_TRUNC [[BITCAST4]](s96) ; GFX9-CONTRACT: [[BITCAST5:%[0-9]+]]:_(<3 x s16>) = G_BITCAST [[TRUNC2]](s48) + ; GFX9-CONTRACT: [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 ; GFX9-CONTRACT: [[FMA:%[0-9]+]]:_(<3 x s16>) = G_FMA [[BITCAST1]], [[BITCAST3]], [[BITCAST5]] ; GFX9-CONTRACT: [[DEF1:%[0-9]+]]:_(<3 x s16>) = G_IMPLICIT_DEF ; GFX9-CONTRACT: [[CONCAT_VECTORS3:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[FMA]](<3 x s16>), [[DEF1]](<3 x s16>) ; GFX9-CONTRACT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>), [[UV2:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS3]](<6 x s16>) ; GFX9-CONTRACT: $vgpr0 = COPY [[UV]](<2 x s16>) ; GFX9-CONTRACT: $vgpr1 = COPY [[UV1]](<2 x s16>) - ; GFX9-CONTRACT: S_SETPC_B64_return $sgpr30_sgpr31, implicit $vgpr0, implicit $vgpr1 + ; GFX9-CONTRACT: [[COPY7:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY6]] + ; GFX9-CONTRACT: S_SETPC_B64_return [[COPY7]], implicit $vgpr0, implicit $vgpr1 ; GFX9-DENORM-LABEL: name: test_3xhalf_add_mul_rhs ; GFX9-DENORM: [[COPY:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0 ; GFX9-DENORM: [[COPY1:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr1 @@ -1304,6 +1470,7 @@ body: | ; GFX9-DENORM: [[BITCAST4:%[0-9]+]]:_(s96) = G_BITCAST [[CONCAT_VECTORS2]](<6 x s16>) ; GFX9-DENORM: [[TRUNC2:%[0-9]+]]:_(s48) = G_TRUNC [[BITCAST4]](s96) ; GFX9-DENORM: [[BITCAST5:%[0-9]+]]:_(<3 x s16>) = G_BITCAST [[TRUNC2]](s48) + ; GFX9-DENORM: [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 ; GFX9-DENORM: [[FMUL:%[0-9]+]]:_(<3 x s16>) = reassoc G_FMUL [[BITCAST1]], [[BITCAST3]] ; GFX9-DENORM: [[FADD:%[0-9]+]]:_(<3 x s16>) = reassoc G_FADD [[BITCAST5]], [[FMUL]] ; GFX9-DENORM: [[DEF1:%[0-9]+]]:_(<3 x s16>) = G_IMPLICIT_DEF @@ -1311,7 +1478,8 @@ body: | ; GFX9-DENORM: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>), [[UV2:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS3]](<6 x s16>) ; GFX9-DENORM: $vgpr0 = COPY [[UV]](<2 x s16>) ; GFX9-DENORM: $vgpr1 = COPY [[UV1]](<2 x s16>) - ; 
GFX9-DENORM: S_SETPC_B64_return $sgpr30_sgpr31, implicit $vgpr0, implicit $vgpr1 + ; GFX9-DENORM: [[COPY7:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY6]] + ; GFX9-DENORM: S_SETPC_B64_return [[COPY7]], implicit $vgpr0, implicit $vgpr1 ; GFX9-UNSAFE-LABEL: name: test_3xhalf_add_mul_rhs ; GFX9-UNSAFE: [[COPY:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0 ; GFX9-UNSAFE: [[COPY1:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr1 @@ -1332,13 +1500,15 @@ body: | ; GFX9-UNSAFE: [[BITCAST4:%[0-9]+]]:_(s96) = G_BITCAST [[CONCAT_VECTORS2]](<6 x s16>) ; GFX9-UNSAFE: [[TRUNC2:%[0-9]+]]:_(s48) = G_TRUNC [[BITCAST4]](s96) ; GFX9-UNSAFE: [[BITCAST5:%[0-9]+]]:_(<3 x s16>) = G_BITCAST [[TRUNC2]](s48) + ; GFX9-UNSAFE: [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 ; GFX9-UNSAFE: [[FMA:%[0-9]+]]:_(<3 x s16>) = G_FMA [[BITCAST1]], [[BITCAST3]], [[BITCAST5]] ; GFX9-UNSAFE: [[DEF1:%[0-9]+]]:_(<3 x s16>) = G_IMPLICIT_DEF ; GFX9-UNSAFE: [[CONCAT_VECTORS3:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[FMA]](<3 x s16>), [[DEF1]](<3 x s16>) ; GFX9-UNSAFE: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>), [[UV2:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS3]](<6 x s16>) ; GFX9-UNSAFE: $vgpr0 = COPY [[UV]](<2 x s16>) ; GFX9-UNSAFE: $vgpr1 = COPY [[UV1]](<2 x s16>) - ; GFX9-UNSAFE: S_SETPC_B64_return $sgpr30_sgpr31, implicit $vgpr0, implicit $vgpr1 + ; GFX9-UNSAFE: [[COPY7:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY6]] + ; GFX9-UNSAFE: S_SETPC_B64_return [[COPY7]], implicit $vgpr0, implicit $vgpr1 ; GFX10-LABEL: name: test_3xhalf_add_mul_rhs ; GFX10: [[COPY:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0 ; GFX10: [[COPY1:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr1 @@ -1359,6 +1529,7 @@ body: | ; GFX10: [[BITCAST4:%[0-9]+]]:_(s96) = G_BITCAST [[CONCAT_VECTORS2]](<6 x s16>) ; GFX10: [[TRUNC2:%[0-9]+]]:_(s48) = G_TRUNC [[BITCAST4]](s96) ; GFX10: [[BITCAST5:%[0-9]+]]:_(<3 x s16>) = G_BITCAST [[TRUNC2]](s48) + ; GFX10: [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 ; GFX10: [[FMUL:%[0-9]+]]:_(<3 x s16>) = reassoc G_FMUL [[BITCAST1]], [[BITCAST3]] ; GFX10: [[FADD:%[0-9]+]]:_(<3 x s16>) = reassoc G_FADD [[BITCAST5]], [[FMUL]] ; GFX10: [[DEF1:%[0-9]+]]:_(<3 x s16>) = G_IMPLICIT_DEF @@ -1366,7 +1537,8 @@ body: | ; GFX10: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>), [[UV2:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS3]](<6 x s16>) ; GFX10: $vgpr0 = COPY [[UV]](<2 x s16>) ; GFX10: $vgpr1 = COPY [[UV1]](<2 x s16>) - ; GFX10: S_SETPC_B64_return $sgpr30_sgpr31, implicit $vgpr0, implicit $vgpr1 + ; GFX10: [[COPY7:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY6]] + ; GFX10: S_SETPC_B64_return [[COPY7]], implicit $vgpr0, implicit $vgpr1 ; GFX10-CONTRACT-LABEL: name: test_3xhalf_add_mul_rhs ; GFX10-CONTRACT: [[COPY:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0 ; GFX10-CONTRACT: [[COPY1:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr1 @@ -1387,13 +1559,15 @@ body: | ; GFX10-CONTRACT: [[BITCAST4:%[0-9]+]]:_(s96) = G_BITCAST [[CONCAT_VECTORS2]](<6 x s16>) ; GFX10-CONTRACT: [[TRUNC2:%[0-9]+]]:_(s48) = G_TRUNC [[BITCAST4]](s96) ; GFX10-CONTRACT: [[BITCAST5:%[0-9]+]]:_(<3 x s16>) = G_BITCAST [[TRUNC2]](s48) + ; GFX10-CONTRACT: [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 ; GFX10-CONTRACT: [[FMA:%[0-9]+]]:_(<3 x s16>) = G_FMA [[BITCAST1]], [[BITCAST3]], [[BITCAST5]] ; GFX10-CONTRACT: [[DEF1:%[0-9]+]]:_(<3 x s16>) = G_IMPLICIT_DEF ; GFX10-CONTRACT: [[CONCAT_VECTORS3:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[FMA]](<3 x s16>), [[DEF1]](<3 x s16>) ; GFX10-CONTRACT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>), [[UV2:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES 
[[CONCAT_VECTORS3]](<6 x s16>) ; GFX10-CONTRACT: $vgpr0 = COPY [[UV]](<2 x s16>) ; GFX10-CONTRACT: $vgpr1 = COPY [[UV1]](<2 x s16>) - ; GFX10-CONTRACT: S_SETPC_B64_return $sgpr30_sgpr31, implicit $vgpr0, implicit $vgpr1 + ; GFX10-CONTRACT: [[COPY7:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY6]] + ; GFX10-CONTRACT: S_SETPC_B64_return [[COPY7]], implicit $vgpr0, implicit $vgpr1 ; GFX10-DENORM-LABEL: name: test_3xhalf_add_mul_rhs ; GFX10-DENORM: [[COPY:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0 ; GFX10-DENORM: [[COPY1:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr1 @@ -1414,6 +1588,7 @@ body: | ; GFX10-DENORM: [[BITCAST4:%[0-9]+]]:_(s96) = G_BITCAST [[CONCAT_VECTORS2]](<6 x s16>) ; GFX10-DENORM: [[TRUNC2:%[0-9]+]]:_(s48) = G_TRUNC [[BITCAST4]](s96) ; GFX10-DENORM: [[BITCAST5:%[0-9]+]]:_(<3 x s16>) = G_BITCAST [[TRUNC2]](s48) + ; GFX10-DENORM: [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 ; GFX10-DENORM: [[FMUL:%[0-9]+]]:_(<3 x s16>) = reassoc G_FMUL [[BITCAST1]], [[BITCAST3]] ; GFX10-DENORM: [[FADD:%[0-9]+]]:_(<3 x s16>) = reassoc G_FADD [[BITCAST5]], [[FMUL]] ; GFX10-DENORM: [[DEF1:%[0-9]+]]:_(<3 x s16>) = G_IMPLICIT_DEF @@ -1421,7 +1596,8 @@ body: | ; GFX10-DENORM: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>), [[UV2:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS3]](<6 x s16>) ; GFX10-DENORM: $vgpr0 = COPY [[UV]](<2 x s16>) ; GFX10-DENORM: $vgpr1 = COPY [[UV1]](<2 x s16>) - ; GFX10-DENORM: S_SETPC_B64_return $sgpr30_sgpr31, implicit $vgpr0, implicit $vgpr1 + ; GFX10-DENORM: [[COPY7:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY6]] + ; GFX10-DENORM: S_SETPC_B64_return [[COPY7]], implicit $vgpr0, implicit $vgpr1 ; GFX10-UNSAFE-LABEL: name: test_3xhalf_add_mul_rhs ; GFX10-UNSAFE: [[COPY:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0 ; GFX10-UNSAFE: [[COPY1:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr1 @@ -1442,13 +1618,15 @@ body: | ; GFX10-UNSAFE: [[BITCAST4:%[0-9]+]]:_(s96) = G_BITCAST [[CONCAT_VECTORS2]](<6 x s16>) ; GFX10-UNSAFE: [[TRUNC2:%[0-9]+]]:_(s48) = G_TRUNC [[BITCAST4]](s96) ; GFX10-UNSAFE: [[BITCAST5:%[0-9]+]]:_(<3 x s16>) = G_BITCAST [[TRUNC2]](s48) + ; GFX10-UNSAFE: [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 ; GFX10-UNSAFE: [[FMA:%[0-9]+]]:_(<3 x s16>) = G_FMA [[BITCAST1]], [[BITCAST3]], [[BITCAST5]] ; GFX10-UNSAFE: [[DEF1:%[0-9]+]]:_(<3 x s16>) = G_IMPLICIT_DEF ; GFX10-UNSAFE: [[CONCAT_VECTORS3:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[FMA]](<3 x s16>), [[DEF1]](<3 x s16>) ; GFX10-UNSAFE: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>), [[UV2:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS3]](<6 x s16>) ; GFX10-UNSAFE: $vgpr0 = COPY [[UV]](<2 x s16>) ; GFX10-UNSAFE: $vgpr1 = COPY [[UV1]](<2 x s16>) - ; GFX10-UNSAFE: S_SETPC_B64_return $sgpr30_sgpr31, implicit $vgpr0, implicit $vgpr1 + ; GFX10-UNSAFE: [[COPY7:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY6]] + ; GFX10-UNSAFE: S_SETPC_B64_return [[COPY7]], implicit $vgpr0, implicit $vgpr1 %4:_(<2 x s16>) = COPY $vgpr0 %5:_(<2 x s16>) = COPY $vgpr1 %10:_(<2 x s16>) = G_IMPLICIT_DEF @@ -1462,6 +1640,7 @@ body: | %9:_(<2 x s16>) = COPY $vgpr5 %15:_(<6 x s16>) = G_CONCAT_VECTORS %8(<2 x s16>), %9(<2 x s16>), %10(<2 x s16>) %2:_(<3 x s16>), %16:_(<3 x s16>) = G_UNMERGE_VALUES %15(<6 x s16>) + %3:sgpr_64 = COPY $sgpr30_sgpr31 %17:_(<3 x s16>) = reassoc G_FMUL %0, %1 %18:_(<3 x s16>) = reassoc G_FADD %2, %17 %22:_(<3 x s16>) = G_IMPLICIT_DEF @@ -1469,14 +1648,15 @@ body: | %20:_(<2 x s16>), %21:_(<2 x s16>), %24:_(<2 x s16>) = G_UNMERGE_VALUES %23(<6 x s16>) $vgpr0 = COPY %20(<2 x s16>) $vgpr1 = COPY %21(<2 x s16>) - S_SETPC_B64_return 
$sgpr30_sgpr31, implicit $vgpr0, implicit $vgpr1 + %19:ccr_sgpr_64 = COPY %3 + S_SETPC_B64_return %19, implicit $vgpr0, implicit $vgpr1 ... --- name: test_4xdouble_add_mul body: | bb.1.entry: - liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $vgpr8, $vgpr9, $vgpr10, $vgpr11, $vgpr12, $vgpr13, $vgpr14, $vgpr15, $vgpr16, $vgpr17, $vgpr18, $vgpr19, $vgpr20, $vgpr21, $vgpr22, $vgpr23 + liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $vgpr8, $vgpr9, $vgpr10, $vgpr11, $vgpr12, $vgpr13, $vgpr14, $vgpr15, $vgpr16, $vgpr17, $vgpr18, $vgpr19, $vgpr20, $vgpr21, $vgpr22, $vgpr23, $sgpr30_sgpr31 ; GFX9-LABEL: name: test_4xdouble_add_mul ; GFX9: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 @@ -1518,6 +1698,7 @@ body: | ; GFX9: [[MV10:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY20]](s32), [[COPY21]](s32) ; GFX9: [[MV11:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY22]](s32), [[COPY23]](s32) ; GFX9: [[BUILD_VECTOR2:%[0-9]+]]:_(<4 x s64>) = G_BUILD_VECTOR [[MV8]](s64), [[MV9]](s64), [[MV10]](s64), [[MV11]](s64) + ; GFX9: [[COPY24:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 ; GFX9: [[FMUL:%[0-9]+]]:_(<4 x s64>) = reassoc G_FMUL [[BUILD_VECTOR]], [[BUILD_VECTOR1]] ; GFX9: [[FADD:%[0-9]+]]:_(<4 x s64>) = reassoc G_FADD [[FMUL]], [[BUILD_VECTOR2]] ; GFX9: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32), [[UV6:%[0-9]+]]:_(s32), [[UV7:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[FADD]](<4 x s64>) @@ -1529,7 +1710,8 @@ body: | ; GFX9: $vgpr5 = COPY [[UV5]](s32) ; GFX9: $vgpr6 = COPY [[UV6]](s32) ; GFX9: $vgpr7 = COPY [[UV7]](s32) - ; GFX9: S_SETPC_B64_return $sgpr30_sgpr31, implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7 + ; GFX9: [[COPY25:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY24]] + ; GFX9: S_SETPC_B64_return [[COPY25]], implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7 ; GFX9-CONTRACT-LABEL: name: test_4xdouble_add_mul ; GFX9-CONTRACT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 ; GFX9-CONTRACT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 @@ -1570,6 +1752,7 @@ body: | ; GFX9-CONTRACT: [[MV10:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY20]](s32), [[COPY21]](s32) ; GFX9-CONTRACT: [[MV11:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY22]](s32), [[COPY23]](s32) ; GFX9-CONTRACT: [[BUILD_VECTOR2:%[0-9]+]]:_(<4 x s64>) = G_BUILD_VECTOR [[MV8]](s64), [[MV9]](s64), [[MV10]](s64), [[MV11]](s64) + ; GFX9-CONTRACT: [[COPY24:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 ; GFX9-CONTRACT: [[FMA:%[0-9]+]]:_(<4 x s64>) = G_FMA [[BUILD_VECTOR]], [[BUILD_VECTOR1]], [[BUILD_VECTOR2]] ; GFX9-CONTRACT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32), [[UV6:%[0-9]+]]:_(s32), [[UV7:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[FMA]](<4 x s64>) ; GFX9-CONTRACT: $vgpr0 = COPY [[UV]](s32) @@ -1580,7 +1763,8 @@ body: | ; GFX9-CONTRACT: $vgpr5 = COPY [[UV5]](s32) ; GFX9-CONTRACT: $vgpr6 = COPY [[UV6]](s32) ; GFX9-CONTRACT: $vgpr7 = COPY [[UV7]](s32) - ; GFX9-CONTRACT: S_SETPC_B64_return $sgpr30_sgpr31, implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7 + ; GFX9-CONTRACT: [[COPY25:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY24]] + ; GFX9-CONTRACT: S_SETPC_B64_return [[COPY25]], implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, 
implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7 ; GFX9-DENORM-LABEL: name: test_4xdouble_add_mul ; GFX9-DENORM: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 ; GFX9-DENORM: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 @@ -1621,6 +1805,7 @@ body: | ; GFX9-DENORM: [[MV10:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY20]](s32), [[COPY21]](s32) ; GFX9-DENORM: [[MV11:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY22]](s32), [[COPY23]](s32) ; GFX9-DENORM: [[BUILD_VECTOR2:%[0-9]+]]:_(<4 x s64>) = G_BUILD_VECTOR [[MV8]](s64), [[MV9]](s64), [[MV10]](s64), [[MV11]](s64) + ; GFX9-DENORM: [[COPY24:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 ; GFX9-DENORM: [[FMUL:%[0-9]+]]:_(<4 x s64>) = reassoc G_FMUL [[BUILD_VECTOR]], [[BUILD_VECTOR1]] ; GFX9-DENORM: [[FADD:%[0-9]+]]:_(<4 x s64>) = reassoc G_FADD [[FMUL]], [[BUILD_VECTOR2]] ; GFX9-DENORM: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32), [[UV6:%[0-9]+]]:_(s32), [[UV7:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[FADD]](<4 x s64>) @@ -1632,7 +1817,8 @@ body: | ; GFX9-DENORM: $vgpr5 = COPY [[UV5]](s32) ; GFX9-DENORM: $vgpr6 = COPY [[UV6]](s32) ; GFX9-DENORM: $vgpr7 = COPY [[UV7]](s32) - ; GFX9-DENORM: S_SETPC_B64_return $sgpr30_sgpr31, implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7 + ; GFX9-DENORM: [[COPY25:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY24]] + ; GFX9-DENORM: S_SETPC_B64_return [[COPY25]], implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7 ; GFX9-UNSAFE-LABEL: name: test_4xdouble_add_mul ; GFX9-UNSAFE: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 ; GFX9-UNSAFE: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 @@ -1673,6 +1859,7 @@ body: | ; GFX9-UNSAFE: [[MV10:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY20]](s32), [[COPY21]](s32) ; GFX9-UNSAFE: [[MV11:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY22]](s32), [[COPY23]](s32) ; GFX9-UNSAFE: [[BUILD_VECTOR2:%[0-9]+]]:_(<4 x s64>) = G_BUILD_VECTOR [[MV8]](s64), [[MV9]](s64), [[MV10]](s64), [[MV11]](s64) + ; GFX9-UNSAFE: [[COPY24:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 ; GFX9-UNSAFE: [[FMA:%[0-9]+]]:_(<4 x s64>) = G_FMA [[BUILD_VECTOR]], [[BUILD_VECTOR1]], [[BUILD_VECTOR2]] ; GFX9-UNSAFE: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32), [[UV6:%[0-9]+]]:_(s32), [[UV7:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[FMA]](<4 x s64>) ; GFX9-UNSAFE: $vgpr0 = COPY [[UV]](s32) @@ -1683,7 +1870,8 @@ body: | ; GFX9-UNSAFE: $vgpr5 = COPY [[UV5]](s32) ; GFX9-UNSAFE: $vgpr6 = COPY [[UV6]](s32) ; GFX9-UNSAFE: $vgpr7 = COPY [[UV7]](s32) - ; GFX9-UNSAFE: S_SETPC_B64_return $sgpr30_sgpr31, implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7 + ; GFX9-UNSAFE: [[COPY25:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY24]] + ; GFX9-UNSAFE: S_SETPC_B64_return [[COPY25]], implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7 ; GFX10-LABEL: name: test_4xdouble_add_mul ; GFX10: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 ; GFX10: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 @@ -1724,6 +1912,7 @@ body: | ; GFX10: [[MV10:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY20]](s32), [[COPY21]](s32) ; GFX10: [[MV11:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY22]](s32), [[COPY23]](s32) ; GFX10: 
[[BUILD_VECTOR2:%[0-9]+]]:_(<4 x s64>) = G_BUILD_VECTOR [[MV8]](s64), [[MV9]](s64), [[MV10]](s64), [[MV11]](s64) + ; GFX10: [[COPY24:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 ; GFX10: [[FMUL:%[0-9]+]]:_(<4 x s64>) = reassoc G_FMUL [[BUILD_VECTOR]], [[BUILD_VECTOR1]] ; GFX10: [[FADD:%[0-9]+]]:_(<4 x s64>) = reassoc G_FADD [[FMUL]], [[BUILD_VECTOR2]] ; GFX10: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32), [[UV6:%[0-9]+]]:_(s32), [[UV7:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[FADD]](<4 x s64>) @@ -1735,7 +1924,8 @@ body: | ; GFX10: $vgpr5 = COPY [[UV5]](s32) ; GFX10: $vgpr6 = COPY [[UV6]](s32) ; GFX10: $vgpr7 = COPY [[UV7]](s32) - ; GFX10: S_SETPC_B64_return $sgpr30_sgpr31, implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7 + ; GFX10: [[COPY25:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY24]] + ; GFX10: S_SETPC_B64_return [[COPY25]], implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7 ; GFX10-CONTRACT-LABEL: name: test_4xdouble_add_mul ; GFX10-CONTRACT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 ; GFX10-CONTRACT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 @@ -1776,6 +1966,7 @@ body: | ; GFX10-CONTRACT: [[MV10:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY20]](s32), [[COPY21]](s32) ; GFX10-CONTRACT: [[MV11:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY22]](s32), [[COPY23]](s32) ; GFX10-CONTRACT: [[BUILD_VECTOR2:%[0-9]+]]:_(<4 x s64>) = G_BUILD_VECTOR [[MV8]](s64), [[MV9]](s64), [[MV10]](s64), [[MV11]](s64) + ; GFX10-CONTRACT: [[COPY24:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 ; GFX10-CONTRACT: [[FMA:%[0-9]+]]:_(<4 x s64>) = G_FMA [[BUILD_VECTOR]], [[BUILD_VECTOR1]], [[BUILD_VECTOR2]] ; GFX10-CONTRACT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32), [[UV6:%[0-9]+]]:_(s32), [[UV7:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[FMA]](<4 x s64>) ; GFX10-CONTRACT: $vgpr0 = COPY [[UV]](s32) @@ -1786,7 +1977,8 @@ body: | ; GFX10-CONTRACT: $vgpr5 = COPY [[UV5]](s32) ; GFX10-CONTRACT: $vgpr6 = COPY [[UV6]](s32) ; GFX10-CONTRACT: $vgpr7 = COPY [[UV7]](s32) - ; GFX10-CONTRACT: S_SETPC_B64_return $sgpr30_sgpr31, implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7 + ; GFX10-CONTRACT: [[COPY25:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY24]] + ; GFX10-CONTRACT: S_SETPC_B64_return [[COPY25]], implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7 ; GFX10-DENORM-LABEL: name: test_4xdouble_add_mul ; GFX10-DENORM: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 ; GFX10-DENORM: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 @@ -1827,6 +2019,7 @@ body: | ; GFX10-DENORM: [[MV10:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY20]](s32), [[COPY21]](s32) ; GFX10-DENORM: [[MV11:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY22]](s32), [[COPY23]](s32) ; GFX10-DENORM: [[BUILD_VECTOR2:%[0-9]+]]:_(<4 x s64>) = G_BUILD_VECTOR [[MV8]](s64), [[MV9]](s64), [[MV10]](s64), [[MV11]](s64) + ; GFX10-DENORM: [[COPY24:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 ; GFX10-DENORM: [[FMUL:%[0-9]+]]:_(<4 x s64>) = reassoc G_FMUL [[BUILD_VECTOR]], [[BUILD_VECTOR1]] ; GFX10-DENORM: [[FADD:%[0-9]+]]:_(<4 x s64>) = reassoc G_FADD [[FMUL]], [[BUILD_VECTOR2]] ; GFX10-DENORM: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), 
[[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32), [[UV6:%[0-9]+]]:_(s32), [[UV7:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[FADD]](<4 x s64>) @@ -1838,7 +2031,8 @@ body: | ; GFX10-DENORM: $vgpr5 = COPY [[UV5]](s32) ; GFX10-DENORM: $vgpr6 = COPY [[UV6]](s32) ; GFX10-DENORM: $vgpr7 = COPY [[UV7]](s32) - ; GFX10-DENORM: S_SETPC_B64_return $sgpr30_sgpr31, implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7 + ; GFX10-DENORM: [[COPY25:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY24]] + ; GFX10-DENORM: S_SETPC_B64_return [[COPY25]], implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7 ; GFX10-UNSAFE-LABEL: name: test_4xdouble_add_mul ; GFX10-UNSAFE: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 ; GFX10-UNSAFE: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 @@ -1879,6 +2073,7 @@ body: | ; GFX10-UNSAFE: [[MV10:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY20]](s32), [[COPY21]](s32) ; GFX10-UNSAFE: [[MV11:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY22]](s32), [[COPY23]](s32) ; GFX10-UNSAFE: [[BUILD_VECTOR2:%[0-9]+]]:_(<4 x s64>) = G_BUILD_VECTOR [[MV8]](s64), [[MV9]](s64), [[MV10]](s64), [[MV11]](s64) + ; GFX10-UNSAFE: [[COPY24:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 ; GFX10-UNSAFE: [[FMA:%[0-9]+]]:_(<4 x s64>) = G_FMA [[BUILD_VECTOR]], [[BUILD_VECTOR1]], [[BUILD_VECTOR2]] ; GFX10-UNSAFE: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32), [[UV6:%[0-9]+]]:_(s32), [[UV7:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[FMA]](<4 x s64>) ; GFX10-UNSAFE: $vgpr0 = COPY [[UV]](s32) @@ -1889,7 +2084,8 @@ body: | ; GFX10-UNSAFE: $vgpr5 = COPY [[UV5]](s32) ; GFX10-UNSAFE: $vgpr6 = COPY [[UV6]](s32) ; GFX10-UNSAFE: $vgpr7 = COPY [[UV7]](s32) - ; GFX10-UNSAFE: S_SETPC_B64_return $sgpr30_sgpr31, implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7 + ; GFX10-UNSAFE: [[COPY25:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY24]] + ; GFX10-UNSAFE: S_SETPC_B64_return [[COPY25]], implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7 %4:_(s32) = COPY $vgpr0 %5:_(s32) = COPY $vgpr1 %6:_(s32) = COPY $vgpr2 @@ -1929,6 +2125,7 @@ body: | %38:_(s64) = G_MERGE_VALUES %24(s32), %25(s32) %39:_(s64) = G_MERGE_VALUES %26(s32), %27(s32) %2:_(<4 x s64>) = G_BUILD_VECTOR %36(s64), %37(s64), %38(s64), %39(s64) + %3:sgpr_64 = COPY $sgpr30_sgpr31 %40:_(<4 x s64>) = reassoc G_FMUL %0, %1 %41:_(<4 x s64>) = reassoc G_FADD %40, %2 %43:_(s32), %44:_(s32), %45:_(s32), %46:_(s32), %47:_(s32), %48:_(s32), %49:_(s32), %50:_(s32) = G_UNMERGE_VALUES %41(<4 x s64>) @@ -1940,14 +2137,15 @@ body: | $vgpr5 = COPY %48(s32) $vgpr6 = COPY %49(s32) $vgpr7 = COPY %50(s32) - S_SETPC_B64_return $sgpr30_sgpr31, implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7 + %42:ccr_sgpr_64 = COPY %3 + S_SETPC_B64_return %42, implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7 ... 
--- name: test_3xdouble_add_mul_rhs body: | bb.1.entry: - liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $vgpr8, $vgpr9, $vgpr10, $vgpr11, $vgpr12, $vgpr13, $vgpr14, $vgpr15, $vgpr16, $vgpr17 + liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $vgpr8, $vgpr9, $vgpr10, $vgpr11, $vgpr12, $vgpr13, $vgpr14, $vgpr15, $vgpr16, $vgpr17, $sgpr30_sgpr31 ; GFX9-LABEL: name: test_3xdouble_add_mul_rhs ; GFX9: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 @@ -1980,6 +2178,7 @@ body: | ; GFX9: [[MV7:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY14]](s32), [[COPY15]](s32) ; GFX9: [[MV8:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY16]](s32), [[COPY17]](s32) ; GFX9: [[BUILD_VECTOR2:%[0-9]+]]:_(<3 x s64>) = G_BUILD_VECTOR [[MV6]](s64), [[MV7]](s64), [[MV8]](s64) + ; GFX9: [[COPY18:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 ; GFX9: [[FMUL:%[0-9]+]]:_(<3 x s64>) = reassoc G_FMUL [[BUILD_VECTOR]], [[BUILD_VECTOR1]] ; GFX9: [[FADD:%[0-9]+]]:_(<3 x s64>) = reassoc G_FADD [[BUILD_VECTOR2]], [[FMUL]] ; GFX9: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[FADD]](<3 x s64>) @@ -1989,7 +2188,8 @@ body: | ; GFX9: $vgpr3 = COPY [[UV3]](s32) ; GFX9: $vgpr4 = COPY [[UV4]](s32) ; GFX9: $vgpr5 = COPY [[UV5]](s32) - ; GFX9: S_SETPC_B64_return $sgpr30_sgpr31, implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5 + ; GFX9: [[COPY19:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY18]] + ; GFX9: S_SETPC_B64_return [[COPY19]], implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5 ; GFX9-CONTRACT-LABEL: name: test_3xdouble_add_mul_rhs ; GFX9-CONTRACT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 ; GFX9-CONTRACT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 @@ -2021,6 +2221,7 @@ body: | ; GFX9-CONTRACT: [[MV7:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY14]](s32), [[COPY15]](s32) ; GFX9-CONTRACT: [[MV8:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY16]](s32), [[COPY17]](s32) ; GFX9-CONTRACT: [[BUILD_VECTOR2:%[0-9]+]]:_(<3 x s64>) = G_BUILD_VECTOR [[MV6]](s64), [[MV7]](s64), [[MV8]](s64) + ; GFX9-CONTRACT: [[COPY18:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 ; GFX9-CONTRACT: [[FMA:%[0-9]+]]:_(<3 x s64>) = G_FMA [[BUILD_VECTOR]], [[BUILD_VECTOR1]], [[BUILD_VECTOR2]] ; GFX9-CONTRACT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[FMA]](<3 x s64>) ; GFX9-CONTRACT: $vgpr0 = COPY [[UV]](s32) @@ -2029,7 +2230,8 @@ body: | ; GFX9-CONTRACT: $vgpr3 = COPY [[UV3]](s32) ; GFX9-CONTRACT: $vgpr4 = COPY [[UV4]](s32) ; GFX9-CONTRACT: $vgpr5 = COPY [[UV5]](s32) - ; GFX9-CONTRACT: S_SETPC_B64_return $sgpr30_sgpr31, implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5 + ; GFX9-CONTRACT: [[COPY19:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY18]] + ; GFX9-CONTRACT: S_SETPC_B64_return [[COPY19]], implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5 ; GFX9-DENORM-LABEL: name: test_3xdouble_add_mul_rhs ; GFX9-DENORM: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 ; GFX9-DENORM: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 @@ -2061,6 +2263,7 @@ body: | ; GFX9-DENORM: [[MV7:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY14]](s32), [[COPY15]](s32) ; GFX9-DENORM: [[MV8:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY16]](s32), [[COPY17]](s32) ; GFX9-DENORM: [[BUILD_VECTOR2:%[0-9]+]]:_(<3 x s64>) = 
G_BUILD_VECTOR [[MV6]](s64), [[MV7]](s64), [[MV8]](s64) + ; GFX9-DENORM: [[COPY18:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 ; GFX9-DENORM: [[FMUL:%[0-9]+]]:_(<3 x s64>) = reassoc G_FMUL [[BUILD_VECTOR]], [[BUILD_VECTOR1]] ; GFX9-DENORM: [[FADD:%[0-9]+]]:_(<3 x s64>) = reassoc G_FADD [[BUILD_VECTOR2]], [[FMUL]] ; GFX9-DENORM: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[FADD]](<3 x s64>) @@ -2070,7 +2273,8 @@ body: | ; GFX9-DENORM: $vgpr3 = COPY [[UV3]](s32) ; GFX9-DENORM: $vgpr4 = COPY [[UV4]](s32) ; GFX9-DENORM: $vgpr5 = COPY [[UV5]](s32) - ; GFX9-DENORM: S_SETPC_B64_return $sgpr30_sgpr31, implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5 + ; GFX9-DENORM: [[COPY19:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY18]] + ; GFX9-DENORM: S_SETPC_B64_return [[COPY19]], implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5 ; GFX9-UNSAFE-LABEL: name: test_3xdouble_add_mul_rhs ; GFX9-UNSAFE: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 ; GFX9-UNSAFE: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 @@ -2102,6 +2306,7 @@ body: | ; GFX9-UNSAFE: [[MV7:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY14]](s32), [[COPY15]](s32) ; GFX9-UNSAFE: [[MV8:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY16]](s32), [[COPY17]](s32) ; GFX9-UNSAFE: [[BUILD_VECTOR2:%[0-9]+]]:_(<3 x s64>) = G_BUILD_VECTOR [[MV6]](s64), [[MV7]](s64), [[MV8]](s64) + ; GFX9-UNSAFE: [[COPY18:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 ; GFX9-UNSAFE: [[FMA:%[0-9]+]]:_(<3 x s64>) = G_FMA [[BUILD_VECTOR]], [[BUILD_VECTOR1]], [[BUILD_VECTOR2]] ; GFX9-UNSAFE: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[FMA]](<3 x s64>) ; GFX9-UNSAFE: $vgpr0 = COPY [[UV]](s32) @@ -2110,7 +2315,8 @@ body: | ; GFX9-UNSAFE: $vgpr3 = COPY [[UV3]](s32) ; GFX9-UNSAFE: $vgpr4 = COPY [[UV4]](s32) ; GFX9-UNSAFE: $vgpr5 = COPY [[UV5]](s32) - ; GFX9-UNSAFE: S_SETPC_B64_return $sgpr30_sgpr31, implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5 + ; GFX9-UNSAFE: [[COPY19:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY18]] + ; GFX9-UNSAFE: S_SETPC_B64_return [[COPY19]], implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5 ; GFX10-LABEL: name: test_3xdouble_add_mul_rhs ; GFX10: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 ; GFX10: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 @@ -2142,6 +2348,7 @@ body: | ; GFX10: [[MV7:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY14]](s32), [[COPY15]](s32) ; GFX10: [[MV8:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY16]](s32), [[COPY17]](s32) ; GFX10: [[BUILD_VECTOR2:%[0-9]+]]:_(<3 x s64>) = G_BUILD_VECTOR [[MV6]](s64), [[MV7]](s64), [[MV8]](s64) + ; GFX10: [[COPY18:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 ; GFX10: [[FMUL:%[0-9]+]]:_(<3 x s64>) = reassoc G_FMUL [[BUILD_VECTOR]], [[BUILD_VECTOR1]] ; GFX10: [[FADD:%[0-9]+]]:_(<3 x s64>) = reassoc G_FADD [[BUILD_VECTOR2]], [[FMUL]] ; GFX10: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[FADD]](<3 x s64>) @@ -2151,7 +2358,8 @@ body: | ; GFX10: $vgpr3 = COPY [[UV3]](s32) ; GFX10: $vgpr4 = COPY [[UV4]](s32) ; GFX10: $vgpr5 = COPY [[UV5]](s32) - ; GFX10: S_SETPC_B64_return $sgpr30_sgpr31, implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit 
$vgpr3, implicit $vgpr4, implicit $vgpr5 + ; GFX10: [[COPY19:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY18]] + ; GFX10: S_SETPC_B64_return [[COPY19]], implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5 ; GFX10-CONTRACT-LABEL: name: test_3xdouble_add_mul_rhs ; GFX10-CONTRACT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 ; GFX10-CONTRACT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 @@ -2183,6 +2391,7 @@ body: | ; GFX10-CONTRACT: [[MV7:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY14]](s32), [[COPY15]](s32) ; GFX10-CONTRACT: [[MV8:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY16]](s32), [[COPY17]](s32) ; GFX10-CONTRACT: [[BUILD_VECTOR2:%[0-9]+]]:_(<3 x s64>) = G_BUILD_VECTOR [[MV6]](s64), [[MV7]](s64), [[MV8]](s64) + ; GFX10-CONTRACT: [[COPY18:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 ; GFX10-CONTRACT: [[FMA:%[0-9]+]]:_(<3 x s64>) = G_FMA [[BUILD_VECTOR]], [[BUILD_VECTOR1]], [[BUILD_VECTOR2]] ; GFX10-CONTRACT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[FMA]](<3 x s64>) ; GFX10-CONTRACT: $vgpr0 = COPY [[UV]](s32) @@ -2191,7 +2400,8 @@ body: | ; GFX10-CONTRACT: $vgpr3 = COPY [[UV3]](s32) ; GFX10-CONTRACT: $vgpr4 = COPY [[UV4]](s32) ; GFX10-CONTRACT: $vgpr5 = COPY [[UV5]](s32) - ; GFX10-CONTRACT: S_SETPC_B64_return $sgpr30_sgpr31, implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5 + ; GFX10-CONTRACT: [[COPY19:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY18]] + ; GFX10-CONTRACT: S_SETPC_B64_return [[COPY19]], implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5 ; GFX10-DENORM-LABEL: name: test_3xdouble_add_mul_rhs ; GFX10-DENORM: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 ; GFX10-DENORM: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 @@ -2223,6 +2433,7 @@ body: | ; GFX10-DENORM: [[MV7:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY14]](s32), [[COPY15]](s32) ; GFX10-DENORM: [[MV8:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY16]](s32), [[COPY17]](s32) ; GFX10-DENORM: [[BUILD_VECTOR2:%[0-9]+]]:_(<3 x s64>) = G_BUILD_VECTOR [[MV6]](s64), [[MV7]](s64), [[MV8]](s64) + ; GFX10-DENORM: [[COPY18:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 ; GFX10-DENORM: [[FMUL:%[0-9]+]]:_(<3 x s64>) = reassoc G_FMUL [[BUILD_VECTOR]], [[BUILD_VECTOR1]] ; GFX10-DENORM: [[FADD:%[0-9]+]]:_(<3 x s64>) = reassoc G_FADD [[BUILD_VECTOR2]], [[FMUL]] ; GFX10-DENORM: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[FADD]](<3 x s64>) @@ -2232,7 +2443,8 @@ body: | ; GFX10-DENORM: $vgpr3 = COPY [[UV3]](s32) ; GFX10-DENORM: $vgpr4 = COPY [[UV4]](s32) ; GFX10-DENORM: $vgpr5 = COPY [[UV5]](s32) - ; GFX10-DENORM: S_SETPC_B64_return $sgpr30_sgpr31, implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5 + ; GFX10-DENORM: [[COPY19:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY18]] + ; GFX10-DENORM: S_SETPC_B64_return [[COPY19]], implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5 ; GFX10-UNSAFE-LABEL: name: test_3xdouble_add_mul_rhs ; GFX10-UNSAFE: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 ; GFX10-UNSAFE: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 @@ -2264,6 +2476,7 @@ body: | ; GFX10-UNSAFE: [[MV7:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY14]](s32), [[COPY15]](s32) ; GFX10-UNSAFE: [[MV8:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY16]](s32), [[COPY17]](s32) ; 
GFX10-UNSAFE: [[BUILD_VECTOR2:%[0-9]+]]:_(<3 x s64>) = G_BUILD_VECTOR [[MV6]](s64), [[MV7]](s64), [[MV8]](s64) + ; GFX10-UNSAFE: [[COPY18:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 ; GFX10-UNSAFE: [[FMA:%[0-9]+]]:_(<3 x s64>) = G_FMA [[BUILD_VECTOR]], [[BUILD_VECTOR1]], [[BUILD_VECTOR2]] ; GFX10-UNSAFE: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[FMA]](<3 x s64>) ; GFX10-UNSAFE: $vgpr0 = COPY [[UV]](s32) @@ -2272,7 +2485,8 @@ body: | ; GFX10-UNSAFE: $vgpr3 = COPY [[UV3]](s32) ; GFX10-UNSAFE: $vgpr4 = COPY [[UV4]](s32) ; GFX10-UNSAFE: $vgpr5 = COPY [[UV5]](s32) - ; GFX10-UNSAFE: S_SETPC_B64_return $sgpr30_sgpr31, implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5 + ; GFX10-UNSAFE: [[COPY19:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY18]] + ; GFX10-UNSAFE: S_SETPC_B64_return [[COPY19]], implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5 %4:_(s32) = COPY $vgpr0 %5:_(s32) = COPY $vgpr1 %6:_(s32) = COPY $vgpr2 @@ -2303,6 +2517,7 @@ body: | %29:_(s64) = G_MERGE_VALUES %18(s32), %19(s32) %30:_(s64) = G_MERGE_VALUES %20(s32), %21(s32) %2:_(<3 x s64>) = G_BUILD_VECTOR %28(s64), %29(s64), %30(s64) + %3:sgpr_64 = COPY $sgpr30_sgpr31 %31:_(<3 x s64>) = reassoc G_FMUL %0, %1 %32:_(<3 x s64>) = reassoc G_FADD %2, %31 %34:_(s32), %35:_(s32), %36:_(s32), %37:_(s32), %38:_(s32), %39:_(s32) = G_UNMERGE_VALUES %32(<3 x s64>) @@ -2312,5 +2527,6 @@ body: | $vgpr3 = COPY %37(s32) $vgpr4 = COPY %38(s32) $vgpr5 = COPY %39(s32) - S_SETPC_B64_return $sgpr30_sgpr31, implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5 + %33:ccr_sgpr_64 = COPY %3 + S_SETPC_B64_return %33, implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5 ... 
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/combine-shift-imm-chain-illegal-types.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/combine-shift-imm-chain-illegal-types.mir index dcec23c030c4..e271229aa1fd 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/combine-shift-imm-chain-illegal-types.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/combine-shift-imm-chain-illegal-types.mir @@ -283,9 +283,10 @@ body: | liveins: $vgpr0, $vgpr1, $sgpr30_sgpr31 ; CHECK-LABEL: name: ushlsat_i44 - ; CHECK: liveins: $vgpr0, $vgpr1 + ; CHECK: liveins: $vgpr0, $vgpr1, $sgpr30_sgpr31 ; CHECK: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 ; CHECK: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; CHECK: [[COPY2:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 ; CHECK: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY]](s32), [[COPY1]](s32) ; CHECK: [[TRUNC:%[0-9]+]]:_(s44) = G_TRUNC [[MV]](s64) ; CHECK: [[C:%[0-9]+]]:_(s44) = G_CONSTANT i44 22 @@ -295,9 +296,11 @@ body: | ; CHECK: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[ANYEXT]](s64) ; CHECK: $vgpr0 = COPY [[UV]](s32) ; CHECK: $vgpr1 = COPY [[UV1]](s32) - ; CHECK: S_SETPC_B64_return undef $sgpr30_sgpr31, implicit $vgpr0, implicit $vgpr1 + ; CHECK: [[COPY3:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY2]] + ; CHECK: S_SETPC_B64_return [[COPY3]], implicit $vgpr0, implicit $vgpr1 %2:_(s32) = COPY $vgpr0 %3:_(s32) = COPY $vgpr1 + %1:sgpr_64 = COPY $sgpr30_sgpr31 %4:_(s64) = G_MERGE_VALUES %2(s32), %3(s32) %0:_(s44) = G_TRUNC %4(s64) %5:_(s44) = G_CONSTANT i44 22 @@ -307,7 +310,8 @@ body: | %10:_(s32), %11:_(s32) = G_UNMERGE_VALUES %9(s64) $vgpr0 = COPY %10(s32) $vgpr1 = COPY %11(s32) - S_SETPC_B64_return undef $sgpr30_sgpr31, implicit $vgpr0, implicit $vgpr1 + %8:ccr_sgpr_64 = COPY %1 + S_SETPC_B64_return %8, implicit $vgpr0, implicit $vgpr1 ... --- @@ -315,12 +319,13 @@ name: ushlsat_i55 tracksRegLiveness: true body: | bb.0: - liveins: $vgpr0, $vgpr1 + liveins: $vgpr0, $vgpr1, $sgpr30_sgpr31 ; CHECK-LABEL: name: ushlsat_i55 - ; CHECK: liveins: $vgpr0, $vgpr1 + ; CHECK: liveins: $vgpr0, $vgpr1, $sgpr30_sgpr31 ; CHECK: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 ; CHECK: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; CHECK: [[COPY2:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 ; CHECK: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY]](s32), [[COPY1]](s32) ; CHECK: [[TRUNC:%[0-9]+]]:_(s55) = G_TRUNC [[MV]](s64) ; CHECK: [[C:%[0-9]+]]:_(s55) = G_CONSTANT i55 53 @@ -329,9 +334,11 @@ body: | ; CHECK: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[ANYEXT]](s64) ; CHECK: $vgpr0 = COPY [[UV]](s32) ; CHECK: $vgpr1 = COPY [[UV1]](s32) - ; CHECK: S_SETPC_B64_return undef $sgpr30_sgpr31, implicit $vgpr0, implicit $vgpr1 + ; CHECK: [[COPY3:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY2]] + ; CHECK: S_SETPC_B64_return [[COPY3]], implicit $vgpr0, implicit $vgpr1 %2:_(s32) = COPY $vgpr0 %3:_(s32) = COPY $vgpr1 + %1:sgpr_64 = COPY $sgpr30_sgpr31 %4:_(s64) = G_MERGE_VALUES %2(s32), %3(s32) %0:_(s55) = G_TRUNC %4(s64) %5:_(s55) = G_CONSTANT i55 50 @@ -342,6 +349,7 @@ body: | %11:_(s32), %12:_(s32) = G_UNMERGE_VALUES %10(s64) $vgpr0 = COPY %11(s32) $vgpr1 = COPY %12(s32) - S_SETPC_B64_return undef $sgpr30_sgpr31, implicit $vgpr0, implicit $vgpr1 + %9:ccr_sgpr_64 = COPY %1 + S_SETPC_B64_return %9, implicit $vgpr0, implicit $vgpr1 ... 
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/dummy-target.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/dummy-target.ll index 5dfde116785d..8a957d101912 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/dummy-target.ll +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/dummy-target.ll @@ -6,14 +6,15 @@ define i16 @vop3p_add_i16(i16 %arg0) #0 { ; CHECK-LABEL: name: vop3p_add_i16 ; CHECK: bb.1 (%ir-block.0): - ; CHECK-NEXT: liveins: $vgpr0 - ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY]](s32) - ; CHECK-NEXT: [[ADD:%[0-9]+]]:_(s16) = G_ADD [[TRUNC]], [[TRUNC]] - ; CHECK-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[ADD]](s16) - ; CHECK-NEXT: $vgpr0 = COPY [[ANYEXT]](s32) - ; CHECK-NEXT: SI_RETURN implicit $vgpr0 + ; CHECK: liveins: $vgpr0, $sgpr30_sgpr31 + ; CHECK: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 + ; CHECK: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY]](s32) + ; CHECK: [[COPY1:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 + ; CHECK: [[ADD:%[0-9]+]]:_(s16) = G_ADD [[TRUNC]], [[TRUNC]] + ; CHECK: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[ADD]](s16) + ; CHECK: $vgpr0 = COPY [[ANYEXT]](s32) + ; CHECK: [[COPY2:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY1]] + ; CHECK: S_SETPC_B64_return [[COPY2]], implicit $vgpr0 %add = add i16 %arg0, %arg0 ret i16 %add } @@ -21,27 +22,28 @@ define i16 @vop3p_add_i16(i16 %arg0) #0 { define <2 x i16> @vop3p_add_v2i16(<2 x i16> %arg0) #0 { ; CHECK-LABEL: name: vop3p_add_v2i16 ; CHECK: bb.1 (%ir-block.0): - ; CHECK-NEXT: liveins: $vgpr0 - ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0 - ; CHECK-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY]](<2 x s16>) - ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST]](s32) - ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; CHECK-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) - ; CHECK-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) - ; CHECK-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[COPY]](<2 x s16>) - ; CHECK-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST1]](s32) - ; CHECK-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C]](s32) - ; CHECK-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR1]](s32) - ; CHECK-NEXT: [[ADD:%[0-9]+]]:_(s16) = G_ADD [[TRUNC]], [[TRUNC2]] - ; CHECK-NEXT: [[ADD1:%[0-9]+]]:_(s16) = G_ADD [[TRUNC1]], [[TRUNC3]] - ; CHECK-NEXT: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[ADD]](s16) - ; CHECK-NEXT: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[ADD1]](s16) - ; CHECK-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C]](s32) - ; CHECK-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL]] - ; CHECK-NEXT: [[BITCAST2:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) - ; CHECK-NEXT: $vgpr0 = COPY [[BITCAST2]](<2 x s16>) - ; CHECK-NEXT: SI_RETURN implicit $vgpr0 + ; CHECK: liveins: $vgpr0, $sgpr30_sgpr31 + ; CHECK: [[COPY:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0 + ; CHECK: [[COPY1:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 + ; CHECK: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY]](<2 x s16>) + ; CHECK: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST]](s32) + ; CHECK: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; CHECK: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) + ; CHECK: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) + ; CHECK: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[COPY]](<2 x s16>) + ; CHECK: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST1]](s32) + ; CHECK: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C]](s32) + ; CHECK: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC 
[[LSHR1]](s32) + ; CHECK: [[ADD:%[0-9]+]]:_(s16) = G_ADD [[TRUNC]], [[TRUNC2]] + ; CHECK: [[ADD1:%[0-9]+]]:_(s16) = G_ADD [[TRUNC1]], [[TRUNC3]] + ; CHECK: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[ADD]](s16) + ; CHECK: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[ADD1]](s16) + ; CHECK: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C]](s32) + ; CHECK: [[OR:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL]] + ; CHECK: [[BITCAST2:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) + ; CHECK: $vgpr0 = COPY [[BITCAST2]](<2 x s16>) + ; CHECK: [[COPY2:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY1]] + ; CHECK: S_SETPC_B64_return [[COPY2]], implicit $vgpr0 %add = add <2 x i16> %arg0, %arg0 ret <2 x i16> %add } @@ -49,12 +51,13 @@ define <2 x i16> @vop3p_add_v2i16(<2 x i16> %arg0) #0 { define i16 @halfinsts_add_i16(i16 %arg0) #1 { ; CHECK-LABEL: name: halfinsts_add_i16 ; CHECK: bb.1 (%ir-block.0): - ; CHECK-NEXT: liveins: $vgpr0 - ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; CHECK-NEXT: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[COPY]], [[COPY]] - ; CHECK-NEXT: $vgpr0 = COPY [[ADD]](s32) - ; CHECK-NEXT: SI_RETURN implicit $vgpr0 + ; CHECK: liveins: $vgpr0, $sgpr30_sgpr31 + ; CHECK: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 + ; CHECK: [[COPY1:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 + ; CHECK: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[COPY]], [[COPY]] + ; CHECK: $vgpr0 = COPY [[ADD]](s32) + ; CHECK: [[COPY2:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY1]] + ; CHECK: S_SETPC_B64_return [[COPY2]], implicit $vgpr0 %add = add i16 %arg0, %arg0 ret i16 %add } @@ -62,15 +65,16 @@ define i16 @halfinsts_add_i16(i16 %arg0) #1 { define <2 x i16> @halfinsts_add_v2i16(<2 x i16> %arg0) #1 { ; CHECK-LABEL: name: halfinsts_add_v2i16 ; CHECK: bb.1 (%ir-block.0): - ; CHECK-NEXT: liveins: $vgpr0, $vgpr1 - ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; CHECK-NEXT: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[COPY]], [[COPY]] - ; CHECK-NEXT: [[ADD1:%[0-9]+]]:_(s32) = G_ADD [[COPY1]], [[COPY1]] - ; CHECK-NEXT: $vgpr0 = COPY [[ADD]](s32) - ; CHECK-NEXT: $vgpr1 = COPY [[ADD1]](s32) - ; CHECK-NEXT: SI_RETURN implicit $vgpr0, implicit $vgpr1 + ; CHECK: liveins: $vgpr0, $vgpr1, $sgpr30_sgpr31 + ; CHECK: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 + ; CHECK: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; CHECK: [[COPY2:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 + ; CHECK: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[COPY]], [[COPY]] + ; CHECK: [[ADD1:%[0-9]+]]:_(s32) = G_ADD [[COPY1]], [[COPY1]] + ; CHECK: $vgpr0 = COPY [[ADD]](s32) + ; CHECK: $vgpr1 = COPY [[ADD1]](s32) + ; CHECK: [[COPY3:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY2]] + ; CHECK: S_SETPC_B64_return [[COPY3]], implicit $vgpr0, implicit $vgpr1 %add = add <2 x i16> %arg0, %arg0 ret <2 x i16> %add } diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/fdiv.f32.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/fdiv.f32.ll index 140c3ff69407..7cd475bf0098 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/fdiv.f32.ll +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/fdiv.f32.ll @@ -667,8 +667,8 @@ define <2 x float> @v_fdiv_v2f32(<2 x float> %a, <2 x float> %b) { ; GFX89-IEEE-NEXT: v_fma_f32 v10, v12, v8, v10 ; GFX89-IEEE-NEXT: v_fma_f32 v11, v13, v9, v11 ; GFX89-IEEE-NEXT: v_fma_f32 v4, -v4, v10, v6 -; GFX89-IEEE-NEXT: v_fma_f32 v5, -v5, v11, v7 ; GFX89-IEEE-NEXT: v_div_fmas_f32 v4, v4, v8, v10 +; GFX89-IEEE-NEXT: v_fma_f32 v5, -v5, v11, v7 ; GFX89-IEEE-NEXT: s_mov_b64 vcc, s[4:5] ; GFX89-IEEE-NEXT: v_div_fmas_f32 v5, v5, v9, v11 ; GFX89-IEEE-NEXT: v_div_fixup_f32 v0, v4, v2, v0 @@ 
-860,8 +860,8 @@ define <2 x float> @v_fdiv_v2f32_ulp25(<2 x float> %a, <2 x float> %b) { ; GFX89-IEEE-NEXT: v_fma_f32 v10, v12, v8, v10 ; GFX89-IEEE-NEXT: v_fma_f32 v11, v13, v9, v11 ; GFX89-IEEE-NEXT: v_fma_f32 v4, -v4, v10, v6 -; GFX89-IEEE-NEXT: v_fma_f32 v5, -v5, v11, v7 ; GFX89-IEEE-NEXT: v_div_fmas_f32 v4, v4, v8, v10 +; GFX89-IEEE-NEXT: v_fma_f32 v5, -v5, v11, v7 ; GFX89-IEEE-NEXT: s_mov_b64 vcc, s[4:5] ; GFX89-IEEE-NEXT: v_div_fmas_f32 v5, v5, v9, v11 ; GFX89-IEEE-NEXT: v_div_fixup_f32 v0, v4, v2, v0 @@ -1451,8 +1451,8 @@ define <2 x float> @v_fdiv_v2f32_arcp_ulp25(<2 x float> %a, <2 x float> %b) { ; GFX89-IEEE-NEXT: v_fma_f32 v10, v12, v8, v10 ; GFX89-IEEE-NEXT: v_fma_f32 v11, v13, v9, v11 ; GFX89-IEEE-NEXT: v_fma_f32 v4, -v4, v10, v6 -; GFX89-IEEE-NEXT: v_fma_f32 v5, -v5, v11, v7 ; GFX89-IEEE-NEXT: v_div_fmas_f32 v4, v4, v8, v10 +; GFX89-IEEE-NEXT: v_fma_f32 v5, -v5, v11, v7 ; GFX89-IEEE-NEXT: s_mov_b64 vcc, s[4:5] ; GFX89-IEEE-NEXT: v_div_fmas_f32 v5, v5, v9, v11 ; GFX89-IEEE-NEXT: v_div_fixup_f32 v0, v4, v2, v0 diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/fdiv.f64.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/fdiv.f64.ll index 39153cb8a744..543174484924 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/fdiv.f64.ll +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/fdiv.f64.ll @@ -837,34 +837,33 @@ define <2 x double> @v_rcp_v2f64(<2 x double> %x) { ; GFX6: ; %bb.0: ; GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX6-NEXT: v_div_scale_f64 v[4:5], s[4:5], v[0:1], v[0:1], 1.0 -; GFX6-NEXT: v_div_scale_f64 v[10:11], s[4:5], v[2:3], v[2:3], 1.0 +; GFX6-NEXT: v_div_scale_f64 v[10:11], s[4:5], 1.0, v[0:1], 1.0 ; GFX6-NEXT: v_rcp_f64_e32 v[6:7], v[4:5] ; GFX6-NEXT: v_mov_b32_e32 v18, 0x3ff00000 -; GFX6-NEXT: v_rcp_f64_e32 v[14:15], v[10:11] +; GFX6-NEXT: v_cmp_eq_u32_e32 vcc, v18, v11 ; GFX6-NEXT: v_fma_f64 v[8:9], -v[4:5], v[6:7], 1.0 -; GFX6-NEXT: v_fma_f64 v[16:17], -v[10:11], v[14:15], 1.0 ; GFX6-NEXT: v_fma_f64 v[6:7], v[6:7], v[8:9], v[6:7] -; GFX6-NEXT: v_div_scale_f64 v[8:9], s[4:5], 1.0, v[0:1], 1.0 +; GFX6-NEXT: v_div_scale_f64 v[8:9], s[4:5], v[2:3], v[2:3], 1.0 ; GFX6-NEXT: v_fma_f64 v[12:13], -v[4:5], v[6:7], 1.0 -; GFX6-NEXT: v_fma_f64 v[14:15], v[14:15], v[16:17], v[14:15] -; GFX6-NEXT: v_fma_f64 v[6:7], v[6:7], v[12:13], v[6:7] -; GFX6-NEXT: v_cmp_eq_u32_e32 vcc, v18, v9 -; GFX6-NEXT: v_mul_f64 v[12:13], v[8:9], v[6:7] ; GFX6-NEXT: v_cmp_eq_u32_e64 s[4:5], v1, v5 -; GFX6-NEXT: v_fma_f64 v[8:9], -v[4:5], v[12:13], v[8:9] -; GFX6-NEXT: v_fma_f64 v[4:5], -v[10:11], v[14:15], 1.0 -; GFX6-NEXT: v_div_scale_f64 v[16:17], s[6:7], 1.0, v[2:3], 1.0 -; GFX6-NEXT: v_fma_f64 v[4:5], v[14:15], v[4:5], v[14:15] +; GFX6-NEXT: v_fma_f64 v[6:7], v[6:7], v[12:13], v[6:7] +; GFX6-NEXT: v_rcp_f64_e32 v[12:13], v[8:9] +; GFX6-NEXT: v_mul_f64 v[14:15], v[10:11], v[6:7] ; GFX6-NEXT: s_xor_b64 vcc, vcc, s[4:5] -; GFX6-NEXT: v_mul_f64 v[14:15], v[16:17], v[4:5] -; GFX6-NEXT: v_div_fmas_f64 v[6:7], v[8:9], v[6:7], v[12:13] -; GFX6-NEXT: v_fma_f64 v[8:9], -v[10:11], v[14:15], v[16:17] +; GFX6-NEXT: v_fma_f64 v[10:11], -v[4:5], v[14:15], v[10:11] +; GFX6-NEXT: v_fma_f64 v[16:17], -v[8:9], v[12:13], 1.0 +; GFX6-NEXT: v_div_fmas_f64 v[6:7], v[10:11], v[6:7], v[14:15] +; GFX6-NEXT: v_fma_f64 v[12:13], v[12:13], v[16:17], v[12:13] +; GFX6-NEXT: v_div_scale_f64 v[16:17], s[6:7], 1.0, v[2:3], 1.0 +; GFX6-NEXT: v_fma_f64 v[4:5], -v[8:9], v[12:13], 1.0 +; GFX6-NEXT: v_cmp_eq_u32_e64 s[4:5], v3, v9 +; GFX6-NEXT: v_fma_f64 v[4:5], v[12:13], v[4:5], v[12:13] ; GFX6-NEXT: v_cmp_eq_u32_e32 vcc, v18, v17 -; 
GFX6-NEXT: v_cmp_eq_u32_e64 s[4:5], v3, v11 +; GFX6-NEXT: v_mul_f64 v[12:13], v[16:17], v[4:5] ; GFX6-NEXT: s_xor_b64 vcc, vcc, s[4:5] +; GFX6-NEXT: v_fma_f64 v[10:11], -v[8:9], v[12:13], v[16:17] ; GFX6-NEXT: v_div_fixup_f64 v[0:1], v[6:7], v[0:1], 1.0 -; GFX6-NEXT: s_nop 0 -; GFX6-NEXT: v_div_fmas_f64 v[4:5], v[8:9], v[4:5], v[14:15] +; GFX6-NEXT: v_div_fmas_f64 v[4:5], v[10:11], v[4:5], v[12:13] ; GFX6-NEXT: v_div_fixup_f64 v[2:3], v[4:5], v[2:3], 1.0 ; GFX6-NEXT: s_setpc_b64 s[30:31] ; @@ -961,34 +960,33 @@ define <2 x double> @v_rcp_v2f64_arcp(<2 x double> %x) { ; GFX6: ; %bb.0: ; GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX6-NEXT: v_div_scale_f64 v[4:5], s[4:5], v[0:1], v[0:1], 1.0 -; GFX6-NEXT: v_div_scale_f64 v[10:11], s[4:5], v[2:3], v[2:3], 1.0 +; GFX6-NEXT: v_div_scale_f64 v[10:11], s[4:5], 1.0, v[0:1], 1.0 ; GFX6-NEXT: v_rcp_f64_e32 v[6:7], v[4:5] ; GFX6-NEXT: v_mov_b32_e32 v18, 0x3ff00000 -; GFX6-NEXT: v_rcp_f64_e32 v[14:15], v[10:11] +; GFX6-NEXT: v_cmp_eq_u32_e32 vcc, v18, v11 ; GFX6-NEXT: v_fma_f64 v[8:9], -v[4:5], v[6:7], 1.0 -; GFX6-NEXT: v_fma_f64 v[16:17], -v[10:11], v[14:15], 1.0 ; GFX6-NEXT: v_fma_f64 v[6:7], v[6:7], v[8:9], v[6:7] -; GFX6-NEXT: v_div_scale_f64 v[8:9], s[4:5], 1.0, v[0:1], 1.0 +; GFX6-NEXT: v_div_scale_f64 v[8:9], s[4:5], v[2:3], v[2:3], 1.0 ; GFX6-NEXT: v_fma_f64 v[12:13], -v[4:5], v[6:7], 1.0 -; GFX6-NEXT: v_fma_f64 v[14:15], v[14:15], v[16:17], v[14:15] -; GFX6-NEXT: v_fma_f64 v[6:7], v[6:7], v[12:13], v[6:7] -; GFX6-NEXT: v_cmp_eq_u32_e32 vcc, v18, v9 -; GFX6-NEXT: v_mul_f64 v[12:13], v[8:9], v[6:7] ; GFX6-NEXT: v_cmp_eq_u32_e64 s[4:5], v1, v5 -; GFX6-NEXT: v_fma_f64 v[8:9], -v[4:5], v[12:13], v[8:9] -; GFX6-NEXT: v_fma_f64 v[4:5], -v[10:11], v[14:15], 1.0 -; GFX6-NEXT: v_div_scale_f64 v[16:17], s[6:7], 1.0, v[2:3], 1.0 -; GFX6-NEXT: v_fma_f64 v[4:5], v[14:15], v[4:5], v[14:15] +; GFX6-NEXT: v_fma_f64 v[6:7], v[6:7], v[12:13], v[6:7] +; GFX6-NEXT: v_rcp_f64_e32 v[12:13], v[8:9] +; GFX6-NEXT: v_mul_f64 v[14:15], v[10:11], v[6:7] ; GFX6-NEXT: s_xor_b64 vcc, vcc, s[4:5] -; GFX6-NEXT: v_mul_f64 v[14:15], v[16:17], v[4:5] -; GFX6-NEXT: v_div_fmas_f64 v[6:7], v[8:9], v[6:7], v[12:13] -; GFX6-NEXT: v_fma_f64 v[8:9], -v[10:11], v[14:15], v[16:17] +; GFX6-NEXT: v_fma_f64 v[10:11], -v[4:5], v[14:15], v[10:11] +; GFX6-NEXT: v_fma_f64 v[16:17], -v[8:9], v[12:13], 1.0 +; GFX6-NEXT: v_div_fmas_f64 v[6:7], v[10:11], v[6:7], v[14:15] +; GFX6-NEXT: v_fma_f64 v[12:13], v[12:13], v[16:17], v[12:13] +; GFX6-NEXT: v_div_scale_f64 v[16:17], s[6:7], 1.0, v[2:3], 1.0 +; GFX6-NEXT: v_fma_f64 v[4:5], -v[8:9], v[12:13], 1.0 +; GFX6-NEXT: v_cmp_eq_u32_e64 s[4:5], v3, v9 +; GFX6-NEXT: v_fma_f64 v[4:5], v[12:13], v[4:5], v[12:13] ; GFX6-NEXT: v_cmp_eq_u32_e32 vcc, v18, v17 -; GFX6-NEXT: v_cmp_eq_u32_e64 s[4:5], v3, v11 +; GFX6-NEXT: v_mul_f64 v[12:13], v[16:17], v[4:5] ; GFX6-NEXT: s_xor_b64 vcc, vcc, s[4:5] +; GFX6-NEXT: v_fma_f64 v[10:11], -v[8:9], v[12:13], v[16:17] ; GFX6-NEXT: v_div_fixup_f64 v[0:1], v[6:7], v[0:1], 1.0 -; GFX6-NEXT: s_nop 0 -; GFX6-NEXT: v_div_fmas_f64 v[4:5], v[8:9], v[4:5], v[14:15] +; GFX6-NEXT: v_div_fmas_f64 v[4:5], v[10:11], v[4:5], v[12:13] ; GFX6-NEXT: v_div_fixup_f64 v[2:3], v[4:5], v[2:3], 1.0 ; GFX6-NEXT: s_setpc_b64 s[30:31] ; @@ -1132,34 +1130,33 @@ define <2 x double> @v_rcp_v2f64_ulp25(<2 x double> %x) { ; GFX6: ; %bb.0: ; GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX6-NEXT: v_div_scale_f64 v[4:5], s[4:5], v[0:1], v[0:1], 1.0 -; GFX6-NEXT: v_div_scale_f64 v[10:11], s[4:5], v[2:3], v[2:3], 1.0 +; GFX6-NEXT: 
v_div_scale_f64 v[10:11], s[4:5], 1.0, v[0:1], 1.0 ; GFX6-NEXT: v_rcp_f64_e32 v[6:7], v[4:5] ; GFX6-NEXT: v_mov_b32_e32 v18, 0x3ff00000 -; GFX6-NEXT: v_rcp_f64_e32 v[14:15], v[10:11] +; GFX6-NEXT: v_cmp_eq_u32_e32 vcc, v18, v11 ; GFX6-NEXT: v_fma_f64 v[8:9], -v[4:5], v[6:7], 1.0 -; GFX6-NEXT: v_fma_f64 v[16:17], -v[10:11], v[14:15], 1.0 ; GFX6-NEXT: v_fma_f64 v[6:7], v[6:7], v[8:9], v[6:7] -; GFX6-NEXT: v_div_scale_f64 v[8:9], s[4:5], 1.0, v[0:1], 1.0 +; GFX6-NEXT: v_div_scale_f64 v[8:9], s[4:5], v[2:3], v[2:3], 1.0 ; GFX6-NEXT: v_fma_f64 v[12:13], -v[4:5], v[6:7], 1.0 -; GFX6-NEXT: v_fma_f64 v[14:15], v[14:15], v[16:17], v[14:15] -; GFX6-NEXT: v_fma_f64 v[6:7], v[6:7], v[12:13], v[6:7] -; GFX6-NEXT: v_cmp_eq_u32_e32 vcc, v18, v9 -; GFX6-NEXT: v_mul_f64 v[12:13], v[8:9], v[6:7] ; GFX6-NEXT: v_cmp_eq_u32_e64 s[4:5], v1, v5 -; GFX6-NEXT: v_fma_f64 v[8:9], -v[4:5], v[12:13], v[8:9] -; GFX6-NEXT: v_fma_f64 v[4:5], -v[10:11], v[14:15], 1.0 -; GFX6-NEXT: v_div_scale_f64 v[16:17], s[6:7], 1.0, v[2:3], 1.0 -; GFX6-NEXT: v_fma_f64 v[4:5], v[14:15], v[4:5], v[14:15] +; GFX6-NEXT: v_fma_f64 v[6:7], v[6:7], v[12:13], v[6:7] +; GFX6-NEXT: v_rcp_f64_e32 v[12:13], v[8:9] +; GFX6-NEXT: v_mul_f64 v[14:15], v[10:11], v[6:7] ; GFX6-NEXT: s_xor_b64 vcc, vcc, s[4:5] -; GFX6-NEXT: v_mul_f64 v[14:15], v[16:17], v[4:5] -; GFX6-NEXT: v_div_fmas_f64 v[6:7], v[8:9], v[6:7], v[12:13] -; GFX6-NEXT: v_fma_f64 v[8:9], -v[10:11], v[14:15], v[16:17] +; GFX6-NEXT: v_fma_f64 v[10:11], -v[4:5], v[14:15], v[10:11] +; GFX6-NEXT: v_fma_f64 v[16:17], -v[8:9], v[12:13], 1.0 +; GFX6-NEXT: v_div_fmas_f64 v[6:7], v[10:11], v[6:7], v[14:15] +; GFX6-NEXT: v_fma_f64 v[12:13], v[12:13], v[16:17], v[12:13] +; GFX6-NEXT: v_div_scale_f64 v[16:17], s[6:7], 1.0, v[2:3], 1.0 +; GFX6-NEXT: v_fma_f64 v[4:5], -v[8:9], v[12:13], 1.0 +; GFX6-NEXT: v_cmp_eq_u32_e64 s[4:5], v3, v9 +; GFX6-NEXT: v_fma_f64 v[4:5], v[12:13], v[4:5], v[12:13] ; GFX6-NEXT: v_cmp_eq_u32_e32 vcc, v18, v17 -; GFX6-NEXT: v_cmp_eq_u32_e64 s[4:5], v3, v11 +; GFX6-NEXT: v_mul_f64 v[12:13], v[16:17], v[4:5] ; GFX6-NEXT: s_xor_b64 vcc, vcc, s[4:5] +; GFX6-NEXT: v_fma_f64 v[10:11], -v[8:9], v[12:13], v[16:17] ; GFX6-NEXT: v_div_fixup_f64 v[0:1], v[6:7], v[0:1], 1.0 -; GFX6-NEXT: s_nop 0 -; GFX6-NEXT: v_div_fmas_f64 v[4:5], v[8:9], v[4:5], v[14:15] +; GFX6-NEXT: v_div_fmas_f64 v[4:5], v[10:11], v[4:5], v[12:13] ; GFX6-NEXT: v_div_fixup_f64 v[2:3], v[4:5], v[2:3], 1.0 ; GFX6-NEXT: s_setpc_b64 s[30:31] ; diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/fpow.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/fpow.ll index 4667975ca0d0..bc2c5c737104 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/fpow.ll +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/fpow.ll @@ -188,11 +188,11 @@ define <2 x half> @v_pow_v2f16(<2 x half> %x, <2 x half> %y) { ; GFX9-NEXT: v_cvt_f32_f16_e32 v0, v0 ; GFX9-NEXT: v_mul_legacy_f32_e32 v2, v2, v3 ; GFX9-NEXT: v_mul_legacy_f32_e32 v0, v0, v1 -; GFX9-NEXT: v_cvt_f16_f32_e32 v1, v2 +; GFX9-NEXT: v_cvt_f16_f32_e32 v2, v2 ; GFX9-NEXT: v_cvt_f16_f32_e32 v0, v0 -; GFX9-NEXT: v_mov_b32_e32 v2, 0xffff -; GFX9-NEXT: v_exp_f16_e32 v1, v1 +; GFX9-NEXT: v_exp_f16_e32 v1, v2 ; GFX9-NEXT: v_exp_f16_sdwa v0, v0 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD +; GFX9-NEXT: v_mov_b32_e32 v2, 0xffff ; GFX9-NEXT: v_and_or_b32 v0, v1, v2, v0 ; GFX9-NEXT: s_setpc_b64 s[30:31] ; @@ -274,11 +274,11 @@ define <2 x half> @v_pow_v2f16_fneg_lhs(<2 x half> %x, <2 x half> %y) { ; GFX9-NEXT: v_cvt_f32_f16_e32 v0, v0 ; GFX9-NEXT: v_mul_legacy_f32_e32 v2, v2, v3 ; GFX9-NEXT: v_mul_legacy_f32_e32 v0, 
v0, v1 -; GFX9-NEXT: v_cvt_f16_f32_e32 v1, v2 +; GFX9-NEXT: v_cvt_f16_f32_e32 v2, v2 ; GFX9-NEXT: v_cvt_f16_f32_e32 v0, v0 -; GFX9-NEXT: v_mov_b32_e32 v2, 0xffff -; GFX9-NEXT: v_exp_f16_e32 v1, v1 +; GFX9-NEXT: v_exp_f16_e32 v1, v2 ; GFX9-NEXT: v_exp_f16_sdwa v0, v0 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD +; GFX9-NEXT: v_mov_b32_e32 v2, 0xffff ; GFX9-NEXT: v_and_or_b32 v0, v1, v2, v0 ; GFX9-NEXT: s_setpc_b64 s[30:31] ; @@ -358,8 +358,8 @@ define <2 x half> @v_pow_v2f16_fneg_rhs(<2 x half> %x, <2 x half> %y) { ; GFX9-NEXT: v_xor_b32_e32 v1, 0x80008000, v1 ; GFX9-NEXT: v_cvt_f32_f16_e32 v3, v1 ; GFX9-NEXT: v_cvt_f32_f16_e32 v2, v2 -; GFX9-NEXT: v_cvt_f32_f16_sdwa v1, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 ; GFX9-NEXT: v_cvt_f32_f16_e32 v0, v0 +; GFX9-NEXT: v_cvt_f32_f16_sdwa v1, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 ; GFX9-NEXT: v_mul_legacy_f32_e32 v2, v2, v3 ; GFX9-NEXT: v_cvt_f16_f32_e32 v2, v2 ; GFX9-NEXT: v_mul_legacy_f32_e32 v0, v0, v1 diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/fshl.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/fshl.ll index f0f61e1803bd..09e49cedca48 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/fshl.ll +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/fshl.ll @@ -2094,18 +2094,18 @@ define <2 x i24> @v_fshl_v2i24(<2 x i24> %lhs, <2 x i24> %rhs, <2 x i24> %amt) { ; GFX9-NEXT: v_cvt_u32_f32_e32 v6, v6 ; GFX9-NEXT: v_and_b32_e32 v4, 0xffffff, v4 ; GFX9-NEXT: v_mul_f32_e32 v9, 0x4f7ffffe, v9 -; GFX9-NEXT: v_cvt_u32_f32_e32 v9, v9 -; GFX9-NEXT: v_mul_lo_u32 v8, v7, v6 ; GFX9-NEXT: v_bfe_u32 v2, v2, 1, 23 +; GFX9-NEXT: v_mul_lo_u32 v8, v7, v6 ; GFX9-NEXT: v_bfe_u32 v3, v3, 1, 23 -; GFX9-NEXT: v_mul_lo_u32 v7, v7, v9 ; GFX9-NEXT: v_mul_hi_u32 v8, v6, v8 -; GFX9-NEXT: v_mul_hi_u32 v7, v9, v7 ; GFX9-NEXT: v_add_u32_e32 v6, v6, v8 +; GFX9-NEXT: v_cvt_u32_f32_e32 v8, v9 ; GFX9-NEXT: v_mul_hi_u32 v6, v4, v6 -; GFX9-NEXT: v_mov_b32_e32 v8, 0xffffff -; GFX9-NEXT: v_and_b32_e32 v5, v5, v8 +; GFX9-NEXT: v_mov_b32_e32 v9, 0xffffff +; GFX9-NEXT: v_and_b32_e32 v5, v5, v9 +; GFX9-NEXT: v_mul_lo_u32 v7, v7, v8 ; GFX9-NEXT: v_mul_lo_u32 v6, v6, 24 +; GFX9-NEXT: v_mul_hi_u32 v7, v8, v7 ; GFX9-NEXT: v_sub_u32_e32 v4, v4, v6 ; GFX9-NEXT: v_subrev_u32_e32 v6, 24, v4 ; GFX9-NEXT: v_cmp_le_u32_e32 vcc, 24, v4 @@ -2113,11 +2113,11 @@ define <2 x i24> @v_fshl_v2i24(<2 x i24> %lhs, <2 x i24> %rhs, <2 x i24> %amt) { ; GFX9-NEXT: v_subrev_u32_e32 v6, 24, v4 ; GFX9-NEXT: v_cmp_le_u32_e32 vcc, 24, v4 ; GFX9-NEXT: v_cndmask_b32_e32 v4, v4, v6, vcc -; GFX9-NEXT: v_add_u32_e32 v6, v9, v7 +; GFX9-NEXT: v_add_u32_e32 v6, v8, v7 ; GFX9-NEXT: v_mul_hi_u32 v6, v5, v6 ; GFX9-NEXT: v_sub_u32_e32 v7, 23, v4 -; GFX9-NEXT: v_and_b32_e32 v7, v7, v8 -; GFX9-NEXT: v_and_b32_e32 v4, v4, v8 +; GFX9-NEXT: v_and_b32_e32 v7, v7, v9 +; GFX9-NEXT: v_and_b32_e32 v4, v4, v9 ; GFX9-NEXT: v_mul_lo_u32 v6, v6, 24 ; GFX9-NEXT: v_lshrrev_b32_e32 v2, v7, v2 ; GFX9-NEXT: v_lshl_or_b32 v0, v0, v4, v2 @@ -2129,8 +2129,8 @@ define <2 x i24> @v_fshl_v2i24(<2 x i24> %lhs, <2 x i24> %rhs, <2 x i24> %amt) { ; GFX9-NEXT: v_cmp_le_u32_e32 vcc, 24, v2 ; GFX9-NEXT: v_cndmask_b32_e32 v2, v2, v4, vcc ; GFX9-NEXT: v_sub_u32_e32 v4, 23, v2 -; GFX9-NEXT: v_and_b32_e32 v4, v4, v8 -; GFX9-NEXT: v_and_b32_e32 v2, v2, v8 +; GFX9-NEXT: v_and_b32_e32 v4, v4, v9 +; GFX9-NEXT: v_and_b32_e32 v2, v2, v9 ; GFX9-NEXT: v_lshrrev_b32_e32 v3, v4, v3 ; GFX9-NEXT: v_lshl_or_b32 v1, v1, v2, v3 ; GFX9-NEXT: s_setpc_b64 s[30:31] diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/fshr.ll 
b/llvm/test/CodeGen/AMDGPU/GlobalISel/fshr.ll index ff6618f8534b..b570be394ca4 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/fshr.ll +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/fshr.ll @@ -2100,40 +2100,40 @@ define <2 x i24> @v_fshr_v2i24(<2 x i24> %lhs, <2 x i24> %rhs, <2 x i24> %amt) { ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX9-NEXT: v_cvt_f32_ubyte0_e32 v6, 24 ; GFX9-NEXT: v_rcp_iflag_f32_e32 v6, v6 +; GFX9-NEXT: v_mov_b32_e32 v7, 0xffffffe8 ; GFX9-NEXT: v_cvt_f32_ubyte0_e32 v9, 24 ; GFX9-NEXT: v_rcp_iflag_f32_e32 v9, v9 -; GFX9-NEXT: v_mov_b32_e32 v7, 0xffffffe8 ; GFX9-NEXT: v_mul_f32_e32 v6, 0x4f7ffffe, v6 ; GFX9-NEXT: v_cvt_u32_f32_e32 v6, v6 -; GFX9-NEXT: v_mul_f32_e32 v9, 0x4f7ffffe, v9 -; GFX9-NEXT: v_cvt_u32_f32_e32 v9, v9 ; GFX9-NEXT: v_and_b32_e32 v4, 0xffffff, v4 -; GFX9-NEXT: v_mul_lo_u32 v8, v7, v6 +; GFX9-NEXT: v_mul_f32_e32 v9, 0x4f7ffffe, v9 ; GFX9-NEXT: v_lshlrev_b32_e32 v0, 1, v0 -; GFX9-NEXT: v_mul_lo_u32 v7, v7, v9 +; GFX9-NEXT: v_mul_lo_u32 v8, v7, v6 ; GFX9-NEXT: v_lshlrev_b32_e32 v1, 1, v1 ; GFX9-NEXT: v_mul_hi_u32 v8, v6, v8 -; GFX9-NEXT: v_mul_hi_u32 v7, v9, v7 ; GFX9-NEXT: v_add_u32_e32 v6, v6, v8 +; GFX9-NEXT: v_cvt_u32_f32_e32 v8, v9 ; GFX9-NEXT: v_mul_hi_u32 v6, v4, v6 -; GFX9-NEXT: v_mov_b32_e32 v8, 0xffffff -; GFX9-NEXT: v_and_b32_e32 v5, v5, v8 -; GFX9-NEXT: v_add_u32_e32 v7, v9, v7 +; GFX9-NEXT: v_mov_b32_e32 v9, 0xffffff +; GFX9-NEXT: v_and_b32_e32 v5, v5, v9 +; GFX9-NEXT: v_mul_lo_u32 v7, v7, v8 ; GFX9-NEXT: v_mul_lo_u32 v6, v6, 24 -; GFX9-NEXT: v_mul_hi_u32 v7, v5, v7 -; GFX9-NEXT: v_and_b32_e32 v2, v2, v8 -; GFX9-NEXT: v_and_b32_e32 v3, v3, v8 +; GFX9-NEXT: v_and_b32_e32 v2, v2, v9 +; GFX9-NEXT: v_and_b32_e32 v3, v3, v9 +; GFX9-NEXT: v_mul_hi_u32 v7, v8, v7 ; GFX9-NEXT: v_sub_u32_e32 v4, v4, v6 ; GFX9-NEXT: v_subrev_u32_e32 v6, 24, v4 ; GFX9-NEXT: v_cmp_le_u32_e32 vcc, 24, v4 +; GFX9-NEXT: v_add_u32_e32 v7, v8, v7 +; GFX9-NEXT: v_mul_hi_u32 v7, v5, v7 ; GFX9-NEXT: v_cndmask_b32_e32 v4, v4, v6, vcc ; GFX9-NEXT: v_subrev_u32_e32 v6, 24, v4 ; GFX9-NEXT: v_cmp_le_u32_e32 vcc, 24, v4 ; GFX9-NEXT: v_mul_lo_u32 v7, v7, 24 ; GFX9-NEXT: v_cndmask_b32_e32 v4, v4, v6, vcc ; GFX9-NEXT: v_sub_u32_e32 v6, 23, v4 -; GFX9-NEXT: v_and_b32_e32 v4, v4, v8 -; GFX9-NEXT: v_and_b32_e32 v6, v6, v8 +; GFX9-NEXT: v_and_b32_e32 v4, v4, v9 +; GFX9-NEXT: v_and_b32_e32 v6, v6, v9 ; GFX9-NEXT: v_lshrrev_b32_e32 v2, v4, v2 ; GFX9-NEXT: v_lshl_or_b32 v0, v0, v6, v2 ; GFX9-NEXT: v_sub_u32_e32 v2, v5, v7 @@ -2144,8 +2144,8 @@ define <2 x i24> @v_fshr_v2i24(<2 x i24> %lhs, <2 x i24> %rhs, <2 x i24> %amt) { ; GFX9-NEXT: v_cmp_le_u32_e32 vcc, 24, v2 ; GFX9-NEXT: v_cndmask_b32_e32 v2, v2, v4, vcc ; GFX9-NEXT: v_sub_u32_e32 v4, 23, v2 -; GFX9-NEXT: v_and_b32_e32 v2, v2, v8 -; GFX9-NEXT: v_and_b32_e32 v4, v4, v8 +; GFX9-NEXT: v_and_b32_e32 v2, v2, v9 +; GFX9-NEXT: v_and_b32_e32 v4, v4, v9 ; GFX9-NEXT: v_lshrrev_b32_e32 v2, v2, v3 ; GFX9-NEXT: v_lshl_or_b32 v1, v1, v4, v2 ; GFX9-NEXT: s_setpc_b64 s[30:31] @@ -4095,8 +4095,8 @@ define <4 x half> @v_fshr_v4i16(<4 x i16> %lhs, <4 x i16> %rhs, <4 x i16> %amt) ; GFX10-NEXT: v_and_b32_e32 v4, s4, v4 ; GFX10-NEXT: v_and_b32_e32 v6, s4, v6 ; GFX10-NEXT: v_and_b32_e32 v5, s4, v5 -; GFX10-NEXT: v_pk_lshlrev_b16 v1, 1, v1 op_sel_hi:[0,1] ; GFX10-NEXT: v_and_b32_e32 v7, s4, v7 +; GFX10-NEXT: v_pk_lshlrev_b16 v1, 1, v1 op_sel_hi:[0,1] ; GFX10-NEXT: v_pk_lshrrev_b16 v2, v4, v2 ; GFX10-NEXT: v_pk_lshlrev_b16 v0, v6, v0 ; GFX10-NEXT: v_pk_lshrrev_b16 v3, v5, v3 diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/function-returns.ll 
b/llvm/test/CodeGen/AMDGPU/GlobalISel/function-returns.ll index 53c3fa1064a6..3dfd3ab5a15a 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/function-returns.ll +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/function-returns.ll @@ -6,11 +6,14 @@ define i1 @i1_func_void() #0 { ; CHECK-LABEL: name: i1_func_void ; CHECK: bb.1 (%ir-block.0): - ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF - ; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(s1) = G_LOAD [[DEF]](p1) :: (load (s1) from `i1 addrspace(1)* undef`, addrspace 1) - ; CHECK-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[LOAD]](s1) - ; CHECK-NEXT: $vgpr0 = COPY [[ANYEXT]](s32) - ; CHECK-NEXT: SI_RETURN implicit $vgpr0 + ; CHECK: liveins: $sgpr30_sgpr31 + ; CHECK: [[COPY:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 + ; CHECK: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF + ; CHECK: [[LOAD:%[0-9]+]]:_(s1) = G_LOAD [[DEF]](p1) :: (load (s1) from `i1 addrspace(1)* undef`, addrspace 1) + ; CHECK: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[LOAD]](s1) + ; CHECK: $vgpr0 = COPY [[ANYEXT]](s32) + ; CHECK: [[COPY1:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY]] + ; CHECK: S_SETPC_B64_return [[COPY1]], implicit $vgpr0 %val = load i1, i1 addrspace(1)* undef ret i1 %val } @@ -18,11 +21,14 @@ define i1 @i1_func_void() #0 { define zeroext i1 @i1_zeroext_func_void() #0 { ; CHECK-LABEL: name: i1_zeroext_func_void ; CHECK: bb.1 (%ir-block.0): - ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF - ; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(s1) = G_LOAD [[DEF]](p1) :: (load (s1) from `i1 addrspace(1)* undef`, addrspace 1) - ; CHECK-NEXT: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[LOAD]](s1) - ; CHECK-NEXT: $vgpr0 = COPY [[ZEXT]](s32) - ; CHECK-NEXT: SI_RETURN implicit $vgpr0 + ; CHECK: liveins: $sgpr30_sgpr31 + ; CHECK: [[COPY:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 + ; CHECK: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF + ; CHECK: [[LOAD:%[0-9]+]]:_(s1) = G_LOAD [[DEF]](p1) :: (load (s1) from `i1 addrspace(1)* undef`, addrspace 1) + ; CHECK: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[LOAD]](s1) + ; CHECK: $vgpr0 = COPY [[ZEXT]](s32) + ; CHECK: [[COPY1:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY]] + ; CHECK: S_SETPC_B64_return [[COPY1]], implicit $vgpr0 %val = load i1, i1 addrspace(1)* undef ret i1 %val } @@ -30,11 +36,14 @@ define zeroext i1 @i1_zeroext_func_void() #0 { define signext i1 @i1_signext_func_void() #0 { ; CHECK-LABEL: name: i1_signext_func_void ; CHECK: bb.1 (%ir-block.0): - ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF - ; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(s1) = G_LOAD [[DEF]](p1) :: (load (s1) from `i1 addrspace(1)* undef`, addrspace 1) - ; CHECK-NEXT: [[SEXT:%[0-9]+]]:_(s32) = G_SEXT [[LOAD]](s1) - ; CHECK-NEXT: $vgpr0 = COPY [[SEXT]](s32) - ; CHECK-NEXT: SI_RETURN implicit $vgpr0 + ; CHECK: liveins: $sgpr30_sgpr31 + ; CHECK: [[COPY:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 + ; CHECK: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF + ; CHECK: [[LOAD:%[0-9]+]]:_(s1) = G_LOAD [[DEF]](p1) :: (load (s1) from `i1 addrspace(1)* undef`, addrspace 1) + ; CHECK: [[SEXT:%[0-9]+]]:_(s32) = G_SEXT [[LOAD]](s1) + ; CHECK: $vgpr0 = COPY [[SEXT]](s32) + ; CHECK: [[COPY1:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY]] + ; CHECK: S_SETPC_B64_return [[COPY1]], implicit $vgpr0 %val = load i1, i1 addrspace(1)* undef ret i1 %val } @@ -42,11 +51,14 @@ define signext i1 @i1_signext_func_void() #0 { define i7 @i7_func_void() #0 { ; CHECK-LABEL: name: i7_func_void ; CHECK: bb.1 (%ir-block.0): - ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF - ; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(s7) = G_LOAD [[DEF]](p1) :: (load (s7) from `i7 addrspace(1)* undef`, addrspace 1) 
- ; CHECK-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[LOAD]](s7) - ; CHECK-NEXT: $vgpr0 = COPY [[ANYEXT]](s32) - ; CHECK-NEXT: SI_RETURN implicit $vgpr0 + ; CHECK: liveins: $sgpr30_sgpr31 + ; CHECK: [[COPY:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 + ; CHECK: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF + ; CHECK: [[LOAD:%[0-9]+]]:_(s7) = G_LOAD [[DEF]](p1) :: (load (s7) from `i7 addrspace(1)* undef`, addrspace 1) + ; CHECK: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[LOAD]](s7) + ; CHECK: $vgpr0 = COPY [[ANYEXT]](s32) + ; CHECK: [[COPY1:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY]] + ; CHECK: S_SETPC_B64_return [[COPY1]], implicit $vgpr0 %val = load i7, i7 addrspace(1)* undef ret i7 %val } @@ -54,11 +66,14 @@ define i7 @i7_func_void() #0 { define zeroext i7 @i7_zeroext_func_void() #0 { ; CHECK-LABEL: name: i7_zeroext_func_void ; CHECK: bb.1 (%ir-block.0): - ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF - ; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(s7) = G_LOAD [[DEF]](p1) :: (load (s7) from `i7 addrspace(1)* undef`, addrspace 1) - ; CHECK-NEXT: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[LOAD]](s7) - ; CHECK-NEXT: $vgpr0 = COPY [[ZEXT]](s32) - ; CHECK-NEXT: SI_RETURN implicit $vgpr0 + ; CHECK: liveins: $sgpr30_sgpr31 + ; CHECK: [[COPY:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 + ; CHECK: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF + ; CHECK: [[LOAD:%[0-9]+]]:_(s7) = G_LOAD [[DEF]](p1) :: (load (s7) from `i7 addrspace(1)* undef`, addrspace 1) + ; CHECK: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[LOAD]](s7) + ; CHECK: $vgpr0 = COPY [[ZEXT]](s32) + ; CHECK: [[COPY1:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY]] + ; CHECK: S_SETPC_B64_return [[COPY1]], implicit $vgpr0 %val = load i7, i7 addrspace(1)* undef ret i7 %val } @@ -66,11 +81,14 @@ define zeroext i7 @i7_zeroext_func_void() #0 { define signext i7 @i7_signext_func_void() #0 { ; CHECK-LABEL: name: i7_signext_func_void ; CHECK: bb.1 (%ir-block.0): - ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF - ; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(s7) = G_LOAD [[DEF]](p1) :: (load (s7) from `i7 addrspace(1)* undef`, addrspace 1) - ; CHECK-NEXT: [[SEXT:%[0-9]+]]:_(s32) = G_SEXT [[LOAD]](s7) - ; CHECK-NEXT: $vgpr0 = COPY [[SEXT]](s32) - ; CHECK-NEXT: SI_RETURN implicit $vgpr0 + ; CHECK: liveins: $sgpr30_sgpr31 + ; CHECK: [[COPY:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 + ; CHECK: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF + ; CHECK: [[LOAD:%[0-9]+]]:_(s7) = G_LOAD [[DEF]](p1) :: (load (s7) from `i7 addrspace(1)* undef`, addrspace 1) + ; CHECK: [[SEXT:%[0-9]+]]:_(s32) = G_SEXT [[LOAD]](s7) + ; CHECK: $vgpr0 = COPY [[SEXT]](s32) + ; CHECK: [[COPY1:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY]] + ; CHECK: S_SETPC_B64_return [[COPY1]], implicit $vgpr0 %val = load i7, i7 addrspace(1)* undef ret i7 %val } @@ -78,11 +96,14 @@ define signext i7 @i7_signext_func_void() #0 { define i8 @i8_func_void() #0 { ; CHECK-LABEL: name: i8_func_void ; CHECK: bb.1 (%ir-block.0): - ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF - ; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(s8) = G_LOAD [[DEF]](p1) :: (load (s8) from `i8 addrspace(1)* undef`, addrspace 1) - ; CHECK-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[LOAD]](s8) - ; CHECK-NEXT: $vgpr0 = COPY [[ANYEXT]](s32) - ; CHECK-NEXT: SI_RETURN implicit $vgpr0 + ; CHECK: liveins: $sgpr30_sgpr31 + ; CHECK: [[COPY:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 + ; CHECK: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF + ; CHECK: [[LOAD:%[0-9]+]]:_(s8) = G_LOAD [[DEF]](p1) :: (load (s8) from `i8 addrspace(1)* undef`, addrspace 1) + ; CHECK: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[LOAD]](s8) + ; CHECK: $vgpr0 = COPY 
[[ANYEXT]](s32) + ; CHECK: [[COPY1:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY]] + ; CHECK: S_SETPC_B64_return [[COPY1]], implicit $vgpr0 %val = load i8, i8 addrspace(1)* undef ret i8 %val } @@ -90,11 +111,14 @@ define i8 @i8_func_void() #0 { define zeroext i8 @i8_zeroext_func_void() #0 { ; CHECK-LABEL: name: i8_zeroext_func_void ; CHECK: bb.1 (%ir-block.0): - ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF - ; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(s8) = G_LOAD [[DEF]](p1) :: (load (s8) from `i8 addrspace(1)* undef`, addrspace 1) - ; CHECK-NEXT: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[LOAD]](s8) - ; CHECK-NEXT: $vgpr0 = COPY [[ZEXT]](s32) - ; CHECK-NEXT: SI_RETURN implicit $vgpr0 + ; CHECK: liveins: $sgpr30_sgpr31 + ; CHECK: [[COPY:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 + ; CHECK: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF + ; CHECK: [[LOAD:%[0-9]+]]:_(s8) = G_LOAD [[DEF]](p1) :: (load (s8) from `i8 addrspace(1)* undef`, addrspace 1) + ; CHECK: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[LOAD]](s8) + ; CHECK: $vgpr0 = COPY [[ZEXT]](s32) + ; CHECK: [[COPY1:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY]] + ; CHECK: S_SETPC_B64_return [[COPY1]], implicit $vgpr0 %val = load i8, i8 addrspace(1)* undef ret i8 %val } @@ -102,11 +126,14 @@ define zeroext i8 @i8_zeroext_func_void() #0 { define signext i8 @i8_signext_func_void() #0 { ; CHECK-LABEL: name: i8_signext_func_void ; CHECK: bb.1 (%ir-block.0): - ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF - ; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(s8) = G_LOAD [[DEF]](p1) :: (load (s8) from `i8 addrspace(1)* undef`, addrspace 1) - ; CHECK-NEXT: [[SEXT:%[0-9]+]]:_(s32) = G_SEXT [[LOAD]](s8) - ; CHECK-NEXT: $vgpr0 = COPY [[SEXT]](s32) - ; CHECK-NEXT: SI_RETURN implicit $vgpr0 + ; CHECK: liveins: $sgpr30_sgpr31 + ; CHECK: [[COPY:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 + ; CHECK: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF + ; CHECK: [[LOAD:%[0-9]+]]:_(s8) = G_LOAD [[DEF]](p1) :: (load (s8) from `i8 addrspace(1)* undef`, addrspace 1) + ; CHECK: [[SEXT:%[0-9]+]]:_(s32) = G_SEXT [[LOAD]](s8) + ; CHECK: $vgpr0 = COPY [[SEXT]](s32) + ; CHECK: [[COPY1:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY]] + ; CHECK: S_SETPC_B64_return [[COPY1]], implicit $vgpr0 %val = load i8, i8 addrspace(1)* undef ret i8 %val } @@ -114,11 +141,14 @@ define signext i8 @i8_signext_func_void() #0 { define i16 @i16_func_void() #0 { ; CHECK-LABEL: name: i16_func_void ; CHECK: bb.1 (%ir-block.0): - ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF - ; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(s16) = G_LOAD [[DEF]](p1) :: (load (s16) from `i16 addrspace(1)* undef`, addrspace 1) - ; CHECK-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[LOAD]](s16) - ; CHECK-NEXT: $vgpr0 = COPY [[ANYEXT]](s32) - ; CHECK-NEXT: SI_RETURN implicit $vgpr0 + ; CHECK: liveins: $sgpr30_sgpr31 + ; CHECK: [[COPY:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 + ; CHECK: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF + ; CHECK: [[LOAD:%[0-9]+]]:_(s16) = G_LOAD [[DEF]](p1) :: (load (s16) from `i16 addrspace(1)* undef`, addrspace 1) + ; CHECK: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[LOAD]](s16) + ; CHECK: $vgpr0 = COPY [[ANYEXT]](s32) + ; CHECK: [[COPY1:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY]] + ; CHECK: S_SETPC_B64_return [[COPY1]], implicit $vgpr0 %val = load i16, i16 addrspace(1)* undef ret i16 %val } @@ -126,11 +156,14 @@ define i16 @i16_func_void() #0 { define zeroext i16 @i16_zeroext_func_void() #0 { ; CHECK-LABEL: name: i16_zeroext_func_void ; CHECK: bb.1 (%ir-block.0): - ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF - ; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(s16) = G_LOAD [[DEF]](p1) :: (load 
(s16) from `i16 addrspace(1)* undef`, addrspace 1) - ; CHECK-NEXT: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[LOAD]](s16) - ; CHECK-NEXT: $vgpr0 = COPY [[ZEXT]](s32) - ; CHECK-NEXT: SI_RETURN implicit $vgpr0 + ; CHECK: liveins: $sgpr30_sgpr31 + ; CHECK: [[COPY:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 + ; CHECK: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF + ; CHECK: [[LOAD:%[0-9]+]]:_(s16) = G_LOAD [[DEF]](p1) :: (load (s16) from `i16 addrspace(1)* undef`, addrspace 1) + ; CHECK: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[LOAD]](s16) + ; CHECK: $vgpr0 = COPY [[ZEXT]](s32) + ; CHECK: [[COPY1:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY]] + ; CHECK: S_SETPC_B64_return [[COPY1]], implicit $vgpr0 %val = load i16, i16 addrspace(1)* undef ret i16 %val } @@ -138,11 +171,14 @@ define zeroext i16 @i16_zeroext_func_void() #0 { define signext i16 @i16_signext_func_void() #0 { ; CHECK-LABEL: name: i16_signext_func_void ; CHECK: bb.1 (%ir-block.0): - ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF - ; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(s16) = G_LOAD [[DEF]](p1) :: (load (s16) from `i16 addrspace(1)* undef`, addrspace 1) - ; CHECK-NEXT: [[SEXT:%[0-9]+]]:_(s32) = G_SEXT [[LOAD]](s16) - ; CHECK-NEXT: $vgpr0 = COPY [[SEXT]](s32) - ; CHECK-NEXT: SI_RETURN implicit $vgpr0 + ; CHECK: liveins: $sgpr30_sgpr31 + ; CHECK: [[COPY:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 + ; CHECK: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF + ; CHECK: [[LOAD:%[0-9]+]]:_(s16) = G_LOAD [[DEF]](p1) :: (load (s16) from `i16 addrspace(1)* undef`, addrspace 1) + ; CHECK: [[SEXT:%[0-9]+]]:_(s32) = G_SEXT [[LOAD]](s16) + ; CHECK: $vgpr0 = COPY [[SEXT]](s32) + ; CHECK: [[COPY1:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY]] + ; CHECK: S_SETPC_B64_return [[COPY1]], implicit $vgpr0 %val = load i16, i16 addrspace(1)* undef ret i16 %val } @@ -150,11 +186,14 @@ define signext i16 @i16_signext_func_void() #0 { define half @f16_func_void() #0 { ; CHECK-LABEL: name: f16_func_void ; CHECK: bb.1 (%ir-block.0): - ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF - ; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(s16) = G_LOAD [[DEF]](p1) :: (load (s16) from `half addrspace(1)* undef`, addrspace 1) - ; CHECK-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[LOAD]](s16) - ; CHECK-NEXT: $vgpr0 = COPY [[ANYEXT]](s32) - ; CHECK-NEXT: SI_RETURN implicit $vgpr0 + ; CHECK: liveins: $sgpr30_sgpr31 + ; CHECK: [[COPY:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 + ; CHECK: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF + ; CHECK: [[LOAD:%[0-9]+]]:_(s16) = G_LOAD [[DEF]](p1) :: (load (s16) from `half addrspace(1)* undef`, addrspace 1) + ; CHECK: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[LOAD]](s16) + ; CHECK: $vgpr0 = COPY [[ANYEXT]](s32) + ; CHECK: [[COPY1:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY]] + ; CHECK: S_SETPC_B64_return [[COPY1]], implicit $vgpr0 %val = load half, half addrspace(1)* undef ret half %val } @@ -162,11 +201,14 @@ define half @f16_func_void() #0 { define i24 @i24_func_void() #0 { ; CHECK-LABEL: name: i24_func_void ; CHECK: bb.1 (%ir-block.0): - ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF - ; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(s24) = G_LOAD [[DEF]](p1) :: (load (s24) from `i24 addrspace(1)* undef`, align 4, addrspace 1) - ; CHECK-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[LOAD]](s24) - ; CHECK-NEXT: $vgpr0 = COPY [[ANYEXT]](s32) - ; CHECK-NEXT: SI_RETURN implicit $vgpr0 + ; CHECK: liveins: $sgpr30_sgpr31 + ; CHECK: [[COPY:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 + ; CHECK: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF + ; CHECK: [[LOAD:%[0-9]+]]:_(s24) = G_LOAD [[DEF]](p1) :: (load (s24) from `i24 addrspace(1)* undef`, align 4, 
addrspace 1) + ; CHECK: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[LOAD]](s24) + ; CHECK: $vgpr0 = COPY [[ANYEXT]](s32) + ; CHECK: [[COPY1:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY]] + ; CHECK: S_SETPC_B64_return [[COPY1]], implicit $vgpr0 %val = load i24, i24 addrspace(1)* undef ret i24 %val } @@ -174,11 +216,14 @@ define i24 @i24_func_void() #0 { define zeroext i24 @i24_zeroext_func_void() #0 { ; CHECK-LABEL: name: i24_zeroext_func_void ; CHECK: bb.1 (%ir-block.0): - ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF - ; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(s24) = G_LOAD [[DEF]](p1) :: (load (s24) from `i24 addrspace(1)* undef`, align 4, addrspace 1) - ; CHECK-NEXT: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[LOAD]](s24) - ; CHECK-NEXT: $vgpr0 = COPY [[ZEXT]](s32) - ; CHECK-NEXT: SI_RETURN implicit $vgpr0 + ; CHECK: liveins: $sgpr30_sgpr31 + ; CHECK: [[COPY:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 + ; CHECK: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF + ; CHECK: [[LOAD:%[0-9]+]]:_(s24) = G_LOAD [[DEF]](p1) :: (load (s24) from `i24 addrspace(1)* undef`, align 4, addrspace 1) + ; CHECK: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[LOAD]](s24) + ; CHECK: $vgpr0 = COPY [[ZEXT]](s32) + ; CHECK: [[COPY1:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY]] + ; CHECK: S_SETPC_B64_return [[COPY1]], implicit $vgpr0 %val = load i24, i24 addrspace(1)* undef ret i24 %val } @@ -186,11 +231,14 @@ define zeroext i24 @i24_zeroext_func_void() #0 { define signext i24 @i24_signext_func_void() #0 { ; CHECK-LABEL: name: i24_signext_func_void ; CHECK: bb.1 (%ir-block.0): - ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF - ; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(s24) = G_LOAD [[DEF]](p1) :: (load (s24) from `i24 addrspace(1)* undef`, align 4, addrspace 1) - ; CHECK-NEXT: [[SEXT:%[0-9]+]]:_(s32) = G_SEXT [[LOAD]](s24) - ; CHECK-NEXT: $vgpr0 = COPY [[SEXT]](s32) - ; CHECK-NEXT: SI_RETURN implicit $vgpr0 + ; CHECK: liveins: $sgpr30_sgpr31 + ; CHECK: [[COPY:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 + ; CHECK: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF + ; CHECK: [[LOAD:%[0-9]+]]:_(s24) = G_LOAD [[DEF]](p1) :: (load (s24) from `i24 addrspace(1)* undef`, align 4, addrspace 1) + ; CHECK: [[SEXT:%[0-9]+]]:_(s32) = G_SEXT [[LOAD]](s24) + ; CHECK: $vgpr0 = COPY [[SEXT]](s32) + ; CHECK: [[COPY1:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY]] + ; CHECK: S_SETPC_B64_return [[COPY1]], implicit $vgpr0 %val = load i24, i24 addrspace(1)* undef ret i24 %val } @@ -198,14 +246,17 @@ define signext i24 @i24_signext_func_void() #0 { define <2 x i24> @v2i24_func_void() #0 { ; CHECK-LABEL: name: v2i24_func_void ; CHECK: bb.1 (%ir-block.0): - ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF - ; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(<2 x s24>) = G_LOAD [[DEF]](p1) :: (load (<2 x s24>) from `<2 x i24> addrspace(1)* undef`, align 8, addrspace 1) - ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s24), [[UV1:%[0-9]+]]:_(s24) = G_UNMERGE_VALUES [[LOAD]](<2 x s24>) - ; CHECK-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[UV]](s24) - ; CHECK-NEXT: [[ANYEXT1:%[0-9]+]]:_(s32) = G_ANYEXT [[UV1]](s24) - ; CHECK-NEXT: $vgpr0 = COPY [[ANYEXT]](s32) - ; CHECK-NEXT: $vgpr1 = COPY [[ANYEXT1]](s32) - ; CHECK-NEXT: SI_RETURN implicit $vgpr0, implicit $vgpr1 + ; CHECK: liveins: $sgpr30_sgpr31 + ; CHECK: [[COPY:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 + ; CHECK: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF + ; CHECK: [[LOAD:%[0-9]+]]:_(<2 x s24>) = G_LOAD [[DEF]](p1) :: (load (<2 x s24>) from `<2 x i24> addrspace(1)* undef`, align 8, addrspace 1) + ; CHECK: [[UV:%[0-9]+]]:_(s24), [[UV1:%[0-9]+]]:_(s24) = G_UNMERGE_VALUES [[LOAD]](<2 x s24>) + ; CHECK: 
[[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[UV]](s24) + ; CHECK: [[ANYEXT1:%[0-9]+]]:_(s32) = G_ANYEXT [[UV1]](s24) + ; CHECK: $vgpr0 = COPY [[ANYEXT]](s32) + ; CHECK: $vgpr1 = COPY [[ANYEXT1]](s32) + ; CHECK: [[COPY1:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY]] + ; CHECK: S_SETPC_B64_return [[COPY1]], implicit $vgpr0, implicit $vgpr1 %val = load <2 x i24>, <2 x i24> addrspace(1)* undef ret <2 x i24> %val } @@ -213,16 +264,19 @@ define <2 x i24> @v2i24_func_void() #0 { define <3 x i24> @v3i24_func_void() #0 { ; CHECK-LABEL: name: v3i24_func_void ; CHECK: bb.1 (%ir-block.0): - ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF - ; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(<3 x s24>) = G_LOAD [[DEF]](p1) :: (load (<3 x s24>) from `<3 x i24> addrspace(1)* undef`, align 16, addrspace 1) - ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s24), [[UV1:%[0-9]+]]:_(s24), [[UV2:%[0-9]+]]:_(s24) = G_UNMERGE_VALUES [[LOAD]](<3 x s24>) - ; CHECK-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[UV]](s24) - ; CHECK-NEXT: [[ANYEXT1:%[0-9]+]]:_(s32) = G_ANYEXT [[UV1]](s24) - ; CHECK-NEXT: [[ANYEXT2:%[0-9]+]]:_(s32) = G_ANYEXT [[UV2]](s24) - ; CHECK-NEXT: $vgpr0 = COPY [[ANYEXT]](s32) - ; CHECK-NEXT: $vgpr1 = COPY [[ANYEXT1]](s32) - ; CHECK-NEXT: $vgpr2 = COPY [[ANYEXT2]](s32) - ; CHECK-NEXT: SI_RETURN implicit $vgpr0, implicit $vgpr1, implicit $vgpr2 + ; CHECK: liveins: $sgpr30_sgpr31 + ; CHECK: [[COPY:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 + ; CHECK: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF + ; CHECK: [[LOAD:%[0-9]+]]:_(<3 x s24>) = G_LOAD [[DEF]](p1) :: (load (<3 x s24>) from `<3 x i24> addrspace(1)* undef`, align 16, addrspace 1) + ; CHECK: [[UV:%[0-9]+]]:_(s24), [[UV1:%[0-9]+]]:_(s24), [[UV2:%[0-9]+]]:_(s24) = G_UNMERGE_VALUES [[LOAD]](<3 x s24>) + ; CHECK: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[UV]](s24) + ; CHECK: [[ANYEXT1:%[0-9]+]]:_(s32) = G_ANYEXT [[UV1]](s24) + ; CHECK: [[ANYEXT2:%[0-9]+]]:_(s32) = G_ANYEXT [[UV2]](s24) + ; CHECK: $vgpr0 = COPY [[ANYEXT]](s32) + ; CHECK: $vgpr1 = COPY [[ANYEXT1]](s32) + ; CHECK: $vgpr2 = COPY [[ANYEXT2]](s32) + ; CHECK: [[COPY1:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY]] + ; CHECK: S_SETPC_B64_return [[COPY1]], implicit $vgpr0, implicit $vgpr1, implicit $vgpr2 %val = load <3 x i24>, <3 x i24> addrspace(1)* undef ret <3 x i24> %val } @@ -230,10 +284,13 @@ define <3 x i24> @v3i24_func_void() #0 { define i32 @i32_func_void() #0 { ; CHECK-LABEL: name: i32_func_void ; CHECK: bb.1 (%ir-block.0): - ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF - ; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[DEF]](p1) :: (load (s32) from `i32 addrspace(1)* undef`, addrspace 1) - ; CHECK-NEXT: $vgpr0 = COPY [[LOAD]](s32) - ; CHECK-NEXT: SI_RETURN implicit $vgpr0 + ; CHECK: liveins: $sgpr30_sgpr31 + ; CHECK: [[COPY:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 + ; CHECK: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF + ; CHECK: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[DEF]](p1) :: (load (s32) from `i32 addrspace(1)* undef`, addrspace 1) + ; CHECK: $vgpr0 = COPY [[LOAD]](s32) + ; CHECK: [[COPY1:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY]] + ; CHECK: S_SETPC_B64_return [[COPY1]], implicit $vgpr0 %val = load i32, i32 addrspace(1)* undef ret i32 %val } @@ -241,13 +298,16 @@ define i32 @i32_func_void() #0 { define i48 @i48_func_void() #0 { ; CHECK-LABEL: name: i48_func_void ; CHECK: bb.1 (%ir-block.0): - ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF - ; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(s48) = G_LOAD [[DEF]](p1) :: (load (s48) from `i48 addrspace(1)* undef`, align 8, addrspace 1) - ; CHECK-NEXT: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[LOAD]](s48) - ; 
CHECK-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[ANYEXT]](s64) - ; CHECK-NEXT: $vgpr0 = COPY [[UV]](s32) - ; CHECK-NEXT: $vgpr1 = COPY [[UV1]](s32) - ; CHECK-NEXT: SI_RETURN implicit $vgpr0, implicit $vgpr1 + ; CHECK: liveins: $sgpr30_sgpr31 + ; CHECK: [[COPY:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 + ; CHECK: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF + ; CHECK: [[LOAD:%[0-9]+]]:_(s48) = G_LOAD [[DEF]](p1) :: (load (s48) from `i48 addrspace(1)* undef`, align 8, addrspace 1) + ; CHECK: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[LOAD]](s48) + ; CHECK: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[ANYEXT]](s64) + ; CHECK: $vgpr0 = COPY [[UV]](s32) + ; CHECK: $vgpr1 = COPY [[UV1]](s32) + ; CHECK: [[COPY1:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY]] + ; CHECK: S_SETPC_B64_return [[COPY1]], implicit $vgpr0, implicit $vgpr1 %val = load i48, i48 addrspace(1)* undef, align 8 ret i48 %val } @@ -255,13 +315,16 @@ define i48 @i48_func_void() #0 { define signext i48 @i48_signext_func_void() #0 { ; CHECK-LABEL: name: i48_signext_func_void ; CHECK: bb.1 (%ir-block.0): - ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF - ; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(s48) = G_LOAD [[DEF]](p1) :: (load (s48) from `i48 addrspace(1)* undef`, align 8, addrspace 1) - ; CHECK-NEXT: [[SEXT:%[0-9]+]]:_(s64) = G_SEXT [[LOAD]](s48) - ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[SEXT]](s64) - ; CHECK-NEXT: $vgpr0 = COPY [[UV]](s32) - ; CHECK-NEXT: $vgpr1 = COPY [[UV1]](s32) - ; CHECK-NEXT: SI_RETURN implicit $vgpr0, implicit $vgpr1 + ; CHECK: liveins: $sgpr30_sgpr31 + ; CHECK: [[COPY:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 + ; CHECK: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF + ; CHECK: [[LOAD:%[0-9]+]]:_(s48) = G_LOAD [[DEF]](p1) :: (load (s48) from `i48 addrspace(1)* undef`, align 8, addrspace 1) + ; CHECK: [[SEXT:%[0-9]+]]:_(s64) = G_SEXT [[LOAD]](s48) + ; CHECK: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[SEXT]](s64) + ; CHECK: $vgpr0 = COPY [[UV]](s32) + ; CHECK: $vgpr1 = COPY [[UV1]](s32) + ; CHECK: [[COPY1:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY]] + ; CHECK: S_SETPC_B64_return [[COPY1]], implicit $vgpr0, implicit $vgpr1 %val = load i48, i48 addrspace(1)* undef, align 8 ret i48 %val } @@ -269,13 +332,16 @@ define signext i48 @i48_signext_func_void() #0 { define zeroext i48 @i48_zeroext_func_void() #0 { ; CHECK-LABEL: name: i48_zeroext_func_void ; CHECK: bb.1 (%ir-block.0): - ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF - ; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(s48) = G_LOAD [[DEF]](p1) :: (load (s48) from `i48 addrspace(1)* undef`, align 8, addrspace 1) - ; CHECK-NEXT: [[ZEXT:%[0-9]+]]:_(s64) = G_ZEXT [[LOAD]](s48) - ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[ZEXT]](s64) - ; CHECK-NEXT: $vgpr0 = COPY [[UV]](s32) - ; CHECK-NEXT: $vgpr1 = COPY [[UV1]](s32) - ; CHECK-NEXT: SI_RETURN implicit $vgpr0, implicit $vgpr1 + ; CHECK: liveins: $sgpr30_sgpr31 + ; CHECK: [[COPY:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 + ; CHECK: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF + ; CHECK: [[LOAD:%[0-9]+]]:_(s48) = G_LOAD [[DEF]](p1) :: (load (s48) from `i48 addrspace(1)* undef`, align 8, addrspace 1) + ; CHECK: [[ZEXT:%[0-9]+]]:_(s64) = G_ZEXT [[LOAD]](s48) + ; CHECK: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[ZEXT]](s64) + ; CHECK: $vgpr0 = COPY [[UV]](s32) + ; CHECK: $vgpr1 = COPY [[UV1]](s32) + ; CHECK: [[COPY1:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY]] + ; CHECK: S_SETPC_B64_return [[COPY1]], 
implicit $vgpr0, implicit $vgpr1 %val = load i48, i48 addrspace(1)* undef, align 8 ret i48 %val } @@ -283,12 +349,15 @@ define zeroext i48 @i48_zeroext_func_void() #0 { define i64 @i64_func_void() #0 { ; CHECK-LABEL: name: i64_func_void ; CHECK: bb.1 (%ir-block.0): - ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF - ; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(s64) = G_LOAD [[DEF]](p1) :: (load (s64) from `i64 addrspace(1)* undef`, addrspace 1) - ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[LOAD]](s64) - ; CHECK-NEXT: $vgpr0 = COPY [[UV]](s32) - ; CHECK-NEXT: $vgpr1 = COPY [[UV1]](s32) - ; CHECK-NEXT: SI_RETURN implicit $vgpr0, implicit $vgpr1 + ; CHECK: liveins: $sgpr30_sgpr31 + ; CHECK: [[COPY:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 + ; CHECK: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF + ; CHECK: [[LOAD:%[0-9]+]]:_(s64) = G_LOAD [[DEF]](p1) :: (load (s64) from `i64 addrspace(1)* undef`, addrspace 1) + ; CHECK: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[LOAD]](s64) + ; CHECK: $vgpr0 = COPY [[UV]](s32) + ; CHECK: $vgpr1 = COPY [[UV1]](s32) + ; CHECK: [[COPY1:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY]] + ; CHECK: S_SETPC_B64_return [[COPY1]], implicit $vgpr0, implicit $vgpr1 %val = load i64, i64 addrspace(1)* undef ret i64 %val } @@ -296,14 +365,17 @@ define i64 @i64_func_void() #0 { define i65 @i65_func_void() #0 { ; CHECK-LABEL: name: i65_func_void ; CHECK: bb.1 (%ir-block.0): - ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF - ; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(s65) = G_LOAD [[DEF]](p1) :: (load (s65) from `i65 addrspace(1)* undef`, align 8, addrspace 1) - ; CHECK-NEXT: [[ANYEXT:%[0-9]+]]:_(s96) = G_ANYEXT [[LOAD]](s65) - ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[ANYEXT]](s96) - ; CHECK-NEXT: $vgpr0 = COPY [[UV]](s32) - ; CHECK-NEXT: $vgpr1 = COPY [[UV1]](s32) - ; CHECK-NEXT: $vgpr2 = COPY [[UV2]](s32) - ; CHECK-NEXT: SI_RETURN implicit $vgpr0, implicit $vgpr1, implicit $vgpr2 + ; CHECK: liveins: $sgpr30_sgpr31 + ; CHECK: [[COPY:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 + ; CHECK: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF + ; CHECK: [[LOAD:%[0-9]+]]:_(s65) = G_LOAD [[DEF]](p1) :: (load (s65) from `i65 addrspace(1)* undef`, align 8, addrspace 1) + ; CHECK: [[ANYEXT:%[0-9]+]]:_(s96) = G_ANYEXT [[LOAD]](s65) + ; CHECK: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[ANYEXT]](s96) + ; CHECK: $vgpr0 = COPY [[UV]](s32) + ; CHECK: $vgpr1 = COPY [[UV1]](s32) + ; CHECK: $vgpr2 = COPY [[UV2]](s32) + ; CHECK: [[COPY1:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY]] + ; CHECK: S_SETPC_B64_return [[COPY1]], implicit $vgpr0, implicit $vgpr1, implicit $vgpr2 %val = load i65, i65 addrspace(1)* undef ret i65 %val } @@ -311,14 +383,17 @@ define i65 @i65_func_void() #0 { define signext i65 @i65_signext_func_void() #0 { ; CHECK-LABEL: name: i65_signext_func_void ; CHECK: bb.1 (%ir-block.0): - ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF - ; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(s65) = G_LOAD [[DEF]](p1) :: (load (s65) from `i65 addrspace(1)* undef`, align 8, addrspace 1) - ; CHECK-NEXT: [[SEXT:%[0-9]+]]:_(s96) = G_SEXT [[LOAD]](s65) - ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[SEXT]](s96) - ; CHECK-NEXT: $vgpr0 = COPY [[UV]](s32) - ; CHECK-NEXT: $vgpr1 = COPY [[UV1]](s32) - ; CHECK-NEXT: $vgpr2 = COPY [[UV2]](s32) - ; CHECK-NEXT: SI_RETURN implicit $vgpr0, implicit $vgpr1, implicit $vgpr2 + ; CHECK: liveins: 
$sgpr30_sgpr31 + ; CHECK: [[COPY:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 + ; CHECK: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF + ; CHECK: [[LOAD:%[0-9]+]]:_(s65) = G_LOAD [[DEF]](p1) :: (load (s65) from `i65 addrspace(1)* undef`, align 8, addrspace 1) + ; CHECK: [[SEXT:%[0-9]+]]:_(s96) = G_SEXT [[LOAD]](s65) + ; CHECK: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[SEXT]](s96) + ; CHECK: $vgpr0 = COPY [[UV]](s32) + ; CHECK: $vgpr1 = COPY [[UV1]](s32) + ; CHECK: $vgpr2 = COPY [[UV2]](s32) + ; CHECK: [[COPY1:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY]] + ; CHECK: S_SETPC_B64_return [[COPY1]], implicit $vgpr0, implicit $vgpr1, implicit $vgpr2 %val = load i65, i65 addrspace(1)* undef ret i65 %val } @@ -326,14 +401,17 @@ define signext i65 @i65_signext_func_void() #0 { define zeroext i65 @i65_zeroext_func_void() #0 { ; CHECK-LABEL: name: i65_zeroext_func_void ; CHECK: bb.1 (%ir-block.0): - ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF - ; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(s65) = G_LOAD [[DEF]](p1) :: (load (s65) from `i65 addrspace(1)* undef`, align 8, addrspace 1) - ; CHECK-NEXT: [[ZEXT:%[0-9]+]]:_(s96) = G_ZEXT [[LOAD]](s65) - ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[ZEXT]](s96) - ; CHECK-NEXT: $vgpr0 = COPY [[UV]](s32) - ; CHECK-NEXT: $vgpr1 = COPY [[UV1]](s32) - ; CHECK-NEXT: $vgpr2 = COPY [[UV2]](s32) - ; CHECK-NEXT: SI_RETURN implicit $vgpr0, implicit $vgpr1, implicit $vgpr2 + ; CHECK: liveins: $sgpr30_sgpr31 + ; CHECK: [[COPY:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 + ; CHECK: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF + ; CHECK: [[LOAD:%[0-9]+]]:_(s65) = G_LOAD [[DEF]](p1) :: (load (s65) from `i65 addrspace(1)* undef`, align 8, addrspace 1) + ; CHECK: [[ZEXT:%[0-9]+]]:_(s96) = G_ZEXT [[LOAD]](s65) + ; CHECK: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[ZEXT]](s96) + ; CHECK: $vgpr0 = COPY [[UV]](s32) + ; CHECK: $vgpr1 = COPY [[UV1]](s32) + ; CHECK: $vgpr2 = COPY [[UV2]](s32) + ; CHECK: [[COPY1:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY]] + ; CHECK: S_SETPC_B64_return [[COPY1]], implicit $vgpr0, implicit $vgpr1, implicit $vgpr2 %val = load i65, i65 addrspace(1)* undef ret i65 %val } @@ -341,10 +419,13 @@ define zeroext i65 @i65_zeroext_func_void() #0 { define float @f32_func_void() #0 { ; CHECK-LABEL: name: f32_func_void ; CHECK: bb.1 (%ir-block.0): - ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF - ; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[DEF]](p1) :: (load (s32) from `float addrspace(1)* undef`, addrspace 1) - ; CHECK-NEXT: $vgpr0 = COPY [[LOAD]](s32) - ; CHECK-NEXT: SI_RETURN implicit $vgpr0 + ; CHECK: liveins: $sgpr30_sgpr31 + ; CHECK: [[COPY:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 + ; CHECK: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF + ; CHECK: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[DEF]](p1) :: (load (s32) from `float addrspace(1)* undef`, addrspace 1) + ; CHECK: $vgpr0 = COPY [[LOAD]](s32) + ; CHECK: [[COPY1:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY]] + ; CHECK: S_SETPC_B64_return [[COPY1]], implicit $vgpr0 %val = load float, float addrspace(1)* undef ret float %val } @@ -352,12 +433,15 @@ define float @f32_func_void() #0 { define double @f64_func_void() #0 { ; CHECK-LABEL: name: f64_func_void ; CHECK: bb.1 (%ir-block.0): - ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF - ; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(s64) = G_LOAD [[DEF]](p1) :: (load (s64) from `double addrspace(1)* undef`, addrspace 1) - ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s32), 
[[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[LOAD]](s64) - ; CHECK-NEXT: $vgpr0 = COPY [[UV]](s32) - ; CHECK-NEXT: $vgpr1 = COPY [[UV1]](s32) - ; CHECK-NEXT: SI_RETURN implicit $vgpr0, implicit $vgpr1 + ; CHECK: liveins: $sgpr30_sgpr31 + ; CHECK: [[COPY:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 + ; CHECK: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF + ; CHECK: [[LOAD:%[0-9]+]]:_(s64) = G_LOAD [[DEF]](p1) :: (load (s64) from `double addrspace(1)* undef`, addrspace 1) + ; CHECK: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[LOAD]](s64) + ; CHECK: $vgpr0 = COPY [[UV]](s32) + ; CHECK: $vgpr1 = COPY [[UV1]](s32) + ; CHECK: [[COPY1:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY]] + ; CHECK: S_SETPC_B64_return [[COPY1]], implicit $vgpr0, implicit $vgpr1 %val = load double, double addrspace(1)* undef ret double %val } @@ -365,14 +449,17 @@ define double @f64_func_void() #0 { define <2 x double> @v2f64_func_void() #0 { ; CHECK-LABEL: name: v2f64_func_void ; CHECK: bb.1 (%ir-block.0): - ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF - ; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(<2 x s64>) = G_LOAD [[DEF]](p1) :: (load (<2 x s64>) from `<2 x double> addrspace(1)* undef`, addrspace 1) - ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[LOAD]](<2 x s64>) - ; CHECK-NEXT: $vgpr0 = COPY [[UV]](s32) - ; CHECK-NEXT: $vgpr1 = COPY [[UV1]](s32) - ; CHECK-NEXT: $vgpr2 = COPY [[UV2]](s32) - ; CHECK-NEXT: $vgpr3 = COPY [[UV3]](s32) - ; CHECK-NEXT: SI_RETURN implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3 + ; CHECK: liveins: $sgpr30_sgpr31 + ; CHECK: [[COPY:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 + ; CHECK: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF + ; CHECK: [[LOAD:%[0-9]+]]:_(<2 x s64>) = G_LOAD [[DEF]](p1) :: (load (<2 x s64>) from `<2 x double> addrspace(1)* undef`, addrspace 1) + ; CHECK: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[LOAD]](<2 x s64>) + ; CHECK: $vgpr0 = COPY [[UV]](s32) + ; CHECK: $vgpr1 = COPY [[UV1]](s32) + ; CHECK: $vgpr2 = COPY [[UV2]](s32) + ; CHECK: $vgpr3 = COPY [[UV3]](s32) + ; CHECK: [[COPY1:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY]] + ; CHECK: S_SETPC_B64_return [[COPY1]], implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3 %val = load <2 x double>, <2 x double> addrspace(1)* undef ret <2 x double> %val } @@ -380,12 +467,15 @@ define <2 x double> @v2f64_func_void() #0 { define <2 x i32> @v2i32_func_void() #0 { ; CHECK-LABEL: name: v2i32_func_void ; CHECK: bb.1 (%ir-block.0): - ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF - ; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(<2 x s32>) = G_LOAD [[DEF]](p1) :: (load (<2 x s32>) from `<2 x i32> addrspace(1)* undef`, addrspace 1) - ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[LOAD]](<2 x s32>) - ; CHECK-NEXT: $vgpr0 = COPY [[UV]](s32) - ; CHECK-NEXT: $vgpr1 = COPY [[UV1]](s32) - ; CHECK-NEXT: SI_RETURN implicit $vgpr0, implicit $vgpr1 + ; CHECK: liveins: $sgpr30_sgpr31 + ; CHECK: [[COPY:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 + ; CHECK: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF + ; CHECK: [[LOAD:%[0-9]+]]:_(<2 x s32>) = G_LOAD [[DEF]](p1) :: (load (<2 x s32>) from `<2 x i32> addrspace(1)* undef`, addrspace 1) + ; CHECK: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[LOAD]](<2 x s32>) + ; CHECK: $vgpr0 = COPY [[UV]](s32) + ; CHECK: $vgpr1 = COPY [[UV1]](s32) + ; CHECK: [[COPY1:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY]] + ; CHECK: 
S_SETPC_B64_return [[COPY1]], implicit $vgpr0, implicit $vgpr1 %val = load <2 x i32>, <2 x i32> addrspace(1)* undef ret <2 x i32> %val } @@ -393,13 +483,16 @@ define <2 x i32> @v2i32_func_void() #0 { define <3 x i32> @v3i32_func_void() #0 { ; CHECK-LABEL: name: v3i32_func_void ; CHECK: bb.1 (%ir-block.0): - ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF - ; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(<3 x s32>) = G_LOAD [[DEF]](p1) :: (load (<3 x s32>) from `<3 x i32> addrspace(1)* undef`, align 16, addrspace 1) - ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[LOAD]](<3 x s32>) - ; CHECK-NEXT: $vgpr0 = COPY [[UV]](s32) - ; CHECK-NEXT: $vgpr1 = COPY [[UV1]](s32) - ; CHECK-NEXT: $vgpr2 = COPY [[UV2]](s32) - ; CHECK-NEXT: SI_RETURN implicit $vgpr0, implicit $vgpr1, implicit $vgpr2 + ; CHECK: liveins: $sgpr30_sgpr31 + ; CHECK: [[COPY:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 + ; CHECK: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF + ; CHECK: [[LOAD:%[0-9]+]]:_(<3 x s32>) = G_LOAD [[DEF]](p1) :: (load (<3 x s32>) from `<3 x i32> addrspace(1)* undef`, align 16, addrspace 1) + ; CHECK: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[LOAD]](<3 x s32>) + ; CHECK: $vgpr0 = COPY [[UV]](s32) + ; CHECK: $vgpr1 = COPY [[UV1]](s32) + ; CHECK: $vgpr2 = COPY [[UV2]](s32) + ; CHECK: [[COPY1:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY]] + ; CHECK: S_SETPC_B64_return [[COPY1]], implicit $vgpr0, implicit $vgpr1, implicit $vgpr2 %val = load <3 x i32>, <3 x i32> addrspace(1)* undef ret <3 x i32> %val } @@ -407,14 +500,17 @@ define <3 x i32> @v3i32_func_void() #0 { define <4 x i32> @v4i32_func_void() #0 { ; CHECK-LABEL: name: v4i32_func_void ; CHECK: bb.1 (%ir-block.0): - ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF - ; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[DEF]](p1) :: (load (<4 x s32>) from `<4 x i32> addrspace(1)* undef`, addrspace 1) - ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[LOAD]](<4 x s32>) - ; CHECK-NEXT: $vgpr0 = COPY [[UV]](s32) - ; CHECK-NEXT: $vgpr1 = COPY [[UV1]](s32) - ; CHECK-NEXT: $vgpr2 = COPY [[UV2]](s32) - ; CHECK-NEXT: $vgpr3 = COPY [[UV3]](s32) - ; CHECK-NEXT: SI_RETURN implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3 + ; CHECK: liveins: $sgpr30_sgpr31 + ; CHECK: [[COPY:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 + ; CHECK: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF + ; CHECK: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[DEF]](p1) :: (load (<4 x s32>) from `<4 x i32> addrspace(1)* undef`, addrspace 1) + ; CHECK: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[LOAD]](<4 x s32>) + ; CHECK: $vgpr0 = COPY [[UV]](s32) + ; CHECK: $vgpr1 = COPY [[UV1]](s32) + ; CHECK: $vgpr2 = COPY [[UV2]](s32) + ; CHECK: $vgpr3 = COPY [[UV3]](s32) + ; CHECK: [[COPY1:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY]] + ; CHECK: S_SETPC_B64_return [[COPY1]], implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3 %val = load <4 x i32>, <4 x i32> addrspace(1)* undef ret <4 x i32> %val } @@ -422,15 +518,18 @@ define <4 x i32> @v4i32_func_void() #0 { define <5 x i32> @v5i32_func_void() #0 { ; CHECK-LABEL: name: v5i32_func_void ; CHECK: bb.1 (%ir-block.0): - ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF - ; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(<5 x s32>) = G_LOAD [[DEF]](p1) :: (volatile load (<5 x s32>) from `<5 x i32> addrspace(1)* undef`, align 32, addrspace 1) - 
; CHECK-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[LOAD]](<5 x s32>) - ; CHECK-NEXT: $vgpr0 = COPY [[UV]](s32) - ; CHECK-NEXT: $vgpr1 = COPY [[UV1]](s32) - ; CHECK-NEXT: $vgpr2 = COPY [[UV2]](s32) - ; CHECK-NEXT: $vgpr3 = COPY [[UV3]](s32) - ; CHECK-NEXT: $vgpr4 = COPY [[UV4]](s32) - ; CHECK-NEXT: SI_RETURN implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4 + ; CHECK: liveins: $sgpr30_sgpr31 + ; CHECK: [[COPY:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 + ; CHECK: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF + ; CHECK: [[LOAD:%[0-9]+]]:_(<5 x s32>) = G_LOAD [[DEF]](p1) :: (volatile load (<5 x s32>) from `<5 x i32> addrspace(1)* undef`, align 32, addrspace 1) + ; CHECK: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[LOAD]](<5 x s32>) + ; CHECK: $vgpr0 = COPY [[UV]](s32) + ; CHECK: $vgpr1 = COPY [[UV1]](s32) + ; CHECK: $vgpr2 = COPY [[UV2]](s32) + ; CHECK: $vgpr3 = COPY [[UV3]](s32) + ; CHECK: $vgpr4 = COPY [[UV4]](s32) + ; CHECK: [[COPY1:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY]] + ; CHECK: S_SETPC_B64_return [[COPY1]], implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4 %val = load volatile <5 x i32>, <5 x i32> addrspace(1)* undef ret <5 x i32> %val } @@ -438,19 +537,22 @@ define <5 x i32> @v5i32_func_void() #0 { define <8 x i32> @v8i32_func_void() #0 { ; CHECK-LABEL: name: v8i32_func_void ; CHECK: bb.1 (%ir-block.0): - ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p4) = G_IMPLICIT_DEF - ; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[DEF]](p4) :: (volatile load (p1) from `<8 x i32> addrspace(1)* addrspace(4)* undef`, addrspace 4) - ; CHECK-NEXT: [[LOAD1:%[0-9]+]]:_(<8 x s32>) = G_LOAD [[LOAD]](p1) :: (load (<8 x s32>) from %ir.ptr, addrspace 1) - ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32), [[UV6:%[0-9]+]]:_(s32), [[UV7:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[LOAD1]](<8 x s32>) - ; CHECK-NEXT: $vgpr0 = COPY [[UV]](s32) - ; CHECK-NEXT: $vgpr1 = COPY [[UV1]](s32) - ; CHECK-NEXT: $vgpr2 = COPY [[UV2]](s32) - ; CHECK-NEXT: $vgpr3 = COPY [[UV3]](s32) - ; CHECK-NEXT: $vgpr4 = COPY [[UV4]](s32) - ; CHECK-NEXT: $vgpr5 = COPY [[UV5]](s32) - ; CHECK-NEXT: $vgpr6 = COPY [[UV6]](s32) - ; CHECK-NEXT: $vgpr7 = COPY [[UV7]](s32) - ; CHECK-NEXT: SI_RETURN implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7 + ; CHECK: liveins: $sgpr30_sgpr31 + ; CHECK: [[COPY:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 + ; CHECK: [[DEF:%[0-9]+]]:_(p4) = G_IMPLICIT_DEF + ; CHECK: [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[DEF]](p4) :: (volatile load (p1) from `<8 x i32> addrspace(1)* addrspace(4)* undef`, addrspace 4) + ; CHECK: [[LOAD1:%[0-9]+]]:_(<8 x s32>) = G_LOAD [[LOAD]](p1) :: (load (<8 x s32>) from %ir.ptr, addrspace 1) + ; CHECK: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32), [[UV6:%[0-9]+]]:_(s32), [[UV7:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[LOAD1]](<8 x s32>) + ; CHECK: $vgpr0 = COPY [[UV]](s32) + ; CHECK: $vgpr1 = COPY [[UV1]](s32) + ; CHECK: $vgpr2 = COPY [[UV2]](s32) + ; CHECK: $vgpr3 = COPY [[UV3]](s32) + ; CHECK: $vgpr4 = COPY [[UV4]](s32) + ; CHECK: $vgpr5 = COPY [[UV5]](s32) + ; CHECK: $vgpr6 = COPY 
[[UV6]](s32) + ; CHECK: $vgpr7 = COPY [[UV7]](s32) + ; CHECK: [[COPY1:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY]] + ; CHECK: S_SETPC_B64_return [[COPY1]], implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7 %ptr = load volatile <8 x i32> addrspace(1)*, <8 x i32> addrspace(1)* addrspace(4)* undef %val = load <8 x i32>, <8 x i32> addrspace(1)* %ptr ret <8 x i32> %val @@ -459,27 +561,30 @@ define <8 x i32> @v8i32_func_void() #0 { define <16 x i32> @v16i32_func_void() #0 { ; CHECK-LABEL: name: v16i32_func_void ; CHECK: bb.1 (%ir-block.0): - ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p4) = G_IMPLICIT_DEF - ; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[DEF]](p4) :: (volatile load (p1) from `<16 x i32> addrspace(1)* addrspace(4)* undef`, addrspace 4) - ; CHECK-NEXT: [[LOAD1:%[0-9]+]]:_(<16 x s32>) = G_LOAD [[LOAD]](p1) :: (load (<16 x s32>) from %ir.ptr, addrspace 1) - ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32), [[UV6:%[0-9]+]]:_(s32), [[UV7:%[0-9]+]]:_(s32), [[UV8:%[0-9]+]]:_(s32), [[UV9:%[0-9]+]]:_(s32), [[UV10:%[0-9]+]]:_(s32), [[UV11:%[0-9]+]]:_(s32), [[UV12:%[0-9]+]]:_(s32), [[UV13:%[0-9]+]]:_(s32), [[UV14:%[0-9]+]]:_(s32), [[UV15:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[LOAD1]](<16 x s32>) - ; CHECK-NEXT: $vgpr0 = COPY [[UV]](s32) - ; CHECK-NEXT: $vgpr1 = COPY [[UV1]](s32) - ; CHECK-NEXT: $vgpr2 = COPY [[UV2]](s32) - ; CHECK-NEXT: $vgpr3 = COPY [[UV3]](s32) - ; CHECK-NEXT: $vgpr4 = COPY [[UV4]](s32) - ; CHECK-NEXT: $vgpr5 = COPY [[UV5]](s32) - ; CHECK-NEXT: $vgpr6 = COPY [[UV6]](s32) - ; CHECK-NEXT: $vgpr7 = COPY [[UV7]](s32) - ; CHECK-NEXT: $vgpr8 = COPY [[UV8]](s32) - ; CHECK-NEXT: $vgpr9 = COPY [[UV9]](s32) - ; CHECK-NEXT: $vgpr10 = COPY [[UV10]](s32) - ; CHECK-NEXT: $vgpr11 = COPY [[UV11]](s32) - ; CHECK-NEXT: $vgpr12 = COPY [[UV12]](s32) - ; CHECK-NEXT: $vgpr13 = COPY [[UV13]](s32) - ; CHECK-NEXT: $vgpr14 = COPY [[UV14]](s32) - ; CHECK-NEXT: $vgpr15 = COPY [[UV15]](s32) - ; CHECK-NEXT: SI_RETURN implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7, implicit $vgpr8, implicit $vgpr9, implicit $vgpr10, implicit $vgpr11, implicit $vgpr12, implicit $vgpr13, implicit $vgpr14, implicit $vgpr15 + ; CHECK: liveins: $sgpr30_sgpr31 + ; CHECK: [[COPY:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 + ; CHECK: [[DEF:%[0-9]+]]:_(p4) = G_IMPLICIT_DEF + ; CHECK: [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[DEF]](p4) :: (volatile load (p1) from `<16 x i32> addrspace(1)* addrspace(4)* undef`, addrspace 4) + ; CHECK: [[LOAD1:%[0-9]+]]:_(<16 x s32>) = G_LOAD [[LOAD]](p1) :: (load (<16 x s32>) from %ir.ptr, addrspace 1) + ; CHECK: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32), [[UV6:%[0-9]+]]:_(s32), [[UV7:%[0-9]+]]:_(s32), [[UV8:%[0-9]+]]:_(s32), [[UV9:%[0-9]+]]:_(s32), [[UV10:%[0-9]+]]:_(s32), [[UV11:%[0-9]+]]:_(s32), [[UV12:%[0-9]+]]:_(s32), [[UV13:%[0-9]+]]:_(s32), [[UV14:%[0-9]+]]:_(s32), [[UV15:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[LOAD1]](<16 x s32>) + ; CHECK: $vgpr0 = COPY [[UV]](s32) + ; CHECK: $vgpr1 = COPY [[UV1]](s32) + ; CHECK: $vgpr2 = COPY [[UV2]](s32) + ; CHECK: $vgpr3 = COPY [[UV3]](s32) + ; CHECK: $vgpr4 = COPY [[UV4]](s32) + ; CHECK: $vgpr5 = COPY [[UV5]](s32) + ; CHECK: $vgpr6 = COPY [[UV6]](s32) + ; CHECK: $vgpr7 = COPY [[UV7]](s32) + ; CHECK: $vgpr8 = 
COPY [[UV8]](s32) + ; CHECK: $vgpr9 = COPY [[UV9]](s32) + ; CHECK: $vgpr10 = COPY [[UV10]](s32) + ; CHECK: $vgpr11 = COPY [[UV11]](s32) + ; CHECK: $vgpr12 = COPY [[UV12]](s32) + ; CHECK: $vgpr13 = COPY [[UV13]](s32) + ; CHECK: $vgpr14 = COPY [[UV14]](s32) + ; CHECK: $vgpr15 = COPY [[UV15]](s32) + ; CHECK: [[COPY1:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY]] + ; CHECK: S_SETPC_B64_return [[COPY1]], implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7, implicit $vgpr8, implicit $vgpr9, implicit $vgpr10, implicit $vgpr11, implicit $vgpr12, implicit $vgpr13, implicit $vgpr14, implicit $vgpr15 %ptr = load volatile <16 x i32> addrspace(1)*, <16 x i32> addrspace(1)* addrspace(4)* undef %val = load <16 x i32>, <16 x i32> addrspace(1)* %ptr ret <16 x i32> %val @@ -488,43 +593,46 @@ define <16 x i32> @v16i32_func_void() #0 { define <32 x i32> @v32i32_func_void() #0 { ; CHECK-LABEL: name: v32i32_func_void ; CHECK: bb.1 (%ir-block.0): - ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p4) = G_IMPLICIT_DEF - ; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[DEF]](p4) :: (volatile load (p1) from `<32 x i32> addrspace(1)* addrspace(4)* undef`, addrspace 4) - ; CHECK-NEXT: [[LOAD1:%[0-9]+]]:_(<32 x s32>) = G_LOAD [[LOAD]](p1) :: (load (<32 x s32>) from %ir.ptr, addrspace 1) - ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32), [[UV6:%[0-9]+]]:_(s32), [[UV7:%[0-9]+]]:_(s32), [[UV8:%[0-9]+]]:_(s32), [[UV9:%[0-9]+]]:_(s32), [[UV10:%[0-9]+]]:_(s32), [[UV11:%[0-9]+]]:_(s32), [[UV12:%[0-9]+]]:_(s32), [[UV13:%[0-9]+]]:_(s32), [[UV14:%[0-9]+]]:_(s32), [[UV15:%[0-9]+]]:_(s32), [[UV16:%[0-9]+]]:_(s32), [[UV17:%[0-9]+]]:_(s32), [[UV18:%[0-9]+]]:_(s32), [[UV19:%[0-9]+]]:_(s32), [[UV20:%[0-9]+]]:_(s32), [[UV21:%[0-9]+]]:_(s32), [[UV22:%[0-9]+]]:_(s32), [[UV23:%[0-9]+]]:_(s32), [[UV24:%[0-9]+]]:_(s32), [[UV25:%[0-9]+]]:_(s32), [[UV26:%[0-9]+]]:_(s32), [[UV27:%[0-9]+]]:_(s32), [[UV28:%[0-9]+]]:_(s32), [[UV29:%[0-9]+]]:_(s32), [[UV30:%[0-9]+]]:_(s32), [[UV31:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[LOAD1]](<32 x s32>) - ; CHECK-NEXT: $vgpr0 = COPY [[UV]](s32) - ; CHECK-NEXT: $vgpr1 = COPY [[UV1]](s32) - ; CHECK-NEXT: $vgpr2 = COPY [[UV2]](s32) - ; CHECK-NEXT: $vgpr3 = COPY [[UV3]](s32) - ; CHECK-NEXT: $vgpr4 = COPY [[UV4]](s32) - ; CHECK-NEXT: $vgpr5 = COPY [[UV5]](s32) - ; CHECK-NEXT: $vgpr6 = COPY [[UV6]](s32) - ; CHECK-NEXT: $vgpr7 = COPY [[UV7]](s32) - ; CHECK-NEXT: $vgpr8 = COPY [[UV8]](s32) - ; CHECK-NEXT: $vgpr9 = COPY [[UV9]](s32) - ; CHECK-NEXT: $vgpr10 = COPY [[UV10]](s32) - ; CHECK-NEXT: $vgpr11 = COPY [[UV11]](s32) - ; CHECK-NEXT: $vgpr12 = COPY [[UV12]](s32) - ; CHECK-NEXT: $vgpr13 = COPY [[UV13]](s32) - ; CHECK-NEXT: $vgpr14 = COPY [[UV14]](s32) - ; CHECK-NEXT: $vgpr15 = COPY [[UV15]](s32) - ; CHECK-NEXT: $vgpr16 = COPY [[UV16]](s32) - ; CHECK-NEXT: $vgpr17 = COPY [[UV17]](s32) - ; CHECK-NEXT: $vgpr18 = COPY [[UV18]](s32) - ; CHECK-NEXT: $vgpr19 = COPY [[UV19]](s32) - ; CHECK-NEXT: $vgpr20 = COPY [[UV20]](s32) - ; CHECK-NEXT: $vgpr21 = COPY [[UV21]](s32) - ; CHECK-NEXT: $vgpr22 = COPY [[UV22]](s32) - ; CHECK-NEXT: $vgpr23 = COPY [[UV23]](s32) - ; CHECK-NEXT: $vgpr24 = COPY [[UV24]](s32) - ; CHECK-NEXT: $vgpr25 = COPY [[UV25]](s32) - ; CHECK-NEXT: $vgpr26 = COPY [[UV26]](s32) - ; CHECK-NEXT: $vgpr27 = COPY [[UV27]](s32) - ; CHECK-NEXT: $vgpr28 = COPY [[UV28]](s32) - ; CHECK-NEXT: $vgpr29 = COPY [[UV29]](s32) - ; CHECK-NEXT: $vgpr30 = COPY [[UV30]](s32) 
- ; CHECK-NEXT: $vgpr31 = COPY [[UV31]](s32) - ; CHECK-NEXT: SI_RETURN implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7, implicit $vgpr8, implicit $vgpr9, implicit $vgpr10, implicit $vgpr11, implicit $vgpr12, implicit $vgpr13, implicit $vgpr14, implicit $vgpr15, implicit $vgpr16, implicit $vgpr17, implicit $vgpr18, implicit $vgpr19, implicit $vgpr20, implicit $vgpr21, implicit $vgpr22, implicit $vgpr23, implicit $vgpr24, implicit $vgpr25, implicit $vgpr26, implicit $vgpr27, implicit $vgpr28, implicit $vgpr29, implicit $vgpr30, implicit $vgpr31 + ; CHECK: liveins: $sgpr30_sgpr31 + ; CHECK: [[COPY:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 + ; CHECK: [[DEF:%[0-9]+]]:_(p4) = G_IMPLICIT_DEF + ; CHECK: [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[DEF]](p4) :: (volatile load (p1) from `<32 x i32> addrspace(1)* addrspace(4)* undef`, addrspace 4) + ; CHECK: [[LOAD1:%[0-9]+]]:_(<32 x s32>) = G_LOAD [[LOAD]](p1) :: (load (<32 x s32>) from %ir.ptr, addrspace 1) + ; CHECK: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32), [[UV6:%[0-9]+]]:_(s32), [[UV7:%[0-9]+]]:_(s32), [[UV8:%[0-9]+]]:_(s32), [[UV9:%[0-9]+]]:_(s32), [[UV10:%[0-9]+]]:_(s32), [[UV11:%[0-9]+]]:_(s32), [[UV12:%[0-9]+]]:_(s32), [[UV13:%[0-9]+]]:_(s32), [[UV14:%[0-9]+]]:_(s32), [[UV15:%[0-9]+]]:_(s32), [[UV16:%[0-9]+]]:_(s32), [[UV17:%[0-9]+]]:_(s32), [[UV18:%[0-9]+]]:_(s32), [[UV19:%[0-9]+]]:_(s32), [[UV20:%[0-9]+]]:_(s32), [[UV21:%[0-9]+]]:_(s32), [[UV22:%[0-9]+]]:_(s32), [[UV23:%[0-9]+]]:_(s32), [[UV24:%[0-9]+]]:_(s32), [[UV25:%[0-9]+]]:_(s32), [[UV26:%[0-9]+]]:_(s32), [[UV27:%[0-9]+]]:_(s32), [[UV28:%[0-9]+]]:_(s32), [[UV29:%[0-9]+]]:_(s32), [[UV30:%[0-9]+]]:_(s32), [[UV31:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[LOAD1]](<32 x s32>) + ; CHECK: $vgpr0 = COPY [[UV]](s32) + ; CHECK: $vgpr1 = COPY [[UV1]](s32) + ; CHECK: $vgpr2 = COPY [[UV2]](s32) + ; CHECK: $vgpr3 = COPY [[UV3]](s32) + ; CHECK: $vgpr4 = COPY [[UV4]](s32) + ; CHECK: $vgpr5 = COPY [[UV5]](s32) + ; CHECK: $vgpr6 = COPY [[UV6]](s32) + ; CHECK: $vgpr7 = COPY [[UV7]](s32) + ; CHECK: $vgpr8 = COPY [[UV8]](s32) + ; CHECK: $vgpr9 = COPY [[UV9]](s32) + ; CHECK: $vgpr10 = COPY [[UV10]](s32) + ; CHECK: $vgpr11 = COPY [[UV11]](s32) + ; CHECK: $vgpr12 = COPY [[UV12]](s32) + ; CHECK: $vgpr13 = COPY [[UV13]](s32) + ; CHECK: $vgpr14 = COPY [[UV14]](s32) + ; CHECK: $vgpr15 = COPY [[UV15]](s32) + ; CHECK: $vgpr16 = COPY [[UV16]](s32) + ; CHECK: $vgpr17 = COPY [[UV17]](s32) + ; CHECK: $vgpr18 = COPY [[UV18]](s32) + ; CHECK: $vgpr19 = COPY [[UV19]](s32) + ; CHECK: $vgpr20 = COPY [[UV20]](s32) + ; CHECK: $vgpr21 = COPY [[UV21]](s32) + ; CHECK: $vgpr22 = COPY [[UV22]](s32) + ; CHECK: $vgpr23 = COPY [[UV23]](s32) + ; CHECK: $vgpr24 = COPY [[UV24]](s32) + ; CHECK: $vgpr25 = COPY [[UV25]](s32) + ; CHECK: $vgpr26 = COPY [[UV26]](s32) + ; CHECK: $vgpr27 = COPY [[UV27]](s32) + ; CHECK: $vgpr28 = COPY [[UV28]](s32) + ; CHECK: $vgpr29 = COPY [[UV29]](s32) + ; CHECK: $vgpr30 = COPY [[UV30]](s32) + ; CHECK: $vgpr31 = COPY [[UV31]](s32) + ; CHECK: [[COPY1:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY]] + ; CHECK: S_SETPC_B64_return [[COPY1]], implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7, implicit $vgpr8, implicit $vgpr9, implicit $vgpr10, implicit $vgpr11, implicit $vgpr12, implicit $vgpr13, implicit $vgpr14, implicit $vgpr15, implicit $vgpr16, implicit $vgpr17, 
implicit $vgpr18, implicit $vgpr19, implicit $vgpr20, implicit $vgpr21, implicit $vgpr22, implicit $vgpr23, implicit $vgpr24, implicit $vgpr25, implicit $vgpr26, implicit $vgpr27, implicit $vgpr28, implicit $vgpr29, implicit $vgpr30, implicit $vgpr31 %ptr = load volatile <32 x i32> addrspace(1)*, <32 x i32> addrspace(1)* addrspace(4)* undef %val = load <32 x i32>, <32 x i32> addrspace(1)* %ptr ret <32 x i32> %val @@ -533,14 +641,17 @@ define <32 x i32> @v32i32_func_void() #0 { define <2 x i64> @v2i64_func_void() #0 { ; CHECK-LABEL: name: v2i64_func_void ; CHECK: bb.1 (%ir-block.0): - ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF - ; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(<2 x s64>) = G_LOAD [[DEF]](p1) :: (load (<2 x s64>) from `<2 x i64> addrspace(1)* undef`, addrspace 1) - ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[LOAD]](<2 x s64>) - ; CHECK-NEXT: $vgpr0 = COPY [[UV]](s32) - ; CHECK-NEXT: $vgpr1 = COPY [[UV1]](s32) - ; CHECK-NEXT: $vgpr2 = COPY [[UV2]](s32) - ; CHECK-NEXT: $vgpr3 = COPY [[UV3]](s32) - ; CHECK-NEXT: SI_RETURN implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3 + ; CHECK: liveins: $sgpr30_sgpr31 + ; CHECK: [[COPY:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 + ; CHECK: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF + ; CHECK: [[LOAD:%[0-9]+]]:_(<2 x s64>) = G_LOAD [[DEF]](p1) :: (load (<2 x s64>) from `<2 x i64> addrspace(1)* undef`, addrspace 1) + ; CHECK: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[LOAD]](<2 x s64>) + ; CHECK: $vgpr0 = COPY [[UV]](s32) + ; CHECK: $vgpr1 = COPY [[UV1]](s32) + ; CHECK: $vgpr2 = COPY [[UV2]](s32) + ; CHECK: $vgpr3 = COPY [[UV3]](s32) + ; CHECK: [[COPY1:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY]] + ; CHECK: S_SETPC_B64_return [[COPY1]], implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3 %val = load <2 x i64>, <2 x i64> addrspace(1)* undef ret <2 x i64> %val } @@ -548,17 +659,20 @@ define <2 x i64> @v2i64_func_void() #0 { define <3 x i64> @v3i64_func_void() #0 { ; CHECK-LABEL: name: v3i64_func_void ; CHECK: bb.1 (%ir-block.0): - ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p4) = G_IMPLICIT_DEF - ; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[DEF]](p4) :: (volatile load (p1) from `<3 x i64> addrspace(1)* addrspace(4)* undef`, addrspace 4) - ; CHECK-NEXT: [[LOAD1:%[0-9]+]]:_(<3 x s64>) = G_LOAD [[LOAD]](p1) :: (load (<3 x s64>) from %ir.ptr, align 32, addrspace 1) - ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[LOAD1]](<3 x s64>) - ; CHECK-NEXT: $vgpr0 = COPY [[UV]](s32) - ; CHECK-NEXT: $vgpr1 = COPY [[UV1]](s32) - ; CHECK-NEXT: $vgpr2 = COPY [[UV2]](s32) - ; CHECK-NEXT: $vgpr3 = COPY [[UV3]](s32) - ; CHECK-NEXT: $vgpr4 = COPY [[UV4]](s32) - ; CHECK-NEXT: $vgpr5 = COPY [[UV5]](s32) - ; CHECK-NEXT: SI_RETURN implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5 + ; CHECK: liveins: $sgpr30_sgpr31 + ; CHECK: [[COPY:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 + ; CHECK: [[DEF:%[0-9]+]]:_(p4) = G_IMPLICIT_DEF + ; CHECK: [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[DEF]](p4) :: (volatile load (p1) from `<3 x i64> addrspace(1)* addrspace(4)* undef`, addrspace 4) + ; CHECK: [[LOAD1:%[0-9]+]]:_(<3 x s64>) = G_LOAD [[LOAD]](p1) :: (load (<3 x s64>) from %ir.ptr, align 32, addrspace 1) + ; CHECK: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), 
[[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[LOAD1]](<3 x s64>) + ; CHECK: $vgpr0 = COPY [[UV]](s32) + ; CHECK: $vgpr1 = COPY [[UV1]](s32) + ; CHECK: $vgpr2 = COPY [[UV2]](s32) + ; CHECK: $vgpr3 = COPY [[UV3]](s32) + ; CHECK: $vgpr4 = COPY [[UV4]](s32) + ; CHECK: $vgpr5 = COPY [[UV5]](s32) + ; CHECK: [[COPY1:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY]] + ; CHECK: S_SETPC_B64_return [[COPY1]], implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5 %ptr = load volatile <3 x i64> addrspace(1)*, <3 x i64> addrspace(1)* addrspace(4)* undef %val = load <3 x i64>, <3 x i64> addrspace(1)* %ptr ret <3 x i64> %val @@ -567,19 +681,22 @@ define <3 x i64> @v3i64_func_void() #0 { define <4 x i64> @v4i64_func_void() #0 { ; CHECK-LABEL: name: v4i64_func_void ; CHECK: bb.1 (%ir-block.0): - ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p4) = G_IMPLICIT_DEF - ; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[DEF]](p4) :: (volatile load (p1) from `<4 x i64> addrspace(1)* addrspace(4)* undef`, addrspace 4) - ; CHECK-NEXT: [[LOAD1:%[0-9]+]]:_(<4 x s64>) = G_LOAD [[LOAD]](p1) :: (load (<4 x s64>) from %ir.ptr, addrspace 1) - ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32), [[UV6:%[0-9]+]]:_(s32), [[UV7:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[LOAD1]](<4 x s64>) - ; CHECK-NEXT: $vgpr0 = COPY [[UV]](s32) - ; CHECK-NEXT: $vgpr1 = COPY [[UV1]](s32) - ; CHECK-NEXT: $vgpr2 = COPY [[UV2]](s32) - ; CHECK-NEXT: $vgpr3 = COPY [[UV3]](s32) - ; CHECK-NEXT: $vgpr4 = COPY [[UV4]](s32) - ; CHECK-NEXT: $vgpr5 = COPY [[UV5]](s32) - ; CHECK-NEXT: $vgpr6 = COPY [[UV6]](s32) - ; CHECK-NEXT: $vgpr7 = COPY [[UV7]](s32) - ; CHECK-NEXT: SI_RETURN implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7 + ; CHECK: liveins: $sgpr30_sgpr31 + ; CHECK: [[COPY:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 + ; CHECK: [[DEF:%[0-9]+]]:_(p4) = G_IMPLICIT_DEF + ; CHECK: [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[DEF]](p4) :: (volatile load (p1) from `<4 x i64> addrspace(1)* addrspace(4)* undef`, addrspace 4) + ; CHECK: [[LOAD1:%[0-9]+]]:_(<4 x s64>) = G_LOAD [[LOAD]](p1) :: (load (<4 x s64>) from %ir.ptr, addrspace 1) + ; CHECK: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32), [[UV6:%[0-9]+]]:_(s32), [[UV7:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[LOAD1]](<4 x s64>) + ; CHECK: $vgpr0 = COPY [[UV]](s32) + ; CHECK: $vgpr1 = COPY [[UV1]](s32) + ; CHECK: $vgpr2 = COPY [[UV2]](s32) + ; CHECK: $vgpr3 = COPY [[UV3]](s32) + ; CHECK: $vgpr4 = COPY [[UV4]](s32) + ; CHECK: $vgpr5 = COPY [[UV5]](s32) + ; CHECK: $vgpr6 = COPY [[UV6]](s32) + ; CHECK: $vgpr7 = COPY [[UV7]](s32) + ; CHECK: [[COPY1:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY]] + ; CHECK: S_SETPC_B64_return [[COPY1]], implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7 %ptr = load volatile <4 x i64> addrspace(1)*, <4 x i64> addrspace(1)* addrspace(4)* undef %val = load <4 x i64>, <4 x i64> addrspace(1)* %ptr ret <4 x i64> %val @@ -588,21 +705,24 @@ define <4 x i64> @v4i64_func_void() #0 { define <5 x i64> @v5i64_func_void() #0 { ; CHECK-LABEL: name: v5i64_func_void ; CHECK: bb.1 (%ir-block.0): - ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p4) = G_IMPLICIT_DEF - ; CHECK-NEXT: 
[[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[DEF]](p4) :: (volatile load (p1) from `<5 x i64> addrspace(1)* addrspace(4)* undef`, addrspace 4) - ; CHECK-NEXT: [[LOAD1:%[0-9]+]]:_(<5 x s64>) = G_LOAD [[LOAD]](p1) :: (load (<5 x s64>) from %ir.ptr, align 64, addrspace 1) - ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32), [[UV6:%[0-9]+]]:_(s32), [[UV7:%[0-9]+]]:_(s32), [[UV8:%[0-9]+]]:_(s32), [[UV9:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[LOAD1]](<5 x s64>) - ; CHECK-NEXT: $vgpr0 = COPY [[UV]](s32) - ; CHECK-NEXT: $vgpr1 = COPY [[UV1]](s32) - ; CHECK-NEXT: $vgpr2 = COPY [[UV2]](s32) - ; CHECK-NEXT: $vgpr3 = COPY [[UV3]](s32) - ; CHECK-NEXT: $vgpr4 = COPY [[UV4]](s32) - ; CHECK-NEXT: $vgpr5 = COPY [[UV5]](s32) - ; CHECK-NEXT: $vgpr6 = COPY [[UV6]](s32) - ; CHECK-NEXT: $vgpr7 = COPY [[UV7]](s32) - ; CHECK-NEXT: $vgpr8 = COPY [[UV8]](s32) - ; CHECK-NEXT: $vgpr9 = COPY [[UV9]](s32) - ; CHECK-NEXT: SI_RETURN implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7, implicit $vgpr8, implicit $vgpr9 + ; CHECK: liveins: $sgpr30_sgpr31 + ; CHECK: [[COPY:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 + ; CHECK: [[DEF:%[0-9]+]]:_(p4) = G_IMPLICIT_DEF + ; CHECK: [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[DEF]](p4) :: (volatile load (p1) from `<5 x i64> addrspace(1)* addrspace(4)* undef`, addrspace 4) + ; CHECK: [[LOAD1:%[0-9]+]]:_(<5 x s64>) = G_LOAD [[LOAD]](p1) :: (load (<5 x s64>) from %ir.ptr, align 64, addrspace 1) + ; CHECK: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32), [[UV6:%[0-9]+]]:_(s32), [[UV7:%[0-9]+]]:_(s32), [[UV8:%[0-9]+]]:_(s32), [[UV9:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[LOAD1]](<5 x s64>) + ; CHECK: $vgpr0 = COPY [[UV]](s32) + ; CHECK: $vgpr1 = COPY [[UV1]](s32) + ; CHECK: $vgpr2 = COPY [[UV2]](s32) + ; CHECK: $vgpr3 = COPY [[UV3]](s32) + ; CHECK: $vgpr4 = COPY [[UV4]](s32) + ; CHECK: $vgpr5 = COPY [[UV5]](s32) + ; CHECK: $vgpr6 = COPY [[UV6]](s32) + ; CHECK: $vgpr7 = COPY [[UV7]](s32) + ; CHECK: $vgpr8 = COPY [[UV8]](s32) + ; CHECK: $vgpr9 = COPY [[UV9]](s32) + ; CHECK: [[COPY1:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY]] + ; CHECK: S_SETPC_B64_return [[COPY1]], implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7, implicit $vgpr8, implicit $vgpr9 %ptr = load volatile <5 x i64> addrspace(1)*, <5 x i64> addrspace(1)* addrspace(4)* undef %val = load <5 x i64>, <5 x i64> addrspace(1)* %ptr ret <5 x i64> %val @@ -611,27 +731,30 @@ define <5 x i64> @v5i64_func_void() #0 { define <8 x i64> @v8i64_func_void() #0 { ; CHECK-LABEL: name: v8i64_func_void ; CHECK: bb.1 (%ir-block.0): - ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p4) = G_IMPLICIT_DEF - ; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[DEF]](p4) :: (volatile load (p1) from `<8 x i64> addrspace(1)* addrspace(4)* undef`, addrspace 4) - ; CHECK-NEXT: [[LOAD1:%[0-9]+]]:_(<8 x s64>) = G_LOAD [[LOAD]](p1) :: (load (<8 x s64>) from %ir.ptr, addrspace 1) - ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32), [[UV6:%[0-9]+]]:_(s32), [[UV7:%[0-9]+]]:_(s32), [[UV8:%[0-9]+]]:_(s32), [[UV9:%[0-9]+]]:_(s32), [[UV10:%[0-9]+]]:_(s32), [[UV11:%[0-9]+]]:_(s32), [[UV12:%[0-9]+]]:_(s32), [[UV13:%[0-9]+]]:_(s32), [[UV14:%[0-9]+]]:_(s32), 
[[UV15:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[LOAD1]](<8 x s64>) - ; CHECK-NEXT: $vgpr0 = COPY [[UV]](s32) - ; CHECK-NEXT: $vgpr1 = COPY [[UV1]](s32) - ; CHECK-NEXT: $vgpr2 = COPY [[UV2]](s32) - ; CHECK-NEXT: $vgpr3 = COPY [[UV3]](s32) - ; CHECK-NEXT: $vgpr4 = COPY [[UV4]](s32) - ; CHECK-NEXT: $vgpr5 = COPY [[UV5]](s32) - ; CHECK-NEXT: $vgpr6 = COPY [[UV6]](s32) - ; CHECK-NEXT: $vgpr7 = COPY [[UV7]](s32) - ; CHECK-NEXT: $vgpr8 = COPY [[UV8]](s32) - ; CHECK-NEXT: $vgpr9 = COPY [[UV9]](s32) - ; CHECK-NEXT: $vgpr10 = COPY [[UV10]](s32) - ; CHECK-NEXT: $vgpr11 = COPY [[UV11]](s32) - ; CHECK-NEXT: $vgpr12 = COPY [[UV12]](s32) - ; CHECK-NEXT: $vgpr13 = COPY [[UV13]](s32) - ; CHECK-NEXT: $vgpr14 = COPY [[UV14]](s32) - ; CHECK-NEXT: $vgpr15 = COPY [[UV15]](s32) - ; CHECK-NEXT: SI_RETURN implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7, implicit $vgpr8, implicit $vgpr9, implicit $vgpr10, implicit $vgpr11, implicit $vgpr12, implicit $vgpr13, implicit $vgpr14, implicit $vgpr15 + ; CHECK: liveins: $sgpr30_sgpr31 + ; CHECK: [[COPY:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 + ; CHECK: [[DEF:%[0-9]+]]:_(p4) = G_IMPLICIT_DEF + ; CHECK: [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[DEF]](p4) :: (volatile load (p1) from `<8 x i64> addrspace(1)* addrspace(4)* undef`, addrspace 4) + ; CHECK: [[LOAD1:%[0-9]+]]:_(<8 x s64>) = G_LOAD [[LOAD]](p1) :: (load (<8 x s64>) from %ir.ptr, addrspace 1) + ; CHECK: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32), [[UV6:%[0-9]+]]:_(s32), [[UV7:%[0-9]+]]:_(s32), [[UV8:%[0-9]+]]:_(s32), [[UV9:%[0-9]+]]:_(s32), [[UV10:%[0-9]+]]:_(s32), [[UV11:%[0-9]+]]:_(s32), [[UV12:%[0-9]+]]:_(s32), [[UV13:%[0-9]+]]:_(s32), [[UV14:%[0-9]+]]:_(s32), [[UV15:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[LOAD1]](<8 x s64>) + ; CHECK: $vgpr0 = COPY [[UV]](s32) + ; CHECK: $vgpr1 = COPY [[UV1]](s32) + ; CHECK: $vgpr2 = COPY [[UV2]](s32) + ; CHECK: $vgpr3 = COPY [[UV3]](s32) + ; CHECK: $vgpr4 = COPY [[UV4]](s32) + ; CHECK: $vgpr5 = COPY [[UV5]](s32) + ; CHECK: $vgpr6 = COPY [[UV6]](s32) + ; CHECK: $vgpr7 = COPY [[UV7]](s32) + ; CHECK: $vgpr8 = COPY [[UV8]](s32) + ; CHECK: $vgpr9 = COPY [[UV9]](s32) + ; CHECK: $vgpr10 = COPY [[UV10]](s32) + ; CHECK: $vgpr11 = COPY [[UV11]](s32) + ; CHECK: $vgpr12 = COPY [[UV12]](s32) + ; CHECK: $vgpr13 = COPY [[UV13]](s32) + ; CHECK: $vgpr14 = COPY [[UV14]](s32) + ; CHECK: $vgpr15 = COPY [[UV15]](s32) + ; CHECK: [[COPY1:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY]] + ; CHECK: S_SETPC_B64_return [[COPY1]], implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7, implicit $vgpr8, implicit $vgpr9, implicit $vgpr10, implicit $vgpr11, implicit $vgpr12, implicit $vgpr13, implicit $vgpr14, implicit $vgpr15 %ptr = load volatile <8 x i64> addrspace(1)*, <8 x i64> addrspace(1)* addrspace(4)* undef %val = load <8 x i64>, <8 x i64> addrspace(1)* %ptr ret <8 x i64> %val @@ -640,43 +763,46 @@ define <8 x i64> @v8i64_func_void() #0 { define <16 x i64> @v16i64_func_void() #0 { ; CHECK-LABEL: name: v16i64_func_void ; CHECK: bb.1 (%ir-block.0): - ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p4) = G_IMPLICIT_DEF - ; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[DEF]](p4) :: (volatile load (p1) from `<16 x i64> addrspace(1)* addrspace(4)* undef`, addrspace 4) - ; CHECK-NEXT: [[LOAD1:%[0-9]+]]:_(<16 x s64>) = G_LOAD [[LOAD]](p1) :: (load (<16 x s64>) from %ir.ptr, addrspace 1) 
- ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32), [[UV6:%[0-9]+]]:_(s32), [[UV7:%[0-9]+]]:_(s32), [[UV8:%[0-9]+]]:_(s32), [[UV9:%[0-9]+]]:_(s32), [[UV10:%[0-9]+]]:_(s32), [[UV11:%[0-9]+]]:_(s32), [[UV12:%[0-9]+]]:_(s32), [[UV13:%[0-9]+]]:_(s32), [[UV14:%[0-9]+]]:_(s32), [[UV15:%[0-9]+]]:_(s32), [[UV16:%[0-9]+]]:_(s32), [[UV17:%[0-9]+]]:_(s32), [[UV18:%[0-9]+]]:_(s32), [[UV19:%[0-9]+]]:_(s32), [[UV20:%[0-9]+]]:_(s32), [[UV21:%[0-9]+]]:_(s32), [[UV22:%[0-9]+]]:_(s32), [[UV23:%[0-9]+]]:_(s32), [[UV24:%[0-9]+]]:_(s32), [[UV25:%[0-9]+]]:_(s32), [[UV26:%[0-9]+]]:_(s32), [[UV27:%[0-9]+]]:_(s32), [[UV28:%[0-9]+]]:_(s32), [[UV29:%[0-9]+]]:_(s32), [[UV30:%[0-9]+]]:_(s32), [[UV31:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[LOAD1]](<16 x s64>) - ; CHECK-NEXT: $vgpr0 = COPY [[UV]](s32) - ; CHECK-NEXT: $vgpr1 = COPY [[UV1]](s32) - ; CHECK-NEXT: $vgpr2 = COPY [[UV2]](s32) - ; CHECK-NEXT: $vgpr3 = COPY [[UV3]](s32) - ; CHECK-NEXT: $vgpr4 = COPY [[UV4]](s32) - ; CHECK-NEXT: $vgpr5 = COPY [[UV5]](s32) - ; CHECK-NEXT: $vgpr6 = COPY [[UV6]](s32) - ; CHECK-NEXT: $vgpr7 = COPY [[UV7]](s32) - ; CHECK-NEXT: $vgpr8 = COPY [[UV8]](s32) - ; CHECK-NEXT: $vgpr9 = COPY [[UV9]](s32) - ; CHECK-NEXT: $vgpr10 = COPY [[UV10]](s32) - ; CHECK-NEXT: $vgpr11 = COPY [[UV11]](s32) - ; CHECK-NEXT: $vgpr12 = COPY [[UV12]](s32) - ; CHECK-NEXT: $vgpr13 = COPY [[UV13]](s32) - ; CHECK-NEXT: $vgpr14 = COPY [[UV14]](s32) - ; CHECK-NEXT: $vgpr15 = COPY [[UV15]](s32) - ; CHECK-NEXT: $vgpr16 = COPY [[UV16]](s32) - ; CHECK-NEXT: $vgpr17 = COPY [[UV17]](s32) - ; CHECK-NEXT: $vgpr18 = COPY [[UV18]](s32) - ; CHECK-NEXT: $vgpr19 = COPY [[UV19]](s32) - ; CHECK-NEXT: $vgpr20 = COPY [[UV20]](s32) - ; CHECK-NEXT: $vgpr21 = COPY [[UV21]](s32) - ; CHECK-NEXT: $vgpr22 = COPY [[UV22]](s32) - ; CHECK-NEXT: $vgpr23 = COPY [[UV23]](s32) - ; CHECK-NEXT: $vgpr24 = COPY [[UV24]](s32) - ; CHECK-NEXT: $vgpr25 = COPY [[UV25]](s32) - ; CHECK-NEXT: $vgpr26 = COPY [[UV26]](s32) - ; CHECK-NEXT: $vgpr27 = COPY [[UV27]](s32) - ; CHECK-NEXT: $vgpr28 = COPY [[UV28]](s32) - ; CHECK-NEXT: $vgpr29 = COPY [[UV29]](s32) - ; CHECK-NEXT: $vgpr30 = COPY [[UV30]](s32) - ; CHECK-NEXT: $vgpr31 = COPY [[UV31]](s32) - ; CHECK-NEXT: SI_RETURN implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7, implicit $vgpr8, implicit $vgpr9, implicit $vgpr10, implicit $vgpr11, implicit $vgpr12, implicit $vgpr13, implicit $vgpr14, implicit $vgpr15, implicit $vgpr16, implicit $vgpr17, implicit $vgpr18, implicit $vgpr19, implicit $vgpr20, implicit $vgpr21, implicit $vgpr22, implicit $vgpr23, implicit $vgpr24, implicit $vgpr25, implicit $vgpr26, implicit $vgpr27, implicit $vgpr28, implicit $vgpr29, implicit $vgpr30, implicit $vgpr31 + ; CHECK: liveins: $sgpr30_sgpr31 + ; CHECK: [[COPY:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 + ; CHECK: [[DEF:%[0-9]+]]:_(p4) = G_IMPLICIT_DEF + ; CHECK: [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[DEF]](p4) :: (volatile load (p1) from `<16 x i64> addrspace(1)* addrspace(4)* undef`, addrspace 4) + ; CHECK: [[LOAD1:%[0-9]+]]:_(<16 x s64>) = G_LOAD [[LOAD]](p1) :: (load (<16 x s64>) from %ir.ptr, addrspace 1) + ; CHECK: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32), [[UV6:%[0-9]+]]:_(s32), [[UV7:%[0-9]+]]:_(s32), [[UV8:%[0-9]+]]:_(s32), [[UV9:%[0-9]+]]:_(s32), [[UV10:%[0-9]+]]:_(s32), 
[[UV11:%[0-9]+]]:_(s32), [[UV12:%[0-9]+]]:_(s32), [[UV13:%[0-9]+]]:_(s32), [[UV14:%[0-9]+]]:_(s32), [[UV15:%[0-9]+]]:_(s32), [[UV16:%[0-9]+]]:_(s32), [[UV17:%[0-9]+]]:_(s32), [[UV18:%[0-9]+]]:_(s32), [[UV19:%[0-9]+]]:_(s32), [[UV20:%[0-9]+]]:_(s32), [[UV21:%[0-9]+]]:_(s32), [[UV22:%[0-9]+]]:_(s32), [[UV23:%[0-9]+]]:_(s32), [[UV24:%[0-9]+]]:_(s32), [[UV25:%[0-9]+]]:_(s32), [[UV26:%[0-9]+]]:_(s32), [[UV27:%[0-9]+]]:_(s32), [[UV28:%[0-9]+]]:_(s32), [[UV29:%[0-9]+]]:_(s32), [[UV30:%[0-9]+]]:_(s32), [[UV31:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[LOAD1]](<16 x s64>) + ; CHECK: $vgpr0 = COPY [[UV]](s32) + ; CHECK: $vgpr1 = COPY [[UV1]](s32) + ; CHECK: $vgpr2 = COPY [[UV2]](s32) + ; CHECK: $vgpr3 = COPY [[UV3]](s32) + ; CHECK: $vgpr4 = COPY [[UV4]](s32) + ; CHECK: $vgpr5 = COPY [[UV5]](s32) + ; CHECK: $vgpr6 = COPY [[UV6]](s32) + ; CHECK: $vgpr7 = COPY [[UV7]](s32) + ; CHECK: $vgpr8 = COPY [[UV8]](s32) + ; CHECK: $vgpr9 = COPY [[UV9]](s32) + ; CHECK: $vgpr10 = COPY [[UV10]](s32) + ; CHECK: $vgpr11 = COPY [[UV11]](s32) + ; CHECK: $vgpr12 = COPY [[UV12]](s32) + ; CHECK: $vgpr13 = COPY [[UV13]](s32) + ; CHECK: $vgpr14 = COPY [[UV14]](s32) + ; CHECK: $vgpr15 = COPY [[UV15]](s32) + ; CHECK: $vgpr16 = COPY [[UV16]](s32) + ; CHECK: $vgpr17 = COPY [[UV17]](s32) + ; CHECK: $vgpr18 = COPY [[UV18]](s32) + ; CHECK: $vgpr19 = COPY [[UV19]](s32) + ; CHECK: $vgpr20 = COPY [[UV20]](s32) + ; CHECK: $vgpr21 = COPY [[UV21]](s32) + ; CHECK: $vgpr22 = COPY [[UV22]](s32) + ; CHECK: $vgpr23 = COPY [[UV23]](s32) + ; CHECK: $vgpr24 = COPY [[UV24]](s32) + ; CHECK: $vgpr25 = COPY [[UV25]](s32) + ; CHECK: $vgpr26 = COPY [[UV26]](s32) + ; CHECK: $vgpr27 = COPY [[UV27]](s32) + ; CHECK: $vgpr28 = COPY [[UV28]](s32) + ; CHECK: $vgpr29 = COPY [[UV29]](s32) + ; CHECK: $vgpr30 = COPY [[UV30]](s32) + ; CHECK: $vgpr31 = COPY [[UV31]](s32) + ; CHECK: [[COPY1:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY]] + ; CHECK: S_SETPC_B64_return [[COPY1]], implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7, implicit $vgpr8, implicit $vgpr9, implicit $vgpr10, implicit $vgpr11, implicit $vgpr12, implicit $vgpr13, implicit $vgpr14, implicit $vgpr15, implicit $vgpr16, implicit $vgpr17, implicit $vgpr18, implicit $vgpr19, implicit $vgpr20, implicit $vgpr21, implicit $vgpr22, implicit $vgpr23, implicit $vgpr24, implicit $vgpr25, implicit $vgpr26, implicit $vgpr27, implicit $vgpr28, implicit $vgpr29, implicit $vgpr30, implicit $vgpr31 %ptr = load volatile <16 x i64> addrspace(1)*, <16 x i64> addrspace(1)* addrspace(4)* undef %val = load <16 x i64>, <16 x i64> addrspace(1)* %ptr ret <16 x i64> %val @@ -685,10 +811,13 @@ define <16 x i64> @v16i64_func_void() #0 { define <2 x i16> @v2i16_func_void() #0 { ; CHECK-LABEL: name: v2i16_func_void ; CHECK: bb.1 (%ir-block.0): - ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF - ; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(<2 x s16>) = G_LOAD [[DEF]](p1) :: (load (<2 x s16>) from `<2 x i16> addrspace(1)* undef`, addrspace 1) - ; CHECK-NEXT: $vgpr0 = COPY [[LOAD]](<2 x s16>) - ; CHECK-NEXT: SI_RETURN implicit $vgpr0 + ; CHECK: liveins: $sgpr30_sgpr31 + ; CHECK: [[COPY:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 + ; CHECK: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF + ; CHECK: [[LOAD:%[0-9]+]]:_(<2 x s16>) = G_LOAD [[DEF]](p1) :: (load (<2 x s16>) from `<2 x i16> addrspace(1)* undef`, addrspace 1) + ; CHECK: $vgpr0 = COPY [[LOAD]](<2 x s16>) + ; CHECK: [[COPY1:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY]] + ; CHECK: S_SETPC_B64_return [[COPY1]], implicit $vgpr0 %val = 
load <2 x i16>, <2 x i16> addrspace(1)* undef ret <2 x i16> %val } @@ -696,10 +825,13 @@ define <2 x i16> @v2i16_func_void() #0 { define <2 x half> @v2f16_func_void() #0 { ; CHECK-LABEL: name: v2f16_func_void ; CHECK: bb.1 (%ir-block.0): - ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF - ; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(<2 x s16>) = G_LOAD [[DEF]](p1) :: (load (<2 x s16>) from `<2 x half> addrspace(1)* undef`, addrspace 1) - ; CHECK-NEXT: $vgpr0 = COPY [[LOAD]](<2 x s16>) - ; CHECK-NEXT: SI_RETURN implicit $vgpr0 + ; CHECK: liveins: $sgpr30_sgpr31 + ; CHECK: [[COPY:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 + ; CHECK: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF + ; CHECK: [[LOAD:%[0-9]+]]:_(<2 x s16>) = G_LOAD [[DEF]](p1) :: (load (<2 x s16>) from `<2 x half> addrspace(1)* undef`, addrspace 1) + ; CHECK: $vgpr0 = COPY [[LOAD]](<2 x s16>) + ; CHECK: [[COPY1:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY]] + ; CHECK: S_SETPC_B64_return [[COPY1]], implicit $vgpr0 %val = load <2 x half>, <2 x half> addrspace(1)* undef ret <2 x half> %val } @@ -707,14 +839,17 @@ define <2 x half> @v2f16_func_void() #0 { define <3 x i16> @v3i16_func_void() #0 { ; CHECK-LABEL: name: v3i16_func_void ; CHECK: bb.1 (%ir-block.0): - ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF - ; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(<3 x s16>) = G_LOAD [[DEF]](p1) :: (load (<3 x s16>) from `<3 x i16> addrspace(1)* undef`, align 8, addrspace 1) - ; CHECK-NEXT: [[DEF1:%[0-9]+]]:_(<3 x s16>) = G_IMPLICIT_DEF - ; CHECK-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[LOAD]](<3 x s16>), [[DEF1]](<3 x s16>) - ; CHECK-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>), [[UV2:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<6 x s16>) - ; CHECK-NEXT: $vgpr0 = COPY [[UV]](<2 x s16>) - ; CHECK-NEXT: $vgpr1 = COPY [[UV1]](<2 x s16>) - ; CHECK-NEXT: SI_RETURN implicit $vgpr0, implicit $vgpr1 + ; CHECK: liveins: $sgpr30_sgpr31 + ; CHECK: [[COPY:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 + ; CHECK: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF + ; CHECK: [[LOAD:%[0-9]+]]:_(<3 x s16>) = G_LOAD [[DEF]](p1) :: (load (<3 x s16>) from `<3 x i16> addrspace(1)* undef`, align 8, addrspace 1) + ; CHECK: [[DEF1:%[0-9]+]]:_(<3 x s16>) = G_IMPLICIT_DEF + ; CHECK: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[LOAD]](<3 x s16>), [[DEF1]](<3 x s16>) + ; CHECK: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>), [[UV2:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<6 x s16>) + ; CHECK: $vgpr0 = COPY [[UV]](<2 x s16>) + ; CHECK: $vgpr1 = COPY [[UV1]](<2 x s16>) + ; CHECK: [[COPY1:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY]] + ; CHECK: S_SETPC_B64_return [[COPY1]], implicit $vgpr0, implicit $vgpr1 %val = load <3 x i16>, <3 x i16> addrspace(1)* undef ret <3 x i16> %val } @@ -722,12 +857,15 @@ define <3 x i16> @v3i16_func_void() #0 { define <4 x i16> @v4i16_func_void() #0 { ; CHECK-LABEL: name: v4i16_func_void ; CHECK: bb.1 (%ir-block.0): - ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF - ; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(<4 x s16>) = G_LOAD [[DEF]](p1) :: (load (<4 x s16>) from `<4 x i16> addrspace(1)* undef`, addrspace 1) - ; CHECK-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[LOAD]](<4 x s16>) - ; CHECK-NEXT: $vgpr0 = COPY [[UV]](<2 x s16>) - ; CHECK-NEXT: $vgpr1 = COPY [[UV1]](<2 x s16>) - ; CHECK-NEXT: SI_RETURN implicit $vgpr0, implicit $vgpr1 + ; CHECK: liveins: $sgpr30_sgpr31 + ; CHECK: [[COPY:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 + ; CHECK: [[DEF:%[0-9]+]]:_(p1) 
= G_IMPLICIT_DEF + ; CHECK: [[LOAD:%[0-9]+]]:_(<4 x s16>) = G_LOAD [[DEF]](p1) :: (load (<4 x s16>) from `<4 x i16> addrspace(1)* undef`, addrspace 1) + ; CHECK: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[LOAD]](<4 x s16>) + ; CHECK: $vgpr0 = COPY [[UV]](<2 x s16>) + ; CHECK: $vgpr1 = COPY [[UV1]](<2 x s16>) + ; CHECK: [[COPY1:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY]] + ; CHECK: S_SETPC_B64_return [[COPY1]], implicit $vgpr0, implicit $vgpr1 %val = load <4 x i16>, <4 x i16> addrspace(1)* undef ret <4 x i16> %val } @@ -735,12 +873,15 @@ define <4 x i16> @v4i16_func_void() #0 { define <4 x half> @v4f16_func_void() #0 { ; CHECK-LABEL: name: v4f16_func_void ; CHECK: bb.1 (%ir-block.0): - ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF - ; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(<4 x s16>) = G_LOAD [[DEF]](p1) :: (load (<4 x s16>) from `<4 x half> addrspace(1)* undef`, addrspace 1) - ; CHECK-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[LOAD]](<4 x s16>) - ; CHECK-NEXT: $vgpr0 = COPY [[UV]](<2 x s16>) - ; CHECK-NEXT: $vgpr1 = COPY [[UV1]](<2 x s16>) - ; CHECK-NEXT: SI_RETURN implicit $vgpr0, implicit $vgpr1 + ; CHECK: liveins: $sgpr30_sgpr31 + ; CHECK: [[COPY:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 + ; CHECK: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF + ; CHECK: [[LOAD:%[0-9]+]]:_(<4 x s16>) = G_LOAD [[DEF]](p1) :: (load (<4 x s16>) from `<4 x half> addrspace(1)* undef`, addrspace 1) + ; CHECK: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[LOAD]](<4 x s16>) + ; CHECK: $vgpr0 = COPY [[UV]](<2 x s16>) + ; CHECK: $vgpr1 = COPY [[UV1]](<2 x s16>) + ; CHECK: [[COPY1:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY]] + ; CHECK: S_SETPC_B64_return [[COPY1]], implicit $vgpr0, implicit $vgpr1 %val = load <4 x half>, <4 x half> addrspace(1)* undef ret <4 x half> %val } @@ -748,16 +889,19 @@ define <4 x half> @v4f16_func_void() #0 { define <5 x i16> @v5i16_func_void() #0 { ; CHECK-LABEL: name: v5i16_func_void ; CHECK: bb.1 (%ir-block.0): - ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p4) = G_IMPLICIT_DEF - ; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[DEF]](p4) :: (volatile load (p1) from `<5 x i16> addrspace(1)* addrspace(4)* undef`, addrspace 4) - ; CHECK-NEXT: [[LOAD1:%[0-9]+]]:_(<5 x s16>) = G_LOAD [[LOAD]](p1) :: (load (<5 x s16>) from %ir.ptr, align 16, addrspace 1) - ; CHECK-NEXT: [[DEF1:%[0-9]+]]:_(<5 x s16>) = G_IMPLICIT_DEF - ; CHECK-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<10 x s16>) = G_CONCAT_VECTORS [[LOAD1]](<5 x s16>), [[DEF1]](<5 x s16>) - ; CHECK-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>), [[UV2:%[0-9]+]]:_(<2 x s16>), [[UV3:%[0-9]+]]:_(<2 x s16>), [[UV4:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<10 x s16>) - ; CHECK-NEXT: $vgpr0 = COPY [[UV]](<2 x s16>) - ; CHECK-NEXT: $vgpr1 = COPY [[UV1]](<2 x s16>) - ; CHECK-NEXT: $vgpr2 = COPY [[UV2]](<2 x s16>) - ; CHECK-NEXT: SI_RETURN implicit $vgpr0, implicit $vgpr1, implicit $vgpr2 + ; CHECK: liveins: $sgpr30_sgpr31 + ; CHECK: [[COPY:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 + ; CHECK: [[DEF:%[0-9]+]]:_(p4) = G_IMPLICIT_DEF + ; CHECK: [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[DEF]](p4) :: (volatile load (p1) from `<5 x i16> addrspace(1)* addrspace(4)* undef`, addrspace 4) + ; CHECK: [[LOAD1:%[0-9]+]]:_(<5 x s16>) = G_LOAD [[LOAD]](p1) :: (load (<5 x s16>) from %ir.ptr, align 16, addrspace 1) + ; CHECK: [[DEF1:%[0-9]+]]:_(<5 x s16>) = G_IMPLICIT_DEF + ; CHECK: [[CONCAT_VECTORS:%[0-9]+]]:_(<10 x s16>) = G_CONCAT_VECTORS [[LOAD1]](<5 x s16>), 
[[DEF1]](<5 x s16>) + ; CHECK: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>), [[UV2:%[0-9]+]]:_(<2 x s16>), [[UV3:%[0-9]+]]:_(<2 x s16>), [[UV4:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<10 x s16>) + ; CHECK: $vgpr0 = COPY [[UV]](<2 x s16>) + ; CHECK: $vgpr1 = COPY [[UV1]](<2 x s16>) + ; CHECK: $vgpr2 = COPY [[UV2]](<2 x s16>) + ; CHECK: [[COPY1:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY]] + ; CHECK: S_SETPC_B64_return [[COPY1]], implicit $vgpr0, implicit $vgpr1, implicit $vgpr2 %ptr = load volatile <5 x i16> addrspace(1)*, <5 x i16> addrspace(1)* addrspace(4)* undef %val = load <5 x i16>, <5 x i16> addrspace(1)* %ptr ret <5 x i16> %val @@ -766,15 +910,18 @@ define <5 x i16> @v5i16_func_void() #0 { define <8 x i16> @v8i16_func_void() #0 { ; CHECK-LABEL: name: v8i16_func_void ; CHECK: bb.1 (%ir-block.0): - ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p4) = G_IMPLICIT_DEF - ; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[DEF]](p4) :: (volatile load (p1) from `<8 x i16> addrspace(1)* addrspace(4)* undef`, addrspace 4) - ; CHECK-NEXT: [[LOAD1:%[0-9]+]]:_(<8 x s16>) = G_LOAD [[LOAD]](p1) :: (load (<8 x s16>) from %ir.ptr, addrspace 1) - ; CHECK-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>), [[UV2:%[0-9]+]]:_(<2 x s16>), [[UV3:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[LOAD1]](<8 x s16>) - ; CHECK-NEXT: $vgpr0 = COPY [[UV]](<2 x s16>) - ; CHECK-NEXT: $vgpr1 = COPY [[UV1]](<2 x s16>) - ; CHECK-NEXT: $vgpr2 = COPY [[UV2]](<2 x s16>) - ; CHECK-NEXT: $vgpr3 = COPY [[UV3]](<2 x s16>) - ; CHECK-NEXT: SI_RETURN implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3 + ; CHECK: liveins: $sgpr30_sgpr31 + ; CHECK: [[COPY:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 + ; CHECK: [[DEF:%[0-9]+]]:_(p4) = G_IMPLICIT_DEF + ; CHECK: [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[DEF]](p4) :: (volatile load (p1) from `<8 x i16> addrspace(1)* addrspace(4)* undef`, addrspace 4) + ; CHECK: [[LOAD1:%[0-9]+]]:_(<8 x s16>) = G_LOAD [[LOAD]](p1) :: (load (<8 x s16>) from %ir.ptr, addrspace 1) + ; CHECK: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>), [[UV2:%[0-9]+]]:_(<2 x s16>), [[UV3:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[LOAD1]](<8 x s16>) + ; CHECK: $vgpr0 = COPY [[UV]](<2 x s16>) + ; CHECK: $vgpr1 = COPY [[UV1]](<2 x s16>) + ; CHECK: $vgpr2 = COPY [[UV2]](<2 x s16>) + ; CHECK: $vgpr3 = COPY [[UV3]](<2 x s16>) + ; CHECK: [[COPY1:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY]] + ; CHECK: S_SETPC_B64_return [[COPY1]], implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3 %ptr = load volatile <8 x i16> addrspace(1)*, <8 x i16> addrspace(1)* addrspace(4)* undef %val = load <8 x i16>, <8 x i16> addrspace(1)* %ptr ret <8 x i16> %val @@ -783,19 +930,22 @@ define <8 x i16> @v8i16_func_void() #0 { define <16 x i16> @v16i16_func_void() #0 { ; CHECK-LABEL: name: v16i16_func_void ; CHECK: bb.1 (%ir-block.0): - ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p4) = G_IMPLICIT_DEF - ; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[DEF]](p4) :: (volatile load (p1) from `<16 x i16> addrspace(1)* addrspace(4)* undef`, addrspace 4) - ; CHECK-NEXT: [[LOAD1:%[0-9]+]]:_(<16 x s16>) = G_LOAD [[LOAD]](p1) :: (load (<16 x s16>) from %ir.ptr, addrspace 1) - ; CHECK-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>), [[UV2:%[0-9]+]]:_(<2 x s16>), [[UV3:%[0-9]+]]:_(<2 x s16>), [[UV4:%[0-9]+]]:_(<2 x s16>), [[UV5:%[0-9]+]]:_(<2 x s16>), [[UV6:%[0-9]+]]:_(<2 x s16>), [[UV7:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[LOAD1]](<16 x s16>) - ; CHECK-NEXT: $vgpr0 = COPY [[UV]](<2 x s16>) - ; CHECK-NEXT: $vgpr1 = 
COPY [[UV1]](<2 x s16>) - ; CHECK-NEXT: $vgpr2 = COPY [[UV2]](<2 x s16>) - ; CHECK-NEXT: $vgpr3 = COPY [[UV3]](<2 x s16>) - ; CHECK-NEXT: $vgpr4 = COPY [[UV4]](<2 x s16>) - ; CHECK-NEXT: $vgpr5 = COPY [[UV5]](<2 x s16>) - ; CHECK-NEXT: $vgpr6 = COPY [[UV6]](<2 x s16>) - ; CHECK-NEXT: $vgpr7 = COPY [[UV7]](<2 x s16>) - ; CHECK-NEXT: SI_RETURN implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7 + ; CHECK: liveins: $sgpr30_sgpr31 + ; CHECK: [[COPY:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 + ; CHECK: [[DEF:%[0-9]+]]:_(p4) = G_IMPLICIT_DEF + ; CHECK: [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[DEF]](p4) :: (volatile load (p1) from `<16 x i16> addrspace(1)* addrspace(4)* undef`, addrspace 4) + ; CHECK: [[LOAD1:%[0-9]+]]:_(<16 x s16>) = G_LOAD [[LOAD]](p1) :: (load (<16 x s16>) from %ir.ptr, addrspace 1) + ; CHECK: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>), [[UV2:%[0-9]+]]:_(<2 x s16>), [[UV3:%[0-9]+]]:_(<2 x s16>), [[UV4:%[0-9]+]]:_(<2 x s16>), [[UV5:%[0-9]+]]:_(<2 x s16>), [[UV6:%[0-9]+]]:_(<2 x s16>), [[UV7:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[LOAD1]](<16 x s16>) + ; CHECK: $vgpr0 = COPY [[UV]](<2 x s16>) + ; CHECK: $vgpr1 = COPY [[UV1]](<2 x s16>) + ; CHECK: $vgpr2 = COPY [[UV2]](<2 x s16>) + ; CHECK: $vgpr3 = COPY [[UV3]](<2 x s16>) + ; CHECK: $vgpr4 = COPY [[UV4]](<2 x s16>) + ; CHECK: $vgpr5 = COPY [[UV5]](<2 x s16>) + ; CHECK: $vgpr6 = COPY [[UV6]](<2 x s16>) + ; CHECK: $vgpr7 = COPY [[UV7]](<2 x s16>) + ; CHECK: [[COPY1:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY]] + ; CHECK: S_SETPC_B64_return [[COPY1]], implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7 %ptr = load volatile <16 x i16> addrspace(1)*, <16 x i16> addrspace(1)* addrspace(4)* undef %val = load <16 x i16>, <16 x i16> addrspace(1)* %ptr ret <16 x i16> %val @@ -804,59 +954,62 @@ define <16 x i16> @v16i16_func_void() #0 { define <16 x i8> @v16i8_func_void() #0 { ; CHECK-LABEL: name: v16i8_func_void ; CHECK: bb.1 (%ir-block.0): - ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p4) = G_IMPLICIT_DEF - ; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[DEF]](p4) :: (volatile load (p1) from `<16 x i8> addrspace(1)* addrspace(4)* undef`, addrspace 4) - ; CHECK-NEXT: [[LOAD1:%[0-9]+]]:_(<16 x s8>) = G_LOAD [[LOAD]](p1) :: (load (<16 x s8>) from %ir.ptr, addrspace 1) - ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s8), [[UV1:%[0-9]+]]:_(s8), [[UV2:%[0-9]+]]:_(s8), [[UV3:%[0-9]+]]:_(s8), [[UV4:%[0-9]+]]:_(s8), [[UV5:%[0-9]+]]:_(s8), [[UV6:%[0-9]+]]:_(s8), [[UV7:%[0-9]+]]:_(s8), [[UV8:%[0-9]+]]:_(s8), [[UV9:%[0-9]+]]:_(s8), [[UV10:%[0-9]+]]:_(s8), [[UV11:%[0-9]+]]:_(s8), [[UV12:%[0-9]+]]:_(s8), [[UV13:%[0-9]+]]:_(s8), [[UV14:%[0-9]+]]:_(s8), [[UV15:%[0-9]+]]:_(s8) = G_UNMERGE_VALUES [[LOAD1]](<16 x s8>) - ; CHECK-NEXT: [[ANYEXT:%[0-9]+]]:_(s16) = G_ANYEXT [[UV]](s8) - ; CHECK-NEXT: [[ANYEXT1:%[0-9]+]]:_(s16) = G_ANYEXT [[UV1]](s8) - ; CHECK-NEXT: [[ANYEXT2:%[0-9]+]]:_(s16) = G_ANYEXT [[UV2]](s8) - ; CHECK-NEXT: [[ANYEXT3:%[0-9]+]]:_(s16) = G_ANYEXT [[UV3]](s8) - ; CHECK-NEXT: [[ANYEXT4:%[0-9]+]]:_(s16) = G_ANYEXT [[UV4]](s8) - ; CHECK-NEXT: [[ANYEXT5:%[0-9]+]]:_(s16) = G_ANYEXT [[UV5]](s8) - ; CHECK-NEXT: [[ANYEXT6:%[0-9]+]]:_(s16) = G_ANYEXT [[UV6]](s8) - ; CHECK-NEXT: [[ANYEXT7:%[0-9]+]]:_(s16) = G_ANYEXT [[UV7]](s8) - ; CHECK-NEXT: [[ANYEXT8:%[0-9]+]]:_(s16) = G_ANYEXT [[UV8]](s8) - ; CHECK-NEXT: [[ANYEXT9:%[0-9]+]]:_(s16) = G_ANYEXT [[UV9]](s8) - ; CHECK-NEXT: [[ANYEXT10:%[0-9]+]]:_(s16) = G_ANYEXT 
[[UV10]](s8) - ; CHECK-NEXT: [[ANYEXT11:%[0-9]+]]:_(s16) = G_ANYEXT [[UV11]](s8) - ; CHECK-NEXT: [[ANYEXT12:%[0-9]+]]:_(s16) = G_ANYEXT [[UV12]](s8) - ; CHECK-NEXT: [[ANYEXT13:%[0-9]+]]:_(s16) = G_ANYEXT [[UV13]](s8) - ; CHECK-NEXT: [[ANYEXT14:%[0-9]+]]:_(s16) = G_ANYEXT [[UV14]](s8) - ; CHECK-NEXT: [[ANYEXT15:%[0-9]+]]:_(s16) = G_ANYEXT [[UV15]](s8) - ; CHECK-NEXT: [[ANYEXT16:%[0-9]+]]:_(s32) = G_ANYEXT [[ANYEXT]](s16) - ; CHECK-NEXT: $vgpr0 = COPY [[ANYEXT16]](s32) - ; CHECK-NEXT: [[ANYEXT17:%[0-9]+]]:_(s32) = G_ANYEXT [[ANYEXT1]](s16) - ; CHECK-NEXT: $vgpr1 = COPY [[ANYEXT17]](s32) - ; CHECK-NEXT: [[ANYEXT18:%[0-9]+]]:_(s32) = G_ANYEXT [[ANYEXT2]](s16) - ; CHECK-NEXT: $vgpr2 = COPY [[ANYEXT18]](s32) - ; CHECK-NEXT: [[ANYEXT19:%[0-9]+]]:_(s32) = G_ANYEXT [[ANYEXT3]](s16) - ; CHECK-NEXT: $vgpr3 = COPY [[ANYEXT19]](s32) - ; CHECK-NEXT: [[ANYEXT20:%[0-9]+]]:_(s32) = G_ANYEXT [[ANYEXT4]](s16) - ; CHECK-NEXT: $vgpr4 = COPY [[ANYEXT20]](s32) - ; CHECK-NEXT: [[ANYEXT21:%[0-9]+]]:_(s32) = G_ANYEXT [[ANYEXT5]](s16) - ; CHECK-NEXT: $vgpr5 = COPY [[ANYEXT21]](s32) - ; CHECK-NEXT: [[ANYEXT22:%[0-9]+]]:_(s32) = G_ANYEXT [[ANYEXT6]](s16) - ; CHECK-NEXT: $vgpr6 = COPY [[ANYEXT22]](s32) - ; CHECK-NEXT: [[ANYEXT23:%[0-9]+]]:_(s32) = G_ANYEXT [[ANYEXT7]](s16) - ; CHECK-NEXT: $vgpr7 = COPY [[ANYEXT23]](s32) - ; CHECK-NEXT: [[ANYEXT24:%[0-9]+]]:_(s32) = G_ANYEXT [[ANYEXT8]](s16) - ; CHECK-NEXT: $vgpr8 = COPY [[ANYEXT24]](s32) - ; CHECK-NEXT: [[ANYEXT25:%[0-9]+]]:_(s32) = G_ANYEXT [[ANYEXT9]](s16) - ; CHECK-NEXT: $vgpr9 = COPY [[ANYEXT25]](s32) - ; CHECK-NEXT: [[ANYEXT26:%[0-9]+]]:_(s32) = G_ANYEXT [[ANYEXT10]](s16) - ; CHECK-NEXT: $vgpr10 = COPY [[ANYEXT26]](s32) - ; CHECK-NEXT: [[ANYEXT27:%[0-9]+]]:_(s32) = G_ANYEXT [[ANYEXT11]](s16) - ; CHECK-NEXT: $vgpr11 = COPY [[ANYEXT27]](s32) - ; CHECK-NEXT: [[ANYEXT28:%[0-9]+]]:_(s32) = G_ANYEXT [[ANYEXT12]](s16) - ; CHECK-NEXT: $vgpr12 = COPY [[ANYEXT28]](s32) - ; CHECK-NEXT: [[ANYEXT29:%[0-9]+]]:_(s32) = G_ANYEXT [[ANYEXT13]](s16) - ; CHECK-NEXT: $vgpr13 = COPY [[ANYEXT29]](s32) - ; CHECK-NEXT: [[ANYEXT30:%[0-9]+]]:_(s32) = G_ANYEXT [[ANYEXT14]](s16) - ; CHECK-NEXT: $vgpr14 = COPY [[ANYEXT30]](s32) - ; CHECK-NEXT: [[ANYEXT31:%[0-9]+]]:_(s32) = G_ANYEXT [[ANYEXT15]](s16) - ; CHECK-NEXT: $vgpr15 = COPY [[ANYEXT31]](s32) - ; CHECK-NEXT: SI_RETURN implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7, implicit $vgpr8, implicit $vgpr9, implicit $vgpr10, implicit $vgpr11, implicit $vgpr12, implicit $vgpr13, implicit $vgpr14, implicit $vgpr15 + ; CHECK: liveins: $sgpr30_sgpr31 + ; CHECK: [[COPY:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 + ; CHECK: [[DEF:%[0-9]+]]:_(p4) = G_IMPLICIT_DEF + ; CHECK: [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[DEF]](p4) :: (volatile load (p1) from `<16 x i8> addrspace(1)* addrspace(4)* undef`, addrspace 4) + ; CHECK: [[LOAD1:%[0-9]+]]:_(<16 x s8>) = G_LOAD [[LOAD]](p1) :: (load (<16 x s8>) from %ir.ptr, addrspace 1) + ; CHECK: [[UV:%[0-9]+]]:_(s8), [[UV1:%[0-9]+]]:_(s8), [[UV2:%[0-9]+]]:_(s8), [[UV3:%[0-9]+]]:_(s8), [[UV4:%[0-9]+]]:_(s8), [[UV5:%[0-9]+]]:_(s8), [[UV6:%[0-9]+]]:_(s8), [[UV7:%[0-9]+]]:_(s8), [[UV8:%[0-9]+]]:_(s8), [[UV9:%[0-9]+]]:_(s8), [[UV10:%[0-9]+]]:_(s8), [[UV11:%[0-9]+]]:_(s8), [[UV12:%[0-9]+]]:_(s8), [[UV13:%[0-9]+]]:_(s8), [[UV14:%[0-9]+]]:_(s8), [[UV15:%[0-9]+]]:_(s8) = G_UNMERGE_VALUES [[LOAD1]](<16 x s8>) + ; CHECK: [[ANYEXT:%[0-9]+]]:_(s16) = G_ANYEXT [[UV]](s8) + ; CHECK: [[ANYEXT1:%[0-9]+]]:_(s16) = G_ANYEXT [[UV1]](s8) + ; CHECK: 
[[ANYEXT2:%[0-9]+]]:_(s16) = G_ANYEXT [[UV2]](s8) + ; CHECK: [[ANYEXT3:%[0-9]+]]:_(s16) = G_ANYEXT [[UV3]](s8) + ; CHECK: [[ANYEXT4:%[0-9]+]]:_(s16) = G_ANYEXT [[UV4]](s8) + ; CHECK: [[ANYEXT5:%[0-9]+]]:_(s16) = G_ANYEXT [[UV5]](s8) + ; CHECK: [[ANYEXT6:%[0-9]+]]:_(s16) = G_ANYEXT [[UV6]](s8) + ; CHECK: [[ANYEXT7:%[0-9]+]]:_(s16) = G_ANYEXT [[UV7]](s8) + ; CHECK: [[ANYEXT8:%[0-9]+]]:_(s16) = G_ANYEXT [[UV8]](s8) + ; CHECK: [[ANYEXT9:%[0-9]+]]:_(s16) = G_ANYEXT [[UV9]](s8) + ; CHECK: [[ANYEXT10:%[0-9]+]]:_(s16) = G_ANYEXT [[UV10]](s8) + ; CHECK: [[ANYEXT11:%[0-9]+]]:_(s16) = G_ANYEXT [[UV11]](s8) + ; CHECK: [[ANYEXT12:%[0-9]+]]:_(s16) = G_ANYEXT [[UV12]](s8) + ; CHECK: [[ANYEXT13:%[0-9]+]]:_(s16) = G_ANYEXT [[UV13]](s8) + ; CHECK: [[ANYEXT14:%[0-9]+]]:_(s16) = G_ANYEXT [[UV14]](s8) + ; CHECK: [[ANYEXT15:%[0-9]+]]:_(s16) = G_ANYEXT [[UV15]](s8) + ; CHECK: [[ANYEXT16:%[0-9]+]]:_(s32) = G_ANYEXT [[ANYEXT]](s16) + ; CHECK: $vgpr0 = COPY [[ANYEXT16]](s32) + ; CHECK: [[ANYEXT17:%[0-9]+]]:_(s32) = G_ANYEXT [[ANYEXT1]](s16) + ; CHECK: $vgpr1 = COPY [[ANYEXT17]](s32) + ; CHECK: [[ANYEXT18:%[0-9]+]]:_(s32) = G_ANYEXT [[ANYEXT2]](s16) + ; CHECK: $vgpr2 = COPY [[ANYEXT18]](s32) + ; CHECK: [[ANYEXT19:%[0-9]+]]:_(s32) = G_ANYEXT [[ANYEXT3]](s16) + ; CHECK: $vgpr3 = COPY [[ANYEXT19]](s32) + ; CHECK: [[ANYEXT20:%[0-9]+]]:_(s32) = G_ANYEXT [[ANYEXT4]](s16) + ; CHECK: $vgpr4 = COPY [[ANYEXT20]](s32) + ; CHECK: [[ANYEXT21:%[0-9]+]]:_(s32) = G_ANYEXT [[ANYEXT5]](s16) + ; CHECK: $vgpr5 = COPY [[ANYEXT21]](s32) + ; CHECK: [[ANYEXT22:%[0-9]+]]:_(s32) = G_ANYEXT [[ANYEXT6]](s16) + ; CHECK: $vgpr6 = COPY [[ANYEXT22]](s32) + ; CHECK: [[ANYEXT23:%[0-9]+]]:_(s32) = G_ANYEXT [[ANYEXT7]](s16) + ; CHECK: $vgpr7 = COPY [[ANYEXT23]](s32) + ; CHECK: [[ANYEXT24:%[0-9]+]]:_(s32) = G_ANYEXT [[ANYEXT8]](s16) + ; CHECK: $vgpr8 = COPY [[ANYEXT24]](s32) + ; CHECK: [[ANYEXT25:%[0-9]+]]:_(s32) = G_ANYEXT [[ANYEXT9]](s16) + ; CHECK: $vgpr9 = COPY [[ANYEXT25]](s32) + ; CHECK: [[ANYEXT26:%[0-9]+]]:_(s32) = G_ANYEXT [[ANYEXT10]](s16) + ; CHECK: $vgpr10 = COPY [[ANYEXT26]](s32) + ; CHECK: [[ANYEXT27:%[0-9]+]]:_(s32) = G_ANYEXT [[ANYEXT11]](s16) + ; CHECK: $vgpr11 = COPY [[ANYEXT27]](s32) + ; CHECK: [[ANYEXT28:%[0-9]+]]:_(s32) = G_ANYEXT [[ANYEXT12]](s16) + ; CHECK: $vgpr12 = COPY [[ANYEXT28]](s32) + ; CHECK: [[ANYEXT29:%[0-9]+]]:_(s32) = G_ANYEXT [[ANYEXT13]](s16) + ; CHECK: $vgpr13 = COPY [[ANYEXT29]](s32) + ; CHECK: [[ANYEXT30:%[0-9]+]]:_(s32) = G_ANYEXT [[ANYEXT14]](s16) + ; CHECK: $vgpr14 = COPY [[ANYEXT30]](s32) + ; CHECK: [[ANYEXT31:%[0-9]+]]:_(s32) = G_ANYEXT [[ANYEXT15]](s16) + ; CHECK: $vgpr15 = COPY [[ANYEXT31]](s32) + ; CHECK: [[COPY1:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY]] + ; CHECK: S_SETPC_B64_return [[COPY1]], implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7, implicit $vgpr8, implicit $vgpr9, implicit $vgpr10, implicit $vgpr11, implicit $vgpr12, implicit $vgpr13, implicit $vgpr14, implicit $vgpr15 %ptr = load volatile <16 x i8> addrspace(1)*, <16 x i8> addrspace(1)* addrspace(4)* undef %val = load <16 x i8>, <16 x i8> addrspace(1)* %ptr ret <16 x i8> %val @@ -865,16 +1018,19 @@ define <16 x i8> @v16i8_func_void() #0 { define <2 x i8> @v2i8_func_void() #0 { ; CHECK-LABEL: name: v2i8_func_void ; CHECK: bb.1 (%ir-block.0): - ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF - ; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(<2 x s8>) = G_LOAD [[DEF]](p1) :: (load (<2 x s8>) from `<2 x i8> addrspace(1)* undef`, addrspace 1) - ; CHECK-NEXT: 
[[UV:%[0-9]+]]:_(s8), [[UV1:%[0-9]+]]:_(s8) = G_UNMERGE_VALUES [[LOAD]](<2 x s8>) - ; CHECK-NEXT: [[ANYEXT:%[0-9]+]]:_(s16) = G_ANYEXT [[UV]](s8) - ; CHECK-NEXT: [[ANYEXT1:%[0-9]+]]:_(s16) = G_ANYEXT [[UV1]](s8) - ; CHECK-NEXT: [[ANYEXT2:%[0-9]+]]:_(s32) = G_ANYEXT [[ANYEXT]](s16) - ; CHECK-NEXT: $vgpr0 = COPY [[ANYEXT2]](s32) - ; CHECK-NEXT: [[ANYEXT3:%[0-9]+]]:_(s32) = G_ANYEXT [[ANYEXT1]](s16) - ; CHECK-NEXT: $vgpr1 = COPY [[ANYEXT3]](s32) - ; CHECK-NEXT: SI_RETURN implicit $vgpr0, implicit $vgpr1 + ; CHECK: liveins: $sgpr30_sgpr31 + ; CHECK: [[COPY:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 + ; CHECK: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF + ; CHECK: [[LOAD:%[0-9]+]]:_(<2 x s8>) = G_LOAD [[DEF]](p1) :: (load (<2 x s8>) from `<2 x i8> addrspace(1)* undef`, addrspace 1) + ; CHECK: [[UV:%[0-9]+]]:_(s8), [[UV1:%[0-9]+]]:_(s8) = G_UNMERGE_VALUES [[LOAD]](<2 x s8>) + ; CHECK: [[ANYEXT:%[0-9]+]]:_(s16) = G_ANYEXT [[UV]](s8) + ; CHECK: [[ANYEXT1:%[0-9]+]]:_(s16) = G_ANYEXT [[UV1]](s8) + ; CHECK: [[ANYEXT2:%[0-9]+]]:_(s32) = G_ANYEXT [[ANYEXT]](s16) + ; CHECK: $vgpr0 = COPY [[ANYEXT2]](s32) + ; CHECK: [[ANYEXT3:%[0-9]+]]:_(s32) = G_ANYEXT [[ANYEXT1]](s16) + ; CHECK: $vgpr1 = COPY [[ANYEXT3]](s32) + ; CHECK: [[COPY1:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY]] + ; CHECK: S_SETPC_B64_return [[COPY1]], implicit $vgpr0, implicit $vgpr1 %val = load <2 x i8>, <2 x i8> addrspace(1)* undef ret <2 x i8> %val } @@ -882,19 +1038,22 @@ define <2 x i8> @v2i8_func_void() #0 { define <3 x i8> @v3i8_func_void() #0 { ; CHECK-LABEL: name: v3i8_func_void ; CHECK: bb.1 (%ir-block.0): - ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF - ; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(<3 x s8>) = G_LOAD [[DEF]](p1) :: (load (<3 x s8>) from `<3 x i8> addrspace(1)* undef`, align 4, addrspace 1) - ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s8), [[UV1:%[0-9]+]]:_(s8), [[UV2:%[0-9]+]]:_(s8) = G_UNMERGE_VALUES [[LOAD]](<3 x s8>) - ; CHECK-NEXT: [[ANYEXT:%[0-9]+]]:_(s16) = G_ANYEXT [[UV]](s8) - ; CHECK-NEXT: [[ANYEXT1:%[0-9]+]]:_(s16) = G_ANYEXT [[UV1]](s8) - ; CHECK-NEXT: [[ANYEXT2:%[0-9]+]]:_(s16) = G_ANYEXT [[UV2]](s8) - ; CHECK-NEXT: [[ANYEXT3:%[0-9]+]]:_(s32) = G_ANYEXT [[ANYEXT]](s16) - ; CHECK-NEXT: $vgpr0 = COPY [[ANYEXT3]](s32) - ; CHECK-NEXT: [[ANYEXT4:%[0-9]+]]:_(s32) = G_ANYEXT [[ANYEXT1]](s16) - ; CHECK-NEXT: $vgpr1 = COPY [[ANYEXT4]](s32) - ; CHECK-NEXT: [[ANYEXT5:%[0-9]+]]:_(s32) = G_ANYEXT [[ANYEXT2]](s16) - ; CHECK-NEXT: $vgpr2 = COPY [[ANYEXT5]](s32) - ; CHECK-NEXT: SI_RETURN implicit $vgpr0, implicit $vgpr1, implicit $vgpr2 + ; CHECK: liveins: $sgpr30_sgpr31 + ; CHECK: [[COPY:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 + ; CHECK: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF + ; CHECK: [[LOAD:%[0-9]+]]:_(<3 x s8>) = G_LOAD [[DEF]](p1) :: (load (<3 x s8>) from `<3 x i8> addrspace(1)* undef`, align 4, addrspace 1) + ; CHECK: [[UV:%[0-9]+]]:_(s8), [[UV1:%[0-9]+]]:_(s8), [[UV2:%[0-9]+]]:_(s8) = G_UNMERGE_VALUES [[LOAD]](<3 x s8>) + ; CHECK: [[ANYEXT:%[0-9]+]]:_(s16) = G_ANYEXT [[UV]](s8) + ; CHECK: [[ANYEXT1:%[0-9]+]]:_(s16) = G_ANYEXT [[UV1]](s8) + ; CHECK: [[ANYEXT2:%[0-9]+]]:_(s16) = G_ANYEXT [[UV2]](s8) + ; CHECK: [[ANYEXT3:%[0-9]+]]:_(s32) = G_ANYEXT [[ANYEXT]](s16) + ; CHECK: $vgpr0 = COPY [[ANYEXT3]](s32) + ; CHECK: [[ANYEXT4:%[0-9]+]]:_(s32) = G_ANYEXT [[ANYEXT1]](s16) + ; CHECK: $vgpr1 = COPY [[ANYEXT4]](s32) + ; CHECK: [[ANYEXT5:%[0-9]+]]:_(s32) = G_ANYEXT [[ANYEXT2]](s16) + ; CHECK: $vgpr2 = COPY [[ANYEXT5]](s32) + ; CHECK: [[COPY1:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY]] + ; CHECK: S_SETPC_B64_return [[COPY1]], implicit $vgpr0, implicit 
$vgpr1, implicit $vgpr2 %val = load <3 x i8>, <3 x i8> addrspace(1)* undef ret <3 x i8> %val } @@ -902,23 +1061,26 @@ define <3 x i8> @v3i8_func_void() #0 { define <4 x i8> @v4i8_func_void() #0 { ; CHECK-LABEL: name: v4i8_func_void ; CHECK: bb.1 (%ir-block.0): - ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p4) = G_IMPLICIT_DEF - ; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[DEF]](p4) :: (volatile load (p1) from `<4 x i8> addrspace(1)* addrspace(4)* undef`, addrspace 4) - ; CHECK-NEXT: [[LOAD1:%[0-9]+]]:_(<4 x s8>) = G_LOAD [[LOAD]](p1) :: (load (<4 x s8>) from %ir.ptr, addrspace 1) - ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s8), [[UV1:%[0-9]+]]:_(s8), [[UV2:%[0-9]+]]:_(s8), [[UV3:%[0-9]+]]:_(s8) = G_UNMERGE_VALUES [[LOAD1]](<4 x s8>) - ; CHECK-NEXT: [[ANYEXT:%[0-9]+]]:_(s16) = G_ANYEXT [[UV]](s8) - ; CHECK-NEXT: [[ANYEXT1:%[0-9]+]]:_(s16) = G_ANYEXT [[UV1]](s8) - ; CHECK-NEXT: [[ANYEXT2:%[0-9]+]]:_(s16) = G_ANYEXT [[UV2]](s8) - ; CHECK-NEXT: [[ANYEXT3:%[0-9]+]]:_(s16) = G_ANYEXT [[UV3]](s8) - ; CHECK-NEXT: [[ANYEXT4:%[0-9]+]]:_(s32) = G_ANYEXT [[ANYEXT]](s16) - ; CHECK-NEXT: $vgpr0 = COPY [[ANYEXT4]](s32) - ; CHECK-NEXT: [[ANYEXT5:%[0-9]+]]:_(s32) = G_ANYEXT [[ANYEXT1]](s16) - ; CHECK-NEXT: $vgpr1 = COPY [[ANYEXT5]](s32) - ; CHECK-NEXT: [[ANYEXT6:%[0-9]+]]:_(s32) = G_ANYEXT [[ANYEXT2]](s16) - ; CHECK-NEXT: $vgpr2 = COPY [[ANYEXT6]](s32) - ; CHECK-NEXT: [[ANYEXT7:%[0-9]+]]:_(s32) = G_ANYEXT [[ANYEXT3]](s16) - ; CHECK-NEXT: $vgpr3 = COPY [[ANYEXT7]](s32) - ; CHECK-NEXT: SI_RETURN implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3 + ; CHECK: liveins: $sgpr30_sgpr31 + ; CHECK: [[COPY:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 + ; CHECK: [[DEF:%[0-9]+]]:_(p4) = G_IMPLICIT_DEF + ; CHECK: [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[DEF]](p4) :: (volatile load (p1) from `<4 x i8> addrspace(1)* addrspace(4)* undef`, addrspace 4) + ; CHECK: [[LOAD1:%[0-9]+]]:_(<4 x s8>) = G_LOAD [[LOAD]](p1) :: (load (<4 x s8>) from %ir.ptr, addrspace 1) + ; CHECK: [[UV:%[0-9]+]]:_(s8), [[UV1:%[0-9]+]]:_(s8), [[UV2:%[0-9]+]]:_(s8), [[UV3:%[0-9]+]]:_(s8) = G_UNMERGE_VALUES [[LOAD1]](<4 x s8>) + ; CHECK: [[ANYEXT:%[0-9]+]]:_(s16) = G_ANYEXT [[UV]](s8) + ; CHECK: [[ANYEXT1:%[0-9]+]]:_(s16) = G_ANYEXT [[UV1]](s8) + ; CHECK: [[ANYEXT2:%[0-9]+]]:_(s16) = G_ANYEXT [[UV2]](s8) + ; CHECK: [[ANYEXT3:%[0-9]+]]:_(s16) = G_ANYEXT [[UV3]](s8) + ; CHECK: [[ANYEXT4:%[0-9]+]]:_(s32) = G_ANYEXT [[ANYEXT]](s16) + ; CHECK: $vgpr0 = COPY [[ANYEXT4]](s32) + ; CHECK: [[ANYEXT5:%[0-9]+]]:_(s32) = G_ANYEXT [[ANYEXT1]](s16) + ; CHECK: $vgpr1 = COPY [[ANYEXT5]](s32) + ; CHECK: [[ANYEXT6:%[0-9]+]]:_(s32) = G_ANYEXT [[ANYEXT2]](s16) + ; CHECK: $vgpr2 = COPY [[ANYEXT6]](s32) + ; CHECK: [[ANYEXT7:%[0-9]+]]:_(s32) = G_ANYEXT [[ANYEXT3]](s16) + ; CHECK: $vgpr3 = COPY [[ANYEXT7]](s32) + ; CHECK: [[COPY1:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY]] + ; CHECK: S_SETPC_B64_return [[COPY1]], implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3 %ptr = load volatile <4 x i8> addrspace(1)*, <4 x i8> addrspace(1)* addrspace(4)* undef %val = load <4 x i8>, <4 x i8> addrspace(1)* %ptr ret <4 x i8> %val @@ -927,15 +1089,18 @@ define <4 x i8> @v4i8_func_void() #0 { define {i8, i32} @struct_i8_i32_func_void() #0 { ; CHECK-LABEL: name: struct_i8_i32_func_void ; CHECK: bb.1 (%ir-block.0): - ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF - ; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(s8) = G_LOAD [[DEF]](p1) :: (load (s8) from `{ i8, i32 } addrspace(1)* undef`, align 4, addrspace 1) - ; CHECK-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 4 - ; CHECK-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = 
G_PTR_ADD [[DEF]], [[C]](s64) - ; CHECK-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p1) :: (load (s32) from `{ i8, i32 } addrspace(1)* undef` + 4, addrspace 1) - ; CHECK-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[LOAD]](s8) - ; CHECK-NEXT: $vgpr0 = COPY [[ANYEXT]](s32) - ; CHECK-NEXT: $vgpr1 = COPY [[LOAD1]](s32) - ; CHECK-NEXT: SI_RETURN implicit $vgpr0, implicit $vgpr1 + ; CHECK: liveins: $sgpr30_sgpr31 + ; CHECK: [[COPY:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 + ; CHECK: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF + ; CHECK: [[LOAD:%[0-9]+]]:_(s8) = G_LOAD [[DEF]](p1) :: (load (s8) from `{ i8, i32 } addrspace(1)* undef`, align 4, addrspace 1) + ; CHECK: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 4 + ; CHECK: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[DEF]], [[C]](s64) + ; CHECK: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p1) :: (load (s32) from `{ i8, i32 } addrspace(1)* undef` + 4, addrspace 1) + ; CHECK: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[LOAD]](s8) + ; CHECK: $vgpr0 = COPY [[ANYEXT]](s32) + ; CHECK: $vgpr1 = COPY [[LOAD1]](s32) + ; CHECK: [[COPY1:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY]] + ; CHECK: S_SETPC_B64_return [[COPY1]], implicit $vgpr0, implicit $vgpr1 %val = load { i8, i32 }, { i8, i32 } addrspace(1)* undef ret { i8, i32 } %val } @@ -943,18 +1108,19 @@ define {i8, i32} @struct_i8_i32_func_void() #0 { define void @void_func_sret_struct_i8_i32({ i8, i32 } addrspace(5)* sret({ i8, i32 }) %arg0) #0 { ; CHECK-LABEL: name: void_func_sret_struct_i8_i32 ; CHECK: bb.1 (%ir-block.0): - ; CHECK-NEXT: liveins: $vgpr0 - ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 - ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(p1) = COPY [[DEF]](p1) - ; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(s8) = G_LOAD [[DEF]](p1) :: (volatile load (s8) from `i8 addrspace(1)* undef`, addrspace 1) - ; CHECK-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[COPY1]](p1) :: (volatile load (s32) from `i32 addrspace(1)* undef`, addrspace 1) - ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 - ; CHECK-NEXT: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](s32) - ; CHECK-NEXT: G_STORE [[LOAD]](s8), [[COPY]](p5) :: (store (s8) into %ir.gep01, addrspace 5) - ; CHECK-NEXT: G_STORE [[LOAD1]](s32), [[PTR_ADD]](p5) :: (store (s32) into %ir.gep1, addrspace 5) - ; CHECK-NEXT: SI_RETURN + ; CHECK: liveins: $vgpr0, $sgpr30_sgpr31 + ; CHECK: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 + ; CHECK: [[COPY1:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 + ; CHECK: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF + ; CHECK: [[COPY2:%[0-9]+]]:_(p1) = COPY [[DEF]](p1) + ; CHECK: [[LOAD:%[0-9]+]]:_(s8) = G_LOAD [[DEF]](p1) :: (volatile load (s8) from `i8 addrspace(1)* undef`, addrspace 1) + ; CHECK: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[COPY2]](p1) :: (volatile load (s32) from `i32 addrspace(1)* undef`, addrspace 1) + ; CHECK: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 + ; CHECK: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](s32) + ; CHECK: G_STORE [[LOAD]](s8), [[COPY]](p5) :: (store (s8) into %ir.gep01, addrspace 5) + ; CHECK: G_STORE [[LOAD1]](s32), [[PTR_ADD]](p5) :: (store (s32) into %ir.gep1, addrspace 5) + ; CHECK: [[COPY3:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY1]] + ; CHECK: S_SETPC_B64_return [[COPY3]] %val0 = load volatile i8, i8 addrspace(1)* undef %val1 = load volatile i32, i32 addrspace(1)* undef %gep0 = getelementptr inbounds { i8, i32 }, { i8, i32 } addrspace(5)* %arg0, i32 0, i32 0 @@ -971,14 +1137,15 @@ define void @void_func_sret_struct_i8_i32({ i8, i32 } addrspace(5)* sret({ i8, i 
define <33 x i32> @v33i32_func_void() #0 { ; CHECK-LABEL: name: v33i32_func_void ; CHECK: bb.1 (%ir-block.0): - ; CHECK-NEXT: liveins: $vgpr0 - ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 - ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p4) = G_IMPLICIT_DEF - ; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[DEF]](p4) :: (volatile load (p1) from `<33 x i32> addrspace(1)* addrspace(4)* undef`, addrspace 4) - ; CHECK-NEXT: [[LOAD1:%[0-9]+]]:_(<33 x s32>) = G_LOAD [[LOAD]](p1) :: (load (<33 x s32>) from %ir.ptr, align 256, addrspace 1) - ; CHECK-NEXT: G_STORE [[LOAD1]](<33 x s32>), [[COPY]](p5) :: (store (<33 x s32>), align 256, addrspace 5) - ; CHECK-NEXT: SI_RETURN + ; CHECK: liveins: $vgpr0, $sgpr30_sgpr31 + ; CHECK: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 + ; CHECK: [[COPY1:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 + ; CHECK: [[DEF:%[0-9]+]]:_(p4) = G_IMPLICIT_DEF + ; CHECK: [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[DEF]](p4) :: (volatile load (p1) from `<33 x i32> addrspace(1)* addrspace(4)* undef`, addrspace 4) + ; CHECK: [[LOAD1:%[0-9]+]]:_(<33 x s32>) = G_LOAD [[LOAD]](p1) :: (load (<33 x s32>) from %ir.ptr, align 256, addrspace 1) + ; CHECK: G_STORE [[LOAD1]](<33 x s32>), [[COPY]](p5) :: (store (<33 x s32>), align 256, addrspace 5) + ; CHECK: [[COPY2:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY1]] + ; CHECK: S_SETPC_B64_return [[COPY2]] %ptr = load volatile <33 x i32> addrspace(1)*, <33 x i32> addrspace(1)* addrspace(4)* undef %val = load <33 x i32>, <33 x i32> addrspace(1)* %ptr ret <33 x i32> %val @@ -987,21 +1154,22 @@ define <33 x i32> @v33i32_func_void() #0 { define <33 x i32> @v33i32_func_v33i32_i32(<33 x i32> addrspace(1)* %p, i32 %idx) #0 { ; CHECK-LABEL: name: v33i32_func_v33i32_i32 ; CHECK: bb.1 (%ir-block.0): - ; CHECK-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3 - ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; CHECK-NEXT: [[MV:%[0-9]+]]:_(p1) = G_MERGE_VALUES [[COPY1]](s32), [[COPY2]](s32) - ; CHECK-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $vgpr3 - ; CHECK-NEXT: [[SEXT:%[0-9]+]]:_(s64) = G_SEXT [[COPY3]](s32) - ; CHECK-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 256 - ; CHECK-NEXT: [[MUL:%[0-9]+]]:_(s64) = G_MUL [[SEXT]], [[C]] - ; CHECK-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[MV]], [[MUL]](s64) - ; CHECK-NEXT: [[COPY4:%[0-9]+]]:_(p1) = COPY [[PTR_ADD]](p1) - ; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(<33 x s32>) = G_LOAD [[COPY4]](p1) :: (load (<33 x s32>) from %ir.gep, align 256, addrspace 1) - ; CHECK-NEXT: G_STORE [[LOAD]](<33 x s32>), [[COPY]](p5) :: (store (<33 x s32>), align 256, addrspace 5) - ; CHECK-NEXT: SI_RETURN + ; CHECK: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $sgpr30_sgpr31 + ; CHECK: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 + ; CHECK: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; CHECK: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 + ; CHECK: [[MV:%[0-9]+]]:_(p1) = G_MERGE_VALUES [[COPY1]](s32), [[COPY2]](s32) + ; CHECK: [[COPY3:%[0-9]+]]:_(s32) = COPY $vgpr3 + ; CHECK: [[COPY4:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 + ; CHECK: [[SEXT:%[0-9]+]]:_(s64) = G_SEXT [[COPY3]](s32) + ; CHECK: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 256 + ; CHECK: [[MUL:%[0-9]+]]:_(s64) = G_MUL [[SEXT]], [[C]] + ; CHECK: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[MV]], [[MUL]](s64) + ; CHECK: [[COPY5:%[0-9]+]]:_(p1) = COPY [[PTR_ADD]](p1) + ; CHECK: [[LOAD:%[0-9]+]]:_(<33 x s32>) = G_LOAD [[COPY5]](p1) :: (load (<33 x s32>) from %ir.gep, align 256, addrspace 1) + ; CHECK: 
G_STORE [[LOAD]](<33 x s32>), [[COPY]](p5) :: (store (<33 x s32>), align 256, addrspace 5) + ; CHECK: [[COPY6:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY4]] + ; CHECK: S_SETPC_B64_return [[COPY6]] %gep = getelementptr inbounds <33 x i32>, <33 x i32> addrspace(1)* %p, i32 %idx %val = load <33 x i32>, <33 x i32> addrspace(1)* %gep ret <33 x i32> %val @@ -1010,20 +1178,21 @@ define <33 x i32> @v33i32_func_v33i32_i32(<33 x i32> addrspace(1)* %p, i32 %idx) define { <32 x i32>, i32 } @struct_v32i32_i32_func_void() #0 { ; CHECK-LABEL: name: struct_v32i32_i32_func_void ; CHECK: bb.1 (%ir-block.0): - ; CHECK-NEXT: liveins: $vgpr0 - ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 - ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p4) = G_IMPLICIT_DEF - ; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[DEF]](p4) :: (volatile load (p1) from `{ <32 x i32>, i32 } addrspace(1)* addrspace(4)* undef`, addrspace 4) - ; CHECK-NEXT: [[LOAD1:%[0-9]+]]:_(<32 x s32>) = G_LOAD [[LOAD]](p1) :: (load (<32 x s32>) from %ir.ptr, addrspace 1) - ; CHECK-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 128 - ; CHECK-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[LOAD]], [[C]](s64) - ; CHECK-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p1) :: (load (s32) from %ir.ptr + 128, align 128, addrspace 1) - ; CHECK-NEXT: G_STORE [[LOAD1]](<32 x s32>), [[COPY]](p5) :: (store (<32 x s32>), addrspace 5) - ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 128 - ; CHECK-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C1]](s32) - ; CHECK-NEXT: G_STORE [[LOAD2]](s32), [[PTR_ADD1]](p5) :: (store (s32), align 128, addrspace 5) - ; CHECK-NEXT: SI_RETURN + ; CHECK: liveins: $vgpr0, $sgpr30_sgpr31 + ; CHECK: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 + ; CHECK: [[COPY1:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 + ; CHECK: [[DEF:%[0-9]+]]:_(p4) = G_IMPLICIT_DEF + ; CHECK: [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[DEF]](p4) :: (volatile load (p1) from `{ <32 x i32>, i32 } addrspace(1)* addrspace(4)* undef`, addrspace 4) + ; CHECK: [[LOAD1:%[0-9]+]]:_(<32 x s32>) = G_LOAD [[LOAD]](p1) :: (load (<32 x s32>) from %ir.ptr, addrspace 1) + ; CHECK: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 128 + ; CHECK: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[LOAD]], [[C]](s64) + ; CHECK: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p1) :: (load (s32) from %ir.ptr + 128, align 128, addrspace 1) + ; CHECK: G_STORE [[LOAD1]](<32 x s32>), [[COPY]](p5) :: (store (<32 x s32>), addrspace 5) + ; CHECK: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 128 + ; CHECK: [[PTR_ADD1:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C1]](s32) + ; CHECK: G_STORE [[LOAD2]](s32), [[PTR_ADD1]](p5) :: (store (s32), align 128, addrspace 5) + ; CHECK: [[COPY2:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY1]] + ; CHECK: S_SETPC_B64_return [[COPY2]] %ptr = load volatile { <32 x i32>, i32 } addrspace(1)*, { <32 x i32>, i32 } addrspace(1)* addrspace(4)* undef %val = load { <32 x i32>, i32 }, { <32 x i32>, i32 } addrspace(1)* %ptr ret { <32 x i32>, i32 }%val @@ -1032,20 +1201,21 @@ define { <32 x i32>, i32 } @struct_v32i32_i32_func_void() #0 { define { i32, <32 x i32> } @struct_i32_v32i32_func_void() #0 { ; CHECK-LABEL: name: struct_i32_v32i32_func_void ; CHECK: bb.1 (%ir-block.0): - ; CHECK-NEXT: liveins: $vgpr0 - ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 - ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p4) = G_IMPLICIT_DEF - ; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[DEF]](p4) :: (volatile load (p1) from `{ i32, <32 x i32> } addrspace(1)* addrspace(4)* undef`, addrspace 4) - ; CHECK-NEXT: 
[[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[LOAD]](p1) :: (load (s32) from %ir.ptr, align 128, addrspace 1) - ; CHECK-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 128 - ; CHECK-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[LOAD]], [[C]](s64) - ; CHECK-NEXT: [[LOAD2:%[0-9]+]]:_(<32 x s32>) = G_LOAD [[PTR_ADD]](p1) :: (load (<32 x s32>) from %ir.ptr + 128, addrspace 1) - ; CHECK-NEXT: G_STORE [[LOAD1]](s32), [[COPY]](p5) :: (store (s32), align 128, addrspace 5) - ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 128 - ; CHECK-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C1]](s32) - ; CHECK-NEXT: G_STORE [[LOAD2]](<32 x s32>), [[PTR_ADD1]](p5) :: (store (<32 x s32>), addrspace 5) - ; CHECK-NEXT: SI_RETURN + ; CHECK: liveins: $vgpr0, $sgpr30_sgpr31 + ; CHECK: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 + ; CHECK: [[COPY1:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 + ; CHECK: [[DEF:%[0-9]+]]:_(p4) = G_IMPLICIT_DEF + ; CHECK: [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[DEF]](p4) :: (volatile load (p1) from `{ i32, <32 x i32> } addrspace(1)* addrspace(4)* undef`, addrspace 4) + ; CHECK: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[LOAD]](p1) :: (load (s32) from %ir.ptr, align 128, addrspace 1) + ; CHECK: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 128 + ; CHECK: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[LOAD]], [[C]](s64) + ; CHECK: [[LOAD2:%[0-9]+]]:_(<32 x s32>) = G_LOAD [[PTR_ADD]](p1) :: (load (<32 x s32>) from %ir.ptr + 128, addrspace 1) + ; CHECK: G_STORE [[LOAD1]](s32), [[COPY]](p5) :: (store (s32), align 128, addrspace 5) + ; CHECK: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 128 + ; CHECK: [[PTR_ADD1:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C1]](s32) + ; CHECK: G_STORE [[LOAD2]](<32 x s32>), [[PTR_ADD1]](p5) :: (store (<32 x s32>), addrspace 5) + ; CHECK: [[COPY2:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY1]] + ; CHECK: S_SETPC_B64_return [[COPY2]] %ptr = load volatile { i32, <32 x i32> } addrspace(1)*, { i32, <32 x i32> } addrspace(1)* addrspace(4)* undef %val = load { i32, <32 x i32> }, { i32, <32 x i32> } addrspace(1)* %ptr ret { i32, <32 x i32> }%val @@ -1055,25 +1225,28 @@ define { i32, <32 x i32> } @struct_i32_v32i32_func_void() #0 { define { <3 x i32>, i32 } @v3i32_struct_func_void_wasted_reg() #0 { ; CHECK-LABEL: name: v3i32_struct_func_void_wasted_reg ; CHECK: bb.1 (%ir-block.0): - ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p3) = G_IMPLICIT_DEF - ; CHECK-NEXT: [[DEF1:%[0-9]+]]:_(<3 x s32>) = G_IMPLICIT_DEF - ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 - ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 - ; CHECK-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 2 - ; CHECK-NEXT: [[DEF2:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF - ; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[DEF]](p3) :: (volatile load (s32) from `i32 addrspace(3)* undef`, addrspace 3) - ; CHECK-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[DEF]](p3) :: (volatile load (s32) from `i32 addrspace(3)* undef`, addrspace 3) - ; CHECK-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[DEF]](p3) :: (volatile load (s32) from `i32 addrspace(3)* undef`, addrspace 3) - ; CHECK-NEXT: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[DEF]](p3) :: (volatile load (s32) from `i32 addrspace(3)* undef`, addrspace 3) - ; CHECK-NEXT: [[IVEC:%[0-9]+]]:_(<3 x s32>) = G_INSERT_VECTOR_ELT [[DEF1]], [[LOAD]](s32), [[C]](s32) - ; CHECK-NEXT: [[IVEC1:%[0-9]+]]:_(<3 x s32>) = G_INSERT_VECTOR_ELT [[IVEC]], [[LOAD1]](s32), [[C1]](s32) - ; CHECK-NEXT: [[IVEC2:%[0-9]+]]:_(<3 x s32>) = G_INSERT_VECTOR_ELT [[IVEC1]], [[LOAD2]](s32), [[C2]](s32) - ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), 
[[UV2:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[IVEC2]](<3 x s32>) - ; CHECK-NEXT: $vgpr0 = COPY [[UV]](s32) - ; CHECK-NEXT: $vgpr1 = COPY [[UV1]](s32) - ; CHECK-NEXT: $vgpr2 = COPY [[UV2]](s32) - ; CHECK-NEXT: $vgpr3 = COPY [[LOAD3]](s32) - ; CHECK-NEXT: SI_RETURN implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3 + ; CHECK: liveins: $sgpr30_sgpr31 + ; CHECK: [[COPY:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 + ; CHECK: [[DEF:%[0-9]+]]:_(p3) = G_IMPLICIT_DEF + ; CHECK: [[DEF1:%[0-9]+]]:_(<3 x s32>) = G_IMPLICIT_DEF + ; CHECK: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 + ; CHECK: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 + ; CHECK: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 2 + ; CHECK: [[DEF2:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF + ; CHECK: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[DEF]](p3) :: (volatile load (s32) from `i32 addrspace(3)* undef`, addrspace 3) + ; CHECK: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[DEF]](p3) :: (volatile load (s32) from `i32 addrspace(3)* undef`, addrspace 3) + ; CHECK: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[DEF]](p3) :: (volatile load (s32) from `i32 addrspace(3)* undef`, addrspace 3) + ; CHECK: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[DEF]](p3) :: (volatile load (s32) from `i32 addrspace(3)* undef`, addrspace 3) + ; CHECK: [[IVEC:%[0-9]+]]:_(<3 x s32>) = G_INSERT_VECTOR_ELT [[DEF1]], [[LOAD]](s32), [[C]](s32) + ; CHECK: [[IVEC1:%[0-9]+]]:_(<3 x s32>) = G_INSERT_VECTOR_ELT [[IVEC]], [[LOAD1]](s32), [[C1]](s32) + ; CHECK: [[IVEC2:%[0-9]+]]:_(<3 x s32>) = G_INSERT_VECTOR_ELT [[IVEC1]], [[LOAD2]](s32), [[C2]](s32) + ; CHECK: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[IVEC2]](<3 x s32>) + ; CHECK: $vgpr0 = COPY [[UV]](s32) + ; CHECK: $vgpr1 = COPY [[UV1]](s32) + ; CHECK: $vgpr2 = COPY [[UV2]](s32) + ; CHECK: $vgpr3 = COPY [[LOAD3]](s32) + ; CHECK: [[COPY1:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY]] + ; CHECK: S_SETPC_B64_return [[COPY1]], implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3 %load0 = load volatile i32, i32 addrspace(3)* undef %load1 = load volatile i32, i32 addrspace(3)* undef %load2 = load volatile i32, i32 addrspace(3)* undef @@ -1090,26 +1263,29 @@ define { <3 x i32>, i32 } @v3i32_struct_func_void_wasted_reg() #0 { define { <3 x float>, i32 } @v3f32_struct_func_void_wasted_reg() #0 { ; CHECK-LABEL: name: v3f32_struct_func_void_wasted_reg ; CHECK: bb.1 (%ir-block.0): - ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p3) = G_IMPLICIT_DEF - ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(p3) = COPY [[DEF]](p3) - ; CHECK-NEXT: [[DEF1:%[0-9]+]]:_(<3 x s32>) = G_IMPLICIT_DEF - ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 - ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 - ; CHECK-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 2 - ; CHECK-NEXT: [[DEF2:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF - ; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[DEF]](p3) :: (volatile load (s32) from `float addrspace(3)* undef`, addrspace 3) - ; CHECK-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[DEF]](p3) :: (volatile load (s32) from `float addrspace(3)* undef`, addrspace 3) - ; CHECK-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[DEF]](p3) :: (volatile load (s32) from `float addrspace(3)* undef`, addrspace 3) - ; CHECK-NEXT: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (volatile load (s32) from `i32 addrspace(3)* undef`, addrspace 3) - ; CHECK-NEXT: [[IVEC:%[0-9]+]]:_(<3 x s32>) = G_INSERT_VECTOR_ELT [[DEF1]], [[LOAD]](s32), [[C]](s32) - ; CHECK-NEXT: [[IVEC1:%[0-9]+]]:_(<3 x s32>) = G_INSERT_VECTOR_ELT [[IVEC]], [[LOAD1]](s32), [[C1]](s32) - ; CHECK-NEXT: 
[[IVEC2:%[0-9]+]]:_(<3 x s32>) = G_INSERT_VECTOR_ELT [[IVEC1]], [[LOAD2]](s32), [[C2]](s32) - ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[IVEC2]](<3 x s32>) - ; CHECK-NEXT: $vgpr0 = COPY [[UV]](s32) - ; CHECK-NEXT: $vgpr1 = COPY [[UV1]](s32) - ; CHECK-NEXT: $vgpr2 = COPY [[UV2]](s32) - ; CHECK-NEXT: $vgpr3 = COPY [[LOAD3]](s32) - ; CHECK-NEXT: SI_RETURN implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3 + ; CHECK: liveins: $sgpr30_sgpr31 + ; CHECK: [[COPY:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 + ; CHECK: [[DEF:%[0-9]+]]:_(p3) = G_IMPLICIT_DEF + ; CHECK: [[COPY1:%[0-9]+]]:_(p3) = COPY [[DEF]](p3) + ; CHECK: [[DEF1:%[0-9]+]]:_(<3 x s32>) = G_IMPLICIT_DEF + ; CHECK: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 + ; CHECK: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 + ; CHECK: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 2 + ; CHECK: [[DEF2:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF + ; CHECK: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[DEF]](p3) :: (volatile load (s32) from `float addrspace(3)* undef`, addrspace 3) + ; CHECK: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[DEF]](p3) :: (volatile load (s32) from `float addrspace(3)* undef`, addrspace 3) + ; CHECK: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[DEF]](p3) :: (volatile load (s32) from `float addrspace(3)* undef`, addrspace 3) + ; CHECK: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[COPY1]](p3) :: (volatile load (s32) from `i32 addrspace(3)* undef`, addrspace 3) + ; CHECK: [[IVEC:%[0-9]+]]:_(<3 x s32>) = G_INSERT_VECTOR_ELT [[DEF1]], [[LOAD]](s32), [[C]](s32) + ; CHECK: [[IVEC1:%[0-9]+]]:_(<3 x s32>) = G_INSERT_VECTOR_ELT [[IVEC]], [[LOAD1]](s32), [[C1]](s32) + ; CHECK: [[IVEC2:%[0-9]+]]:_(<3 x s32>) = G_INSERT_VECTOR_ELT [[IVEC1]], [[LOAD2]](s32), [[C2]](s32) + ; CHECK: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[IVEC2]](<3 x s32>) + ; CHECK: $vgpr0 = COPY [[UV]](s32) + ; CHECK: $vgpr1 = COPY [[UV1]](s32) + ; CHECK: $vgpr2 = COPY [[UV2]](s32) + ; CHECK: $vgpr3 = COPY [[LOAD3]](s32) + ; CHECK: [[COPY2:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY]] + ; CHECK: S_SETPC_B64_return [[COPY2]], implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3 %load0 = load volatile float, float addrspace(3)* undef %load1 = load volatile float, float addrspace(3)* undef %load2 = load volatile float, float addrspace(3)* undef @@ -1126,21 +1302,22 @@ define { <3 x float>, i32 } @v3f32_struct_func_void_wasted_reg() #0 { define void @void_func_sret_max_known_zero_bits(i8 addrspace(5)* sret(i8) %arg0) #0 { ; CHECK-LABEL: name: void_func_sret_max_known_zero_bits ; CHECK: bb.1 (%ir-block.0): - ; CHECK-NEXT: liveins: $vgpr0 - ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 - ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 17 - ; CHECK-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 18 - ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p3) = G_IMPLICIT_DEF - ; CHECK-NEXT: [[PTRTOINT:%[0-9]+]]:_(s32) = G_PTRTOINT [[COPY]](p5) - ; CHECK-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[PTRTOINT]], [[C]](s32) - ; CHECK-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[PTRTOINT]], [[C1]](s32) - ; CHECK-NEXT: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[PTRTOINT]], [[C2]](s32) - ; CHECK-NEXT: G_STORE [[LSHR]](s32), [[DEF]](p3) :: (volatile store (s32) into `i32 addrspace(3)* undef`, addrspace 3) - ; CHECK-NEXT: G_STORE [[LSHR1]](s32), [[DEF]](p3) :: (volatile store (s32) into `i32 addrspace(3)* undef`, addrspace 3) - ; CHECK-NEXT: G_STORE [[LSHR2]](s32), 
[[DEF]](p3) :: (volatile store (s32) into `i32 addrspace(3)* undef`, addrspace 3) - ; CHECK-NEXT: SI_RETURN + ; CHECK: liveins: $vgpr0, $sgpr30_sgpr31 + ; CHECK: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 + ; CHECK: [[COPY1:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 + ; CHECK: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; CHECK: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 17 + ; CHECK: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 18 + ; CHECK: [[DEF:%[0-9]+]]:_(p3) = G_IMPLICIT_DEF + ; CHECK: [[PTRTOINT:%[0-9]+]]:_(s32) = G_PTRTOINT [[COPY]](p5) + ; CHECK: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[PTRTOINT]], [[C]](s32) + ; CHECK: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[PTRTOINT]], [[C1]](s32) + ; CHECK: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[PTRTOINT]], [[C2]](s32) + ; CHECK: G_STORE [[LSHR]](s32), [[DEF]](p3) :: (volatile store (s32) into `i32 addrspace(3)* undef`, addrspace 3) + ; CHECK: G_STORE [[LSHR1]](s32), [[DEF]](p3) :: (volatile store (s32) into `i32 addrspace(3)* undef`, addrspace 3) + ; CHECK: G_STORE [[LSHR2]](s32), [[DEF]](p3) :: (volatile store (s32) into `i32 addrspace(3)* undef`, addrspace 3) + ; CHECK: [[COPY2:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY1]] + ; CHECK: S_SETPC_B64_return [[COPY2]] %arg0.int = ptrtoint i8 addrspace(5)* %arg0 to i32 %lshr0 = lshr i32 %arg0.int, 16 @@ -1156,43 +1333,46 @@ define void @void_func_sret_max_known_zero_bits(i8 addrspace(5)* sret(i8) %arg0) define i1022 @i1022_func_void() #0 { ; CHECK-LABEL: name: i1022_func_void ; CHECK: bb.1 (%ir-block.0): - ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF - ; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(s1022) = G_LOAD [[DEF]](p1) :: (load (s1022) from `i1022 addrspace(1)* undef`, align 8, addrspace 1) - ; CHECK-NEXT: [[ANYEXT:%[0-9]+]]:_(s1024) = G_ANYEXT [[LOAD]](s1022) - ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32), [[UV6:%[0-9]+]]:_(s32), [[UV7:%[0-9]+]]:_(s32), [[UV8:%[0-9]+]]:_(s32), [[UV9:%[0-9]+]]:_(s32), [[UV10:%[0-9]+]]:_(s32), [[UV11:%[0-9]+]]:_(s32), [[UV12:%[0-9]+]]:_(s32), [[UV13:%[0-9]+]]:_(s32), [[UV14:%[0-9]+]]:_(s32), [[UV15:%[0-9]+]]:_(s32), [[UV16:%[0-9]+]]:_(s32), [[UV17:%[0-9]+]]:_(s32), [[UV18:%[0-9]+]]:_(s32), [[UV19:%[0-9]+]]:_(s32), [[UV20:%[0-9]+]]:_(s32), [[UV21:%[0-9]+]]:_(s32), [[UV22:%[0-9]+]]:_(s32), [[UV23:%[0-9]+]]:_(s32), [[UV24:%[0-9]+]]:_(s32), [[UV25:%[0-9]+]]:_(s32), [[UV26:%[0-9]+]]:_(s32), [[UV27:%[0-9]+]]:_(s32), [[UV28:%[0-9]+]]:_(s32), [[UV29:%[0-9]+]]:_(s32), [[UV30:%[0-9]+]]:_(s32), [[UV31:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[ANYEXT]](s1024) - ; CHECK-NEXT: $vgpr0 = COPY [[UV]](s32) - ; CHECK-NEXT: $vgpr1 = COPY [[UV1]](s32) - ; CHECK-NEXT: $vgpr2 = COPY [[UV2]](s32) - ; CHECK-NEXT: $vgpr3 = COPY [[UV3]](s32) - ; CHECK-NEXT: $vgpr4 = COPY [[UV4]](s32) - ; CHECK-NEXT: $vgpr5 = COPY [[UV5]](s32) - ; CHECK-NEXT: $vgpr6 = COPY [[UV6]](s32) - ; CHECK-NEXT: $vgpr7 = COPY [[UV7]](s32) - ; CHECK-NEXT: $vgpr8 = COPY [[UV8]](s32) - ; CHECK-NEXT: $vgpr9 = COPY [[UV9]](s32) - ; CHECK-NEXT: $vgpr10 = COPY [[UV10]](s32) - ; CHECK-NEXT: $vgpr11 = COPY [[UV11]](s32) - ; CHECK-NEXT: $vgpr12 = COPY [[UV12]](s32) - ; CHECK-NEXT: $vgpr13 = COPY [[UV13]](s32) - ; CHECK-NEXT: $vgpr14 = COPY [[UV14]](s32) - ; CHECK-NEXT: $vgpr15 = COPY [[UV15]](s32) - ; CHECK-NEXT: $vgpr16 = COPY [[UV16]](s32) - ; CHECK-NEXT: $vgpr17 = COPY [[UV17]](s32) - ; CHECK-NEXT: $vgpr18 = COPY [[UV18]](s32) - ; CHECK-NEXT: $vgpr19 = COPY [[UV19]](s32) - ; CHECK-NEXT: $vgpr20 = COPY [[UV20]](s32) - ; CHECK-NEXT: $vgpr21 = COPY [[UV21]](s32) - 
; CHECK-NEXT: $vgpr22 = COPY [[UV22]](s32) - ; CHECK-NEXT: $vgpr23 = COPY [[UV23]](s32) - ; CHECK-NEXT: $vgpr24 = COPY [[UV24]](s32) - ; CHECK-NEXT: $vgpr25 = COPY [[UV25]](s32) - ; CHECK-NEXT: $vgpr26 = COPY [[UV26]](s32) - ; CHECK-NEXT: $vgpr27 = COPY [[UV27]](s32) - ; CHECK-NEXT: $vgpr28 = COPY [[UV28]](s32) - ; CHECK-NEXT: $vgpr29 = COPY [[UV29]](s32) - ; CHECK-NEXT: $vgpr30 = COPY [[UV30]](s32) - ; CHECK-NEXT: $vgpr31 = COPY [[UV31]](s32) - ; CHECK-NEXT: SI_RETURN implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7, implicit $vgpr8, implicit $vgpr9, implicit $vgpr10, implicit $vgpr11, implicit $vgpr12, implicit $vgpr13, implicit $vgpr14, implicit $vgpr15, implicit $vgpr16, implicit $vgpr17, implicit $vgpr18, implicit $vgpr19, implicit $vgpr20, implicit $vgpr21, implicit $vgpr22, implicit $vgpr23, implicit $vgpr24, implicit $vgpr25, implicit $vgpr26, implicit $vgpr27, implicit $vgpr28, implicit $vgpr29, implicit $vgpr30, implicit $vgpr31 + ; CHECK: liveins: $sgpr30_sgpr31 + ; CHECK: [[COPY:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 + ; CHECK: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF + ; CHECK: [[LOAD:%[0-9]+]]:_(s1022) = G_LOAD [[DEF]](p1) :: (load (s1022) from `i1022 addrspace(1)* undef`, align 8, addrspace 1) + ; CHECK: [[ANYEXT:%[0-9]+]]:_(s1024) = G_ANYEXT [[LOAD]](s1022) + ; CHECK: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32), [[UV6:%[0-9]+]]:_(s32), [[UV7:%[0-9]+]]:_(s32), [[UV8:%[0-9]+]]:_(s32), [[UV9:%[0-9]+]]:_(s32), [[UV10:%[0-9]+]]:_(s32), [[UV11:%[0-9]+]]:_(s32), [[UV12:%[0-9]+]]:_(s32), [[UV13:%[0-9]+]]:_(s32), [[UV14:%[0-9]+]]:_(s32), [[UV15:%[0-9]+]]:_(s32), [[UV16:%[0-9]+]]:_(s32), [[UV17:%[0-9]+]]:_(s32), [[UV18:%[0-9]+]]:_(s32), [[UV19:%[0-9]+]]:_(s32), [[UV20:%[0-9]+]]:_(s32), [[UV21:%[0-9]+]]:_(s32), [[UV22:%[0-9]+]]:_(s32), [[UV23:%[0-9]+]]:_(s32), [[UV24:%[0-9]+]]:_(s32), [[UV25:%[0-9]+]]:_(s32), [[UV26:%[0-9]+]]:_(s32), [[UV27:%[0-9]+]]:_(s32), [[UV28:%[0-9]+]]:_(s32), [[UV29:%[0-9]+]]:_(s32), [[UV30:%[0-9]+]]:_(s32), [[UV31:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[ANYEXT]](s1024) + ; CHECK: $vgpr0 = COPY [[UV]](s32) + ; CHECK: $vgpr1 = COPY [[UV1]](s32) + ; CHECK: $vgpr2 = COPY [[UV2]](s32) + ; CHECK: $vgpr3 = COPY [[UV3]](s32) + ; CHECK: $vgpr4 = COPY [[UV4]](s32) + ; CHECK: $vgpr5 = COPY [[UV5]](s32) + ; CHECK: $vgpr6 = COPY [[UV6]](s32) + ; CHECK: $vgpr7 = COPY [[UV7]](s32) + ; CHECK: $vgpr8 = COPY [[UV8]](s32) + ; CHECK: $vgpr9 = COPY [[UV9]](s32) + ; CHECK: $vgpr10 = COPY [[UV10]](s32) + ; CHECK: $vgpr11 = COPY [[UV11]](s32) + ; CHECK: $vgpr12 = COPY [[UV12]](s32) + ; CHECK: $vgpr13 = COPY [[UV13]](s32) + ; CHECK: $vgpr14 = COPY [[UV14]](s32) + ; CHECK: $vgpr15 = COPY [[UV15]](s32) + ; CHECK: $vgpr16 = COPY [[UV16]](s32) + ; CHECK: $vgpr17 = COPY [[UV17]](s32) + ; CHECK: $vgpr18 = COPY [[UV18]](s32) + ; CHECK: $vgpr19 = COPY [[UV19]](s32) + ; CHECK: $vgpr20 = COPY [[UV20]](s32) + ; CHECK: $vgpr21 = COPY [[UV21]](s32) + ; CHECK: $vgpr22 = COPY [[UV22]](s32) + ; CHECK: $vgpr23 = COPY [[UV23]](s32) + ; CHECK: $vgpr24 = COPY [[UV24]](s32) + ; CHECK: $vgpr25 = COPY [[UV25]](s32) + ; CHECK: $vgpr26 = COPY [[UV26]](s32) + ; CHECK: $vgpr27 = COPY [[UV27]](s32) + ; CHECK: $vgpr28 = COPY [[UV28]](s32) + ; CHECK: $vgpr29 = COPY [[UV29]](s32) + ; CHECK: $vgpr30 = COPY [[UV30]](s32) + ; CHECK: $vgpr31 = COPY [[UV31]](s32) + ; CHECK: [[COPY1:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY]] + ; CHECK: 
S_SETPC_B64_return [[COPY1]], implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7, implicit $vgpr8, implicit $vgpr9, implicit $vgpr10, implicit $vgpr11, implicit $vgpr12, implicit $vgpr13, implicit $vgpr14, implicit $vgpr15, implicit $vgpr16, implicit $vgpr17, implicit $vgpr18, implicit $vgpr19, implicit $vgpr20, implicit $vgpr21, implicit $vgpr22, implicit $vgpr23, implicit $vgpr24, implicit $vgpr25, implicit $vgpr26, implicit $vgpr27, implicit $vgpr28, implicit $vgpr29, implicit $vgpr30, implicit $vgpr31 %val = load i1022, i1022 addrspace(1)* undef ret i1022 %val } @@ -1200,43 +1380,46 @@ define i1022 @i1022_func_void() #0 { define signext i1022 @i1022_signext_func_void() #0 { ; CHECK-LABEL: name: i1022_signext_func_void ; CHECK: bb.1 (%ir-block.0): - ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF - ; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(s1022) = G_LOAD [[DEF]](p1) :: (load (s1022) from `i1022 addrspace(1)* undef`, align 8, addrspace 1) - ; CHECK-NEXT: [[SEXT:%[0-9]+]]:_(s1024) = G_SEXT [[LOAD]](s1022) - ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32), [[UV6:%[0-9]+]]:_(s32), [[UV7:%[0-9]+]]:_(s32), [[UV8:%[0-9]+]]:_(s32), [[UV9:%[0-9]+]]:_(s32), [[UV10:%[0-9]+]]:_(s32), [[UV11:%[0-9]+]]:_(s32), [[UV12:%[0-9]+]]:_(s32), [[UV13:%[0-9]+]]:_(s32), [[UV14:%[0-9]+]]:_(s32), [[UV15:%[0-9]+]]:_(s32), [[UV16:%[0-9]+]]:_(s32), [[UV17:%[0-9]+]]:_(s32), [[UV18:%[0-9]+]]:_(s32), [[UV19:%[0-9]+]]:_(s32), [[UV20:%[0-9]+]]:_(s32), [[UV21:%[0-9]+]]:_(s32), [[UV22:%[0-9]+]]:_(s32), [[UV23:%[0-9]+]]:_(s32), [[UV24:%[0-9]+]]:_(s32), [[UV25:%[0-9]+]]:_(s32), [[UV26:%[0-9]+]]:_(s32), [[UV27:%[0-9]+]]:_(s32), [[UV28:%[0-9]+]]:_(s32), [[UV29:%[0-9]+]]:_(s32), [[UV30:%[0-9]+]]:_(s32), [[UV31:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[SEXT]](s1024) - ; CHECK-NEXT: $vgpr0 = COPY [[UV]](s32) - ; CHECK-NEXT: $vgpr1 = COPY [[UV1]](s32) - ; CHECK-NEXT: $vgpr2 = COPY [[UV2]](s32) - ; CHECK-NEXT: $vgpr3 = COPY [[UV3]](s32) - ; CHECK-NEXT: $vgpr4 = COPY [[UV4]](s32) - ; CHECK-NEXT: $vgpr5 = COPY [[UV5]](s32) - ; CHECK-NEXT: $vgpr6 = COPY [[UV6]](s32) - ; CHECK-NEXT: $vgpr7 = COPY [[UV7]](s32) - ; CHECK-NEXT: $vgpr8 = COPY [[UV8]](s32) - ; CHECK-NEXT: $vgpr9 = COPY [[UV9]](s32) - ; CHECK-NEXT: $vgpr10 = COPY [[UV10]](s32) - ; CHECK-NEXT: $vgpr11 = COPY [[UV11]](s32) - ; CHECK-NEXT: $vgpr12 = COPY [[UV12]](s32) - ; CHECK-NEXT: $vgpr13 = COPY [[UV13]](s32) - ; CHECK-NEXT: $vgpr14 = COPY [[UV14]](s32) - ; CHECK-NEXT: $vgpr15 = COPY [[UV15]](s32) - ; CHECK-NEXT: $vgpr16 = COPY [[UV16]](s32) - ; CHECK-NEXT: $vgpr17 = COPY [[UV17]](s32) - ; CHECK-NEXT: $vgpr18 = COPY [[UV18]](s32) - ; CHECK-NEXT: $vgpr19 = COPY [[UV19]](s32) - ; CHECK-NEXT: $vgpr20 = COPY [[UV20]](s32) - ; CHECK-NEXT: $vgpr21 = COPY [[UV21]](s32) - ; CHECK-NEXT: $vgpr22 = COPY [[UV22]](s32) - ; CHECK-NEXT: $vgpr23 = COPY [[UV23]](s32) - ; CHECK-NEXT: $vgpr24 = COPY [[UV24]](s32) - ; CHECK-NEXT: $vgpr25 = COPY [[UV25]](s32) - ; CHECK-NEXT: $vgpr26 = COPY [[UV26]](s32) - ; CHECK-NEXT: $vgpr27 = COPY [[UV27]](s32) - ; CHECK-NEXT: $vgpr28 = COPY [[UV28]](s32) - ; CHECK-NEXT: $vgpr29 = COPY [[UV29]](s32) - ; CHECK-NEXT: $vgpr30 = COPY [[UV30]](s32) - ; CHECK-NEXT: $vgpr31 = COPY [[UV31]](s32) - ; CHECK-NEXT: SI_RETURN implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7, implicit $vgpr8, implicit 
$vgpr9, implicit $vgpr10, implicit $vgpr11, implicit $vgpr12, implicit $vgpr13, implicit $vgpr14, implicit $vgpr15, implicit $vgpr16, implicit $vgpr17, implicit $vgpr18, implicit $vgpr19, implicit $vgpr20, implicit $vgpr21, implicit $vgpr22, implicit $vgpr23, implicit $vgpr24, implicit $vgpr25, implicit $vgpr26, implicit $vgpr27, implicit $vgpr28, implicit $vgpr29, implicit $vgpr30, implicit $vgpr31 + ; CHECK: liveins: $sgpr30_sgpr31 + ; CHECK: [[COPY:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 + ; CHECK: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF + ; CHECK: [[LOAD:%[0-9]+]]:_(s1022) = G_LOAD [[DEF]](p1) :: (load (s1022) from `i1022 addrspace(1)* undef`, align 8, addrspace 1) + ; CHECK: [[SEXT:%[0-9]+]]:_(s1024) = G_SEXT [[LOAD]](s1022) + ; CHECK: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32), [[UV6:%[0-9]+]]:_(s32), [[UV7:%[0-9]+]]:_(s32), [[UV8:%[0-9]+]]:_(s32), [[UV9:%[0-9]+]]:_(s32), [[UV10:%[0-9]+]]:_(s32), [[UV11:%[0-9]+]]:_(s32), [[UV12:%[0-9]+]]:_(s32), [[UV13:%[0-9]+]]:_(s32), [[UV14:%[0-9]+]]:_(s32), [[UV15:%[0-9]+]]:_(s32), [[UV16:%[0-9]+]]:_(s32), [[UV17:%[0-9]+]]:_(s32), [[UV18:%[0-9]+]]:_(s32), [[UV19:%[0-9]+]]:_(s32), [[UV20:%[0-9]+]]:_(s32), [[UV21:%[0-9]+]]:_(s32), [[UV22:%[0-9]+]]:_(s32), [[UV23:%[0-9]+]]:_(s32), [[UV24:%[0-9]+]]:_(s32), [[UV25:%[0-9]+]]:_(s32), [[UV26:%[0-9]+]]:_(s32), [[UV27:%[0-9]+]]:_(s32), [[UV28:%[0-9]+]]:_(s32), [[UV29:%[0-9]+]]:_(s32), [[UV30:%[0-9]+]]:_(s32), [[UV31:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[SEXT]](s1024) + ; CHECK: $vgpr0 = COPY [[UV]](s32) + ; CHECK: $vgpr1 = COPY [[UV1]](s32) + ; CHECK: $vgpr2 = COPY [[UV2]](s32) + ; CHECK: $vgpr3 = COPY [[UV3]](s32) + ; CHECK: $vgpr4 = COPY [[UV4]](s32) + ; CHECK: $vgpr5 = COPY [[UV5]](s32) + ; CHECK: $vgpr6 = COPY [[UV6]](s32) + ; CHECK: $vgpr7 = COPY [[UV7]](s32) + ; CHECK: $vgpr8 = COPY [[UV8]](s32) + ; CHECK: $vgpr9 = COPY [[UV9]](s32) + ; CHECK: $vgpr10 = COPY [[UV10]](s32) + ; CHECK: $vgpr11 = COPY [[UV11]](s32) + ; CHECK: $vgpr12 = COPY [[UV12]](s32) + ; CHECK: $vgpr13 = COPY [[UV13]](s32) + ; CHECK: $vgpr14 = COPY [[UV14]](s32) + ; CHECK: $vgpr15 = COPY [[UV15]](s32) + ; CHECK: $vgpr16 = COPY [[UV16]](s32) + ; CHECK: $vgpr17 = COPY [[UV17]](s32) + ; CHECK: $vgpr18 = COPY [[UV18]](s32) + ; CHECK: $vgpr19 = COPY [[UV19]](s32) + ; CHECK: $vgpr20 = COPY [[UV20]](s32) + ; CHECK: $vgpr21 = COPY [[UV21]](s32) + ; CHECK: $vgpr22 = COPY [[UV22]](s32) + ; CHECK: $vgpr23 = COPY [[UV23]](s32) + ; CHECK: $vgpr24 = COPY [[UV24]](s32) + ; CHECK: $vgpr25 = COPY [[UV25]](s32) + ; CHECK: $vgpr26 = COPY [[UV26]](s32) + ; CHECK: $vgpr27 = COPY [[UV27]](s32) + ; CHECK: $vgpr28 = COPY [[UV28]](s32) + ; CHECK: $vgpr29 = COPY [[UV29]](s32) + ; CHECK: $vgpr30 = COPY [[UV30]](s32) + ; CHECK: $vgpr31 = COPY [[UV31]](s32) + ; CHECK: [[COPY1:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY]] + ; CHECK: S_SETPC_B64_return [[COPY1]], implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7, implicit $vgpr8, implicit $vgpr9, implicit $vgpr10, implicit $vgpr11, implicit $vgpr12, implicit $vgpr13, implicit $vgpr14, implicit $vgpr15, implicit $vgpr16, implicit $vgpr17, implicit $vgpr18, implicit $vgpr19, implicit $vgpr20, implicit $vgpr21, implicit $vgpr22, implicit $vgpr23, implicit $vgpr24, implicit $vgpr25, implicit $vgpr26, implicit $vgpr27, implicit $vgpr28, implicit $vgpr29, implicit $vgpr30, implicit $vgpr31 %val = load i1022, i1022 addrspace(1)* undef ret i1022 %val 
} @@ -1244,43 +1427,46 @@ define signext i1022 @i1022_signext_func_void() #0 { define zeroext i1022 @i1022_zeroext_func_void() #0 { ; CHECK-LABEL: name: i1022_zeroext_func_void ; CHECK: bb.1 (%ir-block.0): - ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF - ; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(s1022) = G_LOAD [[DEF]](p1) :: (load (s1022) from `i1022 addrspace(1)* undef`, align 8, addrspace 1) - ; CHECK-NEXT: [[ZEXT:%[0-9]+]]:_(s1024) = G_ZEXT [[LOAD]](s1022) - ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32), [[UV6:%[0-9]+]]:_(s32), [[UV7:%[0-9]+]]:_(s32), [[UV8:%[0-9]+]]:_(s32), [[UV9:%[0-9]+]]:_(s32), [[UV10:%[0-9]+]]:_(s32), [[UV11:%[0-9]+]]:_(s32), [[UV12:%[0-9]+]]:_(s32), [[UV13:%[0-9]+]]:_(s32), [[UV14:%[0-9]+]]:_(s32), [[UV15:%[0-9]+]]:_(s32), [[UV16:%[0-9]+]]:_(s32), [[UV17:%[0-9]+]]:_(s32), [[UV18:%[0-9]+]]:_(s32), [[UV19:%[0-9]+]]:_(s32), [[UV20:%[0-9]+]]:_(s32), [[UV21:%[0-9]+]]:_(s32), [[UV22:%[0-9]+]]:_(s32), [[UV23:%[0-9]+]]:_(s32), [[UV24:%[0-9]+]]:_(s32), [[UV25:%[0-9]+]]:_(s32), [[UV26:%[0-9]+]]:_(s32), [[UV27:%[0-9]+]]:_(s32), [[UV28:%[0-9]+]]:_(s32), [[UV29:%[0-9]+]]:_(s32), [[UV30:%[0-9]+]]:_(s32), [[UV31:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[ZEXT]](s1024) - ; CHECK-NEXT: $vgpr0 = COPY [[UV]](s32) - ; CHECK-NEXT: $vgpr1 = COPY [[UV1]](s32) - ; CHECK-NEXT: $vgpr2 = COPY [[UV2]](s32) - ; CHECK-NEXT: $vgpr3 = COPY [[UV3]](s32) - ; CHECK-NEXT: $vgpr4 = COPY [[UV4]](s32) - ; CHECK-NEXT: $vgpr5 = COPY [[UV5]](s32) - ; CHECK-NEXT: $vgpr6 = COPY [[UV6]](s32) - ; CHECK-NEXT: $vgpr7 = COPY [[UV7]](s32) - ; CHECK-NEXT: $vgpr8 = COPY [[UV8]](s32) - ; CHECK-NEXT: $vgpr9 = COPY [[UV9]](s32) - ; CHECK-NEXT: $vgpr10 = COPY [[UV10]](s32) - ; CHECK-NEXT: $vgpr11 = COPY [[UV11]](s32) - ; CHECK-NEXT: $vgpr12 = COPY [[UV12]](s32) - ; CHECK-NEXT: $vgpr13 = COPY [[UV13]](s32) - ; CHECK-NEXT: $vgpr14 = COPY [[UV14]](s32) - ; CHECK-NEXT: $vgpr15 = COPY [[UV15]](s32) - ; CHECK-NEXT: $vgpr16 = COPY [[UV16]](s32) - ; CHECK-NEXT: $vgpr17 = COPY [[UV17]](s32) - ; CHECK-NEXT: $vgpr18 = COPY [[UV18]](s32) - ; CHECK-NEXT: $vgpr19 = COPY [[UV19]](s32) - ; CHECK-NEXT: $vgpr20 = COPY [[UV20]](s32) - ; CHECK-NEXT: $vgpr21 = COPY [[UV21]](s32) - ; CHECK-NEXT: $vgpr22 = COPY [[UV22]](s32) - ; CHECK-NEXT: $vgpr23 = COPY [[UV23]](s32) - ; CHECK-NEXT: $vgpr24 = COPY [[UV24]](s32) - ; CHECK-NEXT: $vgpr25 = COPY [[UV25]](s32) - ; CHECK-NEXT: $vgpr26 = COPY [[UV26]](s32) - ; CHECK-NEXT: $vgpr27 = COPY [[UV27]](s32) - ; CHECK-NEXT: $vgpr28 = COPY [[UV28]](s32) - ; CHECK-NEXT: $vgpr29 = COPY [[UV29]](s32) - ; CHECK-NEXT: $vgpr30 = COPY [[UV30]](s32) - ; CHECK-NEXT: $vgpr31 = COPY [[UV31]](s32) - ; CHECK-NEXT: SI_RETURN implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7, implicit $vgpr8, implicit $vgpr9, implicit $vgpr10, implicit $vgpr11, implicit $vgpr12, implicit $vgpr13, implicit $vgpr14, implicit $vgpr15, implicit $vgpr16, implicit $vgpr17, implicit $vgpr18, implicit $vgpr19, implicit $vgpr20, implicit $vgpr21, implicit $vgpr22, implicit $vgpr23, implicit $vgpr24, implicit $vgpr25, implicit $vgpr26, implicit $vgpr27, implicit $vgpr28, implicit $vgpr29, implicit $vgpr30, implicit $vgpr31 + ; CHECK: liveins: $sgpr30_sgpr31 + ; CHECK: [[COPY:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 + ; CHECK: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF + ; CHECK: [[LOAD:%[0-9]+]]:_(s1022) = G_LOAD [[DEF]](p1) :: (load (s1022) from `i1022 
addrspace(1)* undef`, align 8, addrspace 1) + ; CHECK: [[ZEXT:%[0-9]+]]:_(s1024) = G_ZEXT [[LOAD]](s1022) + ; CHECK: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32), [[UV6:%[0-9]+]]:_(s32), [[UV7:%[0-9]+]]:_(s32), [[UV8:%[0-9]+]]:_(s32), [[UV9:%[0-9]+]]:_(s32), [[UV10:%[0-9]+]]:_(s32), [[UV11:%[0-9]+]]:_(s32), [[UV12:%[0-9]+]]:_(s32), [[UV13:%[0-9]+]]:_(s32), [[UV14:%[0-9]+]]:_(s32), [[UV15:%[0-9]+]]:_(s32), [[UV16:%[0-9]+]]:_(s32), [[UV17:%[0-9]+]]:_(s32), [[UV18:%[0-9]+]]:_(s32), [[UV19:%[0-9]+]]:_(s32), [[UV20:%[0-9]+]]:_(s32), [[UV21:%[0-9]+]]:_(s32), [[UV22:%[0-9]+]]:_(s32), [[UV23:%[0-9]+]]:_(s32), [[UV24:%[0-9]+]]:_(s32), [[UV25:%[0-9]+]]:_(s32), [[UV26:%[0-9]+]]:_(s32), [[UV27:%[0-9]+]]:_(s32), [[UV28:%[0-9]+]]:_(s32), [[UV29:%[0-9]+]]:_(s32), [[UV30:%[0-9]+]]:_(s32), [[UV31:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[ZEXT]](s1024) + ; CHECK: $vgpr0 = COPY [[UV]](s32) + ; CHECK: $vgpr1 = COPY [[UV1]](s32) + ; CHECK: $vgpr2 = COPY [[UV2]](s32) + ; CHECK: $vgpr3 = COPY [[UV3]](s32) + ; CHECK: $vgpr4 = COPY [[UV4]](s32) + ; CHECK: $vgpr5 = COPY [[UV5]](s32) + ; CHECK: $vgpr6 = COPY [[UV6]](s32) + ; CHECK: $vgpr7 = COPY [[UV7]](s32) + ; CHECK: $vgpr8 = COPY [[UV8]](s32) + ; CHECK: $vgpr9 = COPY [[UV9]](s32) + ; CHECK: $vgpr10 = COPY [[UV10]](s32) + ; CHECK: $vgpr11 = COPY [[UV11]](s32) + ; CHECK: $vgpr12 = COPY [[UV12]](s32) + ; CHECK: $vgpr13 = COPY [[UV13]](s32) + ; CHECK: $vgpr14 = COPY [[UV14]](s32) + ; CHECK: $vgpr15 = COPY [[UV15]](s32) + ; CHECK: $vgpr16 = COPY [[UV16]](s32) + ; CHECK: $vgpr17 = COPY [[UV17]](s32) + ; CHECK: $vgpr18 = COPY [[UV18]](s32) + ; CHECK: $vgpr19 = COPY [[UV19]](s32) + ; CHECK: $vgpr20 = COPY [[UV20]](s32) + ; CHECK: $vgpr21 = COPY [[UV21]](s32) + ; CHECK: $vgpr22 = COPY [[UV22]](s32) + ; CHECK: $vgpr23 = COPY [[UV23]](s32) + ; CHECK: $vgpr24 = COPY [[UV24]](s32) + ; CHECK: $vgpr25 = COPY [[UV25]](s32) + ; CHECK: $vgpr26 = COPY [[UV26]](s32) + ; CHECK: $vgpr27 = COPY [[UV27]](s32) + ; CHECK: $vgpr28 = COPY [[UV28]](s32) + ; CHECK: $vgpr29 = COPY [[UV29]](s32) + ; CHECK: $vgpr30 = COPY [[UV30]](s32) + ; CHECK: $vgpr31 = COPY [[UV31]](s32) + ; CHECK: [[COPY1:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY]] + ; CHECK: S_SETPC_B64_return [[COPY1]], implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7, implicit $vgpr8, implicit $vgpr9, implicit $vgpr10, implicit $vgpr11, implicit $vgpr12, implicit $vgpr13, implicit $vgpr14, implicit $vgpr15, implicit $vgpr16, implicit $vgpr17, implicit $vgpr18, implicit $vgpr19, implicit $vgpr20, implicit $vgpr21, implicit $vgpr22, implicit $vgpr23, implicit $vgpr24, implicit $vgpr25, implicit $vgpr26, implicit $vgpr27, implicit $vgpr28, implicit $vgpr29, implicit $vgpr30, implicit $vgpr31 %val = load i1022, i1022 addrspace(1)* undef ret i1022 %val } @@ -1290,31 +1476,32 @@ define zeroext i1022 @i1022_zeroext_func_void() #0 { define %struct.with.ptrs @ptr_in_struct_func_void() #0 { ; CHECK-LABEL: name: ptr_in_struct_func_void ; CHECK: bb.1 (%ir-block.0): - ; CHECK-NEXT: liveins: $vgpr0 - ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 - ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF - ; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(<32 x s32>) = G_LOAD [[DEF]](p1) :: (volatile load (<32 x s32>) from `%struct.with.ptrs addrspace(1)* undef`, addrspace 1) - ; CHECK-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 128 - ; CHECK-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = 
G_PTR_ADD [[DEF]], [[C]](s64) - ; CHECK-NEXT: [[LOAD1:%[0-9]+]]:_(p3) = G_LOAD [[PTR_ADD]](p1) :: (volatile load (p3) from `%struct.with.ptrs addrspace(1)* undef` + 128, align 128, addrspace 1) - ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 136 - ; CHECK-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p1) = G_PTR_ADD [[DEF]], [[C1]](s64) - ; CHECK-NEXT: [[LOAD2:%[0-9]+]]:_(p1) = G_LOAD [[PTR_ADD1]](p1) :: (volatile load (p1) from `%struct.with.ptrs addrspace(1)* undef` + 136, addrspace 1) - ; CHECK-NEXT: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 144 - ; CHECK-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p1) = G_PTR_ADD [[DEF]], [[C2]](s64) - ; CHECK-NEXT: [[LOAD3:%[0-9]+]]:_(<2 x p1>) = G_LOAD [[PTR_ADD2]](p1) :: (volatile load (<2 x p1>) from `%struct.with.ptrs addrspace(1)* undef` + 144, addrspace 1) - ; CHECK-NEXT: G_STORE [[LOAD]](<32 x s32>), [[COPY]](p5) :: (store (<32 x s32>), addrspace 5) - ; CHECK-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 128 - ; CHECK-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C3]](s32) - ; CHECK-NEXT: G_STORE [[LOAD1]](p3), [[PTR_ADD3]](p5) :: (store (p3), align 128, addrspace 5) - ; CHECK-NEXT: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 136 - ; CHECK-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C4]](s32) - ; CHECK-NEXT: G_STORE [[LOAD2]](p1), [[PTR_ADD4]](p5) :: (store (p1), addrspace 5) - ; CHECK-NEXT: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 144 - ; CHECK-NEXT: [[PTR_ADD5:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C5]](s32) - ; CHECK-NEXT: G_STORE [[LOAD3]](<2 x p1>), [[PTR_ADD5]](p5) :: (store (<2 x p1>), addrspace 5) - ; CHECK-NEXT: SI_RETURN + ; CHECK: liveins: $vgpr0, $sgpr30_sgpr31 + ; CHECK: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 + ; CHECK: [[COPY1:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 + ; CHECK: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF + ; CHECK: [[LOAD:%[0-9]+]]:_(<32 x s32>) = G_LOAD [[DEF]](p1) :: (volatile load (<32 x s32>) from `%struct.with.ptrs addrspace(1)* undef`, addrspace 1) + ; CHECK: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 128 + ; CHECK: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[DEF]], [[C]](s64) + ; CHECK: [[LOAD1:%[0-9]+]]:_(p3) = G_LOAD [[PTR_ADD]](p1) :: (volatile load (p3) from `%struct.with.ptrs addrspace(1)* undef` + 128, align 128, addrspace 1) + ; CHECK: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 136 + ; CHECK: [[PTR_ADD1:%[0-9]+]]:_(p1) = G_PTR_ADD [[DEF]], [[C1]](s64) + ; CHECK: [[LOAD2:%[0-9]+]]:_(p1) = G_LOAD [[PTR_ADD1]](p1) :: (volatile load (p1) from `%struct.with.ptrs addrspace(1)* undef` + 136, addrspace 1) + ; CHECK: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 144 + ; CHECK: [[PTR_ADD2:%[0-9]+]]:_(p1) = G_PTR_ADD [[DEF]], [[C2]](s64) + ; CHECK: [[LOAD3:%[0-9]+]]:_(<2 x p1>) = G_LOAD [[PTR_ADD2]](p1) :: (volatile load (<2 x p1>) from `%struct.with.ptrs addrspace(1)* undef` + 144, addrspace 1) + ; CHECK: G_STORE [[LOAD]](<32 x s32>), [[COPY]](p5) :: (store (<32 x s32>), addrspace 5) + ; CHECK: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 128 + ; CHECK: [[PTR_ADD3:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C3]](s32) + ; CHECK: G_STORE [[LOAD1]](p3), [[PTR_ADD3]](p5) :: (store (p3), align 128, addrspace 5) + ; CHECK: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 136 + ; CHECK: [[PTR_ADD4:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C4]](s32) + ; CHECK: G_STORE [[LOAD2]](p1), [[PTR_ADD4]](p5) :: (store (p1), addrspace 5) + ; CHECK: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 144 + ; CHECK: [[PTR_ADD5:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C5]](s32) + ; CHECK: G_STORE [[LOAD3]](<2 x p1>), [[PTR_ADD5]](p5) :: (store (<2 x p1>), addrspace 5) + ; CHECK: 
[[COPY2:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY1]] + ; CHECK: S_SETPC_B64_return [[COPY2]] %val = load volatile %struct.with.ptrs, %struct.with.ptrs addrspace(1)* undef ret %struct.with.ptrs %val } diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/global-value.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/global-value.ll index 17a1a86cc649..420b9ef437b1 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/global-value.ll +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/global-value.ll @@ -16,89 +16,113 @@ define i32 addrspace(4)* @external_constant_got() { ; GCN-LABEL: name: external_constant_got ; GCN: bb.1 (%ir-block.0): + ; GCN: liveins: $sgpr30_sgpr31 + ; GCN: [[COPY:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 ; GCN: [[SI_PC_ADD_REL_OFFSET:%[0-9]+]]:sreg_64(p4) = SI_PC_ADD_REL_OFFSET target-flags(amdgpu-gotprel32-lo) @external_constant + 4, target-flags(amdgpu-gotprel32-hi) @external_constant + 12, implicit-def $scc ; GCN: [[LOAD:%[0-9]+]]:_(p4) = G_LOAD [[SI_PC_ADD_REL_OFFSET]](p4) :: (dereferenceable invariant load (p4) from got, addrspace 4) ; GCN: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[LOAD]](p4) ; GCN: $vgpr0 = COPY [[UV]](s32) ; GCN: $vgpr1 = COPY [[UV1]](s32) - ; GCN: SI_RETURN implicit $vgpr0, implicit $vgpr1 + ; GCN: [[COPY1:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY]] + ; GCN: S_SETPC_B64_return [[COPY1]], implicit $vgpr0, implicit $vgpr1 ret i32 addrspace(4)* @external_constant } define i32 addrspace(1)* @external_global_got() { ; GCN-LABEL: name: external_global_got ; GCN: bb.1 (%ir-block.0): + ; GCN: liveins: $sgpr30_sgpr31 + ; GCN: [[COPY:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 ; GCN: [[SI_PC_ADD_REL_OFFSET:%[0-9]+]]:sreg_64(p4) = SI_PC_ADD_REL_OFFSET target-flags(amdgpu-gotprel32-lo) @external_global + 4, target-flags(amdgpu-gotprel32-hi) @external_global + 12, implicit-def $scc ; GCN: [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[SI_PC_ADD_REL_OFFSET]](p4) :: (dereferenceable invariant load (p1) from got, addrspace 4) ; GCN: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[LOAD]](p1) ; GCN: $vgpr0 = COPY [[UV]](s32) ; GCN: $vgpr1 = COPY [[UV1]](s32) - ; GCN: SI_RETURN implicit $vgpr0, implicit $vgpr1 + ; GCN: [[COPY1:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY]] + ; GCN: S_SETPC_B64_return [[COPY1]], implicit $vgpr0, implicit $vgpr1 ret i32 addrspace(1)* @external_global } define i32 addrspace(999)* @external_other_got() { ; GCN-LABEL: name: external_other_got ; GCN: bb.1 (%ir-block.0): + ; GCN: liveins: $sgpr30_sgpr31 + ; GCN: [[COPY:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 ; GCN: [[SI_PC_ADD_REL_OFFSET:%[0-9]+]]:sreg_64(p4) = SI_PC_ADD_REL_OFFSET target-flags(amdgpu-gotprel32-lo) @external_other + 4, target-flags(amdgpu-gotprel32-hi) @external_other + 12, implicit-def $scc ; GCN: [[LOAD:%[0-9]+]]:_(p999) = G_LOAD [[SI_PC_ADD_REL_OFFSET]](p4) :: (dereferenceable invariant load (p999) from got, addrspace 4) ; GCN: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[LOAD]](p999) ; GCN: $vgpr0 = COPY [[UV]](s32) ; GCN: $vgpr1 = COPY [[UV1]](s32) - ; GCN: SI_RETURN implicit $vgpr0, implicit $vgpr1 + ; GCN: [[COPY1:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY]] + ; GCN: S_SETPC_B64_return [[COPY1]], implicit $vgpr0, implicit $vgpr1 ret i32 addrspace(999)* @external_other } define i32 addrspace(4)* @internal_constant_pcrel() { ; GCN-LABEL: name: internal_constant_pcrel ; GCN: bb.1 (%ir-block.0): + ; GCN: liveins: $sgpr30_sgpr31 + ; GCN: [[COPY:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 ; GCN: [[SI_PC_ADD_REL_OFFSET:%[0-9]+]]:sreg_64(p4) = SI_PC_ADD_REL_OFFSET 
target-flags(amdgpu-rel32-lo) @internal_constant + 4, target-flags(amdgpu-rel32-hi) @internal_constant + 12, implicit-def $scc ; GCN: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[SI_PC_ADD_REL_OFFSET]](p4) ; GCN: $vgpr0 = COPY [[UV]](s32) ; GCN: $vgpr1 = COPY [[UV1]](s32) - ; GCN: SI_RETURN implicit $vgpr0, implicit $vgpr1 + ; GCN: [[COPY1:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY]] + ; GCN: S_SETPC_B64_return [[COPY1]], implicit $vgpr0, implicit $vgpr1 ret i32 addrspace(4)* @internal_constant } define i32 addrspace(1)* @internal_global_pcrel() { ; GCN-LABEL: name: internal_global_pcrel ; GCN: bb.1 (%ir-block.0): + ; GCN: liveins: $sgpr30_sgpr31 + ; GCN: [[COPY:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 ; GCN: [[SI_PC_ADD_REL_OFFSET:%[0-9]+]]:sreg_64(p1) = SI_PC_ADD_REL_OFFSET target-flags(amdgpu-rel32-lo) @internal_global + 4, target-flags(amdgpu-rel32-hi) @internal_global + 12, implicit-def $scc ; GCN: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[SI_PC_ADD_REL_OFFSET]](p1) ; GCN: $vgpr0 = COPY [[UV]](s32) ; GCN: $vgpr1 = COPY [[UV1]](s32) - ; GCN: SI_RETURN implicit $vgpr0, implicit $vgpr1 + ; GCN: [[COPY1:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY]] + ; GCN: S_SETPC_B64_return [[COPY1]], implicit $vgpr0, implicit $vgpr1 ret i32 addrspace(1)* @internal_global } define i32 addrspace(999)* @internal_other_pcrel() { ; GCN-LABEL: name: internal_other_pcrel ; GCN: bb.1 (%ir-block.0): + ; GCN: liveins: $sgpr30_sgpr31 + ; GCN: [[COPY:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 ; GCN: [[SI_PC_ADD_REL_OFFSET:%[0-9]+]]:sreg_64(p999) = SI_PC_ADD_REL_OFFSET target-flags(amdgpu-rel32-lo) @internal_other + 4, target-flags(amdgpu-rel32-hi) @internal_other + 12, implicit-def $scc ; GCN: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[SI_PC_ADD_REL_OFFSET]](p999) ; GCN: $vgpr0 = COPY [[UV]](s32) ; GCN: $vgpr1 = COPY [[UV1]](s32) - ; GCN: SI_RETURN implicit $vgpr0, implicit $vgpr1 + ; GCN: [[COPY1:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY]] + ; GCN: S_SETPC_B64_return [[COPY1]], implicit $vgpr0, implicit $vgpr1 ret i32 addrspace(999)* @internal_other } define i32 addrspace(6)* @external_constant32_got() { ; GCN-LABEL: name: external_constant32_got ; GCN: bb.1 (%ir-block.0): + ; GCN: liveins: $sgpr30_sgpr31 + ; GCN: [[COPY:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 ; GCN: [[SI_PC_ADD_REL_OFFSET:%[0-9]+]]:sreg_64(p4) = SI_PC_ADD_REL_OFFSET target-flags(amdgpu-gotprel32-lo) @external_constant32 + 4, target-flags(amdgpu-gotprel32-hi) @external_constant32 + 12, implicit-def $scc ; GCN: [[LOAD:%[0-9]+]]:_(p4) = G_LOAD [[SI_PC_ADD_REL_OFFSET]](p4) :: (dereferenceable invariant load (p4) from got, addrspace 4) ; GCN: [[EXTRACT:%[0-9]+]]:_(p6) = G_EXTRACT [[LOAD]](p4), 0 ; GCN: $vgpr0 = COPY [[EXTRACT]](p6) - ; GCN: SI_RETURN implicit $vgpr0 + ; GCN: [[COPY1:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY]] + ; GCN: S_SETPC_B64_return [[COPY1]], implicit $vgpr0 ret i32 addrspace(6)* @external_constant32 } define i32 addrspace(6)* @internal_constant32_pcrel() { ; GCN-LABEL: name: internal_constant32_pcrel ; GCN: bb.1 (%ir-block.0): + ; GCN: liveins: $sgpr30_sgpr31 + ; GCN: [[COPY:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 ; GCN: [[SI_PC_ADD_REL_OFFSET:%[0-9]+]]:sreg_64(p4) = SI_PC_ADD_REL_OFFSET target-flags(amdgpu-rel32-lo) @internal_constant32 + 4, target-flags(amdgpu-rel32-hi) @internal_constant32 + 12, implicit-def $scc ; GCN: [[EXTRACT:%[0-9]+]]:_(p6) = G_EXTRACT [[SI_PC_ADD_REL_OFFSET]](p4), 0 ; GCN: $vgpr0 = COPY [[EXTRACT]](p6) - ; GCN: SI_RETURN implicit $vgpr0 + ; GCN: 
[[COPY1:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY]] + ; GCN: S_SETPC_B64_return [[COPY1]], implicit $vgpr0 ret i32 addrspace(6)* @internal_constant32 } diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-atomicrmw.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-atomicrmw.ll index 5206284cf572..9ed3ec23a181 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-atomicrmw.ll +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-atomicrmw.ll @@ -4,13 +4,15 @@ define float @test_atomicrmw_fadd(float addrspace(3)* %addr) { ; CHECK-LABEL: name: test_atomicrmw_fadd ; CHECK: bb.1 (%ir-block.0): - ; CHECK-NEXT: liveins: $vgpr0 + ; CHECK-NEXT: liveins: $vgpr0, $sgpr30_sgpr31 ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_FCONSTANT float 1.000000e+00 ; CHECK-NEXT: [[ATOMICRMW_FADD:%[0-9]+]]:_(s32) = G_ATOMICRMW_FADD [[COPY]](p3), [[C]] :: (load store seq_cst (s32) on %ir.addr, addrspace 3) ; CHECK-NEXT: $vgpr0 = COPY [[ATOMICRMW_FADD]](s32) - ; CHECK-NEXT: SI_RETURN implicit $vgpr0 + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY1]] + ; CHECK-NEXT: S_SETPC_B64_return [[COPY2]], implicit $vgpr0 %oldval = atomicrmw fadd float addrspace(3)* %addr, float 1.0 seq_cst ret float %oldval } @@ -19,9 +21,10 @@ define float @test_atomicrmw_fsub(float addrspace(3)* %addr) { ; CHECK-LABEL: name: test_atomicrmw_fsub ; CHECK: bb.1 (%ir-block.0): ; CHECK-NEXT: successors: %bb.2(0x80000000) - ; CHECK-NEXT: liveins: $vgpr0 + ; CHECK-NEXT: liveins: $vgpr0, $sgpr30_sgpr31 ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_FCONSTANT float 1.000000e+00 ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 ; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load (s32) from %ir.addr, addrspace 3) @@ -30,8 +33,8 @@ define float @test_atomicrmw_fsub(float addrspace(3)* %addr) { ; CHECK-NEXT: bb.2.atomicrmw.start: ; CHECK-NEXT: successors: %bb.3(0x40000000), %bb.2(0x40000000) ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[PHI:%[0-9]+]]:_(s64) = G_PHI %8(s64), %bb.2, [[C1]](s64), %bb.1 - ; CHECK-NEXT: [[PHI1:%[0-9]+]]:_(s32) = G_PHI [[LOAD]](s32), %bb.1, %6(s32), %bb.2 + ; CHECK-NEXT: [[PHI:%[0-9]+]]:_(s64) = G_PHI %9(s64), %bb.2, [[C1]](s64), %bb.1 + ; CHECK-NEXT: [[PHI1:%[0-9]+]]:_(s32) = G_PHI [[LOAD]](s32), %bb.1, %7(s32), %bb.2 ; CHECK-NEXT: [[FSUB:%[0-9]+]]:_(s32) = G_FSUB [[PHI1]], [[C]] ; CHECK-NEXT: [[ATOMIC_CMPXCHG_WITH_SUCCESS:%[0-9]+]]:_(s32), [[ATOMIC_CMPXCHG_WITH_SUCCESS1:%[0-9]+]]:_(s1) = G_ATOMIC_CMPXCHG_WITH_SUCCESS [[COPY]](p3), [[PHI1]], [[FSUB]] :: (load store seq_cst seq_cst (s32) on %ir.2, addrspace 3) ; CHECK-NEXT: [[INT:%[0-9]+]]:_(s64) = G_INTRINSIC intrinsic(@llvm.amdgcn.if.break), [[ATOMIC_CMPXCHG_WITH_SUCCESS1]](s1), [[PHI]](s64) @@ -44,7 +47,8 @@ define float @test_atomicrmw_fsub(float addrspace(3)* %addr) { ; CHECK-NEXT: [[PHI3:%[0-9]+]]:_(s64) = G_PHI [[INT]](s64), %bb.2 ; CHECK-NEXT: G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.end.cf), [[PHI3]](s64) ; CHECK-NEXT: $vgpr0 = COPY [[PHI2]](s32) - ; CHECK-NEXT: SI_RETURN implicit $vgpr0 + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY1]] + ; CHECK-NEXT: S_SETPC_B64_return [[COPY2]], implicit $vgpr0 %oldval = atomicrmw fsub float addrspace(3)* %addr, float 1.0 seq_cst ret float %oldval } diff --git 
a/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-call-abi-attribute-hints.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-call-abi-attribute-hints.ll index 7015bd8f9c96..2e647da9b5df 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-call-abi-attribute-hints.ll +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-call-abi-attribute-hints.ll @@ -125,7 +125,7 @@ define amdgpu_kernel void @kernel_call_no_other_sgprs() { define void @func_call_no_workitem_ids() { ; CHECK-LABEL: name: func_call_no_workitem_ids ; CHECK: bb.1 (%ir-block.0): - ; CHECK-NEXT: liveins: $sgpr12, $sgpr13, $sgpr14, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9, $sgpr10_sgpr11 + ; CHECK-NEXT: liveins: $sgpr12, $sgpr13, $sgpr14, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9, $sgpr10_sgpr11, $sgpr30_sgpr31 ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr_32 = COPY $sgpr14 ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr_32 = COPY $sgpr13 @@ -134,27 +134,29 @@ define void @func_call_no_workitem_ids() { ; CHECK-NEXT: [[COPY4:%[0-9]+]]:sgpr_64 = COPY $sgpr8_sgpr9 ; CHECK-NEXT: [[COPY5:%[0-9]+]]:sgpr_64 = COPY $sgpr6_sgpr7 ; CHECK-NEXT: [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5 + ; CHECK-NEXT: [[COPY7:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 ; CHECK-NEXT: ADJCALLSTACKUP 0, 0, implicit-def $scc ; CHECK-NEXT: [[GV:%[0-9]+]]:_(p0) = G_GLOBAL_VALUE @extern - ; CHECK-NEXT: [[COPY7:%[0-9]+]]:_(p4) = COPY [[COPY6]] - ; CHECK-NEXT: [[COPY8:%[0-9]+]]:_(p4) = COPY [[COPY5]] - ; CHECK-NEXT: [[COPY9:%[0-9]+]]:_(p4) = COPY [[COPY4]] - ; CHECK-NEXT: [[COPY10:%[0-9]+]]:_(s64) = COPY [[COPY3]] - ; CHECK-NEXT: [[COPY11:%[0-9]+]]:_(s32) = COPY [[COPY2]] - ; CHECK-NEXT: [[COPY12:%[0-9]+]]:_(s32) = COPY [[COPY1]] - ; CHECK-NEXT: [[COPY13:%[0-9]+]]:_(s32) = COPY [[COPY]] - ; CHECK-NEXT: [[COPY14:%[0-9]+]]:_(<4 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3 - ; CHECK-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY14]](<4 x s32>) - ; CHECK-NEXT: $sgpr4_sgpr5 = COPY [[COPY7]](p4) - ; CHECK-NEXT: $sgpr6_sgpr7 = COPY [[COPY8]](p4) - ; CHECK-NEXT: $sgpr8_sgpr9 = COPY [[COPY9]](p4) - ; CHECK-NEXT: $sgpr10_sgpr11 = COPY [[COPY10]](s64) - ; CHECK-NEXT: $sgpr12 = COPY [[COPY11]](s32) - ; CHECK-NEXT: $sgpr13 = COPY [[COPY12]](s32) - ; CHECK-NEXT: $sgpr14 = COPY [[COPY13]](s32) + ; CHECK-NEXT: [[COPY8:%[0-9]+]]:_(p4) = COPY [[COPY6]] + ; CHECK-NEXT: [[COPY9:%[0-9]+]]:_(p4) = COPY [[COPY5]] + ; CHECK-NEXT: [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY4]] + ; CHECK-NEXT: [[COPY11:%[0-9]+]]:_(s64) = COPY [[COPY3]] + ; CHECK-NEXT: [[COPY12:%[0-9]+]]:_(s32) = COPY [[COPY2]] + ; CHECK-NEXT: [[COPY13:%[0-9]+]]:_(s32) = COPY [[COPY1]] + ; CHECK-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY]] + ; CHECK-NEXT: [[COPY15:%[0-9]+]]:_(<4 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3 + ; CHECK-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY15]](<4 x s32>) + ; CHECK-NEXT: $sgpr4_sgpr5 = COPY [[COPY8]](p4) + ; CHECK-NEXT: $sgpr6_sgpr7 = COPY [[COPY9]](p4) + ; CHECK-NEXT: $sgpr8_sgpr9 = COPY [[COPY10]](p4) + ; CHECK-NEXT: $sgpr10_sgpr11 = COPY [[COPY11]](s64) + ; CHECK-NEXT: $sgpr12 = COPY [[COPY12]](s32) + ; CHECK-NEXT: $sgpr13 = COPY [[COPY13]](s32) + ; CHECK-NEXT: $sgpr14 = COPY [[COPY14]](s32) ; CHECK-NEXT: $sgpr30_sgpr31 = G_SI_CALL [[GV]](p0), @extern, csr_amdgpu_highregs, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14 ; CHECK-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def $scc - ; CHECK-NEXT: SI_RETURN + ; CHECK-NEXT: [[COPY16:%[0-9]+]]:ccr_sgpr_64 = COPY 
[[COPY7]] + ; CHECK-NEXT: S_SETPC_B64_return [[COPY16]] call void @extern() "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" ret void } @@ -162,30 +164,32 @@ define void @func_call_no_workitem_ids() { define void @func_call_no_workgroup_ids() { ; CHECK-LABEL: name: func_call_no_workgroup_ids ; CHECK: bb.1 (%ir-block.0): - ; CHECK-NEXT: liveins: $vgpr31, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9, $sgpr10_sgpr11 + ; CHECK-NEXT: liveins: $vgpr31, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9, $sgpr10_sgpr11, $sgpr30_sgpr31 ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr31 ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr_64 = COPY $sgpr10_sgpr11 ; CHECK-NEXT: [[COPY2:%[0-9]+]]:sgpr_64 = COPY $sgpr8_sgpr9 ; CHECK-NEXT: [[COPY3:%[0-9]+]]:sgpr_64 = COPY $sgpr6_sgpr7 ; CHECK-NEXT: [[COPY4:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5 + ; CHECK-NEXT: [[COPY5:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 ; CHECK-NEXT: ADJCALLSTACKUP 0, 0, implicit-def $scc ; CHECK-NEXT: [[GV:%[0-9]+]]:_(p0) = G_GLOBAL_VALUE @extern - ; CHECK-NEXT: [[COPY5:%[0-9]+]]:_(p4) = COPY [[COPY4]] - ; CHECK-NEXT: [[COPY6:%[0-9]+]]:_(p4) = COPY [[COPY3]] - ; CHECK-NEXT: [[COPY7:%[0-9]+]]:_(p4) = COPY [[COPY2]] - ; CHECK-NEXT: [[COPY8:%[0-9]+]]:_(s64) = COPY [[COPY1]] - ; CHECK-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY [[COPY]](s32) - ; CHECK-NEXT: [[COPY10:%[0-9]+]]:_(<4 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3 - ; CHECK-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY10]](<4 x s32>) - ; CHECK-NEXT: $sgpr4_sgpr5 = COPY [[COPY5]](p4) - ; CHECK-NEXT: $sgpr6_sgpr7 = COPY [[COPY6]](p4) - ; CHECK-NEXT: $sgpr8_sgpr9 = COPY [[COPY7]](p4) - ; CHECK-NEXT: $sgpr10_sgpr11 = COPY [[COPY8]](s64) - ; CHECK-NEXT: $vgpr31 = COPY [[COPY9]](s32) + ; CHECK-NEXT: [[COPY6:%[0-9]+]]:_(p4) = COPY [[COPY4]] + ; CHECK-NEXT: [[COPY7:%[0-9]+]]:_(p4) = COPY [[COPY3]] + ; CHECK-NEXT: [[COPY8:%[0-9]+]]:_(p4) = COPY [[COPY2]] + ; CHECK-NEXT: [[COPY9:%[0-9]+]]:_(s64) = COPY [[COPY1]] + ; CHECK-NEXT: [[COPY10:%[0-9]+]]:_(s32) = COPY [[COPY]](s32) + ; CHECK-NEXT: [[COPY11:%[0-9]+]]:_(<4 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3 + ; CHECK-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY11]](<4 x s32>) + ; CHECK-NEXT: $sgpr4_sgpr5 = COPY [[COPY6]](p4) + ; CHECK-NEXT: $sgpr6_sgpr7 = COPY [[COPY7]](p4) + ; CHECK-NEXT: $sgpr8_sgpr9 = COPY [[COPY8]](p4) + ; CHECK-NEXT: $sgpr10_sgpr11 = COPY [[COPY9]](s64) + ; CHECK-NEXT: $vgpr31 = COPY [[COPY10]](s32) ; CHECK-NEXT: $sgpr30_sgpr31 = G_SI_CALL [[GV]](p0), @extern, csr_amdgpu_highregs, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $vgpr31 ; CHECK-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def $scc - ; CHECK-NEXT: SI_RETURN + ; CHECK-NEXT: [[COPY12:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY5]] + ; CHECK-NEXT: S_SETPC_B64_return [[COPY12]] call void @extern() "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" ret void } @@ -193,21 +197,23 @@ define void @func_call_no_workgroup_ids() { define void @func_call_no_other_sgprs() { ; CHECK-LABEL: name: func_call_no_other_sgprs ; CHECK: bb.1 (%ir-block.0): - ; CHECK-NEXT: liveins: $vgpr31, $sgpr8_sgpr9 + ; CHECK-NEXT: liveins: $vgpr31, $sgpr8_sgpr9, $sgpr30_sgpr31 ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr31 ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr_64 = COPY $sgpr8_sgpr9 + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 ; CHECK-NEXT: ADJCALLSTACKUP 0, 0, implicit-def $scc ; CHECK-NEXT: [[GV:%[0-9]+]]:_(p0) = 
G_GLOBAL_VALUE @extern - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(p4) = COPY [[COPY1]] - ; CHECK-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY [[COPY]](s32) - ; CHECK-NEXT: [[COPY4:%[0-9]+]]:_(<4 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3 - ; CHECK-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY4]](<4 x s32>) - ; CHECK-NEXT: $sgpr8_sgpr9 = COPY [[COPY2]](p4) - ; CHECK-NEXT: $vgpr31 = COPY [[COPY3]](s32) + ; CHECK-NEXT: [[COPY3:%[0-9]+]]:_(p4) = COPY [[COPY1]] + ; CHECK-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY [[COPY]](s32) + ; CHECK-NEXT: [[COPY5:%[0-9]+]]:_(<4 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3 + ; CHECK-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY5]](<4 x s32>) + ; CHECK-NEXT: $sgpr8_sgpr9 = COPY [[COPY3]](p4) + ; CHECK-NEXT: $vgpr31 = COPY [[COPY4]](s32) ; CHECK-NEXT: $sgpr30_sgpr31 = G_SI_CALL [[GV]](p0), @extern, csr_amdgpu_highregs, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr8_sgpr9, implicit $vgpr31 ; CHECK-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def $scc - ; CHECK-NEXT: SI_RETURN + ; CHECK-NEXT: [[COPY6:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY2]] + ; CHECK-NEXT: S_SETPC_B64_return [[COPY6]] call void @extern() "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" ret void } diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-call-implicit-args.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-call-implicit-args.ll index 851e893bead7..00f2e4b620c2 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-call-implicit-args.ll +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-call-implicit-args.ll @@ -116,7 +116,7 @@ define amdgpu_kernel void @test_call_external_void_func_i32([17 x i8]) #0 { define void @test_func_call_external_void_func_i32() #0 { ; GFX900-LABEL: name: test_func_call_external_void_func_i32 ; GFX900: bb.1 (%ir-block.0): - ; GFX900-NEXT: liveins: $sgpr12, $sgpr13, $sgpr14, $vgpr31, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9, $sgpr10_sgpr11 + ; GFX900-NEXT: liveins: $sgpr12, $sgpr13, $sgpr14, $vgpr31, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9, $sgpr10_sgpr11, $sgpr30_sgpr31 ; GFX900-NEXT: {{ $}} ; GFX900-NEXT: [[COPY:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr31 ; GFX900-NEXT: [[COPY1:%[0-9]+]]:sgpr_32 = COPY $sgpr14 @@ -126,34 +126,36 @@ define void @test_func_call_external_void_func_i32() #0 { ; GFX900-NEXT: [[COPY5:%[0-9]+]]:sgpr_64 = COPY $sgpr8_sgpr9 ; GFX900-NEXT: [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr6_sgpr7 ; GFX900-NEXT: [[COPY7:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5 + ; GFX900-NEXT: [[COPY8:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 ; GFX900-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 99 ; GFX900-NEXT: ADJCALLSTACKUP 0, 0, implicit-def $scc ; GFX900-NEXT: [[GV:%[0-9]+]]:_(p0) = G_GLOBAL_VALUE @external_void_func_i32 - ; GFX900-NEXT: [[COPY8:%[0-9]+]]:_(p4) = COPY [[COPY7]] - ; GFX900-NEXT: [[COPY9:%[0-9]+]]:_(p4) = COPY [[COPY6]] - ; GFX900-NEXT: [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY5]] - ; GFX900-NEXT: [[COPY11:%[0-9]+]]:_(s64) = COPY [[COPY4]] - ; GFX900-NEXT: [[COPY12:%[0-9]+]]:_(s32) = COPY [[COPY3]] - ; GFX900-NEXT: [[COPY13:%[0-9]+]]:_(s32) = COPY [[COPY2]] - ; GFX900-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY1]] - ; GFX900-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY]](s32) + ; GFX900-NEXT: [[COPY9:%[0-9]+]]:_(p4) = COPY [[COPY7]] + ; GFX900-NEXT: [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY6]] + ; GFX900-NEXT: [[COPY11:%[0-9]+]]:_(p4) = COPY [[COPY5]] + ; GFX900-NEXT: [[COPY12:%[0-9]+]]:_(s64) = COPY [[COPY4]] + ; GFX900-NEXT: [[COPY13:%[0-9]+]]:_(s32) = COPY 
[[COPY3]] + ; GFX900-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY2]] + ; GFX900-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY1]] + ; GFX900-NEXT: [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY]](s32) ; GFX900-NEXT: $vgpr0 = COPY [[C]](s32) - ; GFX900-NEXT: [[COPY16:%[0-9]+]]:_(<4 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3 - ; GFX900-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY16]](<4 x s32>) - ; GFX900-NEXT: $sgpr4_sgpr5 = COPY [[COPY8]](p4) - ; GFX900-NEXT: $sgpr6_sgpr7 = COPY [[COPY9]](p4) - ; GFX900-NEXT: $sgpr8_sgpr9 = COPY [[COPY10]](p4) - ; GFX900-NEXT: $sgpr10_sgpr11 = COPY [[COPY11]](s64) - ; GFX900-NEXT: $sgpr12 = COPY [[COPY12]](s32) - ; GFX900-NEXT: $sgpr13 = COPY [[COPY13]](s32) - ; GFX900-NEXT: $sgpr14 = COPY [[COPY14]](s32) - ; GFX900-NEXT: $vgpr31 = COPY [[COPY15]](s32) + ; GFX900-NEXT: [[COPY17:%[0-9]+]]:_(<4 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3 + ; GFX900-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY17]](<4 x s32>) + ; GFX900-NEXT: $sgpr4_sgpr5 = COPY [[COPY9]](p4) + ; GFX900-NEXT: $sgpr6_sgpr7 = COPY [[COPY10]](p4) + ; GFX900-NEXT: $sgpr8_sgpr9 = COPY [[COPY11]](p4) + ; GFX900-NEXT: $sgpr10_sgpr11 = COPY [[COPY12]](s64) + ; GFX900-NEXT: $sgpr12 = COPY [[COPY13]](s32) + ; GFX900-NEXT: $sgpr13 = COPY [[COPY14]](s32) + ; GFX900-NEXT: $sgpr14 = COPY [[COPY15]](s32) + ; GFX900-NEXT: $vgpr31 = COPY [[COPY16]](s32) ; GFX900-NEXT: $sgpr30_sgpr31 = G_SI_CALL [[GV]](p0), @external_void_func_i32, csr_amdgpu_highregs, implicit $vgpr0, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $vgpr31 ; GFX900-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def $scc - ; GFX900-NEXT: SI_RETURN + ; GFX900-NEXT: [[COPY18:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY8]] + ; GFX900-NEXT: S_SETPC_B64_return [[COPY18]] ; GFX908-LABEL: name: test_func_call_external_void_func_i32 ; GFX908: bb.1 (%ir-block.0): - ; GFX908-NEXT: liveins: $sgpr12, $sgpr13, $sgpr14, $vgpr31, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9, $sgpr10_sgpr11 + ; GFX908-NEXT: liveins: $sgpr12, $sgpr13, $sgpr14, $vgpr31, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9, $sgpr10_sgpr11, $sgpr30_sgpr31 ; GFX908-NEXT: {{ $}} ; GFX908-NEXT: [[COPY:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr31 ; GFX908-NEXT: [[COPY1:%[0-9]+]]:sgpr_32 = COPY $sgpr14 @@ -163,31 +165,33 @@ define void @test_func_call_external_void_func_i32() #0 { ; GFX908-NEXT: [[COPY5:%[0-9]+]]:sgpr_64 = COPY $sgpr8_sgpr9 ; GFX908-NEXT: [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr6_sgpr7 ; GFX908-NEXT: [[COPY7:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5 + ; GFX908-NEXT: [[COPY8:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 ; GFX908-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 99 ; GFX908-NEXT: ADJCALLSTACKUP 0, 0, implicit-def $scc ; GFX908-NEXT: [[GV:%[0-9]+]]:_(p0) = G_GLOBAL_VALUE @external_void_func_i32 - ; GFX908-NEXT: [[COPY8:%[0-9]+]]:_(p4) = COPY [[COPY7]] - ; GFX908-NEXT: [[COPY9:%[0-9]+]]:_(p4) = COPY [[COPY6]] - ; GFX908-NEXT: [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY5]] - ; GFX908-NEXT: [[COPY11:%[0-9]+]]:_(s64) = COPY [[COPY4]] - ; GFX908-NEXT: [[COPY12:%[0-9]+]]:_(s32) = COPY [[COPY3]] - ; GFX908-NEXT: [[COPY13:%[0-9]+]]:_(s32) = COPY [[COPY2]] - ; GFX908-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY1]] - ; GFX908-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY]](s32) + ; GFX908-NEXT: [[COPY9:%[0-9]+]]:_(p4) = COPY [[COPY7]] + ; GFX908-NEXT: [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY6]] + ; GFX908-NEXT: [[COPY11:%[0-9]+]]:_(p4) = COPY [[COPY5]] + ; GFX908-NEXT: [[COPY12:%[0-9]+]]:_(s64) = COPY 
[[COPY4]] + ; GFX908-NEXT: [[COPY13:%[0-9]+]]:_(s32) = COPY [[COPY3]] + ; GFX908-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY2]] + ; GFX908-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY1]] + ; GFX908-NEXT: [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY]](s32) ; GFX908-NEXT: $vgpr0 = COPY [[C]](s32) - ; GFX908-NEXT: [[COPY16:%[0-9]+]]:_(<4 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3 - ; GFX908-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY16]](<4 x s32>) - ; GFX908-NEXT: $sgpr4_sgpr5 = COPY [[COPY8]](p4) - ; GFX908-NEXT: $sgpr6_sgpr7 = COPY [[COPY9]](p4) - ; GFX908-NEXT: $sgpr8_sgpr9 = COPY [[COPY10]](p4) - ; GFX908-NEXT: $sgpr10_sgpr11 = COPY [[COPY11]](s64) - ; GFX908-NEXT: $sgpr12 = COPY [[COPY12]](s32) - ; GFX908-NEXT: $sgpr13 = COPY [[COPY13]](s32) - ; GFX908-NEXT: $sgpr14 = COPY [[COPY14]](s32) - ; GFX908-NEXT: $vgpr31 = COPY [[COPY15]](s32) + ; GFX908-NEXT: [[COPY17:%[0-9]+]]:_(<4 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3 + ; GFX908-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY17]](<4 x s32>) + ; GFX908-NEXT: $sgpr4_sgpr5 = COPY [[COPY9]](p4) + ; GFX908-NEXT: $sgpr6_sgpr7 = COPY [[COPY10]](p4) + ; GFX908-NEXT: $sgpr8_sgpr9 = COPY [[COPY11]](p4) + ; GFX908-NEXT: $sgpr10_sgpr11 = COPY [[COPY12]](s64) + ; GFX908-NEXT: $sgpr12 = COPY [[COPY13]](s32) + ; GFX908-NEXT: $sgpr13 = COPY [[COPY14]](s32) + ; GFX908-NEXT: $sgpr14 = COPY [[COPY15]](s32) + ; GFX908-NEXT: $vgpr31 = COPY [[COPY16]](s32) ; GFX908-NEXT: $sgpr30_sgpr31 = G_SI_CALL [[GV]](p0), @external_void_func_i32, csr_amdgpu_highregs, implicit $vgpr0, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $vgpr31 ; GFX908-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def $scc - ; GFX908-NEXT: SI_RETURN + ; GFX908-NEXT: [[COPY18:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY8]] + ; GFX908-NEXT: S_SETPC_B64_return [[COPY18]] call void @external_void_func_i32(i32 99) ret void } @@ -374,7 +378,7 @@ define amdgpu_kernel void @test_call_external_void_func_v32i32([17 x i8]) #0 { define void @test_func_call_external_void_func_v32i32([17 x i8]) #0 { ; GFX900-LABEL: name: test_func_call_external_void_func_v32i32 ; GFX900: bb.1 (%ir-block.1): - ; GFX900-NEXT: liveins: $sgpr12, $sgpr13, $sgpr14, $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $vgpr8, $vgpr9, $vgpr10, $vgpr11, $vgpr12, $vgpr13, $vgpr14, $vgpr15, $vgpr16, $vgpr31, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9, $sgpr10_sgpr11 + ; GFX900-NEXT: liveins: $sgpr12, $sgpr13, $sgpr14, $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $vgpr8, $vgpr9, $vgpr10, $vgpr11, $vgpr12, $vgpr13, $vgpr14, $vgpr15, $vgpr16, $vgpr31, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9, $sgpr10_sgpr11, $sgpr30_sgpr31 ; GFX900-NEXT: {{ $}} ; GFX900-NEXT: [[COPY:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr31 ; GFX900-NEXT: [[COPY1:%[0-9]+]]:sgpr_32 = COPY $sgpr14 @@ -435,22 +439,23 @@ define void @test_func_call_external_void_func_v32i32([17 x i8]) #0 { ; GFX900-NEXT: [[COPY24:%[0-9]+]]:_(s32) = COPY $vgpr16 ; GFX900-NEXT: [[TRUNC32:%[0-9]+]]:_(s16) = G_TRUNC [[COPY24]](s32) ; GFX900-NEXT: [[TRUNC33:%[0-9]+]]:_(s8) = G_TRUNC [[TRUNC32]](s16) + ; GFX900-NEXT: [[COPY25:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 ; GFX900-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 ; GFX900-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<32 x s32>) = G_BUILD_VECTOR [[C]](s32), [[C]](s32), [[C]](s32), [[C]](s32), [[C]](s32), [[C]](s32), [[C]](s32), [[C]](s32), [[C]](s32), [[C]](s32), [[C]](s32), [[C]](s32), [[C]](s32), [[C]](s32), 
[[C]](s32), [[C]](s32), [[C]](s32), [[C]](s32), [[C]](s32), [[C]](s32), [[C]](s32), [[C]](s32), [[C]](s32), [[C]](s32), [[C]](s32), [[C]](s32), [[C]](s32), [[C]](s32), [[C]](s32), [[C]](s32), [[C]](s32), [[C]](s32) ; GFX900-NEXT: ADJCALLSTACKUP 0, 0, implicit-def $scc ; GFX900-NEXT: [[GV:%[0-9]+]]:_(p0) = G_GLOBAL_VALUE @external_void_func_v32i32 - ; GFX900-NEXT: [[COPY25:%[0-9]+]]:_(p4) = COPY [[COPY7]] - ; GFX900-NEXT: [[COPY26:%[0-9]+]]:_(p4) = COPY [[COPY6]] - ; GFX900-NEXT: [[COPY27:%[0-9]+]]:_(p4) = COPY [[COPY5]] - ; GFX900-NEXT: [[COPY28:%[0-9]+]]:_(s64) = COPY [[COPY4]] - ; GFX900-NEXT: [[COPY29:%[0-9]+]]:_(s32) = COPY [[COPY3]] - ; GFX900-NEXT: [[COPY30:%[0-9]+]]:_(s32) = COPY [[COPY2]] - ; GFX900-NEXT: [[COPY31:%[0-9]+]]:_(s32) = COPY [[COPY1]] - ; GFX900-NEXT: [[COPY32:%[0-9]+]]:_(s32) = COPY [[COPY]](s32) + ; GFX900-NEXT: [[COPY26:%[0-9]+]]:_(p4) = COPY [[COPY7]] + ; GFX900-NEXT: [[COPY27:%[0-9]+]]:_(p4) = COPY [[COPY6]] + ; GFX900-NEXT: [[COPY28:%[0-9]+]]:_(p4) = COPY [[COPY5]] + ; GFX900-NEXT: [[COPY29:%[0-9]+]]:_(s64) = COPY [[COPY4]] + ; GFX900-NEXT: [[COPY30:%[0-9]+]]:_(s32) = COPY [[COPY3]] + ; GFX900-NEXT: [[COPY31:%[0-9]+]]:_(s32) = COPY [[COPY2]] + ; GFX900-NEXT: [[COPY32:%[0-9]+]]:_(s32) = COPY [[COPY1]] + ; GFX900-NEXT: [[COPY33:%[0-9]+]]:_(s32) = COPY [[COPY]](s32) ; GFX900-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32), [[UV6:%[0-9]+]]:_(s32), [[UV7:%[0-9]+]]:_(s32), [[UV8:%[0-9]+]]:_(s32), [[UV9:%[0-9]+]]:_(s32), [[UV10:%[0-9]+]]:_(s32), [[UV11:%[0-9]+]]:_(s32), [[UV12:%[0-9]+]]:_(s32), [[UV13:%[0-9]+]]:_(s32), [[UV14:%[0-9]+]]:_(s32), [[UV15:%[0-9]+]]:_(s32), [[UV16:%[0-9]+]]:_(s32), [[UV17:%[0-9]+]]:_(s32), [[UV18:%[0-9]+]]:_(s32), [[UV19:%[0-9]+]]:_(s32), [[UV20:%[0-9]+]]:_(s32), [[UV21:%[0-9]+]]:_(s32), [[UV22:%[0-9]+]]:_(s32), [[UV23:%[0-9]+]]:_(s32), [[UV24:%[0-9]+]]:_(s32), [[UV25:%[0-9]+]]:_(s32), [[UV26:%[0-9]+]]:_(s32), [[UV27:%[0-9]+]]:_(s32), [[UV28:%[0-9]+]]:_(s32), [[UV29:%[0-9]+]]:_(s32), [[UV30:%[0-9]+]]:_(s32), [[UV31:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[BUILD_VECTOR]](<32 x s32>) - ; GFX900-NEXT: [[COPY33:%[0-9]+]]:_(p5) = COPY $sgpr32 + ; GFX900-NEXT: [[COPY34:%[0-9]+]]:_(p5) = COPY $sgpr32 ; GFX900-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 - ; GFX900-NEXT: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY33]], [[C1]](s32) + ; GFX900-NEXT: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY34]], [[C1]](s32) ; GFX900-NEXT: G_STORE [[UV31]](s32), [[PTR_ADD]](p5) :: (store (s32) into stack, align 16, addrspace 5) ; GFX900-NEXT: $vgpr0 = COPY [[UV]](s32) ; GFX900-NEXT: $vgpr1 = COPY [[UV1]](s32) @@ -483,22 +488,23 @@ define void @test_func_call_external_void_func_v32i32([17 x i8]) #0 { ; GFX900-NEXT: $vgpr28 = COPY [[UV28]](s32) ; GFX900-NEXT: $vgpr29 = COPY [[UV29]](s32) ; GFX900-NEXT: $vgpr30 = COPY [[UV30]](s32) - ; GFX900-NEXT: [[COPY34:%[0-9]+]]:_(<4 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3 - ; GFX900-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY34]](<4 x s32>) - ; GFX900-NEXT: $sgpr4_sgpr5 = COPY [[COPY25]](p4) - ; GFX900-NEXT: $sgpr6_sgpr7 = COPY [[COPY26]](p4) - ; GFX900-NEXT: $sgpr8_sgpr9 = COPY [[COPY27]](p4) - ; GFX900-NEXT: $sgpr10_sgpr11 = COPY [[COPY28]](s64) - ; GFX900-NEXT: $sgpr12 = COPY [[COPY29]](s32) - ; GFX900-NEXT: $sgpr13 = COPY [[COPY30]](s32) - ; GFX900-NEXT: $sgpr14 = COPY [[COPY31]](s32) - ; GFX900-NEXT: $vgpr31 = COPY [[COPY32]](s32) + ; GFX900-NEXT: [[COPY35:%[0-9]+]]:_(<4 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3 + ; GFX900-NEXT: 
$sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY35]](<4 x s32>) + ; GFX900-NEXT: $sgpr4_sgpr5 = COPY [[COPY26]](p4) + ; GFX900-NEXT: $sgpr6_sgpr7 = COPY [[COPY27]](p4) + ; GFX900-NEXT: $sgpr8_sgpr9 = COPY [[COPY28]](p4) + ; GFX900-NEXT: $sgpr10_sgpr11 = COPY [[COPY29]](s64) + ; GFX900-NEXT: $sgpr12 = COPY [[COPY30]](s32) + ; GFX900-NEXT: $sgpr13 = COPY [[COPY31]](s32) + ; GFX900-NEXT: $sgpr14 = COPY [[COPY32]](s32) + ; GFX900-NEXT: $vgpr31 = COPY [[COPY33]](s32) ; GFX900-NEXT: $sgpr30_sgpr31 = G_SI_CALL [[GV]](p0), @external_void_func_v32i32, csr_amdgpu_highregs, implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7, implicit $vgpr8, implicit $vgpr9, implicit $vgpr10, implicit $vgpr11, implicit $vgpr12, implicit $vgpr13, implicit $vgpr14, implicit $vgpr15, implicit $vgpr16, implicit $vgpr17, implicit $vgpr18, implicit $vgpr19, implicit $vgpr20, implicit $vgpr21, implicit $vgpr22, implicit $vgpr23, implicit $vgpr24, implicit $vgpr25, implicit $vgpr26, implicit $vgpr27, implicit $vgpr28, implicit $vgpr29, implicit $vgpr30, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $vgpr31 ; GFX900-NEXT: ADJCALLSTACKDOWN 0, 4, implicit-def $scc - ; GFX900-NEXT: SI_RETURN + ; GFX900-NEXT: [[COPY36:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY25]] + ; GFX900-NEXT: S_SETPC_B64_return [[COPY36]] ; GFX908-LABEL: name: test_func_call_external_void_func_v32i32 ; GFX908: bb.1 (%ir-block.1): - ; GFX908-NEXT: liveins: $sgpr12, $sgpr13, $sgpr14, $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $vgpr8, $vgpr9, $vgpr10, $vgpr11, $vgpr12, $vgpr13, $vgpr14, $vgpr15, $vgpr16, $vgpr31, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9, $sgpr10_sgpr11 + ; GFX908-NEXT: liveins: $sgpr12, $sgpr13, $sgpr14, $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $vgpr8, $vgpr9, $vgpr10, $vgpr11, $vgpr12, $vgpr13, $vgpr14, $vgpr15, $vgpr16, $vgpr31, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9, $sgpr10_sgpr11, $sgpr30_sgpr31 ; GFX908-NEXT: {{ $}} ; GFX908-NEXT: [[COPY:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr31 ; GFX908-NEXT: [[COPY1:%[0-9]+]]:sgpr_32 = COPY $sgpr14 @@ -559,22 +565,23 @@ define void @test_func_call_external_void_func_v32i32([17 x i8]) #0 { ; GFX908-NEXT: [[COPY24:%[0-9]+]]:_(s32) = COPY $vgpr16 ; GFX908-NEXT: [[TRUNC32:%[0-9]+]]:_(s16) = G_TRUNC [[COPY24]](s32) ; GFX908-NEXT: [[TRUNC33:%[0-9]+]]:_(s8) = G_TRUNC [[TRUNC32]](s16) + ; GFX908-NEXT: [[COPY25:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 ; GFX908-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 ; GFX908-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<32 x s32>) = G_BUILD_VECTOR [[C]](s32), [[C]](s32), [[C]](s32), [[C]](s32), [[C]](s32), [[C]](s32), [[C]](s32), [[C]](s32), [[C]](s32), [[C]](s32), [[C]](s32), [[C]](s32), [[C]](s32), [[C]](s32), [[C]](s32), [[C]](s32), [[C]](s32), [[C]](s32), [[C]](s32), [[C]](s32), [[C]](s32), [[C]](s32), [[C]](s32), [[C]](s32), [[C]](s32), [[C]](s32), [[C]](s32), [[C]](s32), [[C]](s32), [[C]](s32), [[C]](s32), [[C]](s32) ; GFX908-NEXT: ADJCALLSTACKUP 0, 0, implicit-def $scc ; GFX908-NEXT: [[GV:%[0-9]+]]:_(p0) = G_GLOBAL_VALUE @external_void_func_v32i32 - ; GFX908-NEXT: [[COPY25:%[0-9]+]]:_(p4) = COPY [[COPY7]] - ; GFX908-NEXT: [[COPY26:%[0-9]+]]:_(p4) = COPY [[COPY6]] - ; GFX908-NEXT: [[COPY27:%[0-9]+]]:_(p4) = COPY [[COPY5]] - ; GFX908-NEXT: [[COPY28:%[0-9]+]]:_(s64) = COPY [[COPY4]] - ; GFX908-NEXT: [[COPY29:%[0-9]+]]:_(s32) = COPY 
[[COPY3]] - ; GFX908-NEXT: [[COPY30:%[0-9]+]]:_(s32) = COPY [[COPY2]] - ; GFX908-NEXT: [[COPY31:%[0-9]+]]:_(s32) = COPY [[COPY1]] - ; GFX908-NEXT: [[COPY32:%[0-9]+]]:_(s32) = COPY [[COPY]](s32) + ; GFX908-NEXT: [[COPY26:%[0-9]+]]:_(p4) = COPY [[COPY7]] + ; GFX908-NEXT: [[COPY27:%[0-9]+]]:_(p4) = COPY [[COPY6]] + ; GFX908-NEXT: [[COPY28:%[0-9]+]]:_(p4) = COPY [[COPY5]] + ; GFX908-NEXT: [[COPY29:%[0-9]+]]:_(s64) = COPY [[COPY4]] + ; GFX908-NEXT: [[COPY30:%[0-9]+]]:_(s32) = COPY [[COPY3]] + ; GFX908-NEXT: [[COPY31:%[0-9]+]]:_(s32) = COPY [[COPY2]] + ; GFX908-NEXT: [[COPY32:%[0-9]+]]:_(s32) = COPY [[COPY1]] + ; GFX908-NEXT: [[COPY33:%[0-9]+]]:_(s32) = COPY [[COPY]](s32) ; GFX908-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32), [[UV6:%[0-9]+]]:_(s32), [[UV7:%[0-9]+]]:_(s32), [[UV8:%[0-9]+]]:_(s32), [[UV9:%[0-9]+]]:_(s32), [[UV10:%[0-9]+]]:_(s32), [[UV11:%[0-9]+]]:_(s32), [[UV12:%[0-9]+]]:_(s32), [[UV13:%[0-9]+]]:_(s32), [[UV14:%[0-9]+]]:_(s32), [[UV15:%[0-9]+]]:_(s32), [[UV16:%[0-9]+]]:_(s32), [[UV17:%[0-9]+]]:_(s32), [[UV18:%[0-9]+]]:_(s32), [[UV19:%[0-9]+]]:_(s32), [[UV20:%[0-9]+]]:_(s32), [[UV21:%[0-9]+]]:_(s32), [[UV22:%[0-9]+]]:_(s32), [[UV23:%[0-9]+]]:_(s32), [[UV24:%[0-9]+]]:_(s32), [[UV25:%[0-9]+]]:_(s32), [[UV26:%[0-9]+]]:_(s32), [[UV27:%[0-9]+]]:_(s32), [[UV28:%[0-9]+]]:_(s32), [[UV29:%[0-9]+]]:_(s32), [[UV30:%[0-9]+]]:_(s32), [[UV31:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[BUILD_VECTOR]](<32 x s32>) - ; GFX908-NEXT: [[COPY33:%[0-9]+]]:_(p5) = COPY $sgpr32 + ; GFX908-NEXT: [[COPY34:%[0-9]+]]:_(p5) = COPY $sgpr32 ; GFX908-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 - ; GFX908-NEXT: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY33]], [[C1]](s32) + ; GFX908-NEXT: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY34]], [[C1]](s32) ; GFX908-NEXT: G_STORE [[UV31]](s32), [[PTR_ADD]](p5) :: (store (s32) into stack, align 16, addrspace 5) ; GFX908-NEXT: $vgpr0 = COPY [[UV]](s32) ; GFX908-NEXT: $vgpr1 = COPY [[UV1]](s32) @@ -607,19 +614,20 @@ define void @test_func_call_external_void_func_v32i32([17 x i8]) #0 { ; GFX908-NEXT: $vgpr28 = COPY [[UV28]](s32) ; GFX908-NEXT: $vgpr29 = COPY [[UV29]](s32) ; GFX908-NEXT: $vgpr30 = COPY [[UV30]](s32) - ; GFX908-NEXT: [[COPY34:%[0-9]+]]:_(<4 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3 - ; GFX908-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY34]](<4 x s32>) - ; GFX908-NEXT: $sgpr4_sgpr5 = COPY [[COPY25]](p4) - ; GFX908-NEXT: $sgpr6_sgpr7 = COPY [[COPY26]](p4) - ; GFX908-NEXT: $sgpr8_sgpr9 = COPY [[COPY27]](p4) - ; GFX908-NEXT: $sgpr10_sgpr11 = COPY [[COPY28]](s64) - ; GFX908-NEXT: $sgpr12 = COPY [[COPY29]](s32) - ; GFX908-NEXT: $sgpr13 = COPY [[COPY30]](s32) - ; GFX908-NEXT: $sgpr14 = COPY [[COPY31]](s32) - ; GFX908-NEXT: $vgpr31 = COPY [[COPY32]](s32) + ; GFX908-NEXT: [[COPY35:%[0-9]+]]:_(<4 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3 + ; GFX908-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY35]](<4 x s32>) + ; GFX908-NEXT: $sgpr4_sgpr5 = COPY [[COPY26]](p4) + ; GFX908-NEXT: $sgpr6_sgpr7 = COPY [[COPY27]](p4) + ; GFX908-NEXT: $sgpr8_sgpr9 = COPY [[COPY28]](p4) + ; GFX908-NEXT: $sgpr10_sgpr11 = COPY [[COPY29]](s64) + ; GFX908-NEXT: $sgpr12 = COPY [[COPY30]](s32) + ; GFX908-NEXT: $sgpr13 = COPY [[COPY31]](s32) + ; GFX908-NEXT: $sgpr14 = COPY [[COPY32]](s32) + ; GFX908-NEXT: $vgpr31 = COPY [[COPY33]](s32) ; GFX908-NEXT: $sgpr30_sgpr31 = G_SI_CALL [[GV]](p0), @external_void_func_v32i32, csr_amdgpu_highregs, implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, 
implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7, implicit $vgpr8, implicit $vgpr9, implicit $vgpr10, implicit $vgpr11, implicit $vgpr12, implicit $vgpr13, implicit $vgpr14, implicit $vgpr15, implicit $vgpr16, implicit $vgpr17, implicit $vgpr18, implicit $vgpr19, implicit $vgpr20, implicit $vgpr21, implicit $vgpr22, implicit $vgpr23, implicit $vgpr24, implicit $vgpr25, implicit $vgpr26, implicit $vgpr27, implicit $vgpr28, implicit $vgpr29, implicit $vgpr30, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $vgpr31 ; GFX908-NEXT: ADJCALLSTACKDOWN 0, 4, implicit-def $scc - ; GFX908-NEXT: SI_RETURN + ; GFX908-NEXT: [[COPY36:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY25]] + ; GFX908-NEXT: S_SETPC_B64_return [[COPY36]] call void @external_void_func_v32i32(<32 x i32> zeroinitializer) ret void } diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-call-non-fixed.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-call-non-fixed.ll index cb06c4752520..67cb7683fbf5 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-call-non-fixed.ll +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-call-non-fixed.ll @@ -11,13 +11,17 @@ declare hidden amdgpu_gfx void @external_gfx_void_func_struct_i8_i32_inreg({ i8, define amdgpu_gfx void @test_gfx_call_external_void_func_void() #0 { ; CHECK-LABEL: name: test_gfx_call_external_void_func_void ; CHECK: bb.1 (%ir-block.0): + ; CHECK-NEXT: liveins: $sgpr30_sgpr31 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 ; CHECK-NEXT: ADJCALLSTACKUP 0, 0, implicit-def $scc ; CHECK-NEXT: [[GV:%[0-9]+]]:_(p0) = G_GLOBAL_VALUE @external_gfx_void_func_void - ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(<4 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3 - ; CHECK-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY]](<4 x s32>) + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(<4 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3 + ; CHECK-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY1]](<4 x s32>) ; CHECK-NEXT: $sgpr30_sgpr31 = G_SI_CALL [[GV]](p0), @external_gfx_void_func_void, csr_amdgpu_si_gfx, implicit $sgpr0_sgpr1_sgpr2_sgpr3 ; CHECK-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def $scc - ; CHECK-NEXT: SI_RETURN + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:gfx_ccr_sgpr_64 = COPY [[COPY]] + ; CHECK-NEXT: S_SETPC_B64_return_gfx [[COPY2]] call amdgpu_gfx void @external_gfx_void_func_void() ret void } @@ -25,18 +29,20 @@ define amdgpu_gfx void @test_gfx_call_external_void_func_void() #0 { define amdgpu_gfx void @test_gfx_call_external_void_func_i32_imm(i32) #0 { ; CHECK-LABEL: name: test_gfx_call_external_void_func_i32_imm ; CHECK: bb.1 (%ir-block.1): - ; CHECK-NEXT: liveins: $vgpr0 + ; CHECK-NEXT: liveins: $vgpr0, $sgpr30_sgpr31 ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 42 ; CHECK-NEXT: ADJCALLSTACKUP 0, 0, implicit-def $scc ; CHECK-NEXT: [[GV:%[0-9]+]]:_(p0) = G_GLOBAL_VALUE @external_gfx_void_func_i32 ; CHECK-NEXT: $vgpr0 = COPY [[C]](s32) - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(<4 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3 - ; CHECK-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY1]](<4 x s32>) + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(<4 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3 + ; CHECK-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY2]](<4 x s32>) ; CHECK-NEXT: $sgpr30_sgpr31 = G_SI_CALL [[GV]](p0), 
@external_gfx_void_func_i32, csr_amdgpu_si_gfx, implicit $vgpr0, implicit $sgpr0_sgpr1_sgpr2_sgpr3 ; CHECK-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def $scc - ; CHECK-NEXT: SI_RETURN + ; CHECK-NEXT: [[COPY3:%[0-9]+]]:gfx_ccr_sgpr_64 = COPY [[COPY1]] + ; CHECK-NEXT: S_SETPC_B64_return_gfx [[COPY3]] call amdgpu_gfx void @external_gfx_void_func_i32(i32 42) ret void } @@ -44,18 +50,20 @@ define amdgpu_gfx void @test_gfx_call_external_void_func_i32_imm(i32) #0 { define amdgpu_gfx void @test_gfx_call_external_void_func_i32_imm_inreg(i32 inreg) #0 { ; CHECK-LABEL: name: test_gfx_call_external_void_func_i32_imm_inreg ; CHECK: bb.1 (%ir-block.1): - ; CHECK-NEXT: liveins: $sgpr4 + ; CHECK-NEXT: liveins: $sgpr4, $sgpr30_sgpr31 ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr4 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 42 ; CHECK-NEXT: ADJCALLSTACKUP 0, 0, implicit-def $scc ; CHECK-NEXT: [[GV:%[0-9]+]]:_(p0) = G_GLOBAL_VALUE @external_gfx_void_func_i32_inreg ; CHECK-NEXT: $sgpr4 = COPY [[C]](s32) - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(<4 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3 - ; CHECK-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY1]](<4 x s32>) + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(<4 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3 + ; CHECK-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY2]](<4 x s32>) ; CHECK-NEXT: $sgpr30_sgpr31 = G_SI_CALL [[GV]](p0), @external_gfx_void_func_i32_inreg, csr_amdgpu_si_gfx, implicit $sgpr4, implicit $sgpr0_sgpr1_sgpr2_sgpr3 ; CHECK-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def $scc - ; CHECK-NEXT: SI_RETURN + ; CHECK-NEXT: [[COPY3:%[0-9]+]]:gfx_ccr_sgpr_64 = COPY [[COPY1]] + ; CHECK-NEXT: S_SETPC_B64_return_gfx [[COPY3]] call amdgpu_gfx void @external_gfx_void_func_i32_inreg(i32 inreg 42) ret void } @@ -63,6 +71,9 @@ define amdgpu_gfx void @test_gfx_call_external_void_func_i32_imm_inreg(i32 inreg define amdgpu_gfx void @test_gfx_call_external_void_func_struct_i8_i32() #0 { ; CHECK-LABEL: name: test_gfx_call_external_void_func_struct_i8_i32 ; CHECK: bb.1 (%ir-block.0): + ; CHECK-NEXT: liveins: $sgpr30_sgpr31 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p4) = G_IMPLICIT_DEF ; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[DEF]](p4) :: (load (p1) from `{ i8, i32 } addrspace(1)* addrspace(4)* undef`, addrspace 4) ; CHECK-NEXT: [[LOAD1:%[0-9]+]]:_(s8) = G_LOAD [[LOAD]](p1) :: (load (s8) from %ir.ptr0, align 4, addrspace 1) @@ -75,11 +86,12 @@ define amdgpu_gfx void @test_gfx_call_external_void_func_struct_i8_i32() #0 { ; CHECK-NEXT: [[ANYEXT1:%[0-9]+]]:_(s32) = G_ANYEXT [[ANYEXT]](s16) ; CHECK-NEXT: $vgpr0 = COPY [[ANYEXT1]](s32) ; CHECK-NEXT: $vgpr1 = COPY [[LOAD2]](s32) - ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(<4 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3 - ; CHECK-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY]](<4 x s32>) + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(<4 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3 + ; CHECK-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY1]](<4 x s32>) ; CHECK-NEXT: $sgpr30_sgpr31 = G_SI_CALL [[GV]](p0), @external_gfx_void_func_struct_i8_i32, csr_amdgpu_si_gfx, implicit $vgpr0, implicit $vgpr1, implicit $sgpr0_sgpr1_sgpr2_sgpr3 ; CHECK-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def $scc - ; CHECK-NEXT: SI_RETURN + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:gfx_ccr_sgpr_64 = COPY [[COPY]] + ; CHECK-NEXT: S_SETPC_B64_return_gfx [[COPY2]] %ptr0 = load { i8, i32 } addrspace(1)*, { i8, i32 } addrspace(1)* addrspace(4)* undef %val = load { i8, 
i32 }, { i8, i32 } addrspace(1)* %ptr0 call amdgpu_gfx void @external_gfx_void_func_struct_i8_i32({ i8, i32 } %val) @@ -89,6 +101,9 @@ define amdgpu_gfx void @test_gfx_call_external_void_func_struct_i8_i32() #0 { define amdgpu_gfx void @test_gfx_call_external_void_func_struct_i8_i32_inreg() #0 { ; CHECK-LABEL: name: test_gfx_call_external_void_func_struct_i8_i32_inreg ; CHECK: bb.1 (%ir-block.0): + ; CHECK-NEXT: liveins: $sgpr30_sgpr31 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p4) = G_IMPLICIT_DEF ; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[DEF]](p4) :: (load (p1) from `{ i8, i32 } addrspace(1)* addrspace(4)* undef`, addrspace 4) ; CHECK-NEXT: [[LOAD1:%[0-9]+]]:_(s8) = G_LOAD [[LOAD]](p1) :: (load (s8) from %ir.ptr0, align 4, addrspace 1) @@ -101,11 +116,12 @@ define amdgpu_gfx void @test_gfx_call_external_void_func_struct_i8_i32_inreg() # ; CHECK-NEXT: [[ANYEXT1:%[0-9]+]]:_(s32) = G_ANYEXT [[ANYEXT]](s16) ; CHECK-NEXT: $sgpr4 = COPY [[ANYEXT1]](s32) ; CHECK-NEXT: $sgpr5 = COPY [[LOAD2]](s32) - ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(<4 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3 - ; CHECK-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY]](<4 x s32>) + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(<4 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3 + ; CHECK-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY1]](<4 x s32>) ; CHECK-NEXT: $sgpr30_sgpr31 = G_SI_CALL [[GV]](p0), @external_gfx_void_func_struct_i8_i32_inreg, csr_amdgpu_si_gfx, implicit $sgpr4, implicit $sgpr5, implicit $sgpr0_sgpr1_sgpr2_sgpr3 ; CHECK-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def $scc - ; CHECK-NEXT: SI_RETURN + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:gfx_ccr_sgpr_64 = COPY [[COPY]] + ; CHECK-NEXT: S_SETPC_B64_return_gfx [[COPY2]] %ptr0 = load { i8, i32 } addrspace(1)*, { i8, i32 } addrspace(1)* addrspace(4)* undef %val = load { i8, i32 }, { i8, i32 } addrspace(1)* %ptr0 call amdgpu_gfx void @external_gfx_void_func_struct_i8_i32_inreg({ i8, i32 } inreg %val) diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-call-return-values.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-call-return-values.ll index 1ae8ee55c787..17d8f497cbcd 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-call-return-values.ll +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-call-return-values.ll @@ -129,22 +129,24 @@ define amdgpu_kernel void @test_call_external_i32_func_i32_imm(i32 addrspace(1)* define amdgpu_gfx void @test_gfx_call_external_i32_func_i32_imm(i32 addrspace(1)* %out) #0 { ; GCN-LABEL: name: test_gfx_call_external_i32_func_i32_imm ; GCN: bb.1 (%ir-block.0): - ; GCN-NEXT: liveins: $vgpr0, $vgpr1 + ; GCN-NEXT: liveins: $vgpr0, $vgpr1, $sgpr30_sgpr31 ; GCN-NEXT: {{ $}} ; GCN-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 ; GCN-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 ; GCN-NEXT: [[MV:%[0-9]+]]:_(p1) = G_MERGE_VALUES [[COPY]](s32), [[COPY1]](s32) + ; GCN-NEXT: [[COPY2:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 ; GCN-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 42 ; GCN-NEXT: ADJCALLSTACKUP 0, 0, implicit-def $scc ; GCN-NEXT: [[GV:%[0-9]+]]:_(p0) = G_GLOBAL_VALUE @external_gfx_i32_func_i32 ; GCN-NEXT: $vgpr0 = COPY [[C]](s32) - ; GCN-NEXT: [[COPY2:%[0-9]+]]:_(<4 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3 - ; GCN-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY2]](<4 x s32>) + ; GCN-NEXT: [[COPY3:%[0-9]+]]:_(<4 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3 + ; GCN-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY3]](<4 x s32>) ; GCN-NEXT: $sgpr30_sgpr31 = G_SI_CALL [[GV]](p0), 
@external_gfx_i32_func_i32, csr_amdgpu_si_gfx, implicit $vgpr0, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit-def $vgpr0 - ; GCN-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $vgpr0 + ; GCN-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $vgpr0 ; GCN-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def $scc - ; GCN-NEXT: G_STORE [[COPY3]](s32), [[MV]](p1) :: (volatile store (s32) into %ir.out, addrspace 1) - ; GCN-NEXT: SI_RETURN + ; GCN-NEXT: G_STORE [[COPY4]](s32), [[MV]](p1) :: (volatile store (s32) into %ir.out, addrspace 1) + ; GCN-NEXT: [[COPY5:%[0-9]+]]:gfx_ccr_sgpr_64 = COPY [[COPY2]] + ; GCN-NEXT: S_SETPC_B64_return_gfx [[COPY5]] %val = call amdgpu_gfx i32 @external_gfx_i32_func_i32(i32 42) store volatile i32 %val, i32 addrspace(1)* %out ret void @@ -210,17 +212,21 @@ define amdgpu_kernel void @test_call_external_i1_func_void() #0 { define amdgpu_gfx void @test_gfx_call_external_i1_func_void() #0 { ; GCN-LABEL: name: test_gfx_call_external_i1_func_void ; GCN: bb.1 (%ir-block.0): + ; GCN-NEXT: liveins: $sgpr30_sgpr31 + ; GCN-NEXT: {{ $}} + ; GCN-NEXT: [[COPY:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 ; GCN-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF ; GCN-NEXT: ADJCALLSTACKUP 0, 0, implicit-def $scc ; GCN-NEXT: [[GV:%[0-9]+]]:_(p0) = G_GLOBAL_VALUE @external_gfx_i1_func_void - ; GCN-NEXT: [[COPY:%[0-9]+]]:_(<4 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3 - ; GCN-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY]](<4 x s32>) + ; GCN-NEXT: [[COPY1:%[0-9]+]]:_(<4 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3 + ; GCN-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY1]](<4 x s32>) ; GCN-NEXT: $sgpr30_sgpr31 = G_SI_CALL [[GV]](p0), @external_gfx_i1_func_void, csr_amdgpu_si_gfx, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit-def $vgpr0 - ; GCN-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GCN-NEXT: [[TRUNC:%[0-9]+]]:_(s1) = G_TRUNC [[COPY1]](s32) + ; GCN-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr0 + ; GCN-NEXT: [[TRUNC:%[0-9]+]]:_(s1) = G_TRUNC [[COPY2]](s32) ; GCN-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def $scc ; GCN-NEXT: G_STORE [[TRUNC]](s1), [[DEF]](p1) :: (volatile store (s1) into `i1 addrspace(1)* undef`, addrspace 1) - ; GCN-NEXT: SI_RETURN + ; GCN-NEXT: [[COPY3:%[0-9]+]]:gfx_ccr_sgpr_64 = COPY [[COPY]] + ; GCN-NEXT: S_SETPC_B64_return_gfx [[COPY3]] %val = call amdgpu_gfx i1 @external_gfx_i1_func_void() store volatile i1 %val, i1 addrspace(1)* undef ret void @@ -405,18 +411,22 @@ define amdgpu_kernel void @test_call_external_i8_func_void() #0 { define amdgpu_gfx void @test_gfx_call_external_i8_func_void() #0 { ; GCN-LABEL: name: test_gfx_call_external_i8_func_void ; GCN: bb.1 (%ir-block.0): + ; GCN-NEXT: liveins: $sgpr30_sgpr31 + ; GCN-NEXT: {{ $}} + ; GCN-NEXT: [[COPY:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 ; GCN-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF ; GCN-NEXT: ADJCALLSTACKUP 0, 0, implicit-def $scc ; GCN-NEXT: [[GV:%[0-9]+]]:_(p0) = G_GLOBAL_VALUE @external_gfx_i8_func_void - ; GCN-NEXT: [[COPY:%[0-9]+]]:_(<4 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3 - ; GCN-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY]](<4 x s32>) + ; GCN-NEXT: [[COPY1:%[0-9]+]]:_(<4 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3 + ; GCN-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY1]](<4 x s32>) ; GCN-NEXT: $sgpr30_sgpr31 = G_SI_CALL [[GV]](p0), @external_gfx_i8_func_void, csr_amdgpu_si_gfx, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit-def $vgpr0 - ; GCN-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GCN-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY1]](s32) + ; GCN-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr0 + ; GCN-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC 
[[COPY2]](s32) ; GCN-NEXT: [[TRUNC1:%[0-9]+]]:_(s8) = G_TRUNC [[TRUNC]](s16) ; GCN-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def $scc ; GCN-NEXT: G_STORE [[TRUNC1]](s8), [[DEF]](p1) :: (volatile store (s8) into `i8 addrspace(1)* undef`, addrspace 1) - ; GCN-NEXT: SI_RETURN + ; GCN-NEXT: [[COPY3:%[0-9]+]]:gfx_ccr_sgpr_64 = COPY [[COPY]] + ; GCN-NEXT: S_SETPC_B64_return_gfx [[COPY3]] %val = call amdgpu_gfx i8 @external_gfx_i8_func_void() store volatile i8 %val, i8 addrspace(1)* undef ret void @@ -776,16 +786,20 @@ define amdgpu_kernel void @test_call_external_i32_func_void() #0 { define amdgpu_gfx void @test_gfx_call_external_i32_func_void() #0 { ; GCN-LABEL: name: test_gfx_call_external_i32_func_void ; GCN: bb.1 (%ir-block.0): + ; GCN-NEXT: liveins: $sgpr30_sgpr31 + ; GCN-NEXT: {{ $}} + ; GCN-NEXT: [[COPY:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 ; GCN-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF ; GCN-NEXT: ADJCALLSTACKUP 0, 0, implicit-def $scc ; GCN-NEXT: [[GV:%[0-9]+]]:_(p0) = G_GLOBAL_VALUE @external_gfx_i32_func_void - ; GCN-NEXT: [[COPY:%[0-9]+]]:_(<4 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3 - ; GCN-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY]](<4 x s32>) + ; GCN-NEXT: [[COPY1:%[0-9]+]]:_(<4 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3 + ; GCN-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY1]](<4 x s32>) ; GCN-NEXT: $sgpr30_sgpr31 = G_SI_CALL [[GV]](p0), @external_gfx_i32_func_void, csr_amdgpu_si_gfx, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit-def $vgpr0 - ; GCN-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr0 + ; GCN-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr0 ; GCN-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def $scc - ; GCN-NEXT: G_STORE [[COPY1]](s32), [[DEF]](p1) :: (volatile store (s32) into `i32 addrspace(1)* undef`, addrspace 1) - ; GCN-NEXT: SI_RETURN + ; GCN-NEXT: G_STORE [[COPY2]](s32), [[DEF]](p1) :: (volatile store (s32) into `i32 addrspace(1)* undef`, addrspace 1) + ; GCN-NEXT: [[COPY3:%[0-9]+]]:gfx_ccr_sgpr_64 = COPY [[COPY]] + ; GCN-NEXT: S_SETPC_B64_return_gfx [[COPY3]] %val = call amdgpu_gfx i32 @external_gfx_i32_func_void() store volatile i32 %val, i32 addrspace(1)* undef ret void @@ -2495,21 +2509,25 @@ define amdgpu_kernel void @test_call_external_i32_i64_func_void() #0 { define amdgpu_gfx void @test_gfx_call_external_i32_i64_func_void() #0 { ; GCN-LABEL: name: test_gfx_call_external_i32_i64_func_void ; GCN: bb.1 (%ir-block.0): + ; GCN-NEXT: liveins: $sgpr30_sgpr31 + ; GCN-NEXT: {{ $}} + ; GCN-NEXT: [[COPY:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 ; GCN-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF - ; GCN-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY [[DEF]](p1) + ; GCN-NEXT: [[COPY1:%[0-9]+]]:_(p1) = COPY [[DEF]](p1) ; GCN-NEXT: ADJCALLSTACKUP 0, 0, implicit-def $scc ; GCN-NEXT: [[GV:%[0-9]+]]:_(p0) = G_GLOBAL_VALUE @external_gfx_i32_i64_func_void - ; GCN-NEXT: [[COPY1:%[0-9]+]]:_(<4 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3 - ; GCN-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY1]](<4 x s32>) + ; GCN-NEXT: [[COPY2:%[0-9]+]]:_(<4 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3 + ; GCN-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY2]](<4 x s32>) ; GCN-NEXT: $sgpr30_sgpr31 = G_SI_CALL [[GV]](p0), @external_gfx_i32_i64_func_void, csr_amdgpu_si_gfx, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit-def $vgpr0, implicit-def $vgpr1, implicit-def $vgpr2 - ; GCN-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GCN-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GCN-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; GCN-NEXT: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY3]](s32), [[COPY4]](s32) + ; GCN-NEXT: 
[[COPY3:%[0-9]+]]:_(s32) = COPY $vgpr0 + ; GCN-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; GCN-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $vgpr2 + ; GCN-NEXT: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY4]](s32), [[COPY5]](s32) ; GCN-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def $scc - ; GCN-NEXT: G_STORE [[COPY2]](s32), [[DEF]](p1) :: (volatile store (s32) into `i32 addrspace(1)* undef`, addrspace 1) - ; GCN-NEXT: G_STORE [[MV]](s64), [[COPY]](p1) :: (volatile store (s64) into `i64 addrspace(1)* undef`, addrspace 1) - ; GCN-NEXT: SI_RETURN + ; GCN-NEXT: G_STORE [[COPY3]](s32), [[DEF]](p1) :: (volatile store (s32) into `i32 addrspace(1)* undef`, addrspace 1) + ; GCN-NEXT: G_STORE [[MV]](s64), [[COPY1]](p1) :: (volatile store (s64) into `i64 addrspace(1)* undef`, addrspace 1) + ; GCN-NEXT: [[COPY6:%[0-9]+]]:gfx_ccr_sgpr_64 = COPY [[COPY]] + ; GCN-NEXT: S_SETPC_B64_return_gfx [[COPY6]] %val = call amdgpu_gfx { i32, i64 } @external_gfx_i32_i64_func_void() %val.0 = extractvalue { i32, i64 } %val, 0 %val.1 = extractvalue { i32, i64 } %val, 1 diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-call.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-call.ll index 9a7552591c5a..d81a40bfe6d0 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-call.ll +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-call.ll @@ -149,13 +149,17 @@ define amdgpu_kernel void @test_call_external_void_func_void() #0 { define amdgpu_gfx void @test_gfx_call_external_void_func_void() #0 { ; CHECK-LABEL: name: test_gfx_call_external_void_func_void ; CHECK: bb.1 (%ir-block.0): + ; CHECK-NEXT: liveins: $sgpr30_sgpr31 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 ; CHECK-NEXT: ADJCALLSTACKUP 0, 0, implicit-def $scc ; CHECK-NEXT: [[GV:%[0-9]+]]:_(p0) = G_GLOBAL_VALUE @external_gfx_void_func_void - ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(<4 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3 - ; CHECK-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY]](<4 x s32>) + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(<4 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3 + ; CHECK-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY1]](<4 x s32>) ; CHECK-NEXT: $sgpr30_sgpr31 = G_SI_CALL [[GV]](p0), @external_gfx_void_func_void, csr_amdgpu_si_gfx, implicit $sgpr0_sgpr1_sgpr2_sgpr3 ; CHECK-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def $scc - ; CHECK-NEXT: SI_RETURN + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:gfx_ccr_sgpr_64 = COPY [[COPY]] + ; CHECK-NEXT: S_SETPC_B64_return_gfx [[COPY2]] call amdgpu_gfx void @external_gfx_void_func_void() ret void } @@ -163,7 +167,7 @@ define amdgpu_gfx void @test_gfx_call_external_void_func_void() #0 { define void @test_func_call_external_void_func_void() #0 { ; CHECK-LABEL: name: test_func_call_external_void_func_void ; CHECK: bb.1 (%ir-block.0): - ; CHECK-NEXT: liveins: $sgpr12, $sgpr13, $sgpr14, $vgpr31, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9, $sgpr10_sgpr11 + ; CHECK-NEXT: liveins: $sgpr12, $sgpr13, $sgpr14, $vgpr31, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9, $sgpr10_sgpr11, $sgpr30_sgpr31 ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr31 ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr_32 = COPY $sgpr14 @@ -173,29 +177,31 @@ define void @test_func_call_external_void_func_void() #0 { ; CHECK-NEXT: [[COPY5:%[0-9]+]]:sgpr_64 = COPY $sgpr8_sgpr9 ; CHECK-NEXT: [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr6_sgpr7 ; CHECK-NEXT: [[COPY7:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5 + ; CHECK-NEXT: [[COPY8:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 ; CHECK-NEXT: ADJCALLSTACKUP 0, 0, implicit-def $scc ; 
CHECK-NEXT: [[GV:%[0-9]+]]:_(p0) = G_GLOBAL_VALUE @external_void_func_void - ; CHECK-NEXT: [[COPY8:%[0-9]+]]:_(p4) = COPY [[COPY7]] - ; CHECK-NEXT: [[COPY9:%[0-9]+]]:_(p4) = COPY [[COPY6]] - ; CHECK-NEXT: [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY5]] - ; CHECK-NEXT: [[COPY11:%[0-9]+]]:_(s64) = COPY [[COPY4]] - ; CHECK-NEXT: [[COPY12:%[0-9]+]]:_(s32) = COPY [[COPY3]] - ; CHECK-NEXT: [[COPY13:%[0-9]+]]:_(s32) = COPY [[COPY2]] - ; CHECK-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY1]] - ; CHECK-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY]](s32) - ; CHECK-NEXT: [[COPY16:%[0-9]+]]:_(<4 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3 - ; CHECK-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY16]](<4 x s32>) - ; CHECK-NEXT: $sgpr4_sgpr5 = COPY [[COPY8]](p4) - ; CHECK-NEXT: $sgpr6_sgpr7 = COPY [[COPY9]](p4) - ; CHECK-NEXT: $sgpr8_sgpr9 = COPY [[COPY10]](p4) - ; CHECK-NEXT: $sgpr10_sgpr11 = COPY [[COPY11]](s64) - ; CHECK-NEXT: $sgpr12 = COPY [[COPY12]](s32) - ; CHECK-NEXT: $sgpr13 = COPY [[COPY13]](s32) - ; CHECK-NEXT: $sgpr14 = COPY [[COPY14]](s32) - ; CHECK-NEXT: $vgpr31 = COPY [[COPY15]](s32) + ; CHECK-NEXT: [[COPY9:%[0-9]+]]:_(p4) = COPY [[COPY7]] + ; CHECK-NEXT: [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY6]] + ; CHECK-NEXT: [[COPY11:%[0-9]+]]:_(p4) = COPY [[COPY5]] + ; CHECK-NEXT: [[COPY12:%[0-9]+]]:_(s64) = COPY [[COPY4]] + ; CHECK-NEXT: [[COPY13:%[0-9]+]]:_(s32) = COPY [[COPY3]] + ; CHECK-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY2]] + ; CHECK-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY1]] + ; CHECK-NEXT: [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY]](s32) + ; CHECK-NEXT: [[COPY17:%[0-9]+]]:_(<4 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3 + ; CHECK-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY17]](<4 x s32>) + ; CHECK-NEXT: $sgpr4_sgpr5 = COPY [[COPY9]](p4) + ; CHECK-NEXT: $sgpr6_sgpr7 = COPY [[COPY10]](p4) + ; CHECK-NEXT: $sgpr8_sgpr9 = COPY [[COPY11]](p4) + ; CHECK-NEXT: $sgpr10_sgpr11 = COPY [[COPY12]](s64) + ; CHECK-NEXT: $sgpr12 = COPY [[COPY13]](s32) + ; CHECK-NEXT: $sgpr13 = COPY [[COPY14]](s32) + ; CHECK-NEXT: $sgpr14 = COPY [[COPY15]](s32) + ; CHECK-NEXT: $vgpr31 = COPY [[COPY16]](s32) ; CHECK-NEXT: $sgpr30_sgpr31 = G_SI_CALL [[GV]](p0), @external_void_func_void, csr_amdgpu_highregs, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $vgpr31 ; CHECK-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def $scc - ; CHECK-NEXT: SI_RETURN + ; CHECK-NEXT: [[COPY18:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY8]] + ; CHECK-NEXT: S_SETPC_B64_return [[COPY18]] call void @external_void_func_void() ret void } @@ -883,18 +889,20 @@ define amdgpu_kernel void @test_call_external_void_func_i32_imm(i32) #0 { define amdgpu_gfx void @test_gfx_call_external_void_func_i32_imm(i32) #0 { ; CHECK-LABEL: name: test_gfx_call_external_void_func_i32_imm ; CHECK: bb.1 (%ir-block.1): - ; CHECK-NEXT: liveins: $vgpr0 + ; CHECK-NEXT: liveins: $vgpr0, $sgpr30_sgpr31 ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 42 ; CHECK-NEXT: ADJCALLSTACKUP 0, 0, implicit-def $scc ; CHECK-NEXT: [[GV:%[0-9]+]]:_(p0) = G_GLOBAL_VALUE @external_gfx_void_func_i32 ; CHECK-NEXT: $vgpr0 = COPY [[C]](s32) - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(<4 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3 - ; CHECK-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY1]](<4 x s32>) + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(<4 x s32>) = COPY 
$sgpr0_sgpr1_sgpr2_sgpr3 + ; CHECK-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY2]](<4 x s32>) ; CHECK-NEXT: $sgpr30_sgpr31 = G_SI_CALL [[GV]](p0), @external_gfx_void_func_i32, csr_amdgpu_si_gfx, implicit $vgpr0, implicit $sgpr0_sgpr1_sgpr2_sgpr3 ; CHECK-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def $scc - ; CHECK-NEXT: SI_RETURN + ; CHECK-NEXT: [[COPY3:%[0-9]+]]:gfx_ccr_sgpr_64 = COPY [[COPY1]] + ; CHECK-NEXT: S_SETPC_B64_return_gfx [[COPY3]] call amdgpu_gfx void @external_gfx_void_func_i32(i32 42) ret void } @@ -902,18 +910,20 @@ define amdgpu_gfx void @test_gfx_call_external_void_func_i32_imm(i32) #0 { define amdgpu_gfx void @test_gfx_call_external_void_func_i32_imm_inreg(i32 inreg) #0 { ; CHECK-LABEL: name: test_gfx_call_external_void_func_i32_imm_inreg ; CHECK: bb.1 (%ir-block.1): - ; CHECK-NEXT: liveins: $sgpr4 + ; CHECK-NEXT: liveins: $sgpr4, $sgpr30_sgpr31 ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr4 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 42 ; CHECK-NEXT: ADJCALLSTACKUP 0, 0, implicit-def $scc ; CHECK-NEXT: [[GV:%[0-9]+]]:_(p0) = G_GLOBAL_VALUE @external_gfx_void_func_i32_inreg ; CHECK-NEXT: $sgpr4 = COPY [[C]](s32) - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(<4 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3 - ; CHECK-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY1]](<4 x s32>) + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(<4 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3 + ; CHECK-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY2]](<4 x s32>) ; CHECK-NEXT: $sgpr30_sgpr31 = G_SI_CALL [[GV]](p0), @external_gfx_void_func_i32_inreg, csr_amdgpu_si_gfx, implicit $sgpr4, implicit $sgpr0_sgpr1_sgpr2_sgpr3 ; CHECK-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def $scc - ; CHECK-NEXT: SI_RETURN + ; CHECK-NEXT: [[COPY3:%[0-9]+]]:gfx_ccr_sgpr_64 = COPY [[COPY1]] + ; CHECK-NEXT: S_SETPC_B64_return_gfx [[COPY3]] call amdgpu_gfx void @external_gfx_void_func_i32_inreg(i32 inreg 42) ret void } @@ -3866,6 +3876,9 @@ define amdgpu_kernel void @test_call_external_void_func_struct_i8_i32() #0 { define amdgpu_gfx void @test_gfx_call_external_void_func_struct_i8_i32() #0 { ; CHECK-LABEL: name: test_gfx_call_external_void_func_struct_i8_i32 ; CHECK: bb.1 (%ir-block.0): + ; CHECK-NEXT: liveins: $sgpr30_sgpr31 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p4) = G_IMPLICIT_DEF ; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[DEF]](p4) :: (load (p1) from `{ i8, i32 } addrspace(1)* addrspace(4)* undef`, addrspace 4) ; CHECK-NEXT: [[LOAD1:%[0-9]+]]:_(s8) = G_LOAD [[LOAD]](p1) :: (load (s8) from %ir.ptr0, align 4, addrspace 1) @@ -3878,11 +3891,12 @@ define amdgpu_gfx void @test_gfx_call_external_void_func_struct_i8_i32() #0 { ; CHECK-NEXT: [[ANYEXT1:%[0-9]+]]:_(s32) = G_ANYEXT [[ANYEXT]](s16) ; CHECK-NEXT: $vgpr0 = COPY [[ANYEXT1]](s32) ; CHECK-NEXT: $vgpr1 = COPY [[LOAD2]](s32) - ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(<4 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3 - ; CHECK-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY]](<4 x s32>) + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(<4 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3 + ; CHECK-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY1]](<4 x s32>) ; CHECK-NEXT: $sgpr30_sgpr31 = G_SI_CALL [[GV]](p0), @external_gfx_void_func_struct_i8_i32, csr_amdgpu_si_gfx, implicit $vgpr0, implicit $vgpr1, implicit $sgpr0_sgpr1_sgpr2_sgpr3 ; CHECK-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def $scc - ; CHECK-NEXT: SI_RETURN + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:gfx_ccr_sgpr_64 = COPY [[COPY]] 
+ ; CHECK-NEXT: S_SETPC_B64_return_gfx [[COPY2]] %ptr0 = load { i8, i32 } addrspace(1)*, { i8, i32 } addrspace(1)* addrspace(4)* undef %val = load { i8, i32 }, { i8, i32 } addrspace(1)* %ptr0 call amdgpu_gfx void @external_gfx_void_func_struct_i8_i32({ i8, i32 } %val) @@ -3892,6 +3906,9 @@ define amdgpu_gfx void @test_gfx_call_external_void_func_struct_i8_i32() #0 { define amdgpu_gfx void @test_gfx_call_external_void_func_struct_i8_i32_inreg() #0 { ; CHECK-LABEL: name: test_gfx_call_external_void_func_struct_i8_i32_inreg ; CHECK: bb.1 (%ir-block.0): + ; CHECK-NEXT: liveins: $sgpr30_sgpr31 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p4) = G_IMPLICIT_DEF ; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[DEF]](p4) :: (load (p1) from `{ i8, i32 } addrspace(1)* addrspace(4)* undef`, addrspace 4) ; CHECK-NEXT: [[LOAD1:%[0-9]+]]:_(s8) = G_LOAD [[LOAD]](p1) :: (load (s8) from %ir.ptr0, align 4, addrspace 1) @@ -3904,11 +3921,12 @@ define amdgpu_gfx void @test_gfx_call_external_void_func_struct_i8_i32_inreg() # ; CHECK-NEXT: [[ANYEXT1:%[0-9]+]]:_(s32) = G_ANYEXT [[ANYEXT]](s16) ; CHECK-NEXT: $sgpr4 = COPY [[ANYEXT1]](s32) ; CHECK-NEXT: $sgpr5 = COPY [[LOAD2]](s32) - ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(<4 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3 - ; CHECK-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY]](<4 x s32>) + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(<4 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3 + ; CHECK-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY1]](<4 x s32>) ; CHECK-NEXT: $sgpr30_sgpr31 = G_SI_CALL [[GV]](p0), @external_gfx_void_func_struct_i8_i32_inreg, csr_amdgpu_si_gfx, implicit $sgpr4, implicit $sgpr5, implicit $sgpr0_sgpr1_sgpr2_sgpr3 ; CHECK-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def $scc - ; CHECK-NEXT: SI_RETURN + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:gfx_ccr_sgpr_64 = COPY [[COPY]] + ; CHECK-NEXT: S_SETPC_B64_return_gfx [[COPY2]] %ptr0 = load { i8, i32 } addrspace(1)*, { i8, i32 } addrspace(1)* addrspace(4)* undef %val = load { i8, i32 }, { i8, i32 } addrspace(1)* %ptr0 call amdgpu_gfx void @external_gfx_void_func_struct_i8_i32_inreg({ i8, i32 } inreg %val) @@ -3989,7 +4007,7 @@ declare void @void_func_byval_a3i32_byval_i8_align32([3 x i32] addrspace(5)* byv define void @call_byval_3ai32_byval_i8_align32([3 x i32] addrspace(5)* %incoming0, i8 addrspace(5)* align 32 %incoming1) #0 { ; CHECK-LABEL: name: call_byval_3ai32_byval_i8_align32 ; CHECK: bb.1 (%ir-block.0): - ; CHECK-NEXT: liveins: $sgpr12, $sgpr13, $sgpr14, $vgpr0, $vgpr1, $vgpr31, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9, $sgpr10_sgpr11 + ; CHECK-NEXT: liveins: $sgpr12, $sgpr13, $sgpr14, $vgpr0, $vgpr1, $vgpr31, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9, $sgpr10_sgpr11, $sgpr30_sgpr31 ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr31 ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr_32 = COPY $sgpr14 @@ -4001,40 +4019,42 @@ define void @call_byval_3ai32_byval_i8_align32([3 x i32] addrspace(5)* %incoming ; CHECK-NEXT: [[COPY7:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5 ; CHECK-NEXT: [[COPY8:%[0-9]+]]:_(p5) = COPY $vgpr0 ; CHECK-NEXT: [[COPY9:%[0-9]+]]:_(p5) = COPY $vgpr1 + ; CHECK-NEXT: [[COPY10:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 999 ; CHECK-NEXT: ADJCALLSTACKUP 0, 0, implicit-def $scc ; CHECK-NEXT: [[GV:%[0-9]+]]:_(p0) = G_GLOBAL_VALUE @void_func_byval_a3i32_byval_i8_align32 - ; CHECK-NEXT: [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY7]] - ; CHECK-NEXT: [[COPY11:%[0-9]+]]:_(p4) = COPY [[COPY6]] - ; CHECK-NEXT: 
[[COPY12:%[0-9]+]]:_(p4) = COPY [[COPY5]] - ; CHECK-NEXT: [[COPY13:%[0-9]+]]:_(s64) = COPY [[COPY4]] - ; CHECK-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY3]] - ; CHECK-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY2]] - ; CHECK-NEXT: [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY1]] - ; CHECK-NEXT: [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY]](s32) - ; CHECK-NEXT: [[COPY18:%[0-9]+]]:_(p5) = COPY $sgpr32 + ; CHECK-NEXT: [[COPY11:%[0-9]+]]:_(p4) = COPY [[COPY7]] + ; CHECK-NEXT: [[COPY12:%[0-9]+]]:_(p4) = COPY [[COPY6]] + ; CHECK-NEXT: [[COPY13:%[0-9]+]]:_(p4) = COPY [[COPY5]] + ; CHECK-NEXT: [[COPY14:%[0-9]+]]:_(s64) = COPY [[COPY4]] + ; CHECK-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY3]] + ; CHECK-NEXT: [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY2]] + ; CHECK-NEXT: [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY1]] + ; CHECK-NEXT: [[COPY18:%[0-9]+]]:_(s32) = COPY [[COPY]](s32) + ; CHECK-NEXT: [[COPY19:%[0-9]+]]:_(p5) = COPY $sgpr32 ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 - ; CHECK-NEXT: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY18]], [[C1]](s32) + ; CHECK-NEXT: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY19]], [[C1]](s32) ; CHECK-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 12 ; CHECK-NEXT: G_MEMCPY [[PTR_ADD]](p5), [[COPY8]](p5), [[C2]](s32), 0 :: (dereferenceable store (s96) into stack, align 4, addrspace 5), (dereferenceable load (s96) from %ir.incoming0, align 4, addrspace 5) ; CHECK-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 32 - ; CHECK-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY18]], [[C3]](s32) + ; CHECK-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY19]], [[C3]](s32) ; CHECK-NEXT: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 ; CHECK-NEXT: G_MEMCPY [[PTR_ADD1]](p5), [[COPY9]](p5), [[C4]](s32), 0 :: (dereferenceable store (s8) into stack + 32, align 32, addrspace 5), (dereferenceable load (s8) from %ir.incoming1, align 32, addrspace 5) ; CHECK-NEXT: $vgpr0 = COPY [[C]](s32) - ; CHECK-NEXT: [[COPY19:%[0-9]+]]:_(<4 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3 - ; CHECK-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY19]](<4 x s32>) - ; CHECK-NEXT: $sgpr4_sgpr5 = COPY [[COPY10]](p4) - ; CHECK-NEXT: $sgpr6_sgpr7 = COPY [[COPY11]](p4) - ; CHECK-NEXT: $sgpr8_sgpr9 = COPY [[COPY12]](p4) - ; CHECK-NEXT: $sgpr10_sgpr11 = COPY [[COPY13]](s64) - ; CHECK-NEXT: $sgpr12 = COPY [[COPY14]](s32) - ; CHECK-NEXT: $sgpr13 = COPY [[COPY15]](s32) - ; CHECK-NEXT: $sgpr14 = COPY [[COPY16]](s32) - ; CHECK-NEXT: $vgpr31 = COPY [[COPY17]](s32) + ; CHECK-NEXT: [[COPY20:%[0-9]+]]:_(<4 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3 + ; CHECK-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY20]](<4 x s32>) + ; CHECK-NEXT: $sgpr4_sgpr5 = COPY [[COPY11]](p4) + ; CHECK-NEXT: $sgpr6_sgpr7 = COPY [[COPY12]](p4) + ; CHECK-NEXT: $sgpr8_sgpr9 = COPY [[COPY13]](p4) + ; CHECK-NEXT: $sgpr10_sgpr11 = COPY [[COPY14]](s64) + ; CHECK-NEXT: $sgpr12 = COPY [[COPY15]](s32) + ; CHECK-NEXT: $sgpr13 = COPY [[COPY16]](s32) + ; CHECK-NEXT: $sgpr14 = COPY [[COPY17]](s32) + ; CHECK-NEXT: $vgpr31 = COPY [[COPY18]](s32) ; CHECK-NEXT: $sgpr30_sgpr31 = G_SI_CALL [[GV]](p0), @void_func_byval_a3i32_byval_i8_align32, csr_amdgpu_highregs, implicit $vgpr0, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $vgpr31 ; CHECK-NEXT: ADJCALLSTACKDOWN 0, 36, implicit-def $scc - ; CHECK-NEXT: SI_RETURN + ; CHECK-NEXT: [[COPY21:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY10]] + ; CHECK-NEXT: S_SETPC_B64_return [[COPY21]] call void 
@void_func_byval_a3i32_byval_i8_align32([3 x i32] addrspace(5)* byval([3 x i32]) %incoming0, i8 addrspace(5)* align 32 %incoming1, i32 999) ret void } @@ -4046,7 +4066,7 @@ declare void @void_func_byval_a4i64_align4([4 x i64] addrspace(5)* byval([4 x i6 define void @call_byval_a4i64_align4_higher_source_align([4 x i64] addrspace(5)* align 256 %incoming_high_align) #0 { ; CHECK-LABEL: name: call_byval_a4i64_align4_higher_source_align ; CHECK: bb.1 (%ir-block.0): - ; CHECK-NEXT: liveins: $sgpr12, $sgpr13, $sgpr14, $vgpr0, $vgpr31, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9, $sgpr10_sgpr11 + ; CHECK-NEXT: liveins: $sgpr12, $sgpr13, $sgpr14, $vgpr0, $vgpr31, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9, $sgpr10_sgpr11, $sgpr30_sgpr31 ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr31 ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr_32 = COPY $sgpr14 @@ -4057,34 +4077,36 @@ define void @call_byval_a4i64_align4_higher_source_align([4 x i64] addrspace(5)* ; CHECK-NEXT: [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr6_sgpr7 ; CHECK-NEXT: [[COPY7:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5 ; CHECK-NEXT: [[COPY8:%[0-9]+]]:_(p5) = COPY $vgpr0 + ; CHECK-NEXT: [[COPY9:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 ; CHECK-NEXT: ADJCALLSTACKUP 0, 0, implicit-def $scc ; CHECK-NEXT: [[GV:%[0-9]+]]:_(p0) = G_GLOBAL_VALUE @void_func_byval_a4i64_align4 - ; CHECK-NEXT: [[COPY9:%[0-9]+]]:_(p4) = COPY [[COPY7]] - ; CHECK-NEXT: [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY6]] - ; CHECK-NEXT: [[COPY11:%[0-9]+]]:_(p4) = COPY [[COPY5]] - ; CHECK-NEXT: [[COPY12:%[0-9]+]]:_(s64) = COPY [[COPY4]] - ; CHECK-NEXT: [[COPY13:%[0-9]+]]:_(s32) = COPY [[COPY3]] - ; CHECK-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY2]] - ; CHECK-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY1]] - ; CHECK-NEXT: [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY]](s32) - ; CHECK-NEXT: [[COPY17:%[0-9]+]]:_(p5) = COPY $sgpr32 + ; CHECK-NEXT: [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY7]] + ; CHECK-NEXT: [[COPY11:%[0-9]+]]:_(p4) = COPY [[COPY6]] + ; CHECK-NEXT: [[COPY12:%[0-9]+]]:_(p4) = COPY [[COPY5]] + ; CHECK-NEXT: [[COPY13:%[0-9]+]]:_(s64) = COPY [[COPY4]] + ; CHECK-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY3]] + ; CHECK-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY2]] + ; CHECK-NEXT: [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY1]] + ; CHECK-NEXT: [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY]](s32) + ; CHECK-NEXT: [[COPY18:%[0-9]+]]:_(p5) = COPY $sgpr32 ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 - ; CHECK-NEXT: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY17]], [[C]](s32) + ; CHECK-NEXT: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY18]], [[C]](s32) ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 32 ; CHECK-NEXT: G_MEMCPY [[PTR_ADD]](p5), [[COPY8]](p5), [[C1]](s32), 0 :: (dereferenceable store (s256) into stack, align 4, addrspace 5), (dereferenceable load (s256) from %ir.incoming_high_align, align 256, addrspace 5) - ; CHECK-NEXT: [[COPY18:%[0-9]+]]:_(<4 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3 - ; CHECK-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY18]](<4 x s32>) - ; CHECK-NEXT: $sgpr4_sgpr5 = COPY [[COPY9]](p4) - ; CHECK-NEXT: $sgpr6_sgpr7 = COPY [[COPY10]](p4) - ; CHECK-NEXT: $sgpr8_sgpr9 = COPY [[COPY11]](p4) - ; CHECK-NEXT: $sgpr10_sgpr11 = COPY [[COPY12]](s64) - ; CHECK-NEXT: $sgpr12 = COPY [[COPY13]](s32) - ; CHECK-NEXT: $sgpr13 = COPY [[COPY14]](s32) - ; CHECK-NEXT: $sgpr14 = COPY [[COPY15]](s32) - ; CHECK-NEXT: $vgpr31 = COPY [[COPY16]](s32) + ; CHECK-NEXT: [[COPY19:%[0-9]+]]:_(<4 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3 + ; CHECK-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = 
COPY [[COPY19]](<4 x s32>) + ; CHECK-NEXT: $sgpr4_sgpr5 = COPY [[COPY10]](p4) + ; CHECK-NEXT: $sgpr6_sgpr7 = COPY [[COPY11]](p4) + ; CHECK-NEXT: $sgpr8_sgpr9 = COPY [[COPY12]](p4) + ; CHECK-NEXT: $sgpr10_sgpr11 = COPY [[COPY13]](s64) + ; CHECK-NEXT: $sgpr12 = COPY [[COPY14]](s32) + ; CHECK-NEXT: $sgpr13 = COPY [[COPY15]](s32) + ; CHECK-NEXT: $sgpr14 = COPY [[COPY16]](s32) + ; CHECK-NEXT: $vgpr31 = COPY [[COPY17]](s32) ; CHECK-NEXT: $sgpr30_sgpr31 = G_SI_CALL [[GV]](p0), @void_func_byval_a4i64_align4, csr_amdgpu_highregs, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $vgpr31 ; CHECK-NEXT: ADJCALLSTACKDOWN 0, 32, implicit-def $scc - ; CHECK-NEXT: SI_RETURN + ; CHECK-NEXT: [[COPY20:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY9]] + ; CHECK-NEXT: S_SETPC_B64_return [[COPY20]] call void @void_func_byval_a4i64_align4([4 x i64] addrspace(5)* byval([4 x i64]) align 4 %incoming_high_align) ret void } @@ -4582,7 +4604,7 @@ entry: define void @stack_12xv3i32() #0 { ; CHECK-LABEL: name: stack_12xv3i32 ; CHECK: bb.1.entry: - ; CHECK-NEXT: liveins: $sgpr12, $sgpr13, $sgpr14, $vgpr31, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9, $sgpr10_sgpr11 + ; CHECK-NEXT: liveins: $sgpr12, $sgpr13, $sgpr14, $vgpr31, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9, $sgpr10_sgpr11, $sgpr30_sgpr31 ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr31 ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr_32 = COPY $sgpr14 @@ -4592,6 +4614,7 @@ define void @stack_12xv3i32() #0 { ; CHECK-NEXT: [[COPY5:%[0-9]+]]:sgpr_64 = COPY $sgpr8_sgpr9 ; CHECK-NEXT: [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr6_sgpr7 ; CHECK-NEXT: [[COPY7:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5 + ; CHECK-NEXT: [[COPY8:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[C]](s32), [[C]](s32), [[C]](s32) ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 @@ -4622,14 +4645,14 @@ define void @stack_12xv3i32() #0 { ; CHECK-NEXT: [[BUILD_VECTOR11:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[C13]](s32), [[C14]](s32), [[C15]](s32) ; CHECK-NEXT: ADJCALLSTACKUP 0, 0, implicit-def $scc ; CHECK-NEXT: [[GV:%[0-9]+]]:_(p0) = G_GLOBAL_VALUE @external_void_func_12xv3i32 - ; CHECK-NEXT: [[COPY8:%[0-9]+]]:_(p4) = COPY [[COPY7]] - ; CHECK-NEXT: [[COPY9:%[0-9]+]]:_(p4) = COPY [[COPY6]] - ; CHECK-NEXT: [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY5]] - ; CHECK-NEXT: [[COPY11:%[0-9]+]]:_(s64) = COPY [[COPY4]] - ; CHECK-NEXT: [[COPY12:%[0-9]+]]:_(s32) = COPY [[COPY3]] - ; CHECK-NEXT: [[COPY13:%[0-9]+]]:_(s32) = COPY [[COPY2]] - ; CHECK-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY1]] - ; CHECK-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY]](s32) + ; CHECK-NEXT: [[COPY9:%[0-9]+]]:_(p4) = COPY [[COPY7]] + ; CHECK-NEXT: [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY6]] + ; CHECK-NEXT: [[COPY11:%[0-9]+]]:_(p4) = COPY [[COPY5]] + ; CHECK-NEXT: [[COPY12:%[0-9]+]]:_(s64) = COPY [[COPY4]] + ; CHECK-NEXT: [[COPY13:%[0-9]+]]:_(s32) = COPY [[COPY3]] + ; CHECK-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY2]] + ; CHECK-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY1]] + ; CHECK-NEXT: [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY]](s32) ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[BUILD_VECTOR]](<3 x s32>) ; CHECK-NEXT: [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES 
[[BUILD_VECTOR1]](<3 x s32>) ; CHECK-NEXT: [[UV6:%[0-9]+]]:_(s32), [[UV7:%[0-9]+]]:_(s32), [[UV8:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[BUILD_VECTOR2]](<3 x s32>) @@ -4641,22 +4664,22 @@ define void @stack_12xv3i32() #0 { ; CHECK-NEXT: [[UV24:%[0-9]+]]:_(s32), [[UV25:%[0-9]+]]:_(s32), [[UV26:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[BUILD_VECTOR8]](<3 x s32>) ; CHECK-NEXT: [[UV27:%[0-9]+]]:_(s32), [[UV28:%[0-9]+]]:_(s32), [[UV29:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[BUILD_VECTOR9]](<3 x s32>) ; CHECK-NEXT: [[UV30:%[0-9]+]]:_(s32), [[UV31:%[0-9]+]]:_(s32), [[UV32:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[BUILD_VECTOR10]](<3 x s32>) - ; CHECK-NEXT: [[COPY16:%[0-9]+]]:_(p5) = COPY $sgpr32 + ; CHECK-NEXT: [[COPY17:%[0-9]+]]:_(p5) = COPY $sgpr32 ; CHECK-NEXT: [[C16:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 - ; CHECK-NEXT: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY16]], [[C16]](s32) + ; CHECK-NEXT: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY17]], [[C16]](s32) ; CHECK-NEXT: G_STORE [[UV31]](s32), [[PTR_ADD]](p5) :: (store (s32) into stack, align 16, addrspace 5) ; CHECK-NEXT: [[C17:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 - ; CHECK-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY16]], [[C17]](s32) + ; CHECK-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY17]], [[C17]](s32) ; CHECK-NEXT: G_STORE [[UV32]](s32), [[PTR_ADD1]](p5) :: (store (s32) into stack + 4, addrspace 5) ; CHECK-NEXT: [[UV33:%[0-9]+]]:_(s32), [[UV34:%[0-9]+]]:_(s32), [[UV35:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[BUILD_VECTOR11]](<3 x s32>) ; CHECK-NEXT: [[C18:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 - ; CHECK-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY16]], [[C18]](s32) + ; CHECK-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY17]], [[C18]](s32) ; CHECK-NEXT: G_STORE [[UV33]](s32), [[PTR_ADD2]](p5) :: (store (s32) into stack + 8, align 8, addrspace 5) ; CHECK-NEXT: [[C19:%[0-9]+]]:_(s32) = G_CONSTANT i32 12 - ; CHECK-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY16]], [[C19]](s32) + ; CHECK-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY17]], [[C19]](s32) ; CHECK-NEXT: G_STORE [[UV34]](s32), [[PTR_ADD3]](p5) :: (store (s32) into stack + 12, addrspace 5) ; CHECK-NEXT: [[C20:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; CHECK-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY16]], [[C20]](s32) + ; CHECK-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY17]], [[C20]](s32) ; CHECK-NEXT: G_STORE [[UV35]](s32), [[PTR_ADD4]](p5) :: (store (s32) into stack + 16, align 16, addrspace 5) ; CHECK-NEXT: $vgpr0 = COPY [[UV]](s32) ; CHECK-NEXT: $vgpr1 = COPY [[UV1]](s32) @@ -4689,19 +4712,20 @@ define void @stack_12xv3i32() #0 { ; CHECK-NEXT: $vgpr28 = COPY [[UV28]](s32) ; CHECK-NEXT: $vgpr29 = COPY [[UV29]](s32) ; CHECK-NEXT: $vgpr30 = COPY [[UV30]](s32) - ; CHECK-NEXT: [[COPY17:%[0-9]+]]:_(<4 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3 - ; CHECK-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY17]](<4 x s32>) - ; CHECK-NEXT: $sgpr4_sgpr5 = COPY [[COPY8]](p4) - ; CHECK-NEXT: $sgpr6_sgpr7 = COPY [[COPY9]](p4) - ; CHECK-NEXT: $sgpr8_sgpr9 = COPY [[COPY10]](p4) - ; CHECK-NEXT: $sgpr10_sgpr11 = COPY [[COPY11]](s64) - ; CHECK-NEXT: $sgpr12 = COPY [[COPY12]](s32) - ; CHECK-NEXT: $sgpr13 = COPY [[COPY13]](s32) - ; CHECK-NEXT: $sgpr14 = COPY [[COPY14]](s32) - ; CHECK-NEXT: $vgpr31 = COPY [[COPY15]](s32) + ; CHECK-NEXT: [[COPY18:%[0-9]+]]:_(<4 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3 + ; CHECK-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY18]](<4 x s32>) + ; CHECK-NEXT: $sgpr4_sgpr5 = COPY [[COPY9]](p4) + ; CHECK-NEXT: $sgpr6_sgpr7 = COPY [[COPY10]](p4) + ; 
CHECK-NEXT: $sgpr8_sgpr9 = COPY [[COPY11]](p4) + ; CHECK-NEXT: $sgpr10_sgpr11 = COPY [[COPY12]](s64) + ; CHECK-NEXT: $sgpr12 = COPY [[COPY13]](s32) + ; CHECK-NEXT: $sgpr13 = COPY [[COPY14]](s32) + ; CHECK-NEXT: $sgpr14 = COPY [[COPY15]](s32) + ; CHECK-NEXT: $vgpr31 = COPY [[COPY16]](s32) ; CHECK-NEXT: $sgpr30_sgpr31 = G_SI_CALL [[GV]](p0), @external_void_func_12xv3i32, csr_amdgpu_highregs, implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7, implicit $vgpr8, implicit $vgpr9, implicit $vgpr10, implicit $vgpr11, implicit $vgpr12, implicit $vgpr13, implicit $vgpr14, implicit $vgpr15, implicit $vgpr16, implicit $vgpr17, implicit $vgpr18, implicit $vgpr19, implicit $vgpr20, implicit $vgpr21, implicit $vgpr22, implicit $vgpr23, implicit $vgpr24, implicit $vgpr25, implicit $vgpr26, implicit $vgpr27, implicit $vgpr28, implicit $vgpr29, implicit $vgpr30, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $vgpr31 ; CHECK-NEXT: ADJCALLSTACKDOWN 0, 20, implicit-def $scc - ; CHECK-NEXT: SI_RETURN + ; CHECK-NEXT: [[COPY19:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY8]] + ; CHECK-NEXT: S_SETPC_B64_return [[COPY19]] entry: call void @external_void_func_12xv3i32( <3 x i32> , @@ -4722,7 +4746,7 @@ entry: define void @stack_12xv3f32() #0 { ; CHECK-LABEL: name: stack_12xv3f32 ; CHECK: bb.1.entry: - ; CHECK-NEXT: liveins: $sgpr12, $sgpr13, $sgpr14, $vgpr31, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9, $sgpr10_sgpr11 + ; CHECK-NEXT: liveins: $sgpr12, $sgpr13, $sgpr14, $vgpr31, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9, $sgpr10_sgpr11, $sgpr30_sgpr31 ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr31 ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr_32 = COPY $sgpr14 @@ -4732,6 +4756,7 @@ define void @stack_12xv3f32() #0 { ; CHECK-NEXT: [[COPY5:%[0-9]+]]:sgpr_64 = COPY $sgpr8_sgpr9 ; CHECK-NEXT: [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr6_sgpr7 ; CHECK-NEXT: [[COPY7:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5 + ; CHECK-NEXT: [[COPY8:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_FCONSTANT float 0.000000e+00 ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[C]](s32), [[C]](s32), [[C]](s32) ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s32) = G_FCONSTANT float 1.000000e+00 @@ -4762,14 +4787,14 @@ define void @stack_12xv3f32() #0 { ; CHECK-NEXT: [[BUILD_VECTOR11:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[C13]](s32), [[C14]](s32), [[C15]](s32) ; CHECK-NEXT: ADJCALLSTACKUP 0, 0, implicit-def $scc ; CHECK-NEXT: [[GV:%[0-9]+]]:_(p0) = G_GLOBAL_VALUE @external_void_func_12xv3f32 - ; CHECK-NEXT: [[COPY8:%[0-9]+]]:_(p4) = COPY [[COPY7]] - ; CHECK-NEXT: [[COPY9:%[0-9]+]]:_(p4) = COPY [[COPY6]] - ; CHECK-NEXT: [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY5]] - ; CHECK-NEXT: [[COPY11:%[0-9]+]]:_(s64) = COPY [[COPY4]] - ; CHECK-NEXT: [[COPY12:%[0-9]+]]:_(s32) = COPY [[COPY3]] - ; CHECK-NEXT: [[COPY13:%[0-9]+]]:_(s32) = COPY [[COPY2]] - ; CHECK-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY1]] - ; CHECK-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY]](s32) + ; CHECK-NEXT: [[COPY9:%[0-9]+]]:_(p4) = COPY [[COPY7]] + ; CHECK-NEXT: [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY6]] + ; CHECK-NEXT: [[COPY11:%[0-9]+]]:_(p4) = COPY [[COPY5]] + ; CHECK-NEXT: [[COPY12:%[0-9]+]]:_(s64) = COPY [[COPY4]] + ; CHECK-NEXT: [[COPY13:%[0-9]+]]:_(s32) = COPY [[COPY3]] + ; CHECK-NEXT: [[COPY14:%[0-9]+]]:_(s32) = 
COPY [[COPY2]] + ; CHECK-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY1]] + ; CHECK-NEXT: [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY]](s32) ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[BUILD_VECTOR]](<3 x s32>) ; CHECK-NEXT: [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[BUILD_VECTOR1]](<3 x s32>) ; CHECK-NEXT: [[UV6:%[0-9]+]]:_(s32), [[UV7:%[0-9]+]]:_(s32), [[UV8:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[BUILD_VECTOR2]](<3 x s32>) @@ -4781,22 +4806,22 @@ define void @stack_12xv3f32() #0 { ; CHECK-NEXT: [[UV24:%[0-9]+]]:_(s32), [[UV25:%[0-9]+]]:_(s32), [[UV26:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[BUILD_VECTOR8]](<3 x s32>) ; CHECK-NEXT: [[UV27:%[0-9]+]]:_(s32), [[UV28:%[0-9]+]]:_(s32), [[UV29:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[BUILD_VECTOR9]](<3 x s32>) ; CHECK-NEXT: [[UV30:%[0-9]+]]:_(s32), [[UV31:%[0-9]+]]:_(s32), [[UV32:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[BUILD_VECTOR10]](<3 x s32>) - ; CHECK-NEXT: [[COPY16:%[0-9]+]]:_(p5) = COPY $sgpr32 + ; CHECK-NEXT: [[COPY17:%[0-9]+]]:_(p5) = COPY $sgpr32 ; CHECK-NEXT: [[C16:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 - ; CHECK-NEXT: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY16]], [[C16]](s32) + ; CHECK-NEXT: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY17]], [[C16]](s32) ; CHECK-NEXT: G_STORE [[UV31]](s32), [[PTR_ADD]](p5) :: (store (s32) into stack, align 16, addrspace 5) ; CHECK-NEXT: [[C17:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 - ; CHECK-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY16]], [[C17]](s32) + ; CHECK-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY17]], [[C17]](s32) ; CHECK-NEXT: G_STORE [[UV32]](s32), [[PTR_ADD1]](p5) :: (store (s32) into stack + 4, addrspace 5) ; CHECK-NEXT: [[UV33:%[0-9]+]]:_(s32), [[UV34:%[0-9]+]]:_(s32), [[UV35:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[BUILD_VECTOR11]](<3 x s32>) ; CHECK-NEXT: [[C18:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 - ; CHECK-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY16]], [[C18]](s32) + ; CHECK-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY17]], [[C18]](s32) ; CHECK-NEXT: G_STORE [[UV33]](s32), [[PTR_ADD2]](p5) :: (store (s32) into stack + 8, align 8, addrspace 5) ; CHECK-NEXT: [[C19:%[0-9]+]]:_(s32) = G_CONSTANT i32 12 - ; CHECK-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY16]], [[C19]](s32) + ; CHECK-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY17]], [[C19]](s32) ; CHECK-NEXT: G_STORE [[UV34]](s32), [[PTR_ADD3]](p5) :: (store (s32) into stack + 12, addrspace 5) ; CHECK-NEXT: [[C20:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; CHECK-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY16]], [[C20]](s32) + ; CHECK-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY17]], [[C20]](s32) ; CHECK-NEXT: G_STORE [[UV35]](s32), [[PTR_ADD4]](p5) :: (store (s32) into stack + 16, align 16, addrspace 5) ; CHECK-NEXT: $vgpr0 = COPY [[UV]](s32) ; CHECK-NEXT: $vgpr1 = COPY [[UV1]](s32) @@ -4829,19 +4854,20 @@ define void @stack_12xv3f32() #0 { ; CHECK-NEXT: $vgpr28 = COPY [[UV28]](s32) ; CHECK-NEXT: $vgpr29 = COPY [[UV29]](s32) ; CHECK-NEXT: $vgpr30 = COPY [[UV30]](s32) - ; CHECK-NEXT: [[COPY17:%[0-9]+]]:_(<4 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3 - ; CHECK-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY17]](<4 x s32>) - ; CHECK-NEXT: $sgpr4_sgpr5 = COPY [[COPY8]](p4) - ; CHECK-NEXT: $sgpr6_sgpr7 = COPY [[COPY9]](p4) - ; CHECK-NEXT: $sgpr8_sgpr9 = COPY [[COPY10]](p4) - ; CHECK-NEXT: $sgpr10_sgpr11 = COPY [[COPY11]](s64) - ; CHECK-NEXT: $sgpr12 = COPY [[COPY12]](s32) - ; CHECK-NEXT: $sgpr13 = COPY 
[[COPY13]](s32) - ; CHECK-NEXT: $sgpr14 = COPY [[COPY14]](s32) - ; CHECK-NEXT: $vgpr31 = COPY [[COPY15]](s32) + ; CHECK-NEXT: [[COPY18:%[0-9]+]]:_(<4 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3 + ; CHECK-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY18]](<4 x s32>) + ; CHECK-NEXT: $sgpr4_sgpr5 = COPY [[COPY9]](p4) + ; CHECK-NEXT: $sgpr6_sgpr7 = COPY [[COPY10]](p4) + ; CHECK-NEXT: $sgpr8_sgpr9 = COPY [[COPY11]](p4) + ; CHECK-NEXT: $sgpr10_sgpr11 = COPY [[COPY12]](s64) + ; CHECK-NEXT: $sgpr12 = COPY [[COPY13]](s32) + ; CHECK-NEXT: $sgpr13 = COPY [[COPY14]](s32) + ; CHECK-NEXT: $sgpr14 = COPY [[COPY15]](s32) + ; CHECK-NEXT: $vgpr31 = COPY [[COPY16]](s32) ; CHECK-NEXT: $sgpr30_sgpr31 = G_SI_CALL [[GV]](p0), @external_void_func_12xv3f32, csr_amdgpu_highregs, implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7, implicit $vgpr8, implicit $vgpr9, implicit $vgpr10, implicit $vgpr11, implicit $vgpr12, implicit $vgpr13, implicit $vgpr14, implicit $vgpr15, implicit $vgpr16, implicit $vgpr17, implicit $vgpr18, implicit $vgpr19, implicit $vgpr20, implicit $vgpr21, implicit $vgpr22, implicit $vgpr23, implicit $vgpr24, implicit $vgpr25, implicit $vgpr26, implicit $vgpr27, implicit $vgpr28, implicit $vgpr29, implicit $vgpr30, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $vgpr31 ; CHECK-NEXT: ADJCALLSTACKDOWN 0, 20, implicit-def $scc - ; CHECK-NEXT: SI_RETURN + ; CHECK-NEXT: [[COPY19:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY8]] + ; CHECK-NEXT: S_SETPC_B64_return [[COPY19]] entry: call void @external_void_func_12xv3f32( <3 x float> , @@ -4862,7 +4888,7 @@ entry: define void @stack_8xv5i32() #0 { ; CHECK-LABEL: name: stack_8xv5i32 ; CHECK: bb.1.entry: - ; CHECK-NEXT: liveins: $sgpr12, $sgpr13, $sgpr14, $vgpr31, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9, $sgpr10_sgpr11 + ; CHECK-NEXT: liveins: $sgpr12, $sgpr13, $sgpr14, $vgpr31, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9, $sgpr10_sgpr11, $sgpr30_sgpr31 ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr31 ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr_32 = COPY $sgpr14 @@ -4872,6 +4898,7 @@ define void @stack_8xv5i32() #0 { ; CHECK-NEXT: [[COPY5:%[0-9]+]]:sgpr_64 = COPY $sgpr8_sgpr9 ; CHECK-NEXT: [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr6_sgpr7 ; CHECK-NEXT: [[COPY7:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5 + ; CHECK-NEXT: [[COPY8:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<5 x s32>) = G_BUILD_VECTOR [[C]](s32), [[C]](s32), [[C]](s32), [[C]](s32), [[C]](s32) ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 @@ -4898,14 +4925,14 @@ define void @stack_8xv5i32() #0 { ; CHECK-NEXT: [[BUILD_VECTOR7:%[0-9]+]]:_(<5 x s32>) = G_BUILD_VECTOR [[C11]](s32), [[C12]](s32), [[C13]](s32), [[C14]](s32), [[C15]](s32) ; CHECK-NEXT: ADJCALLSTACKUP 0, 0, implicit-def $scc ; CHECK-NEXT: [[GV:%[0-9]+]]:_(p0) = G_GLOBAL_VALUE @external_void_func_8xv5i32 - ; CHECK-NEXT: [[COPY8:%[0-9]+]]:_(p4) = COPY [[COPY7]] - ; CHECK-NEXT: [[COPY9:%[0-9]+]]:_(p4) = COPY [[COPY6]] - ; CHECK-NEXT: [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY5]] - ; CHECK-NEXT: [[COPY11:%[0-9]+]]:_(s64) = COPY [[COPY4]] - ; CHECK-NEXT: [[COPY12:%[0-9]+]]:_(s32) = COPY [[COPY3]] - ; CHECK-NEXT: [[COPY13:%[0-9]+]]:_(s32) = COPY [[COPY2]] - ; CHECK-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY1]] - ; 
CHECK-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY]](s32) + ; CHECK-NEXT: [[COPY9:%[0-9]+]]:_(p4) = COPY [[COPY7]] + ; CHECK-NEXT: [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY6]] + ; CHECK-NEXT: [[COPY11:%[0-9]+]]:_(p4) = COPY [[COPY5]] + ; CHECK-NEXT: [[COPY12:%[0-9]+]]:_(s64) = COPY [[COPY4]] + ; CHECK-NEXT: [[COPY13:%[0-9]+]]:_(s32) = COPY [[COPY3]] + ; CHECK-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY2]] + ; CHECK-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY1]] + ; CHECK-NEXT: [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY]](s32) ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[BUILD_VECTOR]](<5 x s32>) ; CHECK-NEXT: [[UV5:%[0-9]+]]:_(s32), [[UV6:%[0-9]+]]:_(s32), [[UV7:%[0-9]+]]:_(s32), [[UV8:%[0-9]+]]:_(s32), [[UV9:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[BUILD_VECTOR1]](<5 x s32>) ; CHECK-NEXT: [[UV10:%[0-9]+]]:_(s32), [[UV11:%[0-9]+]]:_(s32), [[UV12:%[0-9]+]]:_(s32), [[UV13:%[0-9]+]]:_(s32), [[UV14:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[BUILD_VECTOR2]](<5 x s32>) @@ -4913,34 +4940,34 @@ define void @stack_8xv5i32() #0 { ; CHECK-NEXT: [[UV20:%[0-9]+]]:_(s32), [[UV21:%[0-9]+]]:_(s32), [[UV22:%[0-9]+]]:_(s32), [[UV23:%[0-9]+]]:_(s32), [[UV24:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[BUILD_VECTOR4]](<5 x s32>) ; CHECK-NEXT: [[UV25:%[0-9]+]]:_(s32), [[UV26:%[0-9]+]]:_(s32), [[UV27:%[0-9]+]]:_(s32), [[UV28:%[0-9]+]]:_(s32), [[UV29:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[BUILD_VECTOR5]](<5 x s32>) ; CHECK-NEXT: [[UV30:%[0-9]+]]:_(s32), [[UV31:%[0-9]+]]:_(s32), [[UV32:%[0-9]+]]:_(s32), [[UV33:%[0-9]+]]:_(s32), [[UV34:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[BUILD_VECTOR6]](<5 x s32>) - ; CHECK-NEXT: [[COPY16:%[0-9]+]]:_(p5) = COPY $sgpr32 + ; CHECK-NEXT: [[COPY17:%[0-9]+]]:_(p5) = COPY $sgpr32 ; CHECK-NEXT: [[C16:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 - ; CHECK-NEXT: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY16]], [[C16]](s32) + ; CHECK-NEXT: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY17]], [[C16]](s32) ; CHECK-NEXT: G_STORE [[UV31]](s32), [[PTR_ADD]](p5) :: (store (s32) into stack, align 16, addrspace 5) ; CHECK-NEXT: [[C17:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 - ; CHECK-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY16]], [[C17]](s32) + ; CHECK-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY17]], [[C17]](s32) ; CHECK-NEXT: G_STORE [[UV32]](s32), [[PTR_ADD1]](p5) :: (store (s32) into stack + 4, addrspace 5) ; CHECK-NEXT: [[C18:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 - ; CHECK-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY16]], [[C18]](s32) + ; CHECK-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY17]], [[C18]](s32) ; CHECK-NEXT: G_STORE [[UV33]](s32), [[PTR_ADD2]](p5) :: (store (s32) into stack + 8, align 8, addrspace 5) ; CHECK-NEXT: [[C19:%[0-9]+]]:_(s32) = G_CONSTANT i32 12 - ; CHECK-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY16]], [[C19]](s32) + ; CHECK-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY17]], [[C19]](s32) ; CHECK-NEXT: G_STORE [[UV34]](s32), [[PTR_ADD3]](p5) :: (store (s32) into stack + 12, addrspace 5) ; CHECK-NEXT: [[UV35:%[0-9]+]]:_(s32), [[UV36:%[0-9]+]]:_(s32), [[UV37:%[0-9]+]]:_(s32), [[UV38:%[0-9]+]]:_(s32), [[UV39:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[BUILD_VECTOR7]](<5 x s32>) ; CHECK-NEXT: [[C20:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; CHECK-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY16]], [[C20]](s32) + ; CHECK-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY17]], [[C20]](s32) ; CHECK-NEXT: G_STORE [[UV35]](s32), [[PTR_ADD4]](p5) :: (store (s32) 
into stack + 16, align 16, addrspace 5) ; CHECK-NEXT: [[C21:%[0-9]+]]:_(s32) = G_CONSTANT i32 20 - ; CHECK-NEXT: [[PTR_ADD5:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY16]], [[C21]](s32) + ; CHECK-NEXT: [[PTR_ADD5:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY17]], [[C21]](s32) ; CHECK-NEXT: G_STORE [[UV36]](s32), [[PTR_ADD5]](p5) :: (store (s32) into stack + 20, addrspace 5) ; CHECK-NEXT: [[C22:%[0-9]+]]:_(s32) = G_CONSTANT i32 24 - ; CHECK-NEXT: [[PTR_ADD6:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY16]], [[C22]](s32) + ; CHECK-NEXT: [[PTR_ADD6:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY17]], [[C22]](s32) ; CHECK-NEXT: G_STORE [[UV37]](s32), [[PTR_ADD6]](p5) :: (store (s32) into stack + 24, align 8, addrspace 5) ; CHECK-NEXT: [[C23:%[0-9]+]]:_(s32) = G_CONSTANT i32 28 - ; CHECK-NEXT: [[PTR_ADD7:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY16]], [[C23]](s32) + ; CHECK-NEXT: [[PTR_ADD7:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY17]], [[C23]](s32) ; CHECK-NEXT: G_STORE [[UV38]](s32), [[PTR_ADD7]](p5) :: (store (s32) into stack + 28, addrspace 5) ; CHECK-NEXT: [[C24:%[0-9]+]]:_(s32) = G_CONSTANT i32 32 - ; CHECK-NEXT: [[PTR_ADD8:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY16]], [[C24]](s32) + ; CHECK-NEXT: [[PTR_ADD8:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY17]], [[C24]](s32) ; CHECK-NEXT: G_STORE [[UV39]](s32), [[PTR_ADD8]](p5) :: (store (s32) into stack + 32, align 16, addrspace 5) ; CHECK-NEXT: $vgpr0 = COPY [[UV]](s32) ; CHECK-NEXT: $vgpr1 = COPY [[UV1]](s32) @@ -4973,19 +5000,20 @@ define void @stack_8xv5i32() #0 { ; CHECK-NEXT: $vgpr28 = COPY [[UV28]](s32) ; CHECK-NEXT: $vgpr29 = COPY [[UV29]](s32) ; CHECK-NEXT: $vgpr30 = COPY [[UV30]](s32) - ; CHECK-NEXT: [[COPY17:%[0-9]+]]:_(<4 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3 - ; CHECK-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY17]](<4 x s32>) - ; CHECK-NEXT: $sgpr4_sgpr5 = COPY [[COPY8]](p4) - ; CHECK-NEXT: $sgpr6_sgpr7 = COPY [[COPY9]](p4) - ; CHECK-NEXT: $sgpr8_sgpr9 = COPY [[COPY10]](p4) - ; CHECK-NEXT: $sgpr10_sgpr11 = COPY [[COPY11]](s64) - ; CHECK-NEXT: $sgpr12 = COPY [[COPY12]](s32) - ; CHECK-NEXT: $sgpr13 = COPY [[COPY13]](s32) - ; CHECK-NEXT: $sgpr14 = COPY [[COPY14]](s32) - ; CHECK-NEXT: $vgpr31 = COPY [[COPY15]](s32) + ; CHECK-NEXT: [[COPY18:%[0-9]+]]:_(<4 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3 + ; CHECK-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY18]](<4 x s32>) + ; CHECK-NEXT: $sgpr4_sgpr5 = COPY [[COPY9]](p4) + ; CHECK-NEXT: $sgpr6_sgpr7 = COPY [[COPY10]](p4) + ; CHECK-NEXT: $sgpr8_sgpr9 = COPY [[COPY11]](p4) + ; CHECK-NEXT: $sgpr10_sgpr11 = COPY [[COPY12]](s64) + ; CHECK-NEXT: $sgpr12 = COPY [[COPY13]](s32) + ; CHECK-NEXT: $sgpr13 = COPY [[COPY14]](s32) + ; CHECK-NEXT: $sgpr14 = COPY [[COPY15]](s32) + ; CHECK-NEXT: $vgpr31 = COPY [[COPY16]](s32) ; CHECK-NEXT: $sgpr30_sgpr31 = G_SI_CALL [[GV]](p0), @external_void_func_8xv5i32, csr_amdgpu_highregs, implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7, implicit $vgpr8, implicit $vgpr9, implicit $vgpr10, implicit $vgpr11, implicit $vgpr12, implicit $vgpr13, implicit $vgpr14, implicit $vgpr15, implicit $vgpr16, implicit $vgpr17, implicit $vgpr18, implicit $vgpr19, implicit $vgpr20, implicit $vgpr21, implicit $vgpr22, implicit $vgpr23, implicit $vgpr24, implicit $vgpr25, implicit $vgpr26, implicit $vgpr27, implicit $vgpr28, implicit $vgpr29, implicit $vgpr30, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $vgpr31 ; CHECK-NEXT: 
ADJCALLSTACKDOWN 0, 36, implicit-def $scc - ; CHECK-NEXT: SI_RETURN + ; CHECK-NEXT: [[COPY19:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY8]] + ; CHECK-NEXT: S_SETPC_B64_return [[COPY19]] entry: call void @external_void_func_8xv5i32( <5 x i32> , @@ -5002,7 +5030,7 @@ entry: define void @stack_8xv5f32() #0 { ; CHECK-LABEL: name: stack_8xv5f32 ; CHECK: bb.1.entry: - ; CHECK-NEXT: liveins: $sgpr12, $sgpr13, $sgpr14, $vgpr31, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9, $sgpr10_sgpr11 + ; CHECK-NEXT: liveins: $sgpr12, $sgpr13, $sgpr14, $vgpr31, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9, $sgpr10_sgpr11, $sgpr30_sgpr31 ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr31 ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr_32 = COPY $sgpr14 @@ -5012,6 +5040,7 @@ define void @stack_8xv5f32() #0 { ; CHECK-NEXT: [[COPY5:%[0-9]+]]:sgpr_64 = COPY $sgpr8_sgpr9 ; CHECK-NEXT: [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr6_sgpr7 ; CHECK-NEXT: [[COPY7:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5 + ; CHECK-NEXT: [[COPY8:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_FCONSTANT float 0.000000e+00 ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<5 x s32>) = G_BUILD_VECTOR [[C]](s32), [[C]](s32), [[C]](s32), [[C]](s32), [[C]](s32) ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s32) = G_FCONSTANT float 1.000000e+00 @@ -5038,14 +5067,14 @@ define void @stack_8xv5f32() #0 { ; CHECK-NEXT: [[BUILD_VECTOR7:%[0-9]+]]:_(<5 x s32>) = G_BUILD_VECTOR [[C11]](s32), [[C12]](s32), [[C13]](s32), [[C14]](s32), [[C15]](s32) ; CHECK-NEXT: ADJCALLSTACKUP 0, 0, implicit-def $scc ; CHECK-NEXT: [[GV:%[0-9]+]]:_(p0) = G_GLOBAL_VALUE @external_void_func_8xv5f32 - ; CHECK-NEXT: [[COPY8:%[0-9]+]]:_(p4) = COPY [[COPY7]] - ; CHECK-NEXT: [[COPY9:%[0-9]+]]:_(p4) = COPY [[COPY6]] - ; CHECK-NEXT: [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY5]] - ; CHECK-NEXT: [[COPY11:%[0-9]+]]:_(s64) = COPY [[COPY4]] - ; CHECK-NEXT: [[COPY12:%[0-9]+]]:_(s32) = COPY [[COPY3]] - ; CHECK-NEXT: [[COPY13:%[0-9]+]]:_(s32) = COPY [[COPY2]] - ; CHECK-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY1]] - ; CHECK-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY]](s32) + ; CHECK-NEXT: [[COPY9:%[0-9]+]]:_(p4) = COPY [[COPY7]] + ; CHECK-NEXT: [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY6]] + ; CHECK-NEXT: [[COPY11:%[0-9]+]]:_(p4) = COPY [[COPY5]] + ; CHECK-NEXT: [[COPY12:%[0-9]+]]:_(s64) = COPY [[COPY4]] + ; CHECK-NEXT: [[COPY13:%[0-9]+]]:_(s32) = COPY [[COPY3]] + ; CHECK-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY2]] + ; CHECK-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY1]] + ; CHECK-NEXT: [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY]](s32) ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[BUILD_VECTOR]](<5 x s32>) ; CHECK-NEXT: [[UV5:%[0-9]+]]:_(s32), [[UV6:%[0-9]+]]:_(s32), [[UV7:%[0-9]+]]:_(s32), [[UV8:%[0-9]+]]:_(s32), [[UV9:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[BUILD_VECTOR1]](<5 x s32>) ; CHECK-NEXT: [[UV10:%[0-9]+]]:_(s32), [[UV11:%[0-9]+]]:_(s32), [[UV12:%[0-9]+]]:_(s32), [[UV13:%[0-9]+]]:_(s32), [[UV14:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[BUILD_VECTOR2]](<5 x s32>) @@ -5053,34 +5082,34 @@ define void @stack_8xv5f32() #0 { ; CHECK-NEXT: [[UV20:%[0-9]+]]:_(s32), [[UV21:%[0-9]+]]:_(s32), [[UV22:%[0-9]+]]:_(s32), [[UV23:%[0-9]+]]:_(s32), [[UV24:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[BUILD_VECTOR4]](<5 x s32>) ; CHECK-NEXT: [[UV25:%[0-9]+]]:_(s32), [[UV26:%[0-9]+]]:_(s32), [[UV27:%[0-9]+]]:_(s32), [[UV28:%[0-9]+]]:_(s32), [[UV29:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[BUILD_VECTOR5]](<5 x 
s32>) ; CHECK-NEXT: [[UV30:%[0-9]+]]:_(s32), [[UV31:%[0-9]+]]:_(s32), [[UV32:%[0-9]+]]:_(s32), [[UV33:%[0-9]+]]:_(s32), [[UV34:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[BUILD_VECTOR6]](<5 x s32>) - ; CHECK-NEXT: [[COPY16:%[0-9]+]]:_(p5) = COPY $sgpr32 + ; CHECK-NEXT: [[COPY17:%[0-9]+]]:_(p5) = COPY $sgpr32 ; CHECK-NEXT: [[C16:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 - ; CHECK-NEXT: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY16]], [[C16]](s32) + ; CHECK-NEXT: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY17]], [[C16]](s32) ; CHECK-NEXT: G_STORE [[UV31]](s32), [[PTR_ADD]](p5) :: (store (s32) into stack, align 16, addrspace 5) ; CHECK-NEXT: [[C17:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 - ; CHECK-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY16]], [[C17]](s32) + ; CHECK-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY17]], [[C17]](s32) ; CHECK-NEXT: G_STORE [[UV32]](s32), [[PTR_ADD1]](p5) :: (store (s32) into stack + 4, addrspace 5) ; CHECK-NEXT: [[C18:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 - ; CHECK-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY16]], [[C18]](s32) + ; CHECK-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY17]], [[C18]](s32) ; CHECK-NEXT: G_STORE [[UV33]](s32), [[PTR_ADD2]](p5) :: (store (s32) into stack + 8, align 8, addrspace 5) ; CHECK-NEXT: [[C19:%[0-9]+]]:_(s32) = G_CONSTANT i32 12 - ; CHECK-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY16]], [[C19]](s32) + ; CHECK-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY17]], [[C19]](s32) ; CHECK-NEXT: G_STORE [[UV34]](s32), [[PTR_ADD3]](p5) :: (store (s32) into stack + 12, addrspace 5) ; CHECK-NEXT: [[UV35:%[0-9]+]]:_(s32), [[UV36:%[0-9]+]]:_(s32), [[UV37:%[0-9]+]]:_(s32), [[UV38:%[0-9]+]]:_(s32), [[UV39:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[BUILD_VECTOR7]](<5 x s32>) ; CHECK-NEXT: [[C20:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; CHECK-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY16]], [[C20]](s32) + ; CHECK-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY17]], [[C20]](s32) ; CHECK-NEXT: G_STORE [[UV35]](s32), [[PTR_ADD4]](p5) :: (store (s32) into stack + 16, align 16, addrspace 5) ; CHECK-NEXT: [[C21:%[0-9]+]]:_(s32) = G_CONSTANT i32 20 - ; CHECK-NEXT: [[PTR_ADD5:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY16]], [[C21]](s32) + ; CHECK-NEXT: [[PTR_ADD5:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY17]], [[C21]](s32) ; CHECK-NEXT: G_STORE [[UV36]](s32), [[PTR_ADD5]](p5) :: (store (s32) into stack + 20, addrspace 5) ; CHECK-NEXT: [[C22:%[0-9]+]]:_(s32) = G_CONSTANT i32 24 - ; CHECK-NEXT: [[PTR_ADD6:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY16]], [[C22]](s32) + ; CHECK-NEXT: [[PTR_ADD6:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY17]], [[C22]](s32) ; CHECK-NEXT: G_STORE [[UV37]](s32), [[PTR_ADD6]](p5) :: (store (s32) into stack + 24, align 8, addrspace 5) ; CHECK-NEXT: [[C23:%[0-9]+]]:_(s32) = G_CONSTANT i32 28 - ; CHECK-NEXT: [[PTR_ADD7:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY16]], [[C23]](s32) + ; CHECK-NEXT: [[PTR_ADD7:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY17]], [[C23]](s32) ; CHECK-NEXT: G_STORE [[UV38]](s32), [[PTR_ADD7]](p5) :: (store (s32) into stack + 28, addrspace 5) ; CHECK-NEXT: [[C24:%[0-9]+]]:_(s32) = G_CONSTANT i32 32 - ; CHECK-NEXT: [[PTR_ADD8:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY16]], [[C24]](s32) + ; CHECK-NEXT: [[PTR_ADD8:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY17]], [[C24]](s32) ; CHECK-NEXT: G_STORE [[UV39]](s32), [[PTR_ADD8]](p5) :: (store (s32) into stack + 32, align 16, addrspace 5) ; CHECK-NEXT: $vgpr0 = COPY [[UV]](s32) ; CHECK-NEXT: $vgpr1 = COPY [[UV1]](s32) @@ -5113,19 +5142,20 @@ define void @stack_8xv5f32() #0 { ; CHECK-NEXT: $vgpr28 = COPY 
[[UV28]](s32) ; CHECK-NEXT: $vgpr29 = COPY [[UV29]](s32) ; CHECK-NEXT: $vgpr30 = COPY [[UV30]](s32) - ; CHECK-NEXT: [[COPY17:%[0-9]+]]:_(<4 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3 - ; CHECK-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY17]](<4 x s32>) - ; CHECK-NEXT: $sgpr4_sgpr5 = COPY [[COPY8]](p4) - ; CHECK-NEXT: $sgpr6_sgpr7 = COPY [[COPY9]](p4) - ; CHECK-NEXT: $sgpr8_sgpr9 = COPY [[COPY10]](p4) - ; CHECK-NEXT: $sgpr10_sgpr11 = COPY [[COPY11]](s64) - ; CHECK-NEXT: $sgpr12 = COPY [[COPY12]](s32) - ; CHECK-NEXT: $sgpr13 = COPY [[COPY13]](s32) - ; CHECK-NEXT: $sgpr14 = COPY [[COPY14]](s32) - ; CHECK-NEXT: $vgpr31 = COPY [[COPY15]](s32) + ; CHECK-NEXT: [[COPY18:%[0-9]+]]:_(<4 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3 + ; CHECK-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY18]](<4 x s32>) + ; CHECK-NEXT: $sgpr4_sgpr5 = COPY [[COPY9]](p4) + ; CHECK-NEXT: $sgpr6_sgpr7 = COPY [[COPY10]](p4) + ; CHECK-NEXT: $sgpr8_sgpr9 = COPY [[COPY11]](p4) + ; CHECK-NEXT: $sgpr10_sgpr11 = COPY [[COPY12]](s64) + ; CHECK-NEXT: $sgpr12 = COPY [[COPY13]](s32) + ; CHECK-NEXT: $sgpr13 = COPY [[COPY14]](s32) + ; CHECK-NEXT: $sgpr14 = COPY [[COPY15]](s32) + ; CHECK-NEXT: $vgpr31 = COPY [[COPY16]](s32) ; CHECK-NEXT: $sgpr30_sgpr31 = G_SI_CALL [[GV]](p0), @external_void_func_8xv5f32, csr_amdgpu_highregs, implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7, implicit $vgpr8, implicit $vgpr9, implicit $vgpr10, implicit $vgpr11, implicit $vgpr12, implicit $vgpr13, implicit $vgpr14, implicit $vgpr15, implicit $vgpr16, implicit $vgpr17, implicit $vgpr18, implicit $vgpr19, implicit $vgpr20, implicit $vgpr21, implicit $vgpr22, implicit $vgpr23, implicit $vgpr24, implicit $vgpr25, implicit $vgpr26, implicit $vgpr27, implicit $vgpr28, implicit $vgpr29, implicit $vgpr30, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $vgpr31 ; CHECK-NEXT: ADJCALLSTACKDOWN 0, 36, implicit-def $scc - ; CHECK-NEXT: SI_RETURN + ; CHECK-NEXT: [[COPY19:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY8]] + ; CHECK-NEXT: S_SETPC_B64_return [[COPY19]] entry: call void @external_void_func_8xv5f32( <5 x float> , diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-constantexpr.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-constantexpr.ll index 138e66ffd6c7..78ecae87d06f 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-constantexpr.ll +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-constantexpr.ll @@ -6,17 +6,21 @@ define i32 @test() { ; CHECK-LABEL: name: test ; CHECK: bb.1 (%ir-block.0): + ; CHECK-NEXT: liveins: $sgpr30_sgpr31 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 -1 ; CHECK-NEXT: [[INTTOPTR:%[0-9]+]]:_(p0) = G_INTTOPTR [[C]](s32) ; CHECK-NEXT: [[GV:%[0-9]+]]:_(p0) = G_GLOBAL_VALUE @var ; CHECK-NEXT: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[INTTOPTR]](p0), [[GV]] ; CHECK-NEXT: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[ICMP]](s1) - ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY [[ZEXT]](s32) - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY [[COPY]](s32) + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY [[ZEXT]](s32) ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32) ; CHECK-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY [[COPY2]](s32) - ; CHECK-NEXT: $vgpr0 = COPY [[COPY3]](s32) - ; CHECK-NEXT: SI_RETURN implicit $vgpr0 + 
; CHECK-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY [[COPY3]](s32) + ; CHECK-NEXT: $vgpr0 = COPY [[COPY4]](s32) + ; CHECK-NEXT: [[COPY5:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY]] + ; CHECK-NEXT: S_SETPC_B64_return [[COPY5]], implicit $vgpr0 ret i32 bitcast (<1 x i32> bitcast (i32 zext (i1 icmp eq (i32* @var, i32* inttoptr (i32 -1 to i32*)) to i32) to <1 x i32>), i64 0)> to i32) } @@ -62,6 +66,9 @@ define amdgpu_kernel void @constantexpr_select_1() { define i32 @test_fcmp_constexpr() { ; CHECK-LABEL: name: test_fcmp_constexpr ; CHECK: bb.1.entry: + ; CHECK-NEXT: liveins: $sgpr30_sgpr31 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 ; CHECK-NEXT: [[GV:%[0-9]+]]:_(p0) = G_GLOBAL_VALUE @a ; CHECK-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 4 ; CHECK-NEXT: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[GV]], [[C]](s64) @@ -72,7 +79,8 @@ define i32 @test_fcmp_constexpr() { ; CHECK-NEXT: [[FCMP:%[0-9]+]]:_(s1) = G_FCMP floatpred(oeq), [[UITOFP]](s32), [[C1]] ; CHECK-NEXT: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[FCMP]](s1) ; CHECK-NEXT: $vgpr0 = COPY [[ZEXT]](s32) - ; CHECK-NEXT: SI_RETURN implicit $vgpr0 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY]] + ; CHECK-NEXT: S_SETPC_B64_return [[COPY1]], implicit $vgpr0 entry: ret i32 zext (i1 fcmp oeq (float uitofp (i1 icmp eq (i32* getelementptr inbounds ([2 x i32], [2 x i32]* @a, i64 0, i64 1), i32* @var) to float), float 0.000000e+00) to i32) } diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-constrained-fp.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-constrained-fp.ll index e01d9c399e93..10218b9a88d0 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-constrained-fp.ll +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-constrained-fp.ll @@ -4,13 +4,15 @@ define float @v_constained_fadd_f32_fpexcept_strict(float %x, float %y) #0 { ; CHECK-LABEL: name: v_constained_fadd_f32_fpexcept_strict ; CHECK: bb.1 (%ir-block.0): - ; CHECK-NEXT: liveins: $vgpr0, $vgpr1 + ; CHECK-NEXT: liveins: $vgpr0, $vgpr1, $sgpr30_sgpr31 ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 ; CHECK-NEXT: [[STRICT_FADD:%[0-9]+]]:_(s32) = G_STRICT_FADD [[COPY]], [[COPY1]] ; CHECK-NEXT: $vgpr0 = COPY [[STRICT_FADD]](s32) - ; CHECK-NEXT: SI_RETURN implicit $vgpr0 + ; CHECK-NEXT: [[COPY3:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY2]] + ; CHECK-NEXT: S_SETPC_B64_return [[COPY3]], implicit $vgpr0 %val = call float @llvm.experimental.constrained.fadd.f32(float %x, float %y, metadata !"round.tonearest", metadata !"fpexcept.strict") ret float %val } @@ -18,13 +20,15 @@ define float @v_constained_fadd_f32_fpexcept_strict(float %x, float %y) #0 { define float @v_constained_fadd_f32_fpexcept_strict_flags(float %x, float %y) #0 { ; CHECK-LABEL: name: v_constained_fadd_f32_fpexcept_strict_flags ; CHECK: bb.1 (%ir-block.0): - ; CHECK-NEXT: liveins: $vgpr0, $vgpr1 + ; CHECK-NEXT: liveins: $vgpr0, $vgpr1, $sgpr30_sgpr31 ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 ; CHECK-NEXT: [[STRICT_FADD:%[0-9]+]]:_(s32) = nsz G_STRICT_FADD [[COPY]], [[COPY1]] ; CHECK-NEXT: $vgpr0 = COPY [[STRICT_FADD]](s32) - ; CHECK-NEXT: SI_RETURN implicit $vgpr0 + ; CHECK-NEXT: [[COPY3:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY2]] + ; CHECK-NEXT: S_SETPC_B64_return [[COPY3]], implicit $vgpr0 
%val = call nsz float @llvm.experimental.constrained.fadd.f32(float %x, float %y, metadata !"round.tonearest", metadata !"fpexcept.strict") ret float %val } @@ -32,13 +36,15 @@ define float @v_constained_fadd_f32_fpexcept_strict_flags(float %x, float %y) #0 define float @v_constained_fadd_f32_fpexcept_ignore(float %x, float %y) #0 { ; CHECK-LABEL: name: v_constained_fadd_f32_fpexcept_ignore ; CHECK: bb.1 (%ir-block.0): - ; CHECK-NEXT: liveins: $vgpr0, $vgpr1 + ; CHECK-NEXT: liveins: $vgpr0, $vgpr1, $sgpr30_sgpr31 ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; CHECK-NEXT: %2:_(s32) = nofpexcept G_STRICT_FADD [[COPY]], [[COPY1]] - ; CHECK-NEXT: $vgpr0 = COPY %2(s32) - ; CHECK-NEXT: SI_RETURN implicit $vgpr0 + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 + ; CHECK-NEXT: %3:_(s32) = nofpexcept G_STRICT_FADD [[COPY]], [[COPY1]] + ; CHECK-NEXT: $vgpr0 = COPY %3(s32) + ; CHECK-NEXT: [[COPY3:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY2]] + ; CHECK-NEXT: S_SETPC_B64_return [[COPY3]], implicit $vgpr0 %val = call float @llvm.experimental.constrained.fadd.f32(float %x, float %y, metadata !"round.tonearest", metadata !"fpexcept.ignore") ret float %val } @@ -46,13 +52,15 @@ define float @v_constained_fadd_f32_fpexcept_ignore(float %x, float %y) #0 { define float @v_constained_fadd_f32_fpexcept_ignore_flags(float %x, float %y) #0 { ; CHECK-LABEL: name: v_constained_fadd_f32_fpexcept_ignore_flags ; CHECK: bb.1 (%ir-block.0): - ; CHECK-NEXT: liveins: $vgpr0, $vgpr1 + ; CHECK-NEXT: liveins: $vgpr0, $vgpr1, $sgpr30_sgpr31 ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; CHECK-NEXT: %2:_(s32) = nsz nofpexcept G_STRICT_FADD [[COPY]], [[COPY1]] - ; CHECK-NEXT: $vgpr0 = COPY %2(s32) - ; CHECK-NEXT: SI_RETURN implicit $vgpr0 + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 + ; CHECK-NEXT: %3:_(s32) = nsz nofpexcept G_STRICT_FADD [[COPY]], [[COPY1]] + ; CHECK-NEXT: $vgpr0 = COPY %3(s32) + ; CHECK-NEXT: [[COPY3:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY2]] + ; CHECK-NEXT: S_SETPC_B64_return [[COPY3]], implicit $vgpr0 %val = call nsz float @llvm.experimental.constrained.fadd.f32(float %x, float %y, metadata !"round.tonearest", metadata !"fpexcept.ignore") ret float %val } @@ -60,13 +68,15 @@ define float @v_constained_fadd_f32_fpexcept_ignore_flags(float %x, float %y) #0 define float @v_constained_fadd_f32_fpexcept_maytrap(float %x, float %y) #0 { ; CHECK-LABEL: name: v_constained_fadd_f32_fpexcept_maytrap ; CHECK: bb.1 (%ir-block.0): - ; CHECK-NEXT: liveins: $vgpr0, $vgpr1 + ; CHECK-NEXT: liveins: $vgpr0, $vgpr1, $sgpr30_sgpr31 ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 ; CHECK-NEXT: [[STRICT_FADD:%[0-9]+]]:_(s32) = G_STRICT_FADD [[COPY]], [[COPY1]] ; CHECK-NEXT: $vgpr0 = COPY [[STRICT_FADD]](s32) - ; CHECK-NEXT: SI_RETURN implicit $vgpr0 + ; CHECK-NEXT: [[COPY3:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY2]] + ; CHECK-NEXT: S_SETPC_B64_return [[COPY3]], implicit $vgpr0 %val = call float @llvm.experimental.constrained.fadd.f32(float %x, float %y, metadata !"round.tonearest", metadata !"fpexcept.maytrap") ret float %val } @@ -74,7 +84,7 @@ define float @v_constained_fadd_f32_fpexcept_maytrap(float %x, float %y) #0 { define <2 x float> @v_constained_fadd_v2f32_fpexcept_strict(<2 x float> %x, <2 x float> 
%y) #0 { ; CHECK-LABEL: name: v_constained_fadd_v2f32_fpexcept_strict ; CHECK: bb.1 (%ir-block.0): - ; CHECK-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3 + ; CHECK-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $sgpr30_sgpr31 ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 @@ -82,11 +92,13 @@ define <2 x float> @v_constained_fadd_v2f32_fpexcept_strict(<2 x float> %x, <2 x ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 ; CHECK-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $vgpr3 ; CHECK-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[COPY2]](s32), [[COPY3]](s32) + ; CHECK-NEXT: [[COPY4:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 ; CHECK-NEXT: [[STRICT_FADD:%[0-9]+]]:_(<2 x s32>) = G_STRICT_FADD [[BUILD_VECTOR]], [[BUILD_VECTOR1]] ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[STRICT_FADD]](<2 x s32>) ; CHECK-NEXT: $vgpr0 = COPY [[UV]](s32) ; CHECK-NEXT: $vgpr1 = COPY [[UV1]](s32) - ; CHECK-NEXT: SI_RETURN implicit $vgpr0, implicit $vgpr1 + ; CHECK-NEXT: [[COPY5:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY4]] + ; CHECK-NEXT: S_SETPC_B64_return [[COPY5]], implicit $vgpr0, implicit $vgpr1 %val = call <2 x float> @llvm.experimental.constrained.fadd.v2f32(<2 x float> %x, <2 x float> %y, metadata !"round.tonearest", metadata !"fpexcept.strict") ret <2 x float> %val } @@ -94,7 +106,7 @@ define <2 x float> @v_constained_fadd_v2f32_fpexcept_strict(<2 x float> %x, <2 x define <2 x float> @v_constained_fadd_v2f32_fpexcept_ignore(<2 x float> %x, <2 x float> %y) #0 { ; CHECK-LABEL: name: v_constained_fadd_v2f32_fpexcept_ignore ; CHECK: bb.1 (%ir-block.0): - ; CHECK-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3 + ; CHECK-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $sgpr30_sgpr31 ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 @@ -102,11 +114,13 @@ define <2 x float> @v_constained_fadd_v2f32_fpexcept_ignore(<2 x float> %x, <2 x ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 ; CHECK-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $vgpr3 ; CHECK-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[COPY2]](s32), [[COPY3]](s32) - ; CHECK-NEXT: %6:_(<2 x s32>) = nofpexcept G_STRICT_FADD [[BUILD_VECTOR]], [[BUILD_VECTOR1]] - ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES %6(<2 x s32>) + ; CHECK-NEXT: [[COPY4:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 + ; CHECK-NEXT: %7:_(<2 x s32>) = nofpexcept G_STRICT_FADD [[BUILD_VECTOR]], [[BUILD_VECTOR1]] + ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES %7(<2 x s32>) ; CHECK-NEXT: $vgpr0 = COPY [[UV]](s32) ; CHECK-NEXT: $vgpr1 = COPY [[UV1]](s32) - ; CHECK-NEXT: SI_RETURN implicit $vgpr0, implicit $vgpr1 + ; CHECK-NEXT: [[COPY5:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY4]] + ; CHECK-NEXT: S_SETPC_B64_return [[COPY5]], implicit $vgpr0, implicit $vgpr1 %val = call <2 x float> @llvm.experimental.constrained.fadd.v2f32(<2 x float> %x, <2 x float> %y, metadata !"round.tonearest", metadata !"fpexcept.ignore") ret <2 x float> %val } @@ -114,7 +128,7 @@ define <2 x float> @v_constained_fadd_v2f32_fpexcept_ignore(<2 x float> %x, <2 x define <2 x float> @v_constained_fadd_v2f32_fpexcept_maytrap(<2 x float> %x, <2 x float> %y) #0 { ; CHECK-LABEL: name: v_constained_fadd_v2f32_fpexcept_maytrap ; CHECK: bb.1 (%ir-block.0): - ; CHECK-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3 + ; CHECK-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, 
$sgpr30_sgpr31 ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 @@ -122,11 +136,13 @@ define <2 x float> @v_constained_fadd_v2f32_fpexcept_maytrap(<2 x float> %x, <2 ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 ; CHECK-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $vgpr3 ; CHECK-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[COPY2]](s32), [[COPY3]](s32) + ; CHECK-NEXT: [[COPY4:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 ; CHECK-NEXT: [[STRICT_FADD:%[0-9]+]]:_(<2 x s32>) = G_STRICT_FADD [[BUILD_VECTOR]], [[BUILD_VECTOR1]] ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[STRICT_FADD]](<2 x s32>) ; CHECK-NEXT: $vgpr0 = COPY [[UV]](s32) ; CHECK-NEXT: $vgpr1 = COPY [[UV1]](s32) - ; CHECK-NEXT: SI_RETURN implicit $vgpr0, implicit $vgpr1 + ; CHECK-NEXT: [[COPY5:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY4]] + ; CHECK-NEXT: S_SETPC_B64_return [[COPY5]], implicit $vgpr0, implicit $vgpr1 %val = call <2 x float> @llvm.experimental.constrained.fadd.v2f32(<2 x float> %x, <2 x float> %y, metadata !"round.tonearest", metadata !"fpexcept.maytrap") ret <2 x float> %val } @@ -134,13 +150,15 @@ define <2 x float> @v_constained_fadd_v2f32_fpexcept_maytrap(<2 x float> %x, <2 define float @v_constained_fsub_f32_fpexcept_ignore_flags(float %x, float %y) #0 { ; CHECK-LABEL: name: v_constained_fsub_f32_fpexcept_ignore_flags ; CHECK: bb.1 (%ir-block.0): - ; CHECK-NEXT: liveins: $vgpr0, $vgpr1 + ; CHECK-NEXT: liveins: $vgpr0, $vgpr1, $sgpr30_sgpr31 ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; CHECK-NEXT: %2:_(s32) = nsz nofpexcept G_STRICT_FSUB [[COPY]], [[COPY1]] - ; CHECK-NEXT: $vgpr0 = COPY %2(s32) - ; CHECK-NEXT: SI_RETURN implicit $vgpr0 + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 + ; CHECK-NEXT: %3:_(s32) = nsz nofpexcept G_STRICT_FSUB [[COPY]], [[COPY1]] + ; CHECK-NEXT: $vgpr0 = COPY %3(s32) + ; CHECK-NEXT: [[COPY3:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY2]] + ; CHECK-NEXT: S_SETPC_B64_return [[COPY3]], implicit $vgpr0 %val = call nsz float @llvm.experimental.constrained.fsub.f32(float %x, float %y, metadata !"round.tonearest", metadata !"fpexcept.ignore") ret float %val } @@ -148,13 +166,15 @@ define float @v_constained_fsub_f32_fpexcept_ignore_flags(float %x, float %y) #0 define float @v_constained_fmul_f32_fpexcept_ignore_flags(float %x, float %y) #0 { ; CHECK-LABEL: name: v_constained_fmul_f32_fpexcept_ignore_flags ; CHECK: bb.1 (%ir-block.0): - ; CHECK-NEXT: liveins: $vgpr0, $vgpr1 + ; CHECK-NEXT: liveins: $vgpr0, $vgpr1, $sgpr30_sgpr31 ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; CHECK-NEXT: %2:_(s32) = nsz nofpexcept G_STRICT_FMUL [[COPY]], [[COPY1]] - ; CHECK-NEXT: $vgpr0 = COPY %2(s32) - ; CHECK-NEXT: SI_RETURN implicit $vgpr0 + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 + ; CHECK-NEXT: %3:_(s32) = nsz nofpexcept G_STRICT_FMUL [[COPY]], [[COPY1]] + ; CHECK-NEXT: $vgpr0 = COPY %3(s32) + ; CHECK-NEXT: [[COPY3:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY2]] + ; CHECK-NEXT: S_SETPC_B64_return [[COPY3]], implicit $vgpr0 %val = call nsz float @llvm.experimental.constrained.fmul.f32(float %x, float %y, metadata !"round.tonearest", metadata !"fpexcept.ignore") ret float %val } @@ -162,13 +182,15 @@ define float @v_constained_fmul_f32_fpexcept_ignore_flags(float %x, float %y) #0 define float 
@v_constained_fdiv_f32_fpexcept_ignore_flags(float %x, float %y) #0 { ; CHECK-LABEL: name: v_constained_fdiv_f32_fpexcept_ignore_flags ; CHECK: bb.1 (%ir-block.0): - ; CHECK-NEXT: liveins: $vgpr0, $vgpr1 + ; CHECK-NEXT: liveins: $vgpr0, $vgpr1, $sgpr30_sgpr31 ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; CHECK-NEXT: %2:_(s32) = nsz nofpexcept G_STRICT_FDIV [[COPY]], [[COPY1]] - ; CHECK-NEXT: $vgpr0 = COPY %2(s32) - ; CHECK-NEXT: SI_RETURN implicit $vgpr0 + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 + ; CHECK-NEXT: %3:_(s32) = nsz nofpexcept G_STRICT_FDIV [[COPY]], [[COPY1]] + ; CHECK-NEXT: $vgpr0 = COPY %3(s32) + ; CHECK-NEXT: [[COPY3:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY2]] + ; CHECK-NEXT: S_SETPC_B64_return [[COPY3]], implicit $vgpr0 %val = call nsz float @llvm.experimental.constrained.fdiv.f32(float %x, float %y, metadata !"round.tonearest", metadata !"fpexcept.ignore") ret float %val } @@ -176,13 +198,15 @@ define float @v_constained_fdiv_f32_fpexcept_ignore_flags(float %x, float %y) #0 define float @v_constained_frem_f32_fpexcept_ignore_flags(float %x, float %y) #0 { ; CHECK-LABEL: name: v_constained_frem_f32_fpexcept_ignore_flags ; CHECK: bb.1 (%ir-block.0): - ; CHECK-NEXT: liveins: $vgpr0, $vgpr1 + ; CHECK-NEXT: liveins: $vgpr0, $vgpr1, $sgpr30_sgpr31 ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; CHECK-NEXT: %2:_(s32) = nsz nofpexcept G_STRICT_FREM [[COPY]], [[COPY1]] - ; CHECK-NEXT: $vgpr0 = COPY %2(s32) - ; CHECK-NEXT: SI_RETURN implicit $vgpr0 + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 + ; CHECK-NEXT: %3:_(s32) = nsz nofpexcept G_STRICT_FREM [[COPY]], [[COPY1]] + ; CHECK-NEXT: $vgpr0 = COPY %3(s32) + ; CHECK-NEXT: [[COPY3:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY2]] + ; CHECK-NEXT: S_SETPC_B64_return [[COPY3]], implicit $vgpr0 %val = call nsz float @llvm.experimental.constrained.frem.f32(float %x, float %y, metadata !"round.tonearest", metadata !"fpexcept.ignore") ret float %val } @@ -190,14 +214,16 @@ define float @v_constained_frem_f32_fpexcept_ignore_flags(float %x, float %y) #0 define float @v_constained_fma_f32_fpexcept_ignore_flags(float %x, float %y, float %z) #0 { ; CHECK-LABEL: name: v_constained_fma_f32_fpexcept_ignore_flags ; CHECK: bb.1 (%ir-block.0): - ; CHECK-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2 + ; CHECK-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2, $sgpr30_sgpr31 ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; CHECK-NEXT: %3:_(s32) = nsz nofpexcept G_STRICT_FMA [[COPY]], [[COPY1]], [[COPY2]] - ; CHECK-NEXT: $vgpr0 = COPY %3(s32) - ; CHECK-NEXT: SI_RETURN implicit $vgpr0 + ; CHECK-NEXT: [[COPY3:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 + ; CHECK-NEXT: %4:_(s32) = nsz nofpexcept G_STRICT_FMA [[COPY]], [[COPY1]], [[COPY2]] + ; CHECK-NEXT: $vgpr0 = COPY %4(s32) + ; CHECK-NEXT: [[COPY4:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY3]] + ; CHECK-NEXT: S_SETPC_B64_return [[COPY4]], implicit $vgpr0 %val = call nsz float @llvm.experimental.constrained.fma.f32(float %x, float %y, float %z, metadata !"round.tonearest", metadata !"fpexcept.ignore") ret float %val } @@ -205,12 +231,14 @@ define float @v_constained_fma_f32_fpexcept_ignore_flags(float %x, float %y, flo define float @v_constained_sqrt_f32_fpexcept_strict(float %x) #0 { ; CHECK-LABEL: name: 
v_constained_sqrt_f32_fpexcept_strict ; CHECK: bb.1 (%ir-block.0): - ; CHECK-NEXT: liveins: $vgpr0 + ; CHECK-NEXT: liveins: $vgpr0, $sgpr30_sgpr31 ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 ; CHECK-NEXT: [[STRICT_FSQRT:%[0-9]+]]:_(s32) = G_STRICT_FSQRT [[COPY]] ; CHECK-NEXT: $vgpr0 = COPY [[STRICT_FSQRT]](s32) - ; CHECK-NEXT: SI_RETURN implicit $vgpr0 + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY1]] + ; CHECK-NEXT: S_SETPC_B64_return [[COPY2]], implicit $vgpr0 %val = call float @llvm.experimental.constrained.sqrt.f32(float %x, metadata !"round.tonearest", metadata !"fpexcept.strict") ret float %val } diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-fixed-function-abi-vgpr-args.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-fixed-function-abi-vgpr-args.ll index af0dd868c909..ceba50dc1b7b 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-fixed-function-abi-vgpr-args.ll +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-fixed-function-abi-vgpr-args.ll @@ -7,7 +7,7 @@ define void @void_a31i32_i32([31 x i32] %arg0, i32 %arg1) { ; FIXED-LABEL: name: void_a31i32_i32 ; FIXED: bb.1 (%ir-block.0): - ; FIXED-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $vgpr8, $vgpr9, $vgpr10, $vgpr11, $vgpr12, $vgpr13, $vgpr14, $vgpr15, $vgpr16, $vgpr17, $vgpr18, $vgpr19, $vgpr20, $vgpr21, $vgpr22, $vgpr23, $vgpr24, $vgpr25, $vgpr26, $vgpr27, $vgpr28, $vgpr29, $vgpr30 + ; FIXED-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $vgpr8, $vgpr9, $vgpr10, $vgpr11, $vgpr12, $vgpr13, $vgpr14, $vgpr15, $vgpr16, $vgpr17, $vgpr18, $vgpr19, $vgpr20, $vgpr21, $vgpr22, $vgpr23, $vgpr24, $vgpr25, $vgpr26, $vgpr27, $vgpr28, $vgpr29, $vgpr30, $sgpr30_sgpr31 ; FIXED-NEXT: {{ $}} ; FIXED-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 ; FIXED-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 @@ -42,9 +42,11 @@ define void @void_a31i32_i32([31 x i32] %arg0, i32 %arg1) { ; FIXED-NEXT: [[COPY30:%[0-9]+]]:_(s32) = COPY $vgpr30 ; FIXED-NEXT: [[FRAME_INDEX:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.0 ; FIXED-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX]](p5) :: (invariant load (s32) from %fixed-stack.0, align 16, addrspace 5) + ; FIXED-NEXT: [[COPY31:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 ; FIXED-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF ; FIXED-NEXT: G_STORE [[LOAD]](s32), [[DEF]](p1) :: (store (s32) into `i32 addrspace(1)* undef`, addrspace 1) - ; FIXED-NEXT: SI_RETURN + ; FIXED-NEXT: [[COPY32:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY31]] + ; FIXED-NEXT: S_SETPC_B64_return [[COPY32]] store i32 %arg1, i32 addrspace(1)* undef ret void } diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-function-args.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-function-args.ll index fa5daf763c3e..70a41f9dafaa 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-function-args.ll +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-function-args.ll @@ -8,12 +8,14 @@ define void @void_func_empty_arg({} %arg0, i32 %arg1) #0 { ; CHECK-LABEL: name: void_func_empty_arg ; CHECK: bb.1 (%ir-block.0): - ; CHECK-NEXT: liveins: $vgpr0 + ; CHECK-NEXT: liveins: $vgpr0, $sgpr30_sgpr31 ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF ; CHECK-NEXT: G_STORE [[COPY]](s32), [[DEF]](p1) :: (store (s32) 
into `i32 addrspace(1)* undef`, addrspace 1) - ; CHECK-NEXT: SI_RETURN + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY1]] + ; CHECK-NEXT: S_SETPC_B64_return [[COPY2]] store i32 %arg1, i32 addrspace(1)* undef ret void } @@ -21,12 +23,14 @@ define void @void_func_empty_arg({} %arg0, i32 %arg1) #0 { define void @void_func_empty_array([0 x i8] %arg0, i32 %arg1) #0 { ; CHECK-LABEL: name: void_func_empty_array ; CHECK: bb.1 (%ir-block.0): - ; CHECK-NEXT: liveins: $vgpr0 + ; CHECK-NEXT: liveins: $vgpr0, $sgpr30_sgpr31 ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF ; CHECK-NEXT: G_STORE [[COPY]](s32), [[DEF]](p1) :: (store (s32) into `i32 addrspace(1)* undef`, addrspace 1) - ; CHECK-NEXT: SI_RETURN + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY1]] + ; CHECK-NEXT: S_SETPC_B64_return [[COPY2]] store i32 %arg1, i32 addrspace(1)* undef ret void } @@ -34,13 +38,15 @@ define void @void_func_empty_array([0 x i8] %arg0, i32 %arg1) #0 { define void @void_func_i1(i1 %arg0) #0 { ; CHECK-LABEL: name: void_func_i1 ; CHECK: bb.1 (%ir-block.0): - ; CHECK-NEXT: liveins: $vgpr0 + ; CHECK-NEXT: liveins: $vgpr0, $sgpr30_sgpr31 ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(s1) = G_TRUNC [[COPY]](s32) + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF ; CHECK-NEXT: G_STORE [[TRUNC]](s1), [[DEF]](p1) :: (store (s1) into `i1 addrspace(1)* undef`, addrspace 1) - ; CHECK-NEXT: SI_RETURN + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY1]] + ; CHECK-NEXT: S_SETPC_B64_return [[COPY2]] store i1 %arg0, i1 addrspace(1)* undef ret void } @@ -48,17 +54,19 @@ define void @void_func_i1(i1 %arg0) #0 { define void @void_func_i1_zeroext(i1 zeroext %arg0) #0 { ; CHECK-LABEL: name: void_func_i1_zeroext ; CHECK: bb.1 (%ir-block.0): - ; CHECK-NEXT: liveins: $vgpr0 + ; CHECK-NEXT: liveins: $vgpr0, $sgpr30_sgpr31 ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 ; CHECK-NEXT: [[ASSERT_ZEXT:%[0-9]+]]:_(s32) = G_ASSERT_ZEXT [[COPY]], 1 ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(s1) = G_TRUNC [[ASSERT_ZEXT]](s32) + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 12 ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF ; CHECK-NEXT: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[TRUNC]](s1) ; CHECK-NEXT: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[ZEXT]], [[C]] ; CHECK-NEXT: G_STORE [[ADD]](s32), [[DEF]](p1) :: (store (s32) into `i32 addrspace(1)* undef`, addrspace 1) - ; CHECK-NEXT: SI_RETURN + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY1]] + ; CHECK-NEXT: S_SETPC_B64_return [[COPY2]] %ext = zext i1 %arg0 to i32 %add = add i32 %ext, 12 store i32 %add, i32 addrspace(1)* undef @@ -68,17 +76,19 @@ define void @void_func_i1_zeroext(i1 zeroext %arg0) #0 { define void @void_func_i1_signext(i1 signext %arg0) #0 { ; CHECK-LABEL: name: void_func_i1_signext ; CHECK: bb.1 (%ir-block.0): - ; CHECK-NEXT: liveins: $vgpr0 + ; CHECK-NEXT: liveins: $vgpr0, $sgpr30_sgpr31 ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 ; CHECK-NEXT: [[ASSERT_SEXT:%[0-9]+]]:_(s32) = G_ASSERT_SEXT [[COPY]], 1 ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(s1) = G_TRUNC [[ASSERT_SEXT]](s32) + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT 
i32 12 ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF ; CHECK-NEXT: [[SEXT:%[0-9]+]]:_(s32) = G_SEXT [[TRUNC]](s1) ; CHECK-NEXT: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[SEXT]], [[C]] ; CHECK-NEXT: G_STORE [[ADD]](s32), [[DEF]](p1) :: (store (s32) into `i32 addrspace(1)* undef`, addrspace 1) - ; CHECK-NEXT: SI_RETURN + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY1]] + ; CHECK-NEXT: S_SETPC_B64_return [[COPY2]] %ext = sext i1 %arg0 to i32 %add = add i32 %ext, 12 store i32 %add, i32 addrspace(1)* undef @@ -89,10 +99,11 @@ define void @i1_arg_i1_use(i1 %arg) #0 { ; CHECK-LABEL: name: i1_arg_i1_use ; CHECK: bb.1.bb: ; CHECK-NEXT: successors: %bb.2(0x40000000), %bb.3(0x40000000) - ; CHECK-NEXT: liveins: $vgpr0 + ; CHECK-NEXT: liveins: $vgpr0, $sgpr30_sgpr31 ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(s1) = G_TRUNC [[COPY]](s32) + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 ; CHECK-NEXT: [[C:%[0-9]+]]:_(s1) = G_CONSTANT i1 true ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF @@ -109,7 +120,8 @@ define void @i1_arg_i1_use(i1 %arg) #0 { ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: bb.3.bb2: ; CHECK-NEXT: G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.end.cf), [[INT1]](s64) - ; CHECK-NEXT: SI_RETURN + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY1]] + ; CHECK-NEXT: S_SETPC_B64_return [[COPY2]] bb: br i1 %arg, label %bb2, label %bb1 @@ -124,14 +136,16 @@ bb2: define void @void_func_i8(i8 %arg0) #0 { ; CHECK-LABEL: name: void_func_i8 ; CHECK: bb.1 (%ir-block.0): - ; CHECK-NEXT: liveins: $vgpr0 + ; CHECK-NEXT: liveins: $vgpr0, $sgpr30_sgpr31 ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY]](s32) ; CHECK-NEXT: [[TRUNC1:%[0-9]+]]:_(s8) = G_TRUNC [[TRUNC]](s16) + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF ; CHECK-NEXT: G_STORE [[TRUNC1]](s8), [[DEF]](p1) :: (store (s8) into `i8 addrspace(1)* undef`, addrspace 1) - ; CHECK-NEXT: SI_RETURN + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY1]] + ; CHECK-NEXT: S_SETPC_B64_return [[COPY2]] store i8 %arg0, i8 addrspace(1)* undef ret void } @@ -139,17 +153,19 @@ define void @void_func_i8(i8 %arg0) #0 { define void @void_func_i8_zeroext(i8 zeroext %arg0) #0 { ; CHECK-LABEL: name: void_func_i8_zeroext ; CHECK: bb.1 (%ir-block.0): - ; CHECK-NEXT: liveins: $vgpr0 + ; CHECK-NEXT: liveins: $vgpr0, $sgpr30_sgpr31 ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 ; CHECK-NEXT: [[ASSERT_ZEXT:%[0-9]+]]:_(s32) = G_ASSERT_ZEXT [[COPY]], 8 ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(s8) = G_TRUNC [[ASSERT_ZEXT]](s32) + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 12 ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF ; CHECK-NEXT: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[TRUNC]](s8) ; CHECK-NEXT: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[ZEXT]], [[C]] ; CHECK-NEXT: G_STORE [[ADD]](s32), [[DEF]](p1) :: (store (s32) into `i32 addrspace(1)* undef`, addrspace 1) - ; CHECK-NEXT: SI_RETURN + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY1]] + ; CHECK-NEXT: S_SETPC_B64_return [[COPY2]] %ext = zext i8 %arg0 to i32 %add = add i32 %ext, 12 store i32 %add, i32 addrspace(1)* undef @@ -159,17 +175,19 @@ define void @void_func_i8_zeroext(i8 zeroext %arg0) #0 { define void @void_func_i8_signext(i8 
signext %arg0) #0 { ; CHECK-LABEL: name: void_func_i8_signext ; CHECK: bb.1 (%ir-block.0): - ; CHECK-NEXT: liveins: $vgpr0 + ; CHECK-NEXT: liveins: $vgpr0, $sgpr30_sgpr31 ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 ; CHECK-NEXT: [[ASSERT_SEXT:%[0-9]+]]:_(s32) = G_ASSERT_SEXT [[COPY]], 8 ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(s8) = G_TRUNC [[ASSERT_SEXT]](s32) + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 12 ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF ; CHECK-NEXT: [[SEXT:%[0-9]+]]:_(s32) = G_SEXT [[TRUNC]](s8) ; CHECK-NEXT: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[SEXT]], [[C]] ; CHECK-NEXT: G_STORE [[ADD]](s32), [[DEF]](p1) :: (store (s32) into `i32 addrspace(1)* undef`, addrspace 1) - ; CHECK-NEXT: SI_RETURN + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY1]] + ; CHECK-NEXT: S_SETPC_B64_return [[COPY2]] %ext = sext i8 %arg0 to i32 %add = add i32 %ext, 12 store i32 %add, i32 addrspace(1)* undef @@ -179,13 +197,15 @@ define void @void_func_i8_signext(i8 signext %arg0) #0 { define void @void_func_i16(i16 %arg0) #0 { ; CHECK-LABEL: name: void_func_i16 ; CHECK: bb.1 (%ir-block.0): - ; CHECK-NEXT: liveins: $vgpr0 + ; CHECK-NEXT: liveins: $vgpr0, $sgpr30_sgpr31 ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY]](s32) + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF ; CHECK-NEXT: G_STORE [[TRUNC]](s16), [[DEF]](p1) :: (store (s16) into `i16 addrspace(1)* undef`, addrspace 1) - ; CHECK-NEXT: SI_RETURN + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY1]] + ; CHECK-NEXT: S_SETPC_B64_return [[COPY2]] store i16 %arg0, i16 addrspace(1)* undef ret void } @@ -193,17 +213,19 @@ define void @void_func_i16(i16 %arg0) #0 { define void @void_func_i16_zeroext(i16 zeroext %arg0) #0 { ; CHECK-LABEL: name: void_func_i16_zeroext ; CHECK: bb.1 (%ir-block.0): - ; CHECK-NEXT: liveins: $vgpr0 + ; CHECK-NEXT: liveins: $vgpr0, $sgpr30_sgpr31 ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 ; CHECK-NEXT: [[ASSERT_ZEXT:%[0-9]+]]:_(s32) = G_ASSERT_ZEXT [[COPY]], 16 ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[ASSERT_ZEXT]](s32) + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 12 ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF ; CHECK-NEXT: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[TRUNC]](s16) ; CHECK-NEXT: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[ZEXT]], [[C]] ; CHECK-NEXT: G_STORE [[ADD]](s32), [[DEF]](p1) :: (store (s32) into `i32 addrspace(1)* undef`, addrspace 1) - ; CHECK-NEXT: SI_RETURN + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY1]] + ; CHECK-NEXT: S_SETPC_B64_return [[COPY2]] %ext = zext i16 %arg0 to i32 %add = add i32 %ext, 12 store i32 %add, i32 addrspace(1)* undef @@ -213,17 +235,19 @@ define void @void_func_i16_zeroext(i16 zeroext %arg0) #0 { define void @void_func_i16_signext(i16 signext %arg0) #0 { ; CHECK-LABEL: name: void_func_i16_signext ; CHECK: bb.1 (%ir-block.0): - ; CHECK-NEXT: liveins: $vgpr0 + ; CHECK-NEXT: liveins: $vgpr0, $sgpr30_sgpr31 ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 ; CHECK-NEXT: [[ASSERT_SEXT:%[0-9]+]]:_(s32) = G_ASSERT_SEXT [[COPY]], 16 ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[ASSERT_SEXT]](s32) + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 ; CHECK-NEXT: 
[[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 12 ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF ; CHECK-NEXT: [[SEXT:%[0-9]+]]:_(s32) = G_SEXT [[TRUNC]](s16) ; CHECK-NEXT: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[SEXT]], [[C]] ; CHECK-NEXT: G_STORE [[ADD]](s32), [[DEF]](p1) :: (store (s32) into `i32 addrspace(1)* undef`, addrspace 1) - ; CHECK-NEXT: SI_RETURN + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY1]] + ; CHECK-NEXT: S_SETPC_B64_return [[COPY2]] %ext = sext i16 %arg0 to i32 %add = add i32 %ext, 12 store i32 %add, i32 addrspace(1)* undef @@ -233,13 +257,15 @@ define void @void_func_i16_signext(i16 signext %arg0) #0 { define void @void_func_i24(i24 %arg0) #0 { ; CHECK-LABEL: name: void_func_i24 ; CHECK: bb.1 (%ir-block.0): - ; CHECK-NEXT: liveins: $vgpr0 + ; CHECK-NEXT: liveins: $vgpr0, $sgpr30_sgpr31 ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(s24) = G_TRUNC [[COPY]](s32) + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF ; CHECK-NEXT: G_STORE [[TRUNC]](s24), [[DEF]](p1) :: (store (s24) into `i24 addrspace(1)* undef`, align 4, addrspace 1) - ; CHECK-NEXT: SI_RETURN + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY1]] + ; CHECK-NEXT: S_SETPC_B64_return [[COPY2]] store i24 %arg0, i24 addrspace(1)* undef ret void } @@ -247,14 +273,16 @@ define void @void_func_i24(i24 %arg0) #0 { define void @void_func_i24_zeroext(i24 zeroext %arg0) #0 { ; CHECK-LABEL: name: void_func_i24_zeroext ; CHECK: bb.1 (%ir-block.0): - ; CHECK-NEXT: liveins: $vgpr0 + ; CHECK-NEXT: liveins: $vgpr0, $sgpr30_sgpr31 ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 ; CHECK-NEXT: [[ASSERT_ZEXT:%[0-9]+]]:_(s32) = G_ASSERT_ZEXT [[COPY]], 24 ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(s24) = G_TRUNC [[ASSERT_ZEXT]](s32) + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF ; CHECK-NEXT: G_STORE [[TRUNC]](s24), [[DEF]](p1) :: (store (s24) into `i24 addrspace(1)* undef`, align 4, addrspace 1) - ; CHECK-NEXT: SI_RETURN + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY1]] + ; CHECK-NEXT: S_SETPC_B64_return [[COPY2]] store i24 %arg0, i24 addrspace(1)* undef ret void } @@ -262,14 +290,16 @@ define void @void_func_i24_zeroext(i24 zeroext %arg0) #0 { define void @void_func_i24_signext(i24 signext %arg0) #0 { ; CHECK-LABEL: name: void_func_i24_signext ; CHECK: bb.1 (%ir-block.0): - ; CHECK-NEXT: liveins: $vgpr0 + ; CHECK-NEXT: liveins: $vgpr0, $sgpr30_sgpr31 ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 ; CHECK-NEXT: [[ASSERT_SEXT:%[0-9]+]]:_(s32) = G_ASSERT_SEXT [[COPY]], 24 ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(s24) = G_TRUNC [[ASSERT_SEXT]](s32) + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF ; CHECK-NEXT: G_STORE [[TRUNC]](s24), [[DEF]](p1) :: (store (s24) into `i24 addrspace(1)* undef`, align 4, addrspace 1) - ; CHECK-NEXT: SI_RETURN + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY1]] + ; CHECK-NEXT: S_SETPC_B64_return [[COPY2]] store i24 %arg0, i24 addrspace(1)* undef ret void } @@ -277,12 +307,14 @@ define void @void_func_i24_signext(i24 signext %arg0) #0 { define void @void_func_i32(i32 %arg0) #0 { ; CHECK-LABEL: name: void_func_i32 ; CHECK: bb.1 (%ir-block.0): - ; CHECK-NEXT: liveins: $vgpr0 + ; CHECK-NEXT: liveins: $vgpr0, $sgpr30_sgpr31 ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) 
= COPY $vgpr0 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF ; CHECK-NEXT: G_STORE [[COPY]](s32), [[DEF]](p1) :: (store (s32) into `i32 addrspace(1)* undef`, addrspace 1) - ; CHECK-NEXT: SI_RETURN + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY1]] + ; CHECK-NEXT: S_SETPC_B64_return [[COPY2]] store i32 %arg0, i32 addrspace(1)* undef ret void } @@ -291,12 +323,14 @@ define void @void_func_i32(i32 %arg0) #0 { define void @void_func_i32_signext(i32 signext %arg0) #0 { ; CHECK-LABEL: name: void_func_i32_signext ; CHECK: bb.1 (%ir-block.0): - ; CHECK-NEXT: liveins: $vgpr0 + ; CHECK-NEXT: liveins: $vgpr0, $sgpr30_sgpr31 ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF ; CHECK-NEXT: G_STORE [[COPY]](s32), [[DEF]](p1) :: (store (s32) into `i32 addrspace(1)* undef`, addrspace 1) - ; CHECK-NEXT: SI_RETURN + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY1]] + ; CHECK-NEXT: S_SETPC_B64_return [[COPY2]] store i32 %arg0, i32 addrspace(1)* undef ret void } @@ -305,12 +339,14 @@ define void @void_func_i32_signext(i32 signext %arg0) #0 { define void @void_func_i32_zeroext(i32 zeroext %arg0) #0 { ; CHECK-LABEL: name: void_func_i32_zeroext ; CHECK: bb.1 (%ir-block.0): - ; CHECK-NEXT: liveins: $vgpr0 + ; CHECK-NEXT: liveins: $vgpr0, $sgpr30_sgpr31 ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF ; CHECK-NEXT: G_STORE [[COPY]](s32), [[DEF]](p1) :: (store (s32) into `i32 addrspace(1)* undef`, addrspace 1) - ; CHECK-NEXT: SI_RETURN + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY1]] + ; CHECK-NEXT: S_SETPC_B64_return [[COPY2]] store i32 %arg0, i32 addrspace(1)* undef ret void } @@ -318,12 +354,14 @@ define void @void_func_i32_zeroext(i32 zeroext %arg0) #0 { define void @void_func_p3i8(i8 addrspace(3)* %arg0) #0 { ; CHECK-LABEL: name: void_func_p3i8 ; CHECK: bb.1 (%ir-block.0): - ; CHECK-NEXT: liveins: $vgpr0 + ; CHECK-NEXT: liveins: $vgpr0, $sgpr30_sgpr31 ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF ; CHECK-NEXT: G_STORE [[COPY]](p3), [[DEF]](p1) :: (store (p3) into `i8 addrspace(3)* addrspace(1)* undef`, addrspace 1) - ; CHECK-NEXT: SI_RETURN + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY1]] + ; CHECK-NEXT: S_SETPC_B64_return [[COPY2]] store i8 addrspace(3)* %arg0, i8 addrspace(3)* addrspace(1)* undef ret void } @@ -331,15 +369,17 @@ define void @void_func_p3i8(i8 addrspace(3)* %arg0) #0 { define void @void_func_i48(i48 %arg0) #0 { ; CHECK-LABEL: name: void_func_i48 ; CHECK: bb.1 (%ir-block.0): - ; CHECK-NEXT: liveins: $vgpr0, $vgpr1 + ; CHECK-NEXT: liveins: $vgpr0, $vgpr1, $sgpr30_sgpr31 ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 ; CHECK-NEXT: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY]](s32), [[COPY1]](s32) ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(s48) = G_TRUNC [[MV]](s64) + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF ; CHECK-NEXT: G_STORE [[TRUNC]](s48), [[DEF]](p1) :: (store (s48) into `i48 addrspace(1)* undef`, align 8, 
addrspace 1) - ; CHECK-NEXT: SI_RETURN + ; CHECK-NEXT: [[COPY3:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY2]] + ; CHECK-NEXT: S_SETPC_B64_return [[COPY3]] store i48 %arg0, i48 addrspace(1)* undef ret void } @@ -347,18 +387,20 @@ define void @void_func_i48(i48 %arg0) #0 { define void @void_func_i48_zeroext(i48 zeroext %arg0) #0 { ; CHECK-LABEL: name: void_func_i48_zeroext ; CHECK: bb.1 (%ir-block.0): - ; CHECK-NEXT: liveins: $vgpr0, $vgpr1 + ; CHECK-NEXT: liveins: $vgpr0, $vgpr1, $sgpr30_sgpr31 ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 ; CHECK-NEXT: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY]](s32), [[COPY1]](s32) ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(s48) = G_TRUNC [[MV]](s64) + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 ; CHECK-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 12 ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF ; CHECK-NEXT: [[ZEXT:%[0-9]+]]:_(s64) = G_ZEXT [[TRUNC]](s48) ; CHECK-NEXT: [[ADD:%[0-9]+]]:_(s64) = G_ADD [[ZEXT]], [[C]] ; CHECK-NEXT: G_STORE [[ADD]](s64), [[DEF]](p1) :: (store (s64) into `i64 addrspace(1)* undef`, addrspace 1) - ; CHECK-NEXT: SI_RETURN + ; CHECK-NEXT: [[COPY3:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY2]] + ; CHECK-NEXT: S_SETPC_B64_return [[COPY3]] %ext = zext i48 %arg0 to i64 %add = add i64 %ext, 12 store i64 %add, i64 addrspace(1)* undef @@ -368,18 +410,20 @@ define void @void_func_i48_zeroext(i48 zeroext %arg0) #0 { define void @void_func_i48_signext(i48 signext %arg0) #0 { ; CHECK-LABEL: name: void_func_i48_signext ; CHECK: bb.1 (%ir-block.0): - ; CHECK-NEXT: liveins: $vgpr0, $vgpr1 + ; CHECK-NEXT: liveins: $vgpr0, $vgpr1, $sgpr30_sgpr31 ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 ; CHECK-NEXT: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY]](s32), [[COPY1]](s32) ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(s48) = G_TRUNC [[MV]](s64) + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 ; CHECK-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 12 ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF ; CHECK-NEXT: [[SEXT:%[0-9]+]]:_(s64) = G_SEXT [[TRUNC]](s48) ; CHECK-NEXT: [[ADD:%[0-9]+]]:_(s64) = G_ADD [[SEXT]], [[C]] ; CHECK-NEXT: G_STORE [[ADD]](s64), [[DEF]](p1) :: (store (s64) into `i64 addrspace(1)* undef`, addrspace 1) - ; CHECK-NEXT: SI_RETURN + ; CHECK-NEXT: [[COPY3:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY2]] + ; CHECK-NEXT: S_SETPC_B64_return [[COPY3]] %ext = sext i48 %arg0 to i64 %add = add i64 %ext, 12 store i64 %add, i64 addrspace(1)* undef @@ -389,14 +433,16 @@ define void @void_func_i48_signext(i48 signext %arg0) #0 { define void @void_func_i64(i64 %arg0) #0 { ; CHECK-LABEL: name: void_func_i64 ; CHECK: bb.1 (%ir-block.0): - ; CHECK-NEXT: liveins: $vgpr0, $vgpr1 + ; CHECK-NEXT: liveins: $vgpr0, $vgpr1, $sgpr30_sgpr31 ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 ; CHECK-NEXT: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY]](s32), [[COPY1]](s32) + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF ; CHECK-NEXT: G_STORE [[MV]](s64), [[DEF]](p1) :: (store (s64) into `i64 addrspace(1)* undef`, addrspace 1) - ; CHECK-NEXT: SI_RETURN + ; CHECK-NEXT: [[COPY3:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY2]] + ; CHECK-NEXT: S_SETPC_B64_return [[COPY3]] store i64 %arg0, i64 addrspace(1)* undef ret void } @@ -404,16 +450,18 @@ define void 
@void_func_i64(i64 %arg0) #0 { define void @void_func_i95(i95 %arg0) #0 { ; CHECK-LABEL: name: void_func_i95 ; CHECK: bb.1 (%ir-block.0): - ; CHECK-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2 + ; CHECK-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2, $sgpr30_sgpr31 ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 ; CHECK-NEXT: [[MV:%[0-9]+]]:_(s96) = G_MERGE_VALUES [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32) ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(s95) = G_TRUNC [[MV]](s96) + ; CHECK-NEXT: [[COPY3:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF ; CHECK-NEXT: G_STORE [[TRUNC]](s95), [[DEF]](p1) :: (store (s95) into `i95 addrspace(1)* undef`, align 8, addrspace 1) - ; CHECK-NEXT: SI_RETURN + ; CHECK-NEXT: [[COPY4:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY3]] + ; CHECK-NEXT: S_SETPC_B64_return [[COPY4]] store i95 %arg0, i95 addrspace(1)* undef ret void } @@ -421,19 +469,21 @@ define void @void_func_i95(i95 %arg0) #0 { define void @void_func_i95_zeroext(i95 zeroext %arg0) #0 { ; CHECK-LABEL: name: void_func_i95_zeroext ; CHECK: bb.1 (%ir-block.0): - ; CHECK-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2 + ; CHECK-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2, $sgpr30_sgpr31 ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 ; CHECK-NEXT: [[MV:%[0-9]+]]:_(s96) = G_MERGE_VALUES [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32) ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(s95) = G_TRUNC [[MV]](s96) + ; CHECK-NEXT: [[COPY3:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 ; CHECK-NEXT: [[C:%[0-9]+]]:_(s96) = G_CONSTANT i96 12 ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF ; CHECK-NEXT: [[ZEXT:%[0-9]+]]:_(s96) = G_ZEXT [[TRUNC]](s95) ; CHECK-NEXT: [[ADD:%[0-9]+]]:_(s96) = G_ADD [[ZEXT]], [[C]] ; CHECK-NEXT: G_STORE [[ADD]](s96), [[DEF]](p1) :: (store (s96) into `i96 addrspace(1)* undef`, align 8, addrspace 1) - ; CHECK-NEXT: SI_RETURN + ; CHECK-NEXT: [[COPY4:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY3]] + ; CHECK-NEXT: S_SETPC_B64_return [[COPY4]] %ext = zext i95 %arg0 to i96 %add = add i96 %ext, 12 store i96 %add, i96 addrspace(1)* undef @@ -443,19 +493,21 @@ define void @void_func_i95_zeroext(i95 zeroext %arg0) #0 { define void @void_func_i95_signext(i95 signext %arg0) #0 { ; CHECK-LABEL: name: void_func_i95_signext ; CHECK: bb.1 (%ir-block.0): - ; CHECK-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2 + ; CHECK-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2, $sgpr30_sgpr31 ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 ; CHECK-NEXT: [[MV:%[0-9]+]]:_(s96) = G_MERGE_VALUES [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32) ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(s95) = G_TRUNC [[MV]](s96) + ; CHECK-NEXT: [[COPY3:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 ; CHECK-NEXT: [[C:%[0-9]+]]:_(s96) = G_CONSTANT i96 12 ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF ; CHECK-NEXT: [[SEXT:%[0-9]+]]:_(s96) = G_SEXT [[TRUNC]](s95) ; CHECK-NEXT: [[ADD:%[0-9]+]]:_(s96) = G_ADD [[SEXT]], [[C]] ; CHECK-NEXT: G_STORE [[ADD]](s96), [[DEF]](p1) :: (store (s96) into `i96 addrspace(1)* undef`, align 8, addrspace 1) - ; CHECK-NEXT: SI_RETURN + ; CHECK-NEXT: [[COPY4:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY3]] + ; CHECK-NEXT: S_SETPC_B64_return [[COPY4]] %ext = sext i95 %arg0 to i96 %add = add i96 
%ext, 12 store i96 %add, i96 addrspace(1)* undef @@ -465,15 +517,17 @@ define void @void_func_i95_signext(i95 signext %arg0) #0 { define void @void_func_i96(i96 %arg0) #0 { ; CHECK-LABEL: name: void_func_i96 ; CHECK: bb.1 (%ir-block.0): - ; CHECK-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2 + ; CHECK-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2, $sgpr30_sgpr31 ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 ; CHECK-NEXT: [[MV:%[0-9]+]]:_(s96) = G_MERGE_VALUES [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32) + ; CHECK-NEXT: [[COPY3:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF ; CHECK-NEXT: G_STORE [[MV]](s96), [[DEF]](p1) :: (store (s96) into `i96 addrspace(1)* undef`, align 8, addrspace 1) - ; CHECK-NEXT: SI_RETURN + ; CHECK-NEXT: [[COPY4:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY3]] + ; CHECK-NEXT: S_SETPC_B64_return [[COPY4]] store i96 %arg0, i96 addrspace(1)* undef ret void } @@ -481,14 +535,16 @@ define void @void_func_i96(i96 %arg0) #0 { define void @void_func_p0i8(i8* %arg0) #0 { ; CHECK-LABEL: name: void_func_p0i8 ; CHECK: bb.1 (%ir-block.0): - ; CHECK-NEXT: liveins: $vgpr0, $vgpr1 + ; CHECK-NEXT: liveins: $vgpr0, $vgpr1, $sgpr30_sgpr31 ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 ; CHECK-NEXT: [[MV:%[0-9]+]]:_(p0) = G_MERGE_VALUES [[COPY]](s32), [[COPY1]](s32) + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF ; CHECK-NEXT: G_STORE [[MV]](p0), [[DEF]](p1) :: (store (p0) into `i8* addrspace(1)* undef`, addrspace 1) - ; CHECK-NEXT: SI_RETURN + ; CHECK-NEXT: [[COPY3:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY2]] + ; CHECK-NEXT: S_SETPC_B64_return [[COPY3]] store i8* %arg0, i8* addrspace(1)* undef ret void } @@ -496,14 +552,16 @@ define void @void_func_p0i8(i8* %arg0) #0 { define void @void_func_p1i8(i8 addrspace(1)* %arg0) #0 { ; CHECK-LABEL: name: void_func_p1i8 ; CHECK: bb.1 (%ir-block.0): - ; CHECK-NEXT: liveins: $vgpr0, $vgpr1 + ; CHECK-NEXT: liveins: $vgpr0, $vgpr1, $sgpr30_sgpr31 ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 ; CHECK-NEXT: [[MV:%[0-9]+]]:_(p1) = G_MERGE_VALUES [[COPY]](s32), [[COPY1]](s32) + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF ; CHECK-NEXT: G_STORE [[MV]](p1), [[DEF]](p1) :: (store (p1) into `i8 addrspace(1)* addrspace(1)* undef`, addrspace 1) - ; CHECK-NEXT: SI_RETURN + ; CHECK-NEXT: [[COPY3:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY2]] + ; CHECK-NEXT: S_SETPC_B64_return [[COPY3]] store i8 addrspace(1)* %arg0, i8 addrspace(1)* addrspace(1)* undef ret void } @@ -511,13 +569,15 @@ define void @void_func_p1i8(i8 addrspace(1)* %arg0) #0 { define void @void_func_f16(half %arg0) #0 { ; CHECK-LABEL: name: void_func_f16 ; CHECK: bb.1 (%ir-block.0): - ; CHECK-NEXT: liveins: $vgpr0 + ; CHECK-NEXT: liveins: $vgpr0, $sgpr30_sgpr31 ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY]](s32) + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF ; CHECK-NEXT: G_STORE [[TRUNC]](s16), [[DEF]](p1) :: (store (s16) into `half addrspace(1)* undef`, addrspace 1) - ; CHECK-NEXT: SI_RETURN + ; CHECK-NEXT: 
[[COPY2:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY1]] + ; CHECK-NEXT: S_SETPC_B64_return [[COPY2]] store half %arg0, half addrspace(1)* undef ret void } @@ -525,12 +585,14 @@ define void @void_func_f16(half %arg0) #0 { define void @void_func_f32(float %arg0) #0 { ; CHECK-LABEL: name: void_func_f32 ; CHECK: bb.1 (%ir-block.0): - ; CHECK-NEXT: liveins: $vgpr0 + ; CHECK-NEXT: liveins: $vgpr0, $sgpr30_sgpr31 ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF ; CHECK-NEXT: G_STORE [[COPY]](s32), [[DEF]](p1) :: (store (s32) into `float addrspace(1)* undef`, addrspace 1) - ; CHECK-NEXT: SI_RETURN + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY1]] + ; CHECK-NEXT: S_SETPC_B64_return [[COPY2]] store float %arg0, float addrspace(1)* undef ret void } @@ -538,14 +600,16 @@ define void @void_func_f32(float %arg0) #0 { define void @void_func_f64(double %arg0) #0 { ; CHECK-LABEL: name: void_func_f64 ; CHECK: bb.1 (%ir-block.0): - ; CHECK-NEXT: liveins: $vgpr0, $vgpr1 + ; CHECK-NEXT: liveins: $vgpr0, $vgpr1, $sgpr30_sgpr31 ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 ; CHECK-NEXT: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY]](s32), [[COPY1]](s32) + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF ; CHECK-NEXT: G_STORE [[MV]](s64), [[DEF]](p1) :: (store (s64) into `double addrspace(1)* undef`, addrspace 1) - ; CHECK-NEXT: SI_RETURN + ; CHECK-NEXT: [[COPY3:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY2]] + ; CHECK-NEXT: S_SETPC_B64_return [[COPY3]] store double %arg0, double addrspace(1)* undef ret void } @@ -553,14 +617,16 @@ define void @void_func_f64(double %arg0) #0 { define void @void_func_v2i32(<2 x i32> %arg0) #0 { ; CHECK-LABEL: name: void_func_v2i32 ; CHECK: bb.1 (%ir-block.0): - ; CHECK-NEXT: liveins: $vgpr0, $vgpr1 + ; CHECK-NEXT: liveins: $vgpr0, $vgpr1, $sgpr30_sgpr31 ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32) + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF ; CHECK-NEXT: G_STORE [[BUILD_VECTOR]](<2 x s32>), [[DEF]](p1) :: (store (<2 x s32>) into `<2 x i32> addrspace(1)* undef`, addrspace 1) - ; CHECK-NEXT: SI_RETURN + ; CHECK-NEXT: [[COPY3:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY2]] + ; CHECK-NEXT: S_SETPC_B64_return [[COPY3]] store <2 x i32> %arg0, <2 x i32> addrspace(1)* undef ret void } @@ -568,15 +634,17 @@ define void @void_func_v2i32(<2 x i32> %arg0) #0 { define void @void_func_v2i24(<2 x i24> %arg0) #0 { ; CHECK-LABEL: name: void_func_v2i24 ; CHECK: bb.1 (%ir-block.0): - ; CHECK-NEXT: liveins: $vgpr0, $vgpr1 + ; CHECK-NEXT: liveins: $vgpr0, $vgpr1, $sgpr30_sgpr31 ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32) ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(<2 x s24>) = G_TRUNC [[BUILD_VECTOR]](<2 x s32>) + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF ; CHECK-NEXT: G_STORE [[TRUNC]](<2 x s24>), [[DEF]](p1) :: (store (<2 x s24>) into `<2 x 
i24> addrspace(1)* undef`, align 8, addrspace 1) - ; CHECK-NEXT: SI_RETURN + ; CHECK-NEXT: [[COPY3:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY2]] + ; CHECK-NEXT: S_SETPC_B64_return [[COPY3]] store <2 x i24> %arg0, <2 x i24> addrspace(1)* undef ret void } @@ -584,16 +652,18 @@ define void @void_func_v2i24(<2 x i24> %arg0) #0 { define void @void_func_v3i24(<3 x i24> %arg0) #0 { ; CHECK-LABEL: name: void_func_v3i24 ; CHECK: bb.1 (%ir-block.0): - ; CHECK-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2 + ; CHECK-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2, $sgpr30_sgpr31 ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32) ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(<3 x s24>) = G_TRUNC [[BUILD_VECTOR]](<3 x s32>) + ; CHECK-NEXT: [[COPY3:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF ; CHECK-NEXT: G_STORE [[TRUNC]](<3 x s24>), [[DEF]](p1) :: (store (<3 x s24>) into `<3 x i24> addrspace(1)* undef`, align 16, addrspace 1) - ; CHECK-NEXT: SI_RETURN + ; CHECK-NEXT: [[COPY4:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY3]] + ; CHECK-NEXT: S_SETPC_B64_return [[COPY4]] store <3 x i24> %arg0, <3 x i24> addrspace(1)* undef ret void } @@ -601,7 +671,7 @@ define void @void_func_v3i24(<3 x i24> %arg0) #0 { define void @void_func_v2i8(<2 x i8> %arg0) #0 { ; CHECK-LABEL: name: void_func_v2i8 ; CHECK: bb.1 (%ir-block.0): - ; CHECK-NEXT: liveins: $vgpr0, $vgpr1 + ; CHECK-NEXT: liveins: $vgpr0, $vgpr1, $sgpr30_sgpr31 ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY]](s32) @@ -609,9 +679,11 @@ define void @void_func_v2i8(<2 x i8> %arg0) #0 { ; CHECK-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY1]](s32) ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[TRUNC1]](s16) ; CHECK-NEXT: [[TRUNC2:%[0-9]+]]:_(<2 x s8>) = G_TRUNC [[BUILD_VECTOR]](<2 x s16>) + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF ; CHECK-NEXT: G_STORE [[TRUNC2]](<2 x s8>), [[DEF]](p1) :: (store (<2 x s8>) into `<2 x i8> addrspace(1)* undef`, addrspace 1) - ; CHECK-NEXT: SI_RETURN + ; CHECK-NEXT: [[COPY3:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY2]] + ; CHECK-NEXT: S_SETPC_B64_return [[COPY3]] store <2 x i8> %arg0, <2 x i8> addrspace(1)* undef ret void } @@ -619,7 +691,7 @@ define void @void_func_v2i8(<2 x i8> %arg0) #0 { define void @void_func_v3i8(<3 x i8> %arg0) #0 { ; CHECK-LABEL: name: void_func_v3i8 ; CHECK: bb.1 (%ir-block.0): - ; CHECK-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2 + ; CHECK-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2, $sgpr30_sgpr31 ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY]](s32) @@ -629,9 +701,11 @@ define void @void_func_v3i8(<3 x i8> %arg0) #0 { ; CHECK-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[COPY2]](s32) ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[TRUNC1]](s16), [[TRUNC2]](s16) ; CHECK-NEXT: [[TRUNC3:%[0-9]+]]:_(<3 x s8>) = G_TRUNC [[BUILD_VECTOR]](<3 x s16>) + ; CHECK-NEXT: [[COPY3:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF ; CHECK-NEXT: G_STORE [[TRUNC3]](<3 x s8>), [[DEF]](p1) :: (store (<3 x s8>) into `<3 x i8> addrspace(1)* undef`, 
align 4, addrspace 1) - ; CHECK-NEXT: SI_RETURN + ; CHECK-NEXT: [[COPY4:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY3]] + ; CHECK-NEXT: S_SETPC_B64_return [[COPY4]] store <3 x i8> %arg0, <3 x i8> addrspace(1)* undef ret void } @@ -639,7 +713,7 @@ define void @void_func_v3i8(<3 x i8> %arg0) #0 { define void @void_func_v4i8(<4 x i8> %arg0) #0 { ; CHECK-LABEL: name: void_func_v4i8 ; CHECK: bb.1 (%ir-block.0): - ; CHECK-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3 + ; CHECK-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $sgpr30_sgpr31 ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY]](s32) @@ -651,9 +725,11 @@ define void @void_func_v4i8(<4 x i8> %arg0) #0 { ; CHECK-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[COPY3]](s32) ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[TRUNC1]](s16), [[TRUNC2]](s16), [[TRUNC3]](s16) ; CHECK-NEXT: [[TRUNC4:%[0-9]+]]:_(<4 x s8>) = G_TRUNC [[BUILD_VECTOR]](<4 x s16>) + ; CHECK-NEXT: [[COPY4:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF ; CHECK-NEXT: G_STORE [[TRUNC4]](<4 x s8>), [[DEF]](p1) :: (store (<4 x s8>) into `<4 x i8> addrspace(1)* undef`, addrspace 1) - ; CHECK-NEXT: SI_RETURN + ; CHECK-NEXT: [[COPY5:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY4]] + ; CHECK-NEXT: S_SETPC_B64_return [[COPY5]] store <4 x i8> %arg0, <4 x i8> addrspace(1)* undef ret void } @@ -661,14 +737,16 @@ define void @void_func_v4i8(<4 x i8> %arg0) #0 { define void @void_func_v2p3i8(<2 x i8 addrspace(3)*> %arg0) #0 { ; CHECK-LABEL: name: void_func_v2p3i8 ; CHECK: bb.1 (%ir-block.0): - ; CHECK-NEXT: liveins: $vgpr0, $vgpr1 + ; CHECK-NEXT: liveins: $vgpr0, $vgpr1, $sgpr30_sgpr31 ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(p3) = COPY $vgpr1 ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x p3>) = G_BUILD_VECTOR [[COPY]](p3), [[COPY1]](p3) + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF ; CHECK-NEXT: G_STORE [[BUILD_VECTOR]](<2 x p3>), [[DEF]](p1) :: (store (<2 x p3>) into `<2 x i8 addrspace(3)*> addrspace(1)* undef`, addrspace 1) - ; CHECK-NEXT: SI_RETURN + ; CHECK-NEXT: [[COPY3:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY2]] + ; CHECK-NEXT: S_SETPC_B64_return [[COPY3]] store <2 x i8 addrspace(3)*> %arg0, <2 x i8 addrspace(3)*> addrspace(1)* undef ret void } @@ -676,15 +754,17 @@ define void @void_func_v2p3i8(<2 x i8 addrspace(3)*> %arg0) #0 { define void @void_func_v3i32(<3 x i32> %arg0) #0 { ; CHECK-LABEL: name: void_func_v3i32 ; CHECK: bb.1 (%ir-block.0): - ; CHECK-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2 + ; CHECK-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2, $sgpr30_sgpr31 ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32) + ; CHECK-NEXT: [[COPY3:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF ; CHECK-NEXT: G_STORE [[BUILD_VECTOR]](<3 x s32>), [[DEF]](p1) :: (store (<3 x s32>) into `<3 x i32> addrspace(1)* undef`, align 16, addrspace 1) - ; CHECK-NEXT: SI_RETURN + ; CHECK-NEXT: [[COPY4:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY3]] + ; CHECK-NEXT: S_SETPC_B64_return [[COPY4]] store <3 x i32> %arg0, <3 x i32> addrspace(1)* undef ret void } @@ -692,16 +772,18 
@@ define void @void_func_v3i32(<3 x i32> %arg0) #0 { define void @void_func_v4i32(<4 x i32> %arg0) #0 { ; CHECK-LABEL: name: void_func_v4i32 ; CHECK: bb.1 (%ir-block.0): - ; CHECK-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3 + ; CHECK-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $sgpr30_sgpr31 ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 ; CHECK-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $vgpr3 ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32) + ; CHECK-NEXT: [[COPY4:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF ; CHECK-NEXT: G_STORE [[BUILD_VECTOR]](<4 x s32>), [[DEF]](p1) :: (store (<4 x s32>) into `<4 x i32> addrspace(1)* undef`, addrspace 1) - ; CHECK-NEXT: SI_RETURN + ; CHECK-NEXT: [[COPY5:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY4]] + ; CHECK-NEXT: S_SETPC_B64_return [[COPY5]] store <4 x i32> %arg0, <4 x i32> addrspace(1)* undef ret void } @@ -709,7 +791,7 @@ define void @void_func_v4i32(<4 x i32> %arg0) #0 { define void @void_func_v5i32(<5 x i32> %arg0) #0 { ; CHECK-LABEL: name: void_func_v5i32 ; CHECK: bb.1 (%ir-block.0): - ; CHECK-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4 + ; CHECK-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $sgpr30_sgpr31 ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 @@ -717,9 +799,11 @@ define void @void_func_v5i32(<5 x i32> %arg0) #0 { ; CHECK-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $vgpr3 ; CHECK-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $vgpr4 ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<5 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32) + ; CHECK-NEXT: [[COPY5:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF ; CHECK-NEXT: G_STORE [[BUILD_VECTOR]](<5 x s32>), [[DEF]](p1) :: (store (<5 x s32>) into `<5 x i32> addrspace(1)* undef`, align 32, addrspace 1) - ; CHECK-NEXT: SI_RETURN + ; CHECK-NEXT: [[COPY6:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY5]] + ; CHECK-NEXT: S_SETPC_B64_return [[COPY6]] store <5 x i32> %arg0, <5 x i32> addrspace(1)* undef ret void } @@ -727,7 +811,7 @@ define void @void_func_v5i32(<5 x i32> %arg0) #0 { define void @void_func_v8i32(<8 x i32> %arg0) #0 { ; CHECK-LABEL: name: void_func_v8i32 ; CHECK: bb.1 (%ir-block.0): - ; CHECK-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7 + ; CHECK-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $sgpr30_sgpr31 ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 @@ -738,9 +822,11 @@ define void @void_func_v8i32(<8 x i32> %arg0) #0 { ; CHECK-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $vgpr6 ; CHECK-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $vgpr7 ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) + ; CHECK-NEXT: [[COPY8:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF ; CHECK-NEXT: G_STORE [[BUILD_VECTOR]](<8 x s32>), [[DEF]](p1) :: (store (<8 x s32>) into `<8 x i32> addrspace(1)* undef`, addrspace 1) - ; CHECK-NEXT: SI_RETURN + ; CHECK-NEXT: 
[[COPY9:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY8]] + ; CHECK-NEXT: S_SETPC_B64_return [[COPY9]] store <8 x i32> %arg0, <8 x i32> addrspace(1)* undef ret void } @@ -748,7 +834,7 @@ define void @void_func_v8i32(<8 x i32> %arg0) #0 { define void @void_func_v16i32(<16 x i32> %arg0) #0 { ; CHECK-LABEL: name: void_func_v16i32 ; CHECK: bb.1 (%ir-block.0): - ; CHECK-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $vgpr8, $vgpr9, $vgpr10, $vgpr11, $vgpr12, $vgpr13, $vgpr14, $vgpr15 + ; CHECK-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $vgpr8, $vgpr9, $vgpr10, $vgpr11, $vgpr12, $vgpr13, $vgpr14, $vgpr15, $sgpr30_sgpr31 ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 @@ -767,9 +853,11 @@ define void @void_func_v16i32(<16 x i32> %arg0) #0 { ; CHECK-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY $vgpr14 ; CHECK-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY $vgpr15 ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<16 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32), [[COPY8]](s32), [[COPY9]](s32), [[COPY10]](s32), [[COPY11]](s32), [[COPY12]](s32), [[COPY13]](s32), [[COPY14]](s32), [[COPY15]](s32) + ; CHECK-NEXT: [[COPY16:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF ; CHECK-NEXT: G_STORE [[BUILD_VECTOR]](<16 x s32>), [[DEF]](p1) :: (store (<16 x s32>) into `<16 x i32> addrspace(1)* undef`, addrspace 1) - ; CHECK-NEXT: SI_RETURN + ; CHECK-NEXT: [[COPY17:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY16]] + ; CHECK-NEXT: S_SETPC_B64_return [[COPY17]] store <16 x i32> %arg0, <16 x i32> addrspace(1)* undef ret void } @@ -777,7 +865,7 @@ define void @void_func_v16i32(<16 x i32> %arg0) #0 { define void @void_func_v32i32(<32 x i32> %arg0) #0 { ; CHECK-LABEL: name: void_func_v32i32 ; CHECK: bb.1 (%ir-block.0): - ; CHECK-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $vgpr8, $vgpr9, $vgpr10, $vgpr11, $vgpr12, $vgpr13, $vgpr14, $vgpr15, $vgpr16, $vgpr17, $vgpr18, $vgpr19, $vgpr20, $vgpr21, $vgpr22, $vgpr23, $vgpr24, $vgpr25, $vgpr26, $vgpr27, $vgpr28, $vgpr29, $vgpr30 + ; CHECK-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $vgpr8, $vgpr9, $vgpr10, $vgpr11, $vgpr12, $vgpr13, $vgpr14, $vgpr15, $vgpr16, $vgpr17, $vgpr18, $vgpr19, $vgpr20, $vgpr21, $vgpr22, $vgpr23, $vgpr24, $vgpr25, $vgpr26, $vgpr27, $vgpr28, $vgpr29, $vgpr30, $sgpr30_sgpr31 ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 @@ -813,9 +901,11 @@ define void @void_func_v32i32(<32 x i32> %arg0) #0 { ; CHECK-NEXT: [[FRAME_INDEX:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.0 ; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX]](p5) :: (invariant load (s32) from %fixed-stack.0, align 16, addrspace 5) ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<32 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32), [[COPY8]](s32), [[COPY9]](s32), [[COPY10]](s32), [[COPY11]](s32), [[COPY12]](s32), [[COPY13]](s32), [[COPY14]](s32), [[COPY15]](s32), [[COPY16]](s32), [[COPY17]](s32), [[COPY18]](s32), [[COPY19]](s32), [[COPY20]](s32), [[COPY21]](s32), [[COPY22]](s32), [[COPY23]](s32), [[COPY24]](s32), [[COPY25]](s32), [[COPY26]](s32), [[COPY27]](s32), [[COPY28]](s32), [[COPY29]](s32), 
[[COPY30]](s32), [[LOAD]](s32) + ; CHECK-NEXT: [[COPY31:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF ; CHECK-NEXT: G_STORE [[BUILD_VECTOR]](<32 x s32>), [[DEF]](p1) :: (store (<32 x s32>) into `<32 x i32> addrspace(1)* undef`, addrspace 1) - ; CHECK-NEXT: SI_RETURN + ; CHECK-NEXT: [[COPY32:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY31]] + ; CHECK-NEXT: S_SETPC_B64_return [[COPY32]] store <32 x i32> %arg0, <32 x i32> addrspace(1)* undef ret void } @@ -824,7 +914,7 @@ define void @void_func_v32i32(<32 x i32> %arg0) #0 { define void @void_func_v33i32(<33 x i32> %arg0) #0 { ; CHECK-LABEL: name: void_func_v33i32 ; CHECK: bb.1 (%ir-block.0): - ; CHECK-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $vgpr8, $vgpr9, $vgpr10, $vgpr11, $vgpr12, $vgpr13, $vgpr14, $vgpr15, $vgpr16, $vgpr17, $vgpr18, $vgpr19, $vgpr20, $vgpr21, $vgpr22, $vgpr23, $vgpr24, $vgpr25, $vgpr26, $vgpr27, $vgpr28, $vgpr29, $vgpr30 + ; CHECK-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $vgpr8, $vgpr9, $vgpr10, $vgpr11, $vgpr12, $vgpr13, $vgpr14, $vgpr15, $vgpr16, $vgpr17, $vgpr18, $vgpr19, $vgpr20, $vgpr21, $vgpr22, $vgpr23, $vgpr24, $vgpr25, $vgpr26, $vgpr27, $vgpr28, $vgpr29, $vgpr30, $sgpr30_sgpr31 ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 @@ -862,9 +952,11 @@ define void @void_func_v33i32(<33 x i32> %arg0) #0 { ; CHECK-NEXT: [[FRAME_INDEX1:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.0 ; CHECK-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX1]](p5) :: (invariant load (s32) from %fixed-stack.0, addrspace 5) ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<33 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32), [[COPY8]](s32), [[COPY9]](s32), [[COPY10]](s32), [[COPY11]](s32), [[COPY12]](s32), [[COPY13]](s32), [[COPY14]](s32), [[COPY15]](s32), [[COPY16]](s32), [[COPY17]](s32), [[COPY18]](s32), [[COPY19]](s32), [[COPY20]](s32), [[COPY21]](s32), [[COPY22]](s32), [[COPY23]](s32), [[COPY24]](s32), [[COPY25]](s32), [[COPY26]](s32), [[COPY27]](s32), [[COPY28]](s32), [[COPY29]](s32), [[COPY30]](s32), [[LOAD]](s32), [[LOAD1]](s32) + ; CHECK-NEXT: [[COPY31:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF ; CHECK-NEXT: G_STORE [[BUILD_VECTOR]](<33 x s32>), [[DEF]](p1) :: (store (<33 x s32>) into `<33 x i32> addrspace(1)* undef`, align 256, addrspace 1) - ; CHECK-NEXT: SI_RETURN + ; CHECK-NEXT: [[COPY32:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY31]] + ; CHECK-NEXT: S_SETPC_B64_return [[COPY32]] store <33 x i32> %arg0, <33 x i32> addrspace(1)* undef ret void } @@ -872,7 +964,7 @@ define void @void_func_v33i32(<33 x i32> %arg0) #0 { define void @void_func_v2i64(<2 x i64> %arg0) #0 { ; CHECK-LABEL: name: void_func_v2i64 ; CHECK: bb.1 (%ir-block.0): - ; CHECK-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3 + ; CHECK-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $sgpr30_sgpr31 ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 @@ -881,9 +973,11 @@ define void @void_func_v2i64(<2 x i64> %arg0) #0 { ; CHECK-NEXT: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY]](s32), [[COPY1]](s32) ; CHECK-NEXT: [[MV1:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY2]](s32), [[COPY3]](s32) ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s64>) = G_BUILD_VECTOR [[MV]](s64), 
[[MV1]](s64) + ; CHECK-NEXT: [[COPY4:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF ; CHECK-NEXT: G_STORE [[BUILD_VECTOR]](<2 x s64>), [[DEF]](p1) :: (store (<2 x s64>) into `<2 x i64> addrspace(1)* undef`, addrspace 1) - ; CHECK-NEXT: SI_RETURN + ; CHECK-NEXT: [[COPY5:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY4]] + ; CHECK-NEXT: S_SETPC_B64_return [[COPY5]] store <2 x i64> %arg0, <2 x i64> addrspace(1)* undef ret void } @@ -891,7 +985,7 @@ define void @void_func_v2i64(<2 x i64> %arg0) #0 { define void @void_func_v2p0i8(<2 x i8*> %arg0) #0 { ; CHECK-LABEL: name: void_func_v2p0i8 ; CHECK: bb.1 (%ir-block.0): - ; CHECK-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3 + ; CHECK-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $sgpr30_sgpr31 ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 @@ -900,9 +994,11 @@ define void @void_func_v2p0i8(<2 x i8*> %arg0) #0 { ; CHECK-NEXT: [[MV:%[0-9]+]]:_(p0) = G_MERGE_VALUES [[COPY]](s32), [[COPY1]](s32) ; CHECK-NEXT: [[MV1:%[0-9]+]]:_(p0) = G_MERGE_VALUES [[COPY2]](s32), [[COPY3]](s32) ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x p0>) = G_BUILD_VECTOR [[MV]](p0), [[MV1]](p0) + ; CHECK-NEXT: [[COPY4:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF ; CHECK-NEXT: G_STORE [[BUILD_VECTOR]](<2 x p0>), [[DEF]](p1) :: (store (<2 x p0>) into `<2 x i8*> addrspace(1)* undef`, addrspace 1) - ; CHECK-NEXT: SI_RETURN + ; CHECK-NEXT: [[COPY5:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY4]] + ; CHECK-NEXT: S_SETPC_B64_return [[COPY5]] store <2 x i8*> %arg0, <2 x i8*> addrspace(1)* undef ret void } @@ -910,7 +1006,7 @@ define void @void_func_v2p0i8(<2 x i8*> %arg0) #0 { define void @void_func_v2p1i8(<2 x i8 addrspace(1)*> %arg0) #0 { ; CHECK-LABEL: name: void_func_v2p1i8 ; CHECK: bb.1 (%ir-block.0): - ; CHECK-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3 + ; CHECK-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $sgpr30_sgpr31 ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 @@ -919,9 +1015,11 @@ define void @void_func_v2p1i8(<2 x i8 addrspace(1)*> %arg0) #0 { ; CHECK-NEXT: [[MV:%[0-9]+]]:_(p1) = G_MERGE_VALUES [[COPY]](s32), [[COPY1]](s32) ; CHECK-NEXT: [[MV1:%[0-9]+]]:_(p1) = G_MERGE_VALUES [[COPY2]](s32), [[COPY3]](s32) ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x p1>) = G_BUILD_VECTOR [[MV]](p1), [[MV1]](p1) + ; CHECK-NEXT: [[COPY4:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF ; CHECK-NEXT: G_STORE [[BUILD_VECTOR]](<2 x p1>), [[DEF]](p1) :: (store (<2 x p1>) into `<2 x i8 addrspace(1)*> addrspace(1)* undef`, addrspace 1) - ; CHECK-NEXT: SI_RETURN + ; CHECK-NEXT: [[COPY5:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY4]] + ; CHECK-NEXT: S_SETPC_B64_return [[COPY5]] store <2 x i8 addrspace(1)*> %arg0, <2 x i8 addrspace(1)*> addrspace(1)* undef ret void } @@ -929,7 +1027,7 @@ define void @void_func_v2p1i8(<2 x i8 addrspace(1)*> %arg0) #0 { define void @void_func_v3i64(<3 x i64> %arg0) #0 { ; CHECK-LABEL: name: void_func_v3i64 ; CHECK: bb.1 (%ir-block.0): - ; CHECK-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5 + ; CHECK-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $sgpr30_sgpr31 ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 @@ -941,9 +1039,11 @@ define void @void_func_v3i64(<3 x i64> %arg0) #0 { ; 
CHECK-NEXT: [[MV1:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY2]](s32), [[COPY3]](s32) ; CHECK-NEXT: [[MV2:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY4]](s32), [[COPY5]](s32) ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s64>) = G_BUILD_VECTOR [[MV]](s64), [[MV1]](s64), [[MV2]](s64) + ; CHECK-NEXT: [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF ; CHECK-NEXT: G_STORE [[BUILD_VECTOR]](<3 x s64>), [[DEF]](p1) :: (store (<3 x s64>) into `<3 x i64> addrspace(1)* undef`, align 32, addrspace 1) - ; CHECK-NEXT: SI_RETURN + ; CHECK-NEXT: [[COPY7:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY6]] + ; CHECK-NEXT: S_SETPC_B64_return [[COPY7]] store <3 x i64> %arg0, <3 x i64> addrspace(1)* undef ret void } @@ -951,7 +1051,7 @@ define void @void_func_v3i64(<3 x i64> %arg0) #0 { define void @void_func_v4i64(<4 x i64> %arg0) #0 { ; CHECK-LABEL: name: void_func_v4i64 ; CHECK: bb.1 (%ir-block.0): - ; CHECK-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7 + ; CHECK-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $sgpr30_sgpr31 ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 @@ -966,9 +1066,11 @@ define void @void_func_v4i64(<4 x i64> %arg0) #0 { ; CHECK-NEXT: [[MV2:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY4]](s32), [[COPY5]](s32) ; CHECK-NEXT: [[MV3:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY6]](s32), [[COPY7]](s32) ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s64>) = G_BUILD_VECTOR [[MV]](s64), [[MV1]](s64), [[MV2]](s64), [[MV3]](s64) + ; CHECK-NEXT: [[COPY8:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF ; CHECK-NEXT: G_STORE [[BUILD_VECTOR]](<4 x s64>), [[DEF]](p1) :: (store (<4 x s64>) into `<4 x i64> addrspace(1)* undef`, addrspace 1) - ; CHECK-NEXT: SI_RETURN + ; CHECK-NEXT: [[COPY9:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY8]] + ; CHECK-NEXT: S_SETPC_B64_return [[COPY9]] store <4 x i64> %arg0, <4 x i64> addrspace(1)* undef ret void } @@ -976,7 +1078,7 @@ define void @void_func_v4i64(<4 x i64> %arg0) #0 { define void @void_func_v5i64(<5 x i64> %arg0) #0 { ; CHECK-LABEL: name: void_func_v5i64 ; CHECK: bb.1 (%ir-block.0): - ; CHECK-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $vgpr8, $vgpr9 + ; CHECK-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $vgpr8, $vgpr9, $sgpr30_sgpr31 ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 @@ -994,9 +1096,11 @@ define void @void_func_v5i64(<5 x i64> %arg0) #0 { ; CHECK-NEXT: [[MV3:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY6]](s32), [[COPY7]](s32) ; CHECK-NEXT: [[MV4:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY8]](s32), [[COPY9]](s32) ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<5 x s64>) = G_BUILD_VECTOR [[MV]](s64), [[MV1]](s64), [[MV2]](s64), [[MV3]](s64), [[MV4]](s64) + ; CHECK-NEXT: [[COPY10:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF ; CHECK-NEXT: G_STORE [[BUILD_VECTOR]](<5 x s64>), [[DEF]](p1) :: (store (<5 x s64>) into `<5 x i64> addrspace(1)* undef`, align 64, addrspace 1) - ; CHECK-NEXT: SI_RETURN + ; CHECK-NEXT: [[COPY11:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY10]] + ; CHECK-NEXT: S_SETPC_B64_return [[COPY11]] store <5 x i64> %arg0, <5 x i64> addrspace(1)* undef ret void } @@ -1004,7 +1108,7 @@ define void @void_func_v5i64(<5 x i64> %arg0) #0 { define void @void_func_v8i64(<8 x 
i64> %arg0) #0 { ; CHECK-LABEL: name: void_func_v8i64 ; CHECK: bb.1 (%ir-block.0): - ; CHECK-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $vgpr8, $vgpr9, $vgpr10, $vgpr11, $vgpr12, $vgpr13, $vgpr14, $vgpr15 + ; CHECK-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $vgpr8, $vgpr9, $vgpr10, $vgpr11, $vgpr12, $vgpr13, $vgpr14, $vgpr15, $sgpr30_sgpr31 ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 @@ -1031,9 +1135,11 @@ define void @void_func_v8i64(<8 x i64> %arg0) #0 { ; CHECK-NEXT: [[MV6:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY12]](s32), [[COPY13]](s32) ; CHECK-NEXT: [[MV7:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY14]](s32), [[COPY15]](s32) ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s64>) = G_BUILD_VECTOR [[MV]](s64), [[MV1]](s64), [[MV2]](s64), [[MV3]](s64), [[MV4]](s64), [[MV5]](s64), [[MV6]](s64), [[MV7]](s64) + ; CHECK-NEXT: [[COPY16:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF ; CHECK-NEXT: G_STORE [[BUILD_VECTOR]](<8 x s64>), [[DEF]](p1) :: (store (<8 x s64>) into `<8 x i64> addrspace(1)* undef`, addrspace 1) - ; CHECK-NEXT: SI_RETURN + ; CHECK-NEXT: [[COPY17:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY16]] + ; CHECK-NEXT: S_SETPC_B64_return [[COPY17]] store <8 x i64> %arg0, <8 x i64> addrspace(1)* undef ret void } @@ -1041,7 +1147,7 @@ define void @void_func_v8i64(<8 x i64> %arg0) #0 { define void @void_func_v16i64(<16 x i64> %arg0) #0 { ; CHECK-LABEL: name: void_func_v16i64 ; CHECK: bb.1 (%ir-block.0): - ; CHECK-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $vgpr8, $vgpr9, $vgpr10, $vgpr11, $vgpr12, $vgpr13, $vgpr14, $vgpr15, $vgpr16, $vgpr17, $vgpr18, $vgpr19, $vgpr20, $vgpr21, $vgpr22, $vgpr23, $vgpr24, $vgpr25, $vgpr26, $vgpr27, $vgpr28, $vgpr29, $vgpr30 + ; CHECK-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $vgpr8, $vgpr9, $vgpr10, $vgpr11, $vgpr12, $vgpr13, $vgpr14, $vgpr15, $vgpr16, $vgpr17, $vgpr18, $vgpr19, $vgpr20, $vgpr21, $vgpr22, $vgpr23, $vgpr24, $vgpr25, $vgpr26, $vgpr27, $vgpr28, $vgpr29, $vgpr30, $sgpr30_sgpr31 ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 @@ -1093,9 +1199,11 @@ define void @void_func_v16i64(<16 x i64> %arg0) #0 { ; CHECK-NEXT: [[MV14:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY28]](s32), [[COPY29]](s32) ; CHECK-NEXT: [[MV15:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY30]](s32), [[LOAD]](s32) ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<16 x s64>) = G_BUILD_VECTOR [[MV]](s64), [[MV1]](s64), [[MV2]](s64), [[MV3]](s64), [[MV4]](s64), [[MV5]](s64), [[MV6]](s64), [[MV7]](s64), [[MV8]](s64), [[MV9]](s64), [[MV10]](s64), [[MV11]](s64), [[MV12]](s64), [[MV13]](s64), [[MV14]](s64), [[MV15]](s64) + ; CHECK-NEXT: [[COPY31:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF ; CHECK-NEXT: G_STORE [[BUILD_VECTOR]](<16 x s64>), [[DEF]](p1) :: (store (<16 x s64>) into `<16 x i64> addrspace(1)* undef`, addrspace 1) - ; CHECK-NEXT: SI_RETURN + ; CHECK-NEXT: [[COPY32:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY31]] + ; CHECK-NEXT: S_SETPC_B64_return [[COPY32]] store <16 x i64> %arg0, <16 x i64> addrspace(1)* undef ret void } @@ -1103,12 +1211,14 @@ define void @void_func_v16i64(<16 x i64> %arg0) #0 { define void @void_func_v2i16(<2 x i16> %arg0) #0 { ; CHECK-LABEL: name: void_func_v2i16 ; CHECK: bb.1 (%ir-block.0): - ; 
CHECK-NEXT: liveins: $vgpr0 + ; CHECK-NEXT: liveins: $vgpr0, $sgpr30_sgpr31 ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF ; CHECK-NEXT: G_STORE [[COPY]](<2 x s16>), [[DEF]](p1) :: (store (<2 x s16>) into `<2 x i16> addrspace(1)* undef`, addrspace 1) - ; CHECK-NEXT: SI_RETURN + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY1]] + ; CHECK-NEXT: S_SETPC_B64_return [[COPY2]] store <2 x i16> %arg0, <2 x i16> addrspace(1)* undef ret void } @@ -1116,16 +1226,18 @@ define void @void_func_v2i16(<2 x i16> %arg0) #0 { define void @void_func_v3i16(<3 x i16> %arg0) #0 { ; CHECK-LABEL: name: void_func_v3i16 ; CHECK: bb.1 (%ir-block.0): - ; CHECK-NEXT: liveins: $vgpr0, $vgpr1 + ; CHECK-NEXT: liveins: $vgpr0, $vgpr1, $sgpr30_sgpr31 ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0 ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr1 ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(<2 x s16>) = G_IMPLICIT_DEF ; CHECK-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[COPY]](<2 x s16>), [[COPY1]](<2 x s16>), [[DEF]](<2 x s16>) ; CHECK-NEXT: [[UV:%[0-9]+]]:_(<3 x s16>), [[UV1:%[0-9]+]]:_(<3 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<6 x s16>) + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 ; CHECK-NEXT: [[DEF1:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF ; CHECK-NEXT: G_STORE [[UV]](<3 x s16>), [[DEF1]](p1) :: (store (<3 x s16>) into `<3 x i16> addrspace(1)* undef`, align 8, addrspace 1) - ; CHECK-NEXT: SI_RETURN + ; CHECK-NEXT: [[COPY3:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY2]] + ; CHECK-NEXT: S_SETPC_B64_return [[COPY3]] store <3 x i16> %arg0, <3 x i16> addrspace(1)* undef ret void } @@ -1133,14 +1245,16 @@ define void @void_func_v3i16(<3 x i16> %arg0) #0 { define void @void_func_v4i16(<4 x i16> %arg0) #0 { ; CHECK-LABEL: name: void_func_v4i16 ; CHECK: bb.1 (%ir-block.0): - ; CHECK-NEXT: liveins: $vgpr0, $vgpr1 + ; CHECK-NEXT: liveins: $vgpr0, $vgpr1, $sgpr30_sgpr31 ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0 ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr1 ; CHECK-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[COPY]](<2 x s16>), [[COPY1]](<2 x s16>) + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF ; CHECK-NEXT: G_STORE [[CONCAT_VECTORS]](<4 x s16>), [[DEF]](p1) :: (store (<4 x s16>) into `<4 x i16> addrspace(1)* undef`, addrspace 1) - ; CHECK-NEXT: SI_RETURN + ; CHECK-NEXT: [[COPY3:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY2]] + ; CHECK-NEXT: S_SETPC_B64_return [[COPY3]] store <4 x i16> %arg0, <4 x i16> addrspace(1)* undef ret void } @@ -1148,7 +1262,7 @@ define void @void_func_v4i16(<4 x i16> %arg0) #0 { define void @void_func_v5i16(<5 x i16> %arg0) #0 { ; CHECK-LABEL: name: void_func_v5i16 ; CHECK: bb.1 (%ir-block.0): - ; CHECK-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2 + ; CHECK-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2, $sgpr30_sgpr31 ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0 ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr1 @@ -1156,9 +1270,11 @@ define void @void_func_v5i16(<5 x i16> %arg0) #0 { ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(<2 x s16>) = G_IMPLICIT_DEF ; CHECK-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<10 x s16>) = G_CONCAT_VECTORS [[COPY]](<2 x s16>), [[COPY1]](<2 x s16>), [[COPY2]](<2 x s16>), [[DEF]](<2 x s16>), [[DEF]](<2 x s16>) ; CHECK-NEXT: 
[[UV:%[0-9]+]]:_(<5 x s16>), [[UV1:%[0-9]+]]:_(<5 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<10 x s16>) + ; CHECK-NEXT: [[COPY3:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 ; CHECK-NEXT: [[DEF1:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF ; CHECK-NEXT: G_STORE [[UV]](<5 x s16>), [[DEF1]](p1) :: (store (<5 x s16>) into `<5 x i16> addrspace(1)* undef`, align 16, addrspace 1) - ; CHECK-NEXT: SI_RETURN + ; CHECK-NEXT: [[COPY4:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY3]] + ; CHECK-NEXT: S_SETPC_B64_return [[COPY4]] store <5 x i16> %arg0, <5 x i16> addrspace(1)* undef ret void } @@ -1166,16 +1282,18 @@ define void @void_func_v5i16(<5 x i16> %arg0) #0 { define void @void_func_v8i16(<8 x i16> %arg0) #0 { ; CHECK-LABEL: name: void_func_v8i16 ; CHECK: bb.1 (%ir-block.0): - ; CHECK-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3 + ; CHECK-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $sgpr30_sgpr31 ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0 ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr1 ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr2 ; CHECK-NEXT: [[COPY3:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr3 ; CHECK-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<8 x s16>) = G_CONCAT_VECTORS [[COPY]](<2 x s16>), [[COPY1]](<2 x s16>), [[COPY2]](<2 x s16>), [[COPY3]](<2 x s16>) + ; CHECK-NEXT: [[COPY4:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF ; CHECK-NEXT: G_STORE [[CONCAT_VECTORS]](<8 x s16>), [[DEF]](p1) :: (store (<8 x s16>) into `<8 x i16> addrspace(1)* undef`, addrspace 1) - ; CHECK-NEXT: SI_RETURN + ; CHECK-NEXT: [[COPY5:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY4]] + ; CHECK-NEXT: S_SETPC_B64_return [[COPY5]] store <8 x i16> %arg0, <8 x i16> addrspace(1)* undef ret void } @@ -1183,7 +1301,7 @@ define void @void_func_v8i16(<8 x i16> %arg0) #0 { define void @void_func_v16i16(<16 x i16> %arg0) #0 { ; CHECK-LABEL: name: void_func_v16i16 ; CHECK: bb.1 (%ir-block.0): - ; CHECK-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7 + ; CHECK-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $sgpr30_sgpr31 ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0 ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr1 @@ -1194,9 +1312,11 @@ define void @void_func_v16i16(<16 x i16> %arg0) #0 { ; CHECK-NEXT: [[COPY6:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr6 ; CHECK-NEXT: [[COPY7:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr7 ; CHECK-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<16 x s16>) = G_CONCAT_VECTORS [[COPY]](<2 x s16>), [[COPY1]](<2 x s16>), [[COPY2]](<2 x s16>), [[COPY3]](<2 x s16>), [[COPY4]](<2 x s16>), [[COPY5]](<2 x s16>), [[COPY6]](<2 x s16>), [[COPY7]](<2 x s16>) + ; CHECK-NEXT: [[COPY8:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF ; CHECK-NEXT: G_STORE [[CONCAT_VECTORS]](<16 x s16>), [[DEF]](p1) :: (store (<16 x s16>) into `<16 x i16> addrspace(1)* undef`, addrspace 1) - ; CHECK-NEXT: SI_RETURN + ; CHECK-NEXT: [[COPY9:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY8]] + ; CHECK-NEXT: S_SETPC_B64_return [[COPY9]] store <16 x i16> %arg0, <16 x i16> addrspace(1)* undef ret void } @@ -1206,7 +1326,7 @@ define void @void_func_v16i16(<16 x i16> %arg0) #0 { define void @void_func_v65i16(<65 x i16> %arg0) #0 { ; CHECK-LABEL: name: void_func_v65i16 ; CHECK: bb.1 (%ir-block.0): - ; CHECK-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $vgpr8, $vgpr9, $vgpr10, $vgpr11, $vgpr12, $vgpr13, $vgpr14, $vgpr15, $vgpr16, $vgpr17, $vgpr18, 
$vgpr19, $vgpr20, $vgpr21, $vgpr22, $vgpr23, $vgpr24, $vgpr25, $vgpr26, $vgpr27, $vgpr28, $vgpr29, $vgpr30 + ; CHECK-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $vgpr8, $vgpr9, $vgpr10, $vgpr11, $vgpr12, $vgpr13, $vgpr14, $vgpr15, $vgpr16, $vgpr17, $vgpr18, $vgpr19, $vgpr20, $vgpr21, $vgpr22, $vgpr23, $vgpr24, $vgpr25, $vgpr26, $vgpr27, $vgpr28, $vgpr29, $vgpr30, $sgpr30_sgpr31 ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0 ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr1 @@ -1246,9 +1366,11 @@ define void @void_func_v65i16(<65 x i16> %arg0) #0 { ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(<2 x s16>) = G_IMPLICIT_DEF ; CHECK-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<130 x s16>) = G_CONCAT_VECTORS [[COPY]](<2 x s16>), [[COPY1]](<2 x s16>), [[COPY2]](<2 x s16>), [[COPY3]](<2 x s16>), [[COPY4]](<2 x s16>), [[COPY5]](<2 x s16>), [[COPY6]](<2 x s16>), [[COPY7]](<2 x s16>), [[COPY8]](<2 x s16>), [[COPY9]](<2 x s16>), [[COPY10]](<2 x s16>), [[COPY11]](<2 x s16>), [[COPY12]](<2 x s16>), [[COPY13]](<2 x s16>), [[COPY14]](<2 x s16>), [[COPY15]](<2 x s16>), [[COPY16]](<2 x s16>), [[COPY17]](<2 x s16>), [[COPY18]](<2 x s16>), [[COPY19]](<2 x s16>), [[COPY20]](<2 x s16>), [[COPY21]](<2 x s16>), [[COPY22]](<2 x s16>), [[COPY23]](<2 x s16>), [[COPY24]](<2 x s16>), [[COPY25]](<2 x s16>), [[COPY26]](<2 x s16>), [[COPY27]](<2 x s16>), [[COPY28]](<2 x s16>), [[COPY29]](<2 x s16>), [[COPY30]](<2 x s16>), [[LOAD]](<2 x s16>), [[LOAD1]](<2 x s16>), [[DEF]](<2 x s16>), [[DEF]](<2 x s16>), [[DEF]](<2 x s16>), [[DEF]](<2 x s16>), [[DEF]](<2 x s16>), [[DEF]](<2 x s16>), [[DEF]](<2 x s16>), [[DEF]](<2 x s16>), [[DEF]](<2 x s16>), [[DEF]](<2 x s16>), [[DEF]](<2 x s16>), [[DEF]](<2 x s16>), [[DEF]](<2 x s16>), [[DEF]](<2 x s16>), [[DEF]](<2 x s16>), [[DEF]](<2 x s16>), [[DEF]](<2 x s16>), [[DEF]](<2 x s16>), [[DEF]](<2 x s16>), [[DEF]](<2 x s16>), [[DEF]](<2 x s16>), [[DEF]](<2 x s16>), [[DEF]](<2 x s16>), [[DEF]](<2 x s16>), [[DEF]](<2 x s16>), [[DEF]](<2 x s16>), [[DEF]](<2 x s16>), [[DEF]](<2 x s16>), [[DEF]](<2 x s16>), [[DEF]](<2 x s16>), [[DEF]](<2 x s16>), [[DEF]](<2 x s16>) ; CHECK-NEXT: [[UV:%[0-9]+]]:_(<65 x s16>), [[UV1:%[0-9]+]]:_(<65 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<130 x s16>) + ; CHECK-NEXT: [[COPY31:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 ; CHECK-NEXT: [[DEF1:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF ; CHECK-NEXT: G_STORE [[UV]](<65 x s16>), [[DEF1]](p1) :: (store (<65 x s16>) into `<65 x i16> addrspace(1)* undef`, align 256, addrspace 1) - ; CHECK-NEXT: SI_RETURN + ; CHECK-NEXT: [[COPY32:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY31]] + ; CHECK-NEXT: S_SETPC_B64_return [[COPY32]] store <65 x i16> %arg0, <65 x i16> addrspace(1)* undef ret void } @@ -1256,14 +1378,16 @@ define void @void_func_v65i16(<65 x i16> %arg0) #0 { define void @void_func_v2f32(<2 x float> %arg0) #0 { ; CHECK-LABEL: name: void_func_v2f32 ; CHECK: bb.1 (%ir-block.0): - ; CHECK-NEXT: liveins: $vgpr0, $vgpr1 + ; CHECK-NEXT: liveins: $vgpr0, $vgpr1, $sgpr30_sgpr31 ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32) + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF ; CHECK-NEXT: G_STORE [[BUILD_VECTOR]](<2 x s32>), [[DEF]](p1) :: (store (<2 x s32>) into `<2 x float> addrspace(1)* undef`, addrspace 1) - ; CHECK-NEXT: SI_RETURN + ; CHECK-NEXT: 
[[COPY3:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY2]] + ; CHECK-NEXT: S_SETPC_B64_return [[COPY3]] store <2 x float> %arg0, <2 x float> addrspace(1)* undef ret void } @@ -1271,15 +1395,17 @@ define void @void_func_v2f32(<2 x float> %arg0) #0 { define void @void_func_v3f32(<3 x float> %arg0) #0 { ; CHECK-LABEL: name: void_func_v3f32 ; CHECK: bb.1 (%ir-block.0): - ; CHECK-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2 + ; CHECK-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2, $sgpr30_sgpr31 ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32) + ; CHECK-NEXT: [[COPY3:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF ; CHECK-NEXT: G_STORE [[BUILD_VECTOR]](<3 x s32>), [[DEF]](p1) :: (store (<3 x s32>) into `<3 x float> addrspace(1)* undef`, align 16, addrspace 1) - ; CHECK-NEXT: SI_RETURN + ; CHECK-NEXT: [[COPY4:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY3]] + ; CHECK-NEXT: S_SETPC_B64_return [[COPY4]] store <3 x float> %arg0, <3 x float> addrspace(1)* undef ret void } @@ -1287,16 +1413,18 @@ define void @void_func_v3f32(<3 x float> %arg0) #0 { define void @void_func_v4f32(<4 x float> %arg0) #0 { ; CHECK-LABEL: name: void_func_v4f32 ; CHECK: bb.1 (%ir-block.0): - ; CHECK-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3 + ; CHECK-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $sgpr30_sgpr31 ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 ; CHECK-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $vgpr3 ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32) + ; CHECK-NEXT: [[COPY4:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF ; CHECK-NEXT: G_STORE [[BUILD_VECTOR]](<4 x s32>), [[DEF]](p1) :: (store (<4 x s32>) into `<4 x float> addrspace(1)* undef`, addrspace 1) - ; CHECK-NEXT: SI_RETURN + ; CHECK-NEXT: [[COPY5:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY4]] + ; CHECK-NEXT: S_SETPC_B64_return [[COPY5]] store <4 x float> %arg0, <4 x float> addrspace(1)* undef ret void } @@ -1304,7 +1432,7 @@ define void @void_func_v4f32(<4 x float> %arg0) #0 { define void @void_func_v8f32(<8 x float> %arg0) #0 { ; CHECK-LABEL: name: void_func_v8f32 ; CHECK: bb.1 (%ir-block.0): - ; CHECK-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7 + ; CHECK-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $sgpr30_sgpr31 ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 @@ -1315,9 +1443,11 @@ define void @void_func_v8f32(<8 x float> %arg0) #0 { ; CHECK-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $vgpr6 ; CHECK-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $vgpr7 ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) + ; CHECK-NEXT: [[COPY8:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF ; CHECK-NEXT: G_STORE [[BUILD_VECTOR]](<8 x s32>), [[DEF]](p1) :: (store (<8 x s32>) into `<8 x float> addrspace(1)* undef`, addrspace 1) - ; CHECK-NEXT: SI_RETURN + ; 
CHECK-NEXT: [[COPY9:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY8]] + ; CHECK-NEXT: S_SETPC_B64_return [[COPY9]] store <8 x float> %arg0, <8 x float> addrspace(1)* undef ret void } @@ -1325,7 +1455,7 @@ define void @void_func_v8f32(<8 x float> %arg0) #0 { define void @void_func_v16f32(<16 x float> %arg0) #0 { ; CHECK-LABEL: name: void_func_v16f32 ; CHECK: bb.1 (%ir-block.0): - ; CHECK-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $vgpr8, $vgpr9, $vgpr10, $vgpr11, $vgpr12, $vgpr13, $vgpr14, $vgpr15 + ; CHECK-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $vgpr8, $vgpr9, $vgpr10, $vgpr11, $vgpr12, $vgpr13, $vgpr14, $vgpr15, $sgpr30_sgpr31 ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 @@ -1344,9 +1474,11 @@ define void @void_func_v16f32(<16 x float> %arg0) #0 { ; CHECK-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY $vgpr14 ; CHECK-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY $vgpr15 ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<16 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32), [[COPY8]](s32), [[COPY9]](s32), [[COPY10]](s32), [[COPY11]](s32), [[COPY12]](s32), [[COPY13]](s32), [[COPY14]](s32), [[COPY15]](s32) + ; CHECK-NEXT: [[COPY16:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF ; CHECK-NEXT: G_STORE [[BUILD_VECTOR]](<16 x s32>), [[DEF]](p1) :: (store (<16 x s32>) into `<16 x float> addrspace(1)* undef`, addrspace 1) - ; CHECK-NEXT: SI_RETURN + ; CHECK-NEXT: [[COPY17:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY16]] + ; CHECK-NEXT: S_SETPC_B64_return [[COPY17]] store <16 x float> %arg0, <16 x float> addrspace(1)* undef ret void } @@ -1354,7 +1486,7 @@ define void @void_func_v16f32(<16 x float> %arg0) #0 { define void @void_func_v2f64(<2 x double> %arg0) #0 { ; CHECK-LABEL: name: void_func_v2f64 ; CHECK: bb.1 (%ir-block.0): - ; CHECK-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3 + ; CHECK-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $sgpr30_sgpr31 ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 @@ -1363,9 +1495,11 @@ define void @void_func_v2f64(<2 x double> %arg0) #0 { ; CHECK-NEXT: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY]](s32), [[COPY1]](s32) ; CHECK-NEXT: [[MV1:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY2]](s32), [[COPY3]](s32) ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s64>) = G_BUILD_VECTOR [[MV]](s64), [[MV1]](s64) + ; CHECK-NEXT: [[COPY4:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF ; CHECK-NEXT: G_STORE [[BUILD_VECTOR]](<2 x s64>), [[DEF]](p1) :: (store (<2 x s64>) into `<2 x double> addrspace(1)* undef`, addrspace 1) - ; CHECK-NEXT: SI_RETURN + ; CHECK-NEXT: [[COPY5:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY4]] + ; CHECK-NEXT: S_SETPC_B64_return [[COPY5]] store <2 x double> %arg0, <2 x double> addrspace(1)* undef ret void } @@ -1373,7 +1507,7 @@ define void @void_func_v2f64(<2 x double> %arg0) #0 { define void @void_func_v3f64(<3 x double> %arg0) #0 { ; CHECK-LABEL: name: void_func_v3f64 ; CHECK: bb.1 (%ir-block.0): - ; CHECK-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5 + ; CHECK-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $sgpr30_sgpr31 ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 @@ -1385,9 
+1519,11 @@ define void @void_func_v3f64(<3 x double> %arg0) #0 { ; CHECK-NEXT: [[MV1:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY2]](s32), [[COPY3]](s32) ; CHECK-NEXT: [[MV2:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY4]](s32), [[COPY5]](s32) ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s64>) = G_BUILD_VECTOR [[MV]](s64), [[MV1]](s64), [[MV2]](s64) + ; CHECK-NEXT: [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF ; CHECK-NEXT: G_STORE [[BUILD_VECTOR]](<3 x s64>), [[DEF]](p1) :: (store (<3 x s64>) into `<3 x double> addrspace(1)* undef`, align 32, addrspace 1) - ; CHECK-NEXT: SI_RETURN + ; CHECK-NEXT: [[COPY7:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY6]] + ; CHECK-NEXT: S_SETPC_B64_return [[COPY7]] store <3 x double> %arg0, <3 x double> addrspace(1)* undef ret void } @@ -1395,7 +1531,7 @@ define void @void_func_v3f64(<3 x double> %arg0) #0 { define void @void_func_v4f64(<4 x double> %arg0) #0 { ; CHECK-LABEL: name: void_func_v4f64 ; CHECK: bb.1 (%ir-block.0): - ; CHECK-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7 + ; CHECK-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $sgpr30_sgpr31 ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 @@ -1410,9 +1546,11 @@ define void @void_func_v4f64(<4 x double> %arg0) #0 { ; CHECK-NEXT: [[MV2:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY4]](s32), [[COPY5]](s32) ; CHECK-NEXT: [[MV3:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY6]](s32), [[COPY7]](s32) ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s64>) = G_BUILD_VECTOR [[MV]](s64), [[MV1]](s64), [[MV2]](s64), [[MV3]](s64) + ; CHECK-NEXT: [[COPY8:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF ; CHECK-NEXT: G_STORE [[BUILD_VECTOR]](<4 x s64>), [[DEF]](p1) :: (store (<4 x s64>) into `<4 x double> addrspace(1)* undef`, addrspace 1) - ; CHECK-NEXT: SI_RETURN + ; CHECK-NEXT: [[COPY9:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY8]] + ; CHECK-NEXT: S_SETPC_B64_return [[COPY9]] store <4 x double> %arg0, <4 x double> addrspace(1)* undef ret void } @@ -1420,7 +1558,7 @@ define void @void_func_v4f64(<4 x double> %arg0) #0 { define void @void_func_v8f64(<8 x double> %arg0) #0 { ; CHECK-LABEL: name: void_func_v8f64 ; CHECK: bb.1 (%ir-block.0): - ; CHECK-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $vgpr8, $vgpr9, $vgpr10, $vgpr11, $vgpr12, $vgpr13, $vgpr14, $vgpr15 + ; CHECK-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $vgpr8, $vgpr9, $vgpr10, $vgpr11, $vgpr12, $vgpr13, $vgpr14, $vgpr15, $sgpr30_sgpr31 ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 @@ -1447,9 +1585,11 @@ define void @void_func_v8f64(<8 x double> %arg0) #0 { ; CHECK-NEXT: [[MV6:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY12]](s32), [[COPY13]](s32) ; CHECK-NEXT: [[MV7:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY14]](s32), [[COPY15]](s32) ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s64>) = G_BUILD_VECTOR [[MV]](s64), [[MV1]](s64), [[MV2]](s64), [[MV3]](s64), [[MV4]](s64), [[MV5]](s64), [[MV6]](s64), [[MV7]](s64) + ; CHECK-NEXT: [[COPY16:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF ; CHECK-NEXT: G_STORE [[BUILD_VECTOR]](<8 x s64>), [[DEF]](p1) :: (store (<8 x s64>) into `<8 x double> addrspace(1)* undef`, addrspace 1) - ; CHECK-NEXT: SI_RETURN + ; CHECK-NEXT: 
[[COPY17:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY16]] + ; CHECK-NEXT: S_SETPC_B64_return [[COPY17]] store <8 x double> %arg0, <8 x double> addrspace(1)* undef ret void } @@ -1457,7 +1597,7 @@ define void @void_func_v8f64(<8 x double> %arg0) #0 { define void @void_func_v16f64(<16 x double> %arg0) #0 { ; CHECK-LABEL: name: void_func_v16f64 ; CHECK: bb.1 (%ir-block.0): - ; CHECK-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $vgpr8, $vgpr9, $vgpr10, $vgpr11, $vgpr12, $vgpr13, $vgpr14, $vgpr15, $vgpr16, $vgpr17, $vgpr18, $vgpr19, $vgpr20, $vgpr21, $vgpr22, $vgpr23, $vgpr24, $vgpr25, $vgpr26, $vgpr27, $vgpr28, $vgpr29, $vgpr30 + ; CHECK-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $vgpr8, $vgpr9, $vgpr10, $vgpr11, $vgpr12, $vgpr13, $vgpr14, $vgpr15, $vgpr16, $vgpr17, $vgpr18, $vgpr19, $vgpr20, $vgpr21, $vgpr22, $vgpr23, $vgpr24, $vgpr25, $vgpr26, $vgpr27, $vgpr28, $vgpr29, $vgpr30, $sgpr30_sgpr31 ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 @@ -1509,9 +1649,11 @@ define void @void_func_v16f64(<16 x double> %arg0) #0 { ; CHECK-NEXT: [[MV14:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY28]](s32), [[COPY29]](s32) ; CHECK-NEXT: [[MV15:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY30]](s32), [[LOAD]](s32) ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<16 x s64>) = G_BUILD_VECTOR [[MV]](s64), [[MV1]](s64), [[MV2]](s64), [[MV3]](s64), [[MV4]](s64), [[MV5]](s64), [[MV6]](s64), [[MV7]](s64), [[MV8]](s64), [[MV9]](s64), [[MV10]](s64), [[MV11]](s64), [[MV12]](s64), [[MV13]](s64), [[MV14]](s64), [[MV15]](s64) + ; CHECK-NEXT: [[COPY31:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF ; CHECK-NEXT: G_STORE [[BUILD_VECTOR]](<16 x s64>), [[DEF]](p1) :: (store (<16 x s64>) into `<16 x double> addrspace(1)* undef`, addrspace 1) - ; CHECK-NEXT: SI_RETURN + ; CHECK-NEXT: [[COPY32:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY31]] + ; CHECK-NEXT: S_SETPC_B64_return [[COPY32]] store <16 x double> %arg0, <16 x double> addrspace(1)* undef ret void } @@ -1519,12 +1661,14 @@ define void @void_func_v16f64(<16 x double> %arg0) #0 { define void @void_func_v2f16(<2 x half> %arg0) #0 { ; CHECK-LABEL: name: void_func_v2f16 ; CHECK: bb.1 (%ir-block.0): - ; CHECK-NEXT: liveins: $vgpr0 + ; CHECK-NEXT: liveins: $vgpr0, $sgpr30_sgpr31 ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF ; CHECK-NEXT: G_STORE [[COPY]](<2 x s16>), [[DEF]](p1) :: (store (<2 x s16>) into `<2 x half> addrspace(1)* undef`, addrspace 1) - ; CHECK-NEXT: SI_RETURN + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY1]] + ; CHECK-NEXT: S_SETPC_B64_return [[COPY2]] store <2 x half> %arg0, <2 x half> addrspace(1)* undef ret void } @@ -1532,16 +1676,18 @@ define void @void_func_v2f16(<2 x half> %arg0) #0 { define void @void_func_v3f16(<3 x half> %arg0) #0 { ; CHECK-LABEL: name: void_func_v3f16 ; CHECK: bb.1 (%ir-block.0): - ; CHECK-NEXT: liveins: $vgpr0, $vgpr1 + ; CHECK-NEXT: liveins: $vgpr0, $vgpr1, $sgpr30_sgpr31 ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0 ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr1 ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(<2 x s16>) = G_IMPLICIT_DEF ; CHECK-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[COPY]](<2 x s16>), [[COPY1]](<2 x s16>), [[DEF]](<2 x s16>) ; CHECK-NEXT: 
[[UV:%[0-9]+]]:_(<3 x s16>), [[UV1:%[0-9]+]]:_(<3 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<6 x s16>) + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 ; CHECK-NEXT: [[DEF1:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF ; CHECK-NEXT: G_STORE [[UV]](<3 x s16>), [[DEF1]](p1) :: (store (<3 x s16>) into `<3 x half> addrspace(1)* undef`, align 8, addrspace 1) - ; CHECK-NEXT: SI_RETURN + ; CHECK-NEXT: [[COPY3:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY2]] + ; CHECK-NEXT: S_SETPC_B64_return [[COPY3]] store <3 x half> %arg0, <3 x half> addrspace(1)* undef ret void } @@ -1549,14 +1695,16 @@ define void @void_func_v3f16(<3 x half> %arg0) #0 { define void @void_func_v4f16(<4 x half> %arg0) #0 { ; CHECK-LABEL: name: void_func_v4f16 ; CHECK: bb.1 (%ir-block.0): - ; CHECK-NEXT: liveins: $vgpr0, $vgpr1 + ; CHECK-NEXT: liveins: $vgpr0, $vgpr1, $sgpr30_sgpr31 ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0 ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr1 ; CHECK-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[COPY]](<2 x s16>), [[COPY1]](<2 x s16>) + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF ; CHECK-NEXT: G_STORE [[CONCAT_VECTORS]](<4 x s16>), [[DEF]](p1) :: (store (<4 x s16>) into `<4 x half> addrspace(1)* undef`, addrspace 1) - ; CHECK-NEXT: SI_RETURN + ; CHECK-NEXT: [[COPY3:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY2]] + ; CHECK-NEXT: S_SETPC_B64_return [[COPY3]] store <4 x half> %arg0, <4 x half> addrspace(1)* undef ret void } @@ -1564,16 +1712,18 @@ define void @void_func_v4f16(<4 x half> %arg0) #0 { define void @void_func_v8f16(<8 x half> %arg0) #0 { ; CHECK-LABEL: name: void_func_v8f16 ; CHECK: bb.1 (%ir-block.0): - ; CHECK-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3 + ; CHECK-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $sgpr30_sgpr31 ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0 ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr1 ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr2 ; CHECK-NEXT: [[COPY3:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr3 ; CHECK-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<8 x s16>) = G_CONCAT_VECTORS [[COPY]](<2 x s16>), [[COPY1]](<2 x s16>), [[COPY2]](<2 x s16>), [[COPY3]](<2 x s16>) + ; CHECK-NEXT: [[COPY4:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF ; CHECK-NEXT: G_STORE [[CONCAT_VECTORS]](<8 x s16>), [[DEF]](p1) :: (store (<8 x s16>) into `<8 x half> addrspace(1)* undef`, addrspace 1) - ; CHECK-NEXT: SI_RETURN + ; CHECK-NEXT: [[COPY5:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY4]] + ; CHECK-NEXT: S_SETPC_B64_return [[COPY5]] store <8 x half> %arg0, <8 x half> addrspace(1)* undef ret void } @@ -1581,7 +1731,7 @@ define void @void_func_v8f16(<8 x half> %arg0) #0 { define void @void_func_v16f16(<16 x half> %arg0) #0 { ; CHECK-LABEL: name: void_func_v16f16 ; CHECK: bb.1 (%ir-block.0): - ; CHECK-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7 + ; CHECK-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $sgpr30_sgpr31 ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0 ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr1 @@ -1592,9 +1742,11 @@ define void @void_func_v16f16(<16 x half> %arg0) #0 { ; CHECK-NEXT: [[COPY6:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr6 ; CHECK-NEXT: [[COPY7:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr7 ; CHECK-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<16 x s16>) = G_CONCAT_VECTORS 
[[COPY]](<2 x s16>), [[COPY1]](<2 x s16>), [[COPY2]](<2 x s16>), [[COPY3]](<2 x s16>), [[COPY4]](<2 x s16>), [[COPY5]](<2 x s16>), [[COPY6]](<2 x s16>), [[COPY7]](<2 x s16>) + ; CHECK-NEXT: [[COPY8:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF ; CHECK-NEXT: G_STORE [[CONCAT_VECTORS]](<16 x s16>), [[DEF]](p1) :: (store (<16 x s16>) into `<16 x half> addrspace(1)* undef`, addrspace 1) - ; CHECK-NEXT: SI_RETURN + ; CHECK-NEXT: [[COPY9:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY8]] + ; CHECK-NEXT: S_SETPC_B64_return [[COPY9]] store <16 x half> %arg0, <16 x half> addrspace(1)* undef ret void } @@ -1603,19 +1755,21 @@ define void @void_func_v16f16(<16 x half> %arg0) #0 { define void @void_func_i32_i64_i32(i32 %arg0, i64 %arg1, i32 %arg2) #0 { ; CHECK-LABEL: name: void_func_i32_i64_i32 ; CHECK: bb.1 (%ir-block.0): - ; CHECK-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3 + ; CHECK-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $sgpr30_sgpr31 ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 ; CHECK-NEXT: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY1]](s32), [[COPY2]](s32) ; CHECK-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $vgpr3 + ; CHECK-NEXT: [[COPY4:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF - ; CHECK-NEXT: [[COPY4:%[0-9]+]]:_(p1) = COPY [[DEF]](p1) + ; CHECK-NEXT: [[COPY5:%[0-9]+]]:_(p1) = COPY [[DEF]](p1) ; CHECK-NEXT: G_STORE [[COPY]](s32), [[DEF]](p1) :: (volatile store (s32) into `i32 addrspace(1)* undef`, addrspace 1) - ; CHECK-NEXT: G_STORE [[MV]](s64), [[COPY4]](p1) :: (volatile store (s64) into `i64 addrspace(1)* undef`, addrspace 1) + ; CHECK-NEXT: G_STORE [[MV]](s64), [[COPY5]](p1) :: (volatile store (s64) into `i64 addrspace(1)* undef`, addrspace 1) ; CHECK-NEXT: G_STORE [[COPY3]](s32), [[DEF]](p1) :: (volatile store (s32) into `i32 addrspace(1)* undef`, addrspace 1) - ; CHECK-NEXT: SI_RETURN + ; CHECK-NEXT: [[COPY6:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY4]] + ; CHECK-NEXT: S_SETPC_B64_return [[COPY6]] store volatile i32 %arg0, i32 addrspace(1)* undef store volatile i64 %arg1, i64 addrspace(1)* undef store volatile i32 %arg2, i32 addrspace(1)* undef @@ -1625,12 +1779,14 @@ define void @void_func_i32_i64_i32(i32 %arg0, i64 %arg1, i32 %arg2) #0 { define void @void_func_struct_i32({ i32 } %arg0) #0 { ; CHECK-LABEL: name: void_func_struct_i32 ; CHECK: bb.1 (%ir-block.0): - ; CHECK-NEXT: liveins: $vgpr0 + ; CHECK-NEXT: liveins: $vgpr0, $sgpr30_sgpr31 ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF ; CHECK-NEXT: G_STORE [[COPY]](s32), [[DEF]](p1) :: (store (s32) into `{ i32 } addrspace(1)* undef`, addrspace 1) - ; CHECK-NEXT: SI_RETURN + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY1]] + ; CHECK-NEXT: S_SETPC_B64_return [[COPY2]] store { i32 } %arg0, { i32 } addrspace(1)* undef ret void } @@ -1638,18 +1794,20 @@ define void @void_func_struct_i32({ i32 } %arg0) #0 { define void @void_func_struct_i8_i32({ i8, i32 } %arg0) #0 { ; CHECK-LABEL: name: void_func_struct_i8_i32 ; CHECK: bb.1 (%ir-block.0): - ; CHECK-NEXT: liveins: $vgpr0, $vgpr1 + ; CHECK-NEXT: liveins: $vgpr0, $vgpr1, $sgpr30_sgpr31 ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY]](s32) ; 
CHECK-NEXT: [[TRUNC1:%[0-9]+]]:_(s8) = G_TRUNC [[TRUNC]](s16) ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF ; CHECK-NEXT: G_STORE [[TRUNC1]](s8), [[DEF]](p1) :: (store (s8) into `{ i8, i32 } addrspace(1)* undef`, align 4, addrspace 1) ; CHECK-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 4 ; CHECK-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[DEF]], [[C]](s64) ; CHECK-NEXT: G_STORE [[COPY1]](s32), [[PTR_ADD]](p1) :: (store (s32) into `{ i8, i32 } addrspace(1)* undef` + 4, addrspace 1) - ; CHECK-NEXT: SI_RETURN + ; CHECK-NEXT: [[COPY3:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY2]] + ; CHECK-NEXT: S_SETPC_B64_return [[COPY3]] store { i8, i32 } %arg0, { i8, i32 } addrspace(1)* undef ret void } @@ -1657,8 +1815,11 @@ define void @void_func_struct_i8_i32({ i8, i32 } %arg0) #0 { define void @void_func_byval_struct_i8_i32({ i8, i32 } addrspace(5)* byval({ i8, i32 }) %arg0) #0 { ; CHECK-LABEL: name: void_func_byval_struct_i8_i32 ; CHECK: bb.1 (%ir-block.0): + ; CHECK-NEXT: liveins: $sgpr30_sgpr31 + ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[FRAME_INDEX:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.0 ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(p5) = COPY [[FRAME_INDEX]](p5) + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF ; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(s8) = G_LOAD [[COPY]](p5) :: (dereferenceable load (s8) from %ir.arg0, align 4, addrspace 5) ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 @@ -1668,7 +1829,8 @@ define void @void_func_byval_struct_i8_i32({ i8, i32 } addrspace(5)* byval({ i8, ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 4 ; CHECK-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p1) = G_PTR_ADD [[DEF]], [[C1]](s64) ; CHECK-NEXT: G_STORE [[LOAD1]](s32), [[PTR_ADD1]](p1) :: (store (s32) into `{ i8, i32 } addrspace(1)* undef` + 4, addrspace 1) - ; CHECK-NEXT: SI_RETURN + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY1]] + ; CHECK-NEXT: S_SETPC_B64_return [[COPY2]] %arg0.load = load { i8, i32 }, { i8, i32 } addrspace(5)* %arg0 store { i8, i32 } %arg0.load, { i8, i32 } addrspace(1)* undef ret void @@ -1677,13 +1839,14 @@ define void @void_func_byval_struct_i8_i32({ i8, i32 } addrspace(5)* byval({ i8, define void @void_func_byval_struct_i8_i32_x2({ i8, i32 } addrspace(5)* byval({ i8, i32 }) %arg0, { i8, i32 } addrspace(5)* byval({ i8, i32 }) %arg1, i32 %arg2) #0 { ; CHECK-LABEL: name: void_func_byval_struct_i8_i32_x2 ; CHECK: bb.1 (%ir-block.0): - ; CHECK-NEXT: liveins: $vgpr0 + ; CHECK-NEXT: liveins: $vgpr0, $sgpr30_sgpr31 ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[FRAME_INDEX:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.1 ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(p5) = COPY [[FRAME_INDEX]](p5) ; CHECK-NEXT: [[FRAME_INDEX1:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.0 ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(p5) = COPY [[FRAME_INDEX1]](p5) ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr0 + ; CHECK-NEXT: [[COPY3:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF ; CHECK-NEXT: [[DEF1:%[0-9]+]]:_(p3) = G_IMPLICIT_DEF ; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(s8) = G_LOAD [[COPY]](p5) :: (volatile dereferenceable load (s8) from %ir.arg0, align 4, addrspace 5) @@ -1701,7 +1864,8 @@ define void @void_func_byval_struct_i8_i32_x2({ i8, i32 } addrspace(5)* byval({ ; CHECK-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p1) = G_PTR_ADD [[DEF]], [[C1]](s64) ; CHECK-NEXT: G_STORE [[LOAD3]](s32), [[PTR_ADD3]](p1) :: (volatile store 
(s32) into `{ i8, i32 } addrspace(1)* undef` + 4, addrspace 1) ; CHECK-NEXT: G_STORE [[COPY2]](s32), [[DEF1]](p3) :: (volatile store (s32) into `i32 addrspace(3)* undef`, addrspace 3) - ; CHECK-NEXT: SI_RETURN + ; CHECK-NEXT: [[COPY4:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY3]] + ; CHECK-NEXT: S_SETPC_B64_return [[COPY4]] %arg0.load = load volatile { i8, i32 }, { i8, i32 } addrspace(5)* %arg0 %arg1.load = load volatile { i8, i32 }, { i8, i32 } addrspace(5)* %arg1 store volatile { i8, i32 } %arg0.load, { i8, i32 } addrspace(1)* undef @@ -1713,17 +1877,21 @@ define void @void_func_byval_struct_i8_i32_x2({ i8, i32 } addrspace(5)* byval({ define void @void_func_byval_i32_byval_i64(i32 addrspace(5)* byval(i32) %arg0, i64 addrspace(5)* byval(i64) %arg1) #0 { ; CHECK-LABEL: name: void_func_byval_i32_byval_i64 ; CHECK: bb.1 (%ir-block.0): + ; CHECK-NEXT: liveins: $sgpr30_sgpr31 + ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[FRAME_INDEX:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.1 ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(p5) = COPY [[FRAME_INDEX]](p5) ; CHECK-NEXT: [[FRAME_INDEX1:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.0 ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(p5) = COPY [[FRAME_INDEX1]](p5) + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(p1) = COPY [[DEF]](p1) + ; CHECK-NEXT: [[COPY3:%[0-9]+]]:_(p1) = COPY [[DEF]](p1) ; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (dereferenceable load (s32) from %ir.arg0, addrspace 5) ; CHECK-NEXT: [[LOAD1:%[0-9]+]]:_(s64) = G_LOAD [[COPY1]](p5) :: (dereferenceable load (s64) from %ir.arg1, addrspace 5) ; CHECK-NEXT: G_STORE [[LOAD]](s32), [[DEF]](p1) :: (store (s32) into `i32 addrspace(1)* undef`, addrspace 1) - ; CHECK-NEXT: G_STORE [[LOAD1]](s64), [[COPY2]](p1) :: (store (s64) into `i64 addrspace(1)* undef`, addrspace 1) - ; CHECK-NEXT: SI_RETURN + ; CHECK-NEXT: G_STORE [[LOAD1]](s64), [[COPY3]](p1) :: (store (s64) into `i64 addrspace(1)* undef`, addrspace 1) + ; CHECK-NEXT: [[COPY4:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY2]] + ; CHECK-NEXT: S_SETPC_B64_return [[COPY4]] %arg0.load = load i32, i32 addrspace(5)* %arg0 %arg1.load = load i64, i64 addrspace(5)* %arg1 store i32 %arg0.load, i32 addrspace(1)* undef @@ -1734,17 +1902,21 @@ define void @void_func_byval_i32_byval_i64(i32 addrspace(5)* byval(i32) %arg0, i define void @void_func_byval_i8_align32_i16_align64(i8 addrspace(5)* byval(i8) %arg0, i16 addrspace(5)* byval(i16) align 64 %arg1) #0 { ; CHECK-LABEL: name: void_func_byval_i8_align32_i16_align64 ; CHECK: bb.1 (%ir-block.0): + ; CHECK-NEXT: liveins: $sgpr30_sgpr31 + ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[FRAME_INDEX:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.1 ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(p5) = COPY [[FRAME_INDEX]](p5) ; CHECK-NEXT: [[FRAME_INDEX1:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.0 ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(p5) = COPY [[FRAME_INDEX1]](p5) + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 ; CHECK-NEXT: [[C:%[0-9]+]]:_(p1) = G_CONSTANT i64 0 - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(p1) = COPY [[C]](p1) + ; CHECK-NEXT: [[COPY3:%[0-9]+]]:_(p1) = COPY [[C]](p1) ; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(s8) = G_LOAD [[COPY]](p5) :: (dereferenceable load (s8) from %ir.arg0, addrspace 5) ; CHECK-NEXT: [[LOAD1:%[0-9]+]]:_(s16) = G_LOAD [[COPY1]](p5) :: (dereferenceable load (s16) from %ir.arg1, addrspace 5) ; CHECK-NEXT: G_STORE [[LOAD]](s8), [[C]](p1) :: (store (s8) into `i8 addrspace(1)* null`, addrspace 1) - ; CHECK-NEXT: G_STORE 
[[LOAD1]](s16), [[COPY2]](p1) :: (store (s16) into `i16 addrspace(1)* null`, addrspace 1) - ; CHECK-NEXT: SI_RETURN + ; CHECK-NEXT: G_STORE [[LOAD1]](s16), [[COPY3]](p1) :: (store (s16) into `i16 addrspace(1)* null`, addrspace 1) + ; CHECK-NEXT: [[COPY4:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY2]] + ; CHECK-NEXT: S_SETPC_B64_return [[COPY4]] %arg0.load = load i8, i8 addrspace(5)* %arg0 %arg1.load = load i16, i16 addrspace(5)* %arg1 store i8 %arg0.load, i8 addrspace(1)* null @@ -1756,12 +1928,15 @@ define void @void_func_byval_i8_align32_i16_align64(i8 addrspace(5)* byval(i8) % define void @byval_a3i32_align128_byval_i16_align64([3 x i32] addrspace(5)* byval([3 x i32]) align 128 %arg0, i16 addrspace(5)* byval(i16) align 64 %arg1) #0 { ; CHECK-LABEL: name: byval_a3i32_align128_byval_i16_align64 ; CHECK: bb.1 (%ir-block.0): + ; CHECK-NEXT: liveins: $sgpr30_sgpr31 + ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[FRAME_INDEX:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.1 ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(p5) = COPY [[FRAME_INDEX]](p5) ; CHECK-NEXT: [[FRAME_INDEX1:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.0 ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(p5) = COPY [[FRAME_INDEX1]](p5) + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 ; CHECK-NEXT: [[C:%[0-9]+]]:_(p1) = G_CONSTANT i64 0 - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(p1) = COPY [[C]](p1) + ; CHECK-NEXT: [[COPY3:%[0-9]+]]:_(p1) = COPY [[C]](p1) ; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (dereferenceable load (s32) from %ir.arg0, addrspace 5) ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 ; CHECK-NEXT: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C1]](s32) @@ -1777,8 +1952,9 @@ define void @byval_a3i32_align128_byval_i16_align64([3 x i32] addrspace(5)* byva ; CHECK-NEXT: [[C4:%[0-9]+]]:_(s64) = G_CONSTANT i64 8 ; CHECK-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p1) = G_PTR_ADD [[C]], [[C4]](s64) ; CHECK-NEXT: G_STORE [[LOAD2]](s32), [[PTR_ADD3]](p1) :: (store (s32) into `[3 x i32] addrspace(1)* null` + 8, addrspace 1) - ; CHECK-NEXT: G_STORE [[LOAD3]](s16), [[COPY2]](p1) :: (store (s16) into `i16 addrspace(1)* null`, addrspace 1) - ; CHECK-NEXT: SI_RETURN + ; CHECK-NEXT: G_STORE [[LOAD3]](s16), [[COPY3]](p1) :: (store (s16) into `i16 addrspace(1)* null`, addrspace 1) + ; CHECK-NEXT: [[COPY4:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY2]] + ; CHECK-NEXT: S_SETPC_B64_return [[COPY4]] %arg0.load = load [3 x i32], [3 x i32] addrspace(5)* %arg0 %arg1.load = load i16, i16 addrspace(5)* %arg1 store [3 x i32] %arg0.load, [3 x i32] addrspace(1)* null @@ -1790,7 +1966,7 @@ define void @byval_a3i32_align128_byval_i16_align64([3 x i32] addrspace(5)* byva define void @void_func_v32i32_i32_byval_i8(<32 x i32> %arg0, i32 %arg1, i8 addrspace(5)* byval(i8) align 8 %arg2) #0 { ; CHECK-LABEL: name: void_func_v32i32_i32_byval_i8 ; CHECK: bb.1 (%ir-block.0): - ; CHECK-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $vgpr8, $vgpr9, $vgpr10, $vgpr11, $vgpr12, $vgpr13, $vgpr14, $vgpr15, $vgpr16, $vgpr17, $vgpr18, $vgpr19, $vgpr20, $vgpr21, $vgpr22, $vgpr23, $vgpr24, $vgpr25, $vgpr26, $vgpr27, $vgpr28, $vgpr29, $vgpr30 + ; CHECK-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $vgpr8, $vgpr9, $vgpr10, $vgpr11, $vgpr12, $vgpr13, $vgpr14, $vgpr15, $vgpr16, $vgpr17, $vgpr18, $vgpr19, $vgpr20, $vgpr21, $vgpr22, $vgpr23, $vgpr24, $vgpr25, $vgpr26, $vgpr27, $vgpr28, $vgpr29, $vgpr30, $sgpr30_sgpr31 ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY 
$vgpr1 @@ -1830,12 +2006,14 @@ define void @void_func_v32i32_i32_byval_i8(<32 x i32> %arg0, i32 %arg1, i8 addrs ; CHECK-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX1]](p5) :: (invariant load (s32) from %fixed-stack.1, addrspace 5) ; CHECK-NEXT: [[FRAME_INDEX2:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.0 ; CHECK-NEXT: [[COPY31:%[0-9]+]]:_(p5) = COPY [[FRAME_INDEX2]](p5) + ; CHECK-NEXT: [[COPY32:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 ; CHECK-NEXT: [[C:%[0-9]+]]:_(p1) = G_CONSTANT i64 0 - ; CHECK-NEXT: [[COPY32:%[0-9]+]]:_(p1) = COPY [[C]](p1) + ; CHECK-NEXT: [[COPY33:%[0-9]+]]:_(p1) = COPY [[C]](p1) ; CHECK-NEXT: G_STORE [[LOAD1]](s32), [[C]](p1) :: (store (s32) into `i32 addrspace(1)* null`, addrspace 1) ; CHECK-NEXT: [[LOAD2:%[0-9]+]]:_(s8) = G_LOAD [[COPY31]](p5) :: (dereferenceable load (s8) from %ir.arg2, addrspace 5) - ; CHECK-NEXT: G_STORE [[LOAD2]](s8), [[COPY32]](p1) :: (store (s8) into `i8 addrspace(1)* null`, addrspace 1) - ; CHECK-NEXT: SI_RETURN + ; CHECK-NEXT: G_STORE [[LOAD2]](s8), [[COPY33]](p1) :: (store (s8) into `i8 addrspace(1)* null`, addrspace 1) + ; CHECK-NEXT: [[COPY34:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY32]] + ; CHECK-NEXT: S_SETPC_B64_return [[COPY34]] store i32 %arg1, i32 addrspace(1)* null %arg2.load = load i8, i8 addrspace(5)* %arg2 store i8 %arg2.load, i8 addrspace(1)* null @@ -1846,7 +2024,7 @@ define void @void_func_v32i32_i32_byval_i8(<32 x i32> %arg0, i32 %arg1, i8 addrs define void @void_func_v32i32_byval_i8_i32(<32 x i32> %arg0, i8 addrspace(5)* byval(i8) %arg1, i32 %arg2) #0 { ; CHECK-LABEL: name: void_func_v32i32_byval_i8_i32 ; CHECK: bb.1 (%ir-block.0): - ; CHECK-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $vgpr8, $vgpr9, $vgpr10, $vgpr11, $vgpr12, $vgpr13, $vgpr14, $vgpr15, $vgpr16, $vgpr17, $vgpr18, $vgpr19, $vgpr20, $vgpr21, $vgpr22, $vgpr23, $vgpr24, $vgpr25, $vgpr26, $vgpr27, $vgpr28, $vgpr29, $vgpr30 + ; CHECK-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $vgpr8, $vgpr9, $vgpr10, $vgpr11, $vgpr12, $vgpr13, $vgpr14, $vgpr15, $vgpr16, $vgpr17, $vgpr18, $vgpr19, $vgpr20, $vgpr21, $vgpr22, $vgpr23, $vgpr24, $vgpr25, $vgpr26, $vgpr27, $vgpr28, $vgpr29, $vgpr30, $sgpr30_sgpr31 ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 @@ -1886,12 +2064,14 @@ define void @void_func_v32i32_byval_i8_i32(<32 x i32> %arg0, i8 addrspace(5)* by ; CHECK-NEXT: [[COPY31:%[0-9]+]]:_(p5) = COPY [[FRAME_INDEX1]](p5) ; CHECK-NEXT: [[FRAME_INDEX2:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.0 ; CHECK-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX2]](p5) :: (invariant load (s32) from %fixed-stack.0, align 8, addrspace 5) + ; CHECK-NEXT: [[COPY32:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 ; CHECK-NEXT: [[C:%[0-9]+]]:_(p1) = G_CONSTANT i64 0 - ; CHECK-NEXT: [[COPY32:%[0-9]+]]:_(p1) = COPY [[C]](p1) + ; CHECK-NEXT: [[COPY33:%[0-9]+]]:_(p1) = COPY [[C]](p1) ; CHECK-NEXT: G_STORE [[LOAD1]](s32), [[C]](p1) :: (store (s32) into `i32 addrspace(1)* null`, addrspace 1) ; CHECK-NEXT: [[LOAD2:%[0-9]+]]:_(s8) = G_LOAD [[COPY31]](p5) :: (dereferenceable load (s8) from %ir.arg1, addrspace 5) - ; CHECK-NEXT: G_STORE [[LOAD2]](s8), [[COPY32]](p1) :: (store (s8) into `i8 addrspace(1)* null`, addrspace 1) - ; CHECK-NEXT: SI_RETURN + ; CHECK-NEXT: G_STORE [[LOAD2]](s8), [[COPY33]](p1) :: (store (s8) into `i8 addrspace(1)* null`, addrspace 1) + ; CHECK-NEXT: [[COPY34:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY32]] + ; CHECK-NEXT: S_SETPC_B64_return 
[[COPY34]] store i32 %arg2, i32 addrspace(1)* null %arg1.load = load i8, i8 addrspace(5)* %arg1 store i8 %arg1.load, i8 addrspace(1)* null @@ -1901,7 +2081,7 @@ define void @void_func_v32i32_byval_i8_i32(<32 x i32> %arg0, i8 addrspace(5)* by define void @void_func_v32i32_i32_i64(<32 x i32> %arg0, i32 %arg1, i64 %arg2) #0 { ; CHECK-LABEL: name: void_func_v32i32_i32_i64 ; CHECK: bb.1 (%ir-block.0): - ; CHECK-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $vgpr8, $vgpr9, $vgpr10, $vgpr11, $vgpr12, $vgpr13, $vgpr14, $vgpr15, $vgpr16, $vgpr17, $vgpr18, $vgpr19, $vgpr20, $vgpr21, $vgpr22, $vgpr23, $vgpr24, $vgpr25, $vgpr26, $vgpr27, $vgpr28, $vgpr29, $vgpr30 + ; CHECK-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $vgpr8, $vgpr9, $vgpr10, $vgpr11, $vgpr12, $vgpr13, $vgpr14, $vgpr15, $vgpr16, $vgpr17, $vgpr18, $vgpr19, $vgpr20, $vgpr21, $vgpr22, $vgpr23, $vgpr24, $vgpr25, $vgpr26, $vgpr27, $vgpr28, $vgpr29, $vgpr30, $sgpr30_sgpr31 ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 @@ -1944,13 +2124,15 @@ define void @void_func_v32i32_i32_i64(<32 x i32> %arg0, i32 %arg1, i64 %arg2) #0 ; CHECK-NEXT: [[FRAME_INDEX3:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.0 ; CHECK-NEXT: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX3]](p5) :: (invariant load (s32) from %fixed-stack.0, addrspace 5) ; CHECK-NEXT: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[LOAD2]](s32), [[LOAD3]](s32) + ; CHECK-NEXT: [[COPY31:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF - ; CHECK-NEXT: [[COPY31:%[0-9]+]]:_(p1) = COPY [[DEF]](p1) ; CHECK-NEXT: [[COPY32:%[0-9]+]]:_(p1) = COPY [[DEF]](p1) + ; CHECK-NEXT: [[COPY33:%[0-9]+]]:_(p1) = COPY [[DEF]](p1) ; CHECK-NEXT: G_STORE [[BUILD_VECTOR]](<32 x s32>), [[DEF]](p1) :: (volatile store (<32 x s32>) into `<32 x i32> addrspace(1)* undef`, addrspace 1) - ; CHECK-NEXT: G_STORE [[LOAD1]](s32), [[COPY31]](p1) :: (volatile store (s32) into `i32 addrspace(1)* undef`, addrspace 1) - ; CHECK-NEXT: G_STORE [[MV]](s64), [[COPY32]](p1) :: (volatile store (s64) into `i64 addrspace(1)* undef`, addrspace 1) - ; CHECK-NEXT: SI_RETURN + ; CHECK-NEXT: G_STORE [[LOAD1]](s32), [[COPY32]](p1) :: (volatile store (s32) into `i32 addrspace(1)* undef`, addrspace 1) + ; CHECK-NEXT: G_STORE [[MV]](s64), [[COPY33]](p1) :: (volatile store (s64) into `i64 addrspace(1)* undef`, addrspace 1) + ; CHECK-NEXT: [[COPY34:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY31]] + ; CHECK-NEXT: S_SETPC_B64_return [[COPY34]] store volatile <32 x i32> %arg0, <32 x i32> addrspace(1)* undef store volatile i32 %arg1, i32 addrspace(1)* undef store volatile i64 %arg2, i64 addrspace(1)* undef @@ -1961,7 +2143,7 @@ define void @void_func_v32i32_i32_i64(<32 x i32> %arg0, i32 %arg1, i64 %arg2) #0 define void @void_func_v32i32_i1_i8_i16(<32 x i32> %arg0, i1 %arg1, i8 %arg2, i16 %arg3, half %arg4) #0 { ; CHECK-LABEL: name: void_func_v32i32_i1_i8_i16 ; CHECK: bb.1 (%ir-block.0): - ; CHECK-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $vgpr8, $vgpr9, $vgpr10, $vgpr11, $vgpr12, $vgpr13, $vgpr14, $vgpr15, $vgpr16, $vgpr17, $vgpr18, $vgpr19, $vgpr20, $vgpr21, $vgpr22, $vgpr23, $vgpr24, $vgpr25, $vgpr26, $vgpr27, $vgpr28, $vgpr29, $vgpr30 + ; CHECK-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $vgpr8, $vgpr9, $vgpr10, $vgpr11, $vgpr12, $vgpr13, $vgpr14, $vgpr15, $vgpr16, $vgpr17, $vgpr18, $vgpr19, $vgpr20, $vgpr21, 
$vgpr22, $vgpr23, $vgpr24, $vgpr25, $vgpr26, $vgpr27, $vgpr28, $vgpr29, $vgpr30, $sgpr30_sgpr31 ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 @@ -2007,17 +2189,19 @@ define void @void_func_v32i32_i1_i8_i16(<32 x i32> %arg0, i1 %arg1, i8 %arg2, i1 ; CHECK-NEXT: [[LOAD3:%[0-9]+]]:_(s16) = G_LOAD [[FRAME_INDEX3]](p5) :: (invariant load (s16) from %fixed-stack.1, align 4, addrspace 5) ; CHECK-NEXT: [[FRAME_INDEX4:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.0 ; CHECK-NEXT: [[LOAD4:%[0-9]+]]:_(s16) = G_LOAD [[FRAME_INDEX4]](p5) :: (invariant load (s16) from %fixed-stack.0, align 16, addrspace 5) + ; CHECK-NEXT: [[COPY31:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF - ; CHECK-NEXT: [[COPY31:%[0-9]+]]:_(p1) = COPY [[DEF]](p1) ; CHECK-NEXT: [[COPY32:%[0-9]+]]:_(p1) = COPY [[DEF]](p1) ; CHECK-NEXT: [[COPY33:%[0-9]+]]:_(p1) = COPY [[DEF]](p1) ; CHECK-NEXT: [[COPY34:%[0-9]+]]:_(p1) = COPY [[DEF]](p1) + ; CHECK-NEXT: [[COPY35:%[0-9]+]]:_(p1) = COPY [[DEF]](p1) ; CHECK-NEXT: G_STORE [[BUILD_VECTOR]](<32 x s32>), [[DEF]](p1) :: (volatile store (<32 x s32>) into `<32 x i32> addrspace(1)* undef`, addrspace 1) - ; CHECK-NEXT: G_STORE [[TRUNC]](s1), [[COPY31]](p1) :: (volatile store (s1) into `i1 addrspace(1)* undef`, addrspace 1) - ; CHECK-NEXT: G_STORE [[TRUNC1]](s8), [[COPY32]](p1) :: (volatile store (s8) into `i8 addrspace(1)* undef`, addrspace 1) - ; CHECK-NEXT: G_STORE [[LOAD3]](s16), [[COPY33]](p1) :: (volatile store (s16) into `i16 addrspace(1)* undef`, addrspace 1) - ; CHECK-NEXT: G_STORE [[LOAD4]](s16), [[COPY34]](p1) :: (volatile store (s16) into `half addrspace(1)* undef`, addrspace 1) - ; CHECK-NEXT: SI_RETURN + ; CHECK-NEXT: G_STORE [[TRUNC]](s1), [[COPY32]](p1) :: (volatile store (s1) into `i1 addrspace(1)* undef`, addrspace 1) + ; CHECK-NEXT: G_STORE [[TRUNC1]](s8), [[COPY33]](p1) :: (volatile store (s8) into `i8 addrspace(1)* undef`, addrspace 1) + ; CHECK-NEXT: G_STORE [[LOAD3]](s16), [[COPY34]](p1) :: (volatile store (s16) into `i16 addrspace(1)* undef`, addrspace 1) + ; CHECK-NEXT: G_STORE [[LOAD4]](s16), [[COPY35]](p1) :: (volatile store (s16) into `half addrspace(1)* undef`, addrspace 1) + ; CHECK-NEXT: [[COPY36:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY31]] + ; CHECK-NEXT: S_SETPC_B64_return [[COPY36]] store volatile <32 x i32> %arg0, <32 x i32> addrspace(1)* undef store volatile i1 %arg1, i1 addrspace(1)* undef store volatile i8 %arg2, i8 addrspace(1)* undef @@ -2029,7 +2213,7 @@ define void @void_func_v32i32_i1_i8_i16(<32 x i32> %arg0, i1 %arg1, i8 %arg2, i1 define void @void_func_v32i32_p3_p5_i16(<32 x i32> %arg0, i8 addrspace(3)* %arg1, i8 addrspace(5)* %arg2) #0 { ; CHECK-LABEL: name: void_func_v32i32_p3_p5_i16 ; CHECK: bb.1 (%ir-block.0): - ; CHECK-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $vgpr8, $vgpr9, $vgpr10, $vgpr11, $vgpr12, $vgpr13, $vgpr14, $vgpr15, $vgpr16, $vgpr17, $vgpr18, $vgpr19, $vgpr20, $vgpr21, $vgpr22, $vgpr23, $vgpr24, $vgpr25, $vgpr26, $vgpr27, $vgpr28, $vgpr29, $vgpr30 + ; CHECK-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $vgpr8, $vgpr9, $vgpr10, $vgpr11, $vgpr12, $vgpr13, $vgpr14, $vgpr15, $vgpr16, $vgpr17, $vgpr18, $vgpr19, $vgpr20, $vgpr21, $vgpr22, $vgpr23, $vgpr24, $vgpr25, $vgpr26, $vgpr27, $vgpr28, $vgpr29, $vgpr30, $sgpr30_sgpr31 ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 @@ 
-2069,13 +2253,15 @@ define void @void_func_v32i32_p3_p5_i16(<32 x i32> %arg0, i8 addrspace(3)* %arg1 ; CHECK-NEXT: [[LOAD1:%[0-9]+]]:_(p3) = G_LOAD [[FRAME_INDEX1]](p5) :: (invariant load (p3) from %fixed-stack.1, addrspace 5) ; CHECK-NEXT: [[FRAME_INDEX2:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.0 ; CHECK-NEXT: [[LOAD2:%[0-9]+]]:_(p5) = G_LOAD [[FRAME_INDEX2]](p5) :: (invariant load (p5) from %fixed-stack.0, align 8, addrspace 5) + ; CHECK-NEXT: [[COPY31:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF - ; CHECK-NEXT: [[COPY31:%[0-9]+]]:_(p1) = COPY [[DEF]](p1) ; CHECK-NEXT: [[COPY32:%[0-9]+]]:_(p1) = COPY [[DEF]](p1) + ; CHECK-NEXT: [[COPY33:%[0-9]+]]:_(p1) = COPY [[DEF]](p1) ; CHECK-NEXT: G_STORE [[BUILD_VECTOR]](<32 x s32>), [[DEF]](p1) :: (volatile store (<32 x s32>) into `<32 x i32> addrspace(1)* undef`, addrspace 1) - ; CHECK-NEXT: G_STORE [[LOAD1]](p3), [[COPY31]](p1) :: (volatile store (p3) into `i8 addrspace(3)* addrspace(1)* undef`, addrspace 1) - ; CHECK-NEXT: G_STORE [[LOAD2]](p5), [[COPY32]](p1) :: (volatile store (p5) into `i8 addrspace(5)* addrspace(1)* undef`, addrspace 1) - ; CHECK-NEXT: SI_RETURN + ; CHECK-NEXT: G_STORE [[LOAD1]](p3), [[COPY32]](p1) :: (volatile store (p3) into `i8 addrspace(3)* addrspace(1)* undef`, addrspace 1) + ; CHECK-NEXT: G_STORE [[LOAD2]](p5), [[COPY33]](p1) :: (volatile store (p5) into `i8 addrspace(5)* addrspace(1)* undef`, addrspace 1) + ; CHECK-NEXT: [[COPY34:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY31]] + ; CHECK-NEXT: S_SETPC_B64_return [[COPY34]] store volatile <32 x i32> %arg0, <32 x i32> addrspace(1)* undef store volatile i8 addrspace(3)* %arg1, i8 addrspace(3)* addrspace(1)* undef store volatile i8 addrspace(5)* %arg2, i8 addrspace(5)* addrspace(1)* undef @@ -2085,7 +2271,7 @@ define void @void_func_v32i32_p3_p5_i16(<32 x i32> %arg0, i8 addrspace(3)* %arg1 define void @void_func_v32i32_v2i32_v2f32(<32 x i32> %arg0, <2 x i32> %arg1, <2 x float> %arg2) #0 { ; CHECK-LABEL: name: void_func_v32i32_v2i32_v2f32 ; CHECK: bb.1 (%ir-block.0): - ; CHECK-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $vgpr8, $vgpr9, $vgpr10, $vgpr11, $vgpr12, $vgpr13, $vgpr14, $vgpr15, $vgpr16, $vgpr17, $vgpr18, $vgpr19, $vgpr20, $vgpr21, $vgpr22, $vgpr23, $vgpr24, $vgpr25, $vgpr26, $vgpr27, $vgpr28, $vgpr29, $vgpr30 + ; CHECK-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $vgpr8, $vgpr9, $vgpr10, $vgpr11, $vgpr12, $vgpr13, $vgpr14, $vgpr15, $vgpr16, $vgpr17, $vgpr18, $vgpr19, $vgpr20, $vgpr21, $vgpr22, $vgpr23, $vgpr24, $vgpr25, $vgpr26, $vgpr27, $vgpr28, $vgpr29, $vgpr30, $sgpr30_sgpr31 ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 @@ -2131,13 +2317,15 @@ define void @void_func_v32i32_v2i32_v2f32(<32 x i32> %arg0, <2 x i32> %arg1, <2 ; CHECK-NEXT: [[FRAME_INDEX4:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.0 ; CHECK-NEXT: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX4]](p5) :: (invariant load (s32) from %fixed-stack.0, align 16, addrspace 5) ; CHECK-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[LOAD3]](s32), [[LOAD4]](s32) + ; CHECK-NEXT: [[COPY31:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF - ; CHECK-NEXT: [[COPY31:%[0-9]+]]:_(p1) = COPY [[DEF]](p1) ; CHECK-NEXT: [[COPY32:%[0-9]+]]:_(p1) = COPY [[DEF]](p1) + ; CHECK-NEXT: [[COPY33:%[0-9]+]]:_(p1) = COPY [[DEF]](p1) ; CHECK-NEXT: G_STORE [[BUILD_VECTOR]](<32 x 
s32>), [[DEF]](p1) :: (volatile store (<32 x s32>) into `<32 x i32> addrspace(1)* undef`, addrspace 1) - ; CHECK-NEXT: G_STORE [[BUILD_VECTOR1]](<2 x s32>), [[COPY31]](p1) :: (volatile store (<2 x s32>) into `<2 x i32> addrspace(1)* undef`, addrspace 1) - ; CHECK-NEXT: G_STORE [[BUILD_VECTOR2]](<2 x s32>), [[COPY32]](p1) :: (volatile store (<2 x s32>) into `<2 x float> addrspace(1)* undef`, addrspace 1) - ; CHECK-NEXT: SI_RETURN + ; CHECK-NEXT: G_STORE [[BUILD_VECTOR1]](<2 x s32>), [[COPY32]](p1) :: (volatile store (<2 x s32>) into `<2 x i32> addrspace(1)* undef`, addrspace 1) + ; CHECK-NEXT: G_STORE [[BUILD_VECTOR2]](<2 x s32>), [[COPY33]](p1) :: (volatile store (<2 x s32>) into `<2 x float> addrspace(1)* undef`, addrspace 1) + ; CHECK-NEXT: [[COPY34:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY31]] + ; CHECK-NEXT: S_SETPC_B64_return [[COPY34]] store volatile <32 x i32> %arg0, <32 x i32> addrspace(1)* undef store volatile <2 x i32> %arg1, <2 x i32> addrspace(1)* undef store volatile <2 x float> %arg2, <2 x float> addrspace(1)* undef @@ -2147,7 +2335,7 @@ define void @void_func_v32i32_v2i32_v2f32(<32 x i32> %arg0, <2 x i32> %arg1, <2 define void @void_func_v32i32_v2i16_v2f16(<32 x i32> %arg0, <2 x i16> %arg1, <2 x half> %arg2) #0 { ; CHECK-LABEL: name: void_func_v32i32_v2i16_v2f16 ; CHECK: bb.1 (%ir-block.0): - ; CHECK-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $vgpr8, $vgpr9, $vgpr10, $vgpr11, $vgpr12, $vgpr13, $vgpr14, $vgpr15, $vgpr16, $vgpr17, $vgpr18, $vgpr19, $vgpr20, $vgpr21, $vgpr22, $vgpr23, $vgpr24, $vgpr25, $vgpr26, $vgpr27, $vgpr28, $vgpr29, $vgpr30 + ; CHECK-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $vgpr8, $vgpr9, $vgpr10, $vgpr11, $vgpr12, $vgpr13, $vgpr14, $vgpr15, $vgpr16, $vgpr17, $vgpr18, $vgpr19, $vgpr20, $vgpr21, $vgpr22, $vgpr23, $vgpr24, $vgpr25, $vgpr26, $vgpr27, $vgpr28, $vgpr29, $vgpr30, $sgpr30_sgpr31 ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 @@ -2187,13 +2375,15 @@ define void @void_func_v32i32_v2i16_v2f16(<32 x i32> %arg0, <2 x i16> %arg1, <2 ; CHECK-NEXT: [[LOAD1:%[0-9]+]]:_(<2 x s16>) = G_LOAD [[FRAME_INDEX1]](p5) :: (invariant load (<2 x s16>) from %fixed-stack.1, addrspace 5) ; CHECK-NEXT: [[FRAME_INDEX2:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.0 ; CHECK-NEXT: [[LOAD2:%[0-9]+]]:_(<2 x s16>) = G_LOAD [[FRAME_INDEX2]](p5) :: (invariant load (<2 x s16>) from %fixed-stack.0, align 8, addrspace 5) + ; CHECK-NEXT: [[COPY31:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF - ; CHECK-NEXT: [[COPY31:%[0-9]+]]:_(p1) = COPY [[DEF]](p1) ; CHECK-NEXT: [[COPY32:%[0-9]+]]:_(p1) = COPY [[DEF]](p1) + ; CHECK-NEXT: [[COPY33:%[0-9]+]]:_(p1) = COPY [[DEF]](p1) ; CHECK-NEXT: G_STORE [[BUILD_VECTOR]](<32 x s32>), [[DEF]](p1) :: (volatile store (<32 x s32>) into `<32 x i32> addrspace(1)* undef`, addrspace 1) - ; CHECK-NEXT: G_STORE [[LOAD1]](<2 x s16>), [[COPY31]](p1) :: (volatile store (<2 x s16>) into `<2 x i16> addrspace(1)* undef`, addrspace 1) - ; CHECK-NEXT: G_STORE [[LOAD2]](<2 x s16>), [[COPY32]](p1) :: (volatile store (<2 x s16>) into `<2 x half> addrspace(1)* undef`, addrspace 1) - ; CHECK-NEXT: SI_RETURN + ; CHECK-NEXT: G_STORE [[LOAD1]](<2 x s16>), [[COPY32]](p1) :: (volatile store (<2 x s16>) into `<2 x i16> addrspace(1)* undef`, addrspace 1) + ; CHECK-NEXT: G_STORE [[LOAD2]](<2 x s16>), [[COPY33]](p1) :: (volatile store (<2 x s16>) into `<2 x half> addrspace(1)* undef`, 
addrspace 1) + ; CHECK-NEXT: [[COPY34:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY31]] + ; CHECK-NEXT: S_SETPC_B64_return [[COPY34]] store volatile <32 x i32> %arg0, <32 x i32> addrspace(1)* undef store volatile <2 x i16> %arg1, <2 x i16> addrspace(1)* undef store volatile <2 x half> %arg2, <2 x half> addrspace(1)* undef @@ -2203,7 +2393,7 @@ define void @void_func_v32i32_v2i16_v2f16(<32 x i32> %arg0, <2 x i16> %arg1, <2 define void @void_func_v32i32_v2i64_v2f64(<32 x i32> %arg0, <2 x i64> %arg1, <2 x double> %arg2) #0 { ; CHECK-LABEL: name: void_func_v32i32_v2i64_v2f64 ; CHECK: bb.1 (%ir-block.0): - ; CHECK-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $vgpr8, $vgpr9, $vgpr10, $vgpr11, $vgpr12, $vgpr13, $vgpr14, $vgpr15, $vgpr16, $vgpr17, $vgpr18, $vgpr19, $vgpr20, $vgpr21, $vgpr22, $vgpr23, $vgpr24, $vgpr25, $vgpr26, $vgpr27, $vgpr28, $vgpr29, $vgpr30 + ; CHECK-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $vgpr8, $vgpr9, $vgpr10, $vgpr11, $vgpr12, $vgpr13, $vgpr14, $vgpr15, $vgpr16, $vgpr17, $vgpr18, $vgpr19, $vgpr20, $vgpr21, $vgpr22, $vgpr23, $vgpr24, $vgpr25, $vgpr26, $vgpr27, $vgpr28, $vgpr29, $vgpr30, $sgpr30_sgpr31 ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 @@ -2261,13 +2451,15 @@ define void @void_func_v32i32_v2i64_v2f64(<32 x i32> %arg0, <2 x i64> %arg1, <2 ; CHECK-NEXT: [[MV2:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[LOAD5]](s32), [[LOAD6]](s32) ; CHECK-NEXT: [[MV3:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[LOAD7]](s32), [[LOAD8]](s32) ; CHECK-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x s64>) = G_BUILD_VECTOR [[MV2]](s64), [[MV3]](s64) + ; CHECK-NEXT: [[COPY31:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF - ; CHECK-NEXT: [[COPY31:%[0-9]+]]:_(p1) = COPY [[DEF]](p1) ; CHECK-NEXT: [[COPY32:%[0-9]+]]:_(p1) = COPY [[DEF]](p1) + ; CHECK-NEXT: [[COPY33:%[0-9]+]]:_(p1) = COPY [[DEF]](p1) ; CHECK-NEXT: G_STORE [[BUILD_VECTOR]](<32 x s32>), [[DEF]](p1) :: (volatile store (<32 x s32>) into `<32 x i32> addrspace(1)* undef`, addrspace 1) - ; CHECK-NEXT: G_STORE [[BUILD_VECTOR1]](<2 x s64>), [[COPY31]](p1) :: (volatile store (<2 x s64>) into `<2 x i64> addrspace(1)* undef`, addrspace 1) - ; CHECK-NEXT: G_STORE [[BUILD_VECTOR2]](<2 x s64>), [[COPY32]](p1) :: (volatile store (<2 x s64>) into `<2 x double> addrspace(1)* undef`, addrspace 1) - ; CHECK-NEXT: SI_RETURN + ; CHECK-NEXT: G_STORE [[BUILD_VECTOR1]](<2 x s64>), [[COPY32]](p1) :: (volatile store (<2 x s64>) into `<2 x i64> addrspace(1)* undef`, addrspace 1) + ; CHECK-NEXT: G_STORE [[BUILD_VECTOR2]](<2 x s64>), [[COPY33]](p1) :: (volatile store (<2 x s64>) into `<2 x double> addrspace(1)* undef`, addrspace 1) + ; CHECK-NEXT: [[COPY34:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY31]] + ; CHECK-NEXT: S_SETPC_B64_return [[COPY34]] store volatile <32 x i32> %arg0, <32 x i32> addrspace(1)* undef store volatile <2 x i64> %arg1, <2 x i64> addrspace(1)* undef store volatile <2 x double> %arg2, <2 x double> addrspace(1)* undef @@ -2277,7 +2469,7 @@ define void @void_func_v32i32_v2i64_v2f64(<32 x i32> %arg0, <2 x i64> %arg1, <2 define void @void_func_v32i32_v4i32_v4f32(<32 x i32> %arg0, <4 x i32> %arg1, <4 x float> %arg2) #0 { ; CHECK-LABEL: name: void_func_v32i32_v4i32_v4f32 ; CHECK: bb.1 (%ir-block.0): - ; CHECK-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $vgpr8, $vgpr9, $vgpr10, $vgpr11, $vgpr12, $vgpr13, $vgpr14, $vgpr15, $vgpr16, $vgpr17, $vgpr18, 
$vgpr19, $vgpr20, $vgpr21, $vgpr22, $vgpr23, $vgpr24, $vgpr25, $vgpr26, $vgpr27, $vgpr28, $vgpr29, $vgpr30 + ; CHECK-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $vgpr8, $vgpr9, $vgpr10, $vgpr11, $vgpr12, $vgpr13, $vgpr14, $vgpr15, $vgpr16, $vgpr17, $vgpr18, $vgpr19, $vgpr20, $vgpr21, $vgpr22, $vgpr23, $vgpr24, $vgpr25, $vgpr26, $vgpr27, $vgpr28, $vgpr29, $vgpr30, $sgpr30_sgpr31 ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 @@ -2331,13 +2523,15 @@ define void @void_func_v32i32_v4i32_v4f32(<32 x i32> %arg0, <4 x i32> %arg1, <4 ; CHECK-NEXT: [[FRAME_INDEX8:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.0 ; CHECK-NEXT: [[LOAD8:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX8]](p5) :: (invariant load (s32) from %fixed-stack.0, align 16, addrspace 5) ; CHECK-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[LOAD5]](s32), [[LOAD6]](s32), [[LOAD7]](s32), [[LOAD8]](s32) + ; CHECK-NEXT: [[COPY31:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF - ; CHECK-NEXT: [[COPY31:%[0-9]+]]:_(p1) = COPY [[DEF]](p1) ; CHECK-NEXT: [[COPY32:%[0-9]+]]:_(p1) = COPY [[DEF]](p1) + ; CHECK-NEXT: [[COPY33:%[0-9]+]]:_(p1) = COPY [[DEF]](p1) ; CHECK-NEXT: G_STORE [[BUILD_VECTOR]](<32 x s32>), [[DEF]](p1) :: (volatile store (<32 x s32>) into `<32 x i32> addrspace(1)* undef`, addrspace 1) - ; CHECK-NEXT: G_STORE [[BUILD_VECTOR1]](<4 x s32>), [[COPY31]](p1) :: (volatile store (<4 x s32>) into `<4 x i32> addrspace(1)* undef`, addrspace 1) - ; CHECK-NEXT: G_STORE [[BUILD_VECTOR2]](<4 x s32>), [[COPY32]](p1) :: (volatile store (<4 x s32>) into `<4 x float> addrspace(1)* undef`, addrspace 1) - ; CHECK-NEXT: SI_RETURN + ; CHECK-NEXT: G_STORE [[BUILD_VECTOR1]](<4 x s32>), [[COPY32]](p1) :: (volatile store (<4 x s32>) into `<4 x i32> addrspace(1)* undef`, addrspace 1) + ; CHECK-NEXT: G_STORE [[BUILD_VECTOR2]](<4 x s32>), [[COPY33]](p1) :: (volatile store (<4 x s32>) into `<4 x float> addrspace(1)* undef`, addrspace 1) + ; CHECK-NEXT: [[COPY34:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY31]] + ; CHECK-NEXT: S_SETPC_B64_return [[COPY34]] store volatile <32 x i32> %arg0, <32 x i32> addrspace(1)* undef store volatile <4 x i32> %arg1, <4 x i32> addrspace(1)* undef store volatile <4 x float> %arg2, <4 x float> addrspace(1)* undef @@ -2347,7 +2541,7 @@ define void @void_func_v32i32_v4i32_v4f32(<32 x i32> %arg0, <4 x i32> %arg1, <4 define void @void_func_v32i32_v8i32_v8f32(<32 x i32> %arg0, <8 x i32> %arg1, <8 x float> %arg2) #0 { ; CHECK-LABEL: name: void_func_v32i32_v8i32_v8f32 ; CHECK: bb.1 (%ir-block.0): - ; CHECK-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $vgpr8, $vgpr9, $vgpr10, $vgpr11, $vgpr12, $vgpr13, $vgpr14, $vgpr15, $vgpr16, $vgpr17, $vgpr18, $vgpr19, $vgpr20, $vgpr21, $vgpr22, $vgpr23, $vgpr24, $vgpr25, $vgpr26, $vgpr27, $vgpr28, $vgpr29, $vgpr30 + ; CHECK-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $vgpr8, $vgpr9, $vgpr10, $vgpr11, $vgpr12, $vgpr13, $vgpr14, $vgpr15, $vgpr16, $vgpr17, $vgpr18, $vgpr19, $vgpr20, $vgpr21, $vgpr22, $vgpr23, $vgpr24, $vgpr25, $vgpr26, $vgpr27, $vgpr28, $vgpr29, $vgpr30, $sgpr30_sgpr31 ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 @@ -2417,13 +2611,15 @@ define void @void_func_v32i32_v8i32_v8f32(<32 x i32> %arg0, <8 x i32> %arg1, <8 ; CHECK-NEXT: [[FRAME_INDEX16:%[0-9]+]]:_(p5) = G_FRAME_INDEX 
%fixed-stack.0 ; CHECK-NEXT: [[LOAD16:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX16]](p5) :: (invariant load (s32) from %fixed-stack.0, align 16, addrspace 5) ; CHECK-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[LOAD9]](s32), [[LOAD10]](s32), [[LOAD11]](s32), [[LOAD12]](s32), [[LOAD13]](s32), [[LOAD14]](s32), [[LOAD15]](s32), [[LOAD16]](s32) + ; CHECK-NEXT: [[COPY31:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF - ; CHECK-NEXT: [[COPY31:%[0-9]+]]:_(p1) = COPY [[DEF]](p1) ; CHECK-NEXT: [[COPY32:%[0-9]+]]:_(p1) = COPY [[DEF]](p1) + ; CHECK-NEXT: [[COPY33:%[0-9]+]]:_(p1) = COPY [[DEF]](p1) ; CHECK-NEXT: G_STORE [[BUILD_VECTOR]](<32 x s32>), [[DEF]](p1) :: (volatile store (<32 x s32>) into `<32 x i32> addrspace(1)* undef`, addrspace 1) - ; CHECK-NEXT: G_STORE [[BUILD_VECTOR1]](<8 x s32>), [[COPY31]](p1) :: (volatile store (<8 x s32>) into `<8 x i32> addrspace(1)* undef`, addrspace 1) - ; CHECK-NEXT: G_STORE [[BUILD_VECTOR2]](<8 x s32>), [[COPY32]](p1) :: (volatile store (<8 x s32>) into `<8 x float> addrspace(1)* undef`, addrspace 1) - ; CHECK-NEXT: SI_RETURN + ; CHECK-NEXT: G_STORE [[BUILD_VECTOR1]](<8 x s32>), [[COPY32]](p1) :: (volatile store (<8 x s32>) into `<8 x i32> addrspace(1)* undef`, addrspace 1) + ; CHECK-NEXT: G_STORE [[BUILD_VECTOR2]](<8 x s32>), [[COPY33]](p1) :: (volatile store (<8 x s32>) into `<8 x float> addrspace(1)* undef`, addrspace 1) + ; CHECK-NEXT: [[COPY34:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY31]] + ; CHECK-NEXT: S_SETPC_B64_return [[COPY34]] store volatile <32 x i32> %arg0, <32 x i32> addrspace(1)* undef store volatile <8 x i32> %arg1, <8 x i32> addrspace(1)* undef store volatile <8 x float> %arg2, <8 x float> addrspace(1)* undef @@ -2433,7 +2629,7 @@ define void @void_func_v32i32_v8i32_v8f32(<32 x i32> %arg0, <8 x i32> %arg1, <8 define void @void_func_v32i32_v16i32_v16f32(<32 x i32> %arg0, <16 x i32> %arg1, <16 x float> %arg2) #0 { ; CHECK-LABEL: name: void_func_v32i32_v16i32_v16f32 ; CHECK: bb.1 (%ir-block.0): - ; CHECK-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $vgpr8, $vgpr9, $vgpr10, $vgpr11, $vgpr12, $vgpr13, $vgpr14, $vgpr15, $vgpr16, $vgpr17, $vgpr18, $vgpr19, $vgpr20, $vgpr21, $vgpr22, $vgpr23, $vgpr24, $vgpr25, $vgpr26, $vgpr27, $vgpr28, $vgpr29, $vgpr30 + ; CHECK-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $vgpr8, $vgpr9, $vgpr10, $vgpr11, $vgpr12, $vgpr13, $vgpr14, $vgpr15, $vgpr16, $vgpr17, $vgpr18, $vgpr19, $vgpr20, $vgpr21, $vgpr22, $vgpr23, $vgpr24, $vgpr25, $vgpr26, $vgpr27, $vgpr28, $vgpr29, $vgpr30, $sgpr30_sgpr31 ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 @@ -2535,13 +2731,15 @@ define void @void_func_v32i32_v16i32_v16f32(<32 x i32> %arg0, <16 x i32> %arg1, ; CHECK-NEXT: [[FRAME_INDEX32:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.0 ; CHECK-NEXT: [[LOAD32:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX32]](p5) :: (invariant load (s32) from %fixed-stack.0, align 16, addrspace 5) ; CHECK-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<16 x s32>) = G_BUILD_VECTOR [[LOAD17]](s32), [[LOAD18]](s32), [[LOAD19]](s32), [[LOAD20]](s32), [[LOAD21]](s32), [[LOAD22]](s32), [[LOAD23]](s32), [[LOAD24]](s32), [[LOAD25]](s32), [[LOAD26]](s32), [[LOAD27]](s32), [[LOAD28]](s32), [[LOAD29]](s32), [[LOAD30]](s32), [[LOAD31]](s32), [[LOAD32]](s32) + ; CHECK-NEXT: [[COPY31:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF - ; CHECK-NEXT: 
[[COPY31:%[0-9]+]]:_(p1) = COPY [[DEF]](p1) ; CHECK-NEXT: [[COPY32:%[0-9]+]]:_(p1) = COPY [[DEF]](p1) + ; CHECK-NEXT: [[COPY33:%[0-9]+]]:_(p1) = COPY [[DEF]](p1) ; CHECK-NEXT: G_STORE [[BUILD_VECTOR]](<32 x s32>), [[DEF]](p1) :: (volatile store (<32 x s32>) into `<32 x i32> addrspace(1)* undef`, addrspace 1) - ; CHECK-NEXT: G_STORE [[BUILD_VECTOR1]](<16 x s32>), [[COPY31]](p1) :: (volatile store (<16 x s32>) into `<16 x i32> addrspace(1)* undef`, addrspace 1) - ; CHECK-NEXT: G_STORE [[BUILD_VECTOR2]](<16 x s32>), [[COPY32]](p1) :: (volatile store (<16 x s32>) into `<16 x float> addrspace(1)* undef`, addrspace 1) - ; CHECK-NEXT: SI_RETURN + ; CHECK-NEXT: G_STORE [[BUILD_VECTOR1]](<16 x s32>), [[COPY32]](p1) :: (volatile store (<16 x s32>) into `<16 x i32> addrspace(1)* undef`, addrspace 1) + ; CHECK-NEXT: G_STORE [[BUILD_VECTOR2]](<16 x s32>), [[COPY33]](p1) :: (volatile store (<16 x s32>) into `<16 x float> addrspace(1)* undef`, addrspace 1) + ; CHECK-NEXT: [[COPY34:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY31]] + ; CHECK-NEXT: S_SETPC_B64_return [[COPY34]] store volatile <32 x i32> %arg0, <32 x i32> addrspace(1)* undef store volatile <16 x i32> %arg1, <16 x i32> addrspace(1)* undef store volatile <16 x float> %arg2, <16 x float> addrspace(1)* undef @@ -2552,26 +2750,28 @@ define void @void_func_v32i32_v16i32_v16f32(<32 x i32> %arg0, <16 x i32> %arg1, define void @void_func_v3f32_wasted_reg(<3 x float> %arg0, i32 %arg1) #0 { ; CHECK-LABEL: name: void_func_v3f32_wasted_reg ; CHECK: bb.1 (%ir-block.0): - ; CHECK-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3 + ; CHECK-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $sgpr30_sgpr31 ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32) ; CHECK-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $vgpr3 + ; CHECK-NEXT: [[COPY4:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 ; CHECK-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 2 ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p3) = G_IMPLICIT_DEF - ; CHECK-NEXT: [[COPY4:%[0-9]+]]:_(p3) = COPY [[DEF]](p3) + ; CHECK-NEXT: [[COPY5:%[0-9]+]]:_(p3) = COPY [[DEF]](p3) ; CHECK-NEXT: [[EVEC:%[0-9]+]]:_(s32) = G_EXTRACT_VECTOR_ELT [[BUILD_VECTOR]](<3 x s32>), [[C]](s32) ; CHECK-NEXT: [[EVEC1:%[0-9]+]]:_(s32) = G_EXTRACT_VECTOR_ELT [[BUILD_VECTOR]](<3 x s32>), [[C1]](s32) ; CHECK-NEXT: [[EVEC2:%[0-9]+]]:_(s32) = G_EXTRACT_VECTOR_ELT [[BUILD_VECTOR]](<3 x s32>), [[C2]](s32) ; CHECK-NEXT: G_STORE [[EVEC]](s32), [[DEF]](p3) :: (volatile store (s32) into `float addrspace(3)* undef`, addrspace 3) ; CHECK-NEXT: G_STORE [[EVEC1]](s32), [[DEF]](p3) :: (volatile store (s32) into `float addrspace(3)* undef`, addrspace 3) ; CHECK-NEXT: G_STORE [[EVEC2]](s32), [[DEF]](p3) :: (volatile store (s32) into `float addrspace(3)* undef`, addrspace 3) - ; CHECK-NEXT: G_STORE [[COPY3]](s32), [[COPY4]](p3) :: (volatile store (s32) into `i32 addrspace(3)* undef`, addrspace 3) - ; CHECK-NEXT: SI_RETURN + ; CHECK-NEXT: G_STORE [[COPY3]](s32), [[COPY5]](p3) :: (volatile store (s32) into `i32 addrspace(3)* undef`, addrspace 3) + ; CHECK-NEXT: [[COPY6:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY4]] + ; CHECK-NEXT: S_SETPC_B64_return [[COPY6]] %arg0.0 = extractelement <3 x float> %arg0, i32 0 %arg0.1 = extractelement <3 x float> %arg0, i32 1 %arg0.2 
= extractelement <3 x float> %arg0, i32 2 @@ -2585,13 +2785,14 @@ define void @void_func_v3f32_wasted_reg(<3 x float> %arg0, i32 %arg1) #0 { define void @void_func_v3i32_wasted_reg(<3 x i32> %arg0, i32 %arg1) #0 { ; CHECK-LABEL: name: void_func_v3i32_wasted_reg ; CHECK: bb.1 (%ir-block.0): - ; CHECK-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3 + ; CHECK-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $sgpr30_sgpr31 ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32) ; CHECK-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $vgpr3 + ; CHECK-NEXT: [[COPY4:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 ; CHECK-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 2 @@ -2603,7 +2804,8 @@ define void @void_func_v3i32_wasted_reg(<3 x i32> %arg0, i32 %arg1) #0 { ; CHECK-NEXT: G_STORE [[EVEC1]](s32), [[DEF]](p3) :: (volatile store (s32) into `i32 addrspace(3)* undef`, addrspace 3) ; CHECK-NEXT: G_STORE [[EVEC2]](s32), [[DEF]](p3) :: (volatile store (s32) into `i32 addrspace(3)* undef`, addrspace 3) ; CHECK-NEXT: G_STORE [[COPY3]](s32), [[DEF]](p3) :: (volatile store (s32) into `i32 addrspace(3)* undef`, addrspace 3) - ; CHECK-NEXT: SI_RETURN + ; CHECK-NEXT: [[COPY5:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY4]] + ; CHECK-NEXT: S_SETPC_B64_return [[COPY5]] %arg0.0 = extractelement <3 x i32> %arg0, i32 0 %arg0.1 = extractelement <3 x i32> %arg0, i32 1 %arg0.2 = extractelement <3 x i32> %arg0, i32 2 @@ -2618,7 +2820,7 @@ define void @void_func_v3i32_wasted_reg(<3 x i32> %arg0, i32 %arg1) #0 { define void @void_func_v16i8(<16 x i8> %arg0) #0 { ; CHECK-LABEL: name: void_func_v16i8 ; CHECK: bb.1 (%ir-block.0): - ; CHECK-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $vgpr8, $vgpr9, $vgpr10, $vgpr11, $vgpr12, $vgpr13, $vgpr14, $vgpr15 + ; CHECK-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $vgpr8, $vgpr9, $vgpr10, $vgpr11, $vgpr12, $vgpr13, $vgpr14, $vgpr15, $sgpr30_sgpr31 ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY]](s32) @@ -2654,9 +2856,11 @@ define void @void_func_v16i8(<16 x i8> %arg0) #0 { ; CHECK-NEXT: [[TRUNC15:%[0-9]+]]:_(s16) = G_TRUNC [[COPY15]](s32) ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<16 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[TRUNC1]](s16), [[TRUNC2]](s16), [[TRUNC3]](s16), [[TRUNC4]](s16), [[TRUNC5]](s16), [[TRUNC6]](s16), [[TRUNC7]](s16), [[TRUNC8]](s16), [[TRUNC9]](s16), [[TRUNC10]](s16), [[TRUNC11]](s16), [[TRUNC12]](s16), [[TRUNC13]](s16), [[TRUNC14]](s16), [[TRUNC15]](s16) ; CHECK-NEXT: [[TRUNC16:%[0-9]+]]:_(<16 x s8>) = G_TRUNC [[BUILD_VECTOR]](<16 x s16>) + ; CHECK-NEXT: [[COPY16:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF ; CHECK-NEXT: G_STORE [[TRUNC16]](<16 x s8>), [[DEF]](p1) :: (volatile store (<16 x s8>) into `<16 x i8> addrspace(1)* undef`, addrspace 1) - ; CHECK-NEXT: SI_RETURN + ; CHECK-NEXT: [[COPY17:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY16]] + ; CHECK-NEXT: S_SETPC_B64_return [[COPY17]] store volatile <16 x i8> %arg0, <16 x i8> addrspace(1)* undef ret void } @@ -2665,7 +2869,7 @@ define void @void_func_v16i8(<16 x i8> %arg0) #0 { define void 
@void_func_v32i32_v16i8(<32 x i32> %arg0, <16 x i8> %arg1) #0 { ; CHECK-LABEL: name: void_func_v32i32_v16i8 ; CHECK: bb.1 (%ir-block.0): - ; CHECK-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $vgpr8, $vgpr9, $vgpr10, $vgpr11, $vgpr12, $vgpr13, $vgpr14, $vgpr15, $vgpr16, $vgpr17, $vgpr18, $vgpr19, $vgpr20, $vgpr21, $vgpr22, $vgpr23, $vgpr24, $vgpr25, $vgpr26, $vgpr27, $vgpr28, $vgpr29, $vgpr30 + ; CHECK-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $vgpr8, $vgpr9, $vgpr10, $vgpr11, $vgpr12, $vgpr13, $vgpr14, $vgpr15, $vgpr16, $vgpr17, $vgpr18, $vgpr19, $vgpr20, $vgpr21, $vgpr22, $vgpr23, $vgpr24, $vgpr25, $vgpr26, $vgpr27, $vgpr28, $vgpr29, $vgpr30, $sgpr30_sgpr31 ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 @@ -2735,11 +2939,13 @@ define void @void_func_v32i32_v16i8(<32 x i32> %arg0, <16 x i8> %arg1) #0 { ; CHECK-NEXT: [[LOAD16:%[0-9]+]]:_(s16) = G_LOAD [[FRAME_INDEX16]](p5) :: (invariant load (s16) from %fixed-stack.0, align 16, addrspace 5) ; CHECK-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<16 x s16>) = G_BUILD_VECTOR [[LOAD1]](s16), [[LOAD2]](s16), [[LOAD3]](s16), [[LOAD4]](s16), [[LOAD5]](s16), [[LOAD6]](s16), [[LOAD7]](s16), [[LOAD8]](s16), [[LOAD9]](s16), [[LOAD10]](s16), [[LOAD11]](s16), [[LOAD12]](s16), [[LOAD13]](s16), [[LOAD14]](s16), [[LOAD15]](s16), [[LOAD16]](s16) ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(<16 x s8>) = G_TRUNC [[BUILD_VECTOR1]](<16 x s16>) + ; CHECK-NEXT: [[COPY31:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF - ; CHECK-NEXT: [[COPY31:%[0-9]+]]:_(p1) = COPY [[DEF]](p1) + ; CHECK-NEXT: [[COPY32:%[0-9]+]]:_(p1) = COPY [[DEF]](p1) ; CHECK-NEXT: G_STORE [[BUILD_VECTOR]](<32 x s32>), [[DEF]](p1) :: (volatile store (<32 x s32>) into `<32 x i32> addrspace(1)* undef`, addrspace 1) - ; CHECK-NEXT: G_STORE [[TRUNC]](<16 x s8>), [[COPY31]](p1) :: (volatile store (<16 x s8>) into `<16 x i8> addrspace(1)* undef`, addrspace 1) - ; CHECK-NEXT: SI_RETURN + ; CHECK-NEXT: G_STORE [[TRUNC]](<16 x s8>), [[COPY32]](p1) :: (volatile store (<16 x s8>) into `<16 x i8> addrspace(1)* undef`, addrspace 1) + ; CHECK-NEXT: [[COPY33:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY31]] + ; CHECK-NEXT: S_SETPC_B64_return [[COPY33]] store volatile <32 x i32> %arg0, <32 x i32> addrspace(1)* undef store volatile <16 x i8> %arg1, <16 x i8> addrspace(1)* undef ret void @@ -2748,7 +2954,7 @@ define void @void_func_v32i32_v16i8(<32 x i32> %arg0, <16 x i8> %arg1) #0 { define void @pointer_in_struct_argument({i8 addrspace(3)*, i8 addrspace(1)*} %arg0, i8 %pad, {i8 addrspace(3)*, i8 addrspace(1234)*} %arg1) { ; CHECK-LABEL: name: pointer_in_struct_argument ; CHECK: bb.1 (%ir-block.0): - ; CHECK-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6 + ; CHECK-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $sgpr30_sgpr31 ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 @@ -2761,16 +2967,18 @@ define void @pointer_in_struct_argument({i8 addrspace(3)*, i8 addrspace(1)*} %ar ; CHECK-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $vgpr5 ; CHECK-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $vgpr6 ; CHECK-NEXT: [[MV1:%[0-9]+]]:_(p1234) = G_MERGE_VALUES [[COPY5]](s32), [[COPY6]](s32) + ; CHECK-NEXT: [[COPY7:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 ; CHECK-NEXT: [[C:%[0-9]+]]:_(p1) = G_CONSTANT i64 0 - ; CHECK-NEXT: [[COPY7:%[0-9]+]]:_(p1) = COPY 
[[C]](p1) ; CHECK-NEXT: [[COPY8:%[0-9]+]]:_(p1) = COPY [[C]](p1) ; CHECK-NEXT: [[COPY9:%[0-9]+]]:_(p1) = COPY [[C]](p1) + ; CHECK-NEXT: [[COPY10:%[0-9]+]]:_(p1) = COPY [[C]](p1) ; CHECK-NEXT: G_STORE [[COPY]](p3), [[C]](p1) :: (volatile store (p3) into `i8 addrspace(3)* addrspace(1)* null`, addrspace 1) - ; CHECK-NEXT: G_STORE [[MV]](p1), [[COPY7]](p1) :: (volatile store (p1) into `i8 addrspace(1)* addrspace(1)* null`, addrspace 1) - ; CHECK-NEXT: G_STORE [[TRUNC1]](s8), [[COPY8]](p1) :: (volatile store (s8) into `i8 addrspace(1)* null`, addrspace 1) + ; CHECK-NEXT: G_STORE [[MV]](p1), [[COPY8]](p1) :: (volatile store (p1) into `i8 addrspace(1)* addrspace(1)* null`, addrspace 1) + ; CHECK-NEXT: G_STORE [[TRUNC1]](s8), [[COPY9]](p1) :: (volatile store (s8) into `i8 addrspace(1)* null`, addrspace 1) ; CHECK-NEXT: G_STORE [[COPY4]](p3), [[C]](p1) :: (volatile store (p3) into `i8 addrspace(3)* addrspace(1)* null`, addrspace 1) - ; CHECK-NEXT: G_STORE [[MV1]](p1234), [[COPY9]](p1) :: (volatile store (p1234) into `i8 addrspace(1234)* addrspace(1)* null`, addrspace 1) - ; CHECK-NEXT: SI_RETURN + ; CHECK-NEXT: G_STORE [[MV1]](p1234), [[COPY10]](p1) :: (volatile store (p1234) into `i8 addrspace(1234)* addrspace(1)* null`, addrspace 1) + ; CHECK-NEXT: [[COPY11:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY7]] + ; CHECK-NEXT: S_SETPC_B64_return [[COPY11]] %val0 = extractvalue {i8 addrspace(3)*, i8 addrspace(1)*} %arg0, 0 %val1 = extractvalue {i8 addrspace(3)*, i8 addrspace(1)*} %arg0, 1 %val2 = extractvalue {i8 addrspace(3)*, i8 addrspace(1234)*} %arg1, 0 @@ -2786,7 +2994,7 @@ define void @pointer_in_struct_argument({i8 addrspace(3)*, i8 addrspace(1)*} %ar define void @vector_ptr_in_struct_arg({ <2 x i8 addrspace(1)*>, <2 x i8 addrspace(3)*> } %arg) { ; CHECK-LABEL: name: vector_ptr_in_struct_arg ; CHECK: bb.1 (%ir-block.0): - ; CHECK-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5 + ; CHECK-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $sgpr30_sgpr31 ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 @@ -2798,12 +3006,14 @@ define void @vector_ptr_in_struct_arg({ <2 x i8 addrspace(1)*>, <2 x i8 addrspac ; CHECK-NEXT: [[COPY4:%[0-9]+]]:_(p3) = COPY $vgpr4 ; CHECK-NEXT: [[COPY5:%[0-9]+]]:_(p3) = COPY $vgpr5 ; CHECK-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x p3>) = G_BUILD_VECTOR [[COPY4]](p3), [[COPY5]](p3) + ; CHECK-NEXT: [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF ; CHECK-NEXT: G_STORE [[BUILD_VECTOR]](<2 x p1>), [[DEF]](p1) :: (store (<2 x p1>) into `{ <2 x i8 addrspace(1)*>, <2 x i8 addrspace(3)*> } addrspace(1)* undef`, addrspace 1) ; CHECK-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 16 ; CHECK-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[DEF]], [[C]](s64) ; CHECK-NEXT: G_STORE [[BUILD_VECTOR1]](<2 x p3>), [[PTR_ADD]](p1) :: (store (<2 x p3>) into `{ <2 x i8 addrspace(1)*>, <2 x i8 addrspace(3)*> } addrspace(1)* undef` + 16, align 16, addrspace 1) - ; CHECK-NEXT: SI_RETURN + ; CHECK-NEXT: [[COPY7:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY6]] + ; CHECK-NEXT: S_SETPC_B64_return [[COPY7]] store { <2 x i8 addrspace(1)*>, <2 x i8 addrspace(3)*> } %arg, { <2 x i8 addrspace(1)*>, <2 x i8 addrspace(3)*> } addrspace(1)* undef ret void } diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-getelementptr.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-getelementptr.ll index 2e660884e7db..c3cc9ce6e54c 100644 --- 
a/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-getelementptr.ll +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-getelementptr.ll @@ -5,7 +5,7 @@ define <2 x i32 addrspace(1)*> @vector_gep_v2p1_index_v2i64(<2 x i32 addrspace(1)*> %ptr, <2 x i64> %idx) { ; CHECK-LABEL: name: vector_gep_v2p1_index_v2i64 ; CHECK: bb.1 (%ir-block.0): - ; CHECK-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7 + ; CHECK-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $sgpr30_sgpr31 ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 @@ -21,17 +21,19 @@ define <2 x i32 addrspace(1)*> @vector_gep_v2p1_index_v2i64(<2 x i32 addrspace(1 ; CHECK-NEXT: [[MV2:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY4]](s32), [[COPY5]](s32) ; CHECK-NEXT: [[MV3:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY6]](s32), [[COPY7]](s32) ; CHECK-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s64>) = G_BUILD_VECTOR [[MV2]](s64), [[MV3]](s64) + ; CHECK-NEXT: [[COPY8:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 ; CHECK-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 4 ; CHECK-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x s64>) = G_BUILD_VECTOR [[C]](s64), [[C]](s64) ; CHECK-NEXT: [[MUL:%[0-9]+]]:_(<2 x s64>) = G_MUL [[BUILD_VECTOR1]], [[BUILD_VECTOR2]] ; CHECK-NEXT: [[PTR_ADD:%[0-9]+]]:_(<2 x p1>) = G_PTR_ADD [[BUILD_VECTOR]], [[MUL]](<2 x s64>) - ; CHECK-NEXT: [[COPY8:%[0-9]+]]:_(<2 x p1>) = COPY [[PTR_ADD]](<2 x p1>) - ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY8]](<2 x p1>) + ; CHECK-NEXT: [[COPY9:%[0-9]+]]:_(<2 x p1>) = COPY [[PTR_ADD]](<2 x p1>) + ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY9]](<2 x p1>) ; CHECK-NEXT: $vgpr0 = COPY [[UV]](s32) ; CHECK-NEXT: $vgpr1 = COPY [[UV1]](s32) ; CHECK-NEXT: $vgpr2 = COPY [[UV2]](s32) ; CHECK-NEXT: $vgpr3 = COPY [[UV3]](s32) - ; CHECK-NEXT: SI_RETURN implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3 + ; CHECK-NEXT: [[COPY10:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY8]] + ; CHECK-NEXT: S_SETPC_B64_return [[COPY10]], implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3 %gep = getelementptr i32, <2 x i32 addrspace(1)*> %ptr, <2 x i64> %idx ret <2 x i32 addrspace(1)*> %gep } @@ -40,7 +42,7 @@ define <2 x i32 addrspace(1)*> @vector_gep_v2p1_index_v2i64(<2 x i32 addrspace(1 define <2 x i32 addrspace(3)*> @vector_gep_v2p3_index_v2i32(<2 x i32 addrspace(3)*> %ptr, <2 x i32> %idx) { ; CHECK-LABEL: name: vector_gep_v2p3_index_v2i32 ; CHECK: bb.1 (%ir-block.0): - ; CHECK-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3 + ; CHECK-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $sgpr30_sgpr31 ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(p3) = COPY $vgpr1 @@ -48,15 +50,17 @@ define <2 x i32 addrspace(3)*> @vector_gep_v2p3_index_v2i32(<2 x i32 addrspace(3 ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 ; CHECK-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $vgpr3 ; CHECK-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[COPY2]](s32), [[COPY3]](s32) + ; CHECK-NEXT: [[COPY4:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 ; CHECK-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[C]](s32), [[C]](s32) ; CHECK-NEXT: [[MUL:%[0-9]+]]:_(<2 x s32>) = G_MUL [[BUILD_VECTOR1]], [[BUILD_VECTOR2]] 
; CHECK-NEXT: [[PTR_ADD:%[0-9]+]]:_(<2 x p3>) = G_PTR_ADD [[BUILD_VECTOR]], [[MUL]](<2 x s32>) - ; CHECK-NEXT: [[COPY4:%[0-9]+]]:_(<2 x p3>) = COPY [[PTR_ADD]](<2 x p3>) - ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY4]](<2 x p3>) + ; CHECK-NEXT: [[COPY5:%[0-9]+]]:_(<2 x p3>) = COPY [[PTR_ADD]](<2 x p3>) + ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY5]](<2 x p3>) ; CHECK-NEXT: $vgpr0 = COPY [[UV]](s32) ; CHECK-NEXT: $vgpr1 = COPY [[UV1]](s32) - ; CHECK-NEXT: SI_RETURN implicit $vgpr0, implicit $vgpr1 + ; CHECK-NEXT: [[COPY6:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY4]] + ; CHECK-NEXT: S_SETPC_B64_return [[COPY6]], implicit $vgpr0, implicit $vgpr1 %gep = getelementptr i32, <2 x i32 addrspace(3)*> %ptr, <2 x i32> %idx ret <2 x i32 addrspace(3)*> %gep } @@ -65,7 +69,7 @@ define <2 x i32 addrspace(3)*> @vector_gep_v2p3_index_v2i32(<2 x i32 addrspace(3 define <2 x i32 addrspace(1)*> @vector_gep_v2p1_index_v2i32(<2 x i32 addrspace(1)*> %ptr, <2 x i32> %idx) { ; CHECK-LABEL: name: vector_gep_v2p1_index_v2i32 ; CHECK: bb.1 (%ir-block.0): - ; CHECK-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5 + ; CHECK-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $sgpr30_sgpr31 ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 @@ -77,18 +81,20 @@ define <2 x i32 addrspace(1)*> @vector_gep_v2p1_index_v2i32(<2 x i32 addrspace(1 ; CHECK-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $vgpr4 ; CHECK-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $vgpr5 ; CHECK-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[COPY4]](s32), [[COPY5]](s32) + ; CHECK-NEXT: [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 ; CHECK-NEXT: [[SEXT:%[0-9]+]]:_(<2 x s64>) = G_SEXT [[BUILD_VECTOR1]](<2 x s32>) ; CHECK-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 4 ; CHECK-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x s64>) = G_BUILD_VECTOR [[C]](s64), [[C]](s64) ; CHECK-NEXT: [[MUL:%[0-9]+]]:_(<2 x s64>) = G_MUL [[SEXT]], [[BUILD_VECTOR2]] ; CHECK-NEXT: [[PTR_ADD:%[0-9]+]]:_(<2 x p1>) = G_PTR_ADD [[BUILD_VECTOR]], [[MUL]](<2 x s64>) - ; CHECK-NEXT: [[COPY6:%[0-9]+]]:_(<2 x p1>) = COPY [[PTR_ADD]](<2 x p1>) - ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY6]](<2 x p1>) + ; CHECK-NEXT: [[COPY7:%[0-9]+]]:_(<2 x p1>) = COPY [[PTR_ADD]](<2 x p1>) + ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY7]](<2 x p1>) ; CHECK-NEXT: $vgpr0 = COPY [[UV]](s32) ; CHECK-NEXT: $vgpr1 = COPY [[UV1]](s32) ; CHECK-NEXT: $vgpr2 = COPY [[UV2]](s32) ; CHECK-NEXT: $vgpr3 = COPY [[UV3]](s32) - ; CHECK-NEXT: SI_RETURN implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3 + ; CHECK-NEXT: [[COPY8:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY6]] + ; CHECK-NEXT: S_SETPC_B64_return [[COPY8]], implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3 %gep = getelementptr i32, <2 x i32 addrspace(1)*> %ptr, <2 x i32> %idx ret <2 x i32 addrspace(1)*> %gep } @@ -97,7 +103,7 @@ define <2 x i32 addrspace(1)*> @vector_gep_v2p1_index_v2i32(<2 x i32 addrspace(1 define <2 x i32 addrspace(1)*> @vector_gep_v2p1_index_i64(<2 x i32 addrspace(1)*> %ptr, i64 %idx) { ; CHECK-LABEL: name: vector_gep_v2p1_index_i64 ; CHECK: bb.1 (%ir-block.0): - ; CHECK-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5 + ; CHECK-NEXT: liveins: $vgpr0, 
$vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $sgpr30_sgpr31 ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 @@ -109,19 +115,21 @@ define <2 x i32 addrspace(1)*> @vector_gep_v2p1_index_i64(<2 x i32 addrspace(1)* ; CHECK-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $vgpr4 ; CHECK-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $vgpr5 ; CHECK-NEXT: [[MV2:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY4]](s32), [[COPY5]](s32) + ; CHECK-NEXT: [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 ; CHECK-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s64>) = G_BUILD_VECTOR [[MV2]](s64), [[MV2]](s64) - ; CHECK-NEXT: [[COPY6:%[0-9]+]]:_(<2 x s64>) = COPY [[BUILD_VECTOR1]](<2 x s64>) + ; CHECK-NEXT: [[COPY7:%[0-9]+]]:_(<2 x s64>) = COPY [[BUILD_VECTOR1]](<2 x s64>) ; CHECK-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 4 ; CHECK-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x s64>) = G_BUILD_VECTOR [[C]](s64), [[C]](s64) - ; CHECK-NEXT: [[MUL:%[0-9]+]]:_(<2 x s64>) = G_MUL [[COPY6]], [[BUILD_VECTOR2]] + ; CHECK-NEXT: [[MUL:%[0-9]+]]:_(<2 x s64>) = G_MUL [[COPY7]], [[BUILD_VECTOR2]] ; CHECK-NEXT: [[PTR_ADD:%[0-9]+]]:_(<2 x p1>) = G_PTR_ADD [[BUILD_VECTOR]], [[MUL]](<2 x s64>) - ; CHECK-NEXT: [[COPY7:%[0-9]+]]:_(<2 x p1>) = COPY [[PTR_ADD]](<2 x p1>) - ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY7]](<2 x p1>) + ; CHECK-NEXT: [[COPY8:%[0-9]+]]:_(<2 x p1>) = COPY [[PTR_ADD]](<2 x p1>) + ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY8]](<2 x p1>) ; CHECK-NEXT: $vgpr0 = COPY [[UV]](s32) ; CHECK-NEXT: $vgpr1 = COPY [[UV1]](s32) ; CHECK-NEXT: $vgpr2 = COPY [[UV2]](s32) ; CHECK-NEXT: $vgpr3 = COPY [[UV3]](s32) - ; CHECK-NEXT: SI_RETURN implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3 + ; CHECK-NEXT: [[COPY9:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY6]] + ; CHECK-NEXT: S_SETPC_B64_return [[COPY9]], implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3 %gep = getelementptr i32, <2 x i32 addrspace(1)*> %ptr, i64 %idx ret <2 x i32 addrspace(1)*> %gep } @@ -130,7 +138,7 @@ define <2 x i32 addrspace(1)*> @vector_gep_v2p1_index_i64(<2 x i32 addrspace(1)* define <2 x i32 addrspace(1)*> @vector_gep_v2p1_index_i32(<2 x i32 addrspace(1)*> %ptr, i32 %idx) { ; CHECK-LABEL: name: vector_gep_v2p1_index_i32 ; CHECK: bb.1 (%ir-block.0): - ; CHECK-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4 + ; CHECK-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $sgpr30_sgpr31 ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 @@ -140,19 +148,21 @@ define <2 x i32 addrspace(1)*> @vector_gep_v2p1_index_i32(<2 x i32 addrspace(1)* ; CHECK-NEXT: [[MV1:%[0-9]+]]:_(p1) = G_MERGE_VALUES [[COPY2]](s32), [[COPY3]](s32) ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x p1>) = G_BUILD_VECTOR [[MV]](p1), [[MV1]](p1) ; CHECK-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $vgpr4 + ; CHECK-NEXT: [[COPY5:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 ; CHECK-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[COPY4]](s32), [[COPY4]](s32) ; CHECK-NEXT: [[SEXT:%[0-9]+]]:_(<2 x s64>) = G_SEXT [[BUILD_VECTOR1]](<2 x s32>) ; CHECK-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 4 ; CHECK-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x s64>) = G_BUILD_VECTOR [[C]](s64), [[C]](s64) ; CHECK-NEXT: [[MUL:%[0-9]+]]:_(<2 x s64>) = G_MUL [[SEXT]], [[BUILD_VECTOR2]] ; CHECK-NEXT: 
[[PTR_ADD:%[0-9]+]]:_(<2 x p1>) = G_PTR_ADD [[BUILD_VECTOR]], [[MUL]](<2 x s64>) - ; CHECK-NEXT: [[COPY5:%[0-9]+]]:_(<2 x p1>) = COPY [[PTR_ADD]](<2 x p1>) - ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY5]](<2 x p1>) + ; CHECK-NEXT: [[COPY6:%[0-9]+]]:_(<2 x p1>) = COPY [[PTR_ADD]](<2 x p1>) + ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY6]](<2 x p1>) ; CHECK-NEXT: $vgpr0 = COPY [[UV]](s32) ; CHECK-NEXT: $vgpr1 = COPY [[UV1]](s32) ; CHECK-NEXT: $vgpr2 = COPY [[UV2]](s32) ; CHECK-NEXT: $vgpr3 = COPY [[UV3]](s32) - ; CHECK-NEXT: SI_RETURN implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3 + ; CHECK-NEXT: [[COPY7:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY5]] + ; CHECK-NEXT: S_SETPC_B64_return [[COPY7]], implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3 %gep = getelementptr i32, <2 x i32 addrspace(1)*> %ptr, i32 %idx ret <2 x i32 addrspace(1)*> %gep } @@ -161,7 +171,7 @@ define <2 x i32 addrspace(1)*> @vector_gep_v2p1_index_i32(<2 x i32 addrspace(1)* define <2 x i32 addrspace(1)*> @vector_gep_v2p1_index_v2i64_constant(<2 x i32 addrspace(1)*> %ptr, <2 x i64> %idx) { ; CHECK-LABEL: name: vector_gep_v2p1_index_v2i64_constant ; CHECK: bb.1 (%ir-block.0): - ; CHECK-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7 + ; CHECK-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $sgpr30_sgpr31 ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 @@ -177,6 +187,7 @@ define <2 x i32 addrspace(1)*> @vector_gep_v2p1_index_v2i64_constant(<2 x i32 ad ; CHECK-NEXT: [[MV2:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY4]](s32), [[COPY5]](s32) ; CHECK-NEXT: [[MV3:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY6]](s32), [[COPY7]](s32) ; CHECK-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s64>) = G_BUILD_VECTOR [[MV2]](s64), [[MV3]](s64) + ; CHECK-NEXT: [[COPY8:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 ; CHECK-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 1 ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 2 ; CHECK-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x s64>) = G_BUILD_VECTOR [[C]](s64), [[C1]](s64) @@ -185,13 +196,14 @@ define <2 x i32 addrspace(1)*> @vector_gep_v2p1_index_v2i64_constant(<2 x i32 ad ; CHECK-NEXT: [[C3:%[0-9]+]]:_(s64) = G_CONSTANT i64 8 ; CHECK-NEXT: [[BUILD_VECTOR4:%[0-9]+]]:_(<2 x s64>) = G_BUILD_VECTOR [[C2]](s64), [[C3]](s64) ; CHECK-NEXT: [[PTR_ADD:%[0-9]+]]:_(<2 x p1>) = G_PTR_ADD [[BUILD_VECTOR]], [[BUILD_VECTOR4]](<2 x s64>) - ; CHECK-NEXT: [[COPY8:%[0-9]+]]:_(<2 x p1>) = COPY [[PTR_ADD]](<2 x p1>) - ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY8]](<2 x p1>) + ; CHECK-NEXT: [[COPY9:%[0-9]+]]:_(<2 x p1>) = COPY [[PTR_ADD]](<2 x p1>) + ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY9]](<2 x p1>) ; CHECK-NEXT: $vgpr0 = COPY [[UV]](s32) ; CHECK-NEXT: $vgpr1 = COPY [[UV1]](s32) ; CHECK-NEXT: $vgpr2 = COPY [[UV2]](s32) ; CHECK-NEXT: $vgpr3 = COPY [[UV3]](s32) - ; CHECK-NEXT: SI_RETURN implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3 + ; CHECK-NEXT: [[COPY10:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY8]] + ; CHECK-NEXT: S_SETPC_B64_return [[COPY10]], implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit 
$vgpr3 %gep = getelementptr i32, <2 x i32 addrspace(1)*> %ptr, <2 x i64> ret <2 x i32 addrspace(1)*> %gep } diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-indirect-call.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-indirect-call.ll index f0383259abb7..cc2c95324c2e 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-indirect-call.ll +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-indirect-call.ll @@ -57,17 +57,19 @@ define amdgpu_kernel void @test_indirect_call_sgpr_ptr(void()* %fptr) { define amdgpu_gfx void @test_gfx_indirect_call_sgpr_ptr(void()* %fptr) { ; CHECK-LABEL: name: test_gfx_indirect_call_sgpr_ptr ; CHECK: bb.1 (%ir-block.0): - ; CHECK-NEXT: liveins: $vgpr0, $vgpr1 + ; CHECK-NEXT: liveins: $vgpr0, $vgpr1, $sgpr30_sgpr31 ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 ; CHECK-NEXT: [[MV:%[0-9]+]]:_(p0) = G_MERGE_VALUES [[COPY]](s32), [[COPY1]](s32) + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 ; CHECK-NEXT: ADJCALLSTACKUP 0, 0, implicit-def $scc - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(<4 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3 - ; CHECK-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY2]](<4 x s32>) + ; CHECK-NEXT: [[COPY3:%[0-9]+]]:_(<4 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3 + ; CHECK-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY3]](<4 x s32>) ; CHECK-NEXT: $sgpr30_sgpr31 = G_SI_CALL [[MV]](p0), 0, csr_amdgpu_si_gfx, implicit $sgpr0_sgpr1_sgpr2_sgpr3 ; CHECK-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def $scc - ; CHECK-NEXT: SI_RETURN + ; CHECK-NEXT: [[COPY4:%[0-9]+]]:gfx_ccr_sgpr_64 = COPY [[COPY2]] + ; CHECK-NEXT: S_SETPC_B64_return_gfx [[COPY4]] call amdgpu_gfx void %fptr() ret void } diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-inline-asm.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-inline-asm.ll index b5d9b33122ba..d31e73e91b60 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-inline-asm.ll +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-inline-asm.ll @@ -51,12 +51,16 @@ define amdgpu_kernel void @asm_simple_agpr_clobber() { define i32 @asm_vgpr_early_clobber() { ; CHECK-LABEL: name: asm_vgpr_early_clobber ; CHECK: bb.1 (%ir-block.0): - ; CHECK-NEXT: INLINEASM &"v_mov_b32 $0, 7; v_mov_b32 $1, 7", 1 /* sideeffect attdialect */, 1835019 /* regdef-ec:VGPR_32 */, def early-clobber %0, 1835019 /* regdef-ec:VGPR_32 */, def early-clobber %1, !0 - ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY %0 + ; CHECK-NEXT: liveins: $sgpr30_sgpr31 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 + ; CHECK-NEXT: INLINEASM &"v_mov_b32 $0, 7; v_mov_b32 $1, 7", 1 /* sideeffect attdialect */, 1835019 /* regdef-ec:VGPR_32 */, def early-clobber %1, 1835019 /* regdef-ec:VGPR_32 */, def early-clobber %2, !0 ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY %1 - ; CHECK-NEXT: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[COPY]], [[COPY1]] + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY %2 + ; CHECK-NEXT: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[COPY1]], [[COPY2]] ; CHECK-NEXT: $vgpr0 = COPY [[ADD]](s32) - ; CHECK-NEXT: SI_RETURN implicit $vgpr0 + ; CHECK-NEXT: [[COPY3:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY]] + ; CHECK-NEXT: S_SETPC_B64_return [[COPY3]], implicit $vgpr0 call { i32, i32 } asm sideeffect "v_mov_b32 $0, 7; v_mov_b32 $1, 7", "=&v,=&v"(), !srcloc !0 %asmresult = extractvalue { i32, i32 } %1, 0 %asmresult1 = extractvalue { i32, i32 } %1, 1 @@ -67,10 +71,14 @@ define i32 @asm_vgpr_early_clobber() { define i32 
@test_specific_vgpr_output() nounwind { ; CHECK-LABEL: name: test_specific_vgpr_output ; CHECK: bb.1.entry: + ; CHECK-NEXT: liveins: $sgpr30_sgpr31 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 ; CHECK-NEXT: INLINEASM &"v_mov_b32 v1, 7", 0 /* attdialect */, 10 /* regdef */, implicit-def $vgpr1 - ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; CHECK-NEXT: $vgpr0 = COPY [[COPY]](s32) - ; CHECK-NEXT: SI_RETURN implicit $vgpr0 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; CHECK-NEXT: $vgpr0 = COPY [[COPY1]](s32) + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY]] + ; CHECK-NEXT: S_SETPC_B64_return [[COPY2]], implicit $vgpr0 entry: %0 = tail call i32 asm "v_mov_b32 v1, 7", "={v1}"() nounwind ret i32 %0 @@ -79,10 +87,14 @@ entry: define i32 @test_single_vgpr_output() nounwind { ; CHECK-LABEL: name: test_single_vgpr_output ; CHECK: bb.1.entry: - ; CHECK-NEXT: INLINEASM &"v_mov_b32 $0, 7", 0 /* attdialect */, 1835018 /* regdef:VGPR_32 */, def %0 - ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY %0 - ; CHECK-NEXT: $vgpr0 = COPY [[COPY]](s32) - ; CHECK-NEXT: SI_RETURN implicit $vgpr0 + ; CHECK-NEXT: liveins: $sgpr30_sgpr31 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 + ; CHECK-NEXT: INLINEASM &"v_mov_b32 $0, 7", 0 /* attdialect */, 1835018 /* regdef:VGPR_32 */, def %1 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY %1 + ; CHECK-NEXT: $vgpr0 = COPY [[COPY1]](s32) + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY]] + ; CHECK-NEXT: S_SETPC_B64_return [[COPY2]], implicit $vgpr0 entry: %0 = tail call i32 asm "v_mov_b32 $0, 7", "=v"() nounwind ret i32 %0 @@ -91,10 +103,14 @@ entry: define i32 @test_single_sgpr_output_s32() nounwind { ; CHECK-LABEL: name: test_single_sgpr_output_s32 ; CHECK: bb.1.entry: - ; CHECK-NEXT: INLINEASM &"s_mov_b32 $0, 7", 0 /* attdialect */, 1966090 /* regdef:SReg_32 */, def %0 - ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY %0 - ; CHECK-NEXT: $vgpr0 = COPY [[COPY]](s32) - ; CHECK-NEXT: SI_RETURN implicit $vgpr0 + ; CHECK-NEXT: liveins: $sgpr30_sgpr31 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 + ; CHECK-NEXT: INLINEASM &"s_mov_b32 $0, 7", 0 /* attdialect */, 1966090 /* regdef:SReg_32 */, def %1 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY %1 + ; CHECK-NEXT: $vgpr0 = COPY [[COPY1]](s32) + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY]] + ; CHECK-NEXT: S_SETPC_B64_return [[COPY2]], implicit $vgpr0 entry: %0 = tail call i32 asm "s_mov_b32 $0, 7", "=s"() nounwind ret i32 %0 @@ -104,12 +120,16 @@ entry: define float @test_multiple_register_outputs_same() #0 { ; CHECK-LABEL: name: test_multiple_register_outputs_same ; CHECK: bb.1 (%ir-block.0): - ; CHECK-NEXT: INLINEASM &"v_mov_b32 $0, 0; v_mov_b32 $1, 1", 0 /* attdialect */, 1835018 /* regdef:VGPR_32 */, def %0, 1835018 /* regdef:VGPR_32 */, def %1 - ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY %0 + ; CHECK-NEXT: liveins: $sgpr30_sgpr31 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 + ; CHECK-NEXT: INLINEASM &"v_mov_b32 $0, 0; v_mov_b32 $1, 1", 0 /* attdialect */, 1835018 /* regdef:VGPR_32 */, def %1, 1835018 /* regdef:VGPR_32 */, def %2 ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY %1 - ; CHECK-NEXT: [[FADD:%[0-9]+]]:_(s32) = G_FADD [[COPY]], [[COPY1]] + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY %2 + ; CHECK-NEXT: [[FADD:%[0-9]+]]:_(s32) = G_FADD [[COPY1]], [[COPY2]] ; CHECK-NEXT: $vgpr0 = COPY [[FADD]](s32) - ; 
CHECK-NEXT: SI_RETURN implicit $vgpr0 + ; CHECK-NEXT: [[COPY3:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY]] + ; CHECK-NEXT: S_SETPC_B64_return [[COPY3]], implicit $vgpr0 %1 = call { float, float } asm "v_mov_b32 $0, 0; v_mov_b32 $1, 1", "=v,=v"() %asmresult = extractvalue { float, float } %1, 0 %asmresult1 = extractvalue { float, float } %1, 1 @@ -121,13 +141,17 @@ define float @test_multiple_register_outputs_same() #0 { define double @test_multiple_register_outputs_mixed() #0 { ; CHECK-LABEL: name: test_multiple_register_outputs_mixed ; CHECK: bb.1 (%ir-block.0): - ; CHECK-NEXT: INLINEASM &"v_mov_b32 $0, 0; v_add_f64 $1, 0, 0", 0 /* attdialect */, 1835018 /* regdef:VGPR_32 */, def %0, 2949130 /* regdef:VReg_64 */, def %1 - ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY %0 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s64) = COPY %1 - ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](s64) + ; CHECK-NEXT: liveins: $sgpr30_sgpr31 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 + ; CHECK-NEXT: INLINEASM &"v_mov_b32 $0, 0; v_add_f64 $1, 0, 0", 0 /* attdialect */, 1835018 /* regdef:VGPR_32 */, def %1, 2949130 /* regdef:VReg_64 */, def %2 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY %1 + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(s64) = COPY %2 + ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY2]](s64) ; CHECK-NEXT: $vgpr0 = COPY [[UV]](s32) ; CHECK-NEXT: $vgpr1 = COPY [[UV1]](s32) - ; CHECK-NEXT: SI_RETURN implicit $vgpr0, implicit $vgpr1 + ; CHECK-NEXT: [[COPY3:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY]] + ; CHECK-NEXT: S_SETPC_B64_return [[COPY3]], implicit $vgpr0, implicit $vgpr1 %1 = call { float, double } asm "v_mov_b32 $0, 0; v_add_f64 $1, 0, 0", "=v,=v"() %asmresult = extractvalue { float, double } %1, 1 ret double %asmresult @@ -137,12 +161,16 @@ define double @test_multiple_register_outputs_mixed() #0 { define float @test_vector_output() nounwind { ; CHECK-LABEL: name: test_vector_output ; CHECK: bb.1 (%ir-block.0): + ; CHECK-NEXT: liveins: $sgpr30_sgpr31 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 ; CHECK-NEXT: INLINEASM &"v_add_f64 $0, 0, 0", 1 /* sideeffect attdialect */, 10 /* regdef */, implicit-def $vgpr14_vgpr15 - ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr14_vgpr15 - ; CHECK-NEXT: [[EVEC:%[0-9]+]]:_(s32) = G_EXTRACT_VECTOR_ELT [[COPY]](<2 x s32>), [[C]](s32) + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr14_vgpr15 + ; CHECK-NEXT: [[EVEC:%[0-9]+]]:_(s32) = G_EXTRACT_VECTOR_ELT [[COPY1]](<2 x s32>), [[C]](s32) ; CHECK-NEXT: $vgpr0 = COPY [[EVEC]](s32) - ; CHECK-NEXT: SI_RETURN implicit $vgpr0 + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY]] + ; CHECK-NEXT: S_SETPC_B64_return [[COPY2]], implicit $vgpr0 %1 = tail call <2 x float> asm sideeffect "v_add_f64 $0, 0, 0", "={v[14:15]}"() nounwind %2 = extractelement <2 x float> %1, i32 0 ret float %2 @@ -184,14 +212,16 @@ define amdgpu_kernel void @test_input_imm() { define float @test_input_vgpr(i32 %src) nounwind { ; CHECK-LABEL: name: test_input_vgpr ; CHECK: bb.1.entry: - ; CHECK-NEXT: liveins: $vgpr0 + ; CHECK-NEXT: liveins: $vgpr0, $sgpr30_sgpr31 ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY [[COPY]](s32) - ; CHECK-NEXT: INLINEASM &"v_add_f32 $0, 1.0, $1", 0 /* attdialect */, 1835018 /* regdef:VGPR_32 */, def %1, 1835017 /* reguse:VGPR_32 */, 
[[COPY1]] - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY %1 - ; CHECK-NEXT: $vgpr0 = COPY [[COPY2]](s32) - ; CHECK-NEXT: SI_RETURN implicit $vgpr0 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[COPY]](s32) + ; CHECK-NEXT: INLINEASM &"v_add_f32 $0, 1.0, $1", 0 /* attdialect */, 1835018 /* regdef:VGPR_32 */, def %2, 1835017 /* reguse:VGPR_32 */, [[COPY2]] + ; CHECK-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY %2 + ; CHECK-NEXT: $vgpr0 = COPY [[COPY3]](s32) + ; CHECK-NEXT: [[COPY4:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY1]] + ; CHECK-NEXT: S_SETPC_B64_return [[COPY4]], implicit $vgpr0 entry: %0 = tail call float asm "v_add_f32 $0, 1.0, $1", "=v,v"(i32 %src) nounwind ret float %0 @@ -200,13 +230,15 @@ entry: define i32 @test_memory_constraint(i32 addrspace(3)* %a) nounwind { ; CHECK-LABEL: name: test_memory_constraint ; CHECK: bb.1 (%ir-block.0): - ; CHECK-NEXT: liveins: $vgpr0 + ; CHECK-NEXT: liveins: $vgpr0, $sgpr30_sgpr31 ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 - ; CHECK-NEXT: INLINEASM &"ds_read_b32 $0, $1", 8 /* mayload attdialect */, 1835018 /* regdef:VGPR_32 */, def %1, 196622 /* mem:m */, [[COPY]](p3) - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY %1 - ; CHECK-NEXT: $vgpr0 = COPY [[COPY1]](s32) - ; CHECK-NEXT: SI_RETURN implicit $vgpr0 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 + ; CHECK-NEXT: INLINEASM &"ds_read_b32 $0, $1", 8 /* mayload attdialect */, 1835018 /* regdef:VGPR_32 */, def %2, 196622 /* mem:m */, [[COPY]](p3) + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY %2 + ; CHECK-NEXT: $vgpr0 = COPY [[COPY2]](s32) + ; CHECK-NEXT: [[COPY3:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY1]] + ; CHECK-NEXT: S_SETPC_B64_return [[COPY3]], implicit $vgpr0 %1 = tail call i32 asm "ds_read_b32 $0, $1", "=v,*m"(i32 addrspace(3)* %a) ret i32 %1 } @@ -214,16 +246,18 @@ define i32 @test_memory_constraint(i32 addrspace(3)* %a) nounwind { define i32 @test_vgpr_matching_constraint(i32 %a) nounwind { ; CHECK-LABEL: name: test_vgpr_matching_constraint ; CHECK: bb.1 (%ir-block.0): - ; CHECK-NEXT: liveins: $vgpr0 + ; CHECK-NEXT: liveins: $vgpr0, $sgpr30_sgpr31 ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 ; CHECK-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY]], [[C]] - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY [[AND]](s32) - ; CHECK-NEXT: INLINEASM &";", 1 /* sideeffect attdialect */, 1835018 /* regdef:VGPR_32 */, def %3, 2147483657 /* reguse tiedto:$0 */, [[COPY1]](tied-def 3) - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY %3 - ; CHECK-NEXT: $vgpr0 = COPY [[COPY2]](s32) - ; CHECK-NEXT: SI_RETURN implicit $vgpr0 + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[AND]](s32) + ; CHECK-NEXT: INLINEASM &";", 1 /* sideeffect attdialect */, 1835018 /* regdef:VGPR_32 */, def %4, 2147483657 /* reguse tiedto:$0 */, [[COPY2]](tied-def 3) + ; CHECK-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY %4 + ; CHECK-NEXT: $vgpr0 = COPY [[COPY3]](s32) + ; CHECK-NEXT: [[COPY4:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY1]] + ; CHECK-NEXT: S_SETPC_B64_return [[COPY4]], implicit $vgpr0 %and = and i32 %a, 1 %asm = call i32 asm sideeffect ";", "=v,0"(i32 %and) ret i32 %asm @@ -232,16 +266,20 @@ define i32 @test_vgpr_matching_constraint(i32 %a) nounwind { define i32 @test_sgpr_matching_constraint() nounwind { ; CHECK-LABEL: name: test_sgpr_matching_constraint ; CHECK: bb.1.entry: - ; CHECK-NEXT: 
INLINEASM &"s_mov_b32 $0, 7", 0 /* attdialect */, 1966090 /* regdef:SReg_32 */, def %0 - ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY %0 - ; CHECK-NEXT: INLINEASM &"s_mov_b32 $0, 8", 0 /* attdialect */, 1966090 /* regdef:SReg_32 */, def %2 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY %2 - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY [[COPY]](s32) + ; CHECK-NEXT: liveins: $sgpr30_sgpr31 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 + ; CHECK-NEXT: INLINEASM &"s_mov_b32 $0, 7", 0 /* attdialect */, 1966090 /* regdef:SReg_32 */, def %1 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY %1 + ; CHECK-NEXT: INLINEASM &"s_mov_b32 $0, 8", 0 /* attdialect */, 1966090 /* regdef:SReg_32 */, def %3 + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY %3 ; CHECK-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY [[COPY1]](s32) - ; CHECK-NEXT: INLINEASM &"s_add_u32 $0, $1, $2", 0 /* attdialect */, 1966090 /* regdef:SReg_32 */, def %4, 1966089 /* reguse:SReg_32 */, [[COPY2]], 2147483657 /* reguse tiedto:$0 */, [[COPY3]](tied-def 3) - ; CHECK-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY %4 - ; CHECK-NEXT: $vgpr0 = COPY [[COPY4]](s32) - ; CHECK-NEXT: SI_RETURN implicit $vgpr0 + ; CHECK-NEXT: [[COPY4:%[0-9]+]]:sreg_32 = COPY [[COPY2]](s32) + ; CHECK-NEXT: INLINEASM &"s_add_u32 $0, $1, $2", 0 /* attdialect */, 1966090 /* regdef:SReg_32 */, def %5, 1966089 /* reguse:SReg_32 */, [[COPY3]], 2147483657 /* reguse tiedto:$0 */, [[COPY4]](tied-def 3) + ; CHECK-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY %5 + ; CHECK-NEXT: $vgpr0 = COPY [[COPY5]](s32) + ; CHECK-NEXT: [[COPY6:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY]] + ; CHECK-NEXT: S_SETPC_B64_return [[COPY6]], implicit $vgpr0 entry: %asm0 = tail call i32 asm "s_mov_b32 $0, 7", "=s"() nounwind %asm1 = tail call i32 asm "s_mov_b32 $0, 8", "=s"() nounwind @@ -252,23 +290,25 @@ entry: define void @test_many_matching_constraints(i32 %a, i32 %b, i32 %c) nounwind { ; CHECK-LABEL: name: test_many_matching_constraints ; CHECK: bb.1 (%ir-block.0): - ; CHECK-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2 + ; CHECK-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2, $sgpr30_sgpr31 ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 + ; CHECK-NEXT: [[COPY3:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF - ; CHECK-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY [[COPY2]](s32) - ; CHECK-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[COPY]](s32) - ; CHECK-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY [[COPY1]](s32) - ; CHECK-NEXT: INLINEASM &"; ", 1 /* sideeffect attdialect */, 1835018 /* regdef:VGPR_32 */, def %3, 1835018 /* regdef:VGPR_32 */, def %4, 1835018 /* regdef:VGPR_32 */, def %5, 2147483657 /* reguse tiedto:$0 */, [[COPY3]](tied-def 3), 2147614729 /* reguse tiedto:$2 */, [[COPY4]](tied-def 7), 2147549193 /* reguse tiedto:$1 */, [[COPY5]](tied-def 5) - ; CHECK-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY %3 + ; CHECK-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[COPY2]](s32) + ; CHECK-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY [[COPY]](s32) + ; CHECK-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY [[COPY1]](s32) + ; CHECK-NEXT: INLINEASM &"; ", 1 /* sideeffect attdialect */, 1835018 /* regdef:VGPR_32 */, def %4, 1835018 /* regdef:VGPR_32 */, def %5, 1835018 /* regdef:VGPR_32 */, def %6, 2147483657 /* reguse tiedto:$0 */, [[COPY4]](tied-def 3), 2147614729 /* reguse tiedto:$2 */, [[COPY5]](tied-def 7), 2147549193 /* reguse tiedto:$1 */, [[COPY6]](tied-def 5) ; 
CHECK-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY %4 ; CHECK-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY %5 - ; CHECK-NEXT: G_STORE [[COPY6]](s32), [[DEF]](p1) :: (store (s32) into `i32 addrspace(1)* undef`, addrspace 1) + ; CHECK-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY %6 ; CHECK-NEXT: G_STORE [[COPY7]](s32), [[DEF]](p1) :: (store (s32) into `i32 addrspace(1)* undef`, addrspace 1) ; CHECK-NEXT: G_STORE [[COPY8]](s32), [[DEF]](p1) :: (store (s32) into `i32 addrspace(1)* undef`, addrspace 1) - ; CHECK-NEXT: SI_RETURN + ; CHECK-NEXT: G_STORE [[COPY9]](s32), [[DEF]](p1) :: (store (s32) into `i32 addrspace(1)* undef`, addrspace 1) + ; CHECK-NEXT: [[COPY10:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY3]] + ; CHECK-NEXT: S_SETPC_B64_return [[COPY10]] %asm = call {i32, i32, i32} asm sideeffect "; ", "=v,=v,=v,0,2,1"(i32 %c, i32 %a, i32 %b) %asmresult0 = extractvalue {i32, i32, i32} %asm, 0 store i32 %asmresult0, i32 addrspace(1)* undef @@ -282,13 +322,17 @@ define void @test_many_matching_constraints(i32 %a, i32 %b, i32 %c) nounwind { define i32 @test_sgpr_to_vgpr_move_matching_constraint() nounwind { ; CHECK-LABEL: name: test_sgpr_to_vgpr_move_matching_constraint ; CHECK: bb.1.entry: - ; CHECK-NEXT: INLINEASM &"s_mov_b32 $0, 7", 0 /* attdialect */, 1966090 /* regdef:SReg_32 */, def %0 - ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY %0 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY [[COPY]](s32) - ; CHECK-NEXT: INLINEASM &"v_mov_b32 $0, $1", 0 /* attdialect */, 1835018 /* regdef:VGPR_32 */, def %2, 2147483657 /* reguse tiedto:$0 */, [[COPY1]](tied-def 3) - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY %2 - ; CHECK-NEXT: $vgpr0 = COPY [[COPY2]](s32) - ; CHECK-NEXT: SI_RETURN implicit $vgpr0 + ; CHECK-NEXT: liveins: $sgpr30_sgpr31 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 + ; CHECK-NEXT: INLINEASM &"s_mov_b32 $0, 7", 0 /* attdialect */, 1966090 /* regdef:SReg_32 */, def %1 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY %1 + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[COPY1]](s32) + ; CHECK-NEXT: INLINEASM &"v_mov_b32 $0, $1", 0 /* attdialect */, 1835018 /* regdef:VGPR_32 */, def %3, 2147483657 /* reguse tiedto:$0 */, [[COPY2]](tied-def 3) + ; CHECK-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY %3 + ; CHECK-NEXT: $vgpr0 = COPY [[COPY3]](s32) + ; CHECK-NEXT: [[COPY4:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY]] + ; CHECK-NEXT: S_SETPC_B64_return [[COPY4]], implicit $vgpr0 entry: %asm0 = tail call i32 asm "s_mov_b32 $0, 7", "=s"() nounwind %asm1 = tail call i32 asm "v_mov_b32 $0, $1", "=v,0"(i32 %asm0) nounwind diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-memory-intrinsics.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-memory-intrinsics.ll index 8e90bd593a10..ec903d346288 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-memory-intrinsics.ll +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-memory-intrinsics.ll @@ -6,16 +6,18 @@ define void @test_memcpy_p1_p3_i64(i8 addrspace(1)* %dst, i8 addrspace(3)* %src) { ; CHECK-LABEL: name: test_memcpy_p1_p3_i64 ; CHECK: bb.1 (%ir-block.0): - ; CHECK-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2 + ; CHECK-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2, $sgpr30_sgpr31 ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 ; CHECK-NEXT: [[MV:%[0-9]+]]:_(p1) = G_MERGE_VALUES [[COPY]](s32), [[COPY1]](s32) ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(p3) = COPY $vgpr2 + ; CHECK-NEXT: [[COPY3:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 ; CHECK-NEXT: [[C:%[0-9]+]]:_(s64) = 
G_CONSTANT i64 256 ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(s32) = G_TRUNC [[C]](s64) ; CHECK-NEXT: G_MEMCPY [[MV]](p1), [[COPY2]](p3), [[TRUNC]](s32), 0 :: (store (s8) into %ir.dst, addrspace 1), (load (s8) from %ir.src, addrspace 3) - ; CHECK-NEXT: SI_RETURN + ; CHECK-NEXT: [[COPY4:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY3]] + ; CHECK-NEXT: S_SETPC_B64_return [[COPY4]] call void @llvm.memcpy.p1i8.p3i8.i64(i8 addrspace(1)* %dst, i8 addrspace(3)* %src, i64 256, i1 false) ret void } @@ -23,15 +25,17 @@ define void @test_memcpy_p1_p3_i64(i8 addrspace(1)* %dst, i8 addrspace(3)* %src) define void @test_memcpy_p1_p3_i32(i8 addrspace(1)* %dst, i8 addrspace(3)* %src) { ; CHECK-LABEL: name: test_memcpy_p1_p3_i32 ; CHECK: bb.1 (%ir-block.0): - ; CHECK-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2 + ; CHECK-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2, $sgpr30_sgpr31 ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 ; CHECK-NEXT: [[MV:%[0-9]+]]:_(p1) = G_MERGE_VALUES [[COPY]](s32), [[COPY1]](s32) ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(p3) = COPY $vgpr2 + ; CHECK-NEXT: [[COPY3:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 256 ; CHECK-NEXT: G_MEMCPY [[MV]](p1), [[COPY2]](p3), [[C]](s32), 0 :: (store (s8) into %ir.dst, addrspace 1), (load (s8) from %ir.src, addrspace 3) - ; CHECK-NEXT: SI_RETURN + ; CHECK-NEXT: [[COPY4:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY3]] + ; CHECK-NEXT: S_SETPC_B64_return [[COPY4]] call void @llvm.memcpy.p1i8.p3i8.i32(i8 addrspace(1)* %dst, i8 addrspace(3)* %src, i32 256, i1 false) ret void } @@ -39,16 +43,18 @@ define void @test_memcpy_p1_p3_i32(i8 addrspace(1)* %dst, i8 addrspace(3)* %src) define void @test_memcpy_p1_p3_i16(i8 addrspace(1)* %dst, i8 addrspace(3)* %src) { ; CHECK-LABEL: name: test_memcpy_p1_p3_i16 ; CHECK: bb.1 (%ir-block.0): - ; CHECK-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2 + ; CHECK-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2, $sgpr30_sgpr31 ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 ; CHECK-NEXT: [[MV:%[0-9]+]]:_(p1) = G_MERGE_VALUES [[COPY]](s32), [[COPY1]](s32) ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(p3) = COPY $vgpr2 + ; CHECK-NEXT: [[COPY3:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 ; CHECK-NEXT: [[C:%[0-9]+]]:_(s16) = G_CONSTANT i16 256 ; CHECK-NEXT: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[C]](s16) ; CHECK-NEXT: G_MEMCPY [[MV]](p1), [[COPY2]](p3), [[ZEXT]](s32), 0 :: (store (s8) into %ir.dst, addrspace 1), (load (s8) from %ir.src, addrspace 3) - ; CHECK-NEXT: SI_RETURN + ; CHECK-NEXT: [[COPY4:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY3]] + ; CHECK-NEXT: S_SETPC_B64_return [[COPY4]] call void @llvm.memcpy.p1i8.p3i8.i16(i8 addrspace(1)* %dst, i8 addrspace(3)* %src, i16 256, i1 false) ret void } @@ -56,16 +62,18 @@ define void @test_memcpy_p1_p3_i16(i8 addrspace(1)* %dst, i8 addrspace(3)* %src) define void @test_memcpy_p3_p1_i64(i8 addrspace(3)* %dst, i8 addrspace(1)* %src) { ; CHECK-LABEL: name: test_memcpy_p3_p1_i64 ; CHECK: bb.1 (%ir-block.0): - ; CHECK-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2 + ; CHECK-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2, $sgpr30_sgpr31 ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 ; CHECK-NEXT: [[MV:%[0-9]+]]:_(p1) = G_MERGE_VALUES [[COPY1]](s32), [[COPY2]](s32) + ; CHECK-NEXT: [[COPY3:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 ; CHECK-NEXT: [[C:%[0-9]+]]:_(s64) = 
G_CONSTANT i64 256 ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(s32) = G_TRUNC [[C]](s64) ; CHECK-NEXT: G_MEMCPY [[COPY]](p3), [[MV]](p1), [[TRUNC]](s32), 0 :: (store (s8) into %ir.dst, addrspace 3), (load (s8) from %ir.src, addrspace 1) - ; CHECK-NEXT: SI_RETURN + ; CHECK-NEXT: [[COPY4:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY3]] + ; CHECK-NEXT: S_SETPC_B64_return [[COPY4]] call void @llvm.memcpy.p3i8.p1i8.i64(i8 addrspace(3)* %dst, i8 addrspace(1)* %src, i64 256, i1 false) ret void } @@ -73,15 +81,17 @@ define void @test_memcpy_p3_p1_i64(i8 addrspace(3)* %dst, i8 addrspace(1)* %src) define void @test_memcpy_p3_p1_i32(i8 addrspace(3)* %dst, i8 addrspace(1)* %src) { ; CHECK-LABEL: name: test_memcpy_p3_p1_i32 ; CHECK: bb.1 (%ir-block.0): - ; CHECK-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2 + ; CHECK-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2, $sgpr30_sgpr31 ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 ; CHECK-NEXT: [[MV:%[0-9]+]]:_(p1) = G_MERGE_VALUES [[COPY1]](s32), [[COPY2]](s32) + ; CHECK-NEXT: [[COPY3:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 256 ; CHECK-NEXT: G_MEMCPY [[COPY]](p3), [[MV]](p1), [[C]](s32), 0 :: (store (s8) into %ir.dst, addrspace 3), (load (s8) from %ir.src, addrspace 1) - ; CHECK-NEXT: SI_RETURN + ; CHECK-NEXT: [[COPY4:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY3]] + ; CHECK-NEXT: S_SETPC_B64_return [[COPY4]] call void @llvm.memcpy.p3i8.p1i8.i32(i8 addrspace(3)* %dst, i8 addrspace(1)* %src, i32 256, i1 false) ret void } @@ -89,16 +99,18 @@ define void @test_memcpy_p3_p1_i32(i8 addrspace(3)* %dst, i8 addrspace(1)* %src) define void @test_memcpy_p3_p1_i16(i8 addrspace(3)* %dst, i8 addrspace(1)* %src) { ; CHECK-LABEL: name: test_memcpy_p3_p1_i16 ; CHECK: bb.1 (%ir-block.0): - ; CHECK-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2 + ; CHECK-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2, $sgpr30_sgpr31 ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 ; CHECK-NEXT: [[MV:%[0-9]+]]:_(p1) = G_MERGE_VALUES [[COPY1]](s32), [[COPY2]](s32) + ; CHECK-NEXT: [[COPY3:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 ; CHECK-NEXT: [[C:%[0-9]+]]:_(s16) = G_CONSTANT i16 256 ; CHECK-NEXT: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[C]](s16) ; CHECK-NEXT: G_MEMCPY [[COPY]](p3), [[MV]](p1), [[ZEXT]](s32), 0 :: (store (s8) into %ir.dst, addrspace 3), (load (s8) from %ir.src, addrspace 1) - ; CHECK-NEXT: SI_RETURN + ; CHECK-NEXT: [[COPY4:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY3]] + ; CHECK-NEXT: S_SETPC_B64_return [[COPY4]] call void @llvm.memcpy.p3i8.p1i8.i16(i8 addrspace(3)* %dst, i8 addrspace(1)* %src, i16 256, i1 false) ret void } @@ -106,16 +118,18 @@ define void @test_memcpy_p3_p1_i16(i8 addrspace(3)* %dst, i8 addrspace(1)* %src) define void @test_memmove_p1_p3_i64(i8 addrspace(1)* %dst, i8 addrspace(3)* %src) { ; CHECK-LABEL: name: test_memmove_p1_p3_i64 ; CHECK: bb.1 (%ir-block.0): - ; CHECK-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2 + ; CHECK-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2, $sgpr30_sgpr31 ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 ; CHECK-NEXT: [[MV:%[0-9]+]]:_(p1) = G_MERGE_VALUES [[COPY]](s32), [[COPY1]](s32) ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(p3) = COPY $vgpr2 + ; CHECK-NEXT: [[COPY3:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 ; CHECK-NEXT: [[C:%[0-9]+]]:_(s64) = 
G_CONSTANT i64 256 ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(s32) = G_TRUNC [[C]](s64) ; CHECK-NEXT: G_MEMMOVE [[MV]](p1), [[COPY2]](p3), [[TRUNC]](s32), 0 :: (store (s8) into %ir.dst, addrspace 1), (load (s8) from %ir.src, addrspace 3) - ; CHECK-NEXT: SI_RETURN + ; CHECK-NEXT: [[COPY4:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY3]] + ; CHECK-NEXT: S_SETPC_B64_return [[COPY4]] call void @llvm.memmove.p1i8.p3i8.i64(i8 addrspace(1)* %dst, i8 addrspace(3)* %src, i64 256, i1 false) ret void } @@ -123,15 +137,17 @@ define void @test_memmove_p1_p3_i64(i8 addrspace(1)* %dst, i8 addrspace(3)* %src define void @test_memmove_p1_p3_i32(i8 addrspace(1)* %dst, i8 addrspace(3)* %src) { ; CHECK-LABEL: name: test_memmove_p1_p3_i32 ; CHECK: bb.1 (%ir-block.0): - ; CHECK-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2 + ; CHECK-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2, $sgpr30_sgpr31 ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 ; CHECK-NEXT: [[MV:%[0-9]+]]:_(p1) = G_MERGE_VALUES [[COPY]](s32), [[COPY1]](s32) ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(p3) = COPY $vgpr2 + ; CHECK-NEXT: [[COPY3:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 256 ; CHECK-NEXT: G_MEMMOVE [[MV]](p1), [[COPY2]](p3), [[C]](s32), 0 :: (store (s8) into %ir.dst, addrspace 1), (load (s8) from %ir.src, addrspace 3) - ; CHECK-NEXT: SI_RETURN + ; CHECK-NEXT: [[COPY4:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY3]] + ; CHECK-NEXT: S_SETPC_B64_return [[COPY4]] call void @llvm.memmove.p1i8.p3i8.i32(i8 addrspace(1)* %dst, i8 addrspace(3)* %src, i32 256, i1 false) ret void } @@ -139,16 +155,18 @@ define void @test_memmove_p1_p3_i32(i8 addrspace(1)* %dst, i8 addrspace(3)* %src define void @test_memmove_p1_p3_i16(i8 addrspace(1)* %dst, i8 addrspace(3)* %src) { ; CHECK-LABEL: name: test_memmove_p1_p3_i16 ; CHECK: bb.1 (%ir-block.0): - ; CHECK-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2 + ; CHECK-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2, $sgpr30_sgpr31 ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 ; CHECK-NEXT: [[MV:%[0-9]+]]:_(p1) = G_MERGE_VALUES [[COPY]](s32), [[COPY1]](s32) ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(p3) = COPY $vgpr2 + ; CHECK-NEXT: [[COPY3:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 ; CHECK-NEXT: [[C:%[0-9]+]]:_(s16) = G_CONSTANT i16 256 ; CHECK-NEXT: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[C]](s16) ; CHECK-NEXT: G_MEMMOVE [[MV]](p1), [[COPY2]](p3), [[ZEXT]](s32), 0 :: (store (s8) into %ir.dst, addrspace 1), (load (s8) from %ir.src, addrspace 3) - ; CHECK-NEXT: SI_RETURN + ; CHECK-NEXT: [[COPY4:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY3]] + ; CHECK-NEXT: S_SETPC_B64_return [[COPY4]] call void @llvm.memmove.p1i8.p3i8.i16(i8 addrspace(1)* %dst, i8 addrspace(3)* %src, i16 256, i1 false) ret void } @@ -156,16 +174,18 @@ define void @test_memmove_p1_p3_i16(i8 addrspace(1)* %dst, i8 addrspace(3)* %src define void @test_memset_p1_i64(i8 addrspace(1)* %dst, i8 %val) { ; CHECK-LABEL: name: test_memset_p1_i64 ; CHECK: bb.1 (%ir-block.0): - ; CHECK-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2 + ; CHECK-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2, $sgpr30_sgpr31 ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 ; CHECK-NEXT: [[MV:%[0-9]+]]:_(p1) = G_MERGE_VALUES [[COPY]](s32), [[COPY1]](s32) ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(s8) = G_TRUNC [[COPY2]](s32) + ; CHECK-NEXT: [[COPY3:%[0-9]+]]:sgpr_64 = COPY 
$sgpr30_sgpr31 ; CHECK-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 256 ; CHECK-NEXT: G_MEMSET [[MV]](p1), [[TRUNC]](s8), [[C]](s64), 0 :: (store (s8) into %ir.dst, addrspace 1) - ; CHECK-NEXT: SI_RETURN + ; CHECK-NEXT: [[COPY4:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY3]] + ; CHECK-NEXT: S_SETPC_B64_return [[COPY4]] call void @llvm.memset.p1i8.i64(i8 addrspace(1)* %dst, i8 %val, i64 256, i1 false) ret void } @@ -173,17 +193,19 @@ define void @test_memset_p1_i64(i8 addrspace(1)* %dst, i8 %val) { define void @test_memset_p1_i32(i8 addrspace(1)* %dst, i8 %val) { ; CHECK-LABEL: name: test_memset_p1_i32 ; CHECK: bb.1 (%ir-block.0): - ; CHECK-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2 + ; CHECK-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2, $sgpr30_sgpr31 ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 ; CHECK-NEXT: [[MV:%[0-9]+]]:_(p1) = G_MERGE_VALUES [[COPY]](s32), [[COPY1]](s32) ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(s8) = G_TRUNC [[COPY2]](s32) + ; CHECK-NEXT: [[COPY3:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 256 ; CHECK-NEXT: [[ZEXT:%[0-9]+]]:_(s64) = G_ZEXT [[C]](s32) ; CHECK-NEXT: G_MEMSET [[MV]](p1), [[TRUNC]](s8), [[ZEXT]](s64), 0 :: (store (s8) into %ir.dst, addrspace 1) - ; CHECK-NEXT: SI_RETURN + ; CHECK-NEXT: [[COPY4:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY3]] + ; CHECK-NEXT: S_SETPC_B64_return [[COPY4]] call void @llvm.memset.p1i8.i32(i8 addrspace(1)* %dst, i8 %val, i32 256, i1 false) ret void } @@ -191,17 +213,19 @@ define void @test_memset_p1_i32(i8 addrspace(1)* %dst, i8 %val) { define void @test_memset_p1_i16(i8 addrspace(1)* %dst, i8 %val) { ; CHECK-LABEL: name: test_memset_p1_i16 ; CHECK: bb.1 (%ir-block.0): - ; CHECK-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2 + ; CHECK-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2, $sgpr30_sgpr31 ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 ; CHECK-NEXT: [[MV:%[0-9]+]]:_(p1) = G_MERGE_VALUES [[COPY]](s32), [[COPY1]](s32) ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(s8) = G_TRUNC [[COPY2]](s32) + ; CHECK-NEXT: [[COPY3:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 ; CHECK-NEXT: [[C:%[0-9]+]]:_(s16) = G_CONSTANT i16 256 ; CHECK-NEXT: [[ZEXT:%[0-9]+]]:_(s64) = G_ZEXT [[C]](s16) ; CHECK-NEXT: G_MEMSET [[MV]](p1), [[TRUNC]](s8), [[ZEXT]](s64), 0 :: (store (s8) into %ir.dst, addrspace 1) - ; CHECK-NEXT: SI_RETURN + ; CHECK-NEXT: [[COPY4:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY3]] + ; CHECK-NEXT: S_SETPC_B64_return [[COPY4]] call void @llvm.memset.p1i8.i16(i8 addrspace(1)* %dst, i8 %val, i16 256, i1 false) ret void } @@ -209,15 +233,17 @@ define void @test_memset_p1_i16(i8 addrspace(1)* %dst, i8 %val) { define void @test_memset_p3_i64(i8 addrspace(3)* %dst, i8 %val) { ; CHECK-LABEL: name: test_memset_p3_i64 ; CHECK: bb.1 (%ir-block.0): - ; CHECK-NEXT: liveins: $vgpr0, $vgpr1 + ; CHECK-NEXT: liveins: $vgpr0, $vgpr1, $sgpr30_sgpr31 ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(s8) = G_TRUNC [[COPY1]](s32) + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 ; CHECK-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 256 ; CHECK-NEXT: [[TRUNC1:%[0-9]+]]:_(s32) = G_TRUNC [[C]](s64) ; CHECK-NEXT: G_MEMSET [[COPY]](p3), [[TRUNC]](s8), [[TRUNC1]](s32), 0 :: (store (s8) into %ir.dst, addrspace 3) 
- ; CHECK-NEXT: SI_RETURN + ; CHECK-NEXT: [[COPY3:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY2]] + ; CHECK-NEXT: S_SETPC_B64_return [[COPY3]] call void @llvm.memset.p3i8.i64(i8 addrspace(3)* %dst, i8 %val, i64 256, i1 false) ret void } @@ -225,14 +251,16 @@ define void @test_memset_p3_i64(i8 addrspace(3)* %dst, i8 %val) { define void @test_memset_p3_i32(i8 addrspace(3)* %dst, i8 %val) { ; CHECK-LABEL: name: test_memset_p3_i32 ; CHECK: bb.1 (%ir-block.0): - ; CHECK-NEXT: liveins: $vgpr0, $vgpr1 + ; CHECK-NEXT: liveins: $vgpr0, $vgpr1, $sgpr30_sgpr31 ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(s8) = G_TRUNC [[COPY1]](s32) + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 256 ; CHECK-NEXT: G_MEMSET [[COPY]](p3), [[TRUNC]](s8), [[C]](s32), 0 :: (store (s8) into %ir.dst, addrspace 3) - ; CHECK-NEXT: SI_RETURN + ; CHECK-NEXT: [[COPY3:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY2]] + ; CHECK-NEXT: S_SETPC_B64_return [[COPY3]] call void @llvm.memset.p3i8.i32(i8 addrspace(3)* %dst, i8 %val, i32 256, i1 false) ret void } @@ -240,15 +268,17 @@ define void @test_memset_p3_i32(i8 addrspace(3)* %dst, i8 %val) { define void @test_memset_p3_i16(i8 addrspace(3)* %dst, i8 %val) { ; CHECK-LABEL: name: test_memset_p3_i16 ; CHECK: bb.1 (%ir-block.0): - ; CHECK-NEXT: liveins: $vgpr0, $vgpr1 + ; CHECK-NEXT: liveins: $vgpr0, $vgpr1, $sgpr30_sgpr31 ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(s8) = G_TRUNC [[COPY1]](s32) + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 ; CHECK-NEXT: [[C:%[0-9]+]]:_(s16) = G_CONSTANT i16 256 ; CHECK-NEXT: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[C]](s16) ; CHECK-NEXT: G_MEMSET [[COPY]](p3), [[TRUNC]](s8), [[ZEXT]](s32), 0 :: (store (s8) into %ir.dst, addrspace 3) - ; CHECK-NEXT: SI_RETURN + ; CHECK-NEXT: [[COPY3:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY2]] + ; CHECK-NEXT: S_SETPC_B64_return [[COPY3]] call void @llvm.memset.p3i8.i16(i8 addrspace(3)* %dst, i8 %val, i16 256, i1 false) ret void } diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-metadata.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-metadata.ll index a1a72c07cb63..99a2875e8b16 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-metadata.ll +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-metadata.ll @@ -5,12 +5,15 @@ define i32 @reloc_constant() { ; CHECK-LABEL: name: reloc_constant ; CHECK: bb.1 (%ir-block.0): + ; CHECK: liveins: $sgpr30_sgpr31 + ; CHECK: [[COPY:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 ; CHECK: [[INT0:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.reloc.constant), !0 ; We cannot have any specific metadata check here as ConstantAsMetadata is printed as ; CHECK: [[INT1:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.reloc.constant), <0x{{[0-9a-f]+}}> ; CHECK: [[SUM:%[0-9]+]]:_(s32) = G_ADD [[INT0]], [[INT1]] ; CHECK: $vgpr0 = COPY [[SUM]](s32) - ; CHECK: SI_RETURN implicit $vgpr0 + ; CHECK: [[COPY1:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY]] + ; CHECK: S_SETPC_B64_return [[COPY1]], implicit $vgpr0 %val0 = call i32 @llvm.amdgcn.reloc.constant(metadata !0) %val1 = call i32 @llvm.amdgcn.reloc.constant(metadata i32 4) %res = add i32 %val0, %val1 diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-ptrmask.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-ptrmask.ll index 
af0daceac6c6..a88c4224a2fa 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-ptrmask.ll +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-ptrmask.ll @@ -4,7 +4,7 @@ define i8* @ptrmask_flat_i64(i8* %ptr, i64 %mask) { ; CHECK-LABEL: name: ptrmask_flat_i64 ; CHECK: bb.1 (%ir-block.0): - ; CHECK-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3 + ; CHECK-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $sgpr30_sgpr31 ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 @@ -12,11 +12,13 @@ define i8* @ptrmask_flat_i64(i8* %ptr, i64 %mask) { ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 ; CHECK-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $vgpr3 ; CHECK-NEXT: [[MV1:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY2]](s32), [[COPY3]](s32) + ; CHECK-NEXT: [[COPY4:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 ; CHECK-NEXT: [[PTRMASK:%[0-9]+]]:_(p0) = G_PTRMASK [[MV]], [[MV1]](s64) ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[PTRMASK]](p0) ; CHECK-NEXT: $vgpr0 = COPY [[UV]](s32) ; CHECK-NEXT: $vgpr1 = COPY [[UV1]](s32) - ; CHECK-NEXT: SI_RETURN implicit $vgpr0, implicit $vgpr1 + ; CHECK-NEXT: [[COPY5:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY4]] + ; CHECK-NEXT: S_SETPC_B64_return [[COPY5]], implicit $vgpr0, implicit $vgpr1 %masked = call i8* @llvm.ptrmask.p0i8.i64(i8* %ptr, i64 %mask) ret i8* %masked } @@ -24,17 +26,19 @@ define i8* @ptrmask_flat_i64(i8* %ptr, i64 %mask) { define i8* @ptrmask_flat_i32(i8* %ptr, i32 %mask) { ; CHECK-LABEL: name: ptrmask_flat_i32 ; CHECK: bb.1 (%ir-block.0): - ; CHECK-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2 + ; CHECK-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2, $sgpr30_sgpr31 ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 ; CHECK-NEXT: [[MV:%[0-9]+]]:_(p0) = G_MERGE_VALUES [[COPY]](s32), [[COPY1]](s32) ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 + ; CHECK-NEXT: [[COPY3:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 ; CHECK-NEXT: [[PTRMASK:%[0-9]+]]:_(p0) = G_PTRMASK [[MV]], [[COPY2]](s32) ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[PTRMASK]](p0) ; CHECK-NEXT: $vgpr0 = COPY [[UV]](s32) ; CHECK-NEXT: $vgpr1 = COPY [[UV1]](s32) - ; CHECK-NEXT: SI_RETURN implicit $vgpr0, implicit $vgpr1 + ; CHECK-NEXT: [[COPY4:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY3]] + ; CHECK-NEXT: S_SETPC_B64_return [[COPY4]], implicit $vgpr0, implicit $vgpr1 %masked = call i8* @llvm.ptrmask.p0i8.i32(i8* %ptr, i32 %mask) ret i8* %masked } @@ -42,18 +46,20 @@ define i8* @ptrmask_flat_i32(i8* %ptr, i32 %mask) { define i8* @ptrmask_flat_i16(i8* %ptr, i16 %mask) { ; CHECK-LABEL: name: ptrmask_flat_i16 ; CHECK: bb.1 (%ir-block.0): - ; CHECK-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2 + ; CHECK-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2, $sgpr30_sgpr31 ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 ; CHECK-NEXT: [[MV:%[0-9]+]]:_(p0) = G_MERGE_VALUES [[COPY]](s32), [[COPY1]](s32) ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY2]](s32) + ; CHECK-NEXT: [[COPY3:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 ; CHECK-NEXT: [[PTRMASK:%[0-9]+]]:_(p0) = G_PTRMASK [[MV]], [[TRUNC]](s16) ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[PTRMASK]](p0) ; CHECK-NEXT: $vgpr0 = COPY [[UV]](s32) ; CHECK-NEXT: $vgpr1 = COPY [[UV1]](s32) - ; CHECK-NEXT: SI_RETURN implicit 
$vgpr0, implicit $vgpr1 + ; CHECK-NEXT: [[COPY4:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY3]] + ; CHECK-NEXT: S_SETPC_B64_return [[COPY4]], implicit $vgpr0, implicit $vgpr1 %masked = call i8* @llvm.ptrmask.p0i8.i16(i8* %ptr, i16 %mask) ret i8* %masked } @@ -61,18 +67,20 @@ define i8* @ptrmask_flat_i16(i8* %ptr, i16 %mask) { define i8* @ptrmask_flat_i1(i8* %ptr, i1 %mask) { ; CHECK-LABEL: name: ptrmask_flat_i1 ; CHECK: bb.1 (%ir-block.0): - ; CHECK-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2 + ; CHECK-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2, $sgpr30_sgpr31 ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 ; CHECK-NEXT: [[MV:%[0-9]+]]:_(p0) = G_MERGE_VALUES [[COPY]](s32), [[COPY1]](s32) ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(s1) = G_TRUNC [[COPY2]](s32) + ; CHECK-NEXT: [[COPY3:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 ; CHECK-NEXT: [[PTRMASK:%[0-9]+]]:_(p0) = G_PTRMASK [[MV]], [[TRUNC]](s1) ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[PTRMASK]](p0) ; CHECK-NEXT: $vgpr0 = COPY [[UV]](s32) ; CHECK-NEXT: $vgpr1 = COPY [[UV1]](s32) - ; CHECK-NEXT: SI_RETURN implicit $vgpr0, implicit $vgpr1 + ; CHECK-NEXT: [[COPY4:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY3]] + ; CHECK-NEXT: S_SETPC_B64_return [[COPY4]], implicit $vgpr0, implicit $vgpr1 %masked = call i8* @llvm.ptrmask.p0i8.i1(i8* %ptr, i1 %mask) ret i8* %masked } @@ -80,15 +88,17 @@ define i8* @ptrmask_flat_i1(i8* %ptr, i1 %mask) { define i8 addrspace(3)* @ptrmask_local_i64(i8 addrspace(3)* %ptr, i64 %mask) { ; CHECK-LABEL: name: ptrmask_local_i64 ; CHECK: bb.1 (%ir-block.0): - ; CHECK-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2 + ; CHECK-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2, $sgpr30_sgpr31 ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 ; CHECK-NEXT: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY1]](s32), [[COPY2]](s32) + ; CHECK-NEXT: [[COPY3:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 ; CHECK-NEXT: [[PTRMASK:%[0-9]+]]:_(p3) = G_PTRMASK [[COPY]], [[MV]](s64) ; CHECK-NEXT: $vgpr0 = COPY [[PTRMASK]](p3) - ; CHECK-NEXT: SI_RETURN implicit $vgpr0 + ; CHECK-NEXT: [[COPY4:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY3]] + ; CHECK-NEXT: S_SETPC_B64_return [[COPY4]], implicit $vgpr0 %masked = call i8 addrspace(3)* @llvm.ptrmask.p3i8.i64(i8 addrspace(3)* %ptr, i64 %mask) ret i8 addrspace(3)* %masked } @@ -96,13 +106,15 @@ define i8 addrspace(3)* @ptrmask_local_i64(i8 addrspace(3)* %ptr, i64 %mask) { define i8 addrspace(3)* @ptrmask_local_i32(i8 addrspace(3)* %ptr, i32 %mask) { ; CHECK-LABEL: name: ptrmask_local_i32 ; CHECK: bb.1 (%ir-block.0): - ; CHECK-NEXT: liveins: $vgpr0, $vgpr1 + ; CHECK-NEXT: liveins: $vgpr0, $vgpr1, $sgpr30_sgpr31 ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 ; CHECK-NEXT: [[PTRMASK:%[0-9]+]]:_(p3) = G_PTRMASK [[COPY]], [[COPY1]](s32) ; CHECK-NEXT: $vgpr0 = COPY [[PTRMASK]](p3) - ; CHECK-NEXT: SI_RETURN implicit $vgpr0 + ; CHECK-NEXT: [[COPY3:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY2]] + ; CHECK-NEXT: S_SETPC_B64_return [[COPY3]], implicit $vgpr0 %masked = call i8 addrspace(3)* @llvm.ptrmask.p3i8.i32(i8 addrspace(3)* %ptr, i32 %mask) ret i8 addrspace(3)* %masked } @@ -110,14 +122,16 @@ define i8 addrspace(3)* @ptrmask_local_i32(i8 addrspace(3)* %ptr, 
i32 %mask) { define i8 addrspace(3)* @ptrmask_local_i16(i8 addrspace(3)* %ptr, i16 %mask) { ; CHECK-LABEL: name: ptrmask_local_i16 ; CHECK: bb.1 (%ir-block.0): - ; CHECK-NEXT: liveins: $vgpr0, $vgpr1 + ; CHECK-NEXT: liveins: $vgpr0, $vgpr1, $sgpr30_sgpr31 ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY1]](s32) + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 ; CHECK-NEXT: [[PTRMASK:%[0-9]+]]:_(p3) = G_PTRMASK [[COPY]], [[TRUNC]](s16) ; CHECK-NEXT: $vgpr0 = COPY [[PTRMASK]](p3) - ; CHECK-NEXT: SI_RETURN implicit $vgpr0 + ; CHECK-NEXT: [[COPY3:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY2]] + ; CHECK-NEXT: S_SETPC_B64_return [[COPY3]], implicit $vgpr0 %masked = call i8 addrspace(3)* @llvm.ptrmask.p3i8.i16(i8 addrspace(3)* %ptr, i16 %mask) ret i8 addrspace(3)* %masked } @@ -125,14 +139,16 @@ define i8 addrspace(3)* @ptrmask_local_i16(i8 addrspace(3)* %ptr, i16 %mask) { define i8 addrspace(3)* @ptrmask_local_i1(i8 addrspace(3)* %ptr, i1 %mask) { ; CHECK-LABEL: name: ptrmask_local_i1 ; CHECK: bb.1 (%ir-block.0): - ; CHECK-NEXT: liveins: $vgpr0, $vgpr1 + ; CHECK-NEXT: liveins: $vgpr0, $vgpr1, $sgpr30_sgpr31 ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(s1) = G_TRUNC [[COPY1]](s32) + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 ; CHECK-NEXT: [[PTRMASK:%[0-9]+]]:_(p3) = G_PTRMASK [[COPY]], [[TRUNC]](s1) ; CHECK-NEXT: $vgpr0 = COPY [[PTRMASK]](p3) - ; CHECK-NEXT: SI_RETURN implicit $vgpr0 + ; CHECK-NEXT: [[COPY3:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY2]] + ; CHECK-NEXT: S_SETPC_B64_return [[COPY3]], implicit $vgpr0 %masked = call i8 addrspace(3)* @llvm.ptrmask.p3i8.i1(i8 addrspace(3)* %ptr, i1 %mask) ret i8 addrspace(3)* %masked } diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-sat.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-sat.ll index 4faea44d1582..0db076c6b0fc 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-sat.ll +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-sat.ll @@ -4,16 +4,18 @@ define i16 @uaddsat_i16(i16 %lhs, i16 %rhs) { ; CHECK-LABEL: name: uaddsat_i16 ; CHECK: bb.1 (%ir-block.0): - ; CHECK-NEXT: liveins: $vgpr0, $vgpr1 + ; CHECK-NEXT: liveins: $vgpr0, $vgpr1, $sgpr30_sgpr31 ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY]](s32) ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 ; CHECK-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY1]](s32) + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 ; CHECK-NEXT: [[UADDSAT:%[0-9]+]]:_(s16) = G_UADDSAT [[TRUNC]], [[TRUNC1]] ; CHECK-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[UADDSAT]](s16) ; CHECK-NEXT: $vgpr0 = COPY [[ANYEXT]](s32) - ; CHECK-NEXT: SI_RETURN implicit $vgpr0 + ; CHECK-NEXT: [[COPY3:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY2]] + ; CHECK-NEXT: S_SETPC_B64_return [[COPY3]], implicit $vgpr0 %res = call i16 @llvm.uadd.sat.i16(i16 %lhs, i16 %rhs) ret i16 %res } @@ -22,13 +24,15 @@ declare i16 @llvm.uadd.sat.i16(i16, i16) define i32 @uaddsat_i32(i32 %lhs, i32 %rhs) { ; CHECK-LABEL: name: uaddsat_i32 ; CHECK: bb.1 (%ir-block.0): - ; CHECK-NEXT: liveins: $vgpr0, $vgpr1 + ; CHECK-NEXT: liveins: $vgpr0, $vgpr1, $sgpr30_sgpr31 ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY 
$vgpr1 + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 ; CHECK-NEXT: [[UADDSAT:%[0-9]+]]:_(s32) = G_UADDSAT [[COPY]], [[COPY1]] ; CHECK-NEXT: $vgpr0 = COPY [[UADDSAT]](s32) - ; CHECK-NEXT: SI_RETURN implicit $vgpr0 + ; CHECK-NEXT: [[COPY3:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY2]] + ; CHECK-NEXT: S_SETPC_B64_return [[COPY3]], implicit $vgpr0 %res = call i32 @llvm.uadd.sat.i32(i32 %lhs, i32 %rhs) ret i32 %res } @@ -37,7 +41,7 @@ declare i32 @llvm.uadd.sat.i32(i32, i32) define i64 @uaddsat_i64(i64 %lhs, i64 %rhs) { ; CHECK-LABEL: name: uaddsat_i64 ; CHECK: bb.1 (%ir-block.0): - ; CHECK-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3 + ; CHECK-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $sgpr30_sgpr31 ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 @@ -45,11 +49,13 @@ define i64 @uaddsat_i64(i64 %lhs, i64 %rhs) { ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 ; CHECK-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $vgpr3 ; CHECK-NEXT: [[MV1:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY2]](s32), [[COPY3]](s32) + ; CHECK-NEXT: [[COPY4:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 ; CHECK-NEXT: [[UADDSAT:%[0-9]+]]:_(s64) = G_UADDSAT [[MV]], [[MV1]] ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[UADDSAT]](s64) ; CHECK-NEXT: $vgpr0 = COPY [[UV]](s32) ; CHECK-NEXT: $vgpr1 = COPY [[UV1]](s32) - ; CHECK-NEXT: SI_RETURN implicit $vgpr0, implicit $vgpr1 + ; CHECK-NEXT: [[COPY5:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY4]] + ; CHECK-NEXT: S_SETPC_B64_return [[COPY5]], implicit $vgpr0, implicit $vgpr1 %res = call i64 @llvm.uadd.sat.i64(i64 %lhs, i64 %rhs) ret i64 %res } @@ -58,7 +64,7 @@ declare i64 @llvm.uadd.sat.i64(i64, i64) define <2 x i32> @uaddsat_v2i32(<2 x i32> %lhs, <2 x i32> %rhs) { ; CHECK-LABEL: name: uaddsat_v2i32 ; CHECK: bb.1 (%ir-block.0): - ; CHECK-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3 + ; CHECK-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $sgpr30_sgpr31 ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 @@ -66,11 +72,13 @@ define <2 x i32> @uaddsat_v2i32(<2 x i32> %lhs, <2 x i32> %rhs) { ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 ; CHECK-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $vgpr3 ; CHECK-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[COPY2]](s32), [[COPY3]](s32) + ; CHECK-NEXT: [[COPY4:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 ; CHECK-NEXT: [[UADDSAT:%[0-9]+]]:_(<2 x s32>) = G_UADDSAT [[BUILD_VECTOR]], [[BUILD_VECTOR1]] ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[UADDSAT]](<2 x s32>) ; CHECK-NEXT: $vgpr0 = COPY [[UV]](s32) ; CHECK-NEXT: $vgpr1 = COPY [[UV1]](s32) - ; CHECK-NEXT: SI_RETURN implicit $vgpr0, implicit $vgpr1 + ; CHECK-NEXT: [[COPY5:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY4]] + ; CHECK-NEXT: S_SETPC_B64_return [[COPY5]], implicit $vgpr0, implicit $vgpr1 %res = call <2 x i32> @llvm.uadd.sat.v2i32(<2 x i32> %lhs, <2 x i32> %rhs) ret <2 x i32> %res } @@ -79,16 +87,18 @@ declare <2 x i32> @llvm.uadd.sat.v2i32(<2 x i32>, <2 x i32>) define i16 @saddsat_i16(i16 %lhs, i16 %rhs) { ; CHECK-LABEL: name: saddsat_i16 ; CHECK: bb.1 (%ir-block.0): - ; CHECK-NEXT: liveins: $vgpr0, $vgpr1 + ; CHECK-NEXT: liveins: $vgpr0, $vgpr1, $sgpr30_sgpr31 ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY]](s32) ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 ; CHECK-NEXT: 
[[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY1]](s32) + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 ; CHECK-NEXT: [[SADDSAT:%[0-9]+]]:_(s16) = G_SADDSAT [[TRUNC]], [[TRUNC1]] ; CHECK-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[SADDSAT]](s16) ; CHECK-NEXT: $vgpr0 = COPY [[ANYEXT]](s32) - ; CHECK-NEXT: SI_RETURN implicit $vgpr0 + ; CHECK-NEXT: [[COPY3:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY2]] + ; CHECK-NEXT: S_SETPC_B64_return [[COPY3]], implicit $vgpr0 %res = call i16 @llvm.sadd.sat.i16(i16 %lhs, i16 %rhs) ret i16 %res } @@ -97,13 +107,15 @@ declare i16 @llvm.sadd.sat.i16(i16, i16) define i32 @saddsat_i32(i32 %lhs, i32 %rhs) { ; CHECK-LABEL: name: saddsat_i32 ; CHECK: bb.1 (%ir-block.0): - ; CHECK-NEXT: liveins: $vgpr0, $vgpr1 + ; CHECK-NEXT: liveins: $vgpr0, $vgpr1, $sgpr30_sgpr31 ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 ; CHECK-NEXT: [[SADDSAT:%[0-9]+]]:_(s32) = G_SADDSAT [[COPY]], [[COPY1]] ; CHECK-NEXT: $vgpr0 = COPY [[SADDSAT]](s32) - ; CHECK-NEXT: SI_RETURN implicit $vgpr0 + ; CHECK-NEXT: [[COPY3:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY2]] + ; CHECK-NEXT: S_SETPC_B64_return [[COPY3]], implicit $vgpr0 %res = call i32 @llvm.sadd.sat.i32(i32 %lhs, i32 %rhs) ret i32 %res } @@ -112,7 +124,7 @@ declare i32 @llvm.sadd.sat.i32(i32, i32) define i64 @saddsat_i64(i64 %lhs, i64 %rhs) { ; CHECK-LABEL: name: saddsat_i64 ; CHECK: bb.1 (%ir-block.0): - ; CHECK-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3 + ; CHECK-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $sgpr30_sgpr31 ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 @@ -120,11 +132,13 @@ define i64 @saddsat_i64(i64 %lhs, i64 %rhs) { ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 ; CHECK-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $vgpr3 ; CHECK-NEXT: [[MV1:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY2]](s32), [[COPY3]](s32) + ; CHECK-NEXT: [[COPY4:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 ; CHECK-NEXT: [[SADDSAT:%[0-9]+]]:_(s64) = G_SADDSAT [[MV]], [[MV1]] ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[SADDSAT]](s64) ; CHECK-NEXT: $vgpr0 = COPY [[UV]](s32) ; CHECK-NEXT: $vgpr1 = COPY [[UV1]](s32) - ; CHECK-NEXT: SI_RETURN implicit $vgpr0, implicit $vgpr1 + ; CHECK-NEXT: [[COPY5:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY4]] + ; CHECK-NEXT: S_SETPC_B64_return [[COPY5]], implicit $vgpr0, implicit $vgpr1 %res = call i64 @llvm.sadd.sat.i64(i64 %lhs, i64 %rhs) ret i64 %res } @@ -133,7 +147,7 @@ declare i64 @llvm.sadd.sat.i64(i64, i64) define <2 x i32> @saddsat_v2i32(<2 x i32> %lhs, <2 x i32> %rhs) { ; CHECK-LABEL: name: saddsat_v2i32 ; CHECK: bb.1 (%ir-block.0): - ; CHECK-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3 + ; CHECK-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $sgpr30_sgpr31 ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 @@ -141,11 +155,13 @@ define <2 x i32> @saddsat_v2i32(<2 x i32> %lhs, <2 x i32> %rhs) { ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 ; CHECK-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $vgpr3 ; CHECK-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[COPY2]](s32), [[COPY3]](s32) + ; CHECK-NEXT: [[COPY4:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 ; CHECK-NEXT: [[SADDSAT:%[0-9]+]]:_(<2 x s32>) = G_SADDSAT [[BUILD_VECTOR]], [[BUILD_VECTOR1]] ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s32), 
[[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[SADDSAT]](<2 x s32>) ; CHECK-NEXT: $vgpr0 = COPY [[UV]](s32) ; CHECK-NEXT: $vgpr1 = COPY [[UV1]](s32) - ; CHECK-NEXT: SI_RETURN implicit $vgpr0, implicit $vgpr1 + ; CHECK-NEXT: [[COPY5:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY4]] + ; CHECK-NEXT: S_SETPC_B64_return [[COPY5]], implicit $vgpr0, implicit $vgpr1 %res = call <2 x i32> @llvm.sadd.sat.v2i32(<2 x i32> %lhs, <2 x i32> %rhs) ret <2 x i32> %res } @@ -154,16 +170,18 @@ declare <2 x i32> @llvm.sadd.sat.v2i32(<2 x i32>, <2 x i32>) define i16 @usubsat_i16(i16 %lhs, i16 %rhs) { ; CHECK-LABEL: name: usubsat_i16 ; CHECK: bb.1 (%ir-block.0): - ; CHECK-NEXT: liveins: $vgpr0, $vgpr1 + ; CHECK-NEXT: liveins: $vgpr0, $vgpr1, $sgpr30_sgpr31 ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY]](s32) ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 ; CHECK-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY1]](s32) + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 ; CHECK-NEXT: [[USUBSAT:%[0-9]+]]:_(s16) = G_USUBSAT [[TRUNC]], [[TRUNC1]] ; CHECK-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[USUBSAT]](s16) ; CHECK-NEXT: $vgpr0 = COPY [[ANYEXT]](s32) - ; CHECK-NEXT: SI_RETURN implicit $vgpr0 + ; CHECK-NEXT: [[COPY3:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY2]] + ; CHECK-NEXT: S_SETPC_B64_return [[COPY3]], implicit $vgpr0 %res = call i16 @llvm.usub.sat.i16(i16 %lhs, i16 %rhs) ret i16 %res } @@ -172,13 +190,15 @@ declare i16 @llvm.usub.sat.i16(i16, i16) define i32 @usubsat_i32(i32 %lhs, i32 %rhs) { ; CHECK-LABEL: name: usubsat_i32 ; CHECK: bb.1 (%ir-block.0): - ; CHECK-NEXT: liveins: $vgpr0, $vgpr1 + ; CHECK-NEXT: liveins: $vgpr0, $vgpr1, $sgpr30_sgpr31 ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 ; CHECK-NEXT: [[USUBSAT:%[0-9]+]]:_(s32) = G_USUBSAT [[COPY]], [[COPY1]] ; CHECK-NEXT: $vgpr0 = COPY [[USUBSAT]](s32) - ; CHECK-NEXT: SI_RETURN implicit $vgpr0 + ; CHECK-NEXT: [[COPY3:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY2]] + ; CHECK-NEXT: S_SETPC_B64_return [[COPY3]], implicit $vgpr0 %res = call i32 @llvm.usub.sat.i32(i32 %lhs, i32 %rhs) ret i32 %res } @@ -187,7 +207,7 @@ declare i32 @llvm.usub.sat.i32(i32, i32) define i64 @usubsat_i64(i64 %lhs, i64 %rhs) { ; CHECK-LABEL: name: usubsat_i64 ; CHECK: bb.1 (%ir-block.0): - ; CHECK-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3 + ; CHECK-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $sgpr30_sgpr31 ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 @@ -195,11 +215,13 @@ define i64 @usubsat_i64(i64 %lhs, i64 %rhs) { ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 ; CHECK-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $vgpr3 ; CHECK-NEXT: [[MV1:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY2]](s32), [[COPY3]](s32) + ; CHECK-NEXT: [[COPY4:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 ; CHECK-NEXT: [[USUBSAT:%[0-9]+]]:_(s64) = G_USUBSAT [[MV]], [[MV1]] ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[USUBSAT]](s64) ; CHECK-NEXT: $vgpr0 = COPY [[UV]](s32) ; CHECK-NEXT: $vgpr1 = COPY [[UV1]](s32) - ; CHECK-NEXT: SI_RETURN implicit $vgpr0, implicit $vgpr1 + ; CHECK-NEXT: [[COPY5:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY4]] + ; CHECK-NEXT: S_SETPC_B64_return [[COPY5]], implicit $vgpr0, implicit $vgpr1 %res = call i64 @llvm.usub.sat.i64(i64 %lhs, i64 %rhs) ret i64 %res } @@ 
-208,7 +230,7 @@ declare i64 @llvm.usub.sat.i64(i64, i64) define <2 x i32> @usubsat_v2i32(<2 x i32> %lhs, <2 x i32> %rhs) { ; CHECK-LABEL: name: usubsat_v2i32 ; CHECK: bb.1 (%ir-block.0): - ; CHECK-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3 + ; CHECK-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $sgpr30_sgpr31 ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 @@ -216,11 +238,13 @@ define <2 x i32> @usubsat_v2i32(<2 x i32> %lhs, <2 x i32> %rhs) { ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 ; CHECK-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $vgpr3 ; CHECK-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[COPY2]](s32), [[COPY3]](s32) + ; CHECK-NEXT: [[COPY4:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 ; CHECK-NEXT: [[USUBSAT:%[0-9]+]]:_(<2 x s32>) = G_USUBSAT [[BUILD_VECTOR]], [[BUILD_VECTOR1]] ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[USUBSAT]](<2 x s32>) ; CHECK-NEXT: $vgpr0 = COPY [[UV]](s32) ; CHECK-NEXT: $vgpr1 = COPY [[UV1]](s32) - ; CHECK-NEXT: SI_RETURN implicit $vgpr0, implicit $vgpr1 + ; CHECK-NEXT: [[COPY5:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY4]] + ; CHECK-NEXT: S_SETPC_B64_return [[COPY5]], implicit $vgpr0, implicit $vgpr1 %res = call <2 x i32> @llvm.usub.sat.v2i32(<2 x i32> %lhs, <2 x i32> %rhs) ret <2 x i32> %res } @@ -229,16 +253,18 @@ declare <2 x i32> @llvm.usub.sat.v2i32(<2 x i32>, <2 x i32>) define i16 @ssubsat_i16(i16 %lhs, i16 %rhs) { ; CHECK-LABEL: name: ssubsat_i16 ; CHECK: bb.1 (%ir-block.0): - ; CHECK-NEXT: liveins: $vgpr0, $vgpr1 + ; CHECK-NEXT: liveins: $vgpr0, $vgpr1, $sgpr30_sgpr31 ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY]](s32) ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 ; CHECK-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY1]](s32) + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 ; CHECK-NEXT: [[SSUBSAT:%[0-9]+]]:_(s16) = G_SSUBSAT [[TRUNC]], [[TRUNC1]] ; CHECK-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[SSUBSAT]](s16) ; CHECK-NEXT: $vgpr0 = COPY [[ANYEXT]](s32) - ; CHECK-NEXT: SI_RETURN implicit $vgpr0 + ; CHECK-NEXT: [[COPY3:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY2]] + ; CHECK-NEXT: S_SETPC_B64_return [[COPY3]], implicit $vgpr0 %res = call i16 @llvm.ssub.sat.i16(i16 %lhs, i16 %rhs) ret i16 %res } @@ -247,13 +273,15 @@ declare i16 @llvm.ssub.sat.i16(i16, i16) define i32 @ssubsat_i32(i32 %lhs, i32 %rhs) { ; CHECK-LABEL: name: ssubsat_i32 ; CHECK: bb.1 (%ir-block.0): - ; CHECK-NEXT: liveins: $vgpr0, $vgpr1 + ; CHECK-NEXT: liveins: $vgpr0, $vgpr1, $sgpr30_sgpr31 ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 ; CHECK-NEXT: [[SSUBSAT:%[0-9]+]]:_(s32) = G_SSUBSAT [[COPY]], [[COPY1]] ; CHECK-NEXT: $vgpr0 = COPY [[SSUBSAT]](s32) - ; CHECK-NEXT: SI_RETURN implicit $vgpr0 + ; CHECK-NEXT: [[COPY3:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY2]] + ; CHECK-NEXT: S_SETPC_B64_return [[COPY3]], implicit $vgpr0 %res = call i32 @llvm.ssub.sat.i32(i32 %lhs, i32 %rhs) ret i32 %res } @@ -262,7 +290,7 @@ declare i32 @llvm.ssub.sat.i32(i32, i32) define i64 @ssubsat_i64(i64 %lhs, i64 %rhs) { ; CHECK-LABEL: name: ssubsat_i64 ; CHECK: bb.1 (%ir-block.0): - ; CHECK-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3 + ; CHECK-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $sgpr30_sgpr31 ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: 
[[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 @@ -270,11 +298,13 @@ define i64 @ssubsat_i64(i64 %lhs, i64 %rhs) { ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 ; CHECK-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $vgpr3 ; CHECK-NEXT: [[MV1:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY2]](s32), [[COPY3]](s32) + ; CHECK-NEXT: [[COPY4:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 ; CHECK-NEXT: [[SSUBSAT:%[0-9]+]]:_(s64) = G_SSUBSAT [[MV]], [[MV1]] ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[SSUBSAT]](s64) ; CHECK-NEXT: $vgpr0 = COPY [[UV]](s32) ; CHECK-NEXT: $vgpr1 = COPY [[UV1]](s32) - ; CHECK-NEXT: SI_RETURN implicit $vgpr0, implicit $vgpr1 + ; CHECK-NEXT: [[COPY5:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY4]] + ; CHECK-NEXT: S_SETPC_B64_return [[COPY5]], implicit $vgpr0, implicit $vgpr1 %res = call i64 @llvm.ssub.sat.i64(i64 %lhs, i64 %rhs) ret i64 %res } @@ -283,7 +313,7 @@ declare i64 @llvm.ssub.sat.i64(i64, i64) define <2 x i32> @ssubsat_v2i32(<2 x i32> %lhs, <2 x i32> %rhs) { ; CHECK-LABEL: name: ssubsat_v2i32 ; CHECK: bb.1 (%ir-block.0): - ; CHECK-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3 + ; CHECK-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $sgpr30_sgpr31 ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 @@ -291,11 +321,13 @@ define <2 x i32> @ssubsat_v2i32(<2 x i32> %lhs, <2 x i32> %rhs) { ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 ; CHECK-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $vgpr3 ; CHECK-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[COPY2]](s32), [[COPY3]](s32) + ; CHECK-NEXT: [[COPY4:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 ; CHECK-NEXT: [[SSUBSAT:%[0-9]+]]:_(<2 x s32>) = G_SSUBSAT [[BUILD_VECTOR]], [[BUILD_VECTOR1]] ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[SSUBSAT]](<2 x s32>) ; CHECK-NEXT: $vgpr0 = COPY [[UV]](s32) ; CHECK-NEXT: $vgpr1 = COPY [[UV1]](s32) - ; CHECK-NEXT: SI_RETURN implicit $vgpr0, implicit $vgpr1 + ; CHECK-NEXT: [[COPY5:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY4]] + ; CHECK-NEXT: S_SETPC_B64_return [[COPY5]], implicit $vgpr0, implicit $vgpr1 %res = call <2 x i32> @llvm.ssub.sat.v2i32(<2 x i32> %lhs, <2 x i32> %rhs) ret <2 x i32> %res } @@ -304,16 +336,18 @@ declare <2 x i32> @llvm.ssub.sat.v2i32(<2 x i32>, <2 x i32>) define i16 @ushlsat_i16(i16 %lhs, i16 %rhs) { ; CHECK-LABEL: name: ushlsat_i16 ; CHECK: bb.1 (%ir-block.0): - ; CHECK-NEXT: liveins: $vgpr0, $vgpr1 + ; CHECK-NEXT: liveins: $vgpr0, $vgpr1, $sgpr30_sgpr31 ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY]](s32) ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 ; CHECK-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY1]](s32) + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 ; CHECK-NEXT: [[USHLSAT:%[0-9]+]]:_(s16) = G_USHLSAT [[TRUNC]], [[TRUNC1]](s16) ; CHECK-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[USHLSAT]](s16) ; CHECK-NEXT: $vgpr0 = COPY [[ANYEXT]](s32) - ; CHECK-NEXT: SI_RETURN implicit $vgpr0 + ; CHECK-NEXT: [[COPY3:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY2]] + ; CHECK-NEXT: S_SETPC_B64_return [[COPY3]], implicit $vgpr0 %res = call i16 @llvm.ushl.sat.i16(i16 %lhs, i16 %rhs) ret i16 %res } @@ -322,13 +356,15 @@ declare i16 @llvm.ushl.sat.i16(i16, i16) define i32 @ushlsat_i32(i32 %lhs, i32 %rhs) { ; CHECK-LABEL: name: ushlsat_i32 ; CHECK: bb.1 (%ir-block.0): - ; CHECK-NEXT: liveins: $vgpr0, $vgpr1 
+ ; CHECK-NEXT: liveins: $vgpr0, $vgpr1, $sgpr30_sgpr31 ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 ; CHECK-NEXT: [[USHLSAT:%[0-9]+]]:_(s32) = G_USHLSAT [[COPY]], [[COPY1]](s32) ; CHECK-NEXT: $vgpr0 = COPY [[USHLSAT]](s32) - ; CHECK-NEXT: SI_RETURN implicit $vgpr0 + ; CHECK-NEXT: [[COPY3:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY2]] + ; CHECK-NEXT: S_SETPC_B64_return [[COPY3]], implicit $vgpr0 %res = call i32 @llvm.ushl.sat.i32(i32 %lhs, i32 %rhs) ret i32 %res } @@ -337,7 +373,7 @@ declare i32 @llvm.ushl.sat.i32(i32, i32) define i64 @ushlsat_i64(i64 %lhs, i64 %rhs) { ; CHECK-LABEL: name: ushlsat_i64 ; CHECK: bb.1 (%ir-block.0): - ; CHECK-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3 + ; CHECK-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $sgpr30_sgpr31 ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 @@ -345,11 +381,13 @@ define i64 @ushlsat_i64(i64 %lhs, i64 %rhs) { ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 ; CHECK-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $vgpr3 ; CHECK-NEXT: [[MV1:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY2]](s32), [[COPY3]](s32) + ; CHECK-NEXT: [[COPY4:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 ; CHECK-NEXT: [[USHLSAT:%[0-9]+]]:_(s64) = G_USHLSAT [[MV]], [[MV1]](s64) ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[USHLSAT]](s64) ; CHECK-NEXT: $vgpr0 = COPY [[UV]](s32) ; CHECK-NEXT: $vgpr1 = COPY [[UV1]](s32) - ; CHECK-NEXT: SI_RETURN implicit $vgpr0, implicit $vgpr1 + ; CHECK-NEXT: [[COPY5:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY4]] + ; CHECK-NEXT: S_SETPC_B64_return [[COPY5]], implicit $vgpr0, implicit $vgpr1 %res = call i64 @llvm.ushl.sat.i64(i64 %lhs, i64 %rhs) ret i64 %res } @@ -358,7 +396,7 @@ declare i64 @llvm.ushl.sat.i64(i64, i64) define <2 x i32> @ushlsat_v2i32(<2 x i32> %lhs, <2 x i32> %rhs) { ; CHECK-LABEL: name: ushlsat_v2i32 ; CHECK: bb.1 (%ir-block.0): - ; CHECK-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3 + ; CHECK-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $sgpr30_sgpr31 ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 @@ -366,11 +404,13 @@ define <2 x i32> @ushlsat_v2i32(<2 x i32> %lhs, <2 x i32> %rhs) { ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 ; CHECK-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $vgpr3 ; CHECK-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[COPY2]](s32), [[COPY3]](s32) + ; CHECK-NEXT: [[COPY4:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 ; CHECK-NEXT: [[USHLSAT:%[0-9]+]]:_(<2 x s32>) = G_USHLSAT [[BUILD_VECTOR]], [[BUILD_VECTOR1]](<2 x s32>) ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[USHLSAT]](<2 x s32>) ; CHECK-NEXT: $vgpr0 = COPY [[UV]](s32) ; CHECK-NEXT: $vgpr1 = COPY [[UV1]](s32) - ; CHECK-NEXT: SI_RETURN implicit $vgpr0, implicit $vgpr1 + ; CHECK-NEXT: [[COPY5:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY4]] + ; CHECK-NEXT: S_SETPC_B64_return [[COPY5]], implicit $vgpr0, implicit $vgpr1 %res = call <2 x i32> @llvm.ushl.sat.v2i32(<2 x i32> %lhs, <2 x i32> %rhs) ret <2 x i32> %res } @@ -379,16 +419,18 @@ declare <2 x i32> @llvm.ushl.sat.v2i32(<2 x i32>, <2 x i32>) define i16 @sshlsat_i16(i16 %lhs, i16 %rhs) { ; CHECK-LABEL: name: sshlsat_i16 ; CHECK: bb.1 (%ir-block.0): - ; CHECK-NEXT: liveins: $vgpr0, $vgpr1 + ; CHECK-NEXT: liveins: $vgpr0, $vgpr1, $sgpr30_sgpr31 ; 
CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY]](s32) ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 ; CHECK-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY1]](s32) + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 ; CHECK-NEXT: [[SSHLSAT:%[0-9]+]]:_(s16) = G_SSHLSAT [[TRUNC]], [[TRUNC1]](s16) ; CHECK-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[SSHLSAT]](s16) ; CHECK-NEXT: $vgpr0 = COPY [[ANYEXT]](s32) - ; CHECK-NEXT: SI_RETURN implicit $vgpr0 + ; CHECK-NEXT: [[COPY3:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY2]] + ; CHECK-NEXT: S_SETPC_B64_return [[COPY3]], implicit $vgpr0 %res = call i16 @llvm.sshl.sat.i16(i16 %lhs, i16 %rhs) ret i16 %res } @@ -397,13 +439,15 @@ declare i16 @llvm.sshl.sat.i16(i16, i16) define i32 @sshlsat_i32(i32 %lhs, i32 %rhs) { ; CHECK-LABEL: name: sshlsat_i32 ; CHECK: bb.1 (%ir-block.0): - ; CHECK-NEXT: liveins: $vgpr0, $vgpr1 + ; CHECK-NEXT: liveins: $vgpr0, $vgpr1, $sgpr30_sgpr31 ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 ; CHECK-NEXT: [[SSHLSAT:%[0-9]+]]:_(s32) = G_SSHLSAT [[COPY]], [[COPY1]](s32) ; CHECK-NEXT: $vgpr0 = COPY [[SSHLSAT]](s32) - ; CHECK-NEXT: SI_RETURN implicit $vgpr0 + ; CHECK-NEXT: [[COPY3:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY2]] + ; CHECK-NEXT: S_SETPC_B64_return [[COPY3]], implicit $vgpr0 %res = call i32 @llvm.sshl.sat.i32(i32 %lhs, i32 %rhs) ret i32 %res } @@ -412,7 +456,7 @@ declare i32 @llvm.sshl.sat.i32(i32, i32) define i64 @sshlsat_i64(i64 %lhs, i64 %rhs) { ; CHECK-LABEL: name: sshlsat_i64 ; CHECK: bb.1 (%ir-block.0): - ; CHECK-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3 + ; CHECK-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $sgpr30_sgpr31 ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 @@ -420,11 +464,13 @@ define i64 @sshlsat_i64(i64 %lhs, i64 %rhs) { ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 ; CHECK-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $vgpr3 ; CHECK-NEXT: [[MV1:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY2]](s32), [[COPY3]](s32) + ; CHECK-NEXT: [[COPY4:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 ; CHECK-NEXT: [[SSHLSAT:%[0-9]+]]:_(s64) = G_SSHLSAT [[MV]], [[MV1]](s64) ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[SSHLSAT]](s64) ; CHECK-NEXT: $vgpr0 = COPY [[UV]](s32) ; CHECK-NEXT: $vgpr1 = COPY [[UV1]](s32) - ; CHECK-NEXT: SI_RETURN implicit $vgpr0, implicit $vgpr1 + ; CHECK-NEXT: [[COPY5:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY4]] + ; CHECK-NEXT: S_SETPC_B64_return [[COPY5]], implicit $vgpr0, implicit $vgpr1 %res = call i64 @llvm.sshl.sat.i64(i64 %lhs, i64 %rhs) ret i64 %res } @@ -433,7 +479,7 @@ declare i64 @llvm.sshl.sat.i64(i64, i64) define <2 x i32> @sshlsat_v2i32(<2 x i32> %lhs, <2 x i32> %rhs) { ; CHECK-LABEL: name: sshlsat_v2i32 ; CHECK: bb.1 (%ir-block.0): - ; CHECK-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3 + ; CHECK-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $sgpr30_sgpr31 ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 @@ -441,11 +487,13 @@ define <2 x i32> @sshlsat_v2i32(<2 x i32> %lhs, <2 x i32> %rhs) { ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 ; CHECK-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $vgpr3 ; CHECK-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[COPY2]](s32), 
[[COPY3]](s32) + ; CHECK-NEXT: [[COPY4:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 ; CHECK-NEXT: [[SSHLSAT:%[0-9]+]]:_(<2 x s32>) = G_SSHLSAT [[BUILD_VECTOR]], [[BUILD_VECTOR1]](<2 x s32>) ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[SSHLSAT]](<2 x s32>) ; CHECK-NEXT: $vgpr0 = COPY [[UV]](s32) ; CHECK-NEXT: $vgpr1 = COPY [[UV1]](s32) - ; CHECK-NEXT: SI_RETURN implicit $vgpr0, implicit $vgpr1 + ; CHECK-NEXT: [[COPY5:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY4]] + ; CHECK-NEXT: S_SETPC_B64_return [[COPY5]], implicit $vgpr0, implicit $vgpr1 %res = call <2 x i32> @llvm.sshl.sat.v2i32(<2 x i32> %lhs, <2 x i32> %rhs) ret <2 x i32> %res } diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-sibling-call.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-sibling-call.ll index 34925828c2d1..d6da4fca2198 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-sibling-call.ll +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-sibling-call.ll @@ -5,13 +5,15 @@ define fastcc i32 @i32_fastcc_i32_i32(i32 %arg0, i32 %arg1) #1 { ; GCN-LABEL: name: i32_fastcc_i32_i32 ; GCN: bb.1 (%ir-block.0): - ; GCN-NEXT: liveins: $vgpr0, $vgpr1 + ; GCN-NEXT: liveins: $vgpr0, $vgpr1, $sgpr30_sgpr31 ; GCN-NEXT: {{ $}} ; GCN-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 ; GCN-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; GCN-NEXT: [[COPY2:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 ; GCN-NEXT: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[COPY]], [[COPY1]] ; GCN-NEXT: $vgpr0 = COPY [[ADD]](s32) - ; GCN-NEXT: SI_RETURN implicit $vgpr0 + ; GCN-NEXT: [[COPY3:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY2]] + ; GCN-NEXT: S_SETPC_B64_return [[COPY3]], implicit $vgpr0 %add0 = add i32 %arg0, %arg1 ret i32 %add0 } @@ -19,10 +21,11 @@ define fastcc i32 @i32_fastcc_i32_i32(i32 %arg0, i32 %arg1) #1 { define fastcc i32 @i32_fastcc_i32_i32_stack_object(i32 %arg0, i32 %arg1) #1 { ; GCN-LABEL: name: i32_fastcc_i32_i32_stack_object ; GCN: bb.1 (%ir-block.0): - ; GCN-NEXT: liveins: $vgpr0, $vgpr1 + ; GCN-NEXT: liveins: $vgpr0, $vgpr1, $sgpr30_sgpr31 ; GCN-NEXT: {{ $}} ; GCN-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 ; GCN-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; GCN-NEXT: [[COPY2:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 ; GCN-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 9 ; GCN-NEXT: [[FRAME_INDEX:%[0-9]+]]:_(p5) = G_FRAME_INDEX %stack.0.alloca ; GCN-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 20 @@ -30,7 +33,8 @@ define fastcc i32 @i32_fastcc_i32_i32_stack_object(i32 %arg0, i32 %arg1) #1 { ; GCN-NEXT: G_STORE [[C]](s32), [[PTR_ADD]](p5) :: (volatile store (s32) into %ir.gep, addrspace 5) ; GCN-NEXT: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[COPY]], [[COPY1]] ; GCN-NEXT: $vgpr0 = COPY [[ADD]](s32) - ; GCN-NEXT: SI_RETURN implicit $vgpr0 + ; GCN-NEXT: [[COPY3:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY2]] + ; GCN-NEXT: S_SETPC_B64_return [[COPY3]], implicit $vgpr0 %alloca = alloca [16 x i32], align 4, addrspace(5) %gep = getelementptr inbounds [16 x i32], [16 x i32] addrspace(5)* %alloca, i32 0, i32 5 store volatile i32 9, i32 addrspace(5)* %gep @@ -41,16 +45,17 @@ define fastcc i32 @i32_fastcc_i32_i32_stack_object(i32 %arg0, i32 %arg1) #1 { define hidden fastcc i32 @sibling_call_i32_fastcc_i32_i32(i32 %a, i32 %b, i32 %c) #1 { ; GCN-LABEL: name: sibling_call_i32_fastcc_i32_i32 ; GCN: bb.1.entry: - ; GCN-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2 + ; GCN-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2, $sgpr30_sgpr31 ; GCN-NEXT: {{ $}} ; GCN-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 ; GCN-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 ; 
GCN-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 + ; GCN-NEXT: [[COPY3:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 ; GCN-NEXT: [[GV:%[0-9]+]]:sreg_64(p0) = G_GLOBAL_VALUE @i32_fastcc_i32_i32 ; GCN-NEXT: $vgpr0 = COPY [[COPY]](s32) ; GCN-NEXT: $vgpr1 = COPY [[COPY1]](s32) - ; GCN-NEXT: [[COPY3:%[0-9]+]]:_(<4 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3 - ; GCN-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY3]](<4 x s32>) + ; GCN-NEXT: [[COPY4:%[0-9]+]]:_(<4 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3 + ; GCN-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY4]](<4 x s32>) ; GCN-NEXT: SI_TCRETURN [[GV]](p0), @i32_fastcc_i32_i32, 0, csr_amdgpu_highregs, implicit $vgpr0, implicit $vgpr1, implicit $sgpr0_sgpr1_sgpr2_sgpr3 entry: %ret = tail call fastcc i32 @i32_fastcc_i32_i32(i32 %a, i32 %b) @@ -60,11 +65,12 @@ entry: define fastcc i32 @sibling_call_i32_fastcc_i32_i32_stack_object(i32 %a, i32 %b, i32 %c) #1 { ; GCN-LABEL: name: sibling_call_i32_fastcc_i32_i32_stack_object ; GCN: bb.1.entry: - ; GCN-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2 + ; GCN-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2, $sgpr30_sgpr31 ; GCN-NEXT: {{ $}} ; GCN-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 ; GCN-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 ; GCN-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 + ; GCN-NEXT: [[COPY3:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 ; GCN-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 9 ; GCN-NEXT: [[FRAME_INDEX:%[0-9]+]]:_(p5) = G_FRAME_INDEX %stack.0.alloca ; GCN-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 20 @@ -73,8 +79,8 @@ define fastcc i32 @sibling_call_i32_fastcc_i32_i32_stack_object(i32 %a, i32 %b, ; GCN-NEXT: [[GV:%[0-9]+]]:sreg_64(p0) = G_GLOBAL_VALUE @i32_fastcc_i32_i32 ; GCN-NEXT: $vgpr0 = COPY [[COPY]](s32) ; GCN-NEXT: $vgpr1 = COPY [[COPY1]](s32) - ; GCN-NEXT: [[COPY3:%[0-9]+]]:_(<4 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3 - ; GCN-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY3]](<4 x s32>) + ; GCN-NEXT: [[COPY4:%[0-9]+]]:_(<4 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3 + ; GCN-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY4]](<4 x s32>) ; GCN-NEXT: SI_TCRETURN [[GV]](p0), @i32_fastcc_i32_i32, 0, csr_amdgpu_highregs, implicit $vgpr0, implicit $vgpr1, implicit $sgpr0_sgpr1_sgpr2_sgpr3 entry: %alloca = alloca [16 x i32], align 4, addrspace(5) @@ -87,11 +93,12 @@ entry: define fastcc i32 @sibling_call_i32_fastcc_i32_i32_callee_stack_object(i32 %a, i32 %b, i32 %c) #1 { ; GCN-LABEL: name: sibling_call_i32_fastcc_i32_i32_callee_stack_object ; GCN: bb.1.entry: - ; GCN-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2 + ; GCN-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2, $sgpr30_sgpr31 ; GCN-NEXT: {{ $}} ; GCN-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 ; GCN-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 ; GCN-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 + ; GCN-NEXT: [[COPY3:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 ; GCN-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 9 ; GCN-NEXT: [[FRAME_INDEX:%[0-9]+]]:_(p5) = G_FRAME_INDEX %stack.0.alloca ; GCN-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 20 @@ -100,8 +107,8 @@ define fastcc i32 @sibling_call_i32_fastcc_i32_i32_callee_stack_object(i32 %a, i ; GCN-NEXT: [[GV:%[0-9]+]]:sreg_64(p0) = G_GLOBAL_VALUE @i32_fastcc_i32_i32_stack_object ; GCN-NEXT: $vgpr0 = COPY [[COPY]](s32) ; GCN-NEXT: $vgpr1 = COPY [[COPY1]](s32) - ; GCN-NEXT: [[COPY3:%[0-9]+]]:_(<4 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3 - ; GCN-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY3]](<4 x s32>) + ; GCN-NEXT: [[COPY4:%[0-9]+]]:_(<4 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3 + ; GCN-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY4]](<4 x s32>) ; 
GCN-NEXT: SI_TCRETURN [[GV]](p0), @i32_fastcc_i32_i32_stack_object, 0, csr_amdgpu_highregs, implicit $vgpr0, implicit $vgpr1, implicit $sgpr0_sgpr1_sgpr2_sgpr3 entry: %alloca = alloca [16 x i32], align 4, addrspace(5) @@ -114,16 +121,17 @@ entry: define fastcc void @sibling_call_i32_fastcc_i32_i32_unused_result(i32 %a, i32 %b, i32 %c) #1 { ; GCN-LABEL: name: sibling_call_i32_fastcc_i32_i32_unused_result ; GCN: bb.1.entry: - ; GCN-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2 + ; GCN-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2, $sgpr30_sgpr31 ; GCN-NEXT: {{ $}} ; GCN-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 ; GCN-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 ; GCN-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 + ; GCN-NEXT: [[COPY3:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 ; GCN-NEXT: [[GV:%[0-9]+]]:sreg_64(p0) = G_GLOBAL_VALUE @i32_fastcc_i32_i32 ; GCN-NEXT: $vgpr0 = COPY [[COPY]](s32) ; GCN-NEXT: $vgpr1 = COPY [[COPY1]](s32) - ; GCN-NEXT: [[COPY3:%[0-9]+]]:_(<4 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3 - ; GCN-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY3]](<4 x s32>) + ; GCN-NEXT: [[COPY4:%[0-9]+]]:_(<4 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3 + ; GCN-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY4]](<4 x s32>) ; GCN-NEXT: SI_TCRETURN [[GV]](p0), @i32_fastcc_i32_i32, 0, csr_amdgpu_highregs, implicit $vgpr0, implicit $vgpr1, implicit $sgpr0_sgpr1_sgpr2_sgpr3 entry: %ret = tail call fastcc i32 @i32_fastcc_i32_i32(i32 %a, i32 %b) @@ -163,15 +171,17 @@ entry: define hidden fastcc i32 @i32_fastcc_i32_byval_i32(i32 %arg0, i32 addrspace(5)* byval(i32) align 4 %arg1) #1 { ; GCN-LABEL: name: i32_fastcc_i32_byval_i32 ; GCN: bb.1 (%ir-block.0): - ; GCN-NEXT: liveins: $vgpr0 + ; GCN-NEXT: liveins: $vgpr0, $sgpr30_sgpr31 ; GCN-NEXT: {{ $}} ; GCN-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 ; GCN-NEXT: [[FRAME_INDEX:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.0 ; GCN-NEXT: [[COPY1:%[0-9]+]]:_(p5) = COPY [[FRAME_INDEX]](p5) + ; GCN-NEXT: [[COPY2:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 ; GCN-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY1]](p5) :: (dereferenceable load (s32) from %ir.arg1, addrspace 5) ; GCN-NEXT: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[COPY]], [[LOAD]] ; GCN-NEXT: $vgpr0 = COPY [[ADD]](s32) - ; GCN-NEXT: SI_RETURN implicit $vgpr0 + ; GCN-NEXT: [[COPY3:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY2]] + ; GCN-NEXT: S_SETPC_B64_return [[COPY3]], implicit $vgpr0 %arg1.load = load i32, i32 addrspace(5)* %arg1, align 4 %add0 = add i32 %arg0, %arg1.load ret i32 %add0 @@ -181,27 +191,29 @@ define hidden fastcc i32 @i32_fastcc_i32_byval_i32(i32 %arg0, i32 addrspace(5)* define fastcc i32 @sibling_call_i32_fastcc_i32_byval_i32_byval_parent(i32 %a, i32 addrspace(5)* byval(i32) %b.byval, i32 %c) #1 { ; GCN-LABEL: name: sibling_call_i32_fastcc_i32_byval_i32_byval_parent ; GCN: bb.1.entry: - ; GCN-NEXT: liveins: $vgpr0, $vgpr1 + ; GCN-NEXT: liveins: $vgpr0, $vgpr1, $sgpr30_sgpr31 ; GCN-NEXT: {{ $}} ; GCN-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 ; GCN-NEXT: [[FRAME_INDEX:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.0 ; GCN-NEXT: [[COPY1:%[0-9]+]]:_(p5) = COPY [[FRAME_INDEX]](p5) ; GCN-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; GCN-NEXT: [[COPY3:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 ; GCN-NEXT: ADJCALLSTACKUP 0, 0, implicit-def $scc ; GCN-NEXT: [[GV:%[0-9]+]]:_(p0) = G_GLOBAL_VALUE @i32_fastcc_i32_byval_i32 - ; GCN-NEXT: [[COPY3:%[0-9]+]]:_(p5) = COPY $sgpr32 + ; GCN-NEXT: [[COPY4:%[0-9]+]]:_(p5) = COPY $sgpr32 ; GCN-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 - ; GCN-NEXT: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY3]], 
[[C]](s32) + ; GCN-NEXT: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY4]], [[C]](s32) ; GCN-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 ; GCN-NEXT: G_MEMCPY [[PTR_ADD]](p5), [[COPY1]](p5), [[C1]](s32), 0 :: (dereferenceable store (s32) into stack, addrspace 5), (dereferenceable load (s32) from %ir.b.byval, addrspace 5) ; GCN-NEXT: $vgpr0 = COPY [[COPY]](s32) - ; GCN-NEXT: [[COPY4:%[0-9]+]]:_(<4 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3 - ; GCN-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY4]](<4 x s32>) + ; GCN-NEXT: [[COPY5:%[0-9]+]]:_(<4 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3 + ; GCN-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY5]](<4 x s32>) ; GCN-NEXT: $sgpr30_sgpr31 = G_SI_CALL [[GV]](p0), @i32_fastcc_i32_byval_i32, csr_amdgpu_highregs, implicit $vgpr0, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit-def $vgpr0 - ; GCN-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $vgpr0 + ; GCN-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $vgpr0 ; GCN-NEXT: ADJCALLSTACKDOWN 0, 4, implicit-def $scc - ; GCN-NEXT: $vgpr0 = COPY [[COPY5]](s32) - ; GCN-NEXT: SI_RETURN implicit $vgpr0 + ; GCN-NEXT: $vgpr0 = COPY [[COPY6]](s32) + ; GCN-NEXT: [[COPY7:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY3]] + ; GCN-NEXT: S_SETPC_B64_return [[COPY7]], implicit $vgpr0 entry: %ret = tail call fastcc i32 @i32_fastcc_i32_byval_i32(i32 %a, i32 addrspace(5)* byval(i32) %b.byval) ret i32 %ret @@ -213,7 +225,7 @@ entry: define fastcc i32 @sibling_call_i32_fastcc_i32_byval_i32(i32 %a, [32 x i32] %large) #1 { ; GCN-LABEL: name: sibling_call_i32_fastcc_i32_byval_i32 ; GCN: bb.1.entry: - ; GCN-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $vgpr8, $vgpr9, $vgpr10, $vgpr11, $vgpr12, $vgpr13, $vgpr14, $vgpr15, $vgpr16, $vgpr17, $vgpr18, $vgpr19, $vgpr20, $vgpr21, $vgpr22, $vgpr23, $vgpr24, $vgpr25, $vgpr26, $vgpr27, $vgpr28, $vgpr29, $vgpr30 + ; GCN-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $vgpr8, $vgpr9, $vgpr10, $vgpr11, $vgpr12, $vgpr13, $vgpr14, $vgpr15, $vgpr16, $vgpr17, $vgpr18, $vgpr19, $vgpr20, $vgpr21, $vgpr22, $vgpr23, $vgpr24, $vgpr25, $vgpr26, $vgpr27, $vgpr28, $vgpr29, $vgpr30, $sgpr30_sgpr31 ; GCN-NEXT: {{ $}} ; GCN-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 ; GCN-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 @@ -250,6 +262,7 @@ define fastcc i32 @sibling_call_i32_fastcc_i32_byval_i32(i32 %a, [32 x i32] %lar ; GCN-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX]](p5) :: (invariant load (s32) from %fixed-stack.2, align 16, addrspace 5) ; GCN-NEXT: [[FRAME_INDEX1:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.1 ; GCN-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX1]](p5) :: (invariant load (s32) from %fixed-stack.1, addrspace 5) + ; GCN-NEXT: [[COPY31:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 ; GCN-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 ; GCN-NEXT: [[INTTOPTR:%[0-9]+]]:_(p5) = G_INTTOPTR [[C]](s32) ; GCN-NEXT: [[GV:%[0-9]+]]:sreg_64(p0) = G_GLOBAL_VALUE @i32_fastcc_i32_byval_i32 @@ -257,8 +270,8 @@ define fastcc i32 @sibling_call_i32_fastcc_i32_byval_i32(i32 %a, [32 x i32] %lar ; GCN-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 ; GCN-NEXT: G_MEMCPY [[FRAME_INDEX2]](p5), [[INTTOPTR]](p5), [[C1]](s32), 0 :: (dereferenceable store (s32) into %fixed-stack.0, align 16, addrspace 5), (dereferenceable load (s32) from `i32 addrspace(5)* inttoptr (i32 16 to i32 addrspace(5)*)`, align 16, addrspace 5) ; GCN-NEXT: $vgpr0 = COPY [[COPY]](s32) - ; GCN-NEXT: [[COPY31:%[0-9]+]]:_(<4 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3 - ; GCN-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY 
[[COPY31]](<4 x s32>) + ; GCN-NEXT: [[COPY32:%[0-9]+]]:_(<4 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3 + ; GCN-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY32]](<4 x s32>) ; GCN-NEXT: SI_TCRETURN [[GV]](p0), @i32_fastcc_i32_byval_i32, 0, csr_amdgpu_highregs, implicit $vgpr0, implicit $sgpr0_sgpr1_sgpr2_sgpr3 entry: %ret = tail call fastcc i32 @i32_fastcc_i32_byval_i32(i32 %a, i32 addrspace(5)* byval(i32) inttoptr (i32 16 to i32 addrspace(5)*)) @@ -268,7 +281,7 @@ entry: define fastcc i32 @i32_fastcc_i32_i32_a32i32(i32 %arg0, i32 %arg1, [32 x i32] %large) #1 { ; GCN-LABEL: name: i32_fastcc_i32_i32_a32i32 ; GCN: bb.1 (%ir-block.0): - ; GCN-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $vgpr8, $vgpr9, $vgpr10, $vgpr11, $vgpr12, $vgpr13, $vgpr14, $vgpr15, $vgpr16, $vgpr17, $vgpr18, $vgpr19, $vgpr20, $vgpr21, $vgpr22, $vgpr23, $vgpr24, $vgpr25, $vgpr26, $vgpr27, $vgpr28, $vgpr29, $vgpr30 + ; GCN-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $vgpr8, $vgpr9, $vgpr10, $vgpr11, $vgpr12, $vgpr13, $vgpr14, $vgpr15, $vgpr16, $vgpr17, $vgpr18, $vgpr19, $vgpr20, $vgpr21, $vgpr22, $vgpr23, $vgpr24, $vgpr25, $vgpr26, $vgpr27, $vgpr28, $vgpr29, $vgpr30, $sgpr30_sgpr31 ; GCN-NEXT: {{ $}} ; GCN-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 ; GCN-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 @@ -307,11 +320,13 @@ define fastcc i32 @i32_fastcc_i32_i32_a32i32(i32 %arg0, i32 %arg1, [32 x i32] %l ; GCN-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX1]](p5) :: (invariant load (s32) from %fixed-stack.1, addrspace 5) ; GCN-NEXT: [[FRAME_INDEX2:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.0 ; GCN-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX2]](p5) :: (invariant load (s32) from %fixed-stack.0, align 8, addrspace 5) + ; GCN-NEXT: [[COPY31:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 ; GCN-NEXT: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[COPY]], [[COPY1]] ; GCN-NEXT: [[ADD1:%[0-9]+]]:_(s32) = G_ADD [[ADD]], [[LOAD1]] ; GCN-NEXT: [[ADD2:%[0-9]+]]:_(s32) = G_ADD [[ADD1]], [[LOAD2]] ; GCN-NEXT: $vgpr0 = COPY [[ADD2]](s32) - ; GCN-NEXT: SI_RETURN implicit $vgpr0 + ; GCN-NEXT: [[COPY32:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY31]] + ; GCN-NEXT: S_SETPC_B64_return [[COPY32]], implicit $vgpr0 %val_firststack = extractvalue [32 x i32] %large, 30 %val_laststack = extractvalue [32 x i32] %large, 31 %add0 = add i32 %arg0, %arg1 @@ -323,7 +338,7 @@ define fastcc i32 @i32_fastcc_i32_i32_a32i32(i32 %arg0, i32 %arg1, [32 x i32] %l define fastcc i32 @sibling_call_i32_fastcc_i32_i32_a32i32(i32 %a, i32 %b, [32 x i32] %c) #1 { ; GCN-LABEL: name: sibling_call_i32_fastcc_i32_i32_a32i32 ; GCN: bb.1.entry: - ; GCN-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $vgpr8, $vgpr9, $vgpr10, $vgpr11, $vgpr12, $vgpr13, $vgpr14, $vgpr15, $vgpr16, $vgpr17, $vgpr18, $vgpr19, $vgpr20, $vgpr21, $vgpr22, $vgpr23, $vgpr24, $vgpr25, $vgpr26, $vgpr27, $vgpr28, $vgpr29, $vgpr30 + ; GCN-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $vgpr8, $vgpr9, $vgpr10, $vgpr11, $vgpr12, $vgpr13, $vgpr14, $vgpr15, $vgpr16, $vgpr17, $vgpr18, $vgpr19, $vgpr20, $vgpr21, $vgpr22, $vgpr23, $vgpr24, $vgpr25, $vgpr26, $vgpr27, $vgpr28, $vgpr29, $vgpr30, $sgpr30_sgpr31 ; GCN-NEXT: {{ $}} ; GCN-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 ; GCN-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 @@ -362,6 +377,7 @@ define fastcc i32 @sibling_call_i32_fastcc_i32_i32_a32i32(i32 %a, i32 %b, [32 x ; GCN-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX1]](p5) :: (invariant 
load (s32) from %fixed-stack.4, addrspace 5) ; GCN-NEXT: [[FRAME_INDEX2:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.3 ; GCN-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX2]](p5) :: (invariant load (s32) from %fixed-stack.3, align 8, addrspace 5) + ; GCN-NEXT: [[COPY31:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 ; GCN-NEXT: [[GV:%[0-9]+]]:sreg_64(p0) = G_GLOBAL_VALUE @i32_fastcc_i32_i32_a32i32 ; GCN-NEXT: [[FRAME_INDEX3:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.2 ; GCN-NEXT: G_STORE [[LOAD]](s32), [[FRAME_INDEX3]](p5) :: (store (s32) into %fixed-stack.2, align 16, addrspace 5) @@ -400,8 +416,8 @@ define fastcc i32 @sibling_call_i32_fastcc_i32_i32_a32i32(i32 %a, i32 %b, [32 x ; GCN-NEXT: $vgpr28 = COPY [[COPY28]](s32) ; GCN-NEXT: $vgpr29 = COPY [[COPY29]](s32) ; GCN-NEXT: $vgpr30 = COPY [[COPY30]](s32) - ; GCN-NEXT: [[COPY31:%[0-9]+]]:_(<4 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3 - ; GCN-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY31]](<4 x s32>) + ; GCN-NEXT: [[COPY32:%[0-9]+]]:_(<4 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3 + ; GCN-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY32]](<4 x s32>) ; GCN-NEXT: SI_TCRETURN [[GV]](p0), @i32_fastcc_i32_i32_a32i32, 0, csr_amdgpu_highregs, implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7, implicit $vgpr8, implicit $vgpr9, implicit $vgpr10, implicit $vgpr11, implicit $vgpr12, implicit $vgpr13, implicit $vgpr14, implicit $vgpr15, implicit $vgpr16, implicit $vgpr17, implicit $vgpr18, implicit $vgpr19, implicit $vgpr20, implicit $vgpr21, implicit $vgpr22, implicit $vgpr23, implicit $vgpr24, implicit $vgpr25, implicit $vgpr26, implicit $vgpr27, implicit $vgpr28, implicit $vgpr29, implicit $vgpr30, implicit $sgpr0_sgpr1_sgpr2_sgpr3 entry: %ret = tail call fastcc i32 @i32_fastcc_i32_i32_a32i32(i32 %a, i32 %b, [32 x i32] %c) @@ -411,7 +427,7 @@ entry: define fastcc i32 @sibling_call_i32_fastcc_i32_i32_a32i32_stack_object(i32 %a, i32 %b, [32 x i32] %c) #1 { ; GCN-LABEL: name: sibling_call_i32_fastcc_i32_i32_a32i32_stack_object ; GCN: bb.1.entry: - ; GCN-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $vgpr8, $vgpr9, $vgpr10, $vgpr11, $vgpr12, $vgpr13, $vgpr14, $vgpr15, $vgpr16, $vgpr17, $vgpr18, $vgpr19, $vgpr20, $vgpr21, $vgpr22, $vgpr23, $vgpr24, $vgpr25, $vgpr26, $vgpr27, $vgpr28, $vgpr29, $vgpr30 + ; GCN-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $vgpr8, $vgpr9, $vgpr10, $vgpr11, $vgpr12, $vgpr13, $vgpr14, $vgpr15, $vgpr16, $vgpr17, $vgpr18, $vgpr19, $vgpr20, $vgpr21, $vgpr22, $vgpr23, $vgpr24, $vgpr25, $vgpr26, $vgpr27, $vgpr28, $vgpr29, $vgpr30, $sgpr30_sgpr31 ; GCN-NEXT: {{ $}} ; GCN-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 ; GCN-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 @@ -450,6 +466,7 @@ define fastcc i32 @sibling_call_i32_fastcc_i32_i32_a32i32_stack_object(i32 %a, i ; GCN-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX1]](p5) :: (invariant load (s32) from %fixed-stack.4, addrspace 5) ; GCN-NEXT: [[FRAME_INDEX2:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.3 ; GCN-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX2]](p5) :: (invariant load (s32) from %fixed-stack.3, align 8, addrspace 5) + ; GCN-NEXT: [[COPY31:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 ; GCN-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 9 ; GCN-NEXT: [[FRAME_INDEX3:%[0-9]+]]:_(p5) = G_FRAME_INDEX %stack.0.alloca ; GCN-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 20 @@ -493,8 +510,8 @@ define fastcc i32 
@sibling_call_i32_fastcc_i32_i32_a32i32_stack_object(i32 %a, i ; GCN-NEXT: $vgpr28 = COPY [[COPY28]](s32) ; GCN-NEXT: $vgpr29 = COPY [[COPY29]](s32) ; GCN-NEXT: $vgpr30 = COPY [[COPY30]](s32) - ; GCN-NEXT: [[COPY31:%[0-9]+]]:_(<4 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3 - ; GCN-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY31]](<4 x s32>) + ; GCN-NEXT: [[COPY32:%[0-9]+]]:_(<4 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3 + ; GCN-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY32]](<4 x s32>) ; GCN-NEXT: SI_TCRETURN [[GV]](p0), @i32_fastcc_i32_i32_a32i32, 0, csr_amdgpu_highregs, implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7, implicit $vgpr8, implicit $vgpr9, implicit $vgpr10, implicit $vgpr11, implicit $vgpr12, implicit $vgpr13, implicit $vgpr14, implicit $vgpr15, implicit $vgpr16, implicit $vgpr17, implicit $vgpr18, implicit $vgpr19, implicit $vgpr20, implicit $vgpr21, implicit $vgpr22, implicit $vgpr23, implicit $vgpr24, implicit $vgpr25, implicit $vgpr26, implicit $vgpr27, implicit $vgpr28, implicit $vgpr29, implicit $vgpr30, implicit $sgpr0_sgpr1_sgpr2_sgpr3 entry: %alloca = alloca [16 x i32], align 4, addrspace(5) @@ -510,22 +527,23 @@ entry: define fastcc i32 @no_sibling_call_callee_more_stack_space(i32 %a, i32 %b) #1 { ; GCN-LABEL: name: no_sibling_call_callee_more_stack_space ; GCN: bb.1.entry: - ; GCN-NEXT: liveins: $vgpr0, $vgpr1 + ; GCN-NEXT: liveins: $vgpr0, $vgpr1, $sgpr30_sgpr31 ; GCN-NEXT: {{ $}} ; GCN-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 ; GCN-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; GCN-NEXT: [[COPY2:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 ; GCN-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 ; GCN-NEXT: ADJCALLSTACKUP 0, 0, implicit-def $scc ; GCN-NEXT: [[GV:%[0-9]+]]:_(p0) = G_GLOBAL_VALUE @i32_fastcc_i32_i32_a32i32 - ; GCN-NEXT: [[COPY2:%[0-9]+]]:_(p5) = COPY $sgpr32 + ; GCN-NEXT: [[COPY3:%[0-9]+]]:_(p5) = COPY $sgpr32 ; GCN-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 - ; GCN-NEXT: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY2]], [[C1]](s32) + ; GCN-NEXT: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY3]], [[C1]](s32) ; GCN-NEXT: G_STORE [[C]](s32), [[PTR_ADD]](p5) :: (store (s32) into stack, align 16, addrspace 5) ; GCN-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 - ; GCN-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY2]], [[C2]](s32) + ; GCN-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY3]], [[C2]](s32) ; GCN-NEXT: G_STORE [[C]](s32), [[PTR_ADD1]](p5) :: (store (s32) into stack + 4, addrspace 5) ; GCN-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 - ; GCN-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY2]], [[C3]](s32) + ; GCN-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY3]], [[C3]](s32) ; GCN-NEXT: G_STORE [[C]](s32), [[PTR_ADD2]](p5) :: (store (s32) into stack + 8, align 8, addrspace 5) ; GCN-NEXT: $vgpr0 = COPY [[COPY]](s32) ; GCN-NEXT: $vgpr1 = COPY [[COPY1]](s32) @@ -558,13 +576,14 @@ define fastcc i32 @no_sibling_call_callee_more_stack_space(i32 %a, i32 %b) #1 { ; GCN-NEXT: $vgpr28 = COPY [[C]](s32) ; GCN-NEXT: $vgpr29 = COPY [[C]](s32) ; GCN-NEXT: $vgpr30 = COPY [[C]](s32) - ; GCN-NEXT: [[COPY3:%[0-9]+]]:_(<4 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3 - ; GCN-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY3]](<4 x s32>) + ; GCN-NEXT: [[COPY4:%[0-9]+]]:_(<4 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3 + ; GCN-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY4]](<4 x s32>) ; GCN-NEXT: $sgpr30_sgpr31 = G_SI_CALL [[GV]](p0), @i32_fastcc_i32_i32_a32i32, csr_amdgpu_highregs, 
implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7, implicit $vgpr8, implicit $vgpr9, implicit $vgpr10, implicit $vgpr11, implicit $vgpr12, implicit $vgpr13, implicit $vgpr14, implicit $vgpr15, implicit $vgpr16, implicit $vgpr17, implicit $vgpr18, implicit $vgpr19, implicit $vgpr20, implicit $vgpr21, implicit $vgpr22, implicit $vgpr23, implicit $vgpr24, implicit $vgpr25, implicit $vgpr26, implicit $vgpr27, implicit $vgpr28, implicit $vgpr29, implicit $vgpr30, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit-def $vgpr0 - ; GCN-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $vgpr0 + ; GCN-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $vgpr0 ; GCN-NEXT: ADJCALLSTACKDOWN 0, 12, implicit-def $scc - ; GCN-NEXT: $vgpr0 = COPY [[COPY4]](s32) - ; GCN-NEXT: SI_RETURN implicit $vgpr0 + ; GCN-NEXT: $vgpr0 = COPY [[COPY5]](s32) + ; GCN-NEXT: [[COPY6:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY2]] + ; GCN-NEXT: S_SETPC_B64_return [[COPY6]], implicit $vgpr0 entry: %ret = tail call fastcc i32 @i32_fastcc_i32_i32_a32i32(i32 %a, i32 %b, [32 x i32] zeroinitializer) ret i32 %ret @@ -574,26 +593,27 @@ entry: define fastcc i32 @sibling_call_i32_fastcc_i32_i32_other_call(i32 %a, i32 %b, i32 %c) #1 { ; GCN-LABEL: name: sibling_call_i32_fastcc_i32_i32_other_call ; GCN: bb.1.entry: - ; GCN-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2 + ; GCN-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2, $sgpr30_sgpr31 ; GCN-NEXT: {{ $}} ; GCN-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 ; GCN-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 ; GCN-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 + ; GCN-NEXT: [[COPY3:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 ; GCN-NEXT: ADJCALLSTACKUP 0, 0, implicit-def $scc ; GCN-NEXT: [[GV:%[0-9]+]]:_(p0) = G_GLOBAL_VALUE @i32_fastcc_i32_i32 ; GCN-NEXT: $vgpr0 = COPY [[COPY]](s32) ; GCN-NEXT: $vgpr1 = COPY [[COPY1]](s32) - ; GCN-NEXT: [[COPY3:%[0-9]+]]:_(<4 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3 - ; GCN-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY3]](<4 x s32>) + ; GCN-NEXT: [[COPY4:%[0-9]+]]:_(<4 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3 + ; GCN-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY4]](<4 x s32>) ; GCN-NEXT: $sgpr30_sgpr31 = G_SI_CALL [[GV]](p0), @i32_fastcc_i32_i32, csr_amdgpu_highregs, implicit $vgpr0, implicit $vgpr1, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit-def $vgpr0 - ; GCN-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $vgpr0 + ; GCN-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $vgpr0 ; GCN-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def $scc ; GCN-NEXT: [[GV1:%[0-9]+]]:sreg_64(p0) = G_GLOBAL_VALUE @sibling_call_i32_fastcc_i32_i32 ; GCN-NEXT: $vgpr0 = COPY [[COPY]](s32) ; GCN-NEXT: $vgpr1 = COPY [[COPY1]](s32) - ; GCN-NEXT: $vgpr2 = COPY [[COPY4]](s32) - ; GCN-NEXT: [[COPY5:%[0-9]+]]:_(<4 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3 - ; GCN-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY5]](<4 x s32>) + ; GCN-NEXT: $vgpr2 = COPY [[COPY5]](s32) + ; GCN-NEXT: [[COPY6:%[0-9]+]]:_(<4 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3 + ; GCN-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY6]](<4 x s32>) ; GCN-NEXT: SI_TCRETURN [[GV1]](p0), @sibling_call_i32_fastcc_i32_i32, 0, csr_amdgpu_highregs, implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $sgpr0_sgpr1_sgpr2_sgpr3 entry: %other.call = tail call fastcc i32 @i32_fastcc_i32_i32(i32 %a, i32 %b) @@ -606,7 +626,7 @@ entry: define fastcc i32 @sibling_call_stack_objecti32_fastcc_i32_i32_a32i32(i32 %a, i32 %b, [32 x i32] %c) #1 { ; GCN-LABEL: name: sibling_call_stack_objecti32_fastcc_i32_i32_a32i32 ; GCN: bb.1.entry: - ; GCN-NEXT: liveins: 
$vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $vgpr8, $vgpr9, $vgpr10, $vgpr11, $vgpr12, $vgpr13, $vgpr14, $vgpr15, $vgpr16, $vgpr17, $vgpr18, $vgpr19, $vgpr20, $vgpr21, $vgpr22, $vgpr23, $vgpr24, $vgpr25, $vgpr26, $vgpr27, $vgpr28, $vgpr29, $vgpr30 + ; GCN-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $vgpr8, $vgpr9, $vgpr10, $vgpr11, $vgpr12, $vgpr13, $vgpr14, $vgpr15, $vgpr16, $vgpr17, $vgpr18, $vgpr19, $vgpr20, $vgpr21, $vgpr22, $vgpr23, $vgpr24, $vgpr25, $vgpr26, $vgpr27, $vgpr28, $vgpr29, $vgpr30, $sgpr30_sgpr31 ; GCN-NEXT: {{ $}} ; GCN-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 ; GCN-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 @@ -645,6 +665,7 @@ define fastcc i32 @sibling_call_stack_objecti32_fastcc_i32_i32_a32i32(i32 %a, i3 ; GCN-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX1]](p5) :: (invariant load (s32) from %fixed-stack.4, addrspace 5) ; GCN-NEXT: [[FRAME_INDEX2:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.3 ; GCN-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX2]](p5) :: (invariant load (s32) from %fixed-stack.3, align 8, addrspace 5) + ; GCN-NEXT: [[COPY31:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 ; GCN-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 9 ; GCN-NEXT: [[FRAME_INDEX3:%[0-9]+]]:_(p5) = G_FRAME_INDEX %stack.0.alloca ; GCN-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 20 @@ -688,8 +709,8 @@ define fastcc i32 @sibling_call_stack_objecti32_fastcc_i32_i32_a32i32(i32 %a, i3 ; GCN-NEXT: $vgpr28 = COPY [[COPY28]](s32) ; GCN-NEXT: $vgpr29 = COPY [[COPY29]](s32) ; GCN-NEXT: $vgpr30 = COPY [[COPY30]](s32) - ; GCN-NEXT: [[COPY31:%[0-9]+]]:_(<4 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3 - ; GCN-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY31]](<4 x s32>) + ; GCN-NEXT: [[COPY32:%[0-9]+]]:_(<4 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3 + ; GCN-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY32]](<4 x s32>) ; GCN-NEXT: SI_TCRETURN [[GV]](p0), @i32_fastcc_i32_i32_a32i32, 0, csr_amdgpu_highregs, implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7, implicit $vgpr8, implicit $vgpr9, implicit $vgpr10, implicit $vgpr11, implicit $vgpr12, implicit $vgpr13, implicit $vgpr14, implicit $vgpr15, implicit $vgpr16, implicit $vgpr17, implicit $vgpr18, implicit $vgpr19, implicit $vgpr20, implicit $vgpr21, implicit $vgpr22, implicit $vgpr23, implicit $vgpr24, implicit $vgpr25, implicit $vgpr26, implicit $vgpr27, implicit $vgpr28, implicit $vgpr29, implicit $vgpr30, implicit $sgpr0_sgpr1_sgpr2_sgpr3 entry: %alloca = alloca [16 x i32], align 4, addrspace(5) @@ -702,7 +723,7 @@ entry: define fastcc i32 @sibling_call_stack_objecti32_fastcc_i32_i32_a32i32_larger_arg_area(i32 %a, i32 %b, [36 x i32] %c) #1 { ; GCN-LABEL: name: sibling_call_stack_objecti32_fastcc_i32_i32_a32i32_larger_arg_area ; GCN: bb.1.entry: - ; GCN-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $vgpr8, $vgpr9, $vgpr10, $vgpr11, $vgpr12, $vgpr13, $vgpr14, $vgpr15, $vgpr16, $vgpr17, $vgpr18, $vgpr19, $vgpr20, $vgpr21, $vgpr22, $vgpr23, $vgpr24, $vgpr25, $vgpr26, $vgpr27, $vgpr28, $vgpr29, $vgpr30 + ; GCN-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $vgpr8, $vgpr9, $vgpr10, $vgpr11, $vgpr12, $vgpr13, $vgpr14, $vgpr15, $vgpr16, $vgpr17, $vgpr18, $vgpr19, $vgpr20, $vgpr21, $vgpr22, $vgpr23, $vgpr24, $vgpr25, $vgpr26, $vgpr27, $vgpr28, $vgpr29, $vgpr30, $sgpr30_sgpr31 ; GCN-NEXT: {{ $}} ; GCN-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 ; 
GCN-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 @@ -749,6 +770,7 @@ define fastcc i32 @sibling_call_stack_objecti32_fastcc_i32_i32_a32i32_larger_arg ; GCN-NEXT: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX5]](p5) :: (invariant load (s32) from %fixed-stack.4, addrspace 5) ; GCN-NEXT: [[FRAME_INDEX6:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.3 ; GCN-NEXT: [[LOAD6:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX6]](p5) :: (invariant load (s32) from %fixed-stack.3, align 8, addrspace 5) + ; GCN-NEXT: [[COPY31:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 ; GCN-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 9 ; GCN-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 ; GCN-NEXT: [[FRAME_INDEX7:%[0-9]+]]:_(p5) = G_FRAME_INDEX %stack.0.alloca @@ -793,8 +815,8 @@ define fastcc i32 @sibling_call_stack_objecti32_fastcc_i32_i32_a32i32_larger_arg ; GCN-NEXT: $vgpr28 = COPY [[C1]](s32) ; GCN-NEXT: $vgpr29 = COPY [[C1]](s32) ; GCN-NEXT: $vgpr30 = COPY [[C1]](s32) - ; GCN-NEXT: [[COPY31:%[0-9]+]]:_(<4 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3 - ; GCN-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY31]](<4 x s32>) + ; GCN-NEXT: [[COPY32:%[0-9]+]]:_(<4 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3 + ; GCN-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY32]](<4 x s32>) ; GCN-NEXT: SI_TCRETURN [[GV]](p0), @i32_fastcc_i32_i32_a32i32, 0, csr_amdgpu_highregs, implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7, implicit $vgpr8, implicit $vgpr9, implicit $vgpr10, implicit $vgpr11, implicit $vgpr12, implicit $vgpr13, implicit $vgpr14, implicit $vgpr15, implicit $vgpr16, implicit $vgpr17, implicit $vgpr18, implicit $vgpr19, implicit $vgpr20, implicit $vgpr21, implicit $vgpr22, implicit $vgpr23, implicit $vgpr24, implicit $vgpr25, implicit $vgpr26, implicit $vgpr27, implicit $vgpr28, implicit $vgpr29, implicit $vgpr30, implicit $sgpr0_sgpr1_sgpr2_sgpr3 entry: %alloca = alloca [16 x i32], align 4, addrspace(5) @@ -809,7 +831,7 @@ declare hidden void @void_fastcc_multi_byval(i32 %a, [3 x i32] addrspace(5)* byv define fastcc void @sibling_call_fastcc_multi_byval(i32 %a, [64 x i32]) #1 { ; GCN-LABEL: name: sibling_call_fastcc_multi_byval ; GCN: bb.1.entry: - ; GCN-NEXT: liveins: $sgpr12, $sgpr13, $sgpr14, $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $vgpr8, $vgpr9, $vgpr10, $vgpr11, $vgpr12, $vgpr13, $vgpr14, $vgpr15, $vgpr16, $vgpr17, $vgpr18, $vgpr19, $vgpr20, $vgpr21, $vgpr22, $vgpr23, $vgpr24, $vgpr25, $vgpr26, $vgpr27, $vgpr28, $vgpr29, $vgpr30, $vgpr31, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9, $sgpr10_sgpr11 + ; GCN-NEXT: liveins: $sgpr12, $sgpr13, $sgpr14, $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $vgpr8, $vgpr9, $vgpr10, $vgpr11, $vgpr12, $vgpr13, $vgpr14, $vgpr15, $vgpr16, $vgpr17, $vgpr18, $vgpr19, $vgpr20, $vgpr21, $vgpr22, $vgpr23, $vgpr24, $vgpr25, $vgpr26, $vgpr27, $vgpr28, $vgpr29, $vgpr30, $vgpr31, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9, $sgpr10_sgpr11, $sgpr30_sgpr31 ; GCN-NEXT: {{ $}} ; GCN-NEXT: [[COPY:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr31 ; GCN-NEXT: [[COPY1:%[0-9]+]]:sgpr_32 = COPY $sgpr14 @@ -918,6 +940,7 @@ define fastcc void @sibling_call_fastcc_multi_byval(i32 %a, [64 x i32]) #1 { ; GCN-NEXT: [[LOAD32:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX32]](p5) :: (invariant load (s32) from %fixed-stack.3, align 16, addrspace 5) ; GCN-NEXT: [[FRAME_INDEX33:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.2 ; GCN-NEXT: [[LOAD33:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX33]](p5) :: (invariant load (s32) from %fixed-stack.2, 
addrspace 5) + ; GCN-NEXT: [[COPY39:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 ; GCN-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 9 ; GCN-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 ; GCN-NEXT: [[FRAME_INDEX34:%[0-9]+]]:_(p5) = G_FRAME_INDEX %stack.0.alloca0 @@ -933,14 +956,14 @@ define fastcc void @sibling_call_fastcc_multi_byval(i32 %a, [64 x i32]) #1 { ; GCN-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p5) = G_PTR_ADD [[FRAME_INDEX35]], [[C3]](s32) ; GCN-NEXT: G_STORE [[C1]](s64), [[PTR_ADD2]](p5) :: (store (s64) into %ir.alloca1 + 8, addrspace 5) ; GCN-NEXT: [[GV:%[0-9]+]]:sreg_64(p0) = G_GLOBAL_VALUE @void_fastcc_multi_byval - ; GCN-NEXT: [[COPY39:%[0-9]+]]:_(p4) = COPY [[COPY7]] - ; GCN-NEXT: [[COPY40:%[0-9]+]]:_(p4) = COPY [[COPY6]] - ; GCN-NEXT: [[COPY41:%[0-9]+]]:_(p4) = COPY [[COPY5]] - ; GCN-NEXT: [[COPY42:%[0-9]+]]:_(s64) = COPY [[COPY4]] - ; GCN-NEXT: [[COPY43:%[0-9]+]]:_(s32) = COPY [[COPY3]] - ; GCN-NEXT: [[COPY44:%[0-9]+]]:_(s32) = COPY [[COPY2]] - ; GCN-NEXT: [[COPY45:%[0-9]+]]:_(s32) = COPY [[COPY1]] - ; GCN-NEXT: [[COPY46:%[0-9]+]]:_(s32) = COPY [[COPY]](s32) + ; GCN-NEXT: [[COPY40:%[0-9]+]]:_(p4) = COPY [[COPY7]] + ; GCN-NEXT: [[COPY41:%[0-9]+]]:_(p4) = COPY [[COPY6]] + ; GCN-NEXT: [[COPY42:%[0-9]+]]:_(p4) = COPY [[COPY5]] + ; GCN-NEXT: [[COPY43:%[0-9]+]]:_(s64) = COPY [[COPY4]] + ; GCN-NEXT: [[COPY44:%[0-9]+]]:_(s32) = COPY [[COPY3]] + ; GCN-NEXT: [[COPY45:%[0-9]+]]:_(s32) = COPY [[COPY2]] + ; GCN-NEXT: [[COPY46:%[0-9]+]]:_(s32) = COPY [[COPY1]] + ; GCN-NEXT: [[COPY47:%[0-9]+]]:_(s32) = COPY [[COPY]](s32) ; GCN-NEXT: [[FRAME_INDEX36:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.1 ; GCN-NEXT: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 12 ; GCN-NEXT: G_MEMCPY [[FRAME_INDEX36]](p5), [[FRAME_INDEX34]](p5), [[C4]](s32), 0 :: (dereferenceable store (s96) into %fixed-stack.1, align 16, addrspace 5), (dereferenceable load (s96) from %ir.alloca0, align 16, addrspace 5) @@ -948,16 +971,16 @@ define fastcc void @sibling_call_fastcc_multi_byval(i32 %a, [64 x i32]) #1 { ; GCN-NEXT: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 ; GCN-NEXT: G_MEMCPY [[FRAME_INDEX37]](p5), [[FRAME_INDEX35]](p5), [[C5]](s32), 0 :: (dereferenceable store (s128) into %fixed-stack.0, addrspace 5), (dereferenceable load (s128) from %ir.alloca1, align 8, addrspace 5) ; GCN-NEXT: $vgpr0 = COPY [[COPY8]](s32) - ; GCN-NEXT: [[COPY47:%[0-9]+]]:_(<4 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3 - ; GCN-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY47]](<4 x s32>) - ; GCN-NEXT: $sgpr4_sgpr5 = COPY [[COPY39]](p4) - ; GCN-NEXT: $sgpr6_sgpr7 = COPY [[COPY40]](p4) - ; GCN-NEXT: $sgpr8_sgpr9 = COPY [[COPY41]](p4) - ; GCN-NEXT: $sgpr10_sgpr11 = COPY [[COPY42]](s64) - ; GCN-NEXT: $sgpr12 = COPY [[COPY43]](s32) - ; GCN-NEXT: $sgpr13 = COPY [[COPY44]](s32) - ; GCN-NEXT: $sgpr14 = COPY [[COPY45]](s32) - ; GCN-NEXT: $vgpr31 = COPY [[COPY46]](s32) + ; GCN-NEXT: [[COPY48:%[0-9]+]]:_(<4 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3 + ; GCN-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY48]](<4 x s32>) + ; GCN-NEXT: $sgpr4_sgpr5 = COPY [[COPY40]](p4) + ; GCN-NEXT: $sgpr6_sgpr7 = COPY [[COPY41]](p4) + ; GCN-NEXT: $sgpr8_sgpr9 = COPY [[COPY42]](p4) + ; GCN-NEXT: $sgpr10_sgpr11 = COPY [[COPY43]](s64) + ; GCN-NEXT: $sgpr12 = COPY [[COPY44]](s32) + ; GCN-NEXT: $sgpr13 = COPY [[COPY45]](s32) + ; GCN-NEXT: $sgpr14 = COPY [[COPY46]](s32) + ; GCN-NEXT: $vgpr31 = COPY [[COPY47]](s32) ; GCN-NEXT: SI_TCRETURN [[GV]](p0), @void_fastcc_multi_byval, 0, csr_amdgpu_highregs, implicit $vgpr0, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit 
$sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $vgpr31 entry: %alloca0 = alloca [3 x i32], align 16, addrspace(5) @@ -974,7 +997,7 @@ declare hidden void @void_fastcc_byval_and_stack_passed([3 x i32] addrspace(5)* define fastcc void @sibling_call_byval_and_stack_passed(i32 %stack.out.arg, [64 x i32]) #1 { ; GCN-LABEL: name: sibling_call_byval_and_stack_passed ; GCN: bb.1.entry: - ; GCN-NEXT: liveins: $sgpr12, $sgpr13, $sgpr14, $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $vgpr8, $vgpr9, $vgpr10, $vgpr11, $vgpr12, $vgpr13, $vgpr14, $vgpr15, $vgpr16, $vgpr17, $vgpr18, $vgpr19, $vgpr20, $vgpr21, $vgpr22, $vgpr23, $vgpr24, $vgpr25, $vgpr26, $vgpr27, $vgpr28, $vgpr29, $vgpr30, $vgpr31, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9, $sgpr10_sgpr11 + ; GCN-NEXT: liveins: $sgpr12, $sgpr13, $sgpr14, $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $vgpr8, $vgpr9, $vgpr10, $vgpr11, $vgpr12, $vgpr13, $vgpr14, $vgpr15, $vgpr16, $vgpr17, $vgpr18, $vgpr19, $vgpr20, $vgpr21, $vgpr22, $vgpr23, $vgpr24, $vgpr25, $vgpr26, $vgpr27, $vgpr28, $vgpr29, $vgpr30, $vgpr31, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9, $sgpr10_sgpr11, $sgpr30_sgpr31 ; GCN-NEXT: {{ $}} ; GCN-NEXT: [[COPY:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr31 ; GCN-NEXT: [[COPY1:%[0-9]+]]:sgpr_32 = COPY $sgpr14 @@ -1083,6 +1106,7 @@ define fastcc void @sibling_call_byval_and_stack_passed(i32 %stack.out.arg, [64 ; GCN-NEXT: [[LOAD32:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX32]](p5) :: (invariant load (s32) from %fixed-stack.4, align 16, addrspace 5) ; GCN-NEXT: [[FRAME_INDEX33:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.3 ; GCN-NEXT: [[LOAD33:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX33]](p5) :: (invariant load (s32) from %fixed-stack.3, addrspace 5) + ; GCN-NEXT: [[COPY39:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 ; GCN-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 9 ; GCN-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 ; GCN-NEXT: [[FRAME_INDEX34:%[0-9]+]]:_(p5) = G_FRAME_INDEX %stack.0.alloca @@ -1094,14 +1118,14 @@ define fastcc void @sibling_call_byval_and_stack_passed(i32 %stack.out.arg, [64 ; GCN-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p5) = G_PTR_ADD [[FRAME_INDEX34]], [[C3]](s32) ; GCN-NEXT: G_STORE [[C]](s32), [[PTR_ADD1]](p5) :: (store (s32) into %ir.alloca + 8, addrspace 5) ; GCN-NEXT: [[GV:%[0-9]+]]:sreg_64(p0) = G_GLOBAL_VALUE @void_fastcc_byval_and_stack_passed - ; GCN-NEXT: [[COPY39:%[0-9]+]]:_(p4) = COPY [[COPY7]] - ; GCN-NEXT: [[COPY40:%[0-9]+]]:_(p4) = COPY [[COPY6]] - ; GCN-NEXT: [[COPY41:%[0-9]+]]:_(p4) = COPY [[COPY5]] - ; GCN-NEXT: [[COPY42:%[0-9]+]]:_(s64) = COPY [[COPY4]] - ; GCN-NEXT: [[COPY43:%[0-9]+]]:_(s32) = COPY [[COPY3]] - ; GCN-NEXT: [[COPY44:%[0-9]+]]:_(s32) = COPY [[COPY2]] - ; GCN-NEXT: [[COPY45:%[0-9]+]]:_(s32) = COPY [[COPY1]] - ; GCN-NEXT: [[COPY46:%[0-9]+]]:_(s32) = COPY [[COPY]](s32) + ; GCN-NEXT: [[COPY40:%[0-9]+]]:_(p4) = COPY [[COPY7]] + ; GCN-NEXT: [[COPY41:%[0-9]+]]:_(p4) = COPY [[COPY6]] + ; GCN-NEXT: [[COPY42:%[0-9]+]]:_(p4) = COPY [[COPY5]] + ; GCN-NEXT: [[COPY43:%[0-9]+]]:_(s64) = COPY [[COPY4]] + ; GCN-NEXT: [[COPY44:%[0-9]+]]:_(s32) = COPY [[COPY3]] + ; GCN-NEXT: [[COPY45:%[0-9]+]]:_(s32) = COPY [[COPY2]] + ; GCN-NEXT: [[COPY46:%[0-9]+]]:_(s32) = COPY [[COPY1]] + ; GCN-NEXT: [[COPY47:%[0-9]+]]:_(s32) = COPY [[COPY]](s32) ; GCN-NEXT: [[FRAME_INDEX35:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.2 ; GCN-NEXT: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 12 ; GCN-NEXT: G_MEMCPY [[FRAME_INDEX35]](p5), [[FRAME_INDEX34]](p5), [[C4]](s32), 0 :: 
(dereferenceable store (s96) into %fixed-stack.2, align 16, addrspace 5), (dereferenceable load (s96) from %ir.alloca, align 16, addrspace 5) @@ -1140,16 +1164,16 @@ define fastcc void @sibling_call_byval_and_stack_passed(i32 %stack.out.arg, [64 ; GCN-NEXT: $vgpr28 = COPY [[C1]](s32) ; GCN-NEXT: $vgpr29 = COPY [[C1]](s32) ; GCN-NEXT: $vgpr30 = COPY [[C1]](s32) - ; GCN-NEXT: [[COPY47:%[0-9]+]]:_(<4 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3 - ; GCN-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY47]](<4 x s32>) - ; GCN-NEXT: $sgpr4_sgpr5 = COPY [[COPY39]](p4) - ; GCN-NEXT: $sgpr6_sgpr7 = COPY [[COPY40]](p4) - ; GCN-NEXT: $sgpr8_sgpr9 = COPY [[COPY41]](p4) - ; GCN-NEXT: $sgpr10_sgpr11 = COPY [[COPY42]](s64) - ; GCN-NEXT: $sgpr12 = COPY [[COPY43]](s32) - ; GCN-NEXT: $sgpr13 = COPY [[COPY44]](s32) - ; GCN-NEXT: $sgpr14 = COPY [[COPY45]](s32) - ; GCN-NEXT: $vgpr31 = COPY [[COPY46]](s32) + ; GCN-NEXT: [[COPY48:%[0-9]+]]:_(<4 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3 + ; GCN-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY48]](<4 x s32>) + ; GCN-NEXT: $sgpr4_sgpr5 = COPY [[COPY40]](p4) + ; GCN-NEXT: $sgpr6_sgpr7 = COPY [[COPY41]](p4) + ; GCN-NEXT: $sgpr8_sgpr9 = COPY [[COPY42]](p4) + ; GCN-NEXT: $sgpr10_sgpr11 = COPY [[COPY43]](s64) + ; GCN-NEXT: $sgpr12 = COPY [[COPY44]](s32) + ; GCN-NEXT: $sgpr13 = COPY [[COPY45]](s32) + ; GCN-NEXT: $sgpr14 = COPY [[COPY46]](s32) + ; GCN-NEXT: $vgpr31 = COPY [[COPY47]](s32) ; GCN-NEXT: SI_TCRETURN [[GV]](p0), @void_fastcc_byval_and_stack_passed, 0, csr_amdgpu_highregs, implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7, implicit $vgpr8, implicit $vgpr9, implicit $vgpr10, implicit $vgpr11, implicit $vgpr12, implicit $vgpr13, implicit $vgpr14, implicit $vgpr15, implicit $vgpr16, implicit $vgpr17, implicit $vgpr18, implicit $vgpr19, implicit $vgpr20, implicit $vgpr21, implicit $vgpr22, implicit $vgpr23, implicit $vgpr24, implicit $vgpr25, implicit $vgpr26, implicit $vgpr27, implicit $vgpr28, implicit $vgpr29, implicit $vgpr30, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $vgpr31 entry: %alloca = alloca [3 x i32], align 16, addrspace(5) @@ -1163,7 +1187,7 @@ declare hidden fastcc i64 @i64_fastcc_i64(i64 %arg0) define hidden fastcc i64 @sibling_call_i64_fastcc_i64(i64 %a) #1 { ; GCN-LABEL: name: sibling_call_i64_fastcc_i64 ; GCN: bb.1.entry: - ; GCN-NEXT: liveins: $sgpr12, $sgpr13, $sgpr14, $vgpr0, $vgpr1, $vgpr31, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9, $sgpr10_sgpr11 + ; GCN-NEXT: liveins: $sgpr12, $sgpr13, $sgpr14, $vgpr0, $vgpr1, $vgpr31, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9, $sgpr10_sgpr11, $sgpr30_sgpr31 ; GCN-NEXT: {{ $}} ; GCN-NEXT: [[COPY:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr31 ; GCN-NEXT: [[COPY1:%[0-9]+]]:sgpr_32 = COPY $sgpr14 @@ -1176,28 +1200,29 @@ define hidden fastcc i64 @sibling_call_i64_fastcc_i64(i64 %a) #1 { ; GCN-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0 ; GCN-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1 ; GCN-NEXT: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY8]](s32), [[COPY9]](s32) + ; GCN-NEXT: [[COPY10:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 ; GCN-NEXT: [[GV:%[0-9]+]]:sreg_64(p0) = G_GLOBAL_VALUE @i64_fastcc_i64 - ; GCN-NEXT: [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY7]] - ; GCN-NEXT: [[COPY11:%[0-9]+]]:_(p4) = COPY [[COPY6]] - ; GCN-NEXT: [[COPY12:%[0-9]+]]:_(p4) = COPY [[COPY5]] - ; GCN-NEXT: [[COPY13:%[0-9]+]]:_(s64) = 
COPY [[COPY4]] - ; GCN-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY3]] - ; GCN-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY2]] - ; GCN-NEXT: [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY1]] - ; GCN-NEXT: [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY]](s32) + ; GCN-NEXT: [[COPY11:%[0-9]+]]:_(p4) = COPY [[COPY7]] + ; GCN-NEXT: [[COPY12:%[0-9]+]]:_(p4) = COPY [[COPY6]] + ; GCN-NEXT: [[COPY13:%[0-9]+]]:_(p4) = COPY [[COPY5]] + ; GCN-NEXT: [[COPY14:%[0-9]+]]:_(s64) = COPY [[COPY4]] + ; GCN-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY3]] + ; GCN-NEXT: [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY2]] + ; GCN-NEXT: [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY1]] + ; GCN-NEXT: [[COPY18:%[0-9]+]]:_(s32) = COPY [[COPY]](s32) ; GCN-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[MV]](s64) ; GCN-NEXT: $vgpr0 = COPY [[UV]](s32) ; GCN-NEXT: $vgpr1 = COPY [[UV1]](s32) - ; GCN-NEXT: [[COPY18:%[0-9]+]]:_(<4 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3 - ; GCN-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY18]](<4 x s32>) - ; GCN-NEXT: $sgpr4_sgpr5 = COPY [[COPY10]](p4) - ; GCN-NEXT: $sgpr6_sgpr7 = COPY [[COPY11]](p4) - ; GCN-NEXT: $sgpr8_sgpr9 = COPY [[COPY12]](p4) - ; GCN-NEXT: $sgpr10_sgpr11 = COPY [[COPY13]](s64) - ; GCN-NEXT: $sgpr12 = COPY [[COPY14]](s32) - ; GCN-NEXT: $sgpr13 = COPY [[COPY15]](s32) - ; GCN-NEXT: $sgpr14 = COPY [[COPY16]](s32) - ; GCN-NEXT: $vgpr31 = COPY [[COPY17]](s32) + ; GCN-NEXT: [[COPY19:%[0-9]+]]:_(<4 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3 + ; GCN-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY19]](<4 x s32>) + ; GCN-NEXT: $sgpr4_sgpr5 = COPY [[COPY11]](p4) + ; GCN-NEXT: $sgpr6_sgpr7 = COPY [[COPY12]](p4) + ; GCN-NEXT: $sgpr8_sgpr9 = COPY [[COPY13]](p4) + ; GCN-NEXT: $sgpr10_sgpr11 = COPY [[COPY14]](s64) + ; GCN-NEXT: $sgpr12 = COPY [[COPY15]](s32) + ; GCN-NEXT: $sgpr13 = COPY [[COPY16]](s32) + ; GCN-NEXT: $sgpr14 = COPY [[COPY17]](s32) + ; GCN-NEXT: $vgpr31 = COPY [[COPY18]](s32) ; GCN-NEXT: SI_TCRETURN [[GV]](p0), @i64_fastcc_i64, 0, csr_amdgpu_highregs, implicit $vgpr0, implicit $vgpr1, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $vgpr31 entry: %ret = tail call fastcc i64 @i64_fastcc_i64(i64 %a) @@ -1209,7 +1234,7 @@ declare hidden fastcc i8 addrspace(1)* @p1i8_fastcc_p1i8(i8 addrspace(1)* %arg0) define hidden fastcc i8 addrspace(1)* @sibling_call_p1i8_fastcc_p1i8(i8 addrspace(1)* %a) #1 { ; GCN-LABEL: name: sibling_call_p1i8_fastcc_p1i8 ; GCN: bb.1.entry: - ; GCN-NEXT: liveins: $sgpr12, $sgpr13, $sgpr14, $vgpr0, $vgpr1, $vgpr31, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9, $sgpr10_sgpr11 + ; GCN-NEXT: liveins: $sgpr12, $sgpr13, $sgpr14, $vgpr0, $vgpr1, $vgpr31, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9, $sgpr10_sgpr11, $sgpr30_sgpr31 ; GCN-NEXT: {{ $}} ; GCN-NEXT: [[COPY:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr31 ; GCN-NEXT: [[COPY1:%[0-9]+]]:sgpr_32 = COPY $sgpr14 @@ -1222,28 +1247,29 @@ define hidden fastcc i8 addrspace(1)* @sibling_call_p1i8_fastcc_p1i8(i8 addrspac ; GCN-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0 ; GCN-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1 ; GCN-NEXT: [[MV:%[0-9]+]]:_(p1) = G_MERGE_VALUES [[COPY8]](s32), [[COPY9]](s32) + ; GCN-NEXT: [[COPY10:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 ; GCN-NEXT: [[GV:%[0-9]+]]:sreg_64(p0) = G_GLOBAL_VALUE @p1i8_fastcc_p1i8 - ; GCN-NEXT: [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY7]] - ; GCN-NEXT: [[COPY11:%[0-9]+]]:_(p4) = COPY [[COPY6]] - ; GCN-NEXT: [[COPY12:%[0-9]+]]:_(p4) = COPY 
[[COPY5]] - ; GCN-NEXT: [[COPY13:%[0-9]+]]:_(s64) = COPY [[COPY4]] - ; GCN-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY3]] - ; GCN-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY2]] - ; GCN-NEXT: [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY1]] - ; GCN-NEXT: [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY]](s32) + ; GCN-NEXT: [[COPY11:%[0-9]+]]:_(p4) = COPY [[COPY7]] + ; GCN-NEXT: [[COPY12:%[0-9]+]]:_(p4) = COPY [[COPY6]] + ; GCN-NEXT: [[COPY13:%[0-9]+]]:_(p4) = COPY [[COPY5]] + ; GCN-NEXT: [[COPY14:%[0-9]+]]:_(s64) = COPY [[COPY4]] + ; GCN-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY3]] + ; GCN-NEXT: [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY2]] + ; GCN-NEXT: [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY1]] + ; GCN-NEXT: [[COPY18:%[0-9]+]]:_(s32) = COPY [[COPY]](s32) ; GCN-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[MV]](p1) ; GCN-NEXT: $vgpr0 = COPY [[UV]](s32) ; GCN-NEXT: $vgpr1 = COPY [[UV1]](s32) - ; GCN-NEXT: [[COPY18:%[0-9]+]]:_(<4 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3 - ; GCN-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY18]](<4 x s32>) - ; GCN-NEXT: $sgpr4_sgpr5 = COPY [[COPY10]](p4) - ; GCN-NEXT: $sgpr6_sgpr7 = COPY [[COPY11]](p4) - ; GCN-NEXT: $sgpr8_sgpr9 = COPY [[COPY12]](p4) - ; GCN-NEXT: $sgpr10_sgpr11 = COPY [[COPY13]](s64) - ; GCN-NEXT: $sgpr12 = COPY [[COPY14]](s32) - ; GCN-NEXT: $sgpr13 = COPY [[COPY15]](s32) - ; GCN-NEXT: $sgpr14 = COPY [[COPY16]](s32) - ; GCN-NEXT: $vgpr31 = COPY [[COPY17]](s32) + ; GCN-NEXT: [[COPY19:%[0-9]+]]:_(<4 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3 + ; GCN-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY19]](<4 x s32>) + ; GCN-NEXT: $sgpr4_sgpr5 = COPY [[COPY11]](p4) + ; GCN-NEXT: $sgpr6_sgpr7 = COPY [[COPY12]](p4) + ; GCN-NEXT: $sgpr8_sgpr9 = COPY [[COPY13]](p4) + ; GCN-NEXT: $sgpr10_sgpr11 = COPY [[COPY14]](s64) + ; GCN-NEXT: $sgpr12 = COPY [[COPY15]](s32) + ; GCN-NEXT: $sgpr13 = COPY [[COPY16]](s32) + ; GCN-NEXT: $sgpr14 = COPY [[COPY17]](s32) + ; GCN-NEXT: $vgpr31 = COPY [[COPY18]](s32) ; GCN-NEXT: SI_TCRETURN [[GV]](p0), @p1i8_fastcc_p1i8, 0, csr_amdgpu_highregs, implicit $vgpr0, implicit $vgpr1, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $vgpr31 entry: %ret = tail call fastcc i8 addrspace(1)* @p1i8_fastcc_p1i8(i8 addrspace(1)* %a) @@ -1255,7 +1281,7 @@ declare hidden fastcc i16 @i16_fastcc_i16(i16 %arg0) define hidden fastcc i16 @sibling_call_i16_fastcc_i16(i16 %a) #1 { ; GCN-LABEL: name: sibling_call_i16_fastcc_i16 ; GCN: bb.1.entry: - ; GCN-NEXT: liveins: $sgpr12, $sgpr13, $sgpr14, $vgpr0, $vgpr31, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9, $sgpr10_sgpr11 + ; GCN-NEXT: liveins: $sgpr12, $sgpr13, $sgpr14, $vgpr0, $vgpr31, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9, $sgpr10_sgpr11, $sgpr30_sgpr31 ; GCN-NEXT: {{ $}} ; GCN-NEXT: [[COPY:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr31 ; GCN-NEXT: [[COPY1:%[0-9]+]]:sgpr_32 = COPY $sgpr14 @@ -1267,27 +1293,28 @@ define hidden fastcc i16 @sibling_call_i16_fastcc_i16(i16 %a) #1 { ; GCN-NEXT: [[COPY7:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5 ; GCN-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0 ; GCN-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY8]](s32) + ; GCN-NEXT: [[COPY9:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 ; GCN-NEXT: [[GV:%[0-9]+]]:sreg_64(p0) = G_GLOBAL_VALUE @i16_fastcc_i16 - ; GCN-NEXT: [[COPY9:%[0-9]+]]:_(p4) = COPY [[COPY7]] - ; GCN-NEXT: [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY6]] - ; GCN-NEXT: [[COPY11:%[0-9]+]]:_(p4) = COPY [[COPY5]] - ; GCN-NEXT: 
[[COPY12:%[0-9]+]]:_(s64) = COPY [[COPY4]] - ; GCN-NEXT: [[COPY13:%[0-9]+]]:_(s32) = COPY [[COPY3]] - ; GCN-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY2]] - ; GCN-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY1]] - ; GCN-NEXT: [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY]](s32) + ; GCN-NEXT: [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY7]] + ; GCN-NEXT: [[COPY11:%[0-9]+]]:_(p4) = COPY [[COPY6]] + ; GCN-NEXT: [[COPY12:%[0-9]+]]:_(p4) = COPY [[COPY5]] + ; GCN-NEXT: [[COPY13:%[0-9]+]]:_(s64) = COPY [[COPY4]] + ; GCN-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY3]] + ; GCN-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY2]] + ; GCN-NEXT: [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY1]] + ; GCN-NEXT: [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY]](s32) ; GCN-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[TRUNC]](s16) ; GCN-NEXT: $vgpr0 = COPY [[ANYEXT]](s32) - ; GCN-NEXT: [[COPY17:%[0-9]+]]:_(<4 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3 - ; GCN-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY17]](<4 x s32>) - ; GCN-NEXT: $sgpr4_sgpr5 = COPY [[COPY9]](p4) - ; GCN-NEXT: $sgpr6_sgpr7 = COPY [[COPY10]](p4) - ; GCN-NEXT: $sgpr8_sgpr9 = COPY [[COPY11]](p4) - ; GCN-NEXT: $sgpr10_sgpr11 = COPY [[COPY12]](s64) - ; GCN-NEXT: $sgpr12 = COPY [[COPY13]](s32) - ; GCN-NEXT: $sgpr13 = COPY [[COPY14]](s32) - ; GCN-NEXT: $sgpr14 = COPY [[COPY15]](s32) - ; GCN-NEXT: $vgpr31 = COPY [[COPY16]](s32) + ; GCN-NEXT: [[COPY18:%[0-9]+]]:_(<4 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3 + ; GCN-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY18]](<4 x s32>) + ; GCN-NEXT: $sgpr4_sgpr5 = COPY [[COPY10]](p4) + ; GCN-NEXT: $sgpr6_sgpr7 = COPY [[COPY11]](p4) + ; GCN-NEXT: $sgpr8_sgpr9 = COPY [[COPY12]](p4) + ; GCN-NEXT: $sgpr10_sgpr11 = COPY [[COPY13]](s64) + ; GCN-NEXT: $sgpr12 = COPY [[COPY14]](s32) + ; GCN-NEXT: $sgpr13 = COPY [[COPY15]](s32) + ; GCN-NEXT: $sgpr14 = COPY [[COPY16]](s32) + ; GCN-NEXT: $vgpr31 = COPY [[COPY17]](s32) ; GCN-NEXT: SI_TCRETURN [[GV]](p0), @i16_fastcc_i16, 0, csr_amdgpu_highregs, implicit $vgpr0, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $vgpr31 entry: %ret = tail call fastcc i16 @i16_fastcc_i16(i16 %a) @@ -1299,7 +1326,7 @@ declare hidden fastcc half @f16_fastcc_f16(half %arg0) define hidden fastcc half @sibling_call_f16_fastcc_f16(half %a) #1 { ; GCN-LABEL: name: sibling_call_f16_fastcc_f16 ; GCN: bb.1.entry: - ; GCN-NEXT: liveins: $sgpr12, $sgpr13, $sgpr14, $vgpr0, $vgpr31, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9, $sgpr10_sgpr11 + ; GCN-NEXT: liveins: $sgpr12, $sgpr13, $sgpr14, $vgpr0, $vgpr31, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9, $sgpr10_sgpr11, $sgpr30_sgpr31 ; GCN-NEXT: {{ $}} ; GCN-NEXT: [[COPY:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr31 ; GCN-NEXT: [[COPY1:%[0-9]+]]:sgpr_32 = COPY $sgpr14 @@ -1311,27 +1338,28 @@ define hidden fastcc half @sibling_call_f16_fastcc_f16(half %a) #1 { ; GCN-NEXT: [[COPY7:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5 ; GCN-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0 ; GCN-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY8]](s32) + ; GCN-NEXT: [[COPY9:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 ; GCN-NEXT: [[GV:%[0-9]+]]:sreg_64(p0) = G_GLOBAL_VALUE @f16_fastcc_f16 - ; GCN-NEXT: [[COPY9:%[0-9]+]]:_(p4) = COPY [[COPY7]] - ; GCN-NEXT: [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY6]] - ; GCN-NEXT: [[COPY11:%[0-9]+]]:_(p4) = COPY [[COPY5]] - ; GCN-NEXT: [[COPY12:%[0-9]+]]:_(s64) = COPY [[COPY4]] - ; GCN-NEXT: [[COPY13:%[0-9]+]]:_(s32) = COPY [[COPY3]] - ; GCN-NEXT: 
[[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY2]] - ; GCN-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY1]] - ; GCN-NEXT: [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY]](s32) + ; GCN-NEXT: [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY7]] + ; GCN-NEXT: [[COPY11:%[0-9]+]]:_(p4) = COPY [[COPY6]] + ; GCN-NEXT: [[COPY12:%[0-9]+]]:_(p4) = COPY [[COPY5]] + ; GCN-NEXT: [[COPY13:%[0-9]+]]:_(s64) = COPY [[COPY4]] + ; GCN-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY3]] + ; GCN-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY2]] + ; GCN-NEXT: [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY1]] + ; GCN-NEXT: [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY]](s32) ; GCN-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[TRUNC]](s16) ; GCN-NEXT: $vgpr0 = COPY [[ANYEXT]](s32) - ; GCN-NEXT: [[COPY17:%[0-9]+]]:_(<4 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3 - ; GCN-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY17]](<4 x s32>) - ; GCN-NEXT: $sgpr4_sgpr5 = COPY [[COPY9]](p4) - ; GCN-NEXT: $sgpr6_sgpr7 = COPY [[COPY10]](p4) - ; GCN-NEXT: $sgpr8_sgpr9 = COPY [[COPY11]](p4) - ; GCN-NEXT: $sgpr10_sgpr11 = COPY [[COPY12]](s64) - ; GCN-NEXT: $sgpr12 = COPY [[COPY13]](s32) - ; GCN-NEXT: $sgpr13 = COPY [[COPY14]](s32) - ; GCN-NEXT: $sgpr14 = COPY [[COPY15]](s32) - ; GCN-NEXT: $vgpr31 = COPY [[COPY16]](s32) + ; GCN-NEXT: [[COPY18:%[0-9]+]]:_(<4 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3 + ; GCN-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY18]](<4 x s32>) + ; GCN-NEXT: $sgpr4_sgpr5 = COPY [[COPY10]](p4) + ; GCN-NEXT: $sgpr6_sgpr7 = COPY [[COPY11]](p4) + ; GCN-NEXT: $sgpr8_sgpr9 = COPY [[COPY12]](p4) + ; GCN-NEXT: $sgpr10_sgpr11 = COPY [[COPY13]](s64) + ; GCN-NEXT: $sgpr12 = COPY [[COPY14]](s32) + ; GCN-NEXT: $sgpr13 = COPY [[COPY15]](s32) + ; GCN-NEXT: $sgpr14 = COPY [[COPY16]](s32) + ; GCN-NEXT: $vgpr31 = COPY [[COPY17]](s32) ; GCN-NEXT: SI_TCRETURN [[GV]](p0), @f16_fastcc_f16, 0, csr_amdgpu_highregs, implicit $vgpr0, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $vgpr31 entry: %ret = tail call fastcc half @f16_fastcc_f16(half %a) @@ -1343,7 +1371,7 @@ declare hidden fastcc <3 x i16> @v3i16_fastcc_v3i16(<3 x i16> %arg0) define hidden fastcc <3 x i16> @sibling_call_v3i16_fastcc_v3i16(<3 x i16> %a) #1 { ; GCN-LABEL: name: sibling_call_v3i16_fastcc_v3i16 ; GCN: bb.1.entry: - ; GCN-NEXT: liveins: $sgpr12, $sgpr13, $sgpr14, $vgpr0, $vgpr1, $vgpr31, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9, $sgpr10_sgpr11 + ; GCN-NEXT: liveins: $sgpr12, $sgpr13, $sgpr14, $vgpr0, $vgpr1, $vgpr31, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9, $sgpr10_sgpr11, $sgpr30_sgpr31 ; GCN-NEXT: {{ $}} ; GCN-NEXT: [[COPY:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr31 ; GCN-NEXT: [[COPY1:%[0-9]+]]:sgpr_32 = COPY $sgpr14 @@ -1358,30 +1386,31 @@ define hidden fastcc <3 x i16> @sibling_call_v3i16_fastcc_v3i16(<3 x i16> %a) #1 ; GCN-NEXT: [[DEF:%[0-9]+]]:_(<2 x s16>) = G_IMPLICIT_DEF ; GCN-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[COPY8]](<2 x s16>), [[COPY9]](<2 x s16>), [[DEF]](<2 x s16>) ; GCN-NEXT: [[UV:%[0-9]+]]:_(<3 x s16>), [[UV1:%[0-9]+]]:_(<3 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<6 x s16>) + ; GCN-NEXT: [[COPY10:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 ; GCN-NEXT: [[GV:%[0-9]+]]:sreg_64(p0) = G_GLOBAL_VALUE @v3i16_fastcc_v3i16 - ; GCN-NEXT: [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY7]] - ; GCN-NEXT: [[COPY11:%[0-9]+]]:_(p4) = COPY [[COPY6]] - ; GCN-NEXT: [[COPY12:%[0-9]+]]:_(p4) = COPY [[COPY5]] - ; GCN-NEXT: [[COPY13:%[0-9]+]]:_(s64) = COPY 
[[COPY4]] - ; GCN-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY3]] - ; GCN-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY2]] - ; GCN-NEXT: [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY1]] - ; GCN-NEXT: [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY]](s32) + ; GCN-NEXT: [[COPY11:%[0-9]+]]:_(p4) = COPY [[COPY7]] + ; GCN-NEXT: [[COPY12:%[0-9]+]]:_(p4) = COPY [[COPY6]] + ; GCN-NEXT: [[COPY13:%[0-9]+]]:_(p4) = COPY [[COPY5]] + ; GCN-NEXT: [[COPY14:%[0-9]+]]:_(s64) = COPY [[COPY4]] + ; GCN-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY3]] + ; GCN-NEXT: [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY2]] + ; GCN-NEXT: [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY1]] + ; GCN-NEXT: [[COPY18:%[0-9]+]]:_(s32) = COPY [[COPY]](s32) ; GCN-NEXT: [[DEF1:%[0-9]+]]:_(<3 x s16>) = G_IMPLICIT_DEF ; GCN-NEXT: [[CONCAT_VECTORS1:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[UV]](<3 x s16>), [[DEF1]](<3 x s16>) ; GCN-NEXT: [[UV2:%[0-9]+]]:_(<2 x s16>), [[UV3:%[0-9]+]]:_(<2 x s16>), [[UV4:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS1]](<6 x s16>) ; GCN-NEXT: $vgpr0 = COPY [[UV2]](<2 x s16>) ; GCN-NEXT: $vgpr1 = COPY [[UV3]](<2 x s16>) - ; GCN-NEXT: [[COPY18:%[0-9]+]]:_(<4 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3 - ; GCN-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY18]](<4 x s32>) - ; GCN-NEXT: $sgpr4_sgpr5 = COPY [[COPY10]](p4) - ; GCN-NEXT: $sgpr6_sgpr7 = COPY [[COPY11]](p4) - ; GCN-NEXT: $sgpr8_sgpr9 = COPY [[COPY12]](p4) - ; GCN-NEXT: $sgpr10_sgpr11 = COPY [[COPY13]](s64) - ; GCN-NEXT: $sgpr12 = COPY [[COPY14]](s32) - ; GCN-NEXT: $sgpr13 = COPY [[COPY15]](s32) - ; GCN-NEXT: $sgpr14 = COPY [[COPY16]](s32) - ; GCN-NEXT: $vgpr31 = COPY [[COPY17]](s32) + ; GCN-NEXT: [[COPY19:%[0-9]+]]:_(<4 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3 + ; GCN-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY19]](<4 x s32>) + ; GCN-NEXT: $sgpr4_sgpr5 = COPY [[COPY11]](p4) + ; GCN-NEXT: $sgpr6_sgpr7 = COPY [[COPY12]](p4) + ; GCN-NEXT: $sgpr8_sgpr9 = COPY [[COPY13]](p4) + ; GCN-NEXT: $sgpr10_sgpr11 = COPY [[COPY14]](s64) + ; GCN-NEXT: $sgpr12 = COPY [[COPY15]](s32) + ; GCN-NEXT: $sgpr13 = COPY [[COPY16]](s32) + ; GCN-NEXT: $sgpr14 = COPY [[COPY17]](s32) + ; GCN-NEXT: $vgpr31 = COPY [[COPY18]](s32) ; GCN-NEXT: SI_TCRETURN [[GV]](p0), @v3i16_fastcc_v3i16, 0, csr_amdgpu_highregs, implicit $vgpr0, implicit $vgpr1, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $vgpr31 entry: %ret = tail call fastcc <3 x i16> @v3i16_fastcc_v3i16(<3 x i16> %a) @@ -1393,7 +1422,7 @@ declare hidden fastcc <4 x i16> @v4i16_fastcc_v4i16(<4 x i16> %arg0) define hidden fastcc <4 x i16> @sibling_call_v4i16_fastcc_v4i16(<4 x i16> %a) #1 { ; GCN-LABEL: name: sibling_call_v4i16_fastcc_v4i16 ; GCN: bb.1.entry: - ; GCN-NEXT: liveins: $sgpr12, $sgpr13, $sgpr14, $vgpr0, $vgpr1, $vgpr31, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9, $sgpr10_sgpr11 + ; GCN-NEXT: liveins: $sgpr12, $sgpr13, $sgpr14, $vgpr0, $vgpr1, $vgpr31, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9, $sgpr10_sgpr11, $sgpr30_sgpr31 ; GCN-NEXT: {{ $}} ; GCN-NEXT: [[COPY:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr31 ; GCN-NEXT: [[COPY1:%[0-9]+]]:sgpr_32 = COPY $sgpr14 @@ -1406,28 +1435,29 @@ define hidden fastcc <4 x i16> @sibling_call_v4i16_fastcc_v4i16(<4 x i16> %a) #1 ; GCN-NEXT: [[COPY8:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0 ; GCN-NEXT: [[COPY9:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr1 ; GCN-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[COPY8]](<2 x s16>), [[COPY9]](<2 x s16>) + ; GCN-NEXT: 
[[COPY10:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 ; GCN-NEXT: [[GV:%[0-9]+]]:sreg_64(p0) = G_GLOBAL_VALUE @v4i16_fastcc_v4i16 - ; GCN-NEXT: [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY7]] - ; GCN-NEXT: [[COPY11:%[0-9]+]]:_(p4) = COPY [[COPY6]] - ; GCN-NEXT: [[COPY12:%[0-9]+]]:_(p4) = COPY [[COPY5]] - ; GCN-NEXT: [[COPY13:%[0-9]+]]:_(s64) = COPY [[COPY4]] - ; GCN-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY3]] - ; GCN-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY2]] - ; GCN-NEXT: [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY1]] - ; GCN-NEXT: [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY]](s32) + ; GCN-NEXT: [[COPY11:%[0-9]+]]:_(p4) = COPY [[COPY7]] + ; GCN-NEXT: [[COPY12:%[0-9]+]]:_(p4) = COPY [[COPY6]] + ; GCN-NEXT: [[COPY13:%[0-9]+]]:_(p4) = COPY [[COPY5]] + ; GCN-NEXT: [[COPY14:%[0-9]+]]:_(s64) = COPY [[COPY4]] + ; GCN-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY3]] + ; GCN-NEXT: [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY2]] + ; GCN-NEXT: [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY1]] + ; GCN-NEXT: [[COPY18:%[0-9]+]]:_(s32) = COPY [[COPY]](s32) ; GCN-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<4 x s16>) ; GCN-NEXT: $vgpr0 = COPY [[UV]](<2 x s16>) ; GCN-NEXT: $vgpr1 = COPY [[UV1]](<2 x s16>) - ; GCN-NEXT: [[COPY18:%[0-9]+]]:_(<4 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3 - ; GCN-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY18]](<4 x s32>) - ; GCN-NEXT: $sgpr4_sgpr5 = COPY [[COPY10]](p4) - ; GCN-NEXT: $sgpr6_sgpr7 = COPY [[COPY11]](p4) - ; GCN-NEXT: $sgpr8_sgpr9 = COPY [[COPY12]](p4) - ; GCN-NEXT: $sgpr10_sgpr11 = COPY [[COPY13]](s64) - ; GCN-NEXT: $sgpr12 = COPY [[COPY14]](s32) - ; GCN-NEXT: $sgpr13 = COPY [[COPY15]](s32) - ; GCN-NEXT: $sgpr14 = COPY [[COPY16]](s32) - ; GCN-NEXT: $vgpr31 = COPY [[COPY17]](s32) + ; GCN-NEXT: [[COPY19:%[0-9]+]]:_(<4 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3 + ; GCN-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY19]](<4 x s32>) + ; GCN-NEXT: $sgpr4_sgpr5 = COPY [[COPY11]](p4) + ; GCN-NEXT: $sgpr6_sgpr7 = COPY [[COPY12]](p4) + ; GCN-NEXT: $sgpr8_sgpr9 = COPY [[COPY13]](p4) + ; GCN-NEXT: $sgpr10_sgpr11 = COPY [[COPY14]](s64) + ; GCN-NEXT: $sgpr12 = COPY [[COPY15]](s32) + ; GCN-NEXT: $sgpr13 = COPY [[COPY16]](s32) + ; GCN-NEXT: $sgpr14 = COPY [[COPY17]](s32) + ; GCN-NEXT: $vgpr31 = COPY [[COPY18]](s32) ; GCN-NEXT: SI_TCRETURN [[GV]](p0), @v4i16_fastcc_v4i16, 0, csr_amdgpu_highregs, implicit $vgpr0, implicit $vgpr1, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $vgpr31 entry: %ret = tail call fastcc <4 x i16> @v4i16_fastcc_v4i16(<4 x i16> %a) @@ -1439,7 +1469,7 @@ declare hidden fastcc <2 x i64> @v2i64_fastcc_v2i64(<2 x i64> %arg0) define hidden fastcc <2 x i64> @sibling_call_v2i64_fastcc_v2i64(<2 x i64> %a) #1 { ; GCN-LABEL: name: sibling_call_v2i64_fastcc_v2i64 ; GCN: bb.1.entry: - ; GCN-NEXT: liveins: $sgpr12, $sgpr13, $sgpr14, $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr31, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9, $sgpr10_sgpr11 + ; GCN-NEXT: liveins: $sgpr12, $sgpr13, $sgpr14, $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr31, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9, $sgpr10_sgpr11, $sgpr30_sgpr31 ; GCN-NEXT: {{ $}} ; GCN-NEXT: [[COPY:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr31 ; GCN-NEXT: [[COPY1:%[0-9]+]]:sgpr_32 = COPY $sgpr14 @@ -1456,30 +1486,31 @@ define hidden fastcc <2 x i64> @sibling_call_v2i64_fastcc_v2i64(<2 x i64> %a) #1 ; GCN-NEXT: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY8]](s32), 
[[COPY9]](s32) ; GCN-NEXT: [[MV1:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY10]](s32), [[COPY11]](s32) ; GCN-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s64>) = G_BUILD_VECTOR [[MV]](s64), [[MV1]](s64) + ; GCN-NEXT: [[COPY12:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 ; GCN-NEXT: [[GV:%[0-9]+]]:sreg_64(p0) = G_GLOBAL_VALUE @v2i64_fastcc_v2i64 - ; GCN-NEXT: [[COPY12:%[0-9]+]]:_(p4) = COPY [[COPY7]] - ; GCN-NEXT: [[COPY13:%[0-9]+]]:_(p4) = COPY [[COPY6]] - ; GCN-NEXT: [[COPY14:%[0-9]+]]:_(p4) = COPY [[COPY5]] - ; GCN-NEXT: [[COPY15:%[0-9]+]]:_(s64) = COPY [[COPY4]] - ; GCN-NEXT: [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY3]] - ; GCN-NEXT: [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY2]] - ; GCN-NEXT: [[COPY18:%[0-9]+]]:_(s32) = COPY [[COPY1]] - ; GCN-NEXT: [[COPY19:%[0-9]+]]:_(s32) = COPY [[COPY]](s32) + ; GCN-NEXT: [[COPY13:%[0-9]+]]:_(p4) = COPY [[COPY7]] + ; GCN-NEXT: [[COPY14:%[0-9]+]]:_(p4) = COPY [[COPY6]] + ; GCN-NEXT: [[COPY15:%[0-9]+]]:_(p4) = COPY [[COPY5]] + ; GCN-NEXT: [[COPY16:%[0-9]+]]:_(s64) = COPY [[COPY4]] + ; GCN-NEXT: [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY3]] + ; GCN-NEXT: [[COPY18:%[0-9]+]]:_(s32) = COPY [[COPY2]] + ; GCN-NEXT: [[COPY19:%[0-9]+]]:_(s32) = COPY [[COPY1]] + ; GCN-NEXT: [[COPY20:%[0-9]+]]:_(s32) = COPY [[COPY]](s32) ; GCN-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[BUILD_VECTOR]](<2 x s64>) ; GCN-NEXT: $vgpr0 = COPY [[UV]](s32) ; GCN-NEXT: $vgpr1 = COPY [[UV1]](s32) ; GCN-NEXT: $vgpr2 = COPY [[UV2]](s32) ; GCN-NEXT: $vgpr3 = COPY [[UV3]](s32) - ; GCN-NEXT: [[COPY20:%[0-9]+]]:_(<4 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3 - ; GCN-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY20]](<4 x s32>) - ; GCN-NEXT: $sgpr4_sgpr5 = COPY [[COPY12]](p4) - ; GCN-NEXT: $sgpr6_sgpr7 = COPY [[COPY13]](p4) - ; GCN-NEXT: $sgpr8_sgpr9 = COPY [[COPY14]](p4) - ; GCN-NEXT: $sgpr10_sgpr11 = COPY [[COPY15]](s64) - ; GCN-NEXT: $sgpr12 = COPY [[COPY16]](s32) - ; GCN-NEXT: $sgpr13 = COPY [[COPY17]](s32) - ; GCN-NEXT: $sgpr14 = COPY [[COPY18]](s32) - ; GCN-NEXT: $vgpr31 = COPY [[COPY19]](s32) + ; GCN-NEXT: [[COPY21:%[0-9]+]]:_(<4 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3 + ; GCN-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY21]](<4 x s32>) + ; GCN-NEXT: $sgpr4_sgpr5 = COPY [[COPY13]](p4) + ; GCN-NEXT: $sgpr6_sgpr7 = COPY [[COPY14]](p4) + ; GCN-NEXT: $sgpr8_sgpr9 = COPY [[COPY15]](p4) + ; GCN-NEXT: $sgpr10_sgpr11 = COPY [[COPY16]](s64) + ; GCN-NEXT: $sgpr12 = COPY [[COPY17]](s32) + ; GCN-NEXT: $sgpr13 = COPY [[COPY18]](s32) + ; GCN-NEXT: $sgpr14 = COPY [[COPY19]](s32) + ; GCN-NEXT: $vgpr31 = COPY [[COPY20]](s32) ; GCN-NEXT: SI_TCRETURN [[GV]](p0), @v2i64_fastcc_v2i64, 0, csr_amdgpu_highregs, implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $vgpr31 entry: %ret = tail call fastcc <2 x i64> @v2i64_fastcc_v2i64(<2 x i64> %a) diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-tail-call.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-tail-call.ll index 87b53c69f612..1147647ec514 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-tail-call.ll +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-tail-call.ll @@ -6,7 +6,7 @@ declare hidden void @external_void_func_void() define void @tail_call_void_func_void() { ; CHECK-LABEL: name: tail_call_void_func_void ; CHECK: bb.1 (%ir-block.0): - ; CHECK-NEXT: liveins: 
$sgpr12, $sgpr13, $sgpr14, $vgpr31, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9, $sgpr10_sgpr11 + ; CHECK-NEXT: liveins: $sgpr12, $sgpr13, $sgpr14, $vgpr31, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9, $sgpr10_sgpr11, $sgpr30_sgpr31 ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr31 ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr_32 = COPY $sgpr14 @@ -16,25 +16,26 @@ define void @tail_call_void_func_void() { ; CHECK-NEXT: [[COPY5:%[0-9]+]]:sgpr_64 = COPY $sgpr8_sgpr9 ; CHECK-NEXT: [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr6_sgpr7 ; CHECK-NEXT: [[COPY7:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5 + ; CHECK-NEXT: [[COPY8:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 ; CHECK-NEXT: [[GV:%[0-9]+]]:sreg_64(p0) = G_GLOBAL_VALUE @external_void_func_void - ; CHECK-NEXT: [[COPY8:%[0-9]+]]:_(p4) = COPY [[COPY7]] - ; CHECK-NEXT: [[COPY9:%[0-9]+]]:_(p4) = COPY [[COPY6]] - ; CHECK-NEXT: [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY5]] - ; CHECK-NEXT: [[COPY11:%[0-9]+]]:_(s64) = COPY [[COPY4]] - ; CHECK-NEXT: [[COPY12:%[0-9]+]]:_(s32) = COPY [[COPY3]] - ; CHECK-NEXT: [[COPY13:%[0-9]+]]:_(s32) = COPY [[COPY2]] - ; CHECK-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY1]] - ; CHECK-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY]](s32) - ; CHECK-NEXT: [[COPY16:%[0-9]+]]:_(<4 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3 - ; CHECK-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY16]](<4 x s32>) - ; CHECK-NEXT: $sgpr4_sgpr5 = COPY [[COPY8]](p4) - ; CHECK-NEXT: $sgpr6_sgpr7 = COPY [[COPY9]](p4) - ; CHECK-NEXT: $sgpr8_sgpr9 = COPY [[COPY10]](p4) - ; CHECK-NEXT: $sgpr10_sgpr11 = COPY [[COPY11]](s64) - ; CHECK-NEXT: $sgpr12 = COPY [[COPY12]](s32) - ; CHECK-NEXT: $sgpr13 = COPY [[COPY13]](s32) - ; CHECK-NEXT: $sgpr14 = COPY [[COPY14]](s32) - ; CHECK-NEXT: $vgpr31 = COPY [[COPY15]](s32) + ; CHECK-NEXT: [[COPY9:%[0-9]+]]:_(p4) = COPY [[COPY7]] + ; CHECK-NEXT: [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY6]] + ; CHECK-NEXT: [[COPY11:%[0-9]+]]:_(p4) = COPY [[COPY5]] + ; CHECK-NEXT: [[COPY12:%[0-9]+]]:_(s64) = COPY [[COPY4]] + ; CHECK-NEXT: [[COPY13:%[0-9]+]]:_(s32) = COPY [[COPY3]] + ; CHECK-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY2]] + ; CHECK-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY1]] + ; CHECK-NEXT: [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY]](s32) + ; CHECK-NEXT: [[COPY17:%[0-9]+]]:_(<4 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3 + ; CHECK-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY17]](<4 x s32>) + ; CHECK-NEXT: $sgpr4_sgpr5 = COPY [[COPY9]](p4) + ; CHECK-NEXT: $sgpr6_sgpr7 = COPY [[COPY10]](p4) + ; CHECK-NEXT: $sgpr8_sgpr9 = COPY [[COPY11]](p4) + ; CHECK-NEXT: $sgpr10_sgpr11 = COPY [[COPY12]](s64) + ; CHECK-NEXT: $sgpr12 = COPY [[COPY13]](s32) + ; CHECK-NEXT: $sgpr13 = COPY [[COPY14]](s32) + ; CHECK-NEXT: $sgpr14 = COPY [[COPY15]](s32) + ; CHECK-NEXT: $vgpr31 = COPY [[COPY16]](s32) ; CHECK-NEXT: SI_TCRETURN [[GV]](p0), @external_void_func_void, 0, csr_amdgpu_highregs, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $vgpr31 tail call void @external_void_func_void() ret void diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.ds.fmax.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.ds.fmax.ll index 815f9b1a07fe..87ef4479d4bf 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.ds.fmax.ll +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.ds.fmax.ll @@ -187,21 +187,25 @@ define float @ds_fmax_f32_vv(float addrspace(3)* %ptr, float %val) { ; GFX9-NEXT: s_setpc_b64 s[30:31] ; 
GFX8-MIR-LABEL: name: ds_fmax_f32_vv ; GFX8-MIR: bb.1 (%ir-block.0): - ; GFX8-MIR: liveins: $vgpr0, $vgpr1 + ; GFX8-MIR: liveins: $vgpr0, $vgpr1, $sgpr30_sgpr31 ; GFX8-MIR: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 ; GFX8-MIR: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 + ; GFX8-MIR: [[COPY2:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 ; GFX8-MIR: $m0 = S_MOV_B32 -1 ; GFX8-MIR: [[DS_MAX_RTN_F32_:%[0-9]+]]:vgpr_32 = DS_MAX_RTN_F32 [[COPY]], [[COPY1]], 0, 0, implicit $m0, implicit $exec :: (load store (s32) on %ir.ptr, addrspace 3) ; GFX8-MIR: $vgpr0 = COPY [[DS_MAX_RTN_F32_]] - ; GFX8-MIR: SI_RETURN implicit $vgpr0 + ; GFX8-MIR: [[COPY3:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY2]] + ; GFX8-MIR: S_SETPC_B64_return [[COPY3]], implicit $vgpr0 ; GFX9-MIR-LABEL: name: ds_fmax_f32_vv ; GFX9-MIR: bb.1 (%ir-block.0): - ; GFX9-MIR: liveins: $vgpr0, $vgpr1 + ; GFX9-MIR: liveins: $vgpr0, $vgpr1, $sgpr30_sgpr31 ; GFX9-MIR: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 ; GFX9-MIR: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 + ; GFX9-MIR: [[COPY2:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 ; GFX9-MIR: [[DS_MAX_RTN_F32_gfx9_:%[0-9]+]]:vgpr_32 = DS_MAX_RTN_F32_gfx9 [[COPY]], [[COPY1]], 0, 0, implicit $exec :: (load store (s32) on %ir.ptr, addrspace 3) ; GFX9-MIR: $vgpr0 = COPY [[DS_MAX_RTN_F32_gfx9_]] - ; GFX9-MIR: SI_RETURN implicit $vgpr0 + ; GFX9-MIR: [[COPY3:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY2]] + ; GFX9-MIR: S_SETPC_B64_return [[COPY3]], implicit $vgpr0 %ret = call float @llvm.amdgcn.ds.fmax(float addrspace(3)* %ptr, float %val, i32 0, i32 0, i1 false) ret float %ret } @@ -223,21 +227,25 @@ define float @ds_fmax_f32_vv_offset(float addrspace(3)* %ptr, float %val) { ; GFX9-NEXT: s_setpc_b64 s[30:31] ; GFX8-MIR-LABEL: name: ds_fmax_f32_vv_offset ; GFX8-MIR: bb.1 (%ir-block.0): - ; GFX8-MIR: liveins: $vgpr0, $vgpr1 + ; GFX8-MIR: liveins: $vgpr0, $vgpr1, $sgpr30_sgpr31 ; GFX8-MIR: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 ; GFX8-MIR: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 + ; GFX8-MIR: [[COPY2:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 ; GFX8-MIR: $m0 = S_MOV_B32 -1 ; GFX8-MIR: [[DS_MAX_RTN_F32_:%[0-9]+]]:vgpr_32 = DS_MAX_RTN_F32 [[COPY]], [[COPY1]], 512, 0, implicit $m0, implicit $exec :: (load store (s32) on %ir.gep, addrspace 3) ; GFX8-MIR: $vgpr0 = COPY [[DS_MAX_RTN_F32_]] - ; GFX8-MIR: SI_RETURN implicit $vgpr0 + ; GFX8-MIR: [[COPY3:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY2]] + ; GFX8-MIR: S_SETPC_B64_return [[COPY3]], implicit $vgpr0 ; GFX9-MIR-LABEL: name: ds_fmax_f32_vv_offset ; GFX9-MIR: bb.1 (%ir-block.0): - ; GFX9-MIR: liveins: $vgpr0, $vgpr1 + ; GFX9-MIR: liveins: $vgpr0, $vgpr1, $sgpr30_sgpr31 ; GFX9-MIR: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 ; GFX9-MIR: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 + ; GFX9-MIR: [[COPY2:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 ; GFX9-MIR: [[DS_MAX_RTN_F32_gfx9_:%[0-9]+]]:vgpr_32 = DS_MAX_RTN_F32_gfx9 [[COPY]], [[COPY1]], 512, 0, implicit $exec :: (load store (s32) on %ir.gep, addrspace 3) ; GFX9-MIR: $vgpr0 = COPY [[DS_MAX_RTN_F32_gfx9_]] - ; GFX9-MIR: SI_RETURN implicit $vgpr0 + ; GFX9-MIR: [[COPY3:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY2]] + ; GFX9-MIR: S_SETPC_B64_return [[COPY3]], implicit $vgpr0 %gep = getelementptr float, float addrspace(3)* %ptr, i32 128 %ret = call float @llvm.amdgcn.ds.fmax(float addrspace(3)* %gep, float %val, i32 0, i32 0, i1 false) ret float %ret @@ -260,19 +268,23 @@ define void @ds_fmax_f32_vv_nortn(float addrspace(3)* %ptr, float %val) { ; GFX9-NEXT: s_setpc_b64 s[30:31] ; GFX8-MIR-LABEL: name: ds_fmax_f32_vv_nortn ; GFX8-MIR: bb.1 (%ir-block.0): - ; GFX8-MIR: liveins: $vgpr0, 
$vgpr1 + ; GFX8-MIR: liveins: $vgpr0, $vgpr1, $sgpr30_sgpr31 ; GFX8-MIR: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 ; GFX8-MIR: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 + ; GFX8-MIR: [[COPY2:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 ; GFX8-MIR: $m0 = S_MOV_B32 -1 ; GFX8-MIR: [[DS_MAX_RTN_F32_:%[0-9]+]]:vgpr_32 = DS_MAX_RTN_F32 [[COPY]], [[COPY1]], 0, 0, implicit $m0, implicit $exec :: (load store (s32) on %ir.ptr, addrspace 3) - ; GFX8-MIR: SI_RETURN + ; GFX8-MIR: [[COPY3:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY2]] + ; GFX8-MIR: S_SETPC_B64_return [[COPY3]] ; GFX9-MIR-LABEL: name: ds_fmax_f32_vv_nortn ; GFX9-MIR: bb.1 (%ir-block.0): - ; GFX9-MIR: liveins: $vgpr0, $vgpr1 + ; GFX9-MIR: liveins: $vgpr0, $vgpr1, $sgpr30_sgpr31 ; GFX9-MIR: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 ; GFX9-MIR: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 + ; GFX9-MIR: [[COPY2:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 ; GFX9-MIR: [[DS_MAX_RTN_F32_gfx9_:%[0-9]+]]:vgpr_32 = DS_MAX_RTN_F32_gfx9 [[COPY]], [[COPY1]], 0, 0, implicit $exec :: (load store (s32) on %ir.ptr, addrspace 3) - ; GFX9-MIR: SI_RETURN + ; GFX9-MIR: [[COPY3:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY2]] + ; GFX9-MIR: S_SETPC_B64_return [[COPY3]] %ret = call float @llvm.amdgcn.ds.fmax(float addrspace(3)* %ptr, float %val, i32 0, i32 0, i1 false) ret void } @@ -294,19 +306,23 @@ define void @ds_fmax_f32_vv_offset_nortn(float addrspace(3)* %ptr, float %val) { ; GFX9-NEXT: s_setpc_b64 s[30:31] ; GFX8-MIR-LABEL: name: ds_fmax_f32_vv_offset_nortn ; GFX8-MIR: bb.1 (%ir-block.0): - ; GFX8-MIR: liveins: $vgpr0, $vgpr1 + ; GFX8-MIR: liveins: $vgpr0, $vgpr1, $sgpr30_sgpr31 ; GFX8-MIR: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 ; GFX8-MIR: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 + ; GFX8-MIR: [[COPY2:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 ; GFX8-MIR: $m0 = S_MOV_B32 -1 ; GFX8-MIR: [[DS_MAX_RTN_F32_:%[0-9]+]]:vgpr_32 = DS_MAX_RTN_F32 [[COPY]], [[COPY1]], 512, 0, implicit $m0, implicit $exec :: (load store (s32) on %ir.gep, addrspace 3) - ; GFX8-MIR: SI_RETURN + ; GFX8-MIR: [[COPY3:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY2]] + ; GFX8-MIR: S_SETPC_B64_return [[COPY3]] ; GFX9-MIR-LABEL: name: ds_fmax_f32_vv_offset_nortn ; GFX9-MIR: bb.1 (%ir-block.0): - ; GFX9-MIR: liveins: $vgpr0, $vgpr1 + ; GFX9-MIR: liveins: $vgpr0, $vgpr1, $sgpr30_sgpr31 ; GFX9-MIR: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 ; GFX9-MIR: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 + ; GFX9-MIR: [[COPY2:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 ; GFX9-MIR: [[DS_MAX_RTN_F32_gfx9_:%[0-9]+]]:vgpr_32 = DS_MAX_RTN_F32_gfx9 [[COPY]], [[COPY1]], 512, 0, implicit $exec :: (load store (s32) on %ir.gep, addrspace 3) - ; GFX9-MIR: SI_RETURN + ; GFX9-MIR: [[COPY3:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY2]] + ; GFX9-MIR: S_SETPC_B64_return [[COPY3]] %gep = getelementptr float, float addrspace(3)* %ptr, i32 128 %ret = call float @llvm.amdgcn.ds.fmax(float addrspace(3)* %gep, float %val, i32 0, i32 0, i1 false) ret void @@ -329,21 +345,25 @@ define float @ds_fmax_f32_vv_volatile(float addrspace(3)* %ptr, float %val) { ; GFX9-NEXT: s_setpc_b64 s[30:31] ; GFX8-MIR-LABEL: name: ds_fmax_f32_vv_volatile ; GFX8-MIR: bb.1 (%ir-block.0): - ; GFX8-MIR: liveins: $vgpr0, $vgpr1 + ; GFX8-MIR: liveins: $vgpr0, $vgpr1, $sgpr30_sgpr31 ; GFX8-MIR: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 ; GFX8-MIR: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 + ; GFX8-MIR: [[COPY2:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 ; GFX8-MIR: $m0 = S_MOV_B32 -1 ; GFX8-MIR: [[DS_MAX_RTN_F32_:%[0-9]+]]:vgpr_32 = DS_MAX_RTN_F32 [[COPY]], [[COPY1]], 0, 0, implicit $m0, implicit $exec :: (volatile load store (s32) 
on %ir.ptr, addrspace 3) ; GFX8-MIR: $vgpr0 = COPY [[DS_MAX_RTN_F32_]] - ; GFX8-MIR: SI_RETURN implicit $vgpr0 + ; GFX8-MIR: [[COPY3:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY2]] + ; GFX8-MIR: S_SETPC_B64_return [[COPY3]], implicit $vgpr0 ; GFX9-MIR-LABEL: name: ds_fmax_f32_vv_volatile ; GFX9-MIR: bb.1 (%ir-block.0): - ; GFX9-MIR: liveins: $vgpr0, $vgpr1 + ; GFX9-MIR: liveins: $vgpr0, $vgpr1, $sgpr30_sgpr31 ; GFX9-MIR: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 ; GFX9-MIR: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 + ; GFX9-MIR: [[COPY2:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 ; GFX9-MIR: [[DS_MAX_RTN_F32_gfx9_:%[0-9]+]]:vgpr_32 = DS_MAX_RTN_F32_gfx9 [[COPY]], [[COPY1]], 0, 0, implicit $exec :: (volatile load store (s32) on %ir.ptr, addrspace 3) ; GFX9-MIR: $vgpr0 = COPY [[DS_MAX_RTN_F32_gfx9_]] - ; GFX9-MIR: SI_RETURN implicit $vgpr0 + ; GFX9-MIR: [[COPY3:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY2]] + ; GFX9-MIR: S_SETPC_B64_return [[COPY3]], implicit $vgpr0 %ret = call float @llvm.amdgcn.ds.fmax(float addrspace(3)* %ptr, float %val, i32 0, i32 0, i1 true) ret float %ret } diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/load-unaligned.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/load-unaligned.ll index b4a7d4f9855a..7d604236777d 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/load-unaligned.ll +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/load-unaligned.ll @@ -269,22 +269,22 @@ define void @store_lds_v4i32_align1(<4 x i32> addrspace(3)* %out, <4 x i32> %x) ; GFX10-NEXT: v_lshrrev_b16 v6, 8, v1 ; GFX10-NEXT: ds_write_b8 v0, v1 ; GFX10-NEXT: v_lshrrev_b32_e32 v1, 16, v2 -; GFX10-NEXT: v_lshrrev_b16 v7, 8, v2 -; GFX10-NEXT: v_lshrrev_b16 v8, 8, v5 +; GFX10-NEXT: v_lshrrev_b16 v8, 8, v2 +; GFX10-NEXT: v_lshrrev_b16 v7, 8, v5 ; GFX10-NEXT: ds_write_b8 v0, v2 offset:4 ; GFX10-NEXT: ds_write_b8 v0, v6 offset:1 ; GFX10-NEXT: ds_write_b8 v0, v5 offset:2 -; GFX10-NEXT: v_lshrrev_b16 v5, 8, v3 +; GFX10-NEXT: ds_write_b8 v0, v7 offset:3 ; GFX10-NEXT: v_lshrrev_b16 v2, 8, v1 -; GFX10-NEXT: ds_write_b8 v0, v8 offset:3 -; GFX10-NEXT: ds_write_b8 v0, v7 offset:5 +; GFX10-NEXT: ds_write_b8 v0, v8 offset:5 ; GFX10-NEXT: ds_write_b8 v0, v1 offset:6 ; GFX10-NEXT: v_lshrrev_b32_e32 v1, 16, v3 +; GFX10-NEXT: v_lshrrev_b16 v5, 8, v3 ; GFX10-NEXT: ds_write_b8 v0, v2 offset:7 ; GFX10-NEXT: ds_write_b8 v0, v3 offset:8 ; GFX10-NEXT: v_lshrrev_b32_e32 v2, 16, v4 -; GFX10-NEXT: ds_write_b8 v0, v5 offset:9 ; GFX10-NEXT: v_lshrrev_b16 v3, 8, v1 +; GFX10-NEXT: ds_write_b8 v0, v5 offset:9 ; GFX10-NEXT: v_lshrrev_b16 v5, 8, v4 ; GFX10-NEXT: ds_write_b8 v0, v1 offset:10 ; GFX10-NEXT: v_lshrrev_b16 v1, 8, v2 @@ -344,21 +344,21 @@ define void @store_lds_v3i32_align1(<3 x i32> addrspace(3)* %out, <3 x i32> %x) ; GFX10-NEXT: ds_write_b8 v0, v1 ; GFX10-NEXT: v_lshrrev_b32_e32 v1, 16, v2 ; GFX10-NEXT: v_lshrrev_b16 v6, 8, v2 +; GFX10-NEXT: v_lshrrev_b16 v7, 8, v4 ; GFX10-NEXT: ds_write_b8 v0, v2 offset:4 -; GFX10-NEXT: v_lshrrev_b16 v2, 8, v4 -; GFX10-NEXT: v_lshrrev_b32_e32 v7, 16, v3 +; GFX10-NEXT: v_lshrrev_b32_e32 v2, 16, v3 ; GFX10-NEXT: ds_write_b8 v0, v5 offset:1 ; GFX10-NEXT: ds_write_b8 v0, v4 offset:2 +; GFX10-NEXT: ds_write_b8 v0, v7 offset:3 ; GFX10-NEXT: v_lshrrev_b16 v4, 8, v1 -; GFX10-NEXT: ds_write_b8 v0, v2 offset:3 -; GFX10-NEXT: ds_write_b8 v0, v6 offset:5 -; GFX10-NEXT: v_lshrrev_b16 v2, 8, v3 +; GFX10-NEXT: v_lshrrev_b16 v5, 8, v3 ; GFX10-NEXT: ds_write_b8 v0, v1 offset:6 -; GFX10-NEXT: v_lshrrev_b16 v1, 8, v7 +; GFX10-NEXT: v_lshrrev_b16 v1, 8, v2 +; GFX10-NEXT: ds_write_b8 v0, v6 offset:5 ; GFX10-NEXT: ds_write_b8 v0, v4 offset:7 ; 
GFX10-NEXT: ds_write_b8 v0, v3 offset:8 -; GFX10-NEXT: ds_write_b8 v0, v2 offset:9 -; GFX10-NEXT: ds_write_b8 v0, v7 offset:10 +; GFX10-NEXT: ds_write_b8 v0, v5 offset:9 +; GFX10-NEXT: ds_write_b8 v0, v2 offset:10 ; GFX10-NEXT: ds_write_b8 v0, v1 offset:11 ; GFX10-NEXT: s_waitcnt lgkmcnt(0) ; GFX10-NEXT: s_setpc_b64 s[30:31] diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankcombiner-smed3.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankcombiner-smed3.mir index ab05e6ef7ead..da576626e995 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankcombiner-smed3.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankcombiner-smed3.mir @@ -9,20 +9,23 @@ regBankSelected: true tracksRegLiveness: true body: | bb.1: - liveins: $vgpr0 + liveins: $vgpr0, $sgpr30_sgpr31 ; CHECK-LABEL: name: test_min_max_ValK0_K1_i32 - ; CHECK: liveins: $vgpr0 + ; CHECK: liveins: $vgpr0, $sgpr30_sgpr31 ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 ; CHECK-NEXT: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 -12 ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY [[C]](s32) ; CHECK-NEXT: [[C1:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 17 ; CHECK-NEXT: [[COPY3:%[0-9]+]]:vgpr(s32) = COPY [[C1]](s32) ; CHECK-NEXT: [[AMDGPU_SMED3_:%[0-9]+]]:vgpr(s32) = G_AMDGPU_SMED3 [[COPY]], [[COPY2]], [[COPY3]] ; CHECK-NEXT: $vgpr0 = COPY [[AMDGPU_SMED3_]](s32) - ; CHECK-NEXT: S_SETPC_B64_return undef $sgpr30_sgpr31, implicit $vgpr0 + ; CHECK-NEXT: [[COPY4:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY1]] + ; CHECK-NEXT: S_SETPC_B64_return [[COPY4]], implicit $vgpr0 %0:vgpr(s32) = COPY $vgpr0 + %1:sgpr_64 = COPY $sgpr30_sgpr31 %2:sgpr(s32) = G_CONSTANT i32 -12 %7:vgpr(s32) = COPY %2(s32) %3:vgpr(s32) = G_SMAX %0, %7 @@ -30,7 +33,8 @@ body: | %8:vgpr(s32) = COPY %4(s32) %5:vgpr(s32) = G_SMIN %3, %8 $vgpr0 = COPY %5(s32) - S_SETPC_B64_return undef $sgpr30_sgpr31, implicit $vgpr0 + %6:ccr_sgpr_64 = COPY %1 + S_SETPC_B64_return %6, implicit $vgpr0 ... --- @@ -40,20 +44,23 @@ regBankSelected: true tracksRegLiveness: true body: | bb.1: - liveins: $vgpr0 + liveins: $vgpr0, $sgpr30_sgpr31 ; CHECK-LABEL: name: min_max_ValK0_K1_i32 - ; CHECK: liveins: $vgpr0 + ; CHECK: liveins: $vgpr0, $sgpr30_sgpr31 ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 ; CHECK-NEXT: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 -12 ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY [[C]](s32) ; CHECK-NEXT: [[C1:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 17 ; CHECK-NEXT: [[COPY3:%[0-9]+]]:vgpr(s32) = COPY [[C1]](s32) ; CHECK-NEXT: [[AMDGPU_SMED3_:%[0-9]+]]:vgpr(s32) = G_AMDGPU_SMED3 [[COPY]], [[COPY2]], [[COPY3]] ; CHECK-NEXT: $vgpr0 = COPY [[AMDGPU_SMED3_]](s32) - ; CHECK-NEXT: S_SETPC_B64_return undef $sgpr30_sgpr31, implicit $vgpr0 + ; CHECK-NEXT: [[COPY4:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY1]] + ; CHECK-NEXT: S_SETPC_B64_return [[COPY4]], implicit $vgpr0 %0:vgpr(s32) = COPY $vgpr0 + %1:sgpr_64 = COPY $sgpr30_sgpr31 %2:sgpr(s32) = G_CONSTANT i32 -12 %7:vgpr(s32) = COPY %2(s32) %3:vgpr(s32) = G_SMAX %7, %0 @@ -61,7 +68,8 @@ body: | %8:vgpr(s32) = COPY %4(s32) %5:vgpr(s32) = G_SMIN %3, %8 $vgpr0 = COPY %5(s32) - S_SETPC_B64_return undef $sgpr30_sgpr31, implicit $vgpr0 + %6:ccr_sgpr_64 = COPY %1 + S_SETPC_B64_return %6, implicit $vgpr0 ... 
--- @@ -71,20 +79,23 @@ regBankSelected: true tracksRegLiveness: true body: | bb.1: - liveins: $vgpr0 + liveins: $vgpr0, $sgpr30_sgpr31 ; CHECK-LABEL: name: test_min_K1max_ValK0__i32 - ; CHECK: liveins: $vgpr0 + ; CHECK: liveins: $vgpr0, $sgpr30_sgpr31 ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 ; CHECK-NEXT: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 -12 ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY [[C]](s32) ; CHECK-NEXT: [[C1:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 17 ; CHECK-NEXT: [[COPY3:%[0-9]+]]:vgpr(s32) = COPY [[C1]](s32) ; CHECK-NEXT: [[AMDGPU_SMED3_:%[0-9]+]]:vgpr(s32) = G_AMDGPU_SMED3 [[COPY]], [[COPY2]], [[COPY3]] ; CHECK-NEXT: $vgpr0 = COPY [[AMDGPU_SMED3_]](s32) - ; CHECK-NEXT: S_SETPC_B64_return undef $sgpr30_sgpr31, implicit $vgpr0 + ; CHECK-NEXT: [[COPY4:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY1]] + ; CHECK-NEXT: S_SETPC_B64_return [[COPY4]], implicit $vgpr0 %0:vgpr(s32) = COPY $vgpr0 + %1:sgpr_64 = COPY $sgpr30_sgpr31 %2:sgpr(s32) = G_CONSTANT i32 -12 %7:vgpr(s32) = COPY %2(s32) %3:vgpr(s32) = G_SMAX %0, %7 @@ -92,7 +103,8 @@ body: | %8:vgpr(s32) = COPY %4(s32) %5:vgpr(s32) = G_SMIN %8, %3 $vgpr0 = COPY %5(s32) - S_SETPC_B64_return undef $sgpr30_sgpr31, implicit $vgpr0 + %6:ccr_sgpr_64 = COPY %1 + S_SETPC_B64_return %6, implicit $vgpr0 ... --- @@ -102,20 +114,23 @@ regBankSelected: true tracksRegLiveness: true body: | bb.1: - liveins: $vgpr0 + liveins: $vgpr0, $sgpr30_sgpr31 ; CHECK-LABEL: name: test_min_K1max_K0Val__i32 - ; CHECK: liveins: $vgpr0 + ; CHECK: liveins: $vgpr0, $sgpr30_sgpr31 ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 ; CHECK-NEXT: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 -12 ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY [[C]](s32) ; CHECK-NEXT: [[C1:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 17 ; CHECK-NEXT: [[COPY3:%[0-9]+]]:vgpr(s32) = COPY [[C1]](s32) ; CHECK-NEXT: [[AMDGPU_SMED3_:%[0-9]+]]:vgpr(s32) = G_AMDGPU_SMED3 [[COPY]], [[COPY2]], [[COPY3]] ; CHECK-NEXT: $vgpr0 = COPY [[AMDGPU_SMED3_]](s32) - ; CHECK-NEXT: S_SETPC_B64_return undef $sgpr30_sgpr31, implicit $vgpr0 + ; CHECK-NEXT: [[COPY4:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY1]] + ; CHECK-NEXT: S_SETPC_B64_return [[COPY4]], implicit $vgpr0 %0:vgpr(s32) = COPY $vgpr0 + %1:sgpr_64 = COPY $sgpr30_sgpr31 %2:sgpr(s32) = G_CONSTANT i32 -12 %7:vgpr(s32) = COPY %2(s32) %3:vgpr(s32) = G_SMAX %7, %0 @@ -123,7 +138,8 @@ body: | %8:vgpr(s32) = COPY %4(s32) %5:vgpr(s32) = G_SMIN %8, %3 $vgpr0 = COPY %5(s32) - S_SETPC_B64_return undef $sgpr30_sgpr31, implicit $vgpr0 + %6:ccr_sgpr_64 = COPY %1 + S_SETPC_B64_return %6, implicit $vgpr0 ... 
--- @@ -133,20 +149,23 @@ regBankSelected: true tracksRegLiveness: true body: | bb.1: - liveins: $vgpr0 + liveins: $vgpr0, $sgpr30_sgpr31 ; CHECK-LABEL: name: test_max_min_ValK1_K0_i32 - ; CHECK: liveins: $vgpr0 + ; CHECK: liveins: $vgpr0, $sgpr30_sgpr31 ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 ; CHECK-NEXT: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 17 ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY [[C]](s32) ; CHECK-NEXT: [[C1:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 -12 ; CHECK-NEXT: [[COPY3:%[0-9]+]]:vgpr(s32) = COPY [[C1]](s32) ; CHECK-NEXT: [[AMDGPU_SMED3_:%[0-9]+]]:vgpr(s32) = G_AMDGPU_SMED3 [[COPY]], [[COPY3]], [[COPY2]] ; CHECK-NEXT: $vgpr0 = COPY [[AMDGPU_SMED3_]](s32) - ; CHECK-NEXT: S_SETPC_B64_return undef $sgpr30_sgpr31, implicit $vgpr0 + ; CHECK-NEXT: [[COPY4:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY1]] + ; CHECK-NEXT: S_SETPC_B64_return [[COPY4]], implicit $vgpr0 %0:vgpr(s32) = COPY $vgpr0 + %1:sgpr_64 = COPY $sgpr30_sgpr31 %2:sgpr(s32) = G_CONSTANT i32 17 %7:vgpr(s32) = COPY %2(s32) %3:vgpr(s32) = G_SMIN %0, %7 @@ -154,7 +173,8 @@ body: | %8:vgpr(s32) = COPY %4(s32) %5:vgpr(s32) = G_SMAX %3, %8 $vgpr0 = COPY %5(s32) - S_SETPC_B64_return undef $sgpr30_sgpr31, implicit $vgpr0 + %6:ccr_sgpr_64 = COPY %1 + S_SETPC_B64_return %6, implicit $vgpr0 ... --- @@ -164,20 +184,23 @@ regBankSelected: true tracksRegLiveness: true body: | bb.1: - liveins: $vgpr0 + liveins: $vgpr0, $sgpr30_sgpr31 ; CHECK-LABEL: name: test_max_min_K1Val_K0_i32 - ; CHECK: liveins: $vgpr0 + ; CHECK: liveins: $vgpr0, $sgpr30_sgpr31 ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 ; CHECK-NEXT: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 17 ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY [[C]](s32) ; CHECK-NEXT: [[C1:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 -12 ; CHECK-NEXT: [[COPY3:%[0-9]+]]:vgpr(s32) = COPY [[C1]](s32) ; CHECK-NEXT: [[AMDGPU_SMED3_:%[0-9]+]]:vgpr(s32) = G_AMDGPU_SMED3 [[COPY]], [[COPY3]], [[COPY2]] ; CHECK-NEXT: $vgpr0 = COPY [[AMDGPU_SMED3_]](s32) - ; CHECK-NEXT: S_SETPC_B64_return undef $sgpr30_sgpr31, implicit $vgpr0 + ; CHECK-NEXT: [[COPY4:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY1]] + ; CHECK-NEXT: S_SETPC_B64_return [[COPY4]], implicit $vgpr0 %0:vgpr(s32) = COPY $vgpr0 + %1:sgpr_64 = COPY $sgpr30_sgpr31 %2:sgpr(s32) = G_CONSTANT i32 17 %7:vgpr(s32) = COPY %2(s32) %3:vgpr(s32) = G_SMIN %7, %0 @@ -185,7 +208,8 @@ body: | %8:vgpr(s32) = COPY %4(s32) %5:vgpr(s32) = G_SMAX %3, %8 $vgpr0 = COPY %5(s32) - S_SETPC_B64_return undef $sgpr30_sgpr31, implicit $vgpr0 + %6:ccr_sgpr_64 = COPY %1 + S_SETPC_B64_return %6, implicit $vgpr0 ... 
--- @@ -195,20 +219,23 @@ regBankSelected: true tracksRegLiveness: true body: | bb.1: - liveins: $vgpr0 + liveins: $vgpr0, $sgpr30_sgpr31 ; CHECK-LABEL: name: test_max_K0min_ValK1__i32 - ; CHECK: liveins: $vgpr0 + ; CHECK: liveins: $vgpr0, $sgpr30_sgpr31 ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 ; CHECK-NEXT: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 17 ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY [[C]](s32) ; CHECK-NEXT: [[C1:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 -12 ; CHECK-NEXT: [[COPY3:%[0-9]+]]:vgpr(s32) = COPY [[C1]](s32) ; CHECK-NEXT: [[AMDGPU_SMED3_:%[0-9]+]]:vgpr(s32) = G_AMDGPU_SMED3 [[COPY]], [[COPY3]], [[COPY2]] ; CHECK-NEXT: $vgpr0 = COPY [[AMDGPU_SMED3_]](s32) - ; CHECK-NEXT: S_SETPC_B64_return undef $sgpr30_sgpr31, implicit $vgpr0 + ; CHECK-NEXT: [[COPY4:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY1]] + ; CHECK-NEXT: S_SETPC_B64_return [[COPY4]], implicit $vgpr0 %0:vgpr(s32) = COPY $vgpr0 + %1:sgpr_64 = COPY $sgpr30_sgpr31 %2:sgpr(s32) = G_CONSTANT i32 17 %7:vgpr(s32) = COPY %2(s32) %3:vgpr(s32) = G_SMIN %0, %7 @@ -216,7 +243,8 @@ body: | %8:vgpr(s32) = COPY %4(s32) %5:vgpr(s32) = G_SMAX %8, %3 $vgpr0 = COPY %5(s32) - S_SETPC_B64_return undef $sgpr30_sgpr31, implicit $vgpr0 + %6:ccr_sgpr_64 = COPY %1 + S_SETPC_B64_return %6, implicit $vgpr0 ... --- @@ -226,20 +254,23 @@ regBankSelected: true tracksRegLiveness: true body: | bb.1: - liveins: $vgpr0 + liveins: $vgpr0, $sgpr30_sgpr31 ; CHECK-LABEL: name: test_max_K0min_K1Val__i32 - ; CHECK: liveins: $vgpr0 + ; CHECK: liveins: $vgpr0, $sgpr30_sgpr31 ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 ; CHECK-NEXT: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 17 ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY [[C]](s32) ; CHECK-NEXT: [[C1:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 -12 ; CHECK-NEXT: [[COPY3:%[0-9]+]]:vgpr(s32) = COPY [[C1]](s32) ; CHECK-NEXT: [[AMDGPU_SMED3_:%[0-9]+]]:vgpr(s32) = G_AMDGPU_SMED3 [[COPY]], [[COPY3]], [[COPY2]] ; CHECK-NEXT: $vgpr0 = COPY [[AMDGPU_SMED3_]](s32) - ; CHECK-NEXT: S_SETPC_B64_return undef $sgpr30_sgpr31, implicit $vgpr0 + ; CHECK-NEXT: [[COPY4:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY1]] + ; CHECK-NEXT: S_SETPC_B64_return [[COPY4]], implicit $vgpr0 %0:vgpr(s32) = COPY $vgpr0 + %1:sgpr_64 = COPY $sgpr30_sgpr31 %2:sgpr(s32) = G_CONSTANT i32 17 %7:vgpr(s32) = COPY %2(s32) %3:vgpr(s32) = G_SMIN %7, %0 @@ -247,7 +278,8 @@ body: | %8:vgpr(s32) = COPY %4(s32) %5:vgpr(s32) = G_SMAX %8, %3 $vgpr0 = COPY %5(s32) - S_SETPC_B64_return undef $sgpr30_sgpr31, implicit $vgpr0 + %6:ccr_sgpr_64 = COPY %1 + S_SETPC_B64_return %6, implicit $vgpr0 ... 
--- @@ -257,12 +289,13 @@ regBankSelected: true tracksRegLiveness: true body: | bb.1: - liveins: $vgpr0 + liveins: $vgpr0, $sgpr30_sgpr31 ; CHECK-LABEL: name: test_max_K0min_K1Val__v2i16 - ; CHECK: liveins: $vgpr0 + ; CHECK: liveins: $vgpr0, $sgpr30_sgpr31 ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr(<2 x s16>) = COPY $vgpr0 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 ; CHECK-NEXT: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 17 ; CHECK-NEXT: [[BUILD_VECTOR_TRUNC:%[0-9]+]]:sgpr(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[C]](s32), [[C]](s32) ; CHECK-NEXT: [[C1:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 -12 @@ -272,8 +305,10 @@ body: | ; CHECK-NEXT: [[COPY3:%[0-9]+]]:vgpr(<2 x s16>) = COPY [[BUILD_VECTOR_TRUNC1]](<2 x s16>) ; CHECK-NEXT: [[SMAX:%[0-9]+]]:vgpr(<2 x s16>) = G_SMAX [[COPY3]], [[SMIN]] ; CHECK-NEXT: $vgpr0 = COPY [[SMAX]](<2 x s16>) - ; CHECK-NEXT: S_SETPC_B64_return undef $sgpr30_sgpr31, implicit $vgpr0 + ; CHECK-NEXT: [[COPY4:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY1]] + ; CHECK-NEXT: S_SETPC_B64_return [[COPY4]], implicit $vgpr0 %0:vgpr(<2 x s16>) = COPY $vgpr0 + %1:sgpr_64 = COPY $sgpr30_sgpr31 %9:sgpr(s32) = G_CONSTANT i32 17 %2:sgpr(<2 x s16>) = G_BUILD_VECTOR_TRUNC %9(s32), %9(s32) %10:sgpr(s32) = G_CONSTANT i32 -12 @@ -283,7 +318,8 @@ body: | %12:vgpr(<2 x s16>) = COPY %5(<2 x s16>) %7:vgpr(<2 x s16>) = G_SMAX %12, %4 $vgpr0 = COPY %7(<2 x s16>) - S_SETPC_B64_return undef $sgpr30_sgpr31, implicit $vgpr0 + %8:ccr_sgpr_64 = COPY %1 + S_SETPC_B64_return %8, implicit $vgpr0 ... --- @@ -325,20 +361,23 @@ regBankSelected: true tracksRegLiveness: true body: | bb.1: - liveins: $vgpr0 + liveins: $vgpr0, $sgpr30_sgpr31 ; CHECK-LABEL: name: test_non_inline_constant_i32 - ; CHECK: liveins: $vgpr0 + ; CHECK: liveins: $vgpr0, $sgpr30_sgpr31 ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 ; CHECK-NEXT: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 -12 ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY [[C]](s32) ; CHECK-NEXT: [[C1:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 65 ; CHECK-NEXT: [[COPY3:%[0-9]+]]:vgpr(s32) = COPY [[C1]](s32) ; CHECK-NEXT: [[AMDGPU_SMED3_:%[0-9]+]]:vgpr(s32) = G_AMDGPU_SMED3 [[COPY]], [[COPY2]], [[COPY3]] ; CHECK-NEXT: $vgpr0 = COPY [[AMDGPU_SMED3_]](s32) - ; CHECK-NEXT: S_SETPC_B64_return undef $sgpr30_sgpr31, implicit $vgpr0 + ; CHECK-NEXT: [[COPY4:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY1]] + ; CHECK-NEXT: S_SETPC_B64_return [[COPY4]], implicit $vgpr0 %0:vgpr(s32) = COPY $vgpr0 + %1:sgpr_64 = COPY $sgpr30_sgpr31 %2:sgpr(s32) = G_CONSTANT i32 -12 %7:vgpr(s32) = COPY %2(s32) %3:vgpr(s32) = G_SMAX %0, %7 @@ -346,5 +385,6 @@ body: | %8:vgpr(s32) = COPY %4(s32) %5:vgpr(s32) = G_SMIN %3, %8 $vgpr0 = COPY %5(s32) - S_SETPC_B64_return undef $sgpr30_sgpr31, implicit $vgpr0 + %6:ccr_sgpr_64 = COPY %1 + S_SETPC_B64_return %6, implicit $vgpr0 ... 
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankcombiner-umed3.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankcombiner-umed3.mir index 4ecc912fc7e4..8c5d796a60e0 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankcombiner-umed3.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankcombiner-umed3.mir @@ -9,20 +9,23 @@ regBankSelected: true tracksRegLiveness: true body: | bb.1: - liveins: $vgpr0 + liveins: $vgpr0, $sgpr30_sgpr31 ; CHECK-LABEL: name: test_min_max_ValK0_K1_u32 - ; CHECK: liveins: $vgpr0 + ; CHECK: liveins: $vgpr0, $sgpr30_sgpr31 ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 ; CHECK-NEXT: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 12 ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY [[C]](s32) ; CHECK-NEXT: [[C1:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 17 ; CHECK-NEXT: [[COPY3:%[0-9]+]]:vgpr(s32) = COPY [[C1]](s32) ; CHECK-NEXT: [[AMDGPU_UMED3_:%[0-9]+]]:vgpr(s32) = G_AMDGPU_UMED3 [[COPY]], [[COPY2]], [[COPY3]] ; CHECK-NEXT: $vgpr0 = COPY [[AMDGPU_UMED3_]](s32) - ; CHECK-NEXT: S_SETPC_B64_return undef $sgpr30_sgpr31, implicit $vgpr0 + ; CHECK-NEXT: [[COPY4:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY1]] + ; CHECK-NEXT: S_SETPC_B64_return [[COPY4]], implicit $vgpr0 %0:vgpr(s32) = COPY $vgpr0 + %1:sgpr_64 = COPY $sgpr30_sgpr31 %2:sgpr(s32) = G_CONSTANT i32 12 %7:vgpr(s32) = COPY %2(s32) %3:vgpr(s32) = G_UMAX %0, %7 @@ -30,7 +33,8 @@ body: | %8:vgpr(s32) = COPY %4(s32) %5:vgpr(s32) = G_UMIN %3, %8 $vgpr0 = COPY %5(s32) - S_SETPC_B64_return undef $sgpr30_sgpr31, implicit $vgpr0 + %6:ccr_sgpr_64 = COPY %1 + S_SETPC_B64_return %6, implicit $vgpr0 ... --- @@ -40,20 +44,23 @@ regBankSelected: true tracksRegLiveness: true body: | bb.1: - liveins: $vgpr0 + liveins: $vgpr0, $sgpr30_sgpr31 ; CHECK-LABEL: name: min_max_ValK0_K1_i32 - ; CHECK: liveins: $vgpr0 + ; CHECK: liveins: $vgpr0, $sgpr30_sgpr31 ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 ; CHECK-NEXT: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 12 ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY [[C]](s32) ; CHECK-NEXT: [[C1:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 17 ; CHECK-NEXT: [[COPY3:%[0-9]+]]:vgpr(s32) = COPY [[C1]](s32) ; CHECK-NEXT: [[AMDGPU_UMED3_:%[0-9]+]]:vgpr(s32) = G_AMDGPU_UMED3 [[COPY]], [[COPY2]], [[COPY3]] ; CHECK-NEXT: $vgpr0 = COPY [[AMDGPU_UMED3_]](s32) - ; CHECK-NEXT: S_SETPC_B64_return undef $sgpr30_sgpr31, implicit $vgpr0 + ; CHECK-NEXT: [[COPY4:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY1]] + ; CHECK-NEXT: S_SETPC_B64_return [[COPY4]], implicit $vgpr0 %0:vgpr(s32) = COPY $vgpr0 + %1:sgpr_64 = COPY $sgpr30_sgpr31 %2:sgpr(s32) = G_CONSTANT i32 12 %7:vgpr(s32) = COPY %2(s32) %3:vgpr(s32) = G_UMAX %7, %0 @@ -61,7 +68,8 @@ body: | %8:vgpr(s32) = COPY %4(s32) %5:vgpr(s32) = G_UMIN %3, %8 $vgpr0 = COPY %5(s32) - S_SETPC_B64_return undef $sgpr30_sgpr31, implicit $vgpr0 + %6:ccr_sgpr_64 = COPY %1 + S_SETPC_B64_return %6, implicit $vgpr0 ... 
--- @@ -71,20 +79,23 @@ regBankSelected: true tracksRegLiveness: true body: | bb.1: - liveins: $vgpr0 + liveins: $vgpr0, $sgpr30_sgpr31 ; CHECK-LABEL: name: test_min_K1max_ValK0__u32 - ; CHECK: liveins: $vgpr0 + ; CHECK: liveins: $vgpr0, $sgpr30_sgpr31 ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 ; CHECK-NEXT: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 12 ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY [[C]](s32) ; CHECK-NEXT: [[C1:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 17 ; CHECK-NEXT: [[COPY3:%[0-9]+]]:vgpr(s32) = COPY [[C1]](s32) ; CHECK-NEXT: [[AMDGPU_UMED3_:%[0-9]+]]:vgpr(s32) = G_AMDGPU_UMED3 [[COPY]], [[COPY2]], [[COPY3]] ; CHECK-NEXT: $vgpr0 = COPY [[AMDGPU_UMED3_]](s32) - ; CHECK-NEXT: S_SETPC_B64_return undef $sgpr30_sgpr31, implicit $vgpr0 + ; CHECK-NEXT: [[COPY4:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY1]] + ; CHECK-NEXT: S_SETPC_B64_return [[COPY4]], implicit $vgpr0 %0:vgpr(s32) = COPY $vgpr0 + %1:sgpr_64 = COPY $sgpr30_sgpr31 %2:sgpr(s32) = G_CONSTANT i32 12 %7:vgpr(s32) = COPY %2(s32) %3:vgpr(s32) = G_UMAX %0, %7 @@ -92,7 +103,8 @@ body: | %8:vgpr(s32) = COPY %4(s32) %5:vgpr(s32) = G_UMIN %8, %3 $vgpr0 = COPY %5(s32) - S_SETPC_B64_return undef $sgpr30_sgpr31, implicit $vgpr0 + %6:ccr_sgpr_64 = COPY %1 + S_SETPC_B64_return %6, implicit $vgpr0 ... --- @@ -102,20 +114,23 @@ regBankSelected: true tracksRegLiveness: true body: | bb.1: - liveins: $vgpr0 + liveins: $vgpr0, $sgpr30_sgpr31 ; CHECK-LABEL: name: test_min_K1max_K0Val__u32 - ; CHECK: liveins: $vgpr0 + ; CHECK: liveins: $vgpr0, $sgpr30_sgpr31 ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 ; CHECK-NEXT: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 12 ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY [[C]](s32) ; CHECK-NEXT: [[C1:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 17 ; CHECK-NEXT: [[COPY3:%[0-9]+]]:vgpr(s32) = COPY [[C1]](s32) ; CHECK-NEXT: [[AMDGPU_UMED3_:%[0-9]+]]:vgpr(s32) = G_AMDGPU_UMED3 [[COPY]], [[COPY2]], [[COPY3]] ; CHECK-NEXT: $vgpr0 = COPY [[AMDGPU_UMED3_]](s32) - ; CHECK-NEXT: S_SETPC_B64_return undef $sgpr30_sgpr31, implicit $vgpr0 + ; CHECK-NEXT: [[COPY4:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY1]] + ; CHECK-NEXT: S_SETPC_B64_return [[COPY4]], implicit $vgpr0 %0:vgpr(s32) = COPY $vgpr0 + %1:sgpr_64 = COPY $sgpr30_sgpr31 %2:sgpr(s32) = G_CONSTANT i32 12 %7:vgpr(s32) = COPY %2(s32) %3:vgpr(s32) = G_UMAX %7, %0 @@ -123,7 +138,8 @@ body: | %8:vgpr(s32) = COPY %4(s32) %5:vgpr(s32) = G_UMIN %8, %3 $vgpr0 = COPY %5(s32) - S_SETPC_B64_return undef $sgpr30_sgpr31, implicit $vgpr0 + %6:ccr_sgpr_64 = COPY %1 + S_SETPC_B64_return %6, implicit $vgpr0 ... 
--- @@ -133,20 +149,23 @@ regBankSelected: true tracksRegLiveness: true body: | bb.1: - liveins: $vgpr0 + liveins: $vgpr0, $sgpr30_sgpr31 ; CHECK-LABEL: name: test_max_min_ValK1_K0_u32 - ; CHECK: liveins: $vgpr0 + ; CHECK: liveins: $vgpr0, $sgpr30_sgpr31 ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 ; CHECK-NEXT: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 17 ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY [[C]](s32) ; CHECK-NEXT: [[C1:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 12 ; CHECK-NEXT: [[COPY3:%[0-9]+]]:vgpr(s32) = COPY [[C1]](s32) ; CHECK-NEXT: [[AMDGPU_UMED3_:%[0-9]+]]:vgpr(s32) = G_AMDGPU_UMED3 [[COPY]], [[COPY3]], [[COPY2]] ; CHECK-NEXT: $vgpr0 = COPY [[AMDGPU_UMED3_]](s32) - ; CHECK-NEXT: S_SETPC_B64_return undef $sgpr30_sgpr31, implicit $vgpr0 + ; CHECK-NEXT: [[COPY4:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY1]] + ; CHECK-NEXT: S_SETPC_B64_return [[COPY4]], implicit $vgpr0 %0:vgpr(s32) = COPY $vgpr0 + %1:sgpr_64 = COPY $sgpr30_sgpr31 %2:sgpr(s32) = G_CONSTANT i32 17 %7:vgpr(s32) = COPY %2(s32) %3:vgpr(s32) = G_UMIN %0, %7 @@ -154,7 +173,8 @@ body: | %8:vgpr(s32) = COPY %4(s32) %5:vgpr(s32) = G_UMAX %3, %8 $vgpr0 = COPY %5(s32) - S_SETPC_B64_return undef $sgpr30_sgpr31, implicit $vgpr0 + %6:ccr_sgpr_64 = COPY %1 + S_SETPC_B64_return %6, implicit $vgpr0 ... --- @@ -164,20 +184,23 @@ regBankSelected: true tracksRegLiveness: true body: | bb.1: - liveins: $vgpr0 + liveins: $vgpr0, $sgpr30_sgpr31 ; CHECK-LABEL: name: test_max_min_K1Val_K0_u32 - ; CHECK: liveins: $vgpr0 + ; CHECK: liveins: $vgpr0, $sgpr30_sgpr31 ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 ; CHECK-NEXT: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 17 ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY [[C]](s32) ; CHECK-NEXT: [[C1:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 12 ; CHECK-NEXT: [[COPY3:%[0-9]+]]:vgpr(s32) = COPY [[C1]](s32) ; CHECK-NEXT: [[AMDGPU_UMED3_:%[0-9]+]]:vgpr(s32) = G_AMDGPU_UMED3 [[COPY]], [[COPY3]], [[COPY2]] ; CHECK-NEXT: $vgpr0 = COPY [[AMDGPU_UMED3_]](s32) - ; CHECK-NEXT: S_SETPC_B64_return undef $sgpr30_sgpr31, implicit $vgpr0 + ; CHECK-NEXT: [[COPY4:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY1]] + ; CHECK-NEXT: S_SETPC_B64_return [[COPY4]], implicit $vgpr0 %0:vgpr(s32) = COPY $vgpr0 + %1:sgpr_64 = COPY $sgpr30_sgpr31 %2:sgpr(s32) = G_CONSTANT i32 17 %7:vgpr(s32) = COPY %2(s32) %3:vgpr(s32) = G_UMIN %7, %0 @@ -185,7 +208,8 @@ body: | %8:vgpr(s32) = COPY %4(s32) %5:vgpr(s32) = G_UMAX %3, %8 $vgpr0 = COPY %5(s32) - S_SETPC_B64_return undef $sgpr30_sgpr31, implicit $vgpr0 + %6:ccr_sgpr_64 = COPY %1 + S_SETPC_B64_return %6, implicit $vgpr0 ... 
--- @@ -195,20 +219,23 @@ regBankSelected: true tracksRegLiveness: true body: | bb.1: - liveins: $vgpr0 + liveins: $vgpr0, $sgpr30_sgpr31 ; CHECK-LABEL: name: test_max_K0min_ValK1__u32 - ; CHECK: liveins: $vgpr0 + ; CHECK: liveins: $vgpr0, $sgpr30_sgpr31 ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 ; CHECK-NEXT: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 17 ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY [[C]](s32) ; CHECK-NEXT: [[C1:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 12 ; CHECK-NEXT: [[COPY3:%[0-9]+]]:vgpr(s32) = COPY [[C1]](s32) ; CHECK-NEXT: [[AMDGPU_UMED3_:%[0-9]+]]:vgpr(s32) = G_AMDGPU_UMED3 [[COPY]], [[COPY3]], [[COPY2]] ; CHECK-NEXT: $vgpr0 = COPY [[AMDGPU_UMED3_]](s32) - ; CHECK-NEXT: S_SETPC_B64_return undef $sgpr30_sgpr31, implicit $vgpr0 + ; CHECK-NEXT: [[COPY4:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY1]] + ; CHECK-NEXT: S_SETPC_B64_return [[COPY4]], implicit $vgpr0 %0:vgpr(s32) = COPY $vgpr0 + %1:sgpr_64 = COPY $sgpr30_sgpr31 %2:sgpr(s32) = G_CONSTANT i32 17 %7:vgpr(s32) = COPY %2(s32) %3:vgpr(s32) = G_UMIN %0, %7 @@ -216,7 +243,8 @@ body: | %8:vgpr(s32) = COPY %4(s32) %5:vgpr(s32) = G_UMAX %8, %3 $vgpr0 = COPY %5(s32) - S_SETPC_B64_return undef $sgpr30_sgpr31, implicit $vgpr0 + %6:ccr_sgpr_64 = COPY %1 + S_SETPC_B64_return %6, implicit $vgpr0 ... --- @@ -226,20 +254,23 @@ regBankSelected: true tracksRegLiveness: true body: | bb.1: - liveins: $vgpr0 + liveins: $vgpr0, $sgpr30_sgpr31 ; CHECK-LABEL: name: test_max_K0min_K1Val__u32 - ; CHECK: liveins: $vgpr0 + ; CHECK: liveins: $vgpr0, $sgpr30_sgpr31 ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 ; CHECK-NEXT: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 17 ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY [[C]](s32) ; CHECK-NEXT: [[C1:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 12 ; CHECK-NEXT: [[COPY3:%[0-9]+]]:vgpr(s32) = COPY [[C1]](s32) ; CHECK-NEXT: [[AMDGPU_UMED3_:%[0-9]+]]:vgpr(s32) = G_AMDGPU_UMED3 [[COPY]], [[COPY3]], [[COPY2]] ; CHECK-NEXT: $vgpr0 = COPY [[AMDGPU_UMED3_]](s32) - ; CHECK-NEXT: S_SETPC_B64_return undef $sgpr30_sgpr31, implicit $vgpr0 + ; CHECK-NEXT: [[COPY4:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY1]] + ; CHECK-NEXT: S_SETPC_B64_return [[COPY4]], implicit $vgpr0 %0:vgpr(s32) = COPY $vgpr0 + %1:sgpr_64 = COPY $sgpr30_sgpr31 %2:sgpr(s32) = G_CONSTANT i32 17 %7:vgpr(s32) = COPY %2(s32) %3:vgpr(s32) = G_UMIN %7, %0 @@ -247,7 +278,8 @@ body: | %8:vgpr(s32) = COPY %4(s32) %5:vgpr(s32) = G_UMAX %8, %3 $vgpr0 = COPY %5(s32) - S_SETPC_B64_return undef $sgpr30_sgpr31, implicit $vgpr0 + %6:ccr_sgpr_64 = COPY %1 + S_SETPC_B64_return %6, implicit $vgpr0 ... 
--- @@ -257,12 +289,13 @@ regBankSelected: true tracksRegLiveness: true body: | bb.1: - liveins: $vgpr0 + liveins: $vgpr0, $sgpr30_sgpr31 ; CHECK-LABEL: name: test_max_K0min_K1Val__v2u16 - ; CHECK: liveins: $vgpr0 + ; CHECK: liveins: $vgpr0, $sgpr30_sgpr31 ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr(<2 x s16>) = COPY $vgpr0 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 ; CHECK-NEXT: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 17 ; CHECK-NEXT: [[BUILD_VECTOR_TRUNC:%[0-9]+]]:sgpr(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[C]](s32), [[C]](s32) ; CHECK-NEXT: [[C1:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 12 @@ -272,8 +305,10 @@ body: | ; CHECK-NEXT: [[COPY3:%[0-9]+]]:vgpr(<2 x s16>) = COPY [[BUILD_VECTOR_TRUNC1]](<2 x s16>) ; CHECK-NEXT: [[UMAX:%[0-9]+]]:vgpr(<2 x s16>) = G_UMAX [[COPY3]], [[UMIN]] ; CHECK-NEXT: $vgpr0 = COPY [[UMAX]](<2 x s16>) - ; CHECK-NEXT: S_SETPC_B64_return undef $sgpr30_sgpr31, implicit $vgpr0 + ; CHECK-NEXT: [[COPY4:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY1]] + ; CHECK-NEXT: S_SETPC_B64_return [[COPY4]], implicit $vgpr0 %0:vgpr(<2 x s16>) = COPY $vgpr0 + %1:sgpr_64 = COPY $sgpr30_sgpr31 %9:sgpr(s32) = G_CONSTANT i32 17 %2:sgpr(<2 x s16>) = G_BUILD_VECTOR_TRUNC %9(s32), %9(s32) %10:sgpr(s32) = G_CONSTANT i32 12 @@ -283,7 +318,8 @@ body: | %12:vgpr(<2 x s16>) = COPY %5(<2 x s16>) %7:vgpr(<2 x s16>) = G_UMAX %12, %4 $vgpr0 = COPY %7(<2 x s16>) - S_SETPC_B64_return undef $sgpr30_sgpr31, implicit $vgpr0 + %8:ccr_sgpr_64 = COPY %1 + S_SETPC_B64_return %8, implicit $vgpr0 ... --- @@ -326,20 +362,23 @@ regBankSelected: true tracksRegLiveness: true body: | bb.1: - liveins: $vgpr0 + liveins: $vgpr0, $sgpr30_sgpr31 ; CHECK-LABEL: name: test_non_inline_constant_i32 - ; CHECK: liveins: $vgpr0 + ; CHECK: liveins: $vgpr0, $sgpr30_sgpr31 ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 ; CHECK-NEXT: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 12 ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY [[C]](s32) ; CHECK-NEXT: [[C1:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 65 ; CHECK-NEXT: [[COPY3:%[0-9]+]]:vgpr(s32) = COPY [[C1]](s32) ; CHECK-NEXT: [[AMDGPU_UMED3_:%[0-9]+]]:vgpr(s32) = G_AMDGPU_UMED3 [[COPY]], [[COPY2]], [[COPY3]] ; CHECK-NEXT: $vgpr0 = COPY [[AMDGPU_UMED3_]](s32) - ; CHECK-NEXT: S_SETPC_B64_return undef $sgpr30_sgpr31, implicit $vgpr0 + ; CHECK-NEXT: [[COPY4:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY1]] + ; CHECK-NEXT: S_SETPC_B64_return [[COPY4]], implicit $vgpr0 %0:vgpr(s32) = COPY $vgpr0 + %1:sgpr_64 = COPY $sgpr30_sgpr31 %2:sgpr(s32) = G_CONSTANT i32 12 %7:vgpr(s32) = COPY %2(s32) %3:vgpr(s32) = G_UMAX %0, %7 @@ -347,5 +386,6 @@ body: | %8:vgpr(s32) = COPY %4(s32) %5:vgpr(s32) = G_UMIN %3, %8 $vgpr0 = COPY %5(s32) - S_SETPC_B64_return undef $sgpr30_sgpr31, implicit $vgpr0 + %6:ccr_sgpr_64 = COPY %1 + S_SETPC_B64_return %6, implicit $vgpr0 ... 
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/roundeven.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/roundeven.ll index 641602657005..d9385251a4dc 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/roundeven.ll +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/roundeven.ll @@ -401,13 +401,13 @@ define <4 x half> @v_roundeven_v4f16(<4 x half> %x) { ; GFX10: ; %bb.0: ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 -; GFX10-NEXT: v_mov_b32_e32 v2, 0xffff -; GFX10-NEXT: v_rndne_f16_e32 v3, v0 +; GFX10-NEXT: v_rndne_f16_e32 v2, v0 ; GFX10-NEXT: v_rndne_f16_sdwa v0, v0 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 -; GFX10-NEXT: v_rndne_f16_e32 v4, v1 +; GFX10-NEXT: v_rndne_f16_e32 v3, v1 +; GFX10-NEXT: v_mov_b32_e32 v4, 0xffff ; GFX10-NEXT: v_rndne_f16_sdwa v1, v1 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 -; GFX10-NEXT: v_and_or_b32 v0, v3, v2, v0 -; GFX10-NEXT: v_and_or_b32 v1, v4, v2, v1 +; GFX10-NEXT: v_and_or_b32 v0, v2, v4, v0 +; GFX10-NEXT: v_and_or_b32 v1, v3, v4, v1 ; GFX10-NEXT: s_setpc_b64 s[30:31] %roundeven = call <4 x half> @llvm.roundeven.v4f16(<4 x half> %x) ret <4 x half> %roundeven diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/saddsat.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/saddsat.ll index 1389e96f592c..c118e757ef53 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/saddsat.ll +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/saddsat.ll @@ -642,25 +642,25 @@ define i32 @v_saddsat_v4i8(i32 %lhs.arg, i32 %rhs.arg) { ; GFX10-NEXT: v_lshrrev_b32_e32 v4, 24, v0 ; GFX10-NEXT: v_lshrrev_b32_e32 v5, 24, v1 ; GFX10-NEXT: s_mov_b32 s4, 8 -; GFX10-NEXT: v_mov_b32_e32 v2, 0xffff -; GFX10-NEXT: v_lshrrev_b32_sdwa v3, s4, v0 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD -; GFX10-NEXT: v_lshrrev_b32_e32 v6, 16, v0 -; GFX10-NEXT: v_lshrrev_b32_sdwa v7, s4, v1 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD +; GFX10-NEXT: v_lshrrev_b32_e32 v3, 16, v0 +; GFX10-NEXT: v_lshrrev_b32_sdwa v2, s4, v0 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD +; GFX10-NEXT: v_lshrrev_b32_sdwa v6, s4, v1 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD +; GFX10-NEXT: v_mov_b32_e32 v7, 0xffff ; GFX10-NEXT: v_lshrrev_b32_e32 v8, 16, v1 ; GFX10-NEXT: v_lshlrev_b32_e32 v4, 16, v4 ; GFX10-NEXT: v_lshlrev_b32_e32 v5, 16, v5 -; GFX10-NEXT: v_and_or_b32 v0, v0, v2, v3 -; GFX10-NEXT: v_and_or_b32 v1, v1, v2, v7 ; GFX10-NEXT: s_movk_i32 s4, 0xff -; GFX10-NEXT: v_and_or_b32 v3, v6, v2, v4 -; GFX10-NEXT: v_and_or_b32 v2, v8, v2, v5 +; GFX10-NEXT: v_and_or_b32 v0, v0, v7, v2 +; GFX10-NEXT: v_and_or_b32 v1, v1, v7, v6 +; GFX10-NEXT: v_and_or_b32 v2, v3, v7, v4 +; GFX10-NEXT: v_and_or_b32 v3, v8, v7, v5 +; GFX10-NEXT: v_mov_b32_e32 v4, 24 ; GFX10-NEXT: v_pk_lshlrev_b16 v0, 8, v0 op_sel_hi:[0,1] ; GFX10-NEXT: v_pk_lshlrev_b16 v1, 8, v1 op_sel_hi:[0,1] -; GFX10-NEXT: v_mov_b32_e32 v4, 24 -; GFX10-NEXT: v_pk_lshlrev_b16 v3, 8, v3 op_sel_hi:[0,1] ; GFX10-NEXT: v_pk_lshlrev_b16 v2, 8, v2 op_sel_hi:[0,1] +; GFX10-NEXT: v_pk_lshlrev_b16 v3, 8, v3 op_sel_hi:[0,1] ; GFX10-NEXT: v_pk_add_i16 v0, v0, v1 clamp -; GFX10-NEXT: v_pk_add_i16 v1, v3, v2 clamp +; GFX10-NEXT: v_pk_add_i16 v1, v2, v3 clamp ; GFX10-NEXT: v_mov_b32_e32 v2, 8 ; GFX10-NEXT: v_pk_ashrrev_i16 v0, 8, v0 op_sel_hi:[0,1] ; GFX10-NEXT: v_pk_ashrrev_i16 v1, 8, v1 op_sel_hi:[0,1] diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/ssubsat.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/ssubsat.ll index 8fee6c91474c..1f8a322dbdd1 100644 --- 
a/llvm/test/CodeGen/AMDGPU/GlobalISel/ssubsat.ll +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/ssubsat.ll @@ -642,25 +642,25 @@ define i32 @v_ssubsat_v4i8(i32 %lhs.arg, i32 %rhs.arg) { ; GFX10-NEXT: v_lshrrev_b32_e32 v4, 24, v0 ; GFX10-NEXT: v_lshrrev_b32_e32 v5, 24, v1 ; GFX10-NEXT: s_mov_b32 s4, 8 -; GFX10-NEXT: v_mov_b32_e32 v2, 0xffff -; GFX10-NEXT: v_lshrrev_b32_sdwa v3, s4, v0 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD -; GFX10-NEXT: v_lshrrev_b32_e32 v6, 16, v0 -; GFX10-NEXT: v_lshrrev_b32_sdwa v7, s4, v1 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD +; GFX10-NEXT: v_lshrrev_b32_e32 v3, 16, v0 +; GFX10-NEXT: v_lshrrev_b32_sdwa v2, s4, v0 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD +; GFX10-NEXT: v_lshrrev_b32_sdwa v6, s4, v1 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD +; GFX10-NEXT: v_mov_b32_e32 v7, 0xffff ; GFX10-NEXT: v_lshrrev_b32_e32 v8, 16, v1 ; GFX10-NEXT: v_lshlrev_b32_e32 v4, 16, v4 ; GFX10-NEXT: v_lshlrev_b32_e32 v5, 16, v5 -; GFX10-NEXT: v_and_or_b32 v0, v0, v2, v3 -; GFX10-NEXT: v_and_or_b32 v1, v1, v2, v7 ; GFX10-NEXT: s_movk_i32 s4, 0xff -; GFX10-NEXT: v_and_or_b32 v3, v6, v2, v4 -; GFX10-NEXT: v_and_or_b32 v2, v8, v2, v5 +; GFX10-NEXT: v_and_or_b32 v0, v0, v7, v2 +; GFX10-NEXT: v_and_or_b32 v1, v1, v7, v6 +; GFX10-NEXT: v_and_or_b32 v2, v3, v7, v4 +; GFX10-NEXT: v_and_or_b32 v3, v8, v7, v5 +; GFX10-NEXT: v_mov_b32_e32 v4, 24 ; GFX10-NEXT: v_pk_lshlrev_b16 v0, 8, v0 op_sel_hi:[0,1] ; GFX10-NEXT: v_pk_lshlrev_b16 v1, 8, v1 op_sel_hi:[0,1] -; GFX10-NEXT: v_mov_b32_e32 v4, 24 -; GFX10-NEXT: v_pk_lshlrev_b16 v3, 8, v3 op_sel_hi:[0,1] ; GFX10-NEXT: v_pk_lshlrev_b16 v2, 8, v2 op_sel_hi:[0,1] +; GFX10-NEXT: v_pk_lshlrev_b16 v3, 8, v3 op_sel_hi:[0,1] ; GFX10-NEXT: v_pk_sub_i16 v0, v0, v1 clamp -; GFX10-NEXT: v_pk_sub_i16 v1, v3, v2 clamp +; GFX10-NEXT: v_pk_sub_i16 v1, v2, v3 clamp ; GFX10-NEXT: v_mov_b32_e32 v2, 8 ; GFX10-NEXT: v_pk_ashrrev_i16 v0, 8, v0 op_sel_hi:[0,1] ; GFX10-NEXT: v_pk_ashrrev_i16 v1, 8, v1 op_sel_hi:[0,1] diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/uaddsat.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/uaddsat.ll index 8e0d11434692..503b45d49991 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/uaddsat.ll +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/uaddsat.ll @@ -471,25 +471,25 @@ define i32 @v_uaddsat_v4i8(i32 %lhs.arg, i32 %rhs.arg) { ; GFX10-NEXT: v_lshrrev_b32_e32 v4, 24, v0 ; GFX10-NEXT: v_lshrrev_b32_e32 v5, 24, v1 ; GFX10-NEXT: s_mov_b32 s4, 8 -; GFX10-NEXT: v_mov_b32_e32 v2, 0xffff -; GFX10-NEXT: v_lshrrev_b32_sdwa v3, s4, v0 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD -; GFX10-NEXT: v_lshrrev_b32_e32 v6, 16, v0 -; GFX10-NEXT: v_lshrrev_b32_sdwa v7, s4, v1 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD +; GFX10-NEXT: v_lshrrev_b32_e32 v3, 16, v0 +; GFX10-NEXT: v_lshrrev_b32_sdwa v2, s4, v0 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD +; GFX10-NEXT: v_lshrrev_b32_sdwa v6, s4, v1 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD +; GFX10-NEXT: v_mov_b32_e32 v7, 0xffff ; GFX10-NEXT: v_lshrrev_b32_e32 v8, 16, v1 ; GFX10-NEXT: v_lshlrev_b32_e32 v4, 16, v4 ; GFX10-NEXT: v_lshlrev_b32_e32 v5, 16, v5 -; GFX10-NEXT: v_and_or_b32 v0, v0, v2, v3 -; GFX10-NEXT: v_and_or_b32 v1, v1, v2, v7 ; GFX10-NEXT: s_movk_i32 s4, 0xff -; GFX10-NEXT: v_and_or_b32 v3, v6, v2, v4 -; GFX10-NEXT: v_and_or_b32 v2, v8, v2, v5 +; GFX10-NEXT: v_and_or_b32 v0, v0, v7, v2 +; GFX10-NEXT: v_and_or_b32 v1, v1, v7, v6 +; 
GFX10-NEXT: v_and_or_b32 v2, v3, v7, v4 +; GFX10-NEXT: v_and_or_b32 v3, v8, v7, v5 +; GFX10-NEXT: v_mov_b32_e32 v4, 24 ; GFX10-NEXT: v_pk_lshlrev_b16 v0, 8, v0 op_sel_hi:[0,1] ; GFX10-NEXT: v_pk_lshlrev_b16 v1, 8, v1 op_sel_hi:[0,1] -; GFX10-NEXT: v_mov_b32_e32 v4, 24 -; GFX10-NEXT: v_pk_lshlrev_b16 v3, 8, v3 op_sel_hi:[0,1] ; GFX10-NEXT: v_pk_lshlrev_b16 v2, 8, v2 op_sel_hi:[0,1] +; GFX10-NEXT: v_pk_lshlrev_b16 v3, 8, v3 op_sel_hi:[0,1] ; GFX10-NEXT: v_pk_add_u16 v0, v0, v1 clamp -; GFX10-NEXT: v_pk_add_u16 v1, v3, v2 clamp +; GFX10-NEXT: v_pk_add_u16 v1, v2, v3 clamp ; GFX10-NEXT: v_mov_b32_e32 v2, 8 ; GFX10-NEXT: v_pk_lshrrev_b16 v0, 8, v0 op_sel_hi:[0,1] ; GFX10-NEXT: v_pk_lshrrev_b16 v1, 8, v1 op_sel_hi:[0,1] diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/usubsat.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/usubsat.ll index 82959c8f22cb..a4da5822dac5 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/usubsat.ll +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/usubsat.ll @@ -459,25 +459,25 @@ define i32 @v_usubsat_v4i8(i32 %lhs.arg, i32 %rhs.arg) { ; GFX10-NEXT: v_lshrrev_b32_e32 v4, 24, v0 ; GFX10-NEXT: v_lshrrev_b32_e32 v5, 24, v1 ; GFX10-NEXT: s_mov_b32 s4, 8 -; GFX10-NEXT: v_mov_b32_e32 v2, 0xffff -; GFX10-NEXT: v_lshrrev_b32_sdwa v3, s4, v0 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD -; GFX10-NEXT: v_lshrrev_b32_e32 v6, 16, v0 -; GFX10-NEXT: v_lshrrev_b32_sdwa v7, s4, v1 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD +; GFX10-NEXT: v_lshrrev_b32_e32 v3, 16, v0 +; GFX10-NEXT: v_lshrrev_b32_sdwa v2, s4, v0 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD +; GFX10-NEXT: v_lshrrev_b32_sdwa v6, s4, v1 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD +; GFX10-NEXT: v_mov_b32_e32 v7, 0xffff ; GFX10-NEXT: v_lshrrev_b32_e32 v8, 16, v1 ; GFX10-NEXT: v_lshlrev_b32_e32 v4, 16, v4 ; GFX10-NEXT: v_lshlrev_b32_e32 v5, 16, v5 -; GFX10-NEXT: v_and_or_b32 v0, v0, v2, v3 -; GFX10-NEXT: v_and_or_b32 v1, v1, v2, v7 ; GFX10-NEXT: s_movk_i32 s4, 0xff -; GFX10-NEXT: v_and_or_b32 v3, v6, v2, v4 -; GFX10-NEXT: v_and_or_b32 v2, v8, v2, v5 +; GFX10-NEXT: v_and_or_b32 v0, v0, v7, v2 +; GFX10-NEXT: v_and_or_b32 v1, v1, v7, v6 +; GFX10-NEXT: v_and_or_b32 v2, v3, v7, v4 +; GFX10-NEXT: v_and_or_b32 v3, v8, v7, v5 +; GFX10-NEXT: v_mov_b32_e32 v4, 24 ; GFX10-NEXT: v_pk_lshlrev_b16 v0, 8, v0 op_sel_hi:[0,1] ; GFX10-NEXT: v_pk_lshlrev_b16 v1, 8, v1 op_sel_hi:[0,1] -; GFX10-NEXT: v_mov_b32_e32 v4, 24 -; GFX10-NEXT: v_pk_lshlrev_b16 v3, 8, v3 op_sel_hi:[0,1] ; GFX10-NEXT: v_pk_lshlrev_b16 v2, 8, v2 op_sel_hi:[0,1] +; GFX10-NEXT: v_pk_lshlrev_b16 v3, 8, v3 op_sel_hi:[0,1] ; GFX10-NEXT: v_pk_sub_u16 v0, v0, v1 clamp -; GFX10-NEXT: v_pk_sub_u16 v1, v3, v2 clamp +; GFX10-NEXT: v_pk_sub_u16 v1, v2, v3 clamp ; GFX10-NEXT: v_mov_b32_e32 v2, 8 ; GFX10-NEXT: v_pk_lshrrev_b16 v0, 8, v0 op_sel_hi:[0,1] ; GFX10-NEXT: v_pk_lshrrev_b16 v1, 8, v1 op_sel_hi:[0,1] diff --git a/llvm/test/CodeGen/AMDGPU/abi-attribute-hints-undefined-behavior.ll b/llvm/test/CodeGen/AMDGPU/abi-attribute-hints-undefined-behavior.ll index dcde1b181b0c..11084620176d 100644 --- a/llvm/test/CodeGen/AMDGPU/abi-attribute-hints-undefined-behavior.ll +++ b/llvm/test/CodeGen/AMDGPU/abi-attribute-hints-undefined-behavior.ll @@ -29,15 +29,15 @@ define void @parent_func_missing_inputs() #0 { ; FIXEDABI-NEXT: s_add_u32 s16, s16, requires_all_inputs@rel32@lo+4 ; FIXEDABI-NEXT: s_addc_u32 s17, s17, requires_all_inputs@rel32@hi+12 ; FIXEDABI-NEXT: s_swappc_b64 s[30:31], s[16:17] -; FIXEDABI-NEXT: 
v_readlane_b32 s31, v40, 1 -; FIXEDABI-NEXT: v_readlane_b32 s30, v40, 0 +; FIXEDABI-NEXT: v_readlane_b32 s4, v40, 0 +; FIXEDABI-NEXT: v_readlane_b32 s5, v40, 1 ; FIXEDABI-NEXT: s_addk_i32 s32, 0xfc00 ; FIXEDABI-NEXT: v_readlane_b32 s33, v40, 2 -; FIXEDABI-NEXT: s_or_saveexec_b64 s[4:5], -1 +; FIXEDABI-NEXT: s_or_saveexec_b64 s[6:7], -1 ; FIXEDABI-NEXT: buffer_load_dword v40, off, s[0:3], s32 ; 4-byte Folded Reload -; FIXEDABI-NEXT: s_mov_b64 exec, s[4:5] +; FIXEDABI-NEXT: s_mov_b64 exec, s[6:7] ; FIXEDABI-NEXT: s_waitcnt vmcnt(0) -; FIXEDABI-NEXT: s_setpc_b64 s[30:31] +; FIXEDABI-NEXT: s_setpc_b64 s[4:5] call void @requires_all_inputs() ret void } diff --git a/llvm/test/CodeGen/AMDGPU/amdpal-callable.ll b/llvm/test/CodeGen/AMDGPU/amdpal-callable.ll index 65f5c623a4e7..546bba7146b4 100644 --- a/llvm/test/CodeGen/AMDGPU/amdpal-callable.ll +++ b/llvm/test/CodeGen/AMDGPU/amdpal-callable.ll @@ -173,9 +173,9 @@ attributes #0 = { nounwind } ; GCN-NEXT: .vgpr_count: 0x1{{$}} ; GCN-NEXT: no_stack_call: ; GCN-NEXT: .lds_size: 0{{$}} -; GCN-NEXT: .sgpr_count: 0x24{{$}} -; GCN-NEXT: .stack_frame_size_in_bytes: 0x10{{$}} -; GCN-NEXT: .vgpr_count: 0x3{{$}} +; GCN-NEXT: .sgpr_count: 0x26{{$}} +; GCN-NEXT: .stack_frame_size_in_bytes: 0{{$}} +; GCN-NEXT: .vgpr_count: 0x2{{$}} ; GCN-NEXT: no_stack_extern_call: ; GCN-NEXT: .lds_size: 0{{$}} ; GFX8-NEXT: .sgpr_count: 0x28{{$}} @@ -213,9 +213,9 @@ attributes #0 = { nounwind } ; GCN-NEXT: .vgpr_count: 0x2{{$}} ; GCN-NEXT: simple_stack_call: ; GCN-NEXT: .lds_size: 0{{$}} -; GCN-NEXT: .sgpr_count: 0x24{{$}} +; GCN-NEXT: .sgpr_count: 0x26{{$}} ; GCN-NEXT: .stack_frame_size_in_bytes: 0x20{{$}} -; GCN-NEXT: .vgpr_count: 0x4{{$}} +; GCN-NEXT: .vgpr_count: 0x3{{$}} ; GCN-NEXT: simple_stack_extern_call: ; GCN-NEXT: .lds_size: 0{{$}} ; GFX8-NEXT: .sgpr_count: 0x28{{$}} diff --git a/llvm/test/CodeGen/AMDGPU/branch-relax-spill.ll b/llvm/test/CodeGen/AMDGPU/branch-relax-spill.ll index 0f86d0ee0360..8ea4dbed7866 100644 --- a/llvm/test/CodeGen/AMDGPU/branch-relax-spill.ll +++ b/llvm/test/CodeGen/AMDGPU/branch-relax-spill.ll @@ -903,80 +903,80 @@ define void @spill_func(i32 addrspace(1)* %arg) #0 { ; CHECK-NEXT: buffer_store_dword v1, off, s[0:3], s32 offset:4 ; 4-byte Folded Spill ; CHECK-NEXT: s_mov_b64 exec, s[4:5] ; CHECK-NEXT: s_waitcnt expcnt(1) -; CHECK-NEXT: v_writelane_b32 v0, s30, 0 -; CHECK-NEXT: v_writelane_b32 v0, s31, 1 -; CHECK-NEXT: v_writelane_b32 v0, s33, 2 -; CHECK-NEXT: v_writelane_b32 v0, s34, 3 -; CHECK-NEXT: v_writelane_b32 v0, s35, 4 -; CHECK-NEXT: v_writelane_b32 v0, s36, 5 -; CHECK-NEXT: v_writelane_b32 v0, s37, 6 -; CHECK-NEXT: v_writelane_b32 v0, s38, 7 -; CHECK-NEXT: v_writelane_b32 v0, s39, 8 -; CHECK-NEXT: v_writelane_b32 v0, s40, 9 -; CHECK-NEXT: v_writelane_b32 v0, s41, 10 -; CHECK-NEXT: v_writelane_b32 v0, s42, 11 -; CHECK-NEXT: v_writelane_b32 v0, s43, 12 -; CHECK-NEXT: v_writelane_b32 v0, s44, 13 -; CHECK-NEXT: v_writelane_b32 v0, s45, 14 -; CHECK-NEXT: v_writelane_b32 v0, s46, 15 -; CHECK-NEXT: v_writelane_b32 v0, s47, 16 -; CHECK-NEXT: v_writelane_b32 v0, s48, 17 -; CHECK-NEXT: v_writelane_b32 v0, s49, 18 -; CHECK-NEXT: v_writelane_b32 v0, s50, 19 -; CHECK-NEXT: v_writelane_b32 v0, s51, 20 -; CHECK-NEXT: v_writelane_b32 v0, s52, 21 -; CHECK-NEXT: v_writelane_b32 v0, s53, 22 -; CHECK-NEXT: v_writelane_b32 v0, s54, 23 -; CHECK-NEXT: v_writelane_b32 v0, s55, 24 -; CHECK-NEXT: v_writelane_b32 v0, s56, 25 -; CHECK-NEXT: v_writelane_b32 v0, s57, 26 -; CHECK-NEXT: v_writelane_b32 v0, s58, 27 -; CHECK-NEXT: v_writelane_b32 v0, s59, 28 -; 
CHECK-NEXT: v_writelane_b32 v0, s60, 29 -; CHECK-NEXT: v_writelane_b32 v0, s61, 30 -; CHECK-NEXT: v_writelane_b32 v0, s62, 31 -; CHECK-NEXT: v_writelane_b32 v0, s63, 32 -; CHECK-NEXT: v_writelane_b32 v0, s64, 33 -; CHECK-NEXT: v_writelane_b32 v0, s65, 34 -; CHECK-NEXT: v_writelane_b32 v0, s66, 35 -; CHECK-NEXT: v_writelane_b32 v0, s67, 36 -; CHECK-NEXT: v_writelane_b32 v0, s68, 37 -; CHECK-NEXT: v_writelane_b32 v0, s69, 38 -; CHECK-NEXT: v_writelane_b32 v0, s70, 39 -; CHECK-NEXT: v_writelane_b32 v0, s71, 40 -; CHECK-NEXT: v_writelane_b32 v0, s72, 41 -; CHECK-NEXT: v_writelane_b32 v0, s73, 42 -; CHECK-NEXT: v_writelane_b32 v0, s74, 43 -; CHECK-NEXT: v_writelane_b32 v0, s75, 44 -; CHECK-NEXT: v_writelane_b32 v0, s76, 45 -; CHECK-NEXT: v_writelane_b32 v0, s77, 46 -; CHECK-NEXT: v_writelane_b32 v0, s78, 47 -; CHECK-NEXT: v_writelane_b32 v0, s79, 48 -; CHECK-NEXT: v_writelane_b32 v0, s80, 49 -; CHECK-NEXT: v_writelane_b32 v0, s81, 50 -; CHECK-NEXT: v_writelane_b32 v0, s82, 51 -; CHECK-NEXT: v_writelane_b32 v0, s83, 52 -; CHECK-NEXT: v_writelane_b32 v0, s84, 53 -; CHECK-NEXT: v_writelane_b32 v0, s85, 54 -; CHECK-NEXT: v_writelane_b32 v0, s86, 55 -; CHECK-NEXT: v_writelane_b32 v0, s87, 56 -; CHECK-NEXT: v_writelane_b32 v0, s88, 57 +; CHECK-NEXT: v_writelane_b32 v0, s33, 0 +; CHECK-NEXT: v_writelane_b32 v0, s34, 1 +; CHECK-NEXT: v_writelane_b32 v0, s35, 2 +; CHECK-NEXT: v_writelane_b32 v0, s36, 3 +; CHECK-NEXT: v_writelane_b32 v0, s37, 4 +; CHECK-NEXT: v_writelane_b32 v0, s38, 5 +; CHECK-NEXT: v_writelane_b32 v0, s39, 6 +; CHECK-NEXT: v_writelane_b32 v0, s40, 7 +; CHECK-NEXT: v_writelane_b32 v0, s41, 8 +; CHECK-NEXT: v_writelane_b32 v0, s42, 9 +; CHECK-NEXT: v_writelane_b32 v0, s43, 10 +; CHECK-NEXT: v_writelane_b32 v0, s44, 11 +; CHECK-NEXT: v_writelane_b32 v0, s45, 12 +; CHECK-NEXT: v_writelane_b32 v0, s46, 13 +; CHECK-NEXT: v_writelane_b32 v0, s47, 14 +; CHECK-NEXT: v_writelane_b32 v0, s48, 15 +; CHECK-NEXT: v_writelane_b32 v0, s49, 16 +; CHECK-NEXT: v_writelane_b32 v0, s50, 17 +; CHECK-NEXT: v_writelane_b32 v0, s51, 18 +; CHECK-NEXT: v_writelane_b32 v0, s52, 19 +; CHECK-NEXT: v_writelane_b32 v0, s53, 20 +; CHECK-NEXT: v_writelane_b32 v0, s54, 21 +; CHECK-NEXT: v_writelane_b32 v0, s55, 22 +; CHECK-NEXT: v_writelane_b32 v0, s56, 23 +; CHECK-NEXT: v_writelane_b32 v0, s57, 24 +; CHECK-NEXT: v_writelane_b32 v0, s58, 25 +; CHECK-NEXT: v_writelane_b32 v0, s59, 26 +; CHECK-NEXT: v_writelane_b32 v0, s60, 27 +; CHECK-NEXT: v_writelane_b32 v0, s61, 28 +; CHECK-NEXT: v_writelane_b32 v0, s62, 29 +; CHECK-NEXT: v_writelane_b32 v0, s63, 30 +; CHECK-NEXT: v_writelane_b32 v0, s64, 31 +; CHECK-NEXT: v_writelane_b32 v0, s65, 32 +; CHECK-NEXT: v_writelane_b32 v0, s66, 33 +; CHECK-NEXT: v_writelane_b32 v0, s67, 34 +; CHECK-NEXT: v_writelane_b32 v0, s68, 35 +; CHECK-NEXT: v_writelane_b32 v0, s69, 36 +; CHECK-NEXT: v_writelane_b32 v0, s70, 37 +; CHECK-NEXT: v_writelane_b32 v0, s71, 38 +; CHECK-NEXT: v_writelane_b32 v0, s72, 39 +; CHECK-NEXT: v_writelane_b32 v0, s73, 40 +; CHECK-NEXT: v_writelane_b32 v0, s74, 41 +; CHECK-NEXT: v_writelane_b32 v0, s75, 42 +; CHECK-NEXT: v_writelane_b32 v0, s76, 43 +; CHECK-NEXT: v_writelane_b32 v0, s77, 44 +; CHECK-NEXT: v_writelane_b32 v0, s78, 45 +; CHECK-NEXT: v_writelane_b32 v0, s79, 46 +; CHECK-NEXT: v_writelane_b32 v0, s80, 47 +; CHECK-NEXT: v_writelane_b32 v0, s81, 48 +; CHECK-NEXT: v_writelane_b32 v0, s82, 49 +; CHECK-NEXT: v_writelane_b32 v0, s83, 50 +; CHECK-NEXT: v_writelane_b32 v0, s84, 51 +; CHECK-NEXT: v_writelane_b32 v0, s85, 52 +; CHECK-NEXT: v_writelane_b32 v0, s86, 
53 +; CHECK-NEXT: v_writelane_b32 v0, s87, 54 +; CHECK-NEXT: v_writelane_b32 v0, s88, 55 +; CHECK-NEXT: v_writelane_b32 v0, s89, 56 +; CHECK-NEXT: v_writelane_b32 v0, s90, 57 ; CHECK-NEXT: s_waitcnt expcnt(0) -; CHECK-NEXT: v_writelane_b32 v1, s95, 0 -; CHECK-NEXT: v_writelane_b32 v0, s89, 58 -; CHECK-NEXT: v_writelane_b32 v1, s96, 1 -; CHECK-NEXT: v_writelane_b32 v0, s90, 59 -; CHECK-NEXT: v_writelane_b32 v1, s97, 2 -; CHECK-NEXT: v_writelane_b32 v0, s91, 60 -; CHECK-NEXT: v_writelane_b32 v1, s98, 3 -; CHECK-NEXT: v_writelane_b32 v0, s92, 61 -; CHECK-NEXT: v_writelane_b32 v1, s99, 4 -; CHECK-NEXT: v_writelane_b32 v0, s93, 62 -; CHECK-NEXT: v_writelane_b32 v1, s100, 5 -; CHECK-NEXT: s_mov_b32 s31, s12 -; CHECK-NEXT: v_writelane_b32 v0, s94, 63 -; CHECK-NEXT: v_writelane_b32 v1, s101, 6 -; CHECK-NEXT: s_cmp_eq_u32 s31, 0 +; CHECK-NEXT: v_writelane_b32 v1, s97, 0 +; CHECK-NEXT: v_writelane_b32 v0, s91, 58 +; CHECK-NEXT: v_writelane_b32 v1, s98, 1 +; CHECK-NEXT: v_writelane_b32 v0, s92, 59 +; CHECK-NEXT: v_writelane_b32 v1, s99, 2 +; CHECK-NEXT: v_writelane_b32 v0, s93, 60 +; CHECK-NEXT: v_writelane_b32 v1, s100, 3 +; CHECK-NEXT: v_writelane_b32 v0, s94, 61 +; CHECK-NEXT: v_writelane_b32 v1, s101, 4 +; CHECK-NEXT: v_writelane_b32 v0, s95, 62 +; CHECK-NEXT: v_writelane_b32 v1, s30, 5 +; CHECK-NEXT: s_mov_b32 s29, s12 +; CHECK-NEXT: v_writelane_b32 v0, s96, 63 +; CHECK-NEXT: v_writelane_b32 v1, s31, 6 +; CHECK-NEXT: s_cmp_eq_u32 s29, 0 ; CHECK-NEXT: ;;#ASMSTART ; CHECK-NEXT: s_mov_b32 s0, 0 ; CHECK-NEXT: ;;#ASMEND @@ -1336,6 +1336,7 @@ define void @spill_func(i32 addrspace(1)* %arg) #0 { ; CHECK-NEXT: ;;#ASMSTART ; CHECK-NEXT: ; reg use s5 ; CHECK-NEXT: ;;#ASMEND +; CHECK-NEXT: v_readlane_b32 s4, v1, 5 ; CHECK-NEXT: ;;#ASMSTART ; CHECK-NEXT: ; reg use s6 ; CHECK-NEXT: ;;#ASMEND @@ -1630,83 +1631,82 @@ define void @spill_func(i32 addrspace(1)* %arg) #0 { ; CHECK-NEXT: ;;#ASMSTART ; CHECK-NEXT: ; reg use vcc_hi ; CHECK-NEXT: ;;#ASMEND -; CHECK-NEXT: v_readlane_b32 s101, v1, 6 -; CHECK-NEXT: v_readlane_b32 s100, v1, 5 -; CHECK-NEXT: v_readlane_b32 s99, v1, 4 -; CHECK-NEXT: v_readlane_b32 s98, v1, 3 -; CHECK-NEXT: v_readlane_b32 s97, v1, 2 -; CHECK-NEXT: v_readlane_b32 s96, v1, 1 -; CHECK-NEXT: v_readlane_b32 s95, v1, 0 -; CHECK-NEXT: v_readlane_b32 s94, v0, 63 -; CHECK-NEXT: v_readlane_b32 s93, v0, 62 -; CHECK-NEXT: v_readlane_b32 s92, v0, 61 -; CHECK-NEXT: v_readlane_b32 s91, v0, 60 -; CHECK-NEXT: v_readlane_b32 s90, v0, 59 -; CHECK-NEXT: v_readlane_b32 s89, v0, 58 -; CHECK-NEXT: v_readlane_b32 s88, v0, 57 -; CHECK-NEXT: v_readlane_b32 s87, v0, 56 -; CHECK-NEXT: v_readlane_b32 s86, v0, 55 -; CHECK-NEXT: v_readlane_b32 s85, v0, 54 -; CHECK-NEXT: v_readlane_b32 s84, v0, 53 -; CHECK-NEXT: v_readlane_b32 s83, v0, 52 -; CHECK-NEXT: v_readlane_b32 s82, v0, 51 -; CHECK-NEXT: v_readlane_b32 s81, v0, 50 -; CHECK-NEXT: v_readlane_b32 s80, v0, 49 -; CHECK-NEXT: v_readlane_b32 s79, v0, 48 -; CHECK-NEXT: v_readlane_b32 s78, v0, 47 -; CHECK-NEXT: v_readlane_b32 s77, v0, 46 -; CHECK-NEXT: v_readlane_b32 s76, v0, 45 -; CHECK-NEXT: v_readlane_b32 s75, v0, 44 -; CHECK-NEXT: v_readlane_b32 s74, v0, 43 -; CHECK-NEXT: v_readlane_b32 s73, v0, 42 -; CHECK-NEXT: v_readlane_b32 s72, v0, 41 -; CHECK-NEXT: v_readlane_b32 s71, v0, 40 -; CHECK-NEXT: v_readlane_b32 s70, v0, 39 -; CHECK-NEXT: v_readlane_b32 s69, v0, 38 -; CHECK-NEXT: v_readlane_b32 s68, v0, 37 -; CHECK-NEXT: v_readlane_b32 s67, v0, 36 -; CHECK-NEXT: v_readlane_b32 s66, v0, 35 -; CHECK-NEXT: v_readlane_b32 s65, v0, 34 -; CHECK-NEXT: v_readlane_b32 s64, v0, 33 
-; CHECK-NEXT: v_readlane_b32 s63, v0, 32 -; CHECK-NEXT: v_readlane_b32 s62, v0, 31 -; CHECK-NEXT: v_readlane_b32 s61, v0, 30 -; CHECK-NEXT: v_readlane_b32 s60, v0, 29 -; CHECK-NEXT: v_readlane_b32 s59, v0, 28 -; CHECK-NEXT: v_readlane_b32 s58, v0, 27 -; CHECK-NEXT: v_readlane_b32 s57, v0, 26 -; CHECK-NEXT: v_readlane_b32 s56, v0, 25 -; CHECK-NEXT: v_readlane_b32 s55, v0, 24 -; CHECK-NEXT: v_readlane_b32 s54, v0, 23 -; CHECK-NEXT: v_readlane_b32 s53, v0, 22 -; CHECK-NEXT: v_readlane_b32 s52, v0, 21 -; CHECK-NEXT: v_readlane_b32 s51, v0, 20 -; CHECK-NEXT: v_readlane_b32 s50, v0, 19 -; CHECK-NEXT: v_readlane_b32 s49, v0, 18 -; CHECK-NEXT: v_readlane_b32 s48, v0, 17 -; CHECK-NEXT: v_readlane_b32 s47, v0, 16 -; CHECK-NEXT: v_readlane_b32 s46, v0, 15 -; CHECK-NEXT: v_readlane_b32 s45, v0, 14 -; CHECK-NEXT: v_readlane_b32 s44, v0, 13 -; CHECK-NEXT: v_readlane_b32 s43, v0, 12 -; CHECK-NEXT: v_readlane_b32 s42, v0, 11 -; CHECK-NEXT: v_readlane_b32 s41, v0, 10 -; CHECK-NEXT: v_readlane_b32 s40, v0, 9 -; CHECK-NEXT: v_readlane_b32 s39, v0, 8 -; CHECK-NEXT: v_readlane_b32 s38, v0, 7 -; CHECK-NEXT: v_readlane_b32 s37, v0, 6 -; CHECK-NEXT: v_readlane_b32 s36, v0, 5 -; CHECK-NEXT: v_readlane_b32 s35, v0, 4 -; CHECK-NEXT: v_readlane_b32 s34, v0, 3 -; CHECK-NEXT: v_readlane_b32 s33, v0, 2 -; CHECK-NEXT: v_readlane_b32 s31, v0, 1 -; CHECK-NEXT: v_readlane_b32 s30, v0, 0 -; CHECK-NEXT: s_or_saveexec_b64 s[4:5], -1 +; CHECK-NEXT: v_readlane_b32 s5, v1, 6 +; CHECK-NEXT: v_readlane_b32 s101, v1, 4 +; CHECK-NEXT: v_readlane_b32 s100, v1, 3 +; CHECK-NEXT: v_readlane_b32 s99, v1, 2 +; CHECK-NEXT: v_readlane_b32 s98, v1, 1 +; CHECK-NEXT: v_readlane_b32 s97, v1, 0 +; CHECK-NEXT: v_readlane_b32 s96, v0, 63 +; CHECK-NEXT: v_readlane_b32 s95, v0, 62 +; CHECK-NEXT: v_readlane_b32 s94, v0, 61 +; CHECK-NEXT: v_readlane_b32 s93, v0, 60 +; CHECK-NEXT: v_readlane_b32 s92, v0, 59 +; CHECK-NEXT: v_readlane_b32 s91, v0, 58 +; CHECK-NEXT: v_readlane_b32 s90, v0, 57 +; CHECK-NEXT: v_readlane_b32 s89, v0, 56 +; CHECK-NEXT: v_readlane_b32 s88, v0, 55 +; CHECK-NEXT: v_readlane_b32 s87, v0, 54 +; CHECK-NEXT: v_readlane_b32 s86, v0, 53 +; CHECK-NEXT: v_readlane_b32 s85, v0, 52 +; CHECK-NEXT: v_readlane_b32 s84, v0, 51 +; CHECK-NEXT: v_readlane_b32 s83, v0, 50 +; CHECK-NEXT: v_readlane_b32 s82, v0, 49 +; CHECK-NEXT: v_readlane_b32 s81, v0, 48 +; CHECK-NEXT: v_readlane_b32 s80, v0, 47 +; CHECK-NEXT: v_readlane_b32 s79, v0, 46 +; CHECK-NEXT: v_readlane_b32 s78, v0, 45 +; CHECK-NEXT: v_readlane_b32 s77, v0, 44 +; CHECK-NEXT: v_readlane_b32 s76, v0, 43 +; CHECK-NEXT: v_readlane_b32 s75, v0, 42 +; CHECK-NEXT: v_readlane_b32 s74, v0, 41 +; CHECK-NEXT: v_readlane_b32 s73, v0, 40 +; CHECK-NEXT: v_readlane_b32 s72, v0, 39 +; CHECK-NEXT: v_readlane_b32 s71, v0, 38 +; CHECK-NEXT: v_readlane_b32 s70, v0, 37 +; CHECK-NEXT: v_readlane_b32 s69, v0, 36 +; CHECK-NEXT: v_readlane_b32 s68, v0, 35 +; CHECK-NEXT: v_readlane_b32 s67, v0, 34 +; CHECK-NEXT: v_readlane_b32 s66, v0, 33 +; CHECK-NEXT: v_readlane_b32 s65, v0, 32 +; CHECK-NEXT: v_readlane_b32 s64, v0, 31 +; CHECK-NEXT: v_readlane_b32 s63, v0, 30 +; CHECK-NEXT: v_readlane_b32 s62, v0, 29 +; CHECK-NEXT: v_readlane_b32 s61, v0, 28 +; CHECK-NEXT: v_readlane_b32 s60, v0, 27 +; CHECK-NEXT: v_readlane_b32 s59, v0, 26 +; CHECK-NEXT: v_readlane_b32 s58, v0, 25 +; CHECK-NEXT: v_readlane_b32 s57, v0, 24 +; CHECK-NEXT: v_readlane_b32 s56, v0, 23 +; CHECK-NEXT: v_readlane_b32 s55, v0, 22 +; CHECK-NEXT: v_readlane_b32 s54, v0, 21 +; CHECK-NEXT: v_readlane_b32 s53, v0, 20 +; CHECK-NEXT: v_readlane_b32 s52, v0, 
19 +; CHECK-NEXT: v_readlane_b32 s51, v0, 18 +; CHECK-NEXT: v_readlane_b32 s50, v0, 17 +; CHECK-NEXT: v_readlane_b32 s49, v0, 16 +; CHECK-NEXT: v_readlane_b32 s48, v0, 15 +; CHECK-NEXT: v_readlane_b32 s47, v0, 14 +; CHECK-NEXT: v_readlane_b32 s46, v0, 13 +; CHECK-NEXT: v_readlane_b32 s45, v0, 12 +; CHECK-NEXT: v_readlane_b32 s44, v0, 11 +; CHECK-NEXT: v_readlane_b32 s43, v0, 10 +; CHECK-NEXT: v_readlane_b32 s42, v0, 9 +; CHECK-NEXT: v_readlane_b32 s41, v0, 8 +; CHECK-NEXT: v_readlane_b32 s40, v0, 7 +; CHECK-NEXT: v_readlane_b32 s39, v0, 6 +; CHECK-NEXT: v_readlane_b32 s38, v0, 5 +; CHECK-NEXT: v_readlane_b32 s37, v0, 4 +; CHECK-NEXT: v_readlane_b32 s36, v0, 3 +; CHECK-NEXT: v_readlane_b32 s35, v0, 2 +; CHECK-NEXT: v_readlane_b32 s34, v0, 1 +; CHECK-NEXT: v_readlane_b32 s33, v0, 0 +; CHECK-NEXT: s_or_saveexec_b64 s[6:7], -1 ; CHECK-NEXT: buffer_load_dword v0, off, s[0:3], s32 ; 4-byte Folded Reload ; CHECK-NEXT: buffer_load_dword v1, off, s[0:3], s32 offset:4 ; 4-byte Folded Reload -; CHECK-NEXT: s_mov_b64 exec, s[4:5] +; CHECK-NEXT: s_mov_b64 exec, s[6:7] ; CHECK-NEXT: s_waitcnt vmcnt(0) -; CHECK-NEXT: s_setpc_b64 s[30:31] +; CHECK-NEXT: s_setpc_b64 s[4:5] entry: %cnd = tail call i32 @llvm.amdgcn.workgroup.id.x() #0 %sgpr0 = tail call i32 asm sideeffect "s_mov_b32 s0, 0", "={s0}"() #0 diff --git a/llvm/test/CodeGen/AMDGPU/call-graph-register-usage.ll b/llvm/test/CodeGen/AMDGPU/call-graph-register-usage.ll index 1a9ec6c31555..e91d62c4c3f2 100644 --- a/llvm/test/CodeGen/AMDGPU/call-graph-register-usage.ll +++ b/llvm/test/CodeGen/AMDGPU/call-graph-register-usage.ll @@ -17,10 +17,9 @@ define void @use_vcc() #1 { ; GCN: v_writelane_b32 v40, s30, 0 ; GCN: v_writelane_b32 v40, s31, 1 ; GCN: s_swappc_b64 -; GCN: v_readlane_b32 s31, v40, 1 -; GCN: v_readlane_b32 s30, v40, 0 +; GCN: v_readlane_b32 s4, v40, 0 +; GCN: v_readlane_b32 s5, v40, 1 ; GCN: v_readlane_b32 s33, v40, 2 -; GCN: s_setpc_b64 s[30:31] ; GCN: ; NumSgprs: 36 ; GCN: ; NumVgprs: 41 define void @indirect_use_vcc() #1 { diff --git a/llvm/test/CodeGen/AMDGPU/call-preserved-registers.ll b/llvm/test/CodeGen/AMDGPU/call-preserved-registers.ll index f3c875bd5803..82d879d76931 100644 --- a/llvm/test/CodeGen/AMDGPU/call-preserved-registers.ll +++ b/llvm/test/CodeGen/AMDGPU/call-preserved-registers.ll @@ -25,26 +25,26 @@ define amdgpu_kernel void @test_kernel_call_external_void_func_void_clobber_s30_ ; MUBUF: buffer_store_dword ; FLATSCR: scratch_store_dword ; GCN: v_writelane_b32 v40, s33, 4 -; GCN: v_writelane_b32 v40, s30, 0 -; GCN: v_writelane_b32 v40, s31, 1 -; GCN: v_writelane_b32 v40, s34, 2 -; GCN: v_writelane_b32 v40, s35, 3 +; GCN: v_writelane_b32 v40, s34, 0 +; GCN: v_writelane_b32 v40, s35, 1 +; GCN: v_writelane_b32 v40, s30, 2 +; GCN: v_writelane_b32 v40, s31, 3 ; GCN: s_swappc_b64 ; GCN-NEXT: ;;#ASMSTART ; GCN-NEXT: ;;#ASMEND ; GCN-NEXT: s_swappc_b64 -; GCN: v_readlane_b32 s35, v40, 3 -; GCN: v_readlane_b32 s34, v40, 2 -; MUBUF-DAG: v_readlane_b32 s31, v40, 1 -; MUBUF-DAG: v_readlane_b32 s30, v40, 0 -; FLATSCR-DAG: v_readlane_b32 s31, v40, 1 -; FLATSCR-DAG: v_readlane_b32 s30, v40, 0 +; MUBUF-DAG: v_readlane_b32 s4, v40, 2 +; MUBUF-DAG: v_readlane_b32 s5, v40, 3 +; FLATSCR-DAG: v_readlane_b32 s0, v40, 2 +; FLATSCR-DAG: v_readlane_b32 s1, v40, 3 +; GCN: v_readlane_b32 s35, v40, 1 +; GCN: v_readlane_b32 s34, v40, 0 ; GCN: v_readlane_b32 s33, v40, 4 ; MUBUF: buffer_load_dword ; FLATSCR: scratch_load_dword -; GCN: s_setpc_b64 s[30:31] +; GCN: s_setpc_b64 define void 
@test_func_call_external_void_func_void_clobber_s30_s31_call_external_void_func_void() #0 { call void @external_void_func_void() call void asm sideeffect "", ""() #0 @@ -74,14 +74,11 @@ define void @test_func_call_external_void_funcx2() #0 { ; GCN-LABEL: {{^}}void_func_void_clobber_s30_s31: ; GCN: s_waitcnt -; GCN: v_writelane_b32 v0, s30, 0 -; GCN: v_writelane_b32 v0, s31, 1 +; GCN-NEXT: s_mov_b64 [[SAVEPC:s\[[0-9]+:[0-9]+\]]], s[30:31] ; GCN-NEXT: #ASMSTART ; GCN: ; clobber ; GCN-NEXT: #ASMEND -; GCN: v_readlane_b32 s31, v0, 1 -; GCN: v_readlane_b32 s30, v0, 0 -; GCN: s_setpc_b64 s[30:31] +; GCN-NEXT: s_setpc_b64 [[SAVEPC]] define void @void_func_void_clobber_s30_s31() #2 { call void asm sideeffect "; clobber", "~{s[30:31]}"() #0 ret void diff --git a/llvm/test/CodeGen/AMDGPU/callee-frame-setup.ll b/llvm/test/CodeGen/AMDGPU/callee-frame-setup.ll index e3e5f011ccfc..e357b5adc8d4 100644 --- a/llvm/test/CodeGen/AMDGPU/callee-frame-setup.ll +++ b/llvm/test/CodeGen/AMDGPU/callee-frame-setup.ll @@ -102,8 +102,10 @@ define void @callee_with_stack_no_fp_elim_non_leaf() #2 { ; GCN: s_swappc_b64 -; GCN-DAG: v_readlane_b32 s30, [[CSR_VGPR]] -; GCN-DAG: v_readlane_b32 s31, [[CSR_VGPR]] +; MUBUF-DAG: v_readlane_b32 s5, [[CSR_VGPR]] +; MUBUF-DAG: v_readlane_b32 s4, [[CSR_VGPR]] +; FLATSCR-DAG: v_readlane_b32 s0, [[CSR_VGPR]] +; FLATSCR-DAG: v_readlane_b32 s1, [[CSR_VGPR]] ; MUBUF: s_addk_i32 s32, 0xfc00{{$}} ; FLATSCR: s_add_i32 s32, s32, -16{{$}} @@ -114,7 +116,7 @@ define void @callee_with_stack_no_fp_elim_non_leaf() #2 { ; GCN-NEXT: s_mov_b64 exec, [[COPY_EXEC1]] ; GCN-NEXT: s_waitcnt vmcnt(0) -; GCN-NEXT: s_setpc_b64 s[30:31] +; GCN-NEXT: s_setpc_b64 define void @callee_with_stack_and_call() #0 { %alloca = alloca i32, addrspace(5) store volatile i32 0, i32 addrspace(5)* %alloca @@ -142,8 +144,10 @@ define void @callee_with_stack_and_call() #0 { ; GCN-DAG: v_writelane_b32 [[CSR_VGPR]], s31, 1 ; GCN: s_swappc_b64 -; GCN-DAG: v_readlane_b32 s30, [[CSR_VGPR]], 0 -; GCN-DAG: v_readlane_b32 s31, [[CSR_VGPR]], 1 +; MUBUF-DAG: v_readlane_b32 s4, v40, 0 +; MUBUF-DAG: v_readlane_b32 s5, v40, 1 +; FLATSCR-DAG: v_readlane_b32 s0, v40, 0 +; FLATSCR-DAG: v_readlane_b32 s1, v40, 1 ; MUBUF: s_addk_i32 s32, 0xfc00 ; FLATSCR: s_add_i32 s32, s32, -16 @@ -153,7 +157,7 @@ define void @callee_with_stack_and_call() #0 { ; FLATSCR-NEXT: scratch_load_dword [[CSR_VGPR]], off, s32 ; 4-byte Folded Reload ; GCN-NEXT: s_mov_b64 exec, [[COPY_EXEC1]] ; GCN-NEXT: s_waitcnt vmcnt(0) -; GCN-NEXT: s_setpc_b64 s[30:31] +; GCN-NEXT: s_setpc_b64 define void @callee_no_stack_with_call() #0 { call void @external_void_func_void() ret void @@ -389,28 +393,31 @@ define void @realign_stack_no_fp_elim() #1 { ; MUBUF-NEXT: buffer_store_dword [[CSR_VGPR:v[0-9]+]], off, s[0:3], s32 offset:4 ; 4-byte Folded Spill ; FLATSCR-NEXT: scratch_store_dword off, [[CSR_VGPR:v[0-9]+]], s32 offset:4 ; 4-byte Folded Spill ; GCN-NEXT: s_mov_b64 exec, [[COPY_EXEC0]] -; GCN-NEXT: v_writelane_b32 [[CSR_VGPR]], s33, 2 +; GCN-NEXT: v_writelane_b32 v0, s33, 2 ; GCN-NEXT: s_mov_b32 s33, s32 -; GCN: v_writelane_b32 [[CSR_VGPR]], s30, 0 +; GCN-NEXT: v_writelane_b32 v0, s30, 0 ; GCN: v_mov_b32_e32 [[ZERO:v[0-9]+]], 0 -; MUBUF: s_addk_i32 s32, 0x300 -; FLATSCR: s_add_i32 s32, s32, 12 -; GCN: v_writelane_b32 [[CSR_VGPR]], s31, 1 +; GCN: v_writelane_b32 v0, s31, 1 ; MUBUF: buffer_store_dword [[ZERO]], off, s[0:3], s33{{$}} ; FLATSCR: scratch_store_dword off, [[ZERO]], s33{{$}} ; GCN-NEXT: s_waitcnt vmcnt(0) ; GCN: ;;#ASMSTART -; GCN: v_readlane_b32 s31, [[CSR_VGPR]], 1 -; 
GCN: v_readlane_b32 s30, [[CSR_VGPR]], 0 -; MUBUF: s_addk_i32 s32, 0xfd00 -; FLATSCR: s_add_i32 s32, s32, -12 -; GCN-NEXT: v_readlane_b32 s33, [[CSR_VGPR]], 2 +; MUBUF: s_addk_i32 s32, 0x300 +; MUBUF-NEXT: v_readlane_b32 s4, v0, 0 +; MUBUF-NEXT: v_readlane_b32 s5, v0, 1 +; FLATSCR: s_add_i32 s32, s32, 12 +; FLATSCR-NEXT: v_readlane_b32 s0, v0, 0 +; FLATSCR-NEXT: v_readlane_b32 s1, v0, 1 +; MUBUF-NEXT: s_addk_i32 s32, 0xfd00 +; FLATSCR-NEXT: s_add_i32 s32, s32, -12 +; GCN-NEXT: v_readlane_b32 s33, v0, 2 ; GCN-NEXT: s_or_saveexec_b64 [[COPY_EXEC1:s\[[0-9]+:[0-9]+\]]], -1{{$}} ; MUBUF-NEXT: buffer_load_dword [[CSR_VGPR]], off, s[0:3], s32 offset:4 ; 4-byte Folded Reload ; FLATSCR-NEXT: scratch_load_dword [[CSR_VGPR]], off, s32 offset:4 ; 4-byte Folded Reload ; GCN-NEXT: s_mov_b64 exec, [[COPY_EXEC1]] ; GCN-NEXT: s_waitcnt vmcnt(0) -; GCN-NEXT: s_setpc_b64 s[30:31] +; MUBUF-NEXT: s_setpc_b64 s[4:5] +; FLATSCR-NEXT: s_setpc_b64 s[0:1] define void @no_unused_non_csr_sgpr_for_fp() #1 { %alloca = alloca i32, addrspace(5) store volatile i32 0, i32 addrspace(5)* %alloca @@ -434,22 +441,28 @@ define void @no_unused_non_csr_sgpr_for_fp() #1 { ; GCN-NEXT: s_mov_b64 exec, [[COPY_EXEC0]] ; GCN-NEXT: v_writelane_b32 [[CSR_VGPR]], s33, 2 ; GCN-NEXT: s_mov_b32 s33, s32 -; MUBUF: s_addk_i32 s32, 0x300{{$}} -; FLATSCR: s_add_i32 s32, s32, 12{{$}} +; GCN-NEXT: v_writelane_b32 [[CSR_VGPR]], s30, 0 +; GCN-DAG: v_writelane_b32 [[CSR_VGPR]], s31, 1 ; MUBUF-DAG: buffer_store_dword ; FLATSCR-DAG: scratch_store_dword +; MUBUF: s_addk_i32 s32, 0x300{{$}} +; FLATSCR: s_add_i32 s32, s32, 12{{$}} +; MUBUF: v_readlane_b32 s4, [[CSR_VGPR]], 0 +; FLATSCR: v_readlane_b32 s0, [[CSR_VGPR]], 0 ; GCN: ;;#ASMSTART -; MUBUF: s_addk_i32 s32, 0xfd00{{$}} -; FLATSCR: s_add_i32 s32, s32, -12{{$}} +; MUBUF: v_readlane_b32 s5, [[CSR_VGPR]], 1 +; FLATSCR: v_readlane_b32 s1, [[CSR_VGPR]], 1 +; MUBUF-NEXT: s_addk_i32 s32, 0xfd00{{$}} +; FLATSCR-NEXT: s_add_i32 s32, s32, -12{{$}} ; GCN-NEXT: v_readlane_b32 s33, [[CSR_VGPR]], 2 ; GCN-NEXT: s_or_saveexec_b64 [[COPY_EXEC1:s\[[0-9]+:[0-9]+\]]], -1{{$}} ; MUBUF-NEXT: buffer_load_dword [[CSR_VGPR]], off, s[0:3], s32 offset:4 ; 4-byte Folded Reload ; FLATSCR-NEXT: scratch_load_dword [[CSR_VGPR]], off, s32 offset:4 ; 4-byte Folded Reload ; GCN-NEXT: s_mov_b64 exec, [[COPY_EXEC1]] ; GCN-NEXT: s_waitcnt vmcnt(0) -; GCN-NEXT: s_setpc_b64 s[30:31] +; GCN-NEXT: s_setpc_b64 define void @no_unused_non_csr_sgpr_for_fp_no_scratch_vgpr() #1 { %alloca = alloca i32, addrspace(5) store volatile i32 0, i32 addrspace(5)* %alloca @@ -481,15 +494,21 @@ define void @no_unused_non_csr_sgpr_for_fp_no_scratch_vgpr() #1 { ; FLATSCR-NEXT: scratch_store_dword off, [[CSR_VGPR:v[0-9]+]], [[SCRATCH_SGPR]] ; 4-byte Folded Spill ; GCN-NEXT: s_mov_b64 exec, [[COPY_EXEC0]] ; GCN-NEXT: v_writelane_b32 [[CSR_VGPR]], s33, 2 +; GCN-DAG: v_writelane_b32 [[CSR_VGPR]], s30, 0 ; GCN-DAG: s_mov_b32 s33, s32 +; GCN-DAG: v_writelane_b32 [[CSR_VGPR]], s31, 1 ; MUBUF-DAG: s_add_i32 s32, s32, 0x40300{{$}} ; FLATSCR-DAG: s_addk_i32 s32, 0x100c{{$}} ; MUBUF-DAG: buffer_store_dword ; FLATSCR-DAG: scratch_store_dword +; MUBUF: v_readlane_b32 s4, [[CSR_VGPR]], 0 +; FLATSCR: v_readlane_b32 s0, [[CSR_VGPR]], 0 ; GCN: ;;#ASMSTART -; MUBUF: s_add_i32 s32, s32, 0xfffbfd00{{$}} -; FLATSCR: s_addk_i32 s32, 0xeff4{{$}} +; MUBUF: v_readlane_b32 s5, [[CSR_VGPR]], 1 +; FLATSCR: v_readlane_b32 s1, [[CSR_VGPR]], 1 +; MUBUF-NEXT: s_add_i32 s32, s32, 0xfffbfd00{{$}} +; FLATSCR-NEXT: s_addk_i32 s32, 0xeff4{{$}} ; GCN-NEXT: v_readlane_b32 s33, [[CSR_VGPR]], 2 ; 
GCN-NEXT: s_or_saveexec_b64 [[COPY_EXEC1:s\[[0-9]+:[0-9]+\]]], -1{{$}} ; MUBUF-NEXT: s_add_i32 [[SCRATCH_SGPR:s[0-9]+]], s32, 0x40100 @@ -498,7 +517,7 @@ define void @no_unused_non_csr_sgpr_for_fp_no_scratch_vgpr() #1 { ; FLATSCR-NEXT: scratch_load_dword [[CSR_VGPR]], off, [[SCRATCH_SGPR]] ; 4-byte Folded Reload ; GCN-NEXT: s_mov_b64 exec, [[COPY_EXEC1]] ; GCN-NEXT: s_waitcnt vmcnt(0) -; GCN-NEXT: s_setpc_b64 s[30:31] +; GCN-NEXT: s_setpc_b64 define void @scratch_reg_needed_mubuf_offset([4096 x i8] addrspace(5)* byval([4096 x i8]) align 4 %arg) #1 { %alloca = alloca i32, addrspace(5) store volatile i32 0, i32 addrspace(5)* %alloca @@ -530,7 +549,7 @@ define internal void @local_empty_func() #0 { ; An FP is needed, despite not needing any spills ; TODO: Ccould see callee does not use stack and omit FP. ; GCN-LABEL: {{^}}ipra_call_with_stack: -; GCN: v_writelane_b32 v0, s33, 2 +; GCN: s_mov_b32 [[FP_COPY:s[0-9]+]], s33 ; GCN: s_mov_b32 s33, s32 ; MUBUF: s_addk_i32 s32, 0x400 ; FLATSCR: s_add_i32 s32, s32, 16 @@ -539,7 +558,7 @@ define internal void @local_empty_func() #0 { ; GCN: s_swappc_b64 ; MUBUF: s_addk_i32 s32, 0xfc00 ; FLATSCR: s_add_i32 s32, s32, -16 -; GCN: v_readlane_b32 s33, v0, 2 +; GCN: s_mov_b32 s33, [[FP_COPY:s[0-9]+]] define void @ipra_call_with_stack() #0 { %alloca = alloca i32, addrspace(5) store volatile i32 0, i32 addrspace(5)* %alloca diff --git a/llvm/test/CodeGen/AMDGPU/callee-special-input-sgprs-fixed-abi.ll b/llvm/test/CodeGen/AMDGPU/callee-special-input-sgprs-fixed-abi.ll index 980b0bc631af..351aabf25738 100644 --- a/llvm/test/CodeGen/AMDGPU/callee-special-input-sgprs-fixed-abi.ll +++ b/llvm/test/CodeGen/AMDGPU/callee-special-input-sgprs-fixed-abi.ll @@ -317,7 +317,7 @@ define amdgpu_kernel void @kern_indirect_use_workgroup_id_yz() #1 { ; GCN-NOT: s12 ; GCN-NOT: s13 ; GCN-NOT: s14 -; GCN: v_readlane_b32 s30, v40, 0 +; GCN: v_readlane_b32 s4, v40, 0 define hidden void @func_indirect_use_workgroup_id_x() #1 { call void @use_workgroup_id_x() ret void diff --git a/llvm/test/CodeGen/AMDGPU/cross-block-use-is-not-abi-copy.ll b/llvm/test/CodeGen/AMDGPU/cross-block-use-is-not-abi-copy.ll index 29fc098899ee..82dddcb6a148 100644 --- a/llvm/test/CodeGen/AMDGPU/cross-block-use-is-not-abi-copy.ll +++ b/llvm/test/CodeGen/AMDGPU/cross-block-use-is-not-abi-copy.ll @@ -39,15 +39,15 @@ define float @call_split_type_used_outside_block_v2f32() #0 { ; GCN-NEXT: s_add_u32 s16, s16, func_v2f32@rel32@lo+4 ; GCN-NEXT: s_addc_u32 s17, s17, func_v2f32@rel32@hi+12 ; GCN-NEXT: s_swappc_b64 s[30:31], s[16:17] -; GCN-NEXT: v_readlane_b32 s31, v40, 1 -; GCN-NEXT: v_readlane_b32 s30, v40, 0 +; GCN-NEXT: v_readlane_b32 s4, v40, 0 +; GCN-NEXT: v_readlane_b32 s5, v40, 1 ; GCN-NEXT: s_addk_i32 s32, 0xfc00 ; GCN-NEXT: v_readlane_b32 s33, v40, 2 -; GCN-NEXT: s_or_saveexec_b64 s[4:5], -1 +; GCN-NEXT: s_or_saveexec_b64 s[6:7], -1 ; GCN-NEXT: buffer_load_dword v40, off, s[0:3], s32 ; 4-byte Folded Reload -; GCN-NEXT: s_mov_b64 exec, s[4:5] +; GCN-NEXT: s_mov_b64 exec, s[6:7] ; GCN-NEXT: s_waitcnt vmcnt(0) -; GCN-NEXT: s_setpc_b64 s[30:31] +; GCN-NEXT: s_setpc_b64 s[4:5] bb0: %split.ret.type = call <2 x float> @func_v2f32() br label %bb1 @@ -73,15 +73,15 @@ define float @call_split_type_used_outside_block_v3f32() #0 { ; GCN-NEXT: s_add_u32 s16, s16, func_v3f32@rel32@lo+4 ; GCN-NEXT: s_addc_u32 s17, s17, func_v3f32@rel32@hi+12 ; GCN-NEXT: s_swappc_b64 s[30:31], s[16:17] -; GCN-NEXT: v_readlane_b32 s31, v40, 1 -; GCN-NEXT: v_readlane_b32 s30, v40, 0 +; GCN-NEXT: v_readlane_b32 s4, v40, 0 +; GCN-NEXT: 
v_readlane_b32 s5, v40, 1 ; GCN-NEXT: s_addk_i32 s32, 0xfc00 ; GCN-NEXT: v_readlane_b32 s33, v40, 2 -; GCN-NEXT: s_or_saveexec_b64 s[4:5], -1 +; GCN-NEXT: s_or_saveexec_b64 s[6:7], -1 ; GCN-NEXT: buffer_load_dword v40, off, s[0:3], s32 ; 4-byte Folded Reload -; GCN-NEXT: s_mov_b64 exec, s[4:5] +; GCN-NEXT: s_mov_b64 exec, s[6:7] ; GCN-NEXT: s_waitcnt vmcnt(0) -; GCN-NEXT: s_setpc_b64 s[30:31] +; GCN-NEXT: s_setpc_b64 s[4:5] bb0: %split.ret.type = call <3 x float> @func_v3f32() br label %bb1 @@ -107,15 +107,15 @@ define half @call_split_type_used_outside_block_v4f16() #0 { ; GCN-NEXT: s_add_u32 s16, s16, func_v4f16@rel32@lo+4 ; GCN-NEXT: s_addc_u32 s17, s17, func_v4f16@rel32@hi+12 ; GCN-NEXT: s_swappc_b64 s[30:31], s[16:17] -; GCN-NEXT: v_readlane_b32 s31, v40, 1 -; GCN-NEXT: v_readlane_b32 s30, v40, 0 +; GCN-NEXT: v_readlane_b32 s4, v40, 0 +; GCN-NEXT: v_readlane_b32 s5, v40, 1 ; GCN-NEXT: s_addk_i32 s32, 0xfc00 ; GCN-NEXT: v_readlane_b32 s33, v40, 2 -; GCN-NEXT: s_or_saveexec_b64 s[4:5], -1 +; GCN-NEXT: s_or_saveexec_b64 s[6:7], -1 ; GCN-NEXT: buffer_load_dword v40, off, s[0:3], s32 ; 4-byte Folded Reload -; GCN-NEXT: s_mov_b64 exec, s[4:5] +; GCN-NEXT: s_mov_b64 exec, s[6:7] ; GCN-NEXT: s_waitcnt vmcnt(0) -; GCN-NEXT: s_setpc_b64 s[30:31] +; GCN-NEXT: s_setpc_b64 s[4:5] bb0: %split.ret.type = call <4 x half> @func_v4f16() br label %bb1 @@ -141,16 +141,16 @@ define { i32, half } @call_split_type_used_outside_block_struct() #0 { ; GCN-NEXT: s_add_u32 s16, s16, func_struct@rel32@lo+4 ; GCN-NEXT: s_addc_u32 s17, s17, func_struct@rel32@hi+12 ; GCN-NEXT: s_swappc_b64 s[30:31], s[16:17] +; GCN-NEXT: v_readlane_b32 s4, v40, 0 ; GCN-NEXT: v_mov_b32_e32 v1, v4 -; GCN-NEXT: v_readlane_b32 s31, v40, 1 -; GCN-NEXT: v_readlane_b32 s30, v40, 0 +; GCN-NEXT: v_readlane_b32 s5, v40, 1 ; GCN-NEXT: s_addk_i32 s32, 0xfc00 ; GCN-NEXT: v_readlane_b32 s33, v40, 2 -; GCN-NEXT: s_or_saveexec_b64 s[4:5], -1 +; GCN-NEXT: s_or_saveexec_b64 s[6:7], -1 ; GCN-NEXT: buffer_load_dword v40, off, s[0:3], s32 ; 4-byte Folded Reload -; GCN-NEXT: s_mov_b64 exec, s[4:5] +; GCN-NEXT: s_mov_b64 exec, s[6:7] ; GCN-NEXT: s_waitcnt vmcnt(0) -; GCN-NEXT: s_setpc_b64 s[30:31] +; GCN-NEXT: s_setpc_b64 s[4:5] bb0: %split.ret.type = call { <4 x i32>, <4 x half> } @func_struct() br label %bb1 diff --git a/llvm/test/CodeGen/AMDGPU/fdiv-nofpexcept.ll b/llvm/test/CodeGen/AMDGPU/fdiv-nofpexcept.ll index cf610f9436ac..121d5d35bfe7 100644 --- a/llvm/test/CodeGen/AMDGPU/fdiv-nofpexcept.ll +++ b/llvm/test/CodeGen/AMDGPU/fdiv-nofpexcept.ll @@ -7,29 +7,32 @@ define float @fdiv_f32(float %a, float %b) #0 { ; GCN-LABEL: name: fdiv_f32 ; GCN: bb.0.entry: - ; GCN-NEXT: liveins: $vgpr0, $vgpr1 + ; GCN-NEXT: liveins: $vgpr0, $vgpr1, $sgpr30_sgpr31 ; GCN-NEXT: {{ $}} - ; GCN-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; GCN-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GCN-NEXT: %4:vgpr_32, %5:sreg_64 = nofpexcept V_DIV_SCALE_F32_e64 0, [[COPY1]], 0, [[COPY]], 0, [[COPY1]], 0, 0, implicit $mode, implicit $exec - ; GCN-NEXT: %6:vgpr_32, %7:sreg_64 = nofpexcept V_DIV_SCALE_F32_e64 0, [[COPY]], 0, [[COPY]], 0, [[COPY1]], 0, 0, implicit $mode, implicit $exec - ; GCN-NEXT: %8:vgpr_32 = nofpexcept V_RCP_F32_e64 0, %6, 0, 0, implicit $mode, implicit $exec + ; GCN-NEXT: [[COPY:%[0-9]+]]:sreg_64 = COPY $sgpr30_sgpr31 + ; GCN-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 + ; GCN-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr0 + ; GCN-NEXT: %6:vgpr_32, %7:sreg_64 = nofpexcept V_DIV_SCALE_F32_e64 0, [[COPY2]], 0, [[COPY1]], 0, [[COPY2]], 0, 0, implicit $mode, 
implicit $exec + ; GCN-NEXT: %8:vgpr_32, %9:sreg_64 = nofpexcept V_DIV_SCALE_F32_e64 0, [[COPY1]], 0, [[COPY1]], 0, [[COPY2]], 0, 0, implicit $mode, implicit $exec + ; GCN-NEXT: %10:vgpr_32 = nofpexcept V_RCP_F32_e64 0, %8, 0, 0, implicit $mode, implicit $exec ; GCN-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 3 ; GCN-NEXT: [[S_MOV_B32_1:%[0-9]+]]:sgpr_32 = S_MOV_B32 1065353216 ; GCN-NEXT: [[S_MOV_B32_2:%[0-9]+]]:sreg_32 = S_MOV_B32 0 ; GCN-NEXT: S_SETREG_B32_mode killed [[S_MOV_B32_]], 2305, implicit-def $mode, implicit $mode - ; GCN-NEXT: %12:vgpr_32 = nofpexcept V_FMA_F32_e64 1, %6, 0, %8, 0, killed [[S_MOV_B32_1]], 0, 0, implicit $mode, implicit $exec - ; GCN-NEXT: %13:vgpr_32 = nofpexcept V_FMA_F32_e64 0, killed %12, 0, %8, 0, %8, 0, 0, implicit $mode, implicit $exec - ; GCN-NEXT: %14:vgpr_32 = nofpexcept V_MUL_F32_e64 0, %4, 0, %13, 0, 0, implicit $mode, implicit $exec - ; GCN-NEXT: %15:vgpr_32 = nofpexcept V_FMA_F32_e64 1, %6, 0, %14, 0, %4, 0, 0, implicit $mode, implicit $exec - ; GCN-NEXT: %16:vgpr_32 = nofpexcept V_FMA_F32_e64 0, killed %15, 0, %13, 0, %14, 0, 0, implicit $mode, implicit $exec - ; GCN-NEXT: %17:vgpr_32 = nofpexcept V_FMA_F32_e64 1, %6, 0, %16, 0, %4, 0, 0, implicit $mode, implicit $exec + ; GCN-NEXT: %14:vgpr_32 = nofpexcept V_FMA_F32_e64 1, %8, 0, %10, 0, killed [[S_MOV_B32_1]], 0, 0, implicit $mode, implicit $exec + ; GCN-NEXT: %15:vgpr_32 = nofpexcept V_FMA_F32_e64 0, killed %14, 0, %10, 0, %10, 0, 0, implicit $mode, implicit $exec + ; GCN-NEXT: %16:vgpr_32 = nofpexcept V_MUL_F32_e64 0, %6, 0, %15, 0, 0, implicit $mode, implicit $exec + ; GCN-NEXT: %17:vgpr_32 = nofpexcept V_FMA_F32_e64 1, %8, 0, %16, 0, %6, 0, 0, implicit $mode, implicit $exec + ; GCN-NEXT: %18:vgpr_32 = nofpexcept V_FMA_F32_e64 0, killed %17, 0, %15, 0, %16, 0, 0, implicit $mode, implicit $exec + ; GCN-NEXT: %19:vgpr_32 = nofpexcept V_FMA_F32_e64 1, %8, 0, %18, 0, %6, 0, 0, implicit $mode, implicit $exec ; GCN-NEXT: S_SETREG_B32_mode killed [[S_MOV_B32_2]], 2305, implicit-def dead $mode, implicit $mode - ; GCN-NEXT: $vcc = COPY %5 - ; GCN-NEXT: %18:vgpr_32 = nofpexcept V_DIV_FMAS_F32_e64 0, killed %17, 0, %13, 0, %16, 0, 0, implicit $mode, implicit $vcc, implicit $exec - ; GCN-NEXT: %19:vgpr_32 = nofpexcept V_DIV_FIXUP_F32_e64 0, killed %18, 0, [[COPY]], 0, [[COPY1]], 0, 0, implicit $mode, implicit $exec - ; GCN-NEXT: $vgpr0 = COPY %19 - ; GCN-NEXT: SI_RETURN implicit $vgpr0 + ; GCN-NEXT: $vcc = COPY %7 + ; GCN-NEXT: %20:vgpr_32 = nofpexcept V_DIV_FMAS_F32_e64 0, killed %19, 0, %15, 0, %18, 0, 0, implicit $mode, implicit $vcc, implicit $exec + ; GCN-NEXT: %21:vgpr_32 = nofpexcept V_DIV_FIXUP_F32_e64 0, killed %20, 0, [[COPY1]], 0, [[COPY2]], 0, 0, implicit $mode, implicit $exec + ; GCN-NEXT: [[COPY3:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY]] + ; GCN-NEXT: $vgpr0 = COPY %21 + ; GCN-NEXT: [[COPY4:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY3]] + ; GCN-NEXT: S_SETPC_B64_return [[COPY4]], implicit $vgpr0 entry: %fdiv = fdiv float %a, %b ret float %fdiv @@ -38,29 +41,32 @@ entry: define float @fdiv_nnan_f32(float %a, float %b) #0 { ; GCN-LABEL: name: fdiv_nnan_f32 ; GCN: bb.0.entry: - ; GCN-NEXT: liveins: $vgpr0, $vgpr1 + ; GCN-NEXT: liveins: $vgpr0, $vgpr1, $sgpr30_sgpr31 ; GCN-NEXT: {{ $}} - ; GCN-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; GCN-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GCN-NEXT: %4:vgpr_32, %5:sreg_64 = nnan nofpexcept V_DIV_SCALE_F32_e64 0, [[COPY1]], 0, [[COPY]], 0, [[COPY1]], 0, 0, implicit $mode, implicit $exec - ; GCN-NEXT: %6:vgpr_32, %7:sreg_64 = nnan nofpexcept 
V_DIV_SCALE_F32_e64 0, [[COPY]], 0, [[COPY]], 0, [[COPY1]], 0, 0, implicit $mode, implicit $exec - ; GCN-NEXT: %8:vgpr_32 = nnan nofpexcept V_RCP_F32_e64 0, %6, 0, 0, implicit $mode, implicit $exec + ; GCN-NEXT: [[COPY:%[0-9]+]]:sreg_64 = COPY $sgpr30_sgpr31 + ; GCN-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 + ; GCN-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr0 + ; GCN-NEXT: %6:vgpr_32, %7:sreg_64 = nnan nofpexcept V_DIV_SCALE_F32_e64 0, [[COPY2]], 0, [[COPY1]], 0, [[COPY2]], 0, 0, implicit $mode, implicit $exec + ; GCN-NEXT: %8:vgpr_32, %9:sreg_64 = nnan nofpexcept V_DIV_SCALE_F32_e64 0, [[COPY1]], 0, [[COPY1]], 0, [[COPY2]], 0, 0, implicit $mode, implicit $exec + ; GCN-NEXT: %10:vgpr_32 = nnan nofpexcept V_RCP_F32_e64 0, %8, 0, 0, implicit $mode, implicit $exec ; GCN-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 3 ; GCN-NEXT: [[S_MOV_B32_1:%[0-9]+]]:sgpr_32 = S_MOV_B32 1065353216 ; GCN-NEXT: [[S_MOV_B32_2:%[0-9]+]]:sreg_32 = S_MOV_B32 0 ; GCN-NEXT: S_SETREG_B32_mode killed [[S_MOV_B32_]], 2305, implicit-def $mode, implicit $mode - ; GCN-NEXT: %12:vgpr_32 = nnan nofpexcept V_FMA_F32_e64 1, %6, 0, %8, 0, killed [[S_MOV_B32_1]], 0, 0, implicit $mode, implicit $exec - ; GCN-NEXT: %13:vgpr_32 = nnan nofpexcept V_FMA_F32_e64 0, killed %12, 0, %8, 0, %8, 0, 0, implicit $mode, implicit $exec - ; GCN-NEXT: %14:vgpr_32 = nnan nofpexcept V_MUL_F32_e64 0, %4, 0, %13, 0, 0, implicit $mode, implicit $exec - ; GCN-NEXT: %15:vgpr_32 = nnan nofpexcept V_FMA_F32_e64 1, %6, 0, %14, 0, %4, 0, 0, implicit $mode, implicit $exec - ; GCN-NEXT: %16:vgpr_32 = nnan nofpexcept V_FMA_F32_e64 0, killed %15, 0, %13, 0, %14, 0, 0, implicit $mode, implicit $exec - ; GCN-NEXT: %17:vgpr_32 = nnan nofpexcept V_FMA_F32_e64 1, %6, 0, %16, 0, %4, 0, 0, implicit $mode, implicit $exec + ; GCN-NEXT: %14:vgpr_32 = nnan nofpexcept V_FMA_F32_e64 1, %8, 0, %10, 0, killed [[S_MOV_B32_1]], 0, 0, implicit $mode, implicit $exec + ; GCN-NEXT: %15:vgpr_32 = nnan nofpexcept V_FMA_F32_e64 0, killed %14, 0, %10, 0, %10, 0, 0, implicit $mode, implicit $exec + ; GCN-NEXT: %16:vgpr_32 = nnan nofpexcept V_MUL_F32_e64 0, %6, 0, %15, 0, 0, implicit $mode, implicit $exec + ; GCN-NEXT: %17:vgpr_32 = nnan nofpexcept V_FMA_F32_e64 1, %8, 0, %16, 0, %6, 0, 0, implicit $mode, implicit $exec + ; GCN-NEXT: %18:vgpr_32 = nnan nofpexcept V_FMA_F32_e64 0, killed %17, 0, %15, 0, %16, 0, 0, implicit $mode, implicit $exec + ; GCN-NEXT: %19:vgpr_32 = nnan nofpexcept V_FMA_F32_e64 1, %8, 0, %18, 0, %6, 0, 0, implicit $mode, implicit $exec ; GCN-NEXT: S_SETREG_B32_mode killed [[S_MOV_B32_2]], 2305, implicit-def dead $mode, implicit $mode - ; GCN-NEXT: $vcc = COPY %5 - ; GCN-NEXT: %18:vgpr_32 = nnan nofpexcept V_DIV_FMAS_F32_e64 0, killed %17, 0, %13, 0, %16, 0, 0, implicit $mode, implicit $vcc, implicit $exec - ; GCN-NEXT: %19:vgpr_32 = nnan nofpexcept V_DIV_FIXUP_F32_e64 0, killed %18, 0, [[COPY]], 0, [[COPY1]], 0, 0, implicit $mode, implicit $exec - ; GCN-NEXT: $vgpr0 = COPY %19 - ; GCN-NEXT: SI_RETURN implicit $vgpr0 + ; GCN-NEXT: $vcc = COPY %7 + ; GCN-NEXT: %20:vgpr_32 = nnan nofpexcept V_DIV_FMAS_F32_e64 0, killed %19, 0, %15, 0, %18, 0, 0, implicit $mode, implicit $vcc, implicit $exec + ; GCN-NEXT: %21:vgpr_32 = nnan nofpexcept V_DIV_FIXUP_F32_e64 0, killed %20, 0, [[COPY1]], 0, [[COPY2]], 0, 0, implicit $mode, implicit $exec + ; GCN-NEXT: [[COPY3:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY]] + ; GCN-NEXT: $vgpr0 = COPY %21 + ; GCN-NEXT: [[COPY4:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY3]] + ; GCN-NEXT: S_SETPC_B64_return [[COPY4]], implicit $vgpr0 entry: %fdiv = 
fdiv nnan float %a, %b ret float %fdiv diff --git a/llvm/test/CodeGen/AMDGPU/fix-frame-ptr-reg-copy-livein.ll b/llvm/test/CodeGen/AMDGPU/fix-frame-ptr-reg-copy-livein.ll index 47287f67b01e..408e00f1f37d 100644 --- a/llvm/test/CodeGen/AMDGPU/fix-frame-ptr-reg-copy-livein.ll +++ b/llvm/test/CodeGen/AMDGPU/fix-frame-ptr-reg-copy-livein.ll @@ -10,17 +10,17 @@ define i32 @fp_save_restore_in_temp_sgpr(%struct.Data addrspace(5)* nocapture readonly byval(%struct.Data) align 4 %arg) #0 { ; GCN-LABEL: name: fp_save_restore_in_temp_sgpr ; GCN: bb.0.begin: - ; GCN: liveins: $sgpr11 + ; GCN: liveins: $sgpr11, $sgpr30_sgpr31 ; GCN: $sgpr11 = frame-setup COPY $sgpr33 ; GCN: $sgpr33 = frame-setup COPY $sgpr32 ; GCN: bb.1.lp_end: - ; GCN: liveins: $sgpr10, $sgpr11, $vgpr1, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9 + ; GCN: liveins: $sgpr10, $sgpr11, $vgpr1, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9, $sgpr30_sgpr31 ; GCN: bb.2.lp_begin: - ; GCN: liveins: $sgpr10, $sgpr11, $vgpr1, $sgpr4_sgpr5, $sgpr6_sgpr7 + ; GCN: liveins: $sgpr10, $sgpr11, $vgpr1, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr30_sgpr31 ; GCN: bb.3.Flow: - ; GCN: liveins: $sgpr10, $sgpr11, $vgpr0, $vgpr1, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9 + ; GCN: liveins: $sgpr10, $sgpr11, $vgpr0, $vgpr1, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9, $sgpr30_sgpr31 ; GCN: bb.4.end: - ; GCN: liveins: $sgpr11, $vgpr0, $sgpr4_sgpr5 + ; GCN: liveins: $sgpr11, $vgpr0, $sgpr4_sgpr5, $sgpr30_sgpr31 ; GCN: $sgpr33 = frame-destroy COPY $sgpr11 begin: br label %lp_begin diff --git a/llvm/test/CodeGen/AMDGPU/fpow.ll b/llvm/test/CodeGen/AMDGPU/fpow.ll index bf783cba5ace..ea461cab629d 100644 --- a/llvm/test/CodeGen/AMDGPU/fpow.ll +++ b/llvm/test/CodeGen/AMDGPU/fpow.ll @@ -202,9 +202,9 @@ define <2 x half> @v_pow_v2f16(<2 x half> %x, <2 x half> %y) { ; GFX8-NEXT: v_log_f32_e32 v0, v0 ; GFX8-NEXT: v_mul_legacy_f32_e32 v2, v3, v2 ; GFX8-NEXT: v_mul_legacy_f32_e32 v0, v1, v0 -; GFX8-NEXT: v_exp_f32_e32 v1, v2 +; GFX8-NEXT: v_exp_f32_e32 v2, v2 ; GFX8-NEXT: v_exp_f32_e32 v0, v0 -; GFX8-NEXT: v_cvt_f16_f32_sdwa v1, v1 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD +; GFX8-NEXT: v_cvt_f16_f32_sdwa v1, v2 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD ; GFX8-NEXT: v_cvt_f16_f32_e32 v0, v0 ; GFX8-NEXT: v_or_b32_e32 v0, v0, v1 ; GFX8-NEXT: s_setpc_b64 s[30:31] @@ -220,9 +220,9 @@ define <2 x half> @v_pow_v2f16(<2 x half> %x, <2 x half> %y) { ; GFX9-NEXT: v_log_f32_e32 v0, v0 ; GFX9-NEXT: v_mul_legacy_f32_e32 v2, v3, v2 ; GFX9-NEXT: v_mul_legacy_f32_e32 v0, v1, v0 -; GFX9-NEXT: v_exp_f32_e32 v1, v2 +; GFX9-NEXT: v_exp_f32_e32 v2, v2 ; GFX9-NEXT: v_exp_f32_e32 v0, v0 -; GFX9-NEXT: v_cvt_f16_f32_e32 v1, v1 +; GFX9-NEXT: v_cvt_f16_f32_e32 v1, v2 ; GFX9-NEXT: v_cvt_f16_f32_e32 v0, v0 ; GFX9-NEXT: v_pack_b32_f16 v0, v0, v1 ; GFX9-NEXT: s_setpc_b64 s[30:31] @@ -238,9 +238,9 @@ define <2 x half> @v_pow_v2f16(<2 x half> %x, <2 x half> %y) { ; GFX90A-NEXT: v_log_f32_e32 v0, v0 ; GFX90A-NEXT: v_mul_legacy_f32 v2, v3, v2 ; GFX90A-NEXT: v_mul_legacy_f32 v0, v1, v0 -; GFX90A-NEXT: v_exp_f32_e32 v1, v2 +; GFX90A-NEXT: v_exp_f32_e32 v2, v2 ; GFX90A-NEXT: v_exp_f32_e32 v0, v0 -; GFX90A-NEXT: v_cvt_f16_f32_e32 v1, v1 +; GFX90A-NEXT: v_cvt_f16_f32_e32 v1, v2 ; GFX90A-NEXT: v_cvt_f16_f32_e32 v0, v0 ; GFX90A-NEXT: v_pack_b32_f16 v0, v0, v1 ; GFX90A-NEXT: s_setpc_b64 s[30:31] @@ -302,9 +302,9 @@ define <2 x half> @v_pow_v2f16_fneg_lhs(<2 x half> %x, <2 x half> %y) { ; GFX8-NEXT: v_log_f32_e32 v0, v0 ; GFX8-NEXT: v_mul_legacy_f32_e32 v2, v3, v2 ; GFX8-NEXT: v_mul_legacy_f32_e32 v0, v1, v0 -; GFX8-NEXT: 
v_exp_f32_e32 v1, v2 +; GFX8-NEXT: v_exp_f32_e32 v2, v2 ; GFX8-NEXT: v_exp_f32_e32 v0, v0 -; GFX8-NEXT: v_cvt_f16_f32_sdwa v1, v1 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD +; GFX8-NEXT: v_cvt_f16_f32_sdwa v1, v2 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD ; GFX8-NEXT: v_cvt_f16_f32_e32 v0, v0 ; GFX8-NEXT: v_or_b32_e32 v0, v0, v1 ; GFX8-NEXT: s_setpc_b64 s[30:31] @@ -320,9 +320,9 @@ define <2 x half> @v_pow_v2f16_fneg_lhs(<2 x half> %x, <2 x half> %y) { ; GFX9-NEXT: v_log_f32_e32 v0, v0 ; GFX9-NEXT: v_mul_legacy_f32_e32 v2, v3, v2 ; GFX9-NEXT: v_mul_legacy_f32_e32 v0, v1, v0 -; GFX9-NEXT: v_exp_f32_e32 v1, v2 +; GFX9-NEXT: v_exp_f32_e32 v2, v2 ; GFX9-NEXT: v_exp_f32_e32 v0, v0 -; GFX9-NEXT: v_cvt_f16_f32_e32 v1, v1 +; GFX9-NEXT: v_cvt_f16_f32_e32 v1, v2 ; GFX9-NEXT: v_cvt_f16_f32_e32 v0, v0 ; GFX9-NEXT: v_pack_b32_f16 v0, v0, v1 ; GFX9-NEXT: s_setpc_b64 s[30:31] @@ -338,9 +338,9 @@ define <2 x half> @v_pow_v2f16_fneg_lhs(<2 x half> %x, <2 x half> %y) { ; GFX90A-NEXT: v_log_f32_e32 v0, v0 ; GFX90A-NEXT: v_mul_legacy_f32 v2, v3, v2 ; GFX90A-NEXT: v_mul_legacy_f32 v0, v1, v0 -; GFX90A-NEXT: v_exp_f32_e32 v1, v2 +; GFX90A-NEXT: v_exp_f32_e32 v2, v2 ; GFX90A-NEXT: v_exp_f32_e32 v0, v0 -; GFX90A-NEXT: v_cvt_f16_f32_e32 v1, v1 +; GFX90A-NEXT: v_cvt_f16_f32_e32 v1, v2 ; GFX90A-NEXT: v_cvt_f16_f32_e32 v0, v0 ; GFX90A-NEXT: v_pack_b32_f16 v0, v0, v1 ; GFX90A-NEXT: s_setpc_b64 s[30:31] @@ -403,9 +403,9 @@ define <2 x half> @v_pow_v2f16_fneg_rhs(<2 x half> %x, <2 x half> %y) { ; GFX8-NEXT: v_log_f32_e32 v0, v0 ; GFX8-NEXT: v_mul_legacy_f32_e32 v2, v3, v2 ; GFX8-NEXT: v_mul_legacy_f32_e32 v0, v1, v0 -; GFX8-NEXT: v_exp_f32_e32 v1, v2 +; GFX8-NEXT: v_exp_f32_e32 v2, v2 ; GFX8-NEXT: v_exp_f32_e32 v0, v0 -; GFX8-NEXT: v_cvt_f16_f32_sdwa v1, v1 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD +; GFX8-NEXT: v_cvt_f16_f32_sdwa v1, v2 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD ; GFX8-NEXT: v_cvt_f16_f32_e32 v0, v0 ; GFX8-NEXT: v_or_b32_e32 v0, v0, v1 ; GFX8-NEXT: s_setpc_b64 s[30:31] @@ -421,9 +421,9 @@ define <2 x half> @v_pow_v2f16_fneg_rhs(<2 x half> %x, <2 x half> %y) { ; GFX9-NEXT: v_log_f32_e32 v0, v0 ; GFX9-NEXT: v_mul_legacy_f32_e32 v2, v3, v2 ; GFX9-NEXT: v_mul_legacy_f32_e32 v0, v1, v0 -; GFX9-NEXT: v_exp_f32_e32 v1, v2 +; GFX9-NEXT: v_exp_f32_e32 v2, v2 ; GFX9-NEXT: v_exp_f32_e32 v0, v0 -; GFX9-NEXT: v_cvt_f16_f32_e32 v1, v1 +; GFX9-NEXT: v_cvt_f16_f32_e32 v1, v2 ; GFX9-NEXT: v_cvt_f16_f32_e32 v0, v0 ; GFX9-NEXT: v_pack_b32_f16 v0, v0, v1 ; GFX9-NEXT: s_setpc_b64 s[30:31] @@ -439,9 +439,9 @@ define <2 x half> @v_pow_v2f16_fneg_rhs(<2 x half> %x, <2 x half> %y) { ; GFX90A-NEXT: v_log_f32_e32 v0, v0 ; GFX90A-NEXT: v_mul_legacy_f32 v2, v3, v2 ; GFX90A-NEXT: v_mul_legacy_f32 v0, v1, v0 -; GFX90A-NEXT: v_exp_f32_e32 v1, v2 +; GFX90A-NEXT: v_exp_f32_e32 v2, v2 ; GFX90A-NEXT: v_exp_f32_e32 v0, v0 -; GFX90A-NEXT: v_cvt_f16_f32_e32 v1, v1 +; GFX90A-NEXT: v_cvt_f16_f32_e32 v1, v2 ; GFX90A-NEXT: v_cvt_f16_f32_e32 v0, v0 ; GFX90A-NEXT: v_pack_b32_f16 v0, v0, v1 ; GFX90A-NEXT: s_setpc_b64 s[30:31] @@ -509,9 +509,9 @@ define <2 x half> @v_pow_v2f16_fneg_lhs_rhs(<2 x half> %x, <2 x half> %y) { ; GFX8-NEXT: v_log_f32_e32 v0, v0 ; GFX8-NEXT: v_mul_legacy_f32_e32 v2, v3, v2 ; GFX8-NEXT: v_mul_legacy_f32_e32 v0, v1, v0 -; GFX8-NEXT: v_exp_f32_e32 v1, v2 +; GFX8-NEXT: v_exp_f32_e32 v2, v2 ; GFX8-NEXT: v_exp_f32_e32 v0, v0 -; GFX8-NEXT: v_cvt_f16_f32_sdwa v1, v1 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD +; GFX8-NEXT: v_cvt_f16_f32_sdwa v1, v2 dst_sel:WORD_1 dst_unused:UNUSED_PAD 
src0_sel:DWORD ; GFX8-NEXT: v_cvt_f16_f32_e32 v0, v0 ; GFX8-NEXT: v_or_b32_e32 v0, v0, v1 ; GFX8-NEXT: s_setpc_b64 s[30:31] @@ -527,9 +527,9 @@ define <2 x half> @v_pow_v2f16_fneg_lhs_rhs(<2 x half> %x, <2 x half> %y) { ; GFX9-NEXT: v_log_f32_e32 v0, v0 ; GFX9-NEXT: v_mul_legacy_f32_e32 v2, v3, v2 ; GFX9-NEXT: v_mul_legacy_f32_e32 v0, v1, v0 -; GFX9-NEXT: v_exp_f32_e32 v1, v2 +; GFX9-NEXT: v_exp_f32_e32 v2, v2 ; GFX9-NEXT: v_exp_f32_e32 v0, v0 -; GFX9-NEXT: v_cvt_f16_f32_e32 v1, v1 +; GFX9-NEXT: v_cvt_f16_f32_e32 v1, v2 ; GFX9-NEXT: v_cvt_f16_f32_e32 v0, v0 ; GFX9-NEXT: v_pack_b32_f16 v0, v0, v1 ; GFX9-NEXT: s_setpc_b64 s[30:31] @@ -545,9 +545,9 @@ define <2 x half> @v_pow_v2f16_fneg_lhs_rhs(<2 x half> %x, <2 x half> %y) { ; GFX90A-NEXT: v_log_f32_e32 v0, v0 ; GFX90A-NEXT: v_mul_legacy_f32 v2, v3, v2 ; GFX90A-NEXT: v_mul_legacy_f32 v0, v1, v0 -; GFX90A-NEXT: v_exp_f32_e32 v1, v2 +; GFX90A-NEXT: v_exp_f32_e32 v2, v2 ; GFX90A-NEXT: v_exp_f32_e32 v0, v0 -; GFX90A-NEXT: v_cvt_f16_f32_e32 v1, v1 +; GFX90A-NEXT: v_cvt_f16_f32_e32 v1, v2 ; GFX90A-NEXT: v_cvt_f16_f32_e32 v0, v0 ; GFX90A-NEXT: v_pack_b32_f16 v0, v0, v1 ; GFX90A-NEXT: s_setpc_b64 s[30:31] diff --git a/llvm/test/CodeGen/AMDGPU/frame-setup-without-sgpr-to-vgpr-spills.ll b/llvm/test/CodeGen/AMDGPU/frame-setup-without-sgpr-to-vgpr-spills.ll index 450cae8a4a38..e7121cec90c8 100644 --- a/llvm/test/CodeGen/AMDGPU/frame-setup-without-sgpr-to-vgpr-spills.ll +++ b/llvm/test/CodeGen/AMDGPU/frame-setup-without-sgpr-to-vgpr-spills.ll @@ -25,15 +25,15 @@ define void @callee_with_stack_and_call() #0 { ; SPILL-TO-VGPR-NEXT: s_add_u32 s4, s4, external_void_func_void@rel32@lo+4 ; SPILL-TO-VGPR-NEXT: s_addc_u32 s5, s5, external_void_func_void@rel32@hi+12 ; SPILL-TO-VGPR-NEXT: s_swappc_b64 s[30:31], s[4:5] -; SPILL-TO-VGPR-NEXT: v_readlane_b32 s31, v40, 1 -; SPILL-TO-VGPR-NEXT: v_readlane_b32 s30, v40, 0 +; SPILL-TO-VGPR-NEXT: v_readlane_b32 s4, v40, 0 +; SPILL-TO-VGPR-NEXT: v_readlane_b32 s5, v40, 1 ; SPILL-TO-VGPR-NEXT: s_addk_i32 s32, 0xfc00 ; SPILL-TO-VGPR-NEXT: v_readlane_b32 s33, v40, 2 -; SPILL-TO-VGPR-NEXT: s_or_saveexec_b64 s[4:5], -1 +; SPILL-TO-VGPR-NEXT: s_or_saveexec_b64 s[6:7], -1 ; SPILL-TO-VGPR-NEXT: buffer_load_dword v40, off, s[0:3], s32 offset:4 ; 4-byte Folded Reload -; SPILL-TO-VGPR-NEXT: s_mov_b64 exec, s[4:5] +; SPILL-TO-VGPR-NEXT: s_mov_b64 exec, s[6:7] ; SPILL-TO-VGPR-NEXT: s_waitcnt vmcnt(0) -; SPILL-TO-VGPR-NEXT: s_setpc_b64 s[30:31] +; SPILL-TO-VGPR-NEXT: s_setpc_b64 s[4:5] ; ; NO-SPILL-TO-VGPR-LABEL: callee_with_stack_and_call: ; NO-SPILL-TO-VGPR: ; %bb.0: @@ -43,21 +43,14 @@ define void @callee_with_stack_and_call() #0 { ; NO-SPILL-TO-VGPR-NEXT: s_mov_b32 s33, s32 ; NO-SPILL-TO-VGPR-NEXT: s_addk_i32 s32, 0x800 ; NO-SPILL-TO-VGPR-NEXT: s_mov_b64 s[8:9], exec -; NO-SPILL-TO-VGPR-NEXT: s_mov_b64 exec, 1 +; NO-SPILL-TO-VGPR-NEXT: s_mov_b64 exec, 3 ; NO-SPILL-TO-VGPR-NEXT: buffer_store_dword v1, off, s[0:3], s33 offset:16 ; NO-SPILL-TO-VGPR-NEXT: v_writelane_b32 v1, s30, 0 +; NO-SPILL-TO-VGPR-NEXT: v_writelane_b32 v1, s31, 1 ; NO-SPILL-TO-VGPR-NEXT: buffer_store_dword v1, off, s[0:3], s33 offset:4 ; 4-byte Folded Spill ; NO-SPILL-TO-VGPR-NEXT: buffer_load_dword v1, off, s[0:3], s33 offset:16 ; NO-SPILL-TO-VGPR-NEXT: s_waitcnt vmcnt(0) ; NO-SPILL-TO-VGPR-NEXT: s_mov_b64 exec, s[8:9] -; NO-SPILL-TO-VGPR-NEXT: s_mov_b64 s[8:9], exec -; NO-SPILL-TO-VGPR-NEXT: s_mov_b64 exec, 1 -; NO-SPILL-TO-VGPR-NEXT: buffer_store_dword v1, off, s[0:3], s33 offset:16 -; NO-SPILL-TO-VGPR-NEXT: v_writelane_b32 v1, s31, 0 -; NO-SPILL-TO-VGPR-NEXT: 
buffer_store_dword v1, off, s[0:3], s33 offset:8 ; 4-byte Folded Spill -; NO-SPILL-TO-VGPR-NEXT: buffer_load_dword v1, off, s[0:3], s33 offset:16 -; NO-SPILL-TO-VGPR-NEXT: s_waitcnt vmcnt(0) -; NO-SPILL-TO-VGPR-NEXT: s_mov_b64 exec, s[8:9] ; NO-SPILL-TO-VGPR-NEXT: v_mov_b32_e32 v0, 0 ; NO-SPILL-TO-VGPR-NEXT: buffer_store_dword v0, off, s[0:3], s33 ; NO-SPILL-TO-VGPR-NEXT: s_waitcnt vmcnt(0) @@ -65,29 +58,21 @@ define void @callee_with_stack_and_call() #0 { ; NO-SPILL-TO-VGPR-NEXT: s_add_u32 s4, s4, external_void_func_void@rel32@lo+4 ; NO-SPILL-TO-VGPR-NEXT: s_addc_u32 s5, s5, external_void_func_void@rel32@hi+12 ; NO-SPILL-TO-VGPR-NEXT: s_swappc_b64 s[30:31], s[4:5] -; NO-SPILL-TO-VGPR-NEXT: s_mov_b64 s[4:5], exec -; NO-SPILL-TO-VGPR-NEXT: s_mov_b64 exec, 1 -; NO-SPILL-TO-VGPR-NEXT: buffer_store_dword v1, off, s[0:3], s33 offset:16 -; NO-SPILL-TO-VGPR-NEXT: buffer_load_dword v1, off, s[0:3], s33 offset:8 ; 4-byte Folded Reload -; NO-SPILL-TO-VGPR-NEXT: s_waitcnt vmcnt(0) -; NO-SPILL-TO-VGPR-NEXT: v_readlane_b32 s31, v1, 0 -; NO-SPILL-TO-VGPR-NEXT: buffer_load_dword v1, off, s[0:3], s33 offset:16 -; NO-SPILL-TO-VGPR-NEXT: s_waitcnt vmcnt(0) -; NO-SPILL-TO-VGPR-NEXT: s_mov_b64 exec, s[4:5] -; NO-SPILL-TO-VGPR-NEXT: s_mov_b64 s[4:5], exec -; NO-SPILL-TO-VGPR-NEXT: s_mov_b64 exec, 1 +; NO-SPILL-TO-VGPR-NEXT: s_mov_b64 s[6:7], exec +; NO-SPILL-TO-VGPR-NEXT: s_mov_b64 exec, 3 ; NO-SPILL-TO-VGPR-NEXT: buffer_store_dword v1, off, s[0:3], s33 offset:16 ; NO-SPILL-TO-VGPR-NEXT: buffer_load_dword v1, off, s[0:3], s33 offset:4 ; 4-byte Folded Reload ; NO-SPILL-TO-VGPR-NEXT: s_waitcnt vmcnt(0) -; NO-SPILL-TO-VGPR-NEXT: v_readlane_b32 s30, v1, 0 +; NO-SPILL-TO-VGPR-NEXT: v_readlane_b32 s4, v1, 0 +; NO-SPILL-TO-VGPR-NEXT: v_readlane_b32 s5, v1, 1 ; NO-SPILL-TO-VGPR-NEXT: buffer_load_dword v1, off, s[0:3], s33 offset:16 ; NO-SPILL-TO-VGPR-NEXT: s_waitcnt vmcnt(0) -; NO-SPILL-TO-VGPR-NEXT: s_mov_b64 exec, s[4:5] +; NO-SPILL-TO-VGPR-NEXT: s_mov_b64 exec, s[6:7] ; NO-SPILL-TO-VGPR-NEXT: s_addk_i32 s32, 0xf800 ; NO-SPILL-TO-VGPR-NEXT: buffer_load_dword v0, off, s[0:3], s32 offset:12 ; 4-byte Folded Reload ; NO-SPILL-TO-VGPR-NEXT: s_waitcnt vmcnt(0) ; NO-SPILL-TO-VGPR-NEXT: v_readfirstlane_b32 s33, v0 -; NO-SPILL-TO-VGPR-NEXT: s_setpc_b64 s[30:31] +; NO-SPILL-TO-VGPR-NEXT: s_setpc_b64 s[4:5] %alloca = alloca i32, addrspace(5) store volatile i32 0, i32 addrspace(5)* %alloca call void @external_void_func_void() diff --git a/llvm/test/CodeGen/AMDGPU/fshr.ll b/llvm/test/CodeGen/AMDGPU/fshr.ll index 940614151e2d..5214a02b9f74 100644 --- a/llvm/test/CodeGen/AMDGPU/fshr.ll +++ b/llvm/test/CodeGen/AMDGPU/fshr.ll @@ -949,24 +949,24 @@ define <4 x i16> @v_fshr_v4i16(<4 x i16> %src0, <4 x i16> %src1, <4 x i16> %src2 ; GFX10-NEXT: v_lshrrev_b32_e32 v7, 16, v5 ; GFX10-NEXT: v_lshrrev_b32_e32 v8, 16, v3 ; GFX10-NEXT: v_lshrrev_b32_e32 v11, 16, v4 -; GFX10-NEXT: v_lshrrev_b32_e32 v12, 16, v0 +; GFX10-NEXT: v_lshlrev_b16 v1, 1, v1 ; GFX10-NEXT: v_lshlrev_b16 v6, 1, v6 ; GFX10-NEXT: v_xor_b32_e32 v9, -1, v7 ; GFX10-NEXT: v_lshrrev_b16 v7, v7, v8 +; GFX10-NEXT: v_lshrrev_b32_e32 v8, 16, v0 ; GFX10-NEXT: v_lshlrev_b16 v0, 1, v0 -; GFX10-NEXT: v_xor_b32_e32 v8, -1, v4 -; GFX10-NEXT: v_lshlrev_b16 v1, 1, v1 +; GFX10-NEXT: v_xor_b32_e32 v12, -1, v5 ; GFX10-NEXT: v_lshlrev_b16 v6, v9, v6 -; GFX10-NEXT: v_xor_b32_e32 v9, -1, v5 +; GFX10-NEXT: v_xor_b32_e32 v9, -1, v4 ; GFX10-NEXT: v_lshrrev_b32_e32 v10, 16, v2 -; GFX10-NEXT: v_lshlrev_b16 v12, 1, v12 +; GFX10-NEXT: v_lshlrev_b16 v8, 1, v8 ; GFX10-NEXT: v_xor_b32_e32 v13, -1, v11 -; 
GFX10-NEXT: v_lshlrev_b16 v0, v8, v0 ; GFX10-NEXT: v_lshrrev_b16 v2, v4, v2 -; GFX10-NEXT: v_lshlrev_b16 v1, v9, v1 +; GFX10-NEXT: v_lshlrev_b16 v0, v9, v0 +; GFX10-NEXT: v_lshlrev_b16 v1, v12, v1 ; GFX10-NEXT: v_lshrrev_b16 v3, v5, v3 ; GFX10-NEXT: v_lshrrev_b16 v4, v11, v10 -; GFX10-NEXT: v_lshlrev_b16 v5, v13, v12 +; GFX10-NEXT: v_lshlrev_b16 v5, v13, v8 ; GFX10-NEXT: v_or_b32_e32 v0, v0, v2 ; GFX10-NEXT: v_mov_b32_e32 v2, 0xffff ; GFX10-NEXT: v_or_b32_e32 v1, v1, v3 diff --git a/llvm/test/CodeGen/AMDGPU/gfx-callable-argument-types.ll b/llvm/test/CodeGen/AMDGPU/gfx-callable-argument-types.ll index 44450ffe44ac..989e8bfe7f56 100644 --- a/llvm/test/CodeGen/AMDGPU/gfx-callable-argument-types.ll +++ b/llvm/test/CodeGen/AMDGPU/gfx-callable-argument-types.ll @@ -105,13 +105,13 @@ define amdgpu_gfx void @test_call_external_void_func_i1_imm() #0 { ; GFX9-NEXT: v_writelane_b32 v40, s30, 0 ; GFX9-NEXT: v_mov_b32_e32 v0, 1 ; GFX9-NEXT: v_writelane_b32 v40, s31, 1 -; GFX9-NEXT: s_getpc_b64 s[34:35] -; GFX9-NEXT: s_add_u32 s34, s34, external_void_func_i1@rel32@lo+4 -; GFX9-NEXT: s_addc_u32 s35, s35, external_void_func_i1@rel32@hi+12 +; GFX9-NEXT: s_getpc_b64 s[30:31] +; GFX9-NEXT: s_add_u32 s30, s30, external_void_func_i1@rel32@lo+4 +; GFX9-NEXT: s_addc_u32 s31, s31, external_void_func_i1@rel32@hi+12 ; GFX9-NEXT: buffer_store_byte v0, off, s[0:3], s32 -; GFX9-NEXT: s_swappc_b64 s[30:31], s[34:35] -; GFX9-NEXT: v_readlane_b32 s31, v40, 1 +; GFX9-NEXT: s_swappc_b64 s[30:31], s[30:31] ; GFX9-NEXT: v_readlane_b32 s30, v40, 0 +; GFX9-NEXT: v_readlane_b32 s31, v40, 1 ; GFX9-NEXT: s_addk_i32 s32, 0xfc00 ; GFX9-NEXT: v_readlane_b32 s33, v40, 2 ; GFX9-NEXT: s_or_saveexec_b64 s[34:35], -1 @@ -132,15 +132,15 @@ define amdgpu_gfx void @test_call_external_void_func_i1_imm() #0 { ; GFX10-NEXT: v_mov_b32_e32 v0, 1 ; GFX10-NEXT: s_mov_b32 s33, s32 ; GFX10-NEXT: s_addk_i32 s32, 0x200 -; GFX10-NEXT: s_getpc_b64 s[34:35] -; GFX10-NEXT: s_add_u32 s34, s34, external_void_func_i1@rel32@lo+4 -; GFX10-NEXT: s_addc_u32 s35, s35, external_void_func_i1@rel32@hi+12 ; GFX10-NEXT: v_writelane_b32 v40, s30, 0 ; GFX10-NEXT: buffer_store_byte v0, off, s[0:3], s32 ; GFX10-NEXT: v_writelane_b32 v40, s31, 1 -; GFX10-NEXT: s_swappc_b64 s[30:31], s[34:35] -; GFX10-NEXT: v_readlane_b32 s31, v40, 1 +; GFX10-NEXT: s_getpc_b64 s[30:31] +; GFX10-NEXT: s_add_u32 s30, s30, external_void_func_i1@rel32@lo+4 +; GFX10-NEXT: s_addc_u32 s31, s31, external_void_func_i1@rel32@hi+12 +; GFX10-NEXT: s_swappc_b64 s[30:31], s[30:31] ; GFX10-NEXT: v_readlane_b32 s30, v40, 0 +; GFX10-NEXT: v_readlane_b32 s31, v40, 1 ; GFX10-NEXT: s_addk_i32 s32, 0xfe00 ; GFX10-NEXT: v_readlane_b32 s33, v40, 2 ; GFX10-NEXT: s_or_saveexec_b32 s34, -1 @@ -169,8 +169,8 @@ define amdgpu_gfx void @test_call_external_void_func_i1_imm() #0 { ; GFX10-SCRATCH-NEXT: scratch_store_byte off, v0, s32 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s31, 1 ; GFX10-SCRATCH-NEXT: s_swappc_b64 s[30:31], s[0:1] -; GFX10-SCRATCH-NEXT: v_readlane_b32 s31, v40, 1 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s30, v40, 0 +; GFX10-SCRATCH-NEXT: v_readlane_b32 s31, v40, 1 ; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, -16 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s33, v40, 2 ; GFX10-SCRATCH-NEXT: s_or_saveexec_b32 s0, -1 @@ -197,14 +197,14 @@ define amdgpu_gfx void @test_call_external_void_func_i1_signext(i32) #0 { ; GFX9-NEXT: s_addk_i32 s32, 0x400 ; GFX9-NEXT: v_writelane_b32 v40, s30, 0 ; GFX9-NEXT: v_writelane_b32 v40, s31, 1 -; GFX9-NEXT: s_getpc_b64 s[34:35] -; GFX9-NEXT: s_add_u32 s34, s34, 
external_void_func_i1_signext@rel32@lo+4 -; GFX9-NEXT: s_addc_u32 s35, s35, external_void_func_i1_signext@rel32@hi+12 +; GFX9-NEXT: s_getpc_b64 s[30:31] +; GFX9-NEXT: s_add_u32 s30, s30, external_void_func_i1_signext@rel32@lo+4 +; GFX9-NEXT: s_addc_u32 s31, s31, external_void_func_i1_signext@rel32@hi+12 ; GFX9-NEXT: v_and_b32_e32 v0, 1, v0 ; GFX9-NEXT: buffer_store_byte v0, off, s[0:3], s32 -; GFX9-NEXT: s_swappc_b64 s[30:31], s[34:35] -; GFX9-NEXT: v_readlane_b32 s31, v40, 1 +; GFX9-NEXT: s_swappc_b64 s[30:31], s[30:31] ; GFX9-NEXT: v_readlane_b32 s30, v40, 0 +; GFX9-NEXT: v_readlane_b32 s31, v40, 1 ; GFX9-NEXT: s_addk_i32 s32, 0xfc00 ; GFX9-NEXT: v_readlane_b32 s33, v40, 2 ; GFX9-NEXT: s_or_saveexec_b64 s[34:35], -1 @@ -226,16 +226,16 @@ define amdgpu_gfx void @test_call_external_void_func_i1_signext(i32) #0 { ; GFX10-NEXT: v_writelane_b32 v40, s33, 2 ; GFX10-NEXT: s_mov_b32 s33, s32 ; GFX10-NEXT: s_addk_i32 s32, 0x200 -; GFX10-NEXT: s_getpc_b64 s[34:35] -; GFX10-NEXT: s_add_u32 s34, s34, external_void_func_i1_signext@rel32@lo+4 -; GFX10-NEXT: s_addc_u32 s35, s35, external_void_func_i1_signext@rel32@hi+12 ; GFX10-NEXT: v_writelane_b32 v40, s30, 0 ; GFX10-NEXT: v_writelane_b32 v40, s31, 1 +; GFX10-NEXT: s_getpc_b64 s[30:31] +; GFX10-NEXT: s_add_u32 s30, s30, external_void_func_i1_signext@rel32@lo+4 +; GFX10-NEXT: s_addc_u32 s31, s31, external_void_func_i1_signext@rel32@hi+12 ; GFX10-NEXT: v_and_b32_e32 v0, 1, v0 ; GFX10-NEXT: buffer_store_byte v0, off, s[0:3], s32 -; GFX10-NEXT: s_swappc_b64 s[30:31], s[34:35] -; GFX10-NEXT: v_readlane_b32 s31, v40, 1 +; GFX10-NEXT: s_swappc_b64 s[30:31], s[30:31] ; GFX10-NEXT: v_readlane_b32 s30, v40, 0 +; GFX10-NEXT: v_readlane_b32 s31, v40, 1 ; GFX10-NEXT: s_addk_i32 s32, 0xfe00 ; GFX10-NEXT: v_readlane_b32 s33, v40, 2 ; GFX10-NEXT: s_or_saveexec_b32 s34, -1 @@ -266,8 +266,8 @@ define amdgpu_gfx void @test_call_external_void_func_i1_signext(i32) #0 { ; GFX10-SCRATCH-NEXT: v_and_b32_e32 v0, 1, v0 ; GFX10-SCRATCH-NEXT: scratch_store_byte off, v0, s32 ; GFX10-SCRATCH-NEXT: s_swappc_b64 s[30:31], s[0:1] -; GFX10-SCRATCH-NEXT: v_readlane_b32 s31, v40, 1 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s30, v40, 0 +; GFX10-SCRATCH-NEXT: v_readlane_b32 s31, v40, 1 ; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, -16 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s33, v40, 2 ; GFX10-SCRATCH-NEXT: s_or_saveexec_b32 s0, -1 @@ -295,14 +295,14 @@ define amdgpu_gfx void @test_call_external_void_func_i1_zeroext(i32) #0 { ; GFX9-NEXT: s_addk_i32 s32, 0x400 ; GFX9-NEXT: v_writelane_b32 v40, s30, 0 ; GFX9-NEXT: v_writelane_b32 v40, s31, 1 -; GFX9-NEXT: s_getpc_b64 s[34:35] -; GFX9-NEXT: s_add_u32 s34, s34, external_void_func_i1_zeroext@rel32@lo+4 -; GFX9-NEXT: s_addc_u32 s35, s35, external_void_func_i1_zeroext@rel32@hi+12 +; GFX9-NEXT: s_getpc_b64 s[30:31] +; GFX9-NEXT: s_add_u32 s30, s30, external_void_func_i1_zeroext@rel32@lo+4 +; GFX9-NEXT: s_addc_u32 s31, s31, external_void_func_i1_zeroext@rel32@hi+12 ; GFX9-NEXT: v_and_b32_e32 v0, 1, v0 ; GFX9-NEXT: buffer_store_byte v0, off, s[0:3], s32 -; GFX9-NEXT: s_swappc_b64 s[30:31], s[34:35] -; GFX9-NEXT: v_readlane_b32 s31, v40, 1 +; GFX9-NEXT: s_swappc_b64 s[30:31], s[30:31] ; GFX9-NEXT: v_readlane_b32 s30, v40, 0 +; GFX9-NEXT: v_readlane_b32 s31, v40, 1 ; GFX9-NEXT: s_addk_i32 s32, 0xfc00 ; GFX9-NEXT: v_readlane_b32 s33, v40, 2 ; GFX9-NEXT: s_or_saveexec_b64 s[34:35], -1 @@ -324,16 +324,16 @@ define amdgpu_gfx void @test_call_external_void_func_i1_zeroext(i32) #0 { ; GFX10-NEXT: v_writelane_b32 v40, s33, 2 ; GFX10-NEXT: s_mov_b32 s33, s32 ; 
GFX10-NEXT: s_addk_i32 s32, 0x200 -; GFX10-NEXT: s_getpc_b64 s[34:35] -; GFX10-NEXT: s_add_u32 s34, s34, external_void_func_i1_zeroext@rel32@lo+4 -; GFX10-NEXT: s_addc_u32 s35, s35, external_void_func_i1_zeroext@rel32@hi+12 ; GFX10-NEXT: v_writelane_b32 v40, s30, 0 ; GFX10-NEXT: v_writelane_b32 v40, s31, 1 +; GFX10-NEXT: s_getpc_b64 s[30:31] +; GFX10-NEXT: s_add_u32 s30, s30, external_void_func_i1_zeroext@rel32@lo+4 +; GFX10-NEXT: s_addc_u32 s31, s31, external_void_func_i1_zeroext@rel32@hi+12 ; GFX10-NEXT: v_and_b32_e32 v0, 1, v0 ; GFX10-NEXT: buffer_store_byte v0, off, s[0:3], s32 -; GFX10-NEXT: s_swappc_b64 s[30:31], s[34:35] -; GFX10-NEXT: v_readlane_b32 s31, v40, 1 +; GFX10-NEXT: s_swappc_b64 s[30:31], s[30:31] ; GFX10-NEXT: v_readlane_b32 s30, v40, 0 +; GFX10-NEXT: v_readlane_b32 s31, v40, 1 ; GFX10-NEXT: s_addk_i32 s32, 0xfe00 ; GFX10-NEXT: v_readlane_b32 s33, v40, 2 ; GFX10-NEXT: s_or_saveexec_b32 s34, -1 @@ -364,8 +364,8 @@ define amdgpu_gfx void @test_call_external_void_func_i1_zeroext(i32) #0 { ; GFX10-SCRATCH-NEXT: v_and_b32_e32 v0, 1, v0 ; GFX10-SCRATCH-NEXT: scratch_store_byte off, v0, s32 ; GFX10-SCRATCH-NEXT: s_swappc_b64 s[30:31], s[0:1] -; GFX10-SCRATCH-NEXT: v_readlane_b32 s31, v40, 1 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s30, v40, 0 +; GFX10-SCRATCH-NEXT: v_readlane_b32 s31, v40, 1 ; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, -16 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s33, v40, 2 ; GFX10-SCRATCH-NEXT: s_or_saveexec_b32 s0, -1 @@ -392,12 +392,12 @@ define amdgpu_gfx void @test_call_external_void_func_i8_imm(i32) #0 { ; GFX9-NEXT: v_writelane_b32 v40, s30, 0 ; GFX9-NEXT: v_mov_b32_e32 v0, 0x7b ; GFX9-NEXT: v_writelane_b32 v40, s31, 1 -; GFX9-NEXT: s_getpc_b64 s[34:35] -; GFX9-NEXT: s_add_u32 s34, s34, external_void_func_i8@rel32@lo+4 -; GFX9-NEXT: s_addc_u32 s35, s35, external_void_func_i8@rel32@hi+12 -; GFX9-NEXT: s_swappc_b64 s[30:31], s[34:35] -; GFX9-NEXT: v_readlane_b32 s31, v40, 1 +; GFX9-NEXT: s_getpc_b64 s[30:31] +; GFX9-NEXT: s_add_u32 s30, s30, external_void_func_i8@rel32@lo+4 +; GFX9-NEXT: s_addc_u32 s31, s31, external_void_func_i8@rel32@hi+12 +; GFX9-NEXT: s_swappc_b64 s[30:31], s[30:31] ; GFX9-NEXT: v_readlane_b32 s30, v40, 0 +; GFX9-NEXT: v_readlane_b32 s31, v40, 1 ; GFX9-NEXT: s_addk_i32 s32, 0xfc00 ; GFX9-NEXT: v_readlane_b32 s33, v40, 2 ; GFX9-NEXT: s_or_saveexec_b64 s[34:35], -1 @@ -418,14 +418,14 @@ define amdgpu_gfx void @test_call_external_void_func_i8_imm(i32) #0 { ; GFX10-NEXT: v_mov_b32_e32 v0, 0x7b ; GFX10-NEXT: s_mov_b32 s33, s32 ; GFX10-NEXT: s_addk_i32 s32, 0x200 -; GFX10-NEXT: s_getpc_b64 s[34:35] -; GFX10-NEXT: s_add_u32 s34, s34, external_void_func_i8@rel32@lo+4 -; GFX10-NEXT: s_addc_u32 s35, s35, external_void_func_i8@rel32@hi+12 ; GFX10-NEXT: v_writelane_b32 v40, s30, 0 ; GFX10-NEXT: v_writelane_b32 v40, s31, 1 -; GFX10-NEXT: s_swappc_b64 s[30:31], s[34:35] -; GFX10-NEXT: v_readlane_b32 s31, v40, 1 +; GFX10-NEXT: s_getpc_b64 s[30:31] +; GFX10-NEXT: s_add_u32 s30, s30, external_void_func_i8@rel32@lo+4 +; GFX10-NEXT: s_addc_u32 s31, s31, external_void_func_i8@rel32@hi+12 +; GFX10-NEXT: s_swappc_b64 s[30:31], s[30:31] ; GFX10-NEXT: v_readlane_b32 s30, v40, 0 +; GFX10-NEXT: v_readlane_b32 s31, v40, 1 ; GFX10-NEXT: s_addk_i32 s32, 0xfe00 ; GFX10-NEXT: v_readlane_b32 s33, v40, 2 ; GFX10-NEXT: s_or_saveexec_b32 s34, -1 @@ -453,8 +453,8 @@ define amdgpu_gfx void @test_call_external_void_func_i8_imm(i32) #0 { ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s30, 0 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s31, 1 ; GFX10-SCRATCH-NEXT: s_swappc_b64 s[30:31], 
s[0:1] -; GFX10-SCRATCH-NEXT: v_readlane_b32 s31, v40, 1 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s30, v40, 0 +; GFX10-SCRATCH-NEXT: v_readlane_b32 s31, v40, 1 ; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, -16 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s33, v40, 2 ; GFX10-SCRATCH-NEXT: s_or_saveexec_b32 s0, -1 @@ -481,12 +481,12 @@ define amdgpu_gfx void @test_call_external_void_func_i8_signext(i32) #0 { ; GFX9-NEXT: s_addk_i32 s32, 0x400 ; GFX9-NEXT: v_writelane_b32 v40, s30, 0 ; GFX9-NEXT: v_writelane_b32 v40, s31, 1 -; GFX9-NEXT: s_getpc_b64 s[34:35] -; GFX9-NEXT: s_add_u32 s34, s34, external_void_func_i8_signext@rel32@lo+4 -; GFX9-NEXT: s_addc_u32 s35, s35, external_void_func_i8_signext@rel32@hi+12 -; GFX9-NEXT: s_swappc_b64 s[30:31], s[34:35] -; GFX9-NEXT: v_readlane_b32 s31, v40, 1 +; GFX9-NEXT: s_getpc_b64 s[30:31] +; GFX9-NEXT: s_add_u32 s30, s30, external_void_func_i8_signext@rel32@lo+4 +; GFX9-NEXT: s_addc_u32 s31, s31, external_void_func_i8_signext@rel32@hi+12 +; GFX9-NEXT: s_swappc_b64 s[30:31], s[30:31] ; GFX9-NEXT: v_readlane_b32 s30, v40, 0 +; GFX9-NEXT: v_readlane_b32 s31, v40, 1 ; GFX9-NEXT: s_addk_i32 s32, 0xfc00 ; GFX9-NEXT: v_readlane_b32 s33, v40, 2 ; GFX9-NEXT: s_or_saveexec_b64 s[34:35], -1 @@ -508,14 +508,14 @@ define amdgpu_gfx void @test_call_external_void_func_i8_signext(i32) #0 { ; GFX10-NEXT: v_writelane_b32 v40, s33, 2 ; GFX10-NEXT: s_mov_b32 s33, s32 ; GFX10-NEXT: s_addk_i32 s32, 0x200 -; GFX10-NEXT: s_getpc_b64 s[34:35] -; GFX10-NEXT: s_add_u32 s34, s34, external_void_func_i8_signext@rel32@lo+4 -; GFX10-NEXT: s_addc_u32 s35, s35, external_void_func_i8_signext@rel32@hi+12 ; GFX10-NEXT: v_writelane_b32 v40, s30, 0 ; GFX10-NEXT: v_writelane_b32 v40, s31, 1 -; GFX10-NEXT: s_swappc_b64 s[30:31], s[34:35] -; GFX10-NEXT: v_readlane_b32 s31, v40, 1 +; GFX10-NEXT: s_getpc_b64 s[30:31] +; GFX10-NEXT: s_add_u32 s30, s30, external_void_func_i8_signext@rel32@lo+4 +; GFX10-NEXT: s_addc_u32 s31, s31, external_void_func_i8_signext@rel32@hi+12 +; GFX10-NEXT: s_swappc_b64 s[30:31], s[30:31] ; GFX10-NEXT: v_readlane_b32 s30, v40, 0 +; GFX10-NEXT: v_readlane_b32 s31, v40, 1 ; GFX10-NEXT: s_addk_i32 s32, 0xfe00 ; GFX10-NEXT: v_readlane_b32 s33, v40, 2 ; GFX10-NEXT: s_or_saveexec_b32 s34, -1 @@ -544,8 +544,8 @@ define amdgpu_gfx void @test_call_external_void_func_i8_signext(i32) #0 { ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s30, 0 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s31, 1 ; GFX10-SCRATCH-NEXT: s_swappc_b64 s[30:31], s[0:1] -; GFX10-SCRATCH-NEXT: v_readlane_b32 s31, v40, 1 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s30, v40, 0 +; GFX10-SCRATCH-NEXT: v_readlane_b32 s31, v40, 1 ; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, -16 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s33, v40, 2 ; GFX10-SCRATCH-NEXT: s_or_saveexec_b32 s0, -1 @@ -573,12 +573,12 @@ define amdgpu_gfx void @test_call_external_void_func_i8_zeroext(i32) #0 { ; GFX9-NEXT: s_addk_i32 s32, 0x400 ; GFX9-NEXT: v_writelane_b32 v40, s30, 0 ; GFX9-NEXT: v_writelane_b32 v40, s31, 1 -; GFX9-NEXT: s_getpc_b64 s[34:35] -; GFX9-NEXT: s_add_u32 s34, s34, external_void_func_i8_zeroext@rel32@lo+4 -; GFX9-NEXT: s_addc_u32 s35, s35, external_void_func_i8_zeroext@rel32@hi+12 -; GFX9-NEXT: s_swappc_b64 s[30:31], s[34:35] -; GFX9-NEXT: v_readlane_b32 s31, v40, 1 +; GFX9-NEXT: s_getpc_b64 s[30:31] +; GFX9-NEXT: s_add_u32 s30, s30, external_void_func_i8_zeroext@rel32@lo+4 +; GFX9-NEXT: s_addc_u32 s31, s31, external_void_func_i8_zeroext@rel32@hi+12 +; GFX9-NEXT: s_swappc_b64 s[30:31], s[30:31] ; GFX9-NEXT: v_readlane_b32 s30, v40, 0 +; GFX9-NEXT: v_readlane_b32 
s31, v40, 1 ; GFX9-NEXT: s_addk_i32 s32, 0xfc00 ; GFX9-NEXT: v_readlane_b32 s33, v40, 2 ; GFX9-NEXT: s_or_saveexec_b64 s[34:35], -1 @@ -600,14 +600,14 @@ define amdgpu_gfx void @test_call_external_void_func_i8_zeroext(i32) #0 { ; GFX10-NEXT: v_writelane_b32 v40, s33, 2 ; GFX10-NEXT: s_mov_b32 s33, s32 ; GFX10-NEXT: s_addk_i32 s32, 0x200 -; GFX10-NEXT: s_getpc_b64 s[34:35] -; GFX10-NEXT: s_add_u32 s34, s34, external_void_func_i8_zeroext@rel32@lo+4 -; GFX10-NEXT: s_addc_u32 s35, s35, external_void_func_i8_zeroext@rel32@hi+12 ; GFX10-NEXT: v_writelane_b32 v40, s30, 0 ; GFX10-NEXT: v_writelane_b32 v40, s31, 1 -; GFX10-NEXT: s_swappc_b64 s[30:31], s[34:35] -; GFX10-NEXT: v_readlane_b32 s31, v40, 1 +; GFX10-NEXT: s_getpc_b64 s[30:31] +; GFX10-NEXT: s_add_u32 s30, s30, external_void_func_i8_zeroext@rel32@lo+4 +; GFX10-NEXT: s_addc_u32 s31, s31, external_void_func_i8_zeroext@rel32@hi+12 +; GFX10-NEXT: s_swappc_b64 s[30:31], s[30:31] ; GFX10-NEXT: v_readlane_b32 s30, v40, 0 +; GFX10-NEXT: v_readlane_b32 s31, v40, 1 ; GFX10-NEXT: s_addk_i32 s32, 0xfe00 ; GFX10-NEXT: v_readlane_b32 s33, v40, 2 ; GFX10-NEXT: s_or_saveexec_b32 s34, -1 @@ -636,8 +636,8 @@ define amdgpu_gfx void @test_call_external_void_func_i8_zeroext(i32) #0 { ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s30, 0 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s31, 1 ; GFX10-SCRATCH-NEXT: s_swappc_b64 s[30:31], s[0:1] -; GFX10-SCRATCH-NEXT: v_readlane_b32 s31, v40, 1 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s30, v40, 0 +; GFX10-SCRATCH-NEXT: v_readlane_b32 s31, v40, 1 ; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, -16 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s33, v40, 2 ; GFX10-SCRATCH-NEXT: s_or_saveexec_b32 s0, -1 @@ -664,12 +664,12 @@ define amdgpu_gfx void @test_call_external_void_func_i16_imm() #0 { ; GFX9-NEXT: v_writelane_b32 v40, s30, 0 ; GFX9-NEXT: v_mov_b32_e32 v0, 0x7b ; GFX9-NEXT: v_writelane_b32 v40, s31, 1 -; GFX9-NEXT: s_getpc_b64 s[34:35] -; GFX9-NEXT: s_add_u32 s34, s34, external_void_func_i16@rel32@lo+4 -; GFX9-NEXT: s_addc_u32 s35, s35, external_void_func_i16@rel32@hi+12 -; GFX9-NEXT: s_swappc_b64 s[30:31], s[34:35] -; GFX9-NEXT: v_readlane_b32 s31, v40, 1 +; GFX9-NEXT: s_getpc_b64 s[30:31] +; GFX9-NEXT: s_add_u32 s30, s30, external_void_func_i16@rel32@lo+4 +; GFX9-NEXT: s_addc_u32 s31, s31, external_void_func_i16@rel32@hi+12 +; GFX9-NEXT: s_swappc_b64 s[30:31], s[30:31] ; GFX9-NEXT: v_readlane_b32 s30, v40, 0 +; GFX9-NEXT: v_readlane_b32 s31, v40, 1 ; GFX9-NEXT: s_addk_i32 s32, 0xfc00 ; GFX9-NEXT: v_readlane_b32 s33, v40, 2 ; GFX9-NEXT: s_or_saveexec_b64 s[34:35], -1 @@ -690,14 +690,14 @@ define amdgpu_gfx void @test_call_external_void_func_i16_imm() #0 { ; GFX10-NEXT: v_mov_b32_e32 v0, 0x7b ; GFX10-NEXT: s_mov_b32 s33, s32 ; GFX10-NEXT: s_addk_i32 s32, 0x200 -; GFX10-NEXT: s_getpc_b64 s[34:35] -; GFX10-NEXT: s_add_u32 s34, s34, external_void_func_i16@rel32@lo+4 -; GFX10-NEXT: s_addc_u32 s35, s35, external_void_func_i16@rel32@hi+12 ; GFX10-NEXT: v_writelane_b32 v40, s30, 0 ; GFX10-NEXT: v_writelane_b32 v40, s31, 1 -; GFX10-NEXT: s_swappc_b64 s[30:31], s[34:35] -; GFX10-NEXT: v_readlane_b32 s31, v40, 1 +; GFX10-NEXT: s_getpc_b64 s[30:31] +; GFX10-NEXT: s_add_u32 s30, s30, external_void_func_i16@rel32@lo+4 +; GFX10-NEXT: s_addc_u32 s31, s31, external_void_func_i16@rel32@hi+12 +; GFX10-NEXT: s_swappc_b64 s[30:31], s[30:31] ; GFX10-NEXT: v_readlane_b32 s30, v40, 0 +; GFX10-NEXT: v_readlane_b32 s31, v40, 1 ; GFX10-NEXT: s_addk_i32 s32, 0xfe00 ; GFX10-NEXT: v_readlane_b32 s33, v40, 2 ; GFX10-NEXT: s_or_saveexec_b32 s34, -1 @@ -725,8 +725,8 @@ 
define amdgpu_gfx void @test_call_external_void_func_i16_imm() #0 { ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s30, 0 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s31, 1 ; GFX10-SCRATCH-NEXT: s_swappc_b64 s[30:31], s[0:1] -; GFX10-SCRATCH-NEXT: v_readlane_b32 s31, v40, 1 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s30, v40, 0 +; GFX10-SCRATCH-NEXT: v_readlane_b32 s31, v40, 1 ; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, -16 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s33, v40, 2 ; GFX10-SCRATCH-NEXT: s_or_saveexec_b32 s0, -1 @@ -753,12 +753,12 @@ define amdgpu_gfx void @test_call_external_void_func_i16_signext(i32) #0 { ; GFX9-NEXT: s_addk_i32 s32, 0x400 ; GFX9-NEXT: v_writelane_b32 v40, s30, 0 ; GFX9-NEXT: v_writelane_b32 v40, s31, 1 -; GFX9-NEXT: s_getpc_b64 s[34:35] -; GFX9-NEXT: s_add_u32 s34, s34, external_void_func_i16_signext@rel32@lo+4 -; GFX9-NEXT: s_addc_u32 s35, s35, external_void_func_i16_signext@rel32@hi+12 -; GFX9-NEXT: s_swappc_b64 s[30:31], s[34:35] -; GFX9-NEXT: v_readlane_b32 s31, v40, 1 +; GFX9-NEXT: s_getpc_b64 s[30:31] +; GFX9-NEXT: s_add_u32 s30, s30, external_void_func_i16_signext@rel32@lo+4 +; GFX9-NEXT: s_addc_u32 s31, s31, external_void_func_i16_signext@rel32@hi+12 +; GFX9-NEXT: s_swappc_b64 s[30:31], s[30:31] ; GFX9-NEXT: v_readlane_b32 s30, v40, 0 +; GFX9-NEXT: v_readlane_b32 s31, v40, 1 ; GFX9-NEXT: s_addk_i32 s32, 0xfc00 ; GFX9-NEXT: v_readlane_b32 s33, v40, 2 ; GFX9-NEXT: s_or_saveexec_b64 s[34:35], -1 @@ -780,14 +780,14 @@ define amdgpu_gfx void @test_call_external_void_func_i16_signext(i32) #0 { ; GFX10-NEXT: v_writelane_b32 v40, s33, 2 ; GFX10-NEXT: s_mov_b32 s33, s32 ; GFX10-NEXT: s_addk_i32 s32, 0x200 -; GFX10-NEXT: s_getpc_b64 s[34:35] -; GFX10-NEXT: s_add_u32 s34, s34, external_void_func_i16_signext@rel32@lo+4 -; GFX10-NEXT: s_addc_u32 s35, s35, external_void_func_i16_signext@rel32@hi+12 ; GFX10-NEXT: v_writelane_b32 v40, s30, 0 ; GFX10-NEXT: v_writelane_b32 v40, s31, 1 -; GFX10-NEXT: s_swappc_b64 s[30:31], s[34:35] -; GFX10-NEXT: v_readlane_b32 s31, v40, 1 +; GFX10-NEXT: s_getpc_b64 s[30:31] +; GFX10-NEXT: s_add_u32 s30, s30, external_void_func_i16_signext@rel32@lo+4 +; GFX10-NEXT: s_addc_u32 s31, s31, external_void_func_i16_signext@rel32@hi+12 +; GFX10-NEXT: s_swappc_b64 s[30:31], s[30:31] ; GFX10-NEXT: v_readlane_b32 s30, v40, 0 +; GFX10-NEXT: v_readlane_b32 s31, v40, 1 ; GFX10-NEXT: s_addk_i32 s32, 0xfe00 ; GFX10-NEXT: v_readlane_b32 s33, v40, 2 ; GFX10-NEXT: s_or_saveexec_b32 s34, -1 @@ -816,8 +816,8 @@ define amdgpu_gfx void @test_call_external_void_func_i16_signext(i32) #0 { ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s30, 0 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s31, 1 ; GFX10-SCRATCH-NEXT: s_swappc_b64 s[30:31], s[0:1] -; GFX10-SCRATCH-NEXT: v_readlane_b32 s31, v40, 1 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s30, v40, 0 +; GFX10-SCRATCH-NEXT: v_readlane_b32 s31, v40, 1 ; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, -16 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s33, v40, 2 ; GFX10-SCRATCH-NEXT: s_or_saveexec_b32 s0, -1 @@ -845,12 +845,12 @@ define amdgpu_gfx void @test_call_external_void_func_i16_zeroext(i32) #0 { ; GFX9-NEXT: s_addk_i32 s32, 0x400 ; GFX9-NEXT: v_writelane_b32 v40, s30, 0 ; GFX9-NEXT: v_writelane_b32 v40, s31, 1 -; GFX9-NEXT: s_getpc_b64 s[34:35] -; GFX9-NEXT: s_add_u32 s34, s34, external_void_func_i16_zeroext@rel32@lo+4 -; GFX9-NEXT: s_addc_u32 s35, s35, external_void_func_i16_zeroext@rel32@hi+12 -; GFX9-NEXT: s_swappc_b64 s[30:31], s[34:35] -; GFX9-NEXT: v_readlane_b32 s31, v40, 1 +; GFX9-NEXT: s_getpc_b64 s[30:31] +; GFX9-NEXT: s_add_u32 s30, s30, 
external_void_func_i16_zeroext@rel32@lo+4 +; GFX9-NEXT: s_addc_u32 s31, s31, external_void_func_i16_zeroext@rel32@hi+12 +; GFX9-NEXT: s_swappc_b64 s[30:31], s[30:31] ; GFX9-NEXT: v_readlane_b32 s30, v40, 0 +; GFX9-NEXT: v_readlane_b32 s31, v40, 1 ; GFX9-NEXT: s_addk_i32 s32, 0xfc00 ; GFX9-NEXT: v_readlane_b32 s33, v40, 2 ; GFX9-NEXT: s_or_saveexec_b64 s[34:35], -1 @@ -872,14 +872,14 @@ define amdgpu_gfx void @test_call_external_void_func_i16_zeroext(i32) #0 { ; GFX10-NEXT: v_writelane_b32 v40, s33, 2 ; GFX10-NEXT: s_mov_b32 s33, s32 ; GFX10-NEXT: s_addk_i32 s32, 0x200 -; GFX10-NEXT: s_getpc_b64 s[34:35] -; GFX10-NEXT: s_add_u32 s34, s34, external_void_func_i16_zeroext@rel32@lo+4 -; GFX10-NEXT: s_addc_u32 s35, s35, external_void_func_i16_zeroext@rel32@hi+12 ; GFX10-NEXT: v_writelane_b32 v40, s30, 0 ; GFX10-NEXT: v_writelane_b32 v40, s31, 1 -; GFX10-NEXT: s_swappc_b64 s[30:31], s[34:35] -; GFX10-NEXT: v_readlane_b32 s31, v40, 1 +; GFX10-NEXT: s_getpc_b64 s[30:31] +; GFX10-NEXT: s_add_u32 s30, s30, external_void_func_i16_zeroext@rel32@lo+4 +; GFX10-NEXT: s_addc_u32 s31, s31, external_void_func_i16_zeroext@rel32@hi+12 +; GFX10-NEXT: s_swappc_b64 s[30:31], s[30:31] ; GFX10-NEXT: v_readlane_b32 s30, v40, 0 +; GFX10-NEXT: v_readlane_b32 s31, v40, 1 ; GFX10-NEXT: s_addk_i32 s32, 0xfe00 ; GFX10-NEXT: v_readlane_b32 s33, v40, 2 ; GFX10-NEXT: s_or_saveexec_b32 s34, -1 @@ -908,8 +908,8 @@ define amdgpu_gfx void @test_call_external_void_func_i16_zeroext(i32) #0 { ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s30, 0 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s31, 1 ; GFX10-SCRATCH-NEXT: s_swappc_b64 s[30:31], s[0:1] -; GFX10-SCRATCH-NEXT: v_readlane_b32 s31, v40, 1 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s30, v40, 0 +; GFX10-SCRATCH-NEXT: v_readlane_b32 s31, v40, 1 ; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, -16 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s33, v40, 2 ; GFX10-SCRATCH-NEXT: s_or_saveexec_b32 s0, -1 @@ -936,12 +936,12 @@ define amdgpu_gfx void @test_call_external_void_func_i32_imm(i32) #0 { ; GFX9-NEXT: v_writelane_b32 v40, s30, 0 ; GFX9-NEXT: v_mov_b32_e32 v0, 42 ; GFX9-NEXT: v_writelane_b32 v40, s31, 1 -; GFX9-NEXT: s_getpc_b64 s[34:35] -; GFX9-NEXT: s_add_u32 s34, s34, external_void_func_i32@rel32@lo+4 -; GFX9-NEXT: s_addc_u32 s35, s35, external_void_func_i32@rel32@hi+12 -; GFX9-NEXT: s_swappc_b64 s[30:31], s[34:35] -; GFX9-NEXT: v_readlane_b32 s31, v40, 1 +; GFX9-NEXT: s_getpc_b64 s[30:31] +; GFX9-NEXT: s_add_u32 s30, s30, external_void_func_i32@rel32@lo+4 +; GFX9-NEXT: s_addc_u32 s31, s31, external_void_func_i32@rel32@hi+12 +; GFX9-NEXT: s_swappc_b64 s[30:31], s[30:31] ; GFX9-NEXT: v_readlane_b32 s30, v40, 0 +; GFX9-NEXT: v_readlane_b32 s31, v40, 1 ; GFX9-NEXT: s_addk_i32 s32, 0xfc00 ; GFX9-NEXT: v_readlane_b32 s33, v40, 2 ; GFX9-NEXT: s_or_saveexec_b64 s[34:35], -1 @@ -962,14 +962,14 @@ define amdgpu_gfx void @test_call_external_void_func_i32_imm(i32) #0 { ; GFX10-NEXT: v_mov_b32_e32 v0, 42 ; GFX10-NEXT: s_mov_b32 s33, s32 ; GFX10-NEXT: s_addk_i32 s32, 0x200 -; GFX10-NEXT: s_getpc_b64 s[34:35] -; GFX10-NEXT: s_add_u32 s34, s34, external_void_func_i32@rel32@lo+4 -; GFX10-NEXT: s_addc_u32 s35, s35, external_void_func_i32@rel32@hi+12 ; GFX10-NEXT: v_writelane_b32 v40, s30, 0 ; GFX10-NEXT: v_writelane_b32 v40, s31, 1 -; GFX10-NEXT: s_swappc_b64 s[30:31], s[34:35] -; GFX10-NEXT: v_readlane_b32 s31, v40, 1 +; GFX10-NEXT: s_getpc_b64 s[30:31] +; GFX10-NEXT: s_add_u32 s30, s30, external_void_func_i32@rel32@lo+4 +; GFX10-NEXT: s_addc_u32 s31, s31, external_void_func_i32@rel32@hi+12 +; GFX10-NEXT: 
s_swappc_b64 s[30:31], s[30:31] ; GFX10-NEXT: v_readlane_b32 s30, v40, 0 +; GFX10-NEXT: v_readlane_b32 s31, v40, 1 ; GFX10-NEXT: s_addk_i32 s32, 0xfe00 ; GFX10-NEXT: v_readlane_b32 s33, v40, 2 ; GFX10-NEXT: s_or_saveexec_b32 s34, -1 @@ -997,8 +997,8 @@ define amdgpu_gfx void @test_call_external_void_func_i32_imm(i32) #0 { ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s30, 0 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s31, 1 ; GFX10-SCRATCH-NEXT: s_swappc_b64 s[30:31], s[0:1] -; GFX10-SCRATCH-NEXT: v_readlane_b32 s31, v40, 1 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s30, v40, 0 +; GFX10-SCRATCH-NEXT: v_readlane_b32 s31, v40, 1 ; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, -16 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s33, v40, 2 ; GFX10-SCRATCH-NEXT: s_or_saveexec_b32 s0, -1 @@ -1025,12 +1025,12 @@ define amdgpu_gfx void @test_call_external_void_func_i64_imm() #0 { ; GFX9-NEXT: v_mov_b32_e32 v0, 0x7b ; GFX9-NEXT: v_mov_b32_e32 v1, 0 ; GFX9-NEXT: v_writelane_b32 v40, s31, 1 -; GFX9-NEXT: s_getpc_b64 s[34:35] -; GFX9-NEXT: s_add_u32 s34, s34, external_void_func_i64@rel32@lo+4 -; GFX9-NEXT: s_addc_u32 s35, s35, external_void_func_i64@rel32@hi+12 -; GFX9-NEXT: s_swappc_b64 s[30:31], s[34:35] -; GFX9-NEXT: v_readlane_b32 s31, v40, 1 +; GFX9-NEXT: s_getpc_b64 s[30:31] +; GFX9-NEXT: s_add_u32 s30, s30, external_void_func_i64@rel32@lo+4 +; GFX9-NEXT: s_addc_u32 s31, s31, external_void_func_i64@rel32@hi+12 +; GFX9-NEXT: s_swappc_b64 s[30:31], s[30:31] ; GFX9-NEXT: v_readlane_b32 s30, v40, 0 +; GFX9-NEXT: v_readlane_b32 s31, v40, 1 ; GFX9-NEXT: s_addk_i32 s32, 0xfc00 ; GFX9-NEXT: v_readlane_b32 s33, v40, 2 ; GFX9-NEXT: s_or_saveexec_b64 s[34:35], -1 @@ -1053,13 +1053,13 @@ define amdgpu_gfx void @test_call_external_void_func_i64_imm() #0 { ; GFX10-NEXT: s_mov_b32 s33, s32 ; GFX10-NEXT: s_addk_i32 s32, 0x200 ; GFX10-NEXT: v_writelane_b32 v40, s30, 0 -; GFX10-NEXT: s_getpc_b64 s[34:35] -; GFX10-NEXT: s_add_u32 s34, s34, external_void_func_i64@rel32@lo+4 -; GFX10-NEXT: s_addc_u32 s35, s35, external_void_func_i64@rel32@hi+12 ; GFX10-NEXT: v_writelane_b32 v40, s31, 1 -; GFX10-NEXT: s_swappc_b64 s[30:31], s[34:35] -; GFX10-NEXT: v_readlane_b32 s31, v40, 1 +; GFX10-NEXT: s_getpc_b64 s[30:31] +; GFX10-NEXT: s_add_u32 s30, s30, external_void_func_i64@rel32@lo+4 +; GFX10-NEXT: s_addc_u32 s31, s31, external_void_func_i64@rel32@hi+12 +; GFX10-NEXT: s_swappc_b64 s[30:31], s[30:31] ; GFX10-NEXT: v_readlane_b32 s30, v40, 0 +; GFX10-NEXT: v_readlane_b32 s31, v40, 1 ; GFX10-NEXT: s_addk_i32 s32, 0xfe00 ; GFX10-NEXT: v_readlane_b32 s33, v40, 2 ; GFX10-NEXT: s_or_saveexec_b32 s34, -1 @@ -1088,8 +1088,8 @@ define amdgpu_gfx void @test_call_external_void_func_i64_imm() #0 { ; GFX10-SCRATCH-NEXT: s_addc_u32 s1, s1, external_void_func_i64@rel32@hi+12 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s31, 1 ; GFX10-SCRATCH-NEXT: s_swappc_b64 s[30:31], s[0:1] -; GFX10-SCRATCH-NEXT: v_readlane_b32 s31, v40, 1 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s30, v40, 0 +; GFX10-SCRATCH-NEXT: v_readlane_b32 s31, v40, 1 ; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, -16 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s33, v40, 2 ; GFX10-SCRATCH-NEXT: s_or_saveexec_b32 s0, -1 @@ -1117,12 +1117,12 @@ define amdgpu_gfx void @test_call_external_void_func_v2i64() #0 { ; GFX9-NEXT: s_addk_i32 s32, 0x400 ; GFX9-NEXT: v_writelane_b32 v40, s30, 0 ; GFX9-NEXT: v_writelane_b32 v40, s31, 1 -; GFX9-NEXT: s_getpc_b64 s[34:35] -; GFX9-NEXT: s_add_u32 s34, s34, external_void_func_v2i64@rel32@lo+4 -; GFX9-NEXT: s_addc_u32 s35, s35, external_void_func_v2i64@rel32@hi+12 -; GFX9-NEXT: s_swappc_b64 
s[30:31], s[34:35] -; GFX9-NEXT: v_readlane_b32 s31, v40, 1 +; GFX9-NEXT: s_getpc_b64 s[30:31] +; GFX9-NEXT: s_add_u32 s30, s30, external_void_func_v2i64@rel32@lo+4 +; GFX9-NEXT: s_addc_u32 s31, s31, external_void_func_v2i64@rel32@hi+12 +; GFX9-NEXT: s_swappc_b64 s[30:31], s[30:31] ; GFX9-NEXT: v_readlane_b32 s30, v40, 0 +; GFX9-NEXT: v_readlane_b32 s31, v40, 1 ; GFX9-NEXT: s_addk_i32 s32, 0xfc00 ; GFX9-NEXT: v_readlane_b32 s33, v40, 2 ; GFX9-NEXT: s_or_saveexec_b64 s[34:35], -1 @@ -1144,15 +1144,15 @@ define amdgpu_gfx void @test_call_external_void_func_v2i64() #0 { ; GFX10-NEXT: v_writelane_b32 v40, s33, 2 ; GFX10-NEXT: s_mov_b32 s33, s32 ; GFX10-NEXT: s_addk_i32 s32, 0x200 -; GFX10-NEXT: s_getpc_b64 s[34:35] -; GFX10-NEXT: s_add_u32 s34, s34, external_void_func_v2i64@rel32@lo+4 -; GFX10-NEXT: s_addc_u32 s35, s35, external_void_func_v2i64@rel32@hi+12 ; GFX10-NEXT: global_load_dwordx4 v[0:3], v[0:1], off ; GFX10-NEXT: v_writelane_b32 v40, s30, 0 ; GFX10-NEXT: v_writelane_b32 v40, s31, 1 -; GFX10-NEXT: s_swappc_b64 s[30:31], s[34:35] -; GFX10-NEXT: v_readlane_b32 s31, v40, 1 +; GFX10-NEXT: s_getpc_b64 s[30:31] +; GFX10-NEXT: s_add_u32 s30, s30, external_void_func_v2i64@rel32@lo+4 +; GFX10-NEXT: s_addc_u32 s31, s31, external_void_func_v2i64@rel32@hi+12 +; GFX10-NEXT: s_swappc_b64 s[30:31], s[30:31] ; GFX10-NEXT: v_readlane_b32 s30, v40, 0 +; GFX10-NEXT: v_readlane_b32 s31, v40, 1 ; GFX10-NEXT: s_addk_i32 s32, 0xfe00 ; GFX10-NEXT: v_readlane_b32 s33, v40, 2 ; GFX10-NEXT: s_or_saveexec_b32 s34, -1 @@ -1182,8 +1182,8 @@ define amdgpu_gfx void @test_call_external_void_func_v2i64() #0 { ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s30, 0 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s31, 1 ; GFX10-SCRATCH-NEXT: s_swappc_b64 s[30:31], s[0:1] -; GFX10-SCRATCH-NEXT: v_readlane_b32 s31, v40, 1 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s30, v40, 0 +; GFX10-SCRATCH-NEXT: v_readlane_b32 s31, v40, 1 ; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, -16 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s33, v40, 2 ; GFX10-SCRATCH-NEXT: s_or_saveexec_b32 s0, -1 @@ -1213,12 +1213,12 @@ define amdgpu_gfx void @test_call_external_void_func_v2i64_imm() #0 { ; GFX9-NEXT: v_mov_b32_e32 v2, 3 ; GFX9-NEXT: v_mov_b32_e32 v3, 4 ; GFX9-NEXT: v_writelane_b32 v40, s31, 1 -; GFX9-NEXT: s_getpc_b64 s[34:35] -; GFX9-NEXT: s_add_u32 s34, s34, external_void_func_v2i64@rel32@lo+4 -; GFX9-NEXT: s_addc_u32 s35, s35, external_void_func_v2i64@rel32@hi+12 -; GFX9-NEXT: s_swappc_b64 s[30:31], s[34:35] -; GFX9-NEXT: v_readlane_b32 s31, v40, 1 +; GFX9-NEXT: s_getpc_b64 s[30:31] +; GFX9-NEXT: s_add_u32 s30, s30, external_void_func_v2i64@rel32@lo+4 +; GFX9-NEXT: s_addc_u32 s31, s31, external_void_func_v2i64@rel32@hi+12 +; GFX9-NEXT: s_swappc_b64 s[30:31], s[30:31] ; GFX9-NEXT: v_readlane_b32 s30, v40, 0 +; GFX9-NEXT: v_readlane_b32 s31, v40, 1 ; GFX9-NEXT: s_addk_i32 s32, 0xfc00 ; GFX9-NEXT: v_readlane_b32 s33, v40, 2 ; GFX9-NEXT: s_or_saveexec_b64 s[34:35], -1 @@ -1243,13 +1243,13 @@ define amdgpu_gfx void @test_call_external_void_func_v2i64_imm() #0 { ; GFX10-NEXT: v_writelane_b32 v40, s30, 0 ; GFX10-NEXT: s_mov_b32 s33, s32 ; GFX10-NEXT: s_addk_i32 s32, 0x200 -; GFX10-NEXT: s_getpc_b64 s[34:35] -; GFX10-NEXT: s_add_u32 s34, s34, external_void_func_v2i64@rel32@lo+4 -; GFX10-NEXT: s_addc_u32 s35, s35, external_void_func_v2i64@rel32@hi+12 ; GFX10-NEXT: v_writelane_b32 v40, s31, 1 -; GFX10-NEXT: s_swappc_b64 s[30:31], s[34:35] -; GFX10-NEXT: v_readlane_b32 s31, v40, 1 +; GFX10-NEXT: s_getpc_b64 s[30:31] +; GFX10-NEXT: s_add_u32 s30, s30, 
external_void_func_v2i64@rel32@lo+4 +; GFX10-NEXT: s_addc_u32 s31, s31, external_void_func_v2i64@rel32@hi+12 +; GFX10-NEXT: s_swappc_b64 s[30:31], s[30:31] ; GFX10-NEXT: v_readlane_b32 s30, v40, 0 +; GFX10-NEXT: v_readlane_b32 s31, v40, 1 ; GFX10-NEXT: s_addk_i32 s32, 0xfe00 ; GFX10-NEXT: v_readlane_b32 s33, v40, 2 ; GFX10-NEXT: s_or_saveexec_b32 s34, -1 @@ -1280,8 +1280,8 @@ define amdgpu_gfx void @test_call_external_void_func_v2i64_imm() #0 { ; GFX10-SCRATCH-NEXT: s_addc_u32 s1, s1, external_void_func_v2i64@rel32@hi+12 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s31, 1 ; GFX10-SCRATCH-NEXT: s_swappc_b64 s[30:31], s[0:1] -; GFX10-SCRATCH-NEXT: v_readlane_b32 s31, v40, 1 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s30, v40, 0 +; GFX10-SCRATCH-NEXT: v_readlane_b32 s31, v40, 1 ; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, -16 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s33, v40, 2 ; GFX10-SCRATCH-NEXT: s_or_saveexec_b32 s0, -1 @@ -1311,12 +1311,12 @@ define amdgpu_gfx void @test_call_external_void_func_v3i64() #0 { ; GFX9-NEXT: v_mov_b32_e32 v4, 1 ; GFX9-NEXT: v_mov_b32_e32 v5, 2 ; GFX9-NEXT: v_writelane_b32 v40, s31, 1 -; GFX9-NEXT: s_getpc_b64 s[34:35] -; GFX9-NEXT: s_add_u32 s34, s34, external_void_func_v3i64@rel32@lo+4 -; GFX9-NEXT: s_addc_u32 s35, s35, external_void_func_v3i64@rel32@hi+12 -; GFX9-NEXT: s_swappc_b64 s[30:31], s[34:35] -; GFX9-NEXT: v_readlane_b32 s31, v40, 1 +; GFX9-NEXT: s_getpc_b64 s[30:31] +; GFX9-NEXT: s_add_u32 s30, s30, external_void_func_v3i64@rel32@lo+4 +; GFX9-NEXT: s_addc_u32 s31, s31, external_void_func_v3i64@rel32@hi+12 +; GFX9-NEXT: s_swappc_b64 s[30:31], s[30:31] ; GFX9-NEXT: v_readlane_b32 s30, v40, 0 +; GFX9-NEXT: v_readlane_b32 s31, v40, 1 ; GFX9-NEXT: s_addk_i32 s32, 0xfc00 ; GFX9-NEXT: v_readlane_b32 s33, v40, 2 ; GFX9-NEXT: s_or_saveexec_b64 s[34:35], -1 @@ -1342,13 +1342,13 @@ define amdgpu_gfx void @test_call_external_void_func_v3i64() #0 { ; GFX10-NEXT: global_load_dwordx4 v[0:3], v[0:1], off ; GFX10-NEXT: v_writelane_b32 v40, s30, 0 ; GFX10-NEXT: s_addk_i32 s32, 0x200 -; GFX10-NEXT: s_getpc_b64 s[34:35] -; GFX10-NEXT: s_add_u32 s34, s34, external_void_func_v3i64@rel32@lo+4 -; GFX10-NEXT: s_addc_u32 s35, s35, external_void_func_v3i64@rel32@hi+12 ; GFX10-NEXT: v_writelane_b32 v40, s31, 1 -; GFX10-NEXT: s_swappc_b64 s[30:31], s[34:35] -; GFX10-NEXT: v_readlane_b32 s31, v40, 1 +; GFX10-NEXT: s_getpc_b64 s[30:31] +; GFX10-NEXT: s_add_u32 s30, s30, external_void_func_v3i64@rel32@lo+4 +; GFX10-NEXT: s_addc_u32 s31, s31, external_void_func_v3i64@rel32@hi+12 +; GFX10-NEXT: s_swappc_b64 s[30:31], s[30:31] ; GFX10-NEXT: v_readlane_b32 s30, v40, 0 +; GFX10-NEXT: v_readlane_b32 s31, v40, 1 ; GFX10-NEXT: s_addk_i32 s32, 0xfe00 ; GFX10-NEXT: v_readlane_b32 s33, v40, 2 ; GFX10-NEXT: s_or_saveexec_b32 s34, -1 @@ -1380,8 +1380,8 @@ define amdgpu_gfx void @test_call_external_void_func_v3i64() #0 { ; GFX10-SCRATCH-NEXT: s_addc_u32 s1, s1, external_void_func_v3i64@rel32@hi+12 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s31, 1 ; GFX10-SCRATCH-NEXT: s_swappc_b64 s[30:31], s[0:1] -; GFX10-SCRATCH-NEXT: v_readlane_b32 s31, v40, 1 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s30, v40, 0 +; GFX10-SCRATCH-NEXT: v_readlane_b32 s31, v40, 1 ; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, -16 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s33, v40, 2 ; GFX10-SCRATCH-NEXT: s_or_saveexec_b32 s0, -1 @@ -1416,12 +1416,12 @@ define amdgpu_gfx void @test_call_external_void_func_v4i64() #0 { ; GFX9-NEXT: v_mov_b32_e32 v6, 3 ; GFX9-NEXT: v_mov_b32_e32 v7, 4 ; GFX9-NEXT: v_writelane_b32 v40, s31, 1 -; GFX9-NEXT: s_getpc_b64 
s[34:35] -; GFX9-NEXT: s_add_u32 s34, s34, external_void_func_v4i64@rel32@lo+4 -; GFX9-NEXT: s_addc_u32 s35, s35, external_void_func_v4i64@rel32@hi+12 -; GFX9-NEXT: s_swappc_b64 s[30:31], s[34:35] -; GFX9-NEXT: v_readlane_b32 s31, v40, 1 +; GFX9-NEXT: s_getpc_b64 s[30:31] +; GFX9-NEXT: s_add_u32 s30, s30, external_void_func_v4i64@rel32@lo+4 +; GFX9-NEXT: s_addc_u32 s31, s31, external_void_func_v4i64@rel32@hi+12 +; GFX9-NEXT: s_swappc_b64 s[30:31], s[30:31] ; GFX9-NEXT: v_readlane_b32 s30, v40, 0 +; GFX9-NEXT: v_readlane_b32 s31, v40, 1 ; GFX9-NEXT: s_addk_i32 s32, 0xfc00 ; GFX9-NEXT: v_readlane_b32 s33, v40, 2 ; GFX9-NEXT: s_or_saveexec_b64 s[34:35], -1 @@ -1449,13 +1449,13 @@ define amdgpu_gfx void @test_call_external_void_func_v4i64() #0 { ; GFX10-NEXT: v_mov_b32_e32 v7, 4 ; GFX10-NEXT: s_mov_b32 s33, s32 ; GFX10-NEXT: s_addk_i32 s32, 0x200 -; GFX10-NEXT: s_getpc_b64 s[34:35] -; GFX10-NEXT: s_add_u32 s34, s34, external_void_func_v4i64@rel32@lo+4 -; GFX10-NEXT: s_addc_u32 s35, s35, external_void_func_v4i64@rel32@hi+12 ; GFX10-NEXT: v_writelane_b32 v40, s31, 1 -; GFX10-NEXT: s_swappc_b64 s[30:31], s[34:35] -; GFX10-NEXT: v_readlane_b32 s31, v40, 1 +; GFX10-NEXT: s_getpc_b64 s[30:31] +; GFX10-NEXT: s_add_u32 s30, s30, external_void_func_v4i64@rel32@lo+4 +; GFX10-NEXT: s_addc_u32 s31, s31, external_void_func_v4i64@rel32@hi+12 +; GFX10-NEXT: s_swappc_b64 s[30:31], s[30:31] ; GFX10-NEXT: v_readlane_b32 s30, v40, 0 +; GFX10-NEXT: v_readlane_b32 s31, v40, 1 ; GFX10-NEXT: s_addk_i32 s32, 0xfe00 ; GFX10-NEXT: v_readlane_b32 s33, v40, 2 ; GFX10-NEXT: s_or_saveexec_b32 s34, -1 @@ -1489,8 +1489,8 @@ define amdgpu_gfx void @test_call_external_void_func_v4i64() #0 { ; GFX10-SCRATCH-NEXT: s_addc_u32 s1, s1, external_void_func_v4i64@rel32@hi+12 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s31, 1 ; GFX10-SCRATCH-NEXT: s_swappc_b64 s[30:31], s[0:1] -; GFX10-SCRATCH-NEXT: v_readlane_b32 s31, v40, 1 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s30, v40, 0 +; GFX10-SCRATCH-NEXT: v_readlane_b32 s31, v40, 1 ; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, -16 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s33, v40, 2 ; GFX10-SCRATCH-NEXT: s_or_saveexec_b32 s0, -1 @@ -1518,12 +1518,12 @@ define amdgpu_gfx void @test_call_external_void_func_f16_imm() #0 { ; GFX9-NEXT: v_writelane_b32 v40, s30, 0 ; GFX9-NEXT: v_mov_b32_e32 v0, 0x4400 ; GFX9-NEXT: v_writelane_b32 v40, s31, 1 -; GFX9-NEXT: s_getpc_b64 s[34:35] -; GFX9-NEXT: s_add_u32 s34, s34, external_void_func_f16@rel32@lo+4 -; GFX9-NEXT: s_addc_u32 s35, s35, external_void_func_f16@rel32@hi+12 -; GFX9-NEXT: s_swappc_b64 s[30:31], s[34:35] -; GFX9-NEXT: v_readlane_b32 s31, v40, 1 +; GFX9-NEXT: s_getpc_b64 s[30:31] +; GFX9-NEXT: s_add_u32 s30, s30, external_void_func_f16@rel32@lo+4 +; GFX9-NEXT: s_addc_u32 s31, s31, external_void_func_f16@rel32@hi+12 +; GFX9-NEXT: s_swappc_b64 s[30:31], s[30:31] ; GFX9-NEXT: v_readlane_b32 s30, v40, 0 +; GFX9-NEXT: v_readlane_b32 s31, v40, 1 ; GFX9-NEXT: s_addk_i32 s32, 0xfc00 ; GFX9-NEXT: v_readlane_b32 s33, v40, 2 ; GFX9-NEXT: s_or_saveexec_b64 s[34:35], -1 @@ -1544,14 +1544,14 @@ define amdgpu_gfx void @test_call_external_void_func_f16_imm() #0 { ; GFX10-NEXT: v_mov_b32_e32 v0, 0x4400 ; GFX10-NEXT: s_mov_b32 s33, s32 ; GFX10-NEXT: s_addk_i32 s32, 0x200 -; GFX10-NEXT: s_getpc_b64 s[34:35] -; GFX10-NEXT: s_add_u32 s34, s34, external_void_func_f16@rel32@lo+4 -; GFX10-NEXT: s_addc_u32 s35, s35, external_void_func_f16@rel32@hi+12 ; GFX10-NEXT: v_writelane_b32 v40, s30, 0 ; GFX10-NEXT: v_writelane_b32 v40, s31, 1 -; GFX10-NEXT: s_swappc_b64 s[30:31], s[34:35] -; 
GFX10-NEXT: v_readlane_b32 s31, v40, 1 +; GFX10-NEXT: s_getpc_b64 s[30:31] +; GFX10-NEXT: s_add_u32 s30, s30, external_void_func_f16@rel32@lo+4 +; GFX10-NEXT: s_addc_u32 s31, s31, external_void_func_f16@rel32@hi+12 +; GFX10-NEXT: s_swappc_b64 s[30:31], s[30:31] ; GFX10-NEXT: v_readlane_b32 s30, v40, 0 +; GFX10-NEXT: v_readlane_b32 s31, v40, 1 ; GFX10-NEXT: s_addk_i32 s32, 0xfe00 ; GFX10-NEXT: v_readlane_b32 s33, v40, 2 ; GFX10-NEXT: s_or_saveexec_b32 s34, -1 @@ -1579,8 +1579,8 @@ define amdgpu_gfx void @test_call_external_void_func_f16_imm() #0 { ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s30, 0 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s31, 1 ; GFX10-SCRATCH-NEXT: s_swappc_b64 s[30:31], s[0:1] -; GFX10-SCRATCH-NEXT: v_readlane_b32 s31, v40, 1 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s30, v40, 0 +; GFX10-SCRATCH-NEXT: v_readlane_b32 s31, v40, 1 ; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, -16 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s33, v40, 2 ; GFX10-SCRATCH-NEXT: s_or_saveexec_b32 s0, -1 @@ -1606,12 +1606,12 @@ define amdgpu_gfx void @test_call_external_void_func_f32_imm() #0 { ; GFX9-NEXT: v_writelane_b32 v40, s30, 0 ; GFX9-NEXT: v_mov_b32_e32 v0, 4.0 ; GFX9-NEXT: v_writelane_b32 v40, s31, 1 -; GFX9-NEXT: s_getpc_b64 s[34:35] -; GFX9-NEXT: s_add_u32 s34, s34, external_void_func_f32@rel32@lo+4 -; GFX9-NEXT: s_addc_u32 s35, s35, external_void_func_f32@rel32@hi+12 -; GFX9-NEXT: s_swappc_b64 s[30:31], s[34:35] -; GFX9-NEXT: v_readlane_b32 s31, v40, 1 +; GFX9-NEXT: s_getpc_b64 s[30:31] +; GFX9-NEXT: s_add_u32 s30, s30, external_void_func_f32@rel32@lo+4 +; GFX9-NEXT: s_addc_u32 s31, s31, external_void_func_f32@rel32@hi+12 +; GFX9-NEXT: s_swappc_b64 s[30:31], s[30:31] ; GFX9-NEXT: v_readlane_b32 s30, v40, 0 +; GFX9-NEXT: v_readlane_b32 s31, v40, 1 ; GFX9-NEXT: s_addk_i32 s32, 0xfc00 ; GFX9-NEXT: v_readlane_b32 s33, v40, 2 ; GFX9-NEXT: s_or_saveexec_b64 s[34:35], -1 @@ -1632,14 +1632,14 @@ define amdgpu_gfx void @test_call_external_void_func_f32_imm() #0 { ; GFX10-NEXT: v_mov_b32_e32 v0, 4.0 ; GFX10-NEXT: s_mov_b32 s33, s32 ; GFX10-NEXT: s_addk_i32 s32, 0x200 -; GFX10-NEXT: s_getpc_b64 s[34:35] -; GFX10-NEXT: s_add_u32 s34, s34, external_void_func_f32@rel32@lo+4 -; GFX10-NEXT: s_addc_u32 s35, s35, external_void_func_f32@rel32@hi+12 ; GFX10-NEXT: v_writelane_b32 v40, s30, 0 ; GFX10-NEXT: v_writelane_b32 v40, s31, 1 -; GFX10-NEXT: s_swappc_b64 s[30:31], s[34:35] -; GFX10-NEXT: v_readlane_b32 s31, v40, 1 +; GFX10-NEXT: s_getpc_b64 s[30:31] +; GFX10-NEXT: s_add_u32 s30, s30, external_void_func_f32@rel32@lo+4 +; GFX10-NEXT: s_addc_u32 s31, s31, external_void_func_f32@rel32@hi+12 +; GFX10-NEXT: s_swappc_b64 s[30:31], s[30:31] ; GFX10-NEXT: v_readlane_b32 s30, v40, 0 +; GFX10-NEXT: v_readlane_b32 s31, v40, 1 ; GFX10-NEXT: s_addk_i32 s32, 0xfe00 ; GFX10-NEXT: v_readlane_b32 s33, v40, 2 ; GFX10-NEXT: s_or_saveexec_b32 s34, -1 @@ -1667,8 +1667,8 @@ define amdgpu_gfx void @test_call_external_void_func_f32_imm() #0 { ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s30, 0 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s31, 1 ; GFX10-SCRATCH-NEXT: s_swappc_b64 s[30:31], s[0:1] -; GFX10-SCRATCH-NEXT: v_readlane_b32 s31, v40, 1 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s30, v40, 0 +; GFX10-SCRATCH-NEXT: v_readlane_b32 s31, v40, 1 ; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, -16 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s33, v40, 2 ; GFX10-SCRATCH-NEXT: s_or_saveexec_b32 s0, -1 @@ -1695,12 +1695,12 @@ define amdgpu_gfx void @test_call_external_void_func_v2f32_imm() #0 { ; GFX9-NEXT: v_mov_b32_e32 v0, 1.0 ; GFX9-NEXT: v_mov_b32_e32 v1, 2.0 ; 
GFX9-NEXT: v_writelane_b32 v40, s31, 1 -; GFX9-NEXT: s_getpc_b64 s[34:35] -; GFX9-NEXT: s_add_u32 s34, s34, external_void_func_v2f32@rel32@lo+4 -; GFX9-NEXT: s_addc_u32 s35, s35, external_void_func_v2f32@rel32@hi+12 -; GFX9-NEXT: s_swappc_b64 s[30:31], s[34:35] -; GFX9-NEXT: v_readlane_b32 s31, v40, 1 +; GFX9-NEXT: s_getpc_b64 s[30:31] +; GFX9-NEXT: s_add_u32 s30, s30, external_void_func_v2f32@rel32@lo+4 +; GFX9-NEXT: s_addc_u32 s31, s31, external_void_func_v2f32@rel32@hi+12 +; GFX9-NEXT: s_swappc_b64 s[30:31], s[30:31] ; GFX9-NEXT: v_readlane_b32 s30, v40, 0 +; GFX9-NEXT: v_readlane_b32 s31, v40, 1 ; GFX9-NEXT: s_addk_i32 s32, 0xfc00 ; GFX9-NEXT: v_readlane_b32 s33, v40, 2 ; GFX9-NEXT: s_or_saveexec_b64 s[34:35], -1 @@ -1723,13 +1723,13 @@ define amdgpu_gfx void @test_call_external_void_func_v2f32_imm() #0 { ; GFX10-NEXT: s_mov_b32 s33, s32 ; GFX10-NEXT: s_addk_i32 s32, 0x200 ; GFX10-NEXT: v_writelane_b32 v40, s30, 0 -; GFX10-NEXT: s_getpc_b64 s[34:35] -; GFX10-NEXT: s_add_u32 s34, s34, external_void_func_v2f32@rel32@lo+4 -; GFX10-NEXT: s_addc_u32 s35, s35, external_void_func_v2f32@rel32@hi+12 ; GFX10-NEXT: v_writelane_b32 v40, s31, 1 -; GFX10-NEXT: s_swappc_b64 s[30:31], s[34:35] -; GFX10-NEXT: v_readlane_b32 s31, v40, 1 +; GFX10-NEXT: s_getpc_b64 s[30:31] +; GFX10-NEXT: s_add_u32 s30, s30, external_void_func_v2f32@rel32@lo+4 +; GFX10-NEXT: s_addc_u32 s31, s31, external_void_func_v2f32@rel32@hi+12 +; GFX10-NEXT: s_swappc_b64 s[30:31], s[30:31] ; GFX10-NEXT: v_readlane_b32 s30, v40, 0 +; GFX10-NEXT: v_readlane_b32 s31, v40, 1 ; GFX10-NEXT: s_addk_i32 s32, 0xfe00 ; GFX10-NEXT: v_readlane_b32 s33, v40, 2 ; GFX10-NEXT: s_or_saveexec_b32 s34, -1 @@ -1758,8 +1758,8 @@ define amdgpu_gfx void @test_call_external_void_func_v2f32_imm() #0 { ; GFX10-SCRATCH-NEXT: s_addc_u32 s1, s1, external_void_func_v2f32@rel32@hi+12 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s31, 1 ; GFX10-SCRATCH-NEXT: s_swappc_b64 s[30:31], s[0:1] -; GFX10-SCRATCH-NEXT: v_readlane_b32 s31, v40, 1 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s30, v40, 0 +; GFX10-SCRATCH-NEXT: v_readlane_b32 s31, v40, 1 ; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, -16 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s33, v40, 2 ; GFX10-SCRATCH-NEXT: s_or_saveexec_b32 s0, -1 @@ -1787,12 +1787,12 @@ define amdgpu_gfx void @test_call_external_void_func_v3f32_imm() #0 { ; GFX9-NEXT: v_mov_b32_e32 v1, 2.0 ; GFX9-NEXT: v_mov_b32_e32 v2, 4.0 ; GFX9-NEXT: v_writelane_b32 v40, s31, 1 -; GFX9-NEXT: s_getpc_b64 s[34:35] -; GFX9-NEXT: s_add_u32 s34, s34, external_void_func_v3f32@rel32@lo+4 -; GFX9-NEXT: s_addc_u32 s35, s35, external_void_func_v3f32@rel32@hi+12 -; GFX9-NEXT: s_swappc_b64 s[30:31], s[34:35] -; GFX9-NEXT: v_readlane_b32 s31, v40, 1 +; GFX9-NEXT: s_getpc_b64 s[30:31] +; GFX9-NEXT: s_add_u32 s30, s30, external_void_func_v3f32@rel32@lo+4 +; GFX9-NEXT: s_addc_u32 s31, s31, external_void_func_v3f32@rel32@hi+12 +; GFX9-NEXT: s_swappc_b64 s[30:31], s[30:31] ; GFX9-NEXT: v_readlane_b32 s30, v40, 0 +; GFX9-NEXT: v_readlane_b32 s31, v40, 1 ; GFX9-NEXT: s_addk_i32 s32, 0xfc00 ; GFX9-NEXT: v_readlane_b32 s33, v40, 2 ; GFX9-NEXT: s_or_saveexec_b64 s[34:35], -1 @@ -1816,13 +1816,13 @@ define amdgpu_gfx void @test_call_external_void_func_v3f32_imm() #0 { ; GFX10-NEXT: s_mov_b32 s33, s32 ; GFX10-NEXT: v_writelane_b32 v40, s30, 0 ; GFX10-NEXT: s_addk_i32 s32, 0x200 -; GFX10-NEXT: s_getpc_b64 s[34:35] -; GFX10-NEXT: s_add_u32 s34, s34, external_void_func_v3f32@rel32@lo+4 -; GFX10-NEXT: s_addc_u32 s35, s35, external_void_func_v3f32@rel32@hi+12 ; GFX10-NEXT: v_writelane_b32 v40, s31, 1 
-; GFX10-NEXT: s_swappc_b64 s[30:31], s[34:35] -; GFX10-NEXT: v_readlane_b32 s31, v40, 1 +; GFX10-NEXT: s_getpc_b64 s[30:31] +; GFX10-NEXT: s_add_u32 s30, s30, external_void_func_v3f32@rel32@lo+4 +; GFX10-NEXT: s_addc_u32 s31, s31, external_void_func_v3f32@rel32@hi+12 +; GFX10-NEXT: s_swappc_b64 s[30:31], s[30:31] ; GFX10-NEXT: v_readlane_b32 s30, v40, 0 +; GFX10-NEXT: v_readlane_b32 s31, v40, 1 ; GFX10-NEXT: s_addk_i32 s32, 0xfe00 ; GFX10-NEXT: v_readlane_b32 s33, v40, 2 ; GFX10-NEXT: s_or_saveexec_b32 s34, -1 @@ -1852,8 +1852,8 @@ define amdgpu_gfx void @test_call_external_void_func_v3f32_imm() #0 { ; GFX10-SCRATCH-NEXT: s_addc_u32 s1, s1, external_void_func_v3f32@rel32@hi+12 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s31, 1 ; GFX10-SCRATCH-NEXT: s_swappc_b64 s[30:31], s[0:1] -; GFX10-SCRATCH-NEXT: v_readlane_b32 s31, v40, 1 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s30, v40, 0 +; GFX10-SCRATCH-NEXT: v_readlane_b32 s31, v40, 1 ; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, -16 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s33, v40, 2 ; GFX10-SCRATCH-NEXT: s_or_saveexec_b32 s0, -1 @@ -1883,12 +1883,12 @@ define amdgpu_gfx void @test_call_external_void_func_v5f32_imm() #0 { ; GFX9-NEXT: v_mov_b32_e32 v3, -1.0 ; GFX9-NEXT: v_mov_b32_e32 v4, 0.5 ; GFX9-NEXT: v_writelane_b32 v40, s31, 1 -; GFX9-NEXT: s_getpc_b64 s[34:35] -; GFX9-NEXT: s_add_u32 s34, s34, external_void_func_v5f32@rel32@lo+4 -; GFX9-NEXT: s_addc_u32 s35, s35, external_void_func_v5f32@rel32@hi+12 -; GFX9-NEXT: s_swappc_b64 s[30:31], s[34:35] -; GFX9-NEXT: v_readlane_b32 s31, v40, 1 +; GFX9-NEXT: s_getpc_b64 s[30:31] +; GFX9-NEXT: s_add_u32 s30, s30, external_void_func_v5f32@rel32@lo+4 +; GFX9-NEXT: s_addc_u32 s31, s31, external_void_func_v5f32@rel32@hi+12 +; GFX9-NEXT: s_swappc_b64 s[30:31], s[30:31] ; GFX9-NEXT: v_readlane_b32 s30, v40, 0 +; GFX9-NEXT: v_readlane_b32 s31, v40, 1 ; GFX9-NEXT: s_addk_i32 s32, 0xfc00 ; GFX9-NEXT: v_readlane_b32 s33, v40, 2 ; GFX9-NEXT: s_or_saveexec_b64 s[34:35], -1 @@ -1914,13 +1914,13 @@ define amdgpu_gfx void @test_call_external_void_func_v5f32_imm() #0 { ; GFX10-NEXT: v_mov_b32_e32 v4, 0.5 ; GFX10-NEXT: s_mov_b32 s33, s32 ; GFX10-NEXT: s_addk_i32 s32, 0x200 -; GFX10-NEXT: s_getpc_b64 s[34:35] -; GFX10-NEXT: s_add_u32 s34, s34, external_void_func_v5f32@rel32@lo+4 -; GFX10-NEXT: s_addc_u32 s35, s35, external_void_func_v5f32@rel32@hi+12 ; GFX10-NEXT: v_writelane_b32 v40, s31, 1 -; GFX10-NEXT: s_swappc_b64 s[30:31], s[34:35] -; GFX10-NEXT: v_readlane_b32 s31, v40, 1 +; GFX10-NEXT: s_getpc_b64 s[30:31] +; GFX10-NEXT: s_add_u32 s30, s30, external_void_func_v5f32@rel32@lo+4 +; GFX10-NEXT: s_addc_u32 s31, s31, external_void_func_v5f32@rel32@hi+12 +; GFX10-NEXT: s_swappc_b64 s[30:31], s[30:31] ; GFX10-NEXT: v_readlane_b32 s30, v40, 0 +; GFX10-NEXT: v_readlane_b32 s31, v40, 1 ; GFX10-NEXT: s_addk_i32 s32, 0xfe00 ; GFX10-NEXT: v_readlane_b32 s33, v40, 2 ; GFX10-NEXT: s_or_saveexec_b32 s34, -1 @@ -1952,8 +1952,8 @@ define amdgpu_gfx void @test_call_external_void_func_v5f32_imm() #0 { ; GFX10-SCRATCH-NEXT: s_addc_u32 s1, s1, external_void_func_v5f32@rel32@hi+12 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s31, 1 ; GFX10-SCRATCH-NEXT: s_swappc_b64 s[30:31], s[0:1] -; GFX10-SCRATCH-NEXT: v_readlane_b32 s31, v40, 1 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s30, v40, 0 +; GFX10-SCRATCH-NEXT: v_readlane_b32 s31, v40, 1 ; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, -16 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s33, v40, 2 ; GFX10-SCRATCH-NEXT: s_or_saveexec_b32 s0, -1 @@ -1980,12 +1980,12 @@ define amdgpu_gfx void 
@test_call_external_void_func_f64_imm() #0 { ; GFX9-NEXT: v_mov_b32_e32 v0, 0 ; GFX9-NEXT: v_mov_b32_e32 v1, 0x40100000 ; GFX9-NEXT: v_writelane_b32 v40, s31, 1 -; GFX9-NEXT: s_getpc_b64 s[34:35] -; GFX9-NEXT: s_add_u32 s34, s34, external_void_func_f64@rel32@lo+4 -; GFX9-NEXT: s_addc_u32 s35, s35, external_void_func_f64@rel32@hi+12 -; GFX9-NEXT: s_swappc_b64 s[30:31], s[34:35] -; GFX9-NEXT: v_readlane_b32 s31, v40, 1 +; GFX9-NEXT: s_getpc_b64 s[30:31] +; GFX9-NEXT: s_add_u32 s30, s30, external_void_func_f64@rel32@lo+4 +; GFX9-NEXT: s_addc_u32 s31, s31, external_void_func_f64@rel32@hi+12 +; GFX9-NEXT: s_swappc_b64 s[30:31], s[30:31] ; GFX9-NEXT: v_readlane_b32 s30, v40, 0 +; GFX9-NEXT: v_readlane_b32 s31, v40, 1 ; GFX9-NEXT: s_addk_i32 s32, 0xfc00 ; GFX9-NEXT: v_readlane_b32 s33, v40, 2 ; GFX9-NEXT: s_or_saveexec_b64 s[34:35], -1 @@ -2008,13 +2008,13 @@ define amdgpu_gfx void @test_call_external_void_func_f64_imm() #0 { ; GFX10-NEXT: s_mov_b32 s33, s32 ; GFX10-NEXT: s_addk_i32 s32, 0x200 ; GFX10-NEXT: v_writelane_b32 v40, s30, 0 -; GFX10-NEXT: s_getpc_b64 s[34:35] -; GFX10-NEXT: s_add_u32 s34, s34, external_void_func_f64@rel32@lo+4 -; GFX10-NEXT: s_addc_u32 s35, s35, external_void_func_f64@rel32@hi+12 ; GFX10-NEXT: v_writelane_b32 v40, s31, 1 -; GFX10-NEXT: s_swappc_b64 s[30:31], s[34:35] -; GFX10-NEXT: v_readlane_b32 s31, v40, 1 +; GFX10-NEXT: s_getpc_b64 s[30:31] +; GFX10-NEXT: s_add_u32 s30, s30, external_void_func_f64@rel32@lo+4 +; GFX10-NEXT: s_addc_u32 s31, s31, external_void_func_f64@rel32@hi+12 +; GFX10-NEXT: s_swappc_b64 s[30:31], s[30:31] ; GFX10-NEXT: v_readlane_b32 s30, v40, 0 +; GFX10-NEXT: v_readlane_b32 s31, v40, 1 ; GFX10-NEXT: s_addk_i32 s32, 0xfe00 ; GFX10-NEXT: v_readlane_b32 s33, v40, 2 ; GFX10-NEXT: s_or_saveexec_b32 s34, -1 @@ -2043,8 +2043,8 @@ define amdgpu_gfx void @test_call_external_void_func_f64_imm() #0 { ; GFX10-SCRATCH-NEXT: s_addc_u32 s1, s1, external_void_func_f64@rel32@hi+12 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s31, 1 ; GFX10-SCRATCH-NEXT: s_swappc_b64 s[30:31], s[0:1] -; GFX10-SCRATCH-NEXT: v_readlane_b32 s31, v40, 1 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s30, v40, 0 +; GFX10-SCRATCH-NEXT: v_readlane_b32 s31, v40, 1 ; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, -16 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s33, v40, 2 ; GFX10-SCRATCH-NEXT: s_or_saveexec_b32 s0, -1 @@ -2073,12 +2073,12 @@ define amdgpu_gfx void @test_call_external_void_func_v2f64_imm() #0 { ; GFX9-NEXT: v_mov_b32_e32 v2, 0 ; GFX9-NEXT: v_mov_b32_e32 v3, 0x40100000 ; GFX9-NEXT: v_writelane_b32 v40, s31, 1 -; GFX9-NEXT: s_getpc_b64 s[34:35] -; GFX9-NEXT: s_add_u32 s34, s34, external_void_func_v2f64@rel32@lo+4 -; GFX9-NEXT: s_addc_u32 s35, s35, external_void_func_v2f64@rel32@hi+12 -; GFX9-NEXT: s_swappc_b64 s[30:31], s[34:35] -; GFX9-NEXT: v_readlane_b32 s31, v40, 1 +; GFX9-NEXT: s_getpc_b64 s[30:31] +; GFX9-NEXT: s_add_u32 s30, s30, external_void_func_v2f64@rel32@lo+4 +; GFX9-NEXT: s_addc_u32 s31, s31, external_void_func_v2f64@rel32@hi+12 +; GFX9-NEXT: s_swappc_b64 s[30:31], s[30:31] ; GFX9-NEXT: v_readlane_b32 s30, v40, 0 +; GFX9-NEXT: v_readlane_b32 s31, v40, 1 ; GFX9-NEXT: s_addk_i32 s32, 0xfc00 ; GFX9-NEXT: v_readlane_b32 s33, v40, 2 ; GFX9-NEXT: s_or_saveexec_b64 s[34:35], -1 @@ -2103,13 +2103,13 @@ define amdgpu_gfx void @test_call_external_void_func_v2f64_imm() #0 { ; GFX10-NEXT: v_writelane_b32 v40, s30, 0 ; GFX10-NEXT: s_mov_b32 s33, s32 ; GFX10-NEXT: s_addk_i32 s32, 0x200 -; GFX10-NEXT: s_getpc_b64 s[34:35] -; GFX10-NEXT: s_add_u32 s34, s34, external_void_func_v2f64@rel32@lo+4 -; 
GFX10-NEXT: s_addc_u32 s35, s35, external_void_func_v2f64@rel32@hi+12 ; GFX10-NEXT: v_writelane_b32 v40, s31, 1 -; GFX10-NEXT: s_swappc_b64 s[30:31], s[34:35] -; GFX10-NEXT: v_readlane_b32 s31, v40, 1 +; GFX10-NEXT: s_getpc_b64 s[30:31] +; GFX10-NEXT: s_add_u32 s30, s30, external_void_func_v2f64@rel32@lo+4 +; GFX10-NEXT: s_addc_u32 s31, s31, external_void_func_v2f64@rel32@hi+12 +; GFX10-NEXT: s_swappc_b64 s[30:31], s[30:31] ; GFX10-NEXT: v_readlane_b32 s30, v40, 0 +; GFX10-NEXT: v_readlane_b32 s31, v40, 1 ; GFX10-NEXT: s_addk_i32 s32, 0xfe00 ; GFX10-NEXT: v_readlane_b32 s33, v40, 2 ; GFX10-NEXT: s_or_saveexec_b32 s34, -1 @@ -2140,8 +2140,8 @@ define amdgpu_gfx void @test_call_external_void_func_v2f64_imm() #0 { ; GFX10-SCRATCH-NEXT: s_addc_u32 s1, s1, external_void_func_v2f64@rel32@hi+12 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s31, 1 ; GFX10-SCRATCH-NEXT: s_swappc_b64 s[30:31], s[0:1] -; GFX10-SCRATCH-NEXT: v_readlane_b32 s31, v40, 1 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s30, v40, 0 +; GFX10-SCRATCH-NEXT: v_readlane_b32 s31, v40, 1 ; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, -16 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s33, v40, 2 ; GFX10-SCRATCH-NEXT: s_or_saveexec_b32 s0, -1 @@ -2172,12 +2172,12 @@ define amdgpu_gfx void @test_call_external_void_func_v3f64_imm() #0 { ; GFX9-NEXT: v_mov_b32_e32 v4, 0 ; GFX9-NEXT: v_mov_b32_e32 v5, 0x40200000 ; GFX9-NEXT: v_writelane_b32 v40, s31, 1 -; GFX9-NEXT: s_getpc_b64 s[34:35] -; GFX9-NEXT: s_add_u32 s34, s34, external_void_func_v3f64@rel32@lo+4 -; GFX9-NEXT: s_addc_u32 s35, s35, external_void_func_v3f64@rel32@hi+12 -; GFX9-NEXT: s_swappc_b64 s[30:31], s[34:35] -; GFX9-NEXT: v_readlane_b32 s31, v40, 1 +; GFX9-NEXT: s_getpc_b64 s[30:31] +; GFX9-NEXT: s_add_u32 s30, s30, external_void_func_v3f64@rel32@lo+4 +; GFX9-NEXT: s_addc_u32 s31, s31, external_void_func_v3f64@rel32@hi+12 +; GFX9-NEXT: s_swappc_b64 s[30:31], s[30:31] ; GFX9-NEXT: v_readlane_b32 s30, v40, 0 +; GFX9-NEXT: v_readlane_b32 s31, v40, 1 ; GFX9-NEXT: s_addk_i32 s32, 0xfc00 ; GFX9-NEXT: v_readlane_b32 s33, v40, 2 ; GFX9-NEXT: s_or_saveexec_b64 s[34:35], -1 @@ -2205,12 +2205,12 @@ define amdgpu_gfx void @test_call_external_void_func_v3f64_imm() #0 { ; GFX10-NEXT: s_mov_b32 s33, s32 ; GFX10-NEXT: s_addk_i32 s32, 0x200 ; GFX10-NEXT: v_writelane_b32 v40, s31, 1 -; GFX10-NEXT: s_getpc_b64 s[34:35] -; GFX10-NEXT: s_add_u32 s34, s34, external_void_func_v3f64@rel32@lo+4 -; GFX10-NEXT: s_addc_u32 s35, s35, external_void_func_v3f64@rel32@hi+12 -; GFX10-NEXT: s_swappc_b64 s[30:31], s[34:35] -; GFX10-NEXT: v_readlane_b32 s31, v40, 1 +; GFX10-NEXT: s_getpc_b64 s[30:31] +; GFX10-NEXT: s_add_u32 s30, s30, external_void_func_v3f64@rel32@lo+4 +; GFX10-NEXT: s_addc_u32 s31, s31, external_void_func_v3f64@rel32@hi+12 +; GFX10-NEXT: s_swappc_b64 s[30:31], s[30:31] ; GFX10-NEXT: v_readlane_b32 s30, v40, 0 +; GFX10-NEXT: v_readlane_b32 s31, v40, 1 ; GFX10-NEXT: s_addk_i32 s32, 0xfe00 ; GFX10-NEXT: v_readlane_b32 s33, v40, 2 ; GFX10-NEXT: s_or_saveexec_b32 s34, -1 @@ -2243,8 +2243,8 @@ define amdgpu_gfx void @test_call_external_void_func_v3f64_imm() #0 { ; GFX10-SCRATCH-NEXT: s_add_u32 s0, s0, external_void_func_v3f64@rel32@lo+4 ; GFX10-SCRATCH-NEXT: s_addc_u32 s1, s1, external_void_func_v3f64@rel32@hi+12 ; GFX10-SCRATCH-NEXT: s_swappc_b64 s[30:31], s[0:1] -; GFX10-SCRATCH-NEXT: v_readlane_b32 s31, v40, 1 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s30, v40, 0 +; GFX10-SCRATCH-NEXT: v_readlane_b32 s31, v40, 1 ; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, -16 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s33, v40, 2 ; 
GFX10-SCRATCH-NEXT: s_or_saveexec_b32 s0, -1 @@ -2270,12 +2270,12 @@ define amdgpu_gfx void @test_call_external_void_func_v2i16() #0 { ; GFX9-NEXT: s_addk_i32 s32, 0x400 ; GFX9-NEXT: v_writelane_b32 v40, s30, 0 ; GFX9-NEXT: v_writelane_b32 v40, s31, 1 -; GFX9-NEXT: s_getpc_b64 s[34:35] -; GFX9-NEXT: s_add_u32 s34, s34, external_void_func_v2i16@rel32@lo+4 -; GFX9-NEXT: s_addc_u32 s35, s35, external_void_func_v2i16@rel32@hi+12 -; GFX9-NEXT: s_swappc_b64 s[30:31], s[34:35] -; GFX9-NEXT: v_readlane_b32 s31, v40, 1 +; GFX9-NEXT: s_getpc_b64 s[30:31] +; GFX9-NEXT: s_add_u32 s30, s30, external_void_func_v2i16@rel32@lo+4 +; GFX9-NEXT: s_addc_u32 s31, s31, external_void_func_v2i16@rel32@hi+12 +; GFX9-NEXT: s_swappc_b64 s[30:31], s[30:31] ; GFX9-NEXT: v_readlane_b32 s30, v40, 0 +; GFX9-NEXT: v_readlane_b32 s31, v40, 1 ; GFX9-NEXT: s_addk_i32 s32, 0xfc00 ; GFX9-NEXT: v_readlane_b32 s33, v40, 2 ; GFX9-NEXT: s_or_saveexec_b64 s[34:35], -1 @@ -2296,14 +2296,14 @@ define amdgpu_gfx void @test_call_external_void_func_v2i16() #0 { ; GFX10-NEXT: v_writelane_b32 v40, s33, 2 ; GFX10-NEXT: s_mov_b32 s33, s32 ; GFX10-NEXT: s_addk_i32 s32, 0x200 -; GFX10-NEXT: s_getpc_b64 s[34:35] -; GFX10-NEXT: s_add_u32 s34, s34, external_void_func_v2i16@rel32@lo+4 -; GFX10-NEXT: s_addc_u32 s35, s35, external_void_func_v2i16@rel32@hi+12 ; GFX10-NEXT: v_writelane_b32 v40, s30, 0 ; GFX10-NEXT: v_writelane_b32 v40, s31, 1 -; GFX10-NEXT: s_swappc_b64 s[30:31], s[34:35] -; GFX10-NEXT: v_readlane_b32 s31, v40, 1 +; GFX10-NEXT: s_getpc_b64 s[30:31] +; GFX10-NEXT: s_add_u32 s30, s30, external_void_func_v2i16@rel32@lo+4 +; GFX10-NEXT: s_addc_u32 s31, s31, external_void_func_v2i16@rel32@hi+12 +; GFX10-NEXT: s_swappc_b64 s[30:31], s[30:31] ; GFX10-NEXT: v_readlane_b32 s30, v40, 0 +; GFX10-NEXT: v_readlane_b32 s31, v40, 1 ; GFX10-NEXT: s_addk_i32 s32, 0xfe00 ; GFX10-NEXT: v_readlane_b32 s33, v40, 2 ; GFX10-NEXT: s_or_saveexec_b32 s34, -1 @@ -2331,8 +2331,8 @@ define amdgpu_gfx void @test_call_external_void_func_v2i16() #0 { ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s30, 0 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s31, 1 ; GFX10-SCRATCH-NEXT: s_swappc_b64 s[30:31], s[0:1] -; GFX10-SCRATCH-NEXT: v_readlane_b32 s31, v40, 1 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s30, v40, 0 +; GFX10-SCRATCH-NEXT: v_readlane_b32 s31, v40, 1 ; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, -16 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s33, v40, 2 ; GFX10-SCRATCH-NEXT: s_or_saveexec_b32 s0, -1 @@ -2359,12 +2359,12 @@ define amdgpu_gfx void @test_call_external_void_func_v3i16() #0 { ; GFX9-NEXT: s_addk_i32 s32, 0x400 ; GFX9-NEXT: v_writelane_b32 v40, s30, 0 ; GFX9-NEXT: v_writelane_b32 v40, s31, 1 -; GFX9-NEXT: s_getpc_b64 s[34:35] -; GFX9-NEXT: s_add_u32 s34, s34, external_void_func_v3i16@rel32@lo+4 -; GFX9-NEXT: s_addc_u32 s35, s35, external_void_func_v3i16@rel32@hi+12 -; GFX9-NEXT: s_swappc_b64 s[30:31], s[34:35] -; GFX9-NEXT: v_readlane_b32 s31, v40, 1 +; GFX9-NEXT: s_getpc_b64 s[30:31] +; GFX9-NEXT: s_add_u32 s30, s30, external_void_func_v3i16@rel32@lo+4 +; GFX9-NEXT: s_addc_u32 s31, s31, external_void_func_v3i16@rel32@hi+12 +; GFX9-NEXT: s_swappc_b64 s[30:31], s[30:31] ; GFX9-NEXT: v_readlane_b32 s30, v40, 0 +; GFX9-NEXT: v_readlane_b32 s31, v40, 1 ; GFX9-NEXT: s_addk_i32 s32, 0xfc00 ; GFX9-NEXT: v_readlane_b32 s33, v40, 2 ; GFX9-NEXT: s_or_saveexec_b64 s[34:35], -1 @@ -2385,14 +2385,14 @@ define amdgpu_gfx void @test_call_external_void_func_v3i16() #0 { ; GFX10-NEXT: v_writelane_b32 v40, s33, 2 ; GFX10-NEXT: s_mov_b32 s33, s32 ; GFX10-NEXT: s_addk_i32 s32, 0x200 -; 
GFX10-NEXT: s_getpc_b64 s[34:35] -; GFX10-NEXT: s_add_u32 s34, s34, external_void_func_v3i16@rel32@lo+4 -; GFX10-NEXT: s_addc_u32 s35, s35, external_void_func_v3i16@rel32@hi+12 ; GFX10-NEXT: v_writelane_b32 v40, s30, 0 ; GFX10-NEXT: v_writelane_b32 v40, s31, 1 -; GFX10-NEXT: s_swappc_b64 s[30:31], s[34:35] -; GFX10-NEXT: v_readlane_b32 s31, v40, 1 +; GFX10-NEXT: s_getpc_b64 s[30:31] +; GFX10-NEXT: s_add_u32 s30, s30, external_void_func_v3i16@rel32@lo+4 +; GFX10-NEXT: s_addc_u32 s31, s31, external_void_func_v3i16@rel32@hi+12 +; GFX10-NEXT: s_swappc_b64 s[30:31], s[30:31] ; GFX10-NEXT: v_readlane_b32 s30, v40, 0 +; GFX10-NEXT: v_readlane_b32 s31, v40, 1 ; GFX10-NEXT: s_addk_i32 s32, 0xfe00 ; GFX10-NEXT: v_readlane_b32 s33, v40, 2 ; GFX10-NEXT: s_or_saveexec_b32 s34, -1 @@ -2420,8 +2420,8 @@ define amdgpu_gfx void @test_call_external_void_func_v3i16() #0 { ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s30, 0 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s31, 1 ; GFX10-SCRATCH-NEXT: s_swappc_b64 s[30:31], s[0:1] -; GFX10-SCRATCH-NEXT: v_readlane_b32 s31, v40, 1 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s30, v40, 0 +; GFX10-SCRATCH-NEXT: v_readlane_b32 s31, v40, 1 ; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, -16 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s33, v40, 2 ; GFX10-SCRATCH-NEXT: s_or_saveexec_b32 s0, -1 @@ -2448,12 +2448,12 @@ define amdgpu_gfx void @test_call_external_void_func_v3f16() #0 { ; GFX9-NEXT: s_addk_i32 s32, 0x400 ; GFX9-NEXT: v_writelane_b32 v40, s30, 0 ; GFX9-NEXT: v_writelane_b32 v40, s31, 1 -; GFX9-NEXT: s_getpc_b64 s[34:35] -; GFX9-NEXT: s_add_u32 s34, s34, external_void_func_v3f16@rel32@lo+4 -; GFX9-NEXT: s_addc_u32 s35, s35, external_void_func_v3f16@rel32@hi+12 -; GFX9-NEXT: s_swappc_b64 s[30:31], s[34:35] -; GFX9-NEXT: v_readlane_b32 s31, v40, 1 +; GFX9-NEXT: s_getpc_b64 s[30:31] +; GFX9-NEXT: s_add_u32 s30, s30, external_void_func_v3f16@rel32@lo+4 +; GFX9-NEXT: s_addc_u32 s31, s31, external_void_func_v3f16@rel32@hi+12 +; GFX9-NEXT: s_swappc_b64 s[30:31], s[30:31] ; GFX9-NEXT: v_readlane_b32 s30, v40, 0 +; GFX9-NEXT: v_readlane_b32 s31, v40, 1 ; GFX9-NEXT: s_addk_i32 s32, 0xfc00 ; GFX9-NEXT: v_readlane_b32 s33, v40, 2 ; GFX9-NEXT: s_or_saveexec_b64 s[34:35], -1 @@ -2474,14 +2474,14 @@ define amdgpu_gfx void @test_call_external_void_func_v3f16() #0 { ; GFX10-NEXT: v_writelane_b32 v40, s33, 2 ; GFX10-NEXT: s_mov_b32 s33, s32 ; GFX10-NEXT: s_addk_i32 s32, 0x200 -; GFX10-NEXT: s_getpc_b64 s[34:35] -; GFX10-NEXT: s_add_u32 s34, s34, external_void_func_v3f16@rel32@lo+4 -; GFX10-NEXT: s_addc_u32 s35, s35, external_void_func_v3f16@rel32@hi+12 ; GFX10-NEXT: v_writelane_b32 v40, s30, 0 ; GFX10-NEXT: v_writelane_b32 v40, s31, 1 -; GFX10-NEXT: s_swappc_b64 s[30:31], s[34:35] -; GFX10-NEXT: v_readlane_b32 s31, v40, 1 +; GFX10-NEXT: s_getpc_b64 s[30:31] +; GFX10-NEXT: s_add_u32 s30, s30, external_void_func_v3f16@rel32@lo+4 +; GFX10-NEXT: s_addc_u32 s31, s31, external_void_func_v3f16@rel32@hi+12 +; GFX10-NEXT: s_swappc_b64 s[30:31], s[30:31] ; GFX10-NEXT: v_readlane_b32 s30, v40, 0 +; GFX10-NEXT: v_readlane_b32 s31, v40, 1 ; GFX10-NEXT: s_addk_i32 s32, 0xfe00 ; GFX10-NEXT: v_readlane_b32 s33, v40, 2 ; GFX10-NEXT: s_or_saveexec_b32 s34, -1 @@ -2509,8 +2509,8 @@ define amdgpu_gfx void @test_call_external_void_func_v3f16() #0 { ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s30, 0 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s31, 1 ; GFX10-SCRATCH-NEXT: s_swappc_b64 s[30:31], s[0:1] -; GFX10-SCRATCH-NEXT: v_readlane_b32 s31, v40, 1 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s30, v40, 0 +; GFX10-SCRATCH-NEXT: 
v_readlane_b32 s31, v40, 1 ; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, -16 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s33, v40, 2 ; GFX10-SCRATCH-NEXT: s_or_saveexec_b32 s0, -1 @@ -2538,12 +2538,12 @@ define amdgpu_gfx void @test_call_external_void_func_v3i16_imm() #0 { ; GFX9-NEXT: v_mov_b32_e32 v0, 0x20001 ; GFX9-NEXT: v_mov_b32_e32 v1, 3 ; GFX9-NEXT: v_writelane_b32 v40, s31, 1 -; GFX9-NEXT: s_getpc_b64 s[34:35] -; GFX9-NEXT: s_add_u32 s34, s34, external_void_func_v3i16@rel32@lo+4 -; GFX9-NEXT: s_addc_u32 s35, s35, external_void_func_v3i16@rel32@hi+12 -; GFX9-NEXT: s_swappc_b64 s[30:31], s[34:35] -; GFX9-NEXT: v_readlane_b32 s31, v40, 1 +; GFX9-NEXT: s_getpc_b64 s[30:31] +; GFX9-NEXT: s_add_u32 s30, s30, external_void_func_v3i16@rel32@lo+4 +; GFX9-NEXT: s_addc_u32 s31, s31, external_void_func_v3i16@rel32@hi+12 +; GFX9-NEXT: s_swappc_b64 s[30:31], s[30:31] ; GFX9-NEXT: v_readlane_b32 s30, v40, 0 +; GFX9-NEXT: v_readlane_b32 s31, v40, 1 ; GFX9-NEXT: s_addk_i32 s32, 0xfc00 ; GFX9-NEXT: v_readlane_b32 s33, v40, 2 ; GFX9-NEXT: s_or_saveexec_b64 s[34:35], -1 @@ -2566,13 +2566,13 @@ define amdgpu_gfx void @test_call_external_void_func_v3i16_imm() #0 { ; GFX10-NEXT: s_mov_b32 s33, s32 ; GFX10-NEXT: s_addk_i32 s32, 0x200 ; GFX10-NEXT: v_writelane_b32 v40, s30, 0 -; GFX10-NEXT: s_getpc_b64 s[34:35] -; GFX10-NEXT: s_add_u32 s34, s34, external_void_func_v3i16@rel32@lo+4 -; GFX10-NEXT: s_addc_u32 s35, s35, external_void_func_v3i16@rel32@hi+12 ; GFX10-NEXT: v_writelane_b32 v40, s31, 1 -; GFX10-NEXT: s_swappc_b64 s[30:31], s[34:35] -; GFX10-NEXT: v_readlane_b32 s31, v40, 1 +; GFX10-NEXT: s_getpc_b64 s[30:31] +; GFX10-NEXT: s_add_u32 s30, s30, external_void_func_v3i16@rel32@lo+4 +; GFX10-NEXT: s_addc_u32 s31, s31, external_void_func_v3i16@rel32@hi+12 +; GFX10-NEXT: s_swappc_b64 s[30:31], s[30:31] ; GFX10-NEXT: v_readlane_b32 s30, v40, 0 +; GFX10-NEXT: v_readlane_b32 s31, v40, 1 ; GFX10-NEXT: s_addk_i32 s32, 0xfe00 ; GFX10-NEXT: v_readlane_b32 s33, v40, 2 ; GFX10-NEXT: s_or_saveexec_b32 s34, -1 @@ -2601,8 +2601,8 @@ define amdgpu_gfx void @test_call_external_void_func_v3i16_imm() #0 { ; GFX10-SCRATCH-NEXT: s_addc_u32 s1, s1, external_void_func_v3i16@rel32@hi+12 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s31, 1 ; GFX10-SCRATCH-NEXT: s_swappc_b64 s[30:31], s[0:1] -; GFX10-SCRATCH-NEXT: v_readlane_b32 s31, v40, 1 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s30, v40, 0 +; GFX10-SCRATCH-NEXT: v_readlane_b32 s31, v40, 1 ; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, -16 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s33, v40, 2 ; GFX10-SCRATCH-NEXT: s_or_saveexec_b32 s0, -1 @@ -2629,12 +2629,12 @@ define amdgpu_gfx void @test_call_external_void_func_v3f16_imm() #0 { ; GFX9-NEXT: v_mov_b32_e32 v0, 0x40003c00 ; GFX9-NEXT: v_mov_b32_e32 v1, 0x4400 ; GFX9-NEXT: v_writelane_b32 v40, s31, 1 -; GFX9-NEXT: s_getpc_b64 s[34:35] -; GFX9-NEXT: s_add_u32 s34, s34, external_void_func_v3f16@rel32@lo+4 -; GFX9-NEXT: s_addc_u32 s35, s35, external_void_func_v3f16@rel32@hi+12 -; GFX9-NEXT: s_swappc_b64 s[30:31], s[34:35] -; GFX9-NEXT: v_readlane_b32 s31, v40, 1 +; GFX9-NEXT: s_getpc_b64 s[30:31] +; GFX9-NEXT: s_add_u32 s30, s30, external_void_func_v3f16@rel32@lo+4 +; GFX9-NEXT: s_addc_u32 s31, s31, external_void_func_v3f16@rel32@hi+12 +; GFX9-NEXT: s_swappc_b64 s[30:31], s[30:31] ; GFX9-NEXT: v_readlane_b32 s30, v40, 0 +; GFX9-NEXT: v_readlane_b32 s31, v40, 1 ; GFX9-NEXT: s_addk_i32 s32, 0xfc00 ; GFX9-NEXT: v_readlane_b32 s33, v40, 2 ; GFX9-NEXT: s_or_saveexec_b64 s[34:35], -1 @@ -2657,13 +2657,13 @@ define amdgpu_gfx void 
@test_call_external_void_func_v3f16_imm() #0 { ; GFX10-NEXT: s_mov_b32 s33, s32 ; GFX10-NEXT: s_addk_i32 s32, 0x200 ; GFX10-NEXT: v_writelane_b32 v40, s30, 0 -; GFX10-NEXT: s_getpc_b64 s[34:35] -; GFX10-NEXT: s_add_u32 s34, s34, external_void_func_v3f16@rel32@lo+4 -; GFX10-NEXT: s_addc_u32 s35, s35, external_void_func_v3f16@rel32@hi+12 ; GFX10-NEXT: v_writelane_b32 v40, s31, 1 -; GFX10-NEXT: s_swappc_b64 s[30:31], s[34:35] -; GFX10-NEXT: v_readlane_b32 s31, v40, 1 +; GFX10-NEXT: s_getpc_b64 s[30:31] +; GFX10-NEXT: s_add_u32 s30, s30, external_void_func_v3f16@rel32@lo+4 +; GFX10-NEXT: s_addc_u32 s31, s31, external_void_func_v3f16@rel32@hi+12 +; GFX10-NEXT: s_swappc_b64 s[30:31], s[30:31] ; GFX10-NEXT: v_readlane_b32 s30, v40, 0 +; GFX10-NEXT: v_readlane_b32 s31, v40, 1 ; GFX10-NEXT: s_addk_i32 s32, 0xfe00 ; GFX10-NEXT: v_readlane_b32 s33, v40, 2 ; GFX10-NEXT: s_or_saveexec_b32 s34, -1 @@ -2692,8 +2692,8 @@ define amdgpu_gfx void @test_call_external_void_func_v3f16_imm() #0 { ; GFX10-SCRATCH-NEXT: s_addc_u32 s1, s1, external_void_func_v3f16@rel32@hi+12 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s31, 1 ; GFX10-SCRATCH-NEXT: s_swappc_b64 s[30:31], s[0:1] -; GFX10-SCRATCH-NEXT: v_readlane_b32 s31, v40, 1 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s30, v40, 0 +; GFX10-SCRATCH-NEXT: v_readlane_b32 s31, v40, 1 ; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, -16 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s33, v40, 2 ; GFX10-SCRATCH-NEXT: s_or_saveexec_b32 s0, -1 @@ -2719,12 +2719,12 @@ define amdgpu_gfx void @test_call_external_void_func_v4i16() #0 { ; GFX9-NEXT: s_addk_i32 s32, 0x400 ; GFX9-NEXT: v_writelane_b32 v40, s30, 0 ; GFX9-NEXT: v_writelane_b32 v40, s31, 1 -; GFX9-NEXT: s_getpc_b64 s[34:35] -; GFX9-NEXT: s_add_u32 s34, s34, external_void_func_v4i16@rel32@lo+4 -; GFX9-NEXT: s_addc_u32 s35, s35, external_void_func_v4i16@rel32@hi+12 -; GFX9-NEXT: s_swappc_b64 s[30:31], s[34:35] -; GFX9-NEXT: v_readlane_b32 s31, v40, 1 +; GFX9-NEXT: s_getpc_b64 s[30:31] +; GFX9-NEXT: s_add_u32 s30, s30, external_void_func_v4i16@rel32@lo+4 +; GFX9-NEXT: s_addc_u32 s31, s31, external_void_func_v4i16@rel32@hi+12 +; GFX9-NEXT: s_swappc_b64 s[30:31], s[30:31] ; GFX9-NEXT: v_readlane_b32 s30, v40, 0 +; GFX9-NEXT: v_readlane_b32 s31, v40, 1 ; GFX9-NEXT: s_addk_i32 s32, 0xfc00 ; GFX9-NEXT: v_readlane_b32 s33, v40, 2 ; GFX9-NEXT: s_or_saveexec_b64 s[34:35], -1 @@ -2745,14 +2745,14 @@ define amdgpu_gfx void @test_call_external_void_func_v4i16() #0 { ; GFX10-NEXT: v_writelane_b32 v40, s33, 2 ; GFX10-NEXT: s_mov_b32 s33, s32 ; GFX10-NEXT: s_addk_i32 s32, 0x200 -; GFX10-NEXT: s_getpc_b64 s[34:35] -; GFX10-NEXT: s_add_u32 s34, s34, external_void_func_v4i16@rel32@lo+4 -; GFX10-NEXT: s_addc_u32 s35, s35, external_void_func_v4i16@rel32@hi+12 ; GFX10-NEXT: v_writelane_b32 v40, s30, 0 ; GFX10-NEXT: v_writelane_b32 v40, s31, 1 -; GFX10-NEXT: s_swappc_b64 s[30:31], s[34:35] -; GFX10-NEXT: v_readlane_b32 s31, v40, 1 +; GFX10-NEXT: s_getpc_b64 s[30:31] +; GFX10-NEXT: s_add_u32 s30, s30, external_void_func_v4i16@rel32@lo+4 +; GFX10-NEXT: s_addc_u32 s31, s31, external_void_func_v4i16@rel32@hi+12 +; GFX10-NEXT: s_swappc_b64 s[30:31], s[30:31] ; GFX10-NEXT: v_readlane_b32 s30, v40, 0 +; GFX10-NEXT: v_readlane_b32 s31, v40, 1 ; GFX10-NEXT: s_addk_i32 s32, 0xfe00 ; GFX10-NEXT: v_readlane_b32 s33, v40, 2 ; GFX10-NEXT: s_or_saveexec_b32 s34, -1 @@ -2780,8 +2780,8 @@ define amdgpu_gfx void @test_call_external_void_func_v4i16() #0 { ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s30, 0 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s31, 1 ; GFX10-SCRATCH-NEXT: 
s_swappc_b64 s[30:31], s[0:1] -; GFX10-SCRATCH-NEXT: v_readlane_b32 s31, v40, 1 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s30, v40, 0 +; GFX10-SCRATCH-NEXT: v_readlane_b32 s31, v40, 1 ; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, -16 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s33, v40, 2 ; GFX10-SCRATCH-NEXT: s_or_saveexec_b32 s0, -1 @@ -2809,12 +2809,12 @@ define amdgpu_gfx void @test_call_external_void_func_v4i16_imm() #0 { ; GFX9-NEXT: v_mov_b32_e32 v0, 0x20001 ; GFX9-NEXT: v_mov_b32_e32 v1, 0x40003 ; GFX9-NEXT: v_writelane_b32 v40, s31, 1 -; GFX9-NEXT: s_getpc_b64 s[34:35] -; GFX9-NEXT: s_add_u32 s34, s34, external_void_func_v4i16@rel32@lo+4 -; GFX9-NEXT: s_addc_u32 s35, s35, external_void_func_v4i16@rel32@hi+12 -; GFX9-NEXT: s_swappc_b64 s[30:31], s[34:35] -; GFX9-NEXT: v_readlane_b32 s31, v40, 1 +; GFX9-NEXT: s_getpc_b64 s[30:31] +; GFX9-NEXT: s_add_u32 s30, s30, external_void_func_v4i16@rel32@lo+4 +; GFX9-NEXT: s_addc_u32 s31, s31, external_void_func_v4i16@rel32@hi+12 +; GFX9-NEXT: s_swappc_b64 s[30:31], s[30:31] ; GFX9-NEXT: v_readlane_b32 s30, v40, 0 +; GFX9-NEXT: v_readlane_b32 s31, v40, 1 ; GFX9-NEXT: s_addk_i32 s32, 0xfc00 ; GFX9-NEXT: v_readlane_b32 s33, v40, 2 ; GFX9-NEXT: s_or_saveexec_b64 s[34:35], -1 @@ -2837,13 +2837,13 @@ define amdgpu_gfx void @test_call_external_void_func_v4i16_imm() #0 { ; GFX10-NEXT: s_mov_b32 s33, s32 ; GFX10-NEXT: s_addk_i32 s32, 0x200 ; GFX10-NEXT: v_writelane_b32 v40, s30, 0 -; GFX10-NEXT: s_getpc_b64 s[34:35] -; GFX10-NEXT: s_add_u32 s34, s34, external_void_func_v4i16@rel32@lo+4 -; GFX10-NEXT: s_addc_u32 s35, s35, external_void_func_v4i16@rel32@hi+12 ; GFX10-NEXT: v_writelane_b32 v40, s31, 1 -; GFX10-NEXT: s_swappc_b64 s[30:31], s[34:35] -; GFX10-NEXT: v_readlane_b32 s31, v40, 1 +; GFX10-NEXT: s_getpc_b64 s[30:31] +; GFX10-NEXT: s_add_u32 s30, s30, external_void_func_v4i16@rel32@lo+4 +; GFX10-NEXT: s_addc_u32 s31, s31, external_void_func_v4i16@rel32@hi+12 +; GFX10-NEXT: s_swappc_b64 s[30:31], s[30:31] ; GFX10-NEXT: v_readlane_b32 s30, v40, 0 +; GFX10-NEXT: v_readlane_b32 s31, v40, 1 ; GFX10-NEXT: s_addk_i32 s32, 0xfe00 ; GFX10-NEXT: v_readlane_b32 s33, v40, 2 ; GFX10-NEXT: s_or_saveexec_b32 s34, -1 @@ -2872,8 +2872,8 @@ define amdgpu_gfx void @test_call_external_void_func_v4i16_imm() #0 { ; GFX10-SCRATCH-NEXT: s_addc_u32 s1, s1, external_void_func_v4i16@rel32@hi+12 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s31, 1 ; GFX10-SCRATCH-NEXT: s_swappc_b64 s[30:31], s[0:1] -; GFX10-SCRATCH-NEXT: v_readlane_b32 s31, v40, 1 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s30, v40, 0 +; GFX10-SCRATCH-NEXT: v_readlane_b32 s31, v40, 1 ; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, -16 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s33, v40, 2 ; GFX10-SCRATCH-NEXT: s_or_saveexec_b32 s0, -1 @@ -2899,12 +2899,12 @@ define amdgpu_gfx void @test_call_external_void_func_v2f16() #0 { ; GFX9-NEXT: s_addk_i32 s32, 0x400 ; GFX9-NEXT: v_writelane_b32 v40, s30, 0 ; GFX9-NEXT: v_writelane_b32 v40, s31, 1 -; GFX9-NEXT: s_getpc_b64 s[34:35] -; GFX9-NEXT: s_add_u32 s34, s34, external_void_func_v2f16@rel32@lo+4 -; GFX9-NEXT: s_addc_u32 s35, s35, external_void_func_v2f16@rel32@hi+12 -; GFX9-NEXT: s_swappc_b64 s[30:31], s[34:35] -; GFX9-NEXT: v_readlane_b32 s31, v40, 1 +; GFX9-NEXT: s_getpc_b64 s[30:31] +; GFX9-NEXT: s_add_u32 s30, s30, external_void_func_v2f16@rel32@lo+4 +; GFX9-NEXT: s_addc_u32 s31, s31, external_void_func_v2f16@rel32@hi+12 +; GFX9-NEXT: s_swappc_b64 s[30:31], s[30:31] ; GFX9-NEXT: v_readlane_b32 s30, v40, 0 +; GFX9-NEXT: v_readlane_b32 s31, v40, 1 ; GFX9-NEXT: s_addk_i32 s32, 0xfc00 ; GFX9-NEXT: 
v_readlane_b32 s33, v40, 2 ; GFX9-NEXT: s_or_saveexec_b64 s[34:35], -1 @@ -2925,14 +2925,14 @@ define amdgpu_gfx void @test_call_external_void_func_v2f16() #0 { ; GFX10-NEXT: v_writelane_b32 v40, s33, 2 ; GFX10-NEXT: s_mov_b32 s33, s32 ; GFX10-NEXT: s_addk_i32 s32, 0x200 -; GFX10-NEXT: s_getpc_b64 s[34:35] -; GFX10-NEXT: s_add_u32 s34, s34, external_void_func_v2f16@rel32@lo+4 -; GFX10-NEXT: s_addc_u32 s35, s35, external_void_func_v2f16@rel32@hi+12 ; GFX10-NEXT: v_writelane_b32 v40, s30, 0 ; GFX10-NEXT: v_writelane_b32 v40, s31, 1 -; GFX10-NEXT: s_swappc_b64 s[30:31], s[34:35] -; GFX10-NEXT: v_readlane_b32 s31, v40, 1 +; GFX10-NEXT: s_getpc_b64 s[30:31] +; GFX10-NEXT: s_add_u32 s30, s30, external_void_func_v2f16@rel32@lo+4 +; GFX10-NEXT: s_addc_u32 s31, s31, external_void_func_v2f16@rel32@hi+12 +; GFX10-NEXT: s_swappc_b64 s[30:31], s[30:31] ; GFX10-NEXT: v_readlane_b32 s30, v40, 0 +; GFX10-NEXT: v_readlane_b32 s31, v40, 1 ; GFX10-NEXT: s_addk_i32 s32, 0xfe00 ; GFX10-NEXT: v_readlane_b32 s33, v40, 2 ; GFX10-NEXT: s_or_saveexec_b32 s34, -1 @@ -2960,8 +2960,8 @@ define amdgpu_gfx void @test_call_external_void_func_v2f16() #0 { ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s30, 0 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s31, 1 ; GFX10-SCRATCH-NEXT: s_swappc_b64 s[30:31], s[0:1] -; GFX10-SCRATCH-NEXT: v_readlane_b32 s31, v40, 1 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s30, v40, 0 +; GFX10-SCRATCH-NEXT: v_readlane_b32 s31, v40, 1 ; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, -16 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s33, v40, 2 ; GFX10-SCRATCH-NEXT: s_or_saveexec_b32 s0, -1 @@ -2988,12 +2988,12 @@ define amdgpu_gfx void @test_call_external_void_func_v2i32() #0 { ; GFX9-NEXT: s_addk_i32 s32, 0x400 ; GFX9-NEXT: v_writelane_b32 v40, s30, 0 ; GFX9-NEXT: v_writelane_b32 v40, s31, 1 -; GFX9-NEXT: s_getpc_b64 s[34:35] -; GFX9-NEXT: s_add_u32 s34, s34, external_void_func_v2i32@rel32@lo+4 -; GFX9-NEXT: s_addc_u32 s35, s35, external_void_func_v2i32@rel32@hi+12 -; GFX9-NEXT: s_swappc_b64 s[30:31], s[34:35] -; GFX9-NEXT: v_readlane_b32 s31, v40, 1 +; GFX9-NEXT: s_getpc_b64 s[30:31] +; GFX9-NEXT: s_add_u32 s30, s30, external_void_func_v2i32@rel32@lo+4 +; GFX9-NEXT: s_addc_u32 s31, s31, external_void_func_v2i32@rel32@hi+12 +; GFX9-NEXT: s_swappc_b64 s[30:31], s[30:31] ; GFX9-NEXT: v_readlane_b32 s30, v40, 0 +; GFX9-NEXT: v_readlane_b32 s31, v40, 1 ; GFX9-NEXT: s_addk_i32 s32, 0xfc00 ; GFX9-NEXT: v_readlane_b32 s33, v40, 2 ; GFX9-NEXT: s_or_saveexec_b64 s[34:35], -1 @@ -3014,14 +3014,14 @@ define amdgpu_gfx void @test_call_external_void_func_v2i32() #0 { ; GFX10-NEXT: v_writelane_b32 v40, s33, 2 ; GFX10-NEXT: s_mov_b32 s33, s32 ; GFX10-NEXT: s_addk_i32 s32, 0x200 -; GFX10-NEXT: s_getpc_b64 s[34:35] -; GFX10-NEXT: s_add_u32 s34, s34, external_void_func_v2i32@rel32@lo+4 -; GFX10-NEXT: s_addc_u32 s35, s35, external_void_func_v2i32@rel32@hi+12 ; GFX10-NEXT: v_writelane_b32 v40, s30, 0 ; GFX10-NEXT: v_writelane_b32 v40, s31, 1 -; GFX10-NEXT: s_swappc_b64 s[30:31], s[34:35] -; GFX10-NEXT: v_readlane_b32 s31, v40, 1 +; GFX10-NEXT: s_getpc_b64 s[30:31] +; GFX10-NEXT: s_add_u32 s30, s30, external_void_func_v2i32@rel32@lo+4 +; GFX10-NEXT: s_addc_u32 s31, s31, external_void_func_v2i32@rel32@hi+12 +; GFX10-NEXT: s_swappc_b64 s[30:31], s[30:31] ; GFX10-NEXT: v_readlane_b32 s30, v40, 0 +; GFX10-NEXT: v_readlane_b32 s31, v40, 1 ; GFX10-NEXT: s_addk_i32 s32, 0xfe00 ; GFX10-NEXT: v_readlane_b32 s33, v40, 2 ; GFX10-NEXT: s_or_saveexec_b32 s34, -1 @@ -3049,8 +3049,8 @@ define amdgpu_gfx void @test_call_external_void_func_v2i32() #0 { ; 
GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s30, 0 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s31, 1 ; GFX10-SCRATCH-NEXT: s_swappc_b64 s[30:31], s[0:1] -; GFX10-SCRATCH-NEXT: v_readlane_b32 s31, v40, 1 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s30, v40, 0 +; GFX10-SCRATCH-NEXT: v_readlane_b32 s31, v40, 1 ; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, -16 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s33, v40, 2 ; GFX10-SCRATCH-NEXT: s_or_saveexec_b32 s0, -1 @@ -3078,12 +3078,12 @@ define amdgpu_gfx void @test_call_external_void_func_v2i32_imm() #0 { ; GFX9-NEXT: v_mov_b32_e32 v0, 1 ; GFX9-NEXT: v_mov_b32_e32 v1, 2 ; GFX9-NEXT: v_writelane_b32 v40, s31, 1 -; GFX9-NEXT: s_getpc_b64 s[34:35] -; GFX9-NEXT: s_add_u32 s34, s34, external_void_func_v2i32@rel32@lo+4 -; GFX9-NEXT: s_addc_u32 s35, s35, external_void_func_v2i32@rel32@hi+12 -; GFX9-NEXT: s_swappc_b64 s[30:31], s[34:35] -; GFX9-NEXT: v_readlane_b32 s31, v40, 1 +; GFX9-NEXT: s_getpc_b64 s[30:31] +; GFX9-NEXT: s_add_u32 s30, s30, external_void_func_v2i32@rel32@lo+4 +; GFX9-NEXT: s_addc_u32 s31, s31, external_void_func_v2i32@rel32@hi+12 +; GFX9-NEXT: s_swappc_b64 s[30:31], s[30:31] ; GFX9-NEXT: v_readlane_b32 s30, v40, 0 +; GFX9-NEXT: v_readlane_b32 s31, v40, 1 ; GFX9-NEXT: s_addk_i32 s32, 0xfc00 ; GFX9-NEXT: v_readlane_b32 s33, v40, 2 ; GFX9-NEXT: s_or_saveexec_b64 s[34:35], -1 @@ -3106,13 +3106,13 @@ define amdgpu_gfx void @test_call_external_void_func_v2i32_imm() #0 { ; GFX10-NEXT: s_mov_b32 s33, s32 ; GFX10-NEXT: s_addk_i32 s32, 0x200 ; GFX10-NEXT: v_writelane_b32 v40, s30, 0 -; GFX10-NEXT: s_getpc_b64 s[34:35] -; GFX10-NEXT: s_add_u32 s34, s34, external_void_func_v2i32@rel32@lo+4 -; GFX10-NEXT: s_addc_u32 s35, s35, external_void_func_v2i32@rel32@hi+12 ; GFX10-NEXT: v_writelane_b32 v40, s31, 1 -; GFX10-NEXT: s_swappc_b64 s[30:31], s[34:35] -; GFX10-NEXT: v_readlane_b32 s31, v40, 1 +; GFX10-NEXT: s_getpc_b64 s[30:31] +; GFX10-NEXT: s_add_u32 s30, s30, external_void_func_v2i32@rel32@lo+4 +; GFX10-NEXT: s_addc_u32 s31, s31, external_void_func_v2i32@rel32@hi+12 +; GFX10-NEXT: s_swappc_b64 s[30:31], s[30:31] ; GFX10-NEXT: v_readlane_b32 s30, v40, 0 +; GFX10-NEXT: v_readlane_b32 s31, v40, 1 ; GFX10-NEXT: s_addk_i32 s32, 0xfe00 ; GFX10-NEXT: v_readlane_b32 s33, v40, 2 ; GFX10-NEXT: s_or_saveexec_b32 s34, -1 @@ -3141,8 +3141,8 @@ define amdgpu_gfx void @test_call_external_void_func_v2i32_imm() #0 { ; GFX10-SCRATCH-NEXT: s_addc_u32 s1, s1, external_void_func_v2i32@rel32@hi+12 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s31, 1 ; GFX10-SCRATCH-NEXT: s_swappc_b64 s[30:31], s[0:1] -; GFX10-SCRATCH-NEXT: v_readlane_b32 s31, v40, 1 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s30, v40, 0 +; GFX10-SCRATCH-NEXT: v_readlane_b32 s31, v40, 1 ; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, -16 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s33, v40, 2 ; GFX10-SCRATCH-NEXT: s_or_saveexec_b32 s0, -1 @@ -3170,12 +3170,12 @@ define amdgpu_gfx void @test_call_external_void_func_v3i32_imm(i32) #0 { ; GFX9-NEXT: v_mov_b32_e32 v1, 4 ; GFX9-NEXT: v_mov_b32_e32 v2, 5 ; GFX9-NEXT: v_writelane_b32 v40, s31, 1 -; GFX9-NEXT: s_getpc_b64 s[34:35] -; GFX9-NEXT: s_add_u32 s34, s34, external_void_func_v3i32@rel32@lo+4 -; GFX9-NEXT: s_addc_u32 s35, s35, external_void_func_v3i32@rel32@hi+12 -; GFX9-NEXT: s_swappc_b64 s[30:31], s[34:35] -; GFX9-NEXT: v_readlane_b32 s31, v40, 1 +; GFX9-NEXT: s_getpc_b64 s[30:31] +; GFX9-NEXT: s_add_u32 s30, s30, external_void_func_v3i32@rel32@lo+4 +; GFX9-NEXT: s_addc_u32 s31, s31, external_void_func_v3i32@rel32@hi+12 +; GFX9-NEXT: s_swappc_b64 s[30:31], s[30:31] ; GFX9-NEXT: 
v_readlane_b32 s30, v40, 0 +; GFX9-NEXT: v_readlane_b32 s31, v40, 1 ; GFX9-NEXT: s_addk_i32 s32, 0xfc00 ; GFX9-NEXT: v_readlane_b32 s33, v40, 2 ; GFX9-NEXT: s_or_saveexec_b64 s[34:35], -1 @@ -3199,13 +3199,13 @@ define amdgpu_gfx void @test_call_external_void_func_v3i32_imm(i32) #0 { ; GFX10-NEXT: s_mov_b32 s33, s32 ; GFX10-NEXT: v_writelane_b32 v40, s30, 0 ; GFX10-NEXT: s_addk_i32 s32, 0x200 -; GFX10-NEXT: s_getpc_b64 s[34:35] -; GFX10-NEXT: s_add_u32 s34, s34, external_void_func_v3i32@rel32@lo+4 -; GFX10-NEXT: s_addc_u32 s35, s35, external_void_func_v3i32@rel32@hi+12 ; GFX10-NEXT: v_writelane_b32 v40, s31, 1 -; GFX10-NEXT: s_swappc_b64 s[30:31], s[34:35] -; GFX10-NEXT: v_readlane_b32 s31, v40, 1 +; GFX10-NEXT: s_getpc_b64 s[30:31] +; GFX10-NEXT: s_add_u32 s30, s30, external_void_func_v3i32@rel32@lo+4 +; GFX10-NEXT: s_addc_u32 s31, s31, external_void_func_v3i32@rel32@hi+12 +; GFX10-NEXT: s_swappc_b64 s[30:31], s[30:31] ; GFX10-NEXT: v_readlane_b32 s30, v40, 0 +; GFX10-NEXT: v_readlane_b32 s31, v40, 1 ; GFX10-NEXT: s_addk_i32 s32, 0xfe00 ; GFX10-NEXT: v_readlane_b32 s33, v40, 2 ; GFX10-NEXT: s_or_saveexec_b32 s34, -1 @@ -3235,8 +3235,8 @@ define amdgpu_gfx void @test_call_external_void_func_v3i32_imm(i32) #0 { ; GFX10-SCRATCH-NEXT: s_addc_u32 s1, s1, external_void_func_v3i32@rel32@hi+12 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s31, 1 ; GFX10-SCRATCH-NEXT: s_swappc_b64 s[30:31], s[0:1] -; GFX10-SCRATCH-NEXT: v_readlane_b32 s31, v40, 1 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s30, v40, 0 +; GFX10-SCRATCH-NEXT: v_readlane_b32 s31, v40, 1 ; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, -16 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s33, v40, 2 ; GFX10-SCRATCH-NEXT: s_or_saveexec_b32 s0, -1 @@ -3265,12 +3265,12 @@ define amdgpu_gfx void @test_call_external_void_func_v3i32_i32(i32) #0 { ; GFX9-NEXT: v_mov_b32_e32 v2, 5 ; GFX9-NEXT: v_mov_b32_e32 v3, 6 ; GFX9-NEXT: v_writelane_b32 v40, s31, 1 -; GFX9-NEXT: s_getpc_b64 s[34:35] -; GFX9-NEXT: s_add_u32 s34, s34, external_void_func_v3i32_i32@rel32@lo+4 -; GFX9-NEXT: s_addc_u32 s35, s35, external_void_func_v3i32_i32@rel32@hi+12 -; GFX9-NEXT: s_swappc_b64 s[30:31], s[34:35] -; GFX9-NEXT: v_readlane_b32 s31, v40, 1 +; GFX9-NEXT: s_getpc_b64 s[30:31] +; GFX9-NEXT: s_add_u32 s30, s30, external_void_func_v3i32_i32@rel32@lo+4 +; GFX9-NEXT: s_addc_u32 s31, s31, external_void_func_v3i32_i32@rel32@hi+12 +; GFX9-NEXT: s_swappc_b64 s[30:31], s[30:31] ; GFX9-NEXT: v_readlane_b32 s30, v40, 0 +; GFX9-NEXT: v_readlane_b32 s31, v40, 1 ; GFX9-NEXT: s_addk_i32 s32, 0xfc00 ; GFX9-NEXT: v_readlane_b32 s33, v40, 2 ; GFX9-NEXT: s_or_saveexec_b64 s[34:35], -1 @@ -3295,13 +3295,13 @@ define amdgpu_gfx void @test_call_external_void_func_v3i32_i32(i32) #0 { ; GFX10-NEXT: v_writelane_b32 v40, s30, 0 ; GFX10-NEXT: s_mov_b32 s33, s32 ; GFX10-NEXT: s_addk_i32 s32, 0x200 -; GFX10-NEXT: s_getpc_b64 s[34:35] -; GFX10-NEXT: s_add_u32 s34, s34, external_void_func_v3i32_i32@rel32@lo+4 -; GFX10-NEXT: s_addc_u32 s35, s35, external_void_func_v3i32_i32@rel32@hi+12 ; GFX10-NEXT: v_writelane_b32 v40, s31, 1 -; GFX10-NEXT: s_swappc_b64 s[30:31], s[34:35] -; GFX10-NEXT: v_readlane_b32 s31, v40, 1 +; GFX10-NEXT: s_getpc_b64 s[30:31] +; GFX10-NEXT: s_add_u32 s30, s30, external_void_func_v3i32_i32@rel32@lo+4 +; GFX10-NEXT: s_addc_u32 s31, s31, external_void_func_v3i32_i32@rel32@hi+12 +; GFX10-NEXT: s_swappc_b64 s[30:31], s[30:31] ; GFX10-NEXT: v_readlane_b32 s30, v40, 0 +; GFX10-NEXT: v_readlane_b32 s31, v40, 1 ; GFX10-NEXT: s_addk_i32 s32, 0xfe00 ; GFX10-NEXT: v_readlane_b32 s33, v40, 2 ; GFX10-NEXT: 
s_or_saveexec_b32 s34, -1 @@ -3332,8 +3332,8 @@ define amdgpu_gfx void @test_call_external_void_func_v3i32_i32(i32) #0 { ; GFX10-SCRATCH-NEXT: s_addc_u32 s1, s1, external_void_func_v3i32_i32@rel32@hi+12 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s31, 1 ; GFX10-SCRATCH-NEXT: s_swappc_b64 s[30:31], s[0:1] -; GFX10-SCRATCH-NEXT: v_readlane_b32 s31, v40, 1 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s30, v40, 0 +; GFX10-SCRATCH-NEXT: v_readlane_b32 s31, v40, 1 ; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, -16 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s33, v40, 2 ; GFX10-SCRATCH-NEXT: s_or_saveexec_b32 s0, -1 @@ -3359,12 +3359,12 @@ define amdgpu_gfx void @test_call_external_void_func_v4i32() #0 { ; GFX9-NEXT: s_addk_i32 s32, 0x400 ; GFX9-NEXT: v_writelane_b32 v40, s30, 0 ; GFX9-NEXT: v_writelane_b32 v40, s31, 1 -; GFX9-NEXT: s_getpc_b64 s[34:35] -; GFX9-NEXT: s_add_u32 s34, s34, external_void_func_v4i32@rel32@lo+4 -; GFX9-NEXT: s_addc_u32 s35, s35, external_void_func_v4i32@rel32@hi+12 -; GFX9-NEXT: s_swappc_b64 s[30:31], s[34:35] -; GFX9-NEXT: v_readlane_b32 s31, v40, 1 +; GFX9-NEXT: s_getpc_b64 s[30:31] +; GFX9-NEXT: s_add_u32 s30, s30, external_void_func_v4i32@rel32@lo+4 +; GFX9-NEXT: s_addc_u32 s31, s31, external_void_func_v4i32@rel32@hi+12 +; GFX9-NEXT: s_swappc_b64 s[30:31], s[30:31] ; GFX9-NEXT: v_readlane_b32 s30, v40, 0 +; GFX9-NEXT: v_readlane_b32 s31, v40, 1 ; GFX9-NEXT: s_addk_i32 s32, 0xfc00 ; GFX9-NEXT: v_readlane_b32 s33, v40, 2 ; GFX9-NEXT: s_or_saveexec_b64 s[34:35], -1 @@ -3385,14 +3385,14 @@ define amdgpu_gfx void @test_call_external_void_func_v4i32() #0 { ; GFX10-NEXT: v_writelane_b32 v40, s33, 2 ; GFX10-NEXT: s_mov_b32 s33, s32 ; GFX10-NEXT: s_addk_i32 s32, 0x200 -; GFX10-NEXT: s_getpc_b64 s[34:35] -; GFX10-NEXT: s_add_u32 s34, s34, external_void_func_v4i32@rel32@lo+4 -; GFX10-NEXT: s_addc_u32 s35, s35, external_void_func_v4i32@rel32@hi+12 ; GFX10-NEXT: v_writelane_b32 v40, s30, 0 ; GFX10-NEXT: v_writelane_b32 v40, s31, 1 -; GFX10-NEXT: s_swappc_b64 s[30:31], s[34:35] -; GFX10-NEXT: v_readlane_b32 s31, v40, 1 +; GFX10-NEXT: s_getpc_b64 s[30:31] +; GFX10-NEXT: s_add_u32 s30, s30, external_void_func_v4i32@rel32@lo+4 +; GFX10-NEXT: s_addc_u32 s31, s31, external_void_func_v4i32@rel32@hi+12 +; GFX10-NEXT: s_swappc_b64 s[30:31], s[30:31] ; GFX10-NEXT: v_readlane_b32 s30, v40, 0 +; GFX10-NEXT: v_readlane_b32 s31, v40, 1 ; GFX10-NEXT: s_addk_i32 s32, 0xfe00 ; GFX10-NEXT: v_readlane_b32 s33, v40, 2 ; GFX10-NEXT: s_or_saveexec_b32 s34, -1 @@ -3420,8 +3420,8 @@ define amdgpu_gfx void @test_call_external_void_func_v4i32() #0 { ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s30, 0 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s31, 1 ; GFX10-SCRATCH-NEXT: s_swappc_b64 s[30:31], s[0:1] -; GFX10-SCRATCH-NEXT: v_readlane_b32 s31, v40, 1 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s30, v40, 0 +; GFX10-SCRATCH-NEXT: v_readlane_b32 s31, v40, 1 ; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, -16 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s33, v40, 2 ; GFX10-SCRATCH-NEXT: s_or_saveexec_b32 s0, -1 @@ -3451,12 +3451,12 @@ define amdgpu_gfx void @test_call_external_void_func_v4i32_imm() #0 { ; GFX9-NEXT: v_mov_b32_e32 v2, 3 ; GFX9-NEXT: v_mov_b32_e32 v3, 4 ; GFX9-NEXT: v_writelane_b32 v40, s31, 1 -; GFX9-NEXT: s_getpc_b64 s[34:35] -; GFX9-NEXT: s_add_u32 s34, s34, external_void_func_v4i32@rel32@lo+4 -; GFX9-NEXT: s_addc_u32 s35, s35, external_void_func_v4i32@rel32@hi+12 -; GFX9-NEXT: s_swappc_b64 s[30:31], s[34:35] -; GFX9-NEXT: v_readlane_b32 s31, v40, 1 +; GFX9-NEXT: s_getpc_b64 s[30:31] +; GFX9-NEXT: s_add_u32 s30, s30, 
external_void_func_v4i32@rel32@lo+4 +; GFX9-NEXT: s_addc_u32 s31, s31, external_void_func_v4i32@rel32@hi+12 +; GFX9-NEXT: s_swappc_b64 s[30:31], s[30:31] ; GFX9-NEXT: v_readlane_b32 s30, v40, 0 +; GFX9-NEXT: v_readlane_b32 s31, v40, 1 ; GFX9-NEXT: s_addk_i32 s32, 0xfc00 ; GFX9-NEXT: v_readlane_b32 s33, v40, 2 ; GFX9-NEXT: s_or_saveexec_b64 s[34:35], -1 @@ -3481,13 +3481,13 @@ define amdgpu_gfx void @test_call_external_void_func_v4i32_imm() #0 { ; GFX10-NEXT: v_writelane_b32 v40, s30, 0 ; GFX10-NEXT: s_mov_b32 s33, s32 ; GFX10-NEXT: s_addk_i32 s32, 0x200 -; GFX10-NEXT: s_getpc_b64 s[34:35] -; GFX10-NEXT: s_add_u32 s34, s34, external_void_func_v4i32@rel32@lo+4 -; GFX10-NEXT: s_addc_u32 s35, s35, external_void_func_v4i32@rel32@hi+12 ; GFX10-NEXT: v_writelane_b32 v40, s31, 1 -; GFX10-NEXT: s_swappc_b64 s[30:31], s[34:35] -; GFX10-NEXT: v_readlane_b32 s31, v40, 1 +; GFX10-NEXT: s_getpc_b64 s[30:31] +; GFX10-NEXT: s_add_u32 s30, s30, external_void_func_v4i32@rel32@lo+4 +; GFX10-NEXT: s_addc_u32 s31, s31, external_void_func_v4i32@rel32@hi+12 +; GFX10-NEXT: s_swappc_b64 s[30:31], s[30:31] ; GFX10-NEXT: v_readlane_b32 s30, v40, 0 +; GFX10-NEXT: v_readlane_b32 s31, v40, 1 ; GFX10-NEXT: s_addk_i32 s32, 0xfe00 ; GFX10-NEXT: v_readlane_b32 s33, v40, 2 ; GFX10-NEXT: s_or_saveexec_b32 s34, -1 @@ -3518,8 +3518,8 @@ define amdgpu_gfx void @test_call_external_void_func_v4i32_imm() #0 { ; GFX10-SCRATCH-NEXT: s_addc_u32 s1, s1, external_void_func_v4i32@rel32@hi+12 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s31, 1 ; GFX10-SCRATCH-NEXT: s_swappc_b64 s[30:31], s[0:1] -; GFX10-SCRATCH-NEXT: v_readlane_b32 s31, v40, 1 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s30, v40, 0 +; GFX10-SCRATCH-NEXT: v_readlane_b32 s31, v40, 1 ; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, -16 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s33, v40, 2 ; GFX10-SCRATCH-NEXT: s_or_saveexec_b32 s0, -1 @@ -3549,12 +3549,12 @@ define amdgpu_gfx void @test_call_external_void_func_v5i32_imm() #0 { ; GFX9-NEXT: v_mov_b32_e32 v3, 4 ; GFX9-NEXT: v_mov_b32_e32 v4, 5 ; GFX9-NEXT: v_writelane_b32 v40, s31, 1 -; GFX9-NEXT: s_getpc_b64 s[34:35] -; GFX9-NEXT: s_add_u32 s34, s34, external_void_func_v5i32@rel32@lo+4 -; GFX9-NEXT: s_addc_u32 s35, s35, external_void_func_v5i32@rel32@hi+12 -; GFX9-NEXT: s_swappc_b64 s[30:31], s[34:35] -; GFX9-NEXT: v_readlane_b32 s31, v40, 1 +; GFX9-NEXT: s_getpc_b64 s[30:31] +; GFX9-NEXT: s_add_u32 s30, s30, external_void_func_v5i32@rel32@lo+4 +; GFX9-NEXT: s_addc_u32 s31, s31, external_void_func_v5i32@rel32@hi+12 +; GFX9-NEXT: s_swappc_b64 s[30:31], s[30:31] ; GFX9-NEXT: v_readlane_b32 s30, v40, 0 +; GFX9-NEXT: v_readlane_b32 s31, v40, 1 ; GFX9-NEXT: s_addk_i32 s32, 0xfc00 ; GFX9-NEXT: v_readlane_b32 s33, v40, 2 ; GFX9-NEXT: s_or_saveexec_b64 s[34:35], -1 @@ -3580,13 +3580,13 @@ define amdgpu_gfx void @test_call_external_void_func_v5i32_imm() #0 { ; GFX10-NEXT: v_mov_b32_e32 v4, 5 ; GFX10-NEXT: s_mov_b32 s33, s32 ; GFX10-NEXT: s_addk_i32 s32, 0x200 -; GFX10-NEXT: s_getpc_b64 s[34:35] -; GFX10-NEXT: s_add_u32 s34, s34, external_void_func_v5i32@rel32@lo+4 -; GFX10-NEXT: s_addc_u32 s35, s35, external_void_func_v5i32@rel32@hi+12 ; GFX10-NEXT: v_writelane_b32 v40, s31, 1 -; GFX10-NEXT: s_swappc_b64 s[30:31], s[34:35] -; GFX10-NEXT: v_readlane_b32 s31, v40, 1 +; GFX10-NEXT: s_getpc_b64 s[30:31] +; GFX10-NEXT: s_add_u32 s30, s30, external_void_func_v5i32@rel32@lo+4 +; GFX10-NEXT: s_addc_u32 s31, s31, external_void_func_v5i32@rel32@hi+12 +; GFX10-NEXT: s_swappc_b64 s[30:31], s[30:31] ; GFX10-NEXT: v_readlane_b32 s30, v40, 0 +; GFX10-NEXT: 
v_readlane_b32 s31, v40, 1 ; GFX10-NEXT: s_addk_i32 s32, 0xfe00 ; GFX10-NEXT: v_readlane_b32 s33, v40, 2 ; GFX10-NEXT: s_or_saveexec_b32 s34, -1 @@ -3618,8 +3618,8 @@ define amdgpu_gfx void @test_call_external_void_func_v5i32_imm() #0 { ; GFX10-SCRATCH-NEXT: s_addc_u32 s1, s1, external_void_func_v5i32@rel32@hi+12 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s31, 1 ; GFX10-SCRATCH-NEXT: s_swappc_b64 s[30:31], s[0:1] -; GFX10-SCRATCH-NEXT: v_readlane_b32 s31, v40, 1 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s30, v40, 0 +; GFX10-SCRATCH-NEXT: v_readlane_b32 s31, v40, 1 ; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, -16 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s33, v40, 2 ; GFX10-SCRATCH-NEXT: s_or_saveexec_b32 s0, -1 @@ -3639,22 +3639,22 @@ define amdgpu_gfx void @test_call_external_void_func_v8i32() #0 { ; GFX9-NEXT: s_or_saveexec_b64 s[34:35], -1 ; GFX9-NEXT: buffer_store_dword v40, off, s[0:3], s32 ; 4-byte Folded Spill ; GFX9-NEXT: s_mov_b64 exec, s[34:35] -; GFX9-NEXT: s_load_dwordx2 s[34:35], s[34:35], 0x0 -; GFX9-NEXT: v_mov_b32_e32 v8, 0 ; GFX9-NEXT: v_writelane_b32 v40, s33, 2 +; GFX9-NEXT: v_writelane_b32 v40, s30, 0 +; GFX9-NEXT: v_writelane_b32 v40, s31, 1 +; GFX9-NEXT: s_load_dwordx2 s[30:31], s[30:31], 0x0 +; GFX9-NEXT: v_mov_b32_e32 v8, 0 ; GFX9-NEXT: s_mov_b32 s33, s32 ; GFX9-NEXT: s_addk_i32 s32, 0x400 ; GFX9-NEXT: s_waitcnt lgkmcnt(0) -; GFX9-NEXT: global_load_dwordx4 v[0:3], v8, s[34:35] -; GFX9-NEXT: global_load_dwordx4 v[4:7], v8, s[34:35] offset:16 -; GFX9-NEXT: v_writelane_b32 v40, s30, 0 -; GFX9-NEXT: v_writelane_b32 v40, s31, 1 -; GFX9-NEXT: s_getpc_b64 s[34:35] -; GFX9-NEXT: s_add_u32 s34, s34, external_void_func_v8i32@rel32@lo+4 -; GFX9-NEXT: s_addc_u32 s35, s35, external_void_func_v8i32@rel32@hi+12 -; GFX9-NEXT: s_swappc_b64 s[30:31], s[34:35] -; GFX9-NEXT: v_readlane_b32 s31, v40, 1 +; GFX9-NEXT: global_load_dwordx4 v[0:3], v8, s[30:31] +; GFX9-NEXT: global_load_dwordx4 v[4:7], v8, s[30:31] offset:16 +; GFX9-NEXT: s_getpc_b64 s[30:31] +; GFX9-NEXT: s_add_u32 s30, s30, external_void_func_v8i32@rel32@lo+4 +; GFX9-NEXT: s_addc_u32 s31, s31, external_void_func_v8i32@rel32@hi+12 +; GFX9-NEXT: s_swappc_b64 s[30:31], s[30:31] ; GFX9-NEXT: v_readlane_b32 s30, v40, 0 +; GFX9-NEXT: v_readlane_b32 s31, v40, 1 ; GFX9-NEXT: s_addk_i32 s32, 0xfc00 ; GFX9-NEXT: v_readlane_b32 s33, v40, 2 ; GFX9-NEXT: s_or_saveexec_b64 s[34:35], -1 @@ -3671,23 +3671,24 @@ define amdgpu_gfx void @test_call_external_void_func_v8i32() #0 { ; GFX10-NEXT: buffer_store_dword v40, off, s[0:3], s32 ; 4-byte Folded Spill ; GFX10-NEXT: s_waitcnt_depctr 0xffe3 ; GFX10-NEXT: s_mov_b32 exec_lo, s34 -; GFX10-NEXT: s_load_dwordx2 s[34:35], s[34:35], 0x0 -; GFX10-NEXT: v_mov_b32_e32 v8, 0 ; GFX10-NEXT: v_writelane_b32 v40, s33, 2 +; GFX10-NEXT: v_mov_b32_e32 v8, 0 ; GFX10-NEXT: s_mov_b32 s33, s32 ; GFX10-NEXT: s_addk_i32 s32, 0x200 -; GFX10-NEXT: s_waitcnt lgkmcnt(0) -; GFX10-NEXT: s_clause 0x1 -; GFX10-NEXT: global_load_dwordx4 v[0:3], v8, s[34:35] -; GFX10-NEXT: global_load_dwordx4 v[4:7], v8, s[34:35] offset:16 ; GFX10-NEXT: v_writelane_b32 v40, s30, 0 -; GFX10-NEXT: s_getpc_b64 s[34:35] -; GFX10-NEXT: s_add_u32 s34, s34, external_void_func_v8i32@rel32@lo+4 -; GFX10-NEXT: s_addc_u32 s35, s35, external_void_func_v8i32@rel32@hi+12 ; GFX10-NEXT: v_writelane_b32 v40, s31, 1 -; GFX10-NEXT: s_swappc_b64 s[30:31], s[34:35] -; GFX10-NEXT: v_readlane_b32 s31, v40, 1 +; GFX10-NEXT: s_load_dwordx2 s[30:31], s[30:31], 0x0 +; GFX10-NEXT: s_waitcnt lgkmcnt(0) +; GFX10-NEXT: s_clause 0x1 +; GFX10-NEXT: global_load_dwordx4 v[0:3], v8, 
s[30:31] +; GFX10-NEXT: global_load_dwordx4 v[4:7], v8, s[30:31] offset:16 +; GFX10-NEXT: s_waitcnt_depctr 0xffe3 +; GFX10-NEXT: s_getpc_b64 s[30:31] +; GFX10-NEXT: s_add_u32 s30, s30, external_void_func_v8i32@rel32@lo+4 +; GFX10-NEXT: s_addc_u32 s31, s31, external_void_func_v8i32@rel32@hi+12 +; GFX10-NEXT: s_swappc_b64 s[30:31], s[30:31] ; GFX10-NEXT: v_readlane_b32 s30, v40, 0 +; GFX10-NEXT: v_readlane_b32 s31, v40, 1 ; GFX10-NEXT: s_addk_i32 s32, 0xfe00 ; GFX10-NEXT: v_readlane_b32 s33, v40, 2 ; GFX10-NEXT: s_or_saveexec_b32 s34, -1 @@ -3720,8 +3721,8 @@ define amdgpu_gfx void @test_call_external_void_func_v8i32() #0 { ; GFX10-SCRATCH-NEXT: s_addc_u32 s1, s1, external_void_func_v8i32@rel32@hi+12 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s31, 1 ; GFX10-SCRATCH-NEXT: s_swappc_b64 s[30:31], s[0:1] -; GFX10-SCRATCH-NEXT: v_readlane_b32 s31, v40, 1 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s30, v40, 0 +; GFX10-SCRATCH-NEXT: v_readlane_b32 s31, v40, 1 ; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, -16 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s33, v40, 2 ; GFX10-SCRATCH-NEXT: s_or_saveexec_b32 s0, -1 @@ -3756,12 +3757,12 @@ define amdgpu_gfx void @test_call_external_void_func_v8i32_imm() #0 { ; GFX9-NEXT: v_mov_b32_e32 v6, 7 ; GFX9-NEXT: v_mov_b32_e32 v7, 8 ; GFX9-NEXT: v_writelane_b32 v40, s31, 1 -; GFX9-NEXT: s_getpc_b64 s[34:35] -; GFX9-NEXT: s_add_u32 s34, s34, external_void_func_v8i32@rel32@lo+4 -; GFX9-NEXT: s_addc_u32 s35, s35, external_void_func_v8i32@rel32@hi+12 -; GFX9-NEXT: s_swappc_b64 s[30:31], s[34:35] -; GFX9-NEXT: v_readlane_b32 s31, v40, 1 +; GFX9-NEXT: s_getpc_b64 s[30:31] +; GFX9-NEXT: s_add_u32 s30, s30, external_void_func_v8i32@rel32@lo+4 +; GFX9-NEXT: s_addc_u32 s31, s31, external_void_func_v8i32@rel32@hi+12 +; GFX9-NEXT: s_swappc_b64 s[30:31], s[30:31] ; GFX9-NEXT: v_readlane_b32 s30, v40, 0 +; GFX9-NEXT: v_readlane_b32 s31, v40, 1 ; GFX9-NEXT: s_addk_i32 s32, 0xfc00 ; GFX9-NEXT: v_readlane_b32 s33, v40, 2 ; GFX9-NEXT: s_or_saveexec_b64 s[34:35], -1 @@ -3791,12 +3792,12 @@ define amdgpu_gfx void @test_call_external_void_func_v8i32_imm() #0 { ; GFX10-NEXT: s_mov_b32 s33, s32 ; GFX10-NEXT: s_addk_i32 s32, 0x200 ; GFX10-NEXT: v_writelane_b32 v40, s31, 1 -; GFX10-NEXT: s_getpc_b64 s[34:35] -; GFX10-NEXT: s_add_u32 s34, s34, external_void_func_v8i32@rel32@lo+4 -; GFX10-NEXT: s_addc_u32 s35, s35, external_void_func_v8i32@rel32@hi+12 -; GFX10-NEXT: s_swappc_b64 s[30:31], s[34:35] -; GFX10-NEXT: v_readlane_b32 s31, v40, 1 +; GFX10-NEXT: s_getpc_b64 s[30:31] +; GFX10-NEXT: s_add_u32 s30, s30, external_void_func_v8i32@rel32@lo+4 +; GFX10-NEXT: s_addc_u32 s31, s31, external_void_func_v8i32@rel32@hi+12 +; GFX10-NEXT: s_swappc_b64 s[30:31], s[30:31] ; GFX10-NEXT: v_readlane_b32 s30, v40, 0 +; GFX10-NEXT: v_readlane_b32 s31, v40, 1 ; GFX10-NEXT: s_addk_i32 s32, 0xfe00 ; GFX10-NEXT: v_readlane_b32 s33, v40, 2 ; GFX10-NEXT: s_or_saveexec_b32 s34, -1 @@ -3831,8 +3832,8 @@ define amdgpu_gfx void @test_call_external_void_func_v8i32_imm() #0 { ; GFX10-SCRATCH-NEXT: s_add_u32 s0, s0, external_void_func_v8i32@rel32@lo+4 ; GFX10-SCRATCH-NEXT: s_addc_u32 s1, s1, external_void_func_v8i32@rel32@hi+12 ; GFX10-SCRATCH-NEXT: s_swappc_b64 s[30:31], s[0:1] -; GFX10-SCRATCH-NEXT: v_readlane_b32 s31, v40, 1 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s30, v40, 0 +; GFX10-SCRATCH-NEXT: v_readlane_b32 s31, v40, 1 ; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, -16 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s33, v40, 2 ; GFX10-SCRATCH-NEXT: s_or_saveexec_b32 s0, -1 @@ -3852,24 +3853,24 @@ define amdgpu_gfx void 
@test_call_external_void_func_v16i32() #0 { ; GFX9-NEXT: s_or_saveexec_b64 s[34:35], -1 ; GFX9-NEXT: buffer_store_dword v40, off, s[0:3], s32 ; 4-byte Folded Spill ; GFX9-NEXT: s_mov_b64 exec, s[34:35] -; GFX9-NEXT: s_load_dwordx2 s[34:35], s[34:35], 0x0 -; GFX9-NEXT: v_mov_b32_e32 v16, 0 ; GFX9-NEXT: v_writelane_b32 v40, s33, 2 -; GFX9-NEXT: s_mov_b32 s33, s32 -; GFX9-NEXT: s_addk_i32 s32, 0x400 -; GFX9-NEXT: s_waitcnt lgkmcnt(0) -; GFX9-NEXT: global_load_dwordx4 v[0:3], v16, s[34:35] -; GFX9-NEXT: global_load_dwordx4 v[4:7], v16, s[34:35] offset:16 -; GFX9-NEXT: global_load_dwordx4 v[8:11], v16, s[34:35] offset:32 -; GFX9-NEXT: global_load_dwordx4 v[12:15], v16, s[34:35] offset:48 ; GFX9-NEXT: v_writelane_b32 v40, s30, 0 ; GFX9-NEXT: v_writelane_b32 v40, s31, 1 -; GFX9-NEXT: s_getpc_b64 s[34:35] -; GFX9-NEXT: s_add_u32 s34, s34, external_void_func_v16i32@rel32@lo+4 -; GFX9-NEXT: s_addc_u32 s35, s35, external_void_func_v16i32@rel32@hi+12 -; GFX9-NEXT: s_swappc_b64 s[30:31], s[34:35] -; GFX9-NEXT: v_readlane_b32 s31, v40, 1 +; GFX9-NEXT: s_load_dwordx2 s[30:31], s[30:31], 0x0 +; GFX9-NEXT: v_mov_b32_e32 v16, 0 +; GFX9-NEXT: s_mov_b32 s33, s32 +; GFX9-NEXT: s_addk_i32 s32, 0x400 +; GFX9-NEXT: s_waitcnt lgkmcnt(0) +; GFX9-NEXT: global_load_dwordx4 v[0:3], v16, s[30:31] +; GFX9-NEXT: global_load_dwordx4 v[4:7], v16, s[30:31] offset:16 +; GFX9-NEXT: global_load_dwordx4 v[8:11], v16, s[30:31] offset:32 +; GFX9-NEXT: global_load_dwordx4 v[12:15], v16, s[30:31] offset:48 +; GFX9-NEXT: s_getpc_b64 s[30:31] +; GFX9-NEXT: s_add_u32 s30, s30, external_void_func_v16i32@rel32@lo+4 +; GFX9-NEXT: s_addc_u32 s31, s31, external_void_func_v16i32@rel32@hi+12 +; GFX9-NEXT: s_swappc_b64 s[30:31], s[30:31] ; GFX9-NEXT: v_readlane_b32 s30, v40, 0 +; GFX9-NEXT: v_readlane_b32 s31, v40, 1 ; GFX9-NEXT: s_addk_i32 s32, 0xfc00 ; GFX9-NEXT: v_readlane_b32 s33, v40, 2 ; GFX9-NEXT: s_or_saveexec_b64 s[34:35], -1 @@ -3886,25 +3887,26 @@ define amdgpu_gfx void @test_call_external_void_func_v16i32() #0 { ; GFX10-NEXT: buffer_store_dword v40, off, s[0:3], s32 ; 4-byte Folded Spill ; GFX10-NEXT: s_waitcnt_depctr 0xffe3 ; GFX10-NEXT: s_mov_b32 exec_lo, s34 -; GFX10-NEXT: s_load_dwordx2 s[34:35], s[34:35], 0x0 -; GFX10-NEXT: v_mov_b32_e32 v16, 0 ; GFX10-NEXT: v_writelane_b32 v40, s33, 2 +; GFX10-NEXT: v_mov_b32_e32 v16, 0 ; GFX10-NEXT: s_mov_b32 s33, s32 ; GFX10-NEXT: s_addk_i32 s32, 0x200 -; GFX10-NEXT: s_waitcnt lgkmcnt(0) -; GFX10-NEXT: s_clause 0x3 -; GFX10-NEXT: global_load_dwordx4 v[0:3], v16, s[34:35] -; GFX10-NEXT: global_load_dwordx4 v[4:7], v16, s[34:35] offset:16 -; GFX10-NEXT: global_load_dwordx4 v[8:11], v16, s[34:35] offset:32 -; GFX10-NEXT: global_load_dwordx4 v[12:15], v16, s[34:35] offset:48 ; GFX10-NEXT: v_writelane_b32 v40, s30, 0 -; GFX10-NEXT: s_getpc_b64 s[34:35] -; GFX10-NEXT: s_add_u32 s34, s34, external_void_func_v16i32@rel32@lo+4 -; GFX10-NEXT: s_addc_u32 s35, s35, external_void_func_v16i32@rel32@hi+12 ; GFX10-NEXT: v_writelane_b32 v40, s31, 1 -; GFX10-NEXT: s_swappc_b64 s[30:31], s[34:35] -; GFX10-NEXT: v_readlane_b32 s31, v40, 1 +; GFX10-NEXT: s_load_dwordx2 s[30:31], s[30:31], 0x0 +; GFX10-NEXT: s_waitcnt lgkmcnt(0) +; GFX10-NEXT: s_clause 0x3 +; GFX10-NEXT: global_load_dwordx4 v[0:3], v16, s[30:31] +; GFX10-NEXT: global_load_dwordx4 v[4:7], v16, s[30:31] offset:16 +; GFX10-NEXT: global_load_dwordx4 v[8:11], v16, s[30:31] offset:32 +; GFX10-NEXT: global_load_dwordx4 v[12:15], v16, s[30:31] offset:48 +; GFX10-NEXT: s_waitcnt_depctr 0xffe3 +; GFX10-NEXT: s_getpc_b64 s[30:31] +; GFX10-NEXT: s_add_u32 
s30, s30, external_void_func_v16i32@rel32@lo+4 +; GFX10-NEXT: s_addc_u32 s31, s31, external_void_func_v16i32@rel32@hi+12 +; GFX10-NEXT: s_swappc_b64 s[30:31], s[30:31] ; GFX10-NEXT: v_readlane_b32 s30, v40, 0 +; GFX10-NEXT: v_readlane_b32 s31, v40, 1 ; GFX10-NEXT: s_addk_i32 s32, 0xfe00 ; GFX10-NEXT: v_readlane_b32 s33, v40, 2 ; GFX10-NEXT: s_or_saveexec_b32 s34, -1 @@ -3939,8 +3941,8 @@ define amdgpu_gfx void @test_call_external_void_func_v16i32() #0 { ; GFX10-SCRATCH-NEXT: s_addc_u32 s1, s1, external_void_func_v16i32@rel32@hi+12 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s31, 1 ; GFX10-SCRATCH-NEXT: s_swappc_b64 s[30:31], s[0:1] -; GFX10-SCRATCH-NEXT: v_readlane_b32 s31, v40, 1 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s30, v40, 0 +; GFX10-SCRATCH-NEXT: v_readlane_b32 s31, v40, 1 ; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, -16 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s33, v40, 2 ; GFX10-SCRATCH-NEXT: s_or_saveexec_b32 s0, -1 @@ -3962,29 +3964,29 @@ define amdgpu_gfx void @test_call_external_void_func_v32i32() #0 { ; GFX9-NEXT: s_or_saveexec_b64 s[34:35], -1 ; GFX9-NEXT: buffer_store_dword v40, off, s[0:3], s32 ; 4-byte Folded Spill ; GFX9-NEXT: s_mov_b64 exec, s[34:35] -; GFX9-NEXT: s_load_dwordx2 s[34:35], s[34:35], 0x0 -; GFX9-NEXT: v_mov_b32_e32 v28, 0 ; GFX9-NEXT: v_writelane_b32 v40, s33, 2 +; GFX9-NEXT: v_writelane_b32 v40, s30, 0 +; GFX9-NEXT: v_writelane_b32 v40, s31, 1 +; GFX9-NEXT: s_load_dwordx2 s[30:31], s[30:31], 0x0 +; GFX9-NEXT: v_mov_b32_e32 v28, 0 ; GFX9-NEXT: s_mov_b32 s33, s32 ; GFX9-NEXT: s_addk_i32 s32, 0x400 ; GFX9-NEXT: s_waitcnt lgkmcnt(0) -; GFX9-NEXT: global_load_dwordx4 v[0:3], v28, s[34:35] -; GFX9-NEXT: global_load_dwordx4 v[4:7], v28, s[34:35] offset:16 -; GFX9-NEXT: global_load_dwordx4 v[8:11], v28, s[34:35] offset:32 -; GFX9-NEXT: global_load_dwordx4 v[12:15], v28, s[34:35] offset:48 -; GFX9-NEXT: global_load_dwordx4 v[16:19], v28, s[34:35] offset:64 -; GFX9-NEXT: global_load_dwordx4 v[20:23], v28, s[34:35] offset:80 -; GFX9-NEXT: global_load_dwordx4 v[24:27], v28, s[34:35] offset:96 +; GFX9-NEXT: global_load_dwordx4 v[0:3], v28, s[30:31] +; GFX9-NEXT: global_load_dwordx4 v[4:7], v28, s[30:31] offset:16 +; GFX9-NEXT: global_load_dwordx4 v[8:11], v28, s[30:31] offset:32 +; GFX9-NEXT: global_load_dwordx4 v[12:15], v28, s[30:31] offset:48 +; GFX9-NEXT: global_load_dwordx4 v[16:19], v28, s[30:31] offset:64 +; GFX9-NEXT: global_load_dwordx4 v[20:23], v28, s[30:31] offset:80 +; GFX9-NEXT: global_load_dwordx4 v[24:27], v28, s[30:31] offset:96 ; GFX9-NEXT: s_nop 0 -; GFX9-NEXT: global_load_dwordx4 v[28:31], v28, s[34:35] offset:112 -; GFX9-NEXT: v_writelane_b32 v40, s30, 0 -; GFX9-NEXT: v_writelane_b32 v40, s31, 1 -; GFX9-NEXT: s_getpc_b64 s[34:35] -; GFX9-NEXT: s_add_u32 s34, s34, external_void_func_v32i32@rel32@lo+4 -; GFX9-NEXT: s_addc_u32 s35, s35, external_void_func_v32i32@rel32@hi+12 -; GFX9-NEXT: s_swappc_b64 s[30:31], s[34:35] -; GFX9-NEXT: v_readlane_b32 s31, v40, 1 +; GFX9-NEXT: global_load_dwordx4 v[28:31], v28, s[30:31] offset:112 +; GFX9-NEXT: s_getpc_b64 s[30:31] +; GFX9-NEXT: s_add_u32 s30, s30, external_void_func_v32i32@rel32@lo+4 +; GFX9-NEXT: s_addc_u32 s31, s31, external_void_func_v32i32@rel32@hi+12 +; GFX9-NEXT: s_swappc_b64 s[30:31], s[30:31] ; GFX9-NEXT: v_readlane_b32 s30, v40, 0 +; GFX9-NEXT: v_readlane_b32 s31, v40, 1 ; GFX9-NEXT: s_addk_i32 s32, 0xfc00 ; GFX9-NEXT: v_readlane_b32 s33, v40, 2 ; GFX9-NEXT: s_or_saveexec_b64 s[34:35], -1 @@ -4001,29 +4003,30 @@ define amdgpu_gfx void @test_call_external_void_func_v32i32() #0 { ; GFX10-NEXT: 
buffer_store_dword v40, off, s[0:3], s32 ; 4-byte Folded Spill ; GFX10-NEXT: s_waitcnt_depctr 0xffe3 ; GFX10-NEXT: s_mov_b32 exec_lo, s34 -; GFX10-NEXT: s_load_dwordx2 s[34:35], s[34:35], 0x0 -; GFX10-NEXT: v_mov_b32_e32 v32, 0 ; GFX10-NEXT: v_writelane_b32 v40, s33, 2 +; GFX10-NEXT: v_mov_b32_e32 v32, 0 ; GFX10-NEXT: s_mov_b32 s33, s32 ; GFX10-NEXT: s_addk_i32 s32, 0x200 -; GFX10-NEXT: s_waitcnt lgkmcnt(0) -; GFX10-NEXT: s_clause 0x7 -; GFX10-NEXT: global_load_dwordx4 v[0:3], v32, s[34:35] -; GFX10-NEXT: global_load_dwordx4 v[4:7], v32, s[34:35] offset:16 -; GFX10-NEXT: global_load_dwordx4 v[8:11], v32, s[34:35] offset:32 -; GFX10-NEXT: global_load_dwordx4 v[12:15], v32, s[34:35] offset:48 -; GFX10-NEXT: global_load_dwordx4 v[16:19], v32, s[34:35] offset:64 -; GFX10-NEXT: global_load_dwordx4 v[20:23], v32, s[34:35] offset:80 -; GFX10-NEXT: global_load_dwordx4 v[24:27], v32, s[34:35] offset:96 -; GFX10-NEXT: global_load_dwordx4 v[28:31], v32, s[34:35] offset:112 ; GFX10-NEXT: v_writelane_b32 v40, s30, 0 -; GFX10-NEXT: s_getpc_b64 s[34:35] -; GFX10-NEXT: s_add_u32 s34, s34, external_void_func_v32i32@rel32@lo+4 -; GFX10-NEXT: s_addc_u32 s35, s35, external_void_func_v32i32@rel32@hi+12 ; GFX10-NEXT: v_writelane_b32 v40, s31, 1 -; GFX10-NEXT: s_swappc_b64 s[30:31], s[34:35] -; GFX10-NEXT: v_readlane_b32 s31, v40, 1 +; GFX10-NEXT: s_load_dwordx2 s[30:31], s[30:31], 0x0 +; GFX10-NEXT: s_waitcnt lgkmcnt(0) +; GFX10-NEXT: s_clause 0x7 +; GFX10-NEXT: global_load_dwordx4 v[0:3], v32, s[30:31] +; GFX10-NEXT: global_load_dwordx4 v[4:7], v32, s[30:31] offset:16 +; GFX10-NEXT: global_load_dwordx4 v[8:11], v32, s[30:31] offset:32 +; GFX10-NEXT: global_load_dwordx4 v[12:15], v32, s[30:31] offset:48 +; GFX10-NEXT: global_load_dwordx4 v[16:19], v32, s[30:31] offset:64 +; GFX10-NEXT: global_load_dwordx4 v[20:23], v32, s[30:31] offset:80 +; GFX10-NEXT: global_load_dwordx4 v[24:27], v32, s[30:31] offset:96 +; GFX10-NEXT: global_load_dwordx4 v[28:31], v32, s[30:31] offset:112 +; GFX10-NEXT: s_waitcnt_depctr 0xffe3 +; GFX10-NEXT: s_getpc_b64 s[30:31] +; GFX10-NEXT: s_add_u32 s30, s30, external_void_func_v32i32@rel32@lo+4 +; GFX10-NEXT: s_addc_u32 s31, s31, external_void_func_v32i32@rel32@hi+12 +; GFX10-NEXT: s_swappc_b64 s[30:31], s[30:31] ; GFX10-NEXT: v_readlane_b32 s30, v40, 0 +; GFX10-NEXT: v_readlane_b32 s31, v40, 1 ; GFX10-NEXT: s_addk_i32 s32, 0xfe00 ; GFX10-NEXT: v_readlane_b32 s33, v40, 2 ; GFX10-NEXT: s_or_saveexec_b32 s34, -1 @@ -4062,8 +4065,8 @@ define amdgpu_gfx void @test_call_external_void_func_v32i32() #0 { ; GFX10-SCRATCH-NEXT: s_addc_u32 s1, s1, external_void_func_v32i32@rel32@hi+12 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s31, 1 ; GFX10-SCRATCH-NEXT: s_swappc_b64 s[30:31], s[0:1] -; GFX10-SCRATCH-NEXT: v_readlane_b32 s31, v40, 1 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s30, v40, 0 +; GFX10-SCRATCH-NEXT: v_readlane_b32 s31, v40, 1 ; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, -16 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s33, v40, 2 ; GFX10-SCRATCH-NEXT: s_or_saveexec_b32 s0, -1 @@ -4085,32 +4088,32 @@ define amdgpu_gfx void @test_call_external_void_func_v32i32_i32(i32) #0 { ; GFX9-NEXT: s_or_saveexec_b64 s[34:35], -1 ; GFX9-NEXT: buffer_store_dword v40, off, s[0:3], s32 ; 4-byte Folded Spill ; GFX9-NEXT: s_mov_b64 exec, s[34:35] -; GFX9-NEXT: s_load_dwordx2 s[34:35], s[34:35], 0x0 +; GFX9-NEXT: v_writelane_b32 v40, s33, 2 +; GFX9-NEXT: v_writelane_b32 v40, s30, 0 +; GFX9-NEXT: v_writelane_b32 v40, s31, 1 +; GFX9-NEXT: s_load_dwordx2 s[30:31], s[30:31], 0x0 ; GFX9-NEXT: v_mov_b32_e32 v28, 0 ; GFX9-NEXT: 
global_load_dword v32, v[0:1], off -; GFX9-NEXT: v_writelane_b32 v40, s33, 2 ; GFX9-NEXT: s_mov_b32 s33, s32 ; GFX9-NEXT: s_waitcnt lgkmcnt(0) -; GFX9-NEXT: global_load_dwordx4 v[0:3], v28, s[34:35] -; GFX9-NEXT: global_load_dwordx4 v[4:7], v28, s[34:35] offset:16 -; GFX9-NEXT: global_load_dwordx4 v[8:11], v28, s[34:35] offset:32 -; GFX9-NEXT: global_load_dwordx4 v[12:15], v28, s[34:35] offset:48 -; GFX9-NEXT: global_load_dwordx4 v[16:19], v28, s[34:35] offset:64 -; GFX9-NEXT: global_load_dwordx4 v[20:23], v28, s[34:35] offset:80 -; GFX9-NEXT: global_load_dwordx4 v[24:27], v28, s[34:35] offset:96 +; GFX9-NEXT: global_load_dwordx4 v[0:3], v28, s[30:31] +; GFX9-NEXT: global_load_dwordx4 v[4:7], v28, s[30:31] offset:16 +; GFX9-NEXT: global_load_dwordx4 v[8:11], v28, s[30:31] offset:32 +; GFX9-NEXT: global_load_dwordx4 v[12:15], v28, s[30:31] offset:48 +; GFX9-NEXT: global_load_dwordx4 v[16:19], v28, s[30:31] offset:64 +; GFX9-NEXT: global_load_dwordx4 v[20:23], v28, s[30:31] offset:80 +; GFX9-NEXT: global_load_dwordx4 v[24:27], v28, s[30:31] offset:96 ; GFX9-NEXT: s_nop 0 -; GFX9-NEXT: global_load_dwordx4 v[28:31], v28, s[34:35] offset:112 +; GFX9-NEXT: global_load_dwordx4 v[28:31], v28, s[30:31] offset:112 ; GFX9-NEXT: s_addk_i32 s32, 0x400 -; GFX9-NEXT: v_writelane_b32 v40, s30, 0 -; GFX9-NEXT: v_writelane_b32 v40, s31, 1 -; GFX9-NEXT: s_getpc_b64 s[34:35] -; GFX9-NEXT: s_add_u32 s34, s34, external_void_func_v32i32_i32@rel32@lo+4 -; GFX9-NEXT: s_addc_u32 s35, s35, external_void_func_v32i32_i32@rel32@hi+12 +; GFX9-NEXT: s_getpc_b64 s[30:31] +; GFX9-NEXT: s_add_u32 s30, s30, external_void_func_v32i32_i32@rel32@lo+4 +; GFX9-NEXT: s_addc_u32 s31, s31, external_void_func_v32i32_i32@rel32@hi+12 ; GFX9-NEXT: s_waitcnt vmcnt(8) ; GFX9-NEXT: buffer_store_dword v32, off, s[0:3], s32 -; GFX9-NEXT: s_swappc_b64 s[30:31], s[34:35] -; GFX9-NEXT: v_readlane_b32 s31, v40, 1 +; GFX9-NEXT: s_swappc_b64 s[30:31], s[30:31] ; GFX9-NEXT: v_readlane_b32 s30, v40, 0 +; GFX9-NEXT: v_readlane_b32 s31, v40, 1 ; GFX9-NEXT: s_addk_i32 s32, 0xfc00 ; GFX9-NEXT: v_readlane_b32 s33, v40, 2 ; GFX9-NEXT: s_or_saveexec_b64 s[34:35], -1 @@ -4127,32 +4130,33 @@ define amdgpu_gfx void @test_call_external_void_func_v32i32_i32(i32) #0 { ; GFX10-NEXT: buffer_store_dword v40, off, s[0:3], s32 ; 4-byte Folded Spill ; GFX10-NEXT: s_waitcnt_depctr 0xffe3 ; GFX10-NEXT: s_mov_b32 exec_lo, s34 -; GFX10-NEXT: s_load_dwordx2 s[34:35], s[34:35], 0x0 -; GFX10-NEXT: v_mov_b32_e32 v32, 0 ; GFX10-NEXT: v_writelane_b32 v40, s33, 2 +; GFX10-NEXT: v_mov_b32_e32 v32, 0 ; GFX10-NEXT: s_mov_b32 s33, s32 ; GFX10-NEXT: s_addk_i32 s32, 0x200 +; GFX10-NEXT: v_writelane_b32 v40, s30, 0 +; GFX10-NEXT: v_writelane_b32 v40, s31, 1 +; GFX10-NEXT: s_load_dwordx2 s[30:31], s[30:31], 0x0 ; GFX10-NEXT: global_load_dword v33, v[0:1], off ; GFX10-NEXT: s_waitcnt lgkmcnt(0) ; GFX10-NEXT: s_clause 0x7 -; GFX10-NEXT: global_load_dwordx4 v[0:3], v32, s[34:35] -; GFX10-NEXT: global_load_dwordx4 v[4:7], v32, s[34:35] offset:16 -; GFX10-NEXT: global_load_dwordx4 v[8:11], v32, s[34:35] offset:32 -; GFX10-NEXT: global_load_dwordx4 v[12:15], v32, s[34:35] offset:48 -; GFX10-NEXT: global_load_dwordx4 v[16:19], v32, s[34:35] offset:64 -; GFX10-NEXT: global_load_dwordx4 v[20:23], v32, s[34:35] offset:80 -; GFX10-NEXT: global_load_dwordx4 v[24:27], v32, s[34:35] offset:96 -; GFX10-NEXT: global_load_dwordx4 v[28:31], v32, s[34:35] offset:112 -; GFX10-NEXT: v_writelane_b32 v40, s30, 0 -; GFX10-NEXT: s_getpc_b64 s[34:35] -; GFX10-NEXT: s_add_u32 s34, s34, 
external_void_func_v32i32_i32@rel32@lo+4 -; GFX10-NEXT: s_addc_u32 s35, s35, external_void_func_v32i32_i32@rel32@hi+12 -; GFX10-NEXT: v_writelane_b32 v40, s31, 1 +; GFX10-NEXT: global_load_dwordx4 v[0:3], v32, s[30:31] +; GFX10-NEXT: global_load_dwordx4 v[4:7], v32, s[30:31] offset:16 +; GFX10-NEXT: global_load_dwordx4 v[8:11], v32, s[30:31] offset:32 +; GFX10-NEXT: global_load_dwordx4 v[12:15], v32, s[30:31] offset:48 +; GFX10-NEXT: global_load_dwordx4 v[16:19], v32, s[30:31] offset:64 +; GFX10-NEXT: global_load_dwordx4 v[20:23], v32, s[30:31] offset:80 +; GFX10-NEXT: global_load_dwordx4 v[24:27], v32, s[30:31] offset:96 +; GFX10-NEXT: global_load_dwordx4 v[28:31], v32, s[30:31] offset:112 +; GFX10-NEXT: s_waitcnt_depctr 0xffe3 +; GFX10-NEXT: s_getpc_b64 s[30:31] +; GFX10-NEXT: s_add_u32 s30, s30, external_void_func_v32i32_i32@rel32@lo+4 +; GFX10-NEXT: s_addc_u32 s31, s31, external_void_func_v32i32_i32@rel32@hi+12 ; GFX10-NEXT: s_waitcnt vmcnt(8) ; GFX10-NEXT: buffer_store_dword v33, off, s[0:3], s32 -; GFX10-NEXT: s_swappc_b64 s[30:31], s[34:35] -; GFX10-NEXT: v_readlane_b32 s31, v40, 1 +; GFX10-NEXT: s_swappc_b64 s[30:31], s[30:31] ; GFX10-NEXT: v_readlane_b32 s30, v40, 0 +; GFX10-NEXT: v_readlane_b32 s31, v40, 1 ; GFX10-NEXT: s_addk_i32 s32, 0xfe00 ; GFX10-NEXT: v_readlane_b32 s33, v40, 2 ; GFX10-NEXT: s_or_saveexec_b32 s34, -1 @@ -4194,8 +4198,8 @@ define amdgpu_gfx void @test_call_external_void_func_v32i32_i32(i32) #0 { ; GFX10-SCRATCH-NEXT: s_waitcnt vmcnt(8) ; GFX10-SCRATCH-NEXT: scratch_store_dword off, v33, s32 ; GFX10-SCRATCH-NEXT: s_swappc_b64 s[30:31], s[0:1] -; GFX10-SCRATCH-NEXT: v_readlane_b32 s31, v40, 1 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s30, v40, 0 +; GFX10-SCRATCH-NEXT: v_readlane_b32 s31, v40, 1 ; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, -16 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s33, v40, 2 ; GFX10-SCRATCH-NEXT: s_or_saveexec_b32 s0, -1 @@ -4228,16 +4232,16 @@ define amdgpu_gfx void @test_call_external_i32_func_i32_imm(i32 addrspace(1)* %o ; GFX9-NEXT: v_mov_b32_e32 v0, 42 ; GFX9-NEXT: v_writelane_b32 v40, s31, 1 ; GFX9-NEXT: v_mov_b32_e32 v42, v1 -; GFX9-NEXT: s_getpc_b64 s[34:35] -; GFX9-NEXT: s_add_u32 s34, s34, external_i32_func_i32@rel32@lo+4 -; GFX9-NEXT: s_addc_u32 s35, s35, external_i32_func_i32@rel32@hi+12 -; GFX9-NEXT: s_swappc_b64 s[30:31], s[34:35] +; GFX9-NEXT: s_getpc_b64 s[30:31] +; GFX9-NEXT: s_add_u32 s30, s30, external_i32_func_i32@rel32@lo+4 +; GFX9-NEXT: s_addc_u32 s31, s31, external_i32_func_i32@rel32@hi+12 +; GFX9-NEXT: s_swappc_b64 s[30:31], s[30:31] ; GFX9-NEXT: global_store_dword v[41:42], v0, off ; GFX9-NEXT: s_waitcnt vmcnt(0) ; GFX9-NEXT: buffer_load_dword v42, off, s[0:3], s33 ; 4-byte Folded Reload ; GFX9-NEXT: buffer_load_dword v41, off, s[0:3], s33 offset:4 ; 4-byte Folded Reload -; GFX9-NEXT: v_readlane_b32 s31, v40, 1 ; GFX9-NEXT: v_readlane_b32 s30, v40, 0 +; GFX9-NEXT: v_readlane_b32 s31, v40, 1 ; GFX9-NEXT: s_addk_i32 s32, 0xfc00 ; GFX9-NEXT: v_readlane_b32 s33, v40, 2 ; GFX9-NEXT: s_or_saveexec_b64 s[34:35], -1 @@ -4263,18 +4267,18 @@ define amdgpu_gfx void @test_call_external_i32_func_i32_imm(i32 addrspace(1)* %o ; GFX10-NEXT: v_mov_b32_e32 v0, 42 ; GFX10-NEXT: s_addk_i32 s32, 0x200 ; GFX10-NEXT: v_mov_b32_e32 v42, v1 -; GFX10-NEXT: s_getpc_b64 s[34:35] -; GFX10-NEXT: s_add_u32 s34, s34, external_i32_func_i32@rel32@lo+4 -; GFX10-NEXT: s_addc_u32 s35, s35, external_i32_func_i32@rel32@hi+12 ; GFX10-NEXT: v_writelane_b32 v40, s31, 1 -; GFX10-NEXT: s_swappc_b64 s[30:31], s[34:35] +; GFX10-NEXT: s_getpc_b64 s[30:31] +; GFX10-NEXT: 
s_add_u32 s30, s30, external_i32_func_i32@rel32@lo+4 +; GFX10-NEXT: s_addc_u32 s31, s31, external_i32_func_i32@rel32@hi+12 +; GFX10-NEXT: s_swappc_b64 s[30:31], s[30:31] ; GFX10-NEXT: global_store_dword v[41:42], v0, off ; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-NEXT: s_clause 0x1 ; GFX10-NEXT: buffer_load_dword v42, off, s[0:3], s33 ; GFX10-NEXT: buffer_load_dword v41, off, s[0:3], s33 offset:4 -; GFX10-NEXT: v_readlane_b32 s31, v40, 1 ; GFX10-NEXT: v_readlane_b32 s30, v40, 0 +; GFX10-NEXT: v_readlane_b32 s31, v40, 1 ; GFX10-NEXT: s_addk_i32 s32, 0xfe00 ; GFX10-NEXT: v_readlane_b32 s33, v40, 2 ; GFX10-NEXT: s_or_saveexec_b32 s34, -1 @@ -4311,8 +4315,8 @@ define amdgpu_gfx void @test_call_external_i32_func_i32_imm(i32 addrspace(1)* %o ; GFX10-SCRATCH-NEXT: s_clause 0x1 ; GFX10-SCRATCH-NEXT: scratch_load_dword v42, off, s33 ; GFX10-SCRATCH-NEXT: scratch_load_dword v41, off, s33 offset:4 -; GFX10-SCRATCH-NEXT: v_readlane_b32 s31, v40, 1 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s30, v40, 0 +; GFX10-SCRATCH-NEXT: v_readlane_b32 s31, v40, 1 ; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, -16 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s33, v40, 2 ; GFX10-SCRATCH-NEXT: s_or_saveexec_b32 s0, -1 @@ -4333,22 +4337,22 @@ define amdgpu_gfx void @test_call_external_void_func_struct_i8_i32() #0 { ; GFX9-NEXT: s_or_saveexec_b64 s[34:35], -1 ; GFX9-NEXT: buffer_store_dword v40, off, s[0:3], s32 ; 4-byte Folded Spill ; GFX9-NEXT: s_mov_b64 exec, s[34:35] -; GFX9-NEXT: s_load_dwordx2 s[34:35], s[34:35], 0x0 -; GFX9-NEXT: v_mov_b32_e32 v2, 0 ; GFX9-NEXT: v_writelane_b32 v40, s33, 2 +; GFX9-NEXT: v_writelane_b32 v40, s30, 0 +; GFX9-NEXT: v_writelane_b32 v40, s31, 1 +; GFX9-NEXT: s_load_dwordx2 s[30:31], s[30:31], 0x0 +; GFX9-NEXT: v_mov_b32_e32 v2, 0 ; GFX9-NEXT: s_mov_b32 s33, s32 ; GFX9-NEXT: s_addk_i32 s32, 0x400 ; GFX9-NEXT: s_waitcnt lgkmcnt(0) -; GFX9-NEXT: global_load_ubyte v0, v2, s[34:35] -; GFX9-NEXT: global_load_dword v1, v2, s[34:35] offset:4 -; GFX9-NEXT: v_writelane_b32 v40, s30, 0 -; GFX9-NEXT: v_writelane_b32 v40, s31, 1 -; GFX9-NEXT: s_getpc_b64 s[34:35] -; GFX9-NEXT: s_add_u32 s34, s34, external_void_func_struct_i8_i32@rel32@lo+4 -; GFX9-NEXT: s_addc_u32 s35, s35, external_void_func_struct_i8_i32@rel32@hi+12 -; GFX9-NEXT: s_swappc_b64 s[30:31], s[34:35] -; GFX9-NEXT: v_readlane_b32 s31, v40, 1 +; GFX9-NEXT: global_load_ubyte v0, v2, s[30:31] +; GFX9-NEXT: global_load_dword v1, v2, s[30:31] offset:4 +; GFX9-NEXT: s_getpc_b64 s[30:31] +; GFX9-NEXT: s_add_u32 s30, s30, external_void_func_struct_i8_i32@rel32@lo+4 +; GFX9-NEXT: s_addc_u32 s31, s31, external_void_func_struct_i8_i32@rel32@hi+12 +; GFX9-NEXT: s_swappc_b64 s[30:31], s[30:31] ; GFX9-NEXT: v_readlane_b32 s30, v40, 0 +; GFX9-NEXT: v_readlane_b32 s31, v40, 1 ; GFX9-NEXT: s_addk_i32 s32, 0xfc00 ; GFX9-NEXT: v_readlane_b32 s33, v40, 2 ; GFX9-NEXT: s_or_saveexec_b64 s[34:35], -1 @@ -4365,23 +4369,24 @@ define amdgpu_gfx void @test_call_external_void_func_struct_i8_i32() #0 { ; GFX10-NEXT: buffer_store_dword v40, off, s[0:3], s32 ; 4-byte Folded Spill ; GFX10-NEXT: s_waitcnt_depctr 0xffe3 ; GFX10-NEXT: s_mov_b32 exec_lo, s34 -; GFX10-NEXT: s_load_dwordx2 s[34:35], s[34:35], 0x0 -; GFX10-NEXT: v_mov_b32_e32 v2, 0 ; GFX10-NEXT: v_writelane_b32 v40, s33, 2 +; GFX10-NEXT: v_mov_b32_e32 v2, 0 ; GFX10-NEXT: s_mov_b32 s33, s32 ; GFX10-NEXT: s_addk_i32 s32, 0x200 -; GFX10-NEXT: s_waitcnt lgkmcnt(0) -; GFX10-NEXT: s_clause 0x1 -; GFX10-NEXT: global_load_ubyte v0, v2, s[34:35] -; GFX10-NEXT: global_load_dword v1, v2, s[34:35] offset:4 ; GFX10-NEXT: 
v_writelane_b32 v40, s30, 0 -; GFX10-NEXT: s_getpc_b64 s[34:35] -; GFX10-NEXT: s_add_u32 s34, s34, external_void_func_struct_i8_i32@rel32@lo+4 -; GFX10-NEXT: s_addc_u32 s35, s35, external_void_func_struct_i8_i32@rel32@hi+12 ; GFX10-NEXT: v_writelane_b32 v40, s31, 1 -; GFX10-NEXT: s_swappc_b64 s[30:31], s[34:35] -; GFX10-NEXT: v_readlane_b32 s31, v40, 1 +; GFX10-NEXT: s_load_dwordx2 s[30:31], s[30:31], 0x0 +; GFX10-NEXT: s_waitcnt lgkmcnt(0) +; GFX10-NEXT: s_clause 0x1 +; GFX10-NEXT: global_load_ubyte v0, v2, s[30:31] +; GFX10-NEXT: global_load_dword v1, v2, s[30:31] offset:4 +; GFX10-NEXT: s_waitcnt_depctr 0xffe3 +; GFX10-NEXT: s_getpc_b64 s[30:31] +; GFX10-NEXT: s_add_u32 s30, s30, external_void_func_struct_i8_i32@rel32@lo+4 +; GFX10-NEXT: s_addc_u32 s31, s31, external_void_func_struct_i8_i32@rel32@hi+12 +; GFX10-NEXT: s_swappc_b64 s[30:31], s[30:31] ; GFX10-NEXT: v_readlane_b32 s30, v40, 0 +; GFX10-NEXT: v_readlane_b32 s31, v40, 1 ; GFX10-NEXT: s_addk_i32 s32, 0xfe00 ; GFX10-NEXT: v_readlane_b32 s33, v40, 2 ; GFX10-NEXT: s_or_saveexec_b32 s34, -1 @@ -4414,8 +4419,8 @@ define amdgpu_gfx void @test_call_external_void_func_struct_i8_i32() #0 { ; GFX10-SCRATCH-NEXT: s_addc_u32 s1, s1, external_void_func_struct_i8_i32@rel32@hi+12 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s31, 1 ; GFX10-SCRATCH-NEXT: s_swappc_b64 s[30:31], s[0:1] -; GFX10-SCRATCH-NEXT: v_readlane_b32 s31, v40, 1 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s30, v40, 0 +; GFX10-SCRATCH-NEXT: v_readlane_b32 s31, v40, 1 ; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, -16 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s33, v40, 2 ; GFX10-SCRATCH-NEXT: s_or_saveexec_b32 s0, -1 @@ -4447,12 +4452,12 @@ define amdgpu_gfx void @test_call_external_void_func_byval_struct_i8_i32() #0 { ; GFX9-NEXT: buffer_store_dword v0, off, s[0:3], s33 offset:4 ; GFX9-NEXT: v_lshrrev_b32_e64 v0, 6, s33 ; GFX9-NEXT: v_writelane_b32 v40, s31, 1 -; GFX9-NEXT: s_getpc_b64 s[34:35] -; GFX9-NEXT: s_add_u32 s34, s34, external_void_func_byval_struct_i8_i32@rel32@lo+4 -; GFX9-NEXT: s_addc_u32 s35, s35, external_void_func_byval_struct_i8_i32@rel32@hi+12 -; GFX9-NEXT: s_swappc_b64 s[30:31], s[34:35] -; GFX9-NEXT: v_readlane_b32 s31, v40, 1 +; GFX9-NEXT: s_getpc_b64 s[30:31] +; GFX9-NEXT: s_add_u32 s30, s30, external_void_func_byval_struct_i8_i32@rel32@lo+4 +; GFX9-NEXT: s_addc_u32 s31, s31, external_void_func_byval_struct_i8_i32@rel32@hi+12 +; GFX9-NEXT: s_swappc_b64 s[30:31], s[30:31] ; GFX9-NEXT: v_readlane_b32 s30, v40, 0 +; GFX9-NEXT: v_readlane_b32 s31, v40, 1 ; GFX9-NEXT: s_addk_i32 s32, 0xfc00 ; GFX9-NEXT: v_readlane_b32 s33, v40, 2 ; GFX9-NEXT: s_or_saveexec_b64 s[34:35], -1 @@ -4478,13 +4483,13 @@ define amdgpu_gfx void @test_call_external_void_func_byval_struct_i8_i32() #0 { ; GFX10-NEXT: buffer_store_byte v0, off, s[0:3], s33 ; GFX10-NEXT: buffer_store_dword v1, off, s[0:3], s33 offset:4 ; GFX10-NEXT: v_lshrrev_b32_e64 v0, 5, s33 -; GFX10-NEXT: s_getpc_b64 s[34:35] -; GFX10-NEXT: s_add_u32 s34, s34, external_void_func_byval_struct_i8_i32@rel32@lo+4 -; GFX10-NEXT: s_addc_u32 s35, s35, external_void_func_byval_struct_i8_i32@rel32@hi+12 ; GFX10-NEXT: v_writelane_b32 v40, s31, 1 -; GFX10-NEXT: s_swappc_b64 s[30:31], s[34:35] -; GFX10-NEXT: v_readlane_b32 s31, v40, 1 +; GFX10-NEXT: s_getpc_b64 s[30:31] +; GFX10-NEXT: s_add_u32 s30, s30, external_void_func_byval_struct_i8_i32@rel32@lo+4 +; GFX10-NEXT: s_addc_u32 s31, s31, external_void_func_byval_struct_i8_i32@rel32@hi+12 +; GFX10-NEXT: s_swappc_b64 s[30:31], s[30:31] ; GFX10-NEXT: v_readlane_b32 s30, v40, 0 +; GFX10-NEXT: 
v_readlane_b32 s31, v40, 1 ; GFX10-NEXT: s_addk_i32 s32, 0xfe00 ; GFX10-NEXT: v_readlane_b32 s33, v40, 2 ; GFX10-NEXT: s_or_saveexec_b32 s34, -1 @@ -4516,8 +4521,8 @@ define amdgpu_gfx void @test_call_external_void_func_byval_struct_i8_i32() #0 { ; GFX10-SCRATCH-NEXT: s_addc_u32 s1, s1, external_void_func_byval_struct_i8_i32@rel32@hi+12 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s31, 1 ; GFX10-SCRATCH-NEXT: s_swappc_b64 s[30:31], s[0:1] -; GFX10-SCRATCH-NEXT: v_readlane_b32 s31, v40, 1 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s30, v40, 0 +; GFX10-SCRATCH-NEXT: v_readlane_b32 s31, v40, 1 ; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, -16 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s33, v40, 2 ; GFX10-SCRATCH-NEXT: s_or_saveexec_b32 s0, -1 @@ -4554,14 +4559,14 @@ define amdgpu_gfx void @test_call_external_void_func_sret_struct_i8_i32_byval_st ; GFX9-NEXT: v_add_u32_e32 v0, 8, v0 ; GFX9-NEXT: v_lshrrev_b32_e64 v1, 6, s33 ; GFX9-NEXT: v_writelane_b32 v40, s31, 1 -; GFX9-NEXT: s_getpc_b64 s[34:35] -; GFX9-NEXT: s_add_u32 s34, s34, external_void_func_sret_struct_i8_i32_byval_struct_i8_i32@rel32@lo+4 -; GFX9-NEXT: s_addc_u32 s35, s35, external_void_func_sret_struct_i8_i32_byval_struct_i8_i32@rel32@hi+12 -; GFX9-NEXT: s_swappc_b64 s[30:31], s[34:35] +; GFX9-NEXT: s_getpc_b64 s[30:31] +; GFX9-NEXT: s_add_u32 s30, s30, external_void_func_sret_struct_i8_i32_byval_struct_i8_i32@rel32@lo+4 +; GFX9-NEXT: s_addc_u32 s31, s31, external_void_func_sret_struct_i8_i32_byval_struct_i8_i32@rel32@hi+12 +; GFX9-NEXT: s_swappc_b64 s[30:31], s[30:31] ; GFX9-NEXT: buffer_load_ubyte v0, off, s[0:3], s33 offset:8 ; GFX9-NEXT: buffer_load_dword v1, off, s[0:3], s33 offset:12 -; GFX9-NEXT: v_readlane_b32 s31, v40, 1 ; GFX9-NEXT: v_readlane_b32 s30, v40, 0 +; GFX9-NEXT: v_readlane_b32 s31, v40, 1 ; GFX9-NEXT: s_addk_i32 s32, 0xf800 ; GFX9-NEXT: v_readlane_b32 s33, v40, 2 ; GFX9-NEXT: s_waitcnt vmcnt(0) @@ -4593,17 +4598,17 @@ define amdgpu_gfx void @test_call_external_void_func_sret_struct_i8_i32_byval_st ; GFX10-NEXT: v_lshrrev_b32_e64 v0, 5, s33 ; GFX10-NEXT: v_writelane_b32 v40, s30, 0 ; GFX10-NEXT: v_lshrrev_b32_e64 v1, 5, s33 -; GFX10-NEXT: s_getpc_b64 s[34:35] -; GFX10-NEXT: s_add_u32 s34, s34, external_void_func_sret_struct_i8_i32_byval_struct_i8_i32@rel32@lo+4 -; GFX10-NEXT: s_addc_u32 s35, s35, external_void_func_sret_struct_i8_i32_byval_struct_i8_i32@rel32@hi+12 ; GFX10-NEXT: v_add_nc_u32_e32 v0, 8, v0 ; GFX10-NEXT: v_writelane_b32 v40, s31, 1 -; GFX10-NEXT: s_swappc_b64 s[30:31], s[34:35] +; GFX10-NEXT: s_getpc_b64 s[30:31] +; GFX10-NEXT: s_add_u32 s30, s30, external_void_func_sret_struct_i8_i32_byval_struct_i8_i32@rel32@lo+4 +; GFX10-NEXT: s_addc_u32 s31, s31, external_void_func_sret_struct_i8_i32_byval_struct_i8_i32@rel32@hi+12 +; GFX10-NEXT: s_swappc_b64 s[30:31], s[30:31] ; GFX10-NEXT: s_clause 0x1 ; GFX10-NEXT: buffer_load_ubyte v0, off, s[0:3], s33 offset:8 ; GFX10-NEXT: buffer_load_dword v1, off, s[0:3], s33 offset:12 -; GFX10-NEXT: v_readlane_b32 s31, v40, 1 ; GFX10-NEXT: v_readlane_b32 s30, v40, 0 +; GFX10-NEXT: v_readlane_b32 s31, v40, 1 ; GFX10-NEXT: s_addk_i32 s32, 0xfc00 ; GFX10-NEXT: v_readlane_b32 s33, v40, 2 ; GFX10-NEXT: s_waitcnt vmcnt(0) @@ -4645,8 +4650,8 @@ define amdgpu_gfx void @test_call_external_void_func_sret_struct_i8_i32_byval_st ; GFX10-SCRATCH-NEXT: s_clause 0x1 ; GFX10-SCRATCH-NEXT: scratch_load_ubyte v0, off, s33 offset:8 ; GFX10-SCRATCH-NEXT: scratch_load_dword v1, off, s33 offset:12 -; GFX10-SCRATCH-NEXT: v_readlane_b32 s31, v40, 1 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s30, v40, 0 +; 
GFX10-SCRATCH-NEXT: v_readlane_b32 s31, v40, 1 ; GFX10-SCRATCH-NEXT: s_addk_i32 s32, 0xffe0 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s33, v40, 2 ; GFX10-SCRATCH-NEXT: s_waitcnt vmcnt(0) @@ -4684,18 +4689,18 @@ define amdgpu_gfx void @test_call_external_void_func_v16i8() #0 { ; GFX9-NEXT: s_or_saveexec_b64 s[34:35], -1 ; GFX9-NEXT: buffer_store_dword v40, off, s[0:3], s32 ; 4-byte Folded Spill ; GFX9-NEXT: s_mov_b64 exec, s[34:35] -; GFX9-NEXT: s_load_dwordx2 s[34:35], s[34:35], 0x0 -; GFX9-NEXT: v_mov_b32_e32 v0, 0 ; GFX9-NEXT: v_writelane_b32 v40, s33, 2 +; GFX9-NEXT: v_writelane_b32 v40, s30, 0 +; GFX9-NEXT: v_writelane_b32 v40, s31, 1 +; GFX9-NEXT: s_load_dwordx2 s[30:31], s[30:31], 0x0 +; GFX9-NEXT: v_mov_b32_e32 v0, 0 ; GFX9-NEXT: s_mov_b32 s33, s32 ; GFX9-NEXT: s_addk_i32 s32, 0x400 ; GFX9-NEXT: s_waitcnt lgkmcnt(0) -; GFX9-NEXT: global_load_dwordx4 v[0:3], v0, s[34:35] -; GFX9-NEXT: v_writelane_b32 v40, s30, 0 -; GFX9-NEXT: v_writelane_b32 v40, s31, 1 -; GFX9-NEXT: s_getpc_b64 s[34:35] -; GFX9-NEXT: s_add_u32 s34, s34, external_void_func_v16i8@rel32@lo+4 -; GFX9-NEXT: s_addc_u32 s35, s35, external_void_func_v16i8@rel32@hi+12 +; GFX9-NEXT: global_load_dwordx4 v[0:3], v0, s[30:31] +; GFX9-NEXT: s_getpc_b64 s[30:31] +; GFX9-NEXT: s_add_u32 s30, s30, external_void_func_v16i8@rel32@lo+4 +; GFX9-NEXT: s_addc_u32 s31, s31, external_void_func_v16i8@rel32@hi+12 ; GFX9-NEXT: s_waitcnt vmcnt(0) ; GFX9-NEXT: v_lshrrev_b32_e32 v16, 8, v0 ; GFX9-NEXT: v_lshrrev_b32_e32 v17, 16, v0 @@ -4715,9 +4720,9 @@ define amdgpu_gfx void @test_call_external_void_func_v16i8() #0 { ; GFX9-NEXT: v_mov_b32_e32 v1, v16 ; GFX9-NEXT: v_mov_b32_e32 v2, v17 ; GFX9-NEXT: v_mov_b32_e32 v3, v18 -; GFX9-NEXT: s_swappc_b64 s[30:31], s[34:35] -; GFX9-NEXT: v_readlane_b32 s31, v40, 1 +; GFX9-NEXT: s_swappc_b64 s[30:31], s[30:31] ; GFX9-NEXT: v_readlane_b32 s30, v40, 0 +; GFX9-NEXT: v_readlane_b32 s31, v40, 1 ; GFX9-NEXT: s_addk_i32 s32, 0xfc00 ; GFX9-NEXT: v_readlane_b32 s33, v40, 2 ; GFX9-NEXT: s_or_saveexec_b64 s[34:35], -1 @@ -4734,19 +4739,19 @@ define amdgpu_gfx void @test_call_external_void_func_v16i8() #0 { ; GFX10-NEXT: buffer_store_dword v40, off, s[0:3], s32 ; 4-byte Folded Spill ; GFX10-NEXT: s_waitcnt_depctr 0xffe3 ; GFX10-NEXT: s_mov_b32 exec_lo, s34 -; GFX10-NEXT: s_load_dwordx2 s[34:35], s[34:35], 0x0 -; GFX10-NEXT: v_mov_b32_e32 v0, 0 ; GFX10-NEXT: v_writelane_b32 v40, s33, 2 +; GFX10-NEXT: v_mov_b32_e32 v0, 0 ; GFX10-NEXT: s_mov_b32 s33, s32 ; GFX10-NEXT: s_addk_i32 s32, 0x200 ; GFX10-NEXT: v_writelane_b32 v40, s30, 0 ; GFX10-NEXT: v_writelane_b32 v40, s31, 1 +; GFX10-NEXT: s_load_dwordx2 s[30:31], s[30:31], 0x0 ; GFX10-NEXT: s_waitcnt lgkmcnt(0) -; GFX10-NEXT: global_load_dwordx4 v[0:3], v0, s[34:35] +; GFX10-NEXT: global_load_dwordx4 v[0:3], v0, s[30:31] ; GFX10-NEXT: s_waitcnt_depctr 0xffe3 -; GFX10-NEXT: s_getpc_b64 s[34:35] -; GFX10-NEXT: s_add_u32 s34, s34, external_void_func_v16i8@rel32@lo+4 -; GFX10-NEXT: s_addc_u32 s35, s35, external_void_func_v16i8@rel32@hi+12 +; GFX10-NEXT: s_getpc_b64 s[30:31] +; GFX10-NEXT: s_add_u32 s30, s30, external_void_func_v16i8@rel32@lo+4 +; GFX10-NEXT: s_addc_u32 s31, s31, external_void_func_v16i8@rel32@hi+12 ; GFX10-NEXT: s_waitcnt vmcnt(0) ; GFX10-NEXT: v_lshrrev_b32_e32 v16, 8, v0 ; GFX10-NEXT: v_lshrrev_b32_e32 v17, 16, v0 @@ -4766,9 +4771,9 @@ define amdgpu_gfx void @test_call_external_void_func_v16i8() #0 { ; GFX10-NEXT: v_mov_b32_e32 v1, v16 ; GFX10-NEXT: v_mov_b32_e32 v2, v17 ; GFX10-NEXT: v_mov_b32_e32 v3, v18 -; GFX10-NEXT: s_swappc_b64 s[30:31], s[34:35] -; 
GFX10-NEXT: v_readlane_b32 s31, v40, 1 +; GFX10-NEXT: s_swappc_b64 s[30:31], s[30:31] ; GFX10-NEXT: v_readlane_b32 s30, v40, 0 +; GFX10-NEXT: v_readlane_b32 s31, v40, 1 ; GFX10-NEXT: s_addk_i32 s32, 0xfe00 ; GFX10-NEXT: v_readlane_b32 s33, v40, 2 ; GFX10-NEXT: s_or_saveexec_b32 s34, -1 @@ -4819,8 +4824,8 @@ define amdgpu_gfx void @test_call_external_void_func_v16i8() #0 { ; GFX10-SCRATCH-NEXT: v_mov_b32_e32 v2, v17 ; GFX10-SCRATCH-NEXT: v_mov_b32_e32 v3, v18 ; GFX10-SCRATCH-NEXT: s_swappc_b64 s[30:31], s[0:1] -; GFX10-SCRATCH-NEXT: v_readlane_b32 s31, v40, 1 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s30, v40, 0 +; GFX10-SCRATCH-NEXT: v_readlane_b32 s31, v40, 1 ; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, -16 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s33, v40, 2 ; GFX10-SCRATCH-NEXT: s_or_saveexec_b32 s0, -1 @@ -4842,91 +4847,88 @@ define void @tail_call_byval_align16(<32 x i32> %val, double %tmp) #0 { ; GFX9-NEXT: s_or_saveexec_b64 s[4:5], -1 ; GFX9-NEXT: buffer_store_dword v40, off, s[0:3], s32 offset:24 ; 4-byte Folded Spill ; GFX9-NEXT: s_mov_b64 exec, s[4:5] -; GFX9-NEXT: v_writelane_b32 v40, s33, 32 +; GFX9-NEXT: v_writelane_b32 v40, s33, 30 ; GFX9-NEXT: s_mov_b32 s33, s32 ; GFX9-NEXT: buffer_load_dword v32, off, s[0:3], s33 offset:20 ; GFX9-NEXT: buffer_load_dword v33, off, s[0:3], s33 offset:16 ; GFX9-NEXT: buffer_load_dword v31, off, s[0:3], s33 -; GFX9-NEXT: v_writelane_b32 v40, s30, 0 -; GFX9-NEXT: v_writelane_b32 v40, s31, 1 -; GFX9-NEXT: v_writelane_b32 v40, s34, 2 -; GFX9-NEXT: v_writelane_b32 v40, s35, 3 -; GFX9-NEXT: v_writelane_b32 v40, s36, 4 -; GFX9-NEXT: v_writelane_b32 v40, s37, 5 -; GFX9-NEXT: v_writelane_b32 v40, s38, 6 -; GFX9-NEXT: v_writelane_b32 v40, s39, 7 -; GFX9-NEXT: v_writelane_b32 v40, s40, 8 -; GFX9-NEXT: v_writelane_b32 v40, s41, 9 -; GFX9-NEXT: v_writelane_b32 v40, s42, 10 -; GFX9-NEXT: v_writelane_b32 v40, s43, 11 -; GFX9-NEXT: v_writelane_b32 v40, s44, 12 -; GFX9-NEXT: v_writelane_b32 v40, s45, 13 -; GFX9-NEXT: v_writelane_b32 v40, s46, 14 -; GFX9-NEXT: v_writelane_b32 v40, s47, 15 -; GFX9-NEXT: v_writelane_b32 v40, s48, 16 -; GFX9-NEXT: v_writelane_b32 v40, s49, 17 -; GFX9-NEXT: v_writelane_b32 v40, s50, 18 -; GFX9-NEXT: v_writelane_b32 v40, s51, 19 -; GFX9-NEXT: v_writelane_b32 v40, s52, 20 -; GFX9-NEXT: v_writelane_b32 v40, s53, 21 -; GFX9-NEXT: v_writelane_b32 v40, s54, 22 -; GFX9-NEXT: v_writelane_b32 v40, s55, 23 -; GFX9-NEXT: v_writelane_b32 v40, s56, 24 -; GFX9-NEXT: v_writelane_b32 v40, s57, 25 -; GFX9-NEXT: v_writelane_b32 v40, s58, 26 -; GFX9-NEXT: v_writelane_b32 v40, s59, 27 -; GFX9-NEXT: v_writelane_b32 v40, s60, 28 -; GFX9-NEXT: v_writelane_b32 v40, s61, 29 +; GFX9-NEXT: v_writelane_b32 v40, s34, 0 +; GFX9-NEXT: v_writelane_b32 v40, s35, 1 +; GFX9-NEXT: v_writelane_b32 v40, s36, 2 +; GFX9-NEXT: v_writelane_b32 v40, s37, 3 +; GFX9-NEXT: v_writelane_b32 v40, s38, 4 +; GFX9-NEXT: v_writelane_b32 v40, s39, 5 +; GFX9-NEXT: v_writelane_b32 v40, s40, 6 +; GFX9-NEXT: v_writelane_b32 v40, s41, 7 +; GFX9-NEXT: v_writelane_b32 v40, s42, 8 +; GFX9-NEXT: v_writelane_b32 v40, s43, 9 +; GFX9-NEXT: v_writelane_b32 v40, s44, 10 +; GFX9-NEXT: v_writelane_b32 v40, s45, 11 +; GFX9-NEXT: v_writelane_b32 v40, s46, 12 +; GFX9-NEXT: v_writelane_b32 v40, s47, 13 +; GFX9-NEXT: v_writelane_b32 v40, s48, 14 +; GFX9-NEXT: v_writelane_b32 v40, s49, 15 +; GFX9-NEXT: v_writelane_b32 v40, s50, 16 +; GFX9-NEXT: v_writelane_b32 v40, s51, 17 +; GFX9-NEXT: v_writelane_b32 v40, s52, 18 +; GFX9-NEXT: v_writelane_b32 v40, s53, 19 +; GFX9-NEXT: v_writelane_b32 v40, s54, 20 +; GFX9-NEXT: 
v_writelane_b32 v40, s55, 21 +; GFX9-NEXT: v_writelane_b32 v40, s56, 22 +; GFX9-NEXT: v_writelane_b32 v40, s57, 23 +; GFX9-NEXT: v_writelane_b32 v40, s58, 24 +; GFX9-NEXT: v_writelane_b32 v40, s59, 25 +; GFX9-NEXT: v_writelane_b32 v40, s60, 26 +; GFX9-NEXT: v_writelane_b32 v40, s61, 27 ; GFX9-NEXT: s_addk_i32 s32, 0x800 -; GFX9-NEXT: v_writelane_b32 v40, s62, 30 -; GFX9-NEXT: v_writelane_b32 v40, s63, 31 -; GFX9-NEXT: s_getpc_b64 s[4:5] -; GFX9-NEXT: s_add_u32 s4, s4, byval_align16_f64_arg@rel32@lo+4 -; GFX9-NEXT: s_addc_u32 s5, s5, byval_align16_f64_arg@rel32@hi+12 +; GFX9-NEXT: v_writelane_b32 v40, s62, 28 +; GFX9-NEXT: v_writelane_b32 v40, s63, 29 +; GFX9-NEXT: s_mov_b64 s[4:5], s[30:31] +; GFX9-NEXT: s_getpc_b64 s[6:7] +; GFX9-NEXT: s_add_u32 s6, s6, byval_align16_f64_arg@rel32@lo+4 +; GFX9-NEXT: s_addc_u32 s7, s7, byval_align16_f64_arg@rel32@hi+12 ; GFX9-NEXT: s_waitcnt vmcnt(2) ; GFX9-NEXT: buffer_store_dword v32, off, s[0:3], s32 offset:4 ; GFX9-NEXT: s_waitcnt vmcnt(2) ; GFX9-NEXT: buffer_store_dword v33, off, s[0:3], s32 -; GFX9-NEXT: s_swappc_b64 s[30:31], s[4:5] -; GFX9-NEXT: v_readlane_b32 s63, v40, 31 -; GFX9-NEXT: v_readlane_b32 s62, v40, 30 -; GFX9-NEXT: v_readlane_b32 s61, v40, 29 -; GFX9-NEXT: v_readlane_b32 s60, v40, 28 -; GFX9-NEXT: v_readlane_b32 s59, v40, 27 -; GFX9-NEXT: v_readlane_b32 s58, v40, 26 -; GFX9-NEXT: v_readlane_b32 s57, v40, 25 -; GFX9-NEXT: v_readlane_b32 s56, v40, 24 -; GFX9-NEXT: v_readlane_b32 s55, v40, 23 -; GFX9-NEXT: v_readlane_b32 s54, v40, 22 -; GFX9-NEXT: v_readlane_b32 s53, v40, 21 -; GFX9-NEXT: v_readlane_b32 s52, v40, 20 -; GFX9-NEXT: v_readlane_b32 s51, v40, 19 -; GFX9-NEXT: v_readlane_b32 s50, v40, 18 -; GFX9-NEXT: v_readlane_b32 s49, v40, 17 -; GFX9-NEXT: v_readlane_b32 s48, v40, 16 -; GFX9-NEXT: v_readlane_b32 s47, v40, 15 -; GFX9-NEXT: v_readlane_b32 s46, v40, 14 -; GFX9-NEXT: v_readlane_b32 s45, v40, 13 -; GFX9-NEXT: v_readlane_b32 s44, v40, 12 -; GFX9-NEXT: v_readlane_b32 s43, v40, 11 -; GFX9-NEXT: v_readlane_b32 s42, v40, 10 -; GFX9-NEXT: v_readlane_b32 s41, v40, 9 -; GFX9-NEXT: v_readlane_b32 s40, v40, 8 -; GFX9-NEXT: v_readlane_b32 s39, v40, 7 -; GFX9-NEXT: v_readlane_b32 s38, v40, 6 -; GFX9-NEXT: v_readlane_b32 s37, v40, 5 -; GFX9-NEXT: v_readlane_b32 s36, v40, 4 -; GFX9-NEXT: v_readlane_b32 s35, v40, 3 -; GFX9-NEXT: v_readlane_b32 s34, v40, 2 -; GFX9-NEXT: v_readlane_b32 s31, v40, 1 -; GFX9-NEXT: v_readlane_b32 s30, v40, 0 +; GFX9-NEXT: s_swappc_b64 s[30:31], s[6:7] +; GFX9-NEXT: v_readlane_b32 s63, v40, 29 +; GFX9-NEXT: v_readlane_b32 s62, v40, 28 +; GFX9-NEXT: v_readlane_b32 s61, v40, 27 +; GFX9-NEXT: v_readlane_b32 s60, v40, 26 +; GFX9-NEXT: v_readlane_b32 s59, v40, 25 +; GFX9-NEXT: v_readlane_b32 s58, v40, 24 +; GFX9-NEXT: v_readlane_b32 s57, v40, 23 +; GFX9-NEXT: v_readlane_b32 s56, v40, 22 +; GFX9-NEXT: v_readlane_b32 s55, v40, 21 +; GFX9-NEXT: v_readlane_b32 s54, v40, 20 +; GFX9-NEXT: v_readlane_b32 s53, v40, 19 +; GFX9-NEXT: v_readlane_b32 s52, v40, 18 +; GFX9-NEXT: v_readlane_b32 s51, v40, 17 +; GFX9-NEXT: v_readlane_b32 s50, v40, 16 +; GFX9-NEXT: v_readlane_b32 s49, v40, 15 +; GFX9-NEXT: v_readlane_b32 s48, v40, 14 +; GFX9-NEXT: v_readlane_b32 s47, v40, 13 +; GFX9-NEXT: v_readlane_b32 s46, v40, 12 +; GFX9-NEXT: v_readlane_b32 s45, v40, 11 +; GFX9-NEXT: v_readlane_b32 s44, v40, 10 +; GFX9-NEXT: v_readlane_b32 s43, v40, 9 +; GFX9-NEXT: v_readlane_b32 s42, v40, 8 +; GFX9-NEXT: v_readlane_b32 s41, v40, 7 +; GFX9-NEXT: v_readlane_b32 s40, v40, 6 +; GFX9-NEXT: v_readlane_b32 s39, v40, 5 +; GFX9-NEXT: v_readlane_b32 s38, v40, 4 
+; GFX9-NEXT: v_readlane_b32 s37, v40, 3 +; GFX9-NEXT: v_readlane_b32 s36, v40, 2 +; GFX9-NEXT: v_readlane_b32 s35, v40, 1 +; GFX9-NEXT: v_readlane_b32 s34, v40, 0 ; GFX9-NEXT: s_addk_i32 s32, 0xf800 -; GFX9-NEXT: v_readlane_b32 s33, v40, 32 -; GFX9-NEXT: s_or_saveexec_b64 s[4:5], -1 +; GFX9-NEXT: v_readlane_b32 s33, v40, 30 +; GFX9-NEXT: s_or_saveexec_b64 s[6:7], -1 ; GFX9-NEXT: buffer_load_dword v40, off, s[0:3], s32 offset:24 ; 4-byte Folded Reload -; GFX9-NEXT: s_mov_b64 exec, s[4:5] +; GFX9-NEXT: s_mov_b64 exec, s[6:7] ; GFX9-NEXT: s_waitcnt vmcnt(0) -; GFX9-NEXT: s_setpc_b64 s[30:31] +; GFX9-NEXT: s_setpc_b64 s[4:5] ; ; GFX10-LABEL: tail_call_byval_align16: ; GFX10: ; %bb.0: ; %entry @@ -4936,93 +4938,90 @@ define void @tail_call_byval_align16(<32 x i32> %val, double %tmp) #0 { ; GFX10-NEXT: buffer_store_dword v40, off, s[0:3], s32 offset:24 ; 4-byte Folded Spill ; GFX10-NEXT: s_waitcnt_depctr 0xffe3 ; GFX10-NEXT: s_mov_b32 exec_lo, s4 -; GFX10-NEXT: s_mov_b32 s6, s33 +; GFX10-NEXT: v_writelane_b32 v40, s33, 30 ; GFX10-NEXT: s_mov_b32 s33, s32 ; GFX10-NEXT: s_clause 0x2 ; GFX10-NEXT: buffer_load_dword v32, off, s[0:3], s33 offset:20 ; GFX10-NEXT: buffer_load_dword v33, off, s[0:3], s33 offset:16 ; GFX10-NEXT: buffer_load_dword v31, off, s[0:3], s33 -; GFX10-NEXT: v_writelane_b32 v40, s30, 0 ; GFX10-NEXT: s_addk_i32 s32, 0x400 -; GFX10-NEXT: s_getpc_b64 s[4:5] -; GFX10-NEXT: s_add_u32 s4, s4, byval_align16_f64_arg@rel32@lo+4 -; GFX10-NEXT: s_addc_u32 s5, s5, byval_align16_f64_arg@rel32@hi+12 +; GFX10-NEXT: v_writelane_b32 v40, s34, 0 +; GFX10-NEXT: s_mov_b64 s[4:5], s[30:31] +; GFX10-NEXT: s_getpc_b64 s[6:7] +; GFX10-NEXT: s_add_u32 s6, s6, byval_align16_f64_arg@rel32@lo+4 +; GFX10-NEXT: s_addc_u32 s7, s7, byval_align16_f64_arg@rel32@hi+12 ; GFX10-NEXT: s_waitcnt vmcnt(2) ; GFX10-NEXT: buffer_store_dword v32, off, s[0:3], s32 offset:4 ; GFX10-NEXT: s_waitcnt vmcnt(1) ; GFX10-NEXT: buffer_store_dword v33, off, s[0:3], s32 -; GFX10-NEXT: v_writelane_b32 v40, s31, 1 -; GFX10-NEXT: v_writelane_b32 v40, s34, 2 -; GFX10-NEXT: v_writelane_b32 v40, s35, 3 -; GFX10-NEXT: v_writelane_b32 v40, s36, 4 -; GFX10-NEXT: v_writelane_b32 v40, s37, 5 -; GFX10-NEXT: v_writelane_b32 v40, s38, 6 -; GFX10-NEXT: v_writelane_b32 v40, s39, 7 -; GFX10-NEXT: v_writelane_b32 v40, s40, 8 -; GFX10-NEXT: v_writelane_b32 v40, s41, 9 -; GFX10-NEXT: v_writelane_b32 v40, s42, 10 -; GFX10-NEXT: v_writelane_b32 v40, s43, 11 -; GFX10-NEXT: v_writelane_b32 v40, s44, 12 -; GFX10-NEXT: v_writelane_b32 v40, s45, 13 -; GFX10-NEXT: v_writelane_b32 v40, s46, 14 -; GFX10-NEXT: v_writelane_b32 v40, s47, 15 -; GFX10-NEXT: v_writelane_b32 v40, s48, 16 -; GFX10-NEXT: v_writelane_b32 v40, s49, 17 -; GFX10-NEXT: v_writelane_b32 v40, s50, 18 -; GFX10-NEXT: v_writelane_b32 v40, s51, 19 -; GFX10-NEXT: v_writelane_b32 v40, s52, 20 -; GFX10-NEXT: v_writelane_b32 v40, s53, 21 -; GFX10-NEXT: v_writelane_b32 v40, s54, 22 -; GFX10-NEXT: v_writelane_b32 v40, s55, 23 -; GFX10-NEXT: v_writelane_b32 v40, s56, 24 -; GFX10-NEXT: v_writelane_b32 v40, s57, 25 -; GFX10-NEXT: v_writelane_b32 v40, s58, 26 -; GFX10-NEXT: v_writelane_b32 v40, s59, 27 -; GFX10-NEXT: v_writelane_b32 v40, s60, 28 -; GFX10-NEXT: v_writelane_b32 v40, s61, 29 -; GFX10-NEXT: v_writelane_b32 v40, s62, 30 -; GFX10-NEXT: v_writelane_b32 v40, s63, 31 -; GFX10-NEXT: s_swappc_b64 s[30:31], s[4:5] -; GFX10-NEXT: v_readlane_b32 s63, v40, 31 -; GFX10-NEXT: v_readlane_b32 s62, v40, 30 -; GFX10-NEXT: v_readlane_b32 s61, v40, 29 -; GFX10-NEXT: v_readlane_b32 s60, v40, 28 -; GFX10-NEXT: 
v_readlane_b32 s59, v40, 27 -; GFX10-NEXT: v_readlane_b32 s58, v40, 26 -; GFX10-NEXT: v_readlane_b32 s57, v40, 25 -; GFX10-NEXT: v_readlane_b32 s56, v40, 24 -; GFX10-NEXT: v_readlane_b32 s55, v40, 23 -; GFX10-NEXT: v_readlane_b32 s54, v40, 22 -; GFX10-NEXT: v_readlane_b32 s53, v40, 21 -; GFX10-NEXT: v_readlane_b32 s52, v40, 20 -; GFX10-NEXT: v_readlane_b32 s51, v40, 19 -; GFX10-NEXT: v_readlane_b32 s50, v40, 18 -; GFX10-NEXT: v_readlane_b32 s49, v40, 17 -; GFX10-NEXT: v_readlane_b32 s48, v40, 16 -; GFX10-NEXT: v_readlane_b32 s47, v40, 15 -; GFX10-NEXT: v_readlane_b32 s46, v40, 14 -; GFX10-NEXT: v_readlane_b32 s45, v40, 13 -; GFX10-NEXT: v_readlane_b32 s44, v40, 12 -; GFX10-NEXT: v_readlane_b32 s43, v40, 11 -; GFX10-NEXT: v_readlane_b32 s42, v40, 10 -; GFX10-NEXT: v_readlane_b32 s41, v40, 9 -; GFX10-NEXT: v_readlane_b32 s40, v40, 8 -; GFX10-NEXT: v_readlane_b32 s39, v40, 7 -; GFX10-NEXT: v_readlane_b32 s38, v40, 6 -; GFX10-NEXT: v_readlane_b32 s37, v40, 5 -; GFX10-NEXT: v_readlane_b32 s36, v40, 4 -; GFX10-NEXT: v_readlane_b32 s35, v40, 3 -; GFX10-NEXT: v_readlane_b32 s34, v40, 2 -; GFX10-NEXT: v_readlane_b32 s31, v40, 1 -; GFX10-NEXT: v_readlane_b32 s30, v40, 0 +; GFX10-NEXT: v_writelane_b32 v40, s35, 1 +; GFX10-NEXT: v_writelane_b32 v40, s36, 2 +; GFX10-NEXT: v_writelane_b32 v40, s37, 3 +; GFX10-NEXT: v_writelane_b32 v40, s38, 4 +; GFX10-NEXT: v_writelane_b32 v40, s39, 5 +; GFX10-NEXT: v_writelane_b32 v40, s40, 6 +; GFX10-NEXT: v_writelane_b32 v40, s41, 7 +; GFX10-NEXT: v_writelane_b32 v40, s42, 8 +; GFX10-NEXT: v_writelane_b32 v40, s43, 9 +; GFX10-NEXT: v_writelane_b32 v40, s44, 10 +; GFX10-NEXT: v_writelane_b32 v40, s45, 11 +; GFX10-NEXT: v_writelane_b32 v40, s46, 12 +; GFX10-NEXT: v_writelane_b32 v40, s47, 13 +; GFX10-NEXT: v_writelane_b32 v40, s48, 14 +; GFX10-NEXT: v_writelane_b32 v40, s49, 15 +; GFX10-NEXT: v_writelane_b32 v40, s50, 16 +; GFX10-NEXT: v_writelane_b32 v40, s51, 17 +; GFX10-NEXT: v_writelane_b32 v40, s52, 18 +; GFX10-NEXT: v_writelane_b32 v40, s53, 19 +; GFX10-NEXT: v_writelane_b32 v40, s54, 20 +; GFX10-NEXT: v_writelane_b32 v40, s55, 21 +; GFX10-NEXT: v_writelane_b32 v40, s56, 22 +; GFX10-NEXT: v_writelane_b32 v40, s57, 23 +; GFX10-NEXT: v_writelane_b32 v40, s58, 24 +; GFX10-NEXT: v_writelane_b32 v40, s59, 25 +; GFX10-NEXT: v_writelane_b32 v40, s60, 26 +; GFX10-NEXT: v_writelane_b32 v40, s61, 27 +; GFX10-NEXT: v_writelane_b32 v40, s62, 28 +; GFX10-NEXT: v_writelane_b32 v40, s63, 29 +; GFX10-NEXT: s_swappc_b64 s[30:31], s[6:7] +; GFX10-NEXT: v_readlane_b32 s63, v40, 29 +; GFX10-NEXT: v_readlane_b32 s62, v40, 28 +; GFX10-NEXT: v_readlane_b32 s61, v40, 27 +; GFX10-NEXT: v_readlane_b32 s60, v40, 26 +; GFX10-NEXT: v_readlane_b32 s59, v40, 25 +; GFX10-NEXT: v_readlane_b32 s58, v40, 24 +; GFX10-NEXT: v_readlane_b32 s57, v40, 23 +; GFX10-NEXT: v_readlane_b32 s56, v40, 22 +; GFX10-NEXT: v_readlane_b32 s55, v40, 21 +; GFX10-NEXT: v_readlane_b32 s54, v40, 20 +; GFX10-NEXT: v_readlane_b32 s53, v40, 19 +; GFX10-NEXT: v_readlane_b32 s52, v40, 18 +; GFX10-NEXT: v_readlane_b32 s51, v40, 17 +; GFX10-NEXT: v_readlane_b32 s50, v40, 16 +; GFX10-NEXT: v_readlane_b32 s49, v40, 15 +; GFX10-NEXT: v_readlane_b32 s48, v40, 14 +; GFX10-NEXT: v_readlane_b32 s47, v40, 13 +; GFX10-NEXT: v_readlane_b32 s46, v40, 12 +; GFX10-NEXT: v_readlane_b32 s45, v40, 11 +; GFX10-NEXT: v_readlane_b32 s44, v40, 10 +; GFX10-NEXT: v_readlane_b32 s43, v40, 9 +; GFX10-NEXT: v_readlane_b32 s42, v40, 8 +; GFX10-NEXT: v_readlane_b32 s41, v40, 7 +; GFX10-NEXT: v_readlane_b32 s40, v40, 6 +; GFX10-NEXT: v_readlane_b32 s39, 
v40, 5 +; GFX10-NEXT: v_readlane_b32 s38, v40, 4 +; GFX10-NEXT: v_readlane_b32 s37, v40, 3 +; GFX10-NEXT: v_readlane_b32 s36, v40, 2 +; GFX10-NEXT: v_readlane_b32 s35, v40, 1 +; GFX10-NEXT: v_readlane_b32 s34, v40, 0 ; GFX10-NEXT: s_addk_i32 s32, 0xfc00 -; GFX10-NEXT: s_mov_b32 s33, s6 -; GFX10-NEXT: s_or_saveexec_b32 s4, -1 +; GFX10-NEXT: v_readlane_b32 s33, v40, 30 +; GFX10-NEXT: s_or_saveexec_b32 s6, -1 ; GFX10-NEXT: buffer_load_dword v40, off, s[0:3], s32 offset:24 ; 4-byte Folded Reload ; GFX10-NEXT: s_waitcnt_depctr 0xffe3 -; GFX10-NEXT: s_mov_b32 exec_lo, s4 +; GFX10-NEXT: s_mov_b32 exec_lo, s6 ; GFX10-NEXT: s_waitcnt vmcnt(0) -; GFX10-NEXT: s_setpc_b64 s[30:31] +; GFX10-NEXT: s_setpc_b64 s[4:5] ; ; GFX10-SCRATCH-LABEL: tail_call_byval_align16: ; GFX10-SCRATCH: ; %bb.0: ; %entry @@ -5032,90 +5031,87 @@ define void @tail_call_byval_align16(<32 x i32> %val, double %tmp) #0 { ; GFX10-SCRATCH-NEXT: scratch_store_dword off, v40, s32 offset:24 ; 4-byte Folded Spill ; GFX10-SCRATCH-NEXT: s_waitcnt_depctr 0xffe3 ; GFX10-SCRATCH-NEXT: s_mov_b32 exec_lo, s0 -; GFX10-SCRATCH-NEXT: s_mov_b32 s4, s33 +; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s33, 30 ; GFX10-SCRATCH-NEXT: s_mov_b32 s33, s32 ; GFX10-SCRATCH-NEXT: s_clause 0x1 ; GFX10-SCRATCH-NEXT: scratch_load_dwordx2 v[32:33], off, s33 offset:16 ; GFX10-SCRATCH-NEXT: scratch_load_dword v31, off, s33 -; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s30, 0 ; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, 32 +; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s34, 0 +; GFX10-SCRATCH-NEXT: s_mov_b64 s[4:5], s[30:31] ; GFX10-SCRATCH-NEXT: s_getpc_b64 s[0:1] ; GFX10-SCRATCH-NEXT: s_add_u32 s0, s0, byval_align16_f64_arg@rel32@lo+4 ; GFX10-SCRATCH-NEXT: s_addc_u32 s1, s1, byval_align16_f64_arg@rel32@hi+12 -; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s31, 1 -; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s34, 2 -; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s35, 3 -; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s36, 4 -; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s37, 5 -; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s38, 6 -; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s39, 7 -; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s40, 8 -; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s41, 9 -; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s42, 10 -; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s43, 11 -; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s44, 12 -; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s45, 13 -; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s46, 14 -; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s47, 15 -; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s48, 16 -; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s49, 17 -; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s50, 18 -; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s51, 19 -; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s52, 20 -; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s53, 21 -; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s54, 22 -; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s55, 23 -; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s56, 24 -; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s57, 25 -; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s58, 26 -; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s59, 27 -; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s60, 28 -; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s61, 29 -; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s62, 30 -; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s63, 31 +; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s35, 1 +; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s36, 2 +; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s37, 3 +; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, 
s38, 4 +; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s39, 5 +; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s40, 6 +; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s41, 7 +; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s42, 8 +; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s43, 9 +; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s44, 10 +; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s45, 11 +; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s46, 12 +; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s47, 13 +; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s48, 14 +; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s49, 15 +; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s50, 16 +; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s51, 17 +; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s52, 18 +; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s53, 19 +; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s54, 20 +; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s55, 21 +; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s56, 22 +; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s57, 23 +; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s58, 24 +; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s59, 25 +; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s60, 26 +; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s61, 27 +; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s62, 28 +; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s63, 29 ; GFX10-SCRATCH-NEXT: s_waitcnt vmcnt(1) ; GFX10-SCRATCH-NEXT: scratch_store_dwordx2 off, v[32:33], s32 ; GFX10-SCRATCH-NEXT: s_swappc_b64 s[30:31], s[0:1] -; GFX10-SCRATCH-NEXT: v_readlane_b32 s63, v40, 31 -; GFX10-SCRATCH-NEXT: v_readlane_b32 s62, v40, 30 -; GFX10-SCRATCH-NEXT: v_readlane_b32 s61, v40, 29 -; GFX10-SCRATCH-NEXT: v_readlane_b32 s60, v40, 28 -; GFX10-SCRATCH-NEXT: v_readlane_b32 s59, v40, 27 -; GFX10-SCRATCH-NEXT: v_readlane_b32 s58, v40, 26 -; GFX10-SCRATCH-NEXT: v_readlane_b32 s57, v40, 25 -; GFX10-SCRATCH-NEXT: v_readlane_b32 s56, v40, 24 -; GFX10-SCRATCH-NEXT: v_readlane_b32 s55, v40, 23 -; GFX10-SCRATCH-NEXT: v_readlane_b32 s54, v40, 22 -; GFX10-SCRATCH-NEXT: v_readlane_b32 s53, v40, 21 -; GFX10-SCRATCH-NEXT: v_readlane_b32 s52, v40, 20 -; GFX10-SCRATCH-NEXT: v_readlane_b32 s51, v40, 19 -; GFX10-SCRATCH-NEXT: v_readlane_b32 s50, v40, 18 -; GFX10-SCRATCH-NEXT: v_readlane_b32 s49, v40, 17 -; GFX10-SCRATCH-NEXT: v_readlane_b32 s48, v40, 16 -; GFX10-SCRATCH-NEXT: v_readlane_b32 s47, v40, 15 -; GFX10-SCRATCH-NEXT: v_readlane_b32 s46, v40, 14 -; GFX10-SCRATCH-NEXT: v_readlane_b32 s45, v40, 13 -; GFX10-SCRATCH-NEXT: v_readlane_b32 s44, v40, 12 -; GFX10-SCRATCH-NEXT: v_readlane_b32 s43, v40, 11 -; GFX10-SCRATCH-NEXT: v_readlane_b32 s42, v40, 10 -; GFX10-SCRATCH-NEXT: v_readlane_b32 s41, v40, 9 -; GFX10-SCRATCH-NEXT: v_readlane_b32 s40, v40, 8 -; GFX10-SCRATCH-NEXT: v_readlane_b32 s39, v40, 7 -; GFX10-SCRATCH-NEXT: v_readlane_b32 s38, v40, 6 -; GFX10-SCRATCH-NEXT: v_readlane_b32 s37, v40, 5 -; GFX10-SCRATCH-NEXT: v_readlane_b32 s36, v40, 4 -; GFX10-SCRATCH-NEXT: v_readlane_b32 s35, v40, 3 -; GFX10-SCRATCH-NEXT: v_readlane_b32 s34, v40, 2 -; GFX10-SCRATCH-NEXT: v_readlane_b32 s31, v40, 1 -; GFX10-SCRATCH-NEXT: v_readlane_b32 s30, v40, 0 +; GFX10-SCRATCH-NEXT: v_readlane_b32 s63, v40, 29 +; GFX10-SCRATCH-NEXT: v_readlane_b32 s62, v40, 28 +; GFX10-SCRATCH-NEXT: v_readlane_b32 s61, v40, 27 +; GFX10-SCRATCH-NEXT: v_readlane_b32 s60, v40, 26 +; GFX10-SCRATCH-NEXT: v_readlane_b32 s59, v40, 25 +; GFX10-SCRATCH-NEXT: v_readlane_b32 s58, v40, 24 +; GFX10-SCRATCH-NEXT: v_readlane_b32 s57, v40, 23 +; GFX10-SCRATCH-NEXT: v_readlane_b32 s56, v40, 22 +; GFX10-SCRATCH-NEXT: v_readlane_b32 s55, v40, 21 +; 
GFX10-SCRATCH-NEXT: v_readlane_b32 s54, v40, 20 +; GFX10-SCRATCH-NEXT: v_readlane_b32 s53, v40, 19 +; GFX10-SCRATCH-NEXT: v_readlane_b32 s52, v40, 18 +; GFX10-SCRATCH-NEXT: v_readlane_b32 s51, v40, 17 +; GFX10-SCRATCH-NEXT: v_readlane_b32 s50, v40, 16 +; GFX10-SCRATCH-NEXT: v_readlane_b32 s49, v40, 15 +; GFX10-SCRATCH-NEXT: v_readlane_b32 s48, v40, 14 +; GFX10-SCRATCH-NEXT: v_readlane_b32 s47, v40, 13 +; GFX10-SCRATCH-NEXT: v_readlane_b32 s46, v40, 12 +; GFX10-SCRATCH-NEXT: v_readlane_b32 s45, v40, 11 +; GFX10-SCRATCH-NEXT: v_readlane_b32 s44, v40, 10 +; GFX10-SCRATCH-NEXT: v_readlane_b32 s43, v40, 9 +; GFX10-SCRATCH-NEXT: v_readlane_b32 s42, v40, 8 +; GFX10-SCRATCH-NEXT: v_readlane_b32 s41, v40, 7 +; GFX10-SCRATCH-NEXT: v_readlane_b32 s40, v40, 6 +; GFX10-SCRATCH-NEXT: v_readlane_b32 s39, v40, 5 +; GFX10-SCRATCH-NEXT: v_readlane_b32 s38, v40, 4 +; GFX10-SCRATCH-NEXT: v_readlane_b32 s37, v40, 3 +; GFX10-SCRATCH-NEXT: v_readlane_b32 s36, v40, 2 +; GFX10-SCRATCH-NEXT: v_readlane_b32 s35, v40, 1 +; GFX10-SCRATCH-NEXT: v_readlane_b32 s34, v40, 0 ; GFX10-SCRATCH-NEXT: s_addk_i32 s32, 0xffe0 -; GFX10-SCRATCH-NEXT: s_mov_b32 s33, s4 +; GFX10-SCRATCH-NEXT: v_readlane_b32 s33, v40, 30 ; GFX10-SCRATCH-NEXT: s_or_saveexec_b32 s0, -1 ; GFX10-SCRATCH-NEXT: scratch_load_dword v40, off, s32 offset:24 ; 4-byte Folded Reload ; GFX10-SCRATCH-NEXT: s_waitcnt_depctr 0xffe3 ; GFX10-SCRATCH-NEXT: s_mov_b32 exec_lo, s0 ; GFX10-SCRATCH-NEXT: s_waitcnt vmcnt(0) -; GFX10-SCRATCH-NEXT: s_setpc_b64 s[30:31] +; GFX10-SCRATCH-NEXT: s_setpc_b64 s[4:5] entry: %alloca = alloca double, align 8, addrspace(5) tail call amdgpu_gfx void @byval_align16_f64_arg(<32 x i32> %val, double addrspace(5)* byval(double) align 16 %alloca) @@ -5136,13 +5132,13 @@ define amdgpu_gfx void @test_call_external_void_func_i1_imm_inreg() #0 { ; GFX9-NEXT: v_writelane_b32 v40, s30, 0 ; GFX9-NEXT: v_mov_b32_e32 v0, 1 ; GFX9-NEXT: v_writelane_b32 v40, s31, 1 -; GFX9-NEXT: s_getpc_b64 s[34:35] -; GFX9-NEXT: s_add_u32 s34, s34, external_void_func_i1_inreg@rel32@lo+4 -; GFX9-NEXT: s_addc_u32 s35, s35, external_void_func_i1_inreg@rel32@hi+12 +; GFX9-NEXT: s_getpc_b64 s[30:31] +; GFX9-NEXT: s_add_u32 s30, s30, external_void_func_i1_inreg@rel32@lo+4 +; GFX9-NEXT: s_addc_u32 s31, s31, external_void_func_i1_inreg@rel32@hi+12 ; GFX9-NEXT: buffer_store_byte v0, off, s[0:3], s32 -; GFX9-NEXT: s_swappc_b64 s[30:31], s[34:35] -; GFX9-NEXT: v_readlane_b32 s31, v40, 1 +; GFX9-NEXT: s_swappc_b64 s[30:31], s[30:31] ; GFX9-NEXT: v_readlane_b32 s30, v40, 0 +; GFX9-NEXT: v_readlane_b32 s31, v40, 1 ; GFX9-NEXT: s_addk_i32 s32, 0xfc00 ; GFX9-NEXT: v_readlane_b32 s33, v40, 2 ; GFX9-NEXT: s_or_saveexec_b64 s[34:35], -1 @@ -5163,15 +5159,15 @@ define amdgpu_gfx void @test_call_external_void_func_i1_imm_inreg() #0 { ; GFX10-NEXT: v_mov_b32_e32 v0, 1 ; GFX10-NEXT: s_mov_b32 s33, s32 ; GFX10-NEXT: s_addk_i32 s32, 0x200 -; GFX10-NEXT: s_getpc_b64 s[34:35] -; GFX10-NEXT: s_add_u32 s34, s34, external_void_func_i1_inreg@rel32@lo+4 -; GFX10-NEXT: s_addc_u32 s35, s35, external_void_func_i1_inreg@rel32@hi+12 ; GFX10-NEXT: v_writelane_b32 v40, s30, 0 ; GFX10-NEXT: buffer_store_byte v0, off, s[0:3], s32 ; GFX10-NEXT: v_writelane_b32 v40, s31, 1 -; GFX10-NEXT: s_swappc_b64 s[30:31], s[34:35] -; GFX10-NEXT: v_readlane_b32 s31, v40, 1 +; GFX10-NEXT: s_getpc_b64 s[30:31] +; GFX10-NEXT: s_add_u32 s30, s30, external_void_func_i1_inreg@rel32@lo+4 +; GFX10-NEXT: s_addc_u32 s31, s31, external_void_func_i1_inreg@rel32@hi+12 +; GFX10-NEXT: s_swappc_b64 s[30:31], s[30:31] ; GFX10-NEXT: 
v_readlane_b32 s30, v40, 0 +; GFX10-NEXT: v_readlane_b32 s31, v40, 1 ; GFX10-NEXT: s_addk_i32 s32, 0xfe00 ; GFX10-NEXT: v_readlane_b32 s33, v40, 2 ; GFX10-NEXT: s_or_saveexec_b32 s34, -1 @@ -5200,8 +5196,8 @@ define amdgpu_gfx void @test_call_external_void_func_i1_imm_inreg() #0 { ; GFX10-SCRATCH-NEXT: scratch_store_byte off, v0, s32 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s31, 1 ; GFX10-SCRATCH-NEXT: s_swappc_b64 s[30:31], s[0:1] -; GFX10-SCRATCH-NEXT: v_readlane_b32 s31, v40, 1 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s30, v40, 0 +; GFX10-SCRATCH-NEXT: v_readlane_b32 s31, v40, 1 ; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, -16 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s33, v40, 2 ; GFX10-SCRATCH-NEXT: s_or_saveexec_b32 s0, -1 @@ -5228,12 +5224,12 @@ define amdgpu_gfx void @test_call_external_void_func_i8_imm_inreg(i32) #0 { ; GFX9-NEXT: v_writelane_b32 v40, s30, 1 ; GFX9-NEXT: s_movk_i32 s4, 0x7b ; GFX9-NEXT: v_writelane_b32 v40, s31, 2 -; GFX9-NEXT: s_getpc_b64 s[34:35] -; GFX9-NEXT: s_add_u32 s34, s34, external_void_func_i8_inreg@rel32@lo+4 -; GFX9-NEXT: s_addc_u32 s35, s35, external_void_func_i8_inreg@rel32@hi+12 -; GFX9-NEXT: s_swappc_b64 s[30:31], s[34:35] -; GFX9-NEXT: v_readlane_b32 s31, v40, 2 +; GFX9-NEXT: s_getpc_b64 s[30:31] +; GFX9-NEXT: s_add_u32 s30, s30, external_void_func_i8_inreg@rel32@lo+4 +; GFX9-NEXT: s_addc_u32 s31, s31, external_void_func_i8_inreg@rel32@hi+12 +; GFX9-NEXT: s_swappc_b64 s[30:31], s[30:31] ; GFX9-NEXT: v_readlane_b32 s30, v40, 1 +; GFX9-NEXT: v_readlane_b32 s31, v40, 2 ; GFX9-NEXT: v_readlane_b32 s4, v40, 0 ; GFX9-NEXT: s_addk_i32 s32, 0xfc00 ; GFX9-NEXT: v_readlane_b32 s33, v40, 3 @@ -5254,16 +5250,16 @@ define amdgpu_gfx void @test_call_external_void_func_i8_imm_inreg(i32) #0 { ; GFX10-NEXT: v_writelane_b32 v40, s33, 3 ; GFX10-NEXT: s_mov_b32 s33, s32 ; GFX10-NEXT: s_addk_i32 s32, 0x200 -; GFX10-NEXT: s_getpc_b64 s[34:35] -; GFX10-NEXT: s_add_u32 s34, s34, external_void_func_i8_inreg@rel32@lo+4 -; GFX10-NEXT: s_addc_u32 s35, s35, external_void_func_i8_inreg@rel32@hi+12 ; GFX10-NEXT: v_writelane_b32 v40, s4, 0 ; GFX10-NEXT: s_movk_i32 s4, 0x7b ; GFX10-NEXT: v_writelane_b32 v40, s30, 1 ; GFX10-NEXT: v_writelane_b32 v40, s31, 2 -; GFX10-NEXT: s_swappc_b64 s[30:31], s[34:35] -; GFX10-NEXT: v_readlane_b32 s31, v40, 2 +; GFX10-NEXT: s_getpc_b64 s[30:31] +; GFX10-NEXT: s_add_u32 s30, s30, external_void_func_i8_inreg@rel32@lo+4 +; GFX10-NEXT: s_addc_u32 s31, s31, external_void_func_i8_inreg@rel32@hi+12 +; GFX10-NEXT: s_swappc_b64 s[30:31], s[30:31] ; GFX10-NEXT: v_readlane_b32 s30, v40, 1 +; GFX10-NEXT: v_readlane_b32 s31, v40, 2 ; GFX10-NEXT: v_readlane_b32 s4, v40, 0 ; GFX10-NEXT: s_addk_i32 s32, 0xfe00 ; GFX10-NEXT: v_readlane_b32 s33, v40, 3 @@ -5293,8 +5289,8 @@ define amdgpu_gfx void @test_call_external_void_func_i8_imm_inreg(i32) #0 { ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s30, 1 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s31, 2 ; GFX10-SCRATCH-NEXT: s_swappc_b64 s[30:31], s[0:1] -; GFX10-SCRATCH-NEXT: v_readlane_b32 s31, v40, 2 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s30, v40, 1 +; GFX10-SCRATCH-NEXT: v_readlane_b32 s31, v40, 2 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s4, v40, 0 ; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, -16 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s33, v40, 3 @@ -5322,12 +5318,12 @@ define amdgpu_gfx void @test_call_external_void_func_i16_imm_inreg() #0 { ; GFX9-NEXT: v_writelane_b32 v40, s30, 1 ; GFX9-NEXT: s_movk_i32 s4, 0x7b ; GFX9-NEXT: v_writelane_b32 v40, s31, 2 -; GFX9-NEXT: s_getpc_b64 s[34:35] -; GFX9-NEXT: s_add_u32 s34, s34, 
external_void_func_i16_inreg@rel32@lo+4 -; GFX9-NEXT: s_addc_u32 s35, s35, external_void_func_i16_inreg@rel32@hi+12 -; GFX9-NEXT: s_swappc_b64 s[30:31], s[34:35] -; GFX9-NEXT: v_readlane_b32 s31, v40, 2 +; GFX9-NEXT: s_getpc_b64 s[30:31] +; GFX9-NEXT: s_add_u32 s30, s30, external_void_func_i16_inreg@rel32@lo+4 +; GFX9-NEXT: s_addc_u32 s31, s31, external_void_func_i16_inreg@rel32@hi+12 +; GFX9-NEXT: s_swappc_b64 s[30:31], s[30:31] ; GFX9-NEXT: v_readlane_b32 s30, v40, 1 +; GFX9-NEXT: v_readlane_b32 s31, v40, 2 ; GFX9-NEXT: v_readlane_b32 s4, v40, 0 ; GFX9-NEXT: s_addk_i32 s32, 0xfc00 ; GFX9-NEXT: v_readlane_b32 s33, v40, 3 @@ -5348,16 +5344,16 @@ define amdgpu_gfx void @test_call_external_void_func_i16_imm_inreg() #0 { ; GFX10-NEXT: v_writelane_b32 v40, s33, 3 ; GFX10-NEXT: s_mov_b32 s33, s32 ; GFX10-NEXT: s_addk_i32 s32, 0x200 -; GFX10-NEXT: s_getpc_b64 s[34:35] -; GFX10-NEXT: s_add_u32 s34, s34, external_void_func_i16_inreg@rel32@lo+4 -; GFX10-NEXT: s_addc_u32 s35, s35, external_void_func_i16_inreg@rel32@hi+12 ; GFX10-NEXT: v_writelane_b32 v40, s4, 0 ; GFX10-NEXT: s_movk_i32 s4, 0x7b ; GFX10-NEXT: v_writelane_b32 v40, s30, 1 ; GFX10-NEXT: v_writelane_b32 v40, s31, 2 -; GFX10-NEXT: s_swappc_b64 s[30:31], s[34:35] -; GFX10-NEXT: v_readlane_b32 s31, v40, 2 +; GFX10-NEXT: s_getpc_b64 s[30:31] +; GFX10-NEXT: s_add_u32 s30, s30, external_void_func_i16_inreg@rel32@lo+4 +; GFX10-NEXT: s_addc_u32 s31, s31, external_void_func_i16_inreg@rel32@hi+12 +; GFX10-NEXT: s_swappc_b64 s[30:31], s[30:31] ; GFX10-NEXT: v_readlane_b32 s30, v40, 1 +; GFX10-NEXT: v_readlane_b32 s31, v40, 2 ; GFX10-NEXT: v_readlane_b32 s4, v40, 0 ; GFX10-NEXT: s_addk_i32 s32, 0xfe00 ; GFX10-NEXT: v_readlane_b32 s33, v40, 3 @@ -5387,8 +5383,8 @@ define amdgpu_gfx void @test_call_external_void_func_i16_imm_inreg() #0 { ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s30, 1 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s31, 2 ; GFX10-SCRATCH-NEXT: s_swappc_b64 s[30:31], s[0:1] -; GFX10-SCRATCH-NEXT: v_readlane_b32 s31, v40, 2 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s30, v40, 1 +; GFX10-SCRATCH-NEXT: v_readlane_b32 s31, v40, 2 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s4, v40, 0 ; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, -16 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s33, v40, 3 @@ -5416,12 +5412,12 @@ define amdgpu_gfx void @test_call_external_void_func_i32_imm_inreg(i32) #0 { ; GFX9-NEXT: v_writelane_b32 v40, s30, 1 ; GFX9-NEXT: s_mov_b32 s4, 42 ; GFX9-NEXT: v_writelane_b32 v40, s31, 2 -; GFX9-NEXT: s_getpc_b64 s[34:35] -; GFX9-NEXT: s_add_u32 s34, s34, external_void_func_i32_inreg@rel32@lo+4 -; GFX9-NEXT: s_addc_u32 s35, s35, external_void_func_i32_inreg@rel32@hi+12 -; GFX9-NEXT: s_swappc_b64 s[30:31], s[34:35] -; GFX9-NEXT: v_readlane_b32 s31, v40, 2 +; GFX9-NEXT: s_getpc_b64 s[30:31] +; GFX9-NEXT: s_add_u32 s30, s30, external_void_func_i32_inreg@rel32@lo+4 +; GFX9-NEXT: s_addc_u32 s31, s31, external_void_func_i32_inreg@rel32@hi+12 +; GFX9-NEXT: s_swappc_b64 s[30:31], s[30:31] ; GFX9-NEXT: v_readlane_b32 s30, v40, 1 +; GFX9-NEXT: v_readlane_b32 s31, v40, 2 ; GFX9-NEXT: v_readlane_b32 s4, v40, 0 ; GFX9-NEXT: s_addk_i32 s32, 0xfc00 ; GFX9-NEXT: v_readlane_b32 s33, v40, 3 @@ -5442,16 +5438,16 @@ define amdgpu_gfx void @test_call_external_void_func_i32_imm_inreg(i32) #0 { ; GFX10-NEXT: v_writelane_b32 v40, s33, 3 ; GFX10-NEXT: s_mov_b32 s33, s32 ; GFX10-NEXT: s_addk_i32 s32, 0x200 -; GFX10-NEXT: s_getpc_b64 s[34:35] -; GFX10-NEXT: s_add_u32 s34, s34, external_void_func_i32_inreg@rel32@lo+4 -; GFX10-NEXT: s_addc_u32 s35, s35, 
external_void_func_i32_inreg@rel32@hi+12 ; GFX10-NEXT: v_writelane_b32 v40, s4, 0 ; GFX10-NEXT: s_mov_b32 s4, 42 ; GFX10-NEXT: v_writelane_b32 v40, s30, 1 ; GFX10-NEXT: v_writelane_b32 v40, s31, 2 -; GFX10-NEXT: s_swappc_b64 s[30:31], s[34:35] -; GFX10-NEXT: v_readlane_b32 s31, v40, 2 +; GFX10-NEXT: s_getpc_b64 s[30:31] +; GFX10-NEXT: s_add_u32 s30, s30, external_void_func_i32_inreg@rel32@lo+4 +; GFX10-NEXT: s_addc_u32 s31, s31, external_void_func_i32_inreg@rel32@hi+12 +; GFX10-NEXT: s_swappc_b64 s[30:31], s[30:31] ; GFX10-NEXT: v_readlane_b32 s30, v40, 1 +; GFX10-NEXT: v_readlane_b32 s31, v40, 2 ; GFX10-NEXT: v_readlane_b32 s4, v40, 0 ; GFX10-NEXT: s_addk_i32 s32, 0xfe00 ; GFX10-NEXT: v_readlane_b32 s33, v40, 3 @@ -5481,8 +5477,8 @@ define amdgpu_gfx void @test_call_external_void_func_i32_imm_inreg(i32) #0 { ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s30, 1 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s31, 2 ; GFX10-SCRATCH-NEXT: s_swappc_b64 s[30:31], s[0:1] -; GFX10-SCRATCH-NEXT: v_readlane_b32 s31, v40, 2 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s30, v40, 1 +; GFX10-SCRATCH-NEXT: v_readlane_b32 s31, v40, 2 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s4, v40, 0 ; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, -16 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s33, v40, 3 @@ -5512,12 +5508,12 @@ define amdgpu_gfx void @test_call_external_void_func_i64_imm_inreg() #0 { ; GFX9-NEXT: s_movk_i32 s4, 0x7b ; GFX9-NEXT: s_mov_b32 s5, 0 ; GFX9-NEXT: v_writelane_b32 v40, s31, 3 -; GFX9-NEXT: s_getpc_b64 s[34:35] -; GFX9-NEXT: s_add_u32 s34, s34, external_void_func_i64_inreg@rel32@lo+4 -; GFX9-NEXT: s_addc_u32 s35, s35, external_void_func_i64_inreg@rel32@hi+12 -; GFX9-NEXT: s_swappc_b64 s[30:31], s[34:35] -; GFX9-NEXT: v_readlane_b32 s31, v40, 3 +; GFX9-NEXT: s_getpc_b64 s[30:31] +; GFX9-NEXT: s_add_u32 s30, s30, external_void_func_i64_inreg@rel32@lo+4 +; GFX9-NEXT: s_addc_u32 s31, s31, external_void_func_i64_inreg@rel32@hi+12 +; GFX9-NEXT: s_swappc_b64 s[30:31], s[30:31] ; GFX9-NEXT: v_readlane_b32 s30, v40, 2 +; GFX9-NEXT: v_readlane_b32 s31, v40, 3 ; GFX9-NEXT: v_readlane_b32 s5, v40, 1 ; GFX9-NEXT: v_readlane_b32 s4, v40, 0 ; GFX9-NEXT: s_addk_i32 s32, 0xfc00 @@ -5539,18 +5535,18 @@ define amdgpu_gfx void @test_call_external_void_func_i64_imm_inreg() #0 { ; GFX10-NEXT: v_writelane_b32 v40, s33, 4 ; GFX10-NEXT: s_mov_b32 s33, s32 ; GFX10-NEXT: s_addk_i32 s32, 0x200 -; GFX10-NEXT: s_getpc_b64 s[34:35] -; GFX10-NEXT: s_add_u32 s34, s34, external_void_func_i64_inreg@rel32@lo+4 -; GFX10-NEXT: s_addc_u32 s35, s35, external_void_func_i64_inreg@rel32@hi+12 ; GFX10-NEXT: v_writelane_b32 v40, s4, 0 ; GFX10-NEXT: s_movk_i32 s4, 0x7b ; GFX10-NEXT: v_writelane_b32 v40, s5, 1 ; GFX10-NEXT: s_mov_b32 s5, 0 ; GFX10-NEXT: v_writelane_b32 v40, s30, 2 ; GFX10-NEXT: v_writelane_b32 v40, s31, 3 -; GFX10-NEXT: s_swappc_b64 s[30:31], s[34:35] -; GFX10-NEXT: v_readlane_b32 s31, v40, 3 +; GFX10-NEXT: s_getpc_b64 s[30:31] +; GFX10-NEXT: s_add_u32 s30, s30, external_void_func_i64_inreg@rel32@lo+4 +; GFX10-NEXT: s_addc_u32 s31, s31, external_void_func_i64_inreg@rel32@hi+12 +; GFX10-NEXT: s_swappc_b64 s[30:31], s[30:31] ; GFX10-NEXT: v_readlane_b32 s30, v40, 2 +; GFX10-NEXT: v_readlane_b32 s31, v40, 3 ; GFX10-NEXT: v_readlane_b32 s5, v40, 1 ; GFX10-NEXT: v_readlane_b32 s4, v40, 0 ; GFX10-NEXT: s_addk_i32 s32, 0xfe00 @@ -5583,8 +5579,8 @@ define amdgpu_gfx void @test_call_external_void_func_i64_imm_inreg() #0 { ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s30, 2 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s31, 3 ; GFX10-SCRATCH-NEXT: s_swappc_b64 
s[30:31], s[0:1] -; GFX10-SCRATCH-NEXT: v_readlane_b32 s31, v40, 3 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s30, v40, 2 +; GFX10-SCRATCH-NEXT: v_readlane_b32 s31, v40, 3 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s5, v40, 1 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s4, v40, 0 ; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, -16 @@ -5610,19 +5606,19 @@ define amdgpu_gfx void @test_call_external_void_func_v2i64_inreg() #0 { ; GFX9-NEXT: v_writelane_b32 v40, s4, 0 ; GFX9-NEXT: v_writelane_b32 v40, s5, 1 ; GFX9-NEXT: v_writelane_b32 v40, s6, 2 -; GFX9-NEXT: s_mov_b64 s[34:35], 0 ; GFX9-NEXT: v_writelane_b32 v40, s7, 3 -; GFX9-NEXT: s_load_dwordx4 s[4:7], s[34:35], 0x0 -; GFX9-NEXT: s_mov_b32 s33, s32 -; GFX9-NEXT: s_addk_i32 s32, 0x400 ; GFX9-NEXT: v_writelane_b32 v40, s30, 4 ; GFX9-NEXT: v_writelane_b32 v40, s31, 5 -; GFX9-NEXT: s_getpc_b64 s[34:35] -; GFX9-NEXT: s_add_u32 s34, s34, external_void_func_v2i64_inreg@rel32@lo+4 -; GFX9-NEXT: s_addc_u32 s35, s35, external_void_func_v2i64_inreg@rel32@hi+12 -; GFX9-NEXT: s_swappc_b64 s[30:31], s[34:35] -; GFX9-NEXT: v_readlane_b32 s31, v40, 5 +; GFX9-NEXT: s_mov_b64 s[30:31], 0 +; GFX9-NEXT: s_load_dwordx4 s[4:7], s[30:31], 0x0 +; GFX9-NEXT: s_mov_b32 s33, s32 +; GFX9-NEXT: s_addk_i32 s32, 0x400 +; GFX9-NEXT: s_getpc_b64 s[30:31] +; GFX9-NEXT: s_add_u32 s30, s30, external_void_func_v2i64_inreg@rel32@lo+4 +; GFX9-NEXT: s_addc_u32 s31, s31, external_void_func_v2i64_inreg@rel32@hi+12 +; GFX9-NEXT: s_swappc_b64 s[30:31], s[30:31] ; GFX9-NEXT: v_readlane_b32 s30, v40, 4 +; GFX9-NEXT: v_readlane_b32 s31, v40, 5 ; GFX9-NEXT: v_readlane_b32 s7, v40, 3 ; GFX9-NEXT: v_readlane_b32 s6, v40, 2 ; GFX9-NEXT: v_readlane_b32 s5, v40, 1 @@ -5644,22 +5640,22 @@ define amdgpu_gfx void @test_call_external_void_func_v2i64_inreg() #0 { ; GFX10-NEXT: s_waitcnt_depctr 0xffe3 ; GFX10-NEXT: s_mov_b32 exec_lo, s34 ; GFX10-NEXT: v_writelane_b32 v40, s33, 6 -; GFX10-NEXT: s_mov_b64 s[34:35], 0 ; GFX10-NEXT: s_mov_b32 s33, s32 ; GFX10-NEXT: s_addk_i32 s32, 0x200 ; GFX10-NEXT: v_writelane_b32 v40, s4, 0 ; GFX10-NEXT: v_writelane_b32 v40, s5, 1 ; GFX10-NEXT: v_writelane_b32 v40, s6, 2 ; GFX10-NEXT: v_writelane_b32 v40, s7, 3 -; GFX10-NEXT: s_load_dwordx4 s[4:7], s[34:35], 0x0 -; GFX10-NEXT: s_getpc_b64 s[34:35] -; GFX10-NEXT: s_add_u32 s34, s34, external_void_func_v2i64_inreg@rel32@lo+4 -; GFX10-NEXT: s_addc_u32 s35, s35, external_void_func_v2i64_inreg@rel32@hi+12 ; GFX10-NEXT: v_writelane_b32 v40, s30, 4 ; GFX10-NEXT: v_writelane_b32 v40, s31, 5 -; GFX10-NEXT: s_swappc_b64 s[30:31], s[34:35] -; GFX10-NEXT: v_readlane_b32 s31, v40, 5 +; GFX10-NEXT: s_mov_b64 s[30:31], 0 +; GFX10-NEXT: s_load_dwordx4 s[4:7], s[30:31], 0x0 +; GFX10-NEXT: s_getpc_b64 s[30:31] +; GFX10-NEXT: s_add_u32 s30, s30, external_void_func_v2i64_inreg@rel32@lo+4 +; GFX10-NEXT: s_addc_u32 s31, s31, external_void_func_v2i64_inreg@rel32@hi+12 +; GFX10-NEXT: s_swappc_b64 s[30:31], s[30:31] ; GFX10-NEXT: v_readlane_b32 s30, v40, 4 +; GFX10-NEXT: v_readlane_b32 s31, v40, 5 ; GFX10-NEXT: v_readlane_b32 s7, v40, 3 ; GFX10-NEXT: v_readlane_b32 s6, v40, 2 ; GFX10-NEXT: v_readlane_b32 s5, v40, 1 @@ -5696,8 +5692,8 @@ define amdgpu_gfx void @test_call_external_void_func_v2i64_inreg() #0 { ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s30, 4 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s31, 5 ; GFX10-SCRATCH-NEXT: s_swappc_b64 s[30:31], s[0:1] -; GFX10-SCRATCH-NEXT: v_readlane_b32 s31, v40, 5 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s30, v40, 4 +; GFX10-SCRATCH-NEXT: v_readlane_b32 s31, v40, 5 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s7, v40, 3 ; 
GFX10-SCRATCH-NEXT: v_readlane_b32 s6, v40, 2 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s5, v40, 1 @@ -5735,12 +5731,12 @@ define amdgpu_gfx void @test_call_external_void_func_v2i64_imm_inreg() #0 { ; GFX9-NEXT: s_mov_b32 s6, 3 ; GFX9-NEXT: s_mov_b32 s7, 4 ; GFX9-NEXT: v_writelane_b32 v40, s31, 5 -; GFX9-NEXT: s_getpc_b64 s[34:35] -; GFX9-NEXT: s_add_u32 s34, s34, external_void_func_v2i64_inreg@rel32@lo+4 -; GFX9-NEXT: s_addc_u32 s35, s35, external_void_func_v2i64_inreg@rel32@hi+12 -; GFX9-NEXT: s_swappc_b64 s[30:31], s[34:35] -; GFX9-NEXT: v_readlane_b32 s31, v40, 5 +; GFX9-NEXT: s_getpc_b64 s[30:31] +; GFX9-NEXT: s_add_u32 s30, s30, external_void_func_v2i64_inreg@rel32@lo+4 +; GFX9-NEXT: s_addc_u32 s31, s31, external_void_func_v2i64_inreg@rel32@hi+12 +; GFX9-NEXT: s_swappc_b64 s[30:31], s[30:31] ; GFX9-NEXT: v_readlane_b32 s30, v40, 4 +; GFX9-NEXT: v_readlane_b32 s31, v40, 5 ; GFX9-NEXT: v_readlane_b32 s7, v40, 3 ; GFX9-NEXT: v_readlane_b32 s6, v40, 2 ; GFX9-NEXT: v_readlane_b32 s5, v40, 1 @@ -5764,9 +5760,6 @@ define amdgpu_gfx void @test_call_external_void_func_v2i64_imm_inreg() #0 { ; GFX10-NEXT: v_writelane_b32 v40, s33, 6 ; GFX10-NEXT: s_mov_b32 s33, s32 ; GFX10-NEXT: s_addk_i32 s32, 0x200 -; GFX10-NEXT: s_getpc_b64 s[34:35] -; GFX10-NEXT: s_add_u32 s34, s34, external_void_func_v2i64_inreg@rel32@lo+4 -; GFX10-NEXT: s_addc_u32 s35, s35, external_void_func_v2i64_inreg@rel32@hi+12 ; GFX10-NEXT: v_writelane_b32 v40, s4, 0 ; GFX10-NEXT: s_mov_b32 s4, 1 ; GFX10-NEXT: v_writelane_b32 v40, s5, 1 @@ -5777,9 +5770,12 @@ define amdgpu_gfx void @test_call_external_void_func_v2i64_imm_inreg() #0 { ; GFX10-NEXT: s_mov_b32 s7, 4 ; GFX10-NEXT: v_writelane_b32 v40, s30, 4 ; GFX10-NEXT: v_writelane_b32 v40, s31, 5 -; GFX10-NEXT: s_swappc_b64 s[30:31], s[34:35] -; GFX10-NEXT: v_readlane_b32 s31, v40, 5 +; GFX10-NEXT: s_getpc_b64 s[30:31] +; GFX10-NEXT: s_add_u32 s30, s30, external_void_func_v2i64_inreg@rel32@lo+4 +; GFX10-NEXT: s_addc_u32 s31, s31, external_void_func_v2i64_inreg@rel32@hi+12 +; GFX10-NEXT: s_swappc_b64 s[30:31], s[30:31] ; GFX10-NEXT: v_readlane_b32 s30, v40, 4 +; GFX10-NEXT: v_readlane_b32 s31, v40, 5 ; GFX10-NEXT: v_readlane_b32 s7, v40, 3 ; GFX10-NEXT: v_readlane_b32 s6, v40, 2 ; GFX10-NEXT: v_readlane_b32 s5, v40, 1 @@ -5818,8 +5814,8 @@ define amdgpu_gfx void @test_call_external_void_func_v2i64_imm_inreg() #0 { ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s30, 4 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s31, 5 ; GFX10-SCRATCH-NEXT: s_swappc_b64 s[30:31], s[0:1] -; GFX10-SCRATCH-NEXT: v_readlane_b32 s31, v40, 5 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s30, v40, 4 +; GFX10-SCRATCH-NEXT: v_readlane_b32 s31, v40, 5 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s7, v40, 3 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s6, v40, 2 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s5, v40, 1 @@ -5847,23 +5843,23 @@ define amdgpu_gfx void @test_call_external_void_func_v3i64_inreg() #0 { ; GFX9-NEXT: v_writelane_b32 v40, s4, 0 ; GFX9-NEXT: v_writelane_b32 v40, s5, 1 ; GFX9-NEXT: v_writelane_b32 v40, s6, 2 -; GFX9-NEXT: s_mov_b64 s[34:35], 0 ; GFX9-NEXT: v_writelane_b32 v40, s7, 3 -; GFX9-NEXT: s_load_dwordx4 s[4:7], s[34:35], 0x0 ; GFX9-NEXT: v_writelane_b32 v40, s8, 4 ; GFX9-NEXT: v_writelane_b32 v40, s9, 5 +; GFX9-NEXT: v_writelane_b32 v40, s30, 6 +; GFX9-NEXT: v_writelane_b32 v40, s31, 7 +; GFX9-NEXT: s_mov_b64 s[30:31], 0 +; GFX9-NEXT: s_load_dwordx4 s[4:7], s[30:31], 0x0 ; GFX9-NEXT: s_mov_b32 s33, s32 ; GFX9-NEXT: s_addk_i32 s32, 0x400 -; GFX9-NEXT: v_writelane_b32 v40, s30, 6 ; GFX9-NEXT: s_mov_b32 s8, 1 ; GFX9-NEXT: 
s_mov_b32 s9, 2 -; GFX9-NEXT: v_writelane_b32 v40, s31, 7 -; GFX9-NEXT: s_getpc_b64 s[34:35] -; GFX9-NEXT: s_add_u32 s34, s34, external_void_func_v3i64_inreg@rel32@lo+4 -; GFX9-NEXT: s_addc_u32 s35, s35, external_void_func_v3i64_inreg@rel32@hi+12 -; GFX9-NEXT: s_swappc_b64 s[30:31], s[34:35] -; GFX9-NEXT: v_readlane_b32 s31, v40, 7 +; GFX9-NEXT: s_getpc_b64 s[30:31] +; GFX9-NEXT: s_add_u32 s30, s30, external_void_func_v3i64_inreg@rel32@lo+4 +; GFX9-NEXT: s_addc_u32 s31, s31, external_void_func_v3i64_inreg@rel32@hi+12 +; GFX9-NEXT: s_swappc_b64 s[30:31], s[30:31] ; GFX9-NEXT: v_readlane_b32 s30, v40, 6 +; GFX9-NEXT: v_readlane_b32 s31, v40, 7 ; GFX9-NEXT: v_readlane_b32 s9, v40, 5 ; GFX9-NEXT: v_readlane_b32 s8, v40, 4 ; GFX9-NEXT: v_readlane_b32 s7, v40, 3 @@ -5887,26 +5883,26 @@ define amdgpu_gfx void @test_call_external_void_func_v3i64_inreg() #0 { ; GFX10-NEXT: s_waitcnt_depctr 0xffe3 ; GFX10-NEXT: s_mov_b32 exec_lo, s34 ; GFX10-NEXT: v_writelane_b32 v40, s33, 8 -; GFX10-NEXT: s_mov_b64 s[34:35], 0 ; GFX10-NEXT: s_mov_b32 s33, s32 ; GFX10-NEXT: s_addk_i32 s32, 0x200 ; GFX10-NEXT: v_writelane_b32 v40, s4, 0 ; GFX10-NEXT: v_writelane_b32 v40, s5, 1 ; GFX10-NEXT: v_writelane_b32 v40, s6, 2 ; GFX10-NEXT: v_writelane_b32 v40, s7, 3 -; GFX10-NEXT: s_load_dwordx4 s[4:7], s[34:35], 0x0 -; GFX10-NEXT: s_getpc_b64 s[34:35] -; GFX10-NEXT: s_add_u32 s34, s34, external_void_func_v3i64_inreg@rel32@lo+4 -; GFX10-NEXT: s_addc_u32 s35, s35, external_void_func_v3i64_inreg@rel32@hi+12 ; GFX10-NEXT: v_writelane_b32 v40, s8, 4 ; GFX10-NEXT: s_mov_b32 s8, 1 ; GFX10-NEXT: v_writelane_b32 v40, s9, 5 ; GFX10-NEXT: s_mov_b32 s9, 2 ; GFX10-NEXT: v_writelane_b32 v40, s30, 6 ; GFX10-NEXT: v_writelane_b32 v40, s31, 7 -; GFX10-NEXT: s_swappc_b64 s[30:31], s[34:35] -; GFX10-NEXT: v_readlane_b32 s31, v40, 7 +; GFX10-NEXT: s_mov_b64 s[30:31], 0 +; GFX10-NEXT: s_load_dwordx4 s[4:7], s[30:31], 0x0 +; GFX10-NEXT: s_getpc_b64 s[30:31] +; GFX10-NEXT: s_add_u32 s30, s30, external_void_func_v3i64_inreg@rel32@lo+4 +; GFX10-NEXT: s_addc_u32 s31, s31, external_void_func_v3i64_inreg@rel32@hi+12 +; GFX10-NEXT: s_swappc_b64 s[30:31], s[30:31] ; GFX10-NEXT: v_readlane_b32 s30, v40, 6 +; GFX10-NEXT: v_readlane_b32 s31, v40, 7 ; GFX10-NEXT: v_readlane_b32 s9, v40, 5 ; GFX10-NEXT: v_readlane_b32 s8, v40, 4 ; GFX10-NEXT: v_readlane_b32 s7, v40, 3 @@ -5949,8 +5945,8 @@ define amdgpu_gfx void @test_call_external_void_func_v3i64_inreg() #0 { ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s30, 6 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s31, 7 ; GFX10-SCRATCH-NEXT: s_swappc_b64 s[30:31], s[0:1] -; GFX10-SCRATCH-NEXT: v_readlane_b32 s31, v40, 7 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s30, v40, 6 +; GFX10-SCRATCH-NEXT: v_readlane_b32 s31, v40, 7 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s9, v40, 5 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s8, v40, 4 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s7, v40, 3 @@ -5984,26 +5980,26 @@ define amdgpu_gfx void @test_call_external_void_func_v4i64_inreg() #0 { ; GFX9-NEXT: v_writelane_b32 v40, s5, 1 ; GFX9-NEXT: v_writelane_b32 v40, s6, 2 ; GFX9-NEXT: v_writelane_b32 v40, s7, 3 -; GFX9-NEXT: s_mov_b64 s[34:35], 0 ; GFX9-NEXT: v_writelane_b32 v40, s8, 4 -; GFX9-NEXT: s_load_dwordx4 s[4:7], s[34:35], 0x0 ; GFX9-NEXT: v_writelane_b32 v40, s9, 5 ; GFX9-NEXT: v_writelane_b32 v40, s10, 6 ; GFX9-NEXT: v_writelane_b32 v40, s11, 7 +; GFX9-NEXT: v_writelane_b32 v40, s30, 8 +; GFX9-NEXT: v_writelane_b32 v40, s31, 9 +; GFX9-NEXT: s_mov_b64 s[30:31], 0 +; GFX9-NEXT: s_load_dwordx4 s[4:7], s[30:31], 0x0 ; GFX9-NEXT: s_mov_b32 s33, s32 ; 
GFX9-NEXT: s_addk_i32 s32, 0x400 -; GFX9-NEXT: v_writelane_b32 v40, s30, 8 ; GFX9-NEXT: s_mov_b32 s8, 1 ; GFX9-NEXT: s_mov_b32 s9, 2 ; GFX9-NEXT: s_mov_b32 s10, 3 ; GFX9-NEXT: s_mov_b32 s11, 4 -; GFX9-NEXT: v_writelane_b32 v40, s31, 9 -; GFX9-NEXT: s_getpc_b64 s[34:35] -; GFX9-NEXT: s_add_u32 s34, s34, external_void_func_v4i64_inreg@rel32@lo+4 -; GFX9-NEXT: s_addc_u32 s35, s35, external_void_func_v4i64_inreg@rel32@hi+12 -; GFX9-NEXT: s_swappc_b64 s[30:31], s[34:35] -; GFX9-NEXT: v_readlane_b32 s31, v40, 9 +; GFX9-NEXT: s_getpc_b64 s[30:31] +; GFX9-NEXT: s_add_u32 s30, s30, external_void_func_v4i64_inreg@rel32@lo+4 +; GFX9-NEXT: s_addc_u32 s31, s31, external_void_func_v4i64_inreg@rel32@hi+12 +; GFX9-NEXT: s_swappc_b64 s[30:31], s[30:31] ; GFX9-NEXT: v_readlane_b32 s30, v40, 8 +; GFX9-NEXT: v_readlane_b32 s31, v40, 9 ; GFX9-NEXT: v_readlane_b32 s11, v40, 7 ; GFX9-NEXT: v_readlane_b32 s10, v40, 6 ; GFX9-NEXT: v_readlane_b32 s9, v40, 5 @@ -6029,17 +6025,12 @@ define amdgpu_gfx void @test_call_external_void_func_v4i64_inreg() #0 { ; GFX10-NEXT: s_waitcnt_depctr 0xffe3 ; GFX10-NEXT: s_mov_b32 exec_lo, s34 ; GFX10-NEXT: v_writelane_b32 v40, s33, 10 -; GFX10-NEXT: s_mov_b64 s[34:35], 0 ; GFX10-NEXT: s_mov_b32 s33, s32 ; GFX10-NEXT: s_addk_i32 s32, 0x200 ; GFX10-NEXT: v_writelane_b32 v40, s4, 0 ; GFX10-NEXT: v_writelane_b32 v40, s5, 1 ; GFX10-NEXT: v_writelane_b32 v40, s6, 2 ; GFX10-NEXT: v_writelane_b32 v40, s7, 3 -; GFX10-NEXT: s_load_dwordx4 s[4:7], s[34:35], 0x0 -; GFX10-NEXT: s_getpc_b64 s[34:35] -; GFX10-NEXT: s_add_u32 s34, s34, external_void_func_v4i64_inreg@rel32@lo+4 -; GFX10-NEXT: s_addc_u32 s35, s35, external_void_func_v4i64_inreg@rel32@hi+12 ; GFX10-NEXT: v_writelane_b32 v40, s8, 4 ; GFX10-NEXT: s_mov_b32 s8, 1 ; GFX10-NEXT: v_writelane_b32 v40, s9, 5 @@ -6050,9 +6041,14 @@ define amdgpu_gfx void @test_call_external_void_func_v4i64_inreg() #0 { ; GFX10-NEXT: s_mov_b32 s11, 4 ; GFX10-NEXT: v_writelane_b32 v40, s30, 8 ; GFX10-NEXT: v_writelane_b32 v40, s31, 9 -; GFX10-NEXT: s_swappc_b64 s[30:31], s[34:35] -; GFX10-NEXT: v_readlane_b32 s31, v40, 9 +; GFX10-NEXT: s_mov_b64 s[30:31], 0 +; GFX10-NEXT: s_load_dwordx4 s[4:7], s[30:31], 0x0 +; GFX10-NEXT: s_getpc_b64 s[30:31] +; GFX10-NEXT: s_add_u32 s30, s30, external_void_func_v4i64_inreg@rel32@lo+4 +; GFX10-NEXT: s_addc_u32 s31, s31, external_void_func_v4i64_inreg@rel32@hi+12 +; GFX10-NEXT: s_swappc_b64 s[30:31], s[30:31] ; GFX10-NEXT: v_readlane_b32 s30, v40, 8 +; GFX10-NEXT: v_readlane_b32 s31, v40, 9 ; GFX10-NEXT: v_readlane_b32 s11, v40, 7 ; GFX10-NEXT: v_readlane_b32 s10, v40, 6 ; GFX10-NEXT: v_readlane_b32 s9, v40, 5 @@ -6101,8 +6097,8 @@ define amdgpu_gfx void @test_call_external_void_func_v4i64_inreg() #0 { ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s30, 8 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s31, 9 ; GFX10-SCRATCH-NEXT: s_swappc_b64 s[30:31], s[0:1] -; GFX10-SCRATCH-NEXT: v_readlane_b32 s31, v40, 9 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s30, v40, 8 +; GFX10-SCRATCH-NEXT: v_readlane_b32 s31, v40, 9 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s11, v40, 7 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s10, v40, 6 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s9, v40, 5 @@ -6139,12 +6135,12 @@ define amdgpu_gfx void @test_call_external_void_func_f16_imm_inreg() #0 { ; GFX9-NEXT: v_writelane_b32 v40, s30, 1 ; GFX9-NEXT: s_movk_i32 s4, 0x4400 ; GFX9-NEXT: v_writelane_b32 v40, s31, 2 -; GFX9-NEXT: s_getpc_b64 s[34:35] -; GFX9-NEXT: s_add_u32 s34, s34, external_void_func_f16_inreg@rel32@lo+4 -; GFX9-NEXT: s_addc_u32 s35, s35, 
external_void_func_f16_inreg@rel32@hi+12 -; GFX9-NEXT: s_swappc_b64 s[30:31], s[34:35] -; GFX9-NEXT: v_readlane_b32 s31, v40, 2 +; GFX9-NEXT: s_getpc_b64 s[30:31] +; GFX9-NEXT: s_add_u32 s30, s30, external_void_func_f16_inreg@rel32@lo+4 +; GFX9-NEXT: s_addc_u32 s31, s31, external_void_func_f16_inreg@rel32@hi+12 +; GFX9-NEXT: s_swappc_b64 s[30:31], s[30:31] ; GFX9-NEXT: v_readlane_b32 s30, v40, 1 +; GFX9-NEXT: v_readlane_b32 s31, v40, 2 ; GFX9-NEXT: v_readlane_b32 s4, v40, 0 ; GFX9-NEXT: s_addk_i32 s32, 0xfc00 ; GFX9-NEXT: v_readlane_b32 s33, v40, 3 @@ -6165,16 +6161,16 @@ define amdgpu_gfx void @test_call_external_void_func_f16_imm_inreg() #0 { ; GFX10-NEXT: v_writelane_b32 v40, s33, 3 ; GFX10-NEXT: s_mov_b32 s33, s32 ; GFX10-NEXT: s_addk_i32 s32, 0x200 -; GFX10-NEXT: s_getpc_b64 s[34:35] -; GFX10-NEXT: s_add_u32 s34, s34, external_void_func_f16_inreg@rel32@lo+4 -; GFX10-NEXT: s_addc_u32 s35, s35, external_void_func_f16_inreg@rel32@hi+12 ; GFX10-NEXT: v_writelane_b32 v40, s4, 0 ; GFX10-NEXT: s_movk_i32 s4, 0x4400 ; GFX10-NEXT: v_writelane_b32 v40, s30, 1 ; GFX10-NEXT: v_writelane_b32 v40, s31, 2 -; GFX10-NEXT: s_swappc_b64 s[30:31], s[34:35] -; GFX10-NEXT: v_readlane_b32 s31, v40, 2 +; GFX10-NEXT: s_getpc_b64 s[30:31] +; GFX10-NEXT: s_add_u32 s30, s30, external_void_func_f16_inreg@rel32@lo+4 +; GFX10-NEXT: s_addc_u32 s31, s31, external_void_func_f16_inreg@rel32@hi+12 +; GFX10-NEXT: s_swappc_b64 s[30:31], s[30:31] ; GFX10-NEXT: v_readlane_b32 s30, v40, 1 +; GFX10-NEXT: v_readlane_b32 s31, v40, 2 ; GFX10-NEXT: v_readlane_b32 s4, v40, 0 ; GFX10-NEXT: s_addk_i32 s32, 0xfe00 ; GFX10-NEXT: v_readlane_b32 s33, v40, 3 @@ -6204,8 +6200,8 @@ define amdgpu_gfx void @test_call_external_void_func_f16_imm_inreg() #0 { ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s30, 1 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s31, 2 ; GFX10-SCRATCH-NEXT: s_swappc_b64 s[30:31], s[0:1] -; GFX10-SCRATCH-NEXT: v_readlane_b32 s31, v40, 2 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s30, v40, 1 +; GFX10-SCRATCH-NEXT: v_readlane_b32 s31, v40, 2 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s4, v40, 0 ; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, -16 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s33, v40, 3 @@ -6233,12 +6229,12 @@ define amdgpu_gfx void @test_call_external_void_func_f32_imm_inreg() #0 { ; GFX9-NEXT: v_writelane_b32 v40, s30, 1 ; GFX9-NEXT: s_mov_b32 s4, 4.0 ; GFX9-NEXT: v_writelane_b32 v40, s31, 2 -; GFX9-NEXT: s_getpc_b64 s[34:35] -; GFX9-NEXT: s_add_u32 s34, s34, external_void_func_f32_inreg@rel32@lo+4 -; GFX9-NEXT: s_addc_u32 s35, s35, external_void_func_f32_inreg@rel32@hi+12 -; GFX9-NEXT: s_swappc_b64 s[30:31], s[34:35] -; GFX9-NEXT: v_readlane_b32 s31, v40, 2 +; GFX9-NEXT: s_getpc_b64 s[30:31] +; GFX9-NEXT: s_add_u32 s30, s30, external_void_func_f32_inreg@rel32@lo+4 +; GFX9-NEXT: s_addc_u32 s31, s31, external_void_func_f32_inreg@rel32@hi+12 +; GFX9-NEXT: s_swappc_b64 s[30:31], s[30:31] ; GFX9-NEXT: v_readlane_b32 s30, v40, 1 +; GFX9-NEXT: v_readlane_b32 s31, v40, 2 ; GFX9-NEXT: v_readlane_b32 s4, v40, 0 ; GFX9-NEXT: s_addk_i32 s32, 0xfc00 ; GFX9-NEXT: v_readlane_b32 s33, v40, 3 @@ -6259,16 +6255,16 @@ define amdgpu_gfx void @test_call_external_void_func_f32_imm_inreg() #0 { ; GFX10-NEXT: v_writelane_b32 v40, s33, 3 ; GFX10-NEXT: s_mov_b32 s33, s32 ; GFX10-NEXT: s_addk_i32 s32, 0x200 -; GFX10-NEXT: s_getpc_b64 s[34:35] -; GFX10-NEXT: s_add_u32 s34, s34, external_void_func_f32_inreg@rel32@lo+4 -; GFX10-NEXT: s_addc_u32 s35, s35, external_void_func_f32_inreg@rel32@hi+12 ; GFX10-NEXT: v_writelane_b32 v40, s4, 0 ; GFX10-NEXT: s_mov_b32 s4, 
4.0 ; GFX10-NEXT: v_writelane_b32 v40, s30, 1 ; GFX10-NEXT: v_writelane_b32 v40, s31, 2 -; GFX10-NEXT: s_swappc_b64 s[30:31], s[34:35] -; GFX10-NEXT: v_readlane_b32 s31, v40, 2 +; GFX10-NEXT: s_getpc_b64 s[30:31] +; GFX10-NEXT: s_add_u32 s30, s30, external_void_func_f32_inreg@rel32@lo+4 +; GFX10-NEXT: s_addc_u32 s31, s31, external_void_func_f32_inreg@rel32@hi+12 +; GFX10-NEXT: s_swappc_b64 s[30:31], s[30:31] ; GFX10-NEXT: v_readlane_b32 s30, v40, 1 +; GFX10-NEXT: v_readlane_b32 s31, v40, 2 ; GFX10-NEXT: v_readlane_b32 s4, v40, 0 ; GFX10-NEXT: s_addk_i32 s32, 0xfe00 ; GFX10-NEXT: v_readlane_b32 s33, v40, 3 @@ -6298,8 +6294,8 @@ define amdgpu_gfx void @test_call_external_void_func_f32_imm_inreg() #0 { ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s30, 1 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s31, 2 ; GFX10-SCRATCH-NEXT: s_swappc_b64 s[30:31], s[0:1] -; GFX10-SCRATCH-NEXT: v_readlane_b32 s31, v40, 2 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s30, v40, 1 +; GFX10-SCRATCH-NEXT: v_readlane_b32 s31, v40, 2 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s4, v40, 0 ; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, -16 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s33, v40, 3 @@ -6329,12 +6325,12 @@ define amdgpu_gfx void @test_call_external_void_func_v2f32_imm_inreg() #0 { ; GFX9-NEXT: s_mov_b32 s4, 1.0 ; GFX9-NEXT: s_mov_b32 s5, 2.0 ; GFX9-NEXT: v_writelane_b32 v40, s31, 3 -; GFX9-NEXT: s_getpc_b64 s[34:35] -; GFX9-NEXT: s_add_u32 s34, s34, external_void_func_v2f32_inreg@rel32@lo+4 -; GFX9-NEXT: s_addc_u32 s35, s35, external_void_func_v2f32_inreg@rel32@hi+12 -; GFX9-NEXT: s_swappc_b64 s[30:31], s[34:35] -; GFX9-NEXT: v_readlane_b32 s31, v40, 3 +; GFX9-NEXT: s_getpc_b64 s[30:31] +; GFX9-NEXT: s_add_u32 s30, s30, external_void_func_v2f32_inreg@rel32@lo+4 +; GFX9-NEXT: s_addc_u32 s31, s31, external_void_func_v2f32_inreg@rel32@hi+12 +; GFX9-NEXT: s_swappc_b64 s[30:31], s[30:31] ; GFX9-NEXT: v_readlane_b32 s30, v40, 2 +; GFX9-NEXT: v_readlane_b32 s31, v40, 3 ; GFX9-NEXT: v_readlane_b32 s5, v40, 1 ; GFX9-NEXT: v_readlane_b32 s4, v40, 0 ; GFX9-NEXT: s_addk_i32 s32, 0xfc00 @@ -6356,18 +6352,18 @@ define amdgpu_gfx void @test_call_external_void_func_v2f32_imm_inreg() #0 { ; GFX10-NEXT: v_writelane_b32 v40, s33, 4 ; GFX10-NEXT: s_mov_b32 s33, s32 ; GFX10-NEXT: s_addk_i32 s32, 0x200 -; GFX10-NEXT: s_getpc_b64 s[34:35] -; GFX10-NEXT: s_add_u32 s34, s34, external_void_func_v2f32_inreg@rel32@lo+4 -; GFX10-NEXT: s_addc_u32 s35, s35, external_void_func_v2f32_inreg@rel32@hi+12 ; GFX10-NEXT: v_writelane_b32 v40, s4, 0 ; GFX10-NEXT: s_mov_b32 s4, 1.0 ; GFX10-NEXT: v_writelane_b32 v40, s5, 1 ; GFX10-NEXT: s_mov_b32 s5, 2.0 ; GFX10-NEXT: v_writelane_b32 v40, s30, 2 ; GFX10-NEXT: v_writelane_b32 v40, s31, 3 -; GFX10-NEXT: s_swappc_b64 s[30:31], s[34:35] -; GFX10-NEXT: v_readlane_b32 s31, v40, 3 +; GFX10-NEXT: s_getpc_b64 s[30:31] +; GFX10-NEXT: s_add_u32 s30, s30, external_void_func_v2f32_inreg@rel32@lo+4 +; GFX10-NEXT: s_addc_u32 s31, s31, external_void_func_v2f32_inreg@rel32@hi+12 +; GFX10-NEXT: s_swappc_b64 s[30:31], s[30:31] ; GFX10-NEXT: v_readlane_b32 s30, v40, 2 +; GFX10-NEXT: v_readlane_b32 s31, v40, 3 ; GFX10-NEXT: v_readlane_b32 s5, v40, 1 ; GFX10-NEXT: v_readlane_b32 s4, v40, 0 ; GFX10-NEXT: s_addk_i32 s32, 0xfe00 @@ -6400,8 +6396,8 @@ define amdgpu_gfx void @test_call_external_void_func_v2f32_imm_inreg() #0 { ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s30, 2 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s31, 3 ; GFX10-SCRATCH-NEXT: s_swappc_b64 s[30:31], s[0:1] -; GFX10-SCRATCH-NEXT: v_readlane_b32 s31, v40, 3 ; GFX10-SCRATCH-NEXT: 
v_readlane_b32 s30, v40, 2 +; GFX10-SCRATCH-NEXT: v_readlane_b32 s31, v40, 3 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s5, v40, 1 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s4, v40, 0 ; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, -16 @@ -6434,12 +6430,12 @@ define amdgpu_gfx void @test_call_external_void_func_v3f32_imm_inreg() #0 { ; GFX9-NEXT: s_mov_b32 s5, 2.0 ; GFX9-NEXT: s_mov_b32 s6, 4.0 ; GFX9-NEXT: v_writelane_b32 v40, s31, 4 -; GFX9-NEXT: s_getpc_b64 s[34:35] -; GFX9-NEXT: s_add_u32 s34, s34, external_void_func_v3f32_inreg@rel32@lo+4 -; GFX9-NEXT: s_addc_u32 s35, s35, external_void_func_v3f32_inreg@rel32@hi+12 -; GFX9-NEXT: s_swappc_b64 s[30:31], s[34:35] -; GFX9-NEXT: v_readlane_b32 s31, v40, 4 +; GFX9-NEXT: s_getpc_b64 s[30:31] +; GFX9-NEXT: s_add_u32 s30, s30, external_void_func_v3f32_inreg@rel32@lo+4 +; GFX9-NEXT: s_addc_u32 s31, s31, external_void_func_v3f32_inreg@rel32@hi+12 +; GFX9-NEXT: s_swappc_b64 s[30:31], s[30:31] ; GFX9-NEXT: v_readlane_b32 s30, v40, 3 +; GFX9-NEXT: v_readlane_b32 s31, v40, 4 ; GFX9-NEXT: v_readlane_b32 s6, v40, 2 ; GFX9-NEXT: v_readlane_b32 s5, v40, 1 ; GFX9-NEXT: v_readlane_b32 s4, v40, 0 @@ -6462,9 +6458,6 @@ define amdgpu_gfx void @test_call_external_void_func_v3f32_imm_inreg() #0 { ; GFX10-NEXT: v_writelane_b32 v40, s33, 5 ; GFX10-NEXT: s_mov_b32 s33, s32 ; GFX10-NEXT: s_addk_i32 s32, 0x200 -; GFX10-NEXT: s_getpc_b64 s[34:35] -; GFX10-NEXT: s_add_u32 s34, s34, external_void_func_v3f32_inreg@rel32@lo+4 -; GFX10-NEXT: s_addc_u32 s35, s35, external_void_func_v3f32_inreg@rel32@hi+12 ; GFX10-NEXT: v_writelane_b32 v40, s4, 0 ; GFX10-NEXT: s_mov_b32 s4, 1.0 ; GFX10-NEXT: v_writelane_b32 v40, s5, 1 @@ -6473,9 +6466,12 @@ define amdgpu_gfx void @test_call_external_void_func_v3f32_imm_inreg() #0 { ; GFX10-NEXT: s_mov_b32 s6, 4.0 ; GFX10-NEXT: v_writelane_b32 v40, s30, 3 ; GFX10-NEXT: v_writelane_b32 v40, s31, 4 -; GFX10-NEXT: s_swappc_b64 s[30:31], s[34:35] -; GFX10-NEXT: v_readlane_b32 s31, v40, 4 +; GFX10-NEXT: s_getpc_b64 s[30:31] +; GFX10-NEXT: s_add_u32 s30, s30, external_void_func_v3f32_inreg@rel32@lo+4 +; GFX10-NEXT: s_addc_u32 s31, s31, external_void_func_v3f32_inreg@rel32@hi+12 +; GFX10-NEXT: s_swappc_b64 s[30:31], s[30:31] ; GFX10-NEXT: v_readlane_b32 s30, v40, 3 +; GFX10-NEXT: v_readlane_b32 s31, v40, 4 ; GFX10-NEXT: v_readlane_b32 s6, v40, 2 ; GFX10-NEXT: v_readlane_b32 s5, v40, 1 ; GFX10-NEXT: v_readlane_b32 s4, v40, 0 @@ -6511,8 +6507,8 @@ define amdgpu_gfx void @test_call_external_void_func_v3f32_imm_inreg() #0 { ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s30, 3 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s31, 4 ; GFX10-SCRATCH-NEXT: s_swappc_b64 s[30:31], s[0:1] -; GFX10-SCRATCH-NEXT: v_readlane_b32 s31, v40, 4 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s30, v40, 3 +; GFX10-SCRATCH-NEXT: v_readlane_b32 s31, v40, 4 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s6, v40, 2 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s5, v40, 1 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s4, v40, 0 @@ -6550,12 +6546,12 @@ define amdgpu_gfx void @test_call_external_void_func_v5f32_imm_inreg() #0 { ; GFX9-NEXT: s_mov_b32 s7, -1.0 ; GFX9-NEXT: s_mov_b32 s8, 0.5 ; GFX9-NEXT: v_writelane_b32 v40, s31, 6 -; GFX9-NEXT: s_getpc_b64 s[34:35] -; GFX9-NEXT: s_add_u32 s34, s34, external_void_func_v5f32_inreg@rel32@lo+4 -; GFX9-NEXT: s_addc_u32 s35, s35, external_void_func_v5f32_inreg@rel32@hi+12 -; GFX9-NEXT: s_swappc_b64 s[30:31], s[34:35] -; GFX9-NEXT: v_readlane_b32 s31, v40, 6 +; GFX9-NEXT: s_getpc_b64 s[30:31] +; GFX9-NEXT: s_add_u32 s30, s30, external_void_func_v5f32_inreg@rel32@lo+4 +; GFX9-NEXT: s_addc_u32 
s31, s31, external_void_func_v5f32_inreg@rel32@hi+12 +; GFX9-NEXT: s_swappc_b64 s[30:31], s[30:31] ; GFX9-NEXT: v_readlane_b32 s30, v40, 5 +; GFX9-NEXT: v_readlane_b32 s31, v40, 6 ; GFX9-NEXT: v_readlane_b32 s8, v40, 4 ; GFX9-NEXT: v_readlane_b32 s7, v40, 3 ; GFX9-NEXT: v_readlane_b32 s6, v40, 2 @@ -6580,9 +6576,6 @@ define amdgpu_gfx void @test_call_external_void_func_v5f32_imm_inreg() #0 { ; GFX10-NEXT: v_writelane_b32 v40, s33, 7 ; GFX10-NEXT: s_mov_b32 s33, s32 ; GFX10-NEXT: s_addk_i32 s32, 0x200 -; GFX10-NEXT: s_getpc_b64 s[34:35] -; GFX10-NEXT: s_add_u32 s34, s34, external_void_func_v5f32_inreg@rel32@lo+4 -; GFX10-NEXT: s_addc_u32 s35, s35, external_void_func_v5f32_inreg@rel32@hi+12 ; GFX10-NEXT: v_writelane_b32 v40, s4, 0 ; GFX10-NEXT: s_mov_b32 s4, 1.0 ; GFX10-NEXT: v_writelane_b32 v40, s5, 1 @@ -6595,9 +6588,12 @@ define amdgpu_gfx void @test_call_external_void_func_v5f32_imm_inreg() #0 { ; GFX10-NEXT: s_mov_b32 s8, 0.5 ; GFX10-NEXT: v_writelane_b32 v40, s30, 5 ; GFX10-NEXT: v_writelane_b32 v40, s31, 6 -; GFX10-NEXT: s_swappc_b64 s[30:31], s[34:35] -; GFX10-NEXT: v_readlane_b32 s31, v40, 6 +; GFX10-NEXT: s_getpc_b64 s[30:31] +; GFX10-NEXT: s_add_u32 s30, s30, external_void_func_v5f32_inreg@rel32@lo+4 +; GFX10-NEXT: s_addc_u32 s31, s31, external_void_func_v5f32_inreg@rel32@hi+12 +; GFX10-NEXT: s_swappc_b64 s[30:31], s[30:31] ; GFX10-NEXT: v_readlane_b32 s30, v40, 5 +; GFX10-NEXT: v_readlane_b32 s31, v40, 6 ; GFX10-NEXT: v_readlane_b32 s8, v40, 4 ; GFX10-NEXT: v_readlane_b32 s7, v40, 3 ; GFX10-NEXT: v_readlane_b32 s6, v40, 2 @@ -6639,8 +6635,8 @@ define amdgpu_gfx void @test_call_external_void_func_v5f32_imm_inreg() #0 { ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s30, 5 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s31, 6 ; GFX10-SCRATCH-NEXT: s_swappc_b64 s[30:31], s[0:1] -; GFX10-SCRATCH-NEXT: v_readlane_b32 s31, v40, 6 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s30, v40, 5 +; GFX10-SCRATCH-NEXT: v_readlane_b32 s31, v40, 6 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s8, v40, 4 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s7, v40, 3 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s6, v40, 2 @@ -6674,12 +6670,12 @@ define amdgpu_gfx void @test_call_external_void_func_f64_imm_inreg() #0 { ; GFX9-NEXT: s_mov_b32 s4, 0 ; GFX9-NEXT: s_mov_b32 s5, 0x40100000 ; GFX9-NEXT: v_writelane_b32 v40, s31, 3 -; GFX9-NEXT: s_getpc_b64 s[34:35] -; GFX9-NEXT: s_add_u32 s34, s34, external_void_func_f64_inreg@rel32@lo+4 -; GFX9-NEXT: s_addc_u32 s35, s35, external_void_func_f64_inreg@rel32@hi+12 -; GFX9-NEXT: s_swappc_b64 s[30:31], s[34:35] -; GFX9-NEXT: v_readlane_b32 s31, v40, 3 +; GFX9-NEXT: s_getpc_b64 s[30:31] +; GFX9-NEXT: s_add_u32 s30, s30, external_void_func_f64_inreg@rel32@lo+4 +; GFX9-NEXT: s_addc_u32 s31, s31, external_void_func_f64_inreg@rel32@hi+12 +; GFX9-NEXT: s_swappc_b64 s[30:31], s[30:31] ; GFX9-NEXT: v_readlane_b32 s30, v40, 2 +; GFX9-NEXT: v_readlane_b32 s31, v40, 3 ; GFX9-NEXT: v_readlane_b32 s5, v40, 1 ; GFX9-NEXT: v_readlane_b32 s4, v40, 0 ; GFX9-NEXT: s_addk_i32 s32, 0xfc00 @@ -6701,18 +6697,18 @@ define amdgpu_gfx void @test_call_external_void_func_f64_imm_inreg() #0 { ; GFX10-NEXT: v_writelane_b32 v40, s33, 4 ; GFX10-NEXT: s_mov_b32 s33, s32 ; GFX10-NEXT: s_addk_i32 s32, 0x200 -; GFX10-NEXT: s_getpc_b64 s[34:35] -; GFX10-NEXT: s_add_u32 s34, s34, external_void_func_f64_inreg@rel32@lo+4 -; GFX10-NEXT: s_addc_u32 s35, s35, external_void_func_f64_inreg@rel32@hi+12 ; GFX10-NEXT: v_writelane_b32 v40, s4, 0 ; GFX10-NEXT: s_mov_b32 s4, 0 ; GFX10-NEXT: v_writelane_b32 v40, s5, 1 ; GFX10-NEXT: s_mov_b32 s5, 0x40100000 ; 
GFX10-NEXT: v_writelane_b32 v40, s30, 2 ; GFX10-NEXT: v_writelane_b32 v40, s31, 3 -; GFX10-NEXT: s_swappc_b64 s[30:31], s[34:35] -; GFX10-NEXT: v_readlane_b32 s31, v40, 3 +; GFX10-NEXT: s_getpc_b64 s[30:31] +; GFX10-NEXT: s_add_u32 s30, s30, external_void_func_f64_inreg@rel32@lo+4 +; GFX10-NEXT: s_addc_u32 s31, s31, external_void_func_f64_inreg@rel32@hi+12 +; GFX10-NEXT: s_swappc_b64 s[30:31], s[30:31] ; GFX10-NEXT: v_readlane_b32 s30, v40, 2 +; GFX10-NEXT: v_readlane_b32 s31, v40, 3 ; GFX10-NEXT: v_readlane_b32 s5, v40, 1 ; GFX10-NEXT: v_readlane_b32 s4, v40, 0 ; GFX10-NEXT: s_addk_i32 s32, 0xfe00 @@ -6745,8 +6741,8 @@ define amdgpu_gfx void @test_call_external_void_func_f64_imm_inreg() #0 { ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s30, 2 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s31, 3 ; GFX10-SCRATCH-NEXT: s_swappc_b64 s[30:31], s[0:1] -; GFX10-SCRATCH-NEXT: v_readlane_b32 s31, v40, 3 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s30, v40, 2 +; GFX10-SCRATCH-NEXT: v_readlane_b32 s31, v40, 3 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s5, v40, 1 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s4, v40, 0 ; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, -16 @@ -6781,12 +6777,12 @@ define amdgpu_gfx void @test_call_external_void_func_v2f64_imm_inreg() #0 { ; GFX9-NEXT: s_mov_b32 s6, 0 ; GFX9-NEXT: s_mov_b32 s7, 0x40100000 ; GFX9-NEXT: v_writelane_b32 v40, s31, 5 -; GFX9-NEXT: s_getpc_b64 s[34:35] -; GFX9-NEXT: s_add_u32 s34, s34, external_void_func_v2f64_inreg@rel32@lo+4 -; GFX9-NEXT: s_addc_u32 s35, s35, external_void_func_v2f64_inreg@rel32@hi+12 -; GFX9-NEXT: s_swappc_b64 s[30:31], s[34:35] -; GFX9-NEXT: v_readlane_b32 s31, v40, 5 +; GFX9-NEXT: s_getpc_b64 s[30:31] +; GFX9-NEXT: s_add_u32 s30, s30, external_void_func_v2f64_inreg@rel32@lo+4 +; GFX9-NEXT: s_addc_u32 s31, s31, external_void_func_v2f64_inreg@rel32@hi+12 +; GFX9-NEXT: s_swappc_b64 s[30:31], s[30:31] ; GFX9-NEXT: v_readlane_b32 s30, v40, 4 +; GFX9-NEXT: v_readlane_b32 s31, v40, 5 ; GFX9-NEXT: v_readlane_b32 s7, v40, 3 ; GFX9-NEXT: v_readlane_b32 s6, v40, 2 ; GFX9-NEXT: v_readlane_b32 s5, v40, 1 @@ -6810,9 +6806,6 @@ define amdgpu_gfx void @test_call_external_void_func_v2f64_imm_inreg() #0 { ; GFX10-NEXT: v_writelane_b32 v40, s33, 6 ; GFX10-NEXT: s_mov_b32 s33, s32 ; GFX10-NEXT: s_addk_i32 s32, 0x200 -; GFX10-NEXT: s_getpc_b64 s[34:35] -; GFX10-NEXT: s_add_u32 s34, s34, external_void_func_v2f64_inreg@rel32@lo+4 -; GFX10-NEXT: s_addc_u32 s35, s35, external_void_func_v2f64_inreg@rel32@hi+12 ; GFX10-NEXT: v_writelane_b32 v40, s4, 0 ; GFX10-NEXT: s_mov_b32 s4, 0 ; GFX10-NEXT: v_writelane_b32 v40, s5, 1 @@ -6823,9 +6816,12 @@ define amdgpu_gfx void @test_call_external_void_func_v2f64_imm_inreg() #0 { ; GFX10-NEXT: s_mov_b32 s7, 0x40100000 ; GFX10-NEXT: v_writelane_b32 v40, s30, 4 ; GFX10-NEXT: v_writelane_b32 v40, s31, 5 -; GFX10-NEXT: s_swappc_b64 s[30:31], s[34:35] -; GFX10-NEXT: v_readlane_b32 s31, v40, 5 +; GFX10-NEXT: s_getpc_b64 s[30:31] +; GFX10-NEXT: s_add_u32 s30, s30, external_void_func_v2f64_inreg@rel32@lo+4 +; GFX10-NEXT: s_addc_u32 s31, s31, external_void_func_v2f64_inreg@rel32@hi+12 +; GFX10-NEXT: s_swappc_b64 s[30:31], s[30:31] ; GFX10-NEXT: v_readlane_b32 s30, v40, 4 +; GFX10-NEXT: v_readlane_b32 s31, v40, 5 ; GFX10-NEXT: v_readlane_b32 s7, v40, 3 ; GFX10-NEXT: v_readlane_b32 s6, v40, 2 ; GFX10-NEXT: v_readlane_b32 s5, v40, 1 @@ -6864,8 +6860,8 @@ define amdgpu_gfx void @test_call_external_void_func_v2f64_imm_inreg() #0 { ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s30, 4 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s31, 5 ; GFX10-SCRATCH-NEXT: 
s_swappc_b64 s[30:31], s[0:1] -; GFX10-SCRATCH-NEXT: v_readlane_b32 s31, v40, 5 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s30, v40, 4 +; GFX10-SCRATCH-NEXT: v_readlane_b32 s31, v40, 5 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s7, v40, 3 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s6, v40, 2 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s5, v40, 1 @@ -6906,12 +6902,12 @@ define amdgpu_gfx void @test_call_external_void_func_v3f64_imm_inreg() #0 { ; GFX9-NEXT: s_mov_b32 s8, 0 ; GFX9-NEXT: s_mov_b32 s9, 0x40200000 ; GFX9-NEXT: v_writelane_b32 v40, s31, 7 -; GFX9-NEXT: s_getpc_b64 s[34:35] -; GFX9-NEXT: s_add_u32 s34, s34, external_void_func_v3f64_inreg@rel32@lo+4 -; GFX9-NEXT: s_addc_u32 s35, s35, external_void_func_v3f64_inreg@rel32@hi+12 -; GFX9-NEXT: s_swappc_b64 s[30:31], s[34:35] -; GFX9-NEXT: v_readlane_b32 s31, v40, 7 +; GFX9-NEXT: s_getpc_b64 s[30:31] +; GFX9-NEXT: s_add_u32 s30, s30, external_void_func_v3f64_inreg@rel32@lo+4 +; GFX9-NEXT: s_addc_u32 s31, s31, external_void_func_v3f64_inreg@rel32@hi+12 +; GFX9-NEXT: s_swappc_b64 s[30:31], s[30:31] ; GFX9-NEXT: v_readlane_b32 s30, v40, 6 +; GFX9-NEXT: v_readlane_b32 s31, v40, 7 ; GFX9-NEXT: v_readlane_b32 s9, v40, 5 ; GFX9-NEXT: v_readlane_b32 s8, v40, 4 ; GFX9-NEXT: v_readlane_b32 s7, v40, 3 @@ -6937,9 +6933,6 @@ define amdgpu_gfx void @test_call_external_void_func_v3f64_imm_inreg() #0 { ; GFX10-NEXT: v_writelane_b32 v40, s33, 8 ; GFX10-NEXT: s_mov_b32 s33, s32 ; GFX10-NEXT: s_addk_i32 s32, 0x200 -; GFX10-NEXT: s_getpc_b64 s[34:35] -; GFX10-NEXT: s_add_u32 s34, s34, external_void_func_v3f64_inreg@rel32@lo+4 -; GFX10-NEXT: s_addc_u32 s35, s35, external_void_func_v3f64_inreg@rel32@hi+12 ; GFX10-NEXT: v_writelane_b32 v40, s4, 0 ; GFX10-NEXT: s_mov_b32 s4, 0 ; GFX10-NEXT: v_writelane_b32 v40, s5, 1 @@ -6954,9 +6947,12 @@ define amdgpu_gfx void @test_call_external_void_func_v3f64_imm_inreg() #0 { ; GFX10-NEXT: s_mov_b32 s9, 0x40200000 ; GFX10-NEXT: v_writelane_b32 v40, s30, 6 ; GFX10-NEXT: v_writelane_b32 v40, s31, 7 -; GFX10-NEXT: s_swappc_b64 s[30:31], s[34:35] -; GFX10-NEXT: v_readlane_b32 s31, v40, 7 +; GFX10-NEXT: s_getpc_b64 s[30:31] +; GFX10-NEXT: s_add_u32 s30, s30, external_void_func_v3f64_inreg@rel32@lo+4 +; GFX10-NEXT: s_addc_u32 s31, s31, external_void_func_v3f64_inreg@rel32@hi+12 +; GFX10-NEXT: s_swappc_b64 s[30:31], s[30:31] ; GFX10-NEXT: v_readlane_b32 s30, v40, 6 +; GFX10-NEXT: v_readlane_b32 s31, v40, 7 ; GFX10-NEXT: v_readlane_b32 s9, v40, 5 ; GFX10-NEXT: v_readlane_b32 s8, v40, 4 ; GFX10-NEXT: v_readlane_b32 s7, v40, 3 @@ -7001,8 +6997,8 @@ define amdgpu_gfx void @test_call_external_void_func_v3f64_imm_inreg() #0 { ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s30, 6 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s31, 7 ; GFX10-SCRATCH-NEXT: s_swappc_b64 s[30:31], s[0:1] -; GFX10-SCRATCH-NEXT: v_readlane_b32 s31, v40, 7 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s30, v40, 6 +; GFX10-SCRATCH-NEXT: v_readlane_b32 s31, v40, 7 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s9, v40, 5 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s8, v40, 4 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s7, v40, 3 @@ -7030,17 +7026,17 @@ define amdgpu_gfx void @test_call_external_void_func_v2i16_inreg() #0 { ; GFX9-NEXT: s_mov_b64 exec, s[34:35] ; GFX9-NEXT: v_writelane_b32 v40, s33, 3 ; GFX9-NEXT: v_writelane_b32 v40, s4, 0 -; GFX9-NEXT: s_load_dword s4, s[34:35], 0x0 +; GFX9-NEXT: v_writelane_b32 v40, s30, 1 +; GFX9-NEXT: s_load_dword s4, s[30:31], 0x0 ; GFX9-NEXT: s_mov_b32 s33, s32 ; GFX9-NEXT: s_addk_i32 s32, 0x400 -; GFX9-NEXT: v_writelane_b32 v40, s30, 1 ; GFX9-NEXT: v_writelane_b32 v40, s31, 2 -; 
GFX9-NEXT: s_getpc_b64 s[34:35] -; GFX9-NEXT: s_add_u32 s34, s34, external_void_func_v2i16_inreg@rel32@lo+4 -; GFX9-NEXT: s_addc_u32 s35, s35, external_void_func_v2i16_inreg@rel32@hi+12 -; GFX9-NEXT: s_swappc_b64 s[30:31], s[34:35] -; GFX9-NEXT: v_readlane_b32 s31, v40, 2 +; GFX9-NEXT: s_getpc_b64 s[30:31] +; GFX9-NEXT: s_add_u32 s30, s30, external_void_func_v2i16_inreg@rel32@lo+4 +; GFX9-NEXT: s_addc_u32 s31, s31, external_void_func_v2i16_inreg@rel32@hi+12 +; GFX9-NEXT: s_swappc_b64 s[30:31], s[30:31] ; GFX9-NEXT: v_readlane_b32 s30, v40, 1 +; GFX9-NEXT: v_readlane_b32 s31, v40, 2 ; GFX9-NEXT: v_readlane_b32 s4, v40, 0 ; GFX9-NEXT: s_addk_i32 s32, 0xfc00 ; GFX9-NEXT: v_readlane_b32 s33, v40, 3 @@ -7062,15 +7058,15 @@ define amdgpu_gfx void @test_call_external_void_func_v2i16_inreg() #0 { ; GFX10-NEXT: s_mov_b32 s33, s32 ; GFX10-NEXT: s_addk_i32 s32, 0x200 ; GFX10-NEXT: v_writelane_b32 v40, s4, 0 -; GFX10-NEXT: s_load_dword s4, s[34:35], 0x0 -; GFX10-NEXT: s_getpc_b64 s[34:35] -; GFX10-NEXT: s_add_u32 s34, s34, external_void_func_v2i16_inreg@rel32@lo+4 -; GFX10-NEXT: s_addc_u32 s35, s35, external_void_func_v2i16_inreg@rel32@hi+12 ; GFX10-NEXT: v_writelane_b32 v40, s30, 1 +; GFX10-NEXT: s_load_dword s4, s[30:31], 0x0 ; GFX10-NEXT: v_writelane_b32 v40, s31, 2 -; GFX10-NEXT: s_swappc_b64 s[30:31], s[34:35] -; GFX10-NEXT: v_readlane_b32 s31, v40, 2 +; GFX10-NEXT: s_getpc_b64 s[30:31] +; GFX10-NEXT: s_add_u32 s30, s30, external_void_func_v2i16_inreg@rel32@lo+4 +; GFX10-NEXT: s_addc_u32 s31, s31, external_void_func_v2i16_inreg@rel32@hi+12 +; GFX10-NEXT: s_swappc_b64 s[30:31], s[30:31] ; GFX10-NEXT: v_readlane_b32 s30, v40, 1 +; GFX10-NEXT: v_readlane_b32 s31, v40, 2 ; GFX10-NEXT: v_readlane_b32 s4, v40, 0 ; GFX10-NEXT: s_addk_i32 s32, 0xfe00 ; GFX10-NEXT: v_readlane_b32 s33, v40, 3 @@ -7100,8 +7096,8 @@ define amdgpu_gfx void @test_call_external_void_func_v2i16_inreg() #0 { ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s30, 1 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s31, 2 ; GFX10-SCRATCH-NEXT: s_swappc_b64 s[30:31], s[0:1] -; GFX10-SCRATCH-NEXT: v_readlane_b32 s31, v40, 2 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s30, v40, 1 +; GFX10-SCRATCH-NEXT: v_readlane_b32 s31, v40, 2 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s4, v40, 0 ; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, -16 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s33, v40, 3 @@ -7126,17 +7122,17 @@ define amdgpu_gfx void @test_call_external_void_func_v3i16_inreg() #0 { ; GFX9-NEXT: v_writelane_b32 v40, s33, 4 ; GFX9-NEXT: v_writelane_b32 v40, s4, 0 ; GFX9-NEXT: v_writelane_b32 v40, s5, 1 -; GFX9-NEXT: s_load_dwordx2 s[4:5], s[34:35], 0x0 +; GFX9-NEXT: v_writelane_b32 v40, s30, 2 +; GFX9-NEXT: s_load_dwordx2 s[4:5], s[30:31], 0x0 ; GFX9-NEXT: s_mov_b32 s33, s32 ; GFX9-NEXT: s_addk_i32 s32, 0x400 -; GFX9-NEXT: v_writelane_b32 v40, s30, 2 ; GFX9-NEXT: v_writelane_b32 v40, s31, 3 -; GFX9-NEXT: s_getpc_b64 s[34:35] -; GFX9-NEXT: s_add_u32 s34, s34, external_void_func_v3i16_inreg@rel32@lo+4 -; GFX9-NEXT: s_addc_u32 s35, s35, external_void_func_v3i16_inreg@rel32@hi+12 -; GFX9-NEXT: s_swappc_b64 s[30:31], s[34:35] -; GFX9-NEXT: v_readlane_b32 s31, v40, 3 +; GFX9-NEXT: s_getpc_b64 s[30:31] +; GFX9-NEXT: s_add_u32 s30, s30, external_void_func_v3i16_inreg@rel32@lo+4 +; GFX9-NEXT: s_addc_u32 s31, s31, external_void_func_v3i16_inreg@rel32@hi+12 +; GFX9-NEXT: s_swappc_b64 s[30:31], s[30:31] ; GFX9-NEXT: v_readlane_b32 s30, v40, 2 +; GFX9-NEXT: v_readlane_b32 s31, v40, 3 ; GFX9-NEXT: v_readlane_b32 s5, v40, 1 ; GFX9-NEXT: v_readlane_b32 s4, v40, 0 ; GFX9-NEXT: s_addk_i32 s32, 
0xfc00 @@ -7160,15 +7156,15 @@ define amdgpu_gfx void @test_call_external_void_func_v3i16_inreg() #0 { ; GFX10-NEXT: s_addk_i32 s32, 0x200 ; GFX10-NEXT: v_writelane_b32 v40, s4, 0 ; GFX10-NEXT: v_writelane_b32 v40, s5, 1 -; GFX10-NEXT: s_load_dwordx2 s[4:5], s[34:35], 0x0 -; GFX10-NEXT: s_getpc_b64 s[34:35] -; GFX10-NEXT: s_add_u32 s34, s34, external_void_func_v3i16_inreg@rel32@lo+4 -; GFX10-NEXT: s_addc_u32 s35, s35, external_void_func_v3i16_inreg@rel32@hi+12 ; GFX10-NEXT: v_writelane_b32 v40, s30, 2 +; GFX10-NEXT: s_load_dwordx2 s[4:5], s[30:31], 0x0 ; GFX10-NEXT: v_writelane_b32 v40, s31, 3 -; GFX10-NEXT: s_swappc_b64 s[30:31], s[34:35] -; GFX10-NEXT: v_readlane_b32 s31, v40, 3 +; GFX10-NEXT: s_getpc_b64 s[30:31] +; GFX10-NEXT: s_add_u32 s30, s30, external_void_func_v3i16_inreg@rel32@lo+4 +; GFX10-NEXT: s_addc_u32 s31, s31, external_void_func_v3i16_inreg@rel32@hi+12 +; GFX10-NEXT: s_swappc_b64 s[30:31], s[30:31] ; GFX10-NEXT: v_readlane_b32 s30, v40, 2 +; GFX10-NEXT: v_readlane_b32 s31, v40, 3 ; GFX10-NEXT: v_readlane_b32 s5, v40, 1 ; GFX10-NEXT: v_readlane_b32 s4, v40, 0 ; GFX10-NEXT: s_addk_i32 s32, 0xfe00 @@ -7200,8 +7196,8 @@ define amdgpu_gfx void @test_call_external_void_func_v3i16_inreg() #0 { ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s30, 2 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s31, 3 ; GFX10-SCRATCH-NEXT: s_swappc_b64 s[30:31], s[0:1] -; GFX10-SCRATCH-NEXT: v_readlane_b32 s31, v40, 3 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s30, v40, 2 +; GFX10-SCRATCH-NEXT: v_readlane_b32 s31, v40, 3 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s5, v40, 1 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s4, v40, 0 ; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, -16 @@ -7227,17 +7223,17 @@ define amdgpu_gfx void @test_call_external_void_func_v3f16_inreg() #0 { ; GFX9-NEXT: v_writelane_b32 v40, s33, 4 ; GFX9-NEXT: v_writelane_b32 v40, s4, 0 ; GFX9-NEXT: v_writelane_b32 v40, s5, 1 -; GFX9-NEXT: s_load_dwordx2 s[4:5], s[34:35], 0x0 +; GFX9-NEXT: v_writelane_b32 v40, s30, 2 +; GFX9-NEXT: s_load_dwordx2 s[4:5], s[30:31], 0x0 ; GFX9-NEXT: s_mov_b32 s33, s32 ; GFX9-NEXT: s_addk_i32 s32, 0x400 -; GFX9-NEXT: v_writelane_b32 v40, s30, 2 ; GFX9-NEXT: v_writelane_b32 v40, s31, 3 -; GFX9-NEXT: s_getpc_b64 s[34:35] -; GFX9-NEXT: s_add_u32 s34, s34, external_void_func_v3f16_inreg@rel32@lo+4 -; GFX9-NEXT: s_addc_u32 s35, s35, external_void_func_v3f16_inreg@rel32@hi+12 -; GFX9-NEXT: s_swappc_b64 s[30:31], s[34:35] -; GFX9-NEXT: v_readlane_b32 s31, v40, 3 +; GFX9-NEXT: s_getpc_b64 s[30:31] +; GFX9-NEXT: s_add_u32 s30, s30, external_void_func_v3f16_inreg@rel32@lo+4 +; GFX9-NEXT: s_addc_u32 s31, s31, external_void_func_v3f16_inreg@rel32@hi+12 +; GFX9-NEXT: s_swappc_b64 s[30:31], s[30:31] ; GFX9-NEXT: v_readlane_b32 s30, v40, 2 +; GFX9-NEXT: v_readlane_b32 s31, v40, 3 ; GFX9-NEXT: v_readlane_b32 s5, v40, 1 ; GFX9-NEXT: v_readlane_b32 s4, v40, 0 ; GFX9-NEXT: s_addk_i32 s32, 0xfc00 @@ -7261,15 +7257,15 @@ define amdgpu_gfx void @test_call_external_void_func_v3f16_inreg() #0 { ; GFX10-NEXT: s_addk_i32 s32, 0x200 ; GFX10-NEXT: v_writelane_b32 v40, s4, 0 ; GFX10-NEXT: v_writelane_b32 v40, s5, 1 -; GFX10-NEXT: s_load_dwordx2 s[4:5], s[34:35], 0x0 -; GFX10-NEXT: s_getpc_b64 s[34:35] -; GFX10-NEXT: s_add_u32 s34, s34, external_void_func_v3f16_inreg@rel32@lo+4 -; GFX10-NEXT: s_addc_u32 s35, s35, external_void_func_v3f16_inreg@rel32@hi+12 ; GFX10-NEXT: v_writelane_b32 v40, s30, 2 +; GFX10-NEXT: s_load_dwordx2 s[4:5], s[30:31], 0x0 ; GFX10-NEXT: v_writelane_b32 v40, s31, 3 -; GFX10-NEXT: s_swappc_b64 s[30:31], s[34:35] -; GFX10-NEXT: v_readlane_b32 
s31, v40, 3 +; GFX10-NEXT: s_getpc_b64 s[30:31] +; GFX10-NEXT: s_add_u32 s30, s30, external_void_func_v3f16_inreg@rel32@lo+4 +; GFX10-NEXT: s_addc_u32 s31, s31, external_void_func_v3f16_inreg@rel32@hi+12 +; GFX10-NEXT: s_swappc_b64 s[30:31], s[30:31] ; GFX10-NEXT: v_readlane_b32 s30, v40, 2 +; GFX10-NEXT: v_readlane_b32 s31, v40, 3 ; GFX10-NEXT: v_readlane_b32 s5, v40, 1 ; GFX10-NEXT: v_readlane_b32 s4, v40, 0 ; GFX10-NEXT: s_addk_i32 s32, 0xfe00 @@ -7301,8 +7297,8 @@ define amdgpu_gfx void @test_call_external_void_func_v3f16_inreg() #0 { ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s30, 2 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s31, 3 ; GFX10-SCRATCH-NEXT: s_swappc_b64 s[30:31], s[0:1] -; GFX10-SCRATCH-NEXT: v_readlane_b32 s31, v40, 3 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s30, v40, 2 +; GFX10-SCRATCH-NEXT: v_readlane_b32 s31, v40, 3 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s5, v40, 1 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s4, v40, 0 ; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, -16 @@ -7334,12 +7330,12 @@ define amdgpu_gfx void @test_call_external_void_func_v3i16_imm_inreg() #0 { ; GFX9-NEXT: s_mov_b32 s4, 0x20001 ; GFX9-NEXT: s_mov_b32 s5, 3 ; GFX9-NEXT: v_writelane_b32 v40, s31, 3 -; GFX9-NEXT: s_getpc_b64 s[34:35] -; GFX9-NEXT: s_add_u32 s34, s34, external_void_func_v3i16_inreg@rel32@lo+4 -; GFX9-NEXT: s_addc_u32 s35, s35, external_void_func_v3i16_inreg@rel32@hi+12 -; GFX9-NEXT: s_swappc_b64 s[30:31], s[34:35] -; GFX9-NEXT: v_readlane_b32 s31, v40, 3 +; GFX9-NEXT: s_getpc_b64 s[30:31] +; GFX9-NEXT: s_add_u32 s30, s30, external_void_func_v3i16_inreg@rel32@lo+4 +; GFX9-NEXT: s_addc_u32 s31, s31, external_void_func_v3i16_inreg@rel32@hi+12 +; GFX9-NEXT: s_swappc_b64 s[30:31], s[30:31] ; GFX9-NEXT: v_readlane_b32 s30, v40, 2 +; GFX9-NEXT: v_readlane_b32 s31, v40, 3 ; GFX9-NEXT: v_readlane_b32 s5, v40, 1 ; GFX9-NEXT: v_readlane_b32 s4, v40, 0 ; GFX9-NEXT: s_addk_i32 s32, 0xfc00 @@ -7361,18 +7357,18 @@ define amdgpu_gfx void @test_call_external_void_func_v3i16_imm_inreg() #0 { ; GFX10-NEXT: v_writelane_b32 v40, s33, 4 ; GFX10-NEXT: s_mov_b32 s33, s32 ; GFX10-NEXT: s_addk_i32 s32, 0x200 -; GFX10-NEXT: s_getpc_b64 s[34:35] -; GFX10-NEXT: s_add_u32 s34, s34, external_void_func_v3i16_inreg@rel32@lo+4 -; GFX10-NEXT: s_addc_u32 s35, s35, external_void_func_v3i16_inreg@rel32@hi+12 ; GFX10-NEXT: v_writelane_b32 v40, s4, 0 ; GFX10-NEXT: s_mov_b32 s4, 0x20001 ; GFX10-NEXT: v_writelane_b32 v40, s5, 1 ; GFX10-NEXT: s_mov_b32 s5, 3 ; GFX10-NEXT: v_writelane_b32 v40, s30, 2 ; GFX10-NEXT: v_writelane_b32 v40, s31, 3 -; GFX10-NEXT: s_swappc_b64 s[30:31], s[34:35] -; GFX10-NEXT: v_readlane_b32 s31, v40, 3 +; GFX10-NEXT: s_getpc_b64 s[30:31] +; GFX10-NEXT: s_add_u32 s30, s30, external_void_func_v3i16_inreg@rel32@lo+4 +; GFX10-NEXT: s_addc_u32 s31, s31, external_void_func_v3i16_inreg@rel32@hi+12 +; GFX10-NEXT: s_swappc_b64 s[30:31], s[30:31] ; GFX10-NEXT: v_readlane_b32 s30, v40, 2 +; GFX10-NEXT: v_readlane_b32 s31, v40, 3 ; GFX10-NEXT: v_readlane_b32 s5, v40, 1 ; GFX10-NEXT: v_readlane_b32 s4, v40, 0 ; GFX10-NEXT: s_addk_i32 s32, 0xfe00 @@ -7405,8 +7401,8 @@ define amdgpu_gfx void @test_call_external_void_func_v3i16_imm_inreg() #0 { ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s30, 2 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s31, 3 ; GFX10-SCRATCH-NEXT: s_swappc_b64 s[30:31], s[0:1] -; GFX10-SCRATCH-NEXT: v_readlane_b32 s31, v40, 3 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s30, v40, 2 +; GFX10-SCRATCH-NEXT: v_readlane_b32 s31, v40, 3 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s5, v40, 1 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s4, v40, 
0 ; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, -16 @@ -7437,12 +7433,12 @@ define amdgpu_gfx void @test_call_external_void_func_v3f16_imm_inreg() #0 { ; GFX9-NEXT: s_mov_b32 s4, 0x40003c00 ; GFX9-NEXT: s_movk_i32 s5, 0x4400 ; GFX9-NEXT: v_writelane_b32 v40, s31, 3 -; GFX9-NEXT: s_getpc_b64 s[34:35] -; GFX9-NEXT: s_add_u32 s34, s34, external_void_func_v3f16_inreg@rel32@lo+4 -; GFX9-NEXT: s_addc_u32 s35, s35, external_void_func_v3f16_inreg@rel32@hi+12 -; GFX9-NEXT: s_swappc_b64 s[30:31], s[34:35] -; GFX9-NEXT: v_readlane_b32 s31, v40, 3 +; GFX9-NEXT: s_getpc_b64 s[30:31] +; GFX9-NEXT: s_add_u32 s30, s30, external_void_func_v3f16_inreg@rel32@lo+4 +; GFX9-NEXT: s_addc_u32 s31, s31, external_void_func_v3f16_inreg@rel32@hi+12 +; GFX9-NEXT: s_swappc_b64 s[30:31], s[30:31] ; GFX9-NEXT: v_readlane_b32 s30, v40, 2 +; GFX9-NEXT: v_readlane_b32 s31, v40, 3 ; GFX9-NEXT: v_readlane_b32 s5, v40, 1 ; GFX9-NEXT: v_readlane_b32 s4, v40, 0 ; GFX9-NEXT: s_addk_i32 s32, 0xfc00 @@ -7464,18 +7460,18 @@ define amdgpu_gfx void @test_call_external_void_func_v3f16_imm_inreg() #0 { ; GFX10-NEXT: v_writelane_b32 v40, s33, 4 ; GFX10-NEXT: s_mov_b32 s33, s32 ; GFX10-NEXT: s_addk_i32 s32, 0x200 -; GFX10-NEXT: s_getpc_b64 s[34:35] -; GFX10-NEXT: s_add_u32 s34, s34, external_void_func_v3f16_inreg@rel32@lo+4 -; GFX10-NEXT: s_addc_u32 s35, s35, external_void_func_v3f16_inreg@rel32@hi+12 ; GFX10-NEXT: v_writelane_b32 v40, s4, 0 ; GFX10-NEXT: s_mov_b32 s4, 0x40003c00 ; GFX10-NEXT: v_writelane_b32 v40, s5, 1 ; GFX10-NEXT: s_movk_i32 s5, 0x4400 ; GFX10-NEXT: v_writelane_b32 v40, s30, 2 ; GFX10-NEXT: v_writelane_b32 v40, s31, 3 -; GFX10-NEXT: s_swappc_b64 s[30:31], s[34:35] -; GFX10-NEXT: v_readlane_b32 s31, v40, 3 +; GFX10-NEXT: s_getpc_b64 s[30:31] +; GFX10-NEXT: s_add_u32 s30, s30, external_void_func_v3f16_inreg@rel32@lo+4 +; GFX10-NEXT: s_addc_u32 s31, s31, external_void_func_v3f16_inreg@rel32@hi+12 +; GFX10-NEXT: s_swappc_b64 s[30:31], s[30:31] ; GFX10-NEXT: v_readlane_b32 s30, v40, 2 +; GFX10-NEXT: v_readlane_b32 s31, v40, 3 ; GFX10-NEXT: v_readlane_b32 s5, v40, 1 ; GFX10-NEXT: v_readlane_b32 s4, v40, 0 ; GFX10-NEXT: s_addk_i32 s32, 0xfe00 @@ -7508,8 +7504,8 @@ define amdgpu_gfx void @test_call_external_void_func_v3f16_imm_inreg() #0 { ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s30, 2 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s31, 3 ; GFX10-SCRATCH-NEXT: s_swappc_b64 s[30:31], s[0:1] -; GFX10-SCRATCH-NEXT: v_readlane_b32 s31, v40, 3 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s30, v40, 2 +; GFX10-SCRATCH-NEXT: v_readlane_b32 s31, v40, 3 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s5, v40, 1 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s4, v40, 0 ; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, -16 @@ -7534,17 +7530,17 @@ define amdgpu_gfx void @test_call_external_void_func_v4i16_inreg() #0 { ; GFX9-NEXT: v_writelane_b32 v40, s33, 4 ; GFX9-NEXT: v_writelane_b32 v40, s4, 0 ; GFX9-NEXT: v_writelane_b32 v40, s5, 1 -; GFX9-NEXT: s_load_dwordx2 s[4:5], s[34:35], 0x0 +; GFX9-NEXT: v_writelane_b32 v40, s30, 2 +; GFX9-NEXT: s_load_dwordx2 s[4:5], s[30:31], 0x0 ; GFX9-NEXT: s_mov_b32 s33, s32 ; GFX9-NEXT: s_addk_i32 s32, 0x400 -; GFX9-NEXT: v_writelane_b32 v40, s30, 2 ; GFX9-NEXT: v_writelane_b32 v40, s31, 3 -; GFX9-NEXT: s_getpc_b64 s[34:35] -; GFX9-NEXT: s_add_u32 s34, s34, external_void_func_v4i16_inreg@rel32@lo+4 -; GFX9-NEXT: s_addc_u32 s35, s35, external_void_func_v4i16_inreg@rel32@hi+12 -; GFX9-NEXT: s_swappc_b64 s[30:31], s[34:35] -; GFX9-NEXT: v_readlane_b32 s31, v40, 3 +; GFX9-NEXT: s_getpc_b64 s[30:31] +; GFX9-NEXT: s_add_u32 s30, s30, 
external_void_func_v4i16_inreg@rel32@lo+4 +; GFX9-NEXT: s_addc_u32 s31, s31, external_void_func_v4i16_inreg@rel32@hi+12 +; GFX9-NEXT: s_swappc_b64 s[30:31], s[30:31] ; GFX9-NEXT: v_readlane_b32 s30, v40, 2 +; GFX9-NEXT: v_readlane_b32 s31, v40, 3 ; GFX9-NEXT: v_readlane_b32 s5, v40, 1 ; GFX9-NEXT: v_readlane_b32 s4, v40, 0 ; GFX9-NEXT: s_addk_i32 s32, 0xfc00 @@ -7568,15 +7564,15 @@ define amdgpu_gfx void @test_call_external_void_func_v4i16_inreg() #0 { ; GFX10-NEXT: s_addk_i32 s32, 0x200 ; GFX10-NEXT: v_writelane_b32 v40, s4, 0 ; GFX10-NEXT: v_writelane_b32 v40, s5, 1 -; GFX10-NEXT: s_load_dwordx2 s[4:5], s[34:35], 0x0 -; GFX10-NEXT: s_getpc_b64 s[34:35] -; GFX10-NEXT: s_add_u32 s34, s34, external_void_func_v4i16_inreg@rel32@lo+4 -; GFX10-NEXT: s_addc_u32 s35, s35, external_void_func_v4i16_inreg@rel32@hi+12 ; GFX10-NEXT: v_writelane_b32 v40, s30, 2 +; GFX10-NEXT: s_load_dwordx2 s[4:5], s[30:31], 0x0 ; GFX10-NEXT: v_writelane_b32 v40, s31, 3 -; GFX10-NEXT: s_swappc_b64 s[30:31], s[34:35] -; GFX10-NEXT: v_readlane_b32 s31, v40, 3 +; GFX10-NEXT: s_getpc_b64 s[30:31] +; GFX10-NEXT: s_add_u32 s30, s30, external_void_func_v4i16_inreg@rel32@lo+4 +; GFX10-NEXT: s_addc_u32 s31, s31, external_void_func_v4i16_inreg@rel32@hi+12 +; GFX10-NEXT: s_swappc_b64 s[30:31], s[30:31] ; GFX10-NEXT: v_readlane_b32 s30, v40, 2 +; GFX10-NEXT: v_readlane_b32 s31, v40, 3 ; GFX10-NEXT: v_readlane_b32 s5, v40, 1 ; GFX10-NEXT: v_readlane_b32 s4, v40, 0 ; GFX10-NEXT: s_addk_i32 s32, 0xfe00 @@ -7608,8 +7604,8 @@ define amdgpu_gfx void @test_call_external_void_func_v4i16_inreg() #0 { ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s30, 2 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s31, 3 ; GFX10-SCRATCH-NEXT: s_swappc_b64 s[30:31], s[0:1] -; GFX10-SCRATCH-NEXT: v_readlane_b32 s31, v40, 3 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s30, v40, 2 +; GFX10-SCRATCH-NEXT: v_readlane_b32 s31, v40, 3 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s5, v40, 1 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s4, v40, 0 ; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, -16 @@ -7641,12 +7637,12 @@ define amdgpu_gfx void @test_call_external_void_func_v4i16_imm_inreg() #0 { ; GFX9-NEXT: s_mov_b32 s4, 0x20001 ; GFX9-NEXT: s_mov_b32 s5, 0x40003 ; GFX9-NEXT: v_writelane_b32 v40, s31, 3 -; GFX9-NEXT: s_getpc_b64 s[34:35] -; GFX9-NEXT: s_add_u32 s34, s34, external_void_func_v4i16_inreg@rel32@lo+4 -; GFX9-NEXT: s_addc_u32 s35, s35, external_void_func_v4i16_inreg@rel32@hi+12 -; GFX9-NEXT: s_swappc_b64 s[30:31], s[34:35] -; GFX9-NEXT: v_readlane_b32 s31, v40, 3 +; GFX9-NEXT: s_getpc_b64 s[30:31] +; GFX9-NEXT: s_add_u32 s30, s30, external_void_func_v4i16_inreg@rel32@lo+4 +; GFX9-NEXT: s_addc_u32 s31, s31, external_void_func_v4i16_inreg@rel32@hi+12 +; GFX9-NEXT: s_swappc_b64 s[30:31], s[30:31] ; GFX9-NEXT: v_readlane_b32 s30, v40, 2 +; GFX9-NEXT: v_readlane_b32 s31, v40, 3 ; GFX9-NEXT: v_readlane_b32 s5, v40, 1 ; GFX9-NEXT: v_readlane_b32 s4, v40, 0 ; GFX9-NEXT: s_addk_i32 s32, 0xfc00 @@ -7668,18 +7664,18 @@ define amdgpu_gfx void @test_call_external_void_func_v4i16_imm_inreg() #0 { ; GFX10-NEXT: v_writelane_b32 v40, s33, 4 ; GFX10-NEXT: s_mov_b32 s33, s32 ; GFX10-NEXT: s_addk_i32 s32, 0x200 -; GFX10-NEXT: s_getpc_b64 s[34:35] -; GFX10-NEXT: s_add_u32 s34, s34, external_void_func_v4i16_inreg@rel32@lo+4 -; GFX10-NEXT: s_addc_u32 s35, s35, external_void_func_v4i16_inreg@rel32@hi+12 ; GFX10-NEXT: v_writelane_b32 v40, s4, 0 ; GFX10-NEXT: s_mov_b32 s4, 0x20001 ; GFX10-NEXT: v_writelane_b32 v40, s5, 1 ; GFX10-NEXT: s_mov_b32 s5, 0x40003 ; GFX10-NEXT: v_writelane_b32 v40, s30, 2 ; GFX10-NEXT: 
v_writelane_b32 v40, s31, 3 -; GFX10-NEXT: s_swappc_b64 s[30:31], s[34:35] -; GFX10-NEXT: v_readlane_b32 s31, v40, 3 +; GFX10-NEXT: s_getpc_b64 s[30:31] +; GFX10-NEXT: s_add_u32 s30, s30, external_void_func_v4i16_inreg@rel32@lo+4 +; GFX10-NEXT: s_addc_u32 s31, s31, external_void_func_v4i16_inreg@rel32@hi+12 +; GFX10-NEXT: s_swappc_b64 s[30:31], s[30:31] ; GFX10-NEXT: v_readlane_b32 s30, v40, 2 +; GFX10-NEXT: v_readlane_b32 s31, v40, 3 ; GFX10-NEXT: v_readlane_b32 s5, v40, 1 ; GFX10-NEXT: v_readlane_b32 s4, v40, 0 ; GFX10-NEXT: s_addk_i32 s32, 0xfe00 @@ -7712,8 +7708,8 @@ define amdgpu_gfx void @test_call_external_void_func_v4i16_imm_inreg() #0 { ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s30, 2 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s31, 3 ; GFX10-SCRATCH-NEXT: s_swappc_b64 s[30:31], s[0:1] -; GFX10-SCRATCH-NEXT: v_readlane_b32 s31, v40, 3 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s30, v40, 2 +; GFX10-SCRATCH-NEXT: v_readlane_b32 s31, v40, 3 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s5, v40, 1 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s4, v40, 0 ; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, -16 @@ -7737,17 +7733,17 @@ define amdgpu_gfx void @test_call_external_void_func_v2f16_inreg() #0 { ; GFX9-NEXT: s_mov_b64 exec, s[34:35] ; GFX9-NEXT: v_writelane_b32 v40, s33, 3 ; GFX9-NEXT: v_writelane_b32 v40, s4, 0 -; GFX9-NEXT: s_load_dword s4, s[34:35], 0x0 +; GFX9-NEXT: v_writelane_b32 v40, s30, 1 +; GFX9-NEXT: s_load_dword s4, s[30:31], 0x0 ; GFX9-NEXT: s_mov_b32 s33, s32 ; GFX9-NEXT: s_addk_i32 s32, 0x400 -; GFX9-NEXT: v_writelane_b32 v40, s30, 1 ; GFX9-NEXT: v_writelane_b32 v40, s31, 2 -; GFX9-NEXT: s_getpc_b64 s[34:35] -; GFX9-NEXT: s_add_u32 s34, s34, external_void_func_v2f16_inreg@rel32@lo+4 -; GFX9-NEXT: s_addc_u32 s35, s35, external_void_func_v2f16_inreg@rel32@hi+12 -; GFX9-NEXT: s_swappc_b64 s[30:31], s[34:35] -; GFX9-NEXT: v_readlane_b32 s31, v40, 2 +; GFX9-NEXT: s_getpc_b64 s[30:31] +; GFX9-NEXT: s_add_u32 s30, s30, external_void_func_v2f16_inreg@rel32@lo+4 +; GFX9-NEXT: s_addc_u32 s31, s31, external_void_func_v2f16_inreg@rel32@hi+12 +; GFX9-NEXT: s_swappc_b64 s[30:31], s[30:31] ; GFX9-NEXT: v_readlane_b32 s30, v40, 1 +; GFX9-NEXT: v_readlane_b32 s31, v40, 2 ; GFX9-NEXT: v_readlane_b32 s4, v40, 0 ; GFX9-NEXT: s_addk_i32 s32, 0xfc00 ; GFX9-NEXT: v_readlane_b32 s33, v40, 3 @@ -7769,15 +7765,15 @@ define amdgpu_gfx void @test_call_external_void_func_v2f16_inreg() #0 { ; GFX10-NEXT: s_mov_b32 s33, s32 ; GFX10-NEXT: s_addk_i32 s32, 0x200 ; GFX10-NEXT: v_writelane_b32 v40, s4, 0 -; GFX10-NEXT: s_load_dword s4, s[34:35], 0x0 -; GFX10-NEXT: s_getpc_b64 s[34:35] -; GFX10-NEXT: s_add_u32 s34, s34, external_void_func_v2f16_inreg@rel32@lo+4 -; GFX10-NEXT: s_addc_u32 s35, s35, external_void_func_v2f16_inreg@rel32@hi+12 ; GFX10-NEXT: v_writelane_b32 v40, s30, 1 +; GFX10-NEXT: s_load_dword s4, s[30:31], 0x0 ; GFX10-NEXT: v_writelane_b32 v40, s31, 2 -; GFX10-NEXT: s_swappc_b64 s[30:31], s[34:35] -; GFX10-NEXT: v_readlane_b32 s31, v40, 2 +; GFX10-NEXT: s_getpc_b64 s[30:31] +; GFX10-NEXT: s_add_u32 s30, s30, external_void_func_v2f16_inreg@rel32@lo+4 +; GFX10-NEXT: s_addc_u32 s31, s31, external_void_func_v2f16_inreg@rel32@hi+12 +; GFX10-NEXT: s_swappc_b64 s[30:31], s[30:31] ; GFX10-NEXT: v_readlane_b32 s30, v40, 1 +; GFX10-NEXT: v_readlane_b32 s31, v40, 2 ; GFX10-NEXT: v_readlane_b32 s4, v40, 0 ; GFX10-NEXT: s_addk_i32 s32, 0xfe00 ; GFX10-NEXT: v_readlane_b32 s33, v40, 3 @@ -7807,8 +7803,8 @@ define amdgpu_gfx void @test_call_external_void_func_v2f16_inreg() #0 { ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s30, 1 ; 
GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s31, 2 ; GFX10-SCRATCH-NEXT: s_swappc_b64 s[30:31], s[0:1] -; GFX10-SCRATCH-NEXT: v_readlane_b32 s31, v40, 2 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s30, v40, 1 +; GFX10-SCRATCH-NEXT: v_readlane_b32 s31, v40, 2 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s4, v40, 0 ; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, -16 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s33, v40, 3 @@ -7833,17 +7829,17 @@ define amdgpu_gfx void @test_call_external_void_func_v2i32_inreg() #0 { ; GFX9-NEXT: v_writelane_b32 v40, s33, 4 ; GFX9-NEXT: v_writelane_b32 v40, s4, 0 ; GFX9-NEXT: v_writelane_b32 v40, s5, 1 -; GFX9-NEXT: s_load_dwordx2 s[4:5], s[34:35], 0x0 +; GFX9-NEXT: v_writelane_b32 v40, s30, 2 +; GFX9-NEXT: s_load_dwordx2 s[4:5], s[30:31], 0x0 ; GFX9-NEXT: s_mov_b32 s33, s32 ; GFX9-NEXT: s_addk_i32 s32, 0x400 -; GFX9-NEXT: v_writelane_b32 v40, s30, 2 ; GFX9-NEXT: v_writelane_b32 v40, s31, 3 -; GFX9-NEXT: s_getpc_b64 s[34:35] -; GFX9-NEXT: s_add_u32 s34, s34, external_void_func_v2i32_inreg@rel32@lo+4 -; GFX9-NEXT: s_addc_u32 s35, s35, external_void_func_v2i32_inreg@rel32@hi+12 -; GFX9-NEXT: s_swappc_b64 s[30:31], s[34:35] -; GFX9-NEXT: v_readlane_b32 s31, v40, 3 +; GFX9-NEXT: s_getpc_b64 s[30:31] +; GFX9-NEXT: s_add_u32 s30, s30, external_void_func_v2i32_inreg@rel32@lo+4 +; GFX9-NEXT: s_addc_u32 s31, s31, external_void_func_v2i32_inreg@rel32@hi+12 +; GFX9-NEXT: s_swappc_b64 s[30:31], s[30:31] ; GFX9-NEXT: v_readlane_b32 s30, v40, 2 +; GFX9-NEXT: v_readlane_b32 s31, v40, 3 ; GFX9-NEXT: v_readlane_b32 s5, v40, 1 ; GFX9-NEXT: v_readlane_b32 s4, v40, 0 ; GFX9-NEXT: s_addk_i32 s32, 0xfc00 @@ -7867,15 +7863,15 @@ define amdgpu_gfx void @test_call_external_void_func_v2i32_inreg() #0 { ; GFX10-NEXT: s_addk_i32 s32, 0x200 ; GFX10-NEXT: v_writelane_b32 v40, s4, 0 ; GFX10-NEXT: v_writelane_b32 v40, s5, 1 -; GFX10-NEXT: s_load_dwordx2 s[4:5], s[34:35], 0x0 -; GFX10-NEXT: s_getpc_b64 s[34:35] -; GFX10-NEXT: s_add_u32 s34, s34, external_void_func_v2i32_inreg@rel32@lo+4 -; GFX10-NEXT: s_addc_u32 s35, s35, external_void_func_v2i32_inreg@rel32@hi+12 ; GFX10-NEXT: v_writelane_b32 v40, s30, 2 +; GFX10-NEXT: s_load_dwordx2 s[4:5], s[30:31], 0x0 ; GFX10-NEXT: v_writelane_b32 v40, s31, 3 -; GFX10-NEXT: s_swappc_b64 s[30:31], s[34:35] -; GFX10-NEXT: v_readlane_b32 s31, v40, 3 +; GFX10-NEXT: s_getpc_b64 s[30:31] +; GFX10-NEXT: s_add_u32 s30, s30, external_void_func_v2i32_inreg@rel32@lo+4 +; GFX10-NEXT: s_addc_u32 s31, s31, external_void_func_v2i32_inreg@rel32@hi+12 +; GFX10-NEXT: s_swappc_b64 s[30:31], s[30:31] ; GFX10-NEXT: v_readlane_b32 s30, v40, 2 +; GFX10-NEXT: v_readlane_b32 s31, v40, 3 ; GFX10-NEXT: v_readlane_b32 s5, v40, 1 ; GFX10-NEXT: v_readlane_b32 s4, v40, 0 ; GFX10-NEXT: s_addk_i32 s32, 0xfe00 @@ -7907,8 +7903,8 @@ define amdgpu_gfx void @test_call_external_void_func_v2i32_inreg() #0 { ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s30, 2 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s31, 3 ; GFX10-SCRATCH-NEXT: s_swappc_b64 s[30:31], s[0:1] -; GFX10-SCRATCH-NEXT: v_readlane_b32 s31, v40, 3 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s30, v40, 2 +; GFX10-SCRATCH-NEXT: v_readlane_b32 s31, v40, 3 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s5, v40, 1 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s4, v40, 0 ; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, -16 @@ -7940,12 +7936,12 @@ define amdgpu_gfx void @test_call_external_void_func_v2i32_imm_inreg() #0 { ; GFX9-NEXT: s_mov_b32 s4, 1 ; GFX9-NEXT: s_mov_b32 s5, 2 ; GFX9-NEXT: v_writelane_b32 v40, s31, 3 -; GFX9-NEXT: s_getpc_b64 s[34:35] -; GFX9-NEXT: s_add_u32 s34, s34, 
external_void_func_v2i32_inreg@rel32@lo+4 -; GFX9-NEXT: s_addc_u32 s35, s35, external_void_func_v2i32_inreg@rel32@hi+12 -; GFX9-NEXT: s_swappc_b64 s[30:31], s[34:35] -; GFX9-NEXT: v_readlane_b32 s31, v40, 3 +; GFX9-NEXT: s_getpc_b64 s[30:31] +; GFX9-NEXT: s_add_u32 s30, s30, external_void_func_v2i32_inreg@rel32@lo+4 +; GFX9-NEXT: s_addc_u32 s31, s31, external_void_func_v2i32_inreg@rel32@hi+12 +; GFX9-NEXT: s_swappc_b64 s[30:31], s[30:31] ; GFX9-NEXT: v_readlane_b32 s30, v40, 2 +; GFX9-NEXT: v_readlane_b32 s31, v40, 3 ; GFX9-NEXT: v_readlane_b32 s5, v40, 1 ; GFX9-NEXT: v_readlane_b32 s4, v40, 0 ; GFX9-NEXT: s_addk_i32 s32, 0xfc00 @@ -7967,18 +7963,18 @@ define amdgpu_gfx void @test_call_external_void_func_v2i32_imm_inreg() #0 { ; GFX10-NEXT: v_writelane_b32 v40, s33, 4 ; GFX10-NEXT: s_mov_b32 s33, s32 ; GFX10-NEXT: s_addk_i32 s32, 0x200 -; GFX10-NEXT: s_getpc_b64 s[34:35] -; GFX10-NEXT: s_add_u32 s34, s34, external_void_func_v2i32_inreg@rel32@lo+4 -; GFX10-NEXT: s_addc_u32 s35, s35, external_void_func_v2i32_inreg@rel32@hi+12 ; GFX10-NEXT: v_writelane_b32 v40, s4, 0 ; GFX10-NEXT: s_mov_b32 s4, 1 ; GFX10-NEXT: v_writelane_b32 v40, s5, 1 ; GFX10-NEXT: s_mov_b32 s5, 2 ; GFX10-NEXT: v_writelane_b32 v40, s30, 2 ; GFX10-NEXT: v_writelane_b32 v40, s31, 3 -; GFX10-NEXT: s_swappc_b64 s[30:31], s[34:35] -; GFX10-NEXT: v_readlane_b32 s31, v40, 3 +; GFX10-NEXT: s_getpc_b64 s[30:31] +; GFX10-NEXT: s_add_u32 s30, s30, external_void_func_v2i32_inreg@rel32@lo+4 +; GFX10-NEXT: s_addc_u32 s31, s31, external_void_func_v2i32_inreg@rel32@hi+12 +; GFX10-NEXT: s_swappc_b64 s[30:31], s[30:31] ; GFX10-NEXT: v_readlane_b32 s30, v40, 2 +; GFX10-NEXT: v_readlane_b32 s31, v40, 3 ; GFX10-NEXT: v_readlane_b32 s5, v40, 1 ; GFX10-NEXT: v_readlane_b32 s4, v40, 0 ; GFX10-NEXT: s_addk_i32 s32, 0xfe00 @@ -8011,8 +8007,8 @@ define amdgpu_gfx void @test_call_external_void_func_v2i32_imm_inreg() #0 { ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s30, 2 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s31, 3 ; GFX10-SCRATCH-NEXT: s_swappc_b64 s[30:31], s[0:1] -; GFX10-SCRATCH-NEXT: v_readlane_b32 s31, v40, 3 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s30, v40, 2 +; GFX10-SCRATCH-NEXT: v_readlane_b32 s31, v40, 3 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s5, v40, 1 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s4, v40, 0 ; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, -16 @@ -8045,12 +8041,12 @@ define amdgpu_gfx void @test_call_external_void_func_v3i32_imm_inreg(i32) #0 { ; GFX9-NEXT: s_mov_b32 s5, 4 ; GFX9-NEXT: s_mov_b32 s6, 5 ; GFX9-NEXT: v_writelane_b32 v40, s31, 4 -; GFX9-NEXT: s_getpc_b64 s[34:35] -; GFX9-NEXT: s_add_u32 s34, s34, external_void_func_v3i32_inreg@rel32@lo+4 -; GFX9-NEXT: s_addc_u32 s35, s35, external_void_func_v3i32_inreg@rel32@hi+12 -; GFX9-NEXT: s_swappc_b64 s[30:31], s[34:35] -; GFX9-NEXT: v_readlane_b32 s31, v40, 4 +; GFX9-NEXT: s_getpc_b64 s[30:31] +; GFX9-NEXT: s_add_u32 s30, s30, external_void_func_v3i32_inreg@rel32@lo+4 +; GFX9-NEXT: s_addc_u32 s31, s31, external_void_func_v3i32_inreg@rel32@hi+12 +; GFX9-NEXT: s_swappc_b64 s[30:31], s[30:31] ; GFX9-NEXT: v_readlane_b32 s30, v40, 3 +; GFX9-NEXT: v_readlane_b32 s31, v40, 4 ; GFX9-NEXT: v_readlane_b32 s6, v40, 2 ; GFX9-NEXT: v_readlane_b32 s5, v40, 1 ; GFX9-NEXT: v_readlane_b32 s4, v40, 0 @@ -8073,9 +8069,6 @@ define amdgpu_gfx void @test_call_external_void_func_v3i32_imm_inreg(i32) #0 { ; GFX10-NEXT: v_writelane_b32 v40, s33, 5 ; GFX10-NEXT: s_mov_b32 s33, s32 ; GFX10-NEXT: s_addk_i32 s32, 0x200 -; GFX10-NEXT: s_getpc_b64 s[34:35] -; GFX10-NEXT: s_add_u32 s34, s34, 
external_void_func_v3i32_inreg@rel32@lo+4 -; GFX10-NEXT: s_addc_u32 s35, s35, external_void_func_v3i32_inreg@rel32@hi+12 ; GFX10-NEXT: v_writelane_b32 v40, s4, 0 ; GFX10-NEXT: s_mov_b32 s4, 3 ; GFX10-NEXT: v_writelane_b32 v40, s5, 1 @@ -8084,9 +8077,12 @@ define amdgpu_gfx void @test_call_external_void_func_v3i32_imm_inreg(i32) #0 { ; GFX10-NEXT: s_mov_b32 s6, 5 ; GFX10-NEXT: v_writelane_b32 v40, s30, 3 ; GFX10-NEXT: v_writelane_b32 v40, s31, 4 -; GFX10-NEXT: s_swappc_b64 s[30:31], s[34:35] -; GFX10-NEXT: v_readlane_b32 s31, v40, 4 +; GFX10-NEXT: s_getpc_b64 s[30:31] +; GFX10-NEXT: s_add_u32 s30, s30, external_void_func_v3i32_inreg@rel32@lo+4 +; GFX10-NEXT: s_addc_u32 s31, s31, external_void_func_v3i32_inreg@rel32@hi+12 +; GFX10-NEXT: s_swappc_b64 s[30:31], s[30:31] ; GFX10-NEXT: v_readlane_b32 s30, v40, 3 +; GFX10-NEXT: v_readlane_b32 s31, v40, 4 ; GFX10-NEXT: v_readlane_b32 s6, v40, 2 ; GFX10-NEXT: v_readlane_b32 s5, v40, 1 ; GFX10-NEXT: v_readlane_b32 s4, v40, 0 @@ -8122,8 +8118,8 @@ define amdgpu_gfx void @test_call_external_void_func_v3i32_imm_inreg(i32) #0 { ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s30, 3 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s31, 4 ; GFX10-SCRATCH-NEXT: s_swappc_b64 s[30:31], s[0:1] -; GFX10-SCRATCH-NEXT: v_readlane_b32 s31, v40, 4 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s30, v40, 3 +; GFX10-SCRATCH-NEXT: v_readlane_b32 s31, v40, 4 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s6, v40, 2 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s5, v40, 1 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s4, v40, 0 @@ -8159,12 +8155,12 @@ define amdgpu_gfx void @test_call_external_void_func_v3i32_i32_inreg(i32) #0 { ; GFX9-NEXT: s_mov_b32 s6, 5 ; GFX9-NEXT: s_mov_b32 s7, 6 ; GFX9-NEXT: v_writelane_b32 v40, s31, 5 -; GFX9-NEXT: s_getpc_b64 s[34:35] -; GFX9-NEXT: s_add_u32 s34, s34, external_void_func_v3i32_i32_inreg@rel32@lo+4 -; GFX9-NEXT: s_addc_u32 s35, s35, external_void_func_v3i32_i32_inreg@rel32@hi+12 -; GFX9-NEXT: s_swappc_b64 s[30:31], s[34:35] -; GFX9-NEXT: v_readlane_b32 s31, v40, 5 +; GFX9-NEXT: s_getpc_b64 s[30:31] +; GFX9-NEXT: s_add_u32 s30, s30, external_void_func_v3i32_i32_inreg@rel32@lo+4 +; GFX9-NEXT: s_addc_u32 s31, s31, external_void_func_v3i32_i32_inreg@rel32@hi+12 +; GFX9-NEXT: s_swappc_b64 s[30:31], s[30:31] ; GFX9-NEXT: v_readlane_b32 s30, v40, 4 +; GFX9-NEXT: v_readlane_b32 s31, v40, 5 ; GFX9-NEXT: v_readlane_b32 s7, v40, 3 ; GFX9-NEXT: v_readlane_b32 s6, v40, 2 ; GFX9-NEXT: v_readlane_b32 s5, v40, 1 @@ -8188,9 +8184,6 @@ define amdgpu_gfx void @test_call_external_void_func_v3i32_i32_inreg(i32) #0 { ; GFX10-NEXT: v_writelane_b32 v40, s33, 6 ; GFX10-NEXT: s_mov_b32 s33, s32 ; GFX10-NEXT: s_addk_i32 s32, 0x200 -; GFX10-NEXT: s_getpc_b64 s[34:35] -; GFX10-NEXT: s_add_u32 s34, s34, external_void_func_v3i32_i32_inreg@rel32@lo+4 -; GFX10-NEXT: s_addc_u32 s35, s35, external_void_func_v3i32_i32_inreg@rel32@hi+12 ; GFX10-NEXT: v_writelane_b32 v40, s4, 0 ; GFX10-NEXT: s_mov_b32 s4, 3 ; GFX10-NEXT: v_writelane_b32 v40, s5, 1 @@ -8201,9 +8194,12 @@ define amdgpu_gfx void @test_call_external_void_func_v3i32_i32_inreg(i32) #0 { ; GFX10-NEXT: s_mov_b32 s7, 6 ; GFX10-NEXT: v_writelane_b32 v40, s30, 4 ; GFX10-NEXT: v_writelane_b32 v40, s31, 5 -; GFX10-NEXT: s_swappc_b64 s[30:31], s[34:35] -; GFX10-NEXT: v_readlane_b32 s31, v40, 5 +; GFX10-NEXT: s_getpc_b64 s[30:31] +; GFX10-NEXT: s_add_u32 s30, s30, external_void_func_v3i32_i32_inreg@rel32@lo+4 +; GFX10-NEXT: s_addc_u32 s31, s31, external_void_func_v3i32_i32_inreg@rel32@hi+12 +; GFX10-NEXT: s_swappc_b64 s[30:31], s[30:31] ; GFX10-NEXT: 
v_readlane_b32 s30, v40, 4 +; GFX10-NEXT: v_readlane_b32 s31, v40, 5 ; GFX10-NEXT: v_readlane_b32 s7, v40, 3 ; GFX10-NEXT: v_readlane_b32 s6, v40, 2 ; GFX10-NEXT: v_readlane_b32 s5, v40, 1 @@ -8242,8 +8238,8 @@ define amdgpu_gfx void @test_call_external_void_func_v3i32_i32_inreg(i32) #0 { ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s30, 4 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s31, 5 ; GFX10-SCRATCH-NEXT: s_swappc_b64 s[30:31], s[0:1] -; GFX10-SCRATCH-NEXT: v_readlane_b32 s31, v40, 5 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s30, v40, 4 +; GFX10-SCRATCH-NEXT: v_readlane_b32 s31, v40, 5 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s7, v40, 3 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s6, v40, 2 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s5, v40, 1 @@ -8272,17 +8268,17 @@ define amdgpu_gfx void @test_call_external_void_func_v4i32_inreg() #0 { ; GFX9-NEXT: v_writelane_b32 v40, s5, 1 ; GFX9-NEXT: v_writelane_b32 v40, s6, 2 ; GFX9-NEXT: v_writelane_b32 v40, s7, 3 -; GFX9-NEXT: s_load_dwordx4 s[4:7], s[34:35], 0x0 +; GFX9-NEXT: v_writelane_b32 v40, s30, 4 +; GFX9-NEXT: s_load_dwordx4 s[4:7], s[30:31], 0x0 ; GFX9-NEXT: s_mov_b32 s33, s32 ; GFX9-NEXT: s_addk_i32 s32, 0x400 -; GFX9-NEXT: v_writelane_b32 v40, s30, 4 ; GFX9-NEXT: v_writelane_b32 v40, s31, 5 -; GFX9-NEXT: s_getpc_b64 s[34:35] -; GFX9-NEXT: s_add_u32 s34, s34, external_void_func_v4i32_inreg@rel32@lo+4 -; GFX9-NEXT: s_addc_u32 s35, s35, external_void_func_v4i32_inreg@rel32@hi+12 -; GFX9-NEXT: s_swappc_b64 s[30:31], s[34:35] -; GFX9-NEXT: v_readlane_b32 s31, v40, 5 +; GFX9-NEXT: s_getpc_b64 s[30:31] +; GFX9-NEXT: s_add_u32 s30, s30, external_void_func_v4i32_inreg@rel32@lo+4 +; GFX9-NEXT: s_addc_u32 s31, s31, external_void_func_v4i32_inreg@rel32@hi+12 +; GFX9-NEXT: s_swappc_b64 s[30:31], s[30:31] ; GFX9-NEXT: v_readlane_b32 s30, v40, 4 +; GFX9-NEXT: v_readlane_b32 s31, v40, 5 ; GFX9-NEXT: v_readlane_b32 s7, v40, 3 ; GFX9-NEXT: v_readlane_b32 s6, v40, 2 ; GFX9-NEXT: v_readlane_b32 s5, v40, 1 @@ -8310,15 +8306,15 @@ define amdgpu_gfx void @test_call_external_void_func_v4i32_inreg() #0 { ; GFX10-NEXT: v_writelane_b32 v40, s5, 1 ; GFX10-NEXT: v_writelane_b32 v40, s6, 2 ; GFX10-NEXT: v_writelane_b32 v40, s7, 3 -; GFX10-NEXT: s_load_dwordx4 s[4:7], s[34:35], 0x0 -; GFX10-NEXT: s_getpc_b64 s[34:35] -; GFX10-NEXT: s_add_u32 s34, s34, external_void_func_v4i32_inreg@rel32@lo+4 -; GFX10-NEXT: s_addc_u32 s35, s35, external_void_func_v4i32_inreg@rel32@hi+12 ; GFX10-NEXT: v_writelane_b32 v40, s30, 4 +; GFX10-NEXT: s_load_dwordx4 s[4:7], s[30:31], 0x0 ; GFX10-NEXT: v_writelane_b32 v40, s31, 5 -; GFX10-NEXT: s_swappc_b64 s[30:31], s[34:35] -; GFX10-NEXT: v_readlane_b32 s31, v40, 5 +; GFX10-NEXT: s_getpc_b64 s[30:31] +; GFX10-NEXT: s_add_u32 s30, s30, external_void_func_v4i32_inreg@rel32@lo+4 +; GFX10-NEXT: s_addc_u32 s31, s31, external_void_func_v4i32_inreg@rel32@hi+12 +; GFX10-NEXT: s_swappc_b64 s[30:31], s[30:31] ; GFX10-NEXT: v_readlane_b32 s30, v40, 4 +; GFX10-NEXT: v_readlane_b32 s31, v40, 5 ; GFX10-NEXT: v_readlane_b32 s7, v40, 3 ; GFX10-NEXT: v_readlane_b32 s6, v40, 2 ; GFX10-NEXT: v_readlane_b32 s5, v40, 1 @@ -8354,8 +8350,8 @@ define amdgpu_gfx void @test_call_external_void_func_v4i32_inreg() #0 { ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s30, 4 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s31, 5 ; GFX10-SCRATCH-NEXT: s_swappc_b64 s[30:31], s[0:1] -; GFX10-SCRATCH-NEXT: v_readlane_b32 s31, v40, 5 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s30, v40, 4 +; GFX10-SCRATCH-NEXT: v_readlane_b32 s31, v40, 5 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s7, v40, 3 ; GFX10-SCRATCH-NEXT: 
v_readlane_b32 s6, v40, 2 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s5, v40, 1 @@ -8393,12 +8389,12 @@ define amdgpu_gfx void @test_call_external_void_func_v4i32_imm_inreg() #0 { ; GFX9-NEXT: s_mov_b32 s6, 3 ; GFX9-NEXT: s_mov_b32 s7, 4 ; GFX9-NEXT: v_writelane_b32 v40, s31, 5 -; GFX9-NEXT: s_getpc_b64 s[34:35] -; GFX9-NEXT: s_add_u32 s34, s34, external_void_func_v4i32_inreg@rel32@lo+4 -; GFX9-NEXT: s_addc_u32 s35, s35, external_void_func_v4i32_inreg@rel32@hi+12 -; GFX9-NEXT: s_swappc_b64 s[30:31], s[34:35] -; GFX9-NEXT: v_readlane_b32 s31, v40, 5 +; GFX9-NEXT: s_getpc_b64 s[30:31] +; GFX9-NEXT: s_add_u32 s30, s30, external_void_func_v4i32_inreg@rel32@lo+4 +; GFX9-NEXT: s_addc_u32 s31, s31, external_void_func_v4i32_inreg@rel32@hi+12 +; GFX9-NEXT: s_swappc_b64 s[30:31], s[30:31] ; GFX9-NEXT: v_readlane_b32 s30, v40, 4 +; GFX9-NEXT: v_readlane_b32 s31, v40, 5 ; GFX9-NEXT: v_readlane_b32 s7, v40, 3 ; GFX9-NEXT: v_readlane_b32 s6, v40, 2 ; GFX9-NEXT: v_readlane_b32 s5, v40, 1 @@ -8422,9 +8418,6 @@ define amdgpu_gfx void @test_call_external_void_func_v4i32_imm_inreg() #0 { ; GFX10-NEXT: v_writelane_b32 v40, s33, 6 ; GFX10-NEXT: s_mov_b32 s33, s32 ; GFX10-NEXT: s_addk_i32 s32, 0x200 -; GFX10-NEXT: s_getpc_b64 s[34:35] -; GFX10-NEXT: s_add_u32 s34, s34, external_void_func_v4i32_inreg@rel32@lo+4 -; GFX10-NEXT: s_addc_u32 s35, s35, external_void_func_v4i32_inreg@rel32@hi+12 ; GFX10-NEXT: v_writelane_b32 v40, s4, 0 ; GFX10-NEXT: s_mov_b32 s4, 1 ; GFX10-NEXT: v_writelane_b32 v40, s5, 1 @@ -8435,9 +8428,12 @@ define amdgpu_gfx void @test_call_external_void_func_v4i32_imm_inreg() #0 { ; GFX10-NEXT: s_mov_b32 s7, 4 ; GFX10-NEXT: v_writelane_b32 v40, s30, 4 ; GFX10-NEXT: v_writelane_b32 v40, s31, 5 -; GFX10-NEXT: s_swappc_b64 s[30:31], s[34:35] -; GFX10-NEXT: v_readlane_b32 s31, v40, 5 +; GFX10-NEXT: s_getpc_b64 s[30:31] +; GFX10-NEXT: s_add_u32 s30, s30, external_void_func_v4i32_inreg@rel32@lo+4 +; GFX10-NEXT: s_addc_u32 s31, s31, external_void_func_v4i32_inreg@rel32@hi+12 +; GFX10-NEXT: s_swappc_b64 s[30:31], s[30:31] ; GFX10-NEXT: v_readlane_b32 s30, v40, 4 +; GFX10-NEXT: v_readlane_b32 s31, v40, 5 ; GFX10-NEXT: v_readlane_b32 s7, v40, 3 ; GFX10-NEXT: v_readlane_b32 s6, v40, 2 ; GFX10-NEXT: v_readlane_b32 s5, v40, 1 @@ -8476,8 +8472,8 @@ define amdgpu_gfx void @test_call_external_void_func_v4i32_imm_inreg() #0 { ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s30, 4 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s31, 5 ; GFX10-SCRATCH-NEXT: s_swappc_b64 s[30:31], s[0:1] -; GFX10-SCRATCH-NEXT: v_readlane_b32 s31, v40, 5 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s30, v40, 4 +; GFX10-SCRATCH-NEXT: v_readlane_b32 s31, v40, 5 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s7, v40, 3 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s6, v40, 2 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s5, v40, 1 @@ -8516,12 +8512,12 @@ define amdgpu_gfx void @test_call_external_void_func_v5i32_imm_inreg() #0 { ; GFX9-NEXT: s_mov_b32 s7, 4 ; GFX9-NEXT: s_mov_b32 s8, 5 ; GFX9-NEXT: v_writelane_b32 v40, s31, 6 -; GFX9-NEXT: s_getpc_b64 s[34:35] -; GFX9-NEXT: s_add_u32 s34, s34, external_void_func_v5i32_inreg@rel32@lo+4 -; GFX9-NEXT: s_addc_u32 s35, s35, external_void_func_v5i32_inreg@rel32@hi+12 -; GFX9-NEXT: s_swappc_b64 s[30:31], s[34:35] -; GFX9-NEXT: v_readlane_b32 s31, v40, 6 +; GFX9-NEXT: s_getpc_b64 s[30:31] +; GFX9-NEXT: s_add_u32 s30, s30, external_void_func_v5i32_inreg@rel32@lo+4 +; GFX9-NEXT: s_addc_u32 s31, s31, external_void_func_v5i32_inreg@rel32@hi+12 +; GFX9-NEXT: s_swappc_b64 s[30:31], s[30:31] ; GFX9-NEXT: v_readlane_b32 s30, v40, 5 +; GFX9-NEXT: 
v_readlane_b32 s31, v40, 6 ; GFX9-NEXT: v_readlane_b32 s8, v40, 4 ; GFX9-NEXT: v_readlane_b32 s7, v40, 3 ; GFX9-NEXT: v_readlane_b32 s6, v40, 2 @@ -8546,9 +8542,6 @@ define amdgpu_gfx void @test_call_external_void_func_v5i32_imm_inreg() #0 { ; GFX10-NEXT: v_writelane_b32 v40, s33, 7 ; GFX10-NEXT: s_mov_b32 s33, s32 ; GFX10-NEXT: s_addk_i32 s32, 0x200 -; GFX10-NEXT: s_getpc_b64 s[34:35] -; GFX10-NEXT: s_add_u32 s34, s34, external_void_func_v5i32_inreg@rel32@lo+4 -; GFX10-NEXT: s_addc_u32 s35, s35, external_void_func_v5i32_inreg@rel32@hi+12 ; GFX10-NEXT: v_writelane_b32 v40, s4, 0 ; GFX10-NEXT: s_mov_b32 s4, 1 ; GFX10-NEXT: v_writelane_b32 v40, s5, 1 @@ -8561,9 +8554,12 @@ define amdgpu_gfx void @test_call_external_void_func_v5i32_imm_inreg() #0 { ; GFX10-NEXT: s_mov_b32 s8, 5 ; GFX10-NEXT: v_writelane_b32 v40, s30, 5 ; GFX10-NEXT: v_writelane_b32 v40, s31, 6 -; GFX10-NEXT: s_swappc_b64 s[30:31], s[34:35] -; GFX10-NEXT: v_readlane_b32 s31, v40, 6 +; GFX10-NEXT: s_getpc_b64 s[30:31] +; GFX10-NEXT: s_add_u32 s30, s30, external_void_func_v5i32_inreg@rel32@lo+4 +; GFX10-NEXT: s_addc_u32 s31, s31, external_void_func_v5i32_inreg@rel32@hi+12 +; GFX10-NEXT: s_swappc_b64 s[30:31], s[30:31] ; GFX10-NEXT: v_readlane_b32 s30, v40, 5 +; GFX10-NEXT: v_readlane_b32 s31, v40, 6 ; GFX10-NEXT: v_readlane_b32 s8, v40, 4 ; GFX10-NEXT: v_readlane_b32 s7, v40, 3 ; GFX10-NEXT: v_readlane_b32 s6, v40, 2 @@ -8605,8 +8601,8 @@ define amdgpu_gfx void @test_call_external_void_func_v5i32_imm_inreg() #0 { ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s30, 5 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s31, 6 ; GFX10-SCRATCH-NEXT: s_swappc_b64 s[30:31], s[0:1] -; GFX10-SCRATCH-NEXT: v_readlane_b32 s31, v40, 6 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s30, v40, 5 +; GFX10-SCRATCH-NEXT: v_readlane_b32 s31, v40, 6 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s8, v40, 4 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s7, v40, 3 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s6, v40, 2 @@ -8635,24 +8631,24 @@ define amdgpu_gfx void @test_call_external_void_func_v8i32_inreg() #0 { ; GFX9-NEXT: v_writelane_b32 v40, s4, 0 ; GFX9-NEXT: v_writelane_b32 v40, s5, 1 ; GFX9-NEXT: v_writelane_b32 v40, s6, 2 -; GFX9-NEXT: s_load_dwordx2 s[34:35], s[34:35], 0x0 ; GFX9-NEXT: v_writelane_b32 v40, s7, 3 ; GFX9-NEXT: v_writelane_b32 v40, s8, 4 ; GFX9-NEXT: v_writelane_b32 v40, s9, 5 ; GFX9-NEXT: v_writelane_b32 v40, s10, 6 ; GFX9-NEXT: v_writelane_b32 v40, s11, 7 -; GFX9-NEXT: s_waitcnt lgkmcnt(0) -; GFX9-NEXT: s_load_dwordx8 s[4:11], s[34:35], 0x0 -; GFX9-NEXT: s_mov_b32 s33, s32 -; GFX9-NEXT: s_addk_i32 s32, 0x400 ; GFX9-NEXT: v_writelane_b32 v40, s30, 8 ; GFX9-NEXT: v_writelane_b32 v40, s31, 9 -; GFX9-NEXT: s_getpc_b64 s[34:35] -; GFX9-NEXT: s_add_u32 s34, s34, external_void_func_v8i32_inreg@rel32@lo+4 -; GFX9-NEXT: s_addc_u32 s35, s35, external_void_func_v8i32_inreg@rel32@hi+12 -; GFX9-NEXT: s_swappc_b64 s[30:31], s[34:35] -; GFX9-NEXT: v_readlane_b32 s31, v40, 9 +; GFX9-NEXT: s_load_dwordx2 s[30:31], s[30:31], 0x0 +; GFX9-NEXT: s_mov_b32 s33, s32 +; GFX9-NEXT: s_addk_i32 s32, 0x400 +; GFX9-NEXT: s_waitcnt lgkmcnt(0) +; GFX9-NEXT: s_load_dwordx8 s[4:11], s[30:31], 0x0 +; GFX9-NEXT: s_getpc_b64 s[30:31] +; GFX9-NEXT: s_add_u32 s30, s30, external_void_func_v8i32_inreg@rel32@lo+4 +; GFX9-NEXT: s_addc_u32 s31, s31, external_void_func_v8i32_inreg@rel32@hi+12 +; GFX9-NEXT: s_swappc_b64 s[30:31], s[30:31] ; GFX9-NEXT: v_readlane_b32 s30, v40, 8 +; GFX9-NEXT: v_readlane_b32 s31, v40, 9 ; GFX9-NEXT: v_readlane_b32 s11, v40, 7 ; GFX9-NEXT: v_readlane_b32 s10, v40, 6 ; GFX9-NEXT: 
v_readlane_b32 s9, v40, 5 @@ -8678,7 +8674,6 @@ define amdgpu_gfx void @test_call_external_void_func_v8i32_inreg() #0 { ; GFX10-NEXT: s_waitcnt_depctr 0xffe3 ; GFX10-NEXT: s_mov_b32 exec_lo, s34 ; GFX10-NEXT: v_writelane_b32 v40, s33, 10 -; GFX10-NEXT: s_load_dwordx2 s[34:35], s[34:35], 0x0 ; GFX10-NEXT: s_mov_b32 s33, s32 ; GFX10-NEXT: s_addk_i32 s32, 0x200 ; GFX10-NEXT: v_writelane_b32 v40, s4, 0 @@ -8689,16 +8684,17 @@ define amdgpu_gfx void @test_call_external_void_func_v8i32_inreg() #0 { ; GFX10-NEXT: v_writelane_b32 v40, s9, 5 ; GFX10-NEXT: v_writelane_b32 v40, s10, 6 ; GFX10-NEXT: v_writelane_b32 v40, s11, 7 -; GFX10-NEXT: s_waitcnt lgkmcnt(0) -; GFX10-NEXT: s_load_dwordx8 s[4:11], s[34:35], 0x0 -; GFX10-NEXT: s_getpc_b64 s[34:35] -; GFX10-NEXT: s_add_u32 s34, s34, external_void_func_v8i32_inreg@rel32@lo+4 -; GFX10-NEXT: s_addc_u32 s35, s35, external_void_func_v8i32_inreg@rel32@hi+12 ; GFX10-NEXT: v_writelane_b32 v40, s30, 8 ; GFX10-NEXT: v_writelane_b32 v40, s31, 9 -; GFX10-NEXT: s_swappc_b64 s[30:31], s[34:35] -; GFX10-NEXT: v_readlane_b32 s31, v40, 9 +; GFX10-NEXT: s_load_dwordx2 s[30:31], s[30:31], 0x0 +; GFX10-NEXT: s_waitcnt lgkmcnt(0) +; GFX10-NEXT: s_load_dwordx8 s[4:11], s[30:31], 0x0 +; GFX10-NEXT: s_getpc_b64 s[30:31] +; GFX10-NEXT: s_add_u32 s30, s30, external_void_func_v8i32_inreg@rel32@lo+4 +; GFX10-NEXT: s_addc_u32 s31, s31, external_void_func_v8i32_inreg@rel32@hi+12 +; GFX10-NEXT: s_swappc_b64 s[30:31], s[30:31] ; GFX10-NEXT: v_readlane_b32 s30, v40, 8 +; GFX10-NEXT: v_readlane_b32 s31, v40, 9 ; GFX10-NEXT: v_readlane_b32 s11, v40, 7 ; GFX10-NEXT: v_readlane_b32 s10, v40, 6 ; GFX10-NEXT: v_readlane_b32 s9, v40, 5 @@ -8744,8 +8740,8 @@ define amdgpu_gfx void @test_call_external_void_func_v8i32_inreg() #0 { ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s30, 8 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s31, 9 ; GFX10-SCRATCH-NEXT: s_swappc_b64 s[30:31], s[0:1] -; GFX10-SCRATCH-NEXT: v_readlane_b32 s31, v40, 9 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s30, v40, 8 +; GFX10-SCRATCH-NEXT: v_readlane_b32 s31, v40, 9 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s11, v40, 7 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s10, v40, 6 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s9, v40, 5 @@ -8796,12 +8792,12 @@ define amdgpu_gfx void @test_call_external_void_func_v8i32_imm_inreg() #0 { ; GFX9-NEXT: s_mov_b32 s10, 7 ; GFX9-NEXT: s_mov_b32 s11, 8 ; GFX9-NEXT: v_writelane_b32 v40, s31, 9 -; GFX9-NEXT: s_getpc_b64 s[34:35] -; GFX9-NEXT: s_add_u32 s34, s34, external_void_func_v8i32_inreg@rel32@lo+4 -; GFX9-NEXT: s_addc_u32 s35, s35, external_void_func_v8i32_inreg@rel32@hi+12 -; GFX9-NEXT: s_swappc_b64 s[30:31], s[34:35] -; GFX9-NEXT: v_readlane_b32 s31, v40, 9 +; GFX9-NEXT: s_getpc_b64 s[30:31] +; GFX9-NEXT: s_add_u32 s30, s30, external_void_func_v8i32_inreg@rel32@lo+4 +; GFX9-NEXT: s_addc_u32 s31, s31, external_void_func_v8i32_inreg@rel32@hi+12 +; GFX9-NEXT: s_swappc_b64 s[30:31], s[30:31] ; GFX9-NEXT: v_readlane_b32 s30, v40, 8 +; GFX9-NEXT: v_readlane_b32 s31, v40, 9 ; GFX9-NEXT: v_readlane_b32 s11, v40, 7 ; GFX9-NEXT: v_readlane_b32 s10, v40, 6 ; GFX9-NEXT: v_readlane_b32 s9, v40, 5 @@ -8829,9 +8825,6 @@ define amdgpu_gfx void @test_call_external_void_func_v8i32_imm_inreg() #0 { ; GFX10-NEXT: v_writelane_b32 v40, s33, 10 ; GFX10-NEXT: s_mov_b32 s33, s32 ; GFX10-NEXT: s_addk_i32 s32, 0x200 -; GFX10-NEXT: s_getpc_b64 s[34:35] -; GFX10-NEXT: s_add_u32 s34, s34, external_void_func_v8i32_inreg@rel32@lo+4 -; GFX10-NEXT: s_addc_u32 s35, s35, external_void_func_v8i32_inreg@rel32@hi+12 ; GFX10-NEXT: v_writelane_b32 v40, 
s4, 0 ; GFX10-NEXT: s_mov_b32 s4, 1 ; GFX10-NEXT: v_writelane_b32 v40, s5, 1 @@ -8850,9 +8843,12 @@ define amdgpu_gfx void @test_call_external_void_func_v8i32_imm_inreg() #0 { ; GFX10-NEXT: s_mov_b32 s11, 8 ; GFX10-NEXT: v_writelane_b32 v40, s30, 8 ; GFX10-NEXT: v_writelane_b32 v40, s31, 9 -; GFX10-NEXT: s_swappc_b64 s[30:31], s[34:35] -; GFX10-NEXT: v_readlane_b32 s31, v40, 9 +; GFX10-NEXT: s_getpc_b64 s[30:31] +; GFX10-NEXT: s_add_u32 s30, s30, external_void_func_v8i32_inreg@rel32@lo+4 +; GFX10-NEXT: s_addc_u32 s31, s31, external_void_func_v8i32_inreg@rel32@hi+12 +; GFX10-NEXT: s_swappc_b64 s[30:31], s[30:31] ; GFX10-NEXT: v_readlane_b32 s30, v40, 8 +; GFX10-NEXT: v_readlane_b32 s31, v40, 9 ; GFX10-NEXT: v_readlane_b32 s11, v40, 7 ; GFX10-NEXT: v_readlane_b32 s10, v40, 6 ; GFX10-NEXT: v_readlane_b32 s9, v40, 5 @@ -8903,8 +8899,8 @@ define amdgpu_gfx void @test_call_external_void_func_v8i32_imm_inreg() #0 { ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s30, 8 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s31, 9 ; GFX10-SCRATCH-NEXT: s_swappc_b64 s[30:31], s[0:1] -; GFX10-SCRATCH-NEXT: v_readlane_b32 s31, v40, 9 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s30, v40, 8 +; GFX10-SCRATCH-NEXT: v_readlane_b32 s31, v40, 9 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s11, v40, 7 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s10, v40, 6 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s9, v40, 5 @@ -8944,24 +8940,24 @@ define amdgpu_gfx void @test_call_external_void_func_v16i32_inreg() #0 { ; GFX9-NEXT: v_writelane_b32 v40, s12, 8 ; GFX9-NEXT: v_writelane_b32 v40, s13, 9 ; GFX9-NEXT: v_writelane_b32 v40, s14, 10 -; GFX9-NEXT: s_load_dwordx2 s[34:35], s[34:35], 0x0 ; GFX9-NEXT: v_writelane_b32 v40, s15, 11 ; GFX9-NEXT: v_writelane_b32 v40, s16, 12 ; GFX9-NEXT: v_writelane_b32 v40, s17, 13 ; GFX9-NEXT: v_writelane_b32 v40, s18, 14 ; GFX9-NEXT: v_writelane_b32 v40, s19, 15 -; GFX9-NEXT: s_waitcnt lgkmcnt(0) -; GFX9-NEXT: s_load_dwordx16 s[4:19], s[34:35], 0x0 -; GFX9-NEXT: s_mov_b32 s33, s32 -; GFX9-NEXT: s_addk_i32 s32, 0x400 ; GFX9-NEXT: v_writelane_b32 v40, s30, 16 ; GFX9-NEXT: v_writelane_b32 v40, s31, 17 -; GFX9-NEXT: s_getpc_b64 s[34:35] -; GFX9-NEXT: s_add_u32 s34, s34, external_void_func_v16i32_inreg@rel32@lo+4 -; GFX9-NEXT: s_addc_u32 s35, s35, external_void_func_v16i32_inreg@rel32@hi+12 -; GFX9-NEXT: s_swappc_b64 s[30:31], s[34:35] -; GFX9-NEXT: v_readlane_b32 s31, v40, 17 +; GFX9-NEXT: s_load_dwordx2 s[30:31], s[30:31], 0x0 +; GFX9-NEXT: s_mov_b32 s33, s32 +; GFX9-NEXT: s_addk_i32 s32, 0x400 +; GFX9-NEXT: s_waitcnt lgkmcnt(0) +; GFX9-NEXT: s_load_dwordx16 s[4:19], s[30:31], 0x0 +; GFX9-NEXT: s_getpc_b64 s[30:31] +; GFX9-NEXT: s_add_u32 s30, s30, external_void_func_v16i32_inreg@rel32@lo+4 +; GFX9-NEXT: s_addc_u32 s31, s31, external_void_func_v16i32_inreg@rel32@hi+12 +; GFX9-NEXT: s_swappc_b64 s[30:31], s[30:31] ; GFX9-NEXT: v_readlane_b32 s30, v40, 16 +; GFX9-NEXT: v_readlane_b32 s31, v40, 17 ; GFX9-NEXT: v_readlane_b32 s19, v40, 15 ; GFX9-NEXT: v_readlane_b32 s18, v40, 14 ; GFX9-NEXT: v_readlane_b32 s17, v40, 13 @@ -8995,7 +8991,6 @@ define amdgpu_gfx void @test_call_external_void_func_v16i32_inreg() #0 { ; GFX10-NEXT: s_waitcnt_depctr 0xffe3 ; GFX10-NEXT: s_mov_b32 exec_lo, s34 ; GFX10-NEXT: v_writelane_b32 v40, s33, 18 -; GFX10-NEXT: s_load_dwordx2 s[34:35], s[34:35], 0x0 ; GFX10-NEXT: s_mov_b32 s33, s32 ; GFX10-NEXT: s_addk_i32 s32, 0x200 ; GFX10-NEXT: v_writelane_b32 v40, s4, 0 @@ -9014,16 +9009,17 @@ define amdgpu_gfx void @test_call_external_void_func_v16i32_inreg() #0 { ; GFX10-NEXT: v_writelane_b32 v40, s17, 13 ; 
GFX10-NEXT: v_writelane_b32 v40, s18, 14 ; GFX10-NEXT: v_writelane_b32 v40, s19, 15 -; GFX10-NEXT: s_waitcnt lgkmcnt(0) -; GFX10-NEXT: s_load_dwordx16 s[4:19], s[34:35], 0x0 -; GFX10-NEXT: s_getpc_b64 s[34:35] -; GFX10-NEXT: s_add_u32 s34, s34, external_void_func_v16i32_inreg@rel32@lo+4 -; GFX10-NEXT: s_addc_u32 s35, s35, external_void_func_v16i32_inreg@rel32@hi+12 ; GFX10-NEXT: v_writelane_b32 v40, s30, 16 ; GFX10-NEXT: v_writelane_b32 v40, s31, 17 -; GFX10-NEXT: s_swappc_b64 s[30:31], s[34:35] -; GFX10-NEXT: v_readlane_b32 s31, v40, 17 +; GFX10-NEXT: s_load_dwordx2 s[30:31], s[30:31], 0x0 +; GFX10-NEXT: s_waitcnt lgkmcnt(0) +; GFX10-NEXT: s_load_dwordx16 s[4:19], s[30:31], 0x0 +; GFX10-NEXT: s_getpc_b64 s[30:31] +; GFX10-NEXT: s_add_u32 s30, s30, external_void_func_v16i32_inreg@rel32@lo+4 +; GFX10-NEXT: s_addc_u32 s31, s31, external_void_func_v16i32_inreg@rel32@hi+12 +; GFX10-NEXT: s_swappc_b64 s[30:31], s[30:31] ; GFX10-NEXT: v_readlane_b32 s30, v40, 16 +; GFX10-NEXT: v_readlane_b32 s31, v40, 17 ; GFX10-NEXT: v_readlane_b32 s19, v40, 15 ; GFX10-NEXT: v_readlane_b32 s18, v40, 14 ; GFX10-NEXT: v_readlane_b32 s17, v40, 13 @@ -9085,8 +9081,8 @@ define amdgpu_gfx void @test_call_external_void_func_v16i32_inreg() #0 { ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s30, 16 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s31, 17 ; GFX10-SCRATCH-NEXT: s_swappc_b64 s[30:31], s[0:1] -; GFX10-SCRATCH-NEXT: v_readlane_b32 s31, v40, 17 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s30, v40, 16 +; GFX10-SCRATCH-NEXT: v_readlane_b32 s31, v40, 17 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s19, v40, 15 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s18, v40, 14 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s17, v40, 13 @@ -9141,24 +9137,29 @@ define amdgpu_gfx void @test_call_external_void_func_v32i32_inreg() #0 { ; GFX9-NEXT: v_writelane_b32 v40, s17, 13 ; GFX9-NEXT: v_writelane_b32 v40, s18, 14 ; GFX9-NEXT: v_writelane_b32 v40, s19, 15 -; GFX9-NEXT: s_load_dwordx2 s[34:35], s[34:35], 0x0 ; GFX9-NEXT: v_writelane_b32 v40, s20, 16 ; GFX9-NEXT: v_writelane_b32 v40, s21, 17 ; GFX9-NEXT: v_writelane_b32 v40, s22, 18 ; GFX9-NEXT: v_writelane_b32 v40, s23, 19 ; GFX9-NEXT: v_writelane_b32 v40, s24, 20 -; GFX9-NEXT: s_waitcnt lgkmcnt(0) -; GFX9-NEXT: s_load_dwordx16 s[36:51], s[34:35], 0x40 -; GFX9-NEXT: s_load_dwordx16 s[4:19], s[34:35], 0x0 ; GFX9-NEXT: v_writelane_b32 v40, s25, 21 ; GFX9-NEXT: v_writelane_b32 v40, s26, 22 ; GFX9-NEXT: v_writelane_b32 v40, s27, 23 +; GFX9-NEXT: v_writelane_b32 v40, s28, 24 +; GFX9-NEXT: v_writelane_b32 v40, s29, 25 +; GFX9-NEXT: v_writelane_b32 v40, s30, 26 +; GFX9-NEXT: v_writelane_b32 v40, s31, 27 +; GFX9-NEXT: s_load_dwordx2 s[30:31], s[30:31], 0x0 ; GFX9-NEXT: s_mov_b32 s33, s32 ; GFX9-NEXT: s_addk_i32 s32, 0x400 -; GFX9-NEXT: v_writelane_b32 v40, s28, 24 +; GFX9-NEXT: s_waitcnt lgkmcnt(0) +; GFX9-NEXT: s_load_dwordx16 s[36:51], s[30:31], 0x40 +; GFX9-NEXT: s_load_dwordx16 s[4:19], s[30:31], 0x0 +; GFX9-NEXT: s_getpc_b64 s[30:31] +; GFX9-NEXT: s_add_u32 s30, s30, external_void_func_v32i32_inreg@rel32@lo+4 +; GFX9-NEXT: s_addc_u32 s31, s31, external_void_func_v32i32_inreg@rel32@hi+12 ; GFX9-NEXT: s_waitcnt lgkmcnt(0) ; GFX9-NEXT: v_mov_b32_e32 v0, s46 -; GFX9-NEXT: v_writelane_b32 v40, s29, 25 ; GFX9-NEXT: v_mov_b32_e32 v1, s47 ; GFX9-NEXT: v_mov_b32_e32 v2, s48 ; GFX9-NEXT: v_mov_b32_e32 v3, s49 @@ -9167,7 +9168,6 @@ define amdgpu_gfx void @test_call_external_void_func_v32i32_inreg() #0 { ; GFX9-NEXT: buffer_store_dword v2, off, s[0:3], s32 offset:8 ; GFX9-NEXT: buffer_store_dword v3, off, s[0:3], s32 offset:12 ; 
GFX9-NEXT: v_mov_b32_e32 v0, s50 -; GFX9-NEXT: v_writelane_b32 v40, s30, 26 ; GFX9-NEXT: buffer_store_dword v0, off, s[0:3], s32 offset:16 ; GFX9-NEXT: v_mov_b32_e32 v0, s51 ; GFX9-NEXT: s_mov_b32 s20, s36 @@ -9180,14 +9180,10 @@ define amdgpu_gfx void @test_call_external_void_func_v32i32_inreg() #0 { ; GFX9-NEXT: s_mov_b32 s27, s43 ; GFX9-NEXT: s_mov_b32 s28, s44 ; GFX9-NEXT: s_mov_b32 s29, s45 -; GFX9-NEXT: v_writelane_b32 v40, s31, 27 ; GFX9-NEXT: buffer_store_dword v0, off, s[0:3], s32 offset:20 -; GFX9-NEXT: s_getpc_b64 s[34:35] -; GFX9-NEXT: s_add_u32 s34, s34, external_void_func_v32i32_inreg@rel32@lo+4 -; GFX9-NEXT: s_addc_u32 s35, s35, external_void_func_v32i32_inreg@rel32@hi+12 -; GFX9-NEXT: s_swappc_b64 s[30:31], s[34:35] -; GFX9-NEXT: v_readlane_b32 s31, v40, 27 +; GFX9-NEXT: s_swappc_b64 s[30:31], s[30:31] ; GFX9-NEXT: v_readlane_b32 s30, v40, 26 +; GFX9-NEXT: v_readlane_b32 s31, v40, 27 ; GFX9-NEXT: v_readlane_b32 s29, v40, 25 ; GFX9-NEXT: v_readlane_b32 s28, v40, 24 ; GFX9-NEXT: v_readlane_b32 s27, v40, 23 @@ -9231,7 +9227,6 @@ define amdgpu_gfx void @test_call_external_void_func_v32i32_inreg() #0 { ; GFX10-NEXT: s_waitcnt_depctr 0xffe3 ; GFX10-NEXT: s_mov_b32 exec_lo, s34 ; GFX10-NEXT: v_writelane_b32 v40, s33, 28 -; GFX10-NEXT: s_load_dwordx2 s[34:35], s[34:35], 0x0 ; GFX10-NEXT: s_mov_b32 s33, s32 ; GFX10-NEXT: s_addk_i32 s32, 0x200 ; GFX10-NEXT: v_writelane_b32 v40, s4, 0 @@ -9250,30 +9245,41 @@ define amdgpu_gfx void @test_call_external_void_func_v32i32_inreg() #0 { ; GFX10-NEXT: v_writelane_b32 v40, s17, 13 ; GFX10-NEXT: v_writelane_b32 v40, s18, 14 ; GFX10-NEXT: v_writelane_b32 v40, s19, 15 -; GFX10-NEXT: s_waitcnt lgkmcnt(0) -; GFX10-NEXT: s_clause 0x1 -; GFX10-NEXT: s_load_dwordx16 s[36:51], s[34:35], 0x40 -; GFX10-NEXT: s_load_dwordx16 s[4:19], s[34:35], 0x0 -; GFX10-NEXT: s_getpc_b64 s[34:35] -; GFX10-NEXT: s_add_u32 s34, s34, external_void_func_v32i32_inreg@rel32@lo+4 -; GFX10-NEXT: s_addc_u32 s35, s35, external_void_func_v32i32_inreg@rel32@hi+12 ; GFX10-NEXT: v_writelane_b32 v40, s20, 16 ; GFX10-NEXT: v_writelane_b32 v40, s21, 17 ; GFX10-NEXT: v_writelane_b32 v40, s22, 18 +; GFX10-NEXT: v_writelane_b32 v40, s23, 19 +; GFX10-NEXT: v_writelane_b32 v40, s24, 20 +; GFX10-NEXT: v_writelane_b32 v40, s25, 21 +; GFX10-NEXT: v_writelane_b32 v40, s26, 22 +; GFX10-NEXT: v_writelane_b32 v40, s27, 23 +; GFX10-NEXT: v_writelane_b32 v40, s28, 24 +; GFX10-NEXT: v_writelane_b32 v40, s29, 25 +; GFX10-NEXT: v_writelane_b32 v40, s30, 26 +; GFX10-NEXT: v_writelane_b32 v40, s31, 27 +; GFX10-NEXT: s_load_dwordx2 s[30:31], s[30:31], 0x0 +; GFX10-NEXT: s_waitcnt lgkmcnt(0) +; GFX10-NEXT: s_clause 0x1 +; GFX10-NEXT: s_load_dwordx16 s[36:51], s[30:31], 0x40 +; GFX10-NEXT: s_load_dwordx16 s[4:19], s[30:31], 0x0 +; GFX10-NEXT: s_getpc_b64 s[30:31] +; GFX10-NEXT: s_add_u32 s30, s30, external_void_func_v32i32_inreg@rel32@lo+4 +; GFX10-NEXT: s_addc_u32 s31, s31, external_void_func_v32i32_inreg@rel32@hi+12 ; GFX10-NEXT: s_waitcnt lgkmcnt(0) ; GFX10-NEXT: v_mov_b32_e32 v0, s46 -; GFX10-NEXT: v_writelane_b32 v40, s23, 19 ; GFX10-NEXT: v_mov_b32_e32 v1, s47 ; GFX10-NEXT: v_mov_b32_e32 v2, s48 ; GFX10-NEXT: v_mov_b32_e32 v3, s49 ; GFX10-NEXT: s_mov_b32 s20, s36 -; GFX10-NEXT: v_writelane_b32 v40, s24, 20 ; GFX10-NEXT: s_mov_b32 s21, s37 ; GFX10-NEXT: s_mov_b32 s22, s38 ; GFX10-NEXT: s_mov_b32 s23, s39 ; GFX10-NEXT: s_mov_b32 s24, s40 -; GFX10-NEXT: v_writelane_b32 v40, s25, 21 ; GFX10-NEXT: s_mov_b32 s25, s41 +; GFX10-NEXT: s_mov_b32 s26, s42 +; GFX10-NEXT: s_mov_b32 s27, s43 +; GFX10-NEXT: 
s_mov_b32 s28, s44 +; GFX10-NEXT: s_mov_b32 s29, s45 ; GFX10-NEXT: v_mov_b32_e32 v4, s50 ; GFX10-NEXT: v_mov_b32_e32 v5, s51 ; GFX10-NEXT: buffer_store_dword v0, off, s[0:3], s32 @@ -9282,19 +9288,9 @@ define amdgpu_gfx void @test_call_external_void_func_v32i32_inreg() #0 { ; GFX10-NEXT: buffer_store_dword v3, off, s[0:3], s32 offset:12 ; GFX10-NEXT: buffer_store_dword v4, off, s[0:3], s32 offset:16 ; GFX10-NEXT: buffer_store_dword v5, off, s[0:3], s32 offset:20 -; GFX10-NEXT: v_writelane_b32 v40, s26, 22 -; GFX10-NEXT: s_mov_b32 s26, s42 -; GFX10-NEXT: v_writelane_b32 v40, s27, 23 -; GFX10-NEXT: s_mov_b32 s27, s43 -; GFX10-NEXT: v_writelane_b32 v40, s28, 24 -; GFX10-NEXT: s_mov_b32 s28, s44 -; GFX10-NEXT: v_writelane_b32 v40, s29, 25 -; GFX10-NEXT: s_mov_b32 s29, s45 -; GFX10-NEXT: v_writelane_b32 v40, s30, 26 -; GFX10-NEXT: v_writelane_b32 v40, s31, 27 -; GFX10-NEXT: s_swappc_b64 s[30:31], s[34:35] -; GFX10-NEXT: v_readlane_b32 s31, v40, 27 +; GFX10-NEXT: s_swappc_b64 s[30:31], s[30:31] ; GFX10-NEXT: v_readlane_b32 s30, v40, 26 +; GFX10-NEXT: v_readlane_b32 s31, v40, 27 ; GFX10-NEXT: v_readlane_b32 s29, v40, 25 ; GFX10-NEXT: v_readlane_b32 s28, v40, 24 ; GFX10-NEXT: v_readlane_b32 s27, v40, 23 @@ -9397,8 +9393,8 @@ define amdgpu_gfx void @test_call_external_void_func_v32i32_inreg() #0 { ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s30, 26 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s31, 27 ; GFX10-SCRATCH-NEXT: s_swappc_b64 s[30:31], s[0:1] -; GFX10-SCRATCH-NEXT: v_readlane_b32 s31, v40, 27 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s30, v40, 26 +; GFX10-SCRATCH-NEXT: v_readlane_b32 s31, v40, 27 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s29, v40, 25 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s28, v40, 24 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s27, v40, 23 @@ -9462,39 +9458,43 @@ define amdgpu_gfx void @test_call_external_void_func_v32i32_i32_inreg(i32) #0 { ; GFX9-NEXT: v_writelane_b32 v40, s16, 12 ; GFX9-NEXT: v_writelane_b32 v40, s17, 13 ; GFX9-NEXT: v_writelane_b32 v40, s18, 14 -; GFX9-NEXT: s_load_dwordx2 s[34:35], s[34:35], 0x0 ; GFX9-NEXT: v_writelane_b32 v40, s19, 15 ; GFX9-NEXT: v_writelane_b32 v40, s20, 16 ; GFX9-NEXT: v_writelane_b32 v40, s21, 17 ; GFX9-NEXT: v_writelane_b32 v40, s22, 18 ; GFX9-NEXT: v_writelane_b32 v40, s23, 19 -; GFX9-NEXT: s_waitcnt lgkmcnt(0) -; GFX9-NEXT: s_load_dword s52, s[34:35], 0x0 -; GFX9-NEXT: ; kill: killed $sgpr34_sgpr35 -; GFX9-NEXT: ; kill: killed $sgpr34_sgpr35 -; GFX9-NEXT: s_load_dwordx16 s[36:51], s[34:35], 0x40 -; GFX9-NEXT: s_load_dwordx16 s[4:19], s[34:35], 0x0 ; GFX9-NEXT: v_writelane_b32 v40, s24, 20 ; GFX9-NEXT: v_writelane_b32 v40, s25, 21 +; GFX9-NEXT: v_writelane_b32 v40, s26, 22 +; GFX9-NEXT: v_writelane_b32 v40, s27, 23 +; GFX9-NEXT: v_writelane_b32 v40, s28, 24 +; GFX9-NEXT: v_writelane_b32 v40, s29, 25 +; GFX9-NEXT: v_writelane_b32 v40, s30, 26 +; GFX9-NEXT: v_writelane_b32 v40, s31, 27 +; GFX9-NEXT: s_load_dwordx2 s[30:31], s[30:31], 0x0 ; GFX9-NEXT: s_mov_b32 s33, s32 ; GFX9-NEXT: s_addk_i32 s32, 0x400 -; GFX9-NEXT: v_writelane_b32 v40, s26, 22 ; GFX9-NEXT: s_waitcnt lgkmcnt(0) -; GFX9-NEXT: v_mov_b32_e32 v0, s52 -; GFX9-NEXT: v_writelane_b32 v40, s27, 23 +; GFX9-NEXT: s_load_dword s34, s[30:31], 0x0 +; GFX9-NEXT: ; kill: killed $sgpr30_sgpr31 +; GFX9-NEXT: ; kill: killed $sgpr30_sgpr31 +; GFX9-NEXT: s_load_dwordx16 s[36:51], s[30:31], 0x40 +; GFX9-NEXT: s_load_dwordx16 s[4:19], s[30:31], 0x0 +; GFX9-NEXT: s_waitcnt lgkmcnt(0) +; GFX9-NEXT: v_mov_b32_e32 v0, s34 ; GFX9-NEXT: buffer_store_dword v0, off, s[0:3], s32 offset:24 +; GFX9-NEXT: 
s_getpc_b64 s[30:31] +; GFX9-NEXT: s_add_u32 s30, s30, external_void_func_v32i32_i32_inreg@rel32@lo+4 +; GFX9-NEXT: s_addc_u32 s31, s31, external_void_func_v32i32_i32_inreg@rel32@hi+12 ; GFX9-NEXT: v_mov_b32_e32 v0, s46 -; GFX9-NEXT: v_writelane_b32 v40, s28, 24 ; GFX9-NEXT: v_mov_b32_e32 v1, s47 -; GFX9-NEXT: v_mov_b32_e32 v2, s48 ; GFX9-NEXT: buffer_store_dword v0, off, s[0:3], s32 ; GFX9-NEXT: buffer_store_dword v1, off, s[0:3], s32 offset:4 -; GFX9-NEXT: buffer_store_dword v2, off, s[0:3], s32 offset:8 +; GFX9-NEXT: v_mov_b32_e32 v0, s48 +; GFX9-NEXT: buffer_store_dword v0, off, s[0:3], s32 offset:8 ; GFX9-NEXT: v_mov_b32_e32 v0, s49 -; GFX9-NEXT: v_writelane_b32 v40, s29, 25 ; GFX9-NEXT: buffer_store_dword v0, off, s[0:3], s32 offset:12 ; GFX9-NEXT: v_mov_b32_e32 v0, s50 -; GFX9-NEXT: v_writelane_b32 v40, s30, 26 ; GFX9-NEXT: buffer_store_dword v0, off, s[0:3], s32 offset:16 ; GFX9-NEXT: v_mov_b32_e32 v0, s51 ; GFX9-NEXT: s_mov_b32 s20, s36 @@ -9507,14 +9507,10 @@ define amdgpu_gfx void @test_call_external_void_func_v32i32_i32_inreg(i32) #0 { ; GFX9-NEXT: s_mov_b32 s27, s43 ; GFX9-NEXT: s_mov_b32 s28, s44 ; GFX9-NEXT: s_mov_b32 s29, s45 -; GFX9-NEXT: v_writelane_b32 v40, s31, 27 ; GFX9-NEXT: buffer_store_dword v0, off, s[0:3], s32 offset:20 -; GFX9-NEXT: s_getpc_b64 s[34:35] -; GFX9-NEXT: s_add_u32 s34, s34, external_void_func_v32i32_i32_inreg@rel32@lo+4 -; GFX9-NEXT: s_addc_u32 s35, s35, external_void_func_v32i32_i32_inreg@rel32@hi+12 -; GFX9-NEXT: s_swappc_b64 s[30:31], s[34:35] -; GFX9-NEXT: v_readlane_b32 s31, v40, 27 +; GFX9-NEXT: s_swappc_b64 s[30:31], s[30:31] ; GFX9-NEXT: v_readlane_b32 s30, v40, 26 +; GFX9-NEXT: v_readlane_b32 s31, v40, 27 ; GFX9-NEXT: v_readlane_b32 s29, v40, 25 ; GFX9-NEXT: v_readlane_b32 s28, v40, 24 ; GFX9-NEXT: v_readlane_b32 s27, v40, 23 @@ -9558,7 +9554,6 @@ define amdgpu_gfx void @test_call_external_void_func_v32i32_i32_inreg(i32) #0 { ; GFX10-NEXT: s_waitcnt_depctr 0xffe3 ; GFX10-NEXT: s_mov_b32 exec_lo, s34 ; GFX10-NEXT: v_writelane_b32 v40, s33, 28 -; GFX10-NEXT: s_load_dwordx2 s[34:35], s[34:35], 0x0 ; GFX10-NEXT: s_mov_b32 s33, s32 ; GFX10-NEXT: s_addk_i32 s32, 0x200 ; GFX10-NEXT: v_writelane_b32 v40, s4, 0 @@ -9577,56 +9572,57 @@ define amdgpu_gfx void @test_call_external_void_func_v32i32_i32_inreg(i32) #0 { ; GFX10-NEXT: v_writelane_b32 v40, s17, 13 ; GFX10-NEXT: v_writelane_b32 v40, s18, 14 ; GFX10-NEXT: v_writelane_b32 v40, s19, 15 +; GFX10-NEXT: v_writelane_b32 v40, s20, 16 +; GFX10-NEXT: v_writelane_b32 v40, s21, 17 +; GFX10-NEXT: v_writelane_b32 v40, s22, 18 +; GFX10-NEXT: v_writelane_b32 v40, s23, 19 +; GFX10-NEXT: v_writelane_b32 v40, s24, 20 +; GFX10-NEXT: v_writelane_b32 v40, s25, 21 +; GFX10-NEXT: v_writelane_b32 v40, s26, 22 +; GFX10-NEXT: v_writelane_b32 v40, s27, 23 +; GFX10-NEXT: v_writelane_b32 v40, s28, 24 +; GFX10-NEXT: v_writelane_b32 v40, s29, 25 +; GFX10-NEXT: v_writelane_b32 v40, s30, 26 +; GFX10-NEXT: v_writelane_b32 v40, s31, 27 +; GFX10-NEXT: s_load_dwordx2 s[30:31], s[30:31], 0x0 ; GFX10-NEXT: s_waitcnt lgkmcnt(0) ; GFX10-NEXT: s_clause 0x2 -; GFX10-NEXT: s_load_dword s52, s[34:35], 0x0 +; GFX10-NEXT: s_load_dword s34, s[30:31], 0x0 ; GFX10-NEXT: ; meta instruction ; GFX10-NEXT: ; meta instruction -; GFX10-NEXT: s_load_dwordx16 s[36:51], s[34:35], 0x40 -; GFX10-NEXT: s_load_dwordx16 s[4:19], s[34:35], 0x0 -; GFX10-NEXT: s_getpc_b64 s[34:35] -; GFX10-NEXT: s_add_u32 s34, s34, external_void_func_v32i32_i32_inreg@rel32@lo+4 -; GFX10-NEXT: s_addc_u32 s35, s35, external_void_func_v32i32_i32_inreg@rel32@hi+12 -; GFX10-NEXT: 
v_writelane_b32 v40, s20, 16 -; GFX10-NEXT: v_writelane_b32 v40, s21, 17 -; GFX10-NEXT: v_writelane_b32 v40, s22, 18 +; GFX10-NEXT: s_load_dwordx16 s[36:51], s[30:31], 0x40 +; GFX10-NEXT: s_load_dwordx16 s[4:19], s[30:31], 0x0 +; GFX10-NEXT: s_getpc_b64 s[30:31] +; GFX10-NEXT: s_add_u32 s30, s30, external_void_func_v32i32_i32_inreg@rel32@lo+4 +; GFX10-NEXT: s_addc_u32 s31, s31, external_void_func_v32i32_i32_inreg@rel32@hi+12 ; GFX10-NEXT: s_waitcnt lgkmcnt(0) -; GFX10-NEXT: v_mov_b32_e32 v0, s52 +; GFX10-NEXT: v_mov_b32_e32 v0, s34 ; GFX10-NEXT: v_mov_b32_e32 v1, s47 -; GFX10-NEXT: v_writelane_b32 v40, s23, 19 +; GFX10-NEXT: v_mov_b32_e32 v2, s48 ; GFX10-NEXT: buffer_store_dword v0, off, s[0:3], s32 offset:24 ; GFX10-NEXT: v_mov_b32_e32 v0, s46 -; GFX10-NEXT: v_mov_b32_e32 v2, s48 ; GFX10-NEXT: v_mov_b32_e32 v3, s49 -; GFX10-NEXT: v_writelane_b32 v40, s24, 20 ; GFX10-NEXT: s_mov_b32 s20, s36 ; GFX10-NEXT: s_mov_b32 s21, s37 ; GFX10-NEXT: s_mov_b32 s22, s38 ; GFX10-NEXT: s_mov_b32 s23, s39 -; GFX10-NEXT: v_writelane_b32 v40, s25, 21 ; GFX10-NEXT: s_mov_b32 s24, s40 ; GFX10-NEXT: s_mov_b32 s25, s41 +; GFX10-NEXT: s_mov_b32 s26, s42 +; GFX10-NEXT: s_mov_b32 s27, s43 +; GFX10-NEXT: s_mov_b32 s28, s44 +; GFX10-NEXT: s_mov_b32 s29, s45 ; GFX10-NEXT: v_mov_b32_e32 v4, s50 ; GFX10-NEXT: v_mov_b32_e32 v5, s51 -; GFX10-NEXT: v_writelane_b32 v40, s26, 22 -; GFX10-NEXT: s_mov_b32 s26, s42 ; GFX10-NEXT: buffer_store_dword v0, off, s[0:3], s32 ; GFX10-NEXT: buffer_store_dword v1, off, s[0:3], s32 offset:4 ; GFX10-NEXT: buffer_store_dword v2, off, s[0:3], s32 offset:8 ; GFX10-NEXT: buffer_store_dword v3, off, s[0:3], s32 offset:12 ; GFX10-NEXT: buffer_store_dword v4, off, s[0:3], s32 offset:16 ; GFX10-NEXT: buffer_store_dword v5, off, s[0:3], s32 offset:20 -; GFX10-NEXT: v_writelane_b32 v40, s27, 23 -; GFX10-NEXT: s_mov_b32 s27, s43 -; GFX10-NEXT: v_writelane_b32 v40, s28, 24 -; GFX10-NEXT: s_mov_b32 s28, s44 -; GFX10-NEXT: v_writelane_b32 v40, s29, 25 -; GFX10-NEXT: s_mov_b32 s29, s45 -; GFX10-NEXT: v_writelane_b32 v40, s30, 26 -; GFX10-NEXT: v_writelane_b32 v40, s31, 27 -; GFX10-NEXT: s_swappc_b64 s[30:31], s[34:35] -; GFX10-NEXT: v_readlane_b32 s31, v40, 27 +; GFX10-NEXT: s_swappc_b64 s[30:31], s[30:31] ; GFX10-NEXT: v_readlane_b32 s30, v40, 26 +; GFX10-NEXT: v_readlane_b32 s31, v40, 27 ; GFX10-NEXT: v_readlane_b32 s29, v40, 25 ; GFX10-NEXT: v_readlane_b32 s28, v40, 24 ; GFX10-NEXT: v_readlane_b32 s27, v40, 23 @@ -9734,8 +9730,8 @@ define amdgpu_gfx void @test_call_external_void_func_v32i32_i32_inreg(i32) #0 { ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s30, 26 ; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s31, 27 ; GFX10-SCRATCH-NEXT: s_swappc_b64 s[30:31], s[0:1] -; GFX10-SCRATCH-NEXT: v_readlane_b32 s31, v40, 27 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s30, v40, 26 +; GFX10-SCRATCH-NEXT: v_readlane_b32 s31, v40, 27 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s29, v40, 25 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s28, v40, 24 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s27, v40, 23 @@ -9791,16 +9787,16 @@ define amdgpu_gfx void @stack_passed_arg_alignment_v32i32_f64(<32 x i32> %val, d ; GFX9-NEXT: s_addk_i32 s32, 0x400 ; GFX9-NEXT: v_writelane_b32 v40, s30, 0 ; GFX9-NEXT: v_writelane_b32 v40, s31, 1 -; GFX9-NEXT: s_getpc_b64 s[34:35] -; GFX9-NEXT: s_add_u32 s34, s34, stack_passed_f64_arg@rel32@lo+4 -; GFX9-NEXT: s_addc_u32 s35, s35, stack_passed_f64_arg@rel32@hi+12 +; GFX9-NEXT: s_getpc_b64 s[30:31] +; GFX9-NEXT: s_add_u32 s30, s30, stack_passed_f64_arg@rel32@lo+4 +; GFX9-NEXT: s_addc_u32 s31, s31, 
stack_passed_f64_arg@rel32@hi+12 ; GFX9-NEXT: s_waitcnt vmcnt(1) ; GFX9-NEXT: buffer_store_dword v32, off, s[0:3], s32 ; GFX9-NEXT: s_waitcnt vmcnt(1) ; GFX9-NEXT: buffer_store_dword v33, off, s[0:3], s32 offset:4 -; GFX9-NEXT: s_swappc_b64 s[30:31], s[34:35] -; GFX9-NEXT: v_readlane_b32 s31, v40, 1 +; GFX9-NEXT: s_swappc_b64 s[30:31], s[30:31] ; GFX9-NEXT: v_readlane_b32 s30, v40, 0 +; GFX9-NEXT: v_readlane_b32 s31, v40, 1 ; GFX9-NEXT: s_addk_i32 s32, 0xfc00 ; GFX9-NEXT: v_readlane_b32 s33, v40, 2 ; GFX9-NEXT: s_or_saveexec_b64 s[34:35], -1 @@ -9824,17 +9820,17 @@ define amdgpu_gfx void @stack_passed_arg_alignment_v32i32_f64(<32 x i32> %val, d ; GFX10-NEXT: buffer_load_dword v33, off, s[0:3], s33 offset:4 ; GFX10-NEXT: s_addk_i32 s32, 0x200 ; GFX10-NEXT: v_writelane_b32 v40, s30, 0 -; GFX10-NEXT: s_getpc_b64 s[34:35] -; GFX10-NEXT: s_add_u32 s34, s34, stack_passed_f64_arg@rel32@lo+4 -; GFX10-NEXT: s_addc_u32 s35, s35, stack_passed_f64_arg@rel32@hi+12 ; GFX10-NEXT: s_waitcnt vmcnt(1) ; GFX10-NEXT: buffer_store_dword v32, off, s[0:3], s32 ; GFX10-NEXT: s_waitcnt vmcnt(0) ; GFX10-NEXT: buffer_store_dword v33, off, s[0:3], s32 offset:4 ; GFX10-NEXT: v_writelane_b32 v40, s31, 1 -; GFX10-NEXT: s_swappc_b64 s[30:31], s[34:35] -; GFX10-NEXT: v_readlane_b32 s31, v40, 1 +; GFX10-NEXT: s_getpc_b64 s[30:31] +; GFX10-NEXT: s_add_u32 s30, s30, stack_passed_f64_arg@rel32@lo+4 +; GFX10-NEXT: s_addc_u32 s31, s31, stack_passed_f64_arg@rel32@hi+12 +; GFX10-NEXT: s_swappc_b64 s[30:31], s[30:31] ; GFX10-NEXT: v_readlane_b32 s30, v40, 0 +; GFX10-NEXT: v_readlane_b32 s31, v40, 1 ; GFX10-NEXT: s_addk_i32 s32, 0xfe00 ; GFX10-NEXT: v_readlane_b32 s33, v40, 2 ; GFX10-NEXT: s_or_saveexec_b32 s34, -1 @@ -9864,8 +9860,8 @@ define amdgpu_gfx void @stack_passed_arg_alignment_v32i32_f64(<32 x i32> %val, d ; GFX10-SCRATCH-NEXT: s_waitcnt vmcnt(0) ; GFX10-SCRATCH-NEXT: scratch_store_dwordx2 off, v[32:33], s32 ; GFX10-SCRATCH-NEXT: s_swappc_b64 s[30:31], s[0:1] -; GFX10-SCRATCH-NEXT: v_readlane_b32 s31, v40, 1 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s30, v40, 0 +; GFX10-SCRATCH-NEXT: v_readlane_b32 s31, v40, 1 ; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, -16 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s33, v40, 2 ; GFX10-SCRATCH-NEXT: s_or_saveexec_b32 s0, -1 @@ -9931,12 +9927,12 @@ define amdgpu_gfx void @stack_12xv3i32() #0 { ; GFX9-NEXT: v_mov_b32_e32 v30, 10 ; GFX9-NEXT: v_mov_b32_e32 v31, 11 ; GFX9-NEXT: v_writelane_b32 v40, s31, 1 -; GFX9-NEXT: s_getpc_b64 s[34:35] -; GFX9-NEXT: s_add_u32 s34, s34, external_void_func_12xv3i32@rel32@lo+4 -; GFX9-NEXT: s_addc_u32 s35, s35, external_void_func_12xv3i32@rel32@hi+12 -; GFX9-NEXT: s_swappc_b64 s[30:31], s[34:35] -; GFX9-NEXT: v_readlane_b32 s31, v40, 1 +; GFX9-NEXT: s_getpc_b64 s[30:31] +; GFX9-NEXT: s_add_u32 s30, s30, external_void_func_12xv3i32@rel32@lo+4 +; GFX9-NEXT: s_addc_u32 s31, s31, external_void_func_12xv3i32@rel32@hi+12 +; GFX9-NEXT: s_swappc_b64 s[30:31], s[30:31] ; GFX9-NEXT: v_readlane_b32 s30, v40, 0 +; GFX9-NEXT: v_readlane_b32 s31, v40, 1 ; GFX9-NEXT: s_addk_i32 s32, 0xfc00 ; GFX9-NEXT: v_readlane_b32 s33, v40, 2 ; GFX9-NEXT: s_or_saveexec_b64 s[34:35], -1 @@ -9998,12 +9994,12 @@ define amdgpu_gfx void @stack_12xv3i32() #0 { ; GFX10-NEXT: v_mov_b32_e32 v30, 10 ; GFX10-NEXT: v_mov_b32_e32 v31, 11 ; GFX10-NEXT: v_writelane_b32 v40, s31, 1 -; GFX10-NEXT: s_getpc_b64 s[34:35] -; GFX10-NEXT: s_add_u32 s34, s34, external_void_func_12xv3i32@rel32@lo+4 -; GFX10-NEXT: s_addc_u32 s35, s35, external_void_func_12xv3i32@rel32@hi+12 -; GFX10-NEXT: s_swappc_b64 s[30:31], s[34:35] -; 
GFX10-NEXT: v_readlane_b32 s31, v40, 1 +; GFX10-NEXT: s_getpc_b64 s[30:31] +; GFX10-NEXT: s_add_u32 s30, s30, external_void_func_12xv3i32@rel32@lo+4 +; GFX10-NEXT: s_addc_u32 s31, s31, external_void_func_12xv3i32@rel32@hi+12 +; GFX10-NEXT: s_swappc_b64 s[30:31], s[30:31] ; GFX10-NEXT: v_readlane_b32 s30, v40, 0 +; GFX10-NEXT: v_readlane_b32 s31, v40, 1 ; GFX10-NEXT: s_addk_i32 s32, 0xfe00 ; GFX10-NEXT: v_readlane_b32 s33, v40, 2 ; GFX10-NEXT: s_or_saveexec_b32 s34, -1 @@ -10067,8 +10063,8 @@ define amdgpu_gfx void @stack_12xv3i32() #0 { ; GFX10-SCRATCH-NEXT: s_add_u32 s0, s0, external_void_func_12xv3i32@rel32@lo+4 ; GFX10-SCRATCH-NEXT: s_addc_u32 s1, s1, external_void_func_12xv3i32@rel32@hi+12 ; GFX10-SCRATCH-NEXT: s_swappc_b64 s[30:31], s[0:1] -; GFX10-SCRATCH-NEXT: v_readlane_b32 s31, v40, 1 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s30, v40, 0 +; GFX10-SCRATCH-NEXT: v_readlane_b32 s31, v40, 1 ; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, -16 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s33, v40, 2 ; GFX10-SCRATCH-NEXT: s_or_saveexec_b32 s0, -1 @@ -10154,12 +10150,12 @@ define amdgpu_gfx void @stack_8xv5i32() #0 { ; GFX9-NEXT: v_mov_b32_e32 v30, 6 ; GFX9-NEXT: v_mov_b32_e32 v31, 7 ; GFX9-NEXT: v_writelane_b32 v40, s31, 1 -; GFX9-NEXT: s_getpc_b64 s[34:35] -; GFX9-NEXT: s_add_u32 s34, s34, external_void_func_8xv5i32@rel32@lo+4 -; GFX9-NEXT: s_addc_u32 s35, s35, external_void_func_8xv5i32@rel32@hi+12 -; GFX9-NEXT: s_swappc_b64 s[30:31], s[34:35] -; GFX9-NEXT: v_readlane_b32 s31, v40, 1 +; GFX9-NEXT: s_getpc_b64 s[30:31] +; GFX9-NEXT: s_add_u32 s30, s30, external_void_func_8xv5i32@rel32@lo+4 +; GFX9-NEXT: s_addc_u32 s31, s31, external_void_func_8xv5i32@rel32@hi+12 +; GFX9-NEXT: s_swappc_b64 s[30:31], s[30:31] ; GFX9-NEXT: v_readlane_b32 s30, v40, 0 +; GFX9-NEXT: v_readlane_b32 s31, v40, 1 ; GFX9-NEXT: s_addk_i32 s32, 0xfc00 ; GFX9-NEXT: v_readlane_b32 s33, v40, 2 ; GFX9-NEXT: s_or_saveexec_b64 s[34:35], -1 @@ -10229,12 +10225,12 @@ define amdgpu_gfx void @stack_8xv5i32() #0 { ; GFX10-NEXT: v_mov_b32_e32 v30, 6 ; GFX10-NEXT: v_mov_b32_e32 v31, 7 ; GFX10-NEXT: v_writelane_b32 v40, s31, 1 -; GFX10-NEXT: s_getpc_b64 s[34:35] -; GFX10-NEXT: s_add_u32 s34, s34, external_void_func_8xv5i32@rel32@lo+4 -; GFX10-NEXT: s_addc_u32 s35, s35, external_void_func_8xv5i32@rel32@hi+12 -; GFX10-NEXT: s_swappc_b64 s[30:31], s[34:35] -; GFX10-NEXT: v_readlane_b32 s31, v40, 1 +; GFX10-NEXT: s_getpc_b64 s[30:31] +; GFX10-NEXT: s_add_u32 s30, s30, external_void_func_8xv5i32@rel32@lo+4 +; GFX10-NEXT: s_addc_u32 s31, s31, external_void_func_8xv5i32@rel32@hi+12 +; GFX10-NEXT: s_swappc_b64 s[30:31], s[30:31] ; GFX10-NEXT: v_readlane_b32 s30, v40, 0 +; GFX10-NEXT: v_readlane_b32 s31, v40, 1 ; GFX10-NEXT: s_addk_i32 s32, 0xfe00 ; GFX10-NEXT: v_readlane_b32 s33, v40, 2 ; GFX10-NEXT: s_or_saveexec_b32 s34, -1 @@ -10303,8 +10299,8 @@ define amdgpu_gfx void @stack_8xv5i32() #0 { ; GFX10-SCRATCH-NEXT: s_add_u32 s0, s0, external_void_func_8xv5i32@rel32@lo+4 ; GFX10-SCRATCH-NEXT: s_addc_u32 s1, s1, external_void_func_8xv5i32@rel32@hi+12 ; GFX10-SCRATCH-NEXT: s_swappc_b64 s[30:31], s[0:1] -; GFX10-SCRATCH-NEXT: v_readlane_b32 s31, v40, 1 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s30, v40, 0 +; GFX10-SCRATCH-NEXT: v_readlane_b32 s31, v40, 1 ; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, -16 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s33, v40, 2 ; GFX10-SCRATCH-NEXT: s_or_saveexec_b32 s0, -1 @@ -10386,12 +10382,12 @@ define amdgpu_gfx void @stack_8xv5f32() #0 { ; GFX9-NEXT: v_mov_b32_e32 v30, 0x40c00000 ; GFX9-NEXT: v_mov_b32_e32 v31, 0x40e00000 ; GFX9-NEXT: 
v_writelane_b32 v40, s31, 1 -; GFX9-NEXT: s_getpc_b64 s[34:35] -; GFX9-NEXT: s_add_u32 s34, s34, external_void_func_8xv5f32@rel32@lo+4 -; GFX9-NEXT: s_addc_u32 s35, s35, external_void_func_8xv5f32@rel32@hi+12 -; GFX9-NEXT: s_swappc_b64 s[30:31], s[34:35] -; GFX9-NEXT: v_readlane_b32 s31, v40, 1 +; GFX9-NEXT: s_getpc_b64 s[30:31] +; GFX9-NEXT: s_add_u32 s30, s30, external_void_func_8xv5f32@rel32@lo+4 +; GFX9-NEXT: s_addc_u32 s31, s31, external_void_func_8xv5f32@rel32@hi+12 +; GFX9-NEXT: s_swappc_b64 s[30:31], s[30:31] ; GFX9-NEXT: v_readlane_b32 s30, v40, 0 +; GFX9-NEXT: v_readlane_b32 s31, v40, 1 ; GFX9-NEXT: s_addk_i32 s32, 0xfc00 ; GFX9-NEXT: v_readlane_b32 s33, v40, 2 ; GFX9-NEXT: s_or_saveexec_b64 s[34:35], -1 @@ -10461,12 +10457,12 @@ define amdgpu_gfx void @stack_8xv5f32() #0 { ; GFX10-NEXT: v_mov_b32_e32 v30, 0x40c00000 ; GFX10-NEXT: v_mov_b32_e32 v31, 0x40e00000 ; GFX10-NEXT: v_writelane_b32 v40, s31, 1 -; GFX10-NEXT: s_getpc_b64 s[34:35] -; GFX10-NEXT: s_add_u32 s34, s34, external_void_func_8xv5f32@rel32@lo+4 -; GFX10-NEXT: s_addc_u32 s35, s35, external_void_func_8xv5f32@rel32@hi+12 -; GFX10-NEXT: s_swappc_b64 s[30:31], s[34:35] -; GFX10-NEXT: v_readlane_b32 s31, v40, 1 +; GFX10-NEXT: s_getpc_b64 s[30:31] +; GFX10-NEXT: s_add_u32 s30, s30, external_void_func_8xv5f32@rel32@lo+4 +; GFX10-NEXT: s_addc_u32 s31, s31, external_void_func_8xv5f32@rel32@hi+12 +; GFX10-NEXT: s_swappc_b64 s[30:31], s[30:31] ; GFX10-NEXT: v_readlane_b32 s30, v40, 0 +; GFX10-NEXT: v_readlane_b32 s31, v40, 1 ; GFX10-NEXT: s_addk_i32 s32, 0xfe00 ; GFX10-NEXT: v_readlane_b32 s33, v40, 2 ; GFX10-NEXT: s_or_saveexec_b32 s34, -1 @@ -10535,8 +10531,8 @@ define amdgpu_gfx void @stack_8xv5f32() #0 { ; GFX10-SCRATCH-NEXT: s_add_u32 s0, s0, external_void_func_8xv5f32@rel32@lo+4 ; GFX10-SCRATCH-NEXT: s_addc_u32 s1, s1, external_void_func_8xv5f32@rel32@hi+12 ; GFX10-SCRATCH-NEXT: s_swappc_b64 s[30:31], s[0:1] -; GFX10-SCRATCH-NEXT: v_readlane_b32 s31, v40, 1 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s30, v40, 0 +; GFX10-SCRATCH-NEXT: v_readlane_b32 s31, v40, 1 ; GFX10-SCRATCH-NEXT: s_add_i32 s32, s32, -16 ; GFX10-SCRATCH-NEXT: v_readlane_b32 s33, v40, 2 ; GFX10-SCRATCH-NEXT: s_or_saveexec_b32 s0, -1 diff --git a/llvm/test/CodeGen/AMDGPU/gfx-callable-preserved-registers.ll b/llvm/test/CodeGen/AMDGPU/gfx-callable-preserved-registers.ll index 26bc62d0a99b..fa8f51cf412f 100644 --- a/llvm/test/CodeGen/AMDGPU/gfx-callable-preserved-registers.ll +++ b/llvm/test/CodeGen/AMDGPU/gfx-callable-preserved-registers.ll @@ -25,8 +25,8 @@ define amdgpu_gfx void @test_call_external_void_func_void_clobber_s30_s31_call_e ; GFX9-NEXT: ;;#ASMSTART ; GFX9-NEXT: ;;#ASMEND ; GFX9-NEXT: s_swappc_b64 s[30:31], s[4:5] -; GFX9-NEXT: v_readlane_b32 s31, v40, 3 ; GFX9-NEXT: v_readlane_b32 s30, v40, 2 +; GFX9-NEXT: v_readlane_b32 s31, v40, 3 ; GFX9-NEXT: v_readlane_b32 s5, v40, 1 ; GFX9-NEXT: v_readlane_b32 s4, v40, 0 ; GFX9-NEXT: s_addk_i32 s32, 0xfc00 @@ -59,8 +59,8 @@ define amdgpu_gfx void @test_call_external_void_func_void_clobber_s30_s31_call_e ; GFX10-NEXT: ;;#ASMSTART ; GFX10-NEXT: ;;#ASMEND ; GFX10-NEXT: s_swappc_b64 s[30:31], s[4:5] -; GFX10-NEXT: v_readlane_b32 s31, v40, 3 ; GFX10-NEXT: v_readlane_b32 s30, v40, 2 +; GFX10-NEXT: v_readlane_b32 s31, v40, 3 ; GFX10-NEXT: v_readlane_b32 s5, v40, 1 ; GFX10-NEXT: v_readlane_b32 s4, v40, 0 ; GFX10-NEXT: s_addk_i32 s32, 0xfe00 @@ -77,54 +77,26 @@ define amdgpu_gfx void @test_call_external_void_func_void_clobber_s30_s31_call_e ret void } -define amdgpu_gfx void @void_func_void_clobber_s28_s29() #1 { -; 
GFX9-LABEL: void_func_void_clobber_s28_s29: +define amdgpu_gfx void @void_func_void_clobber_s30_s31() #1 { +; GFX9-LABEL: void_func_void_clobber_s30_s31: ; GFX9: ; %bb.0: ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX9-NEXT: s_or_saveexec_b64 s[34:35], -1 -; GFX9-NEXT: buffer_store_dword v0, off, s[0:3], s32 ; 4-byte Folded Spill -; GFX9-NEXT: s_mov_b64 exec, s[34:35] -; GFX9-NEXT: v_writelane_b32 v0, s28, 0 -; GFX9-NEXT: v_writelane_b32 v0, s29, 1 +; GFX9-NEXT: s_mov_b64 s[36:37], s[30:31] ; GFX9-NEXT: ;;#ASMSTART ; GFX9-NEXT: ; clobber ; GFX9-NEXT: ;;#ASMEND -; GFX9-NEXT: v_readlane_b32 s29, v0, 1 -; GFX9-NEXT: v_readlane_b32 s28, v0, 0 -; GFX9-NEXT: s_or_saveexec_b64 s[34:35], -1 -; GFX9-NEXT: buffer_load_dword v0, off, s[0:3], s32 ; 4-byte Folded Reload -; GFX9-NEXT: s_mov_b64 exec, s[34:35] -; GFX9-NEXT: s_waitcnt vmcnt(0) -; GFX9-NEXT: s_setpc_b64 s[30:31] +; GFX9-NEXT: s_setpc_b64 s[36:37] ; -; GFX10-LABEL: void_func_void_clobber_s28_s29: +; GFX10-LABEL: void_func_void_clobber_s30_s31: ; GFX10: ; %bb.0: ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 -; GFX10-NEXT: s_or_saveexec_b32 s34, -1 -; GFX10-NEXT: buffer_store_dword v0, off, s[0:3], s32 ; 4-byte Folded Spill -; GFX10-NEXT: s_waitcnt_depctr 0xffe3 -; GFX10-NEXT: s_mov_b32 exec_lo, s34 -; GFX10-NEXT: v_writelane_b32 v0, s28, 0 -; GFX10-NEXT: v_writelane_b32 v0, s29, 1 +; GFX10-NEXT: s_mov_b64 s[36:37], s[30:31] ; GFX10-NEXT: ;;#ASMSTART ; GFX10-NEXT: ; clobber ; GFX10-NEXT: ;;#ASMEND -; GFX10-NEXT: v_readlane_b32 s29, v0, 1 -; GFX10-NEXT: v_readlane_b32 s28, v0, 0 -; GFX10-NEXT: s_or_saveexec_b32 s34, -1 -; GFX10-NEXT: buffer_load_dword v0, off, s[0:3], s32 ; 4-byte Folded Reload -; GFX10-NEXT: s_waitcnt_depctr 0xffe3 -; GFX10-NEXT: s_mov_b32 exec_lo, s34 -; GFX10-NEXT: s_waitcnt vmcnt(0) -; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 -; GFX10-NEXT: s_setpc_b64 s[30:31] -; GCN: v_writelane_b32 v0, s28, 0 -; GCN: v_writelane_b32 v0, s29, 1 - -; GCN: v_readlane_b32 s28, v0, 0 -; GCN: v_readlane_b32 s29, v0, 1 - call void asm sideeffect "; clobber", "~{s[28:29]}"() #0 +; GFX10-NEXT: s_setpc_b64 s[36:37] + call void asm sideeffect "; clobber", "~{s[30:31]}"() #0 ret void } @@ -137,24 +109,24 @@ define amdgpu_gfx void @test_call_void_func_void_mayclobber_s31(i32 addrspace(1) ; GFX9-NEXT: s_mov_b64 exec, s[34:35] ; GFX9-NEXT: v_writelane_b32 v40, s33, 3 ; GFX9-NEXT: v_writelane_b32 v40, s4, 0 +; GFX9-NEXT: v_writelane_b32 v40, s30, 1 ; GFX9-NEXT: s_mov_b32 s33, s32 ; GFX9-NEXT: s_addk_i32 s32, 0x400 -; GFX9-NEXT: v_writelane_b32 v40, s30, 1 ; GFX9-NEXT: v_writelane_b32 v40, s31, 2 ; GFX9-NEXT: ;;#ASMSTART ; GFX9-NEXT: ; def s31 ; GFX9-NEXT: ;;#ASMEND ; GFX9-NEXT: s_mov_b32 s4, s31 -; GFX9-NEXT: s_getpc_b64 s[34:35] -; GFX9-NEXT: s_add_u32 s34, s34, external_void_func_void@rel32@lo+4 -; GFX9-NEXT: s_addc_u32 s35, s35, external_void_func_void@rel32@hi+12 -; GFX9-NEXT: s_swappc_b64 s[30:31], s[34:35] +; GFX9-NEXT: s_getpc_b64 s[30:31] +; GFX9-NEXT: s_add_u32 s30, s30, external_void_func_void@rel32@lo+4 +; GFX9-NEXT: s_addc_u32 s31, s31, external_void_func_void@rel32@hi+12 +; GFX9-NEXT: s_swappc_b64 s[30:31], s[30:31] ; GFX9-NEXT: s_mov_b32 s31, s4 ; GFX9-NEXT: ;;#ASMSTART ; GFX9-NEXT: ; use s31 ; GFX9-NEXT: ;;#ASMEND -; GFX9-NEXT: v_readlane_b32 s31, v40, 2 ; GFX9-NEXT: v_readlane_b32 s30, v40, 1 +; GFX9-NEXT: v_readlane_b32 s31, v40, 2 ; GFX9-NEXT: v_readlane_b32 s4, v40, 0 ; GFX9-NEXT: s_addk_i32 s32, 0xfc00 ; GFX9-NEXT: v_readlane_b32 s33, v40, 3 @@ -190,8 +162,8 @@ define 
amdgpu_gfx void @test_call_void_func_void_mayclobber_s31(i32 addrspace(1) ; GFX10-NEXT: ;;#ASMSTART ; GFX10-NEXT: ; use s31 ; GFX10-NEXT: ;;#ASMEND -; GFX10-NEXT: v_readlane_b32 s31, v40, 2 ; GFX10-NEXT: v_readlane_b32 s30, v40, 1 +; GFX10-NEXT: v_readlane_b32 s31, v40, 2 ; GFX10-NEXT: v_readlane_b32 s4, v40, 0 ; GFX10-NEXT: s_addk_i32 s32, 0xfe00 ; GFX10-NEXT: v_readlane_b32 s33, v40, 3 @@ -224,17 +196,17 @@ define amdgpu_gfx void @test_call_void_func_void_mayclobber_v31(i32 addrspace(1) ; GFX9-NEXT: ; def v31 ; GFX9-NEXT: ;;#ASMEND ; GFX9-NEXT: v_mov_b32_e32 v41, v31 -; GFX9-NEXT: s_getpc_b64 s[34:35] -; GFX9-NEXT: s_add_u32 s34, s34, external_void_func_void@rel32@lo+4 -; GFX9-NEXT: s_addc_u32 s35, s35, external_void_func_void@rel32@hi+12 -; GFX9-NEXT: s_swappc_b64 s[30:31], s[34:35] +; GFX9-NEXT: s_getpc_b64 s[30:31] +; GFX9-NEXT: s_add_u32 s30, s30, external_void_func_void@rel32@lo+4 +; GFX9-NEXT: s_addc_u32 s31, s31, external_void_func_void@rel32@hi+12 +; GFX9-NEXT: s_swappc_b64 s[30:31], s[30:31] ; GFX9-NEXT: v_mov_b32_e32 v31, v41 ; GFX9-NEXT: ;;#ASMSTART ; GFX9-NEXT: ; use v31 ; GFX9-NEXT: ;;#ASMEND ; GFX9-NEXT: buffer_load_dword v41, off, s[0:3], s33 ; 4-byte Folded Reload -; GFX9-NEXT: v_readlane_b32 s31, v40, 1 ; GFX9-NEXT: v_readlane_b32 s30, v40, 0 +; GFX9-NEXT: v_readlane_b32 s31, v40, 1 ; GFX9-NEXT: s_addk_i32 s32, 0xfc00 ; GFX9-NEXT: v_readlane_b32 s33, v40, 2 ; GFX9-NEXT: s_or_saveexec_b64 s[34:35], -1 @@ -260,18 +232,18 @@ define amdgpu_gfx void @test_call_void_func_void_mayclobber_v31(i32 addrspace(1) ; GFX10-NEXT: ;;#ASMEND ; GFX10-NEXT: v_writelane_b32 v40, s30, 0 ; GFX10-NEXT: v_mov_b32_e32 v41, v31 -; GFX10-NEXT: s_getpc_b64 s[34:35] -; GFX10-NEXT: s_add_u32 s34, s34, external_void_func_void@rel32@lo+4 -; GFX10-NEXT: s_addc_u32 s35, s35, external_void_func_void@rel32@hi+12 ; GFX10-NEXT: v_writelane_b32 v40, s31, 1 -; GFX10-NEXT: s_swappc_b64 s[30:31], s[34:35] +; GFX10-NEXT: s_getpc_b64 s[30:31] +; GFX10-NEXT: s_add_u32 s30, s30, external_void_func_void@rel32@lo+4 +; GFX10-NEXT: s_addc_u32 s31, s31, external_void_func_void@rel32@hi+12 +; GFX10-NEXT: s_swappc_b64 s[30:31], s[30:31] ; GFX10-NEXT: v_mov_b32_e32 v31, v41 ; GFX10-NEXT: ;;#ASMSTART ; GFX10-NEXT: ; use v31 ; GFX10-NEXT: ;;#ASMEND ; GFX10-NEXT: buffer_load_dword v41, off, s[0:3], s33 ; 4-byte Folded Reload -; GFX10-NEXT: v_readlane_b32 s31, v40, 1 ; GFX10-NEXT: v_readlane_b32 s30, v40, 0 +; GFX10-NEXT: v_readlane_b32 s31, v40, 1 ; GFX10-NEXT: s_addk_i32 s32, 0xfe00 ; GFX10-NEXT: v_readlane_b32 s33, v40, 2 ; GFX10-NEXT: s_or_saveexec_b32 s34, -1 @@ -304,16 +276,16 @@ define amdgpu_gfx void @test_call_void_func_void_preserves_s33(i32 addrspace(1)* ; GFX9-NEXT: ; def s33 ; GFX9-NEXT: ;;#ASMEND ; GFX9-NEXT: s_mov_b32 s4, s33 -; GFX9-NEXT: s_getpc_b64 s[34:35] -; GFX9-NEXT: s_add_u32 s34, s34, external_void_func_void@rel32@lo+4 -; GFX9-NEXT: s_addc_u32 s35, s35, external_void_func_void@rel32@hi+12 -; GFX9-NEXT: s_swappc_b64 s[30:31], s[34:35] +; GFX9-NEXT: s_getpc_b64 s[30:31] +; GFX9-NEXT: s_add_u32 s30, s30, external_void_func_void@rel32@lo+4 +; GFX9-NEXT: s_addc_u32 s31, s31, external_void_func_void@rel32@hi+12 +; GFX9-NEXT: s_swappc_b64 s[30:31], s[30:31] ; GFX9-NEXT: s_mov_b32 s33, s4 +; GFX9-NEXT: v_readlane_b32 s30, v40, 1 ; GFX9-NEXT: ;;#ASMSTART ; GFX9-NEXT: ; use s33 ; GFX9-NEXT: ;;#ASMEND ; GFX9-NEXT: v_readlane_b32 s31, v40, 2 -; GFX9-NEXT: v_readlane_b32 s30, v40, 1 ; GFX9-NEXT: v_readlane_b32 s4, v40, 0 ; GFX9-NEXT: s_addk_i32 s32, 0xfc00 ; GFX9-NEXT: v_readlane_b32 s33, v40, 3 @@ -334,9 +306,6 @@ 
define amdgpu_gfx void @test_call_void_func_void_preserves_s33(i32 addrspace(1)* ; GFX10-NEXT: v_writelane_b32 v40, s33, 3 ; GFX10-NEXT: s_mov_b32 s33, s32 ; GFX10-NEXT: s_addk_i32 s32, 0x200 -; GFX10-NEXT: s_getpc_b64 s[34:35] -; GFX10-NEXT: s_add_u32 s34, s34, external_void_func_void@rel32@lo+4 -; GFX10-NEXT: s_addc_u32 s35, s35, external_void_func_void@rel32@hi+12 ; GFX10-NEXT: ;;#ASMSTART ; GFX10-NEXT: ; def s33 ; GFX10-NEXT: ;;#ASMEND @@ -344,13 +313,16 @@ define amdgpu_gfx void @test_call_void_func_void_preserves_s33(i32 addrspace(1)* ; GFX10-NEXT: s_mov_b32 s4, s33 ; GFX10-NEXT: v_writelane_b32 v40, s30, 1 ; GFX10-NEXT: v_writelane_b32 v40, s31, 2 -; GFX10-NEXT: s_swappc_b64 s[30:31], s[34:35] +; GFX10-NEXT: s_getpc_b64 s[30:31] +; GFX10-NEXT: s_add_u32 s30, s30, external_void_func_void@rel32@lo+4 +; GFX10-NEXT: s_addc_u32 s31, s31, external_void_func_void@rel32@hi+12 +; GFX10-NEXT: s_swappc_b64 s[30:31], s[30:31] ; GFX10-NEXT: s_mov_b32 s33, s4 +; GFX10-NEXT: v_readlane_b32 s30, v40, 1 ; GFX10-NEXT: ;;#ASMSTART ; GFX10-NEXT: ; use s33 ; GFX10-NEXT: ;;#ASMEND ; GFX10-NEXT: v_readlane_b32 s31, v40, 2 -; GFX10-NEXT: v_readlane_b32 s30, v40, 1 ; GFX10-NEXT: v_readlane_b32 s4, v40, 0 ; GFX10-NEXT: s_addk_i32 s32, 0xfe00 ; GFX10-NEXT: v_readlane_b32 s33, v40, 3 @@ -378,21 +350,21 @@ define amdgpu_gfx void @test_call_void_func_void_preserves_s34(i32 addrspace(1)* ; GFX9-NEXT: s_mov_b32 s33, s32 ; GFX9-NEXT: s_addk_i32 s32, 0x400 ; GFX9-NEXT: v_writelane_b32 v40, s30, 1 +; GFX9-NEXT: v_writelane_b32 v40, s31, 2 ; GFX9-NEXT: ;;#ASMSTART ; GFX9-NEXT: ; def s34 ; GFX9-NEXT: ;;#ASMEND -; GFX9-NEXT: v_writelane_b32 v40, s31, 2 ; GFX9-NEXT: s_mov_b32 s4, s34 -; GFX9-NEXT: s_getpc_b64 s[34:35] -; GFX9-NEXT: s_add_u32 s34, s34, external_void_func_void@rel32@lo+4 -; GFX9-NEXT: s_addc_u32 s35, s35, external_void_func_void@rel32@hi+12 -; GFX9-NEXT: s_swappc_b64 s[30:31], s[34:35] +; GFX9-NEXT: s_getpc_b64 s[30:31] +; GFX9-NEXT: s_add_u32 s30, s30, external_void_func_void@rel32@lo+4 +; GFX9-NEXT: s_addc_u32 s31, s31, external_void_func_void@rel32@hi+12 +; GFX9-NEXT: s_swappc_b64 s[30:31], s[30:31] +; GFX9-NEXT: v_readlane_b32 s30, v40, 1 ; GFX9-NEXT: s_mov_b32 s34, s4 ; GFX9-NEXT: ;;#ASMSTART ; GFX9-NEXT: ; use s34 ; GFX9-NEXT: ;;#ASMEND ; GFX9-NEXT: v_readlane_b32 s31, v40, 2 -; GFX9-NEXT: v_readlane_b32 s30, v40, 1 ; GFX9-NEXT: v_readlane_b32 s4, v40, 0 ; GFX9-NEXT: s_addk_i32 s32, 0xfc00 ; GFX9-NEXT: v_readlane_b32 s33, v40, 3 @@ -413,9 +385,6 @@ define amdgpu_gfx void @test_call_void_func_void_preserves_s34(i32 addrspace(1)* ; GFX10-NEXT: v_writelane_b32 v40, s33, 3 ; GFX10-NEXT: s_mov_b32 s33, s32 ; GFX10-NEXT: s_addk_i32 s32, 0x200 -; GFX10-NEXT: s_getpc_b64 s[36:37] -; GFX10-NEXT: s_add_u32 s36, s36, external_void_func_void@rel32@lo+4 -; GFX10-NEXT: s_addc_u32 s37, s37, external_void_func_void@rel32@hi+12 ; GFX10-NEXT: ;;#ASMSTART ; GFX10-NEXT: ; def s34 ; GFX10-NEXT: ;;#ASMEND @@ -423,13 +392,16 @@ define amdgpu_gfx void @test_call_void_func_void_preserves_s34(i32 addrspace(1)* ; GFX10-NEXT: s_mov_b32 s4, s34 ; GFX10-NEXT: v_writelane_b32 v40, s30, 1 ; GFX10-NEXT: v_writelane_b32 v40, s31, 2 -; GFX10-NEXT: s_swappc_b64 s[30:31], s[36:37] +; GFX10-NEXT: s_getpc_b64 s[30:31] +; GFX10-NEXT: s_add_u32 s30, s30, external_void_func_void@rel32@lo+4 +; GFX10-NEXT: s_addc_u32 s31, s31, external_void_func_void@rel32@hi+12 +; GFX10-NEXT: s_swappc_b64 s[30:31], s[30:31] +; GFX10-NEXT: v_readlane_b32 s30, v40, 1 ; GFX10-NEXT: s_mov_b32 s34, s4 ; GFX10-NEXT: ;;#ASMSTART ; GFX10-NEXT: ; use s34 ; GFX10-NEXT: 
;;#ASMEND ; GFX10-NEXT: v_readlane_b32 s31, v40, 2 -; GFX10-NEXT: v_readlane_b32 s30, v40, 1 ; GFX10-NEXT: v_readlane_b32 s4, v40, 0 ; GFX10-NEXT: s_addk_i32 s32, 0xfe00 ; GFX10-NEXT: v_readlane_b32 s33, v40, 3 @@ -461,16 +433,16 @@ define amdgpu_gfx void @test_call_void_func_void_preserves_v40(i32 addrspace(1)* ; GFX9-NEXT: ;;#ASMSTART ; GFX9-NEXT: ; def v40 ; GFX9-NEXT: ;;#ASMEND -; GFX9-NEXT: s_getpc_b64 s[34:35] -; GFX9-NEXT: s_add_u32 s34, s34, external_void_func_void@rel32@lo+4 -; GFX9-NEXT: s_addc_u32 s35, s35, external_void_func_void@rel32@hi+12 -; GFX9-NEXT: s_swappc_b64 s[30:31], s[34:35] +; GFX9-NEXT: s_getpc_b64 s[30:31] +; GFX9-NEXT: s_add_u32 s30, s30, external_void_func_void@rel32@lo+4 +; GFX9-NEXT: s_addc_u32 s31, s31, external_void_func_void@rel32@hi+12 +; GFX9-NEXT: s_swappc_b64 s[30:31], s[30:31] ; GFX9-NEXT: ;;#ASMSTART ; GFX9-NEXT: ; use v40 ; GFX9-NEXT: ;;#ASMEND ; GFX9-NEXT: buffer_load_dword v40, off, s[0:3], s33 ; 4-byte Folded Reload -; GFX9-NEXT: v_readlane_b32 s31, v41, 1 ; GFX9-NEXT: v_readlane_b32 s30, v41, 0 +; GFX9-NEXT: v_readlane_b32 s31, v41, 1 ; GFX9-NEXT: s_addk_i32 s32, 0xfc00 ; GFX9-NEXT: v_readlane_b32 s33, v41, 2 ; GFX9-NEXT: s_or_saveexec_b64 s[34:35], -1 @@ -495,17 +467,17 @@ define amdgpu_gfx void @test_call_void_func_void_preserves_v40(i32 addrspace(1)* ; GFX10-NEXT: ; def v40 ; GFX10-NEXT: ;;#ASMEND ; GFX10-NEXT: v_writelane_b32 v41, s30, 0 -; GFX10-NEXT: s_getpc_b64 s[34:35] -; GFX10-NEXT: s_add_u32 s34, s34, external_void_func_void@rel32@lo+4 -; GFX10-NEXT: s_addc_u32 s35, s35, external_void_func_void@rel32@hi+12 ; GFX10-NEXT: v_writelane_b32 v41, s31, 1 -; GFX10-NEXT: s_swappc_b64 s[30:31], s[34:35] +; GFX10-NEXT: s_getpc_b64 s[30:31] +; GFX10-NEXT: s_add_u32 s30, s30, external_void_func_void@rel32@lo+4 +; GFX10-NEXT: s_addc_u32 s31, s31, external_void_func_void@rel32@hi+12 +; GFX10-NEXT: s_swappc_b64 s[30:31], s[30:31] ; GFX10-NEXT: ;;#ASMSTART ; GFX10-NEXT: ; use v40 ; GFX10-NEXT: ;;#ASMEND ; GFX10-NEXT: buffer_load_dword v40, off, s[0:3], s33 ; 4-byte Folded Reload -; GFX10-NEXT: v_readlane_b32 s31, v41, 1 ; GFX10-NEXT: v_readlane_b32 s30, v41, 0 +; GFX10-NEXT: v_readlane_b32 s31, v41, 1 ; GFX10-NEXT: s_addk_i32 s32, 0xfe00 ; GFX10-NEXT: v_readlane_b32 s33, v41, 2 ; GFX10-NEXT: s_or_saveexec_b32 s34, -1 @@ -616,12 +588,12 @@ define amdgpu_gfx void @test_call_void_func_void_clobber_s33() #0 { ; GFX9-NEXT: s_addk_i32 s32, 0x400 ; GFX9-NEXT: v_writelane_b32 v40, s30, 0 ; GFX9-NEXT: v_writelane_b32 v40, s31, 1 -; GFX9-NEXT: s_getpc_b64 s[34:35] -; GFX9-NEXT: s_add_u32 s34, s34, void_func_void_clobber_s33@rel32@lo+4 -; GFX9-NEXT: s_addc_u32 s35, s35, void_func_void_clobber_s33@rel32@hi+12 -; GFX9-NEXT: s_swappc_b64 s[30:31], s[34:35] -; GFX9-NEXT: v_readlane_b32 s31, v40, 1 +; GFX9-NEXT: s_getpc_b64 s[30:31] +; GFX9-NEXT: s_add_u32 s30, s30, void_func_void_clobber_s33@rel32@lo+4 +; GFX9-NEXT: s_addc_u32 s31, s31, void_func_void_clobber_s33@rel32@hi+12 +; GFX9-NEXT: s_swappc_b64 s[30:31], s[30:31] ; GFX9-NEXT: v_readlane_b32 s30, v40, 0 +; GFX9-NEXT: v_readlane_b32 s31, v40, 1 ; GFX9-NEXT: s_addk_i32 s32, 0xfc00 ; GFX9-NEXT: v_readlane_b32 s33, v40, 2 ; GFX9-NEXT: s_or_saveexec_b64 s[34:35], -1 @@ -641,14 +613,14 @@ define amdgpu_gfx void @test_call_void_func_void_clobber_s33() #0 { ; GFX10-NEXT: v_writelane_b32 v40, s33, 2 ; GFX10-NEXT: s_mov_b32 s33, s32 ; GFX10-NEXT: s_addk_i32 s32, 0x200 -; GFX10-NEXT: s_getpc_b64 s[34:35] -; GFX10-NEXT: s_add_u32 s34, s34, void_func_void_clobber_s33@rel32@lo+4 -; GFX10-NEXT: s_addc_u32 s35, s35, 
void_func_void_clobber_s33@rel32@hi+12 ; GFX10-NEXT: v_writelane_b32 v40, s30, 0 ; GFX10-NEXT: v_writelane_b32 v40, s31, 1 -; GFX10-NEXT: s_swappc_b64 s[30:31], s[34:35] -; GFX10-NEXT: v_readlane_b32 s31, v40, 1 +; GFX10-NEXT: s_getpc_b64 s[30:31] +; GFX10-NEXT: s_add_u32 s30, s30, void_func_void_clobber_s33@rel32@lo+4 +; GFX10-NEXT: s_addc_u32 s31, s31, void_func_void_clobber_s33@rel32@hi+12 +; GFX10-NEXT: s_swappc_b64 s[30:31], s[30:31] ; GFX10-NEXT: v_readlane_b32 s30, v40, 0 +; GFX10-NEXT: v_readlane_b32 s31, v40, 1 ; GFX10-NEXT: s_addk_i32 s32, 0xfe00 ; GFX10-NEXT: v_readlane_b32 s33, v40, 2 ; GFX10-NEXT: s_or_saveexec_b32 s34, -1 @@ -673,12 +645,12 @@ define amdgpu_gfx void @test_call_void_func_void_clobber_s34() #0 { ; GFX9-NEXT: s_addk_i32 s32, 0x400 ; GFX9-NEXT: v_writelane_b32 v40, s30, 0 ; GFX9-NEXT: v_writelane_b32 v40, s31, 1 -; GFX9-NEXT: s_getpc_b64 s[34:35] -; GFX9-NEXT: s_add_u32 s34, s34, void_func_void_clobber_s34@rel32@lo+4 -; GFX9-NEXT: s_addc_u32 s35, s35, void_func_void_clobber_s34@rel32@hi+12 -; GFX9-NEXT: s_swappc_b64 s[30:31], s[34:35] -; GFX9-NEXT: v_readlane_b32 s31, v40, 1 +; GFX9-NEXT: s_getpc_b64 s[30:31] +; GFX9-NEXT: s_add_u32 s30, s30, void_func_void_clobber_s34@rel32@lo+4 +; GFX9-NEXT: s_addc_u32 s31, s31, void_func_void_clobber_s34@rel32@hi+12 +; GFX9-NEXT: s_swappc_b64 s[30:31], s[30:31] ; GFX9-NEXT: v_readlane_b32 s30, v40, 0 +; GFX9-NEXT: v_readlane_b32 s31, v40, 1 ; GFX9-NEXT: s_addk_i32 s32, 0xfc00 ; GFX9-NEXT: v_readlane_b32 s33, v40, 2 ; GFX9-NEXT: s_or_saveexec_b64 s[34:35], -1 @@ -698,14 +670,14 @@ define amdgpu_gfx void @test_call_void_func_void_clobber_s34() #0 { ; GFX10-NEXT: v_writelane_b32 v40, s33, 2 ; GFX10-NEXT: s_mov_b32 s33, s32 ; GFX10-NEXT: s_addk_i32 s32, 0x200 -; GFX10-NEXT: s_getpc_b64 s[34:35] -; GFX10-NEXT: s_add_u32 s34, s34, void_func_void_clobber_s34@rel32@lo+4 -; GFX10-NEXT: s_addc_u32 s35, s35, void_func_void_clobber_s34@rel32@hi+12 ; GFX10-NEXT: v_writelane_b32 v40, s30, 0 ; GFX10-NEXT: v_writelane_b32 v40, s31, 1 -; GFX10-NEXT: s_swappc_b64 s[30:31], s[34:35] -; GFX10-NEXT: v_readlane_b32 s31, v40, 1 +; GFX10-NEXT: s_getpc_b64 s[30:31] +; GFX10-NEXT: s_add_u32 s30, s30, void_func_void_clobber_s34@rel32@lo+4 +; GFX10-NEXT: s_addc_u32 s31, s31, void_func_void_clobber_s34@rel32@hi+12 +; GFX10-NEXT: s_swappc_b64 s[30:31], s[30:31] ; GFX10-NEXT: v_readlane_b32 s30, v40, 0 +; GFX10-NEXT: v_readlane_b32 s31, v40, 1 ; GFX10-NEXT: s_addk_i32 s32, 0xfe00 ; GFX10-NEXT: v_readlane_b32 s33, v40, 2 ; GFX10-NEXT: s_or_saveexec_b32 s34, -1 @@ -735,15 +707,15 @@ define amdgpu_gfx void @callee_saved_sgpr_kernel() #1 { ; GFX9-NEXT: ; def s40 ; GFX9-NEXT: ;;#ASMEND ; GFX9-NEXT: s_mov_b32 s4, s40 -; GFX9-NEXT: s_getpc_b64 s[34:35] -; GFX9-NEXT: s_add_u32 s34, s34, external_void_func_void@rel32@lo+4 -; GFX9-NEXT: s_addc_u32 s35, s35, external_void_func_void@rel32@hi+12 -; GFX9-NEXT: s_swappc_b64 s[30:31], s[34:35] +; GFX9-NEXT: s_getpc_b64 s[30:31] +; GFX9-NEXT: s_add_u32 s30, s30, external_void_func_void@rel32@lo+4 +; GFX9-NEXT: s_addc_u32 s31, s31, external_void_func_void@rel32@hi+12 +; GFX9-NEXT: s_swappc_b64 s[30:31], s[30:31] +; GFX9-NEXT: v_readlane_b32 s30, v40, 1 ; GFX9-NEXT: ;;#ASMSTART ; GFX9-NEXT: ; use s4 ; GFX9-NEXT: ;;#ASMEND ; GFX9-NEXT: v_readlane_b32 s31, v40, 2 -; GFX9-NEXT: v_readlane_b32 s30, v40, 1 ; GFX9-NEXT: v_readlane_b32 s4, v40, 0 ; GFX9-NEXT: s_addk_i32 s32, 0xfc00 ; GFX9-NEXT: v_readlane_b32 s33, v40, 3 @@ -764,9 +736,6 @@ define amdgpu_gfx void @callee_saved_sgpr_kernel() #1 { ; GFX10-NEXT: v_writelane_b32 v40, 
s33, 3 ; GFX10-NEXT: s_mov_b32 s33, s32 ; GFX10-NEXT: s_addk_i32 s32, 0x200 -; GFX10-NEXT: s_getpc_b64 s[34:35] -; GFX10-NEXT: s_add_u32 s34, s34, external_void_func_void@rel32@lo+4 -; GFX10-NEXT: s_addc_u32 s35, s35, external_void_func_void@rel32@hi+12 ; GFX10-NEXT: ;;#ASMSTART ; GFX10-NEXT: ; def s40 ; GFX10-NEXT: ;;#ASMEND @@ -774,12 +743,15 @@ define amdgpu_gfx void @callee_saved_sgpr_kernel() #1 { ; GFX10-NEXT: s_mov_b32 s4, s40 ; GFX10-NEXT: v_writelane_b32 v40, s30, 1 ; GFX10-NEXT: v_writelane_b32 v40, s31, 2 -; GFX10-NEXT: s_swappc_b64 s[30:31], s[34:35] +; GFX10-NEXT: s_getpc_b64 s[30:31] +; GFX10-NEXT: s_add_u32 s30, s30, external_void_func_void@rel32@lo+4 +; GFX10-NEXT: s_addc_u32 s31, s31, external_void_func_void@rel32@hi+12 +; GFX10-NEXT: s_swappc_b64 s[30:31], s[30:31] +; GFX10-NEXT: v_readlane_b32 s30, v40, 1 ; GFX10-NEXT: ;;#ASMSTART ; GFX10-NEXT: ; use s4 ; GFX10-NEXT: ;;#ASMEND ; GFX10-NEXT: v_readlane_b32 s31, v40, 2 -; GFX10-NEXT: v_readlane_b32 s30, v40, 1 ; GFX10-NEXT: v_readlane_b32 s4, v40, 0 ; GFX10-NEXT: s_addk_i32 s32, 0xfe00 ; GFX10-NEXT: v_readlane_b32 s33, v40, 3 @@ -817,10 +789,10 @@ define amdgpu_gfx void @callee_saved_sgpr_vgpr_kernel() #1 { ; GFX9-NEXT: ; def v32 ; GFX9-NEXT: ;;#ASMEND ; GFX9-NEXT: v_mov_b32_e32 v41, v32 -; GFX9-NEXT: s_getpc_b64 s[34:35] -; GFX9-NEXT: s_add_u32 s34, s34, external_void_func_void@rel32@lo+4 -; GFX9-NEXT: s_addc_u32 s35, s35, external_void_func_void@rel32@hi+12 -; GFX9-NEXT: s_swappc_b64 s[30:31], s[34:35] +; GFX9-NEXT: s_getpc_b64 s[30:31] +; GFX9-NEXT: s_add_u32 s30, s30, external_void_func_void@rel32@lo+4 +; GFX9-NEXT: s_addc_u32 s31, s31, external_void_func_void@rel32@hi+12 +; GFX9-NEXT: s_swappc_b64 s[30:31], s[30:31] ; GFX9-NEXT: ;;#ASMSTART ; GFX9-NEXT: ; use s4 ; GFX9-NEXT: ;;#ASMEND @@ -828,8 +800,8 @@ define amdgpu_gfx void @callee_saved_sgpr_vgpr_kernel() #1 { ; GFX9-NEXT: ; use v41 ; GFX9-NEXT: ;;#ASMEND ; GFX9-NEXT: buffer_load_dword v41, off, s[0:3], s33 ; 4-byte Folded Reload -; GFX9-NEXT: v_readlane_b32 s31, v40, 2 ; GFX9-NEXT: v_readlane_b32 s30, v40, 1 +; GFX9-NEXT: v_readlane_b32 s31, v40, 2 ; GFX9-NEXT: v_readlane_b32 s4, v40, 0 ; GFX9-NEXT: s_addk_i32 s32, 0xfc00 ; GFX9-NEXT: v_readlane_b32 s33, v40, 3 @@ -860,12 +832,12 @@ define amdgpu_gfx void @callee_saved_sgpr_vgpr_kernel() #1 { ; GFX10-NEXT: ; def v32 ; GFX10-NEXT: ;;#ASMEND ; GFX10-NEXT: v_mov_b32_e32 v41, v32 -; GFX10-NEXT: s_getpc_b64 s[34:35] -; GFX10-NEXT: s_add_u32 s34, s34, external_void_func_void@rel32@lo+4 -; GFX10-NEXT: s_addc_u32 s35, s35, external_void_func_void@rel32@hi+12 ; GFX10-NEXT: v_writelane_b32 v40, s30, 1 ; GFX10-NEXT: v_writelane_b32 v40, s31, 2 -; GFX10-NEXT: s_swappc_b64 s[30:31], s[34:35] +; GFX10-NEXT: s_getpc_b64 s[30:31] +; GFX10-NEXT: s_add_u32 s30, s30, external_void_func_void@rel32@lo+4 +; GFX10-NEXT: s_addc_u32 s31, s31, external_void_func_void@rel32@hi+12 +; GFX10-NEXT: s_swappc_b64 s[30:31], s[30:31] ; GFX10-NEXT: ;;#ASMSTART ; GFX10-NEXT: ; use s4 ; GFX10-NEXT: ;;#ASMEND @@ -873,8 +845,8 @@ define amdgpu_gfx void @callee_saved_sgpr_vgpr_kernel() #1 { ; GFX10-NEXT: ; use v41 ; GFX10-NEXT: ;;#ASMEND ; GFX10-NEXT: buffer_load_dword v41, off, s[0:3], s33 ; 4-byte Folded Reload -; GFX10-NEXT: v_readlane_b32 s31, v40, 2 ; GFX10-NEXT: v_readlane_b32 s30, v40, 1 +; GFX10-NEXT: v_readlane_b32 s31, v40, 2 ; GFX10-NEXT: v_readlane_b32 s4, v40, 0 ; GFX10-NEXT: s_addk_i32 s32, 0xfe00 ; GFX10-NEXT: v_readlane_b32 s33, v40, 3 diff --git a/llvm/test/CodeGen/AMDGPU/gfx-callable-return-types.ll 
b/llvm/test/CodeGen/AMDGPU/gfx-callable-return-types.ll index 1f2243116812..1b84b0a45458 100644 --- a/llvm/test/CodeGen/AMDGPU/gfx-callable-return-types.ll +++ b/llvm/test/CodeGen/AMDGPU/gfx-callable-return-types.ll @@ -23,59 +23,27 @@ define amdgpu_gfx void @call_i1() #0 { ; GFX9-LABEL: call_i1: ; GFX9: ; %bb.0: ; %entry ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX9-NEXT: s_or_saveexec_b64 s[34:35], -1 -; GFX9-NEXT: buffer_store_dword v1, off, s[0:3], s32 ; 4-byte Folded Spill -; GFX9-NEXT: s_mov_b64 exec, s[34:35] -; GFX9-NEXT: v_writelane_b32 v1, s33, 2 -; GFX9-NEXT: s_mov_b32 s33, s32 -; GFX9-NEXT: s_addk_i32 s32, 0x400 -; GFX9-NEXT: s_getpc_b64 s[34:35] -; GFX9-NEXT: s_add_u32 s34, s34, return_i1@gotpcrel32@lo+4 -; GFX9-NEXT: s_addc_u32 s35, s35, return_i1@gotpcrel32@hi+12 -; GFX9-NEXT: s_load_dwordx2 s[34:35], s[34:35], 0x0 -; GFX9-NEXT: v_writelane_b32 v1, s30, 0 -; GFX9-NEXT: v_writelane_b32 v1, s31, 1 +; GFX9-NEXT: s_mov_b64 s[36:37], s[30:31] +; GFX9-NEXT: s_getpc_b64 s[30:31] +; GFX9-NEXT: s_add_u32 s30, s30, return_i1@gotpcrel32@lo+4 +; GFX9-NEXT: s_addc_u32 s31, s31, return_i1@gotpcrel32@hi+12 +; GFX9-NEXT: s_load_dwordx2 s[30:31], s[30:31], 0x0 ; GFX9-NEXT: s_waitcnt lgkmcnt(0) -; GFX9-NEXT: s_swappc_b64 s[30:31], s[34:35] -; GFX9-NEXT: v_readlane_b32 s31, v1, 1 -; GFX9-NEXT: v_readlane_b32 s30, v1, 0 -; GFX9-NEXT: s_addk_i32 s32, 0xfc00 -; GFX9-NEXT: v_readlane_b32 s33, v1, 2 -; GFX9-NEXT: s_or_saveexec_b64 s[34:35], -1 -; GFX9-NEXT: buffer_load_dword v1, off, s[0:3], s32 ; 4-byte Folded Reload -; GFX9-NEXT: s_mov_b64 exec, s[34:35] -; GFX9-NEXT: s_waitcnt vmcnt(0) -; GFX9-NEXT: s_setpc_b64 s[30:31] +; GFX9-NEXT: s_swappc_b64 s[30:31], s[30:31] +; GFX9-NEXT: s_setpc_b64 s[36:37] ; ; GFX10-LABEL: call_i1: ; GFX10: ; %bb.0: ; %entry ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 -; GFX10-NEXT: s_or_saveexec_b32 s34, -1 -; GFX10-NEXT: buffer_store_dword v1, off, s[0:3], s32 ; 4-byte Folded Spill -; GFX10-NEXT: s_waitcnt_depctr 0xffe3 -; GFX10-NEXT: s_mov_b32 exec_lo, s34 -; GFX10-NEXT: v_writelane_b32 v1, s33, 2 -; GFX10-NEXT: s_mov_b32 s33, s32 -; GFX10-NEXT: s_addk_i32 s32, 0x200 -; GFX10-NEXT: s_getpc_b64 s[34:35] -; GFX10-NEXT: s_add_u32 s34, s34, return_i1@gotpcrel32@lo+4 -; GFX10-NEXT: s_addc_u32 s35, s35, return_i1@gotpcrel32@hi+12 -; GFX10-NEXT: s_load_dwordx2 s[34:35], s[34:35], 0x0 -; GFX10-NEXT: v_writelane_b32 v1, s30, 0 -; GFX10-NEXT: v_writelane_b32 v1, s31, 1 +; GFX10-NEXT: s_mov_b64 s[36:37], s[30:31] +; GFX10-NEXT: s_getpc_b64 s[30:31] +; GFX10-NEXT: s_add_u32 s30, s30, return_i1@gotpcrel32@lo+4 +; GFX10-NEXT: s_addc_u32 s31, s31, return_i1@gotpcrel32@hi+12 +; GFX10-NEXT: s_load_dwordx2 s[30:31], s[30:31], 0x0 ; GFX10-NEXT: s_waitcnt lgkmcnt(0) -; GFX10-NEXT: s_swappc_b64 s[30:31], s[34:35] -; GFX10-NEXT: v_readlane_b32 s31, v1, 1 -; GFX10-NEXT: v_readlane_b32 s30, v1, 0 -; GFX10-NEXT: s_addk_i32 s32, 0xfe00 -; GFX10-NEXT: v_readlane_b32 s33, v1, 2 -; GFX10-NEXT: s_or_saveexec_b32 s34, -1 -; GFX10-NEXT: buffer_load_dword v1, off, s[0:3], s32 ; 4-byte Folded Reload -; GFX10-NEXT: s_waitcnt_depctr 0xffe3 -; GFX10-NEXT: s_mov_b32 exec_lo, s34 -; GFX10-NEXT: s_waitcnt vmcnt(0) -; GFX10-NEXT: s_setpc_b64 s[30:31] +; GFX10-NEXT: s_swappc_b64 s[30:31], s[30:31] +; GFX10-NEXT: s_setpc_b64 s[36:37] entry: call amdgpu_gfx i1 @return_i1() ret void @@ -102,59 +70,27 @@ define amdgpu_gfx void @call_i16() #0 { ; GFX9-LABEL: call_i16: ; GFX9: ; %bb.0: ; %entry ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; 
GFX9-NEXT: s_or_saveexec_b64 s[34:35], -1 -; GFX9-NEXT: buffer_store_dword v1, off, s[0:3], s32 ; 4-byte Folded Spill -; GFX9-NEXT: s_mov_b64 exec, s[34:35] -; GFX9-NEXT: v_writelane_b32 v1, s33, 2 -; GFX9-NEXT: s_mov_b32 s33, s32 -; GFX9-NEXT: s_addk_i32 s32, 0x400 -; GFX9-NEXT: s_getpc_b64 s[34:35] -; GFX9-NEXT: s_add_u32 s34, s34, return_i16@gotpcrel32@lo+4 -; GFX9-NEXT: s_addc_u32 s35, s35, return_i16@gotpcrel32@hi+12 -; GFX9-NEXT: s_load_dwordx2 s[34:35], s[34:35], 0x0 -; GFX9-NEXT: v_writelane_b32 v1, s30, 0 -; GFX9-NEXT: v_writelane_b32 v1, s31, 1 +; GFX9-NEXT: s_mov_b64 s[36:37], s[30:31] +; GFX9-NEXT: s_getpc_b64 s[30:31] +; GFX9-NEXT: s_add_u32 s30, s30, return_i16@gotpcrel32@lo+4 +; GFX9-NEXT: s_addc_u32 s31, s31, return_i16@gotpcrel32@hi+12 +; GFX9-NEXT: s_load_dwordx2 s[30:31], s[30:31], 0x0 ; GFX9-NEXT: s_waitcnt lgkmcnt(0) -; GFX9-NEXT: s_swappc_b64 s[30:31], s[34:35] -; GFX9-NEXT: v_readlane_b32 s31, v1, 1 -; GFX9-NEXT: v_readlane_b32 s30, v1, 0 -; GFX9-NEXT: s_addk_i32 s32, 0xfc00 -; GFX9-NEXT: v_readlane_b32 s33, v1, 2 -; GFX9-NEXT: s_or_saveexec_b64 s[34:35], -1 -; GFX9-NEXT: buffer_load_dword v1, off, s[0:3], s32 ; 4-byte Folded Reload -; GFX9-NEXT: s_mov_b64 exec, s[34:35] -; GFX9-NEXT: s_waitcnt vmcnt(0) -; GFX9-NEXT: s_setpc_b64 s[30:31] +; GFX9-NEXT: s_swappc_b64 s[30:31], s[30:31] +; GFX9-NEXT: s_setpc_b64 s[36:37] ; ; GFX10-LABEL: call_i16: ; GFX10: ; %bb.0: ; %entry ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 -; GFX10-NEXT: s_or_saveexec_b32 s34, -1 -; GFX10-NEXT: buffer_store_dword v1, off, s[0:3], s32 ; 4-byte Folded Spill -; GFX10-NEXT: s_waitcnt_depctr 0xffe3 -; GFX10-NEXT: s_mov_b32 exec_lo, s34 -; GFX10-NEXT: v_writelane_b32 v1, s33, 2 -; GFX10-NEXT: s_mov_b32 s33, s32 -; GFX10-NEXT: s_addk_i32 s32, 0x200 -; GFX10-NEXT: s_getpc_b64 s[34:35] -; GFX10-NEXT: s_add_u32 s34, s34, return_i16@gotpcrel32@lo+4 -; GFX10-NEXT: s_addc_u32 s35, s35, return_i16@gotpcrel32@hi+12 -; GFX10-NEXT: s_load_dwordx2 s[34:35], s[34:35], 0x0 -; GFX10-NEXT: v_writelane_b32 v1, s30, 0 -; GFX10-NEXT: v_writelane_b32 v1, s31, 1 +; GFX10-NEXT: s_mov_b64 s[36:37], s[30:31] +; GFX10-NEXT: s_getpc_b64 s[30:31] +; GFX10-NEXT: s_add_u32 s30, s30, return_i16@gotpcrel32@lo+4 +; GFX10-NEXT: s_addc_u32 s31, s31, return_i16@gotpcrel32@hi+12 +; GFX10-NEXT: s_load_dwordx2 s[30:31], s[30:31], 0x0 ; GFX10-NEXT: s_waitcnt lgkmcnt(0) -; GFX10-NEXT: s_swappc_b64 s[30:31], s[34:35] -; GFX10-NEXT: v_readlane_b32 s31, v1, 1 -; GFX10-NEXT: v_readlane_b32 s30, v1, 0 -; GFX10-NEXT: s_addk_i32 s32, 0xfe00 -; GFX10-NEXT: v_readlane_b32 s33, v1, 2 -; GFX10-NEXT: s_or_saveexec_b32 s34, -1 -; GFX10-NEXT: buffer_load_dword v1, off, s[0:3], s32 ; 4-byte Folded Reload -; GFX10-NEXT: s_waitcnt_depctr 0xffe3 -; GFX10-NEXT: s_mov_b32 exec_lo, s34 -; GFX10-NEXT: s_waitcnt vmcnt(0) -; GFX10-NEXT: s_setpc_b64 s[30:31] +; GFX10-NEXT: s_swappc_b64 s[30:31], s[30:31] +; GFX10-NEXT: s_setpc_b64 s[36:37] entry: call amdgpu_gfx i16 @return_i16() ret void @@ -181,59 +117,27 @@ define amdgpu_gfx void @call_2xi16() #0 { ; GFX9-LABEL: call_2xi16: ; GFX9: ; %bb.0: ; %entry ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX9-NEXT: s_or_saveexec_b64 s[34:35], -1 -; GFX9-NEXT: buffer_store_dword v1, off, s[0:3], s32 ; 4-byte Folded Spill -; GFX9-NEXT: s_mov_b64 exec, s[34:35] -; GFX9-NEXT: v_writelane_b32 v1, s33, 2 -; GFX9-NEXT: s_mov_b32 s33, s32 -; GFX9-NEXT: s_addk_i32 s32, 0x400 -; GFX9-NEXT: s_getpc_b64 s[34:35] -; GFX9-NEXT: s_add_u32 s34, s34, 
return_2xi16@gotpcrel32@lo+4 -; GFX9-NEXT: s_addc_u32 s35, s35, return_2xi16@gotpcrel32@hi+12 -; GFX9-NEXT: s_load_dwordx2 s[34:35], s[34:35], 0x0 -; GFX9-NEXT: v_writelane_b32 v1, s30, 0 -; GFX9-NEXT: v_writelane_b32 v1, s31, 1 +; GFX9-NEXT: s_mov_b64 s[36:37], s[30:31] +; GFX9-NEXT: s_getpc_b64 s[30:31] +; GFX9-NEXT: s_add_u32 s30, s30, return_2xi16@gotpcrel32@lo+4 +; GFX9-NEXT: s_addc_u32 s31, s31, return_2xi16@gotpcrel32@hi+12 +; GFX9-NEXT: s_load_dwordx2 s[30:31], s[30:31], 0x0 ; GFX9-NEXT: s_waitcnt lgkmcnt(0) -; GFX9-NEXT: s_swappc_b64 s[30:31], s[34:35] -; GFX9-NEXT: v_readlane_b32 s31, v1, 1 -; GFX9-NEXT: v_readlane_b32 s30, v1, 0 -; GFX9-NEXT: s_addk_i32 s32, 0xfc00 -; GFX9-NEXT: v_readlane_b32 s33, v1, 2 -; GFX9-NEXT: s_or_saveexec_b64 s[34:35], -1 -; GFX9-NEXT: buffer_load_dword v1, off, s[0:3], s32 ; 4-byte Folded Reload -; GFX9-NEXT: s_mov_b64 exec, s[34:35] -; GFX9-NEXT: s_waitcnt vmcnt(0) -; GFX9-NEXT: s_setpc_b64 s[30:31] +; GFX9-NEXT: s_swappc_b64 s[30:31], s[30:31] +; GFX9-NEXT: s_setpc_b64 s[36:37] ; ; GFX10-LABEL: call_2xi16: ; GFX10: ; %bb.0: ; %entry ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 -; GFX10-NEXT: s_or_saveexec_b32 s34, -1 -; GFX10-NEXT: buffer_store_dword v1, off, s[0:3], s32 ; 4-byte Folded Spill -; GFX10-NEXT: s_waitcnt_depctr 0xffe3 -; GFX10-NEXT: s_mov_b32 exec_lo, s34 -; GFX10-NEXT: v_writelane_b32 v1, s33, 2 -; GFX10-NEXT: s_mov_b32 s33, s32 -; GFX10-NEXT: s_addk_i32 s32, 0x200 -; GFX10-NEXT: s_getpc_b64 s[34:35] -; GFX10-NEXT: s_add_u32 s34, s34, return_2xi16@gotpcrel32@lo+4 -; GFX10-NEXT: s_addc_u32 s35, s35, return_2xi16@gotpcrel32@hi+12 -; GFX10-NEXT: s_load_dwordx2 s[34:35], s[34:35], 0x0 -; GFX10-NEXT: v_writelane_b32 v1, s30, 0 -; GFX10-NEXT: v_writelane_b32 v1, s31, 1 +; GFX10-NEXT: s_mov_b64 s[36:37], s[30:31] +; GFX10-NEXT: s_getpc_b64 s[30:31] +; GFX10-NEXT: s_add_u32 s30, s30, return_2xi16@gotpcrel32@lo+4 +; GFX10-NEXT: s_addc_u32 s31, s31, return_2xi16@gotpcrel32@hi+12 +; GFX10-NEXT: s_load_dwordx2 s[30:31], s[30:31], 0x0 ; GFX10-NEXT: s_waitcnt lgkmcnt(0) -; GFX10-NEXT: s_swappc_b64 s[30:31], s[34:35] -; GFX10-NEXT: v_readlane_b32 s31, v1, 1 -; GFX10-NEXT: v_readlane_b32 s30, v1, 0 -; GFX10-NEXT: s_addk_i32 s32, 0xfe00 -; GFX10-NEXT: v_readlane_b32 s33, v1, 2 -; GFX10-NEXT: s_or_saveexec_b32 s34, -1 -; GFX10-NEXT: buffer_load_dword v1, off, s[0:3], s32 ; 4-byte Folded Reload -; GFX10-NEXT: s_waitcnt_depctr 0xffe3 -; GFX10-NEXT: s_mov_b32 exec_lo, s34 -; GFX10-NEXT: s_waitcnt vmcnt(0) -; GFX10-NEXT: s_setpc_b64 s[30:31] +; GFX10-NEXT: s_swappc_b64 s[30:31], s[30:31] +; GFX10-NEXT: s_setpc_b64 s[36:37] entry: call amdgpu_gfx <2 x i16> @return_2xi16() ret void @@ -262,59 +166,27 @@ define amdgpu_gfx void @call_3xi16() #0 { ; GFX9-LABEL: call_3xi16: ; GFX9: ; %bb.0: ; %entry ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX9-NEXT: s_or_saveexec_b64 s[34:35], -1 -; GFX9-NEXT: buffer_store_dword v2, off, s[0:3], s32 ; 4-byte Folded Spill -; GFX9-NEXT: s_mov_b64 exec, s[34:35] -; GFX9-NEXT: v_writelane_b32 v2, s33, 2 -; GFX9-NEXT: s_mov_b32 s33, s32 -; GFX9-NEXT: s_addk_i32 s32, 0x400 -; GFX9-NEXT: s_getpc_b64 s[34:35] -; GFX9-NEXT: s_add_u32 s34, s34, return_3xi16@gotpcrel32@lo+4 -; GFX9-NEXT: s_addc_u32 s35, s35, return_3xi16@gotpcrel32@hi+12 -; GFX9-NEXT: s_load_dwordx2 s[34:35], s[34:35], 0x0 -; GFX9-NEXT: v_writelane_b32 v2, s30, 0 -; GFX9-NEXT: v_writelane_b32 v2, s31, 1 +; GFX9-NEXT: s_mov_b64 s[36:37], s[30:31] +; GFX9-NEXT: s_getpc_b64 s[30:31] +; GFX9-NEXT: s_add_u32 s30, 
s30, return_3xi16@gotpcrel32@lo+4 +; GFX9-NEXT: s_addc_u32 s31, s31, return_3xi16@gotpcrel32@hi+12 +; GFX9-NEXT: s_load_dwordx2 s[30:31], s[30:31], 0x0 ; GFX9-NEXT: s_waitcnt lgkmcnt(0) -; GFX9-NEXT: s_swappc_b64 s[30:31], s[34:35] -; GFX9-NEXT: v_readlane_b32 s31, v2, 1 -; GFX9-NEXT: v_readlane_b32 s30, v2, 0 -; GFX9-NEXT: s_addk_i32 s32, 0xfc00 -; GFX9-NEXT: v_readlane_b32 s33, v2, 2 -; GFX9-NEXT: s_or_saveexec_b64 s[34:35], -1 -; GFX9-NEXT: buffer_load_dword v2, off, s[0:3], s32 ; 4-byte Folded Reload -; GFX9-NEXT: s_mov_b64 exec, s[34:35] -; GFX9-NEXT: s_waitcnt vmcnt(0) -; GFX9-NEXT: s_setpc_b64 s[30:31] +; GFX9-NEXT: s_swappc_b64 s[30:31], s[30:31] +; GFX9-NEXT: s_setpc_b64 s[36:37] ; ; GFX10-LABEL: call_3xi16: ; GFX10: ; %bb.0: ; %entry ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 -; GFX10-NEXT: s_or_saveexec_b32 s34, -1 -; GFX10-NEXT: buffer_store_dword v2, off, s[0:3], s32 ; 4-byte Folded Spill -; GFX10-NEXT: s_waitcnt_depctr 0xffe3 -; GFX10-NEXT: s_mov_b32 exec_lo, s34 -; GFX10-NEXT: v_writelane_b32 v2, s33, 2 -; GFX10-NEXT: s_mov_b32 s33, s32 -; GFX10-NEXT: s_addk_i32 s32, 0x200 -; GFX10-NEXT: s_getpc_b64 s[34:35] -; GFX10-NEXT: s_add_u32 s34, s34, return_3xi16@gotpcrel32@lo+4 -; GFX10-NEXT: s_addc_u32 s35, s35, return_3xi16@gotpcrel32@hi+12 -; GFX10-NEXT: s_load_dwordx2 s[34:35], s[34:35], 0x0 -; GFX10-NEXT: v_writelane_b32 v2, s30, 0 -; GFX10-NEXT: v_writelane_b32 v2, s31, 1 +; GFX10-NEXT: s_mov_b64 s[36:37], s[30:31] +; GFX10-NEXT: s_getpc_b64 s[30:31] +; GFX10-NEXT: s_add_u32 s30, s30, return_3xi16@gotpcrel32@lo+4 +; GFX10-NEXT: s_addc_u32 s31, s31, return_3xi16@gotpcrel32@hi+12 +; GFX10-NEXT: s_load_dwordx2 s[30:31], s[30:31], 0x0 ; GFX10-NEXT: s_waitcnt lgkmcnt(0) -; GFX10-NEXT: s_swappc_b64 s[30:31], s[34:35] -; GFX10-NEXT: v_readlane_b32 s31, v2, 1 -; GFX10-NEXT: v_readlane_b32 s30, v2, 0 -; GFX10-NEXT: s_addk_i32 s32, 0xfe00 -; GFX10-NEXT: v_readlane_b32 s33, v2, 2 -; GFX10-NEXT: s_or_saveexec_b32 s34, -1 -; GFX10-NEXT: buffer_load_dword v2, off, s[0:3], s32 ; 4-byte Folded Reload -; GFX10-NEXT: s_waitcnt_depctr 0xffe3 -; GFX10-NEXT: s_mov_b32 exec_lo, s34 -; GFX10-NEXT: s_waitcnt vmcnt(0) -; GFX10-NEXT: s_setpc_b64 s[30:31] +; GFX10-NEXT: s_swappc_b64 s[30:31], s[30:31] +; GFX10-NEXT: s_setpc_b64 s[36:37] entry: call amdgpu_gfx <3 x i16> @return_3xi16() ret void @@ -1369,63 +1241,41 @@ define amdgpu_gfx void @call_512xi32() #0 { ; GFX9-LABEL: call_512xi32: ; GFX9: ; %bb.0: ; %entry ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX9-NEXT: s_or_saveexec_b64 s[34:35], -1 -; GFX9-NEXT: buffer_store_dword v2, off, s[0:3], s32 offset:2048 ; 4-byte Folded Spill -; GFX9-NEXT: s_mov_b64 exec, s[34:35] -; GFX9-NEXT: v_writelane_b32 v2, s33, 2 +; GFX9-NEXT: s_mov_b32 s34, s33 ; GFX9-NEXT: s_add_i32 s33, s32, 0x1ffc0 ; GFX9-NEXT: s_and_b32 s33, s33, 0xfffe0000 ; GFX9-NEXT: s_add_i32 s32, s32, 0x60000 -; GFX9-NEXT: s_getpc_b64 s[34:35] -; GFX9-NEXT: s_add_u32 s34, s34, return_512xi32@gotpcrel32@lo+4 -; GFX9-NEXT: s_addc_u32 s35, s35, return_512xi32@gotpcrel32@hi+12 -; GFX9-NEXT: s_load_dwordx2 s[34:35], s[34:35], 0x0 -; GFX9-NEXT: v_writelane_b32 v2, s30, 0 +; GFX9-NEXT: s_mov_b64 s[36:37], s[30:31] +; GFX9-NEXT: s_getpc_b64 s[30:31] +; GFX9-NEXT: s_add_u32 s30, s30, return_512xi32@gotpcrel32@lo+4 +; GFX9-NEXT: s_addc_u32 s31, s31, return_512xi32@gotpcrel32@hi+12 +; GFX9-NEXT: s_load_dwordx2 s[30:31], s[30:31], 0x0 ; GFX9-NEXT: v_lshrrev_b32_e64 v0, 6, s33 -; GFX9-NEXT: v_writelane_b32 v2, s31, 1 ; GFX9-NEXT: s_waitcnt 
lgkmcnt(0) -; GFX9-NEXT: s_swappc_b64 s[30:31], s[34:35] -; GFX9-NEXT: v_readlane_b32 s31, v2, 1 -; GFX9-NEXT: v_readlane_b32 s30, v2, 0 +; GFX9-NEXT: s_swappc_b64 s[30:31], s[30:31] ; GFX9-NEXT: s_add_i32 s32, s32, 0xfffa0000 -; GFX9-NEXT: v_readlane_b32 s33, v2, 2 -; GFX9-NEXT: s_or_saveexec_b64 s[34:35], -1 -; GFX9-NEXT: buffer_load_dword v2, off, s[0:3], s32 offset:2048 ; 4-byte Folded Reload -; GFX9-NEXT: s_mov_b64 exec, s[34:35] -; GFX9-NEXT: s_waitcnt vmcnt(0) -; GFX9-NEXT: s_setpc_b64 s[30:31] +; GFX9-NEXT: s_mov_b32 s33, s34 +; GFX9-NEXT: s_setpc_b64 s[36:37] ; ; GFX10-LABEL: call_512xi32: ; GFX10: ; %bb.0: ; %entry ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 -; GFX10-NEXT: s_or_saveexec_b32 s34, -1 -; GFX10-NEXT: buffer_store_dword v2, off, s[0:3], s32 offset:2048 ; 4-byte Folded Spill -; GFX10-NEXT: s_waitcnt_depctr 0xffe3 -; GFX10-NEXT: s_mov_b32 exec_lo, s34 -; GFX10-NEXT: v_writelane_b32 v2, s33, 2 +; GFX10-NEXT: s_mov_b32 s34, s33 ; GFX10-NEXT: s_add_i32 s33, s32, 0xffe0 ; GFX10-NEXT: s_add_i32 s32, s32, 0x30000 ; GFX10-NEXT: s_and_b32 s33, s33, 0xffff0000 -; GFX10-NEXT: s_getpc_b64 s[34:35] -; GFX10-NEXT: s_add_u32 s34, s34, return_512xi32@gotpcrel32@lo+4 -; GFX10-NEXT: s_addc_u32 s35, s35, return_512xi32@gotpcrel32@hi+12 -; GFX10-NEXT: v_writelane_b32 v2, s30, 0 -; GFX10-NEXT: s_load_dwordx2 s[34:35], s[34:35], 0x0 +; GFX10-NEXT: s_mov_b64 s[36:37], s[30:31] +; GFX10-NEXT: s_getpc_b64 s[30:31] +; GFX10-NEXT: s_add_u32 s30, s30, return_512xi32@gotpcrel32@lo+4 +; GFX10-NEXT: s_addc_u32 s31, s31, return_512xi32@gotpcrel32@hi+12 ; GFX10-NEXT: v_lshrrev_b32_e64 v0, 5, s33 -; GFX10-NEXT: v_writelane_b32 v2, s31, 1 +; GFX10-NEXT: s_load_dwordx2 s[30:31], s[30:31], 0x0 ; GFX10-NEXT: s_waitcnt lgkmcnt(0) -; GFX10-NEXT: s_swappc_b64 s[30:31], s[34:35] -; GFX10-NEXT: v_readlane_b32 s31, v2, 1 -; GFX10-NEXT: v_readlane_b32 s30, v2, 0 +; GFX10-NEXT: s_swappc_b64 s[30:31], s[30:31] ; GFX10-NEXT: s_add_i32 s32, s32, 0xfffd0000 -; GFX10-NEXT: v_readlane_b32 s33, v2, 2 -; GFX10-NEXT: s_or_saveexec_b32 s34, -1 -; GFX10-NEXT: buffer_load_dword v2, off, s[0:3], s32 offset:2048 ; 4-byte Folded Reload -; GFX10-NEXT: s_waitcnt_depctr 0xffe3 -; GFX10-NEXT: s_mov_b32 exec_lo, s34 -; GFX10-NEXT: s_waitcnt vmcnt(0) -; GFX10-NEXT: s_setpc_b64 s[30:31] +; GFX10-NEXT: s_mov_b32 s33, s34 +; GFX10-NEXT: s_setpc_b64 s[36:37] entry: call amdgpu_gfx <512 x i32> @return_512xi32() ret void diff --git a/llvm/test/CodeGen/AMDGPU/indirect-call.ll b/llvm/test/CodeGen/AMDGPU/indirect-call.ll index a1a6a805f367..a47606e14745 100644 --- a/llvm/test/CodeGen/AMDGPU/indirect-call.ll +++ b/llvm/test/CodeGen/AMDGPU/indirect-call.ll @@ -399,23 +399,23 @@ define void @test_indirect_call_vgpr_ptr(void()* %fptr) { ; GCN-NEXT: v_writelane_b32 v40, s33, 17 ; GCN-NEXT: s_mov_b32 s33, s32 ; GCN-NEXT: s_addk_i32 s32, 0x400 -; GCN-NEXT: v_writelane_b32 v40, s30, 0 -; GCN-NEXT: v_writelane_b32 v40, s31, 1 -; GCN-NEXT: v_writelane_b32 v40, s34, 2 -; GCN-NEXT: v_writelane_b32 v40, s35, 3 -; GCN-NEXT: v_writelane_b32 v40, s36, 4 -; GCN-NEXT: v_writelane_b32 v40, s37, 5 -; GCN-NEXT: v_writelane_b32 v40, s38, 6 -; GCN-NEXT: v_writelane_b32 v40, s39, 7 -; GCN-NEXT: v_writelane_b32 v40, s40, 8 -; GCN-NEXT: v_writelane_b32 v40, s41, 9 -; GCN-NEXT: v_writelane_b32 v40, s42, 10 -; GCN-NEXT: v_writelane_b32 v40, s43, 11 -; GCN-NEXT: v_writelane_b32 v40, s44, 12 -; GCN-NEXT: v_writelane_b32 v40, s46, 13 -; GCN-NEXT: v_writelane_b32 v40, s47, 14 -; GCN-NEXT: v_writelane_b32 v40, s48, 15 -; 
GCN-NEXT: v_writelane_b32 v40, s49, 16 +; GCN-NEXT: v_writelane_b32 v40, s34, 0 +; GCN-NEXT: v_writelane_b32 v40, s35, 1 +; GCN-NEXT: v_writelane_b32 v40, s36, 2 +; GCN-NEXT: v_writelane_b32 v40, s37, 3 +; GCN-NEXT: v_writelane_b32 v40, s38, 4 +; GCN-NEXT: v_writelane_b32 v40, s39, 5 +; GCN-NEXT: v_writelane_b32 v40, s40, 6 +; GCN-NEXT: v_writelane_b32 v40, s41, 7 +; GCN-NEXT: v_writelane_b32 v40, s42, 8 +; GCN-NEXT: v_writelane_b32 v40, s43, 9 +; GCN-NEXT: v_writelane_b32 v40, s44, 10 +; GCN-NEXT: v_writelane_b32 v40, s46, 11 +; GCN-NEXT: v_writelane_b32 v40, s47, 12 +; GCN-NEXT: v_writelane_b32 v40, s48, 13 +; GCN-NEXT: v_writelane_b32 v40, s49, 14 +; GCN-NEXT: v_writelane_b32 v40, s30, 15 +; GCN-NEXT: v_writelane_b32 v40, s31, 16 ; GCN-NEXT: s_mov_b32 s42, s14 ; GCN-NEXT: s_mov_b32 s43, s13 ; GCN-NEXT: s_mov_b32 s44, s12 @@ -443,30 +443,30 @@ define void @test_indirect_call_vgpr_ptr(void()* %fptr) { ; GCN-NEXT: s_cbranch_execnz .LBB2_1 ; GCN-NEXT: ; %bb.2: ; GCN-NEXT: s_mov_b64 exec, s[46:47] -; GCN-NEXT: v_readlane_b32 s49, v40, 16 -; GCN-NEXT: v_readlane_b32 s48, v40, 15 -; GCN-NEXT: v_readlane_b32 s47, v40, 14 -; GCN-NEXT: v_readlane_b32 s46, v40, 13 -; GCN-NEXT: v_readlane_b32 s44, v40, 12 -; GCN-NEXT: v_readlane_b32 s43, v40, 11 -; GCN-NEXT: v_readlane_b32 s42, v40, 10 -; GCN-NEXT: v_readlane_b32 s41, v40, 9 -; GCN-NEXT: v_readlane_b32 s40, v40, 8 -; GCN-NEXT: v_readlane_b32 s39, v40, 7 -; GCN-NEXT: v_readlane_b32 s38, v40, 6 -; GCN-NEXT: v_readlane_b32 s37, v40, 5 -; GCN-NEXT: v_readlane_b32 s36, v40, 4 -; GCN-NEXT: v_readlane_b32 s35, v40, 3 -; GCN-NEXT: v_readlane_b32 s34, v40, 2 -; GCN-NEXT: v_readlane_b32 s31, v40, 1 -; GCN-NEXT: v_readlane_b32 s30, v40, 0 +; GCN-NEXT: v_readlane_b32 s4, v40, 15 +; GCN-NEXT: v_readlane_b32 s5, v40, 16 +; GCN-NEXT: v_readlane_b32 s49, v40, 14 +; GCN-NEXT: v_readlane_b32 s48, v40, 13 +; GCN-NEXT: v_readlane_b32 s47, v40, 12 +; GCN-NEXT: v_readlane_b32 s46, v40, 11 +; GCN-NEXT: v_readlane_b32 s44, v40, 10 +; GCN-NEXT: v_readlane_b32 s43, v40, 9 +; GCN-NEXT: v_readlane_b32 s42, v40, 8 +; GCN-NEXT: v_readlane_b32 s41, v40, 7 +; GCN-NEXT: v_readlane_b32 s40, v40, 6 +; GCN-NEXT: v_readlane_b32 s39, v40, 5 +; GCN-NEXT: v_readlane_b32 s38, v40, 4 +; GCN-NEXT: v_readlane_b32 s37, v40, 3 +; GCN-NEXT: v_readlane_b32 s36, v40, 2 +; GCN-NEXT: v_readlane_b32 s35, v40, 1 +; GCN-NEXT: v_readlane_b32 s34, v40, 0 ; GCN-NEXT: s_addk_i32 s32, 0xfc00 ; GCN-NEXT: v_readlane_b32 s33, v40, 17 -; GCN-NEXT: s_or_saveexec_b64 s[4:5], -1 +; GCN-NEXT: s_or_saveexec_b64 s[6:7], -1 ; GCN-NEXT: buffer_load_dword v40, off, s[0:3], s32 ; 4-byte Folded Reload -; GCN-NEXT: s_mov_b64 exec, s[4:5] +; GCN-NEXT: s_mov_b64 exec, s[6:7] ; GCN-NEXT: s_waitcnt vmcnt(0) -; GCN-NEXT: s_setpc_b64 s[30:31] +; GCN-NEXT: s_setpc_b64 s[4:5] ; ; GISEL-LABEL: test_indirect_call_vgpr_ptr: ; GISEL: ; %bb.0: @@ -477,23 +477,21 @@ define void @test_indirect_call_vgpr_ptr(void()* %fptr) { ; GISEL-NEXT: v_writelane_b32 v40, s33, 17 ; GISEL-NEXT: s_mov_b32 s33, s32 ; GISEL-NEXT: s_addk_i32 s32, 0x400 -; GISEL-NEXT: v_writelane_b32 v40, s30, 0 -; GISEL-NEXT: v_writelane_b32 v40, s31, 1 -; GISEL-NEXT: v_writelane_b32 v40, s34, 2 -; GISEL-NEXT: v_writelane_b32 v40, s35, 3 -; GISEL-NEXT: v_writelane_b32 v40, s36, 4 -; GISEL-NEXT: v_writelane_b32 v40, s37, 5 -; GISEL-NEXT: v_writelane_b32 v40, s38, 6 -; GISEL-NEXT: v_writelane_b32 v40, s39, 7 -; GISEL-NEXT: v_writelane_b32 v40, s40, 8 -; GISEL-NEXT: v_writelane_b32 v40, s41, 9 -; GISEL-NEXT: v_writelane_b32 v40, s42, 10 -; GISEL-NEXT: v_writelane_b32 v40, 
s43, 11 -; GISEL-NEXT: v_writelane_b32 v40, s44, 12 -; GISEL-NEXT: v_writelane_b32 v40, s46, 13 -; GISEL-NEXT: v_writelane_b32 v40, s47, 14 -; GISEL-NEXT: v_writelane_b32 v40, s48, 15 -; GISEL-NEXT: v_writelane_b32 v40, s49, 16 +; GISEL-NEXT: v_writelane_b32 v40, s34, 0 +; GISEL-NEXT: v_writelane_b32 v40, s35, 1 +; GISEL-NEXT: v_writelane_b32 v40, s36, 2 +; GISEL-NEXT: v_writelane_b32 v40, s37, 3 +; GISEL-NEXT: v_writelane_b32 v40, s38, 4 +; GISEL-NEXT: v_writelane_b32 v40, s39, 5 +; GISEL-NEXT: v_writelane_b32 v40, s40, 6 +; GISEL-NEXT: v_writelane_b32 v40, s41, 7 +; GISEL-NEXT: v_writelane_b32 v40, s42, 8 +; GISEL-NEXT: v_writelane_b32 v40, s43, 9 +; GISEL-NEXT: v_writelane_b32 v40, s44, 10 +; GISEL-NEXT: v_writelane_b32 v40, s46, 11 +; GISEL-NEXT: v_writelane_b32 v40, s47, 12 +; GISEL-NEXT: v_writelane_b32 v40, s48, 13 +; GISEL-NEXT: v_writelane_b32 v40, s49, 14 ; GISEL-NEXT: s_mov_b32 s42, s14 ; GISEL-NEXT: s_mov_b32 s43, s13 ; GISEL-NEXT: s_mov_b32 s44, s12 @@ -501,6 +499,8 @@ define void @test_indirect_call_vgpr_ptr(void()* %fptr) { ; GISEL-NEXT: s_mov_b64 s[36:37], s[8:9] ; GISEL-NEXT: s_mov_b64 s[38:39], s[6:7] ; GISEL-NEXT: s_mov_b64 s[40:41], s[4:5] +; GISEL-NEXT: v_writelane_b32 v40, s30, 15 +; GISEL-NEXT: v_writelane_b32 v40, s31, 16 ; GISEL-NEXT: s_mov_b64 s[46:47], exec ; GISEL-NEXT: .LBB2_1: ; =>This Inner Loop Header: Depth=1 ; GISEL-NEXT: v_readfirstlane_b32 s16, v0 @@ -521,30 +521,30 @@ define void @test_indirect_call_vgpr_ptr(void()* %fptr) { ; GISEL-NEXT: s_cbranch_execnz .LBB2_1 ; GISEL-NEXT: ; %bb.2: ; GISEL-NEXT: s_mov_b64 exec, s[46:47] -; GISEL-NEXT: v_readlane_b32 s49, v40, 16 -; GISEL-NEXT: v_readlane_b32 s48, v40, 15 -; GISEL-NEXT: v_readlane_b32 s47, v40, 14 -; GISEL-NEXT: v_readlane_b32 s46, v40, 13 -; GISEL-NEXT: v_readlane_b32 s44, v40, 12 -; GISEL-NEXT: v_readlane_b32 s43, v40, 11 -; GISEL-NEXT: v_readlane_b32 s42, v40, 10 -; GISEL-NEXT: v_readlane_b32 s41, v40, 9 -; GISEL-NEXT: v_readlane_b32 s40, v40, 8 -; GISEL-NEXT: v_readlane_b32 s39, v40, 7 -; GISEL-NEXT: v_readlane_b32 s38, v40, 6 -; GISEL-NEXT: v_readlane_b32 s37, v40, 5 -; GISEL-NEXT: v_readlane_b32 s36, v40, 4 -; GISEL-NEXT: v_readlane_b32 s35, v40, 3 -; GISEL-NEXT: v_readlane_b32 s34, v40, 2 -; GISEL-NEXT: v_readlane_b32 s31, v40, 1 -; GISEL-NEXT: v_readlane_b32 s30, v40, 0 +; GISEL-NEXT: v_readlane_b32 s4, v40, 15 +; GISEL-NEXT: v_readlane_b32 s5, v40, 16 +; GISEL-NEXT: v_readlane_b32 s49, v40, 14 +; GISEL-NEXT: v_readlane_b32 s48, v40, 13 +; GISEL-NEXT: v_readlane_b32 s47, v40, 12 +; GISEL-NEXT: v_readlane_b32 s46, v40, 11 +; GISEL-NEXT: v_readlane_b32 s44, v40, 10 +; GISEL-NEXT: v_readlane_b32 s43, v40, 9 +; GISEL-NEXT: v_readlane_b32 s42, v40, 8 +; GISEL-NEXT: v_readlane_b32 s41, v40, 7 +; GISEL-NEXT: v_readlane_b32 s40, v40, 6 +; GISEL-NEXT: v_readlane_b32 s39, v40, 5 +; GISEL-NEXT: v_readlane_b32 s38, v40, 4 +; GISEL-NEXT: v_readlane_b32 s37, v40, 3 +; GISEL-NEXT: v_readlane_b32 s36, v40, 2 +; GISEL-NEXT: v_readlane_b32 s35, v40, 1 +; GISEL-NEXT: v_readlane_b32 s34, v40, 0 ; GISEL-NEXT: s_addk_i32 s32, 0xfc00 ; GISEL-NEXT: v_readlane_b32 s33, v40, 17 -; GISEL-NEXT: s_or_saveexec_b64 s[4:5], -1 +; GISEL-NEXT: s_or_saveexec_b64 s[6:7], -1 ; GISEL-NEXT: buffer_load_dword v40, off, s[0:3], s32 ; 4-byte Folded Reload -; GISEL-NEXT: s_mov_b64 exec, s[4:5] +; GISEL-NEXT: s_mov_b64 exec, s[6:7] ; GISEL-NEXT: s_waitcnt vmcnt(0) -; GISEL-NEXT: s_setpc_b64 s[30:31] +; GISEL-NEXT: s_setpc_b64 s[4:5] call void %fptr() ret void } @@ -559,23 +559,23 @@ define void 
@test_indirect_call_vgpr_ptr_arg(void(i32)* %fptr) { ; GCN-NEXT: v_writelane_b32 v40, s33, 17 ; GCN-NEXT: s_mov_b32 s33, s32 ; GCN-NEXT: s_addk_i32 s32, 0x400 -; GCN-NEXT: v_writelane_b32 v40, s30, 0 -; GCN-NEXT: v_writelane_b32 v40, s31, 1 -; GCN-NEXT: v_writelane_b32 v40, s34, 2 -; GCN-NEXT: v_writelane_b32 v40, s35, 3 -; GCN-NEXT: v_writelane_b32 v40, s36, 4 -; GCN-NEXT: v_writelane_b32 v40, s37, 5 -; GCN-NEXT: v_writelane_b32 v40, s38, 6 -; GCN-NEXT: v_writelane_b32 v40, s39, 7 -; GCN-NEXT: v_writelane_b32 v40, s40, 8 -; GCN-NEXT: v_writelane_b32 v40, s41, 9 -; GCN-NEXT: v_writelane_b32 v40, s42, 10 -; GCN-NEXT: v_writelane_b32 v40, s43, 11 -; GCN-NEXT: v_writelane_b32 v40, s44, 12 -; GCN-NEXT: v_writelane_b32 v40, s46, 13 -; GCN-NEXT: v_writelane_b32 v40, s47, 14 -; GCN-NEXT: v_writelane_b32 v40, s48, 15 -; GCN-NEXT: v_writelane_b32 v40, s49, 16 +; GCN-NEXT: v_writelane_b32 v40, s34, 0 +; GCN-NEXT: v_writelane_b32 v40, s35, 1 +; GCN-NEXT: v_writelane_b32 v40, s36, 2 +; GCN-NEXT: v_writelane_b32 v40, s37, 3 +; GCN-NEXT: v_writelane_b32 v40, s38, 4 +; GCN-NEXT: v_writelane_b32 v40, s39, 5 +; GCN-NEXT: v_writelane_b32 v40, s40, 6 +; GCN-NEXT: v_writelane_b32 v40, s41, 7 +; GCN-NEXT: v_writelane_b32 v40, s42, 8 +; GCN-NEXT: v_writelane_b32 v40, s43, 9 +; GCN-NEXT: v_writelane_b32 v40, s44, 10 +; GCN-NEXT: v_writelane_b32 v40, s46, 11 +; GCN-NEXT: v_writelane_b32 v40, s47, 12 +; GCN-NEXT: v_writelane_b32 v40, s48, 13 +; GCN-NEXT: v_writelane_b32 v40, s49, 14 +; GCN-NEXT: v_writelane_b32 v40, s30, 15 +; GCN-NEXT: v_writelane_b32 v40, s31, 16 ; GCN-NEXT: s_mov_b32 s42, s14 ; GCN-NEXT: s_mov_b32 s43, s13 ; GCN-NEXT: s_mov_b32 s44, s12 @@ -606,30 +606,30 @@ define void @test_indirect_call_vgpr_ptr_arg(void(i32)* %fptr) { ; GCN-NEXT: s_cbranch_execnz .LBB3_1 ; GCN-NEXT: ; %bb.2: ; GCN-NEXT: s_mov_b64 exec, s[46:47] -; GCN-NEXT: v_readlane_b32 s49, v40, 16 -; GCN-NEXT: v_readlane_b32 s48, v40, 15 -; GCN-NEXT: v_readlane_b32 s47, v40, 14 -; GCN-NEXT: v_readlane_b32 s46, v40, 13 -; GCN-NEXT: v_readlane_b32 s44, v40, 12 -; GCN-NEXT: v_readlane_b32 s43, v40, 11 -; GCN-NEXT: v_readlane_b32 s42, v40, 10 -; GCN-NEXT: v_readlane_b32 s41, v40, 9 -; GCN-NEXT: v_readlane_b32 s40, v40, 8 -; GCN-NEXT: v_readlane_b32 s39, v40, 7 -; GCN-NEXT: v_readlane_b32 s38, v40, 6 -; GCN-NEXT: v_readlane_b32 s37, v40, 5 -; GCN-NEXT: v_readlane_b32 s36, v40, 4 -; GCN-NEXT: v_readlane_b32 s35, v40, 3 -; GCN-NEXT: v_readlane_b32 s34, v40, 2 -; GCN-NEXT: v_readlane_b32 s31, v40, 1 -; GCN-NEXT: v_readlane_b32 s30, v40, 0 +; GCN-NEXT: v_readlane_b32 s4, v40, 15 +; GCN-NEXT: v_readlane_b32 s5, v40, 16 +; GCN-NEXT: v_readlane_b32 s49, v40, 14 +; GCN-NEXT: v_readlane_b32 s48, v40, 13 +; GCN-NEXT: v_readlane_b32 s47, v40, 12 +; GCN-NEXT: v_readlane_b32 s46, v40, 11 +; GCN-NEXT: v_readlane_b32 s44, v40, 10 +; GCN-NEXT: v_readlane_b32 s43, v40, 9 +; GCN-NEXT: v_readlane_b32 s42, v40, 8 +; GCN-NEXT: v_readlane_b32 s41, v40, 7 +; GCN-NEXT: v_readlane_b32 s40, v40, 6 +; GCN-NEXT: v_readlane_b32 s39, v40, 5 +; GCN-NEXT: v_readlane_b32 s38, v40, 4 +; GCN-NEXT: v_readlane_b32 s37, v40, 3 +; GCN-NEXT: v_readlane_b32 s36, v40, 2 +; GCN-NEXT: v_readlane_b32 s35, v40, 1 +; GCN-NEXT: v_readlane_b32 s34, v40, 0 ; GCN-NEXT: s_addk_i32 s32, 0xfc00 ; GCN-NEXT: v_readlane_b32 s33, v40, 17 -; GCN-NEXT: s_or_saveexec_b64 s[4:5], -1 +; GCN-NEXT: s_or_saveexec_b64 s[6:7], -1 ; GCN-NEXT: buffer_load_dword v40, off, s[0:3], s32 ; 4-byte Folded Reload -; GCN-NEXT: s_mov_b64 exec, s[4:5] +; GCN-NEXT: s_mov_b64 exec, s[6:7] ; GCN-NEXT: s_waitcnt vmcnt(0) -; 
GCN-NEXT: s_setpc_b64 s[30:31] +; GCN-NEXT: s_setpc_b64 s[4:5] ; ; GISEL-LABEL: test_indirect_call_vgpr_ptr_arg: ; GISEL: ; %bb.0: @@ -640,23 +640,21 @@ define void @test_indirect_call_vgpr_ptr_arg(void(i32)* %fptr) { ; GISEL-NEXT: v_writelane_b32 v40, s33, 17 ; GISEL-NEXT: s_mov_b32 s33, s32 ; GISEL-NEXT: s_addk_i32 s32, 0x400 -; GISEL-NEXT: v_writelane_b32 v40, s30, 0 -; GISEL-NEXT: v_writelane_b32 v40, s31, 1 -; GISEL-NEXT: v_writelane_b32 v40, s34, 2 -; GISEL-NEXT: v_writelane_b32 v40, s35, 3 -; GISEL-NEXT: v_writelane_b32 v40, s36, 4 -; GISEL-NEXT: v_writelane_b32 v40, s37, 5 -; GISEL-NEXT: v_writelane_b32 v40, s38, 6 -; GISEL-NEXT: v_writelane_b32 v40, s39, 7 -; GISEL-NEXT: v_writelane_b32 v40, s40, 8 -; GISEL-NEXT: v_writelane_b32 v40, s41, 9 -; GISEL-NEXT: v_writelane_b32 v40, s42, 10 -; GISEL-NEXT: v_writelane_b32 v40, s43, 11 -; GISEL-NEXT: v_writelane_b32 v40, s44, 12 -; GISEL-NEXT: v_writelane_b32 v40, s46, 13 -; GISEL-NEXT: v_writelane_b32 v40, s47, 14 -; GISEL-NEXT: v_writelane_b32 v40, s48, 15 -; GISEL-NEXT: v_writelane_b32 v40, s49, 16 +; GISEL-NEXT: v_writelane_b32 v40, s34, 0 +; GISEL-NEXT: v_writelane_b32 v40, s35, 1 +; GISEL-NEXT: v_writelane_b32 v40, s36, 2 +; GISEL-NEXT: v_writelane_b32 v40, s37, 3 +; GISEL-NEXT: v_writelane_b32 v40, s38, 4 +; GISEL-NEXT: v_writelane_b32 v40, s39, 5 +; GISEL-NEXT: v_writelane_b32 v40, s40, 6 +; GISEL-NEXT: v_writelane_b32 v40, s41, 7 +; GISEL-NEXT: v_writelane_b32 v40, s42, 8 +; GISEL-NEXT: v_writelane_b32 v40, s43, 9 +; GISEL-NEXT: v_writelane_b32 v40, s44, 10 +; GISEL-NEXT: v_writelane_b32 v40, s46, 11 +; GISEL-NEXT: v_writelane_b32 v40, s47, 12 +; GISEL-NEXT: v_writelane_b32 v40, s48, 13 +; GISEL-NEXT: v_writelane_b32 v40, s49, 14 ; GISEL-NEXT: s_mov_b32 s42, s14 ; GISEL-NEXT: s_mov_b32 s43, s13 ; GISEL-NEXT: s_mov_b32 s44, s12 @@ -664,6 +662,8 @@ define void @test_indirect_call_vgpr_ptr_arg(void(i32)* %fptr) { ; GISEL-NEXT: s_mov_b64 s[36:37], s[8:9] ; GISEL-NEXT: s_mov_b64 s[38:39], s[6:7] ; GISEL-NEXT: s_mov_b64 s[40:41], s[4:5] +; GISEL-NEXT: v_writelane_b32 v40, s30, 15 +; GISEL-NEXT: v_writelane_b32 v40, s31, 16 ; GISEL-NEXT: s_mov_b64 s[46:47], exec ; GISEL-NEXT: .LBB3_1: ; =>This Inner Loop Header: Depth=1 ; GISEL-NEXT: v_readfirstlane_b32 s16, v0 @@ -685,30 +685,30 @@ define void @test_indirect_call_vgpr_ptr_arg(void(i32)* %fptr) { ; GISEL-NEXT: s_cbranch_execnz .LBB3_1 ; GISEL-NEXT: ; %bb.2: ; GISEL-NEXT: s_mov_b64 exec, s[46:47] -; GISEL-NEXT: v_readlane_b32 s49, v40, 16 -; GISEL-NEXT: v_readlane_b32 s48, v40, 15 -; GISEL-NEXT: v_readlane_b32 s47, v40, 14 -; GISEL-NEXT: v_readlane_b32 s46, v40, 13 -; GISEL-NEXT: v_readlane_b32 s44, v40, 12 -; GISEL-NEXT: v_readlane_b32 s43, v40, 11 -; GISEL-NEXT: v_readlane_b32 s42, v40, 10 -; GISEL-NEXT: v_readlane_b32 s41, v40, 9 -; GISEL-NEXT: v_readlane_b32 s40, v40, 8 -; GISEL-NEXT: v_readlane_b32 s39, v40, 7 -; GISEL-NEXT: v_readlane_b32 s38, v40, 6 -; GISEL-NEXT: v_readlane_b32 s37, v40, 5 -; GISEL-NEXT: v_readlane_b32 s36, v40, 4 -; GISEL-NEXT: v_readlane_b32 s35, v40, 3 -; GISEL-NEXT: v_readlane_b32 s34, v40, 2 -; GISEL-NEXT: v_readlane_b32 s31, v40, 1 -; GISEL-NEXT: v_readlane_b32 s30, v40, 0 +; GISEL-NEXT: v_readlane_b32 s4, v40, 15 +; GISEL-NEXT: v_readlane_b32 s5, v40, 16 +; GISEL-NEXT: v_readlane_b32 s49, v40, 14 +; GISEL-NEXT: v_readlane_b32 s48, v40, 13 +; GISEL-NEXT: v_readlane_b32 s47, v40, 12 +; GISEL-NEXT: v_readlane_b32 s46, v40, 11 +; GISEL-NEXT: v_readlane_b32 s44, v40, 10 +; GISEL-NEXT: v_readlane_b32 s43, v40, 9 +; GISEL-NEXT: v_readlane_b32 s42, v40, 8 +; 
GISEL-NEXT: v_readlane_b32 s41, v40, 7 +; GISEL-NEXT: v_readlane_b32 s40, v40, 6 +; GISEL-NEXT: v_readlane_b32 s39, v40, 5 +; GISEL-NEXT: v_readlane_b32 s38, v40, 4 +; GISEL-NEXT: v_readlane_b32 s37, v40, 3 +; GISEL-NEXT: v_readlane_b32 s36, v40, 2 +; GISEL-NEXT: v_readlane_b32 s35, v40, 1 +; GISEL-NEXT: v_readlane_b32 s34, v40, 0 ; GISEL-NEXT: s_addk_i32 s32, 0xfc00 ; GISEL-NEXT: v_readlane_b32 s33, v40, 17 -; GISEL-NEXT: s_or_saveexec_b64 s[4:5], -1 +; GISEL-NEXT: s_or_saveexec_b64 s[6:7], -1 ; GISEL-NEXT: buffer_load_dword v40, off, s[0:3], s32 ; 4-byte Folded Reload -; GISEL-NEXT: s_mov_b64 exec, s[4:5] +; GISEL-NEXT: s_mov_b64 exec, s[6:7] ; GISEL-NEXT: s_waitcnt vmcnt(0) -; GISEL-NEXT: s_setpc_b64 s[30:31] +; GISEL-NEXT: s_setpc_b64 s[4:5] call void %fptr(i32 123) ret void } @@ -723,23 +723,23 @@ define i32 @test_indirect_call_vgpr_ptr_ret(i32()* %fptr) { ; GCN-NEXT: v_writelane_b32 v40, s33, 17 ; GCN-NEXT: s_mov_b32 s33, s32 ; GCN-NEXT: s_addk_i32 s32, 0x400 -; GCN-NEXT: v_writelane_b32 v40, s30, 0 -; GCN-NEXT: v_writelane_b32 v40, s31, 1 -; GCN-NEXT: v_writelane_b32 v40, s34, 2 -; GCN-NEXT: v_writelane_b32 v40, s35, 3 -; GCN-NEXT: v_writelane_b32 v40, s36, 4 -; GCN-NEXT: v_writelane_b32 v40, s37, 5 -; GCN-NEXT: v_writelane_b32 v40, s38, 6 -; GCN-NEXT: v_writelane_b32 v40, s39, 7 -; GCN-NEXT: v_writelane_b32 v40, s40, 8 -; GCN-NEXT: v_writelane_b32 v40, s41, 9 -; GCN-NEXT: v_writelane_b32 v40, s42, 10 -; GCN-NEXT: v_writelane_b32 v40, s43, 11 -; GCN-NEXT: v_writelane_b32 v40, s44, 12 -; GCN-NEXT: v_writelane_b32 v40, s46, 13 -; GCN-NEXT: v_writelane_b32 v40, s47, 14 -; GCN-NEXT: v_writelane_b32 v40, s48, 15 -; GCN-NEXT: v_writelane_b32 v40, s49, 16 +; GCN-NEXT: v_writelane_b32 v40, s34, 0 +; GCN-NEXT: v_writelane_b32 v40, s35, 1 +; GCN-NEXT: v_writelane_b32 v40, s36, 2 +; GCN-NEXT: v_writelane_b32 v40, s37, 3 +; GCN-NEXT: v_writelane_b32 v40, s38, 4 +; GCN-NEXT: v_writelane_b32 v40, s39, 5 +; GCN-NEXT: v_writelane_b32 v40, s40, 6 +; GCN-NEXT: v_writelane_b32 v40, s41, 7 +; GCN-NEXT: v_writelane_b32 v40, s42, 8 +; GCN-NEXT: v_writelane_b32 v40, s43, 9 +; GCN-NEXT: v_writelane_b32 v40, s44, 10 +; GCN-NEXT: v_writelane_b32 v40, s46, 11 +; GCN-NEXT: v_writelane_b32 v40, s47, 12 +; GCN-NEXT: v_writelane_b32 v40, s48, 13 +; GCN-NEXT: v_writelane_b32 v40, s49, 14 +; GCN-NEXT: v_writelane_b32 v40, s30, 15 +; GCN-NEXT: v_writelane_b32 v40, s31, 16 ; GCN-NEXT: s_mov_b32 s42, s14 ; GCN-NEXT: s_mov_b32 s43, s13 ; GCN-NEXT: s_mov_b32 s44, s12 @@ -769,30 +769,30 @@ define i32 @test_indirect_call_vgpr_ptr_ret(i32()* %fptr) { ; GCN-NEXT: ; %bb.2: ; GCN-NEXT: s_mov_b64 exec, s[46:47] ; GCN-NEXT: v_add_i32_e32 v0, vcc, 1, v2 -; GCN-NEXT: v_readlane_b32 s49, v40, 16 -; GCN-NEXT: v_readlane_b32 s48, v40, 15 -; GCN-NEXT: v_readlane_b32 s47, v40, 14 -; GCN-NEXT: v_readlane_b32 s46, v40, 13 -; GCN-NEXT: v_readlane_b32 s44, v40, 12 -; GCN-NEXT: v_readlane_b32 s43, v40, 11 -; GCN-NEXT: v_readlane_b32 s42, v40, 10 -; GCN-NEXT: v_readlane_b32 s41, v40, 9 -; GCN-NEXT: v_readlane_b32 s40, v40, 8 -; GCN-NEXT: v_readlane_b32 s39, v40, 7 -; GCN-NEXT: v_readlane_b32 s38, v40, 6 -; GCN-NEXT: v_readlane_b32 s37, v40, 5 -; GCN-NEXT: v_readlane_b32 s36, v40, 4 -; GCN-NEXT: v_readlane_b32 s35, v40, 3 -; GCN-NEXT: v_readlane_b32 s34, v40, 2 -; GCN-NEXT: v_readlane_b32 s31, v40, 1 -; GCN-NEXT: v_readlane_b32 s30, v40, 0 +; GCN-NEXT: v_readlane_b32 s4, v40, 15 +; GCN-NEXT: v_readlane_b32 s5, v40, 16 +; GCN-NEXT: v_readlane_b32 s49, v40, 14 +; GCN-NEXT: v_readlane_b32 s48, v40, 13 +; GCN-NEXT: v_readlane_b32 s47, v40, 12 +; 
GCN-NEXT: v_readlane_b32 s46, v40, 11 +; GCN-NEXT: v_readlane_b32 s44, v40, 10 +; GCN-NEXT: v_readlane_b32 s43, v40, 9 +; GCN-NEXT: v_readlane_b32 s42, v40, 8 +; GCN-NEXT: v_readlane_b32 s41, v40, 7 +; GCN-NEXT: v_readlane_b32 s40, v40, 6 +; GCN-NEXT: v_readlane_b32 s39, v40, 5 +; GCN-NEXT: v_readlane_b32 s38, v40, 4 +; GCN-NEXT: v_readlane_b32 s37, v40, 3 +; GCN-NEXT: v_readlane_b32 s36, v40, 2 +; GCN-NEXT: v_readlane_b32 s35, v40, 1 +; GCN-NEXT: v_readlane_b32 s34, v40, 0 ; GCN-NEXT: s_addk_i32 s32, 0xfc00 ; GCN-NEXT: v_readlane_b32 s33, v40, 17 -; GCN-NEXT: s_or_saveexec_b64 s[4:5], -1 +; GCN-NEXT: s_or_saveexec_b64 s[6:7], -1 ; GCN-NEXT: buffer_load_dword v40, off, s[0:3], s32 ; 4-byte Folded Reload -; GCN-NEXT: s_mov_b64 exec, s[4:5] +; GCN-NEXT: s_mov_b64 exec, s[6:7] ; GCN-NEXT: s_waitcnt vmcnt(0) -; GCN-NEXT: s_setpc_b64 s[30:31] +; GCN-NEXT: s_setpc_b64 s[4:5] ; ; GISEL-LABEL: test_indirect_call_vgpr_ptr_ret: ; GISEL: ; %bb.0: @@ -803,23 +803,21 @@ define i32 @test_indirect_call_vgpr_ptr_ret(i32()* %fptr) { ; GISEL-NEXT: v_writelane_b32 v40, s33, 17 ; GISEL-NEXT: s_mov_b32 s33, s32 ; GISEL-NEXT: s_addk_i32 s32, 0x400 -; GISEL-NEXT: v_writelane_b32 v40, s30, 0 -; GISEL-NEXT: v_writelane_b32 v40, s31, 1 -; GISEL-NEXT: v_writelane_b32 v40, s34, 2 -; GISEL-NEXT: v_writelane_b32 v40, s35, 3 -; GISEL-NEXT: v_writelane_b32 v40, s36, 4 -; GISEL-NEXT: v_writelane_b32 v40, s37, 5 -; GISEL-NEXT: v_writelane_b32 v40, s38, 6 -; GISEL-NEXT: v_writelane_b32 v40, s39, 7 -; GISEL-NEXT: v_writelane_b32 v40, s40, 8 -; GISEL-NEXT: v_writelane_b32 v40, s41, 9 -; GISEL-NEXT: v_writelane_b32 v40, s42, 10 -; GISEL-NEXT: v_writelane_b32 v40, s43, 11 -; GISEL-NEXT: v_writelane_b32 v40, s44, 12 -; GISEL-NEXT: v_writelane_b32 v40, s46, 13 -; GISEL-NEXT: v_writelane_b32 v40, s47, 14 -; GISEL-NEXT: v_writelane_b32 v40, s48, 15 -; GISEL-NEXT: v_writelane_b32 v40, s49, 16 +; GISEL-NEXT: v_writelane_b32 v40, s34, 0 +; GISEL-NEXT: v_writelane_b32 v40, s35, 1 +; GISEL-NEXT: v_writelane_b32 v40, s36, 2 +; GISEL-NEXT: v_writelane_b32 v40, s37, 3 +; GISEL-NEXT: v_writelane_b32 v40, s38, 4 +; GISEL-NEXT: v_writelane_b32 v40, s39, 5 +; GISEL-NEXT: v_writelane_b32 v40, s40, 6 +; GISEL-NEXT: v_writelane_b32 v40, s41, 7 +; GISEL-NEXT: v_writelane_b32 v40, s42, 8 +; GISEL-NEXT: v_writelane_b32 v40, s43, 9 +; GISEL-NEXT: v_writelane_b32 v40, s44, 10 +; GISEL-NEXT: v_writelane_b32 v40, s46, 11 +; GISEL-NEXT: v_writelane_b32 v40, s47, 12 +; GISEL-NEXT: v_writelane_b32 v40, s48, 13 +; GISEL-NEXT: v_writelane_b32 v40, s49, 14 ; GISEL-NEXT: s_mov_b32 s42, s14 ; GISEL-NEXT: s_mov_b32 s43, s13 ; GISEL-NEXT: s_mov_b32 s44, s12 @@ -827,6 +825,8 @@ define i32 @test_indirect_call_vgpr_ptr_ret(i32()* %fptr) { ; GISEL-NEXT: s_mov_b64 s[36:37], s[8:9] ; GISEL-NEXT: s_mov_b64 s[38:39], s[6:7] ; GISEL-NEXT: s_mov_b64 s[40:41], s[4:5] +; GISEL-NEXT: v_writelane_b32 v40, s30, 15 +; GISEL-NEXT: v_writelane_b32 v40, s31, 16 ; GISEL-NEXT: s_mov_b64 s[46:47], exec ; GISEL-NEXT: .LBB4_1: ; =>This Inner Loop Header: Depth=1 ; GISEL-NEXT: v_readfirstlane_b32 s16, v0 @@ -849,30 +849,30 @@ define i32 @test_indirect_call_vgpr_ptr_ret(i32()* %fptr) { ; GISEL-NEXT: ; %bb.2: ; GISEL-NEXT: s_mov_b64 exec, s[46:47] ; GISEL-NEXT: v_add_i32_e32 v0, vcc, 1, v2 -; GISEL-NEXT: v_readlane_b32 s49, v40, 16 -; GISEL-NEXT: v_readlane_b32 s48, v40, 15 -; GISEL-NEXT: v_readlane_b32 s47, v40, 14 -; GISEL-NEXT: v_readlane_b32 s46, v40, 13 -; GISEL-NEXT: v_readlane_b32 s44, v40, 12 -; GISEL-NEXT: v_readlane_b32 s43, v40, 11 -; GISEL-NEXT: v_readlane_b32 s42, v40, 10 -; 
GISEL-NEXT: v_readlane_b32 s41, v40, 9 -; GISEL-NEXT: v_readlane_b32 s40, v40, 8 -; GISEL-NEXT: v_readlane_b32 s39, v40, 7 -; GISEL-NEXT: v_readlane_b32 s38, v40, 6 -; GISEL-NEXT: v_readlane_b32 s37, v40, 5 -; GISEL-NEXT: v_readlane_b32 s36, v40, 4 -; GISEL-NEXT: v_readlane_b32 s35, v40, 3 -; GISEL-NEXT: v_readlane_b32 s34, v40, 2 -; GISEL-NEXT: v_readlane_b32 s31, v40, 1 -; GISEL-NEXT: v_readlane_b32 s30, v40, 0 +; GISEL-NEXT: v_readlane_b32 s4, v40, 15 +; GISEL-NEXT: v_readlane_b32 s5, v40, 16 +; GISEL-NEXT: v_readlane_b32 s49, v40, 14 +; GISEL-NEXT: v_readlane_b32 s48, v40, 13 +; GISEL-NEXT: v_readlane_b32 s47, v40, 12 +; GISEL-NEXT: v_readlane_b32 s46, v40, 11 +; GISEL-NEXT: v_readlane_b32 s44, v40, 10 +; GISEL-NEXT: v_readlane_b32 s43, v40, 9 +; GISEL-NEXT: v_readlane_b32 s42, v40, 8 +; GISEL-NEXT: v_readlane_b32 s41, v40, 7 +; GISEL-NEXT: v_readlane_b32 s40, v40, 6 +; GISEL-NEXT: v_readlane_b32 s39, v40, 5 +; GISEL-NEXT: v_readlane_b32 s38, v40, 4 +; GISEL-NEXT: v_readlane_b32 s37, v40, 3 +; GISEL-NEXT: v_readlane_b32 s36, v40, 2 +; GISEL-NEXT: v_readlane_b32 s35, v40, 1 +; GISEL-NEXT: v_readlane_b32 s34, v40, 0 ; GISEL-NEXT: s_addk_i32 s32, 0xfc00 ; GISEL-NEXT: v_readlane_b32 s33, v40, 17 -; GISEL-NEXT: s_or_saveexec_b64 s[4:5], -1 +; GISEL-NEXT: s_or_saveexec_b64 s[6:7], -1 ; GISEL-NEXT: buffer_load_dword v40, off, s[0:3], s32 ; 4-byte Folded Reload -; GISEL-NEXT: s_mov_b64 exec, s[4:5] +; GISEL-NEXT: s_mov_b64 exec, s[6:7] ; GISEL-NEXT: s_waitcnt vmcnt(0) -; GISEL-NEXT: s_setpc_b64 s[30:31] +; GISEL-NEXT: s_setpc_b64 s[4:5] %a = call i32 %fptr() %b = add i32 %a, 1 ret i32 %b @@ -888,25 +888,23 @@ define void @test_indirect_call_vgpr_ptr_in_branch(void()* %fptr, i1 %cond) { ; GCN-NEXT: v_writelane_b32 v40, s33, 19 ; GCN-NEXT: s_mov_b32 s33, s32 ; GCN-NEXT: s_addk_i32 s32, 0x400 -; GCN-NEXT: v_writelane_b32 v40, s30, 0 -; GCN-NEXT: v_writelane_b32 v40, s31, 1 -; GCN-NEXT: v_writelane_b32 v40, s34, 2 -; GCN-NEXT: v_writelane_b32 v40, s35, 3 -; GCN-NEXT: v_writelane_b32 v40, s36, 4 -; GCN-NEXT: v_writelane_b32 v40, s37, 5 -; GCN-NEXT: v_writelane_b32 v40, s38, 6 -; GCN-NEXT: v_writelane_b32 v40, s39, 7 -; GCN-NEXT: v_writelane_b32 v40, s40, 8 -; GCN-NEXT: v_writelane_b32 v40, s41, 9 -; GCN-NEXT: v_writelane_b32 v40, s42, 10 -; GCN-NEXT: v_writelane_b32 v40, s43, 11 -; GCN-NEXT: v_writelane_b32 v40, s44, 12 -; GCN-NEXT: v_writelane_b32 v40, s46, 13 -; GCN-NEXT: v_writelane_b32 v40, s47, 14 -; GCN-NEXT: v_writelane_b32 v40, s48, 15 -; GCN-NEXT: v_writelane_b32 v40, s49, 16 -; GCN-NEXT: v_writelane_b32 v40, s50, 17 -; GCN-NEXT: v_writelane_b32 v40, s51, 18 +; GCN-NEXT: v_writelane_b32 v40, s34, 0 +; GCN-NEXT: v_writelane_b32 v40, s35, 1 +; GCN-NEXT: v_writelane_b32 v40, s36, 2 +; GCN-NEXT: v_writelane_b32 v40, s37, 3 +; GCN-NEXT: v_writelane_b32 v40, s38, 4 +; GCN-NEXT: v_writelane_b32 v40, s39, 5 +; GCN-NEXT: v_writelane_b32 v40, s40, 6 +; GCN-NEXT: v_writelane_b32 v40, s41, 7 +; GCN-NEXT: v_writelane_b32 v40, s42, 8 +; GCN-NEXT: v_writelane_b32 v40, s43, 9 +; GCN-NEXT: v_writelane_b32 v40, s44, 10 +; GCN-NEXT: v_writelane_b32 v40, s46, 11 +; GCN-NEXT: v_writelane_b32 v40, s47, 12 +; GCN-NEXT: v_writelane_b32 v40, s48, 13 +; GCN-NEXT: v_writelane_b32 v40, s49, 14 +; GCN-NEXT: v_writelane_b32 v40, s50, 15 +; GCN-NEXT: v_writelane_b32 v40, s51, 16 ; GCN-NEXT: s_mov_b32 s42, s14 ; GCN-NEXT: s_mov_b32 s43, s13 ; GCN-NEXT: s_mov_b32 s44, s12 @@ -919,6 +917,8 @@ define void @test_indirect_call_vgpr_ptr_in_branch(void()* %fptr, i1 %cond) { ; GCN-NEXT: s_and_saveexec_b64 s[46:47], vcc ; 
GCN-NEXT: s_cbranch_execz .LBB5_4 ; GCN-NEXT: ; %bb.1: ; %bb1 +; GCN-NEXT: v_writelane_b32 v40, s30, 17 +; GCN-NEXT: v_writelane_b32 v40, s31, 18 ; GCN-NEXT: s_mov_b64 s[48:49], exec ; GCN-NEXT: .LBB5_2: ; =>This Inner Loop Header: Depth=1 ; GCN-NEXT: v_readfirstlane_b32 s16, v0 @@ -939,27 +939,27 @@ define void @test_indirect_call_vgpr_ptr_in_branch(void()* %fptr, i1 %cond) { ; GCN-NEXT: s_cbranch_execnz .LBB5_2 ; GCN-NEXT: ; %bb.3: ; GCN-NEXT: s_mov_b64 exec, s[48:49] +; GCN-NEXT: v_readlane_b32 s30, v40, 17 +; GCN-NEXT: v_readlane_b32 s31, v40, 18 ; GCN-NEXT: .LBB5_4: ; %bb2 ; GCN-NEXT: s_or_b64 exec, exec, s[46:47] -; GCN-NEXT: v_readlane_b32 s51, v40, 18 -; GCN-NEXT: v_readlane_b32 s50, v40, 17 -; GCN-NEXT: v_readlane_b32 s49, v40, 16 -; GCN-NEXT: v_readlane_b32 s48, v40, 15 -; GCN-NEXT: v_readlane_b32 s47, v40, 14 -; GCN-NEXT: v_readlane_b32 s46, v40, 13 -; GCN-NEXT: v_readlane_b32 s44, v40, 12 -; GCN-NEXT: v_readlane_b32 s43, v40, 11 -; GCN-NEXT: v_readlane_b32 s42, v40, 10 -; GCN-NEXT: v_readlane_b32 s41, v40, 9 -; GCN-NEXT: v_readlane_b32 s40, v40, 8 -; GCN-NEXT: v_readlane_b32 s39, v40, 7 -; GCN-NEXT: v_readlane_b32 s38, v40, 6 -; GCN-NEXT: v_readlane_b32 s37, v40, 5 -; GCN-NEXT: v_readlane_b32 s36, v40, 4 -; GCN-NEXT: v_readlane_b32 s35, v40, 3 -; GCN-NEXT: v_readlane_b32 s34, v40, 2 -; GCN-NEXT: v_readlane_b32 s31, v40, 1 -; GCN-NEXT: v_readlane_b32 s30, v40, 0 +; GCN-NEXT: v_readlane_b32 s51, v40, 16 +; GCN-NEXT: v_readlane_b32 s50, v40, 15 +; GCN-NEXT: v_readlane_b32 s49, v40, 14 +; GCN-NEXT: v_readlane_b32 s48, v40, 13 +; GCN-NEXT: v_readlane_b32 s47, v40, 12 +; GCN-NEXT: v_readlane_b32 s46, v40, 11 +; GCN-NEXT: v_readlane_b32 s44, v40, 10 +; GCN-NEXT: v_readlane_b32 s43, v40, 9 +; GCN-NEXT: v_readlane_b32 s42, v40, 8 +; GCN-NEXT: v_readlane_b32 s41, v40, 7 +; GCN-NEXT: v_readlane_b32 s40, v40, 6 +; GCN-NEXT: v_readlane_b32 s39, v40, 5 +; GCN-NEXT: v_readlane_b32 s38, v40, 4 +; GCN-NEXT: v_readlane_b32 s37, v40, 3 +; GCN-NEXT: v_readlane_b32 s36, v40, 2 +; GCN-NEXT: v_readlane_b32 s35, v40, 1 +; GCN-NEXT: v_readlane_b32 s34, v40, 0 ; GCN-NEXT: s_addk_i32 s32, 0xfc00 ; GCN-NEXT: v_readlane_b32 s33, v40, 19 ; GCN-NEXT: s_or_saveexec_b64 s[4:5], -1 @@ -977,25 +977,23 @@ define void @test_indirect_call_vgpr_ptr_in_branch(void()* %fptr, i1 %cond) { ; GISEL-NEXT: v_writelane_b32 v40, s33, 19 ; GISEL-NEXT: s_mov_b32 s33, s32 ; GISEL-NEXT: s_addk_i32 s32, 0x400 -; GISEL-NEXT: v_writelane_b32 v40, s30, 0 -; GISEL-NEXT: v_writelane_b32 v40, s31, 1 -; GISEL-NEXT: v_writelane_b32 v40, s34, 2 -; GISEL-NEXT: v_writelane_b32 v40, s35, 3 -; GISEL-NEXT: v_writelane_b32 v40, s36, 4 -; GISEL-NEXT: v_writelane_b32 v40, s37, 5 -; GISEL-NEXT: v_writelane_b32 v40, s38, 6 -; GISEL-NEXT: v_writelane_b32 v40, s39, 7 -; GISEL-NEXT: v_writelane_b32 v40, s40, 8 -; GISEL-NEXT: v_writelane_b32 v40, s41, 9 -; GISEL-NEXT: v_writelane_b32 v40, s42, 10 -; GISEL-NEXT: v_writelane_b32 v40, s43, 11 -; GISEL-NEXT: v_writelane_b32 v40, s44, 12 -; GISEL-NEXT: v_writelane_b32 v40, s46, 13 -; GISEL-NEXT: v_writelane_b32 v40, s47, 14 -; GISEL-NEXT: v_writelane_b32 v40, s48, 15 -; GISEL-NEXT: v_writelane_b32 v40, s49, 16 -; GISEL-NEXT: v_writelane_b32 v40, s50, 17 -; GISEL-NEXT: v_writelane_b32 v40, s51, 18 +; GISEL-NEXT: v_writelane_b32 v40, s34, 0 +; GISEL-NEXT: v_writelane_b32 v40, s35, 1 +; GISEL-NEXT: v_writelane_b32 v40, s36, 2 +; GISEL-NEXT: v_writelane_b32 v40, s37, 3 +; GISEL-NEXT: v_writelane_b32 v40, s38, 4 +; GISEL-NEXT: v_writelane_b32 v40, s39, 5 +; GISEL-NEXT: v_writelane_b32 v40, s40, 6 +; GISEL-NEXT: 
v_writelane_b32 v40, s41, 7 +; GISEL-NEXT: v_writelane_b32 v40, s42, 8 +; GISEL-NEXT: v_writelane_b32 v40, s43, 9 +; GISEL-NEXT: v_writelane_b32 v40, s44, 10 +; GISEL-NEXT: v_writelane_b32 v40, s46, 11 +; GISEL-NEXT: v_writelane_b32 v40, s47, 12 +; GISEL-NEXT: v_writelane_b32 v40, s48, 13 +; GISEL-NEXT: v_writelane_b32 v40, s49, 14 +; GISEL-NEXT: v_writelane_b32 v40, s50, 15 +; GISEL-NEXT: v_writelane_b32 v40, s51, 16 ; GISEL-NEXT: s_mov_b32 s42, s14 ; GISEL-NEXT: s_mov_b32 s43, s13 ; GISEL-NEXT: s_mov_b32 s44, s12 @@ -1008,6 +1006,8 @@ define void @test_indirect_call_vgpr_ptr_in_branch(void()* %fptr, i1 %cond) { ; GISEL-NEXT: s_and_saveexec_b64 s[46:47], vcc ; GISEL-NEXT: s_cbranch_execz .LBB5_4 ; GISEL-NEXT: ; %bb.1: ; %bb1 +; GISEL-NEXT: v_writelane_b32 v40, s30, 17 +; GISEL-NEXT: v_writelane_b32 v40, s31, 18 ; GISEL-NEXT: s_mov_b64 s[48:49], exec ; GISEL-NEXT: .LBB5_2: ; =>This Inner Loop Header: Depth=1 ; GISEL-NEXT: v_readfirstlane_b32 s16, v0 @@ -1028,27 +1028,27 @@ define void @test_indirect_call_vgpr_ptr_in_branch(void()* %fptr, i1 %cond) { ; GISEL-NEXT: s_cbranch_execnz .LBB5_2 ; GISEL-NEXT: ; %bb.3: ; GISEL-NEXT: s_mov_b64 exec, s[48:49] +; GISEL-NEXT: v_readlane_b32 s30, v40, 17 +; GISEL-NEXT: v_readlane_b32 s31, v40, 18 ; GISEL-NEXT: .LBB5_4: ; %bb2 ; GISEL-NEXT: s_or_b64 exec, exec, s[46:47] -; GISEL-NEXT: v_readlane_b32 s51, v40, 18 -; GISEL-NEXT: v_readlane_b32 s50, v40, 17 -; GISEL-NEXT: v_readlane_b32 s49, v40, 16 -; GISEL-NEXT: v_readlane_b32 s48, v40, 15 -; GISEL-NEXT: v_readlane_b32 s47, v40, 14 -; GISEL-NEXT: v_readlane_b32 s46, v40, 13 -; GISEL-NEXT: v_readlane_b32 s44, v40, 12 -; GISEL-NEXT: v_readlane_b32 s43, v40, 11 -; GISEL-NEXT: v_readlane_b32 s42, v40, 10 -; GISEL-NEXT: v_readlane_b32 s41, v40, 9 -; GISEL-NEXT: v_readlane_b32 s40, v40, 8 -; GISEL-NEXT: v_readlane_b32 s39, v40, 7 -; GISEL-NEXT: v_readlane_b32 s38, v40, 6 -; GISEL-NEXT: v_readlane_b32 s37, v40, 5 -; GISEL-NEXT: v_readlane_b32 s36, v40, 4 -; GISEL-NEXT: v_readlane_b32 s35, v40, 3 -; GISEL-NEXT: v_readlane_b32 s34, v40, 2 -; GISEL-NEXT: v_readlane_b32 s31, v40, 1 -; GISEL-NEXT: v_readlane_b32 s30, v40, 0 +; GISEL-NEXT: v_readlane_b32 s51, v40, 16 +; GISEL-NEXT: v_readlane_b32 s50, v40, 15 +; GISEL-NEXT: v_readlane_b32 s49, v40, 14 +; GISEL-NEXT: v_readlane_b32 s48, v40, 13 +; GISEL-NEXT: v_readlane_b32 s47, v40, 12 +; GISEL-NEXT: v_readlane_b32 s46, v40, 11 +; GISEL-NEXT: v_readlane_b32 s44, v40, 10 +; GISEL-NEXT: v_readlane_b32 s43, v40, 9 +; GISEL-NEXT: v_readlane_b32 s42, v40, 8 +; GISEL-NEXT: v_readlane_b32 s41, v40, 7 +; GISEL-NEXT: v_readlane_b32 s40, v40, 6 +; GISEL-NEXT: v_readlane_b32 s39, v40, 5 +; GISEL-NEXT: v_readlane_b32 s38, v40, 4 +; GISEL-NEXT: v_readlane_b32 s37, v40, 3 +; GISEL-NEXT: v_readlane_b32 s36, v40, 2 +; GISEL-NEXT: v_readlane_b32 s35, v40, 1 +; GISEL-NEXT: v_readlane_b32 s34, v40, 0 ; GISEL-NEXT: s_addk_i32 s32, 0xfc00 ; GISEL-NEXT: v_readlane_b32 s33, v40, 19 ; GISEL-NEXT: s_or_saveexec_b64 s[4:5], -1 @@ -1074,93 +1074,90 @@ define void @test_indirect_call_vgpr_ptr_inreg_arg(void(i32)* %fptr) { ; GCN-NEXT: s_or_saveexec_b64 s[4:5], -1 ; GCN-NEXT: buffer_store_dword v40, off, s[0:3], s32 ; 4-byte Folded Spill ; GCN-NEXT: s_mov_b64 exec, s[4:5] -; GCN-NEXT: v_writelane_b32 v40, s33, 32 +; GCN-NEXT: v_writelane_b32 v40, s33, 30 ; GCN-NEXT: s_mov_b32 s33, s32 ; GCN-NEXT: s_addk_i32 s32, 0x400 -; GCN-NEXT: v_writelane_b32 v40, s30, 0 -; GCN-NEXT: v_writelane_b32 v40, s31, 1 -; GCN-NEXT: v_writelane_b32 v40, s34, 2 -; GCN-NEXT: v_writelane_b32 v40, s35, 3 -; GCN-NEXT: 
v_writelane_b32 v40, s36, 4 -; GCN-NEXT: v_writelane_b32 v40, s37, 5 -; GCN-NEXT: v_writelane_b32 v40, s38, 6 -; GCN-NEXT: v_writelane_b32 v40, s39, 7 -; GCN-NEXT: v_writelane_b32 v40, s40, 8 -; GCN-NEXT: v_writelane_b32 v40, s41, 9 -; GCN-NEXT: v_writelane_b32 v40, s42, 10 -; GCN-NEXT: v_writelane_b32 v40, s43, 11 -; GCN-NEXT: v_writelane_b32 v40, s44, 12 -; GCN-NEXT: v_writelane_b32 v40, s45, 13 -; GCN-NEXT: v_writelane_b32 v40, s46, 14 -; GCN-NEXT: v_writelane_b32 v40, s47, 15 -; GCN-NEXT: v_writelane_b32 v40, s48, 16 -; GCN-NEXT: v_writelane_b32 v40, s49, 17 -; GCN-NEXT: v_writelane_b32 v40, s50, 18 -; GCN-NEXT: v_writelane_b32 v40, s51, 19 -; GCN-NEXT: v_writelane_b32 v40, s52, 20 -; GCN-NEXT: v_writelane_b32 v40, s53, 21 -; GCN-NEXT: v_writelane_b32 v40, s54, 22 -; GCN-NEXT: v_writelane_b32 v40, s55, 23 -; GCN-NEXT: v_writelane_b32 v40, s56, 24 -; GCN-NEXT: v_writelane_b32 v40, s57, 25 -; GCN-NEXT: v_writelane_b32 v40, s58, 26 -; GCN-NEXT: v_writelane_b32 v40, s59, 27 -; GCN-NEXT: v_writelane_b32 v40, s60, 28 -; GCN-NEXT: v_writelane_b32 v40, s61, 29 -; GCN-NEXT: v_writelane_b32 v40, s62, 30 -; GCN-NEXT: v_writelane_b32 v40, s63, 31 -; GCN-NEXT: s_mov_b64 s[6:7], exec +; GCN-NEXT: v_writelane_b32 v40, s34, 0 +; GCN-NEXT: v_writelane_b32 v40, s35, 1 +; GCN-NEXT: v_writelane_b32 v40, s36, 2 +; GCN-NEXT: v_writelane_b32 v40, s37, 3 +; GCN-NEXT: v_writelane_b32 v40, s38, 4 +; GCN-NEXT: v_writelane_b32 v40, s39, 5 +; GCN-NEXT: v_writelane_b32 v40, s40, 6 +; GCN-NEXT: v_writelane_b32 v40, s41, 7 +; GCN-NEXT: v_writelane_b32 v40, s42, 8 +; GCN-NEXT: v_writelane_b32 v40, s43, 9 +; GCN-NEXT: v_writelane_b32 v40, s44, 10 +; GCN-NEXT: v_writelane_b32 v40, s45, 11 +; GCN-NEXT: v_writelane_b32 v40, s46, 12 +; GCN-NEXT: v_writelane_b32 v40, s47, 13 +; GCN-NEXT: v_writelane_b32 v40, s48, 14 +; GCN-NEXT: v_writelane_b32 v40, s49, 15 +; GCN-NEXT: v_writelane_b32 v40, s50, 16 +; GCN-NEXT: v_writelane_b32 v40, s51, 17 +; GCN-NEXT: v_writelane_b32 v40, s52, 18 +; GCN-NEXT: v_writelane_b32 v40, s53, 19 +; GCN-NEXT: v_writelane_b32 v40, s54, 20 +; GCN-NEXT: v_writelane_b32 v40, s55, 21 +; GCN-NEXT: v_writelane_b32 v40, s56, 22 +; GCN-NEXT: v_writelane_b32 v40, s57, 23 +; GCN-NEXT: v_writelane_b32 v40, s58, 24 +; GCN-NEXT: v_writelane_b32 v40, s59, 25 +; GCN-NEXT: v_writelane_b32 v40, s60, 26 +; GCN-NEXT: v_writelane_b32 v40, s61, 27 +; GCN-NEXT: v_writelane_b32 v40, s62, 28 +; GCN-NEXT: v_writelane_b32 v40, s63, 29 +; GCN-NEXT: s_mov_b64 s[6:7], s[30:31] +; GCN-NEXT: s_mov_b64 s[8:9], exec ; GCN-NEXT: s_movk_i32 s4, 0x7b ; GCN-NEXT: .LBB6_1: ; =>This Inner Loop Header: Depth=1 -; GCN-NEXT: v_readfirstlane_b32 s10, v0 -; GCN-NEXT: v_readfirstlane_b32 s11, v1 -; GCN-NEXT: v_cmp_eq_u64_e32 vcc, s[10:11], v[0:1] -; GCN-NEXT: s_and_saveexec_b64 s[8:9], vcc -; GCN-NEXT: s_swappc_b64 s[30:31], s[10:11] +; GCN-NEXT: v_readfirstlane_b32 s12, v0 +; GCN-NEXT: v_readfirstlane_b32 s13, v1 +; GCN-NEXT: v_cmp_eq_u64_e32 vcc, s[12:13], v[0:1] +; GCN-NEXT: s_and_saveexec_b64 s[10:11], vcc +; GCN-NEXT: s_swappc_b64 s[30:31], s[12:13] ; GCN-NEXT: ; implicit-def: $vgpr0_vgpr1 -; GCN-NEXT: s_xor_b64 exec, exec, s[8:9] +; GCN-NEXT: s_xor_b64 exec, exec, s[10:11] ; GCN-NEXT: s_cbranch_execnz .LBB6_1 ; GCN-NEXT: ; %bb.2: -; GCN-NEXT: s_mov_b64 exec, s[6:7] -; GCN-NEXT: v_readlane_b32 s63, v40, 31 -; GCN-NEXT: v_readlane_b32 s62, v40, 30 -; GCN-NEXT: v_readlane_b32 s61, v40, 29 -; GCN-NEXT: v_readlane_b32 s60, v40, 28 -; GCN-NEXT: v_readlane_b32 s59, v40, 27 -; GCN-NEXT: v_readlane_b32 s58, v40, 26 -; GCN-NEXT: v_readlane_b32 
s57, v40, 25 -; GCN-NEXT: v_readlane_b32 s56, v40, 24 -; GCN-NEXT: v_readlane_b32 s55, v40, 23 -; GCN-NEXT: v_readlane_b32 s54, v40, 22 -; GCN-NEXT: v_readlane_b32 s53, v40, 21 -; GCN-NEXT: v_readlane_b32 s52, v40, 20 -; GCN-NEXT: v_readlane_b32 s51, v40, 19 -; GCN-NEXT: v_readlane_b32 s50, v40, 18 -; GCN-NEXT: v_readlane_b32 s49, v40, 17 -; GCN-NEXT: v_readlane_b32 s48, v40, 16 -; GCN-NEXT: v_readlane_b32 s47, v40, 15 -; GCN-NEXT: v_readlane_b32 s46, v40, 14 -; GCN-NEXT: v_readlane_b32 s45, v40, 13 -; GCN-NEXT: v_readlane_b32 s44, v40, 12 -; GCN-NEXT: v_readlane_b32 s43, v40, 11 -; GCN-NEXT: v_readlane_b32 s42, v40, 10 -; GCN-NEXT: v_readlane_b32 s41, v40, 9 -; GCN-NEXT: v_readlane_b32 s40, v40, 8 -; GCN-NEXT: v_readlane_b32 s39, v40, 7 -; GCN-NEXT: v_readlane_b32 s38, v40, 6 -; GCN-NEXT: v_readlane_b32 s37, v40, 5 -; GCN-NEXT: v_readlane_b32 s36, v40, 4 -; GCN-NEXT: v_readlane_b32 s35, v40, 3 -; GCN-NEXT: v_readlane_b32 s34, v40, 2 -; GCN-NEXT: v_readlane_b32 s31, v40, 1 -; GCN-NEXT: v_readlane_b32 s30, v40, 0 +; GCN-NEXT: s_mov_b64 exec, s[8:9] +; GCN-NEXT: v_readlane_b32 s63, v40, 29 +; GCN-NEXT: v_readlane_b32 s62, v40, 28 +; GCN-NEXT: v_readlane_b32 s61, v40, 27 +; GCN-NEXT: v_readlane_b32 s60, v40, 26 +; GCN-NEXT: v_readlane_b32 s59, v40, 25 +; GCN-NEXT: v_readlane_b32 s58, v40, 24 +; GCN-NEXT: v_readlane_b32 s57, v40, 23 +; GCN-NEXT: v_readlane_b32 s56, v40, 22 +; GCN-NEXT: v_readlane_b32 s55, v40, 21 +; GCN-NEXT: v_readlane_b32 s54, v40, 20 +; GCN-NEXT: v_readlane_b32 s53, v40, 19 +; GCN-NEXT: v_readlane_b32 s52, v40, 18 +; GCN-NEXT: v_readlane_b32 s51, v40, 17 +; GCN-NEXT: v_readlane_b32 s50, v40, 16 +; GCN-NEXT: v_readlane_b32 s49, v40, 15 +; GCN-NEXT: v_readlane_b32 s48, v40, 14 +; GCN-NEXT: v_readlane_b32 s47, v40, 13 +; GCN-NEXT: v_readlane_b32 s46, v40, 12 +; GCN-NEXT: v_readlane_b32 s45, v40, 11 +; GCN-NEXT: v_readlane_b32 s44, v40, 10 +; GCN-NEXT: v_readlane_b32 s43, v40, 9 +; GCN-NEXT: v_readlane_b32 s42, v40, 8 +; GCN-NEXT: v_readlane_b32 s41, v40, 7 +; GCN-NEXT: v_readlane_b32 s40, v40, 6 +; GCN-NEXT: v_readlane_b32 s39, v40, 5 +; GCN-NEXT: v_readlane_b32 s38, v40, 4 +; GCN-NEXT: v_readlane_b32 s37, v40, 3 +; GCN-NEXT: v_readlane_b32 s36, v40, 2 +; GCN-NEXT: v_readlane_b32 s35, v40, 1 +; GCN-NEXT: v_readlane_b32 s34, v40, 0 ; GCN-NEXT: s_addk_i32 s32, 0xfc00 -; GCN-NEXT: v_readlane_b32 s33, v40, 32 +; GCN-NEXT: v_readlane_b32 s33, v40, 30 ; GCN-NEXT: s_or_saveexec_b64 s[4:5], -1 ; GCN-NEXT: buffer_load_dword v40, off, s[0:3], s32 ; 4-byte Folded Reload ; GCN-NEXT: s_mov_b64 exec, s[4:5] ; GCN-NEXT: s_waitcnt vmcnt(0) -; GCN-NEXT: s_setpc_b64 s[30:31] +; GCN-NEXT: s_setpc_b64 s[6:7] ; ; GISEL-LABEL: test_indirect_call_vgpr_ptr_inreg_arg: ; GISEL: ; %bb.0: @@ -1168,93 +1165,90 @@ define void @test_indirect_call_vgpr_ptr_inreg_arg(void(i32)* %fptr) { ; GISEL-NEXT: s_or_saveexec_b64 s[4:5], -1 ; GISEL-NEXT: buffer_store_dword v40, off, s[0:3], s32 ; 4-byte Folded Spill ; GISEL-NEXT: s_mov_b64 exec, s[4:5] -; GISEL-NEXT: v_writelane_b32 v40, s33, 32 +; GISEL-NEXT: v_writelane_b32 v40, s33, 30 ; GISEL-NEXT: s_mov_b32 s33, s32 ; GISEL-NEXT: s_addk_i32 s32, 0x400 -; GISEL-NEXT: v_writelane_b32 v40, s30, 0 -; GISEL-NEXT: v_writelane_b32 v40, s31, 1 -; GISEL-NEXT: v_writelane_b32 v40, s34, 2 -; GISEL-NEXT: v_writelane_b32 v40, s35, 3 -; GISEL-NEXT: v_writelane_b32 v40, s36, 4 -; GISEL-NEXT: v_writelane_b32 v40, s37, 5 -; GISEL-NEXT: v_writelane_b32 v40, s38, 6 -; GISEL-NEXT: v_writelane_b32 v40, s39, 7 -; GISEL-NEXT: v_writelane_b32 v40, s40, 8 -; GISEL-NEXT: v_writelane_b32 v40, 
s41, 9 -; GISEL-NEXT: v_writelane_b32 v40, s42, 10 -; GISEL-NEXT: v_writelane_b32 v40, s43, 11 -; GISEL-NEXT: v_writelane_b32 v40, s44, 12 -; GISEL-NEXT: v_writelane_b32 v40, s45, 13 -; GISEL-NEXT: v_writelane_b32 v40, s46, 14 -; GISEL-NEXT: v_writelane_b32 v40, s47, 15 -; GISEL-NEXT: v_writelane_b32 v40, s48, 16 -; GISEL-NEXT: v_writelane_b32 v40, s49, 17 -; GISEL-NEXT: v_writelane_b32 v40, s50, 18 -; GISEL-NEXT: v_writelane_b32 v40, s51, 19 -; GISEL-NEXT: v_writelane_b32 v40, s52, 20 -; GISEL-NEXT: v_writelane_b32 v40, s53, 21 -; GISEL-NEXT: v_writelane_b32 v40, s54, 22 -; GISEL-NEXT: v_writelane_b32 v40, s55, 23 -; GISEL-NEXT: v_writelane_b32 v40, s56, 24 -; GISEL-NEXT: v_writelane_b32 v40, s57, 25 -; GISEL-NEXT: v_writelane_b32 v40, s58, 26 -; GISEL-NEXT: v_writelane_b32 v40, s59, 27 -; GISEL-NEXT: v_writelane_b32 v40, s60, 28 -; GISEL-NEXT: v_writelane_b32 v40, s61, 29 -; GISEL-NEXT: v_writelane_b32 v40, s62, 30 -; GISEL-NEXT: v_writelane_b32 v40, s63, 31 +; GISEL-NEXT: v_writelane_b32 v40, s34, 0 +; GISEL-NEXT: v_writelane_b32 v40, s35, 1 +; GISEL-NEXT: v_writelane_b32 v40, s36, 2 +; GISEL-NEXT: v_writelane_b32 v40, s37, 3 +; GISEL-NEXT: v_writelane_b32 v40, s38, 4 +; GISEL-NEXT: v_writelane_b32 v40, s39, 5 +; GISEL-NEXT: v_writelane_b32 v40, s40, 6 +; GISEL-NEXT: v_writelane_b32 v40, s41, 7 +; GISEL-NEXT: v_writelane_b32 v40, s42, 8 +; GISEL-NEXT: v_writelane_b32 v40, s43, 9 +; GISEL-NEXT: v_writelane_b32 v40, s44, 10 +; GISEL-NEXT: v_writelane_b32 v40, s45, 11 +; GISEL-NEXT: v_writelane_b32 v40, s46, 12 +; GISEL-NEXT: v_writelane_b32 v40, s47, 13 +; GISEL-NEXT: v_writelane_b32 v40, s48, 14 +; GISEL-NEXT: v_writelane_b32 v40, s49, 15 +; GISEL-NEXT: v_writelane_b32 v40, s50, 16 +; GISEL-NEXT: v_writelane_b32 v40, s51, 17 +; GISEL-NEXT: v_writelane_b32 v40, s52, 18 +; GISEL-NEXT: v_writelane_b32 v40, s53, 19 +; GISEL-NEXT: v_writelane_b32 v40, s54, 20 +; GISEL-NEXT: v_writelane_b32 v40, s55, 21 +; GISEL-NEXT: v_writelane_b32 v40, s56, 22 +; GISEL-NEXT: v_writelane_b32 v40, s57, 23 +; GISEL-NEXT: v_writelane_b32 v40, s58, 24 +; GISEL-NEXT: v_writelane_b32 v40, s59, 25 +; GISEL-NEXT: v_writelane_b32 v40, s60, 26 +; GISEL-NEXT: v_writelane_b32 v40, s61, 27 +; GISEL-NEXT: v_writelane_b32 v40, s62, 28 +; GISEL-NEXT: v_writelane_b32 v40, s63, 29 +; GISEL-NEXT: s_mov_b64 s[6:7], s[30:31] ; GISEL-NEXT: s_movk_i32 s4, 0x7b -; GISEL-NEXT: s_mov_b64 s[6:7], exec +; GISEL-NEXT: s_mov_b64 s[8:9], exec ; GISEL-NEXT: .LBB6_1: ; =>This Inner Loop Header: Depth=1 -; GISEL-NEXT: v_readfirstlane_b32 s8, v0 -; GISEL-NEXT: v_readfirstlane_b32 s9, v1 -; GISEL-NEXT: v_cmp_eq_u64_e32 vcc, s[8:9], v[0:1] -; GISEL-NEXT: s_and_saveexec_b64 s[10:11], vcc -; GISEL-NEXT: s_swappc_b64 s[30:31], s[8:9] +; GISEL-NEXT: v_readfirstlane_b32 s10, v0 +; GISEL-NEXT: v_readfirstlane_b32 s11, v1 +; GISEL-NEXT: v_cmp_eq_u64_e32 vcc, s[10:11], v[0:1] +; GISEL-NEXT: s_and_saveexec_b64 s[12:13], vcc +; GISEL-NEXT: s_swappc_b64 s[30:31], s[10:11] ; GISEL-NEXT: ; implicit-def: $vgpr0_vgpr1 -; GISEL-NEXT: s_xor_b64 exec, exec, s[10:11] +; GISEL-NEXT: s_xor_b64 exec, exec, s[12:13] ; GISEL-NEXT: s_cbranch_execnz .LBB6_1 ; GISEL-NEXT: ; %bb.2: -; GISEL-NEXT: s_mov_b64 exec, s[6:7] -; GISEL-NEXT: v_readlane_b32 s63, v40, 31 -; GISEL-NEXT: v_readlane_b32 s62, v40, 30 -; GISEL-NEXT: v_readlane_b32 s61, v40, 29 -; GISEL-NEXT: v_readlane_b32 s60, v40, 28 -; GISEL-NEXT: v_readlane_b32 s59, v40, 27 -; GISEL-NEXT: v_readlane_b32 s58, v40, 26 -; GISEL-NEXT: v_readlane_b32 s57, v40, 25 -; GISEL-NEXT: v_readlane_b32 s56, v40, 24 -; GISEL-NEXT: 
v_readlane_b32 s55, v40, 23 -; GISEL-NEXT: v_readlane_b32 s54, v40, 22 -; GISEL-NEXT: v_readlane_b32 s53, v40, 21 -; GISEL-NEXT: v_readlane_b32 s52, v40, 20 -; GISEL-NEXT: v_readlane_b32 s51, v40, 19 -; GISEL-NEXT: v_readlane_b32 s50, v40, 18 -; GISEL-NEXT: v_readlane_b32 s49, v40, 17 -; GISEL-NEXT: v_readlane_b32 s48, v40, 16 -; GISEL-NEXT: v_readlane_b32 s47, v40, 15 -; GISEL-NEXT: v_readlane_b32 s46, v40, 14 -; GISEL-NEXT: v_readlane_b32 s45, v40, 13 -; GISEL-NEXT: v_readlane_b32 s44, v40, 12 -; GISEL-NEXT: v_readlane_b32 s43, v40, 11 -; GISEL-NEXT: v_readlane_b32 s42, v40, 10 -; GISEL-NEXT: v_readlane_b32 s41, v40, 9 -; GISEL-NEXT: v_readlane_b32 s40, v40, 8 -; GISEL-NEXT: v_readlane_b32 s39, v40, 7 -; GISEL-NEXT: v_readlane_b32 s38, v40, 6 -; GISEL-NEXT: v_readlane_b32 s37, v40, 5 -; GISEL-NEXT: v_readlane_b32 s36, v40, 4 -; GISEL-NEXT: v_readlane_b32 s35, v40, 3 -; GISEL-NEXT: v_readlane_b32 s34, v40, 2 -; GISEL-NEXT: v_readlane_b32 s31, v40, 1 -; GISEL-NEXT: v_readlane_b32 s30, v40, 0 +; GISEL-NEXT: s_mov_b64 exec, s[8:9] +; GISEL-NEXT: v_readlane_b32 s63, v40, 29 +; GISEL-NEXT: v_readlane_b32 s62, v40, 28 +; GISEL-NEXT: v_readlane_b32 s61, v40, 27 +; GISEL-NEXT: v_readlane_b32 s60, v40, 26 +; GISEL-NEXT: v_readlane_b32 s59, v40, 25 +; GISEL-NEXT: v_readlane_b32 s58, v40, 24 +; GISEL-NEXT: v_readlane_b32 s57, v40, 23 +; GISEL-NEXT: v_readlane_b32 s56, v40, 22 +; GISEL-NEXT: v_readlane_b32 s55, v40, 21 +; GISEL-NEXT: v_readlane_b32 s54, v40, 20 +; GISEL-NEXT: v_readlane_b32 s53, v40, 19 +; GISEL-NEXT: v_readlane_b32 s52, v40, 18 +; GISEL-NEXT: v_readlane_b32 s51, v40, 17 +; GISEL-NEXT: v_readlane_b32 s50, v40, 16 +; GISEL-NEXT: v_readlane_b32 s49, v40, 15 +; GISEL-NEXT: v_readlane_b32 s48, v40, 14 +; GISEL-NEXT: v_readlane_b32 s47, v40, 13 +; GISEL-NEXT: v_readlane_b32 s46, v40, 12 +; GISEL-NEXT: v_readlane_b32 s45, v40, 11 +; GISEL-NEXT: v_readlane_b32 s44, v40, 10 +; GISEL-NEXT: v_readlane_b32 s43, v40, 9 +; GISEL-NEXT: v_readlane_b32 s42, v40, 8 +; GISEL-NEXT: v_readlane_b32 s41, v40, 7 +; GISEL-NEXT: v_readlane_b32 s40, v40, 6 +; GISEL-NEXT: v_readlane_b32 s39, v40, 5 +; GISEL-NEXT: v_readlane_b32 s38, v40, 4 +; GISEL-NEXT: v_readlane_b32 s37, v40, 3 +; GISEL-NEXT: v_readlane_b32 s36, v40, 2 +; GISEL-NEXT: v_readlane_b32 s35, v40, 1 +; GISEL-NEXT: v_readlane_b32 s34, v40, 0 ; GISEL-NEXT: s_addk_i32 s32, 0xfc00 -; GISEL-NEXT: v_readlane_b32 s33, v40, 32 +; GISEL-NEXT: v_readlane_b32 s33, v40, 30 ; GISEL-NEXT: s_or_saveexec_b64 s[4:5], -1 ; GISEL-NEXT: buffer_load_dword v40, off, s[0:3], s32 ; 4-byte Folded Reload ; GISEL-NEXT: s_mov_b64 exec, s[4:5] ; GISEL-NEXT: s_waitcnt vmcnt(0) -; GISEL-NEXT: s_setpc_b64 s[30:31] +; GISEL-NEXT: s_setpc_b64 s[6:7] call amdgpu_gfx void %fptr(i32 inreg 123) ret void } @@ -1266,97 +1260,94 @@ define i32 @test_indirect_call_vgpr_ptr_arg_and_reuse(i32 %i, void(i32)* %fptr) ; GCN-NEXT: s_or_saveexec_b64 s[4:5], -1 ; GCN-NEXT: buffer_store_dword v40, off, s[0:3], s32 offset:4 ; 4-byte Folded Spill ; GCN-NEXT: s_mov_b64 exec, s[4:5] -; GCN-NEXT: v_writelane_b32 v40, s33, 32 +; GCN-NEXT: v_writelane_b32 v40, s33, 30 ; GCN-NEXT: s_mov_b32 s33, s32 ; GCN-NEXT: s_addk_i32 s32, 0x400 ; GCN-NEXT: buffer_store_dword v41, off, s[0:3], s33 ; 4-byte Folded Spill -; GCN-NEXT: v_writelane_b32 v40, s30, 0 -; GCN-NEXT: v_writelane_b32 v40, s31, 1 -; GCN-NEXT: v_writelane_b32 v40, s34, 2 -; GCN-NEXT: v_writelane_b32 v40, s35, 3 -; GCN-NEXT: v_writelane_b32 v40, s36, 4 -; GCN-NEXT: v_writelane_b32 v40, s37, 5 -; GCN-NEXT: v_writelane_b32 v40, s38, 6 -; GCN-NEXT: 
v_writelane_b32 v40, s39, 7 -; GCN-NEXT: v_writelane_b32 v40, s40, 8 -; GCN-NEXT: v_writelane_b32 v40, s41, 9 -; GCN-NEXT: v_writelane_b32 v40, s42, 10 -; GCN-NEXT: v_writelane_b32 v40, s43, 11 -; GCN-NEXT: v_writelane_b32 v40, s44, 12 -; GCN-NEXT: v_writelane_b32 v40, s45, 13 -; GCN-NEXT: v_writelane_b32 v40, s46, 14 -; GCN-NEXT: v_writelane_b32 v40, s47, 15 -; GCN-NEXT: v_writelane_b32 v40, s48, 16 -; GCN-NEXT: v_writelane_b32 v40, s49, 17 -; GCN-NEXT: v_writelane_b32 v40, s50, 18 -; GCN-NEXT: v_writelane_b32 v40, s51, 19 -; GCN-NEXT: v_writelane_b32 v40, s52, 20 -; GCN-NEXT: v_writelane_b32 v40, s53, 21 -; GCN-NEXT: v_writelane_b32 v40, s54, 22 -; GCN-NEXT: v_writelane_b32 v40, s55, 23 -; GCN-NEXT: v_writelane_b32 v40, s56, 24 -; GCN-NEXT: v_writelane_b32 v40, s57, 25 -; GCN-NEXT: v_writelane_b32 v40, s58, 26 -; GCN-NEXT: v_writelane_b32 v40, s59, 27 -; GCN-NEXT: v_writelane_b32 v40, s60, 28 -; GCN-NEXT: v_writelane_b32 v40, s61, 29 -; GCN-NEXT: v_writelane_b32 v40, s62, 30 -; GCN-NEXT: v_writelane_b32 v40, s63, 31 +; GCN-NEXT: v_writelane_b32 v40, s34, 0 +; GCN-NEXT: v_writelane_b32 v40, s35, 1 +; GCN-NEXT: v_writelane_b32 v40, s36, 2 +; GCN-NEXT: v_writelane_b32 v40, s37, 3 +; GCN-NEXT: v_writelane_b32 v40, s38, 4 +; GCN-NEXT: v_writelane_b32 v40, s39, 5 +; GCN-NEXT: v_writelane_b32 v40, s40, 6 +; GCN-NEXT: v_writelane_b32 v40, s41, 7 +; GCN-NEXT: v_writelane_b32 v40, s42, 8 +; GCN-NEXT: v_writelane_b32 v40, s43, 9 +; GCN-NEXT: v_writelane_b32 v40, s44, 10 +; GCN-NEXT: v_writelane_b32 v40, s45, 11 +; GCN-NEXT: v_writelane_b32 v40, s46, 12 +; GCN-NEXT: v_writelane_b32 v40, s47, 13 +; GCN-NEXT: v_writelane_b32 v40, s48, 14 +; GCN-NEXT: v_writelane_b32 v40, s49, 15 +; GCN-NEXT: v_writelane_b32 v40, s50, 16 +; GCN-NEXT: v_writelane_b32 v40, s51, 17 +; GCN-NEXT: v_writelane_b32 v40, s52, 18 +; GCN-NEXT: v_writelane_b32 v40, s53, 19 +; GCN-NEXT: v_writelane_b32 v40, s54, 20 +; GCN-NEXT: v_writelane_b32 v40, s55, 21 +; GCN-NEXT: v_writelane_b32 v40, s56, 22 +; GCN-NEXT: v_writelane_b32 v40, s57, 23 +; GCN-NEXT: v_writelane_b32 v40, s58, 24 +; GCN-NEXT: v_writelane_b32 v40, s59, 25 +; GCN-NEXT: v_writelane_b32 v40, s60, 26 +; GCN-NEXT: v_writelane_b32 v40, s61, 27 +; GCN-NEXT: v_writelane_b32 v40, s62, 28 +; GCN-NEXT: v_writelane_b32 v40, s63, 29 +; GCN-NEXT: s_mov_b64 s[4:5], s[30:31] ; GCN-NEXT: v_mov_b32_e32 v41, v0 -; GCN-NEXT: s_mov_b64 s[4:5], exec +; GCN-NEXT: s_mov_b64 s[6:7], exec ; GCN-NEXT: .LBB7_1: ; =>This Inner Loop Header: Depth=1 -; GCN-NEXT: v_readfirstlane_b32 s8, v1 -; GCN-NEXT: v_readfirstlane_b32 s9, v2 -; GCN-NEXT: v_cmp_eq_u64_e32 vcc, s[8:9], v[1:2] -; GCN-NEXT: s_and_saveexec_b64 s[6:7], vcc +; GCN-NEXT: v_readfirstlane_b32 s10, v1 +; GCN-NEXT: v_readfirstlane_b32 s11, v2 +; GCN-NEXT: v_cmp_eq_u64_e32 vcc, s[10:11], v[1:2] +; GCN-NEXT: s_and_saveexec_b64 s[8:9], vcc ; GCN-NEXT: v_mov_b32_e32 v0, v41 -; GCN-NEXT: s_swappc_b64 s[30:31], s[8:9] +; GCN-NEXT: s_swappc_b64 s[30:31], s[10:11] ; GCN-NEXT: ; implicit-def: $vgpr1_vgpr2 -; GCN-NEXT: s_xor_b64 exec, exec, s[6:7] +; GCN-NEXT: s_xor_b64 exec, exec, s[8:9] ; GCN-NEXT: s_cbranch_execnz .LBB7_1 ; GCN-NEXT: ; %bb.2: -; GCN-NEXT: s_mov_b64 exec, s[4:5] +; GCN-NEXT: s_mov_b64 exec, s[6:7] ; GCN-NEXT: v_mov_b32_e32 v0, v41 -; GCN-NEXT: v_readlane_b32 s63, v40, 31 -; GCN-NEXT: v_readlane_b32 s62, v40, 30 -; GCN-NEXT: v_readlane_b32 s61, v40, 29 -; GCN-NEXT: v_readlane_b32 s60, v40, 28 -; GCN-NEXT: v_readlane_b32 s59, v40, 27 -; GCN-NEXT: v_readlane_b32 s58, v40, 26 -; GCN-NEXT: v_readlane_b32 s57, v40, 25 -; GCN-NEXT: 
v_readlane_b32 s56, v40, 24 -; GCN-NEXT: v_readlane_b32 s55, v40, 23 -; GCN-NEXT: v_readlane_b32 s54, v40, 22 -; GCN-NEXT: v_readlane_b32 s53, v40, 21 -; GCN-NEXT: v_readlane_b32 s52, v40, 20 -; GCN-NEXT: v_readlane_b32 s51, v40, 19 -; GCN-NEXT: v_readlane_b32 s50, v40, 18 -; GCN-NEXT: v_readlane_b32 s49, v40, 17 -; GCN-NEXT: v_readlane_b32 s48, v40, 16 -; GCN-NEXT: v_readlane_b32 s47, v40, 15 -; GCN-NEXT: v_readlane_b32 s46, v40, 14 -; GCN-NEXT: v_readlane_b32 s45, v40, 13 -; GCN-NEXT: v_readlane_b32 s44, v40, 12 -; GCN-NEXT: v_readlane_b32 s43, v40, 11 -; GCN-NEXT: v_readlane_b32 s42, v40, 10 -; GCN-NEXT: v_readlane_b32 s41, v40, 9 -; GCN-NEXT: v_readlane_b32 s40, v40, 8 -; GCN-NEXT: v_readlane_b32 s39, v40, 7 -; GCN-NEXT: v_readlane_b32 s38, v40, 6 -; GCN-NEXT: v_readlane_b32 s37, v40, 5 -; GCN-NEXT: v_readlane_b32 s36, v40, 4 -; GCN-NEXT: v_readlane_b32 s35, v40, 3 -; GCN-NEXT: v_readlane_b32 s34, v40, 2 -; GCN-NEXT: v_readlane_b32 s31, v40, 1 -; GCN-NEXT: v_readlane_b32 s30, v40, 0 +; GCN-NEXT: v_readlane_b32 s63, v40, 29 +; GCN-NEXT: v_readlane_b32 s62, v40, 28 +; GCN-NEXT: v_readlane_b32 s61, v40, 27 +; GCN-NEXT: v_readlane_b32 s60, v40, 26 +; GCN-NEXT: v_readlane_b32 s59, v40, 25 +; GCN-NEXT: v_readlane_b32 s58, v40, 24 +; GCN-NEXT: v_readlane_b32 s57, v40, 23 +; GCN-NEXT: v_readlane_b32 s56, v40, 22 +; GCN-NEXT: v_readlane_b32 s55, v40, 21 +; GCN-NEXT: v_readlane_b32 s54, v40, 20 +; GCN-NEXT: v_readlane_b32 s53, v40, 19 +; GCN-NEXT: v_readlane_b32 s52, v40, 18 +; GCN-NEXT: v_readlane_b32 s51, v40, 17 +; GCN-NEXT: v_readlane_b32 s50, v40, 16 +; GCN-NEXT: v_readlane_b32 s49, v40, 15 +; GCN-NEXT: v_readlane_b32 s48, v40, 14 +; GCN-NEXT: v_readlane_b32 s47, v40, 13 +; GCN-NEXT: v_readlane_b32 s46, v40, 12 +; GCN-NEXT: v_readlane_b32 s45, v40, 11 +; GCN-NEXT: v_readlane_b32 s44, v40, 10 +; GCN-NEXT: v_readlane_b32 s43, v40, 9 +; GCN-NEXT: v_readlane_b32 s42, v40, 8 +; GCN-NEXT: v_readlane_b32 s41, v40, 7 +; GCN-NEXT: v_readlane_b32 s40, v40, 6 +; GCN-NEXT: v_readlane_b32 s39, v40, 5 +; GCN-NEXT: v_readlane_b32 s38, v40, 4 +; GCN-NEXT: v_readlane_b32 s37, v40, 3 +; GCN-NEXT: v_readlane_b32 s36, v40, 2 +; GCN-NEXT: v_readlane_b32 s35, v40, 1 +; GCN-NEXT: v_readlane_b32 s34, v40, 0 ; GCN-NEXT: buffer_load_dword v41, off, s[0:3], s33 ; 4-byte Folded Reload ; GCN-NEXT: s_addk_i32 s32, 0xfc00 -; GCN-NEXT: v_readlane_b32 s33, v40, 32 -; GCN-NEXT: s_or_saveexec_b64 s[4:5], -1 +; GCN-NEXT: v_readlane_b32 s33, v40, 30 +; GCN-NEXT: s_or_saveexec_b64 s[6:7], -1 ; GCN-NEXT: buffer_load_dword v40, off, s[0:3], s32 offset:4 ; 4-byte Folded Reload -; GCN-NEXT: s_mov_b64 exec, s[4:5] +; GCN-NEXT: s_mov_b64 exec, s[6:7] ; GCN-NEXT: s_waitcnt vmcnt(0) -; GCN-NEXT: s_setpc_b64 s[30:31] +; GCN-NEXT: s_setpc_b64 s[4:5] ; ; GISEL-LABEL: test_indirect_call_vgpr_ptr_arg_and_reuse: ; GISEL: ; %bb.0: @@ -1364,97 +1355,94 @@ define i32 @test_indirect_call_vgpr_ptr_arg_and_reuse(i32 %i, void(i32)* %fptr) ; GISEL-NEXT: s_or_saveexec_b64 s[4:5], -1 ; GISEL-NEXT: buffer_store_dword v40, off, s[0:3], s32 offset:4 ; 4-byte Folded Spill ; GISEL-NEXT: s_mov_b64 exec, s[4:5] -; GISEL-NEXT: v_writelane_b32 v40, s33, 32 +; GISEL-NEXT: v_writelane_b32 v40, s33, 30 ; GISEL-NEXT: s_mov_b32 s33, s32 ; GISEL-NEXT: s_addk_i32 s32, 0x400 ; GISEL-NEXT: buffer_store_dword v41, off, s[0:3], s33 ; 4-byte Folded Spill -; GISEL-NEXT: v_writelane_b32 v40, s30, 0 -; GISEL-NEXT: v_writelane_b32 v40, s31, 1 -; GISEL-NEXT: v_writelane_b32 v40, s34, 2 -; GISEL-NEXT: v_writelane_b32 v40, s35, 3 -; GISEL-NEXT: v_writelane_b32 v40, s36, 4 -; 
GISEL-NEXT: v_writelane_b32 v40, s37, 5 -; GISEL-NEXT: v_writelane_b32 v40, s38, 6 -; GISEL-NEXT: v_writelane_b32 v40, s39, 7 -; GISEL-NEXT: v_writelane_b32 v40, s40, 8 -; GISEL-NEXT: v_writelane_b32 v40, s41, 9 -; GISEL-NEXT: v_writelane_b32 v40, s42, 10 -; GISEL-NEXT: v_writelane_b32 v40, s43, 11 -; GISEL-NEXT: v_writelane_b32 v40, s44, 12 -; GISEL-NEXT: v_writelane_b32 v40, s45, 13 -; GISEL-NEXT: v_writelane_b32 v40, s46, 14 -; GISEL-NEXT: v_writelane_b32 v40, s47, 15 -; GISEL-NEXT: v_writelane_b32 v40, s48, 16 -; GISEL-NEXT: v_writelane_b32 v40, s49, 17 -; GISEL-NEXT: v_writelane_b32 v40, s50, 18 -; GISEL-NEXT: v_writelane_b32 v40, s51, 19 -; GISEL-NEXT: v_writelane_b32 v40, s52, 20 -; GISEL-NEXT: v_writelane_b32 v40, s53, 21 -; GISEL-NEXT: v_writelane_b32 v40, s54, 22 -; GISEL-NEXT: v_writelane_b32 v40, s55, 23 -; GISEL-NEXT: v_writelane_b32 v40, s56, 24 -; GISEL-NEXT: v_writelane_b32 v40, s57, 25 -; GISEL-NEXT: v_writelane_b32 v40, s58, 26 -; GISEL-NEXT: v_writelane_b32 v40, s59, 27 -; GISEL-NEXT: v_writelane_b32 v40, s60, 28 -; GISEL-NEXT: v_writelane_b32 v40, s61, 29 -; GISEL-NEXT: v_writelane_b32 v40, s62, 30 -; GISEL-NEXT: v_writelane_b32 v40, s63, 31 +; GISEL-NEXT: v_writelane_b32 v40, s34, 0 +; GISEL-NEXT: v_writelane_b32 v40, s35, 1 +; GISEL-NEXT: v_writelane_b32 v40, s36, 2 +; GISEL-NEXT: v_writelane_b32 v40, s37, 3 +; GISEL-NEXT: v_writelane_b32 v40, s38, 4 +; GISEL-NEXT: v_writelane_b32 v40, s39, 5 +; GISEL-NEXT: v_writelane_b32 v40, s40, 6 +; GISEL-NEXT: v_writelane_b32 v40, s41, 7 +; GISEL-NEXT: v_writelane_b32 v40, s42, 8 +; GISEL-NEXT: v_writelane_b32 v40, s43, 9 +; GISEL-NEXT: v_writelane_b32 v40, s44, 10 +; GISEL-NEXT: v_writelane_b32 v40, s45, 11 +; GISEL-NEXT: v_writelane_b32 v40, s46, 12 +; GISEL-NEXT: v_writelane_b32 v40, s47, 13 +; GISEL-NEXT: v_writelane_b32 v40, s48, 14 +; GISEL-NEXT: v_writelane_b32 v40, s49, 15 +; GISEL-NEXT: v_writelane_b32 v40, s50, 16 +; GISEL-NEXT: v_writelane_b32 v40, s51, 17 +; GISEL-NEXT: v_writelane_b32 v40, s52, 18 +; GISEL-NEXT: v_writelane_b32 v40, s53, 19 +; GISEL-NEXT: v_writelane_b32 v40, s54, 20 +; GISEL-NEXT: v_writelane_b32 v40, s55, 21 +; GISEL-NEXT: v_writelane_b32 v40, s56, 22 +; GISEL-NEXT: v_writelane_b32 v40, s57, 23 +; GISEL-NEXT: v_writelane_b32 v40, s58, 24 +; GISEL-NEXT: v_writelane_b32 v40, s59, 25 +; GISEL-NEXT: v_writelane_b32 v40, s60, 26 +; GISEL-NEXT: v_writelane_b32 v40, s61, 27 +; GISEL-NEXT: v_writelane_b32 v40, s62, 28 +; GISEL-NEXT: v_writelane_b32 v40, s63, 29 ; GISEL-NEXT: v_mov_b32_e32 v41, v0 -; GISEL-NEXT: s_mov_b64 s[4:5], exec +; GISEL-NEXT: s_mov_b64 s[4:5], s[30:31] +; GISEL-NEXT: s_mov_b64 s[6:7], exec ; GISEL-NEXT: .LBB7_1: ; =>This Inner Loop Header: Depth=1 -; GISEL-NEXT: v_readfirstlane_b32 s6, v1 -; GISEL-NEXT: v_readfirstlane_b32 s7, v2 -; GISEL-NEXT: v_cmp_eq_u64_e32 vcc, s[6:7], v[1:2] -; GISEL-NEXT: s_and_saveexec_b64 s[8:9], vcc +; GISEL-NEXT: v_readfirstlane_b32 s8, v1 +; GISEL-NEXT: v_readfirstlane_b32 s9, v2 +; GISEL-NEXT: v_cmp_eq_u64_e32 vcc, s[8:9], v[1:2] +; GISEL-NEXT: s_and_saveexec_b64 s[10:11], vcc ; GISEL-NEXT: v_mov_b32_e32 v0, v41 -; GISEL-NEXT: s_swappc_b64 s[30:31], s[6:7] +; GISEL-NEXT: s_swappc_b64 s[30:31], s[8:9] ; GISEL-NEXT: ; implicit-def: $vgpr1_vgpr2 -; GISEL-NEXT: s_xor_b64 exec, exec, s[8:9] +; GISEL-NEXT: s_xor_b64 exec, exec, s[10:11] ; GISEL-NEXT: s_cbranch_execnz .LBB7_1 ; GISEL-NEXT: ; %bb.2: -; GISEL-NEXT: s_mov_b64 exec, s[4:5] +; GISEL-NEXT: s_mov_b64 exec, s[6:7] ; GISEL-NEXT: v_mov_b32_e32 v0, v41 -; GISEL-NEXT: v_readlane_b32 s63, v40, 31 -; 
GISEL-NEXT: v_readlane_b32 s62, v40, 30 -; GISEL-NEXT: v_readlane_b32 s61, v40, 29 -; GISEL-NEXT: v_readlane_b32 s60, v40, 28 -; GISEL-NEXT: v_readlane_b32 s59, v40, 27 -; GISEL-NEXT: v_readlane_b32 s58, v40, 26 -; GISEL-NEXT: v_readlane_b32 s57, v40, 25 -; GISEL-NEXT: v_readlane_b32 s56, v40, 24 -; GISEL-NEXT: v_readlane_b32 s55, v40, 23 -; GISEL-NEXT: v_readlane_b32 s54, v40, 22 -; GISEL-NEXT: v_readlane_b32 s53, v40, 21 -; GISEL-NEXT: v_readlane_b32 s52, v40, 20 -; GISEL-NEXT: v_readlane_b32 s51, v40, 19 -; GISEL-NEXT: v_readlane_b32 s50, v40, 18 -; GISEL-NEXT: v_readlane_b32 s49, v40, 17 -; GISEL-NEXT: v_readlane_b32 s48, v40, 16 -; GISEL-NEXT: v_readlane_b32 s47, v40, 15 -; GISEL-NEXT: v_readlane_b32 s46, v40, 14 -; GISEL-NEXT: v_readlane_b32 s45, v40, 13 -; GISEL-NEXT: v_readlane_b32 s44, v40, 12 -; GISEL-NEXT: v_readlane_b32 s43, v40, 11 -; GISEL-NEXT: v_readlane_b32 s42, v40, 10 -; GISEL-NEXT: v_readlane_b32 s41, v40, 9 -; GISEL-NEXT: v_readlane_b32 s40, v40, 8 -; GISEL-NEXT: v_readlane_b32 s39, v40, 7 -; GISEL-NEXT: v_readlane_b32 s38, v40, 6 -; GISEL-NEXT: v_readlane_b32 s37, v40, 5 -; GISEL-NEXT: v_readlane_b32 s36, v40, 4 -; GISEL-NEXT: v_readlane_b32 s35, v40, 3 -; GISEL-NEXT: v_readlane_b32 s34, v40, 2 -; GISEL-NEXT: v_readlane_b32 s31, v40, 1 -; GISEL-NEXT: v_readlane_b32 s30, v40, 0 +; GISEL-NEXT: v_readlane_b32 s63, v40, 29 +; GISEL-NEXT: v_readlane_b32 s62, v40, 28 +; GISEL-NEXT: v_readlane_b32 s61, v40, 27 +; GISEL-NEXT: v_readlane_b32 s60, v40, 26 +; GISEL-NEXT: v_readlane_b32 s59, v40, 25 +; GISEL-NEXT: v_readlane_b32 s58, v40, 24 +; GISEL-NEXT: v_readlane_b32 s57, v40, 23 +; GISEL-NEXT: v_readlane_b32 s56, v40, 22 +; GISEL-NEXT: v_readlane_b32 s55, v40, 21 +; GISEL-NEXT: v_readlane_b32 s54, v40, 20 +; GISEL-NEXT: v_readlane_b32 s53, v40, 19 +; GISEL-NEXT: v_readlane_b32 s52, v40, 18 +; GISEL-NEXT: v_readlane_b32 s51, v40, 17 +; GISEL-NEXT: v_readlane_b32 s50, v40, 16 +; GISEL-NEXT: v_readlane_b32 s49, v40, 15 +; GISEL-NEXT: v_readlane_b32 s48, v40, 14 +; GISEL-NEXT: v_readlane_b32 s47, v40, 13 +; GISEL-NEXT: v_readlane_b32 s46, v40, 12 +; GISEL-NEXT: v_readlane_b32 s45, v40, 11 +; GISEL-NEXT: v_readlane_b32 s44, v40, 10 +; GISEL-NEXT: v_readlane_b32 s43, v40, 9 +; GISEL-NEXT: v_readlane_b32 s42, v40, 8 +; GISEL-NEXT: v_readlane_b32 s41, v40, 7 +; GISEL-NEXT: v_readlane_b32 s40, v40, 6 +; GISEL-NEXT: v_readlane_b32 s39, v40, 5 +; GISEL-NEXT: v_readlane_b32 s38, v40, 4 +; GISEL-NEXT: v_readlane_b32 s37, v40, 3 +; GISEL-NEXT: v_readlane_b32 s36, v40, 2 +; GISEL-NEXT: v_readlane_b32 s35, v40, 1 +; GISEL-NEXT: v_readlane_b32 s34, v40, 0 ; GISEL-NEXT: buffer_load_dword v41, off, s[0:3], s33 ; 4-byte Folded Reload ; GISEL-NEXT: s_addk_i32 s32, 0xfc00 -; GISEL-NEXT: v_readlane_b32 s33, v40, 32 -; GISEL-NEXT: s_or_saveexec_b64 s[4:5], -1 +; GISEL-NEXT: v_readlane_b32 s33, v40, 30 +; GISEL-NEXT: s_or_saveexec_b64 s[6:7], -1 ; GISEL-NEXT: buffer_load_dword v40, off, s[0:3], s32 offset:4 ; 4-byte Folded Reload -; GISEL-NEXT: s_mov_b64 exec, s[4:5] +; GISEL-NEXT: s_mov_b64 exec, s[6:7] ; GISEL-NEXT: s_waitcnt vmcnt(0) -; GISEL-NEXT: s_setpc_b64 s[30:31] +; GISEL-NEXT: s_setpc_b64 s[4:5] call amdgpu_gfx void %fptr(i32 %i) ret i32 %i } @@ -1470,95 +1458,92 @@ define i32 @test_indirect_call_vgpr_ptr_arg_and_return(i32 %i, i32(i32)* %fptr) ; GCN-NEXT: s_or_saveexec_b64 s[4:5], -1 ; GCN-NEXT: buffer_store_dword v40, off, s[0:3], s32 ; 4-byte Folded Spill ; GCN-NEXT: s_mov_b64 exec, s[4:5] -; GCN-NEXT: v_writelane_b32 v40, s33, 32 +; GCN-NEXT: v_writelane_b32 v40, s33, 30 ; GCN-NEXT: 
s_mov_b32 s33, s32 ; GCN-NEXT: s_addk_i32 s32, 0x400 -; GCN-NEXT: v_writelane_b32 v40, s30, 0 -; GCN-NEXT: v_writelane_b32 v40, s31, 1 -; GCN-NEXT: v_writelane_b32 v40, s34, 2 -; GCN-NEXT: v_writelane_b32 v40, s35, 3 -; GCN-NEXT: v_writelane_b32 v40, s36, 4 -; GCN-NEXT: v_writelane_b32 v40, s37, 5 -; GCN-NEXT: v_writelane_b32 v40, s38, 6 -; GCN-NEXT: v_writelane_b32 v40, s39, 7 -; GCN-NEXT: v_writelane_b32 v40, s40, 8 -; GCN-NEXT: v_writelane_b32 v40, s41, 9 -; GCN-NEXT: v_writelane_b32 v40, s42, 10 -; GCN-NEXT: v_writelane_b32 v40, s43, 11 -; GCN-NEXT: v_writelane_b32 v40, s44, 12 -; GCN-NEXT: v_writelane_b32 v40, s45, 13 -; GCN-NEXT: v_writelane_b32 v40, s46, 14 -; GCN-NEXT: v_writelane_b32 v40, s47, 15 -; GCN-NEXT: v_writelane_b32 v40, s48, 16 -; GCN-NEXT: v_writelane_b32 v40, s49, 17 -; GCN-NEXT: v_writelane_b32 v40, s50, 18 -; GCN-NEXT: v_writelane_b32 v40, s51, 19 -; GCN-NEXT: v_writelane_b32 v40, s52, 20 -; GCN-NEXT: v_writelane_b32 v40, s53, 21 -; GCN-NEXT: v_writelane_b32 v40, s54, 22 -; GCN-NEXT: v_writelane_b32 v40, s55, 23 -; GCN-NEXT: v_writelane_b32 v40, s56, 24 -; GCN-NEXT: v_writelane_b32 v40, s57, 25 -; GCN-NEXT: v_writelane_b32 v40, s58, 26 -; GCN-NEXT: v_writelane_b32 v40, s59, 27 -; GCN-NEXT: v_writelane_b32 v40, s60, 28 -; GCN-NEXT: v_writelane_b32 v40, s61, 29 -; GCN-NEXT: v_writelane_b32 v40, s62, 30 -; GCN-NEXT: v_writelane_b32 v40, s63, 31 -; GCN-NEXT: s_mov_b64 s[4:5], exec +; GCN-NEXT: v_writelane_b32 v40, s34, 0 +; GCN-NEXT: v_writelane_b32 v40, s35, 1 +; GCN-NEXT: v_writelane_b32 v40, s36, 2 +; GCN-NEXT: v_writelane_b32 v40, s37, 3 +; GCN-NEXT: v_writelane_b32 v40, s38, 4 +; GCN-NEXT: v_writelane_b32 v40, s39, 5 +; GCN-NEXT: v_writelane_b32 v40, s40, 6 +; GCN-NEXT: v_writelane_b32 v40, s41, 7 +; GCN-NEXT: v_writelane_b32 v40, s42, 8 +; GCN-NEXT: v_writelane_b32 v40, s43, 9 +; GCN-NEXT: v_writelane_b32 v40, s44, 10 +; GCN-NEXT: v_writelane_b32 v40, s45, 11 +; GCN-NEXT: v_writelane_b32 v40, s46, 12 +; GCN-NEXT: v_writelane_b32 v40, s47, 13 +; GCN-NEXT: v_writelane_b32 v40, s48, 14 +; GCN-NEXT: v_writelane_b32 v40, s49, 15 +; GCN-NEXT: v_writelane_b32 v40, s50, 16 +; GCN-NEXT: v_writelane_b32 v40, s51, 17 +; GCN-NEXT: v_writelane_b32 v40, s52, 18 +; GCN-NEXT: v_writelane_b32 v40, s53, 19 +; GCN-NEXT: v_writelane_b32 v40, s54, 20 +; GCN-NEXT: v_writelane_b32 v40, s55, 21 +; GCN-NEXT: v_writelane_b32 v40, s56, 22 +; GCN-NEXT: v_writelane_b32 v40, s57, 23 +; GCN-NEXT: v_writelane_b32 v40, s58, 24 +; GCN-NEXT: v_writelane_b32 v40, s59, 25 +; GCN-NEXT: v_writelane_b32 v40, s60, 26 +; GCN-NEXT: v_writelane_b32 v40, s61, 27 +; GCN-NEXT: v_writelane_b32 v40, s62, 28 +; GCN-NEXT: v_writelane_b32 v40, s63, 29 +; GCN-NEXT: s_mov_b64 s[4:5], s[30:31] +; GCN-NEXT: s_mov_b64 s[6:7], exec ; GCN-NEXT: .LBB8_1: ; =>This Inner Loop Header: Depth=1 -; GCN-NEXT: v_readfirstlane_b32 s8, v1 -; GCN-NEXT: v_readfirstlane_b32 s9, v2 -; GCN-NEXT: v_cmp_eq_u64_e32 vcc, s[8:9], v[1:2] -; GCN-NEXT: s_and_saveexec_b64 s[6:7], vcc -; GCN-NEXT: s_swappc_b64 s[30:31], s[8:9] +; GCN-NEXT: v_readfirstlane_b32 s10, v1 +; GCN-NEXT: v_readfirstlane_b32 s11, v2 +; GCN-NEXT: v_cmp_eq_u64_e32 vcc, s[10:11], v[1:2] +; GCN-NEXT: s_and_saveexec_b64 s[8:9], vcc +; GCN-NEXT: s_swappc_b64 s[30:31], s[10:11] ; GCN-NEXT: v_mov_b32_e32 v3, v0 ; GCN-NEXT: ; implicit-def: $vgpr1_vgpr2 ; GCN-NEXT: ; implicit-def: $vgpr0 -; GCN-NEXT: s_xor_b64 exec, exec, s[6:7] +; GCN-NEXT: s_xor_b64 exec, exec, s[8:9] ; GCN-NEXT: s_cbranch_execnz .LBB8_1 ; GCN-NEXT: ; %bb.2: -; GCN-NEXT: s_mov_b64 exec, s[4:5] +; GCN-NEXT: 
s_mov_b64 exec, s[6:7] ; GCN-NEXT: v_mov_b32_e32 v0, v3 -; GCN-NEXT: v_readlane_b32 s63, v40, 31 -; GCN-NEXT: v_readlane_b32 s62, v40, 30 -; GCN-NEXT: v_readlane_b32 s61, v40, 29 -; GCN-NEXT: v_readlane_b32 s60, v40, 28 -; GCN-NEXT: v_readlane_b32 s59, v40, 27 -; GCN-NEXT: v_readlane_b32 s58, v40, 26 -; GCN-NEXT: v_readlane_b32 s57, v40, 25 -; GCN-NEXT: v_readlane_b32 s56, v40, 24 -; GCN-NEXT: v_readlane_b32 s55, v40, 23 -; GCN-NEXT: v_readlane_b32 s54, v40, 22 -; GCN-NEXT: v_readlane_b32 s53, v40, 21 -; GCN-NEXT: v_readlane_b32 s52, v40, 20 -; GCN-NEXT: v_readlane_b32 s51, v40, 19 -; GCN-NEXT: v_readlane_b32 s50, v40, 18 -; GCN-NEXT: v_readlane_b32 s49, v40, 17 -; GCN-NEXT: v_readlane_b32 s48, v40, 16 -; GCN-NEXT: v_readlane_b32 s47, v40, 15 -; GCN-NEXT: v_readlane_b32 s46, v40, 14 -; GCN-NEXT: v_readlane_b32 s45, v40, 13 -; GCN-NEXT: v_readlane_b32 s44, v40, 12 -; GCN-NEXT: v_readlane_b32 s43, v40, 11 -; GCN-NEXT: v_readlane_b32 s42, v40, 10 -; GCN-NEXT: v_readlane_b32 s41, v40, 9 -; GCN-NEXT: v_readlane_b32 s40, v40, 8 -; GCN-NEXT: v_readlane_b32 s39, v40, 7 -; GCN-NEXT: v_readlane_b32 s38, v40, 6 -; GCN-NEXT: v_readlane_b32 s37, v40, 5 -; GCN-NEXT: v_readlane_b32 s36, v40, 4 -; GCN-NEXT: v_readlane_b32 s35, v40, 3 -; GCN-NEXT: v_readlane_b32 s34, v40, 2 -; GCN-NEXT: v_readlane_b32 s31, v40, 1 -; GCN-NEXT: v_readlane_b32 s30, v40, 0 +; GCN-NEXT: v_readlane_b32 s63, v40, 29 +; GCN-NEXT: v_readlane_b32 s62, v40, 28 +; GCN-NEXT: v_readlane_b32 s61, v40, 27 +; GCN-NEXT: v_readlane_b32 s60, v40, 26 +; GCN-NEXT: v_readlane_b32 s59, v40, 25 +; GCN-NEXT: v_readlane_b32 s58, v40, 24 +; GCN-NEXT: v_readlane_b32 s57, v40, 23 +; GCN-NEXT: v_readlane_b32 s56, v40, 22 +; GCN-NEXT: v_readlane_b32 s55, v40, 21 +; GCN-NEXT: v_readlane_b32 s54, v40, 20 +; GCN-NEXT: v_readlane_b32 s53, v40, 19 +; GCN-NEXT: v_readlane_b32 s52, v40, 18 +; GCN-NEXT: v_readlane_b32 s51, v40, 17 +; GCN-NEXT: v_readlane_b32 s50, v40, 16 +; GCN-NEXT: v_readlane_b32 s49, v40, 15 +; GCN-NEXT: v_readlane_b32 s48, v40, 14 +; GCN-NEXT: v_readlane_b32 s47, v40, 13 +; GCN-NEXT: v_readlane_b32 s46, v40, 12 +; GCN-NEXT: v_readlane_b32 s45, v40, 11 +; GCN-NEXT: v_readlane_b32 s44, v40, 10 +; GCN-NEXT: v_readlane_b32 s43, v40, 9 +; GCN-NEXT: v_readlane_b32 s42, v40, 8 +; GCN-NEXT: v_readlane_b32 s41, v40, 7 +; GCN-NEXT: v_readlane_b32 s40, v40, 6 +; GCN-NEXT: v_readlane_b32 s39, v40, 5 +; GCN-NEXT: v_readlane_b32 s38, v40, 4 +; GCN-NEXT: v_readlane_b32 s37, v40, 3 +; GCN-NEXT: v_readlane_b32 s36, v40, 2 +; GCN-NEXT: v_readlane_b32 s35, v40, 1 +; GCN-NEXT: v_readlane_b32 s34, v40, 0 ; GCN-NEXT: s_addk_i32 s32, 0xfc00 -; GCN-NEXT: v_readlane_b32 s33, v40, 32 -; GCN-NEXT: s_or_saveexec_b64 s[4:5], -1 +; GCN-NEXT: v_readlane_b32 s33, v40, 30 +; GCN-NEXT: s_or_saveexec_b64 s[6:7], -1 ; GCN-NEXT: buffer_load_dword v40, off, s[0:3], s32 ; 4-byte Folded Reload -; GCN-NEXT: s_mov_b64 exec, s[4:5] +; GCN-NEXT: s_mov_b64 exec, s[6:7] ; GCN-NEXT: s_waitcnt vmcnt(0) -; GCN-NEXT: s_setpc_b64 s[30:31] +; GCN-NEXT: s_setpc_b64 s[4:5] ; ; GISEL-LABEL: test_indirect_call_vgpr_ptr_arg_and_return: ; GISEL: ; %bb.0: @@ -1566,95 +1551,92 @@ define i32 @test_indirect_call_vgpr_ptr_arg_and_return(i32 %i, i32(i32)* %fptr) ; GISEL-NEXT: s_or_saveexec_b64 s[4:5], -1 ; GISEL-NEXT: buffer_store_dword v40, off, s[0:3], s32 ; 4-byte Folded Spill ; GISEL-NEXT: s_mov_b64 exec, s[4:5] -; GISEL-NEXT: v_writelane_b32 v40, s33, 32 +; GISEL-NEXT: v_writelane_b32 v40, s33, 30 ; GISEL-NEXT: s_mov_b32 s33, s32 ; GISEL-NEXT: s_addk_i32 s32, 0x400 -; GISEL-NEXT: v_writelane_b32 
v40, s30, 0 -; GISEL-NEXT: v_writelane_b32 v40, s31, 1 -; GISEL-NEXT: v_writelane_b32 v40, s34, 2 -; GISEL-NEXT: v_writelane_b32 v40, s35, 3 -; GISEL-NEXT: v_writelane_b32 v40, s36, 4 -; GISEL-NEXT: v_writelane_b32 v40, s37, 5 -; GISEL-NEXT: v_writelane_b32 v40, s38, 6 -; GISEL-NEXT: v_writelane_b32 v40, s39, 7 -; GISEL-NEXT: v_writelane_b32 v40, s40, 8 -; GISEL-NEXT: v_writelane_b32 v40, s41, 9 -; GISEL-NEXT: v_writelane_b32 v40, s42, 10 -; GISEL-NEXT: v_writelane_b32 v40, s43, 11 -; GISEL-NEXT: v_writelane_b32 v40, s44, 12 -; GISEL-NEXT: v_writelane_b32 v40, s45, 13 -; GISEL-NEXT: v_writelane_b32 v40, s46, 14 -; GISEL-NEXT: v_writelane_b32 v40, s47, 15 -; GISEL-NEXT: v_writelane_b32 v40, s48, 16 -; GISEL-NEXT: v_writelane_b32 v40, s49, 17 -; GISEL-NEXT: v_writelane_b32 v40, s50, 18 -; GISEL-NEXT: v_writelane_b32 v40, s51, 19 -; GISEL-NEXT: v_writelane_b32 v40, s52, 20 -; GISEL-NEXT: v_writelane_b32 v40, s53, 21 -; GISEL-NEXT: v_writelane_b32 v40, s54, 22 -; GISEL-NEXT: v_writelane_b32 v40, s55, 23 -; GISEL-NEXT: v_writelane_b32 v40, s56, 24 -; GISEL-NEXT: v_writelane_b32 v40, s57, 25 -; GISEL-NEXT: v_writelane_b32 v40, s58, 26 -; GISEL-NEXT: v_writelane_b32 v40, s59, 27 -; GISEL-NEXT: v_writelane_b32 v40, s60, 28 -; GISEL-NEXT: v_writelane_b32 v40, s61, 29 -; GISEL-NEXT: v_writelane_b32 v40, s62, 30 -; GISEL-NEXT: v_writelane_b32 v40, s63, 31 -; GISEL-NEXT: s_mov_b64 s[4:5], exec +; GISEL-NEXT: v_writelane_b32 v40, s34, 0 +; GISEL-NEXT: v_writelane_b32 v40, s35, 1 +; GISEL-NEXT: v_writelane_b32 v40, s36, 2 +; GISEL-NEXT: v_writelane_b32 v40, s37, 3 +; GISEL-NEXT: v_writelane_b32 v40, s38, 4 +; GISEL-NEXT: v_writelane_b32 v40, s39, 5 +; GISEL-NEXT: v_writelane_b32 v40, s40, 6 +; GISEL-NEXT: v_writelane_b32 v40, s41, 7 +; GISEL-NEXT: v_writelane_b32 v40, s42, 8 +; GISEL-NEXT: v_writelane_b32 v40, s43, 9 +; GISEL-NEXT: v_writelane_b32 v40, s44, 10 +; GISEL-NEXT: v_writelane_b32 v40, s45, 11 +; GISEL-NEXT: v_writelane_b32 v40, s46, 12 +; GISEL-NEXT: v_writelane_b32 v40, s47, 13 +; GISEL-NEXT: v_writelane_b32 v40, s48, 14 +; GISEL-NEXT: v_writelane_b32 v40, s49, 15 +; GISEL-NEXT: v_writelane_b32 v40, s50, 16 +; GISEL-NEXT: v_writelane_b32 v40, s51, 17 +; GISEL-NEXT: v_writelane_b32 v40, s52, 18 +; GISEL-NEXT: v_writelane_b32 v40, s53, 19 +; GISEL-NEXT: v_writelane_b32 v40, s54, 20 +; GISEL-NEXT: v_writelane_b32 v40, s55, 21 +; GISEL-NEXT: v_writelane_b32 v40, s56, 22 +; GISEL-NEXT: v_writelane_b32 v40, s57, 23 +; GISEL-NEXT: v_writelane_b32 v40, s58, 24 +; GISEL-NEXT: v_writelane_b32 v40, s59, 25 +; GISEL-NEXT: v_writelane_b32 v40, s60, 26 +; GISEL-NEXT: v_writelane_b32 v40, s61, 27 +; GISEL-NEXT: v_writelane_b32 v40, s62, 28 +; GISEL-NEXT: v_writelane_b32 v40, s63, 29 +; GISEL-NEXT: s_mov_b64 s[4:5], s[30:31] +; GISEL-NEXT: s_mov_b64 s[6:7], exec ; GISEL-NEXT: .LBB8_1: ; =>This Inner Loop Header: Depth=1 -; GISEL-NEXT: v_readfirstlane_b32 s6, v1 -; GISEL-NEXT: v_readfirstlane_b32 s7, v2 -; GISEL-NEXT: v_cmp_eq_u64_e32 vcc, s[6:7], v[1:2] -; GISEL-NEXT: s_and_saveexec_b64 s[8:9], vcc -; GISEL-NEXT: s_swappc_b64 s[30:31], s[6:7] +; GISEL-NEXT: v_readfirstlane_b32 s8, v1 +; GISEL-NEXT: v_readfirstlane_b32 s9, v2 +; GISEL-NEXT: v_cmp_eq_u64_e32 vcc, s[8:9], v[1:2] +; GISEL-NEXT: s_and_saveexec_b64 s[10:11], vcc +; GISEL-NEXT: s_swappc_b64 s[30:31], s[8:9] ; GISEL-NEXT: v_mov_b32_e32 v3, v0 ; GISEL-NEXT: ; implicit-def: $vgpr1_vgpr2 ; GISEL-NEXT: ; implicit-def: $vgpr0 -; GISEL-NEXT: s_xor_b64 exec, exec, s[8:9] +; GISEL-NEXT: s_xor_b64 exec, exec, s[10:11] ; GISEL-NEXT: s_cbranch_execnz .LBB8_1 
; GISEL-NEXT: ; %bb.2: -; GISEL-NEXT: s_mov_b64 exec, s[4:5] +; GISEL-NEXT: s_mov_b64 exec, s[6:7] ; GISEL-NEXT: v_mov_b32_e32 v0, v3 -; GISEL-NEXT: v_readlane_b32 s63, v40, 31 -; GISEL-NEXT: v_readlane_b32 s62, v40, 30 -; GISEL-NEXT: v_readlane_b32 s61, v40, 29 -; GISEL-NEXT: v_readlane_b32 s60, v40, 28 -; GISEL-NEXT: v_readlane_b32 s59, v40, 27 -; GISEL-NEXT: v_readlane_b32 s58, v40, 26 -; GISEL-NEXT: v_readlane_b32 s57, v40, 25 -; GISEL-NEXT: v_readlane_b32 s56, v40, 24 -; GISEL-NEXT: v_readlane_b32 s55, v40, 23 -; GISEL-NEXT: v_readlane_b32 s54, v40, 22 -; GISEL-NEXT: v_readlane_b32 s53, v40, 21 -; GISEL-NEXT: v_readlane_b32 s52, v40, 20 -; GISEL-NEXT: v_readlane_b32 s51, v40, 19 -; GISEL-NEXT: v_readlane_b32 s50, v40, 18 -; GISEL-NEXT: v_readlane_b32 s49, v40, 17 -; GISEL-NEXT: v_readlane_b32 s48, v40, 16 -; GISEL-NEXT: v_readlane_b32 s47, v40, 15 -; GISEL-NEXT: v_readlane_b32 s46, v40, 14 -; GISEL-NEXT: v_readlane_b32 s45, v40, 13 -; GISEL-NEXT: v_readlane_b32 s44, v40, 12 -; GISEL-NEXT: v_readlane_b32 s43, v40, 11 -; GISEL-NEXT: v_readlane_b32 s42, v40, 10 -; GISEL-NEXT: v_readlane_b32 s41, v40, 9 -; GISEL-NEXT: v_readlane_b32 s40, v40, 8 -; GISEL-NEXT: v_readlane_b32 s39, v40, 7 -; GISEL-NEXT: v_readlane_b32 s38, v40, 6 -; GISEL-NEXT: v_readlane_b32 s37, v40, 5 -; GISEL-NEXT: v_readlane_b32 s36, v40, 4 -; GISEL-NEXT: v_readlane_b32 s35, v40, 3 -; GISEL-NEXT: v_readlane_b32 s34, v40, 2 -; GISEL-NEXT: v_readlane_b32 s31, v40, 1 -; GISEL-NEXT: v_readlane_b32 s30, v40, 0 +; GISEL-NEXT: v_readlane_b32 s63, v40, 29 +; GISEL-NEXT: v_readlane_b32 s62, v40, 28 +; GISEL-NEXT: v_readlane_b32 s61, v40, 27 +; GISEL-NEXT: v_readlane_b32 s60, v40, 26 +; GISEL-NEXT: v_readlane_b32 s59, v40, 25 +; GISEL-NEXT: v_readlane_b32 s58, v40, 24 +; GISEL-NEXT: v_readlane_b32 s57, v40, 23 +; GISEL-NEXT: v_readlane_b32 s56, v40, 22 +; GISEL-NEXT: v_readlane_b32 s55, v40, 21 +; GISEL-NEXT: v_readlane_b32 s54, v40, 20 +; GISEL-NEXT: v_readlane_b32 s53, v40, 19 +; GISEL-NEXT: v_readlane_b32 s52, v40, 18 +; GISEL-NEXT: v_readlane_b32 s51, v40, 17 +; GISEL-NEXT: v_readlane_b32 s50, v40, 16 +; GISEL-NEXT: v_readlane_b32 s49, v40, 15 +; GISEL-NEXT: v_readlane_b32 s48, v40, 14 +; GISEL-NEXT: v_readlane_b32 s47, v40, 13 +; GISEL-NEXT: v_readlane_b32 s46, v40, 12 +; GISEL-NEXT: v_readlane_b32 s45, v40, 11 +; GISEL-NEXT: v_readlane_b32 s44, v40, 10 +; GISEL-NEXT: v_readlane_b32 s43, v40, 9 +; GISEL-NEXT: v_readlane_b32 s42, v40, 8 +; GISEL-NEXT: v_readlane_b32 s41, v40, 7 +; GISEL-NEXT: v_readlane_b32 s40, v40, 6 +; GISEL-NEXT: v_readlane_b32 s39, v40, 5 +; GISEL-NEXT: v_readlane_b32 s38, v40, 4 +; GISEL-NEXT: v_readlane_b32 s37, v40, 3 +; GISEL-NEXT: v_readlane_b32 s36, v40, 2 +; GISEL-NEXT: v_readlane_b32 s35, v40, 1 +; GISEL-NEXT: v_readlane_b32 s34, v40, 0 ; GISEL-NEXT: s_addk_i32 s32, 0xfc00 -; GISEL-NEXT: v_readlane_b32 s33, v40, 32 -; GISEL-NEXT: s_or_saveexec_b64 s[4:5], -1 +; GISEL-NEXT: v_readlane_b32 s33, v40, 30 +; GISEL-NEXT: s_or_saveexec_b64 s[6:7], -1 ; GISEL-NEXT: buffer_load_dword v40, off, s[0:3], s32 ; 4-byte Folded Reload -; GISEL-NEXT: s_mov_b64 exec, s[4:5] +; GISEL-NEXT: s_mov_b64 exec, s[6:7] ; GISEL-NEXT: s_waitcnt vmcnt(0) -; GISEL-NEXT: s_setpc_b64 s[30:31] +; GISEL-NEXT: s_setpc_b64 s[4:5] %ret = call amdgpu_gfx i32 %fptr(i32 %i) ret i32 %ret } @@ -1667,92 +1649,89 @@ define void @test_indirect_tail_call_vgpr_ptr(void()* %fptr) { ; GCN-NEXT: s_or_saveexec_b64 s[4:5], -1 ; GCN-NEXT: buffer_store_dword v40, off, s[0:3], s32 ; 4-byte Folded Spill ; GCN-NEXT: s_mov_b64 exec, s[4:5] -; GCN-NEXT: 
v_writelane_b32 v40, s33, 32 +; GCN-NEXT: v_writelane_b32 v40, s33, 30 ; GCN-NEXT: s_mov_b32 s33, s32 ; GCN-NEXT: s_addk_i32 s32, 0x400 -; GCN-NEXT: v_writelane_b32 v40, s30, 0 -; GCN-NEXT: v_writelane_b32 v40, s31, 1 -; GCN-NEXT: v_writelane_b32 v40, s34, 2 -; GCN-NEXT: v_writelane_b32 v40, s35, 3 -; GCN-NEXT: v_writelane_b32 v40, s36, 4 -; GCN-NEXT: v_writelane_b32 v40, s37, 5 -; GCN-NEXT: v_writelane_b32 v40, s38, 6 -; GCN-NEXT: v_writelane_b32 v40, s39, 7 -; GCN-NEXT: v_writelane_b32 v40, s40, 8 -; GCN-NEXT: v_writelane_b32 v40, s41, 9 -; GCN-NEXT: v_writelane_b32 v40, s42, 10 -; GCN-NEXT: v_writelane_b32 v40, s43, 11 -; GCN-NEXT: v_writelane_b32 v40, s44, 12 -; GCN-NEXT: v_writelane_b32 v40, s45, 13 -; GCN-NEXT: v_writelane_b32 v40, s46, 14 -; GCN-NEXT: v_writelane_b32 v40, s47, 15 -; GCN-NEXT: v_writelane_b32 v40, s48, 16 -; GCN-NEXT: v_writelane_b32 v40, s49, 17 -; GCN-NEXT: v_writelane_b32 v40, s50, 18 -; GCN-NEXT: v_writelane_b32 v40, s51, 19 -; GCN-NEXT: v_writelane_b32 v40, s52, 20 -; GCN-NEXT: v_writelane_b32 v40, s53, 21 -; GCN-NEXT: v_writelane_b32 v40, s54, 22 -; GCN-NEXT: v_writelane_b32 v40, s55, 23 -; GCN-NEXT: v_writelane_b32 v40, s56, 24 -; GCN-NEXT: v_writelane_b32 v40, s57, 25 -; GCN-NEXT: v_writelane_b32 v40, s58, 26 -; GCN-NEXT: v_writelane_b32 v40, s59, 27 -; GCN-NEXT: v_writelane_b32 v40, s60, 28 -; GCN-NEXT: v_writelane_b32 v40, s61, 29 -; GCN-NEXT: v_writelane_b32 v40, s62, 30 -; GCN-NEXT: v_writelane_b32 v40, s63, 31 -; GCN-NEXT: s_mov_b64 s[4:5], exec +; GCN-NEXT: v_writelane_b32 v40, s34, 0 +; GCN-NEXT: v_writelane_b32 v40, s35, 1 +; GCN-NEXT: v_writelane_b32 v40, s36, 2 +; GCN-NEXT: v_writelane_b32 v40, s37, 3 +; GCN-NEXT: v_writelane_b32 v40, s38, 4 +; GCN-NEXT: v_writelane_b32 v40, s39, 5 +; GCN-NEXT: v_writelane_b32 v40, s40, 6 +; GCN-NEXT: v_writelane_b32 v40, s41, 7 +; GCN-NEXT: v_writelane_b32 v40, s42, 8 +; GCN-NEXT: v_writelane_b32 v40, s43, 9 +; GCN-NEXT: v_writelane_b32 v40, s44, 10 +; GCN-NEXT: v_writelane_b32 v40, s45, 11 +; GCN-NEXT: v_writelane_b32 v40, s46, 12 +; GCN-NEXT: v_writelane_b32 v40, s47, 13 +; GCN-NEXT: v_writelane_b32 v40, s48, 14 +; GCN-NEXT: v_writelane_b32 v40, s49, 15 +; GCN-NEXT: v_writelane_b32 v40, s50, 16 +; GCN-NEXT: v_writelane_b32 v40, s51, 17 +; GCN-NEXT: v_writelane_b32 v40, s52, 18 +; GCN-NEXT: v_writelane_b32 v40, s53, 19 +; GCN-NEXT: v_writelane_b32 v40, s54, 20 +; GCN-NEXT: v_writelane_b32 v40, s55, 21 +; GCN-NEXT: v_writelane_b32 v40, s56, 22 +; GCN-NEXT: v_writelane_b32 v40, s57, 23 +; GCN-NEXT: v_writelane_b32 v40, s58, 24 +; GCN-NEXT: v_writelane_b32 v40, s59, 25 +; GCN-NEXT: v_writelane_b32 v40, s60, 26 +; GCN-NEXT: v_writelane_b32 v40, s61, 27 +; GCN-NEXT: v_writelane_b32 v40, s62, 28 +; GCN-NEXT: v_writelane_b32 v40, s63, 29 +; GCN-NEXT: s_mov_b64 s[4:5], s[30:31] +; GCN-NEXT: s_mov_b64 s[6:7], exec ; GCN-NEXT: .LBB9_1: ; =>This Inner Loop Header: Depth=1 -; GCN-NEXT: v_readfirstlane_b32 s8, v0 -; GCN-NEXT: v_readfirstlane_b32 s9, v1 -; GCN-NEXT: v_cmp_eq_u64_e32 vcc, s[8:9], v[0:1] -; GCN-NEXT: s_and_saveexec_b64 s[6:7], vcc -; GCN-NEXT: s_swappc_b64 s[30:31], s[8:9] +; GCN-NEXT: v_readfirstlane_b32 s10, v0 +; GCN-NEXT: v_readfirstlane_b32 s11, v1 +; GCN-NEXT: v_cmp_eq_u64_e32 vcc, s[10:11], v[0:1] +; GCN-NEXT: s_and_saveexec_b64 s[8:9], vcc +; GCN-NEXT: s_swappc_b64 s[30:31], s[10:11] ; GCN-NEXT: ; implicit-def: $vgpr0_vgpr1 -; GCN-NEXT: s_xor_b64 exec, exec, s[6:7] +; GCN-NEXT: s_xor_b64 exec, exec, s[8:9] ; GCN-NEXT: s_cbranch_execnz .LBB9_1 ; GCN-NEXT: ; %bb.2: -; GCN-NEXT: s_mov_b64 exec, s[4:5] -; 
GCN-NEXT: v_readlane_b32 s63, v40, 31 -; GCN-NEXT: v_readlane_b32 s62, v40, 30 -; GCN-NEXT: v_readlane_b32 s61, v40, 29 -; GCN-NEXT: v_readlane_b32 s60, v40, 28 -; GCN-NEXT: v_readlane_b32 s59, v40, 27 -; GCN-NEXT: v_readlane_b32 s58, v40, 26 -; GCN-NEXT: v_readlane_b32 s57, v40, 25 -; GCN-NEXT: v_readlane_b32 s56, v40, 24 -; GCN-NEXT: v_readlane_b32 s55, v40, 23 -; GCN-NEXT: v_readlane_b32 s54, v40, 22 -; GCN-NEXT: v_readlane_b32 s53, v40, 21 -; GCN-NEXT: v_readlane_b32 s52, v40, 20 -; GCN-NEXT: v_readlane_b32 s51, v40, 19 -; GCN-NEXT: v_readlane_b32 s50, v40, 18 -; GCN-NEXT: v_readlane_b32 s49, v40, 17 -; GCN-NEXT: v_readlane_b32 s48, v40, 16 -; GCN-NEXT: v_readlane_b32 s47, v40, 15 -; GCN-NEXT: v_readlane_b32 s46, v40, 14 -; GCN-NEXT: v_readlane_b32 s45, v40, 13 -; GCN-NEXT: v_readlane_b32 s44, v40, 12 -; GCN-NEXT: v_readlane_b32 s43, v40, 11 -; GCN-NEXT: v_readlane_b32 s42, v40, 10 -; GCN-NEXT: v_readlane_b32 s41, v40, 9 -; GCN-NEXT: v_readlane_b32 s40, v40, 8 -; GCN-NEXT: v_readlane_b32 s39, v40, 7 -; GCN-NEXT: v_readlane_b32 s38, v40, 6 -; GCN-NEXT: v_readlane_b32 s37, v40, 5 -; GCN-NEXT: v_readlane_b32 s36, v40, 4 -; GCN-NEXT: v_readlane_b32 s35, v40, 3 -; GCN-NEXT: v_readlane_b32 s34, v40, 2 -; GCN-NEXT: v_readlane_b32 s31, v40, 1 -; GCN-NEXT: v_readlane_b32 s30, v40, 0 +; GCN-NEXT: s_mov_b64 exec, s[6:7] +; GCN-NEXT: v_readlane_b32 s63, v40, 29 +; GCN-NEXT: v_readlane_b32 s62, v40, 28 +; GCN-NEXT: v_readlane_b32 s61, v40, 27 +; GCN-NEXT: v_readlane_b32 s60, v40, 26 +; GCN-NEXT: v_readlane_b32 s59, v40, 25 +; GCN-NEXT: v_readlane_b32 s58, v40, 24 +; GCN-NEXT: v_readlane_b32 s57, v40, 23 +; GCN-NEXT: v_readlane_b32 s56, v40, 22 +; GCN-NEXT: v_readlane_b32 s55, v40, 21 +; GCN-NEXT: v_readlane_b32 s54, v40, 20 +; GCN-NEXT: v_readlane_b32 s53, v40, 19 +; GCN-NEXT: v_readlane_b32 s52, v40, 18 +; GCN-NEXT: v_readlane_b32 s51, v40, 17 +; GCN-NEXT: v_readlane_b32 s50, v40, 16 +; GCN-NEXT: v_readlane_b32 s49, v40, 15 +; GCN-NEXT: v_readlane_b32 s48, v40, 14 +; GCN-NEXT: v_readlane_b32 s47, v40, 13 +; GCN-NEXT: v_readlane_b32 s46, v40, 12 +; GCN-NEXT: v_readlane_b32 s45, v40, 11 +; GCN-NEXT: v_readlane_b32 s44, v40, 10 +; GCN-NEXT: v_readlane_b32 s43, v40, 9 +; GCN-NEXT: v_readlane_b32 s42, v40, 8 +; GCN-NEXT: v_readlane_b32 s41, v40, 7 +; GCN-NEXT: v_readlane_b32 s40, v40, 6 +; GCN-NEXT: v_readlane_b32 s39, v40, 5 +; GCN-NEXT: v_readlane_b32 s38, v40, 4 +; GCN-NEXT: v_readlane_b32 s37, v40, 3 +; GCN-NEXT: v_readlane_b32 s36, v40, 2 +; GCN-NEXT: v_readlane_b32 s35, v40, 1 +; GCN-NEXT: v_readlane_b32 s34, v40, 0 ; GCN-NEXT: s_addk_i32 s32, 0xfc00 -; GCN-NEXT: v_readlane_b32 s33, v40, 32 -; GCN-NEXT: s_or_saveexec_b64 s[4:5], -1 +; GCN-NEXT: v_readlane_b32 s33, v40, 30 +; GCN-NEXT: s_or_saveexec_b64 s[6:7], -1 ; GCN-NEXT: buffer_load_dword v40, off, s[0:3], s32 ; 4-byte Folded Reload -; GCN-NEXT: s_mov_b64 exec, s[4:5] +; GCN-NEXT: s_mov_b64 exec, s[6:7] ; GCN-NEXT: s_waitcnt vmcnt(0) -; GCN-NEXT: s_setpc_b64 s[30:31] +; GCN-NEXT: s_setpc_b64 s[4:5] ; ; GISEL-LABEL: test_indirect_tail_call_vgpr_ptr: ; GISEL: ; %bb.0: @@ -1760,92 +1739,89 @@ define void @test_indirect_tail_call_vgpr_ptr(void()* %fptr) { ; GISEL-NEXT: s_or_saveexec_b64 s[4:5], -1 ; GISEL-NEXT: buffer_store_dword v40, off, s[0:3], s32 ; 4-byte Folded Spill ; GISEL-NEXT: s_mov_b64 exec, s[4:5] -; GISEL-NEXT: v_writelane_b32 v40, s33, 32 +; GISEL-NEXT: v_writelane_b32 v40, s33, 30 ; GISEL-NEXT: s_mov_b32 s33, s32 ; GISEL-NEXT: s_addk_i32 s32, 0x400 -; GISEL-NEXT: v_writelane_b32 v40, s30, 0 -; GISEL-NEXT: v_writelane_b32 v40, s31, 
1 -; GISEL-NEXT: v_writelane_b32 v40, s34, 2 -; GISEL-NEXT: v_writelane_b32 v40, s35, 3 -; GISEL-NEXT: v_writelane_b32 v40, s36, 4 -; GISEL-NEXT: v_writelane_b32 v40, s37, 5 -; GISEL-NEXT: v_writelane_b32 v40, s38, 6 -; GISEL-NEXT: v_writelane_b32 v40, s39, 7 -; GISEL-NEXT: v_writelane_b32 v40, s40, 8 -; GISEL-NEXT: v_writelane_b32 v40, s41, 9 -; GISEL-NEXT: v_writelane_b32 v40, s42, 10 -; GISEL-NEXT: v_writelane_b32 v40, s43, 11 -; GISEL-NEXT: v_writelane_b32 v40, s44, 12 -; GISEL-NEXT: v_writelane_b32 v40, s45, 13 -; GISEL-NEXT: v_writelane_b32 v40, s46, 14 -; GISEL-NEXT: v_writelane_b32 v40, s47, 15 -; GISEL-NEXT: v_writelane_b32 v40, s48, 16 -; GISEL-NEXT: v_writelane_b32 v40, s49, 17 -; GISEL-NEXT: v_writelane_b32 v40, s50, 18 -; GISEL-NEXT: v_writelane_b32 v40, s51, 19 -; GISEL-NEXT: v_writelane_b32 v40, s52, 20 -; GISEL-NEXT: v_writelane_b32 v40, s53, 21 -; GISEL-NEXT: v_writelane_b32 v40, s54, 22 -; GISEL-NEXT: v_writelane_b32 v40, s55, 23 -; GISEL-NEXT: v_writelane_b32 v40, s56, 24 -; GISEL-NEXT: v_writelane_b32 v40, s57, 25 -; GISEL-NEXT: v_writelane_b32 v40, s58, 26 -; GISEL-NEXT: v_writelane_b32 v40, s59, 27 -; GISEL-NEXT: v_writelane_b32 v40, s60, 28 -; GISEL-NEXT: v_writelane_b32 v40, s61, 29 -; GISEL-NEXT: v_writelane_b32 v40, s62, 30 -; GISEL-NEXT: v_writelane_b32 v40, s63, 31 -; GISEL-NEXT: s_mov_b64 s[4:5], exec +; GISEL-NEXT: v_writelane_b32 v40, s34, 0 +; GISEL-NEXT: v_writelane_b32 v40, s35, 1 +; GISEL-NEXT: v_writelane_b32 v40, s36, 2 +; GISEL-NEXT: v_writelane_b32 v40, s37, 3 +; GISEL-NEXT: v_writelane_b32 v40, s38, 4 +; GISEL-NEXT: v_writelane_b32 v40, s39, 5 +; GISEL-NEXT: v_writelane_b32 v40, s40, 6 +; GISEL-NEXT: v_writelane_b32 v40, s41, 7 +; GISEL-NEXT: v_writelane_b32 v40, s42, 8 +; GISEL-NEXT: v_writelane_b32 v40, s43, 9 +; GISEL-NEXT: v_writelane_b32 v40, s44, 10 +; GISEL-NEXT: v_writelane_b32 v40, s45, 11 +; GISEL-NEXT: v_writelane_b32 v40, s46, 12 +; GISEL-NEXT: v_writelane_b32 v40, s47, 13 +; GISEL-NEXT: v_writelane_b32 v40, s48, 14 +; GISEL-NEXT: v_writelane_b32 v40, s49, 15 +; GISEL-NEXT: v_writelane_b32 v40, s50, 16 +; GISEL-NEXT: v_writelane_b32 v40, s51, 17 +; GISEL-NEXT: v_writelane_b32 v40, s52, 18 +; GISEL-NEXT: v_writelane_b32 v40, s53, 19 +; GISEL-NEXT: v_writelane_b32 v40, s54, 20 +; GISEL-NEXT: v_writelane_b32 v40, s55, 21 +; GISEL-NEXT: v_writelane_b32 v40, s56, 22 +; GISEL-NEXT: v_writelane_b32 v40, s57, 23 +; GISEL-NEXT: v_writelane_b32 v40, s58, 24 +; GISEL-NEXT: v_writelane_b32 v40, s59, 25 +; GISEL-NEXT: v_writelane_b32 v40, s60, 26 +; GISEL-NEXT: v_writelane_b32 v40, s61, 27 +; GISEL-NEXT: v_writelane_b32 v40, s62, 28 +; GISEL-NEXT: v_writelane_b32 v40, s63, 29 +; GISEL-NEXT: s_mov_b64 s[4:5], s[30:31] +; GISEL-NEXT: s_mov_b64 s[6:7], exec ; GISEL-NEXT: .LBB9_1: ; =>This Inner Loop Header: Depth=1 -; GISEL-NEXT: v_readfirstlane_b32 s6, v0 -; GISEL-NEXT: v_readfirstlane_b32 s7, v1 -; GISEL-NEXT: v_cmp_eq_u64_e32 vcc, s[6:7], v[0:1] -; GISEL-NEXT: s_and_saveexec_b64 s[8:9], vcc -; GISEL-NEXT: s_swappc_b64 s[30:31], s[6:7] +; GISEL-NEXT: v_readfirstlane_b32 s8, v0 +; GISEL-NEXT: v_readfirstlane_b32 s9, v1 +; GISEL-NEXT: v_cmp_eq_u64_e32 vcc, s[8:9], v[0:1] +; GISEL-NEXT: s_and_saveexec_b64 s[10:11], vcc +; GISEL-NEXT: s_swappc_b64 s[30:31], s[8:9] ; GISEL-NEXT: ; implicit-def: $vgpr0_vgpr1 -; GISEL-NEXT: s_xor_b64 exec, exec, s[8:9] +; GISEL-NEXT: s_xor_b64 exec, exec, s[10:11] ; GISEL-NEXT: s_cbranch_execnz .LBB9_1 ; GISEL-NEXT: ; %bb.2: -; GISEL-NEXT: s_mov_b64 exec, s[4:5] -; GISEL-NEXT: v_readlane_b32 s63, v40, 31 -; GISEL-NEXT: 
v_readlane_b32 s62, v40, 30 -; GISEL-NEXT: v_readlane_b32 s61, v40, 29 -; GISEL-NEXT: v_readlane_b32 s60, v40, 28 -; GISEL-NEXT: v_readlane_b32 s59, v40, 27 -; GISEL-NEXT: v_readlane_b32 s58, v40, 26 -; GISEL-NEXT: v_readlane_b32 s57, v40, 25 -; GISEL-NEXT: v_readlane_b32 s56, v40, 24 -; GISEL-NEXT: v_readlane_b32 s55, v40, 23 -; GISEL-NEXT: v_readlane_b32 s54, v40, 22 -; GISEL-NEXT: v_readlane_b32 s53, v40, 21 -; GISEL-NEXT: v_readlane_b32 s52, v40, 20 -; GISEL-NEXT: v_readlane_b32 s51, v40, 19 -; GISEL-NEXT: v_readlane_b32 s50, v40, 18 -; GISEL-NEXT: v_readlane_b32 s49, v40, 17 -; GISEL-NEXT: v_readlane_b32 s48, v40, 16 -; GISEL-NEXT: v_readlane_b32 s47, v40, 15 -; GISEL-NEXT: v_readlane_b32 s46, v40, 14 -; GISEL-NEXT: v_readlane_b32 s45, v40, 13 -; GISEL-NEXT: v_readlane_b32 s44, v40, 12 -; GISEL-NEXT: v_readlane_b32 s43, v40, 11 -; GISEL-NEXT: v_readlane_b32 s42, v40, 10 -; GISEL-NEXT: v_readlane_b32 s41, v40, 9 -; GISEL-NEXT: v_readlane_b32 s40, v40, 8 -; GISEL-NEXT: v_readlane_b32 s39, v40, 7 -; GISEL-NEXT: v_readlane_b32 s38, v40, 6 -; GISEL-NEXT: v_readlane_b32 s37, v40, 5 -; GISEL-NEXT: v_readlane_b32 s36, v40, 4 -; GISEL-NEXT: v_readlane_b32 s35, v40, 3 -; GISEL-NEXT: v_readlane_b32 s34, v40, 2 -; GISEL-NEXT: v_readlane_b32 s31, v40, 1 -; GISEL-NEXT: v_readlane_b32 s30, v40, 0 +; GISEL-NEXT: s_mov_b64 exec, s[6:7] +; GISEL-NEXT: v_readlane_b32 s63, v40, 29 +; GISEL-NEXT: v_readlane_b32 s62, v40, 28 +; GISEL-NEXT: v_readlane_b32 s61, v40, 27 +; GISEL-NEXT: v_readlane_b32 s60, v40, 26 +; GISEL-NEXT: v_readlane_b32 s59, v40, 25 +; GISEL-NEXT: v_readlane_b32 s58, v40, 24 +; GISEL-NEXT: v_readlane_b32 s57, v40, 23 +; GISEL-NEXT: v_readlane_b32 s56, v40, 22 +; GISEL-NEXT: v_readlane_b32 s55, v40, 21 +; GISEL-NEXT: v_readlane_b32 s54, v40, 20 +; GISEL-NEXT: v_readlane_b32 s53, v40, 19 +; GISEL-NEXT: v_readlane_b32 s52, v40, 18 +; GISEL-NEXT: v_readlane_b32 s51, v40, 17 +; GISEL-NEXT: v_readlane_b32 s50, v40, 16 +; GISEL-NEXT: v_readlane_b32 s49, v40, 15 +; GISEL-NEXT: v_readlane_b32 s48, v40, 14 +; GISEL-NEXT: v_readlane_b32 s47, v40, 13 +; GISEL-NEXT: v_readlane_b32 s46, v40, 12 +; GISEL-NEXT: v_readlane_b32 s45, v40, 11 +; GISEL-NEXT: v_readlane_b32 s44, v40, 10 +; GISEL-NEXT: v_readlane_b32 s43, v40, 9 +; GISEL-NEXT: v_readlane_b32 s42, v40, 8 +; GISEL-NEXT: v_readlane_b32 s41, v40, 7 +; GISEL-NEXT: v_readlane_b32 s40, v40, 6 +; GISEL-NEXT: v_readlane_b32 s39, v40, 5 +; GISEL-NEXT: v_readlane_b32 s38, v40, 4 +; GISEL-NEXT: v_readlane_b32 s37, v40, 3 +; GISEL-NEXT: v_readlane_b32 s36, v40, 2 +; GISEL-NEXT: v_readlane_b32 s35, v40, 1 +; GISEL-NEXT: v_readlane_b32 s34, v40, 0 ; GISEL-NEXT: s_addk_i32 s32, 0xfc00 -; GISEL-NEXT: v_readlane_b32 s33, v40, 32 -; GISEL-NEXT: s_or_saveexec_b64 s[4:5], -1 +; GISEL-NEXT: v_readlane_b32 s33, v40, 30 +; GISEL-NEXT: s_or_saveexec_b64 s[6:7], -1 ; GISEL-NEXT: buffer_load_dword v40, off, s[0:3], s32 ; 4-byte Folded Reload -; GISEL-NEXT: s_mov_b64 exec, s[4:5] +; GISEL-NEXT: s_mov_b64 exec, s[6:7] ; GISEL-NEXT: s_waitcnt vmcnt(0) -; GISEL-NEXT: s_setpc_b64 s[30:31] +; GISEL-NEXT: s_setpc_b64 s[4:5] tail call amdgpu_gfx void %fptr() ret void } diff --git a/llvm/test/CodeGen/AMDGPU/inline-asm.i128.ll b/llvm/test/CodeGen/AMDGPU/inline-asm.i128.ll index d3e18dbaaa6d..b303e69064a0 100644 --- a/llvm/test/CodeGen/AMDGPU/inline-asm.i128.ll +++ b/llvm/test/CodeGen/AMDGPU/inline-asm.i128.ll @@ -8,15 +8,15 @@ define amdgpu_kernel void @s_input_output_i128() { ; GFX908-LABEL: name: s_input_output_i128 ; GFX908: bb.0 (%ir-block.0): - ; GFX908-NEXT: INLINEASM &"; 
def $0", 1 /* sideeffect attdialect */, 5242890 /* regdef:SGPR_128 */, def %4 + ; GFX908-NEXT: INLINEASM &"; def $0", 1 /* sideeffect attdialect */, 5767178 /* regdef:SGPR_128 */, def %4 ; GFX908-NEXT: [[COPY:%[0-9]+]]:sgpr_128 = COPY %4 - ; GFX908-NEXT: INLINEASM &"; use $0", 1 /* sideeffect attdialect */, 5242889 /* reguse:SGPR_128 */, [[COPY]] + ; GFX908-NEXT: INLINEASM &"; use $0", 1 /* sideeffect attdialect */, 5767177 /* reguse:SGPR_128 */, [[COPY]] ; GFX908-NEXT: S_ENDPGM 0 ; GFX90A-LABEL: name: s_input_output_i128 ; GFX90A: bb.0 (%ir-block.0): - ; GFX90A-NEXT: INLINEASM &"; def $0", 1 /* sideeffect attdialect */, 5242890 /* regdef:SGPR_128 */, def %4 + ; GFX90A-NEXT: INLINEASM &"; def $0", 1 /* sideeffect attdialect */, 5767178 /* regdef:SGPR_128 */, def %4 ; GFX90A-NEXT: [[COPY:%[0-9]+]]:sgpr_128 = COPY %4 - ; GFX90A-NEXT: INLINEASM &"; use $0", 1 /* sideeffect attdialect */, 5242889 /* reguse:SGPR_128 */, [[COPY]] + ; GFX90A-NEXT: INLINEASM &"; use $0", 1 /* sideeffect attdialect */, 5767177 /* reguse:SGPR_128 */, [[COPY]] ; GFX90A-NEXT: S_ENDPGM 0 %val = tail call i128 asm sideeffect "; def $0", "=s"() call void asm sideeffect "; use $0", "s"(i128 %val) @@ -26,15 +26,15 @@ define amdgpu_kernel void @s_input_output_i128() { define amdgpu_kernel void @v_input_output_i128() { ; GFX908-LABEL: name: v_input_output_i128 ; GFX908: bb.0 (%ir-block.0): - ; GFX908-NEXT: INLINEASM &"; def $0", 1 /* sideeffect attdialect */, 4784138 /* regdef:VReg_128 */, def %4 + ; GFX908-NEXT: INLINEASM &"; def $0", 1 /* sideeffect attdialect */, 5308426 /* regdef:VReg_128 */, def %4 ; GFX908-NEXT: [[COPY:%[0-9]+]]:vreg_128 = COPY %4 - ; GFX908-NEXT: INLINEASM &"; use $0", 1 /* sideeffect attdialect */, 4784137 /* reguse:VReg_128 */, [[COPY]] + ; GFX908-NEXT: INLINEASM &"; use $0", 1 /* sideeffect attdialect */, 5308425 /* reguse:VReg_128 */, [[COPY]] ; GFX908-NEXT: S_ENDPGM 0 ; GFX90A-LABEL: name: v_input_output_i128 ; GFX90A: bb.0 (%ir-block.0): - ; GFX90A-NEXT: INLINEASM &"; def $0", 1 /* sideeffect attdialect */, 4980746 /* regdef:VReg_128_Align2 */, def %4 + ; GFX90A-NEXT: INLINEASM &"; def $0", 1 /* sideeffect attdialect */, 5505034 /* regdef:VReg_128_Align2 */, def %4 ; GFX90A-NEXT: [[COPY:%[0-9]+]]:vreg_128_align2 = COPY %4 - ; GFX90A-NEXT: INLINEASM &"; use $0", 1 /* sideeffect attdialect */, 4980745 /* reguse:VReg_128_Align2 */, [[COPY]] + ; GFX90A-NEXT: INLINEASM &"; use $0", 1 /* sideeffect attdialect */, 5505033 /* reguse:VReg_128_Align2 */, [[COPY]] ; GFX90A-NEXT: S_ENDPGM 0 %val = tail call i128 asm sideeffect "; def $0", "=v"() call void asm sideeffect "; use $0", "v"(i128 %val) @@ -44,15 +44,15 @@ define amdgpu_kernel void @v_input_output_i128() { define amdgpu_kernel void @a_input_output_i128() { ; GFX908-LABEL: name: a_input_output_i128 ; GFX908: bb.0 (%ir-block.0): - ; GFX908-NEXT: INLINEASM &"; def $0", 1 /* sideeffect attdialect */, 4718602 /* regdef:AReg_128 */, def %4 + ; GFX908-NEXT: INLINEASM &"; def $0", 1 /* sideeffect attdialect */, 5242890 /* regdef:AReg_128 */, def %4 ; GFX908-NEXT: [[COPY:%[0-9]+]]:areg_128 = COPY %4 - ; GFX908-NEXT: INLINEASM &"; use $0", 1 /* sideeffect attdialect */, 4718601 /* reguse:AReg_128 */, [[COPY]] + ; GFX908-NEXT: INLINEASM &"; use $0", 1 /* sideeffect attdialect */, 5242889 /* reguse:AReg_128 */, [[COPY]] ; GFX908-NEXT: S_ENDPGM 0 ; GFX90A-LABEL: name: a_input_output_i128 ; GFX90A: bb.0 (%ir-block.0): - ; GFX90A-NEXT: INLINEASM &"; def $0", 1 /* sideeffect attdialect */, 4915210 /* regdef:AReg_128_Align2 */, def %4 + ; GFX90A-NEXT: INLINEASM 
&"; def $0", 1 /* sideeffect attdialect */, 5439498 /* regdef:AReg_128_Align2 */, def %4 ; GFX90A-NEXT: [[COPY:%[0-9]+]]:areg_128_align2 = COPY %4 - ; GFX90A-NEXT: INLINEASM &"; use $0", 1 /* sideeffect attdialect */, 4915209 /* reguse:AReg_128_Align2 */, [[COPY]] + ; GFX90A-NEXT: INLINEASM &"; use $0", 1 /* sideeffect attdialect */, 5439497 /* reguse:AReg_128_Align2 */, [[COPY]] ; GFX90A-NEXT: S_ENDPGM 0 %val = call i128 asm sideeffect "; def $0", "=a"() call void asm sideeffect "; use $0", "a"(i128 %val) diff --git a/llvm/test/CodeGen/AMDGPU/ipra.ll b/llvm/test/CodeGen/AMDGPU/ipra.ll index 6e4752b05110..1466d3b76bec 100644 --- a/llvm/test/CodeGen/AMDGPU/ipra.ll +++ b/llvm/test/CodeGen/AMDGPU/ipra.ll @@ -40,16 +40,20 @@ define amdgpu_kernel void @kernel_call() #0 { } ; GCN-LABEL: {{^}}func_regular_call: +; GCN-NOT: buffer_store ; GCN-NOT: buffer_load ; GCN-NOT: readlane -; GCN: flat_load_dword v9 +; GCN-NOT: writelane +; GCN: flat_load_dword v8 ; GCN: s_swappc_b64 +; GCN-NOT: buffer_store ; GCN-NOT: buffer_load ; GCN-NOT: readlane -; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, v9 +; GCN-NOT: writelane +; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, v8 -; GCN: ; NumSgprs: 34 -; GCN: ; NumVgprs: 10 +; GCN: ; NumSgprs: 32 +; GCN: ; NumVgprs: 9 define void @func_regular_call() #1 { %vgpr = load volatile i32, i32 addrspace(1)* undef tail call void @func() @@ -72,13 +76,13 @@ define void @func_tail_call() #1 { } ; GCN-LABEL: {{^}}func_call_tail_call: -; GCN: flat_load_dword v9 +; GCN: flat_load_dword v8 ; GCN: s_swappc_b64 -; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, v9 +; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, v8 ; GCN: s_setpc_b64 -; GCN: ; NumSgprs: 34 -; GCN: ; NumVgprs: 10 +; GCN: ; NumSgprs: 32 +; GCN: ; NumVgprs: 9 define void @func_call_tail_call() #1 { %vgpr = load volatile i32, i32 addrspace(1)* undef tail call void @func() @@ -93,11 +97,8 @@ define void @void_func_void() noinline { ; Make sure we don't get save/restore of FP between calls. 
; GCN-LABEL: {{^}}test_funcx2: -; GCN: s_getpc_b64 +; GCN-NOT: s5 ; GCN-NOT: s32 -; GCN: s_swappc_b64 -; GCN-NOT: s32 -; GCN: s_swappc_b64 define void @test_funcx2() #0 { call void @void_func_void() call void @void_func_void() diff --git a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.implicitarg.ptr.ll b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.implicitarg.ptr.ll index 581bb7233e50..9b6143d7a952 100644 --- a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.implicitarg.ptr.ll +++ b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.implicitarg.ptr.ll @@ -197,8 +197,6 @@ define amdgpu_kernel void @opencl_kernel_call_implicitarg_ptr_func([112 x i8]) # ; GCN-NOT: s8 ; GCN-NOT: s9 ; GCN-NOT: s[8:9] -; GCN: s_swappc_b64 -; GCN: s_setpc_b64 s[30:31] define void @func_call_implicitarg_ptr_func() #0 { call void @func_implicitarg_ptr() ret void @@ -208,8 +206,6 @@ define void @func_call_implicitarg_ptr_func() #0 { ; GCN-NOT: s8 ; GCN-NOT: s9 ; GCN-NOT: s[8:9] -; GCN: s_swappc_b64 -; GCN: s_setpc_b64 s[30:31] define void @opencl_func_call_implicitarg_ptr_func() #0 { call void @func_implicitarg_ptr() ret void diff --git a/llvm/test/CodeGen/AMDGPU/llvm.powi.ll b/llvm/test/CodeGen/AMDGPU/llvm.powi.ll index a95df6268034..7838d5ab8def 100644 --- a/llvm/test/CodeGen/AMDGPU/llvm.powi.ll +++ b/llvm/test/CodeGen/AMDGPU/llvm.powi.ll @@ -57,9 +57,9 @@ define float @v_powi_neg1_f32(float %l) { ; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX7-NEXT: v_div_scale_f32 v1, s[4:5], v0, v0, 1.0 ; GFX7-NEXT: v_rcp_f32_e32 v2, v1 +; GFX7-NEXT: v_fma_f32 v3, -v1, v2, 1.0 +; GFX7-NEXT: v_fma_f32 v2, v3, v2, v2 ; GFX7-NEXT: v_div_scale_f32 v3, vcc, 1.0, v0, 1.0 -; GFX7-NEXT: v_fma_f32 v4, -v1, v2, 1.0 -; GFX7-NEXT: v_fma_f32 v2, v4, v2, v2 ; GFX7-NEXT: v_mul_f32_e32 v4, v3, v2 ; GFX7-NEXT: v_fma_f32 v5, -v1, v4, v3 ; GFX7-NEXT: v_fma_f32 v4, v5, v2, v4 @@ -104,9 +104,9 @@ define float @v_powi_neg2_f32(float %l) { ; GFX7-NEXT: v_mul_f32_e32 v0, v0, v0 ; GFX7-NEXT: v_div_scale_f32 v1, s[4:5], v0, v0, 1.0 ; GFX7-NEXT: v_rcp_f32_e32 v2, v1 +; GFX7-NEXT: v_fma_f32 v3, -v1, v2, 1.0 +; GFX7-NEXT: v_fma_f32 v2, v3, v2, v2 ; GFX7-NEXT: v_div_scale_f32 v3, vcc, 1.0, v0, 1.0 -; GFX7-NEXT: v_fma_f32 v4, -v1, v2, 1.0 -; GFX7-NEXT: v_fma_f32 v2, v4, v2, v2 ; GFX7-NEXT: v_mul_f32_e32 v4, v3, v2 ; GFX7-NEXT: v_fma_f32 v5, -v1, v4, v3 ; GFX7-NEXT: v_fma_f32 v4, v5, v2, v4 @@ -200,9 +200,9 @@ define float @v_powi_neg128_f32(float %l) { ; GFX7-NEXT: v_mul_f32_e32 v0, v0, v0 ; GFX7-NEXT: v_div_scale_f32 v1, s[4:5], v0, v0, 1.0 ; GFX7-NEXT: v_rcp_f32_e32 v2, v1 +; GFX7-NEXT: v_fma_f32 v3, -v1, v2, 1.0 +; GFX7-NEXT: v_fma_f32 v2, v3, v2, v2 ; GFX7-NEXT: v_div_scale_f32 v3, vcc, 1.0, v0, 1.0 -; GFX7-NEXT: v_fma_f32 v4, -v1, v2, 1.0 -; GFX7-NEXT: v_fma_f32 v2, v4, v2, v2 ; GFX7-NEXT: v_mul_f32_e32 v4, v3, v2 ; GFX7-NEXT: v_fma_f32 v5, -v1, v4, v3 ; GFX7-NEXT: v_fma_f32 v4, v5, v2, v4 diff --git a/llvm/test/CodeGen/AMDGPU/mul24-pass-ordering.ll b/llvm/test/CodeGen/AMDGPU/mul24-pass-ordering.ll index 7ae67bad68ae..2fac70d96bc3 100644 --- a/llvm/test/CodeGen/AMDGPU/mul24-pass-ordering.ll +++ b/llvm/test/CodeGen/AMDGPU/mul24-pass-ordering.ll @@ -193,22 +193,22 @@ define void @slsr1_1(i32 %b.arg, i32 %s.arg) #0 { ; GFX9-NEXT: buffer_store_dword v40, off, s[0:3], s32 offset:12 ; 4-byte Folded Spill ; GFX9-NEXT: s_mov_b64 exec, s[4:5] ; GFX9-NEXT: v_writelane_b32 v40, s33, 4 -; GFX9-NEXT: v_writelane_b32 v40, s30, 0 ; GFX9-NEXT: s_mov_b32 s33, s32 ; GFX9-NEXT: s_addk_i32 s32, 0x800 -; GFX9-NEXT: v_writelane_b32 v40, s31, 1 -; GFX9-NEXT: v_writelane_b32 v40, s34, 2 +; GFX9-NEXT: 
v_writelane_b32 v40, s34, 0 ; GFX9-NEXT: s_getpc_b64 s[4:5] ; GFX9-NEXT: s_add_u32 s4, s4, foo@gotpcrel32@lo+4 ; GFX9-NEXT: s_addc_u32 s5, s5, foo@gotpcrel32@hi+12 -; GFX9-NEXT: v_writelane_b32 v40, s35, 3 +; GFX9-NEXT: v_writelane_b32 v40, s35, 1 ; GFX9-NEXT: s_load_dwordx2 s[34:35], s[4:5], 0x0 ; GFX9-NEXT: buffer_store_dword v41, off, s[0:3], s33 offset:8 ; 4-byte Folded Spill ; GFX9-NEXT: buffer_store_dword v42, off, s[0:3], s33 offset:4 ; 4-byte Folded Spill ; GFX9-NEXT: buffer_store_dword v43, off, s[0:3], s33 ; 4-byte Folded Spill ; GFX9-NEXT: v_mov_b32_e32 v41, v1 ; GFX9-NEXT: v_mov_b32_e32 v42, v0 +; GFX9-NEXT: v_writelane_b32 v40, s30, 2 ; GFX9-NEXT: v_mul_u32_u24_e32 v0, v42, v41 +; GFX9-NEXT: v_writelane_b32 v40, s31, 3 ; GFX9-NEXT: v_and_b32_e32 v43, 0xffffff, v41 ; GFX9-NEXT: s_waitcnt lgkmcnt(0) ; GFX9-NEXT: s_swappc_b64 s[30:31], s[34:35] @@ -220,17 +220,17 @@ define void @slsr1_1(i32 %b.arg, i32 %s.arg) #0 { ; GFX9-NEXT: buffer_load_dword v43, off, s[0:3], s33 ; 4-byte Folded Reload ; GFX9-NEXT: buffer_load_dword v42, off, s[0:3], s33 offset:4 ; 4-byte Folded Reload ; GFX9-NEXT: buffer_load_dword v41, off, s[0:3], s33 offset:8 ; 4-byte Folded Reload -; GFX9-NEXT: v_readlane_b32 s35, v40, 3 -; GFX9-NEXT: v_readlane_b32 s34, v40, 2 -; GFX9-NEXT: v_readlane_b32 s31, v40, 1 -; GFX9-NEXT: v_readlane_b32 s30, v40, 0 +; GFX9-NEXT: v_readlane_b32 s4, v40, 2 +; GFX9-NEXT: v_readlane_b32 s5, v40, 3 +; GFX9-NEXT: v_readlane_b32 s35, v40, 1 +; GFX9-NEXT: v_readlane_b32 s34, v40, 0 ; GFX9-NEXT: s_addk_i32 s32, 0xf800 ; GFX9-NEXT: v_readlane_b32 s33, v40, 4 -; GFX9-NEXT: s_or_saveexec_b64 s[4:5], -1 +; GFX9-NEXT: s_or_saveexec_b64 s[6:7], -1 ; GFX9-NEXT: buffer_load_dword v40, off, s[0:3], s32 offset:12 ; 4-byte Folded Reload -; GFX9-NEXT: s_mov_b64 exec, s[4:5] +; GFX9-NEXT: s_mov_b64 exec, s[6:7] ; GFX9-NEXT: s_waitcnt vmcnt(0) -; GFX9-NEXT: s_setpc_b64 s[30:31] +; GFX9-NEXT: s_setpc_b64 s[4:5] %b = and i32 %b.arg, 16777215 %s = and i32 %s.arg, 16777215 diff --git a/llvm/test/CodeGen/AMDGPU/need-fp-from-vgpr-spills.ll b/llvm/test/CodeGen/AMDGPU/need-fp-from-vgpr-spills.ll index 109163bf7373..68646ad229a7 100644 --- a/llvm/test/CodeGen/AMDGPU/need-fp-from-vgpr-spills.ll +++ b/llvm/test/CodeGen/AMDGPU/need-fp-from-vgpr-spills.ll @@ -44,11 +44,11 @@ define internal fastcc void @csr_vgpr_spill_fp_callee() #0 { ; CHECK-NEXT: s_mov_b64 s[0:1], s[8:9] ; CHECK-NEXT: s_mov_b64 s[2:3], s[10:11] ; CHECK-NEXT: s_swappc_b64 s[30:31], s[4:5] +; CHECK-NEXT: v_readlane_b32 s30, v1, 0 +; CHECK-NEXT: v_readlane_b32 s31, v1, 1 ; CHECK-NEXT: ;;#ASMSTART ; CHECK-NEXT: ; clobber csr v40 ; CHECK-NEXT: ;;#ASMEND -; CHECK-NEXT: v_readlane_b32 s31, v1, 1 -; CHECK-NEXT: v_readlane_b32 s30, v1, 0 ; CHECK-NEXT: buffer_load_dword v40, off, s[0:3], s33 ; 4-byte Folded Reload ; CHECK-NEXT: s_add_i32 s32, s32, 0xfffffc00 ; CHECK-NEXT: v_readlane_b32 s33, v1, 2 @@ -168,8 +168,8 @@ define hidden i32 @caller_save_vgpr_spill_fp_tail_call() #0 { ; CHECK-NEXT: s_mov_b64 s[0:1], s[8:9] ; CHECK-NEXT: s_mov_b64 s[2:3], s[10:11] ; CHECK-NEXT: s_swappc_b64 s[30:31], s[4:5] -; CHECK-NEXT: v_readlane_b32 s31, v1, 1 ; CHECK-NEXT: v_readlane_b32 s30, v1, 0 +; CHECK-NEXT: v_readlane_b32 s31, v1, 1 ; CHECK-NEXT: s_add_i32 s32, s32, 0xfffffc00 ; CHECK-NEXT: v_readlane_b32 s33, v1, 2 ; CHECK-NEXT: s_or_saveexec_b64 s[4:5], -1 @@ -202,8 +202,8 @@ define hidden i32 @caller_save_vgpr_spill_fp() #0 { ; CHECK-NEXT: s_mov_b64 s[0:1], s[8:9] ; CHECK-NEXT: s_mov_b64 s[2:3], s[10:11] ; CHECK-NEXT: s_swappc_b64 s[30:31], s[4:5] -; CHECK-NEXT: 
v_readlane_b32 s31, v2, 1 ; CHECK-NEXT: v_readlane_b32 s30, v2, 0 +; CHECK-NEXT: v_readlane_b32 s31, v2, 1 ; CHECK-NEXT: s_add_i32 s32, s32, 0xfffffc00 ; CHECK-NEXT: v_readlane_b32 s33, v2, 2 ; CHECK-NEXT: s_or_saveexec_b64 s[4:5], -1 diff --git a/llvm/test/CodeGen/AMDGPU/nested-calls.ll b/llvm/test/CodeGen/AMDGPU/nested-calls.ll index 2ae8530e1ed1..327136fdfe1e 100644 --- a/llvm/test/CodeGen/AMDGPU/nested-calls.ll +++ b/llvm/test/CodeGen/AMDGPU/nested-calls.ll @@ -22,8 +22,8 @@ declare void @external_void_func_i32(i32) #0 ; GCN: s_swappc_b64 -; GCN: v_readlane_b32 s31, v40, 1 -; GCN: v_readlane_b32 s30, v40, 0 +; GCN: v_readlane_b32 s4, v40, 0 +; GCN: v_readlane_b32 s5, v40, 1 ; GCN-NEXT: s_addk_i32 s32, 0xfc00 ; GCN-NEXT: v_readlane_b32 s33, v40, 2 @@ -31,7 +31,7 @@ declare void @external_void_func_i32(i32) #0 ; GCN-NEXT: buffer_load_dword v40, off, s[0:3], s32 ; 4-byte Folded Reload ; GCN-NEXT: s_mov_b64 exec, [[COPY_EXEC1]] ; GCN-NEXT: s_waitcnt vmcnt(0) -; GCN-NEXT: s_setpc_b64 s[30:31] +; GCN-NEXT: s_setpc_b64 s[4:5] define void @test_func_call_external_void_func_i32_imm() #0 { call void @external_void_func_i32(i32 42) ret void diff --git a/llvm/test/CodeGen/AMDGPU/no-remat-indirect-mov.mir b/llvm/test/CodeGen/AMDGPU/no-remat-indirect-mov.mir index d5abbc017260..4f4bdcd6f5b4 100644 --- a/llvm/test/CodeGen/AMDGPU/no-remat-indirect-mov.mir +++ b/llvm/test/CodeGen/AMDGPU/no-remat-indirect-mov.mir @@ -30,7 +30,8 @@ body: | ; GFX9-LABEL: name: index_vgpr_waterfall_loop ; GFX9: bb.0: ; GFX9: successors: %bb.1(0x80000000) - ; GFX9: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $vgpr8, $vgpr9, $vgpr10, $vgpr11, $vgpr12, $vgpr13, $vgpr14, $vgpr15, $vgpr16 + ; GFX9: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $vgpr8, $vgpr9, $vgpr10, $vgpr11, $vgpr12, $vgpr13, $vgpr14, $vgpr15, $vgpr16, $sgpr30_sgpr31 + ; GFX9: [[COPY:%[0-9]+]]:sreg_64 = COPY $sgpr30_sgpr31 ; GFX9: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr16 ; GFX9: undef %18.sub15:vreg_512 = COPY $vgpr15 ; GFX9: %18.sub14:vreg_512 = COPY $vgpr14 @@ -62,6 +63,7 @@ body: | ; GFX9: S_CBRANCH_EXECNZ %bb.1, implicit $exec ; GFX9: bb.2: ; GFX9: $exec = S_MOV_B64 [[S_MOV_B64_]] + ; GFX9: $sgpr30_sgpr31 = COPY [[COPY]] ; GFX9: $vgpr0 = COPY [[V_MOV_B32_e32_]] ; GFX9: S_SETPC_B64_return $sgpr30_sgpr31, implicit $vgpr0, implicit undef $vgpr1, implicit undef $vgpr2, implicit undef $vgpr3 bb.0: diff --git a/llvm/test/CodeGen/AMDGPU/no-source-locations-in-prologue.ll b/llvm/test/CodeGen/AMDGPU/no-source-locations-in-prologue.ll index 1e424ecde23d..84643f945ac5 100644 --- a/llvm/test/CodeGen/AMDGPU/no-source-locations-in-prologue.ll +++ b/llvm/test/CodeGen/AMDGPU/no-source-locations-in-prologue.ll @@ -19,10 +19,10 @@ define hidden void @_ZL3barv() #0 !dbg !1644 { ; CHECK-NEXT: v_writelane_b32 v40, s33, 2 ; CHECK-NEXT: s_mov_b32 s33, s32 ; CHECK-NEXT: s_add_i32 s32, s32, 0x400 -; CHECK-NEXT: .Ltmp0: -; CHECK-NEXT: .loc 0 31 3 prologue_end ; lane-info.cpp:31:3 ; CHECK-NEXT: v_writelane_b32 v40, s30, 0 ; CHECK-NEXT: v_writelane_b32 v40, s31, 1 +; CHECK-NEXT: .Ltmp0: +; CHECK-NEXT: .loc 0 31 3 prologue_end ; lane-info.cpp:31:3 ; CHECK-NEXT: s_getpc_b64 s[16:17] ; CHECK-NEXT: s_add_u32 s16, s16, _ZL13sleep_foreverv@gotpcrel32@lo+4 ; CHECK-NEXT: s_addc_u32 s17, s17, _ZL13sleep_foreverv@gotpcrel32@hi+12 @@ -34,14 +34,14 @@ define hidden void @_ZL3barv() #0 !dbg !1644 { ; CHECK-NEXT: s_waitcnt lgkmcnt(0) ; CHECK-NEXT: s_swappc_b64 s[30:31], s[16:17] ; CHECK-NEXT: .Ltmp1: -; CHECK-NEXT: .loc 0 32 1 ; 
lane-info.cpp:32:1 -; CHECK-NEXT: v_readlane_b32 s31, v40, 1 ; CHECK-NEXT: v_readlane_b32 s30, v40, 0 +; CHECK-NEXT: v_readlane_b32 s31, v40, 1 ; CHECK-NEXT: s_add_i32 s32, s32, 0xfffffc00 ; CHECK-NEXT: v_readlane_b32 s33, v40, 2 ; CHECK-NEXT: s_or_saveexec_b64 s[4:5], -1 ; CHECK-NEXT: buffer_load_dword v40, off, s[0:3], s32 ; 4-byte Folded Reload ; CHECK-NEXT: s_mov_b64 exec, s[4:5] +; CHECK-NEXT: .loc 0 32 1 ; lane-info.cpp:32:1 ; CHECK-NEXT: s_waitcnt vmcnt(0) ; CHECK-NEXT: s_setpc_b64 s[30:31] ; CHECK-NEXT: .Ltmp2: diff --git a/llvm/test/CodeGen/AMDGPU/reserve-vgpr-for-sgpr-spill.ll b/llvm/test/CodeGen/AMDGPU/reserve-vgpr-for-sgpr-spill.ll index d474ea525799..e0e211243771 100644 --- a/llvm/test/CodeGen/AMDGPU/reserve-vgpr-for-sgpr-spill.ll +++ b/llvm/test/CodeGen/AMDGPU/reserve-vgpr-for-sgpr-spill.ll @@ -11,8 +11,8 @@ define void @child_function() #0 { ; GCN: v_writelane_b32 v255, s30, 0 ; GCN: v_writelane_b32 v255, s31, 1 ; GCN: s_swappc_b64 s[30:31], s[4:5] -; GCN: v_readlane_b32 s31, v255, 1 ; GCN: v_readlane_b32 s30, v255, 0 +; GCN: v_readlane_b32 s31, v255, 1 ; GCN: v_readlane_b32 s33, v255, 2 ; GCN: ; NumVgprs: 256 @@ -57,8 +57,8 @@ define void @reserve_vgpr_with_no_lower_vgpr_available() #0 { ; GCN: v_writelane_b32 v254, s30, 0 ; GCN: v_writelane_b32 v254, s31, 1 ; GCN: s_swappc_b64 s[30:31], s[4:5] -; GCN: v_readlane_b32 s31, v254, 1 ; GCN: v_readlane_b32 s30, v254, 0 +; GCN: v_readlane_b32 s31, v254, 1 ; GCN: v_readlane_b32 s33, v254, 2 define void @reserve_lowest_available_vgpr() #0 { @@ -99,8 +99,8 @@ define void @reserve_lowest_available_vgpr() #0 { ; GCN-LABEL: {{^}}reserve_vgpr_with_sgpr_spills: ; GCN-NOT: buffer_store_dword v255, off, s[0:3], s32 ; GCN: ; def s4 -; GCN: v_writelane_b32 v254, s4, 0 -; GCN: v_readlane_b32 s4, v254, 0 +; GCN: v_writelane_b32 v254, s4, 2 +; GCN: v_readlane_b32 s4, v254, 2 ; GCN: ; use s4 define void @reserve_vgpr_with_sgpr_spills() #0 { diff --git a/llvm/test/CodeGen/AMDGPU/save-fp.ll b/llvm/test/CodeGen/AMDGPU/save-fp.ll index ec56f41aa1a0..6bed1dbf901e 100644 --- a/llvm/test/CodeGen/AMDGPU/save-fp.ll +++ b/llvm/test/CodeGen/AMDGPU/save-fp.ll @@ -11,14 +11,16 @@ bb: ; GCN-LABEL: {{^}}caller: -; GCN: v_writelane_b32 v2, s33, 2 -; GCN: s_mov_b32 s33, s32 +; GFX900: s_mov_b32 [[SAVED_FP:s[0-9]+]], s33 +; GFX900: s_mov_b32 s33, s32 +; GFX908-NOT: s_mov_b32 s33, s32 ; GFX900: buffer_store_dword +; GFX908-DAG: s_mov_b32 [[SAVED_FP:s[0-9]+]], s33 ; GFX908-DAG: v_accvgpr_write_b32 ; GCN: s_swappc_b64 ; GFX900: buffer_load_dword ; GFX908: v_accvgpr_read_b32 -; GCN: v_readlane_b32 s33, v2, 2 +; GCN: s_mov_b32 s33, [[SAVED_FP]] define i64 @caller() { bb: call void asm sideeffect "", "~{v40}" () diff --git a/llvm/test/CodeGen/AMDGPU/sibling-call.ll b/llvm/test/CodeGen/AMDGPU/sibling-call.ll index 054d4ef3651e..fccfb8fe2f21 100644 --- a/llvm/test/CodeGen/AMDGPU/sibling-call.ll +++ b/llvm/test/CodeGen/AMDGPU/sibling-call.ll @@ -201,18 +201,18 @@ entry: ; GCN: s_or_saveexec_b64 s{{\[[0-9]+:[0-9]+\]}}, -1 ; GCN-NEXT: buffer_store_dword [[CSRV:v[0-9]+]], off, s[0:3], s32 offset:8 ; 4-byte Folded Spill ; GCN-NEXT: s_mov_b64 exec -; GCN: v_writelane_b32 [[CSRV]], s33, 2 +; GCN: s_mov_b32 s33, s32 ; GCN-DAG: s_addk_i32 s32, 0x400 +; GCN-DAG: buffer_store_dword v41, off, s[0:3], s33 offset:4 ; 4-byte Folded Spill +; GCN-DAG: buffer_store_dword v42, off, s[0:3], s33 ; 4-byte Folded Spill +; GCN-DAG: v_writelane_b32 [[CSRV]], s34, 0 +; GCN-DAG: v_writelane_b32 [[CSRV]], s35, 1 + ; GCN-DAG: s_getpc_b64 s[4:5] ; GCN-DAG: s_add_u32 s4, s4, 
i32_fastcc_i32_i32@gotpcrel32@lo+4 ; GCN-DAG: s_addc_u32 s5, s5, i32_fastcc_i32_i32@gotpcrel32@hi+12 -; GCN-DAG: v_writelane_b32 [[CSRV]], s30, 0 -; GCN-DAG: buffer_store_dword v41, off, s[0:3], s33 offset:4 ; 4-byte Folded Spill -; GCN-DAG: buffer_store_dword v42, off, s[0:3], s33 ; 4-byte Folded Spill -; GCN-DAG: v_writelane_b32 [[CSRV]], s31, 1 - ; GCN: s_swappc_b64 @@ -223,8 +223,8 @@ entry: ; GCN-NEXT: s_add_u32 s4, s4, sibling_call_i32_fastcc_i32_i32@rel32@lo+4 ; GCN-NEXT: s_addc_u32 s5, s5, sibling_call_i32_fastcc_i32_i32@rel32@hi+12 -; GCN-DAG: v_readlane_b32 s30, [[CSRV]], 0 -; GCN-DAG: v_readlane_b32 s31, [[CSRV]], 1 +; GCN-DAG: v_readlane_b32 s34, [[CSRV]], 0 +; GCN-DAG: v_readlane_b32 s35, [[CSRV]], 1 ; GCN: s_addk_i32 s32, 0xfc00 ; GCN-NEXT: v_readlane_b32 s33, diff --git a/llvm/test/CodeGen/AMDGPU/splitkit-copy-bundle.mir b/llvm/test/CodeGen/AMDGPU/splitkit-copy-bundle.mir index d7e368f4d6d9..23714f0323ef 100644 --- a/llvm/test/CodeGen/AMDGPU/splitkit-copy-bundle.mir +++ b/llvm/test/CodeGen/AMDGPU/splitkit-copy-bundle.mir @@ -89,11 +89,11 @@ body: | ; VR: renamable $sgpr37 = S_MOV_B32 -1 ; VR: renamable $sgpr36 = S_MOV_B32 -1 ; VR: renamable $sgpr68 = S_MOV_B32 0 - ; VR: renamable $sgpr30_sgpr31 = IMPLICIT_DEF ; VR: renamable $sgpr34_sgpr35 = IMPLICIT_DEF + ; VR: renamable $sgpr66_sgpr67 = IMPLICIT_DEF ; VR: bb.1: ; VR: successors: %bb.2(0x80000000) - ; VR: liveins: $sgpr36_sgpr37_sgpr38_sgpr39_sgpr40_sgpr41_sgpr42_sgpr43_sgpr44_sgpr45_sgpr46_sgpr47_sgpr48_sgpr49_sgpr50_sgpr51_sgpr52_sgpr53_sgpr54_sgpr55_sgpr56_sgpr57_sgpr58_sgpr59_sgpr60_sgpr61_sgpr62_sgpr63_sgpr64_sgpr65_sgpr66_sgpr67:0x000000000000000F, $sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75_sgpr76_sgpr77_sgpr78_sgpr79_sgpr80_sgpr81_sgpr82_sgpr83_sgpr84_sgpr85_sgpr86_sgpr87_sgpr88_sgpr89_sgpr90_sgpr91_sgpr92_sgpr93_sgpr94_sgpr95_sgpr96_sgpr97_sgpr98_sgpr99:0x0000000000000003, $sgpr30_sgpr31, $sgpr34_sgpr35 + ; VR: liveins: $sgpr36_sgpr37_sgpr38_sgpr39_sgpr40_sgpr41_sgpr42_sgpr43_sgpr44_sgpr45_sgpr46_sgpr47_sgpr48_sgpr49_sgpr50_sgpr51_sgpr52_sgpr53_sgpr54_sgpr55_sgpr56_sgpr57_sgpr58_sgpr59_sgpr60_sgpr61_sgpr62_sgpr63_sgpr64_sgpr65_sgpr66_sgpr67:0x000000000000000F, $sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75_sgpr76_sgpr77_sgpr78_sgpr79_sgpr80_sgpr81_sgpr82_sgpr83_sgpr84_sgpr85_sgpr86_sgpr87_sgpr88_sgpr89_sgpr90_sgpr91_sgpr92_sgpr93_sgpr94_sgpr95_sgpr96_sgpr97_sgpr98_sgpr99:0x0000000000000003, $sgpr34_sgpr35, $sgpr66_sgpr67 ; VR: renamable $sgpr38 = COPY renamable $sgpr36 ; VR: renamable $sgpr39 = COPY renamable $sgpr37 ; VR: renamable $sgpr40 = COPY renamable $sgpr36 @@ -155,8 +155,8 @@ body: | ; VR: renamable $sgpr99 = COPY renamable $sgpr68 ; VR: bb.2: ; VR: successors: %bb.1(0x40000000), %bb.2(0x40000000) - ; VR: liveins: $sgpr36_sgpr37_sgpr38_sgpr39_sgpr40_sgpr41_sgpr42_sgpr43_sgpr44_sgpr45_sgpr46_sgpr47_sgpr48_sgpr49_sgpr50_sgpr51_sgpr52_sgpr53_sgpr54_sgpr55_sgpr56_sgpr57_sgpr58_sgpr59_sgpr60_sgpr61_sgpr62_sgpr63_sgpr64_sgpr65_sgpr66_sgpr67:0x000000000000000F, $sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75_sgpr76_sgpr77_sgpr78_sgpr79_sgpr80_sgpr81_sgpr82_sgpr83_sgpr84_sgpr85_sgpr86_sgpr87_sgpr88_sgpr89_sgpr90_sgpr91_sgpr92_sgpr93_sgpr94_sgpr95_sgpr96_sgpr97_sgpr98_sgpr99:0x0000000000000003, $sgpr30_sgpr31, $sgpr34_sgpr35 - ; VR: S_NOP 0, csr_amdgpu_highregs, implicit renamable $sgpr30_sgpr31, implicit renamable $sgpr34_sgpr35 + ; VR: liveins: 
$sgpr36_sgpr37_sgpr38_sgpr39_sgpr40_sgpr41_sgpr42_sgpr43_sgpr44_sgpr45_sgpr46_sgpr47_sgpr48_sgpr49_sgpr50_sgpr51_sgpr52_sgpr53_sgpr54_sgpr55_sgpr56_sgpr57_sgpr58_sgpr59_sgpr60_sgpr61_sgpr62_sgpr63_sgpr64_sgpr65_sgpr66_sgpr67:0x000000000000000F, $sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75_sgpr76_sgpr77_sgpr78_sgpr79_sgpr80_sgpr81_sgpr82_sgpr83_sgpr84_sgpr85_sgpr86_sgpr87_sgpr88_sgpr89_sgpr90_sgpr91_sgpr92_sgpr93_sgpr94_sgpr95_sgpr96_sgpr97_sgpr98_sgpr99:0x0000000000000003, $sgpr34_sgpr35, $sgpr66_sgpr67 + ; VR: S_NOP 0, csr_amdgpu_highregs, implicit renamable $sgpr34_sgpr35, implicit renamable $sgpr66_sgpr67 ; VR: S_CBRANCH_VCCNZ %bb.1, implicit undef $vcc ; VR: S_BRANCH %bb.2 bb.0: diff --git a/llvm/test/CodeGen/AMDGPU/stack-realign.ll b/llvm/test/CodeGen/AMDGPU/stack-realign.ll index 37df92e9700a..c7031a892150 100644 --- a/llvm/test/CodeGen/AMDGPU/stack-realign.ll +++ b/llvm/test/CodeGen/AMDGPU/stack-realign.ll @@ -171,15 +171,12 @@ define void @func_call_align1024_bp_gets_vgpr_spill(<32 x i32> %a, i32 %b) #0 { ; GCN: buffer_store_dword v{{[0-9]+}}, off, s[0:3], s32 ; GCN: s_swappc_b64 s[30:31], -; GCN: v_readlane_b32 s31, [[VGPR_REG]], 1 -; GCN: v_readlane_b32 s30, [[VGPR_REG]], 0 ; GCN: s_add_i32 s32, s32, 0xfffd0000 ; GCN-NEXT: v_readlane_b32 s33, [[VGPR_REG]], 2 ; GCN-NEXT: v_readlane_b32 s34, [[VGPR_REG]], 3 -; GCN-NEXT: s_or_saveexec_b64 s[4:5], -1 +; GCN-NEXT: s_or_saveexec_b64 s[6:7], -1 ; GCN-NEXT: buffer_load_dword [[VGPR_REG]], off, s[0:3], s32 offset:1028 ; 4-byte Folded Reload -; GCN-NEXT: s_mov_b64 exec, s[4:5] -; GCN: s_setpc_b64 s[30:31] +; GCN-NEXT: s_mov_b64 exec, s[6:7] %temp = alloca i32, align 1024, addrspace(5) store volatile i32 0, i32 addrspace(5)* %temp, align 1024 call void @extern_func(<32 x i32> %a, i32 %b) diff --git a/llvm/test/CodeGen/AMDGPU/strict_fadd.f16.ll b/llvm/test/CodeGen/AMDGPU/strict_fadd.f16.ll index bece8aab1e3d..884b56e7a530 100644 --- a/llvm/test/CodeGen/AMDGPU/strict_fadd.f16.ll +++ b/llvm/test/CodeGen/AMDGPU/strict_fadd.f16.ll @@ -186,13 +186,13 @@ define <4 x half> @v_constained_fadd_v4f16_fpexcept_strict(<4 x half> %x, <4 x h ; GFX10: ; %bb.0: ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 -; GFX10-NEXT: v_mov_b32_e32 v4, 0xffff -; GFX10-NEXT: v_add_f16_e32 v5, v0, v2 +; GFX10-NEXT: v_add_f16_e32 v4, v0, v2 +; GFX10-NEXT: v_mov_b32_e32 v5, 0xffff ; GFX10-NEXT: v_add_f16_e32 v6, v1, v3 ; GFX10-NEXT: v_add_f16_sdwa v1, v1, v3 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1 ; GFX10-NEXT: v_add_f16_sdwa v0, v0, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1 -; GFX10-NEXT: v_and_b32_e32 v2, v4, v5 -; GFX10-NEXT: v_and_b32_e32 v3, v4, v6 +; GFX10-NEXT: v_and_b32_e32 v2, v5, v4 +; GFX10-NEXT: v_and_b32_e32 v3, v5, v6 ; GFX10-NEXT: v_lshl_or_b32 v0, v0, 16, v2 ; GFX10-NEXT: v_lshl_or_b32 v1, v1, 16, v3 ; GFX10-NEXT: s_setpc_b64 s[30:31] diff --git a/llvm/test/CodeGen/AMDGPU/strict_fmul.f16.ll b/llvm/test/CodeGen/AMDGPU/strict_fmul.f16.ll index 00aa96fea128..34c5e908ac31 100644 --- a/llvm/test/CodeGen/AMDGPU/strict_fmul.f16.ll +++ b/llvm/test/CodeGen/AMDGPU/strict_fmul.f16.ll @@ -186,13 +186,13 @@ define <4 x half> @v_constained_fmul_v4f16_fpexcept_strict(<4 x half> %x, <4 x h ; GFX10: ; %bb.0: ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 -; GFX10-NEXT: v_mov_b32_e32 v4, 0xffff -; GFX10-NEXT: v_mul_f16_e32 v5, v0, v2 +; GFX10-NEXT: v_mul_f16_e32 v4, v0, v2 +; GFX10-NEXT: v_mov_b32_e32 v5, 0xffff ; 
GFX10-NEXT: v_mul_f16_e32 v6, v1, v3 ; GFX10-NEXT: v_mul_f16_sdwa v1, v1, v3 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1 ; GFX10-NEXT: v_mul_f16_sdwa v0, v0, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1 -; GFX10-NEXT: v_and_b32_e32 v2, v4, v5 -; GFX10-NEXT: v_and_b32_e32 v3, v4, v6 +; GFX10-NEXT: v_and_b32_e32 v2, v5, v4 +; GFX10-NEXT: v_and_b32_e32 v3, v5, v6 ; GFX10-NEXT: v_lshl_or_b32 v0, v0, 16, v2 ; GFX10-NEXT: v_lshl_or_b32 v1, v1, 16, v3 ; GFX10-NEXT: s_setpc_b64 s[30:31] diff --git a/llvm/test/CodeGen/AMDGPU/strict_fsub.f16.ll b/llvm/test/CodeGen/AMDGPU/strict_fsub.f16.ll index 802c08fe64a2..4cf75f68a7a1 100644 --- a/llvm/test/CodeGen/AMDGPU/strict_fsub.f16.ll +++ b/llvm/test/CodeGen/AMDGPU/strict_fsub.f16.ll @@ -206,13 +206,13 @@ define <4 x half> @v_constained_fsub_v4f16_fpexcept_strict(<4 x half> %x, <4 x h ; GFX10: ; %bb.0: ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 -; GFX10-NEXT: v_mov_b32_e32 v4, 0xffff -; GFX10-NEXT: v_sub_f16_e32 v5, v0, v2 +; GFX10-NEXT: v_sub_f16_e32 v4, v0, v2 +; GFX10-NEXT: v_mov_b32_e32 v5, 0xffff ; GFX10-NEXT: v_sub_f16_e32 v6, v1, v3 ; GFX10-NEXT: v_sub_f16_sdwa v1, v1, v3 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1 ; GFX10-NEXT: v_sub_f16_sdwa v0, v0, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1 -; GFX10-NEXT: v_and_b32_e32 v2, v4, v5 -; GFX10-NEXT: v_and_b32_e32 v3, v4, v6 +; GFX10-NEXT: v_and_b32_e32 v2, v5, v4 +; GFX10-NEXT: v_and_b32_e32 v3, v5, v6 ; GFX10-NEXT: v_lshl_or_b32 v0, v0, 16, v2 ; GFX10-NEXT: v_lshl_or_b32 v1, v1, 16, v3 ; GFX10-NEXT: s_setpc_b64 s[30:31] diff --git a/llvm/test/CodeGen/AMDGPU/tail-call-amdgpu-gfx.ll b/llvm/test/CodeGen/AMDGPU/tail-call-amdgpu-gfx.ll index fba11bf0124e..4894eeca5825 100644 --- a/llvm/test/CodeGen/AMDGPU/tail-call-amdgpu-gfx.ll +++ b/llvm/test/CodeGen/AMDGPU/tail-call-amdgpu-gfx.ll @@ -20,28 +20,25 @@ define amdgpu_gfx float @caller(float %arg0) { ; GCN-NEXT: s_or_saveexec_b64 s[34:35], -1 ; GCN-NEXT: buffer_store_dword v1, off, s[0:3], s32 ; 4-byte Folded Spill ; GCN-NEXT: s_mov_b64 exec, s[34:35] -; GCN-NEXT: v_writelane_b32 v1, s33, 3 -; GCN-NEXT: v_writelane_b32 v1, s4, 0 +; GCN-NEXT: v_writelane_b32 v1, s33, 1 ; GCN-NEXT: s_mov_b32 s33, s32 ; GCN-NEXT: s_addk_i32 s32, 0x400 -; GCN-NEXT: v_writelane_b32 v1, s30, 1 +; GCN-NEXT: v_writelane_b32 v1, s4, 0 ; GCN-NEXT: v_add_f32_e32 v0, 1.0, v0 ; GCN-NEXT: s_mov_b32 s4, 2.0 -; GCN-NEXT: v_writelane_b32 v1, s31, 2 -; GCN-NEXT: s_getpc_b64 s[34:35] -; GCN-NEXT: s_add_u32 s34, s34, callee@rel32@lo+4 -; GCN-NEXT: s_addc_u32 s35, s35, callee@rel32@hi+12 -; GCN-NEXT: s_swappc_b64 s[30:31], s[34:35] -; GCN-NEXT: v_readlane_b32 s31, v1, 2 -; GCN-NEXT: v_readlane_b32 s30, v1, 1 +; GCN-NEXT: s_mov_b64 s[36:37], s[30:31] +; GCN-NEXT: s_getpc_b64 s[30:31] +; GCN-NEXT: s_add_u32 s30, s30, callee@rel32@lo+4 +; GCN-NEXT: s_addc_u32 s31, s31, callee@rel32@hi+12 +; GCN-NEXT: s_swappc_b64 s[30:31], s[30:31] ; GCN-NEXT: v_readlane_b32 s4, v1, 0 ; GCN-NEXT: s_addk_i32 s32, 0xfc00 -; GCN-NEXT: v_readlane_b32 s33, v1, 3 -; GCN-NEXT: s_or_saveexec_b64 s[34:35], -1 +; GCN-NEXT: v_readlane_b32 s33, v1, 1 +; GCN-NEXT: s_or_saveexec_b64 s[30:31], -1 ; GCN-NEXT: buffer_load_dword v1, off, s[0:3], s32 ; 4-byte Folded Reload -; GCN-NEXT: s_mov_b64 exec, s[34:35] +; GCN-NEXT: s_mov_b64 exec, s[30:31] ; GCN-NEXT: s_waitcnt vmcnt(0) -; GCN-NEXT: s_setpc_b64 s[30:31] +; GCN-NEXT: s_setpc_b64 s[36:37] %add = fadd float %arg0, 1.0 %call = 
tail call amdgpu_gfx float @callee(float %add, float inreg 2.0) ret float %call diff --git a/llvm/test/CodeGen/AMDGPU/udiv.ll b/llvm/test/CodeGen/AMDGPU/udiv.ll index b991d6cd5149..d678b64f54b6 100644 --- a/llvm/test/CodeGen/AMDGPU/udiv.ll +++ b/llvm/test/CodeGen/AMDGPU/udiv.ll @@ -2500,7 +2500,7 @@ define i64 @v_test_udiv64_mulhi_fold(i64 %arg) { ; SI-NEXT: v_madak_f32 v2, 0, v2, 0x47c35000 ; SI-NEXT: v_rcp_f32_e32 v2, v2 ; SI-NEXT: s_mov_b32 s4, 0xfffe7960 -; SI-NEXT: v_mov_b32_e32 v8, 0 +; SI-NEXT: v_mov_b32_e32 v9, 0 ; SI-NEXT: v_mul_f32_e32 v2, 0x5f7ffffc, v2 ; SI-NEXT: v_mul_f32_e32 v3, 0x2f800000, v2 ; SI-NEXT: v_trunc_f32_e32 v3, v3 @@ -2508,22 +2508,22 @@ define i64 @v_test_udiv64_mulhi_fold(i64 %arg) { ; SI-NEXT: v_cvt_u32_f32_e32 v2, v2 ; SI-NEXT: v_cvt_u32_f32_e32 v3, v3 ; SI-NEXT: v_mul_hi_u32 v4, v2, s4 -; SI-NEXT: v_mul_lo_u32 v6, v3, s4 -; SI-NEXT: v_mul_lo_u32 v5, v2, s4 +; SI-NEXT: v_mul_lo_u32 v5, v3, s4 +; SI-NEXT: v_mul_lo_u32 v6, v2, s4 ; SI-NEXT: v_subrev_i32_e32 v4, vcc, v2, v4 -; SI-NEXT: v_add_i32_e32 v4, vcc, v6, v4 -; SI-NEXT: v_mul_hi_u32 v7, v2, v5 -; SI-NEXT: v_mul_lo_u32 v6, v2, v4 -; SI-NEXT: v_mul_hi_u32 v9, v2, v4 +; SI-NEXT: v_add_i32_e32 v4, vcc, v5, v4 +; SI-NEXT: v_mul_lo_u32 v5, v2, v4 +; SI-NEXT: v_mul_hi_u32 v7, v2, v6 +; SI-NEXT: v_mul_hi_u32 v8, v2, v4 ; SI-NEXT: v_mul_hi_u32 v10, v3, v4 ; SI-NEXT: v_mul_lo_u32 v4, v3, v4 -; SI-NEXT: v_add_i32_e32 v6, vcc, v7, v6 -; SI-NEXT: v_addc_u32_e32 v7, vcc, 0, v9, vcc -; SI-NEXT: v_mul_lo_u32 v9, v3, v5 -; SI-NEXT: v_mul_hi_u32 v5, v3, v5 -; SI-NEXT: v_add_i32_e32 v6, vcc, v6, v9 -; SI-NEXT: v_addc_u32_e32 v5, vcc, v7, v5, vcc -; SI-NEXT: v_addc_u32_e32 v6, vcc, v10, v8, vcc +; SI-NEXT: v_add_i32_e32 v5, vcc, v7, v5 +; SI-NEXT: v_addc_u32_e32 v7, vcc, 0, v8, vcc +; SI-NEXT: v_mul_lo_u32 v8, v3, v6 +; SI-NEXT: v_mul_hi_u32 v6, v3, v6 +; SI-NEXT: v_add_i32_e32 v5, vcc, v5, v8 +; SI-NEXT: v_addc_u32_e32 v5, vcc, v7, v6, vcc +; SI-NEXT: v_addc_u32_e32 v6, vcc, v10, v9, vcc ; SI-NEXT: v_add_i32_e32 v4, vcc, v5, v4 ; SI-NEXT: v_addc_u32_e32 v5, vcc, 0, v6, vcc ; SI-NEXT: v_add_i32_e32 v2, vcc, v2, v4 @@ -2536,16 +2536,16 @@ define i64 @v_test_udiv64_mulhi_fold(i64 %arg) { ; SI-NEXT: v_add_i32_e32 v4, vcc, v4, v5 ; SI-NEXT: v_mul_lo_u32 v5, v2, v4 ; SI-NEXT: v_mul_hi_u32 v7, v2, v6 -; SI-NEXT: v_mul_hi_u32 v9, v2, v4 +; SI-NEXT: v_mul_hi_u32 v8, v2, v4 ; SI-NEXT: v_mul_hi_u32 v10, v3, v4 ; SI-NEXT: v_mul_lo_u32 v4, v3, v4 ; SI-NEXT: v_add_i32_e32 v5, vcc, v7, v5 -; SI-NEXT: v_addc_u32_e32 v7, vcc, 0, v9, vcc -; SI-NEXT: v_mul_lo_u32 v9, v3, v6 +; SI-NEXT: v_addc_u32_e32 v7, vcc, 0, v8, vcc +; SI-NEXT: v_mul_lo_u32 v8, v3, v6 ; SI-NEXT: v_mul_hi_u32 v6, v3, v6 -; SI-NEXT: v_add_i32_e32 v5, vcc, v5, v9 +; SI-NEXT: v_add_i32_e32 v5, vcc, v5, v8 ; SI-NEXT: v_addc_u32_e32 v5, vcc, v7, v6, vcc -; SI-NEXT: v_addc_u32_e32 v6, vcc, v10, v8, vcc +; SI-NEXT: v_addc_u32_e32 v6, vcc, v10, v9, vcc ; SI-NEXT: v_add_i32_e32 v4, vcc, v5, v4 ; SI-NEXT: v_addc_u32_e32 v5, vcc, 0, v6, vcc ; SI-NEXT: v_add_i32_e32 v2, vcc, v2, v4 @@ -2561,7 +2561,7 @@ define i64 @v_test_udiv64_mulhi_fold(i64 %arg) { ; SI-NEXT: v_mul_hi_u32 v2, v1, v2 ; SI-NEXT: v_add_i32_e32 v4, vcc, v4, v6 ; SI-NEXT: v_addc_u32_e32 v2, vcc, v5, v2, vcc -; SI-NEXT: v_addc_u32_e32 v4, vcc, v7, v8, vcc +; SI-NEXT: v_addc_u32_e32 v4, vcc, v7, v9, vcc ; SI-NEXT: v_add_i32_e32 v2, vcc, v2, v3 ; SI-NEXT: v_addc_u32_e32 v3, vcc, 0, v4, vcc ; SI-NEXT: v_mul_lo_u32 v4, v3, s4 @@ -2610,15 +2610,15 @@ define i64 @v_test_udiv64_mulhi_fold(i64 %arg) { ; VI-NEXT: v_mad_u64_u32 
v[2:3], s[4:5], v6, s6, 0 ; VI-NEXT: v_mul_lo_u32 v4, v7, s6 ; VI-NEXT: v_subrev_u32_e32 v3, vcc, v6, v3 -; VI-NEXT: v_add_u32_e32 v8, vcc, v4, v3 -; VI-NEXT: v_mul_hi_u32 v5, v6, v2 -; VI-NEXT: v_mad_u64_u32 v[3:4], s[4:5], v6, v8, 0 -; VI-NEXT: v_add_u32_e32 v10, vcc, v5, v3 +; VI-NEXT: v_add_u32_e32 v5, vcc, v4, v3 +; VI-NEXT: v_mad_u64_u32 v[3:4], s[4:5], v6, v5, 0 +; VI-NEXT: v_mul_hi_u32 v8, v6, v2 +; VI-NEXT: v_add_u32_e32 v8, vcc, v8, v3 ; VI-NEXT: v_mad_u64_u32 v[2:3], s[4:5], v7, v2, 0 -; VI-NEXT: v_addc_u32_e32 v11, vcc, 0, v4, vcc -; VI-NEXT: v_mad_u64_u32 v[4:5], s[4:5], v7, v8, 0 -; VI-NEXT: v_add_u32_e32 v2, vcc, v10, v2 -; VI-NEXT: v_addc_u32_e32 v2, vcc, v11, v3, vcc +; VI-NEXT: v_addc_u32_e32 v10, vcc, 0, v4, vcc +; VI-NEXT: v_mad_u64_u32 v[4:5], s[4:5], v7, v5, 0 +; VI-NEXT: v_add_u32_e32 v2, vcc, v8, v2 +; VI-NEXT: v_addc_u32_e32 v2, vcc, v10, v3, vcc ; VI-NEXT: v_addc_u32_e32 v3, vcc, v5, v9, vcc ; VI-NEXT: v_add_u32_e32 v2, vcc, v2, v4 ; VI-NEXT: v_addc_u32_e32 v3, vcc, 0, v3, vcc @@ -2698,15 +2698,15 @@ define i64 @v_test_udiv64_mulhi_fold(i64 %arg) { ; GCN-NEXT: v_mad_u64_u32 v[2:3], s[4:5], v6, s6, 0 ; GCN-NEXT: v_mul_lo_u32 v4, v7, s6 ; GCN-NEXT: v_subrev_u32_e32 v3, vcc, v6, v3 -; GCN-NEXT: v_add_u32_e32 v8, vcc, v4, v3 -; GCN-NEXT: v_mul_hi_u32 v5, v6, v2 -; GCN-NEXT: v_mad_u64_u32 v[3:4], s[4:5], v6, v8, 0 -; GCN-NEXT: v_add_u32_e32 v10, vcc, v5, v3 +; GCN-NEXT: v_add_u32_e32 v5, vcc, v4, v3 +; GCN-NEXT: v_mad_u64_u32 v[3:4], s[4:5], v6, v5, 0 +; GCN-NEXT: v_mul_hi_u32 v8, v6, v2 +; GCN-NEXT: v_add_u32_e32 v8, vcc, v8, v3 ; GCN-NEXT: v_mad_u64_u32 v[2:3], s[4:5], v7, v2, 0 -; GCN-NEXT: v_addc_u32_e32 v11, vcc, 0, v4, vcc -; GCN-NEXT: v_mad_u64_u32 v[4:5], s[4:5], v7, v8, 0 -; GCN-NEXT: v_add_u32_e32 v2, vcc, v10, v2 -; GCN-NEXT: v_addc_u32_e32 v2, vcc, v11, v3, vcc +; GCN-NEXT: v_addc_u32_e32 v10, vcc, 0, v4, vcc +; GCN-NEXT: v_mad_u64_u32 v[4:5], s[4:5], v7, v5, 0 +; GCN-NEXT: v_add_u32_e32 v2, vcc, v8, v2 +; GCN-NEXT: v_addc_u32_e32 v2, vcc, v10, v3, vcc ; GCN-NEXT: v_addc_u32_e32 v3, vcc, v5, v9, vcc ; GCN-NEXT: v_add_u32_e32 v2, vcc, v2, v4 ; GCN-NEXT: v_addc_u32_e32 v3, vcc, 0, v3, vcc diff --git a/llvm/test/CodeGen/AMDGPU/udiv64.ll b/llvm/test/CodeGen/AMDGPU/udiv64.ll index 1f9b7437e8ba..c124446beed9 100644 --- a/llvm/test/CodeGen/AMDGPU/udiv64.ll +++ b/llvm/test/CodeGen/AMDGPU/udiv64.ll @@ -230,10 +230,10 @@ define i64 @v_test_udiv_i64(i64 %x, i64 %y) { ; GCN-NEXT: v_mac_f32_e32 v4, 0xcf800000, v5 ; GCN-NEXT: v_cvt_u32_f32_e32 v5, v5 ; GCN-NEXT: v_cvt_u32_f32_e32 v4, v4 -; GCN-NEXT: v_mul_lo_u32 v8, v6, v5 -; GCN-NEXT: v_mul_hi_u32 v9, v6, v4 +; GCN-NEXT: v_mul_lo_u32 v9, v6, v5 +; GCN-NEXT: v_mul_hi_u32 v8, v6, v4 ; GCN-NEXT: v_mul_lo_u32 v10, v7, v4 -; GCN-NEXT: v_add_i32_e32 v8, vcc, v9, v8 +; GCN-NEXT: v_add_i32_e32 v8, vcc, v8, v9 ; GCN-NEXT: v_mul_lo_u32 v9, v6, v4 ; GCN-NEXT: v_add_i32_e32 v8, vcc, v8, v10 ; GCN-NEXT: v_mul_lo_u32 v10, v4, v8 @@ -1558,7 +1558,7 @@ define i64 @v_test_udiv_k_den_i64(i64 %x) { ; GCN-NEXT: v_madak_f32 v2, 0, v2, 0x41c00000 ; GCN-NEXT: v_rcp_f32_e32 v2, v2 ; GCN-NEXT: s_movk_i32 s4, 0xffe8 -; GCN-NEXT: v_mov_b32_e32 v8, 0 +; GCN-NEXT: v_mov_b32_e32 v9, 0 ; GCN-NEXT: v_mul_f32_e32 v2, 0x5f7ffffc, v2 ; GCN-NEXT: v_mul_f32_e32 v3, 0x2f800000, v2 ; GCN-NEXT: v_trunc_f32_e32 v3, v3 @@ -1566,22 +1566,22 @@ define i64 @v_test_udiv_k_den_i64(i64 %x) { ; GCN-NEXT: v_cvt_u32_f32_e32 v2, v2 ; GCN-NEXT: v_cvt_u32_f32_e32 v3, v3 ; GCN-NEXT: v_mul_hi_u32 v4, v2, s4 -; GCN-NEXT: v_mul_lo_u32 v6, v3, s4 -; GCN-NEXT: v_mul_lo_u32 v5, 
v2, s4 +; GCN-NEXT: v_mul_lo_u32 v5, v3, s4 +; GCN-NEXT: v_mul_lo_u32 v6, v2, s4 ; GCN-NEXT: v_subrev_i32_e32 v4, vcc, v2, v4 -; GCN-NEXT: v_add_i32_e32 v4, vcc, v6, v4 -; GCN-NEXT: v_mul_hi_u32 v7, v2, v5 -; GCN-NEXT: v_mul_lo_u32 v6, v2, v4 -; GCN-NEXT: v_mul_hi_u32 v9, v2, v4 +; GCN-NEXT: v_add_i32_e32 v4, vcc, v5, v4 +; GCN-NEXT: v_mul_lo_u32 v5, v2, v4 +; GCN-NEXT: v_mul_hi_u32 v7, v2, v6 +; GCN-NEXT: v_mul_hi_u32 v8, v2, v4 ; GCN-NEXT: v_mul_hi_u32 v10, v3, v4 ; GCN-NEXT: v_mul_lo_u32 v4, v3, v4 -; GCN-NEXT: v_add_i32_e32 v6, vcc, v7, v6 -; GCN-NEXT: v_addc_u32_e32 v7, vcc, 0, v9, vcc -; GCN-NEXT: v_mul_lo_u32 v9, v3, v5 -; GCN-NEXT: v_mul_hi_u32 v5, v3, v5 -; GCN-NEXT: v_add_i32_e32 v6, vcc, v6, v9 -; GCN-NEXT: v_addc_u32_e32 v5, vcc, v7, v5, vcc -; GCN-NEXT: v_addc_u32_e32 v6, vcc, v10, v8, vcc +; GCN-NEXT: v_add_i32_e32 v5, vcc, v7, v5 +; GCN-NEXT: v_addc_u32_e32 v7, vcc, 0, v8, vcc +; GCN-NEXT: v_mul_lo_u32 v8, v3, v6 +; GCN-NEXT: v_mul_hi_u32 v6, v3, v6 +; GCN-NEXT: v_add_i32_e32 v5, vcc, v5, v8 +; GCN-NEXT: v_addc_u32_e32 v5, vcc, v7, v6, vcc +; GCN-NEXT: v_addc_u32_e32 v6, vcc, v10, v9, vcc ; GCN-NEXT: v_add_i32_e32 v4, vcc, v5, v4 ; GCN-NEXT: v_addc_u32_e32 v5, vcc, 0, v6, vcc ; GCN-NEXT: v_add_i32_e32 v2, vcc, v2, v4 @@ -1593,16 +1593,16 @@ define i64 @v_test_udiv_k_den_i64(i64 %x) { ; GCN-NEXT: v_add_i32_e32 v4, vcc, v4, v5 ; GCN-NEXT: v_mul_lo_u32 v5, v2, v4 ; GCN-NEXT: v_mul_hi_u32 v7, v2, v6 -; GCN-NEXT: v_mul_hi_u32 v9, v2, v4 +; GCN-NEXT: v_mul_hi_u32 v8, v2, v4 ; GCN-NEXT: v_mul_hi_u32 v10, v3, v4 ; GCN-NEXT: v_mul_lo_u32 v4, v3, v4 ; GCN-NEXT: v_add_i32_e32 v5, vcc, v7, v5 -; GCN-NEXT: v_addc_u32_e32 v7, vcc, 0, v9, vcc -; GCN-NEXT: v_mul_lo_u32 v9, v3, v6 +; GCN-NEXT: v_addc_u32_e32 v7, vcc, 0, v8, vcc +; GCN-NEXT: v_mul_lo_u32 v8, v3, v6 ; GCN-NEXT: v_mul_hi_u32 v6, v3, v6 -; GCN-NEXT: v_add_i32_e32 v5, vcc, v5, v9 +; GCN-NEXT: v_add_i32_e32 v5, vcc, v5, v8 ; GCN-NEXT: v_addc_u32_e32 v5, vcc, v7, v6, vcc -; GCN-NEXT: v_addc_u32_e32 v6, vcc, v10, v8, vcc +; GCN-NEXT: v_addc_u32_e32 v6, vcc, v10, v9, vcc ; GCN-NEXT: v_add_i32_e32 v4, vcc, v5, v4 ; GCN-NEXT: v_addc_u32_e32 v5, vcc, 0, v6, vcc ; GCN-NEXT: v_add_i32_e32 v2, vcc, v2, v4 @@ -1618,7 +1618,7 @@ define i64 @v_test_udiv_k_den_i64(i64 %x) { ; GCN-NEXT: v_mul_hi_u32 v2, v1, v2 ; GCN-NEXT: v_add_i32_e32 v4, vcc, v4, v6 ; GCN-NEXT: v_addc_u32_e32 v2, vcc, v5, v2, vcc -; GCN-NEXT: v_addc_u32_e32 v4, vcc, v7, v8, vcc +; GCN-NEXT: v_addc_u32_e32 v4, vcc, v7, v9, vcc ; GCN-NEXT: v_add_i32_e32 v2, vcc, v2, v3 ; GCN-NEXT: v_addc_u32_e32 v3, vcc, 0, v4, vcc ; GCN-NEXT: v_mul_lo_u32 v4, v3, 24 diff --git a/llvm/test/CodeGen/AMDGPU/unstructured-cfg-def-use-issue.ll b/llvm/test/CodeGen/AMDGPU/unstructured-cfg-def-use-issue.ll index f20504019f1f..347b84513a53 100644 --- a/llvm/test/CodeGen/AMDGPU/unstructured-cfg-def-use-issue.ll +++ b/llvm/test/CodeGen/AMDGPU/unstructured-cfg-def-use-issue.ll @@ -51,15 +51,15 @@ define hidden void @widget() { ; GCN-NEXT: v_mov_b32_e32 v1, 0 ; GCN-NEXT: flat_store_dword v[0:1], v2 ; GCN-NEXT: .LBB0_7: ; %UnifiedReturnBlock -; GCN-NEXT: v_readlane_b32 s31, v40, 1 -; GCN-NEXT: v_readlane_b32 s30, v40, 0 +; GCN-NEXT: v_readlane_b32 s4, v40, 0 +; GCN-NEXT: v_readlane_b32 s5, v40, 1 ; GCN-NEXT: s_addk_i32 s32, 0xfc00 ; GCN-NEXT: v_readlane_b32 s33, v40, 2 -; GCN-NEXT: s_or_saveexec_b64 s[4:5], -1 +; GCN-NEXT: s_or_saveexec_b64 s[6:7], -1 ; GCN-NEXT: buffer_load_dword v40, off, s[0:3], s32 ; 4-byte Folded Reload -; GCN-NEXT: s_mov_b64 exec, s[4:5] +; GCN-NEXT: s_mov_b64 exec, s[6:7] ; GCN-NEXT: 
s_waitcnt vmcnt(0) -; GCN-NEXT: s_setpc_b64 s[30:31] +; GCN-NEXT: s_setpc_b64 s[4:5] ; SI-OPT-LABEL: @widget( ; SI-OPT-NEXT: bb: ; SI-OPT-NEXT: [[TMP:%.*]] = load i32, i32 addrspace(1)* null, align 16 @@ -188,7 +188,7 @@ define hidden void @blam() { ; GCN-NEXT: s_or_saveexec_b64 s[16:17], -1 ; GCN-NEXT: buffer_store_dword v40, off, s[0:3], s32 offset:20 ; 4-byte Folded Spill ; GCN-NEXT: s_mov_b64 exec, s[16:17] -; GCN-NEXT: v_writelane_b32 v40, s33, 17 +; GCN-NEXT: v_writelane_b32 v40, s33, 15 ; GCN-NEXT: s_mov_b32 s33, s32 ; GCN-NEXT: s_addk_i32 s32, 0x800 ; GCN-NEXT: buffer_store_dword v41, off, s[0:3], s33 offset:16 ; 4-byte Folded Spill @@ -196,23 +196,21 @@ define hidden void @blam() { ; GCN-NEXT: buffer_store_dword v43, off, s[0:3], s33 offset:8 ; 4-byte Folded Spill ; GCN-NEXT: buffer_store_dword v44, off, s[0:3], s33 offset:4 ; 4-byte Folded Spill ; GCN-NEXT: buffer_store_dword v45, off, s[0:3], s33 ; 4-byte Folded Spill -; GCN-NEXT: v_writelane_b32 v40, s30, 0 -; GCN-NEXT: v_writelane_b32 v40, s31, 1 -; GCN-NEXT: v_writelane_b32 v40, s34, 2 -; GCN-NEXT: v_writelane_b32 v40, s35, 3 -; GCN-NEXT: v_writelane_b32 v40, s36, 4 -; GCN-NEXT: v_writelane_b32 v40, s37, 5 -; GCN-NEXT: v_writelane_b32 v40, s38, 6 -; GCN-NEXT: v_writelane_b32 v40, s39, 7 -; GCN-NEXT: v_writelane_b32 v40, s40, 8 -; GCN-NEXT: v_writelane_b32 v40, s41, 9 -; GCN-NEXT: v_writelane_b32 v40, s42, 10 -; GCN-NEXT: v_writelane_b32 v40, s43, 11 -; GCN-NEXT: v_writelane_b32 v40, s44, 12 -; GCN-NEXT: v_writelane_b32 v40, s45, 13 -; GCN-NEXT: v_writelane_b32 v40, s46, 14 -; GCN-NEXT: v_writelane_b32 v40, s48, 15 -; GCN-NEXT: v_writelane_b32 v40, s49, 16 +; GCN-NEXT: v_writelane_b32 v40, s34, 0 +; GCN-NEXT: v_writelane_b32 v40, s35, 1 +; GCN-NEXT: v_writelane_b32 v40, s36, 2 +; GCN-NEXT: v_writelane_b32 v40, s37, 3 +; GCN-NEXT: v_writelane_b32 v40, s38, 4 +; GCN-NEXT: v_writelane_b32 v40, s39, 5 +; GCN-NEXT: v_writelane_b32 v40, s40, 6 +; GCN-NEXT: v_writelane_b32 v40, s41, 7 +; GCN-NEXT: v_writelane_b32 v40, s42, 8 +; GCN-NEXT: v_writelane_b32 v40, s43, 9 +; GCN-NEXT: v_writelane_b32 v40, s44, 10 +; GCN-NEXT: v_writelane_b32 v40, s45, 11 +; GCN-NEXT: v_writelane_b32 v40, s46, 12 +; GCN-NEXT: v_writelane_b32 v40, s48, 13 +; GCN-NEXT: v_writelane_b32 v40, s49, 14 ; GCN-NEXT: v_mov_b32_e32 v41, v31 ; GCN-NEXT: s_mov_b32 s44, s14 ; GCN-NEXT: s_mov_b32 s45, s13 diff --git a/llvm/test/CodeGen/AMDGPU/urem-seteq-illegal-types.ll b/llvm/test/CodeGen/AMDGPU/urem-seteq-illegal-types.ll index ba3d120be9ba..79723603f1c1 100644 --- a/llvm/test/CodeGen/AMDGPU/urem-seteq-illegal-types.ll +++ b/llvm/test/CodeGen/AMDGPU/urem-seteq-illegal-types.ll @@ -76,21 +76,22 @@ define <3 x i1> @test_urem_vec(<3 x i11> %X) nounwind { ; CHECK-NEXT: s_mov_b32 s5, 0x8311eb33 ; CHECK-NEXT: s_mov_b32 s6, 0x20140c ; CHECK-NEXT: s_mov_b32 s7, 0xb6db6db7 -; CHECK-NEXT: s_mov_b32 s8, 0x24924924 -; CHECK-NEXT: s_mov_b32 s9, 0xaaaaaaab -; CHECK-NEXT: s_mov_b32 s10, 0x2aaaaaaa +; CHECK-NEXT: s_mov_b32 s8, 0x49249249 +; CHECK-NEXT: s_mov_b32 s9, 0x24924924 +; CHECK-NEXT: s_mov_b32 s10, 0xaaaaaaab +; CHECK-NEXT: s_mov_b32 s11, 0x2aaaaaaa ; CHECK-NEXT: v_and_b32_e32 v0, s4, v0 ; CHECK-NEXT: v_and_b32_e32 v1, s4, v1 ; CHECK-NEXT: v_and_b32_e32 v2, s4, v2 ; CHECK-NEXT: v_mul_lo_u32 v2, v2, s5 ; CHECK-NEXT: v_mul_lo_u32 v1, v1, s7 -; CHECK-NEXT: v_mul_lo_u32 v0, v0, s9 +; CHECK-NEXT: v_mul_lo_u32 v0, v0, s10 ; CHECK-NEXT: v_add_i32_e32 v2, vcc, 0xf9dc299a, v2 -; CHECK-NEXT: v_add_i32_e32 v1, vcc, 0x49249249, v1 +; CHECK-NEXT: v_add_i32_e32 v1, vcc, s8, v1 ; CHECK-NEXT: 
v_alignbit_b32 v0, v0, v0, 1 -; CHECK-NEXT: v_cmp_lt_u32_e32 vcc, s10, v0 +; CHECK-NEXT: v_cmp_lt_u32_e32 vcc, s11, v0 ; CHECK-NEXT: v_cndmask_b32_e64 v0, 0, 1, vcc -; CHECK-NEXT: v_cmp_lt_u32_e32 vcc, s8, v1 +; CHECK-NEXT: v_cmp_lt_u32_e32 vcc, s9, v1 ; CHECK-NEXT: v_cndmask_b32_e64 v1, 0, 1, vcc ; CHECK-NEXT: v_cmp_lt_u32_e32 vcc, s6, v2 ; CHECK-NEXT: v_cndmask_b32_e64 v2, 0, 1, vcc diff --git a/llvm/test/CodeGen/AMDGPU/urem64.ll b/llvm/test/CodeGen/AMDGPU/urem64.ll index b14825871cea..00bae7afa10f 100644 --- a/llvm/test/CodeGen/AMDGPU/urem64.ll +++ b/llvm/test/CodeGen/AMDGPU/urem64.ll @@ -240,10 +240,10 @@ define i64 @v_test_urem_i64(i64 %x, i64 %y) { ; GCN-NEXT: v_mac_f32_e32 v4, 0xcf800000, v5 ; GCN-NEXT: v_cvt_u32_f32_e32 v5, v5 ; GCN-NEXT: v_cvt_u32_f32_e32 v4, v4 -; GCN-NEXT: v_mul_lo_u32 v8, v6, v5 -; GCN-NEXT: v_mul_hi_u32 v9, v6, v4 +; GCN-NEXT: v_mul_lo_u32 v9, v6, v5 +; GCN-NEXT: v_mul_hi_u32 v8, v6, v4 ; GCN-NEXT: v_mul_lo_u32 v10, v7, v4 -; GCN-NEXT: v_add_i32_e32 v8, vcc, v9, v8 +; GCN-NEXT: v_add_i32_e32 v8, vcc, v8, v9 ; GCN-NEXT: v_mul_lo_u32 v9, v6, v4 ; GCN-NEXT: v_add_i32_e32 v8, vcc, v8, v10 ; GCN-NEXT: v_mul_lo_u32 v10, v4, v8 diff --git a/llvm/test/CodeGen/AMDGPU/vgpr-tuple-allocation.ll b/llvm/test/CodeGen/AMDGPU/vgpr-tuple-allocation.ll index 46ee737d3125..9b4bc71eed41 100644 --- a/llvm/test/CodeGen/AMDGPU/vgpr-tuple-allocation.ll +++ b/llvm/test/CodeGen/AMDGPU/vgpr-tuple-allocation.ll @@ -29,8 +29,7 @@ define <4 x float> @non_preserved_vgpr_tuple8(<8 x i32> %rsrc, <4 x i32> %samp, ; GFX9-NEXT: s_add_u32 s4, s4, extern_func@gotpcrel32@lo+4 ; GFX9-NEXT: s_addc_u32 s5, s5, extern_func@gotpcrel32@hi+12 ; GFX9-NEXT: s_load_dwordx2 s[4:5], s[4:5], 0x0 -; GFX9: v_writelane_b32 v40, s30, 0 -; GFX9: v_writelane_b32 v40, s31, 1 +; GFX9-NEXT: v_writelane_b32 v40, s30, 0 ; GFX9: s_waitcnt lgkmcnt(0) ; GFX9-NEXT: s_swappc_b64 s[30:31], s[4:5] @@ -38,10 +37,8 @@ define <4 x float> @non_preserved_vgpr_tuple8(<8 x i32> %rsrc, <4 x i32> %samp, ; GFX9-NEXT: buffer_load_dword v43, off, s[0:3], s33 offset:4 ; 4-byte Folded Reload ; GFX9-NEXT: buffer_load_dword v42, off, s[0:3], s33 offset:8 ; 4-byte Folded Reload ; GFX9-NEXT: buffer_load_dword v41, off, s[0:3], s33 offset:12 ; 4-byte Folded Reload -; GFX9: v_readlane_b32 s31, v40, 1 -; GFX9: v_readlane_b32 s30, v40, 0 ; GFX9: buffer_load_dword v40, off, s[0:3], s32 offset:16 ; 4-byte Folded Reload -; GFX9: s_setpc_b64 s[30:31] +; GFX9: s_setpc_b64 s[4:5] ; ; GFX10-LABEL: non_preserved_vgpr_tuple8: ; GFX10: buffer_store_dword v40, off, s[0:3], s32 offset:16 ; 4-byte Folded Spill @@ -66,9 +63,7 @@ define <4 x float> @non_preserved_vgpr_tuple8(<8 x i32> %rsrc, <4 x i32> %samp, ; GFX10-NEXT: s_getpc_b64 s[4:5] ; GFX10-NEXT: s_add_u32 s4, s4, extern_func@gotpcrel32@lo+4 ; GFX10-NEXT: s_addc_u32 s5, s5, extern_func@gotpcrel32@hi+12 -; GFX10: v_writelane_b32 v40, s30, 0 ; GFX10: s_load_dwordx2 s[4:5], s[4:5], 0x0 -; GFX10: v_writelane_b32 v40, s31, 1 ; GFX10: s_waitcnt lgkmcnt(0) ; GFX10-NEXT: s_swappc_b64 s[30:31], s[4:5] @@ -77,10 +72,8 @@ define <4 x float> @non_preserved_vgpr_tuple8(<8 x i32> %rsrc, <4 x i32> %samp, ; GFX10-NEXT: buffer_load_dword v42, off, s[0:3], s33 offset:8 ; GFX10-NEXT: buffer_load_dword v41, off, s[0:3], s33 offset:12 -; GFX10: v_readlane_b32 s31, v40, 1 -; GFX10: v_readlane_b32 s30, v40, 0 ; GFX10: buffer_load_dword v40, off, s[0:3], s32 offset:16 ; 4-byte Folded Reload -; GFX10: s_setpc_b64 s[30:31] +; GFX10: s_setpc_b64 s[4:5] main_body: call void asm sideeffect "", 
"~{v0},~{v1},~{v2},~{v3},~{v4},~{v5},~{v6},~{v7}"() #0 call void asm sideeffect "", "~{v8},~{v9},~{v10},~{v11},~{v12},~{v13},~{v14},~{v15}"() #0 @@ -99,8 +92,6 @@ define <4 x float> @call_preserved_vgpr_tuple8(<8 x i32> %rsrc, <4 x i32> %samp, ; GFX9-LABEL: call_preserved_vgpr_tuple8: ; GFX9: buffer_store_dword v40, off, s[0:3], s32 offset:20 ; 4-byte Folded Spill -; GFX9: v_writelane_b32 v40, s30, 0 -; GFX9: v_writelane_b32 v40, s31, 1 ; GFX9: buffer_store_dword v41, off, s[0:3], s33 offset:16 ; 4-byte Folded Spill ; GFX9-NEXT: buffer_store_dword v42, off, s[0:3], s33 offset:12 ; 4-byte Folded Spill ; GFX9-NEXT: buffer_store_dword v43, off, s[0:3], s33 offset:8 ; 4-byte Folded Spill @@ -119,7 +110,7 @@ define <4 x float> @call_preserved_vgpr_tuple8(<8 x i32> %rsrc, <4 x i32> %samp, ; GFX9-NEXT: s_add_u32 s4, s4, extern_func@gotpcrel32@lo+4 ; GFX9-NEXT: s_addc_u32 s5, s5, extern_func@gotpcrel32@hi+12 ; GFX9-NEXT: s_load_dwordx2 s[4:5], s[4:5], 0x0 -; GFX9-NEXT: s_waitcnt vmcnt(0) +; GFX9: s_waitcnt vmcnt(0) ; GFX9-NEXT: global_store_dwordx4 v[0:1], v[0:3], off ; GFX9-NEXT: s_waitcnt lgkmcnt(0) ; GFX9-NEXT: s_swappc_b64 s[30:31], s[4:5] @@ -131,10 +122,8 @@ define <4 x float> @call_preserved_vgpr_tuple8(<8 x i32> %rsrc, <4 x i32> %samp, ; GFX9-NEXT: buffer_load_dword v42, off, s[0:3], s33 offset:12 ; 4-byte Folded Reload ; GFX9-NEXT: buffer_load_dword v41, off, s[0:3], s33 offset:16 ; 4-byte Folded Reload -; GFX9: v_readlane_b32 s31, v40, 1 -; GFX9-NEXT: v_readlane_b32 s30, v40, 0 ; GFX9: buffer_load_dword v40, off, s[0:3], s32 offset:20 ; 4-byte Folded Reload -; GFX9: s_setpc_b64 s[30:31] +; GFX9: s_setpc_b64 s[4:5] ; ; GFX10-LABEL: call_preserved_vgpr_tuple8: ; GFX10: buffer_store_dword v40, off, s[0:3], s32 offset:20 ; 4-byte Folded Spill @@ -144,20 +133,15 @@ define <4 x float> @call_preserved_vgpr_tuple8(<8 x i32> %rsrc, <4 x i32> %samp, ; GFX10-NEXT: buffer_store_dword v44, off, s[0:3], s33 offset:4 ; 4-byte Folded Spill ; GFX10-NEXT: buffer_store_dword v45, off, s[0:3], s33 ; 4-byte Folded Spill -; GFX10: v_writelane_b32 v40, s30, 0 -; GFX10: v_mov_b32_e32 v41, v16 -; GFX10: v_mov_b32_e32 v42, v15 -; GFX10: v_mov_b32_e32 v43, v14 -; GFX10: v_mov_b32_e32 v44, v13 -; GFX10: v_writelane_b32 v40, s31, 1 -; GFX10: v_mov_b32_e32 v45, v12 ; GFX10: image_gather4_c_b_cl v[0:3], v[12:16], s[36:43], s[4:7] dmask:0x1 dim:SQ_RSRC_IMG_2D ; GFX10-NEXT: s_waitcnt_depctr 0xffe3 ; GFX10-NEXT: s_getpc_b64 s[4:5] ; GFX10-NEXT: s_add_u32 s4, s4, extern_func@gotpcrel32@lo+4 ; GFX10-NEXT: s_addc_u32 s5, s5, extern_func@gotpcrel32@hi+12 +; GFX10-NEXT: v_writelane_b32 v40, s30, 8 ; GFX10-NEXT: s_load_dwordx2 s[4:5], s[4:5], 0x0 +; GFX10-NEXT: v_writelane_b32 v40, s31, 9 ; GFX10-NEXT: s_waitcnt vmcnt(0) ; GFX10-NEXT: global_store_dwordx4 v[0:1], v[0:3], off ; GFX10-NEXT: s_waitcnt lgkmcnt(0) @@ -169,10 +153,8 @@ define <4 x float> @call_preserved_vgpr_tuple8(<8 x i32> %rsrc, <4 x i32> %samp, ; GFX10-NEXT: buffer_load_dword v43, off, s[0:3], s33 offset:8 ; GFX10-NEXT: buffer_load_dword v42, off, s[0:3], s33 offset:12 ; GFX10-NEXT: buffer_load_dword v41, off, s[0:3], s33 offset:16 -; GFX10: v_readlane_b32 s31, v40, 1 -; GFX10: v_readlane_b32 s30, v40, 0 ; GFX10: buffer_load_dword v40, off, s[0:3], s32 offset:20 -; GFX10: s_setpc_b64 s[30:31] +; GFX10: s_setpc_b64 s[4:5] main_body: %v = call <4 x float> @llvm.amdgcn.image.gather4.c.b.cl.2d.v4f32.f32.f32(i32 1, float %bias, float %zcompare, float %s, float %t, float %clamp, <8 x i32> undef, <4 x i32> undef, i1 false, i32 0, i32 0) store <4 x float> %v, <4 x 
float> addrspace(1)* undef diff --git a/llvm/test/CodeGen/AMDGPU/wave32.ll b/llvm/test/CodeGen/AMDGPU/wave32.ll index eecff44dec3c..fcdbeea76fa2 100644 --- a/llvm/test/CodeGen/AMDGPU/wave32.ll +++ b/llvm/test/CodeGen/AMDGPU/wave32.ll @@ -1136,8 +1136,8 @@ declare void @external_void_func_void() #1 ; GCN-DAG: v_writelane_b32 v40, s30, 0 ; GCN-DAG: v_writelane_b32 v40, s31, 1 ; GCN: s_swappc_b64 -; GCN-DAG: v_readlane_b32 s30, v40, 0 -; GCN-DAG: v_readlane_b32 s31, v40, 1 +; GCN-DAG: v_readlane_b32 s4, v40, 0 +; GCN-DAG: v_readlane_b32 s5, v40, 1 ; GFX1064: s_addk_i32 s32, 0xfc00 diff --git a/llvm/test/CodeGen/AMDGPU/wwm-reserved-spill.ll b/llvm/test/CodeGen/AMDGPU/wwm-reserved-spill.ll index c01485cc3e8e..6a079bc4e52d 100644 --- a/llvm/test/CodeGen/AMDGPU/wwm-reserved-spill.ll +++ b/llvm/test/CodeGen/AMDGPU/wwm-reserved-spill.ll @@ -138,6 +138,8 @@ define amdgpu_gfx void @strict_wwm_cfg(<4 x i32> inreg %tmp14, i32 %arg) { ; GFX9-O0-NEXT: buffer_store_dword v1, off, s[0:3], s32 ; 4-byte Folded Spill ; GFX9-O0-NEXT: buffer_store_dword v2, off, s[0:3], s32 offset:4 ; 4-byte Folded Spill ; GFX9-O0-NEXT: s_mov_b64 exec, s[34:35] +; GFX9-O0-NEXT: v_writelane_b32 v5, s30, 0 +; GFX9-O0-NEXT: v_writelane_b32 v5, s31, 1 ; GFX9-O0-NEXT: s_mov_b32 s36, s4 ; GFX9-O0-NEXT: ; kill: def $sgpr36 killed $sgpr36 def $sgpr36_sgpr37_sgpr38_sgpr39 ; GFX9-O0-NEXT: s_mov_b32 s37, s5 @@ -145,37 +147,37 @@ define amdgpu_gfx void @strict_wwm_cfg(<4 x i32> inreg %tmp14, i32 %arg) { ; GFX9-O0-NEXT: s_mov_b32 s39, s7 ; GFX9-O0-NEXT: s_mov_b64 s[42:43], s[38:39] ; GFX9-O0-NEXT: s_mov_b64 s[40:41], s[36:37] -; GFX9-O0-NEXT: v_writelane_b32 v5, s40, 0 -; GFX9-O0-NEXT: v_writelane_b32 v5, s41, 1 -; GFX9-O0-NEXT: v_writelane_b32 v5, s42, 2 -; GFX9-O0-NEXT: v_writelane_b32 v5, s43, 3 -; GFX9-O0-NEXT: s_mov_b32 s34, 0 -; GFX9-O0-NEXT: buffer_load_dwordx2 v[3:4], off, s[36:39], s34 +; GFX9-O0-NEXT: v_writelane_b32 v5, s40, 2 +; GFX9-O0-NEXT: v_writelane_b32 v5, s41, 3 +; GFX9-O0-NEXT: v_writelane_b32 v5, s42, 4 +; GFX9-O0-NEXT: v_writelane_b32 v5, s43, 5 +; GFX9-O0-NEXT: s_mov_b32 s30, 0 +; GFX9-O0-NEXT: buffer_load_dwordx2 v[3:4], off, s[36:39], s30 ; GFX9-O0-NEXT: s_waitcnt vmcnt(0) ; GFX9-O0-NEXT: buffer_store_dword v3, off, s[0:3], s32 offset:16 ; 4-byte Folded Spill ; GFX9-O0-NEXT: s_waitcnt vmcnt(0) ; GFX9-O0-NEXT: buffer_store_dword v4, off, s[0:3], s32 offset:20 ; 4-byte Folded Spill -; GFX9-O0-NEXT: ; implicit-def: $sgpr36_sgpr37 +; GFX9-O0-NEXT: ; implicit-def: $sgpr34_sgpr35 ; GFX9-O0-NEXT: v_mov_b32_e32 v1, v3 ; GFX9-O0-NEXT: s_not_b64 exec, exec -; GFX9-O0-NEXT: v_mov_b32_e32 v1, s34 +; GFX9-O0-NEXT: v_mov_b32_e32 v1, s30 ; GFX9-O0-NEXT: s_not_b64 exec, exec -; GFX9-O0-NEXT: s_or_saveexec_b64 s[36:37], -1 -; GFX9-O0-NEXT: v_mov_b32_e32 v2, s34 +; GFX9-O0-NEXT: s_or_saveexec_b64 s[34:35], -1 +; GFX9-O0-NEXT: v_mov_b32_e32 v2, s30 ; GFX9-O0-NEXT: s_nop 1 ; GFX9-O0-NEXT: v_mov_b32_dpp v2, v1 row_bcast:31 row_mask:0xc bank_mask:0xf ; GFX9-O0-NEXT: v_add_u32_e64 v1, v1, v2 -; GFX9-O0-NEXT: s_mov_b64 exec, s[36:37] +; GFX9-O0-NEXT: s_mov_b64 exec, s[34:35] ; GFX9-O0-NEXT: v_mov_b32_e32 v3, v1 ; GFX9-O0-NEXT: buffer_store_dword v3, off, s[0:3], s32 offset:12 ; 4-byte Folded Spill -; GFX9-O0-NEXT: v_cmp_eq_u32_e64 s[36:37], v0, s34 -; GFX9-O0-NEXT: v_mov_b32_e32 v0, s34 +; GFX9-O0-NEXT: v_cmp_eq_u32_e64 s[34:35], v0, s30 +; GFX9-O0-NEXT: v_mov_b32_e32 v0, s30 ; GFX9-O0-NEXT: buffer_store_dword v0, off, s[0:3], s32 offset:8 ; 4-byte Folded Spill -; GFX9-O0-NEXT: s_mov_b64 s[34:35], exec -; GFX9-O0-NEXT: v_writelane_b32 v5, 
s34, 4 -; GFX9-O0-NEXT: v_writelane_b32 v5, s35, 5 -; GFX9-O0-NEXT: s_and_b64 s[34:35], s[34:35], s[36:37] -; GFX9-O0-NEXT: s_mov_b64 exec, s[34:35] +; GFX9-O0-NEXT: s_mov_b64 s[30:31], exec +; GFX9-O0-NEXT: v_writelane_b32 v5, s30, 6 +; GFX9-O0-NEXT: v_writelane_b32 v5, s31, 7 +; GFX9-O0-NEXT: s_and_b64 s[30:31], s[30:31], s[34:35] +; GFX9-O0-NEXT: s_mov_b64 exec, s[30:31] ; GFX9-O0-NEXT: s_cbranch_execz .LBB1_2 ; GFX9-O0-NEXT: ; %bb.1: ; %if ; GFX9-O0-NEXT: buffer_load_dword v3, off, s[0:3], s32 offset:16 ; 4-byte Folded Reload @@ -183,27 +185,29 @@ define amdgpu_gfx void @strict_wwm_cfg(<4 x i32> inreg %tmp14, i32 %arg) { ; GFX9-O0-NEXT: buffer_load_dword v4, off, s[0:3], s32 offset:20 ; 4-byte Folded Reload ; GFX9-O0-NEXT: s_waitcnt vmcnt(0) ; GFX9-O0-NEXT: v_mov_b32_e32 v0, v4 -; GFX9-O0-NEXT: s_or_saveexec_b64 s[34:35], -1 +; GFX9-O0-NEXT: s_or_saveexec_b64 s[30:31], -1 ; GFX9-O0-NEXT: v_mov_b32_e32 v1, 0 -; GFX9-O0-NEXT: s_mov_b64 exec, s[34:35] +; GFX9-O0-NEXT: s_mov_b64 exec, s[30:31] ; GFX9-O0-NEXT: v_mov_b32_e32 v2, v0 ; GFX9-O0-NEXT: s_not_b64 exec, exec ; GFX9-O0-NEXT: v_mov_b32_e32 v2, v1 ; GFX9-O0-NEXT: s_not_b64 exec, exec -; GFX9-O0-NEXT: s_or_saveexec_b64 s[34:35], -1 +; GFX9-O0-NEXT: s_or_saveexec_b64 s[30:31], -1 ; GFX9-O0-NEXT: v_mov_b32_dpp v1, v2 row_bcast:31 row_mask:0xc bank_mask:0xf ; GFX9-O0-NEXT: v_add_u32_e64 v1, v2, v1 -; GFX9-O0-NEXT: s_mov_b64 exec, s[34:35] +; GFX9-O0-NEXT: s_mov_b64 exec, s[30:31] ; GFX9-O0-NEXT: v_mov_b32_e32 v0, v1 ; GFX9-O0-NEXT: buffer_store_dword v0, off, s[0:3], s32 offset:8 ; 4-byte Folded Spill ; GFX9-O0-NEXT: .LBB1_2: ; %merge -; GFX9-O0-NEXT: v_readlane_b32 s34, v5, 4 -; GFX9-O0-NEXT: v_readlane_b32 s35, v5, 5 +; GFX9-O0-NEXT: v_readlane_b32 s34, v5, 6 +; GFX9-O0-NEXT: v_readlane_b32 s35, v5, 7 ; GFX9-O0-NEXT: s_or_b64 exec, exec, s[34:35] -; GFX9-O0-NEXT: v_readlane_b32 s36, v5, 0 -; GFX9-O0-NEXT: v_readlane_b32 s37, v5, 1 -; GFX9-O0-NEXT: v_readlane_b32 s38, v5, 2 -; GFX9-O0-NEXT: v_readlane_b32 s39, v5, 3 +; GFX9-O0-NEXT: v_readlane_b32 s30, v5, 0 +; GFX9-O0-NEXT: v_readlane_b32 s31, v5, 1 +; GFX9-O0-NEXT: v_readlane_b32 s36, v5, 2 +; GFX9-O0-NEXT: v_readlane_b32 s37, v5, 3 +; GFX9-O0-NEXT: v_readlane_b32 s38, v5, 4 +; GFX9-O0-NEXT: v_readlane_b32 s39, v5, 5 ; GFX9-O0-NEXT: buffer_load_dword v0, off, s[0:3], s32 offset:12 ; 4-byte Folded Reload ; GFX9-O0-NEXT: buffer_load_dword v3, off, s[0:3], s32 offset:8 ; 4-byte Folded Reload ; GFX9-O0-NEXT: s_waitcnt vmcnt(0) @@ -357,22 +361,22 @@ define amdgpu_gfx void @strict_wwm_call(<4 x i32> inreg %tmp14, i32 inreg %arg) ; GFX9-O0-NEXT: v_mov_b32_e32 v2, s34 ; GFX9-O0-NEXT: s_not_b64 exec, exec ; GFX9-O0-NEXT: s_or_saveexec_b64 s[40:41], -1 -; GFX9-O0-NEXT: s_getpc_b64 s[42:43] -; GFX9-O0-NEXT: s_add_u32 s42, s42, strict_wwm_called@rel32@lo+4 -; GFX9-O0-NEXT: s_addc_u32 s43, s43, strict_wwm_called@rel32@hi+12 +; GFX9-O0-NEXT: s_getpc_b64 s[30:31] +; GFX9-O0-NEXT: s_add_u32 s30, s30, strict_wwm_called@rel32@lo+4 +; GFX9-O0-NEXT: s_addc_u32 s31, s31, strict_wwm_called@rel32@hi+12 ; GFX9-O0-NEXT: s_mov_b64 s[46:47], s[2:3] ; GFX9-O0-NEXT: s_mov_b64 s[44:45], s[0:1] ; GFX9-O0-NEXT: s_mov_b64 s[0:1], s[44:45] ; GFX9-O0-NEXT: s_mov_b64 s[2:3], s[46:47] ; GFX9-O0-NEXT: v_mov_b32_e32 v0, v2 -; GFX9-O0-NEXT: s_swappc_b64 s[30:31], s[42:43] +; GFX9-O0-NEXT: s_swappc_b64 s[30:31], s[30:31] +; GFX9-O0-NEXT: v_readlane_b32 s30, v3, 0 +; GFX9-O0-NEXT: v_readlane_b32 s31, v3, 1 ; GFX9-O0-NEXT: v_mov_b32_e32 v1, v0 ; GFX9-O0-NEXT: v_add_u32_e64 v1, v1, v2 ; GFX9-O0-NEXT: s_mov_b64 exec, s[40:41] ; 
GFX9-O0-NEXT: v_mov_b32_e32 v0, v1 ; GFX9-O0-NEXT: buffer_store_dword v0, off, s[36:39], s34 offset:4 -; GFX9-O0-NEXT: v_readlane_b32 s31, v3, 1 -; GFX9-O0-NEXT: v_readlane_b32 s30, v3, 0 ; GFX9-O0-NEXT: s_add_i32 s32, s32, 0xfffffc00 ; GFX9-O0-NEXT: v_readlane_b32 s33, v3, 2 ; GFX9-O0-NEXT: s_or_saveexec_b64 s[34:35], -1 @@ -387,41 +391,36 @@ define amdgpu_gfx void @strict_wwm_call(<4 x i32> inreg %tmp14, i32 inreg %arg) ; GFX9-O3: ; %bb.0: ; GFX9-O3-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX9-O3-NEXT: s_or_saveexec_b64 s[34:35], -1 -; GFX9-O3-NEXT: buffer_store_dword v3, off, s[0:3], s32 offset:8 ; 4-byte Folded Spill ; GFX9-O3-NEXT: buffer_store_dword v2, off, s[0:3], s32 ; 4-byte Folded Spill ; GFX9-O3-NEXT: buffer_store_dword v1, off, s[0:3], s32 offset:4 ; 4-byte Folded Spill ; GFX9-O3-NEXT: s_mov_b64 exec, s[34:35] -; GFX9-O3-NEXT: v_writelane_b32 v3, s33, 2 -; GFX9-O3-NEXT: v_writelane_b32 v3, s30, 0 +; GFX9-O3-NEXT: s_mov_b32 s38, s33 ; GFX9-O3-NEXT: s_mov_b32 s33, s32 ; GFX9-O3-NEXT: s_addk_i32 s32, 0x400 -; GFX9-O3-NEXT: v_writelane_b32 v3, s31, 1 +; GFX9-O3-NEXT: s_mov_b64 s[36:37], s[30:31] ; GFX9-O3-NEXT: v_mov_b32_e32 v2, s8 ; GFX9-O3-NEXT: s_not_b64 exec, exec ; GFX9-O3-NEXT: v_mov_b32_e32 v2, 0 ; GFX9-O3-NEXT: s_not_b64 exec, exec ; GFX9-O3-NEXT: s_or_saveexec_b64 s[34:35], -1 ; GFX9-O3-NEXT: v_mov_b32_e32 v0, v2 -; GFX9-O3-NEXT: s_getpc_b64 s[36:37] -; GFX9-O3-NEXT: s_add_u32 s36, s36, strict_wwm_called@rel32@lo+4 -; GFX9-O3-NEXT: s_addc_u32 s37, s37, strict_wwm_called@rel32@hi+12 -; GFX9-O3-NEXT: s_swappc_b64 s[30:31], s[36:37] +; GFX9-O3-NEXT: s_getpc_b64 s[30:31] +; GFX9-O3-NEXT: s_add_u32 s30, s30, strict_wwm_called@rel32@lo+4 +; GFX9-O3-NEXT: s_addc_u32 s31, s31, strict_wwm_called@rel32@hi+12 +; GFX9-O3-NEXT: s_swappc_b64 s[30:31], s[30:31] ; GFX9-O3-NEXT: v_mov_b32_e32 v1, v0 ; GFX9-O3-NEXT: v_add_u32_e32 v1, v1, v2 ; GFX9-O3-NEXT: s_mov_b64 exec, s[34:35] ; GFX9-O3-NEXT: v_mov_b32_e32 v0, v1 ; GFX9-O3-NEXT: buffer_store_dword v0, off, s[4:7], 0 offset:4 -; GFX9-O3-NEXT: v_readlane_b32 s31, v3, 1 -; GFX9-O3-NEXT: v_readlane_b32 s30, v3, 0 ; GFX9-O3-NEXT: s_addk_i32 s32, 0xfc00 -; GFX9-O3-NEXT: v_readlane_b32 s33, v3, 2 -; GFX9-O3-NEXT: s_or_saveexec_b64 s[34:35], -1 -; GFX9-O3-NEXT: buffer_load_dword v3, off, s[0:3], s32 offset:8 ; 4-byte Folded Reload +; GFX9-O3-NEXT: s_mov_b32 s33, s38 +; GFX9-O3-NEXT: s_or_saveexec_b64 s[30:31], -1 ; GFX9-O3-NEXT: buffer_load_dword v2, off, s[0:3], s32 ; 4-byte Folded Reload ; GFX9-O3-NEXT: buffer_load_dword v1, off, s[0:3], s32 offset:4 ; 4-byte Folded Reload -; GFX9-O3-NEXT: s_mov_b64 exec, s[34:35] +; GFX9-O3-NEXT: s_mov_b64 exec, s[30:31] ; GFX9-O3-NEXT: s_waitcnt vmcnt(0) -; GFX9-O3-NEXT: s_setpc_b64 s[30:31] +; GFX9-O3-NEXT: s_setpc_b64 s[36:37] %tmp107 = tail call i32 @llvm.amdgcn.set.inactive.i32(i32 %arg, i32 0) %tmp134 = call amdgpu_gfx i32 @strict_wwm_called(i32 %tmp107) %tmp136 = add i32 %tmp134, %tmp107 @@ -535,39 +534,39 @@ define amdgpu_gfx void @strict_wwm_call_i64(<4 x i32> inreg %tmp14, i64 inreg %a ; GFX9-O0-NEXT: s_add_i32 s32, s32, 0xc00 ; GFX9-O0-NEXT: v_writelane_b32 v10, s30, 0 ; GFX9-O0-NEXT: v_writelane_b32 v10, s31, 1 -; GFX9-O0-NEXT: s_mov_b32 s36, s8 -; GFX9-O0-NEXT: s_mov_b32 s40, s4 -; GFX9-O0-NEXT: ; kill: def $sgpr40 killed $sgpr40 def $sgpr40_sgpr41_sgpr42_sgpr43 -; GFX9-O0-NEXT: s_mov_b32 s41, s5 -; GFX9-O0-NEXT: s_mov_b32 s42, s6 -; GFX9-O0-NEXT: s_mov_b32 s43, s7 -; GFX9-O0-NEXT: v_writelane_b32 v10, s40, 2 -; GFX9-O0-NEXT: v_writelane_b32 v10, s41, 3 -; GFX9-O0-NEXT: v_writelane_b32 
v10, s42, 4 -; GFX9-O0-NEXT: v_writelane_b32 v10, s43, 5 -; GFX9-O0-NEXT: ; kill: def $sgpr36 killed $sgpr36 def $sgpr36_sgpr37 -; GFX9-O0-NEXT: s_mov_b32 s37, s9 -; GFX9-O0-NEXT: ; kill: def $sgpr34_sgpr35 killed $sgpr36_sgpr37 -; GFX9-O0-NEXT: s_mov_b64 s[34:35], 0 -; GFX9-O0-NEXT: v_mov_b32_e32 v0, s36 -; GFX9-O0-NEXT: v_mov_b32_e32 v1, s37 +; GFX9-O0-NEXT: s_mov_b32 s34, s8 +; GFX9-O0-NEXT: s_mov_b32 s36, s4 +; GFX9-O0-NEXT: ; kill: def $sgpr36 killed $sgpr36 def $sgpr36_sgpr37_sgpr38_sgpr39 +; GFX9-O0-NEXT: s_mov_b32 s37, s5 +; GFX9-O0-NEXT: s_mov_b32 s38, s6 +; GFX9-O0-NEXT: s_mov_b32 s39, s7 +; GFX9-O0-NEXT: v_writelane_b32 v10, s36, 2 +; GFX9-O0-NEXT: v_writelane_b32 v10, s37, 3 +; GFX9-O0-NEXT: v_writelane_b32 v10, s38, 4 +; GFX9-O0-NEXT: v_writelane_b32 v10, s39, 5 +; GFX9-O0-NEXT: ; kill: def $sgpr34 killed $sgpr34 def $sgpr34_sgpr35 +; GFX9-O0-NEXT: s_mov_b32 s35, s9 +; GFX9-O0-NEXT: ; kill: def $sgpr30_sgpr31 killed $sgpr34_sgpr35 +; GFX9-O0-NEXT: s_mov_b64 s[30:31], 0 +; GFX9-O0-NEXT: v_mov_b32_e32 v0, s34 +; GFX9-O0-NEXT: v_mov_b32_e32 v1, s35 ; GFX9-O0-NEXT: v_mov_b32_e32 v9, v1 ; GFX9-O0-NEXT: v_mov_b32_e32 v8, v0 ; GFX9-O0-NEXT: s_not_b64 exec, exec -; GFX9-O0-NEXT: v_mov_b32_e32 v8, s34 -; GFX9-O0-NEXT: v_mov_b32_e32 v9, s35 +; GFX9-O0-NEXT: v_mov_b32_e32 v8, s30 +; GFX9-O0-NEXT: v_mov_b32_e32 v9, s31 ; GFX9-O0-NEXT: s_not_b64 exec, exec -; GFX9-O0-NEXT: s_or_saveexec_b64 s[34:35], -1 -; GFX9-O0-NEXT: v_writelane_b32 v10, s34, 6 -; GFX9-O0-NEXT: v_writelane_b32 v10, s35, 7 +; GFX9-O0-NEXT: s_or_saveexec_b64 s[30:31], -1 +; GFX9-O0-NEXT: v_writelane_b32 v10, s30, 6 +; GFX9-O0-NEXT: v_writelane_b32 v10, s31, 7 ; GFX9-O0-NEXT: v_mov_b32_e32 v2, v8 -; GFX9-O0-NEXT: s_mov_b32 s34, 32 -; GFX9-O0-NEXT: ; implicit-def: $sgpr36_sgpr37 -; GFX9-O0-NEXT: v_lshrrev_b64 v[3:4], s34, v[8:9] -; GFX9-O0-NEXT: s_getpc_b64 s[34:35] -; GFX9-O0-NEXT: s_add_u32 s34, s34, strict_wwm_called_i64@gotpcrel32@lo+4 -; GFX9-O0-NEXT: s_addc_u32 s35, s35, strict_wwm_called_i64@gotpcrel32@hi+12 -; GFX9-O0-NEXT: s_load_dwordx2 s[34:35], s[34:35], 0x0 +; GFX9-O0-NEXT: s_mov_b32 s30, 32 +; GFX9-O0-NEXT: ; implicit-def: $sgpr34_sgpr35 +; GFX9-O0-NEXT: v_lshrrev_b64 v[3:4], s30, v[8:9] +; GFX9-O0-NEXT: s_getpc_b64 s[30:31] +; GFX9-O0-NEXT: s_add_u32 s30, s30, strict_wwm_called_i64@gotpcrel32@lo+4 +; GFX9-O0-NEXT: s_addc_u32 s31, s31, strict_wwm_called_i64@gotpcrel32@hi+12 +; GFX9-O0-NEXT: s_load_dwordx2 s[30:31], s[30:31], 0x0 ; GFX9-O0-NEXT: s_mov_b64 s[38:39], s[2:3] ; GFX9-O0-NEXT: s_mov_b64 s[36:37], s[0:1] ; GFX9-O0-NEXT: s_mov_b64 s[0:1], s[36:37] @@ -575,13 +574,15 @@ define amdgpu_gfx void @strict_wwm_call_i64(<4 x i32> inreg %tmp14, i64 inreg %a ; GFX9-O0-NEXT: v_mov_b32_e32 v0, v2 ; GFX9-O0-NEXT: v_mov_b32_e32 v1, v3 ; GFX9-O0-NEXT: s_waitcnt lgkmcnt(0) -; GFX9-O0-NEXT: s_swappc_b64 s[30:31], s[34:35] +; GFX9-O0-NEXT: s_swappc_b64 s[30:31], s[30:31] ; GFX9-O0-NEXT: v_readlane_b32 s34, v10, 6 ; GFX9-O0-NEXT: v_readlane_b32 s35, v10, 7 ; GFX9-O0-NEXT: v_readlane_b32 s36, v10, 2 ; GFX9-O0-NEXT: v_readlane_b32 s37, v10, 3 ; GFX9-O0-NEXT: v_readlane_b32 s38, v10, 4 ; GFX9-O0-NEXT: v_readlane_b32 s39, v10, 5 +; GFX9-O0-NEXT: v_readlane_b32 s30, v10, 0 +; GFX9-O0-NEXT: v_readlane_b32 s31, v10, 1 ; GFX9-O0-NEXT: v_mov_b32_e32 v2, v0 ; GFX9-O0-NEXT: v_mov_b32_e32 v3, v1 ; GFX9-O0-NEXT: v_mov_b32_e32 v4, v8 @@ -593,8 +594,6 @@ define amdgpu_gfx void @strict_wwm_call_i64(<4 x i32> inreg %tmp14, i64 inreg %a ; GFX9-O0-NEXT: v_mov_b32_e32 v1, v3 ; GFX9-O0-NEXT: s_mov_b32 s34, 0 ; GFX9-O0-NEXT: 
buffer_store_dwordx2 v[0:1], off, s[36:39], s34 offset:4 -; GFX9-O0-NEXT: v_readlane_b32 s31, v10, 1 -; GFX9-O0-NEXT: v_readlane_b32 s30, v10, 0 ; GFX9-O0-NEXT: s_add_i32 s32, s32, 0xfffff400 ; GFX9-O0-NEXT: v_readlane_b32 s33, v10, 8 ; GFX9-O0-NEXT: s_or_saveexec_b64 s[34:35], -1 @@ -626,7 +625,6 @@ define amdgpu_gfx void @strict_wwm_call_i64(<4 x i32> inreg %tmp14, i64 inreg %a ; GFX9-O3: ; %bb.0: ; GFX9-O3-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX9-O3-NEXT: s_or_saveexec_b64 s[34:35], -1 -; GFX9-O3-NEXT: buffer_store_dword v8, off, s[0:3], s32 offset:24 ; 4-byte Folded Spill ; GFX9-O3-NEXT: buffer_store_dword v6, off, s[0:3], s32 ; 4-byte Folded Spill ; GFX9-O3-NEXT: s_waitcnt vmcnt(0) ; GFX9-O3-NEXT: buffer_store_dword v7, off, s[0:3], s32 offset:4 ; 4-byte Folded Spill @@ -636,19 +634,18 @@ define amdgpu_gfx void @strict_wwm_call_i64(<4 x i32> inreg %tmp14, i64 inreg %a ; GFX9-O3-NEXT: s_waitcnt vmcnt(0) ; GFX9-O3-NEXT: buffer_store_dword v3, off, s[0:3], s32 offset:20 ; 4-byte Folded Spill ; GFX9-O3-NEXT: s_mov_b64 exec, s[34:35] -; GFX9-O3-NEXT: v_writelane_b32 v8, s33, 2 -; GFX9-O3-NEXT: v_writelane_b32 v8, s30, 0 +; GFX9-O3-NEXT: s_mov_b32 s40, s33 ; GFX9-O3-NEXT: s_mov_b32 s33, s32 ; GFX9-O3-NEXT: s_addk_i32 s32, 0x800 -; GFX9-O3-NEXT: v_writelane_b32 v8, s31, 1 +; GFX9-O3-NEXT: s_mov_b64 s[36:37], s[30:31] ; GFX9-O3-NEXT: v_mov_b32_e32 v6, s8 ; GFX9-O3-NEXT: v_mov_b32_e32 v7, s9 -; GFX9-O3-NEXT: s_or_saveexec_b64 s[34:35], -1 -; GFX9-O3-NEXT: s_getpc_b64 s[36:37] -; GFX9-O3-NEXT: s_add_u32 s36, s36, strict_wwm_called_i64@gotpcrel32@lo+4 -; GFX9-O3-NEXT: s_addc_u32 s37, s37, strict_wwm_called_i64@gotpcrel32@hi+12 -; GFX9-O3-NEXT: s_load_dwordx2 s[36:37], s[36:37], 0x0 -; GFX9-O3-NEXT: s_mov_b64 exec, s[34:35] +; GFX9-O3-NEXT: s_or_saveexec_b64 s[30:31], -1 +; GFX9-O3-NEXT: s_getpc_b64 s[34:35] +; GFX9-O3-NEXT: s_add_u32 s34, s34, strict_wwm_called_i64@gotpcrel32@lo+4 +; GFX9-O3-NEXT: s_addc_u32 s35, s35, strict_wwm_called_i64@gotpcrel32@hi+12 +; GFX9-O3-NEXT: s_load_dwordx2 s[34:35], s[34:35], 0x0 +; GFX9-O3-NEXT: s_mov_b64 exec, s[30:31] ; GFX9-O3-NEXT: s_not_b64 exec, exec ; GFX9-O3-NEXT: v_mov_b32_e32 v6, 0 ; GFX9-O3-NEXT: v_mov_b32_e32 v7, 0 @@ -657,7 +654,7 @@ define amdgpu_gfx void @strict_wwm_call_i64(<4 x i32> inreg %tmp14, i64 inreg %a ; GFX9-O3-NEXT: v_mov_b32_e32 v0, v6 ; GFX9-O3-NEXT: v_mov_b32_e32 v1, v7 ; GFX9-O3-NEXT: s_waitcnt lgkmcnt(0) -; GFX9-O3-NEXT: s_swappc_b64 s[30:31], s[36:37] +; GFX9-O3-NEXT: s_swappc_b64 s[30:31], s[34:35] ; GFX9-O3-NEXT: v_mov_b32_e32 v2, v0 ; GFX9-O3-NEXT: v_mov_b32_e32 v3, v1 ; GFX9-O3-NEXT: v_add_co_u32_e32 v2, vcc, v2, v6 @@ -666,13 +663,9 @@ define amdgpu_gfx void @strict_wwm_call_i64(<4 x i32> inreg %tmp14, i64 inreg %a ; GFX9-O3-NEXT: v_mov_b32_e32 v0, v2 ; GFX9-O3-NEXT: v_mov_b32_e32 v1, v3 ; GFX9-O3-NEXT: buffer_store_dwordx2 v[0:1], off, s[4:7], 0 offset:4 -; GFX9-O3-NEXT: v_readlane_b32 s31, v8, 1 -; GFX9-O3-NEXT: v_readlane_b32 s30, v8, 0 ; GFX9-O3-NEXT: s_addk_i32 s32, 0xf800 -; GFX9-O3-NEXT: v_readlane_b32 s33, v8, 2 -; GFX9-O3-NEXT: s_or_saveexec_b64 s[34:35], -1 -; GFX9-O3-NEXT: buffer_load_dword v8, off, s[0:3], s32 offset:24 ; 4-byte Folded Reload -; GFX9-O3-NEXT: s_nop 0 +; GFX9-O3-NEXT: s_mov_b32 s33, s40 +; GFX9-O3-NEXT: s_or_saveexec_b64 s[30:31], -1 ; GFX9-O3-NEXT: buffer_load_dword v6, off, s[0:3], s32 ; 4-byte Folded Reload ; GFX9-O3-NEXT: s_nop 0 ; GFX9-O3-NEXT: buffer_load_dword v7, off, s[0:3], s32 offset:4 ; 4-byte Folded Reload @@ -683,9 +676,9 @@ define amdgpu_gfx void @strict_wwm_call_i64(<4 x 
i32> inreg %tmp14, i64 inreg %a ; GFX9-O3-NEXT: buffer_load_dword v2, off, s[0:3], s32 offset:16 ; 4-byte Folded Reload ; GFX9-O3-NEXT: s_nop 0 ; GFX9-O3-NEXT: buffer_load_dword v3, off, s[0:3], s32 offset:20 ; 4-byte Folded Reload -; GFX9-O3-NEXT: s_mov_b64 exec, s[34:35] +; GFX9-O3-NEXT: s_mov_b64 exec, s[30:31] ; GFX9-O3-NEXT: s_waitcnt vmcnt(0) -; GFX9-O3-NEXT: s_setpc_b64 s[30:31] +; GFX9-O3-NEXT: s_setpc_b64 s[36:37] %tmp107 = tail call i64 @llvm.amdgcn.set.inactive.i64(i64 %arg, i64 0) %tmp134 = call amdgpu_gfx i64 @strict_wwm_called_i64(i64 %tmp107) %tmp136 = add i64 %tmp134, %tmp107 diff --git a/llvm/test/CodeGen/MIR/AMDGPU/machine-metadata.mir b/llvm/test/CodeGen/MIR/AMDGPU/machine-metadata.mir index 0ef574c2b3f7..a9fbe15b244e 100644 --- a/llvm/test/CodeGen/MIR/AMDGPU/machine-metadata.mir +++ b/llvm/test/CodeGen/MIR/AMDGPU/machine-metadata.mir @@ -77,9 +77,10 @@ machineMetadataNodes: - '!7 = distinct !{!7, !"MemcpyLoweringDomain"}' body: | bb.0 (%ir-block.0): - liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3 + liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $sgpr30_sgpr31 ; CHECK-LABEL: name: test_memcpy + ; CHECK: [[COPY:%[0-9]+]]:sreg_64 = COPY $sgpr30_sgpr31 ; CHECK: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr3 ; CHECK: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2 ; CHECK: [[COPY3:%[0-9]+]]:vgpr_32 = COPY $vgpr1 @@ -95,8 +96,11 @@ body: | ; CHECK: [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[GLOBAL_LOAD_DWORDX2_]].sub0 ; CHECK: [[COPY9:%[0-9]+]]:vgpr_32 = COPY [[GLOBAL_LOAD_DWORDX2_]].sub1 ; CHECK: [[V_ADD_U32_e64_:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 killed [[COPY8]], killed [[COPY9]], 0, implicit $exec + ; CHECK: [[COPY10:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY]] ; CHECK: $vgpr0 = COPY [[V_ADD_U32_e64_]] - ; CHECK: S_SETPC_B64_return $sgpr30_sgpr31, implicit $vgpr0 + ; CHECK: [[COPY11:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY10]] + ; CHECK: S_SETPC_B64_return [[COPY11]], implicit $vgpr0 + %4:sreg_64 = COPY $sgpr30_sgpr31 %3:vgpr_32 = COPY $vgpr3 %2:vgpr_32 = COPY $vgpr2 %1:vgpr_32 = COPY $vgpr1 @@ -112,8 +116,10 @@ body: | %13:vgpr_32 = COPY %11.sub0 %14:vgpr_32 = COPY %11.sub1 %15:vgpr_32 = V_ADD_U32_e64 killed %13, killed %14, 0, implicit $exec + %5:ccr_sgpr_64 = COPY %4 $vgpr0 = COPY %15 - S_SETPC_B64_return $sgpr30_sgpr31, implicit $vgpr0 + %16:ccr_sgpr_64 = COPY %5 + S_SETPC_B64_return %16, implicit $vgpr0 ... 
--- @@ -128,9 +134,10 @@ machineMetadataNodes: - '!10 = !{!1, !9}' body: | bb.0 (%ir-block.0): - liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3 + liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $sgpr30_sgpr31 ; CHECK-LABEL: name: test_memcpy_inline + ; CHECK: [[COPY:%[0-9]+]]:sreg_64 = COPY $sgpr30_sgpr31 ; CHECK: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr3 ; CHECK: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2 ; CHECK: [[COPY3:%[0-9]+]]:vgpr_32 = COPY $vgpr1 @@ -146,8 +153,11 @@ body: | ; CHECK: [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[GLOBAL_LOAD_DWORDX2_]].sub0 ; CHECK: [[COPY9:%[0-9]+]]:vgpr_32 = COPY [[GLOBAL_LOAD_DWORDX2_]].sub1 ; CHECK: [[V_ADD_U32_e64_:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 killed [[COPY8]], killed [[COPY9]], 0, implicit $exec + ; CHECK: [[COPY10:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY]] ; CHECK: $vgpr0 = COPY [[V_ADD_U32_e64_]] - ; CHECK: S_SETPC_B64_return $sgpr30_sgpr31, implicit $vgpr0 + ; CHECK: [[COPY11:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY10]] + ; CHECK: S_SETPC_B64_return [[COPY11]], implicit $vgpr0 + %4:sreg_64 = COPY $sgpr30_sgpr31 %3:vgpr_32 = COPY $vgpr3 %2:vgpr_32 = COPY $vgpr2 %1:vgpr_32 = COPY $vgpr1 @@ -163,7 +173,9 @@ body: | %13:vgpr_32 = COPY %11.sub0 %14:vgpr_32 = COPY %11.sub1 %15:vgpr_32 = V_ADD_U32_e64 killed %13, killed %14, 0, implicit $exec + %5:ccr_sgpr_64 = COPY %4 $vgpr0 = COPY %15 - S_SETPC_B64_return $sgpr30_sgpr31, implicit $vgpr0 + %16:ccr_sgpr_64 = COPY %5 + S_SETPC_B64_return %16, implicit $vgpr0 ... diff --git a/llvm/test/CodeGen/MIR/AMDGPU/stack-id-assert.mir b/llvm/test/CodeGen/MIR/AMDGPU/stack-id-assert.mir index 6e9f4dacc56c..7b2ef70da2d6 100644 --- a/llvm/test/CodeGen/MIR/AMDGPU/stack-id-assert.mir +++ b/llvm/test/CodeGen/MIR/AMDGPU/stack-id-assert.mir @@ -17,9 +17,11 @@ --- name: foo +liveins: + - { reg: '$sgpr30_sgpr31', virtual-reg: '' } body: | bb.0: - SI_RETURN + S_SETPC_B64_return killed renamable $sgpr30_sgpr31 ... --- @@ -29,6 +31,7 @@ liveins: - { reg: '$vgpr0', virtual-reg: '' } - { reg: '$vgpr1', virtual-reg: '' } - { reg: '$vgpr2', virtual-reg: '' } + - { reg: '$sgpr30_sgpr31', virtual-reg: '' } stack: - { id: 0, name: '', type: spill-slot, offset: 0, size: 8, alignment: 4, stack-id: sgpr-spill, callee-saved-register: '', callee-saved-restored: true, @@ -38,7 +41,7 @@ machineFunctionInfo: stackPtrOffsetReg: '$sgpr32' body: | bb.0: - liveins: $vgpr0, $vgpr1, $vgpr2 + liveins: $vgpr0, $vgpr1, $vgpr2, $sgpr30_sgpr31 renamable $vgpr41 = COPY $vgpr2, implicit $exec renamable $vgpr40 = COPY $vgpr1, implicit $exec @@ -49,10 +52,12 @@ body: | ADJCALLSTACKUP 0, 0, implicit-def dead $scc, implicit-def $sgpr32, implicit $sgpr32 renamable $sgpr4_sgpr5 = SI_PC_ADD_REL_OFFSET target-flags(amdgpu-gotprel32-lo) @foo + 4, target-flags(amdgpu-gotprel32-hi) @foo + 12, implicit-def dead $scc renamable $sgpr4_sgpr5 = S_LOAD_DWORDX2_IMM killed renamable $sgpr4_sgpr5, 0, 0 + SI_SPILL_S64_SAVE killed renamable $sgpr30_sgpr31, %stack.0, implicit $exec, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr32 dead $sgpr30_sgpr31 = SI_CALL killed renamable $sgpr4_sgpr5, @foo, csr_amdgpu_highregs, implicit $sgpr0_sgpr1_sgpr2_sgpr3 + renamable $sgpr30_sgpr31 = SI_SPILL_S64_RESTORE %stack.0, implicit $exec, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr32 ADJCALLSTACKDOWN 0, 0, implicit-def dead $scc, implicit-def $sgpr32, implicit $sgpr32 - SI_RETURN + S_SETPC_B64_return killed renamable $sgpr30_sgpr31 ... 
From 4639461531e7325458dc6a37bc6d857c0df109f7 Mon Sep 17 00:00:00 2001 From: Simon Pilgrim Date: Wed, 22 Dec 2021 16:57:44 +0000 Subject: [PATCH 061/293] [DAG][X86] Add TargetLowering::isSplatValueForTargetNode override Add callback to enable us to test target nodes if they are splat vectors Added some basic X86ISD::VBROADCAST + X86ISD::VBROADCAST_LOAD handling --- llvm/include/llvm/CodeGen/TargetLowering.h | 7 +++ .../lib/CodeGen/SelectionDAG/SelectionDAG.cpp | 12 +++-- .../CodeGen/SelectionDAG/TargetLowering.cpp | 13 ++++++ llvm/lib/Target/X86/X86ISelLowering.cpp | 22 ++++++++++ llvm/lib/Target/X86/X86ISelLowering.h | 4 ++ llvm/test/CodeGen/X86/pr15296.ll | 22 ++-------- .../test/CodeGen/X86/vector-shift-ashr-256.ll | 44 +++++++++---------- 7 files changed, 78 insertions(+), 46 deletions(-) diff --git a/llvm/include/llvm/CodeGen/TargetLowering.h b/llvm/include/llvm/CodeGen/TargetLowering.h index 83ce3d017cc5..b2d82e0cc6e8 100644 --- a/llvm/include/llvm/CodeGen/TargetLowering.h +++ b/llvm/include/llvm/CodeGen/TargetLowering.h @@ -3620,6 +3620,13 @@ class TargetLowering : public TargetLoweringBase { const SelectionDAG &DAG, bool SNaN = false, unsigned Depth = 0) const; + + /// Return true if vector \p Op has the same value across all \p DemandedElts, + /// indicating any elements which may be undef in the output \p UndefElts. + virtual bool isSplatValueForTargetNode(SDValue Op, const APInt &DemandedElts, + APInt &UndefElts, + unsigned Depth = 0) const; + struct DAGCombinerInfo { void *DC; // The DAG Combiner object. CombineLevel Level; diff --git a/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp b/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp index 0e2fe8bf54eb..2ae0d4df7b77 100644 --- a/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp @@ -2500,6 +2500,7 @@ bool SelectionDAG::MaskedValueIsAllOnes(SDValue V, const APInt &Mask, /// they are simply ignored. bool SelectionDAG::isSplatValue(SDValue V, const APInt &DemandedElts, APInt &UndefElts, unsigned Depth) const { + unsigned Opcode = V.getOpcode(); EVT VT = V.getValueType(); assert(VT.isVector() && "Vector type expected"); @@ -2511,7 +2512,7 @@ bool SelectionDAG::isSplatValue(SDValue V, const APInt &DemandedElts, // Deal with some common cases here that work for both fixed and scalable // vector types. - switch (V.getOpcode()) { + switch (Opcode) { case ISD::SPLAT_VECTOR: UndefElts = V.getOperand(0).isUndef() ? APInt::getAllOnes(DemandedElts.getBitWidth()) @@ -2537,7 +2538,12 @@ bool SelectionDAG::isSplatValue(SDValue V, const APInt &DemandedElts, case ISD::SIGN_EXTEND: case ISD::ZERO_EXTEND: return isSplatValue(V.getOperand(0), DemandedElts, UndefElts, Depth + 1); - } + default: + if (Opcode >= ISD::BUILTIN_OP_END || Opcode == ISD::INTRINSIC_WO_CHAIN || + Opcode == ISD::INTRINSIC_W_CHAIN || Opcode == ISD::INTRINSIC_VOID) + return TLI->isSplatValueForTargetNode(V, DemandedElts, UndefElts, Depth); + break; +} // We don't support other cases than those above for scalable vectors at // the moment. 
@@ -2548,7 +2554,7 @@ bool SelectionDAG::isSplatValue(SDValue V, const APInt &DemandedElts, assert(NumElts == DemandedElts.getBitWidth() && "Vector size mismatch"); UndefElts = APInt::getZero(NumElts); - switch (V.getOpcode()) { + switch (Opcode) { case ISD::BUILD_VECTOR: { SDValue Scl; for (unsigned i = 0; i != NumElts; ++i) { diff --git a/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp b/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp index 3d29e3f39a8e..4a0b23f191dc 100644 --- a/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp @@ -3136,6 +3136,19 @@ bool TargetLowering::isKnownNeverNaNForTargetNode(SDValue Op, return false; } +bool TargetLowering::isSplatValueForTargetNode(SDValue Op, + const APInt &DemandedElts, + APInt &UndefElts, + unsigned Depth) const { + assert((Op.getOpcode() >= ISD::BUILTIN_OP_END || + Op.getOpcode() == ISD::INTRINSIC_WO_CHAIN || + Op.getOpcode() == ISD::INTRINSIC_W_CHAIN || + Op.getOpcode() == ISD::INTRINSIC_VOID) && + "Should use isSplatValue if you don't know whether Op" + " is a target node!"); + return false; +} + // FIXME: Ideally, this would use ISD::isConstantSplatVector(), but that must // work with truncating build vectors and vectors with elements of less than // 8 bits. diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp index 3c83beeb0ce5..ef849047c626 100644 --- a/llvm/lib/Target/X86/X86ISelLowering.cpp +++ b/llvm/lib/Target/X86/X86ISelLowering.cpp @@ -41050,6 +41050,28 @@ SDValue X86TargetLowering::SimplifyMultipleUseDemandedBitsForTargetNode( Op, DemandedBits, DemandedElts, DAG, Depth); } +bool X86TargetLowering::isSplatValueForTargetNode(SDValue Op, + const APInt &DemandedElts, + APInt &UndefElts, + unsigned Depth) const { + unsigned NumElts = DemandedElts.getBitWidth(); + unsigned Opc = Op.getOpcode(); + + switch (Opc) { + case X86ISD::VBROADCAST: + case X86ISD::VBROADCAST_LOAD: + // TODO: Permit vXi64 types on 32-bit targets. + if (isTypeLegal(Op.getValueType().getVectorElementType())) { + UndefElts = APInt::getNullValue(NumElts); + return true; + } + return false; + } + + return TargetLowering::isSplatValueForTargetNode(Op, DemandedElts, UndefElts, + Depth); +}; + // Helper to peek through bitops/trunc/setcc to determine size of source vector. // Allows combineBitcastvxi1 to determine what size vector generated a . 
static bool checkBitcastSrcVectorSize(SDValue Src, unsigned Size, diff --git a/llvm/lib/Target/X86/X86ISelLowering.h b/llvm/lib/Target/X86/X86ISelLowering.h index 2ea3477c3820..d1d6e319f16b 100644 --- a/llvm/lib/Target/X86/X86ISelLowering.h +++ b/llvm/lib/Target/X86/X86ISelLowering.h @@ -1159,6 +1159,10 @@ namespace llvm { SDValue Op, const APInt &DemandedBits, const APInt &DemandedElts, SelectionDAG &DAG, unsigned Depth) const override; + bool isSplatValueForTargetNode(SDValue Op, const APInt &DemandedElts, + APInt &UndefElts, + unsigned Depth) const override; + const Constant *getTargetConstantFromLoad(LoadSDNode *LD) const override; SDValue unwrapAddress(SDValue N) const override; diff --git a/llvm/test/CodeGen/X86/pr15296.ll b/llvm/test/CodeGen/X86/pr15296.ll index 71034f696429..8476b765dbe2 100644 --- a/llvm/test/CodeGen/X86/pr15296.ll +++ b/llvm/test/CodeGen/X86/pr15296.ll @@ -27,27 +27,11 @@ define <8 x i32> @shiftInput___canonical(<8 x i32> %input, i32 %shiftval, <8 x i ; CHECK-LABEL: shiftInput___canonical: ; CHECK: # %bb.0: # %allocas ; CHECK-NEXT: vbroadcastss {{[0-9]+}}(%esp), %xmm1 -; CHECK-NEXT: vpsrldq {{.*#+}} xmm2 = xmm1[12,13,14,15],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero -; CHECK-NEXT: vextractf128 $1, %ymm0, %xmm3 -; CHECK-NEXT: vpsrld %xmm2, %xmm3, %xmm4 -; CHECK-NEXT: vpsrlq $32, %xmm1, %xmm5 -; CHECK-NEXT: vpsrld %xmm5, %xmm3, %xmm6 -; CHECK-NEXT: vpblendw {{.*#+}} xmm4 = xmm6[0,1,2,3],xmm4[4,5,6,7] -; CHECK-NEXT: vpxor %xmm6, %xmm6, %xmm6 -; CHECK-NEXT: vpblendw {{.*#+}} xmm6 = xmm1[0,1],xmm6[2,3,4,5,6,7] -; CHECK-NEXT: vpsrld %xmm6, %xmm3, %xmm7 ; CHECK-NEXT: vpmovzxdq {{.*#+}} xmm1 = xmm1[0],zero,xmm1[1],zero -; CHECK-NEXT: vpsrld %xmm1, %xmm3, %xmm3 -; CHECK-NEXT: vpblendw {{.*#+}} xmm3 = xmm3[0,1,2,3],xmm7[4,5,6,7] -; CHECK-NEXT: vpblendw {{.*#+}} xmm3 = xmm3[0,1],xmm4[2,3],xmm3[4,5],xmm4[6,7] -; CHECK-NEXT: vpsrld %xmm2, %xmm0, %xmm2 -; CHECK-NEXT: vpsrld %xmm5, %xmm0, %xmm4 -; CHECK-NEXT: vpsrld %xmm6, %xmm0, %xmm5 +; CHECK-NEXT: vextractf128 $1, %ymm0, %xmm2 +; CHECK-NEXT: vpsrld %xmm1, %xmm2, %xmm2 ; CHECK-NEXT: vpsrld %xmm1, %xmm0, %xmm0 -; CHECK-NEXT: vpblendw {{.*#+}} xmm1 = xmm4[0,1,2,3],xmm2[4,5,6,7] -; CHECK-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0,1,2,3],xmm5[4,5,6,7] -; CHECK-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0,1],xmm1[2,3],xmm0[4,5],xmm1[6,7] -; CHECK-NEXT: vinsertf128 $1, %xmm3, %ymm0, %ymm0 +; CHECK-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0 ; CHECK-NEXT: retl allocas: %smear.0 = insertelement <8 x i32> undef, i32 %shiftval, i32 0 diff --git a/llvm/test/CodeGen/X86/vector-shift-ashr-256.ll b/llvm/test/CodeGen/X86/vector-shift-ashr-256.ll index 0d0f410cc830..e6c802e53514 100644 --- a/llvm/test/CodeGen/X86/vector-shift-ashr-256.ll +++ b/llvm/test/CodeGen/X86/vector-shift-ashr-256.ll @@ -2203,39 +2203,35 @@ define <4 x i64> @PR52719(<4 x i64> %a0, i32 %a1) { ; ; X86-AVX1-LABEL: PR52719: ; X86-AVX1: # %bb.0: -; X86-AVX1-NEXT: vbroadcastss {{[0-9]+}}(%esp), %xmm2 -; X86-AVX1-NEXT: vpmovzxdq {{.*#+}} xmm1 = xmm2[0],zero,xmm2[1],zero -; X86-AVX1-NEXT: vpsrldq {{.*#+}} xmm3 = xmm2[12,13,14,15],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero +; X86-AVX1-NEXT: vbroadcastss {{[0-9]+}}(%esp), %xmm1 +; X86-AVX1-NEXT: vxorps %xmm2, %xmm2, %xmm2 +; X86-AVX1-NEXT: vblendps {{.*#+}} xmm2 = xmm1[0],xmm2[1,2,3] +; X86-AVX1-NEXT: vpmovzxdq {{.*#+}} xmm3 = xmm1[0],zero,xmm1[1],zero +; X86-AVX1-NEXT: vpsrldq {{.*#+}} xmm1 = xmm1[12,13,14,15],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero ; X86-AVX1-NEXT: vmovddup {{.*#+}} xmm4 
= [9223372036854775808,9223372036854775808] ; X86-AVX1-NEXT: # xmm4 = mem[0,0] -; X86-AVX1-NEXT: vpsrlq %xmm3, %xmm4, %xmm5 -; X86-AVX1-NEXT: vpxor %xmm6, %xmm6, %xmm6 -; X86-AVX1-NEXT: vpblendw {{.*#+}} xmm2 = xmm2[0,1],xmm6[2,3,4,5,6,7] +; X86-AVX1-NEXT: vpsrlq %xmm1, %xmm4, %xmm5 ; X86-AVX1-NEXT: vpsrlq %xmm2, %xmm4, %xmm6 ; X86-AVX1-NEXT: vpblendw {{.*#+}} xmm5 = xmm6[0,1,2,3],xmm5[4,5,6,7] -; X86-AVX1-NEXT: vextractf128 $1, %ymm0, %xmm7 -; X86-AVX1-NEXT: vpsrlq %xmm3, %xmm7, %xmm3 -; X86-AVX1-NEXT: vpsrlq %xmm2, %xmm7, %xmm7 -; X86-AVX1-NEXT: vpblendw {{.*#+}} xmm3 = xmm7[0,1,2,3],xmm3[4,5,6,7] -; X86-AVX1-NEXT: vpxor %xmm5, %xmm3, %xmm3 -; X86-AVX1-NEXT: vpsubq %xmm5, %xmm3, %xmm3 -; X86-AVX1-NEXT: vpsrlq %xmm1, %xmm4, %xmm4 -; X86-AVX1-NEXT: vpblendw {{.*#+}} xmm4 = xmm4[0,1,2,3],xmm6[4,5,6,7] -; X86-AVX1-NEXT: vpsrlq %xmm2, %xmm0, %xmm2 -; X86-AVX1-NEXT: vpsrlq %xmm1, %xmm0, %xmm0 -; X86-AVX1-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0,1,2,3],xmm2[4,5,6,7] -; X86-AVX1-NEXT: vpxor %xmm4, %xmm0, %xmm0 -; X86-AVX1-NEXT: vpsubq %xmm4, %xmm0, %xmm0 -; X86-AVX1-NEXT: vinsertf128 $1, %xmm3, %ymm0, %ymm0 +; X86-AVX1-NEXT: vextractf128 $1, %ymm0, %xmm6 +; X86-AVX1-NEXT: vpsrlq %xmm1, %xmm6, %xmm1 +; X86-AVX1-NEXT: vpsrlq %xmm2, %xmm6, %xmm2 +; X86-AVX1-NEXT: vpblendw {{.*#+}} xmm1 = xmm2[0,1,2,3],xmm1[4,5,6,7] +; X86-AVX1-NEXT: vpxor %xmm5, %xmm1, %xmm1 +; X86-AVX1-NEXT: vpsubq %xmm5, %xmm1, %xmm1 +; X86-AVX1-NEXT: vpsrlq %xmm3, %xmm4, %xmm2 +; X86-AVX1-NEXT: vpsrlq %xmm3, %xmm0, %xmm0 +; X86-AVX1-NEXT: vpxor %xmm2, %xmm0, %xmm0 +; X86-AVX1-NEXT: vpsubq %xmm2, %xmm0, %xmm0 +; X86-AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0 ; X86-AVX1-NEXT: retl ; ; X86-AVX2-LABEL: PR52719: ; X86-AVX2: # %bb.0: -; X86-AVX2-NEXT: vpbroadcastd {{[0-9]+}}(%esp), %xmm1 -; X86-AVX2-NEXT: vpmovzxdq {{.*#+}} ymm1 = xmm1[0],zero,xmm1[1],zero,xmm1[2],zero,xmm1[3],zero +; X86-AVX2-NEXT: vmovd {{.*#+}} xmm1 = mem[0],zero,zero,zero ; X86-AVX2-NEXT: vmovdqa {{.*#+}} ymm2 = [0,2147483648,0,2147483648,0,2147483648,0,2147483648] -; X86-AVX2-NEXT: vpsrlvq %ymm1, %ymm2, %ymm2 -; X86-AVX2-NEXT: vpsrlvq %ymm1, %ymm0, %ymm0 +; X86-AVX2-NEXT: vpsrlq %xmm1, %ymm2, %ymm2 +; X86-AVX2-NEXT: vpsrlq %xmm1, %ymm0, %ymm0 ; X86-AVX2-NEXT: vpxor %ymm2, %ymm0, %ymm0 ; X86-AVX2-NEXT: vpsubq %ymm2, %ymm0, %ymm0 ; X86-AVX2-NEXT: retl From 4a10457d33e92f2e024f6d024d168ddcd49c3a59 Mon Sep 17 00:00:00 2001 From: Adrian Kuegel Date: Wed, 22 Dec 2021 18:09:59 +0100 Subject: [PATCH 062/293] [mlir][arith] Fix CmpIOP folding for vector types. Previously, the folding assumed that it always operates on scalar types. 
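The fix routes the folded constant through a small helper so that comparing equal
operands produces a result whose type matches the operands: a plain i1 for scalars,
a dense splat (e.g. vector<1x8xi1>) for shaped types. A sketch of that helper, with
the template argument written out explicitly (an illustrative restatement of the
hunk in ArithmeticOps.cpp below, not verbatim compiler source):

  // Assumes the usual MLIR headers (mlir/IR/BuiltinAttributes.h, BuiltinTypes.h).
  static Attribute getBoolAttribute(Type type, MLIRContext *ctx, bool value) {
    auto boolAttr = BoolAttr::get(ctx, value);
    ShapedType shapedType = type.dyn_cast_or_null<ShapedType>();
    if (!shapedType)
      return boolAttr;                                   // scalar i1 result
    return DenseElementsAttr::get(shapedType, boolAttr); // splat over the vector
  }
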
Differential Revision: https://reviews.llvm.org/D116151 --- .../Dialect/Arithmetic/IR/ArithmeticOps.cpp | 10 ++++++- .../test/Dialect/Arithmetic/canonicalize.mlir | 26 +++++++++++++++++++ 2 files changed, 35 insertions(+), 1 deletion(-) diff --git a/mlir/lib/Dialect/Arithmetic/IR/ArithmeticOps.cpp b/mlir/lib/Dialect/Arithmetic/IR/ArithmeticOps.cpp index aca6e4f3c27a..a413fb263775 100644 --- a/mlir/lib/Dialect/Arithmetic/IR/ArithmeticOps.cpp +++ b/mlir/lib/Dialect/Arithmetic/IR/ArithmeticOps.cpp @@ -1056,13 +1056,21 @@ static bool applyCmpPredicateToEqualOperands(arith::CmpIPredicate predicate) { llvm_unreachable("unknown cmpi predicate kind"); } +static Attribute getBoolAttribute(Type type, MLIRContext *ctx, bool value) { + auto boolAttr = BoolAttr::get(ctx, value); + ShapedType shapedType = type.dyn_cast_or_null(); + if (!shapedType) + return boolAttr; + return DenseElementsAttr::get(shapedType, boolAttr); +} + OpFoldResult arith::CmpIOp::fold(ArrayRef operands) { assert(operands.size() == 2 && "cmpi takes two operands"); // cmpi(pred, x, x) if (getLhs() == getRhs()) { auto val = applyCmpPredicateToEqualOperands(getPredicate()); - return BoolAttr::get(getContext(), val); + return getBoolAttribute(getType(), getContext(), val); } auto lhs = operands.front().dyn_cast_or_null(); diff --git a/mlir/test/Dialect/Arithmetic/canonicalize.mlir b/mlir/test/Dialect/Arithmetic/canonicalize.mlir index 328ed1d028c7..96a630a248cf 100644 --- a/mlir/test/Dialect/Arithmetic/canonicalize.mlir +++ b/mlir/test/Dialect/Arithmetic/canonicalize.mlir @@ -22,6 +22,32 @@ func @cmpi_equal_operands(%arg0: i64) : i1, i1, i1, i1, i1, i1, i1, i1, i1, i1 } +// Test case: Folding of comparisons with equal vector operands. +// CHECK-LABEL: @cmpi_equal_vector_operands +// CHECK-DAG: %[[T:.*]] = arith.constant dense +// CHECK-DAG: %[[F:.*]] = arith.constant dense +// CHECK: return %[[T]], %[[T]], %[[T]], %[[T]], %[[T]], +// CHECK-SAME: %[[F]], %[[F]], %[[F]], %[[F]], %[[F]] +func @cmpi_equal_vector_operands(%arg0: vector<1x8xi64>) + -> (vector<1x8xi1>, vector<1x8xi1>, vector<1x8xi1>, vector<1x8xi1>, + vector<1x8xi1>, vector<1x8xi1>, vector<1x8xi1>, vector<1x8xi1>, + vector<1x8xi1>, vector<1x8xi1>) { + %0 = arith.cmpi eq, %arg0, %arg0 : vector<1x8xi64> + %1 = arith.cmpi sle, %arg0, %arg0 : vector<1x8xi64> + %2 = arith.cmpi sge, %arg0, %arg0 : vector<1x8xi64> + %3 = arith.cmpi ule, %arg0, %arg0 : vector<1x8xi64> + %4 = arith.cmpi uge, %arg0, %arg0 : vector<1x8xi64> + %5 = arith.cmpi ne, %arg0, %arg0 : vector<1x8xi64> + %6 = arith.cmpi slt, %arg0, %arg0 : vector<1x8xi64> + %7 = arith.cmpi sgt, %arg0, %arg0 : vector<1x8xi64> + %8 = arith.cmpi ult, %arg0, %arg0 : vector<1x8xi64> + %9 = arith.cmpi ugt, %arg0, %arg0 : vector<1x8xi64> + return %0, %1, %2, %3, %4, %5, %6, %7, %8, %9 + : vector<1x8xi1>, vector<1x8xi1>, vector<1x8xi1>, vector<1x8xi1>, + vector<1x8xi1>, vector<1x8xi1>, vector<1x8xi1>, vector<1x8xi1>, + vector<1x8xi1>, vector<1x8xi1> +} + // ----- // CHECK-LABEL: @indexCastOfSignExtend From cb8a0b07974eb6a6aa681c30ce694e13b33a3db5 Mon Sep 17 00:00:00 2001 From: Arthur O'Dwyer Date: Mon, 13 Dec 2021 19:21:38 -0500 Subject: [PATCH 063/293] [libc++] [ranges] Introduce _LIBCPP_AUTO_CAST(x) for auto(x). Clang is gaining `auto(x)` support in D113393; sadly there seems to be no feature-test macro for it. Zhihao is opening a core issue for that macro. Use `_LIBCPP_AUTO_CAST` where C++20 specifies we should use `auto(x)`; stop using `__decay_copy(x)` in those places. In fact, remove `__decay_copy` entirely. 
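For reference, the macro is just a decay-and-copy cast, which is exactly the
semantics the standard gives `auto(x)`. A sketch of the definition with the
template arguments spelled out (the committed form lives in
`libcxx/include/__utility/auto_cast.h` below):

  #define _LIBCPP_AUTO_CAST(expr) static_cast<typename decay<decltype((expr))>::type>(expr)

  // The cast materializes a prvalue copy and decays arrays/functions:
  int arr[3] = {1, 2, 3};
  auto p = _LIBCPP_AUTO_CAST(arr);   // p has type int*, just like auto(arr)

With that available, `__decay_copy` no longer pulls its weight as a library
helper.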
As of C++20, it's purely a paper specification tool signifying "Return just `x`, but it was perfect-forwarded, so we understand you're going to have to call its move-constructor sometimes." I believe there's no reason we'd ever need to do its operation explicitly in code. This heisenbugs away a test failure on MinGW; see D112214. Differential Revision: https://reviews.llvm.org/D115686 --- libcxx/docs/Status/Cxx2bPapers.csv | 2 +- libcxx/include/CMakeLists.txt | 2 +- libcxx/include/__ranges/access.h | 27 ++++++----- libcxx/include/__ranges/all.h | 6 +-- libcxx/include/__ranges/size.h | 18 +++----- .../__utility/{decay_copy.h => auto_cast.h} | 21 ++------- libcxx/include/future | 10 ++-- libcxx/include/module.modulemap | 2 +- libcxx/include/thread | 5 +- libcxx/include/utility | 1 + ...verify.cpp => auto_cast.module.verify.cpp} | 4 +- .../range.access.begin/begin.pass.cpp | 31 ++++++------- .../range.access.end/end.pass.cpp | 46 +++++++++---------- .../range.access/range.prim/size.pass.cpp | 2 +- .../thread.thread.constr/F.pass.cpp | 2 +- 15 files changed, 80 insertions(+), 99 deletions(-) rename libcxx/include/__utility/{decay_copy.h => auto_cast.h} (53%) rename libcxx/test/libcxx/diagnostics/detail.headers/utility/{decay_copy.module.verify.cpp => auto_cast.module.verify.cpp} (89%) diff --git a/libcxx/docs/Status/Cxx2bPapers.csv b/libcxx/docs/Status/Cxx2bPapers.csv index 8a5b75928e74..7a76e1f57645 100644 --- a/libcxx/docs/Status/Cxx2bPapers.csv +++ b/libcxx/docs/Status/Cxx2bPapers.csv @@ -25,7 +25,7 @@ "","","","","","" "`P0288R9 `__","LWG","``any_invocable``","October 2021","","" "`P0798R8 `__","LWG","Monadic operations for ``std::optional``","October 2021","|Complete|","14.0" -"`P0849R8 `__","LWG","``auto(x)``: ``DECAY_COPY`` in the language","October 2021","","" +"`P0849R8 `__","LWG","``auto(x)``: ``DECAY_COPY`` in the language","October 2021","|Complete|","14.0" "`P1072R10 `__","LWG","``basic_string::resize_and_overwrite``","October 2021","","" "`P1147R1 `__","LWG","Printing ``volatile`` Pointers","October 2021","|Complete|","14.0" "`P1272R4 `__","LWG","Byteswapping for fun&&nuf","October 2021","|Complete|","14.0" diff --git a/libcxx/include/CMakeLists.txt b/libcxx/include/CMakeLists.txt index 76eed6f64d3b..180f7d4259cd 100644 --- a/libcxx/include/CMakeLists.txt +++ b/libcxx/include/CMakeLists.txt @@ -360,8 +360,8 @@ set(files __tuple __undef_macros __utility/as_const.h + __utility/auto_cast.h __utility/cmp.h - __utility/decay_copy.h __utility/declval.h __utility/exchange.h __utility/forward.h diff --git a/libcxx/include/__ranges/access.h b/libcxx/include/__ranges/access.h index d54b3314e435..c58849f7cf33 100644 --- a/libcxx/include/__ranges/access.h +++ b/libcxx/include/__ranges/access.h @@ -14,8 +14,7 @@ #include <__iterator/readable_traits.h> #include <__ranges/enable_borrowed_range.h> #include <__utility/as_const.h> -#include <__utility/decay_copy.h> -#include <__utility/forward.h> +#include <__utility/auto_cast.h> #include #include @@ -39,7 +38,7 @@ namespace ranges::__begin { concept __member_begin = __can_borrow<_Tp> && requires(_Tp&& __t) { - { _VSTD::__decay_copy(__t.begin()) } -> input_or_output_iterator; + { _LIBCPP_AUTO_CAST(__t.begin()) } -> input_or_output_iterator; }; void begin(auto&) = delete; @@ -51,7 +50,7 @@ namespace ranges::__begin { __can_borrow<_Tp> && __class_or_enum > && requires(_Tp && __t) { - { _VSTD::__decay_copy(begin(__t)) } -> input_or_output_iterator; + { _LIBCPP_AUTO_CAST(begin(__t)) } -> input_or_output_iterator; }; struct __fn { @@ -65,17 +64,17 @@ 
namespace ranges::__begin { template requires __member_begin<_Tp> [[nodiscard]] _LIBCPP_HIDE_FROM_ABI constexpr auto operator()(_Tp&& __t) const - noexcept(noexcept(_VSTD::__decay_copy(__t.begin()))) + noexcept(noexcept(_LIBCPP_AUTO_CAST(__t.begin()))) { - return __t.begin(); + return _LIBCPP_AUTO_CAST(__t.begin()); } template requires __unqualified_begin<_Tp> [[nodiscard]] _LIBCPP_HIDE_FROM_ABI constexpr auto operator()(_Tp&& __t) const - noexcept(noexcept(_VSTD::__decay_copy(begin(__t)))) + noexcept(noexcept(_LIBCPP_AUTO_CAST(begin(__t)))) { - return begin(__t); + return _LIBCPP_AUTO_CAST(begin(__t)); } void operator()(auto&&) const = delete; @@ -98,7 +97,7 @@ namespace ranges::__end { __can_borrow<_Tp> && requires(_Tp&& __t) { typename iterator_t<_Tp>; - { _VSTD::__decay_copy(_VSTD::forward<_Tp>(__t).end()) } -> sentinel_for >; + { _LIBCPP_AUTO_CAST(__t.end()) } -> sentinel_for >; }; void end(auto&) = delete; @@ -111,7 +110,7 @@ namespace ranges::__end { __class_or_enum > && requires(_Tp && __t) { typename iterator_t<_Tp>; - { _VSTD::__decay_copy(end(_VSTD::forward<_Tp>(__t))) } -> sentinel_for >; + { _LIBCPP_AUTO_CAST(end(__t)) } -> sentinel_for >; }; class __fn { @@ -126,17 +125,17 @@ namespace ranges::__end { template requires __member_end<_Tp> [[nodiscard]] _LIBCPP_HIDE_FROM_ABI constexpr auto operator()(_Tp&& __t) const - noexcept(noexcept(_VSTD::__decay_copy(__t.end()))) + noexcept(noexcept(_LIBCPP_AUTO_CAST(__t.end()))) { - return _VSTD::forward<_Tp>(__t).end(); + return _LIBCPP_AUTO_CAST(__t.end()); } template requires __unqualified_end<_Tp> [[nodiscard]] _LIBCPP_HIDE_FROM_ABI constexpr auto operator()(_Tp&& __t) const - noexcept(noexcept(_VSTD::__decay_copy(end(__t)))) + noexcept(noexcept(_LIBCPP_AUTO_CAST(end(__t)))) { - return end(__t); + return _LIBCPP_AUTO_CAST(end(__t)); } void operator()(auto&&) const = delete; diff --git a/libcxx/include/__ranges/all.h b/libcxx/include/__ranges/all.h index affe13ee0862..ccc77258ba10 100644 --- a/libcxx/include/__ranges/all.h +++ b/libcxx/include/__ranges/all.h @@ -17,7 +17,7 @@ #include <__ranges/range_adaptor.h> #include <__ranges/ref_view.h> #include <__ranges/subrange.h> -#include <__utility/decay_copy.h> +#include <__utility/auto_cast.h> #include <__utility/declval.h> #include <__utility/forward.h> #include @@ -38,9 +38,9 @@ namespace __all { requires ranges::view> [[nodiscard]] _LIBCPP_HIDE_FROM_ABI constexpr auto operator()(_Tp&& __t) const - noexcept(noexcept(_VSTD::__decay_copy(_VSTD::forward<_Tp>(__t)))) + noexcept(noexcept(_LIBCPP_AUTO_CAST(_VSTD::forward<_Tp>(__t)))) { - return _VSTD::forward<_Tp>(__t); + return _LIBCPP_AUTO_CAST(_VSTD::forward<_Tp>(__t)); } template diff --git a/libcxx/include/__ranges/size.h b/libcxx/include/__ranges/size.h index af0a8479f2ec..c37467eaf747 100644 --- a/libcxx/include/__ranges/size.h +++ b/libcxx/include/__ranges/size.h @@ -13,8 +13,7 @@ #include <__iterator/concepts.h> #include <__iterator/iterator_traits.h> #include <__ranges/access.h> -#include <__utility/decay_copy.h> -#include <__utility/forward.h> +#include <__utility/auto_cast.h> #include #include @@ -26,7 +25,6 @@ _LIBCPP_BEGIN_NAMESPACE_STD #if !defined(_LIBCPP_HAS_NO_RANGES) -// clang-format off namespace ranges { template inline constexpr bool disable_sized_range = false; @@ -41,7 +39,7 @@ namespace __size { template concept __member_size = __size_enabled<_Tp> && requires(_Tp&& __t) { - { _VSTD::__decay_copy(_VSTD::forward<_Tp>(__t).size()) } -> __integer_like; + { _LIBCPP_AUTO_CAST(__t.size()) } -> __integer_like; }; template @@ 
-50,7 +48,7 @@ namespace __size { !__member_size<_Tp> && __class_or_enum> && requires(_Tp&& __t) { - { _VSTD::__decay_copy(size(_VSTD::forward<_Tp>(__t))) } -> __integer_like; + { _LIBCPP_AUTO_CAST(size(__t)) } -> __integer_like; }; template @@ -76,14 +74,14 @@ namespace __size { template <__member_size _Tp> [[nodiscard]] _LIBCPP_HIDE_FROM_ABI constexpr __integer_like auto operator()(_Tp&& __t) const - noexcept(noexcept(_VSTD::forward<_Tp>(__t).size())) { - return _VSTD::forward<_Tp>(__t).size(); + noexcept(noexcept(_LIBCPP_AUTO_CAST(__t.size()))) { + return _LIBCPP_AUTO_CAST(__t.size()); } template <__unqualified_size _Tp> [[nodiscard]] _LIBCPP_HIDE_FROM_ABI constexpr __integer_like auto operator()(_Tp&& __t) const - noexcept(noexcept(size(_VSTD::forward<_Tp>(__t)))) { - return size(_VSTD::forward<_Tp>(__t)); + noexcept(noexcept(_LIBCPP_AUTO_CAST(size(__t)))) { + return _LIBCPP_AUTO_CAST(size(__t)); } template<__difference _Tp> @@ -118,8 +116,6 @@ inline namespace __cpo { } // namespace __cpo } // namespace ranges -// clang-format off - #endif // !defined(_LIBCPP_HAS_NO_RANGES) _LIBCPP_END_NAMESPACE_STD diff --git a/libcxx/include/__utility/decay_copy.h b/libcxx/include/__utility/auto_cast.h similarity index 53% rename from libcxx/include/__utility/decay_copy.h rename to libcxx/include/__utility/auto_cast.h index 32238e117f56..5c368e077508 100644 --- a/libcxx/include/__utility/decay_copy.h +++ b/libcxx/include/__utility/auto_cast.h @@ -7,29 +7,16 @@ // //===----------------------------------------------------------------------===// -#ifndef _LIBCPP___UTILITY_DECAY_COPY_H -#define _LIBCPP___UTILITY_DECAY_COPY_H +#ifndef _LIBCPP___UTILITY_AUTO_CAST_H +#define _LIBCPP___UTILITY_AUTO_CAST_H #include <__config> -#include <__utility/forward.h> #include #if !defined(_LIBCPP_HAS_NO_PRAGMA_SYSTEM_HEADER) #pragma GCC system_header #endif -_LIBCPP_BEGIN_NAMESPACE_STD +#define _LIBCPP_AUTO_CAST(expr) static_cast::type>(expr) -template -_LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR -typename decay<_Tp>::type __decay_copy(_Tp&& __t) -#if _LIBCPP_STD_VER > 17 - noexcept(is_nothrow_convertible_v<_Tp, decay_t<_Tp>>) -#endif -{ - return _VSTD::forward<_Tp>(__t); -} - -_LIBCPP_END_NAMESPACE_STD - -#endif // _LIBCPP___UTILITY_DECAY_COPY_H +#endif // _LIBCPP___UTILITY_AUTO_CAST_H diff --git a/libcxx/include/future b/libcxx/include/future index 99df8831a778..6b666a70f48e 100644 --- a/libcxx/include/future +++ b/libcxx/include/future @@ -366,7 +366,7 @@ template struct uses_allocator, Alloc>; #include <__debug> #include <__memory/allocator_arg_t.h> #include <__memory/uses_allocator.h> -#include <__utility/decay_copy.h> +#include <__utility/auto_cast.h> #include <__utility/forward.h> #include #include @@ -2207,16 +2207,16 @@ async(launch __policy, _Fp&& __f, _Args&&... __args) { #endif if (__does_policy_contain(__policy, launch::async)) - return _VSTD::__make_async_assoc_state<_Rp>(_BF(_VSTD::__decay_copy(_VSTD::forward<_Fp>(__f)), - _VSTD::__decay_copy(_VSTD::forward<_Args>(__args))...)); + return _VSTD::__make_async_assoc_state<_Rp>(_BF(_LIBCPP_AUTO_CAST(_VSTD::forward<_Fp>(__f)), + _LIBCPP_AUTO_CAST(_VSTD::forward<_Args>(__args))...)); #ifndef _LIBCPP_NO_EXCEPTIONS } catch ( ... 
) { if (__policy == launch::async) throw ; } #endif if (__does_policy_contain(__policy, launch::deferred)) - return _VSTD::__make_deferred_assoc_state<_Rp>(_BF(_VSTD::__decay_copy(_VSTD::forward<_Fp>(__f)), - _VSTD::__decay_copy(_VSTD::forward<_Args>(__args))...)); + return _VSTD::__make_deferred_assoc_state<_Rp>(_BF(_LIBCPP_AUTO_CAST(_VSTD::forward<_Fp>(__f)), + _LIBCPP_AUTO_CAST(_VSTD::forward<_Args>(__args))...)); return future<_Rp>{}; } diff --git a/libcxx/include/module.modulemap b/libcxx/include/module.modulemap index 4194a7fb83fe..1dc6db406a74 100644 --- a/libcxx/include/module.modulemap +++ b/libcxx/include/module.modulemap @@ -921,8 +921,8 @@ module std [system] { module __utility { module as_const { private header "__utility/as_const.h" } + module auto_cast { private header "__utility/auto_cast.h" } module cmp { private header "__utility/cmp.h" } - module decay_copy { private header "__utility/decay_copy.h" } module declval { private header "__utility/declval.h" } module exchange { private header "__utility/exchange.h" } module forward { private header "__utility/forward.h" } diff --git a/libcxx/include/thread b/libcxx/include/thread index bf751c87aa3b..a4632f6fe524 100644 --- a/libcxx/include/thread +++ b/libcxx/include/thread @@ -88,7 +88,6 @@ void sleep_for(const chrono::duration& rel_time); #include <__mutex_base> #include <__thread/poll_with_backoff.h> #include <__threading_support> -#include <__utility/decay_copy.h> #include <__utility/forward.h> #include #include @@ -303,8 +302,8 @@ thread::thread(_Fp&& __f, _Args&&... __args) typedef tuple<_TSPtr, typename decay<_Fp>::type, typename decay<_Args>::type...> _Gp; unique_ptr<_Gp> __p( new _Gp(_VSTD::move(__tsp), - _VSTD::__decay_copy(_VSTD::forward<_Fp>(__f)), - _VSTD::__decay_copy(_VSTD::forward<_Args>(__args))...)); + _VSTD::forward<_Fp>(__f), + _VSTD::forward<_Args>(__args)...)); int __ec = _VSTD::__libcpp_thread_create(&__t_, &__thread_proxy<_Gp>, __p.get()); if (__ec == 0) __p.release(); diff --git a/libcxx/include/utility b/libcxx/include/utility index 9ab7b8e0ffb3..9dd7905516a8 100644 --- a/libcxx/include/utility +++ b/libcxx/include/utility @@ -218,6 +218,7 @@ template #include <__debug> #include <__tuple> #include <__utility/as_const.h> +#include <__utility/auto_cast.h> #include <__utility/cmp.h> #include <__utility/declval.h> #include <__utility/exchange.h> diff --git a/libcxx/test/libcxx/diagnostics/detail.headers/utility/decay_copy.module.verify.cpp b/libcxx/test/libcxx/diagnostics/detail.headers/utility/auto_cast.module.verify.cpp similarity index 89% rename from libcxx/test/libcxx/diagnostics/detail.headers/utility/decay_copy.module.verify.cpp rename to libcxx/test/libcxx/diagnostics/detail.headers/utility/auto_cast.module.verify.cpp index b15d23c71f2c..cf012c3d17e0 100644 --- a/libcxx/test/libcxx/diagnostics/detail.headers/utility/decay_copy.module.verify.cpp +++ b/libcxx/test/libcxx/diagnostics/detail.headers/utility/auto_cast.module.verify.cpp @@ -11,5 +11,5 @@ // WARNING: This test was generated by 'generate_private_header_tests.py' // and should not be edited manually. 
-// expected-error@*:* {{use of private header from outside its module: '__utility/decay_copy.h'}} -#include <__utility/decay_copy.h> +// expected-error@*:* {{use of private header from outside its module: '__utility/auto_cast.h'}} +#include <__utility/auto_cast.h> diff --git a/libcxx/test/std/ranges/range.access/range.access.begin/begin.pass.cpp b/libcxx/test/std/ranges/range.access/range.access.begin/begin.pass.cpp index 05e6211f7a54..95d2196803b2 100644 --- a/libcxx/test/std/ranges/range.access/range.access.begin/begin.pass.cpp +++ b/libcxx/test/std/ranges/range.access/range.access.begin/begin.pass.cpp @@ -249,28 +249,27 @@ constexpr bool testBeginFunction() { ASSERT_NOEXCEPT(std::ranges::begin(std::declval())); ASSERT_NOEXCEPT(std::ranges::cbegin(std::declval())); -template struct NoThrowMemberBegin { - T begin() const noexcept; -}; -ASSERT_NOEXCEPT(std::ranges::begin(std::declval&>())); -ASSERT_NOEXCEPT(std::ranges::cbegin(std::declval&>())); -ASSERT_NOT_NOEXCEPT(std::ranges::begin(std::declval>&>())); -ASSERT_NOT_NOEXCEPT(std::ranges::cbegin(std::declval>&>())); + ThrowingIterator begin() const noexcept; // auto(t.begin()) doesn't throw +} ntmb; +static_assert(noexcept(std::ranges::begin(ntmb))); +static_assert(noexcept(std::ranges::cbegin(ntmb))); -template struct NoThrowADLBegin { - friend T begin(NoThrowADLBegin&) noexcept { return T{}; } - friend T begin(NoThrowADLBegin const&) noexcept { return T{}; } -}; -ASSERT_NOEXCEPT(std::ranges::begin(std::declval&>())); -ASSERT_NOEXCEPT(std::ranges::cbegin(std::declval&>())); -ASSERT_NOT_NOEXCEPT(std::ranges::begin(std::declval>&>())); -ASSERT_NOT_NOEXCEPT(std::ranges::cbegin(std::declval>&>())); + friend ThrowingIterator begin(NoThrowADLBegin&) noexcept; // auto(begin(t)) doesn't throw + friend ThrowingIterator begin(const NoThrowADLBegin&) noexcept; +} ntab; +static_assert(noexcept(std::ranges::begin(ntab))); +static_assert(noexcept(std::ranges::cbegin(ntab))); + +struct NoThrowMemberBeginReturnsRef { + ThrowingIterator& begin() const noexcept; // auto(t.begin()) may throw +} ntmbrr; +static_assert(!noexcept(std::ranges::begin(ntmbrr))); +static_assert(!noexcept(std::ranges::cbegin(ntmbrr))); struct BeginReturnsArrayRef { auto begin() const noexcept -> int(&)[10]; - auto end() const noexcept -> int(&)[10]; } brar; static_assert(noexcept(std::ranges::begin(brar))); static_assert(noexcept(std::ranges::cbegin(brar))); diff --git a/libcxx/test/std/ranges/range.access/range.access.end/end.pass.cpp b/libcxx/test/std/ranges/range.access/range.access.end/end.pass.cpp index 1e4fc0213fc4..84b4904d8f96 100644 --- a/libcxx/test/std/ranges/range.access/range.access.end/end.pass.cpp +++ b/libcxx/test/std/ranges/range.access/range.access.end/end.pass.cpp @@ -283,34 +283,34 @@ constexpr bool testEndFunction() { ASSERT_NOEXCEPT(std::ranges::end(std::declval())); ASSERT_NOEXCEPT(std::ranges::cend(std::declval())); -template struct NoThrowMemberEnd { - T begin() const; - T end() const noexcept; -}; -ASSERT_NOEXCEPT(std::ranges::end(std::declval&>())); -ASSERT_NOEXCEPT(std::ranges::cend(std::declval&>())); -ASSERT_NOT_NOEXCEPT(std::ranges::end(std::declval>&>())); -ASSERT_NOT_NOEXCEPT(std::ranges::cend(std::declval>&>())); + ThrowingIterator begin() const; + ThrowingIterator end() const noexcept; // auto(t.end()) doesn't throw +} ntme; +static_assert(noexcept(std::ranges::end(ntme))); +static_assert(noexcept(std::ranges::cend(ntme))); -template struct NoThrowADLEnd { - T begin() const; - friend T end(NoThrowADLEnd&) noexcept { return T{}; } - friend T 
end(NoThrowADLEnd const&) noexcept { return T{}; } -}; -ASSERT_NOEXCEPT(std::ranges::end(std::declval&>())); -ASSERT_NOEXCEPT(std::ranges::cend(std::declval&>())); -ASSERT_NOT_NOEXCEPT(std::ranges::end(std::declval>&>())); -ASSERT_NOT_NOEXCEPT(std::ranges::cend(std::declval>&>())); - -struct BeginReturnsArrayRef { + ThrowingIterator begin() const; + friend ThrowingIterator end(NoThrowADLEnd&) noexcept; // auto(end(t)) doesn't throw + friend ThrowingIterator end(const NoThrowADLEnd&) noexcept; +} ntae; +static_assert(noexcept(std::ranges::end(ntae))); +static_assert(noexcept(std::ranges::cend(ntae))); + +struct NoThrowMemberEndReturnsRef { + ThrowingIterator begin() const; + ThrowingIterator& end() const noexcept; // auto(t.end()) may throw +} ntmerr; +static_assert(!noexcept(std::ranges::end(ntmerr))); +static_assert(!noexcept(std::ranges::cend(ntmerr))); + +struct EndReturnsArrayRef { auto begin() const noexcept -> int(&)[10]; auto end() const noexcept -> int(&)[10]; -} brar; -static_assert(noexcept(std::ranges::end(brar))); -static_assert(noexcept(std::ranges::cend(brar))); - +} erar; +static_assert(noexcept(std::ranges::end(erar))); +static_assert(noexcept(std::ranges::cend(erar))); int main(int, char**) { testArray(); diff --git a/libcxx/test/std/ranges/range.access/range.prim/size.pass.cpp b/libcxx/test/std/ranges/range.access/range.prim/size.pass.cpp index 92023cc6a287..4d91e3dafebd 100644 --- a/libcxx/test/std/ranges/range.access/range.prim/size.pass.cpp +++ b/libcxx/test/std/ranges/range.access/range.prim/size.pass.cpp @@ -135,7 +135,7 @@ struct SizeFunctionSigned { bool constexpr testHasSizeFunction() { assert(std::ranges::size(SizeFunction()) == 42); ASSERT_SAME_TYPE(decltype(std::ranges::size(SizeFunction())), size_t); - assert(std::ranges::size(MoveOnlySizeFunction()) == 42); + static_assert(!std::is_invocable_v); assert(std::ranges::size(EnumSizeFunction()) == 42); assert(std::ranges::size(SizeFunctionConst()) == 42); diff --git a/libcxx/test/std/thread/thread.threads/thread.thread.class/thread.thread.constr/F.pass.cpp b/libcxx/test/std/thread/thread.threads/thread.thread.class/thread.thread.constr/F.pass.cpp index c0468fd8ca22..c3d5189e1bba 100644 --- a/libcxx/test/std/thread/thread.threads/thread.thread.class/thread.thread.constr/F.pass.cpp +++ b/libcxx/test/std/thread/thread.threads/thread.thread.class/thread.thread.constr/F.pass.cpp @@ -21,7 +21,7 @@ // (It's fixed in the upcoming Clang 14, by https://reviews.llvm.org/D109651.) // Prior to the fix, when statically linked, the unwind info for the two // (default and overridden) operator new implementations clash. 
-// XFAIL: target={{.+}}-windows-gnu && !windows-dll && clang-13 +// UNSUPPORTED: target={{.+}}-windows-gnu && !windows-dll && clang-13 #include #include From ece75e20352c3e7ce5a9dc163893f191bcdc336e Mon Sep 17 00:00:00 2001 From: LLVM GN Syncbot Date: Wed, 22 Dec 2021 17:34:39 +0000 Subject: [PATCH 064/293] [gn build] Port cb8a0b07974e --- llvm/utils/gn/secondary/libcxx/include/BUILD.gn | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/llvm/utils/gn/secondary/libcxx/include/BUILD.gn b/llvm/utils/gn/secondary/libcxx/include/BUILD.gn index 80d4c15503af..210041831574 100644 --- a/llvm/utils/gn/secondary/libcxx/include/BUILD.gn +++ b/llvm/utils/gn/secondary/libcxx/include/BUILD.gn @@ -420,8 +420,8 @@ if (current_toolchain == default_toolchain) { "__tuple", "__undef_macros", "__utility/as_const.h", + "__utility/auto_cast.h", "__utility/cmp.h", - "__utility/decay_copy.h", "__utility/declval.h", "__utility/exchange.h", "__utility/forward.h", From 8b58344efb2ea3bab28e373d7f9113a200742684 Mon Sep 17 00:00:00 2001 From: Simon Pilgrim Date: Wed, 22 Dec 2021 17:42:45 +0000 Subject: [PATCH 065/293] Remove superfluous semicolon. Missed by MSVC --- llvm/lib/Target/X86/X86ISelLowering.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp index ef849047c626..3d186857e00a 100644 --- a/llvm/lib/Target/X86/X86ISelLowering.cpp +++ b/llvm/lib/Target/X86/X86ISelLowering.cpp @@ -41070,7 +41070,7 @@ bool X86TargetLowering::isSplatValueForTargetNode(SDValue Op, return TargetLowering::isSplatValueForTargetNode(Op, DemandedElts, UndefElts, Depth); -}; +} // Helper to peek through bitops/trunc/setcc to determine size of source vector. // Allows combineBitcastvxi1 to determine what size vector generated a . From 25226f3e4a42f70124995c16e22ff870efa30c6d Mon Sep 17 00:00:00 2001 From: Michael Jones Date: Tue, 21 Dec 2021 13:12:45 -0800 Subject: [PATCH 066/293] [libc] apply formatting to tests Apply the formatting rules that were applied to the libc/src directory to the libc/test directory, as well as the files in libc/utils that are included by the tests. This does not include automated enforcement. 
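Concretely, these are mechanical renames to the casing already used under
libc/src: snake_case for functions and locals, upper case for compile-time
constants. A representative before/after, taken from the hunks below:

  // before
  void SetUp() override { Options.Log = BenchmarkLog::Full; }
  constexpr UIntType count = 10000000;
  uint32_t actualOutputExp2 = 0;

  // after
  void set_up() override { Options.Log = BenchmarkLog::Full; }
  constexpr UIntType COUNT = 10000000;
  uint32_t actual_output_exp2 = 0;
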
Reviewed By: sivachandra, lntue Differential Revision: https://reviews.llvm.org/D116127 --- libc/benchmarks/LibcBenchmarkTest.cpp | 4 +- .../automemcpy/lib/ResultAnalyzerMain.cpp | 2 +- .../test/config/linux/x86_64/syscall_test.cpp | 2 +- libc/test/src/__support/str_to_float_test.cpp | 220 +++++++++--------- .../test/src/fenv/enabled_exceptions_test.cpp | 4 +- libc/test/src/fenv/exception_status_test.cpp | 8 +- libc/test/src/math/CeilTest.h | 6 +- libc/test/src/math/CopySignTest.h | 6 +- libc/test/src/math/FAbsTest.h | 6 +- libc/test/src/math/FDimTest.h | 18 +- libc/test/src/math/FMaxTest.h | 8 +- libc/test/src/math/FMinTest.h | 8 +- libc/test/src/math/FloorTest.h | 6 +- libc/test/src/math/FmaTest.h | 26 +-- libc/test/src/math/FrexpTest.h | 8 +- libc/test/src/math/HypotTest.h | 22 +- libc/test/src/math/ILogbTest.h | 26 ++- libc/test/src/math/LdExpTest.h | 60 ++--- libc/test/src/math/LogbTest.h | 8 +- libc/test/src/math/ModfTest.h | 6 +- libc/test/src/math/NextAfterTest.h | 116 ++++----- libc/test/src/math/RIntTest.h | 66 +++--- libc/test/src/math/RemQuoTest.h | 14 +- libc/test/src/math/RoundTest.h | 6 +- libc/test/src/math/RoundToIntegerTest.h | 156 ++++++------- libc/test/src/math/SqrtTest.h | 16 +- libc/test/src/math/TruncTest.h | 6 +- libc/test/src/math/cos_test.cpp | 6 +- libc/test/src/math/cosf_test.cpp | 10 +- libc/test/src/math/exp2f_test.cpp | 6 +- libc/test/src/math/expf_test.cpp | 6 +- libc/test/src/math/expm1f_test.cpp | 6 +- libc/test/src/math/fdim_test.cpp | 12 +- libc/test/src/math/fdimf_test.cpp | 12 +- libc/test/src/math/fdiml_test.cpp | 12 +- libc/test/src/math/fma_test.cpp | 6 +- libc/test/src/math/fmaf_test.cpp | 6 +- libc/test/src/math/hypot_test.cpp | 8 +- libc/test/src/math/hypotf_test.cpp | 6 +- libc/test/src/math/ilogb_test.cpp | 10 +- libc/test/src/math/ilogbf_test.cpp | 10 +- libc/test/src/math/ilogbl_test.cpp | 10 +- libc/test/src/math/logf_test.cpp | 6 +- libc/test/src/math/sdcomp26094.h | 2 +- libc/test/src/math/sin_test.cpp | 6 +- libc/test/src/math/sincosf_test.cpp | 10 +- libc/test/src/math/sinf_test.cpp | 10 +- libc/test/src/math/tan_test.cpp | 6 +- libc/test/src/signal/sigprocmask_test.cpp | 4 +- libc/test/src/stdlib/atof_test.cpp | 24 +- libc/test/src/stdlib/bsearch_test.cpp | 28 +-- libc/test/src/stdlib/qsort_test.cpp | 74 +++--- libc/test/src/stdlib/strtod_test.cpp | 59 ++--- libc/test/src/stdlib/strtof_test.cpp | 162 ++++++------- libc/test/src/stdlib/strtold_test.cpp | 218 ++++++++--------- libc/test/src/string/bcmp_test.cpp | 14 +- libc/test/src/string/bzero_test.cpp | 6 +- libc/test/src/string/memcmp_test.cpp | 14 +- libc/test/src/string/memcpy_test.cpp | 10 +- libc/test/src/string/memset_test.cpp | 6 +- libc/test/src/string/stpcpy_test.cpp | 12 +- libc/test/src/threads/call_once_test.cpp | 8 +- libc/test/src/threads/mtx_test.cpp | 8 +- libc/test/src/time/TmHelper.h | 6 +- libc/test/src/time/asctime_r_test.cpp | 4 +- libc/test/src/time/asctime_test.cpp | 4 +- libc/test/src/time/mktime_test.cpp | 6 +- libc/test/src/unistd/write_test.cpp | 6 +- libc/utils/UnitTest/LibcTest.cpp | 26 +-- libc/utils/testutils/ExecuteFunction.h | 27 +-- libc/utils/testutils/ExecuteFunctionUnix.cpp | 30 +-- libc/utils/testutils/FDReader.h | 4 +- libc/utils/testutils/FDReaderUnix.cpp | 2 +- libc/utils/testutils/StreamWrapper.cpp | 6 +- libc/utils/testutils/StreamWrapper.h | 4 +- 75 files changed, 900 insertions(+), 872 deletions(-) diff --git a/libc/benchmarks/LibcBenchmarkTest.cpp b/libc/benchmarks/LibcBenchmarkTest.cpp index c9b21e48e8f7..e4afab531fef 100644 --- 
a/libc/benchmarks/LibcBenchmarkTest.cpp +++ b/libc/benchmarks/LibcBenchmarkTest.cpp @@ -50,9 +50,9 @@ class LibcBenchmark : public ::testing::Test { } protected: - void SetUp() override { Options.Log = BenchmarkLog::Full; } + void set_up() override { Options.Log = BenchmarkLog::Full; } - void TearDown() override { + void tear_down() override { // We make sure all the expected measurements were performed. if (MaybeTimepoints) EXPECT_THAT(*MaybeTimepoints, IsEmpty()); diff --git a/libc/benchmarks/automemcpy/lib/ResultAnalyzerMain.cpp b/libc/benchmarks/automemcpy/lib/ResultAnalyzerMain.cpp index 6a657e432c18..47e3200f9dcc 100644 --- a/libc/benchmarks/automemcpy/lib/ResultAnalyzerMain.cpp +++ b/libc/benchmarks/automemcpy/lib/ResultAnalyzerMain.cpp @@ -94,7 +94,7 @@ JsonFile parseJsonResultFile(StringRef Filename) { json::Path::Root Root; JsonFile JF; if (!fromJSON(JsonValue, JF, Root)) - ExitOnErr(Root.getError()); + ExitOnErr(Root.get_error()); return JF; } diff --git a/libc/test/config/linux/x86_64/syscall_test.cpp b/libc/test/config/linux/x86_64/syscall_test.cpp index 971b571fce4d..eb2205aa4fb6 100644 --- a/libc/test/config/linux/x86_64/syscall_test.cpp +++ b/libc/test/config/linux/x86_64/syscall_test.cpp @@ -39,6 +39,6 @@ TEST(LlvmLibcX86_64_SyscallTest, APITest) { return __llvm_libc::syscall(n, a1, a2, a3, a4, a5, a6); }); - Function notLongType( + Function not_long_type( [](long n, void *a1) { return __llvm_libc::syscall(n, a1); }); } diff --git a/libc/test/src/__support/str_to_float_test.cpp b/libc/test/src/__support/str_to_float_test.cpp index 5728185e54bb..44f0cecec76c 100644 --- a/libc/test/src/__support/str_to_float_test.cpp +++ b/libc/test/src/__support/str_to_float_test.cpp @@ -14,201 +14,209 @@ class LlvmLibcStrToFloatTest : public __llvm_libc::testing::Test { public: template - void ClingerFastPathTest( + void clinger_fast_path_test( const typename __llvm_libc::fputil::FPBits::UIntType inputMantissa, const int32_t inputExp10, const typename __llvm_libc::fputil::FPBits::UIntType expectedOutputMantissa, const uint32_t expectedOutputExp2) { - typename __llvm_libc::fputil::FPBits::UIntType actualOutputMantissa = 0; - uint32_t actualOutputExp2 = 0; + typename __llvm_libc::fputil::FPBits::UIntType actual_output_mantissa = + 0; + uint32_t actual_output_exp2 = 0; ASSERT_TRUE(__llvm_libc::internal::clinger_fast_path( - inputMantissa, inputExp10, &actualOutputMantissa, &actualOutputExp2)); - EXPECT_EQ(actualOutputMantissa, expectedOutputMantissa); - EXPECT_EQ(actualOutputExp2, expectedOutputExp2); + inputMantissa, inputExp10, &actual_output_mantissa, + &actual_output_exp2)); + EXPECT_EQ(actual_output_mantissa, expectedOutputMantissa); + EXPECT_EQ(actual_output_exp2, expectedOutputExp2); } template - void ClingerFastPathFailsTest( + void clinger_fast_path_fails_test( const typename __llvm_libc::fputil::FPBits::UIntType inputMantissa, const int32_t inputExp10) { - typename __llvm_libc::fputil::FPBits::UIntType actualOutputMantissa = 0; - uint32_t actualOutputExp2 = 0; + typename __llvm_libc::fputil::FPBits::UIntType actual_output_mantissa = + 0; + uint32_t actual_output_exp2 = 0; ASSERT_FALSE(__llvm_libc::internal::clinger_fast_path( - inputMantissa, inputExp10, &actualOutputMantissa, &actualOutputExp2)); + inputMantissa, inputExp10, &actual_output_mantissa, + &actual_output_exp2)); } template - void EiselLemireTest( + void eisel_lemire_test( const typename __llvm_libc::fputil::FPBits::UIntType inputMantissa, const int32_t inputExp10, const typename __llvm_libc::fputil::FPBits::UIntType 
expectedOutputMantissa, const uint32_t expectedOutputExp2) { - typename __llvm_libc::fputil::FPBits::UIntType actualOutputMantissa = 0; - uint32_t actualOutputExp2 = 0; + typename __llvm_libc::fputil::FPBits::UIntType actual_output_mantissa = + 0; + uint32_t actual_output_exp2 = 0; ASSERT_TRUE(__llvm_libc::internal::eisel_lemire( - inputMantissa, inputExp10, &actualOutputMantissa, &actualOutputExp2)); - EXPECT_EQ(actualOutputMantissa, expectedOutputMantissa); - EXPECT_EQ(actualOutputExp2, expectedOutputExp2); + inputMantissa, inputExp10, &actual_output_mantissa, + &actual_output_exp2)); + EXPECT_EQ(actual_output_mantissa, expectedOutputMantissa); + EXPECT_EQ(actual_output_exp2, expectedOutputExp2); } template - void SimpleDecimalConversionTest( + void simple_decimal_conversion_test( const char *__restrict numStart, const typename __llvm_libc::fputil::FPBits::UIntType expectedOutputMantissa, const uint32_t expectedOutputExp2, const int expectedErrno = 0) { - typename __llvm_libc::fputil::FPBits::UIntType actualOutputMantissa = 0; - uint32_t actualOutputExp2 = 0; + typename __llvm_libc::fputil::FPBits::UIntType actual_output_mantissa = + 0; + uint32_t actual_output_exp2 = 0; errno = 0; __llvm_libc::internal::simple_decimal_conversion( - numStart, &actualOutputMantissa, &actualOutputExp2); - EXPECT_EQ(actualOutputMantissa, expectedOutputMantissa); - EXPECT_EQ(actualOutputExp2, expectedOutputExp2); + numStart, &actual_output_mantissa, &actual_output_exp2); + EXPECT_EQ(actual_output_mantissa, expectedOutputMantissa); + EXPECT_EQ(actual_output_exp2, expectedOutputExp2); EXPECT_EQ(errno, expectedErrno); } }; TEST(LlvmLibcStrToFloatTest, LeadingZeroes) { - uint64_t testNum64 = 1; - uint32_t numOfZeroes = 63; + uint64_t test_num64 = 1; + uint32_t num_of_zeroes = 63; EXPECT_EQ(__llvm_libc::internal::leading_zeroes(0), 64u); - for (; numOfZeroes < 64; testNum64 <<= 1, numOfZeroes--) { - EXPECT_EQ(__llvm_libc::internal::leading_zeroes(testNum64), - numOfZeroes); + for (; num_of_zeroes < 64; test_num64 <<= 1, num_of_zeroes--) { + EXPECT_EQ(__llvm_libc::internal::leading_zeroes(test_num64), + num_of_zeroes); } - testNum64 = 3; - numOfZeroes = 62; - for (; numOfZeroes > 63; testNum64 <<= 1, numOfZeroes--) { - EXPECT_EQ(__llvm_libc::internal::leading_zeroes(testNum64), - numOfZeroes); + test_num64 = 3; + num_of_zeroes = 62; + for (; num_of_zeroes > 63; test_num64 <<= 1, num_of_zeroes--) { + EXPECT_EQ(__llvm_libc::internal::leading_zeroes(test_num64), + num_of_zeroes); } EXPECT_EQ(__llvm_libc::internal::leading_zeroes(0xffffffffffffffff), 0u); - testNum64 = 1; - numOfZeroes = 63; - for (; numOfZeroes > 63; testNum64 = (testNum64 << 1) + 1, numOfZeroes--) { - EXPECT_EQ(__llvm_libc::internal::leading_zeroes(testNum64), - numOfZeroes); + test_num64 = 1; + num_of_zeroes = 63; + for (; num_of_zeroes > 63; + test_num64 = (test_num64 << 1) + 1, num_of_zeroes--) { + EXPECT_EQ(__llvm_libc::internal::leading_zeroes(test_num64), + num_of_zeroes); } - uint64_t testNum32 = 1; - numOfZeroes = 31; + uint64_t test_num32 = 1; + num_of_zeroes = 31; EXPECT_EQ(__llvm_libc::internal::leading_zeroes(0), 32u); - for (; numOfZeroes < 32; testNum32 <<= 1, numOfZeroes--) { - EXPECT_EQ(__llvm_libc::internal::leading_zeroes(testNum32), - numOfZeroes); + for (; num_of_zeroes < 32; test_num32 <<= 1, num_of_zeroes--) { + EXPECT_EQ(__llvm_libc::internal::leading_zeroes(test_num32), + num_of_zeroes); } EXPECT_EQ(__llvm_libc::internal::leading_zeroes(0xffffffff), 0u); } TEST_F(LlvmLibcStrToFloatTest, ClingerFastPathFloat64Simple) { - 
ClingerFastPathTest(123, 0, 0xEC00000000000, 1029); - ClingerFastPathTest(1234567890123456, 1, 0x5ee2a2eb5a5c0, 1076); - ClingerFastPathTest(1234567890, -10, 0xf9add3739635f, 1019); + clinger_fast_path_test(123, 0, 0xEC00000000000, 1029); + clinger_fast_path_test(1234567890123456, 1, 0x5ee2a2eb5a5c0, 1076); + clinger_fast_path_test(1234567890, -10, 0xf9add3739635f, 1019); } TEST_F(LlvmLibcStrToFloatTest, ClingerFastPathFloat64ExtendedExp) { - ClingerFastPathTest(1, 30, 0x93e5939a08cea, 1122); - ClingerFastPathTest(1, 37, 0xe17b84357691b, 1145); - ClingerFastPathFailsTest(10, 37); - ClingerFastPathFailsTest(1, 100); + clinger_fast_path_test(1, 30, 0x93e5939a08cea, 1122); + clinger_fast_path_test(1, 37, 0xe17b84357691b, 1145); + clinger_fast_path_fails_test(10, 37); + clinger_fast_path_fails_test(1, 100); } TEST_F(LlvmLibcStrToFloatTest, ClingerFastPathFloat64NegativeExp) { - ClingerFastPathTest(1, -10, 0xb7cdfd9d7bdbb, 989); - ClingerFastPathTest(1, -20, 0x79ca10c924223, 956); - ClingerFastPathFailsTest(1, -25); + clinger_fast_path_test(1, -10, 0xb7cdfd9d7bdbb, 989); + clinger_fast_path_test(1, -20, 0x79ca10c924223, 956); + clinger_fast_path_fails_test(1, -25); } TEST_F(LlvmLibcStrToFloatTest, ClingerFastPathFloat32Simple) { - ClingerFastPathTest(123, 0, 0x760000, 133); - ClingerFastPathTest(1234567, 1, 0x3c6146, 150); - ClingerFastPathTest(12345, -5, 0x7cd35b, 123); + clinger_fast_path_test(123, 0, 0x760000, 133); + clinger_fast_path_test(1234567, 1, 0x3c6146, 150); + clinger_fast_path_test(12345, -5, 0x7cd35b, 123); } TEST_F(LlvmLibcStrToFloatTest, ClingerFastPathFloat32ExtendedExp) { - ClingerFastPathTest(1, 15, 0x635fa9, 176); - ClingerFastPathTest(1, 17, 0x31a2bc, 183); - ClingerFastPathFailsTest(10, 17); - ClingerFastPathFailsTest(1, 50); + clinger_fast_path_test(1, 15, 0x635fa9, 176); + clinger_fast_path_test(1, 17, 0x31a2bc, 183); + clinger_fast_path_fails_test(10, 17); + clinger_fast_path_fails_test(1, 50); } TEST_F(LlvmLibcStrToFloatTest, ClingerFastPathFloat32NegativeExp) { - ClingerFastPathTest(1, -5, 0x27c5ac, 110); - ClingerFastPathTest(1, -10, 0x5be6ff, 93); - ClingerFastPathFailsTest(1, -15); + clinger_fast_path_test(1, -5, 0x27c5ac, 110); + clinger_fast_path_test(1, -10, 0x5be6ff, 93); + clinger_fast_path_fails_test(1, -15); } TEST_F(LlvmLibcStrToFloatTest, EiselLemireFloat64Simple) { - EiselLemireTest(12345678901234567890u, 1, 0x1AC53A7E04BCDA, 1089); - EiselLemireTest(123, 0, 0x1EC00000000000, 1029); - EiselLemireTest(12345678901234568192u, 0, 0x156A95319D63E2, 1086); + eisel_lemire_test(12345678901234567890u, 1, 0x1AC53A7E04BCDA, 1089); + eisel_lemire_test(123, 0, 0x1EC00000000000, 1029); + eisel_lemire_test(12345678901234568192u, 0, 0x156A95319D63E2, 1086); } TEST_F(LlvmLibcStrToFloatTest, EiselLemireFloat64SpecificFailures) { // These test cases have caused failures in the past. 
- EiselLemireTest(358416272, -33, 0x1BBB2A68C9D0B9, 941); - EiselLemireTest(2166568064000000238u, -9, 0x10246690000000, 1054); - EiselLemireTest(2794967654709307187u, 1, 0x183e132bc608c8, 1087); - EiselLemireTest(2794967654709307188u, 1, 0x183e132bc608c9, 1087); + eisel_lemire_test(358416272, -33, 0x1BBB2A68C9D0B9, 941); + eisel_lemire_test(2166568064000000238u, -9, 0x10246690000000, 1054); + eisel_lemire_test(2794967654709307187u, 1, 0x183e132bc608c8, 1087); + eisel_lemire_test(2794967654709307188u, 1, 0x183e132bc608c9, 1087); } TEST_F(LlvmLibcStrToFloatTest, EiselLemireFallbackStates) { // Check the fallback states for the algorithm: - uint32_t floatOutputMantissa = 0; - uint64_t doubleOutputMantissa = 0; - __uint128_t tooLongMantissa = 0; - uint32_t outputExp2 = 0; + uint32_t float_output_mantissa = 0; + uint64_t double_output_mantissa = 0; + __uint128_t too_long_mantissa = 0; + uint32_t output_exp2 = 0; // This Eisel-Lemire implementation doesn't support long doubles yet. ASSERT_FALSE(__llvm_libc::internal::eisel_lemire( - tooLongMantissa, 0, &tooLongMantissa, &outputExp2)); + too_long_mantissa, 0, &too_long_mantissa, &output_exp2)); // This number can't be evaluated by Eisel-Lemire since it's exactly 1024 away // from both of its closest floating point approximations // (12345678901234548736 and 12345678901234550784) ASSERT_FALSE(__llvm_libc::internal::eisel_lemire( - 12345678901234549760u, 0, &doubleOutputMantissa, &outputExp2)); + 12345678901234549760u, 0, &double_output_mantissa, &output_exp2)); ASSERT_FALSE(__llvm_libc::internal::eisel_lemire( - 20040229, 0, &floatOutputMantissa, &outputExp2)); + 20040229, 0, &float_output_mantissa, &output_exp2)); } TEST_F(LlvmLibcStrToFloatTest, SimpleDecimalConversion64BasicWholeNumbers) { - SimpleDecimalConversionTest("123456789012345678900", 0x1AC53A7E04BCDA, - 1089); - SimpleDecimalConversionTest("123", 0x1EC00000000000, 1029); - SimpleDecimalConversionTest("12345678901234549760", 0x156A95319D63D8, - 1086); + simple_decimal_conversion_test("123456789012345678900", + 0x1AC53A7E04BCDA, 1089); + simple_decimal_conversion_test("123", 0x1EC00000000000, 1029); + simple_decimal_conversion_test("12345678901234549760", + 0x156A95319D63D8, 1086); } TEST_F(LlvmLibcStrToFloatTest, SimpleDecimalConversion64BasicDecimals) { - SimpleDecimalConversionTest("1.2345", 0x13c083126e978d, 1023); - SimpleDecimalConversionTest(".2345", 0x1e04189374bc6a, 1020); - SimpleDecimalConversionTest(".299792458", 0x132fccb4aca314, 1021); + simple_decimal_conversion_test("1.2345", 0x13c083126e978d, 1023); + simple_decimal_conversion_test(".2345", 0x1e04189374bc6a, 1020); + simple_decimal_conversion_test(".299792458", 0x132fccb4aca314, 1021); } TEST_F(LlvmLibcStrToFloatTest, SimpleDecimalConversion64BasicExponents) { - SimpleDecimalConversionTest("1e10", 0x12a05f20000000, 1056); - SimpleDecimalConversionTest("1e-10", 0x1b7cdfd9d7bdbb, 989); - SimpleDecimalConversionTest("1e300", 0x17e43c8800759c, 2019); - SimpleDecimalConversionTest("1e-300", 0x156e1fc2f8f359, 26); + simple_decimal_conversion_test("1e10", 0x12a05f20000000, 1056); + simple_decimal_conversion_test("1e-10", 0x1b7cdfd9d7bdbb, 989); + simple_decimal_conversion_test("1e300", 0x17e43c8800759c, 2019); + simple_decimal_conversion_test("1e-300", 0x156e1fc2f8f359, 26); } TEST_F(LlvmLibcStrToFloatTest, SimpleDecimalConversion64BasicSubnormals) { - SimpleDecimalConversionTest("1e-320", 0x7e8, 0, ERANGE); - SimpleDecimalConversionTest("1e-308", 0x730d67819e8d2, 0, ERANGE); - SimpleDecimalConversionTest("2.9e-308", 
0x14da6df5e4bcc8, 1); + simple_decimal_conversion_test("1e-320", 0x7e8, 0, ERANGE); + simple_decimal_conversion_test("1e-308", 0x730d67819e8d2, 0, ERANGE); + simple_decimal_conversion_test("2.9e-308", 0x14da6df5e4bcc8, 1); } TEST_F(LlvmLibcStrToFloatTest, SimpleDecimalConversion64SubnormalRounding) { @@ -216,40 +224,40 @@ TEST_F(LlvmLibcStrToFloatTest, SimpleDecimalConversion64SubnormalRounding) { // Technically you can keep adding digits until you hit the truncation limit, // but this is the shortest string that results in the maximum subnormal that // I found. - SimpleDecimalConversionTest("2.225073858507201e-308", 0xfffffffffffff, - 0, ERANGE); + simple_decimal_conversion_test("2.225073858507201e-308", + 0xfffffffffffff, 0, ERANGE); // Same here, if you were to extend the max subnormal out for another 800 // digits, incrementing any one of those digits would create a normal number. - SimpleDecimalConversionTest("2.2250738585072012e-308", - 0x10000000000000, 1); + simple_decimal_conversion_test("2.2250738585072012e-308", + 0x10000000000000, 1); } TEST_F(LlvmLibcStrToFloatTest, SimpleDecimalConversion32SpecificFailures) { - SimpleDecimalConversionTest( + simple_decimal_conversion_test( "1.4012984643248170709237295832899161312802619418765e-45", 0x1, 0, ERANGE); } TEST(LlvmLibcStrToFloatTest, SimpleDecimalConversionExtraTypes) { - uint32_t floatOutputMantissa = 0; - uint32_t outputExp2 = 0; + uint32_t float_output_mantissa = 0; + uint32_t output_exp2 = 0; errno = 0; __llvm_libc::internal::simple_decimal_conversion( - "123456789012345678900", &floatOutputMantissa, &outputExp2); - EXPECT_EQ(floatOutputMantissa, uint32_t(0xd629d4)); - EXPECT_EQ(outputExp2, uint32_t(193)); + "123456789012345678900", &float_output_mantissa, &output_exp2); + EXPECT_EQ(float_output_mantissa, uint32_t(0xd629d4)); + EXPECT_EQ(output_exp2, uint32_t(193)); EXPECT_EQ(errno, 0); - uint64_t doubleOutputMantissa = 0; - outputExp2 = 0; + uint64_t double_output_mantissa = 0; + output_exp2 = 0; errno = 0; __llvm_libc::internal::simple_decimal_conversion( - "123456789012345678900", &doubleOutputMantissa, &outputExp2); - EXPECT_EQ(doubleOutputMantissa, uint64_t(0x1AC53A7E04BCDA)); - EXPECT_EQ(outputExp2, uint32_t(1089)); + "123456789012345678900", &double_output_mantissa, &output_exp2); + EXPECT_EQ(double_output_mantissa, uint64_t(0x1AC53A7E04BCDA)); + EXPECT_EQ(output_exp2, uint32_t(1089)); EXPECT_EQ(errno, 0); // TODO(michaelrj): Get long double support working. diff --git a/libc/test/src/fenv/enabled_exceptions_test.cpp b/libc/test/src/fenv/enabled_exceptions_test.cpp index acc3b09d4aca..75463c34898a 100644 --- a/libc/test/src/fenv/enabled_exceptions_test.cpp +++ b/libc/test/src/fenv/enabled_exceptions_test.cpp @@ -46,7 +46,7 @@ TEST(LlvmLibcExceptionStatusTest, RaiseAndCrash) { // We '|' the individual exception flags instead of using FE_ALL_EXCEPT // as it can include non-standard extensions. Note that we should be able // to compile this file with headers from other libcs as well. - constexpr int allExcepts = + constexpr int ALL_EXCEPTS = FE_DIVBYZERO | FE_INVALID | FE_INEXACT | FE_OVERFLOW | FE_UNDERFLOW; for (int e : excepts) { @@ -59,7 +59,7 @@ TEST(LlvmLibcExceptionStatusTest, RaiseAndCrash) { // can raise FE_INEXACT as well, we don't verify the other // exception flags when FE_INEXACT is enabled. 
if (e != FE_INEXACT) { - int others = allExcepts & ~e; + int others = ALL_EXCEPTS & ~e; ASSERT_EQ(__llvm_libc::feraiseexcept(others), 0); ASSERT_EQ(__llvm_libc::fetestexcept(others), others); } diff --git a/libc/test/src/fenv/exception_status_test.cpp b/libc/test/src/fenv/exception_status_test.cpp index a7c0164d370d..f897e65439f2 100644 --- a/libc/test/src/fenv/exception_status_test.cpp +++ b/libc/test/src/fenv/exception_status_test.cpp @@ -25,7 +25,7 @@ TEST(LlvmLibcExceptionStatusTest, RaiseAndTest) { int excepts[] = {FE_DIVBYZERO, FE_INVALID, FE_INEXACT, FE_OVERFLOW, FE_UNDERFLOW}; - constexpr int allExcepts = + constexpr int ALL_EXCEPTS = FE_DIVBYZERO | FE_INVALID | FE_INEXACT | FE_OVERFLOW | FE_UNDERFLOW; for (int e : excepts) { @@ -112,8 +112,8 @@ TEST(LlvmLibcExceptionStatusTest, RaiseAndTest) { } } - int r = __llvm_libc::feraiseexcept(allExcepts); + int r = __llvm_libc::feraiseexcept(ALL_EXCEPTS); ASSERT_EQ(r, 0); - int s = __llvm_libc::fetestexcept(allExcepts); - ASSERT_EQ(s, allExcepts); + int s = __llvm_libc::fetestexcept(ALL_EXCEPTS); + ASSERT_EQ(s, ALL_EXCEPTS); } diff --git a/libc/test/src/math/CeilTest.h b/libc/test/src/math/CeilTest.h index a45b0d577356..64be0a316ef3 100644 --- a/libc/test/src/math/CeilTest.h +++ b/libc/test/src/math/CeilTest.h @@ -64,9 +64,9 @@ template class CeilTest : public __llvm_libc::testing::Test { } void testRange(CeilFunc func) { - constexpr UIntType count = 10000000; - constexpr UIntType step = UIntType(-1) / count; - for (UIntType i = 0, v = 0; i <= count; ++i, v += step) { + constexpr UIntType COUNT = 10000000; + constexpr UIntType STEP = UIntType(-1) / COUNT; + for (UIntType i = 0, v = 0; i <= COUNT; ++i, v += STEP) { T x = T(FPBits(v)); if (isnan(x) || isinf(x)) continue; diff --git a/libc/test/src/math/CopySignTest.h b/libc/test/src/math/CopySignTest.h index 46d4c3781184..20f400197ceb 100644 --- a/libc/test/src/math/CopySignTest.h +++ b/libc/test/src/math/CopySignTest.h @@ -33,9 +33,9 @@ template class CopySignTest : public __llvm_libc::testing::Test { } void testRange(CopySignFunc func) { - constexpr UIntType count = 10000000; - constexpr UIntType step = UIntType(-1) / count; - for (UIntType i = 0, v = 0; i <= count; ++i, v += step) { + constexpr UIntType COUNT = 10000000; + constexpr UIntType STEP = UIntType(-1) / COUNT; + for (UIntType i = 0, v = 0; i <= COUNT; ++i, v += STEP) { T x = T(FPBits(v)); if (isnan(x) || isinf(x)) continue; diff --git a/libc/test/src/math/FAbsTest.h b/libc/test/src/math/FAbsTest.h index d4377f7110da..fe163f6ff672 100644 --- a/libc/test/src/math/FAbsTest.h +++ b/libc/test/src/math/FAbsTest.h @@ -32,9 +32,9 @@ template class FAbsTest : public __llvm_libc::testing::Test { } void testRange(FabsFunc func) { - constexpr UIntType count = 10000000; - constexpr UIntType step = UIntType(-1) / count; - for (UIntType i = 0, v = 0; i <= count; ++i, v += step) { + constexpr UIntType COUNT = 10000000; + constexpr UIntType STEP = UIntType(-1) / COUNT; + for (UIntType i = 0, v = 0; i <= COUNT; ++i, v += STEP) { T x = T(FPBits(v)); if (isnan(x) || isinf(x)) continue; diff --git a/libc/test/src/math/FDimTest.h b/libc/test/src/math/FDimTest.h index 029c6390f076..587913acb1f1 100644 --- a/libc/test/src/math/FDimTest.h +++ b/libc/test/src/math/FDimTest.h @@ -19,7 +19,7 @@ class FDimTestTemplate : public __llvm_libc::testing::Test { using FPBits = __llvm_libc::fputil::FPBits; using UIntType = typename FPBits::UIntType; - void testNaNArg(FuncPtr func) { + void test_na_n_arg(FuncPtr func) { EXPECT_FP_EQ(nan, func(nan, inf)); 
EXPECT_FP_EQ(nan, func(neg_inf, nan)); EXPECT_FP_EQ(nan, func(nan, zero)); @@ -29,7 +29,7 @@ class FDimTestTemplate : public __llvm_libc::testing::Test { EXPECT_FP_EQ(func(nan, nan), nan); } - void testInfArg(FuncPtr func) { + void test_inf_arg(FuncPtr func) { EXPECT_FP_EQ(zero, func(neg_inf, inf)); EXPECT_FP_EQ(inf, func(inf, zero)); EXPECT_FP_EQ(zero, func(neg_zero, inf)); @@ -37,7 +37,7 @@ class FDimTestTemplate : public __llvm_libc::testing::Test { EXPECT_FP_EQ(zero, func(T(-1.2345), inf)); } - void testNegInfArg(FuncPtr func) { + void test_neg_inf_arg(FuncPtr func) { EXPECT_FP_EQ(inf, func(inf, neg_inf)); EXPECT_FP_EQ(zero, func(neg_inf, zero)); EXPECT_FP_EQ(inf, func(neg_zero, neg_inf)); @@ -45,18 +45,18 @@ class FDimTestTemplate : public __llvm_libc::testing::Test { EXPECT_FP_EQ(inf, func(T(1.2345), neg_inf)); } - void testBothZero(FuncPtr func) { + void test_both_zero(FuncPtr func) { EXPECT_FP_EQ(zero, func(zero, zero)); EXPECT_FP_EQ(zero, func(zero, neg_zero)); EXPECT_FP_EQ(zero, func(neg_zero, zero)); EXPECT_FP_EQ(zero, func(neg_zero, neg_zero)); } - void testInRange(FuncPtr func) { - constexpr UIntType count = 10000001; - constexpr UIntType step = UIntType(-1) / count; - for (UIntType i = 0, v = 0, w = UIntType(-1); i <= count; - ++i, v += step, w -= step) { + void test_in_range(FuncPtr func) { + constexpr UIntType COUNT = 10000001; + constexpr UIntType STEP = UIntType(-1) / COUNT; + for (UIntType i = 0, v = 0, w = UIntType(-1); i <= COUNT; + ++i, v += STEP, w -= STEP) { T x = T(FPBits(v)), y = T(FPBits(w)); if (isnan(x) || isinf(x)) continue; diff --git a/libc/test/src/math/FMaxTest.h b/libc/test/src/math/FMaxTest.h index c72dad5062eb..4a0b2151a9cc 100644 --- a/libc/test/src/math/FMaxTest.h +++ b/libc/test/src/math/FMaxTest.h @@ -55,10 +55,10 @@ template class FMaxTest : public __llvm_libc::testing::Test { } void testRange(FMaxFunc func) { - constexpr UIntType count = 10000001; - constexpr UIntType step = UIntType(-1) / count; - for (UIntType i = 0, v = 0, w = UIntType(-1); i <= count; - ++i, v += step, w -= step) { + constexpr UIntType COUNT = 10000001; + constexpr UIntType STEP = UIntType(-1) / COUNT; + for (UIntType i = 0, v = 0, w = UIntType(-1); i <= COUNT; + ++i, v += STEP, w -= STEP) { T x = T(FPBits(v)), y = T(FPBits(w)); if (isnan(x) || isinf(x)) continue; diff --git a/libc/test/src/math/FMinTest.h b/libc/test/src/math/FMinTest.h index a7491a063fd0..a5d62622b205 100644 --- a/libc/test/src/math/FMinTest.h +++ b/libc/test/src/math/FMinTest.h @@ -55,10 +55,10 @@ template class FMinTest : public __llvm_libc::testing::Test { } void testRange(FMinFunc func) { - constexpr UIntType count = 10000001; - constexpr UIntType step = UIntType(-1) / count; - for (UIntType i = 0, v = 0, w = UIntType(-1); i <= count; - ++i, v += step, w -= step) { + constexpr UIntType COUNT = 10000001; + constexpr UIntType STEP = UIntType(-1) / COUNT; + for (UIntType i = 0, v = 0, w = UIntType(-1); i <= COUNT; + ++i, v += STEP, w -= STEP) { T x = T(FPBits(v)), y = T(FPBits(w)); if (isnan(x) || isinf(x)) continue; diff --git a/libc/test/src/math/FloorTest.h b/libc/test/src/math/FloorTest.h index b52f8a1a4303..0a3d5a3994aa 100644 --- a/libc/test/src/math/FloorTest.h +++ b/libc/test/src/math/FloorTest.h @@ -64,9 +64,9 @@ template class FloorTest : public __llvm_libc::testing::Test { } void testRange(FloorFunc func) { - constexpr UIntType count = 10000000; - constexpr UIntType step = UIntType(-1) / count; - for (UIntType i = 0, v = 0; i <= count; ++i, v += step) { + constexpr UIntType COUNT = 10000000; + 
constexpr UIntType STEP = UIntType(-1) / COUNT; + for (UIntType i = 0, v = 0; i <= COUNT; ++i, v += STEP) { T x = T(FPBits(v)); if (isnan(x) || isinf(x)) continue; diff --git a/libc/test/src/math/FmaTest.h b/libc/test/src/math/FmaTest.h index 30af38f3cee3..2d04989400f4 100644 --- a/libc/test/src/math/FmaTest.h +++ b/libc/test/src/math/FmaTest.h @@ -29,7 +29,7 @@ class FmaTestTemplate : public __llvm_libc::testing::Test { const T zero = T(__llvm_libc::fputil::FPBits::zero()); const T neg_zero = T(__llvm_libc::fputil::FPBits::neg_zero()); - UIntType getRandomBitPattern() { + UIntType get_random_bit_pattern() { UIntType bits{0}; for (UIntType i = 0; i < sizeof(UIntType) / 2; ++i) { bits = @@ -39,7 +39,7 @@ class FmaTestTemplate : public __llvm_libc::testing::Test { } public: - void testSpecialNumbers(Func func) { + void test_special_numbers(Func func) { EXPECT_FP_EQ(func(zero, zero, zero), zero); EXPECT_FP_EQ(func(zero, neg_zero, neg_zero), neg_zero); EXPECT_FP_EQ(func(inf, inf, zero), inf); @@ -63,14 +63,14 @@ class FmaTestTemplate : public __llvm_libc::testing::Test { EXPECT_FP_EQ(func(T(1.75), z, -z), T(0.75) * z); } - void testSubnormalRange(Func func) { - constexpr UIntType count = 1000001; - constexpr UIntType step = - (FPBits::MAX_SUBNORMAL - FPBits::MIN_SUBNORMAL) / count; + void test_subnormal_range(Func func) { + constexpr UIntType COUNT = 1000001; + constexpr UIntType STEP = + (FPBits::MAX_SUBNORMAL - FPBits::MIN_SUBNORMAL) / COUNT; for (UIntType v = FPBits::MIN_SUBNORMAL, w = FPBits::MAX_SUBNORMAL; v <= FPBits::MAX_SUBNORMAL && w >= FPBits::MIN_SUBNORMAL; - v += step, w -= step) { - T x = T(FPBits(getRandomBitPattern())), y = T(FPBits(v)), + v += STEP, w -= STEP) { + T x = T(FPBits(get_random_bit_pattern())), y = T(FPBits(v)), z = T(FPBits(w)); T result = func(x, y, z); mpfr::TernaryInput input{x, y, z}; @@ -78,14 +78,14 @@ class FmaTestTemplate : public __llvm_libc::testing::Test { } } - void testNormalRange(Func func) { - constexpr UIntType count = 1000001; - constexpr UIntType step = (FPBits::MAX_NORMAL - FPBits::MIN_NORMAL) / count; + void test_normal_range(Func func) { + constexpr UIntType COUNT = 1000001; + constexpr UIntType STEP = (FPBits::MAX_NORMAL - FPBits::MIN_NORMAL) / COUNT; for (UIntType v = FPBits::MIN_NORMAL, w = FPBits::MAX_NORMAL; v <= FPBits::MAX_NORMAL && w >= FPBits::MIN_NORMAL; - v += step, w -= step) { + v += STEP, w -= STEP) { T x = T(FPBits(v)), y = T(FPBits(w)), - z = T(FPBits(getRandomBitPattern())); + z = T(FPBits(get_random_bit_pattern())); T result = func(x, y, z); mpfr::TernaryInput input{x, y, z}; ASSERT_MPFR_MATCH(mpfr::Operation::Fma, input, result, 0.5); diff --git a/libc/test/src/math/FrexpTest.h b/libc/test/src/math/FrexpTest.h index 6ccf0b9bac8a..06789f4ebcc2 100644 --- a/libc/test/src/math/FrexpTest.h +++ b/libc/test/src/math/FrexpTest.h @@ -19,7 +19,7 @@ template class FrexpTest : public __llvm_libc::testing::Test { DECLARE_SPECIAL_CONSTANTS(T) - static constexpr UIntType HiddenBit = + static constexpr UIntType HIDDEN_BIT = UIntType(1) << __llvm_libc::fputil::MantissaWidth::VALUE; public: @@ -93,9 +93,9 @@ template class FrexpTest : public __llvm_libc::testing::Test { void testRange(FrexpFunc func) { using UIntType = typename FPBits::UIntType; - constexpr UIntType count = 10000000; - constexpr UIntType step = UIntType(-1) / count; - for (UIntType i = 0, v = 0; i <= count; ++i, v += step) { + constexpr UIntType COUNT = 10000000; + constexpr UIntType STEP = UIntType(-1) / COUNT; + for (UIntType i = 0, v = 0; i <= COUNT; ++i, v += STEP) { T x 
= static_cast(FPBits(v)); if (isnan(x) || isinf(x) || x == 0.0l) continue; diff --git a/libc/test/src/math/HypotTest.h b/libc/test/src/math/HypotTest.h index 962da0b39e06..584b5aef3359 100644 --- a/libc/test/src/math/HypotTest.h +++ b/libc/test/src/math/HypotTest.h @@ -32,7 +32,7 @@ class HypotTestTemplate : public __llvm_libc::testing::Test { const T neg_zero = T(__llvm_libc::fputil::FPBits::neg_zero()); public: - void testSpecialNumbers(Func func) { + void test_special_numbers(Func func) { EXPECT_FP_EQ(func(inf, nan), inf); EXPECT_FP_EQ(func(nan, neg_inf), inf); EXPECT_FP_EQ(func(zero, inf), inf); @@ -45,14 +45,14 @@ class HypotTestTemplate : public __llvm_libc::testing::Test { EXPECT_FP_EQ(func(neg_zero, zero), zero); } - void testSubnormalRange(Func func) { - constexpr UIntType count = 1000001; + void test_subnormal_range(Func func) { + constexpr UIntType COUNT = 1000001; for (unsigned scale = 0; scale < 4; ++scale) { - UIntType maxValue = FPBits::MAX_SUBNORMAL << scale; - UIntType step = (maxValue - FPBits::MIN_SUBNORMAL) / count; + UIntType max_value = FPBits::MAX_SUBNORMAL << scale; + UIntType step = (max_value - FPBits::MIN_SUBNORMAL) / COUNT; for (int signs = 0; signs < 4; ++signs) { - for (UIntType v = FPBits::MIN_SUBNORMAL, w = maxValue; - v <= maxValue && w >= FPBits::MIN_SUBNORMAL; + for (UIntType v = FPBits::MIN_SUBNORMAL, w = max_value; + v <= max_value && w >= FPBits::MIN_SUBNORMAL; v += step, w -= step) { T x = T(FPBits(v)), y = T(FPBits(w)); if (signs % 2 == 1) { @@ -70,13 +70,13 @@ class HypotTestTemplate : public __llvm_libc::testing::Test { } } - void testNormalRange(Func func) { - constexpr UIntType count = 1000001; - constexpr UIntType step = (FPBits::MAX_NORMAL - FPBits::MIN_NORMAL) / count; + void test_normal_range(Func func) { + constexpr UIntType COUNT = 1000001; + constexpr UIntType STEP = (FPBits::MAX_NORMAL - FPBits::MIN_NORMAL) / COUNT; for (int signs = 0; signs < 4; ++signs) { for (UIntType v = FPBits::MIN_NORMAL, w = FPBits::MAX_NORMAL; v <= FPBits::MAX_NORMAL && w >= FPBits::MIN_NORMAL; - v += step, w -= step) { + v += STEP, w -= STEP) { T x = T(FPBits(v)), y = T(FPBits(w)); if (signs % 2 == 1) { x = -x; diff --git a/libc/test/src/math/ILogbTest.h b/libc/test/src/math/ILogbTest.h index 8d8f4e9a6716..e2de14de446d 100644 --- a/libc/test/src/math/ILogbTest.h +++ b/libc/test/src/math/ILogbTest.h @@ -21,7 +21,7 @@ class LlvmLibcILogbTest : public __llvm_libc::testing::Test { template struct ILogbFunc { typedef int (*Func)(T); }; template - void testSpecialNumbers(typename ILogbFunc::Func func) { + void test_special_numbers(typename ILogbFunc::Func func) { EXPECT_EQ(FP_ILOGB0, func(T(__llvm_libc::fputil::FPBits::zero()))); EXPECT_EQ(FP_ILOGB0, func(T(__llvm_libc::fputil::FPBits::neg_zero()))); @@ -32,7 +32,8 @@ class LlvmLibcILogbTest : public __llvm_libc::testing::Test { EXPECT_EQ(INT_MAX, func(T(__llvm_libc::fputil::FPBits::neg_inf()))); } - template void testPowersOfTwo(typename ILogbFunc::Func func) { + template + void test_powers_of_two(typename ILogbFunc::Func func) { EXPECT_EQ(0, func(T(1.0))); EXPECT_EQ(0, func(T(-1.0))); @@ -53,7 +54,7 @@ class LlvmLibcILogbTest : public __llvm_libc::testing::Test { } template - void testSomeIntegers(typename ILogbFunc::Func func) { + void test_some_integers(typename ILogbFunc::Func func) { EXPECT_EQ(1, func(T(3.0))); EXPECT_EQ(1, func(T(-3.0))); @@ -71,14 +72,14 @@ class LlvmLibcILogbTest : public __llvm_libc::testing::Test { } template - void testSubnormalRange(typename ILogbFunc::Func func) { + void 
test_subnormal_range(typename ILogbFunc::Func func) { using FPBits = __llvm_libc::fputil::FPBits; using UIntType = typename FPBits::UIntType; - constexpr UIntType count = 1000001; - constexpr UIntType step = - (FPBits::MAX_SUBNORMAL - FPBits::MIN_SUBNORMAL) / count; + constexpr UIntType COUNT = 1000001; + constexpr UIntType STEP = + (FPBits::MAX_SUBNORMAL - FPBits::MIN_SUBNORMAL) / COUNT; for (UIntType v = FPBits::MIN_SUBNORMAL; v <= FPBits::MAX_SUBNORMAL; - v += step) { + v += STEP) { T x = T(FPBits(v)); if (isnan(x) || isinf(x) || x == 0.0) continue; @@ -89,12 +90,13 @@ class LlvmLibcILogbTest : public __llvm_libc::testing::Test { } } - template void testNormalRange(typename ILogbFunc::Func func) { + template + void test_normal_range(typename ILogbFunc::Func func) { using FPBits = __llvm_libc::fputil::FPBits; using UIntType = typename FPBits::UIntType; - constexpr UIntType count = 1000001; - constexpr UIntType step = (FPBits::MAX_NORMAL - FPBits::MIN_NORMAL) / count; - for (UIntType v = FPBits::MIN_NORMAL; v <= FPBits::MAX_NORMAL; v += step) { + constexpr UIntType COUNT = 1000001; + constexpr UIntType STEP = (FPBits::MAX_NORMAL - FPBits::MIN_NORMAL) / COUNT; + for (UIntType v = FPBits::MIN_NORMAL; v <= FPBits::MAX_NORMAL; v += STEP) { T x = T(FPBits(v)); if (isnan(x) || isinf(x) || x == 0.0) continue; diff --git a/libc/test/src/math/LdExpTest.h b/libc/test/src/math/LdExpTest.h index cd6491d2b71e..ca9f33f2f816 100644 --- a/libc/test/src/math/LdExpTest.h +++ b/libc/test/src/math/LdExpTest.h @@ -23,10 +23,10 @@ class LdExpTestTemplate : public __llvm_libc::testing::Test { using FPBits = __llvm_libc::fputil::FPBits; using NormalFloat = __llvm_libc::fputil::NormalFloat; using UIntType = typename FPBits::UIntType; - static constexpr UIntType mantissaWidth = + static constexpr UIntType MANTISSA_WIDTH = __llvm_libc::fputil::MantissaWidth::VALUE; // A normalized mantissa to be used with tests. - static constexpr UIntType mantissa = NormalFloat::ONE + 0x1234; + static constexpr UIntType MANTISSA = NormalFloat::ONE + 0x1234; const T zero = T(__llvm_libc::fputil::FPBits::zero()); const T neg_zero = T(__llvm_libc::fputil::FPBits::neg_zero()); @@ -38,8 +38,8 @@ class LdExpTestTemplate : public __llvm_libc::testing::Test { typedef T (*LdExpFunc)(T, int); void testSpecialNumbers(LdExpFunc func) { - int expArray[5] = {-INT_MAX - 1, -10, 0, 10, INT_MAX}; - for (int exp : expArray) { + int exp_array[5] = {-INT_MAX - 1, -10, 0, 10, INT_MAX}; + for (int exp : exp_array) { ASSERT_FP_EQ(zero, func(zero, exp)); ASSERT_FP_EQ(neg_zero, func(neg_zero, exp)); ASSERT_FP_EQ(inf, func(inf, exp)); @@ -49,10 +49,10 @@ class LdExpTestTemplate : public __llvm_libc::testing::Test { } void testPowersOfTwo(LdExpFunc func) { - int32_t expArray[5] = {1, 2, 3, 4, 5}; - int32_t valArray[6] = {1, 2, 4, 8, 16, 32}; - for (int32_t exp : expArray) { - for (int32_t val : valArray) { + int32_t exp_array[5] = {1, 2, 3, 4, 5}; + int32_t val_array[6] = {1, 2, 4, 8, 16, 32}; + for (int32_t exp : exp_array) { + for (int32_t val : val_array) { ASSERT_FP_EQ(T(val << exp), func(T(val), exp)); ASSERT_FP_EQ(T(-1 * (val << exp)), func(T(-val), exp)); } @@ -70,11 +70,12 @@ class LdExpTestTemplate : public __llvm_libc::testing::Test { void testUnderflowToZeroOnNormal(LdExpFunc func) { // In this test, we pass a normal nubmer to func and expect zero // to be returned due to underflow. 
- int32_t baseExponent = FPBits::EXPONENT_BIAS + mantissaWidth; - int32_t expArray[] = {baseExponent + 5, baseExponent + 4, baseExponent + 3, - baseExponent + 2, baseExponent + 1}; - T x = NormalFloat(0, mantissa, 0); - for (int32_t exp : expArray) { + int32_t base_exponent = FPBits::EXPONENT_BIAS + MANTISSA_WIDTH; + int32_t exp_array[] = {base_exponent + 5, base_exponent + 4, + base_exponent + 3, base_exponent + 2, + base_exponent + 1}; + T x = NormalFloat(0, MANTISSA, 0); + for (int32_t exp : exp_array) { ASSERT_FP_EQ(func(x, -exp), x > 0 ? zero : neg_zero); } } @@ -82,25 +83,26 @@ class LdExpTestTemplate : public __llvm_libc::testing::Test { void testUnderflowToZeroOnSubnormal(LdExpFunc func) { // In this test, we pass a normal nubmer to func and expect zero // to be returned due to underflow. - int32_t baseExponent = FPBits::EXPONENT_BIAS + mantissaWidth; - int32_t expArray[] = {baseExponent + 5, baseExponent + 4, baseExponent + 3, - baseExponent + 2, baseExponent + 1}; - T x = NormalFloat(-FPBits::EXPONENT_BIAS, mantissa, 0); - for (int32_t exp : expArray) { + int32_t base_exponent = FPBits::EXPONENT_BIAS + MANTISSA_WIDTH; + int32_t exp_array[] = {base_exponent + 5, base_exponent + 4, + base_exponent + 3, base_exponent + 2, + base_exponent + 1}; + T x = NormalFloat(-FPBits::EXPONENT_BIAS, MANTISSA, 0); + for (int32_t exp : exp_array) { ASSERT_FP_EQ(func(x, -exp), x > 0 ? zero : neg_zero); } } void testNormalOperation(LdExpFunc func) { - T valArray[] = { + T val_array[] = { // Normal numbers - NormalFloat(100, mantissa, 0), NormalFloat(-100, mantissa, 0), - NormalFloat(100, mantissa, 1), NormalFloat(-100, mantissa, 1), + NormalFloat(100, MANTISSA, 0), NormalFloat(-100, MANTISSA, 0), + NormalFloat(100, MANTISSA, 1), NormalFloat(-100, MANTISSA, 1), // Subnormal numbers - NormalFloat(-FPBits::EXPONENT_BIAS, mantissa, 0), - NormalFloat(-FPBits::EXPONENT_BIAS, mantissa, 1)}; - for (int32_t exp = 0; exp <= static_cast(mantissaWidth); ++exp) { - for (T x : valArray) { + NormalFloat(-FPBits::EXPONENT_BIAS, MANTISSA, 0), + NormalFloat(-FPBits::EXPONENT_BIAS, MANTISSA, 1)}; + for (int32_t exp = 0; exp <= static_cast(MANTISSA_WIDTH); ++exp) { + for (T x : val_array) { // We compare the result of ldexp with the result // of the native multiplication/division instruction. ASSERT_FP_EQ(func(x, exp), x * (UIntType(1) << exp)); @@ -118,13 +120,13 @@ class LdExpTestTemplate : public __llvm_libc::testing::Test { x = NormalFloat(FPBits::EXPONENT_BIAS, NormalFloat::ONE, 0); int exp = -FPBits::MAX_EXPONENT - 5; T result = func(x, exp); - FPBits resultBits(result); - ASSERT_FALSE(resultBits.is_zero()); + FPBits result_bits(result); + ASSERT_FALSE(result_bits.is_zero()); // Verify that the result is indeed subnormal. - ASSERT_EQ(resultBits.get_unbiased_exponent(), uint16_t(0)); + ASSERT_EQ(result_bits.get_unbiased_exponent(), uint16_t(0)); // But if the exp is so less that normalization leads to zero, then // the result should be zero. - result = func(x, -FPBits::MAX_EXPONENT - int(mantissaWidth) - 5); + result = func(x, -FPBits::MAX_EXPONENT - int(MANTISSA_WIDTH) - 5); ASSERT_TRUE(FPBits(result).is_zero()); // Start with a subnormal number but pass a very high number for exponent. 
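The ldexp tests touched above lean on the identity that ldexp(x, exp) scales x by 2^exp exactly whenever the result stays in the normal range, which is why testNormalOperation can compare func(x, exp) directly against the native multiplication x * (UIntType(1) << exp). A minimal standalone sketch of that identity, using the standard <cmath> ldexp rather than the __llvm_libc entry points (so nothing here is part of the patch itself), could look like:

    #include <cassert>
    #include <cmath>
    #include <cstdint>

    int main() {
      // Multiplying by a power of two is exact in binary floating point as
      // long as the product neither overflows nor drops into the subnormal
      // range, so ldexp must agree bit-for-bit with the multiplication.
      const double x = 1.2345;
      for (int exp = 0; exp <= 52; ++exp)
        assert(std::ldexp(x, exp) ==
               x * static_cast<double>(UINT64_C(1) << exp));
      return 0;
    }
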
diff --git a/libc/test/src/math/LogbTest.h b/libc/test/src/math/LogbTest.h index 6e7e6d5bda06..49ccf3a1a832 100644 --- a/libc/test/src/math/LogbTest.h +++ b/libc/test/src/math/LogbTest.h @@ -19,7 +19,7 @@ template class LogbTest : public __llvm_libc::testing::Test { DECLARE_SPECIAL_CONSTANTS(T) - static constexpr UIntType HiddenBit = + static constexpr UIntType HIDDEN_BIT = UIntType(1) << __llvm_libc::fputil::MantissaWidth::VALUE; public: @@ -72,9 +72,9 @@ template class LogbTest : public __llvm_libc::testing::Test { void testRange(LogbFunc func) { using UIntType = typename FPBits::UIntType; - constexpr UIntType count = 10000000; - constexpr UIntType step = UIntType(-1) / count; - for (UIntType i = 0, v = 0; i <= count; ++i, v += step) { + constexpr UIntType COUNT = 10000000; + constexpr UIntType STEP = UIntType(-1) / COUNT; + for (UIntType i = 0, v = 0; i <= COUNT; ++i, v += STEP) { T x = static_cast(FPBits(v)); if (isnan(x) || isinf(x) || x == 0.0l) continue; diff --git a/libc/test/src/math/ModfTest.h b/libc/test/src/math/ModfTest.h index a3f0c4108207..4e88c6ae3654 100644 --- a/libc/test/src/math/ModfTest.h +++ b/libc/test/src/math/ModfTest.h @@ -84,9 +84,9 @@ template class ModfTest : public __llvm_libc::testing::Test { } void testRange(ModfFunc func) { - constexpr UIntType count = 10000000; - constexpr UIntType step = UIntType(-1) / count; - for (UIntType i = 0, v = 0; i <= count; ++i, v += step) { + constexpr UIntType COUNT = 10000000; + constexpr UIntType STEP = UIntType(-1) / COUNT; + for (UIntType i = 0, v = 0; i <= COUNT; ++i, v += STEP) { T x = T(FPBits(v)); if (isnan(x) || isinf(x) || x == T(0.0)) continue; diff --git a/libc/test/src/math/NextAfterTest.h b/libc/test/src/math/NextAfterTest.h index 4b7b5f4997ee..201d16433704 100644 --- a/libc/test/src/math/NextAfterTest.h +++ b/libc/test/src/math/NextAfterTest.h @@ -22,7 +22,7 @@ class NextAfterTestTemplate : public __llvm_libc::testing::Test { using MantissaWidth = __llvm_libc::fputil::MantissaWidth; using UIntType = typename FPBits::UIntType; - static constexpr int bitWidthOfType = + static constexpr int BIT_WIDTH_OF_TYPE = __llvm_libc::fputil::FloatProperties::BIT_WIDTH; const T zero = T(FPBits::zero()); @@ -30,10 +30,10 @@ class NextAfterTestTemplate : public __llvm_libc::testing::Test { const T inf = T(FPBits::inf()); const T neg_inf = T(FPBits::neg_inf()); const T nan = T(FPBits::build_nan(1)); - const UIntType MIN_SUBNORMAL = FPBits::MIN_SUBNORMAL; - const UIntType MAX_SUBNORMAL = FPBits::MAX_SUBNORMAL; - const UIntType MIN_NORMAL = FPBits::MIN_NORMAL; - const UIntType MAX_NORMAL = FPBits::MAX_NORMAL; + const UIntType min_subnormal = FPBits::MIN_SUBNORMAL; + const UIntType max_subnormal = FPBits::MAX_SUBNORMAL; + const UIntType min_normal = FPBits::MIN_NORMAL; + const UIntType max_normal = FPBits::MAX_NORMAL; public: typedef T (*NextAfterFunc)(T, T); @@ -50,89 +50,91 @@ class NextAfterTestTemplate : public __llvm_libc::testing::Test { // 'from' is zero|neg_zero. 
T x = zero; T result = func(x, T(1)); - UIntType expectedBits = 1; - T expected = *reinterpret_cast(&expectedBits); + UIntType expected_bits = 1; + T expected = *reinterpret_cast(&expected_bits); ASSERT_FP_EQ(result, expected); result = func(x, T(-1)); - expectedBits = (UIntType(1) << (bitWidthOfType - 1)) + 1; - expected = *reinterpret_cast(&expectedBits); + expected_bits = (UIntType(1) << (BIT_WIDTH_OF_TYPE - 1)) + 1; + expected = *reinterpret_cast(&expected_bits); ASSERT_FP_EQ(result, expected); x = neg_zero; result = func(x, 1); - expectedBits = 1; - expected = *reinterpret_cast(&expectedBits); + expected_bits = 1; + expected = *reinterpret_cast(&expected_bits); ASSERT_FP_EQ(result, expected); result = func(x, -1); - expectedBits = (UIntType(1) << (bitWidthOfType - 1)) + 1; - expected = *reinterpret_cast(&expectedBits); + expected_bits = (UIntType(1) << (BIT_WIDTH_OF_TYPE - 1)) + 1; + expected = *reinterpret_cast(&expected_bits); ASSERT_FP_EQ(result, expected); // 'from' is max subnormal value. - x = *reinterpret_cast(&MAX_SUBNORMAL); + x = *reinterpret_cast(&max_subnormal); result = func(x, 1); - expected = *reinterpret_cast(&MIN_NORMAL); + expected = *reinterpret_cast(&min_normal); ASSERT_FP_EQ(result, expected); result = func(x, 0); - expectedBits = MAX_SUBNORMAL - 1; - expected = *reinterpret_cast(&expectedBits); + expected_bits = max_subnormal - 1; + expected = *reinterpret_cast(&expected_bits); ASSERT_FP_EQ(result, expected); x = -x; result = func(x, -1); - expectedBits = (UIntType(1) << (bitWidthOfType - 1)) + MIN_NORMAL; - expected = *reinterpret_cast(&expectedBits); + expected_bits = (UIntType(1) << (BIT_WIDTH_OF_TYPE - 1)) + min_normal; + expected = *reinterpret_cast(&expected_bits); ASSERT_FP_EQ(result, expected); result = func(x, 0); - expectedBits = (UIntType(1) << (bitWidthOfType - 1)) + MAX_SUBNORMAL - 1; - expected = *reinterpret_cast(&expectedBits); + expected_bits = + (UIntType(1) << (BIT_WIDTH_OF_TYPE - 1)) + max_subnormal - 1; + expected = *reinterpret_cast(&expected_bits); ASSERT_FP_EQ(result, expected); // 'from' is min subnormal value. - x = *reinterpret_cast(&MIN_SUBNORMAL); + x = *reinterpret_cast(&min_subnormal); result = func(x, 1); - expectedBits = MIN_SUBNORMAL + 1; - expected = *reinterpret_cast(&expectedBits); + expected_bits = min_subnormal + 1; + expected = *reinterpret_cast(&expected_bits); ASSERT_FP_EQ(result, expected); ASSERT_FP_EQ(func(x, 0), 0); x = -x; result = func(x, -1); - expectedBits = (UIntType(1) << (bitWidthOfType - 1)) + MIN_SUBNORMAL + 1; - expected = *reinterpret_cast(&expectedBits); + expected_bits = + (UIntType(1) << (BIT_WIDTH_OF_TYPE - 1)) + min_subnormal + 1; + expected = *reinterpret_cast(&expected_bits); ASSERT_FP_EQ(result, expected); ASSERT_FP_EQ(func(x, 0), T(-0.0)); // 'from' is min normal. 
- x = *reinterpret_cast(&MIN_NORMAL); + x = *reinterpret_cast(&min_normal); result = func(x, 0); - expectedBits = MAX_SUBNORMAL; - expected = *reinterpret_cast(&expectedBits); + expected_bits = max_subnormal; + expected = *reinterpret_cast(&expected_bits); ASSERT_FP_EQ(result, expected); result = func(x, inf); - expectedBits = MIN_NORMAL + 1; - expected = *reinterpret_cast(&expectedBits); + expected_bits = min_normal + 1; + expected = *reinterpret_cast(&expected_bits); ASSERT_FP_EQ(result, expected); x = -x; result = func(x, 0); - expectedBits = (UIntType(1) << (bitWidthOfType - 1)) + MAX_SUBNORMAL; - expected = *reinterpret_cast(&expectedBits); + expected_bits = (UIntType(1) << (BIT_WIDTH_OF_TYPE - 1)) + max_subnormal; + expected = *reinterpret_cast(&expected_bits); ASSERT_FP_EQ(result, expected); result = func(x, -inf); - expectedBits = (UIntType(1) << (bitWidthOfType - 1)) + MIN_NORMAL + 1; - expected = *reinterpret_cast(&expectedBits); + expected_bits = (UIntType(1) << (BIT_WIDTH_OF_TYPE - 1)) + min_normal + 1; + expected = *reinterpret_cast(&expected_bits); ASSERT_FP_EQ(result, expected); // 'from' is max normal and 'to' is infinity. - x = *reinterpret_cast(&MAX_NORMAL); + x = *reinterpret_cast(&max_normal); result = func(x, inf); ASSERT_FP_EQ(result, inf); @@ -142,48 +144,48 @@ class NextAfterTestTemplate : public __llvm_libc::testing::Test { // 'from' is infinity. x = inf; result = func(x, 0); - expectedBits = MAX_NORMAL; - expected = *reinterpret_cast(&expectedBits); + expected_bits = max_normal; + expected = *reinterpret_cast(&expected_bits); ASSERT_FP_EQ(result, expected); ASSERT_FP_EQ(func(x, inf), inf); x = neg_inf; result = func(x, 0); - expectedBits = (UIntType(1) << (bitWidthOfType - 1)) + MAX_NORMAL; - expected = *reinterpret_cast(&expectedBits); + expected_bits = (UIntType(1) << (BIT_WIDTH_OF_TYPE - 1)) + max_normal; + expected = *reinterpret_cast(&expected_bits); ASSERT_FP_EQ(result, expected); ASSERT_FP_EQ(func(x, neg_inf), neg_inf); // 'from' is a power of 2. 
x = T(32.0); result = func(x, 0); - FPBits xBits = FPBits(x); - FPBits resultBits = FPBits(result); - ASSERT_EQ(resultBits.get_unbiased_exponent(), - uint16_t(xBits.get_unbiased_exponent() - 1)); - ASSERT_EQ(resultBits.get_mantissa(), + FPBits x_bits = FPBits(x); + FPBits result_bits = FPBits(result); + ASSERT_EQ(result_bits.get_unbiased_exponent(), + uint16_t(x_bits.get_unbiased_exponent() - 1)); + ASSERT_EQ(result_bits.get_mantissa(), (UIntType(1) << MantissaWidth::VALUE) - 1); result = func(x, T(33.0)); - resultBits = FPBits(result); - ASSERT_EQ(resultBits.get_unbiased_exponent(), - xBits.get_unbiased_exponent()); - ASSERT_EQ(resultBits.get_mantissa(), xBits.get_mantissa() + UIntType(1)); + result_bits = FPBits(result); + ASSERT_EQ(result_bits.get_unbiased_exponent(), + x_bits.get_unbiased_exponent()); + ASSERT_EQ(result_bits.get_mantissa(), x_bits.get_mantissa() + UIntType(1)); x = -x; result = func(x, 0); - resultBits = FPBits(result); - ASSERT_EQ(resultBits.get_unbiased_exponent(), - uint16_t(xBits.get_unbiased_exponent() - 1)); - ASSERT_EQ(resultBits.get_mantissa(), + result_bits = FPBits(result); + ASSERT_EQ(result_bits.get_unbiased_exponent(), + uint16_t(x_bits.get_unbiased_exponent() - 1)); + ASSERT_EQ(result_bits.get_mantissa(), (UIntType(1) << MantissaWidth::VALUE) - 1); result = func(x, T(-33.0)); - resultBits = FPBits(result); - ASSERT_EQ(resultBits.get_unbiased_exponent(), - xBits.get_unbiased_exponent()); - ASSERT_EQ(resultBits.get_mantissa(), xBits.get_mantissa() + UIntType(1)); + result_bits = FPBits(result); + ASSERT_EQ(result_bits.get_unbiased_exponent(), + x_bits.get_unbiased_exponent()); + ASSERT_EQ(result_bits.get_mantissa(), x_bits.get_mantissa() + UIntType(1)); } }; diff --git a/libc/test/src/math/RIntTest.h b/libc/test/src/math/RIntTest.h index 53844a1eaa5f..7dc677059f19 100644 --- a/libc/test/src/math/RIntTest.h +++ b/libc/test/src/math/RIntTest.h @@ -21,8 +21,8 @@ namespace mpfr = __llvm_libc::testing::mpfr; -static constexpr int roundingModes[4] = {FE_UPWARD, FE_DOWNWARD, FE_TOWARDZERO, - FE_TONEAREST}; +static constexpr int ROUNDING_MODES[4] = {FE_UPWARD, FE_DOWNWARD, FE_TOWARDZERO, + FE_TONEAREST}; template class RIntTestTemplate : public __llvm_libc::testing::Test { @@ -39,7 +39,7 @@ class RIntTestTemplate : public __llvm_libc::testing::Test { const T neg_inf = T(FPBits::neg_inf()); const T nan = T(FPBits::build_nan(1)); - static inline mpfr::RoundingMode toMPFRRoundingMode(int mode) { + static inline mpfr::RoundingMode to_mpfr_rounding_mode(int mode) { switch (mode) { case FE_UPWARD: return mpfr::RoundingMode::Upward; @@ -56,7 +56,7 @@ class RIntTestTemplate : public __llvm_libc::testing::Test { public: void testSpecialNumbers(RIntFunc func) { - for (int mode : roundingModes) { + for (int mode : ROUNDING_MODES) { __llvm_libc::fputil::set_round(mode); ASSERT_FP_EQ(inf, func(inf)); ASSERT_FP_EQ(neg_inf, func(neg_inf)); @@ -67,50 +67,50 @@ class RIntTestTemplate : public __llvm_libc::testing::Test { } void testRoundNumbers(RIntFunc func) { - for (int mode : roundingModes) { + for (int mode : ROUNDING_MODES) { __llvm_libc::fputil::set_round(mode); - mpfr::RoundingMode mpfrMode = toMPFRRoundingMode(mode); - ASSERT_FP_EQ(func(T(1.0)), mpfr::Round(T(1.0), mpfrMode)); - ASSERT_FP_EQ(func(T(-1.0)), mpfr::Round(T(-1.0), mpfrMode)); - ASSERT_FP_EQ(func(T(10.0)), mpfr::Round(T(10.0), mpfrMode)); - ASSERT_FP_EQ(func(T(-10.0)), mpfr::Round(T(-10.0), mpfrMode)); - ASSERT_FP_EQ(func(T(1234.0)), mpfr::Round(T(1234.0), mpfrMode)); - ASSERT_FP_EQ(func(T(-1234.0)), 
mpfr::Round(T(-1234.0), mpfrMode)); + mpfr::RoundingMode mpfr_mode = to_mpfr_rounding_mode(mode); + ASSERT_FP_EQ(func(T(1.0)), mpfr::Round(T(1.0), mpfr_mode)); + ASSERT_FP_EQ(func(T(-1.0)), mpfr::Round(T(-1.0), mpfr_mode)); + ASSERT_FP_EQ(func(T(10.0)), mpfr::Round(T(10.0), mpfr_mode)); + ASSERT_FP_EQ(func(T(-10.0)), mpfr::Round(T(-10.0), mpfr_mode)); + ASSERT_FP_EQ(func(T(1234.0)), mpfr::Round(T(1234.0), mpfr_mode)); + ASSERT_FP_EQ(func(T(-1234.0)), mpfr::Round(T(-1234.0), mpfr_mode)); } } void testFractions(RIntFunc func) { - for (int mode : roundingModes) { + for (int mode : ROUNDING_MODES) { __llvm_libc::fputil::set_round(mode); - mpfr::RoundingMode mpfrMode = toMPFRRoundingMode(mode); - ASSERT_FP_EQ(func(T(0.5)), mpfr::Round(T(0.5), mpfrMode)); - ASSERT_FP_EQ(func(T(-0.5)), mpfr::Round(T(-0.5), mpfrMode)); - ASSERT_FP_EQ(func(T(0.115)), mpfr::Round(T(0.115), mpfrMode)); - ASSERT_FP_EQ(func(T(-0.115)), mpfr::Round(T(-0.115), mpfrMode)); - ASSERT_FP_EQ(func(T(0.715)), mpfr::Round(T(0.715), mpfrMode)); - ASSERT_FP_EQ(func(T(-0.715)), mpfr::Round(T(-0.715), mpfrMode)); + mpfr::RoundingMode mpfr_mode = to_mpfr_rounding_mode(mode); + ASSERT_FP_EQ(func(T(0.5)), mpfr::Round(T(0.5), mpfr_mode)); + ASSERT_FP_EQ(func(T(-0.5)), mpfr::Round(T(-0.5), mpfr_mode)); + ASSERT_FP_EQ(func(T(0.115)), mpfr::Round(T(0.115), mpfr_mode)); + ASSERT_FP_EQ(func(T(-0.115)), mpfr::Round(T(-0.115), mpfr_mode)); + ASSERT_FP_EQ(func(T(0.715)), mpfr::Round(T(0.715), mpfr_mode)); + ASSERT_FP_EQ(func(T(-0.715)), mpfr::Round(T(-0.715), mpfr_mode)); } } void testSubnormalRange(RIntFunc func) { - constexpr UIntType count = 1000001; - constexpr UIntType step = - (FPBits::MAX_SUBNORMAL - FPBits::MIN_SUBNORMAL) / count; + constexpr UIntType COUNT = 1000001; + constexpr UIntType STEP = + (FPBits::MAX_SUBNORMAL - FPBits::MIN_SUBNORMAL) / COUNT; for (UIntType i = FPBits::MIN_SUBNORMAL; i <= FPBits::MAX_SUBNORMAL; - i += step) { + i += STEP) { T x = T(FPBits(i)); - for (int mode : roundingModes) { + for (int mode : ROUNDING_MODES) { __llvm_libc::fputil::set_round(mode); - mpfr::RoundingMode mpfrMode = toMPFRRoundingMode(mode); - ASSERT_FP_EQ(func(x), mpfr::Round(x, mpfrMode)); + mpfr::RoundingMode mpfr_mode = to_mpfr_rounding_mode(mode); + ASSERT_FP_EQ(func(x), mpfr::Round(x, mpfr_mode)); } } } void testNormalRange(RIntFunc func) { - constexpr UIntType count = 1000001; - constexpr UIntType step = (FPBits::MAX_NORMAL - FPBits::MIN_NORMAL) / count; - for (UIntType i = FPBits::MIN_NORMAL; i <= FPBits::MAX_NORMAL; i += step) { + constexpr UIntType COUNT = 1000001; + constexpr UIntType STEP = (FPBits::MAX_NORMAL - FPBits::MIN_NORMAL) / COUNT; + for (UIntType i = FPBits::MIN_NORMAL; i <= FPBits::MAX_NORMAL; i += STEP) { T x = T(FPBits(i)); // In normal range on x86 platforms, the long double implicit 1 bit can be // zero making the numbers NaN. We will skip them. 
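The to_mpfr_rounding_mode helper renamed above is just a mapping from the fenv rounding macros onto MPFR's rounding modes, so that each func(x) result can be checked against mpfr::Round(x, mpfr_mode) under the same mode. Sketched directly against mpfr.h instead of the in-tree MPFR wrapper (the helper name fenv_to_mpfr is made up for the illustration), the same mapping would read:

    #include <cassert>
    #include <cfenv>
    #include <mpfr.h>

    // Map an fenv rounding macro to the corresponding MPFR rounding mode.
    static mpfr_rnd_t fenv_to_mpfr(int mode) {
      switch (mode) {
      case FE_UPWARD:     return MPFR_RNDU; // toward +infinity
      case FE_DOWNWARD:   return MPFR_RNDD; // toward -infinity
      case FE_TOWARDZERO: return MPFR_RNDZ; // toward zero
      default:            return MPFR_RNDN; // FE_TONEAREST, ties to even
      }
    }

    int main() {
      assert(fenv_to_mpfr(FE_UPWARD) == MPFR_RNDU);
      assert(fenv_to_mpfr(FE_TONEAREST) == MPFR_RNDN);
      return 0;
    }
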
@@ -118,10 +118,10 @@ class RIntTestTemplate : public __llvm_libc::testing::Test { continue; } - for (int mode : roundingModes) { + for (int mode : ROUNDING_MODES) { __llvm_libc::fputil::set_round(mode); - mpfr::RoundingMode mpfrMode = toMPFRRoundingMode(mode); - ASSERT_FP_EQ(func(x), mpfr::Round(x, mpfrMode)); + mpfr::RoundingMode mpfr_mode = to_mpfr_rounding_mode(mode); + ASSERT_FP_EQ(func(x), mpfr::Round(x, mpfr_mode)); } } } diff --git a/libc/test/src/math/RemQuoTest.h b/libc/test/src/math/RemQuoTest.h index e96ddd759bd6..5903760c1969 100644 --- a/libc/test/src/math/RemQuoTest.h +++ b/libc/test/src/math/RemQuoTest.h @@ -95,12 +95,12 @@ class RemQuoTestTemplate : public __llvm_libc::testing::Test { } void testSubnormalRange(RemQuoFunc func) { - constexpr UIntType count = 1000001; - constexpr UIntType step = - (FPBits::MAX_SUBNORMAL - FPBits::MIN_SUBNORMAL) / count; + constexpr UIntType COUNT = 1000001; + constexpr UIntType STEP = + (FPBits::MAX_SUBNORMAL - FPBits::MIN_SUBNORMAL) / COUNT; for (UIntType v = FPBits::MIN_SUBNORMAL, w = FPBits::MAX_SUBNORMAL; v <= FPBits::MAX_SUBNORMAL && w >= FPBits::MIN_SUBNORMAL; - v += step, w -= step) { + v += STEP, w -= STEP) { T x = T(FPBits(v)), y = T(FPBits(w)); mpfr::BinaryOutput result; mpfr::BinaryInput input{x, y}; @@ -110,11 +110,11 @@ class RemQuoTestTemplate : public __llvm_libc::testing::Test { } void testNormalRange(RemQuoFunc func) { - constexpr UIntType count = 1000001; - constexpr UIntType step = (FPBits::MAX_NORMAL - FPBits::MIN_NORMAL) / count; + constexpr UIntType COUNT = 1000001; + constexpr UIntType STEP = (FPBits::MAX_NORMAL - FPBits::MIN_NORMAL) / COUNT; for (UIntType v = FPBits::MIN_NORMAL, w = FPBits::MAX_NORMAL; v <= FPBits::MAX_NORMAL && w >= FPBits::MIN_NORMAL; - v += step, w -= step) { + v += STEP, w -= STEP) { T x = T(FPBits(v)), y = T(FPBits(w)); mpfr::BinaryOutput result; mpfr::BinaryInput input{x, y}; diff --git a/libc/test/src/math/RoundTest.h b/libc/test/src/math/RoundTest.h index d4fea629507f..4720f385edab 100644 --- a/libc/test/src/math/RoundTest.h +++ b/libc/test/src/math/RoundTest.h @@ -64,9 +64,9 @@ template class RoundTest : public __llvm_libc::testing::Test { } void testRange(RoundFunc func) { - constexpr UIntType count = 10000000; - constexpr UIntType step = UIntType(-1) / count; - for (UIntType i = 0, v = 0; i <= count; ++i, v += step) { + constexpr UIntType COUNT = 10000000; + constexpr UIntType STEP = UIntType(-1) / COUNT; + for (UIntType i = 0, v = 0; i <= COUNT; ++i, v += STEP) { T x = T(FPBits(v)); if (isnan(x) || isinf(x)) continue; diff --git a/libc/test/src/math/RoundToIntegerTest.h b/libc/test/src/math/RoundToIntegerTest.h index b77c4bffc828..e95f96e92ab0 100644 --- a/libc/test/src/math/RoundToIntegerTest.h +++ b/libc/test/src/math/RoundToIntegerTest.h @@ -23,8 +23,8 @@ namespace mpfr = __llvm_libc::testing::mpfr; -static constexpr int roundingModes[4] = {FE_UPWARD, FE_DOWNWARD, FE_TOWARDZERO, - FE_TONEAREST}; +static constexpr int ROUNDING_MODES[4] = {FE_UPWARD, FE_DOWNWARD, FE_TOWARDZERO, + FE_TONEAREST}; template class RoundToIntegerTestTemplate : public __llvm_libc::testing::Test { @@ -40,11 +40,11 @@ class RoundToIntegerTestTemplate : public __llvm_libc::testing::Test { const F inf = F(__llvm_libc::fputil::FPBits::inf()); const F neg_inf = F(__llvm_libc::fputil::FPBits::neg_inf()); const F nan = F(__llvm_libc::fputil::FPBits::build_nan(1)); - static constexpr I IntegerMin = I(1) << (sizeof(I) * 8 - 1); - static constexpr I IntegerMax = -(IntegerMin + 1); + static constexpr I INTEGER_MIN = I(1) << 
(sizeof(I) * 8 - 1); + static constexpr I INTEGER_MAX = -(INTEGER_MIN + 1); - void testOneInput(RoundToIntegerFunc func, F input, I expected, - bool expectError) { + void test_one_input(RoundToIntegerFunc func, F input, I expected, + bool expectError) { #if math_errhandling & MATH_ERRNO errno = 0; #endif @@ -71,7 +71,7 @@ class RoundToIntegerTestTemplate : public __llvm_libc::testing::Test { } } - static inline mpfr::RoundingMode toMPFRRoundingMode(int mode) { + static inline mpfr::RoundingMode to_mpfr_rounding_mode(int mode) { switch (mode) { case FE_UPWARD: return mpfr::RoundingMode::Upward; @@ -96,32 +96,32 @@ class RoundToIntegerTestTemplate : public __llvm_libc::testing::Test { #endif } - void doInfinityAndNaNTest(RoundToIntegerFunc func) { - testOneInput(func, inf, IntegerMax, true); - testOneInput(func, neg_inf, IntegerMin, true); - testOneInput(func, nan, IntegerMax, true); + void do_infinity_and_na_n_test(RoundToIntegerFunc func) { + test_one_input(func, inf, INTEGER_MAX, true); + test_one_input(func, neg_inf, INTEGER_MIN, true); + test_one_input(func, nan, INTEGER_MAX, true); } void testInfinityAndNaN(RoundToIntegerFunc func) { if (TestModes) { - for (int mode : roundingModes) { + for (int mode : ROUNDING_MODES) { __llvm_libc::fputil::set_round(mode); - doInfinityAndNaNTest(func); + do_infinity_and_na_n_test(func); } } else { - doInfinityAndNaNTest(func); + do_infinity_and_na_n_test(func); } } - void doRoundNumbersTest(RoundToIntegerFunc func) { - testOneInput(func, zero, I(0), false); - testOneInput(func, neg_zero, I(0), false); - testOneInput(func, F(1.0), I(1), false); - testOneInput(func, F(-1.0), I(-1), false); - testOneInput(func, F(10.0), I(10), false); - testOneInput(func, F(-10.0), I(-10), false); - testOneInput(func, F(1234.0), I(1234), false); - testOneInput(func, F(-1234.0), I(-1234), false); + void do_round_numbers_test(RoundToIntegerFunc func) { + test_one_input(func, zero, I(0), false); + test_one_input(func, neg_zero, I(0), false); + test_one_input(func, F(1.0), I(1), false); + test_one_input(func, F(-1.0), I(-1), false); + test_one_input(func, F(10.0), I(10), false); + test_one_input(func, F(-10.0), I(-10), false); + test_one_input(func, F(1234.0), I(1234), false); + test_one_input(func, F(-1234.0), I(-1234), false); // The rest of this this function compares with an equivalent MPFR function // which rounds floating point numbers to long values. There is no MPFR @@ -131,58 +131,58 @@ class RoundToIntegerTestTemplate : public __llvm_libc::testing::Test { if (sizeof(I) > sizeof(long)) return; - constexpr int exponentLimit = sizeof(I) * 8 - 1; + constexpr int EXPONENT_LIMIT = sizeof(I) * 8 - 1; // We start with 1.0 so that the implicit bit for x86 long doubles // is set. 
FPBits bits(F(1.0)); - bits.set_unbiased_exponent(exponentLimit + FPBits::EXPONENT_BIAS); + bits.set_unbiased_exponent(EXPONENT_LIMIT + FPBits::EXPONENT_BIAS); bits.set_sign(1); bits.set_mantissa(0); F x = F(bits); - long mpfrResult; - bool erangeflag = mpfr::RoundToLong(x, mpfrResult); + long mpfr_result; + bool erangeflag = mpfr::RoundToLong(x, mpfr_result); ASSERT_FALSE(erangeflag); - testOneInput(func, x, mpfrResult, false); + test_one_input(func, x, mpfr_result, false); } void testRoundNumbers(RoundToIntegerFunc func) { if (TestModes) { - for (int mode : roundingModes) { + for (int mode : ROUNDING_MODES) { __llvm_libc::fputil::set_round(mode); - doRoundNumbersTest(func); + do_round_numbers_test(func); } } else { - doRoundNumbersTest(func); + do_round_numbers_test(func); } } - void doFractionsTest(RoundToIntegerFunc func, int mode) { - constexpr F fractions[] = {0.5, -0.5, 0.115, -0.115, 0.715, -0.715}; - for (F x : fractions) { - long mpfrLongResult; + void do_fractions_test(RoundToIntegerFunc func, int mode) { + constexpr F FRACTIONS[] = {0.5, -0.5, 0.115, -0.115, 0.715, -0.715}; + for (F x : FRACTIONS) { + long mpfr_long_result; bool erangeflag; if (TestModes) erangeflag = - mpfr::RoundToLong(x, toMPFRRoundingMode(mode), mpfrLongResult); + mpfr::RoundToLong(x, to_mpfr_rounding_mode(mode), mpfr_long_result); else - erangeflag = mpfr::RoundToLong(x, mpfrLongResult); + erangeflag = mpfr::RoundToLong(x, mpfr_long_result); ASSERT_FALSE(erangeflag); - I mpfrResult = mpfrLongResult; - testOneInput(func, x, mpfrResult, false); + I mpfr_result = mpfr_long_result; + test_one_input(func, x, mpfr_result, false); } } void testFractions(RoundToIntegerFunc func) { if (TestModes) { - for (int mode : roundingModes) { + for (int mode : ROUNDING_MODES) { __llvm_libc::fputil::set_round(mode); - doFractionsTest(func, mode); + do_fractions_test(func, mode); } } else { // Passing 0 for mode has no effect as it is not used in doFractionsTest // when `TestModes` is false; - doFractionsTest(func, 0); + do_fractions_test(func, 0); } } @@ -195,39 +195,39 @@ class RoundToIntegerTestTemplate : public __llvm_libc::testing::Test { if (sizeof(I) > sizeof(long)) return; - constexpr int exponentLimit = sizeof(I) * 8 - 1; + constexpr int EXPONENT_LIMIT = sizeof(I) * 8 - 1; // We start with 1.0 so that the implicit bit for x86 long doubles // is set. 
FPBits bits(F(1.0)); - bits.set_unbiased_exponent(exponentLimit + FPBits::EXPONENT_BIAS); + bits.set_unbiased_exponent(EXPONENT_LIMIT + FPBits::EXPONENT_BIAS); bits.set_sign(1); bits.set_mantissa(UIntType(0x1) << (__llvm_libc::fputil::MantissaWidth::VALUE - 1)); F x = F(bits); if (TestModes) { - for (int m : roundingModes) { + for (int m : ROUNDING_MODES) { __llvm_libc::fputil::set_round(m); - long mpfrLongResult; + long mpfr_long_result; bool erangeflag = - mpfr::RoundToLong(x, toMPFRRoundingMode(m), mpfrLongResult); + mpfr::RoundToLong(x, to_mpfr_rounding_mode(m), mpfr_long_result); ASSERT_TRUE(erangeflag); - testOneInput(func, x, IntegerMin, true); + test_one_input(func, x, INTEGER_MIN, true); } } else { - long mpfrLongResult; - bool erangeflag = mpfr::RoundToLong(x, mpfrLongResult); + long mpfr_long_result; + bool erangeflag = mpfr::RoundToLong(x, mpfr_long_result); ASSERT_TRUE(erangeflag); - testOneInput(func, x, IntegerMin, true); + test_one_input(func, x, INTEGER_MIN, true); } } void testSubnormalRange(RoundToIntegerFunc func) { - constexpr UIntType count = 1000001; - constexpr UIntType step = - (FPBits::MAX_SUBNORMAL - FPBits::MIN_SUBNORMAL) / count; + constexpr UIntType COUNT = 1000001; + constexpr UIntType STEP = + (FPBits::MAX_SUBNORMAL - FPBits::MIN_SUBNORMAL) / COUNT; for (UIntType i = FPBits::MIN_SUBNORMAL; i <= FPBits::MAX_SUBNORMAL; - i += step) { + i += STEP) { F x = F(FPBits(i)); if (x == F(0.0)) continue; @@ -235,25 +235,25 @@ class RoundToIntegerTestTemplate : public __llvm_libc::testing::Test { if (TestModes) { if (x > 0) { __llvm_libc::fputil::set_round(FE_UPWARD); - testOneInput(func, x, I(1), false); + test_one_input(func, x, I(1), false); __llvm_libc::fputil::set_round(FE_DOWNWARD); - testOneInput(func, x, I(0), false); + test_one_input(func, x, I(0), false); __llvm_libc::fputil::set_round(FE_TOWARDZERO); - testOneInput(func, x, I(0), false); + test_one_input(func, x, I(0), false); __llvm_libc::fputil::set_round(FE_TONEAREST); - testOneInput(func, x, I(0), false); + test_one_input(func, x, I(0), false); } else { __llvm_libc::fputil::set_round(FE_UPWARD); - testOneInput(func, x, I(0), false); + test_one_input(func, x, I(0), false); __llvm_libc::fputil::set_round(FE_DOWNWARD); - testOneInput(func, x, I(-1), false); + test_one_input(func, x, I(-1), false); __llvm_libc::fputil::set_round(FE_TOWARDZERO); - testOneInput(func, x, I(0), false); + test_one_input(func, x, I(0), false); __llvm_libc::fputil::set_round(FE_TONEAREST); - testOneInput(func, x, I(0), false); + test_one_input(func, x, I(0), false); } } else { - testOneInput(func, x, 0L, false); + test_one_input(func, x, 0L, false); } } } @@ -267,9 +267,9 @@ class RoundToIntegerTestTemplate : public __llvm_libc::testing::Test { if (sizeof(I) > sizeof(long)) return; - constexpr UIntType count = 1000001; - constexpr UIntType step = (FPBits::MAX_NORMAL - FPBits::MIN_NORMAL) / count; - for (UIntType i = FPBits::MIN_NORMAL; i <= FPBits::MAX_NORMAL; i += step) { + constexpr UIntType COUNT = 1000001; + constexpr UIntType STEP = (FPBits::MAX_NORMAL - FPBits::MIN_NORMAL) / COUNT; + for (UIntType i = FPBits::MIN_NORMAL; i <= FPBits::MAX_NORMAL; i += STEP) { F x = F(FPBits(i)); // In normal range on x86 platforms, the long double implicit 1 bit can be // zero making the numbers NaN. We will skip them. 
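The per-mode expectations in testSubnormalRange above encode the usual fenv behaviour for tiny inputs: a positive subnormal only rounds up to 1 under FE_UPWARD, a negative one only reaches -1 under FE_DOWNWARD, and every other mode yields 0. A quick standalone check of that behaviour with the standard lrint (an assumption for illustration; the tests exercise the __llvm_libc round-to-integer functions) might look like:

    #include <cassert>
    #include <cfenv>
    #include <cmath>

    int main() {
      // 1e-310 is a double subnormal: it reaches 1 only when rounding upward,
      // and -1e-310 reaches -1 only when rounding downward.
      std::fesetround(FE_UPWARD);
      assert(std::lrint(1e-310) == 1 && std::lrint(-1e-310) == 0);
      std::fesetround(FE_DOWNWARD);
      assert(std::lrint(1e-310) == 0 && std::lrint(-1e-310) == -1);
      std::fesetround(FE_TOWARDZERO);
      assert(std::lrint(1e-310) == 0 && std::lrint(-1e-310) == 0);
      std::fesetround(FE_TONEAREST);
      assert(std::lrint(1e-310) == 0 && std::lrint(-1e-310) == 0);
      return 0;
    }
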
@@ -278,25 +278,25 @@ class RoundToIntegerTestTemplate : public __llvm_libc::testing::Test { } if (TestModes) { - for (int m : roundingModes) { - long mpfrLongResult; + for (int m : ROUNDING_MODES) { + long mpfr_long_result; bool erangeflag = - mpfr::RoundToLong(x, toMPFRRoundingMode(m), mpfrLongResult); - I mpfrResult = mpfrLongResult; + mpfr::RoundToLong(x, to_mpfr_rounding_mode(m), mpfr_long_result); + I mpfr_result = mpfr_long_result; __llvm_libc::fputil::set_round(m); if (erangeflag) - testOneInput(func, x, x > 0 ? IntegerMax : IntegerMin, true); + test_one_input(func, x, x > 0 ? INTEGER_MAX : INTEGER_MIN, true); else - testOneInput(func, x, mpfrResult, false); + test_one_input(func, x, mpfr_result, false); } } else { - long mpfrLongResult; - bool erangeflag = mpfr::RoundToLong(x, mpfrLongResult); - I mpfrResult = mpfrLongResult; + long mpfr_long_result; + bool erangeflag = mpfr::RoundToLong(x, mpfr_long_result); + I mpfr_result = mpfr_long_result; if (erangeflag) - testOneInput(func, x, x > 0 ? IntegerMax : IntegerMin, true); + test_one_input(func, x, x > 0 ? INTEGER_MAX : INTEGER_MIN, true); else - testOneInput(func, x, mpfrResult, false); + test_one_input(func, x, mpfr_result, false); } } } diff --git a/libc/test/src/math/SqrtTest.h b/libc/test/src/math/SqrtTest.h index a6120a2c51eb..f3e4def723aa 100644 --- a/libc/test/src/math/SqrtTest.h +++ b/libc/test/src/math/SqrtTest.h @@ -18,7 +18,7 @@ template class SqrtTest : public __llvm_libc::testing::Test { DECLARE_SPECIAL_CONSTANTS(T) - static constexpr UIntType HiddenBit = + static constexpr UIntType HIDDEN_BIT = UIntType(1) << __llvm_libc::fputil::MantissaWidth::VALUE; public: @@ -37,7 +37,7 @@ template class SqrtTest : public __llvm_libc::testing::Test { } void testDenormalValues(SqrtFunc func) { - for (UIntType mant = 1; mant < HiddenBit; mant <<= 1) { + for (UIntType mant = 1; mant < HIDDEN_BIT; mant <<= 1) { FPBits denormal(T(0.0)); denormal.set_mantissa(mant); @@ -45,18 +45,18 @@ template class SqrtTest : public __llvm_libc::testing::Test { T(0.5)); } - constexpr UIntType count = 1'000'001; - constexpr UIntType step = HiddenBit / count; - for (UIntType i = 0, v = 0; i <= count; ++i, v += step) { + constexpr UIntType COUNT = 1'000'001; + constexpr UIntType STEP = HIDDEN_BIT / COUNT; + for (UIntType i = 0, v = 0; i <= COUNT; ++i, v += STEP) { T x = *reinterpret_cast(&v); ASSERT_MPFR_MATCH(mpfr::Operation::Sqrt, x, func(x), 0.5); } } void testNormalRange(SqrtFunc func) { - constexpr UIntType count = 10'000'001; - constexpr UIntType step = UIntType(-1) / count; - for (UIntType i = 0, v = 0; i <= count; ++i, v += step) { + constexpr UIntType COUNT = 10'000'001; + constexpr UIntType STEP = UIntType(-1) / COUNT; + for (UIntType i = 0, v = 0; i <= COUNT; ++i, v += STEP) { T x = *reinterpret_cast(&v); if (isnan(x) || (x < 0)) { continue; diff --git a/libc/test/src/math/TruncTest.h b/libc/test/src/math/TruncTest.h index 6e65f3d7888d..f939de40e24d 100644 --- a/libc/test/src/math/TruncTest.h +++ b/libc/test/src/math/TruncTest.h @@ -64,9 +64,9 @@ template class TruncTest : public __llvm_libc::testing::Test { } void testRange(TruncFunc func) { - constexpr UIntType count = 10000000; - constexpr UIntType step = UIntType(-1) / count; - for (UIntType i = 0, v = 0; i <= count; ++i, v += step) { + constexpr UIntType COUNT = 10000000; + constexpr UIntType STEP = UIntType(-1) / COUNT; + for (UIntType i = 0, v = 0; i <= COUNT; ++i, v += STEP) { T x = T(FPBits(v)); if (isnan(x) || isinf(x)) continue; diff --git a/libc/test/src/math/cos_test.cpp 
b/libc/test/src/math/cos_test.cpp index e99cb39ad1e8..0feef88ca17f 100644 --- a/libc/test/src/math/cos_test.cpp +++ b/libc/test/src/math/cos_test.cpp @@ -19,9 +19,9 @@ DECLARE_SPECIAL_CONSTANTS(double) TEST(LlvmLibccosTest, Range) { static constexpr double _2pi = 6.283185307179586; - constexpr UIntType count = 10000000; - constexpr UIntType step = UIntType(-1) / count; - for (UIntType i = 0, v = 0; i <= count; ++i, v += step) { + constexpr UIntType COUNT = 10000000; + constexpr UIntType STEP = UIntType(-1) / COUNT; + for (UIntType i = 0, v = 0; i <= COUNT; ++i, v += STEP) { double x = double(FPBits(v)); // TODO: Expand the range of testing after range reduction is implemented. if (isnan(x) || isinf(x) || x > _2pi || x < -_2pi) diff --git a/libc/test/src/math/cosf_test.cpp b/libc/test/src/math/cosf_test.cpp index be968d6ea35c..8e5f6c0dcfe8 100644 --- a/libc/test/src/math/cosf_test.cpp +++ b/libc/test/src/math/cosf_test.cpp @@ -18,7 +18,7 @@ #include #include -using __llvm_libc::testing::sdcomp26094Values; +using __llvm_libc::testing::SDCOMP26094_VALUES; using FPBits = __llvm_libc::fputil::FPBits; namespace mpfr = __llvm_libc::testing::mpfr; @@ -47,9 +47,9 @@ TEST(LlvmLibcCosfTest, SpecialNumbers) { } TEST(LlvmLibcCosfTest, InFloatRange) { - constexpr uint32_t count = 1000000; - constexpr uint32_t step = UINT32_MAX / count; - for (uint32_t i = 0, v = 0; i <= count; ++i, v += step) { + constexpr uint32_t COUNT = 1000000; + constexpr uint32_t STEP = UINT32_MAX / COUNT; + for (uint32_t i = 0, v = 0; i <= COUNT; ++i, v += STEP) { float x = float(FPBits(v)); if (isnan(x) || isinf(x)) continue; @@ -73,7 +73,7 @@ TEST(LlvmLibcCosfTest, SmallValues) { // SDCOMP-26094: check cosf in the cases for which the range reducer // returns values furthest beyond its nominal upper bound of pi/4. 
TEST(LlvmLibcCosfTest, SDCOMP_26094) { - for (uint32_t v : sdcomp26094Values) { + for (uint32_t v : SDCOMP26094_VALUES) { float x = float(FPBits(v)); ASSERT_MPFR_MATCH(mpfr::Operation::Cos, x, __llvm_libc::cosf(x), 1.0); } diff --git a/libc/test/src/math/exp2f_test.cpp b/libc/test/src/math/exp2f_test.cpp index 7db1a576f554..f3a4cc413e1f 100644 --- a/libc/test/src/math/exp2f_test.cpp +++ b/libc/test/src/math/exp2f_test.cpp @@ -101,9 +101,9 @@ TEST(LlvmLibcExpfTest, Underflow) { } TEST(LlvmLibcexp2fTest, InFloatRange) { - constexpr uint32_t count = 1000000; - constexpr uint32_t step = UINT32_MAX / count; - for (uint32_t i = 0, v = 0; i <= count; ++i, v += step) { + constexpr uint32_t COUNT = 1000000; + constexpr uint32_t STEP = UINT32_MAX / COUNT; + for (uint32_t i = 0, v = 0; i <= COUNT; ++i, v += STEP) { float x = float(FPBits(v)); if (isnan(x) || isinf(x)) continue; diff --git a/libc/test/src/math/expf_test.cpp b/libc/test/src/math/expf_test.cpp index 68d7dff462f5..d675fc1a9534 100644 --- a/libc/test/src/math/expf_test.cpp +++ b/libc/test/src/math/expf_test.cpp @@ -93,9 +93,9 @@ TEST(LlvmLibcExpfTest, Borderline) { } TEST(LlvmLibcExpfTest, InFloatRange) { - constexpr uint32_t count = 1000000; - constexpr uint32_t step = UINT32_MAX / count; - for (uint32_t i = 0, v = 0; i <= count; ++i, v += step) { + constexpr uint32_t COUNT = 1000000; + constexpr uint32_t STEP = UINT32_MAX / COUNT; + for (uint32_t i = 0, v = 0; i <= COUNT; ++i, v += STEP) { float x = float(FPBits(v)); if (isnan(x) || isinf(x)) continue; diff --git a/libc/test/src/math/expm1f_test.cpp b/libc/test/src/math/expm1f_test.cpp index cec3080962bf..b8439192151a 100644 --- a/libc/test/src/math/expm1f_test.cpp +++ b/libc/test/src/math/expm1f_test.cpp @@ -93,9 +93,9 @@ TEST(LlvmLibcExpm1fTest, Borderline) { } TEST(LlvmLibcExpm1fTest, InFloatRange) { - constexpr uint32_t count = 1000000; - constexpr uint32_t step = UINT32_MAX / count; - for (uint32_t i = 0, v = 0; i <= count; ++i, v += step) { + constexpr uint32_t COUNT = 1000000; + constexpr uint32_t STEP = UINT32_MAX / COUNT; + for (uint32_t i = 0, v = 0; i <= COUNT; ++i, v += STEP) { float x = float(FPBits(v)); if (isnan(x) || isinf(x)) continue; diff --git a/libc/test/src/math/fdim_test.cpp b/libc/test/src/math/fdim_test.cpp index a4726f28a382..06b61c2124d2 100644 --- a/libc/test/src/math/fdim_test.cpp +++ b/libc/test/src/math/fdim_test.cpp @@ -16,14 +16,16 @@ using LlvmLibcFDimTest = FDimTestTemplate; -TEST_F(LlvmLibcFDimTest, NaNArg_fdim) { testNaNArg(&__llvm_libc::fdim); } +TEST_F(LlvmLibcFDimTest, NaNArg_fdim) { test_na_n_arg(&__llvm_libc::fdim); } -TEST_F(LlvmLibcFDimTest, InfArg_fdim) { testInfArg(&__llvm_libc::fdim); } +TEST_F(LlvmLibcFDimTest, InfArg_fdim) { test_inf_arg(&__llvm_libc::fdim); } -TEST_F(LlvmLibcFDimTest, NegInfArg_fdim) { testNegInfArg(&__llvm_libc::fdim); } +TEST_F(LlvmLibcFDimTest, NegInfArg_fdim) { + test_neg_inf_arg(&__llvm_libc::fdim); +} -TEST_F(LlvmLibcFDimTest, BothZero_fdim) { testBothZero(&__llvm_libc::fdim); } +TEST_F(LlvmLibcFDimTest, BothZero_fdim) { test_both_zero(&__llvm_libc::fdim); } TEST_F(LlvmLibcFDimTest, InDoubleRange_fdim) { - testInRange(&__llvm_libc::fdim); + test_in_range(&__llvm_libc::fdim); } diff --git a/libc/test/src/math/fdimf_test.cpp b/libc/test/src/math/fdimf_test.cpp index 45f960cc8955..c85b755738f3 100644 --- a/libc/test/src/math/fdimf_test.cpp +++ b/libc/test/src/math/fdimf_test.cpp @@ -16,16 +16,18 @@ using LlvmLibcFDimTest = FDimTestTemplate; -TEST_F(LlvmLibcFDimTest, NaNArg_fdimf) { testNaNArg(&__llvm_libc::fdimf); } 
+TEST_F(LlvmLibcFDimTest, NaNArg_fdimf) { test_na_n_arg(&__llvm_libc::fdimf); } -TEST_F(LlvmLibcFDimTest, InfArg_fdimf) { testInfArg(&__llvm_libc::fdimf); } +TEST_F(LlvmLibcFDimTest, InfArg_fdimf) { test_inf_arg(&__llvm_libc::fdimf); } TEST_F(LlvmLibcFDimTest, NegInfArg_fdimf) { - testNegInfArg(&__llvm_libc::fdimf); + test_neg_inf_arg(&__llvm_libc::fdimf); } -TEST_F(LlvmLibcFDimTest, BothZero_fdimf) { testBothZero(&__llvm_libc::fdimf); } +TEST_F(LlvmLibcFDimTest, BothZero_fdimf) { + test_both_zero(&__llvm_libc::fdimf); +} TEST_F(LlvmLibcFDimTest, InFloatRange_fdimf) { - testInRange(&__llvm_libc::fdimf); + test_in_range(&__llvm_libc::fdimf); } diff --git a/libc/test/src/math/fdiml_test.cpp b/libc/test/src/math/fdiml_test.cpp index b82819395b35..79427d6a33d2 100644 --- a/libc/test/src/math/fdiml_test.cpp +++ b/libc/test/src/math/fdiml_test.cpp @@ -16,16 +16,18 @@ using LlvmLibcFDimTest = FDimTestTemplate; -TEST_F(LlvmLibcFDimTest, NaNArg_fdiml) { testNaNArg(&__llvm_libc::fdiml); } +TEST_F(LlvmLibcFDimTest, NaNArg_fdiml) { test_na_n_arg(&__llvm_libc::fdiml); } -TEST_F(LlvmLibcFDimTest, InfArg_fdiml) { testInfArg(&__llvm_libc::fdiml); } +TEST_F(LlvmLibcFDimTest, InfArg_fdiml) { test_inf_arg(&__llvm_libc::fdiml); } TEST_F(LlvmLibcFDimTest, NegInfArg_fdiml) { - testNegInfArg(&__llvm_libc::fdiml); + test_neg_inf_arg(&__llvm_libc::fdiml); } -TEST_F(LlvmLibcFDimTest, BothZero_fdiml) { testBothZero(&__llvm_libc::fdiml); } +TEST_F(LlvmLibcFDimTest, BothZero_fdiml) { + test_both_zero(&__llvm_libc::fdiml); +} TEST_F(LlvmLibcFDimTest, InLongDoubleRange_fdiml) { - testInRange(&__llvm_libc::fdiml); + test_in_range(&__llvm_libc::fdiml); } diff --git a/libc/test/src/math/fma_test.cpp b/libc/test/src/math/fma_test.cpp index 520772797edf..a9388cca72f7 100644 --- a/libc/test/src/math/fma_test.cpp +++ b/libc/test/src/math/fma_test.cpp @@ -13,11 +13,11 @@ using LlvmLibcFmaTest = FmaTestTemplate; TEST_F(LlvmLibcFmaTest, SpecialNumbers) { - testSpecialNumbers(&__llvm_libc::fma); + test_special_numbers(&__llvm_libc::fma); } TEST_F(LlvmLibcFmaTest, SubnormalRange) { - testSubnormalRange(&__llvm_libc::fma); + test_subnormal_range(&__llvm_libc::fma); } -TEST_F(LlvmLibcFmaTest, NormalRange) { testNormalRange(&__llvm_libc::fma); } +TEST_F(LlvmLibcFmaTest, NormalRange) { test_normal_range(&__llvm_libc::fma); } diff --git a/libc/test/src/math/fmaf_test.cpp b/libc/test/src/math/fmaf_test.cpp index f58698c95851..73d009499deb 100644 --- a/libc/test/src/math/fmaf_test.cpp +++ b/libc/test/src/math/fmaf_test.cpp @@ -13,11 +13,11 @@ using LlvmLibcFmaTest = FmaTestTemplate; TEST_F(LlvmLibcFmaTest, SpecialNumbers) { - testSpecialNumbers(&__llvm_libc::fmaf); + test_special_numbers(&__llvm_libc::fmaf); } TEST_F(LlvmLibcFmaTest, SubnormalRange) { - testSubnormalRange(&__llvm_libc::fmaf); + test_subnormal_range(&__llvm_libc::fmaf); } -TEST_F(LlvmLibcFmaTest, NormalRange) { testNormalRange(&__llvm_libc::fmaf); } +TEST_F(LlvmLibcFmaTest, NormalRange) { test_normal_range(&__llvm_libc::fmaf); } diff --git a/libc/test/src/math/hypot_test.cpp b/libc/test/src/math/hypot_test.cpp index 928c38a494f6..2d7b640adf10 100644 --- a/libc/test/src/math/hypot_test.cpp +++ b/libc/test/src/math/hypot_test.cpp @@ -13,11 +13,13 @@ using LlvmLibcHypotTest = HypotTestTemplate; TEST_F(LlvmLibcHypotTest, SpecialNumbers) { - testSpecialNumbers(&__llvm_libc::hypot); + test_special_numbers(&__llvm_libc::hypot); } TEST_F(LlvmLibcHypotTest, SubnormalRange) { - testSubnormalRange(&__llvm_libc::hypot); + test_subnormal_range(&__llvm_libc::hypot); } 
-TEST_F(LlvmLibcHypotTest, NormalRange) { testNormalRange(&__llvm_libc::hypot); } +TEST_F(LlvmLibcHypotTest, NormalRange) { + test_normal_range(&__llvm_libc::hypot); +} diff --git a/libc/test/src/math/hypotf_test.cpp b/libc/test/src/math/hypotf_test.cpp index 94f102efb73f..2e958d63d84a 100644 --- a/libc/test/src/math/hypotf_test.cpp +++ b/libc/test/src/math/hypotf_test.cpp @@ -13,13 +13,13 @@ using LlvmLibcHypotfTest = HypotTestTemplate; TEST_F(LlvmLibcHypotfTest, SpecialNumbers) { - testSpecialNumbers(&__llvm_libc::hypotf); + test_special_numbers(&__llvm_libc::hypotf); } TEST_F(LlvmLibcHypotfTest, SubnormalRange) { - testSubnormalRange(&__llvm_libc::hypotf); + test_subnormal_range(&__llvm_libc::hypotf); } TEST_F(LlvmLibcHypotfTest, NormalRange) { - testNormalRange(&__llvm_libc::hypotf); + test_normal_range(&__llvm_libc::hypotf); } diff --git a/libc/test/src/math/ilogb_test.cpp b/libc/test/src/math/ilogb_test.cpp index 8b8b51cb41be..b049247815d2 100644 --- a/libc/test/src/math/ilogb_test.cpp +++ b/libc/test/src/math/ilogb_test.cpp @@ -16,21 +16,21 @@ #include TEST_F(LlvmLibcILogbTest, SpecialNumbers_ilogb) { - testSpecialNumbers(&__llvm_libc::ilogb); + test_special_numbers(&__llvm_libc::ilogb); } TEST_F(LlvmLibcILogbTest, PowersOfTwo_ilogb) { - testPowersOfTwo(&__llvm_libc::ilogb); + test_powers_of_two(&__llvm_libc::ilogb); } TEST_F(LlvmLibcILogbTest, SomeIntegers_ilogb) { - testSomeIntegers(&__llvm_libc::ilogb); + test_some_integers(&__llvm_libc::ilogb); } TEST_F(LlvmLibcILogbTest, SubnormalRange_ilogb) { - testSubnormalRange(&__llvm_libc::ilogb); + test_subnormal_range(&__llvm_libc::ilogb); } TEST_F(LlvmLibcILogbTest, NormalRange_ilogb) { - testNormalRange(&__llvm_libc::ilogb); + test_normal_range(&__llvm_libc::ilogb); } diff --git a/libc/test/src/math/ilogbf_test.cpp b/libc/test/src/math/ilogbf_test.cpp index 9fabc01ea969..4b330a8fbbd2 100644 --- a/libc/test/src/math/ilogbf_test.cpp +++ b/libc/test/src/math/ilogbf_test.cpp @@ -16,21 +16,21 @@ #include TEST_F(LlvmLibcILogbTest, SpecialNumbers_ilogbf) { - testSpecialNumbers(&__llvm_libc::ilogbf); + test_special_numbers(&__llvm_libc::ilogbf); } TEST_F(LlvmLibcILogbTest, PowersOfTwo_ilogbf) { - testPowersOfTwo(&__llvm_libc::ilogbf); + test_powers_of_two(&__llvm_libc::ilogbf); } TEST_F(LlvmLibcILogbTest, SomeIntegers_ilogbf) { - testSomeIntegers(&__llvm_libc::ilogbf); + test_some_integers(&__llvm_libc::ilogbf); } TEST_F(LlvmLibcILogbTest, SubnormalRange_ilogbf) { - testSubnormalRange(&__llvm_libc::ilogbf); + test_subnormal_range(&__llvm_libc::ilogbf); } TEST_F(LlvmLibcILogbTest, NormalRange_ilogbf) { - testNormalRange(&__llvm_libc::ilogbf); + test_normal_range(&__llvm_libc::ilogbf); } diff --git a/libc/test/src/math/ilogbl_test.cpp b/libc/test/src/math/ilogbl_test.cpp index f36746b6f64f..5979c96517ab 100644 --- a/libc/test/src/math/ilogbl_test.cpp +++ b/libc/test/src/math/ilogbl_test.cpp @@ -18,21 +18,21 @@ using RunContext = __llvm_libc::testing::RunContext; TEST_F(LlvmLibcILogbTest, SpecialNumbers_ilogbl) { - testSpecialNumbers(&__llvm_libc::ilogbl); + test_special_numbers(&__llvm_libc::ilogbl); } TEST_F(LlvmLibcILogbTest, PowersOfTwo_ilogbl) { - testPowersOfTwo(&__llvm_libc::ilogbl); + test_powers_of_two(&__llvm_libc::ilogbl); } TEST_F(LlvmLibcILogbTest, SomeIntegers_ilogbl) { - testSomeIntegers(&__llvm_libc::ilogbl); + test_some_integers(&__llvm_libc::ilogbl); } TEST_F(LlvmLibcILogbTest, SubnormalRange_ilogbl) { - testSubnormalRange(&__llvm_libc::ilogbl); + test_subnormal_range(&__llvm_libc::ilogbl); } TEST_F(LlvmLibcILogbTest, 
NormalRange_ilogbl) { - testNormalRange(&__llvm_libc::ilogbl); + test_normal_range(&__llvm_libc::ilogbl); } diff --git a/libc/test/src/math/logf_test.cpp b/libc/test/src/math/logf_test.cpp index a3ada693a3cb..eb4da810486d 100644 --- a/libc/test/src/math/logf_test.cpp +++ b/libc/test/src/math/logf_test.cpp @@ -45,9 +45,9 @@ TEST(LlvmLibcLogfTest, TrickyInputs) { } TEST(LlvmLibcLogfTest, InFloatRange) { - constexpr uint32_t count = 1000000; - constexpr uint32_t step = UINT32_MAX / count; - for (uint32_t i = 0, v = 0; i <= count; ++i, v += step) { + constexpr uint32_t COUNT = 1000000; + constexpr uint32_t STEP = UINT32_MAX / COUNT; + for (uint32_t i = 0, v = 0; i <= COUNT; ++i, v += STEP) { float x = float(FPBits(v)); if (isnan(x) || isinf(x)) continue; diff --git a/libc/test/src/math/sdcomp26094.h b/libc/test/src/math/sdcomp26094.h index c42c12c237af..316288441f94 100644 --- a/libc/test/src/math/sdcomp26094.h +++ b/libc/test/src/math/sdcomp26094.h @@ -16,7 +16,7 @@ namespace __llvm_libc { namespace testing { -static constexpr __llvm_libc::cpp::Array sdcomp26094Values{ +static constexpr __llvm_libc::cpp::Array SDCOMP26094_VALUES{ 0x46427f1b, 0x4647e568, 0x46428bac, 0x4647f1f9, 0x4647fe8a, 0x45d8d7f1, 0x45d371a4, 0x45ce0b57, 0x45d35882, 0x45cdf235, }; diff --git a/libc/test/src/math/sin_test.cpp b/libc/test/src/math/sin_test.cpp index bce7a72a8438..e44907488914 100644 --- a/libc/test/src/math/sin_test.cpp +++ b/libc/test/src/math/sin_test.cpp @@ -20,9 +20,9 @@ DECLARE_SPECIAL_CONSTANTS(double) TEST(LlvmLibcSinTest, Range) { static constexpr double _2pi = 6.283185307179586; - constexpr UIntType count = 10000000; - constexpr UIntType step = UIntType(-1) / count; - for (UIntType i = 0, v = 0; i <= count; ++i, v += step) { + constexpr UIntType COUNT = 10000000; + constexpr UIntType STEP = UIntType(-1) / COUNT; + for (UIntType i = 0, v = 0; i <= COUNT; ++i, v += STEP) { double x = double(FPBits(v)); // TODO: Expand the range of testing after range reduction is implemented. if (isnan(x) || isinf(x) || x > _2pi || x < -_2pi) diff --git a/libc/test/src/math/sincosf_test.cpp b/libc/test/src/math/sincosf_test.cpp index 886fc1e7dcac..2b4294d29c05 100644 --- a/libc/test/src/math/sincosf_test.cpp +++ b/libc/test/src/math/sincosf_test.cpp @@ -18,7 +18,7 @@ #include #include -using __llvm_libc::testing::sdcomp26094Values; +using __llvm_libc::testing::SDCOMP26094_VALUES; using FPBits = __llvm_libc::fputil::FPBits; namespace mpfr = __llvm_libc::testing::mpfr; @@ -58,9 +58,9 @@ TEST(LlvmLibcSinCosfTest, SpecialNumbers) { } TEST(LlvmLibcSinCosfTest, InFloatRange) { - constexpr uint32_t count = 1000000; - constexpr uint32_t step = UINT32_MAX / count; - for (uint32_t i = 0, v = 0; i <= count; ++i, v += step) { + constexpr uint32_t COUNT = 1000000; + constexpr uint32_t STEP = UINT32_MAX / COUNT; + for (uint32_t i = 0, v = 0; i <= COUNT; ++i, v += STEP) { float x = float(FPBits((v))); if (isnan(x) || isinf(x)) continue; @@ -95,7 +95,7 @@ TEST(LlvmLibcSinCosfTest, SmallValues) { // SDCOMP-26094: check sinf in the cases for which the range reducer // returns values furthest beyond its nominal upper bound of pi/4. 
TEST(LlvmLibcSinCosfTest, SDCOMP_26094) { - for (uint32_t v : sdcomp26094Values) { + for (uint32_t v : SDCOMP26094_VALUES) { float x = float(FPBits((v))); float sin, cos; __llvm_libc::sincosf(x, &sin, &cos); diff --git a/libc/test/src/math/sinf_test.cpp b/libc/test/src/math/sinf_test.cpp index 64d932ee22f1..8fa43da08a44 100644 --- a/libc/test/src/math/sinf_test.cpp +++ b/libc/test/src/math/sinf_test.cpp @@ -18,7 +18,7 @@ #include #include -using __llvm_libc::testing::sdcomp26094Values; +using __llvm_libc::testing::SDCOMP26094_VALUES; using FPBits = __llvm_libc::fputil::FPBits; namespace mpfr = __llvm_libc::testing::mpfr; @@ -47,9 +47,9 @@ TEST(LlvmLibcSinfTest, SpecialNumbers) { } TEST(LlvmLibcSinfTest, InFloatRange) { - constexpr uint32_t count = 1000000; - constexpr uint32_t step = UINT32_MAX / count; - for (uint32_t i = 0, v = 0; i <= count; ++i, v += step) { + constexpr uint32_t COUNT = 1000000; + constexpr uint32_t STEP = UINT32_MAX / COUNT; + for (uint32_t i = 0, v = 0; i <= COUNT; ++i, v += STEP) { float x = float(FPBits(v)); if (isnan(x) || isinf(x)) continue; @@ -78,7 +78,7 @@ TEST(LlvmLibcSinfTest, SmallValues) { // SDCOMP-26094: check sinf in the cases for which the range reducer // returns values furthest beyond its nominal upper bound of pi/4. TEST(LlvmLibcSinfTest, SDCOMP_26094) { - for (uint32_t v : sdcomp26094Values) { + for (uint32_t v : SDCOMP26094_VALUES) { float x = float(FPBits((v))); EXPECT_MPFR_MATCH(mpfr::Operation::Sin, x, __llvm_libc::sinf(x), 1.0); } diff --git a/libc/test/src/math/tan_test.cpp b/libc/test/src/math/tan_test.cpp index de3d6f16b76e..750463148bc0 100644 --- a/libc/test/src/math/tan_test.cpp +++ b/libc/test/src/math/tan_test.cpp @@ -19,9 +19,9 @@ DECLARE_SPECIAL_CONSTANTS(double) TEST(LlvmLibctanTest, Range) { static constexpr double _2pi = 6.283185307179586; - constexpr UIntType count = 10000000; - constexpr UIntType step = UIntType(-1) / count; - for (UIntType i = 0, v = 0; i <= count; ++i, v += step) { + constexpr UIntType COUNT = 10000000; + constexpr UIntType STEP = UIntType(-1) / COUNT; + for (UIntType i = 0, v = 0; i <= COUNT; ++i, v += STEP) { double x = double(FPBits(v)); // TODO: Expand the range of testing after range reduction is implemented. if (isnan(x) || isinf(x) || x > _2pi || x < -_2pi) diff --git a/libc/test/src/signal/sigprocmask_test.cpp b/libc/test/src/signal/sigprocmask_test.cpp index e6463627ea9e..8fe3f03b6cc8 100644 --- a/libc/test/src/signal/sigprocmask_test.cpp +++ b/libc/test/src/signal/sigprocmask_test.cpp @@ -21,9 +21,9 @@ class LlvmLibcSignalTest : public __llvm_libc::testing::Test { sigset_t oldSet; public: - void SetUp() override { __llvm_libc::sigprocmask(0, nullptr, &oldSet); } + void set_up() override { __llvm_libc::sigprocmask(0, nullptr, &oldSet); } - void TearDown() override { + void tear_down() override { __llvm_libc::sigprocmask(SIG_SETMASK, &oldSet, nullptr); } }; diff --git a/libc/test/src/stdlib/atof_test.cpp b/libc/test/src/stdlib/atof_test.cpp index a5379b2ee666..620308c37d60 100644 --- a/libc/test/src/stdlib/atof_test.cpp +++ b/libc/test/src/stdlib/atof_test.cpp @@ -18,35 +18,35 @@ // This is just a simple test to make sure that this function works at all. It's // functionally identical to strtod so the bulk of the testing is there. 
TEST(LlvmLibcAToFTest, SimpleTest) { - __llvm_libc::fputil::FPBits expectedFP = + __llvm_libc::fputil::FPBits expected_fp = __llvm_libc::fputil::FPBits(uint64_t(0x405ec00000000000)); errno = 0; double result = __llvm_libc::atof("123"); - __llvm_libc::fputil::FPBits actualFP = + __llvm_libc::fputil::FPBits actual_fp = __llvm_libc::fputil::FPBits(result); - EXPECT_EQ(actualFP.bits, expectedFP.bits); - EXPECT_EQ(actualFP.get_sign(), expectedFP.get_sign()); - EXPECT_EQ(actualFP.get_exponent(), expectedFP.get_exponent()); - EXPECT_EQ(actualFP.get_mantissa(), expectedFP.get_mantissa()); + EXPECT_EQ(actual_fp.bits, expected_fp.bits); + EXPECT_EQ(actual_fp.get_sign(), expected_fp.get_sign()); + EXPECT_EQ(actual_fp.get_exponent(), expected_fp.get_exponent()); + EXPECT_EQ(actual_fp.get_mantissa(), expected_fp.get_mantissa()); EXPECT_EQ(errno, 0); } TEST(LlvmLibcAToFTest, FailedParsingTest) { - __llvm_libc::fputil::FPBits expectedFP = + __llvm_libc::fputil::FPBits expected_fp = __llvm_libc::fputil::FPBits(uint64_t(0)); errno = 0; double result = __llvm_libc::atof("???"); - __llvm_libc::fputil::FPBits actualFP = + __llvm_libc::fputil::FPBits actual_fp = __llvm_libc::fputil::FPBits(result); - EXPECT_EQ(actualFP.bits, expectedFP.bits); - EXPECT_EQ(actualFP.get_sign(), expectedFP.get_sign()); - EXPECT_EQ(actualFP.get_exponent(), expectedFP.get_exponent()); - EXPECT_EQ(actualFP.get_mantissa(), expectedFP.get_mantissa()); + EXPECT_EQ(actual_fp.bits, expected_fp.bits); + EXPECT_EQ(actual_fp.get_sign(), expected_fp.get_sign()); + EXPECT_EQ(actual_fp.get_exponent(), expected_fp.get_exponent()); + EXPECT_EQ(actual_fp.get_mantissa(), expected_fp.get_mantissa()); EXPECT_EQ(errno, 0); } diff --git a/libc/test/src/stdlib/bsearch_test.cpp b/libc/test/src/stdlib/bsearch_test.cpp index 6076e47f9ab9..b8bd319f051c 100644 --- a/libc/test/src/stdlib/bsearch_test.cpp +++ b/libc/test/src/stdlib/bsearch_test.cpp @@ -35,44 +35,44 @@ TEST(LlvmLibcBsearchTest, ErrorInputs) { } TEST(LlvmLibcBsearchTest, IntegerArray) { - constexpr int array[25] = {10, 23, 33, 35, 55, 70, 71, + constexpr int ARRAY[25] = {10, 23, 33, 35, 55, 70, 71, 100, 110, 123, 133, 135, 155, 170, 171, 1100, 1110, 1123, 1133, 1135, 1155, 1170, 1171, 11100, 12310}; - constexpr size_t array_size = sizeof(array) / sizeof(int); + constexpr size_t ARRAY_SIZE = sizeof(ARRAY) / sizeof(int); - for (size_t s = 1; s <= array_size; ++s) { + for (size_t s = 1; s <= ARRAY_SIZE; ++s) { for (size_t i = 0; i < s; ++i) { - int key = array[i]; + int key = ARRAY[i]; void *elem = - __llvm_libc::bsearch(&key, array, s, sizeof(int), int_compare); + __llvm_libc::bsearch(&key, ARRAY, s, sizeof(int), int_compare); ASSERT_EQ(*reinterpret_cast(elem), key); } } // Non existent keys - for (size_t s = 1; s <= array_size; ++s) { + for (size_t s = 1; s <= ARRAY_SIZE; ++s) { int key = 5; - ASSERT_TRUE(__llvm_libc::bsearch(&key, &array, s, sizeof(int), + ASSERT_TRUE(__llvm_libc::bsearch(&key, &ARRAY, s, sizeof(int), int_compare) == nullptr); key = 125; - ASSERT_TRUE(__llvm_libc::bsearch(&key, &array, s, sizeof(int), + ASSERT_TRUE(__llvm_libc::bsearch(&key, &ARRAY, s, sizeof(int), int_compare) == nullptr); key = 136; - ASSERT_TRUE(__llvm_libc::bsearch(&key, &array, s, sizeof(int), + ASSERT_TRUE(__llvm_libc::bsearch(&key, &ARRAY, s, sizeof(int), int_compare) == nullptr); key = 12345; - ASSERT_TRUE(__llvm_libc::bsearch(&key, &array, s, sizeof(int), + ASSERT_TRUE(__llvm_libc::bsearch(&key, &ARRAY, s, sizeof(int), int_compare) == nullptr); } } TEST(LlvmLibcBsearchTest, SameKeyAndArray) { - 
constexpr int array[5] = {1, 2, 3, 4, 5}; - constexpr size_t array_size = sizeof(array) / sizeof(int); + constexpr int ARRAY[5] = {1, 2, 3, 4, 5}; + constexpr size_t ARRAY_SIZE = sizeof(ARRAY) / sizeof(int); void *elem = - __llvm_libc::bsearch(array, array, array_size, sizeof(int), int_compare); - EXPECT_EQ(*reinterpret_cast(elem), array[0]); + __llvm_libc::bsearch(ARRAY, ARRAY, ARRAY_SIZE, sizeof(int), int_compare); + EXPECT_EQ(*reinterpret_cast(elem), ARRAY[0]); } diff --git a/libc/test/src/stdlib/qsort_test.cpp b/libc/test/src/stdlib/qsort_test.cpp index 8de8beee598d..99ccc3fa6046 100644 --- a/libc/test/src/stdlib/qsort_test.cpp +++ b/libc/test/src/stdlib/qsort_test.cpp @@ -27,9 +27,9 @@ TEST(LlvmLibcQsortTest, SortedArray) { int array[25] = {10, 23, 33, 35, 55, 70, 71, 100, 110, 123, 133, 135, 155, 170, 171, 1100, 1110, 1123, 1133, 1135, 1155, 1170, 1171, 11100, 12310}; - constexpr size_t array_size = sizeof(array) / sizeof(int); + constexpr size_t ARRAY_SIZE = sizeof(array) / sizeof(int); - __llvm_libc::qsort(array, array_size, sizeof(int), int_compare); + __llvm_libc::qsort(array, ARRAY_SIZE, sizeof(int), int_compare); ASSERT_LE(array[0], 10); ASSERT_LE(array[1], 23); @@ -61,11 +61,11 @@ TEST(LlvmLibcQsortTest, SortedArray) { TEST(LlvmLibcQsortTest, ReverseSortedArray) { int array[25] = {25, 24, 23, 22, 21, 20, 19, 18, 17, 16, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1}; - constexpr size_t array_size = sizeof(array) / sizeof(int); + constexpr size_t ARRAY_SIZE = sizeof(array) / sizeof(int); - __llvm_libc::qsort(array, array_size, sizeof(int), int_compare); + __llvm_libc::qsort(array, ARRAY_SIZE, sizeof(int), int_compare); - for (int i = 0; i < int(array_size - 1); ++i) + for (int i = 0; i < int(ARRAY_SIZE - 1); ++i) ASSERT_LE(array[i], i + 1); } @@ -73,20 +73,20 @@ TEST(LlvmLibcQsortTest, AllEqualElements) { int array[25] = {100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100}; - constexpr size_t array_size = sizeof(array) / sizeof(int); + constexpr size_t ARRAY_SIZE = sizeof(array) / sizeof(int); - __llvm_libc::qsort(array, array_size, sizeof(int), int_compare); + __llvm_libc::qsort(array, ARRAY_SIZE, sizeof(int), int_compare); - for (size_t i = 0; i < array_size - 1; ++i) + for (size_t i = 0; i < ARRAY_SIZE - 1; ++i) ASSERT_LE(array[i], 100); } TEST(LlvmLibcQsortTest, UnsortedArray1) { int array[25] = {10, 23, 8, 35, 55, 45, 40, 100, 110, 123, 90, 80, 70, 60, 171, 11, 1, -1, -5, -10, 1155, 1170, 1171, 12, -100}; - constexpr size_t array_size = sizeof(array) / sizeof(int); + constexpr size_t ARRAY_SIZE = sizeof(array) / sizeof(int); - __llvm_libc::qsort(array, array_size, sizeof(int), int_compare); + __llvm_libc::qsort(array, ARRAY_SIZE, sizeof(int), int_compare); ASSERT_LE(array[0], -100); ASSERT_LE(array[1], -10); @@ -117,9 +117,9 @@ TEST(LlvmLibcQsortTest, UnsortedArray1) { TEST(LlvmLibcQsortTest, UnsortedArray2) { int array[7] = {10, 40, 45, 55, 35, 23, 60}; - constexpr size_t array_size = sizeof(array) / sizeof(int); + constexpr size_t ARRAY_SIZE = sizeof(array) / sizeof(int); - __llvm_libc::qsort(array, array_size, sizeof(int), int_compare); + __llvm_libc::qsort(array, ARRAY_SIZE, sizeof(int), int_compare); ASSERT_LE(array[0], 10); ASSERT_LE(array[1], 23); @@ -132,9 +132,9 @@ TEST(LlvmLibcQsortTest, UnsortedArray2) { TEST(LlvmLibcQsortTest, UnsortedArrayDuplicateElements1) { int array[6] = {10, 10, 20, 20, 5, 5}; - constexpr size_t array_size = sizeof(array) / sizeof(int); + constexpr size_t 
ARRAY_SIZE = sizeof(array) / sizeof(int); - __llvm_libc::qsort(array, array_size, sizeof(int), int_compare); + __llvm_libc::qsort(array, ARRAY_SIZE, sizeof(int), int_compare); ASSERT_LE(array[0], 5); ASSERT_LE(array[1], 5); @@ -146,9 +146,9 @@ TEST(LlvmLibcQsortTest, UnsortedArrayDuplicateElements1) { TEST(LlvmLibcQsortTest, UnsortedArrayDuplicateElements2) { int array[10] = {20, 10, 10, 10, 10, 20, 21, 21, 21, 21}; - constexpr size_t array_size = sizeof(array) / sizeof(int); + constexpr size_t ARRAY_SIZE = sizeof(array) / sizeof(int); - __llvm_libc::qsort(array, array_size, sizeof(int), int_compare); + __llvm_libc::qsort(array, ARRAY_SIZE, sizeof(int), int_compare); ASSERT_LE(array[0], 10); ASSERT_LE(array[1], 10); @@ -164,9 +164,9 @@ TEST(LlvmLibcQsortTest, UnsortedArrayDuplicateElements2) { TEST(LlvmLibcQsortTest, UnsortedArrayDuplicateElements3) { int array[10] = {20, 30, 30, 30, 30, 20, 21, 21, 21, 21}; - constexpr size_t array_size = sizeof(array) / sizeof(int); + constexpr size_t ARRAY_SIZE = sizeof(array) / sizeof(int); - __llvm_libc::qsort(array, array_size, sizeof(int), int_compare); + __llvm_libc::qsort(array, ARRAY_SIZE, sizeof(int), int_compare); ASSERT_LE(array[0], 20); ASSERT_LE(array[1], 20); @@ -182,9 +182,9 @@ TEST(LlvmLibcQsortTest, UnsortedArrayDuplicateElements3) { TEST(LlvmLibcQsortTest, UnsortedThreeElementArray1) { int array[3] = {14999024, 0, 3}; - constexpr size_t array_size = sizeof(array) / sizeof(int); + constexpr size_t ARRAY_SIZE = sizeof(array) / sizeof(int); - __llvm_libc::qsort(array, array_size, sizeof(int), int_compare); + __llvm_libc::qsort(array, ARRAY_SIZE, sizeof(int), int_compare); ASSERT_LE(array[0], 0); ASSERT_LE(array[1], 3); @@ -193,9 +193,9 @@ TEST(LlvmLibcQsortTest, UnsortedThreeElementArray1) { TEST(LlvmLibcQsortTest, UnsortedThreeElementArray2) { int array[3] = {3, 14999024, 0}; - constexpr size_t array_size = sizeof(array) / sizeof(int); + constexpr size_t ARRAY_SIZE = sizeof(array) / sizeof(int); - __llvm_libc::qsort(array, array_size, sizeof(int), int_compare); + __llvm_libc::qsort(array, ARRAY_SIZE, sizeof(int), int_compare); ASSERT_LE(array[0], 0); ASSERT_LE(array[1], 3); @@ -204,9 +204,9 @@ TEST(LlvmLibcQsortTest, UnsortedThreeElementArray2) { TEST(LlvmLibcQsortTest, UnsortedThreeElementArray3) { int array[3] = {3, 0, 14999024}; - constexpr size_t array_size = sizeof(array) / sizeof(int); + constexpr size_t ARRAY_SIZE = sizeof(array) / sizeof(int); - __llvm_libc::qsort(array, array_size, sizeof(int), int_compare); + __llvm_libc::qsort(array, ARRAY_SIZE, sizeof(int), int_compare); ASSERT_LE(array[0], 0); ASSERT_LE(array[1], 3); @@ -215,9 +215,9 @@ TEST(LlvmLibcQsortTest, UnsortedThreeElementArray3) { TEST(LlvmLibcQsortTest, SameElementThreeElementArray) { int array[3] = {12345, 12345, 12345}; - constexpr size_t array_size = sizeof(array) / sizeof(int); + constexpr size_t ARRAY_SIZE = sizeof(array) / sizeof(int); - __llvm_libc::qsort(array, array_size, sizeof(int), int_compare); + __llvm_libc::qsort(array, ARRAY_SIZE, sizeof(int), int_compare); ASSERT_LE(array[0], 12345); ASSERT_LE(array[1], 12345); @@ -226,9 +226,9 @@ TEST(LlvmLibcQsortTest, SameElementThreeElementArray) { TEST(LlvmLibcQsortTest, UnsortedTwoElementArray1) { int array[2] = {14999024, 0}; - constexpr size_t array_size = sizeof(array) / sizeof(int); + constexpr size_t ARRAY_SIZE = sizeof(array) / sizeof(int); - __llvm_libc::qsort(array, array_size, sizeof(int), int_compare); + __llvm_libc::qsort(array, ARRAY_SIZE, sizeof(int), int_compare); ASSERT_LE(array[0], 0); 
ASSERT_LE(array[1], 14999024); @@ -236,9 +236,9 @@ TEST(LlvmLibcQsortTest, UnsortedTwoElementArray1) { TEST(LlvmLibcQsortTest, UnsortedTwoElementArray2) { int array[2] = {0, 14999024}; - constexpr size_t array_size = sizeof(array) / sizeof(int); + constexpr size_t ARRAY_SIZE = sizeof(array) / sizeof(int); - __llvm_libc::qsort(array, array_size, sizeof(int), int_compare); + __llvm_libc::qsort(array, ARRAY_SIZE, sizeof(int), int_compare); ASSERT_LE(array[0], 0); ASSERT_LE(array[1], 14999024); @@ -246,20 +246,20 @@ TEST(LlvmLibcQsortTest, UnsortedTwoElementArray2) { TEST(LlvmLibcQsortTest, SameElementTwoElementArray) { int array[2] = {12345, 12345}; - constexpr size_t array_size = sizeof(array) / sizeof(int); + constexpr size_t ARRAY_SIZE = sizeof(array) / sizeof(int); - __llvm_libc::qsort(array, array_size, sizeof(int), int_compare); + __llvm_libc::qsort(array, ARRAY_SIZE, sizeof(int), int_compare); ASSERT_LE(array[0], 12345); ASSERT_LE(array[1], 12345); } TEST(LlvmLibcQSortTest, SingleElementArray) { - constexpr int elem = 12345; - int array[1] = {elem}; - constexpr size_t array_size = sizeof(array) / sizeof(int); + constexpr int ELEM = 12345; + int array[1] = {ELEM}; + constexpr size_t ARRAY_SIZE = sizeof(array) / sizeof(int); - __llvm_libc::qsort(array, array_size, sizeof(int), int_compare); + __llvm_libc::qsort(array, ARRAY_SIZE, sizeof(int), int_compare); - ASSERT_LE(array[0], elem); + ASSERT_LE(array[0], ELEM); } diff --git a/libc/test/src/stdlib/strtod_test.cpp b/libc/test/src/stdlib/strtod_test.cpp index 154cf1a33e7e..eee11231c044 100644 --- a/libc/test/src/stdlib/strtod_test.cpp +++ b/libc/test/src/stdlib/strtod_test.cpp @@ -17,8 +17,8 @@ class LlvmLibcStrToDTest : public __llvm_libc::testing::Test { public: - void runTest(const char *inputString, const ptrdiff_t expectedStrLen, - const uint64_t expectedRawData, const int expectedErrno = 0) { + void run_test(const char *inputString, const ptrdiff_t expectedStrLen, + const uint64_t expectedRawData, const int expectedErrno = 0) { // expectedRawData is the expected double result as a uint64_t, organized // according to IEEE754: // @@ -33,58 +33,59 @@ class LlvmLibcStrToDTest : public __llvm_libc::testing::Test { // +-- 11 Exponent Bits // // This is so that the result can be compared in parts. 
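The comment above spells out how expectedRawData packs a double into sign, exponent, and mantissa fields so the pieces can be compared separately. As an illustrative aside (not part of the patch; the tiny program below is only a hand-check, not libc code), this is how the 1/11/52-bit fields can be peeled out of 0x405EC00000000000, the pattern SimpleTest expects for the input "123":

#include <cstdint>
#include <cstdio>

int main() {
  // Bit pattern of 123.0, the value SimpleTest expects for the input "123".
  uint64_t bits = 0x405EC00000000000ULL;
  uint64_t sign = bits >> 63;                    // 1 sign bit
  uint64_t exponent = (bits >> 52) & 0x7FF;      // 11 exponent bits (biased by 1023)
  uint64_t mantissa = bits & 0xFFFFFFFFFFFFFULL; // 52 mantissa bits
  // Prints sign=0 exp=0x405 mant=0xec00000000000,
  // i.e. +1.111011 (binary) * 2^(1029 - 1023) = 1.921875 * 64 = 123.
  std::printf("sign=%llu exp=0x%llx mant=0x%llx\n",
              (unsigned long long)sign, (unsigned long long)exponent,
              (unsigned long long)mantissa);
  return 0;
}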
- char *strEnd = nullptr; + char *str_end = nullptr; - __llvm_libc::fputil::FPBits expectedFP = + __llvm_libc::fputil::FPBits expected_fp = __llvm_libc::fputil::FPBits(expectedRawData); errno = 0; - double result = __llvm_libc::strtod(inputString, &strEnd); + double result = __llvm_libc::strtod(inputString, &str_end); - __llvm_libc::fputil::FPBits actualFP = + __llvm_libc::fputil::FPBits actual_fp = __llvm_libc::fputil::FPBits(result); - EXPECT_EQ(strEnd - inputString, expectedStrLen); + EXPECT_EQ(str_end - inputString, expectedStrLen); - EXPECT_EQ(actualFP.bits, expectedFP.bits); - EXPECT_EQ(actualFP.get_sign(), expectedFP.get_sign()); - EXPECT_EQ(actualFP.get_exponent(), expectedFP.get_exponent()); - EXPECT_EQ(actualFP.get_mantissa(), expectedFP.get_mantissa()); + EXPECT_EQ(actual_fp.bits, expected_fp.bits); + EXPECT_EQ(actual_fp.get_sign(), expected_fp.get_sign()); + EXPECT_EQ(actual_fp.get_exponent(), expected_fp.get_exponent()); + EXPECT_EQ(actual_fp.get_mantissa(), expected_fp.get_mantissa()); EXPECT_EQ(errno, expectedErrno); } }; TEST_F(LlvmLibcStrToDTest, SimpleTest) { - runTest("123", 3, uint64_t(0x405ec00000000000)); + run_test("123", 3, uint64_t(0x405ec00000000000)); // This should fail on Eisel-Lemire, forcing a fallback to simple decimal // conversion. - runTest("12345678901234549760", 20, uint64_t(0x43e56a95319d63d8)); + run_test("12345678901234549760", 20, uint64_t(0x43e56a95319d63d8)); // Found while looking for difficult test cases here: // https://github.com/nigeltao/parse-number-fxx-test-data/blob/main/more-test-cases/golang-org-issue-36657.txt - runTest("1090544144181609348835077142190", 31, uint64_t(0x462b8779f2474dfb)); + run_test("1090544144181609348835077142190", 31, uint64_t(0x462b8779f2474dfb)); - runTest("0x123", 5, uint64_t(0x4072300000000000)); + run_test("0x123", 5, uint64_t(0x4072300000000000)); } // These are tests that have caused problems in the past. TEST_F(LlvmLibcStrToDTest, SpecificFailures) { - runTest("3E70000000000000", 16, uint64_t(0x7FF0000000000000), ERANGE); - runTest("358416272e-33", 13, uint64_t(0x3adbbb2a68c9d0b9)); - runTest("2.16656806400000023841857910156251e9", 36, - uint64_t(0x41e0246690000001)); - runTest("27949676547093071875", 20, uint64_t(0x43f83e132bc608c9)); + run_test("3E70000000000000", 16, uint64_t(0x7FF0000000000000), ERANGE); + run_test("358416272e-33", 13, uint64_t(0x3adbbb2a68c9d0b9)); + run_test("2.16656806400000023841857910156251e9", 36, + uint64_t(0x41e0246690000001)); + run_test("27949676547093071875", 20, uint64_t(0x43f83e132bc608c9)); } TEST_F(LlvmLibcStrToDTest, FuzzFailures) { - runTest("-\xff\xff\xff\xff\xff\xff\xff\x01", 0, uint64_t(0)); - runTest("-.????", 0, uint64_t(0)); - runTest("44444444444444444444444444444444444444444444444444A44444444444444444" - "44444444444*\x99\xff\xff\xff\xff", - 50, uint64_t(0x4a3e68fdd0e0b2d8)); - runTest("-NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNKNNNNNNNNNNNNNNNNNN?" - "NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN?", - 0, uint64_t(0)); - runTest("0x.666E40", 9, uint64_t(0x3fd99b9000000000)); + run_test("-\xff\xff\xff\xff\xff\xff\xff\x01", 0, uint64_t(0)); + run_test("-.????", 0, uint64_t(0)); + run_test( + "44444444444444444444444444444444444444444444444444A44444444444444444" + "44444444444*\x99\xff\xff\xff\xff", + 50, uint64_t(0x4a3e68fdd0e0b2d8)); + run_test("-NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNKNNNNNNNNNNNNNNNNNN?" 
+ "NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN?", + 0, uint64_t(0)); + run_test("0x.666E40", 9, uint64_t(0x3fd99b9000000000)); } diff --git a/libc/test/src/stdlib/strtof_test.cpp b/libc/test/src/stdlib/strtof_test.cpp index db6c194904db..e47541da47cc 100644 --- a/libc/test/src/stdlib/strtof_test.cpp +++ b/libc/test/src/stdlib/strtof_test.cpp @@ -17,8 +17,8 @@ class LlvmLibcStrToFTest : public __llvm_libc::testing::Test { public: - void runTest(const char *inputString, const ptrdiff_t expectedStrLen, - const uint32_t expectedRawData, const int expectedErrno = 0) { + void run_test(const char *inputString, const ptrdiff_t expectedStrLen, + const uint32_t expectedRawData, const int expectedErrno = 0) { // expectedRawData is the expected float result as a uint32_t, organized // according to IEEE754: // @@ -33,23 +33,23 @@ class LlvmLibcStrToFTest : public __llvm_libc::testing::Test { // +-- 8 Exponent Bits // // This is so that the result can be compared in parts. - char *strEnd = nullptr; + char *str_end = nullptr; - __llvm_libc::fputil::FPBits expectedFP = + __llvm_libc::fputil::FPBits expected_fp = __llvm_libc::fputil::FPBits(expectedRawData); errno = 0; - float result = __llvm_libc::strtof(inputString, &strEnd); + float result = __llvm_libc::strtof(inputString, &str_end); - __llvm_libc::fputil::FPBits actualFP = + __llvm_libc::fputil::FPBits actual_fp = __llvm_libc::fputil::FPBits(result); - EXPECT_EQ(strEnd - inputString, expectedStrLen); + EXPECT_EQ(str_end - inputString, expectedStrLen); - EXPECT_EQ(actualFP.bits, expectedFP.bits); - EXPECT_EQ(actualFP.get_sign(), expectedFP.get_sign()); - EXPECT_EQ(actualFP.get_exponent(), expectedFP.get_exponent()); - EXPECT_EQ(actualFP.get_mantissa(), expectedFP.get_mantissa()); + EXPECT_EQ(actual_fp.bits, expected_fp.bits); + EXPECT_EQ(actual_fp.get_sign(), expected_fp.get_sign()); + EXPECT_EQ(actual_fp.get_exponent(), expected_fp.get_exponent()); + EXPECT_EQ(actual_fp.get_mantissa(), expected_fp.get_mantissa()); EXPECT_EQ(errno, expectedErrno); } }; @@ -59,115 +59,117 @@ class LlvmLibcStrToFTest : public __llvm_libc::testing::Test { // them. 
TEST_F(LlvmLibcStrToFTest, BasicDecimalTests) { - runTest("1", 1, 0x3f800000); - runTest("123", 3, 0x42f60000); - runTest("1234567890", 10, 0x4e932c06u); - runTest("123456789012345678901", 21, 0x60d629d4); - runTest("0.1", 3, 0x3dcccccdu); - runTest(".1", 2, 0x3dcccccdu); - runTest("-0.123456789", 12, 0xbdfcd6eau); - runTest("0.11111111111111111111", 22, 0x3de38e39u); - runTest("0.0000000000000000000000001", 27, 0x15f79688u); + run_test("1", 1, 0x3f800000); + run_test("123", 3, 0x42f60000); + run_test("1234567890", 10, 0x4e932c06u); + run_test("123456789012345678901", 21, 0x60d629d4); + run_test("0.1", 3, 0x3dcccccdu); + run_test(".1", 2, 0x3dcccccdu); + run_test("-0.123456789", 12, 0xbdfcd6eau); + run_test("0.11111111111111111111", 22, 0x3de38e39u); + run_test("0.0000000000000000000000001", 27, 0x15f79688u); } TEST_F(LlvmLibcStrToFTest, DecimalOutOfRangeTests) { - runTest("555E36", 6, 0x7f800000, ERANGE); - runTest("1e-10000", 8, 0x0, ERANGE); + run_test("555E36", 6, 0x7f800000, ERANGE); + run_test("1e-10000", 8, 0x0, ERANGE); } TEST_F(LlvmLibcStrToFTest, DecimalsWithRoundingProblems) { - runTest("20040229", 8, 0x4b98e512); - runTest("20040401", 8, 0x4b98e568); - runTest("9E9", 3, 0x50061c46); + run_test("20040229", 8, 0x4b98e512); + run_test("20040401", 8, 0x4b98e568); + run_test("9E9", 3, 0x50061c46); } TEST_F(LlvmLibcStrToFTest, DecimalSubnormals) { - runTest("1.4012984643248170709237295832899161312802619418765e-45", 55, 0x1, - ERANGE); + run_test("1.4012984643248170709237295832899161312802619418765e-45", 55, 0x1, + ERANGE); } TEST_F(LlvmLibcStrToFTest, DecimalWithLongExponent) { - runTest("1e2147483648", 12, 0x7f800000, ERANGE); - runTest("1e2147483646", 12, 0x7f800000, ERANGE); - runTest("100e2147483646", 14, 0x7f800000, ERANGE); - runTest("1e-2147483647", 13, 0x0, ERANGE); - runTest("1e-2147483649", 13, 0x0, ERANGE); + run_test("1e2147483648", 12, 0x7f800000, ERANGE); + run_test("1e2147483646", 12, 0x7f800000, ERANGE); + run_test("100e2147483646", 14, 0x7f800000, ERANGE); + run_test("1e-2147483647", 13, 0x0, ERANGE); + run_test("1e-2147483649", 13, 0x0, ERANGE); } TEST_F(LlvmLibcStrToFTest, BasicHexadecimalTests) { - runTest("0x1", 3, 0x3f800000); - runTest("0x10", 4, 0x41800000); - runTest("0x11", 4, 0x41880000); - runTest("0x0.1234", 8, 0x3d91a000); + run_test("0x1", 3, 0x3f800000); + run_test("0x10", 4, 0x41800000); + run_test("0x11", 4, 0x41880000); + run_test("0x0.1234", 8, 0x3d91a000); } TEST_F(LlvmLibcStrToFTest, HexadecimalSubnormalTests) { - runTest("0x0.0000000000000000000000000000000002", 38, 0x4000, ERANGE); + run_test("0x0.0000000000000000000000000000000002", 38, 0x4000, ERANGE); // This is the largest subnormal number as represented in hex - runTest("0x0.00000000000000000000000000000003fffff8", 42, 0x7fffff, ERANGE); + run_test("0x0.00000000000000000000000000000003fffff8", 42, 0x7fffff, ERANGE); } TEST_F(LlvmLibcStrToFTest, HexadecimalSubnormalRoundingTests) { // This is the largest subnormal number that gets rounded down to 0 (as a // float) - runTest("0x0.00000000000000000000000000000000000004", 42, 0x0, ERANGE); + run_test("0x0.00000000000000000000000000000000000004", 42, 0x0, ERANGE); // This is slightly larger, and thus rounded up - runTest("0x0.000000000000000000000000000000000000041", 43, 0x00000001, - ERANGE); + run_test("0x0.000000000000000000000000000000000000041", 43, 0x00000001, + ERANGE); // These check that we're rounding to even properly - runTest("0x0.0000000000000000000000000000000000000b", 42, 0x00000001, ERANGE); - 
runTest("0x0.0000000000000000000000000000000000000c", 42, 0x00000002, ERANGE); + run_test("0x0.0000000000000000000000000000000000000b", 42, 0x00000001, + ERANGE); + run_test("0x0.0000000000000000000000000000000000000c", 42, 0x00000002, + ERANGE); // These check that we're rounding to even properly even when the input bits // are longer than the bit fields can contain. - runTest("0x1.000000000000000000000p-150", 30, 0x00000000, ERANGE); - runTest("0x1.000010000000000001000p-150", 30, 0x00000001, ERANGE); - runTest("0x1.000100000000000001000p-134", 30, 0x00008001, ERANGE); - runTest("0x1.FFFFFC000000000001000p-127", 30, 0x007FFFFF, ERANGE); - runTest("0x1.FFFFFE000000000000000p-127", 30, 0x00800000); + run_test("0x1.000000000000000000000p-150", 30, 0x00000000, ERANGE); + run_test("0x1.000010000000000001000p-150", 30, 0x00000001, ERANGE); + run_test("0x1.000100000000000001000p-134", 30, 0x00008001, ERANGE); + run_test("0x1.FFFFFC000000000001000p-127", 30, 0x007FFFFF, ERANGE); + run_test("0x1.FFFFFE000000000000000p-127", 30, 0x00800000); } TEST_F(LlvmLibcStrToFTest, HexadecimalNormalRoundingTests) { // This also checks the round to even behavior by checking three adjacent // numbers. // This gets rounded down to even - runTest("0x123456500", 11, 0x4f91a2b2); + run_test("0x123456500", 11, 0x4f91a2b2); // This doesn't get rounded at all - runTest("0x123456600", 11, 0x4f91a2b3); + run_test("0x123456600", 11, 0x4f91a2b3); // This gets rounded up to even - runTest("0x123456700", 11, 0x4f91a2b4); + run_test("0x123456700", 11, 0x4f91a2b4); // Correct rounding for long input - runTest("0x1.000001000000000000000", 25, 0x3f800000); - runTest("0x1.000001000000000000100", 25, 0x3f800001); + run_test("0x1.000001000000000000000", 25, 0x3f800000); + run_test("0x1.000001000000000000100", 25, 0x3f800001); } TEST_F(LlvmLibcStrToFTest, HexadecimalsWithRoundingProblems) { - runTest("0xFFFFFFFF", 10, 0x4f800000); + run_test("0xFFFFFFFF", 10, 0x4f800000); } TEST_F(LlvmLibcStrToFTest, HexadecimalOutOfRangeTests) { - runTest("0x123456789123456789123456789123456789", 38, 0x7f800000, ERANGE); - runTest("-0x123456789123456789123456789123456789", 39, 0xff800000, ERANGE); - runTest("0x0.00000000000000000000000000000000000001", 42, 0x0, ERANGE); + run_test("0x123456789123456789123456789123456789", 38, 0x7f800000, ERANGE); + run_test("-0x123456789123456789123456789123456789", 39, 0xff800000, ERANGE); + run_test("0x0.00000000000000000000000000000000000001", 42, 0x0, ERANGE); } TEST_F(LlvmLibcStrToFTest, InfTests) { - runTest("INF", 3, 0x7f800000); - runTest("INFinity", 8, 0x7f800000); - runTest("infnity", 3, 0x7f800000); - runTest("infinit", 3, 0x7f800000); - runTest("infinfinit", 3, 0x7f800000); - runTest("innf", 0, 0x0); - runTest("-inf", 4, 0xff800000); - runTest("-iNfInItY", 9, 0xff800000); + run_test("INF", 3, 0x7f800000); + run_test("INFinity", 8, 0x7f800000); + run_test("infnity", 3, 0x7f800000); + run_test("infinit", 3, 0x7f800000); + run_test("infinfinit", 3, 0x7f800000); + run_test("innf", 0, 0x0); + run_test("-inf", 4, 0xff800000); + run_test("-iNfInItY", 9, 0xff800000); } TEST_F(LlvmLibcStrToFTest, SimpleNaNTests) { - runTest("NaN", 3, 0x7fc00000); - runTest("-nAn", 4, 0xffc00000); + run_test("NaN", 3, 0x7fc00000); + run_test("-nAn", 4, 0xffc00000); } // These NaNs are of the form `NaN(n-character-sequence)` where the @@ -178,26 +180,26 @@ TEST_F(LlvmLibcStrToFTest, SimpleNaNTests) { // the result is put into the mantissa if it takes up the whole width of the // parentheses. 
TEST_F(LlvmLibcStrToFTest, NaNWithParenthesesEmptyTest) { - runTest("NaN()", 5, 0x7fc00000); + run_test("NaN()", 5, 0x7fc00000); } TEST_F(LlvmLibcStrToFTest, NaNWithParenthesesValidNumberTests) { - runTest("NaN(1234)", 9, 0x7fc004d2); - runTest("NaN(0x1234)", 11, 0x7fc01234); - runTest("NaN(01234)", 10, 0x7fc0029c); + run_test("NaN(1234)", 9, 0x7fc004d2); + run_test("NaN(0x1234)", 11, 0x7fc01234); + run_test("NaN(01234)", 10, 0x7fc0029c); } TEST_F(LlvmLibcStrToFTest, NaNWithParenthesesInvalidSequenceTests) { - runTest("NaN( 1234)", 3, 0x7fc00000); - runTest("NaN(-1234)", 3, 0x7fc00000); - runTest("NaN(asd&f)", 3, 0x7fc00000); - runTest("NaN(123 )", 3, 0x7fc00000); - runTest("NaN(123+asdf)", 3, 0x7fc00000); - runTest("NaN(123", 3, 0x7fc00000); + run_test("NaN( 1234)", 3, 0x7fc00000); + run_test("NaN(-1234)", 3, 0x7fc00000); + run_test("NaN(asd&f)", 3, 0x7fc00000); + run_test("NaN(123 )", 3, 0x7fc00000); + run_test("NaN(123+asdf)", 3, 0x7fc00000); + run_test("NaN(123", 3, 0x7fc00000); } TEST_F(LlvmLibcStrToFTest, NaNWithParenthesesValidSequenceInvalidNumberTests) { - runTest("NaN(1a)", 7, 0x7fc00000); - runTest("NaN(asdf)", 9, 0x7fc00000); - runTest("NaN(1A1)", 8, 0x7fc00000); + run_test("NaN(1a)", 7, 0x7fc00000); + run_test("NaN(asdf)", 9, 0x7fc00000); + run_test("NaN(1A1)", 8, 0x7fc00000); } diff --git a/libc/test/src/stdlib/strtold_test.cpp b/libc/test/src/stdlib/strtold_test.cpp index c1d41bc302f5..0aa8f45e623d 100644 --- a/libc/test/src/stdlib/strtold_test.cpp +++ b/libc/test/src/stdlib/strtold_test.cpp @@ -17,12 +17,12 @@ class LlvmLibcStrToLDTest : public __llvm_libc::testing::Test { public: - void runTest(const char *inputString, const ptrdiff_t expectedStrLen, - const uint64_t expectedRawData64, - const __uint128_t expectedRawData80, - const __uint128_t expectedRawData128, - const int expectedErrno64 = 0, const int expectedErrno80 = 0, - const int expectedErrno128 = 0) { + void run_test(const char *inputString, const ptrdiff_t expectedStrLen, + const uint64_t expectedRawData64, + const __uint128_t expectedRawData80, + const __uint128_t expectedRawData128, + const int expectedErrno64 = 0, const int expectedErrno80 = 0, + const int expectedErrno128 = 0) { // expectedRawData64 is the expected long double result as a uint64_t, // organized according to the IEEE754 double precision format: // @@ -61,16 +61,16 @@ class LlvmLibcStrToLDTest : public __llvm_libc::testing::Test { // +------+------+ // | // +-- 15 Exponent Bits - char *strEnd = nullptr; + char *str_end = nullptr; #if defined(LONG_DOUBLE_IS_DOUBLE) __llvm_libc::fputil::FPBits expectedFP = __llvm_libc::fputil::FPBits(expectedRawData64); const int expectedErrno = expectedErrno64; #elif defined(SPECIAL_X86_LONG_DOUBLE) - __llvm_libc::fputil::FPBits expectedFP = + __llvm_libc::fputil::FPBits expected_fp = __llvm_libc::fputil::FPBits(expectedRawData80); - const int expectedErrno = expectedErrno80; + const int expected_errno = expectedErrno80; #else __llvm_libc::fputil::FPBits expectedFP = __llvm_libc::fputil::FPBits(expectedRawData128); @@ -78,144 +78,146 @@ class LlvmLibcStrToLDTest : public __llvm_libc::testing::Test { #endif errno = 0; - long double result = __llvm_libc::strtold(inputString, &strEnd); + long double result = __llvm_libc::strtold(inputString, &str_end); - __llvm_libc::fputil::FPBits actualFP = + __llvm_libc::fputil::FPBits actual_fp = __llvm_libc::fputil::FPBits(); - actualFP = __llvm_libc::fputil::FPBits(result); + actual_fp = __llvm_libc::fputil::FPBits(result); - EXPECT_EQ(strEnd - inputString, expectedStrLen); 
+ EXPECT_EQ(str_end - inputString, expectedStrLen); - EXPECT_EQ(actualFP.bits, expectedFP.bits); - EXPECT_EQ(actualFP.get_sign(), expectedFP.get_sign()); - EXPECT_EQ(actualFP.get_exponent(), expectedFP.get_exponent()); - EXPECT_EQ(actualFP.get_mantissa(), expectedFP.get_mantissa()); - EXPECT_EQ(errno, expectedErrno); + EXPECT_EQ(actual_fp.bits, expected_fp.bits); + EXPECT_EQ(actual_fp.get_sign(), expected_fp.get_sign()); + EXPECT_EQ(actual_fp.get_exponent(), expected_fp.get_exponent()); + EXPECT_EQ(actual_fp.get_mantissa(), expected_fp.get_mantissa()); + EXPECT_EQ(errno, expected_errno); } }; TEST_F(LlvmLibcStrToLDTest, SimpleTest) { - runTest("123", 3, uint64_t(0x405ec00000000000), - __uint128_t(0x4005f60000) << 40, - __uint128_t(0x4005ec0000000000) << 64); + run_test("123", 3, uint64_t(0x405ec00000000000), + __uint128_t(0x4005f60000) << 40, + __uint128_t(0x4005ec0000000000) << 64); // This should fail on Eisel-Lemire, forcing a fallback to simple decimal // conversion. - runTest("12345678901234549760", 20, uint64_t(0x43e56a95319d63d8), - (__uint128_t(0x403eab54a9) << 40) + __uint128_t(0x8ceb1ec400), - (__uint128_t(0x403e56a95319d63d) << 64) + - __uint128_t(0x8800000000000000)); + run_test("12345678901234549760", 20, uint64_t(0x43e56a95319d63d8), + (__uint128_t(0x403eab54a9) << 40) + __uint128_t(0x8ceb1ec400), + (__uint128_t(0x403e56a95319d63d) << 64) + + __uint128_t(0x8800000000000000)); // Found while looking for difficult test cases here: // https://github.com/nigeltao/parse-number-fxx-test-data/blob/main/more-test-cases/golang-org-issue-36657.txt - runTest("1090544144181609348835077142190", 31, uint64_t(0x462b8779f2474dfb), - (__uint128_t(0x4062dc3bcf) << 40) + __uint128_t(0x923a6fd402), - (__uint128_t(0x4062b8779f2474df) << 64) + - __uint128_t(0xa804bfd8c6d5c000)); - - runTest("0x123", 5, uint64_t(0x4072300000000000), - (__uint128_t(0x4007918000) << 40), - (__uint128_t(0x4007230000000000) << 64)); + run_test("1090544144181609348835077142190", 31, uint64_t(0x462b8779f2474dfb), + (__uint128_t(0x4062dc3bcf) << 40) + __uint128_t(0x923a6fd402), + (__uint128_t(0x4062b8779f2474df) << 64) + + __uint128_t(0xa804bfd8c6d5c000)); + + run_test("0x123", 5, uint64_t(0x4072300000000000), + (__uint128_t(0x4007918000) << 40), + (__uint128_t(0x4007230000000000) << 64)); } // These are tests that have caused problems for doubles in the past. 
TEST_F(LlvmLibcStrToLDTest, Float64SpecificFailures) { - runTest("3E70000000000000", 16, uint64_t(0x7FF0000000000000), - (__uint128_t(0x7fff800000) << 40), - (__uint128_t(0x7fff000000000000) << 64), ERANGE, ERANGE, ERANGE); - runTest("358416272e-33", 13, uint64_t(0x3adbbb2a68c9d0b9), - (__uint128_t(0x3fadddd953) << 40) + __uint128_t(0x464e85c400), - (__uint128_t(0x3fadbbb2a68c9d0b) << 64) + - __uint128_t(0x8800e7969e1c5fc8)); - runTest( + run_test("3E70000000000000", 16, uint64_t(0x7FF0000000000000), + (__uint128_t(0x7fff800000) << 40), + (__uint128_t(0x7fff000000000000) << 64), ERANGE, ERANGE, ERANGE); + run_test("358416272e-33", 13, uint64_t(0x3adbbb2a68c9d0b9), + (__uint128_t(0x3fadddd953) << 40) + __uint128_t(0x464e85c400), + (__uint128_t(0x3fadbbb2a68c9d0b) << 64) + + __uint128_t(0x8800e7969e1c5fc8)); + run_test( "2.16656806400000023841857910156251e9", 36, uint64_t(0x41e0246690000001), (__uint128_t(0x401e812334) << 40) + __uint128_t(0x8000000400), (__uint128_t(0x401e024669000000) << 64) + __uint128_t(0x800000000000018)); - runTest("27949676547093071875", 20, uint64_t(0x43f83e132bc608c9), - (__uint128_t(0x403fc1f099) << 40) + __uint128_t(0x5e30464402), - (__uint128_t(0x403f83e132bc608c) << 64) + - __uint128_t(0x8803000000000000)); + run_test("27949676547093071875", 20, uint64_t(0x43f83e132bc608c9), + (__uint128_t(0x403fc1f099) << 40) + __uint128_t(0x5e30464402), + (__uint128_t(0x403f83e132bc608c) << 64) + + __uint128_t(0x8803000000000000)); } TEST_F(LlvmLibcStrToLDTest, MaxSizeNumbers) { - runTest("1.1897314953572317650e4932", 26, uint64_t(0x7FF0000000000000), - (__uint128_t(0x7ffeffffff) << 40) + __uint128_t(0xffffffffff), - (__uint128_t(0x7ffeffffffffffff) << 64) + - __uint128_t(0xfffd57322e3f8675), - ERANGE, 0, 0); - runTest("1.18973149535723176508e4932", 27, uint64_t(0x7FF0000000000000), - (__uint128_t(0x7fff800000) << 40), - (__uint128_t(0x7ffeffffffffffff) << 64) + - __uint128_t(0xffffd2478338036c), - ERANGE, ERANGE, 0); + run_test("1.1897314953572317650e4932", 26, uint64_t(0x7FF0000000000000), + (__uint128_t(0x7ffeffffff) << 40) + __uint128_t(0xffffffffff), + (__uint128_t(0x7ffeffffffffffff) << 64) + + __uint128_t(0xfffd57322e3f8675), + ERANGE, 0, 0); + run_test("1.18973149535723176508e4932", 27, uint64_t(0x7FF0000000000000), + (__uint128_t(0x7fff800000) << 40), + (__uint128_t(0x7ffeffffffffffff) << 64) + + __uint128_t(0xffffd2478338036c), + ERANGE, ERANGE, 0); } // These tests check subnormal behavior for 80 bit and 128 bit floats. They will // be too small for 64 bit floats. 
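For a sense of scale (an aside, not part of the patch): the smallest positive subnormal is about 4.9e-324 for a 64-bit double (2^-1074), about 3.6e-4951 for the 80-bit x87 format (2^-16445), and about 6.5e-4966 for binary128 (2^-16494). That is why 1e-4950 underflows to zero as a double but still produces a nonzero subnormal in the two wider formats, while 4e-4966 survives only in binary128, matching the expected bit patterns below.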
TEST_F(LlvmLibcStrToLDTest, SubnormalTests) { - runTest("1e-4950", 7, uint64_t(0), (__uint128_t(0x00000000000000000003)), - (__uint128_t(0x000000000000000000057c9647e1a018)), ERANGE, ERANGE, - ERANGE); - runTest("1.89e-4951", 10, uint64_t(0), (__uint128_t(0x00000000000000000001)), - (__uint128_t(0x0000000000000000000109778a006738)), ERANGE, ERANGE, - ERANGE); - runTest("4e-4966", 7, uint64_t(0), (__uint128_t(0)), - (__uint128_t(0x00000000000000000000000000000001)), ERANGE, ERANGE, - ERANGE); + run_test("1e-4950", 7, uint64_t(0), (__uint128_t(0x00000000000000000003)), + (__uint128_t(0x000000000000000000057c9647e1a018)), ERANGE, ERANGE, + ERANGE); + run_test("1.89e-4951", 10, uint64_t(0), (__uint128_t(0x00000000000000000001)), + (__uint128_t(0x0000000000000000000109778a006738)), ERANGE, ERANGE, + ERANGE); + run_test("4e-4966", 7, uint64_t(0), (__uint128_t(0)), + (__uint128_t(0x00000000000000000000000000000001)), ERANGE, ERANGE, + ERANGE); } TEST_F(LlvmLibcStrToLDTest, SmallNormalTests) { - runTest("3.37e-4932", 10, uint64_t(0), - (__uint128_t(0x1804cf7) << 40) + __uint128_t(0x908850712), - (__uint128_t(0x10099ee12110a) << 64) + __uint128_t(0xe24b75c0f50dc0c), - ERANGE, 0, 0); + run_test("3.37e-4932", 10, uint64_t(0), + (__uint128_t(0x1804cf7) << 40) + __uint128_t(0x908850712), + (__uint128_t(0x10099ee12110a) << 64) + + __uint128_t(0xe24b75c0f50dc0c), + ERANGE, 0, 0); } TEST_F(LlvmLibcStrToLDTest, ComplexHexadecimalTests) { - runTest("0x1p16383", 9, 0x7ff0000000000000, (__uint128_t(0x7ffe800000) << 40), - (__uint128_t(0x7ffe000000000000) << 64)); - runTest("0x123456789abcdef", 17, 0x43723456789abcdf, - (__uint128_t(0x403791a2b3) << 40) + __uint128_t(0xc4d5e6f780), - (__uint128_t(0x403723456789abcd) << 64) + - __uint128_t(0xef00000000000000)); - runTest("0x123456789abcdef0123456789ABCDEF", 33, 0x7ff0000000000000, - (__uint128_t(0x407791a2b3) << 40) + __uint128_t(0xc4d5e6f781), - (__uint128_t(0x407723456789abcd) << 64) + - __uint128_t(0xef0123456789abce)); + run_test("0x1p16383", 9, 0x7ff0000000000000, + (__uint128_t(0x7ffe800000) << 40), + (__uint128_t(0x7ffe000000000000) << 64)); + run_test("0x123456789abcdef", 17, 0x43723456789abcdf, + (__uint128_t(0x403791a2b3) << 40) + __uint128_t(0xc4d5e6f780), + (__uint128_t(0x403723456789abcd) << 64) + + __uint128_t(0xef00000000000000)); + run_test("0x123456789abcdef0123456789ABCDEF", 33, 0x7ff0000000000000, + (__uint128_t(0x407791a2b3) << 40) + __uint128_t(0xc4d5e6f781), + (__uint128_t(0x407723456789abcd) << 64) + + __uint128_t(0xef0123456789abce)); } TEST_F(LlvmLibcStrToLDTest, InfTests) { - runTest("INF", 3, 0x7ff0000000000000, (__uint128_t(0x7fff800000) << 40), - (__uint128_t(0x7fff000000000000) << 64)); - runTest("INFinity", 8, 0x7ff0000000000000, (__uint128_t(0x7fff800000) << 40), - (__uint128_t(0x7fff000000000000) << 64)); - runTest("-inf", 4, 0xfff0000000000000, (__uint128_t(0xffff800000) << 40), - (__uint128_t(0xffff000000000000) << 64)); + run_test("INF", 3, 0x7ff0000000000000, (__uint128_t(0x7fff800000) << 40), + (__uint128_t(0x7fff000000000000) << 64)); + run_test("INFinity", 8, 0x7ff0000000000000, (__uint128_t(0x7fff800000) << 40), + (__uint128_t(0x7fff000000000000) << 64)); + run_test("-inf", 4, 0xfff0000000000000, (__uint128_t(0xffff800000) << 40), + (__uint128_t(0xffff000000000000) << 64)); } TEST_F(LlvmLibcStrToLDTest, NaNTests) { - runTest("NaN", 3, 0x7ff8000000000000, (__uint128_t(0x7fffc00000) << 40), - (__uint128_t(0x7fff800000000000) << 64)); - runTest("-nAn", 4, 0xfff8000000000000, (__uint128_t(0xffffc00000) << 40), - 
(__uint128_t(0xffff800000000000) << 64)); - runTest("NaN()", 5, 0x7ff8000000000000, (__uint128_t(0x7fffc00000) << 40), - (__uint128_t(0x7fff800000000000) << 64)); - runTest("NaN(1234)", 9, 0x7ff80000000004d2, - (__uint128_t(0x7fffc00000) << 40) + __uint128_t(0x4d2), - (__uint128_t(0x7fff800000000000) << 64) + __uint128_t(0x4d2)); - runTest("NaN(0xffffffffffff)", 19, 0x7ff8ffffffffffff, - (__uint128_t(0x7fffc000ff) << 40) + __uint128_t(0xffffffffff), - (__uint128_t(0x7fff800000000000) << 64) + - __uint128_t(0xffffffffffff)); - runTest("NaN(0xfffffffffffff)", 20, 0x7fffffffffffffff, - (__uint128_t(0x7fffc00fff) << 40) + __uint128_t(0xffffffffff), - (__uint128_t(0x7fff800000000000) << 64) + - __uint128_t(0xfffffffffffff)); - runTest("NaN(0xffffffffffffffff)", 23, 0x7fffffffffffffff, - (__uint128_t(0x7fffffffff) << 40) + __uint128_t(0xffffffffff), - (__uint128_t(0x7fff800000000000) << 64) + - __uint128_t(0xffffffffffffffff)); - runTest("NaN( 1234)", 3, 0x7ff8000000000000, - (__uint128_t(0x7fffc00000) << 40), - (__uint128_t(0x7fff800000000000) << 64)); + run_test("NaN", 3, 0x7ff8000000000000, (__uint128_t(0x7fffc00000) << 40), + (__uint128_t(0x7fff800000000000) << 64)); + run_test("-nAn", 4, 0xfff8000000000000, (__uint128_t(0xffffc00000) << 40), + (__uint128_t(0xffff800000000000) << 64)); + run_test("NaN()", 5, 0x7ff8000000000000, (__uint128_t(0x7fffc00000) << 40), + (__uint128_t(0x7fff800000000000) << 64)); + run_test("NaN(1234)", 9, 0x7ff80000000004d2, + (__uint128_t(0x7fffc00000) << 40) + __uint128_t(0x4d2), + (__uint128_t(0x7fff800000000000) << 64) + __uint128_t(0x4d2)); + run_test("NaN(0xffffffffffff)", 19, 0x7ff8ffffffffffff, + (__uint128_t(0x7fffc000ff) << 40) + __uint128_t(0xffffffffff), + (__uint128_t(0x7fff800000000000) << 64) + + __uint128_t(0xffffffffffff)); + run_test("NaN(0xfffffffffffff)", 20, 0x7fffffffffffffff, + (__uint128_t(0x7fffc00fff) << 40) + __uint128_t(0xffffffffff), + (__uint128_t(0x7fff800000000000) << 64) + + __uint128_t(0xfffffffffffff)); + run_test("NaN(0xffffffffffffffff)", 23, 0x7fffffffffffffff, + (__uint128_t(0x7fffffffff) << 40) + __uint128_t(0xffffffffff), + (__uint128_t(0x7fff800000000000) << 64) + + __uint128_t(0xffffffffffffffff)); + run_test("NaN( 1234)", 3, 0x7ff8000000000000, + (__uint128_t(0x7fffc00000) << 40), + (__uint128_t(0x7fff800000000000) << 64)); } diff --git a/libc/test/src/string/bcmp_test.cpp b/libc/test/src/string/bcmp_test.cpp index dfcad7aff259..19df7ad2637a 100644 --- a/libc/test/src/string/bcmp_test.cpp +++ b/libc/test/src/string/bcmp_test.cpp @@ -34,25 +34,25 @@ TEST(LlvmLibcBcmpTest, LhsAfterRhsLexically) { } TEST(LlvmLibcBcmpTest, Sweep) { - static constexpr size_t kMaxSize = 1024; - char lhs[kMaxSize]; - char rhs[kMaxSize]; + static constexpr size_t K_MAX_SIZE = 1024; + char lhs[K_MAX_SIZE]; + char rhs[K_MAX_SIZE]; const auto reset = [](char *const ptr) { - for (size_t i = 0; i < kMaxSize; ++i) + for (size_t i = 0; i < K_MAX_SIZE; ++i) ptr[i] = 'a'; }; reset(lhs); reset(rhs); - for (size_t i = 0; i < kMaxSize; ++i) + for (size_t i = 0; i < K_MAX_SIZE; ++i) EXPECT_EQ(__llvm_libc::bcmp(lhs, rhs, i), 0); reset(lhs); reset(rhs); - for (size_t i = 0; i < kMaxSize; ++i) { + for (size_t i = 0; i < K_MAX_SIZE; ++i) { rhs[i] = 'b'; - EXPECT_NE(__llvm_libc::bcmp(lhs, rhs, kMaxSize), 0); + EXPECT_NE(__llvm_libc::bcmp(lhs, rhs, K_MAX_SIZE), 0); rhs[i] = 'a'; } } diff --git a/libc/test/src/string/bzero_test.cpp b/libc/test/src/string/bzero_test.cpp index a997cbc43f46..f8edfa542d77 100644 --- a/libc/test/src/string/bzero_test.cpp +++ 
b/libc/test/src/string/bzero_test.cpp @@ -14,10 +14,10 @@ using __llvm_libc::cpp::Array; using __llvm_libc::cpp::ArrayRef; using Data = Array; -static const ArrayRef kDeadcode("DEADC0DE", 8); +static const ArrayRef k_deadcode("DEADC0DE", 8); // Returns a Data object filled with a repetition of `filler`. -Data getData(ArrayRef filler) { +Data get_data(ArrayRef filler) { Data out; for (size_t i = 0; i < out.size(); ++i) out[i] = filler[i % filler.size()]; @@ -25,7 +25,7 @@ Data getData(ArrayRef filler) { } TEST(LlvmLibcBzeroTest, Thorough) { - const Data dirty = getData(kDeadcode); + const Data dirty = get_data(k_deadcode); for (size_t count = 0; count < 1024; ++count) { for (size_t align = 0; align < 64; ++align) { auto buffer = dirty; diff --git a/libc/test/src/string/memcmp_test.cpp b/libc/test/src/string/memcmp_test.cpp index 35ee741514aa..0cd33ebdaf85 100644 --- a/libc/test/src/string/memcmp_test.cpp +++ b/libc/test/src/string/memcmp_test.cpp @@ -34,25 +34,25 @@ TEST(LlvmLibcMemcmpTest, LhsAfterRhsLexically) { } TEST(LlvmLibcMemcmpTest, Sweep) { - static constexpr size_t kMaxSize = 1024; - char lhs[kMaxSize]; - char rhs[kMaxSize]; + static constexpr size_t K_MAX_SIZE = 1024; + char lhs[K_MAX_SIZE]; + char rhs[K_MAX_SIZE]; const auto reset = [](char *const ptr) { - for (size_t i = 0; i < kMaxSize; ++i) + for (size_t i = 0; i < K_MAX_SIZE; ++i) ptr[i] = 'a'; }; reset(lhs); reset(rhs); - for (size_t i = 0; i < kMaxSize; ++i) + for (size_t i = 0; i < K_MAX_SIZE; ++i) EXPECT_EQ(__llvm_libc::memcmp(lhs, rhs, i), 0); reset(lhs); reset(rhs); - for (size_t i = 0; i < kMaxSize; ++i) { + for (size_t i = 0; i < K_MAX_SIZE; ++i) { rhs[i] = 'z'; - EXPECT_LT(__llvm_libc::memcmp(lhs, rhs, kMaxSize), 0); + EXPECT_LT(__llvm_libc::memcmp(lhs, rhs, K_MAX_SIZE), 0); rhs[i] = 'a'; } } diff --git a/libc/test/src/string/memcpy_test.cpp b/libc/test/src/string/memcpy_test.cpp index 9a7c9ba7f7a0..ed4bbd45c410 100644 --- a/libc/test/src/string/memcpy_test.cpp +++ b/libc/test/src/string/memcpy_test.cpp @@ -14,11 +14,11 @@ using __llvm_libc::cpp::Array; using __llvm_libc::cpp::ArrayRef; using Data = Array; -static const ArrayRef kNumbers("0123456789", 10); -static const ArrayRef kDeadcode("DEADC0DE", 8); +static const ArrayRef k_numbers("0123456789", 10); +static const ArrayRef k_deadcode("DEADC0DE", 8); // Returns a Data object filled with a repetition of `filler`. -Data getData(ArrayRef filler) { +Data get_data(ArrayRef filler) { Data out; for (size_t i = 0; i < out.size(); ++i) out[i] = filler[i % filler.size()]; @@ -26,8 +26,8 @@ Data getData(ArrayRef filler) { } TEST(LlvmLibcMemcpyTest, Thorough) { - const Data groundtruth = getData(kNumbers); - const Data dirty = getData(kDeadcode); + const Data groundtruth = get_data(k_numbers); + const Data dirty = get_data(k_deadcode); for (size_t count = 0; count < 1024; ++count) { for (size_t align = 0; align < 64; ++align) { auto buffer = dirty; diff --git a/libc/test/src/string/memset_test.cpp b/libc/test/src/string/memset_test.cpp index c3f65cf6fd6c..2d07e7bb0c73 100644 --- a/libc/test/src/string/memset_test.cpp +++ b/libc/test/src/string/memset_test.cpp @@ -14,10 +14,10 @@ using __llvm_libc::cpp::Array; using __llvm_libc::cpp::ArrayRef; using Data = Array; -static const ArrayRef kDeadcode("DEADC0DE", 8); +static const ArrayRef k_deadcode("DEADC0DE", 8); // Returns a Data object filled with a repetition of `filler`. 
-Data getData(ArrayRef filler) { +Data get_data(ArrayRef filler) { Data out; for (size_t i = 0; i < out.size(); ++i) out[i] = filler[i % filler.size()]; @@ -25,7 +25,7 @@ Data getData(ArrayRef filler) { } TEST(LlvmLibcMemsetTest, Thorough) { - const Data dirty = getData(kDeadcode); + const Data dirty = get_data(k_deadcode); for (int value = -1; value <= 1; ++value) { for (size_t count = 0; count < 1024; ++count) { for (size_t align = 0; align < 64; ++align) { diff --git a/libc/test/src/string/stpcpy_test.cpp b/libc/test/src/string/stpcpy_test.cpp index 90ec5311b9b9..e5ed3fb06f78 100644 --- a/libc/test/src/string/stpcpy_test.cpp +++ b/libc/test/src/string/stpcpy_test.cpp @@ -13,33 +13,33 @@ TEST(LlvmLibcStpCpyTest, EmptySrc) { const char *empty = ""; - size_t srcSize = __llvm_libc::internal::string_length(empty); + size_t src_size = __llvm_libc::internal::string_length(empty); char dest[4] = {'a', 'b', 'c', '\0'}; char *result = __llvm_libc::stpcpy(dest, empty); - ASSERT_EQ(dest + srcSize, result); + ASSERT_EQ(dest + src_size, result); ASSERT_EQ(result[0], '\0'); ASSERT_STREQ(dest, empty); } TEST(LlvmLibcStpCpyTest, EmptyDest) { const char *abc = "abc"; - size_t srcSize = __llvm_libc::internal::string_length(abc); + size_t src_size = __llvm_libc::internal::string_length(abc); char dest[4]; char *result = __llvm_libc::stpcpy(dest, abc); - ASSERT_EQ(dest + srcSize, result); + ASSERT_EQ(dest + src_size, result); ASSERT_EQ(result[0], '\0'); ASSERT_STREQ(dest, abc); } TEST(LlvmLibcStpCpyTest, OffsetDest) { const char *abc = "abc"; - size_t srcSize = __llvm_libc::internal::string_length(abc); + size_t src_size = __llvm_libc::internal::string_length(abc); char dest[7] = {'x', 'y', 'z'}; char *result = __llvm_libc::stpcpy(dest + 3, abc); - ASSERT_EQ(dest + 3 + srcSize, result); + ASSERT_EQ(dest + 3 + src_size, result); ASSERT_EQ(result[0], '\0'); ASSERT_STREQ(dest, "xyzabc"); } diff --git a/libc/test/src/threads/call_once_test.cpp b/libc/test/src/threads/call_once_test.cpp index 18ed71015763..b0e9fb8ccc71 100644 --- a/libc/test/src/threads/call_once_test.cpp +++ b/libc/test/src/threads/call_once_test.cpp @@ -18,7 +18,7 @@ #include -static constexpr unsigned int num_threads = 5; +static constexpr unsigned int NUM_THREADS = 5; static atomic_uint thread_count; static unsigned int call_count; @@ -38,13 +38,13 @@ TEST(LlvmLibcCallOnceTest, CallFrom5Threads) { call_count = 0; thread_count = 0; - thrd_t threads[num_threads]; - for (unsigned int i = 0; i < num_threads; ++i) { + thrd_t threads[NUM_THREADS]; + for (unsigned int i = 0; i < NUM_THREADS; ++i) { ASSERT_EQ(__llvm_libc::thrd_create(threads + i, func, nullptr), static_cast(thrd_success)); } - for (unsigned int i = 0; i < num_threads; ++i) { + for (unsigned int i = 0; i < NUM_THREADS; ++i) { int retval; ASSERT_EQ(__llvm_libc::thrd_join(threads + i, &retval), static_cast(thrd_success)); diff --git a/libc/test/src/threads/mtx_test.cpp b/libc/test/src/threads/mtx_test.cpp index 8ee8cf5bb0dd..3abe77e0eab7 100644 --- a/libc/test/src/threads/mtx_test.cpp +++ b/libc/test/src/threads/mtx_test.cpp @@ -69,11 +69,11 @@ TEST(LlvmLibcMutexTest, RelayCounter) { } mtx_t start_lock, step_lock; -bool start, step; +bool started, step; int stepper(void *arg) { __llvm_libc::mtx_lock(&start_lock); - start = true; + started = true; __llvm_libc::mtx_unlock(&start_lock); __llvm_libc::mtx_lock(&step_lock); @@ -92,7 +92,7 @@ TEST(LlvmLibcMutexTest, WaitAndStep) { // step. Once we ensure that the thread is blocked, we unblock it. 
// After unblocking, we then verify that the thread was indeed unblocked. step = false; - start = false; + started = false; ASSERT_EQ(__llvm_libc::mtx_lock(&step_lock), static_cast(thrd_success)); thrd_t thread; @@ -102,7 +102,7 @@ TEST(LlvmLibcMutexTest, WaitAndStep) { // Make sure the thread actually started. ASSERT_EQ(__llvm_libc::mtx_lock(&start_lock), static_cast(thrd_success)); - bool s = start; + bool s = started; ASSERT_EQ(__llvm_libc::mtx_unlock(&start_lock), static_cast(thrd_success)); if (s) diff --git a/libc/test/src/time/TmHelper.h b/libc/test/src/time/TmHelper.h index 1ab2a5eeeba6..54d9f4eb72ce 100644 --- a/libc/test/src/time/TmHelper.h +++ b/libc/test/src/time/TmHelper.h @@ -20,9 +20,9 @@ namespace tmhelper { namespace testing { // A helper function to initialize tm data structure. -static inline void InitializeTmData(struct tm *tm_data, int year, int month, - int mday, int hour, int min, int sec, - int wday, int yday) { +static inline void initialize_tm_data(struct tm *tm_data, int year, int month, + int mday, int hour, int min, int sec, + int wday, int yday) { struct tm temp = {.tm_sec = sec, .tm_min = min, .tm_hour = hour, diff --git a/libc/test/src/time/asctime_r_test.cpp b/libc/test/src/time/asctime_r_test.cpp index 1f883cf0cac1..f9ccdda50a32 100644 --- a/libc/test/src/time/asctime_r_test.cpp +++ b/libc/test/src/time/asctime_r_test.cpp @@ -17,8 +17,8 @@ using __llvm_libc::time_utils::TimeConstants; static inline char *call_asctime_r(struct tm *tm_data, int year, int month, int mday, int hour, int min, int sec, int wday, int yday, char *buffer) { - __llvm_libc::tmhelper::testing::InitializeTmData(tm_data, year, month, mday, - hour, min, sec, wday, yday); + __llvm_libc::tmhelper::testing::initialize_tm_data( + tm_data, year, month, mday, hour, min, sec, wday, yday); return __llvm_libc::asctime_r(tm_data, buffer); } diff --git a/libc/test/src/time/asctime_test.cpp b/libc/test/src/time/asctime_test.cpp index 14201fd5a0de..0b0c159eb53a 100644 --- a/libc/test/src/time/asctime_test.cpp +++ b/libc/test/src/time/asctime_test.cpp @@ -13,8 +13,8 @@ static inline char *call_asctime(struct tm *tm_data, int year, int month, int mday, int hour, int min, int sec, int wday, int yday) { - __llvm_libc::tmhelper::testing::InitializeTmData(tm_data, year, month, mday, - hour, min, sec, wday, yday); + __llvm_libc::tmhelper::testing::initialize_tm_data( + tm_data, year, month, mday, hour, min, sec, wday, yday); return __llvm_libc::asctime(tm_data); } diff --git a/libc/test/src/time/mktime_test.cpp b/libc/test/src/time/mktime_test.cpp index d08a6b9640dd..16b814f1d425 100644 --- a/libc/test/src/time/mktime_test.cpp +++ b/libc/test/src/time/mktime_test.cpp @@ -24,14 +24,14 @@ using __llvm_libc::time_utils::TimeConstants; static inline time_t call_mktime(struct tm *tm_data, int year, int month, int mday, int hour, int min, int sec, int wday, int yday) { - __llvm_libc::tmhelper::testing::InitializeTmData(tm_data, year, month, mday, - hour, min, sec, wday, yday); + __llvm_libc::tmhelper::testing::initialize_tm_data( + tm_data, year, month, mday, hour, min, sec, wday, yday); return __llvm_libc::mktime(tm_data); } TEST(LlvmLibcMkTime, FailureSetsErrno) { struct tm tm_data; - __llvm_libc::tmhelper::testing::InitializeTmData( + __llvm_libc::tmhelper::testing::initialize_tm_data( &tm_data, INT_MAX, INT_MAX, INT_MAX, INT_MAX, INT_MAX, -1, 0, 0); EXPECT_THAT(__llvm_libc::mktime(&tm_data), Fails(EOVERFLOW)); } diff --git a/libc/test/src/unistd/write_test.cpp b/libc/test/src/unistd/write_test.cpp index 
2313d97d5888..56243846083a 100644 --- a/libc/test/src/unistd/write_test.cpp +++ b/libc/test/src/unistd/write_test.cpp @@ -14,10 +14,10 @@ TEST(LlvmLibcUniStd, WriteBasic) { using __llvm_libc::testing::ErrnoSetterMatcher::Succeeds; - constexpr const char *hello = "hello"; + constexpr const char *HELLO = "hello"; __llvm_libc::testutils::FDReader reader; - EXPECT_THAT(__llvm_libc::write(reader.getWriteFD(), hello, 5), Succeeds(5)); - EXPECT_TRUE(reader.matchWritten(hello)); + EXPECT_THAT(__llvm_libc::write(reader.get_write_fd(), HELLO, 5), Succeeds(5)); + EXPECT_TRUE(reader.match_written(HELLO)); } TEST(LlvmLibcUniStd, WriteFails) { diff --git a/libc/utils/UnitTest/LibcTest.cpp b/libc/utils/UnitTest/LibcTest.cpp index 710ee15bba3f..c998337039a6 100644 --- a/libc/utils/UnitTest/LibcTest.cpp +++ b/libc/utils/UnitTest/LibcTest.cpp @@ -291,22 +291,22 @@ bool Test::testMatch(bool MatchResult, MatcherBase &Matcher, const char *LHSStr, bool Test::testProcessKilled(testutils::FunctionCaller *Func, int Signal, const char *LHSStr, const char *RHSStr, const char *File, unsigned long Line) { - testutils::ProcessStatus Result = testutils::invokeInSubprocess(Func, 500); + testutils::ProcessStatus Result = testutils::invoke_in_subprocess(Func, 500); - if (const char *error = Result.getError()) { + if (const char *error = Result.get_error()) { Ctx->markFail(); std::cout << File << ":" << Line << ": FAILURE\n" << error << '\n'; return false; } - if (Result.timedOut()) { + if (Result.timed_out()) { Ctx->markFail(); std::cout << File << ":" << Line << ": FAILURE\n" << "Process timed out after " << 500 << " milliseconds.\n"; return false; } - if (Result.exitedNormally()) { + if (Result.exited_normally()) { Ctx->markFail(); std::cout << File << ":" << Line << ": FAILURE\n" << "Expected " << LHSStr @@ -314,41 +314,41 @@ bool Test::testProcessKilled(testutils::FunctionCaller *Func, int Signal, return false; } - int KilledBy = Result.getFatalSignal(); + int KilledBy = Result.get_fatal_signal(); assert(KilledBy != 0 && "Not killed by any signal"); if (Signal == -1 || KilledBy == Signal) return true; - using testutils::signalAsString; + using testutils::signal_as_string; Ctx->markFail(); std::cout << File << ":" << Line << ": FAILURE\n" << " Expected: " << LHSStr << '\n' << "To be killed by signal: " << Signal << '\n' - << " Which is: " << signalAsString(Signal) << '\n' + << " Which is: " << signal_as_string(Signal) << '\n' << " But it was killed by: " << KilledBy << '\n' - << " Which is: " << signalAsString(KilledBy) << '\n'; + << " Which is: " << signal_as_string(KilledBy) << '\n'; return false; } bool Test::testProcessExits(testutils::FunctionCaller *Func, int ExitCode, const char *LHSStr, const char *RHSStr, const char *File, unsigned long Line) { - testutils::ProcessStatus Result = testutils::invokeInSubprocess(Func, 500); + testutils::ProcessStatus Result = testutils::invoke_in_subprocess(Func, 500); - if (const char *error = Result.getError()) { + if (const char *error = Result.get_error()) { Ctx->markFail(); std::cout << File << ":" << Line << ": FAILURE\n" << error << '\n'; return false; } - if (Result.timedOut()) { + if (Result.timed_out()) { Ctx->markFail(); std::cout << File << ":" << Line << ": FAILURE\n" << "Process timed out after " << 500 << " milliseconds.\n"; return false; } - if (!Result.exitedNormally()) { + if (!Result.exited_normally()) { Ctx->markFail(); std::cout << File << ":" << Line << ": FAILURE\n" << "Expected " << LHSStr << '\n' @@ -357,7 +357,7 @@ bool 
Test::testProcessExits(testutils::FunctionCaller *Func, int ExitCode, return false; } - int ActualExit = Result.getExitCode(); + int ActualExit = Result.get_exit_code(); if (ActualExit == ExitCode) return true; diff --git a/libc/utils/testutils/ExecuteFunction.h b/libc/utils/testutils/ExecuteFunction.h index 32482ebaf3bf..a22336575d82 100644 --- a/libc/utils/testutils/ExecuteFunction.h +++ b/libc/utils/testutils/ExecuteFunction.h @@ -21,28 +21,29 @@ class FunctionCaller { }; struct ProcessStatus { - int PlatformDefined; + int platform_defined; const char *failure = nullptr; - static constexpr uintptr_t timeout = -1L; + static constexpr uintptr_t TIMEOUT = -1L; - static ProcessStatus Error(const char *error) { return {0, error}; } - static ProcessStatus TimedOut() { - return {0, reinterpret_cast(timeout)}; + static ProcessStatus error(const char *error) { return {0, error}; } + static ProcessStatus timed_out_ps() { + return {0, reinterpret_cast(TIMEOUT)}; } - bool timedOut() const { - return failure == reinterpret_cast(timeout); + bool timed_out() const { + return failure == reinterpret_cast(TIMEOUT); } - const char *getError() const { return timedOut() ? nullptr : failure; } - bool exitedNormally() const; - int getExitCode() const; - int getFatalSignal() const; + const char *get_error() const { return timed_out() ? nullptr : failure; } + bool exited_normally() const; + int get_exit_code() const; + int get_fatal_signal() const; }; -ProcessStatus invokeInSubprocess(FunctionCaller *Func, unsigned TimeoutMS = -1); +ProcessStatus invoke_in_subprocess(FunctionCaller *Func, + unsigned TimeoutMS = -1); -const char *signalAsString(int Signum); +const char *signal_as_string(int Signum); } // namespace testutils } // namespace __llvm_libc diff --git a/libc/utils/testutils/ExecuteFunctionUnix.cpp b/libc/utils/testutils/ExecuteFunctionUnix.cpp index 4050a0ca8a6e..a3871d07386d 100644 --- a/libc/utils/testutils/ExecuteFunctionUnix.cpp +++ b/libc/utils/testutils/ExecuteFunctionUnix.cpp @@ -20,33 +20,33 @@ namespace __llvm_libc { namespace testutils { -bool ProcessStatus::exitedNormally() const { - return WIFEXITED(PlatformDefined); +bool ProcessStatus::exited_normally() const { + return WIFEXITED(platform_defined); } -int ProcessStatus::getExitCode() const { - assert(exitedNormally() && "Abnormal termination, no exit code"); - return WEXITSTATUS(PlatformDefined); +int ProcessStatus::get_exit_code() const { + assert(exited_normally() && "Abnormal termination, no exit code"); + return WEXITSTATUS(platform_defined); } -int ProcessStatus::getFatalSignal() const { - if (exitedNormally()) +int ProcessStatus::get_fatal_signal() const { + if (exited_normally()) return 0; - return WTERMSIG(PlatformDefined); + return WTERMSIG(platform_defined); } -ProcessStatus invokeInSubprocess(FunctionCaller *Func, unsigned timeoutMS) { +ProcessStatus invoke_in_subprocess(FunctionCaller *Func, unsigned timeoutMS) { std::unique_ptr X(Func); int pipeFDs[2]; if (::pipe(pipeFDs) == -1) - return ProcessStatus::Error("pipe(2) failed"); + return ProcessStatus::error("pipe(2) failed"); // Don't copy the buffers into the child process and print twice. 
std::cout.flush(); std::cerr.flush(); pid_t Pid = ::fork(); if (Pid == -1) - return ProcessStatus::Error("fork(2) failed"); + return ProcessStatus::error("fork(2) failed"); if (!Pid) { (*Func)(); @@ -60,11 +60,11 @@ ProcessStatus invokeInSubprocess(FunctionCaller *Func, unsigned timeoutMS) { // No events requested so this call will only return after the timeout or if // the pipes peer was closed, signaling the process exited. if (::poll(&pollFD, 1, timeoutMS) == -1) - return ProcessStatus::Error("poll(2) failed"); + return ProcessStatus::error("poll(2) failed"); // If the pipe wasn't closed by the child yet then timeout has expired. if (!(pollFD.revents & POLLHUP)) { ::kill(Pid, SIGKILL); - return ProcessStatus::TimedOut(); + return ProcessStatus::timed_out_ps(); } int WStatus = 0; @@ -72,13 +72,13 @@ ProcessStatus invokeInSubprocess(FunctionCaller *Func, unsigned timeoutMS) { // and doesn't turn into a zombie. pid_t status = ::waitpid(Pid, &WStatus, 0); if (status == -1) - return ProcessStatus::Error("waitpid(2) failed"); + return ProcessStatus::error("waitpid(2) failed"); assert(status == Pid); (void)status; return {WStatus}; } -const char *signalAsString(int Signum) { return ::strsignal(Signum); } +const char *signal_as_string(int Signum) { return ::strsignal(Signum); } } // namespace testutils } // namespace __llvm_libc diff --git a/libc/utils/testutils/FDReader.h b/libc/utils/testutils/FDReader.h index e25dcea290a6..8a39a92dcc33 100644 --- a/libc/utils/testutils/FDReader.h +++ b/libc/utils/testutils/FDReader.h @@ -19,8 +19,8 @@ class FDReader { FDReader(); ~FDReader(); - int getWriteFD() { return pipefd[1]; } - bool matchWritten(const char *); + int get_write_fd() { return pipefd[1]; } + bool match_written(const char *); }; } // namespace testutils diff --git a/libc/utils/testutils/FDReaderUnix.cpp b/libc/utils/testutils/FDReaderUnix.cpp index db1f63f0dcc3..be6cde331f37 100644 --- a/libc/utils/testutils/FDReaderUnix.cpp +++ b/libc/utils/testutils/FDReaderUnix.cpp @@ -27,7 +27,7 @@ FDReader::~FDReader() { ::close(pipefd[1]); } -bool FDReader::matchWritten(const char *str) { +bool FDReader::match_written(const char *str) { ::close(pipefd[1]); diff --git a/libc/utils/testutils/StreamWrapper.cpp b/libc/utils/testutils/StreamWrapper.cpp index 84ab583f6644..f65c9c3055ce 100644 --- a/libc/utils/testutils/StreamWrapper.cpp +++ b/libc/utils/testutils/StreamWrapper.cpp @@ -19,8 +19,8 @@ namespace testutils { StreamWrapper outs() { return {std::addressof(std::cout)}; } template StreamWrapper &StreamWrapper::operator<<(T t) { - assert(OS); - std::ostream &Stream = *reinterpret_cast(OS); + assert(os); + std::ostream &Stream = *reinterpret_cast(os); Stream << t; Stream.flush(); return *this; @@ -52,7 +52,7 @@ OutputFileStream::OutputFileStream(const char *FN) : StreamWrapper(new std::ofstream(FN)) {} OutputFileStream::~OutputFileStream() { - delete reinterpret_cast(OS); + delete reinterpret_cast(os); } } // namespace testutils diff --git a/libc/utils/testutils/StreamWrapper.h b/libc/utils/testutils/StreamWrapper.h index 2434ab06bb71..f4309abdedef 100644 --- a/libc/utils/testutils/StreamWrapper.h +++ b/libc/utils/testutils/StreamWrapper.h @@ -17,10 +17,10 @@ namespace testutils { // expose the system libc headers. 
class StreamWrapper { protected: - void *OS; + void *os; public: - StreamWrapper(void *OS) : OS(OS) {} + StreamWrapper(void *OS) : os(OS) {} template StreamWrapper &operator<<(T t); }; From 42ac4f3dc6e673e44fa9fcbb40d4200be2d1e2c1 Mon Sep 17 00:00:00 2001 From: Mogball Date: Tue, 21 Dec 2021 01:18:14 +0000 Subject: [PATCH 067/293] [mlir] Canonicalizer constructor should accept disabled/enabled patterns There is no way to programmatically configure the list of disabled and enabled patterns in the canonicalizer pass, other than duplicating the whole pass. This patch exposes the `disabledPatterns` and `enabledPatterns` options. Reviewed By: mehdi_amini Differential Revision: https://reviews.llvm.org/D116055 --- .../mlir/Rewrite/FrozenRewritePatternSet.h | 10 +++++++--- mlir/include/mlir/Transforms/Passes.h | 11 ++++++++++- mlir/lib/Transforms/Canonicalizer.cpp | 15 ++++++++++++--- 3 files changed, 29 insertions(+), 7 deletions(-) diff --git a/mlir/include/mlir/Rewrite/FrozenRewritePatternSet.h b/mlir/include/mlir/Rewrite/FrozenRewritePatternSet.h index 74ddba34ada9..7627fd0145eb 100644 --- a/mlir/include/mlir/Rewrite/FrozenRewritePatternSet.h +++ b/mlir/include/mlir/Rewrite/FrozenRewritePatternSet.h @@ -40,9 +40,13 @@ class FrozenRewritePatternSet { /// Freeze the patterns held in `patterns`, and take ownership. /// `disabledPatternLabels` is a set of labels used to filter out input - /// patterns with a label in this set. `enabledPatternLabels` is a set of - /// labels used to filter out input patterns that do not have one of the - /// labels in this set. + /// patterns with a debug label or debug name in this set. + /// `enabledPatternLabels` is a set of labels used to filter out input + /// patterns that do not have one of the labels in this set. Debug labels must + /// be set explicitly on patterns or when adding them with + /// `RewritePatternSet::addWithLabel`. Debug names may be empty, but patterns + /// created with `RewritePattern::create` have their default debug name set to + /// their type name. FrozenRewritePatternSet( RewritePatternSet &&patterns, ArrayRef disabledPatternLabels = llvm::None, diff --git a/mlir/include/mlir/Transforms/Passes.h b/mlir/include/mlir/Transforms/Passes.h index e6cfda886363..6aab120fb469 100644 --- a/mlir/include/mlir/Transforms/Passes.h +++ b/mlir/include/mlir/Transforms/Passes.h @@ -62,8 +62,17 @@ std::unique_ptr createBufferResultsToOutParamsPass(); std::unique_ptr createCanonicalizerPass(); /// Creates an instance of the Canonicalizer pass with the specified config. +/// `disabledPatterns` is a set of labels used to filter out input patterns with +/// a debug label or debug name in this set. `enabledPatterns` is a set of +/// labels used to filter out input patterns that do not have one of the labels +/// in this set. Debug labels must be set explicitly on patterns or when adding +/// them with `RewritePatternSet::addWithLabel`. Debug names may be empty, but +/// patterns created with `RewritePattern::create` have their default debug name +/// set to their type name. std::unique_ptr -createCanonicalizerPass(const GreedyRewriteConfig &config); +createCanonicalizerPass(const GreedyRewriteConfig &config, + ArrayRef disabledPatterns = llvm::None, + ArrayRef enabledPatterns = llvm::None); /// Creates a pass to perform common sub expression elimination.
std::unique_ptr createCSEPass(); diff --git a/mlir/lib/Transforms/Canonicalizer.cpp b/mlir/lib/Transforms/Canonicalizer.cpp index 745477d396fb..92f21f6c226b 100644 --- a/mlir/lib/Transforms/Canonicalizer.cpp +++ b/mlir/lib/Transforms/Canonicalizer.cpp @@ -21,7 +21,13 @@ using namespace mlir; namespace { /// Canonicalize operations in nested regions. struct Canonicalizer : public CanonicalizerBase { - Canonicalizer(const GreedyRewriteConfig &config) : config(config) {} + Canonicalizer(const GreedyRewriteConfig &config, + ArrayRef disabledPatterns, + ArrayRef enabledPatterns) + : config(config) { + this->disabledPatterns = disabledPatterns; + this->enabledPatterns = enabledPatterns; + } Canonicalizer() { // Default constructed Canonicalizer takes its settings from command line @@ -61,6 +67,9 @@ std::unique_ptr mlir::createCanonicalizerPass() { /// Creates an instance of the Canonicalizer pass with the specified config. std::unique_ptr -mlir::createCanonicalizerPass(const GreedyRewriteConfig &config) { - return std::make_unique(config); +createCanonicalizerPass(const GreedyRewriteConfig &config, + ArrayRef disabledPatterns = llvm::None, + ArrayRef enabledPatterns = llvm::None) { + return std::make_unique(config, disabledPatterns, + enabledPatterns); } From 9eb8e7b176e9fc38c8df86bd927663c6409ac262 Mon Sep 17 00:00:00 2001 From: Stanislav Funiak Date: Wed, 22 Dec 2021 19:10:02 +0000 Subject: [PATCH 068/293] [MLIR][PDL] Clear up the terminology in the root ordering graph. Previously, we defined a struct named `RootOrderingCost`, which stored the cost (a pair consisting of the depth of the connector and a tie breaking ID), as well as the connector itself. This created some confusion, because we would sometimes write, e.g., `cost.cost.first` (the first `cost` referring to the struct, the second one referring to the `cost` field, and `first` referring to the depth). In order to address this confusion, here we rename `RootOrderingCost` to `RootOrderingEntry` (keeping the fields and their names as-is). This clarification exposed non-determinism in the optimal branching algorithm. When choosing the best local parent, we were previously only considering its depth (`cost.first`) and not the tie-breaking ID (`cost.second`). This led to non-deterministic choice of the parent when multiple potential parents had the same depth. The solution is to compare both the depth and the tie-breaking ID. Testing: Rely on existing unit tests. Non-determinism is hard to unit-test. Reviewed By: rriddle, Mogball Differential Revision: https://reviews.llvm.org/D116079 --- .../PDLToPDLInterp/PredicateTree.cpp | 20 ++++---- .../PDLToPDLInterp/RootOrdering.cpp | 51 ++++++++++--------- .../Conversion/PDLToPDLInterp/RootOrdering.h | 10 ++-- 3 files changed, 42 insertions(+), 39 deletions(-) diff --git a/mlir/lib/Conversion/PDLToPDLInterp/PredicateTree.cpp b/mlir/lib/Conversion/PDLToPDLInterp/PredicateTree.cpp index b22e50549dfd..517f28c2044f 100644 --- a/mlir/lib/Conversion/PDLToPDLInterp/PredicateTree.cpp +++ b/mlir/lib/Conversion/PDLToPDLInterp/PredicateTree.cpp @@ -487,12 +487,12 @@ static void buildCostGraph(ArrayRef roots, RootOrderingGraph &graph, if (&p == &q) continue; // Insert or retrieve the property of edge from p to q.
- RootOrderingCost &cost = graph[q.root][p.root]; - if (!cost.connector /* new edge */ || cost.cost.first > q.depth) { - if (!cost.connector) - cost.cost.second = nextID++; - cost.cost.first = q.depth; - cost.connector = value; + RootOrderingEntry &entry = graph[q.root][p.root]; + if (!entry.connector /* new edge */ || entry.cost.first > q.depth) { + if (!entry.connector) + entry.cost.second = nextID++; + entry.cost.first = q.depth; + entry.connector = value; } } } @@ -570,10 +570,10 @@ static Value buildPredicateList(pdl::PatternOp pattern, for (auto &target : graph) { llvm::dbgs() << " * " << target.first << "\n"; for (auto &source : target.second) { - RootOrderingCost c = source.second; - llvm::dbgs() << " <- " << source.first << ": " << c.cost.first - << ":" << c.cost.second << " via " << c.connector.getLoc() - << "\n"; + RootOrderingEntry &entry = source.second; + llvm::dbgs() << " <- " << source.first << ": " << entry.cost.first + << ":" << entry.cost.second << " via " + << entry.connector.getLoc() << "\n"; } } }); diff --git a/mlir/lib/Conversion/PDLToPDLInterp/RootOrdering.cpp b/mlir/lib/Conversion/PDLToPDLInterp/RootOrdering.cpp index a4d68b1343f7..e1a9fa599cff 100644 --- a/mlir/lib/Conversion/PDLToPDLInterp/RootOrdering.cpp +++ b/mlir/lib/Conversion/PDLToPDLInterp/RootOrdering.cpp @@ -46,19 +46,19 @@ static SmallVector getCycle(const DenseMap &parents, /// cycle, u \in C, are replaced with a single edge (v_C, v), and the selected /// node u is marked in the ouptut map actualSource[v]. static void contract(RootOrderingGraph &graph, ArrayRef cycle, - const DenseMap &parentCosts, + const DenseMap &parentDepths, DenseMap &actualSource, DenseMap &actualTarget) { Value rep = cycle.front(); DenseSet cycleSet(cycle.begin(), cycle.end()); // Now, contract the cycle, marking the actual sources and targets. - DenseMap repCosts; + DenseMap repEntries; for (auto outer = graph.begin(), e = graph.end(); outer != e; ++outer) { Value target = outer->first; if (cycleSet.contains(target)) { // Target in the cycle => edges incoming to the cycle or within the cycle. - unsigned parentCost = parentCosts.lookup(target); + unsigned parentDepth = parentDepths.lookup(target); for (const auto &inner : outer->second) { Value source = inner.first; // Ignore edges within the cycle. @@ -67,30 +67,30 @@ static void contract(RootOrderingGraph &graph, ArrayRef cycle, // Edge incoming to the cycle. std::pair cost = inner.second.cost; - assert(parentCost <= cost.first && "invalid parent cost"); + assert(parentDepth <= cost.first && "invalid parent depth"); // Subtract the cost of the parent within the cycle from the cost of // the edge incoming to the cycle. This update ensures that the cost // of the minimum-weight spanning arborescence of the entire graph is // the cost of arborescence for the contracted graph plus the cost of // the cycle, no matter which edge in the cycle we choose to drop. - cost.first -= parentCost; - auto it = repCosts.find(source); - if (it == repCosts.end() || it->second.cost > cost) { + cost.first -= parentDepth; + auto it = repEntries.find(source); + if (it == repEntries.end() || it->second.cost > cost) { actualTarget[source] = target; // Do not bother populating the connector (the connector is only // relevant for the final traversal, not for the optimal branching). - repCosts[source].cost = cost; + repEntries[source].cost = cost; } } // Erase the node in the cycle. graph.erase(outer); } else { // Target not in cycle => edges going away from or unrelated to the cycle. 
- DenseMap &costs = outer->second; + DenseMap &entries = outer->second; Value bestSource; std::pair bestCost; - auto inner = costs.begin(), innerE = costs.end(); + auto inner = entries.begin(), innerE = entries.end(); while (inner != innerE) { Value source = inner->first; if (cycleSet.contains(source)) { @@ -99,7 +99,7 @@ static void contract(RootOrderingGraph &graph, ArrayRef cycle, bestSource = source; bestCost = inner->second.cost; } - costs.erase(inner++); + entries.erase(inner++); } else { ++inner; } @@ -107,14 +107,14 @@ static void contract(RootOrderingGraph &graph, ArrayRef cycle, // There were going-away edges, contract them. if (bestSource) { - costs[rep].cost = bestCost; + entries[rep].cost = bestCost; actualSource[target] = bestSource; } } } // Store the edges to the representative. - graph[rep] = std::move(repCosts); + graph[rep] = std::move(repEntries); } OptimalBranching::OptimalBranching(RootOrderingGraph graph, Value root) @@ -128,8 +128,8 @@ unsigned OptimalBranching::solve() { // A map that stores the cost of the optimal local choice for each node // in a directed cycle. This map is cleared every time we seed the search. - DenseMap parentCosts; - parentCosts.reserve(graph.size()); + DenseMap parentDepths; + parentDepths.reserve(graph.size()); // Determine if the optimal local choice results in an acyclic graph. This is // done by computing the optimal local choice and traversing up the computed @@ -142,27 +142,30 @@ unsigned OptimalBranching::solve() { // Follow the trail of best sources until we reach an already visited node. // The code will assert if we cannot reach an already visited node, i.e., // the graph is not strongly connected. - parentCosts.clear(); + parentDepths.clear(); do { auto it = graph.find(node); assert(it != graph.end() && "the graph is not strongly connected"); + // Find the best local parent, taking into account both the depth and the + // tie breaking rules. Value &bestSource = parents[node]; - unsigned &bestCost = parentCosts[node]; + std::pair bestCost; for (const auto &inner : it->second) { - const RootOrderingCost &cost = inner.second; - if (!bestSource /* initial */ || bestCost > cost.cost.first) { + const RootOrderingEntry &entry = inner.second; + if (!bestSource /* initial */ || bestCost > entry.cost) { bestSource = inner.first; - bestCost = cost.cost.first; + bestCost = entry.cost; } } assert(bestSource && "the graph is not strongly connected"); + parentDepths[node] = bestCost.first; node = bestSource; - totalCost += bestCost; + totalCost += bestCost.first; } while (!parents.count(node)); // If we reached a non-root node, we have a cycle. - if (parentCosts.count(node)) { + if (parentDepths.count(node)) { // Determine the cycle starting at the representative node. SmallVector cycle = getCycle(parents, node); @@ -171,7 +174,7 @@ unsigned OptimalBranching::solve() { DenseMap actualSource, actualTarget; // Contract the cycle and recurse. - contract(graph, cycle, parentCosts, actualSource, actualTarget); + contract(graph, cycle, parentDepths, actualSource, actualTarget); totalCost = solve(); // Redirect the going-away edges. 
@@ -186,7 +189,7 @@ unsigned OptimalBranching::solve() { Value entry = actualTarget.lookup(parent); cycle.push_back(node); // complete the cycle for (size_t i = 0, e = cycle.size() - 1; i < e; ++i) { - totalCost += parentCosts.lookup(cycle[i]); + totalCost += parentDepths.lookup(cycle[i]); if (cycle[i] == entry) parents[cycle[i]] = parent; // break the cycle else diff --git a/mlir/lib/Conversion/PDLToPDLInterp/RootOrdering.h b/mlir/lib/Conversion/PDLToPDLInterp/RootOrdering.h index 642b6a076f5c..553c5b2de21a 100644 --- a/mlir/lib/Conversion/PDLToPDLInterp/RootOrdering.h +++ b/mlir/lib/Conversion/PDLToPDLInterp/RootOrdering.h @@ -56,12 +56,12 @@ namespace pdl_to_pdl_interp { /// op4 and op5 in the cost graph, because the subtrees rooted at these two /// roots do not intersect. It is easy to see that the optimal root for this /// pattern is op3, resulting in the spanning arborescence op3 -> {op4, op5}. -struct RootOrderingCost { +struct RootOrderingEntry { /// The depth of the connector `Value` w.r.t. the target root. /// - /// This is a pair where the first entry is the actual cost, and the second - /// entry is a priority for breaking ties (with 0 being the highest). - /// Typically, the priority is a unique edge ID. + /// This is a pair where the first value is the additive cost (the depth of + /// the connector), and the second value is a priority for breaking ties + /// (with 0 being the highest). Typically, the priority is a unique edge ID. std::pair cost; /// The connector value in the intersection of the two subtrees rooted at @@ -75,7 +75,7 @@ struct RootOrderingCost { /// is indexed by the target node, and the inner map is indexed by the source /// node. Each edge is associated with a cost and the underlying connector /// value. -using RootOrderingGraph = DenseMap>; +using RootOrderingGraph = DenseMap>; /// The optimal branching algorithm solver. This solver accepts a graph and the /// root in its constructor, and is invoked via the solve() member function. From db68e6ab93a4506f277ff9a004f37dc9a049256e Mon Sep 17 00:00:00 2001 From: Mogball Date: Wed, 22 Dec 2021 19:16:09 +0000 Subject: [PATCH 069/293] [mlir] Fix missing namespace (NFC) --- mlir/lib/Transforms/Canonicalizer.cpp | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/mlir/lib/Transforms/Canonicalizer.cpp b/mlir/lib/Transforms/Canonicalizer.cpp index 92f21f6c226b..cb532746c448 100644 --- a/mlir/lib/Transforms/Canonicalizer.cpp +++ b/mlir/lib/Transforms/Canonicalizer.cpp @@ -67,9 +67,9 @@ std::unique_ptr mlir::createCanonicalizerPass() { /// Creates an instance of the Canonicalizer pass with the specified config. std::unique_ptr -createCanonicalizerPass(const GreedyRewriteConfig &config, - ArrayRef disabledPatterns = llvm::None, - ArrayRef enabledPatterns = llvm::None) { +mlir::createCanonicalizerPass(const GreedyRewriteConfig &config, + ArrayRef disabledPatterns, + ArrayRef enabledPatterns) { return std::make_unique(config, disabledPatterns, enabledPatterns); } From deaedab14a2f91ff0ed26197c22be6b8dd40562f Mon Sep 17 00:00:00 2001 From: Daniil Fukalov Date: Wed, 22 Dec 2021 22:32:09 +0300 Subject: [PATCH 070/293] [NFC][AMDGPU][CostModel] Add tests for AMDGPU cost model. 
--- llvm/test/Analysis/CostModel/AMDGPU/fdiv.ll | 102 ++++ llvm/test/Analysis/CostModel/AMDGPU/fneg.ll | 36 ++ .../CostModel/AMDGPU/insertelement.ll | 559 ++++++++++++++++++ llvm/test/Analysis/CostModel/AMDGPU/mul.ll | 400 +++++++++++++ 4 files changed, 1097 insertions(+) diff --git a/llvm/test/Analysis/CostModel/AMDGPU/fdiv.ll b/llvm/test/Analysis/CostModel/AMDGPU/fdiv.ll index 310598da87d7..fe5b57be5d3b 100644 --- a/llvm/test/Analysis/CostModel/AMDGPU/fdiv.ll +++ b/llvm/test/Analysis/CostModel/AMDGPU/fdiv.ll @@ -575,5 +575,107 @@ define amdgpu_kernel void @rcp_ftzdaz() #1 { ret void } +define i32 @frem(i32 %arg) { +; CIFASTF64-LABEL: 'frem' +; CIFASTF64-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %F32 = frem float undef, undef +; CIFASTF64-NEXT: Cost Model: Found an estimated cost of 56 for instruction: %V4F32 = frem <4 x float> undef, undef +; CIFASTF64-NEXT: Cost Model: Found an estimated cost of 112 for instruction: %V8F32 = frem <8 x float> undef, undef +; CIFASTF64-NEXT: Cost Model: Found an estimated cost of 672 for instruction: %V16F32 = frem <16 x float> undef, undef +; CIFASTF64-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %F64 = frem double undef, undef +; CIFASTF64-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %V2F64 = frem <2 x double> undef, undef +; CIFASTF64-NEXT: Cost Model: Found an estimated cost of 96 for instruction: %V4F64 = frem <4 x double> undef, undef +; CIFASTF64-NEXT: Cost Model: Found an estimated cost of 576 for instruction: %V8F64 = frem <8 x double> undef, undef +; CIFASTF64-NEXT: Cost Model: Found an estimated cost of 10 for instruction: ret i32 undef +; +; CISLOWF64-LABEL: 'frem' +; CISLOWF64-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %F32 = frem float undef, undef +; CISLOWF64-NEXT: Cost Model: Found an estimated cost of 56 for instruction: %V4F32 = frem <4 x float> undef, undef +; CISLOWF64-NEXT: Cost Model: Found an estimated cost of 112 for instruction: %V8F32 = frem <8 x float> undef, undef +; CISLOWF64-NEXT: Cost Model: Found an estimated cost of 672 for instruction: %V16F32 = frem <16 x float> undef, undef +; CISLOWF64-NEXT: Cost Model: Found an estimated cost of 38 for instruction: %F64 = frem double undef, undef +; CISLOWF64-NEXT: Cost Model: Found an estimated cost of 76 for instruction: %V2F64 = frem <2 x double> undef, undef +; CISLOWF64-NEXT: Cost Model: Found an estimated cost of 152 for instruction: %V4F64 = frem <4 x double> undef, undef +; CISLOWF64-NEXT: Cost Model: Found an estimated cost of 912 for instruction: %V8F64 = frem <8 x double> undef, undef +; CISLOWF64-NEXT: Cost Model: Found an estimated cost of 10 for instruction: ret i32 undef +; +; SIFASTF64-LABEL: 'frem' +; SIFASTF64-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %F32 = frem float undef, undef +; SIFASTF64-NEXT: Cost Model: Found an estimated cost of 56 for instruction: %V4F32 = frem <4 x float> undef, undef +; SIFASTF64-NEXT: Cost Model: Found an estimated cost of 112 for instruction: %V8F32 = frem <8 x float> undef, undef +; SIFASTF64-NEXT: Cost Model: Found an estimated cost of 672 for instruction: %V16F32 = frem <16 x float> undef, undef +; SIFASTF64-NEXT: Cost Model: Found an estimated cost of 27 for instruction: %F64 = frem double undef, undef +; SIFASTF64-NEXT: Cost Model: Found an estimated cost of 54 for instruction: %V2F64 = frem <2 x double> undef, undef +; SIFASTF64-NEXT: Cost Model: Found an estimated cost of 108 for instruction: %V4F64 = frem <4 x double> 
undef, undef +; SIFASTF64-NEXT: Cost Model: Found an estimated cost of 648 for instruction: %V8F64 = frem <8 x double> undef, undef +; SIFASTF64-NEXT: Cost Model: Found an estimated cost of 10 for instruction: ret i32 undef +; +; SISLOWF64-LABEL: 'frem' +; SISLOWF64-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %F32 = frem float undef, undef +; SISLOWF64-NEXT: Cost Model: Found an estimated cost of 56 for instruction: %V4F32 = frem <4 x float> undef, undef +; SISLOWF64-NEXT: Cost Model: Found an estimated cost of 112 for instruction: %V8F32 = frem <8 x float> undef, undef +; SISLOWF64-NEXT: Cost Model: Found an estimated cost of 672 for instruction: %V16F32 = frem <16 x float> undef, undef +; SISLOWF64-NEXT: Cost Model: Found an estimated cost of 41 for instruction: %F64 = frem double undef, undef +; SISLOWF64-NEXT: Cost Model: Found an estimated cost of 82 for instruction: %V2F64 = frem <2 x double> undef, undef +; SISLOWF64-NEXT: Cost Model: Found an estimated cost of 164 for instruction: %V4F64 = frem <4 x double> undef, undef +; SISLOWF64-NEXT: Cost Model: Found an estimated cost of 984 for instruction: %V8F64 = frem <8 x double> undef, undef +; SISLOWF64-NEXT: Cost Model: Found an estimated cost of 10 for instruction: ret i32 undef +; +; FP16-LABEL: 'frem' +; FP16-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %F32 = frem float undef, undef +; FP16-NEXT: Cost Model: Found an estimated cost of 56 for instruction: %V4F32 = frem <4 x float> undef, undef +; FP16-NEXT: Cost Model: Found an estimated cost of 112 for instruction: %V8F32 = frem <8 x float> undef, undef +; FP16-NEXT: Cost Model: Found an estimated cost of 672 for instruction: %V16F32 = frem <16 x float> undef, undef +; FP16-NEXT: Cost Model: Found an estimated cost of 38 for instruction: %F64 = frem double undef, undef +; FP16-NEXT: Cost Model: Found an estimated cost of 76 for instruction: %V2F64 = frem <2 x double> undef, undef +; FP16-NEXT: Cost Model: Found an estimated cost of 152 for instruction: %V4F64 = frem <4 x double> undef, undef +; FP16-NEXT: Cost Model: Found an estimated cost of 912 for instruction: %V8F64 = frem <8 x double> undef, undef +; FP16-NEXT: Cost Model: Found an estimated cost of 10 for instruction: ret i32 undef +; +; CI-SIZE-LABEL: 'frem' +; CI-SIZE-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %F32 = frem float undef, undef +; CI-SIZE-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %V4F32 = frem <4 x float> undef, undef +; CI-SIZE-NEXT: Cost Model: Found an estimated cost of 96 for instruction: %V8F32 = frem <8 x float> undef, undef +; CI-SIZE-NEXT: Cost Model: Found an estimated cost of 576 for instruction: %V16F32 = frem <16 x float> undef, undef +; CI-SIZE-NEXT: Cost Model: Found an estimated cost of 22 for instruction: %F64 = frem double undef, undef +; CI-SIZE-NEXT: Cost Model: Found an estimated cost of 44 for instruction: %V2F64 = frem <2 x double> undef, undef +; CI-SIZE-NEXT: Cost Model: Found an estimated cost of 88 for instruction: %V4F64 = frem <4 x double> undef, undef +; CI-SIZE-NEXT: Cost Model: Found an estimated cost of 528 for instruction: %V8F64 = frem <8 x double> undef, undef +; CI-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef +; +; SI-SIZE-LABEL: 'frem' +; SI-SIZE-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %F32 = frem float undef, undef +; SI-SIZE-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %V4F32 = frem <4 x float> undef, 
undef +; SI-SIZE-NEXT: Cost Model: Found an estimated cost of 96 for instruction: %V8F32 = frem <8 x float> undef, undef +; SI-SIZE-NEXT: Cost Model: Found an estimated cost of 576 for instruction: %V16F32 = frem <16 x float> undef, undef +; SI-SIZE-NEXT: Cost Model: Found an estimated cost of 25 for instruction: %F64 = frem double undef, undef +; SI-SIZE-NEXT: Cost Model: Found an estimated cost of 50 for instruction: %V2F64 = frem <2 x double> undef, undef +; SI-SIZE-NEXT: Cost Model: Found an estimated cost of 100 for instruction: %V4F64 = frem <4 x double> undef, undef +; SI-SIZE-NEXT: Cost Model: Found an estimated cost of 600 for instruction: %V8F64 = frem <8 x double> undef, undef +; SI-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef +; +; FP16-SIZE-LABEL: 'frem' +; FP16-SIZE-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %F32 = frem float undef, undef +; FP16-SIZE-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %V4F32 = frem <4 x float> undef, undef +; FP16-SIZE-NEXT: Cost Model: Found an estimated cost of 96 for instruction: %V8F32 = frem <8 x float> undef, undef +; FP16-SIZE-NEXT: Cost Model: Found an estimated cost of 576 for instruction: %V16F32 = frem <16 x float> undef, undef +; FP16-SIZE-NEXT: Cost Model: Found an estimated cost of 22 for instruction: %F64 = frem double undef, undef +; FP16-SIZE-NEXT: Cost Model: Found an estimated cost of 44 for instruction: %V2F64 = frem <2 x double> undef, undef +; FP16-SIZE-NEXT: Cost Model: Found an estimated cost of 88 for instruction: %V4F64 = frem <4 x double> undef, undef +; FP16-SIZE-NEXT: Cost Model: Found an estimated cost of 528 for instruction: %V8F64 = frem <8 x double> undef, undef +; FP16-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef +; + %F32 = frem float undef, undef + %V4F32 = frem <4 x float> undef, undef + %V8F32 = frem <8 x float> undef, undef + %V16F32 = frem <16 x float> undef, undef + + %F64 = frem double undef, undef + %V2F64 = frem <2 x double> undef, undef + %V4F64 = frem <4 x double> undef, undef + %V8F64 = frem <8 x double> undef, undef + + ret i32 undef +} + attributes #0 = { nounwind "denormal-fp-math-f32"="ieee,ieee" } attributes #1 = { nounwind "denormal-fp-math-f32"="preserve-sign,preserve-sign" } diff --git a/llvm/test/Analysis/CostModel/AMDGPU/fneg.ll b/llvm/test/Analysis/CostModel/AMDGPU/fneg.ll index bf33cdc63553..4125d60f7ecb 100644 --- a/llvm/test/Analysis/CostModel/AMDGPU/fneg.ll +++ b/llvm/test/Analysis/CostModel/AMDGPU/fneg.ll @@ -89,3 +89,39 @@ define amdgpu_kernel void @fneg_f16() { %v17f16 = fneg <17 x half> undef ret void } + +define i32 @fneg_idiom(i32 %arg) { +; CHECK-LABEL: 'fneg_idiom' +; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %F32 = fsub float -0.000000e+00, undef +; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4F32 = fsub <4 x float> , undef +; CHECK-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V8F32 = fsub <8 x float> , undef +; CHECK-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %V16F32 = fsub <16 x float> , undef +; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %F64 = fsub double -0.000000e+00, undef +; CHECK-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V2F64 = fsub <2 x double> , undef +; CHECK-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V4F64 = fsub <4 x double> , undef +; CHECK-NEXT: Cost Model: Found an estimated 
cost of 96 for instruction: %V8F64 = fsub <8 x double> , undef +; CHECK-NEXT: Cost Model: Found an estimated cost of 10 for instruction: ret i32 undef +; +; SIZE-LABEL: 'fneg_idiom' +; SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %F32 = fsub float -0.000000e+00, undef +; SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4F32 = fsub <4 x float> , undef +; SIZE-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V8F32 = fsub <8 x float> , undef +; SIZE-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %V16F32 = fsub <16 x float> , undef +; SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %F64 = fsub double -0.000000e+00, undef +; SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V2F64 = fsub <2 x double> , undef +; SIZE-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V4F64 = fsub <4 x double> , undef +; SIZE-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %V8F64 = fsub <8 x double> , undef +; SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef +; + %F32 = fsub float -0.0, undef + %V4F32 = fsub <4 x float> , undef + %V8F32 = fsub <8 x float> , undef + %V16F32 = fsub <16 x float> , undef + + %F64 = fsub double -0.0, undef + %V2F64 = fsub <2 x double> , undef + %V4F64 = fsub <4 x double> , undef + %V8F64 = fsub <8 x double> , undef + + ret i32 undef +} diff --git a/llvm/test/Analysis/CostModel/AMDGPU/insertelement.ll b/llvm/test/Analysis/CostModel/AMDGPU/insertelement.ll index 1bdbb5b18920..e587240d8dbc 100644 --- a/llvm/test/Analysis/CostModel/AMDGPU/insertelement.ll +++ b/llvm/test/Analysis/CostModel/AMDGPU/insertelement.ll @@ -220,3 +220,562 @@ define amdgpu_kernel void @insertelement_i64(i32 %arg) { %v5i64_a = insertelement <5 x i64> undef, i64 42, i32 %arg ret void } + +define i32 @insert_double_poison(i32 %arg) { +; ALL-LABEL: 'insert_double_poison' +; ALL-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v2f64_a = insertelement <2 x double> poison, double undef, i32 %arg +; ALL-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %v2f64_0 = insertelement <2 x double> poison, double undef, i32 0 +; ALL-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %v2f64_1 = insertelement <2 x double> poison, double undef, i32 1 +; ALL-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v4f64_a = insertelement <4 x double> poison, double undef, i32 %arg +; ALL-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %v4f64_0 = insertelement <4 x double> poison, double undef, i32 0 +; ALL-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %v4f64_3 = insertelement <4 x double> poison, double undef, i32 3 +; ALL-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v8f64_a = insertelement <8 x double> poison, double undef, i32 %arg +; ALL-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %v8f64_0 = insertelement <8 x double> poison, double undef, i32 0 +; ALL-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %v8f64_3 = insertelement <8 x double> poison, double undef, i32 3 +; ALL-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %v8f64_4 = insertelement <8 x double> poison, double undef, i32 4 +; ALL-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %v8f64_7 = insertelement <8 x double> poison, double undef, i32 7 +; ALL-NEXT: Cost Model: Found an estimated cost of 10 for instruction: ret i32 
undef +; +; ALL-SIZE-LABEL: 'insert_double_poison' +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v2f64_a = insertelement <2 x double> poison, double undef, i32 %arg +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %v2f64_0 = insertelement <2 x double> poison, double undef, i32 0 +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %v2f64_1 = insertelement <2 x double> poison, double undef, i32 1 +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v4f64_a = insertelement <4 x double> poison, double undef, i32 %arg +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %v4f64_0 = insertelement <4 x double> poison, double undef, i32 0 +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %v4f64_3 = insertelement <4 x double> poison, double undef, i32 3 +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v8f64_a = insertelement <8 x double> poison, double undef, i32 %arg +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %v8f64_0 = insertelement <8 x double> poison, double undef, i32 0 +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %v8f64_3 = insertelement <8 x double> poison, double undef, i32 3 +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %v8f64_4 = insertelement <8 x double> poison, double undef, i32 4 +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %v8f64_7 = insertelement <8 x double> poison, double undef, i32 7 +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef +; + %v2f64_a = insertelement <2 x double> poison, double undef, i32 %arg + %v2f64_0 = insertelement <2 x double> poison, double undef, i32 0 + %v2f64_1 = insertelement <2 x double> poison, double undef, i32 1 + + %v4f64_a = insertelement <4 x double> poison, double undef, i32 %arg + %v4f64_0 = insertelement <4 x double> poison, double undef, i32 0 + %v4f64_3 = insertelement <4 x double> poison, double undef, i32 3 + + %v8f64_a = insertelement <8 x double> poison, double undef, i32 %arg + %v8f64_0 = insertelement <8 x double> poison, double undef, i32 0 + %v8f64_3 = insertelement <8 x double> poison, double undef, i32 3 + %v8f64_4 = insertelement <8 x double> poison, double undef, i32 4 + %v8f64_7 = insertelement <8 x double> poison, double undef, i32 7 + + ret i32 undef +} + +define i32 @insert_float_poison(i32 %arg) { +; ALL-LABEL: 'insert_float_poison' +; ALL-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v2f32_a = insertelement <2 x float> poison, float undef, i32 %arg +; ALL-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %v2f32_0 = insertelement <2 x float> poison, float undef, i32 0 +; ALL-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %v2f32_1 = insertelement <2 x float> poison, float undef, i32 1 +; ALL-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v4f32_a = insertelement <4 x float> poison, float undef, i32 %arg +; ALL-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %v4f32_0 = insertelement <4 x float> poison, float undef, i32 0 +; ALL-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %v4f32_3 = insertelement <4 x float> poison, float undef, i32 3 +; ALL-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v8f32_a = insertelement <8 x float> poison, float undef, i32 %arg +; ALL-NEXT: Cost 
Model: Found an estimated cost of 0 for instruction: %v8f32_0 = insertelement <8 x float> poison, float undef, i32 0 +; ALL-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %v8f32_3 = insertelement <8 x float> poison, float undef, i32 3 +; ALL-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %v8f32_4 = insertelement <8 x float> poison, float undef, i32 4 +; ALL-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %v8f32_7 = insertelement <8 x float> poison, float undef, i32 7 +; ALL-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v16f32_a = insertelement <16 x float> poison, float undef, i32 %arg +; ALL-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %v16f32_0 = insertelement <16 x float> poison, float undef, i32 0 +; ALL-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %v16f32_3 = insertelement <16 x float> poison, float undef, i32 3 +; ALL-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %v16f32_8 = insertelement <16 x float> poison, float undef, i32 8 +; ALL-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %v16f32_15 = insertelement <16 x float> poison, float undef, i32 15 +; ALL-NEXT: Cost Model: Found an estimated cost of 10 for instruction: ret i32 undef +; +; ALL-SIZE-LABEL: 'insert_float_poison' +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v2f32_a = insertelement <2 x float> poison, float undef, i32 %arg +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %v2f32_0 = insertelement <2 x float> poison, float undef, i32 0 +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %v2f32_1 = insertelement <2 x float> poison, float undef, i32 1 +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v4f32_a = insertelement <4 x float> poison, float undef, i32 %arg +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %v4f32_0 = insertelement <4 x float> poison, float undef, i32 0 +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %v4f32_3 = insertelement <4 x float> poison, float undef, i32 3 +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v8f32_a = insertelement <8 x float> poison, float undef, i32 %arg +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %v8f32_0 = insertelement <8 x float> poison, float undef, i32 0 +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %v8f32_3 = insertelement <8 x float> poison, float undef, i32 3 +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %v8f32_4 = insertelement <8 x float> poison, float undef, i32 4 +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %v8f32_7 = insertelement <8 x float> poison, float undef, i32 7 +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v16f32_a = insertelement <16 x float> poison, float undef, i32 %arg +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %v16f32_0 = insertelement <16 x float> poison, float undef, i32 0 +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %v16f32_3 = insertelement <16 x float> poison, float undef, i32 3 +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %v16f32_8 = insertelement <16 x float> poison, float undef, i32 8 +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %v16f32_15 = 
insertelement <16 x float> poison, float undef, i32 15 +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef +; + %v2f32_a = insertelement <2 x float> poison, float undef, i32 %arg + %v2f32_0 = insertelement <2 x float> poison, float undef, i32 0 + %v2f32_1 = insertelement <2 x float> poison, float undef, i32 1 + + %v4f32_a = insertelement <4 x float> poison, float undef, i32 %arg + %v4f32_0 = insertelement <4 x float> poison, float undef, i32 0 + %v4f32_3 = insertelement <4 x float> poison, float undef, i32 3 + + %v8f32_a = insertelement <8 x float> poison, float undef, i32 %arg + %v8f32_0 = insertelement <8 x float> poison, float undef, i32 0 + %v8f32_3 = insertelement <8 x float> poison, float undef, i32 3 + %v8f32_4 = insertelement <8 x float> poison, float undef, i32 4 + %v8f32_7 = insertelement <8 x float> poison, float undef, i32 7 + + %v16f32_a = insertelement <16 x float> poison, float undef, i32 %arg + %v16f32_0 = insertelement <16 x float> poison, float undef, i32 0 + %v16f32_3 = insertelement <16 x float> poison, float undef, i32 3 + %v16f32_8 = insertelement <16 x float> poison, float undef, i32 8 + %v16f32_15 = insertelement <16 x float> poison, float undef, i32 15 + + ret i32 undef +} + +define i32 @insert_i64_poison(i32 %arg) { +; ALL-LABEL: 'insert_i64_poison' +; ALL-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v2i64_a = insertelement <2 x i64> poison, i64 undef, i32 %arg +; ALL-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %v2i64_0 = insertelement <2 x i64> poison, i64 undef, i32 0 +; ALL-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %v2i64_1 = insertelement <2 x i64> poison, i64 undef, i32 1 +; ALL-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v4i64_a = insertelement <4 x i64> poison, i64 undef, i32 %arg +; ALL-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %v4i64_0 = insertelement <4 x i64> poison, i64 undef, i32 0 +; ALL-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %v4i64_3 = insertelement <4 x i64> poison, i64 undef, i32 3 +; ALL-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v8i64_a = insertelement <8 x i64> poison, i64 undef, i32 %arg +; ALL-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %v8i64_0 = insertelement <8 x i64> poison, i64 undef, i32 0 +; ALL-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %v8i64_3 = insertelement <8 x i64> poison, i64 undef, i32 3 +; ALL-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %v8i64_4 = insertelement <8 x i64> poison, i64 undef, i32 4 +; ALL-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %v8i64_7 = insertelement <8 x i64> poison, i64 undef, i32 7 +; ALL-NEXT: Cost Model: Found an estimated cost of 10 for instruction: ret i32 undef +; +; ALL-SIZE-LABEL: 'insert_i64_poison' +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v2i64_a = insertelement <2 x i64> poison, i64 undef, i32 %arg +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %v2i64_0 = insertelement <2 x i64> poison, i64 undef, i32 0 +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %v2i64_1 = insertelement <2 x i64> poison, i64 undef, i32 1 +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v4i64_a = insertelement <4 x i64> poison, i64 undef, i32 %arg +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %v4i64_0 = 
insertelement <4 x i64> poison, i64 undef, i32 0 +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %v4i64_3 = insertelement <4 x i64> poison, i64 undef, i32 3 +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v8i64_a = insertelement <8 x i64> poison, i64 undef, i32 %arg +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %v8i64_0 = insertelement <8 x i64> poison, i64 undef, i32 0 +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %v8i64_3 = insertelement <8 x i64> poison, i64 undef, i32 3 +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %v8i64_4 = insertelement <8 x i64> poison, i64 undef, i32 4 +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %v8i64_7 = insertelement <8 x i64> poison, i64 undef, i32 7 +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef +; + %v2i64_a = insertelement <2 x i64> poison, i64 undef, i32 %arg + %v2i64_0 = insertelement <2 x i64> poison, i64 undef, i32 0 + %v2i64_1 = insertelement <2 x i64> poison, i64 undef, i32 1 + + %v4i64_a = insertelement <4 x i64> poison, i64 undef, i32 %arg + %v4i64_0 = insertelement <4 x i64> poison, i64 undef, i32 0 + %v4i64_3 = insertelement <4 x i64> poison, i64 undef, i32 3 + + %v8i64_a = insertelement <8 x i64> poison, i64 undef, i32 %arg + %v8i64_0 = insertelement <8 x i64> poison, i64 undef, i32 0 + %v8i64_3 = insertelement <8 x i64> poison, i64 undef, i32 3 + %v8i64_4 = insertelement <8 x i64> poison, i64 undef, i32 4 + %v8i64_7 = insertelement <8 x i64> poison, i64 undef, i32 7 + + ret i32 undef +} + +define i32 @insert_i32_poison(i32 %arg) { +; ALL-LABEL: 'insert_i32_poison' +; ALL-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v2i32_a = insertelement <2 x i32> poison, i32 undef, i32 %arg +; ALL-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %v2i32_0 = insertelement <2 x i32> poison, i32 undef, i32 0 +; ALL-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %v2i32_1 = insertelement <2 x i32> poison, i32 undef, i32 1 +; ALL-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v4i32_a = insertelement <4 x i32> poison, i32 undef, i32 %arg +; ALL-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %v4i32_0 = insertelement <4 x i32> poison, i32 undef, i32 0 +; ALL-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %v4i32_3 = insertelement <4 x i32> poison, i32 undef, i32 3 +; ALL-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v8i32_a = insertelement <8 x i32> poison, i32 undef, i32 %arg +; ALL-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %v8i32_0 = insertelement <8 x i32> poison, i32 undef, i32 0 +; ALL-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %v8i32_3 = insertelement <8 x i32> poison, i32 undef, i32 3 +; ALL-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %v8i32_4 = insertelement <8 x i32> poison, i32 undef, i32 4 +; ALL-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %v8i32_7 = insertelement <8 x i32> poison, i32 undef, i32 7 +; ALL-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v16i32_a = insertelement <16 x i32> poison, i32 undef, i32 %arg +; ALL-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %v16i32_0 = insertelement <16 x i32> poison, i32 undef, i32 0 +; ALL-NEXT: Cost Model: Found an estimated cost of 0 for instruction: 
%v16i32_3 = insertelement <16 x i32> poison, i32 undef, i32 3 +; ALL-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %v16i32_8 = insertelement <16 x i32> poison, i32 undef, i32 8 +; ALL-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %v16i32_15 = insertelement <16 x i32> poison, i32 undef, i32 15 +; ALL-NEXT: Cost Model: Found an estimated cost of 10 for instruction: ret i32 undef +; +; ALL-SIZE-LABEL: 'insert_i32_poison' +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v2i32_a = insertelement <2 x i32> poison, i32 undef, i32 %arg +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %v2i32_0 = insertelement <2 x i32> poison, i32 undef, i32 0 +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %v2i32_1 = insertelement <2 x i32> poison, i32 undef, i32 1 +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v4i32_a = insertelement <4 x i32> poison, i32 undef, i32 %arg +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %v4i32_0 = insertelement <4 x i32> poison, i32 undef, i32 0 +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %v4i32_3 = insertelement <4 x i32> poison, i32 undef, i32 3 +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v8i32_a = insertelement <8 x i32> poison, i32 undef, i32 %arg +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %v8i32_0 = insertelement <8 x i32> poison, i32 undef, i32 0 +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %v8i32_3 = insertelement <8 x i32> poison, i32 undef, i32 3 +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %v8i32_4 = insertelement <8 x i32> poison, i32 undef, i32 4 +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %v8i32_7 = insertelement <8 x i32> poison, i32 undef, i32 7 +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v16i32_a = insertelement <16 x i32> poison, i32 undef, i32 %arg +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %v16i32_0 = insertelement <16 x i32> poison, i32 undef, i32 0 +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %v16i32_3 = insertelement <16 x i32> poison, i32 undef, i32 3 +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %v16i32_8 = insertelement <16 x i32> poison, i32 undef, i32 8 +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %v16i32_15 = insertelement <16 x i32> poison, i32 undef, i32 15 +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef +; + %v2i32_a = insertelement <2 x i32> poison, i32 undef, i32 %arg + %v2i32_0 = insertelement <2 x i32> poison, i32 undef, i32 0 + %v2i32_1 = insertelement <2 x i32> poison, i32 undef, i32 1 + + %v4i32_a = insertelement <4 x i32> poison, i32 undef, i32 %arg + %v4i32_0 = insertelement <4 x i32> poison, i32 undef, i32 0 + %v4i32_3 = insertelement <4 x i32> poison, i32 undef, i32 3 + + %v8i32_a = insertelement <8 x i32> poison, i32 undef, i32 %arg + %v8i32_0 = insertelement <8 x i32> poison, i32 undef, i32 0 + %v8i32_3 = insertelement <8 x i32> poison, i32 undef, i32 3 + %v8i32_4 = insertelement <8 x i32> poison, i32 undef, i32 4 + %v8i32_7 = insertelement <8 x i32> poison, i32 undef, i32 7 + + %v16i32_a = insertelement <16 x i32> poison, i32 undef, i32 %arg + %v16i32_0 = insertelement <16 x i32> 
poison, i32 undef, i32 0 + %v16i32_3 = insertelement <16 x i32> poison, i32 undef, i32 3 + %v16i32_8 = insertelement <16 x i32> poison, i32 undef, i32 8 + %v16i32_15 = insertelement <16 x i32> poison, i32 undef, i32 15 + + ret i32 undef +} + +define i32 @insert_i16_poison(i32 %arg) { +; CI-LABEL: 'insert_i16_poison' +; CI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v2i16_a = insertelement <2 x i16> poison, i16 undef, i32 %arg +; CI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v2i16_0 = insertelement <2 x i16> poison, i16 undef, i32 0 +; CI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v2i16_1 = insertelement <2 x i16> poison, i16 undef, i32 1 +; CI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v4i16_a = insertelement <4 x i16> poison, i16 undef, i32 %arg +; CI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v4i16_0 = insertelement <4 x i16> poison, i16 undef, i32 0 +; CI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v4i16_3 = insertelement <4 x i16> poison, i16 undef, i32 3 +; CI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v8i16_a = insertelement <8 x i16> poison, i16 undef, i32 %arg +; CI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v8i16_0 = insertelement <8 x i16> poison, i16 undef, i32 0 +; CI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v8i16_7 = insertelement <8 x i16> poison, i16 undef, i32 7 +; CI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v16i16_a = insertelement <16 x i16> poison, i16 undef, i32 %arg +; CI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v16i16_0 = insertelement <16 x i16> poison, i16 undef, i32 0 +; CI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v16i16_7 = insertelement <16 x i16> poison, i16 undef, i32 7 +; CI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v16i16_8 = insertelement <16 x i16> poison, i16 undef, i32 8 +; CI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v16i16_15 = insertelement <16 x i16> poison, i16 undef, i32 15 +; CI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v32i16_a = insertelement <32 x i16> poison, i16 undef, i32 %arg +; CI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v32i16_0 = insertelement <32 x i16> poison, i16 undef, i32 0 +; CI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v32i16_7 = insertelement <32 x i16> poison, i16 undef, i32 7 +; CI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v32i16_8 = insertelement <32 x i16> poison, i16 undef, i32 8 +; CI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v32i16_15 = insertelement <32 x i16> poison, i16 undef, i32 15 +; CI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v32i16_16 = insertelement <32 x i16> poison, i16 undef, i32 16 +; CI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v32i16_24 = insertelement <32 x i16> poison, i16 undef, i32 24 +; CI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v32i16_31 = insertelement <32 x i16> poison, i16 undef, i32 31 +; CI-NEXT: Cost Model: Found an estimated cost of 10 for instruction: ret i32 undef +; +; GFX89-LABEL: 'insert_i16_poison' +; GFX89-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v2i16_a = insertelement <2 x i16> poison, i16 undef, i32 %arg +; GFX89-NEXT: Cost Model: Found an estimated cost of 0 for 
instruction: %v2i16_0 = insertelement <2 x i16> poison, i16 undef, i32 0 +; GFX89-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v2i16_1 = insertelement <2 x i16> poison, i16 undef, i32 1 +; GFX89-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v4i16_a = insertelement <4 x i16> poison, i16 undef, i32 %arg +; GFX89-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %v4i16_0 = insertelement <4 x i16> poison, i16 undef, i32 0 +; GFX89-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v4i16_3 = insertelement <4 x i16> poison, i16 undef, i32 3 +; GFX89-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v8i16_a = insertelement <8 x i16> poison, i16 undef, i32 %arg +; GFX89-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %v8i16_0 = insertelement <8 x i16> poison, i16 undef, i32 0 +; GFX89-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v8i16_7 = insertelement <8 x i16> poison, i16 undef, i32 7 +; GFX89-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v16i16_a = insertelement <16 x i16> poison, i16 undef, i32 %arg +; GFX89-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %v16i16_0 = insertelement <16 x i16> poison, i16 undef, i32 0 +; GFX89-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v16i16_7 = insertelement <16 x i16> poison, i16 undef, i32 7 +; GFX89-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v16i16_8 = insertelement <16 x i16> poison, i16 undef, i32 8 +; GFX89-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v16i16_15 = insertelement <16 x i16> poison, i16 undef, i32 15 +; GFX89-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v32i16_a = insertelement <32 x i16> poison, i16 undef, i32 %arg +; GFX89-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %v32i16_0 = insertelement <32 x i16> poison, i16 undef, i32 0 +; GFX89-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v32i16_7 = insertelement <32 x i16> poison, i16 undef, i32 7 +; GFX89-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v32i16_8 = insertelement <32 x i16> poison, i16 undef, i32 8 +; GFX89-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v32i16_15 = insertelement <32 x i16> poison, i16 undef, i32 15 +; GFX89-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v32i16_16 = insertelement <32 x i16> poison, i16 undef, i32 16 +; GFX89-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v32i16_24 = insertelement <32 x i16> poison, i16 undef, i32 24 +; GFX89-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v32i16_31 = insertelement <32 x i16> poison, i16 undef, i32 31 +; GFX89-NEXT: Cost Model: Found an estimated cost of 10 for instruction: ret i32 undef +; +; CI-SIZE-LABEL: 'insert_i16_poison' +; CI-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v2i16_a = insertelement <2 x i16> poison, i16 undef, i32 %arg +; CI-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v2i16_0 = insertelement <2 x i16> poison, i16 undef, i32 0 +; CI-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v2i16_1 = insertelement <2 x i16> poison, i16 undef, i32 1 +; CI-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v4i16_a = insertelement <4 x i16> poison, i16 undef, i32 %arg +; CI-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v4i16_0 = 
insertelement <4 x i16> poison, i16 undef, i32 0 +; CI-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v4i16_3 = insertelement <4 x i16> poison, i16 undef, i32 3 +; CI-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v8i16_a = insertelement <8 x i16> poison, i16 undef, i32 %arg +; CI-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v8i16_0 = insertelement <8 x i16> poison, i16 undef, i32 0 +; CI-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v8i16_7 = insertelement <8 x i16> poison, i16 undef, i32 7 +; CI-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v16i16_a = insertelement <16 x i16> poison, i16 undef, i32 %arg +; CI-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v16i16_0 = insertelement <16 x i16> poison, i16 undef, i32 0 +; CI-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v16i16_7 = insertelement <16 x i16> poison, i16 undef, i32 7 +; CI-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v16i16_8 = insertelement <16 x i16> poison, i16 undef, i32 8 +; CI-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v16i16_15 = insertelement <16 x i16> poison, i16 undef, i32 15 +; CI-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v32i16_a = insertelement <32 x i16> poison, i16 undef, i32 %arg +; CI-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v32i16_0 = insertelement <32 x i16> poison, i16 undef, i32 0 +; CI-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v32i16_7 = insertelement <32 x i16> poison, i16 undef, i32 7 +; CI-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v32i16_8 = insertelement <32 x i16> poison, i16 undef, i32 8 +; CI-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v32i16_15 = insertelement <32 x i16> poison, i16 undef, i32 15 +; CI-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v32i16_16 = insertelement <32 x i16> poison, i16 undef, i32 16 +; CI-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v32i16_24 = insertelement <32 x i16> poison, i16 undef, i32 24 +; CI-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v32i16_31 = insertelement <32 x i16> poison, i16 undef, i32 31 +; CI-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef +; +; GFX89-SIZE-LABEL: 'insert_i16_poison' +; GFX89-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v2i16_a = insertelement <2 x i16> poison, i16 undef, i32 %arg +; GFX89-SIZE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %v2i16_0 = insertelement <2 x i16> poison, i16 undef, i32 0 +; GFX89-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v2i16_1 = insertelement <2 x i16> poison, i16 undef, i32 1 +; GFX89-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v4i16_a = insertelement <4 x i16> poison, i16 undef, i32 %arg +; GFX89-SIZE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %v4i16_0 = insertelement <4 x i16> poison, i16 undef, i32 0 +; GFX89-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v4i16_3 = insertelement <4 x i16> poison, i16 undef, i32 3 +; GFX89-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v8i16_a = insertelement <8 x i16> poison, i16 undef, i32 %arg +; GFX89-SIZE-NEXT: Cost Model: Found an estimated cost 
of 0 for instruction: %v8i16_0 = insertelement <8 x i16> poison, i16 undef, i32 0 +; GFX89-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v8i16_7 = insertelement <8 x i16> poison, i16 undef, i32 7 +; GFX89-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v16i16_a = insertelement <16 x i16> poison, i16 undef, i32 %arg +; GFX89-SIZE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %v16i16_0 = insertelement <16 x i16> poison, i16 undef, i32 0 +; GFX89-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v16i16_7 = insertelement <16 x i16> poison, i16 undef, i32 7 +; GFX89-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v16i16_8 = insertelement <16 x i16> poison, i16 undef, i32 8 +; GFX89-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v16i16_15 = insertelement <16 x i16> poison, i16 undef, i32 15 +; GFX89-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v32i16_a = insertelement <32 x i16> poison, i16 undef, i32 %arg +; GFX89-SIZE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %v32i16_0 = insertelement <32 x i16> poison, i16 undef, i32 0 +; GFX89-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v32i16_7 = insertelement <32 x i16> poison, i16 undef, i32 7 +; GFX89-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v32i16_8 = insertelement <32 x i16> poison, i16 undef, i32 8 +; GFX89-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v32i16_15 = insertelement <32 x i16> poison, i16 undef, i32 15 +; GFX89-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v32i16_16 = insertelement <32 x i16> poison, i16 undef, i32 16 +; GFX89-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v32i16_24 = insertelement <32 x i16> poison, i16 undef, i32 24 +; GFX89-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v32i16_31 = insertelement <32 x i16> poison, i16 undef, i32 31 +; GFX89-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef +; + %v2i16_a = insertelement <2 x i16> poison, i16 undef, i32 %arg + %v2i16_0 = insertelement <2 x i16> poison, i16 undef, i32 0 + %v2i16_1 = insertelement <2 x i16> poison, i16 undef, i32 1 + + %v4i16_a = insertelement <4 x i16> poison, i16 undef, i32 %arg + %v4i16_0 = insertelement <4 x i16> poison, i16 undef, i32 0 + %v4i16_3 = insertelement <4 x i16> poison, i16 undef, i32 3 + + %v8i16_a = insertelement <8 x i16> poison, i16 undef, i32 %arg + %v8i16_0 = insertelement <8 x i16> poison, i16 undef, i32 0 + %v8i16_7 = insertelement <8 x i16> poison, i16 undef, i32 7 + + %v16i16_a = insertelement <16 x i16> poison, i16 undef, i32 %arg + %v16i16_0 = insertelement <16 x i16> poison, i16 undef, i32 0 + %v16i16_7 = insertelement <16 x i16> poison, i16 undef, i32 7 + %v16i16_8 = insertelement <16 x i16> poison, i16 undef, i32 8 + %v16i16_15 = insertelement <16 x i16> poison, i16 undef, i32 15 + + %v32i16_a = insertelement <32 x i16> poison, i16 undef, i32 %arg + %v32i16_0 = insertelement <32 x i16> poison, i16 undef, i32 0 + %v32i16_7 = insertelement <32 x i16> poison, i16 undef, i32 7 + %v32i16_8 = insertelement <32 x i16> poison, i16 undef, i32 8 + %v32i16_15 = insertelement <32 x i16> poison, i16 undef, i32 15 + %v32i16_16 = insertelement <32 x i16> poison, i16 undef, i32 16 + %v32i16_24 = insertelement <32 x i16> poison, i16 undef, i32 24 + %v32i16_31 = insertelement <32 x i16> 
poison, i16 undef, i32 31 + + ret i32 undef +} + +define i32 @insert_i8_poison(i32 %arg) { +; ALL-LABEL: 'insert_i8_poison' +; ALL-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v2i8_a = insertelement <2 x i8> poison, i8 undef, i32 %arg +; ALL-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v2i8_0 = insertelement <2 x i8> poison, i8 undef, i32 0 +; ALL-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v2i8_3 = insertelement <2 x i8> poison, i8 undef, i32 1 +; ALL-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v4i8_a = insertelement <4 x i8> poison, i8 undef, i32 %arg +; ALL-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v4i8_0 = insertelement <4 x i8> poison, i8 undef, i32 0 +; ALL-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v4i8_3 = insertelement <4 x i8> poison, i8 undef, i32 3 +; ALL-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v8i8_a = insertelement <8 x i8> poison, i8 undef, i32 %arg +; ALL-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v8i8_0 = insertelement <8 x i8> poison, i8 undef, i32 0 +; ALL-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v8i8_7 = insertelement <8 x i8> poison, i8 undef, i32 7 +; ALL-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v16i8_a = insertelement <16 x i8> poison, i8 undef, i32 %arg +; ALL-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v16i8_0 = insertelement <16 x i8> poison, i8 undef, i32 0 +; ALL-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v16i8_8 = insertelement <16 x i8> poison, i8 undef, i32 8 +; ALL-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v16i8_15 = insertelement <16 x i8> poison, i8 undef, i32 15 +; ALL-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v32i8_a = insertelement <32 x i8> poison, i8 undef, i32 %arg +; ALL-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v32i8_0 = insertelement <32 x i8> poison, i8 undef, i32 0 +; ALL-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v32i8_7 = insertelement <32 x i8> poison, i8 undef, i32 7 +; ALL-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v32i8_8 = insertelement <32 x i8> poison, i8 undef, i32 8 +; ALL-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v32i8_15 = insertelement <32 x i8> poison, i8 undef, i32 15 +; ALL-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v32i8_24 = insertelement <32 x i8> poison, i8 undef, i32 24 +; ALL-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v32i8_31 = insertelement <32 x i8> poison, i8 undef, i32 31 +; ALL-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v64i8_a = insertelement <64 x i8> poison, i8 undef, i32 %arg +; ALL-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v64i8_0 = insertelement <64 x i8> poison, i8 undef, i32 0 +; ALL-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v64i8_7 = insertelement <64 x i8> poison, i8 undef, i32 7 +; ALL-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v64i8_8 = insertelement <64 x i8> poison, i8 undef, i32 8 +; ALL-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v64i8_15 = insertelement <64 x i8> poison, i8 undef, i32 15 +; ALL-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v64i8_24 = insertelement <64 x i8> poison, i8 undef, i32 24 +; ALL-NEXT: Cost Model: Found an 
estimated cost of 1 for instruction: %v64i8_31 = insertelement <64 x i8> poison, i8 undef, i32 31 +; ALL-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v64i8_32 = insertelement <64 x i8> poison, i8 undef, i32 32 +; ALL-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v64i8_48 = insertelement <64 x i8> poison, i8 undef, i32 48 +; ALL-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v64i8_63 = insertelement <64 x i8> poison, i8 undef, i32 63 +; ALL-NEXT: Cost Model: Found an estimated cost of 10 for instruction: ret i32 undef +; +; ALL-SIZE-LABEL: 'insert_i8_poison' +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v2i8_a = insertelement <2 x i8> poison, i8 undef, i32 %arg +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v2i8_0 = insertelement <2 x i8> poison, i8 undef, i32 0 +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v2i8_3 = insertelement <2 x i8> poison, i8 undef, i32 1 +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v4i8_a = insertelement <4 x i8> poison, i8 undef, i32 %arg +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v4i8_0 = insertelement <4 x i8> poison, i8 undef, i32 0 +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v4i8_3 = insertelement <4 x i8> poison, i8 undef, i32 3 +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v8i8_a = insertelement <8 x i8> poison, i8 undef, i32 %arg +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v8i8_0 = insertelement <8 x i8> poison, i8 undef, i32 0 +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v8i8_7 = insertelement <8 x i8> poison, i8 undef, i32 7 +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v16i8_a = insertelement <16 x i8> poison, i8 undef, i32 %arg +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v16i8_0 = insertelement <16 x i8> poison, i8 undef, i32 0 +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v16i8_8 = insertelement <16 x i8> poison, i8 undef, i32 8 +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v16i8_15 = insertelement <16 x i8> poison, i8 undef, i32 15 +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v32i8_a = insertelement <32 x i8> poison, i8 undef, i32 %arg +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v32i8_0 = insertelement <32 x i8> poison, i8 undef, i32 0 +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v32i8_7 = insertelement <32 x i8> poison, i8 undef, i32 7 +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v32i8_8 = insertelement <32 x i8> poison, i8 undef, i32 8 +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v32i8_15 = insertelement <32 x i8> poison, i8 undef, i32 15 +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v32i8_24 = insertelement <32 x i8> poison, i8 undef, i32 24 +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v32i8_31 = insertelement <32 x i8> poison, i8 undef, i32 31 +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v64i8_a = insertelement <64 x i8> poison, i8 undef, i32 %arg +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v64i8_0 = 
insertelement <64 x i8> poison, i8 undef, i32 0 +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v64i8_7 = insertelement <64 x i8> poison, i8 undef, i32 7 +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v64i8_8 = insertelement <64 x i8> poison, i8 undef, i32 8 +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v64i8_15 = insertelement <64 x i8> poison, i8 undef, i32 15 +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v64i8_24 = insertelement <64 x i8> poison, i8 undef, i32 24 +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v64i8_31 = insertelement <64 x i8> poison, i8 undef, i32 31 +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v64i8_32 = insertelement <64 x i8> poison, i8 undef, i32 32 +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v64i8_48 = insertelement <64 x i8> poison, i8 undef, i32 48 +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v64i8_63 = insertelement <64 x i8> poison, i8 undef, i32 63 +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef +; + %v2i8_a = insertelement <2 x i8> poison, i8 undef, i32 %arg + %v2i8_0 = insertelement <2 x i8> poison, i8 undef, i32 0 + %v2i8_3 = insertelement <2 x i8> poison, i8 undef, i32 1 + + %v4i8_a = insertelement <4 x i8> poison, i8 undef, i32 %arg + %v4i8_0 = insertelement <4 x i8> poison, i8 undef, i32 0 + %v4i8_3 = insertelement <4 x i8> poison, i8 undef, i32 3 + + %v8i8_a = insertelement <8 x i8> poison, i8 undef, i32 %arg + %v8i8_0 = insertelement <8 x i8> poison, i8 undef, i32 0 + %v8i8_7 = insertelement <8 x i8> poison, i8 undef, i32 7 + + %v16i8_a = insertelement <16 x i8> poison, i8 undef, i32 %arg + %v16i8_0 = insertelement <16 x i8> poison, i8 undef, i32 0 + %v16i8_8 = insertelement <16 x i8> poison, i8 undef, i32 8 + %v16i8_15 = insertelement <16 x i8> poison, i8 undef, i32 15 + + %v32i8_a = insertelement <32 x i8> poison, i8 undef, i32 %arg + %v32i8_0 = insertelement <32 x i8> poison, i8 undef, i32 0 + %v32i8_7 = insertelement <32 x i8> poison, i8 undef, i32 7 + %v32i8_8 = insertelement <32 x i8> poison, i8 undef, i32 8 + %v32i8_15 = insertelement <32 x i8> poison, i8 undef, i32 15 + %v32i8_24 = insertelement <32 x i8> poison, i8 undef, i32 24 + %v32i8_31 = insertelement <32 x i8> poison, i8 undef, i32 31 + + %v64i8_a = insertelement <64 x i8> poison, i8 undef, i32 %arg + %v64i8_0 = insertelement <64 x i8> poison, i8 undef, i32 0 + %v64i8_7 = insertelement <64 x i8> poison, i8 undef, i32 7 + %v64i8_8 = insertelement <64 x i8> poison, i8 undef, i32 8 + %v64i8_15 = insertelement <64 x i8> poison, i8 undef, i32 15 + %v64i8_24 = insertelement <64 x i8> poison, i8 undef, i32 24 + %v64i8_31 = insertelement <64 x i8> poison, i8 undef, i32 31 + %v64i8_32 = insertelement <64 x i8> poison, i8 undef, i32 32 + %v64i8_48 = insertelement <64 x i8> poison, i8 undef, i32 48 + %v64i8_63 = insertelement <64 x i8> poison, i8 undef, i32 63 + + ret i32 undef +} + +define i32 @insert_i1_poison(i32 %arg) { +; ALL-LABEL: 'insert_i1_poison' +; ALL-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v2i1_a = insertelement <2 x i1> poison, i1 undef, i32 %arg +; ALL-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v2i1_0 = insertelement <2 x i1> poison, i1 undef, i32 0 +; ALL-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v2i1_1 = insertelement <2 x i1> 
poison, i1 undef, i32 1 +; ALL-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v4i1_a = insertelement <4 x i1> poison, i1 undef, i32 %arg +; ALL-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v4i1_0 = insertelement <4 x i1> poison, i1 undef, i32 0 +; ALL-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v4i1_2 = insertelement <4 x i1> poison, i1 undef, i32 2 +; ALL-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v8i1_a = insertelement <8 x i1> poison, i1 undef, i32 %arg +; ALL-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v8i1_0 = insertelement <8 x i1> poison, i1 undef, i32 0 +; ALL-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v8i1_4 = insertelement <8 x i1> poison, i1 undef, i32 4 +; ALL-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v16i1_a = insertelement <16 x i1> poison, i1 undef, i32 %arg +; ALL-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v16i1_0 = insertelement <16 x i1> poison, i1 undef, i32 0 +; ALL-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v16i1_8 = insertelement <16 x i1> poison, i1 undef, i32 8 +; ALL-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v16i1_15 = insertelement <16 x i1> poison, i1 undef, i32 15 +; ALL-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v32i1_a = insertelement <32 x i1> poison, i1 undef, i32 %arg +; ALL-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v32i1_0 = insertelement <32 x i1> poison, i1 undef, i32 0 +; ALL-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v32i1_7 = insertelement <32 x i1> poison, i1 undef, i32 7 +; ALL-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v32i1_8 = insertelement <32 x i1> poison, i1 undef, i32 8 +; ALL-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v32i1_15 = insertelement <32 x i1> poison, i1 undef, i32 15 +; ALL-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v32i1_24 = insertelement <32 x i1> poison, i1 undef, i32 24 +; ALL-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v32i1_31 = insertelement <32 x i1> poison, i1 undef, i32 31 +; ALL-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v64i1_a = insertelement <64 x i1> poison, i1 undef, i32 %arg +; ALL-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v64i1_0 = insertelement <64 x i1> poison, i1 undef, i32 0 +; ALL-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v64i1_7 = insertelement <64 x i1> poison, i1 undef, i32 7 +; ALL-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v64i1_8 = insertelement <64 x i1> poison, i1 undef, i32 8 +; ALL-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v64i1_15 = insertelement <64 x i1> poison, i1 undef, i32 15 +; ALL-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v64i1_24 = insertelement <64 x i1> poison, i1 undef, i32 24 +; ALL-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v64i1_31 = insertelement <64 x i1> poison, i1 undef, i32 31 +; ALL-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v64i1_32 = insertelement <64 x i1> poison, i1 undef, i32 32 +; ALL-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v64i1_48 = insertelement <64 x i1> poison, i1 undef, i32 48 +; ALL-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v64i1_63 = insertelement <64 x i1> poison, i1 undef, i32 
63 +; ALL-NEXT: Cost Model: Found an estimated cost of 10 for instruction: ret i32 undef +; +; ALL-SIZE-LABEL: 'insert_i1_poison' +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v2i1_a = insertelement <2 x i1> poison, i1 undef, i32 %arg +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v2i1_0 = insertelement <2 x i1> poison, i1 undef, i32 0 +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v2i1_1 = insertelement <2 x i1> poison, i1 undef, i32 1 +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v4i1_a = insertelement <4 x i1> poison, i1 undef, i32 %arg +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v4i1_0 = insertelement <4 x i1> poison, i1 undef, i32 0 +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v4i1_2 = insertelement <4 x i1> poison, i1 undef, i32 2 +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v8i1_a = insertelement <8 x i1> poison, i1 undef, i32 %arg +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v8i1_0 = insertelement <8 x i1> poison, i1 undef, i32 0 +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v8i1_4 = insertelement <8 x i1> poison, i1 undef, i32 4 +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v16i1_a = insertelement <16 x i1> poison, i1 undef, i32 %arg +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v16i1_0 = insertelement <16 x i1> poison, i1 undef, i32 0 +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v16i1_8 = insertelement <16 x i1> poison, i1 undef, i32 8 +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v16i1_15 = insertelement <16 x i1> poison, i1 undef, i32 15 +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v32i1_a = insertelement <32 x i1> poison, i1 undef, i32 %arg +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v32i1_0 = insertelement <32 x i1> poison, i1 undef, i32 0 +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v32i1_7 = insertelement <32 x i1> poison, i1 undef, i32 7 +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v32i1_8 = insertelement <32 x i1> poison, i1 undef, i32 8 +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v32i1_15 = insertelement <32 x i1> poison, i1 undef, i32 15 +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v32i1_24 = insertelement <32 x i1> poison, i1 undef, i32 24 +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v32i1_31 = insertelement <32 x i1> poison, i1 undef, i32 31 +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v64i1_a = insertelement <64 x i1> poison, i1 undef, i32 %arg +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v64i1_0 = insertelement <64 x i1> poison, i1 undef, i32 0 +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v64i1_7 = insertelement <64 x i1> poison, i1 undef, i32 7 +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v64i1_8 = insertelement <64 x i1> poison, i1 undef, i32 8 +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v64i1_15 = insertelement <64 x i1> poison, i1 undef, i32 15 +; ALL-SIZE-NEXT: Cost Model: Found an 
estimated cost of 1 for instruction: %v64i1_24 = insertelement <64 x i1> poison, i1 undef, i32 24 +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v64i1_31 = insertelement <64 x i1> poison, i1 undef, i32 31 +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v64i1_32 = insertelement <64 x i1> poison, i1 undef, i32 32 +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v64i1_48 = insertelement <64 x i1> poison, i1 undef, i32 48 +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v64i1_63 = insertelement <64 x i1> poison, i1 undef, i32 63 +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef +; + %v2i1_a = insertelement <2 x i1> poison, i1 undef, i32 %arg + %v2i1_0 = insertelement <2 x i1> poison, i1 undef, i32 0 + %v2i1_1 = insertelement <2 x i1> poison, i1 undef, i32 1 + + %v4i1_a = insertelement <4 x i1> poison, i1 undef, i32 %arg + %v4i1_0 = insertelement <4 x i1> poison, i1 undef, i32 0 + %v4i1_2 = insertelement <4 x i1> poison, i1 undef, i32 2 + + %v8i1_a = insertelement <8 x i1> poison, i1 undef, i32 %arg + %v8i1_0 = insertelement <8 x i1> poison, i1 undef, i32 0 + %v8i1_4 = insertelement <8 x i1> poison, i1 undef, i32 4 + + %v16i1_a = insertelement <16 x i1> poison, i1 undef, i32 %arg + %v16i1_0 = insertelement <16 x i1> poison, i1 undef, i32 0 + %v16i1_8 = insertelement <16 x i1> poison, i1 undef, i32 8 + %v16i1_15 = insertelement <16 x i1> poison, i1 undef, i32 15 + + %v32i1_a = insertelement <32 x i1> poison, i1 undef, i32 %arg + %v32i1_0 = insertelement <32 x i1> poison, i1 undef, i32 0 + %v32i1_7 = insertelement <32 x i1> poison, i1 undef, i32 7 + %v32i1_8 = insertelement <32 x i1> poison, i1 undef, i32 8 + %v32i1_15 = insertelement <32 x i1> poison, i1 undef, i32 15 + %v32i1_24 = insertelement <32 x i1> poison, i1 undef, i32 24 + %v32i1_31 = insertelement <32 x i1> poison, i1 undef, i32 31 + + %v64i1_a = insertelement <64 x i1> poison, i1 undef, i32 %arg + %v64i1_0 = insertelement <64 x i1> poison, i1 undef, i32 0 + %v64i1_7 = insertelement <64 x i1> poison, i1 undef, i32 7 + %v64i1_8 = insertelement <64 x i1> poison, i1 undef, i32 8 + %v64i1_15 = insertelement <64 x i1> poison, i1 undef, i32 15 + %v64i1_24 = insertelement <64 x i1> poison, i1 undef, i32 24 + %v64i1_31 = insertelement <64 x i1> poison, i1 undef, i32 31 + %v64i1_32 = insertelement <64 x i1> poison, i1 undef, i32 32 + %v64i1_48 = insertelement <64 x i1> poison, i1 undef, i32 48 + %v64i1_63 = insertelement <64 x i1> poison, i1 undef, i32 63 + + ret i32 undef +} diff --git a/llvm/test/Analysis/CostModel/AMDGPU/mul.ll b/llvm/test/Analysis/CostModel/AMDGPU/mul.ll index 824b7e1ee93d..8fddcbefaf76 100644 --- a/llvm/test/Analysis/CostModel/AMDGPU/mul.ll +++ b/llvm/test/Analysis/CostModel/AMDGPU/mul.ll @@ -112,4 +112,404 @@ define amdgpu_kernel void @mul_i16() #0 { ret void } +define i32 @mul_constpow2() { +; SLOW16-LABEL: 'mul_constpow2' +; SLOW16-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %I64 = mul i64 undef, 16 +; SLOW16-NEXT: Cost Model: Found an estimated cost of 40 for instruction: %V2i64 = mul <2 x i64> undef, +; SLOW16-NEXT: Cost Model: Found an estimated cost of 80 for instruction: %V4i64 = mul <4 x i64> undef, +; SLOW16-NEXT: Cost Model: Found an estimated cost of 480 for instruction: %V8i64 = mul <8 x i64> undef, +; SLOW16-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I32 = mul i32 undef, 16 +; SLOW16-NEXT: Cost Model: Found an 
estimated cost of 16 for instruction: %V4i32 = mul <4 x i32> undef, +; SLOW16-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %V8i32 = mul <8 x i32> undef, +; SLOW16-NEXT: Cost Model: Found an estimated cost of 192 for instruction: %V16i32 = mul <16 x i32> undef, +; SLOW16-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I16 = mul i16 undef, 16 +; SLOW16-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %V8i16 = mul <8 x i16> undef, +; SLOW16-NEXT: Cost Model: Found an estimated cost of 64 for instruction: %V16i16 = mul <16 x i16> undef, +; SLOW16-NEXT: Cost Model: Found an estimated cost of 136 for instruction: %V32i16 = mul <32 x i16> undef, +; SLOW16-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I8 = mul i8 undef, 16 +; SLOW16-NEXT: Cost Model: Found an estimated cost of 64 for instruction: %V16i8 = mul <16 x i8> undef, +; SLOW16-NEXT: Cost Model: Found an estimated cost of 128 for instruction: %V32i8 = mul <32 x i8> undef, +; SLOW16-NEXT: Cost Model: Found an estimated cost of 264 for instruction: %V64i8 = mul <64 x i8> undef, +; SLOW16-NEXT: Cost Model: Found an estimated cost of 10 for instruction: ret i32 undef +; +; FAST16-LABEL: 'mul_constpow2' +; FAST16-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %I64 = mul i64 undef, 16 +; FAST16-NEXT: Cost Model: Found an estimated cost of 40 for instruction: %V2i64 = mul <2 x i64> undef, +; FAST16-NEXT: Cost Model: Found an estimated cost of 80 for instruction: %V4i64 = mul <4 x i64> undef, +; FAST16-NEXT: Cost Model: Found an estimated cost of 480 for instruction: %V8i64 = mul <8 x i64> undef, +; FAST16-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I32 = mul i32 undef, 16 +; FAST16-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V4i32 = mul <4 x i32> undef, +; FAST16-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %V8i32 = mul <8 x i32> undef, +; FAST16-NEXT: Cost Model: Found an estimated cost of 192 for instruction: %V16i32 = mul <16 x i32> undef, +; FAST16-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I16 = mul i16 undef, 16 +; FAST16-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V8i16 = mul <8 x i16> undef, +; FAST16-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %V16i16 = mul <16 x i16> undef, +; FAST16-NEXT: Cost Model: Found an estimated cost of 80 for instruction: %V32i16 = mul <32 x i16> undef, +; FAST16-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I8 = mul i8 undef, 16 +; FAST16-NEXT: Cost Model: Found an estimated cost of 64 for instruction: %V16i8 = mul <16 x i8> undef, +; FAST16-NEXT: Cost Model: Found an estimated cost of 128 for instruction: %V32i8 = mul <32 x i8> undef, +; FAST16-NEXT: Cost Model: Found an estimated cost of 264 for instruction: %V64i8 = mul <64 x i8> undef, +; FAST16-NEXT: Cost Model: Found an estimated cost of 10 for instruction: ret i32 undef +; +; SLOW16-SIZE-LABEL: 'mul_constpow2' +; SLOW16-SIZE-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %I64 = mul i64 undef, 16 +; SLOW16-SIZE-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V2i64 = mul <2 x i64> undef, +; SLOW16-SIZE-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %V4i64 = mul <4 x i64> undef, +; SLOW16-SIZE-NEXT: Cost Model: Found an estimated cost of 288 for instruction: %V8i64 = mul <8 x i64> undef, +; SLOW16-SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %I32 
= mul i32 undef, 16 +; SLOW16-SIZE-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V4i32 = mul <4 x i32> undef, +; SLOW16-SIZE-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V8i32 = mul <8 x i32> undef, +; SLOW16-SIZE-NEXT: Cost Model: Found an estimated cost of 96 for instruction: %V16i32 = mul <16 x i32> undef, +; SLOW16-SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %I16 = mul i16 undef, 16 +; SLOW16-SIZE-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V8i16 = mul <8 x i16> undef, +; SLOW16-SIZE-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %V16i16 = mul <16 x i16> undef, +; SLOW16-SIZE-NEXT: Cost Model: Found an estimated cost of 68 for instruction: %V32i16 = mul <32 x i16> undef, +; SLOW16-SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %I8 = mul i8 undef, 16 +; SLOW16-SIZE-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %V16i8 = mul <16 x i8> undef, +; SLOW16-SIZE-NEXT: Cost Model: Found an estimated cost of 64 for instruction: %V32i8 = mul <32 x i8> undef, +; SLOW16-SIZE-NEXT: Cost Model: Found an estimated cost of 132 for instruction: %V64i8 = mul <64 x i8> undef, +; SLOW16-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef +; +; FAST16-SIZE-LABEL: 'mul_constpow2' +; FAST16-SIZE-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %I64 = mul i64 undef, 16 +; FAST16-SIZE-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V2i64 = mul <2 x i64> undef, +; FAST16-SIZE-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %V4i64 = mul <4 x i64> undef, +; FAST16-SIZE-NEXT: Cost Model: Found an estimated cost of 288 for instruction: %V8i64 = mul <8 x i64> undef, +; FAST16-SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %I32 = mul i32 undef, 16 +; FAST16-SIZE-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V4i32 = mul <4 x i32> undef, +; FAST16-SIZE-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V8i32 = mul <8 x i32> undef, +; FAST16-SIZE-NEXT: Cost Model: Found an estimated cost of 96 for instruction: %V16i32 = mul <16 x i32> undef, +; FAST16-SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %I16 = mul i16 undef, 16 +; FAST16-SIZE-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V8i16 = mul <8 x i16> undef, +; FAST16-SIZE-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V16i16 = mul <16 x i16> undef, +; FAST16-SIZE-NEXT: Cost Model: Found an estimated cost of 40 for instruction: %V32i16 = mul <32 x i16> undef, +; FAST16-SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %I8 = mul i8 undef, 16 +; FAST16-SIZE-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %V16i8 = mul <16 x i8> undef, +; FAST16-SIZE-NEXT: Cost Model: Found an estimated cost of 64 for instruction: %V32i8 = mul <32 x i8> undef, +; FAST16-SIZE-NEXT: Cost Model: Found an estimated cost of 132 for instruction: %V64i8 = mul <64 x i8> undef, +; FAST16-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef +; + %I64 = mul i64 undef, 16 + %V2i64 = mul <2 x i64> undef, + %V4i64 = mul <4 x i64> undef, + %V8i64 = mul <8 x i64> undef, + + %I32 = mul i32 undef, 16 + %V4i32 = mul <4 x i32> undef, + %V8i32 = mul <8 x i32> undef, + %V16i32 = mul <16 x i32> undef, + + %I16 = mul i16 undef, 16 + %V8i16 = mul <8 x i16> undef, + %V16i16 = mul <16 x i16> undef, + %V32i16 = mul 
<32 x i16> undef, + + %I8 = mul i8 undef, 16 + %V16i8 = mul <16 x i8> undef, + %V32i8 = mul <32 x i8> undef, + %V64i8 = mul <64 x i8> undef, + + ret i32 undef +} + +define i32 @mul_uniformconstpow2() { +; SLOW16-LABEL: 'mul_uniformconstpow2' +; SLOW16-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %I64 = mul i64 undef, 16 +; SLOW16-NEXT: Cost Model: Found an estimated cost of 40 for instruction: %V2i64 = mul <2 x i64> undef, +; SLOW16-NEXT: Cost Model: Found an estimated cost of 80 for instruction: %V4i64 = mul <4 x i64> undef, +; SLOW16-NEXT: Cost Model: Found an estimated cost of 480 for instruction: %V8i64 = mul <8 x i64> undef, +; SLOW16-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I32 = mul i32 undef, 16 +; SLOW16-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V4i32 = mul <4 x i32> undef, +; SLOW16-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %V8i32 = mul <8 x i32> undef, +; SLOW16-NEXT: Cost Model: Found an estimated cost of 192 for instruction: %V16i32 = mul <16 x i32> undef, +; SLOW16-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I16 = mul i16 undef, 16 +; SLOW16-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %V8i16 = mul <8 x i16> undef, +; SLOW16-NEXT: Cost Model: Found an estimated cost of 64 for instruction: %V16i16 = mul <16 x i16> undef, +; SLOW16-NEXT: Cost Model: Found an estimated cost of 136 for instruction: %V32i16 = mul <32 x i16> undef, +; SLOW16-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I8 = mul i8 undef, 16 +; SLOW16-NEXT: Cost Model: Found an estimated cost of 64 for instruction: %V16i8 = mul <16 x i8> undef, +; SLOW16-NEXT: Cost Model: Found an estimated cost of 128 for instruction: %V32i8 = mul <32 x i8> undef, +; SLOW16-NEXT: Cost Model: Found an estimated cost of 264 for instruction: %V64i8 = mul <64 x i8> undef, +; SLOW16-NEXT: Cost Model: Found an estimated cost of 10 for instruction: ret i32 undef +; +; FAST16-LABEL: 'mul_uniformconstpow2' +; FAST16-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %I64 = mul i64 undef, 16 +; FAST16-NEXT: Cost Model: Found an estimated cost of 40 for instruction: %V2i64 = mul <2 x i64> undef, +; FAST16-NEXT: Cost Model: Found an estimated cost of 80 for instruction: %V4i64 = mul <4 x i64> undef, +; FAST16-NEXT: Cost Model: Found an estimated cost of 480 for instruction: %V8i64 = mul <8 x i64> undef, +; FAST16-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I32 = mul i32 undef, 16 +; FAST16-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V4i32 = mul <4 x i32> undef, +; FAST16-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %V8i32 = mul <8 x i32> undef, +; FAST16-NEXT: Cost Model: Found an estimated cost of 192 for instruction: %V16i32 = mul <16 x i32> undef, +; FAST16-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I16 = mul i16 undef, 16 +; FAST16-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V8i16 = mul <8 x i16> undef, +; FAST16-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %V16i16 = mul <16 x i16> undef, +; FAST16-NEXT: Cost Model: Found an estimated cost of 80 for instruction: %V32i16 = mul <32 x i16> undef, +; FAST16-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I8 = mul i8 undef, 16 +; FAST16-NEXT: Cost Model: Found an estimated cost of 64 for instruction: %V16i8 = mul <16 x i8> undef, +; FAST16-NEXT: Cost Model: Found an estimated cost of 128 
for instruction: %V32i8 = mul <32 x i8> undef, +; FAST16-NEXT: Cost Model: Found an estimated cost of 264 for instruction: %V64i8 = mul <64 x i8> undef, +; FAST16-NEXT: Cost Model: Found an estimated cost of 10 for instruction: ret i32 undef +; +; SLOW16-SIZE-LABEL: 'mul_uniformconstpow2' +; SLOW16-SIZE-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %I64 = mul i64 undef, 16 +; SLOW16-SIZE-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V2i64 = mul <2 x i64> undef, +; SLOW16-SIZE-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %V4i64 = mul <4 x i64> undef, +; SLOW16-SIZE-NEXT: Cost Model: Found an estimated cost of 288 for instruction: %V8i64 = mul <8 x i64> undef, +; SLOW16-SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %I32 = mul i32 undef, 16 +; SLOW16-SIZE-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V4i32 = mul <4 x i32> undef, +; SLOW16-SIZE-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V8i32 = mul <8 x i32> undef, +; SLOW16-SIZE-NEXT: Cost Model: Found an estimated cost of 96 for instruction: %V16i32 = mul <16 x i32> undef, +; SLOW16-SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %I16 = mul i16 undef, 16 +; SLOW16-SIZE-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V8i16 = mul <8 x i16> undef, +; SLOW16-SIZE-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %V16i16 = mul <16 x i16> undef, +; SLOW16-SIZE-NEXT: Cost Model: Found an estimated cost of 68 for instruction: %V32i16 = mul <32 x i16> undef, +; SLOW16-SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %I8 = mul i8 undef, 16 +; SLOW16-SIZE-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %V16i8 = mul <16 x i8> undef, +; SLOW16-SIZE-NEXT: Cost Model: Found an estimated cost of 64 for instruction: %V32i8 = mul <32 x i8> undef, +; SLOW16-SIZE-NEXT: Cost Model: Found an estimated cost of 132 for instruction: %V64i8 = mul <64 x i8> undef, +; SLOW16-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef +; +; FAST16-SIZE-LABEL: 'mul_uniformconstpow2' +; FAST16-SIZE-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %I64 = mul i64 undef, 16 +; FAST16-SIZE-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V2i64 = mul <2 x i64> undef, +; FAST16-SIZE-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %V4i64 = mul <4 x i64> undef, +; FAST16-SIZE-NEXT: Cost Model: Found an estimated cost of 288 for instruction: %V8i64 = mul <8 x i64> undef, +; FAST16-SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %I32 = mul i32 undef, 16 +; FAST16-SIZE-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V4i32 = mul <4 x i32> undef, +; FAST16-SIZE-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V8i32 = mul <8 x i32> undef, +; FAST16-SIZE-NEXT: Cost Model: Found an estimated cost of 96 for instruction: %V16i32 = mul <16 x i32> undef, +; FAST16-SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %I16 = mul i16 undef, 16 +; FAST16-SIZE-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V8i16 = mul <8 x i16> undef, +; FAST16-SIZE-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V16i16 = mul <16 x i16> undef, +; FAST16-SIZE-NEXT: Cost Model: Found an estimated cost of 40 for instruction: %V32i16 = mul <32 x i16> undef, +; FAST16-SIZE-NEXT: Cost Model: Found an estimated cost of 2 for 
instruction: %I8 = mul i8 undef, 16 +; FAST16-SIZE-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %V16i8 = mul <16 x i8> undef, +; FAST16-SIZE-NEXT: Cost Model: Found an estimated cost of 64 for instruction: %V32i8 = mul <32 x i8> undef, +; FAST16-SIZE-NEXT: Cost Model: Found an estimated cost of 132 for instruction: %V64i8 = mul <64 x i8> undef, +; FAST16-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef +; + %I64 = mul i64 undef, 16 + %V2i64 = mul <2 x i64> undef, + %V4i64 = mul <4 x i64> undef, + %V8i64 = mul <8 x i64> undef, + + %I32 = mul i32 undef, 16 + %V4i32 = mul <4 x i32> undef, + %V8i32 = mul <8 x i32> undef, + %V16i32 = mul <16 x i32> undef, + + %I16 = mul i16 undef, 16 + %V8i16 = mul <8 x i16> undef, + %V16i16 = mul <16 x i16> undef, + %V32i16 = mul <32 x i16> undef, + + %I8 = mul i8 undef, 16 + %V16i8 = mul <16 x i8> undef, + %V32i8 = mul <32 x i8> undef, + %V64i8 = mul <64 x i8> undef, + + ret i32 undef +} + +define i32 @mul_constnegpow2() { +; SLOW16-LABEL: 'mul_constnegpow2' +; SLOW16-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %I64 = mul i64 undef, -16 +; SLOW16-NEXT: Cost Model: Found an estimated cost of 40 for instruction: %V2i64 = mul <2 x i64> undef, +; SLOW16-NEXT: Cost Model: Found an estimated cost of 80 for instruction: %V4i64 = mul <4 x i64> undef, +; SLOW16-NEXT: Cost Model: Found an estimated cost of 480 for instruction: %V8i64 = mul <8 x i64> undef, +; SLOW16-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I32 = mul i32 undef, -16 +; SLOW16-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V4i32 = mul <4 x i32> undef, +; SLOW16-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %V8i32 = mul <8 x i32> undef, +; SLOW16-NEXT: Cost Model: Found an estimated cost of 192 for instruction: %V16i32 = mul <16 x i32> undef, +; SLOW16-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I16 = mul i16 undef, -16 +; SLOW16-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %V8i16 = mul <8 x i16> undef, +; SLOW16-NEXT: Cost Model: Found an estimated cost of 64 for instruction: %V16i16 = mul <16 x i16> undef, +; SLOW16-NEXT: Cost Model: Found an estimated cost of 136 for instruction: %V32i16 = mul <32 x i16> undef, +; SLOW16-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I8 = mul i8 undef, -16 +; SLOW16-NEXT: Cost Model: Found an estimated cost of 64 for instruction: %V16i8 = mul <16 x i8> undef, +; SLOW16-NEXT: Cost Model: Found an estimated cost of 128 for instruction: %V32i8 = mul <32 x i8> undef, +; SLOW16-NEXT: Cost Model: Found an estimated cost of 264 for instruction: %V64i8 = mul <64 x i8> undef, +; SLOW16-NEXT: Cost Model: Found an estimated cost of 10 for instruction: ret i32 undef +; +; FAST16-LABEL: 'mul_constnegpow2' +; FAST16-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %I64 = mul i64 undef, -16 +; FAST16-NEXT: Cost Model: Found an estimated cost of 40 for instruction: %V2i64 = mul <2 x i64> undef, +; FAST16-NEXT: Cost Model: Found an estimated cost of 80 for instruction: %V4i64 = mul <4 x i64> undef, +; FAST16-NEXT: Cost Model: Found an estimated cost of 480 for instruction: %V8i64 = mul <8 x i64> undef, +; FAST16-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I32 = mul i32 undef, -16 +; FAST16-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V4i32 = mul <4 x i32> undef, +; FAST16-NEXT: Cost Model: Found an estimated cost of 32 for instruction: 
%V8i32 = mul <8 x i32> undef, +; FAST16-NEXT: Cost Model: Found an estimated cost of 192 for instruction: %V16i32 = mul <16 x i32> undef, +; FAST16-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I16 = mul i16 undef, -16 +; FAST16-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V8i16 = mul <8 x i16> undef, +; FAST16-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %V16i16 = mul <16 x i16> undef, +; FAST16-NEXT: Cost Model: Found an estimated cost of 80 for instruction: %V32i16 = mul <32 x i16> undef, +; FAST16-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I8 = mul i8 undef, -16 +; FAST16-NEXT: Cost Model: Found an estimated cost of 64 for instruction: %V16i8 = mul <16 x i8> undef, +; FAST16-NEXT: Cost Model: Found an estimated cost of 128 for instruction: %V32i8 = mul <32 x i8> undef, +; FAST16-NEXT: Cost Model: Found an estimated cost of 264 for instruction: %V64i8 = mul <64 x i8> undef, +; FAST16-NEXT: Cost Model: Found an estimated cost of 10 for instruction: ret i32 undef +; +; SLOW16-SIZE-LABEL: 'mul_constnegpow2' +; SLOW16-SIZE-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %I64 = mul i64 undef, -16 +; SLOW16-SIZE-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V2i64 = mul <2 x i64> undef, +; SLOW16-SIZE-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %V4i64 = mul <4 x i64> undef, +; SLOW16-SIZE-NEXT: Cost Model: Found an estimated cost of 288 for instruction: %V8i64 = mul <8 x i64> undef, +; SLOW16-SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %I32 = mul i32 undef, -16 +; SLOW16-SIZE-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V4i32 = mul <4 x i32> undef, +; SLOW16-SIZE-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V8i32 = mul <8 x i32> undef, +; SLOW16-SIZE-NEXT: Cost Model: Found an estimated cost of 96 for instruction: %V16i32 = mul <16 x i32> undef, +; SLOW16-SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %I16 = mul i16 undef, -16 +; SLOW16-SIZE-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V8i16 = mul <8 x i16> undef, +; SLOW16-SIZE-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %V16i16 = mul <16 x i16> undef, +; SLOW16-SIZE-NEXT: Cost Model: Found an estimated cost of 68 for instruction: %V32i16 = mul <32 x i16> undef, +; SLOW16-SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %I8 = mul i8 undef, -16 +; SLOW16-SIZE-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %V16i8 = mul <16 x i8> undef, +; SLOW16-SIZE-NEXT: Cost Model: Found an estimated cost of 64 for instruction: %V32i8 = mul <32 x i8> undef, +; SLOW16-SIZE-NEXT: Cost Model: Found an estimated cost of 132 for instruction: %V64i8 = mul <64 x i8> undef, +; SLOW16-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef +; +; FAST16-SIZE-LABEL: 'mul_constnegpow2' +; FAST16-SIZE-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %I64 = mul i64 undef, -16 +; FAST16-SIZE-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V2i64 = mul <2 x i64> undef, +; FAST16-SIZE-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %V4i64 = mul <4 x i64> undef, +; FAST16-SIZE-NEXT: Cost Model: Found an estimated cost of 288 for instruction: %V8i64 = mul <8 x i64> undef, +; FAST16-SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %I32 = mul i32 undef, -16 +; FAST16-SIZE-NEXT: 
Cost Model: Found an estimated cost of 8 for instruction: %V4i32 = mul <4 x i32> undef, +; FAST16-SIZE-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V8i32 = mul <8 x i32> undef, +; FAST16-SIZE-NEXT: Cost Model: Found an estimated cost of 96 for instruction: %V16i32 = mul <16 x i32> undef, +; FAST16-SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %I16 = mul i16 undef, -16 +; FAST16-SIZE-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V8i16 = mul <8 x i16> undef, +; FAST16-SIZE-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V16i16 = mul <16 x i16> undef, +; FAST16-SIZE-NEXT: Cost Model: Found an estimated cost of 40 for instruction: %V32i16 = mul <32 x i16> undef, +; FAST16-SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %I8 = mul i8 undef, -16 +; FAST16-SIZE-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %V16i8 = mul <16 x i8> undef, +; FAST16-SIZE-NEXT: Cost Model: Found an estimated cost of 64 for instruction: %V32i8 = mul <32 x i8> undef, +; FAST16-SIZE-NEXT: Cost Model: Found an estimated cost of 132 for instruction: %V64i8 = mul <64 x i8> undef, +; FAST16-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef +; + %I64 = mul i64 undef, -16 + %V2i64 = mul <2 x i64> undef, + %V4i64 = mul <4 x i64> undef, + %V8i64 = mul <8 x i64> undef, + + %I32 = mul i32 undef, -16 + %V4i32 = mul <4 x i32> undef, + %V8i32 = mul <8 x i32> undef, + %V16i32 = mul <16 x i32> undef, + + %I16 = mul i16 undef, -16 + %V8i16 = mul <8 x i16> undef, + %V16i16 = mul <16 x i16> undef, + %V32i16 = mul <32 x i16> undef, + + %I8 = mul i8 undef, -16 + %V16i8 = mul <16 x i8> undef, + %V32i8 = mul <32 x i8> undef, + %V64i8 = mul <64 x i8> undef, + + ret i32 undef +} + +define i32 @mul_uniformconstnegpow2() { +; SLOW16-LABEL: 'mul_uniformconstnegpow2' +; SLOW16-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %I64 = mul i64 undef, -16 +; SLOW16-NEXT: Cost Model: Found an estimated cost of 40 for instruction: %V2i64 = mul <2 x i64> undef, +; SLOW16-NEXT: Cost Model: Found an estimated cost of 80 for instruction: %V4i64 = mul <4 x i64> undef, +; SLOW16-NEXT: Cost Model: Found an estimated cost of 480 for instruction: %V8i64 = mul <8 x i64> undef, +; SLOW16-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I32 = mul i32 undef, -16 +; SLOW16-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V4i32 = mul <4 x i32> undef, +; SLOW16-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %V8i32 = mul <8 x i32> undef, +; SLOW16-NEXT: Cost Model: Found an estimated cost of 192 for instruction: %V16i32 = mul <16 x i32> undef, +; SLOW16-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I16 = mul i16 undef, -16 +; SLOW16-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %V8i16 = mul <8 x i16> undef, +; SLOW16-NEXT: Cost Model: Found an estimated cost of 64 for instruction: %V16i16 = mul <16 x i16> undef, +; SLOW16-NEXT: Cost Model: Found an estimated cost of 136 for instruction: %V32i16 = mul <32 x i16> undef, +; SLOW16-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I8 = mul i8 undef, -16 +; SLOW16-NEXT: Cost Model: Found an estimated cost of 64 for instruction: %V16i8 = mul <16 x i8> undef, +; SLOW16-NEXT: Cost Model: Found an estimated cost of 128 for instruction: %V32i8 = mul <32 x i8> undef, +; SLOW16-NEXT: Cost Model: Found an estimated cost of 264 for instruction: %V64i8 = mul <64 x i8> 
undef, +; SLOW16-NEXT: Cost Model: Found an estimated cost of 10 for instruction: ret i32 undef +; +; FAST16-LABEL: 'mul_uniformconstnegpow2' +; FAST16-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %I64 = mul i64 undef, -16 +; FAST16-NEXT: Cost Model: Found an estimated cost of 40 for instruction: %V2i64 = mul <2 x i64> undef, +; FAST16-NEXT: Cost Model: Found an estimated cost of 80 for instruction: %V4i64 = mul <4 x i64> undef, +; FAST16-NEXT: Cost Model: Found an estimated cost of 480 for instruction: %V8i64 = mul <8 x i64> undef, +; FAST16-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I32 = mul i32 undef, -16 +; FAST16-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V4i32 = mul <4 x i32> undef, +; FAST16-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %V8i32 = mul <8 x i32> undef, +; FAST16-NEXT: Cost Model: Found an estimated cost of 192 for instruction: %V16i32 = mul <16 x i32> undef, +; FAST16-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I16 = mul i16 undef, -16 +; FAST16-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V8i16 = mul <8 x i16> undef, +; FAST16-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %V16i16 = mul <16 x i16> undef, +; FAST16-NEXT: Cost Model: Found an estimated cost of 80 for instruction: %V32i16 = mul <32 x i16> undef, +; FAST16-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I8 = mul i8 undef, -16 +; FAST16-NEXT: Cost Model: Found an estimated cost of 64 for instruction: %V16i8 = mul <16 x i8> undef, +; FAST16-NEXT: Cost Model: Found an estimated cost of 128 for instruction: %V32i8 = mul <32 x i8> undef, +; FAST16-NEXT: Cost Model: Found an estimated cost of 264 for instruction: %V64i8 = mul <64 x i8> undef, +; FAST16-NEXT: Cost Model: Found an estimated cost of 10 for instruction: ret i32 undef +; +; SLOW16-SIZE-LABEL: 'mul_uniformconstnegpow2' +; SLOW16-SIZE-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %I64 = mul i64 undef, -16 +; SLOW16-SIZE-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V2i64 = mul <2 x i64> undef, +; SLOW16-SIZE-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %V4i64 = mul <4 x i64> undef, +; SLOW16-SIZE-NEXT: Cost Model: Found an estimated cost of 288 for instruction: %V8i64 = mul <8 x i64> undef, +; SLOW16-SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %I32 = mul i32 undef, -16 +; SLOW16-SIZE-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V4i32 = mul <4 x i32> undef, +; SLOW16-SIZE-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V8i32 = mul <8 x i32> undef, +; SLOW16-SIZE-NEXT: Cost Model: Found an estimated cost of 96 for instruction: %V16i32 = mul <16 x i32> undef, +; SLOW16-SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %I16 = mul i16 undef, -16 +; SLOW16-SIZE-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V8i16 = mul <8 x i16> undef, +; SLOW16-SIZE-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %V16i16 = mul <16 x i16> undef, +; SLOW16-SIZE-NEXT: Cost Model: Found an estimated cost of 68 for instruction: %V32i16 = mul <32 x i16> undef, +; SLOW16-SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %I8 = mul i8 undef, -16 +; SLOW16-SIZE-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %V16i8 = mul <16 x i8> undef, +; SLOW16-SIZE-NEXT: Cost Model: Found an estimated cost of 64 for 
instruction: %V32i8 = mul <32 x i8> undef, +; SLOW16-SIZE-NEXT: Cost Model: Found an estimated cost of 132 for instruction: %V64i8 = mul <64 x i8> undef, +; SLOW16-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef +; +; FAST16-SIZE-LABEL: 'mul_uniformconstnegpow2' +; FAST16-SIZE-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %I64 = mul i64 undef, -16 +; FAST16-SIZE-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V2i64 = mul <2 x i64> undef, +; FAST16-SIZE-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %V4i64 = mul <4 x i64> undef, +; FAST16-SIZE-NEXT: Cost Model: Found an estimated cost of 288 for instruction: %V8i64 = mul <8 x i64> undef, +; FAST16-SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %I32 = mul i32 undef, -16 +; FAST16-SIZE-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V4i32 = mul <4 x i32> undef, +; FAST16-SIZE-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V8i32 = mul <8 x i32> undef, +; FAST16-SIZE-NEXT: Cost Model: Found an estimated cost of 96 for instruction: %V16i32 = mul <16 x i32> undef, +; FAST16-SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %I16 = mul i16 undef, -16 +; FAST16-SIZE-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V8i16 = mul <8 x i16> undef, +; FAST16-SIZE-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V16i16 = mul <16 x i16> undef, +; FAST16-SIZE-NEXT: Cost Model: Found an estimated cost of 40 for instruction: %V32i16 = mul <32 x i16> undef, +; FAST16-SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %I8 = mul i8 undef, -16 +; FAST16-SIZE-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %V16i8 = mul <16 x i8> undef, +; FAST16-SIZE-NEXT: Cost Model: Found an estimated cost of 64 for instruction: %V32i8 = mul <32 x i8> undef, +; FAST16-SIZE-NEXT: Cost Model: Found an estimated cost of 132 for instruction: %V64i8 = mul <64 x i8> undef, +; FAST16-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef +; + %I64 = mul i64 undef, -16 + %V2i64 = mul <2 x i64> undef, + %V4i64 = mul <4 x i64> undef, + %V8i64 = mul <8 x i64> undef, + + %I32 = mul i32 undef, -16 + %V4i32 = mul <4 x i32> undef, + %V8i32 = mul <8 x i32> undef, + %V16i32 = mul <16 x i32> undef, + + %I16 = mul i16 undef, -16 + %V8i16 = mul <8 x i16> undef, + %V16i16 = mul <16 x i16> undef, + %V32i16 = mul <32 x i16> undef, + + %I8 = mul i8 undef, -16 + %V16i8 = mul <16 x i8> undef, + %V32i8 = mul <32 x i8> undef, + %V64i8 = mul <64 x i8> undef, + + ret i32 undef +} + attributes #0 = { nounwind } From a2120f6b449103f517e541abb77839a1e3e12272 Mon Sep 17 00:00:00 2001 From: Daniil Fukalov Date: Wed, 22 Dec 2021 22:33:57 +0300 Subject: [PATCH 071/293] [NFC][AMDGPU][CostModel] Add tests for AMDGPU cost model, part 2. 
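For readers skimming the series, here is a minimal sketch of the test style used throughout this patch (illustrative only, not part of the patch: the function name and the regex-based cost check are placeholders, and the RUN invocation simply mirrors the ones in the files added below). Each test file pairs a RUN line that prints the cost-model analysis with CHECK lines that pin the reported per-instruction costs; the committed files generate those CHECK lines with utils/update_analyze_test_checks.py rather than writing them by hand.

; RUN: opt -cost-model -analyze -mtriple=amdgcn-unknown-amdhsa -mcpu=gfx900 < %s | FileCheck %s
define i32 @add_i32(i32 %a, i32 %b) {
  ; The exact cost depends on the target's cost tables, so this sketch matches it
  ; with a regex instead of a hard-coded number; the real tests record concrete
  ; values via the autogeneration script mentioned above.
  ; CHECK: Cost Model: Found an estimated cost of {{[0-9]+}} for instruction: %r = add i32
  %r = add i32 %a, %b
  ret i32 %r
}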
--- .../Analysis/CostModel/AMDGPU/aggregates.ll | 47 + .../Analysis/CostModel/AMDGPU/arith-fp.ll | 103 ++ .../CostModel/AMDGPU/arith-sminmax.ll | 273 ++++ .../CostModel/AMDGPU/arith-uminmax.ll | 273 ++++ llvm/test/Analysis/CostModel/AMDGPU/cast.ll | 449 +++++++ llvm/test/Analysis/CostModel/AMDGPU/div.ll | 1145 +++++++++++++++++ llvm/test/Analysis/CostModel/AMDGPU/fptosi.ll | 259 ++++ llvm/test/Analysis/CostModel/AMDGPU/fptoui.ll | 259 ++++ llvm/test/Analysis/CostModel/AMDGPU/fround.ll | 307 +++++ llvm/test/Analysis/CostModel/AMDGPU/gep.ll | 73 ++ llvm/test/Analysis/CostModel/AMDGPU/rem.ll | 1145 +++++++++++++++++ 11 files changed, 4333 insertions(+) create mode 100644 llvm/test/Analysis/CostModel/AMDGPU/aggregates.ll create mode 100644 llvm/test/Analysis/CostModel/AMDGPU/arith-fp.ll create mode 100644 llvm/test/Analysis/CostModel/AMDGPU/arith-sminmax.ll create mode 100644 llvm/test/Analysis/CostModel/AMDGPU/arith-uminmax.ll create mode 100644 llvm/test/Analysis/CostModel/AMDGPU/cast.ll create mode 100644 llvm/test/Analysis/CostModel/AMDGPU/div.ll create mode 100644 llvm/test/Analysis/CostModel/AMDGPU/fptosi.ll create mode 100644 llvm/test/Analysis/CostModel/AMDGPU/fptoui.ll create mode 100644 llvm/test/Analysis/CostModel/AMDGPU/fround.ll create mode 100644 llvm/test/Analysis/CostModel/AMDGPU/gep.ll create mode 100644 llvm/test/Analysis/CostModel/AMDGPU/rem.ll diff --git a/llvm/test/Analysis/CostModel/AMDGPU/aggregates.ll b/llvm/test/Analysis/CostModel/AMDGPU/aggregates.ll new file mode 100644 index 000000000000..4f9a30bc8841 --- /dev/null +++ b/llvm/test/Analysis/CostModel/AMDGPU/aggregates.ll @@ -0,0 +1,47 @@ +; NOTE: Assertions have been autogenerated by utils/update_analyze_test_checks.py +; RUN: opt -cost-model -analyze -mtriple=amdgcn-unknown-amdhsa -mcpu=gfx1010 < %s | FileCheck -check-prefixes=ALL %s +; RUN: opt -cost-model -analyze -mtriple=amdgcn-unknown-amdhsa -mcpu=gfx90a < %s | FileCheck -check-prefixes=ALL %s +; RUN: opt -cost-model -analyze -mtriple=amdgcn-unknown-amdhsa -mcpu=gfx900 < %s | FileCheck -check-prefixes=ALL %s +; RUN: opt -cost-model -analyze -mtriple=amdgcn-unknown-amdhsa < %s | FileCheck -check-prefixes=ALL %s + +; RUN: opt -cost-model -cost-kind=code-size -analyze -mtriple=amdgcn-unknown-amdhsa -mcpu=gfx1010 < %s | FileCheck -check-prefixes=ALL-SIZE %s +; RUN: opt -cost-model -cost-kind=code-size -analyze -mtriple=amdgcn-unknown-amdhsa -mcpu=gfx90a < %s | FileCheck -check-prefixes=ALL-SIZE %s +; RUN: opt -cost-model -cost-kind=code-size -analyze -mtriple=amdgcn-unknown-amdhsa -mcpu=gfx900 < %s | FileCheck -check-prefixes=ALL-SIZE %s +; RUN: opt -cost-model -cost-kind=code-size -analyze -mtriple=amdgcn-unknown-amdhsa < %s | FileCheck -check-prefixes=ALL-SIZE %s +; END. 
+ +%T1 = type { i32, float, <4 x i1> } + +define void @extract_1() { +; ALL-LABEL: 'extract_1' +; ALL-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %r0 = extractvalue { i32, i32 } undef, 0 +; ALL-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %r1 = extractvalue { i32, i32 } undef, 1 +; ALL-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %r2 = extractvalue { i32, i1 } undef, 0 +; ALL-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %r3 = extractvalue { i32, i1 } undef, 1 +; ALL-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %r4 = extractvalue { i32, float } undef, 1 +; ALL-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %r5 = extractvalue { i32, [42 x i42] } undef, 1 +; ALL-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %r6 = extractvalue { i32, <42 x i42> } undef, 1 +; ALL-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %r7 = extractvalue { i32, %T1 } undef, 1 +; ALL-NEXT: Cost Model: Found an estimated cost of 10 for instruction: ret void +; +; ALL-SIZE-LABEL: 'extract_1' +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %r0 = extractvalue { i32, i32 } undef, 0 +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %r1 = extractvalue { i32, i32 } undef, 1 +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %r2 = extractvalue { i32, i1 } undef, 0 +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %r3 = extractvalue { i32, i1 } undef, 1 +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %r4 = extractvalue { i32, float } undef, 1 +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %r5 = extractvalue { i32, [42 x i42] } undef, 1 +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %r6 = extractvalue { i32, <42 x i42> } undef, 1 +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %r7 = extractvalue { i32, %T1 } undef, 1 +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; + %r0 = extractvalue {i32, i32} undef, 0 + %r1 = extractvalue {i32, i32} undef, 1 + %r2 = extractvalue {i32, i1} undef, 0 + %r3 = extractvalue {i32, i1} undef, 1 + %r4 = extractvalue {i32, float} undef, 1 + %r5 = extractvalue {i32, [42 x i42]} undef, 1 + %r6 = extractvalue {i32, <42 x i42>} undef, 1 + %r7 = extractvalue {i32, %T1} undef, 1 + ret void +} diff --git a/llvm/test/Analysis/CostModel/AMDGPU/arith-fp.ll b/llvm/test/Analysis/CostModel/AMDGPU/arith-fp.ll new file mode 100644 index 000000000000..01913a7204a6 --- /dev/null +++ b/llvm/test/Analysis/CostModel/AMDGPU/arith-fp.ll @@ -0,0 +1,103 @@ +; NOTE: Assertions have been autogenerated by utils/update_analyze_test_checks.py +; RUN: opt -cost-model -analyze -mtriple=amdgcn-unknown-amdhsa -mcpu=gfx1010 < %s | FileCheck -check-prefixes=ALL %s +; RUN: opt -cost-model -analyze -mtriple=amdgcn-unknown-amdhsa -mcpu=gfx90a < %s | FileCheck -check-prefixes=ALL %s +; RUN: opt -cost-model -analyze -mtriple=amdgcn-unknown-amdhsa -mcpu=gfx900 < %s | FileCheck -check-prefixes=ALL %s +; RUN: opt -cost-model -analyze -mtriple=amdgcn-unknown-amdhsa < %s | FileCheck -check-prefixes=ALL %s + +; RUN: opt -cost-model -cost-kind=code-size -analyze -mtriple=amdgcn-unknown-amdhsa -mcpu=gfx1010 < %s | FileCheck -check-prefixes=ALL-SIZE %s +; RUN: opt -cost-model -cost-kind=code-size -analyze -mtriple=amdgcn-unknown-amdhsa -mcpu=gfx90a < %s | 
FileCheck -check-prefixes=ALL-SIZE %s +; RUN: opt -cost-model -cost-kind=code-size -analyze -mtriple=amdgcn-unknown-amdhsa -mcpu=gfx900 < %s | FileCheck -check-prefixes=ALL-SIZE %s +; RUN: opt -cost-model -cost-kind=code-size -analyze -mtriple=amdgcn-unknown-amdhsa < %s | FileCheck -check-prefixes=ALL-SIZE %s +; END. + +define i32 @fcopysign(i32 %arg) { +; ALL-LABEL: 'fcopysign' +; ALL-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %F32 = call float @llvm.copysign.f32(float undef, float undef) +; ALL-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4F32 = call <4 x float> @llvm.copysign.v4f32(<4 x float> undef, <4 x float> undef) +; ALL-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V8F32 = call <8 x float> @llvm.copysign.v8f32(<8 x float> undef, <8 x float> undef) +; ALL-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V16F32 = call <16 x float> @llvm.copysign.v16f32(<16 x float> undef, <16 x float> undef) +; ALL-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %F64 = call double @llvm.copysign.f64(double undef, double undef) +; ALL-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2F64 = call <2 x double> @llvm.copysign.v2f64(<2 x double> undef, <2 x double> undef) +; ALL-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4F64 = call <4 x double> @llvm.copysign.v4f64(<4 x double> undef, <4 x double> undef) +; ALL-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V8F64 = call <8 x double> @llvm.copysign.v8f64(<8 x double> undef, <8 x double> undef) +; ALL-NEXT: Cost Model: Found an estimated cost of 10 for instruction: ret i32 undef +; +; ALL-SIZE-LABEL: 'fcopysign' +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %F32 = call float @llvm.copysign.f32(float undef, float undef) +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4F32 = call <4 x float> @llvm.copysign.v4f32(<4 x float> undef, <4 x float> undef) +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V8F32 = call <8 x float> @llvm.copysign.v8f32(<8 x float> undef, <8 x float> undef) +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V16F32 = call <16 x float> @llvm.copysign.v16f32(<16 x float> undef, <16 x float> undef) +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %F64 = call double @llvm.copysign.f64(double undef, double undef) +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2F64 = call <2 x double> @llvm.copysign.v2f64(<2 x double> undef, <2 x double> undef) +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4F64 = call <4 x double> @llvm.copysign.v4f64(<4 x double> undef, <4 x double> undef) +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V8F64 = call <8 x double> @llvm.copysign.v8f64(<8 x double> undef, <8 x double> undef) +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef +; + %F32 = call float @llvm.copysign.f32(float undef, float undef) + %V4F32 = call <4 x float> @llvm.copysign.v4f32(<4 x float> undef, <4 x float> undef) + %V8F32 = call <8 x float> @llvm.copysign.v8f32(<8 x float> undef, <8 x float> undef) + %V16F32 = call <16 x float> @llvm.copysign.v16f32(<16 x float> undef, <16 x float> undef) + + %F64 = call double @llvm.copysign.f64(double undef, double undef) + %V2F64 = call <2 x double> @llvm.copysign.v2f64(<2 x double> undef, <2 x 
double> undef) + %V4F64 = call <4 x double> @llvm.copysign.v4f64(<4 x double> undef, <4 x double> undef) + %V8F64 = call <8 x double> @llvm.copysign.v8f64(<8 x double> undef, <8 x double> undef) + + ret i32 undef +} + +define i32 @fsqrt(i32 %arg) { +; ALL-LABEL: 'fsqrt' +; ALL-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %F32 = call float @llvm.sqrt.f32(float undef) +; ALL-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4F32 = call <4 x float> @llvm.sqrt.v4f32(<4 x float> undef) +; ALL-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V8F32 = call <8 x float> @llvm.sqrt.v8f32(<8 x float> undef) +; ALL-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V16F32 = call <16 x float> @llvm.sqrt.v16f32(<16 x float> undef) +; ALL-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %F64 = call double @llvm.sqrt.f64(double undef) +; ALL-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2F64 = call <2 x double> @llvm.sqrt.v2f64(<2 x double> undef) +; ALL-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4F64 = call <4 x double> @llvm.sqrt.v4f64(<4 x double> undef) +; ALL-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V8F64 = call <8 x double> @llvm.sqrt.v8f64(<8 x double> undef) +; ALL-NEXT: Cost Model: Found an estimated cost of 10 for instruction: ret i32 undef +; +; ALL-SIZE-LABEL: 'fsqrt' +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %F32 = call float @llvm.sqrt.f32(float undef) +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4F32 = call <4 x float> @llvm.sqrt.v4f32(<4 x float> undef) +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V8F32 = call <8 x float> @llvm.sqrt.v8f32(<8 x float> undef) +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V16F32 = call <16 x float> @llvm.sqrt.v16f32(<16 x float> undef) +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %F64 = call double @llvm.sqrt.f64(double undef) +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2F64 = call <2 x double> @llvm.sqrt.v2f64(<2 x double> undef) +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4F64 = call <4 x double> @llvm.sqrt.v4f64(<4 x double> undef) +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V8F64 = call <8 x double> @llvm.sqrt.v8f64(<8 x double> undef) +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef +; + %F32 = call float @llvm.sqrt.f32(float undef) + %V4F32 = call <4 x float> @llvm.sqrt.v4f32(<4 x float> undef) + %V8F32 = call <8 x float> @llvm.sqrt.v8f32(<8 x float> undef) + %V16F32 = call <16 x float> @llvm.sqrt.v16f32(<16 x float> undef) + + %F64 = call double @llvm.sqrt.f64(double undef) + %V2F64 = call <2 x double> @llvm.sqrt.v2f64(<2 x double> undef) + %V4F64 = call <4 x double> @llvm.sqrt.v4f64(<4 x double> undef) + %V8F64 = call <8 x double> @llvm.sqrt.v8f64(<8 x double> undef) + + ret i32 undef +} + +declare float @llvm.copysign.f32(float, float) +declare <4 x float> @llvm.copysign.v4f32(<4 x float>, <4 x float>) +declare <8 x float> @llvm.copysign.v8f32(<8 x float>, <8 x float>) +declare <16 x float> @llvm.copysign.v16f32(<16 x float>, <16 x float>) + +declare double @llvm.copysign.f64(double, double) +declare <2 x double> @llvm.copysign.v2f64(<2 x double>, <2 x double>) +declare <4 x double> @llvm.copysign.v4f64(<4 x 
double>, <4 x double>) +declare <8 x double> @llvm.copysign.v8f64(<8 x double>, <8 x double>) + +declare float @llvm.sqrt.f32(float) +declare <4 x float> @llvm.sqrt.v4f32(<4 x float>) +declare <8 x float> @llvm.sqrt.v8f32(<8 x float>) +declare <16 x float> @llvm.sqrt.v16f32(<16 x float>) + +declare double @llvm.sqrt.f64(double) +declare <2 x double> @llvm.sqrt.v2f64(<2 x double>) +declare <4 x double> @llvm.sqrt.v4f64(<4 x double>) +declare <8 x double> @llvm.sqrt.v8f64(<8 x double>) diff --git a/llvm/test/Analysis/CostModel/AMDGPU/arith-sminmax.ll b/llvm/test/Analysis/CostModel/AMDGPU/arith-sminmax.ll new file mode 100644 index 000000000000..d75c38bfb342 --- /dev/null +++ b/llvm/test/Analysis/CostModel/AMDGPU/arith-sminmax.ll @@ -0,0 +1,273 @@ +; NOTE: Assertions have been autogenerated by utils/update_analyze_test_checks.py +; RUN: opt -cost-model -analyze -mtriple=amdgcn-unknown-amdhsa -mcpu=gfx1010 < %s | FileCheck -check-prefixes=FAST %s +; RUN: opt -cost-model -analyze -mtriple=amdgcn-unknown-amdhsa -mcpu=gfx90a < %s | FileCheck -check-prefixes=FAST %s +; RUN: opt -cost-model -analyze -mtriple=amdgcn-unknown-amdhsa -mcpu=gfx900 < %s | FileCheck -check-prefixes=FAST %s +; RUN: opt -cost-model -analyze -mtriple=amdgcn-unknown-amdhsa < %s | FileCheck -check-prefixes=SLOW %s + +; RUN: opt -cost-model -cost-kind=code-size -analyze -mtriple=amdgcn-unknown-amdhsa -mcpu=gfx1010 < %s | FileCheck -check-prefixes=ALL-SIZE %s +; RUN: opt -cost-model -cost-kind=code-size -analyze -mtriple=amdgcn-unknown-amdhsa -mcpu=gfx90a < %s | FileCheck -check-prefixes=ALL-SIZE %s +; RUN: opt -cost-model -cost-kind=code-size -analyze -mtriple=amdgcn-unknown-amdhsa -mcpu=gfx900 < %s | FileCheck -check-prefixes=ALL-SIZE %s +; RUN: opt -cost-model -cost-kind=code-size -analyze -mtriple=amdgcn-unknown-amdhsa < %s | FileCheck -check-prefixes=ALL-SIZE %s +; END. 
+ +declare i64 @llvm.smax.i64(i64, i64) +declare <2 x i64> @llvm.smax.v2i64(<2 x i64>, <2 x i64>) +declare <4 x i64> @llvm.smax.v4i64(<4 x i64>, <4 x i64>) +declare <8 x i64> @llvm.smax.v8i64(<8 x i64>, <8 x i64>) + +declare i32 @llvm.smax.i32(i32, i32) +declare <2 x i32> @llvm.smax.v2i32(<2 x i32>, <2 x i32>) +declare <4 x i32> @llvm.smax.v4i32(<4 x i32>, <4 x i32>) +declare <8 x i32> @llvm.smax.v8i32(<8 x i32>, <8 x i32>) +declare <16 x i32> @llvm.smax.v16i32(<16 x i32>, <16 x i32>) + +declare i16 @llvm.smax.i16(i16, i16) +declare <2 x i16> @llvm.smax.v2i16(<2 x i16>, <2 x i16>) +declare <4 x i16> @llvm.smax.v4i16(<4 x i16>, <4 x i16>) +declare <8 x i16> @llvm.smax.v8i16(<8 x i16>, <8 x i16>) +declare <16 x i16> @llvm.smax.v16i16(<16 x i16>, <16 x i16>) +declare <32 x i16> @llvm.smax.v32i16(<32 x i16>, <32 x i16>) + +declare i8 @llvm.smax.i8(i8, i8) +declare <2 x i8> @llvm.smax.v2i8(<2 x i8>, <2 x i8>) +declare <4 x i8> @llvm.smax.v4i8(<4 x i8>, <4 x i8>) +declare <8 x i8> @llvm.smax.v8i8(<8 x i8>, <8 x i8>) +declare <16 x i8> @llvm.smax.v16i8(<16 x i8>, <16 x i8>) +declare <32 x i8> @llvm.smax.v32i8(<32 x i8>, <32 x i8>) +declare <64 x i8> @llvm.smax.v64i8(<64 x i8>, <64 x i8>) + +define i32 @smax(i32 %arg) { +; FAST-LABEL: 'smax' +; FAST-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %I64 = call i64 @llvm.smax.i64(i64 undef, i64 undef) +; FAST-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V2I64 = call <2 x i64> @llvm.smax.v2i64(<2 x i64> undef, <2 x i64> undef) +; FAST-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V4I64 = call <4 x i64> @llvm.smax.v4i64(<4 x i64> undef, <4 x i64> undef) +; FAST-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V8I64 = call <8 x i64> @llvm.smax.v8i64(<8 x i64> undef, <8 x i64> undef) +; FAST-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %I32 = call i32 @llvm.smax.i32(i32 undef, i32 undef) +; FAST-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V2I32 = call <2 x i32> @llvm.smax.v2i32(<2 x i32> undef, <2 x i32> undef) +; FAST-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V4I32 = call <4 x i32> @llvm.smax.v4i32(<4 x i32> undef, <4 x i32> undef) +; FAST-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V8I32 = call <8 x i32> @llvm.smax.v8i32(<8 x i32> undef, <8 x i32> undef) +; FAST-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %V16I32 = call <16 x i32> @llvm.smax.v16i32(<16 x i32> undef, <16 x i32> undef) +; FAST-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %I16 = call i16 @llvm.smax.i16(i16 undef, i16 undef) +; FAST-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V2I16 = call <2 x i16> @llvm.smax.v2i16(<2 x i16> undef, <2 x i16> undef) +; FAST-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %V4I16 = call <4 x i16> @llvm.smax.v4i16(<4 x i16> undef, <4 x i16> undef) +; FAST-NEXT: Cost Model: Found an estimated cost of 30 for instruction: %V8I16 = call <8 x i16> @llvm.smax.v8i16(<8 x i16> undef, <8 x i16> undef) +; FAST-NEXT: Cost Model: Found an estimated cost of 62 for instruction: %V16I16 = call <16 x i16> @llvm.smax.v16i16(<16 x i16> undef, <16 x i16> undef) +; FAST-NEXT: Cost Model: Found an estimated cost of 126 for instruction: %V32I16 = call <32 x i16> @llvm.smax.v32i16(<32 x i16> undef, <32 x i16> undef) +; FAST-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %I8 = call i8 @llvm.smax.i8(i8 undef, i8 undef) +; FAST-NEXT: Cost 
Model: Found an estimated cost of 8 for instruction: %V2I8 = call <2 x i8> @llvm.smax.v2i8(<2 x i8> undef, <2 x i8> undef) +; FAST-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V4I8 = call <4 x i8> @llvm.smax.v4i8(<4 x i8> undef, <4 x i8> undef) +; FAST-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %V8I8 = call <8 x i8> @llvm.smax.v8i8(<8 x i8> undef, <8 x i8> undef) +; FAST-NEXT: Cost Model: Found an estimated cost of 64 for instruction: %V16I8 = call <16 x i8> @llvm.smax.v16i8(<16 x i8> undef, <16 x i8> undef) +; FAST-NEXT: Cost Model: Found an estimated cost of 128 for instruction: %V32I8 = call <32 x i8> @llvm.smax.v32i8(<32 x i8> undef, <32 x i8> undef) +; FAST-NEXT: Cost Model: Found an estimated cost of 256 for instruction: %V64I8 = call <64 x i8> @llvm.smax.v64i8(<64 x i8> undef, <64 x i8> undef) +; FAST-NEXT: Cost Model: Found an estimated cost of 10 for instruction: ret i32 undef +; +; SLOW-LABEL: 'smax' +; SLOW-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %I64 = call i64 @llvm.smax.i64(i64 undef, i64 undef) +; SLOW-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V2I64 = call <2 x i64> @llvm.smax.v2i64(<2 x i64> undef, <2 x i64> undef) +; SLOW-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V4I64 = call <4 x i64> @llvm.smax.v4i64(<4 x i64> undef, <4 x i64> undef) +; SLOW-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V8I64 = call <8 x i64> @llvm.smax.v8i64(<8 x i64> undef, <8 x i64> undef) +; SLOW-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %I32 = call i32 @llvm.smax.i32(i32 undef, i32 undef) +; SLOW-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V2I32 = call <2 x i32> @llvm.smax.v2i32(<2 x i32> undef, <2 x i32> undef) +; SLOW-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V4I32 = call <4 x i32> @llvm.smax.v4i32(<4 x i32> undef, <4 x i32> undef) +; SLOW-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V8I32 = call <8 x i32> @llvm.smax.v8i32(<8 x i32> undef, <8 x i32> undef) +; SLOW-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %V16I32 = call <16 x i32> @llvm.smax.v16i32(<16 x i32> undef, <16 x i32> undef) +; SLOW-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %I16 = call i16 @llvm.smax.i16(i16 undef, i16 undef) +; SLOW-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V2I16 = call <2 x i16> @llvm.smax.v2i16(<2 x i16> undef, <2 x i16> undef) +; SLOW-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V4I16 = call <4 x i16> @llvm.smax.v4i16(<4 x i16> undef, <4 x i16> undef) +; SLOW-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %V8I16 = call <8 x i16> @llvm.smax.v8i16(<8 x i16> undef, <8 x i16> undef) +; SLOW-NEXT: Cost Model: Found an estimated cost of 64 for instruction: %V16I16 = call <16 x i16> @llvm.smax.v16i16(<16 x i16> undef, <16 x i16> undef) +; SLOW-NEXT: Cost Model: Found an estimated cost of 128 for instruction: %V32I16 = call <32 x i16> @llvm.smax.v32i16(<32 x i16> undef, <32 x i16> undef) +; SLOW-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %I8 = call i8 @llvm.smax.i8(i8 undef, i8 undef) +; SLOW-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V2I8 = call <2 x i8> @llvm.smax.v2i8(<2 x i8> undef, <2 x i8> undef) +; SLOW-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V4I8 = call <4 x i8> @llvm.smax.v4i8(<4 x i8> undef, <4 x i8> undef) +; SLOW-NEXT: Cost 
Model: Found an estimated cost of 32 for instruction: %V8I8 = call <8 x i8> @llvm.smax.v8i8(<8 x i8> undef, <8 x i8> undef) +; SLOW-NEXT: Cost Model: Found an estimated cost of 64 for instruction: %V16I8 = call <16 x i8> @llvm.smax.v16i8(<16 x i8> undef, <16 x i8> undef) +; SLOW-NEXT: Cost Model: Found an estimated cost of 128 for instruction: %V32I8 = call <32 x i8> @llvm.smax.v32i8(<32 x i8> undef, <32 x i8> undef) +; SLOW-NEXT: Cost Model: Found an estimated cost of 256 for instruction: %V64I8 = call <64 x i8> @llvm.smax.v64i8(<64 x i8> undef, <64 x i8> undef) +; SLOW-NEXT: Cost Model: Found an estimated cost of 10 for instruction: ret i32 undef +; +; ALL-SIZE-LABEL: 'smax' +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %I64 = call i64 @llvm.smax.i64(i64 undef, i64 undef) +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2I64 = call <2 x i64> @llvm.smax.v2i64(<2 x i64> undef, <2 x i64> undef) +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4I64 = call <4 x i64> @llvm.smax.v4i64(<4 x i64> undef, <4 x i64> undef) +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V8I64 = call <8 x i64> @llvm.smax.v8i64(<8 x i64> undef, <8 x i64> undef) +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %I32 = call i32 @llvm.smax.i32(i32 undef, i32 undef) +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2I32 = call <2 x i32> @llvm.smax.v2i32(<2 x i32> undef, <2 x i32> undef) +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4I32 = call <4 x i32> @llvm.smax.v4i32(<4 x i32> undef, <4 x i32> undef) +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V8I32 = call <8 x i32> @llvm.smax.v8i32(<8 x i32> undef, <8 x i32> undef) +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V16I32 = call <16 x i32> @llvm.smax.v16i32(<16 x i32> undef, <16 x i32> undef) +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %I16 = call i16 @llvm.smax.i16(i16 undef, i16 undef) +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2I16 = call <2 x i16> @llvm.smax.v2i16(<2 x i16> undef, <2 x i16> undef) +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4I16 = call <4 x i16> @llvm.smax.v4i16(<4 x i16> undef, <4 x i16> undef) +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V8I16 = call <8 x i16> @llvm.smax.v8i16(<8 x i16> undef, <8 x i16> undef) +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V16I16 = call <16 x i16> @llvm.smax.v16i16(<16 x i16> undef, <16 x i16> undef) +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V32I16 = call <32 x i16> @llvm.smax.v32i16(<32 x i16> undef, <32 x i16> undef) +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %I8 = call i8 @llvm.smax.i8(i8 undef, i8 undef) +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2I8 = call <2 x i8> @llvm.smax.v2i8(<2 x i8> undef, <2 x i8> undef) +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4I8 = call <4 x i8> @llvm.smax.v4i8(<4 x i8> undef, <4 x i8> undef) +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V8I8 = call <8 x i8> @llvm.smax.v8i8(<8 x i8> undef, <8 x i8> undef) +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V16I8 = call 
<16 x i8> @llvm.smax.v16i8(<16 x i8> undef, <16 x i8> undef) +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V32I8 = call <32 x i8> @llvm.smax.v32i8(<32 x i8> undef, <32 x i8> undef) +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V64I8 = call <64 x i8> @llvm.smax.v64i8(<64 x i8> undef, <64 x i8> undef) +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef +; + %I64 = call i64 @llvm.smax.i64(i64 undef, i64 undef) + %V2I64 = call <2 x i64> @llvm.smax.v2i64(<2 x i64> undef, <2 x i64> undef) + %V4I64 = call <4 x i64> @llvm.smax.v4i64(<4 x i64> undef, <4 x i64> undef) + %V8I64 = call <8 x i64> @llvm.smax.v8i64(<8 x i64> undef, <8 x i64> undef) + + %I32 = call i32 @llvm.smax.i32(i32 undef, i32 undef) + %V2I32 = call <2 x i32> @llvm.smax.v2i32(<2 x i32> undef, <2 x i32> undef) + %V4I32 = call <4 x i32> @llvm.smax.v4i32(<4 x i32> undef, <4 x i32> undef) + %V8I32 = call <8 x i32> @llvm.smax.v8i32(<8 x i32> undef, <8 x i32> undef) + %V16I32 = call <16 x i32> @llvm.smax.v16i32(<16 x i32> undef, <16 x i32> undef) + + %I16 = call i16 @llvm.smax.i16(i16 undef, i16 undef) + %V2I16 = call <2 x i16> @llvm.smax.v2i16(<2 x i16> undef, <2 x i16> undef) + %V4I16 = call <4 x i16> @llvm.smax.v4i16(<4 x i16> undef, <4 x i16> undef) + %V8I16 = call <8 x i16> @llvm.smax.v8i16(<8 x i16> undef, <8 x i16> undef) + %V16I16 = call <16 x i16> @llvm.smax.v16i16(<16 x i16> undef, <16 x i16> undef) + %V32I16 = call <32 x i16> @llvm.smax.v32i16(<32 x i16> undef, <32 x i16> undef) + + %I8 = call i8 @llvm.smax.i8(i8 undef, i8 undef) + %V2I8 = call <2 x i8> @llvm.smax.v2i8(<2 x i8> undef, <2 x i8> undef) + %V4I8 = call <4 x i8> @llvm.smax.v4i8(<4 x i8> undef, <4 x i8> undef) + %V8I8 = call <8 x i8> @llvm.smax.v8i8(<8 x i8> undef, <8 x i8> undef) + %V16I8 = call <16 x i8> @llvm.smax.v16i8(<16 x i8> undef, <16 x i8> undef) + %V32I8 = call <32 x i8> @llvm.smax.v32i8(<32 x i8> undef, <32 x i8> undef) + %V64I8 = call <64 x i8> @llvm.smax.v64i8(<64 x i8> undef, <64 x i8> undef) + + ret i32 undef +} + +declare i64 @llvm.smin.i64(i64, i64) +declare <2 x i64> @llvm.smin.v2i64(<2 x i64>, <2 x i64>) +declare <4 x i64> @llvm.smin.v4i64(<4 x i64>, <4 x i64>) +declare <8 x i64> @llvm.smin.v8i64(<8 x i64>, <8 x i64>) + +declare i32 @llvm.smin.i32(i32, i32) +declare <2 x i32> @llvm.smin.v2i32(<2 x i32>, <2 x i32>) +declare <4 x i32> @llvm.smin.v4i32(<4 x i32>, <4 x i32>) +declare <8 x i32> @llvm.smin.v8i32(<8 x i32>, <8 x i32>) +declare <16 x i32> @llvm.smin.v16i32(<16 x i32>, <16 x i32>) + +declare i16 @llvm.smin.i16(i16, i16) +declare <2 x i16> @llvm.smin.v2i16(<2 x i16>, <2 x i16>) +declare <4 x i16> @llvm.smin.v4i16(<4 x i16>, <4 x i16>) +declare <8 x i16> @llvm.smin.v8i16(<8 x i16>, <8 x i16>) +declare <16 x i16> @llvm.smin.v16i16(<16 x i16>, <16 x i16>) +declare <32 x i16> @llvm.smin.v32i16(<32 x i16>, <32 x i16>) + +declare i8 @llvm.smin.i8(i8, i8) +declare <2 x i8> @llvm.smin.v2i8(<2 x i8>, <2 x i8>) +declare <4 x i8> @llvm.smin.v4i8(<4 x i8>, <4 x i8>) +declare <8 x i8> @llvm.smin.v8i8(<8 x i8>, <8 x i8>) +declare <16 x i8> @llvm.smin.v16i8(<16 x i8>, <16 x i8>) +declare <32 x i8> @llvm.smin.v32i8(<32 x i8>, <32 x i8>) +declare <64 x i8> @llvm.smin.v64i8(<64 x i8>, <64 x i8>) + +define i32 @smin(i32 %arg) { +; FAST-LABEL: 'smin' +; FAST-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %I64 = call i64 @llvm.smin.i64(i64 undef, i64 undef) +; FAST-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V2I64 = call <2 x 
i64> @llvm.smin.v2i64(<2 x i64> undef, <2 x i64> undef) +; FAST-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V4I64 = call <4 x i64> @llvm.smin.v4i64(<4 x i64> undef, <4 x i64> undef) +; FAST-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V8I64 = call <8 x i64> @llvm.smin.v8i64(<8 x i64> undef, <8 x i64> undef) +; FAST-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %I32 = call i32 @llvm.smin.i32(i32 undef, i32 undef) +; FAST-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V2I32 = call <2 x i32> @llvm.smin.v2i32(<2 x i32> undef, <2 x i32> undef) +; FAST-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V4I32 = call <4 x i32> @llvm.smin.v4i32(<4 x i32> undef, <4 x i32> undef) +; FAST-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V8I32 = call <8 x i32> @llvm.smin.v8i32(<8 x i32> undef, <8 x i32> undef) +; FAST-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %V16I32 = call <16 x i32> @llvm.smin.v16i32(<16 x i32> undef, <16 x i32> undef) +; FAST-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %I16 = call i16 @llvm.smin.i16(i16 undef, i16 undef) +; FAST-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V2I16 = call <2 x i16> @llvm.smin.v2i16(<2 x i16> undef, <2 x i16> undef) +; FAST-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %V4I16 = call <4 x i16> @llvm.smin.v4i16(<4 x i16> undef, <4 x i16> undef) +; FAST-NEXT: Cost Model: Found an estimated cost of 30 for instruction: %V8I16 = call <8 x i16> @llvm.smin.v8i16(<8 x i16> undef, <8 x i16> undef) +; FAST-NEXT: Cost Model: Found an estimated cost of 62 for instruction: %V16I16 = call <16 x i16> @llvm.smin.v16i16(<16 x i16> undef, <16 x i16> undef) +; FAST-NEXT: Cost Model: Found an estimated cost of 126 for instruction: %V32I16 = call <32 x i16> @llvm.smin.v32i16(<32 x i16> undef, <32 x i16> undef) +; FAST-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %I8 = call i8 @llvm.smin.i8(i8 undef, i8 undef) +; FAST-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V2I8 = call <2 x i8> @llvm.smin.v2i8(<2 x i8> undef, <2 x i8> undef) +; FAST-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V4I8 = call <4 x i8> @llvm.smin.v4i8(<4 x i8> undef, <4 x i8> undef) +; FAST-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %V8I8 = call <8 x i8> @llvm.smin.v8i8(<8 x i8> undef, <8 x i8> undef) +; FAST-NEXT: Cost Model: Found an estimated cost of 64 for instruction: %V16I8 = call <16 x i8> @llvm.smin.v16i8(<16 x i8> undef, <16 x i8> undef) +; FAST-NEXT: Cost Model: Found an estimated cost of 128 for instruction: %V32I8 = call <32 x i8> @llvm.smin.v32i8(<32 x i8> undef, <32 x i8> undef) +; FAST-NEXT: Cost Model: Found an estimated cost of 256 for instruction: %V64I8 = call <64 x i8> @llvm.smin.v64i8(<64 x i8> undef, <64 x i8> undef) +; FAST-NEXT: Cost Model: Found an estimated cost of 10 for instruction: ret i32 undef +; +; SLOW-LABEL: 'smin' +; SLOW-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %I64 = call i64 @llvm.smin.i64(i64 undef, i64 undef) +; SLOW-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V2I64 = call <2 x i64> @llvm.smin.v2i64(<2 x i64> undef, <2 x i64> undef) +; SLOW-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V4I64 = call <4 x i64> @llvm.smin.v4i64(<4 x i64> undef, <4 x i64> undef) +; SLOW-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V8I64 = 
call <8 x i64> @llvm.smin.v8i64(<8 x i64> undef, <8 x i64> undef) +; SLOW-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %I32 = call i32 @llvm.smin.i32(i32 undef, i32 undef) +; SLOW-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V2I32 = call <2 x i32> @llvm.smin.v2i32(<2 x i32> undef, <2 x i32> undef) +; SLOW-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V4I32 = call <4 x i32> @llvm.smin.v4i32(<4 x i32> undef, <4 x i32> undef) +; SLOW-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V8I32 = call <8 x i32> @llvm.smin.v8i32(<8 x i32> undef, <8 x i32> undef) +; SLOW-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %V16I32 = call <16 x i32> @llvm.smin.v16i32(<16 x i32> undef, <16 x i32> undef) +; SLOW-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %I16 = call i16 @llvm.smin.i16(i16 undef, i16 undef) +; SLOW-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V2I16 = call <2 x i16> @llvm.smin.v2i16(<2 x i16> undef, <2 x i16> undef) +; SLOW-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V4I16 = call <4 x i16> @llvm.smin.v4i16(<4 x i16> undef, <4 x i16> undef) +; SLOW-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %V8I16 = call <8 x i16> @llvm.smin.v8i16(<8 x i16> undef, <8 x i16> undef) +; SLOW-NEXT: Cost Model: Found an estimated cost of 64 for instruction: %V16I16 = call <16 x i16> @llvm.smin.v16i16(<16 x i16> undef, <16 x i16> undef) +; SLOW-NEXT: Cost Model: Found an estimated cost of 128 for instruction: %V32I16 = call <32 x i16> @llvm.smin.v32i16(<32 x i16> undef, <32 x i16> undef) +; SLOW-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %I8 = call i8 @llvm.smin.i8(i8 undef, i8 undef) +; SLOW-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V2I8 = call <2 x i8> @llvm.smin.v2i8(<2 x i8> undef, <2 x i8> undef) +; SLOW-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V4I8 = call <4 x i8> @llvm.smin.v4i8(<4 x i8> undef, <4 x i8> undef) +; SLOW-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %V8I8 = call <8 x i8> @llvm.smin.v8i8(<8 x i8> undef, <8 x i8> undef) +; SLOW-NEXT: Cost Model: Found an estimated cost of 64 for instruction: %V16I8 = call <16 x i8> @llvm.smin.v16i8(<16 x i8> undef, <16 x i8> undef) +; SLOW-NEXT: Cost Model: Found an estimated cost of 128 for instruction: %V32I8 = call <32 x i8> @llvm.smin.v32i8(<32 x i8> undef, <32 x i8> undef) +; SLOW-NEXT: Cost Model: Found an estimated cost of 256 for instruction: %V64I8 = call <64 x i8> @llvm.smin.v64i8(<64 x i8> undef, <64 x i8> undef) +; SLOW-NEXT: Cost Model: Found an estimated cost of 10 for instruction: ret i32 undef +; +; ALL-SIZE-LABEL: 'smin' +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %I64 = call i64 @llvm.smin.i64(i64 undef, i64 undef) +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2I64 = call <2 x i64> @llvm.smin.v2i64(<2 x i64> undef, <2 x i64> undef) +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4I64 = call <4 x i64> @llvm.smin.v4i64(<4 x i64> undef, <4 x i64> undef) +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V8I64 = call <8 x i64> @llvm.smin.v8i64(<8 x i64> undef, <8 x i64> undef) +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %I32 = call i32 @llvm.smin.i32(i32 undef, i32 undef) +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 2 for 
instruction: %V2I32 = call <2 x i32> @llvm.smin.v2i32(<2 x i32> undef, <2 x i32> undef) +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4I32 = call <4 x i32> @llvm.smin.v4i32(<4 x i32> undef, <4 x i32> undef) +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V8I32 = call <8 x i32> @llvm.smin.v8i32(<8 x i32> undef, <8 x i32> undef) +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V16I32 = call <16 x i32> @llvm.smin.v16i32(<16 x i32> undef, <16 x i32> undef) +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %I16 = call i16 @llvm.smin.i16(i16 undef, i16 undef) +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2I16 = call <2 x i16> @llvm.smin.v2i16(<2 x i16> undef, <2 x i16> undef) +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4I16 = call <4 x i16> @llvm.smin.v4i16(<4 x i16> undef, <4 x i16> undef) +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V8I16 = call <8 x i16> @llvm.smin.v8i16(<8 x i16> undef, <8 x i16> undef) +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V16I16 = call <16 x i16> @llvm.smin.v16i16(<16 x i16> undef, <16 x i16> undef) +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V32I16 = call <32 x i16> @llvm.smin.v32i16(<32 x i16> undef, <32 x i16> undef) +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %I8 = call i8 @llvm.smin.i8(i8 undef, i8 undef) +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2I8 = call <2 x i8> @llvm.smin.v2i8(<2 x i8> undef, <2 x i8> undef) +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4I8 = call <4 x i8> @llvm.smin.v4i8(<4 x i8> undef, <4 x i8> undef) +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V8I8 = call <8 x i8> @llvm.smin.v8i8(<8 x i8> undef, <8 x i8> undef) +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V16I8 = call <16 x i8> @llvm.smin.v16i8(<16 x i8> undef, <16 x i8> undef) +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V32I8 = call <32 x i8> @llvm.smin.v32i8(<32 x i8> undef, <32 x i8> undef) +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V64I8 = call <64 x i8> @llvm.smin.v64i8(<64 x i8> undef, <64 x i8> undef) +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef +; + %I64 = call i64 @llvm.smin.i64(i64 undef, i64 undef) + %V2I64 = call <2 x i64> @llvm.smin.v2i64(<2 x i64> undef, <2 x i64> undef) + %V4I64 = call <4 x i64> @llvm.smin.v4i64(<4 x i64> undef, <4 x i64> undef) + %V8I64 = call <8 x i64> @llvm.smin.v8i64(<8 x i64> undef, <8 x i64> undef) + + %I32 = call i32 @llvm.smin.i32(i32 undef, i32 undef) + %V2I32 = call <2 x i32> @llvm.smin.v2i32(<2 x i32> undef, <2 x i32> undef) + %V4I32 = call <4 x i32> @llvm.smin.v4i32(<4 x i32> undef, <4 x i32> undef) + %V8I32 = call <8 x i32> @llvm.smin.v8i32(<8 x i32> undef, <8 x i32> undef) + %V16I32 = call <16 x i32> @llvm.smin.v16i32(<16 x i32> undef, <16 x i32> undef) + + %I16 = call i16 @llvm.smin.i16(i16 undef, i16 undef) + %V2I16 = call <2 x i16> @llvm.smin.v2i16(<2 x i16> undef, <2 x i16> undef) + %V4I16 = call <4 x i16> @llvm.smin.v4i16(<4 x i16> undef, <4 x i16> undef) + %V8I16 = call <8 x i16> @llvm.smin.v8i16(<8 x i16> undef, <8 x i16> undef) + %V16I16 = call <16 x i16> @llvm.smin.v16i16(<16 x i16> 
undef, <16 x i16> undef) + %V32I16 = call <32 x i16> @llvm.smin.v32i16(<32 x i16> undef, <32 x i16> undef) + + %I8 = call i8 @llvm.smin.i8(i8 undef, i8 undef) + %V2I8 = call <2 x i8> @llvm.smin.v2i8(<2 x i8> undef, <2 x i8> undef) + %V4I8 = call <4 x i8> @llvm.smin.v4i8(<4 x i8> undef, <4 x i8> undef) + %V8I8 = call <8 x i8> @llvm.smin.v8i8(<8 x i8> undef, <8 x i8> undef) + %V16I8 = call <16 x i8> @llvm.smin.v16i8(<16 x i8> undef, <16 x i8> undef) + %V32I8 = call <32 x i8> @llvm.smin.v32i8(<32 x i8> undef, <32 x i8> undef) + %V64I8 = call <64 x i8> @llvm.smin.v64i8(<64 x i8> undef, <64 x i8> undef) + + ret i32 undef +} diff --git a/llvm/test/Analysis/CostModel/AMDGPU/arith-uminmax.ll b/llvm/test/Analysis/CostModel/AMDGPU/arith-uminmax.ll new file mode 100644 index 000000000000..f42a1a0990c2 --- /dev/null +++ b/llvm/test/Analysis/CostModel/AMDGPU/arith-uminmax.ll @@ -0,0 +1,273 @@ +; NOTE: Assertions have been autogenerated by utils/update_analyze_test_checks.py +; RUN: opt -cost-model -analyze -mtriple=amdgcn-unknown-amdhsa -mcpu=gfx1010 < %s | FileCheck -check-prefixes=FAST %s +; RUN: opt -cost-model -analyze -mtriple=amdgcn-unknown-amdhsa -mcpu=gfx90a < %s | FileCheck -check-prefixes=FAST %s +; RUN: opt -cost-model -analyze -mtriple=amdgcn-unknown-amdhsa -mcpu=gfx900 < %s | FileCheck -check-prefixes=FAST %s +; RUN: opt -cost-model -analyze -mtriple=amdgcn-unknown-amdhsa < %s | FileCheck -check-prefixes=SLOW %s + +; RUN: opt -cost-model -cost-kind=code-size -analyze -mtriple=amdgcn-unknown-amdhsa -mcpu=gfx1010 < %s | FileCheck -check-prefixes=ALL-SIZE %s +; RUN: opt -cost-model -cost-kind=code-size -analyze -mtriple=amdgcn-unknown-amdhsa -mcpu=gfx90a < %s | FileCheck -check-prefixes=ALL-SIZE %s +; RUN: opt -cost-model -cost-kind=code-size -analyze -mtriple=amdgcn-unknown-amdhsa -mcpu=gfx900 < %s | FileCheck -check-prefixes=ALL-SIZE %s +; RUN: opt -cost-model -cost-kind=code-size -analyze -mtriple=amdgcn-unknown-amdhsa < %s | FileCheck -check-prefixes=ALL-SIZE %s +; END. 
+ +declare i64 @llvm.umax.i64(i64, i64) +declare <2 x i64> @llvm.umax.v2i64(<2 x i64>, <2 x i64>) +declare <4 x i64> @llvm.umax.v4i64(<4 x i64>, <4 x i64>) +declare <8 x i64> @llvm.umax.v8i64(<8 x i64>, <8 x i64>) + +declare i32 @llvm.umax.i32(i32, i32) +declare <2 x i32> @llvm.umax.v2i32(<2 x i32>, <2 x i32>) +declare <4 x i32> @llvm.umax.v4i32(<4 x i32>, <4 x i32>) +declare <8 x i32> @llvm.umax.v8i32(<8 x i32>, <8 x i32>) +declare <16 x i32> @llvm.umax.v16i32(<16 x i32>, <16 x i32>) + +declare i16 @llvm.umax.i16(i16, i16) +declare <2 x i16> @llvm.umax.v2i16(<2 x i16>, <2 x i16>) +declare <4 x i16> @llvm.umax.v4i16(<4 x i16>, <4 x i16>) +declare <8 x i16> @llvm.umax.v8i16(<8 x i16>, <8 x i16>) +declare <16 x i16> @llvm.umax.v16i16(<16 x i16>, <16 x i16>) +declare <32 x i16> @llvm.umax.v32i16(<32 x i16>, <32 x i16>) + +declare i8 @llvm.umax.i8(i8, i8) +declare <2 x i8> @llvm.umax.v2i8(<2 x i8>, <2 x i8>) +declare <4 x i8> @llvm.umax.v4i8(<4 x i8>, <4 x i8>) +declare <8 x i8> @llvm.umax.v8i8(<8 x i8>, <8 x i8>) +declare <16 x i8> @llvm.umax.v16i8(<16 x i8>, <16 x i8>) +declare <32 x i8> @llvm.umax.v32i8(<32 x i8>, <32 x i8>) +declare <64 x i8> @llvm.umax.v64i8(<64 x i8>, <64 x i8>) + +define i32 @umax(i32 %arg) { +; FAST-LABEL: 'umax' +; FAST-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %I64 = call i64 @llvm.umax.i64(i64 undef, i64 undef) +; FAST-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V2I64 = call <2 x i64> @llvm.umax.v2i64(<2 x i64> undef, <2 x i64> undef) +; FAST-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V4I64 = call <4 x i64> @llvm.umax.v4i64(<4 x i64> undef, <4 x i64> undef) +; FAST-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V8I64 = call <8 x i64> @llvm.umax.v8i64(<8 x i64> undef, <8 x i64> undef) +; FAST-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %I32 = call i32 @llvm.umax.i32(i32 undef, i32 undef) +; FAST-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V2I32 = call <2 x i32> @llvm.umax.v2i32(<2 x i32> undef, <2 x i32> undef) +; FAST-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V4I32 = call <4 x i32> @llvm.umax.v4i32(<4 x i32> undef, <4 x i32> undef) +; FAST-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V8I32 = call <8 x i32> @llvm.umax.v8i32(<8 x i32> undef, <8 x i32> undef) +; FAST-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %V16I32 = call <16 x i32> @llvm.umax.v16i32(<16 x i32> undef, <16 x i32> undef) +; FAST-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %I16 = call i16 @llvm.umax.i16(i16 undef, i16 undef) +; FAST-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V2I16 = call <2 x i16> @llvm.umax.v2i16(<2 x i16> undef, <2 x i16> undef) +; FAST-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %V4I16 = call <4 x i16> @llvm.umax.v4i16(<4 x i16> undef, <4 x i16> undef) +; FAST-NEXT: Cost Model: Found an estimated cost of 30 for instruction: %V8I16 = call <8 x i16> @llvm.umax.v8i16(<8 x i16> undef, <8 x i16> undef) +; FAST-NEXT: Cost Model: Found an estimated cost of 62 for instruction: %V16I16 = call <16 x i16> @llvm.umax.v16i16(<16 x i16> undef, <16 x i16> undef) +; FAST-NEXT: Cost Model: Found an estimated cost of 126 for instruction: %V32I16 = call <32 x i16> @llvm.umax.v32i16(<32 x i16> undef, <32 x i16> undef) +; FAST-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %I8 = call i8 @llvm.umax.i8(i8 undef, i8 undef) +; FAST-NEXT: Cost 
Model: Found an estimated cost of 8 for instruction: %V2I8 = call <2 x i8> @llvm.umax.v2i8(<2 x i8> undef, <2 x i8> undef) +; FAST-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V4I8 = call <4 x i8> @llvm.umax.v4i8(<4 x i8> undef, <4 x i8> undef) +; FAST-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %V8I8 = call <8 x i8> @llvm.umax.v8i8(<8 x i8> undef, <8 x i8> undef) +; FAST-NEXT: Cost Model: Found an estimated cost of 64 for instruction: %V16I8 = call <16 x i8> @llvm.umax.v16i8(<16 x i8> undef, <16 x i8> undef) +; FAST-NEXT: Cost Model: Found an estimated cost of 128 for instruction: %V32I8 = call <32 x i8> @llvm.umax.v32i8(<32 x i8> undef, <32 x i8> undef) +; FAST-NEXT: Cost Model: Found an estimated cost of 256 for instruction: %V64I8 = call <64 x i8> @llvm.umax.v64i8(<64 x i8> undef, <64 x i8> undef) +; FAST-NEXT: Cost Model: Found an estimated cost of 10 for instruction: ret i32 undef +; +; SLOW-LABEL: 'umax' +; SLOW-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %I64 = call i64 @llvm.umax.i64(i64 undef, i64 undef) +; SLOW-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V2I64 = call <2 x i64> @llvm.umax.v2i64(<2 x i64> undef, <2 x i64> undef) +; SLOW-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V4I64 = call <4 x i64> @llvm.umax.v4i64(<4 x i64> undef, <4 x i64> undef) +; SLOW-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V8I64 = call <8 x i64> @llvm.umax.v8i64(<8 x i64> undef, <8 x i64> undef) +; SLOW-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %I32 = call i32 @llvm.umax.i32(i32 undef, i32 undef) +; SLOW-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V2I32 = call <2 x i32> @llvm.umax.v2i32(<2 x i32> undef, <2 x i32> undef) +; SLOW-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V4I32 = call <4 x i32> @llvm.umax.v4i32(<4 x i32> undef, <4 x i32> undef) +; SLOW-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V8I32 = call <8 x i32> @llvm.umax.v8i32(<8 x i32> undef, <8 x i32> undef) +; SLOW-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %V16I32 = call <16 x i32> @llvm.umax.v16i32(<16 x i32> undef, <16 x i32> undef) +; SLOW-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %I16 = call i16 @llvm.umax.i16(i16 undef, i16 undef) +; SLOW-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V2I16 = call <2 x i16> @llvm.umax.v2i16(<2 x i16> undef, <2 x i16> undef) +; SLOW-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V4I16 = call <4 x i16> @llvm.umax.v4i16(<4 x i16> undef, <4 x i16> undef) +; SLOW-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %V8I16 = call <8 x i16> @llvm.umax.v8i16(<8 x i16> undef, <8 x i16> undef) +; SLOW-NEXT: Cost Model: Found an estimated cost of 64 for instruction: %V16I16 = call <16 x i16> @llvm.umax.v16i16(<16 x i16> undef, <16 x i16> undef) +; SLOW-NEXT: Cost Model: Found an estimated cost of 128 for instruction: %V32I16 = call <32 x i16> @llvm.umax.v32i16(<32 x i16> undef, <32 x i16> undef) +; SLOW-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %I8 = call i8 @llvm.umax.i8(i8 undef, i8 undef) +; SLOW-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V2I8 = call <2 x i8> @llvm.umax.v2i8(<2 x i8> undef, <2 x i8> undef) +; SLOW-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V4I8 = call <4 x i8> @llvm.umax.v4i8(<4 x i8> undef, <4 x i8> undef) +; SLOW-NEXT: Cost 
Model: Found an estimated cost of 32 for instruction: %V8I8 = call <8 x i8> @llvm.umax.v8i8(<8 x i8> undef, <8 x i8> undef) +; SLOW-NEXT: Cost Model: Found an estimated cost of 64 for instruction: %V16I8 = call <16 x i8> @llvm.umax.v16i8(<16 x i8> undef, <16 x i8> undef) +; SLOW-NEXT: Cost Model: Found an estimated cost of 128 for instruction: %V32I8 = call <32 x i8> @llvm.umax.v32i8(<32 x i8> undef, <32 x i8> undef) +; SLOW-NEXT: Cost Model: Found an estimated cost of 256 for instruction: %V64I8 = call <64 x i8> @llvm.umax.v64i8(<64 x i8> undef, <64 x i8> undef) +; SLOW-NEXT: Cost Model: Found an estimated cost of 10 for instruction: ret i32 undef +; +; ALL-SIZE-LABEL: 'umax' +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %I64 = call i64 @llvm.umax.i64(i64 undef, i64 undef) +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2I64 = call <2 x i64> @llvm.umax.v2i64(<2 x i64> undef, <2 x i64> undef) +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4I64 = call <4 x i64> @llvm.umax.v4i64(<4 x i64> undef, <4 x i64> undef) +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V8I64 = call <8 x i64> @llvm.umax.v8i64(<8 x i64> undef, <8 x i64> undef) +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %I32 = call i32 @llvm.umax.i32(i32 undef, i32 undef) +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2I32 = call <2 x i32> @llvm.umax.v2i32(<2 x i32> undef, <2 x i32> undef) +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4I32 = call <4 x i32> @llvm.umax.v4i32(<4 x i32> undef, <4 x i32> undef) +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V8I32 = call <8 x i32> @llvm.umax.v8i32(<8 x i32> undef, <8 x i32> undef) +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V16I32 = call <16 x i32> @llvm.umax.v16i32(<16 x i32> undef, <16 x i32> undef) +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %I16 = call i16 @llvm.umax.i16(i16 undef, i16 undef) +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2I16 = call <2 x i16> @llvm.umax.v2i16(<2 x i16> undef, <2 x i16> undef) +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4I16 = call <4 x i16> @llvm.umax.v4i16(<4 x i16> undef, <4 x i16> undef) +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V8I16 = call <8 x i16> @llvm.umax.v8i16(<8 x i16> undef, <8 x i16> undef) +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V16I16 = call <16 x i16> @llvm.umax.v16i16(<16 x i16> undef, <16 x i16> undef) +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V32I16 = call <32 x i16> @llvm.umax.v32i16(<32 x i16> undef, <32 x i16> undef) +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %I8 = call i8 @llvm.umax.i8(i8 undef, i8 undef) +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2I8 = call <2 x i8> @llvm.umax.v2i8(<2 x i8> undef, <2 x i8> undef) +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4I8 = call <4 x i8> @llvm.umax.v4i8(<4 x i8> undef, <4 x i8> undef) +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V8I8 = call <8 x i8> @llvm.umax.v8i8(<8 x i8> undef, <8 x i8> undef) +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V16I8 = call 
<16 x i8> @llvm.umax.v16i8(<16 x i8> undef, <16 x i8> undef) +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V32I8 = call <32 x i8> @llvm.umax.v32i8(<32 x i8> undef, <32 x i8> undef) +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V64I8 = call <64 x i8> @llvm.umax.v64i8(<64 x i8> undef, <64 x i8> undef) +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef +; + %I64 = call i64 @llvm.umax.i64(i64 undef, i64 undef) + %V2I64 = call <2 x i64> @llvm.umax.v2i64(<2 x i64> undef, <2 x i64> undef) + %V4I64 = call <4 x i64> @llvm.umax.v4i64(<4 x i64> undef, <4 x i64> undef) + %V8I64 = call <8 x i64> @llvm.umax.v8i64(<8 x i64> undef, <8 x i64> undef) + + %I32 = call i32 @llvm.umax.i32(i32 undef, i32 undef) + %V2I32 = call <2 x i32> @llvm.umax.v2i32(<2 x i32> undef, <2 x i32> undef) + %V4I32 = call <4 x i32> @llvm.umax.v4i32(<4 x i32> undef, <4 x i32> undef) + %V8I32 = call <8 x i32> @llvm.umax.v8i32(<8 x i32> undef, <8 x i32> undef) + %V16I32 = call <16 x i32> @llvm.umax.v16i32(<16 x i32> undef, <16 x i32> undef) + + %I16 = call i16 @llvm.umax.i16(i16 undef, i16 undef) + %V2I16 = call <2 x i16> @llvm.umax.v2i16(<2 x i16> undef, <2 x i16> undef) + %V4I16 = call <4 x i16> @llvm.umax.v4i16(<4 x i16> undef, <4 x i16> undef) + %V8I16 = call <8 x i16> @llvm.umax.v8i16(<8 x i16> undef, <8 x i16> undef) + %V16I16 = call <16 x i16> @llvm.umax.v16i16(<16 x i16> undef, <16 x i16> undef) + %V32I16 = call <32 x i16> @llvm.umax.v32i16(<32 x i16> undef, <32 x i16> undef) + + %I8 = call i8 @llvm.umax.i8(i8 undef, i8 undef) + %V2I8 = call <2 x i8> @llvm.umax.v2i8(<2 x i8> undef, <2 x i8> undef) + %V4I8 = call <4 x i8> @llvm.umax.v4i8(<4 x i8> undef, <4 x i8> undef) + %V8I8 = call <8 x i8> @llvm.umax.v8i8(<8 x i8> undef, <8 x i8> undef) + %V16I8 = call <16 x i8> @llvm.umax.v16i8(<16 x i8> undef, <16 x i8> undef) + %V32I8 = call <32 x i8> @llvm.umax.v32i8(<32 x i8> undef, <32 x i8> undef) + %V64I8 = call <64 x i8> @llvm.umax.v64i8(<64 x i8> undef, <64 x i8> undef) + + ret i32 undef +} + +declare i64 @llvm.umin.i64(i64, i64) +declare <2 x i64> @llvm.umin.v2i64(<2 x i64>, <2 x i64>) +declare <4 x i64> @llvm.umin.v4i64(<4 x i64>, <4 x i64>) +declare <8 x i64> @llvm.umin.v8i64(<8 x i64>, <8 x i64>) + +declare i32 @llvm.umin.i32(i32, i32) +declare <2 x i32> @llvm.umin.v2i32(<2 x i32>, <2 x i32>) +declare <4 x i32> @llvm.umin.v4i32(<4 x i32>, <4 x i32>) +declare <8 x i32> @llvm.umin.v8i32(<8 x i32>, <8 x i32>) +declare <16 x i32> @llvm.umin.v16i32(<16 x i32>, <16 x i32>) + +declare i16 @llvm.umin.i16(i16, i16) +declare <2 x i16> @llvm.umin.v2i16(<2 x i16>, <2 x i16>) +declare <4 x i16> @llvm.umin.v4i16(<4 x i16>, <4 x i16>) +declare <8 x i16> @llvm.umin.v8i16(<8 x i16>, <8 x i16>) +declare <16 x i16> @llvm.umin.v16i16(<16 x i16>, <16 x i16>) +declare <32 x i16> @llvm.umin.v32i16(<32 x i16>, <32 x i16>) + +declare i8 @llvm.umin.i8(i8, i8) +declare <2 x i8> @llvm.umin.v2i8(<2 x i8>, <2 x i8>) +declare <4 x i8> @llvm.umin.v4i8(<4 x i8>, <4 x i8>) +declare <8 x i8> @llvm.umin.v8i8(<8 x i8>, <8 x i8>) +declare <16 x i8> @llvm.umin.v16i8(<16 x i8>, <16 x i8>) +declare <32 x i8> @llvm.umin.v32i8(<32 x i8>, <32 x i8>) +declare <64 x i8> @llvm.umin.v64i8(<64 x i8>, <64 x i8>) + +define i32 @umin(i32 %arg) { +; FAST-LABEL: 'umin' +; FAST-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %I64 = call i64 @llvm.umin.i64(i64 undef, i64 undef) +; FAST-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V2I64 = call <2 x 
i64> @llvm.umin.v2i64(<2 x i64> undef, <2 x i64> undef) +; FAST-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V4I64 = call <4 x i64> @llvm.umin.v4i64(<4 x i64> undef, <4 x i64> undef) +; FAST-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V8I64 = call <8 x i64> @llvm.umin.v8i64(<8 x i64> undef, <8 x i64> undef) +; FAST-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %I32 = call i32 @llvm.umin.i32(i32 undef, i32 undef) +; FAST-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V2I32 = call <2 x i32> @llvm.umin.v2i32(<2 x i32> undef, <2 x i32> undef) +; FAST-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V4I32 = call <4 x i32> @llvm.umin.v4i32(<4 x i32> undef, <4 x i32> undef) +; FAST-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V8I32 = call <8 x i32> @llvm.umin.v8i32(<8 x i32> undef, <8 x i32> undef) +; FAST-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %V16I32 = call <16 x i32> @llvm.umin.v16i32(<16 x i32> undef, <16 x i32> undef) +; FAST-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %I16 = call i16 @llvm.umin.i16(i16 undef, i16 undef) +; FAST-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V2I16 = call <2 x i16> @llvm.umin.v2i16(<2 x i16> undef, <2 x i16> undef) +; FAST-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %V4I16 = call <4 x i16> @llvm.umin.v4i16(<4 x i16> undef, <4 x i16> undef) +; FAST-NEXT: Cost Model: Found an estimated cost of 30 for instruction: %V8I16 = call <8 x i16> @llvm.umin.v8i16(<8 x i16> undef, <8 x i16> undef) +; FAST-NEXT: Cost Model: Found an estimated cost of 62 for instruction: %V16I16 = call <16 x i16> @llvm.umin.v16i16(<16 x i16> undef, <16 x i16> undef) +; FAST-NEXT: Cost Model: Found an estimated cost of 126 for instruction: %V32I16 = call <32 x i16> @llvm.umin.v32i16(<32 x i16> undef, <32 x i16> undef) +; FAST-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %I8 = call i8 @llvm.umin.i8(i8 undef, i8 undef) +; FAST-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V2I8 = call <2 x i8> @llvm.umin.v2i8(<2 x i8> undef, <2 x i8> undef) +; FAST-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V4I8 = call <4 x i8> @llvm.umin.v4i8(<4 x i8> undef, <4 x i8> undef) +; FAST-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %V8I8 = call <8 x i8> @llvm.umin.v8i8(<8 x i8> undef, <8 x i8> undef) +; FAST-NEXT: Cost Model: Found an estimated cost of 64 for instruction: %V16I8 = call <16 x i8> @llvm.umin.v16i8(<16 x i8> undef, <16 x i8> undef) +; FAST-NEXT: Cost Model: Found an estimated cost of 128 for instruction: %V32I8 = call <32 x i8> @llvm.umin.v32i8(<32 x i8> undef, <32 x i8> undef) +; FAST-NEXT: Cost Model: Found an estimated cost of 256 for instruction: %V64I8 = call <64 x i8> @llvm.umin.v64i8(<64 x i8> undef, <64 x i8> undef) +; FAST-NEXT: Cost Model: Found an estimated cost of 10 for instruction: ret i32 undef +; +; SLOW-LABEL: 'umin' +; SLOW-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %I64 = call i64 @llvm.umin.i64(i64 undef, i64 undef) +; SLOW-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V2I64 = call <2 x i64> @llvm.umin.v2i64(<2 x i64> undef, <2 x i64> undef) +; SLOW-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V4I64 = call <4 x i64> @llvm.umin.v4i64(<4 x i64> undef, <4 x i64> undef) +; SLOW-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V8I64 = 
call <8 x i64> @llvm.umin.v8i64(<8 x i64> undef, <8 x i64> undef) +; SLOW-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %I32 = call i32 @llvm.umin.i32(i32 undef, i32 undef) +; SLOW-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V2I32 = call <2 x i32> @llvm.umin.v2i32(<2 x i32> undef, <2 x i32> undef) +; SLOW-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V4I32 = call <4 x i32> @llvm.umin.v4i32(<4 x i32> undef, <4 x i32> undef) +; SLOW-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V8I32 = call <8 x i32> @llvm.umin.v8i32(<8 x i32> undef, <8 x i32> undef) +; SLOW-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %V16I32 = call <16 x i32> @llvm.umin.v16i32(<16 x i32> undef, <16 x i32> undef) +; SLOW-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %I16 = call i16 @llvm.umin.i16(i16 undef, i16 undef) +; SLOW-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V2I16 = call <2 x i16> @llvm.umin.v2i16(<2 x i16> undef, <2 x i16> undef) +; SLOW-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V4I16 = call <4 x i16> @llvm.umin.v4i16(<4 x i16> undef, <4 x i16> undef) +; SLOW-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %V8I16 = call <8 x i16> @llvm.umin.v8i16(<8 x i16> undef, <8 x i16> undef) +; SLOW-NEXT: Cost Model: Found an estimated cost of 64 for instruction: %V16I16 = call <16 x i16> @llvm.umin.v16i16(<16 x i16> undef, <16 x i16> undef) +; SLOW-NEXT: Cost Model: Found an estimated cost of 128 for instruction: %V32I16 = call <32 x i16> @llvm.umin.v32i16(<32 x i16> undef, <32 x i16> undef) +; SLOW-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %I8 = call i8 @llvm.umin.i8(i8 undef, i8 undef) +; SLOW-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V2I8 = call <2 x i8> @llvm.umin.v2i8(<2 x i8> undef, <2 x i8> undef) +; SLOW-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V4I8 = call <4 x i8> @llvm.umin.v4i8(<4 x i8> undef, <4 x i8> undef) +; SLOW-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %V8I8 = call <8 x i8> @llvm.umin.v8i8(<8 x i8> undef, <8 x i8> undef) +; SLOW-NEXT: Cost Model: Found an estimated cost of 64 for instruction: %V16I8 = call <16 x i8> @llvm.umin.v16i8(<16 x i8> undef, <16 x i8> undef) +; SLOW-NEXT: Cost Model: Found an estimated cost of 128 for instruction: %V32I8 = call <32 x i8> @llvm.umin.v32i8(<32 x i8> undef, <32 x i8> undef) +; SLOW-NEXT: Cost Model: Found an estimated cost of 256 for instruction: %V64I8 = call <64 x i8> @llvm.umin.v64i8(<64 x i8> undef, <64 x i8> undef) +; SLOW-NEXT: Cost Model: Found an estimated cost of 10 for instruction: ret i32 undef +; +; ALL-SIZE-LABEL: 'umin' +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %I64 = call i64 @llvm.umin.i64(i64 undef, i64 undef) +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2I64 = call <2 x i64> @llvm.umin.v2i64(<2 x i64> undef, <2 x i64> undef) +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4I64 = call <4 x i64> @llvm.umin.v4i64(<4 x i64> undef, <4 x i64> undef) +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V8I64 = call <8 x i64> @llvm.umin.v8i64(<8 x i64> undef, <8 x i64> undef) +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %I32 = call i32 @llvm.umin.i32(i32 undef, i32 undef) +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 2 for 
instruction: %V2I32 = call <2 x i32> @llvm.umin.v2i32(<2 x i32> undef, <2 x i32> undef) +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4I32 = call <4 x i32> @llvm.umin.v4i32(<4 x i32> undef, <4 x i32> undef) +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V8I32 = call <8 x i32> @llvm.umin.v8i32(<8 x i32> undef, <8 x i32> undef) +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V16I32 = call <16 x i32> @llvm.umin.v16i32(<16 x i32> undef, <16 x i32> undef) +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %I16 = call i16 @llvm.umin.i16(i16 undef, i16 undef) +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2I16 = call <2 x i16> @llvm.umin.v2i16(<2 x i16> undef, <2 x i16> undef) +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4I16 = call <4 x i16> @llvm.umin.v4i16(<4 x i16> undef, <4 x i16> undef) +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V8I16 = call <8 x i16> @llvm.umin.v8i16(<8 x i16> undef, <8 x i16> undef) +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V16I16 = call <16 x i16> @llvm.umin.v16i16(<16 x i16> undef, <16 x i16> undef) +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V32I16 = call <32 x i16> @llvm.umin.v32i16(<32 x i16> undef, <32 x i16> undef) +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %I8 = call i8 @llvm.umin.i8(i8 undef, i8 undef) +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2I8 = call <2 x i8> @llvm.umin.v2i8(<2 x i8> undef, <2 x i8> undef) +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4I8 = call <4 x i8> @llvm.umin.v4i8(<4 x i8> undef, <4 x i8> undef) +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V8I8 = call <8 x i8> @llvm.umin.v8i8(<8 x i8> undef, <8 x i8> undef) +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V16I8 = call <16 x i8> @llvm.umin.v16i8(<16 x i8> undef, <16 x i8> undef) +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V32I8 = call <32 x i8> @llvm.umin.v32i8(<32 x i8> undef, <32 x i8> undef) +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V64I8 = call <64 x i8> @llvm.umin.v64i8(<64 x i8> undef, <64 x i8> undef) +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef +; + %I64 = call i64 @llvm.umin.i64(i64 undef, i64 undef) + %V2I64 = call <2 x i64> @llvm.umin.v2i64(<2 x i64> undef, <2 x i64> undef) + %V4I64 = call <4 x i64> @llvm.umin.v4i64(<4 x i64> undef, <4 x i64> undef) + %V8I64 = call <8 x i64> @llvm.umin.v8i64(<8 x i64> undef, <8 x i64> undef) + + %I32 = call i32 @llvm.umin.i32(i32 undef, i32 undef) + %V2I32 = call <2 x i32> @llvm.umin.v2i32(<2 x i32> undef, <2 x i32> undef) + %V4I32 = call <4 x i32> @llvm.umin.v4i32(<4 x i32> undef, <4 x i32> undef) + %V8I32 = call <8 x i32> @llvm.umin.v8i32(<8 x i32> undef, <8 x i32> undef) + %V16I32 = call <16 x i32> @llvm.umin.v16i32(<16 x i32> undef, <16 x i32> undef) + + %I16 = call i16 @llvm.umin.i16(i16 undef, i16 undef) + %V2I16 = call <2 x i16> @llvm.umin.v2i16(<2 x i16> undef, <2 x i16> undef) + %V4I16 = call <4 x i16> @llvm.umin.v4i16(<4 x i16> undef, <4 x i16> undef) + %V8I16 = call <8 x i16> @llvm.umin.v8i16(<8 x i16> undef, <8 x i16> undef) + %V16I16 = call <16 x i16> @llvm.umin.v16i16(<16 x i16> 
undef, <16 x i16> undef) + %V32I16 = call <32 x i16> @llvm.umin.v32i16(<32 x i16> undef, <32 x i16> undef) + + %I8 = call i8 @llvm.umin.i8(i8 undef, i8 undef) + %V2I8 = call <2 x i8> @llvm.umin.v2i8(<2 x i8> undef, <2 x i8> undef) + %V4I8 = call <4 x i8> @llvm.umin.v4i8(<4 x i8> undef, <4 x i8> undef) + %V8I8 = call <8 x i8> @llvm.umin.v8i8(<8 x i8> undef, <8 x i8> undef) + %V16I8 = call <16 x i8> @llvm.umin.v16i8(<16 x i8> undef, <16 x i8> undef) + %V32I8 = call <32 x i8> @llvm.umin.v32i8(<32 x i8> undef, <32 x i8> undef) + %V64I8 = call <64 x i8> @llvm.umin.v64i8(<64 x i8> undef, <64 x i8> undef) + + ret i32 undef +} diff --git a/llvm/test/Analysis/CostModel/AMDGPU/cast.ll b/llvm/test/Analysis/CostModel/AMDGPU/cast.ll new file mode 100644 index 000000000000..308dd4c40a10 --- /dev/null +++ b/llvm/test/Analysis/CostModel/AMDGPU/cast.ll @@ -0,0 +1,449 @@ +; NOTE: Assertions have been autogenerated by utils/update_analyze_test_checks.py +; RUN: opt -cost-model -analyze -mtriple=amdgcn-unknown-amdhsa -mcpu=gfx1010 < %s | FileCheck -check-prefixes=ALL,FAST %s +; RUN: opt -cost-model -analyze -mtriple=amdgcn-unknown-amdhsa -mcpu=gfx90a < %s | FileCheck -check-prefixes=ALL,FAST %s +; RUN: opt -cost-model -analyze -mtriple=amdgcn-unknown-amdhsa -mcpu=gfx900 < %s | FileCheck -check-prefixes=ALL,FAST %s +; RUN: opt -cost-model -analyze -mtriple=amdgcn-unknown-amdhsa < %s | FileCheck -check-prefixes=ALL,SLOW %s + +; RUN: opt -cost-model -cost-kind=code-size -analyze -mtriple=amdgcn-unknown-amdhsa -mcpu=gfx1010 < %s | FileCheck -check-prefixes=ALL-SIZE,FAST-SIZE %s +; RUN: opt -cost-model -cost-kind=code-size -analyze -mtriple=amdgcn-unknown-amdhsa -mcpu=gfx90a < %s | FileCheck -check-prefixes=ALL-SIZE,FAST-SIZE %s +; RUN: opt -cost-model -cost-kind=code-size -analyze -mtriple=amdgcn-unknown-amdhsa -mcpu=gfx900 < %s | FileCheck -check-prefixes=ALL-SIZE,FAST-SIZE %s +; RUN: opt -cost-model -cost-kind=code-size -analyze -mtriple=amdgcn-unknown-amdhsa < %s | FileCheck -check-prefixes=ALL-SIZE,SLOW-SIZE %s +; END. 
+ +define i32 @add(i32 %arg) { + ; -- Same size registeres -- +; ALL-LABEL: 'add' +; ALL-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %A = zext <4 x i1> undef to <4 x i32> +; ALL-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %B = sext <4 x i1> undef to <4 x i32> +; ALL-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %C = trunc <4 x i32> undef to <4 x i1> +; ALL-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %D = zext <8 x i1> undef to <8 x i32> +; ALL-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %E = sext <8 x i1> undef to <8 x i32> +; ALL-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %F = trunc <8 x i32> undef to <8 x i1> +; ALL-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %G = zext i1 undef to i32 +; ALL-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %H = trunc i32 undef to i1 +; ALL-NEXT: Cost Model: Found an estimated cost of 10 for instruction: ret i32 undef +; +; ALL-SIZE-LABEL: 'add' +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %A = zext <4 x i1> undef to <4 x i32> +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %B = sext <4 x i1> undef to <4 x i32> +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %C = trunc <4 x i32> undef to <4 x i1> +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %D = zext <8 x i1> undef to <8 x i32> +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %E = sext <8 x i1> undef to <8 x i32> +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %F = trunc <8 x i32> undef to <8 x i1> +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %G = zext i1 undef to i32 +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %H = trunc i32 undef to i1 +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef +; + %A = zext <4 x i1> undef to <4 x i32> + %B = sext <4 x i1> undef to <4 x i32> + %C = trunc <4 x i32> undef to <4 x i1> + + ; -- Different size registers -- + %D = zext <8 x i1> undef to <8 x i32> + %E = sext <8 x i1> undef to <8 x i32> + %F = trunc <8 x i32> undef to <8 x i1> + + ; -- scalars -- + %G = zext i1 undef to i32 + %H = trunc i32 undef to i1 + + ret i32 undef +} + +define i32 @zext_sext(<8 x i1> %in) { +; FAST-LABEL: 'zext_sext' +; FAST-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %Z = zext <8 x i1> %in to <8 x i32> +; FAST-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %S = sext <8 x i1> %in to <8 x i32> +; FAST-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %A1 = zext <16 x i8> undef to <16 x i16> +; FAST-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %A2 = sext <16 x i8> undef to <16 x i16> +; FAST-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %A = sext <8 x i16> undef to <8 x i32> +; FAST-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %B = zext <8 x i16> undef to <8 x i32> +; FAST-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %C = sext <4 x i32> undef to <4 x i64> +; FAST-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %C.v8i8.z = zext <8 x i8> undef to <8 x i32> +; FAST-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %C.v8i8.s = sext <8 x i8> undef to <8 x i32> +; FAST-NEXT: Cost Model: Found an estimated cost of 0 for 
instruction: %C.v4i16.z = zext <4 x i16> undef to <4 x i64> +; FAST-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %C.v4i16.s = sext <4 x i16> undef to <4 x i64> +; FAST-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %C.v4i8.z = zext <4 x i8> undef to <4 x i64> +; FAST-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %C.v4i8.s = sext <4 x i8> undef to <4 x i64> +; FAST-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %D = zext <4 x i32> undef to <4 x i64> +; FAST-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %D1 = zext <8 x i32> undef to <8 x i64> +; FAST-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %D2 = sext <8 x i32> undef to <8 x i64> +; FAST-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %D3 = zext <16 x i16> undef to <16 x i32> +; FAST-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %D4 = zext <16 x i8> undef to <16 x i32> +; FAST-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %D5 = zext <16 x i1> undef to <16 x i32> +; FAST-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %E = trunc <4 x i64> undef to <4 x i32> +; FAST-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %F = trunc <8 x i32> undef to <8 x i16> +; FAST-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %F1 = trunc <16 x i16> undef to <16 x i8> +; FAST-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %F2 = trunc <8 x i32> undef to <8 x i8> +; FAST-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %F3 = trunc <4 x i64> undef to <4 x i8> +; FAST-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %G = trunc <8 x i64> undef to <8 x i32> +; FAST-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %G1 = trunc <16 x i32> undef to <16 x i16> +; FAST-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %G2 = trunc <16 x i32> undef to <16 x i8> +; FAST-NEXT: Cost Model: Found an estimated cost of 10 for instruction: ret i32 undef +; +; SLOW-LABEL: 'zext_sext' +; SLOW-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %Z = zext <8 x i1> %in to <8 x i32> +; SLOW-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %S = sext <8 x i1> %in to <8 x i32> +; SLOW-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %A1 = zext <16 x i8> undef to <16 x i16> +; SLOW-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %A2 = sext <16 x i8> undef to <16 x i16> +; SLOW-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %A = sext <8 x i16> undef to <8 x i32> +; SLOW-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %B = zext <8 x i16> undef to <8 x i32> +; SLOW-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %C = sext <4 x i32> undef to <4 x i64> +; SLOW-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %C.v8i8.z = zext <8 x i8> undef to <8 x i32> +; SLOW-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %C.v8i8.s = sext <8 x i8> undef to <8 x i32> +; SLOW-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %C.v4i16.z = zext <4 x i16> undef to <4 x i64> +; SLOW-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %C.v4i16.s = sext <4 x i16> undef to <4 x i64> +; SLOW-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %C.v4i8.z = zext <4 x i8> undef to <4 x i64> +; SLOW-NEXT: Cost Model: Found an estimated cost of 4 for instruction: 
%C.v4i8.s = sext <4 x i8> undef to <4 x i64> +; SLOW-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %D = zext <4 x i32> undef to <4 x i64> +; SLOW-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %D1 = zext <8 x i32> undef to <8 x i64> +; SLOW-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %D2 = sext <8 x i32> undef to <8 x i64> +; SLOW-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %D3 = zext <16 x i16> undef to <16 x i32> +; SLOW-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %D4 = zext <16 x i8> undef to <16 x i32> +; SLOW-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %D5 = zext <16 x i1> undef to <16 x i32> +; SLOW-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %E = trunc <4 x i64> undef to <4 x i32> +; SLOW-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %F = trunc <8 x i32> undef to <8 x i16> +; SLOW-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %F1 = trunc <16 x i16> undef to <16 x i8> +; SLOW-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %F2 = trunc <8 x i32> undef to <8 x i8> +; SLOW-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %F3 = trunc <4 x i64> undef to <4 x i8> +; SLOW-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %G = trunc <8 x i64> undef to <8 x i32> +; SLOW-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %G1 = trunc <16 x i32> undef to <16 x i16> +; SLOW-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %G2 = trunc <16 x i32> undef to <16 x i8> +; SLOW-NEXT: Cost Model: Found an estimated cost of 10 for instruction: ret i32 undef +; +; FAST-SIZE-LABEL: 'zext_sext' +; FAST-SIZE-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %Z = zext <8 x i1> %in to <8 x i32> +; FAST-SIZE-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %S = sext <8 x i1> %in to <8 x i32> +; FAST-SIZE-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %A1 = zext <16 x i8> undef to <16 x i16> +; FAST-SIZE-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %A2 = sext <16 x i8> undef to <16 x i16> +; FAST-SIZE-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %A = sext <8 x i16> undef to <8 x i32> +; FAST-SIZE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %B = zext <8 x i16> undef to <8 x i32> +; FAST-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %C = sext <4 x i32> undef to <4 x i64> +; FAST-SIZE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %C.v8i8.z = zext <8 x i8> undef to <8 x i32> +; FAST-SIZE-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %C.v8i8.s = sext <8 x i8> undef to <8 x i32> +; FAST-SIZE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %C.v4i16.z = zext <4 x i16> undef to <4 x i64> +; FAST-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %C.v4i16.s = sext <4 x i16> undef to <4 x i64> +; FAST-SIZE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %C.v4i8.z = zext <4 x i8> undef to <4 x i64> +; FAST-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %C.v4i8.s = sext <4 x i8> undef to <4 x i64> +; FAST-SIZE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %D = zext <4 x i32> undef to <4 x i64> +; FAST-SIZE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %D1 = zext <8 x i32> undef to <8 x i64> +; FAST-SIZE-NEXT: Cost 
Model: Found an estimated cost of 8 for instruction: %D2 = sext <8 x i32> undef to <8 x i64> +; FAST-SIZE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %D3 = zext <16 x i16> undef to <16 x i32> +; FAST-SIZE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %D4 = zext <16 x i8> undef to <16 x i32> +; FAST-SIZE-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %D5 = zext <16 x i1> undef to <16 x i32> +; FAST-SIZE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %E = trunc <4 x i64> undef to <4 x i32> +; FAST-SIZE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %F = trunc <8 x i32> undef to <8 x i16> +; FAST-SIZE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %F1 = trunc <16 x i16> undef to <16 x i8> +; FAST-SIZE-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %F2 = trunc <8 x i32> undef to <8 x i8> +; FAST-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %F3 = trunc <4 x i64> undef to <4 x i8> +; FAST-SIZE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %G = trunc <8 x i64> undef to <8 x i32> +; FAST-SIZE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %G1 = trunc <16 x i32> undef to <16 x i16> +; FAST-SIZE-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %G2 = trunc <16 x i32> undef to <16 x i8> +; FAST-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef +; +; SLOW-SIZE-LABEL: 'zext_sext' +; SLOW-SIZE-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %Z = zext <8 x i1> %in to <8 x i32> +; SLOW-SIZE-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %S = sext <8 x i1> %in to <8 x i32> +; SLOW-SIZE-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %A1 = zext <16 x i8> undef to <16 x i16> +; SLOW-SIZE-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %A2 = sext <16 x i8> undef to <16 x i16> +; SLOW-SIZE-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %A = sext <8 x i16> undef to <8 x i32> +; SLOW-SIZE-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %B = zext <8 x i16> undef to <8 x i32> +; SLOW-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %C = sext <4 x i32> undef to <4 x i64> +; SLOW-SIZE-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %C.v8i8.z = zext <8 x i8> undef to <8 x i32> +; SLOW-SIZE-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %C.v8i8.s = sext <8 x i8> undef to <8 x i32> +; SLOW-SIZE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %C.v4i16.z = zext <4 x i16> undef to <4 x i64> +; SLOW-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %C.v4i16.s = sext <4 x i16> undef to <4 x i64> +; SLOW-SIZE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %C.v4i8.z = zext <4 x i8> undef to <4 x i64> +; SLOW-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %C.v4i8.s = sext <4 x i8> undef to <4 x i64> +; SLOW-SIZE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %D = zext <4 x i32> undef to <4 x i64> +; SLOW-SIZE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %D1 = zext <8 x i32> undef to <8 x i64> +; SLOW-SIZE-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %D2 = sext <8 x i32> undef to <8 x i64> +; SLOW-SIZE-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %D3 = zext <16 x i16> undef to <16 x i32> +; 
SLOW-SIZE-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %D4 = zext <16 x i8> undef to <16 x i32> +; SLOW-SIZE-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %D5 = zext <16 x i1> undef to <16 x i32> +; SLOW-SIZE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %E = trunc <4 x i64> undef to <4 x i32> +; SLOW-SIZE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %F = trunc <8 x i32> undef to <8 x i16> +; SLOW-SIZE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %F1 = trunc <16 x i16> undef to <16 x i8> +; SLOW-SIZE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %F2 = trunc <8 x i32> undef to <8 x i8> +; SLOW-SIZE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %F3 = trunc <4 x i64> undef to <4 x i8> +; SLOW-SIZE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %G = trunc <8 x i64> undef to <8 x i32> +; SLOW-SIZE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %G1 = trunc <16 x i32> undef to <16 x i16> +; SLOW-SIZE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %G2 = trunc <16 x i32> undef to <16 x i8> +; SLOW-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef +; + %Z = zext <8 x i1> %in to <8 x i32> + %S = sext <8 x i1> %in to <8 x i32> + + %A1 = zext <16 x i8> undef to <16 x i16> + %A2 = sext <16 x i8> undef to <16 x i16> + %A = sext <8 x i16> undef to <8 x i32> + %B = zext <8 x i16> undef to <8 x i32> + %C = sext <4 x i32> undef to <4 x i64> + + %C.v8i8.z = zext <8 x i8> undef to <8 x i32> + %C.v8i8.s = sext <8 x i8> undef to <8 x i32> + %C.v4i16.z = zext <4 x i16> undef to <4 x i64> + %C.v4i16.s = sext <4 x i16> undef to <4 x i64> + + %C.v4i8.z = zext <4 x i8> undef to <4 x i64> + %C.v4i8.s = sext <4 x i8> undef to <4 x i64> + + %D = zext <4 x i32> undef to <4 x i64> + + %D1 = zext <8 x i32> undef to <8 x i64> + + %D2 = sext <8 x i32> undef to <8 x i64> + + %D3 = zext <16 x i16> undef to <16 x i32> + %D4 = zext <16 x i8> undef to <16 x i32> + %D5 = zext <16 x i1> undef to <16 x i32> + + %E = trunc <4 x i64> undef to <4 x i32> + %F = trunc <8 x i32> undef to <8 x i16> + %F1 = trunc <16 x i16> undef to <16 x i8> + %F2 = trunc <8 x i32> undef to <8 x i8> + %F3 = trunc <4 x i64> undef to <4 x i8> + + %G = trunc <8 x i64> undef to <8 x i32> + %G1 = trunc <16 x i32> undef to <16 x i16> + %G2 = trunc <16 x i32> undef to <16 x i8> + ret i32 undef +} + +define i32 @masks8(<8 x i1> %in) { +; ALL-LABEL: 'masks8' +; ALL-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %Z = zext <8 x i1> %in to <8 x i32> +; ALL-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %S = sext <8 x i1> %in to <8 x i32> +; ALL-NEXT: Cost Model: Found an estimated cost of 10 for instruction: ret i32 undef +; +; ALL-SIZE-LABEL: 'masks8' +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %Z = zext <8 x i1> %in to <8 x i32> +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %S = sext <8 x i1> %in to <8 x i32> +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef +; + %Z = zext <8 x i1> %in to <8 x i32> + %S = sext <8 x i1> %in to <8 x i32> + ret i32 undef +} + +define i32 @masks4(<4 x i1> %in) { +; ALL-LABEL: 'masks4' +; ALL-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %Z = zext <4 x i1> %in to <4 x i64> +; ALL-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %S = sext <4 x i1> %in to <4 x 
i64> +; ALL-NEXT: Cost Model: Found an estimated cost of 10 for instruction: ret i32 undef +; +; ALL-SIZE-LABEL: 'masks4' +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %Z = zext <4 x i1> %in to <4 x i64> +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %S = sext <4 x i1> %in to <4 x i64> +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef +; + %Z = zext <4 x i1> %in to <4 x i64> + %S = sext <4 x i1> %in to <4 x i64> + ret i32 undef +} + +define void @sitofp4(<4 x i1> %a, <4 x i8> %b, <4 x i16> %c, <4 x i32> %d) { +; FAST-LABEL: 'sitofp4' +; FAST-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %A1 = sitofp <4 x i1> %a to <4 x float> +; FAST-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %A2 = sitofp <4 x i1> %a to <4 x double> +; FAST-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %B1 = sitofp <4 x i8> %b to <4 x float> +; FAST-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %B2 = sitofp <4 x i8> %b to <4 x double> +; FAST-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %C1 = sitofp <4 x i16> %c to <4 x float> +; FAST-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %C2 = sitofp <4 x i16> %c to <4 x double> +; FAST-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %D1 = sitofp <4 x i32> %d to <4 x float> +; FAST-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %D2 = sitofp <4 x i32> %d to <4 x double> +; FAST-NEXT: Cost Model: Found an estimated cost of 10 for instruction: ret void +; +; SLOW-LABEL: 'sitofp4' +; SLOW-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %A1 = sitofp <4 x i1> %a to <4 x float> +; SLOW-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %A2 = sitofp <4 x i1> %a to <4 x double> +; SLOW-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %B1 = sitofp <4 x i8> %b to <4 x float> +; SLOW-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %B2 = sitofp <4 x i8> %b to <4 x double> +; SLOW-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %C1 = sitofp <4 x i16> %c to <4 x float> +; SLOW-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %C2 = sitofp <4 x i16> %c to <4 x double> +; SLOW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %D1 = sitofp <4 x i32> %d to <4 x float> +; SLOW-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %D2 = sitofp <4 x i32> %d to <4 x double> +; SLOW-NEXT: Cost Model: Found an estimated cost of 10 for instruction: ret void +; +; FAST-SIZE-LABEL: 'sitofp4' +; FAST-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %A1 = sitofp <4 x i1> %a to <4 x float> +; FAST-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %A2 = sitofp <4 x i1> %a to <4 x double> +; FAST-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %B1 = sitofp <4 x i8> %b to <4 x float> +; FAST-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %B2 = sitofp <4 x i8> %b to <4 x double> +; FAST-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %C1 = sitofp <4 x i16> %c to <4 x float> +; FAST-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %C2 = sitofp <4 x i16> %c to <4 x double> +; FAST-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %D1 = sitofp <4 x i32> %d to <4 x float> +; FAST-SIZE-NEXT: Cost Model: Found an estimated cost of 
4 for instruction: %D2 = sitofp <4 x i32> %d to <4 x double> +; FAST-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; +; SLOW-SIZE-LABEL: 'sitofp4' +; SLOW-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %A1 = sitofp <4 x i1> %a to <4 x float> +; SLOW-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %A2 = sitofp <4 x i1> %a to <4 x double> +; SLOW-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %B1 = sitofp <4 x i8> %b to <4 x float> +; SLOW-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %B2 = sitofp <4 x i8> %b to <4 x double> +; SLOW-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %C1 = sitofp <4 x i16> %c to <4 x float> +; SLOW-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %C2 = sitofp <4 x i16> %c to <4 x double> +; SLOW-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %D1 = sitofp <4 x i32> %d to <4 x float> +; SLOW-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %D2 = sitofp <4 x i32> %d to <4 x double> +; SLOW-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; + %A1 = sitofp <4 x i1> %a to <4 x float> + %A2 = sitofp <4 x i1> %a to <4 x double> + %B1 = sitofp <4 x i8> %b to <4 x float> + %B2 = sitofp <4 x i8> %b to <4 x double> + %C1 = sitofp <4 x i16> %c to <4 x float> + %C2 = sitofp <4 x i16> %c to <4 x double> + %D1 = sitofp <4 x i32> %d to <4 x float> + %D2 = sitofp <4 x i32> %d to <4 x double> + ret void +} + +define void @sitofp8(<8 x i1> %a, <8 x i8> %b, <8 x i16> %c, <8 x i32> %d) { +; FAST-LABEL: 'sitofp8' +; FAST-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %A1 = sitofp <8 x i1> %a to <8 x float> +; FAST-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %B1 = sitofp <8 x i8> %b to <8 x float> +; FAST-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %C1 = sitofp <8 x i16> %c to <8 x float> +; FAST-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %D1 = sitofp <8 x i32> %d to <8 x float> +; FAST-NEXT: Cost Model: Found an estimated cost of 10 for instruction: ret void +; +; SLOW-LABEL: 'sitofp8' +; SLOW-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %A1 = sitofp <8 x i1> %a to <8 x float> +; SLOW-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %B1 = sitofp <8 x i8> %b to <8 x float> +; SLOW-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %C1 = sitofp <8 x i16> %c to <8 x float> +; SLOW-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %D1 = sitofp <8 x i32> %d to <8 x float> +; SLOW-NEXT: Cost Model: Found an estimated cost of 10 for instruction: ret void +; +; FAST-SIZE-LABEL: 'sitofp8' +; FAST-SIZE-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %A1 = sitofp <8 x i1> %a to <8 x float> +; FAST-SIZE-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %B1 = sitofp <8 x i8> %b to <8 x float> +; FAST-SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %C1 = sitofp <8 x i16> %c to <8 x float> +; FAST-SIZE-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %D1 = sitofp <8 x i32> %d to <8 x float> +; FAST-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; +; SLOW-SIZE-LABEL: 'sitofp8' +; SLOW-SIZE-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %A1 = sitofp <8 x i1> %a to <8 x float> +; SLOW-SIZE-NEXT: Cost Model: Found an 
estimated cost of 8 for instruction: %B1 = sitofp <8 x i8> %b to <8 x float> +; SLOW-SIZE-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %C1 = sitofp <8 x i16> %c to <8 x float> +; SLOW-SIZE-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %D1 = sitofp <8 x i32> %d to <8 x float> +; SLOW-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; + %A1 = sitofp <8 x i1> %a to <8 x float> + %B1 = sitofp <8 x i8> %b to <8 x float> + %C1 = sitofp <8 x i16> %c to <8 x float> + %D1 = sitofp <8 x i32> %d to <8 x float> + ret void +} + +define void @uitofp4(<4 x i1> %a, <4 x i8> %b, <4 x i16> %c, <4 x i32> %d) { +; FAST-LABEL: 'uitofp4' +; FAST-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %A1 = uitofp <4 x i1> %a to <4 x float> +; FAST-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %A2 = uitofp <4 x i1> %a to <4 x double> +; FAST-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %B1 = uitofp <4 x i8> %b to <4 x float> +; FAST-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %B2 = uitofp <4 x i8> %b to <4 x double> +; FAST-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %C1 = uitofp <4 x i16> %c to <4 x float> +; FAST-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %C2 = uitofp <4 x i16> %c to <4 x double> +; FAST-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %D1 = uitofp <4 x i32> %d to <4 x float> +; FAST-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %D2 = uitofp <4 x i32> %d to <4 x double> +; FAST-NEXT: Cost Model: Found an estimated cost of 10 for instruction: ret void +; +; SLOW-LABEL: 'uitofp4' +; SLOW-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %A1 = uitofp <4 x i1> %a to <4 x float> +; SLOW-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %A2 = uitofp <4 x i1> %a to <4 x double> +; SLOW-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %B1 = uitofp <4 x i8> %b to <4 x float> +; SLOW-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %B2 = uitofp <4 x i8> %b to <4 x double> +; SLOW-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %C1 = uitofp <4 x i16> %c to <4 x float> +; SLOW-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %C2 = uitofp <4 x i16> %c to <4 x double> +; SLOW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %D1 = uitofp <4 x i32> %d to <4 x float> +; SLOW-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %D2 = uitofp <4 x i32> %d to <4 x double> +; SLOW-NEXT: Cost Model: Found an estimated cost of 10 for instruction: ret void +; +; FAST-SIZE-LABEL: 'uitofp4' +; FAST-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %A1 = uitofp <4 x i1> %a to <4 x float> +; FAST-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %A2 = uitofp <4 x i1> %a to <4 x double> +; FAST-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %B1 = uitofp <4 x i8> %b to <4 x float> +; FAST-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %B2 = uitofp <4 x i8> %b to <4 x double> +; FAST-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %C1 = uitofp <4 x i16> %c to <4 x float> +; FAST-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %C2 = uitofp <4 x i16> %c to <4 x double> +; FAST-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %D1 = uitofp <4 x i32> %d to <4 x float> +; 
FAST-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %D2 = uitofp <4 x i32> %d to <4 x double> +; FAST-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; +; SLOW-SIZE-LABEL: 'uitofp4' +; SLOW-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %A1 = uitofp <4 x i1> %a to <4 x float> +; SLOW-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %A2 = uitofp <4 x i1> %a to <4 x double> +; SLOW-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %B1 = uitofp <4 x i8> %b to <4 x float> +; SLOW-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %B2 = uitofp <4 x i8> %b to <4 x double> +; SLOW-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %C1 = uitofp <4 x i16> %c to <4 x float> +; SLOW-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %C2 = uitofp <4 x i16> %c to <4 x double> +; SLOW-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %D1 = uitofp <4 x i32> %d to <4 x float> +; SLOW-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %D2 = uitofp <4 x i32> %d to <4 x double> +; SLOW-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; + %A1 = uitofp <4 x i1> %a to <4 x float> + %A2 = uitofp <4 x i1> %a to <4 x double> + %B1 = uitofp <4 x i8> %b to <4 x float> + %B2 = uitofp <4 x i8> %b to <4 x double> + %C1 = uitofp <4 x i16> %c to <4 x float> + %C2 = uitofp <4 x i16> %c to <4 x double> + %D1 = uitofp <4 x i32> %d to <4 x float> + %D2 = uitofp <4 x i32> %d to <4 x double> + ret void +} + +define void @uitofp8(<8 x i1> %a, <8 x i8> %b, <8 x i16> %c, <8 x i32> %d) { +; FAST-LABEL: 'uitofp8' +; FAST-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %A1 = uitofp <8 x i1> %a to <8 x float> +; FAST-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %B1 = uitofp <8 x i8> %b to <8 x float> +; FAST-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %C1 = uitofp <8 x i16> %c to <8 x float> +; FAST-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %D1 = uitofp <8 x i32> %d to <8 x float> +; FAST-NEXT: Cost Model: Found an estimated cost of 10 for instruction: ret void +; +; SLOW-LABEL: 'uitofp8' +; SLOW-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %A1 = uitofp <8 x i1> %a to <8 x float> +; SLOW-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %B1 = uitofp <8 x i8> %b to <8 x float> +; SLOW-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %C1 = uitofp <8 x i16> %c to <8 x float> +; SLOW-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %D1 = uitofp <8 x i32> %d to <8 x float> +; SLOW-NEXT: Cost Model: Found an estimated cost of 10 for instruction: ret void +; +; FAST-SIZE-LABEL: 'uitofp8' +; FAST-SIZE-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %A1 = uitofp <8 x i1> %a to <8 x float> +; FAST-SIZE-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %B1 = uitofp <8 x i8> %b to <8 x float> +; FAST-SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %C1 = uitofp <8 x i16> %c to <8 x float> +; FAST-SIZE-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %D1 = uitofp <8 x i32> %d to <8 x float> +; FAST-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; +; SLOW-SIZE-LABEL: 'uitofp8' +; SLOW-SIZE-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %A1 = uitofp <8 x i1> %a to <8 x 
float> +; SLOW-SIZE-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %B1 = uitofp <8 x i8> %b to <8 x float> +; SLOW-SIZE-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %C1 = uitofp <8 x i16> %c to <8 x float> +; SLOW-SIZE-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %D1 = uitofp <8 x i32> %d to <8 x float> +; SLOW-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; + %A1 = uitofp <8 x i1> %a to <8 x float> + %B1 = uitofp <8 x i8> %b to <8 x float> + %C1 = uitofp <8 x i16> %c to <8 x float> + %D1 = uitofp <8 x i32> %d to <8 x float> + ret void +} + +define void @fp_conv(<8 x float> %a, <16 x float>%b, <4 x float> %c) { +; ALL-LABEL: 'fp_conv' +; ALL-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %A1 = fpext <4 x float> %c to <4 x double> +; ALL-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %A2 = fpext <8 x float> %a to <8 x double> +; ALL-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %A3 = fptrunc <4 x double> undef to <4 x float> +; ALL-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %A4 = fptrunc <8 x double> undef to <8 x float> +; ALL-NEXT: Cost Model: Found an estimated cost of 10 for instruction: ret void +; +; ALL-SIZE-LABEL: 'fp_conv' +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %A1 = fpext <4 x float> %c to <4 x double> +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %A2 = fpext <8 x float> %a to <8 x double> +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %A3 = fptrunc <4 x double> undef to <4 x float> +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %A4 = fptrunc <8 x double> undef to <8 x float> +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; + %A1 = fpext <4 x float> %c to <4 x double> + %A2 = fpext <8 x float> %a to <8 x double> + %A3 = fptrunc <4 x double> undef to <4 x float> + %A4 = fptrunc <8 x double> undef to <8 x float> + ret void +} diff --git a/llvm/test/Analysis/CostModel/AMDGPU/div.ll b/llvm/test/Analysis/CostModel/AMDGPU/div.ll new file mode 100644 index 000000000000..7114b81025be --- /dev/null +++ b/llvm/test/Analysis/CostModel/AMDGPU/div.ll @@ -0,0 +1,1145 @@ +; NOTE: Assertions have been autogenerated by utils/update_analyze_test_checks.py +; RUN: opt -cost-model -analyze -mtriple=amdgcn-unknown-amdhsa -mcpu=gfx1010 < %s | FileCheck -check-prefixes=FAST %s +; RUN: opt -cost-model -analyze -mtriple=amdgcn-unknown-amdhsa -mcpu=gfx90a < %s | FileCheck -check-prefixes=FAST %s +; RUN: opt -cost-model -analyze -mtriple=amdgcn-unknown-amdhsa -mcpu=gfx900 < %s | FileCheck -check-prefixes=FAST %s +; RUN: opt -cost-model -analyze -mtriple=amdgcn-unknown-amdhsa < %s | FileCheck -check-prefixes=SLOW %s + +; RUN: opt -cost-model -cost-kind=code-size -analyze -mtriple=amdgcn-unknown-amdhsa -mcpu=gfx1010 < %s | FileCheck -check-prefixes=ALL-SIZE %s +; RUN: opt -cost-model -cost-kind=code-size -analyze -mtriple=amdgcn-unknown-amdhsa -mcpu=gfx90a < %s | FileCheck -check-prefixes=ALL-SIZE %s +; RUN: opt -cost-model -cost-kind=code-size -analyze -mtriple=amdgcn-unknown-amdhsa -mcpu=gfx900 < %s | FileCheck -check-prefixes=ALL-SIZE %s +; RUN: opt -cost-model -cost-kind=code-size -analyze -mtriple=amdgcn-unknown-amdhsa < %s | FileCheck -check-prefixes=ALL-SIZE %s +; END. 
+ +define i32 @sdiv() { +; FAST-LABEL: 'sdiv' +; FAST-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I64 = sdiv i64 undef, undef +; FAST-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2i64 = sdiv <2 x i64> undef, undef +; FAST-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4i64 = sdiv <4 x i64> undef, undef +; FAST-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V8i64 = sdiv <8 x i64> undef, undef +; FAST-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I32 = sdiv i32 undef, undef +; FAST-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4i32 = sdiv <4 x i32> undef, undef +; FAST-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V8i32 = sdiv <8 x i32> undef, undef +; FAST-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V16i32 = sdiv <16 x i32> undef, undef +; FAST-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I16 = sdiv i16 undef, undef +; FAST-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %V8i16 = sdiv <8 x i16> undef, undef +; FAST-NEXT: Cost Model: Found an estimated cost of 31 for instruction: %V16i16 = sdiv <16 x i16> undef, undef +; FAST-NEXT: Cost Model: Found an estimated cost of 63 for instruction: %V32i16 = sdiv <32 x i16> undef, undef +; FAST-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I8 = sdiv i8 undef, undef +; FAST-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V16i8 = sdiv <16 x i8> undef, undef +; FAST-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %V32i8 = sdiv <32 x i8> undef, undef +; FAST-NEXT: Cost Model: Found an estimated cost of 64 for instruction: %V64i8 = sdiv <64 x i8> undef, undef +; FAST-NEXT: Cost Model: Found an estimated cost of 10 for instruction: ret i32 undef +; +; SLOW-LABEL: 'sdiv' +; SLOW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I64 = sdiv i64 undef, undef +; SLOW-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2i64 = sdiv <2 x i64> undef, undef +; SLOW-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4i64 = sdiv <4 x i64> undef, undef +; SLOW-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V8i64 = sdiv <8 x i64> undef, undef +; SLOW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I32 = sdiv i32 undef, undef +; SLOW-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4i32 = sdiv <4 x i32> undef, undef +; SLOW-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V8i32 = sdiv <8 x i32> undef, undef +; SLOW-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V16i32 = sdiv <16 x i32> undef, undef +; SLOW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I16 = sdiv i16 undef, undef +; SLOW-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V8i16 = sdiv <8 x i16> undef, undef +; SLOW-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %V16i16 = sdiv <16 x i16> undef, undef +; SLOW-NEXT: Cost Model: Found an estimated cost of 64 for instruction: %V32i16 = sdiv <32 x i16> undef, undef +; SLOW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I8 = sdiv i8 undef, undef +; SLOW-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %V16i8 = sdiv <16 x i8> undef, undef +; SLOW-NEXT: Cost Model: Found an estimated cost of 64 for instruction: %V32i8 = sdiv <32 x i8> undef, undef +; SLOW-NEXT: Cost Model: Found an estimated cost of 128 for 
instruction: %V64i8 = sdiv <64 x i8> undef, undef +; SLOW-NEXT: Cost Model: Found an estimated cost of 10 for instruction: ret i32 undef +; +; ALL-SIZE-LABEL: 'sdiv' +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I64 = sdiv i64 undef, undef +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V2i64 = sdiv <2 x i64> undef, undef +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4i64 = sdiv <4 x i64> undef, undef +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8i64 = sdiv <8 x i64> undef, undef +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I32 = sdiv i32 undef, undef +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4i32 = sdiv <4 x i32> undef, undef +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8i32 = sdiv <8 x i32> undef, undef +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V16i32 = sdiv <16 x i32> undef, undef +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I16 = sdiv i16 undef, undef +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8i16 = sdiv <8 x i16> undef, undef +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V16i16 = sdiv <16 x i16> undef, undef +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V32i16 = sdiv <32 x i16> undef, undef +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I8 = sdiv i8 undef, undef +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V16i8 = sdiv <16 x i8> undef, undef +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V32i8 = sdiv <32 x i8> undef, undef +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V64i8 = sdiv <64 x i8> undef, undef +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef +; + %I64 = sdiv i64 undef, undef + %V2i64 = sdiv <2 x i64> undef, undef + %V4i64 = sdiv <4 x i64> undef, undef + %V8i64 = sdiv <8 x i64> undef, undef + + %I32 = sdiv i32 undef, undef + %V4i32 = sdiv <4 x i32> undef, undef + %V8i32 = sdiv <8 x i32> undef, undef + %V16i32 = sdiv <16 x i32> undef, undef + + %I16 = sdiv i16 undef, undef + %V8i16 = sdiv <8 x i16> undef, undef + %V16i16 = sdiv <16 x i16> undef, undef + %V32i16 = sdiv <32 x i16> undef, undef + + %I8 = sdiv i8 undef, undef + %V16i8 = sdiv <16 x i8> undef, undef + %V32i8 = sdiv <32 x i8> undef, undef + %V64i8 = sdiv <64 x i8> undef, undef + + ret i32 undef +} + +define i32 @udiv() { +; FAST-LABEL: 'udiv' +; FAST-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I64 = udiv i64 undef, undef +; FAST-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2i64 = udiv <2 x i64> undef, undef +; FAST-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4i64 = udiv <4 x i64> undef, undef +; FAST-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V8i64 = udiv <8 x i64> undef, undef +; FAST-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I32 = udiv i32 undef, undef +; FAST-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4i32 = udiv <4 x i32> undef, undef +; FAST-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V8i32 = udiv <8 x i32> undef, undef +; FAST-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V16i32 = udiv <16 x i32> 
undef, undef +; FAST-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I16 = udiv i16 undef, undef +; FAST-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %V8i16 = udiv <8 x i16> undef, undef +; FAST-NEXT: Cost Model: Found an estimated cost of 31 for instruction: %V16i16 = udiv <16 x i16> undef, undef +; FAST-NEXT: Cost Model: Found an estimated cost of 63 for instruction: %V32i16 = udiv <32 x i16> undef, undef +; FAST-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I8 = udiv i8 undef, undef +; FAST-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V16i8 = udiv <16 x i8> undef, undef +; FAST-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %V32i8 = udiv <32 x i8> undef, undef +; FAST-NEXT: Cost Model: Found an estimated cost of 64 for instruction: %V64i8 = udiv <64 x i8> undef, undef +; FAST-NEXT: Cost Model: Found an estimated cost of 10 for instruction: ret i32 undef +; +; SLOW-LABEL: 'udiv' +; SLOW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I64 = udiv i64 undef, undef +; SLOW-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2i64 = udiv <2 x i64> undef, undef +; SLOW-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4i64 = udiv <4 x i64> undef, undef +; SLOW-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V8i64 = udiv <8 x i64> undef, undef +; SLOW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I32 = udiv i32 undef, undef +; SLOW-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4i32 = udiv <4 x i32> undef, undef +; SLOW-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V8i32 = udiv <8 x i32> undef, undef +; SLOW-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V16i32 = udiv <16 x i32> undef, undef +; SLOW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I16 = udiv i16 undef, undef +; SLOW-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V8i16 = udiv <8 x i16> undef, undef +; SLOW-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %V16i16 = udiv <16 x i16> undef, undef +; SLOW-NEXT: Cost Model: Found an estimated cost of 64 for instruction: %V32i16 = udiv <32 x i16> undef, undef +; SLOW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I8 = udiv i8 undef, undef +; SLOW-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %V16i8 = udiv <16 x i8> undef, undef +; SLOW-NEXT: Cost Model: Found an estimated cost of 64 for instruction: %V32i8 = udiv <32 x i8> undef, undef +; SLOW-NEXT: Cost Model: Found an estimated cost of 128 for instruction: %V64i8 = udiv <64 x i8> undef, undef +; SLOW-NEXT: Cost Model: Found an estimated cost of 10 for instruction: ret i32 undef +; +; ALL-SIZE-LABEL: 'udiv' +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I64 = udiv i64 undef, undef +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V2i64 = udiv <2 x i64> undef, undef +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4i64 = udiv <4 x i64> undef, undef +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8i64 = udiv <8 x i64> undef, undef +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I32 = udiv i32 undef, undef +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4i32 = udiv <4 x i32> undef, undef +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for 
instruction: %V8i32 = udiv <8 x i32> undef, undef +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V16i32 = udiv <16 x i32> undef, undef +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I16 = udiv i16 undef, undef +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8i16 = udiv <8 x i16> undef, undef +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V16i16 = udiv <16 x i16> undef, undef +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V32i16 = udiv <32 x i16> undef, undef +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I8 = udiv i8 undef, undef +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V16i8 = udiv <16 x i8> undef, undef +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V32i8 = udiv <32 x i8> undef, undef +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V64i8 = udiv <64 x i8> undef, undef +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef +; + %I64 = udiv i64 undef, undef + %V2i64 = udiv <2 x i64> undef, undef + %V4i64 = udiv <4 x i64> undef, undef + %V8i64 = udiv <8 x i64> undef, undef + + %I32 = udiv i32 undef, undef + %V4i32 = udiv <4 x i32> undef, undef + %V8i32 = udiv <8 x i32> undef, undef + %V16i32 = udiv <16 x i32> undef, undef + + %I16 = udiv i16 undef, undef + %V8i16 = udiv <8 x i16> undef, undef + %V16i16 = udiv <16 x i16> undef, undef + %V32i16 = udiv <32 x i16> undef, undef + + %I8 = udiv i8 undef, undef + %V16i8 = udiv <16 x i8> undef, undef + %V32i8 = udiv <32 x i8> undef, undef + %V64i8 = udiv <64 x i8> undef, undef + + ret i32 undef +} + +define i32 @sdiv_const() { +; FAST-LABEL: 'sdiv_const' +; FAST-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I64 = sdiv i64 undef, 7 +; FAST-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2i64 = sdiv <2 x i64> undef, +; FAST-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4i64 = sdiv <4 x i64> undef, +; FAST-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V8i64 = sdiv <8 x i64> undef, +; FAST-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I32 = sdiv i32 undef, 7 +; FAST-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4i32 = sdiv <4 x i32> undef, +; FAST-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V8i32 = sdiv <8 x i32> undef, +; FAST-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V16i32 = sdiv <16 x i32> undef, +; FAST-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I16 = sdiv i16 undef, 7 +; FAST-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %V8i16 = sdiv <8 x i16> undef, +; FAST-NEXT: Cost Model: Found an estimated cost of 31 for instruction: %V16i16 = sdiv <16 x i16> undef, +; FAST-NEXT: Cost Model: Found an estimated cost of 63 for instruction: %V32i16 = sdiv <32 x i16> undef, +; FAST-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I8 = sdiv i8 undef, 7 +; FAST-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V16i8 = sdiv <16 x i8> undef, +; FAST-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %V32i8 = sdiv <32 x i8> undef, +; FAST-NEXT: Cost Model: Found an estimated cost of 64 for instruction: %V64i8 = sdiv <64 x i8> undef, +; FAST-NEXT: Cost Model: Found an estimated cost of 10 for instruction: ret i32 undef +; 
+; SLOW-LABEL: 'sdiv_const' +; SLOW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I64 = sdiv i64 undef, 7 +; SLOW-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2i64 = sdiv <2 x i64> undef, +; SLOW-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4i64 = sdiv <4 x i64> undef, +; SLOW-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V8i64 = sdiv <8 x i64> undef, +; SLOW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I32 = sdiv i32 undef, 7 +; SLOW-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4i32 = sdiv <4 x i32> undef, +; SLOW-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V8i32 = sdiv <8 x i32> undef, +; SLOW-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V16i32 = sdiv <16 x i32> undef, +; SLOW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I16 = sdiv i16 undef, 7 +; SLOW-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V8i16 = sdiv <8 x i16> undef, +; SLOW-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %V16i16 = sdiv <16 x i16> undef, +; SLOW-NEXT: Cost Model: Found an estimated cost of 64 for instruction: %V32i16 = sdiv <32 x i16> undef, +; SLOW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I8 = sdiv i8 undef, 7 +; SLOW-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %V16i8 = sdiv <16 x i8> undef, +; SLOW-NEXT: Cost Model: Found an estimated cost of 64 for instruction: %V32i8 = sdiv <32 x i8> undef, +; SLOW-NEXT: Cost Model: Found an estimated cost of 128 for instruction: %V64i8 = sdiv <64 x i8> undef, +; SLOW-NEXT: Cost Model: Found an estimated cost of 10 for instruction: ret i32 undef +; +; ALL-SIZE-LABEL: 'sdiv_const' +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I64 = sdiv i64 undef, 7 +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V2i64 = sdiv <2 x i64> undef, +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4i64 = sdiv <4 x i64> undef, +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8i64 = sdiv <8 x i64> undef, +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I32 = sdiv i32 undef, 7 +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4i32 = sdiv <4 x i32> undef, +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8i32 = sdiv <8 x i32> undef, +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V16i32 = sdiv <16 x i32> undef, +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I16 = sdiv i16 undef, 7 +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8i16 = sdiv <8 x i16> undef, +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V16i16 = sdiv <16 x i16> undef, +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V32i16 = sdiv <32 x i16> undef, +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I8 = sdiv i8 undef, 7 +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V16i8 = sdiv <16 x i8> undef, +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V32i8 = sdiv <32 x i8> undef, +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V64i8 = sdiv <64 x i8> undef, +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: 
ret i32 undef +; + %I64 = sdiv i64 undef, 7 + %V2i64 = sdiv <2 x i64> undef, + %V4i64 = sdiv <4 x i64> undef, + %V8i64 = sdiv <8 x i64> undef, + + %I32 = sdiv i32 undef, 7 + %V4i32 = sdiv <4 x i32> undef, + %V8i32 = sdiv <8 x i32> undef, + %V16i32 = sdiv <16 x i32> undef, + + %I16 = sdiv i16 undef, 7 + %V8i16 = sdiv <8 x i16> undef, + %V16i16 = sdiv <16 x i16> undef, + %V32i16 = sdiv <32 x i16> undef, + + %I8 = sdiv i8 undef, 7 + %V16i8 = sdiv <16 x i8> undef, + %V32i8 = sdiv <32 x i8> undef, + %V64i8 = sdiv <64 x i8> undef, + + ret i32 undef +} + +define i32 @udiv_const() { +; FAST-LABEL: 'udiv_const' +; FAST-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I64 = udiv i64 undef, 7 +; FAST-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2i64 = udiv <2 x i64> undef, +; FAST-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4i64 = udiv <4 x i64> undef, +; FAST-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V8i64 = udiv <8 x i64> undef, +; FAST-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I32 = udiv i32 undef, 7 +; FAST-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4i32 = udiv <4 x i32> undef, +; FAST-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V8i32 = udiv <8 x i32> undef, +; FAST-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V16i32 = udiv <16 x i32> undef, +; FAST-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I16 = udiv i16 undef, 7 +; FAST-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %V8i16 = udiv <8 x i16> undef, +; FAST-NEXT: Cost Model: Found an estimated cost of 31 for instruction: %V16i16 = udiv <16 x i16> undef, +; FAST-NEXT: Cost Model: Found an estimated cost of 63 for instruction: %V32i16 = udiv <32 x i16> undef, +; FAST-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I8 = udiv i8 undef, 7 +; FAST-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V16i8 = udiv <16 x i8> undef, +; FAST-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %V32i8 = udiv <32 x i8> undef, +; FAST-NEXT: Cost Model: Found an estimated cost of 64 for instruction: %V64i8 = udiv <64 x i8> undef, +; FAST-NEXT: Cost Model: Found an estimated cost of 10 for instruction: ret i32 undef +; +; SLOW-LABEL: 'udiv_const' +; SLOW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I64 = udiv i64 undef, 7 +; SLOW-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2i64 = udiv <2 x i64> undef, +; SLOW-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4i64 = udiv <4 x i64> undef, +; SLOW-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V8i64 = udiv <8 x i64> undef, +; SLOW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I32 = udiv i32 undef, 7 +; SLOW-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4i32 = udiv <4 x i32> undef, +; SLOW-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V8i32 = udiv <8 x i32> undef, +; SLOW-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V16i32 = udiv <16 x i32> undef, +; SLOW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I16 = udiv i16 undef, 7 +; SLOW-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V8i16 = udiv <8 x i16> undef, +; SLOW-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %V16i16 = udiv <16 x i16> undef, +; SLOW-NEXT: Cost Model: Found an estimated cost of 64 for 
instruction: %V32i16 = udiv <32 x i16> undef, +; SLOW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I8 = udiv i8 undef, 7 +; SLOW-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %V16i8 = udiv <16 x i8> undef, +; SLOW-NEXT: Cost Model: Found an estimated cost of 64 for instruction: %V32i8 = udiv <32 x i8> undef, +; SLOW-NEXT: Cost Model: Found an estimated cost of 128 for instruction: %V64i8 = udiv <64 x i8> undef, +; SLOW-NEXT: Cost Model: Found an estimated cost of 10 for instruction: ret i32 undef +; +; ALL-SIZE-LABEL: 'udiv_const' +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I64 = udiv i64 undef, 7 +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V2i64 = udiv <2 x i64> undef, +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4i64 = udiv <4 x i64> undef, +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8i64 = udiv <8 x i64> undef, +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I32 = udiv i32 undef, 7 +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4i32 = udiv <4 x i32> undef, +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8i32 = udiv <8 x i32> undef, +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V16i32 = udiv <16 x i32> undef, +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I16 = udiv i16 undef, 7 +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8i16 = udiv <8 x i16> undef, +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V16i16 = udiv <16 x i16> undef, +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V32i16 = udiv <32 x i16> undef, +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I8 = udiv i8 undef, 7 +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V16i8 = udiv <16 x i8> undef, +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V32i8 = udiv <32 x i8> undef, +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V64i8 = udiv <64 x i8> undef, +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef +; + %I64 = udiv i64 undef, 7 + %V2i64 = udiv <2 x i64> undef, + %V4i64 = udiv <4 x i64> undef, + %V8i64 = udiv <8 x i64> undef, + + %I32 = udiv i32 undef, 7 + %V4i32 = udiv <4 x i32> undef, + %V8i32 = udiv <8 x i32> undef, + %V16i32 = udiv <16 x i32> undef, + + %I16 = udiv i16 undef, 7 + %V8i16 = udiv <8 x i16> undef, + %V16i16 = udiv <16 x i16> undef, + %V32i16 = udiv <32 x i16> undef, + + %I8 = udiv i8 undef, 7 + %V16i8 = udiv <16 x i8> undef, + %V32i8 = udiv <32 x i8> undef, + %V64i8 = udiv <64 x i8> undef, + + ret i32 undef +} + +define i32 @sdiv_uniformconst() { +; FAST-LABEL: 'sdiv_uniformconst' +; FAST-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I64 = sdiv i64 undef, 7 +; FAST-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2i64 = sdiv <2 x i64> undef, +; FAST-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4i64 = sdiv <4 x i64> undef, +; FAST-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V8i64 = sdiv <8 x i64> undef, +; FAST-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I32 = sdiv i32 undef, 7 +; FAST-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4i32 = 
sdiv <4 x i32> undef, +; FAST-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V8i32 = sdiv <8 x i32> undef, +; FAST-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V16i32 = sdiv <16 x i32> undef, +; FAST-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I16 = sdiv i16 undef, 7 +; FAST-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %V8i16 = sdiv <8 x i16> undef, +; FAST-NEXT: Cost Model: Found an estimated cost of 31 for instruction: %V16i16 = sdiv <16 x i16> undef, +; FAST-NEXT: Cost Model: Found an estimated cost of 63 for instruction: %V32i16 = sdiv <32 x i16> undef, +; FAST-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I8 = sdiv i8 undef, 7 +; FAST-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V16i8 = sdiv <16 x i8> undef, +; FAST-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %V32i8 = sdiv <32 x i8> undef, +; FAST-NEXT: Cost Model: Found an estimated cost of 64 for instruction: %V64i8 = sdiv <64 x i8> undef, +; FAST-NEXT: Cost Model: Found an estimated cost of 10 for instruction: ret i32 undef +; +; SLOW-LABEL: 'sdiv_uniformconst' +; SLOW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I64 = sdiv i64 undef, 7 +; SLOW-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2i64 = sdiv <2 x i64> undef, +; SLOW-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4i64 = sdiv <4 x i64> undef, +; SLOW-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V8i64 = sdiv <8 x i64> undef, +; SLOW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I32 = sdiv i32 undef, 7 +; SLOW-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4i32 = sdiv <4 x i32> undef, +; SLOW-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V8i32 = sdiv <8 x i32> undef, +; SLOW-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V16i32 = sdiv <16 x i32> undef, +; SLOW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I16 = sdiv i16 undef, 7 +; SLOW-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V8i16 = sdiv <8 x i16> undef, +; SLOW-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %V16i16 = sdiv <16 x i16> undef, +; SLOW-NEXT: Cost Model: Found an estimated cost of 64 for instruction: %V32i16 = sdiv <32 x i16> undef, +; SLOW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I8 = sdiv i8 undef, 7 +; SLOW-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %V16i8 = sdiv <16 x i8> undef, +; SLOW-NEXT: Cost Model: Found an estimated cost of 64 for instruction: %V32i8 = sdiv <32 x i8> undef, +; SLOW-NEXT: Cost Model: Found an estimated cost of 128 for instruction: %V64i8 = sdiv <64 x i8> undef, +; SLOW-NEXT: Cost Model: Found an estimated cost of 10 for instruction: ret i32 undef +; +; ALL-SIZE-LABEL: 'sdiv_uniformconst' +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I64 = sdiv i64 undef, 7 +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V2i64 = sdiv <2 x i64> undef, +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4i64 = sdiv <4 x i64> undef, +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8i64 = sdiv <8 x i64> undef, +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I32 = sdiv i32 undef, 7 +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4i32 = sdiv 
<4 x i32> undef, +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8i32 = sdiv <8 x i32> undef, +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V16i32 = sdiv <16 x i32> undef, +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I16 = sdiv i16 undef, 7 +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8i16 = sdiv <8 x i16> undef, +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V16i16 = sdiv <16 x i16> undef, +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V32i16 = sdiv <32 x i16> undef, +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I8 = sdiv i8 undef, 7 +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V16i8 = sdiv <16 x i8> undef, +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V32i8 = sdiv <32 x i8> undef, +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V64i8 = sdiv <64 x i8> undef, +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef +; + %I64 = sdiv i64 undef, 7 + %V2i64 = sdiv <2 x i64> undef, + %V4i64 = sdiv <4 x i64> undef, + %V8i64 = sdiv <8 x i64> undef, + + %I32 = sdiv i32 undef, 7 + %V4i32 = sdiv <4 x i32> undef, + %V8i32 = sdiv <8 x i32> undef, + %V16i32 = sdiv <16 x i32> undef, + + %I16 = sdiv i16 undef, 7 + %V8i16 = sdiv <8 x i16> undef, + %V16i16 = sdiv <16 x i16> undef, + %V32i16 = sdiv <32 x i16> undef, + + %I8 = sdiv i8 undef, 7 + %V16i8 = sdiv <16 x i8> undef, + %V32i8 = sdiv <32 x i8> undef, + %V64i8 = sdiv <64 x i8> undef, + + ret i32 undef +} + +define i32 @udiv_uniformconst() { +; FAST-LABEL: 'udiv_uniformconst' +; FAST-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I64 = udiv i64 undef, 7 +; FAST-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2i64 = udiv <2 x i64> undef, +; FAST-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4i64 = udiv <4 x i64> undef, +; FAST-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V8i64 = udiv <8 x i64> undef, +; FAST-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I32 = udiv i32 undef, 7 +; FAST-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4i32 = udiv <4 x i32> undef, +; FAST-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V8i32 = udiv <8 x i32> undef, +; FAST-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V16i32 = udiv <16 x i32> undef, +; FAST-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I16 = udiv i16 undef, 7 +; FAST-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %V8i16 = udiv <8 x i16> undef, +; FAST-NEXT: Cost Model: Found an estimated cost of 31 for instruction: %V16i16 = udiv <16 x i16> undef, +; FAST-NEXT: Cost Model: Found an estimated cost of 63 for instruction: %V32i16 = udiv <32 x i16> undef, +; FAST-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I8 = udiv i8 undef, 7 +; FAST-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V16i8 = udiv <16 x i8> undef, +; FAST-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %V32i8 = udiv <32 x i8> undef, +; FAST-NEXT: Cost Model: Found an estimated cost of 64 for instruction: %V64i8 = udiv <64 x i8> undef, +; FAST-NEXT: Cost Model: Found an estimated cost of 10 for instruction: ret i32 undef +; +; SLOW-LABEL: 'udiv_uniformconst' +; SLOW-NEXT: 
Cost Model: Found an estimated cost of 1 for instruction: %I64 = udiv i64 undef, 7 +; SLOW-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2i64 = udiv <2 x i64> undef, +; SLOW-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4i64 = udiv <4 x i64> undef, +; SLOW-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V8i64 = udiv <8 x i64> undef, +; SLOW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I32 = udiv i32 undef, 7 +; SLOW-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4i32 = udiv <4 x i32> undef, +; SLOW-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V8i32 = udiv <8 x i32> undef, +; SLOW-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V16i32 = udiv <16 x i32> undef, +; SLOW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I16 = udiv i16 undef, 7 +; SLOW-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V8i16 = udiv <8 x i16> undef, +; SLOW-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %V16i16 = udiv <16 x i16> undef, +; SLOW-NEXT: Cost Model: Found an estimated cost of 64 for instruction: %V32i16 = udiv <32 x i16> undef, +; SLOW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I8 = udiv i8 undef, 7 +; SLOW-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %V16i8 = udiv <16 x i8> undef, +; SLOW-NEXT: Cost Model: Found an estimated cost of 64 for instruction: %V32i8 = udiv <32 x i8> undef, +; SLOW-NEXT: Cost Model: Found an estimated cost of 128 for instruction: %V64i8 = udiv <64 x i8> undef, +; SLOW-NEXT: Cost Model: Found an estimated cost of 10 for instruction: ret i32 undef +; +; ALL-SIZE-LABEL: 'udiv_uniformconst' +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I64 = udiv i64 undef, 7 +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V2i64 = udiv <2 x i64> undef, +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4i64 = udiv <4 x i64> undef, +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8i64 = udiv <8 x i64> undef, +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I32 = udiv i32 undef, 7 +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4i32 = udiv <4 x i32> undef, +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8i32 = udiv <8 x i32> undef, +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V16i32 = udiv <16 x i32> undef, +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I16 = udiv i16 undef, 7 +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8i16 = udiv <8 x i16> undef, +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V16i16 = udiv <16 x i16> undef, +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V32i16 = udiv <32 x i16> undef, +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I8 = udiv i8 undef, 7 +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V16i8 = udiv <16 x i8> undef, +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V32i8 = udiv <32 x i8> undef, +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V64i8 = udiv <64 x i8> undef, +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef +; + %I64 = udiv i64 
undef, 7 + %V2i64 = udiv <2 x i64> undef, + %V4i64 = udiv <4 x i64> undef, + %V8i64 = udiv <8 x i64> undef, + + %I32 = udiv i32 undef, 7 + %V4i32 = udiv <4 x i32> undef, + %V8i32 = udiv <8 x i32> undef, + %V16i32 = udiv <16 x i32> undef, + + %I16 = udiv i16 undef, 7 + %V8i16 = udiv <8 x i16> undef, + %V16i16 = udiv <16 x i16> undef, + %V32i16 = udiv <32 x i16> undef, + + %I8 = udiv i8 undef, 7 + %V16i8 = udiv <16 x i8> undef, + %V32i8 = udiv <32 x i8> undef, + %V64i8 = udiv <64 x i8> undef, + + ret i32 undef +} + +define i32 @sdiv_constpow2() { +; FAST-LABEL: 'sdiv_constpow2' +; FAST-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I64 = sdiv i64 undef, 16 +; FAST-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2i64 = sdiv <2 x i64> undef, +; FAST-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4i64 = sdiv <4 x i64> undef, +; FAST-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V8i64 = sdiv <8 x i64> undef, +; FAST-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I32 = sdiv i32 undef, 16 +; FAST-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4i32 = sdiv <4 x i32> undef, +; FAST-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V8i32 = sdiv <8 x i32> undef, +; FAST-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V16i32 = sdiv <16 x i32> undef, +; FAST-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I16 = sdiv i16 undef, 16 +; FAST-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %V8i16 = sdiv <8 x i16> undef, +; FAST-NEXT: Cost Model: Found an estimated cost of 31 for instruction: %V16i16 = sdiv <16 x i16> undef, +; FAST-NEXT: Cost Model: Found an estimated cost of 63 for instruction: %V32i16 = sdiv <32 x i16> undef, +; FAST-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I8 = sdiv i8 undef, 16 +; FAST-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V16i8 = sdiv <16 x i8> undef, +; FAST-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %V32i8 = sdiv <32 x i8> undef, +; FAST-NEXT: Cost Model: Found an estimated cost of 64 for instruction: %V64i8 = sdiv <64 x i8> undef, +; FAST-NEXT: Cost Model: Found an estimated cost of 10 for instruction: ret i32 undef +; +; SLOW-LABEL: 'sdiv_constpow2' +; SLOW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I64 = sdiv i64 undef, 16 +; SLOW-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2i64 = sdiv <2 x i64> undef, +; SLOW-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4i64 = sdiv <4 x i64> undef, +; SLOW-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V8i64 = sdiv <8 x i64> undef, +; SLOW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I32 = sdiv i32 undef, 16 +; SLOW-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4i32 = sdiv <4 x i32> undef, +; SLOW-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V8i32 = sdiv <8 x i32> undef, +; SLOW-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V16i32 = sdiv <16 x i32> undef, +; SLOW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I16 = sdiv i16 undef, 16 +; SLOW-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V8i16 = sdiv <8 x i16> undef, +; SLOW-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %V16i16 = sdiv <16 x i16> undef, +; SLOW-NEXT: Cost Model: Found an estimated cost of 64 for instruction: %V32i16 = 
sdiv <32 x i16> undef, +; SLOW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I8 = sdiv i8 undef, 16 +; SLOW-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %V16i8 = sdiv <16 x i8> undef, +; SLOW-NEXT: Cost Model: Found an estimated cost of 64 for instruction: %V32i8 = sdiv <32 x i8> undef, +; SLOW-NEXT: Cost Model: Found an estimated cost of 128 for instruction: %V64i8 = sdiv <64 x i8> undef, +; SLOW-NEXT: Cost Model: Found an estimated cost of 10 for instruction: ret i32 undef +; +; ALL-SIZE-LABEL: 'sdiv_constpow2' +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I64 = sdiv i64 undef, 16 +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V2i64 = sdiv <2 x i64> undef, +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4i64 = sdiv <4 x i64> undef, +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8i64 = sdiv <8 x i64> undef, +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I32 = sdiv i32 undef, 16 +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4i32 = sdiv <4 x i32> undef, +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8i32 = sdiv <8 x i32> undef, +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V16i32 = sdiv <16 x i32> undef, +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I16 = sdiv i16 undef, 16 +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8i16 = sdiv <8 x i16> undef, +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V16i16 = sdiv <16 x i16> undef, +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V32i16 = sdiv <32 x i16> undef, +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I8 = sdiv i8 undef, 16 +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V16i8 = sdiv <16 x i8> undef, +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V32i8 = sdiv <32 x i8> undef, +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V64i8 = sdiv <64 x i8> undef, +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef +; + %I64 = sdiv i64 undef, 16 + %V2i64 = sdiv <2 x i64> undef, + %V4i64 = sdiv <4 x i64> undef, + %V8i64 = sdiv <8 x i64> undef, + + %I32 = sdiv i32 undef, 16 + %V4i32 = sdiv <4 x i32> undef, + %V8i32 = sdiv <8 x i32> undef, + %V16i32 = sdiv <16 x i32> undef, + + %I16 = sdiv i16 undef, 16 + %V8i16 = sdiv <8 x i16> undef, + %V16i16 = sdiv <16 x i16> undef, + %V32i16 = sdiv <32 x i16> undef, + + %I8 = sdiv i8 undef, 16 + %V16i8 = sdiv <16 x i8> undef, + %V32i8 = sdiv <32 x i8> undef, + %V64i8 = sdiv <64 x i8> undef, + + ret i32 undef +} + +define i32 @udiv_constpow2() { +; FAST-LABEL: 'udiv_constpow2' +; FAST-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I64 = udiv i64 undef, 16 +; FAST-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2i64 = udiv <2 x i64> undef, +; FAST-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4i64 = udiv <4 x i64> undef, +; FAST-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V8i64 = udiv <8 x i64> undef, +; FAST-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I32 = udiv i32 undef, 16 +; FAST-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4i32 = udiv <4 x i32> 
undef, +; FAST-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V8i32 = udiv <8 x i32> undef, +; FAST-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V16i32 = udiv <16 x i32> undef, +; FAST-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I16 = udiv i16 undef, 16 +; FAST-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %V8i16 = udiv <8 x i16> undef, +; FAST-NEXT: Cost Model: Found an estimated cost of 31 for instruction: %V16i16 = udiv <16 x i16> undef, +; FAST-NEXT: Cost Model: Found an estimated cost of 63 for instruction: %V32i16 = udiv <32 x i16> undef, +; FAST-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I8 = udiv i8 undef, 16 +; FAST-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V16i8 = udiv <16 x i8> undef, +; FAST-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %V32i8 = udiv <32 x i8> undef, +; FAST-NEXT: Cost Model: Found an estimated cost of 64 for instruction: %V64i8 = udiv <64 x i8> undef, +; FAST-NEXT: Cost Model: Found an estimated cost of 10 for instruction: ret i32 undef +; +; SLOW-LABEL: 'udiv_constpow2' +; SLOW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I64 = udiv i64 undef, 16 +; SLOW-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2i64 = udiv <2 x i64> undef, +; SLOW-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4i64 = udiv <4 x i64> undef, +; SLOW-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V8i64 = udiv <8 x i64> undef, +; SLOW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I32 = udiv i32 undef, 16 +; SLOW-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4i32 = udiv <4 x i32> undef, +; SLOW-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V8i32 = udiv <8 x i32> undef, +; SLOW-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V16i32 = udiv <16 x i32> undef, +; SLOW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I16 = udiv i16 undef, 16 +; SLOW-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V8i16 = udiv <8 x i16> undef, +; SLOW-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %V16i16 = udiv <16 x i16> undef, +; SLOW-NEXT: Cost Model: Found an estimated cost of 64 for instruction: %V32i16 = udiv <32 x i16> undef, +; SLOW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I8 = udiv i8 undef, 16 +; SLOW-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %V16i8 = udiv <16 x i8> undef, +; SLOW-NEXT: Cost Model: Found an estimated cost of 64 for instruction: %V32i8 = udiv <32 x i8> undef, +; SLOW-NEXT: Cost Model: Found an estimated cost of 128 for instruction: %V64i8 = udiv <64 x i8> undef, +; SLOW-NEXT: Cost Model: Found an estimated cost of 10 for instruction: ret i32 undef +; +; ALL-SIZE-LABEL: 'udiv_constpow2' +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I64 = udiv i64 undef, 16 +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V2i64 = udiv <2 x i64> undef, +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4i64 = udiv <4 x i64> undef, +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8i64 = udiv <8 x i64> undef, +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I32 = udiv i32 undef, 16 +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4i32 = udiv <4 x i32> 
undef, +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8i32 = udiv <8 x i32> undef, +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V16i32 = udiv <16 x i32> undef, +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I16 = udiv i16 undef, 16 +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8i16 = udiv <8 x i16> undef, +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V16i16 = udiv <16 x i16> undef, +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V32i16 = udiv <32 x i16> undef, +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I8 = udiv i8 undef, 16 +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V16i8 = udiv <16 x i8> undef, +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V32i8 = udiv <32 x i8> undef, +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V64i8 = udiv <64 x i8> undef, +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef +; + %I64 = udiv i64 undef, 16 + %V2i64 = udiv <2 x i64> undef, + %V4i64 = udiv <4 x i64> undef, + %V8i64 = udiv <8 x i64> undef, + + %I32 = udiv i32 undef, 16 + %V4i32 = udiv <4 x i32> undef, + %V8i32 = udiv <8 x i32> undef, + %V16i32 = udiv <16 x i32> undef, + + %I16 = udiv i16 undef, 16 + %V8i16 = udiv <8 x i16> undef, + %V16i16 = udiv <16 x i16> undef, + %V32i16 = udiv <32 x i16> undef, + + %I8 = udiv i8 undef, 16 + %V16i8 = udiv <16 x i8> undef, + %V32i8 = udiv <32 x i8> undef, + %V64i8 = udiv <64 x i8> undef, + + ret i32 undef +} + +define i32 @sdiv_uniformconstpow2() { +; FAST-LABEL: 'sdiv_uniformconstpow2' +; FAST-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I64 = sdiv i64 undef, 16 +; FAST-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2i64 = sdiv <2 x i64> undef, +; FAST-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4i64 = sdiv <4 x i64> undef, +; FAST-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V8i64 = sdiv <8 x i64> undef, +; FAST-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I32 = sdiv i32 undef, 16 +; FAST-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4i32 = sdiv <4 x i32> undef, +; FAST-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V8i32 = sdiv <8 x i32> undef, +; FAST-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V16i32 = sdiv <16 x i32> undef, +; FAST-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I16 = sdiv i16 undef, 16 +; FAST-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %V8i16 = sdiv <8 x i16> undef, +; FAST-NEXT: Cost Model: Found an estimated cost of 31 for instruction: %V16i16 = sdiv <16 x i16> undef, +; FAST-NEXT: Cost Model: Found an estimated cost of 63 for instruction: %V32i16 = sdiv <32 x i16> undef, +; FAST-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I8 = sdiv i8 undef, 16 +; FAST-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V16i8 = sdiv <16 x i8> undef, +; FAST-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %V32i8 = sdiv <32 x i8> undef, +; FAST-NEXT: Cost Model: Found an estimated cost of 64 for instruction: %V64i8 = sdiv <64 x i8> undef, +; FAST-NEXT: Cost Model: Found an estimated cost of 10 for instruction: ret i32 undef +; +; SLOW-LABEL: 'sdiv_uniformconstpow2' +; 
SLOW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I64 = sdiv i64 undef, 16 +; SLOW-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2i64 = sdiv <2 x i64> undef, +; SLOW-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4i64 = sdiv <4 x i64> undef, +; SLOW-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V8i64 = sdiv <8 x i64> undef, +; SLOW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I32 = sdiv i32 undef, 16 +; SLOW-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4i32 = sdiv <4 x i32> undef, +; SLOW-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V8i32 = sdiv <8 x i32> undef, +; SLOW-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V16i32 = sdiv <16 x i32> undef, +; SLOW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I16 = sdiv i16 undef, 16 +; SLOW-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V8i16 = sdiv <8 x i16> undef, +; SLOW-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %V16i16 = sdiv <16 x i16> undef, +; SLOW-NEXT: Cost Model: Found an estimated cost of 64 for instruction: %V32i16 = sdiv <32 x i16> undef, +; SLOW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I8 = sdiv i8 undef, 16 +; SLOW-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %V16i8 = sdiv <16 x i8> undef, +; SLOW-NEXT: Cost Model: Found an estimated cost of 64 for instruction: %V32i8 = sdiv <32 x i8> undef, +; SLOW-NEXT: Cost Model: Found an estimated cost of 128 for instruction: %V64i8 = sdiv <64 x i8> undef, +; SLOW-NEXT: Cost Model: Found an estimated cost of 10 for instruction: ret i32 undef +; +; ALL-SIZE-LABEL: 'sdiv_uniformconstpow2' +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I64 = sdiv i64 undef, 16 +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V2i64 = sdiv <2 x i64> undef, +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4i64 = sdiv <4 x i64> undef, +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8i64 = sdiv <8 x i64> undef, +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I32 = sdiv i32 undef, 16 +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4i32 = sdiv <4 x i32> undef, +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8i32 = sdiv <8 x i32> undef, +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V16i32 = sdiv <16 x i32> undef, +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I16 = sdiv i16 undef, 16 +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8i16 = sdiv <8 x i16> undef, +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V16i16 = sdiv <16 x i16> undef, +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V32i16 = sdiv <32 x i16> undef, +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I8 = sdiv i8 undef, 16 +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V16i8 = sdiv <16 x i8> undef, +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V32i8 = sdiv <32 x i8> undef, +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V64i8 = sdiv <64 x i8> undef, +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 
undef +; + %I64 = sdiv i64 undef, 16 + %V2i64 = sdiv <2 x i64> undef, + %V4i64 = sdiv <4 x i64> undef, + %V8i64 = sdiv <8 x i64> undef, + + %I32 = sdiv i32 undef, 16 + %V4i32 = sdiv <4 x i32> undef, + %V8i32 = sdiv <8 x i32> undef, + %V16i32 = sdiv <16 x i32> undef, + + %I16 = sdiv i16 undef, 16 + %V8i16 = sdiv <8 x i16> undef, + %V16i16 = sdiv <16 x i16> undef, + %V32i16 = sdiv <32 x i16> undef, + + %I8 = sdiv i8 undef, 16 + %V16i8 = sdiv <16 x i8> undef, + %V32i8 = sdiv <32 x i8> undef, + %V64i8 = sdiv <64 x i8> undef, + + ret i32 undef +} + +define i32 @udiv_uniformconstpow2() { +; FAST-LABEL: 'udiv_uniformconstpow2' +; FAST-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I64 = udiv i64 undef, 16 +; FAST-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2i64 = udiv <2 x i64> undef, +; FAST-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4i64 = udiv <4 x i64> undef, +; FAST-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V8i64 = udiv <8 x i64> undef, +; FAST-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I32 = udiv i32 undef, 16 +; FAST-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4i32 = udiv <4 x i32> undef, +; FAST-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V8i32 = udiv <8 x i32> undef, +; FAST-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V16i32 = udiv <16 x i32> undef, +; FAST-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I16 = udiv i16 undef, 16 +; FAST-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %V8i16 = udiv <8 x i16> undef, +; FAST-NEXT: Cost Model: Found an estimated cost of 31 for instruction: %V16i16 = udiv <16 x i16> undef, +; FAST-NEXT: Cost Model: Found an estimated cost of 63 for instruction: %V32i16 = udiv <32 x i16> undef, +; FAST-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I8 = udiv i8 undef, 16 +; FAST-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V16i8 = udiv <16 x i8> undef, +; FAST-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %V32i8 = udiv <32 x i8> undef, +; FAST-NEXT: Cost Model: Found an estimated cost of 64 for instruction: %V64i8 = udiv <64 x i8> undef, +; FAST-NEXT: Cost Model: Found an estimated cost of 10 for instruction: ret i32 undef +; +; SLOW-LABEL: 'udiv_uniformconstpow2' +; SLOW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I64 = udiv i64 undef, 16 +; SLOW-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2i64 = udiv <2 x i64> undef, +; SLOW-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4i64 = udiv <4 x i64> undef, +; SLOW-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V8i64 = udiv <8 x i64> undef, +; SLOW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I32 = udiv i32 undef, 16 +; SLOW-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4i32 = udiv <4 x i32> undef, +; SLOW-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V8i32 = udiv <8 x i32> undef, +; SLOW-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V16i32 = udiv <16 x i32> undef, +; SLOW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I16 = udiv i16 undef, 16 +; SLOW-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V8i16 = udiv <8 x i16> undef, +; SLOW-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %V16i16 = udiv <16 x i16> undef, +; SLOW-NEXT: Cost Model: Found 
an estimated cost of 64 for instruction: %V32i16 = udiv <32 x i16> undef, +; SLOW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I8 = udiv i8 undef, 16 +; SLOW-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %V16i8 = udiv <16 x i8> undef, +; SLOW-NEXT: Cost Model: Found an estimated cost of 64 for instruction: %V32i8 = udiv <32 x i8> undef, +; SLOW-NEXT: Cost Model: Found an estimated cost of 128 for instruction: %V64i8 = udiv <64 x i8> undef, +; SLOW-NEXT: Cost Model: Found an estimated cost of 10 for instruction: ret i32 undef +; +; ALL-SIZE-LABEL: 'udiv_uniformconstpow2' +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I64 = udiv i64 undef, 16 +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V2i64 = udiv <2 x i64> undef, +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4i64 = udiv <4 x i64> undef, +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8i64 = udiv <8 x i64> undef, +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I32 = udiv i32 undef, 16 +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4i32 = udiv <4 x i32> undef, +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8i32 = udiv <8 x i32> undef, +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V16i32 = udiv <16 x i32> undef, +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I16 = udiv i16 undef, 16 +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8i16 = udiv <8 x i16> undef, +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V16i16 = udiv <16 x i16> undef, +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V32i16 = udiv <32 x i16> undef, +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I8 = udiv i8 undef, 16 +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V16i8 = udiv <16 x i8> undef, +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V32i8 = udiv <32 x i8> undef, +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V64i8 = udiv <64 x i8> undef, +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef +; + %I64 = udiv i64 undef, 16 + %V2i64 = udiv <2 x i64> undef, + %V4i64 = udiv <4 x i64> undef, + %V8i64 = udiv <8 x i64> undef, + + %I32 = udiv i32 undef, 16 + %V4i32 = udiv <4 x i32> undef, + %V8i32 = udiv <8 x i32> undef, + %V16i32 = udiv <16 x i32> undef, + + %I16 = udiv i16 undef, 16 + %V8i16 = udiv <8 x i16> undef, + %V16i16 = udiv <16 x i16> undef, + %V32i16 = udiv <32 x i16> undef, + + %I8 = udiv i8 undef, 16 + %V16i8 = udiv <16 x i8> undef, + %V32i8 = udiv <32 x i8> undef, + %V64i8 = udiv <64 x i8> undef, + + ret i32 undef +} + +define i32 @sdiv_constnegpow2() { +; FAST-LABEL: 'sdiv_constnegpow2' +; FAST-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I64 = sdiv i64 undef, -16 +; FAST-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2i64 = sdiv <2 x i64> undef, +; FAST-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4i64 = sdiv <4 x i64> undef, +; FAST-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V8i64 = sdiv <8 x i64> undef, +; FAST-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I32 = sdiv i32 undef, -16 +; FAST-NEXT: Cost Model: Found 
an estimated cost of 4 for instruction: %V4i32 = sdiv <4 x i32> undef, +; FAST-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V8i32 = sdiv <8 x i32> undef, +; FAST-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V16i32 = sdiv <16 x i32> undef, +; FAST-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I16 = sdiv i16 undef, -16 +; FAST-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %V8i16 = sdiv <8 x i16> undef, +; FAST-NEXT: Cost Model: Found an estimated cost of 31 for instruction: %V16i16 = sdiv <16 x i16> undef, +; FAST-NEXT: Cost Model: Found an estimated cost of 63 for instruction: %V32i16 = sdiv <32 x i16> undef, +; FAST-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I8 = sdiv i8 undef, -16 +; FAST-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V16i8 = sdiv <16 x i8> undef, +; FAST-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %V32i8 = sdiv <32 x i8> undef, +; FAST-NEXT: Cost Model: Found an estimated cost of 64 for instruction: %V64i8 = sdiv <64 x i8> undef, +; FAST-NEXT: Cost Model: Found an estimated cost of 10 for instruction: ret i32 undef +; +; SLOW-LABEL: 'sdiv_constnegpow2' +; SLOW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I64 = sdiv i64 undef, -16 +; SLOW-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2i64 = sdiv <2 x i64> undef, +; SLOW-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4i64 = sdiv <4 x i64> undef, +; SLOW-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V8i64 = sdiv <8 x i64> undef, +; SLOW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I32 = sdiv i32 undef, -16 +; SLOW-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4i32 = sdiv <4 x i32> undef, +; SLOW-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V8i32 = sdiv <8 x i32> undef, +; SLOW-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V16i32 = sdiv <16 x i32> undef, +; SLOW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I16 = sdiv i16 undef, -16 +; SLOW-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V8i16 = sdiv <8 x i16> undef, +; SLOW-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %V16i16 = sdiv <16 x i16> undef, +; SLOW-NEXT: Cost Model: Found an estimated cost of 64 for instruction: %V32i16 = sdiv <32 x i16> undef, +; SLOW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I8 = sdiv i8 undef, -16 +; SLOW-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %V16i8 = sdiv <16 x i8> undef, +; SLOW-NEXT: Cost Model: Found an estimated cost of 64 for instruction: %V32i8 = sdiv <32 x i8> undef, +; SLOW-NEXT: Cost Model: Found an estimated cost of 128 for instruction: %V64i8 = sdiv <64 x i8> undef, +; SLOW-NEXT: Cost Model: Found an estimated cost of 10 for instruction: ret i32 undef +; +; ALL-SIZE-LABEL: 'sdiv_constnegpow2' +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I64 = sdiv i64 undef, -16 +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V2i64 = sdiv <2 x i64> undef, +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4i64 = sdiv <4 x i64> undef, +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8i64 = sdiv <8 x i64> undef, +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I32 = sdiv i32 undef, -16 +; ALL-SIZE-NEXT: Cost 
Model: Found an estimated cost of 4 for instruction: %V4i32 = sdiv <4 x i32> undef, +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8i32 = sdiv <8 x i32> undef, +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V16i32 = sdiv <16 x i32> undef, +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I16 = sdiv i16 undef, -16 +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8i16 = sdiv <8 x i16> undef, +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V16i16 = sdiv <16 x i16> undef, +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V32i16 = sdiv <32 x i16> undef, +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I8 = sdiv i8 undef, -16 +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V16i8 = sdiv <16 x i8> undef, +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V32i8 = sdiv <32 x i8> undef, +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V64i8 = sdiv <64 x i8> undef, +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef +; + %I64 = sdiv i64 undef, -16 + %V2i64 = sdiv <2 x i64> undef, + %V4i64 = sdiv <4 x i64> undef, + %V8i64 = sdiv <8 x i64> undef, + + %I32 = sdiv i32 undef, -16 + %V4i32 = sdiv <4 x i32> undef, + %V8i32 = sdiv <8 x i32> undef, + %V16i32 = sdiv <16 x i32> undef, + + %I16 = sdiv i16 undef, -16 + %V8i16 = sdiv <8 x i16> undef, + %V16i16 = sdiv <16 x i16> undef, + %V32i16 = sdiv <32 x i16> undef, + + %I8 = sdiv i8 undef, -16 + %V16i8 = sdiv <16 x i8> undef, + %V32i8 = sdiv <32 x i8> undef, + %V64i8 = sdiv <64 x i8> undef, + + ret i32 undef +} + +define i32 @udiv_constnegpow2() { +; FAST-LABEL: 'udiv_constnegpow2' +; FAST-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I64 = udiv i64 undef, -16 +; FAST-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2i64 = udiv <2 x i64> undef, +; FAST-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4i64 = udiv <4 x i64> undef, +; FAST-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V8i64 = udiv <8 x i64> undef, +; FAST-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I32 = udiv i32 undef, -16 +; FAST-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4i32 = udiv <4 x i32> undef, +; FAST-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V8i32 = udiv <8 x i32> undef, +; FAST-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V16i32 = udiv <16 x i32> undef, +; FAST-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I16 = udiv i16 undef, -16 +; FAST-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %V8i16 = udiv <8 x i16> undef, +; FAST-NEXT: Cost Model: Found an estimated cost of 31 for instruction: %V16i16 = udiv <16 x i16> undef, +; FAST-NEXT: Cost Model: Found an estimated cost of 63 for instruction: %V32i16 = udiv <32 x i16> undef, +; FAST-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I8 = udiv i8 undef, -16 +; FAST-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V16i8 = udiv <16 x i8> undef, +; FAST-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %V32i8 = udiv <32 x i8> undef, +; FAST-NEXT: Cost Model: Found an estimated cost of 64 for instruction: %V64i8 = udiv <64 x i8> undef, +; FAST-NEXT: Cost Model: Found an estimated cost of 
10 for instruction: ret i32 undef +; +; SLOW-LABEL: 'udiv_constnegpow2' +; SLOW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I64 = udiv i64 undef, -16 +; SLOW-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2i64 = udiv <2 x i64> undef, +; SLOW-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4i64 = udiv <4 x i64> undef, +; SLOW-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V8i64 = udiv <8 x i64> undef, +; SLOW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I32 = udiv i32 undef, -16 +; SLOW-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4i32 = udiv <4 x i32> undef, +; SLOW-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V8i32 = udiv <8 x i32> undef, +; SLOW-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V16i32 = udiv <16 x i32> undef, +; SLOW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I16 = udiv i16 undef, -16 +; SLOW-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V8i16 = udiv <8 x i16> undef, +; SLOW-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %V16i16 = udiv <16 x i16> undef, +; SLOW-NEXT: Cost Model: Found an estimated cost of 64 for instruction: %V32i16 = udiv <32 x i16> undef, +; SLOW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I8 = udiv i8 undef, -16 +; SLOW-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %V16i8 = udiv <16 x i8> undef, +; SLOW-NEXT: Cost Model: Found an estimated cost of 64 for instruction: %V32i8 = udiv <32 x i8> undef, +; SLOW-NEXT: Cost Model: Found an estimated cost of 128 for instruction: %V64i8 = udiv <64 x i8> undef, +; SLOW-NEXT: Cost Model: Found an estimated cost of 10 for instruction: ret i32 undef +; +; ALL-SIZE-LABEL: 'udiv_constnegpow2' +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I64 = udiv i64 undef, -16 +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V2i64 = udiv <2 x i64> undef, +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4i64 = udiv <4 x i64> undef, +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8i64 = udiv <8 x i64> undef, +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I32 = udiv i32 undef, -16 +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4i32 = udiv <4 x i32> undef, +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8i32 = udiv <8 x i32> undef, +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V16i32 = udiv <16 x i32> undef, +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I16 = udiv i16 undef, -16 +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8i16 = udiv <8 x i16> undef, +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V16i16 = udiv <16 x i16> undef, +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V32i16 = udiv <32 x i16> undef, +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I8 = udiv i8 undef, -16 +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V16i8 = udiv <16 x i8> undef, +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V32i8 = udiv <32 x i8> undef, +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V64i8 = udiv <64 x i8> undef, +; 
ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef +; + %I64 = udiv i64 undef, -16 + %V2i64 = udiv <2 x i64> undef, + %V4i64 = udiv <4 x i64> undef, + %V8i64 = udiv <8 x i64> undef, + + %I32 = udiv i32 undef, -16 + %V4i32 = udiv <4 x i32> undef, + %V8i32 = udiv <8 x i32> undef, + %V16i32 = udiv <16 x i32> undef, + + %I16 = udiv i16 undef, -16 + %V8i16 = udiv <8 x i16> undef, + %V16i16 = udiv <16 x i16> undef, + %V32i16 = udiv <32 x i16> undef, + + %I8 = udiv i8 undef, -16 + %V16i8 = udiv <16 x i8> undef, + %V32i8 = udiv <32 x i8> undef, + %V64i8 = udiv <64 x i8> undef, + + ret i32 undef +} + +define i32 @sdiv_uniformconstnegpow2() { +; FAST-LABEL: 'sdiv_uniformconstnegpow2' +; FAST-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I64 = sdiv i64 undef, -16 +; FAST-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2i64 = sdiv <2 x i64> undef, +; FAST-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4i64 = sdiv <4 x i64> undef, +; FAST-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V8i64 = sdiv <8 x i64> undef, +; FAST-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I32 = sdiv i32 undef, -16 +; FAST-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4i32 = sdiv <4 x i32> undef, +; FAST-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V8i32 = sdiv <8 x i32> undef, +; FAST-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V16i32 = sdiv <16 x i32> undef, +; FAST-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I16 = sdiv i16 undef, -16 +; FAST-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %V8i16 = sdiv <8 x i16> undef, +; FAST-NEXT: Cost Model: Found an estimated cost of 31 for instruction: %V16i16 = sdiv <16 x i16> undef, +; FAST-NEXT: Cost Model: Found an estimated cost of 63 for instruction: %V32i16 = sdiv <32 x i16> undef, +; FAST-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I8 = sdiv i8 undef, -16 +; FAST-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V16i8 = sdiv <16 x i8> undef, +; FAST-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %V32i8 = sdiv <32 x i8> undef, +; FAST-NEXT: Cost Model: Found an estimated cost of 64 for instruction: %V64i8 = sdiv <64 x i8> undef, +; FAST-NEXT: Cost Model: Found an estimated cost of 10 for instruction: ret i32 undef +; +; SLOW-LABEL: 'sdiv_uniformconstnegpow2' +; SLOW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I64 = sdiv i64 undef, -16 +; SLOW-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2i64 = sdiv <2 x i64> undef, +; SLOW-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4i64 = sdiv <4 x i64> undef, +; SLOW-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V8i64 = sdiv <8 x i64> undef, +; SLOW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I32 = sdiv i32 undef, -16 +; SLOW-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4i32 = sdiv <4 x i32> undef, +; SLOW-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V8i32 = sdiv <8 x i32> undef, +; SLOW-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V16i32 = sdiv <16 x i32> undef, +; SLOW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I16 = sdiv i16 undef, -16 +; SLOW-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V8i16 = sdiv <8 x i16> undef, +; SLOW-NEXT: Cost Model: Found an 
estimated cost of 32 for instruction: %V16i16 = sdiv <16 x i16> undef, +; SLOW-NEXT: Cost Model: Found an estimated cost of 64 for instruction: %V32i16 = sdiv <32 x i16> undef, +; SLOW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I8 = sdiv i8 undef, -16 +; SLOW-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %V16i8 = sdiv <16 x i8> undef, +; SLOW-NEXT: Cost Model: Found an estimated cost of 64 for instruction: %V32i8 = sdiv <32 x i8> undef, +; SLOW-NEXT: Cost Model: Found an estimated cost of 128 for instruction: %V64i8 = sdiv <64 x i8> undef, +; SLOW-NEXT: Cost Model: Found an estimated cost of 10 for instruction: ret i32 undef +; +; ALL-SIZE-LABEL: 'sdiv_uniformconstnegpow2' +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I64 = sdiv i64 undef, -16 +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V2i64 = sdiv <2 x i64> undef, +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4i64 = sdiv <4 x i64> undef, +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8i64 = sdiv <8 x i64> undef, +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I32 = sdiv i32 undef, -16 +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4i32 = sdiv <4 x i32> undef, +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8i32 = sdiv <8 x i32> undef, +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V16i32 = sdiv <16 x i32> undef, +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I16 = sdiv i16 undef, -16 +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8i16 = sdiv <8 x i16> undef, +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V16i16 = sdiv <16 x i16> undef, +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V32i16 = sdiv <32 x i16> undef, +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I8 = sdiv i8 undef, -16 +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V16i8 = sdiv <16 x i8> undef, +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V32i8 = sdiv <32 x i8> undef, +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V64i8 = sdiv <64 x i8> undef, +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef +; + %I64 = sdiv i64 undef, -16 + %V2i64 = sdiv <2 x i64> undef, + %V4i64 = sdiv <4 x i64> undef, + %V8i64 = sdiv <8 x i64> undef, + + %I32 = sdiv i32 undef, -16 + %V4i32 = sdiv <4 x i32> undef, + %V8i32 = sdiv <8 x i32> undef, + %V16i32 = sdiv <16 x i32> undef, + + %I16 = sdiv i16 undef, -16 + %V8i16 = sdiv <8 x i16> undef, + %V16i16 = sdiv <16 x i16> undef, + %V32i16 = sdiv <32 x i16> undef, + + %I8 = sdiv i8 undef, -16 + %V16i8 = sdiv <16 x i8> undef, + %V32i8 = sdiv <32 x i8> undef, + %V64i8 = sdiv <64 x i8> undef, + + ret i32 undef +} + +define i32 @udiv_uniformconstnegpow2() { +; FAST-LABEL: 'udiv_uniformconstnegpow2' +; FAST-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I64 = udiv i64 undef, -16 +; FAST-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2i64 = udiv <2 x i64> undef, +; FAST-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4i64 = udiv <4 x i64> undef, +; FAST-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V8i64 = udiv <8 x i64> undef, 
+; FAST-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I32 = udiv i32 undef, -16 +; FAST-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4i32 = udiv <4 x i32> undef, +; FAST-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V8i32 = udiv <8 x i32> undef, +; FAST-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V16i32 = udiv <16 x i32> undef, +; FAST-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I16 = udiv i16 undef, -16 +; FAST-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %V8i16 = udiv <8 x i16> undef, +; FAST-NEXT: Cost Model: Found an estimated cost of 31 for instruction: %V16i16 = udiv <16 x i16> undef, +; FAST-NEXT: Cost Model: Found an estimated cost of 63 for instruction: %V32i16 = udiv <32 x i16> undef, +; FAST-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I8 = udiv i8 undef, -16 +; FAST-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V16i8 = udiv <16 x i8> undef, +; FAST-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %V32i8 = udiv <32 x i8> undef, +; FAST-NEXT: Cost Model: Found an estimated cost of 64 for instruction: %V64i8 = udiv <64 x i8> undef, +; FAST-NEXT: Cost Model: Found an estimated cost of 10 for instruction: ret i32 undef +; +; SLOW-LABEL: 'udiv_uniformconstnegpow2' +; SLOW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I64 = udiv i64 undef, -16 +; SLOW-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2i64 = udiv <2 x i64> undef, +; SLOW-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4i64 = udiv <4 x i64> undef, +; SLOW-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V8i64 = udiv <8 x i64> undef, +; SLOW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I32 = udiv i32 undef, -16 +; SLOW-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4i32 = udiv <4 x i32> undef, +; SLOW-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V8i32 = udiv <8 x i32> undef, +; SLOW-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V16i32 = udiv <16 x i32> undef, +; SLOW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I16 = udiv i16 undef, -16 +; SLOW-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V8i16 = udiv <8 x i16> undef, +; SLOW-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %V16i16 = udiv <16 x i16> undef, +; SLOW-NEXT: Cost Model: Found an estimated cost of 64 for instruction: %V32i16 = udiv <32 x i16> undef, +; SLOW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I8 = udiv i8 undef, -16 +; SLOW-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %V16i8 = udiv <16 x i8> undef, +; SLOW-NEXT: Cost Model: Found an estimated cost of 64 for instruction: %V32i8 = udiv <32 x i8> undef, +; SLOW-NEXT: Cost Model: Found an estimated cost of 128 for instruction: %V64i8 = udiv <64 x i8> undef, +; SLOW-NEXT: Cost Model: Found an estimated cost of 10 for instruction: ret i32 undef +; +; ALL-SIZE-LABEL: 'udiv_uniformconstnegpow2' +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I64 = udiv i64 undef, -16 +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V2i64 = udiv <2 x i64> undef, +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4i64 = udiv <4 x i64> undef, +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8i64 = udiv 
<8 x i64> undef, +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I32 = udiv i32 undef, -16 +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4i32 = udiv <4 x i32> undef, +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8i32 = udiv <8 x i32> undef, +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V16i32 = udiv <16 x i32> undef, +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I16 = udiv i16 undef, -16 +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8i16 = udiv <8 x i16> undef, +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V16i16 = udiv <16 x i16> undef, +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V32i16 = udiv <32 x i16> undef, +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I8 = udiv i8 undef, -16 +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V16i8 = udiv <16 x i8> undef, +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V32i8 = udiv <32 x i8> undef, +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V64i8 = udiv <64 x i8> undef, +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef +; + %I64 = udiv i64 undef, -16 + %V2i64 = udiv <2 x i64> undef, + %V4i64 = udiv <4 x i64> undef, + %V8i64 = udiv <8 x i64> undef, + + %I32 = udiv i32 undef, -16 + %V4i32 = udiv <4 x i32> undef, + %V8i32 = udiv <8 x i32> undef, + %V16i32 = udiv <16 x i32> undef, + + %I16 = udiv i16 undef, -16 + %V8i16 = udiv <8 x i16> undef, + %V16i16 = udiv <16 x i16> undef, + %V32i16 = udiv <32 x i16> undef, + + %I8 = udiv i8 undef, -16 + %V16i8 = udiv <16 x i8> undef, + %V32i8 = udiv <32 x i8> undef, + %V64i8 = udiv <64 x i8> undef, + + ret i32 undef +} diff --git a/llvm/test/Analysis/CostModel/AMDGPU/fptosi.ll b/llvm/test/Analysis/CostModel/AMDGPU/fptosi.ll new file mode 100644 index 000000000000..3c24d6cdcd28 --- /dev/null +++ b/llvm/test/Analysis/CostModel/AMDGPU/fptosi.ll @@ -0,0 +1,259 @@ +; NOTE: Assertions have been autogenerated by utils/update_analyze_test_checks.py +; RUN: opt -cost-model -analyze -mtriple=amdgcn-unknown-amdhsa -mcpu=gfx1010 < %s | FileCheck -check-prefixes=ALL,FAST %s +; RUN: opt -cost-model -analyze -mtriple=amdgcn-unknown-amdhsa -mcpu=gfx90a < %s | FileCheck -check-prefixes=ALL,FAST %s +; RUN: opt -cost-model -analyze -mtriple=amdgcn-unknown-amdhsa -mcpu=gfx900 < %s | FileCheck -check-prefixes=ALL,FAST %s +; RUN: opt -cost-model -analyze -mtriple=amdgcn-unknown-amdhsa < %s | FileCheck -check-prefixes=ALL,SLOW %s + +; RUN: opt -cost-model -cost-kind=code-size -analyze -mtriple=amdgcn-unknown-amdhsa -mcpu=gfx1010 < %s | FileCheck -check-prefixes=ALL-SIZE,FAST-SIZE %s +; RUN: opt -cost-model -cost-kind=code-size -analyze -mtriple=amdgcn-unknown-amdhsa -mcpu=gfx90a < %s | FileCheck -check-prefixes=ALL-SIZE,FAST-SIZE %s +; RUN: opt -cost-model -cost-kind=code-size -analyze -mtriple=amdgcn-unknown-amdhsa -mcpu=gfx900 < %s | FileCheck -check-prefixes=ALL-SIZE,FAST-SIZE %s +; RUN: opt -cost-model -cost-kind=code-size -analyze -mtriple=amdgcn-unknown-amdhsa < %s | FileCheck -check-prefixes=ALL-SIZE,SLOW-SIZE %s +; END. 
+ +define i32 @fptosi_double_i64(i32 %arg) { +; ALL-LABEL: 'fptosi_double_i64' +; ALL-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I64 = fptosi double undef to i64 +; ALL-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2I64 = fptosi <2 x double> undef to <2 x i64> +; ALL-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4I64 = fptosi <4 x double> undef to <4 x i64> +; ALL-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V8I64 = fptosi <8 x double> undef to <8 x i64> +; ALL-NEXT: Cost Model: Found an estimated cost of 10 for instruction: ret i32 undef +; +; ALL-SIZE-LABEL: 'fptosi_double_i64' +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I64 = fptosi double undef to i64 +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2I64 = fptosi <2 x double> undef to <2 x i64> +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4I64 = fptosi <4 x double> undef to <4 x i64> +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V8I64 = fptosi <8 x double> undef to <8 x i64> +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef +; + %I64 = fptosi double undef to i64 + %V2I64 = fptosi <2 x double> undef to <2 x i64> + %V4I64 = fptosi <4 x double> undef to <4 x i64> + %V8I64 = fptosi <8 x double> undef to <8 x i64> + ret i32 undef +} + +define i32 @fptosi_double_i32(i32 %arg) { +; ALL-LABEL: 'fptosi_double_i32' +; ALL-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I32 = fptosi double undef to i32 +; ALL-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2I32 = fptosi <2 x double> undef to <2 x i32> +; ALL-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4I32 = fptosi <4 x double> undef to <4 x i32> +; ALL-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V8I32 = fptosi <8 x double> undef to <8 x i32> +; ALL-NEXT: Cost Model: Found an estimated cost of 10 for instruction: ret i32 undef +; +; ALL-SIZE-LABEL: 'fptosi_double_i32' +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I32 = fptosi double undef to i32 +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2I32 = fptosi <2 x double> undef to <2 x i32> +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4I32 = fptosi <4 x double> undef to <4 x i32> +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V8I32 = fptosi <8 x double> undef to <8 x i32> +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef +; + %I32 = fptosi double undef to i32 + %V2I32 = fptosi <2 x double> undef to <2 x i32> + %V4I32 = fptosi <4 x double> undef to <4 x i32> + %V8I32 = fptosi <8 x double> undef to <8 x i32> + ret i32 undef +} + +define i32 @fptosi_double_i16(i32 %arg) { +; FAST-LABEL: 'fptosi_double_i16' +; FAST-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I16 = fptosi double undef to i16 +; FAST-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V2I16 = fptosi <2 x double> undef to <2 x i16> +; FAST-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V4I16 = fptosi <4 x double> undef to <4 x i16> +; FAST-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %V8I16 = fptosi <8 x double> undef to <8 x i16> +; FAST-NEXT: Cost Model: Found an estimated cost of 10 for instruction: ret i32 undef +; +; SLOW-LABEL: 
'fptosi_double_i16' +; SLOW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I16 = fptosi double undef to i16 +; SLOW-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2I16 = fptosi <2 x double> undef to <2 x i16> +; SLOW-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4I16 = fptosi <4 x double> undef to <4 x i16> +; SLOW-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V8I16 = fptosi <8 x double> undef to <8 x i16> +; SLOW-NEXT: Cost Model: Found an estimated cost of 10 for instruction: ret i32 undef +; +; FAST-SIZE-LABEL: 'fptosi_double_i16' +; FAST-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I16 = fptosi double undef to i16 +; FAST-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V2I16 = fptosi <2 x double> undef to <2 x i16> +; FAST-SIZE-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V4I16 = fptosi <4 x double> undef to <4 x i16> +; FAST-SIZE-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %V8I16 = fptosi <8 x double> undef to <8 x i16> +; FAST-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef +; +; SLOW-SIZE-LABEL: 'fptosi_double_i16' +; SLOW-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I16 = fptosi double undef to i16 +; SLOW-SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2I16 = fptosi <2 x double> undef to <2 x i16> +; SLOW-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4I16 = fptosi <4 x double> undef to <4 x i16> +; SLOW-SIZE-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V8I16 = fptosi <8 x double> undef to <8 x i16> +; SLOW-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef +; + %I16 = fptosi double undef to i16 + %V2I16 = fptosi <2 x double> undef to <2 x i16> + %V4I16 = fptosi <4 x double> undef to <4 x i16> + %V8I16 = fptosi <8 x double> undef to <8 x i16> + ret i32 undef +} + +define i32 @fptosi_double_i8(i32 %arg) { +; FAST-LABEL: 'fptosi_double_i8' +; FAST-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I8 = fptosi double undef to i8 +; FAST-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V2I8 = fptosi <2 x double> undef to <2 x i8> +; FAST-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V4I8 = fptosi <4 x double> undef to <4 x i8> +; FAST-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V8I8 = fptosi <8 x double> undef to <8 x i8> +; FAST-NEXT: Cost Model: Found an estimated cost of 10 for instruction: ret i32 undef +; +; SLOW-LABEL: 'fptosi_double_i8' +; SLOW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I8 = fptosi double undef to i8 +; SLOW-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2I8 = fptosi <2 x double> undef to <2 x i8> +; SLOW-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4I8 = fptosi <4 x double> undef to <4 x i8> +; SLOW-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V8I8 = fptosi <8 x double> undef to <8 x i8> +; SLOW-NEXT: Cost Model: Found an estimated cost of 10 for instruction: ret i32 undef +; +; FAST-SIZE-LABEL: 'fptosi_double_i8' +; FAST-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I8 = fptosi double undef to i8 +; FAST-SIZE-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V2I8 = fptosi <2 x double> undef to <2 x i8> +; FAST-SIZE-NEXT: Cost Model: Found an estimated 
cost of 12 for instruction: %V4I8 = fptosi <4 x double> undef to <4 x i8> +; FAST-SIZE-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V8I8 = fptosi <8 x double> undef to <8 x i8> +; FAST-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef +; +; SLOW-SIZE-LABEL: 'fptosi_double_i8' +; SLOW-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I8 = fptosi double undef to i8 +; SLOW-SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2I8 = fptosi <2 x double> undef to <2 x i8> +; SLOW-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4I8 = fptosi <4 x double> undef to <4 x i8> +; SLOW-SIZE-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V8I8 = fptosi <8 x double> undef to <8 x i8> +; SLOW-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef +; + %I8 = fptosi double undef to i8 + %V2I8 = fptosi <2 x double> undef to <2 x i8> + %V4I8 = fptosi <4 x double> undef to <4 x i8> + %V8I8 = fptosi <8 x double> undef to <8 x i8> + ret i32 undef +} + +define i32 @fptosi_float_i64(i32 %arg) { +; ALL-LABEL: 'fptosi_float_i64' +; ALL-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I64 = fptosi float undef to i64 +; ALL-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2I64 = fptosi <2 x float> undef to <2 x i64> +; ALL-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4I64 = fptosi <4 x float> undef to <4 x i64> +; ALL-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V8I64 = fptosi <8 x float> undef to <8 x i64> +; ALL-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V16I64 = fptosi <16 x float> undef to <16 x i64> +; ALL-NEXT: Cost Model: Found an estimated cost of 10 for instruction: ret i32 undef +; +; ALL-SIZE-LABEL: 'fptosi_float_i64' +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I64 = fptosi float undef to i64 +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2I64 = fptosi <2 x float> undef to <2 x i64> +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4I64 = fptosi <4 x float> undef to <4 x i64> +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V8I64 = fptosi <8 x float> undef to <8 x i64> +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V16I64 = fptosi <16 x float> undef to <16 x i64> +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef +; + %I64 = fptosi float undef to i64 + %V2I64 = fptosi <2 x float> undef to <2 x i64> + %V4I64 = fptosi <4 x float> undef to <4 x i64> + %V8I64 = fptosi <8 x float> undef to <8 x i64> + %V16I64 = fptosi <16 x float> undef to <16 x i64> + ret i32 undef +} + +define i32 @fptosi_float_i32(i32 %arg) { +; ALL-LABEL: 'fptosi_float_i32' +; ALL-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I32 = fptosi float undef to i32 +; ALL-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2I32 = fptosi <2 x float> undef to <2 x i32> +; ALL-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4I32 = fptosi <4 x float> undef to <4 x i32> +; ALL-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V8I32 = fptosi <8 x float> undef to <8 x i32> +; ALL-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V16I32 = fptosi <16 x float> undef to <16 x i32> +; ALL-NEXT: Cost Model: Found an estimated cost of 10 for 
instruction: ret i32 undef +; +; ALL-SIZE-LABEL: 'fptosi_float_i32' +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I32 = fptosi float undef to i32 +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2I32 = fptosi <2 x float> undef to <2 x i32> +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4I32 = fptosi <4 x float> undef to <4 x i32> +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V8I32 = fptosi <8 x float> undef to <8 x i32> +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V16I32 = fptosi <16 x float> undef to <16 x i32> +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef +; + %I32 = fptosi float undef to i32 + %V2I32 = fptosi <2 x float> undef to <2 x i32> + %V4I32 = fptosi <4 x float> undef to <4 x i32> + %V8I32 = fptosi <8 x float> undef to <8 x i32> + %V16I32 = fptosi <16 x float> undef to <16 x i32> + ret i32 undef +} + +define i32 @fptosi_float_i16(i32 %arg) { +; FAST-LABEL: 'fptosi_float_i16' +; FAST-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I16 = fptosi float undef to i16 +; FAST-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V2I16 = fptosi <2 x float> undef to <2 x i16> +; FAST-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V4I16 = fptosi <4 x float> undef to <4 x i16> +; FAST-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %V8I16 = fptosi <8 x float> undef to <8 x i16> +; FAST-NEXT: Cost Model: Found an estimated cost of 40 for instruction: %V16I16 = fptosi <16 x float> undef to <16 x i16> +; FAST-NEXT: Cost Model: Found an estimated cost of 10 for instruction: ret i32 undef +; +; SLOW-LABEL: 'fptosi_float_i16' +; SLOW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I16 = fptosi float undef to i16 +; SLOW-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2I16 = fptosi <2 x float> undef to <2 x i16> +; SLOW-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4I16 = fptosi <4 x float> undef to <4 x i16> +; SLOW-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V8I16 = fptosi <8 x float> undef to <8 x i16> +; SLOW-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V16I16 = fptosi <16 x float> undef to <16 x i16> +; SLOW-NEXT: Cost Model: Found an estimated cost of 10 for instruction: ret i32 undef +; +; FAST-SIZE-LABEL: 'fptosi_float_i16' +; FAST-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I16 = fptosi float undef to i16 +; FAST-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V2I16 = fptosi <2 x float> undef to <2 x i16> +; FAST-SIZE-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V4I16 = fptosi <4 x float> undef to <4 x i16> +; FAST-SIZE-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %V8I16 = fptosi <8 x float> undef to <8 x i16> +; FAST-SIZE-NEXT: Cost Model: Found an estimated cost of 40 for instruction: %V16I16 = fptosi <16 x float> undef to <16 x i16> +; FAST-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef +; +; SLOW-SIZE-LABEL: 'fptosi_float_i16' +; SLOW-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I16 = fptosi float undef to i16 +; SLOW-SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2I16 = fptosi <2 x float> undef to <2 x i16> +; SLOW-SIZE-NEXT: Cost Model: Found an estimated 
cost of 4 for instruction: %V4I16 = fptosi <4 x float> undef to <4 x i16> +; SLOW-SIZE-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V8I16 = fptosi <8 x float> undef to <8 x i16> +; SLOW-SIZE-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V16I16 = fptosi <16 x float> undef to <16 x i16> +; SLOW-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef +; + %I16 = fptosi float undef to i16 + %V2I16 = fptosi <2 x float> undef to <2 x i16> + %V4I16 = fptosi <4 x float> undef to <4 x i16> + %V8I16 = fptosi <8 x float> undef to <8 x i16> + %V16I16 = fptosi <16 x float> undef to <16 x i16> + ret i32 undef +} + +define i32 @fptosi_float_i8(i32 %arg) { +; FAST-LABEL: 'fptosi_float_i8' +; FAST-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I8 = fptosi float undef to i8 +; FAST-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V2I8 = fptosi <2 x float> undef to <2 x i8> +; FAST-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V4I8 = fptosi <4 x float> undef to <4 x i8> +; FAST-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V8I8 = fptosi <8 x float> undef to <8 x i8> +; FAST-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %V16I8 = fptosi <16 x float> undef to <16 x i8> +; FAST-NEXT: Cost Model: Found an estimated cost of 10 for instruction: ret i32 undef +; +; SLOW-LABEL: 'fptosi_float_i8' +; SLOW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I8 = fptosi float undef to i8 +; SLOW-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2I8 = fptosi <2 x float> undef to <2 x i8> +; SLOW-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4I8 = fptosi <4 x float> undef to <4 x i8> +; SLOW-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V8I8 = fptosi <8 x float> undef to <8 x i8> +; SLOW-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V16I8 = fptosi <16 x float> undef to <16 x i8> +; SLOW-NEXT: Cost Model: Found an estimated cost of 10 for instruction: ret i32 undef +; +; FAST-SIZE-LABEL: 'fptosi_float_i8' +; FAST-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I8 = fptosi float undef to i8 +; FAST-SIZE-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V2I8 = fptosi <2 x float> undef to <2 x i8> +; FAST-SIZE-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V4I8 = fptosi <4 x float> undef to <4 x i8> +; FAST-SIZE-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V8I8 = fptosi <8 x float> undef to <8 x i8> +; FAST-SIZE-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %V16I8 = fptosi <16 x float> undef to <16 x i8> +; FAST-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef +; +; SLOW-SIZE-LABEL: 'fptosi_float_i8' +; SLOW-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I8 = fptosi float undef to i8 +; SLOW-SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2I8 = fptosi <2 x float> undef to <2 x i8> +; SLOW-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4I8 = fptosi <4 x float> undef to <4 x i8> +; SLOW-SIZE-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V8I8 = fptosi <8 x float> undef to <8 x i8> +; SLOW-SIZE-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V16I8 = fptosi <16 x float> undef to <16 x i8> +; SLOW-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for 
instruction: ret i32 undef +; + %I8 = fptosi float undef to i8 + %V2I8 = fptosi <2 x float> undef to <2 x i8> + %V4I8 = fptosi <4 x float> undef to <4 x i8> + %V8I8 = fptosi <8 x float> undef to <8 x i8> + %V16I8 = fptosi <16 x float> undef to <16 x i8> + ret i32 undef +} diff --git a/llvm/test/Analysis/CostModel/AMDGPU/fptoui.ll b/llvm/test/Analysis/CostModel/AMDGPU/fptoui.ll new file mode 100644 index 000000000000..e2f75a9f302c --- /dev/null +++ b/llvm/test/Analysis/CostModel/AMDGPU/fptoui.ll @@ -0,0 +1,259 @@ +; NOTE: Assertions have been autogenerated by utils/update_analyze_test_checks.py +; RUN: opt -cost-model -analyze -mtriple=amdgcn-unknown-amdhsa -mcpu=gfx1010 < %s | FileCheck -check-prefixes=ALL,FAST %s +; RUN: opt -cost-model -analyze -mtriple=amdgcn-unknown-amdhsa -mcpu=gfx90a < %s | FileCheck -check-prefixes=ALL,FAST %s +; RUN: opt -cost-model -analyze -mtriple=amdgcn-unknown-amdhsa -mcpu=gfx900 < %s | FileCheck -check-prefixes=ALL,FAST %s +; RUN: opt -cost-model -analyze -mtriple=amdgcn-unknown-amdhsa < %s | FileCheck -check-prefixes=ALL,SLOW %s + +; RUN: opt -cost-model -cost-kind=code-size -analyze -mtriple=amdgcn-unknown-amdhsa -mcpu=gfx1010 < %s | FileCheck -check-prefixes=ALL-SIZE,FAST-SIZE %s +; RUN: opt -cost-model -cost-kind=code-size -analyze -mtriple=amdgcn-unknown-amdhsa -mcpu=gfx90a < %s | FileCheck -check-prefixes=ALL-SIZE,FAST-SIZE %s +; RUN: opt -cost-model -cost-kind=code-size -analyze -mtriple=amdgcn-unknown-amdhsa -mcpu=gfx900 < %s | FileCheck -check-prefixes=ALL-SIZE,FAST-SIZE %s +; RUN: opt -cost-model -cost-kind=code-size -analyze -mtriple=amdgcn-unknown-amdhsa < %s | FileCheck -check-prefixes=ALL-SIZE,SLOW-SIZE %s +; END. + +define i32 @fptoui_double_i64(i32 %arg) { +; ALL-LABEL: 'fptoui_double_i64' +; ALL-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I64 = fptoui double undef to i64 +; ALL-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2I64 = fptoui <2 x double> undef to <2 x i64> +; ALL-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4I64 = fptoui <4 x double> undef to <4 x i64> +; ALL-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V8I64 = fptoui <8 x double> undef to <8 x i64> +; ALL-NEXT: Cost Model: Found an estimated cost of 10 for instruction: ret i32 undef +; +; ALL-SIZE-LABEL: 'fptoui_double_i64' +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I64 = fptoui double undef to i64 +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2I64 = fptoui <2 x double> undef to <2 x i64> +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4I64 = fptoui <4 x double> undef to <4 x i64> +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V8I64 = fptoui <8 x double> undef to <8 x i64> +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef +; + %I64 = fptoui double undef to i64 + %V2I64 = fptoui <2 x double> undef to <2 x i64> + %V4I64 = fptoui <4 x double> undef to <4 x i64> + %V8I64 = fptoui <8 x double> undef to <8 x i64> + ret i32 undef +} + +define i32 @fptoui_double_i32(i32 %arg) { +; ALL-LABEL: 'fptoui_double_i32' +; ALL-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I32 = fptoui double undef to i32 +; ALL-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2I32 = fptoui <2 x double> undef to <2 x i32> +; ALL-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4I32 = fptoui <4 x double> undef to <4 x 
i32> +; ALL-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V8I32 = fptoui <8 x double> undef to <8 x i32> +; ALL-NEXT: Cost Model: Found an estimated cost of 10 for instruction: ret i32 undef +; +; ALL-SIZE-LABEL: 'fptoui_double_i32' +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I32 = fptoui double undef to i32 +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2I32 = fptoui <2 x double> undef to <2 x i32> +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4I32 = fptoui <4 x double> undef to <4 x i32> +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V8I32 = fptoui <8 x double> undef to <8 x i32> +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef +; + %I32 = fptoui double undef to i32 + %V2I32 = fptoui <2 x double> undef to <2 x i32> + %V4I32 = fptoui <4 x double> undef to <4 x i32> + %V8I32 = fptoui <8 x double> undef to <8 x i32> + ret i32 undef +} + +define i32 @fptoui_double_i16(i32 %arg) { +; FAST-LABEL: 'fptoui_double_i16' +; FAST-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I16 = fptoui double undef to i16 +; FAST-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V2I16 = fptoui <2 x double> undef to <2 x i16> +; FAST-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V4I16 = fptoui <4 x double> undef to <4 x i16> +; FAST-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %V8I16 = fptoui <8 x double> undef to <8 x i16> +; FAST-NEXT: Cost Model: Found an estimated cost of 10 for instruction: ret i32 undef +; +; SLOW-LABEL: 'fptoui_double_i16' +; SLOW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I16 = fptoui double undef to i16 +; SLOW-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2I16 = fptoui <2 x double> undef to <2 x i16> +; SLOW-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4I16 = fptoui <4 x double> undef to <4 x i16> +; SLOW-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V8I16 = fptoui <8 x double> undef to <8 x i16> +; SLOW-NEXT: Cost Model: Found an estimated cost of 10 for instruction: ret i32 undef +; +; FAST-SIZE-LABEL: 'fptoui_double_i16' +; FAST-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I16 = fptoui double undef to i16 +; FAST-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V2I16 = fptoui <2 x double> undef to <2 x i16> +; FAST-SIZE-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V4I16 = fptoui <4 x double> undef to <4 x i16> +; FAST-SIZE-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %V8I16 = fptoui <8 x double> undef to <8 x i16> +; FAST-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef +; +; SLOW-SIZE-LABEL: 'fptoui_double_i16' +; SLOW-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I16 = fptoui double undef to i16 +; SLOW-SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2I16 = fptoui <2 x double> undef to <2 x i16> +; SLOW-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4I16 = fptoui <4 x double> undef to <4 x i16> +; SLOW-SIZE-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V8I16 = fptoui <8 x double> undef to <8 x i16> +; SLOW-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef +; + %I16 = fptoui double undef to i16 + %V2I16 = 
fptoui <2 x double> undef to <2 x i16> + %V4I16 = fptoui <4 x double> undef to <4 x i16> + %V8I16 = fptoui <8 x double> undef to <8 x i16> + ret i32 undef +} + +define i32 @fptoui_double_i8(i32 %arg) { +; FAST-LABEL: 'fptoui_double_i8' +; FAST-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I8 = fptoui double undef to i8 +; FAST-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V2I8 = fptoui <2 x double> undef to <2 x i8> +; FAST-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V4I8 = fptoui <4 x double> undef to <4 x i8> +; FAST-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V8I8 = fptoui <8 x double> undef to <8 x i8> +; FAST-NEXT: Cost Model: Found an estimated cost of 10 for instruction: ret i32 undef +; +; SLOW-LABEL: 'fptoui_double_i8' +; SLOW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I8 = fptoui double undef to i8 +; SLOW-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2I8 = fptoui <2 x double> undef to <2 x i8> +; SLOW-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4I8 = fptoui <4 x double> undef to <4 x i8> +; SLOW-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V8I8 = fptoui <8 x double> undef to <8 x i8> +; SLOW-NEXT: Cost Model: Found an estimated cost of 10 for instruction: ret i32 undef +; +; FAST-SIZE-LABEL: 'fptoui_double_i8' +; FAST-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I8 = fptoui double undef to i8 +; FAST-SIZE-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V2I8 = fptoui <2 x double> undef to <2 x i8> +; FAST-SIZE-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V4I8 = fptoui <4 x double> undef to <4 x i8> +; FAST-SIZE-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V8I8 = fptoui <8 x double> undef to <8 x i8> +; FAST-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef +; +; SLOW-SIZE-LABEL: 'fptoui_double_i8' +; SLOW-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I8 = fptoui double undef to i8 +; SLOW-SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2I8 = fptoui <2 x double> undef to <2 x i8> +; SLOW-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4I8 = fptoui <4 x double> undef to <4 x i8> +; SLOW-SIZE-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V8I8 = fptoui <8 x double> undef to <8 x i8> +; SLOW-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef +; + %I8 = fptoui double undef to i8 + %V2I8 = fptoui <2 x double> undef to <2 x i8> + %V4I8 = fptoui <4 x double> undef to <4 x i8> + %V8I8 = fptoui <8 x double> undef to <8 x i8> + ret i32 undef +} + +define i32 @fptoui_float_i64(i32 %arg) { +; ALL-LABEL: 'fptoui_float_i64' +; ALL-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I64 = fptoui float undef to i64 +; ALL-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2I64 = fptoui <2 x float> undef to <2 x i64> +; ALL-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4I64 = fptoui <4 x float> undef to <4 x i64> +; ALL-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V8I64 = fptoui <8 x float> undef to <8 x i64> +; ALL-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V16I64 = fptoui <16 x float> undef to <16 x i64> +; ALL-NEXT: Cost Model: Found an estimated cost of 10 for instruction: ret i32 undef +; +; ALL-SIZE-LABEL: 
'fptoui_float_i64' +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I64 = fptoui float undef to i64 +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2I64 = fptoui <2 x float> undef to <2 x i64> +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4I64 = fptoui <4 x float> undef to <4 x i64> +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V8I64 = fptoui <8 x float> undef to <8 x i64> +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V16I64 = fptoui <16 x float> undef to <16 x i64> +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef +; + %I64 = fptoui float undef to i64 + %V2I64 = fptoui <2 x float> undef to <2 x i64> + %V4I64 = fptoui <4 x float> undef to <4 x i64> + %V8I64 = fptoui <8 x float> undef to <8 x i64> + %V16I64 = fptoui <16 x float> undef to <16 x i64> + ret i32 undef +} + +define i32 @fptoui_float_i32(i32 %arg) { +; ALL-LABEL: 'fptoui_float_i32' +; ALL-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I32 = fptoui float undef to i32 +; ALL-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2I32 = fptoui <2 x float> undef to <2 x i32> +; ALL-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4I32 = fptoui <4 x float> undef to <4 x i32> +; ALL-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V8I32 = fptoui <8 x float> undef to <8 x i32> +; ALL-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V16I32 = fptoui <16 x float> undef to <16 x i32> +; ALL-NEXT: Cost Model: Found an estimated cost of 10 for instruction: ret i32 undef +; +; ALL-SIZE-LABEL: 'fptoui_float_i32' +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I32 = fptoui float undef to i32 +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2I32 = fptoui <2 x float> undef to <2 x i32> +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4I32 = fptoui <4 x float> undef to <4 x i32> +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V8I32 = fptoui <8 x float> undef to <8 x i32> +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V16I32 = fptoui <16 x float> undef to <16 x i32> +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef +; + %I32 = fptoui float undef to i32 + %V2I32 = fptoui <2 x float> undef to <2 x i32> + %V4I32 = fptoui <4 x float> undef to <4 x i32> + %V8I32 = fptoui <8 x float> undef to <8 x i32> + %V16I32 = fptoui <16 x float> undef to <16 x i32> + ret i32 undef +} + +define i32 @fptoui_float_i16(i32 %arg) { +; FAST-LABEL: 'fptoui_float_i16' +; FAST-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I16 = fptoui float undef to i16 +; FAST-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V2I16 = fptoui <2 x float> undef to <2 x i16> +; FAST-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V4I16 = fptoui <4 x float> undef to <4 x i16> +; FAST-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %V8I16 = fptoui <8 x float> undef to <8 x i16> +; FAST-NEXT: Cost Model: Found an estimated cost of 40 for instruction: %V16I16 = fptoui <16 x float> undef to <16 x i16> +; FAST-NEXT: Cost Model: Found an estimated cost of 10 for instruction: ret i32 undef +; +; SLOW-LABEL: 'fptoui_float_i16' +; SLOW-NEXT: Cost Model: Found an estimated cost of 1 for 
instruction: %I16 = fptoui float undef to i16 +; SLOW-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2I16 = fptoui <2 x float> undef to <2 x i16> +; SLOW-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4I16 = fptoui <4 x float> undef to <4 x i16> +; SLOW-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V8I16 = fptoui <8 x float> undef to <8 x i16> +; SLOW-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V16I16 = fptoui <16 x float> undef to <16 x i16> +; SLOW-NEXT: Cost Model: Found an estimated cost of 10 for instruction: ret i32 undef +; +; FAST-SIZE-LABEL: 'fptoui_float_i16' +; FAST-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I16 = fptoui float undef to i16 +; FAST-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V2I16 = fptoui <2 x float> undef to <2 x i16> +; FAST-SIZE-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V4I16 = fptoui <4 x float> undef to <4 x i16> +; FAST-SIZE-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %V8I16 = fptoui <8 x float> undef to <8 x i16> +; FAST-SIZE-NEXT: Cost Model: Found an estimated cost of 40 for instruction: %V16I16 = fptoui <16 x float> undef to <16 x i16> +; FAST-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef +; +; SLOW-SIZE-LABEL: 'fptoui_float_i16' +; SLOW-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I16 = fptoui float undef to i16 +; SLOW-SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2I16 = fptoui <2 x float> undef to <2 x i16> +; SLOW-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4I16 = fptoui <4 x float> undef to <4 x i16> +; SLOW-SIZE-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V8I16 = fptoui <8 x float> undef to <8 x i16> +; SLOW-SIZE-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V16I16 = fptoui <16 x float> undef to <16 x i16> +; SLOW-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef +; + %I16 = fptoui float undef to i16 + %V2I16 = fptoui <2 x float> undef to <2 x i16> + %V4I16 = fptoui <4 x float> undef to <4 x i16> + %V8I16 = fptoui <8 x float> undef to <8 x i16> + %V16I16 = fptoui <16 x float> undef to <16 x i16> + ret i32 undef +} + +define i32 @fptoui_float_i8(i32 %arg) { +; FAST-LABEL: 'fptoui_float_i8' +; FAST-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I8 = fptoui float undef to i8 +; FAST-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V2I8 = fptoui <2 x float> undef to <2 x i8> +; FAST-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V4I8 = fptoui <4 x float> undef to <4 x i8> +; FAST-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V8I8 = fptoui <8 x float> undef to <8 x i8> +; FAST-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %V16I8 = fptoui <16 x float> undef to <16 x i8> +; FAST-NEXT: Cost Model: Found an estimated cost of 10 for instruction: ret i32 undef +; +; SLOW-LABEL: 'fptoui_float_i8' +; SLOW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I8 = fptoui float undef to i8 +; SLOW-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2I8 = fptoui <2 x float> undef to <2 x i8> +; SLOW-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4I8 = fptoui <4 x float> undef to <4 x i8> +; SLOW-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V8I8 = 
fptoui <8 x float> undef to <8 x i8> +; SLOW-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V16I8 = fptoui <16 x float> undef to <16 x i8> +; SLOW-NEXT: Cost Model: Found an estimated cost of 10 for instruction: ret i32 undef +; +; FAST-SIZE-LABEL: 'fptoui_float_i8' +; FAST-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I8 = fptoui float undef to i8 +; FAST-SIZE-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V2I8 = fptoui <2 x float> undef to <2 x i8> +; FAST-SIZE-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V4I8 = fptoui <4 x float> undef to <4 x i8> +; FAST-SIZE-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V8I8 = fptoui <8 x float> undef to <8 x i8> +; FAST-SIZE-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %V16I8 = fptoui <16 x float> undef to <16 x i8> +; FAST-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef +; +; SLOW-SIZE-LABEL: 'fptoui_float_i8' +; SLOW-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I8 = fptoui float undef to i8 +; SLOW-SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2I8 = fptoui <2 x float> undef to <2 x i8> +; SLOW-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4I8 = fptoui <4 x float> undef to <4 x i8> +; SLOW-SIZE-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V8I8 = fptoui <8 x float> undef to <8 x i8> +; SLOW-SIZE-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V16I8 = fptoui <16 x float> undef to <16 x i8> +; SLOW-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef +; + %I8 = fptoui float undef to i8 + %V2I8 = fptoui <2 x float> undef to <2 x i8> + %V4I8 = fptoui <4 x float> undef to <4 x i8> + %V8I8 = fptoui <8 x float> undef to <8 x i8> + %V16I8 = fptoui <16 x float> undef to <16 x i8> + ret i32 undef +} diff --git a/llvm/test/Analysis/CostModel/AMDGPU/fround.ll b/llvm/test/Analysis/CostModel/AMDGPU/fround.ll new file mode 100644 index 000000000000..fec0bd737304 --- /dev/null +++ b/llvm/test/Analysis/CostModel/AMDGPU/fround.ll @@ -0,0 +1,307 @@ +; NOTE: Assertions have been autogenerated by utils/update_analyze_test_checks.py +; RUN: opt -cost-model -analyze -mtriple=amdgcn-unknown-amdhsa -mcpu=gfx1010 < %s | FileCheck -check-prefixes=ALL,FAST %s +; RUN: opt -cost-model -analyze -mtriple=amdgcn-unknown-amdhsa -mcpu=gfx90a < %s | FileCheck -check-prefixes=ALL,FAST %s +; RUN: opt -cost-model -analyze -mtriple=amdgcn-unknown-amdhsa -mcpu=gfx900 < %s | FileCheck -check-prefixes=ALL,FAST %s +; RUN: opt -cost-model -analyze -mtriple=amdgcn-unknown-amdhsa < %s | FileCheck -check-prefixes=ALL,SLOW %s + +; RUN: opt -cost-model -cost-kind=code-size -analyze -mtriple=amdgcn-unknown-amdhsa -mcpu=gfx1010 < %s | FileCheck -check-prefixes=ALL-SIZE,FAST-SIZE %s +; RUN: opt -cost-model -cost-kind=code-size -analyze -mtriple=amdgcn-unknown-amdhsa -mcpu=gfx90a < %s | FileCheck -check-prefixes=ALL-SIZE,FAST-SIZE %s +; RUN: opt -cost-model -cost-kind=code-size -analyze -mtriple=amdgcn-unknown-amdhsa -mcpu=gfx900 < %s | FileCheck -check-prefixes=ALL-SIZE,FAST-SIZE %s +; RUN: opt -cost-model -cost-kind=code-size -analyze -mtriple=amdgcn-unknown-amdhsa < %s | FileCheck -check-prefixes=ALL-SIZE,SLOW-SIZE %s +; END. 
+ +define i32 @ceil(i32 %arg) { +; FAST-LABEL: 'ceil' +; FAST-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %F32 = call float @llvm.ceil.f32(float undef) +; FAST-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4F32 = call <4 x float> @llvm.ceil.v4f32(<4 x float> undef) +; FAST-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V8F32 = call <8 x float> @llvm.ceil.v8f32(<8 x float> undef) +; FAST-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V16F32 = call <16 x float> @llvm.ceil.v16f32(<16 x float> undef) +; FAST-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %F64 = call double @llvm.ceil.f64(double undef) +; FAST-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2F64 = call <2 x double> @llvm.ceil.v2f64(<2 x double> undef) +; FAST-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4F64 = call <4 x double> @llvm.ceil.v4f64(<4 x double> undef) +; FAST-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V8F64 = call <8 x double> @llvm.ceil.v8f64(<8 x double> undef) +; FAST-NEXT: Cost Model: Found an estimated cost of 10 for instruction: ret i32 undef +; +; SLOW-LABEL: 'ceil' +; SLOW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %F32 = call float @llvm.ceil.f32(float undef) +; SLOW-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4F32 = call <4 x float> @llvm.ceil.v4f32(<4 x float> undef) +; SLOW-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V8F32 = call <8 x float> @llvm.ceil.v8f32(<8 x float> undef) +; SLOW-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V16F32 = call <16 x float> @llvm.ceil.v16f32(<16 x float> undef) +; SLOW-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %F64 = call double @llvm.ceil.f64(double undef) +; SLOW-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V2F64 = call <2 x double> @llvm.ceil.v2f64(<2 x double> undef) +; SLOW-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V4F64 = call <4 x double> @llvm.ceil.v4f64(<4 x double> undef) +; SLOW-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V8F64 = call <8 x double> @llvm.ceil.v8f64(<8 x double> undef) +; SLOW-NEXT: Cost Model: Found an estimated cost of 10 for instruction: ret i32 undef +; +; FAST-SIZE-LABEL: 'ceil' +; FAST-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %F32 = call float @llvm.ceil.f32(float undef) +; FAST-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4F32 = call <4 x float> @llvm.ceil.v4f32(<4 x float> undef) +; FAST-SIZE-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V8F32 = call <8 x float> @llvm.ceil.v8f32(<8 x float> undef) +; FAST-SIZE-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V16F32 = call <16 x float> @llvm.ceil.v16f32(<16 x float> undef) +; FAST-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %F64 = call double @llvm.ceil.f64(double undef) +; FAST-SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2F64 = call <2 x double> @llvm.ceil.v2f64(<2 x double> undef) +; FAST-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4F64 = call <4 x double> @llvm.ceil.v4f64(<4 x double> undef) +; FAST-SIZE-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V8F64 = call <8 x double> @llvm.ceil.v8f64(<8 x double> undef) +; FAST-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for 
instruction: ret i32 undef +; +; SLOW-SIZE-LABEL: 'ceil' +; SLOW-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %F32 = call float @llvm.ceil.f32(float undef) +; SLOW-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4F32 = call <4 x float> @llvm.ceil.v4f32(<4 x float> undef) +; SLOW-SIZE-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V8F32 = call <8 x float> @llvm.ceil.v8f32(<8 x float> undef) +; SLOW-SIZE-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V16F32 = call <16 x float> @llvm.ceil.v16f32(<16 x float> undef) +; SLOW-SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %F64 = call double @llvm.ceil.f64(double undef) +; SLOW-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V2F64 = call <2 x double> @llvm.ceil.v2f64(<2 x double> undef) +; SLOW-SIZE-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V4F64 = call <4 x double> @llvm.ceil.v4f64(<4 x double> undef) +; SLOW-SIZE-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V8F64 = call <8 x double> @llvm.ceil.v8f64(<8 x double> undef) +; SLOW-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef +; + %F32 = call float @llvm.ceil.f32(float undef) + %V4F32 = call <4 x float> @llvm.ceil.v4f32(<4 x float> undef) + %V8F32 = call <8 x float> @llvm.ceil.v8f32(<8 x float> undef) + %V16F32 = call <16 x float> @llvm.ceil.v16f32(<16 x float> undef) + + %F64 = call double @llvm.ceil.f64(double undef) + %V2F64 = call <2 x double> @llvm.ceil.v2f64(<2 x double> undef) + %V4F64 = call <4 x double> @llvm.ceil.v4f64(<4 x double> undef) + %V8F64 = call <8 x double> @llvm.ceil.v8f64(<8 x double> undef) + + ret i32 undef +} + +define i32 @floor(i32 %arg) { +; ALL-LABEL: 'floor' +; ALL-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %F32 = call float @llvm.floor.f32(float undef) +; ALL-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4F32 = call <4 x float> @llvm.floor.v4f32(<4 x float> undef) +; ALL-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V8F32 = call <8 x float> @llvm.floor.v8f32(<8 x float> undef) +; ALL-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V16F32 = call <16 x float> @llvm.floor.v16f32(<16 x float> undef) +; ALL-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %F64 = call double @llvm.floor.f64(double undef) +; ALL-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2F64 = call <2 x double> @llvm.floor.v2f64(<2 x double> undef) +; ALL-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4F64 = call <4 x double> @llvm.floor.v4f64(<4 x double> undef) +; ALL-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V8F64 = call <8 x double> @llvm.floor.v8f64(<8 x double> undef) +; ALL-NEXT: Cost Model: Found an estimated cost of 10 for instruction: ret i32 undef +; +; ALL-SIZE-LABEL: 'floor' +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %F32 = call float @llvm.floor.f32(float undef) +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4F32 = call <4 x float> @llvm.floor.v4f32(<4 x float> undef) +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V8F32 = call <8 x float> @llvm.floor.v8f32(<8 x float> undef) +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V16F32 = call <16 x float> @llvm.floor.v16f32(<16 x float> undef) +; ALL-SIZE-NEXT: Cost 
Model: Found an estimated cost of 1 for instruction: %F64 = call double @llvm.floor.f64(double undef) +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2F64 = call <2 x double> @llvm.floor.v2f64(<2 x double> undef) +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4F64 = call <4 x double> @llvm.floor.v4f64(<4 x double> undef) +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V8F64 = call <8 x double> @llvm.floor.v8f64(<8 x double> undef) +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef +; + %F32 = call float @llvm.floor.f32(float undef) + %V4F32 = call <4 x float> @llvm.floor.v4f32(<4 x float> undef) + %V8F32 = call <8 x float> @llvm.floor.v8f32(<8 x float> undef) + %V16F32 = call <16 x float> @llvm.floor.v16f32(<16 x float> undef) + + %F64 = call double @llvm.floor.f64(double undef) + %V2F64 = call <2 x double> @llvm.floor.v2f64(<2 x double> undef) + %V4F64 = call <4 x double> @llvm.floor.v4f64(<4 x double> undef) + %V8F64 = call <8 x double> @llvm.floor.v8f64(<8 x double> undef) + + ret i32 undef +} + +define i32 @nearbyint(i32 %arg) { +; ALL-LABEL: 'nearbyint' +; ALL-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %F32 = call float @llvm.nearbyint.f32(float undef) +; ALL-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V4F32 = call <4 x float> @llvm.nearbyint.v4f32(<4 x float> undef) +; ALL-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V8F32 = call <8 x float> @llvm.nearbyint.v8f32(<8 x float> undef) +; ALL-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %V16F32 = call <16 x float> @llvm.nearbyint.v16f32(<16 x float> undef) +; ALL-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %F64 = call double @llvm.nearbyint.f64(double undef) +; ALL-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V2F64 = call <2 x double> @llvm.nearbyint.v2f64(<2 x double> undef) +; ALL-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V4F64 = call <4 x double> @llvm.nearbyint.v4f64(<4 x double> undef) +; ALL-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V8F64 = call <8 x double> @llvm.nearbyint.v8f64(<8 x double> undef) +; ALL-NEXT: Cost Model: Found an estimated cost of 10 for instruction: ret i32 undef +; +; ALL-SIZE-LABEL: 'nearbyint' +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %F32 = call float @llvm.nearbyint.f32(float undef) +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V4F32 = call <4 x float> @llvm.nearbyint.v4f32(<4 x float> undef) +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V8F32 = call <8 x float> @llvm.nearbyint.v8f32(<8 x float> undef) +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %V16F32 = call <16 x float> @llvm.nearbyint.v16f32(<16 x float> undef) +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %F64 = call double @llvm.nearbyint.f64(double undef) +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V2F64 = call <2 x double> @llvm.nearbyint.v2f64(<2 x double> undef) +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V4F64 = call <4 x double> @llvm.nearbyint.v4f64(<4 x double> undef) +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V8F64 = call <8 x double> @llvm.nearbyint.v8f64(<8 x double> undef) +; 
ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef +; + %F32 = call float @llvm.nearbyint.f32(float undef) + %V4F32 = call <4 x float> @llvm.nearbyint.v4f32(<4 x float> undef) + %V8F32 = call <8 x float> @llvm.nearbyint.v8f32(<8 x float> undef) + %V16F32 = call <16 x float> @llvm.nearbyint.v16f32(<16 x float> undef) + + %F64 = call double @llvm.nearbyint.f64(double undef) + %V2F64 = call <2 x double> @llvm.nearbyint.v2f64(<2 x double> undef) + %V4F64 = call <4 x double> @llvm.nearbyint.v4f64(<4 x double> undef) + %V8F64 = call <8 x double> @llvm.nearbyint.v8f64(<8 x double> undef) + + ret i32 undef +} + +define i32 @rint(i32 %arg) { +; FAST-LABEL: 'rint' +; FAST-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %F32 = call float @llvm.rint.f32(float undef) +; FAST-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4F32 = call <4 x float> @llvm.rint.v4f32(<4 x float> undef) +; FAST-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V8F32 = call <8 x float> @llvm.rint.v8f32(<8 x float> undef) +; FAST-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V16F32 = call <16 x float> @llvm.rint.v16f32(<16 x float> undef) +; FAST-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %F64 = call double @llvm.rint.f64(double undef) +; FAST-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2F64 = call <2 x double> @llvm.rint.v2f64(<2 x double> undef) +; FAST-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4F64 = call <4 x double> @llvm.rint.v4f64(<4 x double> undef) +; FAST-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V8F64 = call <8 x double> @llvm.rint.v8f64(<8 x double> undef) +; FAST-NEXT: Cost Model: Found an estimated cost of 10 for instruction: ret i32 undef +; +; SLOW-LABEL: 'rint' +; SLOW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %F32 = call float @llvm.rint.f32(float undef) +; SLOW-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4F32 = call <4 x float> @llvm.rint.v4f32(<4 x float> undef) +; SLOW-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V8F32 = call <8 x float> @llvm.rint.v8f32(<8 x float> undef) +; SLOW-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V16F32 = call <16 x float> @llvm.rint.v16f32(<16 x float> undef) +; SLOW-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %F64 = call double @llvm.rint.f64(double undef) +; SLOW-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V2F64 = call <2 x double> @llvm.rint.v2f64(<2 x double> undef) +; SLOW-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V4F64 = call <4 x double> @llvm.rint.v4f64(<4 x double> undef) +; SLOW-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V8F64 = call <8 x double> @llvm.rint.v8f64(<8 x double> undef) +; SLOW-NEXT: Cost Model: Found an estimated cost of 10 for instruction: ret i32 undef +; +; FAST-SIZE-LABEL: 'rint' +; FAST-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %F32 = call float @llvm.rint.f32(float undef) +; FAST-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4F32 = call <4 x float> @llvm.rint.v4f32(<4 x float> undef) +; FAST-SIZE-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V8F32 = call <8 x float> @llvm.rint.v8f32(<8 x float> undef) +; FAST-SIZE-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V16F32 = call <16 x float> 
@llvm.rint.v16f32(<16 x float> undef) +; FAST-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %F64 = call double @llvm.rint.f64(double undef) +; FAST-SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2F64 = call <2 x double> @llvm.rint.v2f64(<2 x double> undef) +; FAST-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4F64 = call <4 x double> @llvm.rint.v4f64(<4 x double> undef) +; FAST-SIZE-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V8F64 = call <8 x double> @llvm.rint.v8f64(<8 x double> undef) +; FAST-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef +; +; SLOW-SIZE-LABEL: 'rint' +; SLOW-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %F32 = call float @llvm.rint.f32(float undef) +; SLOW-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4F32 = call <4 x float> @llvm.rint.v4f32(<4 x float> undef) +; SLOW-SIZE-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V8F32 = call <8 x float> @llvm.rint.v8f32(<8 x float> undef) +; SLOW-SIZE-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V16F32 = call <16 x float> @llvm.rint.v16f32(<16 x float> undef) +; SLOW-SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %F64 = call double @llvm.rint.f64(double undef) +; SLOW-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V2F64 = call <2 x double> @llvm.rint.v2f64(<2 x double> undef) +; SLOW-SIZE-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V4F64 = call <4 x double> @llvm.rint.v4f64(<4 x double> undef) +; SLOW-SIZE-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V8F64 = call <8 x double> @llvm.rint.v8f64(<8 x double> undef) +; SLOW-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef +; + %F32 = call float @llvm.rint.f32(float undef) + %V4F32 = call <4 x float> @llvm.rint.v4f32(<4 x float> undef) + %V8F32 = call <8 x float> @llvm.rint.v8f32(<8 x float> undef) + %V16F32 = call <16 x float> @llvm.rint.v16f32(<16 x float> undef) + + %F64 = call double @llvm.rint.f64(double undef) + %V2F64 = call <2 x double> @llvm.rint.v2f64(<2 x double> undef) + %V4F64 = call <4 x double> @llvm.rint.v4f64(<4 x double> undef) + %V8F64 = call <8 x double> @llvm.rint.v8f64(<8 x double> undef) + + ret i32 undef +} + +define i32 @trunc(i32 %arg) { +; FAST-LABEL: 'trunc' +; FAST-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %F32 = call float @llvm.trunc.f32(float undef) +; FAST-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4F32 = call <4 x float> @llvm.trunc.v4f32(<4 x float> undef) +; FAST-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V8F32 = call <8 x float> @llvm.trunc.v8f32(<8 x float> undef) +; FAST-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V16F32 = call <16 x float> @llvm.trunc.v16f32(<16 x float> undef) +; FAST-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %F64 = call double @llvm.trunc.f64(double undef) +; FAST-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2F64 = call <2 x double> @llvm.trunc.v2f64(<2 x double> undef) +; FAST-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4F64 = call <4 x double> @llvm.trunc.v4f64(<4 x double> undef) +; FAST-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V8F64 = call <8 x double> @llvm.trunc.v8f64(<8 x double> undef) +; FAST-NEXT: Cost Model: 
Found an estimated cost of 10 for instruction: ret i32 undef +; +; SLOW-LABEL: 'trunc' +; SLOW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %F32 = call float @llvm.trunc.f32(float undef) +; SLOW-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4F32 = call <4 x float> @llvm.trunc.v4f32(<4 x float> undef) +; SLOW-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V8F32 = call <8 x float> @llvm.trunc.v8f32(<8 x float> undef) +; SLOW-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V16F32 = call <16 x float> @llvm.trunc.v16f32(<16 x float> undef) +; SLOW-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %F64 = call double @llvm.trunc.f64(double undef) +; SLOW-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V2F64 = call <2 x double> @llvm.trunc.v2f64(<2 x double> undef) +; SLOW-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V4F64 = call <4 x double> @llvm.trunc.v4f64(<4 x double> undef) +; SLOW-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V8F64 = call <8 x double> @llvm.trunc.v8f64(<8 x double> undef) +; SLOW-NEXT: Cost Model: Found an estimated cost of 10 for instruction: ret i32 undef +; +; FAST-SIZE-LABEL: 'trunc' +; FAST-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %F32 = call float @llvm.trunc.f32(float undef) +; FAST-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4F32 = call <4 x float> @llvm.trunc.v4f32(<4 x float> undef) +; FAST-SIZE-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V8F32 = call <8 x float> @llvm.trunc.v8f32(<8 x float> undef) +; FAST-SIZE-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V16F32 = call <16 x float> @llvm.trunc.v16f32(<16 x float> undef) +; FAST-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %F64 = call double @llvm.trunc.f64(double undef) +; FAST-SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2F64 = call <2 x double> @llvm.trunc.v2f64(<2 x double> undef) +; FAST-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4F64 = call <4 x double> @llvm.trunc.v4f64(<4 x double> undef) +; FAST-SIZE-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V8F64 = call <8 x double> @llvm.trunc.v8f64(<8 x double> undef) +; FAST-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef +; +; SLOW-SIZE-LABEL: 'trunc' +; SLOW-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %F32 = call float @llvm.trunc.f32(float undef) +; SLOW-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4F32 = call <4 x float> @llvm.trunc.v4f32(<4 x float> undef) +; SLOW-SIZE-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V8F32 = call <8 x float> @llvm.trunc.v8f32(<8 x float> undef) +; SLOW-SIZE-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V16F32 = call <16 x float> @llvm.trunc.v16f32(<16 x float> undef) +; SLOW-SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %F64 = call double @llvm.trunc.f64(double undef) +; SLOW-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V2F64 = call <2 x double> @llvm.trunc.v2f64(<2 x double> undef) +; SLOW-SIZE-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V4F64 = call <4 x double> @llvm.trunc.v4f64(<4 x double> undef) +; SLOW-SIZE-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V8F64 = call <8 x double> 
@llvm.trunc.v8f64(<8 x double> undef) +; SLOW-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef +; + %F32 = call float @llvm.trunc.f32(float undef) + %V4F32 = call <4 x float> @llvm.trunc.v4f32(<4 x float> undef) + %V8F32 = call <8 x float> @llvm.trunc.v8f32(<8 x float> undef) + %V16F32 = call <16 x float> @llvm.trunc.v16f32(<16 x float> undef) + + %F64 = call double @llvm.trunc.f64(double undef) + %V2F64 = call <2 x double> @llvm.trunc.v2f64(<2 x double> undef) + %V4F64 = call <4 x double> @llvm.trunc.v4f64(<4 x double> undef) + %V8F64 = call <8 x double> @llvm.trunc.v8f64(<8 x double> undef) + + ret i32 undef +} + +declare float @llvm.ceil.f32(float) +declare <4 x float> @llvm.ceil.v4f32(<4 x float>) +declare <8 x float> @llvm.ceil.v8f32(<8 x float>) +declare <16 x float> @llvm.ceil.v16f32(<16 x float>) + +declare double @llvm.ceil.f64(double) +declare <2 x double> @llvm.ceil.v2f64(<2 x double>) +declare <4 x double> @llvm.ceil.v4f64(<4 x double>) +declare <8 x double> @llvm.ceil.v8f64(<8 x double>) + +declare float @llvm.floor.f32(float) +declare <4 x float> @llvm.floor.v4f32(<4 x float>) +declare <8 x float> @llvm.floor.v8f32(<8 x float>) +declare <16 x float> @llvm.floor.v16f32(<16 x float>) + +declare double @llvm.floor.f64(double) +declare <2 x double> @llvm.floor.v2f64(<2 x double>) +declare <4 x double> @llvm.floor.v4f64(<4 x double>) +declare <8 x double> @llvm.floor.v8f64(<8 x double>) + +declare float @llvm.nearbyint.f32(float) +declare <4 x float> @llvm.nearbyint.v4f32(<4 x float>) +declare <8 x float> @llvm.nearbyint.v8f32(<8 x float>) +declare <16 x float> @llvm.nearbyint.v16f32(<16 x float>) + +declare double @llvm.nearbyint.f64(double) +declare <2 x double> @llvm.nearbyint.v2f64(<2 x double>) +declare <4 x double> @llvm.nearbyint.v4f64(<4 x double>) +declare <8 x double> @llvm.nearbyint.v8f64(<8 x double>) + +declare float @llvm.rint.f32(float) +declare <4 x float> @llvm.rint.v4f32(<4 x float>) +declare <8 x float> @llvm.rint.v8f32(<8 x float>) +declare <16 x float> @llvm.rint.v16f32(<16 x float>) + +declare double @llvm.rint.f64(double) +declare <2 x double> @llvm.rint.v2f64(<2 x double>) +declare <4 x double> @llvm.rint.v4f64(<4 x double>) +declare <8 x double> @llvm.rint.v8f64(<8 x double>) + +declare float @llvm.trunc.f32(float) +declare <4 x float> @llvm.trunc.v4f32(<4 x float>) +declare <8 x float> @llvm.trunc.v8f32(<8 x float>) +declare <16 x float> @llvm.trunc.v16f32(<16 x float>) + +declare double @llvm.trunc.f64(double) +declare <2 x double> @llvm.trunc.v2f64(<2 x double>) +declare <4 x double> @llvm.trunc.v4f64(<4 x double>) +declare <8 x double> @llvm.trunc.v8f64(<8 x double>) diff --git a/llvm/test/Analysis/CostModel/AMDGPU/gep.ll b/llvm/test/Analysis/CostModel/AMDGPU/gep.ll new file mode 100644 index 000000000000..da60908299be --- /dev/null +++ b/llvm/test/Analysis/CostModel/AMDGPU/gep.ll @@ -0,0 +1,73 @@ +; NOTE: Assertions have been autogenerated by utils/update_analyze_test_checks.py +; RUN: opt -cost-model -analyze -mtriple=amdgcn-unknown-amdhsa -mcpu=gfx1010 < %s | FileCheck -check-prefixes=ALL %s +; RUN: opt -cost-model -analyze -mtriple=amdgcn-unknown-amdhsa -mcpu=gfx90a < %s | FileCheck -check-prefixes=ALL %s +; RUN: opt -cost-model -analyze -mtriple=amdgcn-unknown-amdhsa -mcpu=gfx900 < %s | FileCheck -check-prefixes=ALL %s +; RUN: opt -cost-model -analyze -mtriple=amdgcn-unknown-amdhsa < %s | FileCheck -check-prefixes=ALL %s + +; RUN: opt -cost-model -cost-kind=code-size -analyze -mtriple=amdgcn-unknown-amdhsa 
-mcpu=gfx1010 < %s | FileCheck -check-prefixes=ALL-SIZE %s +; RUN: opt -cost-model -cost-kind=code-size -analyze -mtriple=amdgcn-unknown-amdhsa -mcpu=gfx90a < %s | FileCheck -check-prefixes=ALL-SIZE %s +; RUN: opt -cost-model -cost-kind=code-size -analyze -mtriple=amdgcn-unknown-amdhsa -mcpu=gfx900 < %s | FileCheck -check-prefixes=ALL-SIZE %s +; RUN: opt -cost-model -cost-kind=code-size -analyze -mtriple=amdgcn-unknown-amdhsa < %s | FileCheck -check-prefixes=ALL-SIZE %s +; END. + +define void @test_geps() { +; ALL-LABEL: 'test_geps' +; ALL-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %a0 = getelementptr inbounds i8, i8* undef, i32 0 +; ALL-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %a1 = getelementptr inbounds i16, i16* undef, i32 0 +; ALL-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %a2 = getelementptr inbounds i32, i32* undef, i32 0 +; ALL-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %a3 = getelementptr inbounds i64, i64* undef, i32 0 +; ALL-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %a4 = getelementptr inbounds float, float* undef, i32 0 +; ALL-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %a5 = getelementptr inbounds double, double* undef, i32 0 +; ALL-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %a7 = getelementptr inbounds <4 x i8>, <4 x i8>* undef, i32 0 +; ALL-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %a8 = getelementptr inbounds <4 x i16>, <4 x i16>* undef, i32 0 +; ALL-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %a9 = getelementptr inbounds <4 x i32>, <4 x i32>* undef, i32 0 +; ALL-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %a10 = getelementptr inbounds <4 x i64>, <4 x i64>* undef, i32 0 +; ALL-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %a11 = getelementptr inbounds <4 x float>, <4 x float>* undef, i32 0 +; ALL-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %a12 = getelementptr inbounds <4 x double>, <4 x double>* undef, i32 0 +; ALL-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %giant_gep0 = getelementptr inbounds i8, i8* undef, i64 9223372036854775807 +; ALL-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %giant_gep1 = getelementptr inbounds i8, i8* undef, i128 295147905179352825855 +; ALL-NEXT: Cost Model: Found an estimated cost of 10 for instruction: ret void +; +; ALL-SIZE-LABEL: 'test_geps' +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %a0 = getelementptr inbounds i8, i8* undef, i32 0 +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %a1 = getelementptr inbounds i16, i16* undef, i32 0 +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %a2 = getelementptr inbounds i32, i32* undef, i32 0 +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %a3 = getelementptr inbounds i64, i64* undef, i32 0 +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %a4 = getelementptr inbounds float, float* undef, i32 0 +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %a5 = getelementptr inbounds double, double* undef, i32 0 +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %a7 = getelementptr inbounds <4 x i8>, <4 x i8>* undef, i32 0 +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %a8 = getelementptr inbounds <4 x i16>, <4 x 
i16>* undef, i32 0 +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %a9 = getelementptr inbounds <4 x i32>, <4 x i32>* undef, i32 0 +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %a10 = getelementptr inbounds <4 x i64>, <4 x i64>* undef, i32 0 +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %a11 = getelementptr inbounds <4 x float>, <4 x float>* undef, i32 0 +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %a12 = getelementptr inbounds <4 x double>, <4 x double>* undef, i32 0 +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %giant_gep0 = getelementptr inbounds i8, i8* undef, i64 9223372036854775807 +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %giant_gep1 = getelementptr inbounds i8, i8* undef, i128 295147905179352825855 +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; + %a0 = getelementptr inbounds i8, i8* undef, i32 0 + %a1 = getelementptr inbounds i16, i16* undef, i32 0 + %a2 = getelementptr inbounds i32, i32* undef, i32 0 + %a3 = getelementptr inbounds i64, i64* undef, i32 0 + + %a4 = getelementptr inbounds float, float* undef, i32 0 + %a5 = getelementptr inbounds double, double* undef, i32 0 + + %a7 = getelementptr inbounds <4 x i8>, <4 x i8>* undef, i32 0 + %a8 = getelementptr inbounds <4 x i16>, <4 x i16>* undef, i32 0 + %a9 = getelementptr inbounds <4 x i32>, <4 x i32>* undef, i32 0 + %a10 = getelementptr inbounds <4 x i64>, <4 x i64>* undef, i32 0 + %a11 = getelementptr inbounds <4 x float>, <4 x float>* undef, i32 0 + %a12 = getelementptr inbounds <4 x double>, <4 x double>* undef, i32 0 + + ; Check that we handle outlandishly large GEPs properly. This is unlikely to + ; be a valid pointer, but LLVM still generates GEPs like this sometimes in + ; dead code. + ; This GEP has index INT64_MAX, which is cost 1. + %giant_gep0 = getelementptr inbounds i8, i8* undef, i64 9223372036854775807 + + ; This GEP index wraps around to -1, which is cost 0. + %giant_gep1 = getelementptr inbounds i8, i8* undef, i128 295147905179352825855 + + ret void +} diff --git a/llvm/test/Analysis/CostModel/AMDGPU/rem.ll b/llvm/test/Analysis/CostModel/AMDGPU/rem.ll new file mode 100644 index 000000000000..a2d38d2314bd --- /dev/null +++ b/llvm/test/Analysis/CostModel/AMDGPU/rem.ll @@ -0,0 +1,1145 @@ +; NOTE: Assertions have been autogenerated by utils/update_analyze_test_checks.py +; RUN: opt -cost-model -analyze -mtriple=amdgcn-unknown-amdhsa -mcpu=gfx1010 < %s | FileCheck -check-prefixes=FAST %s +; RUN: opt -cost-model -analyze -mtriple=amdgcn-unknown-amdhsa -mcpu=gfx90a < %s | FileCheck -check-prefixes=FAST %s +; RUN: opt -cost-model -analyze -mtriple=amdgcn-unknown-amdhsa -mcpu=gfx900 < %s | FileCheck -check-prefixes=FAST %s +; RUN: opt -cost-model -analyze -mtriple=amdgcn-unknown-amdhsa < %s | FileCheck -check-prefixes=SLOW %s + +; RUN: opt -cost-model -cost-kind=code-size -analyze -mtriple=amdgcn-unknown-amdhsa -mcpu=gfx1010 < %s | FileCheck -check-prefixes=ALL-SIZE %s +; RUN: opt -cost-model -cost-kind=code-size -analyze -mtriple=amdgcn-unknown-amdhsa -mcpu=gfx90a < %s | FileCheck -check-prefixes=ALL-SIZE %s +; RUN: opt -cost-model -cost-kind=code-size -analyze -mtriple=amdgcn-unknown-amdhsa -mcpu=gfx900 < %s | FileCheck -check-prefixes=ALL-SIZE %s +; RUN: opt -cost-model -cost-kind=code-size -analyze -mtriple=amdgcn-unknown-amdhsa < %s | FileCheck -check-prefixes=ALL-SIZE %s +; END. 
+ +define i32 @srem() { +; FAST-LABEL: 'srem' +; FAST-NEXT: Cost Model: Found an estimated cost of 23 for instruction: %I64 = srem i64 undef, undef +; FAST-NEXT: Cost Model: Found an estimated cost of 46 for instruction: %V2i64 = srem <2 x i64> undef, undef +; FAST-NEXT: Cost Model: Found an estimated cost of 92 for instruction: %V4i64 = srem <4 x i64> undef, undef +; FAST-NEXT: Cost Model: Found an estimated cost of 184 for instruction: %V8i64 = srem <8 x i64> undef, undef +; FAST-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %I32 = srem i32 undef, undef +; FAST-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V4i32 = srem <4 x i32> undef, undef +; FAST-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %V8i32 = srem <8 x i32> undef, undef +; FAST-NEXT: Cost Model: Found an estimated cost of 96 for instruction: %V16i32 = srem <16 x i32> undef, undef +; FAST-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I16 = srem i16 undef, undef +; FAST-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %V8i16 = srem <8 x i16> undef, undef +; FAST-NEXT: Cost Model: Found an estimated cost of 31 for instruction: %V16i16 = srem <16 x i16> undef, undef +; FAST-NEXT: Cost Model: Found an estimated cost of 63 for instruction: %V32i16 = srem <32 x i16> undef, undef +; FAST-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I8 = srem i8 undef, undef +; FAST-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V16i8 = srem <16 x i8> undef, undef +; FAST-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %V32i8 = srem <32 x i8> undef, undef +; FAST-NEXT: Cost Model: Found an estimated cost of 64 for instruction: %V64i8 = srem <64 x i8> undef, undef +; FAST-NEXT: Cost Model: Found an estimated cost of 10 for instruction: ret i32 undef +; +; SLOW-LABEL: 'srem' +; SLOW-NEXT: Cost Model: Found an estimated cost of 23 for instruction: %I64 = srem i64 undef, undef +; SLOW-NEXT: Cost Model: Found an estimated cost of 46 for instruction: %V2i64 = srem <2 x i64> undef, undef +; SLOW-NEXT: Cost Model: Found an estimated cost of 92 for instruction: %V4i64 = srem <4 x i64> undef, undef +; SLOW-NEXT: Cost Model: Found an estimated cost of 184 for instruction: %V8i64 = srem <8 x i64> undef, undef +; SLOW-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %I32 = srem i32 undef, undef +; SLOW-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V4i32 = srem <4 x i32> undef, undef +; SLOW-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %V8i32 = srem <8 x i32> undef, undef +; SLOW-NEXT: Cost Model: Found an estimated cost of 96 for instruction: %V16i32 = srem <16 x i32> undef, undef +; SLOW-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %I16 = srem i16 undef, undef +; SLOW-NEXT: Cost Model: Found an estimated cost of 64 for instruction: %V8i16 = srem <8 x i16> undef, undef +; SLOW-NEXT: Cost Model: Found an estimated cost of 128 for instruction: %V16i16 = srem <16 x i16> undef, undef +; SLOW-NEXT: Cost Model: Found an estimated cost of 266 for instruction: %V32i16 = srem <32 x i16> undef, undef +; SLOW-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %I8 = srem i8 undef, undef +; SLOW-NEXT: Cost Model: Found an estimated cost of 128 for instruction: %V16i8 = srem <16 x i8> undef, undef +; SLOW-NEXT: Cost Model: Found an estimated cost of 256 for instruction: %V32i8 = srem <32 x i8> undef, undef +; SLOW-NEXT: Cost Model: Found an 
estimated cost of 522 for instruction: %V64i8 = srem <64 x i8> undef, undef +; SLOW-NEXT: Cost Model: Found an estimated cost of 10 for instruction: ret i32 undef +; +; ALL-SIZE-LABEL: 'srem' +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I64 = srem i64 undef, undef +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V2i64 = srem <2 x i64> undef, undef +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4i64 = srem <4 x i64> undef, undef +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8i64 = srem <8 x i64> undef, undef +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I32 = srem i32 undef, undef +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4i32 = srem <4 x i32> undef, undef +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8i32 = srem <8 x i32> undef, undef +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V16i32 = srem <16 x i32> undef, undef +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I16 = srem i16 undef, undef +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8i16 = srem <8 x i16> undef, undef +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V16i16 = srem <16 x i16> undef, undef +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V32i16 = srem <32 x i16> undef, undef +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I8 = srem i8 undef, undef +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V16i8 = srem <16 x i8> undef, undef +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V32i8 = srem <32 x i8> undef, undef +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V64i8 = srem <64 x i8> undef, undef +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef +; + %I64 = srem i64 undef, undef + %V2i64 = srem <2 x i64> undef, undef + %V4i64 = srem <4 x i64> undef, undef + %V8i64 = srem <8 x i64> undef, undef + + %I32 = srem i32 undef, undef + %V4i32 = srem <4 x i32> undef, undef + %V8i32 = srem <8 x i32> undef, undef + %V16i32 = srem <16 x i32> undef, undef + + %I16 = srem i16 undef, undef + %V8i16 = srem <8 x i16> undef, undef + %V16i16 = srem <16 x i16> undef, undef + %V32i16 = srem <32 x i16> undef, undef + + %I8 = srem i8 undef, undef + %V16i8 = srem <16 x i8> undef, undef + %V32i8 = srem <32 x i8> undef, undef + %V64i8 = srem <64 x i8> undef, undef + + ret i32 undef +} + +define i32 @urem() { +; FAST-LABEL: 'urem' +; FAST-NEXT: Cost Model: Found an estimated cost of 23 for instruction: %I64 = urem i64 undef, undef +; FAST-NEXT: Cost Model: Found an estimated cost of 46 for instruction: %V2i64 = urem <2 x i64> undef, undef +; FAST-NEXT: Cost Model: Found an estimated cost of 92 for instruction: %V4i64 = urem <4 x i64> undef, undef +; FAST-NEXT: Cost Model: Found an estimated cost of 184 for instruction: %V8i64 = urem <8 x i64> undef, undef +; FAST-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %I32 = urem i32 undef, undef +; FAST-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V4i32 = urem <4 x i32> undef, undef +; FAST-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %V8i32 = urem <8 x i32> undef, undef +; FAST-NEXT: Cost Model: Found an estimated cost of 96 for 
instruction: %V16i32 = urem <16 x i32> undef, undef +; FAST-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I16 = urem i16 undef, undef +; FAST-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %V8i16 = urem <8 x i16> undef, undef +; FAST-NEXT: Cost Model: Found an estimated cost of 31 for instruction: %V16i16 = urem <16 x i16> undef, undef +; FAST-NEXT: Cost Model: Found an estimated cost of 63 for instruction: %V32i16 = urem <32 x i16> undef, undef +; FAST-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I8 = urem i8 undef, undef +; FAST-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V16i8 = urem <16 x i8> undef, undef +; FAST-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %V32i8 = urem <32 x i8> undef, undef +; FAST-NEXT: Cost Model: Found an estimated cost of 64 for instruction: %V64i8 = urem <64 x i8> undef, undef +; FAST-NEXT: Cost Model: Found an estimated cost of 10 for instruction: ret i32 undef +; +; SLOW-LABEL: 'urem' +; SLOW-NEXT: Cost Model: Found an estimated cost of 23 for instruction: %I64 = urem i64 undef, undef +; SLOW-NEXT: Cost Model: Found an estimated cost of 46 for instruction: %V2i64 = urem <2 x i64> undef, undef +; SLOW-NEXT: Cost Model: Found an estimated cost of 92 for instruction: %V4i64 = urem <4 x i64> undef, undef +; SLOW-NEXT: Cost Model: Found an estimated cost of 184 for instruction: %V8i64 = urem <8 x i64> undef, undef +; SLOW-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %I32 = urem i32 undef, undef +; SLOW-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V4i32 = urem <4 x i32> undef, undef +; SLOW-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %V8i32 = urem <8 x i32> undef, undef +; SLOW-NEXT: Cost Model: Found an estimated cost of 96 for instruction: %V16i32 = urem <16 x i32> undef, undef +; SLOW-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %I16 = urem i16 undef, undef +; SLOW-NEXT: Cost Model: Found an estimated cost of 64 for instruction: %V8i16 = urem <8 x i16> undef, undef +; SLOW-NEXT: Cost Model: Found an estimated cost of 128 for instruction: %V16i16 = urem <16 x i16> undef, undef +; SLOW-NEXT: Cost Model: Found an estimated cost of 266 for instruction: %V32i16 = urem <32 x i16> undef, undef +; SLOW-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %I8 = urem i8 undef, undef +; SLOW-NEXT: Cost Model: Found an estimated cost of 128 for instruction: %V16i8 = urem <16 x i8> undef, undef +; SLOW-NEXT: Cost Model: Found an estimated cost of 256 for instruction: %V32i8 = urem <32 x i8> undef, undef +; SLOW-NEXT: Cost Model: Found an estimated cost of 522 for instruction: %V64i8 = urem <64 x i8> undef, undef +; SLOW-NEXT: Cost Model: Found an estimated cost of 10 for instruction: ret i32 undef +; +; ALL-SIZE-LABEL: 'urem' +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I64 = urem i64 undef, undef +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V2i64 = urem <2 x i64> undef, undef +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4i64 = urem <4 x i64> undef, undef +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8i64 = urem <8 x i64> undef, undef +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I32 = urem i32 undef, undef +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4i32 = urem <4 x i32> undef, undef +; 
ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8i32 = urem <8 x i32> undef, undef +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V16i32 = urem <16 x i32> undef, undef +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I16 = urem i16 undef, undef +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8i16 = urem <8 x i16> undef, undef +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V16i16 = urem <16 x i16> undef, undef +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V32i16 = urem <32 x i16> undef, undef +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I8 = urem i8 undef, undef +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V16i8 = urem <16 x i8> undef, undef +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V32i8 = urem <32 x i8> undef, undef +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V64i8 = urem <64 x i8> undef, undef +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef +; + %I64 = urem i64 undef, undef + %V2i64 = urem <2 x i64> undef, undef + %V4i64 = urem <4 x i64> undef, undef + %V8i64 = urem <8 x i64> undef, undef + + %I32 = urem i32 undef, undef + %V4i32 = urem <4 x i32> undef, undef + %V8i32 = urem <8 x i32> undef, undef + %V16i32 = urem <16 x i32> undef, undef + + %I16 = urem i16 undef, undef + %V8i16 = urem <8 x i16> undef, undef + %V16i16 = urem <16 x i16> undef, undef + %V32i16 = urem <32 x i16> undef, undef + + %I8 = urem i8 undef, undef + %V16i8 = urem <16 x i8> undef, undef + %V32i8 = urem <32 x i8> undef, undef + %V64i8 = urem <64 x i8> undef, undef + + ret i32 undef +} + +define i32 @srem_const() { +; FAST-LABEL: 'srem_const' +; FAST-NEXT: Cost Model: Found an estimated cost of 23 for instruction: %I64 = srem i64 undef, 7 +; FAST-NEXT: Cost Model: Found an estimated cost of 46 for instruction: %V2i64 = srem <2 x i64> undef, +; FAST-NEXT: Cost Model: Found an estimated cost of 92 for instruction: %V4i64 = srem <4 x i64> undef, +; FAST-NEXT: Cost Model: Found an estimated cost of 184 for instruction: %V8i64 = srem <8 x i64> undef, +; FAST-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %I32 = srem i32 undef, 7 +; FAST-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V4i32 = srem <4 x i32> undef, +; FAST-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %V8i32 = srem <8 x i32> undef, +; FAST-NEXT: Cost Model: Found an estimated cost of 96 for instruction: %V16i32 = srem <16 x i32> undef, +; FAST-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I16 = srem i16 undef, 7 +; FAST-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %V8i16 = srem <8 x i16> undef, +; FAST-NEXT: Cost Model: Found an estimated cost of 31 for instruction: %V16i16 = srem <16 x i16> undef, +; FAST-NEXT: Cost Model: Found an estimated cost of 63 for instruction: %V32i16 = srem <32 x i16> undef, +; FAST-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I8 = srem i8 undef, 7 +; FAST-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V16i8 = srem <16 x i8> undef, +; FAST-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %V32i8 = srem <32 x i8> undef, +; FAST-NEXT: Cost Model: Found an estimated cost of 64 for instruction: %V64i8 = srem <64 x i8> undef, +; FAST-NEXT: Cost 
Model: Found an estimated cost of 10 for instruction: ret i32 undef +; +; SLOW-LABEL: 'srem_const' +; SLOW-NEXT: Cost Model: Found an estimated cost of 23 for instruction: %I64 = srem i64 undef, 7 +; SLOW-NEXT: Cost Model: Found an estimated cost of 46 for instruction: %V2i64 = srem <2 x i64> undef, +; SLOW-NEXT: Cost Model: Found an estimated cost of 92 for instruction: %V4i64 = srem <4 x i64> undef, +; SLOW-NEXT: Cost Model: Found an estimated cost of 184 for instruction: %V8i64 = srem <8 x i64> undef, +; SLOW-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %I32 = srem i32 undef, 7 +; SLOW-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V4i32 = srem <4 x i32> undef, +; SLOW-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %V8i32 = srem <8 x i32> undef, +; SLOW-NEXT: Cost Model: Found an estimated cost of 96 for instruction: %V16i32 = srem <16 x i32> undef, +; SLOW-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %I16 = srem i16 undef, 7 +; SLOW-NEXT: Cost Model: Found an estimated cost of 64 for instruction: %V8i16 = srem <8 x i16> undef, +; SLOW-NEXT: Cost Model: Found an estimated cost of 128 for instruction: %V16i16 = srem <16 x i16> undef, +; SLOW-NEXT: Cost Model: Found an estimated cost of 266 for instruction: %V32i16 = srem <32 x i16> undef, +; SLOW-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %I8 = srem i8 undef, 7 +; SLOW-NEXT: Cost Model: Found an estimated cost of 128 for instruction: %V16i8 = srem <16 x i8> undef, +; SLOW-NEXT: Cost Model: Found an estimated cost of 256 for instruction: %V32i8 = srem <32 x i8> undef, +; SLOW-NEXT: Cost Model: Found an estimated cost of 522 for instruction: %V64i8 = srem <64 x i8> undef, +; SLOW-NEXT: Cost Model: Found an estimated cost of 10 for instruction: ret i32 undef +; +; ALL-SIZE-LABEL: 'srem_const' +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I64 = srem i64 undef, 7 +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V2i64 = srem <2 x i64> undef, +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4i64 = srem <4 x i64> undef, +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8i64 = srem <8 x i64> undef, +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I32 = srem i32 undef, 7 +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4i32 = srem <4 x i32> undef, +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8i32 = srem <8 x i32> undef, +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V16i32 = srem <16 x i32> undef, +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I16 = srem i16 undef, 7 +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8i16 = srem <8 x i16> undef, +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V16i16 = srem <16 x i16> undef, +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V32i16 = srem <32 x i16> undef, +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I8 = srem i8 undef, 7 +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V16i8 = srem <16 x i8> undef, +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V32i8 = srem <32 x i8> undef, +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V64i8 = srem <64 x i8> 
undef, +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef +; + %I64 = srem i64 undef, 7 + %V2i64 = srem <2 x i64> undef, + %V4i64 = srem <4 x i64> undef, + %V8i64 = srem <8 x i64> undef, + + %I32 = srem i32 undef, 7 + %V4i32 = srem <4 x i32> undef, + %V8i32 = srem <8 x i32> undef, + %V16i32 = srem <16 x i32> undef, + + %I16 = srem i16 undef, 7 + %V8i16 = srem <8 x i16> undef, + %V16i16 = srem <16 x i16> undef, + %V32i16 = srem <32 x i16> undef, + + %I8 = srem i8 undef, 7 + %V16i8 = srem <16 x i8> undef, + %V32i8 = srem <32 x i8> undef, + %V64i8 = srem <64 x i8> undef, + + ret i32 undef +} + +define i32 @urem_const() { +; FAST-LABEL: 'urem_const' +; FAST-NEXT: Cost Model: Found an estimated cost of 23 for instruction: %I64 = urem i64 undef, 7 +; FAST-NEXT: Cost Model: Found an estimated cost of 46 for instruction: %V2i64 = urem <2 x i64> undef, +; FAST-NEXT: Cost Model: Found an estimated cost of 92 for instruction: %V4i64 = urem <4 x i64> undef, +; FAST-NEXT: Cost Model: Found an estimated cost of 184 for instruction: %V8i64 = urem <8 x i64> undef, +; FAST-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %I32 = urem i32 undef, 7 +; FAST-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V4i32 = urem <4 x i32> undef, +; FAST-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %V8i32 = urem <8 x i32> undef, +; FAST-NEXT: Cost Model: Found an estimated cost of 96 for instruction: %V16i32 = urem <16 x i32> undef, +; FAST-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I16 = urem i16 undef, 7 +; FAST-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %V8i16 = urem <8 x i16> undef, +; FAST-NEXT: Cost Model: Found an estimated cost of 31 for instruction: %V16i16 = urem <16 x i16> undef, +; FAST-NEXT: Cost Model: Found an estimated cost of 63 for instruction: %V32i16 = urem <32 x i16> undef, +; FAST-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I8 = urem i8 undef, 7 +; FAST-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V16i8 = urem <16 x i8> undef, +; FAST-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %V32i8 = urem <32 x i8> undef, +; FAST-NEXT: Cost Model: Found an estimated cost of 64 for instruction: %V64i8 = urem <64 x i8> undef, +; FAST-NEXT: Cost Model: Found an estimated cost of 10 for instruction: ret i32 undef +; +; SLOW-LABEL: 'urem_const' +; SLOW-NEXT: Cost Model: Found an estimated cost of 23 for instruction: %I64 = urem i64 undef, 7 +; SLOW-NEXT: Cost Model: Found an estimated cost of 46 for instruction: %V2i64 = urem <2 x i64> undef, +; SLOW-NEXT: Cost Model: Found an estimated cost of 92 for instruction: %V4i64 = urem <4 x i64> undef, +; SLOW-NEXT: Cost Model: Found an estimated cost of 184 for instruction: %V8i64 = urem <8 x i64> undef, +; SLOW-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %I32 = urem i32 undef, 7 +; SLOW-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V4i32 = urem <4 x i32> undef, +; SLOW-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %V8i32 = urem <8 x i32> undef, +; SLOW-NEXT: Cost Model: Found an estimated cost of 96 for instruction: %V16i32 = urem <16 x i32> undef, +; SLOW-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %I16 = urem i16 undef, 7 +; SLOW-NEXT: Cost Model: Found an estimated cost of 64 for instruction: %V8i16 = urem <8 x i16> undef, +; SLOW-NEXT: Cost Model: Found an estimated cost of 128 for instruction: 
%V16i16 = urem <16 x i16> undef, +; SLOW-NEXT: Cost Model: Found an estimated cost of 266 for instruction: %V32i16 = urem <32 x i16> undef, +; SLOW-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %I8 = urem i8 undef, 7 +; SLOW-NEXT: Cost Model: Found an estimated cost of 128 for instruction: %V16i8 = urem <16 x i8> undef, +; SLOW-NEXT: Cost Model: Found an estimated cost of 256 for instruction: %V32i8 = urem <32 x i8> undef, +; SLOW-NEXT: Cost Model: Found an estimated cost of 522 for instruction: %V64i8 = urem <64 x i8> undef, +; SLOW-NEXT: Cost Model: Found an estimated cost of 10 for instruction: ret i32 undef +; +; ALL-SIZE-LABEL: 'urem_const' +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I64 = urem i64 undef, 7 +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V2i64 = urem <2 x i64> undef, +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4i64 = urem <4 x i64> undef, +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8i64 = urem <8 x i64> undef, +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I32 = urem i32 undef, 7 +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4i32 = urem <4 x i32> undef, +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8i32 = urem <8 x i32> undef, +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V16i32 = urem <16 x i32> undef, +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I16 = urem i16 undef, 7 +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8i16 = urem <8 x i16> undef, +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V16i16 = urem <16 x i16> undef, +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V32i16 = urem <32 x i16> undef, +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I8 = urem i8 undef, 7 +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V16i8 = urem <16 x i8> undef, +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V32i8 = urem <32 x i8> undef, +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V64i8 = urem <64 x i8> undef, +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef +; + %I64 = urem i64 undef, 7 + %V2i64 = urem <2 x i64> undef, + %V4i64 = urem <4 x i64> undef, + %V8i64 = urem <8 x i64> undef, + + %I32 = urem i32 undef, 7 + %V4i32 = urem <4 x i32> undef, + %V8i32 = urem <8 x i32> undef, + %V16i32 = urem <16 x i32> undef, + + %I16 = urem i16 undef, 7 + %V8i16 = urem <8 x i16> undef, + %V16i16 = urem <16 x i16> undef, + %V32i16 = urem <32 x i16> undef, + + %I8 = urem i8 undef, 7 + %V16i8 = urem <16 x i8> undef, + %V32i8 = urem <32 x i8> undef, + %V64i8 = urem <64 x i8> undef, + + ret i32 undef +} + +define i32 @srem_uniformconst() { +; FAST-LABEL: 'srem_uniformconst' +; FAST-NEXT: Cost Model: Found an estimated cost of 23 for instruction: %I64 = srem i64 undef, 7 +; FAST-NEXT: Cost Model: Found an estimated cost of 46 for instruction: %V2i64 = srem <2 x i64> undef, +; FAST-NEXT: Cost Model: Found an estimated cost of 92 for instruction: %V4i64 = srem <4 x i64> undef, +; FAST-NEXT: Cost Model: Found an estimated cost of 184 for instruction: %V8i64 = srem <8 x i64> undef, +; FAST-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %I32 = 
srem i32 undef, 7 +; FAST-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V4i32 = srem <4 x i32> undef, +; FAST-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %V8i32 = srem <8 x i32> undef, +; FAST-NEXT: Cost Model: Found an estimated cost of 96 for instruction: %V16i32 = srem <16 x i32> undef, +; FAST-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I16 = srem i16 undef, 7 +; FAST-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %V8i16 = srem <8 x i16> undef, +; FAST-NEXT: Cost Model: Found an estimated cost of 31 for instruction: %V16i16 = srem <16 x i16> undef, +; FAST-NEXT: Cost Model: Found an estimated cost of 63 for instruction: %V32i16 = srem <32 x i16> undef, +; FAST-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I8 = srem i8 undef, 7 +; FAST-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V16i8 = srem <16 x i8> undef, +; FAST-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %V32i8 = srem <32 x i8> undef, +; FAST-NEXT: Cost Model: Found an estimated cost of 64 for instruction: %V64i8 = srem <64 x i8> undef, +; FAST-NEXT: Cost Model: Found an estimated cost of 10 for instruction: ret i32 undef +; +; SLOW-LABEL: 'srem_uniformconst' +; SLOW-NEXT: Cost Model: Found an estimated cost of 23 for instruction: %I64 = srem i64 undef, 7 +; SLOW-NEXT: Cost Model: Found an estimated cost of 46 for instruction: %V2i64 = srem <2 x i64> undef, +; SLOW-NEXT: Cost Model: Found an estimated cost of 92 for instruction: %V4i64 = srem <4 x i64> undef, +; SLOW-NEXT: Cost Model: Found an estimated cost of 184 for instruction: %V8i64 = srem <8 x i64> undef, +; SLOW-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %I32 = srem i32 undef, 7 +; SLOW-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V4i32 = srem <4 x i32> undef, +; SLOW-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %V8i32 = srem <8 x i32> undef, +; SLOW-NEXT: Cost Model: Found an estimated cost of 96 for instruction: %V16i32 = srem <16 x i32> undef, +; SLOW-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %I16 = srem i16 undef, 7 +; SLOW-NEXT: Cost Model: Found an estimated cost of 64 for instruction: %V8i16 = srem <8 x i16> undef, +; SLOW-NEXT: Cost Model: Found an estimated cost of 128 for instruction: %V16i16 = srem <16 x i16> undef, +; SLOW-NEXT: Cost Model: Found an estimated cost of 266 for instruction: %V32i16 = srem <32 x i16> undef, +; SLOW-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %I8 = srem i8 undef, 7 +; SLOW-NEXT: Cost Model: Found an estimated cost of 128 for instruction: %V16i8 = srem <16 x i8> undef, +; SLOW-NEXT: Cost Model: Found an estimated cost of 256 for instruction: %V32i8 = srem <32 x i8> undef, +; SLOW-NEXT: Cost Model: Found an estimated cost of 522 for instruction: %V64i8 = srem <64 x i8> undef, +; SLOW-NEXT: Cost Model: Found an estimated cost of 10 for instruction: ret i32 undef +; +; ALL-SIZE-LABEL: 'srem_uniformconst' +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I64 = srem i64 undef, 7 +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V2i64 = srem <2 x i64> undef, +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4i64 = srem <4 x i64> undef, +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8i64 = srem <8 x i64> undef, +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: 
%I32 = srem i32 undef, 7 +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4i32 = srem <4 x i32> undef, +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8i32 = srem <8 x i32> undef, +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V16i32 = srem <16 x i32> undef, +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I16 = srem i16 undef, 7 +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8i16 = srem <8 x i16> undef, +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V16i16 = srem <16 x i16> undef, +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V32i16 = srem <32 x i16> undef, +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I8 = srem i8 undef, 7 +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V16i8 = srem <16 x i8> undef, +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V32i8 = srem <32 x i8> undef, +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V64i8 = srem <64 x i8> undef, +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef +; + %I64 = srem i64 undef, 7 + %V2i64 = srem <2 x i64> undef, + %V4i64 = srem <4 x i64> undef, + %V8i64 = srem <8 x i64> undef, + + %I32 = srem i32 undef, 7 + %V4i32 = srem <4 x i32> undef, + %V8i32 = srem <8 x i32> undef, + %V16i32 = srem <16 x i32> undef, + + %I16 = srem i16 undef, 7 + %V8i16 = srem <8 x i16> undef, + %V16i16 = srem <16 x i16> undef, + %V32i16 = srem <32 x i16> undef, + + %I8 = srem i8 undef, 7 + %V16i8 = srem <16 x i8> undef, + %V32i8 = srem <32 x i8> undef, + %V64i8 = srem <64 x i8> undef, + + ret i32 undef +} + +define i32 @urem_uniformconst() { +; FAST-LABEL: 'urem_uniformconst' +; FAST-NEXT: Cost Model: Found an estimated cost of 23 for instruction: %I64 = urem i64 undef, 7 +; FAST-NEXT: Cost Model: Found an estimated cost of 46 for instruction: %V2i64 = urem <2 x i64> undef, +; FAST-NEXT: Cost Model: Found an estimated cost of 92 for instruction: %V4i64 = urem <4 x i64> undef, +; FAST-NEXT: Cost Model: Found an estimated cost of 184 for instruction: %V8i64 = urem <8 x i64> undef, +; FAST-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %I32 = urem i32 undef, 7 +; FAST-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V4i32 = urem <4 x i32> undef, +; FAST-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %V8i32 = urem <8 x i32> undef, +; FAST-NEXT: Cost Model: Found an estimated cost of 96 for instruction: %V16i32 = urem <16 x i32> undef, +; FAST-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I16 = urem i16 undef, 7 +; FAST-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %V8i16 = urem <8 x i16> undef, +; FAST-NEXT: Cost Model: Found an estimated cost of 31 for instruction: %V16i16 = urem <16 x i16> undef, +; FAST-NEXT: Cost Model: Found an estimated cost of 63 for instruction: %V32i16 = urem <32 x i16> undef, +; FAST-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I8 = urem i8 undef, 7 +; FAST-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V16i8 = urem <16 x i8> undef, +; FAST-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %V32i8 = urem <32 x i8> undef, +; FAST-NEXT: Cost Model: Found an estimated cost of 64 for instruction: %V64i8 = urem <64 x i8> undef, +; FAST-NEXT: Cost 
Model: Found an estimated cost of 10 for instruction: ret i32 undef +; +; SLOW-LABEL: 'urem_uniformconst' +; SLOW-NEXT: Cost Model: Found an estimated cost of 23 for instruction: %I64 = urem i64 undef, 7 +; SLOW-NEXT: Cost Model: Found an estimated cost of 46 for instruction: %V2i64 = urem <2 x i64> undef, +; SLOW-NEXT: Cost Model: Found an estimated cost of 92 for instruction: %V4i64 = urem <4 x i64> undef, +; SLOW-NEXT: Cost Model: Found an estimated cost of 184 for instruction: %V8i64 = urem <8 x i64> undef, +; SLOW-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %I32 = urem i32 undef, 7 +; SLOW-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V4i32 = urem <4 x i32> undef, +; SLOW-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %V8i32 = urem <8 x i32> undef, +; SLOW-NEXT: Cost Model: Found an estimated cost of 96 for instruction: %V16i32 = urem <16 x i32> undef, +; SLOW-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %I16 = urem i16 undef, 7 +; SLOW-NEXT: Cost Model: Found an estimated cost of 64 for instruction: %V8i16 = urem <8 x i16> undef, +; SLOW-NEXT: Cost Model: Found an estimated cost of 128 for instruction: %V16i16 = urem <16 x i16> undef, +; SLOW-NEXT: Cost Model: Found an estimated cost of 266 for instruction: %V32i16 = urem <32 x i16> undef, +; SLOW-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %I8 = urem i8 undef, 7 +; SLOW-NEXT: Cost Model: Found an estimated cost of 128 for instruction: %V16i8 = urem <16 x i8> undef, +; SLOW-NEXT: Cost Model: Found an estimated cost of 256 for instruction: %V32i8 = urem <32 x i8> undef, +; SLOW-NEXT: Cost Model: Found an estimated cost of 522 for instruction: %V64i8 = urem <64 x i8> undef, +; SLOW-NEXT: Cost Model: Found an estimated cost of 10 for instruction: ret i32 undef +; +; ALL-SIZE-LABEL: 'urem_uniformconst' +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I64 = urem i64 undef, 7 +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V2i64 = urem <2 x i64> undef, +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4i64 = urem <4 x i64> undef, +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8i64 = urem <8 x i64> undef, +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I32 = urem i32 undef, 7 +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4i32 = urem <4 x i32> undef, +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8i32 = urem <8 x i32> undef, +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V16i32 = urem <16 x i32> undef, +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I16 = urem i16 undef, 7 +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8i16 = urem <8 x i16> undef, +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V16i16 = urem <16 x i16> undef, +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V32i16 = urem <32 x i16> undef, +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I8 = urem i8 undef, 7 +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V16i8 = urem <16 x i8> undef, +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V32i8 = urem <32 x i8> undef, +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V64i8 = 
urem <64 x i8> undef, +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef +; + %I64 = urem i64 undef, 7 + %V2i64 = urem <2 x i64> undef, + %V4i64 = urem <4 x i64> undef, + %V8i64 = urem <8 x i64> undef, + + %I32 = urem i32 undef, 7 + %V4i32 = urem <4 x i32> undef, + %V8i32 = urem <8 x i32> undef, + %V16i32 = urem <16 x i32> undef, + + %I16 = urem i16 undef, 7 + %V8i16 = urem <8 x i16> undef, + %V16i16 = urem <16 x i16> undef, + %V32i16 = urem <32 x i16> undef, + + %I8 = urem i8 undef, 7 + %V16i8 = urem <16 x i8> undef, + %V32i8 = urem <32 x i8> undef, + %V64i8 = urem <64 x i8> undef, + + ret i32 undef +} + +define i32 @srem_constpow2() { +; FAST-LABEL: 'srem_constpow2' +; FAST-NEXT: Cost Model: Found an estimated cost of 23 for instruction: %I64 = srem i64 undef, 16 +; FAST-NEXT: Cost Model: Found an estimated cost of 46 for instruction: %V2i64 = srem <2 x i64> undef, +; FAST-NEXT: Cost Model: Found an estimated cost of 92 for instruction: %V4i64 = srem <4 x i64> undef, +; FAST-NEXT: Cost Model: Found an estimated cost of 184 for instruction: %V8i64 = srem <8 x i64> undef, +; FAST-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %I32 = srem i32 undef, 16 +; FAST-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V4i32 = srem <4 x i32> undef, +; FAST-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %V8i32 = srem <8 x i32> undef, +; FAST-NEXT: Cost Model: Found an estimated cost of 96 for instruction: %V16i32 = srem <16 x i32> undef, +; FAST-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I16 = srem i16 undef, 16 +; FAST-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %V8i16 = srem <8 x i16> undef, +; FAST-NEXT: Cost Model: Found an estimated cost of 31 for instruction: %V16i16 = srem <16 x i16> undef, +; FAST-NEXT: Cost Model: Found an estimated cost of 63 for instruction: %V32i16 = srem <32 x i16> undef, +; FAST-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I8 = srem i8 undef, 16 +; FAST-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V16i8 = srem <16 x i8> undef, +; FAST-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %V32i8 = srem <32 x i8> undef, +; FAST-NEXT: Cost Model: Found an estimated cost of 64 for instruction: %V64i8 = srem <64 x i8> undef, +; FAST-NEXT: Cost Model: Found an estimated cost of 10 for instruction: ret i32 undef +; +; SLOW-LABEL: 'srem_constpow2' +; SLOW-NEXT: Cost Model: Found an estimated cost of 23 for instruction: %I64 = srem i64 undef, 16 +; SLOW-NEXT: Cost Model: Found an estimated cost of 46 for instruction: %V2i64 = srem <2 x i64> undef, +; SLOW-NEXT: Cost Model: Found an estimated cost of 92 for instruction: %V4i64 = srem <4 x i64> undef, +; SLOW-NEXT: Cost Model: Found an estimated cost of 184 for instruction: %V8i64 = srem <8 x i64> undef, +; SLOW-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %I32 = srem i32 undef, 16 +; SLOW-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V4i32 = srem <4 x i32> undef, +; SLOW-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %V8i32 = srem <8 x i32> undef, +; SLOW-NEXT: Cost Model: Found an estimated cost of 96 for instruction: %V16i32 = srem <16 x i32> undef, +; SLOW-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %I16 = srem i16 undef, 16 +; SLOW-NEXT: Cost Model: Found an estimated cost of 64 for instruction: %V8i16 = srem <8 x i16> undef, +; SLOW-NEXT: Cost Model: Found an estimated 
cost of 128 for instruction: %V16i16 = srem <16 x i16> undef, +; SLOW-NEXT: Cost Model: Found an estimated cost of 266 for instruction: %V32i16 = srem <32 x i16> undef, +; SLOW-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %I8 = srem i8 undef, 16 +; SLOW-NEXT: Cost Model: Found an estimated cost of 128 for instruction: %V16i8 = srem <16 x i8> undef, +; SLOW-NEXT: Cost Model: Found an estimated cost of 256 for instruction: %V32i8 = srem <32 x i8> undef, +; SLOW-NEXT: Cost Model: Found an estimated cost of 522 for instruction: %V64i8 = srem <64 x i8> undef, +; SLOW-NEXT: Cost Model: Found an estimated cost of 10 for instruction: ret i32 undef +; +; ALL-SIZE-LABEL: 'srem_constpow2' +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I64 = srem i64 undef, 16 +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V2i64 = srem <2 x i64> undef, +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4i64 = srem <4 x i64> undef, +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8i64 = srem <8 x i64> undef, +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I32 = srem i32 undef, 16 +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4i32 = srem <4 x i32> undef, +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8i32 = srem <8 x i32> undef, +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V16i32 = srem <16 x i32> undef, +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I16 = srem i16 undef, 16 +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8i16 = srem <8 x i16> undef, +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V16i16 = srem <16 x i16> undef, +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V32i16 = srem <32 x i16> undef, +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I8 = srem i8 undef, 16 +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V16i8 = srem <16 x i8> undef, +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V32i8 = srem <32 x i8> undef, +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V64i8 = srem <64 x i8> undef, +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef +; + %I64 = srem i64 undef, 16 + %V2i64 = srem <2 x i64> undef, + %V4i64 = srem <4 x i64> undef, + %V8i64 = srem <8 x i64> undef, + + %I32 = srem i32 undef, 16 + %V4i32 = srem <4 x i32> undef, + %V8i32 = srem <8 x i32> undef, + %V16i32 = srem <16 x i32> undef, + + %I16 = srem i16 undef, 16 + %V8i16 = srem <8 x i16> undef, + %V16i16 = srem <16 x i16> undef, + %V32i16 = srem <32 x i16> undef, + + %I8 = srem i8 undef, 16 + %V16i8 = srem <16 x i8> undef, + %V32i8 = srem <32 x i8> undef, + %V64i8 = srem <64 x i8> undef, + + ret i32 undef +} + +define i32 @urem_constpow2() { +; FAST-LABEL: 'urem_constpow2' +; FAST-NEXT: Cost Model: Found an estimated cost of 23 for instruction: %I64 = urem i64 undef, 16 +; FAST-NEXT: Cost Model: Found an estimated cost of 46 for instruction: %V2i64 = urem <2 x i64> undef, +; FAST-NEXT: Cost Model: Found an estimated cost of 92 for instruction: %V4i64 = urem <4 x i64> undef, +; FAST-NEXT: Cost Model: Found an estimated cost of 184 for instruction: %V8i64 = urem <8 x i64> undef, +; FAST-NEXT: Cost Model: Found an 
estimated cost of 6 for instruction: %I32 = urem i32 undef, 16 +; FAST-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V4i32 = urem <4 x i32> undef, +; FAST-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %V8i32 = urem <8 x i32> undef, +; FAST-NEXT: Cost Model: Found an estimated cost of 96 for instruction: %V16i32 = urem <16 x i32> undef, +; FAST-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I16 = urem i16 undef, 16 +; FAST-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %V8i16 = urem <8 x i16> undef, +; FAST-NEXT: Cost Model: Found an estimated cost of 31 for instruction: %V16i16 = urem <16 x i16> undef, +; FAST-NEXT: Cost Model: Found an estimated cost of 63 for instruction: %V32i16 = urem <32 x i16> undef, +; FAST-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I8 = urem i8 undef, 16 +; FAST-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V16i8 = urem <16 x i8> undef, +; FAST-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %V32i8 = urem <32 x i8> undef, +; FAST-NEXT: Cost Model: Found an estimated cost of 64 for instruction: %V64i8 = urem <64 x i8> undef, +; FAST-NEXT: Cost Model: Found an estimated cost of 10 for instruction: ret i32 undef +; +; SLOW-LABEL: 'urem_constpow2' +; SLOW-NEXT: Cost Model: Found an estimated cost of 23 for instruction: %I64 = urem i64 undef, 16 +; SLOW-NEXT: Cost Model: Found an estimated cost of 46 for instruction: %V2i64 = urem <2 x i64> undef, +; SLOW-NEXT: Cost Model: Found an estimated cost of 92 for instruction: %V4i64 = urem <4 x i64> undef, +; SLOW-NEXT: Cost Model: Found an estimated cost of 184 for instruction: %V8i64 = urem <8 x i64> undef, +; SLOW-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %I32 = urem i32 undef, 16 +; SLOW-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V4i32 = urem <4 x i32> undef, +; SLOW-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %V8i32 = urem <8 x i32> undef, +; SLOW-NEXT: Cost Model: Found an estimated cost of 96 for instruction: %V16i32 = urem <16 x i32> undef, +; SLOW-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %I16 = urem i16 undef, 16 +; SLOW-NEXT: Cost Model: Found an estimated cost of 64 for instruction: %V8i16 = urem <8 x i16> undef, +; SLOW-NEXT: Cost Model: Found an estimated cost of 128 for instruction: %V16i16 = urem <16 x i16> undef, +; SLOW-NEXT: Cost Model: Found an estimated cost of 266 for instruction: %V32i16 = urem <32 x i16> undef, +; SLOW-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %I8 = urem i8 undef, 16 +; SLOW-NEXT: Cost Model: Found an estimated cost of 128 for instruction: %V16i8 = urem <16 x i8> undef, +; SLOW-NEXT: Cost Model: Found an estimated cost of 256 for instruction: %V32i8 = urem <32 x i8> undef, +; SLOW-NEXT: Cost Model: Found an estimated cost of 522 for instruction: %V64i8 = urem <64 x i8> undef, +; SLOW-NEXT: Cost Model: Found an estimated cost of 10 for instruction: ret i32 undef +; +; ALL-SIZE-LABEL: 'urem_constpow2' +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I64 = urem i64 undef, 16 +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V2i64 = urem <2 x i64> undef, +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4i64 = urem <4 x i64> undef, +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8i64 = urem <8 x i64> undef, +; ALL-SIZE-NEXT: Cost Model: 
Found an estimated cost of 4 for instruction: %I32 = urem i32 undef, 16 +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4i32 = urem <4 x i32> undef, +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8i32 = urem <8 x i32> undef, +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V16i32 = urem <16 x i32> undef, +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I16 = urem i16 undef, 16 +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8i16 = urem <8 x i16> undef, +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V16i16 = urem <16 x i16> undef, +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V32i16 = urem <32 x i16> undef, +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I8 = urem i8 undef, 16 +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V16i8 = urem <16 x i8> undef, +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V32i8 = urem <32 x i8> undef, +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V64i8 = urem <64 x i8> undef, +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef +; + %I64 = urem i64 undef, 16 + %V2i64 = urem <2 x i64> undef, + %V4i64 = urem <4 x i64> undef, + %V8i64 = urem <8 x i64> undef, + + %I32 = urem i32 undef, 16 + %V4i32 = urem <4 x i32> undef, + %V8i32 = urem <8 x i32> undef, + %V16i32 = urem <16 x i32> undef, + + %I16 = urem i16 undef, 16 + %V8i16 = urem <8 x i16> undef, + %V16i16 = urem <16 x i16> undef, + %V32i16 = urem <32 x i16> undef, + + %I8 = urem i8 undef, 16 + %V16i8 = urem <16 x i8> undef, + %V32i8 = urem <32 x i8> undef, + %V64i8 = urem <64 x i8> undef, + + ret i32 undef +} + +define i32 @srem_uniformconstpow2() { +; FAST-LABEL: 'srem_uniformconstpow2' +; FAST-NEXT: Cost Model: Found an estimated cost of 23 for instruction: %I64 = srem i64 undef, 16 +; FAST-NEXT: Cost Model: Found an estimated cost of 46 for instruction: %V2i64 = srem <2 x i64> undef, +; FAST-NEXT: Cost Model: Found an estimated cost of 92 for instruction: %V4i64 = srem <4 x i64> undef, +; FAST-NEXT: Cost Model: Found an estimated cost of 184 for instruction: %V8i64 = srem <8 x i64> undef, +; FAST-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %I32 = srem i32 undef, 16 +; FAST-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V4i32 = srem <4 x i32> undef, +; FAST-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %V8i32 = srem <8 x i32> undef, +; FAST-NEXT: Cost Model: Found an estimated cost of 96 for instruction: %V16i32 = srem <16 x i32> undef, +; FAST-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I16 = srem i16 undef, 16 +; FAST-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %V8i16 = srem <8 x i16> undef, +; FAST-NEXT: Cost Model: Found an estimated cost of 31 for instruction: %V16i16 = srem <16 x i16> undef, +; FAST-NEXT: Cost Model: Found an estimated cost of 63 for instruction: %V32i16 = srem <32 x i16> undef, +; FAST-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I8 = srem i8 undef, 16 +; FAST-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V16i8 = srem <16 x i8> undef, +; FAST-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %V32i8 = srem <32 x i8> undef, +; FAST-NEXT: Cost Model: Found an estimated cost of 64 
for instruction: %V64i8 = srem <64 x i8> undef, +; FAST-NEXT: Cost Model: Found an estimated cost of 10 for instruction: ret i32 undef +; +; SLOW-LABEL: 'srem_uniformconstpow2' +; SLOW-NEXT: Cost Model: Found an estimated cost of 23 for instruction: %I64 = srem i64 undef, 16 +; SLOW-NEXT: Cost Model: Found an estimated cost of 46 for instruction: %V2i64 = srem <2 x i64> undef, +; SLOW-NEXT: Cost Model: Found an estimated cost of 92 for instruction: %V4i64 = srem <4 x i64> undef, +; SLOW-NEXT: Cost Model: Found an estimated cost of 184 for instruction: %V8i64 = srem <8 x i64> undef, +; SLOW-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %I32 = srem i32 undef, 16 +; SLOW-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V4i32 = srem <4 x i32> undef, +; SLOW-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %V8i32 = srem <8 x i32> undef, +; SLOW-NEXT: Cost Model: Found an estimated cost of 96 for instruction: %V16i32 = srem <16 x i32> undef, +; SLOW-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %I16 = srem i16 undef, 16 +; SLOW-NEXT: Cost Model: Found an estimated cost of 64 for instruction: %V8i16 = srem <8 x i16> undef, +; SLOW-NEXT: Cost Model: Found an estimated cost of 128 for instruction: %V16i16 = srem <16 x i16> undef, +; SLOW-NEXT: Cost Model: Found an estimated cost of 266 for instruction: %V32i16 = srem <32 x i16> undef, +; SLOW-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %I8 = srem i8 undef, 16 +; SLOW-NEXT: Cost Model: Found an estimated cost of 128 for instruction: %V16i8 = srem <16 x i8> undef, +; SLOW-NEXT: Cost Model: Found an estimated cost of 256 for instruction: %V32i8 = srem <32 x i8> undef, +; SLOW-NEXT: Cost Model: Found an estimated cost of 522 for instruction: %V64i8 = srem <64 x i8> undef, +; SLOW-NEXT: Cost Model: Found an estimated cost of 10 for instruction: ret i32 undef +; +; ALL-SIZE-LABEL: 'srem_uniformconstpow2' +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I64 = srem i64 undef, 16 +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V2i64 = srem <2 x i64> undef, +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4i64 = srem <4 x i64> undef, +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8i64 = srem <8 x i64> undef, +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I32 = srem i32 undef, 16 +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4i32 = srem <4 x i32> undef, +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8i32 = srem <8 x i32> undef, +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V16i32 = srem <16 x i32> undef, +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I16 = srem i16 undef, 16 +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8i16 = srem <8 x i16> undef, +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V16i16 = srem <16 x i16> undef, +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V32i16 = srem <32 x i16> undef, +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I8 = srem i8 undef, 16 +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V16i8 = srem <16 x i8> undef, +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V32i8 = srem <32 x i8> undef, +; 
ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V64i8 = srem <64 x i8> undef, +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef +; + %I64 = srem i64 undef, 16 + %V2i64 = srem <2 x i64> undef, + %V4i64 = srem <4 x i64> undef, + %V8i64 = srem <8 x i64> undef, + + %I32 = srem i32 undef, 16 + %V4i32 = srem <4 x i32> undef, + %V8i32 = srem <8 x i32> undef, + %V16i32 = srem <16 x i32> undef, + + %I16 = srem i16 undef, 16 + %V8i16 = srem <8 x i16> undef, + %V16i16 = srem <16 x i16> undef, + %V32i16 = srem <32 x i16> undef, + + %I8 = srem i8 undef, 16 + %V16i8 = srem <16 x i8> undef, + %V32i8 = srem <32 x i8> undef, + %V64i8 = srem <64 x i8> undef, + + ret i32 undef +} + +define i32 @urem_uniformconstpow2() { +; FAST-LABEL: 'urem_uniformconstpow2' +; FAST-NEXT: Cost Model: Found an estimated cost of 23 for instruction: %I64 = urem i64 undef, 16 +; FAST-NEXT: Cost Model: Found an estimated cost of 46 for instruction: %V2i64 = urem <2 x i64> undef, +; FAST-NEXT: Cost Model: Found an estimated cost of 92 for instruction: %V4i64 = urem <4 x i64> undef, +; FAST-NEXT: Cost Model: Found an estimated cost of 184 for instruction: %V8i64 = urem <8 x i64> undef, +; FAST-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %I32 = urem i32 undef, 16 +; FAST-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V4i32 = urem <4 x i32> undef, +; FAST-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %V8i32 = urem <8 x i32> undef, +; FAST-NEXT: Cost Model: Found an estimated cost of 96 for instruction: %V16i32 = urem <16 x i32> undef, +; FAST-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I16 = urem i16 undef, 16 +; FAST-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %V8i16 = urem <8 x i16> undef, +; FAST-NEXT: Cost Model: Found an estimated cost of 31 for instruction: %V16i16 = urem <16 x i16> undef, +; FAST-NEXT: Cost Model: Found an estimated cost of 63 for instruction: %V32i16 = urem <32 x i16> undef, +; FAST-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I8 = urem i8 undef, 16 +; FAST-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V16i8 = urem <16 x i8> undef, +; FAST-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %V32i8 = urem <32 x i8> undef, +; FAST-NEXT: Cost Model: Found an estimated cost of 64 for instruction: %V64i8 = urem <64 x i8> undef, +; FAST-NEXT: Cost Model: Found an estimated cost of 10 for instruction: ret i32 undef +; +; SLOW-LABEL: 'urem_uniformconstpow2' +; SLOW-NEXT: Cost Model: Found an estimated cost of 23 for instruction: %I64 = urem i64 undef, 16 +; SLOW-NEXT: Cost Model: Found an estimated cost of 46 for instruction: %V2i64 = urem <2 x i64> undef, +; SLOW-NEXT: Cost Model: Found an estimated cost of 92 for instruction: %V4i64 = urem <4 x i64> undef, +; SLOW-NEXT: Cost Model: Found an estimated cost of 184 for instruction: %V8i64 = urem <8 x i64> undef, +; SLOW-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %I32 = urem i32 undef, 16 +; SLOW-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V4i32 = urem <4 x i32> undef, +; SLOW-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %V8i32 = urem <8 x i32> undef, +; SLOW-NEXT: Cost Model: Found an estimated cost of 96 for instruction: %V16i32 = urem <16 x i32> undef, +; SLOW-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %I16 = urem i16 undef, 16 +; SLOW-NEXT: Cost Model: Found an 
estimated cost of 64 for instruction: %V8i16 = urem <8 x i16> undef, +; SLOW-NEXT: Cost Model: Found an estimated cost of 128 for instruction: %V16i16 = urem <16 x i16> undef, +; SLOW-NEXT: Cost Model: Found an estimated cost of 266 for instruction: %V32i16 = urem <32 x i16> undef, +; SLOW-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %I8 = urem i8 undef, 16 +; SLOW-NEXT: Cost Model: Found an estimated cost of 128 for instruction: %V16i8 = urem <16 x i8> undef, +; SLOW-NEXT: Cost Model: Found an estimated cost of 256 for instruction: %V32i8 = urem <32 x i8> undef, +; SLOW-NEXT: Cost Model: Found an estimated cost of 522 for instruction: %V64i8 = urem <64 x i8> undef, +; SLOW-NEXT: Cost Model: Found an estimated cost of 10 for instruction: ret i32 undef +; +; ALL-SIZE-LABEL: 'urem_uniformconstpow2' +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I64 = urem i64 undef, 16 +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V2i64 = urem <2 x i64> undef, +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4i64 = urem <4 x i64> undef, +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8i64 = urem <8 x i64> undef, +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I32 = urem i32 undef, 16 +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4i32 = urem <4 x i32> undef, +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8i32 = urem <8 x i32> undef, +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V16i32 = urem <16 x i32> undef, +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I16 = urem i16 undef, 16 +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8i16 = urem <8 x i16> undef, +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V16i16 = urem <16 x i16> undef, +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V32i16 = urem <32 x i16> undef, +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I8 = urem i8 undef, 16 +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V16i8 = urem <16 x i8> undef, +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V32i8 = urem <32 x i8> undef, +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V64i8 = urem <64 x i8> undef, +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef +; + %I64 = urem i64 undef, 16 + %V2i64 = urem <2 x i64> undef, + %V4i64 = urem <4 x i64> undef, + %V8i64 = urem <8 x i64> undef, + + %I32 = urem i32 undef, 16 + %V4i32 = urem <4 x i32> undef, + %V8i32 = urem <8 x i32> undef, + %V16i32 = urem <16 x i32> undef, + + %I16 = urem i16 undef, 16 + %V8i16 = urem <8 x i16> undef, + %V16i16 = urem <16 x i16> undef, + %V32i16 = urem <32 x i16> undef, + + %I8 = urem i8 undef, 16 + %V16i8 = urem <16 x i8> undef, + %V32i8 = urem <32 x i8> undef, + %V64i8 = urem <64 x i8> undef, + + ret i32 undef +} + +define i32 @srem_constnegpow2() { +; FAST-LABEL: 'srem_constnegpow2' +; FAST-NEXT: Cost Model: Found an estimated cost of 23 for instruction: %I64 = srem i64 undef, -16 +; FAST-NEXT: Cost Model: Found an estimated cost of 46 for instruction: %V2i64 = srem <2 x i64> undef, +; FAST-NEXT: Cost Model: Found an estimated cost of 92 for instruction: %V4i64 = srem <4 x i64> undef, +; FAST-NEXT: Cost 
Model: Found an estimated cost of 184 for instruction: %V8i64 = srem <8 x i64> undef, +; FAST-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %I32 = srem i32 undef, -16 +; FAST-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V4i32 = srem <4 x i32> undef, +; FAST-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %V8i32 = srem <8 x i32> undef, +; FAST-NEXT: Cost Model: Found an estimated cost of 96 for instruction: %V16i32 = srem <16 x i32> undef, +; FAST-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I16 = srem i16 undef, -16 +; FAST-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %V8i16 = srem <8 x i16> undef, +; FAST-NEXT: Cost Model: Found an estimated cost of 31 for instruction: %V16i16 = srem <16 x i16> undef, +; FAST-NEXT: Cost Model: Found an estimated cost of 63 for instruction: %V32i16 = srem <32 x i16> undef, +; FAST-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I8 = srem i8 undef, -16 +; FAST-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V16i8 = srem <16 x i8> undef, +; FAST-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %V32i8 = srem <32 x i8> undef, +; FAST-NEXT: Cost Model: Found an estimated cost of 64 for instruction: %V64i8 = srem <64 x i8> undef, +; FAST-NEXT: Cost Model: Found an estimated cost of 10 for instruction: ret i32 undef +; +; SLOW-LABEL: 'srem_constnegpow2' +; SLOW-NEXT: Cost Model: Found an estimated cost of 23 for instruction: %I64 = srem i64 undef, -16 +; SLOW-NEXT: Cost Model: Found an estimated cost of 46 for instruction: %V2i64 = srem <2 x i64> undef, +; SLOW-NEXT: Cost Model: Found an estimated cost of 92 for instruction: %V4i64 = srem <4 x i64> undef, +; SLOW-NEXT: Cost Model: Found an estimated cost of 184 for instruction: %V8i64 = srem <8 x i64> undef, +; SLOW-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %I32 = srem i32 undef, -16 +; SLOW-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V4i32 = srem <4 x i32> undef, +; SLOW-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %V8i32 = srem <8 x i32> undef, +; SLOW-NEXT: Cost Model: Found an estimated cost of 96 for instruction: %V16i32 = srem <16 x i32> undef, +; SLOW-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %I16 = srem i16 undef, -16 +; SLOW-NEXT: Cost Model: Found an estimated cost of 64 for instruction: %V8i16 = srem <8 x i16> undef, +; SLOW-NEXT: Cost Model: Found an estimated cost of 128 for instruction: %V16i16 = srem <16 x i16> undef, +; SLOW-NEXT: Cost Model: Found an estimated cost of 266 for instruction: %V32i16 = srem <32 x i16> undef, +; SLOW-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %I8 = srem i8 undef, -16 +; SLOW-NEXT: Cost Model: Found an estimated cost of 128 for instruction: %V16i8 = srem <16 x i8> undef, +; SLOW-NEXT: Cost Model: Found an estimated cost of 256 for instruction: %V32i8 = srem <32 x i8> undef, +; SLOW-NEXT: Cost Model: Found an estimated cost of 522 for instruction: %V64i8 = srem <64 x i8> undef, +; SLOW-NEXT: Cost Model: Found an estimated cost of 10 for instruction: ret i32 undef +; +; ALL-SIZE-LABEL: 'srem_constnegpow2' +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I64 = srem i64 undef, -16 +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V2i64 = srem <2 x i64> undef, +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4i64 = srem <4 x i64> undef, +; 
ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8i64 = srem <8 x i64> undef, +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I32 = srem i32 undef, -16 +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4i32 = srem <4 x i32> undef, +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8i32 = srem <8 x i32> undef, +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V16i32 = srem <16 x i32> undef, +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I16 = srem i16 undef, -16 +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8i16 = srem <8 x i16> undef, +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V16i16 = srem <16 x i16> undef, +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V32i16 = srem <32 x i16> undef, +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I8 = srem i8 undef, -16 +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V16i8 = srem <16 x i8> undef, +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V32i8 = srem <32 x i8> undef, +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V64i8 = srem <64 x i8> undef, +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef +; + %I64 = srem i64 undef, -16 + %V2i64 = srem <2 x i64> undef, + %V4i64 = srem <4 x i64> undef, + %V8i64 = srem <8 x i64> undef, + + %I32 = srem i32 undef, -16 + %V4i32 = srem <4 x i32> undef, + %V8i32 = srem <8 x i32> undef, + %V16i32 = srem <16 x i32> undef, + + %I16 = srem i16 undef, -16 + %V8i16 = srem <8 x i16> undef, + %V16i16 = srem <16 x i16> undef, + %V32i16 = srem <32 x i16> undef, + + %I8 = srem i8 undef, -16 + %V16i8 = srem <16 x i8> undef, + %V32i8 = srem <32 x i8> undef, + %V64i8 = srem <64 x i8> undef, + + ret i32 undef +} + +define i32 @urem_constnegpow2() { +; FAST-LABEL: 'urem_constnegpow2' +; FAST-NEXT: Cost Model: Found an estimated cost of 23 for instruction: %I64 = urem i64 undef, -16 +; FAST-NEXT: Cost Model: Found an estimated cost of 46 for instruction: %V2i64 = urem <2 x i64> undef, +; FAST-NEXT: Cost Model: Found an estimated cost of 92 for instruction: %V4i64 = urem <4 x i64> undef, +; FAST-NEXT: Cost Model: Found an estimated cost of 184 for instruction: %V8i64 = urem <8 x i64> undef, +; FAST-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %I32 = urem i32 undef, -16 +; FAST-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V4i32 = urem <4 x i32> undef, +; FAST-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %V8i32 = urem <8 x i32> undef, +; FAST-NEXT: Cost Model: Found an estimated cost of 96 for instruction: %V16i32 = urem <16 x i32> undef, +; FAST-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I16 = urem i16 undef, -16 +; FAST-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %V8i16 = urem <8 x i16> undef, +; FAST-NEXT: Cost Model: Found an estimated cost of 31 for instruction: %V16i16 = urem <16 x i16> undef, +; FAST-NEXT: Cost Model: Found an estimated cost of 63 for instruction: %V32i16 = urem <32 x i16> undef, +; FAST-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I8 = urem i8 undef, -16 +; FAST-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V16i8 = urem <16 x i8> undef, +; FAST-NEXT: Cost 
Model: Found an estimated cost of 32 for instruction: %V32i8 = urem <32 x i8> undef, +; FAST-NEXT: Cost Model: Found an estimated cost of 64 for instruction: %V64i8 = urem <64 x i8> undef, +; FAST-NEXT: Cost Model: Found an estimated cost of 10 for instruction: ret i32 undef +; +; SLOW-LABEL: 'urem_constnegpow2' +; SLOW-NEXT: Cost Model: Found an estimated cost of 23 for instruction: %I64 = urem i64 undef, -16 +; SLOW-NEXT: Cost Model: Found an estimated cost of 46 for instruction: %V2i64 = urem <2 x i64> undef, +; SLOW-NEXT: Cost Model: Found an estimated cost of 92 for instruction: %V4i64 = urem <4 x i64> undef, +; SLOW-NEXT: Cost Model: Found an estimated cost of 184 for instruction: %V8i64 = urem <8 x i64> undef, +; SLOW-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %I32 = urem i32 undef, -16 +; SLOW-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V4i32 = urem <4 x i32> undef, +; SLOW-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %V8i32 = urem <8 x i32> undef, +; SLOW-NEXT: Cost Model: Found an estimated cost of 96 for instruction: %V16i32 = urem <16 x i32> undef, +; SLOW-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %I16 = urem i16 undef, -16 +; SLOW-NEXT: Cost Model: Found an estimated cost of 64 for instruction: %V8i16 = urem <8 x i16> undef, +; SLOW-NEXT: Cost Model: Found an estimated cost of 128 for instruction: %V16i16 = urem <16 x i16> undef, +; SLOW-NEXT: Cost Model: Found an estimated cost of 266 for instruction: %V32i16 = urem <32 x i16> undef, +; SLOW-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %I8 = urem i8 undef, -16 +; SLOW-NEXT: Cost Model: Found an estimated cost of 128 for instruction: %V16i8 = urem <16 x i8> undef, +; SLOW-NEXT: Cost Model: Found an estimated cost of 256 for instruction: %V32i8 = urem <32 x i8> undef, +; SLOW-NEXT: Cost Model: Found an estimated cost of 522 for instruction: %V64i8 = urem <64 x i8> undef, +; SLOW-NEXT: Cost Model: Found an estimated cost of 10 for instruction: ret i32 undef +; +; ALL-SIZE-LABEL: 'urem_constnegpow2' +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I64 = urem i64 undef, -16 +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V2i64 = urem <2 x i64> undef, +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4i64 = urem <4 x i64> undef, +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8i64 = urem <8 x i64> undef, +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I32 = urem i32 undef, -16 +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4i32 = urem <4 x i32> undef, +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8i32 = urem <8 x i32> undef, +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V16i32 = urem <16 x i32> undef, +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I16 = urem i16 undef, -16 +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8i16 = urem <8 x i16> undef, +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V16i16 = urem <16 x i16> undef, +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V32i16 = urem <32 x i16> undef, +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I8 = urem i8 undef, -16 +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: 
%V16i8 = urem <16 x i8> undef, +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V32i8 = urem <32 x i8> undef, +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V64i8 = urem <64 x i8> undef, +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef +; + %I64 = urem i64 undef, -16 + %V2i64 = urem <2 x i64> undef, + %V4i64 = urem <4 x i64> undef, + %V8i64 = urem <8 x i64> undef, + + %I32 = urem i32 undef, -16 + %V4i32 = urem <4 x i32> undef, + %V8i32 = urem <8 x i32> undef, + %V16i32 = urem <16 x i32> undef, + + %I16 = urem i16 undef, -16 + %V8i16 = urem <8 x i16> undef, + %V16i16 = urem <16 x i16> undef, + %V32i16 = urem <32 x i16> undef, + + %I8 = urem i8 undef, -16 + %V16i8 = urem <16 x i8> undef, + %V32i8 = urem <32 x i8> undef, + %V64i8 = urem <64 x i8> undef, + + ret i32 undef +} + +define i32 @srem_uniformconstnegpow2() { +; FAST-LABEL: 'srem_uniformconstnegpow2' +; FAST-NEXT: Cost Model: Found an estimated cost of 23 for instruction: %I64 = srem i64 undef, -16 +; FAST-NEXT: Cost Model: Found an estimated cost of 46 for instruction: %V2i64 = srem <2 x i64> undef, +; FAST-NEXT: Cost Model: Found an estimated cost of 92 for instruction: %V4i64 = srem <4 x i64> undef, +; FAST-NEXT: Cost Model: Found an estimated cost of 184 for instruction: %V8i64 = srem <8 x i64> undef, +; FAST-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %I32 = srem i32 undef, -16 +; FAST-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V4i32 = srem <4 x i32> undef, +; FAST-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %V8i32 = srem <8 x i32> undef, +; FAST-NEXT: Cost Model: Found an estimated cost of 96 for instruction: %V16i32 = srem <16 x i32> undef, +; FAST-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I16 = srem i16 undef, -16 +; FAST-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %V8i16 = srem <8 x i16> undef, +; FAST-NEXT: Cost Model: Found an estimated cost of 31 for instruction: %V16i16 = srem <16 x i16> undef, +; FAST-NEXT: Cost Model: Found an estimated cost of 63 for instruction: %V32i16 = srem <32 x i16> undef, +; FAST-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I8 = srem i8 undef, -16 +; FAST-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V16i8 = srem <16 x i8> undef, +; FAST-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %V32i8 = srem <32 x i8> undef, +; FAST-NEXT: Cost Model: Found an estimated cost of 64 for instruction: %V64i8 = srem <64 x i8> undef, +; FAST-NEXT: Cost Model: Found an estimated cost of 10 for instruction: ret i32 undef +; +; SLOW-LABEL: 'srem_uniformconstnegpow2' +; SLOW-NEXT: Cost Model: Found an estimated cost of 23 for instruction: %I64 = srem i64 undef, -16 +; SLOW-NEXT: Cost Model: Found an estimated cost of 46 for instruction: %V2i64 = srem <2 x i64> undef, +; SLOW-NEXT: Cost Model: Found an estimated cost of 92 for instruction: %V4i64 = srem <4 x i64> undef, +; SLOW-NEXT: Cost Model: Found an estimated cost of 184 for instruction: %V8i64 = srem <8 x i64> undef, +; SLOW-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %I32 = srem i32 undef, -16 +; SLOW-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V4i32 = srem <4 x i32> undef, +; SLOW-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %V8i32 = srem <8 x i32> undef, +; SLOW-NEXT: Cost Model: Found an estimated cost of 96 for instruction: %V16i32 = 
srem <16 x i32> undef, +; SLOW-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %I16 = srem i16 undef, -16 +; SLOW-NEXT: Cost Model: Found an estimated cost of 64 for instruction: %V8i16 = srem <8 x i16> undef, +; SLOW-NEXT: Cost Model: Found an estimated cost of 128 for instruction: %V16i16 = srem <16 x i16> undef, +; SLOW-NEXT: Cost Model: Found an estimated cost of 266 for instruction: %V32i16 = srem <32 x i16> undef, +; SLOW-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %I8 = srem i8 undef, -16 +; SLOW-NEXT: Cost Model: Found an estimated cost of 128 for instruction: %V16i8 = srem <16 x i8> undef, +; SLOW-NEXT: Cost Model: Found an estimated cost of 256 for instruction: %V32i8 = srem <32 x i8> undef, +; SLOW-NEXT: Cost Model: Found an estimated cost of 522 for instruction: %V64i8 = srem <64 x i8> undef, +; SLOW-NEXT: Cost Model: Found an estimated cost of 10 for instruction: ret i32 undef +; +; ALL-SIZE-LABEL: 'srem_uniformconstnegpow2' +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I64 = srem i64 undef, -16 +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V2i64 = srem <2 x i64> undef, +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4i64 = srem <4 x i64> undef, +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8i64 = srem <8 x i64> undef, +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I32 = srem i32 undef, -16 +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4i32 = srem <4 x i32> undef, +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8i32 = srem <8 x i32> undef, +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V16i32 = srem <16 x i32> undef, +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I16 = srem i16 undef, -16 +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8i16 = srem <8 x i16> undef, +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V16i16 = srem <16 x i16> undef, +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V32i16 = srem <32 x i16> undef, +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I8 = srem i8 undef, -16 +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V16i8 = srem <16 x i8> undef, +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V32i8 = srem <32 x i8> undef, +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V64i8 = srem <64 x i8> undef, +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef +; + %I64 = srem i64 undef, -16 + %V2i64 = srem <2 x i64> undef, + %V4i64 = srem <4 x i64> undef, + %V8i64 = srem <8 x i64> undef, + + %I32 = srem i32 undef, -16 + %V4i32 = srem <4 x i32> undef, + %V8i32 = srem <8 x i32> undef, + %V16i32 = srem <16 x i32> undef, + + %I16 = srem i16 undef, -16 + %V8i16 = srem <8 x i16> undef, + %V16i16 = srem <16 x i16> undef, + %V32i16 = srem <32 x i16> undef, + + %I8 = srem i8 undef, -16 + %V16i8 = srem <16 x i8> undef, + %V32i8 = srem <32 x i8> undef, + %V64i8 = srem <64 x i8> undef, + + ret i32 undef +} + +define i32 @urem_uniformconstnegpow2() { +; FAST-LABEL: 'urem_uniformconstnegpow2' +; FAST-NEXT: Cost Model: Found an estimated cost of 23 for instruction: %I64 = urem i64 undef, -16 +; FAST-NEXT: Cost Model: Found an estimated 
cost of 46 for instruction: %V2i64 = urem <2 x i64> undef, +; FAST-NEXT: Cost Model: Found an estimated cost of 92 for instruction: %V4i64 = urem <4 x i64> undef, +; FAST-NEXT: Cost Model: Found an estimated cost of 184 for instruction: %V8i64 = urem <8 x i64> undef, +; FAST-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %I32 = urem i32 undef, -16 +; FAST-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V4i32 = urem <4 x i32> undef, +; FAST-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %V8i32 = urem <8 x i32> undef, +; FAST-NEXT: Cost Model: Found an estimated cost of 96 for instruction: %V16i32 = urem <16 x i32> undef, +; FAST-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I16 = urem i16 undef, -16 +; FAST-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %V8i16 = urem <8 x i16> undef, +; FAST-NEXT: Cost Model: Found an estimated cost of 31 for instruction: %V16i16 = urem <16 x i16> undef, +; FAST-NEXT: Cost Model: Found an estimated cost of 63 for instruction: %V32i16 = urem <32 x i16> undef, +; FAST-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I8 = urem i8 undef, -16 +; FAST-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V16i8 = urem <16 x i8> undef, +; FAST-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %V32i8 = urem <32 x i8> undef, +; FAST-NEXT: Cost Model: Found an estimated cost of 64 for instruction: %V64i8 = urem <64 x i8> undef, +; FAST-NEXT: Cost Model: Found an estimated cost of 10 for instruction: ret i32 undef +; +; SLOW-LABEL: 'urem_uniformconstnegpow2' +; SLOW-NEXT: Cost Model: Found an estimated cost of 23 for instruction: %I64 = urem i64 undef, -16 +; SLOW-NEXT: Cost Model: Found an estimated cost of 46 for instruction: %V2i64 = urem <2 x i64> undef, +; SLOW-NEXT: Cost Model: Found an estimated cost of 92 for instruction: %V4i64 = urem <4 x i64> undef, +; SLOW-NEXT: Cost Model: Found an estimated cost of 184 for instruction: %V8i64 = urem <8 x i64> undef, +; SLOW-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %I32 = urem i32 undef, -16 +; SLOW-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V4i32 = urem <4 x i32> undef, +; SLOW-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %V8i32 = urem <8 x i32> undef, +; SLOW-NEXT: Cost Model: Found an estimated cost of 96 for instruction: %V16i32 = urem <16 x i32> undef, +; SLOW-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %I16 = urem i16 undef, -16 +; SLOW-NEXT: Cost Model: Found an estimated cost of 64 for instruction: %V8i16 = urem <8 x i16> undef, +; SLOW-NEXT: Cost Model: Found an estimated cost of 128 for instruction: %V16i16 = urem <16 x i16> undef, +; SLOW-NEXT: Cost Model: Found an estimated cost of 266 for instruction: %V32i16 = urem <32 x i16> undef, +; SLOW-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %I8 = urem i8 undef, -16 +; SLOW-NEXT: Cost Model: Found an estimated cost of 128 for instruction: %V16i8 = urem <16 x i8> undef, +; SLOW-NEXT: Cost Model: Found an estimated cost of 256 for instruction: %V32i8 = urem <32 x i8> undef, +; SLOW-NEXT: Cost Model: Found an estimated cost of 522 for instruction: %V64i8 = urem <64 x i8> undef, +; SLOW-NEXT: Cost Model: Found an estimated cost of 10 for instruction: ret i32 undef +; +; ALL-SIZE-LABEL: 'urem_uniformconstnegpow2' +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I64 = urem i64 undef, -16 +; ALL-SIZE-NEXT: Cost 
Model: Found an estimated cost of 4 for instruction: %V2i64 = urem <2 x i64> undef, +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4i64 = urem <4 x i64> undef, +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8i64 = urem <8 x i64> undef, +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I32 = urem i32 undef, -16 +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4i32 = urem <4 x i32> undef, +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8i32 = urem <8 x i32> undef, +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V16i32 = urem <16 x i32> undef, +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I16 = urem i16 undef, -16 +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8i16 = urem <8 x i16> undef, +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V16i16 = urem <16 x i16> undef, +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V32i16 = urem <32 x i16> undef, +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I8 = urem i8 undef, -16 +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V16i8 = urem <16 x i8> undef, +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V32i8 = urem <32 x i8> undef, +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V64i8 = urem <64 x i8> undef, +; ALL-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef +; + %I64 = urem i64 undef, -16 + %V2i64 = urem <2 x i64> undef, + %V4i64 = urem <4 x i64> undef, + %V8i64 = urem <8 x i64> undef, + + %I32 = urem i32 undef, -16 + %V4i32 = urem <4 x i32> undef, + %V8i32 = urem <8 x i32> undef, + %V16i32 = urem <16 x i32> undef, + + %I16 = urem i16 undef, -16 + %V8i16 = urem <8 x i16> undef, + %V16i16 = urem <16 x i16> undef, + %V32i16 = urem <32 x i16> undef, + + %I8 = urem i8 undef, -16 + %V16i8 = urem <16 x i8> undef, + %V32i8 = urem <32 x i8> undef, + %V64i8 = urem <64 x i8> undef, + + ret i32 undef +} From 79abf89303757cbd5a273525a4b922b3fefd2a88 Mon Sep 17 00:00:00 2001 From: Michael Jones Date: Wed, 22 Dec 2021 11:24:02 -0800 Subject: [PATCH 072/293] [libc][obvious] fix formatting mistake I missed two instances of "SetUp" being replaced by "set_up" and "TearDown" being replaced by "tear_down" when finalizing the formatting change. This fixes that. Differential Revision: https://reviews.llvm.org/D116178 --- libc/benchmarks/LibcBenchmarkTest.cpp | 4 ++-- libc/test/src/signal/sigprocmask_test.cpp | 4 ++-- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/libc/benchmarks/LibcBenchmarkTest.cpp b/libc/benchmarks/LibcBenchmarkTest.cpp index e4afab531fef..c9b21e48e8f7 100644 --- a/libc/benchmarks/LibcBenchmarkTest.cpp +++ b/libc/benchmarks/LibcBenchmarkTest.cpp @@ -50,9 +50,9 @@ class LibcBenchmark : public ::testing::Test { } protected: - void set_up() override { Options.Log = BenchmarkLog::Full; } + void SetUp() override { Options.Log = BenchmarkLog::Full; } - void tear_down() override { + void TearDown() override { // We make sure all the expected measurements were performed. 
     if (MaybeTimepoints)
       EXPECT_THAT(*MaybeTimepoints, IsEmpty());
diff --git a/libc/test/src/signal/sigprocmask_test.cpp b/libc/test/src/signal/sigprocmask_test.cpp
index 8fe3f03b6cc8..e6463627ea9e 100644
--- a/libc/test/src/signal/sigprocmask_test.cpp
+++ b/libc/test/src/signal/sigprocmask_test.cpp
@@ -21,9 +21,9 @@ class LlvmLibcSignalTest : public __llvm_libc::testing::Test {
   sigset_t oldSet;
 
 public:
-  void set_up() override { __llvm_libc::sigprocmask(0, nullptr, &oldSet); }
+  void SetUp() override { __llvm_libc::sigprocmask(0, nullptr, &oldSet); }
 
-  void tear_down() override {
+  void TearDown() override {
     __llvm_libc::sigprocmask(SIG_SETMASK, &oldSet, nullptr);
   }
 };

From ad761f0c3961a09dffd3e7eaea82cd331db9184f Mon Sep 17 00:00:00 2001
From: Mogball
Date: Wed, 22 Dec 2021 19:26:23 +0000
Subject: [PATCH 073/293] [mlir] Update BUILD.bazel to include `scf_tests`

---
 .../mlir/unittests/BUILD.bazel                | 15 +++++++++++++++
 1 file changed, 15 insertions(+)

diff --git a/utils/bazel/llvm-project-overlay/mlir/unittests/BUILD.bazel b/utils/bazel/llvm-project-overlay/mlir/unittests/BUILD.bazel
index 095841a60e94..ff22e88a10a4 100644
--- a/utils/bazel/llvm-project-overlay/mlir/unittests/BUILD.bazel
+++ b/utils/bazel/llvm-project-overlay/mlir/unittests/BUILD.bazel
@@ -104,6 +104,21 @@ cc_test(
     ],
 )
 
+cc_test(
+    name = "scf_tests",
+    size = "small",
+    srcs = glob([
+        "Dialect/SCF/*.cpp",
+        "Dialect/SCF/*.h",
+    ]),
+    deps = [
+        "//llvm:gtest_main",
+        "//mlir:Parser",
+        "//mlir:SCFDialect",
+        "//mlir:StandardOps",
+    ],
+)
+
 cc_test(
     name = "sparse_tensor_tests",
     size = "small",

From df590567aa435211dea1f306b55b8d4ba92ddbb1 Mon Sep 17 00:00:00 2001
From: Marianne Mailhot-Sarrasin
Date: Wed, 22 Dec 2021 14:39:15 -0500
Subject: [PATCH 074/293] [DSE] Add test case showing bug PR52774.

Pre-committing the test case before the bug fix.

Reviewed by: Florian Hahn
Differential revision: https://reviews.llvm.org/D115965
---
 .../DeadStoreElimination/store-after-loop.ll  | 60 +++++++++++++++++++
 1 file changed, 60 insertions(+)
 create mode 100644 llvm/test/Transforms/DeadStoreElimination/store-after-loop.ll

diff --git a/llvm/test/Transforms/DeadStoreElimination/store-after-loop.ll b/llvm/test/Transforms/DeadStoreElimination/store-after-loop.ll
new file mode 100644
index 000000000000..27c7c139acb1
--- /dev/null
+++ b/llvm/test/Transforms/DeadStoreElimination/store-after-loop.ll
@@ -0,0 +1,60 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
+; RUN: opt < %s -passes=dse -S | FileCheck %s
+
+target datalayout = "E-m:e-p:32:32-i64:32-f64:32:64-a:0:32-n32-S32"
+
+%struct.ilist = type { i32, %struct.ilist* }
+
+; There is no dead store in this test. Make sure no store is deleted by DSE.
+; Test case related to bug report PR52774.
+; NOTE: Showing actual (broken) DSE behavior.
+ +define %struct.ilist* @test() { +; CHECK-LABEL: @test( +; CHECK-NEXT: br label [[LOOP:%.*]] +; CHECK: loop: +; CHECK-NEXT: [[IV:%.*]] = phi i32 [ 0, [[TMP0:%.*]] ], [ [[IV_NEXT:%.*]], [[LOOP]] ] +; CHECK-NEXT: [[LIST_NEXT:%.*]] = phi %struct.ilist* [ null, [[TMP0]] ], [ [[LIST_NEW_ILIST_PTR:%.*]], [[LOOP]] ] +; CHECK-NEXT: [[LIST_NEW_I8_PTR:%.*]] = tail call align 8 dereferenceable_or_null(8) i8* @malloc(i32 8) +; CHECK-NEXT: [[LIST_NEW_ILIST_PTR]] = bitcast i8* [[LIST_NEW_I8_PTR]] to %struct.ilist* +; CHECK-NEXT: [[GEP_NEW_VALUE:%.*]] = getelementptr inbounds [[STRUCT_ILIST:%.*]], %struct.ilist* [[LIST_NEW_ILIST_PTR]], i32 0, i32 0 +; CHECK-NEXT: store i32 42, i32* [[GEP_NEW_VALUE]], align 8 +; CHECK-NEXT: [[IV_NEXT]] = add nuw nsw i32 [[IV]], 1 +; CHECK-NEXT: [[COND:%.*]] = icmp eq i32 [[IV_NEXT]], 10 +; CHECK-NEXT: br i1 [[COND]], label [[EXIT:%.*]], label [[LOOP]] +; CHECK: exit: +; CHECK-NEXT: [[LIST_LAST_ILIST_PTR:%.*]] = bitcast i8* [[LIST_NEW_I8_PTR]] to %struct.ilist* +; CHECK-NEXT: [[GEP_LIST_LAST_NEXT:%.*]] = getelementptr inbounds [[STRUCT_ILIST]], %struct.ilist* [[LIST_LAST_ILIST_PTR]], i32 0, i32 1 +; CHECK-NEXT: store %struct.ilist* null, %struct.ilist** [[GEP_LIST_LAST_NEXT]], align 4 +; CHECK-NEXT: [[GEP_LIST_NEXT_NEXT:%.*]] = getelementptr inbounds [[STRUCT_ILIST]], %struct.ilist* [[LIST_NEXT]], i32 0, i32 1 +; CHECK-NEXT: [[LOADED_PTR:%.*]] = load %struct.ilist*, %struct.ilist** [[GEP_LIST_NEXT_NEXT]], align 4 +; CHECK-NEXT: ret %struct.ilist* [[LOADED_PTR]] +; + br label %loop + +loop: + %iv = phi i32 [ 0, %0 ], [ %iv.next, %loop ] + %list.next = phi %struct.ilist* [ null, %0 ], [ %list.new.ilist.ptr, %loop ] + %list.new.i8.ptr = tail call align 8 dereferenceable_or_null(8) i8* @malloc(i32 8) + %list.new.ilist.ptr = bitcast i8* %list.new.i8.ptr to %struct.ilist* + %gep.new.value = getelementptr inbounds %struct.ilist, %struct.ilist* %list.new.ilist.ptr, i32 0, i32 0 + store i32 42, i32* %gep.new.value, align 8 + %gep.new.next = getelementptr inbounds %struct.ilist, %struct.ilist* %list.new.ilist.ptr, i32 0, i32 1 + store %struct.ilist* %list.next, %struct.ilist** %gep.new.next, align 4 + %iv.next = add nuw nsw i32 %iv, 1 + %cond = icmp eq i32 %iv.next, 10 + br i1 %cond, label %exit, label %loop + +exit: + %list.last.ilist.ptr = bitcast i8* %list.new.i8.ptr to %struct.ilist* + %gep.list.last.next = getelementptr inbounds %struct.ilist, %struct.ilist* %list.last.ilist.ptr, i32 0, i32 1 + store %struct.ilist* null, %struct.ilist** %gep.list.last.next, align 4 + %gep.list.next.next = getelementptr inbounds %struct.ilist, %struct.ilist* %list.next, i32 0, i32 1 + %loaded_ptr = load %struct.ilist*, %struct.ilist** %gep.list.next.next, align 4 + ret %struct.ilist* %loaded_ptr ; use loaded pointer +} + +; Function Attrs: inaccessiblememonly nounwind +declare noalias noundef align 8 i8* @malloc(i32 noundef) local_unnamed_addr #0 + +attributes #0 = { inaccessiblememonly nounwind} From 7347c28def5637d3b44c6980383437672f43fdf5 Mon Sep 17 00:00:00 2001 From: Mogball Date: Wed, 22 Dec 2021 20:00:00 +0000 Subject: [PATCH 075/293] [mlir] Add missing unit tests to BUILD.bazel Several unit test folders were missing from the bazel build, including: Transforms, Conversion, Rewrite --- mlir/unittests/Analysis/CMakeLists.txt | 2 +- .../mlir/unittests/BUILD.bazel | 43 +++++++++++++++++++ 2 files changed, 44 insertions(+), 1 deletion(-) diff --git a/mlir/unittests/Analysis/CMakeLists.txt b/mlir/unittests/Analysis/CMakeLists.txt index 3346426fa036..b6340ec72812 100644 --- 
a/mlir/unittests/Analysis/CMakeLists.txt +++ b/mlir/unittests/Analysis/CMakeLists.txt @@ -7,7 +7,7 @@ add_mlir_unittest(MLIRAnalysisTests ) target_link_libraries(MLIRAnalysisTests - PRIVATE + PRIVATE MLIRLoopAnalysis MLIRParser ) diff --git a/utils/bazel/llvm-project-overlay/mlir/unittests/BUILD.bazel b/utils/bazel/llvm-project-overlay/mlir/unittests/BUILD.bazel index ff22e88a10a4..5ee06e75f435 100644 --- a/utils/bazel/llvm-project-overlay/mlir/unittests/BUILD.bazel +++ b/utils/bazel/llvm-project-overlay/mlir/unittests/BUILD.bazel @@ -75,6 +75,20 @@ cc_test( ], ) +cc_test( + name = "rewrite_tests", + size = "small", + srcs = glob([ + "Rewrite/*.cpp", + "Rewrite/*.h", + ]), + deps = [ + "//llvm:gtest_main", + "//mlir:IR", + "//mlir:Rewrite", + ], +) + cc_test( name = "dialect_tests", size = "small", @@ -243,6 +257,19 @@ cc_test( ], ) +cc_test( + name = "transforms_test", + size = "small", + srcs = glob([ + "Transforms/*.cpp", + "Transforms/*.h", + ]), + deps = [ + "//llvm:gtest_main", + "//mlir:TransformUtils", + ], +) + cc_test( name = "analysis_tests", size = "small", @@ -260,6 +287,22 @@ cc_test( ], ) +cc_test( + name = "conversion_tests", + size = "small", + srcs = glob([ + "Conversion/*.cpp", + "Conversion/*.h", + "Conversion/*/*.cpp", + "Conversion/*/*.h", + ]), + deps = [ + "//llvm:gtest_main", + "//mlir:ArithmeticDialect", + "//mlir:PDLToPDLInterp", + ], +) + cc_test( name = "execution_engine_tests", size = "small", From 86618e37bded924d0a7a06ef0818bb9b2311532d Mon Sep 17 00:00:00 2001 From: Salman Javed Date: Thu, 23 Dec 2021 09:31:45 +1300 Subject: [PATCH 076/293] Resolve lint warning about converting unsigned to signed (NFC) FileOffset is unsigned while getLocWithOffset() requires a signed value. --- clang-tools-extra/clang-tidy/ClangTidyDiagnosticConsumer.cpp | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/clang-tools-extra/clang-tidy/ClangTidyDiagnosticConsumer.cpp b/clang-tools-extra/clang-tidy/ClangTidyDiagnosticConsumer.cpp index b09079b5e8ba..6c5054fdca28 100644 --- a/clang-tools-extra/clang-tidy/ClangTidyDiagnosticConsumer.cpp +++ b/clang-tools-extra/clang-tidy/ClangTidyDiagnosticConsumer.cpp @@ -193,7 +193,8 @@ DiagnosticBuilder ClangTidyContext::diag(const ClangTidyError &Error) { SM.getFileManager().getFile(Error.Message.FilePath); FileID ID = SM.getOrCreateFileID(*File, SrcMgr::C_User); SourceLocation FileStartLoc = SM.getLocForStartOfFile(ID); - SourceLocation Loc = FileStartLoc.getLocWithOffset(Error.Message.FileOffset); + SourceLocation Loc = FileStartLoc.getLocWithOffset( + static_cast(Error.Message.FileOffset)); return diag(Error.DiagnosticName, Loc, Error.Message.Message, static_cast(Error.DiagLevel)); } From dcb3e8083a3229b44e23d5d1c19453f0777f3aa2 Mon Sep 17 00:00:00 2001 From: Krzysztof Parzyszek Date: Wed, 22 Dec 2021 11:29:36 -0800 Subject: [PATCH 077/293] [Hexagon] Make conversions to vector predicate types explicit for builtins HVX does not have load/store instructions for vector predicates (i.e. bool vectors). Because of that, vector predicates need to be converted to another type before being stored, and the most convenient representation is an HVX vector. As a consequence, in C/C++, source-level builtins that either take or produce vector predicates take or return regular vectors instead. On the other hand, the corresponding LLVM intrinsics do have boolean types that, and so a conversion of the operand or the return value was necessary. This conversion would happen inside clang's codegen, but was somewhat fragile. 
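To make the replacement concrete, below is a minimal sketch of the explicit composition this patch switches to (the strategy itself is spelled out in the next paragraph). The typedef and function names are purely illustrative, and only the 64-byte builtin names are shown; the _128B variants compose the same way:

  // 64-byte HVX vector; <hexagon_types.h> normally provides an equivalent
  // HVX_Vector typedef, this local one just keeps the sketch self-contained.
  typedef int hvx_vec_t __attribute__((__vector_size__(64)));

  // Vector -> predicate -> vector round trip using the two conversion
  // builtins named in this patch.
  hvx_vec_t to_pred_and_back(hvx_vec_t v) {
    return __builtin_HEXAGON_V6_vandqrt(
        __builtin_HEXAGON_V6_vandvrt(v, -1), -1);
  }

  // Keeping a compare result as a storable vector: the compare builtin now
  // returns a vector predicate, so it is wrapped in V6_vandqrt, mirroring
  // the Q6_Q_vcmp_eq_VbVb expansion added to hvx_hexagon_protos.h below.
  hvx_vec_t eq_as_vector(hvx_vec_t a, hvx_vec_t b) {
    return __builtin_HEXAGON_V6_vandqrt(
        __builtin_HEXAGON_V6_veqb(a, b), -1);
  }

The updated hvx_hexagon_protos.h macros further down expand to exactly this pattern.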
This patch changes the strategy: a builtin that takes a vector predicate now really expects a vector predicate. Since such a predicate cannot be provided via a variable, this builtin must be composed with other builtins that either convert vector to a predicate (V6_vandvrt) or predicate to a vector (V6_vandqrt). For users using builtins defined in hvx_hexagon_protos.h there is no impact: the conversions were added to that file. Other users will need to insert - __builtin_HEXAGON_V6_vandvrt[_128B](V, -1) to convert vector V to a vector predicate, or - __builtin_HEXAGON_V6_vandqrt[_128B](Q, -1) to convert vector predicate Q to a vector. Builtins __builtin_HEXAGON_V6_vmaskedstore.* are a temporary exception to that, but they are deprecated and should not be used anyway. In the future they will either follow the same rule, or be removed. --- .../Basic/BuiltinsHexagonMapCustomDep.def | 192 ----- clang/lib/CodeGen/CGBuiltin.cpp | 56 +- clang/lib/Headers/hvx_hexagon_protos.h | 795 +++++++++--------- .../test/CodeGen/builtins-hexagon-v66-128B.c | 2 +- clang/test/CodeGen/builtins-hexagon-v66.c | 2 +- clang/test/CodeGen/builtins-hvx128.c | 157 ++-- clang/test/CodeGen/builtins-hvx64.c | 157 ++-- llvm/include/llvm/IR/IntrinsicsHexagonDep.td | 571 ++++++------- 8 files changed, 849 insertions(+), 1083 deletions(-) diff --git a/clang/include/clang/Basic/BuiltinsHexagonMapCustomDep.def b/clang/include/clang/Basic/BuiltinsHexagonMapCustomDep.def index 93f560fc5adc..389eadb21d01 100644 --- a/clang/include/clang/Basic/BuiltinsHexagonMapCustomDep.def +++ b/clang/include/clang/Basic/BuiltinsHexagonMapCustomDep.def @@ -8,199 +8,7 @@ // Automatically generated file, do not edit! //===----------------------------------------------------------------------===// -CUSTOM_BUILTIN_MAPPING(A2_add, 0) -CUSTOM_BUILTIN_MAPPING(A2_addi, 0) -CUSTOM_BUILTIN_MAPPING(A2_addp, 0) -CUSTOM_BUILTIN_MAPPING(A2_and, 0) -CUSTOM_BUILTIN_MAPPING(A2_andir, 0) -CUSTOM_BUILTIN_MAPPING(A2_neg, 0) -CUSTOM_BUILTIN_MAPPING(A2_not, 0) -CUSTOM_BUILTIN_MAPPING(A2_or, 0) -CUSTOM_BUILTIN_MAPPING(A2_orir, 0) -CUSTOM_BUILTIN_MAPPING(A2_sub, 0) -CUSTOM_BUILTIN_MAPPING(A2_subp, 0) -CUSTOM_BUILTIN_MAPPING(A2_subri, 0) -CUSTOM_BUILTIN_MAPPING(A2_sxtb, 0) -CUSTOM_BUILTIN_MAPPING(A2_sxth, 0) -CUSTOM_BUILTIN_MAPPING(A2_xor, 0) -CUSTOM_BUILTIN_MAPPING(A2_zxtb, 0) -CUSTOM_BUILTIN_MAPPING(A2_zxth, 0) -CUSTOM_BUILTIN_MAPPING(M2_dpmpyss_s0, 0) -CUSTOM_BUILTIN_MAPPING(M2_dpmpyuu_s0, 0) -CUSTOM_BUILTIN_MAPPING(M2_mpyi, 0) -CUSTOM_BUILTIN_MAPPING(M2_mpysmi, 0) -CUSTOM_BUILTIN_MAPPING(M2_mpyui, 0) -CUSTOM_BUILTIN_MAPPING(S2_asl_i_p, 0) -CUSTOM_BUILTIN_MAPPING(S2_asl_i_r, 0) -CUSTOM_BUILTIN_MAPPING(S2_asr_i_p, 0) -CUSTOM_BUILTIN_MAPPING(S2_asr_i_r, 0) -CUSTOM_BUILTIN_MAPPING(S2_lsr_i_p, 0) -CUSTOM_BUILTIN_MAPPING(S2_lsr_i_r, 0) -CUSTOM_BUILTIN_MAPPING(V6_pred_and, 64) -CUSTOM_BUILTIN_MAPPING(V6_pred_and_128B, 128) -CUSTOM_BUILTIN_MAPPING(V6_pred_and_n, 64) -CUSTOM_BUILTIN_MAPPING(V6_pred_and_n_128B, 128) -CUSTOM_BUILTIN_MAPPING(V6_pred_not, 64) -CUSTOM_BUILTIN_MAPPING(V6_pred_not_128B, 128) -CUSTOM_BUILTIN_MAPPING(V6_pred_or, 64) -CUSTOM_BUILTIN_MAPPING(V6_pred_or_128B, 128) -CUSTOM_BUILTIN_MAPPING(V6_pred_or_n, 64) -CUSTOM_BUILTIN_MAPPING(V6_pred_or_n_128B, 128) -CUSTOM_BUILTIN_MAPPING(V6_pred_scalar2, 64) -CUSTOM_BUILTIN_MAPPING(V6_pred_scalar2_128B, 128) -CUSTOM_BUILTIN_MAPPING(V6_pred_xor, 64) -CUSTOM_BUILTIN_MAPPING(V6_pred_xor_128B, 128) -CUSTOM_BUILTIN_MAPPING(V6_vS32b_nqpred_ai, 64) -CUSTOM_BUILTIN_MAPPING(V6_vS32b_nqpred_ai_128B, 128) 
-CUSTOM_BUILTIN_MAPPING(V6_vS32b_nt_nqpred_ai, 64) -CUSTOM_BUILTIN_MAPPING(V6_vS32b_nt_nqpred_ai_128B, 128) -CUSTOM_BUILTIN_MAPPING(V6_vS32b_nt_qpred_ai, 64) -CUSTOM_BUILTIN_MAPPING(V6_vS32b_nt_qpred_ai_128B, 128) -CUSTOM_BUILTIN_MAPPING(V6_vS32b_qpred_ai, 64) -CUSTOM_BUILTIN_MAPPING(V6_vS32b_qpred_ai_128B, 128) -CUSTOM_BUILTIN_MAPPING(V6_vaddbnq, 64) -CUSTOM_BUILTIN_MAPPING(V6_vaddbnq_128B, 128) -CUSTOM_BUILTIN_MAPPING(V6_vaddbq, 64) -CUSTOM_BUILTIN_MAPPING(V6_vaddbq_128B, 128) -CUSTOM_BUILTIN_MAPPING(V6_vaddhnq, 64) -CUSTOM_BUILTIN_MAPPING(V6_vaddhnq_128B, 128) -CUSTOM_BUILTIN_MAPPING(V6_vaddhq, 64) -CUSTOM_BUILTIN_MAPPING(V6_vaddhq_128B, 128) -CUSTOM_BUILTIN_MAPPING(V6_vaddwnq, 64) -CUSTOM_BUILTIN_MAPPING(V6_vaddwnq_128B, 128) -CUSTOM_BUILTIN_MAPPING(V6_vaddwq, 64) -CUSTOM_BUILTIN_MAPPING(V6_vaddwq_128B, 128) -CUSTOM_BUILTIN_MAPPING(V6_vandqrt, 64) -CUSTOM_BUILTIN_MAPPING(V6_vandqrt_128B, 128) -CUSTOM_BUILTIN_MAPPING(V6_vandqrt_acc, 64) -CUSTOM_BUILTIN_MAPPING(V6_vandqrt_acc_128B, 128) -CUSTOM_BUILTIN_MAPPING(V6_vandvrt, 64) -CUSTOM_BUILTIN_MAPPING(V6_vandvrt_128B, 128) -CUSTOM_BUILTIN_MAPPING(V6_vandvrt_acc, 64) -CUSTOM_BUILTIN_MAPPING(V6_vandvrt_acc_128B, 128) -CUSTOM_BUILTIN_MAPPING(V6_veqb, 64) -CUSTOM_BUILTIN_MAPPING(V6_veqb_128B, 128) -CUSTOM_BUILTIN_MAPPING(V6_veqb_and, 64) -CUSTOM_BUILTIN_MAPPING(V6_veqb_and_128B, 128) -CUSTOM_BUILTIN_MAPPING(V6_veqb_or, 64) -CUSTOM_BUILTIN_MAPPING(V6_veqb_or_128B, 128) -CUSTOM_BUILTIN_MAPPING(V6_veqb_xor, 64) -CUSTOM_BUILTIN_MAPPING(V6_veqb_xor_128B, 128) -CUSTOM_BUILTIN_MAPPING(V6_veqh, 64) -CUSTOM_BUILTIN_MAPPING(V6_veqh_128B, 128) -CUSTOM_BUILTIN_MAPPING(V6_veqh_and, 64) -CUSTOM_BUILTIN_MAPPING(V6_veqh_and_128B, 128) -CUSTOM_BUILTIN_MAPPING(V6_veqh_or, 64) -CUSTOM_BUILTIN_MAPPING(V6_veqh_or_128B, 128) -CUSTOM_BUILTIN_MAPPING(V6_veqh_xor, 64) -CUSTOM_BUILTIN_MAPPING(V6_veqh_xor_128B, 128) -CUSTOM_BUILTIN_MAPPING(V6_veqw, 64) -CUSTOM_BUILTIN_MAPPING(V6_veqw_128B, 128) -CUSTOM_BUILTIN_MAPPING(V6_veqw_and, 64) -CUSTOM_BUILTIN_MAPPING(V6_veqw_and_128B, 128) -CUSTOM_BUILTIN_MAPPING(V6_veqw_or, 64) -CUSTOM_BUILTIN_MAPPING(V6_veqw_or_128B, 128) -CUSTOM_BUILTIN_MAPPING(V6_veqw_xor, 64) -CUSTOM_BUILTIN_MAPPING(V6_veqw_xor_128B, 128) -CUSTOM_BUILTIN_MAPPING(V6_vgtb, 64) -CUSTOM_BUILTIN_MAPPING(V6_vgtb_128B, 128) -CUSTOM_BUILTIN_MAPPING(V6_vgtb_and, 64) -CUSTOM_BUILTIN_MAPPING(V6_vgtb_and_128B, 128) -CUSTOM_BUILTIN_MAPPING(V6_vgtb_or, 64) -CUSTOM_BUILTIN_MAPPING(V6_vgtb_or_128B, 128) -CUSTOM_BUILTIN_MAPPING(V6_vgtb_xor, 64) -CUSTOM_BUILTIN_MAPPING(V6_vgtb_xor_128B, 128) -CUSTOM_BUILTIN_MAPPING(V6_vgth, 64) -CUSTOM_BUILTIN_MAPPING(V6_vgth_128B, 128) -CUSTOM_BUILTIN_MAPPING(V6_vgth_and, 64) -CUSTOM_BUILTIN_MAPPING(V6_vgth_and_128B, 128) -CUSTOM_BUILTIN_MAPPING(V6_vgth_or, 64) -CUSTOM_BUILTIN_MAPPING(V6_vgth_or_128B, 128) -CUSTOM_BUILTIN_MAPPING(V6_vgth_xor, 64) -CUSTOM_BUILTIN_MAPPING(V6_vgth_xor_128B, 128) -CUSTOM_BUILTIN_MAPPING(V6_vgtub, 64) -CUSTOM_BUILTIN_MAPPING(V6_vgtub_128B, 128) -CUSTOM_BUILTIN_MAPPING(V6_vgtub_and, 64) -CUSTOM_BUILTIN_MAPPING(V6_vgtub_and_128B, 128) -CUSTOM_BUILTIN_MAPPING(V6_vgtub_or, 64) -CUSTOM_BUILTIN_MAPPING(V6_vgtub_or_128B, 128) -CUSTOM_BUILTIN_MAPPING(V6_vgtub_xor, 64) -CUSTOM_BUILTIN_MAPPING(V6_vgtub_xor_128B, 128) -CUSTOM_BUILTIN_MAPPING(V6_vgtuh, 64) -CUSTOM_BUILTIN_MAPPING(V6_vgtuh_128B, 128) -CUSTOM_BUILTIN_MAPPING(V6_vgtuh_and, 64) -CUSTOM_BUILTIN_MAPPING(V6_vgtuh_and_128B, 128) -CUSTOM_BUILTIN_MAPPING(V6_vgtuh_or, 64) -CUSTOM_BUILTIN_MAPPING(V6_vgtuh_or_128B, 128) -CUSTOM_BUILTIN_MAPPING(V6_vgtuh_xor, 64) 
-CUSTOM_BUILTIN_MAPPING(V6_vgtuh_xor_128B, 128) -CUSTOM_BUILTIN_MAPPING(V6_vgtuw, 64) -CUSTOM_BUILTIN_MAPPING(V6_vgtuw_128B, 128) -CUSTOM_BUILTIN_MAPPING(V6_vgtuw_and, 64) -CUSTOM_BUILTIN_MAPPING(V6_vgtuw_and_128B, 128) -CUSTOM_BUILTIN_MAPPING(V6_vgtuw_or, 64) -CUSTOM_BUILTIN_MAPPING(V6_vgtuw_or_128B, 128) -CUSTOM_BUILTIN_MAPPING(V6_vgtuw_xor, 64) -CUSTOM_BUILTIN_MAPPING(V6_vgtuw_xor_128B, 128) -CUSTOM_BUILTIN_MAPPING(V6_vgtw, 64) -CUSTOM_BUILTIN_MAPPING(V6_vgtw_128B, 128) -CUSTOM_BUILTIN_MAPPING(V6_vgtw_and, 64) -CUSTOM_BUILTIN_MAPPING(V6_vgtw_and_128B, 128) -CUSTOM_BUILTIN_MAPPING(V6_vgtw_or, 64) -CUSTOM_BUILTIN_MAPPING(V6_vgtw_or_128B, 128) -CUSTOM_BUILTIN_MAPPING(V6_vgtw_xor, 64) -CUSTOM_BUILTIN_MAPPING(V6_vgtw_xor_128B, 128) -CUSTOM_BUILTIN_MAPPING(V6_vmux, 64) -CUSTOM_BUILTIN_MAPPING(V6_vmux_128B, 128) -CUSTOM_BUILTIN_MAPPING(V6_vsubbnq, 64) -CUSTOM_BUILTIN_MAPPING(V6_vsubbnq_128B, 128) -CUSTOM_BUILTIN_MAPPING(V6_vsubbq, 64) -CUSTOM_BUILTIN_MAPPING(V6_vsubbq_128B, 128) -CUSTOM_BUILTIN_MAPPING(V6_vsubhnq, 64) -CUSTOM_BUILTIN_MAPPING(V6_vsubhnq_128B, 128) -CUSTOM_BUILTIN_MAPPING(V6_vsubhq, 64) -CUSTOM_BUILTIN_MAPPING(V6_vsubhq_128B, 128) -CUSTOM_BUILTIN_MAPPING(V6_vsubwnq, 64) -CUSTOM_BUILTIN_MAPPING(V6_vsubwnq_128B, 128) -CUSTOM_BUILTIN_MAPPING(V6_vsubwq, 64) -CUSTOM_BUILTIN_MAPPING(V6_vsubwq_128B, 128) -CUSTOM_BUILTIN_MAPPING(V6_vswap, 64) -CUSTOM_BUILTIN_MAPPING(V6_vswap_128B, 128) -CUSTOM_BUILTIN_MAPPING(V6_pred_scalar2v2, 64) -CUSTOM_BUILTIN_MAPPING(V6_pred_scalar2v2_128B, 128) -CUSTOM_BUILTIN_MAPPING(V6_shuffeqh, 64) -CUSTOM_BUILTIN_MAPPING(V6_shuffeqh_128B, 128) -CUSTOM_BUILTIN_MAPPING(V6_shuffeqw, 64) -CUSTOM_BUILTIN_MAPPING(V6_shuffeqw_128B, 128) CUSTOM_BUILTIN_MAPPING(V6_vaddcarry, 64) CUSTOM_BUILTIN_MAPPING(V6_vaddcarry_128B, 128) -CUSTOM_BUILTIN_MAPPING(V6_vandnqrt, 64) -CUSTOM_BUILTIN_MAPPING(V6_vandnqrt_128B, 128) -CUSTOM_BUILTIN_MAPPING(V6_vandnqrt_acc, 64) -CUSTOM_BUILTIN_MAPPING(V6_vandnqrt_acc_128B, 128) -CUSTOM_BUILTIN_MAPPING(V6_vandvnqv, 64) -CUSTOM_BUILTIN_MAPPING(V6_vandvnqv_128B, 128) -CUSTOM_BUILTIN_MAPPING(V6_vandvqv, 64) -CUSTOM_BUILTIN_MAPPING(V6_vandvqv_128B, 128) CUSTOM_BUILTIN_MAPPING(V6_vsubcarry, 64) CUSTOM_BUILTIN_MAPPING(V6_vsubcarry_128B, 128) -CUSTOM_BUILTIN_MAPPING(V6_vgathermhq, 64) -CUSTOM_BUILTIN_MAPPING(V6_vgathermhq_128B, 128) -CUSTOM_BUILTIN_MAPPING(V6_vgathermhwq, 64) -CUSTOM_BUILTIN_MAPPING(V6_vgathermhwq_128B, 128) -CUSTOM_BUILTIN_MAPPING(V6_vgathermwq, 64) -CUSTOM_BUILTIN_MAPPING(V6_vgathermwq_128B, 128) -CUSTOM_BUILTIN_MAPPING(V6_vprefixqb, 64) -CUSTOM_BUILTIN_MAPPING(V6_vprefixqb_128B, 128) -CUSTOM_BUILTIN_MAPPING(V6_vprefixqh, 64) -CUSTOM_BUILTIN_MAPPING(V6_vprefixqh_128B, 128) -CUSTOM_BUILTIN_MAPPING(V6_vprefixqw, 64) -CUSTOM_BUILTIN_MAPPING(V6_vprefixqw_128B, 128) -CUSTOM_BUILTIN_MAPPING(V6_vscattermhq, 64) -CUSTOM_BUILTIN_MAPPING(V6_vscattermhq_128B, 128) -CUSTOM_BUILTIN_MAPPING(V6_vscattermhwq, 64) -CUSTOM_BUILTIN_MAPPING(V6_vscattermhwq_128B, 128) -CUSTOM_BUILTIN_MAPPING(V6_vscattermwq, 64) -CUSTOM_BUILTIN_MAPPING(V6_vscattermwq_128B, 128) -CUSTOM_BUILTIN_MAPPING(V6_vaddcarrysat, 64) -CUSTOM_BUILTIN_MAPPING(V6_vaddcarrysat_128B, 128) diff --git a/clang/lib/CodeGen/CGBuiltin.cpp b/clang/lib/CodeGen/CGBuiltin.cpp index 7224b10f2e90..1982b40ff667 100644 --- a/clang/lib/CodeGen/CGBuiltin.cpp +++ b/clang/lib/CodeGen/CGBuiltin.cpp @@ -18596,6 +18596,7 @@ getIntrinsicForHexagonNonGCCBuiltin(unsigned BuiltinID) { CUSTOM_BUILTIN_MAPPING(S2_storerf_pcr, 0) CUSTOM_BUILTIN_MAPPING(S2_storeri_pcr, 0) CUSTOM_BUILTIN_MAPPING(S2_storerd_pcr, 0) + 
// Legacy builtins that take a vector in place of a vector predicate. CUSTOM_BUILTIN_MAPPING(V6_vmaskedstoreq, 64) CUSTOM_BUILTIN_MAPPING(V6_vmaskedstorenq, 64) CUSTOM_BUILTIN_MAPPING(V6_vmaskedstorentq, 64) @@ -18733,6 +18734,27 @@ Value *CodeGenFunction::EmitHexagonBuiltinExpr(unsigned BuiltinID, return Builder.CreateExtractValue(Result, 0); } + case Hexagon::BI__builtin_HEXAGON_V6_vmaskedstoreq: + case Hexagon::BI__builtin_HEXAGON_V6_vmaskedstorenq: + case Hexagon::BI__builtin_HEXAGON_V6_vmaskedstorentq: + case Hexagon::BI__builtin_HEXAGON_V6_vmaskedstorentnq: + case Hexagon::BI__builtin_HEXAGON_V6_vmaskedstoreq_128B: + case Hexagon::BI__builtin_HEXAGON_V6_vmaskedstorenq_128B: + case Hexagon::BI__builtin_HEXAGON_V6_vmaskedstorentq_128B: + case Hexagon::BI__builtin_HEXAGON_V6_vmaskedstorentnq_128B: { + SmallVector Ops; + const Expr *PredOp = E->getArg(0); + // There will be an implicit cast to a boolean vector. Strip it. + if (auto *Cast = dyn_cast(PredOp)) { + if (Cast->getCastKind() == CK_BitCast) + PredOp = Cast->getSubExpr(); + Ops.push_back(V2Q(EmitScalarExpr(PredOp))); + } + for (int i = 1, e = E->getNumArgs(); i != e; ++i) + Ops.push_back(EmitScalarExpr(E->getArg(i))); + return Builder.CreateCall(CGM.getIntrinsic(ID), Ops); + } + case Hexagon::BI__builtin_HEXAGON_L2_loadrub_pci: case Hexagon::BI__builtin_HEXAGON_L2_loadrb_pci: case Hexagon::BI__builtin_HEXAGON_L2_loadruh_pci: @@ -18769,40 +18791,6 @@ Value *CodeGenFunction::EmitHexagonBuiltinExpr(unsigned BuiltinID, return MakeBrevLd(Intrinsic::hexagon_L2_loadri_pbr, Int32Ty); case Hexagon::BI__builtin_brev_ldd: return MakeBrevLd(Intrinsic::hexagon_L2_loadrd_pbr, Int64Ty); - - default: { - if (ID == Intrinsic::not_intrinsic) - return nullptr; - - auto IsVectorPredTy = [](llvm::Type *T) { - return T->isVectorTy() && - cast(T)->getElementType()->isIntegerTy(1); - }; - - llvm::Function *IntrFn = CGM.getIntrinsic(ID); - llvm::FunctionType *IntrTy = IntrFn->getFunctionType(); - SmallVector Ops; - for (unsigned i = 0, e = IntrTy->getNumParams(); i != e; ++i) { - llvm::Type *T = IntrTy->getParamType(i); - const Expr *A = E->getArg(i); - if (IsVectorPredTy(T)) { - // There will be an implicit cast to a boolean vector. Strip it. 
- if (auto *Cast = dyn_cast(A)) { - if (Cast->getCastKind() == CK_BitCast) - A = Cast->getSubExpr(); - } - Ops.push_back(V2Q(EmitScalarExpr(A))); - } else { - Ops.push_back(EmitScalarExpr(A)); - } - } - - llvm::Value *Call = Builder.CreateCall(IntrFn, Ops); - if (IsVectorPredTy(IntrTy->getReturnType())) - Call = Q2V(Call); - - return Call; - } // default } // switch return nullptr; diff --git a/clang/lib/Headers/hvx_hexagon_protos.h b/clang/lib/Headers/hvx_hexagon_protos.h index 41ce7a6b93e9..dab19949b889 100644 --- a/clang/lib/Headers/hvx_hexagon_protos.h +++ b/clang/lib/Headers/hvx_hexagon_protos.h @@ -9,7 +9,6 @@ //===----------------------------------------------------------------------===// - #ifndef _HVX_HEXAGON_PROTOS_H_ #define _HVX_HEXAGON_PROTOS_H_ 1 @@ -28,7 +27,7 @@ Execution Slots: SLOT0 ========================================================================== */ -#define Q6_R_vextract_VR __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_extractw) +#define Q6_R_vextract_VR(Vu,Rs) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_extractw)(Vu,Rs) #endif /* __HEXAGON_ARCH___ >= 60 */ #if __HVX_ARCH__ >= 60 @@ -39,7 +38,7 @@ Execution Slots: SLOT0123 ========================================================================== */ -#define Q6_V_hi_W __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_hi) +#define Q6_V_hi_W(Vss) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_hi)(Vss) #endif /* __HEXAGON_ARCH___ >= 60 */ #if __HVX_ARCH__ >= 60 @@ -50,7 +49,7 @@ Execution Slots: SLOT0123 ========================================================================== */ -#define Q6_V_lo_W __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_lo) +#define Q6_V_lo_W(Vss) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_lo)(Vss) #endif /* __HEXAGON_ARCH___ >= 60 */ #if __HVX_ARCH__ >= 60 @@ -61,7 +60,7 @@ Execution Slots: SLOT23 ========================================================================== */ -#define Q6_V_vsplat_R __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_lvsplatw) +#define Q6_V_vsplat_R(Rt) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_lvsplatw)(Rt) #endif /* __HEXAGON_ARCH___ >= 60 */ #if __HVX_ARCH__ >= 60 @@ -72,7 +71,7 @@ Execution Slots: SLOT0123 ========================================================================== */ -#define Q6_Q_and_QQ __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_pred_and) +#define Q6_Q_and_QQ(Qs,Qt) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vandqrt)((__BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_pred_and)(__BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vandvrt)((Qs),-1),__BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vandvrt)((Qt),-1))),-1) #endif /* __HEXAGON_ARCH___ >= 60 */ #if __HVX_ARCH__ >= 60 @@ -83,7 +82,7 @@ Execution Slots: SLOT0123 ========================================================================== */ -#define Q6_Q_and_QQn __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_pred_and_n) +#define Q6_Q_and_QQn(Qs,Qt) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vandqrt)((__BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_pred_and_n)(__BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vandvrt)((Qs),-1),__BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vandvrt)((Qt),-1))),-1) #endif /* __HEXAGON_ARCH___ >= 60 */ #if __HVX_ARCH__ >= 60 @@ -94,7 +93,7 @@ Execution Slots: SLOT0123 ========================================================================== */ -#define Q6_Q_not_Q __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_pred_not) +#define Q6_Q_not_Q(Qs) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vandqrt)((__BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_pred_not)(__BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vandvrt)((Qs),-1))),-1) #endif /* 
__HEXAGON_ARCH___ >= 60 */ #if __HVX_ARCH__ >= 60 @@ -105,7 +104,7 @@ Execution Slots: SLOT0123 ========================================================================== */ -#define Q6_Q_or_QQ __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_pred_or) +#define Q6_Q_or_QQ(Qs,Qt) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vandqrt)((__BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_pred_or)(__BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vandvrt)((Qs),-1),__BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vandvrt)((Qt),-1))),-1) #endif /* __HEXAGON_ARCH___ >= 60 */ #if __HVX_ARCH__ >= 60 @@ -116,7 +115,7 @@ Execution Slots: SLOT0123 ========================================================================== */ -#define Q6_Q_or_QQn __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_pred_or_n) +#define Q6_Q_or_QQn(Qs,Qt) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vandqrt)((__BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_pred_or_n)(__BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vandvrt)((Qs),-1),__BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vandvrt)((Qt),-1))),-1) #endif /* __HEXAGON_ARCH___ >= 60 */ #if __HVX_ARCH__ >= 60 @@ -127,7 +126,7 @@ Execution Slots: SLOT0123 ========================================================================== */ -#define Q6_Q_vsetq_R __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_pred_scalar2) +#define Q6_Q_vsetq_R(Rt) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vandqrt)((__BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_pred_scalar2)(Rt)),-1) #endif /* __HEXAGON_ARCH___ >= 60 */ #if __HVX_ARCH__ >= 60 @@ -138,7 +137,7 @@ Execution Slots: SLOT0123 ========================================================================== */ -#define Q6_Q_xor_QQ __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_pred_xor) +#define Q6_Q_xor_QQ(Qs,Qt) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vandqrt)((__BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_pred_xor)(__BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vandvrt)((Qs),-1),__BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vandvrt)((Qt),-1))),-1) #endif /* __HEXAGON_ARCH___ >= 60 */ #if __HVX_ARCH__ >= 60 @@ -149,7 +148,7 @@ Execution Slots: SLOT0 ========================================================================== */ -#define Q6_vmem_QnRIV __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vS32b_nqpred_ai) +#define Q6_vmem_QnRIV(Qv,Rt,Vs) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vS32b_nqpred_ai)(__BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vandvrt)((Qv),-1),Rt,Vs) #endif /* __HEXAGON_ARCH___ >= 60 */ #if __HVX_ARCH__ >= 60 @@ -160,7 +159,7 @@ Execution Slots: SLOT0 ========================================================================== */ -#define Q6_vmem_QnRIV_nt __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vS32b_nt_nqpred_ai) +#define Q6_vmem_QnRIV_nt(Qv,Rt,Vs) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vS32b_nt_nqpred_ai)(__BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vandvrt)((Qv),-1),Rt,Vs) #endif /* __HEXAGON_ARCH___ >= 60 */ #if __HVX_ARCH__ >= 60 @@ -171,7 +170,7 @@ Execution Slots: SLOT0 ========================================================================== */ -#define Q6_vmem_QRIV_nt __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vS32b_nt_qpred_ai) +#define Q6_vmem_QRIV_nt(Qv,Rt,Vs) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vS32b_nt_qpred_ai)(__BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vandvrt)((Qv),-1),Rt,Vs) #endif /* __HEXAGON_ARCH___ >= 60 */ #if __HVX_ARCH__ >= 60 @@ -182,7 +181,7 @@ Execution Slots: SLOT0 ========================================================================== */ -#define Q6_vmem_QRIV __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vS32b_qpred_ai) +#define Q6_vmem_QRIV(Qv,Rt,Vs) 
__BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vS32b_qpred_ai)(__BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vandvrt)((Qv),-1),Rt,Vs) #endif /* __HEXAGON_ARCH___ >= 60 */ #if __HVX_ARCH__ >= 60 @@ -193,7 +192,7 @@ Execution Slots: SLOT23 ========================================================================== */ -#define Q6_Vuh_vabsdiff_VhVh __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vabsdiffh) +#define Q6_Vuh_vabsdiff_VhVh(Vu,Vv) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vabsdiffh)(Vu,Vv) #endif /* __HEXAGON_ARCH___ >= 60 */ #if __HVX_ARCH__ >= 60 @@ -204,7 +203,7 @@ Execution Slots: SLOT23 ========================================================================== */ -#define Q6_Vub_vabsdiff_VubVub __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vabsdiffub) +#define Q6_Vub_vabsdiff_VubVub(Vu,Vv) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vabsdiffub)(Vu,Vv) #endif /* __HEXAGON_ARCH___ >= 60 */ #if __HVX_ARCH__ >= 60 @@ -215,7 +214,7 @@ Execution Slots: SLOT23 ========================================================================== */ -#define Q6_Vuh_vabsdiff_VuhVuh __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vabsdiffuh) +#define Q6_Vuh_vabsdiff_VuhVuh(Vu,Vv) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vabsdiffuh)(Vu,Vv) #endif /* __HEXAGON_ARCH___ >= 60 */ #if __HVX_ARCH__ >= 60 @@ -226,7 +225,7 @@ Execution Slots: SLOT23 ========================================================================== */ -#define Q6_Vuw_vabsdiff_VwVw __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vabsdiffw) +#define Q6_Vuw_vabsdiff_VwVw(Vu,Vv) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vabsdiffw)(Vu,Vv) #endif /* __HEXAGON_ARCH___ >= 60 */ #if __HVX_ARCH__ >= 60 @@ -237,7 +236,7 @@ Execution Slots: SLOT0123 ========================================================================== */ -#define Q6_Vh_vabs_Vh __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vabsh) +#define Q6_Vh_vabs_Vh(Vu) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vabsh)(Vu) #endif /* __HEXAGON_ARCH___ >= 60 */ #if __HVX_ARCH__ >= 60 @@ -248,7 +247,7 @@ Execution Slots: SLOT0123 ========================================================================== */ -#define Q6_Vh_vabs_Vh_sat __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vabsh_sat) +#define Q6_Vh_vabs_Vh_sat(Vu) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vabsh_sat)(Vu) #endif /* __HEXAGON_ARCH___ >= 60 */ #if __HVX_ARCH__ >= 60 @@ -259,7 +258,7 @@ Execution Slots: SLOT0123 ========================================================================== */ -#define Q6_Vw_vabs_Vw __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vabsw) +#define Q6_Vw_vabs_Vw(Vu) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vabsw)(Vu) #endif /* __HEXAGON_ARCH___ >= 60 */ #if __HVX_ARCH__ >= 60 @@ -270,7 +269,7 @@ Execution Slots: SLOT0123 ========================================================================== */ -#define Q6_Vw_vabs_Vw_sat __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vabsw_sat) +#define Q6_Vw_vabs_Vw_sat(Vu) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vabsw_sat)(Vu) #endif /* __HEXAGON_ARCH___ >= 60 */ #if __HVX_ARCH__ >= 60 @@ -281,7 +280,7 @@ Execution Slots: SLOT0123 ========================================================================== */ -#define Q6_Vb_vadd_VbVb __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vaddb) +#define Q6_Vb_vadd_VbVb(Vu,Vv) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vaddb)(Vu,Vv) #endif /* __HEXAGON_ARCH___ >= 60 */ #if __HVX_ARCH__ >= 60 @@ -292,7 +291,7 @@ Execution Slots: SLOT0123 ========================================================================== */ -#define Q6_Wb_vadd_WbWb 
__BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vaddb_dv) +#define Q6_Wb_vadd_WbWb(Vuu,Vvv) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vaddb_dv)(Vuu,Vvv) #endif /* __HEXAGON_ARCH___ >= 60 */ #if __HVX_ARCH__ >= 60 @@ -303,7 +302,7 @@ Execution Slots: SLOT0123 ========================================================================== */ -#define Q6_Vb_condacc_QnVbVb __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vaddbnq) +#define Q6_Vb_condacc_QnVbVb(Qv,Vx,Vu) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vaddbnq)(__BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vandvrt)((Qv),-1),Vx,Vu) #endif /* __HEXAGON_ARCH___ >= 60 */ #if __HVX_ARCH__ >= 60 @@ -314,7 +313,7 @@ Execution Slots: SLOT0123 ========================================================================== */ -#define Q6_Vb_condacc_QVbVb __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vaddbq) +#define Q6_Vb_condacc_QVbVb(Qv,Vx,Vu) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vaddbq)(__BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vandvrt)((Qv),-1),Vx,Vu) #endif /* __HEXAGON_ARCH___ >= 60 */ #if __HVX_ARCH__ >= 60 @@ -325,7 +324,7 @@ Execution Slots: SLOT0123 ========================================================================== */ -#define Q6_Vh_vadd_VhVh __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vaddh) +#define Q6_Vh_vadd_VhVh(Vu,Vv) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vaddh)(Vu,Vv) #endif /* __HEXAGON_ARCH___ >= 60 */ #if __HVX_ARCH__ >= 60 @@ -336,7 +335,7 @@ Execution Slots: SLOT0123 ========================================================================== */ -#define Q6_Wh_vadd_WhWh __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vaddh_dv) +#define Q6_Wh_vadd_WhWh(Vuu,Vvv) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vaddh_dv)(Vuu,Vvv) #endif /* __HEXAGON_ARCH___ >= 60 */ #if __HVX_ARCH__ >= 60 @@ -347,7 +346,7 @@ Execution Slots: SLOT0123 ========================================================================== */ -#define Q6_Vh_condacc_QnVhVh __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vaddhnq) +#define Q6_Vh_condacc_QnVhVh(Qv,Vx,Vu) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vaddhnq)(__BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vandvrt)((Qv),-1),Vx,Vu) #endif /* __HEXAGON_ARCH___ >= 60 */ #if __HVX_ARCH__ >= 60 @@ -358,7 +357,7 @@ Execution Slots: SLOT0123 ========================================================================== */ -#define Q6_Vh_condacc_QVhVh __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vaddhq) +#define Q6_Vh_condacc_QVhVh(Qv,Vx,Vu) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vaddhq)(__BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vandvrt)((Qv),-1),Vx,Vu) #endif /* __HEXAGON_ARCH___ >= 60 */ #if __HVX_ARCH__ >= 60 @@ -369,7 +368,7 @@ Execution Slots: SLOT0123 ========================================================================== */ -#define Q6_Vh_vadd_VhVh_sat __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vaddhsat) +#define Q6_Vh_vadd_VhVh_sat(Vu,Vv) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vaddhsat)(Vu,Vv) #endif /* __HEXAGON_ARCH___ >= 60 */ #if __HVX_ARCH__ >= 60 @@ -380,7 +379,7 @@ Execution Slots: SLOT0123 ========================================================================== */ -#define Q6_Wh_vadd_WhWh_sat __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vaddhsat_dv) +#define Q6_Wh_vadd_WhWh_sat(Vuu,Vvv) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vaddhsat_dv)(Vuu,Vvv) #endif /* __HEXAGON_ARCH___ >= 60 */ #if __HVX_ARCH__ >= 60 @@ -391,7 +390,7 @@ Execution Slots: SLOT23 ========================================================================== */ -#define Q6_Ww_vadd_VhVh __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vaddhw) +#define 
Q6_Ww_vadd_VhVh(Vu,Vv) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vaddhw)(Vu,Vv) #endif /* __HEXAGON_ARCH___ >= 60 */ #if __HVX_ARCH__ >= 60 @@ -402,7 +401,7 @@ Execution Slots: SLOT23 ========================================================================== */ -#define Q6_Wh_vadd_VubVub __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vaddubh) +#define Q6_Wh_vadd_VubVub(Vu,Vv) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vaddubh)(Vu,Vv) #endif /* __HEXAGON_ARCH___ >= 60 */ #if __HVX_ARCH__ >= 60 @@ -413,7 +412,7 @@ Execution Slots: SLOT0123 ========================================================================== */ -#define Q6_Vub_vadd_VubVub_sat __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vaddubsat) +#define Q6_Vub_vadd_VubVub_sat(Vu,Vv) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vaddubsat)(Vu,Vv) #endif /* __HEXAGON_ARCH___ >= 60 */ #if __HVX_ARCH__ >= 60 @@ -424,7 +423,7 @@ Execution Slots: SLOT0123 ========================================================================== */ -#define Q6_Wub_vadd_WubWub_sat __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vaddubsat_dv) +#define Q6_Wub_vadd_WubWub_sat(Vuu,Vvv) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vaddubsat_dv)(Vuu,Vvv) #endif /* __HEXAGON_ARCH___ >= 60 */ #if __HVX_ARCH__ >= 60 @@ -435,7 +434,7 @@ Execution Slots: SLOT0123 ========================================================================== */ -#define Q6_Vuh_vadd_VuhVuh_sat __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vadduhsat) +#define Q6_Vuh_vadd_VuhVuh_sat(Vu,Vv) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vadduhsat)(Vu,Vv) #endif /* __HEXAGON_ARCH___ >= 60 */ #if __HVX_ARCH__ >= 60 @@ -446,7 +445,7 @@ Execution Slots: SLOT0123 ========================================================================== */ -#define Q6_Wuh_vadd_WuhWuh_sat __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vadduhsat_dv) +#define Q6_Wuh_vadd_WuhWuh_sat(Vuu,Vvv) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vadduhsat_dv)(Vuu,Vvv) #endif /* __HEXAGON_ARCH___ >= 60 */ #if __HVX_ARCH__ >= 60 @@ -457,7 +456,7 @@ Execution Slots: SLOT23 ========================================================================== */ -#define Q6_Ww_vadd_VuhVuh __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vadduhw) +#define Q6_Ww_vadd_VuhVuh(Vu,Vv) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vadduhw)(Vu,Vv) #endif /* __HEXAGON_ARCH___ >= 60 */ #if __HVX_ARCH__ >= 60 @@ -468,7 +467,7 @@ Execution Slots: SLOT0123 ========================================================================== */ -#define Q6_Vw_vadd_VwVw __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vaddw) +#define Q6_Vw_vadd_VwVw(Vu,Vv) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vaddw)(Vu,Vv) #endif /* __HEXAGON_ARCH___ >= 60 */ #if __HVX_ARCH__ >= 60 @@ -479,7 +478,7 @@ Execution Slots: SLOT0123 ========================================================================== */ -#define Q6_Ww_vadd_WwWw __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vaddw_dv) +#define Q6_Ww_vadd_WwWw(Vuu,Vvv) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vaddw_dv)(Vuu,Vvv) #endif /* __HEXAGON_ARCH___ >= 60 */ #if __HVX_ARCH__ >= 60 @@ -490,7 +489,7 @@ Execution Slots: SLOT0123 ========================================================================== */ -#define Q6_Vw_condacc_QnVwVw __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vaddwnq) +#define Q6_Vw_condacc_QnVwVw(Qv,Vx,Vu) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vaddwnq)(__BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vandvrt)((Qv),-1),Vx,Vu) #endif /* __HEXAGON_ARCH___ >= 60 */ #if __HVX_ARCH__ >= 60 @@ -501,7 +500,7 @@ Execution Slots: SLOT0123 
========================================================================== */ -#define Q6_Vw_condacc_QVwVw __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vaddwq) +#define Q6_Vw_condacc_QVwVw(Qv,Vx,Vu) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vaddwq)(__BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vandvrt)((Qv),-1),Vx,Vu) #endif /* __HEXAGON_ARCH___ >= 60 */ #if __HVX_ARCH__ >= 60 @@ -512,7 +511,7 @@ Execution Slots: SLOT0123 ========================================================================== */ -#define Q6_Vw_vadd_VwVw_sat __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vaddwsat) +#define Q6_Vw_vadd_VwVw_sat(Vu,Vv) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vaddwsat)(Vu,Vv) #endif /* __HEXAGON_ARCH___ >= 60 */ #if __HVX_ARCH__ >= 60 @@ -523,7 +522,7 @@ Execution Slots: SLOT0123 ========================================================================== */ -#define Q6_Ww_vadd_WwWw_sat __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vaddwsat_dv) +#define Q6_Ww_vadd_WwWw_sat(Vuu,Vvv) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vaddwsat_dv)(Vuu,Vvv) #endif /* __HEXAGON_ARCH___ >= 60 */ #if __HVX_ARCH__ >= 60 @@ -534,7 +533,7 @@ Execution Slots: SLOT0123 ========================================================================== */ -#define Q6_V_valign_VVR __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_valignb) +#define Q6_V_valign_VVR(Vu,Vv,Rt) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_valignb)(Vu,Vv,Rt) #endif /* __HEXAGON_ARCH___ >= 60 */ #if __HVX_ARCH__ >= 60 @@ -545,7 +544,7 @@ Execution Slots: SLOT0123 ========================================================================== */ -#define Q6_V_valign_VVI __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_valignbi) +#define Q6_V_valign_VVI(Vu,Vv,Iu3) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_valignbi)(Vu,Vv,Iu3) #endif /* __HEXAGON_ARCH___ >= 60 */ #if __HVX_ARCH__ >= 60 @@ -556,7 +555,7 @@ Execution Slots: SLOT0123 ========================================================================== */ -#define Q6_V_vand_VV __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vand) +#define Q6_V_vand_VV(Vu,Vv) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vand)(Vu,Vv) #endif /* __HEXAGON_ARCH___ >= 60 */ #if __HVX_ARCH__ >= 60 @@ -567,7 +566,7 @@ Execution Slots: SLOT23 ========================================================================== */ -#define Q6_V_vand_QR __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vandqrt) +#define Q6_V_vand_QR(Qu,Rt) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vandqrt)(__BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vandvrt)((Qu),-1),Rt) #endif /* __HEXAGON_ARCH___ >= 60 */ #if __HVX_ARCH__ >= 60 @@ -578,7 +577,7 @@ Execution Slots: SLOT23 ========================================================================== */ -#define Q6_V_vandor_VQR __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vandqrt_acc) +#define Q6_V_vandor_VQR(Vx,Qu,Rt) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vandqrt_acc)(Vx,__BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vandvrt)((Qu),-1),Rt) #endif /* __HEXAGON_ARCH___ >= 60 */ #if __HVX_ARCH__ >= 60 @@ -589,7 +588,7 @@ Execution Slots: SLOT23 ========================================================================== */ -#define Q6_Q_vand_VR __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vandvrt) +#define Q6_Q_vand_VR(Vu,Rt) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vandqrt)((__BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vandvrt)(Vu,Rt)),-1) #endif /* __HEXAGON_ARCH___ >= 60 */ #if __HVX_ARCH__ >= 60 @@ -600,7 +599,7 @@ Execution Slots: SLOT23 ========================================================================== */ -#define Q6_Q_vandor_QVR 
__BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vandvrt_acc) +#define Q6_Q_vandor_QVR(Qx,Vu,Rt) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vandqrt)((__BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vandvrt_acc)(__BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vandvrt)((Qx),-1),Vu,Rt)),-1) #endif /* __HEXAGON_ARCH___ >= 60 */ #if __HVX_ARCH__ >= 60 @@ -611,7 +610,7 @@ Execution Slots: SLOT0123 ========================================================================== */ -#define Q6_Vh_vasl_VhR __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vaslh) +#define Q6_Vh_vasl_VhR(Vu,Rt) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vaslh)(Vu,Rt) #endif /* __HEXAGON_ARCH___ >= 60 */ #if __HVX_ARCH__ >= 60 @@ -622,7 +621,7 @@ Execution Slots: SLOT0123 ========================================================================== */ -#define Q6_Vh_vasl_VhVh __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vaslhv) +#define Q6_Vh_vasl_VhVh(Vu,Vv) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vaslhv)(Vu,Vv) #endif /* __HEXAGON_ARCH___ >= 60 */ #if __HVX_ARCH__ >= 60 @@ -633,7 +632,7 @@ Execution Slots: SLOT0123 ========================================================================== */ -#define Q6_Vw_vasl_VwR __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vaslw) +#define Q6_Vw_vasl_VwR(Vu,Rt) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vaslw)(Vu,Rt) #endif /* __HEXAGON_ARCH___ >= 60 */ #if __HVX_ARCH__ >= 60 @@ -644,7 +643,7 @@ Execution Slots: SLOT0123 ========================================================================== */ -#define Q6_Vw_vaslacc_VwVwR __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vaslw_acc) +#define Q6_Vw_vaslacc_VwVwR(Vx,Vu,Rt) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vaslw_acc)(Vx,Vu,Rt) #endif /* __HEXAGON_ARCH___ >= 60 */ #if __HVX_ARCH__ >= 60 @@ -655,7 +654,7 @@ Execution Slots: SLOT0123 ========================================================================== */ -#define Q6_Vw_vasl_VwVw __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vaslwv) +#define Q6_Vw_vasl_VwVw(Vu,Vv) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vaslwv)(Vu,Vv) #endif /* __HEXAGON_ARCH___ >= 60 */ #if __HVX_ARCH__ >= 60 @@ -666,7 +665,7 @@ Execution Slots: SLOT0123 ========================================================================== */ -#define Q6_Vh_vasr_VhR __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vasrh) +#define Q6_Vh_vasr_VhR(Vu,Rt) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vasrh)(Vu,Rt) #endif /* __HEXAGON_ARCH___ >= 60 */ #if __HVX_ARCH__ >= 60 @@ -677,7 +676,7 @@ Execution Slots: SLOT0123 ========================================================================== */ -#define Q6_Vb_vasr_VhVhR_rnd_sat __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vasrhbrndsat) +#define Q6_Vb_vasr_VhVhR_rnd_sat(Vu,Vv,Rt) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vasrhbrndsat)(Vu,Vv,Rt) #endif /* __HEXAGON_ARCH___ >= 60 */ #if __HVX_ARCH__ >= 60 @@ -688,7 +687,7 @@ Execution Slots: SLOT0123 ========================================================================== */ -#define Q6_Vub_vasr_VhVhR_rnd_sat __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vasrhubrndsat) +#define Q6_Vub_vasr_VhVhR_rnd_sat(Vu,Vv,Rt) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vasrhubrndsat)(Vu,Vv,Rt) #endif /* __HEXAGON_ARCH___ >= 60 */ #if __HVX_ARCH__ >= 60 @@ -699,7 +698,7 @@ Execution Slots: SLOT0123 ========================================================================== */ -#define Q6_Vub_vasr_VhVhR_sat __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vasrhubsat) +#define Q6_Vub_vasr_VhVhR_sat(Vu,Vv,Rt) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vasrhubsat)(Vu,Vv,Rt) #endif /* 
__HEXAGON_ARCH___ >= 60 */ #if __HVX_ARCH__ >= 60 @@ -710,7 +709,7 @@ Execution Slots: SLOT0123 ========================================================================== */ -#define Q6_Vh_vasr_VhVh __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vasrhv) +#define Q6_Vh_vasr_VhVh(Vu,Vv) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vasrhv)(Vu,Vv) #endif /* __HEXAGON_ARCH___ >= 60 */ #if __HVX_ARCH__ >= 60 @@ -721,7 +720,7 @@ Execution Slots: SLOT0123 ========================================================================== */ -#define Q6_Vw_vasr_VwR __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vasrw) +#define Q6_Vw_vasr_VwR(Vu,Rt) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vasrw)(Vu,Rt) #endif /* __HEXAGON_ARCH___ >= 60 */ #if __HVX_ARCH__ >= 60 @@ -732,7 +731,7 @@ Execution Slots: SLOT0123 ========================================================================== */ -#define Q6_Vw_vasracc_VwVwR __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vasrw_acc) +#define Q6_Vw_vasracc_VwVwR(Vx,Vu,Rt) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vasrw_acc)(Vx,Vu,Rt) #endif /* __HEXAGON_ARCH___ >= 60 */ #if __HVX_ARCH__ >= 60 @@ -743,7 +742,7 @@ Execution Slots: SLOT0123 ========================================================================== */ -#define Q6_Vh_vasr_VwVwR __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vasrwh) +#define Q6_Vh_vasr_VwVwR(Vu,Vv,Rt) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vasrwh)(Vu,Vv,Rt) #endif /* __HEXAGON_ARCH___ >= 60 */ #if __HVX_ARCH__ >= 60 @@ -754,7 +753,7 @@ Execution Slots: SLOT0123 ========================================================================== */ -#define Q6_Vh_vasr_VwVwR_rnd_sat __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vasrwhrndsat) +#define Q6_Vh_vasr_VwVwR_rnd_sat(Vu,Vv,Rt) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vasrwhrndsat)(Vu,Vv,Rt) #endif /* __HEXAGON_ARCH___ >= 60 */ #if __HVX_ARCH__ >= 60 @@ -765,7 +764,7 @@ Execution Slots: SLOT0123 ========================================================================== */ -#define Q6_Vh_vasr_VwVwR_sat __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vasrwhsat) +#define Q6_Vh_vasr_VwVwR_sat(Vu,Vv,Rt) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vasrwhsat)(Vu,Vv,Rt) #endif /* __HEXAGON_ARCH___ >= 60 */ #if __HVX_ARCH__ >= 60 @@ -776,7 +775,7 @@ Execution Slots: SLOT0123 ========================================================================== */ -#define Q6_Vuh_vasr_VwVwR_sat __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vasrwuhsat) +#define Q6_Vuh_vasr_VwVwR_sat(Vu,Vv,Rt) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vasrwuhsat)(Vu,Vv,Rt) #endif /* __HEXAGON_ARCH___ >= 60 */ #if __HVX_ARCH__ >= 60 @@ -787,7 +786,7 @@ Execution Slots: SLOT0123 ========================================================================== */ -#define Q6_Vw_vasr_VwVw __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vasrwv) +#define Q6_Vw_vasr_VwVw(Vu,Vv) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vasrwv)(Vu,Vv) #endif /* __HEXAGON_ARCH___ >= 60 */ #if __HVX_ARCH__ >= 60 @@ -798,7 +797,7 @@ Execution Slots: SLOT0123 ========================================================================== */ -#define Q6_V_equals_V __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vassign) +#define Q6_V_equals_V(Vu) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vassign)(Vu) #endif /* __HEXAGON_ARCH___ >= 60 */ #if __HVX_ARCH__ >= 60 @@ -809,7 +808,7 @@ Execution Slots: SLOT0123 ========================================================================== */ -#define Q6_W_equals_W __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vassignp) +#define Q6_W_equals_W(Vuu) 
__BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vassignp)(Vuu) #endif /* __HEXAGON_ARCH___ >= 60 */ #if __HVX_ARCH__ >= 60 @@ -820,7 +819,7 @@ Execution Slots: SLOT0123 ========================================================================== */ -#define Q6_Vh_vavg_VhVh __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vavgh) +#define Q6_Vh_vavg_VhVh(Vu,Vv) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vavgh)(Vu,Vv) #endif /* __HEXAGON_ARCH___ >= 60 */ #if __HVX_ARCH__ >= 60 @@ -831,7 +830,7 @@ Execution Slots: SLOT0123 ========================================================================== */ -#define Q6_Vh_vavg_VhVh_rnd __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vavghrnd) +#define Q6_Vh_vavg_VhVh_rnd(Vu,Vv) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vavghrnd)(Vu,Vv) #endif /* __HEXAGON_ARCH___ >= 60 */ #if __HVX_ARCH__ >= 60 @@ -842,7 +841,7 @@ Execution Slots: SLOT0123 ========================================================================== */ -#define Q6_Vub_vavg_VubVub __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vavgub) +#define Q6_Vub_vavg_VubVub(Vu,Vv) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vavgub)(Vu,Vv) #endif /* __HEXAGON_ARCH___ >= 60 */ #if __HVX_ARCH__ >= 60 @@ -853,7 +852,7 @@ Execution Slots: SLOT0123 ========================================================================== */ -#define Q6_Vub_vavg_VubVub_rnd __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vavgubrnd) +#define Q6_Vub_vavg_VubVub_rnd(Vu,Vv) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vavgubrnd)(Vu,Vv) #endif /* __HEXAGON_ARCH___ >= 60 */ #if __HVX_ARCH__ >= 60 @@ -864,7 +863,7 @@ Execution Slots: SLOT0123 ========================================================================== */ -#define Q6_Vuh_vavg_VuhVuh __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vavguh) +#define Q6_Vuh_vavg_VuhVuh(Vu,Vv) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vavguh)(Vu,Vv) #endif /* __HEXAGON_ARCH___ >= 60 */ #if __HVX_ARCH__ >= 60 @@ -875,7 +874,7 @@ Execution Slots: SLOT0123 ========================================================================== */ -#define Q6_Vuh_vavg_VuhVuh_rnd __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vavguhrnd) +#define Q6_Vuh_vavg_VuhVuh_rnd(Vu,Vv) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vavguhrnd)(Vu,Vv) #endif /* __HEXAGON_ARCH___ >= 60 */ #if __HVX_ARCH__ >= 60 @@ -886,7 +885,7 @@ Execution Slots: SLOT0123 ========================================================================== */ -#define Q6_Vw_vavg_VwVw __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vavgw) +#define Q6_Vw_vavg_VwVw(Vu,Vv) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vavgw)(Vu,Vv) #endif /* __HEXAGON_ARCH___ >= 60 */ #if __HVX_ARCH__ >= 60 @@ -897,7 +896,7 @@ Execution Slots: SLOT0123 ========================================================================== */ -#define Q6_Vw_vavg_VwVw_rnd __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vavgwrnd) +#define Q6_Vw_vavg_VwVw_rnd(Vu,Vv) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vavgwrnd)(Vu,Vv) #endif /* __HEXAGON_ARCH___ >= 60 */ #if __HVX_ARCH__ >= 60 @@ -908,7 +907,7 @@ Execution Slots: SLOT0123 ========================================================================== */ -#define Q6_Vuh_vcl0_Vuh __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vcl0h) +#define Q6_Vuh_vcl0_Vuh(Vu) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vcl0h)(Vu) #endif /* __HEXAGON_ARCH___ >= 60 */ #if __HVX_ARCH__ >= 60 @@ -919,7 +918,7 @@ Execution Slots: SLOT0123 ========================================================================== */ -#define Q6_Vuw_vcl0_Vuw __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vcl0w) +#define 
Q6_Vuw_vcl0_Vuw(Vu) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vcl0w)(Vu) #endif /* __HEXAGON_ARCH___ >= 60 */ #if __HVX_ARCH__ >= 60 @@ -930,7 +929,7 @@ Execution Slots: SLOT0123 ========================================================================== */ -#define Q6_W_vcombine_VV __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vcombine) +#define Q6_W_vcombine_VV(Vu,Vv) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vcombine)(Vu,Vv) #endif /* __HEXAGON_ARCH___ >= 60 */ #if __HVX_ARCH__ >= 60 @@ -941,7 +940,7 @@ Execution Slots: SLOT0123 ========================================================================== */ -#define Q6_V_vzero __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vd0) +#define Q6_V_vzero() __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vd0)() #endif /* __HEXAGON_ARCH___ >= 60 */ #if __HVX_ARCH__ >= 60 @@ -952,7 +951,7 @@ Execution Slots: SLOT0123 ========================================================================== */ -#define Q6_Vb_vdeal_Vb __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vdealb) +#define Q6_Vb_vdeal_Vb(Vu) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vdealb)(Vu) #endif /* __HEXAGON_ARCH___ >= 60 */ #if __HVX_ARCH__ >= 60 @@ -963,7 +962,7 @@ Execution Slots: SLOT0123 ========================================================================== */ -#define Q6_Vb_vdeale_VbVb __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vdealb4w) +#define Q6_Vb_vdeale_VbVb(Vu,Vv) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vdealb4w)(Vu,Vv) #endif /* __HEXAGON_ARCH___ >= 60 */ #if __HVX_ARCH__ >= 60 @@ -974,7 +973,7 @@ Execution Slots: SLOT0123 ========================================================================== */ -#define Q6_Vh_vdeal_Vh __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vdealh) +#define Q6_Vh_vdeal_Vh(Vu) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vdealh)(Vu) #endif /* __HEXAGON_ARCH___ >= 60 */ #if __HVX_ARCH__ >= 60 @@ -985,7 +984,7 @@ Execution Slots: SLOT0123 ========================================================================== */ -#define Q6_W_vdeal_VVR __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vdealvdd) +#define Q6_W_vdeal_VVR(Vu,Vv,Rt) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vdealvdd)(Vu,Vv,Rt) #endif /* __HEXAGON_ARCH___ >= 60 */ #if __HVX_ARCH__ >= 60 @@ -996,7 +995,7 @@ Execution Slots: SLOT0123 ========================================================================== */ -#define Q6_V_vdelta_VV __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vdelta) +#define Q6_V_vdelta_VV(Vu,Vv) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vdelta)(Vu,Vv) #endif /* __HEXAGON_ARCH___ >= 60 */ #if __HVX_ARCH__ >= 60 @@ -1007,7 +1006,7 @@ Execution Slots: SLOT23 ========================================================================== */ -#define Q6_Vh_vdmpy_VubRb __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vdmpybus) +#define Q6_Vh_vdmpy_VubRb(Vu,Rt) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vdmpybus)(Vu,Rt) #endif /* __HEXAGON_ARCH___ >= 60 */ #if __HVX_ARCH__ >= 60 @@ -1018,7 +1017,7 @@ Execution Slots: SLOT23 ========================================================================== */ -#define Q6_Vh_vdmpyacc_VhVubRb __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vdmpybus_acc) +#define Q6_Vh_vdmpyacc_VhVubRb(Vx,Vu,Rt) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vdmpybus_acc)(Vx,Vu,Rt) #endif /* __HEXAGON_ARCH___ >= 60 */ #if __HVX_ARCH__ >= 60 @@ -1029,7 +1028,7 @@ Execution Slots: SLOT23 ========================================================================== */ -#define Q6_Wh_vdmpy_WubRb __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vdmpybus_dv) +#define Q6_Wh_vdmpy_WubRb(Vuu,Rt) 
__BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vdmpybus_dv)(Vuu,Rt) #endif /* __HEXAGON_ARCH___ >= 60 */ #if __HVX_ARCH__ >= 60 @@ -1040,7 +1039,7 @@ Execution Slots: SLOT23 ========================================================================== */ -#define Q6_Wh_vdmpyacc_WhWubRb __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vdmpybus_dv_acc) +#define Q6_Wh_vdmpyacc_WhWubRb(Vxx,Vuu,Rt) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vdmpybus_dv_acc)(Vxx,Vuu,Rt) #endif /* __HEXAGON_ARCH___ >= 60 */ #if __HVX_ARCH__ >= 60 @@ -1051,7 +1050,7 @@ Execution Slots: SLOT23 ========================================================================== */ -#define Q6_Vw_vdmpy_VhRb __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vdmpyhb) +#define Q6_Vw_vdmpy_VhRb(Vu,Rt) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vdmpyhb)(Vu,Rt) #endif /* __HEXAGON_ARCH___ >= 60 */ #if __HVX_ARCH__ >= 60 @@ -1062,7 +1061,7 @@ Execution Slots: SLOT23 ========================================================================== */ -#define Q6_Vw_vdmpyacc_VwVhRb __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vdmpyhb_acc) +#define Q6_Vw_vdmpyacc_VwVhRb(Vx,Vu,Rt) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vdmpyhb_acc)(Vx,Vu,Rt) #endif /* __HEXAGON_ARCH___ >= 60 */ #if __HVX_ARCH__ >= 60 @@ -1073,7 +1072,7 @@ Execution Slots: SLOT23 ========================================================================== */ -#define Q6_Ww_vdmpy_WhRb __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vdmpyhb_dv) +#define Q6_Ww_vdmpy_WhRb(Vuu,Rt) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vdmpyhb_dv)(Vuu,Rt) #endif /* __HEXAGON_ARCH___ >= 60 */ #if __HVX_ARCH__ >= 60 @@ -1084,7 +1083,7 @@ Execution Slots: SLOT23 ========================================================================== */ -#define Q6_Ww_vdmpyacc_WwWhRb __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vdmpyhb_dv_acc) +#define Q6_Ww_vdmpyacc_WwWhRb(Vxx,Vuu,Rt) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vdmpyhb_dv_acc)(Vxx,Vuu,Rt) #endif /* __HEXAGON_ARCH___ >= 60 */ #if __HVX_ARCH__ >= 60 @@ -1095,7 +1094,7 @@ Execution Slots: SLOT23 ========================================================================== */ -#define Q6_Vw_vdmpy_WhRh_sat __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vdmpyhisat) +#define Q6_Vw_vdmpy_WhRh_sat(Vuu,Rt) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vdmpyhisat)(Vuu,Rt) #endif /* __HEXAGON_ARCH___ >= 60 */ #if __HVX_ARCH__ >= 60 @@ -1106,7 +1105,7 @@ Execution Slots: SLOT23 ========================================================================== */ -#define Q6_Vw_vdmpyacc_VwWhRh_sat __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vdmpyhisat_acc) +#define Q6_Vw_vdmpyacc_VwWhRh_sat(Vx,Vuu,Rt) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vdmpyhisat_acc)(Vx,Vuu,Rt) #endif /* __HEXAGON_ARCH___ >= 60 */ #if __HVX_ARCH__ >= 60 @@ -1117,7 +1116,7 @@ Execution Slots: SLOT23 ========================================================================== */ -#define Q6_Vw_vdmpy_VhRh_sat __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vdmpyhsat) +#define Q6_Vw_vdmpy_VhRh_sat(Vu,Rt) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vdmpyhsat)(Vu,Rt) #endif /* __HEXAGON_ARCH___ >= 60 */ #if __HVX_ARCH__ >= 60 @@ -1128,7 +1127,7 @@ Execution Slots: SLOT23 ========================================================================== */ -#define Q6_Vw_vdmpyacc_VwVhRh_sat __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vdmpyhsat_acc) +#define Q6_Vw_vdmpyacc_VwVhRh_sat(Vx,Vu,Rt) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vdmpyhsat_acc)(Vx,Vu,Rt) #endif /* __HEXAGON_ARCH___ >= 60 */ #if __HVX_ARCH__ >= 60 @@ -1139,7 +1138,7 @@ Execution 
Slots: SLOT23 ========================================================================== */ -#define Q6_Vw_vdmpy_WhRuh_sat __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vdmpyhsuisat) +#define Q6_Vw_vdmpy_WhRuh_sat(Vuu,Rt) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vdmpyhsuisat)(Vuu,Rt) #endif /* __HEXAGON_ARCH___ >= 60 */ #if __HVX_ARCH__ >= 60 @@ -1150,7 +1149,7 @@ Execution Slots: SLOT23 ========================================================================== */ -#define Q6_Vw_vdmpyacc_VwWhRuh_sat __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vdmpyhsuisat_acc) +#define Q6_Vw_vdmpyacc_VwWhRuh_sat(Vx,Vuu,Rt) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vdmpyhsuisat_acc)(Vx,Vuu,Rt) #endif /* __HEXAGON_ARCH___ >= 60 */ #if __HVX_ARCH__ >= 60 @@ -1161,7 +1160,7 @@ Execution Slots: SLOT23 ========================================================================== */ -#define Q6_Vw_vdmpy_VhRuh_sat __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vdmpyhsusat) +#define Q6_Vw_vdmpy_VhRuh_sat(Vu,Rt) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vdmpyhsusat)(Vu,Rt) #endif /* __HEXAGON_ARCH___ >= 60 */ #if __HVX_ARCH__ >= 60 @@ -1172,7 +1171,7 @@ Execution Slots: SLOT23 ========================================================================== */ -#define Q6_Vw_vdmpyacc_VwVhRuh_sat __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vdmpyhsusat_acc) +#define Q6_Vw_vdmpyacc_VwVhRuh_sat(Vx,Vu,Rt) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vdmpyhsusat_acc)(Vx,Vu,Rt) #endif /* __HEXAGON_ARCH___ >= 60 */ #if __HVX_ARCH__ >= 60 @@ -1183,7 +1182,7 @@ Execution Slots: SLOT23 ========================================================================== */ -#define Q6_Vw_vdmpy_VhVh_sat __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vdmpyhvsat) +#define Q6_Vw_vdmpy_VhVh_sat(Vu,Vv) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vdmpyhvsat)(Vu,Vv) #endif /* __HEXAGON_ARCH___ >= 60 */ #if __HVX_ARCH__ >= 60 @@ -1194,7 +1193,7 @@ Execution Slots: SLOT23 ========================================================================== */ -#define Q6_Vw_vdmpyacc_VwVhVh_sat __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vdmpyhvsat_acc) +#define Q6_Vw_vdmpyacc_VwVhVh_sat(Vx,Vu,Vv) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vdmpyhvsat_acc)(Vx,Vu,Vv) #endif /* __HEXAGON_ARCH___ >= 60 */ #if __HVX_ARCH__ >= 60 @@ -1205,7 +1204,7 @@ Execution Slots: SLOT23 ========================================================================== */ -#define Q6_Wuw_vdsad_WuhRuh __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vdsaduh) +#define Q6_Wuw_vdsad_WuhRuh(Vuu,Rt) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vdsaduh)(Vuu,Rt) #endif /* __HEXAGON_ARCH___ >= 60 */ #if __HVX_ARCH__ >= 60 @@ -1216,7 +1215,7 @@ Execution Slots: SLOT23 ========================================================================== */ -#define Q6_Wuw_vdsadacc_WuwWuhRuh __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vdsaduh_acc) +#define Q6_Wuw_vdsadacc_WuwWuhRuh(Vxx,Vuu,Rt) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vdsaduh_acc)(Vxx,Vuu,Rt) #endif /* __HEXAGON_ARCH___ >= 60 */ #if __HVX_ARCH__ >= 60 @@ -1227,7 +1226,7 @@ Execution Slots: SLOT0123 ========================================================================== */ -#define Q6_Q_vcmp_eq_VbVb __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_veqb) +#define Q6_Q_vcmp_eq_VbVb(Vu,Vv) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vandqrt)((__BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_veqb)(Vu,Vv)),-1) #endif /* __HEXAGON_ARCH___ >= 60 */ #if __HVX_ARCH__ >= 60 @@ -1238,7 +1237,7 @@ Execution Slots: SLOT0123 
========================================================================== */ -#define Q6_Q_vcmp_eqand_QVbVb __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_veqb_and) +#define Q6_Q_vcmp_eqand_QVbVb(Qx,Vu,Vv) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vandqrt)((__BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_veqb_and)(__BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vandvrt)((Qx),-1),Vu,Vv)),-1) #endif /* __HEXAGON_ARCH___ >= 60 */ #if __HVX_ARCH__ >= 60 @@ -1249,7 +1248,7 @@ Execution Slots: SLOT0123 ========================================================================== */ -#define Q6_Q_vcmp_eqor_QVbVb __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_veqb_or) +#define Q6_Q_vcmp_eqor_QVbVb(Qx,Vu,Vv) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vandqrt)((__BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_veqb_or)(__BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vandvrt)((Qx),-1),Vu,Vv)),-1) #endif /* __HEXAGON_ARCH___ >= 60 */ #if __HVX_ARCH__ >= 60 @@ -1260,7 +1259,7 @@ Execution Slots: SLOT0123 ========================================================================== */ -#define Q6_Q_vcmp_eqxacc_QVbVb __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_veqb_xor) +#define Q6_Q_vcmp_eqxacc_QVbVb(Qx,Vu,Vv) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vandqrt)((__BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_veqb_xor)(__BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vandvrt)((Qx),-1),Vu,Vv)),-1) #endif /* __HEXAGON_ARCH___ >= 60 */ #if __HVX_ARCH__ >= 60 @@ -1271,7 +1270,7 @@ Execution Slots: SLOT0123 ========================================================================== */ -#define Q6_Q_vcmp_eq_VhVh __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_veqh) +#define Q6_Q_vcmp_eq_VhVh(Vu,Vv) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vandqrt)((__BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_veqh)(Vu,Vv)),-1) #endif /* __HEXAGON_ARCH___ >= 60 */ #if __HVX_ARCH__ >= 60 @@ -1282,7 +1281,7 @@ Execution Slots: SLOT0123 ========================================================================== */ -#define Q6_Q_vcmp_eqand_QVhVh __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_veqh_and) +#define Q6_Q_vcmp_eqand_QVhVh(Qx,Vu,Vv) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vandqrt)((__BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_veqh_and)(__BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vandvrt)((Qx),-1),Vu,Vv)),-1) #endif /* __HEXAGON_ARCH___ >= 60 */ #if __HVX_ARCH__ >= 60 @@ -1293,7 +1292,7 @@ Execution Slots: SLOT0123 ========================================================================== */ -#define Q6_Q_vcmp_eqor_QVhVh __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_veqh_or) +#define Q6_Q_vcmp_eqor_QVhVh(Qx,Vu,Vv) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vandqrt)((__BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_veqh_or)(__BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vandvrt)((Qx),-1),Vu,Vv)),-1) #endif /* __HEXAGON_ARCH___ >= 60 */ #if __HVX_ARCH__ >= 60 @@ -1304,7 +1303,7 @@ Execution Slots: SLOT0123 ========================================================================== */ -#define Q6_Q_vcmp_eqxacc_QVhVh __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_veqh_xor) +#define Q6_Q_vcmp_eqxacc_QVhVh(Qx,Vu,Vv) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vandqrt)((__BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_veqh_xor)(__BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vandvrt)((Qx),-1),Vu,Vv)),-1) #endif /* __HEXAGON_ARCH___ >= 60 */ #if __HVX_ARCH__ >= 60 @@ -1315,7 +1314,7 @@ Execution Slots: SLOT0123 ========================================================================== */ -#define Q6_Q_vcmp_eq_VwVw __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_veqw) +#define Q6_Q_vcmp_eq_VwVw(Vu,Vv) 
__BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vandqrt)((__BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_veqw)(Vu,Vv)),-1) #endif /* __HEXAGON_ARCH___ >= 60 */ #if __HVX_ARCH__ >= 60 @@ -1326,7 +1325,7 @@ Execution Slots: SLOT0123 ========================================================================== */ -#define Q6_Q_vcmp_eqand_QVwVw __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_veqw_and) +#define Q6_Q_vcmp_eqand_QVwVw(Qx,Vu,Vv) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vandqrt)((__BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_veqw_and)(__BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vandvrt)((Qx),-1),Vu,Vv)),-1) #endif /* __HEXAGON_ARCH___ >= 60 */ #if __HVX_ARCH__ >= 60 @@ -1337,7 +1336,7 @@ Execution Slots: SLOT0123 ========================================================================== */ -#define Q6_Q_vcmp_eqor_QVwVw __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_veqw_or) +#define Q6_Q_vcmp_eqor_QVwVw(Qx,Vu,Vv) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vandqrt)((__BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_veqw_or)(__BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vandvrt)((Qx),-1),Vu,Vv)),-1) #endif /* __HEXAGON_ARCH___ >= 60 */ #if __HVX_ARCH__ >= 60 @@ -1348,7 +1347,7 @@ Execution Slots: SLOT0123 ========================================================================== */ -#define Q6_Q_vcmp_eqxacc_QVwVw __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_veqw_xor) +#define Q6_Q_vcmp_eqxacc_QVwVw(Qx,Vu,Vv) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vandqrt)((__BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_veqw_xor)(__BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vandvrt)((Qx),-1),Vu,Vv)),-1) #endif /* __HEXAGON_ARCH___ >= 60 */ #if __HVX_ARCH__ >= 60 @@ -1359,7 +1358,7 @@ Execution Slots: SLOT0123 ========================================================================== */ -#define Q6_Q_vcmp_gt_VbVb __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vgtb) +#define Q6_Q_vcmp_gt_VbVb(Vu,Vv) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vandqrt)((__BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vgtb)(Vu,Vv)),-1) #endif /* __HEXAGON_ARCH___ >= 60 */ #if __HVX_ARCH__ >= 60 @@ -1370,7 +1369,7 @@ Execution Slots: SLOT0123 ========================================================================== */ -#define Q6_Q_vcmp_gtand_QVbVb __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vgtb_and) +#define Q6_Q_vcmp_gtand_QVbVb(Qx,Vu,Vv) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vandqrt)((__BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vgtb_and)(__BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vandvrt)((Qx),-1),Vu,Vv)),-1) #endif /* __HEXAGON_ARCH___ >= 60 */ #if __HVX_ARCH__ >= 60 @@ -1381,7 +1380,7 @@ Execution Slots: SLOT0123 ========================================================================== */ -#define Q6_Q_vcmp_gtor_QVbVb __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vgtb_or) +#define Q6_Q_vcmp_gtor_QVbVb(Qx,Vu,Vv) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vandqrt)((__BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vgtb_or)(__BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vandvrt)((Qx),-1),Vu,Vv)),-1) #endif /* __HEXAGON_ARCH___ >= 60 */ #if __HVX_ARCH__ >= 60 @@ -1392,7 +1391,7 @@ Execution Slots: SLOT0123 ========================================================================== */ -#define Q6_Q_vcmp_gtxacc_QVbVb __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vgtb_xor) +#define Q6_Q_vcmp_gtxacc_QVbVb(Qx,Vu,Vv) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vandqrt)((__BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vgtb_xor)(__BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vandvrt)((Qx),-1),Vu,Vv)),-1) #endif /* __HEXAGON_ARCH___ >= 60 */ #if __HVX_ARCH__ >= 60 @@ -1403,7 +1402,7 @@ Execution 
Slots: SLOT0123 ========================================================================== */ -#define Q6_Q_vcmp_gt_VhVh __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vgth) +#define Q6_Q_vcmp_gt_VhVh(Vu,Vv) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vandqrt)((__BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vgth)(Vu,Vv)),-1) #endif /* __HEXAGON_ARCH___ >= 60 */ #if __HVX_ARCH__ >= 60 @@ -1414,7 +1413,7 @@ Execution Slots: SLOT0123 ========================================================================== */ -#define Q6_Q_vcmp_gtand_QVhVh __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vgth_and) +#define Q6_Q_vcmp_gtand_QVhVh(Qx,Vu,Vv) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vandqrt)((__BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vgth_and)(__BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vandvrt)((Qx),-1),Vu,Vv)),-1) #endif /* __HEXAGON_ARCH___ >= 60 */ #if __HVX_ARCH__ >= 60 @@ -1425,7 +1424,7 @@ Execution Slots: SLOT0123 ========================================================================== */ -#define Q6_Q_vcmp_gtor_QVhVh __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vgth_or) +#define Q6_Q_vcmp_gtor_QVhVh(Qx,Vu,Vv) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vandqrt)((__BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vgth_or)(__BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vandvrt)((Qx),-1),Vu,Vv)),-1) #endif /* __HEXAGON_ARCH___ >= 60 */ #if __HVX_ARCH__ >= 60 @@ -1436,7 +1435,7 @@ Execution Slots: SLOT0123 ========================================================================== */ -#define Q6_Q_vcmp_gtxacc_QVhVh __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vgth_xor) +#define Q6_Q_vcmp_gtxacc_QVhVh(Qx,Vu,Vv) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vandqrt)((__BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vgth_xor)(__BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vandvrt)((Qx),-1),Vu,Vv)),-1) #endif /* __HEXAGON_ARCH___ >= 60 */ #if __HVX_ARCH__ >= 60 @@ -1447,7 +1446,7 @@ Execution Slots: SLOT0123 ========================================================================== */ -#define Q6_Q_vcmp_gt_VubVub __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vgtub) +#define Q6_Q_vcmp_gt_VubVub(Vu,Vv) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vandqrt)((__BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vgtub)(Vu,Vv)),-1) #endif /* __HEXAGON_ARCH___ >= 60 */ #if __HVX_ARCH__ >= 60 @@ -1458,7 +1457,7 @@ Execution Slots: SLOT0123 ========================================================================== */ -#define Q6_Q_vcmp_gtand_QVubVub __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vgtub_and) +#define Q6_Q_vcmp_gtand_QVubVub(Qx,Vu,Vv) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vandqrt)((__BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vgtub_and)(__BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vandvrt)((Qx),-1),Vu,Vv)),-1) #endif /* __HEXAGON_ARCH___ >= 60 */ #if __HVX_ARCH__ >= 60 @@ -1469,7 +1468,7 @@ Execution Slots: SLOT0123 ========================================================================== */ -#define Q6_Q_vcmp_gtor_QVubVub __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vgtub_or) +#define Q6_Q_vcmp_gtor_QVubVub(Qx,Vu,Vv) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vandqrt)((__BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vgtub_or)(__BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vandvrt)((Qx),-1),Vu,Vv)),-1) #endif /* __HEXAGON_ARCH___ >= 60 */ #if __HVX_ARCH__ >= 60 @@ -1480,7 +1479,7 @@ Execution Slots: SLOT0123 ========================================================================== */ -#define Q6_Q_vcmp_gtxacc_QVubVub __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vgtub_xor) +#define Q6_Q_vcmp_gtxacc_QVubVub(Qx,Vu,Vv) 
__BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vandqrt)((__BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vgtub_xor)(__BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vandvrt)((Qx),-1),Vu,Vv)),-1) #endif /* __HEXAGON_ARCH___ >= 60 */ #if __HVX_ARCH__ >= 60 @@ -1491,7 +1490,7 @@ Execution Slots: SLOT0123 ========================================================================== */ -#define Q6_Q_vcmp_gt_VuhVuh __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vgtuh) +#define Q6_Q_vcmp_gt_VuhVuh(Vu,Vv) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vandqrt)((__BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vgtuh)(Vu,Vv)),-1) #endif /* __HEXAGON_ARCH___ >= 60 */ #if __HVX_ARCH__ >= 60 @@ -1502,7 +1501,7 @@ Execution Slots: SLOT0123 ========================================================================== */ -#define Q6_Q_vcmp_gtand_QVuhVuh __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vgtuh_and) +#define Q6_Q_vcmp_gtand_QVuhVuh(Qx,Vu,Vv) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vandqrt)((__BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vgtuh_and)(__BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vandvrt)((Qx),-1),Vu,Vv)),-1) #endif /* __HEXAGON_ARCH___ >= 60 */ #if __HVX_ARCH__ >= 60 @@ -1513,7 +1512,7 @@ Execution Slots: SLOT0123 ========================================================================== */ -#define Q6_Q_vcmp_gtor_QVuhVuh __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vgtuh_or) +#define Q6_Q_vcmp_gtor_QVuhVuh(Qx,Vu,Vv) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vandqrt)((__BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vgtuh_or)(__BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vandvrt)((Qx),-1),Vu,Vv)),-1) #endif /* __HEXAGON_ARCH___ >= 60 */ #if __HVX_ARCH__ >= 60 @@ -1524,7 +1523,7 @@ Execution Slots: SLOT0123 ========================================================================== */ -#define Q6_Q_vcmp_gtxacc_QVuhVuh __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vgtuh_xor) +#define Q6_Q_vcmp_gtxacc_QVuhVuh(Qx,Vu,Vv) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vandqrt)((__BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vgtuh_xor)(__BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vandvrt)((Qx),-1),Vu,Vv)),-1) #endif /* __HEXAGON_ARCH___ >= 60 */ #if __HVX_ARCH__ >= 60 @@ -1535,7 +1534,7 @@ Execution Slots: SLOT0123 ========================================================================== */ -#define Q6_Q_vcmp_gt_VuwVuw __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vgtuw) +#define Q6_Q_vcmp_gt_VuwVuw(Vu,Vv) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vandqrt)((__BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vgtuw)(Vu,Vv)),-1) #endif /* __HEXAGON_ARCH___ >= 60 */ #if __HVX_ARCH__ >= 60 @@ -1546,7 +1545,7 @@ Execution Slots: SLOT0123 ========================================================================== */ -#define Q6_Q_vcmp_gtand_QVuwVuw __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vgtuw_and) +#define Q6_Q_vcmp_gtand_QVuwVuw(Qx,Vu,Vv) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vandqrt)((__BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vgtuw_and)(__BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vandvrt)((Qx),-1),Vu,Vv)),-1) #endif /* __HEXAGON_ARCH___ >= 60 */ #if __HVX_ARCH__ >= 60 @@ -1557,7 +1556,7 @@ Execution Slots: SLOT0123 ========================================================================== */ -#define Q6_Q_vcmp_gtor_QVuwVuw __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vgtuw_or) +#define Q6_Q_vcmp_gtor_QVuwVuw(Qx,Vu,Vv) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vandqrt)((__BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vgtuw_or)(__BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vandvrt)((Qx),-1),Vu,Vv)),-1) #endif /* __HEXAGON_ARCH___ >= 60 */ #if __HVX_ARCH__ >= 60 @@ 
-1568,7 +1567,7 @@ Execution Slots: SLOT0123 ========================================================================== */ -#define Q6_Q_vcmp_gtxacc_QVuwVuw __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vgtuw_xor) +#define Q6_Q_vcmp_gtxacc_QVuwVuw(Qx,Vu,Vv) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vandqrt)((__BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vgtuw_xor)(__BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vandvrt)((Qx),-1),Vu,Vv)),-1) #endif /* __HEXAGON_ARCH___ >= 60 */ #if __HVX_ARCH__ >= 60 @@ -1579,7 +1578,7 @@ Execution Slots: SLOT0123 ========================================================================== */ -#define Q6_Q_vcmp_gt_VwVw __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vgtw) +#define Q6_Q_vcmp_gt_VwVw(Vu,Vv) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vandqrt)((__BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vgtw)(Vu,Vv)),-1) #endif /* __HEXAGON_ARCH___ >= 60 */ #if __HVX_ARCH__ >= 60 @@ -1590,7 +1589,7 @@ Execution Slots: SLOT0123 ========================================================================== */ -#define Q6_Q_vcmp_gtand_QVwVw __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vgtw_and) +#define Q6_Q_vcmp_gtand_QVwVw(Qx,Vu,Vv) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vandqrt)((__BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vgtw_and)(__BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vandvrt)((Qx),-1),Vu,Vv)),-1) #endif /* __HEXAGON_ARCH___ >= 60 */ #if __HVX_ARCH__ >= 60 @@ -1601,7 +1600,7 @@ Execution Slots: SLOT0123 ========================================================================== */ -#define Q6_Q_vcmp_gtor_QVwVw __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vgtw_or) +#define Q6_Q_vcmp_gtor_QVwVw(Qx,Vu,Vv) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vandqrt)((__BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vgtw_or)(__BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vandvrt)((Qx),-1),Vu,Vv)),-1) #endif /* __HEXAGON_ARCH___ >= 60 */ #if __HVX_ARCH__ >= 60 @@ -1612,7 +1611,7 @@ Execution Slots: SLOT0123 ========================================================================== */ -#define Q6_Q_vcmp_gtxacc_QVwVw __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vgtw_xor) +#define Q6_Q_vcmp_gtxacc_QVwVw(Qx,Vu,Vv) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vandqrt)((__BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vgtw_xor)(__BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vandvrt)((Qx),-1),Vu,Vv)),-1) #endif /* __HEXAGON_ARCH___ >= 60 */ #if __HVX_ARCH__ >= 60 @@ -1623,7 +1622,7 @@ Execution Slots: SLOT23 ========================================================================== */ -#define Q6_Vw_vinsert_VwR __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vinsertwr) +#define Q6_Vw_vinsert_VwR(Vx,Rt) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vinsertwr)(Vx,Rt) #endif /* __HEXAGON_ARCH___ >= 60 */ #if __HVX_ARCH__ >= 60 @@ -1634,7 +1633,7 @@ Execution Slots: SLOT0123 ========================================================================== */ -#define Q6_V_vlalign_VVR __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vlalignb) +#define Q6_V_vlalign_VVR(Vu,Vv,Rt) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vlalignb)(Vu,Vv,Rt) #endif /* __HEXAGON_ARCH___ >= 60 */ #if __HVX_ARCH__ >= 60 @@ -1645,7 +1644,7 @@ Execution Slots: SLOT0123 ========================================================================== */ -#define Q6_V_vlalign_VVI __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vlalignbi) +#define Q6_V_vlalign_VVI(Vu,Vv,Iu3) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vlalignbi)(Vu,Vv,Iu3) #endif /* __HEXAGON_ARCH___ >= 60 */ #if __HVX_ARCH__ >= 60 @@ -1656,7 +1655,7 @@ Execution Slots: SLOT0123 
========================================================================== */ -#define Q6_Vuh_vlsr_VuhR __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vlsrh) +#define Q6_Vuh_vlsr_VuhR(Vu,Rt) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vlsrh)(Vu,Rt) #endif /* __HEXAGON_ARCH___ >= 60 */ #if __HVX_ARCH__ >= 60 @@ -1667,7 +1666,7 @@ Execution Slots: SLOT0123 ========================================================================== */ -#define Q6_Vh_vlsr_VhVh __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vlsrhv) +#define Q6_Vh_vlsr_VhVh(Vu,Vv) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vlsrhv)(Vu,Vv) #endif /* __HEXAGON_ARCH___ >= 60 */ #if __HVX_ARCH__ >= 60 @@ -1678,7 +1677,7 @@ Execution Slots: SLOT0123 ========================================================================== */ -#define Q6_Vuw_vlsr_VuwR __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vlsrw) +#define Q6_Vuw_vlsr_VuwR(Vu,Rt) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vlsrw)(Vu,Rt) #endif /* __HEXAGON_ARCH___ >= 60 */ #if __HVX_ARCH__ >= 60 @@ -1689,7 +1688,7 @@ Execution Slots: SLOT0123 ========================================================================== */ -#define Q6_Vw_vlsr_VwVw __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vlsrwv) +#define Q6_Vw_vlsr_VwVw(Vu,Vv) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vlsrwv)(Vu,Vv) #endif /* __HEXAGON_ARCH___ >= 60 */ #if __HVX_ARCH__ >= 60 @@ -1700,7 +1699,7 @@ Execution Slots: SLOT0123 ========================================================================== */ -#define Q6_Vb_vlut32_VbVbR __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vlutvvb) +#define Q6_Vb_vlut32_VbVbR(Vu,Vv,Rt) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vlutvvb)(Vu,Vv,Rt) #endif /* __HEXAGON_ARCH___ >= 60 */ #if __HVX_ARCH__ >= 60 @@ -1711,7 +1710,7 @@ Execution Slots: SLOT0123 ========================================================================== */ -#define Q6_Vb_vlut32or_VbVbVbR __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vlutvvb_oracc) +#define Q6_Vb_vlut32or_VbVbVbR(Vx,Vu,Vv,Rt) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vlutvvb_oracc)(Vx,Vu,Vv,Rt) #endif /* __HEXAGON_ARCH___ >= 60 */ #if __HVX_ARCH__ >= 60 @@ -1722,7 +1721,7 @@ Execution Slots: SLOT0123 ========================================================================== */ -#define Q6_Wh_vlut16_VbVhR __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vlutvwh) +#define Q6_Wh_vlut16_VbVhR(Vu,Vv,Rt) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vlutvwh)(Vu,Vv,Rt) #endif /* __HEXAGON_ARCH___ >= 60 */ #if __HVX_ARCH__ >= 60 @@ -1733,7 +1732,7 @@ Execution Slots: SLOT0123 ========================================================================== */ -#define Q6_Wh_vlut16or_WhVbVhR __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vlutvwh_oracc) +#define Q6_Wh_vlut16or_WhVbVhR(Vxx,Vu,Vv,Rt) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vlutvwh_oracc)(Vxx,Vu,Vv,Rt) #endif /* __HEXAGON_ARCH___ >= 60 */ #if __HVX_ARCH__ >= 60 @@ -1744,7 +1743,7 @@ Execution Slots: SLOT0123 ========================================================================== */ -#define Q6_Vh_vmax_VhVh __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vmaxh) +#define Q6_Vh_vmax_VhVh(Vu,Vv) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vmaxh)(Vu,Vv) #endif /* __HEXAGON_ARCH___ >= 60 */ #if __HVX_ARCH__ >= 60 @@ -1755,7 +1754,7 @@ Execution Slots: SLOT0123 ========================================================================== */ -#define Q6_Vub_vmax_VubVub __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vmaxub) +#define Q6_Vub_vmax_VubVub(Vu,Vv) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vmaxub)(Vu,Vv) #endif /* __HEXAGON_ARCH___ 
>= 60 */ #if __HVX_ARCH__ >= 60 @@ -1766,7 +1765,7 @@ Execution Slots: SLOT0123 ========================================================================== */ -#define Q6_Vuh_vmax_VuhVuh __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vmaxuh) +#define Q6_Vuh_vmax_VuhVuh(Vu,Vv) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vmaxuh)(Vu,Vv) #endif /* __HEXAGON_ARCH___ >= 60 */ #if __HVX_ARCH__ >= 60 @@ -1777,7 +1776,7 @@ Execution Slots: SLOT0123 ========================================================================== */ -#define Q6_Vw_vmax_VwVw __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vmaxw) +#define Q6_Vw_vmax_VwVw(Vu,Vv) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vmaxw)(Vu,Vv) #endif /* __HEXAGON_ARCH___ >= 60 */ #if __HVX_ARCH__ >= 60 @@ -1788,7 +1787,7 @@ Execution Slots: SLOT0123 ========================================================================== */ -#define Q6_Vh_vmin_VhVh __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vminh) +#define Q6_Vh_vmin_VhVh(Vu,Vv) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vminh)(Vu,Vv) #endif /* __HEXAGON_ARCH___ >= 60 */ #if __HVX_ARCH__ >= 60 @@ -1799,7 +1798,7 @@ Execution Slots: SLOT0123 ========================================================================== */ -#define Q6_Vub_vmin_VubVub __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vminub) +#define Q6_Vub_vmin_VubVub(Vu,Vv) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vminub)(Vu,Vv) #endif /* __HEXAGON_ARCH___ >= 60 */ #if __HVX_ARCH__ >= 60 @@ -1810,7 +1809,7 @@ Execution Slots: SLOT0123 ========================================================================== */ -#define Q6_Vuh_vmin_VuhVuh __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vminuh) +#define Q6_Vuh_vmin_VuhVuh(Vu,Vv) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vminuh)(Vu,Vv) #endif /* __HEXAGON_ARCH___ >= 60 */ #if __HVX_ARCH__ >= 60 @@ -1821,7 +1820,7 @@ Execution Slots: SLOT0123 ========================================================================== */ -#define Q6_Vw_vmin_VwVw __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vminw) +#define Q6_Vw_vmin_VwVw(Vu,Vv) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vminw)(Vu,Vv) #endif /* __HEXAGON_ARCH___ >= 60 */ #if __HVX_ARCH__ >= 60 @@ -1832,7 +1831,7 @@ Execution Slots: SLOT23 ========================================================================== */ -#define Q6_Wh_vmpa_WubRb __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vmpabus) +#define Q6_Wh_vmpa_WubRb(Vuu,Rt) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vmpabus)(Vuu,Rt) #endif /* __HEXAGON_ARCH___ >= 60 */ #if __HVX_ARCH__ >= 60 @@ -1843,7 +1842,7 @@ Execution Slots: SLOT23 ========================================================================== */ -#define Q6_Wh_vmpaacc_WhWubRb __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vmpabus_acc) +#define Q6_Wh_vmpaacc_WhWubRb(Vxx,Vuu,Rt) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vmpabus_acc)(Vxx,Vuu,Rt) #endif /* __HEXAGON_ARCH___ >= 60 */ #if __HVX_ARCH__ >= 60 @@ -1854,7 +1853,7 @@ Execution Slots: SLOT23 ========================================================================== */ -#define Q6_Wh_vmpa_WubWb __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vmpabusv) +#define Q6_Wh_vmpa_WubWb(Vuu,Vvv) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vmpabusv)(Vuu,Vvv) #endif /* __HEXAGON_ARCH___ >= 60 */ #if __HVX_ARCH__ >= 60 @@ -1865,7 +1864,7 @@ Execution Slots: SLOT23 ========================================================================== */ -#define Q6_Wh_vmpa_WubWub __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vmpabuuv) +#define Q6_Wh_vmpa_WubWub(Vuu,Vvv) 
__BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vmpabuuv)(Vuu,Vvv) #endif /* __HEXAGON_ARCH___ >= 60 */ #if __HVX_ARCH__ >= 60 @@ -1876,7 +1875,7 @@ Execution Slots: SLOT23 ========================================================================== */ -#define Q6_Ww_vmpa_WhRb __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vmpahb) +#define Q6_Ww_vmpa_WhRb(Vuu,Rt) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vmpahb)(Vuu,Rt) #endif /* __HEXAGON_ARCH___ >= 60 */ #if __HVX_ARCH__ >= 60 @@ -1887,7 +1886,7 @@ Execution Slots: SLOT23 ========================================================================== */ -#define Q6_Ww_vmpaacc_WwWhRb __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vmpahb_acc) +#define Q6_Ww_vmpaacc_WwWhRb(Vxx,Vuu,Rt) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vmpahb_acc)(Vxx,Vuu,Rt) #endif /* __HEXAGON_ARCH___ >= 60 */ #if __HVX_ARCH__ >= 60 @@ -1898,7 +1897,7 @@ Execution Slots: SLOT23 ========================================================================== */ -#define Q6_Wh_vmpy_VubRb __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vmpybus) +#define Q6_Wh_vmpy_VubRb(Vu,Rt) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vmpybus)(Vu,Rt) #endif /* __HEXAGON_ARCH___ >= 60 */ #if __HVX_ARCH__ >= 60 @@ -1909,7 +1908,7 @@ Execution Slots: SLOT23 ========================================================================== */ -#define Q6_Wh_vmpyacc_WhVubRb __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vmpybus_acc) +#define Q6_Wh_vmpyacc_WhVubRb(Vxx,Vu,Rt) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vmpybus_acc)(Vxx,Vu,Rt) #endif /* __HEXAGON_ARCH___ >= 60 */ #if __HVX_ARCH__ >= 60 @@ -1920,7 +1919,7 @@ Execution Slots: SLOT23 ========================================================================== */ -#define Q6_Wh_vmpy_VubVb __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vmpybusv) +#define Q6_Wh_vmpy_VubVb(Vu,Vv) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vmpybusv)(Vu,Vv) #endif /* __HEXAGON_ARCH___ >= 60 */ #if __HVX_ARCH__ >= 60 @@ -1931,7 +1930,7 @@ Execution Slots: SLOT23 ========================================================================== */ -#define Q6_Wh_vmpyacc_WhVubVb __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vmpybusv_acc) +#define Q6_Wh_vmpyacc_WhVubVb(Vxx,Vu,Vv) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vmpybusv_acc)(Vxx,Vu,Vv) #endif /* __HEXAGON_ARCH___ >= 60 */ #if __HVX_ARCH__ >= 60 @@ -1942,7 +1941,7 @@ Execution Slots: SLOT23 ========================================================================== */ -#define Q6_Wh_vmpy_VbVb __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vmpybv) +#define Q6_Wh_vmpy_VbVb(Vu,Vv) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vmpybv)(Vu,Vv) #endif /* __HEXAGON_ARCH___ >= 60 */ #if __HVX_ARCH__ >= 60 @@ -1953,7 +1952,7 @@ Execution Slots: SLOT23 ========================================================================== */ -#define Q6_Wh_vmpyacc_WhVbVb __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vmpybv_acc) +#define Q6_Wh_vmpyacc_WhVbVb(Vxx,Vu,Vv) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vmpybv_acc)(Vxx,Vu,Vv) #endif /* __HEXAGON_ARCH___ >= 60 */ #if __HVX_ARCH__ >= 60 @@ -1964,7 +1963,7 @@ Execution Slots: SLOT23 ========================================================================== */ -#define Q6_Vw_vmpye_VwVuh __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vmpyewuh) +#define Q6_Vw_vmpye_VwVuh(Vu,Vv) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vmpyewuh)(Vu,Vv) #endif /* __HEXAGON_ARCH___ >= 60 */ #if __HVX_ARCH__ >= 60 @@ -1975,7 +1974,7 @@ Execution Slots: SLOT23 ========================================================================== */ -#define 
Q6_Ww_vmpy_VhRh __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vmpyh) +#define Q6_Ww_vmpy_VhRh(Vu,Rt) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vmpyh)(Vu,Rt) #endif /* __HEXAGON_ARCH___ >= 60 */ #if __HVX_ARCH__ >= 60 @@ -1986,7 +1985,7 @@ Execution Slots: SLOT23 ========================================================================== */ -#define Q6_Ww_vmpyacc_WwVhRh_sat __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vmpyhsat_acc) +#define Q6_Ww_vmpyacc_WwVhRh_sat(Vxx,Vu,Rt) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vmpyhsat_acc)(Vxx,Vu,Rt) #endif /* __HEXAGON_ARCH___ >= 60 */ #if __HVX_ARCH__ >= 60 @@ -1997,7 +1996,7 @@ Execution Slots: SLOT23 ========================================================================== */ -#define Q6_Vh_vmpy_VhRh_s1_rnd_sat __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vmpyhsrs) +#define Q6_Vh_vmpy_VhRh_s1_rnd_sat(Vu,Rt) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vmpyhsrs)(Vu,Rt) #endif /* __HEXAGON_ARCH___ >= 60 */ #if __HVX_ARCH__ >= 60 @@ -2008,7 +2007,7 @@ Execution Slots: SLOT23 ========================================================================== */ -#define Q6_Vh_vmpy_VhRh_s1_sat __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vmpyhss) +#define Q6_Vh_vmpy_VhRh_s1_sat(Vu,Rt) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vmpyhss)(Vu,Rt) #endif /* __HEXAGON_ARCH___ >= 60 */ #if __HVX_ARCH__ >= 60 @@ -2019,7 +2018,7 @@ Execution Slots: SLOT23 ========================================================================== */ -#define Q6_Ww_vmpy_VhVuh __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vmpyhus) +#define Q6_Ww_vmpy_VhVuh(Vu,Vv) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vmpyhus)(Vu,Vv) #endif /* __HEXAGON_ARCH___ >= 60 */ #if __HVX_ARCH__ >= 60 @@ -2030,7 +2029,7 @@ Execution Slots: SLOT23 ========================================================================== */ -#define Q6_Ww_vmpyacc_WwVhVuh __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vmpyhus_acc) +#define Q6_Ww_vmpyacc_WwVhVuh(Vxx,Vu,Vv) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vmpyhus_acc)(Vxx,Vu,Vv) #endif /* __HEXAGON_ARCH___ >= 60 */ #if __HVX_ARCH__ >= 60 @@ -2041,7 +2040,7 @@ Execution Slots: SLOT23 ========================================================================== */ -#define Q6_Ww_vmpy_VhVh __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vmpyhv) +#define Q6_Ww_vmpy_VhVh(Vu,Vv) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vmpyhv)(Vu,Vv) #endif /* __HEXAGON_ARCH___ >= 60 */ #if __HVX_ARCH__ >= 60 @@ -2052,7 +2051,7 @@ Execution Slots: SLOT23 ========================================================================== */ -#define Q6_Ww_vmpyacc_WwVhVh __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vmpyhv_acc) +#define Q6_Ww_vmpyacc_WwVhVh(Vxx,Vu,Vv) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vmpyhv_acc)(Vxx,Vu,Vv) #endif /* __HEXAGON_ARCH___ >= 60 */ #if __HVX_ARCH__ >= 60 @@ -2063,7 +2062,7 @@ Execution Slots: SLOT23 ========================================================================== */ -#define Q6_Vh_vmpy_VhVh_s1_rnd_sat __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vmpyhvsrs) +#define Q6_Vh_vmpy_VhVh_s1_rnd_sat(Vu,Vv) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vmpyhvsrs)(Vu,Vv) #endif /* __HEXAGON_ARCH___ >= 60 */ #if __HVX_ARCH__ >= 60 @@ -2074,7 +2073,7 @@ Execution Slots: SLOT23 ========================================================================== */ -#define Q6_Vw_vmpyieo_VhVh __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vmpyieoh) +#define Q6_Vw_vmpyieo_VhVh(Vu,Vv) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vmpyieoh)(Vu,Vv) #endif /* __HEXAGON_ARCH___ >= 60 */ #if __HVX_ARCH__ >= 60 @@ -2085,7 
+2084,7 @@ Execution Slots: SLOT23 ========================================================================== */ -#define Q6_Vw_vmpyieacc_VwVwVh __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vmpyiewh_acc) +#define Q6_Vw_vmpyieacc_VwVwVh(Vx,Vu,Vv) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vmpyiewh_acc)(Vx,Vu,Vv) #endif /* __HEXAGON_ARCH___ >= 60 */ #if __HVX_ARCH__ >= 60 @@ -2096,7 +2095,7 @@ Execution Slots: SLOT23 ========================================================================== */ -#define Q6_Vw_vmpyie_VwVuh __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vmpyiewuh) +#define Q6_Vw_vmpyie_VwVuh(Vu,Vv) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vmpyiewuh)(Vu,Vv) #endif /* __HEXAGON_ARCH___ >= 60 */ #if __HVX_ARCH__ >= 60 @@ -2107,7 +2106,7 @@ Execution Slots: SLOT23 ========================================================================== */ -#define Q6_Vw_vmpyieacc_VwVwVuh __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vmpyiewuh_acc) +#define Q6_Vw_vmpyieacc_VwVwVuh(Vx,Vu,Vv) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vmpyiewuh_acc)(Vx,Vu,Vv) #endif /* __HEXAGON_ARCH___ >= 60 */ #if __HVX_ARCH__ >= 60 @@ -2118,7 +2117,7 @@ Execution Slots: SLOT23 ========================================================================== */ -#define Q6_Vh_vmpyi_VhVh __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vmpyih) +#define Q6_Vh_vmpyi_VhVh(Vu,Vv) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vmpyih)(Vu,Vv) #endif /* __HEXAGON_ARCH___ >= 60 */ #if __HVX_ARCH__ >= 60 @@ -2129,7 +2128,7 @@ Execution Slots: SLOT23 ========================================================================== */ -#define Q6_Vh_vmpyiacc_VhVhVh __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vmpyih_acc) +#define Q6_Vh_vmpyiacc_VhVhVh(Vx,Vu,Vv) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vmpyih_acc)(Vx,Vu,Vv) #endif /* __HEXAGON_ARCH___ >= 60 */ #if __HVX_ARCH__ >= 60 @@ -2140,7 +2139,7 @@ Execution Slots: SLOT23 ========================================================================== */ -#define Q6_Vh_vmpyi_VhRb __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vmpyihb) +#define Q6_Vh_vmpyi_VhRb(Vu,Rt) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vmpyihb)(Vu,Rt) #endif /* __HEXAGON_ARCH___ >= 60 */ #if __HVX_ARCH__ >= 60 @@ -2151,7 +2150,7 @@ Execution Slots: SLOT23 ========================================================================== */ -#define Q6_Vh_vmpyiacc_VhVhRb __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vmpyihb_acc) +#define Q6_Vh_vmpyiacc_VhVhRb(Vx,Vu,Rt) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vmpyihb_acc)(Vx,Vu,Rt) #endif /* __HEXAGON_ARCH___ >= 60 */ #if __HVX_ARCH__ >= 60 @@ -2162,7 +2161,7 @@ Execution Slots: SLOT23 ========================================================================== */ -#define Q6_Vw_vmpyio_VwVh __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vmpyiowh) +#define Q6_Vw_vmpyio_VwVh(Vu,Vv) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vmpyiowh)(Vu,Vv) #endif /* __HEXAGON_ARCH___ >= 60 */ #if __HVX_ARCH__ >= 60 @@ -2173,7 +2172,7 @@ Execution Slots: SLOT23 ========================================================================== */ -#define Q6_Vw_vmpyi_VwRb __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vmpyiwb) +#define Q6_Vw_vmpyi_VwRb(Vu,Rt) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vmpyiwb)(Vu,Rt) #endif /* __HEXAGON_ARCH___ >= 60 */ #if __HVX_ARCH__ >= 60 @@ -2184,7 +2183,7 @@ Execution Slots: SLOT23 ========================================================================== */ -#define Q6_Vw_vmpyiacc_VwVwRb __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vmpyiwb_acc) +#define Q6_Vw_vmpyiacc_VwVwRb(Vx,Vu,Rt) 
__BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vmpyiwb_acc)(Vx,Vu,Rt) #endif /* __HEXAGON_ARCH___ >= 60 */ #if __HVX_ARCH__ >= 60 @@ -2195,7 +2194,7 @@ Execution Slots: SLOT23 ========================================================================== */ -#define Q6_Vw_vmpyi_VwRh __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vmpyiwh) +#define Q6_Vw_vmpyi_VwRh(Vu,Rt) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vmpyiwh)(Vu,Rt) #endif /* __HEXAGON_ARCH___ >= 60 */ #if __HVX_ARCH__ >= 60 @@ -2206,7 +2205,7 @@ Execution Slots: SLOT23 ========================================================================== */ -#define Q6_Vw_vmpyiacc_VwVwRh __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vmpyiwh_acc) +#define Q6_Vw_vmpyiacc_VwVwRh(Vx,Vu,Rt) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vmpyiwh_acc)(Vx,Vu,Rt) #endif /* __HEXAGON_ARCH___ >= 60 */ #if __HVX_ARCH__ >= 60 @@ -2217,7 +2216,7 @@ Execution Slots: SLOT23 ========================================================================== */ -#define Q6_Vw_vmpyo_VwVh_s1_sat __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vmpyowh) +#define Q6_Vw_vmpyo_VwVh_s1_sat(Vu,Vv) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vmpyowh)(Vu,Vv) #endif /* __HEXAGON_ARCH___ >= 60 */ #if __HVX_ARCH__ >= 60 @@ -2228,7 +2227,7 @@ Execution Slots: SLOT23 ========================================================================== */ -#define Q6_Vw_vmpyo_VwVh_s1_rnd_sat __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vmpyowh_rnd) +#define Q6_Vw_vmpyo_VwVh_s1_rnd_sat(Vu,Vv) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vmpyowh_rnd)(Vu,Vv) #endif /* __HEXAGON_ARCH___ >= 60 */ #if __HVX_ARCH__ >= 60 @@ -2239,7 +2238,7 @@ Execution Slots: SLOT23 ========================================================================== */ -#define Q6_Vw_vmpyoacc_VwVwVh_s1_rnd_sat_shift __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vmpyowh_rnd_sacc) +#define Q6_Vw_vmpyoacc_VwVwVh_s1_rnd_sat_shift(Vx,Vu,Vv) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vmpyowh_rnd_sacc)(Vx,Vu,Vv) #endif /* __HEXAGON_ARCH___ >= 60 */ #if __HVX_ARCH__ >= 60 @@ -2250,7 +2249,7 @@ Execution Slots: SLOT23 ========================================================================== */ -#define Q6_Vw_vmpyoacc_VwVwVh_s1_sat_shift __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vmpyowh_sacc) +#define Q6_Vw_vmpyoacc_VwVwVh_s1_sat_shift(Vx,Vu,Vv) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vmpyowh_sacc)(Vx,Vu,Vv) #endif /* __HEXAGON_ARCH___ >= 60 */ #if __HVX_ARCH__ >= 60 @@ -2261,7 +2260,7 @@ Execution Slots: SLOT23 ========================================================================== */ -#define Q6_Wuh_vmpy_VubRub __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vmpyub) +#define Q6_Wuh_vmpy_VubRub(Vu,Rt) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vmpyub)(Vu,Rt) #endif /* __HEXAGON_ARCH___ >= 60 */ #if __HVX_ARCH__ >= 60 @@ -2272,7 +2271,7 @@ Execution Slots: SLOT23 ========================================================================== */ -#define Q6_Wuh_vmpyacc_WuhVubRub __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vmpyub_acc) +#define Q6_Wuh_vmpyacc_WuhVubRub(Vxx,Vu,Rt) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vmpyub_acc)(Vxx,Vu,Rt) #endif /* __HEXAGON_ARCH___ >= 60 */ #if __HVX_ARCH__ >= 60 @@ -2283,7 +2282,7 @@ Execution Slots: SLOT23 ========================================================================== */ -#define Q6_Wuh_vmpy_VubVub __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vmpyubv) +#define Q6_Wuh_vmpy_VubVub(Vu,Vv) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vmpyubv)(Vu,Vv) #endif /* __HEXAGON_ARCH___ >= 60 */ #if __HVX_ARCH__ >= 60 @@ -2294,7 +2293,7 
@@ Execution Slots: SLOT23 ========================================================================== */ -#define Q6_Wuh_vmpyacc_WuhVubVub __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vmpyubv_acc) +#define Q6_Wuh_vmpyacc_WuhVubVub(Vxx,Vu,Vv) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vmpyubv_acc)(Vxx,Vu,Vv) #endif /* __HEXAGON_ARCH___ >= 60 */ #if __HVX_ARCH__ >= 60 @@ -2305,7 +2304,7 @@ Execution Slots: SLOT23 ========================================================================== */ -#define Q6_Wuw_vmpy_VuhRuh __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vmpyuh) +#define Q6_Wuw_vmpy_VuhRuh(Vu,Rt) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vmpyuh)(Vu,Rt) #endif /* __HEXAGON_ARCH___ >= 60 */ #if __HVX_ARCH__ >= 60 @@ -2316,7 +2315,7 @@ Execution Slots: SLOT23 ========================================================================== */ -#define Q6_Wuw_vmpyacc_WuwVuhRuh __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vmpyuh_acc) +#define Q6_Wuw_vmpyacc_WuwVuhRuh(Vxx,Vu,Rt) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vmpyuh_acc)(Vxx,Vu,Rt) #endif /* __HEXAGON_ARCH___ >= 60 */ #if __HVX_ARCH__ >= 60 @@ -2327,7 +2326,7 @@ Execution Slots: SLOT23 ========================================================================== */ -#define Q6_Wuw_vmpy_VuhVuh __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vmpyuhv) +#define Q6_Wuw_vmpy_VuhVuh(Vu,Vv) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vmpyuhv)(Vu,Vv) #endif /* __HEXAGON_ARCH___ >= 60 */ #if __HVX_ARCH__ >= 60 @@ -2338,7 +2337,7 @@ Execution Slots: SLOT23 ========================================================================== */ -#define Q6_Wuw_vmpyacc_WuwVuhVuh __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vmpyuhv_acc) +#define Q6_Wuw_vmpyacc_WuwVuhVuh(Vxx,Vu,Vv) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vmpyuhv_acc)(Vxx,Vu,Vv) #endif /* __HEXAGON_ARCH___ >= 60 */ #if __HVX_ARCH__ >= 60 @@ -2349,7 +2348,7 @@ Execution Slots: SLOT0123 ========================================================================== */ -#define Q6_V_vmux_QVV __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vmux) +#define Q6_V_vmux_QVV(Qt,Vu,Vv) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vmux)(__BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vandvrt)((Qt),-1),Vu,Vv) #endif /* __HEXAGON_ARCH___ >= 60 */ #if __HVX_ARCH__ >= 60 @@ -2360,7 +2359,7 @@ Execution Slots: SLOT0123 ========================================================================== */ -#define Q6_Vh_vnavg_VhVh __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vnavgh) +#define Q6_Vh_vnavg_VhVh(Vu,Vv) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vnavgh)(Vu,Vv) #endif /* __HEXAGON_ARCH___ >= 60 */ #if __HVX_ARCH__ >= 60 @@ -2371,7 +2370,7 @@ Execution Slots: SLOT0123 ========================================================================== */ -#define Q6_Vb_vnavg_VubVub __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vnavgub) +#define Q6_Vb_vnavg_VubVub(Vu,Vv) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vnavgub)(Vu,Vv) #endif /* __HEXAGON_ARCH___ >= 60 */ #if __HVX_ARCH__ >= 60 @@ -2382,7 +2381,7 @@ Execution Slots: SLOT0123 ========================================================================== */ -#define Q6_Vw_vnavg_VwVw __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vnavgw) +#define Q6_Vw_vnavg_VwVw(Vu,Vv) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vnavgw)(Vu,Vv) #endif /* __HEXAGON_ARCH___ >= 60 */ #if __HVX_ARCH__ >= 60 @@ -2393,7 +2392,7 @@ Execution Slots: SLOT0123 ========================================================================== */ -#define Q6_Vh_vnormamt_Vh __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vnormamth) +#define 
Q6_Vh_vnormamt_Vh(Vu) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vnormamth)(Vu) #endif /* __HEXAGON_ARCH___ >= 60 */ #if __HVX_ARCH__ >= 60 @@ -2404,7 +2403,7 @@ Execution Slots: SLOT0123 ========================================================================== */ -#define Q6_Vw_vnormamt_Vw __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vnormamtw) +#define Q6_Vw_vnormamt_Vw(Vu) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vnormamtw)(Vu) #endif /* __HEXAGON_ARCH___ >= 60 */ #if __HVX_ARCH__ >= 60 @@ -2415,7 +2414,7 @@ Execution Slots: SLOT0123 ========================================================================== */ -#define Q6_V_vnot_V __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vnot) +#define Q6_V_vnot_V(Vu) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vnot)(Vu) #endif /* __HEXAGON_ARCH___ >= 60 */ #if __HVX_ARCH__ >= 60 @@ -2426,7 +2425,7 @@ Execution Slots: SLOT0123 ========================================================================== */ -#define Q6_V_vor_VV __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vor) +#define Q6_V_vor_VV(Vu,Vv) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vor)(Vu,Vv) #endif /* __HEXAGON_ARCH___ >= 60 */ #if __HVX_ARCH__ >= 60 @@ -2437,7 +2436,7 @@ Execution Slots: SLOT0123 ========================================================================== */ -#define Q6_Vb_vpacke_VhVh __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vpackeb) +#define Q6_Vb_vpacke_VhVh(Vu,Vv) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vpackeb)(Vu,Vv) #endif /* __HEXAGON_ARCH___ >= 60 */ #if __HVX_ARCH__ >= 60 @@ -2448,7 +2447,7 @@ Execution Slots: SLOT0123 ========================================================================== */ -#define Q6_Vh_vpacke_VwVw __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vpackeh) +#define Q6_Vh_vpacke_VwVw(Vu,Vv) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vpackeh)(Vu,Vv) #endif /* __HEXAGON_ARCH___ >= 60 */ #if __HVX_ARCH__ >= 60 @@ -2459,7 +2458,7 @@ Execution Slots: SLOT0123 ========================================================================== */ -#define Q6_Vb_vpack_VhVh_sat __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vpackhb_sat) +#define Q6_Vb_vpack_VhVh_sat(Vu,Vv) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vpackhb_sat)(Vu,Vv) #endif /* __HEXAGON_ARCH___ >= 60 */ #if __HVX_ARCH__ >= 60 @@ -2470,7 +2469,7 @@ Execution Slots: SLOT0123 ========================================================================== */ -#define Q6_Vub_vpack_VhVh_sat __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vpackhub_sat) +#define Q6_Vub_vpack_VhVh_sat(Vu,Vv) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vpackhub_sat)(Vu,Vv) #endif /* __HEXAGON_ARCH___ >= 60 */ #if __HVX_ARCH__ >= 60 @@ -2481,7 +2480,7 @@ Execution Slots: SLOT0123 ========================================================================== */ -#define Q6_Vb_vpacko_VhVh __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vpackob) +#define Q6_Vb_vpacko_VhVh(Vu,Vv) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vpackob)(Vu,Vv) #endif /* __HEXAGON_ARCH___ >= 60 */ #if __HVX_ARCH__ >= 60 @@ -2492,7 +2491,7 @@ Execution Slots: SLOT0123 ========================================================================== */ -#define Q6_Vh_vpacko_VwVw __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vpackoh) +#define Q6_Vh_vpacko_VwVw(Vu,Vv) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vpackoh)(Vu,Vv) #endif /* __HEXAGON_ARCH___ >= 60 */ #if __HVX_ARCH__ >= 60 @@ -2503,7 +2502,7 @@ Execution Slots: SLOT0123 ========================================================================== */ -#define Q6_Vh_vpack_VwVw_sat 
__BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vpackwh_sat) +#define Q6_Vh_vpack_VwVw_sat(Vu,Vv) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vpackwh_sat)(Vu,Vv) #endif /* __HEXAGON_ARCH___ >= 60 */ #if __HVX_ARCH__ >= 60 @@ -2514,7 +2513,7 @@ Execution Slots: SLOT0123 ========================================================================== */ -#define Q6_Vuh_vpack_VwVw_sat __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vpackwuh_sat) +#define Q6_Vuh_vpack_VwVw_sat(Vu,Vv) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vpackwuh_sat)(Vu,Vv) #endif /* __HEXAGON_ARCH___ >= 60 */ #if __HVX_ARCH__ >= 60 @@ -2525,7 +2524,7 @@ Execution Slots: SLOT0123 ========================================================================== */ -#define Q6_Vh_vpopcount_Vh __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vpopcounth) +#define Q6_Vh_vpopcount_Vh(Vu) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vpopcounth)(Vu) #endif /* __HEXAGON_ARCH___ >= 60 */ #if __HVX_ARCH__ >= 60 @@ -2536,7 +2535,7 @@ Execution Slots: SLOT0123 ========================================================================== */ -#define Q6_V_vrdelta_VV __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vrdelta) +#define Q6_V_vrdelta_VV(Vu,Vv) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vrdelta)(Vu,Vv) #endif /* __HEXAGON_ARCH___ >= 60 */ #if __HVX_ARCH__ >= 60 @@ -2547,7 +2546,7 @@ Execution Slots: SLOT23 ========================================================================== */ -#define Q6_Vw_vrmpy_VubRb __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vrmpybus) +#define Q6_Vw_vrmpy_VubRb(Vu,Rt) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vrmpybus)(Vu,Rt) #endif /* __HEXAGON_ARCH___ >= 60 */ #if __HVX_ARCH__ >= 60 @@ -2558,7 +2557,7 @@ Execution Slots: SLOT23 ========================================================================== */ -#define Q6_Vw_vrmpyacc_VwVubRb __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vrmpybus_acc) +#define Q6_Vw_vrmpyacc_VwVubRb(Vx,Vu,Rt) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vrmpybus_acc)(Vx,Vu,Rt) #endif /* __HEXAGON_ARCH___ >= 60 */ #if __HVX_ARCH__ >= 60 @@ -2569,7 +2568,7 @@ Execution Slots: SLOT23 ========================================================================== */ -#define Q6_Ww_vrmpy_WubRbI __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vrmpybusi) +#define Q6_Ww_vrmpy_WubRbI(Vuu,Rt,Iu1) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vrmpybusi)(Vuu,Rt,Iu1) #endif /* __HEXAGON_ARCH___ >= 60 */ #if __HVX_ARCH__ >= 60 @@ -2580,7 +2579,7 @@ Execution Slots: SLOT23 ========================================================================== */ -#define Q6_Ww_vrmpyacc_WwWubRbI __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vrmpybusi_acc) +#define Q6_Ww_vrmpyacc_WwWubRbI(Vxx,Vuu,Rt,Iu1) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vrmpybusi_acc)(Vxx,Vuu,Rt,Iu1) #endif /* __HEXAGON_ARCH___ >= 60 */ #if __HVX_ARCH__ >= 60 @@ -2591,7 +2590,7 @@ Execution Slots: SLOT23 ========================================================================== */ -#define Q6_Vw_vrmpy_VubVb __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vrmpybusv) +#define Q6_Vw_vrmpy_VubVb(Vu,Vv) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vrmpybusv)(Vu,Vv) #endif /* __HEXAGON_ARCH___ >= 60 */ #if __HVX_ARCH__ >= 60 @@ -2602,7 +2601,7 @@ Execution Slots: SLOT23 ========================================================================== */ -#define Q6_Vw_vrmpyacc_VwVubVb __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vrmpybusv_acc) +#define Q6_Vw_vrmpyacc_VwVubVb(Vx,Vu,Vv) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vrmpybusv_acc)(Vx,Vu,Vv) #endif /* __HEXAGON_ARCH___ >= 60 */ #if __HVX_ARCH__ 
>= 60 @@ -2613,7 +2612,7 @@ Execution Slots: SLOT23 ========================================================================== */ -#define Q6_Vw_vrmpy_VbVb __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vrmpybv) +#define Q6_Vw_vrmpy_VbVb(Vu,Vv) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vrmpybv)(Vu,Vv) #endif /* __HEXAGON_ARCH___ >= 60 */ #if __HVX_ARCH__ >= 60 @@ -2624,7 +2623,7 @@ Execution Slots: SLOT23 ========================================================================== */ -#define Q6_Vw_vrmpyacc_VwVbVb __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vrmpybv_acc) +#define Q6_Vw_vrmpyacc_VwVbVb(Vx,Vu,Vv) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vrmpybv_acc)(Vx,Vu,Vv) #endif /* __HEXAGON_ARCH___ >= 60 */ #if __HVX_ARCH__ >= 60 @@ -2635,7 +2634,7 @@ Execution Slots: SLOT23 ========================================================================== */ -#define Q6_Vuw_vrmpy_VubRub __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vrmpyub) +#define Q6_Vuw_vrmpy_VubRub(Vu,Rt) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vrmpyub)(Vu,Rt) #endif /* __HEXAGON_ARCH___ >= 60 */ #if __HVX_ARCH__ >= 60 @@ -2646,7 +2645,7 @@ Execution Slots: SLOT23 ========================================================================== */ -#define Q6_Vuw_vrmpyacc_VuwVubRub __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vrmpyub_acc) +#define Q6_Vuw_vrmpyacc_VuwVubRub(Vx,Vu,Rt) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vrmpyub_acc)(Vx,Vu,Rt) #endif /* __HEXAGON_ARCH___ >= 60 */ #if __HVX_ARCH__ >= 60 @@ -2657,7 +2656,7 @@ Execution Slots: SLOT23 ========================================================================== */ -#define Q6_Wuw_vrmpy_WubRubI __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vrmpyubi) +#define Q6_Wuw_vrmpy_WubRubI(Vuu,Rt,Iu1) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vrmpyubi)(Vuu,Rt,Iu1) #endif /* __HEXAGON_ARCH___ >= 60 */ #if __HVX_ARCH__ >= 60 @@ -2668,7 +2667,7 @@ Execution Slots: SLOT23 ========================================================================== */ -#define Q6_Wuw_vrmpyacc_WuwWubRubI __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vrmpyubi_acc) +#define Q6_Wuw_vrmpyacc_WuwWubRubI(Vxx,Vuu,Rt,Iu1) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vrmpyubi_acc)(Vxx,Vuu,Rt,Iu1) #endif /* __HEXAGON_ARCH___ >= 60 */ #if __HVX_ARCH__ >= 60 @@ -2679,7 +2678,7 @@ Execution Slots: SLOT23 ========================================================================== */ -#define Q6_Vuw_vrmpy_VubVub __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vrmpyubv) +#define Q6_Vuw_vrmpy_VubVub(Vu,Vv) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vrmpyubv)(Vu,Vv) #endif /* __HEXAGON_ARCH___ >= 60 */ #if __HVX_ARCH__ >= 60 @@ -2690,7 +2689,7 @@ Execution Slots: SLOT23 ========================================================================== */ -#define Q6_Vuw_vrmpyacc_VuwVubVub __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vrmpyubv_acc) +#define Q6_Vuw_vrmpyacc_VuwVubVub(Vx,Vu,Vv) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vrmpyubv_acc)(Vx,Vu,Vv) #endif /* __HEXAGON_ARCH___ >= 60 */ #if __HVX_ARCH__ >= 60 @@ -2701,7 +2700,7 @@ Execution Slots: SLOT0123 ========================================================================== */ -#define Q6_V_vror_VR __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vror) +#define Q6_V_vror_VR(Vu,Rt) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vror)(Vu,Rt) #endif /* __HEXAGON_ARCH___ >= 60 */ #if __HVX_ARCH__ >= 60 @@ -2712,7 +2711,7 @@ Execution Slots: SLOT0123 ========================================================================== */ -#define Q6_Vb_vround_VhVh_sat 
__BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vroundhb) +#define Q6_Vb_vround_VhVh_sat(Vu,Vv) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vroundhb)(Vu,Vv) #endif /* __HEXAGON_ARCH___ >= 60 */ #if __HVX_ARCH__ >= 60 @@ -2723,7 +2722,7 @@ Execution Slots: SLOT0123 ========================================================================== */ -#define Q6_Vub_vround_VhVh_sat __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vroundhub) +#define Q6_Vub_vround_VhVh_sat(Vu,Vv) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vroundhub)(Vu,Vv) #endif /* __HEXAGON_ARCH___ >= 60 */ #if __HVX_ARCH__ >= 60 @@ -2734,7 +2733,7 @@ Execution Slots: SLOT0123 ========================================================================== */ -#define Q6_Vh_vround_VwVw_sat __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vroundwh) +#define Q6_Vh_vround_VwVw_sat(Vu,Vv) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vroundwh)(Vu,Vv) #endif /* __HEXAGON_ARCH___ >= 60 */ #if __HVX_ARCH__ >= 60 @@ -2745,7 +2744,7 @@ Execution Slots: SLOT0123 ========================================================================== */ -#define Q6_Vuh_vround_VwVw_sat __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vroundwuh) +#define Q6_Vuh_vround_VwVw_sat(Vu,Vv) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vroundwuh)(Vu,Vv) #endif /* __HEXAGON_ARCH___ >= 60 */ #if __HVX_ARCH__ >= 60 @@ -2756,7 +2755,7 @@ Execution Slots: SLOT23 ========================================================================== */ -#define Q6_Wuw_vrsad_WubRubI __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vrsadubi) +#define Q6_Wuw_vrsad_WubRubI(Vuu,Rt,Iu1) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vrsadubi)(Vuu,Rt,Iu1) #endif /* __HEXAGON_ARCH___ >= 60 */ #if __HVX_ARCH__ >= 60 @@ -2767,7 +2766,7 @@ Execution Slots: SLOT23 ========================================================================== */ -#define Q6_Wuw_vrsadacc_WuwWubRubI __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vrsadubi_acc) +#define Q6_Wuw_vrsadacc_WuwWubRubI(Vxx,Vuu,Rt,Iu1) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vrsadubi_acc)(Vxx,Vuu,Rt,Iu1) #endif /* __HEXAGON_ARCH___ >= 60 */ #if __HVX_ARCH__ >= 60 @@ -2778,7 +2777,7 @@ Execution Slots: SLOT0123 ========================================================================== */ -#define Q6_Vub_vsat_VhVh __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vsathub) +#define Q6_Vub_vsat_VhVh(Vu,Vv) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vsathub)(Vu,Vv) #endif /* __HEXAGON_ARCH___ >= 60 */ #if __HVX_ARCH__ >= 60 @@ -2789,7 +2788,7 @@ Execution Slots: SLOT0123 ========================================================================== */ -#define Q6_Vh_vsat_VwVw __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vsatwh) +#define Q6_Vh_vsat_VwVw(Vu,Vv) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vsatwh)(Vu,Vv) #endif /* __HEXAGON_ARCH___ >= 60 */ #if __HVX_ARCH__ >= 60 @@ -2800,7 +2799,7 @@ Execution Slots: SLOT0123 ========================================================================== */ -#define Q6_Wh_vsxt_Vb __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vsb) +#define Q6_Wh_vsxt_Vb(Vu) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vsb)(Vu) #endif /* __HEXAGON_ARCH___ >= 60 */ #if __HVX_ARCH__ >= 60 @@ -2811,7 +2810,7 @@ Execution Slots: SLOT0123 ========================================================================== */ -#define Q6_Ww_vsxt_Vh __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vsh) +#define Q6_Ww_vsxt_Vh(Vu) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vsh)(Vu) #endif /* __HEXAGON_ARCH___ >= 60 */ #if __HVX_ARCH__ >= 60 @@ -2822,7 +2821,7 @@ Execution Slots: SLOT0123 
========================================================================== */ -#define Q6_Vh_vshuffe_VhVh __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vshufeh) +#define Q6_Vh_vshuffe_VhVh(Vu,Vv) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vshufeh)(Vu,Vv) #endif /* __HEXAGON_ARCH___ >= 60 */ #if __HVX_ARCH__ >= 60 @@ -2833,7 +2832,7 @@ Execution Slots: SLOT0123 ========================================================================== */ -#define Q6_Vb_vshuff_Vb __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vshuffb) +#define Q6_Vb_vshuff_Vb(Vu) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vshuffb)(Vu) #endif /* __HEXAGON_ARCH___ >= 60 */ #if __HVX_ARCH__ >= 60 @@ -2844,7 +2843,7 @@ Execution Slots: SLOT0123 ========================================================================== */ -#define Q6_Vb_vshuffe_VbVb __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vshuffeb) +#define Q6_Vb_vshuffe_VbVb(Vu,Vv) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vshuffeb)(Vu,Vv) #endif /* __HEXAGON_ARCH___ >= 60 */ #if __HVX_ARCH__ >= 60 @@ -2855,7 +2854,7 @@ Execution Slots: SLOT0123 ========================================================================== */ -#define Q6_Vh_vshuff_Vh __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vshuffh) +#define Q6_Vh_vshuff_Vh(Vu) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vshuffh)(Vu) #endif /* __HEXAGON_ARCH___ >= 60 */ #if __HVX_ARCH__ >= 60 @@ -2866,7 +2865,7 @@ Execution Slots: SLOT0123 ========================================================================== */ -#define Q6_Vb_vshuffo_VbVb __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vshuffob) +#define Q6_Vb_vshuffo_VbVb(Vu,Vv) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vshuffob)(Vu,Vv) #endif /* __HEXAGON_ARCH___ >= 60 */ #if __HVX_ARCH__ >= 60 @@ -2877,7 +2876,7 @@ Execution Slots: SLOT0123 ========================================================================== */ -#define Q6_W_vshuff_VVR __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vshuffvdd) +#define Q6_W_vshuff_VVR(Vu,Vv,Rt) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vshuffvdd)(Vu,Vv,Rt) #endif /* __HEXAGON_ARCH___ >= 60 */ #if __HVX_ARCH__ >= 60 @@ -2888,7 +2887,7 @@ Execution Slots: SLOT0123 ========================================================================== */ -#define Q6_Wb_vshuffoe_VbVb __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vshufoeb) +#define Q6_Wb_vshuffoe_VbVb(Vu,Vv) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vshufoeb)(Vu,Vv) #endif /* __HEXAGON_ARCH___ >= 60 */ #if __HVX_ARCH__ >= 60 @@ -2899,7 +2898,7 @@ Execution Slots: SLOT0123 ========================================================================== */ -#define Q6_Wh_vshuffoe_VhVh __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vshufoeh) +#define Q6_Wh_vshuffoe_VhVh(Vu,Vv) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vshufoeh)(Vu,Vv) #endif /* __HEXAGON_ARCH___ >= 60 */ #if __HVX_ARCH__ >= 60 @@ -2910,7 +2909,7 @@ Execution Slots: SLOT0123 ========================================================================== */ -#define Q6_Vh_vshuffo_VhVh __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vshufoh) +#define Q6_Vh_vshuffo_VhVh(Vu,Vv) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vshufoh)(Vu,Vv) #endif /* __HEXAGON_ARCH___ >= 60 */ #if __HVX_ARCH__ >= 60 @@ -2921,7 +2920,7 @@ Execution Slots: SLOT0123 ========================================================================== */ -#define Q6_Vb_vsub_VbVb __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vsubb) +#define Q6_Vb_vsub_VbVb(Vu,Vv) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vsubb)(Vu,Vv) #endif /* __HEXAGON_ARCH___ >= 60 */ #if __HVX_ARCH__ >= 60 @@ -2932,7 +2931,7 
@@ Execution Slots: SLOT0123 ========================================================================== */ -#define Q6_Wb_vsub_WbWb __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vsubb_dv) +#define Q6_Wb_vsub_WbWb(Vuu,Vvv) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vsubb_dv)(Vuu,Vvv) #endif /* __HEXAGON_ARCH___ >= 60 */ #if __HVX_ARCH__ >= 60 @@ -2943,7 +2942,7 @@ Execution Slots: SLOT0123 ========================================================================== */ -#define Q6_Vb_condnac_QnVbVb __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vsubbnq) +#define Q6_Vb_condnac_QnVbVb(Qv,Vx,Vu) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vsubbnq)(__BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vandvrt)((Qv),-1),Vx,Vu) #endif /* __HEXAGON_ARCH___ >= 60 */ #if __HVX_ARCH__ >= 60 @@ -2954,7 +2953,7 @@ Execution Slots: SLOT0123 ========================================================================== */ -#define Q6_Vb_condnac_QVbVb __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vsubbq) +#define Q6_Vb_condnac_QVbVb(Qv,Vx,Vu) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vsubbq)(__BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vandvrt)((Qv),-1),Vx,Vu) #endif /* __HEXAGON_ARCH___ >= 60 */ #if __HVX_ARCH__ >= 60 @@ -2965,7 +2964,7 @@ Execution Slots: SLOT0123 ========================================================================== */ -#define Q6_Vh_vsub_VhVh __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vsubh) +#define Q6_Vh_vsub_VhVh(Vu,Vv) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vsubh)(Vu,Vv) #endif /* __HEXAGON_ARCH___ >= 60 */ #if __HVX_ARCH__ >= 60 @@ -2976,7 +2975,7 @@ Execution Slots: SLOT0123 ========================================================================== */ -#define Q6_Wh_vsub_WhWh __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vsubh_dv) +#define Q6_Wh_vsub_WhWh(Vuu,Vvv) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vsubh_dv)(Vuu,Vvv) #endif /* __HEXAGON_ARCH___ >= 60 */ #if __HVX_ARCH__ >= 60 @@ -2987,7 +2986,7 @@ Execution Slots: SLOT0123 ========================================================================== */ -#define Q6_Vh_condnac_QnVhVh __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vsubhnq) +#define Q6_Vh_condnac_QnVhVh(Qv,Vx,Vu) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vsubhnq)(__BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vandvrt)((Qv),-1),Vx,Vu) #endif /* __HEXAGON_ARCH___ >= 60 */ #if __HVX_ARCH__ >= 60 @@ -2998,7 +2997,7 @@ Execution Slots: SLOT0123 ========================================================================== */ -#define Q6_Vh_condnac_QVhVh __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vsubhq) +#define Q6_Vh_condnac_QVhVh(Qv,Vx,Vu) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vsubhq)(__BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vandvrt)((Qv),-1),Vx,Vu) #endif /* __HEXAGON_ARCH___ >= 60 */ #if __HVX_ARCH__ >= 60 @@ -3009,7 +3008,7 @@ Execution Slots: SLOT0123 ========================================================================== */ -#define Q6_Vh_vsub_VhVh_sat __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vsubhsat) +#define Q6_Vh_vsub_VhVh_sat(Vu,Vv) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vsubhsat)(Vu,Vv) #endif /* __HEXAGON_ARCH___ >= 60 */ #if __HVX_ARCH__ >= 60 @@ -3020,7 +3019,7 @@ Execution Slots: SLOT0123 ========================================================================== */ -#define Q6_Wh_vsub_WhWh_sat __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vsubhsat_dv) +#define Q6_Wh_vsub_WhWh_sat(Vuu,Vvv) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vsubhsat_dv)(Vuu,Vvv) #endif /* __HEXAGON_ARCH___ >= 60 */ #if __HVX_ARCH__ >= 60 @@ -3031,7 +3030,7 @@ Execution Slots: SLOT23 
========================================================================== */ -#define Q6_Ww_vsub_VhVh __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vsubhw) +#define Q6_Ww_vsub_VhVh(Vu,Vv) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vsubhw)(Vu,Vv) #endif /* __HEXAGON_ARCH___ >= 60 */ #if __HVX_ARCH__ >= 60 @@ -3042,7 +3041,7 @@ Execution Slots: SLOT23 ========================================================================== */ -#define Q6_Wh_vsub_VubVub __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vsububh) +#define Q6_Wh_vsub_VubVub(Vu,Vv) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vsububh)(Vu,Vv) #endif /* __HEXAGON_ARCH___ >= 60 */ #if __HVX_ARCH__ >= 60 @@ -3053,7 +3052,7 @@ Execution Slots: SLOT0123 ========================================================================== */ -#define Q6_Vub_vsub_VubVub_sat __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vsububsat) +#define Q6_Vub_vsub_VubVub_sat(Vu,Vv) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vsububsat)(Vu,Vv) #endif /* __HEXAGON_ARCH___ >= 60 */ #if __HVX_ARCH__ >= 60 @@ -3064,7 +3063,7 @@ Execution Slots: SLOT0123 ========================================================================== */ -#define Q6_Wub_vsub_WubWub_sat __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vsububsat_dv) +#define Q6_Wub_vsub_WubWub_sat(Vuu,Vvv) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vsububsat_dv)(Vuu,Vvv) #endif /* __HEXAGON_ARCH___ >= 60 */ #if __HVX_ARCH__ >= 60 @@ -3075,7 +3074,7 @@ Execution Slots: SLOT0123 ========================================================================== */ -#define Q6_Vuh_vsub_VuhVuh_sat __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vsubuhsat) +#define Q6_Vuh_vsub_VuhVuh_sat(Vu,Vv) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vsubuhsat)(Vu,Vv) #endif /* __HEXAGON_ARCH___ >= 60 */ #if __HVX_ARCH__ >= 60 @@ -3086,7 +3085,7 @@ Execution Slots: SLOT0123 ========================================================================== */ -#define Q6_Wuh_vsub_WuhWuh_sat __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vsubuhsat_dv) +#define Q6_Wuh_vsub_WuhWuh_sat(Vuu,Vvv) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vsubuhsat_dv)(Vuu,Vvv) #endif /* __HEXAGON_ARCH___ >= 60 */ #if __HVX_ARCH__ >= 60 @@ -3097,7 +3096,7 @@ Execution Slots: SLOT23 ========================================================================== */ -#define Q6_Ww_vsub_VuhVuh __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vsubuhw) +#define Q6_Ww_vsub_VuhVuh(Vu,Vv) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vsubuhw)(Vu,Vv) #endif /* __HEXAGON_ARCH___ >= 60 */ #if __HVX_ARCH__ >= 60 @@ -3108,7 +3107,7 @@ Execution Slots: SLOT0123 ========================================================================== */ -#define Q6_Vw_vsub_VwVw __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vsubw) +#define Q6_Vw_vsub_VwVw(Vu,Vv) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vsubw)(Vu,Vv) #endif /* __HEXAGON_ARCH___ >= 60 */ #if __HVX_ARCH__ >= 60 @@ -3119,7 +3118,7 @@ Execution Slots: SLOT0123 ========================================================================== */ -#define Q6_Ww_vsub_WwWw __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vsubw_dv) +#define Q6_Ww_vsub_WwWw(Vuu,Vvv) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vsubw_dv)(Vuu,Vvv) #endif /* __HEXAGON_ARCH___ >= 60 */ #if __HVX_ARCH__ >= 60 @@ -3130,7 +3129,7 @@ Execution Slots: SLOT0123 ========================================================================== */ -#define Q6_Vw_condnac_QnVwVw __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vsubwnq) +#define Q6_Vw_condnac_QnVwVw(Qv,Vx,Vu) 
__BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vsubwnq)(__BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vandvrt)((Qv),-1),Vx,Vu) #endif /* __HEXAGON_ARCH___ >= 60 */ #if __HVX_ARCH__ >= 60 @@ -3141,7 +3140,7 @@ Execution Slots: SLOT0123 ========================================================================== */ -#define Q6_Vw_condnac_QVwVw __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vsubwq) +#define Q6_Vw_condnac_QVwVw(Qv,Vx,Vu) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vsubwq)(__BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vandvrt)((Qv),-1),Vx,Vu) #endif /* __HEXAGON_ARCH___ >= 60 */ #if __HVX_ARCH__ >= 60 @@ -3152,7 +3151,7 @@ Execution Slots: SLOT0123 ========================================================================== */ -#define Q6_Vw_vsub_VwVw_sat __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vsubwsat) +#define Q6_Vw_vsub_VwVw_sat(Vu,Vv) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vsubwsat)(Vu,Vv) #endif /* __HEXAGON_ARCH___ >= 60 */ #if __HVX_ARCH__ >= 60 @@ -3163,7 +3162,7 @@ Execution Slots: SLOT0123 ========================================================================== */ -#define Q6_Ww_vsub_WwWw_sat __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vsubwsat_dv) +#define Q6_Ww_vsub_WwWw_sat(Vuu,Vvv) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vsubwsat_dv)(Vuu,Vvv) #endif /* __HEXAGON_ARCH___ >= 60 */ #if __HVX_ARCH__ >= 60 @@ -3174,7 +3173,7 @@ Execution Slots: SLOT0123 ========================================================================== */ -#define Q6_W_vswap_QVV __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vswap) +#define Q6_W_vswap_QVV(Qt,Vu,Vv) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vswap)(__BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vandvrt)((Qt),-1),Vu,Vv) #endif /* __HEXAGON_ARCH___ >= 60 */ #if __HVX_ARCH__ >= 60 @@ -3185,7 +3184,7 @@ Execution Slots: SLOT23 ========================================================================== */ -#define Q6_Wh_vtmpy_WbRb __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vtmpyb) +#define Q6_Wh_vtmpy_WbRb(Vuu,Rt) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vtmpyb)(Vuu,Rt) #endif /* __HEXAGON_ARCH___ >= 60 */ #if __HVX_ARCH__ >= 60 @@ -3196,7 +3195,7 @@ Execution Slots: SLOT23 ========================================================================== */ -#define Q6_Wh_vtmpyacc_WhWbRb __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vtmpyb_acc) +#define Q6_Wh_vtmpyacc_WhWbRb(Vxx,Vuu,Rt) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vtmpyb_acc)(Vxx,Vuu,Rt) #endif /* __HEXAGON_ARCH___ >= 60 */ #if __HVX_ARCH__ >= 60 @@ -3207,7 +3206,7 @@ Execution Slots: SLOT23 ========================================================================== */ -#define Q6_Wh_vtmpy_WubRb __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vtmpybus) +#define Q6_Wh_vtmpy_WubRb(Vuu,Rt) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vtmpybus)(Vuu,Rt) #endif /* __HEXAGON_ARCH___ >= 60 */ #if __HVX_ARCH__ >= 60 @@ -3218,7 +3217,7 @@ Execution Slots: SLOT23 ========================================================================== */ -#define Q6_Wh_vtmpyacc_WhWubRb __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vtmpybus_acc) +#define Q6_Wh_vtmpyacc_WhWubRb(Vxx,Vuu,Rt) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vtmpybus_acc)(Vxx,Vuu,Rt) #endif /* __HEXAGON_ARCH___ >= 60 */ #if __HVX_ARCH__ >= 60 @@ -3229,7 +3228,7 @@ Execution Slots: SLOT23 ========================================================================== */ -#define Q6_Ww_vtmpy_WhRb __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vtmpyhb) +#define Q6_Ww_vtmpy_WhRb(Vuu,Rt) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vtmpyhb)(Vuu,Rt) #endif /* 
__HEXAGON_ARCH___ >= 60 */ #if __HVX_ARCH__ >= 60 @@ -3240,7 +3239,7 @@ Execution Slots: SLOT23 ========================================================================== */ -#define Q6_Ww_vtmpyacc_WwWhRb __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vtmpyhb_acc) +#define Q6_Ww_vtmpyacc_WwWhRb(Vxx,Vuu,Rt) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vtmpyhb_acc)(Vxx,Vuu,Rt) #endif /* __HEXAGON_ARCH___ >= 60 */ #if __HVX_ARCH__ >= 60 @@ -3251,7 +3250,7 @@ Execution Slots: SLOT0123 ========================================================================== */ -#define Q6_Wh_vunpack_Vb __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vunpackb) +#define Q6_Wh_vunpack_Vb(Vu) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vunpackb)(Vu) #endif /* __HEXAGON_ARCH___ >= 60 */ #if __HVX_ARCH__ >= 60 @@ -3262,7 +3261,7 @@ Execution Slots: SLOT0123 ========================================================================== */ -#define Q6_Ww_vunpack_Vh __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vunpackh) +#define Q6_Ww_vunpack_Vh(Vu) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vunpackh)(Vu) #endif /* __HEXAGON_ARCH___ >= 60 */ #if __HVX_ARCH__ >= 60 @@ -3273,7 +3272,7 @@ Execution Slots: SLOT0123 ========================================================================== */ -#define Q6_Wh_vunpackoor_WhVb __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vunpackob) +#define Q6_Wh_vunpackoor_WhVb(Vxx,Vu) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vunpackob)(Vxx,Vu) #endif /* __HEXAGON_ARCH___ >= 60 */ #if __HVX_ARCH__ >= 60 @@ -3284,7 +3283,7 @@ Execution Slots: SLOT0123 ========================================================================== */ -#define Q6_Ww_vunpackoor_WwVh __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vunpackoh) +#define Q6_Ww_vunpackoor_WwVh(Vxx,Vu) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vunpackoh)(Vxx,Vu) #endif /* __HEXAGON_ARCH___ >= 60 */ #if __HVX_ARCH__ >= 60 @@ -3295,7 +3294,7 @@ Execution Slots: SLOT0123 ========================================================================== */ -#define Q6_Wuh_vunpack_Vub __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vunpackub) +#define Q6_Wuh_vunpack_Vub(Vu) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vunpackub)(Vu) #endif /* __HEXAGON_ARCH___ >= 60 */ #if __HVX_ARCH__ >= 60 @@ -3306,7 +3305,7 @@ Execution Slots: SLOT0123 ========================================================================== */ -#define Q6_Wuw_vunpack_Vuh __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vunpackuh) +#define Q6_Wuw_vunpack_Vuh(Vu) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vunpackuh)(Vu) #endif /* __HEXAGON_ARCH___ >= 60 */ #if __HVX_ARCH__ >= 60 @@ -3317,7 +3316,7 @@ Execution Slots: SLOT0123 ========================================================================== */ -#define Q6_V_vxor_VV __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vxor) +#define Q6_V_vxor_VV(Vu,Vv) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vxor)(Vu,Vv) #endif /* __HEXAGON_ARCH___ >= 60 */ #if __HVX_ARCH__ >= 60 @@ -3328,7 +3327,7 @@ Execution Slots: SLOT0123 ========================================================================== */ -#define Q6_Wuh_vzxt_Vub __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vzb) +#define Q6_Wuh_vzxt_Vub(Vu) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vzb)(Vu) #endif /* __HEXAGON_ARCH___ >= 60 */ #if __HVX_ARCH__ >= 60 @@ -3339,7 +3338,7 @@ Execution Slots: SLOT0123 ========================================================================== */ -#define Q6_Wuw_vzxt_Vuh __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vzh) +#define Q6_Wuw_vzxt_Vuh(Vu) 
__BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vzh)(Vu) #endif /* __HEXAGON_ARCH___ >= 60 */ #if __HVX_ARCH__ >= 62 @@ -3350,7 +3349,7 @@ Execution Slots: SLOT23 ========================================================================== */ -#define Q6_Vb_vsplat_R __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_lvsplatb) +#define Q6_Vb_vsplat_R(Rt) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_lvsplatb)(Rt) #endif /* __HEXAGON_ARCH___ >= 62 */ #if __HVX_ARCH__ >= 62 @@ -3361,7 +3360,7 @@ Execution Slots: SLOT23 ========================================================================== */ -#define Q6_Vh_vsplat_R __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_lvsplath) +#define Q6_Vh_vsplat_R(Rt) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_lvsplath)(Rt) #endif /* __HEXAGON_ARCH___ >= 62 */ #if __HVX_ARCH__ >= 62 @@ -3372,7 +3371,7 @@ Execution Slots: SLOT0123 ========================================================================== */ -#define Q6_Q_vsetq2_R __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_pred_scalar2v2) +#define Q6_Q_vsetq2_R(Rt) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vandqrt)((__BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_pred_scalar2v2)(Rt)),-1) #endif /* __HEXAGON_ARCH___ >= 62 */ #if __HVX_ARCH__ >= 62 @@ -3383,7 +3382,7 @@ Execution Slots: SLOT0123 ========================================================================== */ -#define Q6_Qb_vshuffe_QhQh __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_shuffeqh) +#define Q6_Qb_vshuffe_QhQh(Qs,Qt) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vandqrt)((__BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_shuffeqh)(__BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vandvrt)((Qs),-1),__BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vandvrt)((Qt),-1))),-1) #endif /* __HEXAGON_ARCH___ >= 62 */ #if __HVX_ARCH__ >= 62 @@ -3394,7 +3393,7 @@ Execution Slots: SLOT0123 ========================================================================== */ -#define Q6_Qh_vshuffe_QwQw __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_shuffeqw) +#define Q6_Qh_vshuffe_QwQw(Qs,Qt) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vandqrt)((__BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_shuffeqw)(__BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vandvrt)((Qs),-1),__BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vandvrt)((Qt),-1))),-1) #endif /* __HEXAGON_ARCH___ >= 62 */ #if __HVX_ARCH__ >= 62 @@ -3405,7 +3404,7 @@ Execution Slots: SLOT0123 ========================================================================== */ -#define Q6_Vb_vadd_VbVb_sat __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vaddbsat) +#define Q6_Vb_vadd_VbVb_sat(Vu,Vv) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vaddbsat)(Vu,Vv) #endif /* __HEXAGON_ARCH___ >= 62 */ #if __HVX_ARCH__ >= 62 @@ -3416,7 +3415,7 @@ Execution Slots: SLOT0123 ========================================================================== */ -#define Q6_Wb_vadd_WbWb_sat __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vaddbsat_dv) +#define Q6_Wb_vadd_WbWb_sat(Vuu,Vvv) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vaddbsat_dv)(Vuu,Vvv) #endif /* __HEXAGON_ARCH___ >= 62 */ #if __HVX_ARCH__ >= 62 @@ -3427,7 +3426,7 @@ Execution Slots: SLOT0123 ========================================================================== */ -#define Q6_Vw_vadd_VwVwQ_carry __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vaddcarry) +#define Q6_Vw_vadd_VwVwQ_carry(Vu,Vv,Qx) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vaddcarry)(Vu,Vv,Qx) #endif /* __HEXAGON_ARCH___ >= 62 */ #if __HVX_ARCH__ >= 62 @@ -3438,7 +3437,7 @@ Execution Slots: SLOT0123 ========================================================================== */ -#define 
Q6_Vh_vadd_vclb_VhVh __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vaddclbh) +#define Q6_Vh_vadd_vclb_VhVh(Vu,Vv) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vaddclbh)(Vu,Vv) #endif /* __HEXAGON_ARCH___ >= 62 */ #if __HVX_ARCH__ >= 62 @@ -3449,7 +3448,7 @@ Execution Slots: SLOT0123 ========================================================================== */ -#define Q6_Vw_vadd_vclb_VwVw __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vaddclbw) +#define Q6_Vw_vadd_vclb_VwVw(Vu,Vv) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vaddclbw)(Vu,Vv) #endif /* __HEXAGON_ARCH___ >= 62 */ #if __HVX_ARCH__ >= 62 @@ -3460,7 +3459,7 @@ Execution Slots: SLOT23 ========================================================================== */ -#define Q6_Ww_vaddacc_WwVhVh __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vaddhw_acc) +#define Q6_Ww_vaddacc_WwVhVh(Vxx,Vu,Vv) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vaddhw_acc)(Vxx,Vu,Vv) #endif /* __HEXAGON_ARCH___ >= 62 */ #if __HVX_ARCH__ >= 62 @@ -3471,7 +3470,7 @@ Execution Slots: SLOT23 ========================================================================== */ -#define Q6_Wh_vaddacc_WhVubVub __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vaddubh_acc) +#define Q6_Wh_vaddacc_WhVubVub(Vxx,Vu,Vv) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vaddubh_acc)(Vxx,Vu,Vv) #endif /* __HEXAGON_ARCH___ >= 62 */ #if __HVX_ARCH__ >= 62 @@ -3482,7 +3481,7 @@ Execution Slots: SLOT0123 ========================================================================== */ -#define Q6_Vub_vadd_VubVb_sat __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vaddububb_sat) +#define Q6_Vub_vadd_VubVb_sat(Vu,Vv) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vaddububb_sat)(Vu,Vv) #endif /* __HEXAGON_ARCH___ >= 62 */ #if __HVX_ARCH__ >= 62 @@ -3493,7 +3492,7 @@ Execution Slots: SLOT23 ========================================================================== */ -#define Q6_Ww_vaddacc_WwVuhVuh __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vadduhw_acc) +#define Q6_Ww_vaddacc_WwVuhVuh(Vxx,Vu,Vv) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vadduhw_acc)(Vxx,Vu,Vv) #endif /* __HEXAGON_ARCH___ >= 62 */ #if __HVX_ARCH__ >= 62 @@ -3504,7 +3503,7 @@ Execution Slots: SLOT0123 ========================================================================== */ -#define Q6_Vuw_vadd_VuwVuw_sat __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vadduwsat) +#define Q6_Vuw_vadd_VuwVuw_sat(Vu,Vv) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vadduwsat)(Vu,Vv) #endif /* __HEXAGON_ARCH___ >= 62 */ #if __HVX_ARCH__ >= 62 @@ -3515,7 +3514,7 @@ Execution Slots: SLOT0123 ========================================================================== */ -#define Q6_Wuw_vadd_WuwWuw_sat __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vadduwsat_dv) +#define Q6_Wuw_vadd_WuwWuw_sat(Vuu,Vvv) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vadduwsat_dv)(Vuu,Vvv) #endif /* __HEXAGON_ARCH___ >= 62 */ #if __HVX_ARCH__ >= 62 @@ -3526,7 +3525,7 @@ Execution Slots: SLOT23 ========================================================================== */ -#define Q6_V_vand_QnR __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vandnqrt) +#define Q6_V_vand_QnR(Qu,Rt) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vandnqrt)(__BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vandvrt)((Qu),-1),Rt) #endif /* __HEXAGON_ARCH___ >= 62 */ #if __HVX_ARCH__ >= 62 @@ -3537,7 +3536,7 @@ Execution Slots: SLOT23 ========================================================================== */ -#define Q6_V_vandor_VQnR __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vandnqrt_acc) +#define Q6_V_vandor_VQnR(Vx,Qu,Rt) 
__BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vandnqrt_acc)(Vx,__BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vandvrt)((Qu),-1),Rt) #endif /* __HEXAGON_ARCH___ >= 62 */ #if __HVX_ARCH__ >= 62 @@ -3548,7 +3547,7 @@ Execution Slots: SLOT0123 ========================================================================== */ -#define Q6_V_vand_QnV __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vandvnqv) +#define Q6_V_vand_QnV(Qv,Vu) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vandvnqv)(__BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vandvrt)((Qv),-1),Vu) #endif /* __HEXAGON_ARCH___ >= 62 */ #if __HVX_ARCH__ >= 62 @@ -3559,7 +3558,7 @@ Execution Slots: SLOT0123 ========================================================================== */ -#define Q6_V_vand_QV __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vandvqv) +#define Q6_V_vand_QV(Qv,Vu) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vandvqv)(__BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vandvrt)((Qv),-1),Vu) #endif /* __HEXAGON_ARCH___ >= 62 */ #if __HVX_ARCH__ >= 62 @@ -3570,7 +3569,7 @@ Execution Slots: SLOT0123 ========================================================================== */ -#define Q6_Vb_vasr_VhVhR_sat __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vasrhbsat) +#define Q6_Vb_vasr_VhVhR_sat(Vu,Vv,Rt) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vasrhbsat)(Vu,Vv,Rt) #endif /* __HEXAGON_ARCH___ >= 62 */ #if __HVX_ARCH__ >= 62 @@ -3581,7 +3580,7 @@ Execution Slots: SLOT0123 ========================================================================== */ -#define Q6_Vuh_vasr_VuwVuwR_rnd_sat __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vasruwuhrndsat) +#define Q6_Vuh_vasr_VuwVuwR_rnd_sat(Vu,Vv,Rt) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vasruwuhrndsat)(Vu,Vv,Rt) #endif /* __HEXAGON_ARCH___ >= 62 */ #if __HVX_ARCH__ >= 62 @@ -3592,7 +3591,7 @@ Execution Slots: SLOT0123 ========================================================================== */ -#define Q6_Vuh_vasr_VwVwR_rnd_sat __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vasrwuhrndsat) +#define Q6_Vuh_vasr_VwVwR_rnd_sat(Vu,Vv,Rt) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vasrwuhrndsat)(Vu,Vv,Rt) #endif /* __HEXAGON_ARCH___ >= 62 */ #if __HVX_ARCH__ >= 62 @@ -3603,7 +3602,7 @@ Execution Slots: SLOT0123 ========================================================================== */ -#define Q6_Vub_vlsr_VubR __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vlsrb) +#define Q6_Vub_vlsr_VubR(Vu,Rt) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vlsrb)(Vu,Rt) #endif /* __HEXAGON_ARCH___ >= 62 */ #if __HVX_ARCH__ >= 62 @@ -3614,7 +3613,7 @@ Execution Slots: SLOT0123 ========================================================================== */ -#define Q6_Vb_vlut32_VbVbR_nomatch __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vlutvvb_nm) +#define Q6_Vb_vlut32_VbVbR_nomatch(Vu,Vv,Rt) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vlutvvb_nm)(Vu,Vv,Rt) #endif /* __HEXAGON_ARCH___ >= 62 */ #if __HVX_ARCH__ >= 62 @@ -3625,7 +3624,7 @@ Execution Slots: SLOT0123 ========================================================================== */ -#define Q6_Vb_vlut32or_VbVbVbI __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vlutvvb_oracci) +#define Q6_Vb_vlut32or_VbVbVbI(Vx,Vu,Vv,Iu3) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vlutvvb_oracci)(Vx,Vu,Vv,Iu3) #endif /* __HEXAGON_ARCH___ >= 62 */ #if __HVX_ARCH__ >= 62 @@ -3636,7 +3635,7 @@ Execution Slots: SLOT0123 ========================================================================== */ -#define Q6_Vb_vlut32_VbVbI __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vlutvvbi) +#define Q6_Vb_vlut32_VbVbI(Vu,Vv,Iu3) 
__BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vlutvvbi)(Vu,Vv,Iu3) #endif /* __HEXAGON_ARCH___ >= 62 */ #if __HVX_ARCH__ >= 62 @@ -3647,7 +3646,7 @@ Execution Slots: SLOT0123 ========================================================================== */ -#define Q6_Wh_vlut16_VbVhR_nomatch __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vlutvwh_nm) +#define Q6_Wh_vlut16_VbVhR_nomatch(Vu,Vv,Rt) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vlutvwh_nm)(Vu,Vv,Rt) #endif /* __HEXAGON_ARCH___ >= 62 */ #if __HVX_ARCH__ >= 62 @@ -3658,7 +3657,7 @@ Execution Slots: SLOT0123 ========================================================================== */ -#define Q6_Wh_vlut16or_WhVbVhI __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vlutvwh_oracci) +#define Q6_Wh_vlut16or_WhVbVhI(Vxx,Vu,Vv,Iu3) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vlutvwh_oracci)(Vxx,Vu,Vv,Iu3) #endif /* __HEXAGON_ARCH___ >= 62 */ #if __HVX_ARCH__ >= 62 @@ -3669,7 +3668,7 @@ Execution Slots: SLOT0123 ========================================================================== */ -#define Q6_Wh_vlut16_VbVhI __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vlutvwhi) +#define Q6_Wh_vlut16_VbVhI(Vu,Vv,Iu3) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vlutvwhi)(Vu,Vv,Iu3) #endif /* __HEXAGON_ARCH___ >= 62 */ #if __HVX_ARCH__ >= 62 @@ -3680,7 +3679,7 @@ Execution Slots: SLOT0123 ========================================================================== */ -#define Q6_Vb_vmax_VbVb __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vmaxb) +#define Q6_Vb_vmax_VbVb(Vu,Vv) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vmaxb)(Vu,Vv) #endif /* __HEXAGON_ARCH___ >= 62 */ #if __HVX_ARCH__ >= 62 @@ -3691,7 +3690,7 @@ Execution Slots: SLOT0123 ========================================================================== */ -#define Q6_Vb_vmin_VbVb __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vminb) +#define Q6_Vb_vmin_VbVb(Vu,Vv) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vminb)(Vu,Vv) #endif /* __HEXAGON_ARCH___ >= 62 */ #if __HVX_ARCH__ >= 62 @@ -3702,7 +3701,7 @@ Execution Slots: SLOT23 ========================================================================== */ -#define Q6_Ww_vmpa_WuhRb __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vmpauhb) +#define Q6_Ww_vmpa_WuhRb(Vuu,Rt) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vmpauhb)(Vuu,Rt) #endif /* __HEXAGON_ARCH___ >= 62 */ #if __HVX_ARCH__ >= 62 @@ -3713,7 +3712,7 @@ Execution Slots: SLOT23 ========================================================================== */ -#define Q6_Ww_vmpaacc_WwWuhRb __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vmpauhb_acc) +#define Q6_Ww_vmpaacc_WwWuhRb(Vxx,Vuu,Rt) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vmpauhb_acc)(Vxx,Vuu,Rt) #endif /* __HEXAGON_ARCH___ >= 62 */ #if __HVX_ARCH__ >= 62 @@ -3724,7 +3723,7 @@ Execution Slots: SLOT23 ========================================================================== */ -#define Q6_W_vmpye_VwVuh __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vmpyewuh_64) +#define Q6_W_vmpye_VwVuh(Vu,Vv) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vmpyewuh_64)(Vu,Vv) #endif /* __HEXAGON_ARCH___ >= 62 */ #if __HVX_ARCH__ >= 62 @@ -3735,7 +3734,7 @@ Execution Slots: SLOT23 ========================================================================== */ -#define Q6_Vw_vmpyi_VwRub __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vmpyiwub) +#define Q6_Vw_vmpyi_VwRub(Vu,Rt) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vmpyiwub)(Vu,Rt) #endif /* __HEXAGON_ARCH___ >= 62 */ #if __HVX_ARCH__ >= 62 @@ -3746,7 +3745,7 @@ Execution Slots: SLOT23 
========================================================================== */ -#define Q6_Vw_vmpyiacc_VwVwRub __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vmpyiwub_acc) +#define Q6_Vw_vmpyiacc_VwVwRub(Vx,Vu,Rt) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vmpyiwub_acc)(Vx,Vu,Rt) #endif /* __HEXAGON_ARCH___ >= 62 */ #if __HVX_ARCH__ >= 62 @@ -3757,7 +3756,7 @@ Execution Slots: SLOT23 ========================================================================== */ -#define Q6_W_vmpyoacc_WVwVh __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vmpyowh_64_acc) +#define Q6_W_vmpyoacc_WVwVh(Vxx,Vu,Vv) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vmpyowh_64_acc)(Vxx,Vu,Vv) #endif /* __HEXAGON_ARCH___ >= 62 */ #if __HVX_ARCH__ >= 62 @@ -3768,7 +3767,7 @@ Execution Slots: SLOT0123 ========================================================================== */ -#define Q6_Vub_vround_VuhVuh_sat __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vrounduhub) +#define Q6_Vub_vround_VuhVuh_sat(Vu,Vv) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vrounduhub)(Vu,Vv) #endif /* __HEXAGON_ARCH___ >= 62 */ #if __HVX_ARCH__ >= 62 @@ -3779,7 +3778,7 @@ Execution Slots: SLOT0123 ========================================================================== */ -#define Q6_Vuh_vround_VuwVuw_sat __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vrounduwuh) +#define Q6_Vuh_vround_VuwVuw_sat(Vu,Vv) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vrounduwuh)(Vu,Vv) #endif /* __HEXAGON_ARCH___ >= 62 */ #if __HVX_ARCH__ >= 62 @@ -3790,7 +3789,7 @@ Execution Slots: SLOT0123 ========================================================================== */ -#define Q6_Vuh_vsat_VuwVuw __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vsatuwuh) +#define Q6_Vuh_vsat_VuwVuw(Vu,Vv) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vsatuwuh)(Vu,Vv) #endif /* __HEXAGON_ARCH___ >= 62 */ #if __HVX_ARCH__ >= 62 @@ -3801,7 +3800,7 @@ Execution Slots: SLOT0123 ========================================================================== */ -#define Q6_Vb_vsub_VbVb_sat __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vsubbsat) +#define Q6_Vb_vsub_VbVb_sat(Vu,Vv) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vsubbsat)(Vu,Vv) #endif /* __HEXAGON_ARCH___ >= 62 */ #if __HVX_ARCH__ >= 62 @@ -3812,7 +3811,7 @@ Execution Slots: SLOT0123 ========================================================================== */ -#define Q6_Wb_vsub_WbWb_sat __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vsubbsat_dv) +#define Q6_Wb_vsub_WbWb_sat(Vuu,Vvv) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vsubbsat_dv)(Vuu,Vvv) #endif /* __HEXAGON_ARCH___ >= 62 */ #if __HVX_ARCH__ >= 62 @@ -3823,7 +3822,7 @@ Execution Slots: SLOT0123 ========================================================================== */ -#define Q6_Vw_vsub_VwVwQ_carry __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vsubcarry) +#define Q6_Vw_vsub_VwVwQ_carry(Vu,Vv,Qx) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vsubcarry)(Vu,Vv,Qx) #endif /* __HEXAGON_ARCH___ >= 62 */ #if __HVX_ARCH__ >= 62 @@ -3834,7 +3833,7 @@ Execution Slots: SLOT0123 ========================================================================== */ -#define Q6_Vub_vsub_VubVb_sat __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vsubububb_sat) +#define Q6_Vub_vsub_VubVb_sat(Vu,Vv) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vsubububb_sat)(Vu,Vv) #endif /* __HEXAGON_ARCH___ >= 62 */ #if __HVX_ARCH__ >= 62 @@ -3845,7 +3844,7 @@ Execution Slots: SLOT0123 ========================================================================== */ -#define Q6_Vuw_vsub_VuwVuw_sat __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vsubuwsat) +#define 
Q6_Vuw_vsub_VuwVuw_sat(Vu,Vv) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vsubuwsat)(Vu,Vv) #endif /* __HEXAGON_ARCH___ >= 62 */ #if __HVX_ARCH__ >= 62 @@ -3856,7 +3855,7 @@ Execution Slots: SLOT0123 ========================================================================== */ -#define Q6_Wuw_vsub_WuwWuw_sat __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vsubuwsat_dv) +#define Q6_Wuw_vsub_WuwWuw_sat(Vuu,Vvv) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vsubuwsat_dv)(Vuu,Vvv) #endif /* __HEXAGON_ARCH___ >= 62 */ #if __HVX_ARCH__ >= 65 @@ -3867,7 +3866,7 @@ Execution Slots: SLOT0123 ========================================================================== */ -#define Q6_Vb_vabs_Vb __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vabsb) +#define Q6_Vb_vabs_Vb(Vu) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vabsb)(Vu) #endif /* __HEXAGON_ARCH___ >= 65 */ #if __HVX_ARCH__ >= 65 @@ -3878,7 +3877,7 @@ Execution Slots: SLOT0123 ========================================================================== */ -#define Q6_Vb_vabs_Vb_sat __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vabsb_sat) +#define Q6_Vb_vabs_Vb_sat(Vu) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vabsb_sat)(Vu) #endif /* __HEXAGON_ARCH___ >= 65 */ #if __HVX_ARCH__ >= 65 @@ -3889,7 +3888,7 @@ Execution Slots: SLOT0123 ========================================================================== */ -#define Q6_Vh_vaslacc_VhVhR __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vaslh_acc) +#define Q6_Vh_vaslacc_VhVhR(Vx,Vu,Rt) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vaslh_acc)(Vx,Vu,Rt) #endif /* __HEXAGON_ARCH___ >= 65 */ #if __HVX_ARCH__ >= 65 @@ -3900,7 +3899,7 @@ Execution Slots: SLOT0123 ========================================================================== */ -#define Q6_Vh_vasracc_VhVhR __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vasrh_acc) +#define Q6_Vh_vasracc_VhVhR(Vx,Vu,Rt) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vasrh_acc)(Vx,Vu,Rt) #endif /* __HEXAGON_ARCH___ >= 65 */ #if __HVX_ARCH__ >= 65 @@ -3911,7 +3910,7 @@ Execution Slots: SLOT0123 ========================================================================== */ -#define Q6_Vub_vasr_VuhVuhR_rnd_sat __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vasruhubrndsat) +#define Q6_Vub_vasr_VuhVuhR_rnd_sat(Vu,Vv,Rt) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vasruhubrndsat)(Vu,Vv,Rt) #endif /* __HEXAGON_ARCH___ >= 65 */ #if __HVX_ARCH__ >= 65 @@ -3922,7 +3921,7 @@ Execution Slots: SLOT0123 ========================================================================== */ -#define Q6_Vub_vasr_VuhVuhR_sat __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vasruhubsat) +#define Q6_Vub_vasr_VuhVuhR_sat(Vu,Vv,Rt) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vasruhubsat)(Vu,Vv,Rt) #endif /* __HEXAGON_ARCH___ >= 65 */ #if __HVX_ARCH__ >= 65 @@ -3933,7 +3932,7 @@ Execution Slots: SLOT0123 ========================================================================== */ -#define Q6_Vuh_vasr_VuwVuwR_sat __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vasruwuhsat) +#define Q6_Vuh_vasr_VuwVuwR_sat(Vu,Vv,Rt) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vasruwuhsat)(Vu,Vv,Rt) #endif /* __HEXAGON_ARCH___ >= 65 */ #if __HVX_ARCH__ >= 65 @@ -3944,7 +3943,7 @@ Execution Slots: SLOT0123 ========================================================================== */ -#define Q6_Vb_vavg_VbVb __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vavgb) +#define Q6_Vb_vavg_VbVb(Vu,Vv) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vavgb)(Vu,Vv) #endif /* __HEXAGON_ARCH___ >= 65 */ #if __HVX_ARCH__ >= 65 @@ -3955,7 +3954,7 @@ Execution Slots: SLOT0123 
========================================================================== */ -#define Q6_Vb_vavg_VbVb_rnd __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vavgbrnd) +#define Q6_Vb_vavg_VbVb_rnd(Vu,Vv) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vavgbrnd)(Vu,Vv) #endif /* __HEXAGON_ARCH___ >= 65 */ #if __HVX_ARCH__ >= 65 @@ -3966,7 +3965,7 @@ Execution Slots: SLOT0123 ========================================================================== */ -#define Q6_Vuw_vavg_VuwVuw __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vavguw) +#define Q6_Vuw_vavg_VuwVuw(Vu,Vv) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vavguw)(Vu,Vv) #endif /* __HEXAGON_ARCH___ >= 65 */ #if __HVX_ARCH__ >= 65 @@ -3977,7 +3976,7 @@ Execution Slots: SLOT0123 ========================================================================== */ -#define Q6_Vuw_vavg_VuwVuw_rnd __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vavguwrnd) +#define Q6_Vuw_vavg_VuwVuw_rnd(Vu,Vv) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vavguwrnd)(Vu,Vv) #endif /* __HEXAGON_ARCH___ >= 65 */ #if __HVX_ARCH__ >= 65 @@ -3988,7 +3987,7 @@ Execution Slots: SLOT0123 ========================================================================== */ -#define Q6_W_vzero __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vdd0) +#define Q6_W_vzero() __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vdd0)() #endif /* __HEXAGON_ARCH___ >= 65 */ #if __HVX_ARCH__ >= 65 @@ -3999,7 +3998,7 @@ Execution Slots: SLOT01 ========================================================================== */ -#define Q6_vgather_ARMVh __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vgathermh) +#define Q6_vgather_ARMVh(Rs,Rt,Mu,Vv) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vgathermh)(Rs,Rt,Mu,Vv) #endif /* __HEXAGON_ARCH___ >= 65 */ #if __HVX_ARCH__ >= 65 @@ -4010,7 +4009,7 @@ Execution Slots: SLOT01 ========================================================================== */ -#define Q6_vgather_AQRMVh __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vgathermhq) +#define Q6_vgather_AQRMVh(Rs,Qs,Rt,Mu,Vv) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vgathermhq)(Rs,__BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vandvrt)((Qs),-1),Rt,Mu,Vv) #endif /* __HEXAGON_ARCH___ >= 65 */ #if __HVX_ARCH__ >= 65 @@ -4021,7 +4020,7 @@ Execution Slots: SLOT01 ========================================================================== */ -#define Q6_vgather_ARMWw __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vgathermhw) +#define Q6_vgather_ARMWw(Rs,Rt,Mu,Vvv) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vgathermhw)(Rs,Rt,Mu,Vvv) #endif /* __HEXAGON_ARCH___ >= 65 */ #if __HVX_ARCH__ >= 65 @@ -4032,7 +4031,7 @@ Execution Slots: SLOT01 ========================================================================== */ -#define Q6_vgather_AQRMWw __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vgathermhwq) +#define Q6_vgather_AQRMWw(Rs,Qs,Rt,Mu,Vvv) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vgathermhwq)(Rs,__BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vandvrt)((Qs),-1),Rt,Mu,Vvv) #endif /* __HEXAGON_ARCH___ >= 65 */ #if __HVX_ARCH__ >= 65 @@ -4043,7 +4042,7 @@ Execution Slots: SLOT01 ========================================================================== */ -#define Q6_vgather_ARMVw __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vgathermw) +#define Q6_vgather_ARMVw(Rs,Rt,Mu,Vv) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vgathermw)(Rs,Rt,Mu,Vv) #endif /* __HEXAGON_ARCH___ >= 65 */ #if __HVX_ARCH__ >= 65 @@ -4054,7 +4053,7 @@ Execution Slots: SLOT01 ========================================================================== */ -#define Q6_vgather_AQRMVw 
__BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vgathermwq) +#define Q6_vgather_AQRMVw(Rs,Qs,Rt,Mu,Vv) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vgathermwq)(Rs,__BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vandvrt)((Qs),-1),Rt,Mu,Vv) #endif /* __HEXAGON_ARCH___ >= 65 */ #if __HVX_ARCH__ >= 65 @@ -4065,7 +4064,7 @@ Execution Slots: SLOT2 ========================================================================== */ -#define Q6_Vh_vlut4_VuhPh __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vlut4) +#define Q6_Vh_vlut4_VuhPh(Vu,Rtt) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vlut4)(Vu,Rtt) #endif /* __HEXAGON_ARCH___ >= 65 */ #if __HVX_ARCH__ >= 65 @@ -4076,7 +4075,7 @@ Execution Slots: SLOT23 ========================================================================== */ -#define Q6_Wh_vmpa_WubRub __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vmpabuu) +#define Q6_Wh_vmpa_WubRub(Vuu,Rt) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vmpabuu)(Vuu,Rt) #endif /* __HEXAGON_ARCH___ >= 65 */ #if __HVX_ARCH__ >= 65 @@ -4087,7 +4086,7 @@ Execution Slots: SLOT23 ========================================================================== */ -#define Q6_Wh_vmpaacc_WhWubRub __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vmpabuu_acc) +#define Q6_Wh_vmpaacc_WhWubRub(Vxx,Vuu,Rt) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vmpabuu_acc)(Vxx,Vuu,Rt) #endif /* __HEXAGON_ARCH___ >= 65 */ #if __HVX_ARCH__ >= 65 @@ -4098,7 +4097,7 @@ Execution Slots: SLOT2 ========================================================================== */ -#define Q6_Vh_vmpa_VhVhVhPh_sat __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vmpahhsat) +#define Q6_Vh_vmpa_VhVhVhPh_sat(Vx,Vu,Rtt) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vmpahhsat)(Vx,Vu,Rtt) #endif /* __HEXAGON_ARCH___ >= 65 */ #if __HVX_ARCH__ >= 65 @@ -4109,7 +4108,7 @@ Execution Slots: SLOT2 ========================================================================== */ -#define Q6_Vh_vmpa_VhVhVuhPuh_sat __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vmpauhuhsat) +#define Q6_Vh_vmpa_VhVhVuhPuh_sat(Vx,Vu,Rtt) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vmpauhuhsat)(Vx,Vu,Rtt) #endif /* __HEXAGON_ARCH___ >= 65 */ #if __HVX_ARCH__ >= 65 @@ -4120,7 +4119,7 @@ Execution Slots: SLOT2 ========================================================================== */ -#define Q6_Vh_vmps_VhVhVuhPuh_sat __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vmpsuhuhsat) +#define Q6_Vh_vmps_VhVhVuhPuh_sat(Vx,Vu,Rtt) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vmpsuhuhsat)(Vx,Vu,Rtt) #endif /* __HEXAGON_ARCH___ >= 65 */ #if __HVX_ARCH__ >= 65 @@ -4131,7 +4130,7 @@ Execution Slots: SLOT23 ========================================================================== */ -#define Q6_Ww_vmpyacc_WwVhRh __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vmpyh_acc) +#define Q6_Ww_vmpyacc_WwVhRh(Vxx,Vu,Rt) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vmpyh_acc)(Vxx,Vu,Rt) #endif /* __HEXAGON_ARCH___ >= 65 */ #if __HVX_ARCH__ >= 65 @@ -4142,7 +4141,7 @@ Execution Slots: SLOT23 ========================================================================== */ -#define Q6_Vuw_vmpye_VuhRuh __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vmpyuhe) +#define Q6_Vuw_vmpye_VuhRuh(Vu,Rt) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vmpyuhe)(Vu,Rt) #endif /* __HEXAGON_ARCH___ >= 65 */ #if __HVX_ARCH__ >= 65 @@ -4153,7 +4152,7 @@ Execution Slots: SLOT23 ========================================================================== */ -#define Q6_Vuw_vmpyeacc_VuwVuhRuh __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vmpyuhe_acc) +#define Q6_Vuw_vmpyeacc_VuwVuhRuh(Vx,Vu,Rt) 
__BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vmpyuhe_acc)(Vx,Vu,Rt) #endif /* __HEXAGON_ARCH___ >= 65 */ #if __HVX_ARCH__ >= 65 @@ -4164,7 +4163,7 @@ Execution Slots: SLOT0123 ========================================================================== */ -#define Q6_Vb_vnavg_VbVb __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vnavgb) +#define Q6_Vb_vnavg_VbVb(Vu,Vv) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vnavgb)(Vu,Vv) #endif /* __HEXAGON_ARCH___ >= 65 */ #if __HVX_ARCH__ >= 65 @@ -4175,7 +4174,7 @@ Execution Slots: SLOT0123 ========================================================================== */ -#define Q6_Vb_prefixsum_Q __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vprefixqb) +#define Q6_Vb_prefixsum_Q(Qv) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vprefixqb)(__BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vandvrt)((Qv),-1)) #endif /* __HEXAGON_ARCH___ >= 65 */ #if __HVX_ARCH__ >= 65 @@ -4186,7 +4185,7 @@ Execution Slots: SLOT0123 ========================================================================== */ -#define Q6_Vh_prefixsum_Q __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vprefixqh) +#define Q6_Vh_prefixsum_Q(Qv) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vprefixqh)(__BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vandvrt)((Qv),-1)) #endif /* __HEXAGON_ARCH___ >= 65 */ #if __HVX_ARCH__ >= 65 @@ -4197,7 +4196,7 @@ Execution Slots: SLOT0123 ========================================================================== */ -#define Q6_Vw_prefixsum_Q __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vprefixqw) +#define Q6_Vw_prefixsum_Q(Qv) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vprefixqw)(__BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vandvrt)((Qv),-1)) #endif /* __HEXAGON_ARCH___ >= 65 */ #if __HVX_ARCH__ >= 65 @@ -4208,7 +4207,7 @@ Execution Slots: SLOT0 ========================================================================== */ -#define Q6_vscatter_RMVhV __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vscattermh) +#define Q6_vscatter_RMVhV(Rt,Mu,Vv,Vw) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vscattermh)(Rt,Mu,Vv,Vw) #endif /* __HEXAGON_ARCH___ >= 65 */ #if __HVX_ARCH__ >= 65 @@ -4219,7 +4218,7 @@ Execution Slots: SLOT0 ========================================================================== */ -#define Q6_vscatteracc_RMVhV __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vscattermh_add) +#define Q6_vscatteracc_RMVhV(Rt,Mu,Vv,Vw) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vscattermh_add)(Rt,Mu,Vv,Vw) #endif /* __HEXAGON_ARCH___ >= 65 */ #if __HVX_ARCH__ >= 65 @@ -4230,7 +4229,7 @@ Execution Slots: SLOT0 ========================================================================== */ -#define Q6_vscatter_QRMVhV __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vscattermhq) +#define Q6_vscatter_QRMVhV(Qs,Rt,Mu,Vv,Vw) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vscattermhq)(__BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vandvrt)((Qs),-1),Rt,Mu,Vv,Vw) #endif /* __HEXAGON_ARCH___ >= 65 */ #if __HVX_ARCH__ >= 65 @@ -4241,7 +4240,7 @@ Execution Slots: SLOT0 ========================================================================== */ -#define Q6_vscatter_RMWwV __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vscattermhw) +#define Q6_vscatter_RMWwV(Rt,Mu,Vvv,Vw) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vscattermhw)(Rt,Mu,Vvv,Vw) #endif /* __HEXAGON_ARCH___ >= 65 */ #if __HVX_ARCH__ >= 65 @@ -4252,7 +4251,7 @@ Execution Slots: SLOT0 ========================================================================== */ -#define Q6_vscatteracc_RMWwV __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vscattermhw_add) +#define 
Q6_vscatteracc_RMWwV(Rt,Mu,Vvv,Vw) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vscattermhw_add)(Rt,Mu,Vvv,Vw) #endif /* __HEXAGON_ARCH___ >= 65 */ #if __HVX_ARCH__ >= 65 @@ -4263,7 +4262,7 @@ Execution Slots: SLOT0 ========================================================================== */ -#define Q6_vscatter_QRMWwV __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vscattermhwq) +#define Q6_vscatter_QRMWwV(Qs,Rt,Mu,Vvv,Vw) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vscattermhwq)(__BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vandvrt)((Qs),-1),Rt,Mu,Vvv,Vw) #endif /* __HEXAGON_ARCH___ >= 65 */ #if __HVX_ARCH__ >= 65 @@ -4274,7 +4273,7 @@ Execution Slots: SLOT0 ========================================================================== */ -#define Q6_vscatter_RMVwV __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vscattermw) +#define Q6_vscatter_RMVwV(Rt,Mu,Vv,Vw) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vscattermw)(Rt,Mu,Vv,Vw) #endif /* __HEXAGON_ARCH___ >= 65 */ #if __HVX_ARCH__ >= 65 @@ -4285,7 +4284,7 @@ Execution Slots: SLOT0 ========================================================================== */ -#define Q6_vscatteracc_RMVwV __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vscattermw_add) +#define Q6_vscatteracc_RMVwV(Rt,Mu,Vv,Vw) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vscattermw_add)(Rt,Mu,Vv,Vw) #endif /* __HEXAGON_ARCH___ >= 65 */ #if __HVX_ARCH__ >= 65 @@ -4296,7 +4295,7 @@ Execution Slots: SLOT0 ========================================================================== */ -#define Q6_vscatter_QRMVwV __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vscattermwq) +#define Q6_vscatter_QRMVwV(Qs,Rt,Mu,Vv,Vw) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vscattermwq)(__BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vandvrt)((Qs),-1),Rt,Mu,Vv,Vw) #endif /* __HEXAGON_ARCH___ >= 65 */ #if __HVX_ARCH__ >= 66 @@ -4307,7 +4306,7 @@ Execution Slots: SLOT0123 ========================================================================== */ -#define Q6_Vw_vadd_VwVwQ_carry_sat __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vaddcarrysat) +#define Q6_Vw_vadd_VwVwQ_carry_sat(Vu,Vv,Qs) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vaddcarrysat)(Vu,Vv,__BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vandvrt)((Qs),-1)) #endif /* __HEXAGON_ARCH___ >= 66 */ #if __HVX_ARCH__ >= 66 @@ -4318,7 +4317,7 @@ Execution Slots: SLOT0123 ========================================================================== */ -#define Q6_Ww_vasrinto_WwVwVw __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vasr_into) +#define Q6_Ww_vasrinto_WwVwVw(Vxx,Vu,Vv) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vasr_into)(Vxx,Vu,Vv) #endif /* __HEXAGON_ARCH___ >= 66 */ #if __HVX_ARCH__ >= 66 @@ -4329,7 +4328,7 @@ Execution Slots: SLOT0123 ========================================================================== */ -#define Q6_Vuw_vrotr_VuwVuw __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vrotr) +#define Q6_Vuw_vrotr_VuwVuw(Vu,Vv) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vrotr)(Vu,Vv) #endif /* __HEXAGON_ARCH___ >= 66 */ #if __HVX_ARCH__ >= 66 @@ -4340,7 +4339,7 @@ Execution Slots: SLOT0123 ========================================================================== */ -#define Q6_Vw_vsatdw_VwVw __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vsatdw) +#define Q6_Vw_vsatdw_VwVw(Vu,Vv) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vsatdw)(Vu,Vv) #endif /* __HEXAGON_ARCH___ >= 66 */ #if __HVX_ARCH__ >= 68 @@ -4351,7 +4350,7 @@ Execution Slots: SLOT23 ========================================================================== */ -#define Q6_Ww_v6mpy_WubWbI_h 
__BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_v6mpyhubs10) +#define Q6_Ww_v6mpy_WubWbI_h(Vuu,Vvv,Iu2) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_v6mpyhubs10)(Vuu,Vvv,Iu2) #endif /* __HEXAGON_ARCH___ >= 68 */ #if __HVX_ARCH__ >= 68 @@ -4362,7 +4361,7 @@ Execution Slots: SLOT23 ========================================================================== */ -#define Q6_Ww_v6mpyacc_WwWubWbI_h __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_v6mpyhubs10_vxx) +#define Q6_Ww_v6mpyacc_WwWubWbI_h(Vxx,Vuu,Vvv,Iu2) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_v6mpyhubs10_vxx)(Vxx,Vuu,Vvv,Iu2) #endif /* __HEXAGON_ARCH___ >= 68 */ #if __HVX_ARCH__ >= 68 @@ -4373,7 +4372,7 @@ Execution Slots: SLOT23 ========================================================================== */ -#define Q6_Ww_v6mpy_WubWbI_v __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_v6mpyvubs10) +#define Q6_Ww_v6mpy_WubWbI_v(Vuu,Vvv,Iu2) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_v6mpyvubs10)(Vuu,Vvv,Iu2) #endif /* __HEXAGON_ARCH___ >= 68 */ #if __HVX_ARCH__ >= 68 @@ -4384,7 +4383,7 @@ Execution Slots: SLOT23 ========================================================================== */ -#define Q6_Ww_v6mpyacc_WwWubWbI_v __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_v6mpyvubs10_vxx) +#define Q6_Ww_v6mpyacc_WwWubWbI_v(Vxx,Vuu,Vvv,Iu2) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_v6mpyvubs10_vxx)(Vxx,Vuu,Vvv,Iu2) #endif /* __HEXAGON_ARCH___ >= 68 */ #endif /* __HVX__ */ diff --git a/clang/test/CodeGen/builtins-hexagon-v66-128B.c b/clang/test/CodeGen/builtins-hexagon-v66-128B.c index 074728ec07ec..5839d3ae975e 100644 --- a/clang/test/CodeGen/builtins-hexagon-v66-128B.c +++ b/clang/test/CodeGen/builtins-hexagon-v66-128B.c @@ -20,7 +20,7 @@ HEXAGON_Vect1024 test1(void *in, void *out) { v2 = *p++; q1 = *p++; - return __builtin_HEXAGON_V6_vaddcarrysat_128B(v1, v2, q1); + return __builtin_HEXAGON_V6_vaddcarrysat_128B(v1, v2, __builtin_HEXAGON_V6_vandvrt_128B(q1, -1)); } // CHECK-LABEL: @test26 diff --git a/clang/test/CodeGen/builtins-hexagon-v66.c b/clang/test/CodeGen/builtins-hexagon-v66.c index 767f9faf7702..a222228dd683 100644 --- a/clang/test/CodeGen/builtins-hexagon-v66.c +++ b/clang/test/CodeGen/builtins-hexagon-v66.c @@ -44,7 +44,7 @@ HEXAGON_Vect512 test5(void *in, void *out) { v2 = *p++; q1 = *p++; - return __builtin_HEXAGON_V6_vaddcarrysat(v1, v2, q1); + return __builtin_HEXAGON_V6_vaddcarrysat(v1, v2, __builtin_HEXAGON_V6_vandvrt(q1, -1)); } // CHECK-LABEL: @test6 diff --git a/clang/test/CodeGen/builtins-hvx128.c b/clang/test/CodeGen/builtins-hvx128.c index d61afdefc2ae..226f6c0792a7 100644 --- a/clang/test/CodeGen/builtins-hvx128.c +++ b/clang/test/CodeGen/builtins-hvx128.c @@ -6,6 +6,17 @@ void test() { int v128 __attribute__((__vector_size__(128))); int v256 __attribute__((__vector_size__(256))); + // These are special and ugly: they take an HVX vector in place of + // the HVX vector predicate. 
+ // CHECK: @llvm.hexagon.V6.vmaskedstorenq.128B + __builtin_HEXAGON_V6_vmaskedstorenq_128B(q128, 0, v128); + // CHECK: @llvm.hexagon.V6.vmaskedstorentnq.128B + __builtin_HEXAGON_V6_vmaskedstorentnq_128B(q128, 0, v128); + // CHECK: @llvm.hexagon.V6.vmaskedstorentq.128B + __builtin_HEXAGON_V6_vmaskedstorentq_128B(q128, 0, v128); + // CHECK: @llvm.hexagon.V6.vmaskedstoreq.128B + __builtin_HEXAGON_V6_vmaskedstoreq_128B(q128, 0, v128); + // CHECK: @llvm.hexagon.V6.extractw.128B __builtin_HEXAGON_V6_extractw_128B(v128, 0); // CHECK: @llvm.hexagon.V6.hi.128B @@ -19,33 +30,33 @@ void test() { // CHECK: @llvm.hexagon.V6.lvsplatw.128B __builtin_HEXAGON_V6_lvsplatw_128B(0); // CHECK: @llvm.hexagon.V6.pred.and.128B - __builtin_HEXAGON_V6_pred_and_128B(q128, q128); + __builtin_HEXAGON_V6_pred_and_128B(__builtin_HEXAGON_V6_vandvrt_128B(q128, -1), __builtin_HEXAGON_V6_vandvrt_128B(q128, -1)); // CHECK: @llvm.hexagon.V6.pred.and.n.128B - __builtin_HEXAGON_V6_pred_and_n_128B(q128, q128); + __builtin_HEXAGON_V6_pred_and_n_128B(__builtin_HEXAGON_V6_vandvrt_128B(q128, -1), __builtin_HEXAGON_V6_vandvrt_128B(q128, -1)); // CHECK: @llvm.hexagon.V6.pred.not.128B - __builtin_HEXAGON_V6_pred_not_128B(q128); + __builtin_HEXAGON_V6_pred_not_128B(__builtin_HEXAGON_V6_vandvrt_128B(q128, -1)); // CHECK: @llvm.hexagon.V6.pred.or.128B - __builtin_HEXAGON_V6_pred_or_128B(q128, q128); + __builtin_HEXAGON_V6_pred_or_128B(__builtin_HEXAGON_V6_vandvrt_128B(q128, -1), __builtin_HEXAGON_V6_vandvrt_128B(q128, -1)); // CHECK: @llvm.hexagon.V6.pred.or.n.128B - __builtin_HEXAGON_V6_pred_or_n_128B(q128, q128); + __builtin_HEXAGON_V6_pred_or_n_128B(__builtin_HEXAGON_V6_vandvrt_128B(q128, -1), __builtin_HEXAGON_V6_vandvrt_128B(q128, -1)); // CHECK: @llvm.hexagon.V6.pred.scalar2.128B __builtin_HEXAGON_V6_pred_scalar2_128B(0); // CHECK: @llvm.hexagon.V6.pred.scalar2v2.128B __builtin_HEXAGON_V6_pred_scalar2v2_128B(0); // CHECK: @llvm.hexagon.V6.pred.xor.128B - __builtin_HEXAGON_V6_pred_xor_128B(q128, q128); + __builtin_HEXAGON_V6_pred_xor_128B(__builtin_HEXAGON_V6_vandvrt_128B(q128, -1), __builtin_HEXAGON_V6_vandvrt_128B(q128, -1)); // CHECK: @llvm.hexagon.V6.shuffeqh.128B - __builtin_HEXAGON_V6_shuffeqh_128B(q128, q128); + __builtin_HEXAGON_V6_shuffeqh_128B(__builtin_HEXAGON_V6_vandvrt_128B(q128, -1), __builtin_HEXAGON_V6_vandvrt_128B(q128, -1)); // CHECK: @llvm.hexagon.V6.shuffeqw.128B - __builtin_HEXAGON_V6_shuffeqw_128B(q128, q128); + __builtin_HEXAGON_V6_shuffeqw_128B(__builtin_HEXAGON_V6_vandvrt_128B(q128, -1), __builtin_HEXAGON_V6_vandvrt_128B(q128, -1)); // CHECK: @llvm.hexagon.V6.vS32b.nqpred.ai.128B - __builtin_HEXAGON_V6_vS32b_nqpred_ai_128B(q128, 0, v128); + __builtin_HEXAGON_V6_vS32b_nqpred_ai_128B(__builtin_HEXAGON_V6_vandvrt_128B(q128, -1), 0, v128); // CHECK: @llvm.hexagon.V6.vS32b.nt.nqpred.ai.128B - __builtin_HEXAGON_V6_vS32b_nt_nqpred_ai_128B(q128, 0, v128); + __builtin_HEXAGON_V6_vS32b_nt_nqpred_ai_128B(__builtin_HEXAGON_V6_vandvrt_128B(q128, -1), 0, v128); // CHECK: @llvm.hexagon.V6.vS32b.nt.qpred.ai.128B - __builtin_HEXAGON_V6_vS32b_nt_qpred_ai_128B(q128, 0, v128); + __builtin_HEXAGON_V6_vS32b_nt_qpred_ai_128B(__builtin_HEXAGON_V6_vandvrt_128B(q128, -1), 0, v128); // CHECK: @llvm.hexagon.V6.vS32b.qpred.ai.128B - __builtin_HEXAGON_V6_vS32b_qpred_ai_128B(q128, 0, v128); + __builtin_HEXAGON_V6_vS32b_qpred_ai_128B(__builtin_HEXAGON_V6_vandvrt_128B(q128, -1), 0, v128); // CHECK: @llvm.hexagon.V6.vabsb.128B __builtin_HEXAGON_V6_vabsb_128B(v128); // CHECK: @llvm.hexagon.V6.vabsb.sat.128B @@ -71,9 +82,9 @@ void test() { // 
CHECK: @llvm.hexagon.V6.vaddb.dv.128B __builtin_HEXAGON_V6_vaddb_dv_128B(v256, v256); // CHECK: @llvm.hexagon.V6.vaddbnq.128B - __builtin_HEXAGON_V6_vaddbnq_128B(q128, v128, v128); + __builtin_HEXAGON_V6_vaddbnq_128B(__builtin_HEXAGON_V6_vandvrt_128B(q128, -1), v128, v128); // CHECK: @llvm.hexagon.V6.vaddbq.128B - __builtin_HEXAGON_V6_vaddbq_128B(q128, v128, v128); + __builtin_HEXAGON_V6_vaddbq_128B(__builtin_HEXAGON_V6_vandvrt_128B(q128, -1), v128, v128); // CHECK: @llvm.hexagon.V6.vaddbsat.128B __builtin_HEXAGON_V6_vaddbsat_128B(v128, v128); // CHECK: @llvm.hexagon.V6.vaddbsat.dv.128B @@ -89,9 +100,9 @@ void test() { // CHECK: @llvm.hexagon.V6.vaddh.dv.128B __builtin_HEXAGON_V6_vaddh_dv_128B(v256, v256); // CHECK: @llvm.hexagon.V6.vaddhnq.128B - __builtin_HEXAGON_V6_vaddhnq_128B(q128, v128, v128); + __builtin_HEXAGON_V6_vaddhnq_128B(__builtin_HEXAGON_V6_vandvrt_128B(q128, -1), v128, v128); // CHECK: @llvm.hexagon.V6.vaddhq.128B - __builtin_HEXAGON_V6_vaddhq_128B(q128, v128, v128); + __builtin_HEXAGON_V6_vaddhq_128B(__builtin_HEXAGON_V6_vandvrt_128B(q128, -1), v128, v128); // CHECK: @llvm.hexagon.V6.vaddhsat.128B __builtin_HEXAGON_V6_vaddhsat_128B(v128, v128); // CHECK: @llvm.hexagon.V6.vaddhsat.dv.128B @@ -127,9 +138,9 @@ void test() { // CHECK: @llvm.hexagon.V6.vaddw.dv.128B __builtin_HEXAGON_V6_vaddw_dv_128B(v256, v256); // CHECK: @llvm.hexagon.V6.vaddwnq.128B - __builtin_HEXAGON_V6_vaddwnq_128B(q128, v128, v128); + __builtin_HEXAGON_V6_vaddwnq_128B(__builtin_HEXAGON_V6_vandvrt_128B(q128, -1), v128, v128); // CHECK: @llvm.hexagon.V6.vaddwq.128B - __builtin_HEXAGON_V6_vaddwq_128B(q128, v128, v128); + __builtin_HEXAGON_V6_vaddwq_128B(__builtin_HEXAGON_V6_vandvrt_128B(q128, -1), v128, v128); // CHECK: @llvm.hexagon.V6.vaddwsat.128B __builtin_HEXAGON_V6_vaddwsat_128B(v128, v128); // CHECK: @llvm.hexagon.V6.vaddwsat.dv.128B @@ -141,21 +152,21 @@ void test() { // CHECK: @llvm.hexagon.V6.vand.128B __builtin_HEXAGON_V6_vand_128B(v128, v128); // CHECK: @llvm.hexagon.V6.vandnqrt.128B - __builtin_HEXAGON_V6_vandnqrt_128B(q128, 0); + __builtin_HEXAGON_V6_vandnqrt_128B(__builtin_HEXAGON_V6_vandvrt_128B(q128, -1), 0); // CHECK: @llvm.hexagon.V6.vandnqrt.acc.128B - __builtin_HEXAGON_V6_vandnqrt_acc_128B(v128, q128, 0); + __builtin_HEXAGON_V6_vandnqrt_acc_128B(v128, __builtin_HEXAGON_V6_vandvrt_128B(q128, -1), 0); // CHECK: @llvm.hexagon.V6.vandqrt.128B - __builtin_HEXAGON_V6_vandqrt_128B(q128, 0); + __builtin_HEXAGON_V6_vandqrt_128B(__builtin_HEXAGON_V6_vandvrt_128B(q128, -1), 0); // CHECK: @llvm.hexagon.V6.vandqrt.acc.128B - __builtin_HEXAGON_V6_vandqrt_acc_128B(v128, q128, 0); + __builtin_HEXAGON_V6_vandqrt_acc_128B(v128, __builtin_HEXAGON_V6_vandvrt_128B(q128, -1), 0); // CHECK: @llvm.hexagon.V6.vandvnqv.128B - __builtin_HEXAGON_V6_vandvnqv_128B(q128, v128); + __builtin_HEXAGON_V6_vandvnqv_128B(__builtin_HEXAGON_V6_vandvrt_128B(q128, -1), v128); // CHECK: @llvm.hexagon.V6.vandvqv.128B - __builtin_HEXAGON_V6_vandvqv_128B(q128, v128); + __builtin_HEXAGON_V6_vandvqv_128B(__builtin_HEXAGON_V6_vandvrt_128B(q128, -1), v128); // CHECK: @llvm.hexagon.V6.vandvrt.128B __builtin_HEXAGON_V6_vandvrt_128B(v128, 0); // CHECK: @llvm.hexagon.V6.vandvrt.acc.128B - __builtin_HEXAGON_V6_vandvrt_acc_128B(q128, v128, 0); + __builtin_HEXAGON_V6_vandvrt_acc_128B(__builtin_HEXAGON_V6_vandvrt_128B(q128, -1), v128, 0); // CHECK: @llvm.hexagon.V6.vaslh.128B __builtin_HEXAGON_V6_vaslh_128B(v128, 0); // CHECK: @llvm.hexagon.V6.vaslh.acc.128B @@ -297,87 +308,87 @@ void test() { // CHECK: @llvm.hexagon.V6.veqb.128B 
__builtin_HEXAGON_V6_veqb_128B(v128, v128); // CHECK: @llvm.hexagon.V6.veqb.and.128B - __builtin_HEXAGON_V6_veqb_and_128B(q128, v128, v128); + __builtin_HEXAGON_V6_veqb_and_128B(__builtin_HEXAGON_V6_vandvrt_128B(q128, -1), v128, v128); // CHECK: @llvm.hexagon.V6.veqb.or.128B - __builtin_HEXAGON_V6_veqb_or_128B(q128, v128, v128); + __builtin_HEXAGON_V6_veqb_or_128B(__builtin_HEXAGON_V6_vandvrt_128B(q128, -1), v128, v128); // CHECK: @llvm.hexagon.V6.veqb.xor.128B - __builtin_HEXAGON_V6_veqb_xor_128B(q128, v128, v128); + __builtin_HEXAGON_V6_veqb_xor_128B(__builtin_HEXAGON_V6_vandvrt_128B(q128, -1), v128, v128); // CHECK: @llvm.hexagon.V6.veqh.128B __builtin_HEXAGON_V6_veqh_128B(v128, v128); // CHECK: @llvm.hexagon.V6.veqh.and.128B - __builtin_HEXAGON_V6_veqh_and_128B(q128, v128, v128); + __builtin_HEXAGON_V6_veqh_and_128B(__builtin_HEXAGON_V6_vandvrt_128B(q128, -1), v128, v128); // CHECK: @llvm.hexagon.V6.veqh.or.128B - __builtin_HEXAGON_V6_veqh_or_128B(q128, v128, v128); + __builtin_HEXAGON_V6_veqh_or_128B(__builtin_HEXAGON_V6_vandvrt_128B(q128, -1), v128, v128); // CHECK: @llvm.hexagon.V6.veqh.xor.128B - __builtin_HEXAGON_V6_veqh_xor_128B(q128, v128, v128); + __builtin_HEXAGON_V6_veqh_xor_128B(__builtin_HEXAGON_V6_vandvrt_128B(q128, -1), v128, v128); // CHECK: @llvm.hexagon.V6.veqw.128B __builtin_HEXAGON_V6_veqw_128B(v128, v128); // CHECK: @llvm.hexagon.V6.veqw.and.128B - __builtin_HEXAGON_V6_veqw_and_128B(q128, v128, v128); + __builtin_HEXAGON_V6_veqw_and_128B(__builtin_HEXAGON_V6_vandvrt_128B(q128, -1), v128, v128); // CHECK: @llvm.hexagon.V6.veqw.or.128B - __builtin_HEXAGON_V6_veqw_or_128B(q128, v128, v128); + __builtin_HEXAGON_V6_veqw_or_128B(__builtin_HEXAGON_V6_vandvrt_128B(q128, -1), v128, v128); // CHECK: @llvm.hexagon.V6.veqw.xor.128B - __builtin_HEXAGON_V6_veqw_xor_128B(q128, v128, v128); + __builtin_HEXAGON_V6_veqw_xor_128B(__builtin_HEXAGON_V6_vandvrt_128B(q128, -1), v128, v128); // CHECK: @llvm.hexagon.V6.vgathermh.128B __builtin_HEXAGON_V6_vgathermh_128B(0, 0, 0, v128); // CHECK: @llvm.hexagon.V6.vgathermhq.128B - __builtin_HEXAGON_V6_vgathermhq_128B(0, q128, 0, 0, v128); + __builtin_HEXAGON_V6_vgathermhq_128B(0, __builtin_HEXAGON_V6_vandvrt_128B(q128, -1), 0, 0, v128); // CHECK: @llvm.hexagon.V6.vgathermhw.128B __builtin_HEXAGON_V6_vgathermhw_128B(0, 0, 0, v256); // CHECK: @llvm.hexagon.V6.vgathermhwq.128B - __builtin_HEXAGON_V6_vgathermhwq_128B(0, q128, 0, 0, v256); + __builtin_HEXAGON_V6_vgathermhwq_128B(0, __builtin_HEXAGON_V6_vandvrt_128B(q128, -1), 0, 0, v256); // CHECK: @llvm.hexagon.V6.vgathermw.128B __builtin_HEXAGON_V6_vgathermw_128B(0, 0, 0, v128); // CHECK: @llvm.hexagon.V6.vgathermwq.128B - __builtin_HEXAGON_V6_vgathermwq_128B(0, q128, 0, 0, v128); + __builtin_HEXAGON_V6_vgathermwq_128B(0, __builtin_HEXAGON_V6_vandvrt_128B(q128, -1), 0, 0, v128); // CHECK: @llvm.hexagon.V6.vgtb.128B __builtin_HEXAGON_V6_vgtb_128B(v128, v128); // CHECK: @llvm.hexagon.V6.vgtb.and.128B - __builtin_HEXAGON_V6_vgtb_and_128B(q128, v128, v128); + __builtin_HEXAGON_V6_vgtb_and_128B(__builtin_HEXAGON_V6_vandvrt_128B(q128, -1), v128, v128); // CHECK: @llvm.hexagon.V6.vgtb.or.128B - __builtin_HEXAGON_V6_vgtb_or_128B(q128, v128, v128); + __builtin_HEXAGON_V6_vgtb_or_128B(__builtin_HEXAGON_V6_vandvrt_128B(q128, -1), v128, v128); // CHECK: @llvm.hexagon.V6.vgtb.xor.128B - __builtin_HEXAGON_V6_vgtb_xor_128B(q128, v128, v128); + __builtin_HEXAGON_V6_vgtb_xor_128B(__builtin_HEXAGON_V6_vandvrt_128B(q128, -1), v128, v128); // CHECK: @llvm.hexagon.V6.vgth.128B __builtin_HEXAGON_V6_vgth_128B(v128, 
v128); // CHECK: @llvm.hexagon.V6.vgth.and.128B - __builtin_HEXAGON_V6_vgth_and_128B(q128, v128, v128); + __builtin_HEXAGON_V6_vgth_and_128B(__builtin_HEXAGON_V6_vandvrt_128B(q128, -1), v128, v128); // CHECK: @llvm.hexagon.V6.vgth.or.128B - __builtin_HEXAGON_V6_vgth_or_128B(q128, v128, v128); + __builtin_HEXAGON_V6_vgth_or_128B(__builtin_HEXAGON_V6_vandvrt_128B(q128, -1), v128, v128); // CHECK: @llvm.hexagon.V6.vgth.xor.128B - __builtin_HEXAGON_V6_vgth_xor_128B(q128, v128, v128); + __builtin_HEXAGON_V6_vgth_xor_128B(__builtin_HEXAGON_V6_vandvrt_128B(q128, -1), v128, v128); // CHECK: @llvm.hexagon.V6.vgtub.128B __builtin_HEXAGON_V6_vgtub_128B(v128, v128); // CHECK: @llvm.hexagon.V6.vgtub.and.128B - __builtin_HEXAGON_V6_vgtub_and_128B(q128, v128, v128); + __builtin_HEXAGON_V6_vgtub_and_128B(__builtin_HEXAGON_V6_vandvrt_128B(q128, -1), v128, v128); // CHECK: @llvm.hexagon.V6.vgtub.or.128B - __builtin_HEXAGON_V6_vgtub_or_128B(q128, v128, v128); + __builtin_HEXAGON_V6_vgtub_or_128B(__builtin_HEXAGON_V6_vandvrt_128B(q128, -1), v128, v128); // CHECK: @llvm.hexagon.V6.vgtub.xor.128B - __builtin_HEXAGON_V6_vgtub_xor_128B(q128, v128, v128); + __builtin_HEXAGON_V6_vgtub_xor_128B(__builtin_HEXAGON_V6_vandvrt_128B(q128, -1), v128, v128); // CHECK: @llvm.hexagon.V6.vgtuh.128B __builtin_HEXAGON_V6_vgtuh_128B(v128, v128); // CHECK: @llvm.hexagon.V6.vgtuh.and.128B - __builtin_HEXAGON_V6_vgtuh_and_128B(q128, v128, v128); + __builtin_HEXAGON_V6_vgtuh_and_128B(__builtin_HEXAGON_V6_vandvrt_128B(q128, -1), v128, v128); // CHECK: @llvm.hexagon.V6.vgtuh.or.128B - __builtin_HEXAGON_V6_vgtuh_or_128B(q128, v128, v128); + __builtin_HEXAGON_V6_vgtuh_or_128B(__builtin_HEXAGON_V6_vandvrt_128B(q128, -1), v128, v128); // CHECK: @llvm.hexagon.V6.vgtuh.xor.128B - __builtin_HEXAGON_V6_vgtuh_xor_128B(q128, v128, v128); + __builtin_HEXAGON_V6_vgtuh_xor_128B(__builtin_HEXAGON_V6_vandvrt_128B(q128, -1), v128, v128); // CHECK: @llvm.hexagon.V6.vgtuw.128B __builtin_HEXAGON_V6_vgtuw_128B(v128, v128); // CHECK: @llvm.hexagon.V6.vgtuw.and.128B - __builtin_HEXAGON_V6_vgtuw_and_128B(q128, v128, v128); + __builtin_HEXAGON_V6_vgtuw_and_128B(__builtin_HEXAGON_V6_vandvrt_128B(q128, -1), v128, v128); // CHECK: @llvm.hexagon.V6.vgtuw.or.128B - __builtin_HEXAGON_V6_vgtuw_or_128B(q128, v128, v128); + __builtin_HEXAGON_V6_vgtuw_or_128B(__builtin_HEXAGON_V6_vandvrt_128B(q128, -1), v128, v128); // CHECK: @llvm.hexagon.V6.vgtuw.xor.128B - __builtin_HEXAGON_V6_vgtuw_xor_128B(q128, v128, v128); + __builtin_HEXAGON_V6_vgtuw_xor_128B(__builtin_HEXAGON_V6_vandvrt_128B(q128, -1), v128, v128); // CHECK: @llvm.hexagon.V6.vgtw.128B __builtin_HEXAGON_V6_vgtw_128B(v128, v128); // CHECK: @llvm.hexagon.V6.vgtw.and.128B - __builtin_HEXAGON_V6_vgtw_and_128B(q128, v128, v128); + __builtin_HEXAGON_V6_vgtw_and_128B(__builtin_HEXAGON_V6_vandvrt_128B(q128, -1), v128, v128); // CHECK: @llvm.hexagon.V6.vgtw.or.128B - __builtin_HEXAGON_V6_vgtw_or_128B(q128, v128, v128); + __builtin_HEXAGON_V6_vgtw_or_128B(__builtin_HEXAGON_V6_vandvrt_128B(q128, -1), v128, v128); // CHECK: @llvm.hexagon.V6.vgtw.xor.128B - __builtin_HEXAGON_V6_vgtw_xor_128B(q128, v128, v128); + __builtin_HEXAGON_V6_vgtw_xor_128B(__builtin_HEXAGON_V6_vandvrt_128B(q128, -1), v128, v128); // CHECK: @llvm.hexagon.V6.vinsertwr.128B __builtin_HEXAGON_V6_vinsertwr_128B(v128, 0); // CHECK: @llvm.hexagon.V6.vlalignb.128B @@ -416,14 +427,6 @@ void test() { __builtin_HEXAGON_V6_vlutvwh_oracci_128B(v256, v128, v128, 0); // CHECK: @llvm.hexagon.V6.vlutvwhi.128B __builtin_HEXAGON_V6_vlutvwhi_128B(v128, v128, 0); - // 
CHECK: @llvm.hexagon.V6.vmaskedstorenq.128B - __builtin_HEXAGON_V6_vmaskedstorenq_128B(q128, 0, v128); - // CHECK: @llvm.hexagon.V6.vmaskedstorentnq.128B - __builtin_HEXAGON_V6_vmaskedstorentnq_128B(q128, 0, v128); - // CHECK: @llvm.hexagon.V6.vmaskedstorentq.128B - __builtin_HEXAGON_V6_vmaskedstorentq_128B(q128, 0, v128); - // CHECK: @llvm.hexagon.V6.vmaskedstoreq.128B - __builtin_HEXAGON_V6_vmaskedstoreq_128B(q128, 0, v128); // CHECK: @llvm.hexagon.V6.vmaxb.128B __builtin_HEXAGON_V6_vmaxb_128B(v128, v128); // CHECK: @llvm.hexagon.V6.vmaxh.128B @@ -567,7 +570,7 @@ void test() { // CHECK: @llvm.hexagon.V6.vmpyuhv.acc.128B __builtin_HEXAGON_V6_vmpyuhv_acc_128B(v256, v128, v128); // CHECK: @llvm.hexagon.V6.vmux.128B - __builtin_HEXAGON_V6_vmux_128B(q128, v128, v128); + __builtin_HEXAGON_V6_vmux_128B(__builtin_HEXAGON_V6_vandvrt_128B(q128, -1), v128, v128); // CHECK: @llvm.hexagon.V6.vnavgb.128B __builtin_HEXAGON_V6_vnavgb_128B(v128, v128); // CHECK: @llvm.hexagon.V6.vnavgh.128B @@ -603,11 +606,11 @@ void test() { // CHECK: @llvm.hexagon.V6.vpopcounth.128B __builtin_HEXAGON_V6_vpopcounth_128B(v128); // CHECK: @llvm.hexagon.V6.vprefixqb.128B - __builtin_HEXAGON_V6_vprefixqb_128B(q128); + __builtin_HEXAGON_V6_vprefixqb_128B(__builtin_HEXAGON_V6_vandvrt_128B(q128, -1)); // CHECK: @llvm.hexagon.V6.vprefixqh.128B - __builtin_HEXAGON_V6_vprefixqh_128B(q128); + __builtin_HEXAGON_V6_vprefixqh_128B(__builtin_HEXAGON_V6_vandvrt_128B(q128, -1)); // CHECK: @llvm.hexagon.V6.vprefixqw.128B - __builtin_HEXAGON_V6_vprefixqw_128B(q128); + __builtin_HEXAGON_V6_vprefixqw_128B(__builtin_HEXAGON_V6_vandvrt_128B(q128, -1)); // CHECK: @llvm.hexagon.V6.vrdelta.128B __builtin_HEXAGON_V6_vrdelta_128B(v128, v128); // CHECK: @llvm.hexagon.V6.vrmpybub.rtt.128B @@ -677,19 +680,19 @@ void test() { // CHECK: @llvm.hexagon.V6.vscattermh.add.128B __builtin_HEXAGON_V6_vscattermh_add_128B(0, 0, v128, v128); // CHECK: @llvm.hexagon.V6.vscattermhq.128B - __builtin_HEXAGON_V6_vscattermhq_128B(q128, 0, 0, v128, v128); + __builtin_HEXAGON_V6_vscattermhq_128B(__builtin_HEXAGON_V6_vandvrt_128B(q128, -1), 0, 0, v128, v128); // CHECK: @llvm.hexagon.V6.vscattermhw.128B __builtin_HEXAGON_V6_vscattermhw_128B(0, 0, v256, v128); // CHECK: @llvm.hexagon.V6.vscattermhw.add.128B __builtin_HEXAGON_V6_vscattermhw_add_128B(0, 0, v256, v128); // CHECK: @llvm.hexagon.V6.vscattermhwq.128B - __builtin_HEXAGON_V6_vscattermhwq_128B(q128, 0, 0, v256, v128); + __builtin_HEXAGON_V6_vscattermhwq_128B(__builtin_HEXAGON_V6_vandvrt_128B(q128, -1), 0, 0, v256, v128); // CHECK: @llvm.hexagon.V6.vscattermw.128B __builtin_HEXAGON_V6_vscattermw_128B(0, 0, v128, v128); // CHECK: @llvm.hexagon.V6.vscattermw.add.128B __builtin_HEXAGON_V6_vscattermw_add_128B(0, 0, v128, v128); // CHECK: @llvm.hexagon.V6.vscattermwq.128B - __builtin_HEXAGON_V6_vscattermwq_128B(q128, 0, 0, v128, v128); + __builtin_HEXAGON_V6_vscattermwq_128B(__builtin_HEXAGON_V6_vandvrt_128B(q128, -1), 0, 0, v128, v128); // CHECK: @llvm.hexagon.V6.vsh.128B __builtin_HEXAGON_V6_vsh_128B(v128); // CHECK: @llvm.hexagon.V6.vshufeh.128B @@ -715,9 +718,9 @@ void test() { // CHECK: @llvm.hexagon.V6.vsubb.dv.128B __builtin_HEXAGON_V6_vsubb_dv_128B(v256, v256); // CHECK: @llvm.hexagon.V6.vsubbnq.128B - __builtin_HEXAGON_V6_vsubbnq_128B(q128, v128, v128); + __builtin_HEXAGON_V6_vsubbnq_128B(__builtin_HEXAGON_V6_vandvrt_128B(q128, -1), v128, v128); // CHECK: @llvm.hexagon.V6.vsubbq.128B - __builtin_HEXAGON_V6_vsubbq_128B(q128, v128, v128); + __builtin_HEXAGON_V6_vsubbq_128B(__builtin_HEXAGON_V6_vandvrt_128B(q128, 
-1), v128, v128); // CHECK: @llvm.hexagon.V6.vsubbsat.128B __builtin_HEXAGON_V6_vsubbsat_128B(v128, v128); // CHECK: @llvm.hexagon.V6.vsubbsat.dv.128B @@ -729,9 +732,9 @@ void test() { // CHECK: @llvm.hexagon.V6.vsubh.dv.128B __builtin_HEXAGON_V6_vsubh_dv_128B(v256, v256); // CHECK: @llvm.hexagon.V6.vsubhnq.128B - __builtin_HEXAGON_V6_vsubhnq_128B(q128, v128, v128); + __builtin_HEXAGON_V6_vsubhnq_128B(__builtin_HEXAGON_V6_vandvrt_128B(q128, -1), v128, v128); // CHECK: @llvm.hexagon.V6.vsubhq.128B - __builtin_HEXAGON_V6_vsubhq_128B(q128, v128, v128); + __builtin_HEXAGON_V6_vsubhq_128B(__builtin_HEXAGON_V6_vandvrt_128B(q128, -1), v128, v128); // CHECK: @llvm.hexagon.V6.vsubhsat.128B __builtin_HEXAGON_V6_vsubhsat_128B(v128, v128); // CHECK: @llvm.hexagon.V6.vsubhsat.dv.128B @@ -761,15 +764,15 @@ void test() { // CHECK: @llvm.hexagon.V6.vsubw.dv.128B __builtin_HEXAGON_V6_vsubw_dv_128B(v256, v256); // CHECK: @llvm.hexagon.V6.vsubwnq.128B - __builtin_HEXAGON_V6_vsubwnq_128B(q128, v128, v128); + __builtin_HEXAGON_V6_vsubwnq_128B(__builtin_HEXAGON_V6_vandvrt_128B(q128, -1), v128, v128); // CHECK: @llvm.hexagon.V6.vsubwq.128B - __builtin_HEXAGON_V6_vsubwq_128B(q128, v128, v128); + __builtin_HEXAGON_V6_vsubwq_128B(__builtin_HEXAGON_V6_vandvrt_128B(q128, -1), v128, v128); // CHECK: @llvm.hexagon.V6.vsubwsat.128B __builtin_HEXAGON_V6_vsubwsat_128B(v128, v128); // CHECK: @llvm.hexagon.V6.vsubwsat.dv.128B __builtin_HEXAGON_V6_vsubwsat_dv_128B(v256, v256); // CHECK: @llvm.hexagon.V6.vswap.128B - __builtin_HEXAGON_V6_vswap_128B(q128, v128, v128); + __builtin_HEXAGON_V6_vswap_128B(__builtin_HEXAGON_V6_vandvrt_128B(q128, -1), v128, v128); // CHECK: @llvm.hexagon.V6.vtmpyb.128B __builtin_HEXAGON_V6_vtmpyb_128B(v256, 0); // CHECK: @llvm.hexagon.V6.vtmpyb.acc.128B diff --git a/clang/test/CodeGen/builtins-hvx64.c b/clang/test/CodeGen/builtins-hvx64.c index 27d39990adb5..7321460c489e 100644 --- a/clang/test/CodeGen/builtins-hvx64.c +++ b/clang/test/CodeGen/builtins-hvx64.c @@ -6,6 +6,17 @@ void test() { int v64 __attribute__((__vector_size__(64))); int v128 __attribute__((__vector_size__(128))); + // These are special and ugly: they take an HVX vector in place of + // the HVX vector predicate. 
+ // CHECK: @llvm.hexagon.V6.vmaskedstorenq + __builtin_HEXAGON_V6_vmaskedstorenq(q64, 0, v64); + // CHECK: @llvm.hexagon.V6.vmaskedstorentnq + __builtin_HEXAGON_V6_vmaskedstorentnq(q64, 0, v64); + // CHECK: @llvm.hexagon.V6.vmaskedstorentq + __builtin_HEXAGON_V6_vmaskedstorentq(q64, 0, v64); + // CHECK: @llvm.hexagon.V6.vmaskedstoreq + __builtin_HEXAGON_V6_vmaskedstoreq(q64, 0, v64); + // CHECK: @llvm.hexagon.V6.extractw __builtin_HEXAGON_V6_extractw(v64, 0); // CHECK: @llvm.hexagon.V6.hi @@ -19,33 +30,33 @@ void test() { // CHECK: @llvm.hexagon.V6.lvsplatw __builtin_HEXAGON_V6_lvsplatw(0); // CHECK: @llvm.hexagon.V6.pred.and - __builtin_HEXAGON_V6_pred_and(q64, q64); + __builtin_HEXAGON_V6_pred_and(__builtin_HEXAGON_V6_vandvrt(q64, -1), __builtin_HEXAGON_V6_vandvrt(q64, -1)); // CHECK: @llvm.hexagon.V6.pred.and.n - __builtin_HEXAGON_V6_pred_and_n(q64, q64); + __builtin_HEXAGON_V6_pred_and_n(__builtin_HEXAGON_V6_vandvrt(q64, -1), __builtin_HEXAGON_V6_vandvrt(q64, -1)); // CHECK: @llvm.hexagon.V6.pred.not - __builtin_HEXAGON_V6_pred_not(q64); + __builtin_HEXAGON_V6_pred_not(__builtin_HEXAGON_V6_vandvrt(q64, -1)); // CHECK: @llvm.hexagon.V6.pred.or - __builtin_HEXAGON_V6_pred_or(q64, q64); + __builtin_HEXAGON_V6_pred_or(__builtin_HEXAGON_V6_vandvrt(q64, -1), __builtin_HEXAGON_V6_vandvrt(q64, -1)); // CHECK: @llvm.hexagon.V6.pred.or.n - __builtin_HEXAGON_V6_pred_or_n(q64, q64); + __builtin_HEXAGON_V6_pred_or_n(__builtin_HEXAGON_V6_vandvrt(q64, -1), __builtin_HEXAGON_V6_vandvrt(q64, -1)); // CHECK: @llvm.hexagon.V6.pred.scalar2 __builtin_HEXAGON_V6_pred_scalar2(0); // CHECK: @llvm.hexagon.V6.pred.scalar2v2 __builtin_HEXAGON_V6_pred_scalar2v2(0); // CHECK: @llvm.hexagon.V6.pred.xor - __builtin_HEXAGON_V6_pred_xor(q64, q64); + __builtin_HEXAGON_V6_pred_xor(__builtin_HEXAGON_V6_vandvrt(q64, -1), __builtin_HEXAGON_V6_vandvrt(q64, -1)); // CHECK: @llvm.hexagon.V6.shuffeqh - __builtin_HEXAGON_V6_shuffeqh(q64, q64); + __builtin_HEXAGON_V6_shuffeqh(__builtin_HEXAGON_V6_vandvrt(q64, -1), __builtin_HEXAGON_V6_vandvrt(q64, -1)); // CHECK: @llvm.hexagon.V6.shuffeqw - __builtin_HEXAGON_V6_shuffeqw(q64, q64); + __builtin_HEXAGON_V6_shuffeqw(__builtin_HEXAGON_V6_vandvrt(q64, -1), __builtin_HEXAGON_V6_vandvrt(q64, -1)); // CHECK: @llvm.hexagon.V6.vS32b.nqpred.ai - __builtin_HEXAGON_V6_vS32b_nqpred_ai(q64, 0, v64); + __builtin_HEXAGON_V6_vS32b_nqpred_ai(__builtin_HEXAGON_V6_vandvrt(q64, -1), 0, v64); // CHECK: @llvm.hexagon.V6.vS32b.nt.nqpred.ai - __builtin_HEXAGON_V6_vS32b_nt_nqpred_ai(q64, 0, v64); + __builtin_HEXAGON_V6_vS32b_nt_nqpred_ai(__builtin_HEXAGON_V6_vandvrt(q64, -1), 0, v64); // CHECK: @llvm.hexagon.V6.vS32b.nt.qpred.ai - __builtin_HEXAGON_V6_vS32b_nt_qpred_ai(q64, 0, v64); + __builtin_HEXAGON_V6_vS32b_nt_qpred_ai(__builtin_HEXAGON_V6_vandvrt(q64, -1), 0, v64); // CHECK: @llvm.hexagon.V6.vS32b.qpred.ai - __builtin_HEXAGON_V6_vS32b_qpred_ai(q64, 0, v64); + __builtin_HEXAGON_V6_vS32b_qpred_ai(__builtin_HEXAGON_V6_vandvrt(q64, -1), 0, v64); // CHECK: @llvm.hexagon.V6.vabsb __builtin_HEXAGON_V6_vabsb(v64); // CHECK: @llvm.hexagon.V6.vabsb.sat @@ -71,9 +82,9 @@ void test() { // CHECK: @llvm.hexagon.V6.vaddb.dv __builtin_HEXAGON_V6_vaddb_dv(v128, v128); // CHECK: @llvm.hexagon.V6.vaddbnq - __builtin_HEXAGON_V6_vaddbnq(q64, v64, v64); + __builtin_HEXAGON_V6_vaddbnq(__builtin_HEXAGON_V6_vandvrt(q64, -1), v64, v64); // CHECK: @llvm.hexagon.V6.vaddbq - __builtin_HEXAGON_V6_vaddbq(q64, v64, v64); + __builtin_HEXAGON_V6_vaddbq(__builtin_HEXAGON_V6_vandvrt(q64, -1), v64, v64); // CHECK: 
@llvm.hexagon.V6.vaddbsat __builtin_HEXAGON_V6_vaddbsat(v64, v64); // CHECK: @llvm.hexagon.V6.vaddbsat.dv @@ -89,9 +100,9 @@ void test() { // CHECK: @llvm.hexagon.V6.vaddh.dv __builtin_HEXAGON_V6_vaddh_dv(v128, v128); // CHECK: @llvm.hexagon.V6.vaddhnq - __builtin_HEXAGON_V6_vaddhnq(q64, v64, v64); + __builtin_HEXAGON_V6_vaddhnq(__builtin_HEXAGON_V6_vandvrt(q64, -1), v64, v64); // CHECK: @llvm.hexagon.V6.vaddhq - __builtin_HEXAGON_V6_vaddhq(q64, v64, v64); + __builtin_HEXAGON_V6_vaddhq(__builtin_HEXAGON_V6_vandvrt(q64, -1), v64, v64); // CHECK: @llvm.hexagon.V6.vaddhsat __builtin_HEXAGON_V6_vaddhsat(v64, v64); // CHECK: @llvm.hexagon.V6.vaddhsat.dv @@ -127,9 +138,9 @@ void test() { // CHECK: @llvm.hexagon.V6.vaddw.dv __builtin_HEXAGON_V6_vaddw_dv(v128, v128); // CHECK: @llvm.hexagon.V6.vaddwnq - __builtin_HEXAGON_V6_vaddwnq(q64, v64, v64); + __builtin_HEXAGON_V6_vaddwnq(__builtin_HEXAGON_V6_vandvrt(q64, -1), v64, v64); // CHECK: @llvm.hexagon.V6.vaddwq - __builtin_HEXAGON_V6_vaddwq(q64, v64, v64); + __builtin_HEXAGON_V6_vaddwq(__builtin_HEXAGON_V6_vandvrt(q64, -1), v64, v64); // CHECK: @llvm.hexagon.V6.vaddwsat __builtin_HEXAGON_V6_vaddwsat(v64, v64); // CHECK: @llvm.hexagon.V6.vaddwsat.dv @@ -141,21 +152,21 @@ void test() { // CHECK: @llvm.hexagon.V6.vand __builtin_HEXAGON_V6_vand(v64, v64); // CHECK: @llvm.hexagon.V6.vandnqrt - __builtin_HEXAGON_V6_vandnqrt(q64, 0); + __builtin_HEXAGON_V6_vandnqrt(__builtin_HEXAGON_V6_vandvrt(q64, -1), 0); // CHECK: @llvm.hexagon.V6.vandnqrt.acc - __builtin_HEXAGON_V6_vandnqrt_acc(v64, q64, 0); + __builtin_HEXAGON_V6_vandnqrt_acc(v64, __builtin_HEXAGON_V6_vandvrt(q64, -1), 0); // CHECK: @llvm.hexagon.V6.vandqrt - __builtin_HEXAGON_V6_vandqrt(q64, 0); + __builtin_HEXAGON_V6_vandqrt(__builtin_HEXAGON_V6_vandvrt(q64, -1), 0); // CHECK: @llvm.hexagon.V6.vandqrt.acc - __builtin_HEXAGON_V6_vandqrt_acc(v64, q64, 0); + __builtin_HEXAGON_V6_vandqrt_acc(v64, __builtin_HEXAGON_V6_vandvrt(q64, -1), 0); // CHECK: @llvm.hexagon.V6.vandvnqv - __builtin_HEXAGON_V6_vandvnqv(q64, v64); + __builtin_HEXAGON_V6_vandvnqv(__builtin_HEXAGON_V6_vandvrt(q64, -1), v64); // CHECK: @llvm.hexagon.V6.vandvqv - __builtin_HEXAGON_V6_vandvqv(q64, v64); + __builtin_HEXAGON_V6_vandvqv(__builtin_HEXAGON_V6_vandvrt(q64, -1), v64); // CHECK: @llvm.hexagon.V6.vandvrt __builtin_HEXAGON_V6_vandvrt(v64, 0); // CHECK: @llvm.hexagon.V6.vandvrt.acc - __builtin_HEXAGON_V6_vandvrt_acc(q64, v64, 0); + __builtin_HEXAGON_V6_vandvrt_acc(__builtin_HEXAGON_V6_vandvrt(q64, -1), v64, 0); // CHECK: @llvm.hexagon.V6.vaslh __builtin_HEXAGON_V6_vaslh(v64, 0); // CHECK: @llvm.hexagon.V6.vaslh.acc @@ -297,87 +308,87 @@ void test() { // CHECK: @llvm.hexagon.V6.veqb __builtin_HEXAGON_V6_veqb(v64, v64); // CHECK: @llvm.hexagon.V6.veqb.and - __builtin_HEXAGON_V6_veqb_and(q64, v64, v64); + __builtin_HEXAGON_V6_veqb_and(__builtin_HEXAGON_V6_vandvrt(q64, -1), v64, v64); // CHECK: @llvm.hexagon.V6.veqb.or - __builtin_HEXAGON_V6_veqb_or(q64, v64, v64); + __builtin_HEXAGON_V6_veqb_or(__builtin_HEXAGON_V6_vandvrt(q64, -1), v64, v64); // CHECK: @llvm.hexagon.V6.veqb.xor - __builtin_HEXAGON_V6_veqb_xor(q64, v64, v64); + __builtin_HEXAGON_V6_veqb_xor(__builtin_HEXAGON_V6_vandvrt(q64, -1), v64, v64); // CHECK: @llvm.hexagon.V6.veqh __builtin_HEXAGON_V6_veqh(v64, v64); // CHECK: @llvm.hexagon.V6.veqh.and - __builtin_HEXAGON_V6_veqh_and(q64, v64, v64); + __builtin_HEXAGON_V6_veqh_and(__builtin_HEXAGON_V6_vandvrt(q64, -1), v64, v64); // CHECK: @llvm.hexagon.V6.veqh.or - __builtin_HEXAGON_V6_veqh_or(q64, v64, v64); + 
__builtin_HEXAGON_V6_veqh_or(__builtin_HEXAGON_V6_vandvrt(q64, -1), v64, v64); // CHECK: @llvm.hexagon.V6.veqh.xor - __builtin_HEXAGON_V6_veqh_xor(q64, v64, v64); + __builtin_HEXAGON_V6_veqh_xor(__builtin_HEXAGON_V6_vandvrt(q64, -1), v64, v64); // CHECK: @llvm.hexagon.V6.veqw __builtin_HEXAGON_V6_veqw(v64, v64); // CHECK: @llvm.hexagon.V6.veqw.and - __builtin_HEXAGON_V6_veqw_and(q64, v64, v64); + __builtin_HEXAGON_V6_veqw_and(__builtin_HEXAGON_V6_vandvrt(q64, -1), v64, v64); // CHECK: @llvm.hexagon.V6.veqw.or - __builtin_HEXAGON_V6_veqw_or(q64, v64, v64); + __builtin_HEXAGON_V6_veqw_or(__builtin_HEXAGON_V6_vandvrt(q64, -1), v64, v64); // CHECK: @llvm.hexagon.V6.veqw.xor - __builtin_HEXAGON_V6_veqw_xor(q64, v64, v64); + __builtin_HEXAGON_V6_veqw_xor(__builtin_HEXAGON_V6_vandvrt(q64, -1), v64, v64); // CHECK: @llvm.hexagon.V6.vgathermh __builtin_HEXAGON_V6_vgathermh(0, 0, 0, v64); // CHECK: @llvm.hexagon.V6.vgathermhq - __builtin_HEXAGON_V6_vgathermhq(0, q64, 0, 0, v64); + __builtin_HEXAGON_V6_vgathermhq(0, __builtin_HEXAGON_V6_vandvrt(q64, -1), 0, 0, v64); // CHECK: @llvm.hexagon.V6.vgathermhw __builtin_HEXAGON_V6_vgathermhw(0, 0, 0, v128); // CHECK: @llvm.hexagon.V6.vgathermhwq - __builtin_HEXAGON_V6_vgathermhwq(0, q64, 0, 0, v128); + __builtin_HEXAGON_V6_vgathermhwq(0, __builtin_HEXAGON_V6_vandvrt(q64, -1), 0, 0, v128); // CHECK: @llvm.hexagon.V6.vgathermw __builtin_HEXAGON_V6_vgathermw(0, 0, 0, v64); // CHECK: @llvm.hexagon.V6.vgathermwq - __builtin_HEXAGON_V6_vgathermwq(0, q64, 0, 0, v64); + __builtin_HEXAGON_V6_vgathermwq(0, __builtin_HEXAGON_V6_vandvrt(q64, -1), 0, 0, v64); // CHECK: @llvm.hexagon.V6.vgtb __builtin_HEXAGON_V6_vgtb(v64, v64); // CHECK: @llvm.hexagon.V6.vgtb.and - __builtin_HEXAGON_V6_vgtb_and(q64, v64, v64); + __builtin_HEXAGON_V6_vgtb_and(__builtin_HEXAGON_V6_vandvrt(q64, -1), v64, v64); // CHECK: @llvm.hexagon.V6.vgtb.or - __builtin_HEXAGON_V6_vgtb_or(q64, v64, v64); + __builtin_HEXAGON_V6_vgtb_or(__builtin_HEXAGON_V6_vandvrt(q64, -1), v64, v64); // CHECK: @llvm.hexagon.V6.vgtb.xor - __builtin_HEXAGON_V6_vgtb_xor(q64, v64, v64); + __builtin_HEXAGON_V6_vgtb_xor(__builtin_HEXAGON_V6_vandvrt(q64, -1), v64, v64); // CHECK: @llvm.hexagon.V6.vgth __builtin_HEXAGON_V6_vgth(v64, v64); // CHECK: @llvm.hexagon.V6.vgth.and - __builtin_HEXAGON_V6_vgth_and(q64, v64, v64); + __builtin_HEXAGON_V6_vgth_and(__builtin_HEXAGON_V6_vandvrt(q64, -1), v64, v64); // CHECK: @llvm.hexagon.V6.vgth.or - __builtin_HEXAGON_V6_vgth_or(q64, v64, v64); + __builtin_HEXAGON_V6_vgth_or(__builtin_HEXAGON_V6_vandvrt(q64, -1), v64, v64); // CHECK: @llvm.hexagon.V6.vgth.xor - __builtin_HEXAGON_V6_vgth_xor(q64, v64, v64); + __builtin_HEXAGON_V6_vgth_xor(__builtin_HEXAGON_V6_vandvrt(q64, -1), v64, v64); // CHECK: @llvm.hexagon.V6.vgtub __builtin_HEXAGON_V6_vgtub(v64, v64); // CHECK: @llvm.hexagon.V6.vgtub.and - __builtin_HEXAGON_V6_vgtub_and(q64, v64, v64); + __builtin_HEXAGON_V6_vgtub_and(__builtin_HEXAGON_V6_vandvrt(q64, -1), v64, v64); // CHECK: @llvm.hexagon.V6.vgtub.or - __builtin_HEXAGON_V6_vgtub_or(q64, v64, v64); + __builtin_HEXAGON_V6_vgtub_or(__builtin_HEXAGON_V6_vandvrt(q64, -1), v64, v64); // CHECK: @llvm.hexagon.V6.vgtub.xor - __builtin_HEXAGON_V6_vgtub_xor(q64, v64, v64); + __builtin_HEXAGON_V6_vgtub_xor(__builtin_HEXAGON_V6_vandvrt(q64, -1), v64, v64); // CHECK: @llvm.hexagon.V6.vgtuh __builtin_HEXAGON_V6_vgtuh(v64, v64); // CHECK: @llvm.hexagon.V6.vgtuh.and - __builtin_HEXAGON_V6_vgtuh_and(q64, v64, v64); + __builtin_HEXAGON_V6_vgtuh_and(__builtin_HEXAGON_V6_vandvrt(q64, -1), v64, v64); // 
CHECK: @llvm.hexagon.V6.vgtuh.or - __builtin_HEXAGON_V6_vgtuh_or(q64, v64, v64); + __builtin_HEXAGON_V6_vgtuh_or(__builtin_HEXAGON_V6_vandvrt(q64, -1), v64, v64); // CHECK: @llvm.hexagon.V6.vgtuh.xor - __builtin_HEXAGON_V6_vgtuh_xor(q64, v64, v64); + __builtin_HEXAGON_V6_vgtuh_xor(__builtin_HEXAGON_V6_vandvrt(q64, -1), v64, v64); // CHECK: @llvm.hexagon.V6.vgtuw __builtin_HEXAGON_V6_vgtuw(v64, v64); // CHECK: @llvm.hexagon.V6.vgtuw.and - __builtin_HEXAGON_V6_vgtuw_and(q64, v64, v64); + __builtin_HEXAGON_V6_vgtuw_and(__builtin_HEXAGON_V6_vandvrt(q64, -1), v64, v64); // CHECK: @llvm.hexagon.V6.vgtuw.or - __builtin_HEXAGON_V6_vgtuw_or(q64, v64, v64); + __builtin_HEXAGON_V6_vgtuw_or(__builtin_HEXAGON_V6_vandvrt(q64, -1), v64, v64); // CHECK: @llvm.hexagon.V6.vgtuw.xor - __builtin_HEXAGON_V6_vgtuw_xor(q64, v64, v64); + __builtin_HEXAGON_V6_vgtuw_xor(__builtin_HEXAGON_V6_vandvrt(q64, -1), v64, v64); // CHECK: @llvm.hexagon.V6.vgtw __builtin_HEXAGON_V6_vgtw(v64, v64); // CHECK: @llvm.hexagon.V6.vgtw.and - __builtin_HEXAGON_V6_vgtw_and(q64, v64, v64); + __builtin_HEXAGON_V6_vgtw_and(__builtin_HEXAGON_V6_vandvrt(q64, -1), v64, v64); // CHECK: @llvm.hexagon.V6.vgtw.or - __builtin_HEXAGON_V6_vgtw_or(q64, v64, v64); + __builtin_HEXAGON_V6_vgtw_or(__builtin_HEXAGON_V6_vandvrt(q64, -1), v64, v64); // CHECK: @llvm.hexagon.V6.vgtw.xor - __builtin_HEXAGON_V6_vgtw_xor(q64, v64, v64); + __builtin_HEXAGON_V6_vgtw_xor(__builtin_HEXAGON_V6_vandvrt(q64, -1), v64, v64); // CHECK: @llvm.hexagon.V6.vinsertwr __builtin_HEXAGON_V6_vinsertwr(v64, 0); // CHECK: @llvm.hexagon.V6.vlalignb @@ -416,14 +427,6 @@ void test() { __builtin_HEXAGON_V6_vlutvwh_oracci(v128, v64, v64, 0); // CHECK: @llvm.hexagon.V6.vlutvwhi __builtin_HEXAGON_V6_vlutvwhi(v64, v64, 0); - // CHECK: @llvm.hexagon.V6.vmaskedstorenq - __builtin_HEXAGON_V6_vmaskedstorenq(q64, 0, v64); - // CHECK: @llvm.hexagon.V6.vmaskedstorentnq - __builtin_HEXAGON_V6_vmaskedstorentnq(q64, 0, v64); - // CHECK: @llvm.hexagon.V6.vmaskedstorentq - __builtin_HEXAGON_V6_vmaskedstorentq(q64, 0, v64); - // CHECK: @llvm.hexagon.V6.vmaskedstoreq - __builtin_HEXAGON_V6_vmaskedstoreq(q64, 0, v64); // CHECK: @llvm.hexagon.V6.vmaxb __builtin_HEXAGON_V6_vmaxb(v64, v64); // CHECK: @llvm.hexagon.V6.vmaxh @@ -567,7 +570,7 @@ void test() { // CHECK: @llvm.hexagon.V6.vmpyuhv.acc __builtin_HEXAGON_V6_vmpyuhv_acc(v128, v64, v64); // CHECK: @llvm.hexagon.V6.vmux - __builtin_HEXAGON_V6_vmux(q64, v64, v64); + __builtin_HEXAGON_V6_vmux(__builtin_HEXAGON_V6_vandvrt(q64, -1), v64, v64); // CHECK: @llvm.hexagon.V6.vnavgb __builtin_HEXAGON_V6_vnavgb(v64, v64); // CHECK: @llvm.hexagon.V6.vnavgh @@ -603,11 +606,11 @@ void test() { // CHECK: @llvm.hexagon.V6.vpopcounth __builtin_HEXAGON_V6_vpopcounth(v64); // CHECK: @llvm.hexagon.V6.vprefixqb - __builtin_HEXAGON_V6_vprefixqb(q64); + __builtin_HEXAGON_V6_vprefixqb(__builtin_HEXAGON_V6_vandvrt(q64, -1)); // CHECK: @llvm.hexagon.V6.vprefixqh - __builtin_HEXAGON_V6_vprefixqh(q64); + __builtin_HEXAGON_V6_vprefixqh(__builtin_HEXAGON_V6_vandvrt(q64, -1)); // CHECK: @llvm.hexagon.V6.vprefixqw - __builtin_HEXAGON_V6_vprefixqw(q64); + __builtin_HEXAGON_V6_vprefixqw(__builtin_HEXAGON_V6_vandvrt(q64, -1)); // CHECK: @llvm.hexagon.V6.vrdelta __builtin_HEXAGON_V6_vrdelta(v64, v64); // CHECK: @llvm.hexagon.V6.vrmpybub.rtt @@ -677,19 +680,19 @@ void test() { // CHECK: @llvm.hexagon.V6.vscattermh.add __builtin_HEXAGON_V6_vscattermh_add(0, 0, v64, v64); // CHECK: @llvm.hexagon.V6.vscattermhq - __builtin_HEXAGON_V6_vscattermhq(q64, 0, 0, v64, v64); + 
__builtin_HEXAGON_V6_vscattermhq(__builtin_HEXAGON_V6_vandvrt(q64, -1), 0, 0, v64, v64); // CHECK: @llvm.hexagon.V6.vscattermhw __builtin_HEXAGON_V6_vscattermhw(0, 0, v128, v64); // CHECK: @llvm.hexagon.V6.vscattermhw.add __builtin_HEXAGON_V6_vscattermhw_add(0, 0, v128, v64); // CHECK: @llvm.hexagon.V6.vscattermhwq - __builtin_HEXAGON_V6_vscattermhwq(q64, 0, 0, v128, v64); + __builtin_HEXAGON_V6_vscattermhwq(__builtin_HEXAGON_V6_vandvrt(q64, -1), 0, 0, v128, v64); // CHECK: @llvm.hexagon.V6.vscattermw __builtin_HEXAGON_V6_vscattermw(0, 0, v64, v64); // CHECK: @llvm.hexagon.V6.vscattermw.add __builtin_HEXAGON_V6_vscattermw_add(0, 0, v64, v64); // CHECK: @llvm.hexagon.V6.vscattermwq - __builtin_HEXAGON_V6_vscattermwq(q64, 0, 0, v64, v64); + __builtin_HEXAGON_V6_vscattermwq(__builtin_HEXAGON_V6_vandvrt(q64, -1), 0, 0, v64, v64); // CHECK: @llvm.hexagon.V6.vsh __builtin_HEXAGON_V6_vsh(v64); // CHECK: @llvm.hexagon.V6.vshufeh @@ -715,9 +718,9 @@ void test() { // CHECK: @llvm.hexagon.V6.vsubb.dv __builtin_HEXAGON_V6_vsubb_dv(v128, v128); // CHECK: @llvm.hexagon.V6.vsubbnq - __builtin_HEXAGON_V6_vsubbnq(q64, v64, v64); + __builtin_HEXAGON_V6_vsubbnq(__builtin_HEXAGON_V6_vandvrt(q64, -1), v64, v64); // CHECK: @llvm.hexagon.V6.vsubbq - __builtin_HEXAGON_V6_vsubbq(q64, v64, v64); + __builtin_HEXAGON_V6_vsubbq(__builtin_HEXAGON_V6_vandvrt(q64, -1), v64, v64); // CHECK: @llvm.hexagon.V6.vsubbsat __builtin_HEXAGON_V6_vsubbsat(v64, v64); // CHECK: @llvm.hexagon.V6.vsubbsat.dv @@ -729,9 +732,9 @@ void test() { // CHECK: @llvm.hexagon.V6.vsubh.dv __builtin_HEXAGON_V6_vsubh_dv(v128, v128); // CHECK: @llvm.hexagon.V6.vsubhnq - __builtin_HEXAGON_V6_vsubhnq(q64, v64, v64); + __builtin_HEXAGON_V6_vsubhnq(__builtin_HEXAGON_V6_vandvrt(q64, -1), v64, v64); // CHECK: @llvm.hexagon.V6.vsubhq - __builtin_HEXAGON_V6_vsubhq(q64, v64, v64); + __builtin_HEXAGON_V6_vsubhq(__builtin_HEXAGON_V6_vandvrt(q64, -1), v64, v64); // CHECK: @llvm.hexagon.V6.vsubhsat __builtin_HEXAGON_V6_vsubhsat(v64, v64); // CHECK: @llvm.hexagon.V6.vsubhsat.dv @@ -761,15 +764,15 @@ void test() { // CHECK: @llvm.hexagon.V6.vsubw.dv __builtin_HEXAGON_V6_vsubw_dv(v128, v128); // CHECK: @llvm.hexagon.V6.vsubwnq - __builtin_HEXAGON_V6_vsubwnq(q64, v64, v64); + __builtin_HEXAGON_V6_vsubwnq(__builtin_HEXAGON_V6_vandvrt(q64, -1), v64, v64); // CHECK: @llvm.hexagon.V6.vsubwq - __builtin_HEXAGON_V6_vsubwq(q64, v64, v64); + __builtin_HEXAGON_V6_vsubwq(__builtin_HEXAGON_V6_vandvrt(q64, -1), v64, v64); // CHECK: @llvm.hexagon.V6.vsubwsat __builtin_HEXAGON_V6_vsubwsat(v64, v64); // CHECK: @llvm.hexagon.V6.vsubwsat.dv __builtin_HEXAGON_V6_vsubwsat_dv(v128, v128); // CHECK: @llvm.hexagon.V6.vswap - __builtin_HEXAGON_V6_vswap(q64, v64, v64); + __builtin_HEXAGON_V6_vswap(__builtin_HEXAGON_V6_vandvrt(q64, -1), v64, v64); // CHECK: @llvm.hexagon.V6.vtmpyb __builtin_HEXAGON_V6_vtmpyb(v128, 0); // CHECK: @llvm.hexagon.V6.vtmpyb.acc diff --git a/llvm/include/llvm/IR/IntrinsicsHexagonDep.td b/llvm/include/llvm/IR/IntrinsicsHexagonDep.td index 6799273bf805..8f0f743d1dec 100644 --- a/llvm/include/llvm/IR/IntrinsicsHexagonDep.td +++ b/llvm/include/llvm/IR/IntrinsicsHexagonDep.td @@ -23,13 +23,6 @@ class Hexagon_i64_i64_Intrinsic; // tag : A2_add -class Hexagon_custom_i32_i32i32_Intrinsic< - list intr_properties = [IntrNoMem]> - : Hexagon_NonGCC_Intrinsic< - [llvm_i32_ty], [llvm_i32_ty,llvm_i32_ty], - intr_properties>; - -// tag : A2_addh_h16_hh class Hexagon_i32_i32i32_Intrinsic intr_properties = [IntrNoMem]> : Hexagon_Intrinsic; // tag : A2_addp -class 
Hexagon_custom_i64_i64i64_Intrinsic< - list intr_properties = [IntrNoMem]> - : Hexagon_NonGCC_Intrinsic< - [llvm_i64_ty], [llvm_i64_ty,llvm_i64_ty], - intr_properties>; - -// tag : A2_addpsat class Hexagon_i64_i64i64_Intrinsic intr_properties = [IntrNoMem]> : Hexagon_Intrinsic; -// tag : A2_neg -class Hexagon_custom_i32_i32_Intrinsic< - list intr_properties = [IntrNoMem]> - : Hexagon_NonGCC_Intrinsic< - [llvm_i32_ty], [llvm_i32_ty], - intr_properties>; - // tag : A2_roundsat class Hexagon_i32_i64_Intrinsic intr_properties = [IntrNoMem]> @@ -288,20 +267,6 @@ class Hexagon_i64_i64i32i32_Intrinsic; -// tag : M2_dpmpyss_s0 -class Hexagon_custom_i64_i32i32_Intrinsic< - list intr_properties = [IntrNoMem]> - : Hexagon_NonGCC_Intrinsic< - [llvm_i64_ty], [llvm_i32_ty,llvm_i32_ty], - intr_properties>; - -// tag : S2_asl_i_p -class Hexagon_custom_i64_i64i32_Intrinsic< - list intr_properties = [IntrNoMem]> - : Hexagon_NonGCC_Intrinsic< - [llvm_i64_ty], [llvm_i64_ty,llvm_i32_ty], - intr_properties>; - // tag : S2_insert class Hexagon_i32_i32i32i32i32_Intrinsic intr_properties = [IntrNoMem]> @@ -366,44 +331,44 @@ class Hexagon_v32i32_i32_Intrinsic; // tag : V6_pred_and -class Hexagon_custom_v64i1_v64i1v64i1_Intrinsic< +class Hexagon_v64i1_v64i1v64i1_Intrinsic intr_properties = [IntrNoMem]> - : Hexagon_NonGCC_Intrinsic< + : Hexagon_Intrinsic; // tag : V6_pred_and -class Hexagon_custom_v128i1_v128i1v128i1_Intrinsic_128B< +class Hexagon_v128i1_v128i1v128i1_Intrinsic intr_properties = [IntrNoMem]> - : Hexagon_NonGCC_Intrinsic< + : Hexagon_Intrinsic; // tag : V6_pred_not -class Hexagon_custom_v64i1_v64i1_Intrinsic< +class Hexagon_v64i1_v64i1_Intrinsic intr_properties = [IntrNoMem]> - : Hexagon_NonGCC_Intrinsic< + : Hexagon_Intrinsic; // tag : V6_pred_not -class Hexagon_custom_v128i1_v128i1_Intrinsic_128B< +class Hexagon_v128i1_v128i1_Intrinsic intr_properties = [IntrNoMem]> - : Hexagon_NonGCC_Intrinsic< + : Hexagon_Intrinsic; // tag : V6_pred_scalar2 -class Hexagon_custom_v64i1_i32_Intrinsic< +class Hexagon_v64i1_i32_Intrinsic intr_properties = [IntrNoMem]> - : Hexagon_NonGCC_Intrinsic< + : Hexagon_Intrinsic; // tag : V6_pred_scalar2 -class Hexagon_custom_v128i1_i32_Intrinsic_128B< +class Hexagon_v128i1_i32_Intrinsic intr_properties = [IntrNoMem]> - : Hexagon_NonGCC_Intrinsic< + : Hexagon_Intrinsic; @@ -436,16 +401,16 @@ class Hexagon_v64i32_v64i32v64i32v64i32i32_Intrinsic; // tag : V6_vS32b_nqpred_ai -class Hexagon_custom__v64i1ptrv16i32_Intrinsic< +class Hexagon__v64i1ptrv16i32_Intrinsic intr_properties = [IntrNoMem]> - : Hexagon_NonGCC_Intrinsic< + : Hexagon_Intrinsic; // tag : V6_vS32b_nqpred_ai -class Hexagon_custom__v128i1ptrv32i32_Intrinsic_128B< +class Hexagon__v128i1ptrv32i32_Intrinsic intr_properties = [IntrNoMem]> - : Hexagon_NonGCC_Intrinsic< + : Hexagon_Intrinsic; @@ -485,16 +450,16 @@ class Hexagon_v64i32_v64i32v64i32_Intrinsic; // tag : V6_vaddbnq -class Hexagon_custom_v16i32_v64i1v16i32v16i32_Intrinsic< +class Hexagon_v16i32_v64i1v16i32v16i32_Intrinsic intr_properties = [IntrNoMem]> - : Hexagon_NonGCC_Intrinsic< + : Hexagon_Intrinsic; // tag : V6_vaddbnq -class Hexagon_custom_v32i32_v128i1v32i32v32i32_Intrinsic_128B< +class Hexagon_v32i32_v128i1v32i32v32i32_Intrinsic intr_properties = [IntrNoMem]> - : Hexagon_NonGCC_Intrinsic< + : Hexagon_Intrinsic; @@ -513,16 +478,16 @@ class Hexagon_custom_v32i32v128i1_v32i32v32i32v128i1_Intrinsic_128B< intr_properties>; // tag : V6_vaddcarrysat -class Hexagon_custom_v16i32_v16i32v16i32v64i1_Intrinsic< +class Hexagon_v16i32_v16i32v16i32v64i1_Intrinsic 
intr_properties = [IntrNoMem]> - : Hexagon_NonGCC_Intrinsic< + : Hexagon_Intrinsic; // tag : V6_vaddcarrysat -class Hexagon_custom_v32i32_v32i32v32i32v128i1_Intrinsic_128B< +class Hexagon_v32i32_v32i32v32i32v128i1_Intrinsic intr_properties = [IntrNoMem]> - : Hexagon_NonGCC_Intrinsic< + : Hexagon_Intrinsic; @@ -562,72 +527,72 @@ class Hexagon_v16i32_v16i32v16i32i32_Intrinsic; // tag : V6_vandnqrt -class Hexagon_custom_v16i32_v64i1i32_Intrinsic< +class Hexagon_v16i32_v64i1i32_Intrinsic intr_properties = [IntrNoMem]> - : Hexagon_NonGCC_Intrinsic< + : Hexagon_Intrinsic; // tag : V6_vandnqrt -class Hexagon_custom_v32i32_v128i1i32_Intrinsic_128B< +class Hexagon_v32i32_v128i1i32_Intrinsic intr_properties = [IntrNoMem]> - : Hexagon_NonGCC_Intrinsic< + : Hexagon_Intrinsic; // tag : V6_vandnqrt_acc -class Hexagon_custom_v16i32_v16i32v64i1i32_Intrinsic< +class Hexagon_v16i32_v16i32v64i1i32_Intrinsic intr_properties = [IntrNoMem]> - : Hexagon_NonGCC_Intrinsic< + : Hexagon_Intrinsic; // tag : V6_vandnqrt_acc -class Hexagon_custom_v32i32_v32i32v128i1i32_Intrinsic_128B< +class Hexagon_v32i32_v32i32v128i1i32_Intrinsic intr_properties = [IntrNoMem]> - : Hexagon_NonGCC_Intrinsic< + : Hexagon_Intrinsic; // tag : V6_vandvnqv -class Hexagon_custom_v16i32_v64i1v16i32_Intrinsic< +class Hexagon_v16i32_v64i1v16i32_Intrinsic intr_properties = [IntrNoMem]> - : Hexagon_NonGCC_Intrinsic< + : Hexagon_Intrinsic; // tag : V6_vandvnqv -class Hexagon_custom_v32i32_v128i1v32i32_Intrinsic_128B< +class Hexagon_v32i32_v128i1v32i32_Intrinsic intr_properties = [IntrNoMem]> - : Hexagon_NonGCC_Intrinsic< + : Hexagon_Intrinsic; // tag : V6_vandvrt -class Hexagon_custom_v64i1_v16i32i32_Intrinsic< +class Hexagon_v64i1_v16i32i32_Intrinsic intr_properties = [IntrNoMem]> - : Hexagon_NonGCC_Intrinsic< + : Hexagon_Intrinsic; // tag : V6_vandvrt -class Hexagon_custom_v128i1_v32i32i32_Intrinsic_128B< +class Hexagon_v128i1_v32i32i32_Intrinsic intr_properties = [IntrNoMem]> - : Hexagon_NonGCC_Intrinsic< + : Hexagon_Intrinsic; // tag : V6_vandvrt_acc -class Hexagon_custom_v64i1_v64i1v16i32i32_Intrinsic< +class Hexagon_v64i1_v64i1v16i32i32_Intrinsic intr_properties = [IntrNoMem]> - : Hexagon_NonGCC_Intrinsic< + : Hexagon_Intrinsic; // tag : V6_vandvrt_acc -class Hexagon_custom_v128i1_v128i1v32i32i32_Intrinsic_128B< +class Hexagon_v128i1_v128i1v32i32i32_Intrinsic intr_properties = [IntrNoMem]> - : Hexagon_NonGCC_Intrinsic< + : Hexagon_Intrinsic; @@ -737,30 +702,30 @@ class Hexagon_v32i32_v32i32v32i32v32i32_Intrinsic; // tag : V6_veqb -class Hexagon_custom_v64i1_v16i32v16i32_Intrinsic< +class Hexagon_v64i1_v16i32v16i32_Intrinsic intr_properties = [IntrNoMem]> - : Hexagon_NonGCC_Intrinsic< + : Hexagon_Intrinsic; // tag : V6_veqb -class Hexagon_custom_v128i1_v32i32v32i32_Intrinsic_128B< +class Hexagon_v128i1_v32i32v32i32_Intrinsic intr_properties = [IntrNoMem]> - : Hexagon_NonGCC_Intrinsic< + : Hexagon_Intrinsic; // tag : V6_veqb_and -class Hexagon_custom_v64i1_v64i1v16i32v16i32_Intrinsic< +class Hexagon_v64i1_v64i1v16i32v16i32_Intrinsic intr_properties = [IntrNoMem]> - : Hexagon_NonGCC_Intrinsic< + : Hexagon_Intrinsic; // tag : V6_veqb_and -class Hexagon_custom_v128i1_v128i1v32i32v32i32_Intrinsic_128B< +class Hexagon_v128i1_v128i1v32i32v32i32_Intrinsic intr_properties = [IntrNoMem]> - : Hexagon_NonGCC_Intrinsic< + : Hexagon_Intrinsic; @@ -779,16 +744,16 @@ class Hexagon__ptri32i32v32i32_Intrinsic; // tag : V6_vgathermhq -class Hexagon_custom__ptrv64i1i32i32v16i32_Intrinsic< +class Hexagon__ptrv64i1i32i32v16i32_Intrinsic intr_properties = 
[IntrNoMem]> - : Hexagon_NonGCC_Intrinsic< + : Hexagon_Intrinsic; // tag : V6_vgathermhq -class Hexagon_custom__ptrv128i1i32i32v32i32_Intrinsic_128B< +class Hexagon__ptrv128i1i32i32v32i32_Intrinsic intr_properties = [IntrNoMem]> - : Hexagon_NonGCC_Intrinsic< + : Hexagon_Intrinsic; @@ -800,16 +765,16 @@ class Hexagon__ptri32i32v64i32_Intrinsic; // tag : V6_vgathermhwq -class Hexagon_custom__ptrv64i1i32i32v32i32_Intrinsic< +class Hexagon__ptrv64i1i32i32v32i32_Intrinsic intr_properties = [IntrNoMem]> - : Hexagon_NonGCC_Intrinsic< + : Hexagon_Intrinsic; // tag : V6_vgathermhwq -class Hexagon_custom__ptrv128i1i32i32v64i32_Intrinsic_128B< +class Hexagon__ptrv128i1i32i32v64i32_Intrinsic intr_properties = [IntrNoMem]> - : Hexagon_NonGCC_Intrinsic< + : Hexagon_Intrinsic; @@ -891,16 +856,16 @@ class Hexagon_v64i32_v64i32v32i32i32_Intrinsic; // tag : V6_vprefixqb -class Hexagon_custom_v16i32_v64i1_Intrinsic< +class Hexagon_v16i32_v64i1_Intrinsic intr_properties = [IntrNoMem]> - : Hexagon_NonGCC_Intrinsic< + : Hexagon_Intrinsic; // tag : V6_vprefixqb -class Hexagon_custom_v32i32_v128i1_Intrinsic_128B< +class Hexagon_v32i32_v128i1_Intrinsic intr_properties = [IntrNoMem]> - : Hexagon_NonGCC_Intrinsic< + : Hexagon_Intrinsic; @@ -961,16 +926,16 @@ class Hexagon__i32i32v32i32v32i32_Intrinsic; // tag : V6_vscattermhq -class Hexagon_custom__v64i1i32i32v16i32v16i32_Intrinsic< +class Hexagon__v64i1i32i32v16i32v16i32_Intrinsic intr_properties = [IntrNoMem]> - : Hexagon_NonGCC_Intrinsic< + : Hexagon_Intrinsic; // tag : V6_vscattermhq -class Hexagon_custom__v128i1i32i32v32i32v32i32_Intrinsic_128B< +class Hexagon__v128i1i32i32v32i32v32i32_Intrinsic intr_properties = [IntrNoMem]> - : Hexagon_NonGCC_Intrinsic< + : Hexagon_Intrinsic; @@ -989,30 +954,30 @@ class Hexagon__i32i32v64i32v32i32_Intrinsic; // tag : V6_vscattermhwq -class Hexagon_custom__v64i1i32i32v32i32v16i32_Intrinsic< +class Hexagon__v64i1i32i32v32i32v16i32_Intrinsic intr_properties = [IntrNoMem]> - : Hexagon_NonGCC_Intrinsic< + : Hexagon_Intrinsic; // tag : V6_vscattermhwq -class Hexagon_custom__v128i1i32i32v64i32v32i32_Intrinsic_128B< +class Hexagon__v128i1i32i32v64i32v32i32_Intrinsic intr_properties = [IntrNoMem]> - : Hexagon_NonGCC_Intrinsic< + : Hexagon_Intrinsic; // tag : V6_vswap -class Hexagon_custom_v32i32_v64i1v16i32v16i32_Intrinsic< +class Hexagon_v32i32_v64i1v16i32v16i32_Intrinsic intr_properties = [IntrNoMem]> - : Hexagon_NonGCC_Intrinsic< + : Hexagon_Intrinsic; // tag : V6_vswap -class Hexagon_custom_v64i32_v128i1v32i32v32i32_Intrinsic_128B< +class Hexagon_v64i32_v128i1v32i32v32i32_Intrinsic intr_properties = [IntrNoMem]> - : Hexagon_NonGCC_Intrinsic< + : Hexagon_Intrinsic; @@ -1077,7 +1042,7 @@ def int_hexagon_A2_abssat : Hexagon_i32_i32_Intrinsic<"HEXAGON_A2_abssat">; def int_hexagon_A2_add : -Hexagon_custom_i32_i32i32_Intrinsic; +Hexagon_i32_i32i32_Intrinsic<"HEXAGON_A2_add">; def int_hexagon_A2_addh_h16_hh : Hexagon_i32_i32i32_Intrinsic<"HEXAGON_A2_addh_h16_hh">; @@ -1116,10 +1081,10 @@ def int_hexagon_A2_addh_l16_sat_ll : Hexagon_i32_i32i32_Intrinsic<"HEXAGON_A2_addh_l16_sat_ll">; def int_hexagon_A2_addi : -Hexagon_custom_i32_i32i32_Intrinsic<[IntrNoMem, ImmArg>]>; +Hexagon_i32_i32i32_Intrinsic<"HEXAGON_A2_addi", [IntrNoMem, ImmArg>]>; def int_hexagon_A2_addp : -Hexagon_custom_i64_i64i64_Intrinsic; +Hexagon_i64_i64i64_Intrinsic<"HEXAGON_A2_addp">; def int_hexagon_A2_addpsat : Hexagon_i64_i64i64_Intrinsic<"HEXAGON_A2_addpsat">; @@ -1131,10 +1096,10 @@ def int_hexagon_A2_addsp : Hexagon_i64_i32i64_Intrinsic<"HEXAGON_A2_addsp">; def 
int_hexagon_A2_and : -Hexagon_custom_i32_i32i32_Intrinsic; +Hexagon_i32_i32i32_Intrinsic<"HEXAGON_A2_and">; def int_hexagon_A2_andir : -Hexagon_custom_i32_i32i32_Intrinsic<[IntrNoMem, ImmArg>]>; +Hexagon_i32_i32i32_Intrinsic<"HEXAGON_A2_andir", [IntrNoMem, ImmArg>]>; def int_hexagon_A2_andp : Hexagon_i64_i64i64_Intrinsic<"HEXAGON_A2_andp">; @@ -1188,7 +1153,7 @@ def int_hexagon_A2_minup : Hexagon_i64_i64i64_Intrinsic<"HEXAGON_A2_minup">; def int_hexagon_A2_neg : -Hexagon_custom_i32_i32_Intrinsic; +Hexagon_i32_i32_Intrinsic<"HEXAGON_A2_neg">; def int_hexagon_A2_negp : Hexagon_i64_i64_Intrinsic<"HEXAGON_A2_negp">; @@ -1197,16 +1162,16 @@ def int_hexagon_A2_negsat : Hexagon_i32_i32_Intrinsic<"HEXAGON_A2_negsat">; def int_hexagon_A2_not : -Hexagon_custom_i32_i32_Intrinsic; +Hexagon_i32_i32_Intrinsic<"HEXAGON_A2_not">; def int_hexagon_A2_notp : Hexagon_i64_i64_Intrinsic<"HEXAGON_A2_notp">; def int_hexagon_A2_or : -Hexagon_custom_i32_i32i32_Intrinsic; +Hexagon_i32_i32i32_Intrinsic<"HEXAGON_A2_or">; def int_hexagon_A2_orir : -Hexagon_custom_i32_i32i32_Intrinsic<[IntrNoMem, ImmArg>]>; +Hexagon_i32_i32i32_Intrinsic<"HEXAGON_A2_orir", [IntrNoMem, ImmArg>]>; def int_hexagon_A2_orp : Hexagon_i64_i64i64_Intrinsic<"HEXAGON_A2_orp">; @@ -1230,7 +1195,7 @@ def int_hexagon_A2_satuh : Hexagon_i32_i32_Intrinsic<"HEXAGON_A2_satuh">; def int_hexagon_A2_sub : -Hexagon_custom_i32_i32i32_Intrinsic; +Hexagon_i32_i32i32_Intrinsic<"HEXAGON_A2_sub">; def int_hexagon_A2_subh_h16_hh : Hexagon_i32_i32i32_Intrinsic<"HEXAGON_A2_subh_h16_hh">; @@ -1269,10 +1234,10 @@ def int_hexagon_A2_subh_l16_sat_ll : Hexagon_i32_i32i32_Intrinsic<"HEXAGON_A2_subh_l16_sat_ll">; def int_hexagon_A2_subp : -Hexagon_custom_i64_i64i64_Intrinsic; +Hexagon_i64_i64i64_Intrinsic<"HEXAGON_A2_subp">; def int_hexagon_A2_subri : -Hexagon_custom_i32_i32i32_Intrinsic<[IntrNoMem, ImmArg>]>; +Hexagon_i32_i32i32_Intrinsic<"HEXAGON_A2_subri", [IntrNoMem, ImmArg>]>; def int_hexagon_A2_subsat : Hexagon_i32_i32i32_Intrinsic<"HEXAGON_A2_subsat">; @@ -1308,10 +1273,10 @@ def int_hexagon_A2_swiz : Hexagon_i32_i32_Intrinsic<"HEXAGON_A2_swiz">; def int_hexagon_A2_sxtb : -Hexagon_custom_i32_i32_Intrinsic; +Hexagon_i32_i32_Intrinsic<"HEXAGON_A2_sxtb">; def int_hexagon_A2_sxth : -Hexagon_custom_i32_i32_Intrinsic; +Hexagon_i32_i32_Intrinsic<"HEXAGON_A2_sxth">; def int_hexagon_A2_sxtw : Hexagon_i64_i32_Intrinsic<"HEXAGON_A2_sxtw">; @@ -1524,16 +1489,16 @@ def int_hexagon_A2_vsubws : Hexagon_i64_i64i64_Intrinsic<"HEXAGON_A2_vsubws">; def int_hexagon_A2_xor : -Hexagon_custom_i32_i32i32_Intrinsic; +Hexagon_i32_i32i32_Intrinsic<"HEXAGON_A2_xor">; def int_hexagon_A2_xorp : Hexagon_i64_i64i64_Intrinsic<"HEXAGON_A2_xorp">; def int_hexagon_A2_zxtb : -Hexagon_custom_i32_i32_Intrinsic; +Hexagon_i32_i32_Intrinsic<"HEXAGON_A2_zxtb">; def int_hexagon_A2_zxth : -Hexagon_custom_i32_i32_Intrinsic; +Hexagon_i32_i32_Intrinsic<"HEXAGON_A2_zxth">; def int_hexagon_A4_andn : Hexagon_i32_i32i32_Intrinsic<"HEXAGON_A4_andn">; @@ -2088,7 +2053,7 @@ def int_hexagon_M2_dpmpyss_rnd_s0 : Hexagon_i32_i32i32_Intrinsic<"HEXAGON_M2_dpmpyss_rnd_s0">; def int_hexagon_M2_dpmpyss_s0 : -Hexagon_custom_i64_i32i32_Intrinsic; +Hexagon_i64_i32i32_Intrinsic<"HEXAGON_M2_dpmpyss_s0">; def int_hexagon_M2_dpmpyuu_acc_s0 : Hexagon_i64_i64i32i32_Intrinsic<"HEXAGON_M2_dpmpyuu_acc_s0">; @@ -2097,7 +2062,7 @@ def int_hexagon_M2_dpmpyuu_nac_s0 : Hexagon_i64_i64i32i32_Intrinsic<"HEXAGON_M2_dpmpyuu_nac_s0">; def int_hexagon_M2_dpmpyuu_s0 : -Hexagon_custom_i64_i32i32_Intrinsic; 
+Hexagon_i64_i32i32_Intrinsic<"HEXAGON_M2_dpmpyuu_s0">; def int_hexagon_M2_hmmpyh_rs1 : Hexagon_i32_i32i32_Intrinsic<"HEXAGON_M2_hmmpyh_rs1">; @@ -2514,10 +2479,10 @@ def int_hexagon_M2_mpyd_rnd_ll_s1 : Hexagon_i64_i32i32_Intrinsic<"HEXAGON_M2_mpyd_rnd_ll_s1">; def int_hexagon_M2_mpyi : -Hexagon_custom_i32_i32i32_Intrinsic; +Hexagon_i32_i32i32_Intrinsic<"HEXAGON_M2_mpyi">; def int_hexagon_M2_mpysmi : -Hexagon_custom_i32_i32i32_Intrinsic<[IntrNoMem, ImmArg>]>; +Hexagon_i32_i32i32_Intrinsic<"HEXAGON_M2_mpysmi", [IntrNoMem, ImmArg>]>; def int_hexagon_M2_mpysu_up : Hexagon_i32_i32i32_Intrinsic<"HEXAGON_M2_mpysu_up">; @@ -2670,7 +2635,7 @@ def int_hexagon_M2_mpyud_nac_ll_s1 : Hexagon_i64_i64i32i32_Intrinsic<"HEXAGON_M2_mpyud_nac_ll_s1">; def int_hexagon_M2_mpyui : -Hexagon_custom_i32_i32i32_Intrinsic; +Hexagon_i32_i32i32_Intrinsic<"HEXAGON_M2_mpyui">; def int_hexagon_M2_nacci : Hexagon_i32_i32i32i32_Intrinsic<"HEXAGON_M2_nacci">; @@ -2958,7 +2923,7 @@ def int_hexagon_S2_addasl_rrri : Hexagon_i32_i32i32i32_Intrinsic<"HEXAGON_S2_addasl_rrri", [IntrNoMem, ImmArg>]>; def int_hexagon_S2_asl_i_p : -Hexagon_custom_i64_i64i32_Intrinsic<[IntrNoMem, ImmArg>]>; +Hexagon_i64_i64i32_Intrinsic<"HEXAGON_S2_asl_i_p", [IntrNoMem, ImmArg>]>; def int_hexagon_S2_asl_i_p_acc : Hexagon_i64_i64i64i32_Intrinsic<"HEXAGON_S2_asl_i_p_acc", [IntrNoMem, ImmArg>]>; @@ -2976,7 +2941,7 @@ def int_hexagon_S2_asl_i_p_xacc : Hexagon_i64_i64i64i32_Intrinsic<"HEXAGON_S2_asl_i_p_xacc", [IntrNoMem, ImmArg>]>; def int_hexagon_S2_asl_i_r : -Hexagon_custom_i32_i32i32_Intrinsic<[IntrNoMem, ImmArg>]>; +Hexagon_i32_i32i32_Intrinsic<"HEXAGON_S2_asl_i_r", [IntrNoMem, ImmArg>]>; def int_hexagon_S2_asl_i_r_acc : Hexagon_i32_i32i32i32_Intrinsic<"HEXAGON_S2_asl_i_r_acc", [IntrNoMem, ImmArg>]>; @@ -3045,7 +3010,7 @@ def int_hexagon_S2_asl_r_vw : Hexagon_i64_i64i32_Intrinsic<"HEXAGON_S2_asl_r_vw">; def int_hexagon_S2_asr_i_p : -Hexagon_custom_i64_i64i32_Intrinsic<[IntrNoMem, ImmArg>]>; +Hexagon_i64_i64i32_Intrinsic<"HEXAGON_S2_asr_i_p", [IntrNoMem, ImmArg>]>; def int_hexagon_S2_asr_i_p_acc : Hexagon_i64_i64i64i32_Intrinsic<"HEXAGON_S2_asr_i_p_acc", [IntrNoMem, ImmArg>]>; @@ -3066,7 +3031,7 @@ def int_hexagon_S2_asr_i_p_rnd_goodsyntax : Hexagon_i64_i64i32_Intrinsic<"HEXAGON_S2_asr_i_p_rnd_goodsyntax", [IntrNoMem, ImmArg>]>; def int_hexagon_S2_asr_i_r : -Hexagon_custom_i32_i32i32_Intrinsic<[IntrNoMem, ImmArg>]>; +Hexagon_i32_i32i32_Intrinsic<"HEXAGON_S2_asr_i_r", [IntrNoMem, ImmArg>]>; def int_hexagon_S2_asr_i_r_acc : Hexagon_i32_i32i32i32_Intrinsic<"HEXAGON_S2_asr_i_r_acc", [IntrNoMem, ImmArg>]>; @@ -3258,7 +3223,7 @@ def int_hexagon_S2_lsl_r_vw : Hexagon_i64_i64i32_Intrinsic<"HEXAGON_S2_lsl_r_vw">; def int_hexagon_S2_lsr_i_p : -Hexagon_custom_i64_i64i32_Intrinsic<[IntrNoMem, ImmArg>]>; +Hexagon_i64_i64i32_Intrinsic<"HEXAGON_S2_lsr_i_p", [IntrNoMem, ImmArg>]>; def int_hexagon_S2_lsr_i_p_acc : Hexagon_i64_i64i64i32_Intrinsic<"HEXAGON_S2_lsr_i_p_acc", [IntrNoMem, ImmArg>]>; @@ -3276,7 +3241,7 @@ def int_hexagon_S2_lsr_i_p_xacc : Hexagon_i64_i64i64i32_Intrinsic<"HEXAGON_S2_lsr_i_p_xacc", [IntrNoMem, ImmArg>]>; def int_hexagon_S2_lsr_i_r : -Hexagon_custom_i32_i32i32_Intrinsic<[IntrNoMem, ImmArg>]>; +Hexagon_i32_i32i32_Intrinsic<"HEXAGON_S2_lsr_i_r", [IntrNoMem, ImmArg>]>; def int_hexagon_S2_lsr_i_r_acc : Hexagon_i32_i32i32i32_Intrinsic<"HEXAGON_S2_lsr_i_r_acc", [IntrNoMem, ImmArg>]>; @@ -3809,70 +3774,70 @@ def int_hexagon_V6_lvsplatw_128B : Hexagon_v32i32_i32_Intrinsic<"HEXAGON_V6_lvsplatw_128B">; def int_hexagon_V6_pred_and : 
-Hexagon_custom_v64i1_v64i1v64i1_Intrinsic; +Hexagon_v64i1_v64i1v64i1_Intrinsic<"HEXAGON_V6_pred_and">; def int_hexagon_V6_pred_and_128B : -Hexagon_custom_v128i1_v128i1v128i1_Intrinsic_128B; +Hexagon_v128i1_v128i1v128i1_Intrinsic<"HEXAGON_V6_pred_and_128B">; def int_hexagon_V6_pred_and_n : -Hexagon_custom_v64i1_v64i1v64i1_Intrinsic; +Hexagon_v64i1_v64i1v64i1_Intrinsic<"HEXAGON_V6_pred_and_n">; def int_hexagon_V6_pred_and_n_128B : -Hexagon_custom_v128i1_v128i1v128i1_Intrinsic_128B; +Hexagon_v128i1_v128i1v128i1_Intrinsic<"HEXAGON_V6_pred_and_n_128B">; def int_hexagon_V6_pred_not : -Hexagon_custom_v64i1_v64i1_Intrinsic; +Hexagon_v64i1_v64i1_Intrinsic<"HEXAGON_V6_pred_not">; def int_hexagon_V6_pred_not_128B : -Hexagon_custom_v128i1_v128i1_Intrinsic_128B; +Hexagon_v128i1_v128i1_Intrinsic<"HEXAGON_V6_pred_not_128B">; def int_hexagon_V6_pred_or : -Hexagon_custom_v64i1_v64i1v64i1_Intrinsic; +Hexagon_v64i1_v64i1v64i1_Intrinsic<"HEXAGON_V6_pred_or">; def int_hexagon_V6_pred_or_128B : -Hexagon_custom_v128i1_v128i1v128i1_Intrinsic_128B; +Hexagon_v128i1_v128i1v128i1_Intrinsic<"HEXAGON_V6_pred_or_128B">; def int_hexagon_V6_pred_or_n : -Hexagon_custom_v64i1_v64i1v64i1_Intrinsic; +Hexagon_v64i1_v64i1v64i1_Intrinsic<"HEXAGON_V6_pred_or_n">; def int_hexagon_V6_pred_or_n_128B : -Hexagon_custom_v128i1_v128i1v128i1_Intrinsic_128B; +Hexagon_v128i1_v128i1v128i1_Intrinsic<"HEXAGON_V6_pred_or_n_128B">; def int_hexagon_V6_pred_scalar2 : -Hexagon_custom_v64i1_i32_Intrinsic; +Hexagon_v64i1_i32_Intrinsic<"HEXAGON_V6_pred_scalar2">; def int_hexagon_V6_pred_scalar2_128B : -Hexagon_custom_v128i1_i32_Intrinsic_128B; +Hexagon_v128i1_i32_Intrinsic<"HEXAGON_V6_pred_scalar2_128B">; def int_hexagon_V6_pred_xor : -Hexagon_custom_v64i1_v64i1v64i1_Intrinsic; +Hexagon_v64i1_v64i1v64i1_Intrinsic<"HEXAGON_V6_pred_xor">; def int_hexagon_V6_pred_xor_128B : -Hexagon_custom_v128i1_v128i1v128i1_Intrinsic_128B; +Hexagon_v128i1_v128i1v128i1_Intrinsic<"HEXAGON_V6_pred_xor_128B">; def int_hexagon_V6_vS32b_nqpred_ai : -Hexagon_custom__v64i1ptrv16i32_Intrinsic<[IntrWriteMem]>; +Hexagon__v64i1ptrv16i32_Intrinsic<"HEXAGON_V6_vS32b_nqpred_ai", [IntrWriteMem]>; def int_hexagon_V6_vS32b_nqpred_ai_128B : -Hexagon_custom__v128i1ptrv32i32_Intrinsic_128B<[IntrWriteMem]>; +Hexagon__v128i1ptrv32i32_Intrinsic<"HEXAGON_V6_vS32b_nqpred_ai_128B", [IntrWriteMem]>; def int_hexagon_V6_vS32b_nt_nqpred_ai : -Hexagon_custom__v64i1ptrv16i32_Intrinsic<[IntrWriteMem]>; +Hexagon__v64i1ptrv16i32_Intrinsic<"HEXAGON_V6_vS32b_nt_nqpred_ai", [IntrWriteMem]>; def int_hexagon_V6_vS32b_nt_nqpred_ai_128B : -Hexagon_custom__v128i1ptrv32i32_Intrinsic_128B<[IntrWriteMem]>; +Hexagon__v128i1ptrv32i32_Intrinsic<"HEXAGON_V6_vS32b_nt_nqpred_ai_128B", [IntrWriteMem]>; def int_hexagon_V6_vS32b_nt_qpred_ai : -Hexagon_custom__v64i1ptrv16i32_Intrinsic<[IntrWriteMem]>; +Hexagon__v64i1ptrv16i32_Intrinsic<"HEXAGON_V6_vS32b_nt_qpred_ai", [IntrWriteMem]>; def int_hexagon_V6_vS32b_nt_qpred_ai_128B : -Hexagon_custom__v128i1ptrv32i32_Intrinsic_128B<[IntrWriteMem]>; +Hexagon__v128i1ptrv32i32_Intrinsic<"HEXAGON_V6_vS32b_nt_qpred_ai_128B", [IntrWriteMem]>; def int_hexagon_V6_vS32b_qpred_ai : -Hexagon_custom__v64i1ptrv16i32_Intrinsic<[IntrWriteMem]>; +Hexagon__v64i1ptrv16i32_Intrinsic<"HEXAGON_V6_vS32b_qpred_ai", [IntrWriteMem]>; def int_hexagon_V6_vS32b_qpred_ai_128B : -Hexagon_custom__v128i1ptrv32i32_Intrinsic_128B<[IntrWriteMem]>; +Hexagon__v128i1ptrv32i32_Intrinsic<"HEXAGON_V6_vS32b_qpred_ai_128B", [IntrWriteMem]>; def int_hexagon_V6_vabsdiffh : 
Hexagon_v16i32_v16i32v16i32_Intrinsic<"HEXAGON_V6_vabsdiffh">; @@ -3935,16 +3900,16 @@ def int_hexagon_V6_vaddb_dv_128B : Hexagon_v64i32_v64i32v64i32_Intrinsic<"HEXAGON_V6_vaddb_dv_128B">; def int_hexagon_V6_vaddbnq : -Hexagon_custom_v16i32_v64i1v16i32v16i32_Intrinsic; +Hexagon_v16i32_v64i1v16i32v16i32_Intrinsic<"HEXAGON_V6_vaddbnq">; def int_hexagon_V6_vaddbnq_128B : -Hexagon_custom_v32i32_v128i1v32i32v32i32_Intrinsic_128B; +Hexagon_v32i32_v128i1v32i32v32i32_Intrinsic<"HEXAGON_V6_vaddbnq_128B">; def int_hexagon_V6_vaddbq : -Hexagon_custom_v16i32_v64i1v16i32v16i32_Intrinsic; +Hexagon_v16i32_v64i1v16i32v16i32_Intrinsic<"HEXAGON_V6_vaddbq">; def int_hexagon_V6_vaddbq_128B : -Hexagon_custom_v32i32_v128i1v32i32v32i32_Intrinsic_128B; +Hexagon_v32i32_v128i1v32i32v32i32_Intrinsic<"HEXAGON_V6_vaddbq_128B">; def int_hexagon_V6_vaddh : Hexagon_v16i32_v16i32v16i32_Intrinsic<"HEXAGON_V6_vaddh">; @@ -3959,16 +3924,16 @@ def int_hexagon_V6_vaddh_dv_128B : Hexagon_v64i32_v64i32v64i32_Intrinsic<"HEXAGON_V6_vaddh_dv_128B">; def int_hexagon_V6_vaddhnq : -Hexagon_custom_v16i32_v64i1v16i32v16i32_Intrinsic; +Hexagon_v16i32_v64i1v16i32v16i32_Intrinsic<"HEXAGON_V6_vaddhnq">; def int_hexagon_V6_vaddhnq_128B : -Hexagon_custom_v32i32_v128i1v32i32v32i32_Intrinsic_128B; +Hexagon_v32i32_v128i1v32i32v32i32_Intrinsic<"HEXAGON_V6_vaddhnq_128B">; def int_hexagon_V6_vaddhq : -Hexagon_custom_v16i32_v64i1v16i32v16i32_Intrinsic; +Hexagon_v16i32_v64i1v16i32v16i32_Intrinsic<"HEXAGON_V6_vaddhq">; def int_hexagon_V6_vaddhq_128B : -Hexagon_custom_v32i32_v128i1v32i32v32i32_Intrinsic_128B; +Hexagon_v32i32_v128i1v32i32v32i32_Intrinsic<"HEXAGON_V6_vaddhq_128B">; def int_hexagon_V6_vaddhsat : Hexagon_v16i32_v16i32v16i32_Intrinsic<"HEXAGON_V6_vaddhsat">; @@ -4037,16 +4002,16 @@ def int_hexagon_V6_vaddw_dv_128B : Hexagon_v64i32_v64i32v64i32_Intrinsic<"HEXAGON_V6_vaddw_dv_128B">; def int_hexagon_V6_vaddwnq : -Hexagon_custom_v16i32_v64i1v16i32v16i32_Intrinsic; +Hexagon_v16i32_v64i1v16i32v16i32_Intrinsic<"HEXAGON_V6_vaddwnq">; def int_hexagon_V6_vaddwnq_128B : -Hexagon_custom_v32i32_v128i1v32i32v32i32_Intrinsic_128B; +Hexagon_v32i32_v128i1v32i32v32i32_Intrinsic<"HEXAGON_V6_vaddwnq_128B">; def int_hexagon_V6_vaddwq : -Hexagon_custom_v16i32_v64i1v16i32v16i32_Intrinsic; +Hexagon_v16i32_v64i1v16i32v16i32_Intrinsic<"HEXAGON_V6_vaddwq">; def int_hexagon_V6_vaddwq_128B : -Hexagon_custom_v32i32_v128i1v32i32v32i32_Intrinsic_128B; +Hexagon_v32i32_v128i1v32i32v32i32_Intrinsic<"HEXAGON_V6_vaddwq_128B">; def int_hexagon_V6_vaddwsat : Hexagon_v16i32_v16i32v16i32_Intrinsic<"HEXAGON_V6_vaddwsat">; @@ -4079,28 +4044,28 @@ def int_hexagon_V6_vand_128B : Hexagon_v32i32_v32i32v32i32_Intrinsic<"HEXAGON_V6_vand_128B">; def int_hexagon_V6_vandqrt : -Hexagon_custom_v16i32_v64i1i32_Intrinsic; +Hexagon_v16i32_v64i1i32_Intrinsic<"HEXAGON_V6_vandqrt">; def int_hexagon_V6_vandqrt_128B : -Hexagon_custom_v32i32_v128i1i32_Intrinsic_128B; +Hexagon_v32i32_v128i1i32_Intrinsic<"HEXAGON_V6_vandqrt_128B">; def int_hexagon_V6_vandqrt_acc : -Hexagon_custom_v16i32_v16i32v64i1i32_Intrinsic; +Hexagon_v16i32_v16i32v64i1i32_Intrinsic<"HEXAGON_V6_vandqrt_acc">; def int_hexagon_V6_vandqrt_acc_128B : -Hexagon_custom_v32i32_v32i32v128i1i32_Intrinsic_128B; +Hexagon_v32i32_v32i32v128i1i32_Intrinsic<"HEXAGON_V6_vandqrt_acc_128B">; def int_hexagon_V6_vandvrt : -Hexagon_custom_v64i1_v16i32i32_Intrinsic; +Hexagon_v64i1_v16i32i32_Intrinsic<"HEXAGON_V6_vandvrt">; def int_hexagon_V6_vandvrt_128B : -Hexagon_custom_v128i1_v32i32i32_Intrinsic_128B; 
+Hexagon_v128i1_v32i32i32_Intrinsic<"HEXAGON_V6_vandvrt_128B">; def int_hexagon_V6_vandvrt_acc : -Hexagon_custom_v64i1_v64i1v16i32i32_Intrinsic; +Hexagon_v64i1_v64i1v16i32i32_Intrinsic<"HEXAGON_V6_vandvrt_acc">; def int_hexagon_V6_vandvrt_acc_128B : -Hexagon_custom_v128i1_v128i1v32i32i32_Intrinsic_128B; +Hexagon_v128i1_v128i1v32i32i32_Intrinsic<"HEXAGON_V6_vandvrt_acc_128B">; def int_hexagon_V6_vaslh : Hexagon_v16i32_v16i32i32_Intrinsic<"HEXAGON_V6_vaslh">; @@ -4439,220 +4404,220 @@ def int_hexagon_V6_vdsaduh_acc_128B : Hexagon_v64i32_v64i32v64i32i32_Intrinsic<"HEXAGON_V6_vdsaduh_acc_128B">; def int_hexagon_V6_veqb : -Hexagon_custom_v64i1_v16i32v16i32_Intrinsic; +Hexagon_v64i1_v16i32v16i32_Intrinsic<"HEXAGON_V6_veqb">; def int_hexagon_V6_veqb_128B : -Hexagon_custom_v128i1_v32i32v32i32_Intrinsic_128B; +Hexagon_v128i1_v32i32v32i32_Intrinsic<"HEXAGON_V6_veqb_128B">; def int_hexagon_V6_veqb_and : -Hexagon_custom_v64i1_v64i1v16i32v16i32_Intrinsic; +Hexagon_v64i1_v64i1v16i32v16i32_Intrinsic<"HEXAGON_V6_veqb_and">; def int_hexagon_V6_veqb_and_128B : -Hexagon_custom_v128i1_v128i1v32i32v32i32_Intrinsic_128B; +Hexagon_v128i1_v128i1v32i32v32i32_Intrinsic<"HEXAGON_V6_veqb_and_128B">; def int_hexagon_V6_veqb_or : -Hexagon_custom_v64i1_v64i1v16i32v16i32_Intrinsic; +Hexagon_v64i1_v64i1v16i32v16i32_Intrinsic<"HEXAGON_V6_veqb_or">; def int_hexagon_V6_veqb_or_128B : -Hexagon_custom_v128i1_v128i1v32i32v32i32_Intrinsic_128B; +Hexagon_v128i1_v128i1v32i32v32i32_Intrinsic<"HEXAGON_V6_veqb_or_128B">; def int_hexagon_V6_veqb_xor : -Hexagon_custom_v64i1_v64i1v16i32v16i32_Intrinsic; +Hexagon_v64i1_v64i1v16i32v16i32_Intrinsic<"HEXAGON_V6_veqb_xor">; def int_hexagon_V6_veqb_xor_128B : -Hexagon_custom_v128i1_v128i1v32i32v32i32_Intrinsic_128B; +Hexagon_v128i1_v128i1v32i32v32i32_Intrinsic<"HEXAGON_V6_veqb_xor_128B">; def int_hexagon_V6_veqh : -Hexagon_custom_v64i1_v16i32v16i32_Intrinsic; +Hexagon_v64i1_v16i32v16i32_Intrinsic<"HEXAGON_V6_veqh">; def int_hexagon_V6_veqh_128B : -Hexagon_custom_v128i1_v32i32v32i32_Intrinsic_128B; +Hexagon_v128i1_v32i32v32i32_Intrinsic<"HEXAGON_V6_veqh_128B">; def int_hexagon_V6_veqh_and : -Hexagon_custom_v64i1_v64i1v16i32v16i32_Intrinsic; +Hexagon_v64i1_v64i1v16i32v16i32_Intrinsic<"HEXAGON_V6_veqh_and">; def int_hexagon_V6_veqh_and_128B : -Hexagon_custom_v128i1_v128i1v32i32v32i32_Intrinsic_128B; +Hexagon_v128i1_v128i1v32i32v32i32_Intrinsic<"HEXAGON_V6_veqh_and_128B">; def int_hexagon_V6_veqh_or : -Hexagon_custom_v64i1_v64i1v16i32v16i32_Intrinsic; +Hexagon_v64i1_v64i1v16i32v16i32_Intrinsic<"HEXAGON_V6_veqh_or">; def int_hexagon_V6_veqh_or_128B : -Hexagon_custom_v128i1_v128i1v32i32v32i32_Intrinsic_128B; +Hexagon_v128i1_v128i1v32i32v32i32_Intrinsic<"HEXAGON_V6_veqh_or_128B">; def int_hexagon_V6_veqh_xor : -Hexagon_custom_v64i1_v64i1v16i32v16i32_Intrinsic; +Hexagon_v64i1_v64i1v16i32v16i32_Intrinsic<"HEXAGON_V6_veqh_xor">; def int_hexagon_V6_veqh_xor_128B : -Hexagon_custom_v128i1_v128i1v32i32v32i32_Intrinsic_128B; +Hexagon_v128i1_v128i1v32i32v32i32_Intrinsic<"HEXAGON_V6_veqh_xor_128B">; def int_hexagon_V6_veqw : -Hexagon_custom_v64i1_v16i32v16i32_Intrinsic; +Hexagon_v64i1_v16i32v16i32_Intrinsic<"HEXAGON_V6_veqw">; def int_hexagon_V6_veqw_128B : -Hexagon_custom_v128i1_v32i32v32i32_Intrinsic_128B; +Hexagon_v128i1_v32i32v32i32_Intrinsic<"HEXAGON_V6_veqw_128B">; def int_hexagon_V6_veqw_and : -Hexagon_custom_v64i1_v64i1v16i32v16i32_Intrinsic; +Hexagon_v64i1_v64i1v16i32v16i32_Intrinsic<"HEXAGON_V6_veqw_and">; def int_hexagon_V6_veqw_and_128B : -Hexagon_custom_v128i1_v128i1v32i32v32i32_Intrinsic_128B; 
+Hexagon_v128i1_v128i1v32i32v32i32_Intrinsic<"HEXAGON_V6_veqw_and_128B">; def int_hexagon_V6_veqw_or : -Hexagon_custom_v64i1_v64i1v16i32v16i32_Intrinsic; +Hexagon_v64i1_v64i1v16i32v16i32_Intrinsic<"HEXAGON_V6_veqw_or">; def int_hexagon_V6_veqw_or_128B : -Hexagon_custom_v128i1_v128i1v32i32v32i32_Intrinsic_128B; +Hexagon_v128i1_v128i1v32i32v32i32_Intrinsic<"HEXAGON_V6_veqw_or_128B">; def int_hexagon_V6_veqw_xor : -Hexagon_custom_v64i1_v64i1v16i32v16i32_Intrinsic; +Hexagon_v64i1_v64i1v16i32v16i32_Intrinsic<"HEXAGON_V6_veqw_xor">; def int_hexagon_V6_veqw_xor_128B : -Hexagon_custom_v128i1_v128i1v32i32v32i32_Intrinsic_128B; +Hexagon_v128i1_v128i1v32i32v32i32_Intrinsic<"HEXAGON_V6_veqw_xor_128B">; def int_hexagon_V6_vgtb : -Hexagon_custom_v64i1_v16i32v16i32_Intrinsic; +Hexagon_v64i1_v16i32v16i32_Intrinsic<"HEXAGON_V6_vgtb">; def int_hexagon_V6_vgtb_128B : -Hexagon_custom_v128i1_v32i32v32i32_Intrinsic_128B; +Hexagon_v128i1_v32i32v32i32_Intrinsic<"HEXAGON_V6_vgtb_128B">; def int_hexagon_V6_vgtb_and : -Hexagon_custom_v64i1_v64i1v16i32v16i32_Intrinsic; +Hexagon_v64i1_v64i1v16i32v16i32_Intrinsic<"HEXAGON_V6_vgtb_and">; def int_hexagon_V6_vgtb_and_128B : -Hexagon_custom_v128i1_v128i1v32i32v32i32_Intrinsic_128B; +Hexagon_v128i1_v128i1v32i32v32i32_Intrinsic<"HEXAGON_V6_vgtb_and_128B">; def int_hexagon_V6_vgtb_or : -Hexagon_custom_v64i1_v64i1v16i32v16i32_Intrinsic; +Hexagon_v64i1_v64i1v16i32v16i32_Intrinsic<"HEXAGON_V6_vgtb_or">; def int_hexagon_V6_vgtb_or_128B : -Hexagon_custom_v128i1_v128i1v32i32v32i32_Intrinsic_128B; +Hexagon_v128i1_v128i1v32i32v32i32_Intrinsic<"HEXAGON_V6_vgtb_or_128B">; def int_hexagon_V6_vgtb_xor : -Hexagon_custom_v64i1_v64i1v16i32v16i32_Intrinsic; +Hexagon_v64i1_v64i1v16i32v16i32_Intrinsic<"HEXAGON_V6_vgtb_xor">; def int_hexagon_V6_vgtb_xor_128B : -Hexagon_custom_v128i1_v128i1v32i32v32i32_Intrinsic_128B; +Hexagon_v128i1_v128i1v32i32v32i32_Intrinsic<"HEXAGON_V6_vgtb_xor_128B">; def int_hexagon_V6_vgth : -Hexagon_custom_v64i1_v16i32v16i32_Intrinsic; +Hexagon_v64i1_v16i32v16i32_Intrinsic<"HEXAGON_V6_vgth">; def int_hexagon_V6_vgth_128B : -Hexagon_custom_v128i1_v32i32v32i32_Intrinsic_128B; +Hexagon_v128i1_v32i32v32i32_Intrinsic<"HEXAGON_V6_vgth_128B">; def int_hexagon_V6_vgth_and : -Hexagon_custom_v64i1_v64i1v16i32v16i32_Intrinsic; +Hexagon_v64i1_v64i1v16i32v16i32_Intrinsic<"HEXAGON_V6_vgth_and">; def int_hexagon_V6_vgth_and_128B : -Hexagon_custom_v128i1_v128i1v32i32v32i32_Intrinsic_128B; +Hexagon_v128i1_v128i1v32i32v32i32_Intrinsic<"HEXAGON_V6_vgth_and_128B">; def int_hexagon_V6_vgth_or : -Hexagon_custom_v64i1_v64i1v16i32v16i32_Intrinsic; +Hexagon_v64i1_v64i1v16i32v16i32_Intrinsic<"HEXAGON_V6_vgth_or">; def int_hexagon_V6_vgth_or_128B : -Hexagon_custom_v128i1_v128i1v32i32v32i32_Intrinsic_128B; +Hexagon_v128i1_v128i1v32i32v32i32_Intrinsic<"HEXAGON_V6_vgth_or_128B">; def int_hexagon_V6_vgth_xor : -Hexagon_custom_v64i1_v64i1v16i32v16i32_Intrinsic; +Hexagon_v64i1_v64i1v16i32v16i32_Intrinsic<"HEXAGON_V6_vgth_xor">; def int_hexagon_V6_vgth_xor_128B : -Hexagon_custom_v128i1_v128i1v32i32v32i32_Intrinsic_128B; +Hexagon_v128i1_v128i1v32i32v32i32_Intrinsic<"HEXAGON_V6_vgth_xor_128B">; def int_hexagon_V6_vgtub : -Hexagon_custom_v64i1_v16i32v16i32_Intrinsic; +Hexagon_v64i1_v16i32v16i32_Intrinsic<"HEXAGON_V6_vgtub">; def int_hexagon_V6_vgtub_128B : -Hexagon_custom_v128i1_v32i32v32i32_Intrinsic_128B; +Hexagon_v128i1_v32i32v32i32_Intrinsic<"HEXAGON_V6_vgtub_128B">; def int_hexagon_V6_vgtub_and : -Hexagon_custom_v64i1_v64i1v16i32v16i32_Intrinsic; 
+Hexagon_v64i1_v64i1v16i32v16i32_Intrinsic<"HEXAGON_V6_vgtub_and">; def int_hexagon_V6_vgtub_and_128B : -Hexagon_custom_v128i1_v128i1v32i32v32i32_Intrinsic_128B; +Hexagon_v128i1_v128i1v32i32v32i32_Intrinsic<"HEXAGON_V6_vgtub_and_128B">; def int_hexagon_V6_vgtub_or : -Hexagon_custom_v64i1_v64i1v16i32v16i32_Intrinsic; +Hexagon_v64i1_v64i1v16i32v16i32_Intrinsic<"HEXAGON_V6_vgtub_or">; def int_hexagon_V6_vgtub_or_128B : -Hexagon_custom_v128i1_v128i1v32i32v32i32_Intrinsic_128B; +Hexagon_v128i1_v128i1v32i32v32i32_Intrinsic<"HEXAGON_V6_vgtub_or_128B">; def int_hexagon_V6_vgtub_xor : -Hexagon_custom_v64i1_v64i1v16i32v16i32_Intrinsic; +Hexagon_v64i1_v64i1v16i32v16i32_Intrinsic<"HEXAGON_V6_vgtub_xor">; def int_hexagon_V6_vgtub_xor_128B : -Hexagon_custom_v128i1_v128i1v32i32v32i32_Intrinsic_128B; +Hexagon_v128i1_v128i1v32i32v32i32_Intrinsic<"HEXAGON_V6_vgtub_xor_128B">; def int_hexagon_V6_vgtuh : -Hexagon_custom_v64i1_v16i32v16i32_Intrinsic; +Hexagon_v64i1_v16i32v16i32_Intrinsic<"HEXAGON_V6_vgtuh">; def int_hexagon_V6_vgtuh_128B : -Hexagon_custom_v128i1_v32i32v32i32_Intrinsic_128B; +Hexagon_v128i1_v32i32v32i32_Intrinsic<"HEXAGON_V6_vgtuh_128B">; def int_hexagon_V6_vgtuh_and : -Hexagon_custom_v64i1_v64i1v16i32v16i32_Intrinsic; +Hexagon_v64i1_v64i1v16i32v16i32_Intrinsic<"HEXAGON_V6_vgtuh_and">; def int_hexagon_V6_vgtuh_and_128B : -Hexagon_custom_v128i1_v128i1v32i32v32i32_Intrinsic_128B; +Hexagon_v128i1_v128i1v32i32v32i32_Intrinsic<"HEXAGON_V6_vgtuh_and_128B">; def int_hexagon_V6_vgtuh_or : -Hexagon_custom_v64i1_v64i1v16i32v16i32_Intrinsic; +Hexagon_v64i1_v64i1v16i32v16i32_Intrinsic<"HEXAGON_V6_vgtuh_or">; def int_hexagon_V6_vgtuh_or_128B : -Hexagon_custom_v128i1_v128i1v32i32v32i32_Intrinsic_128B; +Hexagon_v128i1_v128i1v32i32v32i32_Intrinsic<"HEXAGON_V6_vgtuh_or_128B">; def int_hexagon_V6_vgtuh_xor : -Hexagon_custom_v64i1_v64i1v16i32v16i32_Intrinsic; +Hexagon_v64i1_v64i1v16i32v16i32_Intrinsic<"HEXAGON_V6_vgtuh_xor">; def int_hexagon_V6_vgtuh_xor_128B : -Hexagon_custom_v128i1_v128i1v32i32v32i32_Intrinsic_128B; +Hexagon_v128i1_v128i1v32i32v32i32_Intrinsic<"HEXAGON_V6_vgtuh_xor_128B">; def int_hexagon_V6_vgtuw : -Hexagon_custom_v64i1_v16i32v16i32_Intrinsic; +Hexagon_v64i1_v16i32v16i32_Intrinsic<"HEXAGON_V6_vgtuw">; def int_hexagon_V6_vgtuw_128B : -Hexagon_custom_v128i1_v32i32v32i32_Intrinsic_128B; +Hexagon_v128i1_v32i32v32i32_Intrinsic<"HEXAGON_V6_vgtuw_128B">; def int_hexagon_V6_vgtuw_and : -Hexagon_custom_v64i1_v64i1v16i32v16i32_Intrinsic; +Hexagon_v64i1_v64i1v16i32v16i32_Intrinsic<"HEXAGON_V6_vgtuw_and">; def int_hexagon_V6_vgtuw_and_128B : -Hexagon_custom_v128i1_v128i1v32i32v32i32_Intrinsic_128B; +Hexagon_v128i1_v128i1v32i32v32i32_Intrinsic<"HEXAGON_V6_vgtuw_and_128B">; def int_hexagon_V6_vgtuw_or : -Hexagon_custom_v64i1_v64i1v16i32v16i32_Intrinsic; +Hexagon_v64i1_v64i1v16i32v16i32_Intrinsic<"HEXAGON_V6_vgtuw_or">; def int_hexagon_V6_vgtuw_or_128B : -Hexagon_custom_v128i1_v128i1v32i32v32i32_Intrinsic_128B; +Hexagon_v128i1_v128i1v32i32v32i32_Intrinsic<"HEXAGON_V6_vgtuw_or_128B">; def int_hexagon_V6_vgtuw_xor : -Hexagon_custom_v64i1_v64i1v16i32v16i32_Intrinsic; +Hexagon_v64i1_v64i1v16i32v16i32_Intrinsic<"HEXAGON_V6_vgtuw_xor">; def int_hexagon_V6_vgtuw_xor_128B : -Hexagon_custom_v128i1_v128i1v32i32v32i32_Intrinsic_128B; +Hexagon_v128i1_v128i1v32i32v32i32_Intrinsic<"HEXAGON_V6_vgtuw_xor_128B">; def int_hexagon_V6_vgtw : -Hexagon_custom_v64i1_v16i32v16i32_Intrinsic; +Hexagon_v64i1_v16i32v16i32_Intrinsic<"HEXAGON_V6_vgtw">; def int_hexagon_V6_vgtw_128B : -Hexagon_custom_v128i1_v32i32v32i32_Intrinsic_128B; 
+Hexagon_v128i1_v32i32v32i32_Intrinsic<"HEXAGON_V6_vgtw_128B">; def int_hexagon_V6_vgtw_and : -Hexagon_custom_v64i1_v64i1v16i32v16i32_Intrinsic; +Hexagon_v64i1_v64i1v16i32v16i32_Intrinsic<"HEXAGON_V6_vgtw_and">; def int_hexagon_V6_vgtw_and_128B : -Hexagon_custom_v128i1_v128i1v32i32v32i32_Intrinsic_128B; +Hexagon_v128i1_v128i1v32i32v32i32_Intrinsic<"HEXAGON_V6_vgtw_and_128B">; def int_hexagon_V6_vgtw_or : -Hexagon_custom_v64i1_v64i1v16i32v16i32_Intrinsic; +Hexagon_v64i1_v64i1v16i32v16i32_Intrinsic<"HEXAGON_V6_vgtw_or">; def int_hexagon_V6_vgtw_or_128B : -Hexagon_custom_v128i1_v128i1v32i32v32i32_Intrinsic_128B; +Hexagon_v128i1_v128i1v32i32v32i32_Intrinsic<"HEXAGON_V6_vgtw_or_128B">; def int_hexagon_V6_vgtw_xor : -Hexagon_custom_v64i1_v64i1v16i32v16i32_Intrinsic; +Hexagon_v64i1_v64i1v16i32v16i32_Intrinsic<"HEXAGON_V6_vgtw_xor">; def int_hexagon_V6_vgtw_xor_128B : -Hexagon_custom_v128i1_v128i1v32i32v32i32_Intrinsic_128B; +Hexagon_v128i1_v128i1v32i32v32i32_Intrinsic<"HEXAGON_V6_vgtw_xor_128B">; def int_hexagon_V6_vinsertwr : Hexagon_v16i32_v16i32i32_Intrinsic<"HEXAGON_V6_vinsertwr">; @@ -5051,10 +5016,10 @@ def int_hexagon_V6_vmpyuhv_acc_128B : Hexagon_v64i32_v64i32v32i32v32i32_Intrinsic<"HEXAGON_V6_vmpyuhv_acc_128B">; def int_hexagon_V6_vmux : -Hexagon_custom_v16i32_v64i1v16i32v16i32_Intrinsic; +Hexagon_v16i32_v64i1v16i32v16i32_Intrinsic<"HEXAGON_V6_vmux">; def int_hexagon_V6_vmux_128B : -Hexagon_custom_v32i32_v128i1v32i32v32i32_Intrinsic_128B; +Hexagon_v32i32_v128i1v32i32v32i32_Intrinsic<"HEXAGON_V6_vmux_128B">; def int_hexagon_V6_vnavgh : Hexagon_v16i32_v16i32v16i32_Intrinsic<"HEXAGON_V6_vnavgh">; @@ -5375,16 +5340,16 @@ def int_hexagon_V6_vsubb_dv_128B : Hexagon_v64i32_v64i32v64i32_Intrinsic<"HEXAGON_V6_vsubb_dv_128B">; def int_hexagon_V6_vsubbnq : -Hexagon_custom_v16i32_v64i1v16i32v16i32_Intrinsic; +Hexagon_v16i32_v64i1v16i32v16i32_Intrinsic<"HEXAGON_V6_vsubbnq">; def int_hexagon_V6_vsubbnq_128B : -Hexagon_custom_v32i32_v128i1v32i32v32i32_Intrinsic_128B; +Hexagon_v32i32_v128i1v32i32v32i32_Intrinsic<"HEXAGON_V6_vsubbnq_128B">; def int_hexagon_V6_vsubbq : -Hexagon_custom_v16i32_v64i1v16i32v16i32_Intrinsic; +Hexagon_v16i32_v64i1v16i32v16i32_Intrinsic<"HEXAGON_V6_vsubbq">; def int_hexagon_V6_vsubbq_128B : -Hexagon_custom_v32i32_v128i1v32i32v32i32_Intrinsic_128B; +Hexagon_v32i32_v128i1v32i32v32i32_Intrinsic<"HEXAGON_V6_vsubbq_128B">; def int_hexagon_V6_vsubh : Hexagon_v16i32_v16i32v16i32_Intrinsic<"HEXAGON_V6_vsubh">; @@ -5399,16 +5364,16 @@ def int_hexagon_V6_vsubh_dv_128B : Hexagon_v64i32_v64i32v64i32_Intrinsic<"HEXAGON_V6_vsubh_dv_128B">; def int_hexagon_V6_vsubhnq : -Hexagon_custom_v16i32_v64i1v16i32v16i32_Intrinsic; +Hexagon_v16i32_v64i1v16i32v16i32_Intrinsic<"HEXAGON_V6_vsubhnq">; def int_hexagon_V6_vsubhnq_128B : -Hexagon_custom_v32i32_v128i1v32i32v32i32_Intrinsic_128B; +Hexagon_v32i32_v128i1v32i32v32i32_Intrinsic<"HEXAGON_V6_vsubhnq_128B">; def int_hexagon_V6_vsubhq : -Hexagon_custom_v16i32_v64i1v16i32v16i32_Intrinsic; +Hexagon_v16i32_v64i1v16i32v16i32_Intrinsic<"HEXAGON_V6_vsubhq">; def int_hexagon_V6_vsubhq_128B : -Hexagon_custom_v32i32_v128i1v32i32v32i32_Intrinsic_128B; +Hexagon_v32i32_v128i1v32i32v32i32_Intrinsic<"HEXAGON_V6_vsubhq_128B">; def int_hexagon_V6_vsubhsat : Hexagon_v16i32_v16i32v16i32_Intrinsic<"HEXAGON_V6_vsubhsat">; @@ -5477,16 +5442,16 @@ def int_hexagon_V6_vsubw_dv_128B : Hexagon_v64i32_v64i32v64i32_Intrinsic<"HEXAGON_V6_vsubw_dv_128B">; def int_hexagon_V6_vsubwnq : -Hexagon_custom_v16i32_v64i1v16i32v16i32_Intrinsic; 
+Hexagon_v16i32_v64i1v16i32v16i32_Intrinsic<"HEXAGON_V6_vsubwnq">; def int_hexagon_V6_vsubwnq_128B : -Hexagon_custom_v32i32_v128i1v32i32v32i32_Intrinsic_128B; +Hexagon_v32i32_v128i1v32i32v32i32_Intrinsic<"HEXAGON_V6_vsubwnq_128B">; def int_hexagon_V6_vsubwq : -Hexagon_custom_v16i32_v64i1v16i32v16i32_Intrinsic; +Hexagon_v16i32_v64i1v16i32v16i32_Intrinsic<"HEXAGON_V6_vsubwq">; def int_hexagon_V6_vsubwq_128B : -Hexagon_custom_v32i32_v128i1v32i32v32i32_Intrinsic_128B; +Hexagon_v32i32_v128i1v32i32v32i32_Intrinsic<"HEXAGON_V6_vsubwq_128B">; def int_hexagon_V6_vsubwsat : Hexagon_v16i32_v16i32v16i32_Intrinsic<"HEXAGON_V6_vsubwsat">; @@ -5501,10 +5466,10 @@ def int_hexagon_V6_vsubwsat_dv_128B : Hexagon_v64i32_v64i32v64i32_Intrinsic<"HEXAGON_V6_vsubwsat_dv_128B">; def int_hexagon_V6_vswap : -Hexagon_custom_v32i32_v64i1v16i32v16i32_Intrinsic; +Hexagon_v32i32_v64i1v16i32v16i32_Intrinsic<"HEXAGON_V6_vswap">; def int_hexagon_V6_vswap_128B : -Hexagon_custom_v64i32_v128i1v32i32v32i32_Intrinsic_128B; +Hexagon_v64i32_v128i1v32i32v32i32_Intrinsic<"HEXAGON_V6_vswap_128B">; def int_hexagon_V6_vtmpyb : Hexagon_v32i32_v32i32i32_Intrinsic<"HEXAGON_V6_vtmpyb">; @@ -5611,22 +5576,22 @@ def int_hexagon_V6_lvsplath_128B : Hexagon_v32i32_i32_Intrinsic<"HEXAGON_V6_lvsplath_128B">; def int_hexagon_V6_pred_scalar2v2 : -Hexagon_custom_v64i1_i32_Intrinsic; +Hexagon_v64i1_i32_Intrinsic<"HEXAGON_V6_pred_scalar2v2">; def int_hexagon_V6_pred_scalar2v2_128B : -Hexagon_custom_v128i1_i32_Intrinsic_128B; +Hexagon_v128i1_i32_Intrinsic<"HEXAGON_V6_pred_scalar2v2_128B">; def int_hexagon_V6_shuffeqh : -Hexagon_custom_v64i1_v64i1v64i1_Intrinsic; +Hexagon_v64i1_v64i1v64i1_Intrinsic<"HEXAGON_V6_shuffeqh">; def int_hexagon_V6_shuffeqh_128B : -Hexagon_custom_v128i1_v128i1v128i1_Intrinsic_128B; +Hexagon_v128i1_v128i1v128i1_Intrinsic<"HEXAGON_V6_shuffeqh_128B">; def int_hexagon_V6_shuffeqw : -Hexagon_custom_v64i1_v64i1v64i1_Intrinsic; +Hexagon_v64i1_v64i1v64i1_Intrinsic<"HEXAGON_V6_shuffeqw">; def int_hexagon_V6_shuffeqw_128B : -Hexagon_custom_v128i1_v128i1v128i1_Intrinsic_128B; +Hexagon_v128i1_v128i1v128i1_Intrinsic<"HEXAGON_V6_shuffeqw_128B">; def int_hexagon_V6_vaddbsat : Hexagon_v16i32_v16i32v16i32_Intrinsic<"HEXAGON_V6_vaddbsat">; @@ -5695,28 +5660,28 @@ def int_hexagon_V6_vadduwsat_dv_128B : Hexagon_v64i32_v64i32v64i32_Intrinsic<"HEXAGON_V6_vadduwsat_dv_128B">; def int_hexagon_V6_vandnqrt : -Hexagon_custom_v16i32_v64i1i32_Intrinsic; +Hexagon_v16i32_v64i1i32_Intrinsic<"HEXAGON_V6_vandnqrt">; def int_hexagon_V6_vandnqrt_128B : -Hexagon_custom_v32i32_v128i1i32_Intrinsic_128B; +Hexagon_v32i32_v128i1i32_Intrinsic<"HEXAGON_V6_vandnqrt_128B">; def int_hexagon_V6_vandnqrt_acc : -Hexagon_custom_v16i32_v16i32v64i1i32_Intrinsic; +Hexagon_v16i32_v16i32v64i1i32_Intrinsic<"HEXAGON_V6_vandnqrt_acc">; def int_hexagon_V6_vandnqrt_acc_128B : -Hexagon_custom_v32i32_v32i32v128i1i32_Intrinsic_128B; +Hexagon_v32i32_v32i32v128i1i32_Intrinsic<"HEXAGON_V6_vandnqrt_acc_128B">; def int_hexagon_V6_vandvnqv : -Hexagon_custom_v16i32_v64i1v16i32_Intrinsic; +Hexagon_v16i32_v64i1v16i32_Intrinsic<"HEXAGON_V6_vandvnqv">; def int_hexagon_V6_vandvnqv_128B : -Hexagon_custom_v32i32_v128i1v32i32_Intrinsic_128B; +Hexagon_v32i32_v128i1v32i32_Intrinsic<"HEXAGON_V6_vandvnqv_128B">; def int_hexagon_V6_vandvqv : -Hexagon_custom_v16i32_v64i1v16i32_Intrinsic; +Hexagon_v16i32_v64i1v16i32_Intrinsic<"HEXAGON_V6_vandvqv">; def int_hexagon_V6_vandvqv_128B : -Hexagon_custom_v32i32_v128i1v32i32_Intrinsic_128B; +Hexagon_v32i32_v128i1v32i32_Intrinsic<"HEXAGON_V6_vandvqv_128B">; def 
int_hexagon_V6_vasrhbsat : Hexagon_v16i32_v16i32v16i32i32_Intrinsic<"HEXAGON_V6_vasrhbsat">; @@ -5961,10 +5926,10 @@ def int_hexagon_V6_vgathermh_128B : Hexagon__ptri32i32v32i32_Intrinsic<"HEXAGON_V6_vgathermh_128B", [IntrArgMemOnly]>; def int_hexagon_V6_vgathermhq : -Hexagon_custom__ptrv64i1i32i32v16i32_Intrinsic<[IntrArgMemOnly]>; +Hexagon__ptrv64i1i32i32v16i32_Intrinsic<"HEXAGON_V6_vgathermhq", [IntrArgMemOnly]>; def int_hexagon_V6_vgathermhq_128B : -Hexagon_custom__ptrv128i1i32i32v32i32_Intrinsic_128B<[IntrArgMemOnly]>; +Hexagon__ptrv128i1i32i32v32i32_Intrinsic<"HEXAGON_V6_vgathermhq_128B", [IntrArgMemOnly]>; def int_hexagon_V6_vgathermhw : Hexagon__ptri32i32v32i32_Intrinsic<"HEXAGON_V6_vgathermhw", [IntrArgMemOnly]>; @@ -5973,10 +5938,10 @@ def int_hexagon_V6_vgathermhw_128B : Hexagon__ptri32i32v64i32_Intrinsic<"HEXAGON_V6_vgathermhw_128B", [IntrArgMemOnly]>; def int_hexagon_V6_vgathermhwq : -Hexagon_custom__ptrv64i1i32i32v32i32_Intrinsic<[IntrArgMemOnly]>; +Hexagon__ptrv64i1i32i32v32i32_Intrinsic<"HEXAGON_V6_vgathermhwq", [IntrArgMemOnly]>; def int_hexagon_V6_vgathermhwq_128B : -Hexagon_custom__ptrv128i1i32i32v64i32_Intrinsic_128B<[IntrArgMemOnly]>; +Hexagon__ptrv128i1i32i32v64i32_Intrinsic<"HEXAGON_V6_vgathermhwq_128B", [IntrArgMemOnly]>; def int_hexagon_V6_vgathermw : Hexagon__ptri32i32v16i32_Intrinsic<"HEXAGON_V6_vgathermw", [IntrArgMemOnly]>; @@ -5985,10 +5950,10 @@ def int_hexagon_V6_vgathermw_128B : Hexagon__ptri32i32v32i32_Intrinsic<"HEXAGON_V6_vgathermw_128B", [IntrArgMemOnly]>; def int_hexagon_V6_vgathermwq : -Hexagon_custom__ptrv64i1i32i32v16i32_Intrinsic<[IntrArgMemOnly]>; +Hexagon__ptrv64i1i32i32v16i32_Intrinsic<"HEXAGON_V6_vgathermwq", [IntrArgMemOnly]>; def int_hexagon_V6_vgathermwq_128B : -Hexagon_custom__ptrv128i1i32i32v32i32_Intrinsic_128B<[IntrArgMemOnly]>; +Hexagon__ptrv128i1i32i32v32i32_Intrinsic<"HEXAGON_V6_vgathermwq_128B", [IntrArgMemOnly]>; def int_hexagon_V6_vlut4 : Hexagon_v16i32_v16i32i64_Intrinsic<"HEXAGON_V6_vlut4">; @@ -6051,22 +6016,22 @@ def int_hexagon_V6_vnavgb_128B : Hexagon_v32i32_v32i32v32i32_Intrinsic<"HEXAGON_V6_vnavgb_128B">; def int_hexagon_V6_vprefixqb : -Hexagon_custom_v16i32_v64i1_Intrinsic; +Hexagon_v16i32_v64i1_Intrinsic<"HEXAGON_V6_vprefixqb">; def int_hexagon_V6_vprefixqb_128B : -Hexagon_custom_v32i32_v128i1_Intrinsic_128B; +Hexagon_v32i32_v128i1_Intrinsic<"HEXAGON_V6_vprefixqb_128B">; def int_hexagon_V6_vprefixqh : -Hexagon_custom_v16i32_v64i1_Intrinsic; +Hexagon_v16i32_v64i1_Intrinsic<"HEXAGON_V6_vprefixqh">; def int_hexagon_V6_vprefixqh_128B : -Hexagon_custom_v32i32_v128i1_Intrinsic_128B; +Hexagon_v32i32_v128i1_Intrinsic<"HEXAGON_V6_vprefixqh_128B">; def int_hexagon_V6_vprefixqw : -Hexagon_custom_v16i32_v64i1_Intrinsic; +Hexagon_v16i32_v64i1_Intrinsic<"HEXAGON_V6_vprefixqw">; def int_hexagon_V6_vprefixqw_128B : -Hexagon_custom_v32i32_v128i1_Intrinsic_128B; +Hexagon_v32i32_v128i1_Intrinsic<"HEXAGON_V6_vprefixqw_128B">; def int_hexagon_V6_vscattermh : Hexagon__i32i32v16i32v16i32_Intrinsic<"HEXAGON_V6_vscattermh", [IntrWriteMem]>; @@ -6081,10 +6046,10 @@ def int_hexagon_V6_vscattermh_add_128B : Hexagon__i32i32v32i32v32i32_Intrinsic<"HEXAGON_V6_vscattermh_add_128B", [IntrWriteMem]>; def int_hexagon_V6_vscattermhq : -Hexagon_custom__v64i1i32i32v16i32v16i32_Intrinsic<[IntrWriteMem]>; +Hexagon__v64i1i32i32v16i32v16i32_Intrinsic<"HEXAGON_V6_vscattermhq", [IntrWriteMem]>; def int_hexagon_V6_vscattermhq_128B : -Hexagon_custom__v128i1i32i32v32i32v32i32_Intrinsic_128B<[IntrWriteMem]>; 
+Hexagon__v128i1i32i32v32i32v32i32_Intrinsic<"HEXAGON_V6_vscattermhq_128B", [IntrWriteMem]>; def int_hexagon_V6_vscattermhw : Hexagon__i32i32v32i32v16i32_Intrinsic<"HEXAGON_V6_vscattermhw", [IntrWriteMem]>; @@ -6099,10 +6064,10 @@ def int_hexagon_V6_vscattermhw_add_128B : Hexagon__i32i32v64i32v32i32_Intrinsic<"HEXAGON_V6_vscattermhw_add_128B", [IntrWriteMem]>; def int_hexagon_V6_vscattermhwq : -Hexagon_custom__v64i1i32i32v32i32v16i32_Intrinsic<[IntrWriteMem]>; +Hexagon__v64i1i32i32v32i32v16i32_Intrinsic<"HEXAGON_V6_vscattermhwq", [IntrWriteMem]>; def int_hexagon_V6_vscattermhwq_128B : -Hexagon_custom__v128i1i32i32v64i32v32i32_Intrinsic_128B<[IntrWriteMem]>; +Hexagon__v128i1i32i32v64i32v32i32_Intrinsic<"HEXAGON_V6_vscattermhwq_128B", [IntrWriteMem]>; def int_hexagon_V6_vscattermw : Hexagon__i32i32v16i32v16i32_Intrinsic<"HEXAGON_V6_vscattermw", [IntrWriteMem]>; @@ -6117,18 +6082,18 @@ def int_hexagon_V6_vscattermw_add_128B : Hexagon__i32i32v32i32v32i32_Intrinsic<"HEXAGON_V6_vscattermw_add_128B", [IntrWriteMem]>; def int_hexagon_V6_vscattermwq : -Hexagon_custom__v64i1i32i32v16i32v16i32_Intrinsic<[IntrWriteMem]>; +Hexagon__v64i1i32i32v16i32v16i32_Intrinsic<"HEXAGON_V6_vscattermwq", [IntrWriteMem]>; def int_hexagon_V6_vscattermwq_128B : -Hexagon_custom__v128i1i32i32v32i32v32i32_Intrinsic_128B<[IntrWriteMem]>; +Hexagon__v128i1i32i32v32i32v32i32_Intrinsic<"HEXAGON_V6_vscattermwq_128B", [IntrWriteMem]>; // V66 HVX Instructions. def int_hexagon_V6_vaddcarrysat : -Hexagon_custom_v16i32_v16i32v16i32v64i1_Intrinsic; +Hexagon_v16i32_v16i32v16i32v64i1_Intrinsic<"HEXAGON_V6_vaddcarrysat">; def int_hexagon_V6_vaddcarrysat_128B : -Hexagon_custom_v32i32_v32i32v32i32v128i1_Intrinsic_128B; +Hexagon_v32i32_v32i32v32i32v128i1_Intrinsic<"HEXAGON_V6_vaddcarrysat_128B">; def int_hexagon_V6_vasr_into : Hexagon_v32i32_v32i32v16i32v16i32_Intrinsic<"HEXAGON_V6_vasr_into">; From e5639b3fa45f68c53e8e676749b0f1aacd16b41d Mon Sep 17 00:00:00 2001 From: Mehdi Amini Date: Wed, 22 Dec 2021 00:19:53 +0000 Subject: [PATCH 078/293] Fix more clang-tidy cleanups in mlir/ (NFC) --- mlir/lib/Analysis/DataFlowAnalysis.cpp | 4 ++-- mlir/lib/Analysis/Liveness.cpp | 4 ++-- mlir/lib/Analysis/LoopAnalysis.cpp | 3 +-- mlir/lib/Analysis/Utils.cpp | 19 +++++++++---------- mlir/lib/Bindings/Python/IRAffine.cpp | 2 +- mlir/lib/Bindings/Python/IRAttributes.cpp | 12 ++++++++---- mlir/lib/Bindings/Python/IRInterfaces.cpp | 3 +-- mlir/lib/Bindings/Python/IRModule.cpp | 2 +- mlir/lib/CAPI/IR/Diagnostics.cpp | 2 +- .../Conversion/PDLToPDLInterp/Predicate.cpp | 2 +- mlir/lib/Dialect/Affine/IR/AffineValueMap.cpp | 2 +- .../Affine/Transforms/AffineParallelize.cpp | 2 +- .../Dialect/Affine/Transforms/LoopUnroll.cpp | 4 ++-- .../Affine/Transforms/SuperVectorize.cpp | 4 ++-- .../Transforms/ComprehensiveBufferizePass.cpp | 2 +- .../SparseTensor/IR/SparseTensorDialect.cpp | 4 ++-- .../Tosa/Transforms/TosaOptimization.cpp | 2 +- mlir/lib/Dialect/Vector/VectorOps.cpp | 2 +- mlir/lib/ExecutionEngine/AsyncRuntime.cpp | 8 ++++---- mlir/lib/ExecutionEngine/CRunnerUtils.cpp | 2 +- .../lib/ExecutionEngine/SparseTensorUtils.cpp | 8 ++++---- mlir/lib/IR/AsmPrinter.cpp | 14 +++++++------- mlir/lib/IR/Builders.cpp | 2 +- mlir/lib/IR/Diagnostics.cpp | 6 +++--- mlir/lib/IR/Dialect.cpp | 6 +++--- mlir/lib/IR/MLIRContext.cpp | 2 +- mlir/lib/Parser/AsmParserState.cpp | 2 +- mlir/lib/Parser/AttributeParser.cpp | 2 +- mlir/lib/Parser/DialectSymbolParser.cpp | 2 +- mlir/lib/Parser/Parser.cpp | 3 +-- mlir/lib/Pass/IRPrinting.cpp | 2 +- mlir/lib/Pass/Pass.cpp | 14 
+++++++------- mlir/lib/Pass/PassCrashRecovery.cpp | 2 +- mlir/lib/Pass/PassRegistry.cpp | 8 ++++---- mlir/lib/Pass/PassStatistics.cpp | 2 +- mlir/lib/Reducer/ReductionNode.cpp | 2 +- mlir/lib/Reducer/Tester.cpp | 2 +- mlir/lib/Rewrite/FrozenRewritePatternSet.cpp | 2 +- mlir/lib/Rewrite/PatternApplicator.cpp | 2 +- mlir/lib/Support/StorageUniquer.cpp | 2 +- mlir/lib/Support/Timing.cpp | 2 +- mlir/lib/TableGen/Attribute.cpp | 2 +- mlir/lib/TableGen/Pass.cpp | 4 ++-- mlir/lib/TableGen/Pattern.cpp | 2 +- mlir/lib/Target/LLVMIR/TypeFromLLVM.cpp | 2 +- mlir/lib/Target/LLVMIR/TypeToLLVM.cpp | 2 +- mlir/lib/Tools/mlir-lsp-server/LSPServer.cpp | 2 +- mlir/lib/Tools/mlir-lsp-server/MLIRServer.cpp | 2 +- mlir/lib/Transforms/LoopFusion.cpp | 2 +- .../Transforms/Utils/DialectConversion.cpp | 2 +- mlir/lib/Transforms/Utils/LoopUtils.cpp | 12 +++++------- mlir/test/lib/Analysis/TestAliasAnalysis.cpp | 4 ++-- .../Analysis/TestMemRefDependenceCheck.cpp | 14 +++++++------- mlir/test/lib/IR/TestDiagnostics.cpp | 2 +- mlir/test/lib/IR/TestOpaqueLoc.cpp | 4 ++-- mlir/test/lib/Pass/TestDynamicPipeline.cpp | 2 +- mlir/test/lib/Transforms/TestLoopMapping.cpp | 2 +- .../tools/mlir-tblgen/LLVMIRConversionGen.cpp | 2 +- mlir/tools/mlir-tblgen/PassGen.cpp | 4 ++-- mlir/tools/mlir-tblgen/SPIRVUtilsGen.cpp | 2 +- mlir/unittests/Pass/PassManagerTest.cpp | 4 +++- 61 files changed, 122 insertions(+), 122 deletions(-) diff --git a/mlir/lib/Analysis/DataFlowAnalysis.cpp b/mlir/lib/Analysis/DataFlowAnalysis.cpp index c6642112e8cb..ca17e953e990 100644 --- a/mlir/lib/Analysis/DataFlowAnalysis.cpp +++ b/mlir/lib/Analysis/DataFlowAnalysis.cpp @@ -756,13 +756,13 @@ void ForwardDataFlowSolver::join(Operation *owner, AbstractLatticeElement &to, // AbstractLatticeElement //===----------------------------------------------------------------------===// -AbstractLatticeElement::~AbstractLatticeElement() {} +AbstractLatticeElement::~AbstractLatticeElement() = default; //===----------------------------------------------------------------------===// // ForwardDataFlowAnalysisBase //===----------------------------------------------------------------------===// -ForwardDataFlowAnalysisBase::~ForwardDataFlowAnalysisBase() {} +ForwardDataFlowAnalysisBase::~ForwardDataFlowAnalysisBase() = default; AbstractLatticeElement & ForwardDataFlowAnalysisBase::getLatticeElement(Value value) { diff --git a/mlir/lib/Analysis/Liveness.cpp b/mlir/lib/Analysis/Liveness.cpp index 39061679e295..64ff321cb1e6 100644 --- a/mlir/lib/Analysis/Liveness.cpp +++ b/mlir/lib/Analysis/Liveness.cpp @@ -27,7 +27,7 @@ struct BlockInfoBuilder { using ValueSetT = Liveness::ValueSetT; /// Constructs an empty block builder. - BlockInfoBuilder() : block(nullptr) {} + BlockInfoBuilder() {} /// Fills the block builder with initial liveness information. BlockInfoBuilder(Block *block) : block(block) { @@ -104,7 +104,7 @@ struct BlockInfoBuilder { } /// The current block. - Block *block; + Block *block{nullptr}; /// The set of all live in values. 
ValueSetT inValues; diff --git a/mlir/lib/Analysis/LoopAnalysis.cpp b/mlir/lib/Analysis/LoopAnalysis.cpp index 478b212c3b52..e6c9863887b3 100644 --- a/mlir/lib/Analysis/LoopAnalysis.cpp +++ b/mlir/lib/Analysis/LoopAnalysis.cpp @@ -182,8 +182,7 @@ static bool isAccessIndexInvariant(Value iv, Value index) { DenseSet mlir::getInvariantAccesses(Value iv, ArrayRef indices) { DenseSet res; - for (unsigned idx = 0, n = indices.size(); idx < n; ++idx) { - auto val = indices[idx]; + for (auto val : indices) { if (isAccessIndexInvariant(iv, val)) { res.insert(val); } diff --git a/mlir/lib/Analysis/Utils.cpp b/mlir/lib/Analysis/Utils.cpp index 097828e077e7..a9697ca83a5a 100644 --- a/mlir/lib/Analysis/Utils.cpp +++ b/mlir/lib/Analysis/Utils.cpp @@ -817,15 +817,15 @@ mlir::computeSliceUnion(ArrayRef opsA, ArrayRef opsB, FlatAffineValueConstraints sliceUnionCst; assert(sliceUnionCst.getNumDimAndSymbolIds() == 0); std::vector> dependentOpPairs; - for (unsigned i = 0, numOpsA = opsA.size(); i < numOpsA; ++i) { - MemRefAccess srcAccess(opsA[i]); - for (unsigned j = 0, numOpsB = opsB.size(); j < numOpsB; ++j) { - MemRefAccess dstAccess(opsB[j]); + for (auto i : opsA) { + MemRefAccess srcAccess(i); + for (auto j : opsB) { + MemRefAccess dstAccess(j); if (srcAccess.memref != dstAccess.memref) continue; // Check if 'loopDepth' exceeds nesting depth of src/dst ops. - if ((!isBackwardSlice && loopDepth > getNestingDepth(opsA[i])) || - (isBackwardSlice && loopDepth > getNestingDepth(opsB[j]))) { + if ((!isBackwardSlice && loopDepth > getNestingDepth(i)) || + (isBackwardSlice && loopDepth > getNestingDepth(j))) { LLVM_DEBUG(llvm::dbgs() << "Invalid loop depth\n"); return SliceComputationResult::GenericFailure; } @@ -844,13 +844,12 @@ mlir::computeSliceUnion(ArrayRef opsA, ArrayRef opsB, } if (result.value == DependenceResult::NoDependence) continue; - dependentOpPairs.push_back({opsA[i], opsB[j]}); + dependentOpPairs.emplace_back(i, j); // Compute slice bounds for 'srcAccess' and 'dstAccess'. ComputationSliceState tmpSliceState; - mlir::getComputationSliceState(opsA[i], opsB[j], &dependenceConstraints, - loopDepth, isBackwardSlice, - &tmpSliceState); + mlir::getComputationSliceState(i, j, &dependenceConstraints, loopDepth, + isBackwardSlice, &tmpSliceState); if (sliceUnionCst.getNumDimAndSymbolIds() == 0) { // Initialize 'sliceUnionCst' with the bounds computed in previous step. 
diff --git a/mlir/lib/Bindings/Python/IRAffine.cpp b/mlir/lib/Bindings/Python/IRAffine.cpp index faf01e5c5f11..c7cdc8243479 100644 --- a/mlir/lib/Bindings/Python/IRAffine.cpp +++ b/mlir/lib/Bindings/Python/IRAffine.cpp @@ -676,7 +676,7 @@ void mlir::python::populateIRAffine(py::module &m) { std::vector res; res.reserve(compressed.size()); for (auto m : compressed) - res.push_back(PyAffineMap(context->getRef(), m)); + res.emplace_back(context->getRef(), m); return res; }) .def_property_readonly( diff --git a/mlir/lib/Bindings/Python/IRAttributes.cpp b/mlir/lib/Bindings/Python/IRAttributes.cpp index f1206617d069..56d16b337c07 100644 --- a/mlir/lib/Bindings/Python/IRAttributes.cpp +++ b/mlir/lib/Bindings/Python/IRAttributes.cpp @@ -524,7 +524,8 @@ class PyDenseElementsAttribute mlirIntegerTypeIsSigned(elementType)) { // i32 return bufferInfo(shapedType); - } else if (mlirIntegerTypeIsUnsigned(elementType)) { + } + if (mlirIntegerTypeIsUnsigned(elementType)) { // unsigned i32 return bufferInfo(shapedType); } @@ -534,7 +535,8 @@ class PyDenseElementsAttribute mlirIntegerTypeIsSigned(elementType)) { // i64 return bufferInfo(shapedType); - } else if (mlirIntegerTypeIsUnsigned(elementType)) { + } + if (mlirIntegerTypeIsUnsigned(elementType)) { // unsigned i64 return bufferInfo(shapedType); } @@ -544,7 +546,8 @@ class PyDenseElementsAttribute mlirIntegerTypeIsSigned(elementType)) { // i8 return bufferInfo(shapedType); - } else if (mlirIntegerTypeIsUnsigned(elementType)) { + } + if (mlirIntegerTypeIsUnsigned(elementType)) { // unsigned i8 return bufferInfo(shapedType); } @@ -554,7 +557,8 @@ class PyDenseElementsAttribute mlirIntegerTypeIsSigned(elementType)) { // i16 return bufferInfo(shapedType); - } else if (mlirIntegerTypeIsUnsigned(elementType)) { + } + if (mlirIntegerTypeIsUnsigned(elementType)) { // unsigned i16 return bufferInfo(shapedType); } diff --git a/mlir/lib/Bindings/Python/IRInterfaces.cpp b/mlir/lib/Bindings/Python/IRInterfaces.cpp index c3d41c4d84d7..564f36b9dac3 100644 --- a/mlir/lib/Bindings/Python/IRInterfaces.cpp +++ b/mlir/lib/Bindings/Python/IRInterfaces.cpp @@ -175,8 +175,7 @@ class PyInferTypeOpInterface auto *data = static_cast(userData); data->inferredTypes.reserve(data->inferredTypes.size() + nTypes); for (intptr_t i = 0; i < nTypes; ++i) { - data->inferredTypes.push_back( - PyType(data->pyMlirContext.getRef(), types[i])); + data->inferredTypes.emplace_back(data->pyMlirContext.getRef(), types[i]); } } diff --git a/mlir/lib/Bindings/Python/IRModule.cpp b/mlir/lib/Bindings/Python/IRModule.cpp index 7008e54bd046..633ffe4e111b 100644 --- a/mlir/lib/Bindings/Python/IRModule.cpp +++ b/mlir/lib/Bindings/Python/IRModule.cpp @@ -29,7 +29,7 @@ PyGlobals::PyGlobals() { instance = this; // The default search path include {mlir.}dialects, where {mlir.} is the // package prefix configured at compile time. 
- dialectSearchPrefixes.push_back(MAKE_MLIR_PYTHON_QUALNAME("dialects")); + dialectSearchPrefixes.emplace_back(MAKE_MLIR_PYTHON_QUALNAME("dialects")); } PyGlobals::~PyGlobals() { instance = nullptr; } diff --git a/mlir/lib/CAPI/IR/Diagnostics.cpp b/mlir/lib/CAPI/IR/Diagnostics.cpp index 2ed05a5a06e6..40639c7ba31b 100644 --- a/mlir/lib/CAPI/IR/Diagnostics.cpp +++ b/mlir/lib/CAPI/IR/Diagnostics.cpp @@ -57,7 +57,7 @@ MlirDiagnosticHandlerID mlirContextAttachDiagnosticHandler( MlirContext context, MlirDiagnosticHandler handler, void *userData, void (*deleteUserData)(void *)) { assert(handler && "unexpected null diagnostic handler"); - if (deleteUserData == NULL) + if (deleteUserData == nullptr) deleteUserData = deleteUserDataNoop; std::shared_ptr sharedUserData(userData, deleteUserData); DiagnosticEngine::HandlerID id = diff --git a/mlir/lib/Conversion/PDLToPDLInterp/Predicate.cpp b/mlir/lib/Conversion/PDLToPDLInterp/Predicate.cpp index 8d6d8776bde3..07fa5c77c13f 100644 --- a/mlir/lib/Conversion/PDLToPDLInterp/Predicate.cpp +++ b/mlir/lib/Conversion/PDLToPDLInterp/Predicate.cpp @@ -15,7 +15,7 @@ using namespace mlir::pdl_to_pdl_interp; // Positions //===----------------------------------------------------------------------===// -Position::~Position() {} +Position::~Position() = default; /// Returns the depth of the first ancestor operation position. unsigned Position::getOperationDepth() const { diff --git a/mlir/lib/Dialect/Affine/IR/AffineValueMap.cpp b/mlir/lib/Dialect/Affine/IR/AffineValueMap.cpp index a17eb411acc8..7e2dfaf4db21 100644 --- a/mlir/lib/Dialect/Affine/IR/AffineValueMap.cpp +++ b/mlir/lib/Dialect/Affine/IR/AffineValueMap.cpp @@ -99,4 +99,4 @@ ArrayRef AffineValueMap::getOperands() const { AffineMap AffineValueMap::getAffineMap() const { return map.getAffineMap(); } -AffineValueMap::~AffineValueMap() {} +AffineValueMap::~AffineValueMap() = default; diff --git a/mlir/lib/Dialect/Affine/Transforms/AffineParallelize.cpp b/mlir/lib/Dialect/Affine/Transforms/AffineParallelize.cpp index 237094d40006..add9a802ad78 100644 --- a/mlir/lib/Dialect/Affine/Transforms/AffineParallelize.cpp +++ b/mlir/lib/Dialect/Affine/Transforms/AffineParallelize.cpp @@ -56,7 +56,7 @@ void AffineParallelize::runOnFunction() { f.walk([&](AffineForOp loop) { SmallVector reductions; if (isLoopParallel(loop, parallelReductions ? 
&reductions : nullptr)) - parallelizableLoops.push_back({loop, std::move(reductions)}); + parallelizableLoops.emplace_back(loop, std::move(reductions)); }); for (const ParallelizationCandidate &candidate : parallelizableLoops) { diff --git a/mlir/lib/Dialect/Affine/Transforms/LoopUnroll.cpp b/mlir/lib/Dialect/Affine/Transforms/LoopUnroll.cpp index f876153834bb..5063d8c66977 100644 --- a/mlir/lib/Dialect/Affine/Transforms/LoopUnroll.cpp +++ b/mlir/lib/Dialect/Affine/Transforms/LoopUnroll.cpp @@ -41,8 +41,8 @@ struct LoopUnroll : public AffineLoopUnrollBase { LoopUnroll() : getUnrollFactor(nullptr) {} LoopUnroll(const LoopUnroll &other) - : AffineLoopUnrollBase(other), - getUnrollFactor(other.getUnrollFactor) {} + + = default; explicit LoopUnroll( Optional unrollFactor = None, bool unrollUpToFactor = false, bool unrollFull = false, diff --git a/mlir/lib/Dialect/Affine/Transforms/SuperVectorize.cpp b/mlir/lib/Dialect/Affine/Transforms/SuperVectorize.cpp index 0e41d02d3c27..9d59b89ea1a2 100644 --- a/mlir/lib/Dialect/Affine/Transforms/SuperVectorize.cpp +++ b/mlir/lib/Dialect/Affine/Transforms/SuperVectorize.cpp @@ -1494,7 +1494,7 @@ getMatchedAffineLoopsRec(NestedMatch match, unsigned currentLevel, // Add a new empty level to the output if it doesn't exist already. assert(currentLevel <= loops.size() && "Unexpected currentLevel"); if (currentLevel == loops.size()) - loops.push_back(SmallVector()); + loops.emplace_back(); // Add current match and recursively visit its children. loops[currentLevel].push_back(cast(match.getMatchedOperation())); @@ -1631,7 +1631,7 @@ static void computeIntersectionBuckets( // it. if (!intersects) { bucketRoots.push_back(matchRoot); - intersectionBuckets.push_back(SmallVector()); + intersectionBuckets.emplace_back(); intersectionBuckets.back().push_back(match); } } diff --git a/mlir/lib/Dialect/Linalg/Transforms/ComprehensiveBufferizePass.cpp b/mlir/lib/Dialect/Linalg/Transforms/ComprehensiveBufferizePass.cpp index 5255bd2d7b00..61a9f1b0cdb3 100644 --- a/mlir/lib/Dialect/Linalg/Transforms/ComprehensiveBufferizePass.cpp +++ b/mlir/lib/Dialect/Linalg/Transforms/ComprehensiveBufferizePass.cpp @@ -33,7 +33,7 @@ namespace { struct LinalgComprehensiveModuleBufferize : public LinalgComprehensiveModuleBufferizeBase< LinalgComprehensiveModuleBufferize> { - LinalgComprehensiveModuleBufferize() {} + LinalgComprehensiveModuleBufferize() = default; LinalgComprehensiveModuleBufferize( const LinalgComprehensiveModuleBufferize &p) {} diff --git a/mlir/lib/Dialect/SparseTensor/IR/SparseTensorDialect.cpp b/mlir/lib/Dialect/SparseTensor/IR/SparseTensorDialect.cpp index 4858ffd657eb..024d380454d3 100644 --- a/mlir/lib/Dialect/SparseTensor/IR/SparseTensorDialect.cpp +++ b/mlir/lib/Dialect/SparseTensor/IR/SparseTensorDialect.cpp @@ -61,8 +61,8 @@ Attribute SparseTensorEncodingAttr::parse(AsmParser &parser, Type type) { "expected an array for dimension level types"); return {}; } - for (unsigned i = 0, e = arrayAttr.size(); i < e; i++) { - auto strAttr = arrayAttr[i].dyn_cast(); + for (auto i : arrayAttr) { + auto strAttr = i.dyn_cast(); if (!strAttr) { parser.emitError(parser.getNameLoc(), "expected a string value in dimension level types"); diff --git a/mlir/lib/Dialect/Tosa/Transforms/TosaOptimization.cpp b/mlir/lib/Dialect/Tosa/Transforms/TosaOptimization.cpp index e0ed33bbe692..9a19b63ed198 100644 --- a/mlir/lib/Dialect/Tosa/Transforms/TosaOptimization.cpp +++ b/mlir/lib/Dialect/Tosa/Transforms/TosaOptimization.cpp @@ -215,7 +215,7 @@ struct DepthwiseConv2DIsMul : public 
OpRewritePattern { class TosaOptimization : public PassWrapper { public: - explicit TosaOptimization() {} + explicit TosaOptimization() = default; void runOnFunction() override; StringRef getArgument() const final { return PASS_NAME; } diff --git a/mlir/lib/Dialect/Vector/VectorOps.cpp b/mlir/lib/Dialect/Vector/VectorOps.cpp index 7b6534ff1faa..8c724f8ef0be 100644 --- a/mlir/lib/Dialect/Vector/VectorOps.cpp +++ b/mlir/lib/Dialect/Vector/VectorOps.cpp @@ -702,7 +702,7 @@ getDimMap(ArrayRef indexingMaps, ArrayAttr iteratorTypes, int64_t lhsDim = getResultIndex(indexingMaps[0], targetExpr); int64_t rhsDim = getResultIndex(indexingMaps[1], targetExpr); if (lhsDim >= 0 && rhsDim >= 0) - dimMap.push_back({lhsDim, rhsDim}); + dimMap.emplace_back(lhsDim, rhsDim); } return dimMap; } diff --git a/mlir/lib/ExecutionEngine/AsyncRuntime.cpp b/mlir/lib/ExecutionEngine/AsyncRuntime.cpp index 1cf593bd4a4b..03243700e9ec 100644 --- a/mlir/lib/ExecutionEngine/AsyncRuntime.cpp +++ b/mlir/lib/ExecutionEngine/AsyncRuntime.cpp @@ -289,7 +289,7 @@ extern "C" int64_t mlirAsyncRuntimeAddTokenToGroup(AsyncToken *token, // then, and re-ackquire the lock. group->addRef(); - token->awaiters.push_back([group, onTokenReady]() { + token->awaiters.emplace_back([group, onTokenReady]() { // Make sure that `dropRef` does not destroy the mutex owned by the lock. { std::unique_lock lockGroup(group->mu); @@ -408,7 +408,7 @@ extern "C" void mlirAsyncRuntimeAwaitTokenAndExecute(AsyncToken *token, lock.unlock(); execute(); } else { - token->awaiters.push_back([execute]() { execute(); }); + token->awaiters.emplace_back([execute]() { execute(); }); } } @@ -421,7 +421,7 @@ extern "C" void mlirAsyncRuntimeAwaitValueAndExecute(AsyncValue *value, lock.unlock(); execute(); } else { - value->awaiters.push_back([execute]() { execute(); }); + value->awaiters.emplace_back([execute]() { execute(); }); } } @@ -434,7 +434,7 @@ extern "C" void mlirAsyncRuntimeAwaitAllInGroupAndExecute(AsyncGroup *group, lock.unlock(); execute(); } else { - group->awaiters.push_back([execute]() { execute(); }); + group->awaiters.emplace_back([execute]() { execute(); }); } } diff --git a/mlir/lib/ExecutionEngine/CRunnerUtils.cpp b/mlir/lib/ExecutionEngine/CRunnerUtils.cpp index 889d8c76964d..da284294df37 100644 --- a/mlir/lib/ExecutionEngine/CRunnerUtils.cpp +++ b/mlir/lib/ExecutionEngine/CRunnerUtils.cpp @@ -109,7 +109,7 @@ extern "C" void print_flops(double flops) { extern "C" double rtclock() { #ifndef _WIN32 struct timeval tp; - int stat = gettimeofday(&tp, NULL); + int stat = gettimeofday(&tp, nullptr); if (stat != 0) fprintf(stderr, "Error returning time from gettimeofday: %d\n", stat); return (tp.tv_sec + tp.tv_usec * 1.0e-6); diff --git a/mlir/lib/ExecutionEngine/SparseTensorUtils.cpp b/mlir/lib/ExecutionEngine/SparseTensorUtils.cpp index 99d467e34e2c..5cc40665a8af 100644 --- a/mlir/lib/ExecutionEngine/SparseTensorUtils.cpp +++ b/mlir/lib/ExecutionEngine/SparseTensorUtils.cpp @@ -217,7 +217,7 @@ class SparseTensorStorageBase { /// Finishes insertion. virtual void endInsert() = 0; - virtual ~SparseTensorStorageBase() {} + virtual ~SparseTensorStorageBase() = default; private: void fatal(const char *tp) { @@ -277,7 +277,7 @@ class SparseTensorStorage : public SparseTensorStorageBase { } } - virtual ~SparseTensorStorage() {} + virtual ~SparseTensorStorage() = default; /// Get the rank of the tensor. 
uint64_t getRank() const { return sizes.size(); } @@ -574,7 +574,7 @@ static void readMMEHeader(FILE *file, char *filename, char *line, exit(1); } // Skip comments. - while (1) { + while (true) { if (!fgets(line, kColWidth, file)) { fprintf(stderr, "Cannot find data in %s\n", filename); exit(1); @@ -598,7 +598,7 @@ static void readMMEHeader(FILE *file, char *filename, char *line, static void readExtFROSTTHeader(FILE *file, char *filename, char *line, uint64_t *idata) { // Skip comments. - while (1) { + while (true) { if (!fgets(line, kColWidth, file)) { fprintf(stderr, "Cannot find data in %s\n", filename); exit(1); diff --git a/mlir/lib/IR/AsmPrinter.cpp b/mlir/lib/IR/AsmPrinter.cpp index 0b281069823c..376e5c16fe6c 100644 --- a/mlir/lib/IR/AsmPrinter.cpp +++ b/mlir/lib/IR/AsmPrinter.cpp @@ -54,9 +54,9 @@ void OperationName::dump() const { print(llvm::errs()); } // AsmParser //===--------------------------------------------------------------------===// -AsmParser::~AsmParser() {} -DialectAsmParser::~DialectAsmParser() {} -OpAsmParser::~OpAsmParser() {} +AsmParser::~AsmParser() = default; +DialectAsmParser::~DialectAsmParser() = default; +OpAsmParser::~OpAsmParser() = default; MLIRContext *AsmParser::getContext() const { return getBuilder().getContext(); } @@ -64,13 +64,13 @@ MLIRContext *AsmParser::getContext() const { return getBuilder().getContext(); } // DialectAsmPrinter //===----------------------------------------------------------------------===// -DialectAsmPrinter::~DialectAsmPrinter() {} +DialectAsmPrinter::~DialectAsmPrinter() = default; //===----------------------------------------------------------------------===// // OpAsmPrinter //===----------------------------------------------------------------------===// -OpAsmPrinter::~OpAsmPrinter() {} +OpAsmPrinter::~OpAsmPrinter() = default; void OpAsmPrinter::printFunctionalType(Operation *op) { auto &os = getStream(); @@ -1221,7 +1221,7 @@ class AsmStateImpl { AsmState::AsmState(Operation *op, const OpPrintingFlags &printerFlags, LocationMap *locationMap) : impl(std::make_unique(op, printerFlags, locationMap)) {} -AsmState::~AsmState() {} +AsmState::~AsmState() = default; //===----------------------------------------------------------------------===// // AsmPrinter::Impl @@ -2116,7 +2116,7 @@ void AsmPrinter::Impl::printDialectType(Type type) { // AsmPrinter //===--------------------------------------------------------------------===// -AsmPrinter::~AsmPrinter() {} +AsmPrinter::~AsmPrinter() = default; raw_ostream &AsmPrinter::getStream() const { assert(impl && "expected AsmPrinter::getStream to be overriden"); diff --git a/mlir/lib/IR/Builders.cpp b/mlir/lib/IR/Builders.cpp index 9b91054104dd..3d3e835f4abf 100644 --- a/mlir/lib/IR/Builders.cpp +++ b/mlir/lib/IR/Builders.cpp @@ -342,7 +342,7 @@ AffineMap Builder::getShiftedAffineMap(AffineMap map, int64_t shift) { // OpBuilder //===----------------------------------------------------------------------===// -OpBuilder::Listener::~Listener() {} +OpBuilder::Listener::~Listener() = default; /// Insert the given operation at the current insertion point and return it. 
Operation *OpBuilder::insert(Operation *op) { diff --git a/mlir/lib/IR/Diagnostics.cpp b/mlir/lib/IR/Diagnostics.cpp index fc252585dd21..327264a046c8 100644 --- a/mlir/lib/IR/Diagnostics.cpp +++ b/mlir/lib/IR/Diagnostics.cpp @@ -257,7 +257,7 @@ void DiagnosticEngineImpl::emit(Diagnostic diag) { //===----------------------------------------------------------------------===// DiagnosticEngine::DiagnosticEngine() : impl(new DiagnosticEngineImpl()) {} -DiagnosticEngine::~DiagnosticEngine() {} +DiagnosticEngine::~DiagnosticEngine() = default; /// Register a new handler for diagnostics to the engine. This function returns /// a unique identifier for the registered handler, which can be used to @@ -434,7 +434,7 @@ SourceMgrDiagnosticHandler::SourceMgrDiagnosticHandler( : SourceMgrDiagnosticHandler(mgr, ctx, llvm::errs(), std::move(shouldShowLocFn)) {} -SourceMgrDiagnosticHandler::~SourceMgrDiagnosticHandler() {} +SourceMgrDiagnosticHandler::~SourceMgrDiagnosticHandler() = default; void SourceMgrDiagnosticHandler::emitDiagnostic(Location loc, Twine message, DiagnosticSeverity kind, @@ -952,7 +952,7 @@ struct ParallelDiagnosticHandlerImpl : public llvm::PrettyStackTraceEntry { ParallelDiagnosticHandler::ParallelDiagnosticHandler(MLIRContext *ctx) : impl(new ParallelDiagnosticHandlerImpl(ctx)) {} -ParallelDiagnosticHandler::~ParallelDiagnosticHandler() {} +ParallelDiagnosticHandler::~ParallelDiagnosticHandler() = default; /// Set the order id for the current thread. void ParallelDiagnosticHandler::setOrderIDForThread(size_t orderID) { diff --git a/mlir/lib/IR/Dialect.cpp b/mlir/lib/IR/Dialect.cpp index 631dc410632a..6d1d48edbefd 100644 --- a/mlir/lib/IR/Dialect.cpp +++ b/mlir/lib/IR/Dialect.cpp @@ -124,7 +124,7 @@ Dialect::Dialect(StringRef name, MLIRContext *context, TypeID id) assert(isValidNamespace(name) && "invalid dialect namespace"); } -Dialect::~Dialect() {} +Dialect::~Dialect() = default; /// Verify an attribute from this dialect on the argument at 'argIndex' for /// the region at 'regionIndex' on the given operation. Returns failure if @@ -196,7 +196,7 @@ void Dialect::addInterface(std::unique_ptr interface) { // Dialect Interface //===----------------------------------------------------------------------===// -DialectInterface::~DialectInterface() {} +DialectInterface::~DialectInterface() = default; DialectInterfaceCollectionBase::DialectInterfaceCollectionBase( MLIRContext *ctx, TypeID interfaceKind) { @@ -208,7 +208,7 @@ DialectInterfaceCollectionBase::DialectInterfaceCollectionBase( } } -DialectInterfaceCollectionBase::~DialectInterfaceCollectionBase() {} +DialectInterfaceCollectionBase::~DialectInterfaceCollectionBase() = default; /// Get the interface for the dialect of given operation, or null if one /// is not registered. diff --git a/mlir/lib/IR/MLIRContext.cpp b/mlir/lib/IR/MLIRContext.cpp index 666ce3013f98..96fe76eaac28 100644 --- a/mlir/lib/IR/MLIRContext.cpp +++ b/mlir/lib/IR/MLIRContext.cpp @@ -321,7 +321,7 @@ MLIRContext::MLIRContext(const DialectRegistry ®istry, Threading setting) impl->affineUniquer.registerParametricStorageType(); } -MLIRContext::~MLIRContext() {} +MLIRContext::~MLIRContext() = default; /// Copy the specified array of elements into memory managed by the provided /// bump pointer allocator. This assumes the elements are all PODs. 
diff --git a/mlir/lib/Parser/AsmParserState.cpp b/mlir/lib/Parser/AsmParserState.cpp index 95f64910541f..c5c0c9cb9a2c 100644 --- a/mlir/lib/Parser/AsmParserState.cpp +++ b/mlir/lib/Parser/AsmParserState.cpp @@ -91,7 +91,7 @@ void AsmParserState::Impl::resolveSymbolUses() { //===----------------------------------------------------------------------===// AsmParserState::AsmParserState() : impl(std::make_unique()) {} -AsmParserState::~AsmParserState() {} +AsmParserState::~AsmParserState() = default; AsmParserState &AsmParserState::operator=(AsmParserState &&other) { impl = std::move(other.impl); return *this; diff --git a/mlir/lib/Parser/AttributeParser.cpp b/mlir/lib/Parser/AttributeParser.cpp index 13da92c05ff7..881e5b6d0e6d 100644 --- a/mlir/lib/Parser/AttributeParser.cpp +++ b/mlir/lib/Parser/AttributeParser.cpp @@ -670,7 +670,7 @@ DenseElementsAttr TensorLiteralParser::getStringAttr(llvm::SMLoc loc, for (auto val : storage) { stringValues.push_back(val.second.getStringValue()); - stringRefValues.push_back(stringValues.back()); + stringRefValues.emplace_back(stringValues.back()); } return DenseStringElementsAttr::get(type, stringRefValues); diff --git a/mlir/lib/Parser/DialectSymbolParser.cpp b/mlir/lib/Parser/DialectSymbolParser.cpp index 36f583c60211..ded29d9d1f6b 100644 --- a/mlir/lib/Parser/DialectSymbolParser.cpp +++ b/mlir/lib/Parser/DialectSymbolParser.cpp @@ -32,7 +32,7 @@ class CustomDialectAsmParser : public AsmParserImpl { CustomDialectAsmParser(StringRef fullSpec, Parser &parser) : AsmParserImpl(parser.getToken().getLoc(), parser), fullSpec(fullSpec) {} - ~CustomDialectAsmParser() override {} + ~CustomDialectAsmParser() override = default; /// Returns the full specification of the symbol being parsed. This allows /// for using a separate parser if necessary. diff --git a/mlir/lib/Parser/Parser.cpp b/mlir/lib/Parser/Parser.cpp index 87d8f4bcf4de..da765b61243b 100644 --- a/mlir/lib/Parser/Parser.cpp +++ b/mlir/lib/Parser/Parser.cpp @@ -1233,8 +1233,7 @@ class CustomOpAsmParser : public AsmParserImpl { std::pair getResultName(unsigned resultNo) const override { // Scan for the resultID that contains this result number. - for (unsigned nameID = 0, e = resultIDs.size(); nameID != e; ++nameID) { - const auto &entry = resultIDs[nameID]; + for (const auto &entry : resultIDs) { if (resultNo < std::get<1>(entry)) { // Don't pass on the leading %. StringRef name = std::get<0>(entry).drop_front(); diff --git a/mlir/lib/Pass/IRPrinting.cpp b/mlir/lib/Pass/IRPrinting.cpp index 403d88d2d293..e3d22f2b1cb0 100644 --- a/mlir/lib/Pass/IRPrinting.cpp +++ b/mlir/lib/Pass/IRPrinting.cpp @@ -188,7 +188,7 @@ PassManager::IRPrinterConfig::IRPrinterConfig(bool printModuleScope, printAfterOnlyOnChange(printAfterOnlyOnChange), printAfterOnlyOnFailure(printAfterOnlyOnFailure), opPrintingFlags(opPrintingFlags) {} -PassManager::IRPrinterConfig::~IRPrinterConfig() {} +PassManager::IRPrinterConfig::~IRPrinterConfig() = default; /// A hook that may be overridden by a derived config that checks if the IR /// of 'operation' should be dumped *before* the pass 'pass' has been diff --git a/mlir/lib/Pass/Pass.cpp b/mlir/lib/Pass/Pass.cpp index 911753650d5e..fff008330c79 100644 --- a/mlir/lib/Pass/Pass.cpp +++ b/mlir/lib/Pass/Pass.cpp @@ -182,9 +182,9 @@ void OpPassManagerImpl::coalesceAdjacentAdaptorPasses() { // Walk the pass list and merge adjacent adaptors. 
OpToOpPassAdaptor *lastAdaptor = nullptr; - for (auto it = passes.begin(), e = passes.end(); it != e; ++it) { + for (auto &passe : passes) { // Check to see if this pass is an adaptor. - if (auto *currentAdaptor = dyn_cast(it->get())) { + if (auto *currentAdaptor = dyn_cast(passe.get())) { // If it is the first adaptor in a possible chain, remember it and // continue. if (!lastAdaptor) { @@ -194,7 +194,7 @@ void OpPassManagerImpl::coalesceAdjacentAdaptorPasses() { // Otherwise, merge into the existing adaptor and delete the current one. currentAdaptor->mergeInto(*lastAdaptor); - it->reset(); + passe.reset(); } else if (lastAdaptor) { // If this pass is not an adaptor, then coalesce and forget any existing // adaptor. @@ -236,7 +236,7 @@ OpPassManager &OpPassManager::operator=(const OpPassManager &rhs) { return *this; } -OpPassManager::~OpPassManager() {} +OpPassManager::~OpPassManager() = default; OpPassManager::pass_iterator OpPassManager::begin() { return MutableArrayRef>{impl->passes}.begin(); @@ -634,7 +634,7 @@ PassManager::PassManager(MLIRContext *ctx, Nesting nesting, initializationKey(DenseMapInfo::getTombstoneKey()), passTiming(false), verifyPasses(true) {} -PassManager::~PassManager() {} +PassManager::~PassManager() = default; void PassManager::enableVerifier(bool enabled) { verifyPasses = enabled; } @@ -771,7 +771,7 @@ void detail::NestedAnalysisMap::invalidate( // PassInstrumentation //===----------------------------------------------------------------------===// -PassInstrumentation::~PassInstrumentation() {} +PassInstrumentation::~PassInstrumentation() = default; //===----------------------------------------------------------------------===// // PassInstrumentor @@ -790,7 +790,7 @@ struct PassInstrumentorImpl { } // namespace mlir PassInstrumentor::PassInstrumentor() : impl(new PassInstrumentorImpl()) {} -PassInstrumentor::~PassInstrumentor() {} +PassInstrumentor::~PassInstrumentor() = default; /// See PassInstrumentation::runBeforePipeline for details. void PassInstrumentor::runBeforePipeline( diff --git a/mlir/lib/Pass/PassCrashRecovery.cpp b/mlir/lib/Pass/PassCrashRecovery.cpp index 46fec825d3d9..ea642ce44f5d 100644 --- a/mlir/lib/Pass/PassCrashRecovery.cpp +++ b/mlir/lib/Pass/PassCrashRecovery.cpp @@ -196,7 +196,7 @@ struct PassCrashReproducerGenerator::Impl { PassCrashReproducerGenerator::PassCrashReproducerGenerator( PassManager::ReproducerStreamFactory &streamFactory, bool localReproducer) : impl(std::make_unique(streamFactory, localReproducer)) {} -PassCrashReproducerGenerator::~PassCrashReproducerGenerator() {} +PassCrashReproducerGenerator::~PassCrashReproducerGenerator() = default; void PassCrashReproducerGenerator::initialize( iterator_range passes, Operation *op, diff --git a/mlir/lib/Pass/PassRegistry.cpp b/mlir/lib/Pass/PassRegistry.cpp index eae50918dbea..e86a315dc355 100644 --- a/mlir/lib/Pass/PassRegistry.cpp +++ b/mlir/lib/Pass/PassRegistry.cpp @@ -505,13 +505,13 @@ namespace { /// This struct represents the possible data entries in a parsed pass pipeline /// list. struct PassArgData { - PassArgData() : registryEntry(nullptr) {} + PassArgData() {} PassArgData(const PassRegistryEntry *registryEntry) : registryEntry(registryEntry) {} /// This field is used when the parsed option corresponds to a registered pass /// or pass pipeline. - const PassRegistryEntry *registryEntry; + const PassRegistryEntry *registryEntry{nullptr}; /// This field is set when instance specific pass options have been provided /// on the command line. 
@@ -694,7 +694,7 @@ struct PassPipelineCLParserImpl { PassPipelineCLParser::PassPipelineCLParser(StringRef arg, StringRef description) : impl(std::make_unique( arg, description, /*passNamesOnly=*/false)) {} -PassPipelineCLParser::~PassPipelineCLParser() {} +PassPipelineCLParser::~PassPipelineCLParser() = default; /// Returns true if this parser contains any valid options to add. bool PassPipelineCLParser::hasAnyOccurrences() const { @@ -737,7 +737,7 @@ PassNameCLParser::PassNameCLParser(StringRef arg, StringRef description) arg, description, /*passNamesOnly=*/true)) { impl->passList.setMiscFlag(llvm::cl::CommaSeparated); } -PassNameCLParser::~PassNameCLParser() {} +PassNameCLParser::~PassNameCLParser() = default; /// Returns true if this parser contains any valid options to add. bool PassNameCLParser::hasAnyOccurrences() const { diff --git a/mlir/lib/Pass/PassStatistics.cpp b/mlir/lib/Pass/PassStatistics.cpp index 8b1b020746c0..aa46adaf76e9 100644 --- a/mlir/lib/Pass/PassStatistics.cpp +++ b/mlir/lib/Pass/PassStatistics.cpp @@ -92,7 +92,7 @@ static void printResultsAsList(raw_ostream &os, OpPassManager &pm) { // Sort the statistics by pass name and then by record name. std::vector>> passAndStatistics; for (auto &passIt : mergedStats) - passAndStatistics.push_back({passIt.first(), std::move(passIt.second)}); + passAndStatistics.emplace_back(passIt.first(), std::move(passIt.second)); llvm::sort(passAndStatistics, [](const auto &lhs, const auto &rhs) { return lhs.first.compare(rhs.first) < 0; }); diff --git a/mlir/lib/Reducer/ReductionNode.cpp b/mlir/lib/Reducer/ReductionNode.cpp index a9d17431defe..05d4c594100b 100644 --- a/mlir/lib/Reducer/ReductionNode.cpp +++ b/mlir/lib/Reducer/ReductionNode.cpp @@ -111,7 +111,7 @@ void ReductionNode::update(std::pair result) { if (interesting == Tester::Interestingness::True) { // This module may has been updated. Reset the range. ranges.clear(); - ranges.push_back({0, std::distance(region->op_begin(), region->op_end())}); + ranges.emplace_back(0, std::distance(region->op_begin(), region->op_end())); } else { // Release the uninteresting module to save some memory. 
module.release()->erase(); diff --git a/mlir/lib/Reducer/Tester.cpp b/mlir/lib/Reducer/Tester.cpp index e5197418f3e5..367ba4f76b05 100644 --- a/mlir/lib/Reducer/Tester.cpp +++ b/mlir/lib/Reducer/Tester.cpp @@ -62,7 +62,7 @@ Tester::Interestingness Tester::isInteresting(StringRef testCase) const { testerArgs.push_back(testCase); for (const std::string &arg : testScriptArgs) - testerArgs.push_back(arg); + testerArgs.emplace_back(arg); testerArgs.push_back(testCase); diff --git a/mlir/lib/Rewrite/FrozenRewritePatternSet.cpp b/mlir/lib/Rewrite/FrozenRewritePatternSet.cpp index 20c71b51c5f8..7988150bcba5 100644 --- a/mlir/lib/Rewrite/FrozenRewritePatternSet.cpp +++ b/mlir/lib/Rewrite/FrozenRewritePatternSet.cpp @@ -133,4 +133,4 @@ FrozenRewritePatternSet::FrozenRewritePatternSet( pdlPatterns.takeRewriteFunctions()); } -FrozenRewritePatternSet::~FrozenRewritePatternSet() {} +FrozenRewritePatternSet::~FrozenRewritePatternSet() = default; diff --git a/mlir/lib/Rewrite/PatternApplicator.cpp b/mlir/lib/Rewrite/PatternApplicator.cpp index ae8beff061c8..d5a98fef09e7 100644 --- a/mlir/lib/Rewrite/PatternApplicator.cpp +++ b/mlir/lib/Rewrite/PatternApplicator.cpp @@ -28,7 +28,7 @@ PatternApplicator::PatternApplicator( bytecode->initializeMutableState(*mutableByteCodeState); } } -PatternApplicator::~PatternApplicator() {} +PatternApplicator::~PatternApplicator() = default; #ifndef NDEBUG /// Log a message for a pattern that is impossible to match. diff --git a/mlir/lib/Support/StorageUniquer.cpp b/mlir/lib/Support/StorageUniquer.cpp index 9291199fdce1..a33eb53d4d48 100644 --- a/mlir/lib/Support/StorageUniquer.cpp +++ b/mlir/lib/Support/StorageUniquer.cpp @@ -332,7 +332,7 @@ struct StorageUniquerImpl { } // namespace mlir StorageUniquer::StorageUniquer() : impl(new StorageUniquerImpl()) {} -StorageUniquer::~StorageUniquer() {} +StorageUniquer::~StorageUniquer() = default; /// Set the flag specifying if multi-threading is disabled within the uniquer. void StorageUniquer::disableMultithreading(bool disable) { diff --git a/mlir/lib/Support/Timing.cpp b/mlir/lib/Support/Timing.cpp index 63e07324c0b1..7277e4859582 100644 --- a/mlir/lib/Support/Timing.cpp +++ b/mlir/lib/Support/Timing.cpp @@ -60,7 +60,7 @@ class TimingManagerImpl { TimingManager::TimingManager() : impl(std::make_unique()) {} -TimingManager::~TimingManager() {} +TimingManager::~TimingManager() = default; /// Get the root timer of this timing manager. 
Timer TimingManager::getRootTimer() { diff --git a/mlir/lib/TableGen/Attribute.cpp b/mlir/lib/TableGen/Attribute.cpp index 630a9205fc6c..ac62220d85e4 100644 --- a/mlir/lib/TableGen/Attribute.cpp +++ b/mlir/lib/TableGen/Attribute.cpp @@ -219,7 +219,7 @@ std::vector EnumAttr::getAllCases() const { cases.reserve(inits->size()); for (const llvm::Init *init : *inits) { - cases.push_back(EnumAttrCase(cast(init))); + cases.emplace_back(cast(init)); } return cases; diff --git a/mlir/lib/TableGen/Pass.cpp b/mlir/lib/TableGen/Pass.cpp index f96180689c55..84b3f01d1255 100644 --- a/mlir/lib/TableGen/Pass.cpp +++ b/mlir/lib/TableGen/Pass.cpp @@ -66,9 +66,9 @@ StringRef PassStatistic::getDescription() const { Pass::Pass(const llvm::Record *def) : def(def) { for (auto *init : def->getValueAsListOfDefs("options")) - options.push_back(PassOption(init)); + options.emplace_back(init); for (auto *init : def->getValueAsListOfDefs("statistics")) - statistics.push_back(PassStatistic(init)); + statistics.emplace_back(init); for (StringRef dialect : def->getValueAsListOfStrings("dependentDialects")) dependentDialects.push_back(dialect); } diff --git a/mlir/lib/TableGen/Pattern.cpp b/mlir/lib/TableGen/Pattern.cpp index b459af368fc5..92cd68162093 100644 --- a/mlir/lib/TableGen/Pattern.cpp +++ b/mlir/lib/TableGen/Pattern.cpp @@ -663,7 +663,7 @@ std::vector Pattern::getConstraints() const { &def, "operands to additional constraints can only be symbol references"); } - entities.push_back(std::string(argName->getValue())); + entities.emplace_back(argName->getValue()); } ret.emplace_back(cast(dagInit->getOperator())->getDef(), diff --git a/mlir/lib/Target/LLVMIR/TypeFromLLVM.cpp b/mlir/lib/Target/LLVMIR/TypeFromLLVM.cpp index c987198f0c12..4efb5332cced 100644 --- a/mlir/lib/Target/LLVMIR/TypeFromLLVM.cpp +++ b/mlir/lib/Target/LLVMIR/TypeFromLLVM.cpp @@ -157,7 +157,7 @@ class TypeFromLLVMIRTranslatorImpl { LLVM::TypeFromLLVMIRTranslator::TypeFromLLVMIRTranslator(MLIRContext &context) : impl(new detail::TypeFromLLVMIRTranslatorImpl(context)) {} -LLVM::TypeFromLLVMIRTranslator::~TypeFromLLVMIRTranslator() {} +LLVM::TypeFromLLVMIRTranslator::~TypeFromLLVMIRTranslator() = default; Type LLVM::TypeFromLLVMIRTranslator::translateType(llvm::Type *type) { return impl->translateType(type); diff --git a/mlir/lib/Target/LLVMIR/TypeToLLVM.cpp b/mlir/lib/Target/LLVMIR/TypeToLLVM.cpp index 3d67984d7e41..2cd98691e0c3 100644 --- a/mlir/lib/Target/LLVMIR/TypeToLLVM.cpp +++ b/mlir/lib/Target/LLVMIR/TypeToLLVM.cpp @@ -180,7 +180,7 @@ class TypeToLLVMIRTranslatorImpl { LLVM::TypeToLLVMIRTranslator::TypeToLLVMIRTranslator(llvm::LLVMContext &context) : impl(new detail::TypeToLLVMIRTranslatorImpl(context)) {} -LLVM::TypeToLLVMIRTranslator::~TypeToLLVMIRTranslator() {} +LLVM::TypeToLLVMIRTranslator::~TypeToLLVMIRTranslator() = default; llvm::Type *LLVM::TypeToLLVMIRTranslator::translateType(Type type) { return impl->translateType(type); diff --git a/mlir/lib/Tools/mlir-lsp-server/LSPServer.cpp b/mlir/lib/Tools/mlir-lsp-server/LSPServer.cpp index 0d043ba6ccbe..93600465abcf 100644 --- a/mlir/lib/Tools/mlir-lsp-server/LSPServer.cpp +++ b/mlir/lib/Tools/mlir-lsp-server/LSPServer.cpp @@ -198,7 +198,7 @@ void LSPServer::Impl::onDocumentSymbol( LSPServer::LSPServer(MLIRServer &server, JSONTransport &transport) : impl(std::make_unique(server, transport)) {} -LSPServer::~LSPServer() {} +LSPServer::~LSPServer() = default; LogicalResult LSPServer::run() { MessageHandler messageHandler(impl->transport); diff --git 
a/mlir/lib/Tools/mlir-lsp-server/MLIRServer.cpp b/mlir/lib/Tools/mlir-lsp-server/MLIRServer.cpp index 88aa4626b42f..046368a3c429 100644 --- a/mlir/lib/Tools/mlir-lsp-server/MLIRServer.cpp +++ b/mlir/lib/Tools/mlir-lsp-server/MLIRServer.cpp @@ -878,7 +878,7 @@ struct lsp::MLIRServer::Impl { lsp::MLIRServer::MLIRServer(DialectRegistry ®istry) : impl(std::make_unique(registry)) {} -lsp::MLIRServer::~MLIRServer() {} +lsp::MLIRServer::~MLIRServer() = default; void lsp::MLIRServer::addOrUpdateDocument( const URIForFile &uri, StringRef contents, int64_t version, diff --git a/mlir/lib/Transforms/LoopFusion.cpp b/mlir/lib/Transforms/LoopFusion.cpp index 801268efdddb..e6db0cf6927b 100644 --- a/mlir/lib/Transforms/LoopFusion.cpp +++ b/mlir/lib/Transforms/LoopFusion.cpp @@ -198,7 +198,7 @@ struct MemRefDependenceGraph { // The next unique identifier to use for newly created graph nodes. unsigned nextNodeId = 0; - MemRefDependenceGraph() {} + MemRefDependenceGraph() = default; // Initializes the dependence graph based on operations in 'f'. // Returns true on success, false otherwise. diff --git a/mlir/lib/Transforms/Utils/DialectConversion.cpp b/mlir/lib/Transforms/Utils/DialectConversion.cpp index 97c6a4daaf82..a299b8c5b660 100644 --- a/mlir/lib/Transforms/Utils/DialectConversion.cpp +++ b/mlir/lib/Transforms/Utils/DialectConversion.cpp @@ -1492,7 +1492,7 @@ LogicalResult ConversionPatternRewriterImpl::notifyMatchFailure( ConversionPatternRewriter::ConversionPatternRewriter(MLIRContext *ctx) : PatternRewriter(ctx), impl(new detail::ConversionPatternRewriterImpl(*this)) {} -ConversionPatternRewriter::~ConversionPatternRewriter() {} +ConversionPatternRewriter::~ConversionPatternRewriter() = default; void ConversionPatternRewriter::replaceOpWithIf( Operation *op, ValueRange newValues, bool *allUsesReplaced, diff --git a/mlir/lib/Transforms/Utils/LoopUtils.cpp b/mlir/lib/Transforms/Utils/LoopUtils.cpp index c0eb9cdfaf84..fbb79b5af3f8 100644 --- a/mlir/lib/Transforms/Utils/LoopUtils.cpp +++ b/mlir/lib/Transforms/Utils/LoopUtils.cpp @@ -407,7 +407,7 @@ LogicalResult mlir::affineForOpBodySkew(AffineForOp forOp, lbShift = d * step; } // Augment the list of operations that get into the current open interval. - opGroupQueue.push_back({d, sortedOpGroups[d]}); + opGroupQueue.emplace_back(d, sortedOpGroups[d]); } // Those operations groups left in the queue now need to be processed (FIFO) @@ -1370,7 +1370,7 @@ struct JamBlockGatherer { while (it != e && !isa(&*it)) ++it; if (it != subBlockStart) - subBlocks.push_back({subBlockStart, std::prev(it)}); + subBlocks.emplace_back(subBlockStart, std::prev(it)); // Process all for ops that appear next. while (it != e && isa(&*it)) walk(&*it++); @@ -1608,8 +1608,7 @@ static bool checkLoopInterchangeDependences( // lexicographically negative. // Example 1: [-1, 1][0, 0] // Example 2: [0, 0][-1, 1] - for (unsigned i = 0, e = depCompsVec.size(); i < e; ++i) { - const SmallVector &depComps = depCompsVec[i]; + for (const auto &depComps : depCompsVec) { assert(depComps.size() >= maxLoopDepth); // Check if the first non-zero dependence component is positive. // This iterates through loops in the desired order. @@ -1748,8 +1747,7 @@ AffineForOp mlir::sinkSequentialLoops(AffineForOp forOp) { // Mark loops as either parallel or sequential. 
SmallVector isParallelLoop(maxLoopDepth, true); - for (unsigned i = 0, e = depCompsVec.size(); i < e; ++i) { - SmallVector &depComps = depCompsVec[i]; + for (auto &depComps : depCompsVec) { assert(depComps.size() >= maxLoopDepth); for (unsigned j = 0; j < maxLoopDepth; ++j) { DependenceComponent &depComp = depComps[j]; @@ -3181,7 +3179,7 @@ gatherLoopsInBlock(Block *block, unsigned currLoopDepth, // Add a new empty level to output if it doesn't exist level already. assert(currLoopDepth <= depthToLoops.size() && "Unexpected currLoopDepth"); if (currLoopDepth == depthToLoops.size()) - depthToLoops.push_back(SmallVector()); + depthToLoops.emplace_back(); for (auto &op : *block) { if (auto forOp = dyn_cast(op)) { diff --git a/mlir/test/lib/Analysis/TestAliasAnalysis.cpp b/mlir/test/lib/Analysis/TestAliasAnalysis.cpp index 081bc89e8747..01ab39503e9f 100644 --- a/mlir/test/lib/Analysis/TestAliasAnalysis.cpp +++ b/mlir/test/lib/Analysis/TestAliasAnalysis.cpp @@ -108,11 +108,11 @@ struct TestAliasAnalysisModRefPass }); // Check for aliasing behavior between each of the values. - for (auto it = valsToCheck.begin(), e = valsToCheck.end(); it != e; ++it) { + for (auto &it : valsToCheck) { getOperation()->walk([&](Operation *op) { if (!op->getAttr("test.ptr")) return; - printModRefResult(aliasAnalysis.getModRef(op, *it), op, *it); + printModRefResult(aliasAnalysis.getModRef(op, it), op, it); }); } } diff --git a/mlir/test/lib/Analysis/TestMemRefDependenceCheck.cpp b/mlir/test/lib/Analysis/TestMemRefDependenceCheck.cpp index d3e6788711d8..01fba3acfa7d 100644 --- a/mlir/test/lib/Analysis/TestMemRefDependenceCheck.cpp +++ b/mlir/test/lib/Analysis/TestMemRefDependenceCheck.cpp @@ -48,18 +48,18 @@ getDirectionVectorStr(bool ret, unsigned numCommonLoops, unsigned loopNestDepth, if (dependenceComponents.empty() || loopNestDepth > numCommonLoops) return "true"; std::string result; - for (unsigned i = 0, e = dependenceComponents.size(); i < e; ++i) { + for (const auto &dependenceComponent : dependenceComponents) { std::string lbStr = "-inf"; - if (dependenceComponents[i].lb.hasValue() && - dependenceComponents[i].lb.getValue() != + if (dependenceComponent.lb.hasValue() && + dependenceComponent.lb.getValue() != std::numeric_limits::min()) - lbStr = std::to_string(dependenceComponents[i].lb.getValue()); + lbStr = std::to_string(dependenceComponent.lb.getValue()); std::string ubStr = "+inf"; - if (dependenceComponents[i].ub.hasValue() && - dependenceComponents[i].ub.getValue() != + if (dependenceComponent.ub.hasValue() && + dependenceComponent.ub.getValue() != std::numeric_limits::max()) - ubStr = std::to_string(dependenceComponents[i].ub.getValue()); + ubStr = std::to_string(dependenceComponent.ub.getValue()); result += "[" + lbStr + ", " + ubStr + "]"; } diff --git a/mlir/test/lib/IR/TestDiagnostics.cpp b/mlir/test/lib/IR/TestDiagnostics.cpp index 8bff82a22e62..e6bb5f90f57e 100644 --- a/mlir/test/lib/IR/TestDiagnostics.cpp +++ b/mlir/test/lib/IR/TestDiagnostics.cpp @@ -23,7 +23,7 @@ struct TestDiagnosticFilterPass StringRef getDescription() const final { return "Test diagnostic filtering support."; } - TestDiagnosticFilterPass() {} + TestDiagnosticFilterPass() = default; TestDiagnosticFilterPass(const TestDiagnosticFilterPass &) {} void runOnOperation() override { diff --git a/mlir/test/lib/IR/TestOpaqueLoc.cpp b/mlir/test/lib/IR/TestOpaqueLoc.cpp index 92a5c5a41b2c..0b24126d062b 100644 --- a/mlir/test/lib/IR/TestOpaqueLoc.cpp +++ b/mlir/test/lib/IR/TestOpaqueLoc.cpp @@ -26,11 +26,11 @@ struct TestOpaqueLoc /// 
A simple structure which is used for testing as an underlying location in /// OpaqueLoc. struct MyLocation { - MyLocation() : id(42) {} + MyLocation() = default; MyLocation(int id) : id(id) {} int getId() { return id; } - int id; + int id{42}; }; void runOnOperation() override { diff --git a/mlir/test/lib/Pass/TestDynamicPipeline.cpp b/mlir/test/lib/Pass/TestDynamicPipeline.cpp index d10dd4a200fd..90b88202f7e5 100644 --- a/mlir/test/lib/Pass/TestDynamicPipeline.cpp +++ b/mlir/test/lib/Pass/TestDynamicPipeline.cpp @@ -32,7 +32,7 @@ class TestDynamicPipelinePass pm.getDependentDialects(registry); } - TestDynamicPipelinePass(){}; + TestDynamicPipelinePass() = default; TestDynamicPipelinePass(const TestDynamicPipelinePass &) {} void runOnOperation() override { diff --git a/mlir/test/lib/Transforms/TestLoopMapping.cpp b/mlir/test/lib/Transforms/TestLoopMapping.cpp index ebd739885236..721e697dd902 100644 --- a/mlir/test/lib/Transforms/TestLoopMapping.cpp +++ b/mlir/test/lib/Transforms/TestLoopMapping.cpp @@ -32,7 +32,7 @@ class TestLoopMappingPass StringRef getDescription() const final { return "test mapping a single loop on a virtual processor grid"; } - explicit TestLoopMappingPass() {} + explicit TestLoopMappingPass() = default; void getDependentDialects(DialectRegistry ®istry) const override { registry.insert(); diff --git a/mlir/tools/mlir-tblgen/LLVMIRConversionGen.cpp b/mlir/tools/mlir-tblgen/LLVMIRConversionGen.cpp index 1a92f4a162ee..e098e89da39d 100644 --- a/mlir/tools/mlir-tblgen/LLVMIRConversionGen.cpp +++ b/mlir/tools/mlir-tblgen/LLVMIRConversionGen.cpp @@ -205,7 +205,7 @@ class LLVMEnumAttr : public tblgen::EnumAttr { std::vector cases; for (auto &c : tblgen::EnumAttr::getAllCases()) - cases.push_back(LLVMEnumAttrCase(c)); + cases.emplace_back(c); return cases; } diff --git a/mlir/tools/mlir-tblgen/PassGen.cpp b/mlir/tools/mlir-tblgen/PassGen.cpp index 1edd4db551cd..7d98a600519f 100644 --- a/mlir/tools/mlir-tblgen/PassGen.cpp +++ b/mlir/tools/mlir-tblgen/PassGen.cpp @@ -94,7 +94,7 @@ static void emitPassOptionDecls(const Pass &pass, raw_ostream &os) { os.indent(2) << "::mlir::Pass::" << (opt.isListOption() ? 
"ListOption" : "Option"); - os << llvm::formatv("<{0}> {1}{{*this, \"{2}\", ::llvm::cl::desc(\"{3}\")", + os << llvm::formatv(R"(<{0}> {1}{{*this, "{2}", ::llvm::cl::desc("{3}"))", opt.getType(), opt.getCppVariableName(), opt.getArgument(), opt.getDescription()); if (Optional defaultVal = opt.getDefaultValue()) @@ -198,7 +198,7 @@ static void emitDecls(const llvm::RecordKeeper &recordKeeper, raw_ostream &os) { os << "/* Autogenerated by mlir-tblgen; don't manually edit */\n"; std::vector passes; for (const auto *def : recordKeeper.getAllDerivedDefinitions("PassBase")) - passes.push_back(Pass(def)); + passes.emplace_back(def); emitPassDecls(passes, os); emitRegistration(passes, os); diff --git a/mlir/tools/mlir-tblgen/SPIRVUtilsGen.cpp b/mlir/tools/mlir-tblgen/SPIRVUtilsGen.cpp index 2a290bb86b27..ad0b05e4c47b 100644 --- a/mlir/tools/mlir-tblgen/SPIRVUtilsGen.cpp +++ b/mlir/tools/mlir-tblgen/SPIRVUtilsGen.cpp @@ -1103,7 +1103,7 @@ emitExtendedSetDeserializationDispatch(const RecordKeeper &recordKeeper, Operator op(def); auto setName = def->getValueAsString("extendedInstSetName"); if (!extensionSets.count(setName)) { - extensionSetNames.push_back(""); + extensionSetNames.emplace_back(""); extensionSets.try_emplace(setName, extensionSetNames.back()); auto &setos = extensionSets.find(setName)->second; setos << formatv(" if ({0} == \"{1}\") {{\n", extensionSetName, setName); diff --git a/mlir/unittests/Pass/PassManagerTest.cpp b/mlir/unittests/Pass/PassManagerTest.cpp index bd6ef0462a71..ff1cda2116ba 100644 --- a/mlir/unittests/Pass/PassManagerTest.cpp +++ b/mlir/unittests/Pass/PassManagerTest.cpp @@ -12,6 +12,8 @@ #include "mlir/Pass/Pass.h" #include "gtest/gtest.h" +#include + using namespace mlir; using namespace mlir::detail; @@ -105,7 +107,7 @@ TEST(PassManagerTest, InvalidPass) { // check it later. std::unique_ptr diagnostic; context.getDiagEngine().registerHandler([&](Diagnostic &diag) { - diagnostic.reset(new Diagnostic(std::move(diag))); + diagnostic = std::make_unique(std::move(diag)); }); // Instantiate and run our pass. 
From f99a8bcde8527184afc0348620deb2e2fa2f9938 Mon Sep 17 00:00:00 2001 From: Mircea Trofin Date: Wed, 22 Dec 2021 12:54:37 -0800 Subject: [PATCH 079/293] [NFC][mlgo]Rename a variable in TensorFlowCompile.cmake Remaining var that had 'inlining' in name, despite being general-purpose --- llvm/cmake/modules/TensorFlowCompile.cmake | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/llvm/cmake/modules/TensorFlowCompile.cmake b/llvm/cmake/modules/TensorFlowCompile.cmake index 6c79e2f49784..ea5fa56cbdba 100644 --- a/llvm/cmake/modules/TensorFlowCompile.cmake +++ b/llvm/cmake/modules/TensorFlowCompile.cmake @@ -89,8 +89,8 @@ endfunction() function(tf_find_and_compile model default_url default_path generation_config tag_set signature_def_key fname cpp_class) if ("${model}" STREQUAL "download") # Crash if the user wants to download a model but a URL is set to "TO_BE_UPDATED" - if ("${LLVM_INLINER_MODEL_CURRENT_URL}" STREQUAL "TO_BE_UPDATED") - message(FATAL_ERROR "LLVM_INLINER_MODEL_PATH was set to 'download' but there is no model url currently specified in cmake - likely, the model interface recently changed, and so there is not a released model available.") + if ("${default_url}" STREQUAL "TO_BE_UPDATED") + message(FATAL_ERROR "Default URL was set to 'download' but there is no model url currently specified in cmake - likely, the model interface recently changed, and so there is not a released model available.") endif() set(model ${default_url}) From 32e8b30d6e67352040ffef54fb0e4b3a69242a4a Mon Sep 17 00:00:00 2001 From: Mogball Date: Wed, 22 Dec 2021 21:06:01 +0000 Subject: [PATCH 080/293] [mlir] Add unit test for disabling canonicalizer patterns (NFC) --- mlir/unittests/Transforms/CMakeLists.txt | 1 + mlir/unittests/Transforms/Canonicalizer.cpp | 84 +++++++++++++++++++ .../mlir/unittests/BUILD.bazel | 4 + 3 files changed, 89 insertions(+) create mode 100644 mlir/unittests/Transforms/Canonicalizer.cpp diff --git a/mlir/unittests/Transforms/CMakeLists.txt b/mlir/unittests/Transforms/CMakeLists.txt index 9636f93835eb..b78f3cd8cf22 100644 --- a/mlir/unittests/Transforms/CMakeLists.txt +++ b/mlir/unittests/Transforms/CMakeLists.txt @@ -1,4 +1,5 @@ add_mlir_unittest(MLIRTransformsTests + Canonicalizer.cpp DialectConversion.cpp ) target_link_libraries(MLIRTransformsTests diff --git a/mlir/unittests/Transforms/Canonicalizer.cpp b/mlir/unittests/Transforms/Canonicalizer.cpp new file mode 100644 index 000000000000..de968326f63e --- /dev/null +++ b/mlir/unittests/Transforms/Canonicalizer.cpp @@ -0,0 +1,84 @@ +//===- DialectConversion.cpp - Dialect conversion unit tests --------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include "mlir/IR/PatternMatch.h" +#include "mlir/Parser.h" +#include "mlir/Pass/PassManager.h" +#include "mlir/Transforms/GreedyPatternRewriteDriver.h" +#include "mlir/Transforms/Passes.h" +#include "gtest/gtest.h" + +using namespace mlir; + +namespace { + +struct DisabledPattern : public RewritePattern { + DisabledPattern(MLIRContext *context) + : RewritePattern("test.foo", /*benefit=*/0, context, + /*generatedNamed=*/{}) { + setDebugName("DisabledPattern"); + } + + LogicalResult matchAndRewrite(Operation *op, + PatternRewriter &rewriter) const override { + if (op->getNumResults() != 1) + return failure(); + rewriter.eraseOp(op); + return success(); + } +}; + +struct EnabledPattern : public RewritePattern { + EnabledPattern(MLIRContext *context) + : RewritePattern("test.foo", /*benefit=*/0, context, + /*generatedNamed=*/{}) { + setDebugName("EnabledPattern"); + } + + LogicalResult matchAndRewrite(Operation *op, + PatternRewriter &rewriter) const override { + if (op->getNumResults() == 1) + return failure(); + rewriter.eraseOp(op); + return success(); + } +}; + +struct TestDialect : public Dialect { + static StringRef getDialectNamespace() { return "test"; } + + TestDialect(MLIRContext *context) + : Dialect(getDialectNamespace(), context, TypeID::get()) { + allowUnknownOperations(); + } + + void getCanonicalizationPatterns(RewritePatternSet &results) const override { + results.insert(results.getContext()); + } +}; + +TEST(CanonicalizerTest, TestDisablePatterns) { + MLIRContext context; + context.getOrLoadDialect(); + PassManager mgr(&context); + mgr.addPass( + createCanonicalizerPass(GreedyRewriteConfig(), {"DisabledPattern"})); + + const char *const code = R"mlir( + %0:2 = "test.foo"() {sym_name = "A"} : () -> (i32, i32) + %1 = "test.foo"() {sym_name = "B"} : () -> (f32) + )mlir"; + + OwningModuleRef module = mlir::parseSourceString(code, &context); + ASSERT_TRUE(succeeded(mgr.run(*module))); + + EXPECT_TRUE(module->lookupSymbol("B")); + EXPECT_FALSE(module->lookupSymbol("A")); +} + +} // end anonymous namespace diff --git a/utils/bazel/llvm-project-overlay/mlir/unittests/BUILD.bazel b/utils/bazel/llvm-project-overlay/mlir/unittests/BUILD.bazel index 5ee06e75f435..73560d698081 100644 --- a/utils/bazel/llvm-project-overlay/mlir/unittests/BUILD.bazel +++ b/utils/bazel/llvm-project-overlay/mlir/unittests/BUILD.bazel @@ -266,7 +266,11 @@ cc_test( ]), deps = [ "//llvm:gtest_main", + "//mlir:IR", + "//mlir:Parser", + "//mlir:Pass", "//mlir:TransformUtils", + "//mlir:Transforms", ], ) From 90d1786ba0c233456b7785fe4b93eca835d56028 Mon Sep 17 00:00:00 2001 From: Marianne Mailhot-Sarrasin Date: Wed, 22 Dec 2021 16:11:23 -0500 Subject: [PATCH 081/293] [DSE] Fix invalid removal of store instruction Fix handling of alloc-like instructions in isGuaranteedLoopInvariant(). It was not valid when the 'KillingDef' was outside of the loop, while the 'CurrentDef' was inside the loop. In that case, the 'KillingDef' only overwrites the definition from the last iteration of the loop, and not the ones of all iterations. Therefor it does not make the 'CurrentDef' to be dead, and must not remove it. 
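
In source terms the pattern is roughly the following (an illustrative C++ sketch, not code from the patch or from the store-after-loop.ll test; the names `Node` and `build` are invented): the allocation itself sits inside the loop, so the store after the loop only overwrites the object created by the final iteration.

```
// Hedged sketch of the situation the fix addresses: each iteration allocates a
// fresh node and stores into it; the store after the loop touches only the
// last allocation, so the in-loop stores are not dead and must not be removed.
#include <cstdlib>

struct Node {
  int value;
  Node *next;
};

Node *build() {
  Node *head = nullptr;
  for (int i = 0; i < 10; ++i) {
    // Alloc-like call inside the loop: a distinct object every iteration.
    Node *n = static_cast<Node *>(std::malloc(sizeof(Node)));
    n->value = 42;   // "CurrentDef": one store per iteration, none of them dead
    n->next = head;
    head = n;
  }
  head->value = 0;   // "KillingDef": overwrites only the last iteration's node
  return head;
}

int main() {
  Node *head = build();
  int first = head->value; // 0: only the most recent node was overwritten
  while (head) {
    Node *next = head->next;
    std::free(head);
    head = next;
  }
  return first;
}
```
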
Fixing issue : https://github.com/llvm/llvm-project/issues/52774 Reviewed by: Florian Hahn Differential revision: https://reviews.llvm.org/D115965 --- llvm/lib/Transforms/Scalar/DeadStoreElimination.cpp | 13 +++---------- .../DeadStoreElimination/store-after-loop.ll | 3 ++- 2 files changed, 5 insertions(+), 11 deletions(-) diff --git a/llvm/lib/Transforms/Scalar/DeadStoreElimination.cpp b/llvm/lib/Transforms/Scalar/DeadStoreElimination.cpp index ee6c800e07a5..4c17e2c0bdbb 100644 --- a/llvm/lib/Transforms/Scalar/DeadStoreElimination.cpp +++ b/llvm/lib/Transforms/Scalar/DeadStoreElimination.cpp @@ -1209,17 +1209,10 @@ struct DSEState { /// loop. In particular, this guarantees that it only references a single /// MemoryLocation during execution of the containing function. bool isGuaranteedLoopInvariant(const Value *Ptr) { - auto IsGuaranteedLoopInvariantBase = [this](const Value *Ptr) { + auto IsGuaranteedLoopInvariantBase = [](const Value *Ptr) { Ptr = Ptr->stripPointerCasts(); - if (auto *I = dyn_cast(Ptr)) { - if (isa(Ptr)) - return true; - - if (isAllocLikeFn(I, &TLI)) - return true; - - return false; - } + if (auto *I = dyn_cast(Ptr)) + return I->getParent()->isEntryBlock(); return true; }; diff --git a/llvm/test/Transforms/DeadStoreElimination/store-after-loop.ll b/llvm/test/Transforms/DeadStoreElimination/store-after-loop.ll index 27c7c139acb1..1e98fd7404a3 100644 --- a/llvm/test/Transforms/DeadStoreElimination/store-after-loop.ll +++ b/llvm/test/Transforms/DeadStoreElimination/store-after-loop.ll @@ -7,7 +7,6 @@ target datalayout = "E-m:e-p:32:32-i64:32-f64:32:64-a:0:32-n32-S32" ; There is no dead store in this test. Make sure no store is deleted by DSE. ; Test case related to bug report PR52774. -; NOTE: Showing actual (broken) DSE behavior. define %struct.ilist* @test() { ; CHECK-LABEL: @test( @@ -19,6 +18,8 @@ define %struct.ilist* @test() { ; CHECK-NEXT: [[LIST_NEW_ILIST_PTR]] = bitcast i8* [[LIST_NEW_I8_PTR]] to %struct.ilist* ; CHECK-NEXT: [[GEP_NEW_VALUE:%.*]] = getelementptr inbounds [[STRUCT_ILIST:%.*]], %struct.ilist* [[LIST_NEW_ILIST_PTR]], i32 0, i32 0 ; CHECK-NEXT: store i32 42, i32* [[GEP_NEW_VALUE]], align 8 +; CHECK-NEXT: [[GEP_NEW_NEXT:%.*]] = getelementptr inbounds [[STRUCT_ILIST]], %struct.ilist* [[LIST_NEW_ILIST_PTR]], i32 0, i32 1 +; CHECK-NEXT: store %struct.ilist* [[LIST_NEXT]], %struct.ilist** [[GEP_NEW_NEXT]], align 4 ; CHECK-NEXT: [[IV_NEXT]] = add nuw nsw i32 [[IV]], 1 ; CHECK-NEXT: [[COND:%.*]] = icmp eq i32 [[IV_NEXT]], 10 ; CHECK-NEXT: br i1 [[COND]], label [[EXIT:%.*]], label [[LOOP]] From d840f3edf0297699e750413e629543c2a9271b90 Mon Sep 17 00:00:00 2001 From: Salman Javed Date: Thu, 23 Dec 2021 10:18:45 +1300 Subject: [PATCH 082/293] Resolve -Wdocumentation warning in ClangTidyDiagnosticConsumer (NFC) Change to comments only; NFC. 
``` ClangTidyDiagnosticConsumer.h:245:6: warning: '\param' command used in a comment that is not attached to a function declaration [-Wdocumentation] ``` See this build for an example: https://lab.llvm.org/buildbot/#/builders/109/builds/27293/steps/5/logs/warnings__702_ --- clang-tools-extra/clang-tidy/ClangTidyDiagnosticConsumer.h | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/clang-tools-extra/clang-tidy/ClangTidyDiagnosticConsumer.h b/clang-tools-extra/clang-tidy/ClangTidyDiagnosticConsumer.h index 23800f5622f6..69f1ceddd9bd 100644 --- a/clang-tools-extra/clang-tidy/ClangTidyDiagnosticConsumer.h +++ b/clang-tools-extra/clang-tidy/ClangTidyDiagnosticConsumer.h @@ -241,14 +241,12 @@ getFixIt(const tooling::Diagnostic &Diagnostic, bool AnyFix); /// A diagnostic consumer that turns each \c Diagnostic into a /// \c SourceManager-independent \c ClangTidyError. -/// -/// \param EnableNolintBlocks Enables diagnostic-disabling inside blocks of -/// code, delimited by NOLINTBEGIN and NOLINTEND. -// // FIXME: If we move away from unit-tests, this can be moved to a private // implementation file. class ClangTidyDiagnosticConsumer : public DiagnosticConsumer { public: + /// \param EnableNolintBlocks Enables diagnostic-disabling inside blocks of + /// code, delimited by NOLINTBEGIN and NOLINTEND. ClangTidyDiagnosticConsumer(ClangTidyContext &Ctx, DiagnosticsEngine *ExternalDiagEngine = nullptr, bool RemoveIncompatibleErrors = true, From edf8e3ea5ee74f7bf86651b6caa29e456e3119cd Mon Sep 17 00:00:00 2001 From: Mircea Trofin Date: Wed, 22 Dec 2021 13:17:10 -0800 Subject: [PATCH 083/293] [NFC][mlgo]Make the test model generator inlining-specific When looking at building the generator for regalloc, we realized we'd need quite a bit of custom logic, and that perhaps it'd be easier to just have each usecase (each kind of mlgo policy) have it's own stand-alone test generator. This patch just consolidates the old `config.py` and `generate_mock_model.py` into one file, and does away with subdirectories under Analysis/models. --- llvm/cmake/modules/TensorFlowCompile.cmake | 18 +++-- llvm/lib/Analysis/CMakeLists.txt | 2 +- .../config.py => gen-inline-oz-test-model.py} | 57 ++++++++++++++- .../Analysis/models/generate_mock_model.py | 69 ------------------- .../Inline/ML/bounds-checks-rewards.ll | 2 +- .../Inline/ML/development-training-log.ll | 2 +- .../Inline/ML/ml-test-development-mode.ll | 2 +- 7 files changed, 68 insertions(+), 84 deletions(-) rename llvm/lib/Analysis/models/{inlining/config.py => gen-inline-oz-test-model.py} (58%) delete mode 100644 llvm/lib/Analysis/models/generate_mock_model.py diff --git a/llvm/cmake/modules/TensorFlowCompile.cmake b/llvm/cmake/modules/TensorFlowCompile.cmake index ea5fa56cbdba..9427a26de8be 100644 --- a/llvm/cmake/modules/TensorFlowCompile.cmake +++ b/llvm/cmake/modules/TensorFlowCompile.cmake @@ -27,15 +27,13 @@ function(tf_get_model model final_path) endfunction() # Generate a mock model for tests. 
-function(generate_mock_model model generate_mock_model_py config) - tf_get_absolute_path(${model} ${CMAKE_CURRENT_BINARY_DIR} LLVM_ML_MODELS_ABSOLUTE) - tf_get_absolute_path(${generate_mock_model_py} ${CMAKE_CURRENT_SOURCE_DIR} GENERATED_MODEL_ABSOLUTE_PATH) - tf_get_absolute_path(${config} ${CMAKE_CURRENT_SOURCE_DIR} LLVM_ML_MODEL_CONFIG_ABSOLUTE) +function(generate_mock_model generator output) + tf_get_absolute_path(${generator} ${CMAKE_CURRENT_SOURCE_DIR} generator_absolute_path) + tf_get_absolute_path(${output} ${CMAKE_CURRENT_BINARY_DIR} output_absolute_path) message(WARNING "Autogenerated mock models should not be used in production builds.") execute_process(COMMAND python3 - ${GENERATED_MODEL_ABSOLUTE_PATH} - ${LLVM_ML_MODEL_CONFIG_ABSOLUTE} - ${LLVM_ML_MODELS_ABSOLUTE}-autogenerated + ${generator_absolute_path} + ${output_absolute_path} WORKING_DIRECTORY ${CMAKE_CURRENT_SOURCE_DIR} ) endfunction() @@ -86,7 +84,7 @@ function(tfcompile model tag_set signature_def_key fname cpp_class) endfunction() -function(tf_find_and_compile model default_url default_path generation_config tag_set signature_def_key fname cpp_class) +function(tf_find_and_compile model default_url default_path test_model_generator tag_set signature_def_key fname cpp_class) if ("${model}" STREQUAL "download") # Crash if the user wants to download a model but a URL is set to "TO_BE_UPDATED" if ("${default_url}" STREQUAL "TO_BE_UPDATED") @@ -97,8 +95,8 @@ function(tf_find_and_compile model default_url default_path generation_config ta endif() if ("${model}" STREQUAL "autogenerate") - generate_mock_model(${default_path} models/generate_mock_model.py ${generation_config}) - set(model ${default_path}-autogenerated) + set(model ${default_path}-autogenerated) + generate_mock_model(${test_model_generator} ${model}) endif() tf_get_model(${model} LLVM_ML_MODELS_ABSOLUTE) diff --git a/llvm/lib/Analysis/CMakeLists.txt b/llvm/lib/Analysis/CMakeLists.txt index ae0ee22561ce..aec84124129f 100644 --- a/llvm/lib/Analysis/CMakeLists.txt +++ b/llvm/lib/Analysis/CMakeLists.txt @@ -12,7 +12,7 @@ if (DEFINED LLVM_HAVE_TF_AOT OR DEFINED LLVM_HAVE_TF_API) ${LLVM_INLINER_MODEL_PATH} ${LLVM_INLINER_MODEL_CURRENT_URL} ${LLVM_INLINER_MODEL_PATH_DEFAULT} - "models/inlining/config.py" + "models/gen-inline-oz-test-model.py" serve action InlinerSizeModel diff --git a/llvm/lib/Analysis/models/inlining/config.py b/llvm/lib/Analysis/models/gen-inline-oz-test-model.py similarity index 58% rename from llvm/lib/Analysis/models/inlining/config.py rename to llvm/lib/Analysis/models/gen-inline-oz-test-model.py index 78d3a8259cc2..d800b1476eed 100644 --- a/llvm/lib/Analysis/models/inlining/config.py +++ b/llvm/lib/Analysis/models/gen-inline-oz-test-model.py @@ -1,4 +1,13 @@ -"""Inlining Training config.""" +"""Generate a mock model for LLVM tests. + +The generated model is not a neural net - it is just a tf.function with the +correct input and output parameters. By construction, the mock model will always +output 1. +""" + +import os +import importlib.util +import sys import tensorflow as tf @@ -85,3 +94,49 @@ def get_output_signature(): def get_output_spec(): return POLICY_OUTPUT_SPEC + +def get_output_spec_path(path): + return os.path.join(path, 'output_spec.json') + + +def build_mock_model(path, signature): + """Build and save the mock model with the given signature""" + module = tf.Module() + + # We have to set this useless variable in order for the TF C API to correctly + # intake it + module.var = tf.Variable(0.) 
+ + def action(*inputs): + s = tf.reduce_sum([tf.cast(x, tf.float32) for x in tf.nest.flatten(inputs)]) + return {signature['output']: float('inf') + s + module.var} + + module.action = tf.function()(action) + action = {'action': module.action.get_concrete_function(signature['inputs'])} + tf.saved_model.save(module, path, signatures=action) + + output_spec_path = get_output_spec_path(path) + with open(output_spec_path, 'w') as f: + print(f'Writing output spec to {output_spec_path}.') + f.write(signature['output_spec']) + + +def get_signature(): + return { + 'inputs': get_input_signature(), + 'output': get_output_signature(), + 'output_spec': get_output_spec() + } + + +def main(argv): + assert len(argv) == 2 + model_path = argv[1] + + print(f'Output model to: [{argv[1]}]') + signature = get_signature() + build_mock_model(model_path, signature) + + +if __name__ == '__main__': + main(sys.argv) diff --git a/llvm/lib/Analysis/models/generate_mock_model.py b/llvm/lib/Analysis/models/generate_mock_model.py deleted file mode 100644 index f1170abba383..000000000000 --- a/llvm/lib/Analysis/models/generate_mock_model.py +++ /dev/null @@ -1,69 +0,0 @@ -"""Generate a mock model for LLVM tests. - -The generated model is not a neural net - it is just a tf.function with the -correct input and output parameters. By construction, the mock model will always -output 1. -""" - -import os -import importlib.util -import sys - -import tensorflow as tf - - -def get_output_spec_path(path): - return os.path.join(path, 'output_spec.json') - - -def build_mock_model(path, signature): - """Build and save the mock model with the given signature""" - module = tf.Module() - - # We have to set this useless variable in order for the TF C API to correctly - # intake it - module.var = tf.Variable(0.) - - def action(*inputs): - s = tf.reduce_sum([tf.cast(x, tf.float32) for x in tf.nest.flatten(inputs)]) - return {signature['output']: float('inf') + s + module.var} - - module.action = tf.function()(action) - action = {'action': module.action.get_concrete_function(signature['inputs'])} - tf.saved_model.save(module, path, signatures=action) - - output_spec_path = get_output_spec_path(path) - with open(output_spec_path, 'w') as f: - print(f'Writing output spec to {output_spec_path}.') - f.write(signature['output_spec']) - - -def get_external_signature(config_path): - """Get the signature for the desired model. - - We manually import the python file at config_path to avoid adding a gin - dependency to the LLVM build. 
- """ - spec = importlib.util.spec_from_file_location('config', config_path) - config = importlib.util.module_from_spec(spec) - spec.loader.exec_module(config) - - return { - 'inputs': config.get_input_signature(), - 'output': config.get_output_signature(), - 'output_spec': config.get_output_spec() - } - - -def main(argv): - assert len(argv) == 3 - config_path = argv[1] - model_path = argv[2] - - print(f'Using config file at [{argv[1]}]') - signature = get_external_signature(config_path) - build_mock_model(model_path, signature) - - -if __name__ == '__main__': - main(sys.argv) diff --git a/llvm/test/Transforms/Inline/ML/bounds-checks-rewards.ll b/llvm/test/Transforms/Inline/ML/bounds-checks-rewards.ll index a410a2a071aa..48c2e0a301a9 100644 --- a/llvm/test/Transforms/Inline/ML/bounds-checks-rewards.ll +++ b/llvm/test/Transforms/Inline/ML/bounds-checks-rewards.ll @@ -8,7 +8,7 @@ ; ; Generate mock model ; RUN: rm -rf %t -; RUN: %python %S/../../../../lib/Analysis/models/generate_mock_model.py %S/../../../../lib/Analysis/models/inlining/config.py %t +; RUN: %python %S/../../../../lib/Analysis/models/gen-inline-oz-test-model.py %t ; ; When the bounds are very wide ("no bounds"), all inlinings happen. ; RUN: opt -passes=scc-oz-module-inliner -ml-inliner-ir2native-model=%S/../../../../unittests/Analysis/Inputs/ir2native_x86_64_model -ml-inliner-model-under-training=%t -training-log=- -tfutils-text-log -enable-ml-inliner=development -ml-advisor-size-increase-threshold=10.0 -S < %s 2>&1 | FileCheck %s --check-prefix=CHECK --check-prefix=NOBOUNDS diff --git a/llvm/test/Transforms/Inline/ML/development-training-log.ll b/llvm/test/Transforms/Inline/ML/development-training-log.ll index c571c02b3fd5..7d3b59207582 100644 --- a/llvm/test/Transforms/Inline/ML/development-training-log.ll +++ b/llvm/test/Transforms/Inline/ML/development-training-log.ll @@ -2,7 +2,7 @@ ; REQUIRES: have_tf_api ; Generate mock model ; RUN: rm -rf %t -; RUN: %python %S/../../../../lib/Analysis/models/generate_mock_model.py %S/../../../../lib/Analysis/models/inlining/config.py %t +; RUN: %python %S/../../../../lib/Analysis/models/gen-inline-oz-test-model.py %t ; ; RUN: opt -enable-ml-inliner=development -passes=scc-oz-module-inliner -training-log=- -tfutils-text-log -ml-inliner-model-under-training=%t -ml-inliner-ir2native-model=%S/../../../../unittests/Analysis/Inputs/ir2native_x86_64_model -S < %s | FileCheck %s ; RUN: opt -enable-ml-inliner=development -passes=scc-oz-module-inliner -training-log=- -tfutils-text-log -ml-inliner-model-under-training=%t -ml-inliner-ir2native-model=%S/../../../../unittests/Analysis/Inputs/ir2native_x86_64_model -ml-inliner-output-spec-override=%S/Inputs/test_output_spec.json -S < %s | FileCheck %s --check-prefixes=EXTRA-OUTPUTS,CHECK diff --git a/llvm/test/Transforms/Inline/ML/ml-test-development-mode.ll b/llvm/test/Transforms/Inline/ML/ml-test-development-mode.ll index b755cf015a0d..d902b9e4c778 100644 --- a/llvm/test/Transforms/Inline/ML/ml-test-development-mode.ll +++ b/llvm/test/Transforms/Inline/ML/ml-test-development-mode.ll @@ -7,6 +7,6 @@ ; ; REQUIRES: have_tf_api ; RUN: rm -rf %t && mkdir %t -; RUN: %python %S/../../../../lib/Analysis/models/generate_mock_model.py %S/../../../../lib/Analysis/models/inlining/config.py %t +; RUN: %python %S/../../../../lib/Analysis/models/gen-inline-oz-test-model.py %t ; RUN: opt -passes=scc-oz-module-inliner -enable-ml-inliner=default -S < %S/Inputs/test-module.ll 2>&1 | FileCheck %S/Inputs/test-module.ll --check-prefix=DEFAULT ; RUN: opt 
-passes=scc-oz-module-inliner -enable-ml-inliner=development -ml-inliner-model-under-training=%t -S < %S/Inputs/test-module.ll 2>&1 | FileCheck %S/Inputs/test-module.ll --check-prefix=CHECK From 3cc2161c897ca6c9f1ff09fbc610a6ae40b3b566 Mon Sep 17 00:00:00 2001 From: Siva Chandra Reddy Date: Wed, 22 Dec 2021 18:17:50 +0000 Subject: [PATCH 084/293] [libc] Move the x86_64 syscall functions to OSUtil. Reviewed By: michaelrj, lntue Differential Revision: https://reviews.llvm.org/D116177 --- libc/config/linux/CMakeLists.txt | 12 ------------ libc/loader/linux/x86_64/CMakeLists.txt | 2 +- libc/loader/linux/x86_64/start.cpp | 2 +- libc/src/__support/OSUtil/CMakeLists.txt | 1 + libc/src/__support/OSUtil/linux/CMakeLists.txt | 8 +++++--- libc/src/__support/OSUtil/linux/io.h | 4 ++-- libc/src/__support/OSUtil/linux/quick_exit.h | 4 ++-- libc/src/__support/OSUtil/linux/syscall.h | 18 ++++++++++++++++++ .../OSUtil/linux/x86_64/CMakeLists.txt | 7 +++++++ .../__support/OSUtil/linux/x86_64/syscall.h} | 8 +++++--- .../__support/OSUtil/syscall.h} | 10 ++++++---- libc/src/assert/CMakeLists.txt | 2 +- libc/src/assert/__assert_fail.cpp | 2 +- libc/src/signal/linux/CMakeLists.txt | 8 ++++---- libc/src/signal/linux/__restore.cpp | 2 +- libc/src/signal/linux/signal.h | 4 ++-- libc/src/stdlib/linux/CMakeLists.txt | 2 +- libc/src/stdlib/linux/_Exit.cpp | 4 ++-- libc/src/sys/mman/linux/CMakeLists.txt | 4 ++-- libc/src/sys/mman/linux/mmap.cpp | 4 ++-- libc/src/sys/mman/linux/munmap.cpp | 4 ++-- libc/src/threads/linux/CMakeLists.txt | 10 +++++----- libc/src/threads/linux/CndVar.h | 6 +++--- libc/src/threads/linux/Mutex.h | 6 +++--- libc/src/threads/linux/call_once.cpp | 6 +++--- libc/src/threads/linux/thrd_create.cpp | 10 +++++----- libc/src/threads/linux/thrd_join.cpp | 6 +++--- libc/src/unistd/linux/CMakeLists.txt | 2 +- libc/src/unistd/linux/write.cpp | 4 ++-- libc/test/CMakeLists.txt | 1 - libc/test/config/CMakeLists.txt | 1 - libc/test/config/linux/CMakeLists.txt | 5 ----- libc/test/config/linux/x86_64/CMakeLists.txt | 7 ------- libc/test/src/__support/CMakeLists.txt | 2 ++ libc/test/src/__support/OSUtil/CMakeLists.txt | 3 +++ .../src/__support/OSUtil/linux/CMakeLists.txt | 1 + .../OSUtil/linux/x86_64/CMakeLists.txt | 7 +++++++ .../OSUtil}/linux/x86_64/syscall_test.cpp | 5 ++--- 38 files changed, 106 insertions(+), 88 deletions(-) create mode 100644 libc/src/__support/OSUtil/linux/syscall.h create mode 100644 libc/src/__support/OSUtil/linux/x86_64/CMakeLists.txt rename libc/{config/linux/x86_64/syscall.h.inc => src/__support/OSUtil/linux/x86_64/syscall.h} (93%) rename libc/{config/linux/syscall.h.def => src/__support/OSUtil/syscall.h} (65%) delete mode 100644 libc/test/config/CMakeLists.txt delete mode 100644 libc/test/config/linux/CMakeLists.txt delete mode 100644 libc/test/config/linux/x86_64/CMakeLists.txt create mode 100644 libc/test/src/__support/OSUtil/CMakeLists.txt create mode 100644 libc/test/src/__support/OSUtil/linux/CMakeLists.txt create mode 100644 libc/test/src/__support/OSUtil/linux/x86_64/CMakeLists.txt rename libc/test/{config => src/__support/OSUtil}/linux/x86_64/syscall_test.cpp (97%) diff --git a/libc/config/linux/CMakeLists.txt b/libc/config/linux/CMakeLists.txt index e3a00189a2ad..bc42557f3db9 100644 --- a/libc/config/linux/CMakeLists.txt +++ b/libc/config/linux/CMakeLists.txt @@ -1,15 +1,3 @@ -add_gen_header( - linux_syscall_h - DEF_FILE syscall.h.def - GEN_HDR syscall.h - PARAMS - inline_syscalls=${LIBC_TARGET_ARCHITECTURE}/syscall.h.inc - DATA_FILES - 
${LIBC_TARGET_ARCHITECTURE}/syscall.h.inc - DEPENDS - libc.src.__support.common -) - add_header( app_h HDR diff --git a/libc/loader/linux/x86_64/CMakeLists.txt b/libc/loader/linux/x86_64/CMakeLists.txt index ceaa94ee11a3..75b8a23802e7 100644 --- a/libc/loader/linux/x86_64/CMakeLists.txt +++ b/libc/loader/linux/x86_64/CMakeLists.txt @@ -3,9 +3,9 @@ add_loader_object( SRC start.cpp DEPENDS - libc.config.linux.linux_syscall_h libc.config.linux.app_h libc.include.sys_syscall + libc.src.__support.OSUtil.osutil libc.src.string.memcpy libc.src.sys.mman.mmap COMPILE_OPTIONS diff --git a/libc/loader/linux/x86_64/start.cpp b/libc/loader/linux/x86_64/start.cpp index 320fda96105b..55ae00391927 100644 --- a/libc/loader/linux/x86_64/start.cpp +++ b/libc/loader/linux/x86_64/start.cpp @@ -7,9 +7,9 @@ //===----------------------------------------------------------------------===// #include "config/linux/app.h" -#include "config/linux/syscall.h" #include "include/sys/mman.h" #include "include/sys/syscall.h" +#include "src/__support/OSUtil/syscall.h" #include "src/string/memcpy.h" #include "src/sys/mman/mmap.h" diff --git a/libc/src/__support/OSUtil/CMakeLists.txt b/libc/src/__support/OSUtil/CMakeLists.txt index b0f7ac86ffa5..028a9c06123c 100644 --- a/libc/src/__support/OSUtil/CMakeLists.txt +++ b/libc/src/__support/OSUtil/CMakeLists.txt @@ -5,6 +5,7 @@ add_header_library( HDRS io.h quick_exit.h + syscall.h DEPENDS libc.src.__support.OSUtil.linux.linux_util ) diff --git a/libc/src/__support/OSUtil/linux/CMakeLists.txt b/libc/src/__support/OSUtil/linux/CMakeLists.txt index 38286147a914..4b0f5d70a0a2 100644 --- a/libc/src/__support/OSUtil/linux/CMakeLists.txt +++ b/libc/src/__support/OSUtil/linux/CMakeLists.txt @@ -1,10 +1,12 @@ +add_subdirectory(x86_64) + add_header_library( linux_util HDRS io.h quick_exit.h + syscall.h DEPENDS - libc.config.linux.linux_syscall_h - libc.include.sys_syscall - libc.src.string.string_utils + .x86_64.linux_x86_64_util + libc.src.__support.common ) diff --git a/libc/src/__support/OSUtil/linux/io.h b/libc/src/__support/OSUtil/linux/io.h index 5aaa544b5142..71830476bfff 100644 --- a/libc/src/__support/OSUtil/linux/io.h +++ b/libc/src/__support/OSUtil/linux/io.h @@ -9,9 +9,9 @@ #ifndef LLVM_LIBC_SRC_SUPPORT_OSUTIL_LINUX_IO_H #define LLVM_LIBC_SRC_SUPPORT_OSUTIL_LINUX_IO_H -#include "config/linux/syscall.h" // For internal syscall function. -#include "include/sys/syscall.h" // For syscall numbers. +#include "include/sys/syscall.h" // For syscall numbers. #include "src/string/string_utils.h" +#include "syscall.h" // For internal syscall function. namespace __llvm_libc { diff --git a/libc/src/__support/OSUtil/linux/quick_exit.h b/libc/src/__support/OSUtil/linux/quick_exit.h index 932959d265b1..eb9d86f446cf 100644 --- a/libc/src/__support/OSUtil/linux/quick_exit.h +++ b/libc/src/__support/OSUtil/linux/quick_exit.h @@ -9,8 +9,8 @@ #ifndef LLVM_LIBC_SRC_SUPPORT_OSUTIL_LINUX_QUICK_EXIT_H #define LLVM_LIBC_SRC_SUPPORT_OSUTIL_LINUX_QUICK_EXIT_H -#include "config/linux/syscall.h" // For internal syscall function. -#include "include/sys/syscall.h" // For syscall numbers. +#include "include/sys/syscall.h" // For syscall numbers. +#include "syscall.h" // For internal syscall function. 
namespace __llvm_libc { diff --git a/libc/src/__support/OSUtil/linux/syscall.h b/libc/src/__support/OSUtil/linux/syscall.h new file mode 100644 index 000000000000..754883e8a667 --- /dev/null +++ b/libc/src/__support/OSUtil/linux/syscall.h @@ -0,0 +1,18 @@ +//===----------------------- Linux syscalls ---------------------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_LIBC_SRC_SUPPORT_OSUTIL_LINUX_SYSCALL_H +#define LLVM_LIBC_SRC_SUPPORT_OSUTIL_LINUX_SYSCALL_H + +#include "src/__support/architectures.h" + +#ifdef LLVM_LIBC_ARCH_X86_64 +#include "x86_64/syscall.h" +#endif + +#endif // LLVM_LIBC_SRC_SUPPORT_OSUTIL_LINUX_SYSCALL_H diff --git a/libc/src/__support/OSUtil/linux/x86_64/CMakeLists.txt b/libc/src/__support/OSUtil/linux/x86_64/CMakeLists.txt new file mode 100644 index 000000000000..a7f2d74e6353 --- /dev/null +++ b/libc/src/__support/OSUtil/linux/x86_64/CMakeLists.txt @@ -0,0 +1,7 @@ +add_header_library( + linux_x86_64_util + HDRS + syscall.h + DEPENDS + libc.src.__support.common +) diff --git a/libc/config/linux/x86_64/syscall.h.inc b/libc/src/__support/OSUtil/linux/x86_64/syscall.h similarity index 93% rename from libc/config/linux/x86_64/syscall.h.inc rename to libc/src/__support/OSUtil/linux/x86_64/syscall.h index ee3b5e5bfc0d..77d139806271 100644 --- a/libc/config/linux/x86_64/syscall.h.inc +++ b/libc/src/__support/OSUtil/linux/x86_64/syscall.h @@ -1,4 +1,4 @@ -//===------------ inline implementation of x86_64 syscalls ----------------===// +//===---------- inline implementation of x86_64 syscalls ----------* C++ *-===// // // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. // See https://llvm.org/LICENSE.txt for license information. @@ -6,7 +6,8 @@ // //===----------------------------------------------------------------------===// -%%begin() +#ifndef LLVM_LIBC_SRC_SUPPORT_OSUTIL_LINUX_X86_64_SYSCALL_H +#define LLVM_LIBC_SRC_SUPPORT_OSUTIL_LINUX_X86_64_SYSCALL_H #include "src/__support/common.h" @@ -100,7 +101,8 @@ __attribute__((always_inline)) inline long syscall(long __number, Ts... 
ts) { return syscall(__number, (long)ts...); } - #undef SYSCALL_CLOBBER_LIST } // namespace __llvm_libc + +#endif // LLVM_LIBC_SRC_SUPPORT_OSUTIL_LINUX_X86_64_SYSCALL_H diff --git a/libc/config/linux/syscall.h.def b/libc/src/__support/OSUtil/syscall.h similarity index 65% rename from libc/config/linux/syscall.h.def rename to libc/src/__support/OSUtil/syscall.h index 0daceaf00dde..b1137275a86f 100644 --- a/libc/config/linux/syscall.h.def +++ b/libc/src/__support/OSUtil/syscall.h @@ -6,9 +6,11 @@ // //===----------------------------------------------------------------------===// -#ifndef LLVM_LIBC_CONFIG_LINUX_SYSCALL_H -#define LLVM_LIBC_CONFIG_LINUX_SYSCALL_H +#ifndef LLVM_LIBC_SRC_SUPPORT_OSUTIL_SYSCALL_H +#define LLVM_LIBC_SRC_SUPPORT_OSUTIL_SYSCALL_H -%%include_file(${inline_syscalls}) +#ifdef __unix__ +#include "linux/syscall.h" +#endif -#endif // LLVM_LIBC_CONFIG_LINUX_SYSCALL_H +#endif // LLVM_LIBC_SRC_SUPPORT_OSUTIL_SYSCALL_H diff --git a/libc/src/assert/CMakeLists.txt b/libc/src/assert/CMakeLists.txt index 36123cc6bd41..7f82017e51c5 100644 --- a/libc/src/assert/CMakeLists.txt +++ b/libc/src/assert/CMakeLists.txt @@ -8,7 +8,7 @@ add_entrypoint_object( DEPENDS # These two dependencies are temporary and should be replaced by fprintf # later. - libc.config.linux.linux_syscall_h + libc.src.__support.OSUtil.osutil libc.include.sys_syscall libc.src.stdlib.abort ) diff --git a/libc/src/assert/__assert_fail.cpp b/libc/src/assert/__assert_fail.cpp index 0fe2ca6da87c..93c176a62231 100644 --- a/libc/src/assert/__assert_fail.cpp +++ b/libc/src/assert/__assert_fail.cpp @@ -10,8 +10,8 @@ #include "src/stdlib/abort.h" // These includes are temporary. -#include "config/linux/syscall.h" // For internal syscall function. #include "include/sys/syscall.h" // For syscall numbers. +#include "src/__support/OSUtil/syscall.h" // For internal syscall function. namespace __llvm_libc { diff --git a/libc/src/signal/linux/CMakeLists.txt b/libc/src/signal/linux/CMakeLists.txt index fc4a7aa32bf2..419146f5bd44 100644 --- a/libc/src/signal/linux/CMakeLists.txt +++ b/libc/src/signal/linux/CMakeLists.txt @@ -7,9 +7,9 @@ add_entrypoint_object( signal.h ../raise.h DEPENDS - libc.config.linux.linux_syscall_h libc.include.signal libc.include.sys_syscall + libc.src.__support.OSUtil.osutil ) add_object_library( @@ -25,8 +25,8 @@ add_object_library( # asan creates asan.module_ctor which uses stack space, causing warnings. -fno-sanitize=address DEPENDS - libc.config.linux.linux_syscall_h libc.include.sys_syscall + libc.src.__support.OSUtil.osutil ) add_entrypoint_object( @@ -38,9 +38,9 @@ add_entrypoint_object( ../sigaction.h DEPENDS .__restore - libc.config.linux.linux_syscall_h libc.include.signal libc.include.sys_syscall + libc.src.__support.OSUtil.osutil libc.src.errno.__errno_location ) @@ -52,9 +52,9 @@ add_entrypoint_object( signal.h ../sigprocmask.h DEPENDS - libc.config.linux.linux_syscall_h libc.include.signal libc.include.sys_syscall + libc.src.__support.OSUtil.osutil libc.src.errno.__errno_location ) diff --git a/libc/src/signal/linux/__restore.cpp b/libc/src/signal/linux/__restore.cpp index 8ce848f4960b..c648ddb4cb15 100644 --- a/libc/src/signal/linux/__restore.cpp +++ b/libc/src/signal/linux/__restore.cpp @@ -10,8 +10,8 @@ // strongly control the options this file is compiled with. __restore_rt cannot // make any stack allocations so we must ensure this. 
-#include "config/linux/syscall.h" #include "include/sys/syscall.h" +#include "src/__support/OSUtil/syscall.h" namespace __llvm_libc { diff --git a/libc/src/signal/linux/signal.h b/libc/src/signal/linux/signal.h index a59a56ced5d6..fd312d79ed72 100644 --- a/libc/src/signal/linux/signal.h +++ b/libc/src/signal/linux/signal.h @@ -9,8 +9,8 @@ #ifndef LLVM_LIBC_SRC_SIGNAL_LINUX_SIGNAL_H #define LLVM_LIBC_SRC_SIGNAL_LINUX_SIGNAL_H -#include "config/linux/syscall.h" // For internal syscall function. -#include "include/sys/syscall.h" // For syscall numbers. +#include "include/sys/syscall.h" // For syscall numbers. +#include "src/__support/OSUtil/syscall.h" // For internal syscall function. #include "include/signal.h" diff --git a/libc/src/stdlib/linux/CMakeLists.txt b/libc/src/stdlib/linux/CMakeLists.txt index 27e00a35c70f..c5130c11ed91 100644 --- a/libc/src/stdlib/linux/CMakeLists.txt +++ b/libc/src/stdlib/linux/CMakeLists.txt @@ -6,6 +6,6 @@ add_entrypoint_object( ../_Exit.h DEPENDS libc.include.sys_syscall - libc.config.linux.linux_syscall_h libc.include.stdlib + libc.src.__support.OSUtil.osutil ) diff --git a/libc/src/stdlib/linux/_Exit.cpp b/libc/src/stdlib/linux/_Exit.cpp index 7fdd60ffe9bc..51b0a48c7521 100644 --- a/libc/src/stdlib/linux/_Exit.cpp +++ b/libc/src/stdlib/linux/_Exit.cpp @@ -6,8 +6,8 @@ // //===----------------------------------------------------------------------===// -#include "config/linux/syscall.h" // For internal syscall function. -#include "include/sys/syscall.h" // For syscall numbers. +#include "include/sys/syscall.h" // For syscall numbers. +#include "src/__support/OSUtil/syscall.h" // For internal syscall function. #include "src/__support/common.h" #include "src/stdlib/_Exit.h" diff --git a/libc/src/sys/mman/linux/CMakeLists.txt b/libc/src/sys/mman/linux/CMakeLists.txt index 7bf7c2f96d2d..949d965fdbe9 100644 --- a/libc/src/sys/mman/linux/CMakeLists.txt +++ b/libc/src/sys/mman/linux/CMakeLists.txt @@ -5,9 +5,9 @@ add_entrypoint_object( HDRS ../mmap.h DEPENDS - libc.config.linux.linux_syscall_h libc.include.sys_mman libc.include.sys_syscall + libc.src.__support.OSUtil.osutil libc.src.errno.__errno_location ) @@ -18,8 +18,8 @@ add_entrypoint_object( HDRS ../munmap.h DEPENDS - libc.config.linux.linux_syscall_h libc.include.sys_mman libc.include.sys_syscall + libc.src.__support.OSUtil.osutil libc.src.errno.__errno_location ) diff --git a/libc/src/sys/mman/linux/mmap.cpp b/libc/src/sys/mman/linux/mmap.cpp index 31e114a7f66b..0447ef129891 100644 --- a/libc/src/sys/mman/linux/mmap.cpp +++ b/libc/src/sys/mman/linux/mmap.cpp @@ -8,8 +8,8 @@ #include "src/sys/mman/mmap.h" -#include "config/linux/syscall.h" // For internal syscall function. -#include "include/sys/syscall.h" // For syscall numbers. +#include "include/sys/syscall.h" // For syscall numbers. +#include "src/__support/OSUtil/syscall.h" // For internal syscall function. #include "src/__support/common.h" #include "src/errno/llvmlibc_errno.h" diff --git a/libc/src/sys/mman/linux/munmap.cpp b/libc/src/sys/mman/linux/munmap.cpp index fd89773cc829..7801a637f23f 100644 --- a/libc/src/sys/mman/linux/munmap.cpp +++ b/libc/src/sys/mman/linux/munmap.cpp @@ -8,8 +8,8 @@ #include "src/sys/mman/munmap.h" -#include "config/linux/syscall.h" // For internal syscall function. -#include "include/sys/syscall.h" // For syscall numbers. +#include "include/sys/syscall.h" // For syscall numbers. +#include "src/__support/OSUtil/syscall.h" // For internal syscall function. 
#include "src/__support/common.h" #include "src/errno/llvmlibc_errno.h" diff --git a/libc/src/threads/linux/CMakeLists.txt b/libc/src/threads/linux/CMakeLists.txt index e9fc8d2d9c44..fcc3381e68bb 100644 --- a/libc/src/threads/linux/CMakeLists.txt +++ b/libc/src/threads/linux/CMakeLists.txt @@ -16,9 +16,9 @@ add_entrypoint_object( ../call_once.h DEPENDS .threads_utils - libc.config.linux.linux_syscall_h libc.include.sys_syscall libc.include.threads + libc.src.__support.OSUtil.osutil ) add_header_library( @@ -30,9 +30,9 @@ add_header_library( Thread.h DEPENDS .thread_start_args_h - libc.config.linux.linux_syscall_h libc.include.sys_syscall libc.include.threads + libc.src.__support.OSUtil.osutil ) add_entrypoint_object( @@ -43,11 +43,11 @@ add_entrypoint_object( ../thrd_create.h DEPENDS .threads_utils - libc.config.linux.linux_syscall_h libc.include.errno libc.include.sys_syscall libc.include.threads libc.src.__support.common + libc.src.__support.OSUtil.osutil libc.src.errno.__errno_location libc.src.sys.mman.mmap COMPILE_OPTIONS @@ -64,11 +64,11 @@ add_entrypoint_object( ../thrd_join.h DEPENDS .threads_utils - libc.config.linux.linux_syscall_h libc.include.sys_syscall libc.include.threads - libc.src.sys.mman.munmap libc.src.__support.common + libc.src.__support.OSUtil.osutil + libc.src.sys.mman.munmap ) add_entrypoint_object( diff --git a/libc/src/threads/linux/CndVar.h b/libc/src/threads/linux/CndVar.h index 5d9ced5d6457..7e531c87ff1f 100644 --- a/libc/src/threads/linux/CndVar.h +++ b/libc/src/threads/linux/CndVar.h @@ -12,9 +12,9 @@ #include "Futex.h" #include "Mutex.h" -#include "config/linux/syscall.h" // For syscall functions. -#include "include/sys/syscall.h" // For syscall numbers. -#include "include/threads.h" // For values like thrd_success etc. +#include "include/sys/syscall.h" // For syscall numbers. +#include "include/threads.h" // For values like thrd_success etc. +#include "src/__support/OSUtil/syscall.h" // For syscall functions. #include // For futex operations. #include // For atomic operations diff --git a/libc/src/threads/linux/Mutex.h b/libc/src/threads/linux/Mutex.h index f6033c92eaf6..a7b06d4c2bab 100644 --- a/libc/src/threads/linux/Mutex.h +++ b/libc/src/threads/linux/Mutex.h @@ -11,9 +11,9 @@ #include "Futex.h" -#include "config/linux/syscall.h" // For syscall functions. -#include "include/sys/syscall.h" // For syscall numbers. -#include "include/threads.h" // For values like thrd_success etc. +#include "include/sys/syscall.h" // For syscall numbers. +#include "include/threads.h" // For values like thrd_success etc. +#include "src/__support/OSUtil/syscall.h" // For syscall functions. #include // For futex operations. #include diff --git a/libc/src/threads/linux/call_once.cpp b/libc/src/threads/linux/call_once.cpp index 1bcb46f6b0a3..9b9664634a1f 100644 --- a/libc/src/threads/linux/call_once.cpp +++ b/libc/src/threads/linux/call_once.cpp @@ -7,9 +7,9 @@ //===----------------------------------------------------------------------===// #include "src/threads/call_once.h" -#include "config/linux/syscall.h" // For syscall functions. -#include "include/sys/syscall.h" // For syscall numbers. -#include "include/threads.h" // For call_once related type definition. +#include "include/sys/syscall.h" // For syscall numbers. +#include "include/threads.h" // For call_once related type definition. +#include "src/__support/OSUtil/syscall.h" // For syscall functions. 
#include "src/__support/common.h" #include "src/threads/linux/Futex.h" diff --git a/libc/src/threads/linux/thrd_create.cpp b/libc/src/threads/linux/thrd_create.cpp index f2464d4075ea..5421c96775b7 100644 --- a/libc/src/threads/linux/thrd_create.cpp +++ b/libc/src/threads/linux/thrd_create.cpp @@ -7,11 +7,11 @@ //===----------------------------------------------------------------------===// #include "src/threads/thrd_create.h" -#include "config/linux/syscall.h" // For syscall function. -#include "include/errno.h" // For E* error values. -#include "include/sys/mman.h" // For PROT_* and MAP_* definitions. -#include "include/sys/syscall.h" // For syscall numbers. -#include "include/threads.h" // For thrd_* type definitions. +#include "include/errno.h" // For E* error values. +#include "include/sys/mman.h" // For PROT_* and MAP_* definitions. +#include "include/sys/syscall.h" // For syscall numbers. +#include "include/threads.h" // For thrd_* type definitions. +#include "src/__support/OSUtil/syscall.h" // For syscall function. #include "src/__support/common.h" #include "src/errno/llvmlibc_errno.h" #include "src/sys/mman/mmap.h" diff --git a/libc/src/threads/linux/thrd_join.cpp b/libc/src/threads/linux/thrd_join.cpp index 3c779495def5..f55f5a3af3f5 100644 --- a/libc/src/threads/linux/thrd_join.cpp +++ b/libc/src/threads/linux/thrd_join.cpp @@ -7,9 +7,9 @@ //===----------------------------------------------------------------------===// #include "src/threads/thrd_join.h" -#include "config/linux/syscall.h" // For syscall function. -#include "include/sys/syscall.h" // For syscall numbers. -#include "include/threads.h" // For thrd_* type definitions. +#include "include/sys/syscall.h" // For syscall numbers. +#include "include/threads.h" // For thrd_* type definitions. +#include "src/__support/OSUtil/syscall.h" // For syscall function. #include "src/__support/common.h" #include "src/sys/mman/munmap.h" #include "src/threads/linux/Futex.h" diff --git a/libc/src/unistd/linux/CMakeLists.txt b/libc/src/unistd/linux/CMakeLists.txt index bd2cba708e69..1c3fdf15a6a7 100644 --- a/libc/src/unistd/linux/CMakeLists.txt +++ b/libc/src/unistd/linux/CMakeLists.txt @@ -6,7 +6,7 @@ add_entrypoint_object( ../write.h DEPENDS libc.include.unistd - libc.config.linux.linux_syscall_h libc.include.sys_syscall + libc.src.__support.OSUtil.osutil libc.src.errno.__errno_location ) diff --git a/libc/src/unistd/linux/write.cpp b/libc/src/unistd/linux/write.cpp index de0efb9bf8de..3554e3030943 100644 --- a/libc/src/unistd/linux/write.cpp +++ b/libc/src/unistd/linux/write.cpp @@ -8,8 +8,8 @@ #include "src/unistd/write.h" -#include "config/linux/syscall.h" // For internal syscall function. -#include "include/sys/syscall.h" // For syscall numbers. +#include "include/sys/syscall.h" // For syscall numbers. +#include "src/__support/OSUtil/syscall.h" // For internal syscall function. 
#include "src/__support/common.h" #include "src/errno/llvmlibc_errno.h" diff --git a/libc/test/CMakeLists.txt b/libc/test/CMakeLists.txt index 3cf1f8d59d95..850d69345d29 100644 --- a/libc/test/CMakeLists.txt +++ b/libc/test/CMakeLists.txt @@ -18,6 +18,5 @@ if(NOT LLVM_LIBC_FULL_BUILD) return() endif() -add_subdirectory(config) add_subdirectory(integration) add_subdirectory(loader) diff --git a/libc/test/config/CMakeLists.txt b/libc/test/config/CMakeLists.txt deleted file mode 100644 index 52824d270583..000000000000 --- a/libc/test/config/CMakeLists.txt +++ /dev/null @@ -1 +0,0 @@ -add_subdirectory(${LIBC_TARGET_OS}) diff --git a/libc/test/config/linux/CMakeLists.txt b/libc/test/config/linux/CMakeLists.txt deleted file mode 100644 index b449a33ec94c..000000000000 --- a/libc/test/config/linux/CMakeLists.txt +++ /dev/null @@ -1,5 +0,0 @@ -add_libc_testsuite(libc_linux_tests) - -if(EXISTS ${CMAKE_CURRENT_SOURCE_DIR}/${LIBC_TARGET_ARCHITECTURE}) - add_subdirectory(${LIBC_TARGET_ARCHITECTURE}) -endif() diff --git a/libc/test/config/linux/x86_64/CMakeLists.txt b/libc/test/config/linux/x86_64/CMakeLists.txt deleted file mode 100644 index 0436736eefd0..000000000000 --- a/libc/test/config/linux/x86_64/CMakeLists.txt +++ /dev/null @@ -1,7 +0,0 @@ -add_libc_unittest( - libc_linux_x86_64_syscall_unittest - SUITE libc_linux_tests - SRCS syscall_test.cpp - DEPENDS - libc.config.linux.linux_syscall_h -) diff --git a/libc/test/src/__support/CMakeLists.txt b/libc/test/src/__support/CMakeLists.txt index d2367ef52f69..20d0e2d75745 100644 --- a/libc/test/src/__support/CMakeLists.txt +++ b/libc/test/src/__support/CMakeLists.txt @@ -48,3 +48,5 @@ add_custom_command(TARGET libc_str_to_float_comparison_test DEPENDS ${float_test_file} COMMENT "Test the strtof and strtod implementations against precomputed results." 
VERBATIM) + +add_subdirectory(OSUtil) diff --git a/libc/test/src/__support/OSUtil/CMakeLists.txt b/libc/test/src/__support/OSUtil/CMakeLists.txt new file mode 100644 index 000000000000..494218b4bb12 --- /dev/null +++ b/libc/test/src/__support/OSUtil/CMakeLists.txt @@ -0,0 +1,3 @@ +add_libc_testsuite(libc_osutil_tests) + +add_subdirectory(linux) diff --git a/libc/test/src/__support/OSUtil/linux/CMakeLists.txt b/libc/test/src/__support/OSUtil/linux/CMakeLists.txt new file mode 100644 index 000000000000..7e2608e595f5 --- /dev/null +++ b/libc/test/src/__support/OSUtil/linux/CMakeLists.txt @@ -0,0 +1 @@ +add_subdirectory(x86_64) diff --git a/libc/test/src/__support/OSUtil/linux/x86_64/CMakeLists.txt b/libc/test/src/__support/OSUtil/linux/x86_64/CMakeLists.txt new file mode 100644 index 000000000000..b6f57eb41f72 --- /dev/null +++ b/libc/test/src/__support/OSUtil/linux/x86_64/CMakeLists.txt @@ -0,0 +1,7 @@ +add_libc_unittest( + syscall_unittest + SUITE libc_osutil_tests + SRCS syscall_test.cpp + DEPENDS + libc.src.__support.OSUtil.osutil +) diff --git a/libc/test/config/linux/x86_64/syscall_test.cpp b/libc/test/src/__support/OSUtil/linux/x86_64/syscall_test.cpp similarity index 97% rename from libc/test/config/linux/x86_64/syscall_test.cpp rename to libc/test/src/__support/OSUtil/linux/x86_64/syscall_test.cpp index eb2205aa4fb6..2f9816e96829 100644 --- a/libc/test/config/linux/x86_64/syscall_test.cpp +++ b/libc/test/src/__support/OSUtil/linux/x86_64/syscall_test.cpp @@ -6,10 +6,9 @@ // //===----------------------------------------------------------------------===// -#include "config/linux/syscall.h" -#include "utils/UnitTest/Test.h" - #include "src/__support/CPP/Functional.h" +#include "src/__support/OSUtil/syscall.h" +#include "utils/UnitTest/Test.h" TEST(LlvmLibcX86_64_SyscallTest, APITest) { // We only do a signature test here. Actual functionality tests are From 5fc4323eda60ee15ef4c87d989d468cd0e6d77c1 Mon Sep 17 00:00:00 2001 From: Fangrui Song Date: Wed, 22 Dec 2021 14:36:14 -0800 Subject: [PATCH 085/293] [ELF] Change some global pointers to unique_ptr Currently the singleton `config` is assigned by `config = make()` and (if `canExitEarly` is false) destroyed by `lld::freeArena`. `make` allocates a stab with `malloc(4096)`. This both wastes memory and bloats the executable (every type instantiates `BumpPtrAllocator` which costs more than 1KiB code on x86-64). (No need to worry about `clang::no_destroy`. Regular invocations (`canExitEarly` is true) call `_Exit` via llvm::sys::Process::ExitNoCleanup.) Reviewed By: lichray Differential Revision: https://reviews.llvm.org/D116143 --- lld/ELF/Config.h | 3 ++- lld/ELF/Driver.cpp | 12 ++++++------ lld/ELF/Driver.h | 2 +- lld/ELF/LinkerScript.cpp | 2 +- lld/ELF/LinkerScript.h | 2 +- lld/ELF/SymbolTable.cpp | 2 +- lld/ELF/SymbolTable.h | 2 +- 7 files changed, 13 insertions(+), 12 deletions(-) diff --git a/lld/ELF/Config.h b/lld/ELF/Config.h index 0c8ce62c566a..b3d5219ff57b 100644 --- a/lld/ELF/Config.h +++ b/lld/ELF/Config.h @@ -22,6 +22,7 @@ #include "llvm/Support/GlobPattern.h" #include "llvm/Support/PrettyStackTrace.h" #include +#include #include namespace lld { @@ -343,7 +344,7 @@ struct Configuration { }; // The only instance of Configuration struct. -extern Configuration *config; +extern std::unique_ptr config; // The first two elements of versionDefinitions represent VER_NDX_LOCAL and // VER_NDX_GLOBAL. This helper returns other elements. 
diff --git a/lld/ELF/Driver.cpp b/lld/ELF/Driver.cpp index d0007449a2b1..05cc4db5af74 100644 --- a/lld/ELF/Driver.cpp +++ b/lld/ELF/Driver.cpp @@ -71,8 +71,8 @@ using namespace llvm::support; using namespace lld; using namespace lld::elf; -Configuration *elf::config; -LinkerDriver *elf::driver; +std::unique_ptr elf::config; +std::unique_ptr elf::driver; static void setConfigs(opt::InputArgList &args); static void readConfigs(opt::InputArgList &args); @@ -111,10 +111,10 @@ bool elf::link(ArrayRef args, bool canExitEarly, errorHandler().exitEarly = canExitEarly; stderrOS.enable_colors(stderrOS.has_colors()); - config = make(); - driver = make(); - script = make(); - symtab = make(); + config = std::make_unique(); + driver = std::make_unique(); + script = std::make_unique(); + symtab = std::make_unique(); partitions = {Partition()}; diff --git a/lld/ELF/Driver.h b/lld/ELF/Driver.h index 96d040041c5a..5961e1f69472 100644 --- a/lld/ELF/Driver.h +++ b/lld/ELF/Driver.h @@ -22,7 +22,7 @@ namespace lld { namespace elf { -extern class LinkerDriver *driver; +extern std::unique_ptr driver; class LinkerDriver { public: diff --git a/lld/ELF/LinkerScript.cpp b/lld/ELF/LinkerScript.cpp index 782a658fdaed..38b1b9093cd5 100644 --- a/lld/ELF/LinkerScript.cpp +++ b/lld/ELF/LinkerScript.cpp @@ -47,7 +47,7 @@ using namespace llvm::support::endian; using namespace lld; using namespace lld::elf; -LinkerScript *elf::script; +std::unique_ptr elf::script; static bool isSectionPrefix(StringRef prefix, StringRef name) { return name.consume_front(prefix) && (name.empty() || name[0] == '.'); diff --git a/lld/ELF/LinkerScript.h b/lld/ELF/LinkerScript.h index badc4d126be8..5e9a3542c426 100644 --- a/lld/ELF/LinkerScript.h +++ b/lld/ELF/LinkerScript.h @@ -366,7 +366,7 @@ class LinkerScript final { std::vector orphanSections; }; -extern LinkerScript *script; +extern std::unique_ptr script; } // end namespace elf } // end namespace lld diff --git a/lld/ELF/SymbolTable.cpp b/lld/ELF/SymbolTable.cpp index ce77f9233fea..5c7b8b00a6ee 100644 --- a/lld/ELF/SymbolTable.cpp +++ b/lld/ELF/SymbolTable.cpp @@ -29,7 +29,7 @@ using namespace llvm::ELF; using namespace lld; using namespace lld::elf; -SymbolTable *elf::symtab; +std::unique_ptr elf::symtab; void SymbolTable::wrap(Symbol *sym, Symbol *real, Symbol *wrap) { // Redirect __real_foo to the original foo and foo to the original __wrap_foo. diff --git a/lld/ELF/SymbolTable.h b/lld/ELF/SymbolTable.h index 54c4b1169ed1..568e4559ec34 100644 --- a/lld/ELF/SymbolTable.h +++ b/lld/ELF/SymbolTable.h @@ -91,7 +91,7 @@ class SymbolTable { llvm::Optional>> demangledSyms; }; -extern SymbolTable *symtab; +extern std::unique_ptr symtab; } // namespace elf } // namespace lld From e8a0af80137a282ed850e2e53075cbd9d678084d Mon Sep 17 00:00:00 2001 From: Siva Chandra Reddy Date: Wed, 22 Dec 2021 22:19:38 +0000 Subject: [PATCH 086/293] [libc][Obvious] Add target OSUtil tests only if the target OS/arch match. 
--- libc/test/src/__support/OSUtil/CMakeLists.txt | 4 +++- libc/test/src/__support/OSUtil/linux/CMakeLists.txt | 4 +++- 2 files changed, 6 insertions(+), 2 deletions(-) diff --git a/libc/test/src/__support/OSUtil/CMakeLists.txt b/libc/test/src/__support/OSUtil/CMakeLists.txt index 494218b4bb12..c70fedf3ff04 100644 --- a/libc/test/src/__support/OSUtil/CMakeLists.txt +++ b/libc/test/src/__support/OSUtil/CMakeLists.txt @@ -1,3 +1,5 @@ add_libc_testsuite(libc_osutil_tests) -add_subdirectory(linux) +if(EXISTS ${CMAKE_CURRENT_SOURCE_DIR}/${LIBC_TARGET_OS}) + add_subdirectory(${LIBC_TARGET_OS}) +endif() diff --git a/libc/test/src/__support/OSUtil/linux/CMakeLists.txt b/libc/test/src/__support/OSUtil/linux/CMakeLists.txt index 7e2608e595f5..bfb072c03e97 100644 --- a/libc/test/src/__support/OSUtil/linux/CMakeLists.txt +++ b/libc/test/src/__support/OSUtil/linux/CMakeLists.txt @@ -1 +1,3 @@ -add_subdirectory(x86_64) +if(EXISTS ${CMAKE_CURRENT_SOURCE_DIR}/${LIBC_TARGET_ARCHITECTURE}) + add_subdirectory(${LIBC_TARGET_ARCHITECTURE}) +endif() From 6c67ff4b0566aa91b78971df7a989c82b22a8f52 Mon Sep 17 00:00:00 2001 From: Vitaly Buka Date: Wed, 22 Dec 2021 14:13:24 -0800 Subject: [PATCH 087/293] [NFC][sanitizer] Reformat a part of the file --- .../sanitizer_symbolizer_posix_libcdep.cpp | 34 +++++++++---------- 1 file changed, 16 insertions(+), 18 deletions(-) diff --git a/compiler-rt/lib/sanitizer_common/sanitizer_symbolizer_posix_libcdep.cpp b/compiler-rt/lib/sanitizer_common/sanitizer_symbolizer_posix_libcdep.cpp index b128ffc311a2..de353bc5aa09 100644 --- a/compiler-rt/lib/sanitizer_common/sanitizer_symbolizer_posix_libcdep.cpp +++ b/compiler-rt/lib/sanitizer_common/sanitizer_symbolizer_posix_libcdep.cpp @@ -312,29 +312,26 @@ class Addr2LinePool final : public SymbolizerTool { FIRST_32_SECOND_64(UINT32_MAX, UINT64_MAX); }; -#if SANITIZER_SUPPORTS_WEAK_HOOKS +# if SANITIZER_SUPPORTS_WEAK_HOOKS extern "C" { SANITIZER_INTERFACE_ATTRIBUTE SANITIZER_WEAK_ATTRIBUTE bool __sanitizer_symbolize_code(const char *ModuleName, u64 ModuleOffset, char *Buffer, int MaxLength, bool SymbolizeInlineFrames); -SANITIZER_INTERFACE_ATTRIBUTE SANITIZER_WEAK_ATTRIBUTE -bool __sanitizer_symbolize_data(const char *ModuleName, u64 ModuleOffset, - char *Buffer, int MaxLength); -SANITIZER_INTERFACE_ATTRIBUTE SANITIZER_WEAK_ATTRIBUTE -void __sanitizer_symbolize_flush(); -SANITIZER_INTERFACE_ATTRIBUTE SANITIZER_WEAK_ATTRIBUTE -int __sanitizer_symbolize_demangle(const char *Name, char *Buffer, - int MaxLength); +SANITIZER_INTERFACE_ATTRIBUTE SANITIZER_WEAK_ATTRIBUTE bool +__sanitizer_symbolize_data(const char *ModuleName, u64 ModuleOffset, + char *Buffer, int MaxLength); +SANITIZER_INTERFACE_ATTRIBUTE SANITIZER_WEAK_ATTRIBUTE void +__sanitizer_symbolize_flush(); +SANITIZER_INTERFACE_ATTRIBUTE SANITIZER_WEAK_ATTRIBUTE int +__sanitizer_symbolize_demangle(const char *Name, char *Buffer, int MaxLength); } // extern "C" class InternalSymbolizer final : public SymbolizerTool { public: static InternalSymbolizer *get(LowLevelAllocator *alloc) { - if (__sanitizer_symbolize_code != 0 && - __sanitizer_symbolize_data != 0) { - return new(*alloc) InternalSymbolizer(); - } + if (__sanitizer_symbolize_code && __sanitizer_symbolize_data) + return new (*alloc) InternalSymbolizer(); return 0; } @@ -342,7 +339,8 @@ class InternalSymbolizer final : public SymbolizerTool { bool result = __sanitizer_symbolize_code( stack->info.module, stack->info.module_offset, buffer_, kBufferSize, common_flags()->symbolize_inline_frames); - if (result) 
ParseSymbolizePCOutput(buffer_, stack); + if (result) + ParseSymbolizePCOutput(buffer_, stack); return result; } @@ -365,7 +363,7 @@ class InternalSymbolizer final : public SymbolizerTool { if (__sanitizer_symbolize_demangle) { for (uptr res_length = 1024; res_length <= InternalSizeClassMap::kMaxSize;) { - char *res_buff = static_cast(InternalAlloc(res_length)); + char *res_buff = static_cast(InternalAlloc(res_length)); uptr req_length = __sanitizer_symbolize_demangle(name, res_buff, res_length); if (req_length > res_length) { @@ -380,19 +378,19 @@ class InternalSymbolizer final : public SymbolizerTool { } private: - InternalSymbolizer() { } + InternalSymbolizer() {} static const int kBufferSize = 16 * 1024; char buffer_[kBufferSize]; }; -#else // SANITIZER_SUPPORTS_WEAK_HOOKS +# else // SANITIZER_SUPPORTS_WEAK_HOOKS class InternalSymbolizer final : public SymbolizerTool { public: static InternalSymbolizer *get(LowLevelAllocator *alloc) { return 0; } }; -#endif // SANITIZER_SUPPORTS_WEAK_HOOKS +# endif // SANITIZER_SUPPORTS_WEAK_HOOKS const char *Symbolizer::PlatformDemangle(const char *name) { return DemangleSwiftAndCXX(name); From 63997782a0d0f3ddf815b54c7126f8e1f4b8818e Mon Sep 17 00:00:00 2001 From: Vitaly Buka Date: Wed, 22 Dec 2021 14:12:19 -0800 Subject: [PATCH 088/293] [NFC][sanitizer] Reformat script --- .../sanitizer_common/symbolizer/scripts/build_symbolizer.sh | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/compiler-rt/lib/sanitizer_common/symbolizer/scripts/build_symbolizer.sh b/compiler-rt/lib/sanitizer_common/symbolizer/scripts/build_symbolizer.sh index 56a0d99d59c8..29925ccf30a1 100755 --- a/compiler-rt/lib/sanitizer_common/symbolizer/scripts/build_symbolizer.sh +++ b/compiler-rt/lib/sanitizer_common/symbolizer/scripts/build_symbolizer.sh @@ -155,7 +155,10 @@ SYMBOLIZER_FLAGS="$LLVM_FLAGS -I${LLVM_SRC}/include -I${LLVM_BUILD}/include -std $CXX $SYMBOLIZER_FLAGS ${SRC_DIR}/sanitizer_symbolize.cpp ${SRC_DIR}/sanitizer_wrappers.cpp -c $AR rc symbolizer.a sanitizer_symbolize.o sanitizer_wrappers.o -SYMBOLIZER_API_LIST=__sanitizer_symbolize_code,__sanitizer_symbolize_data,__sanitizer_symbolize_flush,__sanitizer_symbolize_demangle +SYMBOLIZER_API_LIST=__sanitizer_symbolize_code +SYMBOLIZER_API_LIST+=,__sanitizer_symbolize_data +SYMBOLIZER_API_LIST+=,__sanitizer_symbolize_flush +SYMBOLIZER_API_LIST+=,__sanitizer_symbolize_demangle LIBCXX_ARCHIVE_DIR=$(dirname $(find $LIBCXX_BUILD -name libc++.a | head -n1)) From caf940bac6fc30216f9cfd45e6fafe65d5f7ad88 Mon Sep 17 00:00:00 2001 From: Arthur O'Dwyer Date: Wed, 22 Dec 2021 17:56:03 -0500 Subject: [PATCH 089/293] [libc++] Remove "clang-format off/on" comments. NFC. These headers have stabilized; we don't expect anyone to be blindly clang-formatting them anymore. Leave the comments in `__format/*.h` for Mark to remove at his leisure. 
--- libcxx/include/__iterator/concepts.h | 4 ---- libcxx/include/__ranges/concepts.h | 4 ---- libcxx/include/__ranges/data.h | 3 --- libcxx/include/__ranges/empty.h | 2 -- 4 files changed, 13 deletions(-) diff --git a/libcxx/include/__iterator/concepts.h b/libcxx/include/__iterator/concepts.h index 531acdf0a5b2..d7a666743afb 100644 --- a/libcxx/include/__iterator/concepts.h +++ b/libcxx/include/__iterator/concepts.h @@ -28,8 +28,6 @@ _LIBCPP_BEGIN_NAMESPACE_STD #if !defined(_LIBCPP_HAS_NO_RANGES) -// clang-format off - // [iterator.concept.readable] template concept __indirectly_readable_impl = @@ -259,8 +257,6 @@ concept indirectly_movable_storable = // Note: indirectly_swappable is located in iter_swap.h to prevent a dependency cycle // (both iter_swap and indirectly_swappable require indirectly_readable). -// clang-format on - #endif // !defined(_LIBCPP_HAS_NO_RANGES) _LIBCPP_END_NAMESPACE_STD diff --git a/libcxx/include/__ranges/concepts.h b/libcxx/include/__ranges/concepts.h index 6a8364006beb..bad23c8c4bfb 100644 --- a/libcxx/include/__ranges/concepts.h +++ b/libcxx/include/__ranges/concepts.h @@ -29,8 +29,6 @@ _LIBCPP_BEGIN_NAMESPACE_STD -// clang-format off - #if !defined(_LIBCPP_HAS_NO_RANGES) namespace ranges { @@ -126,8 +124,6 @@ namespace ranges { #endif // !defined(_LIBCPP_HAS_NO_RANGES) -// clang-format on - _LIBCPP_END_NAMESPACE_STD #endif // _LIBCPP___RANGES_CONCEPTS_H diff --git a/libcxx/include/__ranges/data.h b/libcxx/include/__ranges/data.h index 7eade494cceb..a33da0e5a1c2 100644 --- a/libcxx/include/__ranges/data.h +++ b/libcxx/include/__ranges/data.h @@ -26,7 +26,6 @@ _LIBCPP_BEGIN_NAMESPACE_STD #if !defined(_LIBCPP_HAS_NO_RANGES) -// clang-format off namespace ranges { // [range.prim.data] namespace __data { @@ -72,8 +71,6 @@ inline namespace __cpo { } // namespace __cpo } // namespace ranges -// clang-format off - #endif // !defined(_LIBCPP_HAS_NO_RANGES) _LIBCPP_END_NAMESPACE_STD diff --git a/libcxx/include/__ranges/empty.h b/libcxx/include/__ranges/empty.h index fc6a938fd86e..bf6772c5673b 100644 --- a/libcxx/include/__ranges/empty.h +++ b/libcxx/include/__ranges/empty.h @@ -24,7 +24,6 @@ _LIBCPP_BEGIN_NAMESPACE_STD #if !defined(_LIBCPP_HAS_NO_RANGES) -// clang-format off namespace ranges { // [range.prim.empty] namespace __empty { @@ -72,7 +71,6 @@ inline namespace __cpo { inline constexpr auto empty = __empty::__fn{}; } // namespace __cpo } // namespace ranges -// clang-format off #endif // !defined(_LIBCPP_HAS_NO_RANGES) From e4103c91f857e124a33e8c504e2d6ac2b2e06f53 Mon Sep 17 00:00:00 2001 From: alex-t Date: Thu, 23 Dec 2021 00:45:01 +0300 Subject: [PATCH 090/293] [AMDGPU] Select build_vector DAG nodes according to the divergence This change enables divergence-driven instruction selection for the build_vector DAG nodes. It also enables packed i16 instructions for GFX9. 
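For illustration, a minimal sketch of the two cases the selector now distinguishes (it mirrors the new divergence-driven-buildvector.ll test added below; the function names here are only illustrative):

  ; Uniform operands (kernel arguments) can stay on the scalar unit,
  ; e.g. S_PACK_LL_B32_B16 on GFX9.
  define amdgpu_kernel void @uniform_pack(i32 addrspace(1)* %out, i16 %a, i16 %b) {
    %t = insertelement <2 x i16> undef, i16 %a, i32 0
    %v = insertelement <2 x i16> %t, i16 %b, i32 1
    %bits = bitcast <2 x i16> %v to i32
    store i32 %bits, i32 addrspace(1)* %out
    ret void
  }

  ; The same pack of divergent per-lane values must be selected to VALU
  ; instructions, e.g. V_LSHL_OR_B32.
  define i32 @divergent_pack(i16 %a, i16 %b) {
    %t = insertelement <2 x i16> undef, i16 %a, i32 0
    %v = insertelement <2 x i16> %t, i16 %b, i32 1
    %bits = bitcast <2 x i16> %v to i32
    ret i32 %bits
  }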
Reviewed By: rampitec Differential Revision: https://reviews.llvm.org/D116187 --- llvm/lib/Target/AMDGPU/SIInstructions.td | 67 ++++- .../AMDGPU/divergence-driven-buildvector.ll | 215 +++++++++++++++ llvm/test/CodeGen/AMDGPU/idot8s.ll | 248 +++++++++--------- .../CodeGen/AMDGPU/vector_shuffle.packed.ll | 52 ++-- 4 files changed, 423 insertions(+), 159 deletions(-) create mode 100755 llvm/test/CodeGen/AMDGPU/divergence-driven-buildvector.ll diff --git a/llvm/lib/Target/AMDGPU/SIInstructions.td b/llvm/lib/Target/AMDGPU/SIInstructions.td index 2bbce1d8ba93..2f0792203d4f 100644 --- a/llvm/lib/Target/AMDGPU/SIInstructions.td +++ b/llvm/lib/Target/AMDGPU/SIInstructions.td @@ -2429,20 +2429,36 @@ def : GCNPat < // COPY is workaround tablegen bug from multiple outputs // from S_LSHL_B32's multiple outputs from implicit scc def. def : GCNPat < - (v2i16 (build_vector (i16 0), (i16 SReg_32:$src1))), + (v2i16 (UniformBinFrag (i16 0), (i16 SReg_32:$src1))), (S_LSHL_B32 SReg_32:$src1, (i16 16)) >; def : GCNPat < - (v2i16 (build_vector (i16 SReg_32:$src1), (i16 0))), + (v2i16 (DivergentBinFrag (i16 0), (i16 SReg_32:$src1))), + (v2i16 (V_LSHLREV_B32_e64 (i16 16), SReg_32:$src1)) +>; + + +def : GCNPat < + (v2i16 (UniformBinFrag (i16 SReg_32:$src1), (i16 0))), (S_AND_B32 (S_MOV_B32 (i32 0xffff)), SReg_32:$src1) >; def : GCNPat < - (v2f16 (build_vector (f16 SReg_32:$src1), (f16 FP_ZERO))), + (v2i16 (DivergentBinFrag (i16 SReg_32:$src1), (i16 0))), + (v2i16 (V_AND_B32_e64 (i32 (V_MOV_B32_e32 (i32 0xffff))), SReg_32:$src1)) +>; + +def : GCNPat < + (v2f16 (UniformBinFrag (f16 SReg_32:$src1), (f16 FP_ZERO))), (S_AND_B32 (S_MOV_B32 (i32 0xffff)), SReg_32:$src1) >; +def : GCNPat < + (v2f16 (DivergentBinFrag (f16 SReg_32:$src1), (f16 FP_ZERO))), + (v2f16 (V_AND_B32_e64 (i32 (V_MOV_B32_e32 (i32 0xffff))), SReg_32:$src1)) +>; + def : GCNPat < (v2i16 (build_vector (i16 SReg_32:$src0), (i16 undef))), (COPY_TO_REGCLASS SReg_32:$src0, SReg_32) @@ -2459,41 +2475,74 @@ def : GCNPat < >; def : GCNPat < - (v2i16 (build_vector (i16 undef), (i16 SReg_32:$src1))), + (v2i16 (UniformBinFrag (i16 undef), (i16 SReg_32:$src1))), (S_LSHL_B32 SReg_32:$src1, (i32 16)) >; def : GCNPat < - (v2f16 (build_vector (f16 undef), (f16 SReg_32:$src1))), + (v2i16 (DivergentBinFrag (i16 undef), (i16 SReg_32:$src1))), + (v2i16 (V_LSHLREV_B32_e64 (i32 16), SReg_32:$src1)) +>; + + +def : GCNPat < + (v2f16 (UniformBinFrag (f16 undef), (f16 SReg_32:$src1))), (S_LSHL_B32 SReg_32:$src1, (i32 16)) >; +def : GCNPat < + (v2f16 (DivergentBinFrag (f16 undef), (f16 SReg_32:$src1))), + (v2f16 (V_LSHLREV_B32_e64 (i32 16), SReg_32:$src1)) +>; + let SubtargetPredicate = HasVOP3PInsts in { def : GCNPat < - (v2i16 (build_vector (i16 SReg_32:$src0), (i16 SReg_32:$src1))), + (v2i16 (UniformBinFrag (i16 SReg_32:$src0), (i16 SReg_32:$src1))), (S_PACK_LL_B32_B16 SReg_32:$src0, SReg_32:$src1) >; +def : GCNPat < + (v2i16 (DivergentBinFrag (i16 SReg_32:$src0), (i16 SReg_32:$src1))), + (v2i16 (V_LSHL_OR_B32_e64 $src1, (i32 16), (i32 (V_AND_B32_e64 (i32 (V_MOV_B32_e32 (i32 0xffff))), $src0)))) +>; + // With multiple uses of the shift, this will duplicate the shift and // increase register pressure. 
def : GCNPat < - (v2i16 (build_vector (i16 SReg_32:$src0), (i16 (trunc (srl_oneuse SReg_32:$src1, (i32 16)))))), + (v2i16 (UniformBinFrag (i16 SReg_32:$src0), (i16 (trunc (srl_oneuse SReg_32:$src1, (i32 16)))))), (v2i16 (S_PACK_LH_B32_B16 SReg_32:$src0, SReg_32:$src1)) >; +def : GCNPat < + (v2i16 (DivergentBinFrag (i16 SReg_32:$src0), (i16 (trunc (srl_oneuse SReg_32:$src1, (i32 16)))))), + (v2i16 (V_BFI_B32_e64 (i32 (V_MOV_B32_e32 (i32 0xffff))), SReg_32:$src0, SReg_32:$src1)) +>; + def : GCNPat < - (v2i16 (build_vector (i16 (trunc (srl_oneuse SReg_32:$src0, (i32 16)))), + (v2i16 (UniformBinFrag (i16 (trunc (srl_oneuse SReg_32:$src0, (i32 16)))), (i16 (trunc (srl_oneuse SReg_32:$src1, (i32 16)))))), (S_PACK_HH_B32_B16 SReg_32:$src0, SReg_32:$src1) >; +def : GCNPat < + (v2i16 (DivergentBinFrag (i16 (trunc (srl_oneuse SReg_32:$src0, (i32 16)))), + (i16 (trunc (srl_oneuse SReg_32:$src1, (i32 16)))))), + (v2i16 (V_AND_OR_B32_e64 SReg_32:$src1, (i32 (V_MOV_B32_e32 (i32 0xffff0000))), (i32 (V_LSHRREV_B32_e64 (i32 16), SReg_32:$src0)))) +>; + // TODO: Should source modifiers be matched to v_pack_b32_f16? def : GCNPat < - (v2f16 (build_vector (f16 SReg_32:$src0), (f16 SReg_32:$src1))), + (v2f16 (UniformBinFrag (f16 SReg_32:$src0), (f16 SReg_32:$src1))), (S_PACK_LL_B32_B16 SReg_32:$src0, SReg_32:$src1) >; +def : GCNPat < + (v2f16 (DivergentBinFrag (f16 SReg_32:$src0), (f16 SReg_32:$src1))), + (v2f16 (V_LSHL_OR_B32_e64 SReg_32:$src1, (i32 16), (i32 (V_AND_B32_e64 (i32 (V_MOV_B32_e32 (i32 0xffff))), SReg_32:$src0)))) +>; + + def : GCNPat < (v2f16 (is_canonicalized (f16 (VOP3Mods (f16 VGPR_32:$src0), i32:$src0_mods)), (f16 (VOP3Mods (f16 VGPR_32:$src1), i32:$src1_mods)))), diff --git a/llvm/test/CodeGen/AMDGPU/divergence-driven-buildvector.ll b/llvm/test/CodeGen/AMDGPU/divergence-driven-buildvector.ll new file mode 100755 index 000000000000..d7fa098d5eb8 --- /dev/null +++ b/llvm/test/CodeGen/AMDGPU/divergence-driven-buildvector.ll @@ -0,0 +1,215 @@ +; RUN: llc -march=amdgcn -stop-after=amdgpu-isel < %s | FileCheck -check-prefix=GCN %s +; RUN: llc -march=amdgcn -mcpu=gfx900 -stop-after=amdgpu-isel < %s | FileCheck -check-prefix=GFX9 %s + +; GCN-LABEL: name: uniform_vec_0_i16 +; GCN: S_LSHL_B32 +define amdgpu_kernel void @uniform_vec_0_i16(i32 addrspace(1)* %out, i16 %a) { + %tmp = insertelement <2 x i16> undef, i16 0, i32 0 + %vec = insertelement <2 x i16> %tmp, i16 %a, i32 1 + %val = bitcast <2 x i16> %vec to i32 + store i32 %val, i32 addrspace(1)* %out, align 4 + ret void +} + +; GCN-LABEL: name: divergent_vec_0_i16 +; GCN: V_LSHLREV_B32_e64 +define i32 @divergent_vec_0_i16(i16 %a) { + %tmp = insertelement <2 x i16> undef, i16 0, i32 0 + %vec = insertelement <2 x i16> %tmp, i16 %a, i32 1 + %val = bitcast <2 x i16> %vec to i32 + ret i32 %val +} + +; GCN-LABEL: name: uniform_vec_i16_0 +; GCN: S_AND_B32 +define amdgpu_kernel void @uniform_vec_i16_0(i32 addrspace(1)* %out, i16 %a) { + %tmp = insertelement <2 x i16> undef, i16 %a, i32 0 + %vec = insertelement <2 x i16> %tmp, i16 0, i32 1 + %val = bitcast <2 x i16> %vec to i32 + store i32 %val, i32 addrspace(1)* %out, align 4 + ret void +} + +; GCN-LABEL: name: divergent_vec_i16_0 +; GCN: V_AND_B32_e64 +define i32 @divergent_vec_i16_0(i16 %a) { + %tmp = insertelement <2 x i16> undef, i16 %a, i32 0 + %vec = insertelement <2 x i16> %tmp, i16 0, i32 1 + %val = bitcast <2 x i16> %vec to i32 + ret i32 %val +} + +; GCN-LABEL: name: uniform_vec_f16_0 +; GCN: S_AND_B32 +define amdgpu_kernel void @uniform_vec_f16_0(float addrspace(1)* %out, half %a) { + %tmp = 
insertelement <2 x half> undef, half %a, i32 0 + %vec = insertelement <2 x half> %tmp, half 0.0, i32 1 + %val = bitcast <2 x half> %vec to float + store float %val, float addrspace(1)* %out, align 4 + ret void +} + +; GCN-LABEL: name: divergent_vec_f16_0 +; GCN: V_CVT_F16_F32_e64 0, %0 +; GCN: COPY %1 + +; GFX9-LABEL: name: divergent_vec_f16_0 +; GFX9: V_AND_B32_e64 +define float @divergent_vec_f16_0(half %a) { + %tmp = insertelement <2 x half> undef, half %a, i32 0 + %vec = insertelement <2 x half> %tmp, half 0.0, i32 1 + %val = bitcast <2 x half> %vec to float + ret float %val +} + +; GCN-LABEL: name: uniform_vec_i16_LL +; GCN: %[[IMM:[0-9]+]]:sreg_32 = S_MOV_B32 65535 +; GCN: %[[AND:[0-9]+]]:sreg_32 = S_AND_B32 killed %{{[0-9]+}}, killed %[[IMM]] +; GCN: %[[SHIFT:[0-9]+]]:sreg_32 = S_MOV_B32 16 +; GCN: %[[SHL:[0-9]+]]:sreg_32 = S_LSHL_B32 killed %{{[0-9]+}}, killed %[[SHIFT]] +; GCN: S_OR_B32 killed %[[AND]], killed %[[SHL]] + +; GFX9-LABEL: name: uniform_vec_i16_LL +; GFX9: S_PACK_LL_B32_B16 +define amdgpu_kernel void @uniform_vec_i16_LL(i32 addrspace(4)* %in0, i32 addrspace(4)* %in1) { + %val0 = load volatile i32, i32 addrspace(4)* %in0 + %val1 = load volatile i32, i32 addrspace(4)* %in1 + %lo = trunc i32 %val0 to i16 + %hi = trunc i32 %val1 to i16 + %vec.0 = insertelement <2 x i16> undef, i16 %lo, i32 0 + %vec.1 = insertelement <2 x i16> %vec.0, i16 %hi, i32 1 + %vec.i32 = bitcast <2 x i16> %vec.1 to i32 + call void asm sideeffect "; use $0", "s"(i32 %vec.i32) #0 + ret void +} + +; GCN-LABEL: name: divergent_vec_i16_LL +; GCN: %[[SHIFT:[0-9]+]]:sreg_32 = S_MOV_B32 16 +; GCN: %[[SHL:[0-9]+]]:vgpr_32 = V_LSHLREV_B32_e64 killed %[[SHIFT]], %1, implicit $exec +; GCN: %[[IMM:[0-9]+]]:sreg_32 = S_MOV_B32 65535 +; GCN: %[[AND:[0-9]+]]:vgpr_32 = V_AND_B32_e64 %0, killed %[[IMM]], implicit $exec +; GCN: V_OR_B32_e64 killed %[[AND]], killed %[[SHL]], implicit $exec + +; GFX9-LABEL: name: divergent_vec_i16_LL +; GFX9: %[[IMM:[0-9]+]]:vgpr_32 = V_MOV_B32_e32 65535 +; GFX9: %[[AND:[0-9]+]]:vgpr_32 = V_AND_B32_e64 killed %[[IMM]] +; GFX9: V_LSHL_OR_B32_e64 %{{[0-9]+}}, 16, killed %[[AND]] +define i32 @divergent_vec_i16_LL(i16 %a, i16 %b) { + %tmp = insertelement <2 x i16> undef, i16 %a, i32 0 + %vec = insertelement <2 x i16> %tmp, i16 %b, i32 1 + %val = bitcast <2 x i16> %vec to i32 + ret i32 %val +} + +; GCN-LABEL: name: uniform_vec_i16_LH +; GCN-DAG: %[[IMM:[0-9]+]]:sreg_32 = S_MOV_B32 65535 +; GCN-DAG: %[[AND:[0-9]+]]:sreg_32 = S_AND_B32 killed %{{[0-9]+}}, killed %[[IMM]] +; GCN-DAG: %[[NEG:[0-9]+]]:sreg_32 = S_MOV_B32 -65536 +; GCN-DAG: %[[ANDN:[0-9]+]]:sreg_32 = S_AND_B32 killed %{{[0-9]+}}, killed %[[NEG]] +; GCN: S_OR_B32 killed %[[AND]], killed %[[ANDN]] + +; GFX9-LABEL: name: uniform_vec_i16_LH +; GFX9: S_PACK_LH_B32_B16 +define amdgpu_kernel void @uniform_vec_i16_LH(i32 addrspace(1)* %out, i16 %a, i32 %b) { + %shift = lshr i32 %b, 16 + %tr = trunc i32 %shift to i16 + %tmp = insertelement <2 x i16> undef, i16 %a, i32 0 + %vec = insertelement <2 x i16> %tmp, i16 %tr, i32 1 + %val = bitcast <2 x i16> %vec to i32 + store i32 %val, i32 addrspace(1)* %out, align 4 + ret void +} + +; GCN-LABEL: name: divergent_vec_i16_LH +; GCN: %[[IMM:[0-9]+]]:sreg_32 = S_MOV_B32 65535 +; GCN: V_BFI_B32_e64 killed %[[IMM]] +define i32 @divergent_vec_i16_LH(i16 %a, i32 %b) { + %shift = lshr i32 %b, 16 + %tr = trunc i32 %shift to i16 + %tmp = insertelement <2 x i16> undef, i16 %a, i32 0 + %vec = insertelement <2 x i16> %tmp, i16 %tr, i32 1 + %val = bitcast <2 x i16> %vec to i32 + ret i32 %val +} + +; GCN-LABEL: 
name: uniform_vec_i16_HH +; GCN: %[[SHIFT:[0-9]+]]:sreg_32 = S_MOV_B32 16 +; GCN: %[[SHR:[0-9]+]]:sreg_32 = S_LSHR_B32 killed %{{[0-9]+}}, killed %[[SHIFT]] +; GCN: %[[IMM:[0-9]+]]:sreg_32 = S_MOV_B32 -65536 +; GCN: %[[AND:[0-9]+]]:sreg_32 = S_AND_B32 killed %{{[0-9]+}}, killed %[[IMM]] +; GCN: S_OR_B32 killed %[[SHR]], killed %[[AND]] + +; GFX9-LABEL: name: uniform_vec_i16_HH +; GFX9: S_PACK_HH_B32_B16 +define amdgpu_kernel void @uniform_vec_i16_HH(i32 addrspace(1)* %out, i32 %a, i32 %b) { + %shift_a = lshr i32 %a, 16 + %tr_a = trunc i32 %shift_a to i16 + %shift_b = lshr i32 %b, 16 + %tr_b = trunc i32 %shift_b to i16 + %tmp = insertelement <2 x i16> undef, i16 %tr_a, i32 0 + %vec = insertelement <2 x i16> %tmp, i16 %tr_b, i32 1 + %val = bitcast <2 x i16> %vec to i32 + store i32 %val, i32 addrspace(1)* %out, align 4 + ret void +} + +; GCN-LABEL: name: divergent_vec_i16_HH +; GCN: %[[SHR:[0-9]+]]:vgpr_32 = V_LSHRREV_B32_e64 killed %{{[0-9]+}}, %0, implicit $exec +; GCN: %[[IMM:[0-9]+]]:sreg_32 = S_MOV_B32 -65536 +; GCN: %[[AND:[0-9]+]]:vgpr_32 = V_AND_B32_e64 %1, killed %[[IMM]], implicit $exec +; GCN: V_OR_B32_e64 killed %[[SHR]], killed %[[AND]], implicit $exec + +; GFX9-LABEL: name: divergent_vec_i16_HH +; GFX9: %[[SHR:[0-9]+]]:vgpr_32 = V_LSHRREV_B32_e64 16, %0 +; GFX9: %[[IMM:[0-9]+]]:vgpr_32 = V_MOV_B32_e32 -65536, implicit $exec +; GFX9: V_AND_OR_B32_e64 %1, killed %[[IMM]], killed %[[SHR]] +define i32 @divergent_vec_i16_HH(i32 %a, i32 %b) { + %shift_a = lshr i32 %a, 16 + %tr_a = trunc i32 %shift_a to i16 + %shift_b = lshr i32 %b, 16 + %tr_b = trunc i32 %shift_b to i16 + %tmp = insertelement <2 x i16> undef, i16 %tr_a, i32 0 + %vec = insertelement <2 x i16> %tmp, i16 %tr_b, i32 1 + %val = bitcast <2 x i16> %vec to i32 + ret i32 %val +} + +; GCN-LABEL: name: uniform_vec_f16_LL +; GCN: %[[IMM:[0-9]+]]:sreg_32 = S_MOV_B32 65535 +; GCN: %[[AND:[0-9]+]]:sreg_32 = S_AND_B32 killed %{{[0-9]+}}, killed %[[IMM]] +; GCN: %[[SHIFT:[0-9]+]]:sreg_32 = S_MOV_B32 16 +; GCN: %[[SHL:[0-9]+]]:sreg_32 = S_LSHL_B32 killed %{{[0-9]+}}, killed %[[SHIFT]] +; GCN: S_OR_B32 killed %[[AND]], killed %[[SHL]] + +; GFX9-LABEL: name: uniform_vec_f16_LL +; GFX9: S_PACK_LL_B32_B16 +define amdgpu_kernel void @uniform_vec_f16_LL(i32 addrspace(4)* %in0, i32 addrspace(4)* %in1) { + %val0 = load volatile i32, i32 addrspace(4)* %in0 + %val1 = load volatile i32, i32 addrspace(4)* %in1 + %lo.i = trunc i32 %val0 to i16 + %hi.i = trunc i32 %val1 to i16 + %lo = bitcast i16 %lo.i to half + %hi = bitcast i16 %hi.i to half + %vec.0 = insertelement <2 x half> undef, half %lo, i32 0 + %vec.1 = insertelement <2 x half> %vec.0, half %hi, i32 1 + %vec.i32 = bitcast <2 x half> %vec.1 to i32 + + call void asm sideeffect "; use $0", "s"(i32 %vec.i32) #0 + ret void +} + +; GCN-LABEL: name: divergent_vec_f16_LL +; GCN: %[[SHIFT:[0-9]+]]:sreg_32 = S_MOV_B32 16 +; GCN: %[[SHL:[0-9]+]]:vgpr_32 = V_LSHLREV_B32_e64 killed %[[SHIFT]] +; GCN: V_OR_B32_e64 killed %{{[0-9]+}}, killed %[[SHL]], implicit $exec + +; GFX9-LABEL: name: divergent_vec_f16_LL +; GFX9: %[[IMM:[0-9]+]]:vgpr_32 = V_MOV_B32_e32 65535 +; GFX9: %[[AND:[0-9]+]]:vgpr_32 = V_AND_B32_e64 killed %[[IMM]] +; GFX9: V_LSHL_OR_B32_e64 %{{[0-9]+}}, 16, killed %[[AND]] +define float @divergent_vec_f16_LL(half %a, half %b) { + %tmp = insertelement <2 x half> undef, half %a, i32 0 + %vec = insertelement <2 x half> %tmp, half %b, i32 1 + %val = bitcast <2 x half> %vec to float + ret float %val +} diff --git a/llvm/test/CodeGen/AMDGPU/idot8s.ll b/llvm/test/CodeGen/AMDGPU/idot8s.ll index 
e73354d2d0b8..d3153d12641c 100644 --- a/llvm/test/CodeGen/AMDGPU/idot8s.ll +++ b/llvm/test/CodeGen/AMDGPU/idot8s.ll @@ -2362,46 +2362,46 @@ define amdgpu_kernel void @idot8_acc16_vecMul(<8 x i4> addrspace(1)* %src1, ; GFX9-NEXT: global_load_ushort v3, v0, s[2:3] ; GFX9-NEXT: s_addc_u32 s9, s9, 0 ; GFX9-NEXT: s_waitcnt vmcnt(2) -; GFX9-NEXT: v_and_b32_e32 v11, 15, v1 +; GFX9-NEXT: v_lshrrev_b32_e32 v5, 28, v1 +; GFX9-NEXT: v_bfe_u32 v6, v1, 24, 4 +; GFX9-NEXT: v_bfe_u32 v7, v1, 20, 4 +; GFX9-NEXT: v_bfe_u32 v8, v1, 16, 4 +; GFX9-NEXT: v_bfe_u32 v9, v1, 12, 4 +; GFX9-NEXT: v_bfe_u32 v10, v1, 8, 4 +; GFX9-NEXT: v_bfe_u32 v11, v1, 4, 4 +; GFX9-NEXT: v_and_b32_e32 v1, 15, v1 ; GFX9-NEXT: s_waitcnt vmcnt(1) -; GFX9-NEXT: v_and_b32_e32 v18, 15, v2 -; GFX9-NEXT: v_bfe_u32 v5, v1, 24, 4 -; GFX9-NEXT: v_lshrrev_b32_e32 v6, 28, v1 -; GFX9-NEXT: v_bfe_u32 v7, v1, 16, 4 -; GFX9-NEXT: v_bfe_u32 v8, v1, 20, 4 -; GFX9-NEXT: v_bfe_u32 v9, v1, 8, 4 -; GFX9-NEXT: v_bfe_u32 v10, v1, 12, 4 -; GFX9-NEXT: v_bfe_u32 v1, v1, 4, 4 -; GFX9-NEXT: v_bfe_u32 v12, v2, 24, 4 -; GFX9-NEXT: v_lshrrev_b32_e32 v13, 28, v2 -; GFX9-NEXT: v_bfe_u32 v14, v2, 16, 4 -; GFX9-NEXT: v_bfe_u32 v15, v2, 20, 4 -; GFX9-NEXT: v_bfe_u32 v16, v2, 8, 4 -; GFX9-NEXT: v_bfe_u32 v17, v2, 12, 4 -; GFX9-NEXT: v_bfe_u32 v2, v2, 4, 4 -; GFX9-NEXT: v_and_b32_e32 v11, v4, v11 -; GFX9-NEXT: v_and_b32_e32 v18, v4, v18 -; GFX9-NEXT: v_lshl_or_b32 v1, v1, 16, v11 -; GFX9-NEXT: v_lshl_or_b32 v2, v2, 16, v18 -; GFX9-NEXT: v_and_b32_e32 v9, v4, v9 -; GFX9-NEXT: v_and_b32_e32 v5, v4, v5 -; GFX9-NEXT: v_and_b32_e32 v16, v4, v16 +; GFX9-NEXT: v_lshrrev_b32_e32 v12, 28, v2 +; GFX9-NEXT: v_bfe_u32 v13, v2, 24, 4 +; GFX9-NEXT: v_bfe_u32 v14, v2, 20, 4 +; GFX9-NEXT: v_bfe_u32 v15, v2, 16, 4 +; GFX9-NEXT: v_bfe_u32 v16, v2, 12, 4 +; GFX9-NEXT: v_bfe_u32 v17, v2, 8, 4 +; GFX9-NEXT: v_bfe_u32 v18, v2, 4, 4 +; GFX9-NEXT: v_and_b32_e32 v2, 15, v2 +; GFX9-NEXT: v_and_b32_e32 v1, v4, v1 +; GFX9-NEXT: v_and_b32_e32 v2, v4, v2 +; GFX9-NEXT: v_lshl_or_b32 v1, v11, 16, v1 +; GFX9-NEXT: v_lshl_or_b32 v2, v18, 16, v2 +; GFX9-NEXT: v_and_b32_e32 v10, v4, v10 +; GFX9-NEXT: v_and_b32_e32 v6, v4, v6 +; GFX9-NEXT: v_and_b32_e32 v17, v4, v17 ; GFX9-NEXT: v_pk_lshlrev_b16 v1, 12, v1 op_sel_hi:[0,1] ; GFX9-NEXT: v_pk_lshlrev_b16 v2, 12, v2 op_sel_hi:[0,1] -; GFX9-NEXT: v_lshl_or_b32 v9, v10, 16, v9 -; GFX9-NEXT: v_lshl_or_b32 v5, v6, 16, v5 -; GFX9-NEXT: v_lshl_or_b32 v6, v17, 16, v16 +; GFX9-NEXT: v_lshl_or_b32 v9, v9, 16, v10 +; GFX9-NEXT: v_lshl_or_b32 v5, v5, 16, v6 +; GFX9-NEXT: v_lshl_or_b32 v6, v16, 16, v17 ; GFX9-NEXT: v_pk_ashrrev_i16 v1, 12, v1 op_sel_hi:[0,1] ; GFX9-NEXT: v_pk_ashrrev_i16 v2, 12, v2 op_sel_hi:[0,1] -; GFX9-NEXT: v_and_b32_e32 v7, v4, v7 -; GFX9-NEXT: v_and_b32_e32 v14, v4, v14 -; GFX9-NEXT: v_and_b32_e32 v4, v4, v12 +; GFX9-NEXT: v_and_b32_e32 v8, v4, v8 +; GFX9-NEXT: v_and_b32_e32 v15, v4, v15 +; GFX9-NEXT: v_and_b32_e32 v4, v4, v13 ; GFX9-NEXT: v_pk_lshlrev_b16 v9, 12, v9 op_sel_hi:[0,1] ; GFX9-NEXT: v_pk_lshlrev_b16 v6, 12, v6 op_sel_hi:[0,1] ; GFX9-NEXT: v_pk_mul_lo_u16 v1, v1, v2 -; GFX9-NEXT: v_lshl_or_b32 v7, v8, 16, v7 -; GFX9-NEXT: v_lshl_or_b32 v8, v15, 16, v14 -; GFX9-NEXT: v_lshl_or_b32 v4, v13, 16, v4 +; GFX9-NEXT: v_lshl_or_b32 v7, v7, 16, v8 +; GFX9-NEXT: v_lshl_or_b32 v8, v14, 16, v15 +; GFX9-NEXT: v_lshl_or_b32 v4, v12, 16, v4 ; GFX9-NEXT: v_pk_ashrrev_i16 v9, 12, v9 op_sel_hi:[0,1] ; GFX9-NEXT: v_pk_ashrrev_i16 v6, 12, v6 op_sel_hi:[0,1] ; GFX9-NEXT: s_waitcnt vmcnt(0) @@ -2445,46 +2445,46 @@ define amdgpu_kernel void 
@idot8_acc16_vecMul(<8 x i4> addrspace(1)* %src1, ; GFX9-DL-NEXT: global_load_ushort v3, v0, s[2:3] ; GFX9-DL-NEXT: s_addc_u32 s9, s9, 0 ; GFX9-DL-NEXT: s_waitcnt vmcnt(2) -; GFX9-DL-NEXT: v_and_b32_e32 v11, 15, v1 +; GFX9-DL-NEXT: v_lshrrev_b32_e32 v5, 28, v1 +; GFX9-DL-NEXT: v_bfe_u32 v6, v1, 24, 4 +; GFX9-DL-NEXT: v_bfe_u32 v7, v1, 20, 4 +; GFX9-DL-NEXT: v_bfe_u32 v8, v1, 16, 4 +; GFX9-DL-NEXT: v_bfe_u32 v9, v1, 12, 4 +; GFX9-DL-NEXT: v_bfe_u32 v10, v1, 8, 4 +; GFX9-DL-NEXT: v_bfe_u32 v11, v1, 4, 4 +; GFX9-DL-NEXT: v_and_b32_e32 v1, 15, v1 ; GFX9-DL-NEXT: s_waitcnt vmcnt(1) -; GFX9-DL-NEXT: v_and_b32_e32 v18, 15, v2 -; GFX9-DL-NEXT: v_bfe_u32 v5, v1, 24, 4 -; GFX9-DL-NEXT: v_lshrrev_b32_e32 v6, 28, v1 -; GFX9-DL-NEXT: v_bfe_u32 v7, v1, 16, 4 -; GFX9-DL-NEXT: v_bfe_u32 v8, v1, 20, 4 -; GFX9-DL-NEXT: v_bfe_u32 v9, v1, 8, 4 -; GFX9-DL-NEXT: v_bfe_u32 v10, v1, 12, 4 -; GFX9-DL-NEXT: v_bfe_u32 v1, v1, 4, 4 -; GFX9-DL-NEXT: v_bfe_u32 v12, v2, 24, 4 -; GFX9-DL-NEXT: v_lshrrev_b32_e32 v13, 28, v2 -; GFX9-DL-NEXT: v_bfe_u32 v14, v2, 16, 4 -; GFX9-DL-NEXT: v_bfe_u32 v15, v2, 20, 4 -; GFX9-DL-NEXT: v_bfe_u32 v16, v2, 8, 4 -; GFX9-DL-NEXT: v_bfe_u32 v17, v2, 12, 4 -; GFX9-DL-NEXT: v_bfe_u32 v2, v2, 4, 4 -; GFX9-DL-NEXT: v_and_b32_e32 v11, v4, v11 -; GFX9-DL-NEXT: v_and_b32_e32 v18, v4, v18 -; GFX9-DL-NEXT: v_lshl_or_b32 v1, v1, 16, v11 -; GFX9-DL-NEXT: v_lshl_or_b32 v2, v2, 16, v18 -; GFX9-DL-NEXT: v_and_b32_e32 v9, v4, v9 -; GFX9-DL-NEXT: v_and_b32_e32 v5, v4, v5 -; GFX9-DL-NEXT: v_and_b32_e32 v16, v4, v16 +; GFX9-DL-NEXT: v_lshrrev_b32_e32 v12, 28, v2 +; GFX9-DL-NEXT: v_bfe_u32 v13, v2, 24, 4 +; GFX9-DL-NEXT: v_bfe_u32 v14, v2, 20, 4 +; GFX9-DL-NEXT: v_bfe_u32 v15, v2, 16, 4 +; GFX9-DL-NEXT: v_bfe_u32 v16, v2, 12, 4 +; GFX9-DL-NEXT: v_bfe_u32 v17, v2, 8, 4 +; GFX9-DL-NEXT: v_bfe_u32 v18, v2, 4, 4 +; GFX9-DL-NEXT: v_and_b32_e32 v2, 15, v2 +; GFX9-DL-NEXT: v_and_b32_e32 v1, v4, v1 +; GFX9-DL-NEXT: v_and_b32_e32 v2, v4, v2 +; GFX9-DL-NEXT: v_lshl_or_b32 v1, v11, 16, v1 +; GFX9-DL-NEXT: v_lshl_or_b32 v2, v18, 16, v2 +; GFX9-DL-NEXT: v_and_b32_e32 v10, v4, v10 +; GFX9-DL-NEXT: v_and_b32_e32 v6, v4, v6 +; GFX9-DL-NEXT: v_and_b32_e32 v17, v4, v17 ; GFX9-DL-NEXT: v_pk_lshlrev_b16 v1, 12, v1 op_sel_hi:[0,1] ; GFX9-DL-NEXT: v_pk_lshlrev_b16 v2, 12, v2 op_sel_hi:[0,1] -; GFX9-DL-NEXT: v_lshl_or_b32 v9, v10, 16, v9 -; GFX9-DL-NEXT: v_lshl_or_b32 v5, v6, 16, v5 -; GFX9-DL-NEXT: v_lshl_or_b32 v6, v17, 16, v16 +; GFX9-DL-NEXT: v_lshl_or_b32 v9, v9, 16, v10 +; GFX9-DL-NEXT: v_lshl_or_b32 v5, v5, 16, v6 +; GFX9-DL-NEXT: v_lshl_or_b32 v6, v16, 16, v17 ; GFX9-DL-NEXT: v_pk_ashrrev_i16 v1, 12, v1 op_sel_hi:[0,1] ; GFX9-DL-NEXT: v_pk_ashrrev_i16 v2, 12, v2 op_sel_hi:[0,1] -; GFX9-DL-NEXT: v_and_b32_e32 v7, v4, v7 -; GFX9-DL-NEXT: v_and_b32_e32 v14, v4, v14 -; GFX9-DL-NEXT: v_and_b32_e32 v4, v4, v12 +; GFX9-DL-NEXT: v_and_b32_e32 v8, v4, v8 +; GFX9-DL-NEXT: v_and_b32_e32 v15, v4, v15 +; GFX9-DL-NEXT: v_and_b32_e32 v4, v4, v13 ; GFX9-DL-NEXT: v_pk_lshlrev_b16 v9, 12, v9 op_sel_hi:[0,1] ; GFX9-DL-NEXT: v_pk_lshlrev_b16 v6, 12, v6 op_sel_hi:[0,1] ; GFX9-DL-NEXT: v_pk_mul_lo_u16 v1, v1, v2 -; GFX9-DL-NEXT: v_lshl_or_b32 v7, v8, 16, v7 -; GFX9-DL-NEXT: v_lshl_or_b32 v8, v15, 16, v14 -; GFX9-DL-NEXT: v_lshl_or_b32 v4, v13, 16, v4 +; GFX9-DL-NEXT: v_lshl_or_b32 v7, v7, 16, v8 +; GFX9-DL-NEXT: v_lshl_or_b32 v8, v14, 16, v15 +; GFX9-DL-NEXT: v_lshl_or_b32 v4, v12, 16, v4 ; GFX9-DL-NEXT: v_pk_ashrrev_i16 v9, 12, v9 op_sel_hi:[0,1] ; GFX9-DL-NEXT: v_pk_ashrrev_i16 v6, 12, v6 op_sel_hi:[0,1] ; GFX9-DL-NEXT: s_waitcnt vmcnt(0) @@ 
-2529,57 +2529,57 @@ define amdgpu_kernel void @idot8_acc16_vecMul(<8 x i4> addrspace(1)* %src1, ; GFX10-DL-XNACK-NEXT: v_mov_b32_e32 v0, 0 ; GFX10-DL-XNACK-NEXT: global_load_ushort v3, v0, s[0:1] ; GFX10-DL-XNACK-NEXT: s_waitcnt vmcnt(2) -; GFX10-DL-XNACK-NEXT: v_and_b32_e32 v11, 15, v1 +; GFX10-DL-XNACK-NEXT: v_lshrrev_b32_e32 v5, 28, v1 +; GFX10-DL-XNACK-NEXT: v_bfe_u32 v6, v1, 24, 4 +; GFX10-DL-XNACK-NEXT: v_bfe_u32 v7, v1, 20, 4 +; GFX10-DL-XNACK-NEXT: v_bfe_u32 v8, v1, 16, 4 +; GFX10-DL-XNACK-NEXT: v_bfe_u32 v9, v1, 12, 4 +; GFX10-DL-XNACK-NEXT: v_bfe_u32 v10, v1, 8, 4 +; GFX10-DL-XNACK-NEXT: v_bfe_u32 v11, v1, 4, 4 +; GFX10-DL-XNACK-NEXT: v_and_b32_e32 v1, 15, v1 ; GFX10-DL-XNACK-NEXT: s_waitcnt vmcnt(1) ; GFX10-DL-XNACK-NEXT: v_and_b32_e32 v13, 15, v2 -; GFX10-DL-XNACK-NEXT: v_bfe_u32 v5, v1, 24, 4 -; GFX10-DL-XNACK-NEXT: v_lshrrev_b32_e32 v6, 28, v1 -; GFX10-DL-XNACK-NEXT: v_bfe_u32 v7, v1, 16, 4 -; GFX10-DL-XNACK-NEXT: v_bfe_u32 v8, v1, 20, 4 -; GFX10-DL-XNACK-NEXT: v_bfe_u32 v9, v1, 8, 4 -; GFX10-DL-XNACK-NEXT: v_bfe_u32 v10, v1, 12, 4 -; GFX10-DL-XNACK-NEXT: v_bfe_u32 v1, v1, 4, 4 ; GFX10-DL-XNACK-NEXT: v_bfe_u32 v16, v2, 4, 4 -; GFX10-DL-XNACK-NEXT: v_and_b32_e32 v11, v4, v11 +; GFX10-DL-XNACK-NEXT: v_lshrrev_b32_e32 v12, 28, v2 +; GFX10-DL-XNACK-NEXT: v_bfe_u32 v14, v2, 24, 4 +; GFX10-DL-XNACK-NEXT: v_and_b32_e32 v1, v4, v1 ; GFX10-DL-XNACK-NEXT: v_and_b32_e32 v13, v4, v13 -; GFX10-DL-XNACK-NEXT: v_bfe_u32 v18, v2, 8, 4 -; GFX10-DL-XNACK-NEXT: v_bfe_u32 v12, v2, 24, 4 -; GFX10-DL-XNACK-NEXT: v_lshrrev_b32_e32 v14, 28, v2 -; GFX10-DL-XNACK-NEXT: v_lshl_or_b32 v1, v1, 16, v11 +; GFX10-DL-XNACK-NEXT: v_bfe_u32 v15, v2, 20, 4 +; GFX10-DL-XNACK-NEXT: v_bfe_u32 v17, v2, 16, 4 +; GFX10-DL-XNACK-NEXT: v_bfe_u32 v18, v2, 12, 4 +; GFX10-DL-XNACK-NEXT: v_bfe_u32 v2, v2, 8, 4 +; GFX10-DL-XNACK-NEXT: v_lshl_or_b32 v1, v11, 16, v1 ; GFX10-DL-XNACK-NEXT: v_lshl_or_b32 v11, v16, 16, v13 -; GFX10-DL-XNACK-NEXT: v_bfe_u32 v15, v2, 16, 4 -; GFX10-DL-XNACK-NEXT: v_bfe_u32 v17, v2, 20, 4 -; GFX10-DL-XNACK-NEXT: v_bfe_u32 v2, v2, 12, 4 -; GFX10-DL-XNACK-NEXT: v_and_b32_e32 v9, v4, v9 -; GFX10-DL-XNACK-NEXT: v_and_b32_e32 v7, v4, v7 -; GFX10-DL-XNACK-NEXT: v_and_b32_e32 v13, v4, v18 +; GFX10-DL-XNACK-NEXT: v_and_b32_e32 v10, v4, v10 +; GFX10-DL-XNACK-NEXT: v_and_b32_e32 v8, v4, v8 +; GFX10-DL-XNACK-NEXT: v_and_b32_e32 v2, v4, v2 ; GFX10-DL-XNACK-NEXT: v_pk_lshlrev_b16 v1, 12, v1 op_sel_hi:[0,1] ; GFX10-DL-XNACK-NEXT: v_pk_lshlrev_b16 v11, 12, v11 op_sel_hi:[0,1] -; GFX10-DL-XNACK-NEXT: v_lshl_or_b32 v9, v10, 16, v9 -; GFX10-DL-XNACK-NEXT: v_lshl_or_b32 v7, v8, 16, v7 -; GFX10-DL-XNACK-NEXT: v_lshl_or_b32 v2, v2, 16, v13 +; GFX10-DL-XNACK-NEXT: v_lshl_or_b32 v9, v9, 16, v10 +; GFX10-DL-XNACK-NEXT: v_lshl_or_b32 v7, v7, 16, v8 +; GFX10-DL-XNACK-NEXT: v_lshl_or_b32 v2, v18, 16, v2 ; GFX10-DL-XNACK-NEXT: v_pk_ashrrev_i16 v1, 12, v1 op_sel_hi:[0,1] ; GFX10-DL-XNACK-NEXT: v_pk_ashrrev_i16 v8, 12, v11 op_sel_hi:[0,1] -; GFX10-DL-XNACK-NEXT: v_and_b32_e32 v10, v4, v15 +; GFX10-DL-XNACK-NEXT: v_and_b32_e32 v10, v4, v17 ; GFX10-DL-XNACK-NEXT: v_pk_lshlrev_b16 v9, 12, v9 op_sel_hi:[0,1] ; GFX10-DL-XNACK-NEXT: v_pk_lshlrev_b16 v2, 12, v2 op_sel_hi:[0,1] ; GFX10-DL-XNACK-NEXT: v_pk_lshlrev_b16 v7, 12, v7 op_sel_hi:[0,1] ; GFX10-DL-XNACK-NEXT: v_pk_mul_lo_u16 v1, v1, v8 -; GFX10-DL-XNACK-NEXT: v_lshl_or_b32 v8, v17, 16, v10 +; GFX10-DL-XNACK-NEXT: v_lshl_or_b32 v8, v15, 16, v10 ; GFX10-DL-XNACK-NEXT: v_pk_ashrrev_i16 v9, 12, v9 op_sel_hi:[0,1] ; GFX10-DL-XNACK-NEXT: v_pk_ashrrev_i16 v2, 12, v2 op_sel_hi:[0,1] ; 
GFX10-DL-XNACK-NEXT: v_pk_ashrrev_i16 v7, 12, v7 op_sel_hi:[0,1] ; GFX10-DL-XNACK-NEXT: v_lshrrev_b32_e32 v10, 16, v1 ; GFX10-DL-XNACK-NEXT: s_waitcnt vmcnt(0) ; GFX10-DL-XNACK-NEXT: v_add_nc_u16 v1, v1, v3 -; GFX10-DL-XNACK-NEXT: v_and_b32_e32 v3, v4, v5 -; GFX10-DL-XNACK-NEXT: v_pk_lshlrev_b16 v5, 12, v8 op_sel_hi:[0,1] -; GFX10-DL-XNACK-NEXT: v_and_b32_e32 v4, v4, v12 +; GFX10-DL-XNACK-NEXT: v_and_b32_e32 v3, v4, v6 +; GFX10-DL-XNACK-NEXT: v_pk_lshlrev_b16 v6, 12, v8 op_sel_hi:[0,1] +; GFX10-DL-XNACK-NEXT: v_and_b32_e32 v4, v4, v14 ; GFX10-DL-XNACK-NEXT: v_pk_mul_lo_u16 v2, v9, v2 ; GFX10-DL-XNACK-NEXT: v_add_nc_u16 v1, v1, v10 -; GFX10-DL-XNACK-NEXT: v_lshl_or_b32 v3, v6, 16, v3 -; GFX10-DL-XNACK-NEXT: v_pk_ashrrev_i16 v5, 12, v5 op_sel_hi:[0,1] -; GFX10-DL-XNACK-NEXT: v_lshl_or_b32 v4, v14, 16, v4 +; GFX10-DL-XNACK-NEXT: v_lshl_or_b32 v3, v5, 16, v3 +; GFX10-DL-XNACK-NEXT: v_pk_ashrrev_i16 v5, 12, v6 op_sel_hi:[0,1] +; GFX10-DL-XNACK-NEXT: v_lshl_or_b32 v4, v12, 16, v4 ; GFX10-DL-XNACK-NEXT: v_lshrrev_b32_e32 v6, 16, v2 ; GFX10-DL-XNACK-NEXT: v_add_nc_u16 v1, v1, v2 ; GFX10-DL-XNACK-NEXT: v_pk_lshlrev_b16 v2, 12, v3 op_sel_hi:[0,1] @@ -2617,57 +2617,57 @@ define amdgpu_kernel void @idot8_acc16_vecMul(<8 x i4> addrspace(1)* %src1, ; GFX10-DL-NOXNACK-NEXT: global_load_dword v0, v0, s[6:7] ; GFX10-DL-NOXNACK-NEXT: global_load_ushort v3, v2, s[0:1] ; GFX10-DL-NOXNACK-NEXT: s_waitcnt vmcnt(2) -; GFX10-DL-NOXNACK-NEXT: v_and_b32_e32 v11, 15, v1 +; GFX10-DL-NOXNACK-NEXT: v_lshrrev_b32_e32 v5, 28, v1 +; GFX10-DL-NOXNACK-NEXT: v_bfe_u32 v6, v1, 24, 4 +; GFX10-DL-NOXNACK-NEXT: v_bfe_u32 v7, v1, 20, 4 +; GFX10-DL-NOXNACK-NEXT: v_bfe_u32 v8, v1, 16, 4 +; GFX10-DL-NOXNACK-NEXT: v_bfe_u32 v9, v1, 12, 4 +; GFX10-DL-NOXNACK-NEXT: v_bfe_u32 v10, v1, 8, 4 +; GFX10-DL-NOXNACK-NEXT: v_bfe_u32 v11, v1, 4, 4 +; GFX10-DL-NOXNACK-NEXT: v_and_b32_e32 v1, 15, v1 ; GFX10-DL-NOXNACK-NEXT: s_waitcnt vmcnt(1) ; GFX10-DL-NOXNACK-NEXT: v_and_b32_e32 v13, 15, v0 -; GFX10-DL-NOXNACK-NEXT: v_bfe_u32 v5, v1, 24, 4 -; GFX10-DL-NOXNACK-NEXT: v_lshrrev_b32_e32 v6, 28, v1 -; GFX10-DL-NOXNACK-NEXT: v_bfe_u32 v7, v1, 16, 4 -; GFX10-DL-NOXNACK-NEXT: v_bfe_u32 v8, v1, 20, 4 -; GFX10-DL-NOXNACK-NEXT: v_bfe_u32 v9, v1, 8, 4 -; GFX10-DL-NOXNACK-NEXT: v_bfe_u32 v10, v1, 12, 4 -; GFX10-DL-NOXNACK-NEXT: v_bfe_u32 v1, v1, 4, 4 ; GFX10-DL-NOXNACK-NEXT: v_bfe_u32 v16, v0, 4, 4 -; GFX10-DL-NOXNACK-NEXT: v_and_b32_e32 v11, v4, v11 +; GFX10-DL-NOXNACK-NEXT: v_lshrrev_b32_e32 v12, 28, v0 +; GFX10-DL-NOXNACK-NEXT: v_bfe_u32 v14, v0, 24, 4 +; GFX10-DL-NOXNACK-NEXT: v_and_b32_e32 v1, v4, v1 ; GFX10-DL-NOXNACK-NEXT: v_and_b32_e32 v13, v4, v13 -; GFX10-DL-NOXNACK-NEXT: v_bfe_u32 v18, v0, 8, 4 -; GFX10-DL-NOXNACK-NEXT: v_bfe_u32 v12, v0, 24, 4 -; GFX10-DL-NOXNACK-NEXT: v_lshrrev_b32_e32 v14, 28, v0 -; GFX10-DL-NOXNACK-NEXT: v_lshl_or_b32 v1, v1, 16, v11 +; GFX10-DL-NOXNACK-NEXT: v_bfe_u32 v15, v0, 20, 4 +; GFX10-DL-NOXNACK-NEXT: v_bfe_u32 v17, v0, 16, 4 +; GFX10-DL-NOXNACK-NEXT: v_bfe_u32 v18, v0, 12, 4 +; GFX10-DL-NOXNACK-NEXT: v_bfe_u32 v0, v0, 8, 4 +; GFX10-DL-NOXNACK-NEXT: v_lshl_or_b32 v1, v11, 16, v1 ; GFX10-DL-NOXNACK-NEXT: v_lshl_or_b32 v11, v16, 16, v13 -; GFX10-DL-NOXNACK-NEXT: v_bfe_u32 v15, v0, 16, 4 -; GFX10-DL-NOXNACK-NEXT: v_bfe_u32 v17, v0, 20, 4 -; GFX10-DL-NOXNACK-NEXT: v_bfe_u32 v0, v0, 12, 4 -; GFX10-DL-NOXNACK-NEXT: v_and_b32_e32 v9, v4, v9 -; GFX10-DL-NOXNACK-NEXT: v_and_b32_e32 v7, v4, v7 -; GFX10-DL-NOXNACK-NEXT: v_and_b32_e32 v13, v4, v18 +; GFX10-DL-NOXNACK-NEXT: v_and_b32_e32 v10, v4, v10 +; GFX10-DL-NOXNACK-NEXT: 
v_and_b32_e32 v8, v4, v8 +; GFX10-DL-NOXNACK-NEXT: v_and_b32_e32 v0, v4, v0 ; GFX10-DL-NOXNACK-NEXT: v_pk_lshlrev_b16 v1, 12, v1 op_sel_hi:[0,1] ; GFX10-DL-NOXNACK-NEXT: v_pk_lshlrev_b16 v11, 12, v11 op_sel_hi:[0,1] -; GFX10-DL-NOXNACK-NEXT: v_lshl_or_b32 v9, v10, 16, v9 -; GFX10-DL-NOXNACK-NEXT: v_lshl_or_b32 v7, v8, 16, v7 -; GFX10-DL-NOXNACK-NEXT: v_lshl_or_b32 v0, v0, 16, v13 +; GFX10-DL-NOXNACK-NEXT: v_lshl_or_b32 v9, v9, 16, v10 +; GFX10-DL-NOXNACK-NEXT: v_lshl_or_b32 v7, v7, 16, v8 +; GFX10-DL-NOXNACK-NEXT: v_lshl_or_b32 v0, v18, 16, v0 ; GFX10-DL-NOXNACK-NEXT: v_pk_ashrrev_i16 v1, 12, v1 op_sel_hi:[0,1] ; GFX10-DL-NOXNACK-NEXT: v_pk_ashrrev_i16 v8, 12, v11 op_sel_hi:[0,1] -; GFX10-DL-NOXNACK-NEXT: v_and_b32_e32 v10, v4, v15 +; GFX10-DL-NOXNACK-NEXT: v_and_b32_e32 v10, v4, v17 ; GFX10-DL-NOXNACK-NEXT: v_pk_lshlrev_b16 v9, 12, v9 op_sel_hi:[0,1] ; GFX10-DL-NOXNACK-NEXT: v_pk_lshlrev_b16 v0, 12, v0 op_sel_hi:[0,1] ; GFX10-DL-NOXNACK-NEXT: v_pk_lshlrev_b16 v7, 12, v7 op_sel_hi:[0,1] ; GFX10-DL-NOXNACK-NEXT: v_pk_mul_lo_u16 v1, v1, v8 -; GFX10-DL-NOXNACK-NEXT: v_lshl_or_b32 v8, v17, 16, v10 +; GFX10-DL-NOXNACK-NEXT: v_lshl_or_b32 v8, v15, 16, v10 ; GFX10-DL-NOXNACK-NEXT: v_pk_ashrrev_i16 v9, 12, v9 op_sel_hi:[0,1] ; GFX10-DL-NOXNACK-NEXT: v_pk_ashrrev_i16 v0, 12, v0 op_sel_hi:[0,1] ; GFX10-DL-NOXNACK-NEXT: v_pk_ashrrev_i16 v7, 12, v7 op_sel_hi:[0,1] ; GFX10-DL-NOXNACK-NEXT: v_lshrrev_b32_e32 v10, 16, v1 ; GFX10-DL-NOXNACK-NEXT: s_waitcnt vmcnt(0) ; GFX10-DL-NOXNACK-NEXT: v_add_nc_u16 v1, v1, v3 -; GFX10-DL-NOXNACK-NEXT: v_and_b32_e32 v3, v4, v5 -; GFX10-DL-NOXNACK-NEXT: v_pk_lshlrev_b16 v5, 12, v8 op_sel_hi:[0,1] -; GFX10-DL-NOXNACK-NEXT: v_and_b32_e32 v4, v4, v12 +; GFX10-DL-NOXNACK-NEXT: v_and_b32_e32 v3, v4, v6 +; GFX10-DL-NOXNACK-NEXT: v_pk_lshlrev_b16 v6, 12, v8 op_sel_hi:[0,1] +; GFX10-DL-NOXNACK-NEXT: v_and_b32_e32 v4, v4, v14 ; GFX10-DL-NOXNACK-NEXT: v_pk_mul_lo_u16 v0, v9, v0 ; GFX10-DL-NOXNACK-NEXT: v_add_nc_u16 v1, v1, v10 -; GFX10-DL-NOXNACK-NEXT: v_lshl_or_b32 v3, v6, 16, v3 -; GFX10-DL-NOXNACK-NEXT: v_pk_ashrrev_i16 v5, 12, v5 op_sel_hi:[0,1] -; GFX10-DL-NOXNACK-NEXT: v_lshl_or_b32 v4, v14, 16, v4 +; GFX10-DL-NOXNACK-NEXT: v_lshl_or_b32 v3, v5, 16, v3 +; GFX10-DL-NOXNACK-NEXT: v_pk_ashrrev_i16 v5, 12, v6 op_sel_hi:[0,1] +; GFX10-DL-NOXNACK-NEXT: v_lshl_or_b32 v4, v12, 16, v4 ; GFX10-DL-NOXNACK-NEXT: v_lshrrev_b32_e32 v6, 16, v0 ; GFX10-DL-NOXNACK-NEXT: v_add_nc_u16 v0, v1, v0 ; GFX10-DL-NOXNACK-NEXT: v_pk_lshlrev_b16 v1, 12, v3 op_sel_hi:[0,1] diff --git a/llvm/test/CodeGen/AMDGPU/vector_shuffle.packed.ll b/llvm/test/CodeGen/AMDGPU/vector_shuffle.packed.ll index 85dc9991bdaa..8d4d929e029d 100644 --- a/llvm/test/CodeGen/AMDGPU/vector_shuffle.packed.ll +++ b/llvm/test/CodeGen/AMDGPU/vector_shuffle.packed.ll @@ -179,13 +179,13 @@ define <4 x half> @shuffle_v4f16_35u5(<4 x half> addrspace(1)* %arg0, <4 x half> ; GFX9-LABEL: shuffle_v4f16_35u5: ; GFX9: ; %bb.0: ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX9-NEXT: global_load_dword v4, v[2:3], off ; GFX9-NEXT: global_load_dword v5, v[0:1], off offset:4 +; GFX9-NEXT: global_load_dword v4, v[2:3], off ; GFX9-NEXT: v_mov_b32_e32 v0, 0xffff ; GFX9-NEXT: s_waitcnt vmcnt(1) -; GFX9-NEXT: v_lshrrev_b32_e32 v1, 16, v4 -; GFX9-NEXT: s_waitcnt vmcnt(0) ; GFX9-NEXT: v_and_b32_sdwa v0, v0, v5 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1 +; GFX9-NEXT: s_waitcnt vmcnt(0) +; GFX9-NEXT: v_lshrrev_b32_e32 v1, 16, v4 ; GFX9-NEXT: v_lshl_or_b32 v0, v1, 16, v0 ; GFX9-NEXT: v_mov_b32_e32 v1, v4 ; GFX9-NEXT: 
s_setpc_b64 s[30:31] @@ -194,13 +194,13 @@ define <4 x half> @shuffle_v4f16_35u5(<4 x half> addrspace(1)* %arg0, <4 x half> ; GFX10: ; %bb.0: ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 -; GFX10-NEXT: global_load_dword v4, v[2:3], off ; GFX10-NEXT: global_load_dword v5, v[0:1], off offset:4 +; GFX10-NEXT: global_load_dword v4, v[2:3], off ; GFX10-NEXT: v_mov_b32_e32 v0, 0xffff ; GFX10-NEXT: s_waitcnt vmcnt(1) -; GFX10-NEXT: v_lshrrev_b32_e32 v1, 16, v4 -; GFX10-NEXT: s_waitcnt vmcnt(0) ; GFX10-NEXT: v_and_b32_sdwa v0, v0, v5 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1 +; GFX10-NEXT: s_waitcnt vmcnt(0) +; GFX10-NEXT: v_lshrrev_b32_e32 v1, 16, v4 ; GFX10-NEXT: v_lshl_or_b32 v0, v1, 16, v0 ; GFX10-NEXT: v_mov_b32_e32 v1, v4 ; GFX10-NEXT: s_setpc_b64 s[30:31] @@ -214,13 +214,13 @@ define <4 x half> @shuffle_v4f16_357u(<4 x half> addrspace(1)* %arg0, <4 x half> ; GFX9-LABEL: shuffle_v4f16_357u: ; GFX9: ; %bb.0: ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX9-NEXT: global_load_dwordx2 v[4:5], v[2:3], off ; GFX9-NEXT: global_load_dword v6, v[0:1], off offset:4 +; GFX9-NEXT: global_load_dwordx2 v[4:5], v[2:3], off ; GFX9-NEXT: v_mov_b32_e32 v0, 0xffff ; GFX9-NEXT: s_waitcnt vmcnt(1) -; GFX9-NEXT: v_lshrrev_b32_e32 v2, 16, v4 -; GFX9-NEXT: s_waitcnt vmcnt(0) ; GFX9-NEXT: v_and_b32_sdwa v0, v0, v6 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1 +; GFX9-NEXT: s_waitcnt vmcnt(0) +; GFX9-NEXT: v_lshrrev_b32_e32 v2, 16, v4 ; GFX9-NEXT: v_lshrrev_b32_e32 v1, 16, v5 ; GFX9-NEXT: v_lshl_or_b32 v0, v2, 16, v0 ; GFX9-NEXT: s_setpc_b64 s[30:31] @@ -229,13 +229,13 @@ define <4 x half> @shuffle_v4f16_357u(<4 x half> addrspace(1)* %arg0, <4 x half> ; GFX10: ; %bb.0: ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 -; GFX10-NEXT: global_load_dwordx2 v[4:5], v[2:3], off ; GFX10-NEXT: global_load_dword v6, v[0:1], off offset:4 +; GFX10-NEXT: global_load_dwordx2 v[4:5], v[2:3], off ; GFX10-NEXT: v_mov_b32_e32 v0, 0xffff ; GFX10-NEXT: s_waitcnt vmcnt(1) -; GFX10-NEXT: v_lshrrev_b32_e32 v1, 16, v4 -; GFX10-NEXT: s_waitcnt vmcnt(0) ; GFX10-NEXT: v_and_b32_sdwa v0, v0, v6 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1 +; GFX10-NEXT: s_waitcnt vmcnt(0) +; GFX10-NEXT: v_lshrrev_b32_e32 v1, 16, v4 ; GFX10-NEXT: v_lshl_or_b32 v0, v1, 16, v0 ; GFX10-NEXT: v_lshrrev_b32_e32 v1, 16, v5 ; GFX10-NEXT: s_setpc_b64 s[30:31] @@ -803,8 +803,8 @@ define <4 x half> @shuffle_v4f16_5734(<4 x half> addrspace(1)* %arg0, <4 x half> ; GFX9-NEXT: s_waitcnt vmcnt(1) ; GFX9-NEXT: v_and_b32_sdwa v1, v0, v6 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1 ; GFX9-NEXT: s_waitcnt vmcnt(0) -; GFX9-NEXT: v_lshrrev_b32_e32 v2, 16, v5 ; GFX9-NEXT: v_and_b32_sdwa v0, v0, v4 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1 +; GFX9-NEXT: v_lshrrev_b32_e32 v2, 16, v5 ; GFX9-NEXT: v_lshl_or_b32 v1, v4, 16, v1 ; GFX9-NEXT: v_lshl_or_b32 v0, v2, 16, v0 ; GFX9-NEXT: s_setpc_b64 s[30:31] @@ -817,11 +817,11 @@ define <4 x half> @shuffle_v4f16_5734(<4 x half> addrspace(1)* %arg0, <4 x half> ; GFX10-NEXT: global_load_dword v6, v[0:1], off offset:4 ; GFX10-NEXT: v_mov_b32_e32 v0, 0xffff ; GFX10-NEXT: s_waitcnt vmcnt(1) -; GFX10-NEXT: v_lshrrev_b32_e32 v1, 16, v5 -; GFX10-NEXT: v_and_b32_sdwa v2, v0, v4 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1 +; GFX10-NEXT: v_and_b32_sdwa v1, v0, v4 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD 
src1_sel:WORD_1 +; GFX10-NEXT: v_lshrrev_b32_e32 v2, 16, v5 ; GFX10-NEXT: s_waitcnt vmcnt(0) ; GFX10-NEXT: v_and_b32_sdwa v3, v0, v6 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1 -; GFX10-NEXT: v_lshl_or_b32 v0, v1, 16, v2 +; GFX10-NEXT: v_lshl_or_b32 v0, v2, 16, v1 ; GFX10-NEXT: v_lshl_or_b32 v1, v4, 16, v3 ; GFX10-NEXT: s_setpc_b64 s[30:31] %val0 = load <4 x half>, <4 x half> addrspace(1)* %arg0 @@ -985,13 +985,13 @@ define <4 x half> @shuffle_v4f16_6161(<4 x half> addrspace(1)* %arg0, <4 x half> ; GFX9-LABEL: shuffle_v4f16_6161: ; GFX9: ; %bb.0: ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX9-NEXT: global_load_dword v4, v[0:1], off -; GFX9-NEXT: global_load_dword v5, v[2:3], off offset:4 +; GFX9-NEXT: global_load_dword v4, v[2:3], off offset:4 +; GFX9-NEXT: global_load_dword v5, v[0:1], off ; GFX9-NEXT: s_waitcnt vmcnt(1) -; GFX9-NEXT: v_lshrrev_b32_e32 v0, 16, v4 +; GFX9-NEXT: v_and_b32_e32 v0, 0xffff, v4 ; GFX9-NEXT: s_waitcnt vmcnt(0) -; GFX9-NEXT: v_and_b32_e32 v1, 0xffff, v5 -; GFX9-NEXT: v_lshl_or_b32 v0, v0, 16, v1 +; GFX9-NEXT: v_lshrrev_b32_e32 v1, 16, v5 +; GFX9-NEXT: v_lshl_or_b32 v0, v1, 16, v0 ; GFX9-NEXT: v_mov_b32_e32 v1, v0 ; GFX9-NEXT: s_setpc_b64 s[30:31] ; @@ -999,13 +999,13 @@ define <4 x half> @shuffle_v4f16_6161(<4 x half> addrspace(1)* %arg0, <4 x half> ; GFX10: ; %bb.0: ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 -; GFX10-NEXT: global_load_dword v4, v[0:1], off -; GFX10-NEXT: global_load_dword v5, v[2:3], off offset:4 +; GFX10-NEXT: global_load_dword v4, v[2:3], off offset:4 +; GFX10-NEXT: global_load_dword v5, v[0:1], off ; GFX10-NEXT: s_waitcnt vmcnt(1) -; GFX10-NEXT: v_lshrrev_b32_e32 v0, 16, v4 +; GFX10-NEXT: v_and_b32_e32 v0, 0xffff, v4 ; GFX10-NEXT: s_waitcnt vmcnt(0) -; GFX10-NEXT: v_and_b32_e32 v1, 0xffff, v5 -; GFX10-NEXT: v_lshl_or_b32 v0, v0, 16, v1 +; GFX10-NEXT: v_lshrrev_b32_e32 v1, 16, v5 +; GFX10-NEXT: v_lshl_or_b32 v0, v1, 16, v0 ; GFX10-NEXT: v_mov_b32_e32 v1, v0 ; GFX10-NEXT: s_setpc_b64 s[30:31] %val0 = load <4 x half>, <4 x half> addrspace(1)* %arg0 From 62a6b9e9ab3eb778111e90a34fee1e6a7c64db8a Mon Sep 17 00:00:00 2001 From: David Blaikie Date: Wed, 22 Dec 2021 14:29:41 -0800 Subject: [PATCH 091/293] Revert "[DwarfDebug] Support emitting function-local declaration for a lexical block" & dependent patches This patch causes invalid DWARF to be generated in some cases of LTO + Split DWARF - follow-up on the original review thread (D113741) contains further detail and test cases. This reverts commit 75b622a7959479ccdb758ebe0973061b7b4fcacd. This reverts commit b6ccca217c35a95b8c2a337a7801b37cb23dbae2. This reverts commit 514d37441918dd04a2cd4f35c08959d7c3ff096d. 
--- .../CodeGen/AsmPrinter/DwarfCompileUnit.cpp | 132 ++---- .../lib/CodeGen/AsmPrinter/DwarfCompileUnit.h | 41 +- llvm/lib/CodeGen/AsmPrinter/DwarfDebug.cpp | 60 +-- llvm/lib/CodeGen/AsmPrinter/DwarfDebug.h | 12 +- llvm/lib/CodeGen/AsmPrinter/DwarfFile.h | 7 +- llvm/lib/CodeGen/AsmPrinter/DwarfUnit.cpp | 2 +- llvm/lib/CodeGen/AsmPrinter/DwarfUnit.h | 2 +- .../Generic/import-inlined-declaration.ll | 72 --- .../Generic/imported-name-inlined.ll | 10 +- .../DebugInfo/Generic/inlined-local-type.ll | 125 ------ .../DebugInfo/Generic/inlined-static-var.ll | 92 ---- .../DebugInfo/Generic/lexical_block_static.ll | 141 ------ .../DebugInfo/Generic/lexical_block_types.ll | 421 ------------------ llvm/test/DebugInfo/Generic/namespace.ll | 33 +- 14 files changed, 107 insertions(+), 1043 deletions(-) delete mode 100644 llvm/test/DebugInfo/Generic/import-inlined-declaration.ll delete mode 100644 llvm/test/DebugInfo/Generic/inlined-local-type.ll delete mode 100644 llvm/test/DebugInfo/Generic/inlined-static-var.ll delete mode 100644 llvm/test/DebugInfo/Generic/lexical_block_static.ll delete mode 100644 llvm/test/DebugInfo/Generic/lexical_block_types.ll diff --git a/llvm/lib/CodeGen/AsmPrinter/DwarfCompileUnit.cpp b/llvm/lib/CodeGen/AsmPrinter/DwarfCompileUnit.cpp index cf8c0c03772e..9b73f0ab2f05 100644 --- a/llvm/lib/CodeGen/AsmPrinter/DwarfCompileUnit.cpp +++ b/llvm/lib/CodeGen/AsmPrinter/DwarfCompileUnit.cpp @@ -551,18 +551,8 @@ void DwarfCompileUnit::constructScopeDIE(LexicalScope *Scope, // Emit lexical blocks. DIE *ScopeDIE = constructLexicalScopeDIE(Scope); assert(ScopeDIE && "Scope DIE should not be null."); - ParentScopeDIE.addChild(ScopeDIE); - - // Track abstract and concrete lexical block scopes. - if (Scope->isAbstractScope()) { - assert(!getAbstractScopeDIEs().count(DS) && - "Abstract DIE for this scope exists!"); - getAbstractScopeDIEs()[DS] = ScopeDIE; - } else if (!Scope->getInlinedAt()) { - assert(!LocalScopeDIEs.count(DS) && "Concrete DIE for this scope exists!"); - LocalScopeDIEs[DS] = ScopeDIE; - } + ParentScopeDIE.addChild(ScopeDIE); createAndAddScopeChildren(Scope, *ScopeDIE); } @@ -656,7 +646,7 @@ DIE *DwarfCompileUnit::constructInlinedScopeDIE(LexicalScope *Scope) { auto *InlinedSP = getDISubprogram(DS); // Find the subprogram's DwarfCompileUnit in the SPMap in case the subprogram // was inlined from another compile unit. - DIE *OriginDIE = getAbstractScopeDIEs()[InlinedSP]; + DIE *OriginDIE = getAbstractSPDies()[InlinedSP]; assert(OriginDIE && "Unable to find original DIE for an inlined subprogram."); auto ScopeDIE = DIE::get(DIEValueAllocator, dwarf::DW_TAG_inlined_subroutine); @@ -1020,12 +1010,6 @@ DIE &DwarfCompileUnit::constructSubprogramScopeDIE(const DISubprogram *Sub, if (Scope) { assert(!Scope->getInlinedAt()); assert(!Scope->isAbstractScope()); - - // Remember the subrogram before creating child entities. - assert(!LocalScopeDIEs.count(Sub) && - "Concrete DIE for the subprogram exists!"); - LocalScopeDIEs[Sub] = &ScopeDIE; - // Collect lexical scope children first. // ObjectPointer might be a local (non-argument) local variable if it's a // block's synthetic this pointer. @@ -1061,18 +1045,27 @@ DIE *DwarfCompileUnit::createAndAddScopeChildren(LexicalScope *Scope, for (DbgVariable *DV : Locals) ScopeDIE.addChild(constructVariableDIE(*DV, *Scope, ObjectPointer)); + // Emit imported entities (skipped in gmlt-like data). 
+ if (!includeMinimalInlineScopes()) { + for (const auto *IE : ImportedEntities[Scope->getScopeNode()]) + ScopeDIE.addChild(constructImportedEntityDIE(cast(IE))); + } + // Emit labels. for (DbgLabel *DL : DU->getScopeLabels().lookup(Scope)) ScopeDIE.addChild(constructLabelDIE(*DL, *Scope)); // Emit inner lexical scopes. - auto needToEmitLexicalScope = [this](LexicalScope *LS) -> bool { + auto needToEmitLexicalScope = [this](LexicalScope *LS) { if (isa(LS->getScopeNode())) return true; auto Vars = DU->getScopeVariables().lookup(LS); if (!Vars.Args.empty() || !Vars.Locals.empty()) return true; - return LocalScopesWithLocalDecls.count(LS->getScopeNode()); + if (!includeMinimalInlineScopes() && + !ImportedEntities[LS->getScopeNode()].empty()) + return true; + return false; }; for (LexicalScope *LS : Scope->getChildren()) { // If the lexical block doesn't have non-scope children, skip @@ -1088,10 +1081,11 @@ DIE *DwarfCompileUnit::createAndAddScopeChildren(LexicalScope *Scope, void DwarfCompileUnit::constructAbstractSubprogramScopeDIE( LexicalScope *Scope) { + DIE *&AbsDef = getAbstractSPDies()[Scope->getScopeNode()]; + if (AbsDef) + return; auto *SP = cast(Scope->getScopeNode()); - if (getAbstractScopeDIEs().count(SP)) - return; DIE *ContextDIE; DwarfCompileUnit *ContextCU = this; @@ -1115,19 +1109,14 @@ void DwarfCompileUnit::constructAbstractSubprogramScopeDIE( // Passing null as the associated node because the abstract definition // shouldn't be found by lookup. - DIE &AbsDef = ContextCU->createAndAddDIE(dwarf::DW_TAG_subprogram, - *ContextDIE, nullptr); - - // Store the DIE before creating children. - getAbstractScopeDIEs()[SP] = &AbsDef; - - ContextCU->applySubprogramAttributesToDefinition(SP, AbsDef); - ContextCU->addSInt(AbsDef, dwarf::DW_AT_inline, + AbsDef = &ContextCU->createAndAddDIE(dwarf::DW_TAG_subprogram, *ContextDIE, nullptr); + ContextCU->applySubprogramAttributesToDefinition(SP, *AbsDef); + ContextCU->addSInt(*AbsDef, dwarf::DW_AT_inline, DD->getDwarfVersion() <= 4 ? Optional() : dwarf::DW_FORM_implicit_const, dwarf::DW_INL_inlined); - if (DIE *ObjectPointer = ContextCU->createAndAddScopeChildren(Scope, AbsDef)) - ContextCU->addDIEEntry(AbsDef, dwarf::DW_AT_object_pointer, *ObjectPointer); + if (DIE *ObjectPointer = ContextCU->createAndAddScopeChildren(Scope, *AbsDef)) + ContextCU->addDIEEntry(*AbsDef, dwarf::DW_AT_object_pointer, *ObjectPointer); } bool DwarfCompileUnit::useGNUAnalogForDwarf5Feature() const { @@ -1261,61 +1250,47 @@ void DwarfCompileUnit::constructCallSiteParmEntryDIEs( } } -DIE *DwarfCompileUnit::createImportedEntityDIE(const DIImportedEntity *IE) { - DIE *IMDie = DIE::get(DIEValueAllocator, (dwarf::Tag)IE->getTag()); - insertDIE(IE, IMDie); - +DIE *DwarfCompileUnit::constructImportedEntityDIE( + const DIImportedEntity *Module) { + DIE *IMDie = DIE::get(DIEValueAllocator, (dwarf::Tag)Module->getTag()); + insertDIE(Module, IMDie); DIE *EntityDie; - auto *Entity = IE->getEntity(); + auto *Entity = Module->getEntity(); if (auto *NS = dyn_cast(Entity)) EntityDie = getOrCreateNameSpace(NS); else if (auto *M = dyn_cast(Entity)) EntityDie = getOrCreateModule(M); - else if (auto *SP = dyn_cast(Entity)) { - // If we have abstract subprogram created, refer it. 
- if (auto *AbsSPDie = getAbstractScopeDIEs().lookup(SP)) - EntityDie = AbsSPDie; - else - EntityDie = getOrCreateSubprogramDIE(SP); - } else if (auto *T = dyn_cast(Entity)) + else if (auto *SP = dyn_cast(Entity)) + EntityDie = getOrCreateSubprogramDIE(SP); + else if (auto *T = dyn_cast(Entity)) EntityDie = getOrCreateTypeDIE(T); else if (auto *GV = dyn_cast(Entity)) EntityDie = getOrCreateGlobalVariableDIE(GV, {}); else EntityDie = getDIE(Entity); assert(EntityDie); - - addSourceLine(*IMDie, IE->getLine(), IE->getFile()); + addSourceLine(*IMDie, Module->getLine(), Module->getFile()); addDIEEntry(*IMDie, dwarf::DW_AT_import, *EntityDie); - StringRef Name = IE->getName(); + StringRef Name = Module->getName(); if (!Name.empty()) addString(*IMDie, dwarf::DW_AT_name, Name); // This is for imported module with renamed entities (such as variables and // subprograms). - DINodeArray Elements = IE->getElements(); + DINodeArray Elements = Module->getElements(); for (const auto *Element : Elements) { if (!Element) continue; - IMDie->addChild(createImportedEntityDIE(cast(Element))); + IMDie->addChild( + constructImportedEntityDIE(cast(Element))); } - return IMDie; -} -void DwarfCompileUnit::createAndAddImportedEntityDIE( - const DIImportedEntity *IE) { - DIE *ContextDIE = getOrCreateContextDIE(IE->getScope()); - assert(ContextDIE && - "Could not get or create scope for the imported entity!"); - if (!ContextDIE) - return; - - ContextDIE->addChild(createImportedEntityDIE(IE)); + return IMDie; } void DwarfCompileUnit::finishSubprogramDefinition(const DISubprogram *SP) { DIE *D = getDIE(SP); - if (DIE *AbsSPDIE = getAbstractScopeDIEs().lookup(SP)) { + if (DIE *AbsSPDIE = getAbstractSPDies().lookup(SP)) { if (D) // If this subprogram has an abstract definition, reference that addDIEEntry(*D, dwarf::DW_AT_abstract_origin, *AbsSPDIE); @@ -1605,38 +1580,3 @@ void DwarfCompileUnit::createBaseTypeDIEs() { Btr.Die = &Die; } } - -static DIE * -findLocalScopeDIE(const DILocalScope *LS, - DenseMap &ScopeDIEs) { - DIE *ScopeDIE = ScopeDIEs.lookup(LS); - if (isa(LS) && !ScopeDIE) - return nullptr; - if (!ScopeDIE) - return findLocalScopeDIE(cast(LS->getScope()), ScopeDIEs); - return ScopeDIE; -} - -DIE *DwarfCompileUnit::findLocalScopeDIE(const DIScope *S) { - auto *LScope = dyn_cast_or_null(S); - if (!LScope) - return nullptr; - - // Check if we have an abstract tree. - if (getAbstractScopeDIEs().count(LScope->getSubprogram())) - return ::findLocalScopeDIE(LScope, getAbstractScopeDIEs()); - - return ::findLocalScopeDIE(LScope, LocalScopeDIEs); -} - -DIE *DwarfCompileUnit::getOrCreateContextDIE(const DIScope *Context) { - if (auto *LScope = dyn_cast_or_null(Context)) { - if (DIE *ScopeDIE = findLocalScopeDIE(LScope)) - return ScopeDIE; - - // If nothing was found, fall back to DISubprogram and let - // DwarfUnit::getOrCreateContextDIE() create a new DIE for it. - Context = LScope->getSubprogram(); - } - return DwarfUnit::getOrCreateContextDIE(Context); -} diff --git a/llvm/lib/CodeGen/AsmPrinter/DwarfCompileUnit.h b/llvm/lib/CodeGen/AsmPrinter/DwarfCompileUnit.h index 251bc4bea68b..fb03982b5e4a 100644 --- a/llvm/lib/CodeGen/AsmPrinter/DwarfCompileUnit.h +++ b/llvm/lib/CodeGen/AsmPrinter/DwarfCompileUnit.h @@ -62,6 +62,11 @@ class DwarfCompileUnit final : public DwarfUnit { /// The start of the unit macro info within macro section. 
MCSymbol *MacroLabelBegin; + using ImportedEntityList = SmallVector; + using ImportedEntityMap = DenseMap; + + ImportedEntityMap ImportedEntities; + /// GlobalNames - A map of globally visible named entities for this unit. StringMap GlobalNames; @@ -75,14 +80,9 @@ class DwarfCompileUnit final : public DwarfUnit { // ranges/locs. const MCSymbol *BaseAddress = nullptr; - DenseMap LocalScopeDIEs; - DenseMap AbstractLocalScopeDIEs; + DenseMap AbstractSPDies; DenseMap> AbstractEntities; - /// LocalScopesWithLocalDecls - A list of non-empty local scopes - /// (with declaraions of static locals, function-local types, or imports). - SmallPtrSet LocalScopesWithLocalDecls; - /// DWO ID for correlating skeleton and split units. uint64_t DWOId = 0; @@ -92,10 +92,10 @@ class DwarfCompileUnit final : public DwarfUnit { bool isDwoUnit() const override; - DenseMap &getAbstractScopeDIEs() { + DenseMap &getAbstractSPDies() { if (isDwoUnit() && !DD->shareAcrossDWOCUs()) - return AbstractLocalScopeDIEs; - return DU->getAbstractScopeDIEs(); + return AbstractSPDies; + return DU->getAbstractSPDies(); } DenseMap> &getAbstractEntities() { @@ -169,6 +169,17 @@ class DwarfCompileUnit final : public DwarfUnit { unsigned getOrCreateSourceID(const DIFile *File) override; + void addImportedEntity(const DIImportedEntity* IE) { + DIScope *Scope = IE->getScope(); + assert(Scope && "Invalid Scope encoding!"); + if (!isa(Scope)) + // No need to add imported enities that are not local declaration. + return; + + auto *LocalScope = cast(Scope)->getNonLexicalBlockFileScope(); + ImportedEntities[LocalScope].push_back(IE); + } + /// addRange - Add an address range to the list of ranges for this unit. void addRange(RangeSpan Range); @@ -210,9 +221,6 @@ class DwarfCompileUnit final : public DwarfUnit { void createBaseTypeDIEs(); - DIE *findLocalScopeDIE(const DIScope *S); - DIE *getOrCreateContextDIE(const DIScope *Ty) override; - /// Construct a DIE for this subprogram scope. DIE &constructSubprogramScopeDIE(const DISubprogram *Sub, LexicalScope *Scope); @@ -251,9 +259,8 @@ class DwarfCompileUnit final : public DwarfUnit { void constructCallSiteParmEntryDIEs(DIE &CallSiteDIE, SmallVector &Params); - /// Construct DIE for an imported entity. - DIE *createImportedEntityDIE(const DIImportedEntity *IE); - void createAndAddImportedEntityDIE(const DIImportedEntity *IE); + /// Construct import_module DIE. + DIE *constructImportedEntityDIE(const DIImportedEntity *Module); void finishSubprogramDefinition(const DISubprogram *SP); void finishEntityDefinition(const DbgEntity *Entity); @@ -350,10 +357,6 @@ class DwarfCompileUnit final : public DwarfUnit { bool hasDwarfPubSections() const; void addBaseTypeRef(DIEValueList &Die, int64_t Idx); - - void recordLocalScopeWithDecls(const DILocalScope *S) { - LocalScopesWithLocalDecls.insert(S); - } }; } // end namespace llvm diff --git a/llvm/lib/CodeGen/AsmPrinter/DwarfDebug.cpp b/llvm/lib/CodeGen/AsmPrinter/DwarfDebug.cpp index 8f5bc3a07c54..0089753c0408 100644 --- a/llvm/lib/CodeGen/AsmPrinter/DwarfDebug.cpp +++ b/llvm/lib/CodeGen/AsmPrinter/DwarfDebug.cpp @@ -516,7 +516,7 @@ void DwarfDebug::addSubprogramNames(const DICompileUnit &CU, // well into the name table. Only do that if we are going to actually emit // that name. 
if (SP->getLinkageName() != "" && SP->getName() != SP->getLinkageName() && - (useAllLinkageNames() || InfoHolder.getAbstractScopeDIEs().lookup(SP))) + (useAllLinkageNames() || InfoHolder.getAbstractSPDies().lookup(SP))) addAccelName(CU, SP->getLinkageName(), Die); // If this is an Objective-C selector name add it to the ObjC accelerator @@ -1067,45 +1067,6 @@ void DwarfDebug::finishUnitAttributes(const DICompileUnit *DIUnit, } } } - -// Collect local scopes that contain any local declarations -// (excluding local variables) to be sure they will be emitted -// (see DwarfCompileUnit::createAndAddScopeChildren() for details). -static void collectLocalScopesWithDeclsFromCU(const DICompileUnit *CUNode, - DwarfCompileUnit &CU) { - auto getLocalScope = [](const DIScope *S) -> const DILocalScope * { - if (!S) - return nullptr; - if (isa(S)) - S = S->getScope(); - if (const auto *LScope = dyn_cast_or_null(S)) - return LScope->getNonLexicalBlockFileScope(); - return nullptr; - }; - - for (auto *GVE : CUNode->getGlobalVariables()) - if (auto *LScope = getLocalScope(GVE->getVariable()->getScope())) - CU.recordLocalScopeWithDecls(LScope); - - for (auto *Ty : CUNode->getEnumTypes()) - if (auto *LScope = getLocalScope(Ty->getScope())) - CU.recordLocalScopeWithDecls(LScope); - - for (auto *Ty : CUNode->getRetainedTypes()) - if (DIType *RT = dyn_cast(Ty)) - if (auto *LScope = getLocalScope(RT->getScope())) - CU.recordLocalScopeWithDecls(LScope); - - for (auto *IE : CUNode->getImportedEntities()) - if (auto *LScope = getLocalScope(IE->getScope())) - CU.recordLocalScopeWithDecls(LScope); - - // FIXME: We know nothing about local records and typedefs here. - // since nothing but local variables (and members of local records) - // references them. So that they will be emitted in a first available - // parent scope DIE. -} - // Create new DwarfCompileUnit for the given metadata node with tag // DW_TAG_compile_unit. DwarfCompileUnit & @@ -1120,6 +1081,9 @@ DwarfDebug::getOrCreateDwarfCompileUnit(const DICompileUnit *DIUnit) { DwarfCompileUnit &NewCU = *OwnedUnit; InfoHolder.addUnit(std::move(OwnedUnit)); + for (auto *IE : DIUnit->getImportedEntities()) + NewCU.addImportedEntity(IE); + // LTO with assembly output shares a single line table amongst multiple CUs. // To avoid the compilation directory being ambiguous, let the line table // explicitly describe the directory of all files, never relying on the @@ -1139,14 +1103,17 @@ DwarfDebug::getOrCreateDwarfCompileUnit(const DICompileUnit *DIUnit) { CUMap.insert({DIUnit, &NewCU}); CUDieMap.insert({&NewCU.getUnitDie(), &NewCU}); - - // Record local scopes, that have some globals (static locals), - // imports or types declared within. - collectLocalScopesWithDeclsFromCU(DIUnit, NewCU); - return NewCU; } +void DwarfDebug::constructAndAddImportedEntityDIE(DwarfCompileUnit &TheCU, + const DIImportedEntity *N) { + if (isa(N->getScope())) + return; + if (DIE *D = TheCU.getOrCreateContextDIE(N->getScope())) + D->addChild(TheCU.constructImportedEntityDIE(N)); +} + // Emit all Dwarf sections that should come prior to the content. Create // global DIEs and emit initial debug info sections. This is invoked by // the target AsmPrinter. @@ -1402,7 +1369,6 @@ void DwarfDebug::endModule() { assert(CurFn == nullptr); assert(CurMI == nullptr); - // Collect global variables info. 
DenseMap> GVMap; for (const GlobalVariable &Global : MMI->getModule()->globals()) { @@ -1462,7 +1428,7 @@ void DwarfDebug::endModule() { // Emit imported entities last so that the relevant context // is already available. for (auto *IE : CUNode->getImportedEntities()) - CU->createAndAddImportedEntityDIE(IE); + constructAndAddImportedEntityDIE(*CU, IE); CU->createBaseTypeDIEs(); } diff --git a/llvm/lib/CodeGen/AsmPrinter/DwarfDebug.h b/llvm/lib/CodeGen/AsmPrinter/DwarfDebug.h index 79f8423af602..ff2589b3c953 100644 --- a/llvm/lib/CodeGen/AsmPrinter/DwarfDebug.h +++ b/llvm/lib/CodeGen/AsmPrinter/DwarfDebug.h @@ -316,13 +316,11 @@ class DwarfDebug : public DebugHandlerBase { /// can refer to them in spite of insertions into this list. DebugLocStream DebugLocs; - using SubprogramSetVector = - SetVector, - SmallPtrSet>; - /// This is a collection of subprogram MDNodes that are processed to /// create DIEs. - SubprogramSetVector ProcessedSPNodes; + SetVector, + SmallPtrSet> + ProcessedSPNodes; /// If nonnull, stores the current machine function we're processing. const MachineFunction *CurFn = nullptr; @@ -600,6 +598,10 @@ class DwarfDebug : public DebugHandlerBase { void finishUnitAttributes(const DICompileUnit *DIUnit, DwarfCompileUnit &NewCU); + /// Construct imported_module or imported_declaration DIE. + void constructAndAddImportedEntityDIE(DwarfCompileUnit &TheCU, + const DIImportedEntity *N); + /// Register a source line with debug info. Returns the unique /// label that was emitted and which provides correspondence to the /// source line list. diff --git a/llvm/lib/CodeGen/AsmPrinter/DwarfFile.h b/llvm/lib/CodeGen/AsmPrinter/DwarfFile.h index 464f4f048016..79a6ce7801b7 100644 --- a/llvm/lib/CodeGen/AsmPrinter/DwarfFile.h +++ b/llvm/lib/CodeGen/AsmPrinter/DwarfFile.h @@ -26,7 +26,6 @@ class DbgEntity; class DbgVariable; class DbgLabel; class DINode; -class DILocalScope; class DwarfCompileUnit; class DwarfUnit; class LexicalScope; @@ -88,7 +87,7 @@ class DwarfFile { DenseMap ScopeLabels; // Collection of abstract subprogram DIEs. - DenseMap AbstractLocalScopeDIEs; + DenseMap AbstractSPDies; DenseMap> AbstractEntities; /// Maps MDNodes for type system with the corresponding DIEs. These DIEs can @@ -163,8 +162,8 @@ class DwarfFile { return ScopeLabels; } - DenseMap &getAbstractScopeDIEs() { - return AbstractLocalScopeDIEs; + DenseMap &getAbstractSPDies() { + return AbstractSPDies; } DenseMap> &getAbstractEntities() { diff --git a/llvm/lib/CodeGen/AsmPrinter/DwarfUnit.cpp b/llvm/lib/CodeGen/AsmPrinter/DwarfUnit.cpp index 7edf9be92eac..6b6d63f14f87 100644 --- a/llvm/lib/CodeGen/AsmPrinter/DwarfUnit.cpp +++ b/llvm/lib/CodeGen/AsmPrinter/DwarfUnit.cpp @@ -1219,7 +1219,7 @@ bool DwarfUnit::applySubprogramDefinitionAttributes(const DISubprogram *SP, "decl has a linkage name and it is different"); if (DeclLinkageName.empty() && // Always emit it for abstract subprograms. - (DD->useAllLinkageNames() || DU->getAbstractScopeDIEs().lookup(SP))) + (DD->useAllLinkageNames() || DU->getAbstractSPDies().lookup(SP))) addLinkageName(SPDie, LinkageName); if (!DeclDie) diff --git a/llvm/lib/CodeGen/AsmPrinter/DwarfUnit.h b/llvm/lib/CodeGen/AsmPrinter/DwarfUnit.h index 1643edf05b7a..54b0079dd7ce 100644 --- a/llvm/lib/CodeGen/AsmPrinter/DwarfUnit.h +++ b/llvm/lib/CodeGen/AsmPrinter/DwarfUnit.h @@ -250,7 +250,7 @@ class DwarfUnit : public DIEUnit { DIE *getOrCreateTypeDIE(const MDNode *TyNode); /// Get context owner's DIE. 
- virtual DIE *getOrCreateContextDIE(const DIScope *Context); + DIE *getOrCreateContextDIE(const DIScope *Context); /// Construct DIEs for types that contain vtables. void constructContainingTypeDIEs(); diff --git a/llvm/test/DebugInfo/Generic/import-inlined-declaration.ll b/llvm/test/DebugInfo/Generic/import-inlined-declaration.ll deleted file mode 100644 index 471526b1395c..000000000000 --- a/llvm/test/DebugInfo/Generic/import-inlined-declaration.ll +++ /dev/null @@ -1,72 +0,0 @@ -; RUN: %llc_dwarf -O0 -filetype=obj < %s | llvm-dwarfdump - | FileCheck --implicit-check-not "{{DW_TAG|NULL}}" %s - -; namespace ns { -; inline __attribute__((always_inline)) -; void foo() { int a = 4; } -; } -; -; void goo() { -; using ns::foo; -; foo(); -; } - -; Ensure that imported declarations reference the correct subprograms even if -; those subprograms are inlined. - -; CHECK: DW_TAG_compile_unit -; CHECK: DW_TAG_namespace -; CHECK: DW_AT_name ("ns") -; CHECK: [[FOO:0x.*]]: DW_TAG_subprogram -; CHECK: DW_AT_name ("foo") -; CHECK: DW_TAG_variable -; CHECK: NULL -; CHECK: NULL -; CHECK: DW_TAG_base_type -; CHECK: DW_TAG_subprogram -; CHECK: DW_AT_name ("goo") -; CHECK: DW_TAG_inlined_subroutine -; CHECK: DW_AT_abstract_origin ([[FOO]] -; CHECK: DW_TAG_variable -; CHECK: NULL -; CHECK: DW_TAG_imported_declaration -; CHECK: DW_AT_import ([[FOO]]) -; CHECK: NULL -; CHECK: NULL - -; Function Attrs: mustprogress noinline optnone uwtable -define dso_local void @_Z3goov() !dbg !4 { -entry: - %a.i = alloca i32, align 4 - call void @llvm.dbg.declare(metadata i32* %a.i, metadata !16, metadata !DIExpression()), !dbg !18 - store i32 4, i32* %a.i, align 4, !dbg !18 - ret void, !dbg !20 -} - -; Function Attrs: nofree nosync nounwind readnone speculatable willreturn -declare void @llvm.dbg.declare(metadata, metadata, metadata) - -!llvm.dbg.cu = !{!0} -!llvm.module.flags = !{!10, !11, !12, !13, !14} -!llvm.ident = !{!15} - -!0 = distinct !DICompileUnit(language: DW_LANG_C_plus_plus_14, file: !1, producer: "clang version 14.0.0", isOptimized: false, runtimeVersion: 0, emissionKind: FullDebug, imports: !2, splitDebugInlining: false, nameTableKind: None) -!1 = !DIFile(filename: "imported-inlined-declaration.cpp", directory: "") -!2 = !{!3} -!3 = !DIImportedEntity(tag: DW_TAG_imported_declaration, scope: !4, entity: !8, file: !1, line: 7) -!4 = distinct !DISubprogram(name: "goo", linkageName: "_Z3goov", scope: !1, file: !1, line: 6, type: !5, scopeLine: 6, flags: DIFlagPrototyped, spFlags: DISPFlagDefinition, unit: !0, retainedNodes: !7) -!5 = !DISubroutineType(types: !6) -!6 = !{null} -!7 = !{} -!8 = distinct !DISubprogram(name: "foo", linkageName: "_ZN2ns3fooEv", scope: !9, file: !1, line: 3, type: !5, scopeLine: 3, flags: DIFlagPrototyped, spFlags: DISPFlagDefinition, unit: !0, retainedNodes: !7) -!9 = !DINamespace(name: "ns", scope: null) -!10 = !{i32 7, !"Dwarf Version", i32 4} -!11 = !{i32 2, !"Debug Info Version", i32 3} -!12 = !{i32 1, !"wchar_size", i32 4} -!13 = !{i32 7, !"uwtable", i32 1} -!14 = !{i32 7, !"frame-pointer", i32 2} -!15 = !{!"clang version 14.0.0"} -!16 = !DILocalVariable(name: "a", scope: !8, file: !1, line: 3, type: !17) -!17 = !DIBasicType(name: "int", size: 32, encoding: DW_ATE_signed) -!18 = !DILocation(line: 3, column: 18, scope: !8, inlinedAt: !19) -!19 = distinct !DILocation(line: 8, column: 2, scope: !4) -!20 = !DILocation(line: 9, column: 1, scope: !4) diff --git a/llvm/test/DebugInfo/Generic/imported-name-inlined.ll b/llvm/test/DebugInfo/Generic/imported-name-inlined.ll index 
a26b687b1f67..a3aaaff66f85 100644 --- a/llvm/test/DebugInfo/Generic/imported-name-inlined.ll +++ b/llvm/test/DebugInfo/Generic/imported-name-inlined.ll @@ -13,17 +13,21 @@ ; Ensure that top level imported declarations don't produce an extra degenerate ; concrete subprogram definition. +; FIXME: imported entities should only be emitted to the abstract origin if one is present + ; CHECK: DW_TAG_compile_unit ; CHECK: DW_TAG_subprogram ; CHECK: DW_AT_name ("f1") ; CHECK: DW_TAG_imported_declaration ; CHECK: NULL +; CHECK: DW_TAG_namespace +; CHECK: DW_TAG_subprogram +; CHECK: NULL ; CHECK: DW_TAG_subprogram ; CHECK: DW_AT_name ("f2") ; CHECK: DW_TAG_inlined_subroutine -; CHECK: NULL -; CHECK: DW_TAG_namespace -; CHECK: DW_TAG_subprogram +; CHECK: DW_TAG_imported_declaration +; CHECK: NULL ; CHECK: NULL ; CHECK: NULL diff --git a/llvm/test/DebugInfo/Generic/inlined-local-type.ll b/llvm/test/DebugInfo/Generic/inlined-local-type.ll deleted file mode 100644 index b3f3f80c07d2..000000000000 --- a/llvm/test/DebugInfo/Generic/inlined-local-type.ll +++ /dev/null @@ -1,125 +0,0 @@ -; RUN: %llc_dwarf -O0 -filetype=obj < %s | llvm-dwarfdump -debug-info - | FileCheck --implicit-check-not "{{DW_TAG|NULL}}" %s - -; inline __attribute__((always_inline)) -; int removed() { struct A {int i;}; struct A a; return a.i++; } -; -; __attribute__((always_inline)) -; int not_removed() { struct B {int i;}; struct B b; return b.i++; } -; -; int foo() { return removed() + not_removed(); }} - -; Ensure that function-local types have the correct subprogram parent even if -; those subprograms are inlined. - -; CHECK: DW_TAG_compile_unit -; CHECK: DW_TAG_subprogram -; CHECK: DW_AT_abstract_origin ({{0x.*}} "not_removed") -; CHECK: DW_TAG_variable -; CHECK: NULL -; CHECK: DW_TAG_subprogram -; CHECK: DW_AT_name ("removed") -; CHECK: [[A:0x.*]]: DW_TAG_structure_type -; CHECK: DW_AT_name ("A") -; CHECK: DW_TAG_member -; CHECK: NULL -; CHECK: DW_TAG_variable -; CHECK: DW_AT_type ([[A]] "A") -; CHECK: NULL -; CHECK: DW_TAG_base_type -; CHECK: DW_TAG_subprogram -; CHECK: DW_AT_name ("not_removed") -; CHECK: [[B:0x.*]]: DW_TAG_structure_type -; CHECK: DW_AT_name ("B") -; CHECK: DW_TAG_member -; CHECK: NULL -; CHECK: DW_TAG_variable -; CHECK: DW_AT_type ([[B]] "B") -; CHECK: NULL -; CHECK: DW_TAG_subprogram -; CHECK: DW_TAG_inlined_subroutine -; CHECK: DW_TAG_variable -; CHECK: NULL -; CHECK: DW_TAG_inlined_subroutine -; CHECK: DW_TAG_variable -; CHECK: NULL -; CHECK: NULL -; CHECK: NULL - -%struct.B = type { i32 } -%struct.A = type { i32 } - -define dso_local i32 @not_removed() !dbg !12 { - %1 = alloca %struct.B, align 4 - call void @llvm.dbg.declare(metadata %struct.B* %1, metadata !18, metadata !DIExpression()), !dbg !22 - %2 = getelementptr inbounds %struct.B, %struct.B* %1, i32 0, i32 0, !dbg !23 - %3 = load i32, i32* %2, align 4, !dbg !24 - %4 = add nsw i32 %3, 1, !dbg !24 - store i32 %4, i32* %2, align 4, !dbg !24 - ret i32 %3, !dbg !25 -} - -declare void @llvm.dbg.declare(metadata, metadata, metadata) - -define dso_local i32 @foo() !dbg !26 { - %1 = alloca %struct.A, align 4 - %2 = alloca %struct.B, align 4 - call void @llvm.dbg.declare(metadata %struct.A* %1, metadata !27, metadata !DIExpression()), !dbg !32 - %3 = getelementptr inbounds %struct.A, %struct.A* %1, i32 0, i32 0, !dbg !34 - %4 = load i32, i32* %3, align 4, !dbg !35 - %5 = add nsw i32 %4, 1, !dbg !35 - store i32 %5, i32* %3, align 4, !dbg !35 - call void @llvm.dbg.declare(metadata %struct.B* %2, metadata !18, metadata !DIExpression()), !dbg !36 - %6 = 
getelementptr inbounds %struct.B, %struct.B* %2, i32 0, i32 0, !dbg !38 - %7 = load i32, i32* %6, align 4, !dbg !39 - %8 = add nsw i32 %7, 1, !dbg !39 - store i32 %8, i32* %6, align 4, !dbg !39 - %9 = add nsw i32 %4, %7, !dbg !40 - ret i32 %9, !dbg !41 -} - -!llvm.dbg.cu = !{!0} -!llvm.module.flags = !{!2, !3, !4, !5, !6, !7, !8, !9, !10} -!llvm.ident = !{!11} - -!0 = distinct !DICompileUnit(language: DW_LANG_C99, file: !1, producer: "clang version 14.0.0", isOptimized: false, runtimeVersion: 0, emissionKind: FullDebug, splitDebugInlining: false, nameTableKind: None) -!1 = !DIFile(filename: "inlined-local-type.cpp", directory: "") -!2 = !{i32 7, !"Dwarf Version", i32 4} -!3 = !{i32 2, !"Debug Info Version", i32 3} -!4 = !{i32 1, !"wchar_size", i32 4} -!5 = !{i32 1, !"branch-target-enforcement", i32 0} -!6 = !{i32 1, !"sign-return-address", i32 0} -!7 = !{i32 1, !"sign-return-address-all", i32 0} -!8 = !{i32 1, !"sign-return-address-with-bkey", i32 0} -!9 = !{i32 7, !"uwtable", i32 1} -!10 = !{i32 7, !"frame-pointer", i32 1} -!11 = !{!"clang version 14.0.0"} -!12 = distinct !DISubprogram(name: "not_removed", scope: !13, file: !13, line: 5, type: !14, scopeLine: 5, spFlags: DISPFlagDefinition, unit: !0, retainedNodes: !17) -!13 = !DIFile(filename: "inlined-local-type.cpp", directory: "") -!14 = !DISubroutineType(types: !15) -!15 = !{!16} -!16 = !DIBasicType(name: "int", size: 32, encoding: DW_ATE_signed) -!17 = !{} -!18 = !DILocalVariable(name: "b", scope: !12, file: !13, line: 5, type: !19) -!19 = distinct !DICompositeType(tag: DW_TAG_structure_type, name: "B", scope: !12, file: !13, line: 5, size: 32, elements: !20) -!20 = !{!21} -!21 = !DIDerivedType(tag: DW_TAG_member, name: "i", scope: !19, file: !13, line: 5, baseType: !16, size: 32) -!22 = !DILocation(line: 5, column: 49, scope: !12) -!23 = !DILocation(line: 5, column: 61, scope: !12) -!24 = !DILocation(line: 5, column: 62, scope: !12) -!25 = !DILocation(line: 5, column: 52, scope: !12) -!26 = distinct !DISubprogram(name: "foo", scope: !13, file: !13, line: 7, type: !14, scopeLine: 7, spFlags: DISPFlagDefinition, unit: !0, retainedNodes: !17) -!27 = !DILocalVariable(name: "a", scope: !28, file: !13, line: 2, type: !29) -!28 = distinct !DISubprogram(name: "removed", scope: !13, file: !13, line: 2, type: !14, scopeLine: 2, spFlags: DISPFlagDefinition, unit: !0, retainedNodes: !17) -!29 = distinct !DICompositeType(tag: DW_TAG_structure_type, name: "A", scope: !28, file: !13, line: 2, size: 32, elements: !30) -!30 = !{!31} -!31 = !DIDerivedType(tag: DW_TAG_member, name: "i", scope: !29, file: !13, line: 2, baseType: !16, size: 32) -!32 = !DILocation(line: 2, column: 45, scope: !28, inlinedAt: !33) -!33 = distinct !DILocation(line: 7, column: 20, scope: !26) -!34 = !DILocation(line: 2, column: 57, scope: !28, inlinedAt: !33) -!35 = !DILocation(line: 2, column: 58, scope: !28, inlinedAt: !33) -!36 = !DILocation(line: 5, column: 49, scope: !12, inlinedAt: !37) -!37 = distinct !DILocation(line: 7, column: 32, scope: !26) -!38 = !DILocation(line: 5, column: 61, scope: !12, inlinedAt: !37) -!39 = !DILocation(line: 5, column: 62, scope: !12, inlinedAt: !37) -!40 = !DILocation(line: 7, column: 30, scope: !26) -!41 = !DILocation(line: 7, column: 13, scope: !26) diff --git a/llvm/test/DebugInfo/Generic/inlined-static-var.ll b/llvm/test/DebugInfo/Generic/inlined-static-var.ll deleted file mode 100644 index 60c986fc2b64..000000000000 --- a/llvm/test/DebugInfo/Generic/inlined-static-var.ll +++ /dev/null @@ -1,92 +0,0 @@ -; RUN: %llc_dwarf -O0 
-filetype=obj < %s | llvm-dwarfdump -debug-info - | FileCheck --implicit-check-not "{{DW_TAG|NULL}}" %s - -; inline __attribute__((always_inline)) -; int removed() { static int A; return A++; } -; -; __attribute__((always_inline)) -; int not_removed() { static int B; return B++; } -; -; int foo() { return removed() + not_removed(); } - -; Ensure that global variables belong to the correct subprograms even if those -; subprograms are inlined. - -; CHECK: DW_TAG_compile_unit -; CHECK: DW_TAG_subprogram -; CHECK: DW_AT_abstract_origin {{.*}} "_Z11not_removedv" -; CHECK: DW_TAG_subprogram -; CHECK: DW_AT_name ("removed") -; CHECK: DW_TAG_variable -; CHECK: DW_AT_name ("A") -; CHECK: NULL -; CHECK: DW_TAG_base_type -; CHECK: DW_TAG_subprogram -; CHECK: DW_AT_name ("not_removed") -; CHECK: DW_TAG_variable -; CHECK: DW_AT_name ("B") -; CHECK: NULL -; CHECK: DW_TAG_subprogram -; CHECK: DW_AT_name ("foo") -; CHECK: DW_TAG_inlined_subroutine -; CHECK: DW_TAG_inlined_subroutine -; CHECK: NULL -; CHECK: NULL - -@_ZZ11not_removedvE1A = internal global i32 0, align 4, !dbg !0 -@_ZZ7removedvE1A = linkonce_odr dso_local global i32 0, align 4, !dbg !10 - -define dso_local i32 @_Z11not_removedv() !dbg !2 { - %1 = load i32, i32* @_ZZ11not_removedvE1A, align 4, !dbg !24 - %2 = add nsw i32 %1, 1, !dbg !24 - store i32 %2, i32* @_ZZ11not_removedvE1A, align 4, !dbg !24 - ret i32 %1, !dbg !25 -} - -define dso_local i32 @_Z3foov() !dbg !26 { - %1 = load i32, i32* @_ZZ7removedvE1A, align 4, !dbg !27 - %2 = add nsw i32 %1, 1, !dbg !27 - store i32 %2, i32* @_ZZ7removedvE1A, align 4, !dbg !27 - %3 = load i32, i32* @_ZZ11not_removedvE1A, align 4, !dbg !29 - %4 = add nsw i32 %3, 1, !dbg !29 - store i32 %4, i32* @_ZZ11not_removedvE1A, align 4, !dbg !29 - %5 = add nsw i32 %1, %3, !dbg !31 - ret i32 %5, !dbg !32 -} - -!llvm.dbg.cu = !{!7} -!llvm.module.flags = !{!14, !15, !16, !17, !18, !19, !20, !21, !22} -!llvm.ident = !{!23} - -!0 = !DIGlobalVariableExpression(var: !1, expr: !DIExpression()) -!1 = distinct !DIGlobalVariable(name: "B", scope: !2, file: !3, line: 5, type: !6, isLocal: true, isDefinition: true) -!2 = distinct !DISubprogram(name: "not_removed", linkageName: "_Z11not_removedv", scope: !3, file: !3, line: 5, type: !4, scopeLine: 5, flags: DIFlagPrototyped, spFlags: DISPFlagDefinition, unit: !7, retainedNodes: !13) -!3 = !DIFile(filename: "example.cpp", directory: "") -!4 = !DISubroutineType(types: !5) -!5 = !{!6} -!6 = !DIBasicType(name: "int", size: 32, encoding: DW_ATE_signed) -!7 = distinct !DICompileUnit(language: DW_LANG_C_plus_plus_14, file: !8, producer: "clang version 14.0.0", isOptimized: false, runtimeVersion: 0, emissionKind: FullDebug, globals: !9, splitDebugInlining: false, nameTableKind: None) -!8 = !DIFile(filename: "example.cpp", directory: "") -!9 = !{!0, !10} -!10 = !DIGlobalVariableExpression(var: !11, expr: !DIExpression()) -!11 = distinct !DIGlobalVariable(name: "A", scope: !12, file: !3, line: 2, type: !6, isLocal: false, isDefinition: true) -!12 = distinct !DISubprogram(name: "removed", linkageName: "_Z7removedv", scope: !3, file: !3, line: 2, type: !4, scopeLine: 2, flags: DIFlagPrototyped, spFlags: DISPFlagDefinition, unit: !7, retainedNodes: !13) -!13 = !{} -!14 = !{i32 7, !"Dwarf Version", i32 4} -!15 = !{i32 2, !"Debug Info Version", i32 3} -!16 = !{i32 1, !"wchar_size", i32 4} -!17 = !{i32 1, !"branch-target-enforcement", i32 0} -!18 = !{i32 1, !"sign-return-address", i32 0} -!19 = !{i32 1, !"sign-return-address-all", i32 0} -!20 = !{i32 1, !"sign-return-address-with-bkey", i32 0} 
-!21 = !{i32 7, !"uwtable", i32 1} -!22 = !{i32 7, !"frame-pointer", i32 1} -!23 = !{!"clang version 14.0.0"} -!24 = !DILocation(line: 5, column: 43, scope: !2) -!25 = !DILocation(line: 5, column: 35, scope: !2) -!26 = distinct !DISubprogram(name: "foo", linkageName: "_Z3foov", scope: !3, file: !3, line: 7, type: !4, scopeLine: 7, flags: DIFlagPrototyped, spFlags: DISPFlagDefinition, unit: !7, retainedNodes: !13) -!27 = !DILocation(line: 2, column: 39, scope: !12, inlinedAt: !28) -!28 = distinct !DILocation(line: 7, column: 20, scope: !26) -!29 = !DILocation(line: 5, column: 43, scope: !2, inlinedAt: !30) -!30 = distinct !DILocation(line: 7, column: 32, scope: !26) -!31 = !DILocation(line: 7, column: 30, scope: !26) -!32 = !DILocation(line: 7, column: 13, scope: !26) diff --git a/llvm/test/DebugInfo/Generic/lexical_block_static.ll b/llvm/test/DebugInfo/Generic/lexical_block_static.ll deleted file mode 100644 index 56327b27cdb1..000000000000 --- a/llvm/test/DebugInfo/Generic/lexical_block_static.ll +++ /dev/null @@ -1,141 +0,0 @@ -; RUN: %llc_dwarf -O0 -filetype=obj < %s | llvm-dwarfdump -debug-info - | FileCheck --implicit-check-not "{{DW_TAG|NULL}}" %s - -; inline __attribute__((always_inline)) -; int removed() { -; { -; static int A; -; return A++; -; } -; } -; -; __attribute__((always_inline)) -; int not_removed() { -; { -; static int B; -; return B++; -; } -; } -; -; int foo() { -; { -; static int C; -; return ++C + removed() + not_removed(); -; } -; } - -; CHECK: DW_TAG_compile_unit - -; Out-of-line definition of `not_removed()`. -; The empty lexical block is created to match abstract origin. -; CHECK: DW_TAG_subprogram -; CHECK: DW_AT_abstract_origin {{.*}} "_Z11not_removedv" -; CHECK: DW_TAG_lexical_block -; CHECK: NULL - -; Abstract definition of `removed()` -; CHECK: DW_TAG_subprogram -; CHECK: DW_AT_name ("removed") -; CHECK: DW_AT_inline (DW_INL_inlined) -; CHECK: DW_TAG_lexical_block -; CHECK: DW_TAG_variable -; CHECK: DW_AT_name ("A") -; CHECK: DW_AT_location -; CHECK: NULL -; CHECK: NULL -; CHECK: DW_TAG_base_type - -; Abstract definition of `not_removed()` -; CHECK: DW_TAG_subprogram -; CHECK: DW_AT_name ("not_removed") -; CHECK: DW_AT_inline (DW_INL_inlined) -; CHECK: DW_TAG_lexical_block -; CHECK: DW_TAG_variable -; CHECK: DW_AT_name ("B") -; CHECK: DW_AT_location -; CHECK: NULL -; CHECK: NULL - -; Definition of foo(). 
-; CHECK: DW_TAG_subprogram -; CHECK: DW_AT_name ("foo") -; CHECK: DW_TAG_lexical_block -; CHECK: DW_TAG_inlined_subroutine -; CHECK: DW_TAG_lexical_block -; CHECK: NULL -; CHECK: DW_TAG_inlined_subroutine -; CHECK: DW_TAG_lexical_block -; CHECK: NULL -; CHECK: DW_TAG_variable -; CHECK: DW_AT_name ("C") -; CHECK: DW_AT_location -; CHECK: NULL -; CHECK: NULL -; CHECK: NULL - -@_ZZ11not_removedvE1B = internal global i32 0, align 4, !dbg !0 -@_ZZ3foovE1C = internal global i32 0, align 4, !dbg !10 -@_ZZ7removedvE1A = linkonce_odr dso_local global i32 0, align 4, !dbg !15 - -define dso_local i32 @_Z11not_removedv() !dbg !4 { -entry: - %0 = load i32, i32* @_ZZ11not_removedvE1B, align 4, !dbg !25 - %inc = add nsw i32 %0, 1, !dbg !25 - store i32 %inc, i32* @_ZZ11not_removedvE1B, align 4, !dbg !25 - ret i32 %0, !dbg !26 -} - -define dso_local i32 @_Z3foov() !dbg !13 { -entry: - %0 = load i32, i32* @_ZZ3foovE1C, align 4, !dbg !27 - %inc = add nsw i32 %0, 1, !dbg !27 - store i32 %inc, i32* @_ZZ3foovE1C, align 4, !dbg !27 - %1 = load i32, i32* @_ZZ7removedvE1A, align 4, !dbg !28 - %inc.i3 = add nsw i32 %1, 1, !dbg !28 - store i32 %inc.i3, i32* @_ZZ7removedvE1A, align 4, !dbg !28 - %add = add nsw i32 %inc, %1, !dbg !30 - %2 = load i32, i32* @_ZZ11not_removedvE1B, align 4, !dbg !31 - %inc.i = add nsw i32 %2, 1, !dbg !31 - store i32 %inc.i, i32* @_ZZ11not_removedvE1B, align 4, !dbg !31 - %add2 = add nsw i32 %add, %2, !dbg !33 - ret i32 %add2, !dbg !34 -} - -!llvm.dbg.cu = !{!8} -!llvm.module.flags = !{!19, !20, !21, !22, !23} -!llvm.ident = !{!24} - -!0 = !DIGlobalVariableExpression(var: !1, expr: !DIExpression()) -!1 = distinct !DIGlobalVariable(name: "B", scope: !2, file: !3, line: 13, type: !7, isLocal: true, isDefinition: true) -!2 = distinct !DILexicalBlock(scope: !4, file: !3, line: 12, column: 3) -!3 = !DIFile(filename: "test_static.cpp", directory: "/") -!4 = distinct !DISubprogram(name: "not_removed", linkageName: "_Z11not_removedv", scope: !3, file: !3, line: 11, type: !5, scopeLine: 11, flags: DIFlagPrototyped, spFlags: DISPFlagDefinition, unit: !8, retainedNodes: !14) -!5 = !DISubroutineType(types: !6) -!6 = !{!7} -!7 = !DIBasicType(name: "int", size: 32, encoding: DW_ATE_signed) -!8 = distinct !DICompileUnit(language: DW_LANG_C_plus_plus_14, file: !3, producer: "clang version 14.0.0 (git@github.com:llvm/llvm-project.git e1d09ac2d118825452bfc26e44565f7f4122fd6d)", isOptimized: false, runtimeVersion: 0, emissionKind: FullDebug, globals: !9, splitDebugInlining: false, nameTableKind: None) -!9 = !{!0, !10, !15} -!10 = !DIGlobalVariableExpression(var: !11, expr: !DIExpression()) -!11 = distinct !DIGlobalVariable(name: "C", scope: !12, file: !3, line: 20, type: !7, isLocal: true, isDefinition: true) -!12 = distinct !DILexicalBlock(scope: !13, file: !3, line: 19, column: 3) -!13 = distinct !DISubprogram(name: "foo", linkageName: "_Z3foov", scope: !3, file: !3, line: 18, type: !5, scopeLine: 18, flags: DIFlagPrototyped, spFlags: DISPFlagDefinition, unit: !8, retainedNodes: !14) -!14 = !{} -!15 = !DIGlobalVariableExpression(var: !16, expr: !DIExpression()) -!16 = distinct !DIGlobalVariable(name: "A", scope: !17, file: !3, line: 5, type: !7, isLocal: false, isDefinition: true) -!17 = distinct !DILexicalBlock(scope: !18, file: !3, line: 4, column: 3) -!18 = distinct !DISubprogram(name: "removed", linkageName: "_Z7removedv", scope: !3, file: !3, line: 3, type: !5, scopeLine: 3, flags: DIFlagPrototyped, spFlags: DISPFlagDefinition, unit: !8, retainedNodes: !14) -!19 = !{i32 7, !"Dwarf Version", i32 4} 
-!20 = !{i32 2, !"Debug Info Version", i32 3} -!21 = !{i32 1, !"wchar_size", i32 4} -!22 = !{i32 7, !"uwtable", i32 1} -!23 = !{i32 7, !"frame-pointer", i32 2} -!24 = !{!"clang version 14.0.0 (git@github.com:llvm/llvm-project.git)"} -!25 = !DILocation(line: 14, column: 13, scope: !2) -!26 = !DILocation(line: 14, column: 5, scope: !2) -!27 = !DILocation(line: 21, column: 12, scope: !12) -!28 = !DILocation(line: 6, column: 13, scope: !17, inlinedAt: !29) -!29 = distinct !DILocation(line: 21, column: 18, scope: !12) -!30 = !DILocation(line: 21, column: 16, scope: !12) -!31 = !DILocation(line: 14, column: 13, scope: !2, inlinedAt: !32) -!32 = distinct !DILocation(line: 21, column: 30, scope: !12) -!33 = !DILocation(line: 21, column: 28, scope: !12) -!34 = !DILocation(line: 21, column: 5, scope: !12) diff --git a/llvm/test/DebugInfo/Generic/lexical_block_types.ll b/llvm/test/DebugInfo/Generic/lexical_block_types.ll deleted file mode 100644 index 1d4e9e7eb3e7..000000000000 --- a/llvm/test/DebugInfo/Generic/lexical_block_types.ll +++ /dev/null @@ -1,421 +0,0 @@ -; RUN: %llc_dwarf -O0 -filetype=obj < %s | llvm-dwarfdump -debug-info - | FileCheck --implicit-check-not "{{DW_TAG|NULL}}" %s - -; inline __attribute__((always_inline)) -; void removed() { -; struct A1 { int i; }; -; typedef int Int1; -; { -; struct I1 { Int1 j; }; -; struct C1 { typedef char Char1; Char1 c; }; -; A1 a1; a1.i++; -; { -; I1 i1; i1.j++; -; C1 c1; c1.c++; -; } -; } -; } -; -; __attribute__((always_inline)) -; void not_removed() { -; struct A2 { int i; }; -; typedef int Int2; -; { -; struct I2 { Int2 j; }; -; struct C2 { typedef char Char2; Char2 c; }; -; A2 a2; a2.i++; -; { -; I2 i2; i2.j++; -; C2 c2; c2.c++; -; } -; } -; } -; -; void foo() { -; struct A3 { int i; }; -; typedef int Int3; -; { -; struct I3 { Int3 j; }; -; { -; struct C3 { typedef char Char3; Char3 c; }; -; A3 a3; a3.i++; -; { -; I3 i3; i3.j++; -; C3 c3; c3.c++; -; } -; } -; } -; removed(); -; not_removed(); -; } -; -; CHECK: DW_TAG_compile_unit - -; Out-of-line definition of `not_removed()` shouldn't contain any debug info for types. -; CHECK: DW_TAG_subprogram -; CHECK: DW_AT_abstract_origin {{.*}} "_Z11not_removedv" -; CHECK: DW_TAG_lexical_block -; CHECK: DW_TAG_variable -; CHECK: DW_AT_abstract_origin {{.*}} "a2" -; CHECK: DW_TAG_lexical_block -; CHECK: DW_TAG_variable -; CHECK: DW_AT_abstract_origin {{.*}} "i2" -; CHECK: DW_TAG_variable -; CHECK: DW_AT_abstract_origin {{.*}} "c2" -; CHECK: NULL -; CHECK: NULL -; CHECK: NULL - -; Abstract definition of `removed()`. -; CHECK: DW_TAG_subprogram -; CHECK: DW_AT_name ("removed") -; CHECK: DW_AT_inline (DW_INL_inlined) - -; I1 and C1 defined in the first lexical block, typedef Char1 is a child of C1. -; CHECK: DW_TAG_lexical_block -; CHECK: DW_TAG_variable -; CHECK: DW_AT_name ("a1") -; CHECK: DW_AT_type {{.*}} "A1" -; CHECK: DW_TAG_lexical_block -; CHECK: DW_TAG_variable -; CHECK: DW_AT_type {{.*}} "I1" -; CHECK: DW_TAG_variable -; CHECK: DW_AT_type {{.*}} "C1" -; CHECK: NULL -; CHECK: DW_TAG_structure_type -; CHECK: DW_AT_name ("I1") -; CHECK: DW_TAG_member -; CHECK: DW_AT_type {{.*}} "Int1" -; CHECK: NULL -; CHECK: DW_TAG_structure_type -; CHECK: DW_AT_name ("C1") -; CHECK: DW_TAG_member -; CHECK: DW_AT_type {{.*}} "C1::Char1" -; CHECK: DW_TAG_typedef -; CHECK: DW_AT_name ("Char1") -; CHECK: NULL -; CHECK: NULL - -; A1 and typedef Int1 defined in subprogram scope. 
-; CHECK: DW_TAG_structure_type -; CHECK: DW_AT_name ("A1") -; CHECK: DW_TAG_member -; CHECK: NULL -; CHECK: DW_TAG_typedef -; CHECK: DW_AT_name ("Int1") -; CHECK: NULL - -; CHECK: DW_TAG_base_type -; CHECK: DW_TAG_base_type - -; Abstract definition of `not_removed()`. -; CHECK: DW_TAG_subprogram -; CHECK: DW_AT_name ("not_removed") -; CHECK: DW_AT_inline (DW_INL_inlined) - -; I2 and C2 defined in the first lexical block, typedef Char2 is a child of C2. -; CHECK: DW_TAG_lexical_block -; CHECK: DW_TAG_variable -; CHECK: DW_AT_name ("a2") -; CHECK: DW_AT_type {{.*}} "A2" -; CHECK: DW_TAG_lexical_block -; CHECK: DW_TAG_variable -; CHECK: DW_AT_name ("i2") -; CHECK: DW_AT_type {{.*}} "I2" -; CHECK: DW_TAG_variable -; CHECK: DW_AT_name ("c2") -; CHECK: DW_AT_type {{.*}} "C2" -; CHECK: NULL -; CHECK: DW_TAG_structure_type -; CHECK: DW_AT_name ("I2") -; CHECK: DW_TAG_member -; CHECK: DW_AT_type {{.*}} "Int2" -; CHECK: NULL -; CHECK: DW_TAG_structure_type -; CHECK: DW_AT_name ("C2") -; CHECK: DW_TAG_member -; CHECK: DW_AT_type {{.*}} "C2::Char2" -; CHECK: DW_TAG_typedef -; CHECK: DW_AT_name ("Char2") -; CHECK: NULL -; CHECK: NULL - -; A2 and typedef Int2 defined in subprogram scope. -; CHECK: DW_TAG_structure_type -; CHECK: DW_AT_name ("A2") -; CHECK: DW_TAG_member -; CHECK: NULL -; CHECK: DW_TAG_typedef -; CHECK: DW_AT_name ("Int2") -; CHECK: NULL - -; Definition of `foo()`. -; CHECK: DW_TAG_subprogram -; CHECK: DW_AT_name ("foo") - -; CHECK: DW_TAG_lexical_block -; CHECK: DW_TAG_variable -; CHECK: DW_AT_name ("a3") -; CHECK: DW_AT_type {{.*}} "A3" -; CHECK: DW_TAG_lexical_block -; CHECK: DW_TAG_variable -; CHECK: DW_AT_name ("i3") -; CHECK: DW_AT_type {{.*}} "I3" -; CHECK: DW_TAG_variable -; CHECK: DW_AT_name ("c3") -; CHECK: DW_AT_type {{.*}} "C3" -; CHECK: NULL - -; C3 has the inner lexical block scope, typedef Char3 is a child of C3. -; CHECK: DW_TAG_structure_type -; CHECK: DW_AT_name ("C3") -; CHECK: DW_TAG_member -; CHECK: DW_AT_type {{.*}} "C3::Char3" -; CHECK: DW_TAG_typedef -; CHECK: DW_AT_name ("Char3") -; CHECK: NULL -; CHECK: NULL - -; CHECK: DW_TAG_inlined_subroutine -; CHECK: DW_AT_abstract_origin {{.*}} "_Z7removedv" -; CHECK: DW_TAG_lexical_block -; CHECK: DW_TAG_variable -; CHECK: DW_TAG_lexical_block -; CHECK: DW_TAG_variable -; CHECK: DW_TAG_variable -; CHECK: NULL -; CHECK: NULL -; CHECK: NULL - -; CHECK: DW_TAG_inlined_subroutine -; CHECK: DW_AT_abstract_origin {{.*}} "_Z11not_removedv" -; CHECK: DW_TAG_lexical_block -; CHECK: DW_TAG_variable -; CHECK: DW_TAG_lexical_block -; CHECK: DW_TAG_variable -; CHECK: DW_TAG_variable -; CHECK: NULL -; CHECK: NULL -; CHECK: NULL - -; A3 and typedef Int3 defined in the subprogram scope. -; FIXME: I3 has subprogram scope here, but should be in the outer lexical block -; (which is ommitted). It wasn't placed correctly, because it's the only non-scope -; entity in the block and it isn't listed in retainedTypes; we simply wasn't aware -; about it while deciding whether to create a lexical block or not. 
-; CHECK: DW_TAG_structure_type -; CHECK: DW_AT_name ("A3") -; CHECK: DW_TAG_member -; CHECK: NULL -; CHECK: DW_TAG_structure_type -; CHECK: DW_AT_name ("I3") -; CHECK: DW_TAG_member -; CHECK: DW_AT_type {{.*}} "Int3" -; CHECK: NULL -; CHECK: DW_TAG_typedef -; CHECK: DW_AT_name ("Int3") -; CHECK: NULL -; CHECK: NULL - -%struct.A2 = type { i32 } -%struct.I2 = type { i32 } -%struct.C2 = type { i8 } -%struct.A1 = type { i32 } -%struct.I1 = type { i32 } -%struct.C1 = type { i8 } -%struct.A3 = type { i32 } -%struct.I3 = type { i32 } -%struct.C3 = type { i8 } - -define dso_local void @_Z11not_removedv() !dbg !8 { -entry: - %a2 = alloca %struct.A2, align 4 - %i2 = alloca %struct.I2, align 4 - %c2 = alloca %struct.C2, align 1 - call void @llvm.dbg.declare(metadata %struct.A2* %a2, metadata !12, metadata !DIExpression()), !dbg !18 - %i = getelementptr inbounds %struct.A2, %struct.A2* %a2, i32 0, i32 0, !dbg !19 - %0 = load i32, i32* %i, align 4, !dbg !20 - %inc = add nsw i32 %0, 1, !dbg !20 - store i32 %inc, i32* %i, align 4, !dbg !20 - call void @llvm.dbg.declare(metadata %struct.I2* %i2, metadata !21, metadata !DIExpression()), !dbg !27 - %j = getelementptr inbounds %struct.I2, %struct.I2* %i2, i32 0, i32 0, !dbg !28 - %1 = load i32, i32* %j, align 4, !dbg !29 - %inc1 = add nsw i32 %1, 1, !dbg !29 - store i32 %inc1, i32* %j, align 4, !dbg !29 - call void @llvm.dbg.declare(metadata %struct.C2* %c2, metadata !30, metadata !DIExpression()), !dbg !36 - %c = getelementptr inbounds %struct.C2, %struct.C2* %c2, i32 0, i32 0, !dbg !37 - %2 = load i8, i8* %c, align 1, !dbg !38 - %inc2 = add i8 %2, 1, !dbg !38 - store i8 %inc2, i8* %c, align 1, !dbg !38 - ret void, !dbg !39 -} - -declare void @llvm.dbg.declare(metadata, metadata, metadata) - -define dso_local void @_Z3foov() !dbg !40 { -entry: - %a1.i = alloca %struct.A1, align 4 - %i1.i = alloca %struct.I1, align 4 - %c1.i = alloca %struct.C1, align 1 - %a2.i = alloca %struct.A2, align 4 - %i2.i = alloca %struct.I2, align 4 - %c2.i = alloca %struct.C2, align 1 - %a3 = alloca %struct.A3, align 4 - %i3 = alloca %struct.I3, align 4 - %c3 = alloca %struct.C3, align 1 - call void @llvm.dbg.declare(metadata %struct.A3* %a3, metadata !41, metadata !DIExpression()), !dbg !47 - %i = getelementptr inbounds %struct.A3, %struct.A3* %a3, i32 0, i32 0, !dbg !48 - %0 = load i32, i32* %i, align 4, !dbg !49 - %inc = add nsw i32 %0, 1, !dbg !49 - store i32 %inc, i32* %i, align 4, !dbg !49 - call void @llvm.dbg.declare(metadata %struct.I3* %i3, metadata !50, metadata !DIExpression()), !dbg !56 - %j = getelementptr inbounds %struct.I3, %struct.I3* %i3, i32 0, i32 0, !dbg !57 - %1 = load i32, i32* %j, align 4, !dbg !58 - %inc1 = add nsw i32 %1, 1, !dbg !58 - store i32 %inc1, i32* %j, align 4, !dbg !58 - call void @llvm.dbg.declare(metadata %struct.C3* %c3, metadata !59, metadata !DIExpression()), !dbg !64 - %c = getelementptr inbounds %struct.C3, %struct.C3* %c3, i32 0, i32 0, !dbg !65 - %2 = load i8, i8* %c, align 1, !dbg !66 - %inc2 = add i8 %2, 1, !dbg !66 - store i8 %inc2, i8* %c, align 1, !dbg !66 - call void @llvm.dbg.declare(metadata %struct.A1* %a1.i, metadata !67, metadata !DIExpression()), !dbg !73 - %i.i3 = getelementptr inbounds %struct.A1, %struct.A1* %a1.i, i32 0, i32 0, !dbg !75 - %3 = load i32, i32* %i.i3, align 4, !dbg !76 - %inc.i4 = add nsw i32 %3, 1, !dbg !76 - store i32 %inc.i4, i32* %i.i3, align 4, !dbg !76 - call void @llvm.dbg.declare(metadata %struct.I1* %i1.i, metadata !77, metadata !DIExpression()), !dbg !83 - %j.i5 = getelementptr inbounds 
%struct.I1, %struct.I1* %i1.i, i32 0, i32 0, !dbg !84 - %4 = load i32, i32* %j.i5, align 4, !dbg !85 - %inc1.i6 = add nsw i32 %4, 1, !dbg !85 - store i32 %inc1.i6, i32* %j.i5, align 4, !dbg !85 - call void @llvm.dbg.declare(metadata %struct.C1* %c1.i, metadata !86, metadata !DIExpression()), !dbg !91 - %c.i7 = getelementptr inbounds %struct.C1, %struct.C1* %c1.i, i32 0, i32 0, !dbg !92 - %5 = load i8, i8* %c.i7, align 1, !dbg !93 - %inc2.i8 = add i8 %5, 1, !dbg !93 - store i8 %inc2.i8, i8* %c.i7, align 1, !dbg !93 - call void @llvm.dbg.declare(metadata %struct.A2* %a2.i, metadata !12, metadata !DIExpression()), !dbg !94 - %i.i = getelementptr inbounds %struct.A2, %struct.A2* %a2.i, i32 0, i32 0, !dbg !96 - %6 = load i32, i32* %i.i, align 4, !dbg !97 - %inc.i = add nsw i32 %6, 1, !dbg !97 - store i32 %inc.i, i32* %i.i, align 4, !dbg !97 - call void @llvm.dbg.declare(metadata %struct.I2* %i2.i, metadata !21, metadata !DIExpression()), !dbg !98 - %j.i = getelementptr inbounds %struct.I2, %struct.I2* %i2.i, i32 0, i32 0, !dbg !99 - %7 = load i32, i32* %j.i, align 4, !dbg !100 - %inc1.i = add nsw i32 %7, 1, !dbg !100 - store i32 %inc1.i, i32* %j.i, align 4, !dbg !100 - call void @llvm.dbg.declare(metadata %struct.C2* %c2.i, metadata !30, metadata !DIExpression()), !dbg !101 - %c.i = getelementptr inbounds %struct.C2, %struct.C2* %c2.i, i32 0, i32 0, !dbg !102 - %8 = load i8, i8* %c.i, align 1, !dbg !103 - %inc2.i = add i8 %8, 1, !dbg !103 - store i8 %inc2.i, i8* %c.i, align 1, !dbg !103 - ret void, !dbg !104 -} - -!llvm.dbg.cu = !{!0} -!llvm.module.flags = !{!2, !3, !4, !5, !6} -!llvm.ident = !{!7} - -!0 = distinct !DICompileUnit(language: DW_LANG_C_plus_plus_14, file: !1, producer: "clang version 14.0.0", isOptimized: false, runtimeVersion: 0, emissionKind: FullDebug, splitDebugInlining: false, nameTableKind: None) -!1 = !DIFile(filename: "test.cpp", directory: "/") -!2 = !{i32 7, !"Dwarf Version", i32 4} -!3 = !{i32 2, !"Debug Info Version", i32 3} -!4 = !{i32 1, !"wchar_size", i32 4} -!5 = !{i32 7, !"uwtable", i32 1} -!6 = !{i32 7, !"frame-pointer", i32 2} -!7 = !{!"clang version 14.0.0"} -!8 = distinct !DISubprogram(name: "not_removed", linkageName: "_Z11not_removedv", scope: !1, file: !1, line: 17, type: !9, scopeLine: 17, flags: DIFlagPrototyped, spFlags: DISPFlagDefinition, unit: !0, retainedNodes: !11) -!9 = !DISubroutineType(types: !10) -!10 = !{null} -!11 = !{} -!12 = !DILocalVariable(name: "a2", scope: !13, file: !1, line: 23, type: !14) -!13 = distinct !DILexicalBlock(scope: !8, file: !1, line: 20, column: 3) -!14 = distinct !DICompositeType(tag: DW_TAG_structure_type, name: "A2", scope: !8, file: !1, line: 18, size: 32, flags: DIFlagTypePassByValue, elements: !15) -!15 = !{!16} -!16 = !DIDerivedType(tag: DW_TAG_member, name: "i", scope: !14, file: !1, line: 18, baseType: !17, size: 32) -!17 = !DIBasicType(name: "int", size: 32, encoding: DW_ATE_signed) -!18 = !DILocation(line: 23, column: 8, scope: !13) -!19 = !DILocation(line: 23, column: 15, scope: !13) -!20 = !DILocation(line: 23, column: 16, scope: !13) -!21 = !DILocalVariable(name: "i2", scope: !22, file: !1, line: 25, type: !23) -!22 = distinct !DILexicalBlock(scope: !13, file: !1, line: 24, column: 5) -!23 = distinct !DICompositeType(tag: DW_TAG_structure_type, name: "I2", scope: !13, file: !1, line: 21, size: 32, flags: DIFlagTypePassByValue, elements: !24) -!24 = !{!25} -!25 = !DIDerivedType(tag: DW_TAG_member, name: "j", scope: !23, file: !1, line: 21, baseType: !26, size: 32) -!26 = !DIDerivedType(tag: DW_TAG_typedef, 
name: "Int2", scope: !8, file: !1, line: 19, baseType: !17) -!27 = !DILocation(line: 25, column: 10, scope: !22) -!28 = !DILocation(line: 25, column: 17, scope: !22) -!29 = !DILocation(line: 25, column: 18, scope: !22) -!30 = !DILocalVariable(name: "c2", scope: !22, file: !1, line: 26, type: !31) -!31 = distinct !DICompositeType(tag: DW_TAG_structure_type, name: "C2", scope: !13, file: !1, line: 22, size: 8, flags: DIFlagTypePassByValue, elements: !32) -!32 = !{!33} -!33 = !DIDerivedType(tag: DW_TAG_member, name: "c", scope: !31, file: !1, line: 22, baseType: !34, size: 8) -!34 = !DIDerivedType(tag: DW_TAG_typedef, name: "Char2", scope: !31, file: !1, line: 22, baseType: !35) -!35 = !DIBasicType(name: "char", size: 8, encoding: DW_ATE_signed_char) -!36 = !DILocation(line: 26, column: 10, scope: !22) -!37 = !DILocation(line: 26, column: 17, scope: !22) -!38 = !DILocation(line: 26, column: 18, scope: !22) -!39 = !DILocation(line: 29, column: 1, scope: !8) -!40 = distinct !DISubprogram(name: "foo", linkageName: "_Z3foov", scope: !1, file: !1, line: 31, type: !9, scopeLine: 31, flags: DIFlagPrototyped, spFlags: DISPFlagDefinition, unit: !0, retainedNodes: !11) -!41 = !DILocalVariable(name: "a3", scope: !42, file: !1, line: 38, type: !44) -!42 = distinct !DILexicalBlock(scope: !43, file: !1, line: 36, column: 5) -!43 = distinct !DILexicalBlock(scope: !40, file: !1, line: 34, column: 3) -!44 = distinct !DICompositeType(tag: DW_TAG_structure_type, name: "A3", scope: !40, file: !1, line: 32, size: 32, flags: DIFlagTypePassByValue, elements: !45) -!45 = !{!46} -!46 = !DIDerivedType(tag: DW_TAG_member, name: "i", scope: !44, file: !1, line: 32, baseType: !17, size: 32) -!47 = !DILocation(line: 38, column: 10, scope: !42) -!48 = !DILocation(line: 38, column: 17, scope: !42) -!49 = !DILocation(line: 38, column: 18, scope: !42) -!50 = !DILocalVariable(name: "i3", scope: !51, file: !1, line: 40, type: !52) -!51 = distinct !DILexicalBlock(scope: !42, file: !1, line: 39, column: 7) -!52 = distinct !DICompositeType(tag: DW_TAG_structure_type, name: "I3", scope: !43, file: !1, line: 35, size: 32, flags: DIFlagTypePassByValue, elements: !53) -!53 = !{!54} -!54 = !DIDerivedType(tag: DW_TAG_member, name: "j", scope: !52, file: !1, line: 35, baseType: !55, size: 32) -!55 = !DIDerivedType(tag: DW_TAG_typedef, name: "Int3", scope: !40, file: !1, line: 33, baseType: !17) -!56 = !DILocation(line: 40, column: 12, scope: !51) -!57 = !DILocation(line: 40, column: 19, scope: !51) -!58 = !DILocation(line: 40, column: 20, scope: !51) -!59 = !DILocalVariable(name: "c3", scope: !51, file: !1, line: 41, type: !60) -!60 = distinct !DICompositeType(tag: DW_TAG_structure_type, name: "C3", scope: !42, file: !1, line: 37, size: 8, flags: DIFlagTypePassByValue, elements: !61) -!61 = !{!62} -!62 = !DIDerivedType(tag: DW_TAG_member, name: "c", scope: !60, file: !1, line: 37, baseType: !63, size: 8) -!63 = !DIDerivedType(tag: DW_TAG_typedef, name: "Char3", scope: !60, file: !1, line: 37, baseType: !35) -!64 = !DILocation(line: 41, column: 12, scope: !51) -!65 = !DILocation(line: 41, column: 19, scope: !51) -!66 = !DILocation(line: 41, column: 20, scope: !51) -!67 = !DILocalVariable(name: "a1", scope: !68, file: !1, line: 8, type: !70) -!68 = distinct !DILexicalBlock(scope: !69, file: !1, line: 5, column: 3) -!69 = distinct !DISubprogram(name: "removed", linkageName: "_Z7removedv", scope: !1, file: !1, line: 2, type: !9, scopeLine: 2, flags: DIFlagPrototyped, spFlags: DISPFlagDefinition, unit: !0, retainedNodes: !11) -!70 = distinct 
!DICompositeType(tag: DW_TAG_structure_type, name: "A1", scope: !69, file: !1, line: 3, size: 32, flags: DIFlagTypePassByValue, elements: !71, identifier: "_ZTSZ7removedvE2A1") -!71 = !{!72} -!72 = !DIDerivedType(tag: DW_TAG_member, name: "i", scope: !70, file: !1, line: 3, baseType: !17, size: 32) -!73 = !DILocation(line: 8, column: 8, scope: !68, inlinedAt: !74) -!74 = distinct !DILocation(line: 45, column: 3, scope: !40) -!75 = !DILocation(line: 8, column: 15, scope: !68, inlinedAt: !74) -!76 = !DILocation(line: 8, column: 16, scope: !68, inlinedAt: !74) -!77 = !DILocalVariable(name: "i1", scope: !78, file: !1, line: 10, type: !79) -!78 = distinct !DILexicalBlock(scope: !68, file: !1, line: 9, column: 5) -!79 = distinct !DICompositeType(tag: DW_TAG_structure_type, name: "I1", scope: !68, file: !1, line: 6, size: 32, flags: DIFlagTypePassByValue, elements: !80, identifier: "_ZTSZ7removedvE2I1") -!80 = !{!81} -!81 = !DIDerivedType(tag: DW_TAG_member, name: "j", scope: !79, file: !1, line: 6, baseType: !82, size: 32) -!82 = !DIDerivedType(tag: DW_TAG_typedef, name: "Int1", scope: !69, file: !1, line: 4, baseType: !17) -!83 = !DILocation(line: 10, column: 10, scope: !78, inlinedAt: !74) -!84 = !DILocation(line: 10, column: 17, scope: !78, inlinedAt: !74) -!85 = !DILocation(line: 10, column: 18, scope: !78, inlinedAt: !74) -!86 = !DILocalVariable(name: "c1", scope: !78, file: !1, line: 11, type: !87) -!87 = distinct !DICompositeType(tag: DW_TAG_structure_type, name: "C1", scope: !68, file: !1, line: 7, size: 8, flags: DIFlagTypePassByValue, elements: !88, identifier: "_ZTSZ7removedvE2C1") -!88 = !{!89} -!89 = !DIDerivedType(tag: DW_TAG_member, name: "c", scope: !87, file: !1, line: 7, baseType: !90, size: 8) -!90 = !DIDerivedType(tag: DW_TAG_typedef, name: "Char1", scope: !87, file: !1, line: 7, baseType: !35) -!91 = !DILocation(line: 11, column: 10, scope: !78, inlinedAt: !74) -!92 = !DILocation(line: 11, column: 17, scope: !78, inlinedAt: !74) -!93 = !DILocation(line: 11, column: 18, scope: !78, inlinedAt: !74) -!94 = !DILocation(line: 23, column: 8, scope: !13, inlinedAt: !95) -!95 = distinct !DILocation(line: 46, column: 3, scope: !40) -!96 = !DILocation(line: 23, column: 15, scope: !13, inlinedAt: !95) -!97 = !DILocation(line: 23, column: 16, scope: !13, inlinedAt: !95) -!98 = !DILocation(line: 25, column: 10, scope: !22, inlinedAt: !95) -!99 = !DILocation(line: 25, column: 17, scope: !22, inlinedAt: !95) -!100 = !DILocation(line: 25, column: 18, scope: !22, inlinedAt: !95) -!101 = !DILocation(line: 26, column: 10, scope: !22, inlinedAt: !95) -!102 = !DILocation(line: 26, column: 17, scope: !22, inlinedAt: !95) -!103 = !DILocation(line: 26, column: 18, scope: !22, inlinedAt: !95) -!104 = !DILocation(line: 47, column: 1, scope: !40) diff --git a/llvm/test/DebugInfo/Generic/namespace.ll b/llvm/test/DebugInfo/Generic/namespace.ll index 7b9bf9b531b8..6998e69b33d9 100644 --- a/llvm/test/DebugInfo/Generic/namespace.ll +++ b/llvm/test/DebugInfo/Generic/namespace.ll @@ -22,21 +22,15 @@ ; CHECK: DW_TAG_formal_parameter ; CHECK: NULL -; CHECK: [[FUNC_FWD:0x[0-9a-f]*]]:{{.*}}DW_TAG_subprogram -; CHECK: DW_AT_name ("func_fwd") -; CHECK-NOT: DW_AT_declaration - -; CHECK: [[I:0x[0-9a-f]*]]:{{ *}}DW_TAG_variable -; CHECK: DW_AT_name ("i") -; CHECK: [[VAR_FWD:0x[0-9a-f]*]]:{{ *}}DW_TAG_variable -; CHECK: DW_AT_name ("var_fwd") - ; CHECK: [[FOO:0x[0-9a-f]*]]:{{ *}}DW_TAG_structure_type ; CHECK: DW_AT_name ("foo") ; CHECK: DW_AT_declaration ; CHECK: [[BAR:0x[0-9a-f]*]]:{{ *}}DW_TAG_structure_type ; 
CHECK: DW_AT_name ("bar") +; CHECK: [[I:0x[0-9a-f]*]]:{{ *}}DW_TAG_variable +; CHECK: DW_AT_name ("i") + ; CHECK: [[BAZ:0x[0-9a-f]*]]:{{.*}}DW_TAG_typedef ; CHECK: DW_AT_name ("baz") @@ -47,6 +41,13 @@ ; CHECK: [[FUNC_DECL:0x[0-9a-f]*]]:{{.*}}DW_TAG_subprogram ; CHECK: DW_AT_name ("func_decl") ; CHECK: DW_AT_declaration + +; CHECK: [[VAR_FWD:0x[0-9a-f]*]]:{{ *}}DW_TAG_variable +; CHECK: DW_AT_name ("var_fwd") + +; CHECK: [[FUNC_FWD:0x[0-9a-f]*]]:{{.*}}DW_TAG_subprogram +; CHECK: DW_AT_name ("func_fwd") +; CHECK-NOT: DW_AT_declaration ; CHECK: NULL ; CHECK: DW_TAG_imported_module @@ -61,12 +62,6 @@ ; CHECK: DW_AT_MIPS_linkage_name ; CHECK: DW_AT_name ("func") ; CHECK: DW_TAG_formal_parameter -; CHECK: DW_TAG_lexical_block -; CHECK: DW_TAG_imported_module -; CHECK: DW_AT_decl_file ([[F2]]) -; CHECK: DW_AT_decl_line (23) -; CHECK: DW_AT_import {{.*}} -; CHECK: NULL ; CHECK: DW_TAG_imported_module ; CHECK: DW_AT_decl_file ([[F2:.*]]) ; CHECK: DW_AT_decl_line (26) @@ -117,10 +112,16 @@ ; CHECK: DW_AT_decl_file ([[F2]]) ; CHECK: DW_AT_decl_line (37) ; CHECK: DW_AT_import ([[FUNC_FWD]]) +; CHECK: DW_TAG_lexical_block +; CHECK: DW_TAG_imported_module +; CHECK: DW_AT_decl_file ([[F2]]) +; CHECK: DW_AT_decl_line (23) +; CHECK: DW_AT_import {{.*}} +; CHECK: NULL ; CHECK: NULL -; CHECK: DW_TAG_subprogram ; CHECK: DW_TAG_base_type +; CHECK: DW_TAG_subprogram ; CHECK: DW_TAG_imported_module ; CHECK: DW_AT_decl_file ([[F2:.*]]) ; CHECK: DW_AT_decl_line (18) From c3f0e1ea3ef802059e0c2bd60227ea2d83a910ac Mon Sep 17 00:00:00 2001 From: Zequan Wu Date: Mon, 20 Dec 2021 16:18:25 -0800 Subject: [PATCH 092/293] [LLDB][DWARF] Fix duplicate TypeSP in type list Differential Revision: https://reviews.llvm.org/D115308 --- lldb/source/Plugins/SymbolFile/DWARF/DWARFASTParserClang.cpp | 5 ----- 1 file changed, 5 deletions(-) diff --git a/lldb/source/Plugins/SymbolFile/DWARF/DWARFASTParserClang.cpp b/lldb/source/Plugins/SymbolFile/DWARF/DWARFASTParserClang.cpp index 4ac6e165dda3..b90f104c4d21 100644 --- a/lldb/source/Plugins/SymbolFile/DWARF/DWARFASTParserClang.cpp +++ b/lldb/source/Plugins/SymbolFile/DWARF/DWARFASTParserClang.cpp @@ -1530,7 +1530,6 @@ TypeSP DWARFASTParserClang::UpdateSymbolContextScopeForType( return type_sp; SymbolFileDWARF *dwarf = die.GetDWARF(); - TypeList &type_list = dwarf->GetTypeList(); DWARFDIE sc_parent_die = SymbolFileDWARF::GetParentSymbolContextDIE(die); dw_tag_t sc_parent_tag = sc_parent_die.Tag(); @@ -1550,10 +1549,6 @@ TypeSP DWARFASTParserClang::UpdateSymbolContextScopeForType( if (symbol_context_scope != nullptr) type_sp->SetSymbolContextScope(symbol_context_scope); - // We are ready to put this type into the uniqued list up at the module - // level. - type_list.Insert(type_sp); - dwarf->GetDIEToType()[die.GetDIE()] = type_sp.get(); return type_sp; } From 9b39737129f549ce3a17893c780bab96441ac921 Mon Sep 17 00:00:00 2001 From: Michael Jones Date: Fri, 12 Nov 2021 15:45:56 -0800 Subject: [PATCH 093/293] [libc] add modified Eisel-Lemire for long doubles The Eisel-Lemire algorithm is an effecient way to handle converting to floating point numbers from strings, but in its base form it only supports up to 64 bit floating point numbers. This adds an implementation to handle long doubles. 
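For context, the 64-bit core of Eisel-Lemire is roughly: multiply the decimal
mantissa by a precomputed high-precision binary approximation of 10^exp10 and
take the top bits of the product as the candidate binary mantissa, falling back
to a slower path whenever the product lands too close to a rounding boundary.
A minimal sketch of that idea only (not the libc implementation; the table
lookup and names here are placeholders):

    #include <cstdint>
    // pow10_hi64 is assumed to hold the high 64 bits of an approximation of
    // 10^exp10, normalized so its top bit is set; the real algorithm also
    // normalizes `mantissa` first. __uint128_t is a GCC/Clang extension,
    // which this code already relies on.
    inline uint64_t approx_top_bits(uint64_t mantissa, uint64_t pow10_hi64) {
      unsigned __int128 p = (unsigned __int128)mantissa * pow10_hi64;
      return (uint64_t)(p >> 64); // high half becomes the candidate mantissa
    }

The long double variant below has to form the same product with a mantissa that
no longer fits in 64 bits, so it combines four 64x64 partial products into a
256-bit approximation instead.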
Reviewed By: lntue Differential Revision: https://reviews.llvm.org/D115280 --- libc/src/__support/str_to_float.h | 108 ++++++++++++++++++ libc/test/src/__support/str_to_float_test.cpp | 94 ++++++++++++++- 2 files changed, 197 insertions(+), 5 deletions(-) diff --git a/libc/src/__support/str_to_float.h b/libc/src/__support/str_to_float.h index a7ac3e2e9a68..8defeed122a9 100644 --- a/libc/src/__support/str_to_float.h +++ b/libc/src/__support/str_to_float.h @@ -182,6 +182,114 @@ eisel_lemire(typename fputil::FPBits::UIntType mantissa, int32_t exp10, return true; } +#if !defined(LONG_DOUBLE_IS_DOUBLE) +template <> +inline bool eisel_lemire( + typename fputil::FPBits::UIntType mantissa, int32_t exp10, + typename fputil::FPBits::UIntType *outputMantissa, + uint32_t *outputExp2) { + using BitsType = typename fputil::FPBits::UIntType; + constexpr uint32_t BITS_IN_MANTISSA = sizeof(mantissa) * 8; + + // Exp10 Range + // This doesn't reach very far into the range for long doubles, since it's + // sized for doubles and their 11 exponent bits, and not for long doubles and + // their 15 exponent bits (max exponent of ~300 for double vs ~5000 for long + // double). This is a known tradeoff, and was made because a proper long + // double table would be approximately 16 times larger. This would have + // significant memory and storage costs all the time to speed up a relatively + // uncommon path. In addition the exp10_to_exp2 function only approximates + // multiplying by log(10)/log(2), and that approximation may not be accurate + // out to the full long double range. + if (exp10 < DETAILED_POWERS_OF_TEN_MIN_EXP_10 || + exp10 > DETAILED_POWERS_OF_TEN_MAX_EXP_10) { + return false; + } + + // Normalization + uint32_t clz = leading_zeroes(mantissa); + mantissa <<= clz; + + uint32_t exp2 = exp10_to_exp2(exp10) + BITS_IN_MANTISSA + + fputil::FloatProperties::EXPONENT_BIAS - clz; + + // Multiplication + const uint64_t *power_of_ten = + DETAILED_POWERS_OF_TEN[exp10 - DETAILED_POWERS_OF_TEN_MIN_EXP_10]; + + // Since the input mantissa is more than 64 bits, we have to multiply with the + // full 128 bits of the power of ten to get an approximation with the same + // number of significant bits. This means that we only get the one + // approximation, and that approximation is 256 bits long. + __uint128_t approx_upper = static_cast<__uint128_t>(high64(mantissa)) * + static_cast<__uint128_t>(power_of_ten[1]); + + __uint128_t approx_middle = static_cast<__uint128_t>(high64(mantissa)) * + static_cast<__uint128_t>(power_of_ten[0]) + + static_cast<__uint128_t>(low64(mantissa)) * + static_cast<__uint128_t>(power_of_ten[1]); + + __uint128_t approx_lower = static_cast<__uint128_t>(low64(mantissa)) * + static_cast<__uint128_t>(power_of_ten[0]); + + __uint128_t final_approx_lower = + approx_lower + (static_cast<__uint128_t>(low64(approx_middle)) << 64); + __uint128_t final_approx_upper = approx_upper + high64(approx_middle) + + (final_approx_lower < approx_lower ? 1 : 0); + + // The halfway constant is used to check if the bits that will be shifted away + // intially are all 1. For 80 bit floats this is 128 (bitstype size) - 64 + // (final mantissa size) - 3 (we shift away the last two bits separately for + // accuracy, and the most significant bit is ignored.) = 61 bits. Similarly, + // it's 12 bits for 128 bit floats in this case. 
+ constexpr __uint128_t HALFWAY_CONSTANT = + (__uint128_t(1) << (BITS_IN_MANTISSA - + fputil::FloatProperties::MANTISSA_WIDTH - + 3)) - + 1; + + if ((final_approx_upper & HALFWAY_CONSTANT) == HALFWAY_CONSTANT && + final_approx_lower + mantissa < mantissa) { + return false; + } + + // Shifting to 65 bits for 80 bit floats and 113 bits for 128 bit floats + BitsType msb = final_approx_upper >> (BITS_IN_MANTISSA - 1); + BitsType final_mantissa = + final_approx_upper >> + (msb + BITS_IN_MANTISSA - + (fputil::FloatProperties::MANTISSA_WIDTH + 3)); + exp2 -= 1 ^ msb; // same as !msb + + // Half-way ambiguity + if (final_approx_lower == 0 && (final_approx_upper & HALFWAY_CONSTANT) == 0 && + (final_mantissa & 3) == 1) { + return false; + } + + // From 65 to 64 bits for 80 bit floats and 113 to 112 bits for 128 bit + // floats + final_mantissa += final_mantissa & 1; + final_mantissa >>= 1; + if ((final_mantissa >> + (fputil::FloatProperties::MANTISSA_WIDTH + 1)) > 0) { + final_mantissa >>= 1; + ++exp2; + } + + // The if block is equivalent to (but has fewer branches than): + // if exp2 <= 0 || exp2 >= MANTISSA_MAX { etc } + if (exp2 - 1 >= + (1 << fputil::FloatProperties::EXPONENT_WIDTH) - 2) { + return false; + } + + *outputMantissa = final_mantissa; + *outputExp2 = exp2; + return true; +} +#endif + // The nth item in POWERS_OF_TWO represents the greatest power of two less than // 10^n. This tells us how much we can safely shift without overshooting. constexpr uint8_t POWERS_OF_TWO[19] = { diff --git a/libc/test/src/__support/str_to_float_test.cpp b/libc/test/src/__support/str_to_float_test.cpp index 44f0cecec76c..e75434621157 100644 --- a/libc/test/src/__support/str_to_float_test.cpp +++ b/libc/test/src/__support/str_to_float_test.cpp @@ -175,13 +175,8 @@ TEST_F(LlvmLibcStrToFloatTest, EiselLemireFallbackStates) { // Check the fallback states for the algorithm: uint32_t float_output_mantissa = 0; uint64_t double_output_mantissa = 0; - __uint128_t too_long_mantissa = 0; uint32_t output_exp2 = 0; - // This Eisel-Lemire implementation doesn't support long doubles yet. - ASSERT_FALSE(__llvm_libc::internal::eisel_lemire( - too_long_mantissa, 0, &too_long_mantissa, &output_exp2)); - // This number can't be evaluated by Eisel-Lemire since it's exactly 1024 away // from both of its closest floating point approximations // (12345678901234548736 and 12345678901234550784) @@ -272,3 +267,92 @@ TEST(LlvmLibcStrToFloatTest, SimpleDecimalConversionExtraTypes) { // EXPECT_EQ(outputExp2, uint32_t(1089)); // EXPECT_EQ(errno, 0); } + +#if defined(LONG_DOUBLE_IS_DOUBLE) +TEST_F(LlvmLibcStrToFloatTest, EiselLemireFloat64AsLongDouble) { + eisel_lemire_test(123, 0, 0x1EC00000000000, 1029); +} +#elif defined(SPECIAL_X86_LONG_DOUBLE) +TEST_F(LlvmLibcStrToFloatTest, EiselLemireFloat80Simple) { + eisel_lemire_test(123, 0, 0xf600000000000000, 16389); + eisel_lemire_test(12345678901234568192u, 0, 0xab54a98ceb1f0c00, + 16446); +} + +TEST_F(LlvmLibcStrToFloatTest, EiselLemireFloat80LongerMantissa) { + eisel_lemire_test((__uint128_t(0x1234567812345678) << 64) + + __uint128_t(0x1234567812345678), + 0, 0x91a2b3c091a2b3c1, 16507); + eisel_lemire_test((__uint128_t(0x1234567812345678) << 64) + + __uint128_t(0x1234567812345678), + 300, 0xd97757de56adb65c, 17503); + eisel_lemire_test((__uint128_t(0x1234567812345678) << 64) + + __uint128_t(0x1234567812345678), + -300, 0xc30feb9a7618457d, 15510); +} + +// These tests check numbers at the edge of the DETAILED_POWERS_OF_TEN table. 
+// This doesn't reach very far into the range for long doubles, since it's sized +// for doubles and their 11 exponent bits, and not for long doubles and their +// 15 exponent bits. This is a known tradeoff, and was made because a proper +// long double table would be approximately 16 times longer (specifically the +// maximum exponent would need to be about 5000, leading to a 10,000 entry +// table). This would have significant memory and storage costs all the time to +// speed up a relatively uncommon path. +TEST_F(LlvmLibcStrToFloatTest, EiselLemireFloat80TableLimits) { + eisel_lemire_test(1, 347, 0xd13eb46469447567, 17535); + eisel_lemire_test(1, -348, 0xfa8fd5a0081c0288, 15226); +} + +TEST_F(LlvmLibcStrToFloatTest, EiselLemireFloat80Fallback) { + uint32_t outputExp2 = 0; + __uint128_t quadOutputMantissa = 0; + + // This number is halfway between two possible results, and the algorithm + // can't determine which is correct. + ASSERT_FALSE(__llvm_libc::internal::eisel_lemire( + 12345678901234567890u, 1, &quadOutputMantissa, &outputExp2)); + + // These numbers' exponents are out of range for the current powers of ten + // table. + ASSERT_FALSE(__llvm_libc::internal::eisel_lemire( + 1, 1000, &quadOutputMantissa, &outputExp2)); + ASSERT_FALSE(__llvm_libc::internal::eisel_lemire( + 1, -1000, &quadOutputMantissa, &outputExp2)); +} +#else +TEST_F(LlvmLibcStrToFloatTest, EiselLemireFloat128Simple) { + eisel_lemire_test(123, 0, (__uint128_t(0x1ec0000000000) << 64), + 16389); + eisel_lemire_test(12345678901234568192u, 0, + (__uint128_t(0x156a95319d63e) << 64) + + __uint128_t(0x1800000000000000), + 16446); +} + +TEST_F(LlvmLibcStrToFloatTest, EiselLemireFloat128LongerMantissa) { + eisel_lemire_test( + (__uint128_t(0x1234567812345678) << 64) + __uint128_t(0x1234567812345678), + 0, (__uint128_t(0x1234567812345) << 64) + __uint128_t(0x6781234567812345), + 16507); + eisel_lemire_test( + (__uint128_t(0x1234567812345678) << 64) + __uint128_t(0x1234567812345678), + 300, + (__uint128_t(0x1b2eeafbcad5b) << 64) + __uint128_t(0x6cb8b4451dfcde19), + 17503); + eisel_lemire_test( + (__uint128_t(0x1234567812345678) << 64) + __uint128_t(0x1234567812345678), + -300, + (__uint128_t(0x1861fd734ec30) << 64) + __uint128_t(0x8afa7189f0f7595f), + 15510); +} + +TEST_F(LlvmLibcStrToFloatTest, EiselLemireFloat128Fallback) { + uint32_t outputExp2 = 0; + __uint128_t quadOutputMantissa = 0; + + ASSERT_FALSE(__llvm_libc::internal::eisel_lemire( + (__uint128_t(0x5ce0e9a56015fec5) << 64) + __uint128_t(0xaadfa328ae39b333), + 1, &quadOutputMantissa, &outputExp2)); +} +#endif From ed6c757d5c5926a72183cdd15a5b1efc54111db0 Mon Sep 17 00:00:00 2001 From: Andrew Browne Date: Wed, 22 Dec 2021 12:35:42 -0800 Subject: [PATCH 094/293] [DFSan] Add functions to print origin trace from origin id instead of address. 
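A minimal usage sketch (the function names and signatures are the ones added to
dfsan_interface.h in this patch; the wrapper function and buffer size are
illustrative only, and the calls are only meaningful when built with
-fsanitize=dataflow and origin tracking enabled):

    #include <sanitizer/dfsan_interface.h>
    // Print and format the origin trace of a tainted value using only its
    // origin id, with no need for the address the value lives at.
    void report_origin(int tainted) {
      dfsan_origin o = dfsan_get_origin(tainted);
      dfsan_print_origin_id_trace(o);
      char buf[1024];
      dfsan_sprint_origin_id_trace(o, buf, sizeof(buf));
    }

The new functions are: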
dfsan_print_origin_id_trace dfsan_sprint_origin_id_trace Reviewed By: vitalybuka Differential Revision: https://reviews.llvm.org/D116184 --- .../include/sanitizer/dfsan_interface.h | 7 ++ compiler-rt/lib/dfsan/dfsan.cpp | 85 ++++++++++++++++--- compiler-rt/lib/dfsan/done_abilist.txt | 4 + .../test/dfsan/origin_id_stack_trace.c | 74 ++++++++++++++++ 4 files changed, 156 insertions(+), 14 deletions(-) create mode 100644 compiler-rt/test/dfsan/origin_id_stack_trace.c diff --git a/compiler-rt/include/sanitizer/dfsan_interface.h b/compiler-rt/include/sanitizer/dfsan_interface.h index d6209a3ea2b2..769aecfb0d70 100644 --- a/compiler-rt/include/sanitizer/dfsan_interface.h +++ b/compiler-rt/include/sanitizer/dfsan_interface.h @@ -87,6 +87,9 @@ void dfsan_weak_hook_strncmp(void *caller_pc, const char *s1, const char *s2, /// prints description at the beginning of the trace. If origin tracking is not /// on, or the address is not labeled, it prints nothing. void dfsan_print_origin_trace(const void *addr, const char *description); +/// As above, but use an origin id from dfsan_get_origin() instead of address. +/// Does not include header line with taint label and address information. +void dfsan_print_origin_id_trace(dfsan_origin origin); /// Prints the origin trace of the label at the address \p addr to a /// pre-allocated output buffer. If origin tracking is not on, or the address is @@ -124,6 +127,10 @@ void dfsan_print_origin_trace(const void *addr, const char *description); /// return value is not less than \p out_buf_size. size_t dfsan_sprint_origin_trace(const void *addr, const char *description, char *out_buf, size_t out_buf_size); +/// As above, but use an origin id from dfsan_get_origin() instead of address. +/// Does not include header line with taint label and address information. +size_t dfsan_sprint_origin_id_trace(dfsan_origin origin, char *out_buf, + size_t out_buf_size); /// Prints the stack trace leading to this call to a pre-allocated output /// buffer. diff --git a/compiler-rt/lib/dfsan/dfsan.cpp b/compiler-rt/lib/dfsan/dfsan.cpp index ce2c04df83a8..ee7221c7b9a8 100644 --- a/compiler-rt/lib/dfsan/dfsan.cpp +++ b/compiler-rt/lib/dfsan/dfsan.cpp @@ -630,22 +630,16 @@ void PrintInvalidOriginWarning(dfsan_label label, const void *address) { d.Warning(), label, address, d.Default()); } -bool PrintOriginTraceToStr(const void *addr, const char *description, - InternalScopedString *out) { - CHECK(out); - CHECK(dfsan_get_track_origins()); +void PrintInvalidOriginIdWarning(dfsan_origin origin) { Decorator d; + Printf( + " %sOrigin Id %d has invalid origin tracking. This can " + "be a DFSan bug.%s\n", + d.Warning(), origin, d.Default()); +} - const dfsan_label label = *__dfsan::shadow_for(addr); - CHECK(label); - - const dfsan_origin origin = *__dfsan::origin_for(addr); - - out->append(" %sTaint value 0x%x (at %p) origin tracking (%s)%s\n", - d.Origin(), label, addr, description ? 
description : "", - d.Default()); - - Origin o = Origin::FromRawId(origin); +bool PrintOriginTraceFramesToStr(Origin o, InternalScopedString *out) { + Decorator d; bool found = false; while (o.isChainedOrigin()) { @@ -668,6 +662,25 @@ bool PrintOriginTraceToStr(const void *addr, const char *description, return found; } +bool PrintOriginTraceToStr(const void *addr, const char *description, + InternalScopedString *out) { + CHECK(out); + CHECK(dfsan_get_track_origins()); + Decorator d; + + const dfsan_label label = *__dfsan::shadow_for(addr); + CHECK(label); + + const dfsan_origin origin = *__dfsan::origin_for(addr); + + out->append(" %sTaint value 0x%x (at %p) origin tracking (%s)%s\n", + d.Origin(), label, addr, description ? description : "", + d.Default()); + + Origin o = Origin::FromRawId(origin); + return PrintOriginTraceFramesToStr(o, out); +} + } // namespace extern "C" SANITIZER_INTERFACE_ATTRIBUTE void dfsan_print_origin_trace( @@ -725,6 +738,50 @@ dfsan_sprint_origin_trace(const void *addr, const char *description, return trace.length(); } +extern "C" SANITIZER_INTERFACE_ATTRIBUTE void dfsan_print_origin_id_trace( + dfsan_origin origin) { + if (!dfsan_get_track_origins()) { + PrintNoOriginTrackingWarning(); + return; + } + Origin o = Origin::FromRawId(origin); + + InternalScopedString trace; + bool success = PrintOriginTraceFramesToStr(o, &trace); + + if (trace.length()) + Printf("%s", trace.data()); + + if (!success) + PrintInvalidOriginIdWarning(origin); +} + +extern "C" SANITIZER_INTERFACE_ATTRIBUTE uptr dfsan_sprint_origin_id_trace( + dfsan_origin origin, char *out_buf, uptr out_buf_size) { + CHECK(out_buf); + + if (!dfsan_get_track_origins()) { + PrintNoOriginTrackingWarning(); + return 0; + } + Origin o = Origin::FromRawId(origin); + + InternalScopedString trace; + bool success = PrintOriginTraceFramesToStr(o, &trace); + + if (!success) { + PrintInvalidOriginIdWarning(origin); + return 0; + } + + if (out_buf_size) { + internal_strncpy(out_buf, trace.data(), out_buf_size - 1); + out_buf[out_buf_size - 1] = '\0'; + } + + return trace.length(); +} + extern "C" SANITIZER_INTERFACE_ATTRIBUTE dfsan_origin dfsan_get_init_origin(const void *addr) { if (!dfsan_get_track_origins()) diff --git a/compiler-rt/lib/dfsan/done_abilist.txt b/compiler-rt/lib/dfsan/done_abilist.txt index eef7c48948cc..b944b799b152 100644 --- a/compiler-rt/lib/dfsan/done_abilist.txt +++ b/compiler-rt/lib/dfsan/done_abilist.txt @@ -30,8 +30,12 @@ fun:dfsan_flush=uninstrumented fun:dfsan_flush=discard fun:dfsan_print_origin_trace=uninstrumented fun:dfsan_print_origin_trace=discard +fun:dfsan_print_origin_id_trace=uninstrumented +fun:dfsan_print_origin_id_trace=discard fun:dfsan_sprint_origin_trace=uninstrumented fun:dfsan_sprint_origin_trace=discard +fun:dfsan_sprint_origin_id_trace=uninstrumented +fun:dfsan_sprint_origin_id_trace=discard fun:dfsan_sprint_stack_trace=uninstrumented fun:dfsan_sprint_stack_trace=discard fun:dfsan_get_origin=uninstrumented diff --git a/compiler-rt/test/dfsan/origin_id_stack_trace.c b/compiler-rt/test/dfsan/origin_id_stack_trace.c new file mode 100644 index 000000000000..5958a8a3bea2 --- /dev/null +++ b/compiler-rt/test/dfsan/origin_id_stack_trace.c @@ -0,0 +1,74 @@ +// RUN: %clang_dfsan -gmlt -mllvm -dfsan-track-origins=1 %s -o %t && \ +// RUN: %run %t >%t.out 2>&1 +// RUN: FileCheck %s < %t.out +// +// RUN: %clang_dfsan -gmlt -mllvm -dfsan-track-origins=1 -mllvm -dfsan-instrument-with-call-threshold=0 %s -o %t && \ +// RUN: %run %t >%t.out 2>&1 +// RUN: FileCheck %s < %t.out +// +// 
REQUIRES: x86_64-target-arch + +#include +#include +#include + +__attribute__((noinline)) int foo(int a, int b) { return a + b; } + +__attribute__((noinline)) void bar(int depth, void *addr, int size) { + if (depth) { + bar(depth - 1, addr, size); + } else { + dfsan_set_label(1, addr, size); + } +} + +__attribute__((noinline)) void baz(int depth, void *addr, int size) { + bar(depth, addr, size); +} + +int main(int argc, char *argv[]) { + int a = 10; + int b = 20; + baz(8, &a, sizeof(a)); + int c = foo(a, b); + dfsan_origin c_o = dfsan_get_origin(c); + dfsan_print_origin_id_trace(c_o); + // CHECK: Origin value: {{.*}}, Taint value was created at + // CHECK: #0 {{.*}} in bar.dfsan {{.*}}origin_id_stack_trace.c:[[@LINE-16]] + // CHECK-COUNT-8: #{{[0-9]+}} {{.*}} in bar.dfsan {{.*}}origin_id_stack_trace.c:[[@LINE-19]] + // CHECK: #9 {{.*}} in baz.dfsan {{.*}}origin_id_stack_trace.c:[[@LINE-13]] + + char buf[3000]; + size_t length = dfsan_sprint_origin_id_trace(c_o, buf, sizeof(buf)); + + printf("==OUTPUT==\n\n%s==EOS==\n", buf); + // CHECK: ==OUTPUT== + // CHECK: Origin value: {{.*}}, Taint value was created at + // CHECK: #0 {{.*}} in bar.dfsan {{.*}}origin_id_stack_trace.c:[[@LINE-26]] + // CHECK-COUNT-8: #{{[0-9]+}} {{.*}} in bar.dfsan {{.*}}origin_id_stack_trace.c:[[@LINE-29]] + // CHECK: #9 {{.*}} in baz.dfsan {{.*}}origin_id_stack_trace.c:[[@LINE-23]] + // CHECK: ==EOS== + + char tinybuf[20]; + size_t same_length = + dfsan_sprint_origin_id_trace(c_o, tinybuf, sizeof(tinybuf)); + + printf("==TRUNCATED OUTPUT==\n\n%s==EOS==\n", tinybuf); + // CHECK: ==TRUNCATED OUTPUT== + // CHECK: Origin value: 0x1==EOS== + + printf("Returned length: %zu\n", length); + printf("Actual length: %zu\n", strlen(buf)); + printf("Returned length with truncation: %zu\n", same_length); + + // CHECK: Returned length: [[#LEN:]] + // CHECK: Actual length: [[#LEN]] + // CHECK: Returned length with truncation: [[#LEN]] + + buf[0] = '\0'; + length = dfsan_sprint_origin_id_trace(c_o, buf, 0); + printf("Output=\"%s\"\n", buf); + printf("Returned length: %zu\n", length); + // CHECK: Output="" + // CHECK: Returned length: [[#LEN]] +} From 352945dd36c64fd68b4c73fcc393ee5828da639a Mon Sep 17 00:00:00 2001 From: Nikolas Klauser Date: Wed, 22 Dec 2021 11:00:37 +0100 Subject: [PATCH 095/293] [libc++] Remove unused headers from Remove unused headers from `` Spies: libcxx-commits Differential Revision: https://reviews.llvm.org/D116146 --- libcxx/include/filesystem | 23 ------------------- libcxx/src/filesystem/directory_iterator.cpp | 3 ++- libcxx/src/filesystem/filesystem_common.h | 1 + .../filesystems/convert_file_time.pass.cpp | 9 ++++---- .../directory_entry.obs/comparisons.pass.cpp | 4 ++-- .../class.directory_iterator/types.pass.cpp | 3 ++- .../filesystem_error.members.pass.cpp | 4 +++- .../path.member/path.assign/copy.pass.cpp | 3 ++- .../path.member/path.assign/move.pass.cpp | 3 ++- .../path.member/path.construct/copy.pass.cpp | 3 ++- .../path.member/path.construct/move.pass.cpp | 3 ++- .../filesystems/class.path/synop.pass.cpp | 3 ++- libcxx/test/support/filesystem_test_helper.h | 3 ++- 13 files changed, 27 insertions(+), 38 deletions(-) diff --git a/libcxx/include/filesystem b/libcxx/include/filesystem index 09d90614aa0d..6203b9a123cc 100644 --- a/libcxx/include/filesystem +++ b/libcxx/include/filesystem @@ -254,32 +254,9 @@ inline constexpr bool std::ranges::enable_view #include<__filesystem/space_info.h> #include<__filesystem/u8path.h> - -#include <__availability> -#include <__config> -#include <__debug> -#include 
<__ranges/enable_borrowed_range.h> -#include <__ranges/enable_view.h> -#include <__utility/forward.h> -#include #include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include #include -#if !defined(_LIBCPP_HAS_NO_LOCALIZATION) -# include -# include // for quoted -#endif - #if defined(_LIBCPP_HAS_NO_FILESYSTEM_LIBRARY) # error "The Filesystem library is not supported since libc++ has been configured with LIBCXX_ENABLE_FILESYSTEM disabled" #endif diff --git a/libcxx/src/filesystem/directory_iterator.cpp b/libcxx/src/filesystem/directory_iterator.cpp index 6219ceafd36e..90b255d9877f 100644 --- a/libcxx/src/filesystem/directory_iterator.cpp +++ b/libcxx/src/filesystem/directory_iterator.cpp @@ -6,8 +6,9 @@ // //===----------------------------------------------------------------------===// -#include "filesystem" #include "__config" +#include "filesystem" +#include "stack" #if defined(_LIBCPP_WIN32API) #define WIN32_LEAN_AND_MEAN #define NOMINMAX diff --git a/libcxx/src/filesystem/filesystem_common.h b/libcxx/src/filesystem/filesystem_common.h index a2c340e61083..717894e537d2 100644 --- a/libcxx/src/filesystem/filesystem_common.h +++ b/libcxx/src/filesystem/filesystem_common.h @@ -17,6 +17,7 @@ #include "cstdlib" #include "ctime" #include "filesystem" +#include "system_error" #if !defined(_LIBCPP_WIN32API) # include diff --git a/libcxx/test/libcxx/input.output/filesystems/convert_file_time.pass.cpp b/libcxx/test/libcxx/input.output/filesystems/convert_file_time.pass.cpp index f93737c2372d..bbeaedd51f22 100644 --- a/libcxx/test/libcxx/input.output/filesystems/convert_file_time.pass.cpp +++ b/libcxx/test/libcxx/input.output/filesystems/convert_file_time.pass.cpp @@ -14,12 +14,13 @@ // ADDITIONAL_COMPILE_FLAGS: -I %S/../../../../src/filesystem -#include +#include #include -#include -#include #include -#include +#include +#include +#include +#include #include "filesystem_common.h" diff --git a/libcxx/test/std/input.output/filesystems/class.directory_entry/directory_entry.obs/comparisons.pass.cpp b/libcxx/test/std/input.output/filesystems/class.directory_entry/directory_entry.obs/comparisons.pass.cpp index e71d4e24664a..f7e5770a7cd7 100644 --- a/libcxx/test/std/input.output/filesystems/class.directory_entry/directory_entry.obs/comparisons.pass.cpp +++ b/libcxx/test/std/input.output/filesystems/class.directory_entry/directory_entry.obs/comparisons.pass.cpp @@ -19,10 +19,10 @@ // bool operator> (directory_entry const&) const noexcept; // bool operator>=(directory_entry const&) const noexcept; - #include "filesystem_include.h" -#include #include +#include +#include #include "test_macros.h" diff --git a/libcxx/test/std/input.output/filesystems/class.directory_iterator/types.pass.cpp b/libcxx/test/std/input.output/filesystems/class.directory_iterator/types.pass.cpp index 97b5d23d9dbb..403d1ae338dd 100644 --- a/libcxx/test/std/input.output/filesystems/class.directory_iterator/types.pass.cpp +++ b/libcxx/test/std/input.output/filesystems/class.directory_iterator/types.pass.cpp @@ -19,8 +19,9 @@ // typedef ... 
iterator_category #include "filesystem_include.h" -#include #include +#include +#include #include "test_macros.h" diff --git a/libcxx/test/std/input.output/filesystems/class.filesystem_error/filesystem_error.members.pass.cpp b/libcxx/test/std/input.output/filesystems/class.filesystem_error/filesystem_error.members.pass.cpp index 3c0ae5d0886c..8fd23b60081b 100644 --- a/libcxx/test/std/input.output/filesystems/class.filesystem_error/filesystem_error.members.pass.cpp +++ b/libcxx/test/std/input.output/filesystems/class.filesystem_error/filesystem_error.members.pass.cpp @@ -21,8 +21,10 @@ // const path& path2() const noexcept; #include "filesystem_include.h" -#include #include +#include +#include +#include #include "test_macros.h" diff --git a/libcxx/test/std/input.output/filesystems/class.path/path.member/path.assign/copy.pass.cpp b/libcxx/test/std/input.output/filesystems/class.path/path.member/path.assign/copy.pass.cpp index a7182557d3f7..9044b2f5a45e 100644 --- a/libcxx/test/std/input.output/filesystems/class.path/path.member/path.assign/copy.pass.cpp +++ b/libcxx/test/std/input.output/filesystems/class.path/path.member/path.assign/copy.pass.cpp @@ -15,8 +15,9 @@ // path& operator=(path const&); #include "filesystem_include.h" -#include #include +#include +#include #include "test_macros.h" diff --git a/libcxx/test/std/input.output/filesystems/class.path/path.member/path.assign/move.pass.cpp b/libcxx/test/std/input.output/filesystems/class.path/path.member/path.assign/move.pass.cpp index 69dff085e58f..0f3e4745fd53 100644 --- a/libcxx/test/std/input.output/filesystems/class.path/path.member/path.assign/move.pass.cpp +++ b/libcxx/test/std/input.output/filesystems/class.path/path.member/path.assign/move.pass.cpp @@ -17,8 +17,9 @@ // path& operator=(path&&) noexcept #include "filesystem_include.h" -#include #include +#include +#include #include "test_macros.h" #include "count_new.h" diff --git a/libcxx/test/std/input.output/filesystems/class.path/path.member/path.construct/copy.pass.cpp b/libcxx/test/std/input.output/filesystems/class.path/path.member/path.construct/copy.pass.cpp index 31d00ffd2cd3..1155b7c1b3b5 100644 --- a/libcxx/test/std/input.output/filesystems/class.path/path.member/path.construct/copy.pass.cpp +++ b/libcxx/test/std/input.output/filesystems/class.path/path.member/path.construct/copy.pass.cpp @@ -15,8 +15,9 @@ // path(path const&) #include "filesystem_include.h" -#include #include +#include +#include #include "test_macros.h" diff --git a/libcxx/test/std/input.output/filesystems/class.path/path.member/path.construct/move.pass.cpp b/libcxx/test/std/input.output/filesystems/class.path/path.member/path.construct/move.pass.cpp index 40a8c54f802d..9f575ba61329 100644 --- a/libcxx/test/std/input.output/filesystems/class.path/path.member/path.construct/move.pass.cpp +++ b/libcxx/test/std/input.output/filesystems/class.path/path.member/path.construct/move.pass.cpp @@ -16,8 +16,9 @@ // path(path&&) noexcept #include "filesystem_include.h" -#include #include +#include +#include #include "test_macros.h" #include "count_new.h" diff --git a/libcxx/test/std/input.output/filesystems/class.path/synop.pass.cpp b/libcxx/test/std/input.output/filesystems/class.path/synop.pass.cpp index e9e0a990165a..ab18399fd5d6 100644 --- a/libcxx/test/std/input.output/filesystems/class.path/synop.pass.cpp +++ b/libcxx/test/std/input.output/filesystems/class.path/synop.pass.cpp @@ -17,8 +17,9 @@ // static constexpr value_type preferred_separator = ...; #include "filesystem_include.h" -#include #include 
+#include +#include #include "test_macros.h" diff --git a/libcxx/test/support/filesystem_test_helper.h b/libcxx/test/support/filesystem_test_helper.h index 00f4701e881a..44a01645dc49 100644 --- a/libcxx/test/support/filesystem_test_helper.h +++ b/libcxx/test/support/filesystem_test_helper.h @@ -14,9 +14,10 @@ #endif #include +#include #include // for printf #include -#include +#include #include #include "make_string.h" From 3a5fb57393c3bc77be9e7afc2ec9d4ec3c9bbf70 Mon Sep 17 00:00:00 2001 From: Fangrui Song Date: Wed, 22 Dec 2021 17:41:50 -0800 Subject: [PATCH 096/293] [ELF] Replace LazyObjFile with lazy ObjFile/BitcodeFile The new `lazy` state is the inverse of the previous `LazyObjFile::extracted`. There are many advantages: * previously when a LazyObjFile was extracted, a new ObjFile/BitcodeFile was created; now the file is reused, just with `lazy` cleared * avoid the confusing transfer of `symbols` from LazyObjFile to the new file * the `incompatible file:` diagnostic is unified with `is incompatible with` * simpler code, smaller executable (6200+ bytes smaller on x86-64) * make eager parsing feasible (for parallel section/symbol table initialization) --- lld/ELF/Driver.cpp | 6 +-- lld/ELF/InputFiles.cpp | 80 +++++++++++++------------------ lld/ELF/InputFiles.h | 47 ++++++------------ lld/ELF/LTO.cpp | 4 +- lld/ELF/Symbols.cpp | 11 +++-- lld/ELF/Symbols.h | 4 +- lld/test/ELF/lazy-arch-conflict.s | 2 +- 7 files changed, 62 insertions(+), 92 deletions(-) diff --git a/lld/ELF/Driver.cpp b/lld/ELF/Driver.cpp index 05cc4db5af74..7d01b7f33dec 100644 --- a/lld/ELF/Driver.cpp +++ b/lld/ELF/Driver.cpp @@ -90,7 +90,7 @@ bool elf::link(ArrayRef args, bool canExitEarly, archiveFiles.clear(); binaryFiles.clear(); bitcodeFiles.clear(); - lazyObjFiles.clear(); + lazyBitcodeFiles.clear(); objectFiles.clear(); sharedFiles.clear(); backwardReferences.clear(); @@ -248,7 +248,7 @@ void LinkerDriver::addFile(StringRef path, bool withLOption) { for (const std::pair &p : getArchiveMembers(mbref)) - files.push_back(make(p.first, path, p.second)); + files.push_back(createLazyFile(p.first, path, p.second)); return; } @@ -273,7 +273,7 @@ void LinkerDriver::addFile(StringRef path, bool withLOption) { case file_magic::bitcode: case file_magic::elf_relocatable: if (inLib) - files.push_back(make(mbref, "", 0)); + files.push_back(createLazyFile(mbref, "", 0)); else files.push_back(createObjectFile(mbref)); break; diff --git a/lld/ELF/InputFiles.cpp b/lld/ELF/InputFiles.cpp index fb1fca1c7f0f..cf8fd1dfc313 100644 --- a/lld/ELF/InputFiles.cpp +++ b/lld/ELF/InputFiles.cpp @@ -46,7 +46,7 @@ uint32_t InputFile::nextGroupId; std::vector elf::archiveFiles; std::vector elf::binaryFiles; std::vector elf::bitcodeFiles; -std::vector elf::lazyObjFiles; +std::vector elf::lazyBitcodeFiles; std::vector elf::objectFiles; std::vector elf::sharedFiles; @@ -186,9 +186,13 @@ template static void doParseFile(InputFile *file) { } // Lazy object file - if (auto *f = dyn_cast(file)) { - lazyObjFiles.push_back(f); - f->parse(); + if (file->lazy) { + if (auto *f = dyn_cast(file)) { + lazyBitcodeFiles.push_back(f); + f->parseLazy(); + } else { + cast>(file)->parseLazy(); + } return; } @@ -1130,15 +1134,14 @@ template void ObjFile::initializeSymbols() { // defined symbol in a .eh_frame becomes dangling symbols. 
if (sec == &InputSection::discarded) { Undefined und{this, name, binding, stOther, type, secIdx}; - // !ArchiveFile::parsed or LazyObjFile::extracted means that the file + // !ArchiveFile::parsed or !LazyObjFile::lazy means that the file // containing this object has not finished processing, i.e. this symbol is // a result of a lazy symbol extract. We should demote the lazy symbol to // an Undefined so that any relocations outside of the group to it will // trigger a discarded section error. if ((sym->symbolKind == Symbol::LazyArchiveKind && !cast(sym->file)->parsed) || - (sym->symbolKind == Symbol::LazyObjectKind && - cast(sym->file)->extracted)) { + (sym->symbolKind == Symbol::LazyObjectKind && !sym->file->lazy)) { sym->replace(und); // Prevent LTO from internalizing the symbol in case there is a // reference to this symbol from this file. @@ -1630,9 +1633,10 @@ static uint8_t getOsAbi(const Triple &t) { } BitcodeFile::BitcodeFile(MemoryBufferRef mb, StringRef archiveName, - uint64_t offsetInArchive) + uint64_t offsetInArchive, bool lazy) : InputFile(BitcodeKind, mb) { this->archiveName = archiveName; + this->lazy = lazy; std::string path = mb.getBufferIdentifier().str(); if (config->thinLTOIndexOnly) @@ -1718,6 +1722,12 @@ template void BitcodeFile::parse() { addDependentLibrary(l, this); } +void BitcodeFile::parseLazy() { + for (const lto::InputFile::Symbol &sym : obj->symbols()) + if (!sym.isUndefined()) + symtab->addSymbol(LazyObject{*this, saver.save(sym.getName())}); +} + void BinaryFile::parse() { ArrayRef data = arrayRefFromStringRef(mb.getBuffer()); auto *section = make(this, SHF_ALLOC | SHF_WRITE, SHT_PROGBITS, @@ -1744,7 +1754,7 @@ void BinaryFile::parse() { InputFile *elf::createObjectFile(MemoryBufferRef mb, StringRef archiveName, uint64_t offsetInArchive) { if (isBitcode(mb)) - return make(mb, archiveName, offsetInArchive); + return make(mb, archiveName, offsetInArchive, /*lazy=*/false); switch (getELFKind(mb, archiveName)) { case ELF32LEKind: @@ -1760,41 +1770,20 @@ InputFile *elf::createObjectFile(MemoryBufferRef mb, StringRef archiveName, } } -void LazyObjFile::extract() { - if (extracted) - return; - extracted = true; - - InputFile *file = createObjectFile(mb, archiveName, offsetInArchive); - file->groupId = groupId; - - // Copy symbol vector so that the new InputFile doesn't have to - // insert the same defined symbols to the symbol table again. - file->symbols = std::move(symbols); +InputFile *elf::createLazyFile(MemoryBufferRef mb, StringRef archiveName, + uint64_t offsetInArchive) { + if (isBitcode(mb)) + return make(mb, archiveName, offsetInArchive, /*lazy=*/true); - parseFile(file); + auto *file = + cast(createObjectFile(mb, archiveName, offsetInArchive)); + file->lazy = true; + return file; } -template void LazyObjFile::parse() { +template void ObjFile::parseLazy() { using Elf_Sym = typename ELFT::Sym; - // A lazy object file wraps either a bitcode file or an ELF file. - if (isBitcode(this->mb)) { - std::unique_ptr obj = - CHECK(lto::InputFile::create(this->mb), this); - for (const lto::InputFile::Symbol &sym : obj->symbols()) { - if (sym.isUndefined()) - continue; - symtab->addSymbol(LazyObject{*this, saver.save(sym.getName())}); - } - return; - } - - if (getELFKind(this->mb, archiveName) != config->ekind) { - error("incompatible file: " + this->mb.getBufferIdentifier()); - return; - } - // Find a symbol table. 
ELFFile obj = check(ELFFile::create(mb.getBuffer())); ArrayRef sections = CHECK(obj.sections(), this); @@ -1825,16 +1814,16 @@ template void LazyObjFile::parse() { continue; sym->resolve(LazyObject{*this, sym->getName()}); - // If extracted, stop iterating because this->symbols has been transferred - // to the instantiated ObjFile. - if (extracted) + // If extracted, stop iterating because the symbol resolution has been + // done by ObjFile::parse. + if (!lazy) return; } return; } } -bool LazyObjFile::shouldExtractForCommon(const StringRef &name) { +bool InputFile::shouldExtractForCommon(StringRef name) { if (isBitcode(mb)) return isBitcodeNonCommonDef(mb, name, archiveName); @@ -1855,11 +1844,6 @@ template void BitcodeFile::parse(); template void BitcodeFile::parse(); template void BitcodeFile::parse(); -template void LazyObjFile::parse(); -template void LazyObjFile::parse(); -template void LazyObjFile::parse(); -template void LazyObjFile::parse(); - template class elf::ObjFile; template class elf::ObjFile; template class elf::ObjFile; diff --git a/lld/ELF/InputFiles.h b/lld/ELF/InputFiles.h index e15f8798ae1c..dd39bc397a0f 100644 --- a/lld/ELF/InputFiles.h +++ b/lld/ELF/InputFiles.h @@ -101,6 +101,10 @@ class InputFile { // Get filename to use for linker script processing. StringRef getNameForScript() const; + // Check if a non-common symbol should be extracted to override a common + // definition. + bool shouldExtractForCommon(StringRef name); + // If not empty, this stores the name of the archive containing this file. // We use this string for creating error messages. SmallString<0> archiveName; @@ -132,6 +136,11 @@ class InputFile { ELFKind ekind = ELFNoneKind; uint8_t osabi = 0; uint8_t abiVersion = 0; + + // True if this is a relocatable object file/bitcode file between --start-lib + // and --end-lib. + bool lazy = false; + // True if this is an argument for --just-symbols. Usually false. bool justSymbols = false; @@ -211,6 +220,7 @@ template class ObjFile : public ELFFileBase { } void parse(bool ignoreComdats = false); + void parseLazy(); StringRef getShtGroupSignature(ArrayRef sections, const Elf_Shdr &sec); @@ -294,36 +304,6 @@ template class ObjFile : public ELFFileBase { llvm::once_flag initDwarf; }; -// LazyObjFile is analogous to ArchiveFile in the sense that -// the file contains lazy symbols. The difference is that -// LazyObjFile wraps a single file instead of multiple files. -// -// This class is used for --start-lib and --end-lib options which -// instruct the linker to link object files between them with the -// archive file semantics. -class LazyObjFile : public InputFile { -public: - LazyObjFile(MemoryBufferRef m, StringRef archiveName, - uint64_t offsetInArchive) - : InputFile(LazyObjKind, m), offsetInArchive(offsetInArchive) { - this->archiveName = archiveName; - } - - static bool classof(const InputFile *f) { return f->kind() == LazyObjKind; } - - template void parse(); - void extract(); - - // Check if a non-common symbol should be extracted to override a common - // definition. - bool shouldExtractForCommon(const StringRef &name); - - bool extracted = false; - -private: - uint64_t offsetInArchive; -}; - // An ArchiveFile object represents a .a file. 
class ArchiveFile : public InputFile { public: @@ -354,9 +334,10 @@ class ArchiveFile : public InputFile { class BitcodeFile : public InputFile { public: BitcodeFile(MemoryBufferRef m, StringRef archiveName, - uint64_t offsetInArchive); + uint64_t offsetInArchive, bool lazy); static bool classof(const InputFile *f) { return f->kind() == BitcodeKind; } template void parse(); + void parseLazy(); std::unique_ptr obj; }; @@ -406,6 +387,8 @@ class BinaryFile : public InputFile { InputFile *createObjectFile(MemoryBufferRef mb, StringRef archiveName = "", uint64_t offsetInArchive = 0); +InputFile *createLazyFile(MemoryBufferRef mb, StringRef archiveName, + uint64_t offsetInArchive); inline bool isBitcode(MemoryBufferRef mb) { return identify_magic(mb.getBuffer()) == llvm::file_magic::bitcode; @@ -416,7 +399,7 @@ std::string replaceThinLTOSuffix(StringRef path); extern std::vector archiveFiles; extern std::vector binaryFiles; extern std::vector bitcodeFiles; -extern std::vector lazyObjFiles; +extern std::vector lazyBitcodeFiles; extern std::vector objectFiles; extern std::vector sharedFiles; diff --git a/lld/ELF/LTO.cpp b/lld/ELF/LTO.cpp index 46dc77a6789c..b1632b8bae8a 100644 --- a/lld/ELF/LTO.cpp +++ b/lld/ELF/LTO.cpp @@ -278,8 +278,8 @@ void BitcodeCompiler::add(BitcodeFile &f) { // This is needed because this is what GNU gold plugin does and we have a // distributed build system that depends on that behavior. static void thinLTOCreateEmptyIndexFiles() { - for (LazyObjFile *f : lazyObjFiles) { - if (f->extracted || !isBitcode(f->mb)) + for (BitcodeFile *f : lazyBitcodeFiles) { + if (!f->lazy) continue; std::string path = replaceThinLTOSuffix(getThinLTOOutputFile(f->getName())); std::unique_ptr os = openFile(path + ".thinlto.bc"); diff --git a/lld/ELF/Symbols.cpp b/lld/ELF/Symbols.cpp index f00d3217a6af..f6f0ad0087d7 100644 --- a/lld/ELF/Symbols.cpp +++ b/lld/ELF/Symbols.cpp @@ -256,10 +256,12 @@ void Symbol::parseSymbolVersion() { } void Symbol::extract() const { - if (auto *sym = dyn_cast(this)) + if (auto *sym = dyn_cast(this)) { cast(sym->file)->extract(sym->sym); - else - cast(this->file)->extract(); + } else if (file->lazy) { + file->lazy = false; + parseFile(file); + } } MemoryBufferRef LazyArchive::getMemberBuffer() { @@ -711,8 +713,7 @@ template void Symbol::resolveLazy(const LazyT &other) { return; } } else if (auto *loSym = dyn_cast(&other)) { - LazyObjFile *obj = cast(loSym->file); - if (obj->shouldExtractForCommon(loSym->getName())) { + if (loSym->file->shouldExtractForCommon(loSym->getName())) { replaceCommon(*this, other); return; } diff --git a/lld/ELF/Symbols.h b/lld/ELF/Symbols.h index b0b7832135a7..beb45ec14147 100644 --- a/lld/ELF/Symbols.h +++ b/lld/ELF/Symbols.h @@ -430,7 +430,9 @@ class LazyObject : public Symbol { public: LazyObject(InputFile &file, StringRef name) : Symbol(LazyObjectKind, &file, name, llvm::ELF::STB_GLOBAL, - llvm::ELF::STV_DEFAULT, llvm::ELF::STT_NOTYPE) {} + llvm::ELF::STV_DEFAULT, llvm::ELF::STT_NOTYPE) { + isUsedInRegularObj = false; + } static bool classof(const Symbol *s) { return s->kind() == LazyObjectKind; } }; diff --git a/lld/test/ELF/lazy-arch-conflict.s b/lld/test/ELF/lazy-arch-conflict.s index 991476c5d6ec..b6b41ae1025a 100644 --- a/lld/test/ELF/lazy-arch-conflict.s +++ b/lld/test/ELF/lazy-arch-conflict.s @@ -4,4 +4,4 @@ # RUN: echo '.globl foo; foo:' | llvm-mc -filetype=obj -triple=i686-pc-linux - -o %t32.o # RUN: not ld.lld %t64.o --start-lib %t32.o --end-lib -o /dev/null 2>&1 | FileCheck %s -# CHECK: error: incompatible file: {{.*}}32.o 
+# CHECK: error: {{.*}}32.o is incompatible with {{.*}}64.o From 157d56550293d5b7f8fbe65456930169acf64783 Mon Sep 17 00:00:00 2001 From: Philip Reames Date: Wed, 22 Dec 2021 18:19:37 -0800 Subject: [PATCH 097/293] [docs] Update new builder instructions to starting on the staging buildmaster This adds a step in the instructions to switch to the production buildmaster at the very end (after explicit approval), and updates the early instruction to start with the builder pointed at the staging buildmaster. Differential Revision: https://reviews.llvm.org/D116129 --- llvm/docs/HowToAddABuilder.rst | 36 +++++++++++++++++++++++----------- 1 file changed, 25 insertions(+), 11 deletions(-) diff --git a/llvm/docs/HowToAddABuilder.rst b/llvm/docs/HowToAddABuilder.rst index 24837f7c580a..5bb82489262e 100644 --- a/llvm/docs/HowToAddABuilder.rst +++ b/llvm/docs/HowToAddABuilder.rst @@ -80,7 +80,7 @@ Here are the steps you can follow to do so: to authenticate your buildbot-worker. #. Create a buildbot-worker in context of that buildbot-worker account. Point it - to the **lab.llvm.org** port **9990** (see `Buildbot documentation, + to the **lab.llvm.org** port **9994** (see `Buildbot documentation, Creating a worker `_ for more details) by running the following command: @@ -88,12 +88,14 @@ Here are the steps you can follow to do so: .. code-block:: bash $ buildbot-worker create-worker \ - lab.llvm.org:9990 \ + lab.llvm.org:9994 \ \ - To point a worker to silent master please use lab.llvm.org:9994 instead - of lab.llvm.org:9990. + This will cause your new worker to connect to the staging buildmaster + which is silent by default. Only once a new worker is stable, and + approval from Galina has been received (see last step) should it + be pointed at the main buildmaster. #. Fill the buildbot-worker description and admin name/e-mail. Here is an example of the buildbot-worker description:: @@ -140,13 +142,25 @@ Here are the steps you can follow to do so: will let you know that your changes are applied and buildmaster is reconfigured. -#. Check the status of your buildbot-worker on the `Waterfall Display - `_ to make sure it is connected, - and the `Workers Display `_ to see if - administrator contact and worker information are correct. - -#. Wait for the first build to succeed and enjoy. - +#. Check the status of your buildbot-worker on the `Waterfall Display (Staging) + `_ to make sure it is + connected, and the `Workers Display (Staging) + `_ to see if administrator + contact and worker information are correct. + +#. At this point, you have a working builder connected to the staging + buildmaster. You can now make sure it is reliably green and keeps + up with the build queue. No notifications will be sent, so you can + keep an unstable builder connected to staging indefinitely. + +#. (Optional) Once the builder is stable on the staging buildmaster with + several days of green history, you can chose to move it to the production + buildmaster to enable developer notifications. Please email `Galina + Kistanova `_ for review and approval. + + To move a worker to production (once approved), stop your worker, edit the + buildbot.tac file to change the port number from 9994 to 9990 and start it + again. 
Best Practices for Configuring a Fast Builder ============================================= From ee5d5e19f99db863fc3037eb125d8cfce3b2c5d8 Mon Sep 17 00:00:00 2001 From: Philip Reames Date: Wed, 22 Dec 2021 18:21:59 -0800 Subject: [PATCH 098/293] [funcattrs] Use callsite param attributes from indirect calls when inferring access attributes Arguments to an indirect call is by definition outside the SCC, but there's no reason we can't use locally defined facts on the call site. This also has the nice effect of further simplifying the code. Differential Revision: https://reviews.llvm.org/D116118 --- llvm/lib/Transforms/IPO/FunctionAttrs.cpp | 23 ++++------- .../Transforms/FunctionAttrs/readattrs.ll | 39 +++++++++++++++++++ 2 files changed, 46 insertions(+), 16 deletions(-) diff --git a/llvm/lib/Transforms/IPO/FunctionAttrs.cpp b/llvm/lib/Transforms/IPO/FunctionAttrs.cpp index 516d4006894c..321d4a19a585 100644 --- a/llvm/lib/Transforms/IPO/FunctionAttrs.cpp +++ b/llvm/lib/Transforms/IPO/FunctionAttrs.cpp @@ -710,22 +710,13 @@ determinePointerAccessAttrs(Argument *A, if (CB.doesNotAccessMemory()) continue; - Function *F = CB.getCalledFunction(); - if (!F) { - if (CB.onlyReadsMemory()) { - IsRead = true; - continue; - } - return Attribute::None; - } - - if (CB.isArgOperand(U) && UseIndex < F->arg_size() && - SCCNodes.count(F->getArg(UseIndex))) { - // This is an argument which is part of the speculative SCC. Note that - // only operands corresponding to formal arguments of the callee can - // participate in the speculation. - break; - } + if (Function *F = CB.getCalledFunction()) + if (CB.isArgOperand(U) && UseIndex < F->arg_size() && + SCCNodes.count(F->getArg(UseIndex))) + // This is an argument which is part of the speculative SCC. Note + // that only operands corresponding to formal arguments of the callee + // can participate in the speculation. + break; // The accessors used on call site here do the right thing for calls and // invokes with operand bundles. 
diff --git a/llvm/test/Transforms/FunctionAttrs/readattrs.ll b/llvm/test/Transforms/FunctionAttrs/readattrs.ll index 3ca0bc438227..8ea47cf43070 100644 --- a/llvm/test/Transforms/FunctionAttrs/readattrs.ll +++ b/llvm/test/Transforms/FunctionAttrs/readattrs.ll @@ -146,3 +146,42 @@ define void @unsound_readonly(i8* %ignored, i8* %escaped_then_written) { store i8 0, i8* %addr.ld ret void } + + +; CHECK: define void @fptr_test1a(i8* nocapture readnone %p, void (i8*)* nocapture readonly %f) +define void @fptr_test1a(i8* %p, void (i8*)* %f) { + call void %f(i8* nocapture readnone %p) + ret void +} + +; CHECK: define void @fptr_test1b(i8* %p, void (i8*)* nocapture readonly %f) +define void @fptr_test1b(i8* %p, void (i8*)* %f) { + ; Can't infer readnone here because call might capture %p + call void %f(i8* readnone %p) + ret void +} + +; CHECK: define void @fptr_test1c(i8* readnone %p, void (i8*)* nocapture readonly %f) +define void @fptr_test1c(i8* %p, void (i8*)* %f) { + call void %f(i8* readnone %p) readonly + ret void +} + +; CHECK: define void @fptr_test2a(i8* nocapture readonly %p, void (i8*)* nocapture readonly %f) +define void @fptr_test2a(i8* %p, void (i8*)* %f) { + call void %f(i8* nocapture readonly %p) + ret void +} + +; CHECK: define void @fptr_test2b(i8* %p, void (i8*)* nocapture readonly %f) +define void @fptr_test2b(i8* %p, void (i8*)* %f) { + ; Can't infer readonly here because call might capture %p + call void %f(i8* readonly %p) + ret void +} + +; CHECK: define void @fptr_test2c(i8* readonly %p, void (i8*)* nocapture readonly %f) +define void @fptr_test2c(i8* %p, void (i8*)* %f) { + call void %f(i8* readonly %p) readonly + ret void +} From 28a3e7dea2abb673c3dd3c08c111a6b00875bfb3 Mon Sep 17 00:00:00 2001 From: jacquesguan Date: Fri, 17 Dec 2021 17:28:27 +0800 Subject: [PATCH 099/293] [RISCV] Override hasAndNotCompare to use more andn when have Zbb extension. Enable transform (X & Y) == Y ---> (~X & Y) == 0 and (X & Y) != Y ---> (~X & Y) != 0 when have Zbb extension to use more andn instruction. Differential Revision: https://reviews.llvm.org/D115922 --- llvm/lib/Target/RISCV/RISCVISelLowering.cpp | 2 +- llvm/lib/Target/RISCV/RISCVISelLowering.h | 2 +- llvm/test/CodeGen/RISCV/rv32zbb-zbp.ll | 6 ++---- llvm/test/CodeGen/RISCV/rv64zbb-zbp.ll | 12 ++++-------- 4 files changed, 8 insertions(+), 14 deletions(-) diff --git a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp index f33965b50459..399be7abc9e6 100644 --- a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp +++ b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp @@ -1166,7 +1166,7 @@ bool RISCVTargetLowering::isCheapToSpeculateCtlz() const { return Subtarget.hasStdExtZbb(); } -bool RISCVTargetLowering::hasAndNot(SDValue Y) const { +bool RISCVTargetLowering::hasAndNotCompare(SDValue Y) const { EVT VT = Y.getValueType(); // FIXME: Support vectors once we have tests. 
diff --git a/llvm/lib/Target/RISCV/RISCVISelLowering.h b/llvm/lib/Target/RISCV/RISCVISelLowering.h index 4ccfcbc50f7c..a2d3c74ed5fd 100644 --- a/llvm/lib/Target/RISCV/RISCVISelLowering.h +++ b/llvm/lib/Target/RISCV/RISCVISelLowering.h @@ -315,7 +315,7 @@ class RISCVTargetLowering : public TargetLowering { bool isSExtCheaperThanZExt(EVT SrcVT, EVT DstVT) const override; bool isCheapToSpeculateCttz() const override; bool isCheapToSpeculateCtlz() const override; - bool hasAndNot(SDValue Y) const override; + bool hasAndNotCompare(SDValue Y) const override; bool shouldSinkOperands(Instruction *I, SmallVectorImpl &Ops) const override; bool isFPImmLegal(const APFloat &Imm, EVT VT, diff --git a/llvm/test/CodeGen/RISCV/rv32zbb-zbp.ll b/llvm/test/CodeGen/RISCV/rv32zbb-zbp.ll index b2994fcdaf81..452de04563f0 100644 --- a/llvm/test/CodeGen/RISCV/rv32zbb-zbp.ll +++ b/llvm/test/CodeGen/RISCV/rv32zbb-zbp.ll @@ -568,8 +568,7 @@ define i1 @andn_seqz_i32(i32 %a, i32 %b) nounwind { ; ; RV32ZBB-LABEL: andn_seqz_i32: ; RV32ZBB: # %bb.0: -; RV32ZBB-NEXT: and a0, a0, a1 -; RV32ZBB-NEXT: xor a0, a0, a1 +; RV32ZBB-NEXT: andn a0, a1, a0 ; RV32ZBB-NEXT: seqz a0, a0 ; RV32ZBB-NEXT: ret ; @@ -625,8 +624,7 @@ define i1 @andn_snez_i32(i32 %a, i32 %b) nounwind { ; ; RV32ZBB-LABEL: andn_snez_i32: ; RV32ZBB: # %bb.0: -; RV32ZBB-NEXT: and a0, a0, a1 -; RV32ZBB-NEXT: xor a0, a0, a1 +; RV32ZBB-NEXT: andn a0, a1, a0 ; RV32ZBB-NEXT: snez a0, a0 ; RV32ZBB-NEXT: ret ; diff --git a/llvm/test/CodeGen/RISCV/rv64zbb-zbp.ll b/llvm/test/CodeGen/RISCV/rv64zbb-zbp.ll index be08a3ee8b1f..dc9c37b2ba6c 100644 --- a/llvm/test/CodeGen/RISCV/rv64zbb-zbp.ll +++ b/llvm/test/CodeGen/RISCV/rv64zbb-zbp.ll @@ -639,8 +639,7 @@ define i1 @andn_seqz_i32(i32 signext %a, i32 signext %b) nounwind { ; ; RV64ZBB-LABEL: andn_seqz_i32: ; RV64ZBB: # %bb.0: -; RV64ZBB-NEXT: and a0, a0, a1 -; RV64ZBB-NEXT: xor a0, a0, a1 +; RV64ZBB-NEXT: andn a0, a1, a0 ; RV64ZBB-NEXT: seqz a0, a0 ; RV64ZBB-NEXT: ret ; @@ -665,8 +664,7 @@ define i1 @andn_seqz_i64(i64 %a, i64 %b) nounwind { ; ; RV64ZBB-LABEL: andn_seqz_i64: ; RV64ZBB: # %bb.0: -; RV64ZBB-NEXT: and a0, a0, a1 -; RV64ZBB-NEXT: xor a0, a0, a1 +; RV64ZBB-NEXT: andn a0, a1, a0 ; RV64ZBB-NEXT: seqz a0, a0 ; RV64ZBB-NEXT: ret ; @@ -691,8 +689,7 @@ define i1 @andn_snez_i32(i32 signext %a, i32 signext %b) nounwind { ; ; RV64ZBB-LABEL: andn_snez_i32: ; RV64ZBB: # %bb.0: -; RV64ZBB-NEXT: and a0, a0, a1 -; RV64ZBB-NEXT: xor a0, a0, a1 +; RV64ZBB-NEXT: andn a0, a1, a0 ; RV64ZBB-NEXT: snez a0, a0 ; RV64ZBB-NEXT: ret ; @@ -717,8 +714,7 @@ define i1 @andn_snez_i64(i64 %a, i64 %b) nounwind { ; ; RV64ZBB-LABEL: andn_snez_i64: ; RV64ZBB: # %bb.0: -; RV64ZBB-NEXT: and a0, a0, a1 -; RV64ZBB-NEXT: xor a0, a0, a1 +; RV64ZBB-NEXT: andn a0, a1, a0 ; RV64ZBB-NEXT: snez a0, a0 ; RV64ZBB-NEXT: ret ; From e5b23e55a281646cd8ce85b24f5ac2d8a06cbef6 Mon Sep 17 00:00:00 2001 From: Philip Reames Date: Wed, 22 Dec 2021 19:25:22 -0800 Subject: [PATCH 100/293] Autogen a test in advance of another change --- .../sink_sideeffecting_instruction.ll | 98 +++++++++++++------ 1 file changed, 66 insertions(+), 32 deletions(-) diff --git a/llvm/test/Transforms/InstCombine/sink_sideeffecting_instruction.ll b/llvm/test/Transforms/InstCombine/sink_sideeffecting_instruction.ll index e1d411c7a5a8..aed7db49b5fd 100644 --- a/llvm/test/Transforms/InstCombine/sink_sideeffecting_instruction.ll +++ b/llvm/test/Transforms/InstCombine/sink_sideeffecting_instruction.ll @@ -1,12 +1,20 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py ; RUN: opt 
-instcombine -S < %s | FileCheck %s ; Function Attrs: noinline uwtable define dso_local i32 @foo(i32* nocapture writeonly %arg) local_unnamed_addr #0 { +; CHECK-LABEL: @foo( +; CHECK-NEXT: bb: +; CHECK-NEXT: [[VAR:%.*]] = call i32 @baz() #[[ATTR2:[0-9]+]] +; CHECK-NEXT: store i32 [[VAR]], i32* [[ARG:%.*]], align 4, !tbaa [[TBAA0:![0-9]+]] +; CHECK-NEXT: [[VAR1:%.*]] = call i32 @baz() #[[ATTR2]] +; CHECK-NEXT: ret i32 [[VAR1]] +; bb: - %tmp = call i32 @baz() - store i32 %tmp, i32* %arg, align 4, !tbaa !2 - %tmp1 = call i32 @baz() - ret i32 %tmp1 + %var = call i32 @baz() + store i32 %var, i32* %arg, align 4, !tbaa !2 + %var1 = call i32 @baz() + ret i32 %var1 } declare dso_local i32 @baz() local_unnamed_addr @@ -15,19 +23,19 @@ declare dso_local i32 @baz() local_unnamed_addr ; with out-params which are unused in the caller(test8). Note that foo is ; marked noinline to prevent IPO transforms. ; int foo(); -; +; ; extern int foo(int *out) __attribute__((noinline)); ; int foo(int *out) { ; *out = baz(); ; return baz(); ; } -; +; ; int test() { -; +; ; int notdead; ; if (foo(¬dead)) ; return 0; -; +; ; int dead; ; int tmp = foo(&dead); ; if (notdead) @@ -35,45 +43,71 @@ declare dso_local i32 @baz() local_unnamed_addr ; return bar(); ; } -; TODO: We should be able to sink the second call @foo at bb5 down to bb_crit_edge +; TODO: We should be able to sink the second call @foo at bb5 down to bb_crit_edge define dso_local i32 @test() local_unnamed_addr #2 { -; CHECK-LABEL: test -; CHECK: bb5: -; CHECK: %tmp7 = call i32 @foo(i32* nonnull writeonly %tmp1) -; CHECK-NEXT: br i1 %tmp9, label %bb10, label %bb_crit_edge +; CHECK-LABEL: @test( +; CHECK-NEXT: bb: +; CHECK-NEXT: [[VAR:%.*]] = alloca i32, align 4 +; CHECK-NEXT: [[VAR1:%.*]] = alloca i32, align 4 +; CHECK-NEXT: [[VAR2:%.*]] = bitcast i32* [[VAR]] to i8* +; CHECK-NEXT: call void @llvm.lifetime.start.p0i8(i64 4, i8* nonnull [[VAR2]]) +; CHECK-NEXT: [[VAR3:%.*]] = call i32 @foo(i32* nonnull writeonly [[VAR]]) +; CHECK-NEXT: [[VAR4:%.*]] = icmp eq i32 [[VAR3]], 0 +; CHECK-NEXT: br i1 [[VAR4]], label [[BB5:%.*]], label [[BB14:%.*]] +; CHECK: bb5: +; CHECK-NEXT: [[VAR6:%.*]] = bitcast i32* [[VAR1]] to i8* +; CHECK-NEXT: call void @llvm.lifetime.start.p0i8(i64 4, i8* nonnull [[VAR6]]) +; CHECK-NEXT: [[VAR8:%.*]] = load i32, i32* [[VAR]], align 4, !tbaa [[TBAA0]] +; CHECK-NEXT: [[VAR9:%.*]] = icmp eq i32 [[VAR8]], 0 +; CHECK-NEXT: [[VAR7:%.*]] = call i32 @foo(i32* nonnull writeonly [[VAR1]]) +; CHECK-NEXT: br i1 [[VAR9]], label [[BB10:%.*]], label [[BB_CRIT_EDGE:%.*]] +; CHECK: bb10: +; CHECK-NEXT: [[VAR11:%.*]] = call i32 @bar() +; CHECK-NEXT: br label [[BB12:%.*]] +; CHECK: bb_crit_edge: +; CHECK-NEXT: br label [[BB12]] +; CHECK: bb12: +; CHECK-NEXT: [[VAR13:%.*]] = phi i32 [ [[VAR11]], [[BB10]] ], [ [[VAR7]], [[BB_CRIT_EDGE]] ] +; CHECK-NEXT: call void @llvm.lifetime.end.p0i8(i64 4, i8* nonnull [[VAR6]]) +; CHECK-NEXT: br label [[BB14]] +; CHECK: bb14: +; CHECK-NEXT: [[VAR15:%.*]] = phi i32 [ [[VAR13]], [[BB12]] ], [ 0, [[BB:%.*]] ] +; CHECK-NEXT: call void @llvm.lifetime.end.p0i8(i64 4, i8* nonnull [[VAR2]]) +; CHECK-NEXT: ret i32 [[VAR15]] +; bb: - %tmp = alloca i32, align 4 - %tmp1 = alloca i32, align 4 - %tmp2 = bitcast i32* %tmp to i8* - call void @llvm.lifetime.start.p0i8(i64 4, i8* nonnull %tmp2) #4 - %tmp3 = call i32 @foo(i32* nonnull writeonly %tmp) - %tmp4 = icmp eq i32 %tmp3, 0 - br i1 %tmp4, label %bb5, label %bb14 + %var = alloca i32, align 4 + %var1 = alloca i32, align 4 + %var2 = bitcast i32* %var to i8* + call void 
@llvm.lifetime.start.p0i8(i64 4, i8* nonnull %var2) #4 + %var3 = call i32 @foo(i32* nonnull writeonly %var) + %var4 = icmp eq i32 %var3, 0 + br i1 %var4, label %bb5, label %bb14 bb5: ; preds = %bb - %tmp6 = bitcast i32* %tmp1 to i8* - call void @llvm.lifetime.start.p0i8(i64 4, i8* nonnull %tmp6) #4 - %tmp8 = load i32, i32* %tmp, align 4, !tbaa !2 - %tmp9 = icmp eq i32 %tmp8, 0 - %tmp7 = call i32 @foo(i32* nonnull writeonly %tmp1) - br i1 %tmp9, label %bb10, label %bb_crit_edge + %var6 = bitcast i32* %var1 to i8* + call void @llvm.lifetime.start.p0i8(i64 4, i8* nonnull %var6) #4 + %var8 = load i32, i32* %var, align 4, !tbaa !2 + %var9 = icmp eq i32 %var8, 0 + %var7 = call i32 @foo(i32* nonnull writeonly %var1) + br i1 %var9, label %bb10, label %bb_crit_edge bb10: ; preds = %bb5 - %tmp11 = call i32 @bar() + %var11 = call i32 @bar() br label %bb12 bb_crit_edge: br label %bb12 bb12: ; preds = %bb10, %bb5 - %tmp13 = phi i32 [ %tmp11, %bb10 ], [ %tmp7, %bb_crit_edge ] - call void @llvm.lifetime.end.p0i8(i64 4, i8* nonnull %tmp6) #4 + %var13 = phi i32 [ %var11, %bb10 ], [ %var7, %bb_crit_edge ] + call void @llvm.lifetime.end.p0i8(i64 4, i8* nonnull %var6) #4 br label %bb14 bb14: ; preds = %bb12, %bb - %tmp15 = phi i32 [ %tmp13, %bb12 ], [ 0, %bb ] - call void @llvm.lifetime.end.p0i8(i64 4, i8* nonnull %tmp2) - ret i32 %tmp15 + %var15 = phi i32 [ %var13, %bb12 ], [ 0, %bb ] + call void @llvm.lifetime.end.p0i8(i64 4, i8* nonnull %var2) + ret i32 %var15 } declare i32 @bar() From da41cfddcad6740d2867ae0725110b77abc3a5ad Mon Sep 17 00:00:00 2001 From: Philip Reames Date: Wed, 22 Dec 2021 19:35:42 -0800 Subject: [PATCH 101/293] Add test coverage for D109917 and variants --- .../sink_sideeffecting_instruction.ll | 128 ++++++++++++++---- 1 file changed, 105 insertions(+), 23 deletions(-) diff --git a/llvm/test/Transforms/InstCombine/sink_sideeffecting_instruction.ll b/llvm/test/Transforms/InstCombine/sink_sideeffecting_instruction.ll index aed7db49b5fd..791d4d9b2da8 100644 --- a/llvm/test/Transforms/InstCombine/sink_sideeffecting_instruction.ll +++ b/llvm/test/Transforms/InstCombine/sink_sideeffecting_instruction.ll @@ -2,21 +2,21 @@ ; RUN: opt -instcombine -S < %s | FileCheck %s ; Function Attrs: noinline uwtable -define dso_local i32 @foo(i32* nocapture writeonly %arg) local_unnamed_addr #0 { +define i32 @foo(i32* nocapture writeonly %arg) { ; CHECK-LABEL: @foo( ; CHECK-NEXT: bb: -; CHECK-NEXT: [[VAR:%.*]] = call i32 @baz() #[[ATTR2:[0-9]+]] -; CHECK-NEXT: store i32 [[VAR]], i32* [[ARG:%.*]], align 4, !tbaa [[TBAA0:![0-9]+]] -; CHECK-NEXT: [[VAR1:%.*]] = call i32 @baz() #[[ATTR2]] +; CHECK-NEXT: [[VAR:%.*]] = call i32 @baz() +; CHECK-NEXT: store i32 [[VAR]], i32* [[ARG:%.*]], align 4 +; CHECK-NEXT: [[VAR1:%.*]] = call i32 @baz() ; CHECK-NEXT: ret i32 [[VAR1]] ; bb: %var = call i32 @baz() - store i32 %var, i32* %arg, align 4, !tbaa !2 + store i32 %var, i32* %arg, align 4 %var1 = call i32 @baz() ret i32 %var1 } -declare dso_local i32 @baz() local_unnamed_addr +declare i32 @baz() ; Function Attrs: uwtable ; This is an equivalent IR for a c-style example with a large function (foo) @@ -44,7 +44,7 @@ declare dso_local i32 @baz() local_unnamed_addr ; } ; TODO: We should be able to sink the second call @foo at bb5 down to bb_crit_edge -define dso_local i32 @test() local_unnamed_addr #2 { +define i32 @test() { ; CHECK-LABEL: @test( ; CHECK-NEXT: bb: ; CHECK-NEXT: [[VAR:%.*]] = alloca i32, align 4 @@ -57,7 +57,7 @@ define dso_local i32 @test() local_unnamed_addr #2 { ; CHECK: bb5: ; CHECK-NEXT: [[VAR6:%.*]] = 
bitcast i32* [[VAR1]] to i8* ; CHECK-NEXT: call void @llvm.lifetime.start.p0i8(i64 4, i8* nonnull [[VAR6]]) -; CHECK-NEXT: [[VAR8:%.*]] = load i32, i32* [[VAR]], align 4, !tbaa [[TBAA0]] +; CHECK-NEXT: [[VAR8:%.*]] = load i32, i32* [[VAR]], align 4 ; CHECK-NEXT: [[VAR9:%.*]] = icmp eq i32 [[VAR8]], 0 ; CHECK-NEXT: [[VAR7:%.*]] = call i32 @foo(i32* nonnull writeonly [[VAR1]]) ; CHECK-NEXT: br i1 [[VAR9]], label [[BB10:%.*]], label [[BB_CRIT_EDGE:%.*]] @@ -87,7 +87,7 @@ bb: bb5: ; preds = %bb %var6 = bitcast i32* %var1 to i8* call void @llvm.lifetime.start.p0i8(i64 4, i8* nonnull %var6) #4 - %var8 = load i32, i32* %var, align 4, !tbaa !2 + %var8 = load i32, i32* %var, align 4 %var9 = icmp eq i32 %var8, 0 %var7 = call i32 @foo(i32* nonnull writeonly %var1) br i1 %var9, label %bb10, label %bb_crit_edge @@ -110,18 +110,100 @@ bb14: ; preds = %bb12, %bb ret i32 %var15 } +declare i32 @unknown(i32* %dest) + +define i32 @sink_to_use(i1 %c) { +; CHECK-LABEL: @sink_to_use( +; CHECK-NEXT: entry: +; CHECK-NEXT: [[VAR:%.*]] = alloca i32, align 4 +; CHECK-NEXT: [[VAR3:%.*]] = call i32 @unknown(i32* nonnull [[VAR]]) #[[ATTR1:[0-9]+]] +; CHECK-NEXT: br i1 [[C:%.*]], label [[EARLY_RETURN:%.*]], label [[USE_BLOCK:%.*]] +; CHECK: early_return: +; CHECK-NEXT: ret i32 0 +; CHECK: use_block: +; CHECK-NEXT: ret i32 [[VAR3]] +; +entry: + %var = alloca i32, align 4 + %var3 = call i32 @unknown(i32* %var) argmemonly nounwind willreturn + br i1 %c, label %early_return, label %use_block + +early_return: + ret i32 0 + +use_block: + ret i32 %var3 +} + +define i32 @neg_infinite_loop(i1 %c) { +; CHECK-LABEL: @neg_infinite_loop( +; CHECK-NEXT: entry: +; CHECK-NEXT: [[VAR:%.*]] = alloca i32, align 4 +; CHECK-NEXT: [[VAR3:%.*]] = call i32 @unknown(i32* nonnull [[VAR]]) #[[ATTR2:[0-9]+]] +; CHECK-NEXT: br i1 [[C:%.*]], label [[EARLY_RETURN:%.*]], label [[USE_BLOCK:%.*]] +; CHECK: early_return: +; CHECK-NEXT: ret i32 0 +; CHECK: use_block: +; CHECK-NEXT: ret i32 [[VAR3]] +; +entry: + %var = alloca i32, align 4 + %var3 = call i32 @unknown(i32* %var) argmemonly nounwind + br i1 %c, label %early_return, label %use_block + +early_return: + ret i32 0 + +use_block: + ret i32 %var3 +} + +define i32 @neg_throw(i1 %c) { +; CHECK-LABEL: @neg_throw( +; CHECK-NEXT: entry: +; CHECK-NEXT: [[VAR:%.*]] = alloca i32, align 4 +; CHECK-NEXT: [[VAR3:%.*]] = call i32 @unknown(i32* nonnull [[VAR]]) #[[ATTR3:[0-9]+]] +; CHECK-NEXT: br i1 [[C:%.*]], label [[EARLY_RETURN:%.*]], label [[USE_BLOCK:%.*]] +; CHECK: early_return: +; CHECK-NEXT: ret i32 0 +; CHECK: use_block: +; CHECK-NEXT: ret i32 [[VAR3]] +; +entry: + %var = alloca i32, align 4 + %var3 = call i32 @unknown(i32* %var) argmemonly willreturn + br i1 %c, label %early_return, label %use_block + +early_return: + ret i32 0 + +use_block: + ret i32 %var3 +} + +define i32 @neg_unknown_write(i1 %c) { +; CHECK-LABEL: @neg_unknown_write( +; CHECK-NEXT: entry: +; CHECK-NEXT: [[VAR:%.*]] = alloca i32, align 4 +; CHECK-NEXT: [[VAR3:%.*]] = call i32 @unknown(i32* nonnull [[VAR]]) #[[ATTR4:[0-9]+]] +; CHECK-NEXT: br i1 [[C:%.*]], label [[EARLY_RETURN:%.*]], label [[USE_BLOCK:%.*]] +; CHECK: early_return: +; CHECK-NEXT: ret i32 0 +; CHECK: use_block: +; CHECK-NEXT: ret i32 [[VAR3]] +; +entry: + %var = alloca i32, align 4 + %var3 = call i32 @unknown(i32* %var) nounwind willreturn + br i1 %c, label %early_return, label %use_block + +early_return: + ret i32 0 + +use_block: + ret i32 %var3 +} + declare i32 @bar() -; Function Attrs: argmemonly nofree nosync nounwind willreturn -declare void 
@llvm.lifetime.start.p0i8(i64 immarg, i8* nocapture) #3 - -; Function Attrs: argmemonly nofree nosync nounwind willreturn -declare void @llvm.lifetime.end.p0i8(i64 immarg, i8* nocapture) #3 - -attributes #0 = { noinline uwtable argmemonly nounwind willreturn writeonly } -attributes #3 = { argmemonly nofree nosync nounwind willreturn } -!0 = !{i32 1, !"wchar_size", i32 4} -!1 = !{!"clang version 10.0.0-4ubuntu1 "} -!2 = !{!3, !3, i64 0} -!3 = !{!"int", !4, i64 0} -!4 = !{!"omnipotent char", !5, i64 0} -!5 = !{!"Simple C++ TBAA"} +declare void @llvm.lifetime.start.p0i8(i64 immarg, i8* nocapture) +declare void @llvm.lifetime.end.p0i8(i64 immarg, i8* nocapture) From 682d01a1c1c52bd95d3d06267d6017395770256b Mon Sep 17 00:00:00 2001 From: Phoebe Wang Date: Thu, 23 Dec 2021 10:35:46 +0800 Subject: [PATCH 102/293] [X86][MS-InlineAsm] Use exact conditions to recognize MS global variables D115225 tried to roll back the effects on symbols of MS inline asm introduced by D113096. But the combination of the conditions cannot match all the changes. As a result, there are still fails after the patch. This patch fixes the problem by checking the exact conditions for MS global variables, i.e., variable (by FrontendSize != 0) + non rip/eip (by DefaultBaseReg == 0), so that we can fully roll back for D113096. Reviewed By: skan Differential Revision: https://reviews.llvm.org/D116090 --- clang/test/CodeGen/ms-inline-asm-functions.c | 2 +- llvm/lib/Target/X86/AsmParser/X86AsmParser.cpp | 3 ++- llvm/lib/Target/X86/AsmParser/X86Operand.h | 7 +++---- 3 files changed, 6 insertions(+), 6 deletions(-) diff --git a/clang/test/CodeGen/ms-inline-asm-functions.c b/clang/test/CodeGen/ms-inline-asm-functions.c index c958d8803871..1a6ead9286df 100644 --- a/clang/test/CodeGen/ms-inline-asm-functions.c +++ b/clang/test/CodeGen/ms-inline-asm-functions.c @@ -39,7 +39,7 @@ int bar() { int baz() { // CHECK-LABEL: _baz: __asm mov eax, k; - // CHECK: movl k, %eax + // CHECK: movl _k, %eax __asm mov eax, kptr; // CHECK: movl _kptr, %eax } diff --git a/llvm/lib/Target/X86/AsmParser/X86AsmParser.cpp b/llvm/lib/Target/X86/AsmParser/X86AsmParser.cpp index 6bae55695f3d..2ba0b97229cc 100644 --- a/llvm/lib/Target/X86/AsmParser/X86AsmParser.cpp +++ b/llvm/lib/Target/X86/AsmParser/X86AsmParser.cpp @@ -1759,7 +1759,8 @@ bool X86AsmParser::CreateMemForMSInlineAsm( // registers in a mmory expression, and though unaccessible via rip/eip. if (IsGlobalLV && (BaseReg || IndexReg)) { Operands.push_back(X86Operand::CreateMem(getPointerWidth(), Disp, Start, - End, Size, Identifier, Decl)); + End, Size, Identifier, Decl, + FrontendSize)); return false; } // Otherwise, we set the base register to a non-zero value diff --git a/llvm/lib/Target/X86/AsmParser/X86Operand.h b/llvm/lib/Target/X86/AsmParser/X86Operand.h index 6d20c6af5fd2..0116c081541b 100644 --- a/llvm/lib/Target/X86/AsmParser/X86Operand.h +++ b/llvm/lib/Target/X86/AsmParser/X86Operand.h @@ -286,10 +286,9 @@ struct X86Operand final : public MCParsedAsmOperand { bool isOffsetOfLocal() const override { return isImm() && Imm.LocalRef; } bool isMemPlaceholder(const MCInstrDesc &Desc) const override { - // Add more restrictions to avoid the use of global symbols. This helps - // with reducing the code size. - return !Desc.isRematerializable() && !Desc.isCall() && isMem() && - !Mem.BaseReg && !Mem.IndexReg; + // Only MS InlineAsm uses global variables with registers rather than + // rip/eip. 
+ return !Mem.DefaultBaseReg && Mem.FrontendSize; } bool needAddressOf() const override { return AddressOf; } From 724e6861b3d64eb74ecd98eff88955c91a200940 Mon Sep 17 00:00:00 2001 From: Mehdi Amini Date: Wed, 22 Dec 2021 23:42:05 +0000 Subject: [PATCH 103/293] Fix warning: "unused variable 'attrClass'" (NFC) --- mlir/tools/mlir-tblgen/SPIRVUtilsGen.cpp | 1 - 1 file changed, 1 deletion(-) diff --git a/mlir/tools/mlir-tblgen/SPIRVUtilsGen.cpp b/mlir/tools/mlir-tblgen/SPIRVUtilsGen.cpp index ad0b05e4c47b..a2b862ea7cea 100644 --- a/mlir/tools/mlir-tblgen/SPIRVUtilsGen.cpp +++ b/mlir/tools/mlir-tblgen/SPIRVUtilsGen.cpp @@ -1144,7 +1144,6 @@ static bool emitSerializationFns(const RecordKeeper &recordKeeper, utilsString; raw_string_ostream dSerFn(dSerFnString), dDesFn(dDesFnString), serFn(serFnString), deserFn(deserFnString); - Record *attrClass = recordKeeper.getClass("Attr"); // Emit the serialization and deserialization functions simultaneously. StringRef opVar("op"); From 94a47dfde283523e5b072abc68f42368d12e271a Mon Sep 17 00:00:00 2001 From: Mehdi Amini Date: Thu, 23 Dec 2021 03:59:25 +0000 Subject: [PATCH 104/293] Revert "Fix warning: "unused variable 'attrClass'" (NFC)" This reverts commit 724e6861b3d64eb74ecd98eff88955c91a200940. This broke the build. --- mlir/tools/mlir-tblgen/SPIRVUtilsGen.cpp | 1 + 1 file changed, 1 insertion(+) diff --git a/mlir/tools/mlir-tblgen/SPIRVUtilsGen.cpp b/mlir/tools/mlir-tblgen/SPIRVUtilsGen.cpp index a2b862ea7cea..ad0b05e4c47b 100644 --- a/mlir/tools/mlir-tblgen/SPIRVUtilsGen.cpp +++ b/mlir/tools/mlir-tblgen/SPIRVUtilsGen.cpp @@ -1144,6 +1144,7 @@ static bool emitSerializationFns(const RecordKeeper &recordKeeper, utilsString; raw_string_ostream dSerFn(dSerFnString), dDesFn(dDesFnString), serFn(serFnString), deserFn(deserFnString); + Record *attrClass = recordKeeper.getClass("Attr"); // Emit the serialization and deserialization functions simultaneously. StringRef opVar("op"); From a954558e878ed9e97e99036229e99af8c6b6c881 Mon Sep 17 00:00:00 2001 From: Phoebe Wang Date: Thu, 23 Dec 2021 12:44:33 +0800 Subject: [PATCH 105/293] Revert "[X86][MS-InlineAsm] Use exact conditions to recognize MS global variables" This reverts commit 682d01a1c1c52bd95d3d06267d6017395770256b. Revert for buildbot fails. --- clang/test/CodeGen/ms-inline-asm-functions.c | 2 +- llvm/lib/Target/X86/AsmParser/X86AsmParser.cpp | 3 +-- llvm/lib/Target/X86/AsmParser/X86Operand.h | 7 ++++--- 3 files changed, 6 insertions(+), 6 deletions(-) diff --git a/clang/test/CodeGen/ms-inline-asm-functions.c b/clang/test/CodeGen/ms-inline-asm-functions.c index 1a6ead9286df..c958d8803871 100644 --- a/clang/test/CodeGen/ms-inline-asm-functions.c +++ b/clang/test/CodeGen/ms-inline-asm-functions.c @@ -39,7 +39,7 @@ int bar() { int baz() { // CHECK-LABEL: _baz: __asm mov eax, k; - // CHECK: movl _k, %eax + // CHECK: movl k, %eax __asm mov eax, kptr; // CHECK: movl _kptr, %eax } diff --git a/llvm/lib/Target/X86/AsmParser/X86AsmParser.cpp b/llvm/lib/Target/X86/AsmParser/X86AsmParser.cpp index 2ba0b97229cc..6bae55695f3d 100644 --- a/llvm/lib/Target/X86/AsmParser/X86AsmParser.cpp +++ b/llvm/lib/Target/X86/AsmParser/X86AsmParser.cpp @@ -1759,8 +1759,7 @@ bool X86AsmParser::CreateMemForMSInlineAsm( // registers in a mmory expression, and though unaccessible via rip/eip. 
if (IsGlobalLV && (BaseReg || IndexReg)) { Operands.push_back(X86Operand::CreateMem(getPointerWidth(), Disp, Start, - End, Size, Identifier, Decl, - FrontendSize)); + End, Size, Identifier, Decl)); return false; } // Otherwise, we set the base register to a non-zero value diff --git a/llvm/lib/Target/X86/AsmParser/X86Operand.h b/llvm/lib/Target/X86/AsmParser/X86Operand.h index 0116c081541b..6d20c6af5fd2 100644 --- a/llvm/lib/Target/X86/AsmParser/X86Operand.h +++ b/llvm/lib/Target/X86/AsmParser/X86Operand.h @@ -286,9 +286,10 @@ struct X86Operand final : public MCParsedAsmOperand { bool isOffsetOfLocal() const override { return isImm() && Imm.LocalRef; } bool isMemPlaceholder(const MCInstrDesc &Desc) const override { - // Only MS InlineAsm uses global variables with registers rather than - // rip/eip. - return !Mem.DefaultBaseReg && Mem.FrontendSize; + // Add more restrictions to avoid the use of global symbols. This helps + // with reducing the code size. + return !Desc.isRematerializable() && !Desc.isCall() && isMem() && + !Mem.BaseReg && !Mem.IndexReg; } bool needAddressOf() const override { return AddressOf; } From baa3eb0dd9ac788d3decf33bb17bd552b59af19d Mon Sep 17 00:00:00 2001 From: Fangrui Song Date: Wed, 22 Dec 2021 20:51:11 -0800 Subject: [PATCH 106/293] [ELF] Change some non-null pointer parameters to references. NFC --- lld/ELF/Arch/PPC.cpp | 2 +- lld/ELF/LinkerScript.cpp | 22 ++++----- lld/ELF/LinkerScript.h | 2 +- lld/ELF/Relocations.cpp | 44 ++++++++--------- lld/ELF/SyntheticSections.cpp | 48 +++++++++--------- lld/ELF/SyntheticSections.h | 4 +- lld/ELF/Thunks.cpp | 2 +- lld/ELF/Writer.cpp | 92 +++++++++++++++++------------------ 8 files changed, 108 insertions(+), 108 deletions(-) diff --git a/lld/ELF/Arch/PPC.cpp b/lld/ELF/Arch/PPC.cpp index 3a7d3f477e02..e28b62329494 100644 --- a/lld/ELF/Arch/PPC.cpp +++ b/lld/ELF/Arch/PPC.cpp @@ -77,7 +77,7 @@ void elf::writePPC32GlinkSection(uint8_t *buf, size_t numEntries) { // non-GOT-non-PLT relocations referencing external functions for -fpie/-fPIE. uint32_t glink = in.plt->getVA(); // VA of .glink if (!config->isPic) { - for (const Symbol *sym : cast(in.plt)->canonical_plts) { + for (const Symbol *sym : cast(*in.plt).canonical_plts) { writePPC32PltCallStub(buf, sym->getGotPltVA(), nullptr, 0); buf += 16; glink += 16; diff --git a/lld/ELF/LinkerScript.cpp b/lld/ELF/LinkerScript.cpp index 38b1b9093cd5..999bb94d6416 100644 --- a/lld/ELF/LinkerScript.cpp +++ b/lld/ELF/LinkerScript.cpp @@ -560,22 +560,22 @@ LinkerScript::computeInputSections(const InputSectionDescription *cmd, return ret; } -void LinkerScript::discard(InputSectionBase *s) { - if (s == in.shStrTab || s == mainPart->relrDyn) - error("discarding " + s->name + " section is not allowed"); +void LinkerScript::discard(InputSectionBase &s) { + if (&s == in.shStrTab || &s == mainPart->relrDyn) + error("discarding " + s.name + " section is not allowed"); // You can discard .hash and .gnu.hash sections by linker scripts. Since // they are synthesized sections, we need to handle them differently than // other regular sections. 
- if (s == mainPart->gnuHashTab) + if (&s == mainPart->gnuHashTab) mainPart->gnuHashTab = nullptr; - if (s == mainPart->hashTab) + if (&s == mainPart->hashTab) mainPart->hashTab = nullptr; - s->markDead(); - s->parent = nullptr; - for (InputSection *ds : s->dependentSections) - discard(ds); + s.markDead(); + s.parent = nullptr; + for (InputSection *sec : s.dependentSections) + discard(*sec); } void LinkerScript::discardSynthetic(OutputSection &outCmd) { @@ -589,7 +589,7 @@ void LinkerScript::discardSynthetic(OutputSection &outCmd) { std::vector matches = computeInputSections(isd, secs); for (InputSectionBase *s : matches) - discard(s); + discard(*s); } } } @@ -618,7 +618,7 @@ void LinkerScript::processSectionCommands() { // Any input section assigned to it is discarded. if (osec->name == "/DISCARD/") { for (InputSectionBase *s : v) - discard(s); + discard(*s); discardSynthetic(*osec); osec->commands.clear(); return false; diff --git a/lld/ELF/LinkerScript.h b/lld/ELF/LinkerScript.h index 5e9a3542c426..f385c8320978 100644 --- a/lld/ELF/LinkerScript.h +++ b/lld/ELF/LinkerScript.h @@ -312,7 +312,7 @@ class LinkerScript final { bool hasPhdrsCommands() { return !phdrsCommands.empty(); } uint64_t getDot() { return dot; } - void discard(InputSectionBase *s); + void discard(InputSectionBase &s); ExprValue getSymbolValue(StringRef name, const Twine &loc); diff --git a/lld/ELF/Relocations.cpp b/lld/ELF/Relocations.cpp index 79ba3194d73b..466c80d91dd6 100644 --- a/lld/ELF/Relocations.cpp +++ b/lld/ELF/Relocations.cpp @@ -833,10 +833,10 @@ class OffsetGetter { }; } // namespace -static void addRelativeReloc(InputSectionBase *isec, uint64_t offsetInSec, +static void addRelativeReloc(InputSectionBase &isec, uint64_t offsetInSec, Symbol &sym, int64_t addend, RelExpr expr, RelType type) { - Partition &part = isec->getPartition(); + Partition &part = isec.getPartition(); // Add a relative relocation. If relrDyn section is enabled, and the // relocation offset is guaranteed to be even, add the relocation to @@ -844,9 +844,9 @@ static void addRelativeReloc(InputSectionBase *isec, uint64_t offsetInSec, // relrDyn sections don't support odd offsets. Also, relrDyn sections // don't store the addend values, so we must write it to the relocated // address. - if (part.relrDyn && isec->alignment >= 2 && offsetInSec % 2 == 0) { - isec->relocations.push_back({expr, type, offsetInSec, addend, &sym}); - part.relrDyn->relocs.push_back({isec, offsetInSec}); + if (part.relrDyn && isec.alignment >= 2 && offsetInSec % 2 == 0) { + isec.relocations.push_back({expr, type, offsetInSec, addend, &sym}); + part.relrDyn->relocs.push_back({&isec, offsetInSec}); return; } part.relaDyn->addRelativeReloc(target->relativeRel, isec, offsetInSec, sym, @@ -854,14 +854,14 @@ static void addRelativeReloc(InputSectionBase *isec, uint64_t offsetInSec, } template -static void addPltEntry(PltSection *plt, GotPltSection *gotPlt, - RelocationBaseSection *rel, RelType type, Symbol &sym) { - plt->addEntry(sym); - gotPlt->addEntry(sym); - rel->addReloc({type, gotPlt, sym.getGotPltOffset(), - sym.isPreemptible ? DynamicReloc::AgainstSymbol - : DynamicReloc::AddendOnlyWithTargetVA, - sym, 0, R_ABS}); +static void addPltEntry(PltSection &plt, GotPltSection &gotPlt, + RelocationBaseSection &rel, RelType type, Symbol &sym) { + plt.addEntry(sym); + gotPlt.addEntry(sym); + rel.addReloc({type, &gotPlt, sym.getGotPltOffset(), + sym.isPreemptible ? 
DynamicReloc::AgainstSymbol + : DynamicReloc::AddendOnlyWithTargetVA, + sym, 0, R_ABS}); } static void addGotEntry(Symbol &sym) { @@ -880,7 +880,7 @@ static void addGotEntry(Symbol &sym) { if (!config->isPic || isAbsolute(sym)) in.got->relocations.push_back({R_ABS, target->symbolicRel, off, 0, &sym}); else - addRelativeReloc(in.got, off, sym, 0, R_ABS, target->symbolicRel); + addRelativeReloc(*in.got, off, sym, 0, R_ABS, target->symbolicRel); } static void addTpOffsetGotEntry(Symbol &sym) { @@ -891,7 +891,7 @@ static void addTpOffsetGotEntry(Symbol &sym) { return; } mainPart->relaDyn->addAddendOnlyRelocIfNonPreemptible( - target->tlsGotRel, in.got, off, sym, target->symbolicRel); + target->tlsGotRel, *in.got, off, sym, target->symbolicRel); } // Return true if we can define a symbol in the executable that @@ -1019,7 +1019,7 @@ static void processRelocAux(InputSectionBase &sec, RelExpr expr, RelType type, if (canWrite) { RelType rel = target->getDynRel(type); if (expr == R_GOT || (rel == target->symbolicRel && !sym.isPreemptible)) { - addRelativeReloc(&sec, offset, sym, addend, expr, type); + addRelativeReloc(sec, offset, sym, addend, expr, type); return; } else if (rel != 0) { if (config->emachine == EM_MIPS && rel == target->symbolicRel) @@ -1161,7 +1161,7 @@ handleTlsRelocation(RelType type, Symbol &sym, InputSectionBase &c, if (in.got->addDynTlsEntry(sym)) { uint64_t off = in.got->getGlobalDynOffset(sym); mainPart->relaDyn->addAddendOnlyRelocIfNonPreemptible( - target->tlsDescRel, in.got, off, sym, target->tlsDescRel); + target->tlsDescRel, *in.got, off, sym, target->tlsDescRel); } if (expr != R_TLSDESC_CALL) c.relocations.push_back({expr, type, offset, addend, &sym}); @@ -1292,7 +1292,7 @@ handleTlsRelocation(RelType type, Symbol &sym, InputSectionBase &c, addTpOffsetGotEntry(sym); // R_GOT needs a relative relocation for PIC on i386 and Hexagon. if (expr == R_GOT && config->isPic && !target->usesOnlyLowPageBits(type)) - addRelativeReloc(&c, offset, sym, addend, expr, type); + addRelativeReloc(c, offset, sym, addend, expr, type); else c.relocations.push_back({expr, type, offset, addend, &sym}); } @@ -1585,7 +1585,7 @@ static bool handleNonPreemptibleIfunc(Symbol &sym) { // may alter section/value, so create a copy of the symbol to make // section/value fixed. 
auto *directSym = makeDefined(cast(sym)); - addPltEntry(in.iplt, in.igotPlt, in.relaIplt, target->iRelativeRel, + addPltEntry(*in.iplt, *in.igotPlt, *in.relaIplt, target->iRelativeRel, *directSym); sym.pltIndex = directSym->pltIndex; @@ -1615,7 +1615,7 @@ void elf::postScanRelocations() { if (sym.needsGot) addGotEntry(sym); if (sym.needsPlt) - addPltEntry(in.plt, in.gotPlt, in.relaPlt, target->pltRel, sym); + addPltEntry(*in.plt, *in.gotPlt, *in.relaPlt, target->pltRel, sym); if (sym.needsCopy) { if (sym.isObject()) { addCopyRelSymbol(cast(sym)); @@ -1633,7 +1633,7 @@ void elf::postScanRelocations() { // PPC32 canonical PLT entries are at the beginning of .glink cast(sym).value = in.plt->headerSize; in.plt->headerSize += 16; - cast(in.plt)->canonical_plts.push_back(&sym); + cast(*in.plt).canonical_plts.push_back(&sym); } } } @@ -2159,7 +2159,7 @@ void elf::hexagonTLSSymbolUpdate(ArrayRef outputSections) { for (Relocation &rel : isec->relocations) if (rel.sym->type == llvm::ELF::STT_TLS && rel.expr == R_PLT_PC) { if (needEntry) { - addPltEntry(in.plt, in.gotPlt, in.relaPlt, target->pltRel, + addPltEntry(*in.plt, *in.gotPlt, *in.relaPlt, target->pltRel, *sym); needEntry = false; } diff --git a/lld/ELF/SyntheticSections.cpp b/lld/ELF/SyntheticSections.cpp index e25401baabcb..9691bac76e44 100644 --- a/lld/ELF/SyntheticSections.cpp +++ b/lld/ELF/SyntheticSections.cpp @@ -1297,8 +1297,8 @@ DynamicSection::computeContents() { auto addInt = [&](int32_t tag, uint64_t val) { entries.emplace_back(tag, val); }; - auto addInSec = [&](int32_t tag, const InputSection *sec) { - entries.emplace_back(tag, sec->getVA()); + auto addInSec = [&](int32_t tag, const InputSection &sec) { + entries.emplace_back(tag, sec.getVA()); }; for (StringRef s : config->filterList) @@ -1374,7 +1374,7 @@ DynamicSection::computeContents() { if (part.relaDyn->isNeeded() || (in.relaIplt->isNeeded() && part.relaDyn->getParent() == in.relaIplt->getParent())) { - addInSec(part.relaDyn->dynamicTag, part.relaDyn); + addInSec(part.relaDyn->dynamicTag, *part.relaDyn); entries.emplace_back(part.relaDyn->sizeDynamicTag, addRelaSz(part.relaDyn)); bool isRela = config->isRela; @@ -1392,7 +1392,7 @@ DynamicSection::computeContents() { } if (part.relrDyn && !part.relrDyn->relocs.empty()) { addInSec(config->useAndroidRelrTags ? DT_ANDROID_RELR : DT_RELR, - part.relrDyn); + *part.relrDyn); addInt(config->useAndroidRelrTags ? DT_ANDROID_RELRSZ : DT_RELRSZ, part.relrDyn->getParent()->size); addInt(config->useAndroidRelrTags ? DT_ANDROID_RELRENT : DT_RELRENT, @@ -1405,14 +1405,14 @@ DynamicSection::computeContents() { // case, so here we always use relaPlt as marker for the beginning of // .rel[a].plt section. if (isMain && (in.relaPlt->isNeeded() || in.relaIplt->isNeeded())) { - addInSec(DT_JMPREL, in.relaPlt); + addInSec(DT_JMPREL, *in.relaPlt); entries.emplace_back(DT_PLTRELSZ, addPltRelSz()); switch (config->emachine) { case EM_MIPS: - addInSec(DT_MIPS_PLTGOT, in.gotPlt); + addInSec(DT_MIPS_PLTGOT, *in.gotPlt); break; case EM_SPARCV9: - addInSec(DT_PLTGOT, in.plt); + addInSec(DT_PLTGOT, *in.plt); break; case EM_AARCH64: if (llvm::find_if(in.relaPlt->relocs, [](const DynamicReloc &r) { @@ -1422,7 +1422,7 @@ DynamicSection::computeContents() { addInt(DT_AARCH64_VARIANT_PCS, 0); LLVM_FALLTHROUGH; default: - addInSec(DT_PLTGOT, in.gotPlt); + addInSec(DT_PLTGOT, *in.gotPlt); break; } addInt(DT_PLTREL, config->isRela ? 
DT_RELA : DT_REL); @@ -1435,16 +1435,16 @@ DynamicSection::computeContents() { addInt(DT_AARCH64_PAC_PLT, 0); } - addInSec(DT_SYMTAB, part.dynSymTab); + addInSec(DT_SYMTAB, *part.dynSymTab); addInt(DT_SYMENT, sizeof(Elf_Sym)); - addInSec(DT_STRTAB, part.dynStrTab); + addInSec(DT_STRTAB, *part.dynStrTab); addInt(DT_STRSZ, part.dynStrTab->getSize()); if (!config->zText) addInt(DT_TEXTREL, 0); if (part.gnuHashTab) - addInSec(DT_GNU_HASH, part.gnuHashTab); + addInSec(DT_GNU_HASH, *part.gnuHashTab); if (part.hashTab) - addInSec(DT_HASH, part.hashTab); + addInSec(DT_HASH, *part.hashTab); if (isMain) { if (Out::preinitArray) { @@ -1469,13 +1469,13 @@ DynamicSection::computeContents() { } if (part.verSym && part.verSym->isNeeded()) - addInSec(DT_VERSYM, part.verSym); + addInSec(DT_VERSYM, *part.verSym); if (part.verDef && part.verDef->isLive()) { - addInSec(DT_VERDEF, part.verDef); + addInSec(DT_VERDEF, *part.verDef); addInt(DT_VERDEFNUM, getVerDefNum()); } if (part.verNeed && part.verNeed->isNeeded()) { - addInSec(DT_VERNEED, part.verNeed); + addInSec(DT_VERNEED, *part.verNeed); unsigned needNum = 0; for (SharedFile *f : sharedFiles) if (!f->vernauxs.empty()) @@ -1494,10 +1494,10 @@ DynamicSection::computeContents() { addInt(DT_MIPS_GOTSYM, b->dynsymIndex); else addInt(DT_MIPS_GOTSYM, part.dynSymTab->getNumSymbols()); - addInSec(DT_PLTGOT, in.mipsGot); + addInSec(DT_PLTGOT, *in.mipsGot); if (in.mipsRldMap) { if (!config->pie) - addInSec(DT_MIPS_RLD_MAP, in.mipsRldMap); + addInSec(DT_MIPS_RLD_MAP, *in.mipsRldMap); // Store the offset to the .rld_map section // relative to the address of the tag. addInt(DT_MIPS_RLD_MAP_REL, @@ -1508,7 +1508,7 @@ DynamicSection::computeContents() { // DT_PPC_GOT indicates to glibc Secure PLT is used. If DT_PPC_GOT is absent, // glibc assumes the old-style BSS PLT layout which we don't support. if (config->emachine == EM_PPC) - addInSec(DT_PPC_GOT, in.got); + addInSec(DT_PPC_GOT, *in.got); // Glink dynamic tag is required by the V2 abi if the plt section isn't empty. if (config->emachine == EM_PPC64 && in.plt->isNeeded()) { @@ -1582,7 +1582,7 @@ void RelocationBaseSection::addSymbolReloc(RelType dynType, } void RelocationBaseSection::addRelativeReloc( - RelType dynType, InputSectionBase *inputSec, uint64_t offsetInSec, + RelType dynType, InputSectionBase &inputSec, uint64_t offsetInSec, Symbol &sym, int64_t addend, RelType addendRelType, RelExpr expr) { // This function should only be called for non-preemptible symbols or // RelExpr values that refer to an address inside the output file (e.g. the @@ -1590,19 +1590,19 @@ void RelocationBaseSection::addRelativeReloc( assert((!sym.isPreemptible || expr == R_GOT) && "cannot add relative relocation against preemptible symbol"); assert(expr != R_ADDEND && "expected non-addend relocation expression"); - addReloc(DynamicReloc::AddendOnlyWithTargetVA, dynType, inputSec, offsetInSec, - sym, addend, expr, addendRelType); + addReloc(DynamicReloc::AddendOnlyWithTargetVA, dynType, &inputSec, + offsetInSec, sym, addend, expr, addendRelType); } void RelocationBaseSection::addAddendOnlyRelocIfNonPreemptible( - RelType dynType, InputSectionBase *isec, uint64_t offsetInSec, Symbol &sym, + RelType dynType, InputSectionBase &isec, uint64_t offsetInSec, Symbol &sym, RelType addendRelType) { // No need to write an addend to the section for preemptible symbols. 
if (sym.isPreemptible) - addReloc({dynType, isec, offsetInSec, DynamicReloc::AgainstSymbol, sym, 0, + addReloc({dynType, &isec, offsetInSec, DynamicReloc::AgainstSymbol, sym, 0, R_ABS}); else - addReloc(DynamicReloc::AddendOnlyWithTargetVA, dynType, isec, offsetInSec, + addReloc(DynamicReloc::AddendOnlyWithTargetVA, dynType, &isec, offsetInSec, sym, 0, R_ABS, addendRelType); } diff --git a/lld/ELF/SyntheticSections.h b/lld/ELF/SyntheticSections.h index 04a3b9b394af..8953601da390 100644 --- a/lld/ELF/SyntheticSections.h +++ b/lld/ELF/SyntheticSections.h @@ -524,13 +524,13 @@ class RelocationBaseSection : public SyntheticSection { llvm::Optional addendRelType = llvm::None); /// Add a relative dynamic relocation that uses the target address of \p sym /// (i.e. InputSection::getRelocTargetVA()) + \p addend as the addend. - void addRelativeReloc(RelType dynType, InputSectionBase *isec, + void addRelativeReloc(RelType dynType, InputSectionBase &isec, uint64_t offsetInSec, Symbol &sym, int64_t addend, RelType addendRelType, RelExpr expr); /// Add a dynamic relocation using the target address of \p sym as the addend /// if \p sym is non-preemptible. Otherwise add a relocation against \p sym. void addAddendOnlyRelocIfNonPreemptible(RelType dynType, - InputSectionBase *isec, + InputSectionBase &isec, uint64_t offsetInSec, Symbol &sym, RelType addendRelType); void addReloc(DynamicReloc::Kind kind, RelType dynType, diff --git a/lld/ELF/Thunks.cpp b/lld/ELF/Thunks.cpp index ffbc8d94a800..85a0a67d613c 100644 --- a/lld/ELF/Thunks.cpp +++ b/lld/ELF/Thunks.cpp @@ -384,7 +384,7 @@ class PPC64PILongBranchThunk final : public PPC64LongBranchThunk { if (Optional index = in.ppc64LongBranchTarget->addEntry(&dest, addend)) { mainPart->relaDyn->addRelativeReloc( - target->relativeRel, in.ppc64LongBranchTarget, *index * UINT64_C(8), + target->relativeRel, *in.ppc64LongBranchTarget, *index * UINT64_C(8), dest, addend + getPPC64GlobalEntryToLocalEntryOffset(dest.stOther), target->symbolicRel, R_ABS); } diff --git a/lld/ELF/Writer.cpp b/lld/ELF/Writer.cpp index 9390027487ed..bce524811b84 100644 --- a/lld/ELF/Writer.cpp +++ b/lld/ELF/Writer.cpp @@ -297,7 +297,7 @@ template void elf::createSyntheticSections() { } } - auto add = [](SyntheticSection *sec) { inputSections.push_back(sec); }; + auto add = [](SyntheticSection &sec) { inputSections.push_back(&sec); }; in.shStrTab = make(".shstrtab", false); @@ -311,7 +311,7 @@ template void elf::createSyntheticSections() { } in.bss = make(".bss", 0, 1); - add(in.bss); + add(*in.bss); // If there is a SECTIONS command and a .data.rel.ro section name use name // .data.rel.ro.bss so that we match in the .data.rel.ro output section. @@ -320,42 +320,42 @@ template void elf::createSyntheticSections() { script->hasSectionsCommand && findSection(".data.rel.ro", 0); in.bssRelRo = make(hasDataRelRo ? ".data.rel.ro.bss" : ".bss.rel.ro", 0, 1); - add(in.bssRelRo); + add(*in.bssRelRo); // Add MIPS-specific sections. if (config->emachine == EM_MIPS) { if (!config->shared && config->hasDynSymTab) { in.mipsRldMap = make(); - add(in.mipsRldMap); + add(*in.mipsRldMap); } if (auto *sec = MipsAbiFlagsSection::create()) - add(sec); + add(*sec); if (auto *sec = MipsOptionsSection::create()) - add(sec); + add(*sec); if (auto *sec = MipsReginfoSection::create()) - add(sec); + add(*sec); } StringRef relaDynName = config->isRela ? 
".rela.dyn" : ".rel.dyn"; for (Partition &part : partitions) { - auto add = [&](SyntheticSection *sec) { - sec->partition = part.getNumber(); - inputSections.push_back(sec); + auto add = [&](SyntheticSection &sec) { + sec.partition = part.getNumber(); + inputSections.push_back(&sec); }; if (!part.name.empty()) { part.elfHeader = make>(); part.elfHeader->name = part.name; - add(part.elfHeader); + add(*part.elfHeader); part.programHeaders = make>(); - add(part.programHeaders); + add(*part.programHeaders); } if (config->buildId != BuildIdKind::None) { part.buildId = make(); - add(part.buildId); + add(*part.buildId); } part.dynStrTab = make(".dynstr", true); @@ -368,53 +368,53 @@ template void elf::createSyntheticSections() { make>(relaDynName, config->zCombreloc); if (config->hasDynSymTab) { - add(part.dynSymTab); + add(*part.dynSymTab); part.verSym = make(); - add(part.verSym); + add(*part.verSym); if (!namedVersionDefs().empty()) { part.verDef = make(); - add(part.verDef); + add(*part.verDef); } part.verNeed = make>(); - add(part.verNeed); + add(*part.verNeed); if (config->gnuHash) { part.gnuHashTab = make(); - add(part.gnuHashTab); + add(*part.gnuHashTab); } if (config->sysvHash) { part.hashTab = make(); - add(part.hashTab); + add(*part.hashTab); } - add(part.dynamic); - add(part.dynStrTab); - add(part.relaDyn); + add(*part.dynamic); + add(*part.dynStrTab); + add(*part.relaDyn); } if (config->relrPackDynRelocs) { part.relrDyn = make>(); - add(part.relrDyn); + add(*part.relrDyn); } if (!config->relocatable) { if (config->ehFrameHdr) { part.ehFrameHdr = make(); - add(part.ehFrameHdr); + add(*part.ehFrameHdr); } part.ehFrame = make(); - add(part.ehFrame); + add(*part.ehFrame); } if (config->emachine == EM_ARM && !config->relocatable) { // The ARMExidxsyntheticsection replaces all the individual .ARM.exidx // InputSections. part.armExidx = make(); - add(part.armExidx); + add(*part.armExidx); } } @@ -424,39 +424,39 @@ template void elf::createSyntheticSections() { // special handling (see createPhdrs() and combineEhSections()). in.partEnd = make(".part.end", config->maxPageSize, 1); in.partEnd->partition = 255; - add(in.partEnd); + add(*in.partEnd); in.partIndex = make(); addOptionalRegular("__part_index_begin", in.partIndex, 0); addOptionalRegular("__part_index_end", in.partIndex, in.partIndex->getSize()); - add(in.partIndex); + add(*in.partIndex); } // Add .got. MIPS' .got is so different from the other archs, // it has its own class. if (config->emachine == EM_MIPS) { in.mipsGot = make(); - add(in.mipsGot); + add(*in.mipsGot); } else { in.got = make(); - add(in.got); + add(*in.got); } if (config->emachine == EM_PPC) { in.ppc32Got2 = make(); - add(in.ppc32Got2); + add(*in.ppc32Got2); } if (config->emachine == EM_PPC64) { in.ppc64LongBranchTarget = make(); - add(in.ppc64LongBranchTarget); + add(*in.ppc64LongBranchTarget); } in.gotPlt = make(); - add(in.gotPlt); + add(*in.gotPlt); in.igotPlt = make(); - add(in.igotPlt); + add(*in.igotPlt); // _GLOBAL_OFFSET_TABLE_ is defined relative to either .got.plt or .got. Treat // it as a relocation and ensure the referenced section is created. @@ -468,13 +468,13 @@ template void elf::createSyntheticSections() { } if (config->gdbIndex) - add(GdbIndexSection::create()); + add(*GdbIndexSection::create()); // We always need to add rel[a].plt to output if it has entries. // Even for static linking it can contain R_[*]_IRELATIVE relocations. in.relaPlt = make>( config->isRela ? 
".rela.plt" : ".rel.plt", /*sort=*/false); - add(in.relaPlt); + add(*in.relaPlt); // The relaIplt immediately follows .rel[a].dyn to ensure that the IRelative // relocations are processed last by the dynamic loader. We cannot place the @@ -485,22 +485,22 @@ template void elf::createSyntheticSections() { in.relaIplt = make>( config->androidPackDynRelocs ? in.relaPlt->name : relaDynName, /*sort=*/false); - add(in.relaIplt); + add(*in.relaIplt); if ((config->emachine == EM_386 || config->emachine == EM_X86_64) && (config->andFeatures & GNU_PROPERTY_X86_FEATURE_1_IBT)) { in.ibtPlt = make(); - add(in.ibtPlt); + add(*in.ibtPlt); } in.plt = config->emachine == EM_PPC ? make() : make(); - add(in.plt); + add(*in.plt); in.iplt = make(); - add(in.iplt); + add(*in.iplt); if (config->andFeatures) - add(make()); + add(*make()); // .note.GNU-stack is always added when we are creating a re-linkable // object file. Other linkers are using the presence of this marker @@ -508,15 +508,15 @@ template void elf::createSyntheticSections() { // is irrelevant these days. Stack area should always be non-executable // by default. So we emit this section unconditionally. if (config->relocatable) - add(make()); + add(*make()); if (in.symTab) - add(in.symTab); + add(*in.symTab); if (in.symTabShndx) - add(in.symTabShndx); - add(in.shStrTab); + add(*in.symTabShndx); + add(*in.shStrTab); if (in.strTab) - add(in.strTab); + add(*in.strTab); } // The main function of the writer. From 5c75cc51b339e9c742ff5d1e88e330cdec88c983 Mon Sep 17 00:00:00 2001 From: Fangrui Song Date: Wed, 22 Dec 2021 21:09:57 -0800 Subject: [PATCH 107/293] [ELF] Change nonnull pointer parameters to references. NFC --- lld/ELF/Relocations.cpp | 20 ++++++++++---------- lld/ELF/SyntheticSections.cpp | 12 ++++++------ lld/ELF/SyntheticSections.h | 2 +- 3 files changed, 17 insertions(+), 17 deletions(-) diff --git a/lld/ELF/Relocations.cpp b/lld/ELF/Relocations.cpp index 466c80d91dd6..c4438be0cb59 100644 --- a/lld/ELF/Relocations.cpp +++ b/lld/ELF/Relocations.cpp @@ -295,12 +295,12 @@ static SmallSet getSymbolsAt(SharedSymbol &ss) { // in .bss and in the case of a canonical plt entry it is in .plt. This function // replaces the existing symbol with a Defined pointing to the appropriate // location. -static void replaceWithDefined(Symbol &sym, SectionBase *sec, uint64_t value, +static void replaceWithDefined(Symbol &sym, SectionBase &sec, uint64_t value, uint64_t size) { Symbol old = sym; sym.replace(Defined{sym.file, sym.getName(), sym.binding, sym.stOther, - sym.type, value, size, sec}); + sym.type, value, size, &sec}); sym.pltIndex = old.pltIndex; sym.gotIndex = old.gotIndex; @@ -379,9 +379,9 @@ template static void addCopyRelSymbolImpl(SharedSymbol &ss) { // dynamic symbol for each one. This causes the copy relocation to correctly // interpose any aliases. 
for (SharedSymbol *sym : getSymbolsAt(ss)) - replaceWithDefined(*sym, sec, 0, sym->size); + replaceWithDefined(*sym, *sec, 0, sym->size); - mainPart->relaDyn->addSymbolReloc(target->copyRel, sec, 0, ss); + mainPart->relaDyn->addSymbolReloc(target->copyRel, *sec, 0, ss); } static void addCopyRelSymbol(SharedSymbol &ss) { @@ -1024,7 +1024,7 @@ static void processRelocAux(InputSectionBase &sec, RelExpr expr, RelType type, } else if (rel != 0) { if (config->emachine == EM_MIPS && rel == target->symbolicRel) rel = target->relativeRel; - sec.getPartition().relaDyn->addSymbolReloc(rel, &sec, offset, sym, addend, + sec.getPartition().relaDyn->addSymbolReloc(rel, sec, offset, sym, addend, type); // MIPS ABI turns using of GOT and dynamic relocations inside out. @@ -1244,14 +1244,14 @@ handleTlsRelocation(RelType type, Symbol &sym, InputSectionBase &c, in.got->relocations.push_back( {R_ADDEND, target->symbolicRel, off, 1, &sym}); else - mainPart->relaDyn->addSymbolReloc(target->tlsModuleIndexRel, in.got, + mainPart->relaDyn->addSymbolReloc(target->tlsModuleIndexRel, *in.got, off, sym); // If the symbol is preemptible we need the dynamic linker to write // the offset too. uint64_t offsetOff = off + config->wordsize; if (sym.isPreemptible) - mainPart->relaDyn->addSymbolReloc(target->tlsOffsetRel, in.got, + mainPart->relaDyn->addSymbolReloc(target->tlsOffsetRel, *in.got, offsetOff, sym); else in.got->relocations.push_back( @@ -1269,7 +1269,7 @@ handleTlsRelocation(RelType type, Symbol &sym, InputSectionBase &c, addend, &sym}); if (!sym.isInGot()) { in.got->addEntry(sym); - mainPart->relaDyn->addSymbolReloc(target->tlsGotRel, in.got, + mainPart->relaDyn->addSymbolReloc(target->tlsGotRel, *in.got, sym.getGotOffset(), sym); } } else { @@ -1430,7 +1430,7 @@ static void scanReloc(InputSectionBase &sec, OffsetGetter &getOffset, RelTy *&i, // direct relocation on through. if (sym.isGnuIFunc() && config->zIfuncNoplt) { sym.exportDynamic = true; - mainPart->relaDyn->addSymbolReloc(type, &sec, offset, sym, addend, type); + mainPart->relaDyn->addSymbolReloc(type, sec, offset, sym, addend, type); return; } @@ -1626,7 +1626,7 @@ void elf::postScanRelocations() { assert(sym.isFunc() && sym.needsPlt); if (!sym.isDefined()) { replaceWithDefined( - sym, in.plt, + sym, *in.plt, target->pltHeaderSize + target->pltEntrySize * sym.pltIndex, 0); sym.needsCopy = true; if (config->emachine == EM_PPC) { diff --git a/lld/ELF/SyntheticSections.cpp b/lld/ELF/SyntheticSections.cpp index 9691bac76e44..def06da2bcbc 100644 --- a/lld/ELF/SyntheticSections.cpp +++ b/lld/ELF/SyntheticSections.cpp @@ -1007,14 +1007,14 @@ void MipsGotSection::build() { // thread-locals that have been marked as local through a linker script) if (!s->isPreemptible && !config->shared) continue; - mainPart->relaDyn->addSymbolReloc(target->tlsModuleIndexRel, this, + mainPart->relaDyn->addSymbolReloc(target->tlsModuleIndexRel, *this, offset, *s); // However, we can skip writing the TLS offset reloc for non-preemptible // symbols since it is known even in shared libraries if (!s->isPreemptible) continue; offset += config->wordsize; - mainPart->relaDyn->addSymbolReloc(target->tlsOffsetRel, this, offset, + mainPart->relaDyn->addSymbolReloc(target->tlsOffsetRel, *this, offset, *s); } } @@ -1027,7 +1027,7 @@ void MipsGotSection::build() { // Dynamic relocations for "global" entries. 
for (const std::pair &p : got.global) { uint64_t offset = p.second * config->wordsize; - mainPart->relaDyn->addSymbolReloc(target->relativeRel, this, offset, + mainPart->relaDyn->addSymbolReloc(target->relativeRel, *this, offset, *p.first); } if (!config->isPic) @@ -1573,12 +1573,12 @@ RelocationBaseSection::RelocationBaseSection(StringRef name, uint32_t type, dynamicTag(dynamicTag), sizeDynamicTag(sizeDynamicTag) {} void RelocationBaseSection::addSymbolReloc(RelType dynType, - InputSectionBase *isec, + InputSectionBase &isec, uint64_t offsetInSec, Symbol &sym, int64_t addend, Optional addendRelType) { - addReloc(DynamicReloc::AgainstSymbol, dynType, isec, offsetInSec, sym, addend, - R_ADDEND, addendRelType ? *addendRelType : target->noneRel); + addReloc(DynamicReloc::AgainstSymbol, dynType, &isec, offsetInSec, sym, + addend, R_ADDEND, addendRelType ? *addendRelType : target->noneRel); } void RelocationBaseSection::addRelativeReloc( diff --git a/lld/ELF/SyntheticSections.h b/lld/ELF/SyntheticSections.h index 8953601da390..e49f2e324536 100644 --- a/lld/ELF/SyntheticSections.h +++ b/lld/ELF/SyntheticSections.h @@ -519,7 +519,7 @@ class RelocationBaseSection : public SyntheticSection { /// using relocations on the input section (e.g. MipsGotSection::writeTo()). void addReloc(const DynamicReloc &reloc); /// Add a dynamic relocation against \p sym with an optional addend. - void addSymbolReloc(RelType dynType, InputSectionBase *isec, + void addSymbolReloc(RelType dynType, InputSectionBase &isec, uint64_t offsetInSec, Symbol &sym, int64_t addend = 0, llvm::Optional addendRelType = llvm::None); /// Add a relative dynamic relocation that uses the target address of \p sym From d019de23a1d761225fdaf0c47394ba58143aea9a Mon Sep 17 00:00:00 2001 From: Fangrui Song Date: Wed, 22 Dec 2021 21:11:25 -0800 Subject: [PATCH 108/293] [ELF] Make InStruct members unique_ptr and remove associate make See D116143 for benefits. My lld executable (x86-64) is 24+KiB smaller. --- lld/ELF/InputFiles.cpp | 8 ++--- lld/ELF/LinkerScript.cpp | 2 +- lld/ELF/Relocations.cpp | 8 ++--- lld/ELF/SyntheticSections.cpp | 9 ++--- lld/ELF/SyntheticSections.h | 42 +++++++++++----------- lld/ELF/Writer.cpp | 66 +++++++++++++++++++---------------- 6 files changed, 71 insertions(+), 64 deletions(-) diff --git a/lld/ELF/InputFiles.cpp b/lld/ELF/InputFiles.cpp index cf8fd1dfc313..cf7c1c228b43 100644 --- a/lld/ELF/InputFiles.cpp +++ b/lld/ELF/InputFiles.cpp @@ -878,8 +878,8 @@ InputSectionBase *ObjFile::createInputSection(uint32_t idx, // to work. In a full implementation we would merge all attribute // sections. if (in.attributes == nullptr) { - in.attributes = make(*this, sec, name); - return in.attributes; + in.attributes = std::make_unique(*this, sec, name); + return in.attributes.get(); } return &InputSection::discarded; } @@ -900,8 +900,8 @@ InputSectionBase *ObjFile::createInputSection(uint32_t idx, // standard extensions to enable. In a full implementation we would merge // all attribute sections. 
if (in.attributes == nullptr) { - in.attributes = make(*this, sec, name); - return in.attributes; + in.attributes = std::make_unique(*this, sec, name); + return in.attributes.get(); } return &InputSection::discarded; } diff --git a/lld/ELF/LinkerScript.cpp b/lld/ELF/LinkerScript.cpp index 999bb94d6416..a362dd3809d0 100644 --- a/lld/ELF/LinkerScript.cpp +++ b/lld/ELF/LinkerScript.cpp @@ -561,7 +561,7 @@ LinkerScript::computeInputSections(const InputSectionDescription *cmd, } void LinkerScript::discard(InputSectionBase &s) { - if (&s == in.shStrTab || &s == mainPart->relrDyn) + if (&s == in.shStrTab.get() || &s == mainPart->relrDyn) error("discarding " + s.name + " section is not allowed"); // You can discard .hash and .gnu.hash sections by linker scripts. Since diff --git a/lld/ELF/Relocations.cpp b/lld/ELF/Relocations.cpp index c4438be0cb59..d6f7d0a935c8 100644 --- a/lld/ELF/Relocations.cpp +++ b/lld/ELF/Relocations.cpp @@ -870,7 +870,7 @@ static void addGotEntry(Symbol &sym) { // If preemptible, emit a GLOB_DAT relocation. if (sym.isPreemptible) { - mainPart->relaDyn->addReloc({target->gotRel, in.got, off, + mainPart->relaDyn->addReloc({target->gotRel, in.got.get(), off, DynamicReloc::AgainstSymbol, sym, 0, R_ABS}); return; } @@ -1205,8 +1205,8 @@ handleTlsRelocation(RelType type, Symbol &sym, InputSectionBase &c, in.got->relocations.push_back( {R_ADDEND, target->symbolicRel, in.got->getTlsIndexOff(), 1, &sym}); else - mainPart->relaDyn->addReloc( - {target->tlsModuleIndexRel, in.got, in.got->getTlsIndexOff()}); + mainPart->relaDyn->addReloc({target->tlsModuleIndexRel, in.got.get(), + in.got->getTlsIndexOff()}); } c.relocations.push_back({expr, type, offset, addend, &sym}); return 1; @@ -1592,7 +1592,7 @@ static bool handleNonPreemptibleIfunc(Symbol &sym) { if (sym.hasDirectReloc) { // Change the value to the IPLT and redirect all references to it. auto &d = cast(sym); - d.section = in.iplt; + d.section = in.iplt.get(); d.value = sym.pltIndex * target->ipltEntrySize; d.size = 0; // It's important to set the symbol type here so that dynamic loaders diff --git a/lld/ELF/SyntheticSections.cpp b/lld/ELF/SyntheticSections.cpp index def06da2bcbc..047546e28ed0 100644 --- a/lld/ELF/SyntheticSections.cpp +++ b/lld/ELF/SyntheticSections.cpp @@ -1636,11 +1636,11 @@ void RelocationBaseSection::finalizeContents() { else getParent()->link = 0; - if (in.relaPlt == this && in.gotPlt->getParent()) { + if (in.relaPlt.get() == this && in.gotPlt->getParent()) { getParent()->flags |= ELF::SHF_INFO_LINK; getParent()->info = in.gotPlt->getParent()->sectionIndex; } - if (in.relaIplt == this && in.igotPlt->getParent()) { + if (in.relaIplt.get() == this && in.igotPlt->getParent()) { getParent()->flags |= ELF::SHF_INFO_LINK; getParent()->info = in.igotPlt->getParent()->sectionIndex; } @@ -3801,8 +3801,9 @@ void PartitionIndexSection::writeTo(uint8_t *buf) { write32(buf, mainPart->dynStrTab->getVA() + partitions[i].nameStrTab - va); write32(buf + 4, partitions[i].elfHeader->getVA() - (va + 4)); - SyntheticSection *next = - i == partitions.size() - 1 ? in.partEnd : partitions[i + 1].elfHeader; + SyntheticSection *next = i == partitions.size() - 1 + ? 
in.partEnd.get() + : partitions[i + 1].elfHeader; write32(buf + 8, next->getVA() - partitions[i].elfHeader->getVA()); va += 12; diff --git a/lld/ELF/SyntheticSections.h b/lld/ELF/SyntheticSections.h index e49f2e324536..15b5657f8037 100644 --- a/lld/ELF/SyntheticSections.h +++ b/lld/ELF/SyntheticSections.h @@ -1235,27 +1235,27 @@ inline Partition &SectionBase::getPartition() const { // Linker generated sections which can be used as inputs and are not specific to // a partition. struct InStruct { - InputSection *attributes; - BssSection *bss; - BssSection *bssRelRo; - GotSection *got; - GotPltSection *gotPlt; - IgotPltSection *igotPlt; - PPC64LongBranchTargetSection *ppc64LongBranchTarget; - MipsGotSection *mipsGot; - MipsRldMapSection *mipsRldMap; - SyntheticSection *partEnd; - SyntheticSection *partIndex; - PltSection *plt; - IpltSection *iplt; - PPC32Got2Section *ppc32Got2; - IBTPltSection *ibtPlt; - RelocationBaseSection *relaPlt; - RelocationBaseSection *relaIplt; - StringTableSection *shStrTab; - StringTableSection *strTab; - SymbolTableBaseSection *symTab; - SymtabShndxSection *symTabShndx; + std::unique_ptr attributes; + std::unique_ptr bss; + std::unique_ptr bssRelRo; + std::unique_ptr got; + std::unique_ptr gotPlt; + std::unique_ptr igotPlt; + std::unique_ptr ppc64LongBranchTarget; + std::unique_ptr mipsGot; + std::unique_ptr mipsRldMap; + std::unique_ptr partEnd; + std::unique_ptr partIndex; + std::unique_ptr plt; + std::unique_ptr iplt; + std::unique_ptr ppc32Got2; + std::unique_ptr ibtPlt; + std::unique_ptr relaPlt; + std::unique_ptr relaIplt; + std::unique_ptr shStrTab; + std::unique_ptr strTab; + std::unique_ptr symTab; + std::unique_ptr symTabShndx; }; extern InStruct in; diff --git a/lld/ELF/Writer.cpp b/lld/ELF/Writer.cpp index bce524811b84..15cb7603524e 100644 --- a/lld/ELF/Writer.cpp +++ b/lld/ELF/Writer.cpp @@ -299,18 +299,18 @@ template void elf::createSyntheticSections() { auto add = [](SyntheticSection &sec) { inputSections.push_back(&sec); }; - in.shStrTab = make(".shstrtab", false); + in.shStrTab = std::make_unique(".shstrtab", false); Out::programHeaders = make("", 0, SHF_ALLOC); Out::programHeaders->alignment = config->wordsize; if (config->strip != StripPolicy::All) { - in.strTab = make(".strtab", false); - in.symTab = make>(*in.strTab); - in.symTabShndx = make(); + in.strTab = std::make_unique(".strtab", false); + in.symTab = std::make_unique>(*in.strTab); + in.symTabShndx = std::make_unique(); } - in.bss = make(".bss", 0, 1); + in.bss = std::make_unique(".bss", 0, 1); add(*in.bss); // If there is a SECTIONS command and a .data.rel.ro section name use name @@ -318,14 +318,14 @@ template void elf::createSyntheticSections() { // This makes sure our relro is contiguous. bool hasDataRelRo = script->hasSectionsCommand && findSection(".data.rel.ro", 0); - in.bssRelRo = - make(hasDataRelRo ? ".data.rel.ro.bss" : ".bss.rel.ro", 0, 1); + in.bssRelRo = std::make_unique( + hasDataRelRo ? ".data.rel.ro.bss" : ".bss.rel.ro", 0, 1); add(*in.bssRelRo); // Add MIPS-specific sections. if (config->emachine == EM_MIPS) { if (!config->shared && config->hasDynSymTab) { - in.mipsRldMap = make(); + in.mipsRldMap = std::make_unique(); add(*in.mipsRldMap); } if (auto *sec = MipsAbiFlagsSection::create()) @@ -422,13 +422,14 @@ template void elf::createSyntheticSections() { // Create the partition end marker. This needs to be in partition number 255 // so that it is sorted after all other partitions. It also has other // special handling (see createPhdrs() and combineEhSections()). 
- in.partEnd = make(".part.end", config->maxPageSize, 1); + in.partEnd = + std::make_unique(".part.end", config->maxPageSize, 1); in.partEnd->partition = 255; add(*in.partEnd); - in.partIndex = make(); - addOptionalRegular("__part_index_begin", in.partIndex, 0); - addOptionalRegular("__part_index_end", in.partIndex, + in.partIndex = std::make_unique(); + addOptionalRegular("__part_index_begin", in.partIndex.get(), 0); + addOptionalRegular("__part_index_end", in.partIndex.get(), in.partIndex->getSize()); add(*in.partIndex); } @@ -436,26 +437,26 @@ template void elf::createSyntheticSections() { // Add .got. MIPS' .got is so different from the other archs, // it has its own class. if (config->emachine == EM_MIPS) { - in.mipsGot = make(); + in.mipsGot = std::make_unique(); add(*in.mipsGot); } else { - in.got = make(); + in.got = std::make_unique(); add(*in.got); } if (config->emachine == EM_PPC) { - in.ppc32Got2 = make(); + in.ppc32Got2 = std::make_unique(); add(*in.ppc32Got2); } if (config->emachine == EM_PPC64) { - in.ppc64LongBranchTarget = make(); + in.ppc64LongBranchTarget = std::make_unique(); add(*in.ppc64LongBranchTarget); } - in.gotPlt = make(); + in.gotPlt = std::make_unique(); add(*in.gotPlt); - in.igotPlt = make(); + in.igotPlt = std::make_unique(); add(*in.igotPlt); // _GLOBAL_OFFSET_TABLE_ is defined relative to either .got.plt or .got. Treat @@ -472,7 +473,7 @@ template void elf::createSyntheticSections() { // We always need to add rel[a].plt to output if it has entries. // Even for static linking it can contain R_[*]_IRELATIVE relocations. - in.relaPlt = make>( + in.relaPlt = std::make_unique>( config->isRela ? ".rela.plt" : ".rel.plt", /*sort=*/false); add(*in.relaPlt); @@ -482,21 +483,23 @@ template void elf::createSyntheticSections() { // that would cause a section type mismatch. However, because the Android // dynamic loader reads .rel.plt after .rel.dyn, we can get the desired // behaviour by placing the iplt section in .rel.plt. - in.relaIplt = make>( + in.relaIplt = std::make_unique>( config->androidPackDynRelocs ? in.relaPlt->name : relaDynName, /*sort=*/false); add(*in.relaIplt); if ((config->emachine == EM_386 || config->emachine == EM_X86_64) && (config->andFeatures & GNU_PROPERTY_X86_FEATURE_1_IBT)) { - in.ibtPlt = make(); + in.ibtPlt = std::make_unique(); add(*in.ibtPlt); } - in.plt = config->emachine == EM_PPC ? make() - : make(); + if (config->emachine == EM_PPC) + in.plt = std::make_unique(); + else + in.plt = std::make_unique(); add(*in.plt); - in.iplt = make(); + in.iplt = std::make_unique(); add(*in.iplt); if (config->andFeatures) @@ -1066,17 +1069,17 @@ template void Writer::setReservedSymbolSections() { if (ElfSym::globalOffsetTable) { // The _GLOBAL_OFFSET_TABLE_ symbol is defined by target convention usually // to the start of the .got or .got.plt section. - InputSection *gotSection = in.gotPlt; + InputSection *sec = in.gotPlt.get(); if (!target->gotBaseSymInGotPlt) - gotSection = in.mipsGot ? cast(in.mipsGot) - : cast(in.got); - ElfSym::globalOffsetTable->section = gotSection; + sec = in.mipsGot.get() ? cast(in.mipsGot.get()) + : cast(in.got.get()); + ElfSym::globalOffsetTable->section = sec; } // .rela_iplt_{start,end} mark the start and the end of in.relaIplt. 
if (ElfSym::relaIpltStart && in.relaIplt->isNeeded()) { - ElfSym::relaIpltStart->section = in.relaIplt; - ElfSym::relaIpltEnd->section = in.relaIplt; + ElfSym::relaIpltStart->section = in.relaIplt.get(); + ElfSym::relaIpltEnd->section = in.relaIplt.get(); ElfSym::relaIpltEnd->value = in.relaIplt->getSize(); } @@ -1644,6 +1647,9 @@ static void finalizeSynthetic(SyntheticSection *sec) { sec->finalizeContents(); } } +template static void finalizeSynthetic(std::unique_ptr &sec) { + finalizeSynthetic(sec.get()); +} // We need to generate and finalize the content that depends on the address of // InputSections. As the generation of the content may also alter InputSection From 0489e891199afe766e6340be890b0d6cd91f0938 Mon Sep 17 00:00:00 2001 From: Shivam Gupta Date: Thu, 23 Dec 2021 10:47:16 +0530 Subject: [PATCH 109/293] [DAGCombiner] Avoid combining adjacent stores at -O0 to improve debug experience When the source has a series of assignments, users reasonably want to have the debugger step through each one individually. Turn off the combine for adjacent stores so we get this behavior at -O0. Similar to D7181. Reviewed By: spatel, xgupta Differential Revision: https://reviews.llvm.org/D115808 --- llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp | 4 ++-- .../CodeGen/RISCV/optnone-store-no-combine.ll | 16 ++++++++++++++++ 2 files changed, 18 insertions(+), 2 deletions(-) create mode 100644 llvm/test/CodeGen/RISCV/optnone-store-no-combine.ll diff --git a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp index 8055eb9a82d6..067ad819e0d2 100644 --- a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp @@ -18395,8 +18395,8 @@ SDValue DAGCombiner::visitSTORE(SDNode *N) { if (StoreSDNode *ST1 = dyn_cast(Chain)) { if (ST->isUnindexed() && ST->isSimple() && ST1->isUnindexed() && ST1->isSimple()) { - if (ST1->getBasePtr() == Ptr && ST1->getValue() == Value && - ST->getMemoryVT() == ST1->getMemoryVT() && + if (OptLevel != CodeGenOpt::None && ST1->getBasePtr() == Ptr && + ST1->getValue() == Value && ST->getMemoryVT() == ST1->getMemoryVT() && ST->getAddressSpace() == ST1->getAddressSpace()) { // If this is a store followed by a store with the same value to the // same location, then the store is dead/noop. diff --git a/llvm/test/CodeGen/RISCV/optnone-store-no-combine.ll b/llvm/test/CodeGen/RISCV/optnone-store-no-combine.ll new file mode 100644 index 000000000000..48e39a2593b7 --- /dev/null +++ b/llvm/test/CodeGen/RISCV/optnone-store-no-combine.ll @@ -0,0 +1,16 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc -mtriple=riscv64 -verify-machineinstrs < %s | FileCheck %s + +; This test verifies that a repeated store is not eliminated with optnone (improves debugging). 
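For readers who want the source-level picture of the DAGCombiner change above, here is a hedged sketch: hypothetical C++ that the IR test below could have been reduced from, not something taken from the patch itself.

```cpp
// Illustrative only. Before this change, one of the two identical stores was
// folded away as dead even at -O0, so the debugger could not stop on each
// assignment; with the OptLevel check, both stores survive at -O0 (hence the
// two `sw` instructions expected by the test below).
void assign_twice(int *p) {
  *p = 8; // first assignment keeps its own store
  *p = 8; // second, identical assignment also keeps its store at -O0
}
```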
+ +define void @foo(i32* %p) noinline optnone { +; CHECK-LABEL: foo: +; CHECK: # %bb.0: +; CHECK-NEXT: li a1, 8 +; CHECK-NEXT: sw a1, 0(a0) +; CHECK-NEXT: sw a1, 0(a0) +; CHECK-NEXT: ret + store i32 8, i32* %p, align 4 + store i32 8, i32* %p, align 4 + ret void +} From 4126b0866213c614608e892a2fe71a714f313ff2 Mon Sep 17 00:00:00 2001 From: Vitaly Buka Date: Wed, 22 Dec 2021 19:34:04 -0800 Subject: [PATCH 110/293] [NFC][sanitizer] Format a part of the file --- .../lib/sanitizer_common/sanitizer_symbolizer_libcdep.cpp | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/compiler-rt/lib/sanitizer_common/sanitizer_symbolizer_libcdep.cpp b/compiler-rt/lib/sanitizer_common/sanitizer_symbolizer_libcdep.cpp index 03f5cfbb7eac..79687ca5f72d 100644 --- a/compiler-rt/lib/sanitizer_common/sanitizer_symbolizer_libcdep.cpp +++ b/compiler-rt/lib/sanitizer_common/sanitizer_symbolizer_libcdep.cpp @@ -274,9 +274,8 @@ class LLVMSymbolizerProcess final : public SymbolizerProcess { const char* const kSymbolizerArch = "--default-arch=unknown"; #endif - const char *const inline_flag = common_flags()->symbolize_inline_frames - ? "--inlines" - : "--no-inlines"; + const char *const inline_flag = + common_flags()->symbolize_inline_frames ? "--inlines" : "--no-inlines"; int i = 0; argv[i++] = path_to_binary; argv[i++] = inline_flag; From 4316859e6a1d48f856e73507e5738b9c08642554 Mon Sep 17 00:00:00 2001 From: Vitaly Buka Date: Wed, 22 Dec 2021 20:06:35 -0800 Subject: [PATCH 111/293] [NFC][sanitizer] Format the test --- .../test/sanitizer_common/TestCases/symbolize_pc_inline.cpp | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/compiler-rt/test/sanitizer_common/TestCases/symbolize_pc_inline.cpp b/compiler-rt/test/sanitizer_common/TestCases/symbolize_pc_inline.cpp index 9567103155f0..a39f30e0633d 100644 --- a/compiler-rt/test/sanitizer_common/TestCases/symbolize_pc_inline.cpp +++ b/compiler-rt/test/sanitizer_common/TestCases/symbolize_pc_inline.cpp @@ -1,7 +1,7 @@ // RUN: %clangxx -O3 %s -o %t // RUN: %env_tool_opts=strip_path_prefix=/TestCases/ %run %t 2>&1 | FileCheck %s -// RUN: %env_tool_opts=strip_path_prefix=/TestCases/:symbolize_inline_frames=0 \ -// RUN: %run %t 2>&1 | FileCheck %s --check-prefixes=CHECK-NOINLINE +// RUN: %env_tool_opts=strip_path_prefix=/TestCases/:symbolize_inline_frames=0 %run %t 2>&1 | FileCheck %s --check-prefixes=NOINLINE +// RUN: %env_tool_opts=strip_path_prefix=/TestCases/:symbolize_inline_frames=1 %run %t 2>&1 | FileCheck %s // XFAIL: darwin @@ -18,7 +18,7 @@ __attribute__((noinline)) static void Symbolize() { printf("%s\n", p); } -// CHECK-NOINLINE: {{0x[0-9a-f]+}} in main symbolize_pc_inline.cpp:[[@LINE+2]] +// NOINLINE: {{0x[0-9a-f]+}} in main symbolize_pc_inline.cpp:[[@LINE+2]] // CHECK: [[ADDR:0x[0-9a-f]+]] in C2 symbolize_pc_inline.cpp:[[@LINE+1]] static inline void C2() { Symbolize(); } From 9f3aca7eaefe66f501141446c44dee0447322e3d Mon Sep 17 00:00:00 2001 From: Vitaly Buka Date: Wed, 22 Dec 2021 21:28:38 -0800 Subject: [PATCH 112/293] [sanitizer] Support symbolize_inline_frames by addr2line --- .../sanitizer_common/sanitizer_symbolizer_posix_libcdep.cpp | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/compiler-rt/lib/sanitizer_common/sanitizer_symbolizer_posix_libcdep.cpp b/compiler-rt/lib/sanitizer_common/sanitizer_symbolizer_posix_libcdep.cpp index de353bc5aa09..888ee7fe89bc 100644 --- a/compiler-rt/lib/sanitizer_common/sanitizer_symbolizer_posix_libcdep.cpp +++ 
b/compiler-rt/lib/sanitizer_common/sanitizer_symbolizer_posix_libcdep.cpp @@ -213,7 +213,9 @@ class Addr2LineProcess final : public SymbolizerProcess { const char *(&argv)[kArgVMax]) const override { int i = 0; argv[i++] = path_to_binary; - argv[i++] = "-iCfe"; + if (common_flags()->symbolize_inline_frames) + argv[i++] = "-i"; + argv[i++] = "-Cfe"; argv[i++] = module_name_; argv[i++] = nullptr; } From e48b1c8a27f0fbd791edc8e45756b268caadfa66 Mon Sep 17 00:00:00 2001 From: Fangrui Song Date: Wed, 22 Dec 2021 21:34:25 -0800 Subject: [PATCH 113/293] [ELF] Make Partition members unique_ptr and remove associate make See D116143 for benefits. My lld executable (x86-64) is 103+KiB smaller. --- lld/ELF/Driver.cpp | 6 +++-- lld/ELF/LinkerScript.cpp | 6 ++--- lld/ELF/SyntheticSections.cpp | 29 +++++++++++----------- lld/ELF/SyntheticSections.h | 32 ++++++++++++------------- lld/ELF/Writer.cpp | 45 +++++++++++++++++++---------------- 5 files changed, 62 insertions(+), 56 deletions(-) diff --git a/lld/ELF/Driver.cpp b/lld/ELF/Driver.cpp index 7d01b7f33dec..65ca1c9afd66 100644 --- a/lld/ELF/Driver.cpp +++ b/lld/ELF/Driver.cpp @@ -99,7 +99,8 @@ bool elf::link(ArrayRef args, bool canExitEarly, tar = nullptr; memset(&in, 0, sizeof(in)); - partitions = {Partition()}; + partitions.clear(); + partitions.emplace_back(); SharedFile::vernauxNum = 0; }; @@ -116,7 +117,8 @@ bool elf::link(ArrayRef args, bool canExitEarly, script = std::make_unique(); symtab = std::make_unique(); - partitions = {Partition()}; + partitions.clear(); + partitions.emplace_back(); config->progName = args[0]; diff --git a/lld/ELF/LinkerScript.cpp b/lld/ELF/LinkerScript.cpp index a362dd3809d0..341d0d1e6851 100644 --- a/lld/ELF/LinkerScript.cpp +++ b/lld/ELF/LinkerScript.cpp @@ -561,15 +561,15 @@ LinkerScript::computeInputSections(const InputSectionDescription *cmd, } void LinkerScript::discard(InputSectionBase &s) { - if (&s == in.shStrTab.get() || &s == mainPart->relrDyn) + if (&s == in.shStrTab.get() || &s == mainPart->relrDyn.get()) error("discarding " + s.name + " section is not allowed"); // You can discard .hash and .gnu.hash sections by linker scripts. Since // they are synthesized sections, we need to handle them differently than // other regular sections. - if (&s == mainPart->gnuHashTab) + if (&s == mainPart->gnuHashTab.get()) mainPart->gnuHashTab = nullptr; - if (&s == mainPart->hashTab) + if (&s == mainPart->hashTab.get()) mainPart->hashTab = nullptr; s.markDead(); diff --git a/lld/ELF/SyntheticSections.cpp b/lld/ELF/SyntheticSections.cpp index 047546e28ed0..dd4c5f787f37 100644 --- a/lld/ELF/SyntheticSections.cpp +++ b/lld/ELF/SyntheticSections.cpp @@ -1265,11 +1265,11 @@ DynamicSection::DynamicSection() // .rela.dyn // // DT_RELASZ is the total size of the included sections. 
-static uint64_t addRelaSz(RelocationBaseSection *relaDyn) { - size_t size = relaDyn->getSize(); - if (in.relaIplt->getParent() == relaDyn->getParent()) +static uint64_t addRelaSz(const RelocationBaseSection &relaDyn) { + size_t size = relaDyn.getSize(); + if (in.relaIplt->getParent() == relaDyn.getParent()) size += in.relaIplt->getSize(); - if (in.relaPlt->getParent() == relaDyn->getParent()) + if (in.relaPlt->getParent() == relaDyn.getParent()) size += in.relaPlt->getSize(); return size; } @@ -1375,7 +1375,8 @@ DynamicSection::computeContents() { (in.relaIplt->isNeeded() && part.relaDyn->getParent() == in.relaIplt->getParent())) { addInSec(part.relaDyn->dynamicTag, *part.relaDyn); - entries.emplace_back(part.relaDyn->sizeDynamicTag, addRelaSz(part.relaDyn)); + entries.emplace_back(part.relaDyn->sizeDynamicTag, + addRelaSz(*part.relaDyn)); bool isRela = config->isRela; addInt(isRela ? DT_RELAENT : DT_RELENT, @@ -1626,7 +1627,7 @@ void RelocationBaseSection::addReloc(const DynamicReloc &reloc) { } void RelocationBaseSection::finalizeContents() { - SymbolTableBaseSection *symTab = getPartition().dynSymTab; + SymbolTableBaseSection *symTab = getPartition().dynSymTab.get(); // When linking glibc statically, .rel{,a}.plt contains R_*_IRELATIVE // relocations due to IFUNC (e.g. strcpy). sh_link will be set to 0 in that @@ -1677,7 +1678,7 @@ RelocationSection::RelocationSection(StringRef name, bool sort) } template void RelocationSection::writeTo(uint8_t *buf) { - SymbolTableBaseSection *symTab = getPartition().dynSymTab; + SymbolTableBaseSection *symTab = getPartition().dynSymTab.get(); parallelForEach(relocs, [symTab](DynamicReloc &rel) { rel.computeRaw(symTab); }); @@ -1772,8 +1773,8 @@ bool AndroidPackedRelocationSection::updateAllocSize() { for (const DynamicReloc &rel : relocs) { Elf_Rela r; r.r_offset = rel.getOffset(); - r.setSymbolAndType(rel.getSymIndex(getPartition().dynSymTab), rel.type, - false); + r.setSymbolAndType(rel.getSymIndex(getPartition().dynSymTab.get()), + rel.type, false); if (config->isRela) r.r_addend = rel.computeAddend(); @@ -2099,7 +2100,7 @@ void SymbolTableBaseSection::finalizeContents() { // Only the main partition's dynsym indexes are stored in the symbols // themselves. All other partitions use a lookup table. - if (this == mainPart->dynSymTab) { + if (this == mainPart->dynSymTab.get()) { size_t i = 0; for (const SymbolTableEntry &s : symbols) s.sym->dynsymIndex = ++i; @@ -2146,7 +2147,7 @@ void SymbolTableBaseSection::addSymbol(Symbol *b) { } size_t SymbolTableBaseSection::getSymbolIndex(Symbol *sym) { - if (this == mainPart->dynSymTab) + if (this == mainPart->dynSymTab.get()) return sym->dynsymIndex; // Initializes symbol lookup tables lazily. This is used only for -r, @@ -2481,7 +2482,7 @@ HashTableSection::HashTableSection() } void HashTableSection::finalizeContents() { - SymbolTableBaseSection *symTab = getPartition().dynSymTab; + SymbolTableBaseSection *symTab = getPartition().dynSymTab.get(); if (OutputSection *sec = symTab->getParent()) getParent()->link = sec->sectionIndex; @@ -2495,7 +2496,7 @@ void HashTableSection::finalizeContents() { } void HashTableSection::writeTo(uint8_t *buf) { - SymbolTableBaseSection *symTab = getPartition().dynSymTab; + SymbolTableBaseSection *symTab = getPartition().dynSymTab.get(); // See comment in GnuHashTableSection::writeTo. memset(buf, 0, size); @@ -3803,7 +3804,7 @@ void PartitionIndexSection::writeTo(uint8_t *buf) { SyntheticSection *next = i == partitions.size() - 1 ? 
in.partEnd.get() - : partitions[i + 1].elfHeader; + : partitions[i + 1].elfHeader.get(); write32(buf + 8, next->getVA() - partitions[i].elfHeader->getVA()); va += 12; diff --git a/lld/ELF/SyntheticSections.h b/lld/ELF/SyntheticSections.h index 15b5657f8037..02941e3dfbbb 100644 --- a/lld/ELF/SyntheticSections.h +++ b/lld/ELF/SyntheticSections.h @@ -1203,24 +1203,24 @@ struct Partition { StringRef name; uint64_t nameStrTab; - SyntheticSection *elfHeader; - SyntheticSection *programHeaders; + std::unique_ptr elfHeader; + std::unique_ptr programHeaders; std::vector phdrs; - ARMExidxSyntheticSection *armExidx; - BuildIdSection *buildId; - SyntheticSection *dynamic; - StringTableSection *dynStrTab; - SymbolTableBaseSection *dynSymTab; - EhFrameHeader *ehFrameHdr; - EhFrameSection *ehFrame; - GnuHashTableSection *gnuHashTab; - HashTableSection *hashTab; - RelocationBaseSection *relaDyn; - RelrBaseSection *relrDyn; - VersionDefinitionSection *verDef; - SyntheticSection *verNeed; - VersionTableSection *verSym; + std::unique_ptr armExidx; + std::unique_ptr buildId; + std::unique_ptr dynamic; + std::unique_ptr dynStrTab; + std::unique_ptr dynSymTab; + std::unique_ptr ehFrameHdr; + std::unique_ptr ehFrame; + std::unique_ptr gnuHashTab; + std::unique_ptr hashTab; + std::unique_ptr relaDyn; + std::unique_ptr relrDyn; + std::unique_ptr verDef; + std::unique_ptr verNeed; + std::unique_ptr verSym; unsigned getNumber() const { return this - &partitions[0] + 1; } }; diff --git a/lld/ELF/Writer.cpp b/lld/ELF/Writer.cpp index 15cb7603524e..b763a328e70c 100644 --- a/lld/ELF/Writer.cpp +++ b/lld/ELF/Writer.cpp @@ -345,49 +345,52 @@ template void elf::createSyntheticSections() { }; if (!part.name.empty()) { - part.elfHeader = make>(); + part.elfHeader = std::make_unique>(); part.elfHeader->name = part.name; add(*part.elfHeader); - part.programHeaders = make>(); + part.programHeaders = + std::make_unique>(); add(*part.programHeaders); } if (config->buildId != BuildIdKind::None) { - part.buildId = make(); + part.buildId = std::make_unique(); add(*part.buildId); } - part.dynStrTab = make(".dynstr", true); - part.dynSymTab = make>(*part.dynStrTab); - part.dynamic = make>(); + part.dynStrTab = std::make_unique(".dynstr", true); + part.dynSymTab = + std::make_unique>(*part.dynStrTab); + part.dynamic = std::make_unique>(); if (config->androidPackDynRelocs) - part.relaDyn = make>(relaDynName); - else part.relaDyn = - make>(relaDynName, config->zCombreloc); + std::make_unique>(relaDynName); + else + part.relaDyn = std::make_unique>( + relaDynName, config->zCombreloc); if (config->hasDynSymTab) { add(*part.dynSymTab); - part.verSym = make(); + part.verSym = std::make_unique(); add(*part.verSym); if (!namedVersionDefs().empty()) { - part.verDef = make(); + part.verDef = std::make_unique(); add(*part.verDef); } - part.verNeed = make>(); + part.verNeed = std::make_unique>(); add(*part.verNeed); if (config->gnuHash) { - part.gnuHashTab = make(); + part.gnuHashTab = std::make_unique(); add(*part.gnuHashTab); } if (config->sysvHash) { - part.hashTab = make(); + part.hashTab = std::make_unique(); add(*part.hashTab); } @@ -397,23 +400,23 @@ template void elf::createSyntheticSections() { } if (config->relrPackDynRelocs) { - part.relrDyn = make>(); + part.relrDyn = std::make_unique>(); add(*part.relrDyn); } if (!config->relocatable) { if (config->ehFrameHdr) { - part.ehFrameHdr = make(); + part.ehFrameHdr = std::make_unique(); add(*part.ehFrameHdr); } - part.ehFrame = make(); + part.ehFrame = std::make_unique(); add(*part.ehFrame); 
} if (config->emachine == EM_ARM && !config->relocatable) { // The ARMExidxsyntheticsection replaces all the individual .ARM.exidx // InputSections. - part.armExidx = make(); + part.armExidx = std::make_unique(); add(*part.armExidx); } } @@ -1886,9 +1889,9 @@ template void Writer::finalizeSections() { // Even the author of gold doesn't remember why gold behaves that way. // https://sourceware.org/ml/binutils/2002-03/msg00360.html if (mainPart->dynamic->parent) - symtab->addSymbol(Defined{/*file=*/nullptr, "_DYNAMIC", STB_WEAK, - STV_HIDDEN, STT_NOTYPE, - /*value=*/0, /*size=*/0, mainPart->dynamic}); + symtab->addSymbol( + Defined{/*file=*/nullptr, "_DYNAMIC", STB_WEAK, STV_HIDDEN, STT_NOTYPE, + /*value=*/0, /*size=*/0, mainPart->dynamic.get()}); // Define __rel[a]_iplt_{start,end} symbols if needed. addRelIpltSymbols(); From 12779edd711845eccd83ef7d5c30ac8e316a1226 Mon Sep 17 00:00:00 2001 From: Tue Ly Date: Tue, 21 Dec 2021 14:19:03 -0500 Subject: [PATCH 114/293] [libc] Add performance tests for hypotf and hypot. Add performance tests for hypotf and hypot. Reviewed By: sivachandra, michaelrj Differential Revision: https://reviews.llvm.org/D116122 --- .../BinaryOpSingleOutputDiff.h | 99 +++++++++++++++++++ .../math/differential_testing/CMakeLists.txt | 28 ++++++ .../math/differential_testing/hypot_perf.cpp | 16 +++ .../math/differential_testing/hypotf_perf.cpp | 16 +++ 4 files changed, 159 insertions(+) create mode 100644 libc/test/src/math/differential_testing/BinaryOpSingleOutputDiff.h create mode 100644 libc/test/src/math/differential_testing/hypot_perf.cpp create mode 100644 libc/test/src/math/differential_testing/hypotf_perf.cpp diff --git a/libc/test/src/math/differential_testing/BinaryOpSingleOutputDiff.h b/libc/test/src/math/differential_testing/BinaryOpSingleOutputDiff.h new file mode 100644 index 000000000000..0fdfdf069aa9 --- /dev/null +++ b/libc/test/src/math/differential_testing/BinaryOpSingleOutputDiff.h @@ -0,0 +1,99 @@ +//===-- Common utility class for differential analysis --------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include "src/__support/FPUtil/FPBits.h" +#include "utils/testutils/StreamWrapper.h" +#include "utils/testutils/Timer.h" + +namespace __llvm_libc { +namespace testing { + +template class BinaryOpSingleOutputDiff { + using FPBits = fputil::FPBits; + using UIntType = typename FPBits::UIntType; + static constexpr UIntType MSBIT = UIntType(1) << (8 * sizeof(UIntType) - 1); + static constexpr UIntType UINTMAX = (MSBIT - 1) + MSBIT; + +public: + typedef T Func(T, T); + + static void run_perf_in_range(Func myFunc, Func otherFunc, + UIntType startingBit, UIntType endingBit, + UIntType N, testutils::OutputFileStream &log) { + auto runner = [=](Func func) { + volatile T result; + if (endingBit < startingBit) { + return; + } + + UIntType step = (endingBit - startingBit) / N; + for (UIntType bitsX = startingBit, bitsY = endingBit;; + bitsX += step, bitsY -= step) { + T x = T(FPBits(bitsX)); + T y = T(FPBits(bitsY)); + result = func(x, y); + if (endingBit - bitsX < step) { + break; + } + } + }; + + Timer timer; + timer.start(); + runner(myFunc); + timer.stop(); + + double my_average = static_cast(timer.nanoseconds()) / N; + log << "-- My function --\n"; + log << " Total time : " << timer.nanoseconds() << " ns \n"; + log << " Average runtime : " << my_average << " ns/op \n"; + log << " Ops per second : " + << static_cast(1'000'000'000.0 / my_average) << " op/s \n"; + + timer.start(); + runner(otherFunc); + timer.stop(); + + double other_average = static_cast(timer.nanoseconds()) / N; + log << "-- Other function --\n"; + log << " Total time : " << timer.nanoseconds() << " ns \n"; + log << " Average runtime : " << other_average << " ns/op \n"; + log << " Ops per second : " + << static_cast(1'000'000'000.0 / other_average) << " op/s \n"; + + log << "-- Average runtime ratio --\n"; + log << " Mine / Other's : " << my_average / other_average << " \n"; + } + + static void run_perf(Func myFunc, Func otherFunc, const char *logFile) { + testutils::OutputFileStream log(logFile); + log << " Performance tests with inputs in denormal range:\n"; + run_perf_in_range(myFunc, otherFunc, /* startingBit= */ UIntType(0), + /* endingBit= */ FPBits::MAX_SUBNORMAL, 1'000'001, log); + log << "\n Performance tests with inputs in normal range:\n"; + run_perf_in_range(myFunc, otherFunc, /* startingBit= */ FPBits::MIN_NORMAL, + /* endingBit= */ FPBits::MAX_NORMAL, 100'000'001, log); + } +}; + +} // namespace testing +} // namespace __llvm_libc + +#define BINARY_OP_SINGLE_OUTPUT_DIFF(T, myFunc, otherFunc, filename) \ + int main() { \ + __llvm_libc::testing::BinaryOpSingleOutputDiff::run_diff( \ + &myFunc, &otherFunc, filename); \ + return 0; \ + } + +#define BINARY_OP_SINGLE_OUTPUT_PERF(T, myFunc, otherFunc, filename) \ + int main() { \ + __llvm_libc::testing::BinaryOpSingleOutputDiff::run_perf( \ + &myFunc, &otherFunc, filename); \ + return 0; \ + } diff --git a/libc/test/src/math/differential_testing/CMakeLists.txt b/libc/test/src/math/differential_testing/CMakeLists.txt index e553bf5bfbef..d3e0e64e03fb 100644 --- a/libc/test/src/math/differential_testing/CMakeLists.txt +++ b/libc/test/src/math/differential_testing/CMakeLists.txt @@ -67,6 +67,12 @@ add_header_library( SingleInputSingleOutputDiff.h ) +add_header_library( + binary_op_single_output_diff + HDRS + BinaryOpSingleOutputDiff.h +) + add_diff_binary( sinf_diff SRCS @@ -368,3 +374,25 @@ add_diff_binary( COMPILE_OPTIONS 
-fno-builtin ) + +add_diff_binary( + hypotf_perf + SRCS + hypotf_perf.cpp + DEPENDS + .binary_op_single_output_diff + libc.src.math.hypotf + COMPILE_OPTIONS + -fno-builtin +) + +add_diff_binary( + hypot_perf + SRCS + hypot_perf.cpp + DEPENDS + .binary_op_single_output_diff + libc.src.math.hypot + COMPILE_OPTIONS + -fno-builtin +) diff --git a/libc/test/src/math/differential_testing/hypot_perf.cpp b/libc/test/src/math/differential_testing/hypot_perf.cpp new file mode 100644 index 000000000000..f90605455f76 --- /dev/null +++ b/libc/test/src/math/differential_testing/hypot_perf.cpp @@ -0,0 +1,16 @@ +//===-- Differential test for hypot ---------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include "BinaryOpSingleOutputDiff.h" + +#include "src/math/hypot.h" + +#include + +BINARY_OP_SINGLE_OUTPUT_PERF(double, __llvm_libc::hypot, ::hypot, + "hypot_perf.log") diff --git a/libc/test/src/math/differential_testing/hypotf_perf.cpp b/libc/test/src/math/differential_testing/hypotf_perf.cpp new file mode 100644 index 000000000000..75c61cb0592a --- /dev/null +++ b/libc/test/src/math/differential_testing/hypotf_perf.cpp @@ -0,0 +1,16 @@ +//===-- Differential test for hypotf --------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include "BinaryOpSingleOutputDiff.h" + +#include "src/math/hypotf.h" + +#include + +BINARY_OP_SINGLE_OUTPUT_PERF(float, __llvm_libc::hypotf, ::hypotf, + "hypotf_perf.log") From ba6973c89b017671b1a1cdebd6d46352e350c1e9 Mon Sep 17 00:00:00 2001 From: Fangrui Song Date: Wed, 22 Dec 2021 22:02:29 -0800 Subject: [PATCH 115/293] [ELF] Change nonnull pointer parameters to references --- lld/ELF/SyntheticSections.cpp | 16 ++++++++-------- lld/ELF/SyntheticSections.h | 2 +- 2 files changed, 9 insertions(+), 9 deletions(-) diff --git a/lld/ELF/SyntheticSections.cpp b/lld/ELF/SyntheticSections.cpp index dd4c5f787f37..93d6ed32016f 100644 --- a/lld/ELF/SyntheticSections.cpp +++ b/lld/ELF/SyntheticSections.cpp @@ -1578,8 +1578,8 @@ void RelocationBaseSection::addSymbolReloc(RelType dynType, uint64_t offsetInSec, Symbol &sym, int64_t addend, Optional addendRelType) { - addReloc(DynamicReloc::AgainstSymbol, dynType, &isec, offsetInSec, sym, - addend, R_ADDEND, addendRelType ? *addendRelType : target->noneRel); + addReloc(DynamicReloc::AgainstSymbol, dynType, isec, offsetInSec, sym, addend, + R_ADDEND, addendRelType ? 
*addendRelType : target->noneRel); } void RelocationBaseSection::addRelativeReloc( @@ -1591,8 +1591,8 @@ void RelocationBaseSection::addRelativeReloc( assert((!sym.isPreemptible || expr == R_GOT) && "cannot add relative relocation against preemptible symbol"); assert(expr != R_ADDEND && "expected non-addend relocation expression"); - addReloc(DynamicReloc::AddendOnlyWithTargetVA, dynType, &inputSec, - offsetInSec, sym, addend, expr, addendRelType); + addReloc(DynamicReloc::AddendOnlyWithTargetVA, dynType, inputSec, offsetInSec, + sym, addend, expr, addendRelType); } void RelocationBaseSection::addAddendOnlyRelocIfNonPreemptible( @@ -1603,21 +1603,21 @@ void RelocationBaseSection::addAddendOnlyRelocIfNonPreemptible( addReloc({dynType, &isec, offsetInSec, DynamicReloc::AgainstSymbol, sym, 0, R_ABS}); else - addReloc(DynamicReloc::AddendOnlyWithTargetVA, dynType, &isec, offsetInSec, + addReloc(DynamicReloc::AddendOnlyWithTargetVA, dynType, isec, offsetInSec, sym, 0, R_ABS, addendRelType); } void RelocationBaseSection::addReloc(DynamicReloc::Kind kind, RelType dynType, - InputSectionBase *inputSec, + InputSectionBase &inputSec, uint64_t offsetInSec, Symbol &sym, int64_t addend, RelExpr expr, RelType addendRelType) { // Write the addends to the relocated address if required. We skip // it if the written value would be zero. if (config->writeAddends && (expr != R_ADDEND || addend != 0)) - inputSec->relocations.push_back( + inputSec.relocations.push_back( {expr, addendRelType, offsetInSec, addend, &sym}); - addReloc({dynType, inputSec, offsetInSec, kind, sym, addend, expr}); + addReloc({dynType, &inputSec, offsetInSec, kind, sym, addend, expr}); } void RelocationBaseSection::addReloc(const DynamicReloc &reloc) { diff --git a/lld/ELF/SyntheticSections.h b/lld/ELF/SyntheticSections.h index 02941e3dfbbb..8c5d1ae88672 100644 --- a/lld/ELF/SyntheticSections.h +++ b/lld/ELF/SyntheticSections.h @@ -534,7 +534,7 @@ class RelocationBaseSection : public SyntheticSection { uint64_t offsetInSec, Symbol &sym, RelType addendRelType); void addReloc(DynamicReloc::Kind kind, RelType dynType, - InputSectionBase *inputSec, uint64_t offsetInSec, Symbol &sym, + InputSectionBase &inputSec, uint64_t offsetInSec, Symbol &sym, int64_t addend, RelExpr expr, RelType addendRelType); bool isNeeded() const override { return !relocs.empty(); } size_t getSize() const override { return relocs.size() * this->entsize; } From 856550ca79ba558fe776adc8777a4b7264b66dc8 Mon Sep 17 00:00:00 2001 From: Azat Khuzhin Date: Wed, 22 Dec 2021 22:10:27 -0800 Subject: [PATCH 116/293] [Sanitizer] Fix setbuffer() interceptor (it accept size, not mode) Fixes: 0c81a62d9d76 ("[Sanitizer] Adding setvbuf in supported platforms and other stream buffer functions") Reviewed By: vitalybuka Differential Revision: https://reviews.llvm.org/D116176 --- .../sanitizer_common/sanitizer_common_interceptors.inc | 8 ++++---- .../test/sanitizer_common/TestCases/Posix/setvbuf.cpp | 4 +++- 2 files changed, 7 insertions(+), 5 deletions(-) diff --git a/compiler-rt/lib/sanitizer_common/sanitizer_common_interceptors.inc b/compiler-rt/lib/sanitizer_common/sanitizer_common_interceptors.inc index d219734fa0a3..b0ab08dff1db 100644 --- a/compiler-rt/lib/sanitizer_common/sanitizer_common_interceptors.inc +++ b/compiler-rt/lib/sanitizer_common/sanitizer_common_interceptors.inc @@ -7857,12 +7857,12 @@ INTERCEPTOR(void, setbuf, __sanitizer_FILE *stream, char *buf) { unpoison_file(stream); } -INTERCEPTOR(void, setbuffer, __sanitizer_FILE *stream, char *buf, int mode) { 
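As a quick reference for the signature being corrected in this hunk, here is a standalone sketch, assuming a glibc/BSD environment where `setbuffer()` is declared; it is not sanitizer code.

```cpp
#include <cstdio> // FILE, BUFSIZ; setbuffer() is a BSD/glibc extension

int main() {
  static char buf[BUFSIZ / 2];
  // ISO C counterpart: int setvbuf(FILE *stream, char *buf, int mode, size_t size);
  // there the third argument is a buffering *mode* such as _IOFBF.
  //
  // setbuffer() has no mode; its third argument is the buffer *size* in bytes,
  // which is the range the interceptor needs to mark as written.
  setbuffer(stdout, buf, sizeof(buf));
  std::puts("buffered");
  return 0;
}
```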
+INTERCEPTOR(void, setbuffer, __sanitizer_FILE *stream, char *buf, SIZE_T size) { void *ctx; - COMMON_INTERCEPTOR_ENTER(ctx, setbuffer, stream, buf, mode); - REAL(setbuffer)(stream, buf, mode); + COMMON_INTERCEPTOR_ENTER(ctx, setbuffer, stream, buf, size); + REAL(setbuffer)(stream, buf, size); if (buf) { - COMMON_INTERCEPTOR_WRITE_RANGE(ctx, buf, __sanitizer_bufsiz); + COMMON_INTERCEPTOR_WRITE_RANGE(ctx, buf, size); } if (stream) unpoison_file(stream); diff --git a/compiler-rt/test/sanitizer_common/TestCases/Posix/setvbuf.cpp b/compiler-rt/test/sanitizer_common/TestCases/Posix/setvbuf.cpp index bc29ba45d89a..f789bfd727d5 100644 --- a/compiler-rt/test/sanitizer_common/TestCases/Posix/setvbuf.cpp +++ b/compiler-rt/test/sanitizer_common/TestCases/Posix/setvbuf.cpp @@ -34,7 +34,9 @@ void test_setbuffer() { print_something(); - setbuffer(stdout, buf, BUFSIZ); + // Ensure that interceptor reads correct size + // (not BUFSIZ as by default, hence BUFSIZ/2). + setbuffer(stdout, buf, BUFSIZ / 2); print_something(); From ba948c5a9c524b46e6c1f31090f0b85275947867 Mon Sep 17 00:00:00 2001 From: Fangrui Song Date: Wed, 22 Dec 2021 22:30:07 -0800 Subject: [PATCH 117/293] [ELF] Use SmallVector for some global variables (*Files and *Sections). NFC My lld executable is 26+KiB smaller. --- lld/ELF/InputFiles.cpp | 12 ++++++------ lld/ELF/InputFiles.h | 12 ++++++------ lld/ELF/InputSection.cpp | 2 +- lld/ELF/InputSection.h | 2 +- lld/ELF/OutputSections.cpp | 2 +- lld/ELF/OutputSections.h | 2 +- lld/ELF/Writer.cpp | 2 +- 7 files changed, 17 insertions(+), 17 deletions(-) diff --git a/lld/ELF/InputFiles.cpp b/lld/ELF/InputFiles.cpp index cf7c1c228b43..f981f33d963d 100644 --- a/lld/ELF/InputFiles.cpp +++ b/lld/ELF/InputFiles.cpp @@ -43,12 +43,12 @@ using namespace lld::elf; bool InputFile::isInGroup; uint32_t InputFile::nextGroupId; -std::vector elf::archiveFiles; -std::vector elf::binaryFiles; -std::vector elf::bitcodeFiles; -std::vector elf::lazyBitcodeFiles; -std::vector elf::objectFiles; -std::vector elf::sharedFiles; +SmallVector elf::archiveFiles; +SmallVector elf::binaryFiles; +SmallVector elf::bitcodeFiles; +SmallVector elf::lazyBitcodeFiles; +SmallVector elf::objectFiles; +SmallVector elf::sharedFiles; std::unique_ptr elf::tar; diff --git a/lld/ELF/InputFiles.h b/lld/ELF/InputFiles.h index dd39bc397a0f..1fee39371cca 100644 --- a/lld/ELF/InputFiles.h +++ b/lld/ELF/InputFiles.h @@ -396,12 +396,12 @@ inline bool isBitcode(MemoryBufferRef mb) { std::string replaceThinLTOSuffix(StringRef path); -extern std::vector archiveFiles; -extern std::vector binaryFiles; -extern std::vector bitcodeFiles; -extern std::vector lazyBitcodeFiles; -extern std::vector objectFiles; -extern std::vector sharedFiles; +extern SmallVector archiveFiles; +extern SmallVector binaryFiles; +extern SmallVector bitcodeFiles; +extern SmallVector lazyBitcodeFiles; +extern SmallVector objectFiles; +extern SmallVector sharedFiles; } // namespace elf } // namespace lld diff --git a/lld/ELF/InputSection.cpp b/lld/ELF/InputSection.cpp index 61654b95704f..56a3510e41bb 100644 --- a/lld/ELF/InputSection.cpp +++ b/lld/ELF/InputSection.cpp @@ -40,7 +40,7 @@ using namespace llvm::sys; using namespace lld; using namespace lld::elf; -std::vector elf::inputSections; +SmallVector elf::inputSections; DenseSet> elf::ppc64noTocRelax; // Returns a string to construct an error message. 
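Background on the container swap in this commit, as a sketch under the assumption of a typical 64-bit ABI with LLVM headers available; the numbers are not measured from this commit. `llvm::SmallVector<T, 0>` keeps 32-bit size/capacity fields next to the data pointer, so the object itself is smaller than the three-pointer `std::vector<T>` of libstdc++/libc++, one commonly cited reason LLVM code prefers it for containers like these.

```cpp
#include "llvm/ADT/SmallVector.h"
#include <cstdio>
#include <vector>

int main() {
  // Typical 64-bit layout: {void *, uint32_t, uint32_t} vs three pointers.
  // Printed rather than asserted, since both sizes are implementation details.
  std::printf("sizeof SmallVector<void *, 0> = %zu\n",
              sizeof(llvm::SmallVector<void *, 0>));
  std::printf("sizeof std::vector<void *>    = %zu\n",
              sizeof(std::vector<void *>));
  return 0;
}
```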
diff --git a/lld/ELF/InputSection.h b/lld/ELF/InputSection.h index a5967b500f90..5a0dd78f0e55 100644 --- a/lld/ELF/InputSection.h +++ b/lld/ELF/InputSection.h @@ -394,7 +394,7 @@ inline bool isDebugSection(const InputSectionBase &sec) { } // The list of all input sections. -extern std::vector inputSections; +extern SmallVector inputSections; // The set of TOC entries (.toc + addend) for which we should not apply // toc-indirect to toc-relative relaxation. const Symbol * refers to the diff --git a/lld/ELF/OutputSections.cpp b/lld/ELF/OutputSections.cpp index 71ef547a5f38..4a03ac387814 100644 --- a/lld/ELF/OutputSections.cpp +++ b/lld/ELF/OutputSections.cpp @@ -40,7 +40,7 @@ OutputSection *Out::preinitArray; OutputSection *Out::initArray; OutputSection *Out::finiArray; -std::vector elf::outputSections; +SmallVector elf::outputSections; uint32_t OutputSection::getPhdrFlags() const { uint32_t ret = 0; diff --git a/lld/ELF/OutputSections.h b/lld/ELF/OutputSections.h index a5b05cf28aa8..fb3eb0059909 100644 --- a/lld/ELF/OutputSections.h +++ b/lld/ELF/OutputSections.h @@ -138,7 +138,7 @@ struct Out { uint64_t getHeaderSize(); -extern std::vector outputSections; +extern llvm::SmallVector outputSections; } // namespace elf } // namespace lld diff --git a/lld/ELF/Writer.cpp b/lld/ELF/Writer.cpp index b763a328e70c..5fc3f6cb3efa 100644 --- a/lld/ELF/Writer.cpp +++ b/lld/ELF/Writer.cpp @@ -1246,7 +1246,7 @@ static void maybeShuffle(DenseMap &order) { if (config->shuffleSections.empty()) return; - std::vector matched, sections = inputSections; + SmallVector matched, sections = inputSections; matched.reserve(sections.size()); for (const auto &patAndSeed : config->shuffleSections) { matched.clear(); From f66d602c3f58bd8a312c79bb404eac9dabf1a730 Mon Sep 17 00:00:00 2001 From: Marek Kurdej Date: Wed, 22 Dec 2021 20:41:20 +0100 Subject: [PATCH 118/293] [clang-format] Fix wrong indentation after trailing requires clause. Fixes https://github.com/llvm/llvm-project/issues/52834. Before this patch, clang-format would wrongly parse top-level entities (e.g. 
namespaces) and format: ``` template constexpr void foo requires(I == 42) {} namespace ns { void foo() {} } // namespace ns ``` into: `````` template constexpr void foo requires(I == 42) {} namespace ns { void foo() {} } // namespace ns ``` with configuration: ``` NamespaceIndentation: None ```` Reviewed By: MyDeveloperDay, HazardyKnusperkeks, owenpan Differential Revision: https://reviews.llvm.org/D116183 --- clang/lib/Format/UnwrappedLineParser.cpp | 2 +- clang/unittests/Format/FormatTest.cpp | 5 +++++ 2 files changed, 6 insertions(+), 1 deletion(-) diff --git a/clang/lib/Format/UnwrappedLineParser.cpp b/clang/lib/Format/UnwrappedLineParser.cpp index 6010d276af05..4f300bb63a42 100644 --- a/clang/lib/Format/UnwrappedLineParser.cpp +++ b/clang/lib/Format/UnwrappedLineParser.cpp @@ -1439,7 +1439,7 @@ void UnwrappedLineParser::parseStructuralElement(bool IsTopLevel) { return; case tok::kw_requires: parseRequires(); - break; + return; case tok::kw_enum: // Ignore if this is part of "template is(tok::less)) { diff --git a/clang/unittests/Format/FormatTest.cpp b/clang/unittests/Format/FormatTest.cpp index a787e26ccd5a..ee486f452194 100644 --- a/clang/unittests/Format/FormatTest.cpp +++ b/clang/unittests/Format/FormatTest.cpp @@ -3556,6 +3556,11 @@ TEST_F(FormatTest, FormatsNamespaces) { "struct b_struct {};\n" "} // namespace B\n", Style); + verifyFormat("template constexpr void foo requires(I == 42) {}\n" + "namespace ns {\n" + "void foo() {}\n" + "} // namespace ns\n", + Style); } TEST_F(FormatTest, NamespaceMacros) { From d39d2acfdd9f4d09d29752a75f37e756ddb60764 Mon Sep 17 00:00:00 2001 From: Andrew Browne Date: Wed, 22 Dec 2021 16:56:37 -0800 Subject: [PATCH 119/293] [DFSan] Make dfsan_read_origin_of_first_taint public. Makes origins easier to use with dfsan_read_label(addr, size). Reviewed By: vitalybuka Differential Revision: https://reviews.llvm.org/D116197 --- .../include/sanitizer/dfsan_interface.h | 4 +++ compiler-rt/lib/dfsan/done_abilist.txt | 2 ++ .../test/dfsan/origin_of_first_taint.c | 34 +++++++++++++++++++ 3 files changed, 40 insertions(+) create mode 100644 compiler-rt/test/dfsan/origin_of_first_taint.c diff --git a/compiler-rt/include/sanitizer/dfsan_interface.h b/compiler-rt/include/sanitizer/dfsan_interface.h index 769aecfb0d70..bc0652c99a14 100644 --- a/compiler-rt/include/sanitizer/dfsan_interface.h +++ b/compiler-rt/include/sanitizer/dfsan_interface.h @@ -54,6 +54,10 @@ dfsan_origin dfsan_get_origin(long data); /// Retrieves the label associated with the data at the given address. dfsan_label dfsan_read_label(const void *addr, size_t size); +/// Return the origin associated with the first taint byte in the size bytes +/// from the address addr. +dfsan_origin dfsan_read_origin_of_first_taint(const void *addr, size_t size); + /// Returns whether the given label label contains the label elem. 
int dfsan_has_label(dfsan_label label, dfsan_label elem); diff --git a/compiler-rt/lib/dfsan/done_abilist.txt b/compiler-rt/lib/dfsan/done_abilist.txt index b944b799b152..fc2dd02ccf5f 100644 --- a/compiler-rt/lib/dfsan/done_abilist.txt +++ b/compiler-rt/lib/dfsan/done_abilist.txt @@ -40,6 +40,8 @@ fun:dfsan_sprint_stack_trace=uninstrumented fun:dfsan_sprint_stack_trace=discard fun:dfsan_get_origin=uninstrumented fun:dfsan_get_origin=custom +fun:dfsan_read_origin_of_first_taint=uninstrumented +fun:dfsan_read_origin_of_first_taint=discard fun:dfsan_get_init_origin=uninstrumented fun:dfsan_get_init_origin=discard fun:dfsan_get_track_origins=uninstrumented diff --git a/compiler-rt/test/dfsan/origin_of_first_taint.c b/compiler-rt/test/dfsan/origin_of_first_taint.c new file mode 100644 index 000000000000..45cd6146f031 --- /dev/null +++ b/compiler-rt/test/dfsan/origin_of_first_taint.c @@ -0,0 +1,34 @@ +// RUN: %clang_dfsan -gmlt -mllvm -dfsan-track-origins=1 %s -o %t && \ +// RUN: %run %t 2>&1 | FileCheck %s +// +// REQUIRES: x86_64-target-arch + +#include +#include +#include + +__attribute__((noinline)) uint64_t foo(uint64_t a, uint64_t b) { return a + b; } + +int main(int argc, char *argv[]) { + uint64_t a = 10; + uint64_t b = 20; + dfsan_set_label(8, &a, sizeof(a)); + uint64_t c = foo(a, b); + + dfsan_origin c_orig = dfsan_get_origin(c); + fprintf(stderr, "c_orig 0x%x\n", c_orig); + // CHECK: c_orig 0x[[#%x,C_ORIG:]] + assert(c_orig != 0); + dfsan_print_origin_id_trace(c_orig); + // CHECK: Origin value: 0x[[#%x,C_ORIG]], Taint value was created at + + uint64_t d[4] = {1, 2, 3, c}; + dfsan_origin d_orig = dfsan_read_origin_of_first_taint(d, sizeof(d)); + fprintf(stderr, "d_orig 0x%x\n", d_orig); + // CHECK: d_orig 0x[[#%x,D_ORIG:]] + assert(d_orig != 0); + dfsan_print_origin_id_trace(d_orig); + // CHECK: Origin value: 0x[[#%x,D_ORIG]], Taint value was stored to memory at + // CHECK: Origin value: 0x[[#%x,C_ORIG]], Taint value was created at + return 0; +} From ad26b0b233d5dc4b792012e7f42e04b1a865c865 Mon Sep 17 00:00:00 2001 From: Fangrui Song Date: Wed, 22 Dec 2021 23:55:11 -0800 Subject: [PATCH 120/293] Revert "[ELF] Make Partition/InStruct members unique_ptr and remove associate make" This reverts commit e48b1c8a27f0fbd791edc8e45756b268caadfa66. This reverts commit d019de23a1d761225fdaf0c47394ba58143aea9a. The changes caused memory leaks (non-final classes cannot use unique_ptr). 
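To make the failure mode concrete, here is a minimal standalone sketch of the pitfall, my reconstruction of what "non-final classes cannot use unique_ptr" means in practice rather than code from lld: destroying a derived object through a `std::unique_ptr` of a base class whose destructor is non-virtual only runs the base destructor, so anything owned by the derived part is leaked.

```cpp
#include <memory>
#include <vector>

struct SectionBase {   // stand-in for the non-final lld section classes
  ~SectionBase() {}    // non-virtual destructor, as the revert message implies
};

struct DerivedSection : SectionBase {
  std::vector<int> contents = std::vector<int>(1024); // owned by the derived part
};

int main() {
  // Only ~SectionBase() runs when `sec` goes out of scope, so `contents` is
  // never destroyed: formally undefined behaviour, seen in practice as a leak.
  std::unique_ptr<SectionBase> sec = std::make_unique<DerivedSection>();
  return 0;
}
```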
--- lld/ELF/Driver.cpp | 6 +- lld/ELF/InputFiles.cpp | 8 +-- lld/ELF/LinkerScript.cpp | 6 +- lld/ELF/Relocations.cpp | 8 +-- lld/ELF/SyntheticSections.cpp | 36 ++++++----- lld/ELF/SyntheticSections.h | 74 +++++++++++------------ lld/ELF/Writer.cpp | 111 ++++++++++++++++------------------ 7 files changed, 118 insertions(+), 131 deletions(-) diff --git a/lld/ELF/Driver.cpp b/lld/ELF/Driver.cpp index 65ca1c9afd66..7d01b7f33dec 100644 --- a/lld/ELF/Driver.cpp +++ b/lld/ELF/Driver.cpp @@ -99,8 +99,7 @@ bool elf::link(ArrayRef args, bool canExitEarly, tar = nullptr; memset(&in, 0, sizeof(in)); - partitions.clear(); - partitions.emplace_back(); + partitions = {Partition()}; SharedFile::vernauxNum = 0; }; @@ -117,8 +116,7 @@ bool elf::link(ArrayRef args, bool canExitEarly, script = std::make_unique(); symtab = std::make_unique(); - partitions.clear(); - partitions.emplace_back(); + partitions = {Partition()}; config->progName = args[0]; diff --git a/lld/ELF/InputFiles.cpp b/lld/ELF/InputFiles.cpp index f981f33d963d..2e5bab7b9fa2 100644 --- a/lld/ELF/InputFiles.cpp +++ b/lld/ELF/InputFiles.cpp @@ -878,8 +878,8 @@ InputSectionBase *ObjFile::createInputSection(uint32_t idx, // to work. In a full implementation we would merge all attribute // sections. if (in.attributes == nullptr) { - in.attributes = std::make_unique(*this, sec, name); - return in.attributes.get(); + in.attributes = make(*this, sec, name); + return in.attributes; } return &InputSection::discarded; } @@ -900,8 +900,8 @@ InputSectionBase *ObjFile::createInputSection(uint32_t idx, // standard extensions to enable. In a full implementation we would merge // all attribute sections. if (in.attributes == nullptr) { - in.attributes = std::make_unique(*this, sec, name); - return in.attributes.get(); + in.attributes = make(*this, sec, name); + return in.attributes; } return &InputSection::discarded; } diff --git a/lld/ELF/LinkerScript.cpp b/lld/ELF/LinkerScript.cpp index 341d0d1e6851..999bb94d6416 100644 --- a/lld/ELF/LinkerScript.cpp +++ b/lld/ELF/LinkerScript.cpp @@ -561,15 +561,15 @@ LinkerScript::computeInputSections(const InputSectionDescription *cmd, } void LinkerScript::discard(InputSectionBase &s) { - if (&s == in.shStrTab.get() || &s == mainPart->relrDyn.get()) + if (&s == in.shStrTab || &s == mainPart->relrDyn) error("discarding " + s.name + " section is not allowed"); // You can discard .hash and .gnu.hash sections by linker scripts. Since // they are synthesized sections, we need to handle them differently than // other regular sections. - if (&s == mainPart->gnuHashTab.get()) + if (&s == mainPart->gnuHashTab) mainPart->gnuHashTab = nullptr; - if (&s == mainPart->hashTab.get()) + if (&s == mainPart->hashTab) mainPart->hashTab = nullptr; s.markDead(); diff --git a/lld/ELF/Relocations.cpp b/lld/ELF/Relocations.cpp index d6f7d0a935c8..c4438be0cb59 100644 --- a/lld/ELF/Relocations.cpp +++ b/lld/ELF/Relocations.cpp @@ -870,7 +870,7 @@ static void addGotEntry(Symbol &sym) { // If preemptible, emit a GLOB_DAT relocation. 
if (sym.isPreemptible) { - mainPart->relaDyn->addReloc({target->gotRel, in.got.get(), off, + mainPart->relaDyn->addReloc({target->gotRel, in.got, off, DynamicReloc::AgainstSymbol, sym, 0, R_ABS}); return; } @@ -1205,8 +1205,8 @@ handleTlsRelocation(RelType type, Symbol &sym, InputSectionBase &c, in.got->relocations.push_back( {R_ADDEND, target->symbolicRel, in.got->getTlsIndexOff(), 1, &sym}); else - mainPart->relaDyn->addReloc({target->tlsModuleIndexRel, in.got.get(), - in.got->getTlsIndexOff()}); + mainPart->relaDyn->addReloc( + {target->tlsModuleIndexRel, in.got, in.got->getTlsIndexOff()}); } c.relocations.push_back({expr, type, offset, addend, &sym}); return 1; @@ -1592,7 +1592,7 @@ static bool handleNonPreemptibleIfunc(Symbol &sym) { if (sym.hasDirectReloc) { // Change the value to the IPLT and redirect all references to it. auto &d = cast(sym); - d.section = in.iplt.get(); + d.section = in.iplt; d.value = sym.pltIndex * target->ipltEntrySize; d.size = 0; // It's important to set the symbol type here so that dynamic loaders diff --git a/lld/ELF/SyntheticSections.cpp b/lld/ELF/SyntheticSections.cpp index 93d6ed32016f..9bf6118ebc3f 100644 --- a/lld/ELF/SyntheticSections.cpp +++ b/lld/ELF/SyntheticSections.cpp @@ -1265,11 +1265,11 @@ DynamicSection::DynamicSection() // .rela.dyn // // DT_RELASZ is the total size of the included sections. -static uint64_t addRelaSz(const RelocationBaseSection &relaDyn) { - size_t size = relaDyn.getSize(); - if (in.relaIplt->getParent() == relaDyn.getParent()) +static uint64_t addRelaSz(RelocationBaseSection *relaDyn) { + size_t size = relaDyn->getSize(); + if (in.relaIplt->getParent() == relaDyn->getParent()) size += in.relaIplt->getSize(); - if (in.relaPlt->getParent() == relaDyn.getParent()) + if (in.relaPlt->getParent() == relaDyn->getParent()) size += in.relaPlt->getSize(); return size; } @@ -1375,8 +1375,7 @@ DynamicSection::computeContents() { (in.relaIplt->isNeeded() && part.relaDyn->getParent() == in.relaIplt->getParent())) { addInSec(part.relaDyn->dynamicTag, *part.relaDyn); - entries.emplace_back(part.relaDyn->sizeDynamicTag, - addRelaSz(*part.relaDyn)); + entries.emplace_back(part.relaDyn->sizeDynamicTag, addRelaSz(part.relaDyn)); bool isRela = config->isRela; addInt(isRela ? DT_RELAENT : DT_RELENT, @@ -1627,7 +1626,7 @@ void RelocationBaseSection::addReloc(const DynamicReloc &reloc) { } void RelocationBaseSection::finalizeContents() { - SymbolTableBaseSection *symTab = getPartition().dynSymTab.get(); + SymbolTableBaseSection *symTab = getPartition().dynSymTab; // When linking glibc statically, .rel{,a}.plt contains R_*_IRELATIVE // relocations due to IFUNC (e.g. strcpy). 
sh_link will be set to 0 in that @@ -1637,11 +1636,11 @@ void RelocationBaseSection::finalizeContents() { else getParent()->link = 0; - if (in.relaPlt.get() == this && in.gotPlt->getParent()) { + if (in.relaPlt == this && in.gotPlt->getParent()) { getParent()->flags |= ELF::SHF_INFO_LINK; getParent()->info = in.gotPlt->getParent()->sectionIndex; } - if (in.relaIplt.get() == this && in.igotPlt->getParent()) { + if (in.relaIplt == this && in.igotPlt->getParent()) { getParent()->flags |= ELF::SHF_INFO_LINK; getParent()->info = in.igotPlt->getParent()->sectionIndex; } @@ -1678,7 +1677,7 @@ RelocationSection::RelocationSection(StringRef name, bool sort) } template void RelocationSection::writeTo(uint8_t *buf) { - SymbolTableBaseSection *symTab = getPartition().dynSymTab.get(); + SymbolTableBaseSection *symTab = getPartition().dynSymTab; parallelForEach(relocs, [symTab](DynamicReloc &rel) { rel.computeRaw(symTab); }); @@ -1773,8 +1772,8 @@ bool AndroidPackedRelocationSection::updateAllocSize() { for (const DynamicReloc &rel : relocs) { Elf_Rela r; r.r_offset = rel.getOffset(); - r.setSymbolAndType(rel.getSymIndex(getPartition().dynSymTab.get()), - rel.type, false); + r.setSymbolAndType(rel.getSymIndex(getPartition().dynSymTab), rel.type, + false); if (config->isRela) r.r_addend = rel.computeAddend(); @@ -2100,7 +2099,7 @@ void SymbolTableBaseSection::finalizeContents() { // Only the main partition's dynsym indexes are stored in the symbols // themselves. All other partitions use a lookup table. - if (this == mainPart->dynSymTab.get()) { + if (this == mainPart->dynSymTab) { size_t i = 0; for (const SymbolTableEntry &s : symbols) s.sym->dynsymIndex = ++i; @@ -2147,7 +2146,7 @@ void SymbolTableBaseSection::addSymbol(Symbol *b) { } size_t SymbolTableBaseSection::getSymbolIndex(Symbol *sym) { - if (this == mainPart->dynSymTab.get()) + if (this == mainPart->dynSymTab) return sym->dynsymIndex; // Initializes symbol lookup tables lazily. This is used only for -r, @@ -2482,7 +2481,7 @@ HashTableSection::HashTableSection() } void HashTableSection::finalizeContents() { - SymbolTableBaseSection *symTab = getPartition().dynSymTab.get(); + SymbolTableBaseSection *symTab = getPartition().dynSymTab; if (OutputSection *sec = symTab->getParent()) getParent()->link = sec->sectionIndex; @@ -2496,7 +2495,7 @@ void HashTableSection::finalizeContents() { } void HashTableSection::writeTo(uint8_t *buf) { - SymbolTableBaseSection *symTab = getPartition().dynSymTab.get(); + SymbolTableBaseSection *symTab = getPartition().dynSymTab; // See comment in GnuHashTableSection::writeTo. memset(buf, 0, size); @@ -3802,9 +3801,8 @@ void PartitionIndexSection::writeTo(uint8_t *buf) { write32(buf, mainPart->dynStrTab->getVA() + partitions[i].nameStrTab - va); write32(buf + 4, partitions[i].elfHeader->getVA() - (va + 4)); - SyntheticSection *next = i == partitions.size() - 1 - ? in.partEnd.get() - : partitions[i + 1].elfHeader.get(); + SyntheticSection *next = + i == partitions.size() - 1 ? 
in.partEnd : partitions[i + 1].elfHeader; write32(buf + 8, next->getVA() - partitions[i].elfHeader->getVA()); va += 12; diff --git a/lld/ELF/SyntheticSections.h b/lld/ELF/SyntheticSections.h index 8c5d1ae88672..c35e19cf2fb4 100644 --- a/lld/ELF/SyntheticSections.h +++ b/lld/ELF/SyntheticSections.h @@ -1203,24 +1203,24 @@ struct Partition { StringRef name; uint64_t nameStrTab; - std::unique_ptr elfHeader; - std::unique_ptr programHeaders; + SyntheticSection *elfHeader; + SyntheticSection *programHeaders; std::vector phdrs; - std::unique_ptr armExidx; - std::unique_ptr buildId; - std::unique_ptr dynamic; - std::unique_ptr dynStrTab; - std::unique_ptr dynSymTab; - std::unique_ptr ehFrameHdr; - std::unique_ptr ehFrame; - std::unique_ptr gnuHashTab; - std::unique_ptr hashTab; - std::unique_ptr relaDyn; - std::unique_ptr relrDyn; - std::unique_ptr verDef; - std::unique_ptr verNeed; - std::unique_ptr verSym; + ARMExidxSyntheticSection *armExidx; + BuildIdSection *buildId; + SyntheticSection *dynamic; + StringTableSection *dynStrTab; + SymbolTableBaseSection *dynSymTab; + EhFrameHeader *ehFrameHdr; + EhFrameSection *ehFrame; + GnuHashTableSection *gnuHashTab; + HashTableSection *hashTab; + RelocationBaseSection *relaDyn; + RelrBaseSection *relrDyn; + VersionDefinitionSection *verDef; + SyntheticSection *verNeed; + VersionTableSection *verSym; unsigned getNumber() const { return this - &partitions[0] + 1; } }; @@ -1235,27 +1235,27 @@ inline Partition &SectionBase::getPartition() const { // Linker generated sections which can be used as inputs and are not specific to // a partition. struct InStruct { - std::unique_ptr attributes; - std::unique_ptr bss; - std::unique_ptr bssRelRo; - std::unique_ptr got; - std::unique_ptr gotPlt; - std::unique_ptr igotPlt; - std::unique_ptr ppc64LongBranchTarget; - std::unique_ptr mipsGot; - std::unique_ptr mipsRldMap; - std::unique_ptr partEnd; - std::unique_ptr partIndex; - std::unique_ptr plt; - std::unique_ptr iplt; - std::unique_ptr ppc32Got2; - std::unique_ptr ibtPlt; - std::unique_ptr relaPlt; - std::unique_ptr relaIplt; - std::unique_ptr shStrTab; - std::unique_ptr strTab; - std::unique_ptr symTab; - std::unique_ptr symTabShndx; + InputSection *attributes; + BssSection *bss; + BssSection *bssRelRo; + GotSection *got; + GotPltSection *gotPlt; + IgotPltSection *igotPlt; + PPC64LongBranchTargetSection *ppc64LongBranchTarget; + MipsGotSection *mipsGot; + MipsRldMapSection *mipsRldMap; + SyntheticSection *partEnd; + SyntheticSection *partIndex; + PltSection *plt; + IpltSection *iplt; + PPC32Got2Section *ppc32Got2; + IBTPltSection *ibtPlt; + RelocationBaseSection *relaPlt; + RelocationBaseSection *relaIplt; + StringTableSection *shStrTab; + StringTableSection *strTab; + SymbolTableBaseSection *symTab; + SymtabShndxSection *symTabShndx; }; extern InStruct in; diff --git a/lld/ELF/Writer.cpp b/lld/ELF/Writer.cpp index 5fc3f6cb3efa..653a2266b531 100644 --- a/lld/ELF/Writer.cpp +++ b/lld/ELF/Writer.cpp @@ -299,18 +299,18 @@ template void elf::createSyntheticSections() { auto add = [](SyntheticSection &sec) { inputSections.push_back(&sec); }; - in.shStrTab = std::make_unique(".shstrtab", false); + in.shStrTab = make(".shstrtab", false); Out::programHeaders = make("", 0, SHF_ALLOC); Out::programHeaders->alignment = config->wordsize; if (config->strip != StripPolicy::All) { - in.strTab = std::make_unique(".strtab", false); - in.symTab = std::make_unique>(*in.strTab); - in.symTabShndx = std::make_unique(); + in.strTab = make(".strtab", false); + in.symTab = 
make>(*in.strTab); + in.symTabShndx = make(); } - in.bss = std::make_unique(".bss", 0, 1); + in.bss = make(".bss", 0, 1); add(*in.bss); // If there is a SECTIONS command and a .data.rel.ro section name use name @@ -318,14 +318,14 @@ template void elf::createSyntheticSections() { // This makes sure our relro is contiguous. bool hasDataRelRo = script->hasSectionsCommand && findSection(".data.rel.ro", 0); - in.bssRelRo = std::make_unique( - hasDataRelRo ? ".data.rel.ro.bss" : ".bss.rel.ro", 0, 1); + in.bssRelRo = + make(hasDataRelRo ? ".data.rel.ro.bss" : ".bss.rel.ro", 0, 1); add(*in.bssRelRo); // Add MIPS-specific sections. if (config->emachine == EM_MIPS) { if (!config->shared && config->hasDynSymTab) { - in.mipsRldMap = std::make_unique(); + in.mipsRldMap = make(); add(*in.mipsRldMap); } if (auto *sec = MipsAbiFlagsSection::create()) @@ -345,52 +345,49 @@ template void elf::createSyntheticSections() { }; if (!part.name.empty()) { - part.elfHeader = std::make_unique>(); + part.elfHeader = make>(); part.elfHeader->name = part.name; add(*part.elfHeader); - part.programHeaders = - std::make_unique>(); + part.programHeaders = make>(); add(*part.programHeaders); } if (config->buildId != BuildIdKind::None) { - part.buildId = std::make_unique(); + part.buildId = make(); add(*part.buildId); } - part.dynStrTab = std::make_unique(".dynstr", true); - part.dynSymTab = - std::make_unique>(*part.dynStrTab); - part.dynamic = std::make_unique>(); + part.dynStrTab = make(".dynstr", true); + part.dynSymTab = make>(*part.dynStrTab); + part.dynamic = make>(); if (config->androidPackDynRelocs) - part.relaDyn = - std::make_unique>(relaDynName); + part.relaDyn = make>(relaDynName); else - part.relaDyn = std::make_unique>( - relaDynName, config->zCombreloc); + part.relaDyn = + make>(relaDynName, config->zCombreloc); if (config->hasDynSymTab) { add(*part.dynSymTab); - part.verSym = std::make_unique(); + part.verSym = make(); add(*part.verSym); if (!namedVersionDefs().empty()) { - part.verDef = std::make_unique(); + part.verDef = make(); add(*part.verDef); } - part.verNeed = std::make_unique>(); + part.verNeed = make>(); add(*part.verNeed); if (config->gnuHash) { - part.gnuHashTab = std::make_unique(); + part.gnuHashTab = make(); add(*part.gnuHashTab); } if (config->sysvHash) { - part.hashTab = std::make_unique(); + part.hashTab = make(); add(*part.hashTab); } @@ -400,23 +397,23 @@ template void elf::createSyntheticSections() { } if (config->relrPackDynRelocs) { - part.relrDyn = std::make_unique>(); + part.relrDyn = make>(); add(*part.relrDyn); } if (!config->relocatable) { if (config->ehFrameHdr) { - part.ehFrameHdr = std::make_unique(); + part.ehFrameHdr = make(); add(*part.ehFrameHdr); } - part.ehFrame = std::make_unique(); + part.ehFrame = make(); add(*part.ehFrame); } if (config->emachine == EM_ARM && !config->relocatable) { // The ARMExidxsyntheticsection replaces all the individual .ARM.exidx // InputSections. - part.armExidx = std::make_unique(); + part.armExidx = make(); add(*part.armExidx); } } @@ -425,14 +422,13 @@ template void elf::createSyntheticSections() { // Create the partition end marker. This needs to be in partition number 255 // so that it is sorted after all other partitions. It also has other // special handling (see createPhdrs() and combineEhSections()). 
- in.partEnd = - std::make_unique(".part.end", config->maxPageSize, 1); + in.partEnd = make(".part.end", config->maxPageSize, 1); in.partEnd->partition = 255; add(*in.partEnd); - in.partIndex = std::make_unique(); - addOptionalRegular("__part_index_begin", in.partIndex.get(), 0); - addOptionalRegular("__part_index_end", in.partIndex.get(), + in.partIndex = make(); + addOptionalRegular("__part_index_begin", in.partIndex, 0); + addOptionalRegular("__part_index_end", in.partIndex, in.partIndex->getSize()); add(*in.partIndex); } @@ -440,26 +436,26 @@ template void elf::createSyntheticSections() { // Add .got. MIPS' .got is so different from the other archs, // it has its own class. if (config->emachine == EM_MIPS) { - in.mipsGot = std::make_unique(); + in.mipsGot = make(); add(*in.mipsGot); } else { - in.got = std::make_unique(); + in.got = make(); add(*in.got); } if (config->emachine == EM_PPC) { - in.ppc32Got2 = std::make_unique(); + in.ppc32Got2 = make(); add(*in.ppc32Got2); } if (config->emachine == EM_PPC64) { - in.ppc64LongBranchTarget = std::make_unique(); + in.ppc64LongBranchTarget = make(); add(*in.ppc64LongBranchTarget); } - in.gotPlt = std::make_unique(); + in.gotPlt = make(); add(*in.gotPlt); - in.igotPlt = std::make_unique(); + in.igotPlt = make(); add(*in.igotPlt); // _GLOBAL_OFFSET_TABLE_ is defined relative to either .got.plt or .got. Treat @@ -476,7 +472,7 @@ template void elf::createSyntheticSections() { // We always need to add rel[a].plt to output if it has entries. // Even for static linking it can contain R_[*]_IRELATIVE relocations. - in.relaPlt = std::make_unique>( + in.relaPlt = make>( config->isRela ? ".rela.plt" : ".rel.plt", /*sort=*/false); add(*in.relaPlt); @@ -486,23 +482,21 @@ template void elf::createSyntheticSections() { // that would cause a section type mismatch. However, because the Android // dynamic loader reads .rel.plt after .rel.dyn, we can get the desired // behaviour by placing the iplt section in .rel.plt. - in.relaIplt = std::make_unique>( + in.relaIplt = make>( config->androidPackDynRelocs ? in.relaPlt->name : relaDynName, /*sort=*/false); add(*in.relaIplt); if ((config->emachine == EM_386 || config->emachine == EM_X86_64) && (config->andFeatures & GNU_PROPERTY_X86_FEATURE_1_IBT)) { - in.ibtPlt = std::make_unique(); + in.ibtPlt = make(); add(*in.ibtPlt); } - if (config->emachine == EM_PPC) - in.plt = std::make_unique(); - else - in.plt = std::make_unique(); + in.plt = config->emachine == EM_PPC ? make() + : make(); add(*in.plt); - in.iplt = std::make_unique(); + in.iplt = make(); add(*in.iplt); if (config->andFeatures) @@ -1072,17 +1066,17 @@ template void Writer::setReservedSymbolSections() { if (ElfSym::globalOffsetTable) { // The _GLOBAL_OFFSET_TABLE_ symbol is defined by target convention usually // to the start of the .got or .got.plt section. - InputSection *sec = in.gotPlt.get(); + InputSection *gotSection = in.gotPlt; if (!target->gotBaseSymInGotPlt) - sec = in.mipsGot.get() ? cast(in.mipsGot.get()) - : cast(in.got.get()); - ElfSym::globalOffsetTable->section = sec; + gotSection = in.mipsGot ? cast(in.mipsGot) + : cast(in.got); + ElfSym::globalOffsetTable->section = gotSection; } // .rela_iplt_{start,end} mark the start and the end of in.relaIplt. 
if (ElfSym::relaIpltStart && in.relaIplt->isNeeded()) { - ElfSym::relaIpltStart->section = in.relaIplt.get(); - ElfSym::relaIpltEnd->section = in.relaIplt.get(); + ElfSym::relaIpltStart->section = in.relaIplt; + ElfSym::relaIpltEnd->section = in.relaIplt; ElfSym::relaIpltEnd->value = in.relaIplt->getSize(); } @@ -1650,9 +1644,6 @@ static void finalizeSynthetic(SyntheticSection *sec) { sec->finalizeContents(); } } -template static void finalizeSynthetic(std::unique_ptr &sec) { - finalizeSynthetic(sec.get()); -} // We need to generate and finalize the content that depends on the address of // InputSections. As the generation of the content may also alter InputSection @@ -1889,9 +1880,9 @@ template void Writer::finalizeSections() { // Even the author of gold doesn't remember why gold behaves that way. // https://sourceware.org/ml/binutils/2002-03/msg00360.html if (mainPart->dynamic->parent) - symtab->addSymbol( - Defined{/*file=*/nullptr, "_DYNAMIC", STB_WEAK, STV_HIDDEN, STT_NOTYPE, - /*value=*/0, /*size=*/0, mainPart->dynamic.get()}); + symtab->addSymbol(Defined{/*file=*/nullptr, "_DYNAMIC", STB_WEAK, + STV_HIDDEN, STT_NOTYPE, + /*value=*/0, /*size=*/0, mainPart->dynamic}); // Define __rel[a]_iplt_{start,end} symbols if needed. addRelIpltSymbols(); From 8043beb8901cb95fce4266b498262acf08494510 Mon Sep 17 00:00:00 2001 From: Nikita Popov Date: Wed, 22 Dec 2021 17:23:10 +0100 Subject: [PATCH 121/293] [JSONNodeDumper] Do not print mangled names for local variables (PR49111) Mangled names are not meaningful for variables with local storage, and may not be well defined (getting the mangled name for VLA crashes the mangler). As such, do not include them in the JSON dump. This allows running update_cc_test_checks on some OpenMP tests again. Fixes https://github.com/llvm/llvm-project/issues/49111. Differential Revision: https://reviews.llvm.org/D116169 --- clang/lib/AST/JSONNodeDumper.cpp | 17 ++-- clang/test/AST/ast-dump-decl-json.c | 5 -- clang/test/AST/ast-dump-decl-json.m | 16 ---- clang/test/AST/ast-dump-expr-json.c | 10 --- clang/test/AST/ast-dump-expr-json.cpp | 18 ----- clang/test/AST/ast-dump-expr-json.m | 9 --- clang/test/AST/ast-dump-funcs-json.cpp | 4 - clang/test/AST/ast-dump-if-json.cpp | 2 - clang/test/AST/ast-dump-stmt-json.c | 81 ++++++++++++++++--- clang/test/AST/ast-dump-stmt-json.cpp | 29 ------- clang/test/AST/ast-dump-stmt-json.m | 4 - .../test/AST/ast-dump-template-decls-json.cpp | 1 - 12 files changed, 82 insertions(+), 114 deletions(-) diff --git a/clang/lib/AST/JSONNodeDumper.cpp b/clang/lib/AST/JSONNodeDumper.cpp index 86879b8c3533..ae585def4d07 100644 --- a/clang/lib/AST/JSONNodeDumper.cpp +++ b/clang/lib/AST/JSONNodeDumper.cpp @@ -744,11 +744,18 @@ void JSONNodeDumper::VisitNamedDecl(const NamedDecl *ND) { JOS.attribute("name", ND->getNameAsString()); // FIXME: There are likely other contexts in which it makes no sense to ask // for a mangled name. - if (!isa(ND->getDeclContext())) { - std::string MangledName = ASTNameGen.getName(ND); - if (!MangledName.empty()) - JOS.attribute("mangledName", MangledName); - } + if (isa(ND->getDeclContext())) + return; + + // Mangled names are not meaningful for locals, and may not be well-defined + // in the case of VLAs. 
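// For reference, the regression test added further down in this patch (in
// clang/test/AST/ast-dump-stmt-json.c) exercises the crashing case: a local
// variable-length array, whose mangled name the AST name generator cannot
// produce. With this change the JSON dump of "vla" simply omits "mangledName".
void TestVLA(int n) {
  double vla[n]; // local storage + VLA type: previously crashed the mangler
}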
+ auto *VD = dyn_cast(ND); + if (VD && VD->hasLocalStorage()) + return; + + std::string MangledName = ASTNameGen.getName(ND); + if (!MangledName.empty()) + JOS.attribute("mangledName", MangledName); } } diff --git a/clang/test/AST/ast-dump-decl-json.c b/clang/test/AST/ast-dump-decl-json.c index 7bc37a76b800..c958a624c314 100644 --- a/clang/test/AST/ast-dump-decl-json.c +++ b/clang/test/AST/ast-dump-decl-json.c @@ -1178,7 +1178,6 @@ void testParmVarDecl(int TestParmVarDecl); // CHECK-NEXT: }, // CHECK-NEXT: "isUsed": true, // CHECK-NEXT: "name": "x", -// CHECK-NEXT: "mangledName": "x", // CHECK-NEXT: "type": { // CHECK-NEXT: "qualType": "int" // CHECK-NEXT: } @@ -1204,7 +1203,6 @@ void testParmVarDecl(int TestParmVarDecl); // CHECK-NEXT: } // CHECK-NEXT: }, // CHECK-NEXT: "name": "y", -// CHECK-NEXT: "mangledName": "y", // CHECK-NEXT: "type": { // CHECK-NEXT: "desugaredQualType": "enum (unnamed at {{.*}}:69:29)", // CHECK-NEXT: "qualType": "enum (unnamed enum at {{.*}}:69:29)" @@ -1352,7 +1350,6 @@ void testParmVarDecl(int TestParmVarDecl); // CHECK-NEXT: }, // CHECK-NEXT: "isUsed": true, // CHECK-NEXT: "name": "x", -// CHECK-NEXT: "mangledName": "x", // CHECK-NEXT: "type": { // CHECK-NEXT: "desugaredQualType": "enum Enum", // CHECK-NEXT: "qualType": "enum Enum" @@ -1523,7 +1520,6 @@ void testParmVarDecl(int TestParmVarDecl); // CHECK-NEXT: } // CHECK-NEXT: }, // CHECK-NEXT: "name": "x", -// CHECK-NEXT: "mangledName": "x", // CHECK-NEXT: "type": { // CHECK-NEXT: "qualType": "int" // CHECK-NEXT: } @@ -1888,7 +1884,6 @@ void testParmVarDecl(int TestParmVarDecl); // CHECK-NEXT: } // CHECK-NEXT: }, // CHECK-NEXT: "name": "TestParmVarDecl", -// CHECK-NEXT: "mangledName": "TestParmVarDecl", // CHECK-NEXT: "type": { // CHECK-NEXT: "qualType": "int" // CHECK-NEXT: } diff --git a/clang/test/AST/ast-dump-decl-json.m b/clang/test/AST/ast-dump-decl-json.m index 9778ff5bb6ac..dbe20d8edc73 100644 --- a/clang/test/AST/ast-dump-decl-json.m +++ b/clang/test/AST/ast-dump-decl-json.m @@ -344,7 +344,6 @@ void f() { // CHECK-NEXT: } // CHECK-NEXT: }, // CHECK-NEXT: "name": "i", -// CHECK-NEXT: "mangledName": "_i", // CHECK-NEXT: "type": { // CHECK-NEXT: "qualType": "int" // CHECK-NEXT: } @@ -393,7 +392,6 @@ void f() { // CHECK-NEXT: }, // CHECK-NEXT: "isImplicit": true, // CHECK-NEXT: "name": "self", -// CHECK-NEXT: "mangledName": "_self", // CHECK-NEXT: "type": { // CHECK-NEXT: "qualType": "testObjCMethodDecl *" // CHECK-NEXT: } @@ -408,7 +406,6 @@ void f() { // CHECK-NEXT: }, // CHECK-NEXT: "isImplicit": true, // CHECK-NEXT: "name": "_cmd", -// CHECK-NEXT: "mangledName": "__cmd", // CHECK-NEXT: "type": { // CHECK-NEXT: "desugaredQualType": "SEL *", // CHECK-NEXT: "qualType": "SEL", @@ -437,7 +434,6 @@ void f() { // CHECK-NEXT: } // CHECK-NEXT: }, // CHECK-NEXT: "name": "i", -// CHECK-NEXT: "mangledName": "_i", // CHECK-NEXT: "type": { // CHECK-NEXT: "qualType": "int" // CHECK-NEXT: } @@ -735,7 +731,6 @@ void f() { // CHECK-NEXT: }, // CHECK-NEXT: "isImplicit": true, // CHECK-NEXT: "name": "self", -// CHECK-NEXT: "mangledName": "_self", // CHECK-NEXT: "type": { // CHECK-NEXT: "qualType": "TestObjCClass *" // CHECK-NEXT: } @@ -750,7 +745,6 @@ void f() { // CHECK-NEXT: }, // CHECK-NEXT: "isImplicit": true, // CHECK-NEXT: "name": "_cmd", -// CHECK-NEXT: "mangledName": "__cmd", // CHECK-NEXT: "type": { // CHECK-NEXT: "desugaredQualType": "SEL *", // CHECK-NEXT: "qualType": "SEL", @@ -999,7 +993,6 @@ void f() { // CHECK-NEXT: }, // CHECK-NEXT: "isImplicit": true, // CHECK-NEXT: "name": "self", -// CHECK-NEXT: 
"mangledName": "_self", // CHECK-NEXT: "type": { // CHECK-NEXT: "qualType": "TestObjCClass *" // CHECK-NEXT: } @@ -1014,7 +1007,6 @@ void f() { // CHECK-NEXT: }, // CHECK-NEXT: "isImplicit": true, // CHECK-NEXT: "name": "_cmd", -// CHECK-NEXT: "mangledName": "__cmd", // CHECK-NEXT: "type": { // CHECK-NEXT: "desugaredQualType": "SEL *", // CHECK-NEXT: "qualType": "SEL", @@ -1258,7 +1250,6 @@ void f() { // CHECK-NEXT: } // CHECK-NEXT: }, // CHECK-NEXT: "name": "foo", -// CHECK-NEXT: "mangledName": "_foo", // CHECK-NEXT: "type": { // CHECK-NEXT: "qualType": "int" // CHECK-NEXT: } @@ -1343,7 +1334,6 @@ void f() { // CHECK-NEXT: } // CHECK-NEXT: }, // CHECK-NEXT: "name": "bar", -// CHECK-NEXT: "mangledName": "_bar", // CHECK-NEXT: "type": { // CHECK-NEXT: "qualType": "int" // CHECK-NEXT: } @@ -1593,7 +1583,6 @@ void f() { // CHECK-NEXT: } // CHECK-NEXT: }, // CHECK-NEXT: "name": "foo", -// CHECK-NEXT: "mangledName": "_foo", // CHECK-NEXT: "type": { // CHECK-NEXT: "qualType": "int" // CHECK-NEXT: } @@ -1679,7 +1668,6 @@ void f() { // CHECK-NEXT: } // CHECK-NEXT: }, // CHECK-NEXT: "name": "bar", -// CHECK-NEXT: "mangledName": "_bar", // CHECK-NEXT: "type": { // CHECK-NEXT: "qualType": "int" // CHECK-NEXT: } @@ -1741,7 +1729,6 @@ void f() { // CHECK-NEXT: }, // CHECK-NEXT: "isUsed": true, // CHECK-NEXT: "name": "x", -// CHECK-NEXT: "mangledName": "_x", // CHECK-NEXT: "type": { // CHECK-NEXT: "qualType": "int" // CHECK-NEXT: } @@ -1854,7 +1841,6 @@ void f() { // CHECK-NEXT: } // CHECK-NEXT: }, // CHECK-NEXT: "name": "y", -// CHECK-NEXT: "mangledName": "_y", // CHECK-NEXT: "type": { // CHECK-NEXT: "qualType": "int" // CHECK-NEXT: } @@ -1960,7 +1946,6 @@ void f() { // CHECK-NEXT: } // CHECK-NEXT: }, // CHECK-NEXT: "name": "y", -// CHECK-NEXT: "mangledName": "_y", // CHECK-NEXT: "type": { // CHECK-NEXT: "qualType": "int" // CHECK-NEXT: } @@ -1999,7 +1984,6 @@ void f() { // CHECK-NEXT: } // CHECK-NEXT: }, // CHECK-NEXT: "name": "Test", -// CHECK-NEXT: "mangledName": "_Test", // CHECK-NEXT: "type": { // CHECK-NEXT: "desugaredQualType": "int", // CHECK-NEXT: "qualType": "typeof (B.foo)" diff --git a/clang/test/AST/ast-dump-expr-json.c b/clang/test/AST/ast-dump-expr-json.c index a018066ac514..6409ed5fe216 100644 --- a/clang/test/AST/ast-dump-expr-json.c +++ b/clang/test/AST/ast-dump-expr-json.c @@ -322,7 +322,6 @@ void PrimaryExpressions(int a) { // CHECK-NEXT: }, // CHECK-NEXT: "isUsed": true, // CHECK-NEXT: "name": "a", -// CHECK-NEXT: "mangledName": "a", // CHECK-NEXT: "type": { // CHECK-NEXT: "qualType": "int" // CHECK-NEXT: } @@ -582,7 +581,6 @@ void PrimaryExpressions(int a) { // CHECK-NEXT: }, // CHECK-NEXT: "isUsed": true, // CHECK-NEXT: "name": "a", -// CHECK-NEXT: "mangledName": "a", // CHECK-NEXT: "type": { // CHECK-NEXT: "qualType": "int" // CHECK-NEXT: } @@ -1017,7 +1015,6 @@ void PrimaryExpressions(int a) { // CHECK-NEXT: }, // CHECK-NEXT: "isUsed": true, // CHECK-NEXT: "name": "a", -// CHECK-NEXT: "mangledName": "a", // CHECK-NEXT: "type": { // CHECK-NEXT: "qualType": "int" // CHECK-NEXT: } @@ -1044,7 +1041,6 @@ void PrimaryExpressions(int a) { // CHECK-NEXT: }, // CHECK-NEXT: "isUsed": true, // CHECK-NEXT: "name": "b", -// CHECK-NEXT: "mangledName": "b", // CHECK-NEXT: "type": { // CHECK-NEXT: "qualType": "int" // CHECK-NEXT: } @@ -3391,7 +3387,6 @@ void PrimaryExpressions(int a) { // CHECK-NEXT: }, // CHECK-NEXT: "isUsed": true, // CHECK-NEXT: "name": "a", -// CHECK-NEXT: "mangledName": "a", // CHECK-NEXT: "type": { // CHECK-NEXT: "qualType": "int" // CHECK-NEXT: } @@ -3418,7 +3413,6 @@ 
void PrimaryExpressions(int a) { // CHECK-NEXT: }, // CHECK-NEXT: "isUsed": true, // CHECK-NEXT: "name": "b", -// CHECK-NEXT: "mangledName": "b", // CHECK-NEXT: "type": { // CHECK-NEXT: "qualType": "int *" // CHECK-NEXT: } @@ -4059,7 +4053,6 @@ void PrimaryExpressions(int a) { // CHECK-NEXT: }, // CHECK-NEXT: "isUsed": true, // CHECK-NEXT: "name": "a", -// CHECK-NEXT: "mangledName": "a", // CHECK-NEXT: "type": { // CHECK-NEXT: "qualType": "int *" // CHECK-NEXT: } @@ -4086,7 +4079,6 @@ void PrimaryExpressions(int a) { // CHECK-NEXT: }, // CHECK-NEXT: "isUsed": true, // CHECK-NEXT: "name": "b", -// CHECK-NEXT: "mangledName": "b", // CHECK-NEXT: "type": { // CHECK-NEXT: "desugaredQualType": "struct S", // CHECK-NEXT: "qualType": "struct S" @@ -4114,7 +4106,6 @@ void PrimaryExpressions(int a) { // CHECK-NEXT: }, // CHECK-NEXT: "isUsed": true, // CHECK-NEXT: "name": "c", -// CHECK-NEXT: "mangledName": "c", // CHECK-NEXT: "type": { // CHECK-NEXT: "qualType": "struct S *" // CHECK-NEXT: } @@ -4957,7 +4948,6 @@ void PrimaryExpressions(int a) { // CHECK-NEXT: }, // CHECK-NEXT: "isUsed": true, // CHECK-NEXT: "name": "a", -// CHECK-NEXT: "mangledName": "a", // CHECK-NEXT: "type": { // CHECK-NEXT: "qualType": "int" // CHECK-NEXT: } diff --git a/clang/test/AST/ast-dump-expr-json.cpp b/clang/test/AST/ast-dump-expr-json.cpp index 110dfd70ebad..47787d5da667 100644 --- a/clang/test/AST/ast-dump-expr-json.cpp +++ b/clang/test/AST/ast-dump-expr-json.cpp @@ -324,7 +324,6 @@ void TestNonADLCall3() { // CHECK-NEXT: }, // CHECK-NEXT: "isUsed": true, // CHECK-NEXT: "name": "obj1", -// CHECK-NEXT: "mangledName": "_ZZ19TestPointerToMember1SPS_MS_iMS_FviEE4obj1", // CHECK-NEXT: "type": { // CHECK-NEXT: "qualType": "S" // CHECK-NEXT: } @@ -351,7 +350,6 @@ void TestNonADLCall3() { // CHECK-NEXT: }, // CHECK-NEXT: "isUsed": true, // CHECK-NEXT: "name": "obj2", -// CHECK-NEXT: "mangledName": "_ZZ19TestPointerToMember1SPS_MS_iMS_FviEE4obj2", // CHECK-NEXT: "type": { // CHECK-NEXT: "qualType": "S *" // CHECK-NEXT: } @@ -378,7 +376,6 @@ void TestNonADLCall3() { // CHECK-NEXT: }, // CHECK-NEXT: "isUsed": true, // CHECK-NEXT: "name": "data", -// CHECK-NEXT: "mangledName": "_ZZ19TestPointerToMember1SPS_MS_iMS_FviEE4data", // CHECK-NEXT: "type": { // CHECK-NEXT: "qualType": "int S::*" // CHECK-NEXT: } @@ -405,7 +402,6 @@ void TestNonADLCall3() { // CHECK-NEXT: }, // CHECK-NEXT: "isUsed": true, // CHECK-NEXT: "name": "call", -// CHECK-NEXT: "mangledName": "_ZZ19TestPointerToMember1SPS_MS_iMS_FviEE4call", // CHECK-NEXT: "type": { // CHECK-NEXT: "qualType": "void (S::*)(int)" // CHECK-NEXT: } @@ -1072,7 +1068,6 @@ void TestNonADLCall3() { // CHECK-NEXT: }, // CHECK-NEXT: "isUsed": true, // CHECK-NEXT: "name": "s", -// CHECK-NEXT: "mangledName": "_ZZ11TestCastingPK1SE1s", // CHECK-NEXT: "type": { // CHECK-NEXT: "qualType": "const S *" // CHECK-NEXT: } @@ -1512,7 +1507,6 @@ void TestNonADLCall3() { // CHECK-NEXT: }, // CHECK-NEXT: "isReferenced": true, // CHECK-NEXT: "name": "p", -// CHECK-NEXT: "mangledName": "_ZZ20TestUnaryExpressionsPiE1p", // CHECK-NEXT: "type": { // CHECK-NEXT: "qualType": "int *" // CHECK-NEXT: } @@ -2539,7 +2533,6 @@ void TestNonADLCall3() { // CHECK-NEXT: }, // CHECK-NEXT: "isUsed": true, // CHECK-NEXT: "name": "a", -// CHECK-NEXT: "mangledName": "_ZZ22TestPostfixExpressions1SPS_P1UIiEE1a", // CHECK-NEXT: "type": { // CHECK-NEXT: "qualType": "S" // CHECK-NEXT: } @@ -2566,7 +2559,6 @@ void TestNonADLCall3() { // CHECK-NEXT: }, // CHECK-NEXT: "isUsed": true, // CHECK-NEXT: "name": "p", -// CHECK-NEXT: 
"mangledName": "_ZZ22TestPostfixExpressions1SPS_P1UIiEE1p", // CHECK-NEXT: "type": { // CHECK-NEXT: "qualType": "S *" // CHECK-NEXT: } @@ -2593,7 +2585,6 @@ void TestNonADLCall3() { // CHECK-NEXT: }, // CHECK-NEXT: "isUsed": true, // CHECK-NEXT: "name": "r", -// CHECK-NEXT: "mangledName": "_ZZ22TestPostfixExpressions1SPS_P1UIiEE1r", // CHECK-NEXT: "type": { // CHECK-NEXT: "qualType": "U *" // CHECK-NEXT: } @@ -3669,7 +3660,6 @@ void TestNonADLCall3() { // CHECK-NEXT: }, // CHECK-NEXT: "isReferenced": true, // CHECK-NEXT: "name": "a", -// CHECK-NEXT: "mangledName": "_ZZ22TestPrimaryExpressionsDpT_E1a", // CHECK-NEXT: "type": { // CHECK-NEXT: "qualType": "Ts..." // CHECK-NEXT: }, @@ -4369,7 +4359,6 @@ void TestNonADLCall3() { // CHECK-NEXT: }, // CHECK-NEXT: "isReferenced": true, // CHECK-NEXT: "name": "b", -// CHECK-NEXT: "mangledName": "_ZZ22TestPrimaryExpressionsDpT_E1b", // CHECK-NEXT: "type": { // CHECK-NEXT: "qualType": "int" // CHECK-NEXT: } @@ -4396,7 +4385,6 @@ void TestNonADLCall3() { // CHECK-NEXT: }, // CHECK-NEXT: "isReferenced": true, // CHECK-NEXT: "name": "c", -// CHECK-NEXT: "mangledName": "_ZZ22TestPrimaryExpressionsDpT_E1c", // CHECK-NEXT: "type": { // CHECK-NEXT: "qualType": "int" // CHECK-NEXT: } @@ -4735,7 +4723,6 @@ void TestNonADLCall3() { // CHECK-NEXT: } // CHECK-NEXT: }, // CHECK-NEXT: "name": "a", -// CHECK-NEXT: "mangledName": "_ZZZ22TestPrimaryExpressionsDpT_ENKUlizE_clEizE1a", // CHECK-NEXT: "type": { // CHECK-NEXT: "qualType": "int" // CHECK-NEXT: } @@ -4836,7 +4823,6 @@ void TestNonADLCall3() { // CHECK-NEXT: } // CHECK-NEXT: }, // CHECK-NEXT: "name": "a", -// CHECK-NEXT: "mangledName": "_ZZZ22TestPrimaryExpressionsDpT_ENUlizE_8__invokeEizE1a", // CHECK-NEXT: "type": { // CHECK-NEXT: "qualType": "int" // CHECK-NEXT: } @@ -7851,7 +7837,6 @@ void TestNonADLCall3() { // CHECK-NEXT: }, // CHECK-NEXT: "isUsed": true, // CHECK-NEXT: "name": "x", -// CHECK-NEXT: "mangledName": "_ZZ11TestADLCallvE1x", // CHECK-NEXT: "type": { // CHECK-NEXT: "desugaredQualType": "NS::X", // CHECK-NEXT: "qualType": "NS::X" @@ -8290,7 +8275,6 @@ void TestNonADLCall3() { // CHECK-NEXT: }, // CHECK-NEXT: "isUsed": true, // CHECK-NEXT: "name": "x", -// CHECK-NEXT: "mangledName": "_ZZ14TestNonADLCallvE1x", // CHECK-NEXT: "type": { // CHECK-NEXT: "desugaredQualType": "NS::X", // CHECK-NEXT: "qualType": "NS::X" @@ -8572,7 +8556,6 @@ void TestNonADLCall3() { // CHECK-NEXT: }, // CHECK-NEXT: "isUsed": true, // CHECK-NEXT: "name": "x", -// CHECK-NEXT: "mangledName": "_ZZ15TestNonADLCall2vE1x", // CHECK-NEXT: "type": { // CHECK-NEXT: "desugaredQualType": "NS::X", // CHECK-NEXT: "qualType": "NS::X" @@ -9056,7 +9039,6 @@ void TestNonADLCall3() { // CHECK-NEXT: }, // CHECK-NEXT: "isUsed": true, // CHECK-NEXT: "name": "x", -// CHECK-NEXT: "mangledName": "_ZZN19test_adl_call_three15TestNonADLCall3EvE1x", // CHECK-NEXT: "type": { // CHECK-NEXT: "qualType": "NS::X" // CHECK-NEXT: }, diff --git a/clang/test/AST/ast-dump-expr-json.m b/clang/test/AST/ast-dump-expr-json.m index aff2d279916b..cc63b1ffd249 100644 --- a/clang/test/AST/ast-dump-expr-json.m +++ b/clang/test/AST/ast-dump-expr-json.m @@ -296,7 +296,6 @@ void TestObjCBoolLiteral() { // CHECK-NEXT: }, // CHECK-NEXT: "isUsed": true, // CHECK-NEXT: "name": "Obj", -// CHECK-NEXT: "mangledName": "Obj", // CHECK-NEXT: "type": { // CHECK-NEXT: "qualType": "I *" // CHECK-NEXT: } @@ -673,7 +672,6 @@ void TestObjCBoolLiteral() { // CHECK-NEXT: } // CHECK-NEXT: }, // CHECK-NEXT: "name": "s", -// CHECK-NEXT: "mangledName": "s", // CHECK-NEXT: "type": { // 
CHECK-NEXT: "desugaredQualType": "SEL *", // CHECK-NEXT: "qualType": "SEL", @@ -765,7 +763,6 @@ void TestObjCBoolLiteral() { // CHECK-NEXT: }, // CHECK-NEXT: "isUsed": true, // CHECK-NEXT: "name": "Obj", -// CHECK-NEXT: "mangledName": "Obj", // CHECK-NEXT: "type": { // CHECK-NEXT: "desugaredQualType": "id", // CHECK-NEXT: "qualType": "id", @@ -953,7 +950,6 @@ void TestObjCBoolLiteral() { // CHECK-NEXT: }, // CHECK-NEXT: "isUsed": true, // CHECK-NEXT: "name": "Obj", -// CHECK-NEXT: "mangledName": "Obj", // CHECK-NEXT: "type": { // CHECK-NEXT: "qualType": "J *" // CHECK-NEXT: } @@ -1593,7 +1589,6 @@ void TestObjCBoolLiteral() { // CHECK-NEXT: } // CHECK-NEXT: }, // CHECK-NEXT: "name": "i", -// CHECK-NEXT: "mangledName": "i", // CHECK-NEXT: "type": { // CHECK-NEXT: "qualType": "int" // CHECK-NEXT: }, @@ -1977,7 +1972,6 @@ void TestObjCBoolLiteral() { // CHECK-NEXT: }, // CHECK-NEXT: "isUsed": true, // CHECK-NEXT: "name": "Array", -// CHECK-NEXT: "mangledName": "Array", // CHECK-NEXT: "type": { // CHECK-NEXT: "qualType": "NSMutableArray *" // CHECK-NEXT: } @@ -2004,7 +1998,6 @@ void TestObjCBoolLiteral() { // CHECK-NEXT: }, // CHECK-NEXT: "isUsed": true, // CHECK-NEXT: "name": "Dict", -// CHECK-NEXT: "mangledName": "Dict", // CHECK-NEXT: "type": { // CHECK-NEXT: "qualType": "NSMutableDictionary *" // CHECK-NEXT: } @@ -2870,7 +2863,6 @@ void TestObjCBoolLiteral() { // CHECK-NEXT: }, // CHECK-NEXT: "isUsed": true, // CHECK-NEXT: "name": "i", -// CHECK-NEXT: "mangledName": "i", // CHECK-NEXT: "type": { // CHECK-NEXT: "desugaredQualType": "id", // CHECK-NEXT: "qualType": "id", @@ -4826,7 +4818,6 @@ void TestObjCBoolLiteral() { // CHECK-NEXT: }, // CHECK-NEXT: "isUsed": true, // CHECK-NEXT: "name": "Ptr", -// CHECK-NEXT: "mangledName": "Ptr", // CHECK-NEXT: "type": { // CHECK-NEXT: "qualType": "I *" // CHECK-NEXT: } diff --git a/clang/test/AST/ast-dump-funcs-json.cpp b/clang/test/AST/ast-dump-funcs-json.cpp index ecc2701c400d..45216274d5df 100644 --- a/clang/test/AST/ast-dump-funcs-json.cpp +++ b/clang/test/AST/ast-dump-funcs-json.cpp @@ -628,7 +628,6 @@ int main() { // CHECK-NEXT: } // CHECK-NEXT: }, // CHECK-NEXT: "name": "a", -// CHECK-NEXT: "mangledName": "_ZZ5Test3iiE1a", // CHECK-NEXT: "type": { // CHECK-NEXT: "qualType": "int" // CHECK-NEXT: } @@ -654,7 +653,6 @@ int main() { // CHECK-NEXT: } // CHECK-NEXT: }, // CHECK-NEXT: "name": "b", -// CHECK-NEXT: "mangledName": "_ZZ5Test3iiE1b", // CHECK-NEXT: "type": { // CHECK-NEXT: "qualType": "int" // CHECK-NEXT: } @@ -711,7 +709,6 @@ int main() { // CHECK-NEXT: } // CHECK-NEXT: }, // CHECK-NEXT: "name": "a", -// CHECK-NEXT: "mangledName": "_ZZ5Test4iiE1a", // CHECK-NEXT: "type": { // CHECK-NEXT: "qualType": "int" // CHECK-NEXT: } @@ -737,7 +734,6 @@ int main() { // CHECK-NEXT: } // CHECK-NEXT: }, // CHECK-NEXT: "name": "b", -// CHECK-NEXT: "mangledName": "_ZZ5Test4iiE1b", // CHECK-NEXT: "type": { // CHECK-NEXT: "qualType": "int" // CHECK-NEXT: }, diff --git a/clang/test/AST/ast-dump-if-json.cpp b/clang/test/AST/ast-dump-if-json.cpp index 6c8065b54e33..3f3817b88ba4 100644 --- a/clang/test/AST/ast-dump-if-json.cpp +++ b/clang/test/AST/ast-dump-if-json.cpp @@ -711,7 +711,6 @@ void func(int val) { // CHECK-NEXT: }, // CHECK-NEXT: "isUsed": true, // CHECK-NEXT: "name": "i", -// CHECK-NEXT: "mangledName": "_ZZ4funciE1i", // CHECK-NEXT: "type": { // CHECK-NEXT: "qualType": "int" // CHECK-NEXT: }, @@ -894,7 +893,6 @@ void func(int val) { // CHECK-NEXT: }, // CHECK-NEXT: "isUsed": true, // CHECK-NEXT: "name": "i", -// CHECK-NEXT: "mangledName": 
"_ZZ4funciE1i_0", // CHECK-NEXT: "type": { // CHECK-NEXT: "qualType": "int" // CHECK-NEXT: }, diff --git a/clang/test/AST/ast-dump-stmt-json.c b/clang/test/AST/ast-dump-stmt-json.c index 2990cfa64079..899bdc8e8580 100644 --- a/clang/test/AST/ast-dump-stmt-json.c +++ b/clang/test/AST/ast-dump-stmt-json.c @@ -147,6 +147,10 @@ void TestLineNumbers(void) { #undef FOO } +void TestVLA(int n) { + double vla[n]; +} + // NOTE: CHECK lines have been autogenerated by gen_ast_dump_json_test.py // using --filters=VarDecl,CompoundStmt @@ -377,7 +381,6 @@ void TestLineNumbers(void) { // CHECK-NEXT: } // CHECK-NEXT: }, // CHECK-NEXT: "name": "x", -// CHECK-NEXT: "mangledName": "x", // CHECK-NEXT: "type": { // CHECK-NEXT: "qualType": "int" // CHECK-NEXT: }, @@ -446,7 +449,6 @@ void TestLineNumbers(void) { // CHECK-NEXT: } // CHECK-NEXT: }, // CHECK-NEXT: "name": "y", -// CHECK-NEXT: "mangledName": "y", // CHECK-NEXT: "type": { // CHECK-NEXT: "qualType": "int" // CHECK-NEXT: } @@ -472,7 +474,6 @@ void TestLineNumbers(void) { // CHECK-NEXT: } // CHECK-NEXT: }, // CHECK-NEXT: "name": "z", -// CHECK-NEXT: "mangledName": "z", // CHECK-NEXT: "type": { // CHECK-NEXT: "qualType": "int" // CHECK-NEXT: } @@ -720,7 +721,6 @@ void TestLineNumbers(void) { // CHECK-NEXT: }, // CHECK-NEXT: "isUsed": true, // CHECK-NEXT: "name": "T1", -// CHECK-NEXT: "mangledName": "T1", // CHECK-NEXT: "type": { // CHECK-NEXT: "qualType": "char" // CHECK-NEXT: }, @@ -813,7 +813,6 @@ void TestLineNumbers(void) { // CHECK-NEXT: }, // CHECK-NEXT: "isUsed": true, // CHECK-NEXT: "name": "T2", -// CHECK-NEXT: "mangledName": "T2", // CHECK-NEXT: "type": { // CHECK-NEXT: "qualType": "int" // CHECK-NEXT: }, @@ -1937,7 +1936,6 @@ void TestLineNumbers(void) { // CHECK-NEXT: } // CHECK-NEXT: }, // CHECK-NEXT: "name": "j", -// CHECK-NEXT: "mangledName": "j", // CHECK-NEXT: "type": { // CHECK-NEXT: "qualType": "int" // CHECK-NEXT: }, @@ -2221,7 +2219,6 @@ void TestLineNumbers(void) { // CHECK-NEXT: }, // CHECK-NEXT: "isUsed": true, // CHECK-NEXT: "name": "ptr", -// CHECK-NEXT: "mangledName": "ptr", // CHECK-NEXT: "type": { // CHECK-NEXT: "qualType": "void *" // CHECK-NEXT: }, @@ -3883,7 +3880,6 @@ void TestLineNumbers(void) { // CHECK-NEXT: }, // CHECK-NEXT: "isUsed": true, // CHECK-NEXT: "name": "i", -// CHECK-NEXT: "mangledName": "i", // CHECK-NEXT: "type": { // CHECK-NEXT: "qualType": "int" // CHECK-NEXT: }, @@ -5179,7 +5175,6 @@ void TestLineNumbers(void) { // CHECK-NEXT: }, // CHECK-NEXT: "isUsed": true, // CHECK-NEXT: "name": "a", -// CHECK-NEXT: "mangledName": "a", // CHECK-NEXT: "type": { // CHECK-NEXT: "qualType": "int" // CHECK-NEXT: }, @@ -5325,7 +5320,6 @@ void TestLineNumbers(void) { // CHECK-NEXT: } // CHECK-NEXT: }, // CHECK-NEXT: "name": "a", -// CHECK-NEXT: "mangledName": "a", // CHECK-NEXT: "type": { // CHECK-NEXT: "qualType": "int" // CHECK-NEXT: } @@ -5371,7 +5365,6 @@ void TestLineNumbers(void) { // CHECK-NEXT: } // CHECK-NEXT: }, // CHECK-NEXT: "name": "b", -// CHECK-NEXT: "mangledName": "b", // CHECK-NEXT: "type": { // CHECK-NEXT: "qualType": "int" // CHECK-NEXT: } @@ -5419,3 +5412,69 @@ void TestLineNumbers(void) { // CHECK-NEXT: } // CHECK-NEXT: ] // CHECK-NEXT: } + + +// CHECK-NOT: {{^}}Dumping +// CHECK: "kind": "CompoundStmt", +// CHECK-NEXT: "range": { +// CHECK-NEXT: "begin": { +// CHECK-NEXT: "offset": 1683, +// CHECK-NEXT: "col": 21, +// CHECK-NEXT: "tokLen": 1 +// CHECK-NEXT: }, +// CHECK-NEXT: "end": { +// CHECK-NEXT: "offset": 1702, +// CHECK-NEXT: "line": 152, +// CHECK-NEXT: "presumedLine": 200007, +// CHECK-NEXT: 
"col": 1, +// CHECK-NEXT: "tokLen": 1 +// CHECK-NEXT: } +// CHECK-NEXT: }, +// CHECK-NEXT: "inner": [ +// CHECK-NEXT: { +// CHECK-NEXT: "id": "0x{{.*}}", +// CHECK-NEXT: "kind": "DeclStmt", +// CHECK-NEXT: "range": { +// CHECK-NEXT: "begin": { +// CHECK-NEXT: "offset": 1687, +// CHECK-NEXT: "line": 151, +// CHECK-NEXT: "presumedLine": 200006, +// CHECK-NEXT: "col": 3, +// CHECK-NEXT: "tokLen": 6 +// CHECK-NEXT: }, +// CHECK-NEXT: "end": { +// CHECK-NEXT: "offset": 1700, +// CHECK-NEXT: "col": 16, +// CHECK-NEXT: "tokLen": 1 +// CHECK-NEXT: } +// CHECK-NEXT: }, +// CHECK-NEXT: "inner": [ +// CHECK-NEXT: { +// CHECK-NEXT: "id": "0x{{.*}}", +// CHECK-NEXT: "kind": "VarDecl", +// CHECK-NEXT: "loc": { +// CHECK-NEXT: "offset": 1694, +// CHECK-NEXT: "col": 10, +// CHECK-NEXT: "tokLen": 3 +// CHECK-NEXT: }, +// CHECK-NEXT: "range": { +// CHECK-NEXT: "begin": { +// CHECK-NEXT: "offset": 1687, +// CHECK-NEXT: "col": 3, +// CHECK-NEXT: "tokLen": 6 +// CHECK-NEXT: }, +// CHECK-NEXT: "end": { +// CHECK-NEXT: "offset": 1699, +// CHECK-NEXT: "col": 15, +// CHECK-NEXT: "tokLen": 1 +// CHECK-NEXT: } +// CHECK-NEXT: }, +// CHECK-NEXT: "name": "vla", +// CHECK-NEXT: "type": { +// CHECK-NEXT: "qualType": "double[n]" +// CHECK-NEXT: } +// CHECK-NEXT: } +// CHECK-NEXT: ] +// CHECK-NEXT: } +// CHECK-NEXT: ] +// CHECK-NEXT: } diff --git a/clang/test/AST/ast-dump-stmt-json.cpp b/clang/test/AST/ast-dump-stmt-json.cpp index 2283f38eebd5..5284553448a5 100644 --- a/clang/test/AST/ast-dump-stmt-json.cpp +++ b/clang/test/AST/ast-dump-stmt-json.cpp @@ -366,7 +366,6 @@ void TestDependentGenericSelectionExpr(Ty T) { // CHECK-NEXT: } // CHECK-NEXT: }, // CHECK-NEXT: "name": "f", -// CHECK-NEXT: "mangledName": "_ZZ12TestFunctionvE1f", // CHECK-NEXT: "type": { // CHECK-NEXT: "qualType": "void (*)()" // CHECK-NEXT: }, @@ -639,7 +638,6 @@ void TestDependentGenericSelectionExpr(Ty T) { // CHECK-NEXT: } // CHECK-NEXT: }, // CHECK-NEXT: "name": "x", -// CHECK-NEXT: "mangledName": "_ZZ10TestCatch1vE1x", // CHECK-NEXT: "type": { // CHECK-NEXT: "qualType": "int" // CHECK-NEXT: } @@ -885,7 +883,6 @@ void TestDependentGenericSelectionExpr(Ty T) { // CHECK-NEXT: }, // CHECK-NEXT: "isUsed": true, // CHECK-NEXT: "name": "p", -// CHECK-NEXT: "mangledName": "_ZZ19TestAllocationExprsvE1p", // CHECK-NEXT: "type": { // CHECK-NEXT: "qualType": "int *" // CHECK-NEXT: } @@ -2036,7 +2033,6 @@ void TestDependentGenericSelectionExpr(Ty T) { // CHECK-NEXT: }, // CHECK-NEXT: "isReferenced": true, // CHECK-NEXT: "name": "p", -// CHECK-NEXT: "mangledName": "_ZZ27TestDependentAllocationExprvE1p", // CHECK-NEXT: "type": { // CHECK-NEXT: "qualType": "T *" // CHECK-NEXT: }, @@ -2260,7 +2256,6 @@ void TestDependentGenericSelectionExpr(Ty T) { // CHECK-NEXT: }, // CHECK-NEXT: "isReferenced": true, // CHECK-NEXT: "name": "obj", -// CHECK-NEXT: "mangledName": "_ZZ28TestDependentScopeMemberExprvE3obj", // CHECK-NEXT: "type": { // CHECK-NEXT: "qualType": "DependentScopeMemberExprWrapper" // CHECK-NEXT: } @@ -2768,7 +2763,6 @@ void TestDependentGenericSelectionExpr(Ty T) { // CHECK-NEXT: }, // CHECK-NEXT: "isReferenced": true, // CHECK-NEXT: "name": "obj", -// CHECK-NEXT: "mangledName": "_ZZ36TestDependentScopeTemplateMemberExprvE3obj", // CHECK-NEXT: "type": { // CHECK-NEXT: "qualType": "OtherDependentScopeMemberExprWrapper" // CHECK-NEXT: } @@ -2984,7 +2978,6 @@ void TestDependentGenericSelectionExpr(Ty T) { // CHECK-NEXT: } // CHECK-NEXT: }, // CHECK-NEXT: "name": "us", -// CHECK-NEXT: "mangledName": "_ZZ17TestUnionInitListvE2us", // CHECK-NEXT: "type": { // 
CHECK-NEXT: "qualType": "U[3]" // CHECK-NEXT: }, @@ -3151,7 +3144,6 @@ void TestDependentGenericSelectionExpr(Ty T) { // CHECK-NEXT: }, // CHECK-NEXT: "isUsed": true, // CHECK-NEXT: "name": "i", -// CHECK-NEXT: "mangledName": "_ZZ10TestSwitchiE1i", // CHECK-NEXT: "type": { // CHECK-NEXT: "qualType": "int" // CHECK-NEXT: } @@ -3230,7 +3222,6 @@ void TestDependentGenericSelectionExpr(Ty T) { // CHECK-NEXT: } // CHECK-NEXT: }, // CHECK-NEXT: "name": "a", -// CHECK-NEXT: "mangledName": "_ZZ10TestSwitchiE1a", // CHECK-NEXT: "type": { // CHECK-NEXT: "qualType": "int" // CHECK-NEXT: } @@ -3363,7 +3354,6 @@ void TestDependentGenericSelectionExpr(Ty T) { // CHECK-NEXT: }, // CHECK-NEXT: "isReferenced": true, // CHECK-NEXT: "name": "b", -// CHECK-NEXT: "mangledName": "_ZZ6TestIfbE1b", // CHECK-NEXT: "type": { // CHECK-NEXT: "qualType": "bool" // CHECK-NEXT: } @@ -3443,7 +3433,6 @@ void TestDependentGenericSelectionExpr(Ty T) { // CHECK-NEXT: }, // CHECK-NEXT: "isReferenced": true, // CHECK-NEXT: "name": "i", -// CHECK-NEXT: "mangledName": "_ZZ6TestIfbE1i", // CHECK-NEXT: "type": { // CHECK-NEXT: "qualType": "const int" // CHECK-NEXT: }, @@ -4100,7 +4089,6 @@ void TestDependentGenericSelectionExpr(Ty T) { // CHECK-NEXT: }, // CHECK-NEXT: "isUsed": true, // CHECK-NEXT: "name": "i", -// CHECK-NEXT: "mangledName": "_ZZ13TestIterationvE1i", // CHECK-NEXT: "type": { // CHECK-NEXT: "qualType": "int" // CHECK-NEXT: }, @@ -4169,7 +4157,6 @@ void TestDependentGenericSelectionExpr(Ty T) { // CHECK-NEXT: }, // CHECK-NEXT: "isUsed": true, // CHECK-NEXT: "name": "j", -// CHECK-NEXT: "mangledName": "_ZZ13TestIterationvE1j", // CHECK-NEXT: "type": { // CHECK-NEXT: "qualType": "int" // CHECK-NEXT: }, @@ -4414,7 +4401,6 @@ void TestDependentGenericSelectionExpr(Ty T) { // CHECK-NEXT: }, // CHECK-NEXT: "isUsed": true, // CHECK-NEXT: "name": "vals", -// CHECK-NEXT: "mangledName": "_ZZ13TestIterationvE4vals", // CHECK-NEXT: "type": { // CHECK-NEXT: "qualType": "int[10]" // CHECK-NEXT: } @@ -4480,7 +4466,6 @@ void TestDependentGenericSelectionExpr(Ty T) { // CHECK-NEXT: "isImplicit": true, // CHECK-NEXT: "isUsed": true, // CHECK-NEXT: "name": "__range1", -// CHECK-NEXT: "mangledName": "_ZZ13TestIterationvE8__range1", // CHECK-NEXT: "type": { // CHECK-NEXT: "qualType": "int (&)[10]" // CHECK-NEXT: }, @@ -4557,7 +4542,6 @@ void TestDependentGenericSelectionExpr(Ty T) { // CHECK-NEXT: "isImplicit": true, // CHECK-NEXT: "isUsed": true, // CHECK-NEXT: "name": "__begin1", -// CHECK-NEXT: "mangledName": "_ZZ13TestIterationvE8__begin1", // CHECK-NEXT: "type": { // CHECK-NEXT: "desugaredQualType": "int *", // CHECK-NEXT: "qualType": "int *" @@ -4658,7 +4642,6 @@ void TestDependentGenericSelectionExpr(Ty T) { // CHECK-NEXT: "isImplicit": true, // CHECK-NEXT: "isUsed": true, // CHECK-NEXT: "name": "__end1", -// CHECK-NEXT: "mangledName": "_ZZ13TestIterationvE6__end1", // CHECK-NEXT: "type": { // CHECK-NEXT: "desugaredQualType": "int *", // CHECK-NEXT: "qualType": "int *" @@ -4987,7 +4970,6 @@ void TestDependentGenericSelectionExpr(Ty T) { // CHECK-NEXT: } // CHECK-NEXT: }, // CHECK-NEXT: "name": "v", -// CHECK-NEXT: "mangledName": "_ZZ13TestIterationvE1v", // CHECK-NEXT: "type": { // CHECK-NEXT: "qualType": "int" // CHECK-NEXT: }, @@ -5157,7 +5139,6 @@ void TestDependentGenericSelectionExpr(Ty T) { // CHECK-NEXT: }, // CHECK-NEXT: "isUsed": true, // CHECK-NEXT: "name": "C", -// CHECK-NEXT: "mangledName": "_ZZ13TestIterationvE1C", // CHECK-NEXT: "type": { // CHECK-NEXT: "qualType": "Container" // CHECK-NEXT: }, @@ -5251,7 +5232,6 @@ 
void TestDependentGenericSelectionExpr(Ty T) { // CHECK-NEXT: "isImplicit": true, // CHECK-NEXT: "isUsed": true, // CHECK-NEXT: "name": "__range1", -// CHECK-NEXT: "mangledName": "_ZZ13TestIterationvE8__range1_0", // CHECK-NEXT: "type": { // CHECK-NEXT: "qualType": "Container &" // CHECK-NEXT: }, @@ -5328,7 +5308,6 @@ void TestDependentGenericSelectionExpr(Ty T) { // CHECK-NEXT: "isImplicit": true, // CHECK-NEXT: "isUsed": true, // CHECK-NEXT: "name": "__begin1", -// CHECK-NEXT: "mangledName": "_ZZ13TestIterationvE8__begin1_0", // CHECK-NEXT: "type": { // CHECK-NEXT: "desugaredQualType": "int *", // CHECK-NEXT: "qualType": "int *" @@ -5476,7 +5455,6 @@ void TestDependentGenericSelectionExpr(Ty T) { // CHECK-NEXT: "isImplicit": true, // CHECK-NEXT: "isUsed": true, // CHECK-NEXT: "name": "__end1", -// CHECK-NEXT: "mangledName": "_ZZ13TestIterationvE6__end1_0", // CHECK-NEXT: "type": { // CHECK-NEXT: "desugaredQualType": "int *", // CHECK-NEXT: "qualType": "int *" @@ -5808,7 +5786,6 @@ void TestDependentGenericSelectionExpr(Ty T) { // CHECK-NEXT: } // CHECK-NEXT: }, // CHECK-NEXT: "name": "v", -// CHECK-NEXT: "mangledName": "_ZZ13TestIterationvE1v_0", // CHECK-NEXT: "type": { // CHECK-NEXT: "qualType": "int" // CHECK-NEXT: }, @@ -5995,7 +5972,6 @@ void TestDependentGenericSelectionExpr(Ty T) { // CHECK-NEXT: } // CHECK-NEXT: }, // CHECK-NEXT: "name": "a", -// CHECK-NEXT: "mangledName": "_ZZ13TestIterationvE1a", // CHECK-NEXT: "type": { // CHECK-NEXT: "qualType": "int" // CHECK-NEXT: } @@ -6041,7 +6017,6 @@ void TestDependentGenericSelectionExpr(Ty T) { // CHECK-NEXT: "isImplicit": true, // CHECK-NEXT: "isUsed": true, // CHECK-NEXT: "name": "__range1", -// CHECK-NEXT: "mangledName": "_ZZ13TestIterationvE8__range1_1", // CHECK-NEXT: "type": { // CHECK-NEXT: "qualType": "int (&)[10]" // CHECK-NEXT: }, @@ -6118,7 +6093,6 @@ void TestDependentGenericSelectionExpr(Ty T) { // CHECK-NEXT: "isImplicit": true, // CHECK-NEXT: "isUsed": true, // CHECK-NEXT: "name": "__begin1", -// CHECK-NEXT: "mangledName": "_ZZ13TestIterationvE8__begin1_1", // CHECK-NEXT: "type": { // CHECK-NEXT: "desugaredQualType": "int *", // CHECK-NEXT: "qualType": "int *" @@ -6219,7 +6193,6 @@ void TestDependentGenericSelectionExpr(Ty T) { // CHECK-NEXT: "isImplicit": true, // CHECK-NEXT: "isUsed": true, // CHECK-NEXT: "name": "__end1", -// CHECK-NEXT: "mangledName": "_ZZ13TestIterationvE6__end1_1", // CHECK-NEXT: "type": { // CHECK-NEXT: "desugaredQualType": "int *", // CHECK-NEXT: "qualType": "int *" @@ -6548,7 +6521,6 @@ void TestDependentGenericSelectionExpr(Ty T) { // CHECK-NEXT: } // CHECK-NEXT: }, // CHECK-NEXT: "name": "v", -// CHECK-NEXT: "mangledName": "_ZZ13TestIterationvE1v_1", // CHECK-NEXT: "type": { // CHECK-NEXT: "qualType": "int" // CHECK-NEXT: }, @@ -6786,7 +6758,6 @@ void TestDependentGenericSelectionExpr(Ty T) { // CHECK-NEXT: }, // CHECK-NEXT: "isReferenced": true, // CHECK-NEXT: "name": "T", -// CHECK-NEXT: "mangledName": "_ZZ33TestDependentGenericSelectionExprT_E1T", // CHECK-NEXT: "type": { // CHECK-NEXT: "qualType": "Ty" // CHECK-NEXT: } diff --git a/clang/test/AST/ast-dump-stmt-json.m b/clang/test/AST/ast-dump-stmt-json.m index 333bdd529f97..c1898bd13d65 100644 --- a/clang/test/AST/ast-dump-stmt-json.m +++ b/clang/test/AST/ast-dump-stmt-json.m @@ -73,7 +73,6 @@ void TestObjCAtCatchStmt() { // CHECK-NEXT: }, // CHECK-NEXT: "isUsed": true, // CHECK-NEXT: "name": "x", -// CHECK-NEXT: "mangledName": "x", // CHECK-NEXT: "type": { // CHECK-NEXT: "qualType": "int" // CHECK-NEXT: } @@ -307,7 +306,6 @@ void 
TestObjCAtCatchStmt() { // CHECK-NEXT: }, // CHECK-NEXT: "isUsed": true, // CHECK-NEXT: "name": "x", -// CHECK-NEXT: "mangledName": "x", // CHECK-NEXT: "type": { // CHECK-NEXT: "qualType": "int" // CHECK-NEXT: } @@ -611,7 +609,6 @@ void TestObjCAtCatchStmt() { // CHECK-NEXT: } // CHECK-NEXT: }, // CHECK-NEXT: "name": "a", -// CHECK-NEXT: "mangledName": "a", // CHECK-NEXT: "type": { // CHECK-NEXT: "qualType": "A *" // CHECK-NEXT: } @@ -758,7 +755,6 @@ void TestObjCAtCatchStmt() { // CHECK-NEXT: }, // CHECK-NEXT: "isImplicit": true, // CHECK-NEXT: "name": "__context", -// CHECK-NEXT: "mangledName": "__context", // CHECK-NEXT: "type": { // CHECK-NEXT: "qualType": "struct (unnamed at {{.*}}:18:14) *" // CHECK-NEXT: } diff --git a/clang/test/AST/ast-dump-template-decls-json.cpp b/clang/test/AST/ast-dump-template-decls-json.cpp index 9871f6dd9b6e..fc1b883f5dac 100644 --- a/clang/test/AST/ast-dump-template-decls-json.cpp +++ b/clang/test/AST/ast-dump-template-decls-json.cpp @@ -1040,7 +1040,6 @@ void i(); // CHECK-NEXT: } // CHECK-NEXT: }, // CHECK-NEXT: "name": "i", -// CHECK-NEXT: "mangledName": "_ZZ1fiE1i", // CHECK-NEXT: "type": { // CHECK-NEXT: "qualType": "int" // CHECK-NEXT: }, From 1201a0f3955b7b564db3e5a913a7ec9255f9275d Mon Sep 17 00:00:00 2001 From: Nikita Popov Date: Wed, 22 Dec 2021 16:30:22 +0100 Subject: [PATCH 122/293] [OpenMP] Fix incorrect type when casting from uintptr MakeNaturalAlignAddrLValue() expects the pointee type, but the pointer type was passed. As a result, the natural alignment of the pointer (usually 8) was always used in place of the natural alignment of the value type. Differential Revision: https://reviews.llvm.org/D116171 --- clang/lib/CodeGen/CGStmtOpenMP.cpp | 3 +- .../test/OpenMP/debug-info-complex-byval.cpp | 2 +- .../distribute_parallel_for_codegen.cpp | 24 +- ...bute_parallel_for_firstprivate_codegen.cpp | 16 +- ...te_parallel_for_reduction_task_codegen.cpp | 4 +- .../distribute_parallel_for_simd_codegen.cpp | 24 +- ...parallel_for_simd_firstprivate_codegen.cpp | 16 +- ...istribute_parallel_for_simd_if_codegen.cpp | 16 +- ...stribute_parallel_generic_mode_codegen.cpp | 4 +- clang/test/OpenMP/nvptx_parallel_codegen.cpp | 20 +- .../OpenMP/nvptx_parallel_for_codegen.cpp | 2 +- clang/test/OpenMP/nvptx_target_codegen.cpp | 74 +- ...tx_target_parallel_num_threads_codegen.cpp | 4 +- .../test/OpenMP/nvptx_target_printf_codegen.c | 2 +- .../OpenMP/nvptx_target_teams_codegen.cpp | 30 +- ..._teams_distribute_parallel_for_codegen.cpp | 112 +-- ...bute_parallel_for_generic_mode_codegen.cpp | 28 +- ...s_distribute_parallel_for_simd_codegen.cpp | 36 +- clang/test/OpenMP/nvptx_teams_codegen.cpp | 4 +- .../OpenMP/nvptx_teams_reduction_codegen.cpp | 8 +- .../OpenMP/parallel_firstprivate_codegen.cpp | 36 +- clang/test/OpenMP/parallel_for_codegen.cpp | 24 +- clang/test/OpenMP/parallel_master_codegen.cpp | 24 +- .../parallel_master_taskloop_codegen.cpp | 20 +- .../parallel_master_taskloop_simd_codegen.cpp | 44 +- clang/test/OpenMP/single_codegen.cpp | 20 +- .../OpenMP/target_codegen_global_capture.cpp | 108 +-- clang/test/OpenMP/target_parallel_codegen.cpp | 728 +++++++-------- .../OpenMP/target_parallel_debug_codegen.cpp | 8 +- .../OpenMP/target_parallel_for_codegen.cpp | 696 +++++++------- .../target_parallel_for_debug_codegen.cpp | 10 +- .../target_parallel_for_simd_codegen.cpp | 752 +++++++-------- .../OpenMP/target_parallel_if_codegen.cpp | 288 +++--- .../target_parallel_num_threads_codegen.cpp | 224 ++--- clang/test/OpenMP/target_teams_codegen.cpp | 840 
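// A minimal sketch of the alignment difference behind this fix; CGF, Ctx,
// CastedPtr and DstType stand for the objects used in castValueFromUintptr
// above, with DstType being the captured value type (e.g. "int").
// Passing the pointer type derives the natural alignment from "int *"
// (usually 8) regardless of the value actually being captured:
LValue Before = CGF.MakeNaturalAlignAddrLValue(CastedPtr, Ctx.getPointerType(DstType));
// Passing the pointee type derives it from the value type itself (4 for an
// int), which is what the CHECK-line updates below reflect (align 8 -> align 4):
LValue After = CGF.MakeNaturalAlignAddrLValue(CastedPtr, DstType);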
++++++++--------- .../target_teams_distribute_codegen.cpp | 872 +++++++++--------- ...rget_teams_distribute_collapse_codegen.cpp | 16 +- ...teams_distribute_dist_schedule_codegen.cpp | 32 +- ..._teams_distribute_firstprivate_codegen.cpp | 44 +- ...t_teams_distribute_lastprivate_codegen.cpp | 40 +- ..._teams_distribute_parallel_for_codegen.cpp | 100 +- ...stribute_parallel_for_collapse_codegen.cpp | 32 +- ...ute_parallel_for_dist_schedule_codegen.cpp | 72 +- ...bute_parallel_for_firstprivate_codegen.cpp | 116 +-- ...ams_distribute_parallel_for_if_codegen.cpp | 64 +- ...ibute_parallel_for_lastprivate_codegen.cpp | 60 +- ...stribute_parallel_for_schedule_codegen.cpp | 256 ++--- ...s_distribute_parallel_for_simd_codegen.cpp | 120 +-- ...ute_parallel_for_simd_collapse_codegen.cpp | 32 +- ...arallel_for_simd_dist_schedule_codegen.cpp | 72 +- ...parallel_for_simd_firstprivate_codegen.cpp | 116 +-- ...istribute_parallel_for_simd_if_codegen.cpp | 160 ++-- ..._parallel_for_simd_lastprivate_codegen.cpp | 60 +- ...ute_parallel_for_simd_schedule_codegen.cpp | 256 ++--- .../target_teams_distribute_simd_codegen.cpp | 872 +++++++++--------- ...teams_distribute_simd_collapse_codegen.cpp | 16 +- ..._distribute_simd_dist_schedule_codegen.cpp | 32 +- ...s_distribute_simd_firstprivate_codegen.cpp | 44 +- ...ms_distribute_simd_lastprivate_codegen.cpp | 40 +- .../OpenMP/target_teams_num_teams_codegen.cpp | 224 ++--- .../target_teams_thread_limit_codegen.cpp | 240 ++--- clang/test/OpenMP/teams_codegen.cpp | 36 +- .../test/OpenMP/teams_distribute_codegen.cpp | 16 +- ...teams_distribute_dist_schedule_codegen.cpp | 8 +- .../teams_distribute_firstprivate_codegen.cpp | 44 +- .../teams_distribute_parallel_for_codegen.cpp | 16 +- ...ute_parallel_for_dist_schedule_codegen.cpp | 24 +- ...bute_parallel_for_firstprivate_codegen.cpp | 64 +- ...ams_distribute_parallel_for_if_codegen.cpp | 64 +- ...ibute_parallel_for_num_threads_codegen.cpp | 24 +- ...stribute_parallel_for_schedule_codegen.cpp | 96 +- ...s_distribute_parallel_for_simd_codegen.cpp | 16 +- ...arallel_for_simd_dist_schedule_codegen.cpp | 24 +- ...parallel_for_simd_firstprivate_codegen.cpp | 64 +- ...istribute_parallel_for_simd_if_codegen.cpp | 112 +-- ..._parallel_for_simd_num_threads_codegen.cpp | 24 +- ...ute_parallel_for_simd_schedule_codegen.cpp | 96 +- .../OpenMP/teams_distribute_simd_codegen.cpp | 44 +- ..._distribute_simd_dist_schedule_codegen.cpp | 8 +- ...s_distribute_simd_firstprivate_codegen.cpp | 44 +- .../OpenMP/teams_firstprivate_codegen.cpp | 28 +- .../Inputs/generated-funcs-regex.c.expected | 2 +- 82 files changed, 4546 insertions(+), 4547 deletions(-) diff --git a/clang/lib/CodeGen/CGStmtOpenMP.cpp b/clang/lib/CodeGen/CGStmtOpenMP.cpp index 5677fce355d5..1bc086ffb1c6 100644 --- a/clang/lib/CodeGen/CGStmtOpenMP.cpp +++ b/clang/lib/CodeGen/CGStmtOpenMP.cpp @@ -378,8 +378,7 @@ static Address castValueFromUintptr(CodeGenFunction &CGF, SourceLocation Loc, AddrLV.getAddress(CGF).getPointer(), Ctx.getUIntPtrType(), Ctx.getPointerType(DstType), Loc); Address TmpAddr = - CGF.MakeNaturalAlignAddrLValue(CastedPtr, Ctx.getPointerType(DstType)) - .getAddress(CGF); + CGF.MakeNaturalAlignAddrLValue(CastedPtr, DstType).getAddress(CGF); return TmpAddr; } diff --git a/clang/test/OpenMP/debug-info-complex-byval.cpp b/clang/test/OpenMP/debug-info-complex-byval.cpp index cc39d65bc414..c66cc0b384da 100644 --- a/clang/test/OpenMP/debug-info-complex-byval.cpp +++ b/clang/test/OpenMP/debug-info-complex-byval.cpp @@ -57,7 +57,7 @@ void a() { // CHECK1-NEXT: [[TMP0:%.*]] = 
load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8, !dbg [[DBG38]] // CHECK1-NEXT: [[TMP1:%.*]] = load i32*, i32** [[DOTBOUND_TID__ADDR]], align 8, !dbg [[DBG38]] // CHECK1-NEXT: [[TMP2:%.*]] = bitcast { float, float }* [[CONV]] to <2 x float>*, !dbg [[DBG38]] -// CHECK1-NEXT: [[TMP3:%.*]] = load <2 x float>, <2 x float>* [[TMP2]], align 8, !dbg [[DBG38]] +// CHECK1-NEXT: [[TMP3:%.*]] = load <2 x float>, <2 x float>* [[TMP2]], align 4, !dbg [[DBG38]] // CHECK1-NEXT: call void @.omp_outlined._debug__(i32* [[TMP0]], i32* [[TMP1]], <2 x float> [[TMP3]]) #[[ATTR4:[0-9]+]], !dbg [[DBG38]] // CHECK1-NEXT: ret void, !dbg [[DBG38]] // diff --git a/clang/test/OpenMP/distribute_parallel_for_codegen.cpp b/clang/test/OpenMP/distribute_parallel_for_codegen.cpp index d1a3f33c33d9..1ed8db15f979 100644 --- a/clang/test/OpenMP/distribute_parallel_for_codegen.cpp +++ b/clang/test/OpenMP/distribute_parallel_for_codegen.cpp @@ -1969,7 +1969,7 @@ int main() { // CHECK1-NEXT: store i32 [[CONV5]], i32* [[DOTOMP_UB]], align 4 // CHECK1-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK1-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP10:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK1-NEXT: [[TMP10:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK1-NEXT: [[TMP11:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: [[TMP12:%.*]] = load i32, i32* [[TMP11]], align 4 // CHECK1-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB2]], i32 [[TMP12]], i32 33, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 [[TMP10]]) @@ -2482,7 +2482,7 @@ int main() { // CHECK1-NEXT: store i32 [[CONV5]], i32* [[DOTOMP_UB]], align 4 // CHECK1-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK1-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP10:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK1-NEXT: [[TMP10:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK1-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 // CHECK1-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 // CHECK1-NEXT: [[TMP13:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 @@ -3761,7 +3761,7 @@ int main() { // CHECK2-NEXT: store i32 [[CONV5]], i32* [[DOTOMP_UB]], align 4 // CHECK2-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK2-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK2-NEXT: [[TMP10:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK2-NEXT: [[TMP10:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK2-NEXT: [[TMP11:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK2-NEXT: [[TMP12:%.*]] = load i32, i32* [[TMP11]], align 4 // CHECK2-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB2]], i32 [[TMP12]], i32 33, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 [[TMP10]]) @@ -4274,7 +4274,7 @@ int main() { // CHECK2-NEXT: store i32 [[CONV5]], i32* [[DOTOMP_UB]], align 4 // CHECK2-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK2-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK2-NEXT: [[TMP10:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK2-NEXT: [[TMP10:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK2-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 // CHECK2-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 // CHECK2-NEXT: [[TMP13:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 @@ -9456,7 +9456,7 @@ int main() { // 
CHECK9-NEXT: store i32 [[CONV5]], i32* [[DOTOMP_UB]], align 4 // CHECK9-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK9-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK9-NEXT: [[TMP10:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK9-NEXT: [[TMP10:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK9-NEXT: [[TMP11:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK9-NEXT: [[TMP12:%.*]] = load i32, i32* [[TMP11]], align 4 // CHECK9-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB2]], i32 [[TMP12]], i32 33, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 [[TMP10]]) @@ -9949,7 +9949,7 @@ int main() { // CHECK9-NEXT: store i32 [[CONV5]], i32* [[DOTOMP_UB]], align 4 // CHECK9-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK9-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK9-NEXT: [[TMP10:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK9-NEXT: [[TMP10:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK9-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 // CHECK9-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 // CHECK9-NEXT: [[TMP13:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 @@ -11673,7 +11673,7 @@ int main() { // CHECK9-NEXT: store i32 [[CONV5]], i32* [[DOTOMP_UB]], align 4 // CHECK9-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK9-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK9-NEXT: [[TMP10:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK9-NEXT: [[TMP10:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK9-NEXT: [[TMP11:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK9-NEXT: [[TMP12:%.*]] = load i32, i32* [[TMP11]], align 4 // CHECK9-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB2]], i32 [[TMP12]], i32 33, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 [[TMP10]]) @@ -12166,7 +12166,7 @@ int main() { // CHECK9-NEXT: store i32 [[CONV5]], i32* [[DOTOMP_UB]], align 4 // CHECK9-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK9-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK9-NEXT: [[TMP10:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK9-NEXT: [[TMP10:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK9-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 // CHECK9-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 // CHECK9-NEXT: [[TMP13:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 @@ -13885,7 +13885,7 @@ int main() { // CHECK10-NEXT: store i32 [[CONV5]], i32* [[DOTOMP_UB]], align 4 // CHECK10-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK10-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK10-NEXT: [[TMP10:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK10-NEXT: [[TMP10:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK10-NEXT: [[TMP11:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK10-NEXT: [[TMP12:%.*]] = load i32, i32* [[TMP11]], align 4 // CHECK10-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB2]], i32 [[TMP12]], i32 33, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 [[TMP10]]) @@ -14378,7 +14378,7 @@ int main() { // CHECK10-NEXT: store i32 [[CONV5]], i32* [[DOTOMP_UB]], align 4 // CHECK10-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK10-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK10-NEXT: [[TMP10:%.*]] = load 
i32, i32* [[CONV]], align 8 +// CHECK10-NEXT: [[TMP10:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK10-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 // CHECK10-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 // CHECK10-NEXT: [[TMP13:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 @@ -16102,7 +16102,7 @@ int main() { // CHECK10-NEXT: store i32 [[CONV5]], i32* [[DOTOMP_UB]], align 4 // CHECK10-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK10-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK10-NEXT: [[TMP10:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK10-NEXT: [[TMP10:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK10-NEXT: [[TMP11:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK10-NEXT: [[TMP12:%.*]] = load i32, i32* [[TMP11]], align 4 // CHECK10-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB2]], i32 [[TMP12]], i32 33, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 [[TMP10]]) @@ -16595,7 +16595,7 @@ int main() { // CHECK10-NEXT: store i32 [[CONV5]], i32* [[DOTOMP_UB]], align 4 // CHECK10-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK10-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK10-NEXT: [[TMP10:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK10-NEXT: [[TMP10:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK10-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 // CHECK10-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 // CHECK10-NEXT: [[TMP13:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 diff --git a/clang/test/OpenMP/distribute_parallel_for_firstprivate_codegen.cpp b/clang/test/OpenMP/distribute_parallel_for_firstprivate_codegen.cpp index 9982bb790447..4eda6d07645b 100644 --- a/clang/test/OpenMP/distribute_parallel_for_firstprivate_codegen.cpp +++ b/clang/test/OpenMP/distribute_parallel_for_firstprivate_codegen.cpp @@ -489,8 +489,8 @@ int main() { // CHECK1-NEXT: store double 1.000000e+00, double* [[CONV]], align 8 // CHECK1-NEXT: [[TMP10:%.*]] = load double*, double** [[TMP]], align 8 // CHECK1-NEXT: store volatile double 1.000000e+00, double* [[TMP10]], align 8 -// CHECK1-NEXT: store i32 3, i32* [[CONV2]], align 8 -// CHECK1-NEXT: store float 4.000000e+00, float* [[CONV3]], align 8 +// CHECK1-NEXT: store i32 3, i32* [[CONV2]], align 4 +// CHECK1-NEXT: store float 4.000000e+00, float* [[CONV3]], align 4 // CHECK1-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[CLASS_ANON_0]], %class.anon.0* [[REF_TMP]], i32 0, i32 0 // CHECK1-NEXT: store double* [[CONV]], double** [[TMP11]], align 8 // CHECK1-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[CLASS_ANON_0]], %class.anon.0* [[REF_TMP]], i32 0, i32 1 @@ -749,8 +749,8 @@ int main() { // CHECK2-NEXT: store double 1.000000e+00, double* [[CONV]], align 8 // CHECK2-NEXT: [[TMP10:%.*]] = load double*, double** [[TMP]], align 8 // CHECK2-NEXT: store volatile double 1.000000e+00, double* [[TMP10]], align 8 -// CHECK2-NEXT: store i32 3, i32* [[CONV2]], align 8 -// CHECK2-NEXT: store float 4.000000e+00, float* [[CONV3]], align 8 +// CHECK2-NEXT: store i32 3, i32* [[CONV2]], align 4 +// CHECK2-NEXT: store float 4.000000e+00, float* [[CONV3]], align 4 // CHECK2-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[CLASS_ANON_0]], %class.anon.0* [[REF_TMP]], i32 0, i32 0 // CHECK2-NEXT: store double* [[CONV]], double** [[TMP11]], align 8 // CHECK2-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[CLASS_ANON_0]], %class.anon.0* [[REF_TMP]], i32 0, 
i32 1 @@ -1702,7 +1702,7 @@ int main() { // CHECK8-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP21]], 1 // CHECK8-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK8-NEXT: store i32 [[ADD]], i32* [[I]], align 4 -// CHECK8-NEXT: [[TMP22:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK8-NEXT: [[TMP22:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK8-NEXT: [[TMP23:%.*]] = load i32, i32* [[I]], align 4 // CHECK8-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP23]] to i64 // CHECK8-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x i32], [2 x i32]* [[VEC5]], i64 0, i64 [[IDXPROM]] @@ -2150,7 +2150,7 @@ int main() { // CHECK8-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP21]], 1 // CHECK8-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK8-NEXT: store i32 [[ADD]], i32* [[I]], align 4 -// CHECK8-NEXT: [[TMP22:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK8-NEXT: [[TMP22:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK8-NEXT: [[TMP23:%.*]] = load i32, i32* [[I]], align 4 // CHECK8-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP23]] to i64 // CHECK8-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x i32], [2 x i32]* [[VEC4]], i64 0, i64 [[IDXPROM]] @@ -2640,7 +2640,7 @@ int main() { // CHECK9-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP21]], 1 // CHECK9-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK9-NEXT: store i32 [[ADD]], i32* [[I]], align 4 -// CHECK9-NEXT: [[TMP22:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK9-NEXT: [[TMP22:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK9-NEXT: [[TMP23:%.*]] = load i32, i32* [[I]], align 4 // CHECK9-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP23]] to i64 // CHECK9-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x i32], [2 x i32]* [[VEC5]], i64 0, i64 [[IDXPROM]] @@ -3088,7 +3088,7 @@ int main() { // CHECK9-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP21]], 1 // CHECK9-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK9-NEXT: store i32 [[ADD]], i32* [[I]], align 4 -// CHECK9-NEXT: [[TMP22:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK9-NEXT: [[TMP22:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK9-NEXT: [[TMP23:%.*]] = load i32, i32* [[I]], align 4 // CHECK9-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP23]] to i64 // CHECK9-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x i32], [2 x i32]* [[VEC4]], i64 0, i64 [[IDXPROM]] diff --git a/clang/test/OpenMP/distribute_parallel_for_reduction_task_codegen.cpp b/clang/test/OpenMP/distribute_parallel_for_reduction_task_codegen.cpp index dec264ff9ea1..21c59d5ae125 100644 --- a/clang/test/OpenMP/distribute_parallel_for_reduction_task_codegen.cpp +++ b/clang/test/OpenMP/distribute_parallel_for_reduction_task_codegen.cpp @@ -62,7 +62,7 @@ int main(int argc, char **argv) { // CHECK1-NEXT: store i64 [[ARGC]], i64* [[ARGC_ADDR]], align 8 // CHECK1-NEXT: store i8** [[ARGV]], i8*** [[ARGV_ADDR]], align 8 // CHECK1-NEXT: [[CONV:%.*]] = bitcast i64* [[ARGC_ADDR]] to i32* -// CHECK1-NEXT: [[TMP0:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK1-NEXT: [[CONV1:%.*]] = bitcast i64* [[ARGC_CASTED]] to i32* // CHECK1-NEXT: store i32 [[TMP0]], i32* [[CONV1]], align 4 // CHECK1-NEXT: [[TMP1:%.*]] = load i64, i64* [[ARGC_CASTED]], align 8 @@ -681,7 +681,7 @@ int main(int argc, char **argv) { // CHECK2-NEXT: store i64 [[ARGC]], i64* [[ARGC_ADDR]], align 8 // CHECK2-NEXT: store i8** [[ARGV]], i8*** [[ARGV_ADDR]], align 8 // CHECK2-NEXT: [[CONV:%.*]] = bitcast i64* [[ARGC_ADDR]] to i32* -// CHECK2-NEXT: [[TMP0:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK2-NEXT: [[TMP0:%.*]] = load i32, i32* 
[[CONV]], align 4 // CHECK2-NEXT: [[CONV1:%.*]] = bitcast i64* [[ARGC_CASTED]] to i32* // CHECK2-NEXT: store i32 [[TMP0]], i32* [[CONV1]], align 4 // CHECK2-NEXT: [[TMP1:%.*]] = load i64, i64* [[ARGC_CASTED]], align 8 diff --git a/clang/test/OpenMP/distribute_parallel_for_simd_codegen.cpp b/clang/test/OpenMP/distribute_parallel_for_simd_codegen.cpp index 60b4685ed9b4..9c21a8789bc0 100644 --- a/clang/test/OpenMP/distribute_parallel_for_simd_codegen.cpp +++ b/clang/test/OpenMP/distribute_parallel_for_simd_codegen.cpp @@ -2077,7 +2077,7 @@ int main() { // CHECK1-NEXT: store i32 [[CONV5]], i32* [[DOTOMP_UB]], align 4 // CHECK1-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK1-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP10:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK1-NEXT: [[TMP10:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK1-NEXT: [[TMP11:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: [[TMP12:%.*]] = load i32, i32* [[TMP11]], align 4 // CHECK1-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB2]], i32 [[TMP12]], i32 33, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 [[TMP10]]) @@ -2638,7 +2638,7 @@ int main() { // CHECK1-NEXT: store i32 [[CONV5]], i32* [[DOTOMP_UB]], align 4 // CHECK1-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK1-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP10:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK1-NEXT: [[TMP10:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK1-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 // CHECK1-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 // CHECK1-NEXT: [[TMP13:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 @@ -4037,7 +4037,7 @@ int main() { // CHECK2-NEXT: store i32 [[CONV5]], i32* [[DOTOMP_UB]], align 4 // CHECK2-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK2-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK2-NEXT: [[TMP10:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK2-NEXT: [[TMP10:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK2-NEXT: [[TMP11:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK2-NEXT: [[TMP12:%.*]] = load i32, i32* [[TMP11]], align 4 // CHECK2-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB2]], i32 [[TMP12]], i32 33, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 [[TMP10]]) @@ -4598,7 +4598,7 @@ int main() { // CHECK2-NEXT: store i32 [[CONV5]], i32* [[DOTOMP_UB]], align 4 // CHECK2-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK2-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK2-NEXT: [[TMP10:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK2-NEXT: [[TMP10:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK2-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 // CHECK2-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 // CHECK2-NEXT: [[TMP13:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 @@ -10344,7 +10344,7 @@ int main() { // CHECK9-NEXT: store i32 [[CONV5]], i32* [[DOTOMP_UB]], align 4 // CHECK9-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK9-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK9-NEXT: [[TMP10:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK9-NEXT: [[TMP10:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK9-NEXT: [[TMP11:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 // 
CHECK9-NEXT: [[TMP12:%.*]] = load i32, i32* [[TMP11]], align 4 // CHECK9-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB2]], i32 [[TMP12]], i32 33, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 [[TMP10]]) @@ -10885,7 +10885,7 @@ int main() { // CHECK9-NEXT: store i32 [[CONV5]], i32* [[DOTOMP_UB]], align 4 // CHECK9-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK9-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK9-NEXT: [[TMP10:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK9-NEXT: [[TMP10:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK9-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 // CHECK9-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 // CHECK9-NEXT: [[TMP13:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 @@ -12714,7 +12714,7 @@ int main() { // CHECK9-NEXT: store i32 [[CONV5]], i32* [[DOTOMP_UB]], align 4 // CHECK9-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK9-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK9-NEXT: [[TMP10:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK9-NEXT: [[TMP10:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK9-NEXT: [[TMP11:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK9-NEXT: [[TMP12:%.*]] = load i32, i32* [[TMP11]], align 4 // CHECK9-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB2]], i32 [[TMP12]], i32 33, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 [[TMP10]]) @@ -13255,7 +13255,7 @@ int main() { // CHECK9-NEXT: store i32 [[CONV5]], i32* [[DOTOMP_UB]], align 4 // CHECK9-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK9-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK9-NEXT: [[TMP10:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK9-NEXT: [[TMP10:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK9-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 // CHECK9-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 // CHECK9-NEXT: [[TMP13:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 @@ -15094,7 +15094,7 @@ int main() { // CHECK10-NEXT: store i32 [[CONV5]], i32* [[DOTOMP_UB]], align 4 // CHECK10-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK10-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK10-NEXT: [[TMP10:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK10-NEXT: [[TMP10:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK10-NEXT: [[TMP11:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK10-NEXT: [[TMP12:%.*]] = load i32, i32* [[TMP11]], align 4 // CHECK10-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB2]], i32 [[TMP12]], i32 33, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 [[TMP10]]) @@ -15635,7 +15635,7 @@ int main() { // CHECK10-NEXT: store i32 [[CONV5]], i32* [[DOTOMP_UB]], align 4 // CHECK10-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK10-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK10-NEXT: [[TMP10:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK10-NEXT: [[TMP10:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK10-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 // CHECK10-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 // CHECK10-NEXT: [[TMP13:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 @@ -17464,7 +17464,7 @@ int main() { // CHECK10-NEXT: store i32 [[CONV5]], i32* 
[[DOTOMP_UB]], align 4 // CHECK10-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK10-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK10-NEXT: [[TMP10:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK10-NEXT: [[TMP10:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK10-NEXT: [[TMP11:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK10-NEXT: [[TMP12:%.*]] = load i32, i32* [[TMP11]], align 4 // CHECK10-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB2]], i32 [[TMP12]], i32 33, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 [[TMP10]]) @@ -18005,7 +18005,7 @@ int main() { // CHECK10-NEXT: store i32 [[CONV5]], i32* [[DOTOMP_UB]], align 4 // CHECK10-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK10-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK10-NEXT: [[TMP10:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK10-NEXT: [[TMP10:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK10-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 // CHECK10-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 // CHECK10-NEXT: [[TMP13:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 diff --git a/clang/test/OpenMP/distribute_parallel_for_simd_firstprivate_codegen.cpp b/clang/test/OpenMP/distribute_parallel_for_simd_firstprivate_codegen.cpp index 56640954d514..887117773d6e 100644 --- a/clang/test/OpenMP/distribute_parallel_for_simd_firstprivate_codegen.cpp +++ b/clang/test/OpenMP/distribute_parallel_for_simd_firstprivate_codegen.cpp @@ -495,8 +495,8 @@ int main() { // CHECK1-NEXT: store double 1.000000e+00, double* [[CONV]], align 8, !llvm.access.group !8 // CHECK1-NEXT: [[TMP10:%.*]] = load double*, double** [[TMP]], align 8, !llvm.access.group !8 // CHECK1-NEXT: store volatile double 1.000000e+00, double* [[TMP10]], align 8, !llvm.access.group !8 -// CHECK1-NEXT: store i32 3, i32* [[CONV2]], align 8, !llvm.access.group !8 -// CHECK1-NEXT: store float 4.000000e+00, float* [[CONV3]], align 8, !llvm.access.group !8 +// CHECK1-NEXT: store i32 3, i32* [[CONV2]], align 4, !llvm.access.group !8 +// CHECK1-NEXT: store float 4.000000e+00, float* [[CONV3]], align 4, !llvm.access.group !8 // CHECK1-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[CLASS_ANON_0]], %class.anon.0* [[REF_TMP]], i32 0, i32 0 // CHECK1-NEXT: store double* [[CONV]], double** [[TMP11]], align 8, !llvm.access.group !8 // CHECK1-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[CLASS_ANON_0]], %class.anon.0* [[REF_TMP]], i32 0, i32 1 @@ -769,8 +769,8 @@ int main() { // CHECK2-NEXT: store double 1.000000e+00, double* [[CONV]], align 8, !llvm.access.group !8 // CHECK2-NEXT: [[TMP10:%.*]] = load double*, double** [[TMP]], align 8, !llvm.access.group !8 // CHECK2-NEXT: store volatile double 1.000000e+00, double* [[TMP10]], align 8, !llvm.access.group !8 -// CHECK2-NEXT: store i32 3, i32* [[CONV2]], align 8, !llvm.access.group !8 -// CHECK2-NEXT: store float 4.000000e+00, float* [[CONV3]], align 8, !llvm.access.group !8 +// CHECK2-NEXT: store i32 3, i32* [[CONV2]], align 4, !llvm.access.group !8 +// CHECK2-NEXT: store float 4.000000e+00, float* [[CONV3]], align 4, !llvm.access.group !8 // CHECK2-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[CLASS_ANON_0]], %class.anon.0* [[REF_TMP]], i32 0, i32 0 // CHECK2-NEXT: store double* [[CONV]], double** [[TMP11]], align 8, !llvm.access.group !8 // CHECK2-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[CLASS_ANON_0]], %class.anon.0* [[REF_TMP]], i32 0, i32 1 @@ 
-1818,7 +1818,7 @@ int main() { // CHECK8-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP21]], 1 // CHECK8-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK8-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !9 -// CHECK8-NEXT: [[TMP22:%.*]] = load i32, i32* [[CONV]], align 8, !llvm.access.group !9 +// CHECK8-NEXT: [[TMP22:%.*]] = load i32, i32* [[CONV]], align 4, !llvm.access.group !9 // CHECK8-NEXT: [[TMP23:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !9 // CHECK8-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP23]] to i64 // CHECK8-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x i32], [2 x i32]* [[VEC5]], i64 0, i64 [[IDXPROM]] @@ -2280,7 +2280,7 @@ int main() { // CHECK8-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP21]], 1 // CHECK8-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK8-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !17 -// CHECK8-NEXT: [[TMP22:%.*]] = load i32, i32* [[CONV]], align 8, !llvm.access.group !17 +// CHECK8-NEXT: [[TMP22:%.*]] = load i32, i32* [[CONV]], align 4, !llvm.access.group !17 // CHECK8-NEXT: [[TMP23:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !17 // CHECK8-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP23]] to i64 // CHECK8-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x i32], [2 x i32]* [[VEC4]], i64 0, i64 [[IDXPROM]] @@ -2784,7 +2784,7 @@ int main() { // CHECK9-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP21]], 1 // CHECK9-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK9-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !9 -// CHECK9-NEXT: [[TMP22:%.*]] = load i32, i32* [[CONV]], align 8, !llvm.access.group !9 +// CHECK9-NEXT: [[TMP22:%.*]] = load i32, i32* [[CONV]], align 4, !llvm.access.group !9 // CHECK9-NEXT: [[TMP23:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !9 // CHECK9-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP23]] to i64 // CHECK9-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x i32], [2 x i32]* [[VEC5]], i64 0, i64 [[IDXPROM]] @@ -3246,7 +3246,7 @@ int main() { // CHECK9-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP21]], 1 // CHECK9-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK9-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !17 -// CHECK9-NEXT: [[TMP22:%.*]] = load i32, i32* [[CONV]], align 8, !llvm.access.group !17 +// CHECK9-NEXT: [[TMP22:%.*]] = load i32, i32* [[CONV]], align 4, !llvm.access.group !17 // CHECK9-NEXT: [[TMP23:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !17 // CHECK9-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP23]] to i64 // CHECK9-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x i32], [2 x i32]* [[VEC4]], i64 0, i64 [[IDXPROM]] diff --git a/clang/test/OpenMP/distribute_parallel_for_simd_if_codegen.cpp b/clang/test/OpenMP/distribute_parallel_for_simd_if_codegen.cpp index 52d2c6cc7768..a5c1ca79380a 100644 --- a/clang/test/OpenMP/distribute_parallel_for_simd_if_codegen.cpp +++ b/clang/test/OpenMP/distribute_parallel_for_simd_if_codegen.cpp @@ -3817,7 +3817,7 @@ int main() { // CHECK3-NEXT: store i32 [[CONV2]], i32* [[DOTOMP_UB]], align 4 // CHECK3-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK3-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK3-NEXT: [[TMP2:%.*]] = load i8, i8* [[CONV]], align 8 +// CHECK3-NEXT: [[TMP2:%.*]] = load i8, i8* [[CONV]], align 1 // CHECK3-NEXT: [[TOBOOL:%.*]] = trunc i8 [[TMP2]] to i1 // CHECK3-NEXT: br i1 [[TOBOOL]], label [[OMP_IF_THEN:%.*]], label [[OMP_IF_ELSE:%.*]] // CHECK3: omp_if.then: @@ -3945,7 +3945,7 @@ int main() { // CHECK3-NEXT: store i32 [[CONV2]], i32* 
[[DOTOMP_UB]], align 4 // CHECK3-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK3-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK3-NEXT: [[TMP2:%.*]] = load i8, i8* [[CONV]], align 8 +// CHECK3-NEXT: [[TMP2:%.*]] = load i8, i8* [[CONV]], align 1 // CHECK3-NEXT: [[TOBOOL:%.*]] = trunc i8 [[TMP2]] to i1 // CHECK3-NEXT: br i1 [[TOBOOL]], label [[OMP_IF_THEN:%.*]], label [[OMP_IF_ELSE:%.*]] // CHECK3: omp_if.then: @@ -5465,7 +5465,7 @@ int main() { // CHECK4-NEXT: store i32 [[CONV2]], i32* [[DOTOMP_UB]], align 4 // CHECK4-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK4-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK4-NEXT: [[TMP2:%.*]] = load i8, i8* [[CONV]], align 8 +// CHECK4-NEXT: [[TMP2:%.*]] = load i8, i8* [[CONV]], align 1 // CHECK4-NEXT: [[TOBOOL:%.*]] = trunc i8 [[TMP2]] to i1 // CHECK4-NEXT: br i1 [[TOBOOL]], label [[OMP_IF_THEN:%.*]], label [[OMP_IF_ELSE:%.*]] // CHECK4: omp_if.then: @@ -5593,7 +5593,7 @@ int main() { // CHECK4-NEXT: store i32 [[CONV2]], i32* [[DOTOMP_UB]], align 4 // CHECK4-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK4-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK4-NEXT: [[TMP2:%.*]] = load i8, i8* [[CONV]], align 8 +// CHECK4-NEXT: [[TMP2:%.*]] = load i8, i8* [[CONV]], align 1 // CHECK4-NEXT: [[TOBOOL:%.*]] = trunc i8 [[TMP2]] to i1 // CHECK4-NEXT: br i1 [[TOBOOL]], label [[OMP_IF_THEN:%.*]], label [[OMP_IF_ELSE:%.*]] // CHECK4: omp_if.then: @@ -11083,7 +11083,7 @@ int main() { // CHECK11-NEXT: store i32 [[CONV2]], i32* [[DOTOMP_UB]], align 4 // CHECK11-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK11-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK11-NEXT: [[TMP2:%.*]] = load i8, i8* [[CONV]], align 8 +// CHECK11-NEXT: [[TMP2:%.*]] = load i8, i8* [[CONV]], align 1 // CHECK11-NEXT: [[TOBOOL:%.*]] = trunc i8 [[TMP2]] to i1 // CHECK11-NEXT: br i1 [[TOBOOL]], label [[OMP_IF_THEN:%.*]], label [[OMP_IF_ELSE:%.*]] // CHECK11: omp_if.then: @@ -11211,7 +11211,7 @@ int main() { // CHECK11-NEXT: store i32 [[CONV2]], i32* [[DOTOMP_UB]], align 4 // CHECK11-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK11-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK11-NEXT: [[TMP2:%.*]] = load i8, i8* [[CONV]], align 8 +// CHECK11-NEXT: [[TMP2:%.*]] = load i8, i8* [[CONV]], align 1 // CHECK11-NEXT: [[TOBOOL:%.*]] = trunc i8 [[TMP2]] to i1 // CHECK11-NEXT: br i1 [[TOBOOL]], label [[OMP_IF_THEN:%.*]], label [[OMP_IF_ELSE:%.*]] // CHECK11: omp_if.then: @@ -12731,7 +12731,7 @@ int main() { // CHECK12-NEXT: store i32 [[CONV2]], i32* [[DOTOMP_UB]], align 4 // CHECK12-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK12-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK12-NEXT: [[TMP2:%.*]] = load i8, i8* [[CONV]], align 8 +// CHECK12-NEXT: [[TMP2:%.*]] = load i8, i8* [[CONV]], align 1 // CHECK12-NEXT: [[TOBOOL:%.*]] = trunc i8 [[TMP2]] to i1 // CHECK12-NEXT: br i1 [[TOBOOL]], label [[OMP_IF_THEN:%.*]], label [[OMP_IF_ELSE:%.*]] // CHECK12: omp_if.then: @@ -12859,7 +12859,7 @@ int main() { // CHECK12-NEXT: store i32 [[CONV2]], i32* [[DOTOMP_UB]], align 4 // CHECK12-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK12-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK12-NEXT: [[TMP2:%.*]] = load i8, i8* [[CONV]], align 8 +// CHECK12-NEXT: [[TMP2:%.*]] = load i8, i8* [[CONV]], align 1 // CHECK12-NEXT: [[TOBOOL:%.*]] = trunc i8 [[TMP2]] to i1 // CHECK12-NEXT: br i1 [[TOBOOL]], label [[OMP_IF_THEN:%.*]], label 
[[OMP_IF_ELSE:%.*]] // CHECK12: omp_if.then: diff --git a/clang/test/OpenMP/nvptx_distribute_parallel_generic_mode_codegen.cpp b/clang/test/OpenMP/nvptx_distribute_parallel_generic_mode_codegen.cpp index 287119f3f6a9..c8e95951a61f 100644 --- a/clang/test/OpenMP/nvptx_distribute_parallel_generic_mode_codegen.cpp +++ b/clang/test/OpenMP/nvptx_distribute_parallel_generic_mode_codegen.cpp @@ -2999,7 +2999,7 @@ int main(int argc, char **argv) { // CHECK4-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] // CHECK4: user_code.entry: // CHECK4-NEXT: [[TMP5:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB4:[0-9]+]]) -// CHECK4-NEXT: [[TMP6:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK4-NEXT: [[TMP6:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK4-NEXT: [[CONV1:%.*]] = bitcast i64* [[ARGC_CASTED]] to i32* // CHECK4-NEXT: store i32 [[TMP6]], i32* [[CONV1]], align 4 // CHECK4-NEXT: [[TMP7:%.*]] = load i64, i64* [[ARGC_CASTED]], align 8 @@ -3048,7 +3048,7 @@ int main(int argc, char **argv) { // CHECK4-NEXT: [[TMP3:%.*]] = load [10 x i32]*, [10 x i32]** [[D_ADDR]], align 8 // CHECK4-NEXT: [[C1:%.*]] = call i8* @__kmpc_alloc_shared(i64 40) // CHECK4-NEXT: [[C_ON_STACK:%.*]] = bitcast i8* [[C1]] to [10 x i32]* -// CHECK4-NEXT: [[TMP4:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK4-NEXT: [[TMP4:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK4-NEXT: store i32 [[TMP4]], i32* [[DOTCAPTURE_EXPR_]], align 4 // CHECK4-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 // CHECK4-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP5]], 0 diff --git a/clang/test/OpenMP/nvptx_parallel_codegen.cpp b/clang/test/OpenMP/nvptx_parallel_codegen.cpp index 15379282e853..5c33af387296 100644 --- a/clang/test/OpenMP/nvptx_parallel_codegen.cpp +++ b/clang/test/OpenMP/nvptx_parallel_codegen.cpp @@ -1457,9 +1457,9 @@ int bar(int n){ // CHECK1-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]], i32 0, i32 -1, i32 -1, i8* bitcast (void (i32*, i32*)* @__omp_outlined__1 to i8*), i8* bitcast (void (i16, i32)* @__omp_outlined__1_wrapper to i8*), i8** [[TMP3]], i64 0) // CHECK1-NEXT: [[TMP4:%.*]] = bitcast [0 x i8*]* [[CAPTURED_VARS_ADDRS2]] to i8** // CHECK1-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, i8* bitcast (void (i32*, i32*)* @__omp_outlined__2 to i8*), i8* bitcast (void (i16, i32)* @__omp_outlined__2_wrapper to i8*), i8** [[TMP4]], i64 0) -// CHECK1-NEXT: [[TMP5:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK1-NEXT: [[TMP5:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP5]], 1 -// CHECK1-NEXT: store i32 [[ADD]], i32* [[CONV]], align 8 +// CHECK1-NEXT: store i32 [[ADD]], i32* [[CONV]], align 4 // CHECK1-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i8 1, i1 true) // CHECK1-NEXT: ret void // CHECK1: worker.exit: @@ -1568,19 +1568,19 @@ int bar(int n){ // CHECK1-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] // CHECK1: user_code.entry: // CHECK1-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) -// CHECK1-NEXT: [[TMP3:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK1-NEXT: [[TMP3:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 1000 // CHECK1-NEXT: [[TMP4:%.*]] = zext i1 [[CMP]] to i32 // CHECK1-NEXT: [[TMP5:%.*]] = bitcast [0 x i8*]* [[CAPTURED_VARS_ADDRS]] to i8** // CHECK1-NEXT: call void 
@__kmpc_parallel_51(%struct.ident_t* @[[GLOB1]], i32 [[TMP2]], i32 [[TMP4]], i32 -1, i32 -1, i8* bitcast (void (i32*, i32*)* @__omp_outlined__3 to i8*), i8* bitcast (void (i16, i32)* @__omp_outlined__3_wrapper to i8*), i8** [[TMP5]], i64 0) -// CHECK1-NEXT: [[TMP6:%.*]] = load i32, i32* [[CONV1]], align 8 +// CHECK1-NEXT: [[TMP6:%.*]] = load i32, i32* [[CONV1]], align 4 // CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP6]], 1 -// CHECK1-NEXT: store i32 [[ADD]], i32* [[CONV1]], align 8 -// CHECK1-NEXT: [[TMP7:%.*]] = load i16, i16* [[CONV2]], align 8 +// CHECK1-NEXT: store i32 [[ADD]], i32* [[CONV1]], align 4 +// CHECK1-NEXT: [[TMP7:%.*]] = load i16, i16* [[CONV2]], align 2 // CHECK1-NEXT: [[CONV3:%.*]] = sext i16 [[TMP7]] to i32 // CHECK1-NEXT: [[ADD4:%.*]] = add nsw i32 [[CONV3]], 1 // CHECK1-NEXT: [[CONV5:%.*]] = trunc i32 [[ADD4]] to i16 -// CHECK1-NEXT: store i16 [[CONV5]], i16* [[CONV2]], align 8 +// CHECK1-NEXT: store i16 [[CONV5]], i16* [[CONV2]], align 2 // CHECK1-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], [10 x i32]* [[TMP0]], i64 0, i64 2 // CHECK1-NEXT: [[TMP8:%.*]] = load i32, i32* [[ARRAYIDX]], align 4 // CHECK1-NEXT: [[ADD6:%.*]] = add nsw i32 [[TMP8]], 1 @@ -1632,7 +1632,7 @@ int bar(int n){ // CHECK1-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1 // CHECK1-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] // CHECK1: user_code.entry: -// CHECK1-NEXT: [[TMP1:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK1-NEXT: [[A1:%.*]] = call i8* @__kmpc_alloc_shared(i64 4) // CHECK1-NEXT: [[A_ON_STACK:%.*]] = bitcast i8* [[A1]] to i32* // CHECK1-NEXT: store i32 [[TMP1]], i32* [[A_ON_STACK]], align 4 @@ -1849,11 +1849,11 @@ int bar(int n){ // CHECK2-NEXT: [[TMP6:%.*]] = load i32, i32* [[A_ADDR]], align 4 // CHECK2-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP6]], 1 // CHECK2-NEXT: store i32 [[ADD]], i32* [[A_ADDR]], align 4 -// CHECK2-NEXT: [[TMP7:%.*]] = load i16, i16* [[CONV]], align 4 +// CHECK2-NEXT: [[TMP7:%.*]] = load i16, i16* [[CONV]], align 2 // CHECK2-NEXT: [[CONV1:%.*]] = sext i16 [[TMP7]] to i32 // CHECK2-NEXT: [[ADD2:%.*]] = add nsw i32 [[CONV1]], 1 // CHECK2-NEXT: [[CONV3:%.*]] = trunc i32 [[ADD2]] to i16 -// CHECK2-NEXT: store i16 [[CONV3]], i16* [[CONV]], align 4 +// CHECK2-NEXT: store i16 [[CONV3]], i16* [[CONV]], align 2 // CHECK2-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], [10 x i32]* [[TMP0]], i32 0, i32 2 // CHECK2-NEXT: [[TMP8:%.*]] = load i32, i32* [[ARRAYIDX]], align 4 // CHECK2-NEXT: [[ADD4:%.*]] = add nsw i32 [[TMP8]], 1 diff --git a/clang/test/OpenMP/nvptx_parallel_for_codegen.cpp b/clang/test/OpenMP/nvptx_parallel_for_codegen.cpp index fdd3cd6db8cf..b0738928a013 100644 --- a/clang/test/OpenMP/nvptx_parallel_for_codegen.cpp +++ b/clang/test/OpenMP/nvptx_parallel_for_codegen.cpp @@ -472,7 +472,7 @@ int bar(int n){ // CHECK-NEXT: [[D:%.*]] = call i8* @__kmpc_alloc_shared(i64 4) // CHECK-NEXT: [[D_ON_STACK:%.*]] = bitcast i8* [[D]] to i32* // CHECK-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) -// CHECK-NEXT: [[TMP3:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK-NEXT: [[TMP3:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK-NEXT: store i32 [[TMP3]], i32* [[D_ON_STACK]], align 4 // CHECK-NEXT: [[TMP4:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[CAPTURED_VARS_ADDRS]], i64 0, i64 0 // CHECK-NEXT: [[TMP5:%.*]] = bitcast [10 x i32]* [[TMP0]] to i8* diff --git 
a/clang/test/OpenMP/nvptx_target_codegen.cpp b/clang/test/OpenMP/nvptx_target_codegen.cpp index 34fc9d94501f..c3a90d523110 100644 --- a/clang/test/OpenMP/nvptx_target_codegen.cpp +++ b/clang/test/OpenMP/nvptx_target_codegen.cpp @@ -215,16 +215,16 @@ void unreachable_call() { // CHECK1-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1 // CHECK1-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] // CHECK1: user_code.entry: -// CHECK1-NEXT: [[TMP1:%.*]] = load i16, i16* [[CONV]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = load i16, i16* [[CONV]], align 2 // CHECK1-NEXT: [[CONV1:%.*]] = sext i16 [[TMP1]] to i32 // CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[CONV1]], 1 // CHECK1-NEXT: [[CONV2:%.*]] = trunc i32 [[ADD]] to i16 -// CHECK1-NEXT: store i16 [[CONV2]], i16* [[CONV]], align 8 -// CHECK1-NEXT: [[TMP2:%.*]] = load i16, i16* [[CONV]], align 8 +// CHECK1-NEXT: store i16 [[CONV2]], i16* [[CONV]], align 2 +// CHECK1-NEXT: [[TMP2:%.*]] = load i16, i16* [[CONV]], align 2 // CHECK1-NEXT: [[CONV3:%.*]] = sext i16 [[TMP2]] to i32 // CHECK1-NEXT: [[ADD4:%.*]] = add nsw i32 [[CONV3]], 2 // CHECK1-NEXT: [[CONV5:%.*]] = trunc i32 [[ADD4]] to i16 -// CHECK1-NEXT: store i16 [[CONV5]], i16* [[CONV]], align 8 +// CHECK1-NEXT: store i16 [[CONV5]], i16* [[CONV]], align 2 // CHECK1-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i8 1, i1 true) // CHECK1-NEXT: ret void // CHECK1: worker.exit: @@ -265,9 +265,9 @@ void unreachable_call() { // CHECK1-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP8]], -1 // CHECK1-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] // CHECK1: user_code.entry: -// CHECK1-NEXT: [[TMP9:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK1-NEXT: [[TMP9:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP9]], 1 -// CHECK1-NEXT: store i32 [[ADD]], i32* [[CONV]], align 8 +// CHECK1-NEXT: store i32 [[ADD]], i32* [[CONV]], align 4 // CHECK1-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x float], [10 x float]* [[TMP0]], i64 0, i64 2 // CHECK1-NEXT: [[TMP10:%.*]] = load float, float* [[ARRAYIDX]], align 4 // CHECK1-NEXT: [[CONV5:%.*]] = fpext float [[TMP10]] to double @@ -342,19 +342,19 @@ void unreachable_call() { // CHECK1-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP1]], -1 // CHECK1-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] // CHECK1: user_code.entry: -// CHECK1-NEXT: [[TMP2:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK1-NEXT: [[TMP2:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP2]], 1 -// CHECK1-NEXT: store i32 [[ADD]], i32* [[CONV]], align 8 -// CHECK1-NEXT: [[TMP3:%.*]] = load i16, i16* [[CONV1]], align 8 +// CHECK1-NEXT: store i32 [[ADD]], i32* [[CONV]], align 4 +// CHECK1-NEXT: [[TMP3:%.*]] = load i16, i16* [[CONV1]], align 2 // CHECK1-NEXT: [[CONV3:%.*]] = sext i16 [[TMP3]] to i32 // CHECK1-NEXT: [[ADD4:%.*]] = add nsw i32 [[CONV3]], 1 // CHECK1-NEXT: [[CONV5:%.*]] = trunc i32 [[ADD4]] to i16 -// CHECK1-NEXT: store i16 [[CONV5]], i16* [[CONV1]], align 8 -// CHECK1-NEXT: [[TMP4:%.*]] = load i8, i8* [[CONV2]], align 8 +// CHECK1-NEXT: store i16 [[CONV5]], i16* [[CONV1]], align 2 +// CHECK1-NEXT: [[TMP4:%.*]] = load i8, i8* [[CONV2]], align 1 // CHECK1-NEXT: [[CONV6:%.*]] = sext i8 [[TMP4]] to i32 // CHECK1-NEXT: [[ADD7:%.*]] = add nsw i32 [[CONV6]], 1 // CHECK1-NEXT: [[CONV8:%.*]] = trunc i32 [[ADD7]] to i8 -// CHECK1-NEXT: store i8 [[CONV8]], i8* 
[[CONV2]], align 8 +// CHECK1-NEXT: store i8 [[CONV8]], i8* [[CONV2]], align 1 // CHECK1-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], [10 x i32]* [[TMP0]], i64 0, i64 2 // CHECK1-NEXT: [[TMP5:%.*]] = load i32, i32* [[ARRAYIDX]], align 4 // CHECK1-NEXT: [[ADD9:%.*]] = add nsw i32 [[TMP5]], 1 @@ -387,7 +387,7 @@ void unreachable_call() { // CHECK1-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP4]], -1 // CHECK1-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] // CHECK1: user_code.entry: -// CHECK1-NEXT: [[TMP5:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK1-NEXT: [[TMP5:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK1-NEXT: [[CONV3:%.*]] = sitofp i32 [[TMP5]] to double // CHECK1-NEXT: [[ADD:%.*]] = fadd double [[CONV3]], 1.500000e+00 // CHECK1-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_S1:%.*]], %struct.S1* [[TMP0]], i32 0, i32 0 @@ -468,14 +468,14 @@ void unreachable_call() { // CHECK1-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP1]], -1 // CHECK1-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] // CHECK1: user_code.entry: -// CHECK1-NEXT: [[TMP2:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK1-NEXT: [[TMP2:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP2]], 1 -// CHECK1-NEXT: store i32 [[ADD]], i32* [[CONV]], align 8 -// CHECK1-NEXT: [[TMP3:%.*]] = load i16, i16* [[CONV1]], align 8 +// CHECK1-NEXT: store i32 [[ADD]], i32* [[CONV]], align 4 +// CHECK1-NEXT: [[TMP3:%.*]] = load i16, i16* [[CONV1]], align 2 // CHECK1-NEXT: [[CONV2:%.*]] = sext i16 [[TMP3]] to i32 // CHECK1-NEXT: [[ADD3:%.*]] = add nsw i32 [[CONV2]], 1 // CHECK1-NEXT: [[CONV4:%.*]] = trunc i32 [[ADD3]] to i16 -// CHECK1-NEXT: store i16 [[CONV4]], i16* [[CONV1]], align 8 +// CHECK1-NEXT: store i16 [[CONV4]], i16* [[CONV1]], align 2 // CHECK1-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], [10 x i32]* [[TMP0]], i64 0, i64 2 // CHECK1-NEXT: [[TMP4:%.*]] = load i32, i32* [[ARRAYIDX]], align 4 // CHECK1-NEXT: [[ADD5:%.*]] = add nsw i32 [[TMP4]], 1 @@ -602,16 +602,16 @@ void unreachable_call() { // CHECK2-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1 // CHECK2-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] // CHECK2: user_code.entry: -// CHECK2-NEXT: [[TMP1:%.*]] = load i16, i16* [[CONV]], align 4 +// CHECK2-NEXT: [[TMP1:%.*]] = load i16, i16* [[CONV]], align 2 // CHECK2-NEXT: [[CONV1:%.*]] = sext i16 [[TMP1]] to i32 // CHECK2-NEXT: [[ADD:%.*]] = add nsw i32 [[CONV1]], 1 // CHECK2-NEXT: [[CONV2:%.*]] = trunc i32 [[ADD]] to i16 -// CHECK2-NEXT: store i16 [[CONV2]], i16* [[CONV]], align 4 -// CHECK2-NEXT: [[TMP2:%.*]] = load i16, i16* [[CONV]], align 4 +// CHECK2-NEXT: store i16 [[CONV2]], i16* [[CONV]], align 2 +// CHECK2-NEXT: [[TMP2:%.*]] = load i16, i16* [[CONV]], align 2 // CHECK2-NEXT: [[CONV3:%.*]] = sext i16 [[TMP2]] to i32 // CHECK2-NEXT: [[ADD4:%.*]] = add nsw i32 [[CONV3]], 2 // CHECK2-NEXT: [[CONV5:%.*]] = trunc i32 [[ADD4]] to i16 -// CHECK2-NEXT: store i16 [[CONV5]], i16* [[CONV]], align 4 +// CHECK2-NEXT: store i16 [[CONV5]], i16* [[CONV]], align 2 // CHECK2-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i8 1, i1 true) // CHECK2-NEXT: ret void // CHECK2: worker.exit: @@ -730,16 +730,16 @@ void unreachable_call() { // CHECK2-NEXT: [[TMP2:%.*]] = load i32, i32* [[A_ADDR]], align 4 // CHECK2-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP2]], 1 // CHECK2-NEXT: store i32 [[ADD]], i32* [[A_ADDR]], 
align 4 -// CHECK2-NEXT: [[TMP3:%.*]] = load i16, i16* [[CONV]], align 4 +// CHECK2-NEXT: [[TMP3:%.*]] = load i16, i16* [[CONV]], align 2 // CHECK2-NEXT: [[CONV2:%.*]] = sext i16 [[TMP3]] to i32 // CHECK2-NEXT: [[ADD3:%.*]] = add nsw i32 [[CONV2]], 1 // CHECK2-NEXT: [[CONV4:%.*]] = trunc i32 [[ADD3]] to i16 -// CHECK2-NEXT: store i16 [[CONV4]], i16* [[CONV]], align 4 -// CHECK2-NEXT: [[TMP4:%.*]] = load i8, i8* [[CONV1]], align 4 +// CHECK2-NEXT: store i16 [[CONV4]], i16* [[CONV]], align 2 +// CHECK2-NEXT: [[TMP4:%.*]] = load i8, i8* [[CONV1]], align 1 // CHECK2-NEXT: [[CONV5:%.*]] = sext i8 [[TMP4]] to i32 // CHECK2-NEXT: [[ADD6:%.*]] = add nsw i32 [[CONV5]], 1 // CHECK2-NEXT: [[CONV7:%.*]] = trunc i32 [[ADD6]] to i8 -// CHECK2-NEXT: store i8 [[CONV7]], i8* [[CONV1]], align 4 +// CHECK2-NEXT: store i8 [[CONV7]], i8* [[CONV1]], align 1 // CHECK2-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], [10 x i32]* [[TMP0]], i32 0, i32 2 // CHECK2-NEXT: [[TMP5:%.*]] = load i32, i32* [[ARRAYIDX]], align 4 // CHECK2-NEXT: [[ADD8:%.*]] = add nsw i32 [[TMP5]], 1 @@ -854,11 +854,11 @@ void unreachable_call() { // CHECK2-NEXT: [[TMP2:%.*]] = load i32, i32* [[A_ADDR]], align 4 // CHECK2-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP2]], 1 // CHECK2-NEXT: store i32 [[ADD]], i32* [[A_ADDR]], align 4 -// CHECK2-NEXT: [[TMP3:%.*]] = load i16, i16* [[CONV]], align 4 +// CHECK2-NEXT: [[TMP3:%.*]] = load i16, i16* [[CONV]], align 2 // CHECK2-NEXT: [[CONV1:%.*]] = sext i16 [[TMP3]] to i32 // CHECK2-NEXT: [[ADD2:%.*]] = add nsw i32 [[CONV1]], 1 // CHECK2-NEXT: [[CONV3:%.*]] = trunc i32 [[ADD2]] to i16 -// CHECK2-NEXT: store i16 [[CONV3]], i16* [[CONV]], align 4 +// CHECK2-NEXT: store i16 [[CONV3]], i16* [[CONV]], align 2 // CHECK2-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], [10 x i32]* [[TMP0]], i32 0, i32 2 // CHECK2-NEXT: [[TMP4:%.*]] = load i32, i32* [[ARRAYIDX]], align 4 // CHECK2-NEXT: [[ADD4:%.*]] = add nsw i32 [[TMP4]], 1 @@ -985,16 +985,16 @@ void unreachable_call() { // CHECK3-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1 // CHECK3-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] // CHECK3: user_code.entry: -// CHECK3-NEXT: [[TMP1:%.*]] = load i16, i16* [[CONV]], align 4 +// CHECK3-NEXT: [[TMP1:%.*]] = load i16, i16* [[CONV]], align 2 // CHECK3-NEXT: [[CONV1:%.*]] = sext i16 [[TMP1]] to i32 // CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 [[CONV1]], 1 // CHECK3-NEXT: [[CONV2:%.*]] = trunc i32 [[ADD]] to i16 -// CHECK3-NEXT: store i16 [[CONV2]], i16* [[CONV]], align 4 -// CHECK3-NEXT: [[TMP2:%.*]] = load i16, i16* [[CONV]], align 4 +// CHECK3-NEXT: store i16 [[CONV2]], i16* [[CONV]], align 2 +// CHECK3-NEXT: [[TMP2:%.*]] = load i16, i16* [[CONV]], align 2 // CHECK3-NEXT: [[CONV3:%.*]] = sext i16 [[TMP2]] to i32 // CHECK3-NEXT: [[ADD4:%.*]] = add nsw i32 [[CONV3]], 2 // CHECK3-NEXT: [[CONV5:%.*]] = trunc i32 [[ADD4]] to i16 -// CHECK3-NEXT: store i16 [[CONV5]], i16* [[CONV]], align 4 +// CHECK3-NEXT: store i16 [[CONV5]], i16* [[CONV]], align 2 // CHECK3-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i8 1, i1 true) // CHECK3-NEXT: ret void // CHECK3: worker.exit: @@ -1113,16 +1113,16 @@ void unreachable_call() { // CHECK3-NEXT: [[TMP2:%.*]] = load i32, i32* [[A_ADDR]], align 4 // CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP2]], 1 // CHECK3-NEXT: store i32 [[ADD]], i32* [[A_ADDR]], align 4 -// CHECK3-NEXT: [[TMP3:%.*]] = load i16, i16* [[CONV]], align 4 +// CHECK3-NEXT: [[TMP3:%.*]] = load i16, i16* [[CONV]], align 2 // 
CHECK3-NEXT: [[CONV2:%.*]] = sext i16 [[TMP3]] to i32 // CHECK3-NEXT: [[ADD3:%.*]] = add nsw i32 [[CONV2]], 1 // CHECK3-NEXT: [[CONV4:%.*]] = trunc i32 [[ADD3]] to i16 -// CHECK3-NEXT: store i16 [[CONV4]], i16* [[CONV]], align 4 -// CHECK3-NEXT: [[TMP4:%.*]] = load i8, i8* [[CONV1]], align 4 +// CHECK3-NEXT: store i16 [[CONV4]], i16* [[CONV]], align 2 +// CHECK3-NEXT: [[TMP4:%.*]] = load i8, i8* [[CONV1]], align 1 // CHECK3-NEXT: [[CONV5:%.*]] = sext i8 [[TMP4]] to i32 // CHECK3-NEXT: [[ADD6:%.*]] = add nsw i32 [[CONV5]], 1 // CHECK3-NEXT: [[CONV7:%.*]] = trunc i32 [[ADD6]] to i8 -// CHECK3-NEXT: store i8 [[CONV7]], i8* [[CONV1]], align 4 +// CHECK3-NEXT: store i8 [[CONV7]], i8* [[CONV1]], align 1 // CHECK3-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], [10 x i32]* [[TMP0]], i32 0, i32 2 // CHECK3-NEXT: [[TMP5:%.*]] = load i32, i32* [[ARRAYIDX]], align 4 // CHECK3-NEXT: [[ADD8:%.*]] = add nsw i32 [[TMP5]], 1 @@ -1237,11 +1237,11 @@ void unreachable_call() { // CHECK3-NEXT: [[TMP2:%.*]] = load i32, i32* [[A_ADDR]], align 4 // CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP2]], 1 // CHECK3-NEXT: store i32 [[ADD]], i32* [[A_ADDR]], align 4 -// CHECK3-NEXT: [[TMP3:%.*]] = load i16, i16* [[CONV]], align 4 +// CHECK3-NEXT: [[TMP3:%.*]] = load i16, i16* [[CONV]], align 2 // CHECK3-NEXT: [[CONV1:%.*]] = sext i16 [[TMP3]] to i32 // CHECK3-NEXT: [[ADD2:%.*]] = add nsw i32 [[CONV1]], 1 // CHECK3-NEXT: [[CONV3:%.*]] = trunc i32 [[ADD2]] to i16 -// CHECK3-NEXT: store i16 [[CONV3]], i16* [[CONV]], align 4 +// CHECK3-NEXT: store i16 [[CONV3]], i16* [[CONV]], align 2 // CHECK3-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], [10 x i32]* [[TMP0]], i32 0, i32 2 // CHECK3-NEXT: [[TMP4:%.*]] = load i32, i32* [[ARRAYIDX]], align 4 // CHECK3-NEXT: [[ADD4:%.*]] = add nsw i32 [[TMP4]], 1 diff --git a/clang/test/OpenMP/nvptx_target_parallel_num_threads_codegen.cpp b/clang/test/OpenMP/nvptx_target_parallel_num_threads_codegen.cpp index 5049500516f6..6e9385456492 100644 --- a/clang/test/OpenMP/nvptx_target_parallel_num_threads_codegen.cpp +++ b/clang/test/OpenMP/nvptx_target_parallel_num_threads_codegen.cpp @@ -108,7 +108,7 @@ int bar(int n){ // CHECK1-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] // CHECK1: user_code.entry: // CHECK1-NEXT: [[TMP4:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB2]]) -// CHECK1-NEXT: [[TMP5:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK1-NEXT: [[TMP5:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK1-NEXT: [[TMP6:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[CAPTURED_VARS_ADDRS]], i64 0, i64 0 // CHECK1-NEXT: [[TMP7:%.*]] = bitcast i32* [[TMP0]] to i8* // CHECK1-NEXT: store i8* [[TMP7]], i8** [[TMP6]], align 8 @@ -439,7 +439,7 @@ int bar(int n){ // CHECK4-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] // CHECK4: user_code.entry: // CHECK4-NEXT: [[TMP4:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB2]]) -// CHECK4-NEXT: [[TMP5:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK4-NEXT: [[TMP5:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK4-NEXT: [[TMP6:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[CAPTURED_VARS_ADDRS]], i64 0, i64 0 // CHECK4-NEXT: [[TMP7:%.*]] = bitcast i32* [[TMP0]] to i8* // CHECK4-NEXT: store i8* [[TMP7]], i8** [[TMP6]], align 8 diff --git a/clang/test/OpenMP/nvptx_target_printf_codegen.c b/clang/test/OpenMP/nvptx_target_printf_codegen.c index 99b48cc792b2..b3e258d30d77 100644 --- 
a/clang/test/OpenMP/nvptx_target_printf_codegen.c +++ b/clang/test/OpenMP/nvptx_target_printf_codegen.c @@ -93,7 +93,7 @@ void CheckAllocaIsInEntryBlock() { // CHECK-64-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1 // CHECK-64-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] // CHECK-64: user_code.entry: -// CHECK-64-NEXT: [[TMP1:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK-64-NEXT: [[TMP1:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK-64-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[TMP1]], 0 // CHECK-64-NEXT: br i1 [[TOBOOL]], label [[IF_THEN:%.*]], label [[IF_END:%.*]] // CHECK-64: if.then: diff --git a/clang/test/OpenMP/nvptx_target_teams_codegen.cpp b/clang/test/OpenMP/nvptx_target_teams_codegen.cpp index fbe62f191eec..53ca71dbcd9b 100644 --- a/clang/test/OpenMP/nvptx_target_teams_codegen.cpp +++ b/clang/test/OpenMP/nvptx_target_teams_codegen.cpp @@ -63,7 +63,7 @@ int bar(int n){ // CHECK1-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] // CHECK1: user_code.entry: // CHECK1-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) -// CHECK1-NEXT: [[TMP2:%.*]] = load i8, i8* [[CONV]], align 8 +// CHECK1-NEXT: [[TMP2:%.*]] = load i8, i8* [[CONV]], align 1 // CHECK1-NEXT: [[CONV1:%.*]] = bitcast i64* [[A_CASTED]] to i8* // CHECK1-NEXT: store i8 [[TMP2]], i8* [[CONV1]], align 1 // CHECK1-NEXT: [[TMP3:%.*]] = load i64, i64* [[A_CASTED]], align 8 @@ -86,7 +86,7 @@ int bar(int n){ // CHECK1-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 // CHECK1-NEXT: store i64 [[A]], i64* [[A_ADDR]], align 8 // CHECK1-NEXT: [[CONV:%.*]] = bitcast i64* [[A_ADDR]] to i8* -// CHECK1-NEXT: store i8 49, i8* [[CONV]], align 8 +// CHECK1-NEXT: store i8 49, i8* [[CONV]], align 1 // CHECK1-NEXT: ret void // // @@ -104,7 +104,7 @@ int bar(int n){ // CHECK1-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] // CHECK1: user_code.entry: // CHECK1-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) -// CHECK1-NEXT: [[TMP2:%.*]] = load i16, i16* [[CONV]], align 8 +// CHECK1-NEXT: [[TMP2:%.*]] = load i16, i16* [[CONV]], align 2 // CHECK1-NEXT: [[CONV1:%.*]] = bitcast i64* [[AA_CASTED]] to i16* // CHECK1-NEXT: store i16 [[TMP2]], i16* [[CONV1]], align 2 // CHECK1-NEXT: [[TMP3:%.*]] = load i64, i64* [[AA_CASTED]], align 8 @@ -127,7 +127,7 @@ int bar(int n){ // CHECK1-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 // CHECK1-NEXT: store i64 [[AA]], i64* [[AA_ADDR]], align 8 // CHECK1-NEXT: [[CONV:%.*]] = bitcast i64* [[AA_ADDR]] to i16* -// CHECK1-NEXT: store i16 1, i16* [[CONV]], align 8 +// CHECK1-NEXT: store i16 1, i16* [[CONV]], align 2 // CHECK1-NEXT: ret void // // @@ -145,7 +145,7 @@ int bar(int n){ // CHECK1-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] // CHECK1: user_code.entry: // CHECK1-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB2:[0-9]+]]) -// CHECK1-NEXT: [[TMP2:%.*]] = load i16, i16* [[CONV]], align 8 +// CHECK1-NEXT: [[TMP2:%.*]] = load i16, i16* [[CONV]], align 2 // CHECK1-NEXT: [[CONV1:%.*]] = bitcast i64* [[AA_CASTED]] to i16* // CHECK1-NEXT: store i16 [[TMP2]], i16* [[CONV1]], align 2 // CHECK1-NEXT: [[TMP3:%.*]] = load i64, i64* [[AA_CASTED]], align 8 @@ -228,7 +228,7 @@ int bar(int n){ // CHECK2-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] // 
CHECK2: user_code.entry: // CHECK2-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) -// CHECK2-NEXT: [[TMP2:%.*]] = load i8, i8* [[CONV]], align 4 +// CHECK2-NEXT: [[TMP2:%.*]] = load i8, i8* [[CONV]], align 1 // CHECK2-NEXT: [[CONV1:%.*]] = bitcast i32* [[A_CASTED]] to i8* // CHECK2-NEXT: store i8 [[TMP2]], i8* [[CONV1]], align 1 // CHECK2-NEXT: [[TMP3:%.*]] = load i32, i32* [[A_CASTED]], align 4 @@ -251,7 +251,7 @@ int bar(int n){ // CHECK2-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4 // CHECK2-NEXT: store i32 [[A]], i32* [[A_ADDR]], align 4 // CHECK2-NEXT: [[CONV:%.*]] = bitcast i32* [[A_ADDR]] to i8* -// CHECK2-NEXT: store i8 49, i8* [[CONV]], align 4 +// CHECK2-NEXT: store i8 49, i8* [[CONV]], align 1 // CHECK2-NEXT: ret void // // @@ -269,7 +269,7 @@ int bar(int n){ // CHECK2-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] // CHECK2: user_code.entry: // CHECK2-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) -// CHECK2-NEXT: [[TMP2:%.*]] = load i16, i16* [[CONV]], align 4 +// CHECK2-NEXT: [[TMP2:%.*]] = load i16, i16* [[CONV]], align 2 // CHECK2-NEXT: [[CONV1:%.*]] = bitcast i32* [[AA_CASTED]] to i16* // CHECK2-NEXT: store i16 [[TMP2]], i16* [[CONV1]], align 2 // CHECK2-NEXT: [[TMP3:%.*]] = load i32, i32* [[AA_CASTED]], align 4 @@ -292,7 +292,7 @@ int bar(int n){ // CHECK2-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4 // CHECK2-NEXT: store i32 [[AA]], i32* [[AA_ADDR]], align 4 // CHECK2-NEXT: [[CONV:%.*]] = bitcast i32* [[AA_ADDR]] to i16* -// CHECK2-NEXT: store i16 1, i16* [[CONV]], align 4 +// CHECK2-NEXT: store i16 1, i16* [[CONV]], align 2 // CHECK2-NEXT: ret void // // @@ -310,7 +310,7 @@ int bar(int n){ // CHECK2-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] // CHECK2: user_code.entry: // CHECK2-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB2:[0-9]+]]) -// CHECK2-NEXT: [[TMP2:%.*]] = load i16, i16* [[CONV]], align 4 +// CHECK2-NEXT: [[TMP2:%.*]] = load i16, i16* [[CONV]], align 2 // CHECK2-NEXT: [[CONV1:%.*]] = bitcast i32* [[AA_CASTED]] to i16* // CHECK2-NEXT: store i16 [[TMP2]], i16* [[CONV1]], align 2 // CHECK2-NEXT: [[TMP3:%.*]] = load i32, i32* [[AA_CASTED]], align 4 @@ -393,7 +393,7 @@ int bar(int n){ // CHECK3-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] // CHECK3: user_code.entry: // CHECK3-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) -// CHECK3-NEXT: [[TMP2:%.*]] = load i8, i8* [[CONV]], align 4 +// CHECK3-NEXT: [[TMP2:%.*]] = load i8, i8* [[CONV]], align 1 // CHECK3-NEXT: [[CONV1:%.*]] = bitcast i32* [[A_CASTED]] to i8* // CHECK3-NEXT: store i8 [[TMP2]], i8* [[CONV1]], align 1 // CHECK3-NEXT: [[TMP3:%.*]] = load i32, i32* [[A_CASTED]], align 4 @@ -416,7 +416,7 @@ int bar(int n){ // CHECK3-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4 // CHECK3-NEXT: store i32 [[A]], i32* [[A_ADDR]], align 4 // CHECK3-NEXT: [[CONV:%.*]] = bitcast i32* [[A_ADDR]] to i8* -// CHECK3-NEXT: store i8 49, i8* [[CONV]], align 4 +// CHECK3-NEXT: store i8 49, i8* [[CONV]], align 1 // CHECK3-NEXT: ret void // // @@ -434,7 +434,7 @@ int bar(int n){ // CHECK3-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] // CHECK3: user_code.entry: // CHECK3-NEXT: [[TMP1:%.*]] = call i32 
@__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) -// CHECK3-NEXT: [[TMP2:%.*]] = load i16, i16* [[CONV]], align 4 +// CHECK3-NEXT: [[TMP2:%.*]] = load i16, i16* [[CONV]], align 2 // CHECK3-NEXT: [[CONV1:%.*]] = bitcast i32* [[AA_CASTED]] to i16* // CHECK3-NEXT: store i16 [[TMP2]], i16* [[CONV1]], align 2 // CHECK3-NEXT: [[TMP3:%.*]] = load i32, i32* [[AA_CASTED]], align 4 @@ -457,7 +457,7 @@ int bar(int n){ // CHECK3-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4 // CHECK3-NEXT: store i32 [[AA]], i32* [[AA_ADDR]], align 4 // CHECK3-NEXT: [[CONV:%.*]] = bitcast i32* [[AA_ADDR]] to i16* -// CHECK3-NEXT: store i16 1, i16* [[CONV]], align 4 +// CHECK3-NEXT: store i16 1, i16* [[CONV]], align 2 // CHECK3-NEXT: ret void // // @@ -475,7 +475,7 @@ int bar(int n){ // CHECK3-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] // CHECK3: user_code.entry: // CHECK3-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB2:[0-9]+]]) -// CHECK3-NEXT: [[TMP2:%.*]] = load i16, i16* [[CONV]], align 4 +// CHECK3-NEXT: [[TMP2:%.*]] = load i16, i16* [[CONV]], align 2 // CHECK3-NEXT: [[CONV1:%.*]] = bitcast i32* [[AA_CASTED]] to i16* // CHECK3-NEXT: store i16 [[TMP2]], i16* [[CONV1]], align 2 // CHECK3-NEXT: [[TMP3:%.*]] = load i32, i32* [[AA_CASTED]], align 4 diff --git a/clang/test/OpenMP/nvptx_target_teams_distribute_parallel_for_codegen.cpp b/clang/test/OpenMP/nvptx_target_teams_distribute_parallel_for_codegen.cpp index d0b57801530b..0cd88fb9b12c 100644 --- a/clang/test/OpenMP/nvptx_target_teams_distribute_parallel_for_codegen.cpp +++ b/clang/test/OpenMP/nvptx_target_teams_distribute_parallel_for_codegen.cpp @@ -18488,11 +18488,11 @@ int bar(int n){ // CHECK1-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] // CHECK1: user_code.entry: // CHECK1-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB4:[0-9]+]]) -// CHECK1-NEXT: [[TMP3:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK1-NEXT: [[TMP3:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK1-NEXT: [[CONV2:%.*]] = bitcast i64* [[N_CASTED]] to i32* // CHECK1-NEXT: store i32 [[TMP3]], i32* [[CONV2]], align 4 // CHECK1-NEXT: [[TMP4:%.*]] = load i64, i64* [[N_CASTED]], align 8 -// CHECK1-NEXT: [[TMP5:%.*]] = load i32, i32* [[CONV1]], align 8 +// CHECK1-NEXT: [[TMP5:%.*]] = load i32, i32* [[CONV1]], align 4 // CHECK1-NEXT: [[CONV3:%.*]] = bitcast i64* [[L_CASTED]] to i32* // CHECK1-NEXT: store i32 [[TMP5]], i32* [[CONV3]], align 4 // CHECK1-NEXT: [[TMP6:%.*]] = load i64, i64* [[L_CASTED]], align 8 @@ -18536,7 +18536,7 @@ int bar(int n){ // CHECK1-NEXT: [[CONV1:%.*]] = bitcast i64* [[L_ADDR]] to i32* // CHECK1-NEXT: [[L2:%.*]] = call i8* @__kmpc_alloc_shared(i64 4) // CHECK1-NEXT: [[L_ON_STACK:%.*]] = bitcast i8* [[L2]] to i32* -// CHECK1-NEXT: [[TMP1:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK1-NEXT: store i32 [[TMP1]], i32* [[DOTCAPTURE_EXPR_]], align 4 // CHECK1-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 // CHECK1-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP2]], 0 @@ -18583,11 +18583,11 @@ int bar(int n){ // CHECK1-NEXT: [[TMP15:%.*]] = zext i32 [[TMP14]] to i64 // CHECK1-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK1-NEXT: [[TMP17:%.*]] = zext i32 [[TMP16]] to i64 -// CHECK1-NEXT: [[TMP18:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK1-NEXT: [[TMP18:%.*]] = load i32, i32* [[CONV]], 
align 4 // CHECK1-NEXT: [[CONV8:%.*]] = bitcast i64* [[N_CASTED]] to i32* // CHECK1-NEXT: store i32 [[TMP18]], i32* [[CONV8]], align 4 // CHECK1-NEXT: [[TMP19:%.*]] = load i64, i64* [[N_CASTED]], align 8 -// CHECK1-NEXT: [[TMP20:%.*]] = load i32, i32* [[CONV1]], align 8 +// CHECK1-NEXT: [[TMP20:%.*]] = load i32, i32* [[CONV1]], align 4 // CHECK1-NEXT: [[CONV9:%.*]] = bitcast i64* [[L_CASTED]] to i32* // CHECK1-NEXT: store i32 [[TMP20]], i32* [[CONV9]], align 4 // CHECK1-NEXT: [[TMP21:%.*]] = load i64, i64* [[L_CASTED]], align 8 @@ -18650,8 +18650,8 @@ int bar(int n){ // CHECK1-NEXT: [[TMP49:%.*]] = icmp ne i32 [[TMP48]], 0 // CHECK1-NEXT: br i1 [[TMP49]], label [[DOTOMP_LASTPRIVATE_THEN:%.*]], label [[DOTOMP_LASTPRIVATE_DONE:%.*]] // CHECK1: .omp.lastprivate.then: -// CHECK1-NEXT: [[TMP50:%.*]] = load i32, i32* [[CONV1]], align 8 -// CHECK1-NEXT: store i32 [[TMP50]], i32* [[CONV1]], align 8 +// CHECK1-NEXT: [[TMP50:%.*]] = load i32, i32* [[CONV1]], align 4 +// CHECK1-NEXT: store i32 [[TMP50]], i32* [[CONV1]], align 4 // CHECK1-NEXT: br label [[DOTOMP_LASTPRIVATE_DONE]] // CHECK1: .omp.lastprivate.done: // CHECK1-NEXT: br label [[OMP_PRECOND_END]] @@ -18690,7 +18690,7 @@ int bar(int n){ // CHECK1-NEXT: [[CONV:%.*]] = bitcast i64* [[N_ADDR]] to i32* // CHECK1-NEXT: [[TMP0:%.*]] = load [1000 x i32]*, [1000 x i32]** [[A_ADDR]], align 8 // CHECK1-NEXT: [[CONV1:%.*]] = bitcast i64* [[L_ADDR]] to i32* -// CHECK1-NEXT: [[TMP1:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK1-NEXT: store i32 [[TMP1]], i32* [[DOTCAPTURE_EXPR_]], align 4 // CHECK1-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 // CHECK1-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP2]], 0 @@ -18756,7 +18756,7 @@ int bar(int n){ // CHECK1-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [1000 x i32], [1000 x i32]* [[TMP0]], i64 0, i64 [[IDXPROM]] // CHECK1-NEXT: store i32 1, i32* [[ARRAYIDX]], align 4 // CHECK1-NEXT: [[TMP20:%.*]] = load i32, i32* [[I6]], align 4 -// CHECK1-NEXT: store i32 [[TMP20]], i32* [[CONV1]], align 8 +// CHECK1-NEXT: store i32 [[TMP20]], i32* [[CONV1]], align 4 // CHECK1-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK1: omp.body.continue: // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] @@ -18785,8 +18785,8 @@ int bar(int n){ // CHECK1-NEXT: [[TMP29:%.*]] = icmp ne i32 [[TMP28]], 0 // CHECK1-NEXT: br i1 [[TMP29]], label [[DOTOMP_LASTPRIVATE_THEN:%.*]], label [[DOTOMP_LASTPRIVATE_DONE:%.*]] // CHECK1: .omp.lastprivate.then: -// CHECK1-NEXT: [[TMP30:%.*]] = load i32, i32* [[CONV1]], align 8 -// CHECK1-NEXT: store i32 [[TMP30]], i32* [[CONV1]], align 8 +// CHECK1-NEXT: [[TMP30:%.*]] = load i32, i32* [[CONV1]], align 4 +// CHECK1-NEXT: store i32 [[TMP30]], i32* [[CONV1]], align 4 // CHECK1-NEXT: br label [[DOTOMP_LASTPRIVATE_DONE]] // CHECK1: .omp.lastprivate.done: // CHECK1-NEXT: br label [[OMP_PRECOND_END]] @@ -18811,7 +18811,7 @@ int bar(int n){ // CHECK1-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] // CHECK1: user_code.entry: // CHECK1-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB4]]) -// CHECK1-NEXT: [[TMP3:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK1-NEXT: [[TMP3:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK1-NEXT: [[CONV1:%.*]] = bitcast i64* [[N_CASTED]] to i32* // CHECK1-NEXT: store i32 [[TMP3]], i32* [[CONV1]], align 4 // CHECK1-NEXT: [[TMP4:%.*]] = load i64, i64* [[N_CASTED]], align 8 @@ -18849,7 +18849,7 @@ int bar(int n){ // 
CHECK1-NEXT: store [1000 x i16]* [[AA]], [1000 x i16]** [[AA_ADDR]], align 8 // CHECK1-NEXT: [[CONV:%.*]] = bitcast i64* [[N_ADDR]] to i32* // CHECK1-NEXT: [[TMP0:%.*]] = load [1000 x i16]*, [1000 x i16]** [[AA_ADDR]], align 8 -// CHECK1-NEXT: [[TMP1:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK1-NEXT: store i32 [[TMP1]], i32* [[DOTCAPTURE_EXPR_]], align 4 // CHECK1-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 // CHECK1-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP2]], 0 @@ -18897,7 +18897,7 @@ int bar(int n){ // CHECK1-NEXT: [[TMP15:%.*]] = zext i32 [[TMP14]] to i64 // CHECK1-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK1-NEXT: [[TMP17:%.*]] = zext i32 [[TMP16]] to i64 -// CHECK1-NEXT: [[TMP18:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK1-NEXT: [[TMP18:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK1-NEXT: [[CONV6:%.*]] = bitcast i64* [[N_CASTED]] to i32* // CHECK1-NEXT: store i32 [[TMP18]], i32* [[CONV6]], align 4 // CHECK1-NEXT: [[TMP19:%.*]] = load i64, i64* [[N_CASTED]], align 8 @@ -18985,7 +18985,7 @@ int bar(int n){ // CHECK1-NEXT: store [1000 x i16]* [[AA]], [1000 x i16]** [[AA_ADDR]], align 8 // CHECK1-NEXT: [[CONV:%.*]] = bitcast i64* [[N_ADDR]] to i32* // CHECK1-NEXT: [[TMP0:%.*]] = load [1000 x i16]*, [1000 x i16]** [[AA_ADDR]], align 8 -// CHECK1-NEXT: [[TMP1:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK1-NEXT: store i32 [[TMP1]], i32* [[DOTCAPTURE_EXPR_]], align 4 // CHECK1-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 // CHECK1-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP2]], 0 @@ -19257,7 +19257,7 @@ int bar(int n){ // CHECK1-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] // CHECK1: user_code.entry: // CHECK1-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB4]]) -// CHECK1-NEXT: [[TMP3:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK1-NEXT: [[TMP3:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK1-NEXT: [[CONV1:%.*]] = bitcast i64* [[F_CASTED]] to i32* // CHECK1-NEXT: store i32 [[TMP3]], i32* [[CONV1]], align 4 // CHECK1-NEXT: [[TMP4:%.*]] = load i64, i64* [[F_CASTED]], align 8 @@ -19326,7 +19326,7 @@ int bar(int n){ // CHECK1-NEXT: [[TMP8:%.*]] = zext i32 [[TMP7]] to i64 // CHECK1-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK1-NEXT: [[TMP10:%.*]] = zext i32 [[TMP9]] to i64 -// CHECK1-NEXT: [[TMP11:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK1-NEXT: [[TMP11:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK1-NEXT: [[CONV3:%.*]] = bitcast i64* [[F_CASTED]] to i32* // CHECK1-NEXT: store i32 [[TMP11]], i32* [[CONV3]], align 4 // CHECK1-NEXT: [[TMP12:%.*]] = load i64, i64* [[F_CASTED]], align 8 @@ -19445,7 +19445,7 @@ int bar(int n){ // CHECK1-NEXT: store i32 10, i32* [[K]], align 4 // CHECK1-NEXT: [[TMP11:%.*]] = load i32, i32* [[I]], align 4 // CHECK1-NEXT: [[TMP12:%.*]] = load i32, i32* [[J]], align 4 -// CHECK1-NEXT: [[TMP13:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK1-NEXT: [[TMP13:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK1-NEXT: [[MUL9:%.*]] = mul nsw i32 [[TMP12]], [[TMP13]] // CHECK1-NEXT: [[ADD10:%.*]] = add nsw i32 [[TMP11]], [[MUL9]] // CHECK1-NEXT: [[TMP14:%.*]] = load i32, i32* [[K]], align 4 @@ -19490,7 +19490,7 @@ int bar(int n){ // CHECK1-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] // CHECK1: 
user_code.entry: // CHECK1-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB4]]) -// CHECK1-NEXT: [[TMP3:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK1-NEXT: [[TMP3:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK1-NEXT: [[CONV1:%.*]] = bitcast i64* [[N_CASTED]] to i32* // CHECK1-NEXT: store i32 [[TMP3]], i32* [[CONV1]], align 4 // CHECK1-NEXT: [[TMP4:%.*]] = load i64, i64* [[N_CASTED]], align 8 @@ -19532,9 +19532,9 @@ int bar(int n){ // CHECK1-NEXT: store [10 x [10 x i32]]* [[C]], [10 x [10 x i32]]** [[C_ADDR]], align 8 // CHECK1-NEXT: [[CONV:%.*]] = bitcast i64* [[N_ADDR]] to i32* // CHECK1-NEXT: [[TMP0:%.*]] = load [10 x [10 x i32]]*, [10 x [10 x i32]]** [[C_ADDR]], align 8 -// CHECK1-NEXT: [[TMP1:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK1-NEXT: store i32 [[TMP1]], i32* [[DOTCAPTURE_EXPR_]], align 4 -// CHECK1-NEXT: [[TMP2:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK1-NEXT: [[TMP2:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK1-NEXT: store i32 [[TMP2]], i32* [[DOTCAPTURE_EXPR_2]], align 4 // CHECK1-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 // CHECK1-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP3]], 0 @@ -19592,7 +19592,7 @@ int bar(int n){ // CHECK1: omp.inner.for.body: // CHECK1-NEXT: [[TMP17:%.*]] = load i64, i64* [[DOTOMP_COMB_LB]], align 8 // CHECK1-NEXT: [[TMP18:%.*]] = load i64, i64* [[DOTOMP_COMB_UB]], align 8 -// CHECK1-NEXT: [[TMP19:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK1-NEXT: [[TMP19:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK1-NEXT: [[CONV15:%.*]] = bitcast i64* [[N_CASTED]] to i32* // CHECK1-NEXT: store i32 [[TMP19]], i32* [[CONV15]], align 4 // CHECK1-NEXT: [[TMP20:%.*]] = load i64, i64* [[N_CASTED]], align 8 @@ -19684,9 +19684,9 @@ int bar(int n){ // CHECK1-NEXT: store [10 x [10 x i32]]* [[C]], [10 x [10 x i32]]** [[C_ADDR]], align 8 // CHECK1-NEXT: [[CONV:%.*]] = bitcast i64* [[N_ADDR]] to i32* // CHECK1-NEXT: [[TMP0:%.*]] = load [10 x [10 x i32]]*, [10 x [10 x i32]]** [[C_ADDR]], align 8 -// CHECK1-NEXT: [[TMP1:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK1-NEXT: store i32 [[TMP1]], i32* [[DOTCAPTURE_EXPR_]], align 4 -// CHECK1-NEXT: [[TMP2:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK1-NEXT: [[TMP2:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK1-NEXT: store i32 [[TMP2]], i32* [[DOTCAPTURE_EXPR_2]], align 4 // CHECK1-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 // CHECK1-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP3]], 0 @@ -19809,7 +19809,7 @@ int bar(int n){ // CHECK1-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] // CHECK1: user_code.entry: // CHECK1-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB4]]) -// CHECK1-NEXT: [[TMP3:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK1-NEXT: [[TMP3:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK1-NEXT: [[CONV1:%.*]] = bitcast i64* [[N_CASTED]] to i32* // CHECK1-NEXT: store i32 [[TMP3]], i32* [[CONV1]], align 4 // CHECK1-NEXT: [[TMP4:%.*]] = load i64, i64* [[N_CASTED]], align 8 @@ -19850,7 +19850,7 @@ int bar(int n){ // CHECK1-NEXT: store i32* [[V]], i32** [[V_ADDR]], align 8 // CHECK1-NEXT: [[CONV:%.*]] = bitcast i64* [[N_ADDR]] to i32* // CHECK1-NEXT: [[TMP0:%.*]] = load [1000 x i32]*, [1000 x i32]** [[A_ADDR]], align 8 -// CHECK1-NEXT: [[TMP1:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK1-NEXT: 
[[TMP1:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK1-NEXT: store i32 [[TMP1]], i32* [[DOTCAPTURE_EXPR_]], align 4 // CHECK1-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 // CHECK1-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP2]], 0 @@ -19898,7 +19898,7 @@ int bar(int n){ // CHECK1-NEXT: [[TMP15:%.*]] = zext i32 [[TMP14]] to i64 // CHECK1-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK1-NEXT: [[TMP17:%.*]] = zext i32 [[TMP16]] to i64 -// CHECK1-NEXT: [[TMP18:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK1-NEXT: [[TMP18:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK1-NEXT: [[CONV6:%.*]] = bitcast i64* [[N_CASTED]] to i32* // CHECK1-NEXT: store i32 [[TMP18]], i32* [[CONV6]], align 4 // CHECK1-NEXT: [[TMP19:%.*]] = load i64, i64* [[N_CASTED]], align 8 @@ -19992,7 +19992,7 @@ int bar(int n){ // CHECK1-NEXT: store i32* [[V]], i32** [[V_ADDR]], align 8 // CHECK1-NEXT: [[CONV:%.*]] = bitcast i64* [[N_ADDR]] to i32* // CHECK1-NEXT: [[TMP0:%.*]] = load [1000 x i32]*, [1000 x i32]** [[A_ADDR]], align 8 -// CHECK1-NEXT: [[TMP1:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK1-NEXT: store i32 [[TMP1]], i32* [[DOTCAPTURE_EXPR_]], align 4 // CHECK1-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 // CHECK1-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP2]], 0 @@ -20082,11 +20082,11 @@ int bar(int n){ // CHECK2-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] // CHECK2: user_code.entry: // CHECK2-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB4:[0-9]+]]) -// CHECK2-NEXT: [[TMP3:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK2-NEXT: [[TMP3:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK2-NEXT: [[CONV2:%.*]] = bitcast i64* [[N_CASTED]] to i32* // CHECK2-NEXT: store i32 [[TMP3]], i32* [[CONV2]], align 4 // CHECK2-NEXT: [[TMP4:%.*]] = load i64, i64* [[N_CASTED]], align 8 -// CHECK2-NEXT: [[TMP5:%.*]] = load i32, i32* [[CONV1]], align 8 +// CHECK2-NEXT: [[TMP5:%.*]] = load i32, i32* [[CONV1]], align 4 // CHECK2-NEXT: [[CONV3:%.*]] = bitcast i64* [[L_CASTED]] to i32* // CHECK2-NEXT: store i32 [[TMP5]], i32* [[CONV3]], align 4 // CHECK2-NEXT: [[TMP6:%.*]] = load i64, i64* [[L_CASTED]], align 8 @@ -20130,7 +20130,7 @@ int bar(int n){ // CHECK2-NEXT: [[CONV1:%.*]] = bitcast i64* [[L_ADDR]] to i32* // CHECK2-NEXT: [[L2:%.*]] = call i8* @__kmpc_alloc_shared(i64 4) // CHECK2-NEXT: [[L_ON_STACK:%.*]] = bitcast i8* [[L2]] to i32* -// CHECK2-NEXT: [[TMP1:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK2-NEXT: [[TMP1:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK2-NEXT: store i32 [[TMP1]], i32* [[DOTCAPTURE_EXPR_]], align 4 // CHECK2-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 // CHECK2-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP2]], 0 @@ -20177,11 +20177,11 @@ int bar(int n){ // CHECK2-NEXT: [[TMP15:%.*]] = zext i32 [[TMP14]] to i64 // CHECK2-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK2-NEXT: [[TMP17:%.*]] = zext i32 [[TMP16]] to i64 -// CHECK2-NEXT: [[TMP18:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK2-NEXT: [[TMP18:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK2-NEXT: [[CONV8:%.*]] = bitcast i64* [[N_CASTED]] to i32* // CHECK2-NEXT: store i32 [[TMP18]], i32* [[CONV8]], align 4 // CHECK2-NEXT: [[TMP19:%.*]] = load i64, i64* [[N_CASTED]], align 8 -// CHECK2-NEXT: [[TMP20:%.*]] = load i32, i32* [[CONV1]], align 8 +// CHECK2-NEXT: [[TMP20:%.*]] = load i32, i32* 
[[CONV1]], align 4 // CHECK2-NEXT: [[CONV9:%.*]] = bitcast i64* [[L_CASTED]] to i32* // CHECK2-NEXT: store i32 [[TMP20]], i32* [[CONV9]], align 4 // CHECK2-NEXT: [[TMP21:%.*]] = load i64, i64* [[L_CASTED]], align 8 @@ -20244,8 +20244,8 @@ int bar(int n){ // CHECK2-NEXT: [[TMP49:%.*]] = icmp ne i32 [[TMP48]], 0 // CHECK2-NEXT: br i1 [[TMP49]], label [[DOTOMP_LASTPRIVATE_THEN:%.*]], label [[DOTOMP_LASTPRIVATE_DONE:%.*]] // CHECK2: .omp.lastprivate.then: -// CHECK2-NEXT: [[TMP50:%.*]] = load i32, i32* [[CONV1]], align 8 -// CHECK2-NEXT: store i32 [[TMP50]], i32* [[CONV1]], align 8 +// CHECK2-NEXT: [[TMP50:%.*]] = load i32, i32* [[CONV1]], align 4 +// CHECK2-NEXT: store i32 [[TMP50]], i32* [[CONV1]], align 4 // CHECK2-NEXT: br label [[DOTOMP_LASTPRIVATE_DONE]] // CHECK2: .omp.lastprivate.done: // CHECK2-NEXT: br label [[OMP_PRECOND_END]] @@ -20284,7 +20284,7 @@ int bar(int n){ // CHECK2-NEXT: [[CONV:%.*]] = bitcast i64* [[N_ADDR]] to i32* // CHECK2-NEXT: [[TMP0:%.*]] = load [1000 x i32]*, [1000 x i32]** [[A_ADDR]], align 8 // CHECK2-NEXT: [[CONV1:%.*]] = bitcast i64* [[L_ADDR]] to i32* -// CHECK2-NEXT: [[TMP1:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK2-NEXT: [[TMP1:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK2-NEXT: store i32 [[TMP1]], i32* [[DOTCAPTURE_EXPR_]], align 4 // CHECK2-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 // CHECK2-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP2]], 0 @@ -20350,7 +20350,7 @@ int bar(int n){ // CHECK2-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [1000 x i32], [1000 x i32]* [[TMP0]], i64 0, i64 [[IDXPROM]] // CHECK2-NEXT: store i32 1, i32* [[ARRAYIDX]], align 4 // CHECK2-NEXT: [[TMP20:%.*]] = load i32, i32* [[I6]], align 4 -// CHECK2-NEXT: store i32 [[TMP20]], i32* [[CONV1]], align 8 +// CHECK2-NEXT: store i32 [[TMP20]], i32* [[CONV1]], align 4 // CHECK2-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK2: omp.body.continue: // CHECK2-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] @@ -20379,8 +20379,8 @@ int bar(int n){ // CHECK2-NEXT: [[TMP29:%.*]] = icmp ne i32 [[TMP28]], 0 // CHECK2-NEXT: br i1 [[TMP29]], label [[DOTOMP_LASTPRIVATE_THEN:%.*]], label [[DOTOMP_LASTPRIVATE_DONE:%.*]] // CHECK2: .omp.lastprivate.then: -// CHECK2-NEXT: [[TMP30:%.*]] = load i32, i32* [[CONV1]], align 8 -// CHECK2-NEXT: store i32 [[TMP30]], i32* [[CONV1]], align 8 +// CHECK2-NEXT: [[TMP30:%.*]] = load i32, i32* [[CONV1]], align 4 +// CHECK2-NEXT: store i32 [[TMP30]], i32* [[CONV1]], align 4 // CHECK2-NEXT: br label [[DOTOMP_LASTPRIVATE_DONE]] // CHECK2: .omp.lastprivate.done: // CHECK2-NEXT: br label [[OMP_PRECOND_END]] @@ -20405,7 +20405,7 @@ int bar(int n){ // CHECK2-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] // CHECK2: user_code.entry: // CHECK2-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB4]]) -// CHECK2-NEXT: [[TMP3:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK2-NEXT: [[TMP3:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK2-NEXT: [[CONV1:%.*]] = bitcast i64* [[N_CASTED]] to i32* // CHECK2-NEXT: store i32 [[TMP3]], i32* [[CONV1]], align 4 // CHECK2-NEXT: [[TMP4:%.*]] = load i64, i64* [[N_CASTED]], align 8 @@ -20443,7 +20443,7 @@ int bar(int n){ // CHECK2-NEXT: store [1000 x i16]* [[AA]], [1000 x i16]** [[AA_ADDR]], align 8 // CHECK2-NEXT: [[CONV:%.*]] = bitcast i64* [[N_ADDR]] to i32* // CHECK2-NEXT: [[TMP0:%.*]] = load [1000 x i16]*, [1000 x i16]** [[AA_ADDR]], align 8 -// CHECK2-NEXT: [[TMP1:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK2-NEXT: [[TMP1:%.*]] = load 
i32, i32* [[CONV]], align 4 // CHECK2-NEXT: store i32 [[TMP1]], i32* [[DOTCAPTURE_EXPR_]], align 4 // CHECK2-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 // CHECK2-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP2]], 0 @@ -20491,7 +20491,7 @@ int bar(int n){ // CHECK2-NEXT: [[TMP15:%.*]] = zext i32 [[TMP14]] to i64 // CHECK2-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK2-NEXT: [[TMP17:%.*]] = zext i32 [[TMP16]] to i64 -// CHECK2-NEXT: [[TMP18:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK2-NEXT: [[TMP18:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK2-NEXT: [[CONV6:%.*]] = bitcast i64* [[N_CASTED]] to i32* // CHECK2-NEXT: store i32 [[TMP18]], i32* [[CONV6]], align 4 // CHECK2-NEXT: [[TMP19:%.*]] = load i64, i64* [[N_CASTED]], align 8 @@ -20579,7 +20579,7 @@ int bar(int n){ // CHECK2-NEXT: store [1000 x i16]* [[AA]], [1000 x i16]** [[AA_ADDR]], align 8 // CHECK2-NEXT: [[CONV:%.*]] = bitcast i64* [[N_ADDR]] to i32* // CHECK2-NEXT: [[TMP0:%.*]] = load [1000 x i16]*, [1000 x i16]** [[AA_ADDR]], align 8 -// CHECK2-NEXT: [[TMP1:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK2-NEXT: [[TMP1:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK2-NEXT: store i32 [[TMP1]], i32* [[DOTCAPTURE_EXPR_]], align 4 // CHECK2-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 // CHECK2-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP2]], 0 @@ -20851,7 +20851,7 @@ int bar(int n){ // CHECK2-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] // CHECK2: user_code.entry: // CHECK2-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB4]]) -// CHECK2-NEXT: [[TMP3:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK2-NEXT: [[TMP3:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK2-NEXT: [[CONV1:%.*]] = bitcast i64* [[F_CASTED]] to i32* // CHECK2-NEXT: store i32 [[TMP3]], i32* [[CONV1]], align 4 // CHECK2-NEXT: [[TMP4:%.*]] = load i64, i64* [[F_CASTED]], align 8 @@ -20920,7 +20920,7 @@ int bar(int n){ // CHECK2-NEXT: [[TMP8:%.*]] = zext i32 [[TMP7]] to i64 // CHECK2-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK2-NEXT: [[TMP10:%.*]] = zext i32 [[TMP9]] to i64 -// CHECK2-NEXT: [[TMP11:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK2-NEXT: [[TMP11:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK2-NEXT: [[CONV3:%.*]] = bitcast i64* [[F_CASTED]] to i32* // CHECK2-NEXT: store i32 [[TMP11]], i32* [[CONV3]], align 4 // CHECK2-NEXT: [[TMP12:%.*]] = load i64, i64* [[F_CASTED]], align 8 @@ -21039,7 +21039,7 @@ int bar(int n){ // CHECK2-NEXT: store i32 10, i32* [[K]], align 4 // CHECK2-NEXT: [[TMP11:%.*]] = load i32, i32* [[I]], align 4 // CHECK2-NEXT: [[TMP12:%.*]] = load i32, i32* [[J]], align 4 -// CHECK2-NEXT: [[TMP13:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK2-NEXT: [[TMP13:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK2-NEXT: [[MUL9:%.*]] = mul nsw i32 [[TMP12]], [[TMP13]] // CHECK2-NEXT: [[ADD10:%.*]] = add nsw i32 [[TMP11]], [[MUL9]] // CHECK2-NEXT: [[TMP14:%.*]] = load i32, i32* [[K]], align 4 @@ -21084,7 +21084,7 @@ int bar(int n){ // CHECK2-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] // CHECK2: user_code.entry: // CHECK2-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB4]]) -// CHECK2-NEXT: [[TMP3:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK2-NEXT: [[TMP3:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK2-NEXT: [[CONV1:%.*]] = bitcast i64* [[N_CASTED]] to i32* // CHECK2-NEXT: store i32 
[[TMP3]], i32* [[CONV1]], align 4 // CHECK2-NEXT: [[TMP4:%.*]] = load i64, i64* [[N_CASTED]], align 8 @@ -21126,9 +21126,9 @@ int bar(int n){ // CHECK2-NEXT: store [10 x [10 x i32]]* [[C]], [10 x [10 x i32]]** [[C_ADDR]], align 8 // CHECK2-NEXT: [[CONV:%.*]] = bitcast i64* [[N_ADDR]] to i32* // CHECK2-NEXT: [[TMP0:%.*]] = load [10 x [10 x i32]]*, [10 x [10 x i32]]** [[C_ADDR]], align 8 -// CHECK2-NEXT: [[TMP1:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK2-NEXT: [[TMP1:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK2-NEXT: store i32 [[TMP1]], i32* [[DOTCAPTURE_EXPR_]], align 4 -// CHECK2-NEXT: [[TMP2:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK2-NEXT: [[TMP2:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK2-NEXT: store i32 [[TMP2]], i32* [[DOTCAPTURE_EXPR_2]], align 4 // CHECK2-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 // CHECK2-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP3]], 0 @@ -21185,7 +21185,7 @@ int bar(int n){ // CHECK2-NEXT: [[TMP18:%.*]] = zext i32 [[TMP17]] to i64 // CHECK2-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK2-NEXT: [[TMP20:%.*]] = zext i32 [[TMP19]] to i64 -// CHECK2-NEXT: [[TMP21:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK2-NEXT: [[TMP21:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK2-NEXT: [[CONV12:%.*]] = bitcast i64* [[N_CASTED]] to i32* // CHECK2-NEXT: store i32 [[TMP21]], i32* [[CONV12]], align 4 // CHECK2-NEXT: [[TMP22:%.*]] = load i64, i64* [[N_CASTED]], align 8 @@ -21277,9 +21277,9 @@ int bar(int n){ // CHECK2-NEXT: store [10 x [10 x i32]]* [[C]], [10 x [10 x i32]]** [[C_ADDR]], align 8 // CHECK2-NEXT: [[CONV:%.*]] = bitcast i64* [[N_ADDR]] to i32* // CHECK2-NEXT: [[TMP0:%.*]] = load [10 x [10 x i32]]*, [10 x [10 x i32]]** [[C_ADDR]], align 8 -// CHECK2-NEXT: [[TMP1:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK2-NEXT: [[TMP1:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK2-NEXT: store i32 [[TMP1]], i32* [[DOTCAPTURE_EXPR_]], align 4 -// CHECK2-NEXT: [[TMP2:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK2-NEXT: [[TMP2:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK2-NEXT: store i32 [[TMP2]], i32* [[DOTCAPTURE_EXPR_2]], align 4 // CHECK2-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 // CHECK2-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP3]], 0 @@ -21398,7 +21398,7 @@ int bar(int n){ // CHECK2-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] // CHECK2: user_code.entry: // CHECK2-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB4]]) -// CHECK2-NEXT: [[TMP3:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK2-NEXT: [[TMP3:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK2-NEXT: [[CONV1:%.*]] = bitcast i64* [[N_CASTED]] to i32* // CHECK2-NEXT: store i32 [[TMP3]], i32* [[CONV1]], align 4 // CHECK2-NEXT: [[TMP4:%.*]] = load i64, i64* [[N_CASTED]], align 8 @@ -21439,7 +21439,7 @@ int bar(int n){ // CHECK2-NEXT: store i32* [[V]], i32** [[V_ADDR]], align 8 // CHECK2-NEXT: [[CONV:%.*]] = bitcast i64* [[N_ADDR]] to i32* // CHECK2-NEXT: [[TMP0:%.*]] = load [1000 x i32]*, [1000 x i32]** [[A_ADDR]], align 8 -// CHECK2-NEXT: [[TMP1:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK2-NEXT: [[TMP1:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK2-NEXT: store i32 [[TMP1]], i32* [[DOTCAPTURE_EXPR_]], align 4 // CHECK2-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 // CHECK2-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP2]], 0 @@ -21487,7 +21487,7 @@ int bar(int n){ // CHECK2-NEXT: [[TMP15:%.*]] = zext i32 
[[TMP14]] to i64 // CHECK2-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK2-NEXT: [[TMP17:%.*]] = zext i32 [[TMP16]] to i64 -// CHECK2-NEXT: [[TMP18:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK2-NEXT: [[TMP18:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK2-NEXT: [[CONV6:%.*]] = bitcast i64* [[N_CASTED]] to i32* // CHECK2-NEXT: store i32 [[TMP18]], i32* [[CONV6]], align 4 // CHECK2-NEXT: [[TMP19:%.*]] = load i64, i64* [[N_CASTED]], align 8 @@ -21581,7 +21581,7 @@ int bar(int n){ // CHECK2-NEXT: store i32* [[V]], i32** [[V_ADDR]], align 8 // CHECK2-NEXT: [[CONV:%.*]] = bitcast i64* [[N_ADDR]] to i32* // CHECK2-NEXT: [[TMP0:%.*]] = load [1000 x i32]*, [1000 x i32]** [[A_ADDR]], align 8 -// CHECK2-NEXT: [[TMP1:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK2-NEXT: [[TMP1:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK2-NEXT: store i32 [[TMP1]], i32* [[DOTCAPTURE_EXPR_]], align 4 // CHECK2-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 // CHECK2-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP2]], 0 diff --git a/clang/test/OpenMP/nvptx_target_teams_distribute_parallel_for_generic_mode_codegen.cpp b/clang/test/OpenMP/nvptx_target_teams_distribute_parallel_for_generic_mode_codegen.cpp index 98cfc7aa0db5..fd65a077c5d8 100644 --- a/clang/test/OpenMP/nvptx_target_teams_distribute_parallel_for_generic_mode_codegen.cpp +++ b/clang/test/OpenMP/nvptx_target_teams_distribute_parallel_for_generic_mode_codegen.cpp @@ -50,11 +50,11 @@ int main(int argc, char **argv) { // CHECK1-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] // CHECK1: user_code.entry: // CHECK1-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB4:[0-9]+]]) -// CHECK1-NEXT: [[TMP3:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK1-NEXT: [[TMP3:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK1-NEXT: [[CONV2:%.*]] = bitcast i64* [[ARGC_CASTED]] to i32* // CHECK1-NEXT: store i32 [[TMP3]], i32* [[CONV2]], align 4 // CHECK1-NEXT: [[TMP4:%.*]] = load i64, i64* [[ARGC_CASTED]], align 8 -// CHECK1-NEXT: [[TMP5:%.*]] = load i32, i32* [[CONV1]], align 8 +// CHECK1-NEXT: [[TMP5:%.*]] = load i32, i32* [[CONV1]], align 4 // CHECK1-NEXT: [[CONV3:%.*]] = bitcast i64* [[DOTCAPTURE_EXPR__CASTED]] to i32* // CHECK1-NEXT: store i32 [[TMP5]], i32* [[CONV3]], align 4 // CHECK1-NEXT: [[TMP6:%.*]] = load i64, i64* [[DOTCAPTURE_EXPR__CASTED]], align 8 @@ -96,7 +96,7 @@ int main(int argc, char **argv) { // CHECK1-NEXT: [[CONV:%.*]] = bitcast i64* [[ARGC_ADDR]] to i32* // CHECK1-NEXT: [[TMP0:%.*]] = load i32*, i32** [[A_ADDR]], align 8 // CHECK1-NEXT: [[CONV1:%.*]] = bitcast i64* [[DOTCAPTURE_EXPR__ADDR]] to i32* -// CHECK1-NEXT: [[TMP1:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK1-NEXT: store i32 [[TMP1]], i32* [[DOTCAPTURE_EXPR_2]], align 4 // CHECK1-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_2]], align 4 // CHECK1-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP2]], 0 @@ -144,11 +144,11 @@ int main(int argc, char **argv) { // CHECK1-NEXT: [[TMP15:%.*]] = zext i32 [[TMP14]] to i64 // CHECK1-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK1-NEXT: [[TMP17:%.*]] = zext i32 [[TMP16]] to i64 -// CHECK1-NEXT: [[TMP18:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK1-NEXT: [[TMP18:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK1-NEXT: [[CONV8:%.*]] = bitcast i64* [[ARGC_CASTED]] to i32* // CHECK1-NEXT: store i32 [[TMP18]], i32* [[CONV8]], align 4 // 
CHECK1-NEXT: [[TMP19:%.*]] = load i64, i64* [[ARGC_CASTED]], align 8 -// CHECK1-NEXT: [[TMP20:%.*]] = load i32, i32* [[CONV1]], align 8 +// CHECK1-NEXT: [[TMP20:%.*]] = load i32, i32* [[CONV1]], align 4 // CHECK1-NEXT: [[CONV9:%.*]] = bitcast i64* [[DOTCAPTURE_EXPR__CASTED]] to i32* // CHECK1-NEXT: store i32 [[TMP20]], i32* [[CONV9]], align 4 // CHECK1-NEXT: [[TMP21:%.*]] = load i64, i64* [[DOTCAPTURE_EXPR__CASTED]], align 8 @@ -242,7 +242,7 @@ int main(int argc, char **argv) { // CHECK1-NEXT: [[CONV:%.*]] = bitcast i64* [[ARGC_ADDR]] to i32* // CHECK1-NEXT: [[TMP0:%.*]] = load i32*, i32** [[A_ADDR]], align 8 // CHECK1-NEXT: [[CONV1:%.*]] = bitcast i64* [[DOTCAPTURE_EXPR__ADDR]] to i32* -// CHECK1-NEXT: [[TMP1:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK1-NEXT: store i32 [[TMP1]], i32* [[DOTCAPTURE_EXPR_2]], align 4 // CHECK1-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_2]], align 4 // CHECK1-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP2]], 0 @@ -265,7 +265,7 @@ int main(int argc, char **argv) { // CHECK1-NEXT: store i32 [[CONV6]], i32* [[DOTOMP_UB]], align 4 // CHECK1-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK1-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP7:%.*]] = load i32, i32* [[CONV1]], align 8 +// CHECK1-NEXT: [[TMP7:%.*]] = load i32, i32* [[CONV1]], align 4 // CHECK1-NEXT: [[TMP8:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: [[TMP9:%.*]] = load i32, i32* [[TMP8]], align 4 // CHECK1-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB3:[0-9]+]], i32 [[TMP9]], i32 33, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 [[TMP7]]) @@ -948,11 +948,11 @@ int main(int argc, char **argv) { // CHECK4-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] // CHECK4: user_code.entry: // CHECK4-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB4:[0-9]+]]) -// CHECK4-NEXT: [[TMP3:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK4-NEXT: [[TMP3:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK4-NEXT: [[CONV2:%.*]] = bitcast i64* [[ARGC_CASTED]] to i32* // CHECK4-NEXT: store i32 [[TMP3]], i32* [[CONV2]], align 4 // CHECK4-NEXT: [[TMP4:%.*]] = load i64, i64* [[ARGC_CASTED]], align 8 -// CHECK4-NEXT: [[TMP5:%.*]] = load i32, i32* [[CONV1]], align 8 +// CHECK4-NEXT: [[TMP5:%.*]] = load i32, i32* [[CONV1]], align 4 // CHECK4-NEXT: [[CONV3:%.*]] = bitcast i64* [[DOTCAPTURE_EXPR__CASTED]] to i32* // CHECK4-NEXT: store i32 [[TMP5]], i32* [[CONV3]], align 4 // CHECK4-NEXT: [[TMP6:%.*]] = load i64, i64* [[DOTCAPTURE_EXPR__CASTED]], align 8 @@ -994,7 +994,7 @@ int main(int argc, char **argv) { // CHECK4-NEXT: [[CONV:%.*]] = bitcast i64* [[ARGC_ADDR]] to i32* // CHECK4-NEXT: [[TMP0:%.*]] = load i32*, i32** [[A_ADDR]], align 8 // CHECK4-NEXT: [[CONV1:%.*]] = bitcast i64* [[DOTCAPTURE_EXPR__ADDR]] to i32* -// CHECK4-NEXT: [[TMP1:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK4-NEXT: [[TMP1:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK4-NEXT: store i32 [[TMP1]], i32* [[DOTCAPTURE_EXPR_2]], align 4 // CHECK4-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_2]], align 4 // CHECK4-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP2]], 0 @@ -1042,11 +1042,11 @@ int main(int argc, char **argv) { // CHECK4-NEXT: [[TMP15:%.*]] = zext i32 [[TMP14]] to i64 // CHECK4-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK4-NEXT: 
[[TMP17:%.*]] = zext i32 [[TMP16]] to i64 -// CHECK4-NEXT: [[TMP18:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK4-NEXT: [[TMP18:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK4-NEXT: [[CONV8:%.*]] = bitcast i64* [[ARGC_CASTED]] to i32* // CHECK4-NEXT: store i32 [[TMP18]], i32* [[CONV8]], align 4 // CHECK4-NEXT: [[TMP19:%.*]] = load i64, i64* [[ARGC_CASTED]], align 8 -// CHECK4-NEXT: [[TMP20:%.*]] = load i32, i32* [[CONV1]], align 8 +// CHECK4-NEXT: [[TMP20:%.*]] = load i32, i32* [[CONV1]], align 4 // CHECK4-NEXT: [[CONV9:%.*]] = bitcast i64* [[DOTCAPTURE_EXPR__CASTED]] to i32* // CHECK4-NEXT: store i32 [[TMP20]], i32* [[CONV9]], align 4 // CHECK4-NEXT: [[TMP21:%.*]] = load i64, i64* [[DOTCAPTURE_EXPR__CASTED]], align 8 @@ -1140,7 +1140,7 @@ int main(int argc, char **argv) { // CHECK4-NEXT: [[CONV:%.*]] = bitcast i64* [[ARGC_ADDR]] to i32* // CHECK4-NEXT: [[TMP0:%.*]] = load i32*, i32** [[A_ADDR]], align 8 // CHECK4-NEXT: [[CONV1:%.*]] = bitcast i64* [[DOTCAPTURE_EXPR__ADDR]] to i32* -// CHECK4-NEXT: [[TMP1:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK4-NEXT: [[TMP1:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK4-NEXT: store i32 [[TMP1]], i32* [[DOTCAPTURE_EXPR_2]], align 4 // CHECK4-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_2]], align 4 // CHECK4-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP2]], 0 @@ -1163,7 +1163,7 @@ int main(int argc, char **argv) { // CHECK4-NEXT: store i32 [[CONV6]], i32* [[DOTOMP_UB]], align 4 // CHECK4-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK4-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK4-NEXT: [[TMP7:%.*]] = load i32, i32* [[CONV1]], align 8 +// CHECK4-NEXT: [[TMP7:%.*]] = load i32, i32* [[CONV1]], align 4 // CHECK4-NEXT: [[TMP8:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK4-NEXT: [[TMP9:%.*]] = load i32, i32* [[TMP8]], align 4 // CHECK4-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB3:[0-9]+]], i32 [[TMP9]], i32 33, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 [[TMP7]]) diff --git a/clang/test/OpenMP/nvptx_target_teams_distribute_parallel_for_simd_codegen.cpp b/clang/test/OpenMP/nvptx_target_teams_distribute_parallel_for_simd_codegen.cpp index 754f4ce08ca0..0339a6d21899 100644 --- a/clang/test/OpenMP/nvptx_target_teams_distribute_parallel_for_simd_codegen.cpp +++ b/clang/test/OpenMP/nvptx_target_teams_distribute_parallel_for_simd_codegen.cpp @@ -9401,11 +9401,11 @@ int bar(int n){ // CHECK1-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] // CHECK1: user_code.entry: // CHECK1-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB4:[0-9]+]]) -// CHECK1-NEXT: [[TMP3:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK1-NEXT: [[TMP3:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK1-NEXT: [[CONV2:%.*]] = bitcast i64* [[N_CASTED]] to i32* // CHECK1-NEXT: store i32 [[TMP3]], i32* [[CONV2]], align 4 // CHECK1-NEXT: [[TMP4:%.*]] = load i64, i64* [[N_CASTED]], align 8 -// CHECK1-NEXT: [[TMP5:%.*]] = load i32, i32* [[CONV1]], align 8 +// CHECK1-NEXT: [[TMP5:%.*]] = load i32, i32* [[CONV1]], align 4 // CHECK1-NEXT: [[CONV3:%.*]] = bitcast i64* [[L_CASTED]] to i32* // CHECK1-NEXT: store i32 [[TMP5]], i32* [[CONV3]], align 4 // CHECK1-NEXT: [[TMP6:%.*]] = load i64, i64* [[L_CASTED]], align 8 @@ -9449,7 +9449,7 @@ int bar(int n){ // CHECK1-NEXT: [[CONV1:%.*]] = bitcast i64* [[L_ADDR]] to i32* // CHECK1-NEXT: [[L2:%.*]] = call i8* @__kmpc_alloc_shared(i64 4) // 
CHECK1-NEXT: [[L_ON_STACK:%.*]] = bitcast i8* [[L2]] to i32* -// CHECK1-NEXT: [[TMP1:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK1-NEXT: store i32 [[TMP1]], i32* [[DOTCAPTURE_EXPR_]], align 4 // CHECK1-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 // CHECK1-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP2]], 0 @@ -9496,11 +9496,11 @@ int bar(int n){ // CHECK1-NEXT: [[TMP15:%.*]] = zext i32 [[TMP14]] to i64 // CHECK1-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !12 // CHECK1-NEXT: [[TMP17:%.*]] = zext i32 [[TMP16]] to i64 -// CHECK1-NEXT: [[TMP18:%.*]] = load i32, i32* [[CONV]], align 8, !llvm.access.group !12 +// CHECK1-NEXT: [[TMP18:%.*]] = load i32, i32* [[CONV]], align 4, !llvm.access.group !12 // CHECK1-NEXT: [[CONV8:%.*]] = bitcast i64* [[N_CASTED]] to i32* // CHECK1-NEXT: store i32 [[TMP18]], i32* [[CONV8]], align 4, !llvm.access.group !12 // CHECK1-NEXT: [[TMP19:%.*]] = load i64, i64* [[N_CASTED]], align 8, !llvm.access.group !12 -// CHECK1-NEXT: [[TMP20:%.*]] = load i32, i32* [[CONV1]], align 8, !llvm.access.group !12 +// CHECK1-NEXT: [[TMP20:%.*]] = load i32, i32* [[CONV1]], align 4, !llvm.access.group !12 // CHECK1-NEXT: [[CONV9:%.*]] = bitcast i64* [[L_CASTED]] to i32* // CHECK1-NEXT: store i32 [[TMP20]], i32* [[CONV9]], align 4, !llvm.access.group !12 // CHECK1-NEXT: [[TMP21:%.*]] = load i64, i64* [[L_CASTED]], align 8, !llvm.access.group !12 @@ -9575,8 +9575,8 @@ int bar(int n){ // CHECK1-NEXT: [[TMP52:%.*]] = icmp ne i32 [[TMP51]], 0 // CHECK1-NEXT: br i1 [[TMP52]], label [[DOTOMP_LASTPRIVATE_THEN:%.*]], label [[DOTOMP_LASTPRIVATE_DONE:%.*]] // CHECK1: .omp.lastprivate.then: -// CHECK1-NEXT: [[TMP53:%.*]] = load i32, i32* [[CONV1]], align 8 -// CHECK1-NEXT: store i32 [[TMP53]], i32* [[CONV1]], align 8 +// CHECK1-NEXT: [[TMP53:%.*]] = load i32, i32* [[CONV1]], align 4 +// CHECK1-NEXT: store i32 [[TMP53]], i32* [[CONV1]], align 4 // CHECK1-NEXT: br label [[DOTOMP_LASTPRIVATE_DONE]] // CHECK1: .omp.lastprivate.done: // CHECK1-NEXT: br label [[OMP_PRECOND_END]] @@ -9615,7 +9615,7 @@ int bar(int n){ // CHECK1-NEXT: [[CONV:%.*]] = bitcast i64* [[N_ADDR]] to i32* // CHECK1-NEXT: [[TMP0:%.*]] = load [1000 x i32]*, [1000 x i32]** [[A_ADDR]], align 8 // CHECK1-NEXT: [[CONV1:%.*]] = bitcast i64* [[L_ADDR]] to i32* -// CHECK1-NEXT: [[TMP1:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK1-NEXT: store i32 [[TMP1]], i32* [[DOTCAPTURE_EXPR_]], align 4 // CHECK1-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 // CHECK1-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP2]], 0 @@ -9681,7 +9681,7 @@ int bar(int n){ // CHECK1-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [1000 x i32], [1000 x i32]* [[TMP0]], i64 0, i64 [[IDXPROM]] // CHECK1-NEXT: store i32 1, i32* [[ARRAYIDX]], align 4, !llvm.access.group !16 // CHECK1-NEXT: [[TMP20:%.*]] = load i32, i32* [[I6]], align 4, !llvm.access.group !16 -// CHECK1-NEXT: store i32 [[TMP20]], i32* [[CONV1]], align 8, !llvm.access.group !16 +// CHECK1-NEXT: store i32 [[TMP20]], i32* [[CONV1]], align 4, !llvm.access.group !16 // CHECK1-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK1: omp.body.continue: // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] @@ -9722,8 +9722,8 @@ int bar(int n){ // CHECK1-NEXT: [[TMP32:%.*]] = icmp ne i32 [[TMP31]], 0 // CHECK1-NEXT: br i1 [[TMP32]], label [[DOTOMP_LASTPRIVATE_THEN:%.*]], label [[DOTOMP_LASTPRIVATE_DONE:%.*]] // CHECK1: 
.omp.lastprivate.then: -// CHECK1-NEXT: [[TMP33:%.*]] = load i32, i32* [[CONV1]], align 8 -// CHECK1-NEXT: store i32 [[TMP33]], i32* [[CONV1]], align 8 +// CHECK1-NEXT: [[TMP33:%.*]] = load i32, i32* [[CONV1]], align 4 +// CHECK1-NEXT: store i32 [[TMP33]], i32* [[CONV1]], align 4 // CHECK1-NEXT: br label [[DOTOMP_LASTPRIVATE_DONE]] // CHECK1: .omp.lastprivate.done: // CHECK1-NEXT: br label [[OMP_PRECOND_END]] @@ -9748,7 +9748,7 @@ int bar(int n){ // CHECK1-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] // CHECK1: user_code.entry: // CHECK1-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB4]]) -// CHECK1-NEXT: [[TMP3:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK1-NEXT: [[TMP3:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK1-NEXT: [[CONV1:%.*]] = bitcast i64* [[N_CASTED]] to i32* // CHECK1-NEXT: store i32 [[TMP3]], i32* [[CONV1]], align 4 // CHECK1-NEXT: [[TMP4:%.*]] = load i64, i64* [[N_CASTED]], align 8 @@ -9786,7 +9786,7 @@ int bar(int n){ // CHECK1-NEXT: store [1000 x i16]* [[AA]], [1000 x i16]** [[AA_ADDR]], align 8 // CHECK1-NEXT: [[CONV:%.*]] = bitcast i64* [[N_ADDR]] to i32* // CHECK1-NEXT: [[TMP0:%.*]] = load [1000 x i16]*, [1000 x i16]** [[AA_ADDR]], align 8 -// CHECK1-NEXT: [[TMP1:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK1-NEXT: store i32 [[TMP1]], i32* [[DOTCAPTURE_EXPR_]], align 4 // CHECK1-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 // CHECK1-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP2]], 0 @@ -9834,7 +9834,7 @@ int bar(int n){ // CHECK1-NEXT: [[TMP15:%.*]] = zext i32 [[TMP14]] to i64 // CHECK1-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !19 // CHECK1-NEXT: [[TMP17:%.*]] = zext i32 [[TMP16]] to i64 -// CHECK1-NEXT: [[TMP18:%.*]] = load i32, i32* [[CONV]], align 8, !llvm.access.group !19 +// CHECK1-NEXT: [[TMP18:%.*]] = load i32, i32* [[CONV]], align 4, !llvm.access.group !19 // CHECK1-NEXT: [[CONV6:%.*]] = bitcast i64* [[N_CASTED]] to i32* // CHECK1-NEXT: store i32 [[TMP18]], i32* [[CONV6]], align 4, !llvm.access.group !19 // CHECK1-NEXT: [[TMP19:%.*]] = load i64, i64* [[N_CASTED]], align 8, !llvm.access.group !19 @@ -9934,7 +9934,7 @@ int bar(int n){ // CHECK1-NEXT: store [1000 x i16]* [[AA]], [1000 x i16]** [[AA_ADDR]], align 8 // CHECK1-NEXT: [[CONV:%.*]] = bitcast i64* [[N_ADDR]] to i32* // CHECK1-NEXT: [[TMP0:%.*]] = load [1000 x i16]*, [1000 x i16]** [[AA_ADDR]], align 8 -// CHECK1-NEXT: [[TMP1:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK1-NEXT: store i32 [[TMP1]], i32* [[DOTCAPTURE_EXPR_]], align 4 // CHECK1-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 // CHECK1-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP2]], 0 @@ -10232,7 +10232,7 @@ int bar(int n){ // CHECK1-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] // CHECK1: user_code.entry: // CHECK1-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB4]]) -// CHECK1-NEXT: [[TMP3:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK1-NEXT: [[TMP3:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK1-NEXT: [[CONV1:%.*]] = bitcast i64* [[F_CASTED]] to i32* // CHECK1-NEXT: store i32 [[TMP3]], i32* [[CONV1]], align 4 // CHECK1-NEXT: [[TMP4:%.*]] = load i64, i64* [[F_CASTED]], align 8 @@ -10301,7 +10301,7 @@ int bar(int n){ // CHECK1-NEXT: [[TMP8:%.*]] = zext i32 [[TMP7]] to i64 
// CHECK1-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !31 // CHECK1-NEXT: [[TMP10:%.*]] = zext i32 [[TMP9]] to i64 -// CHECK1-NEXT: [[TMP11:%.*]] = load i32, i32* [[CONV]], align 8, !llvm.access.group !31 +// CHECK1-NEXT: [[TMP11:%.*]] = load i32, i32* [[CONV]], align 4, !llvm.access.group !31 // CHECK1-NEXT: [[CONV3:%.*]] = bitcast i64* [[F_CASTED]] to i32* // CHECK1-NEXT: store i32 [[TMP11]], i32* [[CONV3]], align 4, !llvm.access.group !31 // CHECK1-NEXT: [[TMP12:%.*]] = load i64, i64* [[F_CASTED]], align 8, !llvm.access.group !31 @@ -10428,7 +10428,7 @@ int bar(int n){ // CHECK1-NEXT: store i32 10, i32* [[K]], align 4, !llvm.access.group !34 // CHECK1-NEXT: [[TMP11:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !34 // CHECK1-NEXT: [[TMP12:%.*]] = load i32, i32* [[J]], align 4, !llvm.access.group !34 -// CHECK1-NEXT: [[TMP13:%.*]] = load i32, i32* [[CONV]], align 8, !llvm.access.group !34 +// CHECK1-NEXT: [[TMP13:%.*]] = load i32, i32* [[CONV]], align 4, !llvm.access.group !34 // CHECK1-NEXT: [[MUL9:%.*]] = mul nsw i32 [[TMP12]], [[TMP13]] // CHECK1-NEXT: [[ADD10:%.*]] = add nsw i32 [[TMP11]], [[MUL9]] // CHECK1-NEXT: [[TMP14:%.*]] = load i32, i32* [[K]], align 4, !llvm.access.group !34 diff --git a/clang/test/OpenMP/nvptx_teams_codegen.cpp b/clang/test/OpenMP/nvptx_teams_codegen.cpp index cc7050a7cb07..52d94ab9b682 100644 --- a/clang/test/OpenMP/nvptx_teams_codegen.cpp +++ b/clang/test/OpenMP/nvptx_teams_codegen.cpp @@ -902,7 +902,7 @@ int main (int argc, char **argv) { // CHECK1-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1 // CHECK1-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] // CHECK1: user_code.entry: -// CHECK1-NEXT: [[TMP1:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK1-NEXT: [[ARGC1:%.*]] = call i8* @__kmpc_alloc_shared(i64 4) // CHECK1-NEXT: [[ARGC_ON_STACK:%.*]] = bitcast i8* [[ARGC1]] to i32* // CHECK1-NEXT: store i32 [[TMP1]], i32* [[ARGC_ON_STACK]], align 4 @@ -1069,7 +1069,7 @@ int main (int argc, char **argv) { // CHECK3-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1 // CHECK3-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] // CHECK3: user_code.entry: -// CHECK3-NEXT: [[TMP1:%.*]] = load i32, i32* [[CONV2]], align 8 +// CHECK3-NEXT: [[TMP1:%.*]] = load i32, i32* [[CONV2]], align 4 // CHECK3-NEXT: [[ARGC3:%.*]] = call i8* @__kmpc_alloc_shared(i64 4) // CHECK3-NEXT: [[ARGC_ON_STACK:%.*]] = bitcast i8* [[ARGC3]] to i32* // CHECK3-NEXT: store i32 [[TMP1]], i32* [[ARGC_ON_STACK]], align 4 diff --git a/clang/test/OpenMP/nvptx_teams_reduction_codegen.cpp b/clang/test/OpenMP/nvptx_teams_reduction_codegen.cpp index d0a19bdb4396..f584b2b4676c 100644 --- a/clang/test/OpenMP/nvptx_teams_reduction_codegen.cpp +++ b/clang/test/OpenMP/nvptx_teams_reduction_codegen.cpp @@ -4520,10 +4520,10 @@ int bar(int n){ // CHECK1-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1 // CHECK1-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] // CHECK1: user_code.entry: -// CHECK1-NEXT: [[TMP1:%.*]] = load i8, i8* [[CONV]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = load i8, i8* [[CONV]], align 1 // CHECK1-NEXT: [[C2:%.*]] = call i8* @__kmpc_alloc_shared(i64 1) // CHECK1-NEXT: store i8 [[TMP1]], i8* [[C2]], align 1 -// CHECK1-NEXT: [[TMP2:%.*]] = load float, float* [[CONV1]], align 8 +// CHECK1-NEXT: [[TMP2:%.*]] = load float, float* [[CONV1]], 
align 4 // CHECK1-NEXT: [[D3:%.*]] = call i8* @__kmpc_alloc_shared(i64 4) // CHECK1-NEXT: [[D_ON_STACK:%.*]] = bitcast i8* [[D3]] to float* // CHECK1-NEXT: store float [[TMP2]], float* [[D_ON_STACK]], align 4 @@ -5839,7 +5839,7 @@ int bar(int n){ // CHECK2-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1 // CHECK2-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] // CHECK2: user_code.entry: -// CHECK2-NEXT: [[TMP1:%.*]] = load i8, i8* [[CONV]], align 4 +// CHECK2-NEXT: [[TMP1:%.*]] = load i8, i8* [[CONV]], align 1 // CHECK2-NEXT: [[C2:%.*]] = call i8* @__kmpc_alloc_shared(i32 1) // CHECK2-NEXT: store i8 [[TMP1]], i8* [[C2]], align 1 // CHECK2-NEXT: [[TMP2:%.*]] = load float, float* [[CONV1]], align 4 @@ -7157,7 +7157,7 @@ int bar(int n){ // CHECK3-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1 // CHECK3-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] // CHECK3: user_code.entry: -// CHECK3-NEXT: [[TMP1:%.*]] = load i8, i8* [[CONV]], align 4 +// CHECK3-NEXT: [[TMP1:%.*]] = load i8, i8* [[CONV]], align 1 // CHECK3-NEXT: [[C2:%.*]] = call i8* @__kmpc_alloc_shared(i32 1) // CHECK3-NEXT: store i8 [[TMP1]], i8* [[C2]], align 1 // CHECK3-NEXT: [[TMP2:%.*]] = load float, float* [[CONV1]], align 4 diff --git a/clang/test/OpenMP/parallel_firstprivate_codegen.cpp b/clang/test/OpenMP/parallel_firstprivate_codegen.cpp index 7542169b574b..a777faf5f03d 100644 --- a/clang/test/OpenMP/parallel_firstprivate_codegen.cpp +++ b/clang/test/OpenMP/parallel_firstprivate_codegen.cpp @@ -2300,14 +2300,14 @@ void array_func(float a[3], St s[2], int n, long double vla1[n]) { // CHECK9-NEXT: call void @_ZN2StC1Ev(%struct.St* nonnull align 4 dereferenceable(8) [[AGG_TMP6]]) // CHECK9-NEXT: call void @_ZN1SIfEC1ERKS0_2St(%struct.S* nonnull align 4 dereferenceable(4) [[VAR5]], %struct.S* nonnull align 4 dereferenceable(4) [[TMP2]], %struct.St* [[AGG_TMP6]]) // CHECK9-NEXT: call void @_ZN2StD1Ev(%struct.St* nonnull align 4 dereferenceable(8) [[AGG_TMP6]]) #[[ATTR4]] -// CHECK9-NEXT: [[TMP7:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK9-NEXT: [[TMP7:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK9-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x i32], [2 x i32]* [[VEC2]], i64 0, i64 0 // CHECK9-NEXT: store i32 [[TMP7]], i32* [[ARRAYIDX]], align 4 // CHECK9-NEXT: [[ARRAYIDX7:%.*]] = getelementptr inbounds [2 x %struct.S], [2 x %struct.S]* [[S_ARR3]], i64 0, i64 0 // CHECK9-NEXT: [[TMP8:%.*]] = bitcast %struct.S* [[ARRAYIDX7]] to i8* // CHECK9-NEXT: [[TMP9:%.*]] = bitcast %struct.S* [[VAR5]] to i8* // CHECK9-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 [[TMP8]], i8* align 4 [[TMP9]], i64 4, i1 false) -// CHECK9-NEXT: store i32 2, i32* [[CONV1]], align 8 +// CHECK9-NEXT: store i32 2, i32* [[CONV1]], align 4 // CHECK9-NEXT: call void @_ZN1SIfED1Ev(%struct.S* nonnull align 4 dereferenceable(4) [[VAR5]]) #[[ATTR4]] // CHECK9-NEXT: [[ARRAY_BEGIN8:%.*]] = getelementptr inbounds [2 x %struct.S], [2 x %struct.S]* [[S_ARR3]], i32 0, i32 0 // CHECK9-NEXT: [[TMP10:%.*]] = getelementptr inbounds [[STRUCT_S]], %struct.S* [[ARRAY_BEGIN8]], i64 2 @@ -2379,7 +2379,7 @@ void array_func(float a[3], St s[2], int n, long double vla1[n]) { // CHECK9-NEXT: [[TMP1:%.*]] = load i32, i32* [[TMP0]], align 4 // CHECK9-NEXT: [[DOTT_VAR__VOID_ADDR:%.*]] = call i8* @__kmpc_alloc(i32 [[TMP1]], i64 4, i8* inttoptr (i64 1 to i8*)) // CHECK9-NEXT: [[DOTT_VAR__ADDR:%.*]] = bitcast i8* [[DOTT_VAR__VOID_ADDR]] to i32* -// CHECK9-NEXT: [[TMP2:%.*]] 
= load i32, i32* [[CONV]], align 8 +// CHECK9-NEXT: [[TMP2:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK9-NEXT: store i32 [[TMP2]], i32* [[DOTT_VAR__ADDR]], align 4 // CHECK9-NEXT: store i32 0, i32* [[DOTT_VAR__ADDR]], align 4 // CHECK9-NEXT: [[TMP3:%.*]] = bitcast i32* [[DOTT_VAR__ADDR]] to i8* @@ -2521,9 +2521,9 @@ void array_func(float a[3], St s[2], int n, long double vla1[n]) { // CHECK9-NEXT: [[TMP6:%.*]] = load i32, i32* [[TMP5]], align 4 // CHECK9-NEXT: [[INC:%.*]] = add nsw i32 [[TMP6]], 1 // CHECK9-NEXT: store i32 [[INC]], i32* [[TMP5]], align 4 -// CHECK9-NEXT: [[TMP7:%.*]] = load i32, i32* [[CONV1]], align 8 +// CHECK9-NEXT: [[TMP7:%.*]] = load i32, i32* [[CONV1]], align 4 // CHECK9-NEXT: [[DEC:%.*]] = add nsw i32 [[TMP7]], -1 -// CHECK9-NEXT: store i32 [[DEC]], i32* [[CONV1]], align 8 +// CHECK9-NEXT: store i32 [[DEC]], i32* [[CONV1]], align 4 // CHECK9-NEXT: [[TMP8:%.*]] = load i32*, i32** [[_TMP3]], align 8 // CHECK9-NEXT: [[TMP9:%.*]] = load i32, i32* [[TMP8]], align 4 // CHECK9-NEXT: [[DIV:%.*]] = sdiv i32 [[TMP9]], 1 @@ -2999,14 +2999,14 @@ void array_func(float a[3], St s[2], int n, long double vla1[n]) { // CHECK10-NEXT: call void @_ZN2StC1Ev(%struct.St* nonnull align 4 dereferenceable(8) [[AGG_TMP6]]) // CHECK10-NEXT: call void @_ZN1SIfEC1ERKS0_2St(%struct.S* nonnull align 4 dereferenceable(4) [[VAR5]], %struct.S* nonnull align 4 dereferenceable(4) [[TMP2]], %struct.St* [[AGG_TMP6]]) // CHECK10-NEXT: call void @_ZN2StD1Ev(%struct.St* nonnull align 4 dereferenceable(8) [[AGG_TMP6]]) #[[ATTR4]] -// CHECK10-NEXT: [[TMP7:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK10-NEXT: [[TMP7:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK10-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x i32], [2 x i32]* [[VEC2]], i64 0, i64 0 // CHECK10-NEXT: store i32 [[TMP7]], i32* [[ARRAYIDX]], align 4 // CHECK10-NEXT: [[ARRAYIDX7:%.*]] = getelementptr inbounds [2 x %struct.S], [2 x %struct.S]* [[S_ARR3]], i64 0, i64 0 // CHECK10-NEXT: [[TMP8:%.*]] = bitcast %struct.S* [[ARRAYIDX7]] to i8* // CHECK10-NEXT: [[TMP9:%.*]] = bitcast %struct.S* [[VAR5]] to i8* // CHECK10-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 [[TMP8]], i8* align 4 [[TMP9]], i64 4, i1 false) -// CHECK10-NEXT: store i32 2, i32* [[CONV1]], align 8 +// CHECK10-NEXT: store i32 2, i32* [[CONV1]], align 4 // CHECK10-NEXT: call void @_ZN1SIfED1Ev(%struct.S* nonnull align 4 dereferenceable(4) [[VAR5]]) #[[ATTR4]] // CHECK10-NEXT: [[ARRAY_BEGIN8:%.*]] = getelementptr inbounds [2 x %struct.S], [2 x %struct.S]* [[S_ARR3]], i32 0, i32 0 // CHECK10-NEXT: [[TMP10:%.*]] = getelementptr inbounds [[STRUCT_S]], %struct.S* [[ARRAY_BEGIN8]], i64 2 @@ -3078,7 +3078,7 @@ void array_func(float a[3], St s[2], int n, long double vla1[n]) { // CHECK10-NEXT: [[TMP1:%.*]] = load i32, i32* [[TMP0]], align 4 // CHECK10-NEXT: [[DOTT_VAR__VOID_ADDR:%.*]] = call i8* @__kmpc_alloc(i32 [[TMP1]], i64 4, i8* inttoptr (i64 1 to i8*)) // CHECK10-NEXT: [[DOTT_VAR__ADDR:%.*]] = bitcast i8* [[DOTT_VAR__VOID_ADDR]] to i32* -// CHECK10-NEXT: [[TMP2:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK10-NEXT: [[TMP2:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK10-NEXT: store i32 [[TMP2]], i32* [[DOTT_VAR__ADDR]], align 4 // CHECK10-NEXT: store i32 0, i32* [[DOTT_VAR__ADDR]], align 4 // CHECK10-NEXT: [[TMP3:%.*]] = bitcast i32* [[DOTT_VAR__ADDR]] to i8* @@ -3220,9 +3220,9 @@ void array_func(float a[3], St s[2], int n, long double vla1[n]) { // CHECK10-NEXT: [[TMP6:%.*]] = load i32, i32* [[TMP5]], align 4 // CHECK10-NEXT: [[INC:%.*]] = add nsw i32 
[[TMP6]], 1 // CHECK10-NEXT: store i32 [[INC]], i32* [[TMP5]], align 4 -// CHECK10-NEXT: [[TMP7:%.*]] = load i32, i32* [[CONV1]], align 8 +// CHECK10-NEXT: [[TMP7:%.*]] = load i32, i32* [[CONV1]], align 4 // CHECK10-NEXT: [[DEC:%.*]] = add nsw i32 [[TMP7]], -1 -// CHECK10-NEXT: store i32 [[DEC]], i32* [[CONV1]], align 8 +// CHECK10-NEXT: store i32 [[DEC]], i32* [[CONV1]], align 4 // CHECK10-NEXT: [[TMP8:%.*]] = load i32*, i32** [[_TMP3]], align 8 // CHECK10-NEXT: [[TMP9:%.*]] = load i32, i32* [[TMP8]], align 4 // CHECK10-NEXT: [[DIV:%.*]] = sdiv i32 [[TMP9]], 1 @@ -3763,9 +3763,9 @@ void array_func(float a[3], St s[2], int n, long double vla1[n]) { // CHECK11-NEXT: [[TMP2:%.*]] = load i32, i32* [[TMP1]], align 4 // CHECK11-NEXT: [[INC:%.*]] = add nsw i32 [[TMP2]], 1 // CHECK11-NEXT: store i32 [[INC]], i32* [[TMP1]], align 4 -// CHECK11-NEXT: [[TMP3:%.*]] = load i32, i32* [[CONV1]], align 8 +// CHECK11-NEXT: [[TMP3:%.*]] = load i32, i32* [[CONV1]], align 4 // CHECK11-NEXT: [[DEC:%.*]] = add nsw i32 [[TMP3]], -1 -// CHECK11-NEXT: store i32 [[DEC]], i32* [[CONV1]], align 8 +// CHECK11-NEXT: store i32 [[DEC]], i32* [[CONV1]], align 4 // CHECK11-NEXT: [[TMP4:%.*]] = load i32*, i32** [[_TMP3]], align 8 // CHECK11-NEXT: [[TMP5:%.*]] = load i32, i32* [[TMP4]], align 4 // CHECK11-NEXT: [[DIV:%.*]] = sdiv i32 [[TMP5]], 1 @@ -3791,7 +3791,7 @@ void array_func(float a[3], St s[2], int n, long double vla1[n]) { // CHECK11-NEXT: [[TMP1:%.*]] = load volatile i32, i32* [[TMP0]], align 128 // CHECK11-NEXT: store i32 [[TMP1]], i32* [[G1]], align 128 // CHECK11-NEXT: store i32 1, i32* [[G1]], align 128 -// CHECK11-NEXT: store i32 2, i32* [[CONV]], align 8 +// CHECK11-NEXT: store i32 2, i32* [[CONV]], align 4 // CHECK11-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[CLASS_ANON_1]], %class.anon.1* [[REF_TMP]], i32 0, i32 0 // CHECK11-NEXT: store i32* [[G1]], i32** [[TMP2]], align 8 // CHECK11-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[CLASS_ANON_1]], %class.anon.1* [[REF_TMP]], i32 0, i32 1 @@ -3879,7 +3879,7 @@ void array_func(float a[3], St s[2], int n, long double vla1[n]) { // CHECK12-NEXT: [[TMP1:%.*]] = load volatile i32, i32* [[TMP0]], align 128 // CHECK12-NEXT: store i32 [[TMP1]], i32* [[G1]], align 128 // CHECK12-NEXT: store i32 1, i32* [[G1]], align 128 -// CHECK12-NEXT: store i32 2, i32* [[CONV]], align 8 +// CHECK12-NEXT: store i32 2, i32* [[CONV]], align 4 // CHECK12-NEXT: [[BLOCK_ISA:%.*]] = getelementptr inbounds <{ i8*, i32, i32, i8*, %struct.__block_descriptor*, i32, [92 x i8], i32 }>, <{ i8*, i32, i32, i8*, %struct.__block_descriptor*, i32, [92 x i8], i32 }>* [[BLOCK]], i32 0, i32 0 // CHECK12-NEXT: store i8* bitcast (i8** @_NSConcreteStackBlock to i8*), i8** [[BLOCK_ISA]], align 128 // CHECK12-NEXT: [[BLOCK_FLAGS:%.*]] = getelementptr inbounds <{ i8*, i32, i32, i8*, %struct.__block_descriptor*, i32, [92 x i8], i32 }>, <{ i8*, i32, i32, i8*, %struct.__block_descriptor*, i32, [92 x i8], i32 }>* [[BLOCK]], i32 0, i32 1 @@ -3894,7 +3894,7 @@ void array_func(float a[3], St s[2], int n, long double vla1[n]) { // CHECK12-NEXT: [[TMP2:%.*]] = load volatile i32, i32* [[G1]], align 128 // CHECK12-NEXT: store volatile i32 [[TMP2]], i32* [[BLOCK_CAPTURED]], align 128 // CHECK12-NEXT: [[BLOCK_CAPTURED2:%.*]] = getelementptr inbounds <{ i8*, i32, i32, i8*, %struct.__block_descriptor*, i32, [92 x i8], i32 }>, <{ i8*, i32, i32, i8*, %struct.__block_descriptor*, i32, [92 x i8], i32 }>* [[BLOCK]], i32 0, i32 5 -// CHECK12-NEXT: [[TMP3:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK12-NEXT: [[TMP3:%.*]] = 
load i32, i32* [[CONV]], align 4 // CHECK12-NEXT: store i32 [[TMP3]], i32* [[BLOCK_CAPTURED2]], align 32 // CHECK12-NEXT: [[TMP4:%.*]] = bitcast <{ i8*, i32, i32, i8*, %struct.__block_descriptor*, i32, [92 x i8], i32 }>* [[BLOCK]] to void ()* // CHECK12-NEXT: [[BLOCK_LITERAL:%.*]] = bitcast void ()* [[TMP4]] to %struct.__block_literal_generic* @@ -4029,7 +4029,7 @@ void array_func(float a[3], St s[2], int n, long double vla1[n]) { // CHECK12-NEXT: [[TMP5:%.*]] = load i32*, i32** [[TMP]], align 8 // CHECK12-NEXT: store i32* [[TMP5]], i32** [[BLOCK_CAPTURED]], align 8 // CHECK12-NEXT: [[BLOCK_CAPTURED7:%.*]] = getelementptr inbounds <{ i8*, i32, i32, i8*, %struct.__block_descriptor*, %struct.SS*, i32*, i32*, i32 }>, <{ i8*, i32, i32, i8*, %struct.__block_descriptor*, %struct.SS*, i32*, i32*, i32 }>* [[BLOCK]], i32 0, i32 8 -// CHECK12-NEXT: [[TMP6:%.*]] = load i32, i32* [[CONV1]], align 8 +// CHECK12-NEXT: [[TMP6:%.*]] = load i32, i32* [[CONV1]], align 4 // CHECK12-NEXT: store i32 [[TMP6]], i32* [[BLOCK_CAPTURED7]], align 8 // CHECK12-NEXT: [[BLOCK_CAPTURED8:%.*]] = getelementptr inbounds <{ i8*, i32, i32, i8*, %struct.__block_descriptor*, %struct.SS*, i32*, i32*, i32 }>, <{ i8*, i32, i32, i8*, %struct.__block_descriptor*, %struct.SS*, i32*, i32*, i32 }>* [[BLOCK]], i32 0, i32 7 // CHECK12-NEXT: [[TMP7:%.*]] = load i32*, i32** [[_TMP3]], align 8 @@ -4119,9 +4119,9 @@ void array_func(float a[3], St s[2], int n, long double vla1[n]) { // CHECK12-NEXT: [[TMP2:%.*]] = load i32, i32* [[TMP1]], align 4 // CHECK12-NEXT: [[INC:%.*]] = add nsw i32 [[TMP2]], 1 // CHECK12-NEXT: store i32 [[INC]], i32* [[TMP1]], align 4 -// CHECK12-NEXT: [[TMP3:%.*]] = load i32, i32* [[CONV1]], align 8 +// CHECK12-NEXT: [[TMP3:%.*]] = load i32, i32* [[CONV1]], align 4 // CHECK12-NEXT: [[DEC:%.*]] = add nsw i32 [[TMP3]], -1 -// CHECK12-NEXT: store i32 [[DEC]], i32* [[CONV1]], align 8 +// CHECK12-NEXT: store i32 [[DEC]], i32* [[CONV1]], align 4 // CHECK12-NEXT: [[TMP4:%.*]] = load i32*, i32** [[_TMP3]], align 8 // CHECK12-NEXT: [[TMP5:%.*]] = load i32, i32* [[TMP4]], align 4 // CHECK12-NEXT: [[DIV:%.*]] = sdiv i32 [[TMP5]], 1 diff --git a/clang/test/OpenMP/parallel_for_codegen.cpp b/clang/test/OpenMP/parallel_for_codegen.cpp index 7f17f8c1d855..a4270c6493b2 100644 --- a/clang/test/OpenMP/parallel_for_codegen.cpp +++ b/clang/test/OpenMP/parallel_for_codegen.cpp @@ -298,7 +298,7 @@ void range_for_collapsed() { // CHECK1-NEXT: store i64 [[TMP3]], i64* [[DOTOMP_UB]], align 8 // CHECK1-NEXT: store i64 1, i64* [[DOTOMP_STRIDE]], align 8 // CHECK1-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP4:%.*]] = load i8, i8* [[CONV]], align 8 +// CHECK1-NEXT: [[TMP4:%.*]] = load i8, i8* [[CONV]], align 1 // CHECK1-NEXT: [[CONV6:%.*]] = sext i8 [[TMP4]] to i64 // CHECK1-NEXT: [[TMP5:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: [[TMP6:%.*]] = load i32, i32* [[TMP5]], align 4 @@ -1304,7 +1304,7 @@ void range_for_collapsed() { // CHECK1-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds float, float* [[VLA1]], i64 [[IDXPROM]] // CHECK1-NEXT: [[TMP14:%.*]] = load float, float* [[ARRAYIDX]], align 4 // CHECK1-NEXT: [[ADD5:%.*]] = fadd float [[CONV4]], [[TMP14]] -// CHECK1-NEXT: [[TMP15:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK1-NEXT: [[TMP15:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK1-NEXT: [[CONV6:%.*]] = sitofp i32 [[TMP15]] to float // CHECK1-NEXT: [[ADD7:%.*]] = fadd float [[ADD5]], [[CONV6]] // CHECK1-NEXT: [[TMP16:%.*]] = load float*, float** [[TMP0]], align 8 @@ 
-1412,7 +1412,7 @@ void range_for_collapsed() { // CHECK2-NEXT: store i64 [[TMP3]], i64* [[DOTOMP_UB]], align 8 // CHECK2-NEXT: store i64 1, i64* [[DOTOMP_STRIDE]], align 8 // CHECK2-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK2-NEXT: [[TMP4:%.*]] = load i8, i8* [[CONV]], align 8 +// CHECK2-NEXT: [[TMP4:%.*]] = load i8, i8* [[CONV]], align 1 // CHECK2-NEXT: [[CONV6:%.*]] = sext i8 [[TMP4]] to i64 // CHECK2-NEXT: [[TMP5:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK2-NEXT: [[TMP6:%.*]] = load i32, i32* [[TMP5]], align 4 @@ -2418,7 +2418,7 @@ void range_for_collapsed() { // CHECK2-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds float, float* [[VLA1]], i64 [[IDXPROM]] // CHECK2-NEXT: [[TMP14:%.*]] = load float, float* [[ARRAYIDX]], align 4 // CHECK2-NEXT: [[ADD5:%.*]] = fadd float [[CONV4]], [[TMP14]] -// CHECK2-NEXT: [[TMP15:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK2-NEXT: [[TMP15:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK2-NEXT: [[CONV6:%.*]] = sitofp i32 [[TMP15]] to float // CHECK2-NEXT: [[ADD7:%.*]] = fadd float [[ADD5]], [[CONV6]] // CHECK2-NEXT: [[TMP16:%.*]] = load float*, float** [[TMP0]], align 8 @@ -2526,7 +2526,7 @@ void range_for_collapsed() { // CHECK3-NEXT: store i64 [[TMP3]], i64* [[DOTOMP_UB]], align 8 // CHECK3-NEXT: store i64 1, i64* [[DOTOMP_STRIDE]], align 8 // CHECK3-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK3-NEXT: [[TMP4:%.*]] = load i8, i8* [[CONV]], align 8 +// CHECK3-NEXT: [[TMP4:%.*]] = load i8, i8* [[CONV]], align 1 // CHECK3-NEXT: [[CONV6:%.*]] = sext i8 [[TMP4]] to i64 // CHECK3-NEXT: [[TMP5:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK3-NEXT: [[TMP6:%.*]] = load i32, i32* [[TMP5]], align 4 @@ -3532,7 +3532,7 @@ void range_for_collapsed() { // CHECK3-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds float, float* [[VLA1]], i64 [[IDXPROM]] // CHECK3-NEXT: [[TMP14:%.*]] = load float, float* [[ARRAYIDX]], align 4 // CHECK3-NEXT: [[ADD5:%.*]] = fadd float [[CONV4]], [[TMP14]] -// CHECK3-NEXT: [[TMP15:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK3-NEXT: [[TMP15:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK3-NEXT: [[CONV6:%.*]] = sitofp i32 [[TMP15]] to float // CHECK3-NEXT: [[ADD7:%.*]] = fadd float [[ADD5]], [[CONV6]] // CHECK3-NEXT: [[TMP16:%.*]] = load float*, float** [[TMP0]], align 8 @@ -3640,7 +3640,7 @@ void range_for_collapsed() { // CHECK4-NEXT: store i64 [[TMP3]], i64* [[DOTOMP_UB]], align 8 // CHECK4-NEXT: store i64 1, i64* [[DOTOMP_STRIDE]], align 8 // CHECK4-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK4-NEXT: [[TMP4:%.*]] = load i8, i8* [[CONV]], align 8 +// CHECK4-NEXT: [[TMP4:%.*]] = load i8, i8* [[CONV]], align 1 // CHECK4-NEXT: [[CONV6:%.*]] = sext i8 [[TMP4]] to i64 // CHECK4-NEXT: [[TMP5:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK4-NEXT: [[TMP6:%.*]] = load i32, i32* [[TMP5]], align 4 @@ -4646,7 +4646,7 @@ void range_for_collapsed() { // CHECK4-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds float, float* [[VLA1]], i64 [[IDXPROM]] // CHECK4-NEXT: [[TMP14:%.*]] = load float, float* [[ARRAYIDX]], align 4 // CHECK4-NEXT: [[ADD5:%.*]] = fadd float [[CONV4]], [[TMP14]] -// CHECK4-NEXT: [[TMP15:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK4-NEXT: [[TMP15:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK4-NEXT: [[CONV6:%.*]] = sitofp i32 [[TMP15]] to float // CHECK4-NEXT: [[ADD7:%.*]] = fadd float [[ADD5]], [[CONV6]] // CHECK4-NEXT: [[TMP16:%.*]] = load float*, float** [[TMP0]], align 8 @@ -4754,7 +4754,7 @@ void 
range_for_collapsed() { // CHECK5-NEXT: store i64 [[TMP3]], i64* [[DOTOMP_UB]], align 8, !dbg [[DBG15]] // CHECK5-NEXT: store i64 1, i64* [[DOTOMP_STRIDE]], align 8, !dbg [[DBG15]] // CHECK5-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4, !dbg [[DBG15]] -// CHECK5-NEXT: [[TMP4:%.*]] = load i8, i8* [[CONV]], align 8, !dbg [[DBG14]] +// CHECK5-NEXT: [[TMP4:%.*]] = load i8, i8* [[CONV]], align 1, !dbg [[DBG14]] // CHECK5-NEXT: [[CONV6:%.*]] = sext i8 [[TMP4]] to i64, !dbg [[DBG14]] // CHECK5-NEXT: [[TMP5:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8, !dbg [[DBG14]] // CHECK5-NEXT: [[TMP6:%.*]] = load i32, i32* [[TMP5]], align 4, !dbg [[DBG14]] @@ -5760,7 +5760,7 @@ void range_for_collapsed() { // CHECK5-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds float, float* [[VLA1]], i64 [[IDXPROM]], !dbg [[DBG111]] // CHECK5-NEXT: [[TMP14:%.*]] = load float, float* [[ARRAYIDX]], align 4, !dbg [[DBG111]] // CHECK5-NEXT: [[ADD5:%.*]] = fadd float [[CONV4]], [[TMP14]], !dbg [[DBG111]] -// CHECK5-NEXT: [[TMP15:%.*]] = load i32, i32* [[CONV]], align 8, !dbg [[DBG111]] +// CHECK5-NEXT: [[TMP15:%.*]] = load i32, i32* [[CONV]], align 4, !dbg [[DBG111]] // CHECK5-NEXT: [[CONV6:%.*]] = sitofp i32 [[TMP15]] to float, !dbg [[DBG111]] // CHECK5-NEXT: [[ADD7:%.*]] = fadd float [[ADD5]], [[CONV6]], !dbg [[DBG111]] // CHECK5-NEXT: [[TMP16:%.*]] = load float*, float** [[TMP0]], align 8, !dbg [[DBG111]] @@ -5868,7 +5868,7 @@ void range_for_collapsed() { // CHECK6-NEXT: store i64 [[TMP3]], i64* [[DOTOMP_UB]], align 8 // CHECK6-NEXT: store i64 1, i64* [[DOTOMP_STRIDE]], align 8 // CHECK6-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK6-NEXT: [[TMP4:%.*]] = load i8, i8* [[CONV]], align 8 +// CHECK6-NEXT: [[TMP4:%.*]] = load i8, i8* [[CONV]], align 1 // CHECK6-NEXT: [[CONV6:%.*]] = sext i8 [[TMP4]] to i64 // CHECK6-NEXT: [[TMP5:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK6-NEXT: [[TMP6:%.*]] = load i32, i32* [[TMP5]], align 4 @@ -6872,7 +6872,7 @@ void range_for_collapsed() { // CHECK6-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds float, float* [[VLA1]], i64 [[IDXPROM]] // CHECK6-NEXT: [[TMP14:%.*]] = load float, float* [[ARRAYIDX]], align 4 // CHECK6-NEXT: [[ADD5:%.*]] = fadd float [[CONV4]], [[TMP14]] -// CHECK6-NEXT: [[TMP15:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK6-NEXT: [[TMP15:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK6-NEXT: [[CONV6:%.*]] = sitofp i32 [[TMP15]] to float // CHECK6-NEXT: [[ADD7:%.*]] = fadd float [[ADD5]], [[CONV6]] // CHECK6-NEXT: [[TMP16:%.*]] = load float*, float** [[TMP0]], align 8 diff --git a/clang/test/OpenMP/parallel_master_codegen.cpp b/clang/test/OpenMP/parallel_master_codegen.cpp index 46df40533cfc..212fc8a6cc07 100644 --- a/clang/test/OpenMP/parallel_master_codegen.cpp +++ b/clang/test/OpenMP/parallel_master_codegen.cpp @@ -559,9 +559,9 @@ void parallel_master_allocate() { // CHECK13-NEXT: [[TMP3:%.*]] = icmp ne i32 [[TMP2]], 0 // CHECK13-NEXT: br i1 [[TMP3]], label [[OMP_IF_THEN:%.*]], label [[OMP_IF_END:%.*]] // CHECK13: omp_if.then: -// CHECK13-NEXT: [[TMP4:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK13-NEXT: [[TMP4:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK13-NEXT: [[INC:%.*]] = add nsw i32 [[TMP4]], 1 -// CHECK13-NEXT: store i32 [[INC]], i32* [[CONV]], align 8 +// CHECK13-NEXT: store i32 [[INC]], i32* [[CONV]], align 4 // CHECK13-NEXT: call void @__kmpc_end_master(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]]) // CHECK13-NEXT: br label [[OMP_IF_END]] // CHECK13: omp_if.end: @@ -597,9 +597,9 @@ void 
parallel_master_allocate() { // CHECK14-NEXT: [[TMP3:%.*]] = icmp ne i32 [[TMP2]], 0 // CHECK14-NEXT: br i1 [[TMP3]], label [[OMP_IF_THEN:%.*]], label [[OMP_IF_END:%.*]] // CHECK14: omp_if.then: -// CHECK14-NEXT: [[TMP4:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK14-NEXT: [[TMP4:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK14-NEXT: [[INC:%.*]] = add nsw i32 [[TMP4]], 1 -// CHECK14-NEXT: store i32 [[INC]], i32* [[CONV]], align 8 +// CHECK14-NEXT: store i32 [[INC]], i32* [[CONV]], align 4 // CHECK14-NEXT: call void @__kmpc_end_master(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]]) // CHECK14-NEXT: br label [[OMP_IF_END]] // CHECK14: omp_if.end: @@ -658,9 +658,9 @@ void parallel_master_allocate() { // CHECK17-NEXT: [[TMP6:%.*]] = load i32, i32* [[B]], align 4 // CHECK17-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP6]], 1 // CHECK17-NEXT: store i32 [[ADD2]], i32* [[B]], align 4 -// CHECK17-NEXT: [[TMP7:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK17-NEXT: [[TMP7:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK17-NEXT: [[INC:%.*]] = add nsw i32 [[TMP7]], 1 -// CHECK17-NEXT: store i32 [[INC]], i32* [[CONV]], align 8 +// CHECK17-NEXT: store i32 [[INC]], i32* [[CONV]], align 4 // CHECK17-NEXT: [[TMP8:%.*]] = load i32, i32* @_ZN2St1yE, align 4 // CHECK17-NEXT: [[INC3:%.*]] = add nsw i32 [[TMP8]], 1 // CHECK17-NEXT: store i32 [[INC3]], i32* @_ZN2St1yE, align 4 @@ -754,9 +754,9 @@ void parallel_master_allocate() { // CHECK18-NEXT: [[TMP6:%.*]] = load i32, i32* [[B]], align 4 // CHECK18-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP6]], 1 // CHECK18-NEXT: store i32 [[ADD2]], i32* [[B]], align 4 -// CHECK18-NEXT: [[TMP7:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK18-NEXT: [[TMP7:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK18-NEXT: [[INC:%.*]] = add nsw i32 [[TMP7]], 1 -// CHECK18-NEXT: store i32 [[INC]], i32* [[CONV]], align 8 +// CHECK18-NEXT: store i32 [[INC]], i32* [[CONV]], align 4 // CHECK18-NEXT: [[TMP8:%.*]] = load i32, i32* @_ZN2St1yE, align 4 // CHECK18-NEXT: [[INC3:%.*]] = add nsw i32 [[TMP8]], 1 // CHECK18-NEXT: store i32 [[INC3]], i32* @_ZN2St1yE, align 4 @@ -827,9 +827,9 @@ void parallel_master_allocate() { // CHECK21-NEXT: [[TMP3:%.*]] = icmp ne i32 [[TMP2]], 0 // CHECK21-NEXT: br i1 [[TMP3]], label [[OMP_IF_THEN:%.*]], label [[OMP_IF_END:%.*]] // CHECK21: omp_if.then: -// CHECK21-NEXT: [[TMP4:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK21-NEXT: [[TMP4:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK21-NEXT: [[INC:%.*]] = add nsw i32 [[TMP4]], 1 -// CHECK21-NEXT: store i32 [[INC]], i32* [[CONV]], align 8 +// CHECK21-NEXT: store i32 [[INC]], i32* [[CONV]], align 4 // CHECK21-NEXT: call void @__kmpc_end_master(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]]) // CHECK21-NEXT: br label [[OMP_IF_END]] // CHECK21: omp_if.end: @@ -865,9 +865,9 @@ void parallel_master_allocate() { // CHECK22-NEXT: [[TMP3:%.*]] = icmp ne i32 [[TMP2]], 0 // CHECK22-NEXT: br i1 [[TMP3]], label [[OMP_IF_THEN:%.*]], label [[OMP_IF_END:%.*]] // CHECK22: omp_if.then: -// CHECK22-NEXT: [[TMP4:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK22-NEXT: [[TMP4:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK22-NEXT: [[INC:%.*]] = add nsw i32 [[TMP4]], 1 -// CHECK22-NEXT: store i32 [[INC]], i32* [[CONV]], align 8 +// CHECK22-NEXT: store i32 [[INC]], i32* [[CONV]], align 4 // CHECK22-NEXT: call void @__kmpc_end_master(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]]) // CHECK22-NEXT: br label [[OMP_IF_END]] // CHECK22: omp_if.end: diff --git a/clang/test/OpenMP/parallel_master_taskloop_codegen.cpp 
b/clang/test/OpenMP/parallel_master_taskloop_codegen.cpp index b0b1dfee0213..4fd6b958fd28 100644 --- a/clang/test/OpenMP/parallel_master_taskloop_codegen.cpp +++ b/clang/test/OpenMP/parallel_master_taskloop_codegen.cpp @@ -139,7 +139,7 @@ struct S { // CHECK1-NEXT: br i1 [[TMP3]], label [[OMP_IF_THEN:%.*]], label [[OMP_IF_END:%.*]] // CHECK1: omp_if.then: // CHECK1-NEXT: call void @__kmpc_taskgroup(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]]) -// CHECK1-NEXT: [[TMP4:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK1-NEXT: [[TMP4:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK1-NEXT: [[TMP5:%.*]] = call i8* @__kmpc_omp_task_alloc(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]], i32 33, i64 80, i64 1, i32 (i32, i8*)* bitcast (i32 (i32, %struct.kmp_task_t_with_privates*)* @.omp_task_entry. to i32 (i32, i8*)*)) // CHECK1-NEXT: [[TMP6:%.*]] = bitcast i8* [[TMP5]] to %struct.kmp_task_t_with_privates* // CHECK1-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T_WITH_PRIVATES:%.*]], %struct.kmp_task_t_with_privates* [[TMP6]], i32 0, i32 0 @@ -258,7 +258,7 @@ struct S { // CHECK1-NEXT: [[TMP3:%.*]] = icmp ne i32 [[TMP2]], 0 // CHECK1-NEXT: br i1 [[TMP3]], label [[OMP_IF_THEN:%.*]], label [[OMP_IF_END:%.*]] // CHECK1: omp_if.then: -// CHECK1-NEXT: [[TMP4:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK1-NEXT: [[TMP4:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK1-NEXT: [[TMP5:%.*]] = call i8* @__kmpc_omp_task_alloc(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]], i32 1, i64 80, i64 1, i32 (i32, i8*)* bitcast (i32 (i32, %struct.kmp_task_t_with_privates.1*)* @.omp_task_entry..4 to i32 (i32, i8*)*)) // CHECK1-NEXT: [[TMP6:%.*]] = bitcast i8* [[TMP5]] to %struct.kmp_task_t_with_privates.1* // CHECK1-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T_WITH_PRIVATES_1:%.*]], %struct.kmp_task_t_with_privates.1* [[TMP6]], i32 0, i32 0 @@ -392,7 +392,7 @@ struct S { // CHECK1-NEXT: store i32* [[TMP0]], i32** [[TMP6]], align 8 // CHECK1-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], %struct.anon.2* [[AGG_CAPTURED]], i32 0, i32 1 // CHECK1-NEXT: store i8*** [[TMP1]], i8**** [[TMP7]], align 8 -// CHECK1-NEXT: [[TMP8:%.*]] = load i32, i32* [[CONV3]], align 8 +// CHECK1-NEXT: [[TMP8:%.*]] = load i32, i32* [[CONV3]], align 4 // CHECK1-NEXT: call void @__kmpc_taskgroup(%struct.ident_t* @[[GLOB1]], i32 [[TMP3]]) // CHECK1-NEXT: [[TMP9:%.*]] = load i32, i32* [[TMP0]], align 4 // CHECK1-NEXT: store i32 [[TMP9]], i32* [[DOTCAPTURE_EXPR_5]], align 4 @@ -430,7 +430,7 @@ struct S { // CHECK1-NEXT: [[TMP23:%.*]] = load i8*, i8** [[TMP22]], align 8 // CHECK1-NEXT: [[TMP24:%.*]] = bitcast %struct.anon.2* [[AGG_CAPTURED]] to i8* // CHECK1-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[TMP23]], i8* align 8 [[TMP24]], i64 16, i1 false) -// CHECK1-NEXT: [[TMP25:%.*]] = load i8, i8* [[CONV]], align 8 +// CHECK1-NEXT: [[TMP25:%.*]] = load i8, i8* [[CONV]], align 1 // CHECK1-NEXT: [[TOBOOL:%.*]] = trunc i8 [[TMP25]] to i1 // CHECK1-NEXT: [[TMP26:%.*]] = sext i1 [[TOBOOL]] to i32 // CHECK1-NEXT: [[TMP27:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T]], %struct.kmp_task_t* [[TMP21]], i32 0, i32 5 @@ -824,7 +824,7 @@ struct S { // CHECK1-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_6]], %struct.anon.6* [[AGG_CAPTURED]], i32 0, i32 1 // CHECK1-NEXT: store i32* [[TMP1]], i32** [[TMP7]], align 8 // CHECK1-NEXT: call void @__kmpc_taskgroup(%struct.ident_t* @[[GLOB1]], i32 [[TMP3]]) -// CHECK1-NEXT: [[TMP8:%.*]] = load i8, i8* [[CONV]], align 8 +// CHECK1-NEXT: [[TMP8:%.*]] = 
load i8, i8* [[CONV]], align 1 // CHECK1-NEXT: [[TOBOOL:%.*]] = trunc i8 [[TMP8]] to i1 // CHECK1-NEXT: store i32* [[TMP]], i32** [[_TMP1]], align 8 // CHECK1-NEXT: [[TMP9:%.*]] = load i32, i32* [[TMP1]], align 4 @@ -1065,7 +1065,7 @@ struct S { // CHECK2-NEXT: br i1 [[TMP3]], label [[OMP_IF_THEN:%.*]], label [[OMP_IF_END:%.*]] // CHECK2: omp_if.then: // CHECK2-NEXT: call void @__kmpc_taskgroup(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]]) -// CHECK2-NEXT: [[TMP4:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK2-NEXT: [[TMP4:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK2-NEXT: [[TMP5:%.*]] = call i8* @__kmpc_omp_task_alloc(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]], i32 33, i64 80, i64 1, i32 (i32, i8*)* bitcast (i32 (i32, %struct.kmp_task_t_with_privates*)* @.omp_task_entry. to i32 (i32, i8*)*)) // CHECK2-NEXT: [[TMP6:%.*]] = bitcast i8* [[TMP5]] to %struct.kmp_task_t_with_privates* // CHECK2-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T_WITH_PRIVATES:%.*]], %struct.kmp_task_t_with_privates* [[TMP6]], i32 0, i32 0 @@ -1184,7 +1184,7 @@ struct S { // CHECK2-NEXT: [[TMP3:%.*]] = icmp ne i32 [[TMP2]], 0 // CHECK2-NEXT: br i1 [[TMP3]], label [[OMP_IF_THEN:%.*]], label [[OMP_IF_END:%.*]] // CHECK2: omp_if.then: -// CHECK2-NEXT: [[TMP4:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK2-NEXT: [[TMP4:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK2-NEXT: [[TMP5:%.*]] = call i8* @__kmpc_omp_task_alloc(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]], i32 1, i64 80, i64 1, i32 (i32, i8*)* bitcast (i32 (i32, %struct.kmp_task_t_with_privates.1*)* @.omp_task_entry..4 to i32 (i32, i8*)*)) // CHECK2-NEXT: [[TMP6:%.*]] = bitcast i8* [[TMP5]] to %struct.kmp_task_t_with_privates.1* // CHECK2-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T_WITH_PRIVATES_1:%.*]], %struct.kmp_task_t_with_privates.1* [[TMP6]], i32 0, i32 0 @@ -1318,7 +1318,7 @@ struct S { // CHECK2-NEXT: store i32* [[TMP0]], i32** [[TMP6]], align 8 // CHECK2-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], %struct.anon.2* [[AGG_CAPTURED]], i32 0, i32 1 // CHECK2-NEXT: store i8*** [[TMP1]], i8**** [[TMP7]], align 8 -// CHECK2-NEXT: [[TMP8:%.*]] = load i32, i32* [[CONV3]], align 8 +// CHECK2-NEXT: [[TMP8:%.*]] = load i32, i32* [[CONV3]], align 4 // CHECK2-NEXT: call void @__kmpc_taskgroup(%struct.ident_t* @[[GLOB1]], i32 [[TMP3]]) // CHECK2-NEXT: [[TMP9:%.*]] = load i32, i32* [[TMP0]], align 4 // CHECK2-NEXT: store i32 [[TMP9]], i32* [[DOTCAPTURE_EXPR_5]], align 4 @@ -1356,7 +1356,7 @@ struct S { // CHECK2-NEXT: [[TMP23:%.*]] = load i8*, i8** [[TMP22]], align 8 // CHECK2-NEXT: [[TMP24:%.*]] = bitcast %struct.anon.2* [[AGG_CAPTURED]] to i8* // CHECK2-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[TMP23]], i8* align 8 [[TMP24]], i64 16, i1 false) -// CHECK2-NEXT: [[TMP25:%.*]] = load i8, i8* [[CONV]], align 8 +// CHECK2-NEXT: [[TMP25:%.*]] = load i8, i8* [[CONV]], align 1 // CHECK2-NEXT: [[TOBOOL:%.*]] = trunc i8 [[TMP25]] to i1 // CHECK2-NEXT: [[TMP26:%.*]] = sext i1 [[TOBOOL]] to i32 // CHECK2-NEXT: [[TMP27:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T]], %struct.kmp_task_t* [[TMP21]], i32 0, i32 5 @@ -1750,7 +1750,7 @@ struct S { // CHECK2-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_6]], %struct.anon.6* [[AGG_CAPTURED]], i32 0, i32 1 // CHECK2-NEXT: store i32* [[TMP1]], i32** [[TMP7]], align 8 // CHECK2-NEXT: call void @__kmpc_taskgroup(%struct.ident_t* @[[GLOB1]], i32 [[TMP3]]) -// CHECK2-NEXT: [[TMP8:%.*]] = load i8, i8* [[CONV]], align 8 +// CHECK2-NEXT: [[TMP8:%.*]] 
= load i8, i8* [[CONV]], align 1 // CHECK2-NEXT: [[TOBOOL:%.*]] = trunc i8 [[TMP8]] to i1 // CHECK2-NEXT: store i32* [[TMP]], i32** [[_TMP1]], align 8 // CHECK2-NEXT: [[TMP9:%.*]] = load i32, i32* [[TMP1]], align 4 diff --git a/clang/test/OpenMP/parallel_master_taskloop_simd_codegen.cpp b/clang/test/OpenMP/parallel_master_taskloop_simd_codegen.cpp index 2b3752cf01cd..0c20823ba68f 100644 --- a/clang/test/OpenMP/parallel_master_taskloop_simd_codegen.cpp +++ b/clang/test/OpenMP/parallel_master_taskloop_simd_codegen.cpp @@ -141,7 +141,7 @@ struct S { // CHECK1-NEXT: br i1 [[TMP3]], label [[OMP_IF_THEN:%.*]], label [[OMP_IF_END:%.*]] // CHECK1: omp_if.then: // CHECK1-NEXT: call void @__kmpc_taskgroup(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]]) -// CHECK1-NEXT: [[TMP4:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK1-NEXT: [[TMP4:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK1-NEXT: [[TMP5:%.*]] = call i8* @__kmpc_omp_task_alloc(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]], i32 33, i64 80, i64 1, i32 (i32, i8*)* bitcast (i32 (i32, %struct.kmp_task_t_with_privates*)* @.omp_task_entry. to i32 (i32, i8*)*)) // CHECK1-NEXT: [[TMP6:%.*]] = bitcast i8* [[TMP5]] to %struct.kmp_task_t_with_privates* // CHECK1-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T_WITH_PRIVATES:%.*]], %struct.kmp_task_t_with_privates* [[TMP6]], i32 0, i32 0 @@ -260,7 +260,7 @@ struct S { // CHECK1-NEXT: [[TMP3:%.*]] = icmp ne i32 [[TMP2]], 0 // CHECK1-NEXT: br i1 [[TMP3]], label [[OMP_IF_THEN:%.*]], label [[OMP_IF_END:%.*]] // CHECK1: omp_if.then: -// CHECK1-NEXT: [[TMP4:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK1-NEXT: [[TMP4:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK1-NEXT: [[TMP5:%.*]] = call i8* @__kmpc_omp_task_alloc(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]], i32 1, i64 80, i64 1, i32 (i32, i8*)* bitcast (i32 (i32, %struct.kmp_task_t_with_privates.1*)* @.omp_task_entry..4 to i32 (i32, i8*)*)) // CHECK1-NEXT: [[TMP6:%.*]] = bitcast i8* [[TMP5]] to %struct.kmp_task_t_with_privates.1* // CHECK1-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T_WITH_PRIVATES_1:%.*]], %struct.kmp_task_t_with_privates.1* [[TMP6]], i32 0, i32 0 @@ -399,7 +399,7 @@ struct S { // CHECK1-NEXT: store i32* [[TMP1]], i32** [[TMP8]], align 8 // CHECK1-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], %struct.anon.2* [[AGG_CAPTURED]], i32 0, i32 2 // CHECK1-NEXT: store i8*** [[TMP2]], i8**** [[TMP9]], align 8 -// CHECK1-NEXT: [[TMP10:%.*]] = load i32, i32* [[CONV3]], align 8 +// CHECK1-NEXT: [[TMP10:%.*]] = load i32, i32* [[CONV3]], align 4 // CHECK1-NEXT: call void @__kmpc_taskgroup(%struct.ident_t* @[[GLOB1]], i32 [[TMP4]]) // CHECK1-NEXT: [[TMP11:%.*]] = load i32, i32* [[TMP1]], align 4 // CHECK1-NEXT: store i32 [[TMP11]], i32* [[DOTCAPTURE_EXPR_5]], align 4 @@ -438,7 +438,7 @@ struct S { // CHECK1-NEXT: [[TMP26:%.*]] = bitcast %struct.anon.2* [[AGG_CAPTURED]] to i8* // CHECK1-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[TMP25]], i8* align 8 [[TMP26]], i64 24, i1 false) // CHECK1-NEXT: [[TMP27:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T_WITH_PRIVATES_3]], %struct.kmp_task_t_with_privates.3* [[TMP22]], i32 0, i32 1 -// CHECK1-NEXT: [[TMP28:%.*]] = load i8, i8* [[CONV]], align 8 +// CHECK1-NEXT: [[TMP28:%.*]] = load i8, i8* [[CONV]], align 1 // CHECK1-NEXT: [[TOBOOL:%.*]] = trunc i8 [[TMP28]] to i1 // CHECK1-NEXT: [[TMP29:%.*]] = sext i1 [[TOBOOL]] to i32 // CHECK1-NEXT: [[TMP30:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T]], %struct.kmp_task_t* [[TMP23]], i32 0, i32 5 
@@ -746,7 +746,7 @@ struct S { // CHECK1-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], %struct.anon.4* [[AGG_CAPTURED]], i32 0, i32 1 // CHECK1-NEXT: store i32* [[TMP1]], i32** [[TMP7]], align 8 // CHECK1-NEXT: call void @__kmpc_taskgroup(%struct.ident_t* @[[GLOB1]], i32 [[TMP3]]) -// CHECK1-NEXT: [[TMP8:%.*]] = load i8, i8* [[CONV]], align 8 +// CHECK1-NEXT: [[TMP8:%.*]] = load i8, i8* [[CONV]], align 1 // CHECK1-NEXT: [[TOBOOL:%.*]] = trunc i8 [[TMP8]] to i1 // CHECK1-NEXT: store i32* [[TMP]], i32** [[_TMP1]], align 8 // CHECK1-NEXT: [[TMP9:%.*]] = load i32, i32* [[TMP1]], align 4 @@ -986,7 +986,7 @@ struct S { // CHECK2-NEXT: br i1 [[TMP3]], label [[OMP_IF_THEN:%.*]], label [[OMP_IF_END:%.*]] // CHECK2: omp_if.then: // CHECK2-NEXT: call void @__kmpc_taskgroup(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]]) -// CHECK2-NEXT: [[TMP4:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK2-NEXT: [[TMP4:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK2-NEXT: [[TMP5:%.*]] = call i8* @__kmpc_omp_task_alloc(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]], i32 33, i64 80, i64 1, i32 (i32, i8*)* bitcast (i32 (i32, %struct.kmp_task_t_with_privates*)* @.omp_task_entry. to i32 (i32, i8*)*)) // CHECK2-NEXT: [[TMP6:%.*]] = bitcast i8* [[TMP5]] to %struct.kmp_task_t_with_privates* // CHECK2-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T_WITH_PRIVATES:%.*]], %struct.kmp_task_t_with_privates* [[TMP6]], i32 0, i32 0 @@ -1105,7 +1105,7 @@ struct S { // CHECK2-NEXT: [[TMP3:%.*]] = icmp ne i32 [[TMP2]], 0 // CHECK2-NEXT: br i1 [[TMP3]], label [[OMP_IF_THEN:%.*]], label [[OMP_IF_END:%.*]] // CHECK2: omp_if.then: -// CHECK2-NEXT: [[TMP4:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK2-NEXT: [[TMP4:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK2-NEXT: [[TMP5:%.*]] = call i8* @__kmpc_omp_task_alloc(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]], i32 1, i64 80, i64 1, i32 (i32, i8*)* bitcast (i32 (i32, %struct.kmp_task_t_with_privates.1*)* @.omp_task_entry..4 to i32 (i32, i8*)*)) // CHECK2-NEXT: [[TMP6:%.*]] = bitcast i8* [[TMP5]] to %struct.kmp_task_t_with_privates.1* // CHECK2-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T_WITH_PRIVATES_1:%.*]], %struct.kmp_task_t_with_privates.1* [[TMP6]], i32 0, i32 0 @@ -1244,7 +1244,7 @@ struct S { // CHECK2-NEXT: store i32* [[TMP1]], i32** [[TMP8]], align 8 // CHECK2-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], %struct.anon.2* [[AGG_CAPTURED]], i32 0, i32 2 // CHECK2-NEXT: store i8*** [[TMP2]], i8**** [[TMP9]], align 8 -// CHECK2-NEXT: [[TMP10:%.*]] = load i32, i32* [[CONV3]], align 8 +// CHECK2-NEXT: [[TMP10:%.*]] = load i32, i32* [[CONV3]], align 4 // CHECK2-NEXT: call void @__kmpc_taskgroup(%struct.ident_t* @[[GLOB1]], i32 [[TMP4]]) // CHECK2-NEXT: [[TMP11:%.*]] = load i32, i32* [[TMP1]], align 4 // CHECK2-NEXT: store i32 [[TMP11]], i32* [[DOTCAPTURE_EXPR_5]], align 4 @@ -1283,7 +1283,7 @@ struct S { // CHECK2-NEXT: [[TMP26:%.*]] = bitcast %struct.anon.2* [[AGG_CAPTURED]] to i8* // CHECK2-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[TMP25]], i8* align 8 [[TMP26]], i64 24, i1 false) // CHECK2-NEXT: [[TMP27:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T_WITH_PRIVATES_3]], %struct.kmp_task_t_with_privates.3* [[TMP22]], i32 0, i32 1 -// CHECK2-NEXT: [[TMP28:%.*]] = load i8, i8* [[CONV]], align 8 +// CHECK2-NEXT: [[TMP28:%.*]] = load i8, i8* [[CONV]], align 1 // CHECK2-NEXT: [[TOBOOL:%.*]] = trunc i8 [[TMP28]] to i1 // CHECK2-NEXT: [[TMP29:%.*]] = sext i1 [[TOBOOL]] to i32 // CHECK2-NEXT: [[TMP30:%.*]] = 
getelementptr inbounds [[STRUCT_KMP_TASK_T]], %struct.kmp_task_t* [[TMP23]], i32 0, i32 5 @@ -1571,7 +1571,7 @@ struct S { // CHECK2-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], %struct.anon.4* [[AGG_CAPTURED]], i32 0, i32 1 // CHECK2-NEXT: store i32* [[TMP1]], i32** [[TMP7]], align 8 // CHECK2-NEXT: call void @__kmpc_taskgroup(%struct.ident_t* @[[GLOB1]], i32 [[TMP3]]) -// CHECK2-NEXT: [[TMP8:%.*]] = load i8, i8* [[CONV]], align 8 +// CHECK2-NEXT: [[TMP8:%.*]] = load i8, i8* [[CONV]], align 1 // CHECK2-NEXT: [[TOBOOL:%.*]] = trunc i8 [[TMP8]] to i1 // CHECK2-NEXT: store i32* [[TMP]], i32** [[_TMP1]], align 8 // CHECK2-NEXT: [[TMP9:%.*]] = load i32, i32* [[TMP1]], align 4 @@ -1831,7 +1831,7 @@ struct S { // CHECK3-NEXT: br i1 [[TMP3]], label [[OMP_IF_THEN:%.*]], label [[OMP_IF_END:%.*]] // CHECK3: omp_if.then: // CHECK3-NEXT: call void @__kmpc_taskgroup(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]]) -// CHECK3-NEXT: [[TMP4:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK3-NEXT: [[TMP4:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK3-NEXT: [[TMP5:%.*]] = call i8* @__kmpc_omp_task_alloc(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]], i32 33, i64 80, i64 1, i32 (i32, i8*)* bitcast (i32 (i32, %struct.kmp_task_t_with_privates*)* @.omp_task_entry. to i32 (i32, i8*)*)) // CHECK3-NEXT: [[TMP6:%.*]] = bitcast i8* [[TMP5]] to %struct.kmp_task_t_with_privates* // CHECK3-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T_WITH_PRIVATES:%.*]], %struct.kmp_task_t_with_privates* [[TMP6]], i32 0, i32 0 @@ -1950,7 +1950,7 @@ struct S { // CHECK3-NEXT: [[TMP3:%.*]] = icmp ne i32 [[TMP2]], 0 // CHECK3-NEXT: br i1 [[TMP3]], label [[OMP_IF_THEN:%.*]], label [[OMP_IF_END:%.*]] // CHECK3: omp_if.then: -// CHECK3-NEXT: [[TMP4:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK3-NEXT: [[TMP4:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK3-NEXT: [[TMP5:%.*]] = call i8* @__kmpc_omp_task_alloc(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]], i32 1, i64 80, i64 1, i32 (i32, i8*)* bitcast (i32 (i32, %struct.kmp_task_t_with_privates.1*)* @.omp_task_entry..4 to i32 (i32, i8*)*)) // CHECK3-NEXT: [[TMP6:%.*]] = bitcast i8* [[TMP5]] to %struct.kmp_task_t_with_privates.1* // CHECK3-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T_WITH_PRIVATES_1:%.*]], %struct.kmp_task_t_with_privates.1* [[TMP6]], i32 0, i32 0 @@ -2090,11 +2090,11 @@ struct S { // CHECK3-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], %struct.anon.2* [[AGG_CAPTURED]], i32 0, i32 2 // CHECK3-NEXT: store i8*** [[TMP2]], i8**** [[TMP9]], align 8 // CHECK3-NEXT: [[TMP10:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], %struct.anon.2* [[AGG_CAPTURED]], i32 0, i32 3 -// CHECK3-NEXT: [[TMP11:%.*]] = load i8, i8* [[CONV]], align 8 +// CHECK3-NEXT: [[TMP11:%.*]] = load i8, i8* [[CONV]], align 1 // CHECK3-NEXT: [[TOBOOL:%.*]] = trunc i8 [[TMP11]] to i1 // CHECK3-NEXT: [[FROMBOOL:%.*]] = zext i1 [[TOBOOL]] to i8 // CHECK3-NEXT: store i8 [[FROMBOOL]], i8* [[TMP10]], align 8 -// CHECK3-NEXT: [[TMP12:%.*]] = load i32, i32* [[CONV3]], align 8 +// CHECK3-NEXT: [[TMP12:%.*]] = load i32, i32* [[CONV3]], align 4 // CHECK3-NEXT: call void @__kmpc_taskgroup(%struct.ident_t* @[[GLOB1]], i32 [[TMP4]]) // CHECK3-NEXT: [[TMP13:%.*]] = load i32, i32* [[TMP1]], align 4 // CHECK3-NEXT: store i32 [[TMP13]], i32* [[DOTCAPTURE_EXPR_5]], align 4 @@ -2133,7 +2133,7 @@ struct S { // CHECK3-NEXT: [[TMP28:%.*]] = bitcast %struct.anon.2* [[AGG_CAPTURED]] to i8* // CHECK3-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[TMP27]], i8* 
align 8 [[TMP28]], i64 32, i1 false) // CHECK3-NEXT: [[TMP29:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T_WITH_PRIVATES_3]], %struct.kmp_task_t_with_privates.3* [[TMP24]], i32 0, i32 1 -// CHECK3-NEXT: [[TMP30:%.*]] = load i8, i8* [[CONV]], align 8 +// CHECK3-NEXT: [[TMP30:%.*]] = load i8, i8* [[CONV]], align 1 // CHECK3-NEXT: [[TOBOOL18:%.*]] = trunc i8 [[TMP30]] to i1 // CHECK3-NEXT: [[TMP31:%.*]] = sext i1 [[TOBOOL18]] to i32 // CHECK3-NEXT: [[TMP32:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T]], %struct.kmp_task_t* [[TMP25]], i32 0, i32 5 @@ -2491,7 +2491,7 @@ struct S { // CHECK3-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], %struct.anon.4* [[AGG_CAPTURED]], i32 0, i32 1 // CHECK3-NEXT: store i32* [[TMP1]], i32** [[TMP7]], align 8 // CHECK3-NEXT: call void @__kmpc_taskgroup(%struct.ident_t* @[[GLOB1]], i32 [[TMP3]]) -// CHECK3-NEXT: [[TMP8:%.*]] = load i8, i8* [[CONV]], align 8 +// CHECK3-NEXT: [[TMP8:%.*]] = load i8, i8* [[CONV]], align 1 // CHECK3-NEXT: [[TOBOOL:%.*]] = trunc i8 [[TMP8]] to i1 // CHECK3-NEXT: store i32* [[TMP]], i32** [[_TMP1]], align 8 // CHECK3-NEXT: [[TMP9:%.*]] = load i32, i32* [[TMP1]], align 4 @@ -2731,7 +2731,7 @@ struct S { // CHECK4-NEXT: br i1 [[TMP3]], label [[OMP_IF_THEN:%.*]], label [[OMP_IF_END:%.*]] // CHECK4: omp_if.then: // CHECK4-NEXT: call void @__kmpc_taskgroup(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]]) -// CHECK4-NEXT: [[TMP4:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK4-NEXT: [[TMP4:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK4-NEXT: [[TMP5:%.*]] = call i8* @__kmpc_omp_task_alloc(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]], i32 33, i64 80, i64 1, i32 (i32, i8*)* bitcast (i32 (i32, %struct.kmp_task_t_with_privates*)* @.omp_task_entry. to i32 (i32, i8*)*)) // CHECK4-NEXT: [[TMP6:%.*]] = bitcast i8* [[TMP5]] to %struct.kmp_task_t_with_privates* // CHECK4-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T_WITH_PRIVATES:%.*]], %struct.kmp_task_t_with_privates* [[TMP6]], i32 0, i32 0 @@ -2850,7 +2850,7 @@ struct S { // CHECK4-NEXT: [[TMP3:%.*]] = icmp ne i32 [[TMP2]], 0 // CHECK4-NEXT: br i1 [[TMP3]], label [[OMP_IF_THEN:%.*]], label [[OMP_IF_END:%.*]] // CHECK4: omp_if.then: -// CHECK4-NEXT: [[TMP4:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK4-NEXT: [[TMP4:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK4-NEXT: [[TMP5:%.*]] = call i8* @__kmpc_omp_task_alloc(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]], i32 1, i64 80, i64 1, i32 (i32, i8*)* bitcast (i32 (i32, %struct.kmp_task_t_with_privates.1*)* @.omp_task_entry..4 to i32 (i32, i8*)*)) // CHECK4-NEXT: [[TMP6:%.*]] = bitcast i8* [[TMP5]] to %struct.kmp_task_t_with_privates.1* // CHECK4-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T_WITH_PRIVATES_1:%.*]], %struct.kmp_task_t_with_privates.1* [[TMP6]], i32 0, i32 0 @@ -2990,11 +2990,11 @@ struct S { // CHECK4-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], %struct.anon.2* [[AGG_CAPTURED]], i32 0, i32 2 // CHECK4-NEXT: store i8*** [[TMP2]], i8**** [[TMP9]], align 8 // CHECK4-NEXT: [[TMP10:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], %struct.anon.2* [[AGG_CAPTURED]], i32 0, i32 3 -// CHECK4-NEXT: [[TMP11:%.*]] = load i8, i8* [[CONV]], align 8 +// CHECK4-NEXT: [[TMP11:%.*]] = load i8, i8* [[CONV]], align 1 // CHECK4-NEXT: [[TOBOOL:%.*]] = trunc i8 [[TMP11]] to i1 // CHECK4-NEXT: [[FROMBOOL:%.*]] = zext i1 [[TOBOOL]] to i8 // CHECK4-NEXT: store i8 [[FROMBOOL]], i8* [[TMP10]], align 8 -// CHECK4-NEXT: [[TMP12:%.*]] = load i32, i32* [[CONV3]], align 8 +// CHECK4-NEXT: 
[[TMP12:%.*]] = load i32, i32* [[CONV3]], align 4 // CHECK4-NEXT: call void @__kmpc_taskgroup(%struct.ident_t* @[[GLOB1]], i32 [[TMP4]]) // CHECK4-NEXT: [[TMP13:%.*]] = load i32, i32* [[TMP1]], align 4 // CHECK4-NEXT: store i32 [[TMP13]], i32* [[DOTCAPTURE_EXPR_5]], align 4 @@ -3033,7 +3033,7 @@ struct S { // CHECK4-NEXT: [[TMP28:%.*]] = bitcast %struct.anon.2* [[AGG_CAPTURED]] to i8* // CHECK4-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[TMP27]], i8* align 8 [[TMP28]], i64 32, i1 false) // CHECK4-NEXT: [[TMP29:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T_WITH_PRIVATES_3]], %struct.kmp_task_t_with_privates.3* [[TMP24]], i32 0, i32 1 -// CHECK4-NEXT: [[TMP30:%.*]] = load i8, i8* [[CONV]], align 8 +// CHECK4-NEXT: [[TMP30:%.*]] = load i8, i8* [[CONV]], align 1 // CHECK4-NEXT: [[TOBOOL18:%.*]] = trunc i8 [[TMP30]] to i1 // CHECK4-NEXT: [[TMP31:%.*]] = sext i1 [[TOBOOL18]] to i32 // CHECK4-NEXT: [[TMP32:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T]], %struct.kmp_task_t* [[TMP25]], i32 0, i32 5 @@ -3391,7 +3391,7 @@ struct S { // CHECK4-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], %struct.anon.4* [[AGG_CAPTURED]], i32 0, i32 1 // CHECK4-NEXT: store i32* [[TMP1]], i32** [[TMP7]], align 8 // CHECK4-NEXT: call void @__kmpc_taskgroup(%struct.ident_t* @[[GLOB1]], i32 [[TMP3]]) -// CHECK4-NEXT: [[TMP8:%.*]] = load i8, i8* [[CONV]], align 8 +// CHECK4-NEXT: [[TMP8:%.*]] = load i8, i8* [[CONV]], align 1 // CHECK4-NEXT: [[TOBOOL:%.*]] = trunc i8 [[TMP8]] to i1 // CHECK4-NEXT: store i32* [[TMP]], i32** [[_TMP1]], align 8 // CHECK4-NEXT: [[TMP9:%.*]] = load i32, i32* [[TMP1]], align 4 diff --git a/clang/test/OpenMP/single_codegen.cpp b/clang/test/OpenMP/single_codegen.cpp index a590aac9125d..7cb75113c13a 100644 --- a/clang/test/OpenMP/single_codegen.cpp +++ b/clang/test/OpenMP/single_codegen.cpp @@ -1031,9 +1031,9 @@ void array_func(int n, int a[n], St s[2]) { // CHECK1-NEXT: [[TMP8:%.*]] = load i32, i32* [[TMP7]], align 4 // CHECK1-NEXT: [[INC:%.*]] = add nsw i32 [[TMP8]], 1 // CHECK1-NEXT: store i32 [[INC]], i32* [[TMP7]], align 4 -// CHECK1-NEXT: [[TMP9:%.*]] = load i32, i32* [[CONV1]], align 8 +// CHECK1-NEXT: [[TMP9:%.*]] = load i32, i32* [[CONV1]], align 4 // CHECK1-NEXT: [[DEC:%.*]] = add nsw i32 [[TMP9]], -1 -// CHECK1-NEXT: store i32 [[DEC]], i32* [[CONV1]], align 8 +// CHECK1-NEXT: store i32 [[DEC]], i32* [[CONV1]], align 4 // CHECK1-NEXT: [[TMP10:%.*]] = load i32*, i32** [[_TMP5]], align 8 // CHECK1-NEXT: [[TMP11:%.*]] = load i32, i32* [[TMP10]], align 4 // CHECK1-NEXT: [[DIV:%.*]] = sdiv i32 [[TMP11]], 1 @@ -1867,9 +1867,9 @@ void array_func(int n, int a[n], St s[2]) { // CHECK2-NEXT: [[TMP8:%.*]] = load i32, i32* [[TMP7]], align 4 // CHECK2-NEXT: [[INC:%.*]] = add nsw i32 [[TMP8]], 1 // CHECK2-NEXT: store i32 [[INC]], i32* [[TMP7]], align 4 -// CHECK2-NEXT: [[TMP9:%.*]] = load i32, i32* [[CONV1]], align 8 +// CHECK2-NEXT: [[TMP9:%.*]] = load i32, i32* [[CONV1]], align 4 // CHECK2-NEXT: [[DEC:%.*]] = add nsw i32 [[TMP9]], -1 -// CHECK2-NEXT: store i32 [[DEC]], i32* [[CONV1]], align 8 +// CHECK2-NEXT: store i32 [[DEC]], i32* [[CONV1]], align 4 // CHECK2-NEXT: [[TMP10:%.*]] = load i32*, i32** [[_TMP5]], align 8 // CHECK2-NEXT: [[TMP11:%.*]] = load i32, i32* [[TMP10]], align 4 // CHECK2-NEXT: [[DIV:%.*]] = sdiv i32 [[TMP11]], 1 @@ -3049,9 +3049,9 @@ void array_func(int n, int a[n], St s[2]) { // CHECK3-NEXT: [[TMP8:%.*]] = load i32, i32* [[TMP7]], align 4 // CHECK3-NEXT: [[INC:%.*]] = add nsw i32 [[TMP8]], 1 // CHECK3-NEXT: store i32 [[INC]], i32* 
[[TMP7]], align 4 -// CHECK3-NEXT: [[TMP9:%.*]] = load i32, i32* [[CONV1]], align 8 +// CHECK3-NEXT: [[TMP9:%.*]] = load i32, i32* [[CONV1]], align 4 // CHECK3-NEXT: [[DEC:%.*]] = add nsw i32 [[TMP9]], -1 -// CHECK3-NEXT: store i32 [[DEC]], i32* [[CONV1]], align 8 +// CHECK3-NEXT: store i32 [[DEC]], i32* [[CONV1]], align 4 // CHECK3-NEXT: [[TMP10:%.*]] = load i32*, i32** [[_TMP5]], align 8 // CHECK3-NEXT: [[TMP11:%.*]] = load i32, i32* [[TMP10]], align 4 // CHECK3-NEXT: [[DIV:%.*]] = sdiv i32 [[TMP11]], 1 @@ -3873,9 +3873,9 @@ void array_func(int n, int a[n], St s[2]) { // CHECK4-NEXT: [[TMP8:%.*]] = load i32, i32* [[TMP7]], align 4 // CHECK4-NEXT: [[INC:%.*]] = add nsw i32 [[TMP8]], 1 // CHECK4-NEXT: store i32 [[INC]], i32* [[TMP7]], align 4 -// CHECK4-NEXT: [[TMP9:%.*]] = load i32, i32* [[CONV1]], align 8 +// CHECK4-NEXT: [[TMP9:%.*]] = load i32, i32* [[CONV1]], align 4 // CHECK4-NEXT: [[DEC:%.*]] = add nsw i32 [[TMP9]], -1 -// CHECK4-NEXT: store i32 [[DEC]], i32* [[CONV1]], align 8 +// CHECK4-NEXT: store i32 [[DEC]], i32* [[CONV1]], align 4 // CHECK4-NEXT: [[TMP10:%.*]] = load i32*, i32** [[_TMP5]], align 8 // CHECK4-NEXT: [[TMP11:%.*]] = load i32, i32* [[TMP10]], align 4 // CHECK4-NEXT: [[DIV:%.*]] = sdiv i32 [[TMP11]], 1 @@ -5053,9 +5053,9 @@ void array_func(int n, int a[n], St s[2]) { // CHECK5-NEXT: [[TMP8:%.*]] = load i32, i32* [[TMP7]], align 4, !dbg [[DBG168:![0-9]+]] // CHECK5-NEXT: [[INC:%.*]] = add nsw i32 [[TMP8]], 1, !dbg [[DBG168]] // CHECK5-NEXT: store i32 [[INC]], i32* [[TMP7]], align 4, !dbg [[DBG168]] -// CHECK5-NEXT: [[TMP9:%.*]] = load i32, i32* [[CONV1]], align 8, !dbg [[DBG169:![0-9]+]] +// CHECK5-NEXT: [[TMP9:%.*]] = load i32, i32* [[CONV1]], align 4, !dbg [[DBG169:![0-9]+]] // CHECK5-NEXT: [[DEC:%.*]] = add nsw i32 [[TMP9]], -1, !dbg [[DBG169]] -// CHECK5-NEXT: store i32 [[DEC]], i32* [[CONV1]], align 8, !dbg [[DBG169]] +// CHECK5-NEXT: store i32 [[DEC]], i32* [[CONV1]], align 4, !dbg [[DBG169]] // CHECK5-NEXT: [[TMP10:%.*]] = load i32*, i32** [[_TMP5]], align 8, !dbg [[DBG167]] // CHECK5-NEXT: [[TMP11:%.*]] = load i32, i32* [[TMP10]], align 4, !dbg [[DBG170:![0-9]+]] // CHECK5-NEXT: [[DIV:%.*]] = sdiv i32 [[TMP11]], 1, !dbg [[DBG170]] diff --git a/clang/test/OpenMP/target_codegen_global_capture.cpp b/clang/test/OpenMP/target_codegen_global_capture.cpp index 371160354776..ef41980136b8 100644 --- a/clang/test/OpenMP/target_codegen_global_capture.cpp +++ b/clang/test/OpenMP/target_codegen_global_capture.cpp @@ -384,29 +384,29 @@ int tbar2(short a, short b, short c, short d){ // CHECK1-NEXT: [[CONV6:%.*]] = bitcast i64* [[D_ADDR]] to i16* // CHECK1-NEXT: [[CONV7:%.*]] = bitcast i64* [[GD_ADDR]] to double* // CHECK1-NEXT: [[CONV8:%.*]] = bitcast i64* [[SD_ADDR]] to float* -// CHECK1-NEXT: [[TMP1:%.*]] = load i16, i16* [[CONV]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = load i16, i16* [[CONV]], align 2 // CHECK1-NEXT: [[CONV9:%.*]] = sext i16 [[TMP1]] to i32 // CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[CONV9]], 1 // CHECK1-NEXT: [[CONV10:%.*]] = trunc i32 [[ADD]] to i16 -// CHECK1-NEXT: store i16 [[CONV10]], i16* [[CONV]], align 8 +// CHECK1-NEXT: store i16 [[CONV10]], i16* [[CONV]], align 2 // CHECK1-NEXT: [[TMP2:%.*]] = load double, double* [[CONV1]], align 8 // CHECK1-NEXT: [[ADD11:%.*]] = fadd double [[TMP2]], 1.000000e+00 // CHECK1-NEXT: store double [[ADD11]], double* [[CONV1]], align 8 -// CHECK1-NEXT: [[TMP3:%.*]] = load float, float* [[CONV2]], align 8 +// CHECK1-NEXT: [[TMP3:%.*]] = load float, float* [[CONV2]], align 4 // CHECK1-NEXT: [[CONV12:%.*]] = fpext 
float [[TMP3]] to double // CHECK1-NEXT: [[ADD13:%.*]] = fadd double [[CONV12]], 1.000000e+00 // CHECK1-NEXT: [[CONV14:%.*]] = fptrunc double [[ADD13]] to float -// CHECK1-NEXT: store float [[CONV14]], float* [[CONV2]], align 8 +// CHECK1-NEXT: store float [[CONV14]], float* [[CONV2]], align 4 // CHECK1-NEXT: [[TMP4:%.*]] = load double, double* [[CONV3]], align 8 // CHECK1-NEXT: [[CMP:%.*]] = fcmp ogt double [[TMP4]], 0.000000e+00 // CHECK1-NEXT: br i1 [[CMP]], label [[LAND_LHS_TRUE:%.*]], label [[OMP_IF_ELSE:%.*]] // CHECK1: land.lhs.true: -// CHECK1-NEXT: [[TMP5:%.*]] = load i16, i16* [[CONV4]], align 8 +// CHECK1-NEXT: [[TMP5:%.*]] = load i16, i16* [[CONV4]], align 2 // CHECK1-NEXT: [[CONV15:%.*]] = sext i16 [[TMP5]] to i32 // CHECK1-NEXT: [[CMP16:%.*]] = icmp sgt i32 [[CONV15]], 0 // CHECK1-NEXT: br i1 [[CMP16]], label [[LAND_LHS_TRUE17:%.*]], label [[OMP_IF_ELSE]] // CHECK1: land.lhs.true17: -// CHECK1-NEXT: [[TMP6:%.*]] = load float, float* [[CONV5]], align 8 +// CHECK1-NEXT: [[TMP6:%.*]] = load float, float* [[CONV5]], align 4 // CHECK1-NEXT: [[CONV18:%.*]] = fpext float [[TMP6]] to double // CHECK1-NEXT: [[CMP19:%.*]] = fcmp ogt double [[CONV18]], 0.000000e+00 // CHECK1-NEXT: br i1 [[CMP19]], label [[OMP_IF_THEN:%.*]], label [[OMP_IF_ELSE]] @@ -697,29 +697,29 @@ int tbar2(short a, short b, short c, short d){ // CHECK1-NEXT: [[CONV6:%.*]] = bitcast i64* [[D_ADDR]] to i16* // CHECK1-NEXT: [[CONV7:%.*]] = bitcast i64* [[GD_ADDR]] to double* // CHECK1-NEXT: [[CONV8:%.*]] = bitcast i64* [[SD_ADDR]] to float* -// CHECK1-NEXT: [[TMP1:%.*]] = load i16, i16* [[CONV]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = load i16, i16* [[CONV]], align 2 // CHECK1-NEXT: [[CONV9:%.*]] = sext i16 [[TMP1]] to i32 // CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[CONV9]], 1 // CHECK1-NEXT: [[CONV10:%.*]] = trunc i32 [[ADD]] to i16 -// CHECK1-NEXT: store i16 [[CONV10]], i16* [[CONV]], align 8 +// CHECK1-NEXT: store i16 [[CONV10]], i16* [[CONV]], align 2 // CHECK1-NEXT: [[TMP2:%.*]] = load double, double* [[CONV1]], align 8 // CHECK1-NEXT: [[ADD11:%.*]] = fadd double [[TMP2]], 1.000000e+00 // CHECK1-NEXT: store double [[ADD11]], double* [[CONV1]], align 8 -// CHECK1-NEXT: [[TMP3:%.*]] = load float, float* [[CONV2]], align 8 +// CHECK1-NEXT: [[TMP3:%.*]] = load float, float* [[CONV2]], align 4 // CHECK1-NEXT: [[CONV12:%.*]] = fpext float [[TMP3]] to double // CHECK1-NEXT: [[ADD13:%.*]] = fadd double [[CONV12]], 1.000000e+00 // CHECK1-NEXT: [[CONV14:%.*]] = fptrunc double [[ADD13]] to float -// CHECK1-NEXT: store float [[CONV14]], float* [[CONV2]], align 8 +// CHECK1-NEXT: store float [[CONV14]], float* [[CONV2]], align 4 // CHECK1-NEXT: [[TMP4:%.*]] = load double, double* [[CONV3]], align 8 // CHECK1-NEXT: [[CMP:%.*]] = fcmp ogt double [[TMP4]], 0.000000e+00 // CHECK1-NEXT: br i1 [[CMP]], label [[LAND_LHS_TRUE:%.*]], label [[OMP_IF_ELSE:%.*]] // CHECK1: land.lhs.true: -// CHECK1-NEXT: [[TMP5:%.*]] = load i16, i16* [[CONV4]], align 8 +// CHECK1-NEXT: [[TMP5:%.*]] = load i16, i16* [[CONV4]], align 2 // CHECK1-NEXT: [[CONV15:%.*]] = sext i16 [[TMP5]] to i32 // CHECK1-NEXT: [[CMP16:%.*]] = icmp sgt i32 [[CONV15]], 0 // CHECK1-NEXT: br i1 [[CMP16]], label [[LAND_LHS_TRUE17:%.*]], label [[OMP_IF_ELSE]] // CHECK1: land.lhs.true17: -// CHECK1-NEXT: [[TMP6:%.*]] = load float, float* [[CONV5]], align 8 +// CHECK1-NEXT: [[TMP6:%.*]] = load float, float* [[CONV5]], align 4 // CHECK1-NEXT: [[CONV18:%.*]] = fpext float [[TMP6]] to double // CHECK1-NEXT: [[CMP19:%.*]] = fcmp ogt double [[CONV18]], 0.000000e+00 // CHECK1-NEXT: br i1 
[[CMP19]], label [[OMP_IF_THEN:%.*]], label [[OMP_IF_ELSE]] @@ -1029,29 +1029,29 @@ int tbar2(short a, short b, short c, short d){ // CHECK1-NEXT: [[CONV6:%.*]] = bitcast i64* [[D_ADDR]] to i16* // CHECK1-NEXT: [[CONV7:%.*]] = bitcast i64* [[GD_ADDR]] to double* // CHECK1-NEXT: [[CONV8:%.*]] = bitcast i64* [[SD_ADDR]] to float* -// CHECK1-NEXT: [[TMP1:%.*]] = load i16, i16* [[CONV]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = load i16, i16* [[CONV]], align 2 // CHECK1-NEXT: [[CONV9:%.*]] = sext i16 [[TMP1]] to i32 // CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[CONV9]], 1 // CHECK1-NEXT: [[CONV10:%.*]] = trunc i32 [[ADD]] to i16 -// CHECK1-NEXT: store i16 [[CONV10]], i16* [[CONV]], align 8 +// CHECK1-NEXT: store i16 [[CONV10]], i16* [[CONV]], align 2 // CHECK1-NEXT: [[TMP2:%.*]] = load double, double* [[CONV1]], align 8 // CHECK1-NEXT: [[ADD11:%.*]] = fadd double [[TMP2]], 1.000000e+00 // CHECK1-NEXT: store double [[ADD11]], double* [[CONV1]], align 8 -// CHECK1-NEXT: [[TMP3:%.*]] = load float, float* [[CONV2]], align 8 +// CHECK1-NEXT: [[TMP3:%.*]] = load float, float* [[CONV2]], align 4 // CHECK1-NEXT: [[CONV12:%.*]] = fpext float [[TMP3]] to double // CHECK1-NEXT: [[ADD13:%.*]] = fadd double [[CONV12]], 1.000000e+00 // CHECK1-NEXT: [[CONV14:%.*]] = fptrunc double [[ADD13]] to float -// CHECK1-NEXT: store float [[CONV14]], float* [[CONV2]], align 8 +// CHECK1-NEXT: store float [[CONV14]], float* [[CONV2]], align 4 // CHECK1-NEXT: [[TMP4:%.*]] = load double, double* [[CONV3]], align 8 // CHECK1-NEXT: [[CMP:%.*]] = fcmp ogt double [[TMP4]], 0.000000e+00 // CHECK1-NEXT: br i1 [[CMP]], label [[LAND_LHS_TRUE:%.*]], label [[OMP_IF_ELSE:%.*]] // CHECK1: land.lhs.true: -// CHECK1-NEXT: [[TMP5:%.*]] = load i16, i16* [[CONV4]], align 8 +// CHECK1-NEXT: [[TMP5:%.*]] = load i16, i16* [[CONV4]], align 2 // CHECK1-NEXT: [[CONV15:%.*]] = sext i16 [[TMP5]] to i32 // CHECK1-NEXT: [[CMP16:%.*]] = icmp sgt i32 [[CONV15]], 0 // CHECK1-NEXT: br i1 [[CMP16]], label [[LAND_LHS_TRUE17:%.*]], label [[OMP_IF_ELSE]] // CHECK1: land.lhs.true17: -// CHECK1-NEXT: [[TMP6:%.*]] = load float, float* [[CONV5]], align 8 +// CHECK1-NEXT: [[TMP6:%.*]] = load float, float* [[CONV5]], align 4 // CHECK1-NEXT: [[CONV18:%.*]] = fpext float [[TMP6]] to double // CHECK1-NEXT: [[CMP19:%.*]] = fcmp ogt double [[CONV18]], 0.000000e+00 // CHECK1-NEXT: br i1 [[CMP19]], label [[OMP_IF_THEN:%.*]], label [[OMP_IF_ELSE]] @@ -1326,29 +1326,29 @@ int tbar2(short a, short b, short c, short d){ // CHECK2-NEXT: [[CONV6:%.*]] = bitcast i64* [[D_ADDR]] to i16* // CHECK2-NEXT: [[CONV7:%.*]] = bitcast i64* [[GD_ADDR]] to double* // CHECK2-NEXT: [[CONV8:%.*]] = bitcast i64* [[SD_ADDR]] to float* -// CHECK2-NEXT: [[TMP1:%.*]] = load i16, i16* [[CONV]], align 8 +// CHECK2-NEXT: [[TMP1:%.*]] = load i16, i16* [[CONV]], align 2 // CHECK2-NEXT: [[CONV9:%.*]] = sext i16 [[TMP1]] to i32 // CHECK2-NEXT: [[ADD:%.*]] = add nsw i32 [[CONV9]], 1 // CHECK2-NEXT: [[CONV10:%.*]] = trunc i32 [[ADD]] to i16 -// CHECK2-NEXT: store i16 [[CONV10]], i16* [[CONV]], align 8 +// CHECK2-NEXT: store i16 [[CONV10]], i16* [[CONV]], align 2 // CHECK2-NEXT: [[TMP2:%.*]] = load double, double* [[CONV1]], align 8 // CHECK2-NEXT: [[ADD11:%.*]] = fadd double [[TMP2]], 1.000000e+00 // CHECK2-NEXT: store double [[ADD11]], double* [[CONV1]], align 8 -// CHECK2-NEXT: [[TMP3:%.*]] = load float, float* [[CONV2]], align 8 +// CHECK2-NEXT: [[TMP3:%.*]] = load float, float* [[CONV2]], align 4 // CHECK2-NEXT: [[CONV12:%.*]] = fpext float [[TMP3]] to double // CHECK2-NEXT: [[ADD13:%.*]] = fadd double 
[[CONV12]], 1.000000e+00 // CHECK2-NEXT: [[CONV14:%.*]] = fptrunc double [[ADD13]] to float -// CHECK2-NEXT: store float [[CONV14]], float* [[CONV2]], align 8 +// CHECK2-NEXT: store float [[CONV14]], float* [[CONV2]], align 4 // CHECK2-NEXT: [[TMP4:%.*]] = load double, double* [[CONV3]], align 8 // CHECK2-NEXT: [[CMP:%.*]] = fcmp ogt double [[TMP4]], 0.000000e+00 // CHECK2-NEXT: br i1 [[CMP]], label [[LAND_LHS_TRUE:%.*]], label [[OMP_IF_ELSE:%.*]] // CHECK2: land.lhs.true: -// CHECK2-NEXT: [[TMP5:%.*]] = load i16, i16* [[CONV4]], align 8 +// CHECK2-NEXT: [[TMP5:%.*]] = load i16, i16* [[CONV4]], align 2 // CHECK2-NEXT: [[CONV15:%.*]] = sext i16 [[TMP5]] to i32 // CHECK2-NEXT: [[CMP16:%.*]] = icmp sgt i32 [[CONV15]], 0 // CHECK2-NEXT: br i1 [[CMP16]], label [[LAND_LHS_TRUE17:%.*]], label [[OMP_IF_ELSE]] // CHECK2: land.lhs.true17: -// CHECK2-NEXT: [[TMP6:%.*]] = load float, float* [[CONV5]], align 8 +// CHECK2-NEXT: [[TMP6:%.*]] = load float, float* [[CONV5]], align 4 // CHECK2-NEXT: [[CONV18:%.*]] = fpext float [[TMP6]] to double // CHECK2-NEXT: [[CMP19:%.*]] = fcmp ogt double [[CONV18]], 0.000000e+00 // CHECK2-NEXT: br i1 [[CMP19]], label [[OMP_IF_THEN:%.*]], label [[OMP_IF_ELSE]] @@ -1639,29 +1639,29 @@ int tbar2(short a, short b, short c, short d){ // CHECK2-NEXT: [[CONV6:%.*]] = bitcast i64* [[D_ADDR]] to i16* // CHECK2-NEXT: [[CONV7:%.*]] = bitcast i64* [[GD_ADDR]] to double* // CHECK2-NEXT: [[CONV8:%.*]] = bitcast i64* [[SD_ADDR]] to float* -// CHECK2-NEXT: [[TMP1:%.*]] = load i16, i16* [[CONV]], align 8 +// CHECK2-NEXT: [[TMP1:%.*]] = load i16, i16* [[CONV]], align 2 // CHECK2-NEXT: [[CONV9:%.*]] = sext i16 [[TMP1]] to i32 // CHECK2-NEXT: [[ADD:%.*]] = add nsw i32 [[CONV9]], 1 // CHECK2-NEXT: [[CONV10:%.*]] = trunc i32 [[ADD]] to i16 -// CHECK2-NEXT: store i16 [[CONV10]], i16* [[CONV]], align 8 +// CHECK2-NEXT: store i16 [[CONV10]], i16* [[CONV]], align 2 // CHECK2-NEXT: [[TMP2:%.*]] = load double, double* [[CONV1]], align 8 // CHECK2-NEXT: [[ADD11:%.*]] = fadd double [[TMP2]], 1.000000e+00 // CHECK2-NEXT: store double [[ADD11]], double* [[CONV1]], align 8 -// CHECK2-NEXT: [[TMP3:%.*]] = load float, float* [[CONV2]], align 8 +// CHECK2-NEXT: [[TMP3:%.*]] = load float, float* [[CONV2]], align 4 // CHECK2-NEXT: [[CONV12:%.*]] = fpext float [[TMP3]] to double // CHECK2-NEXT: [[ADD13:%.*]] = fadd double [[CONV12]], 1.000000e+00 // CHECK2-NEXT: [[CONV14:%.*]] = fptrunc double [[ADD13]] to float -// CHECK2-NEXT: store float [[CONV14]], float* [[CONV2]], align 8 +// CHECK2-NEXT: store float [[CONV14]], float* [[CONV2]], align 4 // CHECK2-NEXT: [[TMP4:%.*]] = load double, double* [[CONV3]], align 8 // CHECK2-NEXT: [[CMP:%.*]] = fcmp ogt double [[TMP4]], 0.000000e+00 // CHECK2-NEXT: br i1 [[CMP]], label [[LAND_LHS_TRUE:%.*]], label [[OMP_IF_ELSE:%.*]] // CHECK2: land.lhs.true: -// CHECK2-NEXT: [[TMP5:%.*]] = load i16, i16* [[CONV4]], align 8 +// CHECK2-NEXT: [[TMP5:%.*]] = load i16, i16* [[CONV4]], align 2 // CHECK2-NEXT: [[CONV15:%.*]] = sext i16 [[TMP5]] to i32 // CHECK2-NEXT: [[CMP16:%.*]] = icmp sgt i32 [[CONV15]], 0 // CHECK2-NEXT: br i1 [[CMP16]], label [[LAND_LHS_TRUE17:%.*]], label [[OMP_IF_ELSE]] // CHECK2: land.lhs.true17: -// CHECK2-NEXT: [[TMP6:%.*]] = load float, float* [[CONV5]], align 8 +// CHECK2-NEXT: [[TMP6:%.*]] = load float, float* [[CONV5]], align 4 // CHECK2-NEXT: [[CONV18:%.*]] = fpext float [[TMP6]] to double // CHECK2-NEXT: [[CMP19:%.*]] = fcmp ogt double [[CONV18]], 0.000000e+00 // CHECK2-NEXT: br i1 [[CMP19]], label [[OMP_IF_THEN:%.*]], label [[OMP_IF_ELSE]] @@ 
-1971,29 +1971,29 @@ int tbar2(short a, short b, short c, short d){ // CHECK2-NEXT: [[CONV6:%.*]] = bitcast i64* [[D_ADDR]] to i16* // CHECK2-NEXT: [[CONV7:%.*]] = bitcast i64* [[GD_ADDR]] to double* // CHECK2-NEXT: [[CONV8:%.*]] = bitcast i64* [[SD_ADDR]] to float* -// CHECK2-NEXT: [[TMP1:%.*]] = load i16, i16* [[CONV]], align 8 +// CHECK2-NEXT: [[TMP1:%.*]] = load i16, i16* [[CONV]], align 2 // CHECK2-NEXT: [[CONV9:%.*]] = sext i16 [[TMP1]] to i32 // CHECK2-NEXT: [[ADD:%.*]] = add nsw i32 [[CONV9]], 1 // CHECK2-NEXT: [[CONV10:%.*]] = trunc i32 [[ADD]] to i16 -// CHECK2-NEXT: store i16 [[CONV10]], i16* [[CONV]], align 8 +// CHECK2-NEXT: store i16 [[CONV10]], i16* [[CONV]], align 2 // CHECK2-NEXT: [[TMP2:%.*]] = load double, double* [[CONV1]], align 8 // CHECK2-NEXT: [[ADD11:%.*]] = fadd double [[TMP2]], 1.000000e+00 // CHECK2-NEXT: store double [[ADD11]], double* [[CONV1]], align 8 -// CHECK2-NEXT: [[TMP3:%.*]] = load float, float* [[CONV2]], align 8 +// CHECK2-NEXT: [[TMP3:%.*]] = load float, float* [[CONV2]], align 4 // CHECK2-NEXT: [[CONV12:%.*]] = fpext float [[TMP3]] to double // CHECK2-NEXT: [[ADD13:%.*]] = fadd double [[CONV12]], 1.000000e+00 // CHECK2-NEXT: [[CONV14:%.*]] = fptrunc double [[ADD13]] to float -// CHECK2-NEXT: store float [[CONV14]], float* [[CONV2]], align 8 +// CHECK2-NEXT: store float [[CONV14]], float* [[CONV2]], align 4 // CHECK2-NEXT: [[TMP4:%.*]] = load double, double* [[CONV3]], align 8 // CHECK2-NEXT: [[CMP:%.*]] = fcmp ogt double [[TMP4]], 0.000000e+00 // CHECK2-NEXT: br i1 [[CMP]], label [[LAND_LHS_TRUE:%.*]], label [[OMP_IF_ELSE:%.*]] // CHECK2: land.lhs.true: -// CHECK2-NEXT: [[TMP5:%.*]] = load i16, i16* [[CONV4]], align 8 +// CHECK2-NEXT: [[TMP5:%.*]] = load i16, i16* [[CONV4]], align 2 // CHECK2-NEXT: [[CONV15:%.*]] = sext i16 [[TMP5]] to i32 // CHECK2-NEXT: [[CMP16:%.*]] = icmp sgt i32 [[CONV15]], 0 // CHECK2-NEXT: br i1 [[CMP16]], label [[LAND_LHS_TRUE17:%.*]], label [[OMP_IF_ELSE]] // CHECK2: land.lhs.true17: -// CHECK2-NEXT: [[TMP6:%.*]] = load float, float* [[CONV5]], align 8 +// CHECK2-NEXT: [[TMP6:%.*]] = load float, float* [[CONV5]], align 4 // CHECK2-NEXT: [[CONV18:%.*]] = fpext float [[TMP6]] to double // CHECK2-NEXT: [[CMP19:%.*]] = fcmp ogt double [[CONV18]], 0.000000e+00 // CHECK2-NEXT: br i1 [[CMP19]], label [[OMP_IF_THEN:%.*]], label [[OMP_IF_ELSE]] @@ -2262,11 +2262,11 @@ int tbar2(short a, short b, short c, short d){ // CHECK3-NEXT: store double [[TMP5]], double* [[GC7]], align 8 // CHECK3-NEXT: [[TMP6:%.*]] = load double, double* [[TMP3]], align 8 // CHECK3-NEXT: store double [[TMP6]], double* [[GD8]], align 8 -// CHECK3-NEXT: [[TMP7:%.*]] = load i16, i16* [[CONV]], align 4 +// CHECK3-NEXT: [[TMP7:%.*]] = load i16, i16* [[CONV]], align 2 // CHECK3-NEXT: [[CONV9:%.*]] = sext i16 [[TMP7]] to i32 // CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 [[CONV9]], 1 // CHECK3-NEXT: [[CONV10:%.*]] = trunc i32 [[ADD]] to i16 -// CHECK3-NEXT: store i16 [[CONV10]], i16* [[CONV]], align 4 +// CHECK3-NEXT: store i16 [[CONV10]], i16* [[CONV]], align 2 // CHECK3-NEXT: [[TMP8:%.*]] = load double, double* [[GB6]], align 8 // CHECK3-NEXT: [[ADD11:%.*]] = fadd double [[TMP8]], 1.000000e+00 // CHECK3-NEXT: store double [[ADD11]], double* [[GB6]], align 8 @@ -2279,7 +2279,7 @@ int tbar2(short a, short b, short c, short d){ // CHECK3-NEXT: [[CMP:%.*]] = fcmp ogt double [[TMP10]], 0.000000e+00 // CHECK3-NEXT: br i1 [[CMP]], label [[LAND_LHS_TRUE:%.*]], label [[OMP_IF_ELSE:%.*]] // CHECK3: land.lhs.true: -// CHECK3-NEXT: [[TMP11:%.*]] = load i16, i16* [[CONV2]], align 
4 +// CHECK3-NEXT: [[TMP11:%.*]] = load i16, i16* [[CONV2]], align 2 // CHECK3-NEXT: [[CONV15:%.*]] = sext i16 [[TMP11]] to i32 // CHECK3-NEXT: [[CMP16:%.*]] = icmp sgt i32 [[CONV15]], 0 // CHECK3-NEXT: br i1 [[CMP16]], label [[LAND_LHS_TRUE17:%.*]], label [[OMP_IF_ELSE]] @@ -2569,11 +2569,11 @@ int tbar2(short a, short b, short c, short d){ // CHECK3-NEXT: store double [[TMP5]], double* [[GC7]], align 8 // CHECK3-NEXT: [[TMP6:%.*]] = load double, double* [[TMP3]], align 8 // CHECK3-NEXT: store double [[TMP6]], double* [[GD8]], align 8 -// CHECK3-NEXT: [[TMP7:%.*]] = load i16, i16* [[CONV]], align 4 +// CHECK3-NEXT: [[TMP7:%.*]] = load i16, i16* [[CONV]], align 2 // CHECK3-NEXT: [[CONV9:%.*]] = sext i16 [[TMP7]] to i32 // CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 [[CONV9]], 1 // CHECK3-NEXT: [[CONV10:%.*]] = trunc i32 [[ADD]] to i16 -// CHECK3-NEXT: store i16 [[CONV10]], i16* [[CONV]], align 4 +// CHECK3-NEXT: store i16 [[CONV10]], i16* [[CONV]], align 2 // CHECK3-NEXT: [[TMP8:%.*]] = load double, double* [[GB6]], align 8 // CHECK3-NEXT: [[ADD11:%.*]] = fadd double [[TMP8]], 1.000000e+00 // CHECK3-NEXT: store double [[ADD11]], double* [[GB6]], align 8 @@ -2586,7 +2586,7 @@ int tbar2(short a, short b, short c, short d){ // CHECK3-NEXT: [[CMP:%.*]] = fcmp ogt double [[TMP10]], 0.000000e+00 // CHECK3-NEXT: br i1 [[CMP]], label [[LAND_LHS_TRUE:%.*]], label [[OMP_IF_ELSE:%.*]] // CHECK3: land.lhs.true: -// CHECK3-NEXT: [[TMP11:%.*]] = load i16, i16* [[CONV2]], align 4 +// CHECK3-NEXT: [[TMP11:%.*]] = load i16, i16* [[CONV2]], align 2 // CHECK3-NEXT: [[CONV15:%.*]] = sext i16 [[TMP11]] to i32 // CHECK3-NEXT: [[CMP16:%.*]] = icmp sgt i32 [[CONV15]], 0 // CHECK3-NEXT: br i1 [[CMP16]], label [[LAND_LHS_TRUE17:%.*]], label [[OMP_IF_ELSE]] @@ -2895,11 +2895,11 @@ int tbar2(short a, short b, short c, short d){ // CHECK3-NEXT: store double [[TMP5]], double* [[GC7]], align 8 // CHECK3-NEXT: [[TMP6:%.*]] = load double, double* [[TMP3]], align 8 // CHECK3-NEXT: store double [[TMP6]], double* [[GD8]], align 8 -// CHECK3-NEXT: [[TMP7:%.*]] = load i16, i16* [[CONV]], align 4 +// CHECK3-NEXT: [[TMP7:%.*]] = load i16, i16* [[CONV]], align 2 // CHECK3-NEXT: [[CONV9:%.*]] = sext i16 [[TMP7]] to i32 // CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 [[CONV9]], 1 // CHECK3-NEXT: [[CONV10:%.*]] = trunc i32 [[ADD]] to i16 -// CHECK3-NEXT: store i16 [[CONV10]], i16* [[CONV]], align 4 +// CHECK3-NEXT: store i16 [[CONV10]], i16* [[CONV]], align 2 // CHECK3-NEXT: [[TMP8:%.*]] = load double, double* [[GB6]], align 8 // CHECK3-NEXT: [[ADD11:%.*]] = fadd double [[TMP8]], 1.000000e+00 // CHECK3-NEXT: store double [[ADD11]], double* [[GB6]], align 8 @@ -2912,7 +2912,7 @@ int tbar2(short a, short b, short c, short d){ // CHECK3-NEXT: [[CMP:%.*]] = fcmp ogt double [[TMP10]], 0.000000e+00 // CHECK3-NEXT: br i1 [[CMP]], label [[LAND_LHS_TRUE:%.*]], label [[OMP_IF_ELSE:%.*]] // CHECK3: land.lhs.true: -// CHECK3-NEXT: [[TMP11:%.*]] = load i16, i16* [[CONV2]], align 4 +// CHECK3-NEXT: [[TMP11:%.*]] = load i16, i16* [[CONV2]], align 2 // CHECK3-NEXT: [[CONV15:%.*]] = sext i16 [[TMP11]] to i32 // CHECK3-NEXT: [[CMP16:%.*]] = icmp sgt i32 [[CONV15]], 0 // CHECK3-NEXT: br i1 [[CMP16]], label [[LAND_LHS_TRUE17:%.*]], label [[OMP_IF_ELSE]] @@ -3186,11 +3186,11 @@ int tbar2(short a, short b, short c, short d){ // CHECK4-NEXT: store double [[TMP5]], double* [[GC7]], align 8 // CHECK4-NEXT: [[TMP6:%.*]] = load double, double* [[TMP3]], align 8 // CHECK4-NEXT: store double [[TMP6]], double* [[GD8]], align 8 -// CHECK4-NEXT: [[TMP7:%.*]] = load i16, i16* 
[[CONV]], align 4 +// CHECK4-NEXT: [[TMP7:%.*]] = load i16, i16* [[CONV]], align 2 // CHECK4-NEXT: [[CONV9:%.*]] = sext i16 [[TMP7]] to i32 // CHECK4-NEXT: [[ADD:%.*]] = add nsw i32 [[CONV9]], 1 // CHECK4-NEXT: [[CONV10:%.*]] = trunc i32 [[ADD]] to i16 -// CHECK4-NEXT: store i16 [[CONV10]], i16* [[CONV]], align 4 +// CHECK4-NEXT: store i16 [[CONV10]], i16* [[CONV]], align 2 // CHECK4-NEXT: [[TMP8:%.*]] = load double, double* [[GB6]], align 8 // CHECK4-NEXT: [[ADD11:%.*]] = fadd double [[TMP8]], 1.000000e+00 // CHECK4-NEXT: store double [[ADD11]], double* [[GB6]], align 8 @@ -3203,7 +3203,7 @@ int tbar2(short a, short b, short c, short d){ // CHECK4-NEXT: [[CMP:%.*]] = fcmp ogt double [[TMP10]], 0.000000e+00 // CHECK4-NEXT: br i1 [[CMP]], label [[LAND_LHS_TRUE:%.*]], label [[OMP_IF_ELSE:%.*]] // CHECK4: land.lhs.true: -// CHECK4-NEXT: [[TMP11:%.*]] = load i16, i16* [[CONV2]], align 4 +// CHECK4-NEXT: [[TMP11:%.*]] = load i16, i16* [[CONV2]], align 2 // CHECK4-NEXT: [[CONV15:%.*]] = sext i16 [[TMP11]] to i32 // CHECK4-NEXT: [[CMP16:%.*]] = icmp sgt i32 [[CONV15]], 0 // CHECK4-NEXT: br i1 [[CMP16]], label [[LAND_LHS_TRUE17:%.*]], label [[OMP_IF_ELSE]] @@ -3493,11 +3493,11 @@ int tbar2(short a, short b, short c, short d){ // CHECK4-NEXT: store double [[TMP5]], double* [[GC7]], align 8 // CHECK4-NEXT: [[TMP6:%.*]] = load double, double* [[TMP3]], align 8 // CHECK4-NEXT: store double [[TMP6]], double* [[GD8]], align 8 -// CHECK4-NEXT: [[TMP7:%.*]] = load i16, i16* [[CONV]], align 4 +// CHECK4-NEXT: [[TMP7:%.*]] = load i16, i16* [[CONV]], align 2 // CHECK4-NEXT: [[CONV9:%.*]] = sext i16 [[TMP7]] to i32 // CHECK4-NEXT: [[ADD:%.*]] = add nsw i32 [[CONV9]], 1 // CHECK4-NEXT: [[CONV10:%.*]] = trunc i32 [[ADD]] to i16 -// CHECK4-NEXT: store i16 [[CONV10]], i16* [[CONV]], align 4 +// CHECK4-NEXT: store i16 [[CONV10]], i16* [[CONV]], align 2 // CHECK4-NEXT: [[TMP8:%.*]] = load double, double* [[GB6]], align 8 // CHECK4-NEXT: [[ADD11:%.*]] = fadd double [[TMP8]], 1.000000e+00 // CHECK4-NEXT: store double [[ADD11]], double* [[GB6]], align 8 @@ -3510,7 +3510,7 @@ int tbar2(short a, short b, short c, short d){ // CHECK4-NEXT: [[CMP:%.*]] = fcmp ogt double [[TMP10]], 0.000000e+00 // CHECK4-NEXT: br i1 [[CMP]], label [[LAND_LHS_TRUE:%.*]], label [[OMP_IF_ELSE:%.*]] // CHECK4: land.lhs.true: -// CHECK4-NEXT: [[TMP11:%.*]] = load i16, i16* [[CONV2]], align 4 +// CHECK4-NEXT: [[TMP11:%.*]] = load i16, i16* [[CONV2]], align 2 // CHECK4-NEXT: [[CONV15:%.*]] = sext i16 [[TMP11]] to i32 // CHECK4-NEXT: [[CMP16:%.*]] = icmp sgt i32 [[CONV15]], 0 // CHECK4-NEXT: br i1 [[CMP16]], label [[LAND_LHS_TRUE17:%.*]], label [[OMP_IF_ELSE]] @@ -3819,11 +3819,11 @@ int tbar2(short a, short b, short c, short d){ // CHECK4-NEXT: store double [[TMP5]], double* [[GC7]], align 8 // CHECK4-NEXT: [[TMP6:%.*]] = load double, double* [[TMP3]], align 8 // CHECK4-NEXT: store double [[TMP6]], double* [[GD8]], align 8 -// CHECK4-NEXT: [[TMP7:%.*]] = load i16, i16* [[CONV]], align 4 +// CHECK4-NEXT: [[TMP7:%.*]] = load i16, i16* [[CONV]], align 2 // CHECK4-NEXT: [[CONV9:%.*]] = sext i16 [[TMP7]] to i32 // CHECK4-NEXT: [[ADD:%.*]] = add nsw i32 [[CONV9]], 1 // CHECK4-NEXT: [[CONV10:%.*]] = trunc i32 [[ADD]] to i16 -// CHECK4-NEXT: store i16 [[CONV10]], i16* [[CONV]], align 4 +// CHECK4-NEXT: store i16 [[CONV10]], i16* [[CONV]], align 2 // CHECK4-NEXT: [[TMP8:%.*]] = load double, double* [[GB6]], align 8 // CHECK4-NEXT: [[ADD11:%.*]] = fadd double [[TMP8]], 1.000000e+00 // CHECK4-NEXT: store double [[ADD11]], double* [[GB6]], align 8 @@ -3836,7 
+3836,7 @@ int tbar2(short a, short b, short c, short d){ // CHECK4-NEXT: [[CMP:%.*]] = fcmp ogt double [[TMP10]], 0.000000e+00 // CHECK4-NEXT: br i1 [[CMP]], label [[LAND_LHS_TRUE:%.*]], label [[OMP_IF_ELSE:%.*]] // CHECK4: land.lhs.true: -// CHECK4-NEXT: [[TMP11:%.*]] = load i16, i16* [[CONV2]], align 4 +// CHECK4-NEXT: [[TMP11:%.*]] = load i16, i16* [[CONV2]], align 2 // CHECK4-NEXT: [[CONV15:%.*]] = sext i16 [[TMP11]] to i32 // CHECK4-NEXT: [[CMP16:%.*]] = icmp sgt i32 [[CONV15]], 0 // CHECK4-NEXT: br i1 [[CMP16]], label [[LAND_LHS_TRUE17:%.*]], label [[OMP_IF_ELSE]] diff --git a/clang/test/OpenMP/target_parallel_codegen.cpp b/clang/test/OpenMP/target_parallel_codegen.cpp index 012198a5ee7f..bb478963849a 100644 --- a/clang/test/OpenMP/target_parallel_codegen.cpp +++ b/clang/test/OpenMP/target_parallel_codegen.cpp @@ -594,7 +594,7 @@ int bar(int n){ // CHECK1-NEXT: [[A_CASTED:%.*]] = alloca i64, align 8 // CHECK1-NEXT: store i64 [[A]], i64* [[A_ADDR]], align 8 // CHECK1-NEXT: [[CONV:%.*]] = bitcast i64* [[A_ADDR]] to i32* -// CHECK1-NEXT: [[TMP0:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK1-NEXT: [[CONV1:%.*]] = bitcast i64* [[A_CASTED]] to i32* // CHECK1-NEXT: store i32 [[TMP0]], i32* [[CONV1]], align 4 // CHECK1-NEXT: [[TMP1:%.*]] = load i64, i64* [[A_CASTED]], align 8 @@ -612,9 +612,9 @@ int bar(int n){ // CHECK1-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 // CHECK1-NEXT: store i64 [[A]], i64* [[A_ADDR]], align 8 // CHECK1-NEXT: [[CONV:%.*]] = bitcast i64* [[A_ADDR]] to i32* -// CHECK1-NEXT: [[TMP0:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP0]], 1 -// CHECK1-NEXT: store i32 [[ADD]], i32* [[CONV]], align 8 +// CHECK1-NEXT: store i32 [[ADD]], i32* [[CONV]], align 4 // CHECK1-NEXT: ret void // // @@ -625,7 +625,7 @@ int bar(int n){ // CHECK1-NEXT: [[AA_CASTED:%.*]] = alloca i64, align 8 // CHECK1-NEXT: store i64 [[AA]], i64* [[AA_ADDR]], align 8 // CHECK1-NEXT: [[CONV:%.*]] = bitcast i64* [[AA_ADDR]] to i16* -// CHECK1-NEXT: [[TMP0:%.*]] = load i16, i16* [[CONV]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load i16, i16* [[CONV]], align 2 // CHECK1-NEXT: [[CONV1:%.*]] = bitcast i64* [[AA_CASTED]] to i16* // CHECK1-NEXT: store i16 [[TMP0]], i16* [[CONV1]], align 2 // CHECK1-NEXT: [[TMP1:%.*]] = load i64, i64* [[AA_CASTED]], align 8 @@ -643,11 +643,11 @@ int bar(int n){ // CHECK1-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 // CHECK1-NEXT: store i64 [[AA]], i64* [[AA_ADDR]], align 8 // CHECK1-NEXT: [[CONV:%.*]] = bitcast i64* [[AA_ADDR]] to i16* -// CHECK1-NEXT: [[TMP0:%.*]] = load i16, i16* [[CONV]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load i16, i16* [[CONV]], align 2 // CHECK1-NEXT: [[CONV1:%.*]] = sext i16 [[TMP0]] to i32 // CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[CONV1]], 1 // CHECK1-NEXT: [[CONV2:%.*]] = trunc i32 [[ADD]] to i16 -// CHECK1-NEXT: store i16 [[CONV2]], i16* [[CONV]], align 8 +// CHECK1-NEXT: store i16 [[CONV2]], i16* [[CONV]], align 2 // CHECK1-NEXT: [[TMP1:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: [[TMP2:%.*]] = load i32, i32* [[TMP1]], align 4 // CHECK1-NEXT: [[TMP3:%.*]] = call i32 @__kmpc_cancel(%struct.ident_t* @[[GLOB1]], i32 [[TMP2]], i32 1) @@ -671,11 +671,11 @@ int bar(int n){ // CHECK1-NEXT: store i64 [[AA]], i64* [[AA_ADDR]], align 8 // CHECK1-NEXT: [[CONV:%.*]] = bitcast i64* [[A_ADDR]] to 
i32* // CHECK1-NEXT: [[CONV1:%.*]] = bitcast i64* [[AA_ADDR]] to i16* -// CHECK1-NEXT: [[TMP0:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK1-NEXT: [[CONV2:%.*]] = bitcast i64* [[A_CASTED]] to i32* // CHECK1-NEXT: store i32 [[TMP0]], i32* [[CONV2]], align 4 // CHECK1-NEXT: [[TMP1:%.*]] = load i64, i64* [[A_CASTED]], align 8 -// CHECK1-NEXT: [[TMP2:%.*]] = load i16, i16* [[CONV1]], align 8 +// CHECK1-NEXT: [[TMP2:%.*]] = load i16, i16* [[CONV1]], align 2 // CHECK1-NEXT: [[CONV3:%.*]] = bitcast i64* [[AA_CASTED]] to i16* // CHECK1-NEXT: store i16 [[TMP2]], i16* [[CONV3]], align 2 // CHECK1-NEXT: [[TMP3:%.*]] = load i64, i64* [[AA_CASTED]], align 8 @@ -696,14 +696,14 @@ int bar(int n){ // CHECK1-NEXT: store i64 [[AA]], i64* [[AA_ADDR]], align 8 // CHECK1-NEXT: [[CONV:%.*]] = bitcast i64* [[A_ADDR]] to i32* // CHECK1-NEXT: [[CONV1:%.*]] = bitcast i64* [[AA_ADDR]] to i16* -// CHECK1-NEXT: [[TMP0:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP0]], 1 -// CHECK1-NEXT: store i32 [[ADD]], i32* [[CONV]], align 8 -// CHECK1-NEXT: [[TMP1:%.*]] = load i16, i16* [[CONV1]], align 8 +// CHECK1-NEXT: store i32 [[ADD]], i32* [[CONV]], align 4 +// CHECK1-NEXT: [[TMP1:%.*]] = load i16, i16* [[CONV1]], align 2 // CHECK1-NEXT: [[CONV2:%.*]] = sext i16 [[TMP1]] to i32 // CHECK1-NEXT: [[ADD3:%.*]] = add nsw i32 [[CONV2]], 1 // CHECK1-NEXT: [[CONV4:%.*]] = trunc i32 [[ADD3]] to i16 -// CHECK1-NEXT: store i16 [[CONV4]], i16* [[CONV1]], align 8 +// CHECK1-NEXT: store i16 [[CONV4]], i16* [[CONV1]], align 2 // CHECK1-NEXT: ret void // // @@ -738,7 +738,7 @@ int bar(int n){ // CHECK1-NEXT: [[TMP5:%.*]] = load i64, i64* [[VLA_ADDR4]], align 8 // CHECK1-NEXT: [[TMP6:%.*]] = load double*, double** [[CN_ADDR]], align 8 // CHECK1-NEXT: [[TMP7:%.*]] = load %struct.TT*, %struct.TT** [[D_ADDR]], align 8 -// CHECK1-NEXT: [[TMP8:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK1-NEXT: [[TMP8:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK1-NEXT: [[CONV5:%.*]] = bitcast i64* [[A_CASTED]] to i32* // CHECK1-NEXT: store i32 [[TMP8]], i32* [[CONV5]], align 4 // CHECK1-NEXT: [[TMP9:%.*]] = load i64, i64* [[A_CASTED]], align 8 @@ -780,9 +780,9 @@ int bar(int n){ // CHECK1-NEXT: [[TMP5:%.*]] = load i64, i64* [[VLA_ADDR4]], align 8 // CHECK1-NEXT: [[TMP6:%.*]] = load double*, double** [[CN_ADDR]], align 8 // CHECK1-NEXT: [[TMP7:%.*]] = load %struct.TT*, %struct.TT** [[D_ADDR]], align 8 -// CHECK1-NEXT: [[TMP8:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK1-NEXT: [[TMP8:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP8]], 1 -// CHECK1-NEXT: store i32 [[ADD]], i32* [[CONV]], align 8 +// CHECK1-NEXT: store i32 [[ADD]], i32* [[CONV]], align 4 // CHECK1-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x float], [10 x float]* [[TMP0]], i64 0, i64 2 // CHECK1-NEXT: [[TMP9:%.*]] = load float, float* [[ARRAYIDX]], align 4 // CHECK1-NEXT: [[CONV5:%.*]] = fpext float [[TMP9]] to double @@ -1137,7 +1137,7 @@ int bar(int n){ // CHECK1-NEXT: [[TMP1:%.*]] = load i64, i64* [[VLA_ADDR]], align 8 // CHECK1-NEXT: [[TMP2:%.*]] = load i64, i64* [[VLA_ADDR2]], align 8 // CHECK1-NEXT: [[TMP3:%.*]] = load i16*, i16** [[C_ADDR]], align 8 -// CHECK1-NEXT: [[TMP4:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK1-NEXT: [[TMP4:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK1-NEXT: [[CONV3:%.*]] = bitcast i64* [[B_CASTED]] to i32* // 
CHECK1-NEXT: store i32 [[TMP4]], i32* [[CONV3]], align 4 // CHECK1-NEXT: [[TMP5:%.*]] = load i64, i64* [[B_CASTED]], align 8 @@ -1167,7 +1167,7 @@ int bar(int n){ // CHECK1-NEXT: [[TMP1:%.*]] = load i64, i64* [[VLA_ADDR]], align 8 // CHECK1-NEXT: [[TMP2:%.*]] = load i64, i64* [[VLA_ADDR2]], align 8 // CHECK1-NEXT: [[TMP3:%.*]] = load i16*, i16** [[C_ADDR]], align 8 -// CHECK1-NEXT: [[TMP4:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK1-NEXT: [[TMP4:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK1-NEXT: [[CONV3:%.*]] = sitofp i32 [[TMP4]] to double // CHECK1-NEXT: [[ADD:%.*]] = fadd double [[CONV3]], 1.500000e+00 // CHECK1-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_S1:%.*]], %struct.S1* [[TMP0]], i32 0, i32 0 @@ -1202,15 +1202,15 @@ int bar(int n){ // CHECK1-NEXT: [[CONV1:%.*]] = bitcast i64* [[AA_ADDR]] to i16* // CHECK1-NEXT: [[CONV2:%.*]] = bitcast i64* [[AAA_ADDR]] to i8* // CHECK1-NEXT: [[TMP0:%.*]] = load [10 x i32]*, [10 x i32]** [[B_ADDR]], align 8 -// CHECK1-NEXT: [[TMP1:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK1-NEXT: [[CONV3:%.*]] = bitcast i64* [[A_CASTED]] to i32* // CHECK1-NEXT: store i32 [[TMP1]], i32* [[CONV3]], align 4 // CHECK1-NEXT: [[TMP2:%.*]] = load i64, i64* [[A_CASTED]], align 8 -// CHECK1-NEXT: [[TMP3:%.*]] = load i16, i16* [[CONV1]], align 8 +// CHECK1-NEXT: [[TMP3:%.*]] = load i16, i16* [[CONV1]], align 2 // CHECK1-NEXT: [[CONV4:%.*]] = bitcast i64* [[AA_CASTED]] to i16* // CHECK1-NEXT: store i16 [[TMP3]], i16* [[CONV4]], align 2 // CHECK1-NEXT: [[TMP4:%.*]] = load i64, i64* [[AA_CASTED]], align 8 -// CHECK1-NEXT: [[TMP5:%.*]] = load i8, i8* [[CONV2]], align 8 +// CHECK1-NEXT: [[TMP5:%.*]] = load i8, i8* [[CONV2]], align 1 // CHECK1-NEXT: [[CONV5:%.*]] = bitcast i64* [[AAA_CASTED]] to i8* // CHECK1-NEXT: store i8 [[TMP5]], i8* [[CONV5]], align 1 // CHECK1-NEXT: [[TMP6:%.*]] = load i64, i64* [[AAA_CASTED]], align 8 @@ -1237,19 +1237,19 @@ int bar(int n){ // CHECK1-NEXT: [[CONV1:%.*]] = bitcast i64* [[AA_ADDR]] to i16* // CHECK1-NEXT: [[CONV2:%.*]] = bitcast i64* [[AAA_ADDR]] to i8* // CHECK1-NEXT: [[TMP0:%.*]] = load [10 x i32]*, [10 x i32]** [[B_ADDR]], align 8 -// CHECK1-NEXT: [[TMP1:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP1]], 1 -// CHECK1-NEXT: store i32 [[ADD]], i32* [[CONV]], align 8 -// CHECK1-NEXT: [[TMP2:%.*]] = load i16, i16* [[CONV1]], align 8 +// CHECK1-NEXT: store i32 [[ADD]], i32* [[CONV]], align 4 +// CHECK1-NEXT: [[TMP2:%.*]] = load i16, i16* [[CONV1]], align 2 // CHECK1-NEXT: [[CONV3:%.*]] = sext i16 [[TMP2]] to i32 // CHECK1-NEXT: [[ADD4:%.*]] = add nsw i32 [[CONV3]], 1 // CHECK1-NEXT: [[CONV5:%.*]] = trunc i32 [[ADD4]] to i16 -// CHECK1-NEXT: store i16 [[CONV5]], i16* [[CONV1]], align 8 -// CHECK1-NEXT: [[TMP3:%.*]] = load i8, i8* [[CONV2]], align 8 +// CHECK1-NEXT: store i16 [[CONV5]], i16* [[CONV1]], align 2 +// CHECK1-NEXT: [[TMP3:%.*]] = load i8, i8* [[CONV2]], align 1 // CHECK1-NEXT: [[CONV6:%.*]] = sext i8 [[TMP3]] to i32 // CHECK1-NEXT: [[ADD7:%.*]] = add nsw i32 [[CONV6]], 1 // CHECK1-NEXT: [[CONV8:%.*]] = trunc i32 [[ADD7]] to i8 -// CHECK1-NEXT: store i8 [[CONV8]], i8* [[CONV2]], align 8 +// CHECK1-NEXT: store i8 [[CONV8]], i8* [[CONV2]], align 1 // CHECK1-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], [10 x i32]* [[TMP0]], i64 0, i64 2 // CHECK1-NEXT: [[TMP4:%.*]] = load i32, i32* [[ARRAYIDX]], align 4 // CHECK1-NEXT: 
[[ADD9:%.*]] = add nsw i32 [[TMP4]], 1 @@ -1271,11 +1271,11 @@ int bar(int n){ // CHECK1-NEXT: [[CONV:%.*]] = bitcast i64* [[A_ADDR]] to i32* // CHECK1-NEXT: [[CONV1:%.*]] = bitcast i64* [[AA_ADDR]] to i16* // CHECK1-NEXT: [[TMP0:%.*]] = load [10 x i32]*, [10 x i32]** [[B_ADDR]], align 8 -// CHECK1-NEXT: [[TMP1:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK1-NEXT: [[CONV2:%.*]] = bitcast i64* [[A_CASTED]] to i32* // CHECK1-NEXT: store i32 [[TMP1]], i32* [[CONV2]], align 4 // CHECK1-NEXT: [[TMP2:%.*]] = load i64, i64* [[A_CASTED]], align 8 -// CHECK1-NEXT: [[TMP3:%.*]] = load i16, i16* [[CONV1]], align 8 +// CHECK1-NEXT: [[TMP3:%.*]] = load i16, i16* [[CONV1]], align 2 // CHECK1-NEXT: [[CONV3:%.*]] = bitcast i64* [[AA_CASTED]] to i16* // CHECK1-NEXT: store i16 [[TMP3]], i16* [[CONV3]], align 2 // CHECK1-NEXT: [[TMP4:%.*]] = load i64, i64* [[AA_CASTED]], align 8 @@ -1299,14 +1299,14 @@ int bar(int n){ // CHECK1-NEXT: [[CONV:%.*]] = bitcast i64* [[A_ADDR]] to i32* // CHECK1-NEXT: [[CONV1:%.*]] = bitcast i64* [[AA_ADDR]] to i16* // CHECK1-NEXT: [[TMP0:%.*]] = load [10 x i32]*, [10 x i32]** [[B_ADDR]], align 8 -// CHECK1-NEXT: [[TMP1:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP1]], 1 -// CHECK1-NEXT: store i32 [[ADD]], i32* [[CONV]], align 8 -// CHECK1-NEXT: [[TMP2:%.*]] = load i16, i16* [[CONV1]], align 8 +// CHECK1-NEXT: store i32 [[ADD]], i32* [[CONV]], align 4 +// CHECK1-NEXT: [[TMP2:%.*]] = load i16, i16* [[CONV1]], align 2 // CHECK1-NEXT: [[CONV2:%.*]] = sext i16 [[TMP2]] to i32 // CHECK1-NEXT: [[ADD3:%.*]] = add nsw i32 [[CONV2]], 1 // CHECK1-NEXT: [[CONV4:%.*]] = trunc i32 [[ADD3]] to i16 -// CHECK1-NEXT: store i16 [[CONV4]], i16* [[CONV1]], align 8 +// CHECK1-NEXT: store i16 [[CONV4]], i16* [[CONV1]], align 2 // CHECK1-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], [10 x i32]* [[TMP0]], i64 0, i64 2 // CHECK1-NEXT: [[TMP3:%.*]] = load i32, i32* [[ARRAYIDX]], align 4 // CHECK1-NEXT: [[ADD5:%.*]] = add nsw i32 [[TMP3]], 1 @@ -1624,7 +1624,7 @@ int bar(int n){ // CHECK2-NEXT: [[A_CASTED:%.*]] = alloca i64, align 8 // CHECK2-NEXT: store i64 [[A]], i64* [[A_ADDR]], align 8 // CHECK2-NEXT: [[CONV:%.*]] = bitcast i64* [[A_ADDR]] to i32* -// CHECK2-NEXT: [[TMP0:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK2-NEXT: [[TMP0:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK2-NEXT: [[CONV1:%.*]] = bitcast i64* [[A_CASTED]] to i32* // CHECK2-NEXT: store i32 [[TMP0]], i32* [[CONV1]], align 4 // CHECK2-NEXT: [[TMP1:%.*]] = load i64, i64* [[A_CASTED]], align 8 @@ -1642,9 +1642,9 @@ int bar(int n){ // CHECK2-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 // CHECK2-NEXT: store i64 [[A]], i64* [[A_ADDR]], align 8 // CHECK2-NEXT: [[CONV:%.*]] = bitcast i64* [[A_ADDR]] to i32* -// CHECK2-NEXT: [[TMP0:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK2-NEXT: [[TMP0:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK2-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP0]], 1 -// CHECK2-NEXT: store i32 [[ADD]], i32* [[CONV]], align 8 +// CHECK2-NEXT: store i32 [[ADD]], i32* [[CONV]], align 4 // CHECK2-NEXT: ret void // // @@ -1655,7 +1655,7 @@ int bar(int n){ // CHECK2-NEXT: [[AA_CASTED:%.*]] = alloca i64, align 8 // CHECK2-NEXT: store i64 [[AA]], i64* [[AA_ADDR]], align 8 // CHECK2-NEXT: [[CONV:%.*]] = bitcast i64* [[AA_ADDR]] to i16* -// CHECK2-NEXT: [[TMP0:%.*]] = load i16, i16* [[CONV]], align 8 +// 
CHECK2-NEXT: [[TMP0:%.*]] = load i16, i16* [[CONV]], align 2 // CHECK2-NEXT: [[CONV1:%.*]] = bitcast i64* [[AA_CASTED]] to i16* // CHECK2-NEXT: store i16 [[TMP0]], i16* [[CONV1]], align 2 // CHECK2-NEXT: [[TMP1:%.*]] = load i64, i64* [[AA_CASTED]], align 8 @@ -1673,11 +1673,11 @@ int bar(int n){ // CHECK2-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 // CHECK2-NEXT: store i64 [[AA]], i64* [[AA_ADDR]], align 8 // CHECK2-NEXT: [[CONV:%.*]] = bitcast i64* [[AA_ADDR]] to i16* -// CHECK2-NEXT: [[TMP0:%.*]] = load i16, i16* [[CONV]], align 8 +// CHECK2-NEXT: [[TMP0:%.*]] = load i16, i16* [[CONV]], align 2 // CHECK2-NEXT: [[CONV1:%.*]] = sext i16 [[TMP0]] to i32 // CHECK2-NEXT: [[ADD:%.*]] = add nsw i32 [[CONV1]], 1 // CHECK2-NEXT: [[CONV2:%.*]] = trunc i32 [[ADD]] to i16 -// CHECK2-NEXT: store i16 [[CONV2]], i16* [[CONV]], align 8 +// CHECK2-NEXT: store i16 [[CONV2]], i16* [[CONV]], align 2 // CHECK2-NEXT: [[TMP1:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK2-NEXT: [[TMP2:%.*]] = load i32, i32* [[TMP1]], align 4 // CHECK2-NEXT: [[TMP3:%.*]] = call i32 @__kmpc_cancel(%struct.ident_t* @[[GLOB1]], i32 [[TMP2]], i32 1) @@ -1701,11 +1701,11 @@ int bar(int n){ // CHECK2-NEXT: store i64 [[AA]], i64* [[AA_ADDR]], align 8 // CHECK2-NEXT: [[CONV:%.*]] = bitcast i64* [[A_ADDR]] to i32* // CHECK2-NEXT: [[CONV1:%.*]] = bitcast i64* [[AA_ADDR]] to i16* -// CHECK2-NEXT: [[TMP0:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK2-NEXT: [[TMP0:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK2-NEXT: [[CONV2:%.*]] = bitcast i64* [[A_CASTED]] to i32* // CHECK2-NEXT: store i32 [[TMP0]], i32* [[CONV2]], align 4 // CHECK2-NEXT: [[TMP1:%.*]] = load i64, i64* [[A_CASTED]], align 8 -// CHECK2-NEXT: [[TMP2:%.*]] = load i16, i16* [[CONV1]], align 8 +// CHECK2-NEXT: [[TMP2:%.*]] = load i16, i16* [[CONV1]], align 2 // CHECK2-NEXT: [[CONV3:%.*]] = bitcast i64* [[AA_CASTED]] to i16* // CHECK2-NEXT: store i16 [[TMP2]], i16* [[CONV3]], align 2 // CHECK2-NEXT: [[TMP3:%.*]] = load i64, i64* [[AA_CASTED]], align 8 @@ -1726,14 +1726,14 @@ int bar(int n){ // CHECK2-NEXT: store i64 [[AA]], i64* [[AA_ADDR]], align 8 // CHECK2-NEXT: [[CONV:%.*]] = bitcast i64* [[A_ADDR]] to i32* // CHECK2-NEXT: [[CONV1:%.*]] = bitcast i64* [[AA_ADDR]] to i16* -// CHECK2-NEXT: [[TMP0:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK2-NEXT: [[TMP0:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK2-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP0]], 1 -// CHECK2-NEXT: store i32 [[ADD]], i32* [[CONV]], align 8 -// CHECK2-NEXT: [[TMP1:%.*]] = load i16, i16* [[CONV1]], align 8 +// CHECK2-NEXT: store i32 [[ADD]], i32* [[CONV]], align 4 +// CHECK2-NEXT: [[TMP1:%.*]] = load i16, i16* [[CONV1]], align 2 // CHECK2-NEXT: [[CONV2:%.*]] = sext i16 [[TMP1]] to i32 // CHECK2-NEXT: [[ADD3:%.*]] = add nsw i32 [[CONV2]], 1 // CHECK2-NEXT: [[CONV4:%.*]] = trunc i32 [[ADD3]] to i16 -// CHECK2-NEXT: store i16 [[CONV4]], i16* [[CONV1]], align 8 +// CHECK2-NEXT: store i16 [[CONV4]], i16* [[CONV1]], align 2 // CHECK2-NEXT: ret void // // @@ -1768,7 +1768,7 @@ int bar(int n){ // CHECK2-NEXT: [[TMP5:%.*]] = load i64, i64* [[VLA_ADDR4]], align 8 // CHECK2-NEXT: [[TMP6:%.*]] = load double*, double** [[CN_ADDR]], align 8 // CHECK2-NEXT: [[TMP7:%.*]] = load %struct.TT*, %struct.TT** [[D_ADDR]], align 8 -// CHECK2-NEXT: [[TMP8:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK2-NEXT: [[TMP8:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK2-NEXT: [[CONV5:%.*]] = bitcast i64* [[A_CASTED]] to i32* // CHECK2-NEXT: store i32 [[TMP8]], i32* 
[[CONV5]], align 4 // CHECK2-NEXT: [[TMP9:%.*]] = load i64, i64* [[A_CASTED]], align 8 @@ -1810,9 +1810,9 @@ int bar(int n){ // CHECK2-NEXT: [[TMP5:%.*]] = load i64, i64* [[VLA_ADDR4]], align 8 // CHECK2-NEXT: [[TMP6:%.*]] = load double*, double** [[CN_ADDR]], align 8 // CHECK2-NEXT: [[TMP7:%.*]] = load %struct.TT*, %struct.TT** [[D_ADDR]], align 8 -// CHECK2-NEXT: [[TMP8:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK2-NEXT: [[TMP8:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK2-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP8]], 1 -// CHECK2-NEXT: store i32 [[ADD]], i32* [[CONV]], align 8 +// CHECK2-NEXT: store i32 [[ADD]], i32* [[CONV]], align 4 // CHECK2-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x float], [10 x float]* [[TMP0]], i64 0, i64 2 // CHECK2-NEXT: [[TMP9:%.*]] = load float, float* [[ARRAYIDX]], align 4 // CHECK2-NEXT: [[CONV5:%.*]] = fpext float [[TMP9]] to double @@ -2167,7 +2167,7 @@ int bar(int n){ // CHECK2-NEXT: [[TMP1:%.*]] = load i64, i64* [[VLA_ADDR]], align 8 // CHECK2-NEXT: [[TMP2:%.*]] = load i64, i64* [[VLA_ADDR2]], align 8 // CHECK2-NEXT: [[TMP3:%.*]] = load i16*, i16** [[C_ADDR]], align 8 -// CHECK2-NEXT: [[TMP4:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK2-NEXT: [[TMP4:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK2-NEXT: [[CONV3:%.*]] = bitcast i64* [[B_CASTED]] to i32* // CHECK2-NEXT: store i32 [[TMP4]], i32* [[CONV3]], align 4 // CHECK2-NEXT: [[TMP5:%.*]] = load i64, i64* [[B_CASTED]], align 8 @@ -2197,7 +2197,7 @@ int bar(int n){ // CHECK2-NEXT: [[TMP1:%.*]] = load i64, i64* [[VLA_ADDR]], align 8 // CHECK2-NEXT: [[TMP2:%.*]] = load i64, i64* [[VLA_ADDR2]], align 8 // CHECK2-NEXT: [[TMP3:%.*]] = load i16*, i16** [[C_ADDR]], align 8 -// CHECK2-NEXT: [[TMP4:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK2-NEXT: [[TMP4:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK2-NEXT: [[CONV3:%.*]] = sitofp i32 [[TMP4]] to double // CHECK2-NEXT: [[ADD:%.*]] = fadd double [[CONV3]], 1.500000e+00 // CHECK2-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_S1:%.*]], %struct.S1* [[TMP0]], i32 0, i32 0 @@ -2232,15 +2232,15 @@ int bar(int n){ // CHECK2-NEXT: [[CONV1:%.*]] = bitcast i64* [[AA_ADDR]] to i16* // CHECK2-NEXT: [[CONV2:%.*]] = bitcast i64* [[AAA_ADDR]] to i8* // CHECK2-NEXT: [[TMP0:%.*]] = load [10 x i32]*, [10 x i32]** [[B_ADDR]], align 8 -// CHECK2-NEXT: [[TMP1:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK2-NEXT: [[TMP1:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK2-NEXT: [[CONV3:%.*]] = bitcast i64* [[A_CASTED]] to i32* // CHECK2-NEXT: store i32 [[TMP1]], i32* [[CONV3]], align 4 // CHECK2-NEXT: [[TMP2:%.*]] = load i64, i64* [[A_CASTED]], align 8 -// CHECK2-NEXT: [[TMP3:%.*]] = load i16, i16* [[CONV1]], align 8 +// CHECK2-NEXT: [[TMP3:%.*]] = load i16, i16* [[CONV1]], align 2 // CHECK2-NEXT: [[CONV4:%.*]] = bitcast i64* [[AA_CASTED]] to i16* // CHECK2-NEXT: store i16 [[TMP3]], i16* [[CONV4]], align 2 // CHECK2-NEXT: [[TMP4:%.*]] = load i64, i64* [[AA_CASTED]], align 8 -// CHECK2-NEXT: [[TMP5:%.*]] = load i8, i8* [[CONV2]], align 8 +// CHECK2-NEXT: [[TMP5:%.*]] = load i8, i8* [[CONV2]], align 1 // CHECK2-NEXT: [[CONV5:%.*]] = bitcast i64* [[AAA_CASTED]] to i8* // CHECK2-NEXT: store i8 [[TMP5]], i8* [[CONV5]], align 1 // CHECK2-NEXT: [[TMP6:%.*]] = load i64, i64* [[AAA_CASTED]], align 8 @@ -2267,19 +2267,19 @@ int bar(int n){ // CHECK2-NEXT: [[CONV1:%.*]] = bitcast i64* [[AA_ADDR]] to i16* // CHECK2-NEXT: [[CONV2:%.*]] = bitcast i64* [[AAA_ADDR]] to i8* // CHECK2-NEXT: [[TMP0:%.*]] = load [10 x i32]*, [10 x i32]** [[B_ADDR]], align 8 -// 
CHECK2-NEXT: [[TMP1:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK2-NEXT: [[TMP1:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK2-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP1]], 1 -// CHECK2-NEXT: store i32 [[ADD]], i32* [[CONV]], align 8 -// CHECK2-NEXT: [[TMP2:%.*]] = load i16, i16* [[CONV1]], align 8 +// CHECK2-NEXT: store i32 [[ADD]], i32* [[CONV]], align 4 +// CHECK2-NEXT: [[TMP2:%.*]] = load i16, i16* [[CONV1]], align 2 // CHECK2-NEXT: [[CONV3:%.*]] = sext i16 [[TMP2]] to i32 // CHECK2-NEXT: [[ADD4:%.*]] = add nsw i32 [[CONV3]], 1 // CHECK2-NEXT: [[CONV5:%.*]] = trunc i32 [[ADD4]] to i16 -// CHECK2-NEXT: store i16 [[CONV5]], i16* [[CONV1]], align 8 -// CHECK2-NEXT: [[TMP3:%.*]] = load i8, i8* [[CONV2]], align 8 +// CHECK2-NEXT: store i16 [[CONV5]], i16* [[CONV1]], align 2 +// CHECK2-NEXT: [[TMP3:%.*]] = load i8, i8* [[CONV2]], align 1 // CHECK2-NEXT: [[CONV6:%.*]] = sext i8 [[TMP3]] to i32 // CHECK2-NEXT: [[ADD7:%.*]] = add nsw i32 [[CONV6]], 1 // CHECK2-NEXT: [[CONV8:%.*]] = trunc i32 [[ADD7]] to i8 -// CHECK2-NEXT: store i8 [[CONV8]], i8* [[CONV2]], align 8 +// CHECK2-NEXT: store i8 [[CONV8]], i8* [[CONV2]], align 1 // CHECK2-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], [10 x i32]* [[TMP0]], i64 0, i64 2 // CHECK2-NEXT: [[TMP4:%.*]] = load i32, i32* [[ARRAYIDX]], align 4 // CHECK2-NEXT: [[ADD9:%.*]] = add nsw i32 [[TMP4]], 1 @@ -2301,11 +2301,11 @@ int bar(int n){ // CHECK2-NEXT: [[CONV:%.*]] = bitcast i64* [[A_ADDR]] to i32* // CHECK2-NEXT: [[CONV1:%.*]] = bitcast i64* [[AA_ADDR]] to i16* // CHECK2-NEXT: [[TMP0:%.*]] = load [10 x i32]*, [10 x i32]** [[B_ADDR]], align 8 -// CHECK2-NEXT: [[TMP1:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK2-NEXT: [[TMP1:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK2-NEXT: [[CONV2:%.*]] = bitcast i64* [[A_CASTED]] to i32* // CHECK2-NEXT: store i32 [[TMP1]], i32* [[CONV2]], align 4 // CHECK2-NEXT: [[TMP2:%.*]] = load i64, i64* [[A_CASTED]], align 8 -// CHECK2-NEXT: [[TMP3:%.*]] = load i16, i16* [[CONV1]], align 8 +// CHECK2-NEXT: [[TMP3:%.*]] = load i16, i16* [[CONV1]], align 2 // CHECK2-NEXT: [[CONV3:%.*]] = bitcast i64* [[AA_CASTED]] to i16* // CHECK2-NEXT: store i16 [[TMP3]], i16* [[CONV3]], align 2 // CHECK2-NEXT: [[TMP4:%.*]] = load i64, i64* [[AA_CASTED]], align 8 @@ -2329,14 +2329,14 @@ int bar(int n){ // CHECK2-NEXT: [[CONV:%.*]] = bitcast i64* [[A_ADDR]] to i32* // CHECK2-NEXT: [[CONV1:%.*]] = bitcast i64* [[AA_ADDR]] to i16* // CHECK2-NEXT: [[TMP0:%.*]] = load [10 x i32]*, [10 x i32]** [[B_ADDR]], align 8 -// CHECK2-NEXT: [[TMP1:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK2-NEXT: [[TMP1:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK2-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP1]], 1 -// CHECK2-NEXT: store i32 [[ADD]], i32* [[CONV]], align 8 -// CHECK2-NEXT: [[TMP2:%.*]] = load i16, i16* [[CONV1]], align 8 +// CHECK2-NEXT: store i32 [[ADD]], i32* [[CONV]], align 4 +// CHECK2-NEXT: [[TMP2:%.*]] = load i16, i16* [[CONV1]], align 2 // CHECK2-NEXT: [[CONV2:%.*]] = sext i16 [[TMP2]] to i32 // CHECK2-NEXT: [[ADD3:%.*]] = add nsw i32 [[CONV2]], 1 // CHECK2-NEXT: [[CONV4:%.*]] = trunc i32 [[ADD3]] to i16 -// CHECK2-NEXT: store i16 [[CONV4]], i16* [[CONV1]], align 8 +// CHECK2-NEXT: store i16 [[CONV4]], i16* [[CONV1]], align 2 // CHECK2-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], [10 x i32]* [[TMP0]], i64 0, i64 2 // CHECK2-NEXT: [[TMP3:%.*]] = load i32, i32* [[ARRAYIDX]], align 4 // CHECK2-NEXT: [[ADD5:%.*]] = add nsw i32 [[TMP3]], 1 @@ -2679,7 +2679,7 @@ int bar(int n){ // CHECK3-NEXT: [[AA_CASTED:%.*]] = 
alloca i32, align 4 // CHECK3-NEXT: store i32 [[AA]], i32* [[AA_ADDR]], align 4 // CHECK3-NEXT: [[CONV:%.*]] = bitcast i32* [[AA_ADDR]] to i16* -// CHECK3-NEXT: [[TMP0:%.*]] = load i16, i16* [[CONV]], align 4 +// CHECK3-NEXT: [[TMP0:%.*]] = load i16, i16* [[CONV]], align 2 // CHECK3-NEXT: [[CONV1:%.*]] = bitcast i32* [[AA_CASTED]] to i16* // CHECK3-NEXT: store i16 [[TMP0]], i16* [[CONV1]], align 2 // CHECK3-NEXT: [[TMP1:%.*]] = load i32, i32* [[AA_CASTED]], align 4 @@ -2697,11 +2697,11 @@ int bar(int n){ // CHECK3-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4 // CHECK3-NEXT: store i32 [[AA]], i32* [[AA_ADDR]], align 4 // CHECK3-NEXT: [[CONV:%.*]] = bitcast i32* [[AA_ADDR]] to i16* -// CHECK3-NEXT: [[TMP0:%.*]] = load i16, i16* [[CONV]], align 4 +// CHECK3-NEXT: [[TMP0:%.*]] = load i16, i16* [[CONV]], align 2 // CHECK3-NEXT: [[CONV1:%.*]] = sext i16 [[TMP0]] to i32 // CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 [[CONV1]], 1 // CHECK3-NEXT: [[CONV2:%.*]] = trunc i32 [[ADD]] to i16 -// CHECK3-NEXT: store i16 [[CONV2]], i16* [[CONV]], align 4 +// CHECK3-NEXT: store i16 [[CONV2]], i16* [[CONV]], align 2 // CHECK3-NEXT: [[TMP1:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK3-NEXT: [[TMP2:%.*]] = load i32, i32* [[TMP1]], align 4 // CHECK3-NEXT: [[TMP3:%.*]] = call i32 @__kmpc_cancel(%struct.ident_t* @[[GLOB1]], i32 [[TMP2]], i32 1) @@ -2727,7 +2727,7 @@ int bar(int n){ // CHECK3-NEXT: [[TMP0:%.*]] = load i32, i32* [[A_ADDR]], align 4 // CHECK3-NEXT: store i32 [[TMP0]], i32* [[A_CASTED]], align 4 // CHECK3-NEXT: [[TMP1:%.*]] = load i32, i32* [[A_CASTED]], align 4 -// CHECK3-NEXT: [[TMP2:%.*]] = load i16, i16* [[CONV]], align 4 +// CHECK3-NEXT: [[TMP2:%.*]] = load i16, i16* [[CONV]], align 2 // CHECK3-NEXT: [[CONV1:%.*]] = bitcast i32* [[AA_CASTED]] to i16* // CHECK3-NEXT: store i16 [[TMP2]], i16* [[CONV1]], align 2 // CHECK3-NEXT: [[TMP3:%.*]] = load i32, i32* [[AA_CASTED]], align 4 @@ -2750,11 +2750,11 @@ int bar(int n){ // CHECK3-NEXT: [[TMP0:%.*]] = load i32, i32* [[A_ADDR]], align 4 // CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP0]], 1 // CHECK3-NEXT: store i32 [[ADD]], i32* [[A_ADDR]], align 4 -// CHECK3-NEXT: [[TMP1:%.*]] = load i16, i16* [[CONV]], align 4 +// CHECK3-NEXT: [[TMP1:%.*]] = load i16, i16* [[CONV]], align 2 // CHECK3-NEXT: [[CONV1:%.*]] = sext i16 [[TMP1]] to i32 // CHECK3-NEXT: [[ADD2:%.*]] = add nsw i32 [[CONV1]], 1 // CHECK3-NEXT: [[CONV3:%.*]] = trunc i32 [[ADD2]] to i16 -// CHECK3-NEXT: store i16 [[CONV3]], i16* [[CONV]], align 4 +// CHECK3-NEXT: store i16 [[CONV3]], i16* [[CONV]], align 2 // CHECK3-NEXT: ret void // // @@ -3246,11 +3246,11 @@ int bar(int n){ // CHECK3-NEXT: [[TMP1:%.*]] = load i32, i32* [[A_ADDR]], align 4 // CHECK3-NEXT: store i32 [[TMP1]], i32* [[A_CASTED]], align 4 // CHECK3-NEXT: [[TMP2:%.*]] = load i32, i32* [[A_CASTED]], align 4 -// CHECK3-NEXT: [[TMP3:%.*]] = load i16, i16* [[CONV]], align 4 +// CHECK3-NEXT: [[TMP3:%.*]] = load i16, i16* [[CONV]], align 2 // CHECK3-NEXT: [[CONV2:%.*]] = bitcast i32* [[AA_CASTED]] to i16* // CHECK3-NEXT: store i16 [[TMP3]], i16* [[CONV2]], align 2 // CHECK3-NEXT: [[TMP4:%.*]] = load i32, i32* [[AA_CASTED]], align 4 -// CHECK3-NEXT: [[TMP5:%.*]] = load i8, i8* [[CONV1]], align 4 +// CHECK3-NEXT: [[TMP5:%.*]] = load i8, i8* [[CONV1]], align 1 // CHECK3-NEXT: [[CONV3:%.*]] = bitcast i32* [[AAA_CASTED]] to i8* // CHECK3-NEXT: store i8 [[TMP5]], i8* [[CONV3]], align 1 // CHECK3-NEXT: [[TMP6:%.*]] = load i32, i32* [[AAA_CASTED]], align 4 @@ -3279,16 +3279,16 @@ int bar(int n){ // 
CHECK3-NEXT: [[TMP1:%.*]] = load i32, i32* [[A_ADDR]], align 4 // CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP1]], 1 // CHECK3-NEXT: store i32 [[ADD]], i32* [[A_ADDR]], align 4 -// CHECK3-NEXT: [[TMP2:%.*]] = load i16, i16* [[CONV]], align 4 +// CHECK3-NEXT: [[TMP2:%.*]] = load i16, i16* [[CONV]], align 2 // CHECK3-NEXT: [[CONV2:%.*]] = sext i16 [[TMP2]] to i32 // CHECK3-NEXT: [[ADD3:%.*]] = add nsw i32 [[CONV2]], 1 // CHECK3-NEXT: [[CONV4:%.*]] = trunc i32 [[ADD3]] to i16 -// CHECK3-NEXT: store i16 [[CONV4]], i16* [[CONV]], align 4 -// CHECK3-NEXT: [[TMP3:%.*]] = load i8, i8* [[CONV1]], align 4 +// CHECK3-NEXT: store i16 [[CONV4]], i16* [[CONV]], align 2 +// CHECK3-NEXT: [[TMP3:%.*]] = load i8, i8* [[CONV1]], align 1 // CHECK3-NEXT: [[CONV5:%.*]] = sext i8 [[TMP3]] to i32 // CHECK3-NEXT: [[ADD6:%.*]] = add nsw i32 [[CONV5]], 1 // CHECK3-NEXT: [[CONV7:%.*]] = trunc i32 [[ADD6]] to i8 -// CHECK3-NEXT: store i8 [[CONV7]], i8* [[CONV1]], align 4 +// CHECK3-NEXT: store i8 [[CONV7]], i8* [[CONV1]], align 1 // CHECK3-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], [10 x i32]* [[TMP0]], i32 0, i32 2 // CHECK3-NEXT: [[TMP4:%.*]] = load i32, i32* [[ARRAYIDX]], align 4 // CHECK3-NEXT: [[ADD8:%.*]] = add nsw i32 [[TMP4]], 1 @@ -3312,7 +3312,7 @@ int bar(int n){ // CHECK3-NEXT: [[TMP1:%.*]] = load i32, i32* [[A_ADDR]], align 4 // CHECK3-NEXT: store i32 [[TMP1]], i32* [[A_CASTED]], align 4 // CHECK3-NEXT: [[TMP2:%.*]] = load i32, i32* [[A_CASTED]], align 4 -// CHECK3-NEXT: [[TMP3:%.*]] = load i16, i16* [[CONV]], align 4 +// CHECK3-NEXT: [[TMP3:%.*]] = load i16, i16* [[CONV]], align 2 // CHECK3-NEXT: [[CONV1:%.*]] = bitcast i32* [[AA_CASTED]] to i16* // CHECK3-NEXT: store i16 [[TMP3]], i16* [[CONV1]], align 2 // CHECK3-NEXT: [[TMP4:%.*]] = load i32, i32* [[AA_CASTED]], align 4 @@ -3338,11 +3338,11 @@ int bar(int n){ // CHECK3-NEXT: [[TMP1:%.*]] = load i32, i32* [[A_ADDR]], align 4 // CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP1]], 1 // CHECK3-NEXT: store i32 [[ADD]], i32* [[A_ADDR]], align 4 -// CHECK3-NEXT: [[TMP2:%.*]] = load i16, i16* [[CONV]], align 4 +// CHECK3-NEXT: [[TMP2:%.*]] = load i16, i16* [[CONV]], align 2 // CHECK3-NEXT: [[CONV1:%.*]] = sext i16 [[TMP2]] to i32 // CHECK3-NEXT: [[ADD2:%.*]] = add nsw i32 [[CONV1]], 1 // CHECK3-NEXT: [[CONV3:%.*]] = trunc i32 [[ADD2]] to i16 -// CHECK3-NEXT: store i16 [[CONV3]], i16* [[CONV]], align 4 +// CHECK3-NEXT: store i16 [[CONV3]], i16* [[CONV]], align 2 // CHECK3-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], [10 x i32]* [[TMP0]], i32 0, i32 2 // CHECK3-NEXT: [[TMP3:%.*]] = load i32, i32* [[ARRAYIDX]], align 4 // CHECK3-NEXT: [[ADD4:%.*]] = add nsw i32 [[TMP3]], 1 @@ -3685,7 +3685,7 @@ int bar(int n){ // CHECK4-NEXT: [[AA_CASTED:%.*]] = alloca i32, align 4 // CHECK4-NEXT: store i32 [[AA]], i32* [[AA_ADDR]], align 4 // CHECK4-NEXT: [[CONV:%.*]] = bitcast i32* [[AA_ADDR]] to i16* -// CHECK4-NEXT: [[TMP0:%.*]] = load i16, i16* [[CONV]], align 4 +// CHECK4-NEXT: [[TMP0:%.*]] = load i16, i16* [[CONV]], align 2 // CHECK4-NEXT: [[CONV1:%.*]] = bitcast i32* [[AA_CASTED]] to i16* // CHECK4-NEXT: store i16 [[TMP0]], i16* [[CONV1]], align 2 // CHECK4-NEXT: [[TMP1:%.*]] = load i32, i32* [[AA_CASTED]], align 4 @@ -3703,11 +3703,11 @@ int bar(int n){ // CHECK4-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4 // CHECK4-NEXT: store i32 [[AA]], i32* [[AA_ADDR]], align 4 // CHECK4-NEXT: [[CONV:%.*]] = bitcast i32* [[AA_ADDR]] to i16* -// CHECK4-NEXT: [[TMP0:%.*]] = load i16, i16* [[CONV]], align 4 +// CHECK4-NEXT: 
[[TMP0:%.*]] = load i16, i16* [[CONV]], align 2 // CHECK4-NEXT: [[CONV1:%.*]] = sext i16 [[TMP0]] to i32 // CHECK4-NEXT: [[ADD:%.*]] = add nsw i32 [[CONV1]], 1 // CHECK4-NEXT: [[CONV2:%.*]] = trunc i32 [[ADD]] to i16 -// CHECK4-NEXT: store i16 [[CONV2]], i16* [[CONV]], align 4 +// CHECK4-NEXT: store i16 [[CONV2]], i16* [[CONV]], align 2 // CHECK4-NEXT: [[TMP1:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK4-NEXT: [[TMP2:%.*]] = load i32, i32* [[TMP1]], align 4 // CHECK4-NEXT: [[TMP3:%.*]] = call i32 @__kmpc_cancel(%struct.ident_t* @[[GLOB1]], i32 [[TMP2]], i32 1) @@ -3733,7 +3733,7 @@ int bar(int n){ // CHECK4-NEXT: [[TMP0:%.*]] = load i32, i32* [[A_ADDR]], align 4 // CHECK4-NEXT: store i32 [[TMP0]], i32* [[A_CASTED]], align 4 // CHECK4-NEXT: [[TMP1:%.*]] = load i32, i32* [[A_CASTED]], align 4 -// CHECK4-NEXT: [[TMP2:%.*]] = load i16, i16* [[CONV]], align 4 +// CHECK4-NEXT: [[TMP2:%.*]] = load i16, i16* [[CONV]], align 2 // CHECK4-NEXT: [[CONV1:%.*]] = bitcast i32* [[AA_CASTED]] to i16* // CHECK4-NEXT: store i16 [[TMP2]], i16* [[CONV1]], align 2 // CHECK4-NEXT: [[TMP3:%.*]] = load i32, i32* [[AA_CASTED]], align 4 @@ -3756,11 +3756,11 @@ int bar(int n){ // CHECK4-NEXT: [[TMP0:%.*]] = load i32, i32* [[A_ADDR]], align 4 // CHECK4-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP0]], 1 // CHECK4-NEXT: store i32 [[ADD]], i32* [[A_ADDR]], align 4 -// CHECK4-NEXT: [[TMP1:%.*]] = load i16, i16* [[CONV]], align 4 +// CHECK4-NEXT: [[TMP1:%.*]] = load i16, i16* [[CONV]], align 2 // CHECK4-NEXT: [[CONV1:%.*]] = sext i16 [[TMP1]] to i32 // CHECK4-NEXT: [[ADD2:%.*]] = add nsw i32 [[CONV1]], 1 // CHECK4-NEXT: [[CONV3:%.*]] = trunc i32 [[ADD2]] to i16 -// CHECK4-NEXT: store i16 [[CONV3]], i16* [[CONV]], align 4 +// CHECK4-NEXT: store i16 [[CONV3]], i16* [[CONV]], align 2 // CHECK4-NEXT: ret void // // @@ -4252,11 +4252,11 @@ int bar(int n){ // CHECK4-NEXT: [[TMP1:%.*]] = load i32, i32* [[A_ADDR]], align 4 // CHECK4-NEXT: store i32 [[TMP1]], i32* [[A_CASTED]], align 4 // CHECK4-NEXT: [[TMP2:%.*]] = load i32, i32* [[A_CASTED]], align 4 -// CHECK4-NEXT: [[TMP3:%.*]] = load i16, i16* [[CONV]], align 4 +// CHECK4-NEXT: [[TMP3:%.*]] = load i16, i16* [[CONV]], align 2 // CHECK4-NEXT: [[CONV2:%.*]] = bitcast i32* [[AA_CASTED]] to i16* // CHECK4-NEXT: store i16 [[TMP3]], i16* [[CONV2]], align 2 // CHECK4-NEXT: [[TMP4:%.*]] = load i32, i32* [[AA_CASTED]], align 4 -// CHECK4-NEXT: [[TMP5:%.*]] = load i8, i8* [[CONV1]], align 4 +// CHECK4-NEXT: [[TMP5:%.*]] = load i8, i8* [[CONV1]], align 1 // CHECK4-NEXT: [[CONV3:%.*]] = bitcast i32* [[AAA_CASTED]] to i8* // CHECK4-NEXT: store i8 [[TMP5]], i8* [[CONV3]], align 1 // CHECK4-NEXT: [[TMP6:%.*]] = load i32, i32* [[AAA_CASTED]], align 4 @@ -4285,16 +4285,16 @@ int bar(int n){ // CHECK4-NEXT: [[TMP1:%.*]] = load i32, i32* [[A_ADDR]], align 4 // CHECK4-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP1]], 1 // CHECK4-NEXT: store i32 [[ADD]], i32* [[A_ADDR]], align 4 -// CHECK4-NEXT: [[TMP2:%.*]] = load i16, i16* [[CONV]], align 4 +// CHECK4-NEXT: [[TMP2:%.*]] = load i16, i16* [[CONV]], align 2 // CHECK4-NEXT: [[CONV2:%.*]] = sext i16 [[TMP2]] to i32 // CHECK4-NEXT: [[ADD3:%.*]] = add nsw i32 [[CONV2]], 1 // CHECK4-NEXT: [[CONV4:%.*]] = trunc i32 [[ADD3]] to i16 -// CHECK4-NEXT: store i16 [[CONV4]], i16* [[CONV]], align 4 -// CHECK4-NEXT: [[TMP3:%.*]] = load i8, i8* [[CONV1]], align 4 +// CHECK4-NEXT: store i16 [[CONV4]], i16* [[CONV]], align 2 +// CHECK4-NEXT: [[TMP3:%.*]] = load i8, i8* [[CONV1]], align 1 // CHECK4-NEXT: [[CONV5:%.*]] = sext i8 [[TMP3]] to i32 // CHECK4-NEXT: 
[[ADD6:%.*]] = add nsw i32 [[CONV5]], 1 // CHECK4-NEXT: [[CONV7:%.*]] = trunc i32 [[ADD6]] to i8 -// CHECK4-NEXT: store i8 [[CONV7]], i8* [[CONV1]], align 4 +// CHECK4-NEXT: store i8 [[CONV7]], i8* [[CONV1]], align 1 // CHECK4-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], [10 x i32]* [[TMP0]], i32 0, i32 2 // CHECK4-NEXT: [[TMP4:%.*]] = load i32, i32* [[ARRAYIDX]], align 4 // CHECK4-NEXT: [[ADD8:%.*]] = add nsw i32 [[TMP4]], 1 @@ -4318,7 +4318,7 @@ int bar(int n){ // CHECK4-NEXT: [[TMP1:%.*]] = load i32, i32* [[A_ADDR]], align 4 // CHECK4-NEXT: store i32 [[TMP1]], i32* [[A_CASTED]], align 4 // CHECK4-NEXT: [[TMP2:%.*]] = load i32, i32* [[A_CASTED]], align 4 -// CHECK4-NEXT: [[TMP3:%.*]] = load i16, i16* [[CONV]], align 4 +// CHECK4-NEXT: [[TMP3:%.*]] = load i16, i16* [[CONV]], align 2 // CHECK4-NEXT: [[CONV1:%.*]] = bitcast i32* [[AA_CASTED]] to i16* // CHECK4-NEXT: store i16 [[TMP3]], i16* [[CONV1]], align 2 // CHECK4-NEXT: [[TMP4:%.*]] = load i32, i32* [[AA_CASTED]], align 4 @@ -4344,11 +4344,11 @@ int bar(int n){ // CHECK4-NEXT: [[TMP1:%.*]] = load i32, i32* [[A_ADDR]], align 4 // CHECK4-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP1]], 1 // CHECK4-NEXT: store i32 [[ADD]], i32* [[A_ADDR]], align 4 -// CHECK4-NEXT: [[TMP2:%.*]] = load i16, i16* [[CONV]], align 4 +// CHECK4-NEXT: [[TMP2:%.*]] = load i16, i16* [[CONV]], align 2 // CHECK4-NEXT: [[CONV1:%.*]] = sext i16 [[TMP2]] to i32 // CHECK4-NEXT: [[ADD2:%.*]] = add nsw i32 [[CONV1]], 1 // CHECK4-NEXT: [[CONV3:%.*]] = trunc i32 [[ADD2]] to i16 -// CHECK4-NEXT: store i16 [[CONV3]], i16* [[CONV]], align 4 +// CHECK4-NEXT: store i16 [[CONV3]], i16* [[CONV]], align 2 // CHECK4-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], [10 x i32]* [[TMP0]], i32 0, i32 2 // CHECK4-NEXT: [[TMP3:%.*]] = load i32, i32* [[ARRAYIDX]], align 4 // CHECK4-NEXT: [[ADD4:%.*]] = add nsw i32 [[TMP3]], 1 @@ -4387,7 +4387,7 @@ int bar(int n){ // CHECK9-NEXT: [[AA_CASTED:%.*]] = alloca i64, align 8 // CHECK9-NEXT: store i64 [[AA]], i64* [[AA_ADDR]], align 8 // CHECK9-NEXT: [[CONV:%.*]] = bitcast i64* [[AA_ADDR]] to i16* -// CHECK9-NEXT: [[TMP0:%.*]] = load i16, i16* [[CONV]], align 8 +// CHECK9-NEXT: [[TMP0:%.*]] = load i16, i16* [[CONV]], align 2 // CHECK9-NEXT: [[CONV1:%.*]] = bitcast i64* [[AA_CASTED]] to i16* // CHECK9-NEXT: store i16 [[TMP0]], i16* [[CONV1]], align 2 // CHECK9-NEXT: [[TMP1:%.*]] = load i64, i64* [[AA_CASTED]], align 8 @@ -4405,11 +4405,11 @@ int bar(int n){ // CHECK9-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 // CHECK9-NEXT: store i64 [[AA]], i64* [[AA_ADDR]], align 8 // CHECK9-NEXT: [[CONV:%.*]] = bitcast i64* [[AA_ADDR]] to i16* -// CHECK9-NEXT: [[TMP0:%.*]] = load i16, i16* [[CONV]], align 8 +// CHECK9-NEXT: [[TMP0:%.*]] = load i16, i16* [[CONV]], align 2 // CHECK9-NEXT: [[CONV1:%.*]] = sext i16 [[TMP0]] to i32 // CHECK9-NEXT: [[ADD:%.*]] = add nsw i32 [[CONV1]], 1 // CHECK9-NEXT: [[CONV2:%.*]] = trunc i32 [[ADD]] to i16 -// CHECK9-NEXT: store i16 [[CONV2]], i16* [[CONV]], align 8 +// CHECK9-NEXT: store i16 [[CONV2]], i16* [[CONV]], align 2 // CHECK9-NEXT: [[TMP1:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK9-NEXT: [[TMP2:%.*]] = load i32, i32* [[TMP1]], align 4 // CHECK9-NEXT: [[TMP3:%.*]] = call i32 @__kmpc_cancel(%struct.ident_t* @[[GLOB1]], i32 [[TMP2]], i32 1) @@ -4433,11 +4433,11 @@ int bar(int n){ // CHECK9-NEXT: store i64 [[AA]], i64* [[AA_ADDR]], align 8 // CHECK9-NEXT: [[CONV:%.*]] = bitcast i64* [[A_ADDR]] to i32* // CHECK9-NEXT: [[CONV1:%.*]] = bitcast i64* 
[[AA_ADDR]] to i16* -// CHECK9-NEXT: [[TMP0:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK9-NEXT: [[TMP0:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK9-NEXT: [[CONV2:%.*]] = bitcast i64* [[A_CASTED]] to i32* // CHECK9-NEXT: store i32 [[TMP0]], i32* [[CONV2]], align 4 // CHECK9-NEXT: [[TMP1:%.*]] = load i64, i64* [[A_CASTED]], align 8 -// CHECK9-NEXT: [[TMP2:%.*]] = load i16, i16* [[CONV1]], align 8 +// CHECK9-NEXT: [[TMP2:%.*]] = load i16, i16* [[CONV1]], align 2 // CHECK9-NEXT: [[CONV3:%.*]] = bitcast i64* [[AA_CASTED]] to i16* // CHECK9-NEXT: store i16 [[TMP2]], i16* [[CONV3]], align 2 // CHECK9-NEXT: [[TMP3:%.*]] = load i64, i64* [[AA_CASTED]], align 8 @@ -4458,14 +4458,14 @@ int bar(int n){ // CHECK9-NEXT: store i64 [[AA]], i64* [[AA_ADDR]], align 8 // CHECK9-NEXT: [[CONV:%.*]] = bitcast i64* [[A_ADDR]] to i32* // CHECK9-NEXT: [[CONV1:%.*]] = bitcast i64* [[AA_ADDR]] to i16* -// CHECK9-NEXT: [[TMP0:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK9-NEXT: [[TMP0:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK9-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP0]], 1 -// CHECK9-NEXT: store i32 [[ADD]], i32* [[CONV]], align 8 -// CHECK9-NEXT: [[TMP1:%.*]] = load i16, i16* [[CONV1]], align 8 +// CHECK9-NEXT: store i32 [[ADD]], i32* [[CONV]], align 4 +// CHECK9-NEXT: [[TMP1:%.*]] = load i16, i16* [[CONV1]], align 2 // CHECK9-NEXT: [[CONV2:%.*]] = sext i16 [[TMP1]] to i32 // CHECK9-NEXT: [[ADD3:%.*]] = add nsw i32 [[CONV2]], 1 // CHECK9-NEXT: [[CONV4:%.*]] = trunc i32 [[ADD3]] to i16 -// CHECK9-NEXT: store i16 [[CONV4]], i16* [[CONV1]], align 8 +// CHECK9-NEXT: store i16 [[CONV4]], i16* [[CONV1]], align 2 // CHECK9-NEXT: ret void // // @@ -4500,7 +4500,7 @@ int bar(int n){ // CHECK9-NEXT: [[TMP5:%.*]] = load i64, i64* [[VLA_ADDR4]], align 8 // CHECK9-NEXT: [[TMP6:%.*]] = load double*, double** [[CN_ADDR]], align 8 // CHECK9-NEXT: [[TMP7:%.*]] = load %struct.TT*, %struct.TT** [[D_ADDR]], align 8 -// CHECK9-NEXT: [[TMP8:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK9-NEXT: [[TMP8:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK9-NEXT: [[CONV5:%.*]] = bitcast i64* [[A_CASTED]] to i32* // CHECK9-NEXT: store i32 [[TMP8]], i32* [[CONV5]], align 4 // CHECK9-NEXT: [[TMP9:%.*]] = load i64, i64* [[A_CASTED]], align 8 @@ -4542,9 +4542,9 @@ int bar(int n){ // CHECK9-NEXT: [[TMP5:%.*]] = load i64, i64* [[VLA_ADDR4]], align 8 // CHECK9-NEXT: [[TMP6:%.*]] = load double*, double** [[CN_ADDR]], align 8 // CHECK9-NEXT: [[TMP7:%.*]] = load %struct.TT*, %struct.TT** [[D_ADDR]], align 8 -// CHECK9-NEXT: [[TMP8:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK9-NEXT: [[TMP8:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK9-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP8]], 1 -// CHECK9-NEXT: store i32 [[ADD]], i32* [[CONV]], align 8 +// CHECK9-NEXT: store i32 [[ADD]], i32* [[CONV]], align 4 // CHECK9-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x float], [10 x float]* [[TMP0]], i64 0, i64 2 // CHECK9-NEXT: [[TMP9:%.*]] = load float, float* [[ARRAYIDX]], align 4 // CHECK9-NEXT: [[CONV5:%.*]] = fpext float [[TMP9]] to double @@ -4599,15 +4599,15 @@ int bar(int n){ // CHECK9-NEXT: [[CONV1:%.*]] = bitcast i64* [[AA_ADDR]] to i16* // CHECK9-NEXT: [[CONV2:%.*]] = bitcast i64* [[AAA_ADDR]] to i8* // CHECK9-NEXT: [[TMP0:%.*]] = load [10 x i32]*, [10 x i32]** [[B_ADDR]], align 8 -// CHECK9-NEXT: [[TMP1:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK9-NEXT: [[TMP1:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK9-NEXT: [[CONV3:%.*]] = bitcast i64* [[A_CASTED]] to i32* // CHECK9-NEXT: store i32 [[TMP1]], i32* 
[[CONV3]], align 4 // CHECK9-NEXT: [[TMP2:%.*]] = load i64, i64* [[A_CASTED]], align 8 -// CHECK9-NEXT: [[TMP3:%.*]] = load i16, i16* [[CONV1]], align 8 +// CHECK9-NEXT: [[TMP3:%.*]] = load i16, i16* [[CONV1]], align 2 // CHECK9-NEXT: [[CONV4:%.*]] = bitcast i64* [[AA_CASTED]] to i16* // CHECK9-NEXT: store i16 [[TMP3]], i16* [[CONV4]], align 2 // CHECK9-NEXT: [[TMP4:%.*]] = load i64, i64* [[AA_CASTED]], align 8 -// CHECK9-NEXT: [[TMP5:%.*]] = load i8, i8* [[CONV2]], align 8 +// CHECK9-NEXT: [[TMP5:%.*]] = load i8, i8* [[CONV2]], align 1 // CHECK9-NEXT: [[CONV5:%.*]] = bitcast i64* [[AAA_CASTED]] to i8* // CHECK9-NEXT: store i8 [[TMP5]], i8* [[CONV5]], align 1 // CHECK9-NEXT: [[TMP6:%.*]] = load i64, i64* [[AAA_CASTED]], align 8 @@ -4634,19 +4634,19 @@ int bar(int n){ // CHECK9-NEXT: [[CONV1:%.*]] = bitcast i64* [[AA_ADDR]] to i16* // CHECK9-NEXT: [[CONV2:%.*]] = bitcast i64* [[AAA_ADDR]] to i8* // CHECK9-NEXT: [[TMP0:%.*]] = load [10 x i32]*, [10 x i32]** [[B_ADDR]], align 8 -// CHECK9-NEXT: [[TMP1:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK9-NEXT: [[TMP1:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK9-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP1]], 1 -// CHECK9-NEXT: store i32 [[ADD]], i32* [[CONV]], align 8 -// CHECK9-NEXT: [[TMP2:%.*]] = load i16, i16* [[CONV1]], align 8 +// CHECK9-NEXT: store i32 [[ADD]], i32* [[CONV]], align 4 +// CHECK9-NEXT: [[TMP2:%.*]] = load i16, i16* [[CONV1]], align 2 // CHECK9-NEXT: [[CONV3:%.*]] = sext i16 [[TMP2]] to i32 // CHECK9-NEXT: [[ADD4:%.*]] = add nsw i32 [[CONV3]], 1 // CHECK9-NEXT: [[CONV5:%.*]] = trunc i32 [[ADD4]] to i16 -// CHECK9-NEXT: store i16 [[CONV5]], i16* [[CONV1]], align 8 -// CHECK9-NEXT: [[TMP3:%.*]] = load i8, i8* [[CONV2]], align 8 +// CHECK9-NEXT: store i16 [[CONV5]], i16* [[CONV1]], align 2 +// CHECK9-NEXT: [[TMP3:%.*]] = load i8, i8* [[CONV2]], align 1 // CHECK9-NEXT: [[CONV6:%.*]] = sext i8 [[TMP3]] to i32 // CHECK9-NEXT: [[ADD7:%.*]] = add nsw i32 [[CONV6]], 1 // CHECK9-NEXT: [[CONV8:%.*]] = trunc i32 [[ADD7]] to i8 -// CHECK9-NEXT: store i8 [[CONV8]], i8* [[CONV2]], align 8 +// CHECK9-NEXT: store i8 [[CONV8]], i8* [[CONV2]], align 1 // CHECK9-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], [10 x i32]* [[TMP0]], i64 0, i64 2 // CHECK9-NEXT: [[TMP4:%.*]] = load i32, i32* [[ARRAYIDX]], align 4 // CHECK9-NEXT: [[ADD9:%.*]] = add nsw i32 [[TMP4]], 1 @@ -4673,7 +4673,7 @@ int bar(int n){ // CHECK9-NEXT: [[TMP1:%.*]] = load i64, i64* [[VLA_ADDR]], align 8 // CHECK9-NEXT: [[TMP2:%.*]] = load i64, i64* [[VLA_ADDR2]], align 8 // CHECK9-NEXT: [[TMP3:%.*]] = load i16*, i16** [[C_ADDR]], align 8 -// CHECK9-NEXT: [[TMP4:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK9-NEXT: [[TMP4:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK9-NEXT: [[CONV3:%.*]] = bitcast i64* [[B_CASTED]] to i32* // CHECK9-NEXT: store i32 [[TMP4]], i32* [[CONV3]], align 4 // CHECK9-NEXT: [[TMP5:%.*]] = load i64, i64* [[B_CASTED]], align 8 @@ -4703,7 +4703,7 @@ int bar(int n){ // CHECK9-NEXT: [[TMP1:%.*]] = load i64, i64* [[VLA_ADDR]], align 8 // CHECK9-NEXT: [[TMP2:%.*]] = load i64, i64* [[VLA_ADDR2]], align 8 // CHECK9-NEXT: [[TMP3:%.*]] = load i16*, i16** [[C_ADDR]], align 8 -// CHECK9-NEXT: [[TMP4:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK9-NEXT: [[TMP4:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK9-NEXT: [[CONV3:%.*]] = sitofp i32 [[TMP4]] to double // CHECK9-NEXT: [[ADD:%.*]] = fadd double [[CONV3]], 1.500000e+00 // CHECK9-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_S1:%.*]], %struct.S1* [[TMP0]], i32 0, i32 0 @@ -4734,11 
+4734,11 @@ int bar(int n){ // CHECK9-NEXT: [[CONV:%.*]] = bitcast i64* [[A_ADDR]] to i32* // CHECK9-NEXT: [[CONV1:%.*]] = bitcast i64* [[AA_ADDR]] to i16* // CHECK9-NEXT: [[TMP0:%.*]] = load [10 x i32]*, [10 x i32]** [[B_ADDR]], align 8 -// CHECK9-NEXT: [[TMP1:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK9-NEXT: [[TMP1:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK9-NEXT: [[CONV2:%.*]] = bitcast i64* [[A_CASTED]] to i32* // CHECK9-NEXT: store i32 [[TMP1]], i32* [[CONV2]], align 4 // CHECK9-NEXT: [[TMP2:%.*]] = load i64, i64* [[A_CASTED]], align 8 -// CHECK9-NEXT: [[TMP3:%.*]] = load i16, i16* [[CONV1]], align 8 +// CHECK9-NEXT: [[TMP3:%.*]] = load i16, i16* [[CONV1]], align 2 // CHECK9-NEXT: [[CONV3:%.*]] = bitcast i64* [[AA_CASTED]] to i16* // CHECK9-NEXT: store i16 [[TMP3]], i16* [[CONV3]], align 2 // CHECK9-NEXT: [[TMP4:%.*]] = load i64, i64* [[AA_CASTED]], align 8 @@ -4762,14 +4762,14 @@ int bar(int n){ // CHECK9-NEXT: [[CONV:%.*]] = bitcast i64* [[A_ADDR]] to i32* // CHECK9-NEXT: [[CONV1:%.*]] = bitcast i64* [[AA_ADDR]] to i16* // CHECK9-NEXT: [[TMP0:%.*]] = load [10 x i32]*, [10 x i32]** [[B_ADDR]], align 8 -// CHECK9-NEXT: [[TMP1:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK9-NEXT: [[TMP1:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK9-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP1]], 1 -// CHECK9-NEXT: store i32 [[ADD]], i32* [[CONV]], align 8 -// CHECK9-NEXT: [[TMP2:%.*]] = load i16, i16* [[CONV1]], align 8 +// CHECK9-NEXT: store i32 [[ADD]], i32* [[CONV]], align 4 +// CHECK9-NEXT: [[TMP2:%.*]] = load i16, i16* [[CONV1]], align 2 // CHECK9-NEXT: [[CONV2:%.*]] = sext i16 [[TMP2]] to i32 // CHECK9-NEXT: [[ADD3:%.*]] = add nsw i32 [[CONV2]], 1 // CHECK9-NEXT: [[CONV4:%.*]] = trunc i32 [[ADD3]] to i16 -// CHECK9-NEXT: store i16 [[CONV4]], i16* [[CONV1]], align 8 +// CHECK9-NEXT: store i16 [[CONV4]], i16* [[CONV1]], align 2 // CHECK9-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], [10 x i32]* [[TMP0]], i64 0, i64 2 // CHECK9-NEXT: [[TMP3:%.*]] = load i32, i32* [[ARRAYIDX]], align 4 // CHECK9-NEXT: [[ADD5:%.*]] = add nsw i32 [[TMP3]], 1 @@ -4801,7 +4801,7 @@ int bar(int n){ // CHECK10-NEXT: [[AA_CASTED:%.*]] = alloca i64, align 8 // CHECK10-NEXT: store i64 [[AA]], i64* [[AA_ADDR]], align 8 // CHECK10-NEXT: [[CONV:%.*]] = bitcast i64* [[AA_ADDR]] to i16* -// CHECK10-NEXT: [[TMP0:%.*]] = load i16, i16* [[CONV]], align 8 +// CHECK10-NEXT: [[TMP0:%.*]] = load i16, i16* [[CONV]], align 2 // CHECK10-NEXT: [[CONV1:%.*]] = bitcast i64* [[AA_CASTED]] to i16* // CHECK10-NEXT: store i16 [[TMP0]], i16* [[CONV1]], align 2 // CHECK10-NEXT: [[TMP1:%.*]] = load i64, i64* [[AA_CASTED]], align 8 @@ -4819,11 +4819,11 @@ int bar(int n){ // CHECK10-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 // CHECK10-NEXT: store i64 [[AA]], i64* [[AA_ADDR]], align 8 // CHECK10-NEXT: [[CONV:%.*]] = bitcast i64* [[AA_ADDR]] to i16* -// CHECK10-NEXT: [[TMP0:%.*]] = load i16, i16* [[CONV]], align 8 +// CHECK10-NEXT: [[TMP0:%.*]] = load i16, i16* [[CONV]], align 2 // CHECK10-NEXT: [[CONV1:%.*]] = sext i16 [[TMP0]] to i32 // CHECK10-NEXT: [[ADD:%.*]] = add nsw i32 [[CONV1]], 1 // CHECK10-NEXT: [[CONV2:%.*]] = trunc i32 [[ADD]] to i16 -// CHECK10-NEXT: store i16 [[CONV2]], i16* [[CONV]], align 8 +// CHECK10-NEXT: store i16 [[CONV2]], i16* [[CONV]], align 2 // CHECK10-NEXT: [[TMP1:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK10-NEXT: [[TMP2:%.*]] = load i32, i32* [[TMP1]], align 4 // CHECK10-NEXT: [[TMP3:%.*]] = call i32 @__kmpc_cancel(%struct.ident_t* 
@[[GLOB1]], i32 [[TMP2]], i32 1) @@ -4847,11 +4847,11 @@ int bar(int n){ // CHECK10-NEXT: store i64 [[AA]], i64* [[AA_ADDR]], align 8 // CHECK10-NEXT: [[CONV:%.*]] = bitcast i64* [[A_ADDR]] to i32* // CHECK10-NEXT: [[CONV1:%.*]] = bitcast i64* [[AA_ADDR]] to i16* -// CHECK10-NEXT: [[TMP0:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK10-NEXT: [[TMP0:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK10-NEXT: [[CONV2:%.*]] = bitcast i64* [[A_CASTED]] to i32* // CHECK10-NEXT: store i32 [[TMP0]], i32* [[CONV2]], align 4 // CHECK10-NEXT: [[TMP1:%.*]] = load i64, i64* [[A_CASTED]], align 8 -// CHECK10-NEXT: [[TMP2:%.*]] = load i16, i16* [[CONV1]], align 8 +// CHECK10-NEXT: [[TMP2:%.*]] = load i16, i16* [[CONV1]], align 2 // CHECK10-NEXT: [[CONV3:%.*]] = bitcast i64* [[AA_CASTED]] to i16* // CHECK10-NEXT: store i16 [[TMP2]], i16* [[CONV3]], align 2 // CHECK10-NEXT: [[TMP3:%.*]] = load i64, i64* [[AA_CASTED]], align 8 @@ -4872,14 +4872,14 @@ int bar(int n){ // CHECK10-NEXT: store i64 [[AA]], i64* [[AA_ADDR]], align 8 // CHECK10-NEXT: [[CONV:%.*]] = bitcast i64* [[A_ADDR]] to i32* // CHECK10-NEXT: [[CONV1:%.*]] = bitcast i64* [[AA_ADDR]] to i16* -// CHECK10-NEXT: [[TMP0:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK10-NEXT: [[TMP0:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK10-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP0]], 1 -// CHECK10-NEXT: store i32 [[ADD]], i32* [[CONV]], align 8 -// CHECK10-NEXT: [[TMP1:%.*]] = load i16, i16* [[CONV1]], align 8 +// CHECK10-NEXT: store i32 [[ADD]], i32* [[CONV]], align 4 +// CHECK10-NEXT: [[TMP1:%.*]] = load i16, i16* [[CONV1]], align 2 // CHECK10-NEXT: [[CONV2:%.*]] = sext i16 [[TMP1]] to i32 // CHECK10-NEXT: [[ADD3:%.*]] = add nsw i32 [[CONV2]], 1 // CHECK10-NEXT: [[CONV4:%.*]] = trunc i32 [[ADD3]] to i16 -// CHECK10-NEXT: store i16 [[CONV4]], i16* [[CONV1]], align 8 +// CHECK10-NEXT: store i16 [[CONV4]], i16* [[CONV1]], align 2 // CHECK10-NEXT: ret void // // @@ -4914,7 +4914,7 @@ int bar(int n){ // CHECK10-NEXT: [[TMP5:%.*]] = load i64, i64* [[VLA_ADDR4]], align 8 // CHECK10-NEXT: [[TMP6:%.*]] = load double*, double** [[CN_ADDR]], align 8 // CHECK10-NEXT: [[TMP7:%.*]] = load %struct.TT*, %struct.TT** [[D_ADDR]], align 8 -// CHECK10-NEXT: [[TMP8:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK10-NEXT: [[TMP8:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK10-NEXT: [[CONV5:%.*]] = bitcast i64* [[A_CASTED]] to i32* // CHECK10-NEXT: store i32 [[TMP8]], i32* [[CONV5]], align 4 // CHECK10-NEXT: [[TMP9:%.*]] = load i64, i64* [[A_CASTED]], align 8 @@ -4956,9 +4956,9 @@ int bar(int n){ // CHECK10-NEXT: [[TMP5:%.*]] = load i64, i64* [[VLA_ADDR4]], align 8 // CHECK10-NEXT: [[TMP6:%.*]] = load double*, double** [[CN_ADDR]], align 8 // CHECK10-NEXT: [[TMP7:%.*]] = load %struct.TT*, %struct.TT** [[D_ADDR]], align 8 -// CHECK10-NEXT: [[TMP8:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK10-NEXT: [[TMP8:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK10-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP8]], 1 -// CHECK10-NEXT: store i32 [[ADD]], i32* [[CONV]], align 8 +// CHECK10-NEXT: store i32 [[ADD]], i32* [[CONV]], align 4 // CHECK10-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x float], [10 x float]* [[TMP0]], i64 0, i64 2 // CHECK10-NEXT: [[TMP9:%.*]] = load float, float* [[ARRAYIDX]], align 4 // CHECK10-NEXT: [[CONV5:%.*]] = fpext float [[TMP9]] to double @@ -5013,15 +5013,15 @@ int bar(int n){ // CHECK10-NEXT: [[CONV1:%.*]] = bitcast i64* [[AA_ADDR]] to i16* // CHECK10-NEXT: [[CONV2:%.*]] = bitcast i64* [[AAA_ADDR]] to i8* // CHECK10-NEXT: [[TMP0:%.*]] = 
load [10 x i32]*, [10 x i32]** [[B_ADDR]], align 8 -// CHECK10-NEXT: [[TMP1:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK10-NEXT: [[TMP1:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK10-NEXT: [[CONV3:%.*]] = bitcast i64* [[A_CASTED]] to i32* // CHECK10-NEXT: store i32 [[TMP1]], i32* [[CONV3]], align 4 // CHECK10-NEXT: [[TMP2:%.*]] = load i64, i64* [[A_CASTED]], align 8 -// CHECK10-NEXT: [[TMP3:%.*]] = load i16, i16* [[CONV1]], align 8 +// CHECK10-NEXT: [[TMP3:%.*]] = load i16, i16* [[CONV1]], align 2 // CHECK10-NEXT: [[CONV4:%.*]] = bitcast i64* [[AA_CASTED]] to i16* // CHECK10-NEXT: store i16 [[TMP3]], i16* [[CONV4]], align 2 // CHECK10-NEXT: [[TMP4:%.*]] = load i64, i64* [[AA_CASTED]], align 8 -// CHECK10-NEXT: [[TMP5:%.*]] = load i8, i8* [[CONV2]], align 8 +// CHECK10-NEXT: [[TMP5:%.*]] = load i8, i8* [[CONV2]], align 1 // CHECK10-NEXT: [[CONV5:%.*]] = bitcast i64* [[AAA_CASTED]] to i8* // CHECK10-NEXT: store i8 [[TMP5]], i8* [[CONV5]], align 1 // CHECK10-NEXT: [[TMP6:%.*]] = load i64, i64* [[AAA_CASTED]], align 8 @@ -5048,19 +5048,19 @@ int bar(int n){ // CHECK10-NEXT: [[CONV1:%.*]] = bitcast i64* [[AA_ADDR]] to i16* // CHECK10-NEXT: [[CONV2:%.*]] = bitcast i64* [[AAA_ADDR]] to i8* // CHECK10-NEXT: [[TMP0:%.*]] = load [10 x i32]*, [10 x i32]** [[B_ADDR]], align 8 -// CHECK10-NEXT: [[TMP1:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK10-NEXT: [[TMP1:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK10-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP1]], 1 -// CHECK10-NEXT: store i32 [[ADD]], i32* [[CONV]], align 8 -// CHECK10-NEXT: [[TMP2:%.*]] = load i16, i16* [[CONV1]], align 8 +// CHECK10-NEXT: store i32 [[ADD]], i32* [[CONV]], align 4 +// CHECK10-NEXT: [[TMP2:%.*]] = load i16, i16* [[CONV1]], align 2 // CHECK10-NEXT: [[CONV3:%.*]] = sext i16 [[TMP2]] to i32 // CHECK10-NEXT: [[ADD4:%.*]] = add nsw i32 [[CONV3]], 1 // CHECK10-NEXT: [[CONV5:%.*]] = trunc i32 [[ADD4]] to i16 -// CHECK10-NEXT: store i16 [[CONV5]], i16* [[CONV1]], align 8 -// CHECK10-NEXT: [[TMP3:%.*]] = load i8, i8* [[CONV2]], align 8 +// CHECK10-NEXT: store i16 [[CONV5]], i16* [[CONV1]], align 2 +// CHECK10-NEXT: [[TMP3:%.*]] = load i8, i8* [[CONV2]], align 1 // CHECK10-NEXT: [[CONV6:%.*]] = sext i8 [[TMP3]] to i32 // CHECK10-NEXT: [[ADD7:%.*]] = add nsw i32 [[CONV6]], 1 // CHECK10-NEXT: [[CONV8:%.*]] = trunc i32 [[ADD7]] to i8 -// CHECK10-NEXT: store i8 [[CONV8]], i8* [[CONV2]], align 8 +// CHECK10-NEXT: store i8 [[CONV8]], i8* [[CONV2]], align 1 // CHECK10-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], [10 x i32]* [[TMP0]], i64 0, i64 2 // CHECK10-NEXT: [[TMP4:%.*]] = load i32, i32* [[ARRAYIDX]], align 4 // CHECK10-NEXT: [[ADD9:%.*]] = add nsw i32 [[TMP4]], 1 @@ -5087,7 +5087,7 @@ int bar(int n){ // CHECK10-NEXT: [[TMP1:%.*]] = load i64, i64* [[VLA_ADDR]], align 8 // CHECK10-NEXT: [[TMP2:%.*]] = load i64, i64* [[VLA_ADDR2]], align 8 // CHECK10-NEXT: [[TMP3:%.*]] = load i16*, i16** [[C_ADDR]], align 8 -// CHECK10-NEXT: [[TMP4:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK10-NEXT: [[TMP4:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK10-NEXT: [[CONV3:%.*]] = bitcast i64* [[B_CASTED]] to i32* // CHECK10-NEXT: store i32 [[TMP4]], i32* [[CONV3]], align 4 // CHECK10-NEXT: [[TMP5:%.*]] = load i64, i64* [[B_CASTED]], align 8 @@ -5117,7 +5117,7 @@ int bar(int n){ // CHECK10-NEXT: [[TMP1:%.*]] = load i64, i64* [[VLA_ADDR]], align 8 // CHECK10-NEXT: [[TMP2:%.*]] = load i64, i64* [[VLA_ADDR2]], align 8 // CHECK10-NEXT: [[TMP3:%.*]] = load i16*, i16** [[C_ADDR]], align 8 -// CHECK10-NEXT: [[TMP4:%.*]] = load 
i32, i32* [[CONV]], align 8 +// CHECK10-NEXT: [[TMP4:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK10-NEXT: [[CONV3:%.*]] = sitofp i32 [[TMP4]] to double // CHECK10-NEXT: [[ADD:%.*]] = fadd double [[CONV3]], 1.500000e+00 // CHECK10-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_S1:%.*]], %struct.S1* [[TMP0]], i32 0, i32 0 @@ -5148,11 +5148,11 @@ int bar(int n){ // CHECK10-NEXT: [[CONV:%.*]] = bitcast i64* [[A_ADDR]] to i32* // CHECK10-NEXT: [[CONV1:%.*]] = bitcast i64* [[AA_ADDR]] to i16* // CHECK10-NEXT: [[TMP0:%.*]] = load [10 x i32]*, [10 x i32]** [[B_ADDR]], align 8 -// CHECK10-NEXT: [[TMP1:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK10-NEXT: [[TMP1:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK10-NEXT: [[CONV2:%.*]] = bitcast i64* [[A_CASTED]] to i32* // CHECK10-NEXT: store i32 [[TMP1]], i32* [[CONV2]], align 4 // CHECK10-NEXT: [[TMP2:%.*]] = load i64, i64* [[A_CASTED]], align 8 -// CHECK10-NEXT: [[TMP3:%.*]] = load i16, i16* [[CONV1]], align 8 +// CHECK10-NEXT: [[TMP3:%.*]] = load i16, i16* [[CONV1]], align 2 // CHECK10-NEXT: [[CONV3:%.*]] = bitcast i64* [[AA_CASTED]] to i16* // CHECK10-NEXT: store i16 [[TMP3]], i16* [[CONV3]], align 2 // CHECK10-NEXT: [[TMP4:%.*]] = load i64, i64* [[AA_CASTED]], align 8 @@ -5176,14 +5176,14 @@ int bar(int n){ // CHECK10-NEXT: [[CONV:%.*]] = bitcast i64* [[A_ADDR]] to i32* // CHECK10-NEXT: [[CONV1:%.*]] = bitcast i64* [[AA_ADDR]] to i16* // CHECK10-NEXT: [[TMP0:%.*]] = load [10 x i32]*, [10 x i32]** [[B_ADDR]], align 8 -// CHECK10-NEXT: [[TMP1:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK10-NEXT: [[TMP1:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK10-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP1]], 1 -// CHECK10-NEXT: store i32 [[ADD]], i32* [[CONV]], align 8 -// CHECK10-NEXT: [[TMP2:%.*]] = load i16, i16* [[CONV1]], align 8 +// CHECK10-NEXT: store i32 [[ADD]], i32* [[CONV]], align 4 +// CHECK10-NEXT: [[TMP2:%.*]] = load i16, i16* [[CONV1]], align 2 // CHECK10-NEXT: [[CONV2:%.*]] = sext i16 [[TMP2]] to i32 // CHECK10-NEXT: [[ADD3:%.*]] = add nsw i32 [[CONV2]], 1 // CHECK10-NEXT: [[CONV4:%.*]] = trunc i32 [[ADD3]] to i16 -// CHECK10-NEXT: store i16 [[CONV4]], i16* [[CONV1]], align 8 +// CHECK10-NEXT: store i16 [[CONV4]], i16* [[CONV1]], align 2 // CHECK10-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], [10 x i32]* [[TMP0]], i64 0, i64 2 // CHECK10-NEXT: [[TMP3:%.*]] = load i32, i32* [[ARRAYIDX]], align 4 // CHECK10-NEXT: [[ADD5:%.*]] = add nsw i32 [[TMP3]], 1 @@ -5215,7 +5215,7 @@ int bar(int n){ // CHECK11-NEXT: [[AA_CASTED:%.*]] = alloca i32, align 4 // CHECK11-NEXT: store i32 [[AA]], i32* [[AA_ADDR]], align 4 // CHECK11-NEXT: [[CONV:%.*]] = bitcast i32* [[AA_ADDR]] to i16* -// CHECK11-NEXT: [[TMP0:%.*]] = load i16, i16* [[CONV]], align 4 +// CHECK11-NEXT: [[TMP0:%.*]] = load i16, i16* [[CONV]], align 2 // CHECK11-NEXT: [[CONV1:%.*]] = bitcast i32* [[AA_CASTED]] to i16* // CHECK11-NEXT: store i16 [[TMP0]], i16* [[CONV1]], align 2 // CHECK11-NEXT: [[TMP1:%.*]] = load i32, i32* [[AA_CASTED]], align 4 @@ -5233,11 +5233,11 @@ int bar(int n){ // CHECK11-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4 // CHECK11-NEXT: store i32 [[AA]], i32* [[AA_ADDR]], align 4 // CHECK11-NEXT: [[CONV:%.*]] = bitcast i32* [[AA_ADDR]] to i16* -// CHECK11-NEXT: [[TMP0:%.*]] = load i16, i16* [[CONV]], align 4 +// CHECK11-NEXT: [[TMP0:%.*]] = load i16, i16* [[CONV]], align 2 // CHECK11-NEXT: [[CONV1:%.*]] = sext i16 [[TMP0]] to i32 // CHECK11-NEXT: [[ADD:%.*]] = add nsw i32 [[CONV1]], 1 // CHECK11-NEXT: [[CONV2:%.*]] = 
trunc i32 [[ADD]] to i16 -// CHECK11-NEXT: store i16 [[CONV2]], i16* [[CONV]], align 4 +// CHECK11-NEXT: store i16 [[CONV2]], i16* [[CONV]], align 2 // CHECK11-NEXT: [[TMP1:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK11-NEXT: [[TMP2:%.*]] = load i32, i32* [[TMP1]], align 4 // CHECK11-NEXT: [[TMP3:%.*]] = call i32 @__kmpc_cancel(%struct.ident_t* @[[GLOB1]], i32 [[TMP2]], i32 1) @@ -5263,7 +5263,7 @@ int bar(int n){ // CHECK11-NEXT: [[TMP0:%.*]] = load i32, i32* [[A_ADDR]], align 4 // CHECK11-NEXT: store i32 [[TMP0]], i32* [[A_CASTED]], align 4 // CHECK11-NEXT: [[TMP1:%.*]] = load i32, i32* [[A_CASTED]], align 4 -// CHECK11-NEXT: [[TMP2:%.*]] = load i16, i16* [[CONV]], align 4 +// CHECK11-NEXT: [[TMP2:%.*]] = load i16, i16* [[CONV]], align 2 // CHECK11-NEXT: [[CONV1:%.*]] = bitcast i32* [[AA_CASTED]] to i16* // CHECK11-NEXT: store i16 [[TMP2]], i16* [[CONV1]], align 2 // CHECK11-NEXT: [[TMP3:%.*]] = load i32, i32* [[AA_CASTED]], align 4 @@ -5286,11 +5286,11 @@ int bar(int n){ // CHECK11-NEXT: [[TMP0:%.*]] = load i32, i32* [[A_ADDR]], align 4 // CHECK11-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP0]], 1 // CHECK11-NEXT: store i32 [[ADD]], i32* [[A_ADDR]], align 4 -// CHECK11-NEXT: [[TMP1:%.*]] = load i16, i16* [[CONV]], align 4 +// CHECK11-NEXT: [[TMP1:%.*]] = load i16, i16* [[CONV]], align 2 // CHECK11-NEXT: [[CONV1:%.*]] = sext i16 [[TMP1]] to i32 // CHECK11-NEXT: [[ADD2:%.*]] = add nsw i32 [[CONV1]], 1 // CHECK11-NEXT: [[CONV3:%.*]] = trunc i32 [[ADD2]] to i16 -// CHECK11-NEXT: store i16 [[CONV3]], i16* [[CONV]], align 4 +// CHECK11-NEXT: store i16 [[CONV3]], i16* [[CONV]], align 2 // CHECK11-NEXT: ret void // // @@ -5423,11 +5423,11 @@ int bar(int n){ // CHECK11-NEXT: [[TMP1:%.*]] = load i32, i32* [[A_ADDR]], align 4 // CHECK11-NEXT: store i32 [[TMP1]], i32* [[A_CASTED]], align 4 // CHECK11-NEXT: [[TMP2:%.*]] = load i32, i32* [[A_CASTED]], align 4 -// CHECK11-NEXT: [[TMP3:%.*]] = load i16, i16* [[CONV]], align 4 +// CHECK11-NEXT: [[TMP3:%.*]] = load i16, i16* [[CONV]], align 2 // CHECK11-NEXT: [[CONV2:%.*]] = bitcast i32* [[AA_CASTED]] to i16* // CHECK11-NEXT: store i16 [[TMP3]], i16* [[CONV2]], align 2 // CHECK11-NEXT: [[TMP4:%.*]] = load i32, i32* [[AA_CASTED]], align 4 -// CHECK11-NEXT: [[TMP5:%.*]] = load i8, i8* [[CONV1]], align 4 +// CHECK11-NEXT: [[TMP5:%.*]] = load i8, i8* [[CONV1]], align 1 // CHECK11-NEXT: [[CONV3:%.*]] = bitcast i32* [[AAA_CASTED]] to i8* // CHECK11-NEXT: store i8 [[TMP5]], i8* [[CONV3]], align 1 // CHECK11-NEXT: [[TMP6:%.*]] = load i32, i32* [[AAA_CASTED]], align 4 @@ -5456,16 +5456,16 @@ int bar(int n){ // CHECK11-NEXT: [[TMP1:%.*]] = load i32, i32* [[A_ADDR]], align 4 // CHECK11-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP1]], 1 // CHECK11-NEXT: store i32 [[ADD]], i32* [[A_ADDR]], align 4 -// CHECK11-NEXT: [[TMP2:%.*]] = load i16, i16* [[CONV]], align 4 +// CHECK11-NEXT: [[TMP2:%.*]] = load i16, i16* [[CONV]], align 2 // CHECK11-NEXT: [[CONV2:%.*]] = sext i16 [[TMP2]] to i32 // CHECK11-NEXT: [[ADD3:%.*]] = add nsw i32 [[CONV2]], 1 // CHECK11-NEXT: [[CONV4:%.*]] = trunc i32 [[ADD3]] to i16 -// CHECK11-NEXT: store i16 [[CONV4]], i16* [[CONV]], align 4 -// CHECK11-NEXT: [[TMP3:%.*]] = load i8, i8* [[CONV1]], align 4 +// CHECK11-NEXT: store i16 [[CONV4]], i16* [[CONV]], align 2 +// CHECK11-NEXT: [[TMP3:%.*]] = load i8, i8* [[CONV1]], align 1 // CHECK11-NEXT: [[CONV5:%.*]] = sext i8 [[TMP3]] to i32 // CHECK11-NEXT: [[ADD6:%.*]] = add nsw i32 [[CONV5]], 1 // CHECK11-NEXT: [[CONV7:%.*]] = trunc i32 [[ADD6]] to i8 -// CHECK11-NEXT: store i8 [[CONV7]], i8* 
[[CONV1]], align 4 +// CHECK11-NEXT: store i8 [[CONV7]], i8* [[CONV1]], align 1 // CHECK11-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], [10 x i32]* [[TMP0]], i32 0, i32 2 // CHECK11-NEXT: [[TMP4:%.*]] = load i32, i32* [[ARRAYIDX]], align 4 // CHECK11-NEXT: [[ADD8:%.*]] = add nsw i32 [[TMP4]], 1 @@ -5552,7 +5552,7 @@ int bar(int n){ // CHECK11-NEXT: [[TMP1:%.*]] = load i32, i32* [[A_ADDR]], align 4 // CHECK11-NEXT: store i32 [[TMP1]], i32* [[A_CASTED]], align 4 // CHECK11-NEXT: [[TMP2:%.*]] = load i32, i32* [[A_CASTED]], align 4 -// CHECK11-NEXT: [[TMP3:%.*]] = load i16, i16* [[CONV]], align 4 +// CHECK11-NEXT: [[TMP3:%.*]] = load i16, i16* [[CONV]], align 2 // CHECK11-NEXT: [[CONV1:%.*]] = bitcast i32* [[AA_CASTED]] to i16* // CHECK11-NEXT: store i16 [[TMP3]], i16* [[CONV1]], align 2 // CHECK11-NEXT: [[TMP4:%.*]] = load i32, i32* [[AA_CASTED]], align 4 @@ -5578,11 +5578,11 @@ int bar(int n){ // CHECK11-NEXT: [[TMP1:%.*]] = load i32, i32* [[A_ADDR]], align 4 // CHECK11-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP1]], 1 // CHECK11-NEXT: store i32 [[ADD]], i32* [[A_ADDR]], align 4 -// CHECK11-NEXT: [[TMP2:%.*]] = load i16, i16* [[CONV]], align 4 +// CHECK11-NEXT: [[TMP2:%.*]] = load i16, i16* [[CONV]], align 2 // CHECK11-NEXT: [[CONV1:%.*]] = sext i16 [[TMP2]] to i32 // CHECK11-NEXT: [[ADD2:%.*]] = add nsw i32 [[CONV1]], 1 // CHECK11-NEXT: [[CONV3:%.*]] = trunc i32 [[ADD2]] to i16 -// CHECK11-NEXT: store i16 [[CONV3]], i16* [[CONV]], align 4 +// CHECK11-NEXT: store i16 [[CONV3]], i16* [[CONV]], align 2 // CHECK11-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], [10 x i32]* [[TMP0]], i32 0, i32 2 // CHECK11-NEXT: [[TMP3:%.*]] = load i32, i32* [[ARRAYIDX]], align 4 // CHECK11-NEXT: [[ADD4:%.*]] = add nsw i32 [[TMP3]], 1 @@ -5614,7 +5614,7 @@ int bar(int n){ // CHECK12-NEXT: [[AA_CASTED:%.*]] = alloca i32, align 4 // CHECK12-NEXT: store i32 [[AA]], i32* [[AA_ADDR]], align 4 // CHECK12-NEXT: [[CONV:%.*]] = bitcast i32* [[AA_ADDR]] to i16* -// CHECK12-NEXT: [[TMP0:%.*]] = load i16, i16* [[CONV]], align 4 +// CHECK12-NEXT: [[TMP0:%.*]] = load i16, i16* [[CONV]], align 2 // CHECK12-NEXT: [[CONV1:%.*]] = bitcast i32* [[AA_CASTED]] to i16* // CHECK12-NEXT: store i16 [[TMP0]], i16* [[CONV1]], align 2 // CHECK12-NEXT: [[TMP1:%.*]] = load i32, i32* [[AA_CASTED]], align 4 @@ -5632,11 +5632,11 @@ int bar(int n){ // CHECK12-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4 // CHECK12-NEXT: store i32 [[AA]], i32* [[AA_ADDR]], align 4 // CHECK12-NEXT: [[CONV:%.*]] = bitcast i32* [[AA_ADDR]] to i16* -// CHECK12-NEXT: [[TMP0:%.*]] = load i16, i16* [[CONV]], align 4 +// CHECK12-NEXT: [[TMP0:%.*]] = load i16, i16* [[CONV]], align 2 // CHECK12-NEXT: [[CONV1:%.*]] = sext i16 [[TMP0]] to i32 // CHECK12-NEXT: [[ADD:%.*]] = add nsw i32 [[CONV1]], 1 // CHECK12-NEXT: [[CONV2:%.*]] = trunc i32 [[ADD]] to i16 -// CHECK12-NEXT: store i16 [[CONV2]], i16* [[CONV]], align 4 +// CHECK12-NEXT: store i16 [[CONV2]], i16* [[CONV]], align 2 // CHECK12-NEXT: [[TMP1:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK12-NEXT: [[TMP2:%.*]] = load i32, i32* [[TMP1]], align 4 // CHECK12-NEXT: [[TMP3:%.*]] = call i32 @__kmpc_cancel(%struct.ident_t* @[[GLOB1]], i32 [[TMP2]], i32 1) @@ -5662,7 +5662,7 @@ int bar(int n){ // CHECK12-NEXT: [[TMP0:%.*]] = load i32, i32* [[A_ADDR]], align 4 // CHECK12-NEXT: store i32 [[TMP0]], i32* [[A_CASTED]], align 4 // CHECK12-NEXT: [[TMP1:%.*]] = load i32, i32* [[A_CASTED]], align 4 -// CHECK12-NEXT: [[TMP2:%.*]] = load i16, i16* [[CONV]], align 4 
+// CHECK12-NEXT: [[TMP2:%.*]] = load i16, i16* [[CONV]], align 2 // CHECK12-NEXT: [[CONV1:%.*]] = bitcast i32* [[AA_CASTED]] to i16* // CHECK12-NEXT: store i16 [[TMP2]], i16* [[CONV1]], align 2 // CHECK12-NEXT: [[TMP3:%.*]] = load i32, i32* [[AA_CASTED]], align 4 @@ -5685,11 +5685,11 @@ int bar(int n){ // CHECK12-NEXT: [[TMP0:%.*]] = load i32, i32* [[A_ADDR]], align 4 // CHECK12-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP0]], 1 // CHECK12-NEXT: store i32 [[ADD]], i32* [[A_ADDR]], align 4 -// CHECK12-NEXT: [[TMP1:%.*]] = load i16, i16* [[CONV]], align 4 +// CHECK12-NEXT: [[TMP1:%.*]] = load i16, i16* [[CONV]], align 2 // CHECK12-NEXT: [[CONV1:%.*]] = sext i16 [[TMP1]] to i32 // CHECK12-NEXT: [[ADD2:%.*]] = add nsw i32 [[CONV1]], 1 // CHECK12-NEXT: [[CONV3:%.*]] = trunc i32 [[ADD2]] to i16 -// CHECK12-NEXT: store i16 [[CONV3]], i16* [[CONV]], align 4 +// CHECK12-NEXT: store i16 [[CONV3]], i16* [[CONV]], align 2 // CHECK12-NEXT: ret void // // @@ -5822,11 +5822,11 @@ int bar(int n){ // CHECK12-NEXT: [[TMP1:%.*]] = load i32, i32* [[A_ADDR]], align 4 // CHECK12-NEXT: store i32 [[TMP1]], i32* [[A_CASTED]], align 4 // CHECK12-NEXT: [[TMP2:%.*]] = load i32, i32* [[A_CASTED]], align 4 -// CHECK12-NEXT: [[TMP3:%.*]] = load i16, i16* [[CONV]], align 4 +// CHECK12-NEXT: [[TMP3:%.*]] = load i16, i16* [[CONV]], align 2 // CHECK12-NEXT: [[CONV2:%.*]] = bitcast i32* [[AA_CASTED]] to i16* // CHECK12-NEXT: store i16 [[TMP3]], i16* [[CONV2]], align 2 // CHECK12-NEXT: [[TMP4:%.*]] = load i32, i32* [[AA_CASTED]], align 4 -// CHECK12-NEXT: [[TMP5:%.*]] = load i8, i8* [[CONV1]], align 4 +// CHECK12-NEXT: [[TMP5:%.*]] = load i8, i8* [[CONV1]], align 1 // CHECK12-NEXT: [[CONV3:%.*]] = bitcast i32* [[AAA_CASTED]] to i8* // CHECK12-NEXT: store i8 [[TMP5]], i8* [[CONV3]], align 1 // CHECK12-NEXT: [[TMP6:%.*]] = load i32, i32* [[AAA_CASTED]], align 4 @@ -5855,16 +5855,16 @@ int bar(int n){ // CHECK12-NEXT: [[TMP1:%.*]] = load i32, i32* [[A_ADDR]], align 4 // CHECK12-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP1]], 1 // CHECK12-NEXT: store i32 [[ADD]], i32* [[A_ADDR]], align 4 -// CHECK12-NEXT: [[TMP2:%.*]] = load i16, i16* [[CONV]], align 4 +// CHECK12-NEXT: [[TMP2:%.*]] = load i16, i16* [[CONV]], align 2 // CHECK12-NEXT: [[CONV2:%.*]] = sext i16 [[TMP2]] to i32 // CHECK12-NEXT: [[ADD3:%.*]] = add nsw i32 [[CONV2]], 1 // CHECK12-NEXT: [[CONV4:%.*]] = trunc i32 [[ADD3]] to i16 -// CHECK12-NEXT: store i16 [[CONV4]], i16* [[CONV]], align 4 -// CHECK12-NEXT: [[TMP3:%.*]] = load i8, i8* [[CONV1]], align 4 +// CHECK12-NEXT: store i16 [[CONV4]], i16* [[CONV]], align 2 +// CHECK12-NEXT: [[TMP3:%.*]] = load i8, i8* [[CONV1]], align 1 // CHECK12-NEXT: [[CONV5:%.*]] = sext i8 [[TMP3]] to i32 // CHECK12-NEXT: [[ADD6:%.*]] = add nsw i32 [[CONV5]], 1 // CHECK12-NEXT: [[CONV7:%.*]] = trunc i32 [[ADD6]] to i8 -// CHECK12-NEXT: store i8 [[CONV7]], i8* [[CONV1]], align 4 +// CHECK12-NEXT: store i8 [[CONV7]], i8* [[CONV1]], align 1 // CHECK12-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], [10 x i32]* [[TMP0]], i32 0, i32 2 // CHECK12-NEXT: [[TMP4:%.*]] = load i32, i32* [[ARRAYIDX]], align 4 // CHECK12-NEXT: [[ADD8:%.*]] = add nsw i32 [[TMP4]], 1 @@ -5951,7 +5951,7 @@ int bar(int n){ // CHECK12-NEXT: [[TMP1:%.*]] = load i32, i32* [[A_ADDR]], align 4 // CHECK12-NEXT: store i32 [[TMP1]], i32* [[A_CASTED]], align 4 // CHECK12-NEXT: [[TMP2:%.*]] = load i32, i32* [[A_CASTED]], align 4 -// CHECK12-NEXT: [[TMP3:%.*]] = load i16, i16* [[CONV]], align 4 +// CHECK12-NEXT: [[TMP3:%.*]] = load i16, i16* [[CONV]], align 2 // CHECK12-NEXT: 
[[CONV1:%.*]] = bitcast i32* [[AA_CASTED]] to i16* // CHECK12-NEXT: store i16 [[TMP3]], i16* [[CONV1]], align 2 // CHECK12-NEXT: [[TMP4:%.*]] = load i32, i32* [[AA_CASTED]], align 4 @@ -5977,11 +5977,11 @@ int bar(int n){ // CHECK12-NEXT: [[TMP1:%.*]] = load i32, i32* [[A_ADDR]], align 4 // CHECK12-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP1]], 1 // CHECK12-NEXT: store i32 [[ADD]], i32* [[A_ADDR]], align 4 -// CHECK12-NEXT: [[TMP2:%.*]] = load i16, i16* [[CONV]], align 4 +// CHECK12-NEXT: [[TMP2:%.*]] = load i16, i16* [[CONV]], align 2 // CHECK12-NEXT: [[CONV1:%.*]] = sext i16 [[TMP2]] to i32 // CHECK12-NEXT: [[ADD2:%.*]] = add nsw i32 [[CONV1]], 1 // CHECK12-NEXT: [[CONV3:%.*]] = trunc i32 [[ADD2]] to i16 -// CHECK12-NEXT: store i16 [[CONV3]], i16* [[CONV]], align 4 +// CHECK12-NEXT: store i16 [[CONV3]], i16* [[CONV]], align 2 // CHECK12-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], [10 x i32]* [[TMP0]], i32 0, i32 2 // CHECK12-NEXT: [[TMP3:%.*]] = load i32, i32* [[ARRAYIDX]], align 4 // CHECK12-NEXT: [[ADD4:%.*]] = add nsw i32 [[TMP3]], 1 @@ -6292,7 +6292,7 @@ int bar(int n){ // CHECK17-NEXT: [[A_CASTED:%.*]] = alloca i64, align 8 // CHECK17-NEXT: store i64 [[A]], i64* [[A_ADDR]], align 8 // CHECK17-NEXT: [[CONV:%.*]] = bitcast i64* [[A_ADDR]] to i32* -// CHECK17-NEXT: [[TMP0:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK17-NEXT: [[TMP0:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK17-NEXT: [[CONV1:%.*]] = bitcast i64* [[A_CASTED]] to i32* // CHECK17-NEXT: store i32 [[TMP0]], i32* [[CONV1]], align 4 // CHECK17-NEXT: [[TMP1:%.*]] = load i64, i64* [[A_CASTED]], align 8 @@ -6310,9 +6310,9 @@ int bar(int n){ // CHECK17-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 // CHECK17-NEXT: store i64 [[A]], i64* [[A_ADDR]], align 8 // CHECK17-NEXT: [[CONV:%.*]] = bitcast i64* [[A_ADDR]] to i32* -// CHECK17-NEXT: [[TMP0:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK17-NEXT: [[TMP0:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK17-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP0]], 1 -// CHECK17-NEXT: store i32 [[ADD]], i32* [[CONV]], align 8 +// CHECK17-NEXT: store i32 [[ADD]], i32* [[CONV]], align 4 // CHECK17-NEXT: ret void // // @@ -6323,7 +6323,7 @@ int bar(int n){ // CHECK17-NEXT: [[AA_CASTED:%.*]] = alloca i64, align 8 // CHECK17-NEXT: store i64 [[AA]], i64* [[AA_ADDR]], align 8 // CHECK17-NEXT: [[CONV:%.*]] = bitcast i64* [[AA_ADDR]] to i16* -// CHECK17-NEXT: [[TMP0:%.*]] = load i16, i16* [[CONV]], align 8 +// CHECK17-NEXT: [[TMP0:%.*]] = load i16, i16* [[CONV]], align 2 // CHECK17-NEXT: [[CONV1:%.*]] = bitcast i64* [[AA_CASTED]] to i16* // CHECK17-NEXT: store i16 [[TMP0]], i16* [[CONV1]], align 2 // CHECK17-NEXT: [[TMP1:%.*]] = load i64, i64* [[AA_CASTED]], align 8 @@ -6341,11 +6341,11 @@ int bar(int n){ // CHECK17-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 // CHECK17-NEXT: store i64 [[AA]], i64* [[AA_ADDR]], align 8 // CHECK17-NEXT: [[CONV:%.*]] = bitcast i64* [[AA_ADDR]] to i16* -// CHECK17-NEXT: [[TMP0:%.*]] = load i16, i16* [[CONV]], align 8 +// CHECK17-NEXT: [[TMP0:%.*]] = load i16, i16* [[CONV]], align 2 // CHECK17-NEXT: [[CONV1:%.*]] = sext i16 [[TMP0]] to i32 // CHECK17-NEXT: [[ADD:%.*]] = add nsw i32 [[CONV1]], 1 // CHECK17-NEXT: [[CONV2:%.*]] = trunc i32 [[ADD]] to i16 -// CHECK17-NEXT: store i16 [[CONV2]], i16* [[CONV]], align 8 +// CHECK17-NEXT: store i16 [[CONV2]], i16* [[CONV]], align 2 // CHECK17-NEXT: [[TMP1:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK17-NEXT: [[TMP2:%.*]] = load i32, 
i32* [[TMP1]], align 4 // CHECK17-NEXT: [[TMP3:%.*]] = call i32 @__kmpc_cancel(%struct.ident_t* @[[GLOB1]], i32 [[TMP2]], i32 1) @@ -6369,11 +6369,11 @@ int bar(int n){ // CHECK17-NEXT: store i64 [[AA]], i64* [[AA_ADDR]], align 8 // CHECK17-NEXT: [[CONV:%.*]] = bitcast i64* [[A_ADDR]] to i32* // CHECK17-NEXT: [[CONV1:%.*]] = bitcast i64* [[AA_ADDR]] to i16* -// CHECK17-NEXT: [[TMP0:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK17-NEXT: [[TMP0:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK17-NEXT: [[CONV2:%.*]] = bitcast i64* [[A_CASTED]] to i32* // CHECK17-NEXT: store i32 [[TMP0]], i32* [[CONV2]], align 4 // CHECK17-NEXT: [[TMP1:%.*]] = load i64, i64* [[A_CASTED]], align 8 -// CHECK17-NEXT: [[TMP2:%.*]] = load i16, i16* [[CONV1]], align 8 +// CHECK17-NEXT: [[TMP2:%.*]] = load i16, i16* [[CONV1]], align 2 // CHECK17-NEXT: [[CONV3:%.*]] = bitcast i64* [[AA_CASTED]] to i16* // CHECK17-NEXT: store i16 [[TMP2]], i16* [[CONV3]], align 2 // CHECK17-NEXT: [[TMP3:%.*]] = load i64, i64* [[AA_CASTED]], align 8 @@ -6394,14 +6394,14 @@ int bar(int n){ // CHECK17-NEXT: store i64 [[AA]], i64* [[AA_ADDR]], align 8 // CHECK17-NEXT: [[CONV:%.*]] = bitcast i64* [[A_ADDR]] to i32* // CHECK17-NEXT: [[CONV1:%.*]] = bitcast i64* [[AA_ADDR]] to i16* -// CHECK17-NEXT: [[TMP0:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK17-NEXT: [[TMP0:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK17-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP0]], 1 -// CHECK17-NEXT: store i32 [[ADD]], i32* [[CONV]], align 8 -// CHECK17-NEXT: [[TMP1:%.*]] = load i16, i16* [[CONV1]], align 8 +// CHECK17-NEXT: store i32 [[ADD]], i32* [[CONV]], align 4 +// CHECK17-NEXT: [[TMP1:%.*]] = load i16, i16* [[CONV1]], align 2 // CHECK17-NEXT: [[CONV2:%.*]] = sext i16 [[TMP1]] to i32 // CHECK17-NEXT: [[ADD3:%.*]] = add nsw i32 [[CONV2]], 1 // CHECK17-NEXT: [[CONV4:%.*]] = trunc i32 [[ADD3]] to i16 -// CHECK17-NEXT: store i16 [[CONV4]], i16* [[CONV1]], align 8 +// CHECK17-NEXT: store i16 [[CONV4]], i16* [[CONV1]], align 2 // CHECK17-NEXT: ret void // // @@ -6436,7 +6436,7 @@ int bar(int n){ // CHECK17-NEXT: [[TMP5:%.*]] = load i64, i64* [[VLA_ADDR4]], align 8 // CHECK17-NEXT: [[TMP6:%.*]] = load double*, double** [[CN_ADDR]], align 8 // CHECK17-NEXT: [[TMP7:%.*]] = load %struct.TT*, %struct.TT** [[D_ADDR]], align 8 -// CHECK17-NEXT: [[TMP8:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK17-NEXT: [[TMP8:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK17-NEXT: [[CONV5:%.*]] = bitcast i64* [[A_CASTED]] to i32* // CHECK17-NEXT: store i32 [[TMP8]], i32* [[CONV5]], align 4 // CHECK17-NEXT: [[TMP9:%.*]] = load i64, i64* [[A_CASTED]], align 8 @@ -6478,9 +6478,9 @@ int bar(int n){ // CHECK17-NEXT: [[TMP5:%.*]] = load i64, i64* [[VLA_ADDR4]], align 8 // CHECK17-NEXT: [[TMP6:%.*]] = load double*, double** [[CN_ADDR]], align 8 // CHECK17-NEXT: [[TMP7:%.*]] = load %struct.TT*, %struct.TT** [[D_ADDR]], align 8 -// CHECK17-NEXT: [[TMP8:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK17-NEXT: [[TMP8:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK17-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP8]], 1 -// CHECK17-NEXT: store i32 [[ADD]], i32* [[CONV]], align 8 +// CHECK17-NEXT: store i32 [[ADD]], i32* [[CONV]], align 4 // CHECK17-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x float], [10 x float]* [[TMP0]], i64 0, i64 2 // CHECK17-NEXT: [[TMP9:%.*]] = load float, float* [[ARRAYIDX]], align 4 // CHECK17-NEXT: [[CONV5:%.*]] = fpext float [[TMP9]] to double @@ -6835,7 +6835,7 @@ int bar(int n){ // CHECK17-NEXT: [[TMP1:%.*]] = load i64, i64* [[VLA_ADDR]], align 8 // 
CHECK17-NEXT: [[TMP2:%.*]] = load i64, i64* [[VLA_ADDR2]], align 8 // CHECK17-NEXT: [[TMP3:%.*]] = load i16*, i16** [[C_ADDR]], align 8 -// CHECK17-NEXT: [[TMP4:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK17-NEXT: [[TMP4:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK17-NEXT: [[CONV3:%.*]] = bitcast i64* [[B_CASTED]] to i32* // CHECK17-NEXT: store i32 [[TMP4]], i32* [[CONV3]], align 4 // CHECK17-NEXT: [[TMP5:%.*]] = load i64, i64* [[B_CASTED]], align 8 @@ -6865,7 +6865,7 @@ int bar(int n){ // CHECK17-NEXT: [[TMP1:%.*]] = load i64, i64* [[VLA_ADDR]], align 8 // CHECK17-NEXT: [[TMP2:%.*]] = load i64, i64* [[VLA_ADDR2]], align 8 // CHECK17-NEXT: [[TMP3:%.*]] = load i16*, i16** [[C_ADDR]], align 8 -// CHECK17-NEXT: [[TMP4:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK17-NEXT: [[TMP4:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK17-NEXT: [[CONV3:%.*]] = sitofp i32 [[TMP4]] to double // CHECK17-NEXT: [[ADD:%.*]] = fadd double [[CONV3]], 1.500000e+00 // CHECK17-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_S1:%.*]], %struct.S1* [[TMP0]], i32 0, i32 0 @@ -6900,15 +6900,15 @@ int bar(int n){ // CHECK17-NEXT: [[CONV1:%.*]] = bitcast i64* [[AA_ADDR]] to i16* // CHECK17-NEXT: [[CONV2:%.*]] = bitcast i64* [[AAA_ADDR]] to i8* // CHECK17-NEXT: [[TMP0:%.*]] = load [10 x i32]*, [10 x i32]** [[B_ADDR]], align 8 -// CHECK17-NEXT: [[TMP1:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK17-NEXT: [[TMP1:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK17-NEXT: [[CONV3:%.*]] = bitcast i64* [[A_CASTED]] to i32* // CHECK17-NEXT: store i32 [[TMP1]], i32* [[CONV3]], align 4 // CHECK17-NEXT: [[TMP2:%.*]] = load i64, i64* [[A_CASTED]], align 8 -// CHECK17-NEXT: [[TMP3:%.*]] = load i16, i16* [[CONV1]], align 8 +// CHECK17-NEXT: [[TMP3:%.*]] = load i16, i16* [[CONV1]], align 2 // CHECK17-NEXT: [[CONV4:%.*]] = bitcast i64* [[AA_CASTED]] to i16* // CHECK17-NEXT: store i16 [[TMP3]], i16* [[CONV4]], align 2 // CHECK17-NEXT: [[TMP4:%.*]] = load i64, i64* [[AA_CASTED]], align 8 -// CHECK17-NEXT: [[TMP5:%.*]] = load i8, i8* [[CONV2]], align 8 +// CHECK17-NEXT: [[TMP5:%.*]] = load i8, i8* [[CONV2]], align 1 // CHECK17-NEXT: [[CONV5:%.*]] = bitcast i64* [[AAA_CASTED]] to i8* // CHECK17-NEXT: store i8 [[TMP5]], i8* [[CONV5]], align 1 // CHECK17-NEXT: [[TMP6:%.*]] = load i64, i64* [[AAA_CASTED]], align 8 @@ -6935,19 +6935,19 @@ int bar(int n){ // CHECK17-NEXT: [[CONV1:%.*]] = bitcast i64* [[AA_ADDR]] to i16* // CHECK17-NEXT: [[CONV2:%.*]] = bitcast i64* [[AAA_ADDR]] to i8* // CHECK17-NEXT: [[TMP0:%.*]] = load [10 x i32]*, [10 x i32]** [[B_ADDR]], align 8 -// CHECK17-NEXT: [[TMP1:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK17-NEXT: [[TMP1:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK17-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP1]], 1 -// CHECK17-NEXT: store i32 [[ADD]], i32* [[CONV]], align 8 -// CHECK17-NEXT: [[TMP2:%.*]] = load i16, i16* [[CONV1]], align 8 +// CHECK17-NEXT: store i32 [[ADD]], i32* [[CONV]], align 4 +// CHECK17-NEXT: [[TMP2:%.*]] = load i16, i16* [[CONV1]], align 2 // CHECK17-NEXT: [[CONV3:%.*]] = sext i16 [[TMP2]] to i32 // CHECK17-NEXT: [[ADD4:%.*]] = add nsw i32 [[CONV3]], 1 // CHECK17-NEXT: [[CONV5:%.*]] = trunc i32 [[ADD4]] to i16 -// CHECK17-NEXT: store i16 [[CONV5]], i16* [[CONV1]], align 8 -// CHECK17-NEXT: [[TMP3:%.*]] = load i8, i8* [[CONV2]], align 8 +// CHECK17-NEXT: store i16 [[CONV5]], i16* [[CONV1]], align 2 +// CHECK17-NEXT: [[TMP3:%.*]] = load i8, i8* [[CONV2]], align 1 // CHECK17-NEXT: [[CONV6:%.*]] = sext i8 [[TMP3]] to i32 // CHECK17-NEXT: [[ADD7:%.*]] = add nsw i32 
[[CONV6]], 1 // CHECK17-NEXT: [[CONV8:%.*]] = trunc i32 [[ADD7]] to i8 -// CHECK17-NEXT: store i8 [[CONV8]], i8* [[CONV2]], align 8 +// CHECK17-NEXT: store i8 [[CONV8]], i8* [[CONV2]], align 1 // CHECK17-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], [10 x i32]* [[TMP0]], i64 0, i64 2 // CHECK17-NEXT: [[TMP4:%.*]] = load i32, i32* [[ARRAYIDX]], align 4 // CHECK17-NEXT: [[ADD9:%.*]] = add nsw i32 [[TMP4]], 1 @@ -6969,11 +6969,11 @@ int bar(int n){ // CHECK17-NEXT: [[CONV:%.*]] = bitcast i64* [[A_ADDR]] to i32* // CHECK17-NEXT: [[CONV1:%.*]] = bitcast i64* [[AA_ADDR]] to i16* // CHECK17-NEXT: [[TMP0:%.*]] = load [10 x i32]*, [10 x i32]** [[B_ADDR]], align 8 -// CHECK17-NEXT: [[TMP1:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK17-NEXT: [[TMP1:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK17-NEXT: [[CONV2:%.*]] = bitcast i64* [[A_CASTED]] to i32* // CHECK17-NEXT: store i32 [[TMP1]], i32* [[CONV2]], align 4 // CHECK17-NEXT: [[TMP2:%.*]] = load i64, i64* [[A_CASTED]], align 8 -// CHECK17-NEXT: [[TMP3:%.*]] = load i16, i16* [[CONV1]], align 8 +// CHECK17-NEXT: [[TMP3:%.*]] = load i16, i16* [[CONV1]], align 2 // CHECK17-NEXT: [[CONV3:%.*]] = bitcast i64* [[AA_CASTED]] to i16* // CHECK17-NEXT: store i16 [[TMP3]], i16* [[CONV3]], align 2 // CHECK17-NEXT: [[TMP4:%.*]] = load i64, i64* [[AA_CASTED]], align 8 @@ -6997,14 +6997,14 @@ int bar(int n){ // CHECK17-NEXT: [[CONV:%.*]] = bitcast i64* [[A_ADDR]] to i32* // CHECK17-NEXT: [[CONV1:%.*]] = bitcast i64* [[AA_ADDR]] to i16* // CHECK17-NEXT: [[TMP0:%.*]] = load [10 x i32]*, [10 x i32]** [[B_ADDR]], align 8 -// CHECK17-NEXT: [[TMP1:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK17-NEXT: [[TMP1:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK17-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP1]], 1 -// CHECK17-NEXT: store i32 [[ADD]], i32* [[CONV]], align 8 -// CHECK17-NEXT: [[TMP2:%.*]] = load i16, i16* [[CONV1]], align 8 +// CHECK17-NEXT: store i32 [[ADD]], i32* [[CONV]], align 4 +// CHECK17-NEXT: [[TMP2:%.*]] = load i16, i16* [[CONV1]], align 2 // CHECK17-NEXT: [[CONV2:%.*]] = sext i16 [[TMP2]] to i32 // CHECK17-NEXT: [[ADD3:%.*]] = add nsw i32 [[CONV2]], 1 // CHECK17-NEXT: [[CONV4:%.*]] = trunc i32 [[ADD3]] to i16 -// CHECK17-NEXT: store i16 [[CONV4]], i16* [[CONV1]], align 8 +// CHECK17-NEXT: store i16 [[CONV4]], i16* [[CONV1]], align 2 // CHECK17-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], [10 x i32]* [[TMP0]], i64 0, i64 2 // CHECK17-NEXT: [[TMP3:%.*]] = load i32, i32* [[ARRAYIDX]], align 4 // CHECK17-NEXT: [[ADD5:%.*]] = add nsw i32 [[TMP3]], 1 @@ -7322,7 +7322,7 @@ int bar(int n){ // CHECK18-NEXT: [[A_CASTED:%.*]] = alloca i64, align 8 // CHECK18-NEXT: store i64 [[A]], i64* [[A_ADDR]], align 8 // CHECK18-NEXT: [[CONV:%.*]] = bitcast i64* [[A_ADDR]] to i32* -// CHECK18-NEXT: [[TMP0:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK18-NEXT: [[TMP0:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK18-NEXT: [[CONV1:%.*]] = bitcast i64* [[A_CASTED]] to i32* // CHECK18-NEXT: store i32 [[TMP0]], i32* [[CONV1]], align 4 // CHECK18-NEXT: [[TMP1:%.*]] = load i64, i64* [[A_CASTED]], align 8 @@ -7340,9 +7340,9 @@ int bar(int n){ // CHECK18-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 // CHECK18-NEXT: store i64 [[A]], i64* [[A_ADDR]], align 8 // CHECK18-NEXT: [[CONV:%.*]] = bitcast i64* [[A_ADDR]] to i32* -// CHECK18-NEXT: [[TMP0:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK18-NEXT: [[TMP0:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK18-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP0]], 1 -// 
CHECK18-NEXT: store i32 [[ADD]], i32* [[CONV]], align 8 +// CHECK18-NEXT: store i32 [[ADD]], i32* [[CONV]], align 4 // CHECK18-NEXT: ret void // // @@ -7353,7 +7353,7 @@ int bar(int n){ // CHECK18-NEXT: [[AA_CASTED:%.*]] = alloca i64, align 8 // CHECK18-NEXT: store i64 [[AA]], i64* [[AA_ADDR]], align 8 // CHECK18-NEXT: [[CONV:%.*]] = bitcast i64* [[AA_ADDR]] to i16* -// CHECK18-NEXT: [[TMP0:%.*]] = load i16, i16* [[CONV]], align 8 +// CHECK18-NEXT: [[TMP0:%.*]] = load i16, i16* [[CONV]], align 2 // CHECK18-NEXT: [[CONV1:%.*]] = bitcast i64* [[AA_CASTED]] to i16* // CHECK18-NEXT: store i16 [[TMP0]], i16* [[CONV1]], align 2 // CHECK18-NEXT: [[TMP1:%.*]] = load i64, i64* [[AA_CASTED]], align 8 @@ -7371,11 +7371,11 @@ int bar(int n){ // CHECK18-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 // CHECK18-NEXT: store i64 [[AA]], i64* [[AA_ADDR]], align 8 // CHECK18-NEXT: [[CONV:%.*]] = bitcast i64* [[AA_ADDR]] to i16* -// CHECK18-NEXT: [[TMP0:%.*]] = load i16, i16* [[CONV]], align 8 +// CHECK18-NEXT: [[TMP0:%.*]] = load i16, i16* [[CONV]], align 2 // CHECK18-NEXT: [[CONV1:%.*]] = sext i16 [[TMP0]] to i32 // CHECK18-NEXT: [[ADD:%.*]] = add nsw i32 [[CONV1]], 1 // CHECK18-NEXT: [[CONV2:%.*]] = trunc i32 [[ADD]] to i16 -// CHECK18-NEXT: store i16 [[CONV2]], i16* [[CONV]], align 8 +// CHECK18-NEXT: store i16 [[CONV2]], i16* [[CONV]], align 2 // CHECK18-NEXT: [[TMP1:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK18-NEXT: [[TMP2:%.*]] = load i32, i32* [[TMP1]], align 4 // CHECK18-NEXT: [[TMP3:%.*]] = call i32 @__kmpc_cancel(%struct.ident_t* @[[GLOB1]], i32 [[TMP2]], i32 1) @@ -7399,11 +7399,11 @@ int bar(int n){ // CHECK18-NEXT: store i64 [[AA]], i64* [[AA_ADDR]], align 8 // CHECK18-NEXT: [[CONV:%.*]] = bitcast i64* [[A_ADDR]] to i32* // CHECK18-NEXT: [[CONV1:%.*]] = bitcast i64* [[AA_ADDR]] to i16* -// CHECK18-NEXT: [[TMP0:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK18-NEXT: [[TMP0:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK18-NEXT: [[CONV2:%.*]] = bitcast i64* [[A_CASTED]] to i32* // CHECK18-NEXT: store i32 [[TMP0]], i32* [[CONV2]], align 4 // CHECK18-NEXT: [[TMP1:%.*]] = load i64, i64* [[A_CASTED]], align 8 -// CHECK18-NEXT: [[TMP2:%.*]] = load i16, i16* [[CONV1]], align 8 +// CHECK18-NEXT: [[TMP2:%.*]] = load i16, i16* [[CONV1]], align 2 // CHECK18-NEXT: [[CONV3:%.*]] = bitcast i64* [[AA_CASTED]] to i16* // CHECK18-NEXT: store i16 [[TMP2]], i16* [[CONV3]], align 2 // CHECK18-NEXT: [[TMP3:%.*]] = load i64, i64* [[AA_CASTED]], align 8 @@ -7424,14 +7424,14 @@ int bar(int n){ // CHECK18-NEXT: store i64 [[AA]], i64* [[AA_ADDR]], align 8 // CHECK18-NEXT: [[CONV:%.*]] = bitcast i64* [[A_ADDR]] to i32* // CHECK18-NEXT: [[CONV1:%.*]] = bitcast i64* [[AA_ADDR]] to i16* -// CHECK18-NEXT: [[TMP0:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK18-NEXT: [[TMP0:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK18-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP0]], 1 -// CHECK18-NEXT: store i32 [[ADD]], i32* [[CONV]], align 8 -// CHECK18-NEXT: [[TMP1:%.*]] = load i16, i16* [[CONV1]], align 8 +// CHECK18-NEXT: store i32 [[ADD]], i32* [[CONV]], align 4 +// CHECK18-NEXT: [[TMP1:%.*]] = load i16, i16* [[CONV1]], align 2 // CHECK18-NEXT: [[CONV2:%.*]] = sext i16 [[TMP1]] to i32 // CHECK18-NEXT: [[ADD3:%.*]] = add nsw i32 [[CONV2]], 1 // CHECK18-NEXT: [[CONV4:%.*]] = trunc i32 [[ADD3]] to i16 -// CHECK18-NEXT: store i16 [[CONV4]], i16* [[CONV1]], align 8 +// CHECK18-NEXT: store i16 [[CONV4]], i16* [[CONV1]], align 2 // CHECK18-NEXT: ret void // // @@ -7466,7 +7466,7 @@ int 
bar(int n){ // CHECK18-NEXT: [[TMP5:%.*]] = load i64, i64* [[VLA_ADDR4]], align 8 // CHECK18-NEXT: [[TMP6:%.*]] = load double*, double** [[CN_ADDR]], align 8 // CHECK18-NEXT: [[TMP7:%.*]] = load %struct.TT*, %struct.TT** [[D_ADDR]], align 8 -// CHECK18-NEXT: [[TMP8:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK18-NEXT: [[TMP8:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK18-NEXT: [[CONV5:%.*]] = bitcast i64* [[A_CASTED]] to i32* // CHECK18-NEXT: store i32 [[TMP8]], i32* [[CONV5]], align 4 // CHECK18-NEXT: [[TMP9:%.*]] = load i64, i64* [[A_CASTED]], align 8 @@ -7508,9 +7508,9 @@ int bar(int n){ // CHECK18-NEXT: [[TMP5:%.*]] = load i64, i64* [[VLA_ADDR4]], align 8 // CHECK18-NEXT: [[TMP6:%.*]] = load double*, double** [[CN_ADDR]], align 8 // CHECK18-NEXT: [[TMP7:%.*]] = load %struct.TT*, %struct.TT** [[D_ADDR]], align 8 -// CHECK18-NEXT: [[TMP8:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK18-NEXT: [[TMP8:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK18-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP8]], 1 -// CHECK18-NEXT: store i32 [[ADD]], i32* [[CONV]], align 8 +// CHECK18-NEXT: store i32 [[ADD]], i32* [[CONV]], align 4 // CHECK18-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x float], [10 x float]* [[TMP0]], i64 0, i64 2 // CHECK18-NEXT: [[TMP9:%.*]] = load float, float* [[ARRAYIDX]], align 4 // CHECK18-NEXT: [[CONV5:%.*]] = fpext float [[TMP9]] to double @@ -7865,7 +7865,7 @@ int bar(int n){ // CHECK18-NEXT: [[TMP1:%.*]] = load i64, i64* [[VLA_ADDR]], align 8 // CHECK18-NEXT: [[TMP2:%.*]] = load i64, i64* [[VLA_ADDR2]], align 8 // CHECK18-NEXT: [[TMP3:%.*]] = load i16*, i16** [[C_ADDR]], align 8 -// CHECK18-NEXT: [[TMP4:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK18-NEXT: [[TMP4:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK18-NEXT: [[CONV3:%.*]] = bitcast i64* [[B_CASTED]] to i32* // CHECK18-NEXT: store i32 [[TMP4]], i32* [[CONV3]], align 4 // CHECK18-NEXT: [[TMP5:%.*]] = load i64, i64* [[B_CASTED]], align 8 @@ -7895,7 +7895,7 @@ int bar(int n){ // CHECK18-NEXT: [[TMP1:%.*]] = load i64, i64* [[VLA_ADDR]], align 8 // CHECK18-NEXT: [[TMP2:%.*]] = load i64, i64* [[VLA_ADDR2]], align 8 // CHECK18-NEXT: [[TMP3:%.*]] = load i16*, i16** [[C_ADDR]], align 8 -// CHECK18-NEXT: [[TMP4:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK18-NEXT: [[TMP4:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK18-NEXT: [[CONV3:%.*]] = sitofp i32 [[TMP4]] to double // CHECK18-NEXT: [[ADD:%.*]] = fadd double [[CONV3]], 1.500000e+00 // CHECK18-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_S1:%.*]], %struct.S1* [[TMP0]], i32 0, i32 0 @@ -7930,15 +7930,15 @@ int bar(int n){ // CHECK18-NEXT: [[CONV1:%.*]] = bitcast i64* [[AA_ADDR]] to i16* // CHECK18-NEXT: [[CONV2:%.*]] = bitcast i64* [[AAA_ADDR]] to i8* // CHECK18-NEXT: [[TMP0:%.*]] = load [10 x i32]*, [10 x i32]** [[B_ADDR]], align 8 -// CHECK18-NEXT: [[TMP1:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK18-NEXT: [[TMP1:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK18-NEXT: [[CONV3:%.*]] = bitcast i64* [[A_CASTED]] to i32* // CHECK18-NEXT: store i32 [[TMP1]], i32* [[CONV3]], align 4 // CHECK18-NEXT: [[TMP2:%.*]] = load i64, i64* [[A_CASTED]], align 8 -// CHECK18-NEXT: [[TMP3:%.*]] = load i16, i16* [[CONV1]], align 8 +// CHECK18-NEXT: [[TMP3:%.*]] = load i16, i16* [[CONV1]], align 2 // CHECK18-NEXT: [[CONV4:%.*]] = bitcast i64* [[AA_CASTED]] to i16* // CHECK18-NEXT: store i16 [[TMP3]], i16* [[CONV4]], align 2 // CHECK18-NEXT: [[TMP4:%.*]] = load i64, i64* [[AA_CASTED]], align 8 -// CHECK18-NEXT: [[TMP5:%.*]] = load i8, i8* [[CONV2]], 
align 8 +// CHECK18-NEXT: [[TMP5:%.*]] = load i8, i8* [[CONV2]], align 1 // CHECK18-NEXT: [[CONV5:%.*]] = bitcast i64* [[AAA_CASTED]] to i8* // CHECK18-NEXT: store i8 [[TMP5]], i8* [[CONV5]], align 1 // CHECK18-NEXT: [[TMP6:%.*]] = load i64, i64* [[AAA_CASTED]], align 8 @@ -7965,19 +7965,19 @@ int bar(int n){ // CHECK18-NEXT: [[CONV1:%.*]] = bitcast i64* [[AA_ADDR]] to i16* // CHECK18-NEXT: [[CONV2:%.*]] = bitcast i64* [[AAA_ADDR]] to i8* // CHECK18-NEXT: [[TMP0:%.*]] = load [10 x i32]*, [10 x i32]** [[B_ADDR]], align 8 -// CHECK18-NEXT: [[TMP1:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK18-NEXT: [[TMP1:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK18-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP1]], 1 -// CHECK18-NEXT: store i32 [[ADD]], i32* [[CONV]], align 8 -// CHECK18-NEXT: [[TMP2:%.*]] = load i16, i16* [[CONV1]], align 8 +// CHECK18-NEXT: store i32 [[ADD]], i32* [[CONV]], align 4 +// CHECK18-NEXT: [[TMP2:%.*]] = load i16, i16* [[CONV1]], align 2 // CHECK18-NEXT: [[CONV3:%.*]] = sext i16 [[TMP2]] to i32 // CHECK18-NEXT: [[ADD4:%.*]] = add nsw i32 [[CONV3]], 1 // CHECK18-NEXT: [[CONV5:%.*]] = trunc i32 [[ADD4]] to i16 -// CHECK18-NEXT: store i16 [[CONV5]], i16* [[CONV1]], align 8 -// CHECK18-NEXT: [[TMP3:%.*]] = load i8, i8* [[CONV2]], align 8 +// CHECK18-NEXT: store i16 [[CONV5]], i16* [[CONV1]], align 2 +// CHECK18-NEXT: [[TMP3:%.*]] = load i8, i8* [[CONV2]], align 1 // CHECK18-NEXT: [[CONV6:%.*]] = sext i8 [[TMP3]] to i32 // CHECK18-NEXT: [[ADD7:%.*]] = add nsw i32 [[CONV6]], 1 // CHECK18-NEXT: [[CONV8:%.*]] = trunc i32 [[ADD7]] to i8 -// CHECK18-NEXT: store i8 [[CONV8]], i8* [[CONV2]], align 8 +// CHECK18-NEXT: store i8 [[CONV8]], i8* [[CONV2]], align 1 // CHECK18-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], [10 x i32]* [[TMP0]], i64 0, i64 2 // CHECK18-NEXT: [[TMP4:%.*]] = load i32, i32* [[ARRAYIDX]], align 4 // CHECK18-NEXT: [[ADD9:%.*]] = add nsw i32 [[TMP4]], 1 @@ -7999,11 +7999,11 @@ int bar(int n){ // CHECK18-NEXT: [[CONV:%.*]] = bitcast i64* [[A_ADDR]] to i32* // CHECK18-NEXT: [[CONV1:%.*]] = bitcast i64* [[AA_ADDR]] to i16* // CHECK18-NEXT: [[TMP0:%.*]] = load [10 x i32]*, [10 x i32]** [[B_ADDR]], align 8 -// CHECK18-NEXT: [[TMP1:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK18-NEXT: [[TMP1:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK18-NEXT: [[CONV2:%.*]] = bitcast i64* [[A_CASTED]] to i32* // CHECK18-NEXT: store i32 [[TMP1]], i32* [[CONV2]], align 4 // CHECK18-NEXT: [[TMP2:%.*]] = load i64, i64* [[A_CASTED]], align 8 -// CHECK18-NEXT: [[TMP3:%.*]] = load i16, i16* [[CONV1]], align 8 +// CHECK18-NEXT: [[TMP3:%.*]] = load i16, i16* [[CONV1]], align 2 // CHECK18-NEXT: [[CONV3:%.*]] = bitcast i64* [[AA_CASTED]] to i16* // CHECK18-NEXT: store i16 [[TMP3]], i16* [[CONV3]], align 2 // CHECK18-NEXT: [[TMP4:%.*]] = load i64, i64* [[AA_CASTED]], align 8 @@ -8027,14 +8027,14 @@ int bar(int n){ // CHECK18-NEXT: [[CONV:%.*]] = bitcast i64* [[A_ADDR]] to i32* // CHECK18-NEXT: [[CONV1:%.*]] = bitcast i64* [[AA_ADDR]] to i16* // CHECK18-NEXT: [[TMP0:%.*]] = load [10 x i32]*, [10 x i32]** [[B_ADDR]], align 8 -// CHECK18-NEXT: [[TMP1:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK18-NEXT: [[TMP1:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK18-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP1]], 1 -// CHECK18-NEXT: store i32 [[ADD]], i32* [[CONV]], align 8 -// CHECK18-NEXT: [[TMP2:%.*]] = load i16, i16* [[CONV1]], align 8 +// CHECK18-NEXT: store i32 [[ADD]], i32* [[CONV]], align 4 +// CHECK18-NEXT: [[TMP2:%.*]] = load i16, i16* [[CONV1]], align 2 // CHECK18-NEXT: 
[[CONV2:%.*]] = sext i16 [[TMP2]] to i32 // CHECK18-NEXT: [[ADD3:%.*]] = add nsw i32 [[CONV2]], 1 // CHECK18-NEXT: [[CONV4:%.*]] = trunc i32 [[ADD3]] to i16 -// CHECK18-NEXT: store i16 [[CONV4]], i16* [[CONV1]], align 8 +// CHECK18-NEXT: store i16 [[CONV4]], i16* [[CONV1]], align 2 // CHECK18-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], [10 x i32]* [[TMP0]], i64 0, i64 2 // CHECK18-NEXT: [[TMP3:%.*]] = load i32, i32* [[ARRAYIDX]], align 4 // CHECK18-NEXT: [[ADD5:%.*]] = add nsw i32 [[TMP3]], 1 @@ -8377,7 +8377,7 @@ int bar(int n){ // CHECK19-NEXT: [[AA_CASTED:%.*]] = alloca i32, align 4 // CHECK19-NEXT: store i32 [[AA]], i32* [[AA_ADDR]], align 4 // CHECK19-NEXT: [[CONV:%.*]] = bitcast i32* [[AA_ADDR]] to i16* -// CHECK19-NEXT: [[TMP0:%.*]] = load i16, i16* [[CONV]], align 4 +// CHECK19-NEXT: [[TMP0:%.*]] = load i16, i16* [[CONV]], align 2 // CHECK19-NEXT: [[CONV1:%.*]] = bitcast i32* [[AA_CASTED]] to i16* // CHECK19-NEXT: store i16 [[TMP0]], i16* [[CONV1]], align 2 // CHECK19-NEXT: [[TMP1:%.*]] = load i32, i32* [[AA_CASTED]], align 4 @@ -8395,11 +8395,11 @@ int bar(int n){ // CHECK19-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4 // CHECK19-NEXT: store i32 [[AA]], i32* [[AA_ADDR]], align 4 // CHECK19-NEXT: [[CONV:%.*]] = bitcast i32* [[AA_ADDR]] to i16* -// CHECK19-NEXT: [[TMP0:%.*]] = load i16, i16* [[CONV]], align 4 +// CHECK19-NEXT: [[TMP0:%.*]] = load i16, i16* [[CONV]], align 2 // CHECK19-NEXT: [[CONV1:%.*]] = sext i16 [[TMP0]] to i32 // CHECK19-NEXT: [[ADD:%.*]] = add nsw i32 [[CONV1]], 1 // CHECK19-NEXT: [[CONV2:%.*]] = trunc i32 [[ADD]] to i16 -// CHECK19-NEXT: store i16 [[CONV2]], i16* [[CONV]], align 4 +// CHECK19-NEXT: store i16 [[CONV2]], i16* [[CONV]], align 2 // CHECK19-NEXT: [[TMP1:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK19-NEXT: [[TMP2:%.*]] = load i32, i32* [[TMP1]], align 4 // CHECK19-NEXT: [[TMP3:%.*]] = call i32 @__kmpc_cancel(%struct.ident_t* @[[GLOB1]], i32 [[TMP2]], i32 1) @@ -8425,7 +8425,7 @@ int bar(int n){ // CHECK19-NEXT: [[TMP0:%.*]] = load i32, i32* [[A_ADDR]], align 4 // CHECK19-NEXT: store i32 [[TMP0]], i32* [[A_CASTED]], align 4 // CHECK19-NEXT: [[TMP1:%.*]] = load i32, i32* [[A_CASTED]], align 4 -// CHECK19-NEXT: [[TMP2:%.*]] = load i16, i16* [[CONV]], align 4 +// CHECK19-NEXT: [[TMP2:%.*]] = load i16, i16* [[CONV]], align 2 // CHECK19-NEXT: [[CONV1:%.*]] = bitcast i32* [[AA_CASTED]] to i16* // CHECK19-NEXT: store i16 [[TMP2]], i16* [[CONV1]], align 2 // CHECK19-NEXT: [[TMP3:%.*]] = load i32, i32* [[AA_CASTED]], align 4 @@ -8448,11 +8448,11 @@ int bar(int n){ // CHECK19-NEXT: [[TMP0:%.*]] = load i32, i32* [[A_ADDR]], align 4 // CHECK19-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP0]], 1 // CHECK19-NEXT: store i32 [[ADD]], i32* [[A_ADDR]], align 4 -// CHECK19-NEXT: [[TMP1:%.*]] = load i16, i16* [[CONV]], align 4 +// CHECK19-NEXT: [[TMP1:%.*]] = load i16, i16* [[CONV]], align 2 // CHECK19-NEXT: [[CONV1:%.*]] = sext i16 [[TMP1]] to i32 // CHECK19-NEXT: [[ADD2:%.*]] = add nsw i32 [[CONV1]], 1 // CHECK19-NEXT: [[CONV3:%.*]] = trunc i32 [[ADD2]] to i16 -// CHECK19-NEXT: store i16 [[CONV3]], i16* [[CONV]], align 4 +// CHECK19-NEXT: store i16 [[CONV3]], i16* [[CONV]], align 2 // CHECK19-NEXT: ret void // // @@ -8944,11 +8944,11 @@ int bar(int n){ // CHECK19-NEXT: [[TMP1:%.*]] = load i32, i32* [[A_ADDR]], align 4 // CHECK19-NEXT: store i32 [[TMP1]], i32* [[A_CASTED]], align 4 // CHECK19-NEXT: [[TMP2:%.*]] = load i32, i32* [[A_CASTED]], align 4 -// CHECK19-NEXT: [[TMP3:%.*]] = load i16, i16* [[CONV]], 
align 4 +// CHECK19-NEXT: [[TMP3:%.*]] = load i16, i16* [[CONV]], align 2 // CHECK19-NEXT: [[CONV2:%.*]] = bitcast i32* [[AA_CASTED]] to i16* // CHECK19-NEXT: store i16 [[TMP3]], i16* [[CONV2]], align 2 // CHECK19-NEXT: [[TMP4:%.*]] = load i32, i32* [[AA_CASTED]], align 4 -// CHECK19-NEXT: [[TMP5:%.*]] = load i8, i8* [[CONV1]], align 4 +// CHECK19-NEXT: [[TMP5:%.*]] = load i8, i8* [[CONV1]], align 1 // CHECK19-NEXT: [[CONV3:%.*]] = bitcast i32* [[AAA_CASTED]] to i8* // CHECK19-NEXT: store i8 [[TMP5]], i8* [[CONV3]], align 1 // CHECK19-NEXT: [[TMP6:%.*]] = load i32, i32* [[AAA_CASTED]], align 4 @@ -8977,16 +8977,16 @@ int bar(int n){ // CHECK19-NEXT: [[TMP1:%.*]] = load i32, i32* [[A_ADDR]], align 4 // CHECK19-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP1]], 1 // CHECK19-NEXT: store i32 [[ADD]], i32* [[A_ADDR]], align 4 -// CHECK19-NEXT: [[TMP2:%.*]] = load i16, i16* [[CONV]], align 4 +// CHECK19-NEXT: [[TMP2:%.*]] = load i16, i16* [[CONV]], align 2 // CHECK19-NEXT: [[CONV2:%.*]] = sext i16 [[TMP2]] to i32 // CHECK19-NEXT: [[ADD3:%.*]] = add nsw i32 [[CONV2]], 1 // CHECK19-NEXT: [[CONV4:%.*]] = trunc i32 [[ADD3]] to i16 -// CHECK19-NEXT: store i16 [[CONV4]], i16* [[CONV]], align 4 -// CHECK19-NEXT: [[TMP3:%.*]] = load i8, i8* [[CONV1]], align 4 +// CHECK19-NEXT: store i16 [[CONV4]], i16* [[CONV]], align 2 +// CHECK19-NEXT: [[TMP3:%.*]] = load i8, i8* [[CONV1]], align 1 // CHECK19-NEXT: [[CONV5:%.*]] = sext i8 [[TMP3]] to i32 // CHECK19-NEXT: [[ADD6:%.*]] = add nsw i32 [[CONV5]], 1 // CHECK19-NEXT: [[CONV7:%.*]] = trunc i32 [[ADD6]] to i8 -// CHECK19-NEXT: store i8 [[CONV7]], i8* [[CONV1]], align 4 +// CHECK19-NEXT: store i8 [[CONV7]], i8* [[CONV1]], align 1 // CHECK19-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], [10 x i32]* [[TMP0]], i32 0, i32 2 // CHECK19-NEXT: [[TMP4:%.*]] = load i32, i32* [[ARRAYIDX]], align 4 // CHECK19-NEXT: [[ADD8:%.*]] = add nsw i32 [[TMP4]], 1 @@ -9010,7 +9010,7 @@ int bar(int n){ // CHECK19-NEXT: [[TMP1:%.*]] = load i32, i32* [[A_ADDR]], align 4 // CHECK19-NEXT: store i32 [[TMP1]], i32* [[A_CASTED]], align 4 // CHECK19-NEXT: [[TMP2:%.*]] = load i32, i32* [[A_CASTED]], align 4 -// CHECK19-NEXT: [[TMP3:%.*]] = load i16, i16* [[CONV]], align 4 +// CHECK19-NEXT: [[TMP3:%.*]] = load i16, i16* [[CONV]], align 2 // CHECK19-NEXT: [[CONV1:%.*]] = bitcast i32* [[AA_CASTED]] to i16* // CHECK19-NEXT: store i16 [[TMP3]], i16* [[CONV1]], align 2 // CHECK19-NEXT: [[TMP4:%.*]] = load i32, i32* [[AA_CASTED]], align 4 @@ -9036,11 +9036,11 @@ int bar(int n){ // CHECK19-NEXT: [[TMP1:%.*]] = load i32, i32* [[A_ADDR]], align 4 // CHECK19-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP1]], 1 // CHECK19-NEXT: store i32 [[ADD]], i32* [[A_ADDR]], align 4 -// CHECK19-NEXT: [[TMP2:%.*]] = load i16, i16* [[CONV]], align 4 +// CHECK19-NEXT: [[TMP2:%.*]] = load i16, i16* [[CONV]], align 2 // CHECK19-NEXT: [[CONV1:%.*]] = sext i16 [[TMP2]] to i32 // CHECK19-NEXT: [[ADD2:%.*]] = add nsw i32 [[CONV1]], 1 // CHECK19-NEXT: [[CONV3:%.*]] = trunc i32 [[ADD2]] to i16 -// CHECK19-NEXT: store i16 [[CONV3]], i16* [[CONV]], align 4 +// CHECK19-NEXT: store i16 [[CONV3]], i16* [[CONV]], align 2 // CHECK19-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], [10 x i32]* [[TMP0]], i32 0, i32 2 // CHECK19-NEXT: [[TMP3:%.*]] = load i32, i32* [[ARRAYIDX]], align 4 // CHECK19-NEXT: [[ADD4:%.*]] = add nsw i32 [[TMP3]], 1 @@ -9383,7 +9383,7 @@ int bar(int n){ // CHECK20-NEXT: [[AA_CASTED:%.*]] = alloca i32, align 4 // CHECK20-NEXT: store i32 [[AA]], i32* [[AA_ADDR]], align 4 // CHECK20-NEXT: [[CONV:%.*]] = 
bitcast i32* [[AA_ADDR]] to i16* -// CHECK20-NEXT: [[TMP0:%.*]] = load i16, i16* [[CONV]], align 4 +// CHECK20-NEXT: [[TMP0:%.*]] = load i16, i16* [[CONV]], align 2 // CHECK20-NEXT: [[CONV1:%.*]] = bitcast i32* [[AA_CASTED]] to i16* // CHECK20-NEXT: store i16 [[TMP0]], i16* [[CONV1]], align 2 // CHECK20-NEXT: [[TMP1:%.*]] = load i32, i32* [[AA_CASTED]], align 4 @@ -9401,11 +9401,11 @@ int bar(int n){ // CHECK20-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4 // CHECK20-NEXT: store i32 [[AA]], i32* [[AA_ADDR]], align 4 // CHECK20-NEXT: [[CONV:%.*]] = bitcast i32* [[AA_ADDR]] to i16* -// CHECK20-NEXT: [[TMP0:%.*]] = load i16, i16* [[CONV]], align 4 +// CHECK20-NEXT: [[TMP0:%.*]] = load i16, i16* [[CONV]], align 2 // CHECK20-NEXT: [[CONV1:%.*]] = sext i16 [[TMP0]] to i32 // CHECK20-NEXT: [[ADD:%.*]] = add nsw i32 [[CONV1]], 1 // CHECK20-NEXT: [[CONV2:%.*]] = trunc i32 [[ADD]] to i16 -// CHECK20-NEXT: store i16 [[CONV2]], i16* [[CONV]], align 4 +// CHECK20-NEXT: store i16 [[CONV2]], i16* [[CONV]], align 2 // CHECK20-NEXT: [[TMP1:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK20-NEXT: [[TMP2:%.*]] = load i32, i32* [[TMP1]], align 4 // CHECK20-NEXT: [[TMP3:%.*]] = call i32 @__kmpc_cancel(%struct.ident_t* @[[GLOB1]], i32 [[TMP2]], i32 1) @@ -9431,7 +9431,7 @@ int bar(int n){ // CHECK20-NEXT: [[TMP0:%.*]] = load i32, i32* [[A_ADDR]], align 4 // CHECK20-NEXT: store i32 [[TMP0]], i32* [[A_CASTED]], align 4 // CHECK20-NEXT: [[TMP1:%.*]] = load i32, i32* [[A_CASTED]], align 4 -// CHECK20-NEXT: [[TMP2:%.*]] = load i16, i16* [[CONV]], align 4 +// CHECK20-NEXT: [[TMP2:%.*]] = load i16, i16* [[CONV]], align 2 // CHECK20-NEXT: [[CONV1:%.*]] = bitcast i32* [[AA_CASTED]] to i16* // CHECK20-NEXT: store i16 [[TMP2]], i16* [[CONV1]], align 2 // CHECK20-NEXT: [[TMP3:%.*]] = load i32, i32* [[AA_CASTED]], align 4 @@ -9454,11 +9454,11 @@ int bar(int n){ // CHECK20-NEXT: [[TMP0:%.*]] = load i32, i32* [[A_ADDR]], align 4 // CHECK20-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP0]], 1 // CHECK20-NEXT: store i32 [[ADD]], i32* [[A_ADDR]], align 4 -// CHECK20-NEXT: [[TMP1:%.*]] = load i16, i16* [[CONV]], align 4 +// CHECK20-NEXT: [[TMP1:%.*]] = load i16, i16* [[CONV]], align 2 // CHECK20-NEXT: [[CONV1:%.*]] = sext i16 [[TMP1]] to i32 // CHECK20-NEXT: [[ADD2:%.*]] = add nsw i32 [[CONV1]], 1 // CHECK20-NEXT: [[CONV3:%.*]] = trunc i32 [[ADD2]] to i16 -// CHECK20-NEXT: store i16 [[CONV3]], i16* [[CONV]], align 4 +// CHECK20-NEXT: store i16 [[CONV3]], i16* [[CONV]], align 2 // CHECK20-NEXT: ret void // // @@ -9950,11 +9950,11 @@ int bar(int n){ // CHECK20-NEXT: [[TMP1:%.*]] = load i32, i32* [[A_ADDR]], align 4 // CHECK20-NEXT: store i32 [[TMP1]], i32* [[A_CASTED]], align 4 // CHECK20-NEXT: [[TMP2:%.*]] = load i32, i32* [[A_CASTED]], align 4 -// CHECK20-NEXT: [[TMP3:%.*]] = load i16, i16* [[CONV]], align 4 +// CHECK20-NEXT: [[TMP3:%.*]] = load i16, i16* [[CONV]], align 2 // CHECK20-NEXT: [[CONV2:%.*]] = bitcast i32* [[AA_CASTED]] to i16* // CHECK20-NEXT: store i16 [[TMP3]], i16* [[CONV2]], align 2 // CHECK20-NEXT: [[TMP4:%.*]] = load i32, i32* [[AA_CASTED]], align 4 -// CHECK20-NEXT: [[TMP5:%.*]] = load i8, i8* [[CONV1]], align 4 +// CHECK20-NEXT: [[TMP5:%.*]] = load i8, i8* [[CONV1]], align 1 // CHECK20-NEXT: [[CONV3:%.*]] = bitcast i32* [[AAA_CASTED]] to i8* // CHECK20-NEXT: store i8 [[TMP5]], i8* [[CONV3]], align 1 // CHECK20-NEXT: [[TMP6:%.*]] = load i32, i32* [[AAA_CASTED]], align 4 @@ -9983,16 +9983,16 @@ int bar(int n){ // CHECK20-NEXT: [[TMP1:%.*]] = load i32, i32* [[A_ADDR]], align 
4 // CHECK20-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP1]], 1 // CHECK20-NEXT: store i32 [[ADD]], i32* [[A_ADDR]], align 4 -// CHECK20-NEXT: [[TMP2:%.*]] = load i16, i16* [[CONV]], align 4 +// CHECK20-NEXT: [[TMP2:%.*]] = load i16, i16* [[CONV]], align 2 // CHECK20-NEXT: [[CONV2:%.*]] = sext i16 [[TMP2]] to i32 // CHECK20-NEXT: [[ADD3:%.*]] = add nsw i32 [[CONV2]], 1 // CHECK20-NEXT: [[CONV4:%.*]] = trunc i32 [[ADD3]] to i16 -// CHECK20-NEXT: store i16 [[CONV4]], i16* [[CONV]], align 4 -// CHECK20-NEXT: [[TMP3:%.*]] = load i8, i8* [[CONV1]], align 4 +// CHECK20-NEXT: store i16 [[CONV4]], i16* [[CONV]], align 2 +// CHECK20-NEXT: [[TMP3:%.*]] = load i8, i8* [[CONV1]], align 1 // CHECK20-NEXT: [[CONV5:%.*]] = sext i8 [[TMP3]] to i32 // CHECK20-NEXT: [[ADD6:%.*]] = add nsw i32 [[CONV5]], 1 // CHECK20-NEXT: [[CONV7:%.*]] = trunc i32 [[ADD6]] to i8 -// CHECK20-NEXT: store i8 [[CONV7]], i8* [[CONV1]], align 4 +// CHECK20-NEXT: store i8 [[CONV7]], i8* [[CONV1]], align 1 // CHECK20-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], [10 x i32]* [[TMP0]], i32 0, i32 2 // CHECK20-NEXT: [[TMP4:%.*]] = load i32, i32* [[ARRAYIDX]], align 4 // CHECK20-NEXT: [[ADD8:%.*]] = add nsw i32 [[TMP4]], 1 @@ -10016,7 +10016,7 @@ int bar(int n){ // CHECK20-NEXT: [[TMP1:%.*]] = load i32, i32* [[A_ADDR]], align 4 // CHECK20-NEXT: store i32 [[TMP1]], i32* [[A_CASTED]], align 4 // CHECK20-NEXT: [[TMP2:%.*]] = load i32, i32* [[A_CASTED]], align 4 -// CHECK20-NEXT: [[TMP3:%.*]] = load i16, i16* [[CONV]], align 4 +// CHECK20-NEXT: [[TMP3:%.*]] = load i16, i16* [[CONV]], align 2 // CHECK20-NEXT: [[CONV1:%.*]] = bitcast i32* [[AA_CASTED]] to i16* // CHECK20-NEXT: store i16 [[TMP3]], i16* [[CONV1]], align 2 // CHECK20-NEXT: [[TMP4:%.*]] = load i32, i32* [[AA_CASTED]], align 4 @@ -10042,11 +10042,11 @@ int bar(int n){ // CHECK20-NEXT: [[TMP1:%.*]] = load i32, i32* [[A_ADDR]], align 4 // CHECK20-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP1]], 1 // CHECK20-NEXT: store i32 [[ADD]], i32* [[A_ADDR]], align 4 -// CHECK20-NEXT: [[TMP2:%.*]] = load i16, i16* [[CONV]], align 4 +// CHECK20-NEXT: [[TMP2:%.*]] = load i16, i16* [[CONV]], align 2 // CHECK20-NEXT: [[CONV1:%.*]] = sext i16 [[TMP2]] to i32 // CHECK20-NEXT: [[ADD2:%.*]] = add nsw i32 [[CONV1]], 1 // CHECK20-NEXT: [[CONV3:%.*]] = trunc i32 [[ADD2]] to i16 -// CHECK20-NEXT: store i16 [[CONV3]], i16* [[CONV]], align 4 +// CHECK20-NEXT: store i16 [[CONV3]], i16* [[CONV]], align 2 // CHECK20-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], [10 x i32]* [[TMP0]], i32 0, i32 2 // CHECK20-NEXT: [[TMP3:%.*]] = load i32, i32* [[ARRAYIDX]], align 4 // CHECK20-NEXT: [[ADD4:%.*]] = add nsw i32 [[TMP3]], 1 @@ -10085,7 +10085,7 @@ int bar(int n){ // CHECK25-NEXT: [[AA_CASTED:%.*]] = alloca i64, align 8 // CHECK25-NEXT: store i64 [[AA]], i64* [[AA_ADDR]], align 8 // CHECK25-NEXT: [[CONV:%.*]] = bitcast i64* [[AA_ADDR]] to i16* -// CHECK25-NEXT: [[TMP0:%.*]] = load i16, i16* [[CONV]], align 8 +// CHECK25-NEXT: [[TMP0:%.*]] = load i16, i16* [[CONV]], align 2 // CHECK25-NEXT: [[CONV1:%.*]] = bitcast i64* [[AA_CASTED]] to i16* // CHECK25-NEXT: store i16 [[TMP0]], i16* [[CONV1]], align 2 // CHECK25-NEXT: [[TMP1:%.*]] = load i64, i64* [[AA_CASTED]], align 8 @@ -10103,11 +10103,11 @@ int bar(int n){ // CHECK25-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 // CHECK25-NEXT: store i64 [[AA]], i64* [[AA_ADDR]], align 8 // CHECK25-NEXT: [[CONV:%.*]] = bitcast i64* [[AA_ADDR]] to i16* -// CHECK25-NEXT: [[TMP0:%.*]] = load i16, i16* [[CONV]], align 8 +// CHECK25-NEXT: 
[[TMP0:%.*]] = load i16, i16* [[CONV]], align 2 // CHECK25-NEXT: [[CONV1:%.*]] = sext i16 [[TMP0]] to i32 // CHECK25-NEXT: [[ADD:%.*]] = add nsw i32 [[CONV1]], 1 // CHECK25-NEXT: [[CONV2:%.*]] = trunc i32 [[ADD]] to i16 -// CHECK25-NEXT: store i16 [[CONV2]], i16* [[CONV]], align 8 +// CHECK25-NEXT: store i16 [[CONV2]], i16* [[CONV]], align 2 // CHECK25-NEXT: [[TMP1:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK25-NEXT: [[TMP2:%.*]] = load i32, i32* [[TMP1]], align 4 // CHECK25-NEXT: [[TMP3:%.*]] = call i32 @__kmpc_cancel(%struct.ident_t* @[[GLOB1]], i32 [[TMP2]], i32 1) @@ -10131,11 +10131,11 @@ int bar(int n){ // CHECK25-NEXT: store i64 [[AA]], i64* [[AA_ADDR]], align 8 // CHECK25-NEXT: [[CONV:%.*]] = bitcast i64* [[A_ADDR]] to i32* // CHECK25-NEXT: [[CONV1:%.*]] = bitcast i64* [[AA_ADDR]] to i16* -// CHECK25-NEXT: [[TMP0:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK25-NEXT: [[TMP0:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK25-NEXT: [[CONV2:%.*]] = bitcast i64* [[A_CASTED]] to i32* // CHECK25-NEXT: store i32 [[TMP0]], i32* [[CONV2]], align 4 // CHECK25-NEXT: [[TMP1:%.*]] = load i64, i64* [[A_CASTED]], align 8 -// CHECK25-NEXT: [[TMP2:%.*]] = load i16, i16* [[CONV1]], align 8 +// CHECK25-NEXT: [[TMP2:%.*]] = load i16, i16* [[CONV1]], align 2 // CHECK25-NEXT: [[CONV3:%.*]] = bitcast i64* [[AA_CASTED]] to i16* // CHECK25-NEXT: store i16 [[TMP2]], i16* [[CONV3]], align 2 // CHECK25-NEXT: [[TMP3:%.*]] = load i64, i64* [[AA_CASTED]], align 8 @@ -10156,14 +10156,14 @@ int bar(int n){ // CHECK25-NEXT: store i64 [[AA]], i64* [[AA_ADDR]], align 8 // CHECK25-NEXT: [[CONV:%.*]] = bitcast i64* [[A_ADDR]] to i32* // CHECK25-NEXT: [[CONV1:%.*]] = bitcast i64* [[AA_ADDR]] to i16* -// CHECK25-NEXT: [[TMP0:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK25-NEXT: [[TMP0:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK25-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP0]], 1 -// CHECK25-NEXT: store i32 [[ADD]], i32* [[CONV]], align 8 -// CHECK25-NEXT: [[TMP1:%.*]] = load i16, i16* [[CONV1]], align 8 +// CHECK25-NEXT: store i32 [[ADD]], i32* [[CONV]], align 4 +// CHECK25-NEXT: [[TMP1:%.*]] = load i16, i16* [[CONV1]], align 2 // CHECK25-NEXT: [[CONV2:%.*]] = sext i16 [[TMP1]] to i32 // CHECK25-NEXT: [[ADD3:%.*]] = add nsw i32 [[CONV2]], 1 // CHECK25-NEXT: [[CONV4:%.*]] = trunc i32 [[ADD3]] to i16 -// CHECK25-NEXT: store i16 [[CONV4]], i16* [[CONV1]], align 8 +// CHECK25-NEXT: store i16 [[CONV4]], i16* [[CONV1]], align 2 // CHECK25-NEXT: ret void // // @@ -10198,7 +10198,7 @@ int bar(int n){ // CHECK25-NEXT: [[TMP5:%.*]] = load i64, i64* [[VLA_ADDR4]], align 8 // CHECK25-NEXT: [[TMP6:%.*]] = load double*, double** [[CN_ADDR]], align 8 // CHECK25-NEXT: [[TMP7:%.*]] = load %struct.TT*, %struct.TT** [[D_ADDR]], align 8 -// CHECK25-NEXT: [[TMP8:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK25-NEXT: [[TMP8:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK25-NEXT: [[CONV5:%.*]] = bitcast i64* [[A_CASTED]] to i32* // CHECK25-NEXT: store i32 [[TMP8]], i32* [[CONV5]], align 4 // CHECK25-NEXT: [[TMP9:%.*]] = load i64, i64* [[A_CASTED]], align 8 @@ -10240,9 +10240,9 @@ int bar(int n){ // CHECK25-NEXT: [[TMP5:%.*]] = load i64, i64* [[VLA_ADDR4]], align 8 // CHECK25-NEXT: [[TMP6:%.*]] = load double*, double** [[CN_ADDR]], align 8 // CHECK25-NEXT: [[TMP7:%.*]] = load %struct.TT*, %struct.TT** [[D_ADDR]], align 8 -// CHECK25-NEXT: [[TMP8:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK25-NEXT: [[TMP8:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK25-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP8]], 1 
-// CHECK25-NEXT: store i32 [[ADD]], i32* [[CONV]], align 8 +// CHECK25-NEXT: store i32 [[ADD]], i32* [[CONV]], align 4 // CHECK25-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x float], [10 x float]* [[TMP0]], i64 0, i64 2 // CHECK25-NEXT: [[TMP9:%.*]] = load float, float* [[ARRAYIDX]], align 4 // CHECK25-NEXT: [[CONV5:%.*]] = fpext float [[TMP9]] to double @@ -10297,15 +10297,15 @@ int bar(int n){ // CHECK25-NEXT: [[CONV1:%.*]] = bitcast i64* [[AA_ADDR]] to i16* // CHECK25-NEXT: [[CONV2:%.*]] = bitcast i64* [[AAA_ADDR]] to i8* // CHECK25-NEXT: [[TMP0:%.*]] = load [10 x i32]*, [10 x i32]** [[B_ADDR]], align 8 -// CHECK25-NEXT: [[TMP1:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK25-NEXT: [[TMP1:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK25-NEXT: [[CONV3:%.*]] = bitcast i64* [[A_CASTED]] to i32* // CHECK25-NEXT: store i32 [[TMP1]], i32* [[CONV3]], align 4 // CHECK25-NEXT: [[TMP2:%.*]] = load i64, i64* [[A_CASTED]], align 8 -// CHECK25-NEXT: [[TMP3:%.*]] = load i16, i16* [[CONV1]], align 8 +// CHECK25-NEXT: [[TMP3:%.*]] = load i16, i16* [[CONV1]], align 2 // CHECK25-NEXT: [[CONV4:%.*]] = bitcast i64* [[AA_CASTED]] to i16* // CHECK25-NEXT: store i16 [[TMP3]], i16* [[CONV4]], align 2 // CHECK25-NEXT: [[TMP4:%.*]] = load i64, i64* [[AA_CASTED]], align 8 -// CHECK25-NEXT: [[TMP5:%.*]] = load i8, i8* [[CONV2]], align 8 +// CHECK25-NEXT: [[TMP5:%.*]] = load i8, i8* [[CONV2]], align 1 // CHECK25-NEXT: [[CONV5:%.*]] = bitcast i64* [[AAA_CASTED]] to i8* // CHECK25-NEXT: store i8 [[TMP5]], i8* [[CONV5]], align 1 // CHECK25-NEXT: [[TMP6:%.*]] = load i64, i64* [[AAA_CASTED]], align 8 @@ -10332,19 +10332,19 @@ int bar(int n){ // CHECK25-NEXT: [[CONV1:%.*]] = bitcast i64* [[AA_ADDR]] to i16* // CHECK25-NEXT: [[CONV2:%.*]] = bitcast i64* [[AAA_ADDR]] to i8* // CHECK25-NEXT: [[TMP0:%.*]] = load [10 x i32]*, [10 x i32]** [[B_ADDR]], align 8 -// CHECK25-NEXT: [[TMP1:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK25-NEXT: [[TMP1:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK25-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP1]], 1 -// CHECK25-NEXT: store i32 [[ADD]], i32* [[CONV]], align 8 -// CHECK25-NEXT: [[TMP2:%.*]] = load i16, i16* [[CONV1]], align 8 +// CHECK25-NEXT: store i32 [[ADD]], i32* [[CONV]], align 4 +// CHECK25-NEXT: [[TMP2:%.*]] = load i16, i16* [[CONV1]], align 2 // CHECK25-NEXT: [[CONV3:%.*]] = sext i16 [[TMP2]] to i32 // CHECK25-NEXT: [[ADD4:%.*]] = add nsw i32 [[CONV3]], 1 // CHECK25-NEXT: [[CONV5:%.*]] = trunc i32 [[ADD4]] to i16 -// CHECK25-NEXT: store i16 [[CONV5]], i16* [[CONV1]], align 8 -// CHECK25-NEXT: [[TMP3:%.*]] = load i8, i8* [[CONV2]], align 8 +// CHECK25-NEXT: store i16 [[CONV5]], i16* [[CONV1]], align 2 +// CHECK25-NEXT: [[TMP3:%.*]] = load i8, i8* [[CONV2]], align 1 // CHECK25-NEXT: [[CONV6:%.*]] = sext i8 [[TMP3]] to i32 // CHECK25-NEXT: [[ADD7:%.*]] = add nsw i32 [[CONV6]], 1 // CHECK25-NEXT: [[CONV8:%.*]] = trunc i32 [[ADD7]] to i8 -// CHECK25-NEXT: store i8 [[CONV8]], i8* [[CONV2]], align 8 +// CHECK25-NEXT: store i8 [[CONV8]], i8* [[CONV2]], align 1 // CHECK25-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], [10 x i32]* [[TMP0]], i64 0, i64 2 // CHECK25-NEXT: [[TMP4:%.*]] = load i32, i32* [[ARRAYIDX]], align 4 // CHECK25-NEXT: [[ADD9:%.*]] = add nsw i32 [[TMP4]], 1 @@ -10371,7 +10371,7 @@ int bar(int n){ // CHECK25-NEXT: [[TMP1:%.*]] = load i64, i64* [[VLA_ADDR]], align 8 // CHECK25-NEXT: [[TMP2:%.*]] = load i64, i64* [[VLA_ADDR2]], align 8 // CHECK25-NEXT: [[TMP3:%.*]] = load i16*, i16** [[C_ADDR]], align 8 -// CHECK25-NEXT: [[TMP4:%.*]] = load 
i32, i32* [[CONV]], align 8 +// CHECK25-NEXT: [[TMP4:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK25-NEXT: [[CONV3:%.*]] = bitcast i64* [[B_CASTED]] to i32* // CHECK25-NEXT: store i32 [[TMP4]], i32* [[CONV3]], align 4 // CHECK25-NEXT: [[TMP5:%.*]] = load i64, i64* [[B_CASTED]], align 8 @@ -10401,7 +10401,7 @@ int bar(int n){ // CHECK25-NEXT: [[TMP1:%.*]] = load i64, i64* [[VLA_ADDR]], align 8 // CHECK25-NEXT: [[TMP2:%.*]] = load i64, i64* [[VLA_ADDR2]], align 8 // CHECK25-NEXT: [[TMP3:%.*]] = load i16*, i16** [[C_ADDR]], align 8 -// CHECK25-NEXT: [[TMP4:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK25-NEXT: [[TMP4:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK25-NEXT: [[CONV3:%.*]] = sitofp i32 [[TMP4]] to double // CHECK25-NEXT: [[ADD:%.*]] = fadd double [[CONV3]], 1.500000e+00 // CHECK25-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_S1:%.*]], %struct.S1* [[TMP0]], i32 0, i32 0 @@ -10432,11 +10432,11 @@ int bar(int n){ // CHECK25-NEXT: [[CONV:%.*]] = bitcast i64* [[A_ADDR]] to i32* // CHECK25-NEXT: [[CONV1:%.*]] = bitcast i64* [[AA_ADDR]] to i16* // CHECK25-NEXT: [[TMP0:%.*]] = load [10 x i32]*, [10 x i32]** [[B_ADDR]], align 8 -// CHECK25-NEXT: [[TMP1:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK25-NEXT: [[TMP1:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK25-NEXT: [[CONV2:%.*]] = bitcast i64* [[A_CASTED]] to i32* // CHECK25-NEXT: store i32 [[TMP1]], i32* [[CONV2]], align 4 // CHECK25-NEXT: [[TMP2:%.*]] = load i64, i64* [[A_CASTED]], align 8 -// CHECK25-NEXT: [[TMP3:%.*]] = load i16, i16* [[CONV1]], align 8 +// CHECK25-NEXT: [[TMP3:%.*]] = load i16, i16* [[CONV1]], align 2 // CHECK25-NEXT: [[CONV3:%.*]] = bitcast i64* [[AA_CASTED]] to i16* // CHECK25-NEXT: store i16 [[TMP3]], i16* [[CONV3]], align 2 // CHECK25-NEXT: [[TMP4:%.*]] = load i64, i64* [[AA_CASTED]], align 8 @@ -10460,14 +10460,14 @@ int bar(int n){ // CHECK25-NEXT: [[CONV:%.*]] = bitcast i64* [[A_ADDR]] to i32* // CHECK25-NEXT: [[CONV1:%.*]] = bitcast i64* [[AA_ADDR]] to i16* // CHECK25-NEXT: [[TMP0:%.*]] = load [10 x i32]*, [10 x i32]** [[B_ADDR]], align 8 -// CHECK25-NEXT: [[TMP1:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK25-NEXT: [[TMP1:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK25-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP1]], 1 -// CHECK25-NEXT: store i32 [[ADD]], i32* [[CONV]], align 8 -// CHECK25-NEXT: [[TMP2:%.*]] = load i16, i16* [[CONV1]], align 8 +// CHECK25-NEXT: store i32 [[ADD]], i32* [[CONV]], align 4 +// CHECK25-NEXT: [[TMP2:%.*]] = load i16, i16* [[CONV1]], align 2 // CHECK25-NEXT: [[CONV2:%.*]] = sext i16 [[TMP2]] to i32 // CHECK25-NEXT: [[ADD3:%.*]] = add nsw i32 [[CONV2]], 1 // CHECK25-NEXT: [[CONV4:%.*]] = trunc i32 [[ADD3]] to i16 -// CHECK25-NEXT: store i16 [[CONV4]], i16* [[CONV1]], align 8 +// CHECK25-NEXT: store i16 [[CONV4]], i16* [[CONV1]], align 2 // CHECK25-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], [10 x i32]* [[TMP0]], i64 0, i64 2 // CHECK25-NEXT: [[TMP3:%.*]] = load i32, i32* [[ARRAYIDX]], align 4 // CHECK25-NEXT: [[ADD5:%.*]] = add nsw i32 [[TMP3]], 1 @@ -10499,7 +10499,7 @@ int bar(int n){ // CHECK26-NEXT: [[AA_CASTED:%.*]] = alloca i64, align 8 // CHECK26-NEXT: store i64 [[AA]], i64* [[AA_ADDR]], align 8 // CHECK26-NEXT: [[CONV:%.*]] = bitcast i64* [[AA_ADDR]] to i16* -// CHECK26-NEXT: [[TMP0:%.*]] = load i16, i16* [[CONV]], align 8 +// CHECK26-NEXT: [[TMP0:%.*]] = load i16, i16* [[CONV]], align 2 // CHECK26-NEXT: [[CONV1:%.*]] = bitcast i64* [[AA_CASTED]] to i16* // CHECK26-NEXT: store i16 [[TMP0]], i16* [[CONV1]], align 2 // CHECK26-NEXT: 
[[TMP1:%.*]] = load i64, i64* [[AA_CASTED]], align 8 @@ -10517,11 +10517,11 @@ int bar(int n){ // CHECK26-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 // CHECK26-NEXT: store i64 [[AA]], i64* [[AA_ADDR]], align 8 // CHECK26-NEXT: [[CONV:%.*]] = bitcast i64* [[AA_ADDR]] to i16* -// CHECK26-NEXT: [[TMP0:%.*]] = load i16, i16* [[CONV]], align 8 +// CHECK26-NEXT: [[TMP0:%.*]] = load i16, i16* [[CONV]], align 2 // CHECK26-NEXT: [[CONV1:%.*]] = sext i16 [[TMP0]] to i32 // CHECK26-NEXT: [[ADD:%.*]] = add nsw i32 [[CONV1]], 1 // CHECK26-NEXT: [[CONV2:%.*]] = trunc i32 [[ADD]] to i16 -// CHECK26-NEXT: store i16 [[CONV2]], i16* [[CONV]], align 8 +// CHECK26-NEXT: store i16 [[CONV2]], i16* [[CONV]], align 2 // CHECK26-NEXT: [[TMP1:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK26-NEXT: [[TMP2:%.*]] = load i32, i32* [[TMP1]], align 4 // CHECK26-NEXT: [[TMP3:%.*]] = call i32 @__kmpc_cancel(%struct.ident_t* @[[GLOB1]], i32 [[TMP2]], i32 1) @@ -10545,11 +10545,11 @@ int bar(int n){ // CHECK26-NEXT: store i64 [[AA]], i64* [[AA_ADDR]], align 8 // CHECK26-NEXT: [[CONV:%.*]] = bitcast i64* [[A_ADDR]] to i32* // CHECK26-NEXT: [[CONV1:%.*]] = bitcast i64* [[AA_ADDR]] to i16* -// CHECK26-NEXT: [[TMP0:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK26-NEXT: [[TMP0:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK26-NEXT: [[CONV2:%.*]] = bitcast i64* [[A_CASTED]] to i32* // CHECK26-NEXT: store i32 [[TMP0]], i32* [[CONV2]], align 4 // CHECK26-NEXT: [[TMP1:%.*]] = load i64, i64* [[A_CASTED]], align 8 -// CHECK26-NEXT: [[TMP2:%.*]] = load i16, i16* [[CONV1]], align 8 +// CHECK26-NEXT: [[TMP2:%.*]] = load i16, i16* [[CONV1]], align 2 // CHECK26-NEXT: [[CONV3:%.*]] = bitcast i64* [[AA_CASTED]] to i16* // CHECK26-NEXT: store i16 [[TMP2]], i16* [[CONV3]], align 2 // CHECK26-NEXT: [[TMP3:%.*]] = load i64, i64* [[AA_CASTED]], align 8 @@ -10570,14 +10570,14 @@ int bar(int n){ // CHECK26-NEXT: store i64 [[AA]], i64* [[AA_ADDR]], align 8 // CHECK26-NEXT: [[CONV:%.*]] = bitcast i64* [[A_ADDR]] to i32* // CHECK26-NEXT: [[CONV1:%.*]] = bitcast i64* [[AA_ADDR]] to i16* -// CHECK26-NEXT: [[TMP0:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK26-NEXT: [[TMP0:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK26-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP0]], 1 -// CHECK26-NEXT: store i32 [[ADD]], i32* [[CONV]], align 8 -// CHECK26-NEXT: [[TMP1:%.*]] = load i16, i16* [[CONV1]], align 8 +// CHECK26-NEXT: store i32 [[ADD]], i32* [[CONV]], align 4 +// CHECK26-NEXT: [[TMP1:%.*]] = load i16, i16* [[CONV1]], align 2 // CHECK26-NEXT: [[CONV2:%.*]] = sext i16 [[TMP1]] to i32 // CHECK26-NEXT: [[ADD3:%.*]] = add nsw i32 [[CONV2]], 1 // CHECK26-NEXT: [[CONV4:%.*]] = trunc i32 [[ADD3]] to i16 -// CHECK26-NEXT: store i16 [[CONV4]], i16* [[CONV1]], align 8 +// CHECK26-NEXT: store i16 [[CONV4]], i16* [[CONV1]], align 2 // CHECK26-NEXT: ret void // // @@ -10612,7 +10612,7 @@ int bar(int n){ // CHECK26-NEXT: [[TMP5:%.*]] = load i64, i64* [[VLA_ADDR4]], align 8 // CHECK26-NEXT: [[TMP6:%.*]] = load double*, double** [[CN_ADDR]], align 8 // CHECK26-NEXT: [[TMP7:%.*]] = load %struct.TT*, %struct.TT** [[D_ADDR]], align 8 -// CHECK26-NEXT: [[TMP8:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK26-NEXT: [[TMP8:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK26-NEXT: [[CONV5:%.*]] = bitcast i64* [[A_CASTED]] to i32* // CHECK26-NEXT: store i32 [[TMP8]], i32* [[CONV5]], align 4 // CHECK26-NEXT: [[TMP9:%.*]] = load i64, i64* [[A_CASTED]], align 8 @@ -10654,9 +10654,9 @@ int bar(int n){ // CHECK26-NEXT: 
[[TMP5:%.*]] = load i64, i64* [[VLA_ADDR4]], align 8 // CHECK26-NEXT: [[TMP6:%.*]] = load double*, double** [[CN_ADDR]], align 8 // CHECK26-NEXT: [[TMP7:%.*]] = load %struct.TT*, %struct.TT** [[D_ADDR]], align 8 -// CHECK26-NEXT: [[TMP8:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK26-NEXT: [[TMP8:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK26-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP8]], 1 -// CHECK26-NEXT: store i32 [[ADD]], i32* [[CONV]], align 8 +// CHECK26-NEXT: store i32 [[ADD]], i32* [[CONV]], align 4 // CHECK26-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x float], [10 x float]* [[TMP0]], i64 0, i64 2 // CHECK26-NEXT: [[TMP9:%.*]] = load float, float* [[ARRAYIDX]], align 4 // CHECK26-NEXT: [[CONV5:%.*]] = fpext float [[TMP9]] to double @@ -10711,15 +10711,15 @@ int bar(int n){ // CHECK26-NEXT: [[CONV1:%.*]] = bitcast i64* [[AA_ADDR]] to i16* // CHECK26-NEXT: [[CONV2:%.*]] = bitcast i64* [[AAA_ADDR]] to i8* // CHECK26-NEXT: [[TMP0:%.*]] = load [10 x i32]*, [10 x i32]** [[B_ADDR]], align 8 -// CHECK26-NEXT: [[TMP1:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK26-NEXT: [[TMP1:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK26-NEXT: [[CONV3:%.*]] = bitcast i64* [[A_CASTED]] to i32* // CHECK26-NEXT: store i32 [[TMP1]], i32* [[CONV3]], align 4 // CHECK26-NEXT: [[TMP2:%.*]] = load i64, i64* [[A_CASTED]], align 8 -// CHECK26-NEXT: [[TMP3:%.*]] = load i16, i16* [[CONV1]], align 8 +// CHECK26-NEXT: [[TMP3:%.*]] = load i16, i16* [[CONV1]], align 2 // CHECK26-NEXT: [[CONV4:%.*]] = bitcast i64* [[AA_CASTED]] to i16* // CHECK26-NEXT: store i16 [[TMP3]], i16* [[CONV4]], align 2 // CHECK26-NEXT: [[TMP4:%.*]] = load i64, i64* [[AA_CASTED]], align 8 -// CHECK26-NEXT: [[TMP5:%.*]] = load i8, i8* [[CONV2]], align 8 +// CHECK26-NEXT: [[TMP5:%.*]] = load i8, i8* [[CONV2]], align 1 // CHECK26-NEXT: [[CONV5:%.*]] = bitcast i64* [[AAA_CASTED]] to i8* // CHECK26-NEXT: store i8 [[TMP5]], i8* [[CONV5]], align 1 // CHECK26-NEXT: [[TMP6:%.*]] = load i64, i64* [[AAA_CASTED]], align 8 @@ -10746,19 +10746,19 @@ int bar(int n){ // CHECK26-NEXT: [[CONV1:%.*]] = bitcast i64* [[AA_ADDR]] to i16* // CHECK26-NEXT: [[CONV2:%.*]] = bitcast i64* [[AAA_ADDR]] to i8* // CHECK26-NEXT: [[TMP0:%.*]] = load [10 x i32]*, [10 x i32]** [[B_ADDR]], align 8 -// CHECK26-NEXT: [[TMP1:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK26-NEXT: [[TMP1:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK26-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP1]], 1 -// CHECK26-NEXT: store i32 [[ADD]], i32* [[CONV]], align 8 -// CHECK26-NEXT: [[TMP2:%.*]] = load i16, i16* [[CONV1]], align 8 +// CHECK26-NEXT: store i32 [[ADD]], i32* [[CONV]], align 4 +// CHECK26-NEXT: [[TMP2:%.*]] = load i16, i16* [[CONV1]], align 2 // CHECK26-NEXT: [[CONV3:%.*]] = sext i16 [[TMP2]] to i32 // CHECK26-NEXT: [[ADD4:%.*]] = add nsw i32 [[CONV3]], 1 // CHECK26-NEXT: [[CONV5:%.*]] = trunc i32 [[ADD4]] to i16 -// CHECK26-NEXT: store i16 [[CONV5]], i16* [[CONV1]], align 8 -// CHECK26-NEXT: [[TMP3:%.*]] = load i8, i8* [[CONV2]], align 8 +// CHECK26-NEXT: store i16 [[CONV5]], i16* [[CONV1]], align 2 +// CHECK26-NEXT: [[TMP3:%.*]] = load i8, i8* [[CONV2]], align 1 // CHECK26-NEXT: [[CONV6:%.*]] = sext i8 [[TMP3]] to i32 // CHECK26-NEXT: [[ADD7:%.*]] = add nsw i32 [[CONV6]], 1 // CHECK26-NEXT: [[CONV8:%.*]] = trunc i32 [[ADD7]] to i8 -// CHECK26-NEXT: store i8 [[CONV8]], i8* [[CONV2]], align 8 +// CHECK26-NEXT: store i8 [[CONV8]], i8* [[CONV2]], align 1 // CHECK26-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], [10 x i32]* [[TMP0]], i64 0, i64 2 // 
CHECK26-NEXT: [[TMP4:%.*]] = load i32, i32* [[ARRAYIDX]], align 4 // CHECK26-NEXT: [[ADD9:%.*]] = add nsw i32 [[TMP4]], 1 @@ -10785,7 +10785,7 @@ int bar(int n){ // CHECK26-NEXT: [[TMP1:%.*]] = load i64, i64* [[VLA_ADDR]], align 8 // CHECK26-NEXT: [[TMP2:%.*]] = load i64, i64* [[VLA_ADDR2]], align 8 // CHECK26-NEXT: [[TMP3:%.*]] = load i16*, i16** [[C_ADDR]], align 8 -// CHECK26-NEXT: [[TMP4:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK26-NEXT: [[TMP4:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK26-NEXT: [[CONV3:%.*]] = bitcast i64* [[B_CASTED]] to i32* // CHECK26-NEXT: store i32 [[TMP4]], i32* [[CONV3]], align 4 // CHECK26-NEXT: [[TMP5:%.*]] = load i64, i64* [[B_CASTED]], align 8 @@ -10815,7 +10815,7 @@ int bar(int n){ // CHECK26-NEXT: [[TMP1:%.*]] = load i64, i64* [[VLA_ADDR]], align 8 // CHECK26-NEXT: [[TMP2:%.*]] = load i64, i64* [[VLA_ADDR2]], align 8 // CHECK26-NEXT: [[TMP3:%.*]] = load i16*, i16** [[C_ADDR]], align 8 -// CHECK26-NEXT: [[TMP4:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK26-NEXT: [[TMP4:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK26-NEXT: [[CONV3:%.*]] = sitofp i32 [[TMP4]] to double // CHECK26-NEXT: [[ADD:%.*]] = fadd double [[CONV3]], 1.500000e+00 // CHECK26-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_S1:%.*]], %struct.S1* [[TMP0]], i32 0, i32 0 @@ -10846,11 +10846,11 @@ int bar(int n){ // CHECK26-NEXT: [[CONV:%.*]] = bitcast i64* [[A_ADDR]] to i32* // CHECK26-NEXT: [[CONV1:%.*]] = bitcast i64* [[AA_ADDR]] to i16* // CHECK26-NEXT: [[TMP0:%.*]] = load [10 x i32]*, [10 x i32]** [[B_ADDR]], align 8 -// CHECK26-NEXT: [[TMP1:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK26-NEXT: [[TMP1:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK26-NEXT: [[CONV2:%.*]] = bitcast i64* [[A_CASTED]] to i32* // CHECK26-NEXT: store i32 [[TMP1]], i32* [[CONV2]], align 4 // CHECK26-NEXT: [[TMP2:%.*]] = load i64, i64* [[A_CASTED]], align 8 -// CHECK26-NEXT: [[TMP3:%.*]] = load i16, i16* [[CONV1]], align 8 +// CHECK26-NEXT: [[TMP3:%.*]] = load i16, i16* [[CONV1]], align 2 // CHECK26-NEXT: [[CONV3:%.*]] = bitcast i64* [[AA_CASTED]] to i16* // CHECK26-NEXT: store i16 [[TMP3]], i16* [[CONV3]], align 2 // CHECK26-NEXT: [[TMP4:%.*]] = load i64, i64* [[AA_CASTED]], align 8 @@ -10874,14 +10874,14 @@ int bar(int n){ // CHECK26-NEXT: [[CONV:%.*]] = bitcast i64* [[A_ADDR]] to i32* // CHECK26-NEXT: [[CONV1:%.*]] = bitcast i64* [[AA_ADDR]] to i16* // CHECK26-NEXT: [[TMP0:%.*]] = load [10 x i32]*, [10 x i32]** [[B_ADDR]], align 8 -// CHECK26-NEXT: [[TMP1:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK26-NEXT: [[TMP1:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK26-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP1]], 1 -// CHECK26-NEXT: store i32 [[ADD]], i32* [[CONV]], align 8 -// CHECK26-NEXT: [[TMP2:%.*]] = load i16, i16* [[CONV1]], align 8 +// CHECK26-NEXT: store i32 [[ADD]], i32* [[CONV]], align 4 +// CHECK26-NEXT: [[TMP2:%.*]] = load i16, i16* [[CONV1]], align 2 // CHECK26-NEXT: [[CONV2:%.*]] = sext i16 [[TMP2]] to i32 // CHECK26-NEXT: [[ADD3:%.*]] = add nsw i32 [[CONV2]], 1 // CHECK26-NEXT: [[CONV4:%.*]] = trunc i32 [[ADD3]] to i16 -// CHECK26-NEXT: store i16 [[CONV4]], i16* [[CONV1]], align 8 +// CHECK26-NEXT: store i16 [[CONV4]], i16* [[CONV1]], align 2 // CHECK26-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], [10 x i32]* [[TMP0]], i64 0, i64 2 // CHECK26-NEXT: [[TMP3:%.*]] = load i32, i32* [[ARRAYIDX]], align 4 // CHECK26-NEXT: [[ADD5:%.*]] = add nsw i32 [[TMP3]], 1 @@ -10913,7 +10913,7 @@ int bar(int n){ // CHECK27-NEXT: [[AA_CASTED:%.*]] = alloca i32, align 
4 // CHECK27-NEXT: store i32 [[AA]], i32* [[AA_ADDR]], align 4 // CHECK27-NEXT: [[CONV:%.*]] = bitcast i32* [[AA_ADDR]] to i16* -// CHECK27-NEXT: [[TMP0:%.*]] = load i16, i16* [[CONV]], align 4 +// CHECK27-NEXT: [[TMP0:%.*]] = load i16, i16* [[CONV]], align 2 // CHECK27-NEXT: [[CONV1:%.*]] = bitcast i32* [[AA_CASTED]] to i16* // CHECK27-NEXT: store i16 [[TMP0]], i16* [[CONV1]], align 2 // CHECK27-NEXT: [[TMP1:%.*]] = load i32, i32* [[AA_CASTED]], align 4 @@ -10931,11 +10931,11 @@ int bar(int n){ // CHECK27-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4 // CHECK27-NEXT: store i32 [[AA]], i32* [[AA_ADDR]], align 4 // CHECK27-NEXT: [[CONV:%.*]] = bitcast i32* [[AA_ADDR]] to i16* -// CHECK27-NEXT: [[TMP0:%.*]] = load i16, i16* [[CONV]], align 4 +// CHECK27-NEXT: [[TMP0:%.*]] = load i16, i16* [[CONV]], align 2 // CHECK27-NEXT: [[CONV1:%.*]] = sext i16 [[TMP0]] to i32 // CHECK27-NEXT: [[ADD:%.*]] = add nsw i32 [[CONV1]], 1 // CHECK27-NEXT: [[CONV2:%.*]] = trunc i32 [[ADD]] to i16 -// CHECK27-NEXT: store i16 [[CONV2]], i16* [[CONV]], align 4 +// CHECK27-NEXT: store i16 [[CONV2]], i16* [[CONV]], align 2 // CHECK27-NEXT: [[TMP1:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK27-NEXT: [[TMP2:%.*]] = load i32, i32* [[TMP1]], align 4 // CHECK27-NEXT: [[TMP3:%.*]] = call i32 @__kmpc_cancel(%struct.ident_t* @[[GLOB1]], i32 [[TMP2]], i32 1) @@ -10961,7 +10961,7 @@ int bar(int n){ // CHECK27-NEXT: [[TMP0:%.*]] = load i32, i32* [[A_ADDR]], align 4 // CHECK27-NEXT: store i32 [[TMP0]], i32* [[A_CASTED]], align 4 // CHECK27-NEXT: [[TMP1:%.*]] = load i32, i32* [[A_CASTED]], align 4 -// CHECK27-NEXT: [[TMP2:%.*]] = load i16, i16* [[CONV]], align 4 +// CHECK27-NEXT: [[TMP2:%.*]] = load i16, i16* [[CONV]], align 2 // CHECK27-NEXT: [[CONV1:%.*]] = bitcast i32* [[AA_CASTED]] to i16* // CHECK27-NEXT: store i16 [[TMP2]], i16* [[CONV1]], align 2 // CHECK27-NEXT: [[TMP3:%.*]] = load i32, i32* [[AA_CASTED]], align 4 @@ -10984,11 +10984,11 @@ int bar(int n){ // CHECK27-NEXT: [[TMP0:%.*]] = load i32, i32* [[A_ADDR]], align 4 // CHECK27-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP0]], 1 // CHECK27-NEXT: store i32 [[ADD]], i32* [[A_ADDR]], align 4 -// CHECK27-NEXT: [[TMP1:%.*]] = load i16, i16* [[CONV]], align 4 +// CHECK27-NEXT: [[TMP1:%.*]] = load i16, i16* [[CONV]], align 2 // CHECK27-NEXT: [[CONV1:%.*]] = sext i16 [[TMP1]] to i32 // CHECK27-NEXT: [[ADD2:%.*]] = add nsw i32 [[CONV1]], 1 // CHECK27-NEXT: [[CONV3:%.*]] = trunc i32 [[ADD2]] to i16 -// CHECK27-NEXT: store i16 [[CONV3]], i16* [[CONV]], align 4 +// CHECK27-NEXT: store i16 [[CONV3]], i16* [[CONV]], align 2 // CHECK27-NEXT: ret void // // @@ -11121,11 +11121,11 @@ int bar(int n){ // CHECK27-NEXT: [[TMP1:%.*]] = load i32, i32* [[A_ADDR]], align 4 // CHECK27-NEXT: store i32 [[TMP1]], i32* [[A_CASTED]], align 4 // CHECK27-NEXT: [[TMP2:%.*]] = load i32, i32* [[A_CASTED]], align 4 -// CHECK27-NEXT: [[TMP3:%.*]] = load i16, i16* [[CONV]], align 4 +// CHECK27-NEXT: [[TMP3:%.*]] = load i16, i16* [[CONV]], align 2 // CHECK27-NEXT: [[CONV2:%.*]] = bitcast i32* [[AA_CASTED]] to i16* // CHECK27-NEXT: store i16 [[TMP3]], i16* [[CONV2]], align 2 // CHECK27-NEXT: [[TMP4:%.*]] = load i32, i32* [[AA_CASTED]], align 4 -// CHECK27-NEXT: [[TMP5:%.*]] = load i8, i8* [[CONV1]], align 4 +// CHECK27-NEXT: [[TMP5:%.*]] = load i8, i8* [[CONV1]], align 1 // CHECK27-NEXT: [[CONV3:%.*]] = bitcast i32* [[AAA_CASTED]] to i8* // CHECK27-NEXT: store i8 [[TMP5]], i8* [[CONV3]], align 1 // CHECK27-NEXT: [[TMP6:%.*]] = load i32, i32* [[AAA_CASTED]], align 4 @@ 
-11154,16 +11154,16 @@ int bar(int n){ // CHECK27-NEXT: [[TMP1:%.*]] = load i32, i32* [[A_ADDR]], align 4 // CHECK27-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP1]], 1 // CHECK27-NEXT: store i32 [[ADD]], i32* [[A_ADDR]], align 4 -// CHECK27-NEXT: [[TMP2:%.*]] = load i16, i16* [[CONV]], align 4 +// CHECK27-NEXT: [[TMP2:%.*]] = load i16, i16* [[CONV]], align 2 // CHECK27-NEXT: [[CONV2:%.*]] = sext i16 [[TMP2]] to i32 // CHECK27-NEXT: [[ADD3:%.*]] = add nsw i32 [[CONV2]], 1 // CHECK27-NEXT: [[CONV4:%.*]] = trunc i32 [[ADD3]] to i16 -// CHECK27-NEXT: store i16 [[CONV4]], i16* [[CONV]], align 4 -// CHECK27-NEXT: [[TMP3:%.*]] = load i8, i8* [[CONV1]], align 4 +// CHECK27-NEXT: store i16 [[CONV4]], i16* [[CONV]], align 2 +// CHECK27-NEXT: [[TMP3:%.*]] = load i8, i8* [[CONV1]], align 1 // CHECK27-NEXT: [[CONV5:%.*]] = sext i8 [[TMP3]] to i32 // CHECK27-NEXT: [[ADD6:%.*]] = add nsw i32 [[CONV5]], 1 // CHECK27-NEXT: [[CONV7:%.*]] = trunc i32 [[ADD6]] to i8 -// CHECK27-NEXT: store i8 [[CONV7]], i8* [[CONV1]], align 4 +// CHECK27-NEXT: store i8 [[CONV7]], i8* [[CONV1]], align 1 // CHECK27-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], [10 x i32]* [[TMP0]], i32 0, i32 2 // CHECK27-NEXT: [[TMP4:%.*]] = load i32, i32* [[ARRAYIDX]], align 4 // CHECK27-NEXT: [[ADD8:%.*]] = add nsw i32 [[TMP4]], 1 @@ -11250,7 +11250,7 @@ int bar(int n){ // CHECK27-NEXT: [[TMP1:%.*]] = load i32, i32* [[A_ADDR]], align 4 // CHECK27-NEXT: store i32 [[TMP1]], i32* [[A_CASTED]], align 4 // CHECK27-NEXT: [[TMP2:%.*]] = load i32, i32* [[A_CASTED]], align 4 -// CHECK27-NEXT: [[TMP3:%.*]] = load i16, i16* [[CONV]], align 4 +// CHECK27-NEXT: [[TMP3:%.*]] = load i16, i16* [[CONV]], align 2 // CHECK27-NEXT: [[CONV1:%.*]] = bitcast i32* [[AA_CASTED]] to i16* // CHECK27-NEXT: store i16 [[TMP3]], i16* [[CONV1]], align 2 // CHECK27-NEXT: [[TMP4:%.*]] = load i32, i32* [[AA_CASTED]], align 4 @@ -11276,11 +11276,11 @@ int bar(int n){ // CHECK27-NEXT: [[TMP1:%.*]] = load i32, i32* [[A_ADDR]], align 4 // CHECK27-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP1]], 1 // CHECK27-NEXT: store i32 [[ADD]], i32* [[A_ADDR]], align 4 -// CHECK27-NEXT: [[TMP2:%.*]] = load i16, i16* [[CONV]], align 4 +// CHECK27-NEXT: [[TMP2:%.*]] = load i16, i16* [[CONV]], align 2 // CHECK27-NEXT: [[CONV1:%.*]] = sext i16 [[TMP2]] to i32 // CHECK27-NEXT: [[ADD2:%.*]] = add nsw i32 [[CONV1]], 1 // CHECK27-NEXT: [[CONV3:%.*]] = trunc i32 [[ADD2]] to i16 -// CHECK27-NEXT: store i16 [[CONV3]], i16* [[CONV]], align 4 +// CHECK27-NEXT: store i16 [[CONV3]], i16* [[CONV]], align 2 // CHECK27-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], [10 x i32]* [[TMP0]], i32 0, i32 2 // CHECK27-NEXT: [[TMP3:%.*]] = load i32, i32* [[ARRAYIDX]], align 4 // CHECK27-NEXT: [[ADD4:%.*]] = add nsw i32 [[TMP3]], 1 @@ -11312,7 +11312,7 @@ int bar(int n){ // CHECK28-NEXT: [[AA_CASTED:%.*]] = alloca i32, align 4 // CHECK28-NEXT: store i32 [[AA]], i32* [[AA_ADDR]], align 4 // CHECK28-NEXT: [[CONV:%.*]] = bitcast i32* [[AA_ADDR]] to i16* -// CHECK28-NEXT: [[TMP0:%.*]] = load i16, i16* [[CONV]], align 4 +// CHECK28-NEXT: [[TMP0:%.*]] = load i16, i16* [[CONV]], align 2 // CHECK28-NEXT: [[CONV1:%.*]] = bitcast i32* [[AA_CASTED]] to i16* // CHECK28-NEXT: store i16 [[TMP0]], i16* [[CONV1]], align 2 // CHECK28-NEXT: [[TMP1:%.*]] = load i32, i32* [[AA_CASTED]], align 4 @@ -11330,11 +11330,11 @@ int bar(int n){ // CHECK28-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4 // CHECK28-NEXT: store i32 [[AA]], i32* [[AA_ADDR]], align 4 // CHECK28-NEXT: [[CONV:%.*]] = bitcast i32* 
[[AA_ADDR]] to i16* -// CHECK28-NEXT: [[TMP0:%.*]] = load i16, i16* [[CONV]], align 4 +// CHECK28-NEXT: [[TMP0:%.*]] = load i16, i16* [[CONV]], align 2 // CHECK28-NEXT: [[CONV1:%.*]] = sext i16 [[TMP0]] to i32 // CHECK28-NEXT: [[ADD:%.*]] = add nsw i32 [[CONV1]], 1 // CHECK28-NEXT: [[CONV2:%.*]] = trunc i32 [[ADD]] to i16 -// CHECK28-NEXT: store i16 [[CONV2]], i16* [[CONV]], align 4 +// CHECK28-NEXT: store i16 [[CONV2]], i16* [[CONV]], align 2 // CHECK28-NEXT: [[TMP1:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK28-NEXT: [[TMP2:%.*]] = load i32, i32* [[TMP1]], align 4 // CHECK28-NEXT: [[TMP3:%.*]] = call i32 @__kmpc_cancel(%struct.ident_t* @[[GLOB1]], i32 [[TMP2]], i32 1) @@ -11360,7 +11360,7 @@ int bar(int n){ // CHECK28-NEXT: [[TMP0:%.*]] = load i32, i32* [[A_ADDR]], align 4 // CHECK28-NEXT: store i32 [[TMP0]], i32* [[A_CASTED]], align 4 // CHECK28-NEXT: [[TMP1:%.*]] = load i32, i32* [[A_CASTED]], align 4 -// CHECK28-NEXT: [[TMP2:%.*]] = load i16, i16* [[CONV]], align 4 +// CHECK28-NEXT: [[TMP2:%.*]] = load i16, i16* [[CONV]], align 2 // CHECK28-NEXT: [[CONV1:%.*]] = bitcast i32* [[AA_CASTED]] to i16* // CHECK28-NEXT: store i16 [[TMP2]], i16* [[CONV1]], align 2 // CHECK28-NEXT: [[TMP3:%.*]] = load i32, i32* [[AA_CASTED]], align 4 @@ -11383,11 +11383,11 @@ int bar(int n){ // CHECK28-NEXT: [[TMP0:%.*]] = load i32, i32* [[A_ADDR]], align 4 // CHECK28-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP0]], 1 // CHECK28-NEXT: store i32 [[ADD]], i32* [[A_ADDR]], align 4 -// CHECK28-NEXT: [[TMP1:%.*]] = load i16, i16* [[CONV]], align 4 +// CHECK28-NEXT: [[TMP1:%.*]] = load i16, i16* [[CONV]], align 2 // CHECK28-NEXT: [[CONV1:%.*]] = sext i16 [[TMP1]] to i32 // CHECK28-NEXT: [[ADD2:%.*]] = add nsw i32 [[CONV1]], 1 // CHECK28-NEXT: [[CONV3:%.*]] = trunc i32 [[ADD2]] to i16 -// CHECK28-NEXT: store i16 [[CONV3]], i16* [[CONV]], align 4 +// CHECK28-NEXT: store i16 [[CONV3]], i16* [[CONV]], align 2 // CHECK28-NEXT: ret void // // @@ -11520,11 +11520,11 @@ int bar(int n){ // CHECK28-NEXT: [[TMP1:%.*]] = load i32, i32* [[A_ADDR]], align 4 // CHECK28-NEXT: store i32 [[TMP1]], i32* [[A_CASTED]], align 4 // CHECK28-NEXT: [[TMP2:%.*]] = load i32, i32* [[A_CASTED]], align 4 -// CHECK28-NEXT: [[TMP3:%.*]] = load i16, i16* [[CONV]], align 4 +// CHECK28-NEXT: [[TMP3:%.*]] = load i16, i16* [[CONV]], align 2 // CHECK28-NEXT: [[CONV2:%.*]] = bitcast i32* [[AA_CASTED]] to i16* // CHECK28-NEXT: store i16 [[TMP3]], i16* [[CONV2]], align 2 // CHECK28-NEXT: [[TMP4:%.*]] = load i32, i32* [[AA_CASTED]], align 4 -// CHECK28-NEXT: [[TMP5:%.*]] = load i8, i8* [[CONV1]], align 4 +// CHECK28-NEXT: [[TMP5:%.*]] = load i8, i8* [[CONV1]], align 1 // CHECK28-NEXT: [[CONV3:%.*]] = bitcast i32* [[AAA_CASTED]] to i8* // CHECK28-NEXT: store i8 [[TMP5]], i8* [[CONV3]], align 1 // CHECK28-NEXT: [[TMP6:%.*]] = load i32, i32* [[AAA_CASTED]], align 4 @@ -11553,16 +11553,16 @@ int bar(int n){ // CHECK28-NEXT: [[TMP1:%.*]] = load i32, i32* [[A_ADDR]], align 4 // CHECK28-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP1]], 1 // CHECK28-NEXT: store i32 [[ADD]], i32* [[A_ADDR]], align 4 -// CHECK28-NEXT: [[TMP2:%.*]] = load i16, i16* [[CONV]], align 4 +// CHECK28-NEXT: [[TMP2:%.*]] = load i16, i16* [[CONV]], align 2 // CHECK28-NEXT: [[CONV2:%.*]] = sext i16 [[TMP2]] to i32 // CHECK28-NEXT: [[ADD3:%.*]] = add nsw i32 [[CONV2]], 1 // CHECK28-NEXT: [[CONV4:%.*]] = trunc i32 [[ADD3]] to i16 -// CHECK28-NEXT: store i16 [[CONV4]], i16* [[CONV]], align 4 -// CHECK28-NEXT: [[TMP3:%.*]] = load i8, i8* [[CONV1]], align 4 +// CHECK28-NEXT: store i16 
[[CONV4]], i16* [[CONV]], align 2 +// CHECK28-NEXT: [[TMP3:%.*]] = load i8, i8* [[CONV1]], align 1 // CHECK28-NEXT: [[CONV5:%.*]] = sext i8 [[TMP3]] to i32 // CHECK28-NEXT: [[ADD6:%.*]] = add nsw i32 [[CONV5]], 1 // CHECK28-NEXT: [[CONV7:%.*]] = trunc i32 [[ADD6]] to i8 -// CHECK28-NEXT: store i8 [[CONV7]], i8* [[CONV1]], align 4 +// CHECK28-NEXT: store i8 [[CONV7]], i8* [[CONV1]], align 1 // CHECK28-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], [10 x i32]* [[TMP0]], i32 0, i32 2 // CHECK28-NEXT: [[TMP4:%.*]] = load i32, i32* [[ARRAYIDX]], align 4 // CHECK28-NEXT: [[ADD8:%.*]] = add nsw i32 [[TMP4]], 1 @@ -11649,7 +11649,7 @@ int bar(int n){ // CHECK28-NEXT: [[TMP1:%.*]] = load i32, i32* [[A_ADDR]], align 4 // CHECK28-NEXT: store i32 [[TMP1]], i32* [[A_CASTED]], align 4 // CHECK28-NEXT: [[TMP2:%.*]] = load i32, i32* [[A_CASTED]], align 4 -// CHECK28-NEXT: [[TMP3:%.*]] = load i16, i16* [[CONV]], align 4 +// CHECK28-NEXT: [[TMP3:%.*]] = load i16, i16* [[CONV]], align 2 // CHECK28-NEXT: [[CONV1:%.*]] = bitcast i32* [[AA_CASTED]] to i16* // CHECK28-NEXT: store i16 [[TMP3]], i16* [[CONV1]], align 2 // CHECK28-NEXT: [[TMP4:%.*]] = load i32, i32* [[AA_CASTED]], align 4 @@ -11675,11 +11675,11 @@ int bar(int n){ // CHECK28-NEXT: [[TMP1:%.*]] = load i32, i32* [[A_ADDR]], align 4 // CHECK28-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP1]], 1 // CHECK28-NEXT: store i32 [[ADD]], i32* [[A_ADDR]], align 4 -// CHECK28-NEXT: [[TMP2:%.*]] = load i16, i16* [[CONV]], align 4 +// CHECK28-NEXT: [[TMP2:%.*]] = load i16, i16* [[CONV]], align 2 // CHECK28-NEXT: [[CONV1:%.*]] = sext i16 [[TMP2]] to i32 // CHECK28-NEXT: [[ADD2:%.*]] = add nsw i32 [[CONV1]], 1 // CHECK28-NEXT: [[CONV3:%.*]] = trunc i32 [[ADD2]] to i16 -// CHECK28-NEXT: store i16 [[CONV3]], i16* [[CONV]], align 4 +// CHECK28-NEXT: store i16 [[CONV3]], i16* [[CONV]], align 2 // CHECK28-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], [10 x i32]* [[TMP0]], i32 0, i32 2 // CHECK28-NEXT: [[TMP3:%.*]] = load i32, i32* [[ARRAYIDX]], align 4 // CHECK28-NEXT: [[ADD4:%.*]] = add nsw i32 [[TMP3]], 1 diff --git a/clang/test/OpenMP/target_parallel_debug_codegen.cpp b/clang/test/OpenMP/target_parallel_debug_codegen.cpp index 0ac82cb0106c..f8cf931a431f 100644 --- a/clang/test/OpenMP/target_parallel_debug_codegen.cpp +++ b/clang/test/OpenMP/target_parallel_debug_codegen.cpp @@ -246,7 +246,7 @@ int main() { // CHECK1-NEXT: [[TMP3:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8, !dbg [[DBG114]] // CHECK1-NEXT: [[TMP4:%.*]] = load i32*, i32** [[DOTBOUND_TID__ADDR]], align 8, !dbg [[DBG114]] // CHECK1-NEXT: [[TMP5:%.*]] = load [10 x [10 x [10 x i32]]]*, [10 x [10 x [10 x i32]]]** [[C_ADDR]], align 8, !dbg [[DBG114]] -// CHECK1-NEXT: [[TMP6:%.*]] = load i32, i32* [[CONV]], align 8, !dbg [[DBG114]] +// CHECK1-NEXT: [[TMP6:%.*]] = load i32, i32* [[CONV]], align 4, !dbg [[DBG114]] // CHECK1-NEXT: [[TMP7:%.*]] = load [10 x [10 x i32]]*, [10 x [10 x i32]]** [[B_ADDR]], align 8, !dbg [[DBG114]] // CHECK1-NEXT: [[TMP8:%.*]] = load i8*, i8** [[BB_ADDR]], align 8, !dbg [[DBG114]] // CHECK1-NEXT: [[TMP9:%.*]] = addrspacecast [10 x [10 x [10 x i32]]]* [[TMP5]] to [10 x [10 x [10 x i32]]] addrspace(1)*, !dbg [[DBG114]] @@ -275,7 +275,7 @@ int main() { // CHECK1-NEXT: [[TMP1:%.*]] = load [10 x [10 x i32]]*, [10 x [10 x i32]]** [[B_ADDR]], align 8, !dbg [[DBG123]] // CHECK1-NEXT: [[TMP2:%.*]] = load i8*, i8** [[BB_ADDR]], align 8, !dbg [[DBG123]] // CHECK1-NEXT: [[TMP3:%.*]] = load [10 x [10 x [10 x i32]]]*, [10 x [10 x [10 x i32]]]** [[C_ADDR]], align 8, 
!dbg [[DBG123]] -// CHECK1-NEXT: [[TMP4:%.*]] = load i32, i32* [[CONV]], align 8, !dbg [[DBG123]] +// CHECK1-NEXT: [[TMP4:%.*]] = load i32, i32* [[CONV]], align 4, !dbg [[DBG123]] // CHECK1-NEXT: [[TMP5:%.*]] = load [10 x [10 x i32]]*, [10 x [10 x i32]]** [[B_ADDR]], align 8, !dbg [[DBG123]] // CHECK1-NEXT: [[TMP6:%.*]] = load i8*, i8** [[BB_ADDR]], align 8, !dbg [[DBG123]] // CHECK1-NEXT: [[TMP7:%.*]] = addrspacecast [10 x [10 x [10 x i32]]]* [[TMP3]] to [10 x [10 x [10 x i32]]] addrspace(1)*, !dbg [[DBG123]] @@ -455,7 +455,7 @@ int main() { // CHECK1-NEXT: [[TMP3:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8, !dbg [[DBG191]] // CHECK1-NEXT: [[TMP4:%.*]] = load i32*, i32** [[DOTBOUND_TID__ADDR]], align 8, !dbg [[DBG191]] // CHECK1-NEXT: [[TMP5:%.*]] = load [10 x [10 x [10 x i32]]]*, [10 x [10 x [10 x i32]]]** [[C_ADDR]], align 8, !dbg [[DBG191]] -// CHECK1-NEXT: [[TMP6:%.*]] = load i32, i32* [[CONV]], align 8, !dbg [[DBG191]] +// CHECK1-NEXT: [[TMP6:%.*]] = load i32, i32* [[CONV]], align 4, !dbg [[DBG191]] // CHECK1-NEXT: [[TMP7:%.*]] = load [10 x [10 x i32]]*, [10 x [10 x i32]]** [[B_ADDR]], align 8, !dbg [[DBG191]] // CHECK1-NEXT: [[TMP8:%.*]] = load i8*, i8** [[BB_ADDR]], align 8, !dbg [[DBG191]] // CHECK1-NEXT: [[TMP9:%.*]] = addrspacecast [10 x [10 x [10 x i32]]]* [[TMP5]] to [10 x [10 x [10 x i32]]] addrspace(1)*, !dbg [[DBG191]] @@ -485,7 +485,7 @@ int main() { // CHECK1-NEXT: [[TMP1:%.*]] = load [10 x [10 x i32]]*, [10 x [10 x i32]]** [[B_ADDR]], align 8, !dbg [[DBG198]] // CHECK1-NEXT: [[TMP2:%.*]] = load i8*, i8** [[BB_ADDR]], align 8, !dbg [[DBG198]] // CHECK1-NEXT: [[TMP3:%.*]] = load [10 x [10 x [10 x i32]]]*, [10 x [10 x [10 x i32]]]** [[C_ADDR]], align 8, !dbg [[DBG198]] -// CHECK1-NEXT: [[TMP4:%.*]] = load i32, i32* [[CONV]], align 8, !dbg [[DBG198]] +// CHECK1-NEXT: [[TMP4:%.*]] = load i32, i32* [[CONV]], align 4, !dbg [[DBG198]] // CHECK1-NEXT: [[TMP5:%.*]] = load [10 x [10 x i32]]*, [10 x [10 x i32]]** [[B_ADDR]], align 8, !dbg [[DBG198]] // CHECK1-NEXT: [[TMP6:%.*]] = load i8*, i8** [[BB_ADDR]], align 8, !dbg [[DBG198]] // CHECK1-NEXT: [[TMP7:%.*]] = addrspacecast [10 x [10 x [10 x i32]]]* [[TMP3]] to [10 x [10 x [10 x i32]]] addrspace(1)*, !dbg [[DBG198]] diff --git a/clang/test/OpenMP/target_parallel_for_codegen.cpp b/clang/test/OpenMP/target_parallel_for_codegen.cpp index 585ecf61cf35..6ee0d4a6a896 100644 --- a/clang/test/OpenMP/target_parallel_for_codegen.cpp +++ b/clang/test/OpenMP/target_parallel_for_codegen.cpp @@ -735,7 +735,7 @@ int bar(int n){ // CHECK1-NEXT: store i64 [[A]], i64* [[A_ADDR]], align 8 // CHECK1-NEXT: store i64 [[K]], i64* [[K_ADDR]], align 8 // CHECK1-NEXT: [[CONV:%.*]] = bitcast i64* [[A_ADDR]] to i32* -// CHECK1-NEXT: [[TMP0:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK1-NEXT: [[CONV1:%.*]] = bitcast i64* [[A_CASTED]] to i32* // CHECK1-NEXT: store i32 [[TMP0]], i32* [[CONV1]], align 4 // CHECK1-NEXT: [[TMP1:%.*]] = load i64, i64* [[A_CASTED]], align 8 @@ -802,9 +802,9 @@ int bar(int n){ // CHECK1-NEXT: [[CONV3:%.*]] = sext i32 [[MUL2]] to i64 // CHECK1-NEXT: [[ADD:%.*]] = add nsw i64 [[TMP8]], [[CONV3]] // CHECK1-NEXT: store i64 [[ADD]], i64* [[K1]], align 8, !llvm.access.group !12 -// CHECK1-NEXT: [[TMP10:%.*]] = load i32, i32* [[CONV]], align 8, !llvm.access.group !12 +// CHECK1-NEXT: [[TMP10:%.*]] = load i32, i32* [[CONV]], align 4, !llvm.access.group !12 // CHECK1-NEXT: [[ADD4:%.*]] = add nsw i32 [[TMP10]], 1 -// CHECK1-NEXT: store i32 [[ADD4]], i32* 
[[CONV]], align 8, !llvm.access.group !12 +// CHECK1-NEXT: store i32 [[ADD4]], i32* [[CONV]], align 4, !llvm.access.group !12 // CHECK1-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK1: omp.body.continue: // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] @@ -844,15 +844,15 @@ int bar(int n){ // CHECK1-NEXT: [[CONV:%.*]] = bitcast i64* [[AA_ADDR]] to i16* // CHECK1-NEXT: [[CONV1:%.*]] = bitcast i64* [[LIN_ADDR]] to i32* // CHECK1-NEXT: [[CONV2:%.*]] = bitcast i64* [[A_ADDR]] to i32* -// CHECK1-NEXT: [[TMP0:%.*]] = load i16, i16* [[CONV]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load i16, i16* [[CONV]], align 2 // CHECK1-NEXT: [[CONV3:%.*]] = bitcast i64* [[AA_CASTED]] to i16* // CHECK1-NEXT: store i16 [[TMP0]], i16* [[CONV3]], align 2 // CHECK1-NEXT: [[TMP1:%.*]] = load i64, i64* [[AA_CASTED]], align 8 -// CHECK1-NEXT: [[TMP2:%.*]] = load i32, i32* [[CONV1]], align 8 +// CHECK1-NEXT: [[TMP2:%.*]] = load i32, i32* [[CONV1]], align 4 // CHECK1-NEXT: [[CONV4:%.*]] = bitcast i64* [[LIN_CASTED]] to i32* // CHECK1-NEXT: store i32 [[TMP2]], i32* [[CONV4]], align 4 // CHECK1-NEXT: [[TMP3:%.*]] = load i64, i64* [[LIN_CASTED]], align 8 -// CHECK1-NEXT: [[TMP4:%.*]] = load i32, i32* [[CONV2]], align 8 +// CHECK1-NEXT: [[TMP4:%.*]] = load i32, i32* [[CONV2]], align 4 // CHECK1-NEXT: [[CONV5:%.*]] = bitcast i64* [[A_CASTED]] to i32* // CHECK1-NEXT: store i32 [[TMP4]], i32* [[CONV5]], align 4 // CHECK1-NEXT: [[TMP5:%.*]] = load i64, i64* [[A_CASTED]], align 8 @@ -888,9 +888,9 @@ int bar(int n){ // CHECK1-NEXT: [[CONV:%.*]] = bitcast i64* [[AA_ADDR]] to i16* // CHECK1-NEXT: [[CONV1:%.*]] = bitcast i64* [[LIN_ADDR]] to i32* // CHECK1-NEXT: [[CONV2:%.*]] = bitcast i64* [[A_ADDR]] to i32* -// CHECK1-NEXT: [[TMP0:%.*]] = load i32, i32* [[CONV1]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load i32, i32* [[CONV1]], align 4 // CHECK1-NEXT: store i32 [[TMP0]], i32* [[DOTLINEAR_START]], align 4 -// CHECK1-NEXT: [[TMP1:%.*]] = load i32, i32* [[CONV2]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = load i32, i32* [[CONV2]], align 4 // CHECK1-NEXT: store i32 [[TMP1]], i32* [[DOTLINEAR_START3]], align 4 // CHECK1-NEXT: [[CALL:%.*]] = call i64 @_Z7get_valv() // CHECK1-NEXT: store i64 [[CALL]], i64* [[DOTLINEAR_STEP]], align 8 @@ -942,11 +942,11 @@ int bar(int n){ // CHECK1-NEXT: [[ADD12:%.*]] = add i64 [[CONV10]], [[MUL11]] // CHECK1-NEXT: [[CONV13:%.*]] = trunc i64 [[ADD12]] to i32 // CHECK1-NEXT: store i32 [[CONV13]], i32* [[A5]], align 4 -// CHECK1-NEXT: [[TMP16:%.*]] = load i16, i16* [[CONV]], align 8 +// CHECK1-NEXT: [[TMP16:%.*]] = load i16, i16* [[CONV]], align 2 // CHECK1-NEXT: [[CONV14:%.*]] = sext i16 [[TMP16]] to i32 // CHECK1-NEXT: [[ADD15:%.*]] = add nsw i32 [[CONV14]], 1 // CHECK1-NEXT: [[CONV16:%.*]] = trunc i32 [[ADD15]] to i16 -// CHECK1-NEXT: store i16 [[CONV16]], i16* [[CONV]], align 8 +// CHECK1-NEXT: store i16 [[CONV16]], i16* [[CONV]], align 2 // CHECK1-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK1: omp.body.continue: // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] @@ -964,9 +964,9 @@ int bar(int n){ // CHECK1-NEXT: br i1 [[TMP19]], label [[DOTOMP_LINEAR_PU:%.*]], label [[DOTOMP_LINEAR_PU_DONE:%.*]] // CHECK1: .omp.linear.pu: // CHECK1-NEXT: [[TMP20:%.*]] = load i32, i32* [[LIN4]], align 4 -// CHECK1-NEXT: store i32 [[TMP20]], i32* [[CONV1]], align 8 +// CHECK1-NEXT: store i32 [[TMP20]], i32* [[CONV1]], align 4 // CHECK1-NEXT: [[TMP21:%.*]] = load i32, i32* [[A5]], align 4 -// CHECK1-NEXT: store i32 [[TMP21]], i32* [[CONV2]], align 8 +// CHECK1-NEXT: store i32 [[TMP21]], i32* [[CONV2]], align 4 
// CHECK1-NEXT: br label [[DOTOMP_LINEAR_PU_DONE]] // CHECK1: .omp.linear.pu.done: // CHECK1-NEXT: ret void @@ -1088,11 +1088,11 @@ int bar(int n){ // CHECK1-NEXT: store i64 [[AA]], i64* [[AA_ADDR]], align 8 // CHECK1-NEXT: [[CONV:%.*]] = bitcast i64* [[A_ADDR]] to i32* // CHECK1-NEXT: [[CONV1:%.*]] = bitcast i64* [[AA_ADDR]] to i16* -// CHECK1-NEXT: [[TMP0:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK1-NEXT: [[CONV2:%.*]] = bitcast i64* [[A_CASTED]] to i32* // CHECK1-NEXT: store i32 [[TMP0]], i32* [[CONV2]], align 4 // CHECK1-NEXT: [[TMP1:%.*]] = load i64, i64* [[A_CASTED]], align 8 -// CHECK1-NEXT: [[TMP2:%.*]] = load i16, i16* [[CONV1]], align 8 +// CHECK1-NEXT: [[TMP2:%.*]] = load i16, i16* [[CONV1]], align 2 // CHECK1-NEXT: [[CONV3:%.*]] = bitcast i64* [[AA_CASTED]] to i16* // CHECK1-NEXT: store i16 [[TMP2]], i16* [[CONV3]], align 2 // CHECK1-NEXT: [[TMP3:%.*]] = load i64, i64* [[AA_CASTED]], align 8 @@ -1152,14 +1152,14 @@ int bar(int n){ // CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 6, [[MUL]] // CHECK1-NEXT: [[CONV3:%.*]] = trunc i32 [[ADD]] to i16 // CHECK1-NEXT: store i16 [[CONV3]], i16* [[IT]], align 2 -// CHECK1-NEXT: [[TMP8:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK1-NEXT: [[TMP8:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK1-NEXT: [[ADD4:%.*]] = add nsw i32 [[TMP8]], 1 -// CHECK1-NEXT: store i32 [[ADD4]], i32* [[CONV]], align 8 -// CHECK1-NEXT: [[TMP9:%.*]] = load i16, i16* [[CONV1]], align 8 +// CHECK1-NEXT: store i32 [[ADD4]], i32* [[CONV]], align 4 +// CHECK1-NEXT: [[TMP9:%.*]] = load i16, i16* [[CONV1]], align 2 // CHECK1-NEXT: [[CONV5:%.*]] = sext i16 [[TMP9]] to i32 // CHECK1-NEXT: [[ADD6:%.*]] = add nsw i32 [[CONV5]], 1 // CHECK1-NEXT: [[CONV7:%.*]] = trunc i32 [[ADD6]] to i16 -// CHECK1-NEXT: store i16 [[CONV7]], i16* [[CONV1]], align 8 +// CHECK1-NEXT: store i16 [[CONV7]], i16* [[CONV1]], align 2 // CHECK1-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK1: omp.body.continue: // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] @@ -1210,11 +1210,11 @@ int bar(int n){ // CHECK1-NEXT: [[TMP6:%.*]] = load double*, double** [[CN_ADDR]], align 8 // CHECK1-NEXT: [[TMP7:%.*]] = load %struct.TT*, %struct.TT** [[D_ADDR]], align 8 // CHECK1-NEXT: [[CONV5:%.*]] = bitcast i64* [[DOTCAPTURE_EXPR__ADDR]] to i32* -// CHECK1-NEXT: [[TMP8:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK1-NEXT: [[TMP8:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK1-NEXT: [[CONV6:%.*]] = bitcast i64* [[A_CASTED]] to i32* // CHECK1-NEXT: store i32 [[TMP8]], i32* [[CONV6]], align 4 // CHECK1-NEXT: [[TMP9:%.*]] = load i64, i64* [[A_CASTED]], align 8 -// CHECK1-NEXT: [[TMP10:%.*]] = load i32, i32* [[CONV5]], align 8 +// CHECK1-NEXT: [[TMP10:%.*]] = load i32, i32* [[CONV5]], align 4 // CHECK1-NEXT: [[CONV7:%.*]] = bitcast i64* [[DOTCAPTURE_EXPR__CASTED]] to i32* // CHECK1-NEXT: store i32 [[TMP10]], i32* [[CONV7]], align 4 // CHECK1-NEXT: [[TMP11:%.*]] = load i64, i64* [[DOTCAPTURE_EXPR__CASTED]], align 8 @@ -1270,7 +1270,7 @@ int bar(int n){ // CHECK1-NEXT: store i32 25, i32* [[DOTOMP_UB]], align 4 // CHECK1-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK1-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP8:%.*]] = load i32, i32* [[CONV5]], align 8 +// CHECK1-NEXT: [[TMP8:%.*]] = load i32, i32* [[CONV5]], align 4 // CHECK1-NEXT: [[TMP9:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: [[TMP10:%.*]] = load i32, i32* [[TMP9]], align 4 // CHECK1-NEXT: call void 
@__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP10]], i32 33, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 [[TMP8]]) @@ -1306,9 +1306,9 @@ int bar(int n){ // CHECK1-NEXT: [[SUB:%.*]] = sub nsw i32 122, [[MUL]] // CHECK1-NEXT: [[CONV8:%.*]] = trunc i32 [[SUB]] to i8 // CHECK1-NEXT: store i8 [[CONV8]], i8* [[IT]], align 1 -// CHECK1-NEXT: [[TMP19:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK1-NEXT: [[TMP19:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP19]], 1 -// CHECK1-NEXT: store i32 [[ADD]], i32* [[CONV]], align 8 +// CHECK1-NEXT: store i32 [[ADD]], i32* [[CONV]], align 4 // CHECK1-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x float], [10 x float]* [[TMP0]], i64 0, i64 2 // CHECK1-NEXT: [[TMP20:%.*]] = load float, float* [[ARRAYIDX]], align 4 // CHECK1-NEXT: [[CONV9:%.*]] = fpext float [[TMP20]] to double @@ -1685,7 +1685,7 @@ int bar(int n){ // CHECK1-NEXT: [[TMP1:%.*]] = load i64, i64* [[VLA_ADDR]], align 8 // CHECK1-NEXT: [[TMP2:%.*]] = load i64, i64* [[VLA_ADDR2]], align 8 // CHECK1-NEXT: [[TMP3:%.*]] = load i16*, i16** [[C_ADDR]], align 8 -// CHECK1-NEXT: [[TMP4:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK1-NEXT: [[TMP4:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK1-NEXT: [[CONV3:%.*]] = bitcast i64* [[B_CASTED]] to i32* // CHECK1-NEXT: store i32 [[TMP4]], i32* [[CONV3]], align 4 // CHECK1-NEXT: [[TMP5:%.*]] = load i64, i64* [[B_CASTED]], align 8 @@ -1753,7 +1753,7 @@ int bar(int n){ // CHECK1-NEXT: [[MUL:%.*]] = mul i64 [[TMP11]], 400 // CHECK1-NEXT: [[SUB:%.*]] = sub i64 2000, [[MUL]] // CHECK1-NEXT: store i64 [[SUB]], i64* [[IT]], align 8 -// CHECK1-NEXT: [[TMP12:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK1-NEXT: [[TMP12:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK1-NEXT: [[CONV4:%.*]] = sitofp i32 [[TMP12]] to double // CHECK1-NEXT: [[ADD:%.*]] = fadd double [[CONV4]], 1.500000e+00 // CHECK1-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_S1:%.*]], %struct.S1* [[TMP0]], i32 0, i32 0 @@ -1800,15 +1800,15 @@ int bar(int n){ // CHECK1-NEXT: [[CONV1:%.*]] = bitcast i64* [[AA_ADDR]] to i16* // CHECK1-NEXT: [[CONV2:%.*]] = bitcast i64* [[AAA_ADDR]] to i8* // CHECK1-NEXT: [[TMP0:%.*]] = load [10 x i32]*, [10 x i32]** [[B_ADDR]], align 8 -// CHECK1-NEXT: [[TMP1:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK1-NEXT: [[CONV3:%.*]] = bitcast i64* [[A_CASTED]] to i32* // CHECK1-NEXT: store i32 [[TMP1]], i32* [[CONV3]], align 4 // CHECK1-NEXT: [[TMP2:%.*]] = load i64, i64* [[A_CASTED]], align 8 -// CHECK1-NEXT: [[TMP3:%.*]] = load i16, i16* [[CONV1]], align 8 +// CHECK1-NEXT: [[TMP3:%.*]] = load i16, i16* [[CONV1]], align 2 // CHECK1-NEXT: [[CONV4:%.*]] = bitcast i64* [[AA_CASTED]] to i16* // CHECK1-NEXT: store i16 [[TMP3]], i16* [[CONV4]], align 2 // CHECK1-NEXT: [[TMP4:%.*]] = load i64, i64* [[AA_CASTED]], align 8 -// CHECK1-NEXT: [[TMP5:%.*]] = load i8, i8* [[CONV2]], align 8 +// CHECK1-NEXT: [[TMP5:%.*]] = load i8, i8* [[CONV2]], align 1 // CHECK1-NEXT: [[CONV5:%.*]] = bitcast i64* [[AAA_CASTED]] to i8* // CHECK1-NEXT: store i8 [[TMP5]], i8* [[CONV5]], align 1 // CHECK1-NEXT: [[TMP6:%.*]] = load i64, i64* [[AAA_CASTED]], align 8 @@ -1854,11 +1854,11 @@ int bar(int n){ // CHECK1-NEXT: [[CONV:%.*]] = bitcast i64* [[A_ADDR]] to i32* // CHECK1-NEXT: [[CONV1:%.*]] = bitcast i64* [[AA_ADDR]] to i16* // CHECK1-NEXT: [[TMP0:%.*]] = load [10 x i32]*, [10 x i32]** [[B_ADDR]], align 8 
-// CHECK1-NEXT: [[TMP1:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK1-NEXT: [[CONV2:%.*]] = bitcast i64* [[A_CASTED]] to i32* // CHECK1-NEXT: store i32 [[TMP1]], i32* [[CONV2]], align 4 // CHECK1-NEXT: [[TMP2:%.*]] = load i64, i64* [[A_CASTED]], align 8 -// CHECK1-NEXT: [[TMP3:%.*]] = load i16, i16* [[CONV1]], align 8 +// CHECK1-NEXT: [[TMP3:%.*]] = load i16, i16* [[CONV1]], align 2 // CHECK1-NEXT: [[CONV3:%.*]] = bitcast i64* [[AA_CASTED]] to i16* // CHECK1-NEXT: store i16 [[TMP3]], i16* [[CONV3]], align 2 // CHECK1-NEXT: [[TMP4:%.*]] = load i64, i64* [[AA_CASTED]], align 8 @@ -1920,14 +1920,14 @@ int bar(int n){ // CHECK1-NEXT: [[MUL:%.*]] = mul nsw i64 [[TMP8]], 3 // CHECK1-NEXT: [[ADD:%.*]] = add nsw i64 -10, [[MUL]] // CHECK1-NEXT: store i64 [[ADD]], i64* [[I]], align 8 -// CHECK1-NEXT: [[TMP9:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK1-NEXT: [[TMP9:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK1-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP9]], 1 -// CHECK1-NEXT: store i32 [[ADD3]], i32* [[CONV]], align 8 -// CHECK1-NEXT: [[TMP10:%.*]] = load i16, i16* [[CONV1]], align 8 +// CHECK1-NEXT: store i32 [[ADD3]], i32* [[CONV]], align 4 +// CHECK1-NEXT: [[TMP10:%.*]] = load i16, i16* [[CONV1]], align 2 // CHECK1-NEXT: [[CONV4:%.*]] = sext i16 [[TMP10]] to i32 // CHECK1-NEXT: [[ADD5:%.*]] = add nsw i32 [[CONV4]], 1 // CHECK1-NEXT: [[CONV6:%.*]] = trunc i32 [[ADD5]] to i16 -// CHECK1-NEXT: store i16 [[CONV6]], i16* [[CONV1]], align 8 +// CHECK1-NEXT: store i16 [[CONV6]], i16* [[CONV1]], align 2 // CHECK1-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], [10 x i32]* [[TMP0]], i64 0, i64 2 // CHECK1-NEXT: [[TMP11:%.*]] = load i32, i32* [[ARRAYIDX]], align 4 // CHECK1-NEXT: [[ADD7:%.*]] = add nsw i32 [[TMP11]], 1 @@ -2373,7 +2373,7 @@ int bar(int n){ // CHECK2-NEXT: store i64 [[A]], i64* [[A_ADDR]], align 8 // CHECK2-NEXT: store i64 [[K]], i64* [[K_ADDR]], align 8 // CHECK2-NEXT: [[CONV:%.*]] = bitcast i64* [[A_ADDR]] to i32* -// CHECK2-NEXT: [[TMP0:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK2-NEXT: [[TMP0:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK2-NEXT: [[CONV1:%.*]] = bitcast i64* [[A_CASTED]] to i32* // CHECK2-NEXT: store i32 [[TMP0]], i32* [[CONV1]], align 4 // CHECK2-NEXT: [[TMP1:%.*]] = load i64, i64* [[A_CASTED]], align 8 @@ -2440,9 +2440,9 @@ int bar(int n){ // CHECK2-NEXT: [[CONV3:%.*]] = sext i32 [[MUL2]] to i64 // CHECK2-NEXT: [[ADD:%.*]] = add nsw i64 [[TMP8]], [[CONV3]] // CHECK2-NEXT: store i64 [[ADD]], i64* [[K1]], align 8, !llvm.access.group !12 -// CHECK2-NEXT: [[TMP10:%.*]] = load i32, i32* [[CONV]], align 8, !llvm.access.group !12 +// CHECK2-NEXT: [[TMP10:%.*]] = load i32, i32* [[CONV]], align 4, !llvm.access.group !12 // CHECK2-NEXT: [[ADD4:%.*]] = add nsw i32 [[TMP10]], 1 -// CHECK2-NEXT: store i32 [[ADD4]], i32* [[CONV]], align 8, !llvm.access.group !12 +// CHECK2-NEXT: store i32 [[ADD4]], i32* [[CONV]], align 4, !llvm.access.group !12 // CHECK2-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK2: omp.body.continue: // CHECK2-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] @@ -2482,15 +2482,15 @@ int bar(int n){ // CHECK2-NEXT: [[CONV:%.*]] = bitcast i64* [[AA_ADDR]] to i16* // CHECK2-NEXT: [[CONV1:%.*]] = bitcast i64* [[LIN_ADDR]] to i32* // CHECK2-NEXT: [[CONV2:%.*]] = bitcast i64* [[A_ADDR]] to i32* -// CHECK2-NEXT: [[TMP0:%.*]] = load i16, i16* [[CONV]], align 8 +// CHECK2-NEXT: [[TMP0:%.*]] = load i16, i16* [[CONV]], align 2 // CHECK2-NEXT: [[CONV3:%.*]] = bitcast i64* 
[[AA_CASTED]] to i16* // CHECK2-NEXT: store i16 [[TMP0]], i16* [[CONV3]], align 2 // CHECK2-NEXT: [[TMP1:%.*]] = load i64, i64* [[AA_CASTED]], align 8 -// CHECK2-NEXT: [[TMP2:%.*]] = load i32, i32* [[CONV1]], align 8 +// CHECK2-NEXT: [[TMP2:%.*]] = load i32, i32* [[CONV1]], align 4 // CHECK2-NEXT: [[CONV4:%.*]] = bitcast i64* [[LIN_CASTED]] to i32* // CHECK2-NEXT: store i32 [[TMP2]], i32* [[CONV4]], align 4 // CHECK2-NEXT: [[TMP3:%.*]] = load i64, i64* [[LIN_CASTED]], align 8 -// CHECK2-NEXT: [[TMP4:%.*]] = load i32, i32* [[CONV2]], align 8 +// CHECK2-NEXT: [[TMP4:%.*]] = load i32, i32* [[CONV2]], align 4 // CHECK2-NEXT: [[CONV5:%.*]] = bitcast i64* [[A_CASTED]] to i32* // CHECK2-NEXT: store i32 [[TMP4]], i32* [[CONV5]], align 4 // CHECK2-NEXT: [[TMP5:%.*]] = load i64, i64* [[A_CASTED]], align 8 @@ -2526,9 +2526,9 @@ int bar(int n){ // CHECK2-NEXT: [[CONV:%.*]] = bitcast i64* [[AA_ADDR]] to i16* // CHECK2-NEXT: [[CONV1:%.*]] = bitcast i64* [[LIN_ADDR]] to i32* // CHECK2-NEXT: [[CONV2:%.*]] = bitcast i64* [[A_ADDR]] to i32* -// CHECK2-NEXT: [[TMP0:%.*]] = load i32, i32* [[CONV1]], align 8 +// CHECK2-NEXT: [[TMP0:%.*]] = load i32, i32* [[CONV1]], align 4 // CHECK2-NEXT: store i32 [[TMP0]], i32* [[DOTLINEAR_START]], align 4 -// CHECK2-NEXT: [[TMP1:%.*]] = load i32, i32* [[CONV2]], align 8 +// CHECK2-NEXT: [[TMP1:%.*]] = load i32, i32* [[CONV2]], align 4 // CHECK2-NEXT: store i32 [[TMP1]], i32* [[DOTLINEAR_START3]], align 4 // CHECK2-NEXT: [[CALL:%.*]] = call i64 @_Z7get_valv() // CHECK2-NEXT: store i64 [[CALL]], i64* [[DOTLINEAR_STEP]], align 8 @@ -2580,11 +2580,11 @@ int bar(int n){ // CHECK2-NEXT: [[ADD12:%.*]] = add i64 [[CONV10]], [[MUL11]] // CHECK2-NEXT: [[CONV13:%.*]] = trunc i64 [[ADD12]] to i32 // CHECK2-NEXT: store i32 [[CONV13]], i32* [[A5]], align 4 -// CHECK2-NEXT: [[TMP16:%.*]] = load i16, i16* [[CONV]], align 8 +// CHECK2-NEXT: [[TMP16:%.*]] = load i16, i16* [[CONV]], align 2 // CHECK2-NEXT: [[CONV14:%.*]] = sext i16 [[TMP16]] to i32 // CHECK2-NEXT: [[ADD15:%.*]] = add nsw i32 [[CONV14]], 1 // CHECK2-NEXT: [[CONV16:%.*]] = trunc i32 [[ADD15]] to i16 -// CHECK2-NEXT: store i16 [[CONV16]], i16* [[CONV]], align 8 +// CHECK2-NEXT: store i16 [[CONV16]], i16* [[CONV]], align 2 // CHECK2-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK2: omp.body.continue: // CHECK2-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] @@ -2602,9 +2602,9 @@ int bar(int n){ // CHECK2-NEXT: br i1 [[TMP19]], label [[DOTOMP_LINEAR_PU:%.*]], label [[DOTOMP_LINEAR_PU_DONE:%.*]] // CHECK2: .omp.linear.pu: // CHECK2-NEXT: [[TMP20:%.*]] = load i32, i32* [[LIN4]], align 4 -// CHECK2-NEXT: store i32 [[TMP20]], i32* [[CONV1]], align 8 +// CHECK2-NEXT: store i32 [[TMP20]], i32* [[CONV1]], align 4 // CHECK2-NEXT: [[TMP21:%.*]] = load i32, i32* [[A5]], align 4 -// CHECK2-NEXT: store i32 [[TMP21]], i32* [[CONV2]], align 8 +// CHECK2-NEXT: store i32 [[TMP21]], i32* [[CONV2]], align 4 // CHECK2-NEXT: br label [[DOTOMP_LINEAR_PU_DONE]] // CHECK2: .omp.linear.pu.done: // CHECK2-NEXT: ret void @@ -2726,11 +2726,11 @@ int bar(int n){ // CHECK2-NEXT: store i64 [[AA]], i64* [[AA_ADDR]], align 8 // CHECK2-NEXT: [[CONV:%.*]] = bitcast i64* [[A_ADDR]] to i32* // CHECK2-NEXT: [[CONV1:%.*]] = bitcast i64* [[AA_ADDR]] to i16* -// CHECK2-NEXT: [[TMP0:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK2-NEXT: [[TMP0:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK2-NEXT: [[CONV2:%.*]] = bitcast i64* [[A_CASTED]] to i32* // CHECK2-NEXT: store i32 [[TMP0]], i32* [[CONV2]], align 4 // CHECK2-NEXT: [[TMP1:%.*]] = load i64, i64* [[A_CASTED]], align 
8 -// CHECK2-NEXT: [[TMP2:%.*]] = load i16, i16* [[CONV1]], align 8 +// CHECK2-NEXT: [[TMP2:%.*]] = load i16, i16* [[CONV1]], align 2 // CHECK2-NEXT: [[CONV3:%.*]] = bitcast i64* [[AA_CASTED]] to i16* // CHECK2-NEXT: store i16 [[TMP2]], i16* [[CONV3]], align 2 // CHECK2-NEXT: [[TMP3:%.*]] = load i64, i64* [[AA_CASTED]], align 8 @@ -2790,14 +2790,14 @@ int bar(int n){ // CHECK2-NEXT: [[ADD:%.*]] = add nsw i32 6, [[MUL]] // CHECK2-NEXT: [[CONV3:%.*]] = trunc i32 [[ADD]] to i16 // CHECK2-NEXT: store i16 [[CONV3]], i16* [[IT]], align 2 -// CHECK2-NEXT: [[TMP8:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK2-NEXT: [[TMP8:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK2-NEXT: [[ADD4:%.*]] = add nsw i32 [[TMP8]], 1 -// CHECK2-NEXT: store i32 [[ADD4]], i32* [[CONV]], align 8 -// CHECK2-NEXT: [[TMP9:%.*]] = load i16, i16* [[CONV1]], align 8 +// CHECK2-NEXT: store i32 [[ADD4]], i32* [[CONV]], align 4 +// CHECK2-NEXT: [[TMP9:%.*]] = load i16, i16* [[CONV1]], align 2 // CHECK2-NEXT: [[CONV5:%.*]] = sext i16 [[TMP9]] to i32 // CHECK2-NEXT: [[ADD6:%.*]] = add nsw i32 [[CONV5]], 1 // CHECK2-NEXT: [[CONV7:%.*]] = trunc i32 [[ADD6]] to i16 -// CHECK2-NEXT: store i16 [[CONV7]], i16* [[CONV1]], align 8 +// CHECK2-NEXT: store i16 [[CONV7]], i16* [[CONV1]], align 2 // CHECK2-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK2: omp.body.continue: // CHECK2-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] @@ -2848,11 +2848,11 @@ int bar(int n){ // CHECK2-NEXT: [[TMP6:%.*]] = load double*, double** [[CN_ADDR]], align 8 // CHECK2-NEXT: [[TMP7:%.*]] = load %struct.TT*, %struct.TT** [[D_ADDR]], align 8 // CHECK2-NEXT: [[CONV5:%.*]] = bitcast i64* [[DOTCAPTURE_EXPR__ADDR]] to i32* -// CHECK2-NEXT: [[TMP8:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK2-NEXT: [[TMP8:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK2-NEXT: [[CONV6:%.*]] = bitcast i64* [[A_CASTED]] to i32* // CHECK2-NEXT: store i32 [[TMP8]], i32* [[CONV6]], align 4 // CHECK2-NEXT: [[TMP9:%.*]] = load i64, i64* [[A_CASTED]], align 8 -// CHECK2-NEXT: [[TMP10:%.*]] = load i32, i32* [[CONV5]], align 8 +// CHECK2-NEXT: [[TMP10:%.*]] = load i32, i32* [[CONV5]], align 4 // CHECK2-NEXT: [[CONV7:%.*]] = bitcast i64* [[DOTCAPTURE_EXPR__CASTED]] to i32* // CHECK2-NEXT: store i32 [[TMP10]], i32* [[CONV7]], align 4 // CHECK2-NEXT: [[TMP11:%.*]] = load i64, i64* [[DOTCAPTURE_EXPR__CASTED]], align 8 @@ -2908,7 +2908,7 @@ int bar(int n){ // CHECK2-NEXT: store i32 25, i32* [[DOTOMP_UB]], align 4 // CHECK2-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK2-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK2-NEXT: [[TMP8:%.*]] = load i32, i32* [[CONV5]], align 8 +// CHECK2-NEXT: [[TMP8:%.*]] = load i32, i32* [[CONV5]], align 4 // CHECK2-NEXT: [[TMP9:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK2-NEXT: [[TMP10:%.*]] = load i32, i32* [[TMP9]], align 4 // CHECK2-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP10]], i32 33, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 [[TMP8]]) @@ -2944,9 +2944,9 @@ int bar(int n){ // CHECK2-NEXT: [[SUB:%.*]] = sub nsw i32 122, [[MUL]] // CHECK2-NEXT: [[CONV8:%.*]] = trunc i32 [[SUB]] to i8 // CHECK2-NEXT: store i8 [[CONV8]], i8* [[IT]], align 1 -// CHECK2-NEXT: [[TMP19:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK2-NEXT: [[TMP19:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK2-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP19]], 1 -// CHECK2-NEXT: store i32 [[ADD]], i32* [[CONV]], align 8 +// CHECK2-NEXT: store i32 
[[ADD]], i32* [[CONV]], align 4 // CHECK2-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x float], [10 x float]* [[TMP0]], i64 0, i64 2 // CHECK2-NEXT: [[TMP20:%.*]] = load float, float* [[ARRAYIDX]], align 4 // CHECK2-NEXT: [[CONV9:%.*]] = fpext float [[TMP20]] to double @@ -3323,7 +3323,7 @@ int bar(int n){ // CHECK2-NEXT: [[TMP1:%.*]] = load i64, i64* [[VLA_ADDR]], align 8 // CHECK2-NEXT: [[TMP2:%.*]] = load i64, i64* [[VLA_ADDR2]], align 8 // CHECK2-NEXT: [[TMP3:%.*]] = load i16*, i16** [[C_ADDR]], align 8 -// CHECK2-NEXT: [[TMP4:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK2-NEXT: [[TMP4:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK2-NEXT: [[CONV3:%.*]] = bitcast i64* [[B_CASTED]] to i32* // CHECK2-NEXT: store i32 [[TMP4]], i32* [[CONV3]], align 4 // CHECK2-NEXT: [[TMP5:%.*]] = load i64, i64* [[B_CASTED]], align 8 @@ -3391,7 +3391,7 @@ int bar(int n){ // CHECK2-NEXT: [[MUL:%.*]] = mul i64 [[TMP11]], 400 // CHECK2-NEXT: [[SUB:%.*]] = sub i64 2000, [[MUL]] // CHECK2-NEXT: store i64 [[SUB]], i64* [[IT]], align 8 -// CHECK2-NEXT: [[TMP12:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK2-NEXT: [[TMP12:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK2-NEXT: [[CONV4:%.*]] = sitofp i32 [[TMP12]] to double // CHECK2-NEXT: [[ADD:%.*]] = fadd double [[CONV4]], 1.500000e+00 // CHECK2-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_S1:%.*]], %struct.S1* [[TMP0]], i32 0, i32 0 @@ -3438,15 +3438,15 @@ int bar(int n){ // CHECK2-NEXT: [[CONV1:%.*]] = bitcast i64* [[AA_ADDR]] to i16* // CHECK2-NEXT: [[CONV2:%.*]] = bitcast i64* [[AAA_ADDR]] to i8* // CHECK2-NEXT: [[TMP0:%.*]] = load [10 x i32]*, [10 x i32]** [[B_ADDR]], align 8 -// CHECK2-NEXT: [[TMP1:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK2-NEXT: [[TMP1:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK2-NEXT: [[CONV3:%.*]] = bitcast i64* [[A_CASTED]] to i32* // CHECK2-NEXT: store i32 [[TMP1]], i32* [[CONV3]], align 4 // CHECK2-NEXT: [[TMP2:%.*]] = load i64, i64* [[A_CASTED]], align 8 -// CHECK2-NEXT: [[TMP3:%.*]] = load i16, i16* [[CONV1]], align 8 +// CHECK2-NEXT: [[TMP3:%.*]] = load i16, i16* [[CONV1]], align 2 // CHECK2-NEXT: [[CONV4:%.*]] = bitcast i64* [[AA_CASTED]] to i16* // CHECK2-NEXT: store i16 [[TMP3]], i16* [[CONV4]], align 2 // CHECK2-NEXT: [[TMP4:%.*]] = load i64, i64* [[AA_CASTED]], align 8 -// CHECK2-NEXT: [[TMP5:%.*]] = load i8, i8* [[CONV2]], align 8 +// CHECK2-NEXT: [[TMP5:%.*]] = load i8, i8* [[CONV2]], align 1 // CHECK2-NEXT: [[CONV5:%.*]] = bitcast i64* [[AAA_CASTED]] to i8* // CHECK2-NEXT: store i8 [[TMP5]], i8* [[CONV5]], align 1 // CHECK2-NEXT: [[TMP6:%.*]] = load i64, i64* [[AAA_CASTED]], align 8 @@ -3492,11 +3492,11 @@ int bar(int n){ // CHECK2-NEXT: [[CONV:%.*]] = bitcast i64* [[A_ADDR]] to i32* // CHECK2-NEXT: [[CONV1:%.*]] = bitcast i64* [[AA_ADDR]] to i16* // CHECK2-NEXT: [[TMP0:%.*]] = load [10 x i32]*, [10 x i32]** [[B_ADDR]], align 8 -// CHECK2-NEXT: [[TMP1:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK2-NEXT: [[TMP1:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK2-NEXT: [[CONV2:%.*]] = bitcast i64* [[A_CASTED]] to i32* // CHECK2-NEXT: store i32 [[TMP1]], i32* [[CONV2]], align 4 // CHECK2-NEXT: [[TMP2:%.*]] = load i64, i64* [[A_CASTED]], align 8 -// CHECK2-NEXT: [[TMP3:%.*]] = load i16, i16* [[CONV1]], align 8 +// CHECK2-NEXT: [[TMP3:%.*]] = load i16, i16* [[CONV1]], align 2 // CHECK2-NEXT: [[CONV3:%.*]] = bitcast i64* [[AA_CASTED]] to i16* // CHECK2-NEXT: store i16 [[TMP3]], i16* [[CONV3]], align 2 // CHECK2-NEXT: [[TMP4:%.*]] = load i64, i64* [[AA_CASTED]], align 8 @@ -3558,14 
+3558,14 @@ int bar(int n){ // CHECK2-NEXT: [[MUL:%.*]] = mul nsw i64 [[TMP8]], 3 // CHECK2-NEXT: [[ADD:%.*]] = add nsw i64 -10, [[MUL]] // CHECK2-NEXT: store i64 [[ADD]], i64* [[I]], align 8 -// CHECK2-NEXT: [[TMP9:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK2-NEXT: [[TMP9:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK2-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP9]], 1 -// CHECK2-NEXT: store i32 [[ADD3]], i32* [[CONV]], align 8 -// CHECK2-NEXT: [[TMP10:%.*]] = load i16, i16* [[CONV1]], align 8 +// CHECK2-NEXT: store i32 [[ADD3]], i32* [[CONV]], align 4 +// CHECK2-NEXT: [[TMP10:%.*]] = load i16, i16* [[CONV1]], align 2 // CHECK2-NEXT: [[CONV4:%.*]] = sext i16 [[TMP10]] to i32 // CHECK2-NEXT: [[ADD5:%.*]] = add nsw i32 [[CONV4]], 1 // CHECK2-NEXT: [[CONV6:%.*]] = trunc i32 [[ADD5]] to i16 -// CHECK2-NEXT: store i16 [[CONV6]], i16* [[CONV1]], align 8 +// CHECK2-NEXT: store i16 [[CONV6]], i16* [[CONV1]], align 2 // CHECK2-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], [10 x i32]* [[TMP0]], i64 0, i64 2 // CHECK2-NEXT: [[TMP11:%.*]] = load i32, i32* [[ARRAYIDX]], align 4 // CHECK2-NEXT: [[ADD7:%.*]] = add nsw i32 [[TMP11]], 1 @@ -4103,7 +4103,7 @@ int bar(int n){ // CHECK3-NEXT: store i32 [[LIN]], i32* [[LIN_ADDR]], align 4 // CHECK3-NEXT: store i32 [[A]], i32* [[A_ADDR]], align 4 // CHECK3-NEXT: [[CONV:%.*]] = bitcast i32* [[AA_ADDR]] to i16* -// CHECK3-NEXT: [[TMP0:%.*]] = load i16, i16* [[CONV]], align 4 +// CHECK3-NEXT: [[TMP0:%.*]] = load i16, i16* [[CONV]], align 2 // CHECK3-NEXT: [[CONV1:%.*]] = bitcast i32* [[AA_CASTED]] to i16* // CHECK3-NEXT: store i16 [[TMP0]], i16* [[CONV1]], align 2 // CHECK3-NEXT: [[TMP1:%.*]] = load i32, i32* [[AA_CASTED]], align 4 @@ -4197,11 +4197,11 @@ int bar(int n){ // CHECK3-NEXT: [[ADD10:%.*]] = add i64 [[CONV8]], [[MUL9]] // CHECK3-NEXT: [[CONV11:%.*]] = trunc i64 [[ADD10]] to i32 // CHECK3-NEXT: store i32 [[CONV11]], i32* [[A3]], align 4 -// CHECK3-NEXT: [[TMP16:%.*]] = load i16, i16* [[CONV]], align 4 +// CHECK3-NEXT: [[TMP16:%.*]] = load i16, i16* [[CONV]], align 2 // CHECK3-NEXT: [[CONV12:%.*]] = sext i16 [[TMP16]] to i32 // CHECK3-NEXT: [[ADD13:%.*]] = add nsw i32 [[CONV12]], 1 // CHECK3-NEXT: [[CONV14:%.*]] = trunc i32 [[ADD13]] to i16 -// CHECK3-NEXT: store i16 [[CONV14]], i16* [[CONV]], align 4 +// CHECK3-NEXT: store i16 [[CONV14]], i16* [[CONV]], align 2 // CHECK3-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK3: omp.body.continue: // CHECK3-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] @@ -4343,7 +4343,7 @@ int bar(int n){ // CHECK3-NEXT: [[TMP0:%.*]] = load i32, i32* [[A_ADDR]], align 4 // CHECK3-NEXT: store i32 [[TMP0]], i32* [[A_CASTED]], align 4 // CHECK3-NEXT: [[TMP1:%.*]] = load i32, i32* [[A_CASTED]], align 4 -// CHECK3-NEXT: [[TMP2:%.*]] = load i16, i16* [[CONV]], align 4 +// CHECK3-NEXT: [[TMP2:%.*]] = load i16, i16* [[CONV]], align 2 // CHECK3-NEXT: [[CONV1:%.*]] = bitcast i32* [[AA_CASTED]] to i16* // CHECK3-NEXT: store i16 [[TMP2]], i16* [[CONV1]], align 2 // CHECK3-NEXT: [[TMP3:%.*]] = load i32, i32* [[AA_CASTED]], align 4 @@ -4405,11 +4405,11 @@ int bar(int n){ // CHECK3-NEXT: [[TMP8:%.*]] = load i32, i32* [[A_ADDR]], align 4 // CHECK3-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP8]], 1 // CHECK3-NEXT: store i32 [[ADD3]], i32* [[A_ADDR]], align 4 -// CHECK3-NEXT: [[TMP9:%.*]] = load i16, i16* [[CONV]], align 4 +// CHECK3-NEXT: [[TMP9:%.*]] = load i16, i16* [[CONV]], align 2 // CHECK3-NEXT: [[CONV4:%.*]] = sext i16 [[TMP9]] to i32 // CHECK3-NEXT: [[ADD5:%.*]] = add nsw i32 [[CONV4]], 1 // CHECK3-NEXT: [[CONV6:%.*]] = trunc 
i32 [[ADD5]] to i16 -// CHECK3-NEXT: store i16 [[CONV6]], i16* [[CONV]], align 4 +// CHECK3-NEXT: store i16 [[CONV6]], i16* [[CONV]], align 2 // CHECK3-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK3: omp.body.continue: // CHECK3-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] @@ -5040,11 +5040,11 @@ int bar(int n){ // CHECK3-NEXT: [[TMP1:%.*]] = load i32, i32* [[A_ADDR]], align 4 // CHECK3-NEXT: store i32 [[TMP1]], i32* [[A_CASTED]], align 4 // CHECK3-NEXT: [[TMP2:%.*]] = load i32, i32* [[A_CASTED]], align 4 -// CHECK3-NEXT: [[TMP3:%.*]] = load i16, i16* [[CONV]], align 4 +// CHECK3-NEXT: [[TMP3:%.*]] = load i16, i16* [[CONV]], align 2 // CHECK3-NEXT: [[CONV2:%.*]] = bitcast i32* [[AA_CASTED]] to i16* // CHECK3-NEXT: store i16 [[TMP3]], i16* [[CONV2]], align 2 // CHECK3-NEXT: [[TMP4:%.*]] = load i32, i32* [[AA_CASTED]], align 4 -// CHECK3-NEXT: [[TMP5:%.*]] = load i8, i8* [[CONV1]], align 4 +// CHECK3-NEXT: [[TMP5:%.*]] = load i8, i8* [[CONV1]], align 1 // CHECK3-NEXT: [[CONV3:%.*]] = bitcast i32* [[AAA_CASTED]] to i8* // CHECK3-NEXT: store i8 [[TMP5]], i8* [[CONV3]], align 1 // CHECK3-NEXT: [[TMP6:%.*]] = load i32, i32* [[AAA_CASTED]], align 4 @@ -5091,7 +5091,7 @@ int bar(int n){ // CHECK3-NEXT: [[TMP1:%.*]] = load i32, i32* [[A_ADDR]], align 4 // CHECK3-NEXT: store i32 [[TMP1]], i32* [[A_CASTED]], align 4 // CHECK3-NEXT: [[TMP2:%.*]] = load i32, i32* [[A_CASTED]], align 4 -// CHECK3-NEXT: [[TMP3:%.*]] = load i16, i16* [[CONV]], align 4 +// CHECK3-NEXT: [[TMP3:%.*]] = load i16, i16* [[CONV]], align 2 // CHECK3-NEXT: [[CONV1:%.*]] = bitcast i32* [[AA_CASTED]] to i16* // CHECK3-NEXT: store i16 [[TMP3]], i16* [[CONV1]], align 2 // CHECK3-NEXT: [[TMP4:%.*]] = load i32, i32* [[AA_CASTED]], align 4 @@ -5155,11 +5155,11 @@ int bar(int n){ // CHECK3-NEXT: [[TMP9:%.*]] = load i32, i32* [[A_ADDR]], align 4 // CHECK3-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP9]], 1 // CHECK3-NEXT: store i32 [[ADD2]], i32* [[A_ADDR]], align 4 -// CHECK3-NEXT: [[TMP10:%.*]] = load i16, i16* [[CONV]], align 4 +// CHECK3-NEXT: [[TMP10:%.*]] = load i16, i16* [[CONV]], align 2 // CHECK3-NEXT: [[CONV3:%.*]] = sext i16 [[TMP10]] to i32 // CHECK3-NEXT: [[ADD4:%.*]] = add nsw i32 [[CONV3]], 1 // CHECK3-NEXT: [[CONV5:%.*]] = trunc i32 [[ADD4]] to i16 -// CHECK3-NEXT: store i16 [[CONV5]], i16* [[CONV]], align 4 +// CHECK3-NEXT: store i16 [[CONV5]], i16* [[CONV]], align 2 // CHECK3-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], [10 x i32]* [[TMP0]], i32 0, i32 2 // CHECK3-NEXT: [[TMP11:%.*]] = load i32, i32* [[ARRAYIDX]], align 4 // CHECK3-NEXT: [[ADD6:%.*]] = add nsw i32 [[TMP11]], 1 @@ -5697,7 +5697,7 @@ int bar(int n){ // CHECK4-NEXT: store i32 [[LIN]], i32* [[LIN_ADDR]], align 4 // CHECK4-NEXT: store i32 [[A]], i32* [[A_ADDR]], align 4 // CHECK4-NEXT: [[CONV:%.*]] = bitcast i32* [[AA_ADDR]] to i16* -// CHECK4-NEXT: [[TMP0:%.*]] = load i16, i16* [[CONV]], align 4 +// CHECK4-NEXT: [[TMP0:%.*]] = load i16, i16* [[CONV]], align 2 // CHECK4-NEXT: [[CONV1:%.*]] = bitcast i32* [[AA_CASTED]] to i16* // CHECK4-NEXT: store i16 [[TMP0]], i16* [[CONV1]], align 2 // CHECK4-NEXT: [[TMP1:%.*]] = load i32, i32* [[AA_CASTED]], align 4 @@ -5791,11 +5791,11 @@ int bar(int n){ // CHECK4-NEXT: [[ADD10:%.*]] = add i64 [[CONV8]], [[MUL9]] // CHECK4-NEXT: [[CONV11:%.*]] = trunc i64 [[ADD10]] to i32 // CHECK4-NEXT: store i32 [[CONV11]], i32* [[A3]], align 4 -// CHECK4-NEXT: [[TMP16:%.*]] = load i16, i16* [[CONV]], align 4 +// CHECK4-NEXT: [[TMP16:%.*]] = load i16, i16* [[CONV]], align 2 // CHECK4-NEXT: [[CONV12:%.*]] = sext i16 [[TMP16]] to 
i32 // CHECK4-NEXT: [[ADD13:%.*]] = add nsw i32 [[CONV12]], 1 // CHECK4-NEXT: [[CONV14:%.*]] = trunc i32 [[ADD13]] to i16 -// CHECK4-NEXT: store i16 [[CONV14]], i16* [[CONV]], align 4 +// CHECK4-NEXT: store i16 [[CONV14]], i16* [[CONV]], align 2 // CHECK4-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK4: omp.body.continue: // CHECK4-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] @@ -5937,7 +5937,7 @@ int bar(int n){ // CHECK4-NEXT: [[TMP0:%.*]] = load i32, i32* [[A_ADDR]], align 4 // CHECK4-NEXT: store i32 [[TMP0]], i32* [[A_CASTED]], align 4 // CHECK4-NEXT: [[TMP1:%.*]] = load i32, i32* [[A_CASTED]], align 4 -// CHECK4-NEXT: [[TMP2:%.*]] = load i16, i16* [[CONV]], align 4 +// CHECK4-NEXT: [[TMP2:%.*]] = load i16, i16* [[CONV]], align 2 // CHECK4-NEXT: [[CONV1:%.*]] = bitcast i32* [[AA_CASTED]] to i16* // CHECK4-NEXT: store i16 [[TMP2]], i16* [[CONV1]], align 2 // CHECK4-NEXT: [[TMP3:%.*]] = load i32, i32* [[AA_CASTED]], align 4 @@ -5999,11 +5999,11 @@ int bar(int n){ // CHECK4-NEXT: [[TMP8:%.*]] = load i32, i32* [[A_ADDR]], align 4 // CHECK4-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP8]], 1 // CHECK4-NEXT: store i32 [[ADD3]], i32* [[A_ADDR]], align 4 -// CHECK4-NEXT: [[TMP9:%.*]] = load i16, i16* [[CONV]], align 4 +// CHECK4-NEXT: [[TMP9:%.*]] = load i16, i16* [[CONV]], align 2 // CHECK4-NEXT: [[CONV4:%.*]] = sext i16 [[TMP9]] to i32 // CHECK4-NEXT: [[ADD5:%.*]] = add nsw i32 [[CONV4]], 1 // CHECK4-NEXT: [[CONV6:%.*]] = trunc i32 [[ADD5]] to i16 -// CHECK4-NEXT: store i16 [[CONV6]], i16* [[CONV]], align 4 +// CHECK4-NEXT: store i16 [[CONV6]], i16* [[CONV]], align 2 // CHECK4-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK4: omp.body.continue: // CHECK4-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] @@ -6634,11 +6634,11 @@ int bar(int n){ // CHECK4-NEXT: [[TMP1:%.*]] = load i32, i32* [[A_ADDR]], align 4 // CHECK4-NEXT: store i32 [[TMP1]], i32* [[A_CASTED]], align 4 // CHECK4-NEXT: [[TMP2:%.*]] = load i32, i32* [[A_CASTED]], align 4 -// CHECK4-NEXT: [[TMP3:%.*]] = load i16, i16* [[CONV]], align 4 +// CHECK4-NEXT: [[TMP3:%.*]] = load i16, i16* [[CONV]], align 2 // CHECK4-NEXT: [[CONV2:%.*]] = bitcast i32* [[AA_CASTED]] to i16* // CHECK4-NEXT: store i16 [[TMP3]], i16* [[CONV2]], align 2 // CHECK4-NEXT: [[TMP4:%.*]] = load i32, i32* [[AA_CASTED]], align 4 -// CHECK4-NEXT: [[TMP5:%.*]] = load i8, i8* [[CONV1]], align 4 +// CHECK4-NEXT: [[TMP5:%.*]] = load i8, i8* [[CONV1]], align 1 // CHECK4-NEXT: [[CONV3:%.*]] = bitcast i32* [[AAA_CASTED]] to i8* // CHECK4-NEXT: store i8 [[TMP5]], i8* [[CONV3]], align 1 // CHECK4-NEXT: [[TMP6:%.*]] = load i32, i32* [[AAA_CASTED]], align 4 @@ -6685,7 +6685,7 @@ int bar(int n){ // CHECK4-NEXT: [[TMP1:%.*]] = load i32, i32* [[A_ADDR]], align 4 // CHECK4-NEXT: store i32 [[TMP1]], i32* [[A_CASTED]], align 4 // CHECK4-NEXT: [[TMP2:%.*]] = load i32, i32* [[A_CASTED]], align 4 -// CHECK4-NEXT: [[TMP3:%.*]] = load i16, i16* [[CONV]], align 4 +// CHECK4-NEXT: [[TMP3:%.*]] = load i16, i16* [[CONV]], align 2 // CHECK4-NEXT: [[CONV1:%.*]] = bitcast i32* [[AA_CASTED]] to i16* // CHECK4-NEXT: store i16 [[TMP3]], i16* [[CONV1]], align 2 // CHECK4-NEXT: [[TMP4:%.*]] = load i32, i32* [[AA_CASTED]], align 4 @@ -6749,11 +6749,11 @@ int bar(int n){ // CHECK4-NEXT: [[TMP9:%.*]] = load i32, i32* [[A_ADDR]], align 4 // CHECK4-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP9]], 1 // CHECK4-NEXT: store i32 [[ADD2]], i32* [[A_ADDR]], align 4 -// CHECK4-NEXT: [[TMP10:%.*]] = load i16, i16* [[CONV]], align 4 +// CHECK4-NEXT: [[TMP10:%.*]] = load i16, i16* [[CONV]], align 2 // CHECK4-NEXT: [[CONV3:%.*]] = sext 
i16 [[TMP10]] to i32 // CHECK4-NEXT: [[ADD4:%.*]] = add nsw i32 [[CONV3]], 1 // CHECK4-NEXT: [[CONV5:%.*]] = trunc i32 [[ADD4]] to i16 -// CHECK4-NEXT: store i16 [[CONV5]], i16* [[CONV]], align 4 +// CHECK4-NEXT: store i16 [[CONV5]], i16* [[CONV]], align 2 // CHECK4-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], [10 x i32]* [[TMP0]], i32 0, i32 2 // CHECK4-NEXT: [[TMP11:%.*]] = load i32, i32* [[ARRAYIDX]], align 4 // CHECK4-NEXT: [[ADD6:%.*]] = add nsw i32 [[TMP11]], 1 @@ -6879,15 +6879,15 @@ int bar(int n){ // CHECK9-NEXT: [[CONV:%.*]] = bitcast i64* [[AA_ADDR]] to i16* // CHECK9-NEXT: [[CONV1:%.*]] = bitcast i64* [[LIN_ADDR]] to i32* // CHECK9-NEXT: [[CONV2:%.*]] = bitcast i64* [[A_ADDR]] to i32* -// CHECK9-NEXT: [[TMP0:%.*]] = load i16, i16* [[CONV]], align 8 +// CHECK9-NEXT: [[TMP0:%.*]] = load i16, i16* [[CONV]], align 2 // CHECK9-NEXT: [[CONV3:%.*]] = bitcast i64* [[AA_CASTED]] to i16* // CHECK9-NEXT: store i16 [[TMP0]], i16* [[CONV3]], align 2 // CHECK9-NEXT: [[TMP1:%.*]] = load i64, i64* [[AA_CASTED]], align 8 -// CHECK9-NEXT: [[TMP2:%.*]] = load i32, i32* [[CONV1]], align 8 +// CHECK9-NEXT: [[TMP2:%.*]] = load i32, i32* [[CONV1]], align 4 // CHECK9-NEXT: [[CONV4:%.*]] = bitcast i64* [[LIN_CASTED]] to i32* // CHECK9-NEXT: store i32 [[TMP2]], i32* [[CONV4]], align 4 // CHECK9-NEXT: [[TMP3:%.*]] = load i64, i64* [[LIN_CASTED]], align 8 -// CHECK9-NEXT: [[TMP4:%.*]] = load i32, i32* [[CONV2]], align 8 +// CHECK9-NEXT: [[TMP4:%.*]] = load i32, i32* [[CONV2]], align 4 // CHECK9-NEXT: [[CONV5:%.*]] = bitcast i64* [[A_CASTED]] to i32* // CHECK9-NEXT: store i32 [[TMP4]], i32* [[CONV5]], align 4 // CHECK9-NEXT: [[TMP5:%.*]] = load i64, i64* [[A_CASTED]], align 8 @@ -6923,9 +6923,9 @@ int bar(int n){ // CHECK9-NEXT: [[CONV:%.*]] = bitcast i64* [[AA_ADDR]] to i16* // CHECK9-NEXT: [[CONV1:%.*]] = bitcast i64* [[LIN_ADDR]] to i32* // CHECK9-NEXT: [[CONV2:%.*]] = bitcast i64* [[A_ADDR]] to i32* -// CHECK9-NEXT: [[TMP0:%.*]] = load i32, i32* [[CONV1]], align 8 +// CHECK9-NEXT: [[TMP0:%.*]] = load i32, i32* [[CONV1]], align 4 // CHECK9-NEXT: store i32 [[TMP0]], i32* [[DOTLINEAR_START]], align 4 -// CHECK9-NEXT: [[TMP1:%.*]] = load i32, i32* [[CONV2]], align 8 +// CHECK9-NEXT: [[TMP1:%.*]] = load i32, i32* [[CONV2]], align 4 // CHECK9-NEXT: store i32 [[TMP1]], i32* [[DOTLINEAR_START3]], align 4 // CHECK9-NEXT: [[CALL:%.*]] = call i64 @_Z7get_valv() #[[ATTR5:[0-9]+]] // CHECK9-NEXT: store i64 [[CALL]], i64* [[DOTLINEAR_STEP]], align 8 @@ -6977,11 +6977,11 @@ int bar(int n){ // CHECK9-NEXT: [[ADD12:%.*]] = add i64 [[CONV10]], [[MUL11]] // CHECK9-NEXT: [[CONV13:%.*]] = trunc i64 [[ADD12]] to i32 // CHECK9-NEXT: store i32 [[CONV13]], i32* [[A5]], align 4 -// CHECK9-NEXT: [[TMP16:%.*]] = load i16, i16* [[CONV]], align 8 +// CHECK9-NEXT: [[TMP16:%.*]] = load i16, i16* [[CONV]], align 2 // CHECK9-NEXT: [[CONV14:%.*]] = sext i16 [[TMP16]] to i32 // CHECK9-NEXT: [[ADD15:%.*]] = add nsw i32 [[CONV14]], 1 // CHECK9-NEXT: [[CONV16:%.*]] = trunc i32 [[ADD15]] to i16 -// CHECK9-NEXT: store i16 [[CONV16]], i16* [[CONV]], align 8 +// CHECK9-NEXT: store i16 [[CONV16]], i16* [[CONV]], align 2 // CHECK9-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK9: omp.body.continue: // CHECK9-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] @@ -6999,9 +6999,9 @@ int bar(int n){ // CHECK9-NEXT: br i1 [[TMP19]], label [[DOTOMP_LINEAR_PU:%.*]], label [[DOTOMP_LINEAR_PU_DONE:%.*]] // CHECK9: .omp.linear.pu: // CHECK9-NEXT: [[TMP20:%.*]] = load i32, i32* [[LIN4]], align 4 -// CHECK9-NEXT: store i32 [[TMP20]], i32* [[CONV1]], 
align 8 +// CHECK9-NEXT: store i32 [[TMP20]], i32* [[CONV1]], align 4 // CHECK9-NEXT: [[TMP21:%.*]] = load i32, i32* [[A5]], align 4 -// CHECK9-NEXT: store i32 [[TMP21]], i32* [[CONV2]], align 8 +// CHECK9-NEXT: store i32 [[TMP21]], i32* [[CONV2]], align 4 // CHECK9-NEXT: br label [[DOTOMP_LINEAR_PU_DONE]] // CHECK9: .omp.linear.pu.done: // CHECK9-NEXT: ret void @@ -7024,11 +7024,11 @@ int bar(int n){ // CHECK9-NEXT: store i64 [[AA]], i64* [[AA_ADDR]], align 8 // CHECK9-NEXT: [[CONV:%.*]] = bitcast i64* [[A_ADDR]] to i32* // CHECK9-NEXT: [[CONV1:%.*]] = bitcast i64* [[AA_ADDR]] to i16* -// CHECK9-NEXT: [[TMP0:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK9-NEXT: [[TMP0:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK9-NEXT: [[CONV2:%.*]] = bitcast i64* [[A_CASTED]] to i32* // CHECK9-NEXT: store i32 [[TMP0]], i32* [[CONV2]], align 4 // CHECK9-NEXT: [[TMP1:%.*]] = load i64, i64* [[A_CASTED]], align 8 -// CHECK9-NEXT: [[TMP2:%.*]] = load i16, i16* [[CONV1]], align 8 +// CHECK9-NEXT: [[TMP2:%.*]] = load i16, i16* [[CONV1]], align 2 // CHECK9-NEXT: [[CONV3:%.*]] = bitcast i64* [[AA_CASTED]] to i16* // CHECK9-NEXT: store i16 [[TMP2]], i16* [[CONV3]], align 2 // CHECK9-NEXT: [[TMP3:%.*]] = load i64, i64* [[AA_CASTED]], align 8 @@ -7088,14 +7088,14 @@ int bar(int n){ // CHECK9-NEXT: [[ADD:%.*]] = add nsw i32 6, [[MUL]] // CHECK9-NEXT: [[CONV3:%.*]] = trunc i32 [[ADD]] to i16 // CHECK9-NEXT: store i16 [[CONV3]], i16* [[IT]], align 2 -// CHECK9-NEXT: [[TMP8:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK9-NEXT: [[TMP8:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK9-NEXT: [[ADD4:%.*]] = add nsw i32 [[TMP8]], 1 -// CHECK9-NEXT: store i32 [[ADD4]], i32* [[CONV]], align 8 -// CHECK9-NEXT: [[TMP9:%.*]] = load i16, i16* [[CONV1]], align 8 +// CHECK9-NEXT: store i32 [[ADD4]], i32* [[CONV]], align 4 +// CHECK9-NEXT: [[TMP9:%.*]] = load i16, i16* [[CONV1]], align 2 // CHECK9-NEXT: [[CONV5:%.*]] = sext i16 [[TMP9]] to i32 // CHECK9-NEXT: [[ADD6:%.*]] = add nsw i32 [[CONV5]], 1 // CHECK9-NEXT: [[CONV7:%.*]] = trunc i32 [[ADD6]] to i16 -// CHECK9-NEXT: store i16 [[CONV7]], i16* [[CONV1]], align 8 +// CHECK9-NEXT: store i16 [[CONV7]], i16* [[CONV1]], align 2 // CHECK9-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK9: omp.body.continue: // CHECK9-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] @@ -7146,11 +7146,11 @@ int bar(int n){ // CHECK9-NEXT: [[TMP6:%.*]] = load double*, double** [[CN_ADDR]], align 8 // CHECK9-NEXT: [[TMP7:%.*]] = load %struct.TT*, %struct.TT** [[D_ADDR]], align 8 // CHECK9-NEXT: [[CONV5:%.*]] = bitcast i64* [[DOTCAPTURE_EXPR__ADDR]] to i32* -// CHECK9-NEXT: [[TMP8:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK9-NEXT: [[TMP8:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK9-NEXT: [[CONV6:%.*]] = bitcast i64* [[A_CASTED]] to i32* // CHECK9-NEXT: store i32 [[TMP8]], i32* [[CONV6]], align 4 // CHECK9-NEXT: [[TMP9:%.*]] = load i64, i64* [[A_CASTED]], align 8 -// CHECK9-NEXT: [[TMP10:%.*]] = load i32, i32* [[CONV5]], align 8 +// CHECK9-NEXT: [[TMP10:%.*]] = load i32, i32* [[CONV5]], align 4 // CHECK9-NEXT: [[CONV7:%.*]] = bitcast i64* [[DOTCAPTURE_EXPR__CASTED]] to i32* // CHECK9-NEXT: store i32 [[TMP10]], i32* [[CONV7]], align 4 // CHECK9-NEXT: [[TMP11:%.*]] = load i64, i64* [[DOTCAPTURE_EXPR__CASTED]], align 8 @@ -7206,7 +7206,7 @@ int bar(int n){ // CHECK9-NEXT: store i32 25, i32* [[DOTOMP_UB]], align 4 // CHECK9-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK9-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK9-NEXT: [[TMP8:%.*]] = load i32, i32* [[CONV5]], 
align 8 +// CHECK9-NEXT: [[TMP8:%.*]] = load i32, i32* [[CONV5]], align 4 // CHECK9-NEXT: [[TMP9:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK9-NEXT: [[TMP10:%.*]] = load i32, i32* [[TMP9]], align 4 // CHECK9-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP10]], i32 33, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 [[TMP8]]) @@ -7242,9 +7242,9 @@ int bar(int n){ // CHECK9-NEXT: [[SUB:%.*]] = sub nsw i32 122, [[MUL]] // CHECK9-NEXT: [[CONV8:%.*]] = trunc i32 [[SUB]] to i8 // CHECK9-NEXT: store i8 [[CONV8]], i8* [[IT]], align 1 -// CHECK9-NEXT: [[TMP19:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK9-NEXT: [[TMP19:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK9-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP19]], 1 -// CHECK9-NEXT: store i32 [[ADD]], i32* [[CONV]], align 8 +// CHECK9-NEXT: store i32 [[ADD]], i32* [[CONV]], align 4 // CHECK9-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x float], [10 x float]* [[TMP0]], i64 0, i64 2 // CHECK9-NEXT: [[TMP20:%.*]] = load float, float* [[ARRAYIDX]], align 4 // CHECK9-NEXT: [[CONV9:%.*]] = fpext float [[TMP20]] to double @@ -7321,15 +7321,15 @@ int bar(int n){ // CHECK9-NEXT: [[CONV1:%.*]] = bitcast i64* [[AA_ADDR]] to i16* // CHECK9-NEXT: [[CONV2:%.*]] = bitcast i64* [[AAA_ADDR]] to i8* // CHECK9-NEXT: [[TMP0:%.*]] = load [10 x i32]*, [10 x i32]** [[B_ADDR]], align 8 -// CHECK9-NEXT: [[TMP1:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK9-NEXT: [[TMP1:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK9-NEXT: [[CONV3:%.*]] = bitcast i64* [[A_CASTED]] to i32* // CHECK9-NEXT: store i32 [[TMP1]], i32* [[CONV3]], align 4 // CHECK9-NEXT: [[TMP2:%.*]] = load i64, i64* [[A_CASTED]], align 8 -// CHECK9-NEXT: [[TMP3:%.*]] = load i16, i16* [[CONV1]], align 8 +// CHECK9-NEXT: [[TMP3:%.*]] = load i16, i16* [[CONV1]], align 2 // CHECK9-NEXT: [[CONV4:%.*]] = bitcast i64* [[AA_CASTED]] to i16* // CHECK9-NEXT: store i16 [[TMP3]], i16* [[CONV4]], align 2 // CHECK9-NEXT: [[TMP4:%.*]] = load i64, i64* [[AA_CASTED]], align 8 -// CHECK9-NEXT: [[TMP5:%.*]] = load i8, i8* [[CONV2]], align 8 +// CHECK9-NEXT: [[TMP5:%.*]] = load i8, i8* [[CONV2]], align 1 // CHECK9-NEXT: [[CONV5:%.*]] = bitcast i64* [[AAA_CASTED]] to i8* // CHECK9-NEXT: store i8 [[TMP5]], i8* [[CONV5]], align 1 // CHECK9-NEXT: [[TMP6:%.*]] = load i64, i64* [[AAA_CASTED]], align 8 @@ -7380,7 +7380,7 @@ int bar(int n){ // CHECK9-NEXT: [[TMP1:%.*]] = load i64, i64* [[VLA_ADDR]], align 8 // CHECK9-NEXT: [[TMP2:%.*]] = load i64, i64* [[VLA_ADDR2]], align 8 // CHECK9-NEXT: [[TMP3:%.*]] = load i16*, i16** [[C_ADDR]], align 8 -// CHECK9-NEXT: [[TMP4:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK9-NEXT: [[TMP4:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK9-NEXT: [[CONV3:%.*]] = bitcast i64* [[B_CASTED]] to i32* // CHECK9-NEXT: store i32 [[TMP4]], i32* [[CONV3]], align 4 // CHECK9-NEXT: [[TMP5:%.*]] = load i64, i64* [[B_CASTED]], align 8 @@ -7448,7 +7448,7 @@ int bar(int n){ // CHECK9-NEXT: [[MUL:%.*]] = mul i64 [[TMP11]], 400 // CHECK9-NEXT: [[SUB:%.*]] = sub i64 2000, [[MUL]] // CHECK9-NEXT: store i64 [[SUB]], i64* [[IT]], align 8 -// CHECK9-NEXT: [[TMP12:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK9-NEXT: [[TMP12:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK9-NEXT: [[CONV4:%.*]] = sitofp i32 [[TMP12]] to double // CHECK9-NEXT: [[ADD:%.*]] = fadd double [[CONV4]], 1.500000e+00 // CHECK9-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_S1:%.*]], %struct.S1* [[TMP0]], i32 0, i32 0 @@ 
-7491,11 +7491,11 @@ int bar(int n){ // CHECK9-NEXT: [[CONV:%.*]] = bitcast i64* [[A_ADDR]] to i32* // CHECK9-NEXT: [[CONV1:%.*]] = bitcast i64* [[AA_ADDR]] to i16* // CHECK9-NEXT: [[TMP0:%.*]] = load [10 x i32]*, [10 x i32]** [[B_ADDR]], align 8 -// CHECK9-NEXT: [[TMP1:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK9-NEXT: [[TMP1:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK9-NEXT: [[CONV2:%.*]] = bitcast i64* [[A_CASTED]] to i32* // CHECK9-NEXT: store i32 [[TMP1]], i32* [[CONV2]], align 4 // CHECK9-NEXT: [[TMP2:%.*]] = load i64, i64* [[A_CASTED]], align 8 -// CHECK9-NEXT: [[TMP3:%.*]] = load i16, i16* [[CONV1]], align 8 +// CHECK9-NEXT: [[TMP3:%.*]] = load i16, i16* [[CONV1]], align 2 // CHECK9-NEXT: [[CONV3:%.*]] = bitcast i64* [[AA_CASTED]] to i16* // CHECK9-NEXT: store i16 [[TMP3]], i16* [[CONV3]], align 2 // CHECK9-NEXT: [[TMP4:%.*]] = load i64, i64* [[AA_CASTED]], align 8 @@ -7557,14 +7557,14 @@ int bar(int n){ // CHECK9-NEXT: [[MUL:%.*]] = mul nsw i64 [[TMP8]], 3 // CHECK9-NEXT: [[ADD:%.*]] = add nsw i64 -10, [[MUL]] // CHECK9-NEXT: store i64 [[ADD]], i64* [[I]], align 8 -// CHECK9-NEXT: [[TMP9:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK9-NEXT: [[TMP9:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK9-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP9]], 1 -// CHECK9-NEXT: store i32 [[ADD3]], i32* [[CONV]], align 8 -// CHECK9-NEXT: [[TMP10:%.*]] = load i16, i16* [[CONV1]], align 8 +// CHECK9-NEXT: store i32 [[ADD3]], i32* [[CONV]], align 4 +// CHECK9-NEXT: [[TMP10:%.*]] = load i16, i16* [[CONV1]], align 2 // CHECK9-NEXT: [[CONV4:%.*]] = sext i16 [[TMP10]] to i32 // CHECK9-NEXT: [[ADD5:%.*]] = add nsw i32 [[CONV4]], 1 // CHECK9-NEXT: [[CONV6:%.*]] = trunc i32 [[ADD5]] to i16 -// CHECK9-NEXT: store i16 [[CONV6]], i16* [[CONV1]], align 8 +// CHECK9-NEXT: store i16 [[CONV6]], i16* [[CONV1]], align 2 // CHECK9-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], [10 x i32]* [[TMP0]], i64 0, i64 2 // CHECK9-NEXT: [[TMP11:%.*]] = load i32, i32* [[ARRAYIDX]], align 4 // CHECK9-NEXT: [[ADD7:%.*]] = add nsw i32 [[TMP11]], 1 @@ -7683,15 +7683,15 @@ int bar(int n){ // CHECK10-NEXT: [[CONV:%.*]] = bitcast i64* [[AA_ADDR]] to i16* // CHECK10-NEXT: [[CONV1:%.*]] = bitcast i64* [[LIN_ADDR]] to i32* // CHECK10-NEXT: [[CONV2:%.*]] = bitcast i64* [[A_ADDR]] to i32* -// CHECK10-NEXT: [[TMP0:%.*]] = load i16, i16* [[CONV]], align 8 +// CHECK10-NEXT: [[TMP0:%.*]] = load i16, i16* [[CONV]], align 2 // CHECK10-NEXT: [[CONV3:%.*]] = bitcast i64* [[AA_CASTED]] to i16* // CHECK10-NEXT: store i16 [[TMP0]], i16* [[CONV3]], align 2 // CHECK10-NEXT: [[TMP1:%.*]] = load i64, i64* [[AA_CASTED]], align 8 -// CHECK10-NEXT: [[TMP2:%.*]] = load i32, i32* [[CONV1]], align 8 +// CHECK10-NEXT: [[TMP2:%.*]] = load i32, i32* [[CONV1]], align 4 // CHECK10-NEXT: [[CONV4:%.*]] = bitcast i64* [[LIN_CASTED]] to i32* // CHECK10-NEXT: store i32 [[TMP2]], i32* [[CONV4]], align 4 // CHECK10-NEXT: [[TMP3:%.*]] = load i64, i64* [[LIN_CASTED]], align 8 -// CHECK10-NEXT: [[TMP4:%.*]] = load i32, i32* [[CONV2]], align 8 +// CHECK10-NEXT: [[TMP4:%.*]] = load i32, i32* [[CONV2]], align 4 // CHECK10-NEXT: [[CONV5:%.*]] = bitcast i64* [[A_CASTED]] to i32* // CHECK10-NEXT: store i32 [[TMP4]], i32* [[CONV5]], align 4 // CHECK10-NEXT: [[TMP5:%.*]] = load i64, i64* [[A_CASTED]], align 8 @@ -7727,9 +7727,9 @@ int bar(int n){ // CHECK10-NEXT: [[CONV:%.*]] = bitcast i64* [[AA_ADDR]] to i16* // CHECK10-NEXT: [[CONV1:%.*]] = bitcast i64* [[LIN_ADDR]] to i32* // CHECK10-NEXT: [[CONV2:%.*]] = bitcast i64* [[A_ADDR]] to i32* -// CHECK10-NEXT: 
[[TMP0:%.*]] = load i32, i32* [[CONV1]], align 8 +// CHECK10-NEXT: [[TMP0:%.*]] = load i32, i32* [[CONV1]], align 4 // CHECK10-NEXT: store i32 [[TMP0]], i32* [[DOTLINEAR_START]], align 4 -// CHECK10-NEXT: [[TMP1:%.*]] = load i32, i32* [[CONV2]], align 8 +// CHECK10-NEXT: [[TMP1:%.*]] = load i32, i32* [[CONV2]], align 4 // CHECK10-NEXT: store i32 [[TMP1]], i32* [[DOTLINEAR_START3]], align 4 // CHECK10-NEXT: [[CALL:%.*]] = call i64 @_Z7get_valv() #[[ATTR5:[0-9]+]] // CHECK10-NEXT: store i64 [[CALL]], i64* [[DOTLINEAR_STEP]], align 8 @@ -7781,11 +7781,11 @@ int bar(int n){ // CHECK10-NEXT: [[ADD12:%.*]] = add i64 [[CONV10]], [[MUL11]] // CHECK10-NEXT: [[CONV13:%.*]] = trunc i64 [[ADD12]] to i32 // CHECK10-NEXT: store i32 [[CONV13]], i32* [[A5]], align 4 -// CHECK10-NEXT: [[TMP16:%.*]] = load i16, i16* [[CONV]], align 8 +// CHECK10-NEXT: [[TMP16:%.*]] = load i16, i16* [[CONV]], align 2 // CHECK10-NEXT: [[CONV14:%.*]] = sext i16 [[TMP16]] to i32 // CHECK10-NEXT: [[ADD15:%.*]] = add nsw i32 [[CONV14]], 1 // CHECK10-NEXT: [[CONV16:%.*]] = trunc i32 [[ADD15]] to i16 -// CHECK10-NEXT: store i16 [[CONV16]], i16* [[CONV]], align 8 +// CHECK10-NEXT: store i16 [[CONV16]], i16* [[CONV]], align 2 // CHECK10-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK10: omp.body.continue: // CHECK10-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] @@ -7803,9 +7803,9 @@ int bar(int n){ // CHECK10-NEXT: br i1 [[TMP19]], label [[DOTOMP_LINEAR_PU:%.*]], label [[DOTOMP_LINEAR_PU_DONE:%.*]] // CHECK10: .omp.linear.pu: // CHECK10-NEXT: [[TMP20:%.*]] = load i32, i32* [[LIN4]], align 4 -// CHECK10-NEXT: store i32 [[TMP20]], i32* [[CONV1]], align 8 +// CHECK10-NEXT: store i32 [[TMP20]], i32* [[CONV1]], align 4 // CHECK10-NEXT: [[TMP21:%.*]] = load i32, i32* [[A5]], align 4 -// CHECK10-NEXT: store i32 [[TMP21]], i32* [[CONV2]], align 8 +// CHECK10-NEXT: store i32 [[TMP21]], i32* [[CONV2]], align 4 // CHECK10-NEXT: br label [[DOTOMP_LINEAR_PU_DONE]] // CHECK10: .omp.linear.pu.done: // CHECK10-NEXT: ret void @@ -7828,11 +7828,11 @@ int bar(int n){ // CHECK10-NEXT: store i64 [[AA]], i64* [[AA_ADDR]], align 8 // CHECK10-NEXT: [[CONV:%.*]] = bitcast i64* [[A_ADDR]] to i32* // CHECK10-NEXT: [[CONV1:%.*]] = bitcast i64* [[AA_ADDR]] to i16* -// CHECK10-NEXT: [[TMP0:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK10-NEXT: [[TMP0:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK10-NEXT: [[CONV2:%.*]] = bitcast i64* [[A_CASTED]] to i32* // CHECK10-NEXT: store i32 [[TMP0]], i32* [[CONV2]], align 4 // CHECK10-NEXT: [[TMP1:%.*]] = load i64, i64* [[A_CASTED]], align 8 -// CHECK10-NEXT: [[TMP2:%.*]] = load i16, i16* [[CONV1]], align 8 +// CHECK10-NEXT: [[TMP2:%.*]] = load i16, i16* [[CONV1]], align 2 // CHECK10-NEXT: [[CONV3:%.*]] = bitcast i64* [[AA_CASTED]] to i16* // CHECK10-NEXT: store i16 [[TMP2]], i16* [[CONV3]], align 2 // CHECK10-NEXT: [[TMP3:%.*]] = load i64, i64* [[AA_CASTED]], align 8 @@ -7892,14 +7892,14 @@ int bar(int n){ // CHECK10-NEXT: [[ADD:%.*]] = add nsw i32 6, [[MUL]] // CHECK10-NEXT: [[CONV3:%.*]] = trunc i32 [[ADD]] to i16 // CHECK10-NEXT: store i16 [[CONV3]], i16* [[IT]], align 2 -// CHECK10-NEXT: [[TMP8:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK10-NEXT: [[TMP8:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK10-NEXT: [[ADD4:%.*]] = add nsw i32 [[TMP8]], 1 -// CHECK10-NEXT: store i32 [[ADD4]], i32* [[CONV]], align 8 -// CHECK10-NEXT: [[TMP9:%.*]] = load i16, i16* [[CONV1]], align 8 +// CHECK10-NEXT: store i32 [[ADD4]], i32* [[CONV]], align 4 +// CHECK10-NEXT: [[TMP9:%.*]] = load i16, i16* [[CONV1]], align 2 // 
CHECK10-NEXT: [[CONV5:%.*]] = sext i16 [[TMP9]] to i32 // CHECK10-NEXT: [[ADD6:%.*]] = add nsw i32 [[CONV5]], 1 // CHECK10-NEXT: [[CONV7:%.*]] = trunc i32 [[ADD6]] to i16 -// CHECK10-NEXT: store i16 [[CONV7]], i16* [[CONV1]], align 8 +// CHECK10-NEXT: store i16 [[CONV7]], i16* [[CONV1]], align 2 // CHECK10-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK10: omp.body.continue: // CHECK10-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] @@ -7950,11 +7950,11 @@ int bar(int n){ // CHECK10-NEXT: [[TMP6:%.*]] = load double*, double** [[CN_ADDR]], align 8 // CHECK10-NEXT: [[TMP7:%.*]] = load %struct.TT*, %struct.TT** [[D_ADDR]], align 8 // CHECK10-NEXT: [[CONV5:%.*]] = bitcast i64* [[DOTCAPTURE_EXPR__ADDR]] to i32* -// CHECK10-NEXT: [[TMP8:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK10-NEXT: [[TMP8:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK10-NEXT: [[CONV6:%.*]] = bitcast i64* [[A_CASTED]] to i32* // CHECK10-NEXT: store i32 [[TMP8]], i32* [[CONV6]], align 4 // CHECK10-NEXT: [[TMP9:%.*]] = load i64, i64* [[A_CASTED]], align 8 -// CHECK10-NEXT: [[TMP10:%.*]] = load i32, i32* [[CONV5]], align 8 +// CHECK10-NEXT: [[TMP10:%.*]] = load i32, i32* [[CONV5]], align 4 // CHECK10-NEXT: [[CONV7:%.*]] = bitcast i64* [[DOTCAPTURE_EXPR__CASTED]] to i32* // CHECK10-NEXT: store i32 [[TMP10]], i32* [[CONV7]], align 4 // CHECK10-NEXT: [[TMP11:%.*]] = load i64, i64* [[DOTCAPTURE_EXPR__CASTED]], align 8 @@ -8010,7 +8010,7 @@ int bar(int n){ // CHECK10-NEXT: store i32 25, i32* [[DOTOMP_UB]], align 4 // CHECK10-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK10-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK10-NEXT: [[TMP8:%.*]] = load i32, i32* [[CONV5]], align 8 +// CHECK10-NEXT: [[TMP8:%.*]] = load i32, i32* [[CONV5]], align 4 // CHECK10-NEXT: [[TMP9:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK10-NEXT: [[TMP10:%.*]] = load i32, i32* [[TMP9]], align 4 // CHECK10-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP10]], i32 33, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 [[TMP8]]) @@ -8046,9 +8046,9 @@ int bar(int n){ // CHECK10-NEXT: [[SUB:%.*]] = sub nsw i32 122, [[MUL]] // CHECK10-NEXT: [[CONV8:%.*]] = trunc i32 [[SUB]] to i8 // CHECK10-NEXT: store i8 [[CONV8]], i8* [[IT]], align 1 -// CHECK10-NEXT: [[TMP19:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK10-NEXT: [[TMP19:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK10-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP19]], 1 -// CHECK10-NEXT: store i32 [[ADD]], i32* [[CONV]], align 8 +// CHECK10-NEXT: store i32 [[ADD]], i32* [[CONV]], align 4 // CHECK10-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x float], [10 x float]* [[TMP0]], i64 0, i64 2 // CHECK10-NEXT: [[TMP20:%.*]] = load float, float* [[ARRAYIDX]], align 4 // CHECK10-NEXT: [[CONV9:%.*]] = fpext float [[TMP20]] to double @@ -8125,15 +8125,15 @@ int bar(int n){ // CHECK10-NEXT: [[CONV1:%.*]] = bitcast i64* [[AA_ADDR]] to i16* // CHECK10-NEXT: [[CONV2:%.*]] = bitcast i64* [[AAA_ADDR]] to i8* // CHECK10-NEXT: [[TMP0:%.*]] = load [10 x i32]*, [10 x i32]** [[B_ADDR]], align 8 -// CHECK10-NEXT: [[TMP1:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK10-NEXT: [[TMP1:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK10-NEXT: [[CONV3:%.*]] = bitcast i64* [[A_CASTED]] to i32* // CHECK10-NEXT: store i32 [[TMP1]], i32* [[CONV3]], align 4 // CHECK10-NEXT: [[TMP2:%.*]] = load i64, i64* [[A_CASTED]], align 8 -// CHECK10-NEXT: [[TMP3:%.*]] = load i16, i16* [[CONV1]], align 8 +// 
CHECK10-NEXT: [[TMP3:%.*]] = load i16, i16* [[CONV1]], align 2 // CHECK10-NEXT: [[CONV4:%.*]] = bitcast i64* [[AA_CASTED]] to i16* // CHECK10-NEXT: store i16 [[TMP3]], i16* [[CONV4]], align 2 // CHECK10-NEXT: [[TMP4:%.*]] = load i64, i64* [[AA_CASTED]], align 8 -// CHECK10-NEXT: [[TMP5:%.*]] = load i8, i8* [[CONV2]], align 8 +// CHECK10-NEXT: [[TMP5:%.*]] = load i8, i8* [[CONV2]], align 1 // CHECK10-NEXT: [[CONV5:%.*]] = bitcast i64* [[AAA_CASTED]] to i8* // CHECK10-NEXT: store i8 [[TMP5]], i8* [[CONV5]], align 1 // CHECK10-NEXT: [[TMP6:%.*]] = load i64, i64* [[AAA_CASTED]], align 8 @@ -8184,7 +8184,7 @@ int bar(int n){ // CHECK10-NEXT: [[TMP1:%.*]] = load i64, i64* [[VLA_ADDR]], align 8 // CHECK10-NEXT: [[TMP2:%.*]] = load i64, i64* [[VLA_ADDR2]], align 8 // CHECK10-NEXT: [[TMP3:%.*]] = load i16*, i16** [[C_ADDR]], align 8 -// CHECK10-NEXT: [[TMP4:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK10-NEXT: [[TMP4:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK10-NEXT: [[CONV3:%.*]] = bitcast i64* [[B_CASTED]] to i32* // CHECK10-NEXT: store i32 [[TMP4]], i32* [[CONV3]], align 4 // CHECK10-NEXT: [[TMP5:%.*]] = load i64, i64* [[B_CASTED]], align 8 @@ -8252,7 +8252,7 @@ int bar(int n){ // CHECK10-NEXT: [[MUL:%.*]] = mul i64 [[TMP11]], 400 // CHECK10-NEXT: [[SUB:%.*]] = sub i64 2000, [[MUL]] // CHECK10-NEXT: store i64 [[SUB]], i64* [[IT]], align 8 -// CHECK10-NEXT: [[TMP12:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK10-NEXT: [[TMP12:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK10-NEXT: [[CONV4:%.*]] = sitofp i32 [[TMP12]] to double // CHECK10-NEXT: [[ADD:%.*]] = fadd double [[CONV4]], 1.500000e+00 // CHECK10-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_S1:%.*]], %struct.S1* [[TMP0]], i32 0, i32 0 @@ -8295,11 +8295,11 @@ int bar(int n){ // CHECK10-NEXT: [[CONV:%.*]] = bitcast i64* [[A_ADDR]] to i32* // CHECK10-NEXT: [[CONV1:%.*]] = bitcast i64* [[AA_ADDR]] to i16* // CHECK10-NEXT: [[TMP0:%.*]] = load [10 x i32]*, [10 x i32]** [[B_ADDR]], align 8 -// CHECK10-NEXT: [[TMP1:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK10-NEXT: [[TMP1:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK10-NEXT: [[CONV2:%.*]] = bitcast i64* [[A_CASTED]] to i32* // CHECK10-NEXT: store i32 [[TMP1]], i32* [[CONV2]], align 4 // CHECK10-NEXT: [[TMP2:%.*]] = load i64, i64* [[A_CASTED]], align 8 -// CHECK10-NEXT: [[TMP3:%.*]] = load i16, i16* [[CONV1]], align 8 +// CHECK10-NEXT: [[TMP3:%.*]] = load i16, i16* [[CONV1]], align 2 // CHECK10-NEXT: [[CONV3:%.*]] = bitcast i64* [[AA_CASTED]] to i16* // CHECK10-NEXT: store i16 [[TMP3]], i16* [[CONV3]], align 2 // CHECK10-NEXT: [[TMP4:%.*]] = load i64, i64* [[AA_CASTED]], align 8 @@ -8361,14 +8361,14 @@ int bar(int n){ // CHECK10-NEXT: [[MUL:%.*]] = mul nsw i64 [[TMP8]], 3 // CHECK10-NEXT: [[ADD:%.*]] = add nsw i64 -10, [[MUL]] // CHECK10-NEXT: store i64 [[ADD]], i64* [[I]], align 8 -// CHECK10-NEXT: [[TMP9:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK10-NEXT: [[TMP9:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK10-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP9]], 1 -// CHECK10-NEXT: store i32 [[ADD3]], i32* [[CONV]], align 8 -// CHECK10-NEXT: [[TMP10:%.*]] = load i16, i16* [[CONV1]], align 8 +// CHECK10-NEXT: store i32 [[ADD3]], i32* [[CONV]], align 4 +// CHECK10-NEXT: [[TMP10:%.*]] = load i16, i16* [[CONV1]], align 2 // CHECK10-NEXT: [[CONV4:%.*]] = sext i16 [[TMP10]] to i32 // CHECK10-NEXT: [[ADD5:%.*]] = add nsw i32 [[CONV4]], 1 // CHECK10-NEXT: [[CONV6:%.*]] = trunc i32 [[ADD5]] to i16 -// CHECK10-NEXT: store i16 [[CONV6]], i16* [[CONV1]], align 8 +// 
CHECK10-NEXT: store i16 [[CONV6]], i16* [[CONV1]], align 2 // CHECK10-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], [10 x i32]* [[TMP0]], i64 0, i64 2 // CHECK10-NEXT: [[TMP11:%.*]] = load i32, i32* [[ARRAYIDX]], align 4 // CHECK10-NEXT: [[ADD7:%.*]] = add nsw i32 [[TMP11]], 1 @@ -8485,7 +8485,7 @@ int bar(int n){ // CHECK11-NEXT: store i32 [[LIN]], i32* [[LIN_ADDR]], align 4 // CHECK11-NEXT: store i32 [[A]], i32* [[A_ADDR]], align 4 // CHECK11-NEXT: [[CONV:%.*]] = bitcast i32* [[AA_ADDR]] to i16* -// CHECK11-NEXT: [[TMP0:%.*]] = load i16, i16* [[CONV]], align 4 +// CHECK11-NEXT: [[TMP0:%.*]] = load i16, i16* [[CONV]], align 2 // CHECK11-NEXT: [[CONV1:%.*]] = bitcast i32* [[AA_CASTED]] to i16* // CHECK11-NEXT: store i16 [[TMP0]], i16* [[CONV1]], align 2 // CHECK11-NEXT: [[TMP1:%.*]] = load i32, i32* [[AA_CASTED]], align 4 @@ -8579,11 +8579,11 @@ int bar(int n){ // CHECK11-NEXT: [[ADD10:%.*]] = add i64 [[CONV8]], [[MUL9]] // CHECK11-NEXT: [[CONV11:%.*]] = trunc i64 [[ADD10]] to i32 // CHECK11-NEXT: store i32 [[CONV11]], i32* [[A3]], align 4 -// CHECK11-NEXT: [[TMP16:%.*]] = load i16, i16* [[CONV]], align 4 +// CHECK11-NEXT: [[TMP16:%.*]] = load i16, i16* [[CONV]], align 2 // CHECK11-NEXT: [[CONV12:%.*]] = sext i16 [[TMP16]] to i32 // CHECK11-NEXT: [[ADD13:%.*]] = add nsw i32 [[CONV12]], 1 // CHECK11-NEXT: [[CONV14:%.*]] = trunc i32 [[ADD13]] to i16 -// CHECK11-NEXT: store i16 [[CONV14]], i16* [[CONV]], align 4 +// CHECK11-NEXT: store i16 [[CONV14]], i16* [[CONV]], align 2 // CHECK11-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK11: omp.body.continue: // CHECK11-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] @@ -8628,7 +8628,7 @@ int bar(int n){ // CHECK11-NEXT: [[TMP0:%.*]] = load i32, i32* [[A_ADDR]], align 4 // CHECK11-NEXT: store i32 [[TMP0]], i32* [[A_CASTED]], align 4 // CHECK11-NEXT: [[TMP1:%.*]] = load i32, i32* [[A_CASTED]], align 4 -// CHECK11-NEXT: [[TMP2:%.*]] = load i16, i16* [[CONV]], align 4 +// CHECK11-NEXT: [[TMP2:%.*]] = load i16, i16* [[CONV]], align 2 // CHECK11-NEXT: [[CONV1:%.*]] = bitcast i32* [[AA_CASTED]] to i16* // CHECK11-NEXT: store i16 [[TMP2]], i16* [[CONV1]], align 2 // CHECK11-NEXT: [[TMP3:%.*]] = load i32, i32* [[AA_CASTED]], align 4 @@ -8690,11 +8690,11 @@ int bar(int n){ // CHECK11-NEXT: [[TMP8:%.*]] = load i32, i32* [[A_ADDR]], align 4 // CHECK11-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP8]], 1 // CHECK11-NEXT: store i32 [[ADD3]], i32* [[A_ADDR]], align 4 -// CHECK11-NEXT: [[TMP9:%.*]] = load i16, i16* [[CONV]], align 4 +// CHECK11-NEXT: [[TMP9:%.*]] = load i16, i16* [[CONV]], align 2 // CHECK11-NEXT: [[CONV4:%.*]] = sext i16 [[TMP9]] to i32 // CHECK11-NEXT: [[ADD5:%.*]] = add nsw i32 [[CONV4]], 1 // CHECK11-NEXT: [[CONV6:%.*]] = trunc i32 [[ADD5]] to i16 -// CHECK11-NEXT: store i16 [[CONV6]], i16* [[CONV]], align 4 +// CHECK11-NEXT: store i16 [[CONV6]], i16* [[CONV]], align 2 // CHECK11-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK11: omp.body.continue: // CHECK11-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] @@ -8916,11 +8916,11 @@ int bar(int n){ // CHECK11-NEXT: [[TMP1:%.*]] = load i32, i32* [[A_ADDR]], align 4 // CHECK11-NEXT: store i32 [[TMP1]], i32* [[A_CASTED]], align 4 // CHECK11-NEXT: [[TMP2:%.*]] = load i32, i32* [[A_CASTED]], align 4 -// CHECK11-NEXT: [[TMP3:%.*]] = load i16, i16* [[CONV]], align 4 +// CHECK11-NEXT: [[TMP3:%.*]] = load i16, i16* [[CONV]], align 2 // CHECK11-NEXT: [[CONV2:%.*]] = bitcast i32* [[AA_CASTED]] to i16* // CHECK11-NEXT: store i16 [[TMP3]], i16* [[CONV2]], align 2 // CHECK11-NEXT: [[TMP4:%.*]] = load i32, i32* 
[[AA_CASTED]], align 4 -// CHECK11-NEXT: [[TMP5:%.*]] = load i8, i8* [[CONV1]], align 4 +// CHECK11-NEXT: [[TMP5:%.*]] = load i8, i8* [[CONV1]], align 1 // CHECK11-NEXT: [[CONV3:%.*]] = bitcast i32* [[AAA_CASTED]] to i8* // CHECK11-NEXT: store i8 [[TMP5]], i8* [[CONV3]], align 1 // CHECK11-NEXT: [[TMP6:%.*]] = load i32, i32* [[AAA_CASTED]], align 4 @@ -9080,7 +9080,7 @@ int bar(int n){ // CHECK11-NEXT: [[TMP1:%.*]] = load i32, i32* [[A_ADDR]], align 4 // CHECK11-NEXT: store i32 [[TMP1]], i32* [[A_CASTED]], align 4 // CHECK11-NEXT: [[TMP2:%.*]] = load i32, i32* [[A_CASTED]], align 4 -// CHECK11-NEXT: [[TMP3:%.*]] = load i16, i16* [[CONV]], align 4 +// CHECK11-NEXT: [[TMP3:%.*]] = load i16, i16* [[CONV]], align 2 // CHECK11-NEXT: [[CONV1:%.*]] = bitcast i32* [[AA_CASTED]] to i16* // CHECK11-NEXT: store i16 [[TMP3]], i16* [[CONV1]], align 2 // CHECK11-NEXT: [[TMP4:%.*]] = load i32, i32* [[AA_CASTED]], align 4 @@ -9144,11 +9144,11 @@ int bar(int n){ // CHECK11-NEXT: [[TMP9:%.*]] = load i32, i32* [[A_ADDR]], align 4 // CHECK11-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP9]], 1 // CHECK11-NEXT: store i32 [[ADD2]], i32* [[A_ADDR]], align 4 -// CHECK11-NEXT: [[TMP10:%.*]] = load i16, i16* [[CONV]], align 4 +// CHECK11-NEXT: [[TMP10:%.*]] = load i16, i16* [[CONV]], align 2 // CHECK11-NEXT: [[CONV3:%.*]] = sext i16 [[TMP10]] to i32 // CHECK11-NEXT: [[ADD4:%.*]] = add nsw i32 [[CONV3]], 1 // CHECK11-NEXT: [[CONV5:%.*]] = trunc i32 [[ADD4]] to i16 -// CHECK11-NEXT: store i16 [[CONV5]], i16* [[CONV]], align 4 +// CHECK11-NEXT: store i16 [[CONV5]], i16* [[CONV]], align 2 // CHECK11-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], [10 x i32]* [[TMP0]], i32 0, i32 2 // CHECK11-NEXT: [[TMP11:%.*]] = load i32, i32* [[ARRAYIDX]], align 4 // CHECK11-NEXT: [[ADD6:%.*]] = add nsw i32 [[TMP11]], 1 @@ -9265,7 +9265,7 @@ int bar(int n){ // CHECK12-NEXT: store i32 [[LIN]], i32* [[LIN_ADDR]], align 4 // CHECK12-NEXT: store i32 [[A]], i32* [[A_ADDR]], align 4 // CHECK12-NEXT: [[CONV:%.*]] = bitcast i32* [[AA_ADDR]] to i16* -// CHECK12-NEXT: [[TMP0:%.*]] = load i16, i16* [[CONV]], align 4 +// CHECK12-NEXT: [[TMP0:%.*]] = load i16, i16* [[CONV]], align 2 // CHECK12-NEXT: [[CONV1:%.*]] = bitcast i32* [[AA_CASTED]] to i16* // CHECK12-NEXT: store i16 [[TMP0]], i16* [[CONV1]], align 2 // CHECK12-NEXT: [[TMP1:%.*]] = load i32, i32* [[AA_CASTED]], align 4 @@ -9359,11 +9359,11 @@ int bar(int n){ // CHECK12-NEXT: [[ADD10:%.*]] = add i64 [[CONV8]], [[MUL9]] // CHECK12-NEXT: [[CONV11:%.*]] = trunc i64 [[ADD10]] to i32 // CHECK12-NEXT: store i32 [[CONV11]], i32* [[A3]], align 4 -// CHECK12-NEXT: [[TMP16:%.*]] = load i16, i16* [[CONV]], align 4 +// CHECK12-NEXT: [[TMP16:%.*]] = load i16, i16* [[CONV]], align 2 // CHECK12-NEXT: [[CONV12:%.*]] = sext i16 [[TMP16]] to i32 // CHECK12-NEXT: [[ADD13:%.*]] = add nsw i32 [[CONV12]], 1 // CHECK12-NEXT: [[CONV14:%.*]] = trunc i32 [[ADD13]] to i16 -// CHECK12-NEXT: store i16 [[CONV14]], i16* [[CONV]], align 4 +// CHECK12-NEXT: store i16 [[CONV14]], i16* [[CONV]], align 2 // CHECK12-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK12: omp.body.continue: // CHECK12-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] @@ -9408,7 +9408,7 @@ int bar(int n){ // CHECK12-NEXT: [[TMP0:%.*]] = load i32, i32* [[A_ADDR]], align 4 // CHECK12-NEXT: store i32 [[TMP0]], i32* [[A_CASTED]], align 4 // CHECK12-NEXT: [[TMP1:%.*]] = load i32, i32* [[A_CASTED]], align 4 -// CHECK12-NEXT: [[TMP2:%.*]] = load i16, i16* [[CONV]], align 4 +// CHECK12-NEXT: [[TMP2:%.*]] = load i16, i16* [[CONV]], align 2 // CHECK12-NEXT: 
[[CONV1:%.*]] = bitcast i32* [[AA_CASTED]] to i16* // CHECK12-NEXT: store i16 [[TMP2]], i16* [[CONV1]], align 2 // CHECK12-NEXT: [[TMP3:%.*]] = load i32, i32* [[AA_CASTED]], align 4 @@ -9470,11 +9470,11 @@ int bar(int n){ // CHECK12-NEXT: [[TMP8:%.*]] = load i32, i32* [[A_ADDR]], align 4 // CHECK12-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP8]], 1 // CHECK12-NEXT: store i32 [[ADD3]], i32* [[A_ADDR]], align 4 -// CHECK12-NEXT: [[TMP9:%.*]] = load i16, i16* [[CONV]], align 4 +// CHECK12-NEXT: [[TMP9:%.*]] = load i16, i16* [[CONV]], align 2 // CHECK12-NEXT: [[CONV4:%.*]] = sext i16 [[TMP9]] to i32 // CHECK12-NEXT: [[ADD5:%.*]] = add nsw i32 [[CONV4]], 1 // CHECK12-NEXT: [[CONV6:%.*]] = trunc i32 [[ADD5]] to i16 -// CHECK12-NEXT: store i16 [[CONV6]], i16* [[CONV]], align 4 +// CHECK12-NEXT: store i16 [[CONV6]], i16* [[CONV]], align 2 // CHECK12-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK12: omp.body.continue: // CHECK12-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] @@ -9696,11 +9696,11 @@ int bar(int n){ // CHECK12-NEXT: [[TMP1:%.*]] = load i32, i32* [[A_ADDR]], align 4 // CHECK12-NEXT: store i32 [[TMP1]], i32* [[A_CASTED]], align 4 // CHECK12-NEXT: [[TMP2:%.*]] = load i32, i32* [[A_CASTED]], align 4 -// CHECK12-NEXT: [[TMP3:%.*]] = load i16, i16* [[CONV]], align 4 +// CHECK12-NEXT: [[TMP3:%.*]] = load i16, i16* [[CONV]], align 2 // CHECK12-NEXT: [[CONV2:%.*]] = bitcast i32* [[AA_CASTED]] to i16* // CHECK12-NEXT: store i16 [[TMP3]], i16* [[CONV2]], align 2 // CHECK12-NEXT: [[TMP4:%.*]] = load i32, i32* [[AA_CASTED]], align 4 -// CHECK12-NEXT: [[TMP5:%.*]] = load i8, i8* [[CONV1]], align 4 +// CHECK12-NEXT: [[TMP5:%.*]] = load i8, i8* [[CONV1]], align 1 // CHECK12-NEXT: [[CONV3:%.*]] = bitcast i32* [[AAA_CASTED]] to i8* // CHECK12-NEXT: store i8 [[TMP5]], i8* [[CONV3]], align 1 // CHECK12-NEXT: [[TMP6:%.*]] = load i32, i32* [[AAA_CASTED]], align 4 @@ -9860,7 +9860,7 @@ int bar(int n){ // CHECK12-NEXT: [[TMP1:%.*]] = load i32, i32* [[A_ADDR]], align 4 // CHECK12-NEXT: store i32 [[TMP1]], i32* [[A_CASTED]], align 4 // CHECK12-NEXT: [[TMP2:%.*]] = load i32, i32* [[A_CASTED]], align 4 -// CHECK12-NEXT: [[TMP3:%.*]] = load i16, i16* [[CONV]], align 4 +// CHECK12-NEXT: [[TMP3:%.*]] = load i16, i16* [[CONV]], align 2 // CHECK12-NEXT: [[CONV1:%.*]] = bitcast i32* [[AA_CASTED]] to i16* // CHECK12-NEXT: store i16 [[TMP3]], i16* [[CONV1]], align 2 // CHECK12-NEXT: [[TMP4:%.*]] = load i32, i32* [[AA_CASTED]], align 4 @@ -9924,11 +9924,11 @@ int bar(int n){ // CHECK12-NEXT: [[TMP9:%.*]] = load i32, i32* [[A_ADDR]], align 4 // CHECK12-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP9]], 1 // CHECK12-NEXT: store i32 [[ADD2]], i32* [[A_ADDR]], align 4 -// CHECK12-NEXT: [[TMP10:%.*]] = load i16, i16* [[CONV]], align 4 +// CHECK12-NEXT: [[TMP10:%.*]] = load i16, i16* [[CONV]], align 2 // CHECK12-NEXT: [[CONV3:%.*]] = sext i16 [[TMP10]] to i32 // CHECK12-NEXT: [[ADD4:%.*]] = add nsw i32 [[CONV3]], 1 // CHECK12-NEXT: [[CONV5:%.*]] = trunc i32 [[ADD4]] to i16 -// CHECK12-NEXT: store i16 [[CONV5]], i16* [[CONV]], align 4 +// CHECK12-NEXT: store i16 [[CONV5]], i16* [[CONV]], align 2 // CHECK12-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], [10 x i32]* [[TMP0]], i32 0, i32 2 // CHECK12-NEXT: [[TMP11:%.*]] = load i32, i32* [[ARRAYIDX]], align 4 // CHECK12-NEXT: [[ADD6:%.*]] = add nsw i32 [[TMP11]], 1 @@ -10367,7 +10367,7 @@ int bar(int n){ // CHECK17-NEXT: store i64 [[A]], i64* [[A_ADDR]], align 8 // CHECK17-NEXT: store i64 [[K]], i64* [[K_ADDR]], align 8 // CHECK17-NEXT: [[CONV:%.*]] = bitcast i64* [[A_ADDR]] to i32* 
-// CHECK17-NEXT: [[TMP0:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK17-NEXT: [[TMP0:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK17-NEXT: [[CONV1:%.*]] = bitcast i64* [[A_CASTED]] to i32* // CHECK17-NEXT: store i32 [[TMP0]], i32* [[CONV1]], align 4 // CHECK17-NEXT: [[TMP1:%.*]] = load i64, i64* [[A_CASTED]], align 8 @@ -10434,9 +10434,9 @@ int bar(int n){ // CHECK17-NEXT: [[CONV3:%.*]] = sext i32 [[MUL2]] to i64 // CHECK17-NEXT: [[ADD:%.*]] = add nsw i64 [[TMP8]], [[CONV3]] // CHECK17-NEXT: store i64 [[ADD]], i64* [[K1]], align 8, !llvm.access.group !12 -// CHECK17-NEXT: [[TMP10:%.*]] = load i32, i32* [[CONV]], align 8, !llvm.access.group !12 +// CHECK17-NEXT: [[TMP10:%.*]] = load i32, i32* [[CONV]], align 4, !llvm.access.group !12 // CHECK17-NEXT: [[ADD4:%.*]] = add nsw i32 [[TMP10]], 1 -// CHECK17-NEXT: store i32 [[ADD4]], i32* [[CONV]], align 8, !llvm.access.group !12 +// CHECK17-NEXT: store i32 [[ADD4]], i32* [[CONV]], align 4, !llvm.access.group !12 // CHECK17-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK17: omp.body.continue: // CHECK17-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] @@ -10476,15 +10476,15 @@ int bar(int n){ // CHECK17-NEXT: [[CONV:%.*]] = bitcast i64* [[AA_ADDR]] to i16* // CHECK17-NEXT: [[CONV1:%.*]] = bitcast i64* [[LIN_ADDR]] to i32* // CHECK17-NEXT: [[CONV2:%.*]] = bitcast i64* [[A_ADDR]] to i32* -// CHECK17-NEXT: [[TMP0:%.*]] = load i16, i16* [[CONV]], align 8 +// CHECK17-NEXT: [[TMP0:%.*]] = load i16, i16* [[CONV]], align 2 // CHECK17-NEXT: [[CONV3:%.*]] = bitcast i64* [[AA_CASTED]] to i16* // CHECK17-NEXT: store i16 [[TMP0]], i16* [[CONV3]], align 2 // CHECK17-NEXT: [[TMP1:%.*]] = load i64, i64* [[AA_CASTED]], align 8 -// CHECK17-NEXT: [[TMP2:%.*]] = load i32, i32* [[CONV1]], align 8 +// CHECK17-NEXT: [[TMP2:%.*]] = load i32, i32* [[CONV1]], align 4 // CHECK17-NEXT: [[CONV4:%.*]] = bitcast i64* [[LIN_CASTED]] to i32* // CHECK17-NEXT: store i32 [[TMP2]], i32* [[CONV4]], align 4 // CHECK17-NEXT: [[TMP3:%.*]] = load i64, i64* [[LIN_CASTED]], align 8 -// CHECK17-NEXT: [[TMP4:%.*]] = load i32, i32* [[CONV2]], align 8 +// CHECK17-NEXT: [[TMP4:%.*]] = load i32, i32* [[CONV2]], align 4 // CHECK17-NEXT: [[CONV5:%.*]] = bitcast i64* [[A_CASTED]] to i32* // CHECK17-NEXT: store i32 [[TMP4]], i32* [[CONV5]], align 4 // CHECK17-NEXT: [[TMP5:%.*]] = load i64, i64* [[A_CASTED]], align 8 @@ -10520,9 +10520,9 @@ int bar(int n){ // CHECK17-NEXT: [[CONV:%.*]] = bitcast i64* [[AA_ADDR]] to i16* // CHECK17-NEXT: [[CONV1:%.*]] = bitcast i64* [[LIN_ADDR]] to i32* // CHECK17-NEXT: [[CONV2:%.*]] = bitcast i64* [[A_ADDR]] to i32* -// CHECK17-NEXT: [[TMP0:%.*]] = load i32, i32* [[CONV1]], align 8 +// CHECK17-NEXT: [[TMP0:%.*]] = load i32, i32* [[CONV1]], align 4 // CHECK17-NEXT: store i32 [[TMP0]], i32* [[DOTLINEAR_START]], align 4 -// CHECK17-NEXT: [[TMP1:%.*]] = load i32, i32* [[CONV2]], align 8 +// CHECK17-NEXT: [[TMP1:%.*]] = load i32, i32* [[CONV2]], align 4 // CHECK17-NEXT: store i32 [[TMP1]], i32* [[DOTLINEAR_START3]], align 4 // CHECK17-NEXT: [[CALL:%.*]] = call i64 @_Z7get_valv() // CHECK17-NEXT: store i64 [[CALL]], i64* [[DOTLINEAR_STEP]], align 8 @@ -10574,11 +10574,11 @@ int bar(int n){ // CHECK17-NEXT: [[ADD12:%.*]] = add i64 [[CONV10]], [[MUL11]] // CHECK17-NEXT: [[CONV13:%.*]] = trunc i64 [[ADD12]] to i32 // CHECK17-NEXT: store i32 [[CONV13]], i32* [[A5]], align 4 -// CHECK17-NEXT: [[TMP16:%.*]] = load i16, i16* [[CONV]], align 8 +// CHECK17-NEXT: [[TMP16:%.*]] = load i16, i16* [[CONV]], align 2 // CHECK17-NEXT: [[CONV14:%.*]] = sext i16 [[TMP16]] to i32 // 
CHECK17-NEXT: [[ADD15:%.*]] = add nsw i32 [[CONV14]], 1 // CHECK17-NEXT: [[CONV16:%.*]] = trunc i32 [[ADD15]] to i16 -// CHECK17-NEXT: store i16 [[CONV16]], i16* [[CONV]], align 8 +// CHECK17-NEXT: store i16 [[CONV16]], i16* [[CONV]], align 2 // CHECK17-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK17: omp.body.continue: // CHECK17-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] @@ -10596,9 +10596,9 @@ int bar(int n){ // CHECK17-NEXT: br i1 [[TMP19]], label [[DOTOMP_LINEAR_PU:%.*]], label [[DOTOMP_LINEAR_PU_DONE:%.*]] // CHECK17: .omp.linear.pu: // CHECK17-NEXT: [[TMP20:%.*]] = load i32, i32* [[LIN4]], align 4 -// CHECK17-NEXT: store i32 [[TMP20]], i32* [[CONV1]], align 8 +// CHECK17-NEXT: store i32 [[TMP20]], i32* [[CONV1]], align 4 // CHECK17-NEXT: [[TMP21:%.*]] = load i32, i32* [[A5]], align 4 -// CHECK17-NEXT: store i32 [[TMP21]], i32* [[CONV2]], align 8 +// CHECK17-NEXT: store i32 [[TMP21]], i32* [[CONV2]], align 4 // CHECK17-NEXT: br label [[DOTOMP_LINEAR_PU_DONE]] // CHECK17: .omp.linear.pu.done: // CHECK17-NEXT: ret void @@ -10720,11 +10720,11 @@ int bar(int n){ // CHECK17-NEXT: store i64 [[AA]], i64* [[AA_ADDR]], align 8 // CHECK17-NEXT: [[CONV:%.*]] = bitcast i64* [[A_ADDR]] to i32* // CHECK17-NEXT: [[CONV1:%.*]] = bitcast i64* [[AA_ADDR]] to i16* -// CHECK17-NEXT: [[TMP0:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK17-NEXT: [[TMP0:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK17-NEXT: [[CONV2:%.*]] = bitcast i64* [[A_CASTED]] to i32* // CHECK17-NEXT: store i32 [[TMP0]], i32* [[CONV2]], align 4 // CHECK17-NEXT: [[TMP1:%.*]] = load i64, i64* [[A_CASTED]], align 8 -// CHECK17-NEXT: [[TMP2:%.*]] = load i16, i16* [[CONV1]], align 8 +// CHECK17-NEXT: [[TMP2:%.*]] = load i16, i16* [[CONV1]], align 2 // CHECK17-NEXT: [[CONV3:%.*]] = bitcast i64* [[AA_CASTED]] to i16* // CHECK17-NEXT: store i16 [[TMP2]], i16* [[CONV3]], align 2 // CHECK17-NEXT: [[TMP3:%.*]] = load i64, i64* [[AA_CASTED]], align 8 @@ -10784,14 +10784,14 @@ int bar(int n){ // CHECK17-NEXT: [[ADD:%.*]] = add nsw i32 6, [[MUL]] // CHECK17-NEXT: [[CONV3:%.*]] = trunc i32 [[ADD]] to i16 // CHECK17-NEXT: store i16 [[CONV3]], i16* [[IT]], align 2 -// CHECK17-NEXT: [[TMP8:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK17-NEXT: [[TMP8:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK17-NEXT: [[ADD4:%.*]] = add nsw i32 [[TMP8]], 1 -// CHECK17-NEXT: store i32 [[ADD4]], i32* [[CONV]], align 8 -// CHECK17-NEXT: [[TMP9:%.*]] = load i16, i16* [[CONV1]], align 8 +// CHECK17-NEXT: store i32 [[ADD4]], i32* [[CONV]], align 4 +// CHECK17-NEXT: [[TMP9:%.*]] = load i16, i16* [[CONV1]], align 2 // CHECK17-NEXT: [[CONV5:%.*]] = sext i16 [[TMP9]] to i32 // CHECK17-NEXT: [[ADD6:%.*]] = add nsw i32 [[CONV5]], 1 // CHECK17-NEXT: [[CONV7:%.*]] = trunc i32 [[ADD6]] to i16 -// CHECK17-NEXT: store i16 [[CONV7]], i16* [[CONV1]], align 8 +// CHECK17-NEXT: store i16 [[CONV7]], i16* [[CONV1]], align 2 // CHECK17-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK17: omp.body.continue: // CHECK17-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] @@ -10842,11 +10842,11 @@ int bar(int n){ // CHECK17-NEXT: [[TMP6:%.*]] = load double*, double** [[CN_ADDR]], align 8 // CHECK17-NEXT: [[TMP7:%.*]] = load %struct.TT*, %struct.TT** [[D_ADDR]], align 8 // CHECK17-NEXT: [[CONV5:%.*]] = bitcast i64* [[DOTCAPTURE_EXPR__ADDR]] to i32* -// CHECK17-NEXT: [[TMP8:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK17-NEXT: [[TMP8:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK17-NEXT: [[CONV6:%.*]] = bitcast i64* [[A_CASTED]] to i32* // CHECK17-NEXT: store i32 [[TMP8]], i32* 
[[CONV6]], align 4 // CHECK17-NEXT: [[TMP9:%.*]] = load i64, i64* [[A_CASTED]], align 8 -// CHECK17-NEXT: [[TMP10:%.*]] = load i32, i32* [[CONV5]], align 8 +// CHECK17-NEXT: [[TMP10:%.*]] = load i32, i32* [[CONV5]], align 4 // CHECK17-NEXT: [[CONV7:%.*]] = bitcast i64* [[DOTCAPTURE_EXPR__CASTED]] to i32* // CHECK17-NEXT: store i32 [[TMP10]], i32* [[CONV7]], align 4 // CHECK17-NEXT: [[TMP11:%.*]] = load i64, i64* [[DOTCAPTURE_EXPR__CASTED]], align 8 @@ -10902,7 +10902,7 @@ int bar(int n){ // CHECK17-NEXT: store i32 25, i32* [[DOTOMP_UB]], align 4 // CHECK17-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK17-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK17-NEXT: [[TMP8:%.*]] = load i32, i32* [[CONV5]], align 8 +// CHECK17-NEXT: [[TMP8:%.*]] = load i32, i32* [[CONV5]], align 4 // CHECK17-NEXT: [[TMP9:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK17-NEXT: [[TMP10:%.*]] = load i32, i32* [[TMP9]], align 4 // CHECK17-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP10]], i32 33, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 [[TMP8]]) @@ -10938,9 +10938,9 @@ int bar(int n){ // CHECK17-NEXT: [[SUB:%.*]] = sub nsw i32 122, [[MUL]] // CHECK17-NEXT: [[CONV8:%.*]] = trunc i32 [[SUB]] to i8 // CHECK17-NEXT: store i8 [[CONV8]], i8* [[IT]], align 1 -// CHECK17-NEXT: [[TMP19:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK17-NEXT: [[TMP19:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK17-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP19]], 1 -// CHECK17-NEXT: store i32 [[ADD]], i32* [[CONV]], align 8 +// CHECK17-NEXT: store i32 [[ADD]], i32* [[CONV]], align 4 // CHECK17-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x float], [10 x float]* [[TMP0]], i64 0, i64 2 // CHECK17-NEXT: [[TMP20:%.*]] = load float, float* [[ARRAYIDX]], align 4 // CHECK17-NEXT: [[CONV9:%.*]] = fpext float [[TMP20]] to double @@ -11317,7 +11317,7 @@ int bar(int n){ // CHECK17-NEXT: [[TMP1:%.*]] = load i64, i64* [[VLA_ADDR]], align 8 // CHECK17-NEXT: [[TMP2:%.*]] = load i64, i64* [[VLA_ADDR2]], align 8 // CHECK17-NEXT: [[TMP3:%.*]] = load i16*, i16** [[C_ADDR]], align 8 -// CHECK17-NEXT: [[TMP4:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK17-NEXT: [[TMP4:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK17-NEXT: [[CONV3:%.*]] = bitcast i64* [[B_CASTED]] to i32* // CHECK17-NEXT: store i32 [[TMP4]], i32* [[CONV3]], align 4 // CHECK17-NEXT: [[TMP5:%.*]] = load i64, i64* [[B_CASTED]], align 8 @@ -11385,7 +11385,7 @@ int bar(int n){ // CHECK17-NEXT: [[MUL:%.*]] = mul i64 [[TMP11]], 400 // CHECK17-NEXT: [[SUB:%.*]] = sub i64 2000, [[MUL]] // CHECK17-NEXT: store i64 [[SUB]], i64* [[IT]], align 8 -// CHECK17-NEXT: [[TMP12:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK17-NEXT: [[TMP12:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK17-NEXT: [[CONV4:%.*]] = sitofp i32 [[TMP12]] to double // CHECK17-NEXT: [[ADD:%.*]] = fadd double [[CONV4]], 1.500000e+00 // CHECK17-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_S1:%.*]], %struct.S1* [[TMP0]], i32 0, i32 0 @@ -11432,15 +11432,15 @@ int bar(int n){ // CHECK17-NEXT: [[CONV1:%.*]] = bitcast i64* [[AA_ADDR]] to i16* // CHECK17-NEXT: [[CONV2:%.*]] = bitcast i64* [[AAA_ADDR]] to i8* // CHECK17-NEXT: [[TMP0:%.*]] = load [10 x i32]*, [10 x i32]** [[B_ADDR]], align 8 -// CHECK17-NEXT: [[TMP1:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK17-NEXT: [[TMP1:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK17-NEXT: [[CONV3:%.*]] = bitcast i64* [[A_CASTED]] to i32* // 
CHECK17-NEXT: store i32 [[TMP1]], i32* [[CONV3]], align 4 // CHECK17-NEXT: [[TMP2:%.*]] = load i64, i64* [[A_CASTED]], align 8 -// CHECK17-NEXT: [[TMP3:%.*]] = load i16, i16* [[CONV1]], align 8 +// CHECK17-NEXT: [[TMP3:%.*]] = load i16, i16* [[CONV1]], align 2 // CHECK17-NEXT: [[CONV4:%.*]] = bitcast i64* [[AA_CASTED]] to i16* // CHECK17-NEXT: store i16 [[TMP3]], i16* [[CONV4]], align 2 // CHECK17-NEXT: [[TMP4:%.*]] = load i64, i64* [[AA_CASTED]], align 8 -// CHECK17-NEXT: [[TMP5:%.*]] = load i8, i8* [[CONV2]], align 8 +// CHECK17-NEXT: [[TMP5:%.*]] = load i8, i8* [[CONV2]], align 1 // CHECK17-NEXT: [[CONV5:%.*]] = bitcast i64* [[AAA_CASTED]] to i8* // CHECK17-NEXT: store i8 [[TMP5]], i8* [[CONV5]], align 1 // CHECK17-NEXT: [[TMP6:%.*]] = load i64, i64* [[AAA_CASTED]], align 8 @@ -11486,11 +11486,11 @@ int bar(int n){ // CHECK17-NEXT: [[CONV:%.*]] = bitcast i64* [[A_ADDR]] to i32* // CHECK17-NEXT: [[CONV1:%.*]] = bitcast i64* [[AA_ADDR]] to i16* // CHECK17-NEXT: [[TMP0:%.*]] = load [10 x i32]*, [10 x i32]** [[B_ADDR]], align 8 -// CHECK17-NEXT: [[TMP1:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK17-NEXT: [[TMP1:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK17-NEXT: [[CONV2:%.*]] = bitcast i64* [[A_CASTED]] to i32* // CHECK17-NEXT: store i32 [[TMP1]], i32* [[CONV2]], align 4 // CHECK17-NEXT: [[TMP2:%.*]] = load i64, i64* [[A_CASTED]], align 8 -// CHECK17-NEXT: [[TMP3:%.*]] = load i16, i16* [[CONV1]], align 8 +// CHECK17-NEXT: [[TMP3:%.*]] = load i16, i16* [[CONV1]], align 2 // CHECK17-NEXT: [[CONV3:%.*]] = bitcast i64* [[AA_CASTED]] to i16* // CHECK17-NEXT: store i16 [[TMP3]], i16* [[CONV3]], align 2 // CHECK17-NEXT: [[TMP4:%.*]] = load i64, i64* [[AA_CASTED]], align 8 @@ -11552,14 +11552,14 @@ int bar(int n){ // CHECK17-NEXT: [[MUL:%.*]] = mul nsw i64 [[TMP8]], 3 // CHECK17-NEXT: [[ADD:%.*]] = add nsw i64 -10, [[MUL]] // CHECK17-NEXT: store i64 [[ADD]], i64* [[I]], align 8 -// CHECK17-NEXT: [[TMP9:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK17-NEXT: [[TMP9:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK17-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP9]], 1 -// CHECK17-NEXT: store i32 [[ADD3]], i32* [[CONV]], align 8 -// CHECK17-NEXT: [[TMP10:%.*]] = load i16, i16* [[CONV1]], align 8 +// CHECK17-NEXT: store i32 [[ADD3]], i32* [[CONV]], align 4 +// CHECK17-NEXT: [[TMP10:%.*]] = load i16, i16* [[CONV1]], align 2 // CHECK17-NEXT: [[CONV4:%.*]] = sext i16 [[TMP10]] to i32 // CHECK17-NEXT: [[ADD5:%.*]] = add nsw i32 [[CONV4]], 1 // CHECK17-NEXT: [[CONV6:%.*]] = trunc i32 [[ADD5]] to i16 -// CHECK17-NEXT: store i16 [[CONV6]], i16* [[CONV1]], align 8 +// CHECK17-NEXT: store i16 [[CONV6]], i16* [[CONV1]], align 2 // CHECK17-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], [10 x i32]* [[TMP0]], i64 0, i64 2 // CHECK17-NEXT: [[TMP11:%.*]] = load i32, i32* [[ARRAYIDX]], align 4 // CHECK17-NEXT: [[ADD7:%.*]] = add nsw i32 [[TMP11]], 1 @@ -12005,7 +12005,7 @@ int bar(int n){ // CHECK18-NEXT: store i64 [[A]], i64* [[A_ADDR]], align 8 // CHECK18-NEXT: store i64 [[K]], i64* [[K_ADDR]], align 8 // CHECK18-NEXT: [[CONV:%.*]] = bitcast i64* [[A_ADDR]] to i32* -// CHECK18-NEXT: [[TMP0:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK18-NEXT: [[TMP0:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK18-NEXT: [[CONV1:%.*]] = bitcast i64* [[A_CASTED]] to i32* // CHECK18-NEXT: store i32 [[TMP0]], i32* [[CONV1]], align 4 // CHECK18-NEXT: [[TMP1:%.*]] = load i64, i64* [[A_CASTED]], align 8 @@ -12072,9 +12072,9 @@ int bar(int n){ // CHECK18-NEXT: [[CONV3:%.*]] = sext i32 [[MUL2]] to i64 // 
CHECK18-NEXT: [[ADD:%.*]] = add nsw i64 [[TMP8]], [[CONV3]] // CHECK18-NEXT: store i64 [[ADD]], i64* [[K1]], align 8, !llvm.access.group !12 -// CHECK18-NEXT: [[TMP10:%.*]] = load i32, i32* [[CONV]], align 8, !llvm.access.group !12 +// CHECK18-NEXT: [[TMP10:%.*]] = load i32, i32* [[CONV]], align 4, !llvm.access.group !12 // CHECK18-NEXT: [[ADD4:%.*]] = add nsw i32 [[TMP10]], 1 -// CHECK18-NEXT: store i32 [[ADD4]], i32* [[CONV]], align 8, !llvm.access.group !12 +// CHECK18-NEXT: store i32 [[ADD4]], i32* [[CONV]], align 4, !llvm.access.group !12 // CHECK18-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK18: omp.body.continue: // CHECK18-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] @@ -12114,15 +12114,15 @@ int bar(int n){ // CHECK18-NEXT: [[CONV:%.*]] = bitcast i64* [[AA_ADDR]] to i16* // CHECK18-NEXT: [[CONV1:%.*]] = bitcast i64* [[LIN_ADDR]] to i32* // CHECK18-NEXT: [[CONV2:%.*]] = bitcast i64* [[A_ADDR]] to i32* -// CHECK18-NEXT: [[TMP0:%.*]] = load i16, i16* [[CONV]], align 8 +// CHECK18-NEXT: [[TMP0:%.*]] = load i16, i16* [[CONV]], align 2 // CHECK18-NEXT: [[CONV3:%.*]] = bitcast i64* [[AA_CASTED]] to i16* // CHECK18-NEXT: store i16 [[TMP0]], i16* [[CONV3]], align 2 // CHECK18-NEXT: [[TMP1:%.*]] = load i64, i64* [[AA_CASTED]], align 8 -// CHECK18-NEXT: [[TMP2:%.*]] = load i32, i32* [[CONV1]], align 8 +// CHECK18-NEXT: [[TMP2:%.*]] = load i32, i32* [[CONV1]], align 4 // CHECK18-NEXT: [[CONV4:%.*]] = bitcast i64* [[LIN_CASTED]] to i32* // CHECK18-NEXT: store i32 [[TMP2]], i32* [[CONV4]], align 4 // CHECK18-NEXT: [[TMP3:%.*]] = load i64, i64* [[LIN_CASTED]], align 8 -// CHECK18-NEXT: [[TMP4:%.*]] = load i32, i32* [[CONV2]], align 8 +// CHECK18-NEXT: [[TMP4:%.*]] = load i32, i32* [[CONV2]], align 4 // CHECK18-NEXT: [[CONV5:%.*]] = bitcast i64* [[A_CASTED]] to i32* // CHECK18-NEXT: store i32 [[TMP4]], i32* [[CONV5]], align 4 // CHECK18-NEXT: [[TMP5:%.*]] = load i64, i64* [[A_CASTED]], align 8 @@ -12158,9 +12158,9 @@ int bar(int n){ // CHECK18-NEXT: [[CONV:%.*]] = bitcast i64* [[AA_ADDR]] to i16* // CHECK18-NEXT: [[CONV1:%.*]] = bitcast i64* [[LIN_ADDR]] to i32* // CHECK18-NEXT: [[CONV2:%.*]] = bitcast i64* [[A_ADDR]] to i32* -// CHECK18-NEXT: [[TMP0:%.*]] = load i32, i32* [[CONV1]], align 8 +// CHECK18-NEXT: [[TMP0:%.*]] = load i32, i32* [[CONV1]], align 4 // CHECK18-NEXT: store i32 [[TMP0]], i32* [[DOTLINEAR_START]], align 4 -// CHECK18-NEXT: [[TMP1:%.*]] = load i32, i32* [[CONV2]], align 8 +// CHECK18-NEXT: [[TMP1:%.*]] = load i32, i32* [[CONV2]], align 4 // CHECK18-NEXT: store i32 [[TMP1]], i32* [[DOTLINEAR_START3]], align 4 // CHECK18-NEXT: [[CALL:%.*]] = call i64 @_Z7get_valv() // CHECK18-NEXT: store i64 [[CALL]], i64* [[DOTLINEAR_STEP]], align 8 @@ -12212,11 +12212,11 @@ int bar(int n){ // CHECK18-NEXT: [[ADD12:%.*]] = add i64 [[CONV10]], [[MUL11]] // CHECK18-NEXT: [[CONV13:%.*]] = trunc i64 [[ADD12]] to i32 // CHECK18-NEXT: store i32 [[CONV13]], i32* [[A5]], align 4 -// CHECK18-NEXT: [[TMP16:%.*]] = load i16, i16* [[CONV]], align 8 +// CHECK18-NEXT: [[TMP16:%.*]] = load i16, i16* [[CONV]], align 2 // CHECK18-NEXT: [[CONV14:%.*]] = sext i16 [[TMP16]] to i32 // CHECK18-NEXT: [[ADD15:%.*]] = add nsw i32 [[CONV14]], 1 // CHECK18-NEXT: [[CONV16:%.*]] = trunc i32 [[ADD15]] to i16 -// CHECK18-NEXT: store i16 [[CONV16]], i16* [[CONV]], align 8 +// CHECK18-NEXT: store i16 [[CONV16]], i16* [[CONV]], align 2 // CHECK18-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK18: omp.body.continue: // CHECK18-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] @@ -12234,9 +12234,9 @@ int bar(int n){ // CHECK18-NEXT: 
br i1 [[TMP19]], label [[DOTOMP_LINEAR_PU:%.*]], label [[DOTOMP_LINEAR_PU_DONE:%.*]] // CHECK18: .omp.linear.pu: // CHECK18-NEXT: [[TMP20:%.*]] = load i32, i32* [[LIN4]], align 4 -// CHECK18-NEXT: store i32 [[TMP20]], i32* [[CONV1]], align 8 +// CHECK18-NEXT: store i32 [[TMP20]], i32* [[CONV1]], align 4 // CHECK18-NEXT: [[TMP21:%.*]] = load i32, i32* [[A5]], align 4 -// CHECK18-NEXT: store i32 [[TMP21]], i32* [[CONV2]], align 8 +// CHECK18-NEXT: store i32 [[TMP21]], i32* [[CONV2]], align 4 // CHECK18-NEXT: br label [[DOTOMP_LINEAR_PU_DONE]] // CHECK18: .omp.linear.pu.done: // CHECK18-NEXT: ret void @@ -12358,11 +12358,11 @@ int bar(int n){ // CHECK18-NEXT: store i64 [[AA]], i64* [[AA_ADDR]], align 8 // CHECK18-NEXT: [[CONV:%.*]] = bitcast i64* [[A_ADDR]] to i32* // CHECK18-NEXT: [[CONV1:%.*]] = bitcast i64* [[AA_ADDR]] to i16* -// CHECK18-NEXT: [[TMP0:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK18-NEXT: [[TMP0:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK18-NEXT: [[CONV2:%.*]] = bitcast i64* [[A_CASTED]] to i32* // CHECK18-NEXT: store i32 [[TMP0]], i32* [[CONV2]], align 4 // CHECK18-NEXT: [[TMP1:%.*]] = load i64, i64* [[A_CASTED]], align 8 -// CHECK18-NEXT: [[TMP2:%.*]] = load i16, i16* [[CONV1]], align 8 +// CHECK18-NEXT: [[TMP2:%.*]] = load i16, i16* [[CONV1]], align 2 // CHECK18-NEXT: [[CONV3:%.*]] = bitcast i64* [[AA_CASTED]] to i16* // CHECK18-NEXT: store i16 [[TMP2]], i16* [[CONV3]], align 2 // CHECK18-NEXT: [[TMP3:%.*]] = load i64, i64* [[AA_CASTED]], align 8 @@ -12422,14 +12422,14 @@ int bar(int n){ // CHECK18-NEXT: [[ADD:%.*]] = add nsw i32 6, [[MUL]] // CHECK18-NEXT: [[CONV3:%.*]] = trunc i32 [[ADD]] to i16 // CHECK18-NEXT: store i16 [[CONV3]], i16* [[IT]], align 2 -// CHECK18-NEXT: [[TMP8:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK18-NEXT: [[TMP8:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK18-NEXT: [[ADD4:%.*]] = add nsw i32 [[TMP8]], 1 -// CHECK18-NEXT: store i32 [[ADD4]], i32* [[CONV]], align 8 -// CHECK18-NEXT: [[TMP9:%.*]] = load i16, i16* [[CONV1]], align 8 +// CHECK18-NEXT: store i32 [[ADD4]], i32* [[CONV]], align 4 +// CHECK18-NEXT: [[TMP9:%.*]] = load i16, i16* [[CONV1]], align 2 // CHECK18-NEXT: [[CONV5:%.*]] = sext i16 [[TMP9]] to i32 // CHECK18-NEXT: [[ADD6:%.*]] = add nsw i32 [[CONV5]], 1 // CHECK18-NEXT: [[CONV7:%.*]] = trunc i32 [[ADD6]] to i16 -// CHECK18-NEXT: store i16 [[CONV7]], i16* [[CONV1]], align 8 +// CHECK18-NEXT: store i16 [[CONV7]], i16* [[CONV1]], align 2 // CHECK18-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK18: omp.body.continue: // CHECK18-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] @@ -12480,11 +12480,11 @@ int bar(int n){ // CHECK18-NEXT: [[TMP6:%.*]] = load double*, double** [[CN_ADDR]], align 8 // CHECK18-NEXT: [[TMP7:%.*]] = load %struct.TT*, %struct.TT** [[D_ADDR]], align 8 // CHECK18-NEXT: [[CONV5:%.*]] = bitcast i64* [[DOTCAPTURE_EXPR__ADDR]] to i32* -// CHECK18-NEXT: [[TMP8:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK18-NEXT: [[TMP8:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK18-NEXT: [[CONV6:%.*]] = bitcast i64* [[A_CASTED]] to i32* // CHECK18-NEXT: store i32 [[TMP8]], i32* [[CONV6]], align 4 // CHECK18-NEXT: [[TMP9:%.*]] = load i64, i64* [[A_CASTED]], align 8 -// CHECK18-NEXT: [[TMP10:%.*]] = load i32, i32* [[CONV5]], align 8 +// CHECK18-NEXT: [[TMP10:%.*]] = load i32, i32* [[CONV5]], align 4 // CHECK18-NEXT: [[CONV7:%.*]] = bitcast i64* [[DOTCAPTURE_EXPR__CASTED]] to i32* // CHECK18-NEXT: store i32 [[TMP10]], i32* [[CONV7]], align 4 // CHECK18-NEXT: [[TMP11:%.*]] = load i64, i64* 
[[DOTCAPTURE_EXPR__CASTED]], align 8 @@ -12540,7 +12540,7 @@ int bar(int n){ // CHECK18-NEXT: store i32 25, i32* [[DOTOMP_UB]], align 4 // CHECK18-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK18-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK18-NEXT: [[TMP8:%.*]] = load i32, i32* [[CONV5]], align 8 +// CHECK18-NEXT: [[TMP8:%.*]] = load i32, i32* [[CONV5]], align 4 // CHECK18-NEXT: [[TMP9:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK18-NEXT: [[TMP10:%.*]] = load i32, i32* [[TMP9]], align 4 // CHECK18-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP10]], i32 33, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 [[TMP8]]) @@ -12576,9 +12576,9 @@ int bar(int n){ // CHECK18-NEXT: [[SUB:%.*]] = sub nsw i32 122, [[MUL]] // CHECK18-NEXT: [[CONV8:%.*]] = trunc i32 [[SUB]] to i8 // CHECK18-NEXT: store i8 [[CONV8]], i8* [[IT]], align 1 -// CHECK18-NEXT: [[TMP19:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK18-NEXT: [[TMP19:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK18-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP19]], 1 -// CHECK18-NEXT: store i32 [[ADD]], i32* [[CONV]], align 8 +// CHECK18-NEXT: store i32 [[ADD]], i32* [[CONV]], align 4 // CHECK18-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x float], [10 x float]* [[TMP0]], i64 0, i64 2 // CHECK18-NEXT: [[TMP20:%.*]] = load float, float* [[ARRAYIDX]], align 4 // CHECK18-NEXT: [[CONV9:%.*]] = fpext float [[TMP20]] to double @@ -12955,7 +12955,7 @@ int bar(int n){ // CHECK18-NEXT: [[TMP1:%.*]] = load i64, i64* [[VLA_ADDR]], align 8 // CHECK18-NEXT: [[TMP2:%.*]] = load i64, i64* [[VLA_ADDR2]], align 8 // CHECK18-NEXT: [[TMP3:%.*]] = load i16*, i16** [[C_ADDR]], align 8 -// CHECK18-NEXT: [[TMP4:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK18-NEXT: [[TMP4:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK18-NEXT: [[CONV3:%.*]] = bitcast i64* [[B_CASTED]] to i32* // CHECK18-NEXT: store i32 [[TMP4]], i32* [[CONV3]], align 4 // CHECK18-NEXT: [[TMP5:%.*]] = load i64, i64* [[B_CASTED]], align 8 @@ -13023,7 +13023,7 @@ int bar(int n){ // CHECK18-NEXT: [[MUL:%.*]] = mul i64 [[TMP11]], 400 // CHECK18-NEXT: [[SUB:%.*]] = sub i64 2000, [[MUL]] // CHECK18-NEXT: store i64 [[SUB]], i64* [[IT]], align 8 -// CHECK18-NEXT: [[TMP12:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK18-NEXT: [[TMP12:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK18-NEXT: [[CONV4:%.*]] = sitofp i32 [[TMP12]] to double // CHECK18-NEXT: [[ADD:%.*]] = fadd double [[CONV4]], 1.500000e+00 // CHECK18-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_S1:%.*]], %struct.S1* [[TMP0]], i32 0, i32 0 @@ -13070,15 +13070,15 @@ int bar(int n){ // CHECK18-NEXT: [[CONV1:%.*]] = bitcast i64* [[AA_ADDR]] to i16* // CHECK18-NEXT: [[CONV2:%.*]] = bitcast i64* [[AAA_ADDR]] to i8* // CHECK18-NEXT: [[TMP0:%.*]] = load [10 x i32]*, [10 x i32]** [[B_ADDR]], align 8 -// CHECK18-NEXT: [[TMP1:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK18-NEXT: [[TMP1:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK18-NEXT: [[CONV3:%.*]] = bitcast i64* [[A_CASTED]] to i32* // CHECK18-NEXT: store i32 [[TMP1]], i32* [[CONV3]], align 4 // CHECK18-NEXT: [[TMP2:%.*]] = load i64, i64* [[A_CASTED]], align 8 -// CHECK18-NEXT: [[TMP3:%.*]] = load i16, i16* [[CONV1]], align 8 +// CHECK18-NEXT: [[TMP3:%.*]] = load i16, i16* [[CONV1]], align 2 // CHECK18-NEXT: [[CONV4:%.*]] = bitcast i64* [[AA_CASTED]] to i16* // CHECK18-NEXT: store i16 [[TMP3]], i16* [[CONV4]], align 2 // CHECK18-NEXT: 
[[TMP4:%.*]] = load i64, i64* [[AA_CASTED]], align 8 -// CHECK18-NEXT: [[TMP5:%.*]] = load i8, i8* [[CONV2]], align 8 +// CHECK18-NEXT: [[TMP5:%.*]] = load i8, i8* [[CONV2]], align 1 // CHECK18-NEXT: [[CONV5:%.*]] = bitcast i64* [[AAA_CASTED]] to i8* // CHECK18-NEXT: store i8 [[TMP5]], i8* [[CONV5]], align 1 // CHECK18-NEXT: [[TMP6:%.*]] = load i64, i64* [[AAA_CASTED]], align 8 @@ -13124,11 +13124,11 @@ int bar(int n){ // CHECK18-NEXT: [[CONV:%.*]] = bitcast i64* [[A_ADDR]] to i32* // CHECK18-NEXT: [[CONV1:%.*]] = bitcast i64* [[AA_ADDR]] to i16* // CHECK18-NEXT: [[TMP0:%.*]] = load [10 x i32]*, [10 x i32]** [[B_ADDR]], align 8 -// CHECK18-NEXT: [[TMP1:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK18-NEXT: [[TMP1:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK18-NEXT: [[CONV2:%.*]] = bitcast i64* [[A_CASTED]] to i32* // CHECK18-NEXT: store i32 [[TMP1]], i32* [[CONV2]], align 4 // CHECK18-NEXT: [[TMP2:%.*]] = load i64, i64* [[A_CASTED]], align 8 -// CHECK18-NEXT: [[TMP3:%.*]] = load i16, i16* [[CONV1]], align 8 +// CHECK18-NEXT: [[TMP3:%.*]] = load i16, i16* [[CONV1]], align 2 // CHECK18-NEXT: [[CONV3:%.*]] = bitcast i64* [[AA_CASTED]] to i16* // CHECK18-NEXT: store i16 [[TMP3]], i16* [[CONV3]], align 2 // CHECK18-NEXT: [[TMP4:%.*]] = load i64, i64* [[AA_CASTED]], align 8 @@ -13190,14 +13190,14 @@ int bar(int n){ // CHECK18-NEXT: [[MUL:%.*]] = mul nsw i64 [[TMP8]], 3 // CHECK18-NEXT: [[ADD:%.*]] = add nsw i64 -10, [[MUL]] // CHECK18-NEXT: store i64 [[ADD]], i64* [[I]], align 8 -// CHECK18-NEXT: [[TMP9:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK18-NEXT: [[TMP9:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK18-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP9]], 1 -// CHECK18-NEXT: store i32 [[ADD3]], i32* [[CONV]], align 8 -// CHECK18-NEXT: [[TMP10:%.*]] = load i16, i16* [[CONV1]], align 8 +// CHECK18-NEXT: store i32 [[ADD3]], i32* [[CONV]], align 4 +// CHECK18-NEXT: [[TMP10:%.*]] = load i16, i16* [[CONV1]], align 2 // CHECK18-NEXT: [[CONV4:%.*]] = sext i16 [[TMP10]] to i32 // CHECK18-NEXT: [[ADD5:%.*]] = add nsw i32 [[CONV4]], 1 // CHECK18-NEXT: [[CONV6:%.*]] = trunc i32 [[ADD5]] to i16 -// CHECK18-NEXT: store i16 [[CONV6]], i16* [[CONV1]], align 8 +// CHECK18-NEXT: store i16 [[CONV6]], i16* [[CONV1]], align 2 // CHECK18-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], [10 x i32]* [[TMP0]], i64 0, i64 2 // CHECK18-NEXT: [[TMP11:%.*]] = load i32, i32* [[ARRAYIDX]], align 4 // CHECK18-NEXT: [[ADD7:%.*]] = add nsw i32 [[TMP11]], 1 @@ -13735,7 +13735,7 @@ int bar(int n){ // CHECK19-NEXT: store i32 [[LIN]], i32* [[LIN_ADDR]], align 4 // CHECK19-NEXT: store i32 [[A]], i32* [[A_ADDR]], align 4 // CHECK19-NEXT: [[CONV:%.*]] = bitcast i32* [[AA_ADDR]] to i16* -// CHECK19-NEXT: [[TMP0:%.*]] = load i16, i16* [[CONV]], align 4 +// CHECK19-NEXT: [[TMP0:%.*]] = load i16, i16* [[CONV]], align 2 // CHECK19-NEXT: [[CONV1:%.*]] = bitcast i32* [[AA_CASTED]] to i16* // CHECK19-NEXT: store i16 [[TMP0]], i16* [[CONV1]], align 2 // CHECK19-NEXT: [[TMP1:%.*]] = load i32, i32* [[AA_CASTED]], align 4 @@ -13829,11 +13829,11 @@ int bar(int n){ // CHECK19-NEXT: [[ADD10:%.*]] = add i64 [[CONV8]], [[MUL9]] // CHECK19-NEXT: [[CONV11:%.*]] = trunc i64 [[ADD10]] to i32 // CHECK19-NEXT: store i32 [[CONV11]], i32* [[A3]], align 4 -// CHECK19-NEXT: [[TMP16:%.*]] = load i16, i16* [[CONV]], align 4 +// CHECK19-NEXT: [[TMP16:%.*]] = load i16, i16* [[CONV]], align 2 // CHECK19-NEXT: [[CONV12:%.*]] = sext i16 [[TMP16]] to i32 // CHECK19-NEXT: [[ADD13:%.*]] = add nsw i32 [[CONV12]], 1 // CHECK19-NEXT: [[CONV14:%.*]] 
= trunc i32 [[ADD13]] to i16 -// CHECK19-NEXT: store i16 [[CONV14]], i16* [[CONV]], align 4 +// CHECK19-NEXT: store i16 [[CONV14]], i16* [[CONV]], align 2 // CHECK19-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK19: omp.body.continue: // CHECK19-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] @@ -13975,7 +13975,7 @@ int bar(int n){ // CHECK19-NEXT: [[TMP0:%.*]] = load i32, i32* [[A_ADDR]], align 4 // CHECK19-NEXT: store i32 [[TMP0]], i32* [[A_CASTED]], align 4 // CHECK19-NEXT: [[TMP1:%.*]] = load i32, i32* [[A_CASTED]], align 4 -// CHECK19-NEXT: [[TMP2:%.*]] = load i16, i16* [[CONV]], align 4 +// CHECK19-NEXT: [[TMP2:%.*]] = load i16, i16* [[CONV]], align 2 // CHECK19-NEXT: [[CONV1:%.*]] = bitcast i32* [[AA_CASTED]] to i16* // CHECK19-NEXT: store i16 [[TMP2]], i16* [[CONV1]], align 2 // CHECK19-NEXT: [[TMP3:%.*]] = load i32, i32* [[AA_CASTED]], align 4 @@ -14037,11 +14037,11 @@ int bar(int n){ // CHECK19-NEXT: [[TMP8:%.*]] = load i32, i32* [[A_ADDR]], align 4 // CHECK19-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP8]], 1 // CHECK19-NEXT: store i32 [[ADD3]], i32* [[A_ADDR]], align 4 -// CHECK19-NEXT: [[TMP9:%.*]] = load i16, i16* [[CONV]], align 4 +// CHECK19-NEXT: [[TMP9:%.*]] = load i16, i16* [[CONV]], align 2 // CHECK19-NEXT: [[CONV4:%.*]] = sext i16 [[TMP9]] to i32 // CHECK19-NEXT: [[ADD5:%.*]] = add nsw i32 [[CONV4]], 1 // CHECK19-NEXT: [[CONV6:%.*]] = trunc i32 [[ADD5]] to i16 -// CHECK19-NEXT: store i16 [[CONV6]], i16* [[CONV]], align 4 +// CHECK19-NEXT: store i16 [[CONV6]], i16* [[CONV]], align 2 // CHECK19-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK19: omp.body.continue: // CHECK19-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] @@ -14672,11 +14672,11 @@ int bar(int n){ // CHECK19-NEXT: [[TMP1:%.*]] = load i32, i32* [[A_ADDR]], align 4 // CHECK19-NEXT: store i32 [[TMP1]], i32* [[A_CASTED]], align 4 // CHECK19-NEXT: [[TMP2:%.*]] = load i32, i32* [[A_CASTED]], align 4 -// CHECK19-NEXT: [[TMP3:%.*]] = load i16, i16* [[CONV]], align 4 +// CHECK19-NEXT: [[TMP3:%.*]] = load i16, i16* [[CONV]], align 2 // CHECK19-NEXT: [[CONV2:%.*]] = bitcast i32* [[AA_CASTED]] to i16* // CHECK19-NEXT: store i16 [[TMP3]], i16* [[CONV2]], align 2 // CHECK19-NEXT: [[TMP4:%.*]] = load i32, i32* [[AA_CASTED]], align 4 -// CHECK19-NEXT: [[TMP5:%.*]] = load i8, i8* [[CONV1]], align 4 +// CHECK19-NEXT: [[TMP5:%.*]] = load i8, i8* [[CONV1]], align 1 // CHECK19-NEXT: [[CONV3:%.*]] = bitcast i32* [[AAA_CASTED]] to i8* // CHECK19-NEXT: store i8 [[TMP5]], i8* [[CONV3]], align 1 // CHECK19-NEXT: [[TMP6:%.*]] = load i32, i32* [[AAA_CASTED]], align 4 @@ -14723,7 +14723,7 @@ int bar(int n){ // CHECK19-NEXT: [[TMP1:%.*]] = load i32, i32* [[A_ADDR]], align 4 // CHECK19-NEXT: store i32 [[TMP1]], i32* [[A_CASTED]], align 4 // CHECK19-NEXT: [[TMP2:%.*]] = load i32, i32* [[A_CASTED]], align 4 -// CHECK19-NEXT: [[TMP3:%.*]] = load i16, i16* [[CONV]], align 4 +// CHECK19-NEXT: [[TMP3:%.*]] = load i16, i16* [[CONV]], align 2 // CHECK19-NEXT: [[CONV1:%.*]] = bitcast i32* [[AA_CASTED]] to i16* // CHECK19-NEXT: store i16 [[TMP3]], i16* [[CONV1]], align 2 // CHECK19-NEXT: [[TMP4:%.*]] = load i32, i32* [[AA_CASTED]], align 4 @@ -14787,11 +14787,11 @@ int bar(int n){ // CHECK19-NEXT: [[TMP9:%.*]] = load i32, i32* [[A_ADDR]], align 4 // CHECK19-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP9]], 1 // CHECK19-NEXT: store i32 [[ADD2]], i32* [[A_ADDR]], align 4 -// CHECK19-NEXT: [[TMP10:%.*]] = load i16, i16* [[CONV]], align 4 +// CHECK19-NEXT: [[TMP10:%.*]] = load i16, i16* [[CONV]], align 2 // CHECK19-NEXT: [[CONV3:%.*]] = sext i16 [[TMP10]] to i32 // 
CHECK19-NEXT: [[ADD4:%.*]] = add nsw i32 [[CONV3]], 1 // CHECK19-NEXT: [[CONV5:%.*]] = trunc i32 [[ADD4]] to i16 -// CHECK19-NEXT: store i16 [[CONV5]], i16* [[CONV]], align 4 +// CHECK19-NEXT: store i16 [[CONV5]], i16* [[CONV]], align 2 // CHECK19-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], [10 x i32]* [[TMP0]], i32 0, i32 2 // CHECK19-NEXT: [[TMP11:%.*]] = load i32, i32* [[ARRAYIDX]], align 4 // CHECK19-NEXT: [[ADD6:%.*]] = add nsw i32 [[TMP11]], 1 @@ -15329,7 +15329,7 @@ int bar(int n){ // CHECK20-NEXT: store i32 [[LIN]], i32* [[LIN_ADDR]], align 4 // CHECK20-NEXT: store i32 [[A]], i32* [[A_ADDR]], align 4 // CHECK20-NEXT: [[CONV:%.*]] = bitcast i32* [[AA_ADDR]] to i16* -// CHECK20-NEXT: [[TMP0:%.*]] = load i16, i16* [[CONV]], align 4 +// CHECK20-NEXT: [[TMP0:%.*]] = load i16, i16* [[CONV]], align 2 // CHECK20-NEXT: [[CONV1:%.*]] = bitcast i32* [[AA_CASTED]] to i16* // CHECK20-NEXT: store i16 [[TMP0]], i16* [[CONV1]], align 2 // CHECK20-NEXT: [[TMP1:%.*]] = load i32, i32* [[AA_CASTED]], align 4 @@ -15423,11 +15423,11 @@ int bar(int n){ // CHECK20-NEXT: [[ADD10:%.*]] = add i64 [[CONV8]], [[MUL9]] // CHECK20-NEXT: [[CONV11:%.*]] = trunc i64 [[ADD10]] to i32 // CHECK20-NEXT: store i32 [[CONV11]], i32* [[A3]], align 4 -// CHECK20-NEXT: [[TMP16:%.*]] = load i16, i16* [[CONV]], align 4 +// CHECK20-NEXT: [[TMP16:%.*]] = load i16, i16* [[CONV]], align 2 // CHECK20-NEXT: [[CONV12:%.*]] = sext i16 [[TMP16]] to i32 // CHECK20-NEXT: [[ADD13:%.*]] = add nsw i32 [[CONV12]], 1 // CHECK20-NEXT: [[CONV14:%.*]] = trunc i32 [[ADD13]] to i16 -// CHECK20-NEXT: store i16 [[CONV14]], i16* [[CONV]], align 4 +// CHECK20-NEXT: store i16 [[CONV14]], i16* [[CONV]], align 2 // CHECK20-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK20: omp.body.continue: // CHECK20-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] @@ -15569,7 +15569,7 @@ int bar(int n){ // CHECK20-NEXT: [[TMP0:%.*]] = load i32, i32* [[A_ADDR]], align 4 // CHECK20-NEXT: store i32 [[TMP0]], i32* [[A_CASTED]], align 4 // CHECK20-NEXT: [[TMP1:%.*]] = load i32, i32* [[A_CASTED]], align 4 -// CHECK20-NEXT: [[TMP2:%.*]] = load i16, i16* [[CONV]], align 4 +// CHECK20-NEXT: [[TMP2:%.*]] = load i16, i16* [[CONV]], align 2 // CHECK20-NEXT: [[CONV1:%.*]] = bitcast i32* [[AA_CASTED]] to i16* // CHECK20-NEXT: store i16 [[TMP2]], i16* [[CONV1]], align 2 // CHECK20-NEXT: [[TMP3:%.*]] = load i32, i32* [[AA_CASTED]], align 4 @@ -15631,11 +15631,11 @@ int bar(int n){ // CHECK20-NEXT: [[TMP8:%.*]] = load i32, i32* [[A_ADDR]], align 4 // CHECK20-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP8]], 1 // CHECK20-NEXT: store i32 [[ADD3]], i32* [[A_ADDR]], align 4 -// CHECK20-NEXT: [[TMP9:%.*]] = load i16, i16* [[CONV]], align 4 +// CHECK20-NEXT: [[TMP9:%.*]] = load i16, i16* [[CONV]], align 2 // CHECK20-NEXT: [[CONV4:%.*]] = sext i16 [[TMP9]] to i32 // CHECK20-NEXT: [[ADD5:%.*]] = add nsw i32 [[CONV4]], 1 // CHECK20-NEXT: [[CONV6:%.*]] = trunc i32 [[ADD5]] to i16 -// CHECK20-NEXT: store i16 [[CONV6]], i16* [[CONV]], align 4 +// CHECK20-NEXT: store i16 [[CONV6]], i16* [[CONV]], align 2 // CHECK20-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK20: omp.body.continue: // CHECK20-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] @@ -16266,11 +16266,11 @@ int bar(int n){ // CHECK20-NEXT: [[TMP1:%.*]] = load i32, i32* [[A_ADDR]], align 4 // CHECK20-NEXT: store i32 [[TMP1]], i32* [[A_CASTED]], align 4 // CHECK20-NEXT: [[TMP2:%.*]] = load i32, i32* [[A_CASTED]], align 4 -// CHECK20-NEXT: [[TMP3:%.*]] = load i16, i16* [[CONV]], align 4 +// CHECK20-NEXT: [[TMP3:%.*]] = load i16, i16* 
[[CONV]], align 2 // CHECK20-NEXT: [[CONV2:%.*]] = bitcast i32* [[AA_CASTED]] to i16* // CHECK20-NEXT: store i16 [[TMP3]], i16* [[CONV2]], align 2 // CHECK20-NEXT: [[TMP4:%.*]] = load i32, i32* [[AA_CASTED]], align 4 -// CHECK20-NEXT: [[TMP5:%.*]] = load i8, i8* [[CONV1]], align 4 +// CHECK20-NEXT: [[TMP5:%.*]] = load i8, i8* [[CONV1]], align 1 // CHECK20-NEXT: [[CONV3:%.*]] = bitcast i32* [[AAA_CASTED]] to i8* // CHECK20-NEXT: store i8 [[TMP5]], i8* [[CONV3]], align 1 // CHECK20-NEXT: [[TMP6:%.*]] = load i32, i32* [[AAA_CASTED]], align 4 @@ -16317,7 +16317,7 @@ int bar(int n){ // CHECK20-NEXT: [[TMP1:%.*]] = load i32, i32* [[A_ADDR]], align 4 // CHECK20-NEXT: store i32 [[TMP1]], i32* [[A_CASTED]], align 4 // CHECK20-NEXT: [[TMP2:%.*]] = load i32, i32* [[A_CASTED]], align 4 -// CHECK20-NEXT: [[TMP3:%.*]] = load i16, i16* [[CONV]], align 4 +// CHECK20-NEXT: [[TMP3:%.*]] = load i16, i16* [[CONV]], align 2 // CHECK20-NEXT: [[CONV1:%.*]] = bitcast i32* [[AA_CASTED]] to i16* // CHECK20-NEXT: store i16 [[TMP3]], i16* [[CONV1]], align 2 // CHECK20-NEXT: [[TMP4:%.*]] = load i32, i32* [[AA_CASTED]], align 4 @@ -16381,11 +16381,11 @@ int bar(int n){ // CHECK20-NEXT: [[TMP9:%.*]] = load i32, i32* [[A_ADDR]], align 4 // CHECK20-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP9]], 1 // CHECK20-NEXT: store i32 [[ADD2]], i32* [[A_ADDR]], align 4 -// CHECK20-NEXT: [[TMP10:%.*]] = load i16, i16* [[CONV]], align 4 +// CHECK20-NEXT: [[TMP10:%.*]] = load i16, i16* [[CONV]], align 2 // CHECK20-NEXT: [[CONV3:%.*]] = sext i16 [[TMP10]] to i32 // CHECK20-NEXT: [[ADD4:%.*]] = add nsw i32 [[CONV3]], 1 // CHECK20-NEXT: [[CONV5:%.*]] = trunc i32 [[ADD4]] to i16 -// CHECK20-NEXT: store i16 [[CONV5]], i16* [[CONV]], align 4 +// CHECK20-NEXT: store i16 [[CONV5]], i16* [[CONV]], align 2 // CHECK20-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], [10 x i32]* [[TMP0]], i32 0, i32 2 // CHECK20-NEXT: [[TMP11:%.*]] = load i32, i32* [[ARRAYIDX]], align 4 // CHECK20-NEXT: [[ADD6:%.*]] = add nsw i32 [[TMP11]], 1 @@ -16511,15 +16511,15 @@ int bar(int n){ // CHECK25-NEXT: [[CONV:%.*]] = bitcast i64* [[AA_ADDR]] to i16* // CHECK25-NEXT: [[CONV1:%.*]] = bitcast i64* [[LIN_ADDR]] to i32* // CHECK25-NEXT: [[CONV2:%.*]] = bitcast i64* [[A_ADDR]] to i32* -// CHECK25-NEXT: [[TMP0:%.*]] = load i16, i16* [[CONV]], align 8 +// CHECK25-NEXT: [[TMP0:%.*]] = load i16, i16* [[CONV]], align 2 // CHECK25-NEXT: [[CONV3:%.*]] = bitcast i64* [[AA_CASTED]] to i16* // CHECK25-NEXT: store i16 [[TMP0]], i16* [[CONV3]], align 2 // CHECK25-NEXT: [[TMP1:%.*]] = load i64, i64* [[AA_CASTED]], align 8 -// CHECK25-NEXT: [[TMP2:%.*]] = load i32, i32* [[CONV1]], align 8 +// CHECK25-NEXT: [[TMP2:%.*]] = load i32, i32* [[CONV1]], align 4 // CHECK25-NEXT: [[CONV4:%.*]] = bitcast i64* [[LIN_CASTED]] to i32* // CHECK25-NEXT: store i32 [[TMP2]], i32* [[CONV4]], align 4 // CHECK25-NEXT: [[TMP3:%.*]] = load i64, i64* [[LIN_CASTED]], align 8 -// CHECK25-NEXT: [[TMP4:%.*]] = load i32, i32* [[CONV2]], align 8 +// CHECK25-NEXT: [[TMP4:%.*]] = load i32, i32* [[CONV2]], align 4 // CHECK25-NEXT: [[CONV5:%.*]] = bitcast i64* [[A_CASTED]] to i32* // CHECK25-NEXT: store i32 [[TMP4]], i32* [[CONV5]], align 4 // CHECK25-NEXT: [[TMP5:%.*]] = load i64, i64* [[A_CASTED]], align 8 @@ -16555,9 +16555,9 @@ int bar(int n){ // CHECK25-NEXT: [[CONV:%.*]] = bitcast i64* [[AA_ADDR]] to i16* // CHECK25-NEXT: [[CONV1:%.*]] = bitcast i64* [[LIN_ADDR]] to i32* // CHECK25-NEXT: [[CONV2:%.*]] = bitcast i64* [[A_ADDR]] to i32* -// CHECK25-NEXT: [[TMP0:%.*]] = load i32, i32* [[CONV1]], align 8 +// 
CHECK25-NEXT: [[TMP0:%.*]] = load i32, i32* [[CONV1]], align 4 // CHECK25-NEXT: store i32 [[TMP0]], i32* [[DOTLINEAR_START]], align 4 -// CHECK25-NEXT: [[TMP1:%.*]] = load i32, i32* [[CONV2]], align 8 +// CHECK25-NEXT: [[TMP1:%.*]] = load i32, i32* [[CONV2]], align 4 // CHECK25-NEXT: store i32 [[TMP1]], i32* [[DOTLINEAR_START3]], align 4 // CHECK25-NEXT: [[CALL:%.*]] = call i64 @_Z7get_valv() #[[ATTR5:[0-9]+]] // CHECK25-NEXT: store i64 [[CALL]], i64* [[DOTLINEAR_STEP]], align 8 @@ -16609,11 +16609,11 @@ int bar(int n){ // CHECK25-NEXT: [[ADD12:%.*]] = add i64 [[CONV10]], [[MUL11]] // CHECK25-NEXT: [[CONV13:%.*]] = trunc i64 [[ADD12]] to i32 // CHECK25-NEXT: store i32 [[CONV13]], i32* [[A5]], align 4 -// CHECK25-NEXT: [[TMP16:%.*]] = load i16, i16* [[CONV]], align 8 +// CHECK25-NEXT: [[TMP16:%.*]] = load i16, i16* [[CONV]], align 2 // CHECK25-NEXT: [[CONV14:%.*]] = sext i16 [[TMP16]] to i32 // CHECK25-NEXT: [[ADD15:%.*]] = add nsw i32 [[CONV14]], 1 // CHECK25-NEXT: [[CONV16:%.*]] = trunc i32 [[ADD15]] to i16 -// CHECK25-NEXT: store i16 [[CONV16]], i16* [[CONV]], align 8 +// CHECK25-NEXT: store i16 [[CONV16]], i16* [[CONV]], align 2 // CHECK25-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK25: omp.body.continue: // CHECK25-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] @@ -16631,9 +16631,9 @@ int bar(int n){ // CHECK25-NEXT: br i1 [[TMP19]], label [[DOTOMP_LINEAR_PU:%.*]], label [[DOTOMP_LINEAR_PU_DONE:%.*]] // CHECK25: .omp.linear.pu: // CHECK25-NEXT: [[TMP20:%.*]] = load i32, i32* [[LIN4]], align 4 -// CHECK25-NEXT: store i32 [[TMP20]], i32* [[CONV1]], align 8 +// CHECK25-NEXT: store i32 [[TMP20]], i32* [[CONV1]], align 4 // CHECK25-NEXT: [[TMP21:%.*]] = load i32, i32* [[A5]], align 4 -// CHECK25-NEXT: store i32 [[TMP21]], i32* [[CONV2]], align 8 +// CHECK25-NEXT: store i32 [[TMP21]], i32* [[CONV2]], align 4 // CHECK25-NEXT: br label [[DOTOMP_LINEAR_PU_DONE]] // CHECK25: .omp.linear.pu.done: // CHECK25-NEXT: ret void @@ -16656,11 +16656,11 @@ int bar(int n){ // CHECK25-NEXT: store i64 [[AA]], i64* [[AA_ADDR]], align 8 // CHECK25-NEXT: [[CONV:%.*]] = bitcast i64* [[A_ADDR]] to i32* // CHECK25-NEXT: [[CONV1:%.*]] = bitcast i64* [[AA_ADDR]] to i16* -// CHECK25-NEXT: [[TMP0:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK25-NEXT: [[TMP0:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK25-NEXT: [[CONV2:%.*]] = bitcast i64* [[A_CASTED]] to i32* // CHECK25-NEXT: store i32 [[TMP0]], i32* [[CONV2]], align 4 // CHECK25-NEXT: [[TMP1:%.*]] = load i64, i64* [[A_CASTED]], align 8 -// CHECK25-NEXT: [[TMP2:%.*]] = load i16, i16* [[CONV1]], align 8 +// CHECK25-NEXT: [[TMP2:%.*]] = load i16, i16* [[CONV1]], align 2 // CHECK25-NEXT: [[CONV3:%.*]] = bitcast i64* [[AA_CASTED]] to i16* // CHECK25-NEXT: store i16 [[TMP2]], i16* [[CONV3]], align 2 // CHECK25-NEXT: [[TMP3:%.*]] = load i64, i64* [[AA_CASTED]], align 8 @@ -16720,14 +16720,14 @@ int bar(int n){ // CHECK25-NEXT: [[ADD:%.*]] = add nsw i32 6, [[MUL]] // CHECK25-NEXT: [[CONV3:%.*]] = trunc i32 [[ADD]] to i16 // CHECK25-NEXT: store i16 [[CONV3]], i16* [[IT]], align 2 -// CHECK25-NEXT: [[TMP8:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK25-NEXT: [[TMP8:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK25-NEXT: [[ADD4:%.*]] = add nsw i32 [[TMP8]], 1 -// CHECK25-NEXT: store i32 [[ADD4]], i32* [[CONV]], align 8 -// CHECK25-NEXT: [[TMP9:%.*]] = load i16, i16* [[CONV1]], align 8 +// CHECK25-NEXT: store i32 [[ADD4]], i32* [[CONV]], align 4 +// CHECK25-NEXT: [[TMP9:%.*]] = load i16, i16* [[CONV1]], align 2 // CHECK25-NEXT: [[CONV5:%.*]] = sext i16 [[TMP9]] to 
i32 // CHECK25-NEXT: [[ADD6:%.*]] = add nsw i32 [[CONV5]], 1 // CHECK25-NEXT: [[CONV7:%.*]] = trunc i32 [[ADD6]] to i16 -// CHECK25-NEXT: store i16 [[CONV7]], i16* [[CONV1]], align 8 +// CHECK25-NEXT: store i16 [[CONV7]], i16* [[CONV1]], align 2 // CHECK25-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK25: omp.body.continue: // CHECK25-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] @@ -16778,11 +16778,11 @@ int bar(int n){ // CHECK25-NEXT: [[TMP6:%.*]] = load double*, double** [[CN_ADDR]], align 8 // CHECK25-NEXT: [[TMP7:%.*]] = load %struct.TT*, %struct.TT** [[D_ADDR]], align 8 // CHECK25-NEXT: [[CONV5:%.*]] = bitcast i64* [[DOTCAPTURE_EXPR__ADDR]] to i32* -// CHECK25-NEXT: [[TMP8:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK25-NEXT: [[TMP8:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK25-NEXT: [[CONV6:%.*]] = bitcast i64* [[A_CASTED]] to i32* // CHECK25-NEXT: store i32 [[TMP8]], i32* [[CONV6]], align 4 // CHECK25-NEXT: [[TMP9:%.*]] = load i64, i64* [[A_CASTED]], align 8 -// CHECK25-NEXT: [[TMP10:%.*]] = load i32, i32* [[CONV5]], align 8 +// CHECK25-NEXT: [[TMP10:%.*]] = load i32, i32* [[CONV5]], align 4 // CHECK25-NEXT: [[CONV7:%.*]] = bitcast i64* [[DOTCAPTURE_EXPR__CASTED]] to i32* // CHECK25-NEXT: store i32 [[TMP10]], i32* [[CONV7]], align 4 // CHECK25-NEXT: [[TMP11:%.*]] = load i64, i64* [[DOTCAPTURE_EXPR__CASTED]], align 8 @@ -16838,7 +16838,7 @@ int bar(int n){ // CHECK25-NEXT: store i32 25, i32* [[DOTOMP_UB]], align 4 // CHECK25-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK25-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK25-NEXT: [[TMP8:%.*]] = load i32, i32* [[CONV5]], align 8 +// CHECK25-NEXT: [[TMP8:%.*]] = load i32, i32* [[CONV5]], align 4 // CHECK25-NEXT: [[TMP9:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK25-NEXT: [[TMP10:%.*]] = load i32, i32* [[TMP9]], align 4 // CHECK25-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP10]], i32 33, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 [[TMP8]]) @@ -16874,9 +16874,9 @@ int bar(int n){ // CHECK25-NEXT: [[SUB:%.*]] = sub nsw i32 122, [[MUL]] // CHECK25-NEXT: [[CONV8:%.*]] = trunc i32 [[SUB]] to i8 // CHECK25-NEXT: store i8 [[CONV8]], i8* [[IT]], align 1 -// CHECK25-NEXT: [[TMP19:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK25-NEXT: [[TMP19:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK25-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP19]], 1 -// CHECK25-NEXT: store i32 [[ADD]], i32* [[CONV]], align 8 +// CHECK25-NEXT: store i32 [[ADD]], i32* [[CONV]], align 4 // CHECK25-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x float], [10 x float]* [[TMP0]], i64 0, i64 2 // CHECK25-NEXT: [[TMP20:%.*]] = load float, float* [[ARRAYIDX]], align 4 // CHECK25-NEXT: [[CONV9:%.*]] = fpext float [[TMP20]] to double @@ -16953,15 +16953,15 @@ int bar(int n){ // CHECK25-NEXT: [[CONV1:%.*]] = bitcast i64* [[AA_ADDR]] to i16* // CHECK25-NEXT: [[CONV2:%.*]] = bitcast i64* [[AAA_ADDR]] to i8* // CHECK25-NEXT: [[TMP0:%.*]] = load [10 x i32]*, [10 x i32]** [[B_ADDR]], align 8 -// CHECK25-NEXT: [[TMP1:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK25-NEXT: [[TMP1:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK25-NEXT: [[CONV3:%.*]] = bitcast i64* [[A_CASTED]] to i32* // CHECK25-NEXT: store i32 [[TMP1]], i32* [[CONV3]], align 4 // CHECK25-NEXT: [[TMP2:%.*]] = load i64, i64* [[A_CASTED]], align 8 -// CHECK25-NEXT: [[TMP3:%.*]] = load i16, i16* [[CONV1]], align 8 +// CHECK25-NEXT: [[TMP3:%.*]] = load i16, i16* 
[[CONV1]], align 2 // CHECK25-NEXT: [[CONV4:%.*]] = bitcast i64* [[AA_CASTED]] to i16* // CHECK25-NEXT: store i16 [[TMP3]], i16* [[CONV4]], align 2 // CHECK25-NEXT: [[TMP4:%.*]] = load i64, i64* [[AA_CASTED]], align 8 -// CHECK25-NEXT: [[TMP5:%.*]] = load i8, i8* [[CONV2]], align 8 +// CHECK25-NEXT: [[TMP5:%.*]] = load i8, i8* [[CONV2]], align 1 // CHECK25-NEXT: [[CONV5:%.*]] = bitcast i64* [[AAA_CASTED]] to i8* // CHECK25-NEXT: store i8 [[TMP5]], i8* [[CONV5]], align 1 // CHECK25-NEXT: [[TMP6:%.*]] = load i64, i64* [[AAA_CASTED]], align 8 @@ -17012,7 +17012,7 @@ int bar(int n){ // CHECK25-NEXT: [[TMP1:%.*]] = load i64, i64* [[VLA_ADDR]], align 8 // CHECK25-NEXT: [[TMP2:%.*]] = load i64, i64* [[VLA_ADDR2]], align 8 // CHECK25-NEXT: [[TMP3:%.*]] = load i16*, i16** [[C_ADDR]], align 8 -// CHECK25-NEXT: [[TMP4:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK25-NEXT: [[TMP4:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK25-NEXT: [[CONV3:%.*]] = bitcast i64* [[B_CASTED]] to i32* // CHECK25-NEXT: store i32 [[TMP4]], i32* [[CONV3]], align 4 // CHECK25-NEXT: [[TMP5:%.*]] = load i64, i64* [[B_CASTED]], align 8 @@ -17080,7 +17080,7 @@ int bar(int n){ // CHECK25-NEXT: [[MUL:%.*]] = mul i64 [[TMP11]], 400 // CHECK25-NEXT: [[SUB:%.*]] = sub i64 2000, [[MUL]] // CHECK25-NEXT: store i64 [[SUB]], i64* [[IT]], align 8 -// CHECK25-NEXT: [[TMP12:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK25-NEXT: [[TMP12:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK25-NEXT: [[CONV4:%.*]] = sitofp i32 [[TMP12]] to double // CHECK25-NEXT: [[ADD:%.*]] = fadd double [[CONV4]], 1.500000e+00 // CHECK25-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_S1:%.*]], %struct.S1* [[TMP0]], i32 0, i32 0 @@ -17123,11 +17123,11 @@ int bar(int n){ // CHECK25-NEXT: [[CONV:%.*]] = bitcast i64* [[A_ADDR]] to i32* // CHECK25-NEXT: [[CONV1:%.*]] = bitcast i64* [[AA_ADDR]] to i16* // CHECK25-NEXT: [[TMP0:%.*]] = load [10 x i32]*, [10 x i32]** [[B_ADDR]], align 8 -// CHECK25-NEXT: [[TMP1:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK25-NEXT: [[TMP1:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK25-NEXT: [[CONV2:%.*]] = bitcast i64* [[A_CASTED]] to i32* // CHECK25-NEXT: store i32 [[TMP1]], i32* [[CONV2]], align 4 // CHECK25-NEXT: [[TMP2:%.*]] = load i64, i64* [[A_CASTED]], align 8 -// CHECK25-NEXT: [[TMP3:%.*]] = load i16, i16* [[CONV1]], align 8 +// CHECK25-NEXT: [[TMP3:%.*]] = load i16, i16* [[CONV1]], align 2 // CHECK25-NEXT: [[CONV3:%.*]] = bitcast i64* [[AA_CASTED]] to i16* // CHECK25-NEXT: store i16 [[TMP3]], i16* [[CONV3]], align 2 // CHECK25-NEXT: [[TMP4:%.*]] = load i64, i64* [[AA_CASTED]], align 8 @@ -17189,14 +17189,14 @@ int bar(int n){ // CHECK25-NEXT: [[MUL:%.*]] = mul nsw i64 [[TMP8]], 3 // CHECK25-NEXT: [[ADD:%.*]] = add nsw i64 -10, [[MUL]] // CHECK25-NEXT: store i64 [[ADD]], i64* [[I]], align 8 -// CHECK25-NEXT: [[TMP9:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK25-NEXT: [[TMP9:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK25-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP9]], 1 -// CHECK25-NEXT: store i32 [[ADD3]], i32* [[CONV]], align 8 -// CHECK25-NEXT: [[TMP10:%.*]] = load i16, i16* [[CONV1]], align 8 +// CHECK25-NEXT: store i32 [[ADD3]], i32* [[CONV]], align 4 +// CHECK25-NEXT: [[TMP10:%.*]] = load i16, i16* [[CONV1]], align 2 // CHECK25-NEXT: [[CONV4:%.*]] = sext i16 [[TMP10]] to i32 // CHECK25-NEXT: [[ADD5:%.*]] = add nsw i32 [[CONV4]], 1 // CHECK25-NEXT: [[CONV6:%.*]] = trunc i32 [[ADD5]] to i16 -// CHECK25-NEXT: store i16 [[CONV6]], i16* [[CONV1]], align 8 +// CHECK25-NEXT: store i16 [[CONV6]], i16* 
[[CONV1]], align 2 // CHECK25-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], [10 x i32]* [[TMP0]], i64 0, i64 2 // CHECK25-NEXT: [[TMP11:%.*]] = load i32, i32* [[ARRAYIDX]], align 4 // CHECK25-NEXT: [[ADD7:%.*]] = add nsw i32 [[TMP11]], 1 @@ -17315,15 +17315,15 @@ int bar(int n){ // CHECK26-NEXT: [[CONV:%.*]] = bitcast i64* [[AA_ADDR]] to i16* // CHECK26-NEXT: [[CONV1:%.*]] = bitcast i64* [[LIN_ADDR]] to i32* // CHECK26-NEXT: [[CONV2:%.*]] = bitcast i64* [[A_ADDR]] to i32* -// CHECK26-NEXT: [[TMP0:%.*]] = load i16, i16* [[CONV]], align 8 +// CHECK26-NEXT: [[TMP0:%.*]] = load i16, i16* [[CONV]], align 2 // CHECK26-NEXT: [[CONV3:%.*]] = bitcast i64* [[AA_CASTED]] to i16* // CHECK26-NEXT: store i16 [[TMP0]], i16* [[CONV3]], align 2 // CHECK26-NEXT: [[TMP1:%.*]] = load i64, i64* [[AA_CASTED]], align 8 -// CHECK26-NEXT: [[TMP2:%.*]] = load i32, i32* [[CONV1]], align 8 +// CHECK26-NEXT: [[TMP2:%.*]] = load i32, i32* [[CONV1]], align 4 // CHECK26-NEXT: [[CONV4:%.*]] = bitcast i64* [[LIN_CASTED]] to i32* // CHECK26-NEXT: store i32 [[TMP2]], i32* [[CONV4]], align 4 // CHECK26-NEXT: [[TMP3:%.*]] = load i64, i64* [[LIN_CASTED]], align 8 -// CHECK26-NEXT: [[TMP4:%.*]] = load i32, i32* [[CONV2]], align 8 +// CHECK26-NEXT: [[TMP4:%.*]] = load i32, i32* [[CONV2]], align 4 // CHECK26-NEXT: [[CONV5:%.*]] = bitcast i64* [[A_CASTED]] to i32* // CHECK26-NEXT: store i32 [[TMP4]], i32* [[CONV5]], align 4 // CHECK26-NEXT: [[TMP5:%.*]] = load i64, i64* [[A_CASTED]], align 8 @@ -17359,9 +17359,9 @@ int bar(int n){ // CHECK26-NEXT: [[CONV:%.*]] = bitcast i64* [[AA_ADDR]] to i16* // CHECK26-NEXT: [[CONV1:%.*]] = bitcast i64* [[LIN_ADDR]] to i32* // CHECK26-NEXT: [[CONV2:%.*]] = bitcast i64* [[A_ADDR]] to i32* -// CHECK26-NEXT: [[TMP0:%.*]] = load i32, i32* [[CONV1]], align 8 +// CHECK26-NEXT: [[TMP0:%.*]] = load i32, i32* [[CONV1]], align 4 // CHECK26-NEXT: store i32 [[TMP0]], i32* [[DOTLINEAR_START]], align 4 -// CHECK26-NEXT: [[TMP1:%.*]] = load i32, i32* [[CONV2]], align 8 +// CHECK26-NEXT: [[TMP1:%.*]] = load i32, i32* [[CONV2]], align 4 // CHECK26-NEXT: store i32 [[TMP1]], i32* [[DOTLINEAR_START3]], align 4 // CHECK26-NEXT: [[CALL:%.*]] = call i64 @_Z7get_valv() #[[ATTR5:[0-9]+]] // CHECK26-NEXT: store i64 [[CALL]], i64* [[DOTLINEAR_STEP]], align 8 @@ -17413,11 +17413,11 @@ int bar(int n){ // CHECK26-NEXT: [[ADD12:%.*]] = add i64 [[CONV10]], [[MUL11]] // CHECK26-NEXT: [[CONV13:%.*]] = trunc i64 [[ADD12]] to i32 // CHECK26-NEXT: store i32 [[CONV13]], i32* [[A5]], align 4 -// CHECK26-NEXT: [[TMP16:%.*]] = load i16, i16* [[CONV]], align 8 +// CHECK26-NEXT: [[TMP16:%.*]] = load i16, i16* [[CONV]], align 2 // CHECK26-NEXT: [[CONV14:%.*]] = sext i16 [[TMP16]] to i32 // CHECK26-NEXT: [[ADD15:%.*]] = add nsw i32 [[CONV14]], 1 // CHECK26-NEXT: [[CONV16:%.*]] = trunc i32 [[ADD15]] to i16 -// CHECK26-NEXT: store i16 [[CONV16]], i16* [[CONV]], align 8 +// CHECK26-NEXT: store i16 [[CONV16]], i16* [[CONV]], align 2 // CHECK26-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK26: omp.body.continue: // CHECK26-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] @@ -17435,9 +17435,9 @@ int bar(int n){ // CHECK26-NEXT: br i1 [[TMP19]], label [[DOTOMP_LINEAR_PU:%.*]], label [[DOTOMP_LINEAR_PU_DONE:%.*]] // CHECK26: .omp.linear.pu: // CHECK26-NEXT: [[TMP20:%.*]] = load i32, i32* [[LIN4]], align 4 -// CHECK26-NEXT: store i32 [[TMP20]], i32* [[CONV1]], align 8 +// CHECK26-NEXT: store i32 [[TMP20]], i32* [[CONV1]], align 4 // CHECK26-NEXT: [[TMP21:%.*]] = load i32, i32* [[A5]], align 4 -// CHECK26-NEXT: store i32 [[TMP21]], i32* 
[[CONV2]], align 8 +// CHECK26-NEXT: store i32 [[TMP21]], i32* [[CONV2]], align 4 // CHECK26-NEXT: br label [[DOTOMP_LINEAR_PU_DONE]] // CHECK26: .omp.linear.pu.done: // CHECK26-NEXT: ret void @@ -17460,11 +17460,11 @@ int bar(int n){ // CHECK26-NEXT: store i64 [[AA]], i64* [[AA_ADDR]], align 8 // CHECK26-NEXT: [[CONV:%.*]] = bitcast i64* [[A_ADDR]] to i32* // CHECK26-NEXT: [[CONV1:%.*]] = bitcast i64* [[AA_ADDR]] to i16* -// CHECK26-NEXT: [[TMP0:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK26-NEXT: [[TMP0:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK26-NEXT: [[CONV2:%.*]] = bitcast i64* [[A_CASTED]] to i32* // CHECK26-NEXT: store i32 [[TMP0]], i32* [[CONV2]], align 4 // CHECK26-NEXT: [[TMP1:%.*]] = load i64, i64* [[A_CASTED]], align 8 -// CHECK26-NEXT: [[TMP2:%.*]] = load i16, i16* [[CONV1]], align 8 +// CHECK26-NEXT: [[TMP2:%.*]] = load i16, i16* [[CONV1]], align 2 // CHECK26-NEXT: [[CONV3:%.*]] = bitcast i64* [[AA_CASTED]] to i16* // CHECK26-NEXT: store i16 [[TMP2]], i16* [[CONV3]], align 2 // CHECK26-NEXT: [[TMP3:%.*]] = load i64, i64* [[AA_CASTED]], align 8 @@ -17524,14 +17524,14 @@ int bar(int n){ // CHECK26-NEXT: [[ADD:%.*]] = add nsw i32 6, [[MUL]] // CHECK26-NEXT: [[CONV3:%.*]] = trunc i32 [[ADD]] to i16 // CHECK26-NEXT: store i16 [[CONV3]], i16* [[IT]], align 2 -// CHECK26-NEXT: [[TMP8:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK26-NEXT: [[TMP8:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK26-NEXT: [[ADD4:%.*]] = add nsw i32 [[TMP8]], 1 -// CHECK26-NEXT: store i32 [[ADD4]], i32* [[CONV]], align 8 -// CHECK26-NEXT: [[TMP9:%.*]] = load i16, i16* [[CONV1]], align 8 +// CHECK26-NEXT: store i32 [[ADD4]], i32* [[CONV]], align 4 +// CHECK26-NEXT: [[TMP9:%.*]] = load i16, i16* [[CONV1]], align 2 // CHECK26-NEXT: [[CONV5:%.*]] = sext i16 [[TMP9]] to i32 // CHECK26-NEXT: [[ADD6:%.*]] = add nsw i32 [[CONV5]], 1 // CHECK26-NEXT: [[CONV7:%.*]] = trunc i32 [[ADD6]] to i16 -// CHECK26-NEXT: store i16 [[CONV7]], i16* [[CONV1]], align 8 +// CHECK26-NEXT: store i16 [[CONV7]], i16* [[CONV1]], align 2 // CHECK26-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK26: omp.body.continue: // CHECK26-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] @@ -17582,11 +17582,11 @@ int bar(int n){ // CHECK26-NEXT: [[TMP6:%.*]] = load double*, double** [[CN_ADDR]], align 8 // CHECK26-NEXT: [[TMP7:%.*]] = load %struct.TT*, %struct.TT** [[D_ADDR]], align 8 // CHECK26-NEXT: [[CONV5:%.*]] = bitcast i64* [[DOTCAPTURE_EXPR__ADDR]] to i32* -// CHECK26-NEXT: [[TMP8:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK26-NEXT: [[TMP8:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK26-NEXT: [[CONV6:%.*]] = bitcast i64* [[A_CASTED]] to i32* // CHECK26-NEXT: store i32 [[TMP8]], i32* [[CONV6]], align 4 // CHECK26-NEXT: [[TMP9:%.*]] = load i64, i64* [[A_CASTED]], align 8 -// CHECK26-NEXT: [[TMP10:%.*]] = load i32, i32* [[CONV5]], align 8 +// CHECK26-NEXT: [[TMP10:%.*]] = load i32, i32* [[CONV5]], align 4 // CHECK26-NEXT: [[CONV7:%.*]] = bitcast i64* [[DOTCAPTURE_EXPR__CASTED]] to i32* // CHECK26-NEXT: store i32 [[TMP10]], i32* [[CONV7]], align 4 // CHECK26-NEXT: [[TMP11:%.*]] = load i64, i64* [[DOTCAPTURE_EXPR__CASTED]], align 8 @@ -17642,7 +17642,7 @@ int bar(int n){ // CHECK26-NEXT: store i32 25, i32* [[DOTOMP_UB]], align 4 // CHECK26-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK26-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK26-NEXT: [[TMP8:%.*]] = load i32, i32* [[CONV5]], align 8 +// CHECK26-NEXT: [[TMP8:%.*]] = load i32, i32* [[CONV5]], align 4 // CHECK26-NEXT: [[TMP9:%.*]] = load 
i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK26-NEXT: [[TMP10:%.*]] = load i32, i32* [[TMP9]], align 4 // CHECK26-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP10]], i32 33, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 [[TMP8]]) @@ -17678,9 +17678,9 @@ int bar(int n){ // CHECK26-NEXT: [[SUB:%.*]] = sub nsw i32 122, [[MUL]] // CHECK26-NEXT: [[CONV8:%.*]] = trunc i32 [[SUB]] to i8 // CHECK26-NEXT: store i8 [[CONV8]], i8* [[IT]], align 1 -// CHECK26-NEXT: [[TMP19:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK26-NEXT: [[TMP19:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK26-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP19]], 1 -// CHECK26-NEXT: store i32 [[ADD]], i32* [[CONV]], align 8 +// CHECK26-NEXT: store i32 [[ADD]], i32* [[CONV]], align 4 // CHECK26-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x float], [10 x float]* [[TMP0]], i64 0, i64 2 // CHECK26-NEXT: [[TMP20:%.*]] = load float, float* [[ARRAYIDX]], align 4 // CHECK26-NEXT: [[CONV9:%.*]] = fpext float [[TMP20]] to double @@ -17757,15 +17757,15 @@ int bar(int n){ // CHECK26-NEXT: [[CONV1:%.*]] = bitcast i64* [[AA_ADDR]] to i16* // CHECK26-NEXT: [[CONV2:%.*]] = bitcast i64* [[AAA_ADDR]] to i8* // CHECK26-NEXT: [[TMP0:%.*]] = load [10 x i32]*, [10 x i32]** [[B_ADDR]], align 8 -// CHECK26-NEXT: [[TMP1:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK26-NEXT: [[TMP1:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK26-NEXT: [[CONV3:%.*]] = bitcast i64* [[A_CASTED]] to i32* // CHECK26-NEXT: store i32 [[TMP1]], i32* [[CONV3]], align 4 // CHECK26-NEXT: [[TMP2:%.*]] = load i64, i64* [[A_CASTED]], align 8 -// CHECK26-NEXT: [[TMP3:%.*]] = load i16, i16* [[CONV1]], align 8 +// CHECK26-NEXT: [[TMP3:%.*]] = load i16, i16* [[CONV1]], align 2 // CHECK26-NEXT: [[CONV4:%.*]] = bitcast i64* [[AA_CASTED]] to i16* // CHECK26-NEXT: store i16 [[TMP3]], i16* [[CONV4]], align 2 // CHECK26-NEXT: [[TMP4:%.*]] = load i64, i64* [[AA_CASTED]], align 8 -// CHECK26-NEXT: [[TMP5:%.*]] = load i8, i8* [[CONV2]], align 8 +// CHECK26-NEXT: [[TMP5:%.*]] = load i8, i8* [[CONV2]], align 1 // CHECK26-NEXT: [[CONV5:%.*]] = bitcast i64* [[AAA_CASTED]] to i8* // CHECK26-NEXT: store i8 [[TMP5]], i8* [[CONV5]], align 1 // CHECK26-NEXT: [[TMP6:%.*]] = load i64, i64* [[AAA_CASTED]], align 8 @@ -17816,7 +17816,7 @@ int bar(int n){ // CHECK26-NEXT: [[TMP1:%.*]] = load i64, i64* [[VLA_ADDR]], align 8 // CHECK26-NEXT: [[TMP2:%.*]] = load i64, i64* [[VLA_ADDR2]], align 8 // CHECK26-NEXT: [[TMP3:%.*]] = load i16*, i16** [[C_ADDR]], align 8 -// CHECK26-NEXT: [[TMP4:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK26-NEXT: [[TMP4:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK26-NEXT: [[CONV3:%.*]] = bitcast i64* [[B_CASTED]] to i32* // CHECK26-NEXT: store i32 [[TMP4]], i32* [[CONV3]], align 4 // CHECK26-NEXT: [[TMP5:%.*]] = load i64, i64* [[B_CASTED]], align 8 @@ -17884,7 +17884,7 @@ int bar(int n){ // CHECK26-NEXT: [[MUL:%.*]] = mul i64 [[TMP11]], 400 // CHECK26-NEXT: [[SUB:%.*]] = sub i64 2000, [[MUL]] // CHECK26-NEXT: store i64 [[SUB]], i64* [[IT]], align 8 -// CHECK26-NEXT: [[TMP12:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK26-NEXT: [[TMP12:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK26-NEXT: [[CONV4:%.*]] = sitofp i32 [[TMP12]] to double // CHECK26-NEXT: [[ADD:%.*]] = fadd double [[CONV4]], 1.500000e+00 // CHECK26-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_S1:%.*]], %struct.S1* [[TMP0]], i32 0, i32 0 @@ -17927,11 +17927,11 @@ int bar(int n){ // CHECK26-NEXT: 
[[CONV:%.*]] = bitcast i64* [[A_ADDR]] to i32* // CHECK26-NEXT: [[CONV1:%.*]] = bitcast i64* [[AA_ADDR]] to i16* // CHECK26-NEXT: [[TMP0:%.*]] = load [10 x i32]*, [10 x i32]** [[B_ADDR]], align 8 -// CHECK26-NEXT: [[TMP1:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK26-NEXT: [[TMP1:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK26-NEXT: [[CONV2:%.*]] = bitcast i64* [[A_CASTED]] to i32* // CHECK26-NEXT: store i32 [[TMP1]], i32* [[CONV2]], align 4 // CHECK26-NEXT: [[TMP2:%.*]] = load i64, i64* [[A_CASTED]], align 8 -// CHECK26-NEXT: [[TMP3:%.*]] = load i16, i16* [[CONV1]], align 8 +// CHECK26-NEXT: [[TMP3:%.*]] = load i16, i16* [[CONV1]], align 2 // CHECK26-NEXT: [[CONV3:%.*]] = bitcast i64* [[AA_CASTED]] to i16* // CHECK26-NEXT: store i16 [[TMP3]], i16* [[CONV3]], align 2 // CHECK26-NEXT: [[TMP4:%.*]] = load i64, i64* [[AA_CASTED]], align 8 @@ -17993,14 +17993,14 @@ int bar(int n){ // CHECK26-NEXT: [[MUL:%.*]] = mul nsw i64 [[TMP8]], 3 // CHECK26-NEXT: [[ADD:%.*]] = add nsw i64 -10, [[MUL]] // CHECK26-NEXT: store i64 [[ADD]], i64* [[I]], align 8 -// CHECK26-NEXT: [[TMP9:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK26-NEXT: [[TMP9:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK26-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP9]], 1 -// CHECK26-NEXT: store i32 [[ADD3]], i32* [[CONV]], align 8 -// CHECK26-NEXT: [[TMP10:%.*]] = load i16, i16* [[CONV1]], align 8 +// CHECK26-NEXT: store i32 [[ADD3]], i32* [[CONV]], align 4 +// CHECK26-NEXT: [[TMP10:%.*]] = load i16, i16* [[CONV1]], align 2 // CHECK26-NEXT: [[CONV4:%.*]] = sext i16 [[TMP10]] to i32 // CHECK26-NEXT: [[ADD5:%.*]] = add nsw i32 [[CONV4]], 1 // CHECK26-NEXT: [[CONV6:%.*]] = trunc i32 [[ADD5]] to i16 -// CHECK26-NEXT: store i16 [[CONV6]], i16* [[CONV1]], align 8 +// CHECK26-NEXT: store i16 [[CONV6]], i16* [[CONV1]], align 2 // CHECK26-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], [10 x i32]* [[TMP0]], i64 0, i64 2 // CHECK26-NEXT: [[TMP11:%.*]] = load i32, i32* [[ARRAYIDX]], align 4 // CHECK26-NEXT: [[ADD7:%.*]] = add nsw i32 [[TMP11]], 1 @@ -18117,7 +18117,7 @@ int bar(int n){ // CHECK27-NEXT: store i32 [[LIN]], i32* [[LIN_ADDR]], align 4 // CHECK27-NEXT: store i32 [[A]], i32* [[A_ADDR]], align 4 // CHECK27-NEXT: [[CONV:%.*]] = bitcast i32* [[AA_ADDR]] to i16* -// CHECK27-NEXT: [[TMP0:%.*]] = load i16, i16* [[CONV]], align 4 +// CHECK27-NEXT: [[TMP0:%.*]] = load i16, i16* [[CONV]], align 2 // CHECK27-NEXT: [[CONV1:%.*]] = bitcast i32* [[AA_CASTED]] to i16* // CHECK27-NEXT: store i16 [[TMP0]], i16* [[CONV1]], align 2 // CHECK27-NEXT: [[TMP1:%.*]] = load i32, i32* [[AA_CASTED]], align 4 @@ -18211,11 +18211,11 @@ int bar(int n){ // CHECK27-NEXT: [[ADD10:%.*]] = add i64 [[CONV8]], [[MUL9]] // CHECK27-NEXT: [[CONV11:%.*]] = trunc i64 [[ADD10]] to i32 // CHECK27-NEXT: store i32 [[CONV11]], i32* [[A3]], align 4 -// CHECK27-NEXT: [[TMP16:%.*]] = load i16, i16* [[CONV]], align 4 +// CHECK27-NEXT: [[TMP16:%.*]] = load i16, i16* [[CONV]], align 2 // CHECK27-NEXT: [[CONV12:%.*]] = sext i16 [[TMP16]] to i32 // CHECK27-NEXT: [[ADD13:%.*]] = add nsw i32 [[CONV12]], 1 // CHECK27-NEXT: [[CONV14:%.*]] = trunc i32 [[ADD13]] to i16 -// CHECK27-NEXT: store i16 [[CONV14]], i16* [[CONV]], align 4 +// CHECK27-NEXT: store i16 [[CONV14]], i16* [[CONV]], align 2 // CHECK27-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK27: omp.body.continue: // CHECK27-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] @@ -18260,7 +18260,7 @@ int bar(int n){ // CHECK27-NEXT: [[TMP0:%.*]] = load i32, i32* [[A_ADDR]], align 4 // CHECK27-NEXT: store i32 [[TMP0]], i32* 
[[A_CASTED]], align 4 // CHECK27-NEXT: [[TMP1:%.*]] = load i32, i32* [[A_CASTED]], align 4 -// CHECK27-NEXT: [[TMP2:%.*]] = load i16, i16* [[CONV]], align 4 +// CHECK27-NEXT: [[TMP2:%.*]] = load i16, i16* [[CONV]], align 2 // CHECK27-NEXT: [[CONV1:%.*]] = bitcast i32* [[AA_CASTED]] to i16* // CHECK27-NEXT: store i16 [[TMP2]], i16* [[CONV1]], align 2 // CHECK27-NEXT: [[TMP3:%.*]] = load i32, i32* [[AA_CASTED]], align 4 @@ -18322,11 +18322,11 @@ int bar(int n){ // CHECK27-NEXT: [[TMP8:%.*]] = load i32, i32* [[A_ADDR]], align 4 // CHECK27-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP8]], 1 // CHECK27-NEXT: store i32 [[ADD3]], i32* [[A_ADDR]], align 4 -// CHECK27-NEXT: [[TMP9:%.*]] = load i16, i16* [[CONV]], align 4 +// CHECK27-NEXT: [[TMP9:%.*]] = load i16, i16* [[CONV]], align 2 // CHECK27-NEXT: [[CONV4:%.*]] = sext i16 [[TMP9]] to i32 // CHECK27-NEXT: [[ADD5:%.*]] = add nsw i32 [[CONV4]], 1 // CHECK27-NEXT: [[CONV6:%.*]] = trunc i32 [[ADD5]] to i16 -// CHECK27-NEXT: store i16 [[CONV6]], i16* [[CONV]], align 4 +// CHECK27-NEXT: store i16 [[CONV6]], i16* [[CONV]], align 2 // CHECK27-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK27: omp.body.continue: // CHECK27-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] @@ -18548,11 +18548,11 @@ int bar(int n){ // CHECK27-NEXT: [[TMP1:%.*]] = load i32, i32* [[A_ADDR]], align 4 // CHECK27-NEXT: store i32 [[TMP1]], i32* [[A_CASTED]], align 4 // CHECK27-NEXT: [[TMP2:%.*]] = load i32, i32* [[A_CASTED]], align 4 -// CHECK27-NEXT: [[TMP3:%.*]] = load i16, i16* [[CONV]], align 4 +// CHECK27-NEXT: [[TMP3:%.*]] = load i16, i16* [[CONV]], align 2 // CHECK27-NEXT: [[CONV2:%.*]] = bitcast i32* [[AA_CASTED]] to i16* // CHECK27-NEXT: store i16 [[TMP3]], i16* [[CONV2]], align 2 // CHECK27-NEXT: [[TMP4:%.*]] = load i32, i32* [[AA_CASTED]], align 4 -// CHECK27-NEXT: [[TMP5:%.*]] = load i8, i8* [[CONV1]], align 4 +// CHECK27-NEXT: [[TMP5:%.*]] = load i8, i8* [[CONV1]], align 1 // CHECK27-NEXT: [[CONV3:%.*]] = bitcast i32* [[AAA_CASTED]] to i8* // CHECK27-NEXT: store i8 [[TMP5]], i8* [[CONV3]], align 1 // CHECK27-NEXT: [[TMP6:%.*]] = load i32, i32* [[AAA_CASTED]], align 4 @@ -18712,7 +18712,7 @@ int bar(int n){ // CHECK27-NEXT: [[TMP1:%.*]] = load i32, i32* [[A_ADDR]], align 4 // CHECK27-NEXT: store i32 [[TMP1]], i32* [[A_CASTED]], align 4 // CHECK27-NEXT: [[TMP2:%.*]] = load i32, i32* [[A_CASTED]], align 4 -// CHECK27-NEXT: [[TMP3:%.*]] = load i16, i16* [[CONV]], align 4 +// CHECK27-NEXT: [[TMP3:%.*]] = load i16, i16* [[CONV]], align 2 // CHECK27-NEXT: [[CONV1:%.*]] = bitcast i32* [[AA_CASTED]] to i16* // CHECK27-NEXT: store i16 [[TMP3]], i16* [[CONV1]], align 2 // CHECK27-NEXT: [[TMP4:%.*]] = load i32, i32* [[AA_CASTED]], align 4 @@ -18776,11 +18776,11 @@ int bar(int n){ // CHECK27-NEXT: [[TMP9:%.*]] = load i32, i32* [[A_ADDR]], align 4 // CHECK27-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP9]], 1 // CHECK27-NEXT: store i32 [[ADD2]], i32* [[A_ADDR]], align 4 -// CHECK27-NEXT: [[TMP10:%.*]] = load i16, i16* [[CONV]], align 4 +// CHECK27-NEXT: [[TMP10:%.*]] = load i16, i16* [[CONV]], align 2 // CHECK27-NEXT: [[CONV3:%.*]] = sext i16 [[TMP10]] to i32 // CHECK27-NEXT: [[ADD4:%.*]] = add nsw i32 [[CONV3]], 1 // CHECK27-NEXT: [[CONV5:%.*]] = trunc i32 [[ADD4]] to i16 -// CHECK27-NEXT: store i16 [[CONV5]], i16* [[CONV]], align 4 +// CHECK27-NEXT: store i16 [[CONV5]], i16* [[CONV]], align 2 // CHECK27-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], [10 x i32]* [[TMP0]], i32 0, i32 2 // CHECK27-NEXT: [[TMP11:%.*]] = load i32, i32* [[ARRAYIDX]], align 4 // CHECK27-NEXT: [[ADD6:%.*]] = 
add nsw i32 [[TMP11]], 1 @@ -18897,7 +18897,7 @@ int bar(int n){ // CHECK28-NEXT: store i32 [[LIN]], i32* [[LIN_ADDR]], align 4 // CHECK28-NEXT: store i32 [[A]], i32* [[A_ADDR]], align 4 // CHECK28-NEXT: [[CONV:%.*]] = bitcast i32* [[AA_ADDR]] to i16* -// CHECK28-NEXT: [[TMP0:%.*]] = load i16, i16* [[CONV]], align 4 +// CHECK28-NEXT: [[TMP0:%.*]] = load i16, i16* [[CONV]], align 2 // CHECK28-NEXT: [[CONV1:%.*]] = bitcast i32* [[AA_CASTED]] to i16* // CHECK28-NEXT: store i16 [[TMP0]], i16* [[CONV1]], align 2 // CHECK28-NEXT: [[TMP1:%.*]] = load i32, i32* [[AA_CASTED]], align 4 @@ -18991,11 +18991,11 @@ int bar(int n){ // CHECK28-NEXT: [[ADD10:%.*]] = add i64 [[CONV8]], [[MUL9]] // CHECK28-NEXT: [[CONV11:%.*]] = trunc i64 [[ADD10]] to i32 // CHECK28-NEXT: store i32 [[CONV11]], i32* [[A3]], align 4 -// CHECK28-NEXT: [[TMP16:%.*]] = load i16, i16* [[CONV]], align 4 +// CHECK28-NEXT: [[TMP16:%.*]] = load i16, i16* [[CONV]], align 2 // CHECK28-NEXT: [[CONV12:%.*]] = sext i16 [[TMP16]] to i32 // CHECK28-NEXT: [[ADD13:%.*]] = add nsw i32 [[CONV12]], 1 // CHECK28-NEXT: [[CONV14:%.*]] = trunc i32 [[ADD13]] to i16 -// CHECK28-NEXT: store i16 [[CONV14]], i16* [[CONV]], align 4 +// CHECK28-NEXT: store i16 [[CONV14]], i16* [[CONV]], align 2 // CHECK28-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK28: omp.body.continue: // CHECK28-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] @@ -19040,7 +19040,7 @@ int bar(int n){ // CHECK28-NEXT: [[TMP0:%.*]] = load i32, i32* [[A_ADDR]], align 4 // CHECK28-NEXT: store i32 [[TMP0]], i32* [[A_CASTED]], align 4 // CHECK28-NEXT: [[TMP1:%.*]] = load i32, i32* [[A_CASTED]], align 4 -// CHECK28-NEXT: [[TMP2:%.*]] = load i16, i16* [[CONV]], align 4 +// CHECK28-NEXT: [[TMP2:%.*]] = load i16, i16* [[CONV]], align 2 // CHECK28-NEXT: [[CONV1:%.*]] = bitcast i32* [[AA_CASTED]] to i16* // CHECK28-NEXT: store i16 [[TMP2]], i16* [[CONV1]], align 2 // CHECK28-NEXT: [[TMP3:%.*]] = load i32, i32* [[AA_CASTED]], align 4 @@ -19102,11 +19102,11 @@ int bar(int n){ // CHECK28-NEXT: [[TMP8:%.*]] = load i32, i32* [[A_ADDR]], align 4 // CHECK28-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP8]], 1 // CHECK28-NEXT: store i32 [[ADD3]], i32* [[A_ADDR]], align 4 -// CHECK28-NEXT: [[TMP9:%.*]] = load i16, i16* [[CONV]], align 4 +// CHECK28-NEXT: [[TMP9:%.*]] = load i16, i16* [[CONV]], align 2 // CHECK28-NEXT: [[CONV4:%.*]] = sext i16 [[TMP9]] to i32 // CHECK28-NEXT: [[ADD5:%.*]] = add nsw i32 [[CONV4]], 1 // CHECK28-NEXT: [[CONV6:%.*]] = trunc i32 [[ADD5]] to i16 -// CHECK28-NEXT: store i16 [[CONV6]], i16* [[CONV]], align 4 +// CHECK28-NEXT: store i16 [[CONV6]], i16* [[CONV]], align 2 // CHECK28-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK28: omp.body.continue: // CHECK28-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] @@ -19328,11 +19328,11 @@ int bar(int n){ // CHECK28-NEXT: [[TMP1:%.*]] = load i32, i32* [[A_ADDR]], align 4 // CHECK28-NEXT: store i32 [[TMP1]], i32* [[A_CASTED]], align 4 // CHECK28-NEXT: [[TMP2:%.*]] = load i32, i32* [[A_CASTED]], align 4 -// CHECK28-NEXT: [[TMP3:%.*]] = load i16, i16* [[CONV]], align 4 +// CHECK28-NEXT: [[TMP3:%.*]] = load i16, i16* [[CONV]], align 2 // CHECK28-NEXT: [[CONV2:%.*]] = bitcast i32* [[AA_CASTED]] to i16* // CHECK28-NEXT: store i16 [[TMP3]], i16* [[CONV2]], align 2 // CHECK28-NEXT: [[TMP4:%.*]] = load i32, i32* [[AA_CASTED]], align 4 -// CHECK28-NEXT: [[TMP5:%.*]] = load i8, i8* [[CONV1]], align 4 +// CHECK28-NEXT: [[TMP5:%.*]] = load i8, i8* [[CONV1]], align 1 // CHECK28-NEXT: [[CONV3:%.*]] = bitcast i32* [[AAA_CASTED]] to i8* // CHECK28-NEXT: store i8 [[TMP5]], 
i8* [[CONV3]], align 1 // CHECK28-NEXT: [[TMP6:%.*]] = load i32, i32* [[AAA_CASTED]], align 4 @@ -19492,7 +19492,7 @@ int bar(int n){ // CHECK28-NEXT: [[TMP1:%.*]] = load i32, i32* [[A_ADDR]], align 4 // CHECK28-NEXT: store i32 [[TMP1]], i32* [[A_CASTED]], align 4 // CHECK28-NEXT: [[TMP2:%.*]] = load i32, i32* [[A_CASTED]], align 4 -// CHECK28-NEXT: [[TMP3:%.*]] = load i16, i16* [[CONV]], align 4 +// CHECK28-NEXT: [[TMP3:%.*]] = load i16, i16* [[CONV]], align 2 // CHECK28-NEXT: [[CONV1:%.*]] = bitcast i32* [[AA_CASTED]] to i16* // CHECK28-NEXT: store i16 [[TMP3]], i16* [[CONV1]], align 2 // CHECK28-NEXT: [[TMP4:%.*]] = load i32, i32* [[AA_CASTED]], align 4 @@ -19556,11 +19556,11 @@ int bar(int n){ // CHECK28-NEXT: [[TMP9:%.*]] = load i32, i32* [[A_ADDR]], align 4 // CHECK28-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP9]], 1 // CHECK28-NEXT: store i32 [[ADD2]], i32* [[A_ADDR]], align 4 -// CHECK28-NEXT: [[TMP10:%.*]] = load i16, i16* [[CONV]], align 4 +// CHECK28-NEXT: [[TMP10:%.*]] = load i16, i16* [[CONV]], align 2 // CHECK28-NEXT: [[CONV3:%.*]] = sext i16 [[TMP10]] to i32 // CHECK28-NEXT: [[ADD4:%.*]] = add nsw i32 [[CONV3]], 1 // CHECK28-NEXT: [[CONV5:%.*]] = trunc i32 [[ADD4]] to i16 -// CHECK28-NEXT: store i16 [[CONV5]], i16* [[CONV]], align 4 +// CHECK28-NEXT: store i16 [[CONV5]], i16* [[CONV]], align 2 // CHECK28-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], [10 x i32]* [[TMP0]], i32 0, i32 2 // CHECK28-NEXT: [[TMP11:%.*]] = load i32, i32* [[ARRAYIDX]], align 4 // CHECK28-NEXT: [[ADD6:%.*]] = add nsw i32 [[TMP11]], 1 diff --git a/clang/test/OpenMP/target_parallel_for_debug_codegen.cpp b/clang/test/OpenMP/target_parallel_for_debug_codegen.cpp index 7f9ba2c9e773..b5a704c8f6d2 100644 --- a/clang/test/OpenMP/target_parallel_for_debug_codegen.cpp +++ b/clang/test/OpenMP/target_parallel_for_debug_codegen.cpp @@ -316,7 +316,7 @@ int main() { // CHECK1-NEXT: [[TMP3:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8, !dbg [[DBG122]] // CHECK1-NEXT: [[TMP4:%.*]] = load i32*, i32** [[DOTBOUND_TID__ADDR]], align 8, !dbg [[DBG122]] // CHECK1-NEXT: [[TMP5:%.*]] = load [10 x [10 x [10 x i32]]]*, [10 x [10 x [10 x i32]]]** [[C_ADDR]], align 8, !dbg [[DBG122]] -// CHECK1-NEXT: [[TMP6:%.*]] = load i32, i32* [[CONV]], align 8, !dbg [[DBG122]] +// CHECK1-NEXT: [[TMP6:%.*]] = load i32, i32* [[CONV]], align 4, !dbg [[DBG122]] // CHECK1-NEXT: [[TMP7:%.*]] = load [10 x [10 x i32]]*, [10 x [10 x i32]]** [[B_ADDR]], align 8, !dbg [[DBG122]] // CHECK1-NEXT: [[TMP8:%.*]] = load i8*, i8** [[BB_ADDR]], align 8, !dbg [[DBG122]] // CHECK1-NEXT: [[TMP9:%.*]] = addrspacecast [10 x [10 x [10 x i32]]]* [[TMP5]] to [10 x [10 x [10 x i32]]] addrspace(1)*, !dbg [[DBG122]] @@ -349,10 +349,10 @@ int main() { // CHECK1-NEXT: [[TMP2:%.*]] = load i8*, i8** [[BB_ADDR]], align 8, !dbg [[DBG132]] // CHECK1-NEXT: [[CONV1:%.*]] = bitcast i64* [[DOTCAPTURE_EXPR__ADDR]] to i8*, !dbg [[DBG132]] // CHECK1-NEXT: [[TMP3:%.*]] = load [10 x [10 x [10 x i32]]]*, [10 x [10 x [10 x i32]]]** [[C_ADDR]], align 8, !dbg [[DBG132]] -// CHECK1-NEXT: [[TMP4:%.*]] = load i32, i32* [[CONV]], align 8, !dbg [[DBG132]] +// CHECK1-NEXT: [[TMP4:%.*]] = load i32, i32* [[CONV]], align 4, !dbg [[DBG132]] // CHECK1-NEXT: [[TMP5:%.*]] = load [10 x [10 x i32]]*, [10 x [10 x i32]]** [[B_ADDR]], align 8, !dbg [[DBG132]] // CHECK1-NEXT: [[TMP6:%.*]] = load i8*, i8** [[BB_ADDR]], align 8, !dbg [[DBG132]] -// CHECK1-NEXT: [[TMP7:%.*]] = load i8, i8* [[CONV1]], align 8, !dbg [[DBG132]] +// CHECK1-NEXT: [[TMP7:%.*]] = load i8, i8* [[CONV1]], align 1, 
!dbg [[DBG132]] // CHECK1-NEXT: [[TOBOOL:%.*]] = trunc i8 [[TMP7]] to i1, !dbg [[DBG132]] // CHECK1-NEXT: [[TMP8:%.*]] = addrspacecast [10 x [10 x [10 x i32]]]* [[TMP3]] to [10 x [10 x [10 x i32]]] addrspace(1)*, !dbg [[DBG132]] // CHECK1-NEXT: [[TMP9:%.*]] = addrspacecast i8* [[TMP6]] to i8 addrspace(1)*, !dbg [[DBG132]] @@ -604,7 +604,7 @@ int main() { // CHECK1-NEXT: [[TMP3:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8, !dbg [[DBG213]] // CHECK1-NEXT: [[TMP4:%.*]] = load i32*, i32** [[DOTBOUND_TID__ADDR]], align 8, !dbg [[DBG213]] // CHECK1-NEXT: [[TMP5:%.*]] = load [10 x [10 x [10 x i32]]]*, [10 x [10 x [10 x i32]]]** [[C_ADDR]], align 8, !dbg [[DBG213]] -// CHECK1-NEXT: [[TMP6:%.*]] = load i32, i32* [[CONV]], align 8, !dbg [[DBG213]] +// CHECK1-NEXT: [[TMP6:%.*]] = load i32, i32* [[CONV]], align 4, !dbg [[DBG213]] // CHECK1-NEXT: [[TMP7:%.*]] = load [10 x [10 x i32]]*, [10 x [10 x i32]]** [[B_ADDR]], align 8, !dbg [[DBG213]] // CHECK1-NEXT: [[TMP8:%.*]] = load i8*, i8** [[BB_ADDR]], align 8, !dbg [[DBG213]] // CHECK1-NEXT: [[TMP9:%.*]] = addrspacecast [10 x [10 x [10 x i32]]]* [[TMP5]] to [10 x [10 x [10 x i32]]] addrspace(1)*, !dbg [[DBG213]] @@ -634,7 +634,7 @@ int main() { // CHECK1-NEXT: [[TMP1:%.*]] = load [10 x [10 x i32]]*, [10 x [10 x i32]]** [[B_ADDR]], align 8, !dbg [[DBG222]] // CHECK1-NEXT: [[TMP2:%.*]] = load i8*, i8** [[BB_ADDR]], align 8, !dbg [[DBG222]] // CHECK1-NEXT: [[TMP3:%.*]] = load [10 x [10 x [10 x i32]]]*, [10 x [10 x [10 x i32]]]** [[C_ADDR]], align 8, !dbg [[DBG222]] -// CHECK1-NEXT: [[TMP4:%.*]] = load i32, i32* [[CONV]], align 8, !dbg [[DBG222]] +// CHECK1-NEXT: [[TMP4:%.*]] = load i32, i32* [[CONV]], align 4, !dbg [[DBG222]] // CHECK1-NEXT: [[TMP5:%.*]] = load [10 x [10 x i32]]*, [10 x [10 x i32]]** [[B_ADDR]], align 8, !dbg [[DBG222]] // CHECK1-NEXT: [[TMP6:%.*]] = load i8*, i8** [[BB_ADDR]], align 8, !dbg [[DBG222]] // CHECK1-NEXT: [[TMP7:%.*]] = addrspacecast [10 x [10 x [10 x i32]]]* [[TMP3]] to [10 x [10 x [10 x i32]]] addrspace(1)*, !dbg [[DBG222]] diff --git a/clang/test/OpenMP/target_parallel_for_simd_codegen.cpp b/clang/test/OpenMP/target_parallel_for_simd_codegen.cpp index d2529bb85026..475217fdc2c6 100644 --- a/clang/test/OpenMP/target_parallel_for_simd_codegen.cpp +++ b/clang/test/OpenMP/target_parallel_for_simd_codegen.cpp @@ -715,7 +715,7 @@ int bar(int n){ // CHECK1-NEXT: store i64 [[A]], i64* [[A_ADDR]], align 8 // CHECK1-NEXT: store i64 [[K]], i64* [[K_ADDR]], align 8 // CHECK1-NEXT: [[CONV:%.*]] = bitcast i64* [[A_ADDR]] to i32* -// CHECK1-NEXT: [[TMP0:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK1-NEXT: [[CONV1:%.*]] = bitcast i64* [[A_CASTED]] to i32* // CHECK1-NEXT: store i32 [[TMP0]], i32* [[CONV1]], align 4 // CHECK1-NEXT: [[TMP1:%.*]] = load i64, i64* [[A_CASTED]], align 8 @@ -782,9 +782,9 @@ int bar(int n){ // CHECK1-NEXT: [[CONV3:%.*]] = sext i32 [[MUL2]] to i64 // CHECK1-NEXT: [[ADD:%.*]] = add nsw i64 [[TMP8]], [[CONV3]] // CHECK1-NEXT: store i64 [[ADD]], i64* [[K1]], align 8, !llvm.access.group !26 -// CHECK1-NEXT: [[TMP10:%.*]] = load i32, i32* [[CONV]], align 8, !llvm.access.group !26 +// CHECK1-NEXT: [[TMP10:%.*]] = load i32, i32* [[CONV]], align 4, !llvm.access.group !26 // CHECK1-NEXT: [[ADD4:%.*]] = add nsw i32 [[TMP10]], 1 -// CHECK1-NEXT: store i32 [[ADD4]], i32* [[CONV]], align 8, !llvm.access.group !26 +// CHECK1-NEXT: store i32 [[ADD4]], i32* [[CONV]], align 4, !llvm.access.group !26 // CHECK1-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // 
CHECK1: omp.body.continue: // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] @@ -831,15 +831,15 @@ int bar(int n){ // CHECK1-NEXT: [[CONV:%.*]] = bitcast i64* [[AA_ADDR]] to i16* // CHECK1-NEXT: [[CONV1:%.*]] = bitcast i64* [[LIN_ADDR]] to i32* // CHECK1-NEXT: [[CONV2:%.*]] = bitcast i64* [[A_ADDR]] to i32* -// CHECK1-NEXT: [[TMP0:%.*]] = load i16, i16* [[CONV]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load i16, i16* [[CONV]], align 2 // CHECK1-NEXT: [[CONV3:%.*]] = bitcast i64* [[AA_CASTED]] to i16* // CHECK1-NEXT: store i16 [[TMP0]], i16* [[CONV3]], align 2 // CHECK1-NEXT: [[TMP1:%.*]] = load i64, i64* [[AA_CASTED]], align 8 -// CHECK1-NEXT: [[TMP2:%.*]] = load i32, i32* [[CONV1]], align 8 +// CHECK1-NEXT: [[TMP2:%.*]] = load i32, i32* [[CONV1]], align 4 // CHECK1-NEXT: [[CONV4:%.*]] = bitcast i64* [[LIN_CASTED]] to i32* // CHECK1-NEXT: store i32 [[TMP2]], i32* [[CONV4]], align 4 // CHECK1-NEXT: [[TMP3:%.*]] = load i64, i64* [[LIN_CASTED]], align 8 -// CHECK1-NEXT: [[TMP4:%.*]] = load i32, i32* [[CONV2]], align 8 +// CHECK1-NEXT: [[TMP4:%.*]] = load i32, i32* [[CONV2]], align 4 // CHECK1-NEXT: [[CONV5:%.*]] = bitcast i64* [[A_CASTED]] to i32* // CHECK1-NEXT: store i32 [[TMP4]], i32* [[CONV5]], align 4 // CHECK1-NEXT: [[TMP5:%.*]] = load i64, i64* [[A_CASTED]], align 8 @@ -875,9 +875,9 @@ int bar(int n){ // CHECK1-NEXT: [[CONV:%.*]] = bitcast i64* [[AA_ADDR]] to i16* // CHECK1-NEXT: [[CONV1:%.*]] = bitcast i64* [[LIN_ADDR]] to i32* // CHECK1-NEXT: [[CONV2:%.*]] = bitcast i64* [[A_ADDR]] to i32* -// CHECK1-NEXT: [[TMP0:%.*]] = load i32, i32* [[CONV1]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load i32, i32* [[CONV1]], align 4 // CHECK1-NEXT: store i32 [[TMP0]], i32* [[DOTLINEAR_START]], align 4 -// CHECK1-NEXT: [[TMP1:%.*]] = load i32, i32* [[CONV2]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = load i32, i32* [[CONV2]], align 4 // CHECK1-NEXT: store i32 [[TMP1]], i32* [[DOTLINEAR_START3]], align 4 // CHECK1-NEXT: [[CALL:%.*]] = call i64 @_Z7get_valv() // CHECK1-NEXT: store i64 [[CALL]], i64* [[DOTLINEAR_STEP]], align 8 @@ -929,11 +929,11 @@ int bar(int n){ // CHECK1-NEXT: [[ADD12:%.*]] = add i64 [[CONV10]], [[MUL11]] // CHECK1-NEXT: [[CONV13:%.*]] = trunc i64 [[ADD12]] to i32 // CHECK1-NEXT: store i32 [[CONV13]], i32* [[A5]], align 4, !llvm.access.group !29 -// CHECK1-NEXT: [[TMP16:%.*]] = load i16, i16* [[CONV]], align 8, !llvm.access.group !29 +// CHECK1-NEXT: [[TMP16:%.*]] = load i16, i16* [[CONV]], align 2, !llvm.access.group !29 // CHECK1-NEXT: [[CONV14:%.*]] = sext i16 [[TMP16]] to i32 // CHECK1-NEXT: [[ADD15:%.*]] = add nsw i32 [[CONV14]], 1 // CHECK1-NEXT: [[CONV16:%.*]] = trunc i32 [[ADD15]] to i16 -// CHECK1-NEXT: store i16 [[CONV16]], i16* [[CONV]], align 8, !llvm.access.group !29 +// CHECK1-NEXT: store i16 [[CONV16]], i16* [[CONV]], align 2, !llvm.access.group !29 // CHECK1-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK1: omp.body.continue: // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] @@ -958,9 +958,9 @@ int bar(int n){ // CHECK1-NEXT: br i1 [[TMP21]], label [[DOTOMP_LINEAR_PU:%.*]], label [[DOTOMP_LINEAR_PU_DONE:%.*]] // CHECK1: .omp.linear.pu: // CHECK1-NEXT: [[TMP22:%.*]] = load i32, i32* [[LIN4]], align 4 -// CHECK1-NEXT: store i32 [[TMP22]], i32* [[CONV1]], align 8 +// CHECK1-NEXT: store i32 [[TMP22]], i32* [[CONV1]], align 4 // CHECK1-NEXT: [[TMP23:%.*]] = load i32, i32* [[A5]], align 4 -// CHECK1-NEXT: store i32 [[TMP23]], i32* [[CONV2]], align 8 +// CHECK1-NEXT: store i32 [[TMP23]], i32* [[CONV2]], align 4 // CHECK1-NEXT: br label [[DOTOMP_LINEAR_PU_DONE]] // 
CHECK1: .omp.linear.pu.done: // CHECK1-NEXT: ret void @@ -977,11 +977,11 @@ int bar(int n){ // CHECK1-NEXT: store i64 [[AA]], i64* [[AA_ADDR]], align 8 // CHECK1-NEXT: [[CONV:%.*]] = bitcast i64* [[A_ADDR]] to i32* // CHECK1-NEXT: [[CONV1:%.*]] = bitcast i64* [[AA_ADDR]] to i16* -// CHECK1-NEXT: [[TMP0:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK1-NEXT: [[CONV2:%.*]] = bitcast i64* [[A_CASTED]] to i32* // CHECK1-NEXT: store i32 [[TMP0]], i32* [[CONV2]], align 4 // CHECK1-NEXT: [[TMP1:%.*]] = load i64, i64* [[A_CASTED]], align 8 -// CHECK1-NEXT: [[TMP2:%.*]] = load i16, i16* [[CONV1]], align 8 +// CHECK1-NEXT: [[TMP2:%.*]] = load i16, i16* [[CONV1]], align 2 // CHECK1-NEXT: [[CONV3:%.*]] = bitcast i64* [[AA_CASTED]] to i16* // CHECK1-NEXT: store i16 [[TMP2]], i16* [[CONV3]], align 2 // CHECK1-NEXT: [[TMP3:%.*]] = load i64, i64* [[AA_CASTED]], align 8 @@ -1041,14 +1041,14 @@ int bar(int n){ // CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 6, [[MUL]] // CHECK1-NEXT: [[CONV3:%.*]] = trunc i32 [[ADD]] to i16 // CHECK1-NEXT: store i16 [[CONV3]], i16* [[IT]], align 2, !llvm.access.group !32 -// CHECK1-NEXT: [[TMP8:%.*]] = load i32, i32* [[CONV]], align 8, !llvm.access.group !32 +// CHECK1-NEXT: [[TMP8:%.*]] = load i32, i32* [[CONV]], align 4, !llvm.access.group !32 // CHECK1-NEXT: [[ADD4:%.*]] = add nsw i32 [[TMP8]], 1 -// CHECK1-NEXT: store i32 [[ADD4]], i32* [[CONV]], align 8, !llvm.access.group !32 -// CHECK1-NEXT: [[TMP9:%.*]] = load i16, i16* [[CONV1]], align 8, !llvm.access.group !32 +// CHECK1-NEXT: store i32 [[ADD4]], i32* [[CONV]], align 4, !llvm.access.group !32 +// CHECK1-NEXT: [[TMP9:%.*]] = load i16, i16* [[CONV1]], align 2, !llvm.access.group !32 // CHECK1-NEXT: [[CONV5:%.*]] = sext i16 [[TMP9]] to i32 // CHECK1-NEXT: [[ADD6:%.*]] = add nsw i32 [[CONV5]], 1 // CHECK1-NEXT: [[CONV7:%.*]] = trunc i32 [[ADD6]] to i16 -// CHECK1-NEXT: store i16 [[CONV7]], i16* [[CONV1]], align 8, !llvm.access.group !32 +// CHECK1-NEXT: store i16 [[CONV7]], i16* [[CONV1]], align 2, !llvm.access.group !32 // CHECK1-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK1: omp.body.continue: // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] @@ -1106,11 +1106,11 @@ int bar(int n){ // CHECK1-NEXT: [[TMP6:%.*]] = load double*, double** [[CN_ADDR]], align 8 // CHECK1-NEXT: [[TMP7:%.*]] = load %struct.TT*, %struct.TT** [[D_ADDR]], align 8 // CHECK1-NEXT: [[CONV5:%.*]] = bitcast i64* [[DOTCAPTURE_EXPR__ADDR]] to i32* -// CHECK1-NEXT: [[TMP8:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK1-NEXT: [[TMP8:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK1-NEXT: [[CONV6:%.*]] = bitcast i64* [[A_CASTED]] to i32* // CHECK1-NEXT: store i32 [[TMP8]], i32* [[CONV6]], align 4 // CHECK1-NEXT: [[TMP9:%.*]] = load i64, i64* [[A_CASTED]], align 8 -// CHECK1-NEXT: [[TMP10:%.*]] = load i32, i32* [[CONV5]], align 8 +// CHECK1-NEXT: [[TMP10:%.*]] = load i32, i32* [[CONV5]], align 4 // CHECK1-NEXT: [[CONV7:%.*]] = bitcast i64* [[DOTCAPTURE_EXPR__CASTED]] to i32* // CHECK1-NEXT: store i32 [[TMP10]], i32* [[CONV7]], align 4 // CHECK1-NEXT: [[TMP11:%.*]] = load i64, i64* [[DOTCAPTURE_EXPR__CASTED]], align 8 @@ -1166,7 +1166,7 @@ int bar(int n){ // CHECK1-NEXT: store i32 25, i32* [[DOTOMP_UB]], align 4 // CHECK1-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK1-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP8:%.*]] = load i32, i32* [[CONV5]], align 8 +// CHECK1-NEXT: [[TMP8:%.*]] = load i32, i32* [[CONV5]], align 4 // CHECK1-NEXT: 
[[TMP9:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: [[TMP10:%.*]] = load i32, i32* [[TMP9]], align 4 // CHECK1-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP10]], i32 33, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 [[TMP8]]) @@ -1202,9 +1202,9 @@ int bar(int n){ // CHECK1-NEXT: [[SUB:%.*]] = sub nsw i32 122, [[MUL]] // CHECK1-NEXT: [[CONV8:%.*]] = trunc i32 [[SUB]] to i8 // CHECK1-NEXT: store i8 [[CONV8]], i8* [[IT]], align 1, !llvm.access.group !35 -// CHECK1-NEXT: [[TMP19:%.*]] = load i32, i32* [[CONV]], align 8, !llvm.access.group !35 +// CHECK1-NEXT: [[TMP19:%.*]] = load i32, i32* [[CONV]], align 4, !llvm.access.group !35 // CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP19]], 1 -// CHECK1-NEXT: store i32 [[ADD]], i32* [[CONV]], align 8, !llvm.access.group !35 +// CHECK1-NEXT: store i32 [[ADD]], i32* [[CONV]], align 4, !llvm.access.group !35 // CHECK1-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x float], [10 x float]* [[TMP0]], i64 0, i64 2 // CHECK1-NEXT: [[TMP20:%.*]] = load float, float* [[ARRAYIDX]], align 4, !llvm.access.group !35 // CHECK1-NEXT: [[CONV9:%.*]] = fpext float [[TMP20]] to double @@ -1588,7 +1588,7 @@ int bar(int n){ // CHECK1-NEXT: [[TMP1:%.*]] = load i64, i64* [[VLA_ADDR]], align 8 // CHECK1-NEXT: [[TMP2:%.*]] = load i64, i64* [[VLA_ADDR2]], align 8 // CHECK1-NEXT: [[TMP3:%.*]] = load i16*, i16** [[C_ADDR]], align 8 -// CHECK1-NEXT: [[TMP4:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK1-NEXT: [[TMP4:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK1-NEXT: [[CONV3:%.*]] = bitcast i64* [[B_CASTED]] to i32* // CHECK1-NEXT: store i32 [[TMP4]], i32* [[CONV3]], align 4 // CHECK1-NEXT: [[TMP5:%.*]] = load i64, i64* [[B_CASTED]], align 8 @@ -1656,7 +1656,7 @@ int bar(int n){ // CHECK1-NEXT: [[MUL:%.*]] = mul i64 [[TMP11]], 400 // CHECK1-NEXT: [[SUB:%.*]] = sub i64 2000, [[MUL]] // CHECK1-NEXT: store i64 [[SUB]], i64* [[IT]], align 8, !llvm.access.group !38 -// CHECK1-NEXT: [[TMP12:%.*]] = load i32, i32* [[CONV]], align 8, !llvm.access.group !38 +// CHECK1-NEXT: [[TMP12:%.*]] = load i32, i32* [[CONV]], align 4, !llvm.access.group !38 // CHECK1-NEXT: [[CONV4:%.*]] = sitofp i32 [[TMP12]] to double // CHECK1-NEXT: [[ADD:%.*]] = fadd double [[CONV4]], 1.500000e+00 // CHECK1-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_S1:%.*]], %struct.S1* [[TMP0]], i32 0, i32 0 @@ -1710,15 +1710,15 @@ int bar(int n){ // CHECK1-NEXT: [[CONV1:%.*]] = bitcast i64* [[AA_ADDR]] to i16* // CHECK1-NEXT: [[CONV2:%.*]] = bitcast i64* [[AAA_ADDR]] to i8* // CHECK1-NEXT: [[TMP0:%.*]] = load [10 x i32]*, [10 x i32]** [[B_ADDR]], align 8 -// CHECK1-NEXT: [[TMP1:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK1-NEXT: [[CONV3:%.*]] = bitcast i64* [[A_CASTED]] to i32* // CHECK1-NEXT: store i32 [[TMP1]], i32* [[CONV3]], align 4 // CHECK1-NEXT: [[TMP2:%.*]] = load i64, i64* [[A_CASTED]], align 8 -// CHECK1-NEXT: [[TMP3:%.*]] = load i16, i16* [[CONV1]], align 8 +// CHECK1-NEXT: [[TMP3:%.*]] = load i16, i16* [[CONV1]], align 2 // CHECK1-NEXT: [[CONV4:%.*]] = bitcast i64* [[AA_CASTED]] to i16* // CHECK1-NEXT: store i16 [[TMP3]], i16* [[CONV4]], align 2 // CHECK1-NEXT: [[TMP4:%.*]] = load i64, i64* [[AA_CASTED]], align 8 -// CHECK1-NEXT: [[TMP5:%.*]] = load i8, i8* [[CONV2]], align 8 +// CHECK1-NEXT: [[TMP5:%.*]] = load i8, i8* [[CONV2]], align 1 // CHECK1-NEXT: [[CONV5:%.*]] = bitcast i64* [[AAA_CASTED]] to i8* // 
CHECK1-NEXT: store i8 [[TMP5]], i8* [[CONV5]], align 1 // CHECK1-NEXT: [[TMP6:%.*]] = load i64, i64* [[AAA_CASTED]], align 8 @@ -1764,11 +1764,11 @@ int bar(int n){ // CHECK1-NEXT: [[CONV:%.*]] = bitcast i64* [[A_ADDR]] to i32* // CHECK1-NEXT: [[CONV1:%.*]] = bitcast i64* [[AA_ADDR]] to i16* // CHECK1-NEXT: [[TMP0:%.*]] = load [10 x i32]*, [10 x i32]** [[B_ADDR]], align 8 -// CHECK1-NEXT: [[TMP1:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK1-NEXT: [[CONV2:%.*]] = bitcast i64* [[A_CASTED]] to i32* // CHECK1-NEXT: store i32 [[TMP1]], i32* [[CONV2]], align 4 // CHECK1-NEXT: [[TMP2:%.*]] = load i64, i64* [[A_CASTED]], align 8 -// CHECK1-NEXT: [[TMP3:%.*]] = load i16, i16* [[CONV1]], align 8 +// CHECK1-NEXT: [[TMP3:%.*]] = load i16, i16* [[CONV1]], align 2 // CHECK1-NEXT: [[CONV3:%.*]] = bitcast i64* [[AA_CASTED]] to i16* // CHECK1-NEXT: store i16 [[TMP3]], i16* [[CONV3]], align 2 // CHECK1-NEXT: [[TMP4:%.*]] = load i64, i64* [[AA_CASTED]], align 8 @@ -1830,14 +1830,14 @@ int bar(int n){ // CHECK1-NEXT: [[MUL:%.*]] = mul nsw i64 [[TMP8]], 3 // CHECK1-NEXT: [[ADD:%.*]] = add nsw i64 -10, [[MUL]] // CHECK1-NEXT: store i64 [[ADD]], i64* [[I]], align 8, !llvm.access.group !41 -// CHECK1-NEXT: [[TMP9:%.*]] = load i32, i32* [[CONV]], align 8, !llvm.access.group !41 +// CHECK1-NEXT: [[TMP9:%.*]] = load i32, i32* [[CONV]], align 4, !llvm.access.group !41 // CHECK1-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP9]], 1 -// CHECK1-NEXT: store i32 [[ADD3]], i32* [[CONV]], align 8, !llvm.access.group !41 -// CHECK1-NEXT: [[TMP10:%.*]] = load i16, i16* [[CONV1]], align 8, !llvm.access.group !41 +// CHECK1-NEXT: store i32 [[ADD3]], i32* [[CONV]], align 4, !llvm.access.group !41 +// CHECK1-NEXT: [[TMP10:%.*]] = load i16, i16* [[CONV1]], align 2, !llvm.access.group !41 // CHECK1-NEXT: [[CONV4:%.*]] = sext i16 [[TMP10]] to i32 // CHECK1-NEXT: [[ADD5:%.*]] = add nsw i32 [[CONV4]], 1 // CHECK1-NEXT: [[CONV6:%.*]] = trunc i32 [[ADD5]] to i16 -// CHECK1-NEXT: store i16 [[CONV6]], i16* [[CONV1]], align 8, !llvm.access.group !41 +// CHECK1-NEXT: store i16 [[CONV6]], i16* [[CONV1]], align 2, !llvm.access.group !41 // CHECK1-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], [10 x i32]* [[TMP0]], i64 0, i64 2 // CHECK1-NEXT: [[TMP11:%.*]] = load i32, i32* [[ARRAYIDX]], align 4, !llvm.access.group !41 // CHECK1-NEXT: [[ADD7:%.*]] = add nsw i32 [[TMP11]], 1 @@ -2293,7 +2293,7 @@ int bar(int n){ // CHECK2-NEXT: store i64 [[A]], i64* [[A_ADDR]], align 8 // CHECK2-NEXT: store i64 [[K]], i64* [[K_ADDR]], align 8 // CHECK2-NEXT: [[CONV:%.*]] = bitcast i64* [[A_ADDR]] to i32* -// CHECK2-NEXT: [[TMP0:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK2-NEXT: [[TMP0:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK2-NEXT: [[CONV1:%.*]] = bitcast i64* [[A_CASTED]] to i32* // CHECK2-NEXT: store i32 [[TMP0]], i32* [[CONV1]], align 4 // CHECK2-NEXT: [[TMP1:%.*]] = load i64, i64* [[A_CASTED]], align 8 @@ -2360,9 +2360,9 @@ int bar(int n){ // CHECK2-NEXT: [[CONV3:%.*]] = sext i32 [[MUL2]] to i64 // CHECK2-NEXT: [[ADD:%.*]] = add nsw i64 [[TMP8]], [[CONV3]] // CHECK2-NEXT: store i64 [[ADD]], i64* [[K1]], align 8, !llvm.access.group !26 -// CHECK2-NEXT: [[TMP10:%.*]] = load i32, i32* [[CONV]], align 8, !llvm.access.group !26 +// CHECK2-NEXT: [[TMP10:%.*]] = load i32, i32* [[CONV]], align 4, !llvm.access.group !26 // CHECK2-NEXT: [[ADD4:%.*]] = add nsw i32 [[TMP10]], 1 -// CHECK2-NEXT: store i32 [[ADD4]], i32* [[CONV]], align 8, !llvm.access.group !26 +// CHECK2-NEXT: 
store i32 [[ADD4]], i32* [[CONV]], align 4, !llvm.access.group !26 // CHECK2-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK2: omp.body.continue: // CHECK2-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] @@ -2409,15 +2409,15 @@ int bar(int n){ // CHECK2-NEXT: [[CONV:%.*]] = bitcast i64* [[AA_ADDR]] to i16* // CHECK2-NEXT: [[CONV1:%.*]] = bitcast i64* [[LIN_ADDR]] to i32* // CHECK2-NEXT: [[CONV2:%.*]] = bitcast i64* [[A_ADDR]] to i32* -// CHECK2-NEXT: [[TMP0:%.*]] = load i16, i16* [[CONV]], align 8 +// CHECK2-NEXT: [[TMP0:%.*]] = load i16, i16* [[CONV]], align 2 // CHECK2-NEXT: [[CONV3:%.*]] = bitcast i64* [[AA_CASTED]] to i16* // CHECK2-NEXT: store i16 [[TMP0]], i16* [[CONV3]], align 2 // CHECK2-NEXT: [[TMP1:%.*]] = load i64, i64* [[AA_CASTED]], align 8 -// CHECK2-NEXT: [[TMP2:%.*]] = load i32, i32* [[CONV1]], align 8 +// CHECK2-NEXT: [[TMP2:%.*]] = load i32, i32* [[CONV1]], align 4 // CHECK2-NEXT: [[CONV4:%.*]] = bitcast i64* [[LIN_CASTED]] to i32* // CHECK2-NEXT: store i32 [[TMP2]], i32* [[CONV4]], align 4 // CHECK2-NEXT: [[TMP3:%.*]] = load i64, i64* [[LIN_CASTED]], align 8 -// CHECK2-NEXT: [[TMP4:%.*]] = load i32, i32* [[CONV2]], align 8 +// CHECK2-NEXT: [[TMP4:%.*]] = load i32, i32* [[CONV2]], align 4 // CHECK2-NEXT: [[CONV5:%.*]] = bitcast i64* [[A_CASTED]] to i32* // CHECK2-NEXT: store i32 [[TMP4]], i32* [[CONV5]], align 4 // CHECK2-NEXT: [[TMP5:%.*]] = load i64, i64* [[A_CASTED]], align 8 @@ -2453,9 +2453,9 @@ int bar(int n){ // CHECK2-NEXT: [[CONV:%.*]] = bitcast i64* [[AA_ADDR]] to i16* // CHECK2-NEXT: [[CONV1:%.*]] = bitcast i64* [[LIN_ADDR]] to i32* // CHECK2-NEXT: [[CONV2:%.*]] = bitcast i64* [[A_ADDR]] to i32* -// CHECK2-NEXT: [[TMP0:%.*]] = load i32, i32* [[CONV1]], align 8 +// CHECK2-NEXT: [[TMP0:%.*]] = load i32, i32* [[CONV1]], align 4 // CHECK2-NEXT: store i32 [[TMP0]], i32* [[DOTLINEAR_START]], align 4 -// CHECK2-NEXT: [[TMP1:%.*]] = load i32, i32* [[CONV2]], align 8 +// CHECK2-NEXT: [[TMP1:%.*]] = load i32, i32* [[CONV2]], align 4 // CHECK2-NEXT: store i32 [[TMP1]], i32* [[DOTLINEAR_START3]], align 4 // CHECK2-NEXT: [[CALL:%.*]] = call i64 @_Z7get_valv() // CHECK2-NEXT: store i64 [[CALL]], i64* [[DOTLINEAR_STEP]], align 8 @@ -2507,11 +2507,11 @@ int bar(int n){ // CHECK2-NEXT: [[ADD12:%.*]] = add i64 [[CONV10]], [[MUL11]] // CHECK2-NEXT: [[CONV13:%.*]] = trunc i64 [[ADD12]] to i32 // CHECK2-NEXT: store i32 [[CONV13]], i32* [[A5]], align 4, !llvm.access.group !29 -// CHECK2-NEXT: [[TMP16:%.*]] = load i16, i16* [[CONV]], align 8, !llvm.access.group !29 +// CHECK2-NEXT: [[TMP16:%.*]] = load i16, i16* [[CONV]], align 2, !llvm.access.group !29 // CHECK2-NEXT: [[CONV14:%.*]] = sext i16 [[TMP16]] to i32 // CHECK2-NEXT: [[ADD15:%.*]] = add nsw i32 [[CONV14]], 1 // CHECK2-NEXT: [[CONV16:%.*]] = trunc i32 [[ADD15]] to i16 -// CHECK2-NEXT: store i16 [[CONV16]], i16* [[CONV]], align 8, !llvm.access.group !29 +// CHECK2-NEXT: store i16 [[CONV16]], i16* [[CONV]], align 2, !llvm.access.group !29 // CHECK2-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK2: omp.body.continue: // CHECK2-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] @@ -2536,9 +2536,9 @@ int bar(int n){ // CHECK2-NEXT: br i1 [[TMP21]], label [[DOTOMP_LINEAR_PU:%.*]], label [[DOTOMP_LINEAR_PU_DONE:%.*]] // CHECK2: .omp.linear.pu: // CHECK2-NEXT: [[TMP22:%.*]] = load i32, i32* [[LIN4]], align 4 -// CHECK2-NEXT: store i32 [[TMP22]], i32* [[CONV1]], align 8 +// CHECK2-NEXT: store i32 [[TMP22]], i32* [[CONV1]], align 4 // CHECK2-NEXT: [[TMP23:%.*]] = load i32, i32* [[A5]], align 4 -// CHECK2-NEXT: store i32 [[TMP23]], i32* [[CONV2]], 
align 8 +// CHECK2-NEXT: store i32 [[TMP23]], i32* [[CONV2]], align 4 // CHECK2-NEXT: br label [[DOTOMP_LINEAR_PU_DONE]] // CHECK2: .omp.linear.pu.done: // CHECK2-NEXT: ret void @@ -2555,11 +2555,11 @@ int bar(int n){ // CHECK2-NEXT: store i64 [[AA]], i64* [[AA_ADDR]], align 8 // CHECK2-NEXT: [[CONV:%.*]] = bitcast i64* [[A_ADDR]] to i32* // CHECK2-NEXT: [[CONV1:%.*]] = bitcast i64* [[AA_ADDR]] to i16* -// CHECK2-NEXT: [[TMP0:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK2-NEXT: [[TMP0:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK2-NEXT: [[CONV2:%.*]] = bitcast i64* [[A_CASTED]] to i32* // CHECK2-NEXT: store i32 [[TMP0]], i32* [[CONV2]], align 4 // CHECK2-NEXT: [[TMP1:%.*]] = load i64, i64* [[A_CASTED]], align 8 -// CHECK2-NEXT: [[TMP2:%.*]] = load i16, i16* [[CONV1]], align 8 +// CHECK2-NEXT: [[TMP2:%.*]] = load i16, i16* [[CONV1]], align 2 // CHECK2-NEXT: [[CONV3:%.*]] = bitcast i64* [[AA_CASTED]] to i16* // CHECK2-NEXT: store i16 [[TMP2]], i16* [[CONV3]], align 2 // CHECK2-NEXT: [[TMP3:%.*]] = load i64, i64* [[AA_CASTED]], align 8 @@ -2619,14 +2619,14 @@ int bar(int n){ // CHECK2-NEXT: [[ADD:%.*]] = add nsw i32 6, [[MUL]] // CHECK2-NEXT: [[CONV3:%.*]] = trunc i32 [[ADD]] to i16 // CHECK2-NEXT: store i16 [[CONV3]], i16* [[IT]], align 2, !llvm.access.group !32 -// CHECK2-NEXT: [[TMP8:%.*]] = load i32, i32* [[CONV]], align 8, !llvm.access.group !32 +// CHECK2-NEXT: [[TMP8:%.*]] = load i32, i32* [[CONV]], align 4, !llvm.access.group !32 // CHECK2-NEXT: [[ADD4:%.*]] = add nsw i32 [[TMP8]], 1 -// CHECK2-NEXT: store i32 [[ADD4]], i32* [[CONV]], align 8, !llvm.access.group !32 -// CHECK2-NEXT: [[TMP9:%.*]] = load i16, i16* [[CONV1]], align 8, !llvm.access.group !32 +// CHECK2-NEXT: store i32 [[ADD4]], i32* [[CONV]], align 4, !llvm.access.group !32 +// CHECK2-NEXT: [[TMP9:%.*]] = load i16, i16* [[CONV1]], align 2, !llvm.access.group !32 // CHECK2-NEXT: [[CONV5:%.*]] = sext i16 [[TMP9]] to i32 // CHECK2-NEXT: [[ADD6:%.*]] = add nsw i32 [[CONV5]], 1 // CHECK2-NEXT: [[CONV7:%.*]] = trunc i32 [[ADD6]] to i16 -// CHECK2-NEXT: store i16 [[CONV7]], i16* [[CONV1]], align 8, !llvm.access.group !32 +// CHECK2-NEXT: store i16 [[CONV7]], i16* [[CONV1]], align 2, !llvm.access.group !32 // CHECK2-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK2: omp.body.continue: // CHECK2-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] @@ -2684,11 +2684,11 @@ int bar(int n){ // CHECK2-NEXT: [[TMP6:%.*]] = load double*, double** [[CN_ADDR]], align 8 // CHECK2-NEXT: [[TMP7:%.*]] = load %struct.TT*, %struct.TT** [[D_ADDR]], align 8 // CHECK2-NEXT: [[CONV5:%.*]] = bitcast i64* [[DOTCAPTURE_EXPR__ADDR]] to i32* -// CHECK2-NEXT: [[TMP8:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK2-NEXT: [[TMP8:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK2-NEXT: [[CONV6:%.*]] = bitcast i64* [[A_CASTED]] to i32* // CHECK2-NEXT: store i32 [[TMP8]], i32* [[CONV6]], align 4 // CHECK2-NEXT: [[TMP9:%.*]] = load i64, i64* [[A_CASTED]], align 8 -// CHECK2-NEXT: [[TMP10:%.*]] = load i32, i32* [[CONV5]], align 8 +// CHECK2-NEXT: [[TMP10:%.*]] = load i32, i32* [[CONV5]], align 4 // CHECK2-NEXT: [[CONV7:%.*]] = bitcast i64* [[DOTCAPTURE_EXPR__CASTED]] to i32* // CHECK2-NEXT: store i32 [[TMP10]], i32* [[CONV7]], align 4 // CHECK2-NEXT: [[TMP11:%.*]] = load i64, i64* [[DOTCAPTURE_EXPR__CASTED]], align 8 @@ -2744,7 +2744,7 @@ int bar(int n){ // CHECK2-NEXT: store i32 25, i32* [[DOTOMP_UB]], align 4 // CHECK2-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK2-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK2-NEXT: [[TMP8:%.*]] 
= load i32, i32* [[CONV5]], align 8 +// CHECK2-NEXT: [[TMP8:%.*]] = load i32, i32* [[CONV5]], align 4 // CHECK2-NEXT: [[TMP9:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK2-NEXT: [[TMP10:%.*]] = load i32, i32* [[TMP9]], align 4 // CHECK2-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP10]], i32 33, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 [[TMP8]]) @@ -2780,9 +2780,9 @@ int bar(int n){ // CHECK2-NEXT: [[SUB:%.*]] = sub nsw i32 122, [[MUL]] // CHECK2-NEXT: [[CONV8:%.*]] = trunc i32 [[SUB]] to i8 // CHECK2-NEXT: store i8 [[CONV8]], i8* [[IT]], align 1, !llvm.access.group !35 -// CHECK2-NEXT: [[TMP19:%.*]] = load i32, i32* [[CONV]], align 8, !llvm.access.group !35 +// CHECK2-NEXT: [[TMP19:%.*]] = load i32, i32* [[CONV]], align 4, !llvm.access.group !35 // CHECK2-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP19]], 1 -// CHECK2-NEXT: store i32 [[ADD]], i32* [[CONV]], align 8, !llvm.access.group !35 +// CHECK2-NEXT: store i32 [[ADD]], i32* [[CONV]], align 4, !llvm.access.group !35 // CHECK2-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x float], [10 x float]* [[TMP0]], i64 0, i64 2 // CHECK2-NEXT: [[TMP20:%.*]] = load float, float* [[ARRAYIDX]], align 4, !llvm.access.group !35 // CHECK2-NEXT: [[CONV9:%.*]] = fpext float [[TMP20]] to double @@ -3166,7 +3166,7 @@ int bar(int n){ // CHECK2-NEXT: [[TMP1:%.*]] = load i64, i64* [[VLA_ADDR]], align 8 // CHECK2-NEXT: [[TMP2:%.*]] = load i64, i64* [[VLA_ADDR2]], align 8 // CHECK2-NEXT: [[TMP3:%.*]] = load i16*, i16** [[C_ADDR]], align 8 -// CHECK2-NEXT: [[TMP4:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK2-NEXT: [[TMP4:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK2-NEXT: [[CONV3:%.*]] = bitcast i64* [[B_CASTED]] to i32* // CHECK2-NEXT: store i32 [[TMP4]], i32* [[CONV3]], align 4 // CHECK2-NEXT: [[TMP5:%.*]] = load i64, i64* [[B_CASTED]], align 8 @@ -3234,7 +3234,7 @@ int bar(int n){ // CHECK2-NEXT: [[MUL:%.*]] = mul i64 [[TMP11]], 400 // CHECK2-NEXT: [[SUB:%.*]] = sub i64 2000, [[MUL]] // CHECK2-NEXT: store i64 [[SUB]], i64* [[IT]], align 8, !llvm.access.group !38 -// CHECK2-NEXT: [[TMP12:%.*]] = load i32, i32* [[CONV]], align 8, !llvm.access.group !38 +// CHECK2-NEXT: [[TMP12:%.*]] = load i32, i32* [[CONV]], align 4, !llvm.access.group !38 // CHECK2-NEXT: [[CONV4:%.*]] = sitofp i32 [[TMP12]] to double // CHECK2-NEXT: [[ADD:%.*]] = fadd double [[CONV4]], 1.500000e+00 // CHECK2-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_S1:%.*]], %struct.S1* [[TMP0]], i32 0, i32 0 @@ -3288,15 +3288,15 @@ int bar(int n){ // CHECK2-NEXT: [[CONV1:%.*]] = bitcast i64* [[AA_ADDR]] to i16* // CHECK2-NEXT: [[CONV2:%.*]] = bitcast i64* [[AAA_ADDR]] to i8* // CHECK2-NEXT: [[TMP0:%.*]] = load [10 x i32]*, [10 x i32]** [[B_ADDR]], align 8 -// CHECK2-NEXT: [[TMP1:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK2-NEXT: [[TMP1:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK2-NEXT: [[CONV3:%.*]] = bitcast i64* [[A_CASTED]] to i32* // CHECK2-NEXT: store i32 [[TMP1]], i32* [[CONV3]], align 4 // CHECK2-NEXT: [[TMP2:%.*]] = load i64, i64* [[A_CASTED]], align 8 -// CHECK2-NEXT: [[TMP3:%.*]] = load i16, i16* [[CONV1]], align 8 +// CHECK2-NEXT: [[TMP3:%.*]] = load i16, i16* [[CONV1]], align 2 // CHECK2-NEXT: [[CONV4:%.*]] = bitcast i64* [[AA_CASTED]] to i16* // CHECK2-NEXT: store i16 [[TMP3]], i16* [[CONV4]], align 2 // CHECK2-NEXT: [[TMP4:%.*]] = load i64, i64* [[AA_CASTED]], align 8 -// CHECK2-NEXT: [[TMP5:%.*]] = load i8, i8* [[CONV2]], align 8 +// CHECK2-NEXT: 
[[TMP5:%.*]] = load i8, i8* [[CONV2]], align 1 // CHECK2-NEXT: [[CONV5:%.*]] = bitcast i64* [[AAA_CASTED]] to i8* // CHECK2-NEXT: store i8 [[TMP5]], i8* [[CONV5]], align 1 // CHECK2-NEXT: [[TMP6:%.*]] = load i64, i64* [[AAA_CASTED]], align 8 @@ -3342,11 +3342,11 @@ int bar(int n){ // CHECK2-NEXT: [[CONV:%.*]] = bitcast i64* [[A_ADDR]] to i32* // CHECK2-NEXT: [[CONV1:%.*]] = bitcast i64* [[AA_ADDR]] to i16* // CHECK2-NEXT: [[TMP0:%.*]] = load [10 x i32]*, [10 x i32]** [[B_ADDR]], align 8 -// CHECK2-NEXT: [[TMP1:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK2-NEXT: [[TMP1:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK2-NEXT: [[CONV2:%.*]] = bitcast i64* [[A_CASTED]] to i32* // CHECK2-NEXT: store i32 [[TMP1]], i32* [[CONV2]], align 4 // CHECK2-NEXT: [[TMP2:%.*]] = load i64, i64* [[A_CASTED]], align 8 -// CHECK2-NEXT: [[TMP3:%.*]] = load i16, i16* [[CONV1]], align 8 +// CHECK2-NEXT: [[TMP3:%.*]] = load i16, i16* [[CONV1]], align 2 // CHECK2-NEXT: [[CONV3:%.*]] = bitcast i64* [[AA_CASTED]] to i16* // CHECK2-NEXT: store i16 [[TMP3]], i16* [[CONV3]], align 2 // CHECK2-NEXT: [[TMP4:%.*]] = load i64, i64* [[AA_CASTED]], align 8 @@ -3408,14 +3408,14 @@ int bar(int n){ // CHECK2-NEXT: [[MUL:%.*]] = mul nsw i64 [[TMP8]], 3 // CHECK2-NEXT: [[ADD:%.*]] = add nsw i64 -10, [[MUL]] // CHECK2-NEXT: store i64 [[ADD]], i64* [[I]], align 8, !llvm.access.group !41 -// CHECK2-NEXT: [[TMP9:%.*]] = load i32, i32* [[CONV]], align 8, !llvm.access.group !41 +// CHECK2-NEXT: [[TMP9:%.*]] = load i32, i32* [[CONV]], align 4, !llvm.access.group !41 // CHECK2-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP9]], 1 -// CHECK2-NEXT: store i32 [[ADD3]], i32* [[CONV]], align 8, !llvm.access.group !41 -// CHECK2-NEXT: [[TMP10:%.*]] = load i16, i16* [[CONV1]], align 8, !llvm.access.group !41 +// CHECK2-NEXT: store i32 [[ADD3]], i32* [[CONV]], align 4, !llvm.access.group !41 +// CHECK2-NEXT: [[TMP10:%.*]] = load i16, i16* [[CONV1]], align 2, !llvm.access.group !41 // CHECK2-NEXT: [[CONV4:%.*]] = sext i16 [[TMP10]] to i32 // CHECK2-NEXT: [[ADD5:%.*]] = add nsw i32 [[CONV4]], 1 // CHECK2-NEXT: [[CONV6:%.*]] = trunc i32 [[ADD5]] to i16 -// CHECK2-NEXT: store i16 [[CONV6]], i16* [[CONV1]], align 8, !llvm.access.group !41 +// CHECK2-NEXT: store i16 [[CONV6]], i16* [[CONV1]], align 2, !llvm.access.group !41 // CHECK2-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], [10 x i32]* [[TMP0]], i64 0, i64 2 // CHECK2-NEXT: [[TMP11:%.*]] = load i32, i32* [[ARRAYIDX]], align 4, !llvm.access.group !41 // CHECK2-NEXT: [[ADD7:%.*]] = add nsw i32 [[TMP11]], 1 @@ -3970,7 +3970,7 @@ int bar(int n){ // CHECK3-NEXT: store i32 [[LIN]], i32* [[LIN_ADDR]], align 4 // CHECK3-NEXT: store i32 [[A]], i32* [[A_ADDR]], align 4 // CHECK3-NEXT: [[CONV:%.*]] = bitcast i32* [[AA_ADDR]] to i16* -// CHECK3-NEXT: [[TMP0:%.*]] = load i16, i16* [[CONV]], align 4 +// CHECK3-NEXT: [[TMP0:%.*]] = load i16, i16* [[CONV]], align 2 // CHECK3-NEXT: [[CONV1:%.*]] = bitcast i32* [[AA_CASTED]] to i16* // CHECK3-NEXT: store i16 [[TMP0]], i16* [[CONV1]], align 2 // CHECK3-NEXT: [[TMP1:%.*]] = load i32, i32* [[AA_CASTED]], align 4 @@ -4064,11 +4064,11 @@ int bar(int n){ // CHECK3-NEXT: [[ADD10:%.*]] = add i64 [[CONV8]], [[MUL9]] // CHECK3-NEXT: [[CONV11:%.*]] = trunc i64 [[ADD10]] to i32 // CHECK3-NEXT: store i32 [[CONV11]], i32* [[A3]], align 4, !llvm.access.group !30 -// CHECK3-NEXT: [[TMP16:%.*]] = load i16, i16* [[CONV]], align 4, !llvm.access.group !30 +// CHECK3-NEXT: [[TMP16:%.*]] = load i16, i16* [[CONV]], align 2, !llvm.access.group !30 // CHECK3-NEXT: [[CONV12:%.*]] 
= sext i16 [[TMP16]] to i32 // CHECK3-NEXT: [[ADD13:%.*]] = add nsw i32 [[CONV12]], 1 // CHECK3-NEXT: [[CONV14:%.*]] = trunc i32 [[ADD13]] to i16 -// CHECK3-NEXT: store i16 [[CONV14]], i16* [[CONV]], align 4, !llvm.access.group !30 +// CHECK3-NEXT: store i16 [[CONV14]], i16* [[CONV]], align 2, !llvm.access.group !30 // CHECK3-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK3: omp.body.continue: // CHECK3-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] @@ -4114,7 +4114,7 @@ int bar(int n){ // CHECK3-NEXT: [[TMP0:%.*]] = load i32, i32* [[A_ADDR]], align 4 // CHECK3-NEXT: store i32 [[TMP0]], i32* [[A_CASTED]], align 4 // CHECK3-NEXT: [[TMP1:%.*]] = load i32, i32* [[A_CASTED]], align 4 -// CHECK3-NEXT: [[TMP2:%.*]] = load i16, i16* [[CONV]], align 4 +// CHECK3-NEXT: [[TMP2:%.*]] = load i16, i16* [[CONV]], align 2 // CHECK3-NEXT: [[CONV1:%.*]] = bitcast i32* [[AA_CASTED]] to i16* // CHECK3-NEXT: store i16 [[TMP2]], i16* [[CONV1]], align 2 // CHECK3-NEXT: [[TMP3:%.*]] = load i32, i32* [[AA_CASTED]], align 4 @@ -4176,11 +4176,11 @@ int bar(int n){ // CHECK3-NEXT: [[TMP8:%.*]] = load i32, i32* [[A_ADDR]], align 4, !llvm.access.group !33 // CHECK3-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP8]], 1 // CHECK3-NEXT: store i32 [[ADD3]], i32* [[A_ADDR]], align 4, !llvm.access.group !33 -// CHECK3-NEXT: [[TMP9:%.*]] = load i16, i16* [[CONV]], align 4, !llvm.access.group !33 +// CHECK3-NEXT: [[TMP9:%.*]] = load i16, i16* [[CONV]], align 2, !llvm.access.group !33 // CHECK3-NEXT: [[CONV4:%.*]] = sext i16 [[TMP9]] to i32 // CHECK3-NEXT: [[ADD5:%.*]] = add nsw i32 [[CONV4]], 1 // CHECK3-NEXT: [[CONV6:%.*]] = trunc i32 [[ADD5]] to i16 -// CHECK3-NEXT: store i16 [[CONV6]], i16* [[CONV]], align 4, !llvm.access.group !33 +// CHECK3-NEXT: store i16 [[CONV6]], i16* [[CONV]], align 2, !llvm.access.group !33 // CHECK3-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK3: omp.body.continue: // CHECK3-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] @@ -4832,11 +4832,11 @@ int bar(int n){ // CHECK3-NEXT: [[TMP1:%.*]] = load i32, i32* [[A_ADDR]], align 4 // CHECK3-NEXT: store i32 [[TMP1]], i32* [[A_CASTED]], align 4 // CHECK3-NEXT: [[TMP2:%.*]] = load i32, i32* [[A_CASTED]], align 4 -// CHECK3-NEXT: [[TMP3:%.*]] = load i16, i16* [[CONV]], align 4 +// CHECK3-NEXT: [[TMP3:%.*]] = load i16, i16* [[CONV]], align 2 // CHECK3-NEXT: [[CONV2:%.*]] = bitcast i32* [[AA_CASTED]] to i16* // CHECK3-NEXT: store i16 [[TMP3]], i16* [[CONV2]], align 2 // CHECK3-NEXT: [[TMP4:%.*]] = load i32, i32* [[AA_CASTED]], align 4 -// CHECK3-NEXT: [[TMP5:%.*]] = load i8, i8* [[CONV1]], align 4 +// CHECK3-NEXT: [[TMP5:%.*]] = load i8, i8* [[CONV1]], align 1 // CHECK3-NEXT: [[CONV3:%.*]] = bitcast i32* [[AAA_CASTED]] to i8* // CHECK3-NEXT: store i8 [[TMP5]], i8* [[CONV3]], align 1 // CHECK3-NEXT: [[TMP6:%.*]] = load i32, i32* [[AAA_CASTED]], align 4 @@ -4883,7 +4883,7 @@ int bar(int n){ // CHECK3-NEXT: [[TMP1:%.*]] = load i32, i32* [[A_ADDR]], align 4 // CHECK3-NEXT: store i32 [[TMP1]], i32* [[A_CASTED]], align 4 // CHECK3-NEXT: [[TMP2:%.*]] = load i32, i32* [[A_CASTED]], align 4 -// CHECK3-NEXT: [[TMP3:%.*]] = load i16, i16* [[CONV]], align 4 +// CHECK3-NEXT: [[TMP3:%.*]] = load i16, i16* [[CONV]], align 2 // CHECK3-NEXT: [[CONV1:%.*]] = bitcast i32* [[AA_CASTED]] to i16* // CHECK3-NEXT: store i16 [[TMP3]], i16* [[CONV1]], align 2 // CHECK3-NEXT: [[TMP4:%.*]] = load i32, i32* [[AA_CASTED]], align 4 @@ -4947,11 +4947,11 @@ int bar(int n){ // CHECK3-NEXT: [[TMP9:%.*]] = load i32, i32* [[A_ADDR]], align 4, !llvm.access.group !42 // CHECK3-NEXT: [[ADD2:%.*]] = add nsw i32 
[[TMP9]], 1 // CHECK3-NEXT: store i32 [[ADD2]], i32* [[A_ADDR]], align 4, !llvm.access.group !42 -// CHECK3-NEXT: [[TMP10:%.*]] = load i16, i16* [[CONV]], align 4, !llvm.access.group !42 +// CHECK3-NEXT: [[TMP10:%.*]] = load i16, i16* [[CONV]], align 2, !llvm.access.group !42 // CHECK3-NEXT: [[CONV3:%.*]] = sext i16 [[TMP10]] to i32 // CHECK3-NEXT: [[ADD4:%.*]] = add nsw i32 [[CONV3]], 1 // CHECK3-NEXT: [[CONV5:%.*]] = trunc i32 [[ADD4]] to i16 -// CHECK3-NEXT: store i16 [[CONV5]], i16* [[CONV]], align 4, !llvm.access.group !42 +// CHECK3-NEXT: store i16 [[CONV5]], i16* [[CONV]], align 2, !llvm.access.group !42 // CHECK3-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], [10 x i32]* [[TMP0]], i32 0, i32 2 // CHECK3-NEXT: [[TMP11:%.*]] = load i32, i32* [[ARRAYIDX]], align 4, !llvm.access.group !42 // CHECK3-NEXT: [[ADD6:%.*]] = add nsw i32 [[TMP11]], 1 @@ -5506,7 +5506,7 @@ int bar(int n){ // CHECK4-NEXT: store i32 [[LIN]], i32* [[LIN_ADDR]], align 4 // CHECK4-NEXT: store i32 [[A]], i32* [[A_ADDR]], align 4 // CHECK4-NEXT: [[CONV:%.*]] = bitcast i32* [[AA_ADDR]] to i16* -// CHECK4-NEXT: [[TMP0:%.*]] = load i16, i16* [[CONV]], align 4 +// CHECK4-NEXT: [[TMP0:%.*]] = load i16, i16* [[CONV]], align 2 // CHECK4-NEXT: [[CONV1:%.*]] = bitcast i32* [[AA_CASTED]] to i16* // CHECK4-NEXT: store i16 [[TMP0]], i16* [[CONV1]], align 2 // CHECK4-NEXT: [[TMP1:%.*]] = load i32, i32* [[AA_CASTED]], align 4 @@ -5600,11 +5600,11 @@ int bar(int n){ // CHECK4-NEXT: [[ADD10:%.*]] = add i64 [[CONV8]], [[MUL9]] // CHECK4-NEXT: [[CONV11:%.*]] = trunc i64 [[ADD10]] to i32 // CHECK4-NEXT: store i32 [[CONV11]], i32* [[A3]], align 4, !llvm.access.group !30 -// CHECK4-NEXT: [[TMP16:%.*]] = load i16, i16* [[CONV]], align 4, !llvm.access.group !30 +// CHECK4-NEXT: [[TMP16:%.*]] = load i16, i16* [[CONV]], align 2, !llvm.access.group !30 // CHECK4-NEXT: [[CONV12:%.*]] = sext i16 [[TMP16]] to i32 // CHECK4-NEXT: [[ADD13:%.*]] = add nsw i32 [[CONV12]], 1 // CHECK4-NEXT: [[CONV14:%.*]] = trunc i32 [[ADD13]] to i16 -// CHECK4-NEXT: store i16 [[CONV14]], i16* [[CONV]], align 4, !llvm.access.group !30 +// CHECK4-NEXT: store i16 [[CONV14]], i16* [[CONV]], align 2, !llvm.access.group !30 // CHECK4-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK4: omp.body.continue: // CHECK4-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] @@ -5650,7 +5650,7 @@ int bar(int n){ // CHECK4-NEXT: [[TMP0:%.*]] = load i32, i32* [[A_ADDR]], align 4 // CHECK4-NEXT: store i32 [[TMP0]], i32* [[A_CASTED]], align 4 // CHECK4-NEXT: [[TMP1:%.*]] = load i32, i32* [[A_CASTED]], align 4 -// CHECK4-NEXT: [[TMP2:%.*]] = load i16, i16* [[CONV]], align 4 +// CHECK4-NEXT: [[TMP2:%.*]] = load i16, i16* [[CONV]], align 2 // CHECK4-NEXT: [[CONV1:%.*]] = bitcast i32* [[AA_CASTED]] to i16* // CHECK4-NEXT: store i16 [[TMP2]], i16* [[CONV1]], align 2 // CHECK4-NEXT: [[TMP3:%.*]] = load i32, i32* [[AA_CASTED]], align 4 @@ -5712,11 +5712,11 @@ int bar(int n){ // CHECK4-NEXT: [[TMP8:%.*]] = load i32, i32* [[A_ADDR]], align 4, !llvm.access.group !33 // CHECK4-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP8]], 1 // CHECK4-NEXT: store i32 [[ADD3]], i32* [[A_ADDR]], align 4, !llvm.access.group !33 -// CHECK4-NEXT: [[TMP9:%.*]] = load i16, i16* [[CONV]], align 4, !llvm.access.group !33 +// CHECK4-NEXT: [[TMP9:%.*]] = load i16, i16* [[CONV]], align 2, !llvm.access.group !33 // CHECK4-NEXT: [[CONV4:%.*]] = sext i16 [[TMP9]] to i32 // CHECK4-NEXT: [[ADD5:%.*]] = add nsw i32 [[CONV4]], 1 // CHECK4-NEXT: [[CONV6:%.*]] = trunc i32 [[ADD5]] to i16 -// CHECK4-NEXT: store i16 [[CONV6]], i16* 
[[CONV]], align 4, !llvm.access.group !33 +// CHECK4-NEXT: store i16 [[CONV6]], i16* [[CONV]], align 2, !llvm.access.group !33 // CHECK4-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK4: omp.body.continue: // CHECK4-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] @@ -6368,11 +6368,11 @@ int bar(int n){ // CHECK4-NEXT: [[TMP1:%.*]] = load i32, i32* [[A_ADDR]], align 4 // CHECK4-NEXT: store i32 [[TMP1]], i32* [[A_CASTED]], align 4 // CHECK4-NEXT: [[TMP2:%.*]] = load i32, i32* [[A_CASTED]], align 4 -// CHECK4-NEXT: [[TMP3:%.*]] = load i16, i16* [[CONV]], align 4 +// CHECK4-NEXT: [[TMP3:%.*]] = load i16, i16* [[CONV]], align 2 // CHECK4-NEXT: [[CONV2:%.*]] = bitcast i32* [[AA_CASTED]] to i16* // CHECK4-NEXT: store i16 [[TMP3]], i16* [[CONV2]], align 2 // CHECK4-NEXT: [[TMP4:%.*]] = load i32, i32* [[AA_CASTED]], align 4 -// CHECK4-NEXT: [[TMP5:%.*]] = load i8, i8* [[CONV1]], align 4 +// CHECK4-NEXT: [[TMP5:%.*]] = load i8, i8* [[CONV1]], align 1 // CHECK4-NEXT: [[CONV3:%.*]] = bitcast i32* [[AAA_CASTED]] to i8* // CHECK4-NEXT: store i8 [[TMP5]], i8* [[CONV3]], align 1 // CHECK4-NEXT: [[TMP6:%.*]] = load i32, i32* [[AAA_CASTED]], align 4 @@ -6419,7 +6419,7 @@ int bar(int n){ // CHECK4-NEXT: [[TMP1:%.*]] = load i32, i32* [[A_ADDR]], align 4 // CHECK4-NEXT: store i32 [[TMP1]], i32* [[A_CASTED]], align 4 // CHECK4-NEXT: [[TMP2:%.*]] = load i32, i32* [[A_CASTED]], align 4 -// CHECK4-NEXT: [[TMP3:%.*]] = load i16, i16* [[CONV]], align 4 +// CHECK4-NEXT: [[TMP3:%.*]] = load i16, i16* [[CONV]], align 2 // CHECK4-NEXT: [[CONV1:%.*]] = bitcast i32* [[AA_CASTED]] to i16* // CHECK4-NEXT: store i16 [[TMP3]], i16* [[CONV1]], align 2 // CHECK4-NEXT: [[TMP4:%.*]] = load i32, i32* [[AA_CASTED]], align 4 @@ -6483,11 +6483,11 @@ int bar(int n){ // CHECK4-NEXT: [[TMP9:%.*]] = load i32, i32* [[A_ADDR]], align 4, !llvm.access.group !42 // CHECK4-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP9]], 1 // CHECK4-NEXT: store i32 [[ADD2]], i32* [[A_ADDR]], align 4, !llvm.access.group !42 -// CHECK4-NEXT: [[TMP10:%.*]] = load i16, i16* [[CONV]], align 4, !llvm.access.group !42 +// CHECK4-NEXT: [[TMP10:%.*]] = load i16, i16* [[CONV]], align 2, !llvm.access.group !42 // CHECK4-NEXT: [[CONV3:%.*]] = sext i16 [[TMP10]] to i32 // CHECK4-NEXT: [[ADD4:%.*]] = add nsw i32 [[CONV3]], 1 // CHECK4-NEXT: [[CONV5:%.*]] = trunc i32 [[ADD4]] to i16 -// CHECK4-NEXT: store i16 [[CONV5]], i16* [[CONV]], align 4, !llvm.access.group !42 +// CHECK4-NEXT: store i16 [[CONV5]], i16* [[CONV]], align 2, !llvm.access.group !42 // CHECK4-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], [10 x i32]* [[TMP0]], i32 0, i32 2 // CHECK4-NEXT: [[TMP11:%.*]] = load i32, i32* [[ARRAYIDX]], align 4, !llvm.access.group !42 // CHECK4-NEXT: [[ADD6:%.*]] = add nsw i32 [[TMP11]], 1 @@ -6943,7 +6943,7 @@ int bar(int n){ // CHECK5-NEXT: store i64 [[A]], i64* [[A_ADDR]], align 8 // CHECK5-NEXT: store i64 [[K]], i64* [[K_ADDR]], align 8 // CHECK5-NEXT: [[CONV:%.*]] = bitcast i64* [[A_ADDR]] to i32* -// CHECK5-NEXT: [[TMP0:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK5-NEXT: [[TMP0:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK5-NEXT: [[CONV1:%.*]] = bitcast i64* [[A_CASTED]] to i32* // CHECK5-NEXT: store i32 [[TMP0]], i32* [[CONV1]], align 4 // CHECK5-NEXT: [[TMP1:%.*]] = load i64, i64* [[A_CASTED]], align 8 @@ -7010,9 +7010,9 @@ int bar(int n){ // CHECK5-NEXT: [[CONV3:%.*]] = sext i32 [[MUL2]] to i64 // CHECK5-NEXT: [[ADD:%.*]] = add nsw i64 [[TMP8]], [[CONV3]] // CHECK5-NEXT: store i64 [[ADD]], i64* [[K1]], align 8, !llvm.access.group !26 -// CHECK5-NEXT: 
[[TMP10:%.*]] = load i32, i32* [[CONV]], align 8, !llvm.access.group !26 +// CHECK5-NEXT: [[TMP10:%.*]] = load i32, i32* [[CONV]], align 4, !llvm.access.group !26 // CHECK5-NEXT: [[ADD4:%.*]] = add nsw i32 [[TMP10]], 1 -// CHECK5-NEXT: store i32 [[ADD4]], i32* [[CONV]], align 8, !llvm.access.group !26 +// CHECK5-NEXT: store i32 [[ADD4]], i32* [[CONV]], align 4, !llvm.access.group !26 // CHECK5-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK5: omp.body.continue: // CHECK5-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] @@ -7059,15 +7059,15 @@ int bar(int n){ // CHECK5-NEXT: [[CONV:%.*]] = bitcast i64* [[AA_ADDR]] to i16* // CHECK5-NEXT: [[CONV1:%.*]] = bitcast i64* [[LIN_ADDR]] to i32* // CHECK5-NEXT: [[CONV2:%.*]] = bitcast i64* [[A_ADDR]] to i32* -// CHECK5-NEXT: [[TMP0:%.*]] = load i16, i16* [[CONV]], align 8 +// CHECK5-NEXT: [[TMP0:%.*]] = load i16, i16* [[CONV]], align 2 // CHECK5-NEXT: [[CONV3:%.*]] = bitcast i64* [[AA_CASTED]] to i16* // CHECK5-NEXT: store i16 [[TMP0]], i16* [[CONV3]], align 2 // CHECK5-NEXT: [[TMP1:%.*]] = load i64, i64* [[AA_CASTED]], align 8 -// CHECK5-NEXT: [[TMP2:%.*]] = load i32, i32* [[CONV1]], align 8 +// CHECK5-NEXT: [[TMP2:%.*]] = load i32, i32* [[CONV1]], align 4 // CHECK5-NEXT: [[CONV4:%.*]] = bitcast i64* [[LIN_CASTED]] to i32* // CHECK5-NEXT: store i32 [[TMP2]], i32* [[CONV4]], align 4 // CHECK5-NEXT: [[TMP3:%.*]] = load i64, i64* [[LIN_CASTED]], align 8 -// CHECK5-NEXT: [[TMP4:%.*]] = load i32, i32* [[CONV2]], align 8 +// CHECK5-NEXT: [[TMP4:%.*]] = load i32, i32* [[CONV2]], align 4 // CHECK5-NEXT: [[CONV5:%.*]] = bitcast i64* [[A_CASTED]] to i32* // CHECK5-NEXT: store i32 [[TMP4]], i32* [[CONV5]], align 4 // CHECK5-NEXT: [[TMP5:%.*]] = load i64, i64* [[A_CASTED]], align 8 @@ -7103,9 +7103,9 @@ int bar(int n){ // CHECK5-NEXT: [[CONV:%.*]] = bitcast i64* [[AA_ADDR]] to i16* // CHECK5-NEXT: [[CONV1:%.*]] = bitcast i64* [[LIN_ADDR]] to i32* // CHECK5-NEXT: [[CONV2:%.*]] = bitcast i64* [[A_ADDR]] to i32* -// CHECK5-NEXT: [[TMP0:%.*]] = load i32, i32* [[CONV1]], align 8 +// CHECK5-NEXT: [[TMP0:%.*]] = load i32, i32* [[CONV1]], align 4 // CHECK5-NEXT: store i32 [[TMP0]], i32* [[DOTLINEAR_START]], align 4 -// CHECK5-NEXT: [[TMP1:%.*]] = load i32, i32* [[CONV2]], align 8 +// CHECK5-NEXT: [[TMP1:%.*]] = load i32, i32* [[CONV2]], align 4 // CHECK5-NEXT: store i32 [[TMP1]], i32* [[DOTLINEAR_START3]], align 4 // CHECK5-NEXT: [[CALL:%.*]] = call i64 @_Z7get_valv() // CHECK5-NEXT: store i64 [[CALL]], i64* [[DOTLINEAR_STEP]], align 8 @@ -7157,11 +7157,11 @@ int bar(int n){ // CHECK5-NEXT: [[ADD12:%.*]] = add i64 [[CONV10]], [[MUL11]] // CHECK5-NEXT: [[CONV13:%.*]] = trunc i64 [[ADD12]] to i32 // CHECK5-NEXT: store i32 [[CONV13]], i32* [[A5]], align 4, !llvm.access.group !29 -// CHECK5-NEXT: [[TMP16:%.*]] = load i16, i16* [[CONV]], align 8, !llvm.access.group !29 +// CHECK5-NEXT: [[TMP16:%.*]] = load i16, i16* [[CONV]], align 2, !llvm.access.group !29 // CHECK5-NEXT: [[CONV14:%.*]] = sext i16 [[TMP16]] to i32 // CHECK5-NEXT: [[ADD15:%.*]] = add nsw i32 [[CONV14]], 1 // CHECK5-NEXT: [[CONV16:%.*]] = trunc i32 [[ADD15]] to i16 -// CHECK5-NEXT: store i16 [[CONV16]], i16* [[CONV]], align 8, !llvm.access.group !29 +// CHECK5-NEXT: store i16 [[CONV16]], i16* [[CONV]], align 2, !llvm.access.group !29 // CHECK5-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK5: omp.body.continue: // CHECK5-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] @@ -7186,9 +7186,9 @@ int bar(int n){ // CHECK5-NEXT: br i1 [[TMP21]], label [[DOTOMP_LINEAR_PU:%.*]], label [[DOTOMP_LINEAR_PU_DONE:%.*]] // CHECK5: 
.omp.linear.pu: // CHECK5-NEXT: [[TMP22:%.*]] = load i32, i32* [[LIN4]], align 4 -// CHECK5-NEXT: store i32 [[TMP22]], i32* [[CONV1]], align 8 +// CHECK5-NEXT: store i32 [[TMP22]], i32* [[CONV1]], align 4 // CHECK5-NEXT: [[TMP23:%.*]] = load i32, i32* [[A5]], align 4 -// CHECK5-NEXT: store i32 [[TMP23]], i32* [[CONV2]], align 8 +// CHECK5-NEXT: store i32 [[TMP23]], i32* [[CONV2]], align 4 // CHECK5-NEXT: br label [[DOTOMP_LINEAR_PU_DONE]] // CHECK5: .omp.linear.pu.done: // CHECK5-NEXT: ret void @@ -7205,11 +7205,11 @@ int bar(int n){ // CHECK5-NEXT: store i64 [[AA]], i64* [[AA_ADDR]], align 8 // CHECK5-NEXT: [[CONV:%.*]] = bitcast i64* [[A_ADDR]] to i32* // CHECK5-NEXT: [[CONV1:%.*]] = bitcast i64* [[AA_ADDR]] to i16* -// CHECK5-NEXT: [[TMP0:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK5-NEXT: [[TMP0:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK5-NEXT: [[CONV2:%.*]] = bitcast i64* [[A_CASTED]] to i32* // CHECK5-NEXT: store i32 [[TMP0]], i32* [[CONV2]], align 4 // CHECK5-NEXT: [[TMP1:%.*]] = load i64, i64* [[A_CASTED]], align 8 -// CHECK5-NEXT: [[TMP2:%.*]] = load i16, i16* [[CONV1]], align 8 +// CHECK5-NEXT: [[TMP2:%.*]] = load i16, i16* [[CONV1]], align 2 // CHECK5-NEXT: [[CONV3:%.*]] = bitcast i64* [[AA_CASTED]] to i16* // CHECK5-NEXT: store i16 [[TMP2]], i16* [[CONV3]], align 2 // CHECK5-NEXT: [[TMP3:%.*]] = load i64, i64* [[AA_CASTED]], align 8 @@ -7269,14 +7269,14 @@ int bar(int n){ // CHECK5-NEXT: [[ADD:%.*]] = add nsw i32 6, [[MUL]] // CHECK5-NEXT: [[CONV3:%.*]] = trunc i32 [[ADD]] to i16 // CHECK5-NEXT: store i16 [[CONV3]], i16* [[IT]], align 2, !llvm.access.group !32 -// CHECK5-NEXT: [[TMP8:%.*]] = load i32, i32* [[CONV]], align 8, !llvm.access.group !32 +// CHECK5-NEXT: [[TMP8:%.*]] = load i32, i32* [[CONV]], align 4, !llvm.access.group !32 // CHECK5-NEXT: [[ADD4:%.*]] = add nsw i32 [[TMP8]], 1 -// CHECK5-NEXT: store i32 [[ADD4]], i32* [[CONV]], align 8, !llvm.access.group !32 -// CHECK5-NEXT: [[TMP9:%.*]] = load i16, i16* [[CONV1]], align 8, !llvm.access.group !32 +// CHECK5-NEXT: store i32 [[ADD4]], i32* [[CONV]], align 4, !llvm.access.group !32 +// CHECK5-NEXT: [[TMP9:%.*]] = load i16, i16* [[CONV1]], align 2, !llvm.access.group !32 // CHECK5-NEXT: [[CONV5:%.*]] = sext i16 [[TMP9]] to i32 // CHECK5-NEXT: [[ADD6:%.*]] = add nsw i32 [[CONV5]], 1 // CHECK5-NEXT: [[CONV7:%.*]] = trunc i32 [[ADD6]] to i16 -// CHECK5-NEXT: store i16 [[CONV7]], i16* [[CONV1]], align 8, !llvm.access.group !32 +// CHECK5-NEXT: store i16 [[CONV7]], i16* [[CONV1]], align 2, !llvm.access.group !32 // CHECK5-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK5: omp.body.continue: // CHECK5-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] @@ -7334,11 +7334,11 @@ int bar(int n){ // CHECK5-NEXT: [[TMP6:%.*]] = load double*, double** [[CN_ADDR]], align 8 // CHECK5-NEXT: [[TMP7:%.*]] = load %struct.TT*, %struct.TT** [[D_ADDR]], align 8 // CHECK5-NEXT: [[CONV5:%.*]] = bitcast i64* [[DOTCAPTURE_EXPR__ADDR]] to i32* -// CHECK5-NEXT: [[TMP8:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK5-NEXT: [[TMP8:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK5-NEXT: [[CONV6:%.*]] = bitcast i64* [[A_CASTED]] to i32* // CHECK5-NEXT: store i32 [[TMP8]], i32* [[CONV6]], align 4 // CHECK5-NEXT: [[TMP9:%.*]] = load i64, i64* [[A_CASTED]], align 8 -// CHECK5-NEXT: [[TMP10:%.*]] = load i32, i32* [[CONV5]], align 8 +// CHECK5-NEXT: [[TMP10:%.*]] = load i32, i32* [[CONV5]], align 4 // CHECK5-NEXT: [[CONV7:%.*]] = bitcast i64* [[DOTCAPTURE_EXPR__CASTED]] to i32* // CHECK5-NEXT: store i32 [[TMP10]], i32* [[CONV7]], align 4 // 
CHECK5-NEXT: [[TMP11:%.*]] = load i64, i64* [[DOTCAPTURE_EXPR__CASTED]], align 8 @@ -7394,7 +7394,7 @@ int bar(int n){ // CHECK5-NEXT: store i32 25, i32* [[DOTOMP_UB]], align 4 // CHECK5-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK5-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK5-NEXT: [[TMP8:%.*]] = load i32, i32* [[CONV5]], align 8 +// CHECK5-NEXT: [[TMP8:%.*]] = load i32, i32* [[CONV5]], align 4 // CHECK5-NEXT: [[TMP9:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK5-NEXT: [[TMP10:%.*]] = load i32, i32* [[TMP9]], align 4 // CHECK5-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP10]], i32 33, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 [[TMP8]]) @@ -7430,9 +7430,9 @@ int bar(int n){ // CHECK5-NEXT: [[SUB:%.*]] = sub nsw i32 122, [[MUL]] // CHECK5-NEXT: [[CONV8:%.*]] = trunc i32 [[SUB]] to i8 // CHECK5-NEXT: store i8 [[CONV8]], i8* [[IT]], align 1, !llvm.access.group !35 -// CHECK5-NEXT: [[TMP19:%.*]] = load i32, i32* [[CONV]], align 8, !llvm.access.group !35 +// CHECK5-NEXT: [[TMP19:%.*]] = load i32, i32* [[CONV]], align 4, !llvm.access.group !35 // CHECK5-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP19]], 1 -// CHECK5-NEXT: store i32 [[ADD]], i32* [[CONV]], align 8, !llvm.access.group !35 +// CHECK5-NEXT: store i32 [[ADD]], i32* [[CONV]], align 4, !llvm.access.group !35 // CHECK5-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x float], [10 x float]* [[TMP0]], i64 0, i64 2 // CHECK5-NEXT: [[TMP20:%.*]] = load float, float* [[ARRAYIDX]], align 4, !llvm.access.group !35 // CHECK5-NEXT: [[CONV9:%.*]] = fpext float [[TMP20]] to double @@ -7848,17 +7848,17 @@ int bar(int n){ // CHECK5-NEXT: [[TMP3:%.*]] = load i64, i64* [[VLA_ADDR2]], align 8 // CHECK5-NEXT: [[TMP4:%.*]] = load i16*, i16** [[C_ADDR]], align 8 // CHECK5-NEXT: [[CONV3:%.*]] = bitcast i64* [[DOTCAPTURE_EXPR__ADDR]] to i8* -// CHECK5-NEXT: [[TMP5:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK5-NEXT: [[TMP5:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK5-NEXT: [[CONV4:%.*]] = bitcast i64* [[B_CASTED]] to i32* // CHECK5-NEXT: store i32 [[TMP5]], i32* [[CONV4]], align 4 // CHECK5-NEXT: [[TMP6:%.*]] = load i64, i64* [[B_CASTED]], align 8 -// CHECK5-NEXT: [[TMP7:%.*]] = load i8, i8* [[CONV3]], align 8 +// CHECK5-NEXT: [[TMP7:%.*]] = load i8, i8* [[CONV3]], align 1 // CHECK5-NEXT: [[TOBOOL:%.*]] = trunc i8 [[TMP7]] to i1 // CHECK5-NEXT: [[CONV5:%.*]] = bitcast i64* [[DOTCAPTURE_EXPR__CASTED]] to i8* // CHECK5-NEXT: [[FROMBOOL:%.*]] = zext i1 [[TOBOOL]] to i8 // CHECK5-NEXT: store i8 [[FROMBOOL]], i8* [[CONV5]], align 1 // CHECK5-NEXT: [[TMP8:%.*]] = load i64, i64* [[DOTCAPTURE_EXPR__CASTED]], align 8 -// CHECK5-NEXT: [[TMP9:%.*]] = load i8, i8* [[CONV3]], align 8 +// CHECK5-NEXT: [[TMP9:%.*]] = load i8, i8* [[CONV3]], align 1 // CHECK5-NEXT: [[TOBOOL6:%.*]] = trunc i8 [[TMP9]] to i1 // CHECK5-NEXT: br i1 [[TOBOOL6]], label [[OMP_IF_THEN:%.*]], label [[OMP_IF_ELSE:%.*]] // CHECK5: omp_if.then: @@ -7911,7 +7911,7 @@ int bar(int n){ // CHECK5-NEXT: store i64 3, i64* [[DOTOMP_UB]], align 8 // CHECK5-NEXT: store i64 1, i64* [[DOTOMP_STRIDE]], align 8 // CHECK5-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK5-NEXT: [[TMP4:%.*]] = load i8, i8* [[CONV3]], align 8 +// CHECK5-NEXT: [[TMP4:%.*]] = load i8, i8* [[CONV3]], align 1 // CHECK5-NEXT: [[TOBOOL:%.*]] = trunc i8 [[TMP4]] to i1 // CHECK5-NEXT: br i1 [[TOBOOL]], label [[OMP_IF_THEN:%.*]], label [[OMP_IF_ELSE:%.*]] // CHECK5: omp_if.then: 
@@ -7942,7 +7942,7 @@ int bar(int n){ // CHECK5-NEXT: [[MUL:%.*]] = mul i64 [[TMP12]], 400 // CHECK5-NEXT: [[SUB:%.*]] = sub i64 2000, [[MUL]] // CHECK5-NEXT: store i64 [[SUB]], i64* [[IT]], align 8, !llvm.access.group !38 -// CHECK5-NEXT: [[TMP13:%.*]] = load i32, i32* [[CONV]], align 8, !llvm.access.group !38 +// CHECK5-NEXT: [[TMP13:%.*]] = load i32, i32* [[CONV]], align 4, !llvm.access.group !38 // CHECK5-NEXT: [[CONV5:%.*]] = sitofp i32 [[TMP13]] to double // CHECK5-NEXT: [[ADD:%.*]] = fadd double [[CONV5]], 1.500000e+00 // CHECK5-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_S1:%.*]], %struct.S1* [[TMP0]], i32 0, i32 0 @@ -7994,7 +7994,7 @@ int bar(int n){ // CHECK5-NEXT: [[MUL18:%.*]] = mul i64 [[TMP24]], 400 // CHECK5-NEXT: [[SUB19:%.*]] = sub i64 2000, [[MUL18]] // CHECK5-NEXT: store i64 [[SUB19]], i64* [[IT]], align 8 -// CHECK5-NEXT: [[TMP25:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK5-NEXT: [[TMP25:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK5-NEXT: [[CONV20:%.*]] = sitofp i32 [[TMP25]] to double // CHECK5-NEXT: [[ADD21:%.*]] = fadd double [[CONV20]], 1.500000e+00 // CHECK5-NEXT: [[A22:%.*]] = getelementptr inbounds [[STRUCT_S1]], %struct.S1* [[TMP0]], i32 0, i32 0 @@ -8052,15 +8052,15 @@ int bar(int n){ // CHECK5-NEXT: [[CONV1:%.*]] = bitcast i64* [[AA_ADDR]] to i16* // CHECK5-NEXT: [[CONV2:%.*]] = bitcast i64* [[AAA_ADDR]] to i8* // CHECK5-NEXT: [[TMP0:%.*]] = load [10 x i32]*, [10 x i32]** [[B_ADDR]], align 8 -// CHECK5-NEXT: [[TMP1:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK5-NEXT: [[TMP1:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK5-NEXT: [[CONV3:%.*]] = bitcast i64* [[A_CASTED]] to i32* // CHECK5-NEXT: store i32 [[TMP1]], i32* [[CONV3]], align 4 // CHECK5-NEXT: [[TMP2:%.*]] = load i64, i64* [[A_CASTED]], align 8 -// CHECK5-NEXT: [[TMP3:%.*]] = load i16, i16* [[CONV1]], align 8 +// CHECK5-NEXT: [[TMP3:%.*]] = load i16, i16* [[CONV1]], align 2 // CHECK5-NEXT: [[CONV4:%.*]] = bitcast i64* [[AA_CASTED]] to i16* // CHECK5-NEXT: store i16 [[TMP3]], i16* [[CONV4]], align 2 // CHECK5-NEXT: [[TMP4:%.*]] = load i64, i64* [[AA_CASTED]], align 8 -// CHECK5-NEXT: [[TMP5:%.*]] = load i8, i8* [[CONV2]], align 8 +// CHECK5-NEXT: [[TMP5:%.*]] = load i8, i8* [[CONV2]], align 1 // CHECK5-NEXT: [[CONV5:%.*]] = bitcast i64* [[AAA_CASTED]] to i8* // CHECK5-NEXT: store i8 [[TMP5]], i8* [[CONV5]], align 1 // CHECK5-NEXT: [[TMP6:%.*]] = load i64, i64* [[AAA_CASTED]], align 8 @@ -8106,11 +8106,11 @@ int bar(int n){ // CHECK5-NEXT: [[CONV:%.*]] = bitcast i64* [[A_ADDR]] to i32* // CHECK5-NEXT: [[CONV1:%.*]] = bitcast i64* [[AA_ADDR]] to i16* // CHECK5-NEXT: [[TMP0:%.*]] = load [10 x i32]*, [10 x i32]** [[B_ADDR]], align 8 -// CHECK5-NEXT: [[TMP1:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK5-NEXT: [[TMP1:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK5-NEXT: [[CONV2:%.*]] = bitcast i64* [[A_CASTED]] to i32* // CHECK5-NEXT: store i32 [[TMP1]], i32* [[CONV2]], align 4 // CHECK5-NEXT: [[TMP2:%.*]] = load i64, i64* [[A_CASTED]], align 8 -// CHECK5-NEXT: [[TMP3:%.*]] = load i16, i16* [[CONV1]], align 8 +// CHECK5-NEXT: [[TMP3:%.*]] = load i16, i16* [[CONV1]], align 2 // CHECK5-NEXT: [[CONV3:%.*]] = bitcast i64* [[AA_CASTED]] to i16* // CHECK5-NEXT: store i16 [[TMP3]], i16* [[CONV3]], align 2 // CHECK5-NEXT: [[TMP4:%.*]] = load i64, i64* [[AA_CASTED]], align 8 @@ -8172,14 +8172,14 @@ int bar(int n){ // CHECK5-NEXT: [[MUL:%.*]] = mul nsw i64 [[TMP8]], 3 // CHECK5-NEXT: [[ADD:%.*]] = add nsw i64 -10, [[MUL]] // CHECK5-NEXT: store i64 [[ADD]], i64* [[I]], align 8, 
!llvm.access.group !44 -// CHECK5-NEXT: [[TMP9:%.*]] = load i32, i32* [[CONV]], align 8, !llvm.access.group !44 +// CHECK5-NEXT: [[TMP9:%.*]] = load i32, i32* [[CONV]], align 4, !llvm.access.group !44 // CHECK5-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP9]], 1 -// CHECK5-NEXT: store i32 [[ADD3]], i32* [[CONV]], align 8, !llvm.access.group !44 -// CHECK5-NEXT: [[TMP10:%.*]] = load i16, i16* [[CONV1]], align 8, !llvm.access.group !44 +// CHECK5-NEXT: store i32 [[ADD3]], i32* [[CONV]], align 4, !llvm.access.group !44 +// CHECK5-NEXT: [[TMP10:%.*]] = load i16, i16* [[CONV1]], align 2, !llvm.access.group !44 // CHECK5-NEXT: [[CONV4:%.*]] = sext i16 [[TMP10]] to i32 // CHECK5-NEXT: [[ADD5:%.*]] = add nsw i32 [[CONV4]], 1 // CHECK5-NEXT: [[CONV6:%.*]] = trunc i32 [[ADD5]] to i16 -// CHECK5-NEXT: store i16 [[CONV6]], i16* [[CONV1]], align 8, !llvm.access.group !44 +// CHECK5-NEXT: store i16 [[CONV6]], i16* [[CONV1]], align 2, !llvm.access.group !44 // CHECK5-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], [10 x i32]* [[TMP0]], i64 0, i64 2 // CHECK5-NEXT: [[TMP11:%.*]] = load i32, i32* [[ARRAYIDX]], align 4, !llvm.access.group !44 // CHECK5-NEXT: [[ADD7:%.*]] = add nsw i32 [[TMP11]], 1 @@ -8635,7 +8635,7 @@ int bar(int n){ // CHECK6-NEXT: store i64 [[A]], i64* [[A_ADDR]], align 8 // CHECK6-NEXT: store i64 [[K]], i64* [[K_ADDR]], align 8 // CHECK6-NEXT: [[CONV:%.*]] = bitcast i64* [[A_ADDR]] to i32* -// CHECK6-NEXT: [[TMP0:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK6-NEXT: [[TMP0:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK6-NEXT: [[CONV1:%.*]] = bitcast i64* [[A_CASTED]] to i32* // CHECK6-NEXT: store i32 [[TMP0]], i32* [[CONV1]], align 4 // CHECK6-NEXT: [[TMP1:%.*]] = load i64, i64* [[A_CASTED]], align 8 @@ -8702,9 +8702,9 @@ int bar(int n){ // CHECK6-NEXT: [[CONV3:%.*]] = sext i32 [[MUL2]] to i64 // CHECK6-NEXT: [[ADD:%.*]] = add nsw i64 [[TMP8]], [[CONV3]] // CHECK6-NEXT: store i64 [[ADD]], i64* [[K1]], align 8, !llvm.access.group !26 -// CHECK6-NEXT: [[TMP10:%.*]] = load i32, i32* [[CONV]], align 8, !llvm.access.group !26 +// CHECK6-NEXT: [[TMP10:%.*]] = load i32, i32* [[CONV]], align 4, !llvm.access.group !26 // CHECK6-NEXT: [[ADD4:%.*]] = add nsw i32 [[TMP10]], 1 -// CHECK6-NEXT: store i32 [[ADD4]], i32* [[CONV]], align 8, !llvm.access.group !26 +// CHECK6-NEXT: store i32 [[ADD4]], i32* [[CONV]], align 4, !llvm.access.group !26 // CHECK6-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK6: omp.body.continue: // CHECK6-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] @@ -8751,15 +8751,15 @@ int bar(int n){ // CHECK6-NEXT: [[CONV:%.*]] = bitcast i64* [[AA_ADDR]] to i16* // CHECK6-NEXT: [[CONV1:%.*]] = bitcast i64* [[LIN_ADDR]] to i32* // CHECK6-NEXT: [[CONV2:%.*]] = bitcast i64* [[A_ADDR]] to i32* -// CHECK6-NEXT: [[TMP0:%.*]] = load i16, i16* [[CONV]], align 8 +// CHECK6-NEXT: [[TMP0:%.*]] = load i16, i16* [[CONV]], align 2 // CHECK6-NEXT: [[CONV3:%.*]] = bitcast i64* [[AA_CASTED]] to i16* // CHECK6-NEXT: store i16 [[TMP0]], i16* [[CONV3]], align 2 // CHECK6-NEXT: [[TMP1:%.*]] = load i64, i64* [[AA_CASTED]], align 8 -// CHECK6-NEXT: [[TMP2:%.*]] = load i32, i32* [[CONV1]], align 8 +// CHECK6-NEXT: [[TMP2:%.*]] = load i32, i32* [[CONV1]], align 4 // CHECK6-NEXT: [[CONV4:%.*]] = bitcast i64* [[LIN_CASTED]] to i32* // CHECK6-NEXT: store i32 [[TMP2]], i32* [[CONV4]], align 4 // CHECK6-NEXT: [[TMP3:%.*]] = load i64, i64* [[LIN_CASTED]], align 8 -// CHECK6-NEXT: [[TMP4:%.*]] = load i32, i32* [[CONV2]], align 8 +// CHECK6-NEXT: [[TMP4:%.*]] = load i32, i32* [[CONV2]], align 4 // 
CHECK6-NEXT: [[CONV5:%.*]] = bitcast i64* [[A_CASTED]] to i32* // CHECK6-NEXT: store i32 [[TMP4]], i32* [[CONV5]], align 4 // CHECK6-NEXT: [[TMP5:%.*]] = load i64, i64* [[A_CASTED]], align 8 @@ -8795,9 +8795,9 @@ int bar(int n){ // CHECK6-NEXT: [[CONV:%.*]] = bitcast i64* [[AA_ADDR]] to i16* // CHECK6-NEXT: [[CONV1:%.*]] = bitcast i64* [[LIN_ADDR]] to i32* // CHECK6-NEXT: [[CONV2:%.*]] = bitcast i64* [[A_ADDR]] to i32* -// CHECK6-NEXT: [[TMP0:%.*]] = load i32, i32* [[CONV1]], align 8 +// CHECK6-NEXT: [[TMP0:%.*]] = load i32, i32* [[CONV1]], align 4 // CHECK6-NEXT: store i32 [[TMP0]], i32* [[DOTLINEAR_START]], align 4 -// CHECK6-NEXT: [[TMP1:%.*]] = load i32, i32* [[CONV2]], align 8 +// CHECK6-NEXT: [[TMP1:%.*]] = load i32, i32* [[CONV2]], align 4 // CHECK6-NEXT: store i32 [[TMP1]], i32* [[DOTLINEAR_START3]], align 4 // CHECK6-NEXT: [[CALL:%.*]] = call i64 @_Z7get_valv() // CHECK6-NEXT: store i64 [[CALL]], i64* [[DOTLINEAR_STEP]], align 8 @@ -8849,11 +8849,11 @@ int bar(int n){ // CHECK6-NEXT: [[ADD12:%.*]] = add i64 [[CONV10]], [[MUL11]] // CHECK6-NEXT: [[CONV13:%.*]] = trunc i64 [[ADD12]] to i32 // CHECK6-NEXT: store i32 [[CONV13]], i32* [[A5]], align 4, !llvm.access.group !29 -// CHECK6-NEXT: [[TMP16:%.*]] = load i16, i16* [[CONV]], align 8, !llvm.access.group !29 +// CHECK6-NEXT: [[TMP16:%.*]] = load i16, i16* [[CONV]], align 2, !llvm.access.group !29 // CHECK6-NEXT: [[CONV14:%.*]] = sext i16 [[TMP16]] to i32 // CHECK6-NEXT: [[ADD15:%.*]] = add nsw i32 [[CONV14]], 1 // CHECK6-NEXT: [[CONV16:%.*]] = trunc i32 [[ADD15]] to i16 -// CHECK6-NEXT: store i16 [[CONV16]], i16* [[CONV]], align 8, !llvm.access.group !29 +// CHECK6-NEXT: store i16 [[CONV16]], i16* [[CONV]], align 2, !llvm.access.group !29 // CHECK6-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK6: omp.body.continue: // CHECK6-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] @@ -8878,9 +8878,9 @@ int bar(int n){ // CHECK6-NEXT: br i1 [[TMP21]], label [[DOTOMP_LINEAR_PU:%.*]], label [[DOTOMP_LINEAR_PU_DONE:%.*]] // CHECK6: .omp.linear.pu: // CHECK6-NEXT: [[TMP22:%.*]] = load i32, i32* [[LIN4]], align 4 -// CHECK6-NEXT: store i32 [[TMP22]], i32* [[CONV1]], align 8 +// CHECK6-NEXT: store i32 [[TMP22]], i32* [[CONV1]], align 4 // CHECK6-NEXT: [[TMP23:%.*]] = load i32, i32* [[A5]], align 4 -// CHECK6-NEXT: store i32 [[TMP23]], i32* [[CONV2]], align 8 +// CHECK6-NEXT: store i32 [[TMP23]], i32* [[CONV2]], align 4 // CHECK6-NEXT: br label [[DOTOMP_LINEAR_PU_DONE]] // CHECK6: .omp.linear.pu.done: // CHECK6-NEXT: ret void @@ -8897,11 +8897,11 @@ int bar(int n){ // CHECK6-NEXT: store i64 [[AA]], i64* [[AA_ADDR]], align 8 // CHECK6-NEXT: [[CONV:%.*]] = bitcast i64* [[A_ADDR]] to i32* // CHECK6-NEXT: [[CONV1:%.*]] = bitcast i64* [[AA_ADDR]] to i16* -// CHECK6-NEXT: [[TMP0:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK6-NEXT: [[TMP0:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK6-NEXT: [[CONV2:%.*]] = bitcast i64* [[A_CASTED]] to i32* // CHECK6-NEXT: store i32 [[TMP0]], i32* [[CONV2]], align 4 // CHECK6-NEXT: [[TMP1:%.*]] = load i64, i64* [[A_CASTED]], align 8 -// CHECK6-NEXT: [[TMP2:%.*]] = load i16, i16* [[CONV1]], align 8 +// CHECK6-NEXT: [[TMP2:%.*]] = load i16, i16* [[CONV1]], align 2 // CHECK6-NEXT: [[CONV3:%.*]] = bitcast i64* [[AA_CASTED]] to i16* // CHECK6-NEXT: store i16 [[TMP2]], i16* [[CONV3]], align 2 // CHECK6-NEXT: [[TMP3:%.*]] = load i64, i64* [[AA_CASTED]], align 8 @@ -8961,14 +8961,14 @@ int bar(int n){ // CHECK6-NEXT: [[ADD:%.*]] = add nsw i32 6, [[MUL]] // CHECK6-NEXT: [[CONV3:%.*]] = trunc i32 [[ADD]] to i16 // CHECK6-NEXT: 
store i16 [[CONV3]], i16* [[IT]], align 2, !llvm.access.group !32 -// CHECK6-NEXT: [[TMP8:%.*]] = load i32, i32* [[CONV]], align 8, !llvm.access.group !32 +// CHECK6-NEXT: [[TMP8:%.*]] = load i32, i32* [[CONV]], align 4, !llvm.access.group !32 // CHECK6-NEXT: [[ADD4:%.*]] = add nsw i32 [[TMP8]], 1 -// CHECK6-NEXT: store i32 [[ADD4]], i32* [[CONV]], align 8, !llvm.access.group !32 -// CHECK6-NEXT: [[TMP9:%.*]] = load i16, i16* [[CONV1]], align 8, !llvm.access.group !32 +// CHECK6-NEXT: store i32 [[ADD4]], i32* [[CONV]], align 4, !llvm.access.group !32 +// CHECK6-NEXT: [[TMP9:%.*]] = load i16, i16* [[CONV1]], align 2, !llvm.access.group !32 // CHECK6-NEXT: [[CONV5:%.*]] = sext i16 [[TMP9]] to i32 // CHECK6-NEXT: [[ADD6:%.*]] = add nsw i32 [[CONV5]], 1 // CHECK6-NEXT: [[CONV7:%.*]] = trunc i32 [[ADD6]] to i16 -// CHECK6-NEXT: store i16 [[CONV7]], i16* [[CONV1]], align 8, !llvm.access.group !32 +// CHECK6-NEXT: store i16 [[CONV7]], i16* [[CONV1]], align 2, !llvm.access.group !32 // CHECK6-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK6: omp.body.continue: // CHECK6-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] @@ -9026,11 +9026,11 @@ int bar(int n){ // CHECK6-NEXT: [[TMP6:%.*]] = load double*, double** [[CN_ADDR]], align 8 // CHECK6-NEXT: [[TMP7:%.*]] = load %struct.TT*, %struct.TT** [[D_ADDR]], align 8 // CHECK6-NEXT: [[CONV5:%.*]] = bitcast i64* [[DOTCAPTURE_EXPR__ADDR]] to i32* -// CHECK6-NEXT: [[TMP8:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK6-NEXT: [[TMP8:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK6-NEXT: [[CONV6:%.*]] = bitcast i64* [[A_CASTED]] to i32* // CHECK6-NEXT: store i32 [[TMP8]], i32* [[CONV6]], align 4 // CHECK6-NEXT: [[TMP9:%.*]] = load i64, i64* [[A_CASTED]], align 8 -// CHECK6-NEXT: [[TMP10:%.*]] = load i32, i32* [[CONV5]], align 8 +// CHECK6-NEXT: [[TMP10:%.*]] = load i32, i32* [[CONV5]], align 4 // CHECK6-NEXT: [[CONV7:%.*]] = bitcast i64* [[DOTCAPTURE_EXPR__CASTED]] to i32* // CHECK6-NEXT: store i32 [[TMP10]], i32* [[CONV7]], align 4 // CHECK6-NEXT: [[TMP11:%.*]] = load i64, i64* [[DOTCAPTURE_EXPR__CASTED]], align 8 @@ -9086,7 +9086,7 @@ int bar(int n){ // CHECK6-NEXT: store i32 25, i32* [[DOTOMP_UB]], align 4 // CHECK6-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK6-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK6-NEXT: [[TMP8:%.*]] = load i32, i32* [[CONV5]], align 8 +// CHECK6-NEXT: [[TMP8:%.*]] = load i32, i32* [[CONV5]], align 4 // CHECK6-NEXT: [[TMP9:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK6-NEXT: [[TMP10:%.*]] = load i32, i32* [[TMP9]], align 4 // CHECK6-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP10]], i32 33, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 [[TMP8]]) @@ -9122,9 +9122,9 @@ int bar(int n){ // CHECK6-NEXT: [[SUB:%.*]] = sub nsw i32 122, [[MUL]] // CHECK6-NEXT: [[CONV8:%.*]] = trunc i32 [[SUB]] to i8 // CHECK6-NEXT: store i8 [[CONV8]], i8* [[IT]], align 1, !llvm.access.group !35 -// CHECK6-NEXT: [[TMP19:%.*]] = load i32, i32* [[CONV]], align 8, !llvm.access.group !35 +// CHECK6-NEXT: [[TMP19:%.*]] = load i32, i32* [[CONV]], align 4, !llvm.access.group !35 // CHECK6-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP19]], 1 -// CHECK6-NEXT: store i32 [[ADD]], i32* [[CONV]], align 8, !llvm.access.group !35 +// CHECK6-NEXT: store i32 [[ADD]], i32* [[CONV]], align 4, !llvm.access.group !35 // CHECK6-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x float], [10 x float]* [[TMP0]], i64 0, i64 2 // CHECK6-NEXT: 
[[TMP20:%.*]] = load float, float* [[ARRAYIDX]], align 4, !llvm.access.group !35 // CHECK6-NEXT: [[CONV9:%.*]] = fpext float [[TMP20]] to double @@ -9540,17 +9540,17 @@ int bar(int n){ // CHECK6-NEXT: [[TMP3:%.*]] = load i64, i64* [[VLA_ADDR2]], align 8 // CHECK6-NEXT: [[TMP4:%.*]] = load i16*, i16** [[C_ADDR]], align 8 // CHECK6-NEXT: [[CONV3:%.*]] = bitcast i64* [[DOTCAPTURE_EXPR__ADDR]] to i8* -// CHECK6-NEXT: [[TMP5:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK6-NEXT: [[TMP5:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK6-NEXT: [[CONV4:%.*]] = bitcast i64* [[B_CASTED]] to i32* // CHECK6-NEXT: store i32 [[TMP5]], i32* [[CONV4]], align 4 // CHECK6-NEXT: [[TMP6:%.*]] = load i64, i64* [[B_CASTED]], align 8 -// CHECK6-NEXT: [[TMP7:%.*]] = load i8, i8* [[CONV3]], align 8 +// CHECK6-NEXT: [[TMP7:%.*]] = load i8, i8* [[CONV3]], align 1 // CHECK6-NEXT: [[TOBOOL:%.*]] = trunc i8 [[TMP7]] to i1 // CHECK6-NEXT: [[CONV5:%.*]] = bitcast i64* [[DOTCAPTURE_EXPR__CASTED]] to i8* // CHECK6-NEXT: [[FROMBOOL:%.*]] = zext i1 [[TOBOOL]] to i8 // CHECK6-NEXT: store i8 [[FROMBOOL]], i8* [[CONV5]], align 1 // CHECK6-NEXT: [[TMP8:%.*]] = load i64, i64* [[DOTCAPTURE_EXPR__CASTED]], align 8 -// CHECK6-NEXT: [[TMP9:%.*]] = load i8, i8* [[CONV3]], align 8 +// CHECK6-NEXT: [[TMP9:%.*]] = load i8, i8* [[CONV3]], align 1 // CHECK6-NEXT: [[TOBOOL6:%.*]] = trunc i8 [[TMP9]] to i1 // CHECK6-NEXT: br i1 [[TOBOOL6]], label [[OMP_IF_THEN:%.*]], label [[OMP_IF_ELSE:%.*]] // CHECK6: omp_if.then: @@ -9603,7 +9603,7 @@ int bar(int n){ // CHECK6-NEXT: store i64 3, i64* [[DOTOMP_UB]], align 8 // CHECK6-NEXT: store i64 1, i64* [[DOTOMP_STRIDE]], align 8 // CHECK6-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK6-NEXT: [[TMP4:%.*]] = load i8, i8* [[CONV3]], align 8 +// CHECK6-NEXT: [[TMP4:%.*]] = load i8, i8* [[CONV3]], align 1 // CHECK6-NEXT: [[TOBOOL:%.*]] = trunc i8 [[TMP4]] to i1 // CHECK6-NEXT: br i1 [[TOBOOL]], label [[OMP_IF_THEN:%.*]], label [[OMP_IF_ELSE:%.*]] // CHECK6: omp_if.then: @@ -9634,7 +9634,7 @@ int bar(int n){ // CHECK6-NEXT: [[MUL:%.*]] = mul i64 [[TMP12]], 400 // CHECK6-NEXT: [[SUB:%.*]] = sub i64 2000, [[MUL]] // CHECK6-NEXT: store i64 [[SUB]], i64* [[IT]], align 8, !llvm.access.group !38 -// CHECK6-NEXT: [[TMP13:%.*]] = load i32, i32* [[CONV]], align 8, !llvm.access.group !38 +// CHECK6-NEXT: [[TMP13:%.*]] = load i32, i32* [[CONV]], align 4, !llvm.access.group !38 // CHECK6-NEXT: [[CONV5:%.*]] = sitofp i32 [[TMP13]] to double // CHECK6-NEXT: [[ADD:%.*]] = fadd double [[CONV5]], 1.500000e+00 // CHECK6-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_S1:%.*]], %struct.S1* [[TMP0]], i32 0, i32 0 @@ -9686,7 +9686,7 @@ int bar(int n){ // CHECK6-NEXT: [[MUL18:%.*]] = mul i64 [[TMP24]], 400 // CHECK6-NEXT: [[SUB19:%.*]] = sub i64 2000, [[MUL18]] // CHECK6-NEXT: store i64 [[SUB19]], i64* [[IT]], align 8 -// CHECK6-NEXT: [[TMP25:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK6-NEXT: [[TMP25:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK6-NEXT: [[CONV20:%.*]] = sitofp i32 [[TMP25]] to double // CHECK6-NEXT: [[ADD21:%.*]] = fadd double [[CONV20]], 1.500000e+00 // CHECK6-NEXT: [[A22:%.*]] = getelementptr inbounds [[STRUCT_S1]], %struct.S1* [[TMP0]], i32 0, i32 0 @@ -9744,15 +9744,15 @@ int bar(int n){ // CHECK6-NEXT: [[CONV1:%.*]] = bitcast i64* [[AA_ADDR]] to i16* // CHECK6-NEXT: [[CONV2:%.*]] = bitcast i64* [[AAA_ADDR]] to i8* // CHECK6-NEXT: [[TMP0:%.*]] = load [10 x i32]*, [10 x i32]** [[B_ADDR]], align 8 -// CHECK6-NEXT: [[TMP1:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK6-NEXT: 
[[TMP1:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK6-NEXT: [[CONV3:%.*]] = bitcast i64* [[A_CASTED]] to i32* // CHECK6-NEXT: store i32 [[TMP1]], i32* [[CONV3]], align 4 // CHECK6-NEXT: [[TMP2:%.*]] = load i64, i64* [[A_CASTED]], align 8 -// CHECK6-NEXT: [[TMP3:%.*]] = load i16, i16* [[CONV1]], align 8 +// CHECK6-NEXT: [[TMP3:%.*]] = load i16, i16* [[CONV1]], align 2 // CHECK6-NEXT: [[CONV4:%.*]] = bitcast i64* [[AA_CASTED]] to i16* // CHECK6-NEXT: store i16 [[TMP3]], i16* [[CONV4]], align 2 // CHECK6-NEXT: [[TMP4:%.*]] = load i64, i64* [[AA_CASTED]], align 8 -// CHECK6-NEXT: [[TMP5:%.*]] = load i8, i8* [[CONV2]], align 8 +// CHECK6-NEXT: [[TMP5:%.*]] = load i8, i8* [[CONV2]], align 1 // CHECK6-NEXT: [[CONV5:%.*]] = bitcast i64* [[AAA_CASTED]] to i8* // CHECK6-NEXT: store i8 [[TMP5]], i8* [[CONV5]], align 1 // CHECK6-NEXT: [[TMP6:%.*]] = load i64, i64* [[AAA_CASTED]], align 8 @@ -9798,11 +9798,11 @@ int bar(int n){ // CHECK6-NEXT: [[CONV:%.*]] = bitcast i64* [[A_ADDR]] to i32* // CHECK6-NEXT: [[CONV1:%.*]] = bitcast i64* [[AA_ADDR]] to i16* // CHECK6-NEXT: [[TMP0:%.*]] = load [10 x i32]*, [10 x i32]** [[B_ADDR]], align 8 -// CHECK6-NEXT: [[TMP1:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK6-NEXT: [[TMP1:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK6-NEXT: [[CONV2:%.*]] = bitcast i64* [[A_CASTED]] to i32* // CHECK6-NEXT: store i32 [[TMP1]], i32* [[CONV2]], align 4 // CHECK6-NEXT: [[TMP2:%.*]] = load i64, i64* [[A_CASTED]], align 8 -// CHECK6-NEXT: [[TMP3:%.*]] = load i16, i16* [[CONV1]], align 8 +// CHECK6-NEXT: [[TMP3:%.*]] = load i16, i16* [[CONV1]], align 2 // CHECK6-NEXT: [[CONV3:%.*]] = bitcast i64* [[AA_CASTED]] to i16* // CHECK6-NEXT: store i16 [[TMP3]], i16* [[CONV3]], align 2 // CHECK6-NEXT: [[TMP4:%.*]] = load i64, i64* [[AA_CASTED]], align 8 @@ -9864,14 +9864,14 @@ int bar(int n){ // CHECK6-NEXT: [[MUL:%.*]] = mul nsw i64 [[TMP8]], 3 // CHECK6-NEXT: [[ADD:%.*]] = add nsw i64 -10, [[MUL]] // CHECK6-NEXT: store i64 [[ADD]], i64* [[I]], align 8, !llvm.access.group !44 -// CHECK6-NEXT: [[TMP9:%.*]] = load i32, i32* [[CONV]], align 8, !llvm.access.group !44 +// CHECK6-NEXT: [[TMP9:%.*]] = load i32, i32* [[CONV]], align 4, !llvm.access.group !44 // CHECK6-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP9]], 1 -// CHECK6-NEXT: store i32 [[ADD3]], i32* [[CONV]], align 8, !llvm.access.group !44 -// CHECK6-NEXT: [[TMP10:%.*]] = load i16, i16* [[CONV1]], align 8, !llvm.access.group !44 +// CHECK6-NEXT: store i32 [[ADD3]], i32* [[CONV]], align 4, !llvm.access.group !44 +// CHECK6-NEXT: [[TMP10:%.*]] = load i16, i16* [[CONV1]], align 2, !llvm.access.group !44 // CHECK6-NEXT: [[CONV4:%.*]] = sext i16 [[TMP10]] to i32 // CHECK6-NEXT: [[ADD5:%.*]] = add nsw i32 [[CONV4]], 1 // CHECK6-NEXT: [[CONV6:%.*]] = trunc i32 [[ADD5]] to i16 -// CHECK6-NEXT: store i16 [[CONV6]], i16* [[CONV1]], align 8, !llvm.access.group !44 +// CHECK6-NEXT: store i16 [[CONV6]], i16* [[CONV1]], align 2, !llvm.access.group !44 // CHECK6-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], [10 x i32]* [[TMP0]], i64 0, i64 2 // CHECK6-NEXT: [[TMP11:%.*]] = load i32, i32* [[ARRAYIDX]], align 4, !llvm.access.group !44 // CHECK6-NEXT: [[ADD7:%.*]] = add nsw i32 [[TMP11]], 1 @@ -10426,7 +10426,7 @@ int bar(int n){ // CHECK7-NEXT: store i32 [[LIN]], i32* [[LIN_ADDR]], align 4 // CHECK7-NEXT: store i32 [[A]], i32* [[A_ADDR]], align 4 // CHECK7-NEXT: [[CONV:%.*]] = bitcast i32* [[AA_ADDR]] to i16* -// CHECK7-NEXT: [[TMP0:%.*]] = load i16, i16* [[CONV]], align 4 +// CHECK7-NEXT: [[TMP0:%.*]] = load i16, i16* [[CONV]], 
align 2 // CHECK7-NEXT: [[CONV1:%.*]] = bitcast i32* [[AA_CASTED]] to i16* // CHECK7-NEXT: store i16 [[TMP0]], i16* [[CONV1]], align 2 // CHECK7-NEXT: [[TMP1:%.*]] = load i32, i32* [[AA_CASTED]], align 4 @@ -10520,11 +10520,11 @@ int bar(int n){ // CHECK7-NEXT: [[ADD10:%.*]] = add i64 [[CONV8]], [[MUL9]] // CHECK7-NEXT: [[CONV11:%.*]] = trunc i64 [[ADD10]] to i32 // CHECK7-NEXT: store i32 [[CONV11]], i32* [[A3]], align 4, !llvm.access.group !30 -// CHECK7-NEXT: [[TMP16:%.*]] = load i16, i16* [[CONV]], align 4, !llvm.access.group !30 +// CHECK7-NEXT: [[TMP16:%.*]] = load i16, i16* [[CONV]], align 2, !llvm.access.group !30 // CHECK7-NEXT: [[CONV12:%.*]] = sext i16 [[TMP16]] to i32 // CHECK7-NEXT: [[ADD13:%.*]] = add nsw i32 [[CONV12]], 1 // CHECK7-NEXT: [[CONV14:%.*]] = trunc i32 [[ADD13]] to i16 -// CHECK7-NEXT: store i16 [[CONV14]], i16* [[CONV]], align 4, !llvm.access.group !30 +// CHECK7-NEXT: store i16 [[CONV14]], i16* [[CONV]], align 2, !llvm.access.group !30 // CHECK7-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK7: omp.body.continue: // CHECK7-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] @@ -10570,7 +10570,7 @@ int bar(int n){ // CHECK7-NEXT: [[TMP0:%.*]] = load i32, i32* [[A_ADDR]], align 4 // CHECK7-NEXT: store i32 [[TMP0]], i32* [[A_CASTED]], align 4 // CHECK7-NEXT: [[TMP1:%.*]] = load i32, i32* [[A_CASTED]], align 4 -// CHECK7-NEXT: [[TMP2:%.*]] = load i16, i16* [[CONV]], align 4 +// CHECK7-NEXT: [[TMP2:%.*]] = load i16, i16* [[CONV]], align 2 // CHECK7-NEXT: [[CONV1:%.*]] = bitcast i32* [[AA_CASTED]] to i16* // CHECK7-NEXT: store i16 [[TMP2]], i16* [[CONV1]], align 2 // CHECK7-NEXT: [[TMP3:%.*]] = load i32, i32* [[AA_CASTED]], align 4 @@ -10632,11 +10632,11 @@ int bar(int n){ // CHECK7-NEXT: [[TMP8:%.*]] = load i32, i32* [[A_ADDR]], align 4, !llvm.access.group !33 // CHECK7-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP8]], 1 // CHECK7-NEXT: store i32 [[ADD3]], i32* [[A_ADDR]], align 4, !llvm.access.group !33 -// CHECK7-NEXT: [[TMP9:%.*]] = load i16, i16* [[CONV]], align 4, !llvm.access.group !33 +// CHECK7-NEXT: [[TMP9:%.*]] = load i16, i16* [[CONV]], align 2, !llvm.access.group !33 // CHECK7-NEXT: [[CONV4:%.*]] = sext i16 [[TMP9]] to i32 // CHECK7-NEXT: [[ADD5:%.*]] = add nsw i32 [[CONV4]], 1 // CHECK7-NEXT: [[CONV6:%.*]] = trunc i32 [[ADD5]] to i16 -// CHECK7-NEXT: store i16 [[CONV6]], i16* [[CONV]], align 4, !llvm.access.group !33 +// CHECK7-NEXT: store i16 [[CONV6]], i16* [[CONV]], align 2, !llvm.access.group !33 // CHECK7-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK7: omp.body.continue: // CHECK7-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] @@ -11201,13 +11201,13 @@ int bar(int n){ // CHECK7-NEXT: [[TMP5:%.*]] = load i32, i32* [[B_ADDR]], align 4 // CHECK7-NEXT: store i32 [[TMP5]], i32* [[B_CASTED]], align 4 // CHECK7-NEXT: [[TMP6:%.*]] = load i32, i32* [[B_CASTED]], align 4 -// CHECK7-NEXT: [[TMP7:%.*]] = load i8, i8* [[CONV]], align 4 +// CHECK7-NEXT: [[TMP7:%.*]] = load i8, i8* [[CONV]], align 1 // CHECK7-NEXT: [[TOBOOL:%.*]] = trunc i8 [[TMP7]] to i1 // CHECK7-NEXT: [[CONV3:%.*]] = bitcast i32* [[DOTCAPTURE_EXPR__CASTED]] to i8* // CHECK7-NEXT: [[FROMBOOL:%.*]] = zext i1 [[TOBOOL]] to i8 // CHECK7-NEXT: store i8 [[FROMBOOL]], i8* [[CONV3]], align 1 // CHECK7-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR__CASTED]], align 4 -// CHECK7-NEXT: [[TMP9:%.*]] = load i8, i8* [[CONV]], align 4 +// CHECK7-NEXT: [[TMP9:%.*]] = load i8, i8* [[CONV]], align 1 // CHECK7-NEXT: [[TOBOOL4:%.*]] = trunc i8 [[TMP9]] to i1 // CHECK7-NEXT: br i1 [[TOBOOL4]], label [[OMP_IF_THEN:%.*]], label 
[[OMP_IF_ELSE:%.*]] // CHECK7: omp_if.then: @@ -11259,7 +11259,7 @@ int bar(int n){ // CHECK7-NEXT: store i64 3, i64* [[DOTOMP_UB]], align 8 // CHECK7-NEXT: store i64 1, i64* [[DOTOMP_STRIDE]], align 8 // CHECK7-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK7-NEXT: [[TMP4:%.*]] = load i8, i8* [[CONV]], align 4 +// CHECK7-NEXT: [[TMP4:%.*]] = load i8, i8* [[CONV]], align 1 // CHECK7-NEXT: [[TOBOOL:%.*]] = trunc i8 [[TMP4]] to i1 // CHECK7-NEXT: br i1 [[TOBOOL]], label [[OMP_IF_THEN:%.*]], label [[OMP_IF_ELSE:%.*]] // CHECK7: omp_if.then: @@ -11402,11 +11402,11 @@ int bar(int n){ // CHECK7-NEXT: [[TMP1:%.*]] = load i32, i32* [[A_ADDR]], align 4 // CHECK7-NEXT: store i32 [[TMP1]], i32* [[A_CASTED]], align 4 // CHECK7-NEXT: [[TMP2:%.*]] = load i32, i32* [[A_CASTED]], align 4 -// CHECK7-NEXT: [[TMP3:%.*]] = load i16, i16* [[CONV]], align 4 +// CHECK7-NEXT: [[TMP3:%.*]] = load i16, i16* [[CONV]], align 2 // CHECK7-NEXT: [[CONV2:%.*]] = bitcast i32* [[AA_CASTED]] to i16* // CHECK7-NEXT: store i16 [[TMP3]], i16* [[CONV2]], align 2 // CHECK7-NEXT: [[TMP4:%.*]] = load i32, i32* [[AA_CASTED]], align 4 -// CHECK7-NEXT: [[TMP5:%.*]] = load i8, i8* [[CONV1]], align 4 +// CHECK7-NEXT: [[TMP5:%.*]] = load i8, i8* [[CONV1]], align 1 // CHECK7-NEXT: [[CONV3:%.*]] = bitcast i32* [[AAA_CASTED]] to i8* // CHECK7-NEXT: store i8 [[TMP5]], i8* [[CONV3]], align 1 // CHECK7-NEXT: [[TMP6:%.*]] = load i32, i32* [[AAA_CASTED]], align 4 @@ -11453,7 +11453,7 @@ int bar(int n){ // CHECK7-NEXT: [[TMP1:%.*]] = load i32, i32* [[A_ADDR]], align 4 // CHECK7-NEXT: store i32 [[TMP1]], i32* [[A_CASTED]], align 4 // CHECK7-NEXT: [[TMP2:%.*]] = load i32, i32* [[A_CASTED]], align 4 -// CHECK7-NEXT: [[TMP3:%.*]] = load i16, i16* [[CONV]], align 4 +// CHECK7-NEXT: [[TMP3:%.*]] = load i16, i16* [[CONV]], align 2 // CHECK7-NEXT: [[CONV1:%.*]] = bitcast i32* [[AA_CASTED]] to i16* // CHECK7-NEXT: store i16 [[TMP3]], i16* [[CONV1]], align 2 // CHECK7-NEXT: [[TMP4:%.*]] = load i32, i32* [[AA_CASTED]], align 4 @@ -11517,11 +11517,11 @@ int bar(int n){ // CHECK7-NEXT: [[TMP9:%.*]] = load i32, i32* [[A_ADDR]], align 4, !llvm.access.group !45 // CHECK7-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP9]], 1 // CHECK7-NEXT: store i32 [[ADD2]], i32* [[A_ADDR]], align 4, !llvm.access.group !45 -// CHECK7-NEXT: [[TMP10:%.*]] = load i16, i16* [[CONV]], align 4, !llvm.access.group !45 +// CHECK7-NEXT: [[TMP10:%.*]] = load i16, i16* [[CONV]], align 2, !llvm.access.group !45 // CHECK7-NEXT: [[CONV3:%.*]] = sext i16 [[TMP10]] to i32 // CHECK7-NEXT: [[ADD4:%.*]] = add nsw i32 [[CONV3]], 1 // CHECK7-NEXT: [[CONV5:%.*]] = trunc i32 [[ADD4]] to i16 -// CHECK7-NEXT: store i16 [[CONV5]], i16* [[CONV]], align 4, !llvm.access.group !45 +// CHECK7-NEXT: store i16 [[CONV5]], i16* [[CONV]], align 2, !llvm.access.group !45 // CHECK7-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], [10 x i32]* [[TMP0]], i32 0, i32 2 // CHECK7-NEXT: [[TMP11:%.*]] = load i32, i32* [[ARRAYIDX]], align 4, !llvm.access.group !45 // CHECK7-NEXT: [[ADD6:%.*]] = add nsw i32 [[TMP11]], 1 @@ -12076,7 +12076,7 @@ int bar(int n){ // CHECK8-NEXT: store i32 [[LIN]], i32* [[LIN_ADDR]], align 4 // CHECK8-NEXT: store i32 [[A]], i32* [[A_ADDR]], align 4 // CHECK8-NEXT: [[CONV:%.*]] = bitcast i32* [[AA_ADDR]] to i16* -// CHECK8-NEXT: [[TMP0:%.*]] = load i16, i16* [[CONV]], align 4 +// CHECK8-NEXT: [[TMP0:%.*]] = load i16, i16* [[CONV]], align 2 // CHECK8-NEXT: [[CONV1:%.*]] = bitcast i32* [[AA_CASTED]] to i16* // CHECK8-NEXT: store i16 [[TMP0]], i16* [[CONV1]], align 2 // CHECK8-NEXT: 
[[TMP1:%.*]] = load i32, i32* [[AA_CASTED]], align 4 @@ -12170,11 +12170,11 @@ int bar(int n){ // CHECK8-NEXT: [[ADD10:%.*]] = add i64 [[CONV8]], [[MUL9]] // CHECK8-NEXT: [[CONV11:%.*]] = trunc i64 [[ADD10]] to i32 // CHECK8-NEXT: store i32 [[CONV11]], i32* [[A3]], align 4, !llvm.access.group !30 -// CHECK8-NEXT: [[TMP16:%.*]] = load i16, i16* [[CONV]], align 4, !llvm.access.group !30 +// CHECK8-NEXT: [[TMP16:%.*]] = load i16, i16* [[CONV]], align 2, !llvm.access.group !30 // CHECK8-NEXT: [[CONV12:%.*]] = sext i16 [[TMP16]] to i32 // CHECK8-NEXT: [[ADD13:%.*]] = add nsw i32 [[CONV12]], 1 // CHECK8-NEXT: [[CONV14:%.*]] = trunc i32 [[ADD13]] to i16 -// CHECK8-NEXT: store i16 [[CONV14]], i16* [[CONV]], align 4, !llvm.access.group !30 +// CHECK8-NEXT: store i16 [[CONV14]], i16* [[CONV]], align 2, !llvm.access.group !30 // CHECK8-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK8: omp.body.continue: // CHECK8-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] @@ -12220,7 +12220,7 @@ int bar(int n){ // CHECK8-NEXT: [[TMP0:%.*]] = load i32, i32* [[A_ADDR]], align 4 // CHECK8-NEXT: store i32 [[TMP0]], i32* [[A_CASTED]], align 4 // CHECK8-NEXT: [[TMP1:%.*]] = load i32, i32* [[A_CASTED]], align 4 -// CHECK8-NEXT: [[TMP2:%.*]] = load i16, i16* [[CONV]], align 4 +// CHECK8-NEXT: [[TMP2:%.*]] = load i16, i16* [[CONV]], align 2 // CHECK8-NEXT: [[CONV1:%.*]] = bitcast i32* [[AA_CASTED]] to i16* // CHECK8-NEXT: store i16 [[TMP2]], i16* [[CONV1]], align 2 // CHECK8-NEXT: [[TMP3:%.*]] = load i32, i32* [[AA_CASTED]], align 4 @@ -12282,11 +12282,11 @@ int bar(int n){ // CHECK8-NEXT: [[TMP8:%.*]] = load i32, i32* [[A_ADDR]], align 4, !llvm.access.group !33 // CHECK8-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP8]], 1 // CHECK8-NEXT: store i32 [[ADD3]], i32* [[A_ADDR]], align 4, !llvm.access.group !33 -// CHECK8-NEXT: [[TMP9:%.*]] = load i16, i16* [[CONV]], align 4, !llvm.access.group !33 +// CHECK8-NEXT: [[TMP9:%.*]] = load i16, i16* [[CONV]], align 2, !llvm.access.group !33 // CHECK8-NEXT: [[CONV4:%.*]] = sext i16 [[TMP9]] to i32 // CHECK8-NEXT: [[ADD5:%.*]] = add nsw i32 [[CONV4]], 1 // CHECK8-NEXT: [[CONV6:%.*]] = trunc i32 [[ADD5]] to i16 -// CHECK8-NEXT: store i16 [[CONV6]], i16* [[CONV]], align 4, !llvm.access.group !33 +// CHECK8-NEXT: store i16 [[CONV6]], i16* [[CONV]], align 2, !llvm.access.group !33 // CHECK8-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK8: omp.body.continue: // CHECK8-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] @@ -12851,13 +12851,13 @@ int bar(int n){ // CHECK8-NEXT: [[TMP5:%.*]] = load i32, i32* [[B_ADDR]], align 4 // CHECK8-NEXT: store i32 [[TMP5]], i32* [[B_CASTED]], align 4 // CHECK8-NEXT: [[TMP6:%.*]] = load i32, i32* [[B_CASTED]], align 4 -// CHECK8-NEXT: [[TMP7:%.*]] = load i8, i8* [[CONV]], align 4 +// CHECK8-NEXT: [[TMP7:%.*]] = load i8, i8* [[CONV]], align 1 // CHECK8-NEXT: [[TOBOOL:%.*]] = trunc i8 [[TMP7]] to i1 // CHECK8-NEXT: [[CONV3:%.*]] = bitcast i32* [[DOTCAPTURE_EXPR__CASTED]] to i8* // CHECK8-NEXT: [[FROMBOOL:%.*]] = zext i1 [[TOBOOL]] to i8 // CHECK8-NEXT: store i8 [[FROMBOOL]], i8* [[CONV3]], align 1 // CHECK8-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR__CASTED]], align 4 -// CHECK8-NEXT: [[TMP9:%.*]] = load i8, i8* [[CONV]], align 4 +// CHECK8-NEXT: [[TMP9:%.*]] = load i8, i8* [[CONV]], align 1 // CHECK8-NEXT: [[TOBOOL4:%.*]] = trunc i8 [[TMP9]] to i1 // CHECK8-NEXT: br i1 [[TOBOOL4]], label [[OMP_IF_THEN:%.*]], label [[OMP_IF_ELSE:%.*]] // CHECK8: omp_if.then: @@ -12909,7 +12909,7 @@ int bar(int n){ // CHECK8-NEXT: store i64 3, i64* [[DOTOMP_UB]], align 8 // 
CHECK8-NEXT: store i64 1, i64* [[DOTOMP_STRIDE]], align 8 // CHECK8-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK8-NEXT: [[TMP4:%.*]] = load i8, i8* [[CONV]], align 4 +// CHECK8-NEXT: [[TMP4:%.*]] = load i8, i8* [[CONV]], align 1 // CHECK8-NEXT: [[TOBOOL:%.*]] = trunc i8 [[TMP4]] to i1 // CHECK8-NEXT: br i1 [[TOBOOL]], label [[OMP_IF_THEN:%.*]], label [[OMP_IF_ELSE:%.*]] // CHECK8: omp_if.then: @@ -13052,11 +13052,11 @@ int bar(int n){ // CHECK8-NEXT: [[TMP1:%.*]] = load i32, i32* [[A_ADDR]], align 4 // CHECK8-NEXT: store i32 [[TMP1]], i32* [[A_CASTED]], align 4 // CHECK8-NEXT: [[TMP2:%.*]] = load i32, i32* [[A_CASTED]], align 4 -// CHECK8-NEXT: [[TMP3:%.*]] = load i16, i16* [[CONV]], align 4 +// CHECK8-NEXT: [[TMP3:%.*]] = load i16, i16* [[CONV]], align 2 // CHECK8-NEXT: [[CONV2:%.*]] = bitcast i32* [[AA_CASTED]] to i16* // CHECK8-NEXT: store i16 [[TMP3]], i16* [[CONV2]], align 2 // CHECK8-NEXT: [[TMP4:%.*]] = load i32, i32* [[AA_CASTED]], align 4 -// CHECK8-NEXT: [[TMP5:%.*]] = load i8, i8* [[CONV1]], align 4 +// CHECK8-NEXT: [[TMP5:%.*]] = load i8, i8* [[CONV1]], align 1 // CHECK8-NEXT: [[CONV3:%.*]] = bitcast i32* [[AAA_CASTED]] to i8* // CHECK8-NEXT: store i8 [[TMP5]], i8* [[CONV3]], align 1 // CHECK8-NEXT: [[TMP6:%.*]] = load i32, i32* [[AAA_CASTED]], align 4 @@ -13103,7 +13103,7 @@ int bar(int n){ // CHECK8-NEXT: [[TMP1:%.*]] = load i32, i32* [[A_ADDR]], align 4 // CHECK8-NEXT: store i32 [[TMP1]], i32* [[A_CASTED]], align 4 // CHECK8-NEXT: [[TMP2:%.*]] = load i32, i32* [[A_CASTED]], align 4 -// CHECK8-NEXT: [[TMP3:%.*]] = load i16, i16* [[CONV]], align 4 +// CHECK8-NEXT: [[TMP3:%.*]] = load i16, i16* [[CONV]], align 2 // CHECK8-NEXT: [[CONV1:%.*]] = bitcast i32* [[AA_CASTED]] to i16* // CHECK8-NEXT: store i16 [[TMP3]], i16* [[CONV1]], align 2 // CHECK8-NEXT: [[TMP4:%.*]] = load i32, i32* [[AA_CASTED]], align 4 @@ -13167,11 +13167,11 @@ int bar(int n){ // CHECK8-NEXT: [[TMP9:%.*]] = load i32, i32* [[A_ADDR]], align 4, !llvm.access.group !45 // CHECK8-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP9]], 1 // CHECK8-NEXT: store i32 [[ADD2]], i32* [[A_ADDR]], align 4, !llvm.access.group !45 -// CHECK8-NEXT: [[TMP10:%.*]] = load i16, i16* [[CONV]], align 4, !llvm.access.group !45 +// CHECK8-NEXT: [[TMP10:%.*]] = load i16, i16* [[CONV]], align 2, !llvm.access.group !45 // CHECK8-NEXT: [[CONV3:%.*]] = sext i16 [[TMP10]] to i32 // CHECK8-NEXT: [[ADD4:%.*]] = add nsw i32 [[CONV3]], 1 // CHECK8-NEXT: [[CONV5:%.*]] = trunc i32 [[ADD4]] to i16 -// CHECK8-NEXT: store i16 [[CONV5]], i16* [[CONV]], align 4, !llvm.access.group !45 +// CHECK8-NEXT: store i16 [[CONV5]], i16* [[CONV]], align 2, !llvm.access.group !45 // CHECK8-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], [10 x i32]* [[TMP0]], i32 0, i32 2 // CHECK8-NEXT: [[TMP11:%.*]] = load i32, i32* [[ARRAYIDX]], align 4, !llvm.access.group !45 // CHECK8-NEXT: [[ADD6:%.*]] = add nsw i32 [[TMP11]], 1 @@ -17302,15 +17302,15 @@ int bar(int n){ // CHECK17-NEXT: [[CONV:%.*]] = bitcast i64* [[AA_ADDR]] to i16* // CHECK17-NEXT: [[CONV1:%.*]] = bitcast i64* [[LIN_ADDR]] to i32* // CHECK17-NEXT: [[CONV2:%.*]] = bitcast i64* [[A_ADDR]] to i32* -// CHECK17-NEXT: [[TMP0:%.*]] = load i16, i16* [[CONV]], align 8 +// CHECK17-NEXT: [[TMP0:%.*]] = load i16, i16* [[CONV]], align 2 // CHECK17-NEXT: [[CONV3:%.*]] = bitcast i64* [[AA_CASTED]] to i16* // CHECK17-NEXT: store i16 [[TMP0]], i16* [[CONV3]], align 2 // CHECK17-NEXT: [[TMP1:%.*]] = load i64, i64* [[AA_CASTED]], align 8 -// CHECK17-NEXT: [[TMP2:%.*]] = load i32, i32* [[CONV1]], align 8 +// 
CHECK17-NEXT: [[TMP2:%.*]] = load i32, i32* [[CONV1]], align 4 // CHECK17-NEXT: [[CONV4:%.*]] = bitcast i64* [[LIN_CASTED]] to i32* // CHECK17-NEXT: store i32 [[TMP2]], i32* [[CONV4]], align 4 // CHECK17-NEXT: [[TMP3:%.*]] = load i64, i64* [[LIN_CASTED]], align 8 -// CHECK17-NEXT: [[TMP4:%.*]] = load i32, i32* [[CONV2]], align 8 +// CHECK17-NEXT: [[TMP4:%.*]] = load i32, i32* [[CONV2]], align 4 // CHECK17-NEXT: [[CONV5:%.*]] = bitcast i64* [[A_CASTED]] to i32* // CHECK17-NEXT: store i32 [[TMP4]], i32* [[CONV5]], align 4 // CHECK17-NEXT: [[TMP5:%.*]] = load i64, i64* [[A_CASTED]], align 8 @@ -17346,9 +17346,9 @@ int bar(int n){ // CHECK17-NEXT: [[CONV:%.*]] = bitcast i64* [[AA_ADDR]] to i16* // CHECK17-NEXT: [[CONV1:%.*]] = bitcast i64* [[LIN_ADDR]] to i32* // CHECK17-NEXT: [[CONV2:%.*]] = bitcast i64* [[A_ADDR]] to i32* -// CHECK17-NEXT: [[TMP0:%.*]] = load i32, i32* [[CONV1]], align 8 +// CHECK17-NEXT: [[TMP0:%.*]] = load i32, i32* [[CONV1]], align 4 // CHECK17-NEXT: store i32 [[TMP0]], i32* [[DOTLINEAR_START]], align 4 -// CHECK17-NEXT: [[TMP1:%.*]] = load i32, i32* [[CONV2]], align 8 +// CHECK17-NEXT: [[TMP1:%.*]] = load i32, i32* [[CONV2]], align 4 // CHECK17-NEXT: store i32 [[TMP1]], i32* [[DOTLINEAR_START3]], align 4 // CHECK17-NEXT: [[CALL:%.*]] = call i64 @_Z7get_valv() #[[ATTR5:[0-9]+]] // CHECK17-NEXT: store i64 [[CALL]], i64* [[DOTLINEAR_STEP]], align 8 @@ -17400,11 +17400,11 @@ int bar(int n){ // CHECK17-NEXT: [[ADD12:%.*]] = add i64 [[CONV10]], [[MUL11]] // CHECK17-NEXT: [[CONV13:%.*]] = trunc i64 [[ADD12]] to i32 // CHECK17-NEXT: store i32 [[CONV13]], i32* [[A5]], align 4, !llvm.access.group !17 -// CHECK17-NEXT: [[TMP16:%.*]] = load i16, i16* [[CONV]], align 8, !llvm.access.group !17 +// CHECK17-NEXT: [[TMP16:%.*]] = load i16, i16* [[CONV]], align 2, !llvm.access.group !17 // CHECK17-NEXT: [[CONV14:%.*]] = sext i16 [[TMP16]] to i32 // CHECK17-NEXT: [[ADD15:%.*]] = add nsw i32 [[CONV14]], 1 // CHECK17-NEXT: [[CONV16:%.*]] = trunc i32 [[ADD15]] to i16 -// CHECK17-NEXT: store i16 [[CONV16]], i16* [[CONV]], align 8, !llvm.access.group !17 +// CHECK17-NEXT: store i16 [[CONV16]], i16* [[CONV]], align 2, !llvm.access.group !17 // CHECK17-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK17: omp.body.continue: // CHECK17-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] @@ -17429,9 +17429,9 @@ int bar(int n){ // CHECK17-NEXT: br i1 [[TMP21]], label [[DOTOMP_LINEAR_PU:%.*]], label [[DOTOMP_LINEAR_PU_DONE:%.*]] // CHECK17: .omp.linear.pu: // CHECK17-NEXT: [[TMP22:%.*]] = load i32, i32* [[LIN4]], align 4 -// CHECK17-NEXT: store i32 [[TMP22]], i32* [[CONV1]], align 8 +// CHECK17-NEXT: store i32 [[TMP22]], i32* [[CONV1]], align 4 // CHECK17-NEXT: [[TMP23:%.*]] = load i32, i32* [[A5]], align 4 -// CHECK17-NEXT: store i32 [[TMP23]], i32* [[CONV2]], align 8 +// CHECK17-NEXT: store i32 [[TMP23]], i32* [[CONV2]], align 4 // CHECK17-NEXT: br label [[DOTOMP_LINEAR_PU_DONE]] // CHECK17: .omp.linear.pu.done: // CHECK17-NEXT: ret void @@ -17454,11 +17454,11 @@ int bar(int n){ // CHECK17-NEXT: store i64 [[AA]], i64* [[AA_ADDR]], align 8 // CHECK17-NEXT: [[CONV:%.*]] = bitcast i64* [[A_ADDR]] to i32* // CHECK17-NEXT: [[CONV1:%.*]] = bitcast i64* [[AA_ADDR]] to i16* -// CHECK17-NEXT: [[TMP0:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK17-NEXT: [[TMP0:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK17-NEXT: [[CONV2:%.*]] = bitcast i64* [[A_CASTED]] to i32* // CHECK17-NEXT: store i32 [[TMP0]], i32* [[CONV2]], align 4 // CHECK17-NEXT: [[TMP1:%.*]] = load i64, i64* [[A_CASTED]], align 8 -// CHECK17-NEXT: 
[[TMP2:%.*]] = load i16, i16* [[CONV1]], align 8 +// CHECK17-NEXT: [[TMP2:%.*]] = load i16, i16* [[CONV1]], align 2 // CHECK17-NEXT: [[CONV3:%.*]] = bitcast i64* [[AA_CASTED]] to i16* // CHECK17-NEXT: store i16 [[TMP2]], i16* [[CONV3]], align 2 // CHECK17-NEXT: [[TMP3:%.*]] = load i64, i64* [[AA_CASTED]], align 8 @@ -17518,14 +17518,14 @@ int bar(int n){ // CHECK17-NEXT: [[ADD:%.*]] = add nsw i32 6, [[MUL]] // CHECK17-NEXT: [[CONV3:%.*]] = trunc i32 [[ADD]] to i16 // CHECK17-NEXT: store i16 [[CONV3]], i16* [[IT]], align 2, !llvm.access.group !20 -// CHECK17-NEXT: [[TMP8:%.*]] = load i32, i32* [[CONV]], align 8, !llvm.access.group !20 +// CHECK17-NEXT: [[TMP8:%.*]] = load i32, i32* [[CONV]], align 4, !llvm.access.group !20 // CHECK17-NEXT: [[ADD4:%.*]] = add nsw i32 [[TMP8]], 1 -// CHECK17-NEXT: store i32 [[ADD4]], i32* [[CONV]], align 8, !llvm.access.group !20 -// CHECK17-NEXT: [[TMP9:%.*]] = load i16, i16* [[CONV1]], align 8, !llvm.access.group !20 +// CHECK17-NEXT: store i32 [[ADD4]], i32* [[CONV]], align 4, !llvm.access.group !20 +// CHECK17-NEXT: [[TMP9:%.*]] = load i16, i16* [[CONV1]], align 2, !llvm.access.group !20 // CHECK17-NEXT: [[CONV5:%.*]] = sext i16 [[TMP9]] to i32 // CHECK17-NEXT: [[ADD6:%.*]] = add nsw i32 [[CONV5]], 1 // CHECK17-NEXT: [[CONV7:%.*]] = trunc i32 [[ADD6]] to i16 -// CHECK17-NEXT: store i16 [[CONV7]], i16* [[CONV1]], align 8, !llvm.access.group !20 +// CHECK17-NEXT: store i16 [[CONV7]], i16* [[CONV1]], align 2, !llvm.access.group !20 // CHECK17-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK17: omp.body.continue: // CHECK17-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] @@ -17583,11 +17583,11 @@ int bar(int n){ // CHECK17-NEXT: [[TMP6:%.*]] = load double*, double** [[CN_ADDR]], align 8 // CHECK17-NEXT: [[TMP7:%.*]] = load %struct.TT*, %struct.TT** [[D_ADDR]], align 8 // CHECK17-NEXT: [[CONV5:%.*]] = bitcast i64* [[DOTCAPTURE_EXPR__ADDR]] to i32* -// CHECK17-NEXT: [[TMP8:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK17-NEXT: [[TMP8:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK17-NEXT: [[CONV6:%.*]] = bitcast i64* [[A_CASTED]] to i32* // CHECK17-NEXT: store i32 [[TMP8]], i32* [[CONV6]], align 4 // CHECK17-NEXT: [[TMP9:%.*]] = load i64, i64* [[A_CASTED]], align 8 -// CHECK17-NEXT: [[TMP10:%.*]] = load i32, i32* [[CONV5]], align 8 +// CHECK17-NEXT: [[TMP10:%.*]] = load i32, i32* [[CONV5]], align 4 // CHECK17-NEXT: [[CONV7:%.*]] = bitcast i64* [[DOTCAPTURE_EXPR__CASTED]] to i32* // CHECK17-NEXT: store i32 [[TMP10]], i32* [[CONV7]], align 4 // CHECK17-NEXT: [[TMP11:%.*]] = load i64, i64* [[DOTCAPTURE_EXPR__CASTED]], align 8 @@ -17643,7 +17643,7 @@ int bar(int n){ // CHECK17-NEXT: store i32 25, i32* [[DOTOMP_UB]], align 4 // CHECK17-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK17-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK17-NEXT: [[TMP8:%.*]] = load i32, i32* [[CONV5]], align 8 +// CHECK17-NEXT: [[TMP8:%.*]] = load i32, i32* [[CONV5]], align 4 // CHECK17-NEXT: [[TMP9:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK17-NEXT: [[TMP10:%.*]] = load i32, i32* [[TMP9]], align 4 // CHECK17-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP10]], i32 33, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 [[TMP8]]) @@ -17679,9 +17679,9 @@ int bar(int n){ // CHECK17-NEXT: [[SUB:%.*]] = sub nsw i32 122, [[MUL]] // CHECK17-NEXT: [[CONV8:%.*]] = trunc i32 [[SUB]] to i8 // CHECK17-NEXT: store i8 [[CONV8]], i8* [[IT]], align 1, !llvm.access.group !23 
-// CHECK17-NEXT: [[TMP19:%.*]] = load i32, i32* [[CONV]], align 8, !llvm.access.group !23 +// CHECK17-NEXT: [[TMP19:%.*]] = load i32, i32* [[CONV]], align 4, !llvm.access.group !23 // CHECK17-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP19]], 1 -// CHECK17-NEXT: store i32 [[ADD]], i32* [[CONV]], align 8, !llvm.access.group !23 +// CHECK17-NEXT: store i32 [[ADD]], i32* [[CONV]], align 4, !llvm.access.group !23 // CHECK17-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x float], [10 x float]* [[TMP0]], i64 0, i64 2 // CHECK17-NEXT: [[TMP20:%.*]] = load float, float* [[ARRAYIDX]], align 4, !llvm.access.group !23 // CHECK17-NEXT: [[CONV9:%.*]] = fpext float [[TMP20]] to double @@ -17765,15 +17765,15 @@ int bar(int n){ // CHECK17-NEXT: [[CONV1:%.*]] = bitcast i64* [[AA_ADDR]] to i16* // CHECK17-NEXT: [[CONV2:%.*]] = bitcast i64* [[AAA_ADDR]] to i8* // CHECK17-NEXT: [[TMP0:%.*]] = load [10 x i32]*, [10 x i32]** [[B_ADDR]], align 8 -// CHECK17-NEXT: [[TMP1:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK17-NEXT: [[TMP1:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK17-NEXT: [[CONV3:%.*]] = bitcast i64* [[A_CASTED]] to i32* // CHECK17-NEXT: store i32 [[TMP1]], i32* [[CONV3]], align 4 // CHECK17-NEXT: [[TMP2:%.*]] = load i64, i64* [[A_CASTED]], align 8 -// CHECK17-NEXT: [[TMP3:%.*]] = load i16, i16* [[CONV1]], align 8 +// CHECK17-NEXT: [[TMP3:%.*]] = load i16, i16* [[CONV1]], align 2 // CHECK17-NEXT: [[CONV4:%.*]] = bitcast i64* [[AA_CASTED]] to i16* // CHECK17-NEXT: store i16 [[TMP3]], i16* [[CONV4]], align 2 // CHECK17-NEXT: [[TMP4:%.*]] = load i64, i64* [[AA_CASTED]], align 8 -// CHECK17-NEXT: [[TMP5:%.*]] = load i8, i8* [[CONV2]], align 8 +// CHECK17-NEXT: [[TMP5:%.*]] = load i8, i8* [[CONV2]], align 1 // CHECK17-NEXT: [[CONV5:%.*]] = bitcast i64* [[AAA_CASTED]] to i8* // CHECK17-NEXT: store i8 [[TMP5]], i8* [[CONV5]], align 1 // CHECK17-NEXT: [[TMP6:%.*]] = load i64, i64* [[AAA_CASTED]], align 8 @@ -17824,7 +17824,7 @@ int bar(int n){ // CHECK17-NEXT: [[TMP1:%.*]] = load i64, i64* [[VLA_ADDR]], align 8 // CHECK17-NEXT: [[TMP2:%.*]] = load i64, i64* [[VLA_ADDR2]], align 8 // CHECK17-NEXT: [[TMP3:%.*]] = load i16*, i16** [[C_ADDR]], align 8 -// CHECK17-NEXT: [[TMP4:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK17-NEXT: [[TMP4:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK17-NEXT: [[CONV3:%.*]] = bitcast i64* [[B_CASTED]] to i32* // CHECK17-NEXT: store i32 [[TMP4]], i32* [[CONV3]], align 4 // CHECK17-NEXT: [[TMP5:%.*]] = load i64, i64* [[B_CASTED]], align 8 @@ -17892,7 +17892,7 @@ int bar(int n){ // CHECK17-NEXT: [[MUL:%.*]] = mul i64 [[TMP11]], 400 // CHECK17-NEXT: [[SUB:%.*]] = sub i64 2000, [[MUL]] // CHECK17-NEXT: store i64 [[SUB]], i64* [[IT]], align 8, !llvm.access.group !26 -// CHECK17-NEXT: [[TMP12:%.*]] = load i32, i32* [[CONV]], align 8, !llvm.access.group !26 +// CHECK17-NEXT: [[TMP12:%.*]] = load i32, i32* [[CONV]], align 4, !llvm.access.group !26 // CHECK17-NEXT: [[CONV4:%.*]] = sitofp i32 [[TMP12]] to double // CHECK17-NEXT: [[ADD:%.*]] = fadd double [[CONV4]], 1.500000e+00 // CHECK17-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_S1:%.*]], %struct.S1* [[TMP0]], i32 0, i32 0 @@ -17942,11 +17942,11 @@ int bar(int n){ // CHECK17-NEXT: [[CONV:%.*]] = bitcast i64* [[A_ADDR]] to i32* // CHECK17-NEXT: [[CONV1:%.*]] = bitcast i64* [[AA_ADDR]] to i16* // CHECK17-NEXT: [[TMP0:%.*]] = load [10 x i32]*, [10 x i32]** [[B_ADDR]], align 8 -// CHECK17-NEXT: [[TMP1:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK17-NEXT: [[TMP1:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK17-NEXT: 
[[CONV2:%.*]] = bitcast i64* [[A_CASTED]] to i32* // CHECK17-NEXT: store i32 [[TMP1]], i32* [[CONV2]], align 4 // CHECK17-NEXT: [[TMP2:%.*]] = load i64, i64* [[A_CASTED]], align 8 -// CHECK17-NEXT: [[TMP3:%.*]] = load i16, i16* [[CONV1]], align 8 +// CHECK17-NEXT: [[TMP3:%.*]] = load i16, i16* [[CONV1]], align 2 // CHECK17-NEXT: [[CONV3:%.*]] = bitcast i64* [[AA_CASTED]] to i16* // CHECK17-NEXT: store i16 [[TMP3]], i16* [[CONV3]], align 2 // CHECK17-NEXT: [[TMP4:%.*]] = load i64, i64* [[AA_CASTED]], align 8 @@ -18008,14 +18008,14 @@ int bar(int n){ // CHECK17-NEXT: [[MUL:%.*]] = mul nsw i64 [[TMP8]], 3 // CHECK17-NEXT: [[ADD:%.*]] = add nsw i64 -10, [[MUL]] // CHECK17-NEXT: store i64 [[ADD]], i64* [[I]], align 8, !llvm.access.group !29 -// CHECK17-NEXT: [[TMP9:%.*]] = load i32, i32* [[CONV]], align 8, !llvm.access.group !29 +// CHECK17-NEXT: [[TMP9:%.*]] = load i32, i32* [[CONV]], align 4, !llvm.access.group !29 // CHECK17-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP9]], 1 -// CHECK17-NEXT: store i32 [[ADD3]], i32* [[CONV]], align 8, !llvm.access.group !29 -// CHECK17-NEXT: [[TMP10:%.*]] = load i16, i16* [[CONV1]], align 8, !llvm.access.group !29 +// CHECK17-NEXT: store i32 [[ADD3]], i32* [[CONV]], align 4, !llvm.access.group !29 +// CHECK17-NEXT: [[TMP10:%.*]] = load i16, i16* [[CONV1]], align 2, !llvm.access.group !29 // CHECK17-NEXT: [[CONV4:%.*]] = sext i16 [[TMP10]] to i32 // CHECK17-NEXT: [[ADD5:%.*]] = add nsw i32 [[CONV4]], 1 // CHECK17-NEXT: [[CONV6:%.*]] = trunc i32 [[ADD5]] to i16 -// CHECK17-NEXT: store i16 [[CONV6]], i16* [[CONV1]], align 8, !llvm.access.group !29 +// CHECK17-NEXT: store i16 [[CONV6]], i16* [[CONV1]], align 2, !llvm.access.group !29 // CHECK17-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], [10 x i32]* [[TMP0]], i64 0, i64 2 // CHECK17-NEXT: [[TMP11:%.*]] = load i32, i32* [[ARRAYIDX]], align 4, !llvm.access.group !29 // CHECK17-NEXT: [[ADD7:%.*]] = add nsw i32 [[TMP11]], 1 @@ -18131,15 +18131,15 @@ int bar(int n){ // CHECK18-NEXT: [[CONV:%.*]] = bitcast i64* [[AA_ADDR]] to i16* // CHECK18-NEXT: [[CONV1:%.*]] = bitcast i64* [[LIN_ADDR]] to i32* // CHECK18-NEXT: [[CONV2:%.*]] = bitcast i64* [[A_ADDR]] to i32* -// CHECK18-NEXT: [[TMP0:%.*]] = load i16, i16* [[CONV]], align 8 +// CHECK18-NEXT: [[TMP0:%.*]] = load i16, i16* [[CONV]], align 2 // CHECK18-NEXT: [[CONV3:%.*]] = bitcast i64* [[AA_CASTED]] to i16* // CHECK18-NEXT: store i16 [[TMP0]], i16* [[CONV3]], align 2 // CHECK18-NEXT: [[TMP1:%.*]] = load i64, i64* [[AA_CASTED]], align 8 -// CHECK18-NEXT: [[TMP2:%.*]] = load i32, i32* [[CONV1]], align 8 +// CHECK18-NEXT: [[TMP2:%.*]] = load i32, i32* [[CONV1]], align 4 // CHECK18-NEXT: [[CONV4:%.*]] = bitcast i64* [[LIN_CASTED]] to i32* // CHECK18-NEXT: store i32 [[TMP2]], i32* [[CONV4]], align 4 // CHECK18-NEXT: [[TMP3:%.*]] = load i64, i64* [[LIN_CASTED]], align 8 -// CHECK18-NEXT: [[TMP4:%.*]] = load i32, i32* [[CONV2]], align 8 +// CHECK18-NEXT: [[TMP4:%.*]] = load i32, i32* [[CONV2]], align 4 // CHECK18-NEXT: [[CONV5:%.*]] = bitcast i64* [[A_CASTED]] to i32* // CHECK18-NEXT: store i32 [[TMP4]], i32* [[CONV5]], align 4 // CHECK18-NEXT: [[TMP5:%.*]] = load i64, i64* [[A_CASTED]], align 8 @@ -18175,9 +18175,9 @@ int bar(int n){ // CHECK18-NEXT: [[CONV:%.*]] = bitcast i64* [[AA_ADDR]] to i16* // CHECK18-NEXT: [[CONV1:%.*]] = bitcast i64* [[LIN_ADDR]] to i32* // CHECK18-NEXT: [[CONV2:%.*]] = bitcast i64* [[A_ADDR]] to i32* -// CHECK18-NEXT: [[TMP0:%.*]] = load i32, i32* [[CONV1]], align 8 +// CHECK18-NEXT: [[TMP0:%.*]] = load i32, i32* [[CONV1]], align 4 // 
CHECK18-NEXT: store i32 [[TMP0]], i32* [[DOTLINEAR_START]], align 4 -// CHECK18-NEXT: [[TMP1:%.*]] = load i32, i32* [[CONV2]], align 8 +// CHECK18-NEXT: [[TMP1:%.*]] = load i32, i32* [[CONV2]], align 4 // CHECK18-NEXT: store i32 [[TMP1]], i32* [[DOTLINEAR_START3]], align 4 // CHECK18-NEXT: [[CALL:%.*]] = call i64 @_Z7get_valv() #[[ATTR5:[0-9]+]] // CHECK18-NEXT: store i64 [[CALL]], i64* [[DOTLINEAR_STEP]], align 8 @@ -18229,11 +18229,11 @@ int bar(int n){ // CHECK18-NEXT: [[ADD12:%.*]] = add i64 [[CONV10]], [[MUL11]] // CHECK18-NEXT: [[CONV13:%.*]] = trunc i64 [[ADD12]] to i32 // CHECK18-NEXT: store i32 [[CONV13]], i32* [[A5]], align 4, !llvm.access.group !17 -// CHECK18-NEXT: [[TMP16:%.*]] = load i16, i16* [[CONV]], align 8, !llvm.access.group !17 +// CHECK18-NEXT: [[TMP16:%.*]] = load i16, i16* [[CONV]], align 2, !llvm.access.group !17 // CHECK18-NEXT: [[CONV14:%.*]] = sext i16 [[TMP16]] to i32 // CHECK18-NEXT: [[ADD15:%.*]] = add nsw i32 [[CONV14]], 1 // CHECK18-NEXT: [[CONV16:%.*]] = trunc i32 [[ADD15]] to i16 -// CHECK18-NEXT: store i16 [[CONV16]], i16* [[CONV]], align 8, !llvm.access.group !17 +// CHECK18-NEXT: store i16 [[CONV16]], i16* [[CONV]], align 2, !llvm.access.group !17 // CHECK18-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK18: omp.body.continue: // CHECK18-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] @@ -18258,9 +18258,9 @@ int bar(int n){ // CHECK18-NEXT: br i1 [[TMP21]], label [[DOTOMP_LINEAR_PU:%.*]], label [[DOTOMP_LINEAR_PU_DONE:%.*]] // CHECK18: .omp.linear.pu: // CHECK18-NEXT: [[TMP22:%.*]] = load i32, i32* [[LIN4]], align 4 -// CHECK18-NEXT: store i32 [[TMP22]], i32* [[CONV1]], align 8 +// CHECK18-NEXT: store i32 [[TMP22]], i32* [[CONV1]], align 4 // CHECK18-NEXT: [[TMP23:%.*]] = load i32, i32* [[A5]], align 4 -// CHECK18-NEXT: store i32 [[TMP23]], i32* [[CONV2]], align 8 +// CHECK18-NEXT: store i32 [[TMP23]], i32* [[CONV2]], align 4 // CHECK18-NEXT: br label [[DOTOMP_LINEAR_PU_DONE]] // CHECK18: .omp.linear.pu.done: // CHECK18-NEXT: ret void @@ -18283,11 +18283,11 @@ int bar(int n){ // CHECK18-NEXT: store i64 [[AA]], i64* [[AA_ADDR]], align 8 // CHECK18-NEXT: [[CONV:%.*]] = bitcast i64* [[A_ADDR]] to i32* // CHECK18-NEXT: [[CONV1:%.*]] = bitcast i64* [[AA_ADDR]] to i16* -// CHECK18-NEXT: [[TMP0:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK18-NEXT: [[TMP0:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK18-NEXT: [[CONV2:%.*]] = bitcast i64* [[A_CASTED]] to i32* // CHECK18-NEXT: store i32 [[TMP0]], i32* [[CONV2]], align 4 // CHECK18-NEXT: [[TMP1:%.*]] = load i64, i64* [[A_CASTED]], align 8 -// CHECK18-NEXT: [[TMP2:%.*]] = load i16, i16* [[CONV1]], align 8 +// CHECK18-NEXT: [[TMP2:%.*]] = load i16, i16* [[CONV1]], align 2 // CHECK18-NEXT: [[CONV3:%.*]] = bitcast i64* [[AA_CASTED]] to i16* // CHECK18-NEXT: store i16 [[TMP2]], i16* [[CONV3]], align 2 // CHECK18-NEXT: [[TMP3:%.*]] = load i64, i64* [[AA_CASTED]], align 8 @@ -18347,14 +18347,14 @@ int bar(int n){ // CHECK18-NEXT: [[ADD:%.*]] = add nsw i32 6, [[MUL]] // CHECK18-NEXT: [[CONV3:%.*]] = trunc i32 [[ADD]] to i16 // CHECK18-NEXT: store i16 [[CONV3]], i16* [[IT]], align 2, !llvm.access.group !20 -// CHECK18-NEXT: [[TMP8:%.*]] = load i32, i32* [[CONV]], align 8, !llvm.access.group !20 +// CHECK18-NEXT: [[TMP8:%.*]] = load i32, i32* [[CONV]], align 4, !llvm.access.group !20 // CHECK18-NEXT: [[ADD4:%.*]] = add nsw i32 [[TMP8]], 1 -// CHECK18-NEXT: store i32 [[ADD4]], i32* [[CONV]], align 8, !llvm.access.group !20 -// CHECK18-NEXT: [[TMP9:%.*]] = load i16, i16* [[CONV1]], align 8, !llvm.access.group !20 +// 
CHECK18-NEXT: store i32 [[ADD4]], i32* [[CONV]], align 4, !llvm.access.group !20 +// CHECK18-NEXT: [[TMP9:%.*]] = load i16, i16* [[CONV1]], align 2, !llvm.access.group !20 // CHECK18-NEXT: [[CONV5:%.*]] = sext i16 [[TMP9]] to i32 // CHECK18-NEXT: [[ADD6:%.*]] = add nsw i32 [[CONV5]], 1 // CHECK18-NEXT: [[CONV7:%.*]] = trunc i32 [[ADD6]] to i16 -// CHECK18-NEXT: store i16 [[CONV7]], i16* [[CONV1]], align 8, !llvm.access.group !20 +// CHECK18-NEXT: store i16 [[CONV7]], i16* [[CONV1]], align 2, !llvm.access.group !20 // CHECK18-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK18: omp.body.continue: // CHECK18-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] @@ -18412,11 +18412,11 @@ int bar(int n){ // CHECK18-NEXT: [[TMP6:%.*]] = load double*, double** [[CN_ADDR]], align 8 // CHECK18-NEXT: [[TMP7:%.*]] = load %struct.TT*, %struct.TT** [[D_ADDR]], align 8 // CHECK18-NEXT: [[CONV5:%.*]] = bitcast i64* [[DOTCAPTURE_EXPR__ADDR]] to i32* -// CHECK18-NEXT: [[TMP8:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK18-NEXT: [[TMP8:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK18-NEXT: [[CONV6:%.*]] = bitcast i64* [[A_CASTED]] to i32* // CHECK18-NEXT: store i32 [[TMP8]], i32* [[CONV6]], align 4 // CHECK18-NEXT: [[TMP9:%.*]] = load i64, i64* [[A_CASTED]], align 8 -// CHECK18-NEXT: [[TMP10:%.*]] = load i32, i32* [[CONV5]], align 8 +// CHECK18-NEXT: [[TMP10:%.*]] = load i32, i32* [[CONV5]], align 4 // CHECK18-NEXT: [[CONV7:%.*]] = bitcast i64* [[DOTCAPTURE_EXPR__CASTED]] to i32* // CHECK18-NEXT: store i32 [[TMP10]], i32* [[CONV7]], align 4 // CHECK18-NEXT: [[TMP11:%.*]] = load i64, i64* [[DOTCAPTURE_EXPR__CASTED]], align 8 @@ -18472,7 +18472,7 @@ int bar(int n){ // CHECK18-NEXT: store i32 25, i32* [[DOTOMP_UB]], align 4 // CHECK18-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK18-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK18-NEXT: [[TMP8:%.*]] = load i32, i32* [[CONV5]], align 8 +// CHECK18-NEXT: [[TMP8:%.*]] = load i32, i32* [[CONV5]], align 4 // CHECK18-NEXT: [[TMP9:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK18-NEXT: [[TMP10:%.*]] = load i32, i32* [[TMP9]], align 4 // CHECK18-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP10]], i32 33, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 [[TMP8]]) @@ -18508,9 +18508,9 @@ int bar(int n){ // CHECK18-NEXT: [[SUB:%.*]] = sub nsw i32 122, [[MUL]] // CHECK18-NEXT: [[CONV8:%.*]] = trunc i32 [[SUB]] to i8 // CHECK18-NEXT: store i8 [[CONV8]], i8* [[IT]], align 1, !llvm.access.group !23 -// CHECK18-NEXT: [[TMP19:%.*]] = load i32, i32* [[CONV]], align 8, !llvm.access.group !23 +// CHECK18-NEXT: [[TMP19:%.*]] = load i32, i32* [[CONV]], align 4, !llvm.access.group !23 // CHECK18-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP19]], 1 -// CHECK18-NEXT: store i32 [[ADD]], i32* [[CONV]], align 8, !llvm.access.group !23 +// CHECK18-NEXT: store i32 [[ADD]], i32* [[CONV]], align 4, !llvm.access.group !23 // CHECK18-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x float], [10 x float]* [[TMP0]], i64 0, i64 2 // CHECK18-NEXT: [[TMP20:%.*]] = load float, float* [[ARRAYIDX]], align 4, !llvm.access.group !23 // CHECK18-NEXT: [[CONV9:%.*]] = fpext float [[TMP20]] to double @@ -18594,15 +18594,15 @@ int bar(int n){ // CHECK18-NEXT: [[CONV1:%.*]] = bitcast i64* [[AA_ADDR]] to i16* // CHECK18-NEXT: [[CONV2:%.*]] = bitcast i64* [[AAA_ADDR]] to i8* // CHECK18-NEXT: [[TMP0:%.*]] = load [10 x i32]*, [10 x i32]** [[B_ADDR]], align 8 -// CHECK18-NEXT: [[TMP1:%.*]] = 
load i32, i32* [[CONV]], align 8 +// CHECK18-NEXT: [[TMP1:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK18-NEXT: [[CONV3:%.*]] = bitcast i64* [[A_CASTED]] to i32* // CHECK18-NEXT: store i32 [[TMP1]], i32* [[CONV3]], align 4 // CHECK18-NEXT: [[TMP2:%.*]] = load i64, i64* [[A_CASTED]], align 8 -// CHECK18-NEXT: [[TMP3:%.*]] = load i16, i16* [[CONV1]], align 8 +// CHECK18-NEXT: [[TMP3:%.*]] = load i16, i16* [[CONV1]], align 2 // CHECK18-NEXT: [[CONV4:%.*]] = bitcast i64* [[AA_CASTED]] to i16* // CHECK18-NEXT: store i16 [[TMP3]], i16* [[CONV4]], align 2 // CHECK18-NEXT: [[TMP4:%.*]] = load i64, i64* [[AA_CASTED]], align 8 -// CHECK18-NEXT: [[TMP5:%.*]] = load i8, i8* [[CONV2]], align 8 +// CHECK18-NEXT: [[TMP5:%.*]] = load i8, i8* [[CONV2]], align 1 // CHECK18-NEXT: [[CONV5:%.*]] = bitcast i64* [[AAA_CASTED]] to i8* // CHECK18-NEXT: store i8 [[TMP5]], i8* [[CONV5]], align 1 // CHECK18-NEXT: [[TMP6:%.*]] = load i64, i64* [[AAA_CASTED]], align 8 @@ -18653,7 +18653,7 @@ int bar(int n){ // CHECK18-NEXT: [[TMP1:%.*]] = load i64, i64* [[VLA_ADDR]], align 8 // CHECK18-NEXT: [[TMP2:%.*]] = load i64, i64* [[VLA_ADDR2]], align 8 // CHECK18-NEXT: [[TMP3:%.*]] = load i16*, i16** [[C_ADDR]], align 8 -// CHECK18-NEXT: [[TMP4:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK18-NEXT: [[TMP4:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK18-NEXT: [[CONV3:%.*]] = bitcast i64* [[B_CASTED]] to i32* // CHECK18-NEXT: store i32 [[TMP4]], i32* [[CONV3]], align 4 // CHECK18-NEXT: [[TMP5:%.*]] = load i64, i64* [[B_CASTED]], align 8 @@ -18721,7 +18721,7 @@ int bar(int n){ // CHECK18-NEXT: [[MUL:%.*]] = mul i64 [[TMP11]], 400 // CHECK18-NEXT: [[SUB:%.*]] = sub i64 2000, [[MUL]] // CHECK18-NEXT: store i64 [[SUB]], i64* [[IT]], align 8, !llvm.access.group !26 -// CHECK18-NEXT: [[TMP12:%.*]] = load i32, i32* [[CONV]], align 8, !llvm.access.group !26 +// CHECK18-NEXT: [[TMP12:%.*]] = load i32, i32* [[CONV]], align 4, !llvm.access.group !26 // CHECK18-NEXT: [[CONV4:%.*]] = sitofp i32 [[TMP12]] to double // CHECK18-NEXT: [[ADD:%.*]] = fadd double [[CONV4]], 1.500000e+00 // CHECK18-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_S1:%.*]], %struct.S1* [[TMP0]], i32 0, i32 0 @@ -18771,11 +18771,11 @@ int bar(int n){ // CHECK18-NEXT: [[CONV:%.*]] = bitcast i64* [[A_ADDR]] to i32* // CHECK18-NEXT: [[CONV1:%.*]] = bitcast i64* [[AA_ADDR]] to i16* // CHECK18-NEXT: [[TMP0:%.*]] = load [10 x i32]*, [10 x i32]** [[B_ADDR]], align 8 -// CHECK18-NEXT: [[TMP1:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK18-NEXT: [[TMP1:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK18-NEXT: [[CONV2:%.*]] = bitcast i64* [[A_CASTED]] to i32* // CHECK18-NEXT: store i32 [[TMP1]], i32* [[CONV2]], align 4 // CHECK18-NEXT: [[TMP2:%.*]] = load i64, i64* [[A_CASTED]], align 8 -// CHECK18-NEXT: [[TMP3:%.*]] = load i16, i16* [[CONV1]], align 8 +// CHECK18-NEXT: [[TMP3:%.*]] = load i16, i16* [[CONV1]], align 2 // CHECK18-NEXT: [[CONV3:%.*]] = bitcast i64* [[AA_CASTED]] to i16* // CHECK18-NEXT: store i16 [[TMP3]], i16* [[CONV3]], align 2 // CHECK18-NEXT: [[TMP4:%.*]] = load i64, i64* [[AA_CASTED]], align 8 @@ -18837,14 +18837,14 @@ int bar(int n){ // CHECK18-NEXT: [[MUL:%.*]] = mul nsw i64 [[TMP8]], 3 // CHECK18-NEXT: [[ADD:%.*]] = add nsw i64 -10, [[MUL]] // CHECK18-NEXT: store i64 [[ADD]], i64* [[I]], align 8, !llvm.access.group !29 -// CHECK18-NEXT: [[TMP9:%.*]] = load i32, i32* [[CONV]], align 8, !llvm.access.group !29 +// CHECK18-NEXT: [[TMP9:%.*]] = load i32, i32* [[CONV]], align 4, !llvm.access.group !29 // CHECK18-NEXT: [[ADD3:%.*]] = add nsw i32 
[[TMP9]], 1 -// CHECK18-NEXT: store i32 [[ADD3]], i32* [[CONV]], align 8, !llvm.access.group !29 -// CHECK18-NEXT: [[TMP10:%.*]] = load i16, i16* [[CONV1]], align 8, !llvm.access.group !29 +// CHECK18-NEXT: store i32 [[ADD3]], i32* [[CONV]], align 4, !llvm.access.group !29 +// CHECK18-NEXT: [[TMP10:%.*]] = load i16, i16* [[CONV1]], align 2, !llvm.access.group !29 // CHECK18-NEXT: [[CONV4:%.*]] = sext i16 [[TMP10]] to i32 // CHECK18-NEXT: [[ADD5:%.*]] = add nsw i32 [[CONV4]], 1 // CHECK18-NEXT: [[CONV6:%.*]] = trunc i32 [[ADD5]] to i16 -// CHECK18-NEXT: store i16 [[CONV6]], i16* [[CONV1]], align 8, !llvm.access.group !29 +// CHECK18-NEXT: store i16 [[CONV6]], i16* [[CONV1]], align 2, !llvm.access.group !29 // CHECK18-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], [10 x i32]* [[TMP0]], i64 0, i64 2 // CHECK18-NEXT: [[TMP11:%.*]] = load i32, i32* [[ARRAYIDX]], align 4, !llvm.access.group !29 // CHECK18-NEXT: [[ADD7:%.*]] = add nsw i32 [[TMP11]], 1 @@ -18958,7 +18958,7 @@ int bar(int n){ // CHECK19-NEXT: store i32 [[LIN]], i32* [[LIN_ADDR]], align 4 // CHECK19-NEXT: store i32 [[A]], i32* [[A_ADDR]], align 4 // CHECK19-NEXT: [[CONV:%.*]] = bitcast i32* [[AA_ADDR]] to i16* -// CHECK19-NEXT: [[TMP0:%.*]] = load i16, i16* [[CONV]], align 4 +// CHECK19-NEXT: [[TMP0:%.*]] = load i16, i16* [[CONV]], align 2 // CHECK19-NEXT: [[CONV1:%.*]] = bitcast i32* [[AA_CASTED]] to i16* // CHECK19-NEXT: store i16 [[TMP0]], i16* [[CONV1]], align 2 // CHECK19-NEXT: [[TMP1:%.*]] = load i32, i32* [[AA_CASTED]], align 4 @@ -19052,11 +19052,11 @@ int bar(int n){ // CHECK19-NEXT: [[ADD10:%.*]] = add i64 [[CONV8]], [[MUL9]] // CHECK19-NEXT: [[CONV11:%.*]] = trunc i64 [[ADD10]] to i32 // CHECK19-NEXT: store i32 [[CONV11]], i32* [[A3]], align 4, !llvm.access.group !18 -// CHECK19-NEXT: [[TMP16:%.*]] = load i16, i16* [[CONV]], align 4, !llvm.access.group !18 +// CHECK19-NEXT: [[TMP16:%.*]] = load i16, i16* [[CONV]], align 2, !llvm.access.group !18 // CHECK19-NEXT: [[CONV12:%.*]] = sext i16 [[TMP16]] to i32 // CHECK19-NEXT: [[ADD13:%.*]] = add nsw i32 [[CONV12]], 1 // CHECK19-NEXT: [[CONV14:%.*]] = trunc i32 [[ADD13]] to i16 -// CHECK19-NEXT: store i16 [[CONV14]], i16* [[CONV]], align 4, !llvm.access.group !18 +// CHECK19-NEXT: store i16 [[CONV14]], i16* [[CONV]], align 2, !llvm.access.group !18 // CHECK19-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK19: omp.body.continue: // CHECK19-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] @@ -19108,7 +19108,7 @@ int bar(int n){ // CHECK19-NEXT: [[TMP0:%.*]] = load i32, i32* [[A_ADDR]], align 4 // CHECK19-NEXT: store i32 [[TMP0]], i32* [[A_CASTED]], align 4 // CHECK19-NEXT: [[TMP1:%.*]] = load i32, i32* [[A_CASTED]], align 4 -// CHECK19-NEXT: [[TMP2:%.*]] = load i16, i16* [[CONV]], align 4 +// CHECK19-NEXT: [[TMP2:%.*]] = load i16, i16* [[CONV]], align 2 // CHECK19-NEXT: [[CONV1:%.*]] = bitcast i32* [[AA_CASTED]] to i16* // CHECK19-NEXT: store i16 [[TMP2]], i16* [[CONV1]], align 2 // CHECK19-NEXT: [[TMP3:%.*]] = load i32, i32* [[AA_CASTED]], align 4 @@ -19170,11 +19170,11 @@ int bar(int n){ // CHECK19-NEXT: [[TMP8:%.*]] = load i32, i32* [[A_ADDR]], align 4, !llvm.access.group !21 // CHECK19-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP8]], 1 // CHECK19-NEXT: store i32 [[ADD3]], i32* [[A_ADDR]], align 4, !llvm.access.group !21 -// CHECK19-NEXT: [[TMP9:%.*]] = load i16, i16* [[CONV]], align 4, !llvm.access.group !21 +// CHECK19-NEXT: [[TMP9:%.*]] = load i16, i16* [[CONV]], align 2, !llvm.access.group !21 // CHECK19-NEXT: [[CONV4:%.*]] = sext i16 [[TMP9]] to i32 // CHECK19-NEXT: 
[[ADD5:%.*]] = add nsw i32 [[CONV4]], 1 // CHECK19-NEXT: [[CONV6:%.*]] = trunc i32 [[ADD5]] to i16 -// CHECK19-NEXT: store i16 [[CONV6]], i16* [[CONV]], align 4, !llvm.access.group !21 +// CHECK19-NEXT: store i16 [[CONV6]], i16* [[CONV]], align 2, !llvm.access.group !21 // CHECK19-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK19: omp.body.continue: // CHECK19-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] @@ -19410,11 +19410,11 @@ int bar(int n){ // CHECK19-NEXT: [[TMP1:%.*]] = load i32, i32* [[A_ADDR]], align 4 // CHECK19-NEXT: store i32 [[TMP1]], i32* [[A_CASTED]], align 4 // CHECK19-NEXT: [[TMP2:%.*]] = load i32, i32* [[A_CASTED]], align 4 -// CHECK19-NEXT: [[TMP3:%.*]] = load i16, i16* [[CONV]], align 4 +// CHECK19-NEXT: [[TMP3:%.*]] = load i16, i16* [[CONV]], align 2 // CHECK19-NEXT: [[CONV2:%.*]] = bitcast i32* [[AA_CASTED]] to i16* // CHECK19-NEXT: store i16 [[TMP3]], i16* [[CONV2]], align 2 // CHECK19-NEXT: [[TMP4:%.*]] = load i32, i32* [[AA_CASTED]], align 4 -// CHECK19-NEXT: [[TMP5:%.*]] = load i8, i8* [[CONV1]], align 4 +// CHECK19-NEXT: [[TMP5:%.*]] = load i8, i8* [[CONV1]], align 1 // CHECK19-NEXT: [[CONV3:%.*]] = bitcast i32* [[AAA_CASTED]] to i8* // CHECK19-NEXT: store i8 [[TMP5]], i8* [[CONV3]], align 1 // CHECK19-NEXT: [[TMP6:%.*]] = load i32, i32* [[AAA_CASTED]], align 4 @@ -19581,7 +19581,7 @@ int bar(int n){ // CHECK19-NEXT: [[TMP1:%.*]] = load i32, i32* [[A_ADDR]], align 4 // CHECK19-NEXT: store i32 [[TMP1]], i32* [[A_CASTED]], align 4 // CHECK19-NEXT: [[TMP2:%.*]] = load i32, i32* [[A_CASTED]], align 4 -// CHECK19-NEXT: [[TMP3:%.*]] = load i16, i16* [[CONV]], align 4 +// CHECK19-NEXT: [[TMP3:%.*]] = load i16, i16* [[CONV]], align 2 // CHECK19-NEXT: [[CONV1:%.*]] = bitcast i32* [[AA_CASTED]] to i16* // CHECK19-NEXT: store i16 [[TMP3]], i16* [[CONV1]], align 2 // CHECK19-NEXT: [[TMP4:%.*]] = load i32, i32* [[AA_CASTED]], align 4 @@ -19645,11 +19645,11 @@ int bar(int n){ // CHECK19-NEXT: [[TMP9:%.*]] = load i32, i32* [[A_ADDR]], align 4, !llvm.access.group !30 // CHECK19-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP9]], 1 // CHECK19-NEXT: store i32 [[ADD2]], i32* [[A_ADDR]], align 4, !llvm.access.group !30 -// CHECK19-NEXT: [[TMP10:%.*]] = load i16, i16* [[CONV]], align 4, !llvm.access.group !30 +// CHECK19-NEXT: [[TMP10:%.*]] = load i16, i16* [[CONV]], align 2, !llvm.access.group !30 // CHECK19-NEXT: [[CONV3:%.*]] = sext i16 [[TMP10]] to i32 // CHECK19-NEXT: [[ADD4:%.*]] = add nsw i32 [[CONV3]], 1 // CHECK19-NEXT: [[CONV5:%.*]] = trunc i32 [[ADD4]] to i16 -// CHECK19-NEXT: store i16 [[CONV5]], i16* [[CONV]], align 4, !llvm.access.group !30 +// CHECK19-NEXT: store i16 [[CONV5]], i16* [[CONV]], align 2, !llvm.access.group !30 // CHECK19-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], [10 x i32]* [[TMP0]], i32 0, i32 2 // CHECK19-NEXT: [[TMP11:%.*]] = load i32, i32* [[ARRAYIDX]], align 4, !llvm.access.group !30 // CHECK19-NEXT: [[ADD6:%.*]] = add nsw i32 [[TMP11]], 1 @@ -19763,7 +19763,7 @@ int bar(int n){ // CHECK20-NEXT: store i32 [[LIN]], i32* [[LIN_ADDR]], align 4 // CHECK20-NEXT: store i32 [[A]], i32* [[A_ADDR]], align 4 // CHECK20-NEXT: [[CONV:%.*]] = bitcast i32* [[AA_ADDR]] to i16* -// CHECK20-NEXT: [[TMP0:%.*]] = load i16, i16* [[CONV]], align 4 +// CHECK20-NEXT: [[TMP0:%.*]] = load i16, i16* [[CONV]], align 2 // CHECK20-NEXT: [[CONV1:%.*]] = bitcast i32* [[AA_CASTED]] to i16* // CHECK20-NEXT: store i16 [[TMP0]], i16* [[CONV1]], align 2 // CHECK20-NEXT: [[TMP1:%.*]] = load i32, i32* [[AA_CASTED]], align 4 @@ -19857,11 +19857,11 @@ int bar(int n){ // 
CHECK20-NEXT: [[ADD10:%.*]] = add i64 [[CONV8]], [[MUL9]] // CHECK20-NEXT: [[CONV11:%.*]] = trunc i64 [[ADD10]] to i32 // CHECK20-NEXT: store i32 [[CONV11]], i32* [[A3]], align 4, !llvm.access.group !18 -// CHECK20-NEXT: [[TMP16:%.*]] = load i16, i16* [[CONV]], align 4, !llvm.access.group !18 +// CHECK20-NEXT: [[TMP16:%.*]] = load i16, i16* [[CONV]], align 2, !llvm.access.group !18 // CHECK20-NEXT: [[CONV12:%.*]] = sext i16 [[TMP16]] to i32 // CHECK20-NEXT: [[ADD13:%.*]] = add nsw i32 [[CONV12]], 1 // CHECK20-NEXT: [[CONV14:%.*]] = trunc i32 [[ADD13]] to i16 -// CHECK20-NEXT: store i16 [[CONV14]], i16* [[CONV]], align 4, !llvm.access.group !18 +// CHECK20-NEXT: store i16 [[CONV14]], i16* [[CONV]], align 2, !llvm.access.group !18 // CHECK20-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK20: omp.body.continue: // CHECK20-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] @@ -19913,7 +19913,7 @@ int bar(int n){ // CHECK20-NEXT: [[TMP0:%.*]] = load i32, i32* [[A_ADDR]], align 4 // CHECK20-NEXT: store i32 [[TMP0]], i32* [[A_CASTED]], align 4 // CHECK20-NEXT: [[TMP1:%.*]] = load i32, i32* [[A_CASTED]], align 4 -// CHECK20-NEXT: [[TMP2:%.*]] = load i16, i16* [[CONV]], align 4 +// CHECK20-NEXT: [[TMP2:%.*]] = load i16, i16* [[CONV]], align 2 // CHECK20-NEXT: [[CONV1:%.*]] = bitcast i32* [[AA_CASTED]] to i16* // CHECK20-NEXT: store i16 [[TMP2]], i16* [[CONV1]], align 2 // CHECK20-NEXT: [[TMP3:%.*]] = load i32, i32* [[AA_CASTED]], align 4 @@ -19975,11 +19975,11 @@ int bar(int n){ // CHECK20-NEXT: [[TMP8:%.*]] = load i32, i32* [[A_ADDR]], align 4, !llvm.access.group !21 // CHECK20-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP8]], 1 // CHECK20-NEXT: store i32 [[ADD3]], i32* [[A_ADDR]], align 4, !llvm.access.group !21 -// CHECK20-NEXT: [[TMP9:%.*]] = load i16, i16* [[CONV]], align 4, !llvm.access.group !21 +// CHECK20-NEXT: [[TMP9:%.*]] = load i16, i16* [[CONV]], align 2, !llvm.access.group !21 // CHECK20-NEXT: [[CONV4:%.*]] = sext i16 [[TMP9]] to i32 // CHECK20-NEXT: [[ADD5:%.*]] = add nsw i32 [[CONV4]], 1 // CHECK20-NEXT: [[CONV6:%.*]] = trunc i32 [[ADD5]] to i16 -// CHECK20-NEXT: store i16 [[CONV6]], i16* [[CONV]], align 4, !llvm.access.group !21 +// CHECK20-NEXT: store i16 [[CONV6]], i16* [[CONV]], align 2, !llvm.access.group !21 // CHECK20-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK20: omp.body.continue: // CHECK20-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] @@ -20215,11 +20215,11 @@ int bar(int n){ // CHECK20-NEXT: [[TMP1:%.*]] = load i32, i32* [[A_ADDR]], align 4 // CHECK20-NEXT: store i32 [[TMP1]], i32* [[A_CASTED]], align 4 // CHECK20-NEXT: [[TMP2:%.*]] = load i32, i32* [[A_CASTED]], align 4 -// CHECK20-NEXT: [[TMP3:%.*]] = load i16, i16* [[CONV]], align 4 +// CHECK20-NEXT: [[TMP3:%.*]] = load i16, i16* [[CONV]], align 2 // CHECK20-NEXT: [[CONV2:%.*]] = bitcast i32* [[AA_CASTED]] to i16* // CHECK20-NEXT: store i16 [[TMP3]], i16* [[CONV2]], align 2 // CHECK20-NEXT: [[TMP4:%.*]] = load i32, i32* [[AA_CASTED]], align 4 -// CHECK20-NEXT: [[TMP5:%.*]] = load i8, i8* [[CONV1]], align 4 +// CHECK20-NEXT: [[TMP5:%.*]] = load i8, i8* [[CONV1]], align 1 // CHECK20-NEXT: [[CONV3:%.*]] = bitcast i32* [[AAA_CASTED]] to i8* // CHECK20-NEXT: store i8 [[TMP5]], i8* [[CONV3]], align 1 // CHECK20-NEXT: [[TMP6:%.*]] = load i32, i32* [[AAA_CASTED]], align 4 @@ -20386,7 +20386,7 @@ int bar(int n){ // CHECK20-NEXT: [[TMP1:%.*]] = load i32, i32* [[A_ADDR]], align 4 // CHECK20-NEXT: store i32 [[TMP1]], i32* [[A_CASTED]], align 4 // CHECK20-NEXT: [[TMP2:%.*]] = load i32, i32* [[A_CASTED]], align 4 -// CHECK20-NEXT: [[TMP3:%.*]] = 
load i16, i16* [[CONV]], align 4 +// CHECK20-NEXT: [[TMP3:%.*]] = load i16, i16* [[CONV]], align 2 // CHECK20-NEXT: [[CONV1:%.*]] = bitcast i32* [[AA_CASTED]] to i16* // CHECK20-NEXT: store i16 [[TMP3]], i16* [[CONV1]], align 2 // CHECK20-NEXT: [[TMP4:%.*]] = load i32, i32* [[AA_CASTED]], align 4 @@ -20450,11 +20450,11 @@ int bar(int n){ // CHECK20-NEXT: [[TMP9:%.*]] = load i32, i32* [[A_ADDR]], align 4, !llvm.access.group !30 // CHECK20-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP9]], 1 // CHECK20-NEXT: store i32 [[ADD2]], i32* [[A_ADDR]], align 4, !llvm.access.group !30 -// CHECK20-NEXT: [[TMP10:%.*]] = load i16, i16* [[CONV]], align 4, !llvm.access.group !30 +// CHECK20-NEXT: [[TMP10:%.*]] = load i16, i16* [[CONV]], align 2, !llvm.access.group !30 // CHECK20-NEXT: [[CONV3:%.*]] = sext i16 [[TMP10]] to i32 // CHECK20-NEXT: [[ADD4:%.*]] = add nsw i32 [[CONV3]], 1 // CHECK20-NEXT: [[CONV5:%.*]] = trunc i32 [[ADD4]] to i16 -// CHECK20-NEXT: store i16 [[CONV5]], i16* [[CONV]], align 4, !llvm.access.group !30 +// CHECK20-NEXT: store i16 [[CONV5]], i16* [[CONV]], align 2, !llvm.access.group !30 // CHECK20-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], [10 x i32]* [[TMP0]], i32 0, i32 2 // CHECK20-NEXT: [[TMP11:%.*]] = load i32, i32* [[ARRAYIDX]], align 4, !llvm.access.group !30 // CHECK20-NEXT: [[ADD6:%.*]] = add nsw i32 [[TMP11]], 1 @@ -20570,15 +20570,15 @@ int bar(int n){ // CHECK21-NEXT: [[CONV:%.*]] = bitcast i64* [[AA_ADDR]] to i16* // CHECK21-NEXT: [[CONV1:%.*]] = bitcast i64* [[LIN_ADDR]] to i32* // CHECK21-NEXT: [[CONV2:%.*]] = bitcast i64* [[A_ADDR]] to i32* -// CHECK21-NEXT: [[TMP0:%.*]] = load i16, i16* [[CONV]], align 8 +// CHECK21-NEXT: [[TMP0:%.*]] = load i16, i16* [[CONV]], align 2 // CHECK21-NEXT: [[CONV3:%.*]] = bitcast i64* [[AA_CASTED]] to i16* // CHECK21-NEXT: store i16 [[TMP0]], i16* [[CONV3]], align 2 // CHECK21-NEXT: [[TMP1:%.*]] = load i64, i64* [[AA_CASTED]], align 8 -// CHECK21-NEXT: [[TMP2:%.*]] = load i32, i32* [[CONV1]], align 8 +// CHECK21-NEXT: [[TMP2:%.*]] = load i32, i32* [[CONV1]], align 4 // CHECK21-NEXT: [[CONV4:%.*]] = bitcast i64* [[LIN_CASTED]] to i32* // CHECK21-NEXT: store i32 [[TMP2]], i32* [[CONV4]], align 4 // CHECK21-NEXT: [[TMP3:%.*]] = load i64, i64* [[LIN_CASTED]], align 8 -// CHECK21-NEXT: [[TMP4:%.*]] = load i32, i32* [[CONV2]], align 8 +// CHECK21-NEXT: [[TMP4:%.*]] = load i32, i32* [[CONV2]], align 4 // CHECK21-NEXT: [[CONV5:%.*]] = bitcast i64* [[A_CASTED]] to i32* // CHECK21-NEXT: store i32 [[TMP4]], i32* [[CONV5]], align 4 // CHECK21-NEXT: [[TMP5:%.*]] = load i64, i64* [[A_CASTED]], align 8 @@ -20614,9 +20614,9 @@ int bar(int n){ // CHECK21-NEXT: [[CONV:%.*]] = bitcast i64* [[AA_ADDR]] to i16* // CHECK21-NEXT: [[CONV1:%.*]] = bitcast i64* [[LIN_ADDR]] to i32* // CHECK21-NEXT: [[CONV2:%.*]] = bitcast i64* [[A_ADDR]] to i32* -// CHECK21-NEXT: [[TMP0:%.*]] = load i32, i32* [[CONV1]], align 8 +// CHECK21-NEXT: [[TMP0:%.*]] = load i32, i32* [[CONV1]], align 4 // CHECK21-NEXT: store i32 [[TMP0]], i32* [[DOTLINEAR_START]], align 4 -// CHECK21-NEXT: [[TMP1:%.*]] = load i32, i32* [[CONV2]], align 8 +// CHECK21-NEXT: [[TMP1:%.*]] = load i32, i32* [[CONV2]], align 4 // CHECK21-NEXT: store i32 [[TMP1]], i32* [[DOTLINEAR_START3]], align 4 // CHECK21-NEXT: [[CALL:%.*]] = call i64 @_Z7get_valv() #[[ATTR5:[0-9]+]] // CHECK21-NEXT: store i64 [[CALL]], i64* [[DOTLINEAR_STEP]], align 8 @@ -20668,11 +20668,11 @@ int bar(int n){ // CHECK21-NEXT: [[ADD12:%.*]] = add i64 [[CONV10]], [[MUL11]] // CHECK21-NEXT: [[CONV13:%.*]] = trunc i64 [[ADD12]] to i32 
// CHECK21-NEXT: store i32 [[CONV13]], i32* [[A5]], align 4, !llvm.access.group !17 -// CHECK21-NEXT: [[TMP16:%.*]] = load i16, i16* [[CONV]], align 8, !llvm.access.group !17 +// CHECK21-NEXT: [[TMP16:%.*]] = load i16, i16* [[CONV]], align 2, !llvm.access.group !17 // CHECK21-NEXT: [[CONV14:%.*]] = sext i16 [[TMP16]] to i32 // CHECK21-NEXT: [[ADD15:%.*]] = add nsw i32 [[CONV14]], 1 // CHECK21-NEXT: [[CONV16:%.*]] = trunc i32 [[ADD15]] to i16 -// CHECK21-NEXT: store i16 [[CONV16]], i16* [[CONV]], align 8, !llvm.access.group !17 +// CHECK21-NEXT: store i16 [[CONV16]], i16* [[CONV]], align 2, !llvm.access.group !17 // CHECK21-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK21: omp.body.continue: // CHECK21-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] @@ -20697,9 +20697,9 @@ int bar(int n){ // CHECK21-NEXT: br i1 [[TMP21]], label [[DOTOMP_LINEAR_PU:%.*]], label [[DOTOMP_LINEAR_PU_DONE:%.*]] // CHECK21: .omp.linear.pu: // CHECK21-NEXT: [[TMP22:%.*]] = load i32, i32* [[LIN4]], align 4 -// CHECK21-NEXT: store i32 [[TMP22]], i32* [[CONV1]], align 8 +// CHECK21-NEXT: store i32 [[TMP22]], i32* [[CONV1]], align 4 // CHECK21-NEXT: [[TMP23:%.*]] = load i32, i32* [[A5]], align 4 -// CHECK21-NEXT: store i32 [[TMP23]], i32* [[CONV2]], align 8 +// CHECK21-NEXT: store i32 [[TMP23]], i32* [[CONV2]], align 4 // CHECK21-NEXT: br label [[DOTOMP_LINEAR_PU_DONE]] // CHECK21: .omp.linear.pu.done: // CHECK21-NEXT: ret void @@ -20722,11 +20722,11 @@ int bar(int n){ // CHECK21-NEXT: store i64 [[AA]], i64* [[AA_ADDR]], align 8 // CHECK21-NEXT: [[CONV:%.*]] = bitcast i64* [[A_ADDR]] to i32* // CHECK21-NEXT: [[CONV1:%.*]] = bitcast i64* [[AA_ADDR]] to i16* -// CHECK21-NEXT: [[TMP0:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK21-NEXT: [[TMP0:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK21-NEXT: [[CONV2:%.*]] = bitcast i64* [[A_CASTED]] to i32* // CHECK21-NEXT: store i32 [[TMP0]], i32* [[CONV2]], align 4 // CHECK21-NEXT: [[TMP1:%.*]] = load i64, i64* [[A_CASTED]], align 8 -// CHECK21-NEXT: [[TMP2:%.*]] = load i16, i16* [[CONV1]], align 8 +// CHECK21-NEXT: [[TMP2:%.*]] = load i16, i16* [[CONV1]], align 2 // CHECK21-NEXT: [[CONV3:%.*]] = bitcast i64* [[AA_CASTED]] to i16* // CHECK21-NEXT: store i16 [[TMP2]], i16* [[CONV3]], align 2 // CHECK21-NEXT: [[TMP3:%.*]] = load i64, i64* [[AA_CASTED]], align 8 @@ -20786,14 +20786,14 @@ int bar(int n){ // CHECK21-NEXT: [[ADD:%.*]] = add nsw i32 6, [[MUL]] // CHECK21-NEXT: [[CONV3:%.*]] = trunc i32 [[ADD]] to i16 // CHECK21-NEXT: store i16 [[CONV3]], i16* [[IT]], align 2, !llvm.access.group !20 -// CHECK21-NEXT: [[TMP8:%.*]] = load i32, i32* [[CONV]], align 8, !llvm.access.group !20 +// CHECK21-NEXT: [[TMP8:%.*]] = load i32, i32* [[CONV]], align 4, !llvm.access.group !20 // CHECK21-NEXT: [[ADD4:%.*]] = add nsw i32 [[TMP8]], 1 -// CHECK21-NEXT: store i32 [[ADD4]], i32* [[CONV]], align 8, !llvm.access.group !20 -// CHECK21-NEXT: [[TMP9:%.*]] = load i16, i16* [[CONV1]], align 8, !llvm.access.group !20 +// CHECK21-NEXT: store i32 [[ADD4]], i32* [[CONV]], align 4, !llvm.access.group !20 +// CHECK21-NEXT: [[TMP9:%.*]] = load i16, i16* [[CONV1]], align 2, !llvm.access.group !20 // CHECK21-NEXT: [[CONV5:%.*]] = sext i16 [[TMP9]] to i32 // CHECK21-NEXT: [[ADD6:%.*]] = add nsw i32 [[CONV5]], 1 // CHECK21-NEXT: [[CONV7:%.*]] = trunc i32 [[ADD6]] to i16 -// CHECK21-NEXT: store i16 [[CONV7]], i16* [[CONV1]], align 8, !llvm.access.group !20 +// CHECK21-NEXT: store i16 [[CONV7]], i16* [[CONV1]], align 2, !llvm.access.group !20 // CHECK21-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK21: 
omp.body.continue: // CHECK21-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] @@ -20851,11 +20851,11 @@ int bar(int n){ // CHECK21-NEXT: [[TMP6:%.*]] = load double*, double** [[CN_ADDR]], align 8 // CHECK21-NEXT: [[TMP7:%.*]] = load %struct.TT*, %struct.TT** [[D_ADDR]], align 8 // CHECK21-NEXT: [[CONV5:%.*]] = bitcast i64* [[DOTCAPTURE_EXPR__ADDR]] to i32* -// CHECK21-NEXT: [[TMP8:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK21-NEXT: [[TMP8:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK21-NEXT: [[CONV6:%.*]] = bitcast i64* [[A_CASTED]] to i32* // CHECK21-NEXT: store i32 [[TMP8]], i32* [[CONV6]], align 4 // CHECK21-NEXT: [[TMP9:%.*]] = load i64, i64* [[A_CASTED]], align 8 -// CHECK21-NEXT: [[TMP10:%.*]] = load i32, i32* [[CONV5]], align 8 +// CHECK21-NEXT: [[TMP10:%.*]] = load i32, i32* [[CONV5]], align 4 // CHECK21-NEXT: [[CONV7:%.*]] = bitcast i64* [[DOTCAPTURE_EXPR__CASTED]] to i32* // CHECK21-NEXT: store i32 [[TMP10]], i32* [[CONV7]], align 4 // CHECK21-NEXT: [[TMP11:%.*]] = load i64, i64* [[DOTCAPTURE_EXPR__CASTED]], align 8 @@ -20911,7 +20911,7 @@ int bar(int n){ // CHECK21-NEXT: store i32 25, i32* [[DOTOMP_UB]], align 4 // CHECK21-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK21-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK21-NEXT: [[TMP8:%.*]] = load i32, i32* [[CONV5]], align 8 +// CHECK21-NEXT: [[TMP8:%.*]] = load i32, i32* [[CONV5]], align 4 // CHECK21-NEXT: [[TMP9:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK21-NEXT: [[TMP10:%.*]] = load i32, i32* [[TMP9]], align 4 // CHECK21-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP10]], i32 33, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 [[TMP8]]) @@ -20947,9 +20947,9 @@ int bar(int n){ // CHECK21-NEXT: [[SUB:%.*]] = sub nsw i32 122, [[MUL]] // CHECK21-NEXT: [[CONV8:%.*]] = trunc i32 [[SUB]] to i8 // CHECK21-NEXT: store i8 [[CONV8]], i8* [[IT]], align 1, !llvm.access.group !23 -// CHECK21-NEXT: [[TMP19:%.*]] = load i32, i32* [[CONV]], align 8, !llvm.access.group !23 +// CHECK21-NEXT: [[TMP19:%.*]] = load i32, i32* [[CONV]], align 4, !llvm.access.group !23 // CHECK21-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP19]], 1 -// CHECK21-NEXT: store i32 [[ADD]], i32* [[CONV]], align 8, !llvm.access.group !23 +// CHECK21-NEXT: store i32 [[ADD]], i32* [[CONV]], align 4, !llvm.access.group !23 // CHECK21-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x float], [10 x float]* [[TMP0]], i64 0, i64 2 // CHECK21-NEXT: [[TMP20:%.*]] = load float, float* [[ARRAYIDX]], align 4, !llvm.access.group !23 // CHECK21-NEXT: [[CONV9:%.*]] = fpext float [[TMP20]] to double @@ -21033,15 +21033,15 @@ int bar(int n){ // CHECK21-NEXT: [[CONV1:%.*]] = bitcast i64* [[AA_ADDR]] to i16* // CHECK21-NEXT: [[CONV2:%.*]] = bitcast i64* [[AAA_ADDR]] to i8* // CHECK21-NEXT: [[TMP0:%.*]] = load [10 x i32]*, [10 x i32]** [[B_ADDR]], align 8 -// CHECK21-NEXT: [[TMP1:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK21-NEXT: [[TMP1:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK21-NEXT: [[CONV3:%.*]] = bitcast i64* [[A_CASTED]] to i32* // CHECK21-NEXT: store i32 [[TMP1]], i32* [[CONV3]], align 4 // CHECK21-NEXT: [[TMP2:%.*]] = load i64, i64* [[A_CASTED]], align 8 -// CHECK21-NEXT: [[TMP3:%.*]] = load i16, i16* [[CONV1]], align 8 +// CHECK21-NEXT: [[TMP3:%.*]] = load i16, i16* [[CONV1]], align 2 // CHECK21-NEXT: [[CONV4:%.*]] = bitcast i64* [[AA_CASTED]] to i16* // CHECK21-NEXT: store i16 [[TMP3]], i16* [[CONV4]], align 2 // CHECK21-NEXT: 
[[TMP4:%.*]] = load i64, i64* [[AA_CASTED]], align 8 -// CHECK21-NEXT: [[TMP5:%.*]] = load i8, i8* [[CONV2]], align 8 +// CHECK21-NEXT: [[TMP5:%.*]] = load i8, i8* [[CONV2]], align 1 // CHECK21-NEXT: [[CONV5:%.*]] = bitcast i64* [[AAA_CASTED]] to i8* // CHECK21-NEXT: store i8 [[TMP5]], i8* [[CONV5]], align 1 // CHECK21-NEXT: [[TMP6:%.*]] = load i64, i64* [[AAA_CASTED]], align 8 @@ -21099,17 +21099,17 @@ int bar(int n){ // CHECK21-NEXT: [[TMP3:%.*]] = load i64, i64* [[VLA_ADDR2]], align 8 // CHECK21-NEXT: [[TMP4:%.*]] = load i16*, i16** [[C_ADDR]], align 8 // CHECK21-NEXT: [[CONV3:%.*]] = bitcast i64* [[DOTCAPTURE_EXPR__ADDR]] to i8* -// CHECK21-NEXT: [[TMP5:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK21-NEXT: [[TMP5:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK21-NEXT: [[CONV4:%.*]] = bitcast i64* [[B_CASTED]] to i32* // CHECK21-NEXT: store i32 [[TMP5]], i32* [[CONV4]], align 4 // CHECK21-NEXT: [[TMP6:%.*]] = load i64, i64* [[B_CASTED]], align 8 -// CHECK21-NEXT: [[TMP7:%.*]] = load i8, i8* [[CONV3]], align 8 +// CHECK21-NEXT: [[TMP7:%.*]] = load i8, i8* [[CONV3]], align 1 // CHECK21-NEXT: [[TOBOOL:%.*]] = trunc i8 [[TMP7]] to i1 // CHECK21-NEXT: [[CONV5:%.*]] = bitcast i64* [[DOTCAPTURE_EXPR__CASTED]] to i8* // CHECK21-NEXT: [[FROMBOOL:%.*]] = zext i1 [[TOBOOL]] to i8 // CHECK21-NEXT: store i8 [[FROMBOOL]], i8* [[CONV5]], align 1 // CHECK21-NEXT: [[TMP8:%.*]] = load i64, i64* [[DOTCAPTURE_EXPR__CASTED]], align 8 -// CHECK21-NEXT: [[TMP9:%.*]] = load i8, i8* [[CONV3]], align 8 +// CHECK21-NEXT: [[TMP9:%.*]] = load i8, i8* [[CONV3]], align 1 // CHECK21-NEXT: [[TOBOOL6:%.*]] = trunc i8 [[TMP9]] to i1 // CHECK21-NEXT: br i1 [[TOBOOL6]], label [[OMP_IF_THEN:%.*]], label [[OMP_IF_ELSE:%.*]] // CHECK21: omp_if.then: @@ -21162,7 +21162,7 @@ int bar(int n){ // CHECK21-NEXT: store i64 3, i64* [[DOTOMP_UB]], align 8 // CHECK21-NEXT: store i64 1, i64* [[DOTOMP_STRIDE]], align 8 // CHECK21-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK21-NEXT: [[TMP4:%.*]] = load i8, i8* [[CONV3]], align 8 +// CHECK21-NEXT: [[TMP4:%.*]] = load i8, i8* [[CONV3]], align 1 // CHECK21-NEXT: [[TOBOOL:%.*]] = trunc i8 [[TMP4]] to i1 // CHECK21-NEXT: br i1 [[TOBOOL]], label [[OMP_IF_THEN:%.*]], label [[OMP_IF_ELSE:%.*]] // CHECK21: omp_if.then: @@ -21193,7 +21193,7 @@ int bar(int n){ // CHECK21-NEXT: [[MUL:%.*]] = mul i64 [[TMP12]], 400 // CHECK21-NEXT: [[SUB:%.*]] = sub i64 2000, [[MUL]] // CHECK21-NEXT: store i64 [[SUB]], i64* [[IT]], align 8, !llvm.access.group !26 -// CHECK21-NEXT: [[TMP13:%.*]] = load i32, i32* [[CONV]], align 8, !llvm.access.group !26 +// CHECK21-NEXT: [[TMP13:%.*]] = load i32, i32* [[CONV]], align 4, !llvm.access.group !26 // CHECK21-NEXT: [[CONV5:%.*]] = sitofp i32 [[TMP13]] to double // CHECK21-NEXT: [[ADD:%.*]] = fadd double [[CONV5]], 1.500000e+00 // CHECK21-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_S1:%.*]], %struct.S1* [[TMP0]], i32 0, i32 0 @@ -21245,7 +21245,7 @@ int bar(int n){ // CHECK21-NEXT: [[MUL18:%.*]] = mul i64 [[TMP24]], 400 // CHECK21-NEXT: [[SUB19:%.*]] = sub i64 2000, [[MUL18]] // CHECK21-NEXT: store i64 [[SUB19]], i64* [[IT]], align 8 -// CHECK21-NEXT: [[TMP25:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK21-NEXT: [[TMP25:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK21-NEXT: [[CONV20:%.*]] = sitofp i32 [[TMP25]] to double // CHECK21-NEXT: [[ADD21:%.*]] = fadd double [[CONV20]], 1.500000e+00 // CHECK21-NEXT: [[A22:%.*]] = getelementptr inbounds [[STRUCT_S1]], %struct.S1* [[TMP0]], i32 0, i32 0 @@ -21299,11 +21299,11 @@ int bar(int n){ // 
CHECK21-NEXT: [[CONV:%.*]] = bitcast i64* [[A_ADDR]] to i32* // CHECK21-NEXT: [[CONV1:%.*]] = bitcast i64* [[AA_ADDR]] to i16* // CHECK21-NEXT: [[TMP0:%.*]] = load [10 x i32]*, [10 x i32]** [[B_ADDR]], align 8 -// CHECK21-NEXT: [[TMP1:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK21-NEXT: [[TMP1:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK21-NEXT: [[CONV2:%.*]] = bitcast i64* [[A_CASTED]] to i32* // CHECK21-NEXT: store i32 [[TMP1]], i32* [[CONV2]], align 4 // CHECK21-NEXT: [[TMP2:%.*]] = load i64, i64* [[A_CASTED]], align 8 -// CHECK21-NEXT: [[TMP3:%.*]] = load i16, i16* [[CONV1]], align 8 +// CHECK21-NEXT: [[TMP3:%.*]] = load i16, i16* [[CONV1]], align 2 // CHECK21-NEXT: [[CONV3:%.*]] = bitcast i64* [[AA_CASTED]] to i16* // CHECK21-NEXT: store i16 [[TMP3]], i16* [[CONV3]], align 2 // CHECK21-NEXT: [[TMP4:%.*]] = load i64, i64* [[AA_CASTED]], align 8 @@ -21365,14 +21365,14 @@ int bar(int n){ // CHECK21-NEXT: [[MUL:%.*]] = mul nsw i64 [[TMP8]], 3 // CHECK21-NEXT: [[ADD:%.*]] = add nsw i64 -10, [[MUL]] // CHECK21-NEXT: store i64 [[ADD]], i64* [[I]], align 8, !llvm.access.group !32 -// CHECK21-NEXT: [[TMP9:%.*]] = load i32, i32* [[CONV]], align 8, !llvm.access.group !32 +// CHECK21-NEXT: [[TMP9:%.*]] = load i32, i32* [[CONV]], align 4, !llvm.access.group !32 // CHECK21-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP9]], 1 -// CHECK21-NEXT: store i32 [[ADD3]], i32* [[CONV]], align 8, !llvm.access.group !32 -// CHECK21-NEXT: [[TMP10:%.*]] = load i16, i16* [[CONV1]], align 8, !llvm.access.group !32 +// CHECK21-NEXT: store i32 [[ADD3]], i32* [[CONV]], align 4, !llvm.access.group !32 +// CHECK21-NEXT: [[TMP10:%.*]] = load i16, i16* [[CONV1]], align 2, !llvm.access.group !32 // CHECK21-NEXT: [[CONV4:%.*]] = sext i16 [[TMP10]] to i32 // CHECK21-NEXT: [[ADD5:%.*]] = add nsw i32 [[CONV4]], 1 // CHECK21-NEXT: [[CONV6:%.*]] = trunc i32 [[ADD5]] to i16 -// CHECK21-NEXT: store i16 [[CONV6]], i16* [[CONV1]], align 8, !llvm.access.group !32 +// CHECK21-NEXT: store i16 [[CONV6]], i16* [[CONV1]], align 2, !llvm.access.group !32 // CHECK21-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], [10 x i32]* [[TMP0]], i64 0, i64 2 // CHECK21-NEXT: [[TMP11:%.*]] = load i32, i32* [[ARRAYIDX]], align 4, !llvm.access.group !32 // CHECK21-NEXT: [[ADD7:%.*]] = add nsw i32 [[TMP11]], 1 @@ -21488,15 +21488,15 @@ int bar(int n){ // CHECK22-NEXT: [[CONV:%.*]] = bitcast i64* [[AA_ADDR]] to i16* // CHECK22-NEXT: [[CONV1:%.*]] = bitcast i64* [[LIN_ADDR]] to i32* // CHECK22-NEXT: [[CONV2:%.*]] = bitcast i64* [[A_ADDR]] to i32* -// CHECK22-NEXT: [[TMP0:%.*]] = load i16, i16* [[CONV]], align 8 +// CHECK22-NEXT: [[TMP0:%.*]] = load i16, i16* [[CONV]], align 2 // CHECK22-NEXT: [[CONV3:%.*]] = bitcast i64* [[AA_CASTED]] to i16* // CHECK22-NEXT: store i16 [[TMP0]], i16* [[CONV3]], align 2 // CHECK22-NEXT: [[TMP1:%.*]] = load i64, i64* [[AA_CASTED]], align 8 -// CHECK22-NEXT: [[TMP2:%.*]] = load i32, i32* [[CONV1]], align 8 +// CHECK22-NEXT: [[TMP2:%.*]] = load i32, i32* [[CONV1]], align 4 // CHECK22-NEXT: [[CONV4:%.*]] = bitcast i64* [[LIN_CASTED]] to i32* // CHECK22-NEXT: store i32 [[TMP2]], i32* [[CONV4]], align 4 // CHECK22-NEXT: [[TMP3:%.*]] = load i64, i64* [[LIN_CASTED]], align 8 -// CHECK22-NEXT: [[TMP4:%.*]] = load i32, i32* [[CONV2]], align 8 +// CHECK22-NEXT: [[TMP4:%.*]] = load i32, i32* [[CONV2]], align 4 // CHECK22-NEXT: [[CONV5:%.*]] = bitcast i64* [[A_CASTED]] to i32* // CHECK22-NEXT: store i32 [[TMP4]], i32* [[CONV5]], align 4 // CHECK22-NEXT: [[TMP5:%.*]] = load i64, i64* [[A_CASTED]], align 8 @@ -21532,9 
+21532,9 @@ int bar(int n){ // CHECK22-NEXT: [[CONV:%.*]] = bitcast i64* [[AA_ADDR]] to i16* // CHECK22-NEXT: [[CONV1:%.*]] = bitcast i64* [[LIN_ADDR]] to i32* // CHECK22-NEXT: [[CONV2:%.*]] = bitcast i64* [[A_ADDR]] to i32* -// CHECK22-NEXT: [[TMP0:%.*]] = load i32, i32* [[CONV1]], align 8 +// CHECK22-NEXT: [[TMP0:%.*]] = load i32, i32* [[CONV1]], align 4 // CHECK22-NEXT: store i32 [[TMP0]], i32* [[DOTLINEAR_START]], align 4 -// CHECK22-NEXT: [[TMP1:%.*]] = load i32, i32* [[CONV2]], align 8 +// CHECK22-NEXT: [[TMP1:%.*]] = load i32, i32* [[CONV2]], align 4 // CHECK22-NEXT: store i32 [[TMP1]], i32* [[DOTLINEAR_START3]], align 4 // CHECK22-NEXT: [[CALL:%.*]] = call i64 @_Z7get_valv() #[[ATTR5:[0-9]+]] // CHECK22-NEXT: store i64 [[CALL]], i64* [[DOTLINEAR_STEP]], align 8 @@ -21586,11 +21586,11 @@ int bar(int n){ // CHECK22-NEXT: [[ADD12:%.*]] = add i64 [[CONV10]], [[MUL11]] // CHECK22-NEXT: [[CONV13:%.*]] = trunc i64 [[ADD12]] to i32 // CHECK22-NEXT: store i32 [[CONV13]], i32* [[A5]], align 4, !llvm.access.group !17 -// CHECK22-NEXT: [[TMP16:%.*]] = load i16, i16* [[CONV]], align 8, !llvm.access.group !17 +// CHECK22-NEXT: [[TMP16:%.*]] = load i16, i16* [[CONV]], align 2, !llvm.access.group !17 // CHECK22-NEXT: [[CONV14:%.*]] = sext i16 [[TMP16]] to i32 // CHECK22-NEXT: [[ADD15:%.*]] = add nsw i32 [[CONV14]], 1 // CHECK22-NEXT: [[CONV16:%.*]] = trunc i32 [[ADD15]] to i16 -// CHECK22-NEXT: store i16 [[CONV16]], i16* [[CONV]], align 8, !llvm.access.group !17 +// CHECK22-NEXT: store i16 [[CONV16]], i16* [[CONV]], align 2, !llvm.access.group !17 // CHECK22-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK22: omp.body.continue: // CHECK22-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] @@ -21615,9 +21615,9 @@ int bar(int n){ // CHECK22-NEXT: br i1 [[TMP21]], label [[DOTOMP_LINEAR_PU:%.*]], label [[DOTOMP_LINEAR_PU_DONE:%.*]] // CHECK22: .omp.linear.pu: // CHECK22-NEXT: [[TMP22:%.*]] = load i32, i32* [[LIN4]], align 4 -// CHECK22-NEXT: store i32 [[TMP22]], i32* [[CONV1]], align 8 +// CHECK22-NEXT: store i32 [[TMP22]], i32* [[CONV1]], align 4 // CHECK22-NEXT: [[TMP23:%.*]] = load i32, i32* [[A5]], align 4 -// CHECK22-NEXT: store i32 [[TMP23]], i32* [[CONV2]], align 8 +// CHECK22-NEXT: store i32 [[TMP23]], i32* [[CONV2]], align 4 // CHECK22-NEXT: br label [[DOTOMP_LINEAR_PU_DONE]] // CHECK22: .omp.linear.pu.done: // CHECK22-NEXT: ret void @@ -21640,11 +21640,11 @@ int bar(int n){ // CHECK22-NEXT: store i64 [[AA]], i64* [[AA_ADDR]], align 8 // CHECK22-NEXT: [[CONV:%.*]] = bitcast i64* [[A_ADDR]] to i32* // CHECK22-NEXT: [[CONV1:%.*]] = bitcast i64* [[AA_ADDR]] to i16* -// CHECK22-NEXT: [[TMP0:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK22-NEXT: [[TMP0:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK22-NEXT: [[CONV2:%.*]] = bitcast i64* [[A_CASTED]] to i32* // CHECK22-NEXT: store i32 [[TMP0]], i32* [[CONV2]], align 4 // CHECK22-NEXT: [[TMP1:%.*]] = load i64, i64* [[A_CASTED]], align 8 -// CHECK22-NEXT: [[TMP2:%.*]] = load i16, i16* [[CONV1]], align 8 +// CHECK22-NEXT: [[TMP2:%.*]] = load i16, i16* [[CONV1]], align 2 // CHECK22-NEXT: [[CONV3:%.*]] = bitcast i64* [[AA_CASTED]] to i16* // CHECK22-NEXT: store i16 [[TMP2]], i16* [[CONV3]], align 2 // CHECK22-NEXT: [[TMP3:%.*]] = load i64, i64* [[AA_CASTED]], align 8 @@ -21704,14 +21704,14 @@ int bar(int n){ // CHECK22-NEXT: [[ADD:%.*]] = add nsw i32 6, [[MUL]] // CHECK22-NEXT: [[CONV3:%.*]] = trunc i32 [[ADD]] to i16 // CHECK22-NEXT: store i16 [[CONV3]], i16* [[IT]], align 2, !llvm.access.group !20 -// CHECK22-NEXT: [[TMP8:%.*]] = load i32, i32* [[CONV]], 
align 8, !llvm.access.group !20 +// CHECK22-NEXT: [[TMP8:%.*]] = load i32, i32* [[CONV]], align 4, !llvm.access.group !20 // CHECK22-NEXT: [[ADD4:%.*]] = add nsw i32 [[TMP8]], 1 -// CHECK22-NEXT: store i32 [[ADD4]], i32* [[CONV]], align 8, !llvm.access.group !20 -// CHECK22-NEXT: [[TMP9:%.*]] = load i16, i16* [[CONV1]], align 8, !llvm.access.group !20 +// CHECK22-NEXT: store i32 [[ADD4]], i32* [[CONV]], align 4, !llvm.access.group !20 +// CHECK22-NEXT: [[TMP9:%.*]] = load i16, i16* [[CONV1]], align 2, !llvm.access.group !20 // CHECK22-NEXT: [[CONV5:%.*]] = sext i16 [[TMP9]] to i32 // CHECK22-NEXT: [[ADD6:%.*]] = add nsw i32 [[CONV5]], 1 // CHECK22-NEXT: [[CONV7:%.*]] = trunc i32 [[ADD6]] to i16 -// CHECK22-NEXT: store i16 [[CONV7]], i16* [[CONV1]], align 8, !llvm.access.group !20 +// CHECK22-NEXT: store i16 [[CONV7]], i16* [[CONV1]], align 2, !llvm.access.group !20 // CHECK22-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK22: omp.body.continue: // CHECK22-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] @@ -21769,11 +21769,11 @@ int bar(int n){ // CHECK22-NEXT: [[TMP6:%.*]] = load double*, double** [[CN_ADDR]], align 8 // CHECK22-NEXT: [[TMP7:%.*]] = load %struct.TT*, %struct.TT** [[D_ADDR]], align 8 // CHECK22-NEXT: [[CONV5:%.*]] = bitcast i64* [[DOTCAPTURE_EXPR__ADDR]] to i32* -// CHECK22-NEXT: [[TMP8:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK22-NEXT: [[TMP8:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK22-NEXT: [[CONV6:%.*]] = bitcast i64* [[A_CASTED]] to i32* // CHECK22-NEXT: store i32 [[TMP8]], i32* [[CONV6]], align 4 // CHECK22-NEXT: [[TMP9:%.*]] = load i64, i64* [[A_CASTED]], align 8 -// CHECK22-NEXT: [[TMP10:%.*]] = load i32, i32* [[CONV5]], align 8 +// CHECK22-NEXT: [[TMP10:%.*]] = load i32, i32* [[CONV5]], align 4 // CHECK22-NEXT: [[CONV7:%.*]] = bitcast i64* [[DOTCAPTURE_EXPR__CASTED]] to i32* // CHECK22-NEXT: store i32 [[TMP10]], i32* [[CONV7]], align 4 // CHECK22-NEXT: [[TMP11:%.*]] = load i64, i64* [[DOTCAPTURE_EXPR__CASTED]], align 8 @@ -21829,7 +21829,7 @@ int bar(int n){ // CHECK22-NEXT: store i32 25, i32* [[DOTOMP_UB]], align 4 // CHECK22-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK22-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK22-NEXT: [[TMP8:%.*]] = load i32, i32* [[CONV5]], align 8 +// CHECK22-NEXT: [[TMP8:%.*]] = load i32, i32* [[CONV5]], align 4 // CHECK22-NEXT: [[TMP9:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK22-NEXT: [[TMP10:%.*]] = load i32, i32* [[TMP9]], align 4 // CHECK22-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP10]], i32 33, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 [[TMP8]]) @@ -21865,9 +21865,9 @@ int bar(int n){ // CHECK22-NEXT: [[SUB:%.*]] = sub nsw i32 122, [[MUL]] // CHECK22-NEXT: [[CONV8:%.*]] = trunc i32 [[SUB]] to i8 // CHECK22-NEXT: store i8 [[CONV8]], i8* [[IT]], align 1, !llvm.access.group !23 -// CHECK22-NEXT: [[TMP19:%.*]] = load i32, i32* [[CONV]], align 8, !llvm.access.group !23 +// CHECK22-NEXT: [[TMP19:%.*]] = load i32, i32* [[CONV]], align 4, !llvm.access.group !23 // CHECK22-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP19]], 1 -// CHECK22-NEXT: store i32 [[ADD]], i32* [[CONV]], align 8, !llvm.access.group !23 +// CHECK22-NEXT: store i32 [[ADD]], i32* [[CONV]], align 4, !llvm.access.group !23 // CHECK22-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x float], [10 x float]* [[TMP0]], i64 0, i64 2 // CHECK22-NEXT: [[TMP20:%.*]] = load float, float* [[ARRAYIDX]], align 4, !llvm.access.group 
!23 // CHECK22-NEXT: [[CONV9:%.*]] = fpext float [[TMP20]] to double @@ -21951,15 +21951,15 @@ int bar(int n){ // CHECK22-NEXT: [[CONV1:%.*]] = bitcast i64* [[AA_ADDR]] to i16* // CHECK22-NEXT: [[CONV2:%.*]] = bitcast i64* [[AAA_ADDR]] to i8* // CHECK22-NEXT: [[TMP0:%.*]] = load [10 x i32]*, [10 x i32]** [[B_ADDR]], align 8 -// CHECK22-NEXT: [[TMP1:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK22-NEXT: [[TMP1:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK22-NEXT: [[CONV3:%.*]] = bitcast i64* [[A_CASTED]] to i32* // CHECK22-NEXT: store i32 [[TMP1]], i32* [[CONV3]], align 4 // CHECK22-NEXT: [[TMP2:%.*]] = load i64, i64* [[A_CASTED]], align 8 -// CHECK22-NEXT: [[TMP3:%.*]] = load i16, i16* [[CONV1]], align 8 +// CHECK22-NEXT: [[TMP3:%.*]] = load i16, i16* [[CONV1]], align 2 // CHECK22-NEXT: [[CONV4:%.*]] = bitcast i64* [[AA_CASTED]] to i16* // CHECK22-NEXT: store i16 [[TMP3]], i16* [[CONV4]], align 2 // CHECK22-NEXT: [[TMP4:%.*]] = load i64, i64* [[AA_CASTED]], align 8 -// CHECK22-NEXT: [[TMP5:%.*]] = load i8, i8* [[CONV2]], align 8 +// CHECK22-NEXT: [[TMP5:%.*]] = load i8, i8* [[CONV2]], align 1 // CHECK22-NEXT: [[CONV5:%.*]] = bitcast i64* [[AAA_CASTED]] to i8* // CHECK22-NEXT: store i8 [[TMP5]], i8* [[CONV5]], align 1 // CHECK22-NEXT: [[TMP6:%.*]] = load i64, i64* [[AAA_CASTED]], align 8 @@ -22017,17 +22017,17 @@ int bar(int n){ // CHECK22-NEXT: [[TMP3:%.*]] = load i64, i64* [[VLA_ADDR2]], align 8 // CHECK22-NEXT: [[TMP4:%.*]] = load i16*, i16** [[C_ADDR]], align 8 // CHECK22-NEXT: [[CONV3:%.*]] = bitcast i64* [[DOTCAPTURE_EXPR__ADDR]] to i8* -// CHECK22-NEXT: [[TMP5:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK22-NEXT: [[TMP5:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK22-NEXT: [[CONV4:%.*]] = bitcast i64* [[B_CASTED]] to i32* // CHECK22-NEXT: store i32 [[TMP5]], i32* [[CONV4]], align 4 // CHECK22-NEXT: [[TMP6:%.*]] = load i64, i64* [[B_CASTED]], align 8 -// CHECK22-NEXT: [[TMP7:%.*]] = load i8, i8* [[CONV3]], align 8 +// CHECK22-NEXT: [[TMP7:%.*]] = load i8, i8* [[CONV3]], align 1 // CHECK22-NEXT: [[TOBOOL:%.*]] = trunc i8 [[TMP7]] to i1 // CHECK22-NEXT: [[CONV5:%.*]] = bitcast i64* [[DOTCAPTURE_EXPR__CASTED]] to i8* // CHECK22-NEXT: [[FROMBOOL:%.*]] = zext i1 [[TOBOOL]] to i8 // CHECK22-NEXT: store i8 [[FROMBOOL]], i8* [[CONV5]], align 1 // CHECK22-NEXT: [[TMP8:%.*]] = load i64, i64* [[DOTCAPTURE_EXPR__CASTED]], align 8 -// CHECK22-NEXT: [[TMP9:%.*]] = load i8, i8* [[CONV3]], align 8 +// CHECK22-NEXT: [[TMP9:%.*]] = load i8, i8* [[CONV3]], align 1 // CHECK22-NEXT: [[TOBOOL6:%.*]] = trunc i8 [[TMP9]] to i1 // CHECK22-NEXT: br i1 [[TOBOOL6]], label [[OMP_IF_THEN:%.*]], label [[OMP_IF_ELSE:%.*]] // CHECK22: omp_if.then: @@ -22080,7 +22080,7 @@ int bar(int n){ // CHECK22-NEXT: store i64 3, i64* [[DOTOMP_UB]], align 8 // CHECK22-NEXT: store i64 1, i64* [[DOTOMP_STRIDE]], align 8 // CHECK22-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK22-NEXT: [[TMP4:%.*]] = load i8, i8* [[CONV3]], align 8 +// CHECK22-NEXT: [[TMP4:%.*]] = load i8, i8* [[CONV3]], align 1 // CHECK22-NEXT: [[TOBOOL:%.*]] = trunc i8 [[TMP4]] to i1 // CHECK22-NEXT: br i1 [[TOBOOL]], label [[OMP_IF_THEN:%.*]], label [[OMP_IF_ELSE:%.*]] // CHECK22: omp_if.then: @@ -22111,7 +22111,7 @@ int bar(int n){ // CHECK22-NEXT: [[MUL:%.*]] = mul i64 [[TMP12]], 400 // CHECK22-NEXT: [[SUB:%.*]] = sub i64 2000, [[MUL]] // CHECK22-NEXT: store i64 [[SUB]], i64* [[IT]], align 8, !llvm.access.group !26 -// CHECK22-NEXT: [[TMP13:%.*]] = load i32, i32* [[CONV]], align 8, !llvm.access.group !26 +// CHECK22-NEXT: 
[[TMP13:%.*]] = load i32, i32* [[CONV]], align 4, !llvm.access.group !26 // CHECK22-NEXT: [[CONV5:%.*]] = sitofp i32 [[TMP13]] to double // CHECK22-NEXT: [[ADD:%.*]] = fadd double [[CONV5]], 1.500000e+00 // CHECK22-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_S1:%.*]], %struct.S1* [[TMP0]], i32 0, i32 0 @@ -22163,7 +22163,7 @@ int bar(int n){ // CHECK22-NEXT: [[MUL18:%.*]] = mul i64 [[TMP24]], 400 // CHECK22-NEXT: [[SUB19:%.*]] = sub i64 2000, [[MUL18]] // CHECK22-NEXT: store i64 [[SUB19]], i64* [[IT]], align 8 -// CHECK22-NEXT: [[TMP25:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK22-NEXT: [[TMP25:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK22-NEXT: [[CONV20:%.*]] = sitofp i32 [[TMP25]] to double // CHECK22-NEXT: [[ADD21:%.*]] = fadd double [[CONV20]], 1.500000e+00 // CHECK22-NEXT: [[A22:%.*]] = getelementptr inbounds [[STRUCT_S1]], %struct.S1* [[TMP0]], i32 0, i32 0 @@ -22217,11 +22217,11 @@ int bar(int n){ // CHECK22-NEXT: [[CONV:%.*]] = bitcast i64* [[A_ADDR]] to i32* // CHECK22-NEXT: [[CONV1:%.*]] = bitcast i64* [[AA_ADDR]] to i16* // CHECK22-NEXT: [[TMP0:%.*]] = load [10 x i32]*, [10 x i32]** [[B_ADDR]], align 8 -// CHECK22-NEXT: [[TMP1:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK22-NEXT: [[TMP1:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK22-NEXT: [[CONV2:%.*]] = bitcast i64* [[A_CASTED]] to i32* // CHECK22-NEXT: store i32 [[TMP1]], i32* [[CONV2]], align 4 // CHECK22-NEXT: [[TMP2:%.*]] = load i64, i64* [[A_CASTED]], align 8 -// CHECK22-NEXT: [[TMP3:%.*]] = load i16, i16* [[CONV1]], align 8 +// CHECK22-NEXT: [[TMP3:%.*]] = load i16, i16* [[CONV1]], align 2 // CHECK22-NEXT: [[CONV3:%.*]] = bitcast i64* [[AA_CASTED]] to i16* // CHECK22-NEXT: store i16 [[TMP3]], i16* [[CONV3]], align 2 // CHECK22-NEXT: [[TMP4:%.*]] = load i64, i64* [[AA_CASTED]], align 8 @@ -22283,14 +22283,14 @@ int bar(int n){ // CHECK22-NEXT: [[MUL:%.*]] = mul nsw i64 [[TMP8]], 3 // CHECK22-NEXT: [[ADD:%.*]] = add nsw i64 -10, [[MUL]] // CHECK22-NEXT: store i64 [[ADD]], i64* [[I]], align 8, !llvm.access.group !32 -// CHECK22-NEXT: [[TMP9:%.*]] = load i32, i32* [[CONV]], align 8, !llvm.access.group !32 +// CHECK22-NEXT: [[TMP9:%.*]] = load i32, i32* [[CONV]], align 4, !llvm.access.group !32 // CHECK22-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP9]], 1 -// CHECK22-NEXT: store i32 [[ADD3]], i32* [[CONV]], align 8, !llvm.access.group !32 -// CHECK22-NEXT: [[TMP10:%.*]] = load i16, i16* [[CONV1]], align 8, !llvm.access.group !32 +// CHECK22-NEXT: store i32 [[ADD3]], i32* [[CONV]], align 4, !llvm.access.group !32 +// CHECK22-NEXT: [[TMP10:%.*]] = load i16, i16* [[CONV1]], align 2, !llvm.access.group !32 // CHECK22-NEXT: [[CONV4:%.*]] = sext i16 [[TMP10]] to i32 // CHECK22-NEXT: [[ADD5:%.*]] = add nsw i32 [[CONV4]], 1 // CHECK22-NEXT: [[CONV6:%.*]] = trunc i32 [[ADD5]] to i16 -// CHECK22-NEXT: store i16 [[CONV6]], i16* [[CONV1]], align 8, !llvm.access.group !32 +// CHECK22-NEXT: store i16 [[CONV6]], i16* [[CONV1]], align 2, !llvm.access.group !32 // CHECK22-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], [10 x i32]* [[TMP0]], i64 0, i64 2 // CHECK22-NEXT: [[TMP11:%.*]] = load i32, i32* [[ARRAYIDX]], align 4, !llvm.access.group !32 // CHECK22-NEXT: [[ADD7:%.*]] = add nsw i32 [[TMP11]], 1 @@ -22404,7 +22404,7 @@ int bar(int n){ // CHECK23-NEXT: store i32 [[LIN]], i32* [[LIN_ADDR]], align 4 // CHECK23-NEXT: store i32 [[A]], i32* [[A_ADDR]], align 4 // CHECK23-NEXT: [[CONV:%.*]] = bitcast i32* [[AA_ADDR]] to i16* -// CHECK23-NEXT: [[TMP0:%.*]] = load i16, i16* [[CONV]], align 4 +// CHECK23-NEXT: 
[[TMP0:%.*]] = load i16, i16* [[CONV]], align 2 // CHECK23-NEXT: [[CONV1:%.*]] = bitcast i32* [[AA_CASTED]] to i16* // CHECK23-NEXT: store i16 [[TMP0]], i16* [[CONV1]], align 2 // CHECK23-NEXT: [[TMP1:%.*]] = load i32, i32* [[AA_CASTED]], align 4 @@ -22498,11 +22498,11 @@ int bar(int n){ // CHECK23-NEXT: [[ADD10:%.*]] = add i64 [[CONV8]], [[MUL9]] // CHECK23-NEXT: [[CONV11:%.*]] = trunc i64 [[ADD10]] to i32 // CHECK23-NEXT: store i32 [[CONV11]], i32* [[A3]], align 4, !llvm.access.group !18 -// CHECK23-NEXT: [[TMP16:%.*]] = load i16, i16* [[CONV]], align 4, !llvm.access.group !18 +// CHECK23-NEXT: [[TMP16:%.*]] = load i16, i16* [[CONV]], align 2, !llvm.access.group !18 // CHECK23-NEXT: [[CONV12:%.*]] = sext i16 [[TMP16]] to i32 // CHECK23-NEXT: [[ADD13:%.*]] = add nsw i32 [[CONV12]], 1 // CHECK23-NEXT: [[CONV14:%.*]] = trunc i32 [[ADD13]] to i16 -// CHECK23-NEXT: store i16 [[CONV14]], i16* [[CONV]], align 4, !llvm.access.group !18 +// CHECK23-NEXT: store i16 [[CONV14]], i16* [[CONV]], align 2, !llvm.access.group !18 // CHECK23-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK23: omp.body.continue: // CHECK23-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] @@ -22554,7 +22554,7 @@ int bar(int n){ // CHECK23-NEXT: [[TMP0:%.*]] = load i32, i32* [[A_ADDR]], align 4 // CHECK23-NEXT: store i32 [[TMP0]], i32* [[A_CASTED]], align 4 // CHECK23-NEXT: [[TMP1:%.*]] = load i32, i32* [[A_CASTED]], align 4 -// CHECK23-NEXT: [[TMP2:%.*]] = load i16, i16* [[CONV]], align 4 +// CHECK23-NEXT: [[TMP2:%.*]] = load i16, i16* [[CONV]], align 2 // CHECK23-NEXT: [[CONV1:%.*]] = bitcast i32* [[AA_CASTED]] to i16* // CHECK23-NEXT: store i16 [[TMP2]], i16* [[CONV1]], align 2 // CHECK23-NEXT: [[TMP3:%.*]] = load i32, i32* [[AA_CASTED]], align 4 @@ -22616,11 +22616,11 @@ int bar(int n){ // CHECK23-NEXT: [[TMP8:%.*]] = load i32, i32* [[A_ADDR]], align 4, !llvm.access.group !21 // CHECK23-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP8]], 1 // CHECK23-NEXT: store i32 [[ADD3]], i32* [[A_ADDR]], align 4, !llvm.access.group !21 -// CHECK23-NEXT: [[TMP9:%.*]] = load i16, i16* [[CONV]], align 4, !llvm.access.group !21 +// CHECK23-NEXT: [[TMP9:%.*]] = load i16, i16* [[CONV]], align 2, !llvm.access.group !21 // CHECK23-NEXT: [[CONV4:%.*]] = sext i16 [[TMP9]] to i32 // CHECK23-NEXT: [[ADD5:%.*]] = add nsw i32 [[CONV4]], 1 // CHECK23-NEXT: [[CONV6:%.*]] = trunc i32 [[ADD5]] to i16 -// CHECK23-NEXT: store i16 [[CONV6]], i16* [[CONV]], align 4, !llvm.access.group !21 +// CHECK23-NEXT: store i16 [[CONV6]], i16* [[CONV]], align 2, !llvm.access.group !21 // CHECK23-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK23: omp.body.continue: // CHECK23-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] @@ -22856,11 +22856,11 @@ int bar(int n){ // CHECK23-NEXT: [[TMP1:%.*]] = load i32, i32* [[A_ADDR]], align 4 // CHECK23-NEXT: store i32 [[TMP1]], i32* [[A_CASTED]], align 4 // CHECK23-NEXT: [[TMP2:%.*]] = load i32, i32* [[A_CASTED]], align 4 -// CHECK23-NEXT: [[TMP3:%.*]] = load i16, i16* [[CONV]], align 4 +// CHECK23-NEXT: [[TMP3:%.*]] = load i16, i16* [[CONV]], align 2 // CHECK23-NEXT: [[CONV2:%.*]] = bitcast i32* [[AA_CASTED]] to i16* // CHECK23-NEXT: store i16 [[TMP3]], i16* [[CONV2]], align 2 // CHECK23-NEXT: [[TMP4:%.*]] = load i32, i32* [[AA_CASTED]], align 4 -// CHECK23-NEXT: [[TMP5:%.*]] = load i8, i8* [[CONV1]], align 4 +// CHECK23-NEXT: [[TMP5:%.*]] = load i8, i8* [[CONV1]], align 1 // CHECK23-NEXT: [[CONV3:%.*]] = bitcast i32* [[AAA_CASTED]] to i8* // CHECK23-NEXT: store i8 [[TMP5]], i8* [[CONV3]], align 1 // CHECK23-NEXT: [[TMP6:%.*]] = load i32, i32* 
[[AAA_CASTED]], align 4 @@ -22919,13 +22919,13 @@ int bar(int n){ // CHECK23-NEXT: [[TMP5:%.*]] = load i32, i32* [[B_ADDR]], align 4 // CHECK23-NEXT: store i32 [[TMP5]], i32* [[B_CASTED]], align 4 // CHECK23-NEXT: [[TMP6:%.*]] = load i32, i32* [[B_CASTED]], align 4 -// CHECK23-NEXT: [[TMP7:%.*]] = load i8, i8* [[CONV]], align 4 +// CHECK23-NEXT: [[TMP7:%.*]] = load i8, i8* [[CONV]], align 1 // CHECK23-NEXT: [[TOBOOL:%.*]] = trunc i8 [[TMP7]] to i1 // CHECK23-NEXT: [[CONV3:%.*]] = bitcast i32* [[DOTCAPTURE_EXPR__CASTED]] to i8* // CHECK23-NEXT: [[FROMBOOL:%.*]] = zext i1 [[TOBOOL]] to i8 // CHECK23-NEXT: store i8 [[FROMBOOL]], i8* [[CONV3]], align 1 // CHECK23-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR__CASTED]], align 4 -// CHECK23-NEXT: [[TMP9:%.*]] = load i8, i8* [[CONV]], align 4 +// CHECK23-NEXT: [[TMP9:%.*]] = load i8, i8* [[CONV]], align 1 // CHECK23-NEXT: [[TOBOOL4:%.*]] = trunc i8 [[TMP9]] to i1 // CHECK23-NEXT: br i1 [[TOBOOL4]], label [[OMP_IF_THEN:%.*]], label [[OMP_IF_ELSE:%.*]] // CHECK23: omp_if.then: @@ -22977,7 +22977,7 @@ int bar(int n){ // CHECK23-NEXT: store i64 3, i64* [[DOTOMP_UB]], align 8 // CHECK23-NEXT: store i64 1, i64* [[DOTOMP_STRIDE]], align 8 // CHECK23-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK23-NEXT: [[TMP4:%.*]] = load i8, i8* [[CONV]], align 4 +// CHECK23-NEXT: [[TMP4:%.*]] = load i8, i8* [[CONV]], align 1 // CHECK23-NEXT: [[TOBOOL:%.*]] = trunc i8 [[TMP4]] to i1 // CHECK23-NEXT: br i1 [[TOBOOL]], label [[OMP_IF_THEN:%.*]], label [[OMP_IF_ELSE:%.*]] // CHECK23: omp_if.then: @@ -23116,7 +23116,7 @@ int bar(int n){ // CHECK23-NEXT: [[TMP1:%.*]] = load i32, i32* [[A_ADDR]], align 4 // CHECK23-NEXT: store i32 [[TMP1]], i32* [[A_CASTED]], align 4 // CHECK23-NEXT: [[TMP2:%.*]] = load i32, i32* [[A_CASTED]], align 4 -// CHECK23-NEXT: [[TMP3:%.*]] = load i16, i16* [[CONV]], align 4 +// CHECK23-NEXT: [[TMP3:%.*]] = load i16, i16* [[CONV]], align 2 // CHECK23-NEXT: [[CONV1:%.*]] = bitcast i32* [[AA_CASTED]] to i16* // CHECK23-NEXT: store i16 [[TMP3]], i16* [[CONV1]], align 2 // CHECK23-NEXT: [[TMP4:%.*]] = load i32, i32* [[AA_CASTED]], align 4 @@ -23180,11 +23180,11 @@ int bar(int n){ // CHECK23-NEXT: [[TMP9:%.*]] = load i32, i32* [[A_ADDR]], align 4, !llvm.access.group !33 // CHECK23-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP9]], 1 // CHECK23-NEXT: store i32 [[ADD2]], i32* [[A_ADDR]], align 4, !llvm.access.group !33 -// CHECK23-NEXT: [[TMP10:%.*]] = load i16, i16* [[CONV]], align 4, !llvm.access.group !33 +// CHECK23-NEXT: [[TMP10:%.*]] = load i16, i16* [[CONV]], align 2, !llvm.access.group !33 // CHECK23-NEXT: [[CONV3:%.*]] = sext i16 [[TMP10]] to i32 // CHECK23-NEXT: [[ADD4:%.*]] = add nsw i32 [[CONV3]], 1 // CHECK23-NEXT: [[CONV5:%.*]] = trunc i32 [[ADD4]] to i16 -// CHECK23-NEXT: store i16 [[CONV5]], i16* [[CONV]], align 4, !llvm.access.group !33 +// CHECK23-NEXT: store i16 [[CONV5]], i16* [[CONV]], align 2, !llvm.access.group !33 // CHECK23-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], [10 x i32]* [[TMP0]], i32 0, i32 2 // CHECK23-NEXT: [[TMP11:%.*]] = load i32, i32* [[ARRAYIDX]], align 4, !llvm.access.group !33 // CHECK23-NEXT: [[ADD6:%.*]] = add nsw i32 [[TMP11]], 1 @@ -23298,7 +23298,7 @@ int bar(int n){ // CHECK24-NEXT: store i32 [[LIN]], i32* [[LIN_ADDR]], align 4 // CHECK24-NEXT: store i32 [[A]], i32* [[A_ADDR]], align 4 // CHECK24-NEXT: [[CONV:%.*]] = bitcast i32* [[AA_ADDR]] to i16* -// CHECK24-NEXT: [[TMP0:%.*]] = load i16, i16* [[CONV]], align 4 +// CHECK24-NEXT: [[TMP0:%.*]] = load i16, i16* [[CONV]], align 2 
// CHECK24-NEXT: [[CONV1:%.*]] = bitcast i32* [[AA_CASTED]] to i16* // CHECK24-NEXT: store i16 [[TMP0]], i16* [[CONV1]], align 2 // CHECK24-NEXT: [[TMP1:%.*]] = load i32, i32* [[AA_CASTED]], align 4 @@ -23392,11 +23392,11 @@ int bar(int n){ // CHECK24-NEXT: [[ADD10:%.*]] = add i64 [[CONV8]], [[MUL9]] // CHECK24-NEXT: [[CONV11:%.*]] = trunc i64 [[ADD10]] to i32 // CHECK24-NEXT: store i32 [[CONV11]], i32* [[A3]], align 4, !llvm.access.group !18 -// CHECK24-NEXT: [[TMP16:%.*]] = load i16, i16* [[CONV]], align 4, !llvm.access.group !18 +// CHECK24-NEXT: [[TMP16:%.*]] = load i16, i16* [[CONV]], align 2, !llvm.access.group !18 // CHECK24-NEXT: [[CONV12:%.*]] = sext i16 [[TMP16]] to i32 // CHECK24-NEXT: [[ADD13:%.*]] = add nsw i32 [[CONV12]], 1 // CHECK24-NEXT: [[CONV14:%.*]] = trunc i32 [[ADD13]] to i16 -// CHECK24-NEXT: store i16 [[CONV14]], i16* [[CONV]], align 4, !llvm.access.group !18 +// CHECK24-NEXT: store i16 [[CONV14]], i16* [[CONV]], align 2, !llvm.access.group !18 // CHECK24-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK24: omp.body.continue: // CHECK24-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] @@ -23448,7 +23448,7 @@ int bar(int n){ // CHECK24-NEXT: [[TMP0:%.*]] = load i32, i32* [[A_ADDR]], align 4 // CHECK24-NEXT: store i32 [[TMP0]], i32* [[A_CASTED]], align 4 // CHECK24-NEXT: [[TMP1:%.*]] = load i32, i32* [[A_CASTED]], align 4 -// CHECK24-NEXT: [[TMP2:%.*]] = load i16, i16* [[CONV]], align 4 +// CHECK24-NEXT: [[TMP2:%.*]] = load i16, i16* [[CONV]], align 2 // CHECK24-NEXT: [[CONV1:%.*]] = bitcast i32* [[AA_CASTED]] to i16* // CHECK24-NEXT: store i16 [[TMP2]], i16* [[CONV1]], align 2 // CHECK24-NEXT: [[TMP3:%.*]] = load i32, i32* [[AA_CASTED]], align 4 @@ -23510,11 +23510,11 @@ int bar(int n){ // CHECK24-NEXT: [[TMP8:%.*]] = load i32, i32* [[A_ADDR]], align 4, !llvm.access.group !21 // CHECK24-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP8]], 1 // CHECK24-NEXT: store i32 [[ADD3]], i32* [[A_ADDR]], align 4, !llvm.access.group !21 -// CHECK24-NEXT: [[TMP9:%.*]] = load i16, i16* [[CONV]], align 4, !llvm.access.group !21 +// CHECK24-NEXT: [[TMP9:%.*]] = load i16, i16* [[CONV]], align 2, !llvm.access.group !21 // CHECK24-NEXT: [[CONV4:%.*]] = sext i16 [[TMP9]] to i32 // CHECK24-NEXT: [[ADD5:%.*]] = add nsw i32 [[CONV4]], 1 // CHECK24-NEXT: [[CONV6:%.*]] = trunc i32 [[ADD5]] to i16 -// CHECK24-NEXT: store i16 [[CONV6]], i16* [[CONV]], align 4, !llvm.access.group !21 +// CHECK24-NEXT: store i16 [[CONV6]], i16* [[CONV]], align 2, !llvm.access.group !21 // CHECK24-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK24: omp.body.continue: // CHECK24-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] @@ -23750,11 +23750,11 @@ int bar(int n){ // CHECK24-NEXT: [[TMP1:%.*]] = load i32, i32* [[A_ADDR]], align 4 // CHECK24-NEXT: store i32 [[TMP1]], i32* [[A_CASTED]], align 4 // CHECK24-NEXT: [[TMP2:%.*]] = load i32, i32* [[A_CASTED]], align 4 -// CHECK24-NEXT: [[TMP3:%.*]] = load i16, i16* [[CONV]], align 4 +// CHECK24-NEXT: [[TMP3:%.*]] = load i16, i16* [[CONV]], align 2 // CHECK24-NEXT: [[CONV2:%.*]] = bitcast i32* [[AA_CASTED]] to i16* // CHECK24-NEXT: store i16 [[TMP3]], i16* [[CONV2]], align 2 // CHECK24-NEXT: [[TMP4:%.*]] = load i32, i32* [[AA_CASTED]], align 4 -// CHECK24-NEXT: [[TMP5:%.*]] = load i8, i8* [[CONV1]], align 4 +// CHECK24-NEXT: [[TMP5:%.*]] = load i8, i8* [[CONV1]], align 1 // CHECK24-NEXT: [[CONV3:%.*]] = bitcast i32* [[AAA_CASTED]] to i8* // CHECK24-NEXT: store i8 [[TMP5]], i8* [[CONV3]], align 1 // CHECK24-NEXT: [[TMP6:%.*]] = load i32, i32* [[AAA_CASTED]], align 4 @@ -23813,13 +23813,13 @@ 
int bar(int n){ // CHECK24-NEXT: [[TMP5:%.*]] = load i32, i32* [[B_ADDR]], align 4 // CHECK24-NEXT: store i32 [[TMP5]], i32* [[B_CASTED]], align 4 // CHECK24-NEXT: [[TMP6:%.*]] = load i32, i32* [[B_CASTED]], align 4 -// CHECK24-NEXT: [[TMP7:%.*]] = load i8, i8* [[CONV]], align 4 +// CHECK24-NEXT: [[TMP7:%.*]] = load i8, i8* [[CONV]], align 1 // CHECK24-NEXT: [[TOBOOL:%.*]] = trunc i8 [[TMP7]] to i1 // CHECK24-NEXT: [[CONV3:%.*]] = bitcast i32* [[DOTCAPTURE_EXPR__CASTED]] to i8* // CHECK24-NEXT: [[FROMBOOL:%.*]] = zext i1 [[TOBOOL]] to i8 // CHECK24-NEXT: store i8 [[FROMBOOL]], i8* [[CONV3]], align 1 // CHECK24-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR__CASTED]], align 4 -// CHECK24-NEXT: [[TMP9:%.*]] = load i8, i8* [[CONV]], align 4 +// CHECK24-NEXT: [[TMP9:%.*]] = load i8, i8* [[CONV]], align 1 // CHECK24-NEXT: [[TOBOOL4:%.*]] = trunc i8 [[TMP9]] to i1 // CHECK24-NEXT: br i1 [[TOBOOL4]], label [[OMP_IF_THEN:%.*]], label [[OMP_IF_ELSE:%.*]] // CHECK24: omp_if.then: @@ -23871,7 +23871,7 @@ int bar(int n){ // CHECK24-NEXT: store i64 3, i64* [[DOTOMP_UB]], align 8 // CHECK24-NEXT: store i64 1, i64* [[DOTOMP_STRIDE]], align 8 // CHECK24-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK24-NEXT: [[TMP4:%.*]] = load i8, i8* [[CONV]], align 4 +// CHECK24-NEXT: [[TMP4:%.*]] = load i8, i8* [[CONV]], align 1 // CHECK24-NEXT: [[TOBOOL:%.*]] = trunc i8 [[TMP4]] to i1 // CHECK24-NEXT: br i1 [[TOBOOL]], label [[OMP_IF_THEN:%.*]], label [[OMP_IF_ELSE:%.*]] // CHECK24: omp_if.then: @@ -24010,7 +24010,7 @@ int bar(int n){ // CHECK24-NEXT: [[TMP1:%.*]] = load i32, i32* [[A_ADDR]], align 4 // CHECK24-NEXT: store i32 [[TMP1]], i32* [[A_CASTED]], align 4 // CHECK24-NEXT: [[TMP2:%.*]] = load i32, i32* [[A_CASTED]], align 4 -// CHECK24-NEXT: [[TMP3:%.*]] = load i16, i16* [[CONV]], align 4 +// CHECK24-NEXT: [[TMP3:%.*]] = load i16, i16* [[CONV]], align 2 // CHECK24-NEXT: [[CONV1:%.*]] = bitcast i32* [[AA_CASTED]] to i16* // CHECK24-NEXT: store i16 [[TMP3]], i16* [[CONV1]], align 2 // CHECK24-NEXT: [[TMP4:%.*]] = load i32, i32* [[AA_CASTED]], align 4 @@ -24074,11 +24074,11 @@ int bar(int n){ // CHECK24-NEXT: [[TMP9:%.*]] = load i32, i32* [[A_ADDR]], align 4, !llvm.access.group !33 // CHECK24-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP9]], 1 // CHECK24-NEXT: store i32 [[ADD2]], i32* [[A_ADDR]], align 4, !llvm.access.group !33 -// CHECK24-NEXT: [[TMP10:%.*]] = load i16, i16* [[CONV]], align 4, !llvm.access.group !33 +// CHECK24-NEXT: [[TMP10:%.*]] = load i16, i16* [[CONV]], align 2, !llvm.access.group !33 // CHECK24-NEXT: [[CONV3:%.*]] = sext i16 [[TMP10]] to i32 // CHECK24-NEXT: [[ADD4:%.*]] = add nsw i32 [[CONV3]], 1 // CHECK24-NEXT: [[CONV5:%.*]] = trunc i32 [[ADD4]] to i16 -// CHECK24-NEXT: store i16 [[CONV5]], i16* [[CONV]], align 4, !llvm.access.group !33 +// CHECK24-NEXT: store i16 [[CONV5]], i16* [[CONV]], align 2, !llvm.access.group !33 // CHECK24-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], [10 x i32]* [[TMP0]], i32 0, i32 2 // CHECK24-NEXT: [[TMP11:%.*]] = load i32, i32* [[ARRAYIDX]], align 4, !llvm.access.group !33 // CHECK24-NEXT: [[ADD6:%.*]] = add nsw i32 [[TMP11]], 1 diff --git a/clang/test/OpenMP/target_parallel_if_codegen.cpp b/clang/test/OpenMP/target_parallel_if_codegen.cpp index 68590aeffcb5..750310cd908a 100644 --- a/clang/test/OpenMP/target_parallel_if_codegen.cpp +++ b/clang/test/OpenMP/target_parallel_if_codegen.cpp @@ -498,11 +498,11 @@ int bar(int n){ // CHECK1-NEXT: [[TMP1:%.*]] = load %struct.S1*, %struct.S1** [[THIS_ADDR]], align 8 // CHECK1-NEXT: 
[[CONV:%.*]] = bitcast i64* [[B_ADDR]] to i32* // CHECK1-NEXT: [[CONV1:%.*]] = bitcast i64* [[DOTCAPTURE_EXPR__ADDR]] to i8* -// CHECK1-NEXT: [[TMP2:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK1-NEXT: [[TMP2:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK1-NEXT: [[CONV2:%.*]] = bitcast i64* [[B_CASTED]] to i32* // CHECK1-NEXT: store i32 [[TMP2]], i32* [[CONV2]], align 4 // CHECK1-NEXT: [[TMP3:%.*]] = load i64, i64* [[B_CASTED]], align 8 -// CHECK1-NEXT: [[TMP4:%.*]] = load i8, i8* [[CONV1]], align 8 +// CHECK1-NEXT: [[TMP4:%.*]] = load i8, i8* [[CONV1]], align 1 // CHECK1-NEXT: [[TOBOOL:%.*]] = trunc i8 [[TMP4]] to i1 // CHECK1-NEXT: br i1 [[TOBOOL]], label [[OMP_IF_THEN:%.*]], label [[OMP_IF_ELSE:%.*]] // CHECK1: omp_if.then: @@ -532,7 +532,7 @@ int bar(int n){ // CHECK1-NEXT: store i64 [[B]], i64* [[B_ADDR]], align 8 // CHECK1-NEXT: [[TMP0:%.*]] = load %struct.S1*, %struct.S1** [[THIS_ADDR]], align 8 // CHECK1-NEXT: [[CONV:%.*]] = bitcast i64* [[B_ADDR]] to i32* -// CHECK1-NEXT: [[TMP1:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK1-NEXT: [[CONV1:%.*]] = sitofp i32 [[TMP1]] to double // CHECK1-NEXT: [[ADD:%.*]] = fadd double [[CONV1]], 1.500000e+00 // CHECK1-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_S1:%.*]], %struct.S1* [[TMP0]], i32 0, i32 0 @@ -552,7 +552,7 @@ int bar(int n){ // CHECK1-NEXT: store i64 [[DOTCAPTURE_EXPR_]], i64* [[DOTCAPTURE_EXPR__ADDR]], align 8 // CHECK1-NEXT: [[TMP1:%.*]] = load %struct.S1*, %struct.S1** [[THIS_ADDR]], align 8 // CHECK1-NEXT: [[CONV:%.*]] = bitcast i64* [[DOTCAPTURE_EXPR__ADDR]] to i8* -// CHECK1-NEXT: [[TMP2:%.*]] = load i8, i8* [[CONV]], align 8 +// CHECK1-NEXT: [[TMP2:%.*]] = load i8, i8* [[CONV]], align 1 // CHECK1-NEXT: [[TOBOOL:%.*]] = trunc i8 [[TMP2]] to i1 // CHECK1-NEXT: br i1 [[TOBOOL]], label [[OMP_IF_THEN:%.*]], label [[OMP_IF_ELSE:%.*]] // CHECK1: omp_if.then: @@ -593,7 +593,7 @@ int bar(int n){ // CHECK1-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) // CHECK1-NEXT: store i64 [[DOTCAPTURE_EXPR_]], i64* [[DOTCAPTURE_EXPR__ADDR]], align 8 // CHECK1-NEXT: [[CONV:%.*]] = bitcast i64* [[DOTCAPTURE_EXPR__ADDR]] to i8* -// CHECK1-NEXT: [[TMP1:%.*]] = load i8, i8* [[CONV]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = load i8, i8* [[CONV]], align 1 // CHECK1-NEXT: [[TOBOOL:%.*]] = trunc i8 [[TMP1]] to i1 // CHECK1-NEXT: br i1 [[TOBOOL]], label [[OMP_IF_THEN:%.*]], label [[OMP_IF_ELSE:%.*]] // CHECK1: omp_if.then: @@ -647,7 +647,7 @@ int bar(int n){ // CHECK1-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) // CHECK1-NEXT: store i64 [[A]], i64* [[A_ADDR]], align 8 // CHECK1-NEXT: [[CONV:%.*]] = bitcast i64* [[A_ADDR]] to i32* -// CHECK1-NEXT: [[TMP1:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK1-NEXT: [[CONV1:%.*]] = bitcast i64* [[A_CASTED]] to i32* // CHECK1-NEXT: store i32 [[TMP1]], i32* [[CONV1]], align 4 // CHECK1-NEXT: [[TMP2:%.*]] = load i64, i64* [[A_CASTED]], align 8 @@ -669,9 +669,9 @@ int bar(int n){ // CHECK1-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 // CHECK1-NEXT: store i64 [[A]], i64* [[A_ADDR]], align 8 // CHECK1-NEXT: [[CONV:%.*]] = bitcast i64* [[A_ADDR]] to i32* -// CHECK1-NEXT: [[TMP0:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP0]], 1 -// CHECK1-NEXT: store i32 
[[ADD]], i32* [[CONV]], align 8 +// CHECK1-NEXT: store i32 [[ADD]], i32* [[CONV]], align 4 // CHECK1-NEXT: ret void // // @@ -686,11 +686,11 @@ int bar(int n){ // CHECK1-NEXT: store i64 [[B]], i64* [[B_ADDR]], align 8 // CHECK1-NEXT: [[CONV:%.*]] = bitcast i64* [[A_ADDR]] to i32* // CHECK1-NEXT: [[CONV1:%.*]] = bitcast i64* [[B_ADDR]] to i16* -// CHECK1-NEXT: [[TMP0:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK1-NEXT: [[CONV2:%.*]] = bitcast i64* [[A_CASTED]] to i32* // CHECK1-NEXT: store i32 [[TMP0]], i32* [[CONV2]], align 4 // CHECK1-NEXT: [[TMP1:%.*]] = load i64, i64* [[A_CASTED]], align 8 -// CHECK1-NEXT: [[TMP2:%.*]] = load i16, i16* [[CONV1]], align 8 +// CHECK1-NEXT: [[TMP2:%.*]] = load i16, i16* [[CONV1]], align 2 // CHECK1-NEXT: [[CONV3:%.*]] = bitcast i64* [[B_CASTED]] to i16* // CHECK1-NEXT: store i16 [[TMP2]], i16* [[CONV3]], align 2 // CHECK1-NEXT: [[TMP3:%.*]] = load i64, i64* [[B_CASTED]], align 8 @@ -711,11 +711,11 @@ int bar(int n){ // CHECK1-NEXT: store i64 [[B]], i64* [[B_ADDR]], align 8 // CHECK1-NEXT: [[CONV:%.*]] = bitcast i64* [[A_ADDR]] to i32* // CHECK1-NEXT: [[CONV1:%.*]] = bitcast i64* [[B_ADDR]] to i16* -// CHECK1-NEXT: [[TMP0:%.*]] = load i16, i16* [[CONV1]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load i16, i16* [[CONV1]], align 2 // CHECK1-NEXT: [[CONV2:%.*]] = sext i16 [[TMP0]] to i32 -// CHECK1-NEXT: [[TMP1:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP1]], [[CONV2]] -// CHECK1-NEXT: store i32 [[ADD]], i32* [[CONV]], align 8 +// CHECK1-NEXT: store i32 [[ADD]], i32* [[CONV]], align 4 // CHECK1-NEXT: ret void // // @@ -1042,11 +1042,11 @@ int bar(int n){ // CHECK2-NEXT: [[TMP1:%.*]] = load %struct.S1*, %struct.S1** [[THIS_ADDR]], align 8 // CHECK2-NEXT: [[CONV:%.*]] = bitcast i64* [[B_ADDR]] to i32* // CHECK2-NEXT: [[CONV1:%.*]] = bitcast i64* [[DOTCAPTURE_EXPR__ADDR]] to i8* -// CHECK2-NEXT: [[TMP2:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK2-NEXT: [[TMP2:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK2-NEXT: [[CONV2:%.*]] = bitcast i64* [[B_CASTED]] to i32* // CHECK2-NEXT: store i32 [[TMP2]], i32* [[CONV2]], align 4 // CHECK2-NEXT: [[TMP3:%.*]] = load i64, i64* [[B_CASTED]], align 8 -// CHECK2-NEXT: [[TMP4:%.*]] = load i8, i8* [[CONV1]], align 8 +// CHECK2-NEXT: [[TMP4:%.*]] = load i8, i8* [[CONV1]], align 1 // CHECK2-NEXT: [[TOBOOL:%.*]] = trunc i8 [[TMP4]] to i1 // CHECK2-NEXT: br i1 [[TOBOOL]], label [[OMP_IF_THEN:%.*]], label [[OMP_IF_ELSE:%.*]] // CHECK2: omp_if.then: @@ -1076,7 +1076,7 @@ int bar(int n){ // CHECK2-NEXT: store i64 [[B]], i64* [[B_ADDR]], align 8 // CHECK2-NEXT: [[TMP0:%.*]] = load %struct.S1*, %struct.S1** [[THIS_ADDR]], align 8 // CHECK2-NEXT: [[CONV:%.*]] = bitcast i64* [[B_ADDR]] to i32* -// CHECK2-NEXT: [[TMP1:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK2-NEXT: [[TMP1:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK2-NEXT: [[CONV1:%.*]] = sitofp i32 [[TMP1]] to double // CHECK2-NEXT: [[ADD:%.*]] = fadd double [[CONV1]], 1.500000e+00 // CHECK2-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_S1:%.*]], %struct.S1* [[TMP0]], i32 0, i32 0 @@ -1096,7 +1096,7 @@ int bar(int n){ // CHECK2-NEXT: store i64 [[DOTCAPTURE_EXPR_]], i64* [[DOTCAPTURE_EXPR__ADDR]], align 8 // CHECK2-NEXT: [[TMP1:%.*]] = load %struct.S1*, %struct.S1** [[THIS_ADDR]], align 8 // CHECK2-NEXT: [[CONV:%.*]] = bitcast i64* [[DOTCAPTURE_EXPR__ADDR]] to i8* -// CHECK2-NEXT: 
[[TMP2:%.*]] = load i8, i8* [[CONV]], align 8 +// CHECK2-NEXT: [[TMP2:%.*]] = load i8, i8* [[CONV]], align 1 // CHECK2-NEXT: [[TOBOOL:%.*]] = trunc i8 [[TMP2]] to i1 // CHECK2-NEXT: br i1 [[TOBOOL]], label [[OMP_IF_THEN:%.*]], label [[OMP_IF_ELSE:%.*]] // CHECK2: omp_if.then: @@ -1137,7 +1137,7 @@ int bar(int n){ // CHECK2-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) // CHECK2-NEXT: store i64 [[DOTCAPTURE_EXPR_]], i64* [[DOTCAPTURE_EXPR__ADDR]], align 8 // CHECK2-NEXT: [[CONV:%.*]] = bitcast i64* [[DOTCAPTURE_EXPR__ADDR]] to i8* -// CHECK2-NEXT: [[TMP1:%.*]] = load i8, i8* [[CONV]], align 8 +// CHECK2-NEXT: [[TMP1:%.*]] = load i8, i8* [[CONV]], align 1 // CHECK2-NEXT: [[TOBOOL:%.*]] = trunc i8 [[TMP1]] to i1 // CHECK2-NEXT: br i1 [[TOBOOL]], label [[OMP_IF_THEN:%.*]], label [[OMP_IF_ELSE:%.*]] // CHECK2: omp_if.then: @@ -1191,7 +1191,7 @@ int bar(int n){ // CHECK2-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) // CHECK2-NEXT: store i64 [[A]], i64* [[A_ADDR]], align 8 // CHECK2-NEXT: [[CONV:%.*]] = bitcast i64* [[A_ADDR]] to i32* -// CHECK2-NEXT: [[TMP1:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK2-NEXT: [[TMP1:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK2-NEXT: [[CONV1:%.*]] = bitcast i64* [[A_CASTED]] to i32* // CHECK2-NEXT: store i32 [[TMP1]], i32* [[CONV1]], align 4 // CHECK2-NEXT: [[TMP2:%.*]] = load i64, i64* [[A_CASTED]], align 8 @@ -1213,9 +1213,9 @@ int bar(int n){ // CHECK2-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 // CHECK2-NEXT: store i64 [[A]], i64* [[A_ADDR]], align 8 // CHECK2-NEXT: [[CONV:%.*]] = bitcast i64* [[A_ADDR]] to i32* -// CHECK2-NEXT: [[TMP0:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK2-NEXT: [[TMP0:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK2-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP0]], 1 -// CHECK2-NEXT: store i32 [[ADD]], i32* [[CONV]], align 8 +// CHECK2-NEXT: store i32 [[ADD]], i32* [[CONV]], align 4 // CHECK2-NEXT: ret void // // @@ -1230,11 +1230,11 @@ int bar(int n){ // CHECK2-NEXT: store i64 [[B]], i64* [[B_ADDR]], align 8 // CHECK2-NEXT: [[CONV:%.*]] = bitcast i64* [[A_ADDR]] to i32* // CHECK2-NEXT: [[CONV1:%.*]] = bitcast i64* [[B_ADDR]] to i16* -// CHECK2-NEXT: [[TMP0:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK2-NEXT: [[TMP0:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK2-NEXT: [[CONV2:%.*]] = bitcast i64* [[A_CASTED]] to i32* // CHECK2-NEXT: store i32 [[TMP0]], i32* [[CONV2]], align 4 // CHECK2-NEXT: [[TMP1:%.*]] = load i64, i64* [[A_CASTED]], align 8 -// CHECK2-NEXT: [[TMP2:%.*]] = load i16, i16* [[CONV1]], align 8 +// CHECK2-NEXT: [[TMP2:%.*]] = load i16, i16* [[CONV1]], align 2 // CHECK2-NEXT: [[CONV3:%.*]] = bitcast i64* [[B_CASTED]] to i16* // CHECK2-NEXT: store i16 [[TMP2]], i16* [[CONV3]], align 2 // CHECK2-NEXT: [[TMP3:%.*]] = load i64, i64* [[B_CASTED]], align 8 @@ -1255,11 +1255,11 @@ int bar(int n){ // CHECK2-NEXT: store i64 [[B]], i64* [[B_ADDR]], align 8 // CHECK2-NEXT: [[CONV:%.*]] = bitcast i64* [[A_ADDR]] to i32* // CHECK2-NEXT: [[CONV1:%.*]] = bitcast i64* [[B_ADDR]] to i16* -// CHECK2-NEXT: [[TMP0:%.*]] = load i16, i16* [[CONV1]], align 8 +// CHECK2-NEXT: [[TMP0:%.*]] = load i16, i16* [[CONV1]], align 2 // CHECK2-NEXT: [[CONV2:%.*]] = sext i16 [[TMP0]] to i32 -// CHECK2-NEXT: [[TMP1:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK2-NEXT: [[TMP1:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK2-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP1]], [[CONV2]] -// CHECK2-NEXT: store i32 [[ADD]], i32* [[CONV]], 
align 8 +// CHECK2-NEXT: store i32 [[ADD]], i32* [[CONV]], align 4 // CHECK2-NEXT: ret void // // @@ -1585,7 +1585,7 @@ int bar(int n){ // CHECK3-NEXT: [[TMP2:%.*]] = load i32, i32* [[B_ADDR]], align 4 // CHECK3-NEXT: store i32 [[TMP2]], i32* [[B_CASTED]], align 4 // CHECK3-NEXT: [[TMP3:%.*]] = load i32, i32* [[B_CASTED]], align 4 -// CHECK3-NEXT: [[TMP4:%.*]] = load i8, i8* [[CONV]], align 4 +// CHECK3-NEXT: [[TMP4:%.*]] = load i8, i8* [[CONV]], align 1 // CHECK3-NEXT: [[TOBOOL:%.*]] = trunc i8 [[TMP4]] to i1 // CHECK3-NEXT: br i1 [[TOBOOL]], label [[OMP_IF_THEN:%.*]], label [[OMP_IF_ELSE:%.*]] // CHECK3: omp_if.then: @@ -1634,7 +1634,7 @@ int bar(int n){ // CHECK3-NEXT: store i32 [[DOTCAPTURE_EXPR_]], i32* [[DOTCAPTURE_EXPR__ADDR]], align 4 // CHECK3-NEXT: [[TMP1:%.*]] = load %struct.S1*, %struct.S1** [[THIS_ADDR]], align 4 // CHECK3-NEXT: [[CONV:%.*]] = bitcast i32* [[DOTCAPTURE_EXPR__ADDR]] to i8* -// CHECK3-NEXT: [[TMP2:%.*]] = load i8, i8* [[CONV]], align 4 +// CHECK3-NEXT: [[TMP2:%.*]] = load i8, i8* [[CONV]], align 1 // CHECK3-NEXT: [[TOBOOL:%.*]] = trunc i8 [[TMP2]] to i1 // CHECK3-NEXT: br i1 [[TOBOOL]], label [[OMP_IF_THEN:%.*]], label [[OMP_IF_ELSE:%.*]] // CHECK3: omp_if.then: @@ -1675,7 +1675,7 @@ int bar(int n){ // CHECK3-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) // CHECK3-NEXT: store i32 [[DOTCAPTURE_EXPR_]], i32* [[DOTCAPTURE_EXPR__ADDR]], align 4 // CHECK3-NEXT: [[CONV:%.*]] = bitcast i32* [[DOTCAPTURE_EXPR__ADDR]] to i8* -// CHECK3-NEXT: [[TMP1:%.*]] = load i8, i8* [[CONV]], align 4 +// CHECK3-NEXT: [[TMP1:%.*]] = load i8, i8* [[CONV]], align 1 // CHECK3-NEXT: [[TOBOOL:%.*]] = trunc i8 [[TMP1]] to i1 // CHECK3-NEXT: br i1 [[TOBOOL]], label [[OMP_IF_THEN:%.*]], label [[OMP_IF_ELSE:%.*]] // CHECK3: omp_if.then: @@ -1767,7 +1767,7 @@ int bar(int n){ // CHECK3-NEXT: [[TMP0:%.*]] = load i32, i32* [[A_ADDR]], align 4 // CHECK3-NEXT: store i32 [[TMP0]], i32* [[A_CASTED]], align 4 // CHECK3-NEXT: [[TMP1:%.*]] = load i32, i32* [[A_CASTED]], align 4 -// CHECK3-NEXT: [[TMP2:%.*]] = load i16, i16* [[CONV]], align 4 +// CHECK3-NEXT: [[TMP2:%.*]] = load i16, i16* [[CONV]], align 2 // CHECK3-NEXT: [[CONV1:%.*]] = bitcast i32* [[B_CASTED]] to i16* // CHECK3-NEXT: store i16 [[TMP2]], i16* [[CONV1]], align 2 // CHECK3-NEXT: [[TMP3:%.*]] = load i32, i32* [[B_CASTED]], align 4 @@ -1787,7 +1787,7 @@ int bar(int n){ // CHECK3-NEXT: store i32 [[A]], i32* [[A_ADDR]], align 4 // CHECK3-NEXT: store i32 [[B]], i32* [[B_ADDR]], align 4 // CHECK3-NEXT: [[CONV:%.*]] = bitcast i32* [[B_ADDR]] to i16* -// CHECK3-NEXT: [[TMP0:%.*]] = load i16, i16* [[CONV]], align 4 +// CHECK3-NEXT: [[TMP0:%.*]] = load i16, i16* [[CONV]], align 2 // CHECK3-NEXT: [[CONV1:%.*]] = sext i16 [[TMP0]] to i32 // CHECK3-NEXT: [[TMP1:%.*]] = load i32, i32* [[A_ADDR]], align 4 // CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP1]], [[CONV1]] @@ -2117,7 +2117,7 @@ int bar(int n){ // CHECK4-NEXT: [[TMP2:%.*]] = load i32, i32* [[B_ADDR]], align 4 // CHECK4-NEXT: store i32 [[TMP2]], i32* [[B_CASTED]], align 4 // CHECK4-NEXT: [[TMP3:%.*]] = load i32, i32* [[B_CASTED]], align 4 -// CHECK4-NEXT: [[TMP4:%.*]] = load i8, i8* [[CONV]], align 4 +// CHECK4-NEXT: [[TMP4:%.*]] = load i8, i8* [[CONV]], align 1 // CHECK4-NEXT: [[TOBOOL:%.*]] = trunc i8 [[TMP4]] to i1 // CHECK4-NEXT: br i1 [[TOBOOL]], label [[OMP_IF_THEN:%.*]], label [[OMP_IF_ELSE:%.*]] // CHECK4: omp_if.then: @@ -2166,7 +2166,7 @@ int bar(int n){ // CHECK4-NEXT: store i32 [[DOTCAPTURE_EXPR_]], i32* [[DOTCAPTURE_EXPR__ADDR]], align 4 // 
CHECK4-NEXT: [[TMP1:%.*]] = load %struct.S1*, %struct.S1** [[THIS_ADDR]], align 4 // CHECK4-NEXT: [[CONV:%.*]] = bitcast i32* [[DOTCAPTURE_EXPR__ADDR]] to i8* -// CHECK4-NEXT: [[TMP2:%.*]] = load i8, i8* [[CONV]], align 4 +// CHECK4-NEXT: [[TMP2:%.*]] = load i8, i8* [[CONV]], align 1 // CHECK4-NEXT: [[TOBOOL:%.*]] = trunc i8 [[TMP2]] to i1 // CHECK4-NEXT: br i1 [[TOBOOL]], label [[OMP_IF_THEN:%.*]], label [[OMP_IF_ELSE:%.*]] // CHECK4: omp_if.then: @@ -2207,7 +2207,7 @@ int bar(int n){ // CHECK4-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) // CHECK4-NEXT: store i32 [[DOTCAPTURE_EXPR_]], i32* [[DOTCAPTURE_EXPR__ADDR]], align 4 // CHECK4-NEXT: [[CONV:%.*]] = bitcast i32* [[DOTCAPTURE_EXPR__ADDR]] to i8* -// CHECK4-NEXT: [[TMP1:%.*]] = load i8, i8* [[CONV]], align 4 +// CHECK4-NEXT: [[TMP1:%.*]] = load i8, i8* [[CONV]], align 1 // CHECK4-NEXT: [[TOBOOL:%.*]] = trunc i8 [[TMP1]] to i1 // CHECK4-NEXT: br i1 [[TOBOOL]], label [[OMP_IF_THEN:%.*]], label [[OMP_IF_ELSE:%.*]] // CHECK4: omp_if.then: @@ -2299,7 +2299,7 @@ int bar(int n){ // CHECK4-NEXT: [[TMP0:%.*]] = load i32, i32* [[A_ADDR]], align 4 // CHECK4-NEXT: store i32 [[TMP0]], i32* [[A_CASTED]], align 4 // CHECK4-NEXT: [[TMP1:%.*]] = load i32, i32* [[A_CASTED]], align 4 -// CHECK4-NEXT: [[TMP2:%.*]] = load i16, i16* [[CONV]], align 4 +// CHECK4-NEXT: [[TMP2:%.*]] = load i16, i16* [[CONV]], align 2 // CHECK4-NEXT: [[CONV1:%.*]] = bitcast i32* [[B_CASTED]] to i16* // CHECK4-NEXT: store i16 [[TMP2]], i16* [[CONV1]], align 2 // CHECK4-NEXT: [[TMP3:%.*]] = load i32, i32* [[B_CASTED]], align 4 @@ -2319,7 +2319,7 @@ int bar(int n){ // CHECK4-NEXT: store i32 [[A]], i32* [[A_ADDR]], align 4 // CHECK4-NEXT: store i32 [[B]], i32* [[B_ADDR]], align 4 // CHECK4-NEXT: [[CONV:%.*]] = bitcast i32* [[B_ADDR]] to i16* -// CHECK4-NEXT: [[TMP0:%.*]] = load i16, i16* [[CONV]], align 4 +// CHECK4-NEXT: [[TMP0:%.*]] = load i16, i16* [[CONV]], align 2 // CHECK4-NEXT: [[CONV1:%.*]] = sext i16 [[TMP0]] to i32 // CHECK4-NEXT: [[TMP1:%.*]] = load i32, i32* [[A_ADDR]], align 4 // CHECK4-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP1]], [[CONV1]] @@ -2343,7 +2343,7 @@ int bar(int n){ // CHECK9-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1:[0-9]+]]) // CHECK9-NEXT: store i64 [[DOTCAPTURE_EXPR_]], i64* [[DOTCAPTURE_EXPR__ADDR]], align 8 // CHECK9-NEXT: [[CONV:%.*]] = bitcast i64* [[DOTCAPTURE_EXPR__ADDR]] to i8* -// CHECK9-NEXT: [[TMP1:%.*]] = load i8, i8* [[CONV]], align 8 +// CHECK9-NEXT: [[TMP1:%.*]] = load i8, i8* [[CONV]], align 1 // CHECK9-NEXT: [[TOBOOL:%.*]] = trunc i8 [[TMP1]] to i1 // CHECK9-NEXT: br i1 [[TOBOOL]], label [[OMP_IF_THEN:%.*]], label [[OMP_IF_ELSE:%.*]] // CHECK9: omp_if.then: @@ -2403,11 +2403,11 @@ int bar(int n){ // CHECK9-NEXT: [[TMP1:%.*]] = load %struct.S1*, %struct.S1** [[THIS_ADDR]], align 8 // CHECK9-NEXT: [[CONV:%.*]] = bitcast i64* [[B_ADDR]] to i32* // CHECK9-NEXT: [[CONV1:%.*]] = bitcast i64* [[DOTCAPTURE_EXPR__ADDR]] to i8* -// CHECK9-NEXT: [[TMP2:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK9-NEXT: [[TMP2:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK9-NEXT: [[CONV2:%.*]] = bitcast i64* [[B_CASTED]] to i32* // CHECK9-NEXT: store i32 [[TMP2]], i32* [[CONV2]], align 4 // CHECK9-NEXT: [[TMP3:%.*]] = load i64, i64* [[B_CASTED]], align 8 -// CHECK9-NEXT: [[TMP4:%.*]] = load i8, i8* [[CONV1]], align 8 +// CHECK9-NEXT: [[TMP4:%.*]] = load i8, i8* [[CONV1]], align 1 // CHECK9-NEXT: [[TOBOOL:%.*]] = trunc i8 [[TMP4]] to i1 // CHECK9-NEXT: br i1 [[TOBOOL]], label 
[[OMP_IF_THEN:%.*]], label [[OMP_IF_ELSE:%.*]] // CHECK9: omp_if.then: @@ -2437,7 +2437,7 @@ int bar(int n){ // CHECK9-NEXT: store i64 [[B]], i64* [[B_ADDR]], align 8 // CHECK9-NEXT: [[TMP0:%.*]] = load %struct.S1*, %struct.S1** [[THIS_ADDR]], align 8 // CHECK9-NEXT: [[CONV:%.*]] = bitcast i64* [[B_ADDR]] to i32* -// CHECK9-NEXT: [[TMP1:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK9-NEXT: [[TMP1:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK9-NEXT: [[CONV1:%.*]] = sitofp i32 [[TMP1]] to double // CHECK9-NEXT: [[ADD:%.*]] = fadd double [[CONV1]], 1.500000e+00 // CHECK9-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_S1:%.*]], %struct.S1* [[TMP0]], i32 0, i32 0 @@ -2457,7 +2457,7 @@ int bar(int n){ // CHECK9-NEXT: store i64 [[DOTCAPTURE_EXPR_]], i64* [[DOTCAPTURE_EXPR__ADDR]], align 8 // CHECK9-NEXT: [[TMP1:%.*]] = load %struct.S1*, %struct.S1** [[THIS_ADDR]], align 8 // CHECK9-NEXT: [[CONV:%.*]] = bitcast i64* [[DOTCAPTURE_EXPR__ADDR]] to i8* -// CHECK9-NEXT: [[TMP2:%.*]] = load i8, i8* [[CONV]], align 8 +// CHECK9-NEXT: [[TMP2:%.*]] = load i8, i8* [[CONV]], align 1 // CHECK9-NEXT: [[TOBOOL:%.*]] = trunc i8 [[TMP2]] to i1 // CHECK9-NEXT: br i1 [[TOBOOL]], label [[OMP_IF_THEN:%.*]], label [[OMP_IF_ELSE:%.*]] // CHECK9: omp_if.then: @@ -2499,7 +2499,7 @@ int bar(int n){ // CHECK9-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) // CHECK9-NEXT: store i64 [[A]], i64* [[A_ADDR]], align 8 // CHECK9-NEXT: [[CONV:%.*]] = bitcast i64* [[A_ADDR]] to i32* -// CHECK9-NEXT: [[TMP1:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK9-NEXT: [[TMP1:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK9-NEXT: [[CONV1:%.*]] = bitcast i64* [[A_CASTED]] to i32* // CHECK9-NEXT: store i32 [[TMP1]], i32* [[CONV1]], align 4 // CHECK9-NEXT: [[TMP2:%.*]] = load i64, i64* [[A_CASTED]], align 8 @@ -2521,9 +2521,9 @@ int bar(int n){ // CHECK9-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 // CHECK9-NEXT: store i64 [[A]], i64* [[A_ADDR]], align 8 // CHECK9-NEXT: [[CONV:%.*]] = bitcast i64* [[A_ADDR]] to i32* -// CHECK9-NEXT: [[TMP0:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK9-NEXT: [[TMP0:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK9-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP0]], 1 -// CHECK9-NEXT: store i32 [[ADD]], i32* [[CONV]], align 8 +// CHECK9-NEXT: store i32 [[ADD]], i32* [[CONV]], align 4 // CHECK9-NEXT: ret void // // @@ -2538,11 +2538,11 @@ int bar(int n){ // CHECK9-NEXT: store i64 [[B]], i64* [[B_ADDR]], align 8 // CHECK9-NEXT: [[CONV:%.*]] = bitcast i64* [[A_ADDR]] to i32* // CHECK9-NEXT: [[CONV1:%.*]] = bitcast i64* [[B_ADDR]] to i16* -// CHECK9-NEXT: [[TMP0:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK9-NEXT: [[TMP0:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK9-NEXT: [[CONV2:%.*]] = bitcast i64* [[A_CASTED]] to i32* // CHECK9-NEXT: store i32 [[TMP0]], i32* [[CONV2]], align 4 // CHECK9-NEXT: [[TMP1:%.*]] = load i64, i64* [[A_CASTED]], align 8 -// CHECK9-NEXT: [[TMP2:%.*]] = load i16, i16* [[CONV1]], align 8 +// CHECK9-NEXT: [[TMP2:%.*]] = load i16, i16* [[CONV1]], align 2 // CHECK9-NEXT: [[CONV3:%.*]] = bitcast i64* [[B_CASTED]] to i16* // CHECK9-NEXT: store i16 [[TMP2]], i16* [[CONV3]], align 2 // CHECK9-NEXT: [[TMP3:%.*]] = load i64, i64* [[B_CASTED]], align 8 @@ -2563,11 +2563,11 @@ int bar(int n){ // CHECK9-NEXT: store i64 [[B]], i64* [[B_ADDR]], align 8 // CHECK9-NEXT: [[CONV:%.*]] = bitcast i64* [[A_ADDR]] to i32* // CHECK9-NEXT: [[CONV1:%.*]] = bitcast i64* [[B_ADDR]] to i16* -// CHECK9-NEXT: [[TMP0:%.*]] = load 
i16, i16* [[CONV1]], align 8 +// CHECK9-NEXT: [[TMP0:%.*]] = load i16, i16* [[CONV1]], align 2 // CHECK9-NEXT: [[CONV2:%.*]] = sext i16 [[TMP0]] to i32 -// CHECK9-NEXT: [[TMP1:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK9-NEXT: [[TMP1:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK9-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP1]], [[CONV2]] -// CHECK9-NEXT: store i32 [[ADD]], i32* [[CONV]], align 8 +// CHECK9-NEXT: store i32 [[ADD]], i32* [[CONV]], align 4 // CHECK9-NEXT: ret void // // @@ -2580,7 +2580,7 @@ int bar(int n){ // CHECK10-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1:[0-9]+]]) // CHECK10-NEXT: store i64 [[DOTCAPTURE_EXPR_]], i64* [[DOTCAPTURE_EXPR__ADDR]], align 8 // CHECK10-NEXT: [[CONV:%.*]] = bitcast i64* [[DOTCAPTURE_EXPR__ADDR]] to i8* -// CHECK10-NEXT: [[TMP1:%.*]] = load i8, i8* [[CONV]], align 8 +// CHECK10-NEXT: [[TMP1:%.*]] = load i8, i8* [[CONV]], align 1 // CHECK10-NEXT: [[TOBOOL:%.*]] = trunc i8 [[TMP1]] to i1 // CHECK10-NEXT: br i1 [[TOBOOL]], label [[OMP_IF_THEN:%.*]], label [[OMP_IF_ELSE:%.*]] // CHECK10: omp_if.then: @@ -2640,11 +2640,11 @@ int bar(int n){ // CHECK10-NEXT: [[TMP1:%.*]] = load %struct.S1*, %struct.S1** [[THIS_ADDR]], align 8 // CHECK10-NEXT: [[CONV:%.*]] = bitcast i64* [[B_ADDR]] to i32* // CHECK10-NEXT: [[CONV1:%.*]] = bitcast i64* [[DOTCAPTURE_EXPR__ADDR]] to i8* -// CHECK10-NEXT: [[TMP2:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK10-NEXT: [[TMP2:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK10-NEXT: [[CONV2:%.*]] = bitcast i64* [[B_CASTED]] to i32* // CHECK10-NEXT: store i32 [[TMP2]], i32* [[CONV2]], align 4 // CHECK10-NEXT: [[TMP3:%.*]] = load i64, i64* [[B_CASTED]], align 8 -// CHECK10-NEXT: [[TMP4:%.*]] = load i8, i8* [[CONV1]], align 8 +// CHECK10-NEXT: [[TMP4:%.*]] = load i8, i8* [[CONV1]], align 1 // CHECK10-NEXT: [[TOBOOL:%.*]] = trunc i8 [[TMP4]] to i1 // CHECK10-NEXT: br i1 [[TOBOOL]], label [[OMP_IF_THEN:%.*]], label [[OMP_IF_ELSE:%.*]] // CHECK10: omp_if.then: @@ -2674,7 +2674,7 @@ int bar(int n){ // CHECK10-NEXT: store i64 [[B]], i64* [[B_ADDR]], align 8 // CHECK10-NEXT: [[TMP0:%.*]] = load %struct.S1*, %struct.S1** [[THIS_ADDR]], align 8 // CHECK10-NEXT: [[CONV:%.*]] = bitcast i64* [[B_ADDR]] to i32* -// CHECK10-NEXT: [[TMP1:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK10-NEXT: [[TMP1:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK10-NEXT: [[CONV1:%.*]] = sitofp i32 [[TMP1]] to double // CHECK10-NEXT: [[ADD:%.*]] = fadd double [[CONV1]], 1.500000e+00 // CHECK10-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_S1:%.*]], %struct.S1* [[TMP0]], i32 0, i32 0 @@ -2694,7 +2694,7 @@ int bar(int n){ // CHECK10-NEXT: store i64 [[DOTCAPTURE_EXPR_]], i64* [[DOTCAPTURE_EXPR__ADDR]], align 8 // CHECK10-NEXT: [[TMP1:%.*]] = load %struct.S1*, %struct.S1** [[THIS_ADDR]], align 8 // CHECK10-NEXT: [[CONV:%.*]] = bitcast i64* [[DOTCAPTURE_EXPR__ADDR]] to i8* -// CHECK10-NEXT: [[TMP2:%.*]] = load i8, i8* [[CONV]], align 8 +// CHECK10-NEXT: [[TMP2:%.*]] = load i8, i8* [[CONV]], align 1 // CHECK10-NEXT: [[TOBOOL:%.*]] = trunc i8 [[TMP2]] to i1 // CHECK10-NEXT: br i1 [[TOBOOL]], label [[OMP_IF_THEN:%.*]], label [[OMP_IF_ELSE:%.*]] // CHECK10: omp_if.then: @@ -2736,7 +2736,7 @@ int bar(int n){ // CHECK10-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) // CHECK10-NEXT: store i64 [[A]], i64* [[A_ADDR]], align 8 // CHECK10-NEXT: [[CONV:%.*]] = bitcast i64* [[A_ADDR]] to i32* -// CHECK10-NEXT: [[TMP1:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK10-NEXT: 
[[TMP1:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK10-NEXT: [[CONV1:%.*]] = bitcast i64* [[A_CASTED]] to i32* // CHECK10-NEXT: store i32 [[TMP1]], i32* [[CONV1]], align 4 // CHECK10-NEXT: [[TMP2:%.*]] = load i64, i64* [[A_CASTED]], align 8 @@ -2758,9 +2758,9 @@ int bar(int n){ // CHECK10-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 // CHECK10-NEXT: store i64 [[A]], i64* [[A_ADDR]], align 8 // CHECK10-NEXT: [[CONV:%.*]] = bitcast i64* [[A_ADDR]] to i32* -// CHECK10-NEXT: [[TMP0:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK10-NEXT: [[TMP0:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK10-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP0]], 1 -// CHECK10-NEXT: store i32 [[ADD]], i32* [[CONV]], align 8 +// CHECK10-NEXT: store i32 [[ADD]], i32* [[CONV]], align 4 // CHECK10-NEXT: ret void // // @@ -2775,11 +2775,11 @@ int bar(int n){ // CHECK10-NEXT: store i64 [[B]], i64* [[B_ADDR]], align 8 // CHECK10-NEXT: [[CONV:%.*]] = bitcast i64* [[A_ADDR]] to i32* // CHECK10-NEXT: [[CONV1:%.*]] = bitcast i64* [[B_ADDR]] to i16* -// CHECK10-NEXT: [[TMP0:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK10-NEXT: [[TMP0:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK10-NEXT: [[CONV2:%.*]] = bitcast i64* [[A_CASTED]] to i32* // CHECK10-NEXT: store i32 [[TMP0]], i32* [[CONV2]], align 4 // CHECK10-NEXT: [[TMP1:%.*]] = load i64, i64* [[A_CASTED]], align 8 -// CHECK10-NEXT: [[TMP2:%.*]] = load i16, i16* [[CONV1]], align 8 +// CHECK10-NEXT: [[TMP2:%.*]] = load i16, i16* [[CONV1]], align 2 // CHECK10-NEXT: [[CONV3:%.*]] = bitcast i64* [[B_CASTED]] to i16* // CHECK10-NEXT: store i16 [[TMP2]], i16* [[CONV3]], align 2 // CHECK10-NEXT: [[TMP3:%.*]] = load i64, i64* [[B_CASTED]], align 8 @@ -2800,11 +2800,11 @@ int bar(int n){ // CHECK10-NEXT: store i64 [[B]], i64* [[B_ADDR]], align 8 // CHECK10-NEXT: [[CONV:%.*]] = bitcast i64* [[A_ADDR]] to i32* // CHECK10-NEXT: [[CONV1:%.*]] = bitcast i64* [[B_ADDR]] to i16* -// CHECK10-NEXT: [[TMP0:%.*]] = load i16, i16* [[CONV1]], align 8 +// CHECK10-NEXT: [[TMP0:%.*]] = load i16, i16* [[CONV1]], align 2 // CHECK10-NEXT: [[CONV2:%.*]] = sext i16 [[TMP0]] to i32 -// CHECK10-NEXT: [[TMP1:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK10-NEXT: [[TMP1:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK10-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP1]], [[CONV2]] -// CHECK10-NEXT: store i32 [[ADD]], i32* [[CONV]], align 8 +// CHECK10-NEXT: store i32 [[ADD]], i32* [[CONV]], align 4 // CHECK10-NEXT: ret void // // @@ -2817,7 +2817,7 @@ int bar(int n){ // CHECK11-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1:[0-9]+]]) // CHECK11-NEXT: store i32 [[DOTCAPTURE_EXPR_]], i32* [[DOTCAPTURE_EXPR__ADDR]], align 4 // CHECK11-NEXT: [[CONV:%.*]] = bitcast i32* [[DOTCAPTURE_EXPR__ADDR]] to i8* -// CHECK11-NEXT: [[TMP1:%.*]] = load i8, i8* [[CONV]], align 4 +// CHECK11-NEXT: [[TMP1:%.*]] = load i8, i8* [[CONV]], align 1 // CHECK11-NEXT: [[TOBOOL:%.*]] = trunc i8 [[TMP1]] to i1 // CHECK11-NEXT: br i1 [[TOBOOL]], label [[OMP_IF_THEN:%.*]], label [[OMP_IF_ELSE:%.*]] // CHECK11: omp_if.then: @@ -2879,7 +2879,7 @@ int bar(int n){ // CHECK11-NEXT: [[TMP2:%.*]] = load i32, i32* [[B_ADDR]], align 4 // CHECK11-NEXT: store i32 [[TMP2]], i32* [[B_CASTED]], align 4 // CHECK11-NEXT: [[TMP3:%.*]] = load i32, i32* [[B_CASTED]], align 4 -// CHECK11-NEXT: [[TMP4:%.*]] = load i8, i8* [[CONV]], align 4 +// CHECK11-NEXT: [[TMP4:%.*]] = load i8, i8* [[CONV]], align 1 // CHECK11-NEXT: [[TOBOOL:%.*]] = trunc i8 [[TMP4]] to i1 // CHECK11-NEXT: br i1 [[TOBOOL]], 
label [[OMP_IF_THEN:%.*]], label [[OMP_IF_ELSE:%.*]] // CHECK11: omp_if.then: @@ -2928,7 +2928,7 @@ int bar(int n){ // CHECK11-NEXT: store i32 [[DOTCAPTURE_EXPR_]], i32* [[DOTCAPTURE_EXPR__ADDR]], align 4 // CHECK11-NEXT: [[TMP1:%.*]] = load %struct.S1*, %struct.S1** [[THIS_ADDR]], align 4 // CHECK11-NEXT: [[CONV:%.*]] = bitcast i32* [[DOTCAPTURE_EXPR__ADDR]] to i8* -// CHECK11-NEXT: [[TMP2:%.*]] = load i8, i8* [[CONV]], align 4 +// CHECK11-NEXT: [[TMP2:%.*]] = load i8, i8* [[CONV]], align 1 // CHECK11-NEXT: [[TOBOOL:%.*]] = trunc i8 [[TMP2]] to i1 // CHECK11-NEXT: br i1 [[TOBOOL]], label [[OMP_IF_THEN:%.*]], label [[OMP_IF_ELSE:%.*]] // CHECK11: omp_if.then: @@ -3008,7 +3008,7 @@ int bar(int n){ // CHECK11-NEXT: [[TMP0:%.*]] = load i32, i32* [[A_ADDR]], align 4 // CHECK11-NEXT: store i32 [[TMP0]], i32* [[A_CASTED]], align 4 // CHECK11-NEXT: [[TMP1:%.*]] = load i32, i32* [[A_CASTED]], align 4 -// CHECK11-NEXT: [[TMP2:%.*]] = load i16, i16* [[CONV]], align 4 +// CHECK11-NEXT: [[TMP2:%.*]] = load i16, i16* [[CONV]], align 2 // CHECK11-NEXT: [[CONV1:%.*]] = bitcast i32* [[B_CASTED]] to i16* // CHECK11-NEXT: store i16 [[TMP2]], i16* [[CONV1]], align 2 // CHECK11-NEXT: [[TMP3:%.*]] = load i32, i32* [[B_CASTED]], align 4 @@ -3028,7 +3028,7 @@ int bar(int n){ // CHECK11-NEXT: store i32 [[A]], i32* [[A_ADDR]], align 4 // CHECK11-NEXT: store i32 [[B]], i32* [[B_ADDR]], align 4 // CHECK11-NEXT: [[CONV:%.*]] = bitcast i32* [[B_ADDR]] to i16* -// CHECK11-NEXT: [[TMP0:%.*]] = load i16, i16* [[CONV]], align 4 +// CHECK11-NEXT: [[TMP0:%.*]] = load i16, i16* [[CONV]], align 2 // CHECK11-NEXT: [[CONV1:%.*]] = sext i16 [[TMP0]] to i32 // CHECK11-NEXT: [[TMP1:%.*]] = load i32, i32* [[A_ADDR]], align 4 // CHECK11-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP1]], [[CONV1]] @@ -3045,7 +3045,7 @@ int bar(int n){ // CHECK12-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1:[0-9]+]]) // CHECK12-NEXT: store i32 [[DOTCAPTURE_EXPR_]], i32* [[DOTCAPTURE_EXPR__ADDR]], align 4 // CHECK12-NEXT: [[CONV:%.*]] = bitcast i32* [[DOTCAPTURE_EXPR__ADDR]] to i8* -// CHECK12-NEXT: [[TMP1:%.*]] = load i8, i8* [[CONV]], align 4 +// CHECK12-NEXT: [[TMP1:%.*]] = load i8, i8* [[CONV]], align 1 // CHECK12-NEXT: [[TOBOOL:%.*]] = trunc i8 [[TMP1]] to i1 // CHECK12-NEXT: br i1 [[TOBOOL]], label [[OMP_IF_THEN:%.*]], label [[OMP_IF_ELSE:%.*]] // CHECK12: omp_if.then: @@ -3107,7 +3107,7 @@ int bar(int n){ // CHECK12-NEXT: [[TMP2:%.*]] = load i32, i32* [[B_ADDR]], align 4 // CHECK12-NEXT: store i32 [[TMP2]], i32* [[B_CASTED]], align 4 // CHECK12-NEXT: [[TMP3:%.*]] = load i32, i32* [[B_CASTED]], align 4 -// CHECK12-NEXT: [[TMP4:%.*]] = load i8, i8* [[CONV]], align 4 +// CHECK12-NEXT: [[TMP4:%.*]] = load i8, i8* [[CONV]], align 1 // CHECK12-NEXT: [[TOBOOL:%.*]] = trunc i8 [[TMP4]] to i1 // CHECK12-NEXT: br i1 [[TOBOOL]], label [[OMP_IF_THEN:%.*]], label [[OMP_IF_ELSE:%.*]] // CHECK12: omp_if.then: @@ -3156,7 +3156,7 @@ int bar(int n){ // CHECK12-NEXT: store i32 [[DOTCAPTURE_EXPR_]], i32* [[DOTCAPTURE_EXPR__ADDR]], align 4 // CHECK12-NEXT: [[TMP1:%.*]] = load %struct.S1*, %struct.S1** [[THIS_ADDR]], align 4 // CHECK12-NEXT: [[CONV:%.*]] = bitcast i32* [[DOTCAPTURE_EXPR__ADDR]] to i8* -// CHECK12-NEXT: [[TMP2:%.*]] = load i8, i8* [[CONV]], align 4 +// CHECK12-NEXT: [[TMP2:%.*]] = load i8, i8* [[CONV]], align 1 // CHECK12-NEXT: [[TOBOOL:%.*]] = trunc i8 [[TMP2]] to i1 // CHECK12-NEXT: br i1 [[TOBOOL]], label [[OMP_IF_THEN:%.*]], label [[OMP_IF_ELSE:%.*]] // CHECK12: omp_if.then: @@ -3236,7 +3236,7 @@ int bar(int n){ // 
CHECK12-NEXT: [[TMP0:%.*]] = load i32, i32* [[A_ADDR]], align 4 // CHECK12-NEXT: store i32 [[TMP0]], i32* [[A_CASTED]], align 4 // CHECK12-NEXT: [[TMP1:%.*]] = load i32, i32* [[A_CASTED]], align 4 -// CHECK12-NEXT: [[TMP2:%.*]] = load i16, i16* [[CONV]], align 4 +// CHECK12-NEXT: [[TMP2:%.*]] = load i16, i16* [[CONV]], align 2 // CHECK12-NEXT: [[CONV1:%.*]] = bitcast i32* [[B_CASTED]] to i16* // CHECK12-NEXT: store i16 [[TMP2]], i16* [[CONV1]], align 2 // CHECK12-NEXT: [[TMP3:%.*]] = load i32, i32* [[B_CASTED]], align 4 @@ -3256,7 +3256,7 @@ int bar(int n){ // CHECK12-NEXT: store i32 [[A]], i32* [[A_ADDR]], align 4 // CHECK12-NEXT: store i32 [[B]], i32* [[B_ADDR]], align 4 // CHECK12-NEXT: [[CONV:%.*]] = bitcast i32* [[B_ADDR]] to i16* -// CHECK12-NEXT: [[TMP0:%.*]] = load i16, i16* [[CONV]], align 4 +// CHECK12-NEXT: [[TMP0:%.*]] = load i16, i16* [[CONV]], align 2 // CHECK12-NEXT: [[CONV1:%.*]] = sext i16 [[TMP0]] to i32 // CHECK12-NEXT: [[TMP1:%.*]] = load i32, i32* [[A_ADDR]], align 4 // CHECK12-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP1]], [[CONV1]] @@ -3580,11 +3580,11 @@ int bar(int n){ // CHECK17-NEXT: [[TMP1:%.*]] = load %struct.S1*, %struct.S1** [[THIS_ADDR]], align 8 // CHECK17-NEXT: [[CONV:%.*]] = bitcast i64* [[B_ADDR]] to i32* // CHECK17-NEXT: [[CONV1:%.*]] = bitcast i64* [[DOTCAPTURE_EXPR__ADDR]] to i8* -// CHECK17-NEXT: [[TMP2:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK17-NEXT: [[TMP2:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK17-NEXT: [[CONV2:%.*]] = bitcast i64* [[B_CASTED]] to i32* // CHECK17-NEXT: store i32 [[TMP2]], i32* [[CONV2]], align 4 // CHECK17-NEXT: [[TMP3:%.*]] = load i64, i64* [[B_CASTED]], align 8 -// CHECK17-NEXT: [[TMP4:%.*]] = load i8, i8* [[CONV1]], align 8 +// CHECK17-NEXT: [[TMP4:%.*]] = load i8, i8* [[CONV1]], align 1 // CHECK17-NEXT: [[TOBOOL:%.*]] = trunc i8 [[TMP4]] to i1 // CHECK17-NEXT: br i1 [[TOBOOL]], label [[OMP_IF_THEN:%.*]], label [[OMP_IF_ELSE:%.*]] // CHECK17: omp_if.then: @@ -3614,7 +3614,7 @@ int bar(int n){ // CHECK17-NEXT: store i64 [[B]], i64* [[B_ADDR]], align 8 // CHECK17-NEXT: [[TMP0:%.*]] = load %struct.S1*, %struct.S1** [[THIS_ADDR]], align 8 // CHECK17-NEXT: [[CONV:%.*]] = bitcast i64* [[B_ADDR]] to i32* -// CHECK17-NEXT: [[TMP1:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK17-NEXT: [[TMP1:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK17-NEXT: [[CONV1:%.*]] = sitofp i32 [[TMP1]] to double // CHECK17-NEXT: [[ADD:%.*]] = fadd double [[CONV1]], 1.500000e+00 // CHECK17-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_S1:%.*]], %struct.S1* [[TMP0]], i32 0, i32 0 @@ -3634,7 +3634,7 @@ int bar(int n){ // CHECK17-NEXT: store i64 [[DOTCAPTURE_EXPR_]], i64* [[DOTCAPTURE_EXPR__ADDR]], align 8 // CHECK17-NEXT: [[TMP1:%.*]] = load %struct.S1*, %struct.S1** [[THIS_ADDR]], align 8 // CHECK17-NEXT: [[CONV:%.*]] = bitcast i64* [[DOTCAPTURE_EXPR__ADDR]] to i8* -// CHECK17-NEXT: [[TMP2:%.*]] = load i8, i8* [[CONV]], align 8 +// CHECK17-NEXT: [[TMP2:%.*]] = load i8, i8* [[CONV]], align 1 // CHECK17-NEXT: [[TOBOOL:%.*]] = trunc i8 [[TMP2]] to i1 // CHECK17-NEXT: br i1 [[TOBOOL]], label [[OMP_IF_THEN:%.*]], label [[OMP_IF_ELSE:%.*]] // CHECK17: omp_if.then: @@ -3675,7 +3675,7 @@ int bar(int n){ // CHECK17-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) // CHECK17-NEXT: store i64 [[DOTCAPTURE_EXPR_]], i64* [[DOTCAPTURE_EXPR__ADDR]], align 8 // CHECK17-NEXT: [[CONV:%.*]] = bitcast i64* [[DOTCAPTURE_EXPR__ADDR]] to i8* -// CHECK17-NEXT: [[TMP1:%.*]] = load i8, i8* [[CONV]], align 8 +// CHECK17-NEXT: 
[[TMP1:%.*]] = load i8, i8* [[CONV]], align 1 // CHECK17-NEXT: [[TOBOOL:%.*]] = trunc i8 [[TMP1]] to i1 // CHECK17-NEXT: br i1 [[TOBOOL]], label [[OMP_IF_THEN:%.*]], label [[OMP_IF_ELSE:%.*]] // CHECK17: omp_if.then: @@ -3729,7 +3729,7 @@ int bar(int n){ // CHECK17-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) // CHECK17-NEXT: store i64 [[A]], i64* [[A_ADDR]], align 8 // CHECK17-NEXT: [[CONV:%.*]] = bitcast i64* [[A_ADDR]] to i32* -// CHECK17-NEXT: [[TMP1:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK17-NEXT: [[TMP1:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK17-NEXT: [[CONV1:%.*]] = bitcast i64* [[A_CASTED]] to i32* // CHECK17-NEXT: store i32 [[TMP1]], i32* [[CONV1]], align 4 // CHECK17-NEXT: [[TMP2:%.*]] = load i64, i64* [[A_CASTED]], align 8 @@ -3751,9 +3751,9 @@ int bar(int n){ // CHECK17-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 // CHECK17-NEXT: store i64 [[A]], i64* [[A_ADDR]], align 8 // CHECK17-NEXT: [[CONV:%.*]] = bitcast i64* [[A_ADDR]] to i32* -// CHECK17-NEXT: [[TMP0:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK17-NEXT: [[TMP0:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK17-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP0]], 1 -// CHECK17-NEXT: store i32 [[ADD]], i32* [[CONV]], align 8 +// CHECK17-NEXT: store i32 [[ADD]], i32* [[CONV]], align 4 // CHECK17-NEXT: ret void // // @@ -3768,11 +3768,11 @@ int bar(int n){ // CHECK17-NEXT: store i64 [[B]], i64* [[B_ADDR]], align 8 // CHECK17-NEXT: [[CONV:%.*]] = bitcast i64* [[A_ADDR]] to i32* // CHECK17-NEXT: [[CONV1:%.*]] = bitcast i64* [[B_ADDR]] to i16* -// CHECK17-NEXT: [[TMP0:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK17-NEXT: [[TMP0:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK17-NEXT: [[CONV2:%.*]] = bitcast i64* [[A_CASTED]] to i32* // CHECK17-NEXT: store i32 [[TMP0]], i32* [[CONV2]], align 4 // CHECK17-NEXT: [[TMP1:%.*]] = load i64, i64* [[A_CASTED]], align 8 -// CHECK17-NEXT: [[TMP2:%.*]] = load i16, i16* [[CONV1]], align 8 +// CHECK17-NEXT: [[TMP2:%.*]] = load i16, i16* [[CONV1]], align 2 // CHECK17-NEXT: [[CONV3:%.*]] = bitcast i64* [[B_CASTED]] to i16* // CHECK17-NEXT: store i16 [[TMP2]], i16* [[CONV3]], align 2 // CHECK17-NEXT: [[TMP3:%.*]] = load i64, i64* [[B_CASTED]], align 8 @@ -3793,11 +3793,11 @@ int bar(int n){ // CHECK17-NEXT: store i64 [[B]], i64* [[B_ADDR]], align 8 // CHECK17-NEXT: [[CONV:%.*]] = bitcast i64* [[A_ADDR]] to i32* // CHECK17-NEXT: [[CONV1:%.*]] = bitcast i64* [[B_ADDR]] to i16* -// CHECK17-NEXT: [[TMP0:%.*]] = load i16, i16* [[CONV1]], align 8 +// CHECK17-NEXT: [[TMP0:%.*]] = load i16, i16* [[CONV1]], align 2 // CHECK17-NEXT: [[CONV2:%.*]] = sext i16 [[TMP0]] to i32 -// CHECK17-NEXT: [[TMP1:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK17-NEXT: [[TMP1:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK17-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP1]], [[CONV2]] -// CHECK17-NEXT: store i32 [[ADD]], i32* [[CONV]], align 8 +// CHECK17-NEXT: store i32 [[ADD]], i32* [[CONV]], align 4 // CHECK17-NEXT: ret void // // @@ -4124,11 +4124,11 @@ int bar(int n){ // CHECK18-NEXT: [[TMP1:%.*]] = load %struct.S1*, %struct.S1** [[THIS_ADDR]], align 8 // CHECK18-NEXT: [[CONV:%.*]] = bitcast i64* [[B_ADDR]] to i32* // CHECK18-NEXT: [[CONV1:%.*]] = bitcast i64* [[DOTCAPTURE_EXPR__ADDR]] to i8* -// CHECK18-NEXT: [[TMP2:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK18-NEXT: [[TMP2:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK18-NEXT: [[CONV2:%.*]] = bitcast i64* [[B_CASTED]] to i32* // CHECK18-NEXT: store i32 [[TMP2]], i32* 
[[CONV2]], align 4 // CHECK18-NEXT: [[TMP3:%.*]] = load i64, i64* [[B_CASTED]], align 8 -// CHECK18-NEXT: [[TMP4:%.*]] = load i8, i8* [[CONV1]], align 8 +// CHECK18-NEXT: [[TMP4:%.*]] = load i8, i8* [[CONV1]], align 1 // CHECK18-NEXT: [[TOBOOL:%.*]] = trunc i8 [[TMP4]] to i1 // CHECK18-NEXT: br i1 [[TOBOOL]], label [[OMP_IF_THEN:%.*]], label [[OMP_IF_ELSE:%.*]] // CHECK18: omp_if.then: @@ -4158,7 +4158,7 @@ int bar(int n){ // CHECK18-NEXT: store i64 [[B]], i64* [[B_ADDR]], align 8 // CHECK18-NEXT: [[TMP0:%.*]] = load %struct.S1*, %struct.S1** [[THIS_ADDR]], align 8 // CHECK18-NEXT: [[CONV:%.*]] = bitcast i64* [[B_ADDR]] to i32* -// CHECK18-NEXT: [[TMP1:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK18-NEXT: [[TMP1:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK18-NEXT: [[CONV1:%.*]] = sitofp i32 [[TMP1]] to double // CHECK18-NEXT: [[ADD:%.*]] = fadd double [[CONV1]], 1.500000e+00 // CHECK18-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_S1:%.*]], %struct.S1* [[TMP0]], i32 0, i32 0 @@ -4178,7 +4178,7 @@ int bar(int n){ // CHECK18-NEXT: store i64 [[DOTCAPTURE_EXPR_]], i64* [[DOTCAPTURE_EXPR__ADDR]], align 8 // CHECK18-NEXT: [[TMP1:%.*]] = load %struct.S1*, %struct.S1** [[THIS_ADDR]], align 8 // CHECK18-NEXT: [[CONV:%.*]] = bitcast i64* [[DOTCAPTURE_EXPR__ADDR]] to i8* -// CHECK18-NEXT: [[TMP2:%.*]] = load i8, i8* [[CONV]], align 8 +// CHECK18-NEXT: [[TMP2:%.*]] = load i8, i8* [[CONV]], align 1 // CHECK18-NEXT: [[TOBOOL:%.*]] = trunc i8 [[TMP2]] to i1 // CHECK18-NEXT: br i1 [[TOBOOL]], label [[OMP_IF_THEN:%.*]], label [[OMP_IF_ELSE:%.*]] // CHECK18: omp_if.then: @@ -4219,7 +4219,7 @@ int bar(int n){ // CHECK18-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) // CHECK18-NEXT: store i64 [[DOTCAPTURE_EXPR_]], i64* [[DOTCAPTURE_EXPR__ADDR]], align 8 // CHECK18-NEXT: [[CONV:%.*]] = bitcast i64* [[DOTCAPTURE_EXPR__ADDR]] to i8* -// CHECK18-NEXT: [[TMP1:%.*]] = load i8, i8* [[CONV]], align 8 +// CHECK18-NEXT: [[TMP1:%.*]] = load i8, i8* [[CONV]], align 1 // CHECK18-NEXT: [[TOBOOL:%.*]] = trunc i8 [[TMP1]] to i1 // CHECK18-NEXT: br i1 [[TOBOOL]], label [[OMP_IF_THEN:%.*]], label [[OMP_IF_ELSE:%.*]] // CHECK18: omp_if.then: @@ -4273,7 +4273,7 @@ int bar(int n){ // CHECK18-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) // CHECK18-NEXT: store i64 [[A]], i64* [[A_ADDR]], align 8 // CHECK18-NEXT: [[CONV:%.*]] = bitcast i64* [[A_ADDR]] to i32* -// CHECK18-NEXT: [[TMP1:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK18-NEXT: [[TMP1:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK18-NEXT: [[CONV1:%.*]] = bitcast i64* [[A_CASTED]] to i32* // CHECK18-NEXT: store i32 [[TMP1]], i32* [[CONV1]], align 4 // CHECK18-NEXT: [[TMP2:%.*]] = load i64, i64* [[A_CASTED]], align 8 @@ -4295,9 +4295,9 @@ int bar(int n){ // CHECK18-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 // CHECK18-NEXT: store i64 [[A]], i64* [[A_ADDR]], align 8 // CHECK18-NEXT: [[CONV:%.*]] = bitcast i64* [[A_ADDR]] to i32* -// CHECK18-NEXT: [[TMP0:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK18-NEXT: [[TMP0:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK18-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP0]], 1 -// CHECK18-NEXT: store i32 [[ADD]], i32* [[CONV]], align 8 +// CHECK18-NEXT: store i32 [[ADD]], i32* [[CONV]], align 4 // CHECK18-NEXT: ret void // // @@ -4312,11 +4312,11 @@ int bar(int n){ // CHECK18-NEXT: store i64 [[B]], i64* [[B_ADDR]], align 8 // CHECK18-NEXT: [[CONV:%.*]] = bitcast i64* [[A_ADDR]] to i32* // CHECK18-NEXT: 
[[CONV1:%.*]] = bitcast i64* [[B_ADDR]] to i16* -// CHECK18-NEXT: [[TMP0:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK18-NEXT: [[TMP0:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK18-NEXT: [[CONV2:%.*]] = bitcast i64* [[A_CASTED]] to i32* // CHECK18-NEXT: store i32 [[TMP0]], i32* [[CONV2]], align 4 // CHECK18-NEXT: [[TMP1:%.*]] = load i64, i64* [[A_CASTED]], align 8 -// CHECK18-NEXT: [[TMP2:%.*]] = load i16, i16* [[CONV1]], align 8 +// CHECK18-NEXT: [[TMP2:%.*]] = load i16, i16* [[CONV1]], align 2 // CHECK18-NEXT: [[CONV3:%.*]] = bitcast i64* [[B_CASTED]] to i16* // CHECK18-NEXT: store i16 [[TMP2]], i16* [[CONV3]], align 2 // CHECK18-NEXT: [[TMP3:%.*]] = load i64, i64* [[B_CASTED]], align 8 @@ -4337,11 +4337,11 @@ int bar(int n){ // CHECK18-NEXT: store i64 [[B]], i64* [[B_ADDR]], align 8 // CHECK18-NEXT: [[CONV:%.*]] = bitcast i64* [[A_ADDR]] to i32* // CHECK18-NEXT: [[CONV1:%.*]] = bitcast i64* [[B_ADDR]] to i16* -// CHECK18-NEXT: [[TMP0:%.*]] = load i16, i16* [[CONV1]], align 8 +// CHECK18-NEXT: [[TMP0:%.*]] = load i16, i16* [[CONV1]], align 2 // CHECK18-NEXT: [[CONV2:%.*]] = sext i16 [[TMP0]] to i32 -// CHECK18-NEXT: [[TMP1:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK18-NEXT: [[TMP1:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK18-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP1]], [[CONV2]] -// CHECK18-NEXT: store i32 [[ADD]], i32* [[CONV]], align 8 +// CHECK18-NEXT: store i32 [[ADD]], i32* [[CONV]], align 4 // CHECK18-NEXT: ret void // // @@ -4667,7 +4667,7 @@ int bar(int n){ // CHECK19-NEXT: [[TMP2:%.*]] = load i32, i32* [[B_ADDR]], align 4 // CHECK19-NEXT: store i32 [[TMP2]], i32* [[B_CASTED]], align 4 // CHECK19-NEXT: [[TMP3:%.*]] = load i32, i32* [[B_CASTED]], align 4 -// CHECK19-NEXT: [[TMP4:%.*]] = load i8, i8* [[CONV]], align 4 +// CHECK19-NEXT: [[TMP4:%.*]] = load i8, i8* [[CONV]], align 1 // CHECK19-NEXT: [[TOBOOL:%.*]] = trunc i8 [[TMP4]] to i1 // CHECK19-NEXT: br i1 [[TOBOOL]], label [[OMP_IF_THEN:%.*]], label [[OMP_IF_ELSE:%.*]] // CHECK19: omp_if.then: @@ -4716,7 +4716,7 @@ int bar(int n){ // CHECK19-NEXT: store i32 [[DOTCAPTURE_EXPR_]], i32* [[DOTCAPTURE_EXPR__ADDR]], align 4 // CHECK19-NEXT: [[TMP1:%.*]] = load %struct.S1*, %struct.S1** [[THIS_ADDR]], align 4 // CHECK19-NEXT: [[CONV:%.*]] = bitcast i32* [[DOTCAPTURE_EXPR__ADDR]] to i8* -// CHECK19-NEXT: [[TMP2:%.*]] = load i8, i8* [[CONV]], align 4 +// CHECK19-NEXT: [[TMP2:%.*]] = load i8, i8* [[CONV]], align 1 // CHECK19-NEXT: [[TOBOOL:%.*]] = trunc i8 [[TMP2]] to i1 // CHECK19-NEXT: br i1 [[TOBOOL]], label [[OMP_IF_THEN:%.*]], label [[OMP_IF_ELSE:%.*]] // CHECK19: omp_if.then: @@ -4757,7 +4757,7 @@ int bar(int n){ // CHECK19-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) // CHECK19-NEXT: store i32 [[DOTCAPTURE_EXPR_]], i32* [[DOTCAPTURE_EXPR__ADDR]], align 4 // CHECK19-NEXT: [[CONV:%.*]] = bitcast i32* [[DOTCAPTURE_EXPR__ADDR]] to i8* -// CHECK19-NEXT: [[TMP1:%.*]] = load i8, i8* [[CONV]], align 4 +// CHECK19-NEXT: [[TMP1:%.*]] = load i8, i8* [[CONV]], align 1 // CHECK19-NEXT: [[TOBOOL:%.*]] = trunc i8 [[TMP1]] to i1 // CHECK19-NEXT: br i1 [[TOBOOL]], label [[OMP_IF_THEN:%.*]], label [[OMP_IF_ELSE:%.*]] // CHECK19: omp_if.then: @@ -4849,7 +4849,7 @@ int bar(int n){ // CHECK19-NEXT: [[TMP0:%.*]] = load i32, i32* [[A_ADDR]], align 4 // CHECK19-NEXT: store i32 [[TMP0]], i32* [[A_CASTED]], align 4 // CHECK19-NEXT: [[TMP1:%.*]] = load i32, i32* [[A_CASTED]], align 4 -// CHECK19-NEXT: [[TMP2:%.*]] = load i16, i16* [[CONV]], align 4 +// CHECK19-NEXT: [[TMP2:%.*]] = load i16, i16* 
[[CONV]], align 2 // CHECK19-NEXT: [[CONV1:%.*]] = bitcast i32* [[B_CASTED]] to i16* // CHECK19-NEXT: store i16 [[TMP2]], i16* [[CONV1]], align 2 // CHECK19-NEXT: [[TMP3:%.*]] = load i32, i32* [[B_CASTED]], align 4 @@ -4869,7 +4869,7 @@ int bar(int n){ // CHECK19-NEXT: store i32 [[A]], i32* [[A_ADDR]], align 4 // CHECK19-NEXT: store i32 [[B]], i32* [[B_ADDR]], align 4 // CHECK19-NEXT: [[CONV:%.*]] = bitcast i32* [[B_ADDR]] to i16* -// CHECK19-NEXT: [[TMP0:%.*]] = load i16, i16* [[CONV]], align 4 +// CHECK19-NEXT: [[TMP0:%.*]] = load i16, i16* [[CONV]], align 2 // CHECK19-NEXT: [[CONV1:%.*]] = sext i16 [[TMP0]] to i32 // CHECK19-NEXT: [[TMP1:%.*]] = load i32, i32* [[A_ADDR]], align 4 // CHECK19-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP1]], [[CONV1]] @@ -5199,7 +5199,7 @@ int bar(int n){ // CHECK20-NEXT: [[TMP2:%.*]] = load i32, i32* [[B_ADDR]], align 4 // CHECK20-NEXT: store i32 [[TMP2]], i32* [[B_CASTED]], align 4 // CHECK20-NEXT: [[TMP3:%.*]] = load i32, i32* [[B_CASTED]], align 4 -// CHECK20-NEXT: [[TMP4:%.*]] = load i8, i8* [[CONV]], align 4 +// CHECK20-NEXT: [[TMP4:%.*]] = load i8, i8* [[CONV]], align 1 // CHECK20-NEXT: [[TOBOOL:%.*]] = trunc i8 [[TMP4]] to i1 // CHECK20-NEXT: br i1 [[TOBOOL]], label [[OMP_IF_THEN:%.*]], label [[OMP_IF_ELSE:%.*]] // CHECK20: omp_if.then: @@ -5248,7 +5248,7 @@ int bar(int n){ // CHECK20-NEXT: store i32 [[DOTCAPTURE_EXPR_]], i32* [[DOTCAPTURE_EXPR__ADDR]], align 4 // CHECK20-NEXT: [[TMP1:%.*]] = load %struct.S1*, %struct.S1** [[THIS_ADDR]], align 4 // CHECK20-NEXT: [[CONV:%.*]] = bitcast i32* [[DOTCAPTURE_EXPR__ADDR]] to i8* -// CHECK20-NEXT: [[TMP2:%.*]] = load i8, i8* [[CONV]], align 4 +// CHECK20-NEXT: [[TMP2:%.*]] = load i8, i8* [[CONV]], align 1 // CHECK20-NEXT: [[TOBOOL:%.*]] = trunc i8 [[TMP2]] to i1 // CHECK20-NEXT: br i1 [[TOBOOL]], label [[OMP_IF_THEN:%.*]], label [[OMP_IF_ELSE:%.*]] // CHECK20: omp_if.then: @@ -5289,7 +5289,7 @@ int bar(int n){ // CHECK20-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) // CHECK20-NEXT: store i32 [[DOTCAPTURE_EXPR_]], i32* [[DOTCAPTURE_EXPR__ADDR]], align 4 // CHECK20-NEXT: [[CONV:%.*]] = bitcast i32* [[DOTCAPTURE_EXPR__ADDR]] to i8* -// CHECK20-NEXT: [[TMP1:%.*]] = load i8, i8* [[CONV]], align 4 +// CHECK20-NEXT: [[TMP1:%.*]] = load i8, i8* [[CONV]], align 1 // CHECK20-NEXT: [[TOBOOL:%.*]] = trunc i8 [[TMP1]] to i1 // CHECK20-NEXT: br i1 [[TOBOOL]], label [[OMP_IF_THEN:%.*]], label [[OMP_IF_ELSE:%.*]] // CHECK20: omp_if.then: @@ -5381,7 +5381,7 @@ int bar(int n){ // CHECK20-NEXT: [[TMP0:%.*]] = load i32, i32* [[A_ADDR]], align 4 // CHECK20-NEXT: store i32 [[TMP0]], i32* [[A_CASTED]], align 4 // CHECK20-NEXT: [[TMP1:%.*]] = load i32, i32* [[A_CASTED]], align 4 -// CHECK20-NEXT: [[TMP2:%.*]] = load i16, i16* [[CONV]], align 4 +// CHECK20-NEXT: [[TMP2:%.*]] = load i16, i16* [[CONV]], align 2 // CHECK20-NEXT: [[CONV1:%.*]] = bitcast i32* [[B_CASTED]] to i16* // CHECK20-NEXT: store i16 [[TMP2]], i16* [[CONV1]], align 2 // CHECK20-NEXT: [[TMP3:%.*]] = load i32, i32* [[B_CASTED]], align 4 @@ -5401,7 +5401,7 @@ int bar(int n){ // CHECK20-NEXT: store i32 [[A]], i32* [[A_ADDR]], align 4 // CHECK20-NEXT: store i32 [[B]], i32* [[B_ADDR]], align 4 // CHECK20-NEXT: [[CONV:%.*]] = bitcast i32* [[B_ADDR]] to i16* -// CHECK20-NEXT: [[TMP0:%.*]] = load i16, i16* [[CONV]], align 4 +// CHECK20-NEXT: [[TMP0:%.*]] = load i16, i16* [[CONV]], align 2 // CHECK20-NEXT: [[CONV1:%.*]] = sext i16 [[TMP0]] to i32 // CHECK20-NEXT: [[TMP1:%.*]] = load i32, i32* [[A_ADDR]], align 4 // CHECK20-NEXT: [[ADD:%.*]] 
= add nsw i32 [[TMP1]], [[CONV1]] @@ -5425,7 +5425,7 @@ int bar(int n){ // CHECK25-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1:[0-9]+]]) // CHECK25-NEXT: store i64 [[DOTCAPTURE_EXPR_]], i64* [[DOTCAPTURE_EXPR__ADDR]], align 8 // CHECK25-NEXT: [[CONV:%.*]] = bitcast i64* [[DOTCAPTURE_EXPR__ADDR]] to i8* -// CHECK25-NEXT: [[TMP1:%.*]] = load i8, i8* [[CONV]], align 8 +// CHECK25-NEXT: [[TMP1:%.*]] = load i8, i8* [[CONV]], align 1 // CHECK25-NEXT: [[TOBOOL:%.*]] = trunc i8 [[TMP1]] to i1 // CHECK25-NEXT: br i1 [[TOBOOL]], label [[OMP_IF_THEN:%.*]], label [[OMP_IF_ELSE:%.*]] // CHECK25: omp_if.then: @@ -5485,11 +5485,11 @@ int bar(int n){ // CHECK25-NEXT: [[TMP1:%.*]] = load %struct.S1*, %struct.S1** [[THIS_ADDR]], align 8 // CHECK25-NEXT: [[CONV:%.*]] = bitcast i64* [[B_ADDR]] to i32* // CHECK25-NEXT: [[CONV1:%.*]] = bitcast i64* [[DOTCAPTURE_EXPR__ADDR]] to i8* -// CHECK25-NEXT: [[TMP2:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK25-NEXT: [[TMP2:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK25-NEXT: [[CONV2:%.*]] = bitcast i64* [[B_CASTED]] to i32* // CHECK25-NEXT: store i32 [[TMP2]], i32* [[CONV2]], align 4 // CHECK25-NEXT: [[TMP3:%.*]] = load i64, i64* [[B_CASTED]], align 8 -// CHECK25-NEXT: [[TMP4:%.*]] = load i8, i8* [[CONV1]], align 8 +// CHECK25-NEXT: [[TMP4:%.*]] = load i8, i8* [[CONV1]], align 1 // CHECK25-NEXT: [[TOBOOL:%.*]] = trunc i8 [[TMP4]] to i1 // CHECK25-NEXT: br i1 [[TOBOOL]], label [[OMP_IF_THEN:%.*]], label [[OMP_IF_ELSE:%.*]] // CHECK25: omp_if.then: @@ -5519,7 +5519,7 @@ int bar(int n){ // CHECK25-NEXT: store i64 [[B]], i64* [[B_ADDR]], align 8 // CHECK25-NEXT: [[TMP0:%.*]] = load %struct.S1*, %struct.S1** [[THIS_ADDR]], align 8 // CHECK25-NEXT: [[CONV:%.*]] = bitcast i64* [[B_ADDR]] to i32* -// CHECK25-NEXT: [[TMP1:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK25-NEXT: [[TMP1:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK25-NEXT: [[CONV1:%.*]] = sitofp i32 [[TMP1]] to double // CHECK25-NEXT: [[ADD:%.*]] = fadd double [[CONV1]], 1.500000e+00 // CHECK25-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_S1:%.*]], %struct.S1* [[TMP0]], i32 0, i32 0 @@ -5539,7 +5539,7 @@ int bar(int n){ // CHECK25-NEXT: store i64 [[DOTCAPTURE_EXPR_]], i64* [[DOTCAPTURE_EXPR__ADDR]], align 8 // CHECK25-NEXT: [[TMP1:%.*]] = load %struct.S1*, %struct.S1** [[THIS_ADDR]], align 8 // CHECK25-NEXT: [[CONV:%.*]] = bitcast i64* [[DOTCAPTURE_EXPR__ADDR]] to i8* -// CHECK25-NEXT: [[TMP2:%.*]] = load i8, i8* [[CONV]], align 8 +// CHECK25-NEXT: [[TMP2:%.*]] = load i8, i8* [[CONV]], align 1 // CHECK25-NEXT: [[TOBOOL:%.*]] = trunc i8 [[TMP2]] to i1 // CHECK25-NEXT: br i1 [[TOBOOL]], label [[OMP_IF_THEN:%.*]], label [[OMP_IF_ELSE:%.*]] // CHECK25: omp_if.then: @@ -5581,7 +5581,7 @@ int bar(int n){ // CHECK25-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) // CHECK25-NEXT: store i64 [[A]], i64* [[A_ADDR]], align 8 // CHECK25-NEXT: [[CONV:%.*]] = bitcast i64* [[A_ADDR]] to i32* -// CHECK25-NEXT: [[TMP1:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK25-NEXT: [[TMP1:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK25-NEXT: [[CONV1:%.*]] = bitcast i64* [[A_CASTED]] to i32* // CHECK25-NEXT: store i32 [[TMP1]], i32* [[CONV1]], align 4 // CHECK25-NEXT: [[TMP2:%.*]] = load i64, i64* [[A_CASTED]], align 8 @@ -5603,9 +5603,9 @@ int bar(int n){ // CHECK25-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 // CHECK25-NEXT: store i64 [[A]], i64* [[A_ADDR]], align 8 // CHECK25-NEXT: [[CONV:%.*]] = bitcast 
i64* [[A_ADDR]] to i32* -// CHECK25-NEXT: [[TMP0:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK25-NEXT: [[TMP0:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK25-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP0]], 1 -// CHECK25-NEXT: store i32 [[ADD]], i32* [[CONV]], align 8 +// CHECK25-NEXT: store i32 [[ADD]], i32* [[CONV]], align 4 // CHECK25-NEXT: ret void // // @@ -5620,11 +5620,11 @@ int bar(int n){ // CHECK25-NEXT: store i64 [[B]], i64* [[B_ADDR]], align 8 // CHECK25-NEXT: [[CONV:%.*]] = bitcast i64* [[A_ADDR]] to i32* // CHECK25-NEXT: [[CONV1:%.*]] = bitcast i64* [[B_ADDR]] to i16* -// CHECK25-NEXT: [[TMP0:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK25-NEXT: [[TMP0:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK25-NEXT: [[CONV2:%.*]] = bitcast i64* [[A_CASTED]] to i32* // CHECK25-NEXT: store i32 [[TMP0]], i32* [[CONV2]], align 4 // CHECK25-NEXT: [[TMP1:%.*]] = load i64, i64* [[A_CASTED]], align 8 -// CHECK25-NEXT: [[TMP2:%.*]] = load i16, i16* [[CONV1]], align 8 +// CHECK25-NEXT: [[TMP2:%.*]] = load i16, i16* [[CONV1]], align 2 // CHECK25-NEXT: [[CONV3:%.*]] = bitcast i64* [[B_CASTED]] to i16* // CHECK25-NEXT: store i16 [[TMP2]], i16* [[CONV3]], align 2 // CHECK25-NEXT: [[TMP3:%.*]] = load i64, i64* [[B_CASTED]], align 8 @@ -5645,11 +5645,11 @@ int bar(int n){ // CHECK25-NEXT: store i64 [[B]], i64* [[B_ADDR]], align 8 // CHECK25-NEXT: [[CONV:%.*]] = bitcast i64* [[A_ADDR]] to i32* // CHECK25-NEXT: [[CONV1:%.*]] = bitcast i64* [[B_ADDR]] to i16* -// CHECK25-NEXT: [[TMP0:%.*]] = load i16, i16* [[CONV1]], align 8 +// CHECK25-NEXT: [[TMP0:%.*]] = load i16, i16* [[CONV1]], align 2 // CHECK25-NEXT: [[CONV2:%.*]] = sext i16 [[TMP0]] to i32 -// CHECK25-NEXT: [[TMP1:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK25-NEXT: [[TMP1:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK25-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP1]], [[CONV2]] -// CHECK25-NEXT: store i32 [[ADD]], i32* [[CONV]], align 8 +// CHECK25-NEXT: store i32 [[ADD]], i32* [[CONV]], align 4 // CHECK25-NEXT: ret void // // @@ -5662,7 +5662,7 @@ int bar(int n){ // CHECK26-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1:[0-9]+]]) // CHECK26-NEXT: store i64 [[DOTCAPTURE_EXPR_]], i64* [[DOTCAPTURE_EXPR__ADDR]], align 8 // CHECK26-NEXT: [[CONV:%.*]] = bitcast i64* [[DOTCAPTURE_EXPR__ADDR]] to i8* -// CHECK26-NEXT: [[TMP1:%.*]] = load i8, i8* [[CONV]], align 8 +// CHECK26-NEXT: [[TMP1:%.*]] = load i8, i8* [[CONV]], align 1 // CHECK26-NEXT: [[TOBOOL:%.*]] = trunc i8 [[TMP1]] to i1 // CHECK26-NEXT: br i1 [[TOBOOL]], label [[OMP_IF_THEN:%.*]], label [[OMP_IF_ELSE:%.*]] // CHECK26: omp_if.then: @@ -5722,11 +5722,11 @@ int bar(int n){ // CHECK26-NEXT: [[TMP1:%.*]] = load %struct.S1*, %struct.S1** [[THIS_ADDR]], align 8 // CHECK26-NEXT: [[CONV:%.*]] = bitcast i64* [[B_ADDR]] to i32* // CHECK26-NEXT: [[CONV1:%.*]] = bitcast i64* [[DOTCAPTURE_EXPR__ADDR]] to i8* -// CHECK26-NEXT: [[TMP2:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK26-NEXT: [[TMP2:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK26-NEXT: [[CONV2:%.*]] = bitcast i64* [[B_CASTED]] to i32* // CHECK26-NEXT: store i32 [[TMP2]], i32* [[CONV2]], align 4 // CHECK26-NEXT: [[TMP3:%.*]] = load i64, i64* [[B_CASTED]], align 8 -// CHECK26-NEXT: [[TMP4:%.*]] = load i8, i8* [[CONV1]], align 8 +// CHECK26-NEXT: [[TMP4:%.*]] = load i8, i8* [[CONV1]], align 1 // CHECK26-NEXT: [[TOBOOL:%.*]] = trunc i8 [[TMP4]] to i1 // CHECK26-NEXT: br i1 [[TOBOOL]], label [[OMP_IF_THEN:%.*]], label [[OMP_IF_ELSE:%.*]] // CHECK26: omp_if.then: @@ -5756,7 +5756,7 @@ int 
bar(int n){ // CHECK26-NEXT: store i64 [[B]], i64* [[B_ADDR]], align 8 // CHECK26-NEXT: [[TMP0:%.*]] = load %struct.S1*, %struct.S1** [[THIS_ADDR]], align 8 // CHECK26-NEXT: [[CONV:%.*]] = bitcast i64* [[B_ADDR]] to i32* -// CHECK26-NEXT: [[TMP1:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK26-NEXT: [[TMP1:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK26-NEXT: [[CONV1:%.*]] = sitofp i32 [[TMP1]] to double // CHECK26-NEXT: [[ADD:%.*]] = fadd double [[CONV1]], 1.500000e+00 // CHECK26-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_S1:%.*]], %struct.S1* [[TMP0]], i32 0, i32 0 @@ -5776,7 +5776,7 @@ int bar(int n){ // CHECK26-NEXT: store i64 [[DOTCAPTURE_EXPR_]], i64* [[DOTCAPTURE_EXPR__ADDR]], align 8 // CHECK26-NEXT: [[TMP1:%.*]] = load %struct.S1*, %struct.S1** [[THIS_ADDR]], align 8 // CHECK26-NEXT: [[CONV:%.*]] = bitcast i64* [[DOTCAPTURE_EXPR__ADDR]] to i8* -// CHECK26-NEXT: [[TMP2:%.*]] = load i8, i8* [[CONV]], align 8 +// CHECK26-NEXT: [[TMP2:%.*]] = load i8, i8* [[CONV]], align 1 // CHECK26-NEXT: [[TOBOOL:%.*]] = trunc i8 [[TMP2]] to i1 // CHECK26-NEXT: br i1 [[TOBOOL]], label [[OMP_IF_THEN:%.*]], label [[OMP_IF_ELSE:%.*]] // CHECK26: omp_if.then: @@ -5818,7 +5818,7 @@ int bar(int n){ // CHECK26-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) // CHECK26-NEXT: store i64 [[A]], i64* [[A_ADDR]], align 8 // CHECK26-NEXT: [[CONV:%.*]] = bitcast i64* [[A_ADDR]] to i32* -// CHECK26-NEXT: [[TMP1:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK26-NEXT: [[TMP1:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK26-NEXT: [[CONV1:%.*]] = bitcast i64* [[A_CASTED]] to i32* // CHECK26-NEXT: store i32 [[TMP1]], i32* [[CONV1]], align 4 // CHECK26-NEXT: [[TMP2:%.*]] = load i64, i64* [[A_CASTED]], align 8 @@ -5840,9 +5840,9 @@ int bar(int n){ // CHECK26-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 // CHECK26-NEXT: store i64 [[A]], i64* [[A_ADDR]], align 8 // CHECK26-NEXT: [[CONV:%.*]] = bitcast i64* [[A_ADDR]] to i32* -// CHECK26-NEXT: [[TMP0:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK26-NEXT: [[TMP0:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK26-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP0]], 1 -// CHECK26-NEXT: store i32 [[ADD]], i32* [[CONV]], align 8 +// CHECK26-NEXT: store i32 [[ADD]], i32* [[CONV]], align 4 // CHECK26-NEXT: ret void // // @@ -5857,11 +5857,11 @@ int bar(int n){ // CHECK26-NEXT: store i64 [[B]], i64* [[B_ADDR]], align 8 // CHECK26-NEXT: [[CONV:%.*]] = bitcast i64* [[A_ADDR]] to i32* // CHECK26-NEXT: [[CONV1:%.*]] = bitcast i64* [[B_ADDR]] to i16* -// CHECK26-NEXT: [[TMP0:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK26-NEXT: [[TMP0:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK26-NEXT: [[CONV2:%.*]] = bitcast i64* [[A_CASTED]] to i32* // CHECK26-NEXT: store i32 [[TMP0]], i32* [[CONV2]], align 4 // CHECK26-NEXT: [[TMP1:%.*]] = load i64, i64* [[A_CASTED]], align 8 -// CHECK26-NEXT: [[TMP2:%.*]] = load i16, i16* [[CONV1]], align 8 +// CHECK26-NEXT: [[TMP2:%.*]] = load i16, i16* [[CONV1]], align 2 // CHECK26-NEXT: [[CONV3:%.*]] = bitcast i64* [[B_CASTED]] to i16* // CHECK26-NEXT: store i16 [[TMP2]], i16* [[CONV3]], align 2 // CHECK26-NEXT: [[TMP3:%.*]] = load i64, i64* [[B_CASTED]], align 8 @@ -5882,11 +5882,11 @@ int bar(int n){ // CHECK26-NEXT: store i64 [[B]], i64* [[B_ADDR]], align 8 // CHECK26-NEXT: [[CONV:%.*]] = bitcast i64* [[A_ADDR]] to i32* // CHECK26-NEXT: [[CONV1:%.*]] = bitcast i64* [[B_ADDR]] to i16* -// CHECK26-NEXT: [[TMP0:%.*]] = load i16, i16* [[CONV1]], align 8 +// CHECK26-NEXT: 
[[TMP0:%.*]] = load i16, i16* [[CONV1]], align 2 // CHECK26-NEXT: [[CONV2:%.*]] = sext i16 [[TMP0]] to i32 -// CHECK26-NEXT: [[TMP1:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK26-NEXT: [[TMP1:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK26-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP1]], [[CONV2]] -// CHECK26-NEXT: store i32 [[ADD]], i32* [[CONV]], align 8 +// CHECK26-NEXT: store i32 [[ADD]], i32* [[CONV]], align 4 // CHECK26-NEXT: ret void // // @@ -5899,7 +5899,7 @@ int bar(int n){ // CHECK27-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1:[0-9]+]]) // CHECK27-NEXT: store i32 [[DOTCAPTURE_EXPR_]], i32* [[DOTCAPTURE_EXPR__ADDR]], align 4 // CHECK27-NEXT: [[CONV:%.*]] = bitcast i32* [[DOTCAPTURE_EXPR__ADDR]] to i8* -// CHECK27-NEXT: [[TMP1:%.*]] = load i8, i8* [[CONV]], align 4 +// CHECK27-NEXT: [[TMP1:%.*]] = load i8, i8* [[CONV]], align 1 // CHECK27-NEXT: [[TOBOOL:%.*]] = trunc i8 [[TMP1]] to i1 // CHECK27-NEXT: br i1 [[TOBOOL]], label [[OMP_IF_THEN:%.*]], label [[OMP_IF_ELSE:%.*]] // CHECK27: omp_if.then: @@ -5961,7 +5961,7 @@ int bar(int n){ // CHECK27-NEXT: [[TMP2:%.*]] = load i32, i32* [[B_ADDR]], align 4 // CHECK27-NEXT: store i32 [[TMP2]], i32* [[B_CASTED]], align 4 // CHECK27-NEXT: [[TMP3:%.*]] = load i32, i32* [[B_CASTED]], align 4 -// CHECK27-NEXT: [[TMP4:%.*]] = load i8, i8* [[CONV]], align 4 +// CHECK27-NEXT: [[TMP4:%.*]] = load i8, i8* [[CONV]], align 1 // CHECK27-NEXT: [[TOBOOL:%.*]] = trunc i8 [[TMP4]] to i1 // CHECK27-NEXT: br i1 [[TOBOOL]], label [[OMP_IF_THEN:%.*]], label [[OMP_IF_ELSE:%.*]] // CHECK27: omp_if.then: @@ -6010,7 +6010,7 @@ int bar(int n){ // CHECK27-NEXT: store i32 [[DOTCAPTURE_EXPR_]], i32* [[DOTCAPTURE_EXPR__ADDR]], align 4 // CHECK27-NEXT: [[TMP1:%.*]] = load %struct.S1*, %struct.S1** [[THIS_ADDR]], align 4 // CHECK27-NEXT: [[CONV:%.*]] = bitcast i32* [[DOTCAPTURE_EXPR__ADDR]] to i8* -// CHECK27-NEXT: [[TMP2:%.*]] = load i8, i8* [[CONV]], align 4 +// CHECK27-NEXT: [[TMP2:%.*]] = load i8, i8* [[CONV]], align 1 // CHECK27-NEXT: [[TOBOOL:%.*]] = trunc i8 [[TMP2]] to i1 // CHECK27-NEXT: br i1 [[TOBOOL]], label [[OMP_IF_THEN:%.*]], label [[OMP_IF_ELSE:%.*]] // CHECK27: omp_if.then: @@ -6090,7 +6090,7 @@ int bar(int n){ // CHECK27-NEXT: [[TMP0:%.*]] = load i32, i32* [[A_ADDR]], align 4 // CHECK27-NEXT: store i32 [[TMP0]], i32* [[A_CASTED]], align 4 // CHECK27-NEXT: [[TMP1:%.*]] = load i32, i32* [[A_CASTED]], align 4 -// CHECK27-NEXT: [[TMP2:%.*]] = load i16, i16* [[CONV]], align 4 +// CHECK27-NEXT: [[TMP2:%.*]] = load i16, i16* [[CONV]], align 2 // CHECK27-NEXT: [[CONV1:%.*]] = bitcast i32* [[B_CASTED]] to i16* // CHECK27-NEXT: store i16 [[TMP2]], i16* [[CONV1]], align 2 // CHECK27-NEXT: [[TMP3:%.*]] = load i32, i32* [[B_CASTED]], align 4 @@ -6110,7 +6110,7 @@ int bar(int n){ // CHECK27-NEXT: store i32 [[A]], i32* [[A_ADDR]], align 4 // CHECK27-NEXT: store i32 [[B]], i32* [[B_ADDR]], align 4 // CHECK27-NEXT: [[CONV:%.*]] = bitcast i32* [[B_ADDR]] to i16* -// CHECK27-NEXT: [[TMP0:%.*]] = load i16, i16* [[CONV]], align 4 +// CHECK27-NEXT: [[TMP0:%.*]] = load i16, i16* [[CONV]], align 2 // CHECK27-NEXT: [[CONV1:%.*]] = sext i16 [[TMP0]] to i32 // CHECK27-NEXT: [[TMP1:%.*]] = load i32, i32* [[A_ADDR]], align 4 // CHECK27-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP1]], [[CONV1]] @@ -6127,7 +6127,7 @@ int bar(int n){ // CHECK28-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1:[0-9]+]]) // CHECK28-NEXT: store i32 [[DOTCAPTURE_EXPR_]], i32* [[DOTCAPTURE_EXPR__ADDR]], align 4 // CHECK28-NEXT: [[CONV:%.*]] 
= bitcast i32* [[DOTCAPTURE_EXPR__ADDR]] to i8* -// CHECK28-NEXT: [[TMP1:%.*]] = load i8, i8* [[CONV]], align 4 +// CHECK28-NEXT: [[TMP1:%.*]] = load i8, i8* [[CONV]], align 1 // CHECK28-NEXT: [[TOBOOL:%.*]] = trunc i8 [[TMP1]] to i1 // CHECK28-NEXT: br i1 [[TOBOOL]], label [[OMP_IF_THEN:%.*]], label [[OMP_IF_ELSE:%.*]] // CHECK28: omp_if.then: @@ -6189,7 +6189,7 @@ int bar(int n){ // CHECK28-NEXT: [[TMP2:%.*]] = load i32, i32* [[B_ADDR]], align 4 // CHECK28-NEXT: store i32 [[TMP2]], i32* [[B_CASTED]], align 4 // CHECK28-NEXT: [[TMP3:%.*]] = load i32, i32* [[B_CASTED]], align 4 -// CHECK28-NEXT: [[TMP4:%.*]] = load i8, i8* [[CONV]], align 4 +// CHECK28-NEXT: [[TMP4:%.*]] = load i8, i8* [[CONV]], align 1 // CHECK28-NEXT: [[TOBOOL:%.*]] = trunc i8 [[TMP4]] to i1 // CHECK28-NEXT: br i1 [[TOBOOL]], label [[OMP_IF_THEN:%.*]], label [[OMP_IF_ELSE:%.*]] // CHECK28: omp_if.then: @@ -6238,7 +6238,7 @@ int bar(int n){ // CHECK28-NEXT: store i32 [[DOTCAPTURE_EXPR_]], i32* [[DOTCAPTURE_EXPR__ADDR]], align 4 // CHECK28-NEXT: [[TMP1:%.*]] = load %struct.S1*, %struct.S1** [[THIS_ADDR]], align 4 // CHECK28-NEXT: [[CONV:%.*]] = bitcast i32* [[DOTCAPTURE_EXPR__ADDR]] to i8* -// CHECK28-NEXT: [[TMP2:%.*]] = load i8, i8* [[CONV]], align 4 +// CHECK28-NEXT: [[TMP2:%.*]] = load i8, i8* [[CONV]], align 1 // CHECK28-NEXT: [[TOBOOL:%.*]] = trunc i8 [[TMP2]] to i1 // CHECK28-NEXT: br i1 [[TOBOOL]], label [[OMP_IF_THEN:%.*]], label [[OMP_IF_ELSE:%.*]] // CHECK28: omp_if.then: @@ -6318,7 +6318,7 @@ int bar(int n){ // CHECK28-NEXT: [[TMP0:%.*]] = load i32, i32* [[A_ADDR]], align 4 // CHECK28-NEXT: store i32 [[TMP0]], i32* [[A_CASTED]], align 4 // CHECK28-NEXT: [[TMP1:%.*]] = load i32, i32* [[A_CASTED]], align 4 -// CHECK28-NEXT: [[TMP2:%.*]] = load i16, i16* [[CONV]], align 4 +// CHECK28-NEXT: [[TMP2:%.*]] = load i16, i16* [[CONV]], align 2 // CHECK28-NEXT: [[CONV1:%.*]] = bitcast i32* [[B_CASTED]] to i16* // CHECK28-NEXT: store i16 [[TMP2]], i16* [[CONV1]], align 2 // CHECK28-NEXT: [[TMP3:%.*]] = load i32, i32* [[B_CASTED]], align 4 @@ -6338,7 +6338,7 @@ int bar(int n){ // CHECK28-NEXT: store i32 [[A]], i32* [[A_ADDR]], align 4 // CHECK28-NEXT: store i32 [[B]], i32* [[B_ADDR]], align 4 // CHECK28-NEXT: [[CONV:%.*]] = bitcast i32* [[B_ADDR]] to i16* -// CHECK28-NEXT: [[TMP0:%.*]] = load i16, i16* [[CONV]], align 4 +// CHECK28-NEXT: [[TMP0:%.*]] = load i16, i16* [[CONV]], align 2 // CHECK28-NEXT: [[CONV1:%.*]] = sext i16 [[TMP0]] to i32 // CHECK28-NEXT: [[TMP1:%.*]] = load i32, i32* [[A_ADDR]], align 4 // CHECK28-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP1]], [[CONV1]] diff --git a/clang/test/OpenMP/target_parallel_num_threads_codegen.cpp b/clang/test/OpenMP/target_parallel_num_threads_codegen.cpp index ee15b339271d..2c636c9187a1 100644 --- a/clang/test/OpenMP/target_parallel_num_threads_codegen.cpp +++ b/clang/test/OpenMP/target_parallel_num_threads_codegen.cpp @@ -465,9 +465,9 @@ int bar(int n){ // CHECK1-NEXT: [[TMP1:%.*]] = load %struct.S1*, %struct.S1** [[THIS_ADDR]], align 8 // CHECK1-NEXT: [[CONV:%.*]] = bitcast i64* [[B_ADDR]] to i32* // CHECK1-NEXT: [[CONV1:%.*]] = bitcast i64* [[DOTCAPTURE_EXPR__ADDR]] to i32* -// CHECK1-NEXT: [[TMP2:%.*]] = load i32, i32* [[CONV1]], align 8 +// CHECK1-NEXT: [[TMP2:%.*]] = load i32, i32* [[CONV1]], align 4 // CHECK1-NEXT: call void @__kmpc_push_num_threads(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]], i32 [[TMP2]]) -// CHECK1-NEXT: [[TMP3:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK1-NEXT: [[TMP3:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK1-NEXT: [[CONV2:%.*]] = bitcast 
i64* [[B_CASTED]] to i32* // CHECK1-NEXT: store i32 [[TMP3]], i32* [[CONV2]], align 4 // CHECK1-NEXT: [[TMP4:%.*]] = load i64, i64* [[B_CASTED]], align 8 @@ -488,7 +488,7 @@ int bar(int n){ // CHECK1-NEXT: store i64 [[B]], i64* [[B_ADDR]], align 8 // CHECK1-NEXT: [[TMP0:%.*]] = load %struct.S1*, %struct.S1** [[THIS_ADDR]], align 8 // CHECK1-NEXT: [[CONV:%.*]] = bitcast i64* [[B_ADDR]] to i32* -// CHECK1-NEXT: [[TMP1:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK1-NEXT: [[CONV1:%.*]] = sitofp i32 [[TMP1]] to double // CHECK1-NEXT: [[ADD:%.*]] = fadd double [[CONV1]], 1.500000e+00 // CHECK1-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_S1:%.*]], %struct.S1* [[TMP0]], i32 0, i32 0 @@ -530,7 +530,7 @@ int bar(int n){ // CHECK1-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) // CHECK1-NEXT: store i64 [[DOTCAPTURE_EXPR_]], i64* [[DOTCAPTURE_EXPR__ADDR]], align 8 // CHECK1-NEXT: [[CONV:%.*]] = bitcast i64* [[DOTCAPTURE_EXPR__ADDR]] to i32* -// CHECK1-NEXT: [[TMP1:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK1-NEXT: call void @__kmpc_push_num_threads(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]], i32 [[TMP1]]) // CHECK1-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB1]], i32 0, void (i32*, i32*, ...)* bitcast (void (i32*, i32*)* @.omp_outlined..4 to void (i32*, i32*, ...)*)) // CHECK1-NEXT: ret void @@ -553,7 +553,7 @@ int bar(int n){ // CHECK1-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) // CHECK1-NEXT: store i64 [[DOTCAPTURE_EXPR_]], i64* [[DOTCAPTURE_EXPR__ADDR]], align 8 // CHECK1-NEXT: [[CONV:%.*]] = bitcast i64* [[DOTCAPTURE_EXPR__ADDR]] to i32* -// CHECK1-NEXT: [[TMP1:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK1-NEXT: call void @__kmpc_push_num_threads(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]], i32 [[TMP1]]) // CHECK1-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) 
@__kmpc_fork_call(%struct.ident_t* @[[GLOB1]], i32 0, void (i32*, i32*, ...)* bitcast (void (i32*, i32*)* @.omp_outlined..7 to void (i32*, i32*, ...)*)) // CHECK1-NEXT: ret void @@ -603,14 +603,14 @@ int bar(int n){ // CHECK1-NEXT: [[CONV:%.*]] = bitcast i64* [[A_ADDR]] to i32* // CHECK1-NEXT: [[CONV1:%.*]] = bitcast i64* [[B_ADDR]] to i16* // CHECK1-NEXT: [[CONV2:%.*]] = bitcast i64* [[DOTCAPTURE_EXPR__ADDR]] to i16* -// CHECK1-NEXT: [[TMP1:%.*]] = load i16, i16* [[CONV2]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = load i16, i16* [[CONV2]], align 2 // CHECK1-NEXT: [[TMP2:%.*]] = sext i16 [[TMP1]] to i32 // CHECK1-NEXT: call void @__kmpc_push_num_threads(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]], i32 [[TMP2]]) -// CHECK1-NEXT: [[TMP3:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK1-NEXT: [[TMP3:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK1-NEXT: [[CONV3:%.*]] = bitcast i64* [[A_CASTED]] to i32* // CHECK1-NEXT: store i32 [[TMP3]], i32* [[CONV3]], align 4 // CHECK1-NEXT: [[TMP4:%.*]] = load i64, i64* [[A_CASTED]], align 8 -// CHECK1-NEXT: [[TMP5:%.*]] = load i16, i16* [[CONV1]], align 8 +// CHECK1-NEXT: [[TMP5:%.*]] = load i16, i16* [[CONV1]], align 2 // CHECK1-NEXT: [[CONV4:%.*]] = bitcast i64* [[B_CASTED]] to i16* // CHECK1-NEXT: store i16 [[TMP5]], i16* [[CONV4]], align 2 // CHECK1-NEXT: [[TMP6:%.*]] = load i64, i64* [[B_CASTED]], align 8 @@ -631,11 +631,11 @@ int bar(int n){ // CHECK1-NEXT: store i64 [[B]], i64* [[B_ADDR]], align 8 // CHECK1-NEXT: [[CONV:%.*]] = bitcast i64* [[A_ADDR]] to i32* // CHECK1-NEXT: [[CONV1:%.*]] = bitcast i64* [[B_ADDR]] to i16* -// CHECK1-NEXT: [[TMP0:%.*]] = load i16, i16* [[CONV1]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load i16, i16* [[CONV1]], align 2 // CHECK1-NEXT: [[CONV2:%.*]] = sext i16 [[TMP0]] to i32 -// CHECK1-NEXT: [[TMP1:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP1]], [[CONV2]] -// CHECK1-NEXT: store i32 [[ADD]], i32* [[CONV]], align 8 +// CHECK1-NEXT: store i32 [[ADD]], i32* [[CONV]], align 4 // CHECK1-NEXT: ret void // // @@ -922,9 +922,9 @@ int bar(int n){ // CHECK2-NEXT: [[TMP1:%.*]] = load %struct.S1*, %struct.S1** [[THIS_ADDR]], align 8 // CHECK2-NEXT: [[CONV:%.*]] = bitcast i64* [[B_ADDR]] to i32* // CHECK2-NEXT: [[CONV1:%.*]] = bitcast i64* [[DOTCAPTURE_EXPR__ADDR]] to i32* -// CHECK2-NEXT: [[TMP2:%.*]] = load i32, i32* [[CONV1]], align 8 +// CHECK2-NEXT: [[TMP2:%.*]] = load i32, i32* [[CONV1]], align 4 // CHECK2-NEXT: call void @__kmpc_push_num_threads(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]], i32 [[TMP2]]) -// CHECK2-NEXT: [[TMP3:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK2-NEXT: [[TMP3:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK2-NEXT: [[CONV2:%.*]] = bitcast i64* [[B_CASTED]] to i32* // CHECK2-NEXT: store i32 [[TMP3]], i32* [[CONV2]], align 4 // CHECK2-NEXT: [[TMP4:%.*]] = load i64, i64* [[B_CASTED]], align 8 @@ -945,7 +945,7 @@ int bar(int n){ // CHECK2-NEXT: store i64 [[B]], i64* [[B_ADDR]], align 8 // CHECK2-NEXT: [[TMP0:%.*]] = load %struct.S1*, %struct.S1** [[THIS_ADDR]], align 8 // CHECK2-NEXT: [[CONV:%.*]] = bitcast i64* [[B_ADDR]] to i32* -// CHECK2-NEXT: [[TMP1:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK2-NEXT: [[TMP1:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK2-NEXT: [[CONV1:%.*]] = sitofp i32 [[TMP1]] to double // CHECK2-NEXT: [[ADD:%.*]] = fadd double [[CONV1]], 1.500000e+00 // CHECK2-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_S1:%.*]], %struct.S1* [[TMP0]], i32 0, i32 0 
@@ -987,7 +987,7 @@ int bar(int n){ // CHECK2-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) // CHECK2-NEXT: store i64 [[DOTCAPTURE_EXPR_]], i64* [[DOTCAPTURE_EXPR__ADDR]], align 8 // CHECK2-NEXT: [[CONV:%.*]] = bitcast i64* [[DOTCAPTURE_EXPR__ADDR]] to i32* -// CHECK2-NEXT: [[TMP1:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK2-NEXT: [[TMP1:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK2-NEXT: call void @__kmpc_push_num_threads(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]], i32 [[TMP1]]) // CHECK2-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB1]], i32 0, void (i32*, i32*, ...)* bitcast (void (i32*, i32*)* @.omp_outlined..4 to void (i32*, i32*, ...)*)) // CHECK2-NEXT: ret void @@ -1010,7 +1010,7 @@ int bar(int n){ // CHECK2-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) // CHECK2-NEXT: store i64 [[DOTCAPTURE_EXPR_]], i64* [[DOTCAPTURE_EXPR__ADDR]], align 8 // CHECK2-NEXT: [[CONV:%.*]] = bitcast i64* [[DOTCAPTURE_EXPR__ADDR]] to i32* -// CHECK2-NEXT: [[TMP1:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK2-NEXT: [[TMP1:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK2-NEXT: call void @__kmpc_push_num_threads(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]], i32 [[TMP1]]) // CHECK2-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB1]], i32 0, void (i32*, i32*, ...)* bitcast (void (i32*, i32*)* @.omp_outlined..7 to void (i32*, i32*, ...)*)) // CHECK2-NEXT: ret void @@ -1060,14 +1060,14 @@ int bar(int n){ // CHECK2-NEXT: [[CONV:%.*]] = bitcast i64* [[A_ADDR]] to i32* // CHECK2-NEXT: [[CONV1:%.*]] = bitcast i64* [[B_ADDR]] to i16* // CHECK2-NEXT: [[CONV2:%.*]] = bitcast i64* [[DOTCAPTURE_EXPR__ADDR]] to i16* -// CHECK2-NEXT: [[TMP1:%.*]] = load i16, i16* [[CONV2]], align 8 +// CHECK2-NEXT: [[TMP1:%.*]] = load i16, i16* [[CONV2]], align 2 // CHECK2-NEXT: [[TMP2:%.*]] = sext i16 [[TMP1]] to i32 // CHECK2-NEXT: call void @__kmpc_push_num_threads(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]], i32 [[TMP2]]) -// CHECK2-NEXT: [[TMP3:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK2-NEXT: [[TMP3:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK2-NEXT: [[CONV3:%.*]] = bitcast i64* [[A_CASTED]] to i32* // CHECK2-NEXT: store i32 [[TMP3]], i32* [[CONV3]], align 4 // CHECK2-NEXT: [[TMP4:%.*]] = load i64, i64* [[A_CASTED]], align 8 -// CHECK2-NEXT: [[TMP5:%.*]] = load i16, i16* [[CONV1]], align 8 +// CHECK2-NEXT: [[TMP5:%.*]] = load i16, i16* [[CONV1]], align 2 // CHECK2-NEXT: [[CONV4:%.*]] = bitcast i64* [[B_CASTED]] to i16* // CHECK2-NEXT: store i16 [[TMP5]], i16* [[CONV4]], align 2 // CHECK2-NEXT: [[TMP6:%.*]] = load i64, i64* [[B_CASTED]], align 8 @@ -1088,11 +1088,11 @@ int bar(int n){ // CHECK2-NEXT: store i64 [[B]], i64* [[B_ADDR]], align 8 // CHECK2-NEXT: [[CONV:%.*]] = bitcast i64* [[A_ADDR]] to i32* // CHECK2-NEXT: [[CONV1:%.*]] = bitcast i64* [[B_ADDR]] to i16* -// CHECK2-NEXT: [[TMP0:%.*]] = load i16, i16* [[CONV1]], align 8 +// CHECK2-NEXT: [[TMP0:%.*]] = load i16, i16* [[CONV1]], align 2 // CHECK2-NEXT: [[CONV2:%.*]] = sext i16 [[TMP0]] to i32 -// CHECK2-NEXT: [[TMP1:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK2-NEXT: [[TMP1:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK2-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP1]], [[CONV2]] -// CHECK2-NEXT: store i32 [[ADD]], i32* [[CONV]], align 8 +// CHECK2-NEXT: store i32 [[ADD]], i32* [[CONV]], align 4 // CHECK2-NEXT: ret void // // @@ -1505,13 
+1505,13 @@ int bar(int n){ // CHECK3-NEXT: store i32 [[DOTCAPTURE_EXPR_]], i32* [[DOTCAPTURE_EXPR__ADDR]], align 4 // CHECK3-NEXT: [[CONV:%.*]] = bitcast i32* [[B_ADDR]] to i16* // CHECK3-NEXT: [[CONV1:%.*]] = bitcast i32* [[DOTCAPTURE_EXPR__ADDR]] to i16* -// CHECK3-NEXT: [[TMP1:%.*]] = load i16, i16* [[CONV1]], align 4 +// CHECK3-NEXT: [[TMP1:%.*]] = load i16, i16* [[CONV1]], align 2 // CHECK3-NEXT: [[TMP2:%.*]] = sext i16 [[TMP1]] to i32 // CHECK3-NEXT: call void @__kmpc_push_num_threads(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]], i32 [[TMP2]]) // CHECK3-NEXT: [[TMP3:%.*]] = load i32, i32* [[A_ADDR]], align 4 // CHECK3-NEXT: store i32 [[TMP3]], i32* [[A_CASTED]], align 4 // CHECK3-NEXT: [[TMP4:%.*]] = load i32, i32* [[A_CASTED]], align 4 -// CHECK3-NEXT: [[TMP5:%.*]] = load i16, i16* [[CONV]], align 4 +// CHECK3-NEXT: [[TMP5:%.*]] = load i16, i16* [[CONV]], align 2 // CHECK3-NEXT: [[CONV2:%.*]] = bitcast i32* [[B_CASTED]] to i16* // CHECK3-NEXT: store i16 [[TMP5]], i16* [[CONV2]], align 2 // CHECK3-NEXT: [[TMP6:%.*]] = load i32, i32* [[B_CASTED]], align 4 @@ -1531,7 +1531,7 @@ int bar(int n){ // CHECK3-NEXT: store i32 [[A]], i32* [[A_ADDR]], align 4 // CHECK3-NEXT: store i32 [[B]], i32* [[B_ADDR]], align 4 // CHECK3-NEXT: [[CONV:%.*]] = bitcast i32* [[B_ADDR]] to i16* -// CHECK3-NEXT: [[TMP0:%.*]] = load i16, i16* [[CONV]], align 4 +// CHECK3-NEXT: [[TMP0:%.*]] = load i16, i16* [[CONV]], align 2 // CHECK3-NEXT: [[CONV1:%.*]] = sext i16 [[TMP0]] to i32 // CHECK3-NEXT: [[TMP1:%.*]] = load i32, i32* [[A_ADDR]], align 4 // CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP1]], [[CONV1]] @@ -1948,13 +1948,13 @@ int bar(int n){ // CHECK4-NEXT: store i32 [[DOTCAPTURE_EXPR_]], i32* [[DOTCAPTURE_EXPR__ADDR]], align 4 // CHECK4-NEXT: [[CONV:%.*]] = bitcast i32* [[B_ADDR]] to i16* // CHECK4-NEXT: [[CONV1:%.*]] = bitcast i32* [[DOTCAPTURE_EXPR__ADDR]] to i16* -// CHECK4-NEXT: [[TMP1:%.*]] = load i16, i16* [[CONV1]], align 4 +// CHECK4-NEXT: [[TMP1:%.*]] = load i16, i16* [[CONV1]], align 2 // CHECK4-NEXT: [[TMP2:%.*]] = sext i16 [[TMP1]] to i32 // CHECK4-NEXT: call void @__kmpc_push_num_threads(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]], i32 [[TMP2]]) // CHECK4-NEXT: [[TMP3:%.*]] = load i32, i32* [[A_ADDR]], align 4 // CHECK4-NEXT: store i32 [[TMP3]], i32* [[A_CASTED]], align 4 // CHECK4-NEXT: [[TMP4:%.*]] = load i32, i32* [[A_CASTED]], align 4 -// CHECK4-NEXT: [[TMP5:%.*]] = load i16, i16* [[CONV]], align 4 +// CHECK4-NEXT: [[TMP5:%.*]] = load i16, i16* [[CONV]], align 2 // CHECK4-NEXT: [[CONV2:%.*]] = bitcast i32* [[B_CASTED]] to i16* // CHECK4-NEXT: store i16 [[TMP5]], i16* [[CONV2]], align 2 // CHECK4-NEXT: [[TMP6:%.*]] = load i32, i32* [[B_CASTED]], align 4 @@ -1974,7 +1974,7 @@ int bar(int n){ // CHECK4-NEXT: store i32 [[A]], i32* [[A_ADDR]], align 4 // CHECK4-NEXT: store i32 [[B]], i32* [[B_ADDR]], align 4 // CHECK4-NEXT: [[CONV:%.*]] = bitcast i32* [[B_ADDR]] to i16* -// CHECK4-NEXT: [[TMP0:%.*]] = load i16, i16* [[CONV]], align 4 +// CHECK4-NEXT: [[TMP0:%.*]] = load i16, i16* [[CONV]], align 2 // CHECK4-NEXT: [[CONV1:%.*]] = sext i16 [[TMP0]] to i32 // CHECK4-NEXT: [[TMP1:%.*]] = load i32, i32* [[A_ADDR]], align 4 // CHECK4-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP1]], [[CONV1]] @@ -1996,7 +1996,7 @@ int bar(int n){ // CHECK9-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1:[0-9]+]]) // CHECK9-NEXT: store i64 [[DOTCAPTURE_EXPR_]], i64* [[DOTCAPTURE_EXPR__ADDR]], align 8 // CHECK9-NEXT: [[CONV:%.*]] = bitcast i64* [[DOTCAPTURE_EXPR__ADDR]] to i32* -// CHECK9-NEXT: 
[[TMP1:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK9-NEXT: [[TMP1:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK9-NEXT: call void @__kmpc_push_num_threads(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]], i32 [[TMP1]]) // CHECK9-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB1]], i32 0, void (i32*, i32*, ...)* bitcast (void (i32*, i32*)* @.omp_outlined. to void (i32*, i32*, ...)*)) // CHECK9-NEXT: ret void @@ -2019,7 +2019,7 @@ int bar(int n){ // CHECK9-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) // CHECK9-NEXT: store i64 [[DOTCAPTURE_EXPR_]], i64* [[DOTCAPTURE_EXPR__ADDR]], align 8 // CHECK9-NEXT: [[CONV:%.*]] = bitcast i64* [[DOTCAPTURE_EXPR__ADDR]] to i32* -// CHECK9-NEXT: [[TMP1:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK9-NEXT: [[TMP1:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK9-NEXT: call void @__kmpc_push_num_threads(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]], i32 [[TMP1]]) // CHECK9-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB1]], i32 0, void (i32*, i32*, ...)* bitcast (void (i32*, i32*)* @.omp_outlined..1 to void (i32*, i32*, ...)*)) // CHECK9-NEXT: ret void @@ -2049,9 +2049,9 @@ int bar(int n){ // CHECK9-NEXT: [[TMP1:%.*]] = load %struct.S1*, %struct.S1** [[THIS_ADDR]], align 8 // CHECK9-NEXT: [[CONV:%.*]] = bitcast i64* [[B_ADDR]] to i32* // CHECK9-NEXT: [[CONV1:%.*]] = bitcast i64* [[DOTCAPTURE_EXPR__ADDR]] to i32* -// CHECK9-NEXT: [[TMP2:%.*]] = load i32, i32* [[CONV1]], align 8 +// CHECK9-NEXT: [[TMP2:%.*]] = load i32, i32* [[CONV1]], align 4 // CHECK9-NEXT: call void @__kmpc_push_num_threads(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]], i32 [[TMP2]]) -// CHECK9-NEXT: [[TMP3:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK9-NEXT: [[TMP3:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK9-NEXT: [[CONV2:%.*]] = bitcast i64* [[B_CASTED]] to i32* // CHECK9-NEXT: store i32 [[TMP3]], i32* [[CONV2]], align 4 // CHECK9-NEXT: [[TMP4:%.*]] = load i64, i64* [[B_CASTED]], align 8 @@ -2072,7 +2072,7 @@ int bar(int n){ // CHECK9-NEXT: store i64 [[B]], i64* [[B_ADDR]], align 8 // CHECK9-NEXT: [[TMP0:%.*]] = load %struct.S1*, %struct.S1** [[THIS_ADDR]], align 8 // CHECK9-NEXT: [[CONV:%.*]] = bitcast i64* [[B_ADDR]] to i32* -// CHECK9-NEXT: [[TMP1:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK9-NEXT: [[TMP1:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK9-NEXT: [[CONV1:%.*]] = sitofp i32 [[TMP1]] to double // CHECK9-NEXT: [[ADD:%.*]] = fadd double [[CONV1]], 1.500000e+00 // CHECK9-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_S1:%.*]], %struct.S1* [[TMP0]], i32 0, i32 0 @@ -2141,14 +2141,14 @@ int bar(int n){ // CHECK9-NEXT: [[CONV:%.*]] = bitcast i64* [[A_ADDR]] to i32* // CHECK9-NEXT: [[CONV1:%.*]] = bitcast i64* [[B_ADDR]] to i16* // CHECK9-NEXT: [[CONV2:%.*]] = bitcast i64* [[DOTCAPTURE_EXPR__ADDR]] to i16* -// CHECK9-NEXT: [[TMP1:%.*]] = load i16, i16* [[CONV2]], align 8 +// CHECK9-NEXT: [[TMP1:%.*]] = load i16, i16* [[CONV2]], align 2 // CHECK9-NEXT: [[TMP2:%.*]] = sext i16 [[TMP1]] to i32 // CHECK9-NEXT: call void @__kmpc_push_num_threads(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]], i32 [[TMP2]]) -// CHECK9-NEXT: [[TMP3:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK9-NEXT: [[TMP3:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK9-NEXT: [[CONV3:%.*]] = bitcast i64* [[A_CASTED]] to i32* // CHECK9-NEXT: store i32 [[TMP3]], i32* [[CONV3]], align 4 // CHECK9-NEXT: [[TMP4:%.*]] = load i64, i64* 
[[A_CASTED]], align 8 -// CHECK9-NEXT: [[TMP5:%.*]] = load i16, i16* [[CONV1]], align 8 +// CHECK9-NEXT: [[TMP5:%.*]] = load i16, i16* [[CONV1]], align 2 // CHECK9-NEXT: [[CONV4:%.*]] = bitcast i64* [[B_CASTED]] to i16* // CHECK9-NEXT: store i16 [[TMP5]], i16* [[CONV4]], align 2 // CHECK9-NEXT: [[TMP6:%.*]] = load i64, i64* [[B_CASTED]], align 8 @@ -2169,11 +2169,11 @@ int bar(int n){ // CHECK9-NEXT: store i64 [[B]], i64* [[B_ADDR]], align 8 // CHECK9-NEXT: [[CONV:%.*]] = bitcast i64* [[A_ADDR]] to i32* // CHECK9-NEXT: [[CONV1:%.*]] = bitcast i64* [[B_ADDR]] to i16* -// CHECK9-NEXT: [[TMP0:%.*]] = load i16, i16* [[CONV1]], align 8 +// CHECK9-NEXT: [[TMP0:%.*]] = load i16, i16* [[CONV1]], align 2 // CHECK9-NEXT: [[CONV2:%.*]] = sext i16 [[TMP0]] to i32 -// CHECK9-NEXT: [[TMP1:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK9-NEXT: [[TMP1:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK9-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP1]], [[CONV2]] -// CHECK9-NEXT: store i32 [[ADD]], i32* [[CONV]], align 8 +// CHECK9-NEXT: store i32 [[ADD]], i32* [[CONV]], align 4 // CHECK9-NEXT: ret void // // @@ -2184,7 +2184,7 @@ int bar(int n){ // CHECK10-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1:[0-9]+]]) // CHECK10-NEXT: store i64 [[DOTCAPTURE_EXPR_]], i64* [[DOTCAPTURE_EXPR__ADDR]], align 8 // CHECK10-NEXT: [[CONV:%.*]] = bitcast i64* [[DOTCAPTURE_EXPR__ADDR]] to i32* -// CHECK10-NEXT: [[TMP1:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK10-NEXT: [[TMP1:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK10-NEXT: call void @__kmpc_push_num_threads(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]], i32 [[TMP1]]) // CHECK10-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB1]], i32 0, void (i32*, i32*, ...)* bitcast (void (i32*, i32*)* @.omp_outlined. to void (i32*, i32*, ...)*)) // CHECK10-NEXT: ret void @@ -2207,7 +2207,7 @@ int bar(int n){ // CHECK10-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) // CHECK10-NEXT: store i64 [[DOTCAPTURE_EXPR_]], i64* [[DOTCAPTURE_EXPR__ADDR]], align 8 // CHECK10-NEXT: [[CONV:%.*]] = bitcast i64* [[DOTCAPTURE_EXPR__ADDR]] to i32* -// CHECK10-NEXT: [[TMP1:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK10-NEXT: [[TMP1:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK10-NEXT: call void @__kmpc_push_num_threads(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]], i32 [[TMP1]]) // CHECK10-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) 
@__kmpc_fork_call(%struct.ident_t* @[[GLOB1]], i32 0, void (i32*, i32*, ...)* bitcast (void (i32*, i32*)* @.omp_outlined..1 to void (i32*, i32*, ...)*)) // CHECK10-NEXT: ret void @@ -2237,9 +2237,9 @@ int bar(int n){ // CHECK10-NEXT: [[TMP1:%.*]] = load %struct.S1*, %struct.S1** [[THIS_ADDR]], align 8 // CHECK10-NEXT: [[CONV:%.*]] = bitcast i64* [[B_ADDR]] to i32* // CHECK10-NEXT: [[CONV1:%.*]] = bitcast i64* [[DOTCAPTURE_EXPR__ADDR]] to i32* -// CHECK10-NEXT: [[TMP2:%.*]] = load i32, i32* [[CONV1]], align 8 +// CHECK10-NEXT: [[TMP2:%.*]] = load i32, i32* [[CONV1]], align 4 // CHECK10-NEXT: call void @__kmpc_push_num_threads(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]], i32 [[TMP2]]) -// CHECK10-NEXT: [[TMP3:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK10-NEXT: [[TMP3:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK10-NEXT: [[CONV2:%.*]] = bitcast i64* [[B_CASTED]] to i32* // CHECK10-NEXT: store i32 [[TMP3]], i32* [[CONV2]], align 4 // CHECK10-NEXT: [[TMP4:%.*]] = load i64, i64* [[B_CASTED]], align 8 @@ -2260,7 +2260,7 @@ int bar(int n){ // CHECK10-NEXT: store i64 [[B]], i64* [[B_ADDR]], align 8 // CHECK10-NEXT: [[TMP0:%.*]] = load %struct.S1*, %struct.S1** [[THIS_ADDR]], align 8 // CHECK10-NEXT: [[CONV:%.*]] = bitcast i64* [[B_ADDR]] to i32* -// CHECK10-NEXT: [[TMP1:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK10-NEXT: [[TMP1:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK10-NEXT: [[CONV1:%.*]] = sitofp i32 [[TMP1]] to double // CHECK10-NEXT: [[ADD:%.*]] = fadd double [[CONV1]], 1.500000e+00 // CHECK10-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_S1:%.*]], %struct.S1* [[TMP0]], i32 0, i32 0 @@ -2329,14 +2329,14 @@ int bar(int n){ // CHECK10-NEXT: [[CONV:%.*]] = bitcast i64* [[A_ADDR]] to i32* // CHECK10-NEXT: [[CONV1:%.*]] = bitcast i64* [[B_ADDR]] to i16* // CHECK10-NEXT: [[CONV2:%.*]] = bitcast i64* [[DOTCAPTURE_EXPR__ADDR]] to i16* -// CHECK10-NEXT: [[TMP1:%.*]] = load i16, i16* [[CONV2]], align 8 +// CHECK10-NEXT: [[TMP1:%.*]] = load i16, i16* [[CONV2]], align 2 // CHECK10-NEXT: [[TMP2:%.*]] = sext i16 [[TMP1]] to i32 // CHECK10-NEXT: call void @__kmpc_push_num_threads(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]], i32 [[TMP2]]) -// CHECK10-NEXT: [[TMP3:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK10-NEXT: [[TMP3:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK10-NEXT: [[CONV3:%.*]] = bitcast i64* [[A_CASTED]] to i32* // CHECK10-NEXT: store i32 [[TMP3]], i32* [[CONV3]], align 4 // CHECK10-NEXT: [[TMP4:%.*]] = load i64, i64* [[A_CASTED]], align 8 -// CHECK10-NEXT: [[TMP5:%.*]] = load i16, i16* [[CONV1]], align 8 +// CHECK10-NEXT: [[TMP5:%.*]] = load i16, i16* [[CONV1]], align 2 // CHECK10-NEXT: [[CONV4:%.*]] = bitcast i64* [[B_CASTED]] to i16* // CHECK10-NEXT: store i16 [[TMP5]], i16* [[CONV4]], align 2 // CHECK10-NEXT: [[TMP6:%.*]] = load i64, i64* [[B_CASTED]], align 8 @@ -2357,11 +2357,11 @@ int bar(int n){ // CHECK10-NEXT: store i64 [[B]], i64* [[B_ADDR]], align 8 // CHECK10-NEXT: [[CONV:%.*]] = bitcast i64* [[A_ADDR]] to i32* // CHECK10-NEXT: [[CONV1:%.*]] = bitcast i64* [[B_ADDR]] to i16* -// CHECK10-NEXT: [[TMP0:%.*]] = load i16, i16* [[CONV1]], align 8 +// CHECK10-NEXT: [[TMP0:%.*]] = load i16, i16* [[CONV1]], align 2 // CHECK10-NEXT: [[CONV2:%.*]] = sext i16 [[TMP0]] to i32 -// CHECK10-NEXT: [[TMP1:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK10-NEXT: [[TMP1:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK10-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP1]], [[CONV2]] -// CHECK10-NEXT: store i32 [[ADD]], i32* [[CONV]], align 8 +// CHECK10-NEXT: store i32 [[ADD]], 
i32* [[CONV]], align 4 // CHECK10-NEXT: ret void // // @@ -2510,13 +2510,13 @@ int bar(int n){ // CHECK11-NEXT: store i32 [[DOTCAPTURE_EXPR_]], i32* [[DOTCAPTURE_EXPR__ADDR]], align 4 // CHECK11-NEXT: [[CONV:%.*]] = bitcast i32* [[B_ADDR]] to i16* // CHECK11-NEXT: [[CONV1:%.*]] = bitcast i32* [[DOTCAPTURE_EXPR__ADDR]] to i16* -// CHECK11-NEXT: [[TMP1:%.*]] = load i16, i16* [[CONV1]], align 4 +// CHECK11-NEXT: [[TMP1:%.*]] = load i16, i16* [[CONV1]], align 2 // CHECK11-NEXT: [[TMP2:%.*]] = sext i16 [[TMP1]] to i32 // CHECK11-NEXT: call void @__kmpc_push_num_threads(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]], i32 [[TMP2]]) // CHECK11-NEXT: [[TMP3:%.*]] = load i32, i32* [[A_ADDR]], align 4 // CHECK11-NEXT: store i32 [[TMP3]], i32* [[A_CASTED]], align 4 // CHECK11-NEXT: [[TMP4:%.*]] = load i32, i32* [[A_CASTED]], align 4 -// CHECK11-NEXT: [[TMP5:%.*]] = load i16, i16* [[CONV]], align 4 +// CHECK11-NEXT: [[TMP5:%.*]] = load i16, i16* [[CONV]], align 2 // CHECK11-NEXT: [[CONV2:%.*]] = bitcast i32* [[B_CASTED]] to i16* // CHECK11-NEXT: store i16 [[TMP5]], i16* [[CONV2]], align 2 // CHECK11-NEXT: [[TMP6:%.*]] = load i32, i32* [[B_CASTED]], align 4 @@ -2536,7 +2536,7 @@ int bar(int n){ // CHECK11-NEXT: store i32 [[A]], i32* [[A_ADDR]], align 4 // CHECK11-NEXT: store i32 [[B]], i32* [[B_ADDR]], align 4 // CHECK11-NEXT: [[CONV:%.*]] = bitcast i32* [[B_ADDR]] to i16* -// CHECK11-NEXT: [[TMP0:%.*]] = load i16, i16* [[CONV]], align 4 +// CHECK11-NEXT: [[TMP0:%.*]] = load i16, i16* [[CONV]], align 2 // CHECK11-NEXT: [[CONV1:%.*]] = sext i16 [[TMP0]] to i32 // CHECK11-NEXT: [[TMP1:%.*]] = load i32, i32* [[A_ADDR]], align 4 // CHECK11-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP1]], [[CONV1]] @@ -2689,13 +2689,13 @@ int bar(int n){ // CHECK12-NEXT: store i32 [[DOTCAPTURE_EXPR_]], i32* [[DOTCAPTURE_EXPR__ADDR]], align 4 // CHECK12-NEXT: [[CONV:%.*]] = bitcast i32* [[B_ADDR]] to i16* // CHECK12-NEXT: [[CONV1:%.*]] = bitcast i32* [[DOTCAPTURE_EXPR__ADDR]] to i16* -// CHECK12-NEXT: [[TMP1:%.*]] = load i16, i16* [[CONV1]], align 4 +// CHECK12-NEXT: [[TMP1:%.*]] = load i16, i16* [[CONV1]], align 2 // CHECK12-NEXT: [[TMP2:%.*]] = sext i16 [[TMP1]] to i32 // CHECK12-NEXT: call void @__kmpc_push_num_threads(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]], i32 [[TMP2]]) // CHECK12-NEXT: [[TMP3:%.*]] = load i32, i32* [[A_ADDR]], align 4 // CHECK12-NEXT: store i32 [[TMP3]], i32* [[A_CASTED]], align 4 // CHECK12-NEXT: [[TMP4:%.*]] = load i32, i32* [[A_CASTED]], align 4 -// CHECK12-NEXT: [[TMP5:%.*]] = load i16, i16* [[CONV]], align 4 +// CHECK12-NEXT: [[TMP5:%.*]] = load i16, i16* [[CONV]], align 2 // CHECK12-NEXT: [[CONV2:%.*]] = bitcast i32* [[B_CASTED]] to i16* // CHECK12-NEXT: store i16 [[TMP5]], i16* [[CONV2]], align 2 // CHECK12-NEXT: [[TMP6:%.*]] = load i32, i32* [[B_CASTED]], align 4 @@ -2715,7 +2715,7 @@ int bar(int n){ // CHECK12-NEXT: store i32 [[A]], i32* [[A_ADDR]], align 4 // CHECK12-NEXT: store i32 [[B]], i32* [[B_ADDR]], align 4 // CHECK12-NEXT: [[CONV:%.*]] = bitcast i32* [[B_ADDR]] to i16* -// CHECK12-NEXT: [[TMP0:%.*]] = load i16, i16* [[CONV]], align 4 +// CHECK12-NEXT: [[TMP0:%.*]] = load i16, i16* [[CONV]], align 2 // CHECK12-NEXT: [[CONV1:%.*]] = sext i16 [[TMP0]] to i32 // CHECK12-NEXT: [[TMP1:%.*]] = load i32, i32* [[A_ADDR]], align 4 // CHECK12-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP1]], [[CONV1]] @@ -2999,9 +2999,9 @@ int bar(int n){ // CHECK17-NEXT: [[TMP1:%.*]] = load %struct.S1*, %struct.S1** [[THIS_ADDR]], align 8 // CHECK17-NEXT: [[CONV:%.*]] = bitcast i64* [[B_ADDR]] to i32* // CHECK17-NEXT: 
[[CONV1:%.*]] = bitcast i64* [[DOTCAPTURE_EXPR__ADDR]] to i32* -// CHECK17-NEXT: [[TMP2:%.*]] = load i32, i32* [[CONV1]], align 8 +// CHECK17-NEXT: [[TMP2:%.*]] = load i32, i32* [[CONV1]], align 4 // CHECK17-NEXT: call void @__kmpc_push_num_threads(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]], i32 [[TMP2]]) -// CHECK17-NEXT: [[TMP3:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK17-NEXT: [[TMP3:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK17-NEXT: [[CONV2:%.*]] = bitcast i64* [[B_CASTED]] to i32* // CHECK17-NEXT: store i32 [[TMP3]], i32* [[CONV2]], align 4 // CHECK17-NEXT: [[TMP4:%.*]] = load i64, i64* [[B_CASTED]], align 8 @@ -3022,7 +3022,7 @@ int bar(int n){ // CHECK17-NEXT: store i64 [[B]], i64* [[B_ADDR]], align 8 // CHECK17-NEXT: [[TMP0:%.*]] = load %struct.S1*, %struct.S1** [[THIS_ADDR]], align 8 // CHECK17-NEXT: [[CONV:%.*]] = bitcast i64* [[B_ADDR]] to i32* -// CHECK17-NEXT: [[TMP1:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK17-NEXT: [[TMP1:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK17-NEXT: [[CONV1:%.*]] = sitofp i32 [[TMP1]] to double // CHECK17-NEXT: [[ADD:%.*]] = fadd double [[CONV1]], 1.500000e+00 // CHECK17-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_S1:%.*]], %struct.S1* [[TMP0]], i32 0, i32 0 @@ -3064,7 +3064,7 @@ int bar(int n){ // CHECK17-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) // CHECK17-NEXT: store i64 [[DOTCAPTURE_EXPR_]], i64* [[DOTCAPTURE_EXPR__ADDR]], align 8 // CHECK17-NEXT: [[CONV:%.*]] = bitcast i64* [[DOTCAPTURE_EXPR__ADDR]] to i32* -// CHECK17-NEXT: [[TMP1:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK17-NEXT: [[TMP1:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK17-NEXT: call void @__kmpc_push_num_threads(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]], i32 [[TMP1]]) // CHECK17-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB1]], i32 0, void (i32*, i32*, ...)* bitcast (void (i32*, i32*)* @.omp_outlined..4 to void (i32*, i32*, ...)*)) // CHECK17-NEXT: ret void @@ -3087,7 +3087,7 @@ int bar(int n){ // CHECK17-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) // CHECK17-NEXT: store i64 [[DOTCAPTURE_EXPR_]], i64* [[DOTCAPTURE_EXPR__ADDR]], align 8 // CHECK17-NEXT: [[CONV:%.*]] = bitcast i64* [[DOTCAPTURE_EXPR__ADDR]] to i32* -// CHECK17-NEXT: [[TMP1:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK17-NEXT: [[TMP1:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK17-NEXT: call void @__kmpc_push_num_threads(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]], i32 [[TMP1]]) // CHECK17-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) 
@__kmpc_fork_call(%struct.ident_t* @[[GLOB1]], i32 0, void (i32*, i32*, ...)* bitcast (void (i32*, i32*)* @.omp_outlined..7 to void (i32*, i32*, ...)*)) // CHECK17-NEXT: ret void @@ -3137,14 +3137,14 @@ int bar(int n){ // CHECK17-NEXT: [[CONV:%.*]] = bitcast i64* [[A_ADDR]] to i32* // CHECK17-NEXT: [[CONV1:%.*]] = bitcast i64* [[B_ADDR]] to i16* // CHECK17-NEXT: [[CONV2:%.*]] = bitcast i64* [[DOTCAPTURE_EXPR__ADDR]] to i16* -// CHECK17-NEXT: [[TMP1:%.*]] = load i16, i16* [[CONV2]], align 8 +// CHECK17-NEXT: [[TMP1:%.*]] = load i16, i16* [[CONV2]], align 2 // CHECK17-NEXT: [[TMP2:%.*]] = sext i16 [[TMP1]] to i32 // CHECK17-NEXT: call void @__kmpc_push_num_threads(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]], i32 [[TMP2]]) -// CHECK17-NEXT: [[TMP3:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK17-NEXT: [[TMP3:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK17-NEXT: [[CONV3:%.*]] = bitcast i64* [[A_CASTED]] to i32* // CHECK17-NEXT: store i32 [[TMP3]], i32* [[CONV3]], align 4 // CHECK17-NEXT: [[TMP4:%.*]] = load i64, i64* [[A_CASTED]], align 8 -// CHECK17-NEXT: [[TMP5:%.*]] = load i16, i16* [[CONV1]], align 8 +// CHECK17-NEXT: [[TMP5:%.*]] = load i16, i16* [[CONV1]], align 2 // CHECK17-NEXT: [[CONV4:%.*]] = bitcast i64* [[B_CASTED]] to i16* // CHECK17-NEXT: store i16 [[TMP5]], i16* [[CONV4]], align 2 // CHECK17-NEXT: [[TMP6:%.*]] = load i64, i64* [[B_CASTED]], align 8 @@ -3165,11 +3165,11 @@ int bar(int n){ // CHECK17-NEXT: store i64 [[B]], i64* [[B_ADDR]], align 8 // CHECK17-NEXT: [[CONV:%.*]] = bitcast i64* [[A_ADDR]] to i32* // CHECK17-NEXT: [[CONV1:%.*]] = bitcast i64* [[B_ADDR]] to i16* -// CHECK17-NEXT: [[TMP0:%.*]] = load i16, i16* [[CONV1]], align 8 +// CHECK17-NEXT: [[TMP0:%.*]] = load i16, i16* [[CONV1]], align 2 // CHECK17-NEXT: [[CONV2:%.*]] = sext i16 [[TMP0]] to i32 -// CHECK17-NEXT: [[TMP1:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK17-NEXT: [[TMP1:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK17-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP1]], [[CONV2]] -// CHECK17-NEXT: store i32 [[ADD]], i32* [[CONV]], align 8 +// CHECK17-NEXT: store i32 [[ADD]], i32* [[CONV]], align 4 // CHECK17-NEXT: ret void // // @@ -3456,9 +3456,9 @@ int bar(int n){ // CHECK18-NEXT: [[TMP1:%.*]] = load %struct.S1*, %struct.S1** [[THIS_ADDR]], align 8 // CHECK18-NEXT: [[CONV:%.*]] = bitcast i64* [[B_ADDR]] to i32* // CHECK18-NEXT: [[CONV1:%.*]] = bitcast i64* [[DOTCAPTURE_EXPR__ADDR]] to i32* -// CHECK18-NEXT: [[TMP2:%.*]] = load i32, i32* [[CONV1]], align 8 +// CHECK18-NEXT: [[TMP2:%.*]] = load i32, i32* [[CONV1]], align 4 // CHECK18-NEXT: call void @__kmpc_push_num_threads(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]], i32 [[TMP2]]) -// CHECK18-NEXT: [[TMP3:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK18-NEXT: [[TMP3:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK18-NEXT: [[CONV2:%.*]] = bitcast i64* [[B_CASTED]] to i32* // CHECK18-NEXT: store i32 [[TMP3]], i32* [[CONV2]], align 4 // CHECK18-NEXT: [[TMP4:%.*]] = load i64, i64* [[B_CASTED]], align 8 @@ -3479,7 +3479,7 @@ int bar(int n){ // CHECK18-NEXT: store i64 [[B]], i64* [[B_ADDR]], align 8 // CHECK18-NEXT: [[TMP0:%.*]] = load %struct.S1*, %struct.S1** [[THIS_ADDR]], align 8 // CHECK18-NEXT: [[CONV:%.*]] = bitcast i64* [[B_ADDR]] to i32* -// CHECK18-NEXT: [[TMP1:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK18-NEXT: [[TMP1:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK18-NEXT: [[CONV1:%.*]] = sitofp i32 [[TMP1]] to double // CHECK18-NEXT: [[ADD:%.*]] = fadd double [[CONV1]], 1.500000e+00 // CHECK18-NEXT: [[A:%.*]] = getelementptr 
inbounds [[STRUCT_S1:%.*]], %struct.S1* [[TMP0]], i32 0, i32 0 @@ -3521,7 +3521,7 @@ int bar(int n){ // CHECK18-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) // CHECK18-NEXT: store i64 [[DOTCAPTURE_EXPR_]], i64* [[DOTCAPTURE_EXPR__ADDR]], align 8 // CHECK18-NEXT: [[CONV:%.*]] = bitcast i64* [[DOTCAPTURE_EXPR__ADDR]] to i32* -// CHECK18-NEXT: [[TMP1:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK18-NEXT: [[TMP1:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK18-NEXT: call void @__kmpc_push_num_threads(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]], i32 [[TMP1]]) // CHECK18-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB1]], i32 0, void (i32*, i32*, ...)* bitcast (void (i32*, i32*)* @.omp_outlined..4 to void (i32*, i32*, ...)*)) // CHECK18-NEXT: ret void @@ -3544,7 +3544,7 @@ int bar(int n){ // CHECK18-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) // CHECK18-NEXT: store i64 [[DOTCAPTURE_EXPR_]], i64* [[DOTCAPTURE_EXPR__ADDR]], align 8 // CHECK18-NEXT: [[CONV:%.*]] = bitcast i64* [[DOTCAPTURE_EXPR__ADDR]] to i32* -// CHECK18-NEXT: [[TMP1:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK18-NEXT: [[TMP1:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK18-NEXT: call void @__kmpc_push_num_threads(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]], i32 [[TMP1]]) // CHECK18-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB1]], i32 0, void (i32*, i32*, ...)* bitcast (void (i32*, i32*)* @.omp_outlined..7 to void (i32*, i32*, ...)*)) // CHECK18-NEXT: ret void @@ -3594,14 +3594,14 @@ int bar(int n){ // CHECK18-NEXT: [[CONV:%.*]] = bitcast i64* [[A_ADDR]] to i32* // CHECK18-NEXT: [[CONV1:%.*]] = bitcast i64* [[B_ADDR]] to i16* // CHECK18-NEXT: [[CONV2:%.*]] = bitcast i64* [[DOTCAPTURE_EXPR__ADDR]] to i16* -// CHECK18-NEXT: [[TMP1:%.*]] = load i16, i16* [[CONV2]], align 8 +// CHECK18-NEXT: [[TMP1:%.*]] = load i16, i16* [[CONV2]], align 2 // CHECK18-NEXT: [[TMP2:%.*]] = sext i16 [[TMP1]] to i32 // CHECK18-NEXT: call void @__kmpc_push_num_threads(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]], i32 [[TMP2]]) -// CHECK18-NEXT: [[TMP3:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK18-NEXT: [[TMP3:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK18-NEXT: [[CONV3:%.*]] = bitcast i64* [[A_CASTED]] to i32* // CHECK18-NEXT: store i32 [[TMP3]], i32* [[CONV3]], align 4 // CHECK18-NEXT: [[TMP4:%.*]] = load i64, i64* [[A_CASTED]], align 8 -// CHECK18-NEXT: [[TMP5:%.*]] = load i16, i16* [[CONV1]], align 8 +// CHECK18-NEXT: [[TMP5:%.*]] = load i16, i16* [[CONV1]], align 2 // CHECK18-NEXT: [[CONV4:%.*]] = bitcast i64* [[B_CASTED]] to i16* // CHECK18-NEXT: store i16 [[TMP5]], i16* [[CONV4]], align 2 // CHECK18-NEXT: [[TMP6:%.*]] = load i64, i64* [[B_CASTED]], align 8 @@ -3622,11 +3622,11 @@ int bar(int n){ // CHECK18-NEXT: store i64 [[B]], i64* [[B_ADDR]], align 8 // CHECK18-NEXT: [[CONV:%.*]] = bitcast i64* [[A_ADDR]] to i32* // CHECK18-NEXT: [[CONV1:%.*]] = bitcast i64* [[B_ADDR]] to i16* -// CHECK18-NEXT: [[TMP0:%.*]] = load i16, i16* [[CONV1]], align 8 +// CHECK18-NEXT: [[TMP0:%.*]] = load i16, i16* [[CONV1]], align 2 // CHECK18-NEXT: [[CONV2:%.*]] = sext i16 [[TMP0]] to i32 -// CHECK18-NEXT: [[TMP1:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK18-NEXT: [[TMP1:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK18-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP1]], [[CONV2]] -// CHECK18-NEXT: store i32 [[ADD]], i32* [[CONV]], align 8 
+// CHECK18-NEXT: store i32 [[ADD]], i32* [[CONV]], align 4 // CHECK18-NEXT: ret void // // @@ -4039,13 +4039,13 @@ int bar(int n){ // CHECK19-NEXT: store i32 [[DOTCAPTURE_EXPR_]], i32* [[DOTCAPTURE_EXPR__ADDR]], align 4 // CHECK19-NEXT: [[CONV:%.*]] = bitcast i32* [[B_ADDR]] to i16* // CHECK19-NEXT: [[CONV1:%.*]] = bitcast i32* [[DOTCAPTURE_EXPR__ADDR]] to i16* -// CHECK19-NEXT: [[TMP1:%.*]] = load i16, i16* [[CONV1]], align 4 +// CHECK19-NEXT: [[TMP1:%.*]] = load i16, i16* [[CONV1]], align 2 // CHECK19-NEXT: [[TMP2:%.*]] = sext i16 [[TMP1]] to i32 // CHECK19-NEXT: call void @__kmpc_push_num_threads(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]], i32 [[TMP2]]) // CHECK19-NEXT: [[TMP3:%.*]] = load i32, i32* [[A_ADDR]], align 4 // CHECK19-NEXT: store i32 [[TMP3]], i32* [[A_CASTED]], align 4 // CHECK19-NEXT: [[TMP4:%.*]] = load i32, i32* [[A_CASTED]], align 4 -// CHECK19-NEXT: [[TMP5:%.*]] = load i16, i16* [[CONV]], align 4 +// CHECK19-NEXT: [[TMP5:%.*]] = load i16, i16* [[CONV]], align 2 // CHECK19-NEXT: [[CONV2:%.*]] = bitcast i32* [[B_CASTED]] to i16* // CHECK19-NEXT: store i16 [[TMP5]], i16* [[CONV2]], align 2 // CHECK19-NEXT: [[TMP6:%.*]] = load i32, i32* [[B_CASTED]], align 4 @@ -4065,7 +4065,7 @@ int bar(int n){ // CHECK19-NEXT: store i32 [[A]], i32* [[A_ADDR]], align 4 // CHECK19-NEXT: store i32 [[B]], i32* [[B_ADDR]], align 4 // CHECK19-NEXT: [[CONV:%.*]] = bitcast i32* [[B_ADDR]] to i16* -// CHECK19-NEXT: [[TMP0:%.*]] = load i16, i16* [[CONV]], align 4 +// CHECK19-NEXT: [[TMP0:%.*]] = load i16, i16* [[CONV]], align 2 // CHECK19-NEXT: [[CONV1:%.*]] = sext i16 [[TMP0]] to i32 // CHECK19-NEXT: [[TMP1:%.*]] = load i32, i32* [[A_ADDR]], align 4 // CHECK19-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP1]], [[CONV1]] @@ -4482,13 +4482,13 @@ int bar(int n){ // CHECK20-NEXT: store i32 [[DOTCAPTURE_EXPR_]], i32* [[DOTCAPTURE_EXPR__ADDR]], align 4 // CHECK20-NEXT: [[CONV:%.*]] = bitcast i32* [[B_ADDR]] to i16* // CHECK20-NEXT: [[CONV1:%.*]] = bitcast i32* [[DOTCAPTURE_EXPR__ADDR]] to i16* -// CHECK20-NEXT: [[TMP1:%.*]] = load i16, i16* [[CONV1]], align 4 +// CHECK20-NEXT: [[TMP1:%.*]] = load i16, i16* [[CONV1]], align 2 // CHECK20-NEXT: [[TMP2:%.*]] = sext i16 [[TMP1]] to i32 // CHECK20-NEXT: call void @__kmpc_push_num_threads(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]], i32 [[TMP2]]) // CHECK20-NEXT: [[TMP3:%.*]] = load i32, i32* [[A_ADDR]], align 4 // CHECK20-NEXT: store i32 [[TMP3]], i32* [[A_CASTED]], align 4 // CHECK20-NEXT: [[TMP4:%.*]] = load i32, i32* [[A_CASTED]], align 4 -// CHECK20-NEXT: [[TMP5:%.*]] = load i16, i16* [[CONV]], align 4 +// CHECK20-NEXT: [[TMP5:%.*]] = load i16, i16* [[CONV]], align 2 // CHECK20-NEXT: [[CONV2:%.*]] = bitcast i32* [[B_CASTED]] to i16* // CHECK20-NEXT: store i16 [[TMP5]], i16* [[CONV2]], align 2 // CHECK20-NEXT: [[TMP6:%.*]] = load i32, i32* [[B_CASTED]], align 4 @@ -4508,7 +4508,7 @@ int bar(int n){ // CHECK20-NEXT: store i32 [[A]], i32* [[A_ADDR]], align 4 // CHECK20-NEXT: store i32 [[B]], i32* [[B_ADDR]], align 4 // CHECK20-NEXT: [[CONV:%.*]] = bitcast i32* [[B_ADDR]] to i16* -// CHECK20-NEXT: [[TMP0:%.*]] = load i16, i16* [[CONV]], align 4 +// CHECK20-NEXT: [[TMP0:%.*]] = load i16, i16* [[CONV]], align 2 // CHECK20-NEXT: [[CONV1:%.*]] = sext i16 [[TMP0]] to i32 // CHECK20-NEXT: [[TMP1:%.*]] = load i32, i32* [[A_ADDR]], align 4 // CHECK20-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP1]], [[CONV1]] @@ -4530,7 +4530,7 @@ int bar(int n){ // CHECK25-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1:[0-9]+]]) // CHECK25-NEXT: store i64 
[[DOTCAPTURE_EXPR_]], i64* [[DOTCAPTURE_EXPR__ADDR]], align 8 // CHECK25-NEXT: [[CONV:%.*]] = bitcast i64* [[DOTCAPTURE_EXPR__ADDR]] to i32* -// CHECK25-NEXT: [[TMP1:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK25-NEXT: [[TMP1:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK25-NEXT: call void @__kmpc_push_num_threads(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]], i32 [[TMP1]]) // CHECK25-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB1]], i32 0, void (i32*, i32*, ...)* bitcast (void (i32*, i32*)* @.omp_outlined. to void (i32*, i32*, ...)*)) // CHECK25-NEXT: ret void @@ -4553,7 +4553,7 @@ int bar(int n){ // CHECK25-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) // CHECK25-NEXT: store i64 [[DOTCAPTURE_EXPR_]], i64* [[DOTCAPTURE_EXPR__ADDR]], align 8 // CHECK25-NEXT: [[CONV:%.*]] = bitcast i64* [[DOTCAPTURE_EXPR__ADDR]] to i32* -// CHECK25-NEXT: [[TMP1:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK25-NEXT: [[TMP1:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK25-NEXT: call void @__kmpc_push_num_threads(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]], i32 [[TMP1]]) // CHECK25-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB1]], i32 0, void (i32*, i32*, ...)* bitcast (void (i32*, i32*)* @.omp_outlined..1 to void (i32*, i32*, ...)*)) // CHECK25-NEXT: ret void @@ -4583,9 +4583,9 @@ int bar(int n){ // CHECK25-NEXT: [[TMP1:%.*]] = load %struct.S1*, %struct.S1** [[THIS_ADDR]], align 8 // CHECK25-NEXT: [[CONV:%.*]] = bitcast i64* [[B_ADDR]] to i32* // CHECK25-NEXT: [[CONV1:%.*]] = bitcast i64* [[DOTCAPTURE_EXPR__ADDR]] to i32* -// CHECK25-NEXT: [[TMP2:%.*]] = load i32, i32* [[CONV1]], align 8 +// CHECK25-NEXT: [[TMP2:%.*]] = load i32, i32* [[CONV1]], align 4 // CHECK25-NEXT: call void @__kmpc_push_num_threads(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]], i32 [[TMP2]]) -// CHECK25-NEXT: [[TMP3:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK25-NEXT: [[TMP3:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK25-NEXT: [[CONV2:%.*]] = bitcast i64* [[B_CASTED]] to i32* // CHECK25-NEXT: store i32 [[TMP3]], i32* [[CONV2]], align 4 // CHECK25-NEXT: [[TMP4:%.*]] = load i64, i64* [[B_CASTED]], align 8 @@ -4606,7 +4606,7 @@ int bar(int n){ // CHECK25-NEXT: store i64 [[B]], i64* [[B_ADDR]], align 8 // CHECK25-NEXT: [[TMP0:%.*]] = load %struct.S1*, %struct.S1** [[THIS_ADDR]], align 8 // CHECK25-NEXT: [[CONV:%.*]] = bitcast i64* [[B_ADDR]] to i32* -// CHECK25-NEXT: [[TMP1:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK25-NEXT: [[TMP1:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK25-NEXT: [[CONV1:%.*]] = sitofp i32 [[TMP1]] to double // CHECK25-NEXT: [[ADD:%.*]] = fadd double [[CONV1]], 1.500000e+00 // CHECK25-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_S1:%.*]], %struct.S1* [[TMP0]], i32 0, i32 0 @@ -4675,14 +4675,14 @@ int bar(int n){ // CHECK25-NEXT: [[CONV:%.*]] = bitcast i64* [[A_ADDR]] to i32* // CHECK25-NEXT: [[CONV1:%.*]] = bitcast i64* [[B_ADDR]] to i16* // CHECK25-NEXT: [[CONV2:%.*]] = bitcast i64* [[DOTCAPTURE_EXPR__ADDR]] to i16* -// CHECK25-NEXT: [[TMP1:%.*]] = load i16, i16* [[CONV2]], align 8 +// CHECK25-NEXT: [[TMP1:%.*]] = load i16, i16* [[CONV2]], align 2 // CHECK25-NEXT: [[TMP2:%.*]] = sext i16 [[TMP1]] to i32 // CHECK25-NEXT: call void @__kmpc_push_num_threads(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]], i32 [[TMP2]]) -// CHECK25-NEXT: [[TMP3:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK25-NEXT: [[TMP3:%.*]] = load 
i32, i32* [[CONV]], align 4 // CHECK25-NEXT: [[CONV3:%.*]] = bitcast i64* [[A_CASTED]] to i32* // CHECK25-NEXT: store i32 [[TMP3]], i32* [[CONV3]], align 4 // CHECK25-NEXT: [[TMP4:%.*]] = load i64, i64* [[A_CASTED]], align 8 -// CHECK25-NEXT: [[TMP5:%.*]] = load i16, i16* [[CONV1]], align 8 +// CHECK25-NEXT: [[TMP5:%.*]] = load i16, i16* [[CONV1]], align 2 // CHECK25-NEXT: [[CONV4:%.*]] = bitcast i64* [[B_CASTED]] to i16* // CHECK25-NEXT: store i16 [[TMP5]], i16* [[CONV4]], align 2 // CHECK25-NEXT: [[TMP6:%.*]] = load i64, i64* [[B_CASTED]], align 8 @@ -4703,11 +4703,11 @@ int bar(int n){ // CHECK25-NEXT: store i64 [[B]], i64* [[B_ADDR]], align 8 // CHECK25-NEXT: [[CONV:%.*]] = bitcast i64* [[A_ADDR]] to i32* // CHECK25-NEXT: [[CONV1:%.*]] = bitcast i64* [[B_ADDR]] to i16* -// CHECK25-NEXT: [[TMP0:%.*]] = load i16, i16* [[CONV1]], align 8 +// CHECK25-NEXT: [[TMP0:%.*]] = load i16, i16* [[CONV1]], align 2 // CHECK25-NEXT: [[CONV2:%.*]] = sext i16 [[TMP0]] to i32 -// CHECK25-NEXT: [[TMP1:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK25-NEXT: [[TMP1:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK25-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP1]], [[CONV2]] -// CHECK25-NEXT: store i32 [[ADD]], i32* [[CONV]], align 8 +// CHECK25-NEXT: store i32 [[ADD]], i32* [[CONV]], align 4 // CHECK25-NEXT: ret void // // @@ -4718,7 +4718,7 @@ int bar(int n){ // CHECK26-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1:[0-9]+]]) // CHECK26-NEXT: store i64 [[DOTCAPTURE_EXPR_]], i64* [[DOTCAPTURE_EXPR__ADDR]], align 8 // CHECK26-NEXT: [[CONV:%.*]] = bitcast i64* [[DOTCAPTURE_EXPR__ADDR]] to i32* -// CHECK26-NEXT: [[TMP1:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK26-NEXT: [[TMP1:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK26-NEXT: call void @__kmpc_push_num_threads(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]], i32 [[TMP1]]) // CHECK26-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB1]], i32 0, void (i32*, i32*, ...)* bitcast (void (i32*, i32*)* @.omp_outlined. to void (i32*, i32*, ...)*)) // CHECK26-NEXT: ret void @@ -4741,7 +4741,7 @@ int bar(int n){ // CHECK26-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) // CHECK26-NEXT: store i64 [[DOTCAPTURE_EXPR_]], i64* [[DOTCAPTURE_EXPR__ADDR]], align 8 // CHECK26-NEXT: [[CONV:%.*]] = bitcast i64* [[DOTCAPTURE_EXPR__ADDR]] to i32* -// CHECK26-NEXT: [[TMP1:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK26-NEXT: [[TMP1:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK26-NEXT: call void @__kmpc_push_num_threads(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]], i32 [[TMP1]]) // CHECK26-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) 
@__kmpc_fork_call(%struct.ident_t* @[[GLOB1]], i32 0, void (i32*, i32*, ...)* bitcast (void (i32*, i32*)* @.omp_outlined..1 to void (i32*, i32*, ...)*)) // CHECK26-NEXT: ret void @@ -4771,9 +4771,9 @@ int bar(int n){ // CHECK26-NEXT: [[TMP1:%.*]] = load %struct.S1*, %struct.S1** [[THIS_ADDR]], align 8 // CHECK26-NEXT: [[CONV:%.*]] = bitcast i64* [[B_ADDR]] to i32* // CHECK26-NEXT: [[CONV1:%.*]] = bitcast i64* [[DOTCAPTURE_EXPR__ADDR]] to i32* -// CHECK26-NEXT: [[TMP2:%.*]] = load i32, i32* [[CONV1]], align 8 +// CHECK26-NEXT: [[TMP2:%.*]] = load i32, i32* [[CONV1]], align 4 // CHECK26-NEXT: call void @__kmpc_push_num_threads(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]], i32 [[TMP2]]) -// CHECK26-NEXT: [[TMP3:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK26-NEXT: [[TMP3:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK26-NEXT: [[CONV2:%.*]] = bitcast i64* [[B_CASTED]] to i32* // CHECK26-NEXT: store i32 [[TMP3]], i32* [[CONV2]], align 4 // CHECK26-NEXT: [[TMP4:%.*]] = load i64, i64* [[B_CASTED]], align 8 @@ -4794,7 +4794,7 @@ int bar(int n){ // CHECK26-NEXT: store i64 [[B]], i64* [[B_ADDR]], align 8 // CHECK26-NEXT: [[TMP0:%.*]] = load %struct.S1*, %struct.S1** [[THIS_ADDR]], align 8 // CHECK26-NEXT: [[CONV:%.*]] = bitcast i64* [[B_ADDR]] to i32* -// CHECK26-NEXT: [[TMP1:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK26-NEXT: [[TMP1:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK26-NEXT: [[CONV1:%.*]] = sitofp i32 [[TMP1]] to double // CHECK26-NEXT: [[ADD:%.*]] = fadd double [[CONV1]], 1.500000e+00 // CHECK26-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_S1:%.*]], %struct.S1* [[TMP0]], i32 0, i32 0 @@ -4863,14 +4863,14 @@ int bar(int n){ // CHECK26-NEXT: [[CONV:%.*]] = bitcast i64* [[A_ADDR]] to i32* // CHECK26-NEXT: [[CONV1:%.*]] = bitcast i64* [[B_ADDR]] to i16* // CHECK26-NEXT: [[CONV2:%.*]] = bitcast i64* [[DOTCAPTURE_EXPR__ADDR]] to i16* -// CHECK26-NEXT: [[TMP1:%.*]] = load i16, i16* [[CONV2]], align 8 +// CHECK26-NEXT: [[TMP1:%.*]] = load i16, i16* [[CONV2]], align 2 // CHECK26-NEXT: [[TMP2:%.*]] = sext i16 [[TMP1]] to i32 // CHECK26-NEXT: call void @__kmpc_push_num_threads(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]], i32 [[TMP2]]) -// CHECK26-NEXT: [[TMP3:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK26-NEXT: [[TMP3:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK26-NEXT: [[CONV3:%.*]] = bitcast i64* [[A_CASTED]] to i32* // CHECK26-NEXT: store i32 [[TMP3]], i32* [[CONV3]], align 4 // CHECK26-NEXT: [[TMP4:%.*]] = load i64, i64* [[A_CASTED]], align 8 -// CHECK26-NEXT: [[TMP5:%.*]] = load i16, i16* [[CONV1]], align 8 +// CHECK26-NEXT: [[TMP5:%.*]] = load i16, i16* [[CONV1]], align 2 // CHECK26-NEXT: [[CONV4:%.*]] = bitcast i64* [[B_CASTED]] to i16* // CHECK26-NEXT: store i16 [[TMP5]], i16* [[CONV4]], align 2 // CHECK26-NEXT: [[TMP6:%.*]] = load i64, i64* [[B_CASTED]], align 8 @@ -4891,11 +4891,11 @@ int bar(int n){ // CHECK26-NEXT: store i64 [[B]], i64* [[B_ADDR]], align 8 // CHECK26-NEXT: [[CONV:%.*]] = bitcast i64* [[A_ADDR]] to i32* // CHECK26-NEXT: [[CONV1:%.*]] = bitcast i64* [[B_ADDR]] to i16* -// CHECK26-NEXT: [[TMP0:%.*]] = load i16, i16* [[CONV1]], align 8 +// CHECK26-NEXT: [[TMP0:%.*]] = load i16, i16* [[CONV1]], align 2 // CHECK26-NEXT: [[CONV2:%.*]] = sext i16 [[TMP0]] to i32 -// CHECK26-NEXT: [[TMP1:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK26-NEXT: [[TMP1:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK26-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP1]], [[CONV2]] -// CHECK26-NEXT: store i32 [[ADD]], i32* [[CONV]], align 8 +// CHECK26-NEXT: store i32 [[ADD]], 
i32* [[CONV]], align 4 // CHECK26-NEXT: ret void // // @@ -5044,13 +5044,13 @@ int bar(int n){ // CHECK27-NEXT: store i32 [[DOTCAPTURE_EXPR_]], i32* [[DOTCAPTURE_EXPR__ADDR]], align 4 // CHECK27-NEXT: [[CONV:%.*]] = bitcast i32* [[B_ADDR]] to i16* // CHECK27-NEXT: [[CONV1:%.*]] = bitcast i32* [[DOTCAPTURE_EXPR__ADDR]] to i16* -// CHECK27-NEXT: [[TMP1:%.*]] = load i16, i16* [[CONV1]], align 4 +// CHECK27-NEXT: [[TMP1:%.*]] = load i16, i16* [[CONV1]], align 2 // CHECK27-NEXT: [[TMP2:%.*]] = sext i16 [[TMP1]] to i32 // CHECK27-NEXT: call void @__kmpc_push_num_threads(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]], i32 [[TMP2]]) // CHECK27-NEXT: [[TMP3:%.*]] = load i32, i32* [[A_ADDR]], align 4 // CHECK27-NEXT: store i32 [[TMP3]], i32* [[A_CASTED]], align 4 // CHECK27-NEXT: [[TMP4:%.*]] = load i32, i32* [[A_CASTED]], align 4 -// CHECK27-NEXT: [[TMP5:%.*]] = load i16, i16* [[CONV]], align 4 +// CHECK27-NEXT: [[TMP5:%.*]] = load i16, i16* [[CONV]], align 2 // CHECK27-NEXT: [[CONV2:%.*]] = bitcast i32* [[B_CASTED]] to i16* // CHECK27-NEXT: store i16 [[TMP5]], i16* [[CONV2]], align 2 // CHECK27-NEXT: [[TMP6:%.*]] = load i32, i32* [[B_CASTED]], align 4 @@ -5070,7 +5070,7 @@ int bar(int n){ // CHECK27-NEXT: store i32 [[A]], i32* [[A_ADDR]], align 4 // CHECK27-NEXT: store i32 [[B]], i32* [[B_ADDR]], align 4 // CHECK27-NEXT: [[CONV:%.*]] = bitcast i32* [[B_ADDR]] to i16* -// CHECK27-NEXT: [[TMP0:%.*]] = load i16, i16* [[CONV]], align 4 +// CHECK27-NEXT: [[TMP0:%.*]] = load i16, i16* [[CONV]], align 2 // CHECK27-NEXT: [[CONV1:%.*]] = sext i16 [[TMP0]] to i32 // CHECK27-NEXT: [[TMP1:%.*]] = load i32, i32* [[A_ADDR]], align 4 // CHECK27-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP1]], [[CONV1]] @@ -5223,13 +5223,13 @@ int bar(int n){ // CHECK28-NEXT: store i32 [[DOTCAPTURE_EXPR_]], i32* [[DOTCAPTURE_EXPR__ADDR]], align 4 // CHECK28-NEXT: [[CONV:%.*]] = bitcast i32* [[B_ADDR]] to i16* // CHECK28-NEXT: [[CONV1:%.*]] = bitcast i32* [[DOTCAPTURE_EXPR__ADDR]] to i16* -// CHECK28-NEXT: [[TMP1:%.*]] = load i16, i16* [[CONV1]], align 4 +// CHECK28-NEXT: [[TMP1:%.*]] = load i16, i16* [[CONV1]], align 2 // CHECK28-NEXT: [[TMP2:%.*]] = sext i16 [[TMP1]] to i32 // CHECK28-NEXT: call void @__kmpc_push_num_threads(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]], i32 [[TMP2]]) // CHECK28-NEXT: [[TMP3:%.*]] = load i32, i32* [[A_ADDR]], align 4 // CHECK28-NEXT: store i32 [[TMP3]], i32* [[A_CASTED]], align 4 // CHECK28-NEXT: [[TMP4:%.*]] = load i32, i32* [[A_CASTED]], align 4 -// CHECK28-NEXT: [[TMP5:%.*]] = load i16, i16* [[CONV]], align 4 +// CHECK28-NEXT: [[TMP5:%.*]] = load i16, i16* [[CONV]], align 2 // CHECK28-NEXT: [[CONV2:%.*]] = bitcast i32* [[B_CASTED]] to i16* // CHECK28-NEXT: store i16 [[TMP5]], i16* [[CONV2]], align 2 // CHECK28-NEXT: [[TMP6:%.*]] = load i32, i32* [[B_CASTED]], align 4 @@ -5249,7 +5249,7 @@ int bar(int n){ // CHECK28-NEXT: store i32 [[A]], i32* [[A_ADDR]], align 4 // CHECK28-NEXT: store i32 [[B]], i32* [[B_ADDR]], align 4 // CHECK28-NEXT: [[CONV:%.*]] = bitcast i32* [[B_ADDR]] to i16* -// CHECK28-NEXT: [[TMP0:%.*]] = load i16, i16* [[CONV]], align 4 +// CHECK28-NEXT: [[TMP0:%.*]] = load i16, i16* [[CONV]], align 2 // CHECK28-NEXT: [[CONV1:%.*]] = sext i16 [[TMP0]] to i32 // CHECK28-NEXT: [[TMP1:%.*]] = load i32, i32* [[A_ADDR]], align 4 // CHECK28-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP1]], [[CONV1]] diff --git a/clang/test/OpenMP/target_teams_codegen.cpp b/clang/test/OpenMP/target_teams_codegen.cpp index 387be05f7bc0..7b3a3d825154 100644 --- a/clang/test/OpenMP/target_teams_codegen.cpp +++ 
b/clang/test/OpenMP/target_teams_codegen.cpp @@ -684,10 +684,10 @@ int bar(int n){ // CHECK1-NEXT: [[CONV:%.*]] = bitcast i64* [[AA_ADDR]] to i16* // CHECK1-NEXT: [[CONV3:%.*]] = bitcast i64* [[DOTCAPTURE_EXPR__ADDR]] to i32* // CHECK1-NEXT: [[CONV4:%.*]] = bitcast i64* [[DOTCAPTURE_EXPR__ADDR2]] to i32* -// CHECK1-NEXT: [[TMP1:%.*]] = load i32, i32* [[CONV3]], align 8 -// CHECK1-NEXT: [[TMP2:%.*]] = load i32, i32* [[CONV4]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = load i32, i32* [[CONV3]], align 4 +// CHECK1-NEXT: [[TMP2:%.*]] = load i32, i32* [[CONV4]], align 4 // CHECK1-NEXT: call void @__kmpc_push_num_teams(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]], i32 [[TMP1]], i32 [[TMP2]]) -// CHECK1-NEXT: [[TMP3:%.*]] = load i16, i16* [[CONV]], align 8 +// CHECK1-NEXT: [[TMP3:%.*]] = load i16, i16* [[CONV]], align 2 // CHECK1-NEXT: [[CONV5:%.*]] = bitcast i64* [[AA_CASTED]] to i16* // CHECK1-NEXT: store i16 [[TMP3]], i16* [[CONV5]], align 2 // CHECK1-NEXT: [[TMP4:%.*]] = load i64, i64* [[AA_CASTED]], align 8 @@ -822,7 +822,7 @@ int bar(int n){ // CHECK1-NEXT: [[A_CASTED:%.*]] = alloca i64, align 8 // CHECK1-NEXT: store i64 [[A]], i64* [[A_ADDR]], align 8 // CHECK1-NEXT: [[CONV:%.*]] = bitcast i64* [[A_ADDR]] to i32* -// CHECK1-NEXT: [[TMP0:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK1-NEXT: [[CONV1:%.*]] = bitcast i64* [[A_CASTED]] to i32* // CHECK1-NEXT: store i32 [[TMP0]], i32* [[CONV1]], align 4 // CHECK1-NEXT: [[TMP1:%.*]] = load i64, i64* [[A_CASTED]], align 8 @@ -840,9 +840,9 @@ int bar(int n){ // CHECK1-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 // CHECK1-NEXT: store i64 [[A]], i64* [[A_ADDR]], align 8 // CHECK1-NEXT: [[CONV:%.*]] = bitcast i64* [[A_ADDR]] to i32* -// CHECK1-NEXT: [[TMP0:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP0]], 1 -// CHECK1-NEXT: store i32 [[ADD]], i32* [[CONV]], align 8 +// CHECK1-NEXT: store i32 [[ADD]], i32* [[CONV]], align 4 // CHECK1-NEXT: ret void // // @@ -853,7 +853,7 @@ int bar(int n){ // CHECK1-NEXT: [[AA_CASTED:%.*]] = alloca i64, align 8 // CHECK1-NEXT: store i64 [[AA]], i64* [[AA_ADDR]], align 8 // CHECK1-NEXT: [[CONV:%.*]] = bitcast i64* [[AA_ADDR]] to i16* -// CHECK1-NEXT: [[TMP0:%.*]] = load i16, i16* [[CONV]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load i16, i16* [[CONV]], align 2 // CHECK1-NEXT: [[CONV1:%.*]] = bitcast i64* [[AA_CASTED]] to i16* // CHECK1-NEXT: store i16 [[TMP0]], i16* [[CONV1]], align 2 // CHECK1-NEXT: [[TMP1:%.*]] = load i64, i64* [[AA_CASTED]], align 8 @@ -871,11 +871,11 @@ int bar(int n){ // CHECK1-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 // CHECK1-NEXT: store i64 [[AA]], i64* [[AA_ADDR]], align 8 // CHECK1-NEXT: [[CONV:%.*]] = bitcast i64* [[AA_ADDR]] to i16* -// CHECK1-NEXT: [[TMP0:%.*]] = load i16, i16* [[CONV]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load i16, i16* [[CONV]], align 2 // CHECK1-NEXT: [[CONV1:%.*]] = sext i16 [[TMP0]] to i32 // CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[CONV1]], 1 // CHECK1-NEXT: [[CONV2:%.*]] = trunc i32 [[ADD]] to i16 -// CHECK1-NEXT: store i16 [[CONV2]], i16* [[CONV]], align 8 +// CHECK1-NEXT: store i16 [[CONV2]], i16* [[CONV]], align 2 // CHECK1-NEXT: ret void // // @@ -890,11 +890,11 @@ int bar(int n){ // CHECK1-NEXT: store i64 [[AA]], i64* [[AA_ADDR]], align 8 // CHECK1-NEXT: [[CONV:%.*]] = bitcast i64* [[A_ADDR]] to i32* // CHECK1-NEXT: 
[[CONV1:%.*]] = bitcast i64* [[AA_ADDR]] to i16* -// CHECK1-NEXT: [[TMP0:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK1-NEXT: [[CONV2:%.*]] = bitcast i64* [[A_CASTED]] to i32* // CHECK1-NEXT: store i32 [[TMP0]], i32* [[CONV2]], align 4 // CHECK1-NEXT: [[TMP1:%.*]] = load i64, i64* [[A_CASTED]], align 8 -// CHECK1-NEXT: [[TMP2:%.*]] = load i16, i16* [[CONV1]], align 8 +// CHECK1-NEXT: [[TMP2:%.*]] = load i16, i16* [[CONV1]], align 2 // CHECK1-NEXT: [[CONV3:%.*]] = bitcast i64* [[AA_CASTED]] to i16* // CHECK1-NEXT: store i16 [[TMP2]], i16* [[CONV3]], align 2 // CHECK1-NEXT: [[TMP3:%.*]] = load i64, i64* [[AA_CASTED]], align 8 @@ -915,14 +915,14 @@ int bar(int n){ // CHECK1-NEXT: store i64 [[AA]], i64* [[AA_ADDR]], align 8 // CHECK1-NEXT: [[CONV:%.*]] = bitcast i64* [[A_ADDR]] to i32* // CHECK1-NEXT: [[CONV1:%.*]] = bitcast i64* [[AA_ADDR]] to i16* -// CHECK1-NEXT: [[TMP0:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP0]], 1 -// CHECK1-NEXT: store i32 [[ADD]], i32* [[CONV]], align 8 -// CHECK1-NEXT: [[TMP1:%.*]] = load i16, i16* [[CONV1]], align 8 +// CHECK1-NEXT: store i32 [[ADD]], i32* [[CONV]], align 4 +// CHECK1-NEXT: [[TMP1:%.*]] = load i16, i16* [[CONV1]], align 2 // CHECK1-NEXT: [[CONV2:%.*]] = sext i16 [[TMP1]] to i32 // CHECK1-NEXT: [[ADD3:%.*]] = add nsw i32 [[CONV2]], 1 // CHECK1-NEXT: [[CONV4:%.*]] = trunc i32 [[ADD3]] to i16 -// CHECK1-NEXT: store i16 [[CONV4]], i16* [[CONV1]], align 8 +// CHECK1-NEXT: store i16 [[CONV4]], i16* [[CONV1]], align 2 // CHECK1-NEXT: ret void // // @@ -957,7 +957,7 @@ int bar(int n){ // CHECK1-NEXT: [[TMP5:%.*]] = load i64, i64* [[VLA_ADDR4]], align 8 // CHECK1-NEXT: [[TMP6:%.*]] = load double*, double** [[CN_ADDR]], align 8 // CHECK1-NEXT: [[TMP7:%.*]] = load %struct.TT*, %struct.TT** [[D_ADDR]], align 8 -// CHECK1-NEXT: [[TMP8:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK1-NEXT: [[TMP8:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK1-NEXT: [[CONV5:%.*]] = bitcast i64* [[A_CASTED]] to i32* // CHECK1-NEXT: store i32 [[TMP8]], i32* [[CONV5]], align 4 // CHECK1-NEXT: [[TMP9:%.*]] = load i64, i64* [[A_CASTED]], align 8 @@ -999,9 +999,9 @@ int bar(int n){ // CHECK1-NEXT: [[TMP5:%.*]] = load i64, i64* [[VLA_ADDR4]], align 8 // CHECK1-NEXT: [[TMP6:%.*]] = load double*, double** [[CN_ADDR]], align 8 // CHECK1-NEXT: [[TMP7:%.*]] = load %struct.TT*, %struct.TT** [[D_ADDR]], align 8 -// CHECK1-NEXT: [[TMP8:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK1-NEXT: [[TMP8:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP8]], 1 -// CHECK1-NEXT: store i32 [[ADD]], i32* [[CONV]], align 8 +// CHECK1-NEXT: store i32 [[ADD]], i32* [[CONV]], align 4 // CHECK1-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x float], [10 x float]* [[TMP0]], i64 0, i64 2 // CHECK1-NEXT: [[TMP9:%.*]] = load float, float* [[ARRAYIDX]], align 4 // CHECK1-NEXT: [[CONV5:%.*]] = fpext float [[TMP9]] to double @@ -1045,7 +1045,7 @@ int bar(int n){ // CHECK1-NEXT: [[NN_CASTED:%.*]] = alloca i64, align 8 // CHECK1-NEXT: store i64 [[NN]], i64* [[NN_ADDR]], align 8 // CHECK1-NEXT: [[CONV:%.*]] = bitcast i64* [[NN_ADDR]] to i32* -// CHECK1-NEXT: [[TMP0:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK1-NEXT: [[CONV1:%.*]] = bitcast i64* [[NN_CASTED]] to i32* // CHECK1-NEXT: store i32 [[TMP0]], i32* [[CONV1]], align 
4 // CHECK1-NEXT: [[TMP1:%.*]] = load i64, i64* [[NN_CASTED]], align 8 @@ -1064,7 +1064,7 @@ int bar(int n){ // CHECK1-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 // CHECK1-NEXT: store i64 [[NN]], i64* [[NN_ADDR]], align 8 // CHECK1-NEXT: [[CONV:%.*]] = bitcast i64* [[NN_ADDR]] to i32* -// CHECK1-NEXT: [[TMP0:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK1-NEXT: [[CONV1:%.*]] = bitcast i64* [[NN_CASTED]] to i32* // CHECK1-NEXT: store i32 [[TMP0]], i32* [[CONV1]], align 4 // CHECK1-NEXT: [[TMP1:%.*]] = load i64, i64* [[NN_CASTED]], align 8 @@ -1092,7 +1092,7 @@ int bar(int n){ // CHECK1-NEXT: [[NN_CASTED:%.*]] = alloca i64, align 8 // CHECK1-NEXT: store i64 [[NN]], i64* [[NN_ADDR]], align 8 // CHECK1-NEXT: [[CONV:%.*]] = bitcast i64* [[NN_ADDR]] to i32* -// CHECK1-NEXT: [[TMP0:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK1-NEXT: [[CONV1:%.*]] = bitcast i64* [[NN_CASTED]] to i32* // CHECK1-NEXT: store i32 [[TMP0]], i32* [[CONV1]], align 4 // CHECK1-NEXT: [[TMP1:%.*]] = load i64, i64* [[NN_CASTED]], align 8 @@ -1501,7 +1501,7 @@ int bar(int n){ // CHECK1-NEXT: [[TMP1:%.*]] = load i64, i64* [[VLA_ADDR]], align 8 // CHECK1-NEXT: [[TMP2:%.*]] = load i64, i64* [[VLA_ADDR2]], align 8 // CHECK1-NEXT: [[TMP3:%.*]] = load i16*, i16** [[C_ADDR]], align 8 -// CHECK1-NEXT: [[TMP4:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK1-NEXT: [[TMP4:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK1-NEXT: [[CONV3:%.*]] = bitcast i64* [[B_CASTED]] to i32* // CHECK1-NEXT: store i32 [[TMP4]], i32* [[CONV3]], align 4 // CHECK1-NEXT: [[TMP5:%.*]] = load i64, i64* [[B_CASTED]], align 8 @@ -1531,7 +1531,7 @@ int bar(int n){ // CHECK1-NEXT: [[TMP1:%.*]] = load i64, i64* [[VLA_ADDR]], align 8 // CHECK1-NEXT: [[TMP2:%.*]] = load i64, i64* [[VLA_ADDR2]], align 8 // CHECK1-NEXT: [[TMP3:%.*]] = load i16*, i16** [[C_ADDR]], align 8 -// CHECK1-NEXT: [[TMP4:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK1-NEXT: [[TMP4:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK1-NEXT: [[CONV3:%.*]] = sitofp i32 [[TMP4]] to double // CHECK1-NEXT: [[ADD:%.*]] = fadd double [[CONV3]], 1.500000e+00 // CHECK1-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_S1:%.*]], %struct.S1* [[TMP0]], i32 0, i32 0 @@ -1566,15 +1566,15 @@ int bar(int n){ // CHECK1-NEXT: [[CONV1:%.*]] = bitcast i64* [[AA_ADDR]] to i16* // CHECK1-NEXT: [[CONV2:%.*]] = bitcast i64* [[AAA_ADDR]] to i8* // CHECK1-NEXT: [[TMP0:%.*]] = load [10 x i32]*, [10 x i32]** [[B_ADDR]], align 8 -// CHECK1-NEXT: [[TMP1:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK1-NEXT: [[CONV3:%.*]] = bitcast i64* [[A_CASTED]] to i32* // CHECK1-NEXT: store i32 [[TMP1]], i32* [[CONV3]], align 4 // CHECK1-NEXT: [[TMP2:%.*]] = load i64, i64* [[A_CASTED]], align 8 -// CHECK1-NEXT: [[TMP3:%.*]] = load i16, i16* [[CONV1]], align 8 +// CHECK1-NEXT: [[TMP3:%.*]] = load i16, i16* [[CONV1]], align 2 // CHECK1-NEXT: [[CONV4:%.*]] = bitcast i64* [[AA_CASTED]] to i16* // CHECK1-NEXT: store i16 [[TMP3]], i16* [[CONV4]], align 2 // CHECK1-NEXT: [[TMP4:%.*]] = load i64, i64* [[AA_CASTED]], align 8 -// CHECK1-NEXT: [[TMP5:%.*]] = load i8, i8* [[CONV2]], align 8 +// CHECK1-NEXT: [[TMP5:%.*]] = load i8, i8* [[CONV2]], align 1 // CHECK1-NEXT: [[CONV5:%.*]] = bitcast i64* [[AAA_CASTED]] to i8* // CHECK1-NEXT: store i8 [[TMP5]], i8* [[CONV5]], align 1 // CHECK1-NEXT: [[TMP6:%.*]] = load 
i64, i64* [[AAA_CASTED]], align 8 @@ -1601,19 +1601,19 @@ int bar(int n){ // CHECK1-NEXT: [[CONV1:%.*]] = bitcast i64* [[AA_ADDR]] to i16* // CHECK1-NEXT: [[CONV2:%.*]] = bitcast i64* [[AAA_ADDR]] to i8* // CHECK1-NEXT: [[TMP0:%.*]] = load [10 x i32]*, [10 x i32]** [[B_ADDR]], align 8 -// CHECK1-NEXT: [[TMP1:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP1]], 1 -// CHECK1-NEXT: store i32 [[ADD]], i32* [[CONV]], align 8 -// CHECK1-NEXT: [[TMP2:%.*]] = load i16, i16* [[CONV1]], align 8 +// CHECK1-NEXT: store i32 [[ADD]], i32* [[CONV]], align 4 +// CHECK1-NEXT: [[TMP2:%.*]] = load i16, i16* [[CONV1]], align 2 // CHECK1-NEXT: [[CONV3:%.*]] = sext i16 [[TMP2]] to i32 // CHECK1-NEXT: [[ADD4:%.*]] = add nsw i32 [[CONV3]], 1 // CHECK1-NEXT: [[CONV5:%.*]] = trunc i32 [[ADD4]] to i16 -// CHECK1-NEXT: store i16 [[CONV5]], i16* [[CONV1]], align 8 -// CHECK1-NEXT: [[TMP3:%.*]] = load i8, i8* [[CONV2]], align 8 +// CHECK1-NEXT: store i16 [[CONV5]], i16* [[CONV1]], align 2 +// CHECK1-NEXT: [[TMP3:%.*]] = load i8, i8* [[CONV2]], align 1 // CHECK1-NEXT: [[CONV6:%.*]] = sext i8 [[TMP3]] to i32 // CHECK1-NEXT: [[ADD7:%.*]] = add nsw i32 [[CONV6]], 1 // CHECK1-NEXT: [[CONV8:%.*]] = trunc i32 [[ADD7]] to i8 -// CHECK1-NEXT: store i8 [[CONV8]], i8* [[CONV2]], align 8 +// CHECK1-NEXT: store i8 [[CONV8]], i8* [[CONV2]], align 1 // CHECK1-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], [10 x i32]* [[TMP0]], i64 0, i64 2 // CHECK1-NEXT: [[TMP4:%.*]] = load i32, i32* [[ARRAYIDX]], align 4 // CHECK1-NEXT: [[ADD9:%.*]] = add nsw i32 [[TMP4]], 1 @@ -1635,11 +1635,11 @@ int bar(int n){ // CHECK1-NEXT: [[CONV:%.*]] = bitcast i64* [[A_ADDR]] to i32* // CHECK1-NEXT: [[CONV1:%.*]] = bitcast i64* [[AA_ADDR]] to i16* // CHECK1-NEXT: [[TMP0:%.*]] = load [10 x i32]*, [10 x i32]** [[B_ADDR]], align 8 -// CHECK1-NEXT: [[TMP1:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK1-NEXT: [[CONV2:%.*]] = bitcast i64* [[A_CASTED]] to i32* // CHECK1-NEXT: store i32 [[TMP1]], i32* [[CONV2]], align 4 // CHECK1-NEXT: [[TMP2:%.*]] = load i64, i64* [[A_CASTED]], align 8 -// CHECK1-NEXT: [[TMP3:%.*]] = load i16, i16* [[CONV1]], align 8 +// CHECK1-NEXT: [[TMP3:%.*]] = load i16, i16* [[CONV1]], align 2 // CHECK1-NEXT: [[CONV3:%.*]] = bitcast i64* [[AA_CASTED]] to i16* // CHECK1-NEXT: store i16 [[TMP3]], i16* [[CONV3]], align 2 // CHECK1-NEXT: [[TMP4:%.*]] = load i64, i64* [[AA_CASTED]], align 8 @@ -1663,14 +1663,14 @@ int bar(int n){ // CHECK1-NEXT: [[CONV:%.*]] = bitcast i64* [[A_ADDR]] to i32* // CHECK1-NEXT: [[CONV1:%.*]] = bitcast i64* [[AA_ADDR]] to i16* // CHECK1-NEXT: [[TMP0:%.*]] = load [10 x i32]*, [10 x i32]** [[B_ADDR]], align 8 -// CHECK1-NEXT: [[TMP1:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP1]], 1 -// CHECK1-NEXT: store i32 [[ADD]], i32* [[CONV]], align 8 -// CHECK1-NEXT: [[TMP2:%.*]] = load i16, i16* [[CONV1]], align 8 +// CHECK1-NEXT: store i32 [[ADD]], i32* [[CONV]], align 4 +// CHECK1-NEXT: [[TMP2:%.*]] = load i16, i16* [[CONV1]], align 2 // CHECK1-NEXT: [[CONV2:%.*]] = sext i16 [[TMP2]] to i32 // CHECK1-NEXT: [[ADD3:%.*]] = add nsw i32 [[CONV2]], 1 // CHECK1-NEXT: [[CONV4:%.*]] = trunc i32 [[ADD3]] to i16 -// CHECK1-NEXT: store i16 [[CONV4]], i16* [[CONV1]], align 8 +// CHECK1-NEXT: store i16 [[CONV4]], i16* [[CONV1]], align 2 // 
CHECK1-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], [10 x i32]* [[TMP0]], i64 0, i64 2 // CHECK1-NEXT: [[TMP3:%.*]] = load i32, i32* [[ARRAYIDX]], align 4 // CHECK1-NEXT: [[ADD5:%.*]] = add nsw i32 [[TMP3]], 1 @@ -2067,10 +2067,10 @@ int bar(int n){ // CHECK2-NEXT: [[CONV:%.*]] = bitcast i64* [[AA_ADDR]] to i16* // CHECK2-NEXT: [[CONV3:%.*]] = bitcast i64* [[DOTCAPTURE_EXPR__ADDR]] to i32* // CHECK2-NEXT: [[CONV4:%.*]] = bitcast i64* [[DOTCAPTURE_EXPR__ADDR2]] to i32* -// CHECK2-NEXT: [[TMP1:%.*]] = load i32, i32* [[CONV3]], align 8 -// CHECK2-NEXT: [[TMP2:%.*]] = load i32, i32* [[CONV4]], align 8 +// CHECK2-NEXT: [[TMP1:%.*]] = load i32, i32* [[CONV3]], align 4 +// CHECK2-NEXT: [[TMP2:%.*]] = load i32, i32* [[CONV4]], align 4 // CHECK2-NEXT: call void @__kmpc_push_num_teams(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]], i32 [[TMP1]], i32 [[TMP2]]) -// CHECK2-NEXT: [[TMP3:%.*]] = load i16, i16* [[CONV]], align 8 +// CHECK2-NEXT: [[TMP3:%.*]] = load i16, i16* [[CONV]], align 2 // CHECK2-NEXT: [[CONV5:%.*]] = bitcast i64* [[AA_CASTED]] to i16* // CHECK2-NEXT: store i16 [[TMP3]], i16* [[CONV5]], align 2 // CHECK2-NEXT: [[TMP4:%.*]] = load i64, i64* [[AA_CASTED]], align 8 @@ -2205,7 +2205,7 @@ int bar(int n){ // CHECK2-NEXT: [[A_CASTED:%.*]] = alloca i64, align 8 // CHECK2-NEXT: store i64 [[A]], i64* [[A_ADDR]], align 8 // CHECK2-NEXT: [[CONV:%.*]] = bitcast i64* [[A_ADDR]] to i32* -// CHECK2-NEXT: [[TMP0:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK2-NEXT: [[TMP0:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK2-NEXT: [[CONV1:%.*]] = bitcast i64* [[A_CASTED]] to i32* // CHECK2-NEXT: store i32 [[TMP0]], i32* [[CONV1]], align 4 // CHECK2-NEXT: [[TMP1:%.*]] = load i64, i64* [[A_CASTED]], align 8 @@ -2223,9 +2223,9 @@ int bar(int n){ // CHECK2-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 // CHECK2-NEXT: store i64 [[A]], i64* [[A_ADDR]], align 8 // CHECK2-NEXT: [[CONV:%.*]] = bitcast i64* [[A_ADDR]] to i32* -// CHECK2-NEXT: [[TMP0:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK2-NEXT: [[TMP0:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK2-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP0]], 1 -// CHECK2-NEXT: store i32 [[ADD]], i32* [[CONV]], align 8 +// CHECK2-NEXT: store i32 [[ADD]], i32* [[CONV]], align 4 // CHECK2-NEXT: ret void // // @@ -2236,7 +2236,7 @@ int bar(int n){ // CHECK2-NEXT: [[AA_CASTED:%.*]] = alloca i64, align 8 // CHECK2-NEXT: store i64 [[AA]], i64* [[AA_ADDR]], align 8 // CHECK2-NEXT: [[CONV:%.*]] = bitcast i64* [[AA_ADDR]] to i16* -// CHECK2-NEXT: [[TMP0:%.*]] = load i16, i16* [[CONV]], align 8 +// CHECK2-NEXT: [[TMP0:%.*]] = load i16, i16* [[CONV]], align 2 // CHECK2-NEXT: [[CONV1:%.*]] = bitcast i64* [[AA_CASTED]] to i16* // CHECK2-NEXT: store i16 [[TMP0]], i16* [[CONV1]], align 2 // CHECK2-NEXT: [[TMP1:%.*]] = load i64, i64* [[AA_CASTED]], align 8 @@ -2254,11 +2254,11 @@ int bar(int n){ // CHECK2-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 // CHECK2-NEXT: store i64 [[AA]], i64* [[AA_ADDR]], align 8 // CHECK2-NEXT: [[CONV:%.*]] = bitcast i64* [[AA_ADDR]] to i16* -// CHECK2-NEXT: [[TMP0:%.*]] = load i16, i16* [[CONV]], align 8 +// CHECK2-NEXT: [[TMP0:%.*]] = load i16, i16* [[CONV]], align 2 // CHECK2-NEXT: [[CONV1:%.*]] = sext i16 [[TMP0]] to i32 // CHECK2-NEXT: [[ADD:%.*]] = add nsw i32 [[CONV1]], 1 // CHECK2-NEXT: [[CONV2:%.*]] = trunc i32 [[ADD]] to i16 -// CHECK2-NEXT: store i16 [[CONV2]], i16* [[CONV]], align 8 +// CHECK2-NEXT: store i16 [[CONV2]], i16* [[CONV]], align 2 // CHECK2-NEXT: ret void // 
// @@ -2273,11 +2273,11 @@ int bar(int n){ // CHECK2-NEXT: store i64 [[AA]], i64* [[AA_ADDR]], align 8 // CHECK2-NEXT: [[CONV:%.*]] = bitcast i64* [[A_ADDR]] to i32* // CHECK2-NEXT: [[CONV1:%.*]] = bitcast i64* [[AA_ADDR]] to i16* -// CHECK2-NEXT: [[TMP0:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK2-NEXT: [[TMP0:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK2-NEXT: [[CONV2:%.*]] = bitcast i64* [[A_CASTED]] to i32* // CHECK2-NEXT: store i32 [[TMP0]], i32* [[CONV2]], align 4 // CHECK2-NEXT: [[TMP1:%.*]] = load i64, i64* [[A_CASTED]], align 8 -// CHECK2-NEXT: [[TMP2:%.*]] = load i16, i16* [[CONV1]], align 8 +// CHECK2-NEXT: [[TMP2:%.*]] = load i16, i16* [[CONV1]], align 2 // CHECK2-NEXT: [[CONV3:%.*]] = bitcast i64* [[AA_CASTED]] to i16* // CHECK2-NEXT: store i16 [[TMP2]], i16* [[CONV3]], align 2 // CHECK2-NEXT: [[TMP3:%.*]] = load i64, i64* [[AA_CASTED]], align 8 @@ -2298,14 +2298,14 @@ int bar(int n){ // CHECK2-NEXT: store i64 [[AA]], i64* [[AA_ADDR]], align 8 // CHECK2-NEXT: [[CONV:%.*]] = bitcast i64* [[A_ADDR]] to i32* // CHECK2-NEXT: [[CONV1:%.*]] = bitcast i64* [[AA_ADDR]] to i16* -// CHECK2-NEXT: [[TMP0:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK2-NEXT: [[TMP0:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK2-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP0]], 1 -// CHECK2-NEXT: store i32 [[ADD]], i32* [[CONV]], align 8 -// CHECK2-NEXT: [[TMP1:%.*]] = load i16, i16* [[CONV1]], align 8 +// CHECK2-NEXT: store i32 [[ADD]], i32* [[CONV]], align 4 +// CHECK2-NEXT: [[TMP1:%.*]] = load i16, i16* [[CONV1]], align 2 // CHECK2-NEXT: [[CONV2:%.*]] = sext i16 [[TMP1]] to i32 // CHECK2-NEXT: [[ADD3:%.*]] = add nsw i32 [[CONV2]], 1 // CHECK2-NEXT: [[CONV4:%.*]] = trunc i32 [[ADD3]] to i16 -// CHECK2-NEXT: store i16 [[CONV4]], i16* [[CONV1]], align 8 +// CHECK2-NEXT: store i16 [[CONV4]], i16* [[CONV1]], align 2 // CHECK2-NEXT: ret void // // @@ -2340,7 +2340,7 @@ int bar(int n){ // CHECK2-NEXT: [[TMP5:%.*]] = load i64, i64* [[VLA_ADDR4]], align 8 // CHECK2-NEXT: [[TMP6:%.*]] = load double*, double** [[CN_ADDR]], align 8 // CHECK2-NEXT: [[TMP7:%.*]] = load %struct.TT*, %struct.TT** [[D_ADDR]], align 8 -// CHECK2-NEXT: [[TMP8:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK2-NEXT: [[TMP8:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK2-NEXT: [[CONV5:%.*]] = bitcast i64* [[A_CASTED]] to i32* // CHECK2-NEXT: store i32 [[TMP8]], i32* [[CONV5]], align 4 // CHECK2-NEXT: [[TMP9:%.*]] = load i64, i64* [[A_CASTED]], align 8 @@ -2382,9 +2382,9 @@ int bar(int n){ // CHECK2-NEXT: [[TMP5:%.*]] = load i64, i64* [[VLA_ADDR4]], align 8 // CHECK2-NEXT: [[TMP6:%.*]] = load double*, double** [[CN_ADDR]], align 8 // CHECK2-NEXT: [[TMP7:%.*]] = load %struct.TT*, %struct.TT** [[D_ADDR]], align 8 -// CHECK2-NEXT: [[TMP8:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK2-NEXT: [[TMP8:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK2-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP8]], 1 -// CHECK2-NEXT: store i32 [[ADD]], i32* [[CONV]], align 8 +// CHECK2-NEXT: store i32 [[ADD]], i32* [[CONV]], align 4 // CHECK2-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x float], [10 x float]* [[TMP0]], i64 0, i64 2 // CHECK2-NEXT: [[TMP9:%.*]] = load float, float* [[ARRAYIDX]], align 4 // CHECK2-NEXT: [[CONV5:%.*]] = fpext float [[TMP9]] to double @@ -2428,7 +2428,7 @@ int bar(int n){ // CHECK2-NEXT: [[NN_CASTED:%.*]] = alloca i64, align 8 // CHECK2-NEXT: store i64 [[NN]], i64* [[NN_ADDR]], align 8 // CHECK2-NEXT: [[CONV:%.*]] = bitcast i64* [[NN_ADDR]] to i32* -// CHECK2-NEXT: [[TMP0:%.*]] = load i32, i32* [[CONV]], align 8 
+// CHECK2-NEXT: [[TMP0:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK2-NEXT: [[CONV1:%.*]] = bitcast i64* [[NN_CASTED]] to i32* // CHECK2-NEXT: store i32 [[TMP0]], i32* [[CONV1]], align 4 // CHECK2-NEXT: [[TMP1:%.*]] = load i64, i64* [[NN_CASTED]], align 8 @@ -2447,7 +2447,7 @@ int bar(int n){ // CHECK2-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 // CHECK2-NEXT: store i64 [[NN]], i64* [[NN_ADDR]], align 8 // CHECK2-NEXT: [[CONV:%.*]] = bitcast i64* [[NN_ADDR]] to i32* -// CHECK2-NEXT: [[TMP0:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK2-NEXT: [[TMP0:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK2-NEXT: [[CONV1:%.*]] = bitcast i64* [[NN_CASTED]] to i32* // CHECK2-NEXT: store i32 [[TMP0]], i32* [[CONV1]], align 4 // CHECK2-NEXT: [[TMP1:%.*]] = load i64, i64* [[NN_CASTED]], align 8 @@ -2475,7 +2475,7 @@ int bar(int n){ // CHECK2-NEXT: [[NN_CASTED:%.*]] = alloca i64, align 8 // CHECK2-NEXT: store i64 [[NN]], i64* [[NN_ADDR]], align 8 // CHECK2-NEXT: [[CONV:%.*]] = bitcast i64* [[NN_ADDR]] to i32* -// CHECK2-NEXT: [[TMP0:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK2-NEXT: [[TMP0:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK2-NEXT: [[CONV1:%.*]] = bitcast i64* [[NN_CASTED]] to i32* // CHECK2-NEXT: store i32 [[TMP0]], i32* [[CONV1]], align 4 // CHECK2-NEXT: [[TMP1:%.*]] = load i64, i64* [[NN_CASTED]], align 8 @@ -2884,7 +2884,7 @@ int bar(int n){ // CHECK2-NEXT: [[TMP1:%.*]] = load i64, i64* [[VLA_ADDR]], align 8 // CHECK2-NEXT: [[TMP2:%.*]] = load i64, i64* [[VLA_ADDR2]], align 8 // CHECK2-NEXT: [[TMP3:%.*]] = load i16*, i16** [[C_ADDR]], align 8 -// CHECK2-NEXT: [[TMP4:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK2-NEXT: [[TMP4:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK2-NEXT: [[CONV3:%.*]] = bitcast i64* [[B_CASTED]] to i32* // CHECK2-NEXT: store i32 [[TMP4]], i32* [[CONV3]], align 4 // CHECK2-NEXT: [[TMP5:%.*]] = load i64, i64* [[B_CASTED]], align 8 @@ -2914,7 +2914,7 @@ int bar(int n){ // CHECK2-NEXT: [[TMP1:%.*]] = load i64, i64* [[VLA_ADDR]], align 8 // CHECK2-NEXT: [[TMP2:%.*]] = load i64, i64* [[VLA_ADDR2]], align 8 // CHECK2-NEXT: [[TMP3:%.*]] = load i16*, i16** [[C_ADDR]], align 8 -// CHECK2-NEXT: [[TMP4:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK2-NEXT: [[TMP4:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK2-NEXT: [[CONV3:%.*]] = sitofp i32 [[TMP4]] to double // CHECK2-NEXT: [[ADD:%.*]] = fadd double [[CONV3]], 1.500000e+00 // CHECK2-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_S1:%.*]], %struct.S1* [[TMP0]], i32 0, i32 0 @@ -2949,15 +2949,15 @@ int bar(int n){ // CHECK2-NEXT: [[CONV1:%.*]] = bitcast i64* [[AA_ADDR]] to i16* // CHECK2-NEXT: [[CONV2:%.*]] = bitcast i64* [[AAA_ADDR]] to i8* // CHECK2-NEXT: [[TMP0:%.*]] = load [10 x i32]*, [10 x i32]** [[B_ADDR]], align 8 -// CHECK2-NEXT: [[TMP1:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK2-NEXT: [[TMP1:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK2-NEXT: [[CONV3:%.*]] = bitcast i64* [[A_CASTED]] to i32* // CHECK2-NEXT: store i32 [[TMP1]], i32* [[CONV3]], align 4 // CHECK2-NEXT: [[TMP2:%.*]] = load i64, i64* [[A_CASTED]], align 8 -// CHECK2-NEXT: [[TMP3:%.*]] = load i16, i16* [[CONV1]], align 8 +// CHECK2-NEXT: [[TMP3:%.*]] = load i16, i16* [[CONV1]], align 2 // CHECK2-NEXT: [[CONV4:%.*]] = bitcast i64* [[AA_CASTED]] to i16* // CHECK2-NEXT: store i16 [[TMP3]], i16* [[CONV4]], align 2 // CHECK2-NEXT: [[TMP4:%.*]] = load i64, i64* [[AA_CASTED]], align 8 -// CHECK2-NEXT: [[TMP5:%.*]] = load i8, i8* [[CONV2]], align 8 +// CHECK2-NEXT: [[TMP5:%.*]] = load 
i8, i8* [[CONV2]], align 1 // CHECK2-NEXT: [[CONV5:%.*]] = bitcast i64* [[AAA_CASTED]] to i8* // CHECK2-NEXT: store i8 [[TMP5]], i8* [[CONV5]], align 1 // CHECK2-NEXT: [[TMP6:%.*]] = load i64, i64* [[AAA_CASTED]], align 8 @@ -2984,19 +2984,19 @@ int bar(int n){ // CHECK2-NEXT: [[CONV1:%.*]] = bitcast i64* [[AA_ADDR]] to i16* // CHECK2-NEXT: [[CONV2:%.*]] = bitcast i64* [[AAA_ADDR]] to i8* // CHECK2-NEXT: [[TMP0:%.*]] = load [10 x i32]*, [10 x i32]** [[B_ADDR]], align 8 -// CHECK2-NEXT: [[TMP1:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK2-NEXT: [[TMP1:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK2-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP1]], 1 -// CHECK2-NEXT: store i32 [[ADD]], i32* [[CONV]], align 8 -// CHECK2-NEXT: [[TMP2:%.*]] = load i16, i16* [[CONV1]], align 8 +// CHECK2-NEXT: store i32 [[ADD]], i32* [[CONV]], align 4 +// CHECK2-NEXT: [[TMP2:%.*]] = load i16, i16* [[CONV1]], align 2 // CHECK2-NEXT: [[CONV3:%.*]] = sext i16 [[TMP2]] to i32 // CHECK2-NEXT: [[ADD4:%.*]] = add nsw i32 [[CONV3]], 1 // CHECK2-NEXT: [[CONV5:%.*]] = trunc i32 [[ADD4]] to i16 -// CHECK2-NEXT: store i16 [[CONV5]], i16* [[CONV1]], align 8 -// CHECK2-NEXT: [[TMP3:%.*]] = load i8, i8* [[CONV2]], align 8 +// CHECK2-NEXT: store i16 [[CONV5]], i16* [[CONV1]], align 2 +// CHECK2-NEXT: [[TMP3:%.*]] = load i8, i8* [[CONV2]], align 1 // CHECK2-NEXT: [[CONV6:%.*]] = sext i8 [[TMP3]] to i32 // CHECK2-NEXT: [[ADD7:%.*]] = add nsw i32 [[CONV6]], 1 // CHECK2-NEXT: [[CONV8:%.*]] = trunc i32 [[ADD7]] to i8 -// CHECK2-NEXT: store i8 [[CONV8]], i8* [[CONV2]], align 8 +// CHECK2-NEXT: store i8 [[CONV8]], i8* [[CONV2]], align 1 // CHECK2-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], [10 x i32]* [[TMP0]], i64 0, i64 2 // CHECK2-NEXT: [[TMP4:%.*]] = load i32, i32* [[ARRAYIDX]], align 4 // CHECK2-NEXT: [[ADD9:%.*]] = add nsw i32 [[TMP4]], 1 @@ -3018,11 +3018,11 @@ int bar(int n){ // CHECK2-NEXT: [[CONV:%.*]] = bitcast i64* [[A_ADDR]] to i32* // CHECK2-NEXT: [[CONV1:%.*]] = bitcast i64* [[AA_ADDR]] to i16* // CHECK2-NEXT: [[TMP0:%.*]] = load [10 x i32]*, [10 x i32]** [[B_ADDR]], align 8 -// CHECK2-NEXT: [[TMP1:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK2-NEXT: [[TMP1:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK2-NEXT: [[CONV2:%.*]] = bitcast i64* [[A_CASTED]] to i32* // CHECK2-NEXT: store i32 [[TMP1]], i32* [[CONV2]], align 4 // CHECK2-NEXT: [[TMP2:%.*]] = load i64, i64* [[A_CASTED]], align 8 -// CHECK2-NEXT: [[TMP3:%.*]] = load i16, i16* [[CONV1]], align 8 +// CHECK2-NEXT: [[TMP3:%.*]] = load i16, i16* [[CONV1]], align 2 // CHECK2-NEXT: [[CONV3:%.*]] = bitcast i64* [[AA_CASTED]] to i16* // CHECK2-NEXT: store i16 [[TMP3]], i16* [[CONV3]], align 2 // CHECK2-NEXT: [[TMP4:%.*]] = load i64, i64* [[AA_CASTED]], align 8 @@ -3046,14 +3046,14 @@ int bar(int n){ // CHECK2-NEXT: [[CONV:%.*]] = bitcast i64* [[A_ADDR]] to i32* // CHECK2-NEXT: [[CONV1:%.*]] = bitcast i64* [[AA_ADDR]] to i16* // CHECK2-NEXT: [[TMP0:%.*]] = load [10 x i32]*, [10 x i32]** [[B_ADDR]], align 8 -// CHECK2-NEXT: [[TMP1:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK2-NEXT: [[TMP1:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK2-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP1]], 1 -// CHECK2-NEXT: store i32 [[ADD]], i32* [[CONV]], align 8 -// CHECK2-NEXT: [[TMP2:%.*]] = load i16, i16* [[CONV1]], align 8 +// CHECK2-NEXT: store i32 [[ADD]], i32* [[CONV]], align 4 +// CHECK2-NEXT: [[TMP2:%.*]] = load i16, i16* [[CONV1]], align 2 // CHECK2-NEXT: [[CONV2:%.*]] = sext i16 [[TMP2]] to i32 // CHECK2-NEXT: [[ADD3:%.*]] = add nsw i32 [[CONV2]], 1 // 
CHECK2-NEXT: [[CONV4:%.*]] = trunc i32 [[ADD3]] to i16 -// CHECK2-NEXT: store i16 [[CONV4]], i16* [[CONV1]], align 8 +// CHECK2-NEXT: store i16 [[CONV4]], i16* [[CONV1]], align 2 // CHECK2-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], [10 x i32]* [[TMP0]], i64 0, i64 2 // CHECK2-NEXT: [[TMP3:%.*]] = load i32, i32* [[ARRAYIDX]], align 4 // CHECK2-NEXT: [[ADD5:%.*]] = add nsw i32 [[TMP3]], 1 @@ -3444,7 +3444,7 @@ int bar(int n){ // CHECK3-NEXT: [[TMP1:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR__ADDR]], align 4 // CHECK3-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR__ADDR2]], align 4 // CHECK3-NEXT: call void @__kmpc_push_num_teams(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]], i32 [[TMP1]], i32 [[TMP2]]) -// CHECK3-NEXT: [[TMP3:%.*]] = load i16, i16* [[CONV]], align 4 +// CHECK3-NEXT: [[TMP3:%.*]] = load i16, i16* [[CONV]], align 2 // CHECK3-NEXT: [[CONV3:%.*]] = bitcast i32* [[AA_CASTED]] to i16* // CHECK3-NEXT: store i16 [[TMP3]], i16* [[CONV3]], align 2 // CHECK3-NEXT: [[TMP4:%.*]] = load i32, i32* [[AA_CASTED]], align 4 @@ -3605,7 +3605,7 @@ int bar(int n){ // CHECK3-NEXT: [[AA_CASTED:%.*]] = alloca i32, align 4 // CHECK3-NEXT: store i32 [[AA]], i32* [[AA_ADDR]], align 4 // CHECK3-NEXT: [[CONV:%.*]] = bitcast i32* [[AA_ADDR]] to i16* -// CHECK3-NEXT: [[TMP0:%.*]] = load i16, i16* [[CONV]], align 4 +// CHECK3-NEXT: [[TMP0:%.*]] = load i16, i16* [[CONV]], align 2 // CHECK3-NEXT: [[CONV1:%.*]] = bitcast i32* [[AA_CASTED]] to i16* // CHECK3-NEXT: store i16 [[TMP0]], i16* [[CONV1]], align 2 // CHECK3-NEXT: [[TMP1:%.*]] = load i32, i32* [[AA_CASTED]], align 4 @@ -3623,11 +3623,11 @@ int bar(int n){ // CHECK3-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4 // CHECK3-NEXT: store i32 [[AA]], i32* [[AA_ADDR]], align 4 // CHECK3-NEXT: [[CONV:%.*]] = bitcast i32* [[AA_ADDR]] to i16* -// CHECK3-NEXT: [[TMP0:%.*]] = load i16, i16* [[CONV]], align 4 +// CHECK3-NEXT: [[TMP0:%.*]] = load i16, i16* [[CONV]], align 2 // CHECK3-NEXT: [[CONV1:%.*]] = sext i16 [[TMP0]] to i32 // CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 [[CONV1]], 1 // CHECK3-NEXT: [[CONV2:%.*]] = trunc i32 [[ADD]] to i16 -// CHECK3-NEXT: store i16 [[CONV2]], i16* [[CONV]], align 4 +// CHECK3-NEXT: store i16 [[CONV2]], i16* [[CONV]], align 2 // CHECK3-NEXT: ret void // // @@ -3644,7 +3644,7 @@ int bar(int n){ // CHECK3-NEXT: [[TMP0:%.*]] = load i32, i32* [[A_ADDR]], align 4 // CHECK3-NEXT: store i32 [[TMP0]], i32* [[A_CASTED]], align 4 // CHECK3-NEXT: [[TMP1:%.*]] = load i32, i32* [[A_CASTED]], align 4 -// CHECK3-NEXT: [[TMP2:%.*]] = load i16, i16* [[CONV]], align 4 +// CHECK3-NEXT: [[TMP2:%.*]] = load i16, i16* [[CONV]], align 2 // CHECK3-NEXT: [[CONV1:%.*]] = bitcast i32* [[AA_CASTED]] to i16* // CHECK3-NEXT: store i16 [[TMP2]], i16* [[CONV1]], align 2 // CHECK3-NEXT: [[TMP3:%.*]] = load i32, i32* [[AA_CASTED]], align 4 @@ -3667,11 +3667,11 @@ int bar(int n){ // CHECK3-NEXT: [[TMP0:%.*]] = load i32, i32* [[A_ADDR]], align 4 // CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP0]], 1 // CHECK3-NEXT: store i32 [[ADD]], i32* [[A_ADDR]], align 4 -// CHECK3-NEXT: [[TMP1:%.*]] = load i16, i16* [[CONV]], align 4 +// CHECK3-NEXT: [[TMP1:%.*]] = load i16, i16* [[CONV]], align 2 // CHECK3-NEXT: [[CONV1:%.*]] = sext i16 [[TMP1]] to i32 // CHECK3-NEXT: [[ADD2:%.*]] = add nsw i32 [[CONV1]], 1 // CHECK3-NEXT: [[CONV3:%.*]] = trunc i32 [[ADD2]] to i16 -// CHECK3-NEXT: store i16 [[CONV3]], i16* [[CONV]], align 4 +// CHECK3-NEXT: store i16 [[CONV3]], i16* [[CONV]], align 2 // CHECK3-NEXT: ret void // // @@ -4299,11 +4299,11 
@@ int bar(int n){ // CHECK3-NEXT: [[TMP1:%.*]] = load i32, i32* [[A_ADDR]], align 4 // CHECK3-NEXT: store i32 [[TMP1]], i32* [[A_CASTED]], align 4 // CHECK3-NEXT: [[TMP2:%.*]] = load i32, i32* [[A_CASTED]], align 4 -// CHECK3-NEXT: [[TMP3:%.*]] = load i16, i16* [[CONV]], align 4 +// CHECK3-NEXT: [[TMP3:%.*]] = load i16, i16* [[CONV]], align 2 // CHECK3-NEXT: [[CONV2:%.*]] = bitcast i32* [[AA_CASTED]] to i16* // CHECK3-NEXT: store i16 [[TMP3]], i16* [[CONV2]], align 2 // CHECK3-NEXT: [[TMP4:%.*]] = load i32, i32* [[AA_CASTED]], align 4 -// CHECK3-NEXT: [[TMP5:%.*]] = load i8, i8* [[CONV1]], align 4 +// CHECK3-NEXT: [[TMP5:%.*]] = load i8, i8* [[CONV1]], align 1 // CHECK3-NEXT: [[CONV3:%.*]] = bitcast i32* [[AAA_CASTED]] to i8* // CHECK3-NEXT: store i8 [[TMP5]], i8* [[CONV3]], align 1 // CHECK3-NEXT: [[TMP6:%.*]] = load i32, i32* [[AAA_CASTED]], align 4 @@ -4332,16 +4332,16 @@ int bar(int n){ // CHECK3-NEXT: [[TMP1:%.*]] = load i32, i32* [[A_ADDR]], align 4 // CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP1]], 1 // CHECK3-NEXT: store i32 [[ADD]], i32* [[A_ADDR]], align 4 -// CHECK3-NEXT: [[TMP2:%.*]] = load i16, i16* [[CONV]], align 4 +// CHECK3-NEXT: [[TMP2:%.*]] = load i16, i16* [[CONV]], align 2 // CHECK3-NEXT: [[CONV2:%.*]] = sext i16 [[TMP2]] to i32 // CHECK3-NEXT: [[ADD3:%.*]] = add nsw i32 [[CONV2]], 1 // CHECK3-NEXT: [[CONV4:%.*]] = trunc i32 [[ADD3]] to i16 -// CHECK3-NEXT: store i16 [[CONV4]], i16* [[CONV]], align 4 -// CHECK3-NEXT: [[TMP3:%.*]] = load i8, i8* [[CONV1]], align 4 +// CHECK3-NEXT: store i16 [[CONV4]], i16* [[CONV]], align 2 +// CHECK3-NEXT: [[TMP3:%.*]] = load i8, i8* [[CONV1]], align 1 // CHECK3-NEXT: [[CONV5:%.*]] = sext i8 [[TMP3]] to i32 // CHECK3-NEXT: [[ADD6:%.*]] = add nsw i32 [[CONV5]], 1 // CHECK3-NEXT: [[CONV7:%.*]] = trunc i32 [[ADD6]] to i8 -// CHECK3-NEXT: store i8 [[CONV7]], i8* [[CONV1]], align 4 +// CHECK3-NEXT: store i8 [[CONV7]], i8* [[CONV1]], align 1 // CHECK3-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], [10 x i32]* [[TMP0]], i32 0, i32 2 // CHECK3-NEXT: [[TMP4:%.*]] = load i32, i32* [[ARRAYIDX]], align 4 // CHECK3-NEXT: [[ADD8:%.*]] = add nsw i32 [[TMP4]], 1 @@ -4365,7 +4365,7 @@ int bar(int n){ // CHECK3-NEXT: [[TMP1:%.*]] = load i32, i32* [[A_ADDR]], align 4 // CHECK3-NEXT: store i32 [[TMP1]], i32* [[A_CASTED]], align 4 // CHECK3-NEXT: [[TMP2:%.*]] = load i32, i32* [[A_CASTED]], align 4 -// CHECK3-NEXT: [[TMP3:%.*]] = load i16, i16* [[CONV]], align 4 +// CHECK3-NEXT: [[TMP3:%.*]] = load i16, i16* [[CONV]], align 2 // CHECK3-NEXT: [[CONV1:%.*]] = bitcast i32* [[AA_CASTED]] to i16* // CHECK3-NEXT: store i16 [[TMP3]], i16* [[CONV1]], align 2 // CHECK3-NEXT: [[TMP4:%.*]] = load i32, i32* [[AA_CASTED]], align 4 @@ -4391,11 +4391,11 @@ int bar(int n){ // CHECK3-NEXT: [[TMP1:%.*]] = load i32, i32* [[A_ADDR]], align 4 // CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP1]], 1 // CHECK3-NEXT: store i32 [[ADD]], i32* [[A_ADDR]], align 4 -// CHECK3-NEXT: [[TMP2:%.*]] = load i16, i16* [[CONV]], align 4 +// CHECK3-NEXT: [[TMP2:%.*]] = load i16, i16* [[CONV]], align 2 // CHECK3-NEXT: [[CONV1:%.*]] = sext i16 [[TMP2]] to i32 // CHECK3-NEXT: [[ADD2:%.*]] = add nsw i32 [[CONV1]], 1 // CHECK3-NEXT: [[CONV3:%.*]] = trunc i32 [[ADD2]] to i16 -// CHECK3-NEXT: store i16 [[CONV3]], i16* [[CONV]], align 4 +// CHECK3-NEXT: store i16 [[CONV3]], i16* [[CONV]], align 2 // CHECK3-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], [10 x i32]* [[TMP0]], i32 0, i32 2 // CHECK3-NEXT: [[TMP3:%.*]] = load i32, i32* [[ARRAYIDX]], align 4 // CHECK3-NEXT: 
[[ADD4:%.*]] = add nsw i32 [[TMP3]], 1 @@ -4786,7 +4786,7 @@ int bar(int n){ // CHECK4-NEXT: [[TMP1:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR__ADDR]], align 4 // CHECK4-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR__ADDR2]], align 4 // CHECK4-NEXT: call void @__kmpc_push_num_teams(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]], i32 [[TMP1]], i32 [[TMP2]]) -// CHECK4-NEXT: [[TMP3:%.*]] = load i16, i16* [[CONV]], align 4 +// CHECK4-NEXT: [[TMP3:%.*]] = load i16, i16* [[CONV]], align 2 // CHECK4-NEXT: [[CONV3:%.*]] = bitcast i32* [[AA_CASTED]] to i16* // CHECK4-NEXT: store i16 [[TMP3]], i16* [[CONV3]], align 2 // CHECK4-NEXT: [[TMP4:%.*]] = load i32, i32* [[AA_CASTED]], align 4 @@ -4947,7 +4947,7 @@ int bar(int n){ // CHECK4-NEXT: [[AA_CASTED:%.*]] = alloca i32, align 4 // CHECK4-NEXT: store i32 [[AA]], i32* [[AA_ADDR]], align 4 // CHECK4-NEXT: [[CONV:%.*]] = bitcast i32* [[AA_ADDR]] to i16* -// CHECK4-NEXT: [[TMP0:%.*]] = load i16, i16* [[CONV]], align 4 +// CHECK4-NEXT: [[TMP0:%.*]] = load i16, i16* [[CONV]], align 2 // CHECK4-NEXT: [[CONV1:%.*]] = bitcast i32* [[AA_CASTED]] to i16* // CHECK4-NEXT: store i16 [[TMP0]], i16* [[CONV1]], align 2 // CHECK4-NEXT: [[TMP1:%.*]] = load i32, i32* [[AA_CASTED]], align 4 @@ -4965,11 +4965,11 @@ int bar(int n){ // CHECK4-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4 // CHECK4-NEXT: store i32 [[AA]], i32* [[AA_ADDR]], align 4 // CHECK4-NEXT: [[CONV:%.*]] = bitcast i32* [[AA_ADDR]] to i16* -// CHECK4-NEXT: [[TMP0:%.*]] = load i16, i16* [[CONV]], align 4 +// CHECK4-NEXT: [[TMP0:%.*]] = load i16, i16* [[CONV]], align 2 // CHECK4-NEXT: [[CONV1:%.*]] = sext i16 [[TMP0]] to i32 // CHECK4-NEXT: [[ADD:%.*]] = add nsw i32 [[CONV1]], 1 // CHECK4-NEXT: [[CONV2:%.*]] = trunc i32 [[ADD]] to i16 -// CHECK4-NEXT: store i16 [[CONV2]], i16* [[CONV]], align 4 +// CHECK4-NEXT: store i16 [[CONV2]], i16* [[CONV]], align 2 // CHECK4-NEXT: ret void // // @@ -4986,7 +4986,7 @@ int bar(int n){ // CHECK4-NEXT: [[TMP0:%.*]] = load i32, i32* [[A_ADDR]], align 4 // CHECK4-NEXT: store i32 [[TMP0]], i32* [[A_CASTED]], align 4 // CHECK4-NEXT: [[TMP1:%.*]] = load i32, i32* [[A_CASTED]], align 4 -// CHECK4-NEXT: [[TMP2:%.*]] = load i16, i16* [[CONV]], align 4 +// CHECK4-NEXT: [[TMP2:%.*]] = load i16, i16* [[CONV]], align 2 // CHECK4-NEXT: [[CONV1:%.*]] = bitcast i32* [[AA_CASTED]] to i16* // CHECK4-NEXT: store i16 [[TMP2]], i16* [[CONV1]], align 2 // CHECK4-NEXT: [[TMP3:%.*]] = load i32, i32* [[AA_CASTED]], align 4 @@ -5009,11 +5009,11 @@ int bar(int n){ // CHECK4-NEXT: [[TMP0:%.*]] = load i32, i32* [[A_ADDR]], align 4 // CHECK4-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP0]], 1 // CHECK4-NEXT: store i32 [[ADD]], i32* [[A_ADDR]], align 4 -// CHECK4-NEXT: [[TMP1:%.*]] = load i16, i16* [[CONV]], align 4 +// CHECK4-NEXT: [[TMP1:%.*]] = load i16, i16* [[CONV]], align 2 // CHECK4-NEXT: [[CONV1:%.*]] = sext i16 [[TMP1]] to i32 // CHECK4-NEXT: [[ADD2:%.*]] = add nsw i32 [[CONV1]], 1 // CHECK4-NEXT: [[CONV3:%.*]] = trunc i32 [[ADD2]] to i16 -// CHECK4-NEXT: store i16 [[CONV3]], i16* [[CONV]], align 4 +// CHECK4-NEXT: store i16 [[CONV3]], i16* [[CONV]], align 2 // CHECK4-NEXT: ret void // // @@ -5641,11 +5641,11 @@ int bar(int n){ // CHECK4-NEXT: [[TMP1:%.*]] = load i32, i32* [[A_ADDR]], align 4 // CHECK4-NEXT: store i32 [[TMP1]], i32* [[A_CASTED]], align 4 // CHECK4-NEXT: [[TMP2:%.*]] = load i32, i32* [[A_CASTED]], align 4 -// CHECK4-NEXT: [[TMP3:%.*]] = load i16, i16* [[CONV]], align 4 +// CHECK4-NEXT: [[TMP3:%.*]] = load i16, i16* [[CONV]], align 2 // CHECK4-NEXT: 
[[CONV2:%.*]] = bitcast i32* [[AA_CASTED]] to i16* // CHECK4-NEXT: store i16 [[TMP3]], i16* [[CONV2]], align 2 // CHECK4-NEXT: [[TMP4:%.*]] = load i32, i32* [[AA_CASTED]], align 4 -// CHECK4-NEXT: [[TMP5:%.*]] = load i8, i8* [[CONV1]], align 4 +// CHECK4-NEXT: [[TMP5:%.*]] = load i8, i8* [[CONV1]], align 1 // CHECK4-NEXT: [[CONV3:%.*]] = bitcast i32* [[AAA_CASTED]] to i8* // CHECK4-NEXT: store i8 [[TMP5]], i8* [[CONV3]], align 1 // CHECK4-NEXT: [[TMP6:%.*]] = load i32, i32* [[AAA_CASTED]], align 4 @@ -5674,16 +5674,16 @@ int bar(int n){ // CHECK4-NEXT: [[TMP1:%.*]] = load i32, i32* [[A_ADDR]], align 4 // CHECK4-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP1]], 1 // CHECK4-NEXT: store i32 [[ADD]], i32* [[A_ADDR]], align 4 -// CHECK4-NEXT: [[TMP2:%.*]] = load i16, i16* [[CONV]], align 4 +// CHECK4-NEXT: [[TMP2:%.*]] = load i16, i16* [[CONV]], align 2 // CHECK4-NEXT: [[CONV2:%.*]] = sext i16 [[TMP2]] to i32 // CHECK4-NEXT: [[ADD3:%.*]] = add nsw i32 [[CONV2]], 1 // CHECK4-NEXT: [[CONV4:%.*]] = trunc i32 [[ADD3]] to i16 -// CHECK4-NEXT: store i16 [[CONV4]], i16* [[CONV]], align 4 -// CHECK4-NEXT: [[TMP3:%.*]] = load i8, i8* [[CONV1]], align 4 +// CHECK4-NEXT: store i16 [[CONV4]], i16* [[CONV]], align 2 +// CHECK4-NEXT: [[TMP3:%.*]] = load i8, i8* [[CONV1]], align 1 // CHECK4-NEXT: [[CONV5:%.*]] = sext i8 [[TMP3]] to i32 // CHECK4-NEXT: [[ADD6:%.*]] = add nsw i32 [[CONV5]], 1 // CHECK4-NEXT: [[CONV7:%.*]] = trunc i32 [[ADD6]] to i8 -// CHECK4-NEXT: store i8 [[CONV7]], i8* [[CONV1]], align 4 +// CHECK4-NEXT: store i8 [[CONV7]], i8* [[CONV1]], align 1 // CHECK4-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], [10 x i32]* [[TMP0]], i32 0, i32 2 // CHECK4-NEXT: [[TMP4:%.*]] = load i32, i32* [[ARRAYIDX]], align 4 // CHECK4-NEXT: [[ADD8:%.*]] = add nsw i32 [[TMP4]], 1 @@ -5707,7 +5707,7 @@ int bar(int n){ // CHECK4-NEXT: [[TMP1:%.*]] = load i32, i32* [[A_ADDR]], align 4 // CHECK4-NEXT: store i32 [[TMP1]], i32* [[A_CASTED]], align 4 // CHECK4-NEXT: [[TMP2:%.*]] = load i32, i32* [[A_CASTED]], align 4 -// CHECK4-NEXT: [[TMP3:%.*]] = load i16, i16* [[CONV]], align 4 +// CHECK4-NEXT: [[TMP3:%.*]] = load i16, i16* [[CONV]], align 2 // CHECK4-NEXT: [[CONV1:%.*]] = bitcast i32* [[AA_CASTED]] to i16* // CHECK4-NEXT: store i16 [[TMP3]], i16* [[CONV1]], align 2 // CHECK4-NEXT: [[TMP4:%.*]] = load i32, i32* [[AA_CASTED]], align 4 @@ -5733,11 +5733,11 @@ int bar(int n){ // CHECK4-NEXT: [[TMP1:%.*]] = load i32, i32* [[A_ADDR]], align 4 // CHECK4-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP1]], 1 // CHECK4-NEXT: store i32 [[ADD]], i32* [[A_ADDR]], align 4 -// CHECK4-NEXT: [[TMP2:%.*]] = load i16, i16* [[CONV]], align 4 +// CHECK4-NEXT: [[TMP2:%.*]] = load i16, i16* [[CONV]], align 2 // CHECK4-NEXT: [[CONV1:%.*]] = sext i16 [[TMP2]] to i32 // CHECK4-NEXT: [[ADD2:%.*]] = add nsw i32 [[CONV1]], 1 // CHECK4-NEXT: [[CONV3:%.*]] = trunc i32 [[ADD2]] to i16 -// CHECK4-NEXT: store i16 [[CONV3]], i16* [[CONV]], align 4 +// CHECK4-NEXT: store i16 [[CONV3]], i16* [[CONV]], align 2 // CHECK4-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], [10 x i32]* [[TMP0]], i32 0, i32 2 // CHECK4-NEXT: [[TMP3:%.*]] = load i32, i32* [[ARRAYIDX]], align 4 // CHECK4-NEXT: [[ADD4:%.*]] = add nsw i32 [[TMP3]], 1 @@ -5766,10 +5766,10 @@ int bar(int n){ // CHECK9-NEXT: [[CONV:%.*]] = bitcast i64* [[AA_ADDR]] to i16* // CHECK9-NEXT: [[CONV3:%.*]] = bitcast i64* [[DOTCAPTURE_EXPR__ADDR]] to i32* // CHECK9-NEXT: [[CONV4:%.*]] = bitcast i64* [[DOTCAPTURE_EXPR__ADDR2]] to i32* -// CHECK9-NEXT: [[TMP1:%.*]] = load i32, i32* [[CONV3]], align 8 
-// CHECK9-NEXT: [[TMP2:%.*]] = load i32, i32* [[CONV4]], align 8 +// CHECK9-NEXT: [[TMP1:%.*]] = load i32, i32* [[CONV3]], align 4 +// CHECK9-NEXT: [[TMP2:%.*]] = load i32, i32* [[CONV4]], align 4 // CHECK9-NEXT: call void @__kmpc_push_num_teams(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]], i32 [[TMP1]], i32 [[TMP2]]) -// CHECK9-NEXT: [[TMP3:%.*]] = load i16, i16* [[CONV]], align 8 +// CHECK9-NEXT: [[TMP3:%.*]] = load i16, i16* [[CONV]], align 2 // CHECK9-NEXT: [[CONV5:%.*]] = bitcast i64* [[AA_CASTED]] to i16* // CHECK9-NEXT: store i16 [[TMP3]], i16* [[CONV5]], align 2 // CHECK9-NEXT: [[TMP4:%.*]] = load i64, i64* [[AA_CASTED]], align 8 @@ -5797,7 +5797,7 @@ int bar(int n){ // CHECK9-NEXT: [[AA_CASTED:%.*]] = alloca i64, align 8 // CHECK9-NEXT: store i64 [[AA]], i64* [[AA_ADDR]], align 8 // CHECK9-NEXT: [[CONV:%.*]] = bitcast i64* [[AA_ADDR]] to i16* -// CHECK9-NEXT: [[TMP0:%.*]] = load i16, i16* [[CONV]], align 8 +// CHECK9-NEXT: [[TMP0:%.*]] = load i16, i16* [[CONV]], align 2 // CHECK9-NEXT: [[CONV1:%.*]] = bitcast i64* [[AA_CASTED]] to i16* // CHECK9-NEXT: store i16 [[TMP0]], i16* [[CONV1]], align 2 // CHECK9-NEXT: [[TMP1:%.*]] = load i64, i64* [[AA_CASTED]], align 8 @@ -5815,11 +5815,11 @@ int bar(int n){ // CHECK9-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 // CHECK9-NEXT: store i64 [[AA]], i64* [[AA_ADDR]], align 8 // CHECK9-NEXT: [[CONV:%.*]] = bitcast i64* [[AA_ADDR]] to i16* -// CHECK9-NEXT: [[TMP0:%.*]] = load i16, i16* [[CONV]], align 8 +// CHECK9-NEXT: [[TMP0:%.*]] = load i16, i16* [[CONV]], align 2 // CHECK9-NEXT: [[CONV1:%.*]] = sext i16 [[TMP0]] to i32 // CHECK9-NEXT: [[ADD:%.*]] = add nsw i32 [[CONV1]], 1 // CHECK9-NEXT: [[CONV2:%.*]] = trunc i32 [[ADD]] to i16 -// CHECK9-NEXT: store i16 [[CONV2]], i16* [[CONV]], align 8 +// CHECK9-NEXT: store i16 [[CONV2]], i16* [[CONV]], align 2 // CHECK9-NEXT: ret void // // @@ -5834,11 +5834,11 @@ int bar(int n){ // CHECK9-NEXT: store i64 [[AA]], i64* [[AA_ADDR]], align 8 // CHECK9-NEXT: [[CONV:%.*]] = bitcast i64* [[A_ADDR]] to i32* // CHECK9-NEXT: [[CONV1:%.*]] = bitcast i64* [[AA_ADDR]] to i16* -// CHECK9-NEXT: [[TMP0:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK9-NEXT: [[TMP0:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK9-NEXT: [[CONV2:%.*]] = bitcast i64* [[A_CASTED]] to i32* // CHECK9-NEXT: store i32 [[TMP0]], i32* [[CONV2]], align 4 // CHECK9-NEXT: [[TMP1:%.*]] = load i64, i64* [[A_CASTED]], align 8 -// CHECK9-NEXT: [[TMP2:%.*]] = load i16, i16* [[CONV1]], align 8 +// CHECK9-NEXT: [[TMP2:%.*]] = load i16, i16* [[CONV1]], align 2 // CHECK9-NEXT: [[CONV3:%.*]] = bitcast i64* [[AA_CASTED]] to i16* // CHECK9-NEXT: store i16 [[TMP2]], i16* [[CONV3]], align 2 // CHECK9-NEXT: [[TMP3:%.*]] = load i64, i64* [[AA_CASTED]], align 8 @@ -5859,14 +5859,14 @@ int bar(int n){ // CHECK9-NEXT: store i64 [[AA]], i64* [[AA_ADDR]], align 8 // CHECK9-NEXT: [[CONV:%.*]] = bitcast i64* [[A_ADDR]] to i32* // CHECK9-NEXT: [[CONV1:%.*]] = bitcast i64* [[AA_ADDR]] to i16* -// CHECK9-NEXT: [[TMP0:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK9-NEXT: [[TMP0:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK9-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP0]], 1 -// CHECK9-NEXT: store i32 [[ADD]], i32* [[CONV]], align 8 -// CHECK9-NEXT: [[TMP1:%.*]] = load i16, i16* [[CONV1]], align 8 +// CHECK9-NEXT: store i32 [[ADD]], i32* [[CONV]], align 4 +// CHECK9-NEXT: [[TMP1:%.*]] = load i16, i16* [[CONV1]], align 2 // CHECK9-NEXT: [[CONV2:%.*]] = sext i16 [[TMP1]] to i32 // CHECK9-NEXT: [[ADD3:%.*]] = add nsw i32 [[CONV2]], 1 // 
CHECK9-NEXT: [[CONV4:%.*]] = trunc i32 [[ADD3]] to i16 -// CHECK9-NEXT: store i16 [[CONV4]], i16* [[CONV1]], align 8 +// CHECK9-NEXT: store i16 [[CONV4]], i16* [[CONV1]], align 2 // CHECK9-NEXT: ret void // // @@ -5901,7 +5901,7 @@ int bar(int n){ // CHECK9-NEXT: [[TMP5:%.*]] = load i64, i64* [[VLA_ADDR4]], align 8 // CHECK9-NEXT: [[TMP6:%.*]] = load double*, double** [[CN_ADDR]], align 8 // CHECK9-NEXT: [[TMP7:%.*]] = load %struct.TT*, %struct.TT** [[D_ADDR]], align 8 -// CHECK9-NEXT: [[TMP8:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK9-NEXT: [[TMP8:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK9-NEXT: [[CONV5:%.*]] = bitcast i64* [[A_CASTED]] to i32* // CHECK9-NEXT: store i32 [[TMP8]], i32* [[CONV5]], align 4 // CHECK9-NEXT: [[TMP9:%.*]] = load i64, i64* [[A_CASTED]], align 8 @@ -5943,9 +5943,9 @@ int bar(int n){ // CHECK9-NEXT: [[TMP5:%.*]] = load i64, i64* [[VLA_ADDR4]], align 8 // CHECK9-NEXT: [[TMP6:%.*]] = load double*, double** [[CN_ADDR]], align 8 // CHECK9-NEXT: [[TMP7:%.*]] = load %struct.TT*, %struct.TT** [[D_ADDR]], align 8 -// CHECK9-NEXT: [[TMP8:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK9-NEXT: [[TMP8:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK9-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP8]], 1 -// CHECK9-NEXT: store i32 [[ADD]], i32* [[CONV]], align 8 +// CHECK9-NEXT: store i32 [[ADD]], i32* [[CONV]], align 4 // CHECK9-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x float], [10 x float]* [[TMP0]], i64 0, i64 2 // CHECK9-NEXT: [[TMP9:%.*]] = load float, float* [[ARRAYIDX]], align 4 // CHECK9-NEXT: [[CONV5:%.*]] = fpext float [[TMP9]] to double @@ -5989,7 +5989,7 @@ int bar(int n){ // CHECK9-NEXT: [[NN_CASTED:%.*]] = alloca i64, align 8 // CHECK9-NEXT: store i64 [[NN]], i64* [[NN_ADDR]], align 8 // CHECK9-NEXT: [[CONV:%.*]] = bitcast i64* [[NN_ADDR]] to i32* -// CHECK9-NEXT: [[TMP0:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK9-NEXT: [[TMP0:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK9-NEXT: [[CONV1:%.*]] = bitcast i64* [[NN_CASTED]] to i32* // CHECK9-NEXT: store i32 [[TMP0]], i32* [[CONV1]], align 4 // CHECK9-NEXT: [[TMP1:%.*]] = load i64, i64* [[NN_CASTED]], align 8 @@ -6008,7 +6008,7 @@ int bar(int n){ // CHECK9-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 // CHECK9-NEXT: store i64 [[NN]], i64* [[NN_ADDR]], align 8 // CHECK9-NEXT: [[CONV:%.*]] = bitcast i64* [[NN_ADDR]] to i32* -// CHECK9-NEXT: [[TMP0:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK9-NEXT: [[TMP0:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK9-NEXT: [[CONV1:%.*]] = bitcast i64* [[NN_CASTED]] to i32* // CHECK9-NEXT: store i32 [[TMP0]], i32* [[CONV1]], align 4 // CHECK9-NEXT: [[TMP1:%.*]] = load i64, i64* [[NN_CASTED]], align 8 @@ -6036,7 +6036,7 @@ int bar(int n){ // CHECK9-NEXT: [[NN_CASTED:%.*]] = alloca i64, align 8 // CHECK9-NEXT: store i64 [[NN]], i64* [[NN_ADDR]], align 8 // CHECK9-NEXT: [[CONV:%.*]] = bitcast i64* [[NN_ADDR]] to i32* -// CHECK9-NEXT: [[TMP0:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK9-NEXT: [[TMP0:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK9-NEXT: [[CONV1:%.*]] = bitcast i64* [[NN_CASTED]] to i32* // CHECK9-NEXT: store i32 [[TMP0]], i32* [[CONV1]], align 4 // CHECK9-NEXT: [[TMP1:%.*]] = load i64, i64* [[NN_CASTED]], align 8 @@ -6113,15 +6113,15 @@ int bar(int n){ // CHECK9-NEXT: [[CONV1:%.*]] = bitcast i64* [[AA_ADDR]] to i16* // CHECK9-NEXT: [[CONV2:%.*]] = bitcast i64* [[AAA_ADDR]] to i8* // CHECK9-NEXT: [[TMP0:%.*]] = load [10 x i32]*, [10 x i32]** [[B_ADDR]], align 8 -// CHECK9-NEXT: [[TMP1:%.*]] = 
load i32, i32* [[CONV]], align 8 +// CHECK9-NEXT: [[TMP1:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK9-NEXT: [[CONV3:%.*]] = bitcast i64* [[A_CASTED]] to i32* // CHECK9-NEXT: store i32 [[TMP1]], i32* [[CONV3]], align 4 // CHECK9-NEXT: [[TMP2:%.*]] = load i64, i64* [[A_CASTED]], align 8 -// CHECK9-NEXT: [[TMP3:%.*]] = load i16, i16* [[CONV1]], align 8 +// CHECK9-NEXT: [[TMP3:%.*]] = load i16, i16* [[CONV1]], align 2 // CHECK9-NEXT: [[CONV4:%.*]] = bitcast i64* [[AA_CASTED]] to i16* // CHECK9-NEXT: store i16 [[TMP3]], i16* [[CONV4]], align 2 // CHECK9-NEXT: [[TMP4:%.*]] = load i64, i64* [[AA_CASTED]], align 8 -// CHECK9-NEXT: [[TMP5:%.*]] = load i8, i8* [[CONV2]], align 8 +// CHECK9-NEXT: [[TMP5:%.*]] = load i8, i8* [[CONV2]], align 1 // CHECK9-NEXT: [[CONV5:%.*]] = bitcast i64* [[AAA_CASTED]] to i8* // CHECK9-NEXT: store i8 [[TMP5]], i8* [[CONV5]], align 1 // CHECK9-NEXT: [[TMP6:%.*]] = load i64, i64* [[AAA_CASTED]], align 8 @@ -6148,19 +6148,19 @@ int bar(int n){ // CHECK9-NEXT: [[CONV1:%.*]] = bitcast i64* [[AA_ADDR]] to i16* // CHECK9-NEXT: [[CONV2:%.*]] = bitcast i64* [[AAA_ADDR]] to i8* // CHECK9-NEXT: [[TMP0:%.*]] = load [10 x i32]*, [10 x i32]** [[B_ADDR]], align 8 -// CHECK9-NEXT: [[TMP1:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK9-NEXT: [[TMP1:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK9-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP1]], 1 -// CHECK9-NEXT: store i32 [[ADD]], i32* [[CONV]], align 8 -// CHECK9-NEXT: [[TMP2:%.*]] = load i16, i16* [[CONV1]], align 8 +// CHECK9-NEXT: store i32 [[ADD]], i32* [[CONV]], align 4 +// CHECK9-NEXT: [[TMP2:%.*]] = load i16, i16* [[CONV1]], align 2 // CHECK9-NEXT: [[CONV3:%.*]] = sext i16 [[TMP2]] to i32 // CHECK9-NEXT: [[ADD4:%.*]] = add nsw i32 [[CONV3]], 1 // CHECK9-NEXT: [[CONV5:%.*]] = trunc i32 [[ADD4]] to i16 -// CHECK9-NEXT: store i16 [[CONV5]], i16* [[CONV1]], align 8 -// CHECK9-NEXT: [[TMP3:%.*]] = load i8, i8* [[CONV2]], align 8 +// CHECK9-NEXT: store i16 [[CONV5]], i16* [[CONV1]], align 2 +// CHECK9-NEXT: [[TMP3:%.*]] = load i8, i8* [[CONV2]], align 1 // CHECK9-NEXT: [[CONV6:%.*]] = sext i8 [[TMP3]] to i32 // CHECK9-NEXT: [[ADD7:%.*]] = add nsw i32 [[CONV6]], 1 // CHECK9-NEXT: [[CONV8:%.*]] = trunc i32 [[ADD7]] to i8 -// CHECK9-NEXT: store i8 [[CONV8]], i8* [[CONV2]], align 8 +// CHECK9-NEXT: store i8 [[CONV8]], i8* [[CONV2]], align 1 // CHECK9-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], [10 x i32]* [[TMP0]], i64 0, i64 2 // CHECK9-NEXT: [[TMP4:%.*]] = load i32, i32* [[ARRAYIDX]], align 4 // CHECK9-NEXT: [[ADD9:%.*]] = add nsw i32 [[TMP4]], 1 @@ -6187,7 +6187,7 @@ int bar(int n){ // CHECK9-NEXT: [[TMP1:%.*]] = load i64, i64* [[VLA_ADDR]], align 8 // CHECK9-NEXT: [[TMP2:%.*]] = load i64, i64* [[VLA_ADDR2]], align 8 // CHECK9-NEXT: [[TMP3:%.*]] = load i16*, i16** [[C_ADDR]], align 8 -// CHECK9-NEXT: [[TMP4:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK9-NEXT: [[TMP4:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK9-NEXT: [[CONV3:%.*]] = bitcast i64* [[B_CASTED]] to i32* // CHECK9-NEXT: store i32 [[TMP4]], i32* [[CONV3]], align 4 // CHECK9-NEXT: [[TMP5:%.*]] = load i64, i64* [[B_CASTED]], align 8 @@ -6217,7 +6217,7 @@ int bar(int n){ // CHECK9-NEXT: [[TMP1:%.*]] = load i64, i64* [[VLA_ADDR]], align 8 // CHECK9-NEXT: [[TMP2:%.*]] = load i64, i64* [[VLA_ADDR2]], align 8 // CHECK9-NEXT: [[TMP3:%.*]] = load i16*, i16** [[C_ADDR]], align 8 -// CHECK9-NEXT: [[TMP4:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK9-NEXT: [[TMP4:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK9-NEXT: [[CONV3:%.*]] = sitofp i32 
[[TMP4]] to double // CHECK9-NEXT: [[ADD:%.*]] = fadd double [[CONV3]], 1.500000e+00 // CHECK9-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_S1:%.*]], %struct.S1* [[TMP0]], i32 0, i32 0 @@ -6248,11 +6248,11 @@ int bar(int n){ // CHECK9-NEXT: [[CONV:%.*]] = bitcast i64* [[A_ADDR]] to i32* // CHECK9-NEXT: [[CONV1:%.*]] = bitcast i64* [[AA_ADDR]] to i16* // CHECK9-NEXT: [[TMP0:%.*]] = load [10 x i32]*, [10 x i32]** [[B_ADDR]], align 8 -// CHECK9-NEXT: [[TMP1:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK9-NEXT: [[TMP1:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK9-NEXT: [[CONV2:%.*]] = bitcast i64* [[A_CASTED]] to i32* // CHECK9-NEXT: store i32 [[TMP1]], i32* [[CONV2]], align 4 // CHECK9-NEXT: [[TMP2:%.*]] = load i64, i64* [[A_CASTED]], align 8 -// CHECK9-NEXT: [[TMP3:%.*]] = load i16, i16* [[CONV1]], align 8 +// CHECK9-NEXT: [[TMP3:%.*]] = load i16, i16* [[CONV1]], align 2 // CHECK9-NEXT: [[CONV3:%.*]] = bitcast i64* [[AA_CASTED]] to i16* // CHECK9-NEXT: store i16 [[TMP3]], i16* [[CONV3]], align 2 // CHECK9-NEXT: [[TMP4:%.*]] = load i64, i64* [[AA_CASTED]], align 8 @@ -6276,14 +6276,14 @@ int bar(int n){ // CHECK9-NEXT: [[CONV:%.*]] = bitcast i64* [[A_ADDR]] to i32* // CHECK9-NEXT: [[CONV1:%.*]] = bitcast i64* [[AA_ADDR]] to i16* // CHECK9-NEXT: [[TMP0:%.*]] = load [10 x i32]*, [10 x i32]** [[B_ADDR]], align 8 -// CHECK9-NEXT: [[TMP1:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK9-NEXT: [[TMP1:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK9-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP1]], 1 -// CHECK9-NEXT: store i32 [[ADD]], i32* [[CONV]], align 8 -// CHECK9-NEXT: [[TMP2:%.*]] = load i16, i16* [[CONV1]], align 8 +// CHECK9-NEXT: store i32 [[ADD]], i32* [[CONV]], align 4 +// CHECK9-NEXT: [[TMP2:%.*]] = load i16, i16* [[CONV1]], align 2 // CHECK9-NEXT: [[CONV2:%.*]] = sext i16 [[TMP2]] to i32 // CHECK9-NEXT: [[ADD3:%.*]] = add nsw i32 [[CONV2]], 1 // CHECK9-NEXT: [[CONV4:%.*]] = trunc i32 [[ADD3]] to i16 -// CHECK9-NEXT: store i16 [[CONV4]], i16* [[CONV1]], align 8 +// CHECK9-NEXT: store i16 [[CONV4]], i16* [[CONV1]], align 2 // CHECK9-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], [10 x i32]* [[TMP0]], i64 0, i64 2 // CHECK9-NEXT: [[TMP3:%.*]] = load i32, i32* [[ARRAYIDX]], align 4 // CHECK9-NEXT: [[ADD5:%.*]] = add nsw i32 [[TMP3]], 1 @@ -6305,10 +6305,10 @@ int bar(int n){ // CHECK10-NEXT: [[CONV:%.*]] = bitcast i64* [[AA_ADDR]] to i16* // CHECK10-NEXT: [[CONV3:%.*]] = bitcast i64* [[DOTCAPTURE_EXPR__ADDR]] to i32* // CHECK10-NEXT: [[CONV4:%.*]] = bitcast i64* [[DOTCAPTURE_EXPR__ADDR2]] to i32* -// CHECK10-NEXT: [[TMP1:%.*]] = load i32, i32* [[CONV3]], align 8 -// CHECK10-NEXT: [[TMP2:%.*]] = load i32, i32* [[CONV4]], align 8 +// CHECK10-NEXT: [[TMP1:%.*]] = load i32, i32* [[CONV3]], align 4 +// CHECK10-NEXT: [[TMP2:%.*]] = load i32, i32* [[CONV4]], align 4 // CHECK10-NEXT: call void @__kmpc_push_num_teams(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]], i32 [[TMP1]], i32 [[TMP2]]) -// CHECK10-NEXT: [[TMP3:%.*]] = load i16, i16* [[CONV]], align 8 +// CHECK10-NEXT: [[TMP3:%.*]] = load i16, i16* [[CONV]], align 2 // CHECK10-NEXT: [[CONV5:%.*]] = bitcast i64* [[AA_CASTED]] to i16* // CHECK10-NEXT: store i16 [[TMP3]], i16* [[CONV5]], align 2 // CHECK10-NEXT: [[TMP4:%.*]] = load i64, i64* [[AA_CASTED]], align 8 @@ -6336,7 +6336,7 @@ int bar(int n){ // CHECK10-NEXT: [[AA_CASTED:%.*]] = alloca i64, align 8 // CHECK10-NEXT: store i64 [[AA]], i64* [[AA_ADDR]], align 8 // CHECK10-NEXT: [[CONV:%.*]] = bitcast i64* [[AA_ADDR]] to i16* -// CHECK10-NEXT: [[TMP0:%.*]] = load i16, i16* 
[[CONV]], align 8 +// CHECK10-NEXT: [[TMP0:%.*]] = load i16, i16* [[CONV]], align 2 // CHECK10-NEXT: [[CONV1:%.*]] = bitcast i64* [[AA_CASTED]] to i16* // CHECK10-NEXT: store i16 [[TMP0]], i16* [[CONV1]], align 2 // CHECK10-NEXT: [[TMP1:%.*]] = load i64, i64* [[AA_CASTED]], align 8 @@ -6354,11 +6354,11 @@ int bar(int n){ // CHECK10-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 // CHECK10-NEXT: store i64 [[AA]], i64* [[AA_ADDR]], align 8 // CHECK10-NEXT: [[CONV:%.*]] = bitcast i64* [[AA_ADDR]] to i16* -// CHECK10-NEXT: [[TMP0:%.*]] = load i16, i16* [[CONV]], align 8 +// CHECK10-NEXT: [[TMP0:%.*]] = load i16, i16* [[CONV]], align 2 // CHECK10-NEXT: [[CONV1:%.*]] = sext i16 [[TMP0]] to i32 // CHECK10-NEXT: [[ADD:%.*]] = add nsw i32 [[CONV1]], 1 // CHECK10-NEXT: [[CONV2:%.*]] = trunc i32 [[ADD]] to i16 -// CHECK10-NEXT: store i16 [[CONV2]], i16* [[CONV]], align 8 +// CHECK10-NEXT: store i16 [[CONV2]], i16* [[CONV]], align 2 // CHECK10-NEXT: ret void // // @@ -6373,11 +6373,11 @@ int bar(int n){ // CHECK10-NEXT: store i64 [[AA]], i64* [[AA_ADDR]], align 8 // CHECK10-NEXT: [[CONV:%.*]] = bitcast i64* [[A_ADDR]] to i32* // CHECK10-NEXT: [[CONV1:%.*]] = bitcast i64* [[AA_ADDR]] to i16* -// CHECK10-NEXT: [[TMP0:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK10-NEXT: [[TMP0:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK10-NEXT: [[CONV2:%.*]] = bitcast i64* [[A_CASTED]] to i32* // CHECK10-NEXT: store i32 [[TMP0]], i32* [[CONV2]], align 4 // CHECK10-NEXT: [[TMP1:%.*]] = load i64, i64* [[A_CASTED]], align 8 -// CHECK10-NEXT: [[TMP2:%.*]] = load i16, i16* [[CONV1]], align 8 +// CHECK10-NEXT: [[TMP2:%.*]] = load i16, i16* [[CONV1]], align 2 // CHECK10-NEXT: [[CONV3:%.*]] = bitcast i64* [[AA_CASTED]] to i16* // CHECK10-NEXT: store i16 [[TMP2]], i16* [[CONV3]], align 2 // CHECK10-NEXT: [[TMP3:%.*]] = load i64, i64* [[AA_CASTED]], align 8 @@ -6398,14 +6398,14 @@ int bar(int n){ // CHECK10-NEXT: store i64 [[AA]], i64* [[AA_ADDR]], align 8 // CHECK10-NEXT: [[CONV:%.*]] = bitcast i64* [[A_ADDR]] to i32* // CHECK10-NEXT: [[CONV1:%.*]] = bitcast i64* [[AA_ADDR]] to i16* -// CHECK10-NEXT: [[TMP0:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK10-NEXT: [[TMP0:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK10-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP0]], 1 -// CHECK10-NEXT: store i32 [[ADD]], i32* [[CONV]], align 8 -// CHECK10-NEXT: [[TMP1:%.*]] = load i16, i16* [[CONV1]], align 8 +// CHECK10-NEXT: store i32 [[ADD]], i32* [[CONV]], align 4 +// CHECK10-NEXT: [[TMP1:%.*]] = load i16, i16* [[CONV1]], align 2 // CHECK10-NEXT: [[CONV2:%.*]] = sext i16 [[TMP1]] to i32 // CHECK10-NEXT: [[ADD3:%.*]] = add nsw i32 [[CONV2]], 1 // CHECK10-NEXT: [[CONV4:%.*]] = trunc i32 [[ADD3]] to i16 -// CHECK10-NEXT: store i16 [[CONV4]], i16* [[CONV1]], align 8 +// CHECK10-NEXT: store i16 [[CONV4]], i16* [[CONV1]], align 2 // CHECK10-NEXT: ret void // // @@ -6440,7 +6440,7 @@ int bar(int n){ // CHECK10-NEXT: [[TMP5:%.*]] = load i64, i64* [[VLA_ADDR4]], align 8 // CHECK10-NEXT: [[TMP6:%.*]] = load double*, double** [[CN_ADDR]], align 8 // CHECK10-NEXT: [[TMP7:%.*]] = load %struct.TT*, %struct.TT** [[D_ADDR]], align 8 -// CHECK10-NEXT: [[TMP8:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK10-NEXT: [[TMP8:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK10-NEXT: [[CONV5:%.*]] = bitcast i64* [[A_CASTED]] to i32* // CHECK10-NEXT: store i32 [[TMP8]], i32* [[CONV5]], align 4 // CHECK10-NEXT: [[TMP9:%.*]] = load i64, i64* [[A_CASTED]], align 8 @@ -6482,9 +6482,9 @@ int bar(int n){ // CHECK10-NEXT: [[TMP5:%.*]] 
= load i64, i64* [[VLA_ADDR4]], align 8 // CHECK10-NEXT: [[TMP6:%.*]] = load double*, double** [[CN_ADDR]], align 8 // CHECK10-NEXT: [[TMP7:%.*]] = load %struct.TT*, %struct.TT** [[D_ADDR]], align 8 -// CHECK10-NEXT: [[TMP8:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK10-NEXT: [[TMP8:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK10-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP8]], 1 -// CHECK10-NEXT: store i32 [[ADD]], i32* [[CONV]], align 8 +// CHECK10-NEXT: store i32 [[ADD]], i32* [[CONV]], align 4 // CHECK10-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x float], [10 x float]* [[TMP0]], i64 0, i64 2 // CHECK10-NEXT: [[TMP9:%.*]] = load float, float* [[ARRAYIDX]], align 4 // CHECK10-NEXT: [[CONV5:%.*]] = fpext float [[TMP9]] to double @@ -6528,7 +6528,7 @@ int bar(int n){ // CHECK10-NEXT: [[NN_CASTED:%.*]] = alloca i64, align 8 // CHECK10-NEXT: store i64 [[NN]], i64* [[NN_ADDR]], align 8 // CHECK10-NEXT: [[CONV:%.*]] = bitcast i64* [[NN_ADDR]] to i32* -// CHECK10-NEXT: [[TMP0:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK10-NEXT: [[TMP0:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK10-NEXT: [[CONV1:%.*]] = bitcast i64* [[NN_CASTED]] to i32* // CHECK10-NEXT: store i32 [[TMP0]], i32* [[CONV1]], align 4 // CHECK10-NEXT: [[TMP1:%.*]] = load i64, i64* [[NN_CASTED]], align 8 @@ -6547,7 +6547,7 @@ int bar(int n){ // CHECK10-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 // CHECK10-NEXT: store i64 [[NN]], i64* [[NN_ADDR]], align 8 // CHECK10-NEXT: [[CONV:%.*]] = bitcast i64* [[NN_ADDR]] to i32* -// CHECK10-NEXT: [[TMP0:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK10-NEXT: [[TMP0:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK10-NEXT: [[CONV1:%.*]] = bitcast i64* [[NN_CASTED]] to i32* // CHECK10-NEXT: store i32 [[TMP0]], i32* [[CONV1]], align 4 // CHECK10-NEXT: [[TMP1:%.*]] = load i64, i64* [[NN_CASTED]], align 8 @@ -6575,7 +6575,7 @@ int bar(int n){ // CHECK10-NEXT: [[NN_CASTED:%.*]] = alloca i64, align 8 // CHECK10-NEXT: store i64 [[NN]], i64* [[NN_ADDR]], align 8 // CHECK10-NEXT: [[CONV:%.*]] = bitcast i64* [[NN_ADDR]] to i32* -// CHECK10-NEXT: [[TMP0:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK10-NEXT: [[TMP0:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK10-NEXT: [[CONV1:%.*]] = bitcast i64* [[NN_CASTED]] to i32* // CHECK10-NEXT: store i32 [[TMP0]], i32* [[CONV1]], align 4 // CHECK10-NEXT: [[TMP1:%.*]] = load i64, i64* [[NN_CASTED]], align 8 @@ -6652,15 +6652,15 @@ int bar(int n){ // CHECK10-NEXT: [[CONV1:%.*]] = bitcast i64* [[AA_ADDR]] to i16* // CHECK10-NEXT: [[CONV2:%.*]] = bitcast i64* [[AAA_ADDR]] to i8* // CHECK10-NEXT: [[TMP0:%.*]] = load [10 x i32]*, [10 x i32]** [[B_ADDR]], align 8 -// CHECK10-NEXT: [[TMP1:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK10-NEXT: [[TMP1:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK10-NEXT: [[CONV3:%.*]] = bitcast i64* [[A_CASTED]] to i32* // CHECK10-NEXT: store i32 [[TMP1]], i32* [[CONV3]], align 4 // CHECK10-NEXT: [[TMP2:%.*]] = load i64, i64* [[A_CASTED]], align 8 -// CHECK10-NEXT: [[TMP3:%.*]] = load i16, i16* [[CONV1]], align 8 +// CHECK10-NEXT: [[TMP3:%.*]] = load i16, i16* [[CONV1]], align 2 // CHECK10-NEXT: [[CONV4:%.*]] = bitcast i64* [[AA_CASTED]] to i16* // CHECK10-NEXT: store i16 [[TMP3]], i16* [[CONV4]], align 2 // CHECK10-NEXT: [[TMP4:%.*]] = load i64, i64* [[AA_CASTED]], align 8 -// CHECK10-NEXT: [[TMP5:%.*]] = load i8, i8* [[CONV2]], align 8 +// CHECK10-NEXT: [[TMP5:%.*]] = load i8, i8* [[CONV2]], align 1 // CHECK10-NEXT: [[CONV5:%.*]] = bitcast i64* [[AAA_CASTED]] to i8* // 
CHECK10-NEXT: store i8 [[TMP5]], i8* [[CONV5]], align 1 // CHECK10-NEXT: [[TMP6:%.*]] = load i64, i64* [[AAA_CASTED]], align 8 @@ -6687,19 +6687,19 @@ int bar(int n){ // CHECK10-NEXT: [[CONV1:%.*]] = bitcast i64* [[AA_ADDR]] to i16* // CHECK10-NEXT: [[CONV2:%.*]] = bitcast i64* [[AAA_ADDR]] to i8* // CHECK10-NEXT: [[TMP0:%.*]] = load [10 x i32]*, [10 x i32]** [[B_ADDR]], align 8 -// CHECK10-NEXT: [[TMP1:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK10-NEXT: [[TMP1:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK10-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP1]], 1 -// CHECK10-NEXT: store i32 [[ADD]], i32* [[CONV]], align 8 -// CHECK10-NEXT: [[TMP2:%.*]] = load i16, i16* [[CONV1]], align 8 +// CHECK10-NEXT: store i32 [[ADD]], i32* [[CONV]], align 4 +// CHECK10-NEXT: [[TMP2:%.*]] = load i16, i16* [[CONV1]], align 2 // CHECK10-NEXT: [[CONV3:%.*]] = sext i16 [[TMP2]] to i32 // CHECK10-NEXT: [[ADD4:%.*]] = add nsw i32 [[CONV3]], 1 // CHECK10-NEXT: [[CONV5:%.*]] = trunc i32 [[ADD4]] to i16 -// CHECK10-NEXT: store i16 [[CONV5]], i16* [[CONV1]], align 8 -// CHECK10-NEXT: [[TMP3:%.*]] = load i8, i8* [[CONV2]], align 8 +// CHECK10-NEXT: store i16 [[CONV5]], i16* [[CONV1]], align 2 +// CHECK10-NEXT: [[TMP3:%.*]] = load i8, i8* [[CONV2]], align 1 // CHECK10-NEXT: [[CONV6:%.*]] = sext i8 [[TMP3]] to i32 // CHECK10-NEXT: [[ADD7:%.*]] = add nsw i32 [[CONV6]], 1 // CHECK10-NEXT: [[CONV8:%.*]] = trunc i32 [[ADD7]] to i8 -// CHECK10-NEXT: store i8 [[CONV8]], i8* [[CONV2]], align 8 +// CHECK10-NEXT: store i8 [[CONV8]], i8* [[CONV2]], align 1 // CHECK10-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], [10 x i32]* [[TMP0]], i64 0, i64 2 // CHECK10-NEXT: [[TMP4:%.*]] = load i32, i32* [[ARRAYIDX]], align 4 // CHECK10-NEXT: [[ADD9:%.*]] = add nsw i32 [[TMP4]], 1 @@ -6726,7 +6726,7 @@ int bar(int n){ // CHECK10-NEXT: [[TMP1:%.*]] = load i64, i64* [[VLA_ADDR]], align 8 // CHECK10-NEXT: [[TMP2:%.*]] = load i64, i64* [[VLA_ADDR2]], align 8 // CHECK10-NEXT: [[TMP3:%.*]] = load i16*, i16** [[C_ADDR]], align 8 -// CHECK10-NEXT: [[TMP4:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK10-NEXT: [[TMP4:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK10-NEXT: [[CONV3:%.*]] = bitcast i64* [[B_CASTED]] to i32* // CHECK10-NEXT: store i32 [[TMP4]], i32* [[CONV3]], align 4 // CHECK10-NEXT: [[TMP5:%.*]] = load i64, i64* [[B_CASTED]], align 8 @@ -6756,7 +6756,7 @@ int bar(int n){ // CHECK10-NEXT: [[TMP1:%.*]] = load i64, i64* [[VLA_ADDR]], align 8 // CHECK10-NEXT: [[TMP2:%.*]] = load i64, i64* [[VLA_ADDR2]], align 8 // CHECK10-NEXT: [[TMP3:%.*]] = load i16*, i16** [[C_ADDR]], align 8 -// CHECK10-NEXT: [[TMP4:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK10-NEXT: [[TMP4:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK10-NEXT: [[CONV3:%.*]] = sitofp i32 [[TMP4]] to double // CHECK10-NEXT: [[ADD:%.*]] = fadd double [[CONV3]], 1.500000e+00 // CHECK10-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_S1:%.*]], %struct.S1* [[TMP0]], i32 0, i32 0 @@ -6787,11 +6787,11 @@ int bar(int n){ // CHECK10-NEXT: [[CONV:%.*]] = bitcast i64* [[A_ADDR]] to i32* // CHECK10-NEXT: [[CONV1:%.*]] = bitcast i64* [[AA_ADDR]] to i16* // CHECK10-NEXT: [[TMP0:%.*]] = load [10 x i32]*, [10 x i32]** [[B_ADDR]], align 8 -// CHECK10-NEXT: [[TMP1:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK10-NEXT: [[TMP1:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK10-NEXT: [[CONV2:%.*]] = bitcast i64* [[A_CASTED]] to i32* // CHECK10-NEXT: store i32 [[TMP1]], i32* [[CONV2]], align 4 // CHECK10-NEXT: [[TMP2:%.*]] = load i64, i64* [[A_CASTED]], align 8 -// 
CHECK10-NEXT: [[TMP3:%.*]] = load i16, i16* [[CONV1]], align 8 +// CHECK10-NEXT: [[TMP3:%.*]] = load i16, i16* [[CONV1]], align 2 // CHECK10-NEXT: [[CONV3:%.*]] = bitcast i64* [[AA_CASTED]] to i16* // CHECK10-NEXT: store i16 [[TMP3]], i16* [[CONV3]], align 2 // CHECK10-NEXT: [[TMP4:%.*]] = load i64, i64* [[AA_CASTED]], align 8 @@ -6815,14 +6815,14 @@ int bar(int n){ // CHECK10-NEXT: [[CONV:%.*]] = bitcast i64* [[A_ADDR]] to i32* // CHECK10-NEXT: [[CONV1:%.*]] = bitcast i64* [[AA_ADDR]] to i16* // CHECK10-NEXT: [[TMP0:%.*]] = load [10 x i32]*, [10 x i32]** [[B_ADDR]], align 8 -// CHECK10-NEXT: [[TMP1:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK10-NEXT: [[TMP1:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK10-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP1]], 1 -// CHECK10-NEXT: store i32 [[ADD]], i32* [[CONV]], align 8 -// CHECK10-NEXT: [[TMP2:%.*]] = load i16, i16* [[CONV1]], align 8 +// CHECK10-NEXT: store i32 [[ADD]], i32* [[CONV]], align 4 +// CHECK10-NEXT: [[TMP2:%.*]] = load i16, i16* [[CONV1]], align 2 // CHECK10-NEXT: [[CONV2:%.*]] = sext i16 [[TMP2]] to i32 // CHECK10-NEXT: [[ADD3:%.*]] = add nsw i32 [[CONV2]], 1 // CHECK10-NEXT: [[CONV4:%.*]] = trunc i32 [[ADD3]] to i16 -// CHECK10-NEXT: store i16 [[CONV4]], i16* [[CONV1]], align 8 +// CHECK10-NEXT: store i16 [[CONV4]], i16* [[CONV1]], align 2 // CHECK10-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], [10 x i32]* [[TMP0]], i64 0, i64 2 // CHECK10-NEXT: [[TMP3:%.*]] = load i32, i32* [[ARRAYIDX]], align 4 // CHECK10-NEXT: [[ADD5:%.*]] = add nsw i32 [[TMP3]], 1 @@ -6845,7 +6845,7 @@ int bar(int n){ // CHECK11-NEXT: [[TMP1:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR__ADDR]], align 4 // CHECK11-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR__ADDR2]], align 4 // CHECK11-NEXT: call void @__kmpc_push_num_teams(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]], i32 [[TMP1]], i32 [[TMP2]]) -// CHECK11-NEXT: [[TMP3:%.*]] = load i16, i16* [[CONV]], align 4 +// CHECK11-NEXT: [[TMP3:%.*]] = load i16, i16* [[CONV]], align 2 // CHECK11-NEXT: [[CONV3:%.*]] = bitcast i32* [[AA_CASTED]] to i16* // CHECK11-NEXT: store i16 [[TMP3]], i16* [[CONV3]], align 2 // CHECK11-NEXT: [[TMP4:%.*]] = load i32, i32* [[AA_CASTED]], align 4 @@ -6873,7 +6873,7 @@ int bar(int n){ // CHECK11-NEXT: [[AA_CASTED:%.*]] = alloca i32, align 4 // CHECK11-NEXT: store i32 [[AA]], i32* [[AA_ADDR]], align 4 // CHECK11-NEXT: [[CONV:%.*]] = bitcast i32* [[AA_ADDR]] to i16* -// CHECK11-NEXT: [[TMP0:%.*]] = load i16, i16* [[CONV]], align 4 +// CHECK11-NEXT: [[TMP0:%.*]] = load i16, i16* [[CONV]], align 2 // CHECK11-NEXT: [[CONV1:%.*]] = bitcast i32* [[AA_CASTED]] to i16* // CHECK11-NEXT: store i16 [[TMP0]], i16* [[CONV1]], align 2 // CHECK11-NEXT: [[TMP1:%.*]] = load i32, i32* [[AA_CASTED]], align 4 @@ -6891,11 +6891,11 @@ int bar(int n){ // CHECK11-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4 // CHECK11-NEXT: store i32 [[AA]], i32* [[AA_ADDR]], align 4 // CHECK11-NEXT: [[CONV:%.*]] = bitcast i32* [[AA_ADDR]] to i16* -// CHECK11-NEXT: [[TMP0:%.*]] = load i16, i16* [[CONV]], align 4 +// CHECK11-NEXT: [[TMP0:%.*]] = load i16, i16* [[CONV]], align 2 // CHECK11-NEXT: [[CONV1:%.*]] = sext i16 [[TMP0]] to i32 // CHECK11-NEXT: [[ADD:%.*]] = add nsw i32 [[CONV1]], 1 // CHECK11-NEXT: [[CONV2:%.*]] = trunc i32 [[ADD]] to i16 -// CHECK11-NEXT: store i16 [[CONV2]], i16* [[CONV]], align 4 +// CHECK11-NEXT: store i16 [[CONV2]], i16* [[CONV]], align 2 // CHECK11-NEXT: ret void // // @@ -6912,7 +6912,7 @@ int bar(int n){ // CHECK11-NEXT: [[TMP0:%.*]] = load i32, 
i32* [[A_ADDR]], align 4 // CHECK11-NEXT: store i32 [[TMP0]], i32* [[A_CASTED]], align 4 // CHECK11-NEXT: [[TMP1:%.*]] = load i32, i32* [[A_CASTED]], align 4 -// CHECK11-NEXT: [[TMP2:%.*]] = load i16, i16* [[CONV]], align 4 +// CHECK11-NEXT: [[TMP2:%.*]] = load i16, i16* [[CONV]], align 2 // CHECK11-NEXT: [[CONV1:%.*]] = bitcast i32* [[AA_CASTED]] to i16* // CHECK11-NEXT: store i16 [[TMP2]], i16* [[CONV1]], align 2 // CHECK11-NEXT: [[TMP3:%.*]] = load i32, i32* [[AA_CASTED]], align 4 @@ -6935,11 +6935,11 @@ int bar(int n){ // CHECK11-NEXT: [[TMP0:%.*]] = load i32, i32* [[A_ADDR]], align 4 // CHECK11-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP0]], 1 // CHECK11-NEXT: store i32 [[ADD]], i32* [[A_ADDR]], align 4 -// CHECK11-NEXT: [[TMP1:%.*]] = load i16, i16* [[CONV]], align 4 +// CHECK11-NEXT: [[TMP1:%.*]] = load i16, i16* [[CONV]], align 2 // CHECK11-NEXT: [[CONV1:%.*]] = sext i16 [[TMP1]] to i32 // CHECK11-NEXT: [[ADD2:%.*]] = add nsw i32 [[CONV1]], 1 // CHECK11-NEXT: [[CONV3:%.*]] = trunc i32 [[ADD2]] to i16 -// CHECK11-NEXT: store i16 [[CONV3]], i16* [[CONV]], align 4 +// CHECK11-NEXT: store i16 [[CONV3]], i16* [[CONV]], align 2 // CHECK11-NEXT: ret void // // @@ -7177,11 +7177,11 @@ int bar(int n){ // CHECK11-NEXT: [[TMP1:%.*]] = load i32, i32* [[A_ADDR]], align 4 // CHECK11-NEXT: store i32 [[TMP1]], i32* [[A_CASTED]], align 4 // CHECK11-NEXT: [[TMP2:%.*]] = load i32, i32* [[A_CASTED]], align 4 -// CHECK11-NEXT: [[TMP3:%.*]] = load i16, i16* [[CONV]], align 4 +// CHECK11-NEXT: [[TMP3:%.*]] = load i16, i16* [[CONV]], align 2 // CHECK11-NEXT: [[CONV2:%.*]] = bitcast i32* [[AA_CASTED]] to i16* // CHECK11-NEXT: store i16 [[TMP3]], i16* [[CONV2]], align 2 // CHECK11-NEXT: [[TMP4:%.*]] = load i32, i32* [[AA_CASTED]], align 4 -// CHECK11-NEXT: [[TMP5:%.*]] = load i8, i8* [[CONV1]], align 4 +// CHECK11-NEXT: [[TMP5:%.*]] = load i8, i8* [[CONV1]], align 1 // CHECK11-NEXT: [[CONV3:%.*]] = bitcast i32* [[AAA_CASTED]] to i8* // CHECK11-NEXT: store i8 [[TMP5]], i8* [[CONV3]], align 1 // CHECK11-NEXT: [[TMP6:%.*]] = load i32, i32* [[AAA_CASTED]], align 4 @@ -7210,16 +7210,16 @@ int bar(int n){ // CHECK11-NEXT: [[TMP1:%.*]] = load i32, i32* [[A_ADDR]], align 4 // CHECK11-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP1]], 1 // CHECK11-NEXT: store i32 [[ADD]], i32* [[A_ADDR]], align 4 -// CHECK11-NEXT: [[TMP2:%.*]] = load i16, i16* [[CONV]], align 4 +// CHECK11-NEXT: [[TMP2:%.*]] = load i16, i16* [[CONV]], align 2 // CHECK11-NEXT: [[CONV2:%.*]] = sext i16 [[TMP2]] to i32 // CHECK11-NEXT: [[ADD3:%.*]] = add nsw i32 [[CONV2]], 1 // CHECK11-NEXT: [[CONV4:%.*]] = trunc i32 [[ADD3]] to i16 -// CHECK11-NEXT: store i16 [[CONV4]], i16* [[CONV]], align 4 -// CHECK11-NEXT: [[TMP3:%.*]] = load i8, i8* [[CONV1]], align 4 +// CHECK11-NEXT: store i16 [[CONV4]], i16* [[CONV]], align 2 +// CHECK11-NEXT: [[TMP3:%.*]] = load i8, i8* [[CONV1]], align 1 // CHECK11-NEXT: [[CONV5:%.*]] = sext i8 [[TMP3]] to i32 // CHECK11-NEXT: [[ADD6:%.*]] = add nsw i32 [[CONV5]], 1 // CHECK11-NEXT: [[CONV7:%.*]] = trunc i32 [[ADD6]] to i8 -// CHECK11-NEXT: store i8 [[CONV7]], i8* [[CONV1]], align 4 +// CHECK11-NEXT: store i8 [[CONV7]], i8* [[CONV1]], align 1 // CHECK11-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], [10 x i32]* [[TMP0]], i32 0, i32 2 // CHECK11-NEXT: [[TMP4:%.*]] = load i32, i32* [[ARRAYIDX]], align 4 // CHECK11-NEXT: [[ADD8:%.*]] = add nsw i32 [[TMP4]], 1 @@ -7306,7 +7306,7 @@ int bar(int n){ // CHECK11-NEXT: [[TMP1:%.*]] = load i32, i32* [[A_ADDR]], align 4 // CHECK11-NEXT: store i32 [[TMP1]], i32* [[A_CASTED]], align 4 // 
CHECK11-NEXT: [[TMP2:%.*]] = load i32, i32* [[A_CASTED]], align 4 -// CHECK11-NEXT: [[TMP3:%.*]] = load i16, i16* [[CONV]], align 4 +// CHECK11-NEXT: [[TMP3:%.*]] = load i16, i16* [[CONV]], align 2 // CHECK11-NEXT: [[CONV1:%.*]] = bitcast i32* [[AA_CASTED]] to i16* // CHECK11-NEXT: store i16 [[TMP3]], i16* [[CONV1]], align 2 // CHECK11-NEXT: [[TMP4:%.*]] = load i32, i32* [[AA_CASTED]], align 4 @@ -7332,11 +7332,11 @@ int bar(int n){ // CHECK11-NEXT: [[TMP1:%.*]] = load i32, i32* [[A_ADDR]], align 4 // CHECK11-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP1]], 1 // CHECK11-NEXT: store i32 [[ADD]], i32* [[A_ADDR]], align 4 -// CHECK11-NEXT: [[TMP2:%.*]] = load i16, i16* [[CONV]], align 4 +// CHECK11-NEXT: [[TMP2:%.*]] = load i16, i16* [[CONV]], align 2 // CHECK11-NEXT: [[CONV1:%.*]] = sext i16 [[TMP2]] to i32 // CHECK11-NEXT: [[ADD2:%.*]] = add nsw i32 [[CONV1]], 1 // CHECK11-NEXT: [[CONV3:%.*]] = trunc i32 [[ADD2]] to i16 -// CHECK11-NEXT: store i16 [[CONV3]], i16* [[CONV]], align 4 +// CHECK11-NEXT: store i16 [[CONV3]], i16* [[CONV]], align 2 // CHECK11-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], [10 x i32]* [[TMP0]], i32 0, i32 2 // CHECK11-NEXT: [[TMP3:%.*]] = load i32, i32* [[ARRAYIDX]], align 4 // CHECK11-NEXT: [[ADD4:%.*]] = add nsw i32 [[TMP3]], 1 @@ -7359,7 +7359,7 @@ int bar(int n){ // CHECK12-NEXT: [[TMP1:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR__ADDR]], align 4 // CHECK12-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR__ADDR2]], align 4 // CHECK12-NEXT: call void @__kmpc_push_num_teams(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]], i32 [[TMP1]], i32 [[TMP2]]) -// CHECK12-NEXT: [[TMP3:%.*]] = load i16, i16* [[CONV]], align 4 +// CHECK12-NEXT: [[TMP3:%.*]] = load i16, i16* [[CONV]], align 2 // CHECK12-NEXT: [[CONV3:%.*]] = bitcast i32* [[AA_CASTED]] to i16* // CHECK12-NEXT: store i16 [[TMP3]], i16* [[CONV3]], align 2 // CHECK12-NEXT: [[TMP4:%.*]] = load i32, i32* [[AA_CASTED]], align 4 @@ -7387,7 +7387,7 @@ int bar(int n){ // CHECK12-NEXT: [[AA_CASTED:%.*]] = alloca i32, align 4 // CHECK12-NEXT: store i32 [[AA]], i32* [[AA_ADDR]], align 4 // CHECK12-NEXT: [[CONV:%.*]] = bitcast i32* [[AA_ADDR]] to i16* -// CHECK12-NEXT: [[TMP0:%.*]] = load i16, i16* [[CONV]], align 4 +// CHECK12-NEXT: [[TMP0:%.*]] = load i16, i16* [[CONV]], align 2 // CHECK12-NEXT: [[CONV1:%.*]] = bitcast i32* [[AA_CASTED]] to i16* // CHECK12-NEXT: store i16 [[TMP0]], i16* [[CONV1]], align 2 // CHECK12-NEXT: [[TMP1:%.*]] = load i32, i32* [[AA_CASTED]], align 4 @@ -7405,11 +7405,11 @@ int bar(int n){ // CHECK12-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4 // CHECK12-NEXT: store i32 [[AA]], i32* [[AA_ADDR]], align 4 // CHECK12-NEXT: [[CONV:%.*]] = bitcast i32* [[AA_ADDR]] to i16* -// CHECK12-NEXT: [[TMP0:%.*]] = load i16, i16* [[CONV]], align 4 +// CHECK12-NEXT: [[TMP0:%.*]] = load i16, i16* [[CONV]], align 2 // CHECK12-NEXT: [[CONV1:%.*]] = sext i16 [[TMP0]] to i32 // CHECK12-NEXT: [[ADD:%.*]] = add nsw i32 [[CONV1]], 1 // CHECK12-NEXT: [[CONV2:%.*]] = trunc i32 [[ADD]] to i16 -// CHECK12-NEXT: store i16 [[CONV2]], i16* [[CONV]], align 4 +// CHECK12-NEXT: store i16 [[CONV2]], i16* [[CONV]], align 2 // CHECK12-NEXT: ret void // // @@ -7426,7 +7426,7 @@ int bar(int n){ // CHECK12-NEXT: [[TMP0:%.*]] = load i32, i32* [[A_ADDR]], align 4 // CHECK12-NEXT: store i32 [[TMP0]], i32* [[A_CASTED]], align 4 // CHECK12-NEXT: [[TMP1:%.*]] = load i32, i32* [[A_CASTED]], align 4 -// CHECK12-NEXT: [[TMP2:%.*]] = load i16, i16* [[CONV]], align 4 +// CHECK12-NEXT: [[TMP2:%.*]] = load i16, i16* 
[[CONV]], align 2 // CHECK12-NEXT: [[CONV1:%.*]] = bitcast i32* [[AA_CASTED]] to i16* // CHECK12-NEXT: store i16 [[TMP2]], i16* [[CONV1]], align 2 // CHECK12-NEXT: [[TMP3:%.*]] = load i32, i32* [[AA_CASTED]], align 4 @@ -7449,11 +7449,11 @@ int bar(int n){ // CHECK12-NEXT: [[TMP0:%.*]] = load i32, i32* [[A_ADDR]], align 4 // CHECK12-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP0]], 1 // CHECK12-NEXT: store i32 [[ADD]], i32* [[A_ADDR]], align 4 -// CHECK12-NEXT: [[TMP1:%.*]] = load i16, i16* [[CONV]], align 4 +// CHECK12-NEXT: [[TMP1:%.*]] = load i16, i16* [[CONV]], align 2 // CHECK12-NEXT: [[CONV1:%.*]] = sext i16 [[TMP1]] to i32 // CHECK12-NEXT: [[ADD2:%.*]] = add nsw i32 [[CONV1]], 1 // CHECK12-NEXT: [[CONV3:%.*]] = trunc i32 [[ADD2]] to i16 -// CHECK12-NEXT: store i16 [[CONV3]], i16* [[CONV]], align 4 +// CHECK12-NEXT: store i16 [[CONV3]], i16* [[CONV]], align 2 // CHECK12-NEXT: ret void // // @@ -7691,11 +7691,11 @@ int bar(int n){ // CHECK12-NEXT: [[TMP1:%.*]] = load i32, i32* [[A_ADDR]], align 4 // CHECK12-NEXT: store i32 [[TMP1]], i32* [[A_CASTED]], align 4 // CHECK12-NEXT: [[TMP2:%.*]] = load i32, i32* [[A_CASTED]], align 4 -// CHECK12-NEXT: [[TMP3:%.*]] = load i16, i16* [[CONV]], align 4 +// CHECK12-NEXT: [[TMP3:%.*]] = load i16, i16* [[CONV]], align 2 // CHECK12-NEXT: [[CONV2:%.*]] = bitcast i32* [[AA_CASTED]] to i16* // CHECK12-NEXT: store i16 [[TMP3]], i16* [[CONV2]], align 2 // CHECK12-NEXT: [[TMP4:%.*]] = load i32, i32* [[AA_CASTED]], align 4 -// CHECK12-NEXT: [[TMP5:%.*]] = load i8, i8* [[CONV1]], align 4 +// CHECK12-NEXT: [[TMP5:%.*]] = load i8, i8* [[CONV1]], align 1 // CHECK12-NEXT: [[CONV3:%.*]] = bitcast i32* [[AAA_CASTED]] to i8* // CHECK12-NEXT: store i8 [[TMP5]], i8* [[CONV3]], align 1 // CHECK12-NEXT: [[TMP6:%.*]] = load i32, i32* [[AAA_CASTED]], align 4 @@ -7724,16 +7724,16 @@ int bar(int n){ // CHECK12-NEXT: [[TMP1:%.*]] = load i32, i32* [[A_ADDR]], align 4 // CHECK12-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP1]], 1 // CHECK12-NEXT: store i32 [[ADD]], i32* [[A_ADDR]], align 4 -// CHECK12-NEXT: [[TMP2:%.*]] = load i16, i16* [[CONV]], align 4 +// CHECK12-NEXT: [[TMP2:%.*]] = load i16, i16* [[CONV]], align 2 // CHECK12-NEXT: [[CONV2:%.*]] = sext i16 [[TMP2]] to i32 // CHECK12-NEXT: [[ADD3:%.*]] = add nsw i32 [[CONV2]], 1 // CHECK12-NEXT: [[CONV4:%.*]] = trunc i32 [[ADD3]] to i16 -// CHECK12-NEXT: store i16 [[CONV4]], i16* [[CONV]], align 4 -// CHECK12-NEXT: [[TMP3:%.*]] = load i8, i8* [[CONV1]], align 4 +// CHECK12-NEXT: store i16 [[CONV4]], i16* [[CONV]], align 2 +// CHECK12-NEXT: [[TMP3:%.*]] = load i8, i8* [[CONV1]], align 1 // CHECK12-NEXT: [[CONV5:%.*]] = sext i8 [[TMP3]] to i32 // CHECK12-NEXT: [[ADD6:%.*]] = add nsw i32 [[CONV5]], 1 // CHECK12-NEXT: [[CONV7:%.*]] = trunc i32 [[ADD6]] to i8 -// CHECK12-NEXT: store i8 [[CONV7]], i8* [[CONV1]], align 4 +// CHECK12-NEXT: store i8 [[CONV7]], i8* [[CONV1]], align 1 // CHECK12-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], [10 x i32]* [[TMP0]], i32 0, i32 2 // CHECK12-NEXT: [[TMP4:%.*]] = load i32, i32* [[ARRAYIDX]], align 4 // CHECK12-NEXT: [[ADD8:%.*]] = add nsw i32 [[TMP4]], 1 @@ -7820,7 +7820,7 @@ int bar(int n){ // CHECK12-NEXT: [[TMP1:%.*]] = load i32, i32* [[A_ADDR]], align 4 // CHECK12-NEXT: store i32 [[TMP1]], i32* [[A_CASTED]], align 4 // CHECK12-NEXT: [[TMP2:%.*]] = load i32, i32* [[A_CASTED]], align 4 -// CHECK12-NEXT: [[TMP3:%.*]] = load i16, i16* [[CONV]], align 4 +// CHECK12-NEXT: [[TMP3:%.*]] = load i16, i16* [[CONV]], align 2 // CHECK12-NEXT: [[CONV1:%.*]] = bitcast i32* [[AA_CASTED]] to i16* // 
CHECK12-NEXT: store i16 [[TMP3]], i16* [[CONV1]], align 2 // CHECK12-NEXT: [[TMP4:%.*]] = load i32, i32* [[AA_CASTED]], align 4 @@ -7846,11 +7846,11 @@ int bar(int n){ // CHECK12-NEXT: [[TMP1:%.*]] = load i32, i32* [[A_ADDR]], align 4 // CHECK12-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP1]], 1 // CHECK12-NEXT: store i32 [[ADD]], i32* [[A_ADDR]], align 4 -// CHECK12-NEXT: [[TMP2:%.*]] = load i16, i16* [[CONV]], align 4 +// CHECK12-NEXT: [[TMP2:%.*]] = load i16, i16* [[CONV]], align 2 // CHECK12-NEXT: [[CONV1:%.*]] = sext i16 [[TMP2]] to i32 // CHECK12-NEXT: [[ADD2:%.*]] = add nsw i32 [[CONV1]], 1 // CHECK12-NEXT: [[CONV3:%.*]] = trunc i32 [[ADD2]] to i16 -// CHECK12-NEXT: store i16 [[CONV3]], i16* [[CONV]], align 4 +// CHECK12-NEXT: store i16 [[CONV3]], i16* [[CONV]], align 2 // CHECK12-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], [10 x i32]* [[TMP0]], i32 0, i32 2 // CHECK12-NEXT: [[TMP3:%.*]] = load i32, i32* [[ARRAYIDX]], align 4 // CHECK12-NEXT: [[ADD4:%.*]] = add nsw i32 [[TMP3]], 1 @@ -8240,10 +8240,10 @@ int bar(int n){ // CHECK17-NEXT: [[CONV:%.*]] = bitcast i64* [[AA_ADDR]] to i16* // CHECK17-NEXT: [[CONV3:%.*]] = bitcast i64* [[DOTCAPTURE_EXPR__ADDR]] to i32* // CHECK17-NEXT: [[CONV4:%.*]] = bitcast i64* [[DOTCAPTURE_EXPR__ADDR2]] to i32* -// CHECK17-NEXT: [[TMP1:%.*]] = load i32, i32* [[CONV3]], align 8 -// CHECK17-NEXT: [[TMP2:%.*]] = load i32, i32* [[CONV4]], align 8 +// CHECK17-NEXT: [[TMP1:%.*]] = load i32, i32* [[CONV3]], align 4 +// CHECK17-NEXT: [[TMP2:%.*]] = load i32, i32* [[CONV4]], align 4 // CHECK17-NEXT: call void @__kmpc_push_num_teams(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]], i32 [[TMP1]], i32 [[TMP2]]) -// CHECK17-NEXT: [[TMP3:%.*]] = load i16, i16* [[CONV]], align 8 +// CHECK17-NEXT: [[TMP3:%.*]] = load i16, i16* [[CONV]], align 2 // CHECK17-NEXT: [[CONV5:%.*]] = bitcast i64* [[AA_CASTED]] to i16* // CHECK17-NEXT: store i16 [[TMP3]], i16* [[CONV5]], align 2 // CHECK17-NEXT: [[TMP4:%.*]] = load i64, i64* [[AA_CASTED]], align 8 @@ -8378,7 +8378,7 @@ int bar(int n){ // CHECK17-NEXT: [[A_CASTED:%.*]] = alloca i64, align 8 // CHECK17-NEXT: store i64 [[A]], i64* [[A_ADDR]], align 8 // CHECK17-NEXT: [[CONV:%.*]] = bitcast i64* [[A_ADDR]] to i32* -// CHECK17-NEXT: [[TMP0:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK17-NEXT: [[TMP0:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK17-NEXT: [[CONV1:%.*]] = bitcast i64* [[A_CASTED]] to i32* // CHECK17-NEXT: store i32 [[TMP0]], i32* [[CONV1]], align 4 // CHECK17-NEXT: [[TMP1:%.*]] = load i64, i64* [[A_CASTED]], align 8 @@ -8396,9 +8396,9 @@ int bar(int n){ // CHECK17-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 // CHECK17-NEXT: store i64 [[A]], i64* [[A_ADDR]], align 8 // CHECK17-NEXT: [[CONV:%.*]] = bitcast i64* [[A_ADDR]] to i32* -// CHECK17-NEXT: [[TMP0:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK17-NEXT: [[TMP0:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK17-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP0]], 1 -// CHECK17-NEXT: store i32 [[ADD]], i32* [[CONV]], align 8 +// CHECK17-NEXT: store i32 [[ADD]], i32* [[CONV]], align 4 // CHECK17-NEXT: ret void // // @@ -8409,7 +8409,7 @@ int bar(int n){ // CHECK17-NEXT: [[AA_CASTED:%.*]] = alloca i64, align 8 // CHECK17-NEXT: store i64 [[AA]], i64* [[AA_ADDR]], align 8 // CHECK17-NEXT: [[CONV:%.*]] = bitcast i64* [[AA_ADDR]] to i16* -// CHECK17-NEXT: [[TMP0:%.*]] = load i16, i16* [[CONV]], align 8 +// CHECK17-NEXT: [[TMP0:%.*]] = load i16, i16* [[CONV]], align 2 // CHECK17-NEXT: [[CONV1:%.*]] = bitcast i64* [[AA_CASTED]] to i16* // 
CHECK17-NEXT: store i16 [[TMP0]], i16* [[CONV1]], align 2 // CHECK17-NEXT: [[TMP1:%.*]] = load i64, i64* [[AA_CASTED]], align 8 @@ -8427,11 +8427,11 @@ int bar(int n){ // CHECK17-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 // CHECK17-NEXT: store i64 [[AA]], i64* [[AA_ADDR]], align 8 // CHECK17-NEXT: [[CONV:%.*]] = bitcast i64* [[AA_ADDR]] to i16* -// CHECK17-NEXT: [[TMP0:%.*]] = load i16, i16* [[CONV]], align 8 +// CHECK17-NEXT: [[TMP0:%.*]] = load i16, i16* [[CONV]], align 2 // CHECK17-NEXT: [[CONV1:%.*]] = sext i16 [[TMP0]] to i32 // CHECK17-NEXT: [[ADD:%.*]] = add nsw i32 [[CONV1]], 1 // CHECK17-NEXT: [[CONV2:%.*]] = trunc i32 [[ADD]] to i16 -// CHECK17-NEXT: store i16 [[CONV2]], i16* [[CONV]], align 8 +// CHECK17-NEXT: store i16 [[CONV2]], i16* [[CONV]], align 2 // CHECK17-NEXT: ret void // // @@ -8446,11 +8446,11 @@ int bar(int n){ // CHECK17-NEXT: store i64 [[AA]], i64* [[AA_ADDR]], align 8 // CHECK17-NEXT: [[CONV:%.*]] = bitcast i64* [[A_ADDR]] to i32* // CHECK17-NEXT: [[CONV1:%.*]] = bitcast i64* [[AA_ADDR]] to i16* -// CHECK17-NEXT: [[TMP0:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK17-NEXT: [[TMP0:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK17-NEXT: [[CONV2:%.*]] = bitcast i64* [[A_CASTED]] to i32* // CHECK17-NEXT: store i32 [[TMP0]], i32* [[CONV2]], align 4 // CHECK17-NEXT: [[TMP1:%.*]] = load i64, i64* [[A_CASTED]], align 8 -// CHECK17-NEXT: [[TMP2:%.*]] = load i16, i16* [[CONV1]], align 8 +// CHECK17-NEXT: [[TMP2:%.*]] = load i16, i16* [[CONV1]], align 2 // CHECK17-NEXT: [[CONV3:%.*]] = bitcast i64* [[AA_CASTED]] to i16* // CHECK17-NEXT: store i16 [[TMP2]], i16* [[CONV3]], align 2 // CHECK17-NEXT: [[TMP3:%.*]] = load i64, i64* [[AA_CASTED]], align 8 @@ -8471,14 +8471,14 @@ int bar(int n){ // CHECK17-NEXT: store i64 [[AA]], i64* [[AA_ADDR]], align 8 // CHECK17-NEXT: [[CONV:%.*]] = bitcast i64* [[A_ADDR]] to i32* // CHECK17-NEXT: [[CONV1:%.*]] = bitcast i64* [[AA_ADDR]] to i16* -// CHECK17-NEXT: [[TMP0:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK17-NEXT: [[TMP0:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK17-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP0]], 1 -// CHECK17-NEXT: store i32 [[ADD]], i32* [[CONV]], align 8 -// CHECK17-NEXT: [[TMP1:%.*]] = load i16, i16* [[CONV1]], align 8 +// CHECK17-NEXT: store i32 [[ADD]], i32* [[CONV]], align 4 +// CHECK17-NEXT: [[TMP1:%.*]] = load i16, i16* [[CONV1]], align 2 // CHECK17-NEXT: [[CONV2:%.*]] = sext i16 [[TMP1]] to i32 // CHECK17-NEXT: [[ADD3:%.*]] = add nsw i32 [[CONV2]], 1 // CHECK17-NEXT: [[CONV4:%.*]] = trunc i32 [[ADD3]] to i16 -// CHECK17-NEXT: store i16 [[CONV4]], i16* [[CONV1]], align 8 +// CHECK17-NEXT: store i16 [[CONV4]], i16* [[CONV1]], align 2 // CHECK17-NEXT: ret void // // @@ -8513,7 +8513,7 @@ int bar(int n){ // CHECK17-NEXT: [[TMP5:%.*]] = load i64, i64* [[VLA_ADDR4]], align 8 // CHECK17-NEXT: [[TMP6:%.*]] = load double*, double** [[CN_ADDR]], align 8 // CHECK17-NEXT: [[TMP7:%.*]] = load %struct.TT*, %struct.TT** [[D_ADDR]], align 8 -// CHECK17-NEXT: [[TMP8:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK17-NEXT: [[TMP8:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK17-NEXT: [[CONV5:%.*]] = bitcast i64* [[A_CASTED]] to i32* // CHECK17-NEXT: store i32 [[TMP8]], i32* [[CONV5]], align 4 // CHECK17-NEXT: [[TMP9:%.*]] = load i64, i64* [[A_CASTED]], align 8 @@ -8555,9 +8555,9 @@ int bar(int n){ // CHECK17-NEXT: [[TMP5:%.*]] = load i64, i64* [[VLA_ADDR4]], align 8 // CHECK17-NEXT: [[TMP6:%.*]] = load double*, double** [[CN_ADDR]], align 8 // CHECK17-NEXT: [[TMP7:%.*]] = load 
%struct.TT*, %struct.TT** [[D_ADDR]], align 8 -// CHECK17-NEXT: [[TMP8:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK17-NEXT: [[TMP8:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK17-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP8]], 1 -// CHECK17-NEXT: store i32 [[ADD]], i32* [[CONV]], align 8 +// CHECK17-NEXT: store i32 [[ADD]], i32* [[CONV]], align 4 // CHECK17-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x float], [10 x float]* [[TMP0]], i64 0, i64 2 // CHECK17-NEXT: [[TMP9:%.*]] = load float, float* [[ARRAYIDX]], align 4 // CHECK17-NEXT: [[CONV5:%.*]] = fpext float [[TMP9]] to double @@ -8601,7 +8601,7 @@ int bar(int n){ // CHECK17-NEXT: [[NN_CASTED:%.*]] = alloca i64, align 8 // CHECK17-NEXT: store i64 [[NN]], i64* [[NN_ADDR]], align 8 // CHECK17-NEXT: [[CONV:%.*]] = bitcast i64* [[NN_ADDR]] to i32* -// CHECK17-NEXT: [[TMP0:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK17-NEXT: [[TMP0:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK17-NEXT: [[CONV1:%.*]] = bitcast i64* [[NN_CASTED]] to i32* // CHECK17-NEXT: store i32 [[TMP0]], i32* [[CONV1]], align 4 // CHECK17-NEXT: [[TMP1:%.*]] = load i64, i64* [[NN_CASTED]], align 8 @@ -8620,7 +8620,7 @@ int bar(int n){ // CHECK17-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 // CHECK17-NEXT: store i64 [[NN]], i64* [[NN_ADDR]], align 8 // CHECK17-NEXT: [[CONV:%.*]] = bitcast i64* [[NN_ADDR]] to i32* -// CHECK17-NEXT: [[TMP0:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK17-NEXT: [[TMP0:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK17-NEXT: [[CONV1:%.*]] = bitcast i64* [[NN_CASTED]] to i32* // CHECK17-NEXT: store i32 [[TMP0]], i32* [[CONV1]], align 4 // CHECK17-NEXT: [[TMP1:%.*]] = load i64, i64* [[NN_CASTED]], align 8 @@ -8648,7 +8648,7 @@ int bar(int n){ // CHECK17-NEXT: [[NN_CASTED:%.*]] = alloca i64, align 8 // CHECK17-NEXT: store i64 [[NN]], i64* [[NN_ADDR]], align 8 // CHECK17-NEXT: [[CONV:%.*]] = bitcast i64* [[NN_ADDR]] to i32* -// CHECK17-NEXT: [[TMP0:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK17-NEXT: [[TMP0:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK17-NEXT: [[CONV1:%.*]] = bitcast i64* [[NN_CASTED]] to i32* // CHECK17-NEXT: store i32 [[TMP0]], i32* [[CONV1]], align 4 // CHECK17-NEXT: [[TMP1:%.*]] = load i64, i64* [[NN_CASTED]], align 8 @@ -9057,7 +9057,7 @@ int bar(int n){ // CHECK17-NEXT: [[TMP1:%.*]] = load i64, i64* [[VLA_ADDR]], align 8 // CHECK17-NEXT: [[TMP2:%.*]] = load i64, i64* [[VLA_ADDR2]], align 8 // CHECK17-NEXT: [[TMP3:%.*]] = load i16*, i16** [[C_ADDR]], align 8 -// CHECK17-NEXT: [[TMP4:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK17-NEXT: [[TMP4:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK17-NEXT: [[CONV3:%.*]] = bitcast i64* [[B_CASTED]] to i32* // CHECK17-NEXT: store i32 [[TMP4]], i32* [[CONV3]], align 4 // CHECK17-NEXT: [[TMP5:%.*]] = load i64, i64* [[B_CASTED]], align 8 @@ -9087,7 +9087,7 @@ int bar(int n){ // CHECK17-NEXT: [[TMP1:%.*]] = load i64, i64* [[VLA_ADDR]], align 8 // CHECK17-NEXT: [[TMP2:%.*]] = load i64, i64* [[VLA_ADDR2]], align 8 // CHECK17-NEXT: [[TMP3:%.*]] = load i16*, i16** [[C_ADDR]], align 8 -// CHECK17-NEXT: [[TMP4:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK17-NEXT: [[TMP4:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK17-NEXT: [[CONV3:%.*]] = sitofp i32 [[TMP4]] to double // CHECK17-NEXT: [[ADD:%.*]] = fadd double [[CONV3]], 1.500000e+00 // CHECK17-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_S1:%.*]], %struct.S1* [[TMP0]], i32 0, i32 0 @@ -9122,15 +9122,15 @@ int bar(int n){ // CHECK17-NEXT: [[CONV1:%.*]] = bitcast 
i64* [[AA_ADDR]] to i16* // CHECK17-NEXT: [[CONV2:%.*]] = bitcast i64* [[AAA_ADDR]] to i8* // CHECK17-NEXT: [[TMP0:%.*]] = load [10 x i32]*, [10 x i32]** [[B_ADDR]], align 8 -// CHECK17-NEXT: [[TMP1:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK17-NEXT: [[TMP1:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK17-NEXT: [[CONV3:%.*]] = bitcast i64* [[A_CASTED]] to i32* // CHECK17-NEXT: store i32 [[TMP1]], i32* [[CONV3]], align 4 // CHECK17-NEXT: [[TMP2:%.*]] = load i64, i64* [[A_CASTED]], align 8 -// CHECK17-NEXT: [[TMP3:%.*]] = load i16, i16* [[CONV1]], align 8 +// CHECK17-NEXT: [[TMP3:%.*]] = load i16, i16* [[CONV1]], align 2 // CHECK17-NEXT: [[CONV4:%.*]] = bitcast i64* [[AA_CASTED]] to i16* // CHECK17-NEXT: store i16 [[TMP3]], i16* [[CONV4]], align 2 // CHECK17-NEXT: [[TMP4:%.*]] = load i64, i64* [[AA_CASTED]], align 8 -// CHECK17-NEXT: [[TMP5:%.*]] = load i8, i8* [[CONV2]], align 8 +// CHECK17-NEXT: [[TMP5:%.*]] = load i8, i8* [[CONV2]], align 1 // CHECK17-NEXT: [[CONV5:%.*]] = bitcast i64* [[AAA_CASTED]] to i8* // CHECK17-NEXT: store i8 [[TMP5]], i8* [[CONV5]], align 1 // CHECK17-NEXT: [[TMP6:%.*]] = load i64, i64* [[AAA_CASTED]], align 8 @@ -9157,19 +9157,19 @@ int bar(int n){ // CHECK17-NEXT: [[CONV1:%.*]] = bitcast i64* [[AA_ADDR]] to i16* // CHECK17-NEXT: [[CONV2:%.*]] = bitcast i64* [[AAA_ADDR]] to i8* // CHECK17-NEXT: [[TMP0:%.*]] = load [10 x i32]*, [10 x i32]** [[B_ADDR]], align 8 -// CHECK17-NEXT: [[TMP1:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK17-NEXT: [[TMP1:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK17-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP1]], 1 -// CHECK17-NEXT: store i32 [[ADD]], i32* [[CONV]], align 8 -// CHECK17-NEXT: [[TMP2:%.*]] = load i16, i16* [[CONV1]], align 8 +// CHECK17-NEXT: store i32 [[ADD]], i32* [[CONV]], align 4 +// CHECK17-NEXT: [[TMP2:%.*]] = load i16, i16* [[CONV1]], align 2 // CHECK17-NEXT: [[CONV3:%.*]] = sext i16 [[TMP2]] to i32 // CHECK17-NEXT: [[ADD4:%.*]] = add nsw i32 [[CONV3]], 1 // CHECK17-NEXT: [[CONV5:%.*]] = trunc i32 [[ADD4]] to i16 -// CHECK17-NEXT: store i16 [[CONV5]], i16* [[CONV1]], align 8 -// CHECK17-NEXT: [[TMP3:%.*]] = load i8, i8* [[CONV2]], align 8 +// CHECK17-NEXT: store i16 [[CONV5]], i16* [[CONV1]], align 2 +// CHECK17-NEXT: [[TMP3:%.*]] = load i8, i8* [[CONV2]], align 1 // CHECK17-NEXT: [[CONV6:%.*]] = sext i8 [[TMP3]] to i32 // CHECK17-NEXT: [[ADD7:%.*]] = add nsw i32 [[CONV6]], 1 // CHECK17-NEXT: [[CONV8:%.*]] = trunc i32 [[ADD7]] to i8 -// CHECK17-NEXT: store i8 [[CONV8]], i8* [[CONV2]], align 8 +// CHECK17-NEXT: store i8 [[CONV8]], i8* [[CONV2]], align 1 // CHECK17-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], [10 x i32]* [[TMP0]], i64 0, i64 2 // CHECK17-NEXT: [[TMP4:%.*]] = load i32, i32* [[ARRAYIDX]], align 4 // CHECK17-NEXT: [[ADD9:%.*]] = add nsw i32 [[TMP4]], 1 @@ -9191,11 +9191,11 @@ int bar(int n){ // CHECK17-NEXT: [[CONV:%.*]] = bitcast i64* [[A_ADDR]] to i32* // CHECK17-NEXT: [[CONV1:%.*]] = bitcast i64* [[AA_ADDR]] to i16* // CHECK17-NEXT: [[TMP0:%.*]] = load [10 x i32]*, [10 x i32]** [[B_ADDR]], align 8 -// CHECK17-NEXT: [[TMP1:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK17-NEXT: [[TMP1:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK17-NEXT: [[CONV2:%.*]] = bitcast i64* [[A_CASTED]] to i32* // CHECK17-NEXT: store i32 [[TMP1]], i32* [[CONV2]], align 4 // CHECK17-NEXT: [[TMP2:%.*]] = load i64, i64* [[A_CASTED]], align 8 -// CHECK17-NEXT: [[TMP3:%.*]] = load i16, i16* [[CONV1]], align 8 +// CHECK17-NEXT: [[TMP3:%.*]] = load i16, i16* [[CONV1]], align 2 // CHECK17-NEXT: 
[[CONV3:%.*]] = bitcast i64* [[AA_CASTED]] to i16* // CHECK17-NEXT: store i16 [[TMP3]], i16* [[CONV3]], align 2 // CHECK17-NEXT: [[TMP4:%.*]] = load i64, i64* [[AA_CASTED]], align 8 @@ -9219,14 +9219,14 @@ int bar(int n){ // CHECK17-NEXT: [[CONV:%.*]] = bitcast i64* [[A_ADDR]] to i32* // CHECK17-NEXT: [[CONV1:%.*]] = bitcast i64* [[AA_ADDR]] to i16* // CHECK17-NEXT: [[TMP0:%.*]] = load [10 x i32]*, [10 x i32]** [[B_ADDR]], align 8 -// CHECK17-NEXT: [[TMP1:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK17-NEXT: [[TMP1:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK17-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP1]], 1 -// CHECK17-NEXT: store i32 [[ADD]], i32* [[CONV]], align 8 -// CHECK17-NEXT: [[TMP2:%.*]] = load i16, i16* [[CONV1]], align 8 +// CHECK17-NEXT: store i32 [[ADD]], i32* [[CONV]], align 4 +// CHECK17-NEXT: [[TMP2:%.*]] = load i16, i16* [[CONV1]], align 2 // CHECK17-NEXT: [[CONV2:%.*]] = sext i16 [[TMP2]] to i32 // CHECK17-NEXT: [[ADD3:%.*]] = add nsw i32 [[CONV2]], 1 // CHECK17-NEXT: [[CONV4:%.*]] = trunc i32 [[ADD3]] to i16 -// CHECK17-NEXT: store i16 [[CONV4]], i16* [[CONV1]], align 8 +// CHECK17-NEXT: store i16 [[CONV4]], i16* [[CONV1]], align 2 // CHECK17-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], [10 x i32]* [[TMP0]], i64 0, i64 2 // CHECK17-NEXT: [[TMP3:%.*]] = load i32, i32* [[ARRAYIDX]], align 4 // CHECK17-NEXT: [[ADD5:%.*]] = add nsw i32 [[TMP3]], 1 @@ -9623,10 +9623,10 @@ int bar(int n){ // CHECK18-NEXT: [[CONV:%.*]] = bitcast i64* [[AA_ADDR]] to i16* // CHECK18-NEXT: [[CONV3:%.*]] = bitcast i64* [[DOTCAPTURE_EXPR__ADDR]] to i32* // CHECK18-NEXT: [[CONV4:%.*]] = bitcast i64* [[DOTCAPTURE_EXPR__ADDR2]] to i32* -// CHECK18-NEXT: [[TMP1:%.*]] = load i32, i32* [[CONV3]], align 8 -// CHECK18-NEXT: [[TMP2:%.*]] = load i32, i32* [[CONV4]], align 8 +// CHECK18-NEXT: [[TMP1:%.*]] = load i32, i32* [[CONV3]], align 4 +// CHECK18-NEXT: [[TMP2:%.*]] = load i32, i32* [[CONV4]], align 4 // CHECK18-NEXT: call void @__kmpc_push_num_teams(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]], i32 [[TMP1]], i32 [[TMP2]]) -// CHECK18-NEXT: [[TMP3:%.*]] = load i16, i16* [[CONV]], align 8 +// CHECK18-NEXT: [[TMP3:%.*]] = load i16, i16* [[CONV]], align 2 // CHECK18-NEXT: [[CONV5:%.*]] = bitcast i64* [[AA_CASTED]] to i16* // CHECK18-NEXT: store i16 [[TMP3]], i16* [[CONV5]], align 2 // CHECK18-NEXT: [[TMP4:%.*]] = load i64, i64* [[AA_CASTED]], align 8 @@ -9761,7 +9761,7 @@ int bar(int n){ // CHECK18-NEXT: [[A_CASTED:%.*]] = alloca i64, align 8 // CHECK18-NEXT: store i64 [[A]], i64* [[A_ADDR]], align 8 // CHECK18-NEXT: [[CONV:%.*]] = bitcast i64* [[A_ADDR]] to i32* -// CHECK18-NEXT: [[TMP0:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK18-NEXT: [[TMP0:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK18-NEXT: [[CONV1:%.*]] = bitcast i64* [[A_CASTED]] to i32* // CHECK18-NEXT: store i32 [[TMP0]], i32* [[CONV1]], align 4 // CHECK18-NEXT: [[TMP1:%.*]] = load i64, i64* [[A_CASTED]], align 8 @@ -9779,9 +9779,9 @@ int bar(int n){ // CHECK18-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 // CHECK18-NEXT: store i64 [[A]], i64* [[A_ADDR]], align 8 // CHECK18-NEXT: [[CONV:%.*]] = bitcast i64* [[A_ADDR]] to i32* -// CHECK18-NEXT: [[TMP0:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK18-NEXT: [[TMP0:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK18-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP0]], 1 -// CHECK18-NEXT: store i32 [[ADD]], i32* [[CONV]], align 8 +// CHECK18-NEXT: store i32 [[ADD]], i32* [[CONV]], align 4 // CHECK18-NEXT: ret void // // @@ -9792,7 +9792,7 @@ int 
bar(int n){ // CHECK18-NEXT: [[AA_CASTED:%.*]] = alloca i64, align 8 // CHECK18-NEXT: store i64 [[AA]], i64* [[AA_ADDR]], align 8 // CHECK18-NEXT: [[CONV:%.*]] = bitcast i64* [[AA_ADDR]] to i16* -// CHECK18-NEXT: [[TMP0:%.*]] = load i16, i16* [[CONV]], align 8 +// CHECK18-NEXT: [[TMP0:%.*]] = load i16, i16* [[CONV]], align 2 // CHECK18-NEXT: [[CONV1:%.*]] = bitcast i64* [[AA_CASTED]] to i16* // CHECK18-NEXT: store i16 [[TMP0]], i16* [[CONV1]], align 2 // CHECK18-NEXT: [[TMP1:%.*]] = load i64, i64* [[AA_CASTED]], align 8 @@ -9810,11 +9810,11 @@ int bar(int n){ // CHECK18-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 // CHECK18-NEXT: store i64 [[AA]], i64* [[AA_ADDR]], align 8 // CHECK18-NEXT: [[CONV:%.*]] = bitcast i64* [[AA_ADDR]] to i16* -// CHECK18-NEXT: [[TMP0:%.*]] = load i16, i16* [[CONV]], align 8 +// CHECK18-NEXT: [[TMP0:%.*]] = load i16, i16* [[CONV]], align 2 // CHECK18-NEXT: [[CONV1:%.*]] = sext i16 [[TMP0]] to i32 // CHECK18-NEXT: [[ADD:%.*]] = add nsw i32 [[CONV1]], 1 // CHECK18-NEXT: [[CONV2:%.*]] = trunc i32 [[ADD]] to i16 -// CHECK18-NEXT: store i16 [[CONV2]], i16* [[CONV]], align 8 +// CHECK18-NEXT: store i16 [[CONV2]], i16* [[CONV]], align 2 // CHECK18-NEXT: ret void // // @@ -9829,11 +9829,11 @@ int bar(int n){ // CHECK18-NEXT: store i64 [[AA]], i64* [[AA_ADDR]], align 8 // CHECK18-NEXT: [[CONV:%.*]] = bitcast i64* [[A_ADDR]] to i32* // CHECK18-NEXT: [[CONV1:%.*]] = bitcast i64* [[AA_ADDR]] to i16* -// CHECK18-NEXT: [[TMP0:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK18-NEXT: [[TMP0:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK18-NEXT: [[CONV2:%.*]] = bitcast i64* [[A_CASTED]] to i32* // CHECK18-NEXT: store i32 [[TMP0]], i32* [[CONV2]], align 4 // CHECK18-NEXT: [[TMP1:%.*]] = load i64, i64* [[A_CASTED]], align 8 -// CHECK18-NEXT: [[TMP2:%.*]] = load i16, i16* [[CONV1]], align 8 +// CHECK18-NEXT: [[TMP2:%.*]] = load i16, i16* [[CONV1]], align 2 // CHECK18-NEXT: [[CONV3:%.*]] = bitcast i64* [[AA_CASTED]] to i16* // CHECK18-NEXT: store i16 [[TMP2]], i16* [[CONV3]], align 2 // CHECK18-NEXT: [[TMP3:%.*]] = load i64, i64* [[AA_CASTED]], align 8 @@ -9854,14 +9854,14 @@ int bar(int n){ // CHECK18-NEXT: store i64 [[AA]], i64* [[AA_ADDR]], align 8 // CHECK18-NEXT: [[CONV:%.*]] = bitcast i64* [[A_ADDR]] to i32* // CHECK18-NEXT: [[CONV1:%.*]] = bitcast i64* [[AA_ADDR]] to i16* -// CHECK18-NEXT: [[TMP0:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK18-NEXT: [[TMP0:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK18-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP0]], 1 -// CHECK18-NEXT: store i32 [[ADD]], i32* [[CONV]], align 8 -// CHECK18-NEXT: [[TMP1:%.*]] = load i16, i16* [[CONV1]], align 8 +// CHECK18-NEXT: store i32 [[ADD]], i32* [[CONV]], align 4 +// CHECK18-NEXT: [[TMP1:%.*]] = load i16, i16* [[CONV1]], align 2 // CHECK18-NEXT: [[CONV2:%.*]] = sext i16 [[TMP1]] to i32 // CHECK18-NEXT: [[ADD3:%.*]] = add nsw i32 [[CONV2]], 1 // CHECK18-NEXT: [[CONV4:%.*]] = trunc i32 [[ADD3]] to i16 -// CHECK18-NEXT: store i16 [[CONV4]], i16* [[CONV1]], align 8 +// CHECK18-NEXT: store i16 [[CONV4]], i16* [[CONV1]], align 2 // CHECK18-NEXT: ret void // // @@ -9896,7 +9896,7 @@ int bar(int n){ // CHECK18-NEXT: [[TMP5:%.*]] = load i64, i64* [[VLA_ADDR4]], align 8 // CHECK18-NEXT: [[TMP6:%.*]] = load double*, double** [[CN_ADDR]], align 8 // CHECK18-NEXT: [[TMP7:%.*]] = load %struct.TT*, %struct.TT** [[D_ADDR]], align 8 -// CHECK18-NEXT: [[TMP8:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK18-NEXT: [[TMP8:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK18-NEXT: 
[[CONV5:%.*]] = bitcast i64* [[A_CASTED]] to i32* // CHECK18-NEXT: store i32 [[TMP8]], i32* [[CONV5]], align 4 // CHECK18-NEXT: [[TMP9:%.*]] = load i64, i64* [[A_CASTED]], align 8 @@ -9938,9 +9938,9 @@ int bar(int n){ // CHECK18-NEXT: [[TMP5:%.*]] = load i64, i64* [[VLA_ADDR4]], align 8 // CHECK18-NEXT: [[TMP6:%.*]] = load double*, double** [[CN_ADDR]], align 8 // CHECK18-NEXT: [[TMP7:%.*]] = load %struct.TT*, %struct.TT** [[D_ADDR]], align 8 -// CHECK18-NEXT: [[TMP8:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK18-NEXT: [[TMP8:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK18-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP8]], 1 -// CHECK18-NEXT: store i32 [[ADD]], i32* [[CONV]], align 8 +// CHECK18-NEXT: store i32 [[ADD]], i32* [[CONV]], align 4 // CHECK18-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x float], [10 x float]* [[TMP0]], i64 0, i64 2 // CHECK18-NEXT: [[TMP9:%.*]] = load float, float* [[ARRAYIDX]], align 4 // CHECK18-NEXT: [[CONV5:%.*]] = fpext float [[TMP9]] to double @@ -9984,7 +9984,7 @@ int bar(int n){ // CHECK18-NEXT: [[NN_CASTED:%.*]] = alloca i64, align 8 // CHECK18-NEXT: store i64 [[NN]], i64* [[NN_ADDR]], align 8 // CHECK18-NEXT: [[CONV:%.*]] = bitcast i64* [[NN_ADDR]] to i32* -// CHECK18-NEXT: [[TMP0:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK18-NEXT: [[TMP0:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK18-NEXT: [[CONV1:%.*]] = bitcast i64* [[NN_CASTED]] to i32* // CHECK18-NEXT: store i32 [[TMP0]], i32* [[CONV1]], align 4 // CHECK18-NEXT: [[TMP1:%.*]] = load i64, i64* [[NN_CASTED]], align 8 @@ -10003,7 +10003,7 @@ int bar(int n){ // CHECK18-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 // CHECK18-NEXT: store i64 [[NN]], i64* [[NN_ADDR]], align 8 // CHECK18-NEXT: [[CONV:%.*]] = bitcast i64* [[NN_ADDR]] to i32* -// CHECK18-NEXT: [[TMP0:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK18-NEXT: [[TMP0:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK18-NEXT: [[CONV1:%.*]] = bitcast i64* [[NN_CASTED]] to i32* // CHECK18-NEXT: store i32 [[TMP0]], i32* [[CONV1]], align 4 // CHECK18-NEXT: [[TMP1:%.*]] = load i64, i64* [[NN_CASTED]], align 8 @@ -10031,7 +10031,7 @@ int bar(int n){ // CHECK18-NEXT: [[NN_CASTED:%.*]] = alloca i64, align 8 // CHECK18-NEXT: store i64 [[NN]], i64* [[NN_ADDR]], align 8 // CHECK18-NEXT: [[CONV:%.*]] = bitcast i64* [[NN_ADDR]] to i32* -// CHECK18-NEXT: [[TMP0:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK18-NEXT: [[TMP0:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK18-NEXT: [[CONV1:%.*]] = bitcast i64* [[NN_CASTED]] to i32* // CHECK18-NEXT: store i32 [[TMP0]], i32* [[CONV1]], align 4 // CHECK18-NEXT: [[TMP1:%.*]] = load i64, i64* [[NN_CASTED]], align 8 @@ -10440,7 +10440,7 @@ int bar(int n){ // CHECK18-NEXT: [[TMP1:%.*]] = load i64, i64* [[VLA_ADDR]], align 8 // CHECK18-NEXT: [[TMP2:%.*]] = load i64, i64* [[VLA_ADDR2]], align 8 // CHECK18-NEXT: [[TMP3:%.*]] = load i16*, i16** [[C_ADDR]], align 8 -// CHECK18-NEXT: [[TMP4:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK18-NEXT: [[TMP4:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK18-NEXT: [[CONV3:%.*]] = bitcast i64* [[B_CASTED]] to i32* // CHECK18-NEXT: store i32 [[TMP4]], i32* [[CONV3]], align 4 // CHECK18-NEXT: [[TMP5:%.*]] = load i64, i64* [[B_CASTED]], align 8 @@ -10470,7 +10470,7 @@ int bar(int n){ // CHECK18-NEXT: [[TMP1:%.*]] = load i64, i64* [[VLA_ADDR]], align 8 // CHECK18-NEXT: [[TMP2:%.*]] = load i64, i64* [[VLA_ADDR2]], align 8 // CHECK18-NEXT: [[TMP3:%.*]] = load i16*, i16** [[C_ADDR]], align 8 -// CHECK18-NEXT: [[TMP4:%.*]] = load 
i32, i32* [[CONV]], align 8 +// CHECK18-NEXT: [[TMP4:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK18-NEXT: [[CONV3:%.*]] = sitofp i32 [[TMP4]] to double // CHECK18-NEXT: [[ADD:%.*]] = fadd double [[CONV3]], 1.500000e+00 // CHECK18-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_S1:%.*]], %struct.S1* [[TMP0]], i32 0, i32 0 @@ -10505,15 +10505,15 @@ int bar(int n){ // CHECK18-NEXT: [[CONV1:%.*]] = bitcast i64* [[AA_ADDR]] to i16* // CHECK18-NEXT: [[CONV2:%.*]] = bitcast i64* [[AAA_ADDR]] to i8* // CHECK18-NEXT: [[TMP0:%.*]] = load [10 x i32]*, [10 x i32]** [[B_ADDR]], align 8 -// CHECK18-NEXT: [[TMP1:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK18-NEXT: [[TMP1:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK18-NEXT: [[CONV3:%.*]] = bitcast i64* [[A_CASTED]] to i32* // CHECK18-NEXT: store i32 [[TMP1]], i32* [[CONV3]], align 4 // CHECK18-NEXT: [[TMP2:%.*]] = load i64, i64* [[A_CASTED]], align 8 -// CHECK18-NEXT: [[TMP3:%.*]] = load i16, i16* [[CONV1]], align 8 +// CHECK18-NEXT: [[TMP3:%.*]] = load i16, i16* [[CONV1]], align 2 // CHECK18-NEXT: [[CONV4:%.*]] = bitcast i64* [[AA_CASTED]] to i16* // CHECK18-NEXT: store i16 [[TMP3]], i16* [[CONV4]], align 2 // CHECK18-NEXT: [[TMP4:%.*]] = load i64, i64* [[AA_CASTED]], align 8 -// CHECK18-NEXT: [[TMP5:%.*]] = load i8, i8* [[CONV2]], align 8 +// CHECK18-NEXT: [[TMP5:%.*]] = load i8, i8* [[CONV2]], align 1 // CHECK18-NEXT: [[CONV5:%.*]] = bitcast i64* [[AAA_CASTED]] to i8* // CHECK18-NEXT: store i8 [[TMP5]], i8* [[CONV5]], align 1 // CHECK18-NEXT: [[TMP6:%.*]] = load i64, i64* [[AAA_CASTED]], align 8 @@ -10540,19 +10540,19 @@ int bar(int n){ // CHECK18-NEXT: [[CONV1:%.*]] = bitcast i64* [[AA_ADDR]] to i16* // CHECK18-NEXT: [[CONV2:%.*]] = bitcast i64* [[AAA_ADDR]] to i8* // CHECK18-NEXT: [[TMP0:%.*]] = load [10 x i32]*, [10 x i32]** [[B_ADDR]], align 8 -// CHECK18-NEXT: [[TMP1:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK18-NEXT: [[TMP1:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK18-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP1]], 1 -// CHECK18-NEXT: store i32 [[ADD]], i32* [[CONV]], align 8 -// CHECK18-NEXT: [[TMP2:%.*]] = load i16, i16* [[CONV1]], align 8 +// CHECK18-NEXT: store i32 [[ADD]], i32* [[CONV]], align 4 +// CHECK18-NEXT: [[TMP2:%.*]] = load i16, i16* [[CONV1]], align 2 // CHECK18-NEXT: [[CONV3:%.*]] = sext i16 [[TMP2]] to i32 // CHECK18-NEXT: [[ADD4:%.*]] = add nsw i32 [[CONV3]], 1 // CHECK18-NEXT: [[CONV5:%.*]] = trunc i32 [[ADD4]] to i16 -// CHECK18-NEXT: store i16 [[CONV5]], i16* [[CONV1]], align 8 -// CHECK18-NEXT: [[TMP3:%.*]] = load i8, i8* [[CONV2]], align 8 +// CHECK18-NEXT: store i16 [[CONV5]], i16* [[CONV1]], align 2 +// CHECK18-NEXT: [[TMP3:%.*]] = load i8, i8* [[CONV2]], align 1 // CHECK18-NEXT: [[CONV6:%.*]] = sext i8 [[TMP3]] to i32 // CHECK18-NEXT: [[ADD7:%.*]] = add nsw i32 [[CONV6]], 1 // CHECK18-NEXT: [[CONV8:%.*]] = trunc i32 [[ADD7]] to i8 -// CHECK18-NEXT: store i8 [[CONV8]], i8* [[CONV2]], align 8 +// CHECK18-NEXT: store i8 [[CONV8]], i8* [[CONV2]], align 1 // CHECK18-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], [10 x i32]* [[TMP0]], i64 0, i64 2 // CHECK18-NEXT: [[TMP4:%.*]] = load i32, i32* [[ARRAYIDX]], align 4 // CHECK18-NEXT: [[ADD9:%.*]] = add nsw i32 [[TMP4]], 1 @@ -10574,11 +10574,11 @@ int bar(int n){ // CHECK18-NEXT: [[CONV:%.*]] = bitcast i64* [[A_ADDR]] to i32* // CHECK18-NEXT: [[CONV1:%.*]] = bitcast i64* [[AA_ADDR]] to i16* // CHECK18-NEXT: [[TMP0:%.*]] = load [10 x i32]*, [10 x i32]** [[B_ADDR]], align 8 -// CHECK18-NEXT: [[TMP1:%.*]] = load i32, i32* [[CONV]], align 8 +// 
CHECK18-NEXT: [[TMP1:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK18-NEXT: [[CONV2:%.*]] = bitcast i64* [[A_CASTED]] to i32* // CHECK18-NEXT: store i32 [[TMP1]], i32* [[CONV2]], align 4 // CHECK18-NEXT: [[TMP2:%.*]] = load i64, i64* [[A_CASTED]], align 8 -// CHECK18-NEXT: [[TMP3:%.*]] = load i16, i16* [[CONV1]], align 8 +// CHECK18-NEXT: [[TMP3:%.*]] = load i16, i16* [[CONV1]], align 2 // CHECK18-NEXT: [[CONV3:%.*]] = bitcast i64* [[AA_CASTED]] to i16* // CHECK18-NEXT: store i16 [[TMP3]], i16* [[CONV3]], align 2 // CHECK18-NEXT: [[TMP4:%.*]] = load i64, i64* [[AA_CASTED]], align 8 @@ -10602,14 +10602,14 @@ int bar(int n){ // CHECK18-NEXT: [[CONV:%.*]] = bitcast i64* [[A_ADDR]] to i32* // CHECK18-NEXT: [[CONV1:%.*]] = bitcast i64* [[AA_ADDR]] to i16* // CHECK18-NEXT: [[TMP0:%.*]] = load [10 x i32]*, [10 x i32]** [[B_ADDR]], align 8 -// CHECK18-NEXT: [[TMP1:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK18-NEXT: [[TMP1:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK18-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP1]], 1 -// CHECK18-NEXT: store i32 [[ADD]], i32* [[CONV]], align 8 -// CHECK18-NEXT: [[TMP2:%.*]] = load i16, i16* [[CONV1]], align 8 +// CHECK18-NEXT: store i32 [[ADD]], i32* [[CONV]], align 4 +// CHECK18-NEXT: [[TMP2:%.*]] = load i16, i16* [[CONV1]], align 2 // CHECK18-NEXT: [[CONV2:%.*]] = sext i16 [[TMP2]] to i32 // CHECK18-NEXT: [[ADD3:%.*]] = add nsw i32 [[CONV2]], 1 // CHECK18-NEXT: [[CONV4:%.*]] = trunc i32 [[ADD3]] to i16 -// CHECK18-NEXT: store i16 [[CONV4]], i16* [[CONV1]], align 8 +// CHECK18-NEXT: store i16 [[CONV4]], i16* [[CONV1]], align 2 // CHECK18-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], [10 x i32]* [[TMP0]], i64 0, i64 2 // CHECK18-NEXT: [[TMP3:%.*]] = load i32, i32* [[ARRAYIDX]], align 4 // CHECK18-NEXT: [[ADD5:%.*]] = add nsw i32 [[TMP3]], 1 @@ -11000,7 +11000,7 @@ int bar(int n){ // CHECK19-NEXT: [[TMP1:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR__ADDR]], align 4 // CHECK19-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR__ADDR2]], align 4 // CHECK19-NEXT: call void @__kmpc_push_num_teams(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]], i32 [[TMP1]], i32 [[TMP2]]) -// CHECK19-NEXT: [[TMP3:%.*]] = load i16, i16* [[CONV]], align 4 +// CHECK19-NEXT: [[TMP3:%.*]] = load i16, i16* [[CONV]], align 2 // CHECK19-NEXT: [[CONV3:%.*]] = bitcast i32* [[AA_CASTED]] to i16* // CHECK19-NEXT: store i16 [[TMP3]], i16* [[CONV3]], align 2 // CHECK19-NEXT: [[TMP4:%.*]] = load i32, i32* [[AA_CASTED]], align 4 @@ -11161,7 +11161,7 @@ int bar(int n){ // CHECK19-NEXT: [[AA_CASTED:%.*]] = alloca i32, align 4 // CHECK19-NEXT: store i32 [[AA]], i32* [[AA_ADDR]], align 4 // CHECK19-NEXT: [[CONV:%.*]] = bitcast i32* [[AA_ADDR]] to i16* -// CHECK19-NEXT: [[TMP0:%.*]] = load i16, i16* [[CONV]], align 4 +// CHECK19-NEXT: [[TMP0:%.*]] = load i16, i16* [[CONV]], align 2 // CHECK19-NEXT: [[CONV1:%.*]] = bitcast i32* [[AA_CASTED]] to i16* // CHECK19-NEXT: store i16 [[TMP0]], i16* [[CONV1]], align 2 // CHECK19-NEXT: [[TMP1:%.*]] = load i32, i32* [[AA_CASTED]], align 4 @@ -11179,11 +11179,11 @@ int bar(int n){ // CHECK19-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4 // CHECK19-NEXT: store i32 [[AA]], i32* [[AA_ADDR]], align 4 // CHECK19-NEXT: [[CONV:%.*]] = bitcast i32* [[AA_ADDR]] to i16* -// CHECK19-NEXT: [[TMP0:%.*]] = load i16, i16* [[CONV]], align 4 +// CHECK19-NEXT: [[TMP0:%.*]] = load i16, i16* [[CONV]], align 2 // CHECK19-NEXT: [[CONV1:%.*]] = sext i16 [[TMP0]] to i32 // CHECK19-NEXT: [[ADD:%.*]] = add nsw i32 [[CONV1]], 1 // CHECK19-NEXT: 
[[CONV2:%.*]] = trunc i32 [[ADD]] to i16 -// CHECK19-NEXT: store i16 [[CONV2]], i16* [[CONV]], align 4 +// CHECK19-NEXT: store i16 [[CONV2]], i16* [[CONV]], align 2 // CHECK19-NEXT: ret void // // @@ -11200,7 +11200,7 @@ int bar(int n){ // CHECK19-NEXT: [[TMP0:%.*]] = load i32, i32* [[A_ADDR]], align 4 // CHECK19-NEXT: store i32 [[TMP0]], i32* [[A_CASTED]], align 4 // CHECK19-NEXT: [[TMP1:%.*]] = load i32, i32* [[A_CASTED]], align 4 -// CHECK19-NEXT: [[TMP2:%.*]] = load i16, i16* [[CONV]], align 4 +// CHECK19-NEXT: [[TMP2:%.*]] = load i16, i16* [[CONV]], align 2 // CHECK19-NEXT: [[CONV1:%.*]] = bitcast i32* [[AA_CASTED]] to i16* // CHECK19-NEXT: store i16 [[TMP2]], i16* [[CONV1]], align 2 // CHECK19-NEXT: [[TMP3:%.*]] = load i32, i32* [[AA_CASTED]], align 4 @@ -11223,11 +11223,11 @@ int bar(int n){ // CHECK19-NEXT: [[TMP0:%.*]] = load i32, i32* [[A_ADDR]], align 4 // CHECK19-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP0]], 1 // CHECK19-NEXT: store i32 [[ADD]], i32* [[A_ADDR]], align 4 -// CHECK19-NEXT: [[TMP1:%.*]] = load i16, i16* [[CONV]], align 4 +// CHECK19-NEXT: [[TMP1:%.*]] = load i16, i16* [[CONV]], align 2 // CHECK19-NEXT: [[CONV1:%.*]] = sext i16 [[TMP1]] to i32 // CHECK19-NEXT: [[ADD2:%.*]] = add nsw i32 [[CONV1]], 1 // CHECK19-NEXT: [[CONV3:%.*]] = trunc i32 [[ADD2]] to i16 -// CHECK19-NEXT: store i16 [[CONV3]], i16* [[CONV]], align 4 +// CHECK19-NEXT: store i16 [[CONV3]], i16* [[CONV]], align 2 // CHECK19-NEXT: ret void // // @@ -11855,11 +11855,11 @@ int bar(int n){ // CHECK19-NEXT: [[TMP1:%.*]] = load i32, i32* [[A_ADDR]], align 4 // CHECK19-NEXT: store i32 [[TMP1]], i32* [[A_CASTED]], align 4 // CHECK19-NEXT: [[TMP2:%.*]] = load i32, i32* [[A_CASTED]], align 4 -// CHECK19-NEXT: [[TMP3:%.*]] = load i16, i16* [[CONV]], align 4 +// CHECK19-NEXT: [[TMP3:%.*]] = load i16, i16* [[CONV]], align 2 // CHECK19-NEXT: [[CONV2:%.*]] = bitcast i32* [[AA_CASTED]] to i16* // CHECK19-NEXT: store i16 [[TMP3]], i16* [[CONV2]], align 2 // CHECK19-NEXT: [[TMP4:%.*]] = load i32, i32* [[AA_CASTED]], align 4 -// CHECK19-NEXT: [[TMP5:%.*]] = load i8, i8* [[CONV1]], align 4 +// CHECK19-NEXT: [[TMP5:%.*]] = load i8, i8* [[CONV1]], align 1 // CHECK19-NEXT: [[CONV3:%.*]] = bitcast i32* [[AAA_CASTED]] to i8* // CHECK19-NEXT: store i8 [[TMP5]], i8* [[CONV3]], align 1 // CHECK19-NEXT: [[TMP6:%.*]] = load i32, i32* [[AAA_CASTED]], align 4 @@ -11888,16 +11888,16 @@ int bar(int n){ // CHECK19-NEXT: [[TMP1:%.*]] = load i32, i32* [[A_ADDR]], align 4 // CHECK19-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP1]], 1 // CHECK19-NEXT: store i32 [[ADD]], i32* [[A_ADDR]], align 4 -// CHECK19-NEXT: [[TMP2:%.*]] = load i16, i16* [[CONV]], align 4 +// CHECK19-NEXT: [[TMP2:%.*]] = load i16, i16* [[CONV]], align 2 // CHECK19-NEXT: [[CONV2:%.*]] = sext i16 [[TMP2]] to i32 // CHECK19-NEXT: [[ADD3:%.*]] = add nsw i32 [[CONV2]], 1 // CHECK19-NEXT: [[CONV4:%.*]] = trunc i32 [[ADD3]] to i16 -// CHECK19-NEXT: store i16 [[CONV4]], i16* [[CONV]], align 4 -// CHECK19-NEXT: [[TMP3:%.*]] = load i8, i8* [[CONV1]], align 4 +// CHECK19-NEXT: store i16 [[CONV4]], i16* [[CONV]], align 2 +// CHECK19-NEXT: [[TMP3:%.*]] = load i8, i8* [[CONV1]], align 1 // CHECK19-NEXT: [[CONV5:%.*]] = sext i8 [[TMP3]] to i32 // CHECK19-NEXT: [[ADD6:%.*]] = add nsw i32 [[CONV5]], 1 // CHECK19-NEXT: [[CONV7:%.*]] = trunc i32 [[ADD6]] to i8 -// CHECK19-NEXT: store i8 [[CONV7]], i8* [[CONV1]], align 4 +// CHECK19-NEXT: store i8 [[CONV7]], i8* [[CONV1]], align 1 // CHECK19-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], [10 x i32]* [[TMP0]], i32 0, i32 2 // 
CHECK19-NEXT: [[TMP4:%.*]] = load i32, i32* [[ARRAYIDX]], align 4 // CHECK19-NEXT: [[ADD8:%.*]] = add nsw i32 [[TMP4]], 1 @@ -11921,7 +11921,7 @@ int bar(int n){ // CHECK19-NEXT: [[TMP1:%.*]] = load i32, i32* [[A_ADDR]], align 4 // CHECK19-NEXT: store i32 [[TMP1]], i32* [[A_CASTED]], align 4 // CHECK19-NEXT: [[TMP2:%.*]] = load i32, i32* [[A_CASTED]], align 4 -// CHECK19-NEXT: [[TMP3:%.*]] = load i16, i16* [[CONV]], align 4 +// CHECK19-NEXT: [[TMP3:%.*]] = load i16, i16* [[CONV]], align 2 // CHECK19-NEXT: [[CONV1:%.*]] = bitcast i32* [[AA_CASTED]] to i16* // CHECK19-NEXT: store i16 [[TMP3]], i16* [[CONV1]], align 2 // CHECK19-NEXT: [[TMP4:%.*]] = load i32, i32* [[AA_CASTED]], align 4 @@ -11947,11 +11947,11 @@ int bar(int n){ // CHECK19-NEXT: [[TMP1:%.*]] = load i32, i32* [[A_ADDR]], align 4 // CHECK19-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP1]], 1 // CHECK19-NEXT: store i32 [[ADD]], i32* [[A_ADDR]], align 4 -// CHECK19-NEXT: [[TMP2:%.*]] = load i16, i16* [[CONV]], align 4 +// CHECK19-NEXT: [[TMP2:%.*]] = load i16, i16* [[CONV]], align 2 // CHECK19-NEXT: [[CONV1:%.*]] = sext i16 [[TMP2]] to i32 // CHECK19-NEXT: [[ADD2:%.*]] = add nsw i32 [[CONV1]], 1 // CHECK19-NEXT: [[CONV3:%.*]] = trunc i32 [[ADD2]] to i16 -// CHECK19-NEXT: store i16 [[CONV3]], i16* [[CONV]], align 4 +// CHECK19-NEXT: store i16 [[CONV3]], i16* [[CONV]], align 2 // CHECK19-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], [10 x i32]* [[TMP0]], i32 0, i32 2 // CHECK19-NEXT: [[TMP3:%.*]] = load i32, i32* [[ARRAYIDX]], align 4 // CHECK19-NEXT: [[ADD4:%.*]] = add nsw i32 [[TMP3]], 1 @@ -12342,7 +12342,7 @@ int bar(int n){ // CHECK20-NEXT: [[TMP1:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR__ADDR]], align 4 // CHECK20-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR__ADDR2]], align 4 // CHECK20-NEXT: call void @__kmpc_push_num_teams(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]], i32 [[TMP1]], i32 [[TMP2]]) -// CHECK20-NEXT: [[TMP3:%.*]] = load i16, i16* [[CONV]], align 4 +// CHECK20-NEXT: [[TMP3:%.*]] = load i16, i16* [[CONV]], align 2 // CHECK20-NEXT: [[CONV3:%.*]] = bitcast i32* [[AA_CASTED]] to i16* // CHECK20-NEXT: store i16 [[TMP3]], i16* [[CONV3]], align 2 // CHECK20-NEXT: [[TMP4:%.*]] = load i32, i32* [[AA_CASTED]], align 4 @@ -12503,7 +12503,7 @@ int bar(int n){ // CHECK20-NEXT: [[AA_CASTED:%.*]] = alloca i32, align 4 // CHECK20-NEXT: store i32 [[AA]], i32* [[AA_ADDR]], align 4 // CHECK20-NEXT: [[CONV:%.*]] = bitcast i32* [[AA_ADDR]] to i16* -// CHECK20-NEXT: [[TMP0:%.*]] = load i16, i16* [[CONV]], align 4 +// CHECK20-NEXT: [[TMP0:%.*]] = load i16, i16* [[CONV]], align 2 // CHECK20-NEXT: [[CONV1:%.*]] = bitcast i32* [[AA_CASTED]] to i16* // CHECK20-NEXT: store i16 [[TMP0]], i16* [[CONV1]], align 2 // CHECK20-NEXT: [[TMP1:%.*]] = load i32, i32* [[AA_CASTED]], align 4 @@ -12521,11 +12521,11 @@ int bar(int n){ // CHECK20-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4 // CHECK20-NEXT: store i32 [[AA]], i32* [[AA_ADDR]], align 4 // CHECK20-NEXT: [[CONV:%.*]] = bitcast i32* [[AA_ADDR]] to i16* -// CHECK20-NEXT: [[TMP0:%.*]] = load i16, i16* [[CONV]], align 4 +// CHECK20-NEXT: [[TMP0:%.*]] = load i16, i16* [[CONV]], align 2 // CHECK20-NEXT: [[CONV1:%.*]] = sext i16 [[TMP0]] to i32 // CHECK20-NEXT: [[ADD:%.*]] = add nsw i32 [[CONV1]], 1 // CHECK20-NEXT: [[CONV2:%.*]] = trunc i32 [[ADD]] to i16 -// CHECK20-NEXT: store i16 [[CONV2]], i16* [[CONV]], align 4 +// CHECK20-NEXT: store i16 [[CONV2]], i16* [[CONV]], align 2 // CHECK20-NEXT: ret void // // @@ -12542,7 +12542,7 @@ int bar(int n){ // 
CHECK20-NEXT: [[TMP0:%.*]] = load i32, i32* [[A_ADDR]], align 4 // CHECK20-NEXT: store i32 [[TMP0]], i32* [[A_CASTED]], align 4 // CHECK20-NEXT: [[TMP1:%.*]] = load i32, i32* [[A_CASTED]], align 4 -// CHECK20-NEXT: [[TMP2:%.*]] = load i16, i16* [[CONV]], align 4 +// CHECK20-NEXT: [[TMP2:%.*]] = load i16, i16* [[CONV]], align 2 // CHECK20-NEXT: [[CONV1:%.*]] = bitcast i32* [[AA_CASTED]] to i16* // CHECK20-NEXT: store i16 [[TMP2]], i16* [[CONV1]], align 2 // CHECK20-NEXT: [[TMP3:%.*]] = load i32, i32* [[AA_CASTED]], align 4 @@ -12565,11 +12565,11 @@ int bar(int n){ // CHECK20-NEXT: [[TMP0:%.*]] = load i32, i32* [[A_ADDR]], align 4 // CHECK20-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP0]], 1 // CHECK20-NEXT: store i32 [[ADD]], i32* [[A_ADDR]], align 4 -// CHECK20-NEXT: [[TMP1:%.*]] = load i16, i16* [[CONV]], align 4 +// CHECK20-NEXT: [[TMP1:%.*]] = load i16, i16* [[CONV]], align 2 // CHECK20-NEXT: [[CONV1:%.*]] = sext i16 [[TMP1]] to i32 // CHECK20-NEXT: [[ADD2:%.*]] = add nsw i32 [[CONV1]], 1 // CHECK20-NEXT: [[CONV3:%.*]] = trunc i32 [[ADD2]] to i16 -// CHECK20-NEXT: store i16 [[CONV3]], i16* [[CONV]], align 4 +// CHECK20-NEXT: store i16 [[CONV3]], i16* [[CONV]], align 2 // CHECK20-NEXT: ret void // // @@ -13197,11 +13197,11 @@ int bar(int n){ // CHECK20-NEXT: [[TMP1:%.*]] = load i32, i32* [[A_ADDR]], align 4 // CHECK20-NEXT: store i32 [[TMP1]], i32* [[A_CASTED]], align 4 // CHECK20-NEXT: [[TMP2:%.*]] = load i32, i32* [[A_CASTED]], align 4 -// CHECK20-NEXT: [[TMP3:%.*]] = load i16, i16* [[CONV]], align 4 +// CHECK20-NEXT: [[TMP3:%.*]] = load i16, i16* [[CONV]], align 2 // CHECK20-NEXT: [[CONV2:%.*]] = bitcast i32* [[AA_CASTED]] to i16* // CHECK20-NEXT: store i16 [[TMP3]], i16* [[CONV2]], align 2 // CHECK20-NEXT: [[TMP4:%.*]] = load i32, i32* [[AA_CASTED]], align 4 -// CHECK20-NEXT: [[TMP5:%.*]] = load i8, i8* [[CONV1]], align 4 +// CHECK20-NEXT: [[TMP5:%.*]] = load i8, i8* [[CONV1]], align 1 // CHECK20-NEXT: [[CONV3:%.*]] = bitcast i32* [[AAA_CASTED]] to i8* // CHECK20-NEXT: store i8 [[TMP5]], i8* [[CONV3]], align 1 // CHECK20-NEXT: [[TMP6:%.*]] = load i32, i32* [[AAA_CASTED]], align 4 @@ -13230,16 +13230,16 @@ int bar(int n){ // CHECK20-NEXT: [[TMP1:%.*]] = load i32, i32* [[A_ADDR]], align 4 // CHECK20-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP1]], 1 // CHECK20-NEXT: store i32 [[ADD]], i32* [[A_ADDR]], align 4 -// CHECK20-NEXT: [[TMP2:%.*]] = load i16, i16* [[CONV]], align 4 +// CHECK20-NEXT: [[TMP2:%.*]] = load i16, i16* [[CONV]], align 2 // CHECK20-NEXT: [[CONV2:%.*]] = sext i16 [[TMP2]] to i32 // CHECK20-NEXT: [[ADD3:%.*]] = add nsw i32 [[CONV2]], 1 // CHECK20-NEXT: [[CONV4:%.*]] = trunc i32 [[ADD3]] to i16 -// CHECK20-NEXT: store i16 [[CONV4]], i16* [[CONV]], align 4 -// CHECK20-NEXT: [[TMP3:%.*]] = load i8, i8* [[CONV1]], align 4 +// CHECK20-NEXT: store i16 [[CONV4]], i16* [[CONV]], align 2 +// CHECK20-NEXT: [[TMP3:%.*]] = load i8, i8* [[CONV1]], align 1 // CHECK20-NEXT: [[CONV5:%.*]] = sext i8 [[TMP3]] to i32 // CHECK20-NEXT: [[ADD6:%.*]] = add nsw i32 [[CONV5]], 1 // CHECK20-NEXT: [[CONV7:%.*]] = trunc i32 [[ADD6]] to i8 -// CHECK20-NEXT: store i8 [[CONV7]], i8* [[CONV1]], align 4 +// CHECK20-NEXT: store i8 [[CONV7]], i8* [[CONV1]], align 1 // CHECK20-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], [10 x i32]* [[TMP0]], i32 0, i32 2 // CHECK20-NEXT: [[TMP4:%.*]] = load i32, i32* [[ARRAYIDX]], align 4 // CHECK20-NEXT: [[ADD8:%.*]] = add nsw i32 [[TMP4]], 1 @@ -13263,7 +13263,7 @@ int bar(int n){ // CHECK20-NEXT: [[TMP1:%.*]] = load i32, i32* [[A_ADDR]], align 4 // CHECK20-NEXT: 
store i32 [[TMP1]], i32* [[A_CASTED]], align 4 // CHECK20-NEXT: [[TMP2:%.*]] = load i32, i32* [[A_CASTED]], align 4 -// CHECK20-NEXT: [[TMP3:%.*]] = load i16, i16* [[CONV]], align 4 +// CHECK20-NEXT: [[TMP3:%.*]] = load i16, i16* [[CONV]], align 2 // CHECK20-NEXT: [[CONV1:%.*]] = bitcast i32* [[AA_CASTED]] to i16* // CHECK20-NEXT: store i16 [[TMP3]], i16* [[CONV1]], align 2 // CHECK20-NEXT: [[TMP4:%.*]] = load i32, i32* [[AA_CASTED]], align 4 @@ -13289,11 +13289,11 @@ int bar(int n){ // CHECK20-NEXT: [[TMP1:%.*]] = load i32, i32* [[A_ADDR]], align 4 // CHECK20-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP1]], 1 // CHECK20-NEXT: store i32 [[ADD]], i32* [[A_ADDR]], align 4 -// CHECK20-NEXT: [[TMP2:%.*]] = load i16, i16* [[CONV]], align 4 +// CHECK20-NEXT: [[TMP2:%.*]] = load i16, i16* [[CONV]], align 2 // CHECK20-NEXT: [[CONV1:%.*]] = sext i16 [[TMP2]] to i32 // CHECK20-NEXT: [[ADD2:%.*]] = add nsw i32 [[CONV1]], 1 // CHECK20-NEXT: [[CONV3:%.*]] = trunc i32 [[ADD2]] to i16 -// CHECK20-NEXT: store i16 [[CONV3]], i16* [[CONV]], align 4 +// CHECK20-NEXT: store i16 [[CONV3]], i16* [[CONV]], align 2 // CHECK20-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], [10 x i32]* [[TMP0]], i32 0, i32 2 // CHECK20-NEXT: [[TMP3:%.*]] = load i32, i32* [[ARRAYIDX]], align 4 // CHECK20-NEXT: [[ADD4:%.*]] = add nsw i32 [[TMP3]], 1 @@ -13322,10 +13322,10 @@ int bar(int n){ // CHECK25-NEXT: [[CONV:%.*]] = bitcast i64* [[AA_ADDR]] to i16* // CHECK25-NEXT: [[CONV3:%.*]] = bitcast i64* [[DOTCAPTURE_EXPR__ADDR]] to i32* // CHECK25-NEXT: [[CONV4:%.*]] = bitcast i64* [[DOTCAPTURE_EXPR__ADDR2]] to i32* -// CHECK25-NEXT: [[TMP1:%.*]] = load i32, i32* [[CONV3]], align 8 -// CHECK25-NEXT: [[TMP2:%.*]] = load i32, i32* [[CONV4]], align 8 +// CHECK25-NEXT: [[TMP1:%.*]] = load i32, i32* [[CONV3]], align 4 +// CHECK25-NEXT: [[TMP2:%.*]] = load i32, i32* [[CONV4]], align 4 // CHECK25-NEXT: call void @__kmpc_push_num_teams(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]], i32 [[TMP1]], i32 [[TMP2]]) -// CHECK25-NEXT: [[TMP3:%.*]] = load i16, i16* [[CONV]], align 8 +// CHECK25-NEXT: [[TMP3:%.*]] = load i16, i16* [[CONV]], align 2 // CHECK25-NEXT: [[CONV5:%.*]] = bitcast i64* [[AA_CASTED]] to i16* // CHECK25-NEXT: store i16 [[TMP3]], i16* [[CONV5]], align 2 // CHECK25-NEXT: [[TMP4:%.*]] = load i64, i64* [[AA_CASTED]], align 8 @@ -13353,7 +13353,7 @@ int bar(int n){ // CHECK25-NEXT: [[AA_CASTED:%.*]] = alloca i64, align 8 // CHECK25-NEXT: store i64 [[AA]], i64* [[AA_ADDR]], align 8 // CHECK25-NEXT: [[CONV:%.*]] = bitcast i64* [[AA_ADDR]] to i16* -// CHECK25-NEXT: [[TMP0:%.*]] = load i16, i16* [[CONV]], align 8 +// CHECK25-NEXT: [[TMP0:%.*]] = load i16, i16* [[CONV]], align 2 // CHECK25-NEXT: [[CONV1:%.*]] = bitcast i64* [[AA_CASTED]] to i16* // CHECK25-NEXT: store i16 [[TMP0]], i16* [[CONV1]], align 2 // CHECK25-NEXT: [[TMP1:%.*]] = load i64, i64* [[AA_CASTED]], align 8 @@ -13371,11 +13371,11 @@ int bar(int n){ // CHECK25-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 // CHECK25-NEXT: store i64 [[AA]], i64* [[AA_ADDR]], align 8 // CHECK25-NEXT: [[CONV:%.*]] = bitcast i64* [[AA_ADDR]] to i16* -// CHECK25-NEXT: [[TMP0:%.*]] = load i16, i16* [[CONV]], align 8 +// CHECK25-NEXT: [[TMP0:%.*]] = load i16, i16* [[CONV]], align 2 // CHECK25-NEXT: [[CONV1:%.*]] = sext i16 [[TMP0]] to i32 // CHECK25-NEXT: [[ADD:%.*]] = add nsw i32 [[CONV1]], 1 // CHECK25-NEXT: [[CONV2:%.*]] = trunc i32 [[ADD]] to i16 -// CHECK25-NEXT: store i16 [[CONV2]], i16* [[CONV]], align 8 +// CHECK25-NEXT: store i16 [[CONV2]], i16* [[CONV]], align 
2 // CHECK25-NEXT: ret void // // @@ -13390,11 +13390,11 @@ int bar(int n){ // CHECK25-NEXT: store i64 [[AA]], i64* [[AA_ADDR]], align 8 // CHECK25-NEXT: [[CONV:%.*]] = bitcast i64* [[A_ADDR]] to i32* // CHECK25-NEXT: [[CONV1:%.*]] = bitcast i64* [[AA_ADDR]] to i16* -// CHECK25-NEXT: [[TMP0:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK25-NEXT: [[TMP0:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK25-NEXT: [[CONV2:%.*]] = bitcast i64* [[A_CASTED]] to i32* // CHECK25-NEXT: store i32 [[TMP0]], i32* [[CONV2]], align 4 // CHECK25-NEXT: [[TMP1:%.*]] = load i64, i64* [[A_CASTED]], align 8 -// CHECK25-NEXT: [[TMP2:%.*]] = load i16, i16* [[CONV1]], align 8 +// CHECK25-NEXT: [[TMP2:%.*]] = load i16, i16* [[CONV1]], align 2 // CHECK25-NEXT: [[CONV3:%.*]] = bitcast i64* [[AA_CASTED]] to i16* // CHECK25-NEXT: store i16 [[TMP2]], i16* [[CONV3]], align 2 // CHECK25-NEXT: [[TMP3:%.*]] = load i64, i64* [[AA_CASTED]], align 8 @@ -13415,14 +13415,14 @@ int bar(int n){ // CHECK25-NEXT: store i64 [[AA]], i64* [[AA_ADDR]], align 8 // CHECK25-NEXT: [[CONV:%.*]] = bitcast i64* [[A_ADDR]] to i32* // CHECK25-NEXT: [[CONV1:%.*]] = bitcast i64* [[AA_ADDR]] to i16* -// CHECK25-NEXT: [[TMP0:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK25-NEXT: [[TMP0:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK25-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP0]], 1 -// CHECK25-NEXT: store i32 [[ADD]], i32* [[CONV]], align 8 -// CHECK25-NEXT: [[TMP1:%.*]] = load i16, i16* [[CONV1]], align 8 +// CHECK25-NEXT: store i32 [[ADD]], i32* [[CONV]], align 4 +// CHECK25-NEXT: [[TMP1:%.*]] = load i16, i16* [[CONV1]], align 2 // CHECK25-NEXT: [[CONV2:%.*]] = sext i16 [[TMP1]] to i32 // CHECK25-NEXT: [[ADD3:%.*]] = add nsw i32 [[CONV2]], 1 // CHECK25-NEXT: [[CONV4:%.*]] = trunc i32 [[ADD3]] to i16 -// CHECK25-NEXT: store i16 [[CONV4]], i16* [[CONV1]], align 8 +// CHECK25-NEXT: store i16 [[CONV4]], i16* [[CONV1]], align 2 // CHECK25-NEXT: ret void // // @@ -13457,7 +13457,7 @@ int bar(int n){ // CHECK25-NEXT: [[TMP5:%.*]] = load i64, i64* [[VLA_ADDR4]], align 8 // CHECK25-NEXT: [[TMP6:%.*]] = load double*, double** [[CN_ADDR]], align 8 // CHECK25-NEXT: [[TMP7:%.*]] = load %struct.TT*, %struct.TT** [[D_ADDR]], align 8 -// CHECK25-NEXT: [[TMP8:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK25-NEXT: [[TMP8:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK25-NEXT: [[CONV5:%.*]] = bitcast i64* [[A_CASTED]] to i32* // CHECK25-NEXT: store i32 [[TMP8]], i32* [[CONV5]], align 4 // CHECK25-NEXT: [[TMP9:%.*]] = load i64, i64* [[A_CASTED]], align 8 @@ -13499,9 +13499,9 @@ int bar(int n){ // CHECK25-NEXT: [[TMP5:%.*]] = load i64, i64* [[VLA_ADDR4]], align 8 // CHECK25-NEXT: [[TMP6:%.*]] = load double*, double** [[CN_ADDR]], align 8 // CHECK25-NEXT: [[TMP7:%.*]] = load %struct.TT*, %struct.TT** [[D_ADDR]], align 8 -// CHECK25-NEXT: [[TMP8:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK25-NEXT: [[TMP8:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK25-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP8]], 1 -// CHECK25-NEXT: store i32 [[ADD]], i32* [[CONV]], align 8 +// CHECK25-NEXT: store i32 [[ADD]], i32* [[CONV]], align 4 // CHECK25-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x float], [10 x float]* [[TMP0]], i64 0, i64 2 // CHECK25-NEXT: [[TMP9:%.*]] = load float, float* [[ARRAYIDX]], align 4 // CHECK25-NEXT: [[CONV5:%.*]] = fpext float [[TMP9]] to double @@ -13545,7 +13545,7 @@ int bar(int n){ // CHECK25-NEXT: [[NN_CASTED:%.*]] = alloca i64, align 8 // CHECK25-NEXT: store i64 [[NN]], i64* [[NN_ADDR]], align 8 // CHECK25-NEXT: [[CONV:%.*]] = bitcast 
i64* [[NN_ADDR]] to i32* -// CHECK25-NEXT: [[TMP0:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK25-NEXT: [[TMP0:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK25-NEXT: [[CONV1:%.*]] = bitcast i64* [[NN_CASTED]] to i32* // CHECK25-NEXT: store i32 [[TMP0]], i32* [[CONV1]], align 4 // CHECK25-NEXT: [[TMP1:%.*]] = load i64, i64* [[NN_CASTED]], align 8 @@ -13564,7 +13564,7 @@ int bar(int n){ // CHECK25-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 // CHECK25-NEXT: store i64 [[NN]], i64* [[NN_ADDR]], align 8 // CHECK25-NEXT: [[CONV:%.*]] = bitcast i64* [[NN_ADDR]] to i32* -// CHECK25-NEXT: [[TMP0:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK25-NEXT: [[TMP0:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK25-NEXT: [[CONV1:%.*]] = bitcast i64* [[NN_CASTED]] to i32* // CHECK25-NEXT: store i32 [[TMP0]], i32* [[CONV1]], align 4 // CHECK25-NEXT: [[TMP1:%.*]] = load i64, i64* [[NN_CASTED]], align 8 @@ -13592,7 +13592,7 @@ int bar(int n){ // CHECK25-NEXT: [[NN_CASTED:%.*]] = alloca i64, align 8 // CHECK25-NEXT: store i64 [[NN]], i64* [[NN_ADDR]], align 8 // CHECK25-NEXT: [[CONV:%.*]] = bitcast i64* [[NN_ADDR]] to i32* -// CHECK25-NEXT: [[TMP0:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK25-NEXT: [[TMP0:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK25-NEXT: [[CONV1:%.*]] = bitcast i64* [[NN_CASTED]] to i32* // CHECK25-NEXT: store i32 [[TMP0]], i32* [[CONV1]], align 4 // CHECK25-NEXT: [[TMP1:%.*]] = load i64, i64* [[NN_CASTED]], align 8 @@ -13669,15 +13669,15 @@ int bar(int n){ // CHECK25-NEXT: [[CONV1:%.*]] = bitcast i64* [[AA_ADDR]] to i16* // CHECK25-NEXT: [[CONV2:%.*]] = bitcast i64* [[AAA_ADDR]] to i8* // CHECK25-NEXT: [[TMP0:%.*]] = load [10 x i32]*, [10 x i32]** [[B_ADDR]], align 8 -// CHECK25-NEXT: [[TMP1:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK25-NEXT: [[TMP1:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK25-NEXT: [[CONV3:%.*]] = bitcast i64* [[A_CASTED]] to i32* // CHECK25-NEXT: store i32 [[TMP1]], i32* [[CONV3]], align 4 // CHECK25-NEXT: [[TMP2:%.*]] = load i64, i64* [[A_CASTED]], align 8 -// CHECK25-NEXT: [[TMP3:%.*]] = load i16, i16* [[CONV1]], align 8 +// CHECK25-NEXT: [[TMP3:%.*]] = load i16, i16* [[CONV1]], align 2 // CHECK25-NEXT: [[CONV4:%.*]] = bitcast i64* [[AA_CASTED]] to i16* // CHECK25-NEXT: store i16 [[TMP3]], i16* [[CONV4]], align 2 // CHECK25-NEXT: [[TMP4:%.*]] = load i64, i64* [[AA_CASTED]], align 8 -// CHECK25-NEXT: [[TMP5:%.*]] = load i8, i8* [[CONV2]], align 8 +// CHECK25-NEXT: [[TMP5:%.*]] = load i8, i8* [[CONV2]], align 1 // CHECK25-NEXT: [[CONV5:%.*]] = bitcast i64* [[AAA_CASTED]] to i8* // CHECK25-NEXT: store i8 [[TMP5]], i8* [[CONV5]], align 1 // CHECK25-NEXT: [[TMP6:%.*]] = load i64, i64* [[AAA_CASTED]], align 8 @@ -13704,19 +13704,19 @@ int bar(int n){ // CHECK25-NEXT: [[CONV1:%.*]] = bitcast i64* [[AA_ADDR]] to i16* // CHECK25-NEXT: [[CONV2:%.*]] = bitcast i64* [[AAA_ADDR]] to i8* // CHECK25-NEXT: [[TMP0:%.*]] = load [10 x i32]*, [10 x i32]** [[B_ADDR]], align 8 -// CHECK25-NEXT: [[TMP1:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK25-NEXT: [[TMP1:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK25-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP1]], 1 -// CHECK25-NEXT: store i32 [[ADD]], i32* [[CONV]], align 8 -// CHECK25-NEXT: [[TMP2:%.*]] = load i16, i16* [[CONV1]], align 8 +// CHECK25-NEXT: store i32 [[ADD]], i32* [[CONV]], align 4 +// CHECK25-NEXT: [[TMP2:%.*]] = load i16, i16* [[CONV1]], align 2 // CHECK25-NEXT: [[CONV3:%.*]] = sext i16 [[TMP2]] to i32 // CHECK25-NEXT: [[ADD4:%.*]] = add nsw i32 [[CONV3]], 1 // 
CHECK25-NEXT: [[CONV5:%.*]] = trunc i32 [[ADD4]] to i16 -// CHECK25-NEXT: store i16 [[CONV5]], i16* [[CONV1]], align 8 -// CHECK25-NEXT: [[TMP3:%.*]] = load i8, i8* [[CONV2]], align 8 +// CHECK25-NEXT: store i16 [[CONV5]], i16* [[CONV1]], align 2 +// CHECK25-NEXT: [[TMP3:%.*]] = load i8, i8* [[CONV2]], align 1 // CHECK25-NEXT: [[CONV6:%.*]] = sext i8 [[TMP3]] to i32 // CHECK25-NEXT: [[ADD7:%.*]] = add nsw i32 [[CONV6]], 1 // CHECK25-NEXT: [[CONV8:%.*]] = trunc i32 [[ADD7]] to i8 -// CHECK25-NEXT: store i8 [[CONV8]], i8* [[CONV2]], align 8 +// CHECK25-NEXT: store i8 [[CONV8]], i8* [[CONV2]], align 1 // CHECK25-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], [10 x i32]* [[TMP0]], i64 0, i64 2 // CHECK25-NEXT: [[TMP4:%.*]] = load i32, i32* [[ARRAYIDX]], align 4 // CHECK25-NEXT: [[ADD9:%.*]] = add nsw i32 [[TMP4]], 1 @@ -13743,7 +13743,7 @@ int bar(int n){ // CHECK25-NEXT: [[TMP1:%.*]] = load i64, i64* [[VLA_ADDR]], align 8 // CHECK25-NEXT: [[TMP2:%.*]] = load i64, i64* [[VLA_ADDR2]], align 8 // CHECK25-NEXT: [[TMP3:%.*]] = load i16*, i16** [[C_ADDR]], align 8 -// CHECK25-NEXT: [[TMP4:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK25-NEXT: [[TMP4:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK25-NEXT: [[CONV3:%.*]] = bitcast i64* [[B_CASTED]] to i32* // CHECK25-NEXT: store i32 [[TMP4]], i32* [[CONV3]], align 4 // CHECK25-NEXT: [[TMP5:%.*]] = load i64, i64* [[B_CASTED]], align 8 @@ -13773,7 +13773,7 @@ int bar(int n){ // CHECK25-NEXT: [[TMP1:%.*]] = load i64, i64* [[VLA_ADDR]], align 8 // CHECK25-NEXT: [[TMP2:%.*]] = load i64, i64* [[VLA_ADDR2]], align 8 // CHECK25-NEXT: [[TMP3:%.*]] = load i16*, i16** [[C_ADDR]], align 8 -// CHECK25-NEXT: [[TMP4:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK25-NEXT: [[TMP4:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK25-NEXT: [[CONV3:%.*]] = sitofp i32 [[TMP4]] to double // CHECK25-NEXT: [[ADD:%.*]] = fadd double [[CONV3]], 1.500000e+00 // CHECK25-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_S1:%.*]], %struct.S1* [[TMP0]], i32 0, i32 0 @@ -13804,11 +13804,11 @@ int bar(int n){ // CHECK25-NEXT: [[CONV:%.*]] = bitcast i64* [[A_ADDR]] to i32* // CHECK25-NEXT: [[CONV1:%.*]] = bitcast i64* [[AA_ADDR]] to i16* // CHECK25-NEXT: [[TMP0:%.*]] = load [10 x i32]*, [10 x i32]** [[B_ADDR]], align 8 -// CHECK25-NEXT: [[TMP1:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK25-NEXT: [[TMP1:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK25-NEXT: [[CONV2:%.*]] = bitcast i64* [[A_CASTED]] to i32* // CHECK25-NEXT: store i32 [[TMP1]], i32* [[CONV2]], align 4 // CHECK25-NEXT: [[TMP2:%.*]] = load i64, i64* [[A_CASTED]], align 8 -// CHECK25-NEXT: [[TMP3:%.*]] = load i16, i16* [[CONV1]], align 8 +// CHECK25-NEXT: [[TMP3:%.*]] = load i16, i16* [[CONV1]], align 2 // CHECK25-NEXT: [[CONV3:%.*]] = bitcast i64* [[AA_CASTED]] to i16* // CHECK25-NEXT: store i16 [[TMP3]], i16* [[CONV3]], align 2 // CHECK25-NEXT: [[TMP4:%.*]] = load i64, i64* [[AA_CASTED]], align 8 @@ -13832,14 +13832,14 @@ int bar(int n){ // CHECK25-NEXT: [[CONV:%.*]] = bitcast i64* [[A_ADDR]] to i32* // CHECK25-NEXT: [[CONV1:%.*]] = bitcast i64* [[AA_ADDR]] to i16* // CHECK25-NEXT: [[TMP0:%.*]] = load [10 x i32]*, [10 x i32]** [[B_ADDR]], align 8 -// CHECK25-NEXT: [[TMP1:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK25-NEXT: [[TMP1:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK25-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP1]], 1 -// CHECK25-NEXT: store i32 [[ADD]], i32* [[CONV]], align 8 -// CHECK25-NEXT: [[TMP2:%.*]] = load i16, i16* [[CONV1]], align 8 +// CHECK25-NEXT: store i32 [[ADD]], 
i32* [[CONV]], align 4 +// CHECK25-NEXT: [[TMP2:%.*]] = load i16, i16* [[CONV1]], align 2 // CHECK25-NEXT: [[CONV2:%.*]] = sext i16 [[TMP2]] to i32 // CHECK25-NEXT: [[ADD3:%.*]] = add nsw i32 [[CONV2]], 1 // CHECK25-NEXT: [[CONV4:%.*]] = trunc i32 [[ADD3]] to i16 -// CHECK25-NEXT: store i16 [[CONV4]], i16* [[CONV1]], align 8 +// CHECK25-NEXT: store i16 [[CONV4]], i16* [[CONV1]], align 2 // CHECK25-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], [10 x i32]* [[TMP0]], i64 0, i64 2 // CHECK25-NEXT: [[TMP3:%.*]] = load i32, i32* [[ARRAYIDX]], align 4 // CHECK25-NEXT: [[ADD5:%.*]] = add nsw i32 [[TMP3]], 1 @@ -13861,10 +13861,10 @@ int bar(int n){ // CHECK26-NEXT: [[CONV:%.*]] = bitcast i64* [[AA_ADDR]] to i16* // CHECK26-NEXT: [[CONV3:%.*]] = bitcast i64* [[DOTCAPTURE_EXPR__ADDR]] to i32* // CHECK26-NEXT: [[CONV4:%.*]] = bitcast i64* [[DOTCAPTURE_EXPR__ADDR2]] to i32* -// CHECK26-NEXT: [[TMP1:%.*]] = load i32, i32* [[CONV3]], align 8 -// CHECK26-NEXT: [[TMP2:%.*]] = load i32, i32* [[CONV4]], align 8 +// CHECK26-NEXT: [[TMP1:%.*]] = load i32, i32* [[CONV3]], align 4 +// CHECK26-NEXT: [[TMP2:%.*]] = load i32, i32* [[CONV4]], align 4 // CHECK26-NEXT: call void @__kmpc_push_num_teams(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]], i32 [[TMP1]], i32 [[TMP2]]) -// CHECK26-NEXT: [[TMP3:%.*]] = load i16, i16* [[CONV]], align 8 +// CHECK26-NEXT: [[TMP3:%.*]] = load i16, i16* [[CONV]], align 2 // CHECK26-NEXT: [[CONV5:%.*]] = bitcast i64* [[AA_CASTED]] to i16* // CHECK26-NEXT: store i16 [[TMP3]], i16* [[CONV5]], align 2 // CHECK26-NEXT: [[TMP4:%.*]] = load i64, i64* [[AA_CASTED]], align 8 @@ -13892,7 +13892,7 @@ int bar(int n){ // CHECK26-NEXT: [[AA_CASTED:%.*]] = alloca i64, align 8 // CHECK26-NEXT: store i64 [[AA]], i64* [[AA_ADDR]], align 8 // CHECK26-NEXT: [[CONV:%.*]] = bitcast i64* [[AA_ADDR]] to i16* -// CHECK26-NEXT: [[TMP0:%.*]] = load i16, i16* [[CONV]], align 8 +// CHECK26-NEXT: [[TMP0:%.*]] = load i16, i16* [[CONV]], align 2 // CHECK26-NEXT: [[CONV1:%.*]] = bitcast i64* [[AA_CASTED]] to i16* // CHECK26-NEXT: store i16 [[TMP0]], i16* [[CONV1]], align 2 // CHECK26-NEXT: [[TMP1:%.*]] = load i64, i64* [[AA_CASTED]], align 8 @@ -13910,11 +13910,11 @@ int bar(int n){ // CHECK26-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 // CHECK26-NEXT: store i64 [[AA]], i64* [[AA_ADDR]], align 8 // CHECK26-NEXT: [[CONV:%.*]] = bitcast i64* [[AA_ADDR]] to i16* -// CHECK26-NEXT: [[TMP0:%.*]] = load i16, i16* [[CONV]], align 8 +// CHECK26-NEXT: [[TMP0:%.*]] = load i16, i16* [[CONV]], align 2 // CHECK26-NEXT: [[CONV1:%.*]] = sext i16 [[TMP0]] to i32 // CHECK26-NEXT: [[ADD:%.*]] = add nsw i32 [[CONV1]], 1 // CHECK26-NEXT: [[CONV2:%.*]] = trunc i32 [[ADD]] to i16 -// CHECK26-NEXT: store i16 [[CONV2]], i16* [[CONV]], align 8 +// CHECK26-NEXT: store i16 [[CONV2]], i16* [[CONV]], align 2 // CHECK26-NEXT: ret void // // @@ -13929,11 +13929,11 @@ int bar(int n){ // CHECK26-NEXT: store i64 [[AA]], i64* [[AA_ADDR]], align 8 // CHECK26-NEXT: [[CONV:%.*]] = bitcast i64* [[A_ADDR]] to i32* // CHECK26-NEXT: [[CONV1:%.*]] = bitcast i64* [[AA_ADDR]] to i16* -// CHECK26-NEXT: [[TMP0:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK26-NEXT: [[TMP0:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK26-NEXT: [[CONV2:%.*]] = bitcast i64* [[A_CASTED]] to i32* // CHECK26-NEXT: store i32 [[TMP0]], i32* [[CONV2]], align 4 // CHECK26-NEXT: [[TMP1:%.*]] = load i64, i64* [[A_CASTED]], align 8 -// CHECK26-NEXT: [[TMP2:%.*]] = load i16, i16* [[CONV1]], align 8 +// CHECK26-NEXT: [[TMP2:%.*]] = load i16, i16* 
[[CONV1]], align 2 // CHECK26-NEXT: [[CONV3:%.*]] = bitcast i64* [[AA_CASTED]] to i16* // CHECK26-NEXT: store i16 [[TMP2]], i16* [[CONV3]], align 2 // CHECK26-NEXT: [[TMP3:%.*]] = load i64, i64* [[AA_CASTED]], align 8 @@ -13954,14 +13954,14 @@ int bar(int n){ // CHECK26-NEXT: store i64 [[AA]], i64* [[AA_ADDR]], align 8 // CHECK26-NEXT: [[CONV:%.*]] = bitcast i64* [[A_ADDR]] to i32* // CHECK26-NEXT: [[CONV1:%.*]] = bitcast i64* [[AA_ADDR]] to i16* -// CHECK26-NEXT: [[TMP0:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK26-NEXT: [[TMP0:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK26-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP0]], 1 -// CHECK26-NEXT: store i32 [[ADD]], i32* [[CONV]], align 8 -// CHECK26-NEXT: [[TMP1:%.*]] = load i16, i16* [[CONV1]], align 8 +// CHECK26-NEXT: store i32 [[ADD]], i32* [[CONV]], align 4 +// CHECK26-NEXT: [[TMP1:%.*]] = load i16, i16* [[CONV1]], align 2 // CHECK26-NEXT: [[CONV2:%.*]] = sext i16 [[TMP1]] to i32 // CHECK26-NEXT: [[ADD3:%.*]] = add nsw i32 [[CONV2]], 1 // CHECK26-NEXT: [[CONV4:%.*]] = trunc i32 [[ADD3]] to i16 -// CHECK26-NEXT: store i16 [[CONV4]], i16* [[CONV1]], align 8 +// CHECK26-NEXT: store i16 [[CONV4]], i16* [[CONV1]], align 2 // CHECK26-NEXT: ret void // // @@ -13996,7 +13996,7 @@ int bar(int n){ // CHECK26-NEXT: [[TMP5:%.*]] = load i64, i64* [[VLA_ADDR4]], align 8 // CHECK26-NEXT: [[TMP6:%.*]] = load double*, double** [[CN_ADDR]], align 8 // CHECK26-NEXT: [[TMP7:%.*]] = load %struct.TT*, %struct.TT** [[D_ADDR]], align 8 -// CHECK26-NEXT: [[TMP8:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK26-NEXT: [[TMP8:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK26-NEXT: [[CONV5:%.*]] = bitcast i64* [[A_CASTED]] to i32* // CHECK26-NEXT: store i32 [[TMP8]], i32* [[CONV5]], align 4 // CHECK26-NEXT: [[TMP9:%.*]] = load i64, i64* [[A_CASTED]], align 8 @@ -14038,9 +14038,9 @@ int bar(int n){ // CHECK26-NEXT: [[TMP5:%.*]] = load i64, i64* [[VLA_ADDR4]], align 8 // CHECK26-NEXT: [[TMP6:%.*]] = load double*, double** [[CN_ADDR]], align 8 // CHECK26-NEXT: [[TMP7:%.*]] = load %struct.TT*, %struct.TT** [[D_ADDR]], align 8 -// CHECK26-NEXT: [[TMP8:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK26-NEXT: [[TMP8:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK26-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP8]], 1 -// CHECK26-NEXT: store i32 [[ADD]], i32* [[CONV]], align 8 +// CHECK26-NEXT: store i32 [[ADD]], i32* [[CONV]], align 4 // CHECK26-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x float], [10 x float]* [[TMP0]], i64 0, i64 2 // CHECK26-NEXT: [[TMP9:%.*]] = load float, float* [[ARRAYIDX]], align 4 // CHECK26-NEXT: [[CONV5:%.*]] = fpext float [[TMP9]] to double @@ -14084,7 +14084,7 @@ int bar(int n){ // CHECK26-NEXT: [[NN_CASTED:%.*]] = alloca i64, align 8 // CHECK26-NEXT: store i64 [[NN]], i64* [[NN_ADDR]], align 8 // CHECK26-NEXT: [[CONV:%.*]] = bitcast i64* [[NN_ADDR]] to i32* -// CHECK26-NEXT: [[TMP0:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK26-NEXT: [[TMP0:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK26-NEXT: [[CONV1:%.*]] = bitcast i64* [[NN_CASTED]] to i32* // CHECK26-NEXT: store i32 [[TMP0]], i32* [[CONV1]], align 4 // CHECK26-NEXT: [[TMP1:%.*]] = load i64, i64* [[NN_CASTED]], align 8 @@ -14103,7 +14103,7 @@ int bar(int n){ // CHECK26-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 // CHECK26-NEXT: store i64 [[NN]], i64* [[NN_ADDR]], align 8 // CHECK26-NEXT: [[CONV:%.*]] = bitcast i64* [[NN_ADDR]] to i32* -// CHECK26-NEXT: [[TMP0:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK26-NEXT: [[TMP0:%.*]] = load 
i32, i32* [[CONV]], align 4 // CHECK26-NEXT: [[CONV1:%.*]] = bitcast i64* [[NN_CASTED]] to i32* // CHECK26-NEXT: store i32 [[TMP0]], i32* [[CONV1]], align 4 // CHECK26-NEXT: [[TMP1:%.*]] = load i64, i64* [[NN_CASTED]], align 8 @@ -14131,7 +14131,7 @@ int bar(int n){ // CHECK26-NEXT: [[NN_CASTED:%.*]] = alloca i64, align 8 // CHECK26-NEXT: store i64 [[NN]], i64* [[NN_ADDR]], align 8 // CHECK26-NEXT: [[CONV:%.*]] = bitcast i64* [[NN_ADDR]] to i32* -// CHECK26-NEXT: [[TMP0:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK26-NEXT: [[TMP0:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK26-NEXT: [[CONV1:%.*]] = bitcast i64* [[NN_CASTED]] to i32* // CHECK26-NEXT: store i32 [[TMP0]], i32* [[CONV1]], align 4 // CHECK26-NEXT: [[TMP1:%.*]] = load i64, i64* [[NN_CASTED]], align 8 @@ -14208,15 +14208,15 @@ int bar(int n){ // CHECK26-NEXT: [[CONV1:%.*]] = bitcast i64* [[AA_ADDR]] to i16* // CHECK26-NEXT: [[CONV2:%.*]] = bitcast i64* [[AAA_ADDR]] to i8* // CHECK26-NEXT: [[TMP0:%.*]] = load [10 x i32]*, [10 x i32]** [[B_ADDR]], align 8 -// CHECK26-NEXT: [[TMP1:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK26-NEXT: [[TMP1:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK26-NEXT: [[CONV3:%.*]] = bitcast i64* [[A_CASTED]] to i32* // CHECK26-NEXT: store i32 [[TMP1]], i32* [[CONV3]], align 4 // CHECK26-NEXT: [[TMP2:%.*]] = load i64, i64* [[A_CASTED]], align 8 -// CHECK26-NEXT: [[TMP3:%.*]] = load i16, i16* [[CONV1]], align 8 +// CHECK26-NEXT: [[TMP3:%.*]] = load i16, i16* [[CONV1]], align 2 // CHECK26-NEXT: [[CONV4:%.*]] = bitcast i64* [[AA_CASTED]] to i16* // CHECK26-NEXT: store i16 [[TMP3]], i16* [[CONV4]], align 2 // CHECK26-NEXT: [[TMP4:%.*]] = load i64, i64* [[AA_CASTED]], align 8 -// CHECK26-NEXT: [[TMP5:%.*]] = load i8, i8* [[CONV2]], align 8 +// CHECK26-NEXT: [[TMP5:%.*]] = load i8, i8* [[CONV2]], align 1 // CHECK26-NEXT: [[CONV5:%.*]] = bitcast i64* [[AAA_CASTED]] to i8* // CHECK26-NEXT: store i8 [[TMP5]], i8* [[CONV5]], align 1 // CHECK26-NEXT: [[TMP6:%.*]] = load i64, i64* [[AAA_CASTED]], align 8 @@ -14243,19 +14243,19 @@ int bar(int n){ // CHECK26-NEXT: [[CONV1:%.*]] = bitcast i64* [[AA_ADDR]] to i16* // CHECK26-NEXT: [[CONV2:%.*]] = bitcast i64* [[AAA_ADDR]] to i8* // CHECK26-NEXT: [[TMP0:%.*]] = load [10 x i32]*, [10 x i32]** [[B_ADDR]], align 8 -// CHECK26-NEXT: [[TMP1:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK26-NEXT: [[TMP1:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK26-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP1]], 1 -// CHECK26-NEXT: store i32 [[ADD]], i32* [[CONV]], align 8 -// CHECK26-NEXT: [[TMP2:%.*]] = load i16, i16* [[CONV1]], align 8 +// CHECK26-NEXT: store i32 [[ADD]], i32* [[CONV]], align 4 +// CHECK26-NEXT: [[TMP2:%.*]] = load i16, i16* [[CONV1]], align 2 // CHECK26-NEXT: [[CONV3:%.*]] = sext i16 [[TMP2]] to i32 // CHECK26-NEXT: [[ADD4:%.*]] = add nsw i32 [[CONV3]], 1 // CHECK26-NEXT: [[CONV5:%.*]] = trunc i32 [[ADD4]] to i16 -// CHECK26-NEXT: store i16 [[CONV5]], i16* [[CONV1]], align 8 -// CHECK26-NEXT: [[TMP3:%.*]] = load i8, i8* [[CONV2]], align 8 +// CHECK26-NEXT: store i16 [[CONV5]], i16* [[CONV1]], align 2 +// CHECK26-NEXT: [[TMP3:%.*]] = load i8, i8* [[CONV2]], align 1 // CHECK26-NEXT: [[CONV6:%.*]] = sext i8 [[TMP3]] to i32 // CHECK26-NEXT: [[ADD7:%.*]] = add nsw i32 [[CONV6]], 1 // CHECK26-NEXT: [[CONV8:%.*]] = trunc i32 [[ADD7]] to i8 -// CHECK26-NEXT: store i8 [[CONV8]], i8* [[CONV2]], align 8 +// CHECK26-NEXT: store i8 [[CONV8]], i8* [[CONV2]], align 1 // CHECK26-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], [10 x i32]* [[TMP0]], i64 0, i64 
2 // CHECK26-NEXT: [[TMP4:%.*]] = load i32, i32* [[ARRAYIDX]], align 4 // CHECK26-NEXT: [[ADD9:%.*]] = add nsw i32 [[TMP4]], 1 @@ -14282,7 +14282,7 @@ int bar(int n){ // CHECK26-NEXT: [[TMP1:%.*]] = load i64, i64* [[VLA_ADDR]], align 8 // CHECK26-NEXT: [[TMP2:%.*]] = load i64, i64* [[VLA_ADDR2]], align 8 // CHECK26-NEXT: [[TMP3:%.*]] = load i16*, i16** [[C_ADDR]], align 8 -// CHECK26-NEXT: [[TMP4:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK26-NEXT: [[TMP4:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK26-NEXT: [[CONV3:%.*]] = bitcast i64* [[B_CASTED]] to i32* // CHECK26-NEXT: store i32 [[TMP4]], i32* [[CONV3]], align 4 // CHECK26-NEXT: [[TMP5:%.*]] = load i64, i64* [[B_CASTED]], align 8 @@ -14312,7 +14312,7 @@ int bar(int n){ // CHECK26-NEXT: [[TMP1:%.*]] = load i64, i64* [[VLA_ADDR]], align 8 // CHECK26-NEXT: [[TMP2:%.*]] = load i64, i64* [[VLA_ADDR2]], align 8 // CHECK26-NEXT: [[TMP3:%.*]] = load i16*, i16** [[C_ADDR]], align 8 -// CHECK26-NEXT: [[TMP4:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK26-NEXT: [[TMP4:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK26-NEXT: [[CONV3:%.*]] = sitofp i32 [[TMP4]] to double // CHECK26-NEXT: [[ADD:%.*]] = fadd double [[CONV3]], 1.500000e+00 // CHECK26-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_S1:%.*]], %struct.S1* [[TMP0]], i32 0, i32 0 @@ -14343,11 +14343,11 @@ int bar(int n){ // CHECK26-NEXT: [[CONV:%.*]] = bitcast i64* [[A_ADDR]] to i32* // CHECK26-NEXT: [[CONV1:%.*]] = bitcast i64* [[AA_ADDR]] to i16* // CHECK26-NEXT: [[TMP0:%.*]] = load [10 x i32]*, [10 x i32]** [[B_ADDR]], align 8 -// CHECK26-NEXT: [[TMP1:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK26-NEXT: [[TMP1:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK26-NEXT: [[CONV2:%.*]] = bitcast i64* [[A_CASTED]] to i32* // CHECK26-NEXT: store i32 [[TMP1]], i32* [[CONV2]], align 4 // CHECK26-NEXT: [[TMP2:%.*]] = load i64, i64* [[A_CASTED]], align 8 -// CHECK26-NEXT: [[TMP3:%.*]] = load i16, i16* [[CONV1]], align 8 +// CHECK26-NEXT: [[TMP3:%.*]] = load i16, i16* [[CONV1]], align 2 // CHECK26-NEXT: [[CONV3:%.*]] = bitcast i64* [[AA_CASTED]] to i16* // CHECK26-NEXT: store i16 [[TMP3]], i16* [[CONV3]], align 2 // CHECK26-NEXT: [[TMP4:%.*]] = load i64, i64* [[AA_CASTED]], align 8 @@ -14371,14 +14371,14 @@ int bar(int n){ // CHECK26-NEXT: [[CONV:%.*]] = bitcast i64* [[A_ADDR]] to i32* // CHECK26-NEXT: [[CONV1:%.*]] = bitcast i64* [[AA_ADDR]] to i16* // CHECK26-NEXT: [[TMP0:%.*]] = load [10 x i32]*, [10 x i32]** [[B_ADDR]], align 8 -// CHECK26-NEXT: [[TMP1:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK26-NEXT: [[TMP1:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK26-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP1]], 1 -// CHECK26-NEXT: store i32 [[ADD]], i32* [[CONV]], align 8 -// CHECK26-NEXT: [[TMP2:%.*]] = load i16, i16* [[CONV1]], align 8 +// CHECK26-NEXT: store i32 [[ADD]], i32* [[CONV]], align 4 +// CHECK26-NEXT: [[TMP2:%.*]] = load i16, i16* [[CONV1]], align 2 // CHECK26-NEXT: [[CONV2:%.*]] = sext i16 [[TMP2]] to i32 // CHECK26-NEXT: [[ADD3:%.*]] = add nsw i32 [[CONV2]], 1 // CHECK26-NEXT: [[CONV4:%.*]] = trunc i32 [[ADD3]] to i16 -// CHECK26-NEXT: store i16 [[CONV4]], i16* [[CONV1]], align 8 +// CHECK26-NEXT: store i16 [[CONV4]], i16* [[CONV1]], align 2 // CHECK26-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], [10 x i32]* [[TMP0]], i64 0, i64 2 // CHECK26-NEXT: [[TMP3:%.*]] = load i32, i32* [[ARRAYIDX]], align 4 // CHECK26-NEXT: [[ADD5:%.*]] = add nsw i32 [[TMP3]], 1 @@ -14401,7 +14401,7 @@ int bar(int n){ // CHECK27-NEXT: [[TMP1:%.*]] = load i32, i32* 
[[DOTCAPTURE_EXPR__ADDR]], align 4 // CHECK27-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR__ADDR2]], align 4 // CHECK27-NEXT: call void @__kmpc_push_num_teams(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]], i32 [[TMP1]], i32 [[TMP2]]) -// CHECK27-NEXT: [[TMP3:%.*]] = load i16, i16* [[CONV]], align 4 +// CHECK27-NEXT: [[TMP3:%.*]] = load i16, i16* [[CONV]], align 2 // CHECK27-NEXT: [[CONV3:%.*]] = bitcast i32* [[AA_CASTED]] to i16* // CHECK27-NEXT: store i16 [[TMP3]], i16* [[CONV3]], align 2 // CHECK27-NEXT: [[TMP4:%.*]] = load i32, i32* [[AA_CASTED]], align 4 @@ -14429,7 +14429,7 @@ int bar(int n){ // CHECK27-NEXT: [[AA_CASTED:%.*]] = alloca i32, align 4 // CHECK27-NEXT: store i32 [[AA]], i32* [[AA_ADDR]], align 4 // CHECK27-NEXT: [[CONV:%.*]] = bitcast i32* [[AA_ADDR]] to i16* -// CHECK27-NEXT: [[TMP0:%.*]] = load i16, i16* [[CONV]], align 4 +// CHECK27-NEXT: [[TMP0:%.*]] = load i16, i16* [[CONV]], align 2 // CHECK27-NEXT: [[CONV1:%.*]] = bitcast i32* [[AA_CASTED]] to i16* // CHECK27-NEXT: store i16 [[TMP0]], i16* [[CONV1]], align 2 // CHECK27-NEXT: [[TMP1:%.*]] = load i32, i32* [[AA_CASTED]], align 4 @@ -14447,11 +14447,11 @@ int bar(int n){ // CHECK27-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4 // CHECK27-NEXT: store i32 [[AA]], i32* [[AA_ADDR]], align 4 // CHECK27-NEXT: [[CONV:%.*]] = bitcast i32* [[AA_ADDR]] to i16* -// CHECK27-NEXT: [[TMP0:%.*]] = load i16, i16* [[CONV]], align 4 +// CHECK27-NEXT: [[TMP0:%.*]] = load i16, i16* [[CONV]], align 2 // CHECK27-NEXT: [[CONV1:%.*]] = sext i16 [[TMP0]] to i32 // CHECK27-NEXT: [[ADD:%.*]] = add nsw i32 [[CONV1]], 1 // CHECK27-NEXT: [[CONV2:%.*]] = trunc i32 [[ADD]] to i16 -// CHECK27-NEXT: store i16 [[CONV2]], i16* [[CONV]], align 4 +// CHECK27-NEXT: store i16 [[CONV2]], i16* [[CONV]], align 2 // CHECK27-NEXT: ret void // // @@ -14468,7 +14468,7 @@ int bar(int n){ // CHECK27-NEXT: [[TMP0:%.*]] = load i32, i32* [[A_ADDR]], align 4 // CHECK27-NEXT: store i32 [[TMP0]], i32* [[A_CASTED]], align 4 // CHECK27-NEXT: [[TMP1:%.*]] = load i32, i32* [[A_CASTED]], align 4 -// CHECK27-NEXT: [[TMP2:%.*]] = load i16, i16* [[CONV]], align 4 +// CHECK27-NEXT: [[TMP2:%.*]] = load i16, i16* [[CONV]], align 2 // CHECK27-NEXT: [[CONV1:%.*]] = bitcast i32* [[AA_CASTED]] to i16* // CHECK27-NEXT: store i16 [[TMP2]], i16* [[CONV1]], align 2 // CHECK27-NEXT: [[TMP3:%.*]] = load i32, i32* [[AA_CASTED]], align 4 @@ -14491,11 +14491,11 @@ int bar(int n){ // CHECK27-NEXT: [[TMP0:%.*]] = load i32, i32* [[A_ADDR]], align 4 // CHECK27-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP0]], 1 // CHECK27-NEXT: store i32 [[ADD]], i32* [[A_ADDR]], align 4 -// CHECK27-NEXT: [[TMP1:%.*]] = load i16, i16* [[CONV]], align 4 +// CHECK27-NEXT: [[TMP1:%.*]] = load i16, i16* [[CONV]], align 2 // CHECK27-NEXT: [[CONV1:%.*]] = sext i16 [[TMP1]] to i32 // CHECK27-NEXT: [[ADD2:%.*]] = add nsw i32 [[CONV1]], 1 // CHECK27-NEXT: [[CONV3:%.*]] = trunc i32 [[ADD2]] to i16 -// CHECK27-NEXT: store i16 [[CONV3]], i16* [[CONV]], align 4 +// CHECK27-NEXT: store i16 [[CONV3]], i16* [[CONV]], align 2 // CHECK27-NEXT: ret void // // @@ -14733,11 +14733,11 @@ int bar(int n){ // CHECK27-NEXT: [[TMP1:%.*]] = load i32, i32* [[A_ADDR]], align 4 // CHECK27-NEXT: store i32 [[TMP1]], i32* [[A_CASTED]], align 4 // CHECK27-NEXT: [[TMP2:%.*]] = load i32, i32* [[A_CASTED]], align 4 -// CHECK27-NEXT: [[TMP3:%.*]] = load i16, i16* [[CONV]], align 4 +// CHECK27-NEXT: [[TMP3:%.*]] = load i16, i16* [[CONV]], align 2 // CHECK27-NEXT: [[CONV2:%.*]] = bitcast i32* [[AA_CASTED]] to i16* // CHECK27-NEXT: 
store i16 [[TMP3]], i16* [[CONV2]], align 2 // CHECK27-NEXT: [[TMP4:%.*]] = load i32, i32* [[AA_CASTED]], align 4 -// CHECK27-NEXT: [[TMP5:%.*]] = load i8, i8* [[CONV1]], align 4 +// CHECK27-NEXT: [[TMP5:%.*]] = load i8, i8* [[CONV1]], align 1 // CHECK27-NEXT: [[CONV3:%.*]] = bitcast i32* [[AAA_CASTED]] to i8* // CHECK27-NEXT: store i8 [[TMP5]], i8* [[CONV3]], align 1 // CHECK27-NEXT: [[TMP6:%.*]] = load i32, i32* [[AAA_CASTED]], align 4 @@ -14766,16 +14766,16 @@ int bar(int n){ // CHECK27-NEXT: [[TMP1:%.*]] = load i32, i32* [[A_ADDR]], align 4 // CHECK27-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP1]], 1 // CHECK27-NEXT: store i32 [[ADD]], i32* [[A_ADDR]], align 4 -// CHECK27-NEXT: [[TMP2:%.*]] = load i16, i16* [[CONV]], align 4 +// CHECK27-NEXT: [[TMP2:%.*]] = load i16, i16* [[CONV]], align 2 // CHECK27-NEXT: [[CONV2:%.*]] = sext i16 [[TMP2]] to i32 // CHECK27-NEXT: [[ADD3:%.*]] = add nsw i32 [[CONV2]], 1 // CHECK27-NEXT: [[CONV4:%.*]] = trunc i32 [[ADD3]] to i16 -// CHECK27-NEXT: store i16 [[CONV4]], i16* [[CONV]], align 4 -// CHECK27-NEXT: [[TMP3:%.*]] = load i8, i8* [[CONV1]], align 4 +// CHECK27-NEXT: store i16 [[CONV4]], i16* [[CONV]], align 2 +// CHECK27-NEXT: [[TMP3:%.*]] = load i8, i8* [[CONV1]], align 1 // CHECK27-NEXT: [[CONV5:%.*]] = sext i8 [[TMP3]] to i32 // CHECK27-NEXT: [[ADD6:%.*]] = add nsw i32 [[CONV5]], 1 // CHECK27-NEXT: [[CONV7:%.*]] = trunc i32 [[ADD6]] to i8 -// CHECK27-NEXT: store i8 [[CONV7]], i8* [[CONV1]], align 4 +// CHECK27-NEXT: store i8 [[CONV7]], i8* [[CONV1]], align 1 // CHECK27-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], [10 x i32]* [[TMP0]], i32 0, i32 2 // CHECK27-NEXT: [[TMP4:%.*]] = load i32, i32* [[ARRAYIDX]], align 4 // CHECK27-NEXT: [[ADD8:%.*]] = add nsw i32 [[TMP4]], 1 @@ -14862,7 +14862,7 @@ int bar(int n){ // CHECK27-NEXT: [[TMP1:%.*]] = load i32, i32* [[A_ADDR]], align 4 // CHECK27-NEXT: store i32 [[TMP1]], i32* [[A_CASTED]], align 4 // CHECK27-NEXT: [[TMP2:%.*]] = load i32, i32* [[A_CASTED]], align 4 -// CHECK27-NEXT: [[TMP3:%.*]] = load i16, i16* [[CONV]], align 4 +// CHECK27-NEXT: [[TMP3:%.*]] = load i16, i16* [[CONV]], align 2 // CHECK27-NEXT: [[CONV1:%.*]] = bitcast i32* [[AA_CASTED]] to i16* // CHECK27-NEXT: store i16 [[TMP3]], i16* [[CONV1]], align 2 // CHECK27-NEXT: [[TMP4:%.*]] = load i32, i32* [[AA_CASTED]], align 4 @@ -14888,11 +14888,11 @@ int bar(int n){ // CHECK27-NEXT: [[TMP1:%.*]] = load i32, i32* [[A_ADDR]], align 4 // CHECK27-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP1]], 1 // CHECK27-NEXT: store i32 [[ADD]], i32* [[A_ADDR]], align 4 -// CHECK27-NEXT: [[TMP2:%.*]] = load i16, i16* [[CONV]], align 4 +// CHECK27-NEXT: [[TMP2:%.*]] = load i16, i16* [[CONV]], align 2 // CHECK27-NEXT: [[CONV1:%.*]] = sext i16 [[TMP2]] to i32 // CHECK27-NEXT: [[ADD2:%.*]] = add nsw i32 [[CONV1]], 1 // CHECK27-NEXT: [[CONV3:%.*]] = trunc i32 [[ADD2]] to i16 -// CHECK27-NEXT: store i16 [[CONV3]], i16* [[CONV]], align 4 +// CHECK27-NEXT: store i16 [[CONV3]], i16* [[CONV]], align 2 // CHECK27-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], [10 x i32]* [[TMP0]], i32 0, i32 2 // CHECK27-NEXT: [[TMP3:%.*]] = load i32, i32* [[ARRAYIDX]], align 4 // CHECK27-NEXT: [[ADD4:%.*]] = add nsw i32 [[TMP3]], 1 @@ -14915,7 +14915,7 @@ int bar(int n){ // CHECK28-NEXT: [[TMP1:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR__ADDR]], align 4 // CHECK28-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR__ADDR2]], align 4 // CHECK28-NEXT: call void @__kmpc_push_num_teams(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]], i32 [[TMP1]], i32 [[TMP2]]) -// CHECK28-NEXT: 
[[TMP3:%.*]] = load i16, i16* [[CONV]], align 4 +// CHECK28-NEXT: [[TMP3:%.*]] = load i16, i16* [[CONV]], align 2 // CHECK28-NEXT: [[CONV3:%.*]] = bitcast i32* [[AA_CASTED]] to i16* // CHECK28-NEXT: store i16 [[TMP3]], i16* [[CONV3]], align 2 // CHECK28-NEXT: [[TMP4:%.*]] = load i32, i32* [[AA_CASTED]], align 4 @@ -14943,7 +14943,7 @@ int bar(int n){ // CHECK28-NEXT: [[AA_CASTED:%.*]] = alloca i32, align 4 // CHECK28-NEXT: store i32 [[AA]], i32* [[AA_ADDR]], align 4 // CHECK28-NEXT: [[CONV:%.*]] = bitcast i32* [[AA_ADDR]] to i16* -// CHECK28-NEXT: [[TMP0:%.*]] = load i16, i16* [[CONV]], align 4 +// CHECK28-NEXT: [[TMP0:%.*]] = load i16, i16* [[CONV]], align 2 // CHECK28-NEXT: [[CONV1:%.*]] = bitcast i32* [[AA_CASTED]] to i16* // CHECK28-NEXT: store i16 [[TMP0]], i16* [[CONV1]], align 2 // CHECK28-NEXT: [[TMP1:%.*]] = load i32, i32* [[AA_CASTED]], align 4 @@ -14961,11 +14961,11 @@ int bar(int n){ // CHECK28-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4 // CHECK28-NEXT: store i32 [[AA]], i32* [[AA_ADDR]], align 4 // CHECK28-NEXT: [[CONV:%.*]] = bitcast i32* [[AA_ADDR]] to i16* -// CHECK28-NEXT: [[TMP0:%.*]] = load i16, i16* [[CONV]], align 4 +// CHECK28-NEXT: [[TMP0:%.*]] = load i16, i16* [[CONV]], align 2 // CHECK28-NEXT: [[CONV1:%.*]] = sext i16 [[TMP0]] to i32 // CHECK28-NEXT: [[ADD:%.*]] = add nsw i32 [[CONV1]], 1 // CHECK28-NEXT: [[CONV2:%.*]] = trunc i32 [[ADD]] to i16 -// CHECK28-NEXT: store i16 [[CONV2]], i16* [[CONV]], align 4 +// CHECK28-NEXT: store i16 [[CONV2]], i16* [[CONV]], align 2 // CHECK28-NEXT: ret void // // @@ -14982,7 +14982,7 @@ int bar(int n){ // CHECK28-NEXT: [[TMP0:%.*]] = load i32, i32* [[A_ADDR]], align 4 // CHECK28-NEXT: store i32 [[TMP0]], i32* [[A_CASTED]], align 4 // CHECK28-NEXT: [[TMP1:%.*]] = load i32, i32* [[A_CASTED]], align 4 -// CHECK28-NEXT: [[TMP2:%.*]] = load i16, i16* [[CONV]], align 4 +// CHECK28-NEXT: [[TMP2:%.*]] = load i16, i16* [[CONV]], align 2 // CHECK28-NEXT: [[CONV1:%.*]] = bitcast i32* [[AA_CASTED]] to i16* // CHECK28-NEXT: store i16 [[TMP2]], i16* [[CONV1]], align 2 // CHECK28-NEXT: [[TMP3:%.*]] = load i32, i32* [[AA_CASTED]], align 4 @@ -15005,11 +15005,11 @@ int bar(int n){ // CHECK28-NEXT: [[TMP0:%.*]] = load i32, i32* [[A_ADDR]], align 4 // CHECK28-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP0]], 1 // CHECK28-NEXT: store i32 [[ADD]], i32* [[A_ADDR]], align 4 -// CHECK28-NEXT: [[TMP1:%.*]] = load i16, i16* [[CONV]], align 4 +// CHECK28-NEXT: [[TMP1:%.*]] = load i16, i16* [[CONV]], align 2 // CHECK28-NEXT: [[CONV1:%.*]] = sext i16 [[TMP1]] to i32 // CHECK28-NEXT: [[ADD2:%.*]] = add nsw i32 [[CONV1]], 1 // CHECK28-NEXT: [[CONV3:%.*]] = trunc i32 [[ADD2]] to i16 -// CHECK28-NEXT: store i16 [[CONV3]], i16* [[CONV]], align 4 +// CHECK28-NEXT: store i16 [[CONV3]], i16* [[CONV]], align 2 // CHECK28-NEXT: ret void // // @@ -15247,11 +15247,11 @@ int bar(int n){ // CHECK28-NEXT: [[TMP1:%.*]] = load i32, i32* [[A_ADDR]], align 4 // CHECK28-NEXT: store i32 [[TMP1]], i32* [[A_CASTED]], align 4 // CHECK28-NEXT: [[TMP2:%.*]] = load i32, i32* [[A_CASTED]], align 4 -// CHECK28-NEXT: [[TMP3:%.*]] = load i16, i16* [[CONV]], align 4 +// CHECK28-NEXT: [[TMP3:%.*]] = load i16, i16* [[CONV]], align 2 // CHECK28-NEXT: [[CONV2:%.*]] = bitcast i32* [[AA_CASTED]] to i16* // CHECK28-NEXT: store i16 [[TMP3]], i16* [[CONV2]], align 2 // CHECK28-NEXT: [[TMP4:%.*]] = load i32, i32* [[AA_CASTED]], align 4 -// CHECK28-NEXT: [[TMP5:%.*]] = load i8, i8* [[CONV1]], align 4 +// CHECK28-NEXT: [[TMP5:%.*]] = load i8, i8* [[CONV1]], align 1 // 
CHECK28-NEXT: [[CONV3:%.*]] = bitcast i32* [[AAA_CASTED]] to i8* // CHECK28-NEXT: store i8 [[TMP5]], i8* [[CONV3]], align 1 // CHECK28-NEXT: [[TMP6:%.*]] = load i32, i32* [[AAA_CASTED]], align 4 @@ -15280,16 +15280,16 @@ int bar(int n){ // CHECK28-NEXT: [[TMP1:%.*]] = load i32, i32* [[A_ADDR]], align 4 // CHECK28-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP1]], 1 // CHECK28-NEXT: store i32 [[ADD]], i32* [[A_ADDR]], align 4 -// CHECK28-NEXT: [[TMP2:%.*]] = load i16, i16* [[CONV]], align 4 +// CHECK28-NEXT: [[TMP2:%.*]] = load i16, i16* [[CONV]], align 2 // CHECK28-NEXT: [[CONV2:%.*]] = sext i16 [[TMP2]] to i32 // CHECK28-NEXT: [[ADD3:%.*]] = add nsw i32 [[CONV2]], 1 // CHECK28-NEXT: [[CONV4:%.*]] = trunc i32 [[ADD3]] to i16 -// CHECK28-NEXT: store i16 [[CONV4]], i16* [[CONV]], align 4 -// CHECK28-NEXT: [[TMP3:%.*]] = load i8, i8* [[CONV1]], align 4 +// CHECK28-NEXT: store i16 [[CONV4]], i16* [[CONV]], align 2 +// CHECK28-NEXT: [[TMP3:%.*]] = load i8, i8* [[CONV1]], align 1 // CHECK28-NEXT: [[CONV5:%.*]] = sext i8 [[TMP3]] to i32 // CHECK28-NEXT: [[ADD6:%.*]] = add nsw i32 [[CONV5]], 1 // CHECK28-NEXT: [[CONV7:%.*]] = trunc i32 [[ADD6]] to i8 -// CHECK28-NEXT: store i8 [[CONV7]], i8* [[CONV1]], align 4 +// CHECK28-NEXT: store i8 [[CONV7]], i8* [[CONV1]], align 1 // CHECK28-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], [10 x i32]* [[TMP0]], i32 0, i32 2 // CHECK28-NEXT: [[TMP4:%.*]] = load i32, i32* [[ARRAYIDX]], align 4 // CHECK28-NEXT: [[ADD8:%.*]] = add nsw i32 [[TMP4]], 1 @@ -15376,7 +15376,7 @@ int bar(int n){ // CHECK28-NEXT: [[TMP1:%.*]] = load i32, i32* [[A_ADDR]], align 4 // CHECK28-NEXT: store i32 [[TMP1]], i32* [[A_CASTED]], align 4 // CHECK28-NEXT: [[TMP2:%.*]] = load i32, i32* [[A_CASTED]], align 4 -// CHECK28-NEXT: [[TMP3:%.*]] = load i16, i16* [[CONV]], align 4 +// CHECK28-NEXT: [[TMP3:%.*]] = load i16, i16* [[CONV]], align 2 // CHECK28-NEXT: [[CONV1:%.*]] = bitcast i32* [[AA_CASTED]] to i16* // CHECK28-NEXT: store i16 [[TMP3]], i16* [[CONV1]], align 2 // CHECK28-NEXT: [[TMP4:%.*]] = load i32, i32* [[AA_CASTED]], align 4 @@ -15402,11 +15402,11 @@ int bar(int n){ // CHECK28-NEXT: [[TMP1:%.*]] = load i32, i32* [[A_ADDR]], align 4 // CHECK28-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP1]], 1 // CHECK28-NEXT: store i32 [[ADD]], i32* [[A_ADDR]], align 4 -// CHECK28-NEXT: [[TMP2:%.*]] = load i16, i16* [[CONV]], align 4 +// CHECK28-NEXT: [[TMP2:%.*]] = load i16, i16* [[CONV]], align 2 // CHECK28-NEXT: [[CONV1:%.*]] = sext i16 [[TMP2]] to i32 // CHECK28-NEXT: [[ADD2:%.*]] = add nsw i32 [[CONV1]], 1 // CHECK28-NEXT: [[CONV3:%.*]] = trunc i32 [[ADD2]] to i16 -// CHECK28-NEXT: store i16 [[CONV3]], i16* [[CONV]], align 4 +// CHECK28-NEXT: store i16 [[CONV3]], i16* [[CONV]], align 2 // CHECK28-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], [10 x i32]* [[TMP0]], i32 0, i32 2 // CHECK28-NEXT: [[TMP3:%.*]] = load i32, i32* [[ARRAYIDX]], align 4 // CHECK28-NEXT: [[ADD4:%.*]] = add nsw i32 [[TMP3]], 1 diff --git a/clang/test/OpenMP/target_teams_distribute_codegen.cpp b/clang/test/OpenMP/target_teams_distribute_codegen.cpp index 63e5e4dc10ef..300caa04c22d 100644 --- a/clang/test/OpenMP/target_teams_distribute_codegen.cpp +++ b/clang/test/OpenMP/target_teams_distribute_codegen.cpp @@ -648,10 +648,10 @@ int bar(int n){ // CHECK1-NEXT: [[CONV:%.*]] = bitcast i64* [[AA_ADDR]] to i16* // CHECK1-NEXT: [[CONV3:%.*]] = bitcast i64* [[DOTCAPTURE_EXPR__ADDR]] to i32* // CHECK1-NEXT: [[CONV4:%.*]] = bitcast i64* [[DOTCAPTURE_EXPR__ADDR2]] to i32* -// CHECK1-NEXT: [[TMP1:%.*]] = load i32, i32* 
[[CONV3]], align 8 -// CHECK1-NEXT: [[TMP2:%.*]] = load i32, i32* [[CONV4]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = load i32, i32* [[CONV3]], align 4 +// CHECK1-NEXT: [[TMP2:%.*]] = load i32, i32* [[CONV4]], align 4 // CHECK1-NEXT: call void @__kmpc_push_num_teams(%struct.ident_t* @[[GLOB2]], i32 [[TMP0]], i32 [[TMP1]], i32 [[TMP2]]) -// CHECK1-NEXT: [[TMP3:%.*]] = load i16, i16* [[CONV]], align 8 +// CHECK1-NEXT: [[TMP3:%.*]] = load i16, i16* [[CONV]], align 2 // CHECK1-NEXT: [[CONV5:%.*]] = bitcast i64* [[AA_CASTED]] to i16* // CHECK1-NEXT: store i16 [[TMP3]], i16* [[CONV5]], align 2 // CHECK1-NEXT: [[TMP4:%.*]] = load i64, i64* [[AA_CASTED]], align 8 @@ -837,7 +837,7 @@ int bar(int n){ // CHECK1-NEXT: [[A_CASTED:%.*]] = alloca i64, align 8 // CHECK1-NEXT: store i64 [[A]], i64* [[A_ADDR]], align 8 // CHECK1-NEXT: [[CONV:%.*]] = bitcast i64* [[A_ADDR]] to i32* -// CHECK1-NEXT: [[TMP0:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK1-NEXT: [[CONV1:%.*]] = bitcast i64* [[A_CASTED]] to i32* // CHECK1-NEXT: store i32 [[TMP0]], i32* [[CONV1]], align 4 // CHECK1-NEXT: [[TMP1:%.*]] = load i64, i64* [[A_CASTED]], align 8 @@ -893,9 +893,9 @@ int bar(int n){ // CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP7]], 1 // CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK1-NEXT: store i32 [[ADD]], i32* [[I]], align 4 -// CHECK1-NEXT: [[TMP8:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK1-NEXT: [[TMP8:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK1-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP8]], 1 -// CHECK1-NEXT: store i32 [[ADD2]], i32* [[CONV]], align 8 +// CHECK1-NEXT: store i32 [[ADD2]], i32* [[CONV]], align 4 // CHECK1-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK1: omp.body.continue: // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] @@ -918,7 +918,7 @@ int bar(int n){ // CHECK1-NEXT: [[AA_CASTED:%.*]] = alloca i64, align 8 // CHECK1-NEXT: store i64 [[AA]], i64* [[AA_ADDR]], align 8 // CHECK1-NEXT: [[CONV:%.*]] = bitcast i64* [[AA_ADDR]] to i16* -// CHECK1-NEXT: [[TMP0:%.*]] = load i16, i16* [[CONV]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load i16, i16* [[CONV]], align 2 // CHECK1-NEXT: [[CONV1:%.*]] = bitcast i64* [[AA_CASTED]] to i16* // CHECK1-NEXT: store i16 [[TMP0]], i16* [[CONV1]], align 2 // CHECK1-NEXT: [[TMP1:%.*]] = load i64, i64* [[AA_CASTED]], align 8 @@ -974,11 +974,11 @@ int bar(int n){ // CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP7]], 1 // CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK1-NEXT: store i32 [[ADD]], i32* [[I]], align 4 -// CHECK1-NEXT: [[TMP8:%.*]] = load i16, i16* [[CONV]], align 8 +// CHECK1-NEXT: [[TMP8:%.*]] = load i16, i16* [[CONV]], align 2 // CHECK1-NEXT: [[CONV2:%.*]] = sext i16 [[TMP8]] to i32 // CHECK1-NEXT: [[ADD3:%.*]] = add nsw i32 [[CONV2]], 1 // CHECK1-NEXT: [[CONV4:%.*]] = trunc i32 [[ADD3]] to i16 -// CHECK1-NEXT: store i16 [[CONV4]], i16* [[CONV]], align 8 +// CHECK1-NEXT: store i16 [[CONV4]], i16* [[CONV]], align 2 // CHECK1-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK1: omp.body.continue: // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] @@ -1005,11 +1005,11 @@ int bar(int n){ // CHECK1-NEXT: store i64 [[AA]], i64* [[AA_ADDR]], align 8 // CHECK1-NEXT: [[CONV:%.*]] = bitcast i64* [[A_ADDR]] to i32* // CHECK1-NEXT: [[CONV1:%.*]] = bitcast i64* [[AA_ADDR]] to i16* -// CHECK1-NEXT: [[TMP0:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK1-NEXT: [[CONV2:%.*]] = bitcast i64* [[A_CASTED]] to i32* 
// CHECK1-NEXT: store i32 [[TMP0]], i32* [[CONV2]], align 4 // CHECK1-NEXT: [[TMP1:%.*]] = load i64, i64* [[A_CASTED]], align 8 -// CHECK1-NEXT: [[TMP2:%.*]] = load i16, i16* [[CONV1]], align 8 +// CHECK1-NEXT: [[TMP2:%.*]] = load i16, i16* [[CONV1]], align 2 // CHECK1-NEXT: [[CONV3:%.*]] = bitcast i64* [[AA_CASTED]] to i16* // CHECK1-NEXT: store i16 [[TMP2]], i16* [[CONV3]], align 2 // CHECK1-NEXT: [[TMP3:%.*]] = load i64, i64* [[AA_CASTED]], align 8 @@ -1068,14 +1068,14 @@ int bar(int n){ // CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP7]], 1 // CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK1-NEXT: store i32 [[ADD]], i32* [[I]], align 4 -// CHECK1-NEXT: [[TMP8:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK1-NEXT: [[TMP8:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK1-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP8]], 1 -// CHECK1-NEXT: store i32 [[ADD3]], i32* [[CONV]], align 8 -// CHECK1-NEXT: [[TMP9:%.*]] = load i16, i16* [[CONV1]], align 8 +// CHECK1-NEXT: store i32 [[ADD3]], i32* [[CONV]], align 4 +// CHECK1-NEXT: [[TMP9:%.*]] = load i16, i16* [[CONV1]], align 2 // CHECK1-NEXT: [[CONV4:%.*]] = sext i16 [[TMP9]] to i32 // CHECK1-NEXT: [[ADD5:%.*]] = add nsw i32 [[CONV4]], 1 // CHECK1-NEXT: [[CONV6:%.*]] = trunc i32 [[ADD5]] to i16 -// CHECK1-NEXT: store i16 [[CONV6]], i16* [[CONV1]], align 8 +// CHECK1-NEXT: store i16 [[CONV6]], i16* [[CONV1]], align 2 // CHECK1-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK1: omp.body.continue: // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] @@ -1126,11 +1126,11 @@ int bar(int n){ // CHECK1-NEXT: [[TMP6:%.*]] = load double*, double** [[CN_ADDR]], align 8 // CHECK1-NEXT: [[TMP7:%.*]] = load %struct.TT*, %struct.TT** [[D_ADDR]], align 8 // CHECK1-NEXT: [[CONV5:%.*]] = bitcast i64* [[DOTCAPTURE_EXPR__ADDR]] to i32* -// CHECK1-NEXT: [[TMP8:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK1-NEXT: [[TMP8:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK1-NEXT: [[CONV6:%.*]] = bitcast i64* [[A_CASTED]] to i32* // CHECK1-NEXT: store i32 [[TMP8]], i32* [[CONV6]], align 4 // CHECK1-NEXT: [[TMP9:%.*]] = load i64, i64* [[A_CASTED]], align 8 -// CHECK1-NEXT: [[TMP10:%.*]] = load i32, i32* [[CONV5]], align 8 +// CHECK1-NEXT: [[TMP10:%.*]] = load i32, i32* [[CONV5]], align 4 // CHECK1-NEXT: [[CONV7:%.*]] = bitcast i64* [[DOTCAPTURE_EXPR__CASTED]] to i32* // CHECK1-NEXT: store i32 [[TMP10]], i32* [[CONV7]], align 4 // CHECK1-NEXT: [[TMP11:%.*]] = load i64, i64* [[DOTCAPTURE_EXPR__CASTED]], align 8 @@ -1186,7 +1186,7 @@ int bar(int n){ // CHECK1-NEXT: store i32 9, i32* [[DOTOMP_UB]], align 4 // CHECK1-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK1-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP8:%.*]] = load i32, i32* [[CONV5]], align 8 +// CHECK1-NEXT: [[TMP8:%.*]] = load i32, i32* [[CONV5]], align 4 // CHECK1-NEXT: [[TMP9:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: [[TMP10:%.*]] = load i32, i32* [[TMP9]], align 4 // CHECK1-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP10]], i32 91, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 [[TMP8]]) @@ -1221,9 +1221,9 @@ int bar(int n){ // CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP18]], 1 // CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK1-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !22 -// CHECK1-NEXT: [[TMP19:%.*]] = load i32, i32* [[CONV]], align 8, !llvm.access.group !22 +// CHECK1-NEXT: [[TMP19:%.*]] = load i32, i32* 
[[CONV]], align 4, !llvm.access.group !22 // CHECK1-NEXT: [[ADD8:%.*]] = add nsw i32 [[TMP19]], 1 -// CHECK1-NEXT: store i32 [[ADD8]], i32* [[CONV]], align 8, !llvm.access.group !22 +// CHECK1-NEXT: store i32 [[ADD8]], i32* [[CONV]], align 4, !llvm.access.group !22 // CHECK1-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x float], [10 x float]* [[TMP0]], i64 0, i64 2 // CHECK1-NEXT: [[TMP20:%.*]] = load float, float* [[ARRAYIDX]], align 4, !llvm.access.group !22 // CHECK1-NEXT: [[CONV9:%.*]] = fpext float [[TMP20]] to double @@ -1637,7 +1637,7 @@ int bar(int n){ // CHECK1-NEXT: [[TMP1:%.*]] = load i64, i64* [[VLA_ADDR]], align 8 // CHECK1-NEXT: [[TMP2:%.*]] = load i64, i64* [[VLA_ADDR2]], align 8 // CHECK1-NEXT: [[TMP3:%.*]] = load i16*, i16** [[C_ADDR]], align 8 -// CHECK1-NEXT: [[TMP4:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK1-NEXT: [[TMP4:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK1-NEXT: [[CONV3:%.*]] = bitcast i64* [[B_CASTED]] to i32* // CHECK1-NEXT: store i32 [[TMP4]], i32* [[CONV3]], align 4 // CHECK1-NEXT: [[TMP5:%.*]] = load i64, i64* [[B_CASTED]], align 8 @@ -1705,7 +1705,7 @@ int bar(int n){ // CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP11]], 1 // CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK1-NEXT: store i32 [[ADD]], i32* [[I]], align 4 -// CHECK1-NEXT: [[TMP12:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK1-NEXT: [[TMP12:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK1-NEXT: [[CONV4:%.*]] = sitofp i32 [[TMP12]] to double // CHECK1-NEXT: [[ADD5:%.*]] = fadd double [[CONV4]], 1.500000e+00 // CHECK1-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_S1:%.*]], %struct.S1* [[TMP0]], i32 0, i32 0 @@ -1756,19 +1756,19 @@ int bar(int n){ // CHECK1-NEXT: [[CONV2:%.*]] = bitcast i64* [[AA_ADDR]] to i16* // CHECK1-NEXT: [[CONV3:%.*]] = bitcast i64* [[AAA_ADDR]] to i8* // CHECK1-NEXT: [[TMP0:%.*]] = load [10 x i32]*, [10 x i32]** [[B_ADDR]], align 8 -// CHECK1-NEXT: [[TMP1:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK1-NEXT: [[CONV4:%.*]] = bitcast i64* [[N_CASTED]] to i32* // CHECK1-NEXT: store i32 [[TMP1]], i32* [[CONV4]], align 4 // CHECK1-NEXT: [[TMP2:%.*]] = load i64, i64* [[N_CASTED]], align 8 -// CHECK1-NEXT: [[TMP3:%.*]] = load i32, i32* [[CONV1]], align 8 +// CHECK1-NEXT: [[TMP3:%.*]] = load i32, i32* [[CONV1]], align 4 // CHECK1-NEXT: [[CONV5:%.*]] = bitcast i64* [[A_CASTED]] to i32* // CHECK1-NEXT: store i32 [[TMP3]], i32* [[CONV5]], align 4 // CHECK1-NEXT: [[TMP4:%.*]] = load i64, i64* [[A_CASTED]], align 8 -// CHECK1-NEXT: [[TMP5:%.*]] = load i16, i16* [[CONV2]], align 8 +// CHECK1-NEXT: [[TMP5:%.*]] = load i16, i16* [[CONV2]], align 2 // CHECK1-NEXT: [[CONV6:%.*]] = bitcast i64* [[AA_CASTED]] to i16* // CHECK1-NEXT: store i16 [[TMP5]], i16* [[CONV6]], align 2 // CHECK1-NEXT: [[TMP6:%.*]] = load i64, i64* [[AA_CASTED]], align 8 -// CHECK1-NEXT: [[TMP7:%.*]] = load i8, i8* [[CONV3]], align 8 +// CHECK1-NEXT: [[TMP7:%.*]] = load i8, i8* [[CONV3]], align 1 // CHECK1-NEXT: [[CONV7:%.*]] = bitcast i64* [[AAA_CASTED]] to i8* // CHECK1-NEXT: store i8 [[TMP7]], i8* [[CONV7]], align 1 // CHECK1-NEXT: [[TMP8:%.*]] = load i64, i64* [[AAA_CASTED]], align 8 @@ -1809,9 +1809,9 @@ int bar(int n){ // CHECK1-NEXT: [[CONV2:%.*]] = bitcast i64* [[AA_ADDR]] to i16* // CHECK1-NEXT: [[CONV3:%.*]] = bitcast i64* [[AAA_ADDR]] to i8* // CHECK1-NEXT: [[TMP0:%.*]] = load [10 x i32]*, [10 x i32]** [[B_ADDR]], align 8 -// CHECK1-NEXT: [[TMP1:%.*]] = load i32, i32* [[CONV1]], align 8 +// 
CHECK1-NEXT: [[TMP1:%.*]] = load i32, i32* [[CONV1]], align 4 // CHECK1-NEXT: store i32 [[TMP1]], i32* [[DOTCAPTURE_EXPR_]], align 4 -// CHECK1-NEXT: [[TMP2:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK1-NEXT: [[TMP2:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK1-NEXT: store i32 [[TMP2]], i32* [[DOTCAPTURE_EXPR_4]], align 4 // CHECK1-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_4]], align 4 // CHECK1-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 @@ -1864,19 +1864,19 @@ int bar(int n){ // CHECK1-NEXT: [[MUL:%.*]] = mul i32 [[TMP19]], 1 // CHECK1-NEXT: [[ADD12:%.*]] = add i32 [[TMP18]], [[MUL]] // CHECK1-NEXT: store i32 [[ADD12]], i32* [[I8]], align 4 -// CHECK1-NEXT: [[TMP20:%.*]] = load i32, i32* [[CONV1]], align 8 +// CHECK1-NEXT: [[TMP20:%.*]] = load i32, i32* [[CONV1]], align 4 // CHECK1-NEXT: [[ADD13:%.*]] = add nsw i32 [[TMP20]], 1 -// CHECK1-NEXT: store i32 [[ADD13]], i32* [[CONV1]], align 8 -// CHECK1-NEXT: [[TMP21:%.*]] = load i16, i16* [[CONV2]], align 8 +// CHECK1-NEXT: store i32 [[ADD13]], i32* [[CONV1]], align 4 +// CHECK1-NEXT: [[TMP21:%.*]] = load i16, i16* [[CONV2]], align 2 // CHECK1-NEXT: [[CONV14:%.*]] = sext i16 [[TMP21]] to i32 // CHECK1-NEXT: [[ADD15:%.*]] = add nsw i32 [[CONV14]], 1 // CHECK1-NEXT: [[CONV16:%.*]] = trunc i32 [[ADD15]] to i16 -// CHECK1-NEXT: store i16 [[CONV16]], i16* [[CONV2]], align 8 -// CHECK1-NEXT: [[TMP22:%.*]] = load i8, i8* [[CONV3]], align 8 +// CHECK1-NEXT: store i16 [[CONV16]], i16* [[CONV2]], align 2 +// CHECK1-NEXT: [[TMP22:%.*]] = load i8, i8* [[CONV3]], align 1 // CHECK1-NEXT: [[CONV17:%.*]] = sext i8 [[TMP22]] to i32 // CHECK1-NEXT: [[ADD18:%.*]] = add nsw i32 [[CONV17]], 1 // CHECK1-NEXT: [[CONV19:%.*]] = trunc i32 [[ADD18]] to i8 -// CHECK1-NEXT: store i8 [[CONV19]], i8* [[CONV3]], align 8 +// CHECK1-NEXT: store i8 [[CONV19]], i8* [[CONV3]], align 1 // CHECK1-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], [10 x i32]* [[TMP0]], i64 0, i64 2 // CHECK1-NEXT: [[TMP23:%.*]] = load i32, i32* [[ARRAYIDX]], align 4 // CHECK1-NEXT: [[ADD20:%.*]] = add nsw i32 [[TMP23]], 1 @@ -1914,11 +1914,11 @@ int bar(int n){ // CHECK1-NEXT: [[CONV:%.*]] = bitcast i64* [[A_ADDR]] to i32* // CHECK1-NEXT: [[CONV1:%.*]] = bitcast i64* [[AA_ADDR]] to i16* // CHECK1-NEXT: [[TMP0:%.*]] = load [10 x i32]*, [10 x i32]** [[B_ADDR]], align 8 -// CHECK1-NEXT: [[TMP1:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK1-NEXT: [[CONV2:%.*]] = bitcast i64* [[A_CASTED]] to i32* // CHECK1-NEXT: store i32 [[TMP1]], i32* [[CONV2]], align 4 // CHECK1-NEXT: [[TMP2:%.*]] = load i64, i64* [[A_CASTED]], align 8 -// CHECK1-NEXT: [[TMP3:%.*]] = load i16, i16* [[CONV1]], align 8 +// CHECK1-NEXT: [[TMP3:%.*]] = load i16, i16* [[CONV1]], align 2 // CHECK1-NEXT: [[CONV3:%.*]] = bitcast i64* [[AA_CASTED]] to i16* // CHECK1-NEXT: store i16 [[TMP3]], i16* [[CONV3]], align 2 // CHECK1-NEXT: [[TMP4:%.*]] = load i64, i64* [[AA_CASTED]], align 8 @@ -1980,14 +1980,14 @@ int bar(int n){ // CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP8]], 1 // CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK1-NEXT: store i32 [[ADD]], i32* [[I]], align 4 -// CHECK1-NEXT: [[TMP9:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK1-NEXT: [[TMP9:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK1-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP9]], 1 -// CHECK1-NEXT: store i32 [[ADD3]], i32* [[CONV]], align 8 -// CHECK1-NEXT: [[TMP10:%.*]] = load i16, i16* [[CONV1]], align 8 +// CHECK1-NEXT: store i32 
[[ADD3]], i32* [[CONV]], align 4 +// CHECK1-NEXT: [[TMP10:%.*]] = load i16, i16* [[CONV1]], align 2 // CHECK1-NEXT: [[CONV4:%.*]] = sext i16 [[TMP10]] to i32 // CHECK1-NEXT: [[ADD5:%.*]] = add nsw i32 [[CONV4]], 1 // CHECK1-NEXT: [[CONV6:%.*]] = trunc i32 [[ADD5]] to i16 -// CHECK1-NEXT: store i16 [[CONV6]], i16* [[CONV1]], align 8 +// CHECK1-NEXT: store i16 [[CONV6]], i16* [[CONV1]], align 2 // CHECK1-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], [10 x i32]* [[TMP0]], i64 0, i64 2 // CHECK1-NEXT: [[TMP11:%.*]] = load i32, i32* [[ARRAYIDX]], align 4 // CHECK1-NEXT: [[ADD7:%.*]] = add nsw i32 [[TMP11]], 1 @@ -2368,10 +2368,10 @@ int bar(int n){ // CHECK2-NEXT: [[CONV:%.*]] = bitcast i64* [[AA_ADDR]] to i16* // CHECK2-NEXT: [[CONV3:%.*]] = bitcast i64* [[DOTCAPTURE_EXPR__ADDR]] to i32* // CHECK2-NEXT: [[CONV4:%.*]] = bitcast i64* [[DOTCAPTURE_EXPR__ADDR2]] to i32* -// CHECK2-NEXT: [[TMP1:%.*]] = load i32, i32* [[CONV3]], align 8 -// CHECK2-NEXT: [[TMP2:%.*]] = load i32, i32* [[CONV4]], align 8 +// CHECK2-NEXT: [[TMP1:%.*]] = load i32, i32* [[CONV3]], align 4 +// CHECK2-NEXT: [[TMP2:%.*]] = load i32, i32* [[CONV4]], align 4 // CHECK2-NEXT: call void @__kmpc_push_num_teams(%struct.ident_t* @[[GLOB2]], i32 [[TMP0]], i32 [[TMP1]], i32 [[TMP2]]) -// CHECK2-NEXT: [[TMP3:%.*]] = load i16, i16* [[CONV]], align 8 +// CHECK2-NEXT: [[TMP3:%.*]] = load i16, i16* [[CONV]], align 2 // CHECK2-NEXT: [[CONV5:%.*]] = bitcast i64* [[AA_CASTED]] to i16* // CHECK2-NEXT: store i16 [[TMP3]], i16* [[CONV5]], align 2 // CHECK2-NEXT: [[TMP4:%.*]] = load i64, i64* [[AA_CASTED]], align 8 @@ -2557,7 +2557,7 @@ int bar(int n){ // CHECK2-NEXT: [[A_CASTED:%.*]] = alloca i64, align 8 // CHECK2-NEXT: store i64 [[A]], i64* [[A_ADDR]], align 8 // CHECK2-NEXT: [[CONV:%.*]] = bitcast i64* [[A_ADDR]] to i32* -// CHECK2-NEXT: [[TMP0:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK2-NEXT: [[TMP0:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK2-NEXT: [[CONV1:%.*]] = bitcast i64* [[A_CASTED]] to i32* // CHECK2-NEXT: store i32 [[TMP0]], i32* [[CONV1]], align 4 // CHECK2-NEXT: [[TMP1:%.*]] = load i64, i64* [[A_CASTED]], align 8 @@ -2613,9 +2613,9 @@ int bar(int n){ // CHECK2-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP7]], 1 // CHECK2-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK2-NEXT: store i32 [[ADD]], i32* [[I]], align 4 -// CHECK2-NEXT: [[TMP8:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK2-NEXT: [[TMP8:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK2-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP8]], 1 -// CHECK2-NEXT: store i32 [[ADD2]], i32* [[CONV]], align 8 +// CHECK2-NEXT: store i32 [[ADD2]], i32* [[CONV]], align 4 // CHECK2-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK2: omp.body.continue: // CHECK2-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] @@ -2638,7 +2638,7 @@ int bar(int n){ // CHECK2-NEXT: [[AA_CASTED:%.*]] = alloca i64, align 8 // CHECK2-NEXT: store i64 [[AA]], i64* [[AA_ADDR]], align 8 // CHECK2-NEXT: [[CONV:%.*]] = bitcast i64* [[AA_ADDR]] to i16* -// CHECK2-NEXT: [[TMP0:%.*]] = load i16, i16* [[CONV]], align 8 +// CHECK2-NEXT: [[TMP0:%.*]] = load i16, i16* [[CONV]], align 2 // CHECK2-NEXT: [[CONV1:%.*]] = bitcast i64* [[AA_CASTED]] to i16* // CHECK2-NEXT: store i16 [[TMP0]], i16* [[CONV1]], align 2 // CHECK2-NEXT: [[TMP1:%.*]] = load i64, i64* [[AA_CASTED]], align 8 @@ -2694,11 +2694,11 @@ int bar(int n){ // CHECK2-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP7]], 1 // CHECK2-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK2-NEXT: store i32 [[ADD]], i32* [[I]], align 4 -// CHECK2-NEXT: [[TMP8:%.*]] = 
load i16, i16* [[CONV]], align 8 +// CHECK2-NEXT: [[TMP8:%.*]] = load i16, i16* [[CONV]], align 2 // CHECK2-NEXT: [[CONV2:%.*]] = sext i16 [[TMP8]] to i32 // CHECK2-NEXT: [[ADD3:%.*]] = add nsw i32 [[CONV2]], 1 // CHECK2-NEXT: [[CONV4:%.*]] = trunc i32 [[ADD3]] to i16 -// CHECK2-NEXT: store i16 [[CONV4]], i16* [[CONV]], align 8 +// CHECK2-NEXT: store i16 [[CONV4]], i16* [[CONV]], align 2 // CHECK2-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK2: omp.body.continue: // CHECK2-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] @@ -2725,11 +2725,11 @@ int bar(int n){ // CHECK2-NEXT: store i64 [[AA]], i64* [[AA_ADDR]], align 8 // CHECK2-NEXT: [[CONV:%.*]] = bitcast i64* [[A_ADDR]] to i32* // CHECK2-NEXT: [[CONV1:%.*]] = bitcast i64* [[AA_ADDR]] to i16* -// CHECK2-NEXT: [[TMP0:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK2-NEXT: [[TMP0:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK2-NEXT: [[CONV2:%.*]] = bitcast i64* [[A_CASTED]] to i32* // CHECK2-NEXT: store i32 [[TMP0]], i32* [[CONV2]], align 4 // CHECK2-NEXT: [[TMP1:%.*]] = load i64, i64* [[A_CASTED]], align 8 -// CHECK2-NEXT: [[TMP2:%.*]] = load i16, i16* [[CONV1]], align 8 +// CHECK2-NEXT: [[TMP2:%.*]] = load i16, i16* [[CONV1]], align 2 // CHECK2-NEXT: [[CONV3:%.*]] = bitcast i64* [[AA_CASTED]] to i16* // CHECK2-NEXT: store i16 [[TMP2]], i16* [[CONV3]], align 2 // CHECK2-NEXT: [[TMP3:%.*]] = load i64, i64* [[AA_CASTED]], align 8 @@ -2788,14 +2788,14 @@ int bar(int n){ // CHECK2-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP7]], 1 // CHECK2-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK2-NEXT: store i32 [[ADD]], i32* [[I]], align 4 -// CHECK2-NEXT: [[TMP8:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK2-NEXT: [[TMP8:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK2-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP8]], 1 -// CHECK2-NEXT: store i32 [[ADD3]], i32* [[CONV]], align 8 -// CHECK2-NEXT: [[TMP9:%.*]] = load i16, i16* [[CONV1]], align 8 +// CHECK2-NEXT: store i32 [[ADD3]], i32* [[CONV]], align 4 +// CHECK2-NEXT: [[TMP9:%.*]] = load i16, i16* [[CONV1]], align 2 // CHECK2-NEXT: [[CONV4:%.*]] = sext i16 [[TMP9]] to i32 // CHECK2-NEXT: [[ADD5:%.*]] = add nsw i32 [[CONV4]], 1 // CHECK2-NEXT: [[CONV6:%.*]] = trunc i32 [[ADD5]] to i16 -// CHECK2-NEXT: store i16 [[CONV6]], i16* [[CONV1]], align 8 +// CHECK2-NEXT: store i16 [[CONV6]], i16* [[CONV1]], align 2 // CHECK2-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK2: omp.body.continue: // CHECK2-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] @@ -2846,11 +2846,11 @@ int bar(int n){ // CHECK2-NEXT: [[TMP6:%.*]] = load double*, double** [[CN_ADDR]], align 8 // CHECK2-NEXT: [[TMP7:%.*]] = load %struct.TT*, %struct.TT** [[D_ADDR]], align 8 // CHECK2-NEXT: [[CONV5:%.*]] = bitcast i64* [[DOTCAPTURE_EXPR__ADDR]] to i32* -// CHECK2-NEXT: [[TMP8:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK2-NEXT: [[TMP8:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK2-NEXT: [[CONV6:%.*]] = bitcast i64* [[A_CASTED]] to i32* // CHECK2-NEXT: store i32 [[TMP8]], i32* [[CONV6]], align 4 // CHECK2-NEXT: [[TMP9:%.*]] = load i64, i64* [[A_CASTED]], align 8 -// CHECK2-NEXT: [[TMP10:%.*]] = load i32, i32* [[CONV5]], align 8 +// CHECK2-NEXT: [[TMP10:%.*]] = load i32, i32* [[CONV5]], align 4 // CHECK2-NEXT: [[CONV7:%.*]] = bitcast i64* [[DOTCAPTURE_EXPR__CASTED]] to i32* // CHECK2-NEXT: store i32 [[TMP10]], i32* [[CONV7]], align 4 // CHECK2-NEXT: [[TMP11:%.*]] = load i64, i64* [[DOTCAPTURE_EXPR__CASTED]], align 8 @@ -2906,7 +2906,7 @@ int bar(int n){ // CHECK2-NEXT: store i32 9, i32* [[DOTOMP_UB]], align 4 // CHECK2-NEXT: store i32 1, 
i32* [[DOTOMP_STRIDE]], align 4 // CHECK2-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK2-NEXT: [[TMP8:%.*]] = load i32, i32* [[CONV5]], align 8 +// CHECK2-NEXT: [[TMP8:%.*]] = load i32, i32* [[CONV5]], align 4 // CHECK2-NEXT: [[TMP9:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK2-NEXT: [[TMP10:%.*]] = load i32, i32* [[TMP9]], align 4 // CHECK2-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP10]], i32 91, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 [[TMP8]]) @@ -2941,9 +2941,9 @@ int bar(int n){ // CHECK2-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP18]], 1 // CHECK2-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK2-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !22 -// CHECK2-NEXT: [[TMP19:%.*]] = load i32, i32* [[CONV]], align 8, !llvm.access.group !22 +// CHECK2-NEXT: [[TMP19:%.*]] = load i32, i32* [[CONV]], align 4, !llvm.access.group !22 // CHECK2-NEXT: [[ADD8:%.*]] = add nsw i32 [[TMP19]], 1 -// CHECK2-NEXT: store i32 [[ADD8]], i32* [[CONV]], align 8, !llvm.access.group !22 +// CHECK2-NEXT: store i32 [[ADD8]], i32* [[CONV]], align 4, !llvm.access.group !22 // CHECK2-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x float], [10 x float]* [[TMP0]], i64 0, i64 2 // CHECK2-NEXT: [[TMP20:%.*]] = load float, float* [[ARRAYIDX]], align 4, !llvm.access.group !22 // CHECK2-NEXT: [[CONV9:%.*]] = fpext float [[TMP20]] to double @@ -3357,7 +3357,7 @@ int bar(int n){ // CHECK2-NEXT: [[TMP1:%.*]] = load i64, i64* [[VLA_ADDR]], align 8 // CHECK2-NEXT: [[TMP2:%.*]] = load i64, i64* [[VLA_ADDR2]], align 8 // CHECK2-NEXT: [[TMP3:%.*]] = load i16*, i16** [[C_ADDR]], align 8 -// CHECK2-NEXT: [[TMP4:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK2-NEXT: [[TMP4:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK2-NEXT: [[CONV3:%.*]] = bitcast i64* [[B_CASTED]] to i32* // CHECK2-NEXT: store i32 [[TMP4]], i32* [[CONV3]], align 4 // CHECK2-NEXT: [[TMP5:%.*]] = load i64, i64* [[B_CASTED]], align 8 @@ -3425,7 +3425,7 @@ int bar(int n){ // CHECK2-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP11]], 1 // CHECK2-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK2-NEXT: store i32 [[ADD]], i32* [[I]], align 4 -// CHECK2-NEXT: [[TMP12:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK2-NEXT: [[TMP12:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK2-NEXT: [[CONV4:%.*]] = sitofp i32 [[TMP12]] to double // CHECK2-NEXT: [[ADD5:%.*]] = fadd double [[CONV4]], 1.500000e+00 // CHECK2-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_S1:%.*]], %struct.S1* [[TMP0]], i32 0, i32 0 @@ -3476,19 +3476,19 @@ int bar(int n){ // CHECK2-NEXT: [[CONV2:%.*]] = bitcast i64* [[AA_ADDR]] to i16* // CHECK2-NEXT: [[CONV3:%.*]] = bitcast i64* [[AAA_ADDR]] to i8* // CHECK2-NEXT: [[TMP0:%.*]] = load [10 x i32]*, [10 x i32]** [[B_ADDR]], align 8 -// CHECK2-NEXT: [[TMP1:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK2-NEXT: [[TMP1:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK2-NEXT: [[CONV4:%.*]] = bitcast i64* [[N_CASTED]] to i32* // CHECK2-NEXT: store i32 [[TMP1]], i32* [[CONV4]], align 4 // CHECK2-NEXT: [[TMP2:%.*]] = load i64, i64* [[N_CASTED]], align 8 -// CHECK2-NEXT: [[TMP3:%.*]] = load i32, i32* [[CONV1]], align 8 +// CHECK2-NEXT: [[TMP3:%.*]] = load i32, i32* [[CONV1]], align 4 // CHECK2-NEXT: [[CONV5:%.*]] = bitcast i64* [[A_CASTED]] to i32* // CHECK2-NEXT: store i32 [[TMP3]], i32* [[CONV5]], align 4 // CHECK2-NEXT: [[TMP4:%.*]] = load i64, i64* [[A_CASTED]], align 8 -// CHECK2-NEXT: [[TMP5:%.*]] = 
load i16, i16* [[CONV2]], align 8 +// CHECK2-NEXT: [[TMP5:%.*]] = load i16, i16* [[CONV2]], align 2 // CHECK2-NEXT: [[CONV6:%.*]] = bitcast i64* [[AA_CASTED]] to i16* // CHECK2-NEXT: store i16 [[TMP5]], i16* [[CONV6]], align 2 // CHECK2-NEXT: [[TMP6:%.*]] = load i64, i64* [[AA_CASTED]], align 8 -// CHECK2-NEXT: [[TMP7:%.*]] = load i8, i8* [[CONV3]], align 8 +// CHECK2-NEXT: [[TMP7:%.*]] = load i8, i8* [[CONV3]], align 1 // CHECK2-NEXT: [[CONV7:%.*]] = bitcast i64* [[AAA_CASTED]] to i8* // CHECK2-NEXT: store i8 [[TMP7]], i8* [[CONV7]], align 1 // CHECK2-NEXT: [[TMP8:%.*]] = load i64, i64* [[AAA_CASTED]], align 8 @@ -3529,9 +3529,9 @@ int bar(int n){ // CHECK2-NEXT: [[CONV2:%.*]] = bitcast i64* [[AA_ADDR]] to i16* // CHECK2-NEXT: [[CONV3:%.*]] = bitcast i64* [[AAA_ADDR]] to i8* // CHECK2-NEXT: [[TMP0:%.*]] = load [10 x i32]*, [10 x i32]** [[B_ADDR]], align 8 -// CHECK2-NEXT: [[TMP1:%.*]] = load i32, i32* [[CONV1]], align 8 +// CHECK2-NEXT: [[TMP1:%.*]] = load i32, i32* [[CONV1]], align 4 // CHECK2-NEXT: store i32 [[TMP1]], i32* [[DOTCAPTURE_EXPR_]], align 4 -// CHECK2-NEXT: [[TMP2:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK2-NEXT: [[TMP2:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK2-NEXT: store i32 [[TMP2]], i32* [[DOTCAPTURE_EXPR_4]], align 4 // CHECK2-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_4]], align 4 // CHECK2-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 @@ -3584,19 +3584,19 @@ int bar(int n){ // CHECK2-NEXT: [[MUL:%.*]] = mul i32 [[TMP19]], 1 // CHECK2-NEXT: [[ADD12:%.*]] = add i32 [[TMP18]], [[MUL]] // CHECK2-NEXT: store i32 [[ADD12]], i32* [[I8]], align 4 -// CHECK2-NEXT: [[TMP20:%.*]] = load i32, i32* [[CONV1]], align 8 +// CHECK2-NEXT: [[TMP20:%.*]] = load i32, i32* [[CONV1]], align 4 // CHECK2-NEXT: [[ADD13:%.*]] = add nsw i32 [[TMP20]], 1 -// CHECK2-NEXT: store i32 [[ADD13]], i32* [[CONV1]], align 8 -// CHECK2-NEXT: [[TMP21:%.*]] = load i16, i16* [[CONV2]], align 8 +// CHECK2-NEXT: store i32 [[ADD13]], i32* [[CONV1]], align 4 +// CHECK2-NEXT: [[TMP21:%.*]] = load i16, i16* [[CONV2]], align 2 // CHECK2-NEXT: [[CONV14:%.*]] = sext i16 [[TMP21]] to i32 // CHECK2-NEXT: [[ADD15:%.*]] = add nsw i32 [[CONV14]], 1 // CHECK2-NEXT: [[CONV16:%.*]] = trunc i32 [[ADD15]] to i16 -// CHECK2-NEXT: store i16 [[CONV16]], i16* [[CONV2]], align 8 -// CHECK2-NEXT: [[TMP22:%.*]] = load i8, i8* [[CONV3]], align 8 +// CHECK2-NEXT: store i16 [[CONV16]], i16* [[CONV2]], align 2 +// CHECK2-NEXT: [[TMP22:%.*]] = load i8, i8* [[CONV3]], align 1 // CHECK2-NEXT: [[CONV17:%.*]] = sext i8 [[TMP22]] to i32 // CHECK2-NEXT: [[ADD18:%.*]] = add nsw i32 [[CONV17]], 1 // CHECK2-NEXT: [[CONV19:%.*]] = trunc i32 [[ADD18]] to i8 -// CHECK2-NEXT: store i8 [[CONV19]], i8* [[CONV3]], align 8 +// CHECK2-NEXT: store i8 [[CONV19]], i8* [[CONV3]], align 1 // CHECK2-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], [10 x i32]* [[TMP0]], i64 0, i64 2 // CHECK2-NEXT: [[TMP23:%.*]] = load i32, i32* [[ARRAYIDX]], align 4 // CHECK2-NEXT: [[ADD20:%.*]] = add nsw i32 [[TMP23]], 1 @@ -3634,11 +3634,11 @@ int bar(int n){ // CHECK2-NEXT: [[CONV:%.*]] = bitcast i64* [[A_ADDR]] to i32* // CHECK2-NEXT: [[CONV1:%.*]] = bitcast i64* [[AA_ADDR]] to i16* // CHECK2-NEXT: [[TMP0:%.*]] = load [10 x i32]*, [10 x i32]** [[B_ADDR]], align 8 -// CHECK2-NEXT: [[TMP1:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK2-NEXT: [[TMP1:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK2-NEXT: [[CONV2:%.*]] = bitcast i64* [[A_CASTED]] to i32* // CHECK2-NEXT: store i32 [[TMP1]], i32* [[CONV2]], align 4 // 
CHECK2-NEXT: [[TMP2:%.*]] = load i64, i64* [[A_CASTED]], align 8 -// CHECK2-NEXT: [[TMP3:%.*]] = load i16, i16* [[CONV1]], align 8 +// CHECK2-NEXT: [[TMP3:%.*]] = load i16, i16* [[CONV1]], align 2 // CHECK2-NEXT: [[CONV3:%.*]] = bitcast i64* [[AA_CASTED]] to i16* // CHECK2-NEXT: store i16 [[TMP3]], i16* [[CONV3]], align 2 // CHECK2-NEXT: [[TMP4:%.*]] = load i64, i64* [[AA_CASTED]], align 8 @@ -3700,14 +3700,14 @@ int bar(int n){ // CHECK2-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP8]], 1 // CHECK2-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK2-NEXT: store i32 [[ADD]], i32* [[I]], align 4 -// CHECK2-NEXT: [[TMP9:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK2-NEXT: [[TMP9:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK2-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP9]], 1 -// CHECK2-NEXT: store i32 [[ADD3]], i32* [[CONV]], align 8 -// CHECK2-NEXT: [[TMP10:%.*]] = load i16, i16* [[CONV1]], align 8 +// CHECK2-NEXT: store i32 [[ADD3]], i32* [[CONV]], align 4 +// CHECK2-NEXT: [[TMP10:%.*]] = load i16, i16* [[CONV1]], align 2 // CHECK2-NEXT: [[CONV4:%.*]] = sext i16 [[TMP10]] to i32 // CHECK2-NEXT: [[ADD5:%.*]] = add nsw i32 [[CONV4]], 1 // CHECK2-NEXT: [[CONV6:%.*]] = trunc i32 [[ADD5]] to i16 -// CHECK2-NEXT: store i16 [[CONV6]], i16* [[CONV1]], align 8 +// CHECK2-NEXT: store i16 [[CONV6]], i16* [[CONV1]], align 2 // CHECK2-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], [10 x i32]* [[TMP0]], i64 0, i64 2 // CHECK2-NEXT: [[TMP11:%.*]] = load i32, i32* [[ARRAYIDX]], align 4 // CHECK2-NEXT: [[ADD7:%.*]] = add nsw i32 [[TMP11]], 1 @@ -4083,7 +4083,7 @@ int bar(int n){ // CHECK3-NEXT: [[TMP1:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR__ADDR]], align 4 // CHECK3-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR__ADDR2]], align 4 // CHECK3-NEXT: call void @__kmpc_push_num_teams(%struct.ident_t* @[[GLOB2]], i32 [[TMP0]], i32 [[TMP1]], i32 [[TMP2]]) -// CHECK3-NEXT: [[TMP3:%.*]] = load i16, i16* [[CONV]], align 4 +// CHECK3-NEXT: [[TMP3:%.*]] = load i16, i16* [[CONV]], align 2 // CHECK3-NEXT: [[CONV3:%.*]] = bitcast i32* [[AA_CASTED]] to i16* // CHECK3-NEXT: store i16 [[TMP3]], i16* [[CONV3]], align 2 // CHECK3-NEXT: [[TMP4:%.*]] = load i32, i32* [[AA_CASTED]], align 4 @@ -4345,7 +4345,7 @@ int bar(int n){ // CHECK3-NEXT: [[AA_CASTED:%.*]] = alloca i32, align 4 // CHECK3-NEXT: store i32 [[AA]], i32* [[AA_ADDR]], align 4 // CHECK3-NEXT: [[CONV:%.*]] = bitcast i32* [[AA_ADDR]] to i16* -// CHECK3-NEXT: [[TMP0:%.*]] = load i16, i16* [[CONV]], align 4 +// CHECK3-NEXT: [[TMP0:%.*]] = load i16, i16* [[CONV]], align 2 // CHECK3-NEXT: [[CONV1:%.*]] = bitcast i32* [[AA_CASTED]] to i16* // CHECK3-NEXT: store i16 [[TMP0]], i16* [[CONV1]], align 2 // CHECK3-NEXT: [[TMP1:%.*]] = load i32, i32* [[AA_CASTED]], align 4 @@ -4401,11 +4401,11 @@ int bar(int n){ // CHECK3-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP7]], 1 // CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK3-NEXT: store i32 [[ADD]], i32* [[I]], align 4 -// CHECK3-NEXT: [[TMP8:%.*]] = load i16, i16* [[CONV]], align 4 +// CHECK3-NEXT: [[TMP8:%.*]] = load i16, i16* [[CONV]], align 2 // CHECK3-NEXT: [[CONV2:%.*]] = sext i16 [[TMP8]] to i32 // CHECK3-NEXT: [[ADD3:%.*]] = add nsw i32 [[CONV2]], 1 // CHECK3-NEXT: [[CONV4:%.*]] = trunc i32 [[ADD3]] to i16 -// CHECK3-NEXT: store i16 [[CONV4]], i16* [[CONV]], align 4 +// CHECK3-NEXT: store i16 [[CONV4]], i16* [[CONV]], align 2 // CHECK3-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK3: omp.body.continue: // CHECK3-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] @@ -4434,7 +4434,7 @@ int bar(int n){ // 
CHECK3-NEXT: [[TMP0:%.*]] = load i32, i32* [[A_ADDR]], align 4 // CHECK3-NEXT: store i32 [[TMP0]], i32* [[A_CASTED]], align 4 // CHECK3-NEXT: [[TMP1:%.*]] = load i32, i32* [[A_CASTED]], align 4 -// CHECK3-NEXT: [[TMP2:%.*]] = load i16, i16* [[CONV]], align 4 +// CHECK3-NEXT: [[TMP2:%.*]] = load i16, i16* [[CONV]], align 2 // CHECK3-NEXT: [[CONV1:%.*]] = bitcast i32* [[AA_CASTED]] to i16* // CHECK3-NEXT: store i16 [[TMP2]], i16* [[CONV1]], align 2 // CHECK3-NEXT: [[TMP3:%.*]] = load i32, i32* [[AA_CASTED]], align 4 @@ -4495,11 +4495,11 @@ int bar(int n){ // CHECK3-NEXT: [[TMP8:%.*]] = load i32, i32* [[A_ADDR]], align 4 // CHECK3-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP8]], 1 // CHECK3-NEXT: store i32 [[ADD2]], i32* [[A_ADDR]], align 4 -// CHECK3-NEXT: [[TMP9:%.*]] = load i16, i16* [[CONV]], align 4 +// CHECK3-NEXT: [[TMP9:%.*]] = load i16, i16* [[CONV]], align 2 // CHECK3-NEXT: [[CONV3:%.*]] = sext i16 [[TMP9]] to i32 // CHECK3-NEXT: [[ADD4:%.*]] = add nsw i32 [[CONV3]], 1 // CHECK3-NEXT: [[CONV5:%.*]] = trunc i32 [[ADD4]] to i16 -// CHECK3-NEXT: store i16 [[CONV5]], i16* [[CONV]], align 4 +// CHECK3-NEXT: store i16 [[CONV5]], i16* [[CONV]], align 2 // CHECK3-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK3: omp.body.continue: // CHECK3-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] @@ -5171,11 +5171,11 @@ int bar(int n){ // CHECK3-NEXT: [[TMP3:%.*]] = load i32, i32* [[A_ADDR]], align 4 // CHECK3-NEXT: store i32 [[TMP3]], i32* [[A_CASTED]], align 4 // CHECK3-NEXT: [[TMP4:%.*]] = load i32, i32* [[A_CASTED]], align 4 -// CHECK3-NEXT: [[TMP5:%.*]] = load i16, i16* [[CONV]], align 4 +// CHECK3-NEXT: [[TMP5:%.*]] = load i16, i16* [[CONV]], align 2 // CHECK3-NEXT: [[CONV2:%.*]] = bitcast i32* [[AA_CASTED]] to i16* // CHECK3-NEXT: store i16 [[TMP5]], i16* [[CONV2]], align 2 // CHECK3-NEXT: [[TMP6:%.*]] = load i32, i32* [[AA_CASTED]], align 4 -// CHECK3-NEXT: [[TMP7:%.*]] = load i8, i8* [[CONV1]], align 4 +// CHECK3-NEXT: [[TMP7:%.*]] = load i8, i8* [[CONV1]], align 1 // CHECK3-NEXT: [[CONV3:%.*]] = bitcast i32* [[AAA_CASTED]] to i8* // CHECK3-NEXT: store i8 [[TMP7]], i8* [[CONV3]], align 1 // CHECK3-NEXT: [[TMP8:%.*]] = load i32, i32* [[AAA_CASTED]], align 4 @@ -5272,16 +5272,16 @@ int bar(int n){ // CHECK3-NEXT: [[TMP20:%.*]] = load i32, i32* [[A_ADDR]], align 4 // CHECK3-NEXT: [[ADD11:%.*]] = add nsw i32 [[TMP20]], 1 // CHECK3-NEXT: store i32 [[ADD11]], i32* [[A_ADDR]], align 4 -// CHECK3-NEXT: [[TMP21:%.*]] = load i16, i16* [[CONV]], align 4 +// CHECK3-NEXT: [[TMP21:%.*]] = load i16, i16* [[CONV]], align 2 // CHECK3-NEXT: [[CONV12:%.*]] = sext i16 [[TMP21]] to i32 // CHECK3-NEXT: [[ADD13:%.*]] = add nsw i32 [[CONV12]], 1 // CHECK3-NEXT: [[CONV14:%.*]] = trunc i32 [[ADD13]] to i16 -// CHECK3-NEXT: store i16 [[CONV14]], i16* [[CONV]], align 4 -// CHECK3-NEXT: [[TMP22:%.*]] = load i8, i8* [[CONV1]], align 4 +// CHECK3-NEXT: store i16 [[CONV14]], i16* [[CONV]], align 2 +// CHECK3-NEXT: [[TMP22:%.*]] = load i8, i8* [[CONV1]], align 1 // CHECK3-NEXT: [[CONV15:%.*]] = sext i8 [[TMP22]] to i32 // CHECK3-NEXT: [[ADD16:%.*]] = add nsw i32 [[CONV15]], 1 // CHECK3-NEXT: [[CONV17:%.*]] = trunc i32 [[ADD16]] to i8 -// CHECK3-NEXT: store i8 [[CONV17]], i8* [[CONV1]], align 4 +// CHECK3-NEXT: store i8 [[CONV17]], i8* [[CONV1]], align 1 // CHECK3-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], [10 x i32]* [[TMP0]], i32 0, i32 2 // CHECK3-NEXT: [[TMP23:%.*]] = load i32, i32* [[ARRAYIDX]], align 4 // CHECK3-NEXT: [[ADD18:%.*]] = add nsw i32 [[TMP23]], 1 @@ -5321,7 +5321,7 @@ int bar(int n){ // CHECK3-NEXT: 
[[TMP1:%.*]] = load i32, i32* [[A_ADDR]], align 4 // CHECK3-NEXT: store i32 [[TMP1]], i32* [[A_CASTED]], align 4 // CHECK3-NEXT: [[TMP2:%.*]] = load i32, i32* [[A_CASTED]], align 4 -// CHECK3-NEXT: [[TMP3:%.*]] = load i16, i16* [[CONV]], align 4 +// CHECK3-NEXT: [[TMP3:%.*]] = load i16, i16* [[CONV]], align 2 // CHECK3-NEXT: [[CONV1:%.*]] = bitcast i32* [[AA_CASTED]] to i16* // CHECK3-NEXT: store i16 [[TMP3]], i16* [[CONV1]], align 2 // CHECK3-NEXT: [[TMP4:%.*]] = load i32, i32* [[AA_CASTED]], align 4 @@ -5385,11 +5385,11 @@ int bar(int n){ // CHECK3-NEXT: [[TMP9:%.*]] = load i32, i32* [[A_ADDR]], align 4 // CHECK3-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP9]], 1 // CHECK3-NEXT: store i32 [[ADD2]], i32* [[A_ADDR]], align 4 -// CHECK3-NEXT: [[TMP10:%.*]] = load i16, i16* [[CONV]], align 4 +// CHECK3-NEXT: [[TMP10:%.*]] = load i16, i16* [[CONV]], align 2 // CHECK3-NEXT: [[CONV3:%.*]] = sext i16 [[TMP10]] to i32 // CHECK3-NEXT: [[ADD4:%.*]] = add nsw i32 [[CONV3]], 1 // CHECK3-NEXT: [[CONV5:%.*]] = trunc i32 [[ADD4]] to i16 -// CHECK3-NEXT: store i16 [[CONV5]], i16* [[CONV]], align 4 +// CHECK3-NEXT: store i16 [[CONV5]], i16* [[CONV]], align 2 // CHECK3-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], [10 x i32]* [[TMP0]], i32 0, i32 2 // CHECK3-NEXT: [[TMP11:%.*]] = load i32, i32* [[ARRAYIDX]], align 4 // CHECK3-NEXT: [[ADD6:%.*]] = add nsw i32 [[TMP11]], 1 @@ -5765,7 +5765,7 @@ int bar(int n){ // CHECK4-NEXT: [[TMP1:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR__ADDR]], align 4 // CHECK4-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR__ADDR2]], align 4 // CHECK4-NEXT: call void @__kmpc_push_num_teams(%struct.ident_t* @[[GLOB2]], i32 [[TMP0]], i32 [[TMP1]], i32 [[TMP2]]) -// CHECK4-NEXT: [[TMP3:%.*]] = load i16, i16* [[CONV]], align 4 +// CHECK4-NEXT: [[TMP3:%.*]] = load i16, i16* [[CONV]], align 2 // CHECK4-NEXT: [[CONV3:%.*]] = bitcast i32* [[AA_CASTED]] to i16* // CHECK4-NEXT: store i16 [[TMP3]], i16* [[CONV3]], align 2 // CHECK4-NEXT: [[TMP4:%.*]] = load i32, i32* [[AA_CASTED]], align 4 @@ -6027,7 +6027,7 @@ int bar(int n){ // CHECK4-NEXT: [[AA_CASTED:%.*]] = alloca i32, align 4 // CHECK4-NEXT: store i32 [[AA]], i32* [[AA_ADDR]], align 4 // CHECK4-NEXT: [[CONV:%.*]] = bitcast i32* [[AA_ADDR]] to i16* -// CHECK4-NEXT: [[TMP0:%.*]] = load i16, i16* [[CONV]], align 4 +// CHECK4-NEXT: [[TMP0:%.*]] = load i16, i16* [[CONV]], align 2 // CHECK4-NEXT: [[CONV1:%.*]] = bitcast i32* [[AA_CASTED]] to i16* // CHECK4-NEXT: store i16 [[TMP0]], i16* [[CONV1]], align 2 // CHECK4-NEXT: [[TMP1:%.*]] = load i32, i32* [[AA_CASTED]], align 4 @@ -6083,11 +6083,11 @@ int bar(int n){ // CHECK4-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP7]], 1 // CHECK4-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK4-NEXT: store i32 [[ADD]], i32* [[I]], align 4 -// CHECK4-NEXT: [[TMP8:%.*]] = load i16, i16* [[CONV]], align 4 +// CHECK4-NEXT: [[TMP8:%.*]] = load i16, i16* [[CONV]], align 2 // CHECK4-NEXT: [[CONV2:%.*]] = sext i16 [[TMP8]] to i32 // CHECK4-NEXT: [[ADD3:%.*]] = add nsw i32 [[CONV2]], 1 // CHECK4-NEXT: [[CONV4:%.*]] = trunc i32 [[ADD3]] to i16 -// CHECK4-NEXT: store i16 [[CONV4]], i16* [[CONV]], align 4 +// CHECK4-NEXT: store i16 [[CONV4]], i16* [[CONV]], align 2 // CHECK4-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK4: omp.body.continue: // CHECK4-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] @@ -6116,7 +6116,7 @@ int bar(int n){ // CHECK4-NEXT: [[TMP0:%.*]] = load i32, i32* [[A_ADDR]], align 4 // CHECK4-NEXT: store i32 [[TMP0]], i32* [[A_CASTED]], align 4 // CHECK4-NEXT: [[TMP1:%.*]] = load i32, i32* 
[[A_CASTED]], align 4 -// CHECK4-NEXT: [[TMP2:%.*]] = load i16, i16* [[CONV]], align 4 +// CHECK4-NEXT: [[TMP2:%.*]] = load i16, i16* [[CONV]], align 2 // CHECK4-NEXT: [[CONV1:%.*]] = bitcast i32* [[AA_CASTED]] to i16* // CHECK4-NEXT: store i16 [[TMP2]], i16* [[CONV1]], align 2 // CHECK4-NEXT: [[TMP3:%.*]] = load i32, i32* [[AA_CASTED]], align 4 @@ -6177,11 +6177,11 @@ int bar(int n){ // CHECK4-NEXT: [[TMP8:%.*]] = load i32, i32* [[A_ADDR]], align 4 // CHECK4-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP8]], 1 // CHECK4-NEXT: store i32 [[ADD2]], i32* [[A_ADDR]], align 4 -// CHECK4-NEXT: [[TMP9:%.*]] = load i16, i16* [[CONV]], align 4 +// CHECK4-NEXT: [[TMP9:%.*]] = load i16, i16* [[CONV]], align 2 // CHECK4-NEXT: [[CONV3:%.*]] = sext i16 [[TMP9]] to i32 // CHECK4-NEXT: [[ADD4:%.*]] = add nsw i32 [[CONV3]], 1 // CHECK4-NEXT: [[CONV5:%.*]] = trunc i32 [[ADD4]] to i16 -// CHECK4-NEXT: store i16 [[CONV5]], i16* [[CONV]], align 4 +// CHECK4-NEXT: store i16 [[CONV5]], i16* [[CONV]], align 2 // CHECK4-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK4: omp.body.continue: // CHECK4-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] @@ -6853,11 +6853,11 @@ int bar(int n){ // CHECK4-NEXT: [[TMP3:%.*]] = load i32, i32* [[A_ADDR]], align 4 // CHECK4-NEXT: store i32 [[TMP3]], i32* [[A_CASTED]], align 4 // CHECK4-NEXT: [[TMP4:%.*]] = load i32, i32* [[A_CASTED]], align 4 -// CHECK4-NEXT: [[TMP5:%.*]] = load i16, i16* [[CONV]], align 4 +// CHECK4-NEXT: [[TMP5:%.*]] = load i16, i16* [[CONV]], align 2 // CHECK4-NEXT: [[CONV2:%.*]] = bitcast i32* [[AA_CASTED]] to i16* // CHECK4-NEXT: store i16 [[TMP5]], i16* [[CONV2]], align 2 // CHECK4-NEXT: [[TMP6:%.*]] = load i32, i32* [[AA_CASTED]], align 4 -// CHECK4-NEXT: [[TMP7:%.*]] = load i8, i8* [[CONV1]], align 4 +// CHECK4-NEXT: [[TMP7:%.*]] = load i8, i8* [[CONV1]], align 1 // CHECK4-NEXT: [[CONV3:%.*]] = bitcast i32* [[AAA_CASTED]] to i8* // CHECK4-NEXT: store i8 [[TMP7]], i8* [[CONV3]], align 1 // CHECK4-NEXT: [[TMP8:%.*]] = load i32, i32* [[AAA_CASTED]], align 4 @@ -6954,16 +6954,16 @@ int bar(int n){ // CHECK4-NEXT: [[TMP20:%.*]] = load i32, i32* [[A_ADDR]], align 4 // CHECK4-NEXT: [[ADD11:%.*]] = add nsw i32 [[TMP20]], 1 // CHECK4-NEXT: store i32 [[ADD11]], i32* [[A_ADDR]], align 4 -// CHECK4-NEXT: [[TMP21:%.*]] = load i16, i16* [[CONV]], align 4 +// CHECK4-NEXT: [[TMP21:%.*]] = load i16, i16* [[CONV]], align 2 // CHECK4-NEXT: [[CONV12:%.*]] = sext i16 [[TMP21]] to i32 // CHECK4-NEXT: [[ADD13:%.*]] = add nsw i32 [[CONV12]], 1 // CHECK4-NEXT: [[CONV14:%.*]] = trunc i32 [[ADD13]] to i16 -// CHECK4-NEXT: store i16 [[CONV14]], i16* [[CONV]], align 4 -// CHECK4-NEXT: [[TMP22:%.*]] = load i8, i8* [[CONV1]], align 4 +// CHECK4-NEXT: store i16 [[CONV14]], i16* [[CONV]], align 2 +// CHECK4-NEXT: [[TMP22:%.*]] = load i8, i8* [[CONV1]], align 1 // CHECK4-NEXT: [[CONV15:%.*]] = sext i8 [[TMP22]] to i32 // CHECK4-NEXT: [[ADD16:%.*]] = add nsw i32 [[CONV15]], 1 // CHECK4-NEXT: [[CONV17:%.*]] = trunc i32 [[ADD16]] to i8 -// CHECK4-NEXT: store i8 [[CONV17]], i8* [[CONV1]], align 4 +// CHECK4-NEXT: store i8 [[CONV17]], i8* [[CONV1]], align 1 // CHECK4-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], [10 x i32]* [[TMP0]], i32 0, i32 2 // CHECK4-NEXT: [[TMP23:%.*]] = load i32, i32* [[ARRAYIDX]], align 4 // CHECK4-NEXT: [[ADD18:%.*]] = add nsw i32 [[TMP23]], 1 @@ -7003,7 +7003,7 @@ int bar(int n){ // CHECK4-NEXT: [[TMP1:%.*]] = load i32, i32* [[A_ADDR]], align 4 // CHECK4-NEXT: store i32 [[TMP1]], i32* [[A_CASTED]], align 4 // CHECK4-NEXT: [[TMP2:%.*]] = load i32, i32* [[A_CASTED]], 
align 4 -// CHECK4-NEXT: [[TMP3:%.*]] = load i16, i16* [[CONV]], align 4 +// CHECK4-NEXT: [[TMP3:%.*]] = load i16, i16* [[CONV]], align 2 // CHECK4-NEXT: [[CONV1:%.*]] = bitcast i32* [[AA_CASTED]] to i16* // CHECK4-NEXT: store i16 [[TMP3]], i16* [[CONV1]], align 2 // CHECK4-NEXT: [[TMP4:%.*]] = load i32, i32* [[AA_CASTED]], align 4 @@ -7067,11 +7067,11 @@ int bar(int n){ // CHECK4-NEXT: [[TMP9:%.*]] = load i32, i32* [[A_ADDR]], align 4 // CHECK4-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP9]], 1 // CHECK4-NEXT: store i32 [[ADD2]], i32* [[A_ADDR]], align 4 -// CHECK4-NEXT: [[TMP10:%.*]] = load i16, i16* [[CONV]], align 4 +// CHECK4-NEXT: [[TMP10:%.*]] = load i16, i16* [[CONV]], align 2 // CHECK4-NEXT: [[CONV3:%.*]] = sext i16 [[TMP10]] to i32 // CHECK4-NEXT: [[ADD4:%.*]] = add nsw i32 [[CONV3]], 1 // CHECK4-NEXT: [[CONV5:%.*]] = trunc i32 [[ADD4]] to i16 -// CHECK4-NEXT: store i16 [[CONV5]], i16* [[CONV]], align 4 +// CHECK4-NEXT: store i16 [[CONV5]], i16* [[CONV]], align 2 // CHECK4-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], [10 x i32]* [[TMP0]], i32 0, i32 2 // CHECK4-NEXT: [[TMP11:%.*]] = load i32, i32* [[ARRAYIDX]], align 4 // CHECK4-NEXT: [[ADD6:%.*]] = add nsw i32 [[TMP11]], 1 @@ -7112,10 +7112,10 @@ int bar(int n){ // CHECK9-NEXT: [[CONV:%.*]] = bitcast i64* [[AA_ADDR]] to i16* // CHECK9-NEXT: [[CONV3:%.*]] = bitcast i64* [[DOTCAPTURE_EXPR__ADDR]] to i32* // CHECK9-NEXT: [[CONV4:%.*]] = bitcast i64* [[DOTCAPTURE_EXPR__ADDR2]] to i32* -// CHECK9-NEXT: [[TMP1:%.*]] = load i32, i32* [[CONV3]], align 8 -// CHECK9-NEXT: [[TMP2:%.*]] = load i32, i32* [[CONV4]], align 8 +// CHECK9-NEXT: [[TMP1:%.*]] = load i32, i32* [[CONV3]], align 4 +// CHECK9-NEXT: [[TMP2:%.*]] = load i32, i32* [[CONV4]], align 4 // CHECK9-NEXT: call void @__kmpc_push_num_teams(%struct.ident_t* @[[GLOB2]], i32 [[TMP0]], i32 [[TMP1]], i32 [[TMP2]]) -// CHECK9-NEXT: [[TMP3:%.*]] = load i16, i16* [[CONV]], align 8 +// CHECK9-NEXT: [[TMP3:%.*]] = load i16, i16* [[CONV]], align 2 // CHECK9-NEXT: [[CONV5:%.*]] = bitcast i64* [[AA_CASTED]] to i16* // CHECK9-NEXT: store i16 [[TMP3]], i16* [[CONV5]], align 2 // CHECK9-NEXT: [[TMP4:%.*]] = load i64, i64* [[AA_CASTED]], align 8 @@ -7193,7 +7193,7 @@ int bar(int n){ // CHECK9-NEXT: [[AA_CASTED:%.*]] = alloca i64, align 8 // CHECK9-NEXT: store i64 [[AA]], i64* [[AA_ADDR]], align 8 // CHECK9-NEXT: [[CONV:%.*]] = bitcast i64* [[AA_ADDR]] to i16* -// CHECK9-NEXT: [[TMP0:%.*]] = load i16, i16* [[CONV]], align 8 +// CHECK9-NEXT: [[TMP0:%.*]] = load i16, i16* [[CONV]], align 2 // CHECK9-NEXT: [[CONV1:%.*]] = bitcast i64* [[AA_CASTED]] to i16* // CHECK9-NEXT: store i16 [[TMP0]], i16* [[CONV1]], align 2 // CHECK9-NEXT: [[TMP1:%.*]] = load i64, i64* [[AA_CASTED]], align 8 @@ -7249,11 +7249,11 @@ int bar(int n){ // CHECK9-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP7]], 1 // CHECK9-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK9-NEXT: store i32 [[ADD]], i32* [[I]], align 4 -// CHECK9-NEXT: [[TMP8:%.*]] = load i16, i16* [[CONV]], align 8 +// CHECK9-NEXT: [[TMP8:%.*]] = load i16, i16* [[CONV]], align 2 // CHECK9-NEXT: [[CONV2:%.*]] = sext i16 [[TMP8]] to i32 // CHECK9-NEXT: [[ADD3:%.*]] = add nsw i32 [[CONV2]], 1 // CHECK9-NEXT: [[CONV4:%.*]] = trunc i32 [[ADD3]] to i16 -// CHECK9-NEXT: store i16 [[CONV4]], i16* [[CONV]], align 8 +// CHECK9-NEXT: store i16 [[CONV4]], i16* [[CONV]], align 2 // CHECK9-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK9: omp.body.continue: // CHECK9-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] @@ -7280,11 +7280,11 @@ int bar(int n){ // CHECK9-NEXT: store i64 
[[AA]], i64* [[AA_ADDR]], align 8 // CHECK9-NEXT: [[CONV:%.*]] = bitcast i64* [[A_ADDR]] to i32* // CHECK9-NEXT: [[CONV1:%.*]] = bitcast i64* [[AA_ADDR]] to i16* -// CHECK9-NEXT: [[TMP0:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK9-NEXT: [[TMP0:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK9-NEXT: [[CONV2:%.*]] = bitcast i64* [[A_CASTED]] to i32* // CHECK9-NEXT: store i32 [[TMP0]], i32* [[CONV2]], align 4 // CHECK9-NEXT: [[TMP1:%.*]] = load i64, i64* [[A_CASTED]], align 8 -// CHECK9-NEXT: [[TMP2:%.*]] = load i16, i16* [[CONV1]], align 8 +// CHECK9-NEXT: [[TMP2:%.*]] = load i16, i16* [[CONV1]], align 2 // CHECK9-NEXT: [[CONV3:%.*]] = bitcast i64* [[AA_CASTED]] to i16* // CHECK9-NEXT: store i16 [[TMP2]], i16* [[CONV3]], align 2 // CHECK9-NEXT: [[TMP3:%.*]] = load i64, i64* [[AA_CASTED]], align 8 @@ -7343,14 +7343,14 @@ int bar(int n){ // CHECK9-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP7]], 1 // CHECK9-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK9-NEXT: store i32 [[ADD]], i32* [[I]], align 4 -// CHECK9-NEXT: [[TMP8:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK9-NEXT: [[TMP8:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK9-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP8]], 1 -// CHECK9-NEXT: store i32 [[ADD3]], i32* [[CONV]], align 8 -// CHECK9-NEXT: [[TMP9:%.*]] = load i16, i16* [[CONV1]], align 8 +// CHECK9-NEXT: store i32 [[ADD3]], i32* [[CONV]], align 4 +// CHECK9-NEXT: [[TMP9:%.*]] = load i16, i16* [[CONV1]], align 2 // CHECK9-NEXT: [[CONV4:%.*]] = sext i16 [[TMP9]] to i32 // CHECK9-NEXT: [[ADD5:%.*]] = add nsw i32 [[CONV4]], 1 // CHECK9-NEXT: [[CONV6:%.*]] = trunc i32 [[ADD5]] to i16 -// CHECK9-NEXT: store i16 [[CONV6]], i16* [[CONV1]], align 8 +// CHECK9-NEXT: store i16 [[CONV6]], i16* [[CONV1]], align 2 // CHECK9-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK9: omp.body.continue: // CHECK9-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] @@ -7401,11 +7401,11 @@ int bar(int n){ // CHECK9-NEXT: [[TMP6:%.*]] = load double*, double** [[CN_ADDR]], align 8 // CHECK9-NEXT: [[TMP7:%.*]] = load %struct.TT*, %struct.TT** [[D_ADDR]], align 8 // CHECK9-NEXT: [[CONV5:%.*]] = bitcast i64* [[DOTCAPTURE_EXPR__ADDR]] to i32* -// CHECK9-NEXT: [[TMP8:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK9-NEXT: [[TMP8:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK9-NEXT: [[CONV6:%.*]] = bitcast i64* [[A_CASTED]] to i32* // CHECK9-NEXT: store i32 [[TMP8]], i32* [[CONV6]], align 4 // CHECK9-NEXT: [[TMP9:%.*]] = load i64, i64* [[A_CASTED]], align 8 -// CHECK9-NEXT: [[TMP10:%.*]] = load i32, i32* [[CONV5]], align 8 +// CHECK9-NEXT: [[TMP10:%.*]] = load i32, i32* [[CONV5]], align 4 // CHECK9-NEXT: [[CONV7:%.*]] = bitcast i64* [[DOTCAPTURE_EXPR__CASTED]] to i32* // CHECK9-NEXT: store i32 [[TMP10]], i32* [[CONV7]], align 4 // CHECK9-NEXT: [[TMP11:%.*]] = load i64, i64* [[DOTCAPTURE_EXPR__CASTED]], align 8 @@ -7461,7 +7461,7 @@ int bar(int n){ // CHECK9-NEXT: store i32 9, i32* [[DOTOMP_UB]], align 4 // CHECK9-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK9-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK9-NEXT: [[TMP8:%.*]] = load i32, i32* [[CONV5]], align 8 +// CHECK9-NEXT: [[TMP8:%.*]] = load i32, i32* [[CONV5]], align 4 // CHECK9-NEXT: [[TMP9:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK9-NEXT: [[TMP10:%.*]] = load i32, i32* [[TMP9]], align 4 // CHECK9-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP10]], i32 91, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 [[TMP8]]) @@ 
-7496,9 +7496,9 @@ int bar(int n){ // CHECK9-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP18]], 1 // CHECK9-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK9-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !13 -// CHECK9-NEXT: [[TMP19:%.*]] = load i32, i32* [[CONV]], align 8, !llvm.access.group !13 +// CHECK9-NEXT: [[TMP19:%.*]] = load i32, i32* [[CONV]], align 4, !llvm.access.group !13 // CHECK9-NEXT: [[ADD8:%.*]] = add nsw i32 [[TMP19]], 1 -// CHECK9-NEXT: store i32 [[ADD8]], i32* [[CONV]], align 8, !llvm.access.group !13 +// CHECK9-NEXT: store i32 [[ADD8]], i32* [[CONV]], align 4, !llvm.access.group !13 // CHECK9-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x float], [10 x float]* [[TMP0]], i64 0, i64 2 // CHECK9-NEXT: [[TMP20:%.*]] = load float, float* [[ARRAYIDX]], align 4, !llvm.access.group !13 // CHECK9-NEXT: [[CONV9:%.*]] = fpext float [[TMP20]] to double @@ -7579,19 +7579,19 @@ int bar(int n){ // CHECK9-NEXT: [[CONV2:%.*]] = bitcast i64* [[AA_ADDR]] to i16* // CHECK9-NEXT: [[CONV3:%.*]] = bitcast i64* [[AAA_ADDR]] to i8* // CHECK9-NEXT: [[TMP0:%.*]] = load [10 x i32]*, [10 x i32]** [[B_ADDR]], align 8 -// CHECK9-NEXT: [[TMP1:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK9-NEXT: [[TMP1:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK9-NEXT: [[CONV4:%.*]] = bitcast i64* [[N_CASTED]] to i32* // CHECK9-NEXT: store i32 [[TMP1]], i32* [[CONV4]], align 4 // CHECK9-NEXT: [[TMP2:%.*]] = load i64, i64* [[N_CASTED]], align 8 -// CHECK9-NEXT: [[TMP3:%.*]] = load i32, i32* [[CONV1]], align 8 +// CHECK9-NEXT: [[TMP3:%.*]] = load i32, i32* [[CONV1]], align 4 // CHECK9-NEXT: [[CONV5:%.*]] = bitcast i64* [[A_CASTED]] to i32* // CHECK9-NEXT: store i32 [[TMP3]], i32* [[CONV5]], align 4 // CHECK9-NEXT: [[TMP4:%.*]] = load i64, i64* [[A_CASTED]], align 8 -// CHECK9-NEXT: [[TMP5:%.*]] = load i16, i16* [[CONV2]], align 8 +// CHECK9-NEXT: [[TMP5:%.*]] = load i16, i16* [[CONV2]], align 2 // CHECK9-NEXT: [[CONV6:%.*]] = bitcast i64* [[AA_CASTED]] to i16* // CHECK9-NEXT: store i16 [[TMP5]], i16* [[CONV6]], align 2 // CHECK9-NEXT: [[TMP6:%.*]] = load i64, i64* [[AA_CASTED]], align 8 -// CHECK9-NEXT: [[TMP7:%.*]] = load i8, i8* [[CONV3]], align 8 +// CHECK9-NEXT: [[TMP7:%.*]] = load i8, i8* [[CONV3]], align 1 // CHECK9-NEXT: [[CONV7:%.*]] = bitcast i64* [[AAA_CASTED]] to i8* // CHECK9-NEXT: store i8 [[TMP7]], i8* [[CONV7]], align 1 // CHECK9-NEXT: [[TMP8:%.*]] = load i64, i64* [[AAA_CASTED]], align 8 @@ -7632,9 +7632,9 @@ int bar(int n){ // CHECK9-NEXT: [[CONV2:%.*]] = bitcast i64* [[AA_ADDR]] to i16* // CHECK9-NEXT: [[CONV3:%.*]] = bitcast i64* [[AAA_ADDR]] to i8* // CHECK9-NEXT: [[TMP0:%.*]] = load [10 x i32]*, [10 x i32]** [[B_ADDR]], align 8 -// CHECK9-NEXT: [[TMP1:%.*]] = load i32, i32* [[CONV1]], align 8 +// CHECK9-NEXT: [[TMP1:%.*]] = load i32, i32* [[CONV1]], align 4 // CHECK9-NEXT: store i32 [[TMP1]], i32* [[DOTCAPTURE_EXPR_]], align 4 -// CHECK9-NEXT: [[TMP2:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK9-NEXT: [[TMP2:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK9-NEXT: store i32 [[TMP2]], i32* [[DOTCAPTURE_EXPR_4]], align 4 // CHECK9-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_4]], align 4 // CHECK9-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 @@ -7687,19 +7687,19 @@ int bar(int n){ // CHECK9-NEXT: [[MUL:%.*]] = mul i32 [[TMP19]], 1 // CHECK9-NEXT: [[ADD12:%.*]] = add i32 [[TMP18]], [[MUL]] // CHECK9-NEXT: store i32 [[ADD12]], i32* [[I8]], align 4 -// CHECK9-NEXT: [[TMP20:%.*]] = load i32, i32* [[CONV1]], align 8 +// CHECK9-NEXT: 
[[TMP20:%.*]] = load i32, i32* [[CONV1]], align 4 // CHECK9-NEXT: [[ADD13:%.*]] = add nsw i32 [[TMP20]], 1 -// CHECK9-NEXT: store i32 [[ADD13]], i32* [[CONV1]], align 8 -// CHECK9-NEXT: [[TMP21:%.*]] = load i16, i16* [[CONV2]], align 8 +// CHECK9-NEXT: store i32 [[ADD13]], i32* [[CONV1]], align 4 +// CHECK9-NEXT: [[TMP21:%.*]] = load i16, i16* [[CONV2]], align 2 // CHECK9-NEXT: [[CONV14:%.*]] = sext i16 [[TMP21]] to i32 // CHECK9-NEXT: [[ADD15:%.*]] = add nsw i32 [[CONV14]], 1 // CHECK9-NEXT: [[CONV16:%.*]] = trunc i32 [[ADD15]] to i16 -// CHECK9-NEXT: store i16 [[CONV16]], i16* [[CONV2]], align 8 -// CHECK9-NEXT: [[TMP22:%.*]] = load i8, i8* [[CONV3]], align 8 +// CHECK9-NEXT: store i16 [[CONV16]], i16* [[CONV2]], align 2 +// CHECK9-NEXT: [[TMP22:%.*]] = load i8, i8* [[CONV3]], align 1 // CHECK9-NEXT: [[CONV17:%.*]] = sext i8 [[TMP22]] to i32 // CHECK9-NEXT: [[ADD18:%.*]] = add nsw i32 [[CONV17]], 1 // CHECK9-NEXT: [[CONV19:%.*]] = trunc i32 [[ADD18]] to i8 -// CHECK9-NEXT: store i8 [[CONV19]], i8* [[CONV3]], align 8 +// CHECK9-NEXT: store i8 [[CONV19]], i8* [[CONV3]], align 1 // CHECK9-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], [10 x i32]* [[TMP0]], i64 0, i64 2 // CHECK9-NEXT: [[TMP23:%.*]] = load i32, i32* [[ARRAYIDX]], align 4 // CHECK9-NEXT: [[ADD20:%.*]] = add nsw i32 [[TMP23]], 1 @@ -7742,7 +7742,7 @@ int bar(int n){ // CHECK9-NEXT: [[TMP1:%.*]] = load i64, i64* [[VLA_ADDR]], align 8 // CHECK9-NEXT: [[TMP2:%.*]] = load i64, i64* [[VLA_ADDR2]], align 8 // CHECK9-NEXT: [[TMP3:%.*]] = load i16*, i16** [[C_ADDR]], align 8 -// CHECK9-NEXT: [[TMP4:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK9-NEXT: [[TMP4:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK9-NEXT: [[CONV3:%.*]] = bitcast i64* [[B_CASTED]] to i32* // CHECK9-NEXT: store i32 [[TMP4]], i32* [[CONV3]], align 4 // CHECK9-NEXT: [[TMP5:%.*]] = load i64, i64* [[B_CASTED]], align 8 @@ -7810,7 +7810,7 @@ int bar(int n){ // CHECK9-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP11]], 1 // CHECK9-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK9-NEXT: store i32 [[ADD]], i32* [[I]], align 4 -// CHECK9-NEXT: [[TMP12:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK9-NEXT: [[TMP12:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK9-NEXT: [[CONV4:%.*]] = sitofp i32 [[TMP12]] to double // CHECK9-NEXT: [[ADD5:%.*]] = fadd double [[CONV4]], 1.500000e+00 // CHECK9-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_S1:%.*]], %struct.S1* [[TMP0]], i32 0, i32 0 @@ -7853,11 +7853,11 @@ int bar(int n){ // CHECK9-NEXT: [[CONV:%.*]] = bitcast i64* [[A_ADDR]] to i32* // CHECK9-NEXT: [[CONV1:%.*]] = bitcast i64* [[AA_ADDR]] to i16* // CHECK9-NEXT: [[TMP0:%.*]] = load [10 x i32]*, [10 x i32]** [[B_ADDR]], align 8 -// CHECK9-NEXT: [[TMP1:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK9-NEXT: [[TMP1:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK9-NEXT: [[CONV2:%.*]] = bitcast i64* [[A_CASTED]] to i32* // CHECK9-NEXT: store i32 [[TMP1]], i32* [[CONV2]], align 4 // CHECK9-NEXT: [[TMP2:%.*]] = load i64, i64* [[A_CASTED]], align 8 -// CHECK9-NEXT: [[TMP3:%.*]] = load i16, i16* [[CONV1]], align 8 +// CHECK9-NEXT: [[TMP3:%.*]] = load i16, i16* [[CONV1]], align 2 // CHECK9-NEXT: [[CONV3:%.*]] = bitcast i64* [[AA_CASTED]] to i16* // CHECK9-NEXT: store i16 [[TMP3]], i16* [[CONV3]], align 2 // CHECK9-NEXT: [[TMP4:%.*]] = load i64, i64* [[AA_CASTED]], align 8 @@ -7919,14 +7919,14 @@ int bar(int n){ // CHECK9-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP8]], 1 // CHECK9-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK9-NEXT: store i32 [[ADD]], i32* 
[[I]], align 4 -// CHECK9-NEXT: [[TMP9:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK9-NEXT: [[TMP9:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK9-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP9]], 1 -// CHECK9-NEXT: store i32 [[ADD3]], i32* [[CONV]], align 8 -// CHECK9-NEXT: [[TMP10:%.*]] = load i16, i16* [[CONV1]], align 8 +// CHECK9-NEXT: store i32 [[ADD3]], i32* [[CONV]], align 4 +// CHECK9-NEXT: [[TMP10:%.*]] = load i16, i16* [[CONV1]], align 2 // CHECK9-NEXT: [[CONV4:%.*]] = sext i16 [[TMP10]] to i32 // CHECK9-NEXT: [[ADD5:%.*]] = add nsw i32 [[CONV4]], 1 // CHECK9-NEXT: [[CONV6:%.*]] = trunc i32 [[ADD5]] to i16 -// CHECK9-NEXT: store i16 [[CONV6]], i16* [[CONV1]], align 8 +// CHECK9-NEXT: store i16 [[CONV6]], i16* [[CONV1]], align 2 // CHECK9-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], [10 x i32]* [[TMP0]], i64 0, i64 2 // CHECK9-NEXT: [[TMP11:%.*]] = load i32, i32* [[ARRAYIDX]], align 4 // CHECK9-NEXT: [[ADD7:%.*]] = add nsw i32 [[TMP11]], 1 @@ -7960,10 +7960,10 @@ int bar(int n){ // CHECK10-NEXT: [[CONV:%.*]] = bitcast i64* [[AA_ADDR]] to i16* // CHECK10-NEXT: [[CONV3:%.*]] = bitcast i64* [[DOTCAPTURE_EXPR__ADDR]] to i32* // CHECK10-NEXT: [[CONV4:%.*]] = bitcast i64* [[DOTCAPTURE_EXPR__ADDR2]] to i32* -// CHECK10-NEXT: [[TMP1:%.*]] = load i32, i32* [[CONV3]], align 8 -// CHECK10-NEXT: [[TMP2:%.*]] = load i32, i32* [[CONV4]], align 8 +// CHECK10-NEXT: [[TMP1:%.*]] = load i32, i32* [[CONV3]], align 4 +// CHECK10-NEXT: [[TMP2:%.*]] = load i32, i32* [[CONV4]], align 4 // CHECK10-NEXT: call void @__kmpc_push_num_teams(%struct.ident_t* @[[GLOB2]], i32 [[TMP0]], i32 [[TMP1]], i32 [[TMP2]]) -// CHECK10-NEXT: [[TMP3:%.*]] = load i16, i16* [[CONV]], align 8 +// CHECK10-NEXT: [[TMP3:%.*]] = load i16, i16* [[CONV]], align 2 // CHECK10-NEXT: [[CONV5:%.*]] = bitcast i64* [[AA_CASTED]] to i16* // CHECK10-NEXT: store i16 [[TMP3]], i16* [[CONV5]], align 2 // CHECK10-NEXT: [[TMP4:%.*]] = load i64, i64* [[AA_CASTED]], align 8 @@ -8041,7 +8041,7 @@ int bar(int n){ // CHECK10-NEXT: [[AA_CASTED:%.*]] = alloca i64, align 8 // CHECK10-NEXT: store i64 [[AA]], i64* [[AA_ADDR]], align 8 // CHECK10-NEXT: [[CONV:%.*]] = bitcast i64* [[AA_ADDR]] to i16* -// CHECK10-NEXT: [[TMP0:%.*]] = load i16, i16* [[CONV]], align 8 +// CHECK10-NEXT: [[TMP0:%.*]] = load i16, i16* [[CONV]], align 2 // CHECK10-NEXT: [[CONV1:%.*]] = bitcast i64* [[AA_CASTED]] to i16* // CHECK10-NEXT: store i16 [[TMP0]], i16* [[CONV1]], align 2 // CHECK10-NEXT: [[TMP1:%.*]] = load i64, i64* [[AA_CASTED]], align 8 @@ -8097,11 +8097,11 @@ int bar(int n){ // CHECK10-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP7]], 1 // CHECK10-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK10-NEXT: store i32 [[ADD]], i32* [[I]], align 4 -// CHECK10-NEXT: [[TMP8:%.*]] = load i16, i16* [[CONV]], align 8 +// CHECK10-NEXT: [[TMP8:%.*]] = load i16, i16* [[CONV]], align 2 // CHECK10-NEXT: [[CONV2:%.*]] = sext i16 [[TMP8]] to i32 // CHECK10-NEXT: [[ADD3:%.*]] = add nsw i32 [[CONV2]], 1 // CHECK10-NEXT: [[CONV4:%.*]] = trunc i32 [[ADD3]] to i16 -// CHECK10-NEXT: store i16 [[CONV4]], i16* [[CONV]], align 8 +// CHECK10-NEXT: store i16 [[CONV4]], i16* [[CONV]], align 2 // CHECK10-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK10: omp.body.continue: // CHECK10-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] @@ -8128,11 +8128,11 @@ int bar(int n){ // CHECK10-NEXT: store i64 [[AA]], i64* [[AA_ADDR]], align 8 // CHECK10-NEXT: [[CONV:%.*]] = bitcast i64* [[A_ADDR]] to i32* // CHECK10-NEXT: [[CONV1:%.*]] = bitcast i64* [[AA_ADDR]] to i16* -// CHECK10-NEXT: [[TMP0:%.*]] = 
load i32, i32* [[CONV]], align 8 +// CHECK10-NEXT: [[TMP0:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK10-NEXT: [[CONV2:%.*]] = bitcast i64* [[A_CASTED]] to i32* // CHECK10-NEXT: store i32 [[TMP0]], i32* [[CONV2]], align 4 // CHECK10-NEXT: [[TMP1:%.*]] = load i64, i64* [[A_CASTED]], align 8 -// CHECK10-NEXT: [[TMP2:%.*]] = load i16, i16* [[CONV1]], align 8 +// CHECK10-NEXT: [[TMP2:%.*]] = load i16, i16* [[CONV1]], align 2 // CHECK10-NEXT: [[CONV3:%.*]] = bitcast i64* [[AA_CASTED]] to i16* // CHECK10-NEXT: store i16 [[TMP2]], i16* [[CONV3]], align 2 // CHECK10-NEXT: [[TMP3:%.*]] = load i64, i64* [[AA_CASTED]], align 8 @@ -8191,14 +8191,14 @@ int bar(int n){ // CHECK10-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP7]], 1 // CHECK10-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK10-NEXT: store i32 [[ADD]], i32* [[I]], align 4 -// CHECK10-NEXT: [[TMP8:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK10-NEXT: [[TMP8:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK10-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP8]], 1 -// CHECK10-NEXT: store i32 [[ADD3]], i32* [[CONV]], align 8 -// CHECK10-NEXT: [[TMP9:%.*]] = load i16, i16* [[CONV1]], align 8 +// CHECK10-NEXT: store i32 [[ADD3]], i32* [[CONV]], align 4 +// CHECK10-NEXT: [[TMP9:%.*]] = load i16, i16* [[CONV1]], align 2 // CHECK10-NEXT: [[CONV4:%.*]] = sext i16 [[TMP9]] to i32 // CHECK10-NEXT: [[ADD5:%.*]] = add nsw i32 [[CONV4]], 1 // CHECK10-NEXT: [[CONV6:%.*]] = trunc i32 [[ADD5]] to i16 -// CHECK10-NEXT: store i16 [[CONV6]], i16* [[CONV1]], align 8 +// CHECK10-NEXT: store i16 [[CONV6]], i16* [[CONV1]], align 2 // CHECK10-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK10: omp.body.continue: // CHECK10-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] @@ -8249,11 +8249,11 @@ int bar(int n){ // CHECK10-NEXT: [[TMP6:%.*]] = load double*, double** [[CN_ADDR]], align 8 // CHECK10-NEXT: [[TMP7:%.*]] = load %struct.TT*, %struct.TT** [[D_ADDR]], align 8 // CHECK10-NEXT: [[CONV5:%.*]] = bitcast i64* [[DOTCAPTURE_EXPR__ADDR]] to i32* -// CHECK10-NEXT: [[TMP8:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK10-NEXT: [[TMP8:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK10-NEXT: [[CONV6:%.*]] = bitcast i64* [[A_CASTED]] to i32* // CHECK10-NEXT: store i32 [[TMP8]], i32* [[CONV6]], align 4 // CHECK10-NEXT: [[TMP9:%.*]] = load i64, i64* [[A_CASTED]], align 8 -// CHECK10-NEXT: [[TMP10:%.*]] = load i32, i32* [[CONV5]], align 8 +// CHECK10-NEXT: [[TMP10:%.*]] = load i32, i32* [[CONV5]], align 4 // CHECK10-NEXT: [[CONV7:%.*]] = bitcast i64* [[DOTCAPTURE_EXPR__CASTED]] to i32* // CHECK10-NEXT: store i32 [[TMP10]], i32* [[CONV7]], align 4 // CHECK10-NEXT: [[TMP11:%.*]] = load i64, i64* [[DOTCAPTURE_EXPR__CASTED]], align 8 @@ -8309,7 +8309,7 @@ int bar(int n){ // CHECK10-NEXT: store i32 9, i32* [[DOTOMP_UB]], align 4 // CHECK10-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK10-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK10-NEXT: [[TMP8:%.*]] = load i32, i32* [[CONV5]], align 8 +// CHECK10-NEXT: [[TMP8:%.*]] = load i32, i32* [[CONV5]], align 4 // CHECK10-NEXT: [[TMP9:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK10-NEXT: [[TMP10:%.*]] = load i32, i32* [[TMP9]], align 4 // CHECK10-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP10]], i32 91, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 [[TMP8]]) @@ -8344,9 +8344,9 @@ int bar(int n){ // CHECK10-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP18]], 1 // CHECK10-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // 
CHECK10-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !13 -// CHECK10-NEXT: [[TMP19:%.*]] = load i32, i32* [[CONV]], align 8, !llvm.access.group !13 +// CHECK10-NEXT: [[TMP19:%.*]] = load i32, i32* [[CONV]], align 4, !llvm.access.group !13 // CHECK10-NEXT: [[ADD8:%.*]] = add nsw i32 [[TMP19]], 1 -// CHECK10-NEXT: store i32 [[ADD8]], i32* [[CONV]], align 8, !llvm.access.group !13 +// CHECK10-NEXT: store i32 [[ADD8]], i32* [[CONV]], align 4, !llvm.access.group !13 // CHECK10-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x float], [10 x float]* [[TMP0]], i64 0, i64 2 // CHECK10-NEXT: [[TMP20:%.*]] = load float, float* [[ARRAYIDX]], align 4, !llvm.access.group !13 // CHECK10-NEXT: [[CONV9:%.*]] = fpext float [[TMP20]] to double @@ -8427,19 +8427,19 @@ int bar(int n){ // CHECK10-NEXT: [[CONV2:%.*]] = bitcast i64* [[AA_ADDR]] to i16* // CHECK10-NEXT: [[CONV3:%.*]] = bitcast i64* [[AAA_ADDR]] to i8* // CHECK10-NEXT: [[TMP0:%.*]] = load [10 x i32]*, [10 x i32]** [[B_ADDR]], align 8 -// CHECK10-NEXT: [[TMP1:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK10-NEXT: [[TMP1:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK10-NEXT: [[CONV4:%.*]] = bitcast i64* [[N_CASTED]] to i32* // CHECK10-NEXT: store i32 [[TMP1]], i32* [[CONV4]], align 4 // CHECK10-NEXT: [[TMP2:%.*]] = load i64, i64* [[N_CASTED]], align 8 -// CHECK10-NEXT: [[TMP3:%.*]] = load i32, i32* [[CONV1]], align 8 +// CHECK10-NEXT: [[TMP3:%.*]] = load i32, i32* [[CONV1]], align 4 // CHECK10-NEXT: [[CONV5:%.*]] = bitcast i64* [[A_CASTED]] to i32* // CHECK10-NEXT: store i32 [[TMP3]], i32* [[CONV5]], align 4 // CHECK10-NEXT: [[TMP4:%.*]] = load i64, i64* [[A_CASTED]], align 8 -// CHECK10-NEXT: [[TMP5:%.*]] = load i16, i16* [[CONV2]], align 8 +// CHECK10-NEXT: [[TMP5:%.*]] = load i16, i16* [[CONV2]], align 2 // CHECK10-NEXT: [[CONV6:%.*]] = bitcast i64* [[AA_CASTED]] to i16* // CHECK10-NEXT: store i16 [[TMP5]], i16* [[CONV6]], align 2 // CHECK10-NEXT: [[TMP6:%.*]] = load i64, i64* [[AA_CASTED]], align 8 -// CHECK10-NEXT: [[TMP7:%.*]] = load i8, i8* [[CONV3]], align 8 +// CHECK10-NEXT: [[TMP7:%.*]] = load i8, i8* [[CONV3]], align 1 // CHECK10-NEXT: [[CONV7:%.*]] = bitcast i64* [[AAA_CASTED]] to i8* // CHECK10-NEXT: store i8 [[TMP7]], i8* [[CONV7]], align 1 // CHECK10-NEXT: [[TMP8:%.*]] = load i64, i64* [[AAA_CASTED]], align 8 @@ -8480,9 +8480,9 @@ int bar(int n){ // CHECK10-NEXT: [[CONV2:%.*]] = bitcast i64* [[AA_ADDR]] to i16* // CHECK10-NEXT: [[CONV3:%.*]] = bitcast i64* [[AAA_ADDR]] to i8* // CHECK10-NEXT: [[TMP0:%.*]] = load [10 x i32]*, [10 x i32]** [[B_ADDR]], align 8 -// CHECK10-NEXT: [[TMP1:%.*]] = load i32, i32* [[CONV1]], align 8 +// CHECK10-NEXT: [[TMP1:%.*]] = load i32, i32* [[CONV1]], align 4 // CHECK10-NEXT: store i32 [[TMP1]], i32* [[DOTCAPTURE_EXPR_]], align 4 -// CHECK10-NEXT: [[TMP2:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK10-NEXT: [[TMP2:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK10-NEXT: store i32 [[TMP2]], i32* [[DOTCAPTURE_EXPR_4]], align 4 // CHECK10-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_4]], align 4 // CHECK10-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 @@ -8535,19 +8535,19 @@ int bar(int n){ // CHECK10-NEXT: [[MUL:%.*]] = mul i32 [[TMP19]], 1 // CHECK10-NEXT: [[ADD12:%.*]] = add i32 [[TMP18]], [[MUL]] // CHECK10-NEXT: store i32 [[ADD12]], i32* [[I8]], align 4 -// CHECK10-NEXT: [[TMP20:%.*]] = load i32, i32* [[CONV1]], align 8 +// CHECK10-NEXT: [[TMP20:%.*]] = load i32, i32* [[CONV1]], align 4 // CHECK10-NEXT: [[ADD13:%.*]] = add nsw i32 
[[TMP20]], 1 -// CHECK10-NEXT: store i32 [[ADD13]], i32* [[CONV1]], align 8 -// CHECK10-NEXT: [[TMP21:%.*]] = load i16, i16* [[CONV2]], align 8 +// CHECK10-NEXT: store i32 [[ADD13]], i32* [[CONV1]], align 4 +// CHECK10-NEXT: [[TMP21:%.*]] = load i16, i16* [[CONV2]], align 2 // CHECK10-NEXT: [[CONV14:%.*]] = sext i16 [[TMP21]] to i32 // CHECK10-NEXT: [[ADD15:%.*]] = add nsw i32 [[CONV14]], 1 // CHECK10-NEXT: [[CONV16:%.*]] = trunc i32 [[ADD15]] to i16 -// CHECK10-NEXT: store i16 [[CONV16]], i16* [[CONV2]], align 8 -// CHECK10-NEXT: [[TMP22:%.*]] = load i8, i8* [[CONV3]], align 8 +// CHECK10-NEXT: store i16 [[CONV16]], i16* [[CONV2]], align 2 +// CHECK10-NEXT: [[TMP22:%.*]] = load i8, i8* [[CONV3]], align 1 // CHECK10-NEXT: [[CONV17:%.*]] = sext i8 [[TMP22]] to i32 // CHECK10-NEXT: [[ADD18:%.*]] = add nsw i32 [[CONV17]], 1 // CHECK10-NEXT: [[CONV19:%.*]] = trunc i32 [[ADD18]] to i8 -// CHECK10-NEXT: store i8 [[CONV19]], i8* [[CONV3]], align 8 +// CHECK10-NEXT: store i8 [[CONV19]], i8* [[CONV3]], align 1 // CHECK10-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], [10 x i32]* [[TMP0]], i64 0, i64 2 // CHECK10-NEXT: [[TMP23:%.*]] = load i32, i32* [[ARRAYIDX]], align 4 // CHECK10-NEXT: [[ADD20:%.*]] = add nsw i32 [[TMP23]], 1 @@ -8590,7 +8590,7 @@ int bar(int n){ // CHECK10-NEXT: [[TMP1:%.*]] = load i64, i64* [[VLA_ADDR]], align 8 // CHECK10-NEXT: [[TMP2:%.*]] = load i64, i64* [[VLA_ADDR2]], align 8 // CHECK10-NEXT: [[TMP3:%.*]] = load i16*, i16** [[C_ADDR]], align 8 -// CHECK10-NEXT: [[TMP4:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK10-NEXT: [[TMP4:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK10-NEXT: [[CONV3:%.*]] = bitcast i64* [[B_CASTED]] to i32* // CHECK10-NEXT: store i32 [[TMP4]], i32* [[CONV3]], align 4 // CHECK10-NEXT: [[TMP5:%.*]] = load i64, i64* [[B_CASTED]], align 8 @@ -8658,7 +8658,7 @@ int bar(int n){ // CHECK10-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP11]], 1 // CHECK10-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK10-NEXT: store i32 [[ADD]], i32* [[I]], align 4 -// CHECK10-NEXT: [[TMP12:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK10-NEXT: [[TMP12:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK10-NEXT: [[CONV4:%.*]] = sitofp i32 [[TMP12]] to double // CHECK10-NEXT: [[ADD5:%.*]] = fadd double [[CONV4]], 1.500000e+00 // CHECK10-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_S1:%.*]], %struct.S1* [[TMP0]], i32 0, i32 0 @@ -8701,11 +8701,11 @@ int bar(int n){ // CHECK10-NEXT: [[CONV:%.*]] = bitcast i64* [[A_ADDR]] to i32* // CHECK10-NEXT: [[CONV1:%.*]] = bitcast i64* [[AA_ADDR]] to i16* // CHECK10-NEXT: [[TMP0:%.*]] = load [10 x i32]*, [10 x i32]** [[B_ADDR]], align 8 -// CHECK10-NEXT: [[TMP1:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK10-NEXT: [[TMP1:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK10-NEXT: [[CONV2:%.*]] = bitcast i64* [[A_CASTED]] to i32* // CHECK10-NEXT: store i32 [[TMP1]], i32* [[CONV2]], align 4 // CHECK10-NEXT: [[TMP2:%.*]] = load i64, i64* [[A_CASTED]], align 8 -// CHECK10-NEXT: [[TMP3:%.*]] = load i16, i16* [[CONV1]], align 8 +// CHECK10-NEXT: [[TMP3:%.*]] = load i16, i16* [[CONV1]], align 2 // CHECK10-NEXT: [[CONV3:%.*]] = bitcast i64* [[AA_CASTED]] to i16* // CHECK10-NEXT: store i16 [[TMP3]], i16* [[CONV3]], align 2 // CHECK10-NEXT: [[TMP4:%.*]] = load i64, i64* [[AA_CASTED]], align 8 @@ -8767,14 +8767,14 @@ int bar(int n){ // CHECK10-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP8]], 1 // CHECK10-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK10-NEXT: store i32 [[ADD]], i32* [[I]], align 4 -// CHECK10-NEXT: [[TMP9:%.*]] = 
load i32, i32* [[CONV]], align 8 +// CHECK10-NEXT: [[TMP9:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK10-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP9]], 1 -// CHECK10-NEXT: store i32 [[ADD3]], i32* [[CONV]], align 8 -// CHECK10-NEXT: [[TMP10:%.*]] = load i16, i16* [[CONV1]], align 8 +// CHECK10-NEXT: store i32 [[ADD3]], i32* [[CONV]], align 4 +// CHECK10-NEXT: [[TMP10:%.*]] = load i16, i16* [[CONV1]], align 2 // CHECK10-NEXT: [[CONV4:%.*]] = sext i16 [[TMP10]] to i32 // CHECK10-NEXT: [[ADD5:%.*]] = add nsw i32 [[CONV4]], 1 // CHECK10-NEXT: [[CONV6:%.*]] = trunc i32 [[ADD5]] to i16 -// CHECK10-NEXT: store i16 [[CONV6]], i16* [[CONV1]], align 8 +// CHECK10-NEXT: store i16 [[CONV6]], i16* [[CONV1]], align 2 // CHECK10-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], [10 x i32]* [[TMP0]], i64 0, i64 2 // CHECK10-NEXT: [[TMP11:%.*]] = load i32, i32* [[ARRAYIDX]], align 4 // CHECK10-NEXT: [[ADD7:%.*]] = add nsw i32 [[TMP11]], 1 @@ -8809,7 +8809,7 @@ int bar(int n){ // CHECK11-NEXT: [[TMP1:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR__ADDR]], align 4 // CHECK11-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR__ADDR2]], align 4 // CHECK11-NEXT: call void @__kmpc_push_num_teams(%struct.ident_t* @[[GLOB2]], i32 [[TMP0]], i32 [[TMP1]], i32 [[TMP2]]) -// CHECK11-NEXT: [[TMP3:%.*]] = load i16, i16* [[CONV]], align 4 +// CHECK11-NEXT: [[TMP3:%.*]] = load i16, i16* [[CONV]], align 2 // CHECK11-NEXT: [[CONV3:%.*]] = bitcast i32* [[AA_CASTED]] to i16* // CHECK11-NEXT: store i16 [[TMP3]], i16* [[CONV3]], align 2 // CHECK11-NEXT: [[TMP4:%.*]] = load i32, i32* [[AA_CASTED]], align 4 @@ -8887,7 +8887,7 @@ int bar(int n){ // CHECK11-NEXT: [[AA_CASTED:%.*]] = alloca i32, align 4 // CHECK11-NEXT: store i32 [[AA]], i32* [[AA_ADDR]], align 4 // CHECK11-NEXT: [[CONV:%.*]] = bitcast i32* [[AA_ADDR]] to i16* -// CHECK11-NEXT: [[TMP0:%.*]] = load i16, i16* [[CONV]], align 4 +// CHECK11-NEXT: [[TMP0:%.*]] = load i16, i16* [[CONV]], align 2 // CHECK11-NEXT: [[CONV1:%.*]] = bitcast i32* [[AA_CASTED]] to i16* // CHECK11-NEXT: store i16 [[TMP0]], i16* [[CONV1]], align 2 // CHECK11-NEXT: [[TMP1:%.*]] = load i32, i32* [[AA_CASTED]], align 4 @@ -8943,11 +8943,11 @@ int bar(int n){ // CHECK11-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP7]], 1 // CHECK11-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK11-NEXT: store i32 [[ADD]], i32* [[I]], align 4 -// CHECK11-NEXT: [[TMP8:%.*]] = load i16, i16* [[CONV]], align 4 +// CHECK11-NEXT: [[TMP8:%.*]] = load i16, i16* [[CONV]], align 2 // CHECK11-NEXT: [[CONV2:%.*]] = sext i16 [[TMP8]] to i32 // CHECK11-NEXT: [[ADD3:%.*]] = add nsw i32 [[CONV2]], 1 // CHECK11-NEXT: [[CONV4:%.*]] = trunc i32 [[ADD3]] to i16 -// CHECK11-NEXT: store i16 [[CONV4]], i16* [[CONV]], align 4 +// CHECK11-NEXT: store i16 [[CONV4]], i16* [[CONV]], align 2 // CHECK11-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK11: omp.body.continue: // CHECK11-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] @@ -8976,7 +8976,7 @@ int bar(int n){ // CHECK11-NEXT: [[TMP0:%.*]] = load i32, i32* [[A_ADDR]], align 4 // CHECK11-NEXT: store i32 [[TMP0]], i32* [[A_CASTED]], align 4 // CHECK11-NEXT: [[TMP1:%.*]] = load i32, i32* [[A_CASTED]], align 4 -// CHECK11-NEXT: [[TMP2:%.*]] = load i16, i16* [[CONV]], align 4 +// CHECK11-NEXT: [[TMP2:%.*]] = load i16, i16* [[CONV]], align 2 // CHECK11-NEXT: [[CONV1:%.*]] = bitcast i32* [[AA_CASTED]] to i16* // CHECK11-NEXT: store i16 [[TMP2]], i16* [[CONV1]], align 2 // CHECK11-NEXT: [[TMP3:%.*]] = load i32, i32* [[AA_CASTED]], align 4 @@ -9037,11 +9037,11 @@ int bar(int n){ // CHECK11-NEXT: 
[[TMP8:%.*]] = load i32, i32* [[A_ADDR]], align 4 // CHECK11-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP8]], 1 // CHECK11-NEXT: store i32 [[ADD2]], i32* [[A_ADDR]], align 4 -// CHECK11-NEXT: [[TMP9:%.*]] = load i16, i16* [[CONV]], align 4 +// CHECK11-NEXT: [[TMP9:%.*]] = load i16, i16* [[CONV]], align 2 // CHECK11-NEXT: [[CONV3:%.*]] = sext i16 [[TMP9]] to i32 // CHECK11-NEXT: [[ADD4:%.*]] = add nsw i32 [[CONV3]], 1 // CHECK11-NEXT: [[CONV5:%.*]] = trunc i32 [[ADD4]] to i16 -// CHECK11-NEXT: store i16 [[CONV5]], i16* [[CONV]], align 4 +// CHECK11-NEXT: store i16 [[CONV5]], i16* [[CONV]], align 2 // CHECK11-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK11: omp.body.continue: // CHECK11-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] @@ -9268,11 +9268,11 @@ int bar(int n){ // CHECK11-NEXT: [[TMP3:%.*]] = load i32, i32* [[A_ADDR]], align 4 // CHECK11-NEXT: store i32 [[TMP3]], i32* [[A_CASTED]], align 4 // CHECK11-NEXT: [[TMP4:%.*]] = load i32, i32* [[A_CASTED]], align 4 -// CHECK11-NEXT: [[TMP5:%.*]] = load i16, i16* [[CONV]], align 4 +// CHECK11-NEXT: [[TMP5:%.*]] = load i16, i16* [[CONV]], align 2 // CHECK11-NEXT: [[CONV2:%.*]] = bitcast i32* [[AA_CASTED]] to i16* // CHECK11-NEXT: store i16 [[TMP5]], i16* [[CONV2]], align 2 // CHECK11-NEXT: [[TMP6:%.*]] = load i32, i32* [[AA_CASTED]], align 4 -// CHECK11-NEXT: [[TMP7:%.*]] = load i8, i8* [[CONV1]], align 4 +// CHECK11-NEXT: [[TMP7:%.*]] = load i8, i8* [[CONV1]], align 1 // CHECK11-NEXT: [[CONV3:%.*]] = bitcast i32* [[AAA_CASTED]] to i8* // CHECK11-NEXT: store i8 [[TMP7]], i8* [[CONV3]], align 1 // CHECK11-NEXT: [[TMP8:%.*]] = load i32, i32* [[AAA_CASTED]], align 4 @@ -9369,16 +9369,16 @@ int bar(int n){ // CHECK11-NEXT: [[TMP20:%.*]] = load i32, i32* [[A_ADDR]], align 4 // CHECK11-NEXT: [[ADD11:%.*]] = add nsw i32 [[TMP20]], 1 // CHECK11-NEXT: store i32 [[ADD11]], i32* [[A_ADDR]], align 4 -// CHECK11-NEXT: [[TMP21:%.*]] = load i16, i16* [[CONV]], align 4 +// CHECK11-NEXT: [[TMP21:%.*]] = load i16, i16* [[CONV]], align 2 // CHECK11-NEXT: [[CONV12:%.*]] = sext i16 [[TMP21]] to i32 // CHECK11-NEXT: [[ADD13:%.*]] = add nsw i32 [[CONV12]], 1 // CHECK11-NEXT: [[CONV14:%.*]] = trunc i32 [[ADD13]] to i16 -// CHECK11-NEXT: store i16 [[CONV14]], i16* [[CONV]], align 4 -// CHECK11-NEXT: [[TMP22:%.*]] = load i8, i8* [[CONV1]], align 4 +// CHECK11-NEXT: store i16 [[CONV14]], i16* [[CONV]], align 2 +// CHECK11-NEXT: [[TMP22:%.*]] = load i8, i8* [[CONV1]], align 1 // CHECK11-NEXT: [[CONV15:%.*]] = sext i8 [[TMP22]] to i32 // CHECK11-NEXT: [[ADD16:%.*]] = add nsw i32 [[CONV15]], 1 // CHECK11-NEXT: [[CONV17:%.*]] = trunc i32 [[ADD16]] to i8 -// CHECK11-NEXT: store i8 [[CONV17]], i8* [[CONV1]], align 4 +// CHECK11-NEXT: store i8 [[CONV17]], i8* [[CONV1]], align 1 // CHECK11-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], [10 x i32]* [[TMP0]], i32 0, i32 2 // CHECK11-NEXT: [[TMP23:%.*]] = load i32, i32* [[ARRAYIDX]], align 4 // CHECK11-NEXT: [[ADD18:%.*]] = add nsw i32 [[TMP23]], 1 @@ -9531,7 +9531,7 @@ int bar(int n){ // CHECK11-NEXT: [[TMP1:%.*]] = load i32, i32* [[A_ADDR]], align 4 // CHECK11-NEXT: store i32 [[TMP1]], i32* [[A_CASTED]], align 4 // CHECK11-NEXT: [[TMP2:%.*]] = load i32, i32* [[A_CASTED]], align 4 -// CHECK11-NEXT: [[TMP3:%.*]] = load i16, i16* [[CONV]], align 4 +// CHECK11-NEXT: [[TMP3:%.*]] = load i16, i16* [[CONV]], align 2 // CHECK11-NEXT: [[CONV1:%.*]] = bitcast i32* [[AA_CASTED]] to i16* // CHECK11-NEXT: store i16 [[TMP3]], i16* [[CONV1]], align 2 // CHECK11-NEXT: [[TMP4:%.*]] = load i32, i32* [[AA_CASTED]], align 4 @@ -9595,11 
+9595,11 @@ int bar(int n){ // CHECK11-NEXT: [[TMP9:%.*]] = load i32, i32* [[A_ADDR]], align 4 // CHECK11-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP9]], 1 // CHECK11-NEXT: store i32 [[ADD2]], i32* [[A_ADDR]], align 4 -// CHECK11-NEXT: [[TMP10:%.*]] = load i16, i16* [[CONV]], align 4 +// CHECK11-NEXT: [[TMP10:%.*]] = load i16, i16* [[CONV]], align 2 // CHECK11-NEXT: [[CONV3:%.*]] = sext i16 [[TMP10]] to i32 // CHECK11-NEXT: [[ADD4:%.*]] = add nsw i32 [[CONV3]], 1 // CHECK11-NEXT: [[CONV5:%.*]] = trunc i32 [[ADD4]] to i16 -// CHECK11-NEXT: store i16 [[CONV5]], i16* [[CONV]], align 4 +// CHECK11-NEXT: store i16 [[CONV5]], i16* [[CONV]], align 2 // CHECK11-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], [10 x i32]* [[TMP0]], i32 0, i32 2 // CHECK11-NEXT: [[TMP11:%.*]] = load i32, i32* [[ARRAYIDX]], align 4 // CHECK11-NEXT: [[ADD6:%.*]] = add nsw i32 [[TMP11]], 1 @@ -9634,7 +9634,7 @@ int bar(int n){ // CHECK12-NEXT: [[TMP1:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR__ADDR]], align 4 // CHECK12-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR__ADDR2]], align 4 // CHECK12-NEXT: call void @__kmpc_push_num_teams(%struct.ident_t* @[[GLOB2]], i32 [[TMP0]], i32 [[TMP1]], i32 [[TMP2]]) -// CHECK12-NEXT: [[TMP3:%.*]] = load i16, i16* [[CONV]], align 4 +// CHECK12-NEXT: [[TMP3:%.*]] = load i16, i16* [[CONV]], align 2 // CHECK12-NEXT: [[CONV3:%.*]] = bitcast i32* [[AA_CASTED]] to i16* // CHECK12-NEXT: store i16 [[TMP3]], i16* [[CONV3]], align 2 // CHECK12-NEXT: [[TMP4:%.*]] = load i32, i32* [[AA_CASTED]], align 4 @@ -9712,7 +9712,7 @@ int bar(int n){ // CHECK12-NEXT: [[AA_CASTED:%.*]] = alloca i32, align 4 // CHECK12-NEXT: store i32 [[AA]], i32* [[AA_ADDR]], align 4 // CHECK12-NEXT: [[CONV:%.*]] = bitcast i32* [[AA_ADDR]] to i16* -// CHECK12-NEXT: [[TMP0:%.*]] = load i16, i16* [[CONV]], align 4 +// CHECK12-NEXT: [[TMP0:%.*]] = load i16, i16* [[CONV]], align 2 // CHECK12-NEXT: [[CONV1:%.*]] = bitcast i32* [[AA_CASTED]] to i16* // CHECK12-NEXT: store i16 [[TMP0]], i16* [[CONV1]], align 2 // CHECK12-NEXT: [[TMP1:%.*]] = load i32, i32* [[AA_CASTED]], align 4 @@ -9768,11 +9768,11 @@ int bar(int n){ // CHECK12-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP7]], 1 // CHECK12-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK12-NEXT: store i32 [[ADD]], i32* [[I]], align 4 -// CHECK12-NEXT: [[TMP8:%.*]] = load i16, i16* [[CONV]], align 4 +// CHECK12-NEXT: [[TMP8:%.*]] = load i16, i16* [[CONV]], align 2 // CHECK12-NEXT: [[CONV2:%.*]] = sext i16 [[TMP8]] to i32 // CHECK12-NEXT: [[ADD3:%.*]] = add nsw i32 [[CONV2]], 1 // CHECK12-NEXT: [[CONV4:%.*]] = trunc i32 [[ADD3]] to i16 -// CHECK12-NEXT: store i16 [[CONV4]], i16* [[CONV]], align 4 +// CHECK12-NEXT: store i16 [[CONV4]], i16* [[CONV]], align 2 // CHECK12-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK12: omp.body.continue: // CHECK12-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] @@ -9801,7 +9801,7 @@ int bar(int n){ // CHECK12-NEXT: [[TMP0:%.*]] = load i32, i32* [[A_ADDR]], align 4 // CHECK12-NEXT: store i32 [[TMP0]], i32* [[A_CASTED]], align 4 // CHECK12-NEXT: [[TMP1:%.*]] = load i32, i32* [[A_CASTED]], align 4 -// CHECK12-NEXT: [[TMP2:%.*]] = load i16, i16* [[CONV]], align 4 +// CHECK12-NEXT: [[TMP2:%.*]] = load i16, i16* [[CONV]], align 2 // CHECK12-NEXT: [[CONV1:%.*]] = bitcast i32* [[AA_CASTED]] to i16* // CHECK12-NEXT: store i16 [[TMP2]], i16* [[CONV1]], align 2 // CHECK12-NEXT: [[TMP3:%.*]] = load i32, i32* [[AA_CASTED]], align 4 @@ -9862,11 +9862,11 @@ int bar(int n){ // CHECK12-NEXT: [[TMP8:%.*]] = load i32, i32* [[A_ADDR]], align 4 // CHECK12-NEXT: 
[[ADD2:%.*]] = add nsw i32 [[TMP8]], 1 // CHECK12-NEXT: store i32 [[ADD2]], i32* [[A_ADDR]], align 4 -// CHECK12-NEXT: [[TMP9:%.*]] = load i16, i16* [[CONV]], align 4 +// CHECK12-NEXT: [[TMP9:%.*]] = load i16, i16* [[CONV]], align 2 // CHECK12-NEXT: [[CONV3:%.*]] = sext i16 [[TMP9]] to i32 // CHECK12-NEXT: [[ADD4:%.*]] = add nsw i32 [[CONV3]], 1 // CHECK12-NEXT: [[CONV5:%.*]] = trunc i32 [[ADD4]] to i16 -// CHECK12-NEXT: store i16 [[CONV5]], i16* [[CONV]], align 4 +// CHECK12-NEXT: store i16 [[CONV5]], i16* [[CONV]], align 2 // CHECK12-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK12: omp.body.continue: // CHECK12-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] @@ -10093,11 +10093,11 @@ int bar(int n){ // CHECK12-NEXT: [[TMP3:%.*]] = load i32, i32* [[A_ADDR]], align 4 // CHECK12-NEXT: store i32 [[TMP3]], i32* [[A_CASTED]], align 4 // CHECK12-NEXT: [[TMP4:%.*]] = load i32, i32* [[A_CASTED]], align 4 -// CHECK12-NEXT: [[TMP5:%.*]] = load i16, i16* [[CONV]], align 4 +// CHECK12-NEXT: [[TMP5:%.*]] = load i16, i16* [[CONV]], align 2 // CHECK12-NEXT: [[CONV2:%.*]] = bitcast i32* [[AA_CASTED]] to i16* // CHECK12-NEXT: store i16 [[TMP5]], i16* [[CONV2]], align 2 // CHECK12-NEXT: [[TMP6:%.*]] = load i32, i32* [[AA_CASTED]], align 4 -// CHECK12-NEXT: [[TMP7:%.*]] = load i8, i8* [[CONV1]], align 4 +// CHECK12-NEXT: [[TMP7:%.*]] = load i8, i8* [[CONV1]], align 1 // CHECK12-NEXT: [[CONV3:%.*]] = bitcast i32* [[AAA_CASTED]] to i8* // CHECK12-NEXT: store i8 [[TMP7]], i8* [[CONV3]], align 1 // CHECK12-NEXT: [[TMP8:%.*]] = load i32, i32* [[AAA_CASTED]], align 4 @@ -10194,16 +10194,16 @@ int bar(int n){ // CHECK12-NEXT: [[TMP20:%.*]] = load i32, i32* [[A_ADDR]], align 4 // CHECK12-NEXT: [[ADD11:%.*]] = add nsw i32 [[TMP20]], 1 // CHECK12-NEXT: store i32 [[ADD11]], i32* [[A_ADDR]], align 4 -// CHECK12-NEXT: [[TMP21:%.*]] = load i16, i16* [[CONV]], align 4 +// CHECK12-NEXT: [[TMP21:%.*]] = load i16, i16* [[CONV]], align 2 // CHECK12-NEXT: [[CONV12:%.*]] = sext i16 [[TMP21]] to i32 // CHECK12-NEXT: [[ADD13:%.*]] = add nsw i32 [[CONV12]], 1 // CHECK12-NEXT: [[CONV14:%.*]] = trunc i32 [[ADD13]] to i16 -// CHECK12-NEXT: store i16 [[CONV14]], i16* [[CONV]], align 4 -// CHECK12-NEXT: [[TMP22:%.*]] = load i8, i8* [[CONV1]], align 4 +// CHECK12-NEXT: store i16 [[CONV14]], i16* [[CONV]], align 2 +// CHECK12-NEXT: [[TMP22:%.*]] = load i8, i8* [[CONV1]], align 1 // CHECK12-NEXT: [[CONV15:%.*]] = sext i8 [[TMP22]] to i32 // CHECK12-NEXT: [[ADD16:%.*]] = add nsw i32 [[CONV15]], 1 // CHECK12-NEXT: [[CONV17:%.*]] = trunc i32 [[ADD16]] to i8 -// CHECK12-NEXT: store i8 [[CONV17]], i8* [[CONV1]], align 4 +// CHECK12-NEXT: store i8 [[CONV17]], i8* [[CONV1]], align 1 // CHECK12-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], [10 x i32]* [[TMP0]], i32 0, i32 2 // CHECK12-NEXT: [[TMP23:%.*]] = load i32, i32* [[ARRAYIDX]], align 4 // CHECK12-NEXT: [[ADD18:%.*]] = add nsw i32 [[TMP23]], 1 @@ -10356,7 +10356,7 @@ int bar(int n){ // CHECK12-NEXT: [[TMP1:%.*]] = load i32, i32* [[A_ADDR]], align 4 // CHECK12-NEXT: store i32 [[TMP1]], i32* [[A_CASTED]], align 4 // CHECK12-NEXT: [[TMP2:%.*]] = load i32, i32* [[A_CASTED]], align 4 -// CHECK12-NEXT: [[TMP3:%.*]] = load i16, i16* [[CONV]], align 4 +// CHECK12-NEXT: [[TMP3:%.*]] = load i16, i16* [[CONV]], align 2 // CHECK12-NEXT: [[CONV1:%.*]] = bitcast i32* [[AA_CASTED]] to i16* // CHECK12-NEXT: store i16 [[TMP3]], i16* [[CONV1]], align 2 // CHECK12-NEXT: [[TMP4:%.*]] = load i32, i32* [[AA_CASTED]], align 4 @@ -10420,11 +10420,11 @@ int bar(int n){ // CHECK12-NEXT: [[TMP9:%.*]] = 
load i32, i32* [[A_ADDR]], align 4 // CHECK12-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP9]], 1 // CHECK12-NEXT: store i32 [[ADD2]], i32* [[A_ADDR]], align 4 -// CHECK12-NEXT: [[TMP10:%.*]] = load i16, i16* [[CONV]], align 4 +// CHECK12-NEXT: [[TMP10:%.*]] = load i16, i16* [[CONV]], align 2 // CHECK12-NEXT: [[CONV3:%.*]] = sext i16 [[TMP10]] to i32 // CHECK12-NEXT: [[ADD4:%.*]] = add nsw i32 [[CONV3]], 1 // CHECK12-NEXT: [[CONV5:%.*]] = trunc i32 [[ADD4]] to i16 -// CHECK12-NEXT: store i16 [[CONV5]], i16* [[CONV]], align 4 +// CHECK12-NEXT: store i16 [[CONV5]], i16* [[CONV]], align 2 // CHECK12-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], [10 x i32]* [[TMP0]], i32 0, i32 2 // CHECK12-NEXT: [[TMP11:%.*]] = load i32, i32* [[ARRAYIDX]], align 4 // CHECK12-NEXT: [[ADD6:%.*]] = add nsw i32 [[TMP11]], 1 @@ -10798,10 +10798,10 @@ int bar(int n){ // CHECK17-NEXT: [[CONV:%.*]] = bitcast i64* [[AA_ADDR]] to i16* // CHECK17-NEXT: [[CONV3:%.*]] = bitcast i64* [[DOTCAPTURE_EXPR__ADDR]] to i32* // CHECK17-NEXT: [[CONV4:%.*]] = bitcast i64* [[DOTCAPTURE_EXPR__ADDR2]] to i32* -// CHECK17-NEXT: [[TMP1:%.*]] = load i32, i32* [[CONV3]], align 8 -// CHECK17-NEXT: [[TMP2:%.*]] = load i32, i32* [[CONV4]], align 8 +// CHECK17-NEXT: [[TMP1:%.*]] = load i32, i32* [[CONV3]], align 4 +// CHECK17-NEXT: [[TMP2:%.*]] = load i32, i32* [[CONV4]], align 4 // CHECK17-NEXT: call void @__kmpc_push_num_teams(%struct.ident_t* @[[GLOB2]], i32 [[TMP0]], i32 [[TMP1]], i32 [[TMP2]]) -// CHECK17-NEXT: [[TMP3:%.*]] = load i16, i16* [[CONV]], align 8 +// CHECK17-NEXT: [[TMP3:%.*]] = load i16, i16* [[CONV]], align 2 // CHECK17-NEXT: [[CONV5:%.*]] = bitcast i64* [[AA_CASTED]] to i16* // CHECK17-NEXT: store i16 [[TMP3]], i16* [[CONV5]], align 2 // CHECK17-NEXT: [[TMP4:%.*]] = load i64, i64* [[AA_CASTED]], align 8 @@ -10987,7 +10987,7 @@ int bar(int n){ // CHECK17-NEXT: [[A_CASTED:%.*]] = alloca i64, align 8 // CHECK17-NEXT: store i64 [[A]], i64* [[A_ADDR]], align 8 // CHECK17-NEXT: [[CONV:%.*]] = bitcast i64* [[A_ADDR]] to i32* -// CHECK17-NEXT: [[TMP0:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK17-NEXT: [[TMP0:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK17-NEXT: [[CONV1:%.*]] = bitcast i64* [[A_CASTED]] to i32* // CHECK17-NEXT: store i32 [[TMP0]], i32* [[CONV1]], align 4 // CHECK17-NEXT: [[TMP1:%.*]] = load i64, i64* [[A_CASTED]], align 8 @@ -11043,9 +11043,9 @@ int bar(int n){ // CHECK17-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP7]], 1 // CHECK17-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK17-NEXT: store i32 [[ADD]], i32* [[I]], align 4 -// CHECK17-NEXT: [[TMP8:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK17-NEXT: [[TMP8:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK17-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP8]], 1 -// CHECK17-NEXT: store i32 [[ADD2]], i32* [[CONV]], align 8 +// CHECK17-NEXT: store i32 [[ADD2]], i32* [[CONV]], align 4 // CHECK17-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK17: omp.body.continue: // CHECK17-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] @@ -11068,7 +11068,7 @@ int bar(int n){ // CHECK17-NEXT: [[AA_CASTED:%.*]] = alloca i64, align 8 // CHECK17-NEXT: store i64 [[AA]], i64* [[AA_ADDR]], align 8 // CHECK17-NEXT: [[CONV:%.*]] = bitcast i64* [[AA_ADDR]] to i16* -// CHECK17-NEXT: [[TMP0:%.*]] = load i16, i16* [[CONV]], align 8 +// CHECK17-NEXT: [[TMP0:%.*]] = load i16, i16* [[CONV]], align 2 // CHECK17-NEXT: [[CONV1:%.*]] = bitcast i64* [[AA_CASTED]] to i16* // CHECK17-NEXT: store i16 [[TMP0]], i16* [[CONV1]], align 2 // CHECK17-NEXT: [[TMP1:%.*]] = load i64, i64* [[AA_CASTED]], 
align 8 @@ -11124,11 +11124,11 @@ int bar(int n){ // CHECK17-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP7]], 1 // CHECK17-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK17-NEXT: store i32 [[ADD]], i32* [[I]], align 4 -// CHECK17-NEXT: [[TMP8:%.*]] = load i16, i16* [[CONV]], align 8 +// CHECK17-NEXT: [[TMP8:%.*]] = load i16, i16* [[CONV]], align 2 // CHECK17-NEXT: [[CONV2:%.*]] = sext i16 [[TMP8]] to i32 // CHECK17-NEXT: [[ADD3:%.*]] = add nsw i32 [[CONV2]], 1 // CHECK17-NEXT: [[CONV4:%.*]] = trunc i32 [[ADD3]] to i16 -// CHECK17-NEXT: store i16 [[CONV4]], i16* [[CONV]], align 8 +// CHECK17-NEXT: store i16 [[CONV4]], i16* [[CONV]], align 2 // CHECK17-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK17: omp.body.continue: // CHECK17-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] @@ -11155,11 +11155,11 @@ int bar(int n){ // CHECK17-NEXT: store i64 [[AA]], i64* [[AA_ADDR]], align 8 // CHECK17-NEXT: [[CONV:%.*]] = bitcast i64* [[A_ADDR]] to i32* // CHECK17-NEXT: [[CONV1:%.*]] = bitcast i64* [[AA_ADDR]] to i16* -// CHECK17-NEXT: [[TMP0:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK17-NEXT: [[TMP0:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK17-NEXT: [[CONV2:%.*]] = bitcast i64* [[A_CASTED]] to i32* // CHECK17-NEXT: store i32 [[TMP0]], i32* [[CONV2]], align 4 // CHECK17-NEXT: [[TMP1:%.*]] = load i64, i64* [[A_CASTED]], align 8 -// CHECK17-NEXT: [[TMP2:%.*]] = load i16, i16* [[CONV1]], align 8 +// CHECK17-NEXT: [[TMP2:%.*]] = load i16, i16* [[CONV1]], align 2 // CHECK17-NEXT: [[CONV3:%.*]] = bitcast i64* [[AA_CASTED]] to i16* // CHECK17-NEXT: store i16 [[TMP2]], i16* [[CONV3]], align 2 // CHECK17-NEXT: [[TMP3:%.*]] = load i64, i64* [[AA_CASTED]], align 8 @@ -11218,14 +11218,14 @@ int bar(int n){ // CHECK17-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP7]], 1 // CHECK17-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK17-NEXT: store i32 [[ADD]], i32* [[I]], align 4 -// CHECK17-NEXT: [[TMP8:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK17-NEXT: [[TMP8:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK17-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP8]], 1 -// CHECK17-NEXT: store i32 [[ADD3]], i32* [[CONV]], align 8 -// CHECK17-NEXT: [[TMP9:%.*]] = load i16, i16* [[CONV1]], align 8 +// CHECK17-NEXT: store i32 [[ADD3]], i32* [[CONV]], align 4 +// CHECK17-NEXT: [[TMP9:%.*]] = load i16, i16* [[CONV1]], align 2 // CHECK17-NEXT: [[CONV4:%.*]] = sext i16 [[TMP9]] to i32 // CHECK17-NEXT: [[ADD5:%.*]] = add nsw i32 [[CONV4]], 1 // CHECK17-NEXT: [[CONV6:%.*]] = trunc i32 [[ADD5]] to i16 -// CHECK17-NEXT: store i16 [[CONV6]], i16* [[CONV1]], align 8 +// CHECK17-NEXT: store i16 [[CONV6]], i16* [[CONV1]], align 2 // CHECK17-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK17: omp.body.continue: // CHECK17-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] @@ -11276,11 +11276,11 @@ int bar(int n){ // CHECK17-NEXT: [[TMP6:%.*]] = load double*, double** [[CN_ADDR]], align 8 // CHECK17-NEXT: [[TMP7:%.*]] = load %struct.TT*, %struct.TT** [[D_ADDR]], align 8 // CHECK17-NEXT: [[CONV5:%.*]] = bitcast i64* [[DOTCAPTURE_EXPR__ADDR]] to i32* -// CHECK17-NEXT: [[TMP8:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK17-NEXT: [[TMP8:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK17-NEXT: [[CONV6:%.*]] = bitcast i64* [[A_CASTED]] to i32* // CHECK17-NEXT: store i32 [[TMP8]], i32* [[CONV6]], align 4 // CHECK17-NEXT: [[TMP9:%.*]] = load i64, i64* [[A_CASTED]], align 8 -// CHECK17-NEXT: [[TMP10:%.*]] = load i32, i32* [[CONV5]], align 8 +// CHECK17-NEXT: [[TMP10:%.*]] = load i32, i32* [[CONV5]], align 4 // CHECK17-NEXT: [[CONV7:%.*]] = bitcast i64* 
[[DOTCAPTURE_EXPR__CASTED]] to i32* // CHECK17-NEXT: store i32 [[TMP10]], i32* [[CONV7]], align 4 // CHECK17-NEXT: [[TMP11:%.*]] = load i64, i64* [[DOTCAPTURE_EXPR__CASTED]], align 8 @@ -11336,7 +11336,7 @@ int bar(int n){ // CHECK17-NEXT: store i32 9, i32* [[DOTOMP_UB]], align 4 // CHECK17-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK17-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK17-NEXT: [[TMP8:%.*]] = load i32, i32* [[CONV5]], align 8 +// CHECK17-NEXT: [[TMP8:%.*]] = load i32, i32* [[CONV5]], align 4 // CHECK17-NEXT: [[TMP9:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK17-NEXT: [[TMP10:%.*]] = load i32, i32* [[TMP9]], align 4 // CHECK17-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP10]], i32 91, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 [[TMP8]]) @@ -11371,9 +11371,9 @@ int bar(int n){ // CHECK17-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP18]], 1 // CHECK17-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK17-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !22 -// CHECK17-NEXT: [[TMP19:%.*]] = load i32, i32* [[CONV]], align 8, !llvm.access.group !22 +// CHECK17-NEXT: [[TMP19:%.*]] = load i32, i32* [[CONV]], align 4, !llvm.access.group !22 // CHECK17-NEXT: [[ADD8:%.*]] = add nsw i32 [[TMP19]], 1 -// CHECK17-NEXT: store i32 [[ADD8]], i32* [[CONV]], align 8, !llvm.access.group !22 +// CHECK17-NEXT: store i32 [[ADD8]], i32* [[CONV]], align 4, !llvm.access.group !22 // CHECK17-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x float], [10 x float]* [[TMP0]], i64 0, i64 2 // CHECK17-NEXT: [[TMP20:%.*]] = load float, float* [[ARRAYIDX]], align 4, !llvm.access.group !22 // CHECK17-NEXT: [[CONV9:%.*]] = fpext float [[TMP20]] to double @@ -11787,7 +11787,7 @@ int bar(int n){ // CHECK17-NEXT: [[TMP1:%.*]] = load i64, i64* [[VLA_ADDR]], align 8 // CHECK17-NEXT: [[TMP2:%.*]] = load i64, i64* [[VLA_ADDR2]], align 8 // CHECK17-NEXT: [[TMP3:%.*]] = load i16*, i16** [[C_ADDR]], align 8 -// CHECK17-NEXT: [[TMP4:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK17-NEXT: [[TMP4:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK17-NEXT: [[CONV3:%.*]] = bitcast i64* [[B_CASTED]] to i32* // CHECK17-NEXT: store i32 [[TMP4]], i32* [[CONV3]], align 4 // CHECK17-NEXT: [[TMP5:%.*]] = load i64, i64* [[B_CASTED]], align 8 @@ -11855,7 +11855,7 @@ int bar(int n){ // CHECK17-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP11]], 1 // CHECK17-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK17-NEXT: store i32 [[ADD]], i32* [[I]], align 4 -// CHECK17-NEXT: [[TMP12:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK17-NEXT: [[TMP12:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK17-NEXT: [[CONV4:%.*]] = sitofp i32 [[TMP12]] to double // CHECK17-NEXT: [[ADD5:%.*]] = fadd double [[CONV4]], 1.500000e+00 // CHECK17-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_S1:%.*]], %struct.S1* [[TMP0]], i32 0, i32 0 @@ -11906,19 +11906,19 @@ int bar(int n){ // CHECK17-NEXT: [[CONV2:%.*]] = bitcast i64* [[AA_ADDR]] to i16* // CHECK17-NEXT: [[CONV3:%.*]] = bitcast i64* [[AAA_ADDR]] to i8* // CHECK17-NEXT: [[TMP0:%.*]] = load [10 x i32]*, [10 x i32]** [[B_ADDR]], align 8 -// CHECK17-NEXT: [[TMP1:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK17-NEXT: [[TMP1:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK17-NEXT: [[CONV4:%.*]] = bitcast i64* [[N_CASTED]] to i32* // CHECK17-NEXT: store i32 [[TMP1]], i32* [[CONV4]], align 4 // CHECK17-NEXT: [[TMP2:%.*]] = load i64, i64* [[N_CASTED]], align 
8 -// CHECK17-NEXT: [[TMP3:%.*]] = load i32, i32* [[CONV1]], align 8 +// CHECK17-NEXT: [[TMP3:%.*]] = load i32, i32* [[CONV1]], align 4 // CHECK17-NEXT: [[CONV5:%.*]] = bitcast i64* [[A_CASTED]] to i32* // CHECK17-NEXT: store i32 [[TMP3]], i32* [[CONV5]], align 4 // CHECK17-NEXT: [[TMP4:%.*]] = load i64, i64* [[A_CASTED]], align 8 -// CHECK17-NEXT: [[TMP5:%.*]] = load i16, i16* [[CONV2]], align 8 +// CHECK17-NEXT: [[TMP5:%.*]] = load i16, i16* [[CONV2]], align 2 // CHECK17-NEXT: [[CONV6:%.*]] = bitcast i64* [[AA_CASTED]] to i16* // CHECK17-NEXT: store i16 [[TMP5]], i16* [[CONV6]], align 2 // CHECK17-NEXT: [[TMP6:%.*]] = load i64, i64* [[AA_CASTED]], align 8 -// CHECK17-NEXT: [[TMP7:%.*]] = load i8, i8* [[CONV3]], align 8 +// CHECK17-NEXT: [[TMP7:%.*]] = load i8, i8* [[CONV3]], align 1 // CHECK17-NEXT: [[CONV7:%.*]] = bitcast i64* [[AAA_CASTED]] to i8* // CHECK17-NEXT: store i8 [[TMP7]], i8* [[CONV7]], align 1 // CHECK17-NEXT: [[TMP8:%.*]] = load i64, i64* [[AAA_CASTED]], align 8 @@ -11959,9 +11959,9 @@ int bar(int n){ // CHECK17-NEXT: [[CONV2:%.*]] = bitcast i64* [[AA_ADDR]] to i16* // CHECK17-NEXT: [[CONV3:%.*]] = bitcast i64* [[AAA_ADDR]] to i8* // CHECK17-NEXT: [[TMP0:%.*]] = load [10 x i32]*, [10 x i32]** [[B_ADDR]], align 8 -// CHECK17-NEXT: [[TMP1:%.*]] = load i32, i32* [[CONV1]], align 8 +// CHECK17-NEXT: [[TMP1:%.*]] = load i32, i32* [[CONV1]], align 4 // CHECK17-NEXT: store i32 [[TMP1]], i32* [[DOTCAPTURE_EXPR_]], align 4 -// CHECK17-NEXT: [[TMP2:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK17-NEXT: [[TMP2:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK17-NEXT: store i32 [[TMP2]], i32* [[DOTCAPTURE_EXPR_4]], align 4 // CHECK17-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_4]], align 4 // CHECK17-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 @@ -12014,19 +12014,19 @@ int bar(int n){ // CHECK17-NEXT: [[MUL:%.*]] = mul i32 [[TMP19]], 1 // CHECK17-NEXT: [[ADD12:%.*]] = add i32 [[TMP18]], [[MUL]] // CHECK17-NEXT: store i32 [[ADD12]], i32* [[I8]], align 4 -// CHECK17-NEXT: [[TMP20:%.*]] = load i32, i32* [[CONV1]], align 8 +// CHECK17-NEXT: [[TMP20:%.*]] = load i32, i32* [[CONV1]], align 4 // CHECK17-NEXT: [[ADD13:%.*]] = add nsw i32 [[TMP20]], 1 -// CHECK17-NEXT: store i32 [[ADD13]], i32* [[CONV1]], align 8 -// CHECK17-NEXT: [[TMP21:%.*]] = load i16, i16* [[CONV2]], align 8 +// CHECK17-NEXT: store i32 [[ADD13]], i32* [[CONV1]], align 4 +// CHECK17-NEXT: [[TMP21:%.*]] = load i16, i16* [[CONV2]], align 2 // CHECK17-NEXT: [[CONV14:%.*]] = sext i16 [[TMP21]] to i32 // CHECK17-NEXT: [[ADD15:%.*]] = add nsw i32 [[CONV14]], 1 // CHECK17-NEXT: [[CONV16:%.*]] = trunc i32 [[ADD15]] to i16 -// CHECK17-NEXT: store i16 [[CONV16]], i16* [[CONV2]], align 8 -// CHECK17-NEXT: [[TMP22:%.*]] = load i8, i8* [[CONV3]], align 8 +// CHECK17-NEXT: store i16 [[CONV16]], i16* [[CONV2]], align 2 +// CHECK17-NEXT: [[TMP22:%.*]] = load i8, i8* [[CONV3]], align 1 // CHECK17-NEXT: [[CONV17:%.*]] = sext i8 [[TMP22]] to i32 // CHECK17-NEXT: [[ADD18:%.*]] = add nsw i32 [[CONV17]], 1 // CHECK17-NEXT: [[CONV19:%.*]] = trunc i32 [[ADD18]] to i8 -// CHECK17-NEXT: store i8 [[CONV19]], i8* [[CONV3]], align 8 +// CHECK17-NEXT: store i8 [[CONV19]], i8* [[CONV3]], align 1 // CHECK17-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], [10 x i32]* [[TMP0]], i64 0, i64 2 // CHECK17-NEXT: [[TMP23:%.*]] = load i32, i32* [[ARRAYIDX]], align 4 // CHECK17-NEXT: [[ADD20:%.*]] = add nsw i32 [[TMP23]], 1 @@ -12064,11 +12064,11 @@ int bar(int n){ // CHECK17-NEXT: [[CONV:%.*]] = bitcast i64* 
[[A_ADDR]] to i32* // CHECK17-NEXT: [[CONV1:%.*]] = bitcast i64* [[AA_ADDR]] to i16* // CHECK17-NEXT: [[TMP0:%.*]] = load [10 x i32]*, [10 x i32]** [[B_ADDR]], align 8 -// CHECK17-NEXT: [[TMP1:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK17-NEXT: [[TMP1:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK17-NEXT: [[CONV2:%.*]] = bitcast i64* [[A_CASTED]] to i32* // CHECK17-NEXT: store i32 [[TMP1]], i32* [[CONV2]], align 4 // CHECK17-NEXT: [[TMP2:%.*]] = load i64, i64* [[A_CASTED]], align 8 -// CHECK17-NEXT: [[TMP3:%.*]] = load i16, i16* [[CONV1]], align 8 +// CHECK17-NEXT: [[TMP3:%.*]] = load i16, i16* [[CONV1]], align 2 // CHECK17-NEXT: [[CONV3:%.*]] = bitcast i64* [[AA_CASTED]] to i16* // CHECK17-NEXT: store i16 [[TMP3]], i16* [[CONV3]], align 2 // CHECK17-NEXT: [[TMP4:%.*]] = load i64, i64* [[AA_CASTED]], align 8 @@ -12130,14 +12130,14 @@ int bar(int n){ // CHECK17-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP8]], 1 // CHECK17-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK17-NEXT: store i32 [[ADD]], i32* [[I]], align 4 -// CHECK17-NEXT: [[TMP9:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK17-NEXT: [[TMP9:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK17-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP9]], 1 -// CHECK17-NEXT: store i32 [[ADD3]], i32* [[CONV]], align 8 -// CHECK17-NEXT: [[TMP10:%.*]] = load i16, i16* [[CONV1]], align 8 +// CHECK17-NEXT: store i32 [[ADD3]], i32* [[CONV]], align 4 +// CHECK17-NEXT: [[TMP10:%.*]] = load i16, i16* [[CONV1]], align 2 // CHECK17-NEXT: [[CONV4:%.*]] = sext i16 [[TMP10]] to i32 // CHECK17-NEXT: [[ADD5:%.*]] = add nsw i32 [[CONV4]], 1 // CHECK17-NEXT: [[CONV6:%.*]] = trunc i32 [[ADD5]] to i16 -// CHECK17-NEXT: store i16 [[CONV6]], i16* [[CONV1]], align 8 +// CHECK17-NEXT: store i16 [[CONV6]], i16* [[CONV1]], align 2 // CHECK17-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], [10 x i32]* [[TMP0]], i64 0, i64 2 // CHECK17-NEXT: [[TMP11:%.*]] = load i32, i32* [[ARRAYIDX]], align 4 // CHECK17-NEXT: [[ADD7:%.*]] = add nsw i32 [[TMP11]], 1 @@ -12518,10 +12518,10 @@ int bar(int n){ // CHECK18-NEXT: [[CONV:%.*]] = bitcast i64* [[AA_ADDR]] to i16* // CHECK18-NEXT: [[CONV3:%.*]] = bitcast i64* [[DOTCAPTURE_EXPR__ADDR]] to i32* // CHECK18-NEXT: [[CONV4:%.*]] = bitcast i64* [[DOTCAPTURE_EXPR__ADDR2]] to i32* -// CHECK18-NEXT: [[TMP1:%.*]] = load i32, i32* [[CONV3]], align 8 -// CHECK18-NEXT: [[TMP2:%.*]] = load i32, i32* [[CONV4]], align 8 +// CHECK18-NEXT: [[TMP1:%.*]] = load i32, i32* [[CONV3]], align 4 +// CHECK18-NEXT: [[TMP2:%.*]] = load i32, i32* [[CONV4]], align 4 // CHECK18-NEXT: call void @__kmpc_push_num_teams(%struct.ident_t* @[[GLOB2]], i32 [[TMP0]], i32 [[TMP1]], i32 [[TMP2]]) -// CHECK18-NEXT: [[TMP3:%.*]] = load i16, i16* [[CONV]], align 8 +// CHECK18-NEXT: [[TMP3:%.*]] = load i16, i16* [[CONV]], align 2 // CHECK18-NEXT: [[CONV5:%.*]] = bitcast i64* [[AA_CASTED]] to i16* // CHECK18-NEXT: store i16 [[TMP3]], i16* [[CONV5]], align 2 // CHECK18-NEXT: [[TMP4:%.*]] = load i64, i64* [[AA_CASTED]], align 8 @@ -12707,7 +12707,7 @@ int bar(int n){ // CHECK18-NEXT: [[A_CASTED:%.*]] = alloca i64, align 8 // CHECK18-NEXT: store i64 [[A]], i64* [[A_ADDR]], align 8 // CHECK18-NEXT: [[CONV:%.*]] = bitcast i64* [[A_ADDR]] to i32* -// CHECK18-NEXT: [[TMP0:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK18-NEXT: [[TMP0:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK18-NEXT: [[CONV1:%.*]] = bitcast i64* [[A_CASTED]] to i32* // CHECK18-NEXT: store i32 [[TMP0]], i32* [[CONV1]], align 4 // CHECK18-NEXT: [[TMP1:%.*]] = load i64, i64* [[A_CASTED]], align 8 
@@ -12763,9 +12763,9 @@ int bar(int n){ // CHECK18-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP7]], 1 // CHECK18-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK18-NEXT: store i32 [[ADD]], i32* [[I]], align 4 -// CHECK18-NEXT: [[TMP8:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK18-NEXT: [[TMP8:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK18-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP8]], 1 -// CHECK18-NEXT: store i32 [[ADD2]], i32* [[CONV]], align 8 +// CHECK18-NEXT: store i32 [[ADD2]], i32* [[CONV]], align 4 // CHECK18-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK18: omp.body.continue: // CHECK18-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] @@ -12788,7 +12788,7 @@ int bar(int n){ // CHECK18-NEXT: [[AA_CASTED:%.*]] = alloca i64, align 8 // CHECK18-NEXT: store i64 [[AA]], i64* [[AA_ADDR]], align 8 // CHECK18-NEXT: [[CONV:%.*]] = bitcast i64* [[AA_ADDR]] to i16* -// CHECK18-NEXT: [[TMP0:%.*]] = load i16, i16* [[CONV]], align 8 +// CHECK18-NEXT: [[TMP0:%.*]] = load i16, i16* [[CONV]], align 2 // CHECK18-NEXT: [[CONV1:%.*]] = bitcast i64* [[AA_CASTED]] to i16* // CHECK18-NEXT: store i16 [[TMP0]], i16* [[CONV1]], align 2 // CHECK18-NEXT: [[TMP1:%.*]] = load i64, i64* [[AA_CASTED]], align 8 @@ -12844,11 +12844,11 @@ int bar(int n){ // CHECK18-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP7]], 1 // CHECK18-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK18-NEXT: store i32 [[ADD]], i32* [[I]], align 4 -// CHECK18-NEXT: [[TMP8:%.*]] = load i16, i16* [[CONV]], align 8 +// CHECK18-NEXT: [[TMP8:%.*]] = load i16, i16* [[CONV]], align 2 // CHECK18-NEXT: [[CONV2:%.*]] = sext i16 [[TMP8]] to i32 // CHECK18-NEXT: [[ADD3:%.*]] = add nsw i32 [[CONV2]], 1 // CHECK18-NEXT: [[CONV4:%.*]] = trunc i32 [[ADD3]] to i16 -// CHECK18-NEXT: store i16 [[CONV4]], i16* [[CONV]], align 8 +// CHECK18-NEXT: store i16 [[CONV4]], i16* [[CONV]], align 2 // CHECK18-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK18: omp.body.continue: // CHECK18-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] @@ -12875,11 +12875,11 @@ int bar(int n){ // CHECK18-NEXT: store i64 [[AA]], i64* [[AA_ADDR]], align 8 // CHECK18-NEXT: [[CONV:%.*]] = bitcast i64* [[A_ADDR]] to i32* // CHECK18-NEXT: [[CONV1:%.*]] = bitcast i64* [[AA_ADDR]] to i16* -// CHECK18-NEXT: [[TMP0:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK18-NEXT: [[TMP0:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK18-NEXT: [[CONV2:%.*]] = bitcast i64* [[A_CASTED]] to i32* // CHECK18-NEXT: store i32 [[TMP0]], i32* [[CONV2]], align 4 // CHECK18-NEXT: [[TMP1:%.*]] = load i64, i64* [[A_CASTED]], align 8 -// CHECK18-NEXT: [[TMP2:%.*]] = load i16, i16* [[CONV1]], align 8 +// CHECK18-NEXT: [[TMP2:%.*]] = load i16, i16* [[CONV1]], align 2 // CHECK18-NEXT: [[CONV3:%.*]] = bitcast i64* [[AA_CASTED]] to i16* // CHECK18-NEXT: store i16 [[TMP2]], i16* [[CONV3]], align 2 // CHECK18-NEXT: [[TMP3:%.*]] = load i64, i64* [[AA_CASTED]], align 8 @@ -12938,14 +12938,14 @@ int bar(int n){ // CHECK18-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP7]], 1 // CHECK18-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK18-NEXT: store i32 [[ADD]], i32* [[I]], align 4 -// CHECK18-NEXT: [[TMP8:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK18-NEXT: [[TMP8:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK18-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP8]], 1 -// CHECK18-NEXT: store i32 [[ADD3]], i32* [[CONV]], align 8 -// CHECK18-NEXT: [[TMP9:%.*]] = load i16, i16* [[CONV1]], align 8 +// CHECK18-NEXT: store i32 [[ADD3]], i32* [[CONV]], align 4 +// CHECK18-NEXT: [[TMP9:%.*]] = load i16, i16* [[CONV1]], align 2 // CHECK18-NEXT: [[CONV4:%.*]] = 
sext i16 [[TMP9]] to i32 // CHECK18-NEXT: [[ADD5:%.*]] = add nsw i32 [[CONV4]], 1 // CHECK18-NEXT: [[CONV6:%.*]] = trunc i32 [[ADD5]] to i16 -// CHECK18-NEXT: store i16 [[CONV6]], i16* [[CONV1]], align 8 +// CHECK18-NEXT: store i16 [[CONV6]], i16* [[CONV1]], align 2 // CHECK18-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK18: omp.body.continue: // CHECK18-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] @@ -12996,11 +12996,11 @@ int bar(int n){ // CHECK18-NEXT: [[TMP6:%.*]] = load double*, double** [[CN_ADDR]], align 8 // CHECK18-NEXT: [[TMP7:%.*]] = load %struct.TT*, %struct.TT** [[D_ADDR]], align 8 // CHECK18-NEXT: [[CONV5:%.*]] = bitcast i64* [[DOTCAPTURE_EXPR__ADDR]] to i32* -// CHECK18-NEXT: [[TMP8:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK18-NEXT: [[TMP8:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK18-NEXT: [[CONV6:%.*]] = bitcast i64* [[A_CASTED]] to i32* // CHECK18-NEXT: store i32 [[TMP8]], i32* [[CONV6]], align 4 // CHECK18-NEXT: [[TMP9:%.*]] = load i64, i64* [[A_CASTED]], align 8 -// CHECK18-NEXT: [[TMP10:%.*]] = load i32, i32* [[CONV5]], align 8 +// CHECK18-NEXT: [[TMP10:%.*]] = load i32, i32* [[CONV5]], align 4 // CHECK18-NEXT: [[CONV7:%.*]] = bitcast i64* [[DOTCAPTURE_EXPR__CASTED]] to i32* // CHECK18-NEXT: store i32 [[TMP10]], i32* [[CONV7]], align 4 // CHECK18-NEXT: [[TMP11:%.*]] = load i64, i64* [[DOTCAPTURE_EXPR__CASTED]], align 8 @@ -13056,7 +13056,7 @@ int bar(int n){ // CHECK18-NEXT: store i32 9, i32* [[DOTOMP_UB]], align 4 // CHECK18-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK18-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK18-NEXT: [[TMP8:%.*]] = load i32, i32* [[CONV5]], align 8 +// CHECK18-NEXT: [[TMP8:%.*]] = load i32, i32* [[CONV5]], align 4 // CHECK18-NEXT: [[TMP9:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK18-NEXT: [[TMP10:%.*]] = load i32, i32* [[TMP9]], align 4 // CHECK18-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP10]], i32 91, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 [[TMP8]]) @@ -13091,9 +13091,9 @@ int bar(int n){ // CHECK18-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP18]], 1 // CHECK18-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK18-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !22 -// CHECK18-NEXT: [[TMP19:%.*]] = load i32, i32* [[CONV]], align 8, !llvm.access.group !22 +// CHECK18-NEXT: [[TMP19:%.*]] = load i32, i32* [[CONV]], align 4, !llvm.access.group !22 // CHECK18-NEXT: [[ADD8:%.*]] = add nsw i32 [[TMP19]], 1 -// CHECK18-NEXT: store i32 [[ADD8]], i32* [[CONV]], align 8, !llvm.access.group !22 +// CHECK18-NEXT: store i32 [[ADD8]], i32* [[CONV]], align 4, !llvm.access.group !22 // CHECK18-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x float], [10 x float]* [[TMP0]], i64 0, i64 2 // CHECK18-NEXT: [[TMP20:%.*]] = load float, float* [[ARRAYIDX]], align 4, !llvm.access.group !22 // CHECK18-NEXT: [[CONV9:%.*]] = fpext float [[TMP20]] to double @@ -13507,7 +13507,7 @@ int bar(int n){ // CHECK18-NEXT: [[TMP1:%.*]] = load i64, i64* [[VLA_ADDR]], align 8 // CHECK18-NEXT: [[TMP2:%.*]] = load i64, i64* [[VLA_ADDR2]], align 8 // CHECK18-NEXT: [[TMP3:%.*]] = load i16*, i16** [[C_ADDR]], align 8 -// CHECK18-NEXT: [[TMP4:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK18-NEXT: [[TMP4:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK18-NEXT: [[CONV3:%.*]] = bitcast i64* [[B_CASTED]] to i32* // CHECK18-NEXT: store i32 [[TMP4]], i32* [[CONV3]], align 4 // CHECK18-NEXT: [[TMP5:%.*]] = load 
i64, i64* [[B_CASTED]], align 8 @@ -13575,7 +13575,7 @@ int bar(int n){ // CHECK18-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP11]], 1 // CHECK18-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK18-NEXT: store i32 [[ADD]], i32* [[I]], align 4 -// CHECK18-NEXT: [[TMP12:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK18-NEXT: [[TMP12:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK18-NEXT: [[CONV4:%.*]] = sitofp i32 [[TMP12]] to double // CHECK18-NEXT: [[ADD5:%.*]] = fadd double [[CONV4]], 1.500000e+00 // CHECK18-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_S1:%.*]], %struct.S1* [[TMP0]], i32 0, i32 0 @@ -13626,19 +13626,19 @@ int bar(int n){ // CHECK18-NEXT: [[CONV2:%.*]] = bitcast i64* [[AA_ADDR]] to i16* // CHECK18-NEXT: [[CONV3:%.*]] = bitcast i64* [[AAA_ADDR]] to i8* // CHECK18-NEXT: [[TMP0:%.*]] = load [10 x i32]*, [10 x i32]** [[B_ADDR]], align 8 -// CHECK18-NEXT: [[TMP1:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK18-NEXT: [[TMP1:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK18-NEXT: [[CONV4:%.*]] = bitcast i64* [[N_CASTED]] to i32* // CHECK18-NEXT: store i32 [[TMP1]], i32* [[CONV4]], align 4 // CHECK18-NEXT: [[TMP2:%.*]] = load i64, i64* [[N_CASTED]], align 8 -// CHECK18-NEXT: [[TMP3:%.*]] = load i32, i32* [[CONV1]], align 8 +// CHECK18-NEXT: [[TMP3:%.*]] = load i32, i32* [[CONV1]], align 4 // CHECK18-NEXT: [[CONV5:%.*]] = bitcast i64* [[A_CASTED]] to i32* // CHECK18-NEXT: store i32 [[TMP3]], i32* [[CONV5]], align 4 // CHECK18-NEXT: [[TMP4:%.*]] = load i64, i64* [[A_CASTED]], align 8 -// CHECK18-NEXT: [[TMP5:%.*]] = load i16, i16* [[CONV2]], align 8 +// CHECK18-NEXT: [[TMP5:%.*]] = load i16, i16* [[CONV2]], align 2 // CHECK18-NEXT: [[CONV6:%.*]] = bitcast i64* [[AA_CASTED]] to i16* // CHECK18-NEXT: store i16 [[TMP5]], i16* [[CONV6]], align 2 // CHECK18-NEXT: [[TMP6:%.*]] = load i64, i64* [[AA_CASTED]], align 8 -// CHECK18-NEXT: [[TMP7:%.*]] = load i8, i8* [[CONV3]], align 8 +// CHECK18-NEXT: [[TMP7:%.*]] = load i8, i8* [[CONV3]], align 1 // CHECK18-NEXT: [[CONV7:%.*]] = bitcast i64* [[AAA_CASTED]] to i8* // CHECK18-NEXT: store i8 [[TMP7]], i8* [[CONV7]], align 1 // CHECK18-NEXT: [[TMP8:%.*]] = load i64, i64* [[AAA_CASTED]], align 8 @@ -13679,9 +13679,9 @@ int bar(int n){ // CHECK18-NEXT: [[CONV2:%.*]] = bitcast i64* [[AA_ADDR]] to i16* // CHECK18-NEXT: [[CONV3:%.*]] = bitcast i64* [[AAA_ADDR]] to i8* // CHECK18-NEXT: [[TMP0:%.*]] = load [10 x i32]*, [10 x i32]** [[B_ADDR]], align 8 -// CHECK18-NEXT: [[TMP1:%.*]] = load i32, i32* [[CONV1]], align 8 +// CHECK18-NEXT: [[TMP1:%.*]] = load i32, i32* [[CONV1]], align 4 // CHECK18-NEXT: store i32 [[TMP1]], i32* [[DOTCAPTURE_EXPR_]], align 4 -// CHECK18-NEXT: [[TMP2:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK18-NEXT: [[TMP2:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK18-NEXT: store i32 [[TMP2]], i32* [[DOTCAPTURE_EXPR_4]], align 4 // CHECK18-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_4]], align 4 // CHECK18-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 @@ -13734,19 +13734,19 @@ int bar(int n){ // CHECK18-NEXT: [[MUL:%.*]] = mul i32 [[TMP19]], 1 // CHECK18-NEXT: [[ADD12:%.*]] = add i32 [[TMP18]], [[MUL]] // CHECK18-NEXT: store i32 [[ADD12]], i32* [[I8]], align 4 -// CHECK18-NEXT: [[TMP20:%.*]] = load i32, i32* [[CONV1]], align 8 +// CHECK18-NEXT: [[TMP20:%.*]] = load i32, i32* [[CONV1]], align 4 // CHECK18-NEXT: [[ADD13:%.*]] = add nsw i32 [[TMP20]], 1 -// CHECK18-NEXT: store i32 [[ADD13]], i32* [[CONV1]], align 8 -// CHECK18-NEXT: [[TMP21:%.*]] = load i16, i16* [[CONV2]], align 8 +// 
CHECK18-NEXT: store i32 [[ADD13]], i32* [[CONV1]], align 4 +// CHECK18-NEXT: [[TMP21:%.*]] = load i16, i16* [[CONV2]], align 2 // CHECK18-NEXT: [[CONV14:%.*]] = sext i16 [[TMP21]] to i32 // CHECK18-NEXT: [[ADD15:%.*]] = add nsw i32 [[CONV14]], 1 // CHECK18-NEXT: [[CONV16:%.*]] = trunc i32 [[ADD15]] to i16 -// CHECK18-NEXT: store i16 [[CONV16]], i16* [[CONV2]], align 8 -// CHECK18-NEXT: [[TMP22:%.*]] = load i8, i8* [[CONV3]], align 8 +// CHECK18-NEXT: store i16 [[CONV16]], i16* [[CONV2]], align 2 +// CHECK18-NEXT: [[TMP22:%.*]] = load i8, i8* [[CONV3]], align 1 // CHECK18-NEXT: [[CONV17:%.*]] = sext i8 [[TMP22]] to i32 // CHECK18-NEXT: [[ADD18:%.*]] = add nsw i32 [[CONV17]], 1 // CHECK18-NEXT: [[CONV19:%.*]] = trunc i32 [[ADD18]] to i8 -// CHECK18-NEXT: store i8 [[CONV19]], i8* [[CONV3]], align 8 +// CHECK18-NEXT: store i8 [[CONV19]], i8* [[CONV3]], align 1 // CHECK18-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], [10 x i32]* [[TMP0]], i64 0, i64 2 // CHECK18-NEXT: [[TMP23:%.*]] = load i32, i32* [[ARRAYIDX]], align 4 // CHECK18-NEXT: [[ADD20:%.*]] = add nsw i32 [[TMP23]], 1 @@ -13784,11 +13784,11 @@ int bar(int n){ // CHECK18-NEXT: [[CONV:%.*]] = bitcast i64* [[A_ADDR]] to i32* // CHECK18-NEXT: [[CONV1:%.*]] = bitcast i64* [[AA_ADDR]] to i16* // CHECK18-NEXT: [[TMP0:%.*]] = load [10 x i32]*, [10 x i32]** [[B_ADDR]], align 8 -// CHECK18-NEXT: [[TMP1:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK18-NEXT: [[TMP1:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK18-NEXT: [[CONV2:%.*]] = bitcast i64* [[A_CASTED]] to i32* // CHECK18-NEXT: store i32 [[TMP1]], i32* [[CONV2]], align 4 // CHECK18-NEXT: [[TMP2:%.*]] = load i64, i64* [[A_CASTED]], align 8 -// CHECK18-NEXT: [[TMP3:%.*]] = load i16, i16* [[CONV1]], align 8 +// CHECK18-NEXT: [[TMP3:%.*]] = load i16, i16* [[CONV1]], align 2 // CHECK18-NEXT: [[CONV3:%.*]] = bitcast i64* [[AA_CASTED]] to i16* // CHECK18-NEXT: store i16 [[TMP3]], i16* [[CONV3]], align 2 // CHECK18-NEXT: [[TMP4:%.*]] = load i64, i64* [[AA_CASTED]], align 8 @@ -13850,14 +13850,14 @@ int bar(int n){ // CHECK18-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP8]], 1 // CHECK18-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK18-NEXT: store i32 [[ADD]], i32* [[I]], align 4 -// CHECK18-NEXT: [[TMP9:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK18-NEXT: [[TMP9:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK18-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP9]], 1 -// CHECK18-NEXT: store i32 [[ADD3]], i32* [[CONV]], align 8 -// CHECK18-NEXT: [[TMP10:%.*]] = load i16, i16* [[CONV1]], align 8 +// CHECK18-NEXT: store i32 [[ADD3]], i32* [[CONV]], align 4 +// CHECK18-NEXT: [[TMP10:%.*]] = load i16, i16* [[CONV1]], align 2 // CHECK18-NEXT: [[CONV4:%.*]] = sext i16 [[TMP10]] to i32 // CHECK18-NEXT: [[ADD5:%.*]] = add nsw i32 [[CONV4]], 1 // CHECK18-NEXT: [[CONV6:%.*]] = trunc i32 [[ADD5]] to i16 -// CHECK18-NEXT: store i16 [[CONV6]], i16* [[CONV1]], align 8 +// CHECK18-NEXT: store i16 [[CONV6]], i16* [[CONV1]], align 2 // CHECK18-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], [10 x i32]* [[TMP0]], i64 0, i64 2 // CHECK18-NEXT: [[TMP11:%.*]] = load i32, i32* [[ARRAYIDX]], align 4 // CHECK18-NEXT: [[ADD7:%.*]] = add nsw i32 [[TMP11]], 1 @@ -14233,7 +14233,7 @@ int bar(int n){ // CHECK19-NEXT: [[TMP1:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR__ADDR]], align 4 // CHECK19-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR__ADDR2]], align 4 // CHECK19-NEXT: call void @__kmpc_push_num_teams(%struct.ident_t* @[[GLOB2]], i32 [[TMP0]], i32 [[TMP1]], i32 [[TMP2]]) -// CHECK19-NEXT: 
[[TMP3:%.*]] = load i16, i16* [[CONV]], align 4 +// CHECK19-NEXT: [[TMP3:%.*]] = load i16, i16* [[CONV]], align 2 // CHECK19-NEXT: [[CONV3:%.*]] = bitcast i32* [[AA_CASTED]] to i16* // CHECK19-NEXT: store i16 [[TMP3]], i16* [[CONV3]], align 2 // CHECK19-NEXT: [[TMP4:%.*]] = load i32, i32* [[AA_CASTED]], align 4 @@ -14495,7 +14495,7 @@ int bar(int n){ // CHECK19-NEXT: [[AA_CASTED:%.*]] = alloca i32, align 4 // CHECK19-NEXT: store i32 [[AA]], i32* [[AA_ADDR]], align 4 // CHECK19-NEXT: [[CONV:%.*]] = bitcast i32* [[AA_ADDR]] to i16* -// CHECK19-NEXT: [[TMP0:%.*]] = load i16, i16* [[CONV]], align 4 +// CHECK19-NEXT: [[TMP0:%.*]] = load i16, i16* [[CONV]], align 2 // CHECK19-NEXT: [[CONV1:%.*]] = bitcast i32* [[AA_CASTED]] to i16* // CHECK19-NEXT: store i16 [[TMP0]], i16* [[CONV1]], align 2 // CHECK19-NEXT: [[TMP1:%.*]] = load i32, i32* [[AA_CASTED]], align 4 @@ -14551,11 +14551,11 @@ int bar(int n){ // CHECK19-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP7]], 1 // CHECK19-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK19-NEXT: store i32 [[ADD]], i32* [[I]], align 4 -// CHECK19-NEXT: [[TMP8:%.*]] = load i16, i16* [[CONV]], align 4 +// CHECK19-NEXT: [[TMP8:%.*]] = load i16, i16* [[CONV]], align 2 // CHECK19-NEXT: [[CONV2:%.*]] = sext i16 [[TMP8]] to i32 // CHECK19-NEXT: [[ADD3:%.*]] = add nsw i32 [[CONV2]], 1 // CHECK19-NEXT: [[CONV4:%.*]] = trunc i32 [[ADD3]] to i16 -// CHECK19-NEXT: store i16 [[CONV4]], i16* [[CONV]], align 4 +// CHECK19-NEXT: store i16 [[CONV4]], i16* [[CONV]], align 2 // CHECK19-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK19: omp.body.continue: // CHECK19-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] @@ -14584,7 +14584,7 @@ int bar(int n){ // CHECK19-NEXT: [[TMP0:%.*]] = load i32, i32* [[A_ADDR]], align 4 // CHECK19-NEXT: store i32 [[TMP0]], i32* [[A_CASTED]], align 4 // CHECK19-NEXT: [[TMP1:%.*]] = load i32, i32* [[A_CASTED]], align 4 -// CHECK19-NEXT: [[TMP2:%.*]] = load i16, i16* [[CONV]], align 4 +// CHECK19-NEXT: [[TMP2:%.*]] = load i16, i16* [[CONV]], align 2 // CHECK19-NEXT: [[CONV1:%.*]] = bitcast i32* [[AA_CASTED]] to i16* // CHECK19-NEXT: store i16 [[TMP2]], i16* [[CONV1]], align 2 // CHECK19-NEXT: [[TMP3:%.*]] = load i32, i32* [[AA_CASTED]], align 4 @@ -14645,11 +14645,11 @@ int bar(int n){ // CHECK19-NEXT: [[TMP8:%.*]] = load i32, i32* [[A_ADDR]], align 4 // CHECK19-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP8]], 1 // CHECK19-NEXT: store i32 [[ADD2]], i32* [[A_ADDR]], align 4 -// CHECK19-NEXT: [[TMP9:%.*]] = load i16, i16* [[CONV]], align 4 +// CHECK19-NEXT: [[TMP9:%.*]] = load i16, i16* [[CONV]], align 2 // CHECK19-NEXT: [[CONV3:%.*]] = sext i16 [[TMP9]] to i32 // CHECK19-NEXT: [[ADD4:%.*]] = add nsw i32 [[CONV3]], 1 // CHECK19-NEXT: [[CONV5:%.*]] = trunc i32 [[ADD4]] to i16 -// CHECK19-NEXT: store i16 [[CONV5]], i16* [[CONV]], align 4 +// CHECK19-NEXT: store i16 [[CONV5]], i16* [[CONV]], align 2 // CHECK19-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK19: omp.body.continue: // CHECK19-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] @@ -15321,11 +15321,11 @@ int bar(int n){ // CHECK19-NEXT: [[TMP3:%.*]] = load i32, i32* [[A_ADDR]], align 4 // CHECK19-NEXT: store i32 [[TMP3]], i32* [[A_CASTED]], align 4 // CHECK19-NEXT: [[TMP4:%.*]] = load i32, i32* [[A_CASTED]], align 4 -// CHECK19-NEXT: [[TMP5:%.*]] = load i16, i16* [[CONV]], align 4 +// CHECK19-NEXT: [[TMP5:%.*]] = load i16, i16* [[CONV]], align 2 // CHECK19-NEXT: [[CONV2:%.*]] = bitcast i32* [[AA_CASTED]] to i16* // CHECK19-NEXT: store i16 [[TMP5]], i16* [[CONV2]], align 2 // CHECK19-NEXT: [[TMP6:%.*]] = load i32, i32* 
[[AA_CASTED]], align 4 -// CHECK19-NEXT: [[TMP7:%.*]] = load i8, i8* [[CONV1]], align 4 +// CHECK19-NEXT: [[TMP7:%.*]] = load i8, i8* [[CONV1]], align 1 // CHECK19-NEXT: [[CONV3:%.*]] = bitcast i32* [[AAA_CASTED]] to i8* // CHECK19-NEXT: store i8 [[TMP7]], i8* [[CONV3]], align 1 // CHECK19-NEXT: [[TMP8:%.*]] = load i32, i32* [[AAA_CASTED]], align 4 @@ -15422,16 +15422,16 @@ int bar(int n){ // CHECK19-NEXT: [[TMP20:%.*]] = load i32, i32* [[A_ADDR]], align 4 // CHECK19-NEXT: [[ADD11:%.*]] = add nsw i32 [[TMP20]], 1 // CHECK19-NEXT: store i32 [[ADD11]], i32* [[A_ADDR]], align 4 -// CHECK19-NEXT: [[TMP21:%.*]] = load i16, i16* [[CONV]], align 4 +// CHECK19-NEXT: [[TMP21:%.*]] = load i16, i16* [[CONV]], align 2 // CHECK19-NEXT: [[CONV12:%.*]] = sext i16 [[TMP21]] to i32 // CHECK19-NEXT: [[ADD13:%.*]] = add nsw i32 [[CONV12]], 1 // CHECK19-NEXT: [[CONV14:%.*]] = trunc i32 [[ADD13]] to i16 -// CHECK19-NEXT: store i16 [[CONV14]], i16* [[CONV]], align 4 -// CHECK19-NEXT: [[TMP22:%.*]] = load i8, i8* [[CONV1]], align 4 +// CHECK19-NEXT: store i16 [[CONV14]], i16* [[CONV]], align 2 +// CHECK19-NEXT: [[TMP22:%.*]] = load i8, i8* [[CONV1]], align 1 // CHECK19-NEXT: [[CONV15:%.*]] = sext i8 [[TMP22]] to i32 // CHECK19-NEXT: [[ADD16:%.*]] = add nsw i32 [[CONV15]], 1 // CHECK19-NEXT: [[CONV17:%.*]] = trunc i32 [[ADD16]] to i8 -// CHECK19-NEXT: store i8 [[CONV17]], i8* [[CONV1]], align 4 +// CHECK19-NEXT: store i8 [[CONV17]], i8* [[CONV1]], align 1 // CHECK19-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], [10 x i32]* [[TMP0]], i32 0, i32 2 // CHECK19-NEXT: [[TMP23:%.*]] = load i32, i32* [[ARRAYIDX]], align 4 // CHECK19-NEXT: [[ADD18:%.*]] = add nsw i32 [[TMP23]], 1 @@ -15471,7 +15471,7 @@ int bar(int n){ // CHECK19-NEXT: [[TMP1:%.*]] = load i32, i32* [[A_ADDR]], align 4 // CHECK19-NEXT: store i32 [[TMP1]], i32* [[A_CASTED]], align 4 // CHECK19-NEXT: [[TMP2:%.*]] = load i32, i32* [[A_CASTED]], align 4 -// CHECK19-NEXT: [[TMP3:%.*]] = load i16, i16* [[CONV]], align 4 +// CHECK19-NEXT: [[TMP3:%.*]] = load i16, i16* [[CONV]], align 2 // CHECK19-NEXT: [[CONV1:%.*]] = bitcast i32* [[AA_CASTED]] to i16* // CHECK19-NEXT: store i16 [[TMP3]], i16* [[CONV1]], align 2 // CHECK19-NEXT: [[TMP4:%.*]] = load i32, i32* [[AA_CASTED]], align 4 @@ -15535,11 +15535,11 @@ int bar(int n){ // CHECK19-NEXT: [[TMP9:%.*]] = load i32, i32* [[A_ADDR]], align 4 // CHECK19-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP9]], 1 // CHECK19-NEXT: store i32 [[ADD2]], i32* [[A_ADDR]], align 4 -// CHECK19-NEXT: [[TMP10:%.*]] = load i16, i16* [[CONV]], align 4 +// CHECK19-NEXT: [[TMP10:%.*]] = load i16, i16* [[CONV]], align 2 // CHECK19-NEXT: [[CONV3:%.*]] = sext i16 [[TMP10]] to i32 // CHECK19-NEXT: [[ADD4:%.*]] = add nsw i32 [[CONV3]], 1 // CHECK19-NEXT: [[CONV5:%.*]] = trunc i32 [[ADD4]] to i16 -// CHECK19-NEXT: store i16 [[CONV5]], i16* [[CONV]], align 4 +// CHECK19-NEXT: store i16 [[CONV5]], i16* [[CONV]], align 2 // CHECK19-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], [10 x i32]* [[TMP0]], i32 0, i32 2 // CHECK19-NEXT: [[TMP11:%.*]] = load i32, i32* [[ARRAYIDX]], align 4 // CHECK19-NEXT: [[ADD6:%.*]] = add nsw i32 [[TMP11]], 1 @@ -15915,7 +15915,7 @@ int bar(int n){ // CHECK20-NEXT: [[TMP1:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR__ADDR]], align 4 // CHECK20-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR__ADDR2]], align 4 // CHECK20-NEXT: call void @__kmpc_push_num_teams(%struct.ident_t* @[[GLOB2]], i32 [[TMP0]], i32 [[TMP1]], i32 [[TMP2]]) -// CHECK20-NEXT: [[TMP3:%.*]] = load i16, i16* [[CONV]], align 4 +// 
CHECK20-NEXT: [[TMP3:%.*]] = load i16, i16* [[CONV]], align 2 // CHECK20-NEXT: [[CONV3:%.*]] = bitcast i32* [[AA_CASTED]] to i16* // CHECK20-NEXT: store i16 [[TMP3]], i16* [[CONV3]], align 2 // CHECK20-NEXT: [[TMP4:%.*]] = load i32, i32* [[AA_CASTED]], align 4 @@ -16177,7 +16177,7 @@ int bar(int n){ // CHECK20-NEXT: [[AA_CASTED:%.*]] = alloca i32, align 4 // CHECK20-NEXT: store i32 [[AA]], i32* [[AA_ADDR]], align 4 // CHECK20-NEXT: [[CONV:%.*]] = bitcast i32* [[AA_ADDR]] to i16* -// CHECK20-NEXT: [[TMP0:%.*]] = load i16, i16* [[CONV]], align 4 +// CHECK20-NEXT: [[TMP0:%.*]] = load i16, i16* [[CONV]], align 2 // CHECK20-NEXT: [[CONV1:%.*]] = bitcast i32* [[AA_CASTED]] to i16* // CHECK20-NEXT: store i16 [[TMP0]], i16* [[CONV1]], align 2 // CHECK20-NEXT: [[TMP1:%.*]] = load i32, i32* [[AA_CASTED]], align 4 @@ -16233,11 +16233,11 @@ int bar(int n){ // CHECK20-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP7]], 1 // CHECK20-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK20-NEXT: store i32 [[ADD]], i32* [[I]], align 4 -// CHECK20-NEXT: [[TMP8:%.*]] = load i16, i16* [[CONV]], align 4 +// CHECK20-NEXT: [[TMP8:%.*]] = load i16, i16* [[CONV]], align 2 // CHECK20-NEXT: [[CONV2:%.*]] = sext i16 [[TMP8]] to i32 // CHECK20-NEXT: [[ADD3:%.*]] = add nsw i32 [[CONV2]], 1 // CHECK20-NEXT: [[CONV4:%.*]] = trunc i32 [[ADD3]] to i16 -// CHECK20-NEXT: store i16 [[CONV4]], i16* [[CONV]], align 4 +// CHECK20-NEXT: store i16 [[CONV4]], i16* [[CONV]], align 2 // CHECK20-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK20: omp.body.continue: // CHECK20-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] @@ -16266,7 +16266,7 @@ int bar(int n){ // CHECK20-NEXT: [[TMP0:%.*]] = load i32, i32* [[A_ADDR]], align 4 // CHECK20-NEXT: store i32 [[TMP0]], i32* [[A_CASTED]], align 4 // CHECK20-NEXT: [[TMP1:%.*]] = load i32, i32* [[A_CASTED]], align 4 -// CHECK20-NEXT: [[TMP2:%.*]] = load i16, i16* [[CONV]], align 4 +// CHECK20-NEXT: [[TMP2:%.*]] = load i16, i16* [[CONV]], align 2 // CHECK20-NEXT: [[CONV1:%.*]] = bitcast i32* [[AA_CASTED]] to i16* // CHECK20-NEXT: store i16 [[TMP2]], i16* [[CONV1]], align 2 // CHECK20-NEXT: [[TMP3:%.*]] = load i32, i32* [[AA_CASTED]], align 4 @@ -16327,11 +16327,11 @@ int bar(int n){ // CHECK20-NEXT: [[TMP8:%.*]] = load i32, i32* [[A_ADDR]], align 4 // CHECK20-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP8]], 1 // CHECK20-NEXT: store i32 [[ADD2]], i32* [[A_ADDR]], align 4 -// CHECK20-NEXT: [[TMP9:%.*]] = load i16, i16* [[CONV]], align 4 +// CHECK20-NEXT: [[TMP9:%.*]] = load i16, i16* [[CONV]], align 2 // CHECK20-NEXT: [[CONV3:%.*]] = sext i16 [[TMP9]] to i32 // CHECK20-NEXT: [[ADD4:%.*]] = add nsw i32 [[CONV3]], 1 // CHECK20-NEXT: [[CONV5:%.*]] = trunc i32 [[ADD4]] to i16 -// CHECK20-NEXT: store i16 [[CONV5]], i16* [[CONV]], align 4 +// CHECK20-NEXT: store i16 [[CONV5]], i16* [[CONV]], align 2 // CHECK20-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK20: omp.body.continue: // CHECK20-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] @@ -17003,11 +17003,11 @@ int bar(int n){ // CHECK20-NEXT: [[TMP3:%.*]] = load i32, i32* [[A_ADDR]], align 4 // CHECK20-NEXT: store i32 [[TMP3]], i32* [[A_CASTED]], align 4 // CHECK20-NEXT: [[TMP4:%.*]] = load i32, i32* [[A_CASTED]], align 4 -// CHECK20-NEXT: [[TMP5:%.*]] = load i16, i16* [[CONV]], align 4 +// CHECK20-NEXT: [[TMP5:%.*]] = load i16, i16* [[CONV]], align 2 // CHECK20-NEXT: [[CONV2:%.*]] = bitcast i32* [[AA_CASTED]] to i16* // CHECK20-NEXT: store i16 [[TMP5]], i16* [[CONV2]], align 2 // CHECK20-NEXT: [[TMP6:%.*]] = load i32, i32* [[AA_CASTED]], align 4 -// CHECK20-NEXT: [[TMP7:%.*]] 
= load i8, i8* [[CONV1]], align 4 +// CHECK20-NEXT: [[TMP7:%.*]] = load i8, i8* [[CONV1]], align 1 // CHECK20-NEXT: [[CONV3:%.*]] = bitcast i32* [[AAA_CASTED]] to i8* // CHECK20-NEXT: store i8 [[TMP7]], i8* [[CONV3]], align 1 // CHECK20-NEXT: [[TMP8:%.*]] = load i32, i32* [[AAA_CASTED]], align 4 @@ -17104,16 +17104,16 @@ int bar(int n){ // CHECK20-NEXT: [[TMP20:%.*]] = load i32, i32* [[A_ADDR]], align 4 // CHECK20-NEXT: [[ADD11:%.*]] = add nsw i32 [[TMP20]], 1 // CHECK20-NEXT: store i32 [[ADD11]], i32* [[A_ADDR]], align 4 -// CHECK20-NEXT: [[TMP21:%.*]] = load i16, i16* [[CONV]], align 4 +// CHECK20-NEXT: [[TMP21:%.*]] = load i16, i16* [[CONV]], align 2 // CHECK20-NEXT: [[CONV12:%.*]] = sext i16 [[TMP21]] to i32 // CHECK20-NEXT: [[ADD13:%.*]] = add nsw i32 [[CONV12]], 1 // CHECK20-NEXT: [[CONV14:%.*]] = trunc i32 [[ADD13]] to i16 -// CHECK20-NEXT: store i16 [[CONV14]], i16* [[CONV]], align 4 -// CHECK20-NEXT: [[TMP22:%.*]] = load i8, i8* [[CONV1]], align 4 +// CHECK20-NEXT: store i16 [[CONV14]], i16* [[CONV]], align 2 +// CHECK20-NEXT: [[TMP22:%.*]] = load i8, i8* [[CONV1]], align 1 // CHECK20-NEXT: [[CONV15:%.*]] = sext i8 [[TMP22]] to i32 // CHECK20-NEXT: [[ADD16:%.*]] = add nsw i32 [[CONV15]], 1 // CHECK20-NEXT: [[CONV17:%.*]] = trunc i32 [[ADD16]] to i8 -// CHECK20-NEXT: store i8 [[CONV17]], i8* [[CONV1]], align 4 +// CHECK20-NEXT: store i8 [[CONV17]], i8* [[CONV1]], align 1 // CHECK20-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], [10 x i32]* [[TMP0]], i32 0, i32 2 // CHECK20-NEXT: [[TMP23:%.*]] = load i32, i32* [[ARRAYIDX]], align 4 // CHECK20-NEXT: [[ADD18:%.*]] = add nsw i32 [[TMP23]], 1 @@ -17153,7 +17153,7 @@ int bar(int n){ // CHECK20-NEXT: [[TMP1:%.*]] = load i32, i32* [[A_ADDR]], align 4 // CHECK20-NEXT: store i32 [[TMP1]], i32* [[A_CASTED]], align 4 // CHECK20-NEXT: [[TMP2:%.*]] = load i32, i32* [[A_CASTED]], align 4 -// CHECK20-NEXT: [[TMP3:%.*]] = load i16, i16* [[CONV]], align 4 +// CHECK20-NEXT: [[TMP3:%.*]] = load i16, i16* [[CONV]], align 2 // CHECK20-NEXT: [[CONV1:%.*]] = bitcast i32* [[AA_CASTED]] to i16* // CHECK20-NEXT: store i16 [[TMP3]], i16* [[CONV1]], align 2 // CHECK20-NEXT: [[TMP4:%.*]] = load i32, i32* [[AA_CASTED]], align 4 @@ -17217,11 +17217,11 @@ int bar(int n){ // CHECK20-NEXT: [[TMP9:%.*]] = load i32, i32* [[A_ADDR]], align 4 // CHECK20-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP9]], 1 // CHECK20-NEXT: store i32 [[ADD2]], i32* [[A_ADDR]], align 4 -// CHECK20-NEXT: [[TMP10:%.*]] = load i16, i16* [[CONV]], align 4 +// CHECK20-NEXT: [[TMP10:%.*]] = load i16, i16* [[CONV]], align 2 // CHECK20-NEXT: [[CONV3:%.*]] = sext i16 [[TMP10]] to i32 // CHECK20-NEXT: [[ADD4:%.*]] = add nsw i32 [[CONV3]], 1 // CHECK20-NEXT: [[CONV5:%.*]] = trunc i32 [[ADD4]] to i16 -// CHECK20-NEXT: store i16 [[CONV5]], i16* [[CONV]], align 4 +// CHECK20-NEXT: store i16 [[CONV5]], i16* [[CONV]], align 2 // CHECK20-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], [10 x i32]* [[TMP0]], i32 0, i32 2 // CHECK20-NEXT: [[TMP11:%.*]] = load i32, i32* [[ARRAYIDX]], align 4 // CHECK20-NEXT: [[ADD6:%.*]] = add nsw i32 [[TMP11]], 1 @@ -17262,10 +17262,10 @@ int bar(int n){ // CHECK25-NEXT: [[CONV:%.*]] = bitcast i64* [[AA_ADDR]] to i16* // CHECK25-NEXT: [[CONV3:%.*]] = bitcast i64* [[DOTCAPTURE_EXPR__ADDR]] to i32* // CHECK25-NEXT: [[CONV4:%.*]] = bitcast i64* [[DOTCAPTURE_EXPR__ADDR2]] to i32* -// CHECK25-NEXT: [[TMP1:%.*]] = load i32, i32* [[CONV3]], align 8 -// CHECK25-NEXT: [[TMP2:%.*]] = load i32, i32* [[CONV4]], align 8 +// CHECK25-NEXT: [[TMP1:%.*]] = load i32, i32* 
[[CONV3]], align 4 +// CHECK25-NEXT: [[TMP2:%.*]] = load i32, i32* [[CONV4]], align 4 // CHECK25-NEXT: call void @__kmpc_push_num_teams(%struct.ident_t* @[[GLOB2]], i32 [[TMP0]], i32 [[TMP1]], i32 [[TMP2]]) -// CHECK25-NEXT: [[TMP3:%.*]] = load i16, i16* [[CONV]], align 8 +// CHECK25-NEXT: [[TMP3:%.*]] = load i16, i16* [[CONV]], align 2 // CHECK25-NEXT: [[CONV5:%.*]] = bitcast i64* [[AA_CASTED]] to i16* // CHECK25-NEXT: store i16 [[TMP3]], i16* [[CONV5]], align 2 // CHECK25-NEXT: [[TMP4:%.*]] = load i64, i64* [[AA_CASTED]], align 8 @@ -17343,7 +17343,7 @@ int bar(int n){ // CHECK25-NEXT: [[AA_CASTED:%.*]] = alloca i64, align 8 // CHECK25-NEXT: store i64 [[AA]], i64* [[AA_ADDR]], align 8 // CHECK25-NEXT: [[CONV:%.*]] = bitcast i64* [[AA_ADDR]] to i16* -// CHECK25-NEXT: [[TMP0:%.*]] = load i16, i16* [[CONV]], align 8 +// CHECK25-NEXT: [[TMP0:%.*]] = load i16, i16* [[CONV]], align 2 // CHECK25-NEXT: [[CONV1:%.*]] = bitcast i64* [[AA_CASTED]] to i16* // CHECK25-NEXT: store i16 [[TMP0]], i16* [[CONV1]], align 2 // CHECK25-NEXT: [[TMP1:%.*]] = load i64, i64* [[AA_CASTED]], align 8 @@ -17399,11 +17399,11 @@ int bar(int n){ // CHECK25-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP7]], 1 // CHECK25-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK25-NEXT: store i32 [[ADD]], i32* [[I]], align 4 -// CHECK25-NEXT: [[TMP8:%.*]] = load i16, i16* [[CONV]], align 8 +// CHECK25-NEXT: [[TMP8:%.*]] = load i16, i16* [[CONV]], align 2 // CHECK25-NEXT: [[CONV2:%.*]] = sext i16 [[TMP8]] to i32 // CHECK25-NEXT: [[ADD3:%.*]] = add nsw i32 [[CONV2]], 1 // CHECK25-NEXT: [[CONV4:%.*]] = trunc i32 [[ADD3]] to i16 -// CHECK25-NEXT: store i16 [[CONV4]], i16* [[CONV]], align 8 +// CHECK25-NEXT: store i16 [[CONV4]], i16* [[CONV]], align 2 // CHECK25-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK25: omp.body.continue: // CHECK25-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] @@ -17430,11 +17430,11 @@ int bar(int n){ // CHECK25-NEXT: store i64 [[AA]], i64* [[AA_ADDR]], align 8 // CHECK25-NEXT: [[CONV:%.*]] = bitcast i64* [[A_ADDR]] to i32* // CHECK25-NEXT: [[CONV1:%.*]] = bitcast i64* [[AA_ADDR]] to i16* -// CHECK25-NEXT: [[TMP0:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK25-NEXT: [[TMP0:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK25-NEXT: [[CONV2:%.*]] = bitcast i64* [[A_CASTED]] to i32* // CHECK25-NEXT: store i32 [[TMP0]], i32* [[CONV2]], align 4 // CHECK25-NEXT: [[TMP1:%.*]] = load i64, i64* [[A_CASTED]], align 8 -// CHECK25-NEXT: [[TMP2:%.*]] = load i16, i16* [[CONV1]], align 8 +// CHECK25-NEXT: [[TMP2:%.*]] = load i16, i16* [[CONV1]], align 2 // CHECK25-NEXT: [[CONV3:%.*]] = bitcast i64* [[AA_CASTED]] to i16* // CHECK25-NEXT: store i16 [[TMP2]], i16* [[CONV3]], align 2 // CHECK25-NEXT: [[TMP3:%.*]] = load i64, i64* [[AA_CASTED]], align 8 @@ -17493,14 +17493,14 @@ int bar(int n){ // CHECK25-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP7]], 1 // CHECK25-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK25-NEXT: store i32 [[ADD]], i32* [[I]], align 4 -// CHECK25-NEXT: [[TMP8:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK25-NEXT: [[TMP8:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK25-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP8]], 1 -// CHECK25-NEXT: store i32 [[ADD3]], i32* [[CONV]], align 8 -// CHECK25-NEXT: [[TMP9:%.*]] = load i16, i16* [[CONV1]], align 8 +// CHECK25-NEXT: store i32 [[ADD3]], i32* [[CONV]], align 4 +// CHECK25-NEXT: [[TMP9:%.*]] = load i16, i16* [[CONV1]], align 2 // CHECK25-NEXT: [[CONV4:%.*]] = sext i16 [[TMP9]] to i32 // CHECK25-NEXT: [[ADD5:%.*]] = add nsw i32 [[CONV4]], 1 // CHECK25-NEXT: 
[[CONV6:%.*]] = trunc i32 [[ADD5]] to i16 -// CHECK25-NEXT: store i16 [[CONV6]], i16* [[CONV1]], align 8 +// CHECK25-NEXT: store i16 [[CONV6]], i16* [[CONV1]], align 2 // CHECK25-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK25: omp.body.continue: // CHECK25-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] @@ -17551,11 +17551,11 @@ int bar(int n){ // CHECK25-NEXT: [[TMP6:%.*]] = load double*, double** [[CN_ADDR]], align 8 // CHECK25-NEXT: [[TMP7:%.*]] = load %struct.TT*, %struct.TT** [[D_ADDR]], align 8 // CHECK25-NEXT: [[CONV5:%.*]] = bitcast i64* [[DOTCAPTURE_EXPR__ADDR]] to i32* -// CHECK25-NEXT: [[TMP8:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK25-NEXT: [[TMP8:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK25-NEXT: [[CONV6:%.*]] = bitcast i64* [[A_CASTED]] to i32* // CHECK25-NEXT: store i32 [[TMP8]], i32* [[CONV6]], align 4 // CHECK25-NEXT: [[TMP9:%.*]] = load i64, i64* [[A_CASTED]], align 8 -// CHECK25-NEXT: [[TMP10:%.*]] = load i32, i32* [[CONV5]], align 8 +// CHECK25-NEXT: [[TMP10:%.*]] = load i32, i32* [[CONV5]], align 4 // CHECK25-NEXT: [[CONV7:%.*]] = bitcast i64* [[DOTCAPTURE_EXPR__CASTED]] to i32* // CHECK25-NEXT: store i32 [[TMP10]], i32* [[CONV7]], align 4 // CHECK25-NEXT: [[TMP11:%.*]] = load i64, i64* [[DOTCAPTURE_EXPR__CASTED]], align 8 @@ -17611,7 +17611,7 @@ int bar(int n){ // CHECK25-NEXT: store i32 9, i32* [[DOTOMP_UB]], align 4 // CHECK25-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK25-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK25-NEXT: [[TMP8:%.*]] = load i32, i32* [[CONV5]], align 8 +// CHECK25-NEXT: [[TMP8:%.*]] = load i32, i32* [[CONV5]], align 4 // CHECK25-NEXT: [[TMP9:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK25-NEXT: [[TMP10:%.*]] = load i32, i32* [[TMP9]], align 4 // CHECK25-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP10]], i32 91, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 [[TMP8]]) @@ -17646,9 +17646,9 @@ int bar(int n){ // CHECK25-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP18]], 1 // CHECK25-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK25-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !13 -// CHECK25-NEXT: [[TMP19:%.*]] = load i32, i32* [[CONV]], align 8, !llvm.access.group !13 +// CHECK25-NEXT: [[TMP19:%.*]] = load i32, i32* [[CONV]], align 4, !llvm.access.group !13 // CHECK25-NEXT: [[ADD8:%.*]] = add nsw i32 [[TMP19]], 1 -// CHECK25-NEXT: store i32 [[ADD8]], i32* [[CONV]], align 8, !llvm.access.group !13 +// CHECK25-NEXT: store i32 [[ADD8]], i32* [[CONV]], align 4, !llvm.access.group !13 // CHECK25-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x float], [10 x float]* [[TMP0]], i64 0, i64 2 // CHECK25-NEXT: [[TMP20:%.*]] = load float, float* [[ARRAYIDX]], align 4, !llvm.access.group !13 // CHECK25-NEXT: [[CONV9:%.*]] = fpext float [[TMP20]] to double @@ -17729,19 +17729,19 @@ int bar(int n){ // CHECK25-NEXT: [[CONV2:%.*]] = bitcast i64* [[AA_ADDR]] to i16* // CHECK25-NEXT: [[CONV3:%.*]] = bitcast i64* [[AAA_ADDR]] to i8* // CHECK25-NEXT: [[TMP0:%.*]] = load [10 x i32]*, [10 x i32]** [[B_ADDR]], align 8 -// CHECK25-NEXT: [[TMP1:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK25-NEXT: [[TMP1:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK25-NEXT: [[CONV4:%.*]] = bitcast i64* [[N_CASTED]] to i32* // CHECK25-NEXT: store i32 [[TMP1]], i32* [[CONV4]], align 4 // CHECK25-NEXT: [[TMP2:%.*]] = load i64, i64* [[N_CASTED]], align 8 -// CHECK25-NEXT: [[TMP3:%.*]] = load i32, i32* [[CONV1]], 
align 8 +// CHECK25-NEXT: [[TMP3:%.*]] = load i32, i32* [[CONV1]], align 4 // CHECK25-NEXT: [[CONV5:%.*]] = bitcast i64* [[A_CASTED]] to i32* // CHECK25-NEXT: store i32 [[TMP3]], i32* [[CONV5]], align 4 // CHECK25-NEXT: [[TMP4:%.*]] = load i64, i64* [[A_CASTED]], align 8 -// CHECK25-NEXT: [[TMP5:%.*]] = load i16, i16* [[CONV2]], align 8 +// CHECK25-NEXT: [[TMP5:%.*]] = load i16, i16* [[CONV2]], align 2 // CHECK25-NEXT: [[CONV6:%.*]] = bitcast i64* [[AA_CASTED]] to i16* // CHECK25-NEXT: store i16 [[TMP5]], i16* [[CONV6]], align 2 // CHECK25-NEXT: [[TMP6:%.*]] = load i64, i64* [[AA_CASTED]], align 8 -// CHECK25-NEXT: [[TMP7:%.*]] = load i8, i8* [[CONV3]], align 8 +// CHECK25-NEXT: [[TMP7:%.*]] = load i8, i8* [[CONV3]], align 1 // CHECK25-NEXT: [[CONV7:%.*]] = bitcast i64* [[AAA_CASTED]] to i8* // CHECK25-NEXT: store i8 [[TMP7]], i8* [[CONV7]], align 1 // CHECK25-NEXT: [[TMP8:%.*]] = load i64, i64* [[AAA_CASTED]], align 8 @@ -17782,9 +17782,9 @@ int bar(int n){ // CHECK25-NEXT: [[CONV2:%.*]] = bitcast i64* [[AA_ADDR]] to i16* // CHECK25-NEXT: [[CONV3:%.*]] = bitcast i64* [[AAA_ADDR]] to i8* // CHECK25-NEXT: [[TMP0:%.*]] = load [10 x i32]*, [10 x i32]** [[B_ADDR]], align 8 -// CHECK25-NEXT: [[TMP1:%.*]] = load i32, i32* [[CONV1]], align 8 +// CHECK25-NEXT: [[TMP1:%.*]] = load i32, i32* [[CONV1]], align 4 // CHECK25-NEXT: store i32 [[TMP1]], i32* [[DOTCAPTURE_EXPR_]], align 4 -// CHECK25-NEXT: [[TMP2:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK25-NEXT: [[TMP2:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK25-NEXT: store i32 [[TMP2]], i32* [[DOTCAPTURE_EXPR_4]], align 4 // CHECK25-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_4]], align 4 // CHECK25-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 @@ -17837,19 +17837,19 @@ int bar(int n){ // CHECK25-NEXT: [[MUL:%.*]] = mul i32 [[TMP19]], 1 // CHECK25-NEXT: [[ADD12:%.*]] = add i32 [[TMP18]], [[MUL]] // CHECK25-NEXT: store i32 [[ADD12]], i32* [[I8]], align 4 -// CHECK25-NEXT: [[TMP20:%.*]] = load i32, i32* [[CONV1]], align 8 +// CHECK25-NEXT: [[TMP20:%.*]] = load i32, i32* [[CONV1]], align 4 // CHECK25-NEXT: [[ADD13:%.*]] = add nsw i32 [[TMP20]], 1 -// CHECK25-NEXT: store i32 [[ADD13]], i32* [[CONV1]], align 8 -// CHECK25-NEXT: [[TMP21:%.*]] = load i16, i16* [[CONV2]], align 8 +// CHECK25-NEXT: store i32 [[ADD13]], i32* [[CONV1]], align 4 +// CHECK25-NEXT: [[TMP21:%.*]] = load i16, i16* [[CONV2]], align 2 // CHECK25-NEXT: [[CONV14:%.*]] = sext i16 [[TMP21]] to i32 // CHECK25-NEXT: [[ADD15:%.*]] = add nsw i32 [[CONV14]], 1 // CHECK25-NEXT: [[CONV16:%.*]] = trunc i32 [[ADD15]] to i16 -// CHECK25-NEXT: store i16 [[CONV16]], i16* [[CONV2]], align 8 -// CHECK25-NEXT: [[TMP22:%.*]] = load i8, i8* [[CONV3]], align 8 +// CHECK25-NEXT: store i16 [[CONV16]], i16* [[CONV2]], align 2 +// CHECK25-NEXT: [[TMP22:%.*]] = load i8, i8* [[CONV3]], align 1 // CHECK25-NEXT: [[CONV17:%.*]] = sext i8 [[TMP22]] to i32 // CHECK25-NEXT: [[ADD18:%.*]] = add nsw i32 [[CONV17]], 1 // CHECK25-NEXT: [[CONV19:%.*]] = trunc i32 [[ADD18]] to i8 -// CHECK25-NEXT: store i8 [[CONV19]], i8* [[CONV3]], align 8 +// CHECK25-NEXT: store i8 [[CONV19]], i8* [[CONV3]], align 1 // CHECK25-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], [10 x i32]* [[TMP0]], i64 0, i64 2 // CHECK25-NEXT: [[TMP23:%.*]] = load i32, i32* [[ARRAYIDX]], align 4 // CHECK25-NEXT: [[ADD20:%.*]] = add nsw i32 [[TMP23]], 1 @@ -17892,7 +17892,7 @@ int bar(int n){ // CHECK25-NEXT: [[TMP1:%.*]] = load i64, i64* [[VLA_ADDR]], align 8 // CHECK25-NEXT: [[TMP2:%.*]] = load i64, i64* 
[[VLA_ADDR2]], align 8 // CHECK25-NEXT: [[TMP3:%.*]] = load i16*, i16** [[C_ADDR]], align 8 -// CHECK25-NEXT: [[TMP4:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK25-NEXT: [[TMP4:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK25-NEXT: [[CONV3:%.*]] = bitcast i64* [[B_CASTED]] to i32* // CHECK25-NEXT: store i32 [[TMP4]], i32* [[CONV3]], align 4 // CHECK25-NEXT: [[TMP5:%.*]] = load i64, i64* [[B_CASTED]], align 8 @@ -17960,7 +17960,7 @@ int bar(int n){ // CHECK25-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP11]], 1 // CHECK25-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK25-NEXT: store i32 [[ADD]], i32* [[I]], align 4 -// CHECK25-NEXT: [[TMP12:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK25-NEXT: [[TMP12:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK25-NEXT: [[CONV4:%.*]] = sitofp i32 [[TMP12]] to double // CHECK25-NEXT: [[ADD5:%.*]] = fadd double [[CONV4]], 1.500000e+00 // CHECK25-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_S1:%.*]], %struct.S1* [[TMP0]], i32 0, i32 0 @@ -18003,11 +18003,11 @@ int bar(int n){ // CHECK25-NEXT: [[CONV:%.*]] = bitcast i64* [[A_ADDR]] to i32* // CHECK25-NEXT: [[CONV1:%.*]] = bitcast i64* [[AA_ADDR]] to i16* // CHECK25-NEXT: [[TMP0:%.*]] = load [10 x i32]*, [10 x i32]** [[B_ADDR]], align 8 -// CHECK25-NEXT: [[TMP1:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK25-NEXT: [[TMP1:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK25-NEXT: [[CONV2:%.*]] = bitcast i64* [[A_CASTED]] to i32* // CHECK25-NEXT: store i32 [[TMP1]], i32* [[CONV2]], align 4 // CHECK25-NEXT: [[TMP2:%.*]] = load i64, i64* [[A_CASTED]], align 8 -// CHECK25-NEXT: [[TMP3:%.*]] = load i16, i16* [[CONV1]], align 8 +// CHECK25-NEXT: [[TMP3:%.*]] = load i16, i16* [[CONV1]], align 2 // CHECK25-NEXT: [[CONV3:%.*]] = bitcast i64* [[AA_CASTED]] to i16* // CHECK25-NEXT: store i16 [[TMP3]], i16* [[CONV3]], align 2 // CHECK25-NEXT: [[TMP4:%.*]] = load i64, i64* [[AA_CASTED]], align 8 @@ -18069,14 +18069,14 @@ int bar(int n){ // CHECK25-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP8]], 1 // CHECK25-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK25-NEXT: store i32 [[ADD]], i32* [[I]], align 4 -// CHECK25-NEXT: [[TMP9:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK25-NEXT: [[TMP9:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK25-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP9]], 1 -// CHECK25-NEXT: store i32 [[ADD3]], i32* [[CONV]], align 8 -// CHECK25-NEXT: [[TMP10:%.*]] = load i16, i16* [[CONV1]], align 8 +// CHECK25-NEXT: store i32 [[ADD3]], i32* [[CONV]], align 4 +// CHECK25-NEXT: [[TMP10:%.*]] = load i16, i16* [[CONV1]], align 2 // CHECK25-NEXT: [[CONV4:%.*]] = sext i16 [[TMP10]] to i32 // CHECK25-NEXT: [[ADD5:%.*]] = add nsw i32 [[CONV4]], 1 // CHECK25-NEXT: [[CONV6:%.*]] = trunc i32 [[ADD5]] to i16 -// CHECK25-NEXT: store i16 [[CONV6]], i16* [[CONV1]], align 8 +// CHECK25-NEXT: store i16 [[CONV6]], i16* [[CONV1]], align 2 // CHECK25-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], [10 x i32]* [[TMP0]], i64 0, i64 2 // CHECK25-NEXT: [[TMP11:%.*]] = load i32, i32* [[ARRAYIDX]], align 4 // CHECK25-NEXT: [[ADD7:%.*]] = add nsw i32 [[TMP11]], 1 @@ -18110,10 +18110,10 @@ int bar(int n){ // CHECK26-NEXT: [[CONV:%.*]] = bitcast i64* [[AA_ADDR]] to i16* // CHECK26-NEXT: [[CONV3:%.*]] = bitcast i64* [[DOTCAPTURE_EXPR__ADDR]] to i32* // CHECK26-NEXT: [[CONV4:%.*]] = bitcast i64* [[DOTCAPTURE_EXPR__ADDR2]] to i32* -// CHECK26-NEXT: [[TMP1:%.*]] = load i32, i32* [[CONV3]], align 8 -// CHECK26-NEXT: [[TMP2:%.*]] = load i32, i32* [[CONV4]], align 8 +// CHECK26-NEXT: [[TMP1:%.*]] = load i32, i32* 
[[CONV3]], align 4 +// CHECK26-NEXT: [[TMP2:%.*]] = load i32, i32* [[CONV4]], align 4 // CHECK26-NEXT: call void @__kmpc_push_num_teams(%struct.ident_t* @[[GLOB2]], i32 [[TMP0]], i32 [[TMP1]], i32 [[TMP2]]) -// CHECK26-NEXT: [[TMP3:%.*]] = load i16, i16* [[CONV]], align 8 +// CHECK26-NEXT: [[TMP3:%.*]] = load i16, i16* [[CONV]], align 2 // CHECK26-NEXT: [[CONV5:%.*]] = bitcast i64* [[AA_CASTED]] to i16* // CHECK26-NEXT: store i16 [[TMP3]], i16* [[CONV5]], align 2 // CHECK26-NEXT: [[TMP4:%.*]] = load i64, i64* [[AA_CASTED]], align 8 @@ -18191,7 +18191,7 @@ int bar(int n){ // CHECK26-NEXT: [[AA_CASTED:%.*]] = alloca i64, align 8 // CHECK26-NEXT: store i64 [[AA]], i64* [[AA_ADDR]], align 8 // CHECK26-NEXT: [[CONV:%.*]] = bitcast i64* [[AA_ADDR]] to i16* -// CHECK26-NEXT: [[TMP0:%.*]] = load i16, i16* [[CONV]], align 8 +// CHECK26-NEXT: [[TMP0:%.*]] = load i16, i16* [[CONV]], align 2 // CHECK26-NEXT: [[CONV1:%.*]] = bitcast i64* [[AA_CASTED]] to i16* // CHECK26-NEXT: store i16 [[TMP0]], i16* [[CONV1]], align 2 // CHECK26-NEXT: [[TMP1:%.*]] = load i64, i64* [[AA_CASTED]], align 8 @@ -18247,11 +18247,11 @@ int bar(int n){ // CHECK26-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP7]], 1 // CHECK26-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK26-NEXT: store i32 [[ADD]], i32* [[I]], align 4 -// CHECK26-NEXT: [[TMP8:%.*]] = load i16, i16* [[CONV]], align 8 +// CHECK26-NEXT: [[TMP8:%.*]] = load i16, i16* [[CONV]], align 2 // CHECK26-NEXT: [[CONV2:%.*]] = sext i16 [[TMP8]] to i32 // CHECK26-NEXT: [[ADD3:%.*]] = add nsw i32 [[CONV2]], 1 // CHECK26-NEXT: [[CONV4:%.*]] = trunc i32 [[ADD3]] to i16 -// CHECK26-NEXT: store i16 [[CONV4]], i16* [[CONV]], align 8 +// CHECK26-NEXT: store i16 [[CONV4]], i16* [[CONV]], align 2 // CHECK26-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK26: omp.body.continue: // CHECK26-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] @@ -18278,11 +18278,11 @@ int bar(int n){ // CHECK26-NEXT: store i64 [[AA]], i64* [[AA_ADDR]], align 8 // CHECK26-NEXT: [[CONV:%.*]] = bitcast i64* [[A_ADDR]] to i32* // CHECK26-NEXT: [[CONV1:%.*]] = bitcast i64* [[AA_ADDR]] to i16* -// CHECK26-NEXT: [[TMP0:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK26-NEXT: [[TMP0:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK26-NEXT: [[CONV2:%.*]] = bitcast i64* [[A_CASTED]] to i32* // CHECK26-NEXT: store i32 [[TMP0]], i32* [[CONV2]], align 4 // CHECK26-NEXT: [[TMP1:%.*]] = load i64, i64* [[A_CASTED]], align 8 -// CHECK26-NEXT: [[TMP2:%.*]] = load i16, i16* [[CONV1]], align 8 +// CHECK26-NEXT: [[TMP2:%.*]] = load i16, i16* [[CONV1]], align 2 // CHECK26-NEXT: [[CONV3:%.*]] = bitcast i64* [[AA_CASTED]] to i16* // CHECK26-NEXT: store i16 [[TMP2]], i16* [[CONV3]], align 2 // CHECK26-NEXT: [[TMP3:%.*]] = load i64, i64* [[AA_CASTED]], align 8 @@ -18341,14 +18341,14 @@ int bar(int n){ // CHECK26-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP7]], 1 // CHECK26-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK26-NEXT: store i32 [[ADD]], i32* [[I]], align 4 -// CHECK26-NEXT: [[TMP8:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK26-NEXT: [[TMP8:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK26-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP8]], 1 -// CHECK26-NEXT: store i32 [[ADD3]], i32* [[CONV]], align 8 -// CHECK26-NEXT: [[TMP9:%.*]] = load i16, i16* [[CONV1]], align 8 +// CHECK26-NEXT: store i32 [[ADD3]], i32* [[CONV]], align 4 +// CHECK26-NEXT: [[TMP9:%.*]] = load i16, i16* [[CONV1]], align 2 // CHECK26-NEXT: [[CONV4:%.*]] = sext i16 [[TMP9]] to i32 // CHECK26-NEXT: [[ADD5:%.*]] = add nsw i32 [[CONV4]], 1 // CHECK26-NEXT: 
[[CONV6:%.*]] = trunc i32 [[ADD5]] to i16 -// CHECK26-NEXT: store i16 [[CONV6]], i16* [[CONV1]], align 8 +// CHECK26-NEXT: store i16 [[CONV6]], i16* [[CONV1]], align 2 // CHECK26-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK26: omp.body.continue: // CHECK26-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] @@ -18399,11 +18399,11 @@ int bar(int n){ // CHECK26-NEXT: [[TMP6:%.*]] = load double*, double** [[CN_ADDR]], align 8 // CHECK26-NEXT: [[TMP7:%.*]] = load %struct.TT*, %struct.TT** [[D_ADDR]], align 8 // CHECK26-NEXT: [[CONV5:%.*]] = bitcast i64* [[DOTCAPTURE_EXPR__ADDR]] to i32* -// CHECK26-NEXT: [[TMP8:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK26-NEXT: [[TMP8:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK26-NEXT: [[CONV6:%.*]] = bitcast i64* [[A_CASTED]] to i32* // CHECK26-NEXT: store i32 [[TMP8]], i32* [[CONV6]], align 4 // CHECK26-NEXT: [[TMP9:%.*]] = load i64, i64* [[A_CASTED]], align 8 -// CHECK26-NEXT: [[TMP10:%.*]] = load i32, i32* [[CONV5]], align 8 +// CHECK26-NEXT: [[TMP10:%.*]] = load i32, i32* [[CONV5]], align 4 // CHECK26-NEXT: [[CONV7:%.*]] = bitcast i64* [[DOTCAPTURE_EXPR__CASTED]] to i32* // CHECK26-NEXT: store i32 [[TMP10]], i32* [[CONV7]], align 4 // CHECK26-NEXT: [[TMP11:%.*]] = load i64, i64* [[DOTCAPTURE_EXPR__CASTED]], align 8 @@ -18459,7 +18459,7 @@ int bar(int n){ // CHECK26-NEXT: store i32 9, i32* [[DOTOMP_UB]], align 4 // CHECK26-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK26-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK26-NEXT: [[TMP8:%.*]] = load i32, i32* [[CONV5]], align 8 +// CHECK26-NEXT: [[TMP8:%.*]] = load i32, i32* [[CONV5]], align 4 // CHECK26-NEXT: [[TMP9:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK26-NEXT: [[TMP10:%.*]] = load i32, i32* [[TMP9]], align 4 // CHECK26-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP10]], i32 91, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 [[TMP8]]) @@ -18494,9 +18494,9 @@ int bar(int n){ // CHECK26-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP18]], 1 // CHECK26-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK26-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !13 -// CHECK26-NEXT: [[TMP19:%.*]] = load i32, i32* [[CONV]], align 8, !llvm.access.group !13 +// CHECK26-NEXT: [[TMP19:%.*]] = load i32, i32* [[CONV]], align 4, !llvm.access.group !13 // CHECK26-NEXT: [[ADD8:%.*]] = add nsw i32 [[TMP19]], 1 -// CHECK26-NEXT: store i32 [[ADD8]], i32* [[CONV]], align 8, !llvm.access.group !13 +// CHECK26-NEXT: store i32 [[ADD8]], i32* [[CONV]], align 4, !llvm.access.group !13 // CHECK26-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x float], [10 x float]* [[TMP0]], i64 0, i64 2 // CHECK26-NEXT: [[TMP20:%.*]] = load float, float* [[ARRAYIDX]], align 4, !llvm.access.group !13 // CHECK26-NEXT: [[CONV9:%.*]] = fpext float [[TMP20]] to double @@ -18577,19 +18577,19 @@ int bar(int n){ // CHECK26-NEXT: [[CONV2:%.*]] = bitcast i64* [[AA_ADDR]] to i16* // CHECK26-NEXT: [[CONV3:%.*]] = bitcast i64* [[AAA_ADDR]] to i8* // CHECK26-NEXT: [[TMP0:%.*]] = load [10 x i32]*, [10 x i32]** [[B_ADDR]], align 8 -// CHECK26-NEXT: [[TMP1:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK26-NEXT: [[TMP1:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK26-NEXT: [[CONV4:%.*]] = bitcast i64* [[N_CASTED]] to i32* // CHECK26-NEXT: store i32 [[TMP1]], i32* [[CONV4]], align 4 // CHECK26-NEXT: [[TMP2:%.*]] = load i64, i64* [[N_CASTED]], align 8 -// CHECK26-NEXT: [[TMP3:%.*]] = load i32, i32* [[CONV1]], 
align 8 +// CHECK26-NEXT: [[TMP3:%.*]] = load i32, i32* [[CONV1]], align 4 // CHECK26-NEXT: [[CONV5:%.*]] = bitcast i64* [[A_CASTED]] to i32* // CHECK26-NEXT: store i32 [[TMP3]], i32* [[CONV5]], align 4 // CHECK26-NEXT: [[TMP4:%.*]] = load i64, i64* [[A_CASTED]], align 8 -// CHECK26-NEXT: [[TMP5:%.*]] = load i16, i16* [[CONV2]], align 8 +// CHECK26-NEXT: [[TMP5:%.*]] = load i16, i16* [[CONV2]], align 2 // CHECK26-NEXT: [[CONV6:%.*]] = bitcast i64* [[AA_CASTED]] to i16* // CHECK26-NEXT: store i16 [[TMP5]], i16* [[CONV6]], align 2 // CHECK26-NEXT: [[TMP6:%.*]] = load i64, i64* [[AA_CASTED]], align 8 -// CHECK26-NEXT: [[TMP7:%.*]] = load i8, i8* [[CONV3]], align 8 +// CHECK26-NEXT: [[TMP7:%.*]] = load i8, i8* [[CONV3]], align 1 // CHECK26-NEXT: [[CONV7:%.*]] = bitcast i64* [[AAA_CASTED]] to i8* // CHECK26-NEXT: store i8 [[TMP7]], i8* [[CONV7]], align 1 // CHECK26-NEXT: [[TMP8:%.*]] = load i64, i64* [[AAA_CASTED]], align 8 @@ -18630,9 +18630,9 @@ int bar(int n){ // CHECK26-NEXT: [[CONV2:%.*]] = bitcast i64* [[AA_ADDR]] to i16* // CHECK26-NEXT: [[CONV3:%.*]] = bitcast i64* [[AAA_ADDR]] to i8* // CHECK26-NEXT: [[TMP0:%.*]] = load [10 x i32]*, [10 x i32]** [[B_ADDR]], align 8 -// CHECK26-NEXT: [[TMP1:%.*]] = load i32, i32* [[CONV1]], align 8 +// CHECK26-NEXT: [[TMP1:%.*]] = load i32, i32* [[CONV1]], align 4 // CHECK26-NEXT: store i32 [[TMP1]], i32* [[DOTCAPTURE_EXPR_]], align 4 -// CHECK26-NEXT: [[TMP2:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK26-NEXT: [[TMP2:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK26-NEXT: store i32 [[TMP2]], i32* [[DOTCAPTURE_EXPR_4]], align 4 // CHECK26-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_4]], align 4 // CHECK26-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 @@ -18685,19 +18685,19 @@ int bar(int n){ // CHECK26-NEXT: [[MUL:%.*]] = mul i32 [[TMP19]], 1 // CHECK26-NEXT: [[ADD12:%.*]] = add i32 [[TMP18]], [[MUL]] // CHECK26-NEXT: store i32 [[ADD12]], i32* [[I8]], align 4 -// CHECK26-NEXT: [[TMP20:%.*]] = load i32, i32* [[CONV1]], align 8 +// CHECK26-NEXT: [[TMP20:%.*]] = load i32, i32* [[CONV1]], align 4 // CHECK26-NEXT: [[ADD13:%.*]] = add nsw i32 [[TMP20]], 1 -// CHECK26-NEXT: store i32 [[ADD13]], i32* [[CONV1]], align 8 -// CHECK26-NEXT: [[TMP21:%.*]] = load i16, i16* [[CONV2]], align 8 +// CHECK26-NEXT: store i32 [[ADD13]], i32* [[CONV1]], align 4 +// CHECK26-NEXT: [[TMP21:%.*]] = load i16, i16* [[CONV2]], align 2 // CHECK26-NEXT: [[CONV14:%.*]] = sext i16 [[TMP21]] to i32 // CHECK26-NEXT: [[ADD15:%.*]] = add nsw i32 [[CONV14]], 1 // CHECK26-NEXT: [[CONV16:%.*]] = trunc i32 [[ADD15]] to i16 -// CHECK26-NEXT: store i16 [[CONV16]], i16* [[CONV2]], align 8 -// CHECK26-NEXT: [[TMP22:%.*]] = load i8, i8* [[CONV3]], align 8 +// CHECK26-NEXT: store i16 [[CONV16]], i16* [[CONV2]], align 2 +// CHECK26-NEXT: [[TMP22:%.*]] = load i8, i8* [[CONV3]], align 1 // CHECK26-NEXT: [[CONV17:%.*]] = sext i8 [[TMP22]] to i32 // CHECK26-NEXT: [[ADD18:%.*]] = add nsw i32 [[CONV17]], 1 // CHECK26-NEXT: [[CONV19:%.*]] = trunc i32 [[ADD18]] to i8 -// CHECK26-NEXT: store i8 [[CONV19]], i8* [[CONV3]], align 8 +// CHECK26-NEXT: store i8 [[CONV19]], i8* [[CONV3]], align 1 // CHECK26-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], [10 x i32]* [[TMP0]], i64 0, i64 2 // CHECK26-NEXT: [[TMP23:%.*]] = load i32, i32* [[ARRAYIDX]], align 4 // CHECK26-NEXT: [[ADD20:%.*]] = add nsw i32 [[TMP23]], 1 @@ -18740,7 +18740,7 @@ int bar(int n){ // CHECK26-NEXT: [[TMP1:%.*]] = load i64, i64* [[VLA_ADDR]], align 8 // CHECK26-NEXT: [[TMP2:%.*]] = load i64, i64* 
[[VLA_ADDR2]], align 8 // CHECK26-NEXT: [[TMP3:%.*]] = load i16*, i16** [[C_ADDR]], align 8 -// CHECK26-NEXT: [[TMP4:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK26-NEXT: [[TMP4:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK26-NEXT: [[CONV3:%.*]] = bitcast i64* [[B_CASTED]] to i32* // CHECK26-NEXT: store i32 [[TMP4]], i32* [[CONV3]], align 4 // CHECK26-NEXT: [[TMP5:%.*]] = load i64, i64* [[B_CASTED]], align 8 @@ -18808,7 +18808,7 @@ int bar(int n){ // CHECK26-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP11]], 1 // CHECK26-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK26-NEXT: store i32 [[ADD]], i32* [[I]], align 4 -// CHECK26-NEXT: [[TMP12:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK26-NEXT: [[TMP12:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK26-NEXT: [[CONV4:%.*]] = sitofp i32 [[TMP12]] to double // CHECK26-NEXT: [[ADD5:%.*]] = fadd double [[CONV4]], 1.500000e+00 // CHECK26-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_S1:%.*]], %struct.S1* [[TMP0]], i32 0, i32 0 @@ -18851,11 +18851,11 @@ int bar(int n){ // CHECK26-NEXT: [[CONV:%.*]] = bitcast i64* [[A_ADDR]] to i32* // CHECK26-NEXT: [[CONV1:%.*]] = bitcast i64* [[AA_ADDR]] to i16* // CHECK26-NEXT: [[TMP0:%.*]] = load [10 x i32]*, [10 x i32]** [[B_ADDR]], align 8 -// CHECK26-NEXT: [[TMP1:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK26-NEXT: [[TMP1:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK26-NEXT: [[CONV2:%.*]] = bitcast i64* [[A_CASTED]] to i32* // CHECK26-NEXT: store i32 [[TMP1]], i32* [[CONV2]], align 4 // CHECK26-NEXT: [[TMP2:%.*]] = load i64, i64* [[A_CASTED]], align 8 -// CHECK26-NEXT: [[TMP3:%.*]] = load i16, i16* [[CONV1]], align 8 +// CHECK26-NEXT: [[TMP3:%.*]] = load i16, i16* [[CONV1]], align 2 // CHECK26-NEXT: [[CONV3:%.*]] = bitcast i64* [[AA_CASTED]] to i16* // CHECK26-NEXT: store i16 [[TMP3]], i16* [[CONV3]], align 2 // CHECK26-NEXT: [[TMP4:%.*]] = load i64, i64* [[AA_CASTED]], align 8 @@ -18917,14 +18917,14 @@ int bar(int n){ // CHECK26-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP8]], 1 // CHECK26-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK26-NEXT: store i32 [[ADD]], i32* [[I]], align 4 -// CHECK26-NEXT: [[TMP9:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK26-NEXT: [[TMP9:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK26-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP9]], 1 -// CHECK26-NEXT: store i32 [[ADD3]], i32* [[CONV]], align 8 -// CHECK26-NEXT: [[TMP10:%.*]] = load i16, i16* [[CONV1]], align 8 +// CHECK26-NEXT: store i32 [[ADD3]], i32* [[CONV]], align 4 +// CHECK26-NEXT: [[TMP10:%.*]] = load i16, i16* [[CONV1]], align 2 // CHECK26-NEXT: [[CONV4:%.*]] = sext i16 [[TMP10]] to i32 // CHECK26-NEXT: [[ADD5:%.*]] = add nsw i32 [[CONV4]], 1 // CHECK26-NEXT: [[CONV6:%.*]] = trunc i32 [[ADD5]] to i16 -// CHECK26-NEXT: store i16 [[CONV6]], i16* [[CONV1]], align 8 +// CHECK26-NEXT: store i16 [[CONV6]], i16* [[CONV1]], align 2 // CHECK26-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], [10 x i32]* [[TMP0]], i64 0, i64 2 // CHECK26-NEXT: [[TMP11:%.*]] = load i32, i32* [[ARRAYIDX]], align 4 // CHECK26-NEXT: [[ADD7:%.*]] = add nsw i32 [[TMP11]], 1 @@ -18959,7 +18959,7 @@ int bar(int n){ // CHECK27-NEXT: [[TMP1:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR__ADDR]], align 4 // CHECK27-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR__ADDR2]], align 4 // CHECK27-NEXT: call void @__kmpc_push_num_teams(%struct.ident_t* @[[GLOB2]], i32 [[TMP0]], i32 [[TMP1]], i32 [[TMP2]]) -// CHECK27-NEXT: [[TMP3:%.*]] = load i16, i16* [[CONV]], align 4 +// CHECK27-NEXT: [[TMP3:%.*]] = load i16, i16* [[CONV]], 
align 2 // CHECK27-NEXT: [[CONV3:%.*]] = bitcast i32* [[AA_CASTED]] to i16* // CHECK27-NEXT: store i16 [[TMP3]], i16* [[CONV3]], align 2 // CHECK27-NEXT: [[TMP4:%.*]] = load i32, i32* [[AA_CASTED]], align 4 @@ -19037,7 +19037,7 @@ int bar(int n){ // CHECK27-NEXT: [[AA_CASTED:%.*]] = alloca i32, align 4 // CHECK27-NEXT: store i32 [[AA]], i32* [[AA_ADDR]], align 4 // CHECK27-NEXT: [[CONV:%.*]] = bitcast i32* [[AA_ADDR]] to i16* -// CHECK27-NEXT: [[TMP0:%.*]] = load i16, i16* [[CONV]], align 4 +// CHECK27-NEXT: [[TMP0:%.*]] = load i16, i16* [[CONV]], align 2 // CHECK27-NEXT: [[CONV1:%.*]] = bitcast i32* [[AA_CASTED]] to i16* // CHECK27-NEXT: store i16 [[TMP0]], i16* [[CONV1]], align 2 // CHECK27-NEXT: [[TMP1:%.*]] = load i32, i32* [[AA_CASTED]], align 4 @@ -19093,11 +19093,11 @@ int bar(int n){ // CHECK27-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP7]], 1 // CHECK27-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK27-NEXT: store i32 [[ADD]], i32* [[I]], align 4 -// CHECK27-NEXT: [[TMP8:%.*]] = load i16, i16* [[CONV]], align 4 +// CHECK27-NEXT: [[TMP8:%.*]] = load i16, i16* [[CONV]], align 2 // CHECK27-NEXT: [[CONV2:%.*]] = sext i16 [[TMP8]] to i32 // CHECK27-NEXT: [[ADD3:%.*]] = add nsw i32 [[CONV2]], 1 // CHECK27-NEXT: [[CONV4:%.*]] = trunc i32 [[ADD3]] to i16 -// CHECK27-NEXT: store i16 [[CONV4]], i16* [[CONV]], align 4 +// CHECK27-NEXT: store i16 [[CONV4]], i16* [[CONV]], align 2 // CHECK27-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK27: omp.body.continue: // CHECK27-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] @@ -19126,7 +19126,7 @@ int bar(int n){ // CHECK27-NEXT: [[TMP0:%.*]] = load i32, i32* [[A_ADDR]], align 4 // CHECK27-NEXT: store i32 [[TMP0]], i32* [[A_CASTED]], align 4 // CHECK27-NEXT: [[TMP1:%.*]] = load i32, i32* [[A_CASTED]], align 4 -// CHECK27-NEXT: [[TMP2:%.*]] = load i16, i16* [[CONV]], align 4 +// CHECK27-NEXT: [[TMP2:%.*]] = load i16, i16* [[CONV]], align 2 // CHECK27-NEXT: [[CONV1:%.*]] = bitcast i32* [[AA_CASTED]] to i16* // CHECK27-NEXT: store i16 [[TMP2]], i16* [[CONV1]], align 2 // CHECK27-NEXT: [[TMP3:%.*]] = load i32, i32* [[AA_CASTED]], align 4 @@ -19187,11 +19187,11 @@ int bar(int n){ // CHECK27-NEXT: [[TMP8:%.*]] = load i32, i32* [[A_ADDR]], align 4 // CHECK27-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP8]], 1 // CHECK27-NEXT: store i32 [[ADD2]], i32* [[A_ADDR]], align 4 -// CHECK27-NEXT: [[TMP9:%.*]] = load i16, i16* [[CONV]], align 4 +// CHECK27-NEXT: [[TMP9:%.*]] = load i16, i16* [[CONV]], align 2 // CHECK27-NEXT: [[CONV3:%.*]] = sext i16 [[TMP9]] to i32 // CHECK27-NEXT: [[ADD4:%.*]] = add nsw i32 [[CONV3]], 1 // CHECK27-NEXT: [[CONV5:%.*]] = trunc i32 [[ADD4]] to i16 -// CHECK27-NEXT: store i16 [[CONV5]], i16* [[CONV]], align 4 +// CHECK27-NEXT: store i16 [[CONV5]], i16* [[CONV]], align 2 // CHECK27-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK27: omp.body.continue: // CHECK27-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] @@ -19418,11 +19418,11 @@ int bar(int n){ // CHECK27-NEXT: [[TMP3:%.*]] = load i32, i32* [[A_ADDR]], align 4 // CHECK27-NEXT: store i32 [[TMP3]], i32* [[A_CASTED]], align 4 // CHECK27-NEXT: [[TMP4:%.*]] = load i32, i32* [[A_CASTED]], align 4 -// CHECK27-NEXT: [[TMP5:%.*]] = load i16, i16* [[CONV]], align 4 +// CHECK27-NEXT: [[TMP5:%.*]] = load i16, i16* [[CONV]], align 2 // CHECK27-NEXT: [[CONV2:%.*]] = bitcast i32* [[AA_CASTED]] to i16* // CHECK27-NEXT: store i16 [[TMP5]], i16* [[CONV2]], align 2 // CHECK27-NEXT: [[TMP6:%.*]] = load i32, i32* [[AA_CASTED]], align 4 -// CHECK27-NEXT: [[TMP7:%.*]] = load i8, i8* [[CONV1]], align 4 +// CHECK27-NEXT: 
[[TMP7:%.*]] = load i8, i8* [[CONV1]], align 1 // CHECK27-NEXT: [[CONV3:%.*]] = bitcast i32* [[AAA_CASTED]] to i8* // CHECK27-NEXT: store i8 [[TMP7]], i8* [[CONV3]], align 1 // CHECK27-NEXT: [[TMP8:%.*]] = load i32, i32* [[AAA_CASTED]], align 4 @@ -19519,16 +19519,16 @@ int bar(int n){ // CHECK27-NEXT: [[TMP20:%.*]] = load i32, i32* [[A_ADDR]], align 4 // CHECK27-NEXT: [[ADD11:%.*]] = add nsw i32 [[TMP20]], 1 // CHECK27-NEXT: store i32 [[ADD11]], i32* [[A_ADDR]], align 4 -// CHECK27-NEXT: [[TMP21:%.*]] = load i16, i16* [[CONV]], align 4 +// CHECK27-NEXT: [[TMP21:%.*]] = load i16, i16* [[CONV]], align 2 // CHECK27-NEXT: [[CONV12:%.*]] = sext i16 [[TMP21]] to i32 // CHECK27-NEXT: [[ADD13:%.*]] = add nsw i32 [[CONV12]], 1 // CHECK27-NEXT: [[CONV14:%.*]] = trunc i32 [[ADD13]] to i16 -// CHECK27-NEXT: store i16 [[CONV14]], i16* [[CONV]], align 4 -// CHECK27-NEXT: [[TMP22:%.*]] = load i8, i8* [[CONV1]], align 4 +// CHECK27-NEXT: store i16 [[CONV14]], i16* [[CONV]], align 2 +// CHECK27-NEXT: [[TMP22:%.*]] = load i8, i8* [[CONV1]], align 1 // CHECK27-NEXT: [[CONV15:%.*]] = sext i8 [[TMP22]] to i32 // CHECK27-NEXT: [[ADD16:%.*]] = add nsw i32 [[CONV15]], 1 // CHECK27-NEXT: [[CONV17:%.*]] = trunc i32 [[ADD16]] to i8 -// CHECK27-NEXT: store i8 [[CONV17]], i8* [[CONV1]], align 4 +// CHECK27-NEXT: store i8 [[CONV17]], i8* [[CONV1]], align 1 // CHECK27-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], [10 x i32]* [[TMP0]], i32 0, i32 2 // CHECK27-NEXT: [[TMP23:%.*]] = load i32, i32* [[ARRAYIDX]], align 4 // CHECK27-NEXT: [[ADD18:%.*]] = add nsw i32 [[TMP23]], 1 @@ -19681,7 +19681,7 @@ int bar(int n){ // CHECK27-NEXT: [[TMP1:%.*]] = load i32, i32* [[A_ADDR]], align 4 // CHECK27-NEXT: store i32 [[TMP1]], i32* [[A_CASTED]], align 4 // CHECK27-NEXT: [[TMP2:%.*]] = load i32, i32* [[A_CASTED]], align 4 -// CHECK27-NEXT: [[TMP3:%.*]] = load i16, i16* [[CONV]], align 4 +// CHECK27-NEXT: [[TMP3:%.*]] = load i16, i16* [[CONV]], align 2 // CHECK27-NEXT: [[CONV1:%.*]] = bitcast i32* [[AA_CASTED]] to i16* // CHECK27-NEXT: store i16 [[TMP3]], i16* [[CONV1]], align 2 // CHECK27-NEXT: [[TMP4:%.*]] = load i32, i32* [[AA_CASTED]], align 4 @@ -19745,11 +19745,11 @@ int bar(int n){ // CHECK27-NEXT: [[TMP9:%.*]] = load i32, i32* [[A_ADDR]], align 4 // CHECK27-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP9]], 1 // CHECK27-NEXT: store i32 [[ADD2]], i32* [[A_ADDR]], align 4 -// CHECK27-NEXT: [[TMP10:%.*]] = load i16, i16* [[CONV]], align 4 +// CHECK27-NEXT: [[TMP10:%.*]] = load i16, i16* [[CONV]], align 2 // CHECK27-NEXT: [[CONV3:%.*]] = sext i16 [[TMP10]] to i32 // CHECK27-NEXT: [[ADD4:%.*]] = add nsw i32 [[CONV3]], 1 // CHECK27-NEXT: [[CONV5:%.*]] = trunc i32 [[ADD4]] to i16 -// CHECK27-NEXT: store i16 [[CONV5]], i16* [[CONV]], align 4 +// CHECK27-NEXT: store i16 [[CONV5]], i16* [[CONV]], align 2 // CHECK27-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], [10 x i32]* [[TMP0]], i32 0, i32 2 // CHECK27-NEXT: [[TMP11:%.*]] = load i32, i32* [[ARRAYIDX]], align 4 // CHECK27-NEXT: [[ADD6:%.*]] = add nsw i32 [[TMP11]], 1 @@ -19784,7 +19784,7 @@ int bar(int n){ // CHECK28-NEXT: [[TMP1:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR__ADDR]], align 4 // CHECK28-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR__ADDR2]], align 4 // CHECK28-NEXT: call void @__kmpc_push_num_teams(%struct.ident_t* @[[GLOB2]], i32 [[TMP0]], i32 [[TMP1]], i32 [[TMP2]]) -// CHECK28-NEXT: [[TMP3:%.*]] = load i16, i16* [[CONV]], align 4 +// CHECK28-NEXT: [[TMP3:%.*]] = load i16, i16* [[CONV]], align 2 // CHECK28-NEXT: [[CONV3:%.*]] = bitcast i32* 
[[AA_CASTED]] to i16* // CHECK28-NEXT: store i16 [[TMP3]], i16* [[CONV3]], align 2 // CHECK28-NEXT: [[TMP4:%.*]] = load i32, i32* [[AA_CASTED]], align 4 @@ -19862,7 +19862,7 @@ int bar(int n){ // CHECK28-NEXT: [[AA_CASTED:%.*]] = alloca i32, align 4 // CHECK28-NEXT: store i32 [[AA]], i32* [[AA_ADDR]], align 4 // CHECK28-NEXT: [[CONV:%.*]] = bitcast i32* [[AA_ADDR]] to i16* -// CHECK28-NEXT: [[TMP0:%.*]] = load i16, i16* [[CONV]], align 4 +// CHECK28-NEXT: [[TMP0:%.*]] = load i16, i16* [[CONV]], align 2 // CHECK28-NEXT: [[CONV1:%.*]] = bitcast i32* [[AA_CASTED]] to i16* // CHECK28-NEXT: store i16 [[TMP0]], i16* [[CONV1]], align 2 // CHECK28-NEXT: [[TMP1:%.*]] = load i32, i32* [[AA_CASTED]], align 4 @@ -19918,11 +19918,11 @@ int bar(int n){ // CHECK28-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP7]], 1 // CHECK28-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK28-NEXT: store i32 [[ADD]], i32* [[I]], align 4 -// CHECK28-NEXT: [[TMP8:%.*]] = load i16, i16* [[CONV]], align 4 +// CHECK28-NEXT: [[TMP8:%.*]] = load i16, i16* [[CONV]], align 2 // CHECK28-NEXT: [[CONV2:%.*]] = sext i16 [[TMP8]] to i32 // CHECK28-NEXT: [[ADD3:%.*]] = add nsw i32 [[CONV2]], 1 // CHECK28-NEXT: [[CONV4:%.*]] = trunc i32 [[ADD3]] to i16 -// CHECK28-NEXT: store i16 [[CONV4]], i16* [[CONV]], align 4 +// CHECK28-NEXT: store i16 [[CONV4]], i16* [[CONV]], align 2 // CHECK28-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK28: omp.body.continue: // CHECK28-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] @@ -19951,7 +19951,7 @@ int bar(int n){ // CHECK28-NEXT: [[TMP0:%.*]] = load i32, i32* [[A_ADDR]], align 4 // CHECK28-NEXT: store i32 [[TMP0]], i32* [[A_CASTED]], align 4 // CHECK28-NEXT: [[TMP1:%.*]] = load i32, i32* [[A_CASTED]], align 4 -// CHECK28-NEXT: [[TMP2:%.*]] = load i16, i16* [[CONV]], align 4 +// CHECK28-NEXT: [[TMP2:%.*]] = load i16, i16* [[CONV]], align 2 // CHECK28-NEXT: [[CONV1:%.*]] = bitcast i32* [[AA_CASTED]] to i16* // CHECK28-NEXT: store i16 [[TMP2]], i16* [[CONV1]], align 2 // CHECK28-NEXT: [[TMP3:%.*]] = load i32, i32* [[AA_CASTED]], align 4 @@ -20012,11 +20012,11 @@ int bar(int n){ // CHECK28-NEXT: [[TMP8:%.*]] = load i32, i32* [[A_ADDR]], align 4 // CHECK28-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP8]], 1 // CHECK28-NEXT: store i32 [[ADD2]], i32* [[A_ADDR]], align 4 -// CHECK28-NEXT: [[TMP9:%.*]] = load i16, i16* [[CONV]], align 4 +// CHECK28-NEXT: [[TMP9:%.*]] = load i16, i16* [[CONV]], align 2 // CHECK28-NEXT: [[CONV3:%.*]] = sext i16 [[TMP9]] to i32 // CHECK28-NEXT: [[ADD4:%.*]] = add nsw i32 [[CONV3]], 1 // CHECK28-NEXT: [[CONV5:%.*]] = trunc i32 [[ADD4]] to i16 -// CHECK28-NEXT: store i16 [[CONV5]], i16* [[CONV]], align 4 +// CHECK28-NEXT: store i16 [[CONV5]], i16* [[CONV]], align 2 // CHECK28-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK28: omp.body.continue: // CHECK28-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] @@ -20243,11 +20243,11 @@ int bar(int n){ // CHECK28-NEXT: [[TMP3:%.*]] = load i32, i32* [[A_ADDR]], align 4 // CHECK28-NEXT: store i32 [[TMP3]], i32* [[A_CASTED]], align 4 // CHECK28-NEXT: [[TMP4:%.*]] = load i32, i32* [[A_CASTED]], align 4 -// CHECK28-NEXT: [[TMP5:%.*]] = load i16, i16* [[CONV]], align 4 +// CHECK28-NEXT: [[TMP5:%.*]] = load i16, i16* [[CONV]], align 2 // CHECK28-NEXT: [[CONV2:%.*]] = bitcast i32* [[AA_CASTED]] to i16* // CHECK28-NEXT: store i16 [[TMP5]], i16* [[CONV2]], align 2 // CHECK28-NEXT: [[TMP6:%.*]] = load i32, i32* [[AA_CASTED]], align 4 -// CHECK28-NEXT: [[TMP7:%.*]] = load i8, i8* [[CONV1]], align 4 +// CHECK28-NEXT: [[TMP7:%.*]] = load i8, i8* [[CONV1]], align 1 // 
CHECK28-NEXT: [[CONV3:%.*]] = bitcast i32* [[AAA_CASTED]] to i8* // CHECK28-NEXT: store i8 [[TMP7]], i8* [[CONV3]], align 1 // CHECK28-NEXT: [[TMP8:%.*]] = load i32, i32* [[AAA_CASTED]], align 4 @@ -20344,16 +20344,16 @@ int bar(int n){ // CHECK28-NEXT: [[TMP20:%.*]] = load i32, i32* [[A_ADDR]], align 4 // CHECK28-NEXT: [[ADD11:%.*]] = add nsw i32 [[TMP20]], 1 // CHECK28-NEXT: store i32 [[ADD11]], i32* [[A_ADDR]], align 4 -// CHECK28-NEXT: [[TMP21:%.*]] = load i16, i16* [[CONV]], align 4 +// CHECK28-NEXT: [[TMP21:%.*]] = load i16, i16* [[CONV]], align 2 // CHECK28-NEXT: [[CONV12:%.*]] = sext i16 [[TMP21]] to i32 // CHECK28-NEXT: [[ADD13:%.*]] = add nsw i32 [[CONV12]], 1 // CHECK28-NEXT: [[CONV14:%.*]] = trunc i32 [[ADD13]] to i16 -// CHECK28-NEXT: store i16 [[CONV14]], i16* [[CONV]], align 4 -// CHECK28-NEXT: [[TMP22:%.*]] = load i8, i8* [[CONV1]], align 4 +// CHECK28-NEXT: store i16 [[CONV14]], i16* [[CONV]], align 2 +// CHECK28-NEXT: [[TMP22:%.*]] = load i8, i8* [[CONV1]], align 1 // CHECK28-NEXT: [[CONV15:%.*]] = sext i8 [[TMP22]] to i32 // CHECK28-NEXT: [[ADD16:%.*]] = add nsw i32 [[CONV15]], 1 // CHECK28-NEXT: [[CONV17:%.*]] = trunc i32 [[ADD16]] to i8 -// CHECK28-NEXT: store i8 [[CONV17]], i8* [[CONV1]], align 4 +// CHECK28-NEXT: store i8 [[CONV17]], i8* [[CONV1]], align 1 // CHECK28-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], [10 x i32]* [[TMP0]], i32 0, i32 2 // CHECK28-NEXT: [[TMP23:%.*]] = load i32, i32* [[ARRAYIDX]], align 4 // CHECK28-NEXT: [[ADD18:%.*]] = add nsw i32 [[TMP23]], 1 @@ -20506,7 +20506,7 @@ int bar(int n){ // CHECK28-NEXT: [[TMP1:%.*]] = load i32, i32* [[A_ADDR]], align 4 // CHECK28-NEXT: store i32 [[TMP1]], i32* [[A_CASTED]], align 4 // CHECK28-NEXT: [[TMP2:%.*]] = load i32, i32* [[A_CASTED]], align 4 -// CHECK28-NEXT: [[TMP3:%.*]] = load i16, i16* [[CONV]], align 4 +// CHECK28-NEXT: [[TMP3:%.*]] = load i16, i16* [[CONV]], align 2 // CHECK28-NEXT: [[CONV1:%.*]] = bitcast i32* [[AA_CASTED]] to i16* // CHECK28-NEXT: store i16 [[TMP3]], i16* [[CONV1]], align 2 // CHECK28-NEXT: [[TMP4:%.*]] = load i32, i32* [[AA_CASTED]], align 4 @@ -20570,11 +20570,11 @@ int bar(int n){ // CHECK28-NEXT: [[TMP9:%.*]] = load i32, i32* [[A_ADDR]], align 4 // CHECK28-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP9]], 1 // CHECK28-NEXT: store i32 [[ADD2]], i32* [[A_ADDR]], align 4 -// CHECK28-NEXT: [[TMP10:%.*]] = load i16, i16* [[CONV]], align 4 +// CHECK28-NEXT: [[TMP10:%.*]] = load i16, i16* [[CONV]], align 2 // CHECK28-NEXT: [[CONV3:%.*]] = sext i16 [[TMP10]] to i32 // CHECK28-NEXT: [[ADD4:%.*]] = add nsw i32 [[CONV3]], 1 // CHECK28-NEXT: [[CONV5:%.*]] = trunc i32 [[ADD4]] to i16 -// CHECK28-NEXT: store i16 [[CONV5]], i16* [[CONV]], align 4 +// CHECK28-NEXT: store i16 [[CONV5]], i16* [[CONV]], align 2 // CHECK28-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], [10 x i32]* [[TMP0]], i32 0, i32 2 // CHECK28-NEXT: [[TMP11:%.*]] = load i32, i32* [[ARRAYIDX]], align 4 // CHECK28-NEXT: [[ADD6:%.*]] = add nsw i32 [[TMP11]], 1 diff --git a/clang/test/OpenMP/target_teams_distribute_collapse_codegen.cpp b/clang/test/OpenMP/target_teams_distribute_collapse_codegen.cpp index fcb73beed6ed..e5b6eac75b0d 100644 --- a/clang/test/OpenMP/target_teams_distribute_collapse_codegen.cpp +++ b/clang/test/OpenMP/target_teams_distribute_collapse_codegen.cpp @@ -820,11 +820,11 @@ int main (int argc, char **argv) { // CHECK9-NEXT: [[TMP0:%.*]] = load i64, i64* [[VLA_ADDR]], align 8 // CHECK9-NEXT: [[TMP1:%.*]] = load i64, i64* [[VLA_ADDR2]], align 8 // CHECK9-NEXT: [[TMP2:%.*]] = load i32*, 
i32** [[A_ADDR]], align 8 -// CHECK9-NEXT: [[TMP3:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK9-NEXT: [[TMP3:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK9-NEXT: [[CONV4:%.*]] = bitcast i64* [[N_CASTED]] to i32* // CHECK9-NEXT: store i32 [[TMP3]], i32* [[CONV4]], align 4 // CHECK9-NEXT: [[TMP4:%.*]] = load i64, i64* [[N_CASTED]], align 8 -// CHECK9-NEXT: [[TMP5:%.*]] = load i32, i32* [[CONV3]], align 8 +// CHECK9-NEXT: [[TMP5:%.*]] = load i32, i32* [[CONV3]], align 4 // CHECK9-NEXT: [[CONV5:%.*]] = bitcast i64* [[M_CASTED]] to i32* // CHECK9-NEXT: store i32 [[TMP5]], i32* [[CONV5]], align 4 // CHECK9-NEXT: [[TMP6:%.*]] = load i64, i64* [[M_CASTED]], align 8 @@ -868,9 +868,9 @@ int main (int argc, char **argv) { // CHECK9-NEXT: [[TMP0:%.*]] = load i64, i64* [[VLA_ADDR]], align 8 // CHECK9-NEXT: [[TMP1:%.*]] = load i64, i64* [[VLA_ADDR2]], align 8 // CHECK9-NEXT: [[TMP2:%.*]] = load i32*, i32** [[A_ADDR]], align 8 -// CHECK9-NEXT: [[TMP3:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK9-NEXT: [[TMP3:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK9-NEXT: store i32 [[TMP3]], i32* [[DOTCAPTURE_EXPR_]], align 4 -// CHECK9-NEXT: [[TMP4:%.*]] = load i32, i32* [[CONV3]], align 8 +// CHECK9-NEXT: [[TMP4:%.*]] = load i32, i32* [[CONV3]], align 4 // CHECK9-NEXT: store i32 [[TMP4]], i32* [[DOTCAPTURE_EXPR_5]], align 4 // CHECK9-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 // CHECK9-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP5]], 0 @@ -1264,11 +1264,11 @@ int main (int argc, char **argv) { // CHECK10-NEXT: [[TMP0:%.*]] = load i64, i64* [[VLA_ADDR]], align 8 // CHECK10-NEXT: [[TMP1:%.*]] = load i64, i64* [[VLA_ADDR2]], align 8 // CHECK10-NEXT: [[TMP2:%.*]] = load i32*, i32** [[A_ADDR]], align 8 -// CHECK10-NEXT: [[TMP3:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK10-NEXT: [[TMP3:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK10-NEXT: [[CONV4:%.*]] = bitcast i64* [[N_CASTED]] to i32* // CHECK10-NEXT: store i32 [[TMP3]], i32* [[CONV4]], align 4 // CHECK10-NEXT: [[TMP4:%.*]] = load i64, i64* [[N_CASTED]], align 8 -// CHECK10-NEXT: [[TMP5:%.*]] = load i32, i32* [[CONV3]], align 8 +// CHECK10-NEXT: [[TMP5:%.*]] = load i32, i32* [[CONV3]], align 4 // CHECK10-NEXT: [[CONV5:%.*]] = bitcast i64* [[M_CASTED]] to i32* // CHECK10-NEXT: store i32 [[TMP5]], i32* [[CONV5]], align 4 // CHECK10-NEXT: [[TMP6:%.*]] = load i64, i64* [[M_CASTED]], align 8 @@ -1312,9 +1312,9 @@ int main (int argc, char **argv) { // CHECK10-NEXT: [[TMP0:%.*]] = load i64, i64* [[VLA_ADDR]], align 8 // CHECK10-NEXT: [[TMP1:%.*]] = load i64, i64* [[VLA_ADDR2]], align 8 // CHECK10-NEXT: [[TMP2:%.*]] = load i32*, i32** [[A_ADDR]], align 8 -// CHECK10-NEXT: [[TMP3:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK10-NEXT: [[TMP3:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK10-NEXT: store i32 [[TMP3]], i32* [[DOTCAPTURE_EXPR_]], align 4 -// CHECK10-NEXT: [[TMP4:%.*]] = load i32, i32* [[CONV3]], align 8 +// CHECK10-NEXT: [[TMP4:%.*]] = load i32, i32* [[CONV3]], align 4 // CHECK10-NEXT: store i32 [[TMP4]], i32* [[DOTCAPTURE_EXPR_5]], align 4 // CHECK10-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 // CHECK10-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP5]], 0 diff --git a/clang/test/OpenMP/target_teams_distribute_dist_schedule_codegen.cpp b/clang/test/OpenMP/target_teams_distribute_dist_schedule_codegen.cpp index 8a278c56a29e..6022cf67baf2 100644 --- a/clang/test/OpenMP/target_teams_distribute_dist_schedule_codegen.cpp +++ b/clang/test/OpenMP/target_teams_distribute_dist_schedule_codegen.cpp @@ 
-1755,7 +1755,7 @@ int main (int argc, char **argv) { // CHECK9-NEXT: [[CONV:%.*]] = bitcast i64* [[N_ADDR]] to i32* // CHECK9-NEXT: [[TMP0:%.*]] = load i64, i64* [[VLA_ADDR]], align 8 // CHECK9-NEXT: [[TMP1:%.*]] = load i32*, i32** [[A_ADDR]], align 8 -// CHECK9-NEXT: [[TMP2:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK9-NEXT: [[TMP2:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK9-NEXT: [[CONV1:%.*]] = bitcast i64* [[N_CASTED]] to i32* // CHECK9-NEXT: store i32 [[TMP2]], i32* [[CONV1]], align 4 // CHECK9-NEXT: [[TMP3:%.*]] = load i64, i64* [[N_CASTED]], align 8 @@ -1789,7 +1789,7 @@ int main (int argc, char **argv) { // CHECK9-NEXT: [[CONV:%.*]] = bitcast i64* [[N_ADDR]] to i32* // CHECK9-NEXT: [[TMP0:%.*]] = load i64, i64* [[VLA_ADDR]], align 8 // CHECK9-NEXT: [[TMP1:%.*]] = load i32*, i32** [[A_ADDR]], align 8 -// CHECK9-NEXT: [[TMP2:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK9-NEXT: [[TMP2:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK9-NEXT: store i32 [[TMP2]], i32* [[DOTCAPTURE_EXPR_]], align 4 // CHECK9-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 // CHECK9-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP3]], 0 @@ -1871,7 +1871,7 @@ int main (int argc, char **argv) { // CHECK9-NEXT: [[CONV:%.*]] = bitcast i64* [[N_ADDR]] to i32* // CHECK9-NEXT: [[TMP0:%.*]] = load i64, i64* [[VLA_ADDR]], align 8 // CHECK9-NEXT: [[TMP1:%.*]] = load i32*, i32** [[A_ADDR]], align 8 -// CHECK9-NEXT: [[TMP2:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK9-NEXT: [[TMP2:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK9-NEXT: [[CONV1:%.*]] = bitcast i64* [[N_CASTED]] to i32* // CHECK9-NEXT: store i32 [[TMP2]], i32* [[CONV1]], align 4 // CHECK9-NEXT: [[TMP3:%.*]] = load i64, i64* [[N_CASTED]], align 8 @@ -1905,7 +1905,7 @@ int main (int argc, char **argv) { // CHECK9-NEXT: [[CONV:%.*]] = bitcast i64* [[N_ADDR]] to i32* // CHECK9-NEXT: [[TMP0:%.*]] = load i64, i64* [[VLA_ADDR]], align 8 // CHECK9-NEXT: [[TMP1:%.*]] = load i32*, i32** [[A_ADDR]], align 8 -// CHECK9-NEXT: [[TMP2:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK9-NEXT: [[TMP2:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK9-NEXT: store i32 [[TMP2]], i32* [[DOTCAPTURE_EXPR_]], align 4 // CHECK9-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 // CHECK9-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP3]], 0 @@ -1991,11 +1991,11 @@ int main (int argc, char **argv) { // CHECK9-NEXT: [[TMP0:%.*]] = load i64, i64* [[VLA_ADDR]], align 8 // CHECK9-NEXT: [[TMP1:%.*]] = load i32*, i32** [[A_ADDR]], align 8 // CHECK9-NEXT: [[CONV1:%.*]] = bitcast i64* [[DOTCAPTURE_EXPR__ADDR]] to i32* -// CHECK9-NEXT: [[TMP2:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK9-NEXT: [[TMP2:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK9-NEXT: [[CONV2:%.*]] = bitcast i64* [[N_CASTED]] to i32* // CHECK9-NEXT: store i32 [[TMP2]], i32* [[CONV2]], align 4 // CHECK9-NEXT: [[TMP3:%.*]] = load i64, i64* [[N_CASTED]], align 8 -// CHECK9-NEXT: [[TMP4:%.*]] = load i32, i32* [[CONV1]], align 8 +// CHECK9-NEXT: [[TMP4:%.*]] = load i32, i32* [[CONV1]], align 4 // CHECK9-NEXT: [[CONV3:%.*]] = bitcast i64* [[DOTCAPTURE_EXPR__CASTED]] to i32* // CHECK9-NEXT: store i32 [[TMP4]], i32* [[CONV3]], align 4 // CHECK9-NEXT: [[TMP5:%.*]] = load i64, i64* [[DOTCAPTURE_EXPR__CASTED]], align 8 @@ -2032,7 +2032,7 @@ int main (int argc, char **argv) { // CHECK9-NEXT: [[TMP0:%.*]] = load i64, i64* [[VLA_ADDR]], align 8 // CHECK9-NEXT: [[TMP1:%.*]] = load i32*, i32** [[A_ADDR]], align 8 // CHECK9-NEXT: [[CONV1:%.*]] = bitcast i64* [[DOTCAPTURE_EXPR__ADDR]] to 
i32* -// CHECK9-NEXT: [[TMP2:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK9-NEXT: [[TMP2:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK9-NEXT: store i32 [[TMP2]], i32* [[DOTCAPTURE_EXPR_2]], align 4 // CHECK9-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_2]], align 4 // CHECK9-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP3]], 0 @@ -2049,7 +2049,7 @@ int main (int argc, char **argv) { // CHECK9-NEXT: store i32 [[TMP5]], i32* [[DOTOMP_UB]], align 4 // CHECK9-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK9-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK9-NEXT: [[TMP6:%.*]] = load i32, i32* [[CONV1]], align 8 +// CHECK9-NEXT: [[TMP6:%.*]] = load i32, i32* [[CONV1]], align 4 // CHECK9-NEXT: [[TMP7:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK9-NEXT: [[TMP8:%.*]] = load i32, i32* [[TMP7]], align 4 // CHECK9-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP8]], i32 91, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 [[TMP6]]) @@ -2700,7 +2700,7 @@ int main (int argc, char **argv) { // CHECK10-NEXT: [[CONV:%.*]] = bitcast i64* [[N_ADDR]] to i32* // CHECK10-NEXT: [[TMP0:%.*]] = load i64, i64* [[VLA_ADDR]], align 8 // CHECK10-NEXT: [[TMP1:%.*]] = load i32*, i32** [[A_ADDR]], align 8 -// CHECK10-NEXT: [[TMP2:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK10-NEXT: [[TMP2:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK10-NEXT: [[CONV1:%.*]] = bitcast i64* [[N_CASTED]] to i32* // CHECK10-NEXT: store i32 [[TMP2]], i32* [[CONV1]], align 4 // CHECK10-NEXT: [[TMP3:%.*]] = load i64, i64* [[N_CASTED]], align 8 @@ -2734,7 +2734,7 @@ int main (int argc, char **argv) { // CHECK10-NEXT: [[CONV:%.*]] = bitcast i64* [[N_ADDR]] to i32* // CHECK10-NEXT: [[TMP0:%.*]] = load i64, i64* [[VLA_ADDR]], align 8 // CHECK10-NEXT: [[TMP1:%.*]] = load i32*, i32** [[A_ADDR]], align 8 -// CHECK10-NEXT: [[TMP2:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK10-NEXT: [[TMP2:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK10-NEXT: store i32 [[TMP2]], i32* [[DOTCAPTURE_EXPR_]], align 4 // CHECK10-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 // CHECK10-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP3]], 0 @@ -2816,7 +2816,7 @@ int main (int argc, char **argv) { // CHECK10-NEXT: [[CONV:%.*]] = bitcast i64* [[N_ADDR]] to i32* // CHECK10-NEXT: [[TMP0:%.*]] = load i64, i64* [[VLA_ADDR]], align 8 // CHECK10-NEXT: [[TMP1:%.*]] = load i32*, i32** [[A_ADDR]], align 8 -// CHECK10-NEXT: [[TMP2:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK10-NEXT: [[TMP2:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK10-NEXT: [[CONV1:%.*]] = bitcast i64* [[N_CASTED]] to i32* // CHECK10-NEXT: store i32 [[TMP2]], i32* [[CONV1]], align 4 // CHECK10-NEXT: [[TMP3:%.*]] = load i64, i64* [[N_CASTED]], align 8 @@ -2850,7 +2850,7 @@ int main (int argc, char **argv) { // CHECK10-NEXT: [[CONV:%.*]] = bitcast i64* [[N_ADDR]] to i32* // CHECK10-NEXT: [[TMP0:%.*]] = load i64, i64* [[VLA_ADDR]], align 8 // CHECK10-NEXT: [[TMP1:%.*]] = load i32*, i32** [[A_ADDR]], align 8 -// CHECK10-NEXT: [[TMP2:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK10-NEXT: [[TMP2:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK10-NEXT: store i32 [[TMP2]], i32* [[DOTCAPTURE_EXPR_]], align 4 // CHECK10-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 // CHECK10-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP3]], 0 @@ -2936,11 +2936,11 @@ int main (int argc, char **argv) { // CHECK10-NEXT: [[TMP0:%.*]] = load i64, i64* 
[[VLA_ADDR]], align 8 // CHECK10-NEXT: [[TMP1:%.*]] = load i32*, i32** [[A_ADDR]], align 8 // CHECK10-NEXT: [[CONV1:%.*]] = bitcast i64* [[DOTCAPTURE_EXPR__ADDR]] to i32* -// CHECK10-NEXT: [[TMP2:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK10-NEXT: [[TMP2:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK10-NEXT: [[CONV2:%.*]] = bitcast i64* [[N_CASTED]] to i32* // CHECK10-NEXT: store i32 [[TMP2]], i32* [[CONV2]], align 4 // CHECK10-NEXT: [[TMP3:%.*]] = load i64, i64* [[N_CASTED]], align 8 -// CHECK10-NEXT: [[TMP4:%.*]] = load i32, i32* [[CONV1]], align 8 +// CHECK10-NEXT: [[TMP4:%.*]] = load i32, i32* [[CONV1]], align 4 // CHECK10-NEXT: [[CONV3:%.*]] = bitcast i64* [[DOTCAPTURE_EXPR__CASTED]] to i32* // CHECK10-NEXT: store i32 [[TMP4]], i32* [[CONV3]], align 4 // CHECK10-NEXT: [[TMP5:%.*]] = load i64, i64* [[DOTCAPTURE_EXPR__CASTED]], align 8 @@ -2977,7 +2977,7 @@ int main (int argc, char **argv) { // CHECK10-NEXT: [[TMP0:%.*]] = load i64, i64* [[VLA_ADDR]], align 8 // CHECK10-NEXT: [[TMP1:%.*]] = load i32*, i32** [[A_ADDR]], align 8 // CHECK10-NEXT: [[CONV1:%.*]] = bitcast i64* [[DOTCAPTURE_EXPR__ADDR]] to i32* -// CHECK10-NEXT: [[TMP2:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK10-NEXT: [[TMP2:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK10-NEXT: store i32 [[TMP2]], i32* [[DOTCAPTURE_EXPR_2]], align 4 // CHECK10-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_2]], align 4 // CHECK10-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP3]], 0 @@ -2994,7 +2994,7 @@ int main (int argc, char **argv) { // CHECK10-NEXT: store i32 [[TMP5]], i32* [[DOTOMP_UB]], align 4 // CHECK10-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK10-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK10-NEXT: [[TMP6:%.*]] = load i32, i32* [[CONV1]], align 8 +// CHECK10-NEXT: [[TMP6:%.*]] = load i32, i32* [[CONV1]], align 4 // CHECK10-NEXT: [[TMP7:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK10-NEXT: [[TMP8:%.*]] = load i32, i32* [[TMP7]], align 4 // CHECK10-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP8]], i32 91, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 [[TMP6]]) diff --git a/clang/test/OpenMP/target_teams_distribute_firstprivate_codegen.cpp b/clang/test/OpenMP/target_teams_distribute_firstprivate_codegen.cpp index 106500b00271..3af5b56cf7ed 100644 --- a/clang/test/OpenMP/target_teams_distribute_firstprivate_codegen.cpp +++ b/clang/test/OpenMP/target_teams_distribute_firstprivate_codegen.cpp @@ -345,11 +345,11 @@ int main() { // CHECK1-NEXT: [[TMP1:%.*]] = load [2 x %struct.S]*, [2 x %struct.S]** [[S_ARR_ADDR]], align 8 // CHECK1-NEXT: [[TMP2:%.*]] = load %struct.S*, %struct.S** [[VAR_ADDR]], align 8 // CHECK1-NEXT: [[CONV1:%.*]] = bitcast i64* [[SIVAR_ADDR]] to i32* -// CHECK1-NEXT: [[TMP3:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK1-NEXT: [[TMP3:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK1-NEXT: [[CONV2:%.*]] = bitcast i64* [[T_VAR_CASTED]] to i32* // CHECK1-NEXT: store i32 [[TMP3]], i32* [[CONV2]], align 4 // CHECK1-NEXT: [[TMP4:%.*]] = load i64, i64* [[T_VAR_CASTED]], align 8 -// CHECK1-NEXT: [[TMP5:%.*]] = load i32, i32* [[CONV1]], align 8 +// CHECK1-NEXT: [[TMP5:%.*]] = load i32, i32* [[CONV1]], align 4 // CHECK1-NEXT: [[CONV3:%.*]] = bitcast i64* [[SIVAR_CASTED]] to i32* // CHECK1-NEXT: store i32 [[TMP5]], i32* [[CONV3]], align 4 // CHECK1-NEXT: [[TMP6:%.*]] = load i64, i64* [[SIVAR_CASTED]], align 8 @@ -446,7 +446,7 @@ int main() { // CHECK1-NEXT: 
[[MUL:%.*]] = mul nsw i32 [[TMP14]], 1 // CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK1-NEXT: store i32 [[ADD]], i32* [[I]], align 4 -// CHECK1-NEXT: [[TMP15:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK1-NEXT: [[TMP15:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK1-NEXT: [[TMP16:%.*]] = load i32, i32* [[I]], align 4 // CHECK1-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP16]] to i64 // CHECK1-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x i32], [2 x i32]* [[VEC2]], i64 0, i64 [[IDXPROM]] @@ -458,9 +458,9 @@ int main() { // CHECK1-NEXT: [[TMP19:%.*]] = bitcast %struct.S* [[VAR5]] to i8* // CHECK1-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 [[TMP18]], i8* align 4 [[TMP19]], i64 4, i1 false) // CHECK1-NEXT: [[TMP20:%.*]] = load i32, i32* [[I]], align 4 -// CHECK1-NEXT: [[TMP21:%.*]] = load i32, i32* [[CONV1]], align 8 +// CHECK1-NEXT: [[TMP21:%.*]] = load i32, i32* [[CONV1]], align 4 // CHECK1-NEXT: [[ADD10:%.*]] = add nsw i32 [[TMP21]], [[TMP20]] -// CHECK1-NEXT: store i32 [[ADD10]], i32* [[CONV1]], align 8 +// CHECK1-NEXT: store i32 [[ADD10]], i32* [[CONV1]], align 4 // CHECK1-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK1: omp.body.continue: // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] @@ -694,7 +694,7 @@ int main() { // CHECK1-NEXT: [[TMP1:%.*]] = load [2 x %struct.S.0]*, [2 x %struct.S.0]** [[S_ARR_ADDR]], align 8 // CHECK1-NEXT: [[TMP2:%.*]] = load %struct.S.0*, %struct.S.0** [[VAR_ADDR]], align 8 // CHECK1-NEXT: store %struct.S.0* [[TMP2]], %struct.S.0** [[TMP]], align 8 -// CHECK1-NEXT: [[TMP3:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK1-NEXT: [[TMP3:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK1-NEXT: [[CONV1:%.*]] = bitcast i64* [[T_VAR_CASTED]] to i32* // CHECK1-NEXT: store i32 [[TMP3]], i32* [[CONV1]], align 4 // CHECK1-NEXT: [[TMP4:%.*]] = load i64, i64* [[T_VAR_CASTED]], align 8 @@ -794,7 +794,7 @@ int main() { // CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP15]], 1 // CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK1-NEXT: store i32 [[ADD]], i32* [[I]], align 4 -// CHECK1-NEXT: [[TMP16:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK1-NEXT: [[TMP16:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK1-NEXT: [[TMP17:%.*]] = load i32, i32* [[I]], align 4 // CHECK1-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP17]] to i64 // CHECK1-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x i32], [2 x i32]* [[VEC2]], i64 0, i64 [[IDXPROM]] @@ -1135,11 +1135,11 @@ int main() { // CHECK2-NEXT: [[TMP1:%.*]] = load [2 x %struct.S]*, [2 x %struct.S]** [[S_ARR_ADDR]], align 8 // CHECK2-NEXT: [[TMP2:%.*]] = load %struct.S*, %struct.S** [[VAR_ADDR]], align 8 // CHECK2-NEXT: [[CONV1:%.*]] = bitcast i64* [[SIVAR_ADDR]] to i32* -// CHECK2-NEXT: [[TMP3:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK2-NEXT: [[TMP3:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK2-NEXT: [[CONV2:%.*]] = bitcast i64* [[T_VAR_CASTED]] to i32* // CHECK2-NEXT: store i32 [[TMP3]], i32* [[CONV2]], align 4 // CHECK2-NEXT: [[TMP4:%.*]] = load i64, i64* [[T_VAR_CASTED]], align 8 -// CHECK2-NEXT: [[TMP5:%.*]] = load i32, i32* [[CONV1]], align 8 +// CHECK2-NEXT: [[TMP5:%.*]] = load i32, i32* [[CONV1]], align 4 // CHECK2-NEXT: [[CONV3:%.*]] = bitcast i64* [[SIVAR_CASTED]] to i32* // CHECK2-NEXT: store i32 [[TMP5]], i32* [[CONV3]], align 4 // CHECK2-NEXT: [[TMP6:%.*]] = load i64, i64* [[SIVAR_CASTED]], align 8 @@ -1236,7 +1236,7 @@ int main() { // CHECK2-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP14]], 1 // CHECK2-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK2-NEXT: store i32 
[[ADD]], i32* [[I]], align 4 -// CHECK2-NEXT: [[TMP15:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK2-NEXT: [[TMP15:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK2-NEXT: [[TMP16:%.*]] = load i32, i32* [[I]], align 4 // CHECK2-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP16]] to i64 // CHECK2-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x i32], [2 x i32]* [[VEC2]], i64 0, i64 [[IDXPROM]] @@ -1248,9 +1248,9 @@ int main() { // CHECK2-NEXT: [[TMP19:%.*]] = bitcast %struct.S* [[VAR5]] to i8* // CHECK2-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 [[TMP18]], i8* align 4 [[TMP19]], i64 4, i1 false) // CHECK2-NEXT: [[TMP20:%.*]] = load i32, i32* [[I]], align 4 -// CHECK2-NEXT: [[TMP21:%.*]] = load i32, i32* [[CONV1]], align 8 +// CHECK2-NEXT: [[TMP21:%.*]] = load i32, i32* [[CONV1]], align 4 // CHECK2-NEXT: [[ADD10:%.*]] = add nsw i32 [[TMP21]], [[TMP20]] -// CHECK2-NEXT: store i32 [[ADD10]], i32* [[CONV1]], align 8 +// CHECK2-NEXT: store i32 [[ADD10]], i32* [[CONV1]], align 4 // CHECK2-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK2: omp.body.continue: // CHECK2-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] @@ -1484,7 +1484,7 @@ int main() { // CHECK2-NEXT: [[TMP1:%.*]] = load [2 x %struct.S.0]*, [2 x %struct.S.0]** [[S_ARR_ADDR]], align 8 // CHECK2-NEXT: [[TMP2:%.*]] = load %struct.S.0*, %struct.S.0** [[VAR_ADDR]], align 8 // CHECK2-NEXT: store %struct.S.0* [[TMP2]], %struct.S.0** [[TMP]], align 8 -// CHECK2-NEXT: [[TMP3:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK2-NEXT: [[TMP3:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK2-NEXT: [[CONV1:%.*]] = bitcast i64* [[T_VAR_CASTED]] to i32* // CHECK2-NEXT: store i32 [[TMP3]], i32* [[CONV1]], align 4 // CHECK2-NEXT: [[TMP4:%.*]] = load i64, i64* [[T_VAR_CASTED]], align 8 @@ -1584,7 +1584,7 @@ int main() { // CHECK2-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP15]], 1 // CHECK2-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK2-NEXT: store i32 [[ADD]], i32* [[I]], align 4 -// CHECK2-NEXT: [[TMP16:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK2-NEXT: [[TMP16:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK2-NEXT: [[TMP17:%.*]] = load i32, i32* [[I]], align 4 // CHECK2-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP17]] to i64 // CHECK2-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x i32], [2 x i32]* [[VEC2]], i64 0, i64 [[IDXPROM]] @@ -3407,7 +3407,7 @@ int main() { // CHECK9-NEXT: [[CONV1:%.*]] = bitcast i64* [[G1_ADDR]] to i32* // CHECK9-NEXT: [[CONV2:%.*]] = bitcast i64* [[SIVAR_ADDR]] to i32* // CHECK9-NEXT: store i32* [[CONV1]], i32** [[TMP]], align 8 -// CHECK9-NEXT: [[TMP0:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK9-NEXT: [[TMP0:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK9-NEXT: [[CONV3:%.*]] = bitcast i64* [[G_CASTED]] to i32* // CHECK9-NEXT: store i32 [[TMP0]], i32* [[CONV3]], align 4 // CHECK9-NEXT: [[TMP1:%.*]] = load i64, i64* [[G_CASTED]], align 8 @@ -3416,7 +3416,7 @@ int main() { // CHECK9-NEXT: [[CONV4:%.*]] = bitcast i64* [[G1_CASTED]] to i32* // CHECK9-NEXT: store i32 [[TMP3]], i32* [[CONV4]], align 4 // CHECK9-NEXT: [[TMP4:%.*]] = load i64, i64* [[G1_CASTED]], align 8 -// CHECK9-NEXT: [[TMP5:%.*]] = load i32, i32* [[CONV2]], align 8 +// CHECK9-NEXT: [[TMP5:%.*]] = load i32, i32* [[CONV2]], align 4 // CHECK9-NEXT: [[CONV5:%.*]] = bitcast i64* [[SIVAR_CASTED]] to i32* // CHECK9-NEXT: store i32 [[TMP5]], i32* [[CONV5]], align 4 // CHECK9-NEXT: [[TMP6:%.*]] = load i64, i64* [[SIVAR_CASTED]], align 8 @@ -3481,10 +3481,10 @@ int main() { // CHECK9-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP7]], 1 // CHECK9-NEXT: 
[[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK9-NEXT: store i32 [[ADD]], i32* [[I]], align 4 -// CHECK9-NEXT: store i32 1, i32* [[CONV]], align 8 +// CHECK9-NEXT: store i32 1, i32* [[CONV]], align 4 // CHECK9-NEXT: [[TMP8:%.*]] = load i32*, i32** [[TMP]], align 8 // CHECK9-NEXT: store volatile i32 1, i32* [[TMP8]], align 4 -// CHECK9-NEXT: store i32 2, i32* [[CONV2]], align 8 +// CHECK9-NEXT: store i32 2, i32* [[CONV2]], align 4 // CHECK9-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[CLASS_ANON_0]], %class.anon.0* [[REF_TMP]], i32 0, i32 0 // CHECK9-NEXT: store i32* [[CONV]], i32** [[TMP9]], align 8 // CHECK9-NEXT: [[TMP10:%.*]] = getelementptr inbounds [[CLASS_ANON_0]], %class.anon.0* [[REF_TMP]], i32 0, i32 1 @@ -3664,7 +3664,7 @@ int main() { // CHECK10-NEXT: [[CONV1:%.*]] = bitcast i64* [[G1_ADDR]] to i32* // CHECK10-NEXT: [[CONV2:%.*]] = bitcast i64* [[SIVAR_ADDR]] to i32* // CHECK10-NEXT: store i32* [[CONV1]], i32** [[TMP]], align 8 -// CHECK10-NEXT: [[TMP0:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK10-NEXT: [[TMP0:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK10-NEXT: [[CONV3:%.*]] = bitcast i64* [[G_CASTED]] to i32* // CHECK10-NEXT: store i32 [[TMP0]], i32* [[CONV3]], align 4 // CHECK10-NEXT: [[TMP1:%.*]] = load i64, i64* [[G_CASTED]], align 8 @@ -3673,7 +3673,7 @@ int main() { // CHECK10-NEXT: [[CONV4:%.*]] = bitcast i64* [[G1_CASTED]] to i32* // CHECK10-NEXT: store i32 [[TMP3]], i32* [[CONV4]], align 4 // CHECK10-NEXT: [[TMP4:%.*]] = load i64, i64* [[G1_CASTED]], align 8 -// CHECK10-NEXT: [[TMP5:%.*]] = load i32, i32* [[CONV2]], align 8 +// CHECK10-NEXT: [[TMP5:%.*]] = load i32, i32* [[CONV2]], align 4 // CHECK10-NEXT: [[CONV5:%.*]] = bitcast i64* [[SIVAR_CASTED]] to i32* // CHECK10-NEXT: store i32 [[TMP5]], i32* [[CONV5]], align 4 // CHECK10-NEXT: [[TMP6:%.*]] = load i64, i64* [[SIVAR_CASTED]], align 8 @@ -3738,10 +3738,10 @@ int main() { // CHECK10-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP7]], 1 // CHECK10-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK10-NEXT: store i32 [[ADD]], i32* [[I]], align 4 -// CHECK10-NEXT: store i32 1, i32* [[CONV]], align 8 +// CHECK10-NEXT: store i32 1, i32* [[CONV]], align 4 // CHECK10-NEXT: [[TMP8:%.*]] = load i32*, i32** [[TMP]], align 8 // CHECK10-NEXT: store volatile i32 1, i32* [[TMP8]], align 4 -// CHECK10-NEXT: store i32 2, i32* [[CONV2]], align 8 +// CHECK10-NEXT: store i32 2, i32* [[CONV2]], align 4 // CHECK10-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[CLASS_ANON_0]], %class.anon.0* [[REF_TMP]], i32 0, i32 0 // CHECK10-NEXT: store i32* [[CONV]], i32** [[TMP9]], align 8 // CHECK10-NEXT: [[TMP10:%.*]] = getelementptr inbounds [[CLASS_ANON_0]], %class.anon.0* [[REF_TMP]], i32 0, i32 1 diff --git a/clang/test/OpenMP/target_teams_distribute_lastprivate_codegen.cpp b/clang/test/OpenMP/target_teams_distribute_lastprivate_codegen.cpp index a1087d5a9843..51e4b21eb6c6 100644 --- a/clang/test/OpenMP/target_teams_distribute_lastprivate_codegen.cpp +++ b/clang/test/OpenMP/target_teams_distribute_lastprivate_codegen.cpp @@ -182,11 +182,11 @@ int main() { // CHECK1-NEXT: [[CONV5:%.*]] = bitcast i64* [[G1_CASTED]] to double* // CHECK1-NEXT: store double [[TMP3]], double* [[CONV5]], align 8 // CHECK1-NEXT: [[TMP4:%.*]] = load i64, i64* [[G1_CASTED]], align 8 -// CHECK1-NEXT: [[TMP5:%.*]] = load i32, i32* [[CONV2]], align 8 +// CHECK1-NEXT: [[TMP5:%.*]] = load i32, i32* [[CONV2]], align 4 // CHECK1-NEXT: [[CONV6:%.*]] = bitcast i64* [[SVAR_CASTED]] to i32* // CHECK1-NEXT: store i32 [[TMP5]], i32* [[CONV6]], align 4 // CHECK1-NEXT: [[TMP6:%.*]] 
= load i64, i64* [[SVAR_CASTED]], align 8 -// CHECK1-NEXT: [[TMP7:%.*]] = load float, float* [[CONV3]], align 8 +// CHECK1-NEXT: [[TMP7:%.*]] = load float, float* [[CONV3]], align 4 // CHECK1-NEXT: [[CONV7:%.*]] = bitcast i64* [[SFVAR_CASTED]] to float* // CHECK1-NEXT: store float [[TMP7]], float* [[CONV7]], align 4 // CHECK1-NEXT: [[TMP8:%.*]] = load i64, i64* [[SFVAR_CASTED]], align 8 @@ -298,9 +298,9 @@ int main() { // CHECK1-NEXT: [[TMP20:%.*]] = load double, double* [[TMP19]], align 8 // CHECK1-NEXT: store volatile double [[TMP20]], double* [[TMP0]], align 8 // CHECK1-NEXT: [[TMP21:%.*]] = load i32, i32* [[SVAR8]], align 4 -// CHECK1-NEXT: store i32 [[TMP21]], i32* [[CONV2]], align 8 +// CHECK1-NEXT: store i32 [[TMP21]], i32* [[CONV2]], align 4 // CHECK1-NEXT: [[TMP22:%.*]] = load float, float* [[SFVAR9]], align 4 -// CHECK1-NEXT: store float [[TMP22]], float* [[CONV3]], align 8 +// CHECK1-NEXT: store float [[TMP22]], float* [[CONV3]], align 4 // CHECK1-NEXT: br label [[DOTOMP_LASTPRIVATE_DONE]] // CHECK1: .omp.lastprivate.done: // CHECK1-NEXT: ret void @@ -361,11 +361,11 @@ int main() { // CHECK2-NEXT: [[CONV5:%.*]] = bitcast i64* [[G1_CASTED]] to double* // CHECK2-NEXT: store double [[TMP3]], double* [[CONV5]], align 8 // CHECK2-NEXT: [[TMP4:%.*]] = load i64, i64* [[G1_CASTED]], align 8 -// CHECK2-NEXT: [[TMP5:%.*]] = load i32, i32* [[CONV2]], align 8 +// CHECK2-NEXT: [[TMP5:%.*]] = load i32, i32* [[CONV2]], align 4 // CHECK2-NEXT: [[CONV6:%.*]] = bitcast i64* [[SVAR_CASTED]] to i32* // CHECK2-NEXT: store i32 [[TMP5]], i32* [[CONV6]], align 4 // CHECK2-NEXT: [[TMP6:%.*]] = load i64, i64* [[SVAR_CASTED]], align 8 -// CHECK2-NEXT: [[TMP7:%.*]] = load float, float* [[CONV3]], align 8 +// CHECK2-NEXT: [[TMP7:%.*]] = load float, float* [[CONV3]], align 4 // CHECK2-NEXT: [[CONV7:%.*]] = bitcast i64* [[SFVAR_CASTED]] to float* // CHECK2-NEXT: store float [[TMP7]], float* [[CONV7]], align 4 // CHECK2-NEXT: [[TMP8:%.*]] = load i64, i64* [[SFVAR_CASTED]], align 8 @@ -477,9 +477,9 @@ int main() { // CHECK2-NEXT: [[TMP20:%.*]] = load double, double* [[TMP19]], align 8 // CHECK2-NEXT: store volatile double [[TMP20]], double* [[TMP0]], align 8 // CHECK2-NEXT: [[TMP21:%.*]] = load i32, i32* [[SVAR8]], align 4 -// CHECK2-NEXT: store i32 [[TMP21]], i32* [[CONV2]], align 8 +// CHECK2-NEXT: store i32 [[TMP21]], i32* [[CONV2]], align 4 // CHECK2-NEXT: [[TMP22:%.*]] = load float, float* [[SFVAR9]], align 4 -// CHECK2-NEXT: store float [[TMP22]], float* [[CONV3]], align 8 +// CHECK2-NEXT: store float [[TMP22]], float* [[CONV3]], align 4 // CHECK2-NEXT: br label [[DOTOMP_LASTPRIVATE_DONE]] // CHECK2: .omp.lastprivate.done: // CHECK2-NEXT: ret void @@ -977,12 +977,12 @@ int main() { // CHECK9-NEXT: [[TMP2:%.*]] = load %struct.S*, %struct.S** [[VAR_ADDR]], align 8 // CHECK9-NEXT: [[CONV1:%.*]] = bitcast i64* [[SVAR_ADDR]] to i32* // CHECK9-NEXT: store %struct.S* [[TMP2]], %struct.S** [[TMP]], align 8 -// CHECK9-NEXT: [[TMP3:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK9-NEXT: [[TMP3:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK9-NEXT: [[CONV2:%.*]] = bitcast i64* [[T_VAR_CASTED]] to i32* // CHECK9-NEXT: store i32 [[TMP3]], i32* [[CONV2]], align 4 // CHECK9-NEXT: [[TMP4:%.*]] = load i64, i64* [[T_VAR_CASTED]], align 8 // CHECK9-NEXT: [[TMP5:%.*]] = load %struct.S*, %struct.S** [[TMP]], align 8 -// CHECK9-NEXT: [[TMP6:%.*]] = load i32, i32* [[CONV1]], align 8 +// CHECK9-NEXT: [[TMP6:%.*]] = load i32, i32* [[CONV1]], align 4 // CHECK9-NEXT: [[CONV3:%.*]] = bitcast i64* [[SVAR_CASTED]] to i32* // 
CHECK9-NEXT: store i32 [[TMP6]], i32* [[CONV3]], align 4 // CHECK9-NEXT: [[TMP7:%.*]] = load i64, i64* [[SVAR_CASTED]], align 8 @@ -1104,7 +1104,7 @@ int main() { // CHECK9-NEXT: br i1 [[TMP22]], label [[DOTOMP_LASTPRIVATE_THEN:%.*]], label [[DOTOMP_LASTPRIVATE_DONE:%.*]] // CHECK9: .omp.lastprivate.then: // CHECK9-NEXT: [[TMP23:%.*]] = load i32, i32* [[T_VAR3]], align 4 -// CHECK9-NEXT: store i32 [[TMP23]], i32* [[CONV]], align 8 +// CHECK9-NEXT: store i32 [[TMP23]], i32* [[CONV]], align 4 // CHECK9-NEXT: [[TMP24:%.*]] = bitcast [2 x i32]* [[TMP0]] to i8* // CHECK9-NEXT: [[TMP25:%.*]] = bitcast [2 x i32]* [[VEC4]] to i8* // CHECK9-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 [[TMP24]], i8* align 4 [[TMP25]], i64 8, i1 false) @@ -1129,7 +1129,7 @@ int main() { // CHECK9-NEXT: [[TMP32:%.*]] = bitcast %struct.S* [[TMP30]] to i8* // CHECK9-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 [[TMP31]], i8* align 4 [[TMP32]], i64 4, i1 false) // CHECK9-NEXT: [[TMP33:%.*]] = load i32, i32* [[SVAR8]], align 4 -// CHECK9-NEXT: store i32 [[TMP33]], i32* [[CONV1]], align 8 +// CHECK9-NEXT: store i32 [[TMP33]], i32* [[CONV1]], align 4 // CHECK9-NEXT: br label [[DOTOMP_LASTPRIVATE_DONE]] // CHECK9: .omp.lastprivate.done: // CHECK9-NEXT: call void @_ZN1SIfED1Ev(%struct.S* nonnull align 4 dereferenceable(4) [[VAR6]]) #[[ATTR4]] @@ -1320,7 +1320,7 @@ int main() { // CHECK9-NEXT: [[TMP1:%.*]] = load [2 x %struct.S.0]*, [2 x %struct.S.0]** [[S_ARR_ADDR]], align 8 // CHECK9-NEXT: [[TMP2:%.*]] = load %struct.S.0*, %struct.S.0** [[VAR_ADDR]], align 8 // CHECK9-NEXT: store %struct.S.0* [[TMP2]], %struct.S.0** [[TMP]], align 8 -// CHECK9-NEXT: [[TMP3:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK9-NEXT: [[TMP3:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK9-NEXT: [[CONV1:%.*]] = bitcast i64* [[T_VAR_CASTED]] to i32* // CHECK9-NEXT: store i32 [[TMP3]], i32* [[CONV1]], align 4 // CHECK9-NEXT: [[TMP4:%.*]] = load i64, i64* [[T_VAR_CASTED]], align 8 @@ -1439,7 +1439,7 @@ int main() { // CHECK9-NEXT: br i1 [[TMP22]], label [[DOTOMP_LASTPRIVATE_THEN:%.*]], label [[DOTOMP_LASTPRIVATE_DONE:%.*]] // CHECK9: .omp.lastprivate.then: // CHECK9-NEXT: [[TMP23:%.*]] = load i32, i32* [[T_VAR2]], align 4 -// CHECK9-NEXT: store i32 [[TMP23]], i32* [[CONV]], align 8 +// CHECK9-NEXT: store i32 [[TMP23]], i32* [[CONV]], align 4 // CHECK9-NEXT: [[TMP24:%.*]] = bitcast [2 x i32]* [[TMP0]] to i8* // CHECK9-NEXT: [[TMP25:%.*]] = bitcast [2 x i32]* [[VEC3]] to i8* // CHECK9-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 [[TMP24]], i8* align 4 [[TMP25]], i64 8, i1 false) @@ -1683,12 +1683,12 @@ int main() { // CHECK10-NEXT: [[TMP2:%.*]] = load %struct.S*, %struct.S** [[VAR_ADDR]], align 8 // CHECK10-NEXT: [[CONV1:%.*]] = bitcast i64* [[SVAR_ADDR]] to i32* // CHECK10-NEXT: store %struct.S* [[TMP2]], %struct.S** [[TMP]], align 8 -// CHECK10-NEXT: [[TMP3:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK10-NEXT: [[TMP3:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK10-NEXT: [[CONV2:%.*]] = bitcast i64* [[T_VAR_CASTED]] to i32* // CHECK10-NEXT: store i32 [[TMP3]], i32* [[CONV2]], align 4 // CHECK10-NEXT: [[TMP4:%.*]] = load i64, i64* [[T_VAR_CASTED]], align 8 // CHECK10-NEXT: [[TMP5:%.*]] = load %struct.S*, %struct.S** [[TMP]], align 8 -// CHECK10-NEXT: [[TMP6:%.*]] = load i32, i32* [[CONV1]], align 8 +// CHECK10-NEXT: [[TMP6:%.*]] = load i32, i32* [[CONV1]], align 4 // CHECK10-NEXT: [[CONV3:%.*]] = bitcast i64* [[SVAR_CASTED]] to i32* // CHECK10-NEXT: store i32 [[TMP6]], i32* [[CONV3]], align 4 // CHECK10-NEXT: 
[[TMP7:%.*]] = load i64, i64* [[SVAR_CASTED]], align 8 @@ -1810,7 +1810,7 @@ int main() { // CHECK10-NEXT: br i1 [[TMP22]], label [[DOTOMP_LASTPRIVATE_THEN:%.*]], label [[DOTOMP_LASTPRIVATE_DONE:%.*]] // CHECK10: .omp.lastprivate.then: // CHECK10-NEXT: [[TMP23:%.*]] = load i32, i32* [[T_VAR3]], align 4 -// CHECK10-NEXT: store i32 [[TMP23]], i32* [[CONV]], align 8 +// CHECK10-NEXT: store i32 [[TMP23]], i32* [[CONV]], align 4 // CHECK10-NEXT: [[TMP24:%.*]] = bitcast [2 x i32]* [[TMP0]] to i8* // CHECK10-NEXT: [[TMP25:%.*]] = bitcast [2 x i32]* [[VEC4]] to i8* // CHECK10-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 [[TMP24]], i8* align 4 [[TMP25]], i64 8, i1 false) @@ -1835,7 +1835,7 @@ int main() { // CHECK10-NEXT: [[TMP32:%.*]] = bitcast %struct.S* [[TMP30]] to i8* // CHECK10-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 [[TMP31]], i8* align 4 [[TMP32]], i64 4, i1 false) // CHECK10-NEXT: [[TMP33:%.*]] = load i32, i32* [[SVAR8]], align 4 -// CHECK10-NEXT: store i32 [[TMP33]], i32* [[CONV1]], align 8 +// CHECK10-NEXT: store i32 [[TMP33]], i32* [[CONV1]], align 4 // CHECK10-NEXT: br label [[DOTOMP_LASTPRIVATE_DONE]] // CHECK10: .omp.lastprivate.done: // CHECK10-NEXT: call void @_ZN1SIfED1Ev(%struct.S* nonnull align 4 dereferenceable(4) [[VAR6]]) #[[ATTR4]] @@ -2026,7 +2026,7 @@ int main() { // CHECK10-NEXT: [[TMP1:%.*]] = load [2 x %struct.S.0]*, [2 x %struct.S.0]** [[S_ARR_ADDR]], align 8 // CHECK10-NEXT: [[TMP2:%.*]] = load %struct.S.0*, %struct.S.0** [[VAR_ADDR]], align 8 // CHECK10-NEXT: store %struct.S.0* [[TMP2]], %struct.S.0** [[TMP]], align 8 -// CHECK10-NEXT: [[TMP3:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK10-NEXT: [[TMP3:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK10-NEXT: [[CONV1:%.*]] = bitcast i64* [[T_VAR_CASTED]] to i32* // CHECK10-NEXT: store i32 [[TMP3]], i32* [[CONV1]], align 4 // CHECK10-NEXT: [[TMP4:%.*]] = load i64, i64* [[T_VAR_CASTED]], align 8 @@ -2145,7 +2145,7 @@ int main() { // CHECK10-NEXT: br i1 [[TMP22]], label [[DOTOMP_LASTPRIVATE_THEN:%.*]], label [[DOTOMP_LASTPRIVATE_DONE:%.*]] // CHECK10: .omp.lastprivate.then: // CHECK10-NEXT: [[TMP23:%.*]] = load i32, i32* [[T_VAR2]], align 4 -// CHECK10-NEXT: store i32 [[TMP23]], i32* [[CONV]], align 8 +// CHECK10-NEXT: store i32 [[TMP23]], i32* [[CONV]], align 4 // CHECK10-NEXT: [[TMP24:%.*]] = bitcast [2 x i32]* [[TMP0]] to i8* // CHECK10-NEXT: [[TMP25:%.*]] = bitcast [2 x i32]* [[VEC3]] to i8* // CHECK10-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 [[TMP24]], i8* align 4 [[TMP25]], i64 8, i1 false) diff --git a/clang/test/OpenMP/target_teams_distribute_parallel_for_codegen.cpp b/clang/test/OpenMP/target_teams_distribute_parallel_for_codegen.cpp index 1b0bd4615118..2148b693e151 100644 --- a/clang/test/OpenMP/target_teams_distribute_parallel_for_codegen.cpp +++ b/clang/test/OpenMP/target_teams_distribute_parallel_for_codegen.cpp @@ -136,10 +136,10 @@ int target_teams_fun(int *g){ // CHECK1-NEXT: [[TMP1:%.*]] = load [1000 x i32]*, [1000 x i32]** [[A_ADDR]], align 8 // CHECK1-NEXT: [[CONV3:%.*]] = bitcast i64* [[DOTCAPTURE_EXPR__ADDR]] to i32* // CHECK1-NEXT: [[CONV4:%.*]] = bitcast i64* [[DOTCAPTURE_EXPR__ADDR2]] to i32* -// CHECK1-NEXT: [[TMP2:%.*]] = load i32, i32* [[CONV3]], align 8 -// CHECK1-NEXT: [[TMP3:%.*]] = load i32, i32* [[CONV4]], align 8 +// CHECK1-NEXT: [[TMP2:%.*]] = load i32, i32* [[CONV3]], align 4 +// CHECK1-NEXT: [[TMP3:%.*]] = load i32, i32* [[CONV4]], align 4 // CHECK1-NEXT: call void @__kmpc_push_num_teams(%struct.ident_t* @[[GLOB3]], i32 [[TMP0]], i32 
[[TMP2]], i32 [[TMP3]]) -// CHECK1-NEXT: [[TMP4:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK1-NEXT: [[TMP4:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK1-NEXT: [[CONV5:%.*]] = bitcast i64* [[N_CASTED]] to i32* // CHECK1-NEXT: store i32 [[TMP4]], i32* [[CONV5]], align 4 // CHECK1-NEXT: [[TMP5:%.*]] = load i64, i64* [[N_CASTED]], align 8 @@ -171,7 +171,7 @@ int target_teams_fun(int *g){ // CHECK1-NEXT: store [1000 x i32]* [[A]], [1000 x i32]** [[A_ADDR]], align 8 // CHECK1-NEXT: [[CONV:%.*]] = bitcast i64* [[N_ADDR]] to i32* // CHECK1-NEXT: [[TMP0:%.*]] = load [1000 x i32]*, [1000 x i32]** [[A_ADDR]], align 8 -// CHECK1-NEXT: [[TMP1:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK1-NEXT: store i32 [[TMP1]], i32* [[DOTCAPTURE_EXPR_]], align 4 // CHECK1-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 // CHECK1-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP2]], 0 @@ -217,7 +217,7 @@ int target_teams_fun(int *g){ // CHECK1-NEXT: [[TMP15:%.*]] = zext i32 [[TMP14]] to i64 // CHECK1-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK1-NEXT: [[TMP17:%.*]] = zext i32 [[TMP16]] to i64 -// CHECK1-NEXT: [[TMP18:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK1-NEXT: [[TMP18:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK1-NEXT: [[CONV6:%.*]] = bitcast i64* [[N_CASTED]] to i32* // CHECK1-NEXT: store i32 [[TMP18]], i32* [[CONV6]], align 4 // CHECK1-NEXT: [[TMP19:%.*]] = load i64, i64* [[N_CASTED]], align 8 @@ -267,7 +267,7 @@ int target_teams_fun(int *g){ // CHECK1-NEXT: store [1000 x i32]* [[A]], [1000 x i32]** [[A_ADDR]], align 8 // CHECK1-NEXT: [[CONV:%.*]] = bitcast i64* [[N_ADDR]] to i32* // CHECK1-NEXT: [[TMP0:%.*]] = load [1000 x i32]*, [1000 x i32]** [[A_ADDR]], align 8 -// CHECK1-NEXT: [[TMP1:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK1-NEXT: store i32 [[TMP1]], i32* [[DOTCAPTURE_EXPR_]], align 4 // CHECK1-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 // CHECK1-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP2]], 0 @@ -369,7 +369,7 @@ int target_teams_fun(int *g){ // CHECK1-NEXT: store i32* [[G]], i32** [[G_ADDR]], align 8 // CHECK1-NEXT: [[CONV:%.*]] = bitcast i64* [[N_ADDR]] to i32* // CHECK1-NEXT: [[TMP0:%.*]] = load [1000 x i32]*, [1000 x i32]** [[A_ADDR]], align 8 -// CHECK1-NEXT: [[TMP1:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK1-NEXT: [[CONV1:%.*]] = bitcast i64* [[N_CASTED]] to i32* // CHECK1-NEXT: store i32 [[TMP1]], i32* [[CONV1]], align 4 // CHECK1-NEXT: [[TMP2:%.*]] = load i64, i64* [[N_CASTED]], align 8 @@ -404,7 +404,7 @@ int target_teams_fun(int *g){ // CHECK1-NEXT: store i32* [[G]], i32** [[G_ADDR]], align 8 // CHECK1-NEXT: [[CONV:%.*]] = bitcast i64* [[N_ADDR]] to i32* // CHECK1-NEXT: [[TMP0:%.*]] = load [1000 x i32]*, [1000 x i32]** [[A_ADDR]], align 8 -// CHECK1-NEXT: [[TMP1:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK1-NEXT: store i32 [[TMP1]], i32* [[DOTCAPTURE_EXPR_]], align 4 // CHECK1-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 // CHECK1-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP2]], 0 @@ -450,7 +450,7 @@ int target_teams_fun(int *g){ // CHECK1-NEXT: [[TMP15:%.*]] = zext i32 [[TMP14]] to i64 // CHECK1-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK1-NEXT: [[TMP17:%.*]] = zext i32 [[TMP16]] to i64 -// 
CHECK1-NEXT: [[TMP18:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK1-NEXT: [[TMP18:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK1-NEXT: [[CONV6:%.*]] = bitcast i64* [[N_CASTED]] to i32* // CHECK1-NEXT: store i32 [[TMP18]], i32* [[CONV6]], align 4 // CHECK1-NEXT: [[TMP19:%.*]] = load i64, i64* [[N_CASTED]], align 8 @@ -503,7 +503,7 @@ int target_teams_fun(int *g){ // CHECK1-NEXT: store i32* [[G]], i32** [[G_ADDR]], align 8 // CHECK1-NEXT: [[CONV:%.*]] = bitcast i64* [[N_ADDR]] to i32* // CHECK1-NEXT: [[TMP0:%.*]] = load [1000 x i32]*, [1000 x i32]** [[A_ADDR]], align 8 -// CHECK1-NEXT: [[TMP1:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK1-NEXT: store i32 [[TMP1]], i32* [[DOTCAPTURE_EXPR_]], align 4 // CHECK1-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 // CHECK1-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP2]], 0 @@ -754,10 +754,10 @@ int target_teams_fun(int *g){ // CHECK2-NEXT: [[TMP1:%.*]] = load [1000 x i32]*, [1000 x i32]** [[A_ADDR]], align 8 // CHECK2-NEXT: [[CONV3:%.*]] = bitcast i64* [[DOTCAPTURE_EXPR__ADDR]] to i32* // CHECK2-NEXT: [[CONV4:%.*]] = bitcast i64* [[DOTCAPTURE_EXPR__ADDR2]] to i32* -// CHECK2-NEXT: [[TMP2:%.*]] = load i32, i32* [[CONV3]], align 8 -// CHECK2-NEXT: [[TMP3:%.*]] = load i32, i32* [[CONV4]], align 8 +// CHECK2-NEXT: [[TMP2:%.*]] = load i32, i32* [[CONV3]], align 4 +// CHECK2-NEXT: [[TMP3:%.*]] = load i32, i32* [[CONV4]], align 4 // CHECK2-NEXT: call void @__kmpc_push_num_teams(%struct.ident_t* @[[GLOB3]], i32 [[TMP0]], i32 [[TMP2]], i32 [[TMP3]]) -// CHECK2-NEXT: [[TMP4:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK2-NEXT: [[TMP4:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK2-NEXT: [[CONV5:%.*]] = bitcast i64* [[N_CASTED]] to i32* // CHECK2-NEXT: store i32 [[TMP4]], i32* [[CONV5]], align 4 // CHECK2-NEXT: [[TMP5:%.*]] = load i64, i64* [[N_CASTED]], align 8 @@ -789,7 +789,7 @@ int target_teams_fun(int *g){ // CHECK2-NEXT: store [1000 x i32]* [[A]], [1000 x i32]** [[A_ADDR]], align 8 // CHECK2-NEXT: [[CONV:%.*]] = bitcast i64* [[N_ADDR]] to i32* // CHECK2-NEXT: [[TMP0:%.*]] = load [1000 x i32]*, [1000 x i32]** [[A_ADDR]], align 8 -// CHECK2-NEXT: [[TMP1:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK2-NEXT: [[TMP1:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK2-NEXT: store i32 [[TMP1]], i32* [[DOTCAPTURE_EXPR_]], align 4 // CHECK2-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 // CHECK2-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP2]], 0 @@ -835,7 +835,7 @@ int target_teams_fun(int *g){ // CHECK2-NEXT: [[TMP15:%.*]] = zext i32 [[TMP14]] to i64 // CHECK2-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK2-NEXT: [[TMP17:%.*]] = zext i32 [[TMP16]] to i64 -// CHECK2-NEXT: [[TMP18:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK2-NEXT: [[TMP18:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK2-NEXT: [[CONV6:%.*]] = bitcast i64* [[N_CASTED]] to i32* // CHECK2-NEXT: store i32 [[TMP18]], i32* [[CONV6]], align 4 // CHECK2-NEXT: [[TMP19:%.*]] = load i64, i64* [[N_CASTED]], align 8 @@ -885,7 +885,7 @@ int target_teams_fun(int *g){ // CHECK2-NEXT: store [1000 x i32]* [[A]], [1000 x i32]** [[A_ADDR]], align 8 // CHECK2-NEXT: [[CONV:%.*]] = bitcast i64* [[N_ADDR]] to i32* // CHECK2-NEXT: [[TMP0:%.*]] = load [1000 x i32]*, [1000 x i32]** [[A_ADDR]], align 8 -// CHECK2-NEXT: [[TMP1:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK2-NEXT: [[TMP1:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK2-NEXT: store i32 [[TMP1]], i32* 
[[DOTCAPTURE_EXPR_]], align 4 // CHECK2-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 // CHECK2-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP2]], 0 @@ -987,7 +987,7 @@ int target_teams_fun(int *g){ // CHECK2-NEXT: store i32* [[G]], i32** [[G_ADDR]], align 8 // CHECK2-NEXT: [[CONV:%.*]] = bitcast i64* [[N_ADDR]] to i32* // CHECK2-NEXT: [[TMP0:%.*]] = load [1000 x i32]*, [1000 x i32]** [[A_ADDR]], align 8 -// CHECK2-NEXT: [[TMP1:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK2-NEXT: [[TMP1:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK2-NEXT: [[CONV1:%.*]] = bitcast i64* [[N_CASTED]] to i32* // CHECK2-NEXT: store i32 [[TMP1]], i32* [[CONV1]], align 4 // CHECK2-NEXT: [[TMP2:%.*]] = load i64, i64* [[N_CASTED]], align 8 @@ -1022,7 +1022,7 @@ int target_teams_fun(int *g){ // CHECK2-NEXT: store i32* [[G]], i32** [[G_ADDR]], align 8 // CHECK2-NEXT: [[CONV:%.*]] = bitcast i64* [[N_ADDR]] to i32* // CHECK2-NEXT: [[TMP0:%.*]] = load [1000 x i32]*, [1000 x i32]** [[A_ADDR]], align 8 -// CHECK2-NEXT: [[TMP1:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK2-NEXT: [[TMP1:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK2-NEXT: store i32 [[TMP1]], i32* [[DOTCAPTURE_EXPR_]], align 4 // CHECK2-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 // CHECK2-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP2]], 0 @@ -1068,7 +1068,7 @@ int target_teams_fun(int *g){ // CHECK2-NEXT: [[TMP15:%.*]] = zext i32 [[TMP14]] to i64 // CHECK2-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK2-NEXT: [[TMP17:%.*]] = zext i32 [[TMP16]] to i64 -// CHECK2-NEXT: [[TMP18:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK2-NEXT: [[TMP18:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK2-NEXT: [[CONV6:%.*]] = bitcast i64* [[N_CASTED]] to i32* // CHECK2-NEXT: store i32 [[TMP18]], i32* [[CONV6]], align 4 // CHECK2-NEXT: [[TMP19:%.*]] = load i64, i64* [[N_CASTED]], align 8 @@ -1121,7 +1121,7 @@ int target_teams_fun(int *g){ // CHECK2-NEXT: store i32* [[G]], i32** [[G_ADDR]], align 8 // CHECK2-NEXT: [[CONV:%.*]] = bitcast i64* [[N_ADDR]] to i32* // CHECK2-NEXT: [[TMP0:%.*]] = load [1000 x i32]*, [1000 x i32]** [[A_ADDR]], align 8 -// CHECK2-NEXT: [[TMP1:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK2-NEXT: [[TMP1:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK2-NEXT: store i32 [[TMP1]], i32* [[DOTCAPTURE_EXPR_]], align 4 // CHECK2-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 // CHECK2-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP2]], 0 @@ -1379,10 +1379,10 @@ int target_teams_fun(int *g){ // CHECK3-NEXT: [[TMP1:%.*]] = load [1000 x i32]*, [1000 x i32]** [[A_ADDR]], align 8 // CHECK3-NEXT: [[CONV3:%.*]] = bitcast i64* [[DOTCAPTURE_EXPR__ADDR]] to i32* // CHECK3-NEXT: [[CONV4:%.*]] = bitcast i64* [[DOTCAPTURE_EXPR__ADDR2]] to i32* -// CHECK3-NEXT: [[TMP2:%.*]] = load i32, i32* [[CONV3]], align 8 -// CHECK3-NEXT: [[TMP3:%.*]] = load i32, i32* [[CONV4]], align 8 +// CHECK3-NEXT: [[TMP2:%.*]] = load i32, i32* [[CONV3]], align 4 +// CHECK3-NEXT: [[TMP3:%.*]] = load i32, i32* [[CONV4]], align 4 // CHECK3-NEXT: call void @__kmpc_push_num_teams(%struct.ident_t* @[[GLOB3]], i32 [[TMP0]], i32 [[TMP2]], i32 [[TMP3]]) -// CHECK3-NEXT: [[TMP4:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK3-NEXT: [[TMP4:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK3-NEXT: [[CONV5:%.*]] = bitcast i64* [[N_CASTED]] to i32* // CHECK3-NEXT: store i32 [[TMP4]], i32* [[CONV5]], align 4 // CHECK3-NEXT: [[TMP5:%.*]] = load i64, i64* [[N_CASTED]], align 8 @@ -1414,7 +1414,7 @@ int target_teams_fun(int 
*g){ // CHECK3-NEXT: store [1000 x i32]* [[A]], [1000 x i32]** [[A_ADDR]], align 8 // CHECK3-NEXT: [[CONV:%.*]] = bitcast i64* [[N_ADDR]] to i32* // CHECK3-NEXT: [[TMP0:%.*]] = load [1000 x i32]*, [1000 x i32]** [[A_ADDR]], align 8 -// CHECK3-NEXT: [[TMP1:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK3-NEXT: [[TMP1:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK3-NEXT: store i32 [[TMP1]], i32* [[DOTCAPTURE_EXPR_]], align 4 // CHECK3-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 // CHECK3-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP2]], 0 @@ -1460,7 +1460,7 @@ int target_teams_fun(int *g){ // CHECK3-NEXT: [[TMP15:%.*]] = zext i32 [[TMP14]] to i64 // CHECK3-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK3-NEXT: [[TMP17:%.*]] = zext i32 [[TMP16]] to i64 -// CHECK3-NEXT: [[TMP18:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK3-NEXT: [[TMP18:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK3-NEXT: [[CONV6:%.*]] = bitcast i64* [[N_CASTED]] to i32* // CHECK3-NEXT: store i32 [[TMP18]], i32* [[CONV6]], align 4 // CHECK3-NEXT: [[TMP19:%.*]] = load i64, i64* [[N_CASTED]], align 8 @@ -1510,7 +1510,7 @@ int target_teams_fun(int *g){ // CHECK3-NEXT: store [1000 x i32]* [[A]], [1000 x i32]** [[A_ADDR]], align 8 // CHECK3-NEXT: [[CONV:%.*]] = bitcast i64* [[N_ADDR]] to i32* // CHECK3-NEXT: [[TMP0:%.*]] = load [1000 x i32]*, [1000 x i32]** [[A_ADDR]], align 8 -// CHECK3-NEXT: [[TMP1:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK3-NEXT: [[TMP1:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK3-NEXT: store i32 [[TMP1]], i32* [[DOTCAPTURE_EXPR_]], align 4 // CHECK3-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 // CHECK3-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP2]], 0 @@ -1612,7 +1612,7 @@ int target_teams_fun(int *g){ // CHECK3-NEXT: store i32* [[G]], i32** [[G_ADDR]], align 8 // CHECK3-NEXT: [[CONV:%.*]] = bitcast i64* [[N_ADDR]] to i32* // CHECK3-NEXT: [[TMP0:%.*]] = load [1000 x i32]*, [1000 x i32]** [[A_ADDR]], align 8 -// CHECK3-NEXT: [[TMP1:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK3-NEXT: [[TMP1:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK3-NEXT: [[CONV1:%.*]] = bitcast i64* [[N_CASTED]] to i32* // CHECK3-NEXT: store i32 [[TMP1]], i32* [[CONV1]], align 4 // CHECK3-NEXT: [[TMP2:%.*]] = load i64, i64* [[N_CASTED]], align 8 @@ -1647,7 +1647,7 @@ int target_teams_fun(int *g){ // CHECK3-NEXT: store i32* [[G]], i32** [[G_ADDR]], align 8 // CHECK3-NEXT: [[CONV:%.*]] = bitcast i64* [[N_ADDR]] to i32* // CHECK3-NEXT: [[TMP0:%.*]] = load [1000 x i32]*, [1000 x i32]** [[A_ADDR]], align 8 -// CHECK3-NEXT: [[TMP1:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK3-NEXT: [[TMP1:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK3-NEXT: store i32 [[TMP1]], i32* [[DOTCAPTURE_EXPR_]], align 4 // CHECK3-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 // CHECK3-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP2]], 0 @@ -1693,7 +1693,7 @@ int target_teams_fun(int *g){ // CHECK3-NEXT: [[TMP15:%.*]] = zext i32 [[TMP14]] to i64 // CHECK3-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK3-NEXT: [[TMP17:%.*]] = zext i32 [[TMP16]] to i64 -// CHECK3-NEXT: [[TMP18:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK3-NEXT: [[TMP18:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK3-NEXT: [[CONV6:%.*]] = bitcast i64* [[N_CASTED]] to i32* // CHECK3-NEXT: store i32 [[TMP18]], i32* [[CONV6]], align 4 // CHECK3-NEXT: [[TMP19:%.*]] = load i64, i64* [[N_CASTED]], align 8 @@ -1746,7 +1746,7 @@ int target_teams_fun(int *g){ // 
CHECK3-NEXT: store i32* [[G]], i32** [[G_ADDR]], align 8 // CHECK3-NEXT: [[CONV:%.*]] = bitcast i64* [[N_ADDR]] to i32* // CHECK3-NEXT: [[TMP0:%.*]] = load [1000 x i32]*, [1000 x i32]** [[A_ADDR]], align 8 -// CHECK3-NEXT: [[TMP1:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK3-NEXT: [[TMP1:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK3-NEXT: store i32 [[TMP1]], i32* [[DOTCAPTURE_EXPR_]], align 4 // CHECK3-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 // CHECK3-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP2]], 0 @@ -3046,10 +3046,10 @@ int target_teams_fun(int *g){ // CHECK10-NEXT: [[TMP1:%.*]] = load [1000 x i32]*, [1000 x i32]** [[A_ADDR]], align 8 // CHECK10-NEXT: [[CONV3:%.*]] = bitcast i64* [[DOTCAPTURE_EXPR__ADDR]] to i32* // CHECK10-NEXT: [[CONV4:%.*]] = bitcast i64* [[DOTCAPTURE_EXPR__ADDR2]] to i32* -// CHECK10-NEXT: [[TMP2:%.*]] = load i32, i32* [[CONV3]], align 8 -// CHECK10-NEXT: [[TMP3:%.*]] = load i32, i32* [[CONV4]], align 8 +// CHECK10-NEXT: [[TMP2:%.*]] = load i32, i32* [[CONV3]], align 4 +// CHECK10-NEXT: [[TMP3:%.*]] = load i32, i32* [[CONV4]], align 4 // CHECK10-NEXT: call void @__kmpc_push_num_teams(%struct.ident_t* @[[GLOB3]], i32 [[TMP0]], i32 [[TMP2]], i32 [[TMP3]]) -// CHECK10-NEXT: [[TMP4:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK10-NEXT: [[TMP4:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK10-NEXT: [[CONV5:%.*]] = bitcast i64* [[N_CASTED]] to i32* // CHECK10-NEXT: store i32 [[TMP4]], i32* [[CONV5]], align 4 // CHECK10-NEXT: [[TMP5:%.*]] = load i64, i64* [[N_CASTED]], align 8 @@ -3081,7 +3081,7 @@ int target_teams_fun(int *g){ // CHECK10-NEXT: store [1000 x i32]* [[A]], [1000 x i32]** [[A_ADDR]], align 8 // CHECK10-NEXT: [[CONV:%.*]] = bitcast i64* [[N_ADDR]] to i32* // CHECK10-NEXT: [[TMP0:%.*]] = load [1000 x i32]*, [1000 x i32]** [[A_ADDR]], align 8 -// CHECK10-NEXT: [[TMP1:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK10-NEXT: [[TMP1:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK10-NEXT: store i32 [[TMP1]], i32* [[DOTCAPTURE_EXPR_]], align 4 // CHECK10-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 // CHECK10-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP2]], 0 @@ -3127,7 +3127,7 @@ int target_teams_fun(int *g){ // CHECK10-NEXT: [[TMP15:%.*]] = zext i32 [[TMP14]] to i64 // CHECK10-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK10-NEXT: [[TMP17:%.*]] = zext i32 [[TMP16]] to i64 -// CHECK10-NEXT: [[TMP18:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK10-NEXT: [[TMP18:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK10-NEXT: [[CONV6:%.*]] = bitcast i64* [[N_CASTED]] to i32* // CHECK10-NEXT: store i32 [[TMP18]], i32* [[CONV6]], align 4 // CHECK10-NEXT: [[TMP19:%.*]] = load i64, i64* [[N_CASTED]], align 8 @@ -3177,7 +3177,7 @@ int target_teams_fun(int *g){ // CHECK10-NEXT: store [1000 x i32]* [[A]], [1000 x i32]** [[A_ADDR]], align 8 // CHECK10-NEXT: [[CONV:%.*]] = bitcast i64* [[N_ADDR]] to i32* // CHECK10-NEXT: [[TMP0:%.*]] = load [1000 x i32]*, [1000 x i32]** [[A_ADDR]], align 8 -// CHECK10-NEXT: [[TMP1:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK10-NEXT: [[TMP1:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK10-NEXT: store i32 [[TMP1]], i32* [[DOTCAPTURE_EXPR_]], align 4 // CHECK10-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 // CHECK10-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP2]], 0 @@ -3279,7 +3279,7 @@ int target_teams_fun(int *g){ // CHECK10-NEXT: store i32* [[G]], i32** [[G_ADDR]], align 8 // CHECK10-NEXT: [[CONV:%.*]] = bitcast i64* [[N_ADDR]] to i32* // 
CHECK10-NEXT: [[TMP0:%.*]] = load [1000 x i32]*, [1000 x i32]** [[A_ADDR]], align 8 -// CHECK10-NEXT: [[TMP1:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK10-NEXT: [[TMP1:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK10-NEXT: [[CONV1:%.*]] = bitcast i64* [[N_CASTED]] to i32* // CHECK10-NEXT: store i32 [[TMP1]], i32* [[CONV1]], align 4 // CHECK10-NEXT: [[TMP2:%.*]] = load i64, i64* [[N_CASTED]], align 8 @@ -3314,7 +3314,7 @@ int target_teams_fun(int *g){ // CHECK10-NEXT: store i32* [[G]], i32** [[G_ADDR]], align 8 // CHECK10-NEXT: [[CONV:%.*]] = bitcast i64* [[N_ADDR]] to i32* // CHECK10-NEXT: [[TMP0:%.*]] = load [1000 x i32]*, [1000 x i32]** [[A_ADDR]], align 8 -// CHECK10-NEXT: [[TMP1:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK10-NEXT: [[TMP1:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK10-NEXT: store i32 [[TMP1]], i32* [[DOTCAPTURE_EXPR_]], align 4 // CHECK10-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 // CHECK10-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP2]], 0 @@ -3360,7 +3360,7 @@ int target_teams_fun(int *g){ // CHECK10-NEXT: [[TMP15:%.*]] = zext i32 [[TMP14]] to i64 // CHECK10-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK10-NEXT: [[TMP17:%.*]] = zext i32 [[TMP16]] to i64 -// CHECK10-NEXT: [[TMP18:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK10-NEXT: [[TMP18:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK10-NEXT: [[CONV6:%.*]] = bitcast i64* [[N_CASTED]] to i32* // CHECK10-NEXT: store i32 [[TMP18]], i32* [[CONV6]], align 4 // CHECK10-NEXT: [[TMP19:%.*]] = load i64, i64* [[N_CASTED]], align 8 @@ -3413,7 +3413,7 @@ int target_teams_fun(int *g){ // CHECK10-NEXT: store i32* [[G]], i32** [[G_ADDR]], align 8 // CHECK10-NEXT: [[CONV:%.*]] = bitcast i64* [[N_ADDR]] to i32* // CHECK10-NEXT: [[TMP0:%.*]] = load [1000 x i32]*, [1000 x i32]** [[A_ADDR]], align 8 -// CHECK10-NEXT: [[TMP1:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK10-NEXT: [[TMP1:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK10-NEXT: store i32 [[TMP1]], i32* [[DOTCAPTURE_EXPR_]], align 4 // CHECK10-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 // CHECK10-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP2]], 0 @@ -3508,10 +3508,10 @@ int target_teams_fun(int *g){ // CHECK11-NEXT: [[TMP1:%.*]] = load [1000 x i32]*, [1000 x i32]** [[A_ADDR]], align 8 // CHECK11-NEXT: [[CONV3:%.*]] = bitcast i64* [[DOTCAPTURE_EXPR__ADDR]] to i32* // CHECK11-NEXT: [[CONV4:%.*]] = bitcast i64* [[DOTCAPTURE_EXPR__ADDR2]] to i32* -// CHECK11-NEXT: [[TMP2:%.*]] = load i32, i32* [[CONV3]], align 8 -// CHECK11-NEXT: [[TMP3:%.*]] = load i32, i32* [[CONV4]], align 8 +// CHECK11-NEXT: [[TMP2:%.*]] = load i32, i32* [[CONV3]], align 4 +// CHECK11-NEXT: [[TMP3:%.*]] = load i32, i32* [[CONV4]], align 4 // CHECK11-NEXT: call void @__kmpc_push_num_teams(%struct.ident_t* @[[GLOB3]], i32 [[TMP0]], i32 [[TMP2]], i32 [[TMP3]]) -// CHECK11-NEXT: [[TMP4:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK11-NEXT: [[TMP4:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK11-NEXT: [[CONV5:%.*]] = bitcast i64* [[N_CASTED]] to i32* // CHECK11-NEXT: store i32 [[TMP4]], i32* [[CONV5]], align 4 // CHECK11-NEXT: [[TMP5:%.*]] = load i64, i64* [[N_CASTED]], align 8 @@ -3543,7 +3543,7 @@ int target_teams_fun(int *g){ // CHECK11-NEXT: store [1000 x i32]* [[A]], [1000 x i32]** [[A_ADDR]], align 8 // CHECK11-NEXT: [[CONV:%.*]] = bitcast i64* [[N_ADDR]] to i32* // CHECK11-NEXT: [[TMP0:%.*]] = load [1000 x i32]*, [1000 x i32]** [[A_ADDR]], align 8 -// CHECK11-NEXT: [[TMP1:%.*]] = load i32, i32* [[CONV]], 
align 8 +// CHECK11-NEXT: [[TMP1:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK11-NEXT: store i32 [[TMP1]], i32* [[DOTCAPTURE_EXPR_]], align 4 // CHECK11-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 // CHECK11-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP2]], 0 @@ -3589,7 +3589,7 @@ int target_teams_fun(int *g){ // CHECK11-NEXT: [[TMP15:%.*]] = zext i32 [[TMP14]] to i64 // CHECK11-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK11-NEXT: [[TMP17:%.*]] = zext i32 [[TMP16]] to i64 -// CHECK11-NEXT: [[TMP18:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK11-NEXT: [[TMP18:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK11-NEXT: [[CONV6:%.*]] = bitcast i64* [[N_CASTED]] to i32* // CHECK11-NEXT: store i32 [[TMP18]], i32* [[CONV6]], align 4 // CHECK11-NEXT: [[TMP19:%.*]] = load i64, i64* [[N_CASTED]], align 8 @@ -3639,7 +3639,7 @@ int target_teams_fun(int *g){ // CHECK11-NEXT: store [1000 x i32]* [[A]], [1000 x i32]** [[A_ADDR]], align 8 // CHECK11-NEXT: [[CONV:%.*]] = bitcast i64* [[N_ADDR]] to i32* // CHECK11-NEXT: [[TMP0:%.*]] = load [1000 x i32]*, [1000 x i32]** [[A_ADDR]], align 8 -// CHECK11-NEXT: [[TMP1:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK11-NEXT: [[TMP1:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK11-NEXT: store i32 [[TMP1]], i32* [[DOTCAPTURE_EXPR_]], align 4 // CHECK11-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 // CHECK11-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP2]], 0 @@ -3741,7 +3741,7 @@ int target_teams_fun(int *g){ // CHECK11-NEXT: store i32* [[G]], i32** [[G_ADDR]], align 8 // CHECK11-NEXT: [[CONV:%.*]] = bitcast i64* [[N_ADDR]] to i32* // CHECK11-NEXT: [[TMP0:%.*]] = load [1000 x i32]*, [1000 x i32]** [[A_ADDR]], align 8 -// CHECK11-NEXT: [[TMP1:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK11-NEXT: [[TMP1:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK11-NEXT: [[CONV1:%.*]] = bitcast i64* [[N_CASTED]] to i32* // CHECK11-NEXT: store i32 [[TMP1]], i32* [[CONV1]], align 4 // CHECK11-NEXT: [[TMP2:%.*]] = load i64, i64* [[N_CASTED]], align 8 @@ -3776,7 +3776,7 @@ int target_teams_fun(int *g){ // CHECK11-NEXT: store i32* [[G]], i32** [[G_ADDR]], align 8 // CHECK11-NEXT: [[CONV:%.*]] = bitcast i64* [[N_ADDR]] to i32* // CHECK11-NEXT: [[TMP0:%.*]] = load [1000 x i32]*, [1000 x i32]** [[A_ADDR]], align 8 -// CHECK11-NEXT: [[TMP1:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK11-NEXT: [[TMP1:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK11-NEXT: store i32 [[TMP1]], i32* [[DOTCAPTURE_EXPR_]], align 4 // CHECK11-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 // CHECK11-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP2]], 0 @@ -3822,7 +3822,7 @@ int target_teams_fun(int *g){ // CHECK11-NEXT: [[TMP15:%.*]] = zext i32 [[TMP14]] to i64 // CHECK11-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK11-NEXT: [[TMP17:%.*]] = zext i32 [[TMP16]] to i64 -// CHECK11-NEXT: [[TMP18:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK11-NEXT: [[TMP18:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK11-NEXT: [[CONV6:%.*]] = bitcast i64* [[N_CASTED]] to i32* // CHECK11-NEXT: store i32 [[TMP18]], i32* [[CONV6]], align 4 // CHECK11-NEXT: [[TMP19:%.*]] = load i64, i64* [[N_CASTED]], align 8 @@ -3875,7 +3875,7 @@ int target_teams_fun(int *g){ // CHECK11-NEXT: store i32* [[G]], i32** [[G_ADDR]], align 8 // CHECK11-NEXT: [[CONV:%.*]] = bitcast i64* [[N_ADDR]] to i32* // CHECK11-NEXT: [[TMP0:%.*]] = load [1000 x i32]*, [1000 x i32]** [[A_ADDR]], align 8 -// CHECK11-NEXT: [[TMP1:%.*]] = load i32, 
i32* [[CONV]], align 8 +// CHECK11-NEXT: [[TMP1:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK11-NEXT: store i32 [[TMP1]], i32* [[DOTCAPTURE_EXPR_]], align 4 // CHECK11-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 // CHECK11-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP2]], 0 diff --git a/clang/test/OpenMP/target_teams_distribute_parallel_for_collapse_codegen.cpp b/clang/test/OpenMP/target_teams_distribute_parallel_for_collapse_codegen.cpp index f2b302b1f9f1..281654218d09 100644 --- a/clang/test/OpenMP/target_teams_distribute_parallel_for_collapse_codegen.cpp +++ b/clang/test/OpenMP/target_teams_distribute_parallel_for_collapse_codegen.cpp @@ -1117,11 +1117,11 @@ int main (int argc, char **argv) { // CHECK9-NEXT: [[TMP0:%.*]] = load i64, i64* [[VLA_ADDR]], align 8 // CHECK9-NEXT: [[TMP1:%.*]] = load i64, i64* [[VLA_ADDR2]], align 8 // CHECK9-NEXT: [[TMP2:%.*]] = load i32*, i32** [[A_ADDR]], align 8 -// CHECK9-NEXT: [[TMP3:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK9-NEXT: [[TMP3:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK9-NEXT: [[CONV4:%.*]] = bitcast i64* [[N_CASTED]] to i32* // CHECK9-NEXT: store i32 [[TMP3]], i32* [[CONV4]], align 4 // CHECK9-NEXT: [[TMP4:%.*]] = load i64, i64* [[N_CASTED]], align 8 -// CHECK9-NEXT: [[TMP5:%.*]] = load i32, i32* [[CONV3]], align 8 +// CHECK9-NEXT: [[TMP5:%.*]] = load i32, i32* [[CONV3]], align 4 // CHECK9-NEXT: [[CONV5:%.*]] = bitcast i64* [[M_CASTED]] to i32* // CHECK9-NEXT: store i32 [[TMP5]], i32* [[CONV5]], align 4 // CHECK9-NEXT: [[TMP6:%.*]] = load i64, i64* [[M_CASTED]], align 8 @@ -1167,9 +1167,9 @@ int main (int argc, char **argv) { // CHECK9-NEXT: [[TMP0:%.*]] = load i64, i64* [[VLA_ADDR]], align 8 // CHECK9-NEXT: [[TMP1:%.*]] = load i64, i64* [[VLA_ADDR2]], align 8 // CHECK9-NEXT: [[TMP2:%.*]] = load i32*, i32** [[A_ADDR]], align 8 -// CHECK9-NEXT: [[TMP3:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK9-NEXT: [[TMP3:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK9-NEXT: store i32 [[TMP3]], i32* [[DOTCAPTURE_EXPR_]], align 4 -// CHECK9-NEXT: [[TMP4:%.*]] = load i32, i32* [[CONV3]], align 8 +// CHECK9-NEXT: [[TMP4:%.*]] = load i32, i32* [[CONV3]], align 4 // CHECK9-NEXT: store i32 [[TMP4]], i32* [[DOTCAPTURE_EXPR_5]], align 4 // CHECK9-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 // CHECK9-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP5]], 0 @@ -1224,11 +1224,11 @@ int main (int argc, char **argv) { // CHECK9: omp.inner.for.body: // CHECK9-NEXT: [[TMP19:%.*]] = load i64, i64* [[DOTOMP_COMB_LB]], align 8 // CHECK9-NEXT: [[TMP20:%.*]] = load i64, i64* [[DOTOMP_COMB_UB]], align 8 -// CHECK9-NEXT: [[TMP21:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK9-NEXT: [[TMP21:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK9-NEXT: [[CONV17:%.*]] = bitcast i64* [[N_CASTED]] to i32* // CHECK9-NEXT: store i32 [[TMP21]], i32* [[CONV17]], align 4 // CHECK9-NEXT: [[TMP22:%.*]] = load i64, i64* [[N_CASTED]], align 8 -// CHECK9-NEXT: [[TMP23:%.*]] = load i32, i32* [[CONV3]], align 8 +// CHECK9-NEXT: [[TMP23:%.*]] = load i32, i32* [[CONV3]], align 4 // CHECK9-NEXT: [[CONV18:%.*]] = bitcast i64* [[M_CASTED]] to i32* // CHECK9-NEXT: store i32 [[TMP23]], i32* [[CONV18]], align 4 // CHECK9-NEXT: [[TMP24:%.*]] = load i64, i64* [[M_CASTED]], align 8 @@ -1291,9 +1291,9 @@ int main (int argc, char **argv) { // CHECK9-NEXT: [[TMP0:%.*]] = load i64, i64* [[VLA_ADDR]], align 8 // CHECK9-NEXT: [[TMP1:%.*]] = load i64, i64* [[VLA_ADDR2]], align 8 // CHECK9-NEXT: [[TMP2:%.*]] = load i32*, i32** [[A_ADDR]], align 8 -// 
CHECK9-NEXT: [[TMP3:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK9-NEXT: [[TMP3:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK9-NEXT: store i32 [[TMP3]], i32* [[DOTCAPTURE_EXPR_]], align 4 -// CHECK9-NEXT: [[TMP4:%.*]] = load i32, i32* [[CONV3]], align 8 +// CHECK9-NEXT: [[TMP4:%.*]] = load i32, i32* [[CONV3]], align 4 // CHECK9-NEXT: store i32 [[TMP4]], i32* [[DOTCAPTURE_EXPR_5]], align 4 // CHECK9-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 // CHECK9-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP5]], 0 @@ -1766,11 +1766,11 @@ int main (int argc, char **argv) { // CHECK10-NEXT: [[TMP0:%.*]] = load i64, i64* [[VLA_ADDR]], align 8 // CHECK10-NEXT: [[TMP1:%.*]] = load i64, i64* [[VLA_ADDR2]], align 8 // CHECK10-NEXT: [[TMP2:%.*]] = load i32*, i32** [[A_ADDR]], align 8 -// CHECK10-NEXT: [[TMP3:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK10-NEXT: [[TMP3:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK10-NEXT: [[CONV4:%.*]] = bitcast i64* [[N_CASTED]] to i32* // CHECK10-NEXT: store i32 [[TMP3]], i32* [[CONV4]], align 4 // CHECK10-NEXT: [[TMP4:%.*]] = load i64, i64* [[N_CASTED]], align 8 -// CHECK10-NEXT: [[TMP5:%.*]] = load i32, i32* [[CONV3]], align 8 +// CHECK10-NEXT: [[TMP5:%.*]] = load i32, i32* [[CONV3]], align 4 // CHECK10-NEXT: [[CONV5:%.*]] = bitcast i64* [[M_CASTED]] to i32* // CHECK10-NEXT: store i32 [[TMP5]], i32* [[CONV5]], align 4 // CHECK10-NEXT: [[TMP6:%.*]] = load i64, i64* [[M_CASTED]], align 8 @@ -1816,9 +1816,9 @@ int main (int argc, char **argv) { // CHECK10-NEXT: [[TMP0:%.*]] = load i64, i64* [[VLA_ADDR]], align 8 // CHECK10-NEXT: [[TMP1:%.*]] = load i64, i64* [[VLA_ADDR2]], align 8 // CHECK10-NEXT: [[TMP2:%.*]] = load i32*, i32** [[A_ADDR]], align 8 -// CHECK10-NEXT: [[TMP3:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK10-NEXT: [[TMP3:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK10-NEXT: store i32 [[TMP3]], i32* [[DOTCAPTURE_EXPR_]], align 4 -// CHECK10-NEXT: [[TMP4:%.*]] = load i32, i32* [[CONV3]], align 8 +// CHECK10-NEXT: [[TMP4:%.*]] = load i32, i32* [[CONV3]], align 4 // CHECK10-NEXT: store i32 [[TMP4]], i32* [[DOTCAPTURE_EXPR_5]], align 4 // CHECK10-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 // CHECK10-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP5]], 0 @@ -1873,11 +1873,11 @@ int main (int argc, char **argv) { // CHECK10: omp.inner.for.body: // CHECK10-NEXT: [[TMP19:%.*]] = load i64, i64* [[DOTOMP_COMB_LB]], align 8 // CHECK10-NEXT: [[TMP20:%.*]] = load i64, i64* [[DOTOMP_COMB_UB]], align 8 -// CHECK10-NEXT: [[TMP21:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK10-NEXT: [[TMP21:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK10-NEXT: [[CONV17:%.*]] = bitcast i64* [[N_CASTED]] to i32* // CHECK10-NEXT: store i32 [[TMP21]], i32* [[CONV17]], align 4 // CHECK10-NEXT: [[TMP22:%.*]] = load i64, i64* [[N_CASTED]], align 8 -// CHECK10-NEXT: [[TMP23:%.*]] = load i32, i32* [[CONV3]], align 8 +// CHECK10-NEXT: [[TMP23:%.*]] = load i32, i32* [[CONV3]], align 4 // CHECK10-NEXT: [[CONV18:%.*]] = bitcast i64* [[M_CASTED]] to i32* // CHECK10-NEXT: store i32 [[TMP23]], i32* [[CONV18]], align 4 // CHECK10-NEXT: [[TMP24:%.*]] = load i64, i64* [[M_CASTED]], align 8 @@ -1940,9 +1940,9 @@ int main (int argc, char **argv) { // CHECK10-NEXT: [[TMP0:%.*]] = load i64, i64* [[VLA_ADDR]], align 8 // CHECK10-NEXT: [[TMP1:%.*]] = load i64, i64* [[VLA_ADDR2]], align 8 // CHECK10-NEXT: [[TMP2:%.*]] = load i32*, i32** [[A_ADDR]], align 8 -// CHECK10-NEXT: [[TMP3:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK10-NEXT: [[TMP3:%.*]] = load i32, 
i32* [[CONV]], align 4 // CHECK10-NEXT: store i32 [[TMP3]], i32* [[DOTCAPTURE_EXPR_]], align 4 -// CHECK10-NEXT: [[TMP4:%.*]] = load i32, i32* [[CONV3]], align 8 +// CHECK10-NEXT: [[TMP4:%.*]] = load i32, i32* [[CONV3]], align 4 // CHECK10-NEXT: store i32 [[TMP4]], i32* [[DOTCAPTURE_EXPR_5]], align 4 // CHECK10-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 // CHECK10-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP5]], 0 diff --git a/clang/test/OpenMP/target_teams_distribute_parallel_for_dist_schedule_codegen.cpp b/clang/test/OpenMP/target_teams_distribute_parallel_for_dist_schedule_codegen.cpp index 48bdd180c93d..23e40d279777 100644 --- a/clang/test/OpenMP/target_teams_distribute_parallel_for_dist_schedule_codegen.cpp +++ b/clang/test/OpenMP/target_teams_distribute_parallel_for_dist_schedule_codegen.cpp @@ -2633,7 +2633,7 @@ int main (int argc, char **argv) { // CHECK9-NEXT: [[CONV:%.*]] = bitcast i64* [[N_ADDR]] to i32* // CHECK9-NEXT: [[TMP0:%.*]] = load i64, i64* [[VLA_ADDR]], align 8 // CHECK9-NEXT: [[TMP1:%.*]] = load i32*, i32** [[A_ADDR]], align 8 -// CHECK9-NEXT: [[TMP2:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK9-NEXT: [[TMP2:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK9-NEXT: [[CONV1:%.*]] = bitcast i64* [[N_CASTED]] to i32* // CHECK9-NEXT: store i32 [[TMP2]], i32* [[CONV1]], align 4 // CHECK9-NEXT: [[TMP3:%.*]] = load i64, i64* [[N_CASTED]], align 8 @@ -2668,7 +2668,7 @@ int main (int argc, char **argv) { // CHECK9-NEXT: [[CONV:%.*]] = bitcast i64* [[N_ADDR]] to i32* // CHECK9-NEXT: [[TMP0:%.*]] = load i64, i64* [[VLA_ADDR]], align 8 // CHECK9-NEXT: [[TMP1:%.*]] = load i32*, i32** [[A_ADDR]], align 8 -// CHECK9-NEXT: [[TMP2:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK9-NEXT: [[TMP2:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK9-NEXT: store i32 [[TMP2]], i32* [[DOTCAPTURE_EXPR_]], align 4 // CHECK9-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 // CHECK9-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP3]], 0 @@ -2714,7 +2714,7 @@ int main (int argc, char **argv) { // CHECK9-NEXT: [[TMP16:%.*]] = zext i32 [[TMP15]] to i64 // CHECK9-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK9-NEXT: [[TMP18:%.*]] = zext i32 [[TMP17]] to i64 -// CHECK9-NEXT: [[TMP19:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK9-NEXT: [[TMP19:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK9-NEXT: [[CONV6:%.*]] = bitcast i64* [[N_CASTED]] to i32* // CHECK9-NEXT: store i32 [[TMP19]], i32* [[CONV6]], align 4 // CHECK9-NEXT: [[TMP20:%.*]] = load i64, i64* [[N_CASTED]], align 8 @@ -2767,7 +2767,7 @@ int main (int argc, char **argv) { // CHECK9-NEXT: [[CONV:%.*]] = bitcast i64* [[N_ADDR]] to i32* // CHECK9-NEXT: [[TMP0:%.*]] = load i64, i64* [[VLA_ADDR]], align 8 // CHECK9-NEXT: [[TMP1:%.*]] = load i32*, i32** [[A_ADDR]], align 8 -// CHECK9-NEXT: [[TMP2:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK9-NEXT: [[TMP2:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK9-NEXT: store i32 [[TMP2]], i32* [[DOTCAPTURE_EXPR_]], align 4 // CHECK9-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 // CHECK9-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP3]], 0 @@ -2855,7 +2855,7 @@ int main (int argc, char **argv) { // CHECK9-NEXT: [[CONV:%.*]] = bitcast i64* [[N_ADDR]] to i32* // CHECK9-NEXT: [[TMP0:%.*]] = load i64, i64* [[VLA_ADDR]], align 8 // CHECK9-NEXT: [[TMP1:%.*]] = load i32*, i32** [[A_ADDR]], align 8 -// CHECK9-NEXT: [[TMP2:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK9-NEXT: [[TMP2:%.*]] = load i32, i32* [[CONV]], align 4 // 
CHECK9-NEXT: [[CONV1:%.*]] = bitcast i64* [[N_CASTED]] to i32* // CHECK9-NEXT: store i32 [[TMP2]], i32* [[CONV1]], align 4 // CHECK9-NEXT: [[TMP3:%.*]] = load i64, i64* [[N_CASTED]], align 8 @@ -2890,7 +2890,7 @@ int main (int argc, char **argv) { // CHECK9-NEXT: [[CONV:%.*]] = bitcast i64* [[N_ADDR]] to i32* // CHECK9-NEXT: [[TMP0:%.*]] = load i64, i64* [[VLA_ADDR]], align 8 // CHECK9-NEXT: [[TMP1:%.*]] = load i32*, i32** [[A_ADDR]], align 8 -// CHECK9-NEXT: [[TMP2:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK9-NEXT: [[TMP2:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK9-NEXT: store i32 [[TMP2]], i32* [[DOTCAPTURE_EXPR_]], align 4 // CHECK9-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 // CHECK9-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP3]], 0 @@ -2936,7 +2936,7 @@ int main (int argc, char **argv) { // CHECK9-NEXT: [[TMP16:%.*]] = zext i32 [[TMP15]] to i64 // CHECK9-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK9-NEXT: [[TMP18:%.*]] = zext i32 [[TMP17]] to i64 -// CHECK9-NEXT: [[TMP19:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK9-NEXT: [[TMP19:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK9-NEXT: [[CONV6:%.*]] = bitcast i64* [[N_CASTED]] to i32* // CHECK9-NEXT: store i32 [[TMP19]], i32* [[CONV6]], align 4 // CHECK9-NEXT: [[TMP20:%.*]] = load i64, i64* [[N_CASTED]], align 8 @@ -2989,7 +2989,7 @@ int main (int argc, char **argv) { // CHECK9-NEXT: [[CONV:%.*]] = bitcast i64* [[N_ADDR]] to i32* // CHECK9-NEXT: [[TMP0:%.*]] = load i64, i64* [[VLA_ADDR]], align 8 // CHECK9-NEXT: [[TMP1:%.*]] = load i32*, i32** [[A_ADDR]], align 8 -// CHECK9-NEXT: [[TMP2:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK9-NEXT: [[TMP2:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK9-NEXT: store i32 [[TMP2]], i32* [[DOTCAPTURE_EXPR_]], align 4 // CHECK9-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 // CHECK9-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP3]], 0 @@ -3081,11 +3081,11 @@ int main (int argc, char **argv) { // CHECK9-NEXT: [[TMP0:%.*]] = load i64, i64* [[VLA_ADDR]], align 8 // CHECK9-NEXT: [[TMP1:%.*]] = load i32*, i32** [[A_ADDR]], align 8 // CHECK9-NEXT: [[CONV1:%.*]] = bitcast i64* [[DOTCAPTURE_EXPR__ADDR]] to i32* -// CHECK9-NEXT: [[TMP2:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK9-NEXT: [[TMP2:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK9-NEXT: [[CONV2:%.*]] = bitcast i64* [[N_CASTED]] to i32* // CHECK9-NEXT: store i32 [[TMP2]], i32* [[CONV2]], align 4 // CHECK9-NEXT: [[TMP3:%.*]] = load i64, i64* [[N_CASTED]], align 8 -// CHECK9-NEXT: [[TMP4:%.*]] = load i32, i32* [[CONV1]], align 8 +// CHECK9-NEXT: [[TMP4:%.*]] = load i32, i32* [[CONV1]], align 4 // CHECK9-NEXT: [[CONV3:%.*]] = bitcast i64* [[DOTCAPTURE_EXPR__CASTED]] to i32* // CHECK9-NEXT: store i32 [[TMP4]], i32* [[CONV3]], align 4 // CHECK9-NEXT: [[TMP5:%.*]] = load i64, i64* [[DOTCAPTURE_EXPR__CASTED]], align 8 @@ -3124,7 +3124,7 @@ int main (int argc, char **argv) { // CHECK9-NEXT: [[TMP0:%.*]] = load i64, i64* [[VLA_ADDR]], align 8 // CHECK9-NEXT: [[TMP1:%.*]] = load i32*, i32** [[A_ADDR]], align 8 // CHECK9-NEXT: [[CONV1:%.*]] = bitcast i64* [[DOTCAPTURE_EXPR__ADDR]] to i32* -// CHECK9-NEXT: [[TMP2:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK9-NEXT: [[TMP2:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK9-NEXT: store i32 [[TMP2]], i32* [[DOTCAPTURE_EXPR_2]], align 4 // CHECK9-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_2]], align 4 // CHECK9-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP3]], 0 @@ -3141,7 +3141,7 @@ int main (int argc, char **argv) 
{ // CHECK9-NEXT: store i32 [[TMP5]], i32* [[DOTOMP_COMB_UB]], align 4 // CHECK9-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK9-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK9-NEXT: [[TMP6:%.*]] = load i32, i32* [[CONV1]], align 8 +// CHECK9-NEXT: [[TMP6:%.*]] = load i32, i32* [[CONV1]], align 4 // CHECK9-NEXT: [[TMP7:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK9-NEXT: [[TMP8:%.*]] = load i32, i32* [[TMP7]], align 4 // CHECK9-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP8]], i32 91, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 [[TMP6]]) @@ -3172,11 +3172,11 @@ int main (int argc, char **argv) { // CHECK9-NEXT: [[TMP17:%.*]] = zext i32 [[TMP16]] to i64 // CHECK9-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK9-NEXT: [[TMP19:%.*]] = zext i32 [[TMP18]] to i64 -// CHECK9-NEXT: [[TMP20:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK9-NEXT: [[TMP20:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK9-NEXT: [[CONV8:%.*]] = bitcast i64* [[N_CASTED]] to i32* // CHECK9-NEXT: store i32 [[TMP20]], i32* [[CONV8]], align 4 // CHECK9-NEXT: [[TMP21:%.*]] = load i64, i64* [[N_CASTED]], align 8 -// CHECK9-NEXT: [[TMP22:%.*]] = load i32, i32* [[CONV1]], align 8 +// CHECK9-NEXT: [[TMP22:%.*]] = load i32, i32* [[CONV1]], align 4 // CHECK9-NEXT: [[CONV9:%.*]] = bitcast i64* [[DOTCAPTURE_EXPR__CASTED]] to i32* // CHECK9-NEXT: store i32 [[TMP22]], i32* [[CONV9]], align 4 // CHECK9-NEXT: [[TMP23:%.*]] = load i64, i64* [[DOTCAPTURE_EXPR__CASTED]], align 8 @@ -3255,7 +3255,7 @@ int main (int argc, char **argv) { // CHECK9-NEXT: [[TMP0:%.*]] = load i64, i64* [[VLA_ADDR]], align 8 // CHECK9-NEXT: [[TMP1:%.*]] = load i32*, i32** [[A_ADDR]], align 8 // CHECK9-NEXT: [[CONV1:%.*]] = bitcast i64* [[DOTCAPTURE_EXPR__ADDR]] to i32* -// CHECK9-NEXT: [[TMP2:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK9-NEXT: [[TMP2:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK9-NEXT: store i32 [[TMP2]], i32* [[DOTCAPTURE_EXPR_2]], align 4 // CHECK9-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_2]], align 4 // CHECK9-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP3]], 0 @@ -3733,7 +3733,7 @@ int main (int argc, char **argv) { // CHECK9-NEXT: store i64 [[DOTCAPTURE_EXPR_]], i64* [[DOTCAPTURE_EXPR__ADDR]], align 8 // CHECK9-NEXT: [[TMP0:%.*]] = load [10 x i32]*, [10 x i32]** [[A_ADDR]], align 8 // CHECK9-NEXT: [[CONV:%.*]] = bitcast i64* [[DOTCAPTURE_EXPR__ADDR]] to i32* -// CHECK9-NEXT: [[TMP1:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK9-NEXT: [[TMP1:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK9-NEXT: [[CONV1:%.*]] = bitcast i64* [[DOTCAPTURE_EXPR__CASTED]] to i32* // CHECK9-NEXT: store i32 [[TMP1]], i32* [[CONV1]], align 4 // CHECK9-NEXT: [[TMP2:%.*]] = load i64, i64* [[DOTCAPTURE_EXPR__CASTED]], align 8 @@ -3766,7 +3766,7 @@ int main (int argc, char **argv) { // CHECK9-NEXT: store i32 9, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK9-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK9-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK9-NEXT: [[TMP1:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK9-NEXT: [[TMP1:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK9-NEXT: [[TMP2:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK9-NEXT: [[TMP3:%.*]] = load i32, i32* [[TMP2]], align 4 // CHECK9-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP3]], i32 91, i32* [[DOTOMP_IS_LAST]], i32* 
[[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 [[TMP1]]) @@ -3793,7 +3793,7 @@ int main (int argc, char **argv) { // CHECK9-NEXT: [[TMP9:%.*]] = zext i32 [[TMP8]] to i64 // CHECK9-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK9-NEXT: [[TMP11:%.*]] = zext i32 [[TMP10]] to i64 -// CHECK9-NEXT: [[TMP12:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK9-NEXT: [[TMP12:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK9-NEXT: [[CONV2:%.*]] = bitcast i64* [[DOTCAPTURE_EXPR__CASTED]] to i32* // CHECK9-NEXT: store i32 [[TMP12]], i32* [[CONV2]], align 4 // CHECK9-NEXT: [[TMP13:%.*]] = load i64, i64* [[DOTCAPTURE_EXPR__CASTED]], align 8 @@ -4173,7 +4173,7 @@ int main (int argc, char **argv) { // CHECK10-NEXT: [[CONV:%.*]] = bitcast i64* [[N_ADDR]] to i32* // CHECK10-NEXT: [[TMP0:%.*]] = load i64, i64* [[VLA_ADDR]], align 8 // CHECK10-NEXT: [[TMP1:%.*]] = load i32*, i32** [[A_ADDR]], align 8 -// CHECK10-NEXT: [[TMP2:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK10-NEXT: [[TMP2:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK10-NEXT: [[CONV1:%.*]] = bitcast i64* [[N_CASTED]] to i32* // CHECK10-NEXT: store i32 [[TMP2]], i32* [[CONV1]], align 4 // CHECK10-NEXT: [[TMP3:%.*]] = load i64, i64* [[N_CASTED]], align 8 @@ -4208,7 +4208,7 @@ int main (int argc, char **argv) { // CHECK10-NEXT: [[CONV:%.*]] = bitcast i64* [[N_ADDR]] to i32* // CHECK10-NEXT: [[TMP0:%.*]] = load i64, i64* [[VLA_ADDR]], align 8 // CHECK10-NEXT: [[TMP1:%.*]] = load i32*, i32** [[A_ADDR]], align 8 -// CHECK10-NEXT: [[TMP2:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK10-NEXT: [[TMP2:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK10-NEXT: store i32 [[TMP2]], i32* [[DOTCAPTURE_EXPR_]], align 4 // CHECK10-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 // CHECK10-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP3]], 0 @@ -4254,7 +4254,7 @@ int main (int argc, char **argv) { // CHECK10-NEXT: [[TMP16:%.*]] = zext i32 [[TMP15]] to i64 // CHECK10-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK10-NEXT: [[TMP18:%.*]] = zext i32 [[TMP17]] to i64 -// CHECK10-NEXT: [[TMP19:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK10-NEXT: [[TMP19:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK10-NEXT: [[CONV6:%.*]] = bitcast i64* [[N_CASTED]] to i32* // CHECK10-NEXT: store i32 [[TMP19]], i32* [[CONV6]], align 4 // CHECK10-NEXT: [[TMP20:%.*]] = load i64, i64* [[N_CASTED]], align 8 @@ -4307,7 +4307,7 @@ int main (int argc, char **argv) { // CHECK10-NEXT: [[CONV:%.*]] = bitcast i64* [[N_ADDR]] to i32* // CHECK10-NEXT: [[TMP0:%.*]] = load i64, i64* [[VLA_ADDR]], align 8 // CHECK10-NEXT: [[TMP1:%.*]] = load i32*, i32** [[A_ADDR]], align 8 -// CHECK10-NEXT: [[TMP2:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK10-NEXT: [[TMP2:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK10-NEXT: store i32 [[TMP2]], i32* [[DOTCAPTURE_EXPR_]], align 4 // CHECK10-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 // CHECK10-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP3]], 0 @@ -4395,7 +4395,7 @@ int main (int argc, char **argv) { // CHECK10-NEXT: [[CONV:%.*]] = bitcast i64* [[N_ADDR]] to i32* // CHECK10-NEXT: [[TMP0:%.*]] = load i64, i64* [[VLA_ADDR]], align 8 // CHECK10-NEXT: [[TMP1:%.*]] = load i32*, i32** [[A_ADDR]], align 8 -// CHECK10-NEXT: [[TMP2:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK10-NEXT: [[TMP2:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK10-NEXT: [[CONV1:%.*]] = bitcast i64* [[N_CASTED]] to i32* // CHECK10-NEXT: store i32 [[TMP2]], 
i32* [[CONV1]], align 4 // CHECK10-NEXT: [[TMP3:%.*]] = load i64, i64* [[N_CASTED]], align 8 @@ -4430,7 +4430,7 @@ int main (int argc, char **argv) { // CHECK10-NEXT: [[CONV:%.*]] = bitcast i64* [[N_ADDR]] to i32* // CHECK10-NEXT: [[TMP0:%.*]] = load i64, i64* [[VLA_ADDR]], align 8 // CHECK10-NEXT: [[TMP1:%.*]] = load i32*, i32** [[A_ADDR]], align 8 -// CHECK10-NEXT: [[TMP2:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK10-NEXT: [[TMP2:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK10-NEXT: store i32 [[TMP2]], i32* [[DOTCAPTURE_EXPR_]], align 4 // CHECK10-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 // CHECK10-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP3]], 0 @@ -4476,7 +4476,7 @@ int main (int argc, char **argv) { // CHECK10-NEXT: [[TMP16:%.*]] = zext i32 [[TMP15]] to i64 // CHECK10-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK10-NEXT: [[TMP18:%.*]] = zext i32 [[TMP17]] to i64 -// CHECK10-NEXT: [[TMP19:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK10-NEXT: [[TMP19:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK10-NEXT: [[CONV6:%.*]] = bitcast i64* [[N_CASTED]] to i32* // CHECK10-NEXT: store i32 [[TMP19]], i32* [[CONV6]], align 4 // CHECK10-NEXT: [[TMP20:%.*]] = load i64, i64* [[N_CASTED]], align 8 @@ -4529,7 +4529,7 @@ int main (int argc, char **argv) { // CHECK10-NEXT: [[CONV:%.*]] = bitcast i64* [[N_ADDR]] to i32* // CHECK10-NEXT: [[TMP0:%.*]] = load i64, i64* [[VLA_ADDR]], align 8 // CHECK10-NEXT: [[TMP1:%.*]] = load i32*, i32** [[A_ADDR]], align 8 -// CHECK10-NEXT: [[TMP2:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK10-NEXT: [[TMP2:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK10-NEXT: store i32 [[TMP2]], i32* [[DOTCAPTURE_EXPR_]], align 4 // CHECK10-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 // CHECK10-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP3]], 0 @@ -4621,11 +4621,11 @@ int main (int argc, char **argv) { // CHECK10-NEXT: [[TMP0:%.*]] = load i64, i64* [[VLA_ADDR]], align 8 // CHECK10-NEXT: [[TMP1:%.*]] = load i32*, i32** [[A_ADDR]], align 8 // CHECK10-NEXT: [[CONV1:%.*]] = bitcast i64* [[DOTCAPTURE_EXPR__ADDR]] to i32* -// CHECK10-NEXT: [[TMP2:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK10-NEXT: [[TMP2:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK10-NEXT: [[CONV2:%.*]] = bitcast i64* [[N_CASTED]] to i32* // CHECK10-NEXT: store i32 [[TMP2]], i32* [[CONV2]], align 4 // CHECK10-NEXT: [[TMP3:%.*]] = load i64, i64* [[N_CASTED]], align 8 -// CHECK10-NEXT: [[TMP4:%.*]] = load i32, i32* [[CONV1]], align 8 +// CHECK10-NEXT: [[TMP4:%.*]] = load i32, i32* [[CONV1]], align 4 // CHECK10-NEXT: [[CONV3:%.*]] = bitcast i64* [[DOTCAPTURE_EXPR__CASTED]] to i32* // CHECK10-NEXT: store i32 [[TMP4]], i32* [[CONV3]], align 4 // CHECK10-NEXT: [[TMP5:%.*]] = load i64, i64* [[DOTCAPTURE_EXPR__CASTED]], align 8 @@ -4664,7 +4664,7 @@ int main (int argc, char **argv) { // CHECK10-NEXT: [[TMP0:%.*]] = load i64, i64* [[VLA_ADDR]], align 8 // CHECK10-NEXT: [[TMP1:%.*]] = load i32*, i32** [[A_ADDR]], align 8 // CHECK10-NEXT: [[CONV1:%.*]] = bitcast i64* [[DOTCAPTURE_EXPR__ADDR]] to i32* -// CHECK10-NEXT: [[TMP2:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK10-NEXT: [[TMP2:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK10-NEXT: store i32 [[TMP2]], i32* [[DOTCAPTURE_EXPR_2]], align 4 // CHECK10-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_2]], align 4 // CHECK10-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP3]], 0 @@ -4681,7 +4681,7 @@ int main (int argc, char **argv) { // CHECK10-NEXT: store i32 [[TMP5]], i32* 
[[DOTOMP_COMB_UB]], align 4 // CHECK10-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK10-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK10-NEXT: [[TMP6:%.*]] = load i32, i32* [[CONV1]], align 8 +// CHECK10-NEXT: [[TMP6:%.*]] = load i32, i32* [[CONV1]], align 4 // CHECK10-NEXT: [[TMP7:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK10-NEXT: [[TMP8:%.*]] = load i32, i32* [[TMP7]], align 4 // CHECK10-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP8]], i32 91, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 [[TMP6]]) @@ -4712,11 +4712,11 @@ int main (int argc, char **argv) { // CHECK10-NEXT: [[TMP17:%.*]] = zext i32 [[TMP16]] to i64 // CHECK10-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK10-NEXT: [[TMP19:%.*]] = zext i32 [[TMP18]] to i64 -// CHECK10-NEXT: [[TMP20:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK10-NEXT: [[TMP20:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK10-NEXT: [[CONV8:%.*]] = bitcast i64* [[N_CASTED]] to i32* // CHECK10-NEXT: store i32 [[TMP20]], i32* [[CONV8]], align 4 // CHECK10-NEXT: [[TMP21:%.*]] = load i64, i64* [[N_CASTED]], align 8 -// CHECK10-NEXT: [[TMP22:%.*]] = load i32, i32* [[CONV1]], align 8 +// CHECK10-NEXT: [[TMP22:%.*]] = load i32, i32* [[CONV1]], align 4 // CHECK10-NEXT: [[CONV9:%.*]] = bitcast i64* [[DOTCAPTURE_EXPR__CASTED]] to i32* // CHECK10-NEXT: store i32 [[TMP22]], i32* [[CONV9]], align 4 // CHECK10-NEXT: [[TMP23:%.*]] = load i64, i64* [[DOTCAPTURE_EXPR__CASTED]], align 8 @@ -4795,7 +4795,7 @@ int main (int argc, char **argv) { // CHECK10-NEXT: [[TMP0:%.*]] = load i64, i64* [[VLA_ADDR]], align 8 // CHECK10-NEXT: [[TMP1:%.*]] = load i32*, i32** [[A_ADDR]], align 8 // CHECK10-NEXT: [[CONV1:%.*]] = bitcast i64* [[DOTCAPTURE_EXPR__ADDR]] to i32* -// CHECK10-NEXT: [[TMP2:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK10-NEXT: [[TMP2:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK10-NEXT: store i32 [[TMP2]], i32* [[DOTCAPTURE_EXPR_2]], align 4 // CHECK10-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_2]], align 4 // CHECK10-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP3]], 0 @@ -5273,7 +5273,7 @@ int main (int argc, char **argv) { // CHECK10-NEXT: store i64 [[DOTCAPTURE_EXPR_]], i64* [[DOTCAPTURE_EXPR__ADDR]], align 8 // CHECK10-NEXT: [[TMP0:%.*]] = load [10 x i32]*, [10 x i32]** [[A_ADDR]], align 8 // CHECK10-NEXT: [[CONV:%.*]] = bitcast i64* [[DOTCAPTURE_EXPR__ADDR]] to i32* -// CHECK10-NEXT: [[TMP1:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK10-NEXT: [[TMP1:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK10-NEXT: [[CONV1:%.*]] = bitcast i64* [[DOTCAPTURE_EXPR__CASTED]] to i32* // CHECK10-NEXT: store i32 [[TMP1]], i32* [[CONV1]], align 4 // CHECK10-NEXT: [[TMP2:%.*]] = load i64, i64* [[DOTCAPTURE_EXPR__CASTED]], align 8 @@ -5306,7 +5306,7 @@ int main (int argc, char **argv) { // CHECK10-NEXT: store i32 9, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK10-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK10-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK10-NEXT: [[TMP1:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK10-NEXT: [[TMP1:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK10-NEXT: [[TMP2:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK10-NEXT: [[TMP3:%.*]] = load i32, i32* [[TMP2]], align 4 // CHECK10-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP3]], i32 91, i32* [[DOTOMP_IS_LAST]], i32* 
[[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 [[TMP1]]) @@ -5333,7 +5333,7 @@ int main (int argc, char **argv) { // CHECK10-NEXT: [[TMP9:%.*]] = zext i32 [[TMP8]] to i64 // CHECK10-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK10-NEXT: [[TMP11:%.*]] = zext i32 [[TMP10]] to i64 -// CHECK10-NEXT: [[TMP12:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK10-NEXT: [[TMP12:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK10-NEXT: [[CONV2:%.*]] = bitcast i64* [[DOTCAPTURE_EXPR__CASTED]] to i32* // CHECK10-NEXT: store i32 [[TMP12]], i32* [[CONV2]], align 4 // CHECK10-NEXT: [[TMP13:%.*]] = load i64, i64* [[DOTCAPTURE_EXPR__CASTED]], align 8 diff --git a/clang/test/OpenMP/target_teams_distribute_parallel_for_firstprivate_codegen.cpp b/clang/test/OpenMP/target_teams_distribute_parallel_for_firstprivate_codegen.cpp index 1eb2cdd9018e..643cdf2cd375 100644 --- a/clang/test/OpenMP/target_teams_distribute_parallel_for_firstprivate_codegen.cpp +++ b/clang/test/OpenMP/target_teams_distribute_parallel_for_firstprivate_codegen.cpp @@ -406,11 +406,11 @@ int main() { // CHECK1-NEXT: [[TMP1:%.*]] = load [2 x %struct.S]*, [2 x %struct.S]** [[S_ARR_ADDR]], align 8 // CHECK1-NEXT: [[TMP2:%.*]] = load %struct.S*, %struct.S** [[VAR_ADDR]], align 8 // CHECK1-NEXT: [[CONV1:%.*]] = bitcast i64* [[SIVAR_ADDR]] to i32* -// CHECK1-NEXT: [[TMP3:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK1-NEXT: [[TMP3:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK1-NEXT: [[CONV2:%.*]] = bitcast i64* [[T_VAR_CASTED]] to i32* // CHECK1-NEXT: store i32 [[TMP3]], i32* [[CONV2]], align 4 // CHECK1-NEXT: [[TMP4:%.*]] = load i64, i64* [[T_VAR_CASTED]], align 8 -// CHECK1-NEXT: [[TMP5:%.*]] = load i32, i32* [[CONV1]], align 8 +// CHECK1-NEXT: [[TMP5:%.*]] = load i32, i32* [[CONV1]], align 4 // CHECK1-NEXT: [[CONV3:%.*]] = bitcast i64* [[SIVAR_CASTED]] to i32* // CHECK1-NEXT: store i32 [[TMP5]], i32* [[CONV3]], align 4 // CHECK1-NEXT: [[TMP6:%.*]] = load i64, i64* [[SIVAR_CASTED]], align 8 @@ -509,11 +509,11 @@ int main() { // CHECK1-NEXT: [[TMP15:%.*]] = zext i32 [[TMP14]] to i64 // CHECK1-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK1-NEXT: [[TMP17:%.*]] = zext i32 [[TMP16]] to i64 -// CHECK1-NEXT: [[TMP18:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK1-NEXT: [[TMP18:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK1-NEXT: [[CONV8:%.*]] = bitcast i64* [[T_VAR_CASTED]] to i32* // CHECK1-NEXT: store i32 [[TMP18]], i32* [[CONV8]], align 4 // CHECK1-NEXT: [[TMP19:%.*]] = load i64, i64* [[T_VAR_CASTED]], align 8 -// CHECK1-NEXT: [[TMP20:%.*]] = load i32, i32* [[CONV1]], align 8 +// CHECK1-NEXT: [[TMP20:%.*]] = load i32, i32* [[CONV1]], align 4 // CHECK1-NEXT: [[CONV9:%.*]] = bitcast i64* [[SIVAR_CASTED]] to i32* // CHECK1-NEXT: store i32 [[TMP20]], i32* [[CONV9]], align 4 // CHECK1-NEXT: [[TMP21:%.*]] = load i64, i64* [[SIVAR_CASTED]], align 8 @@ -677,7 +677,7 @@ int main() { // CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP16]], 1 // CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK1-NEXT: store i32 [[ADD]], i32* [[I]], align 4 -// CHECK1-NEXT: [[TMP17:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK1-NEXT: [[TMP17:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK1-NEXT: [[TMP18:%.*]] = load i32, i32* [[I]], align 4 // CHECK1-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP18]] to i64 // CHECK1-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x i32], [2 x i32]* [[VEC4]], i64 0, i64 [[IDXPROM]] @@ -689,9 +689,9 @@ int main() { // CHECK1-NEXT: 
[[TMP21:%.*]] = bitcast %struct.S* [[VAR7]] to i8* // CHECK1-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 [[TMP20]], i8* align 4 [[TMP21]], i64 4, i1 false) // CHECK1-NEXT: [[TMP22:%.*]] = load i32, i32* [[I]], align 4 -// CHECK1-NEXT: [[TMP23:%.*]] = load i32, i32* [[CONV1]], align 8 +// CHECK1-NEXT: [[TMP23:%.*]] = load i32, i32* [[CONV1]], align 4 // CHECK1-NEXT: [[ADD12:%.*]] = add nsw i32 [[TMP23]], [[TMP22]] -// CHECK1-NEXT: store i32 [[ADD12]], i32* [[CONV1]], align 8 +// CHECK1-NEXT: store i32 [[ADD12]], i32* [[CONV1]], align 4 // CHECK1-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK1: omp.body.continue: // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] @@ -892,7 +892,7 @@ int main() { // CHECK1-NEXT: [[TMP1:%.*]] = load [2 x %struct.S.0]*, [2 x %struct.S.0]** [[S_ARR_ADDR]], align 8 // CHECK1-NEXT: [[TMP2:%.*]] = load %struct.S.0*, %struct.S.0** [[VAR_ADDR]], align 8 // CHECK1-NEXT: store %struct.S.0* [[TMP2]], %struct.S.0** [[TMP]], align 8 -// CHECK1-NEXT: [[TMP3:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK1-NEXT: [[TMP3:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK1-NEXT: [[CONV1:%.*]] = bitcast i64* [[T_VAR_CASTED]] to i32* // CHECK1-NEXT: store i32 [[TMP3]], i32* [[CONV1]], align 4 // CHECK1-NEXT: [[TMP4:%.*]] = load i64, i64* [[T_VAR_CASTED]], align 8 @@ -993,7 +993,7 @@ int main() { // CHECK1-NEXT: [[TMP16:%.*]] = zext i32 [[TMP15]] to i64 // CHECK1-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK1-NEXT: [[TMP18:%.*]] = zext i32 [[TMP17]] to i64 -// CHECK1-NEXT: [[TMP19:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK1-NEXT: [[TMP19:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK1-NEXT: [[CONV9:%.*]] = bitcast i64* [[T_VAR_CASTED]] to i32* // CHECK1-NEXT: store i32 [[TMP19]], i32* [[CONV9]], align 4 // CHECK1-NEXT: [[TMP20:%.*]] = load i64, i64* [[T_VAR_CASTED]], align 8 @@ -1140,7 +1140,7 @@ int main() { // CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP17]], 1 // CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK1-NEXT: store i32 [[ADD]], i32* [[I]], align 4 -// CHECK1-NEXT: [[TMP18:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK1-NEXT: [[TMP18:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK1-NEXT: [[TMP19:%.*]] = load i32, i32* [[I]], align 4 // CHECK1-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP19]] to i64 // CHECK1-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x i32], [2 x i32]* [[VEC4]], i64 0, i64 [[IDXPROM]] @@ -1468,11 +1468,11 @@ int main() { // CHECK2-NEXT: [[TMP1:%.*]] = load [2 x %struct.S]*, [2 x %struct.S]** [[S_ARR_ADDR]], align 8 // CHECK2-NEXT: [[TMP2:%.*]] = load %struct.S*, %struct.S** [[VAR_ADDR]], align 8 // CHECK2-NEXT: [[CONV1:%.*]] = bitcast i64* [[SIVAR_ADDR]] to i32* -// CHECK2-NEXT: [[TMP3:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK2-NEXT: [[TMP3:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK2-NEXT: [[CONV2:%.*]] = bitcast i64* [[T_VAR_CASTED]] to i32* // CHECK2-NEXT: store i32 [[TMP3]], i32* [[CONV2]], align 4 // CHECK2-NEXT: [[TMP4:%.*]] = load i64, i64* [[T_VAR_CASTED]], align 8 -// CHECK2-NEXT: [[TMP5:%.*]] = load i32, i32* [[CONV1]], align 8 +// CHECK2-NEXT: [[TMP5:%.*]] = load i32, i32* [[CONV1]], align 4 // CHECK2-NEXT: [[CONV3:%.*]] = bitcast i64* [[SIVAR_CASTED]] to i32* // CHECK2-NEXT: store i32 [[TMP5]], i32* [[CONV3]], align 4 // CHECK2-NEXT: [[TMP6:%.*]] = load i64, i64* [[SIVAR_CASTED]], align 8 @@ -1571,11 +1571,11 @@ int main() { // CHECK2-NEXT: [[TMP15:%.*]] = zext i32 [[TMP14]] to i64 // CHECK2-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 
// CHECK2-NEXT: [[TMP17:%.*]] = zext i32 [[TMP16]] to i64 -// CHECK2-NEXT: [[TMP18:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK2-NEXT: [[TMP18:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK2-NEXT: [[CONV8:%.*]] = bitcast i64* [[T_VAR_CASTED]] to i32* // CHECK2-NEXT: store i32 [[TMP18]], i32* [[CONV8]], align 4 // CHECK2-NEXT: [[TMP19:%.*]] = load i64, i64* [[T_VAR_CASTED]], align 8 -// CHECK2-NEXT: [[TMP20:%.*]] = load i32, i32* [[CONV1]], align 8 +// CHECK2-NEXT: [[TMP20:%.*]] = load i32, i32* [[CONV1]], align 4 // CHECK2-NEXT: [[CONV9:%.*]] = bitcast i64* [[SIVAR_CASTED]] to i32* // CHECK2-NEXT: store i32 [[TMP20]], i32* [[CONV9]], align 4 // CHECK2-NEXT: [[TMP21:%.*]] = load i64, i64* [[SIVAR_CASTED]], align 8 @@ -1739,7 +1739,7 @@ int main() { // CHECK2-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP16]], 1 // CHECK2-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK2-NEXT: store i32 [[ADD]], i32* [[I]], align 4 -// CHECK2-NEXT: [[TMP17:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK2-NEXT: [[TMP17:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK2-NEXT: [[TMP18:%.*]] = load i32, i32* [[I]], align 4 // CHECK2-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP18]] to i64 // CHECK2-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x i32], [2 x i32]* [[VEC4]], i64 0, i64 [[IDXPROM]] @@ -1751,9 +1751,9 @@ int main() { // CHECK2-NEXT: [[TMP21:%.*]] = bitcast %struct.S* [[VAR7]] to i8* // CHECK2-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 [[TMP20]], i8* align 4 [[TMP21]], i64 4, i1 false) // CHECK2-NEXT: [[TMP22:%.*]] = load i32, i32* [[I]], align 4 -// CHECK2-NEXT: [[TMP23:%.*]] = load i32, i32* [[CONV1]], align 8 +// CHECK2-NEXT: [[TMP23:%.*]] = load i32, i32* [[CONV1]], align 4 // CHECK2-NEXT: [[ADD12:%.*]] = add nsw i32 [[TMP23]], [[TMP22]] -// CHECK2-NEXT: store i32 [[ADD12]], i32* [[CONV1]], align 8 +// CHECK2-NEXT: store i32 [[ADD12]], i32* [[CONV1]], align 4 // CHECK2-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK2: omp.body.continue: // CHECK2-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] @@ -1954,7 +1954,7 @@ int main() { // CHECK2-NEXT: [[TMP1:%.*]] = load [2 x %struct.S.0]*, [2 x %struct.S.0]** [[S_ARR_ADDR]], align 8 // CHECK2-NEXT: [[TMP2:%.*]] = load %struct.S.0*, %struct.S.0** [[VAR_ADDR]], align 8 // CHECK2-NEXT: store %struct.S.0* [[TMP2]], %struct.S.0** [[TMP]], align 8 -// CHECK2-NEXT: [[TMP3:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK2-NEXT: [[TMP3:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK2-NEXT: [[CONV1:%.*]] = bitcast i64* [[T_VAR_CASTED]] to i32* // CHECK2-NEXT: store i32 [[TMP3]], i32* [[CONV1]], align 4 // CHECK2-NEXT: [[TMP4:%.*]] = load i64, i64* [[T_VAR_CASTED]], align 8 @@ -2055,7 +2055,7 @@ int main() { // CHECK2-NEXT: [[TMP16:%.*]] = zext i32 [[TMP15]] to i64 // CHECK2-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK2-NEXT: [[TMP18:%.*]] = zext i32 [[TMP17]] to i64 -// CHECK2-NEXT: [[TMP19:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK2-NEXT: [[TMP19:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK2-NEXT: [[CONV9:%.*]] = bitcast i64* [[T_VAR_CASTED]] to i32* // CHECK2-NEXT: store i32 [[TMP19]], i32* [[CONV9]], align 4 // CHECK2-NEXT: [[TMP20:%.*]] = load i64, i64* [[T_VAR_CASTED]], align 8 @@ -2202,7 +2202,7 @@ int main() { // CHECK2-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP17]], 1 // CHECK2-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK2-NEXT: store i32 [[ADD]], i32* [[I]], align 4 -// CHECK2-NEXT: [[TMP18:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK2-NEXT: [[TMP18:%.*]] = load i32, i32* [[CONV]], align 4 
// CHECK2-NEXT: [[TMP19:%.*]] = load i32, i32* [[I]], align 4 // CHECK2-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP19]] to i64 // CHECK2-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x i32], [2 x i32]* [[VEC4]], i64 0, i64 [[IDXPROM]] @@ -4528,7 +4528,7 @@ int main() { // CHECK5-NEXT: [[CONV1:%.*]] = bitcast i64* [[G1_ADDR]] to i32* // CHECK5-NEXT: [[CONV2:%.*]] = bitcast i64* [[SIVAR_ADDR]] to i32* // CHECK5-NEXT: store i32* [[CONV1]], i32** [[TMP]], align 8 -// CHECK5-NEXT: [[TMP0:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK5-NEXT: [[TMP0:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK5-NEXT: [[CONV3:%.*]] = bitcast i64* [[G_CASTED]] to i32* // CHECK5-NEXT: store i32 [[TMP0]], i32* [[CONV3]], align 4 // CHECK5-NEXT: [[TMP1:%.*]] = load i64, i64* [[G_CASTED]], align 8 @@ -4537,7 +4537,7 @@ int main() { // CHECK5-NEXT: [[CONV4:%.*]] = bitcast i64* [[G1_CASTED]] to i32* // CHECK5-NEXT: store i32 [[TMP3]], i32* [[CONV4]], align 4 // CHECK5-NEXT: [[TMP4:%.*]] = load i64, i64* [[G1_CASTED]], align 8 -// CHECK5-NEXT: [[TMP5:%.*]] = load i32, i32* [[CONV2]], align 8 +// CHECK5-NEXT: [[TMP5:%.*]] = load i32, i32* [[CONV2]], align 4 // CHECK5-NEXT: [[CONV5:%.*]] = bitcast i64* [[SIVAR_CASTED]] to i32* // CHECK5-NEXT: store i32 [[TMP5]], i32* [[CONV5]], align 4 // CHECK5-NEXT: [[TMP6:%.*]] = load i64, i64* [[SIVAR_CASTED]], align 8 @@ -4604,7 +4604,7 @@ int main() { // CHECK5-NEXT: [[TMP8:%.*]] = zext i32 [[TMP7]] to i64 // CHECK5-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK5-NEXT: [[TMP10:%.*]] = zext i32 [[TMP9]] to i64 -// CHECK5-NEXT: [[TMP11:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK5-NEXT: [[TMP11:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK5-NEXT: [[CONV5:%.*]] = bitcast i64* [[G_CASTED]] to i32* // CHECK5-NEXT: store i32 [[TMP11]], i32* [[CONV5]], align 4 // CHECK5-NEXT: [[TMP12:%.*]] = load i64, i64* [[G_CASTED]], align 8 @@ -4613,7 +4613,7 @@ int main() { // CHECK5-NEXT: [[CONV6:%.*]] = bitcast i64* [[G1_CASTED]] to i32* // CHECK5-NEXT: store i32 [[TMP14]], i32* [[CONV6]], align 4 // CHECK5-NEXT: [[TMP15:%.*]] = load i64, i64* [[G1_CASTED]], align 8 -// CHECK5-NEXT: [[TMP16:%.*]] = load i32, i32* [[CONV2]], align 8 +// CHECK5-NEXT: [[TMP16:%.*]] = load i32, i32* [[CONV2]], align 4 // CHECK5-NEXT: [[CONV7:%.*]] = bitcast i64* [[SIVAR_CASTED]] to i32* // CHECK5-NEXT: store i32 [[TMP16]], i32* [[CONV7]], align 4 // CHECK5-NEXT: [[TMP17:%.*]] = load i64, i64* [[SIVAR_CASTED]], align 8 @@ -4699,10 +4699,10 @@ int main() { // CHECK5-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP9]], 1 // CHECK5-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK5-NEXT: store i32 [[ADD]], i32* [[I]], align 4 -// CHECK5-NEXT: store i32 1, i32* [[CONV]], align 8 +// CHECK5-NEXT: store i32 1, i32* [[CONV]], align 4 // CHECK5-NEXT: [[TMP10:%.*]] = load i32*, i32** [[TMP]], align 8 // CHECK5-NEXT: store volatile i32 1, i32* [[TMP10]], align 4 -// CHECK5-NEXT: store i32 2, i32* [[CONV2]], align 8 +// CHECK5-NEXT: store i32 2, i32* [[CONV2]], align 4 // CHECK5-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[CLASS_ANON_0]], %class.anon.0* [[REF_TMP]], i32 0, i32 0 // CHECK5-NEXT: store i32* [[CONV]], i32** [[TMP11]], align 8 // CHECK5-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[CLASS_ANON_0]], %class.anon.0* [[REF_TMP]], i32 0, i32 1 @@ -4882,7 +4882,7 @@ int main() { // CHECK6-NEXT: [[CONV1:%.*]] = bitcast i64* [[G1_ADDR]] to i32* // CHECK6-NEXT: [[CONV2:%.*]] = bitcast i64* [[SIVAR_ADDR]] to i32* // CHECK6-NEXT: store i32* [[CONV1]], i32** [[TMP]], align 8 -// CHECK6-NEXT: 
[[TMP0:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK6-NEXT: [[TMP0:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK6-NEXT: [[CONV3:%.*]] = bitcast i64* [[G_CASTED]] to i32* // CHECK6-NEXT: store i32 [[TMP0]], i32* [[CONV3]], align 4 // CHECK6-NEXT: [[TMP1:%.*]] = load i64, i64* [[G_CASTED]], align 8 @@ -4891,7 +4891,7 @@ int main() { // CHECK6-NEXT: [[CONV4:%.*]] = bitcast i64* [[G1_CASTED]] to i32* // CHECK6-NEXT: store i32 [[TMP3]], i32* [[CONV4]], align 4 // CHECK6-NEXT: [[TMP4:%.*]] = load i64, i64* [[G1_CASTED]], align 8 -// CHECK6-NEXT: [[TMP5:%.*]] = load i32, i32* [[CONV2]], align 8 +// CHECK6-NEXT: [[TMP5:%.*]] = load i32, i32* [[CONV2]], align 4 // CHECK6-NEXT: [[CONV5:%.*]] = bitcast i64* [[SIVAR_CASTED]] to i32* // CHECK6-NEXT: store i32 [[TMP5]], i32* [[CONV5]], align 4 // CHECK6-NEXT: [[TMP6:%.*]] = load i64, i64* [[SIVAR_CASTED]], align 8 @@ -4958,7 +4958,7 @@ int main() { // CHECK6-NEXT: [[TMP8:%.*]] = zext i32 [[TMP7]] to i64 // CHECK6-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK6-NEXT: [[TMP10:%.*]] = zext i32 [[TMP9]] to i64 -// CHECK6-NEXT: [[TMP11:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK6-NEXT: [[TMP11:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK6-NEXT: [[CONV5:%.*]] = bitcast i64* [[G_CASTED]] to i32* // CHECK6-NEXT: store i32 [[TMP11]], i32* [[CONV5]], align 4 // CHECK6-NEXT: [[TMP12:%.*]] = load i64, i64* [[G_CASTED]], align 8 @@ -4967,7 +4967,7 @@ int main() { // CHECK6-NEXT: [[CONV6:%.*]] = bitcast i64* [[G1_CASTED]] to i32* // CHECK6-NEXT: store i32 [[TMP14]], i32* [[CONV6]], align 4 // CHECK6-NEXT: [[TMP15:%.*]] = load i64, i64* [[G1_CASTED]], align 8 -// CHECK6-NEXT: [[TMP16:%.*]] = load i32, i32* [[CONV2]], align 8 +// CHECK6-NEXT: [[TMP16:%.*]] = load i32, i32* [[CONV2]], align 4 // CHECK6-NEXT: [[CONV7:%.*]] = bitcast i64* [[SIVAR_CASTED]] to i32* // CHECK6-NEXT: store i32 [[TMP16]], i32* [[CONV7]], align 4 // CHECK6-NEXT: [[TMP17:%.*]] = load i64, i64* [[SIVAR_CASTED]], align 8 @@ -5053,10 +5053,10 @@ int main() { // CHECK6-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP9]], 1 // CHECK6-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK6-NEXT: store i32 [[ADD]], i32* [[I]], align 4 -// CHECK6-NEXT: store i32 1, i32* [[CONV]], align 8 +// CHECK6-NEXT: store i32 1, i32* [[CONV]], align 4 // CHECK6-NEXT: [[TMP10:%.*]] = load i32*, i32** [[TMP]], align 8 // CHECK6-NEXT: store volatile i32 1, i32* [[TMP10]], align 4 -// CHECK6-NEXT: store i32 2, i32* [[CONV2]], align 8 +// CHECK6-NEXT: store i32 2, i32* [[CONV2]], align 4 // CHECK6-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[CLASS_ANON_0]], %class.anon.0* [[REF_TMP]], i32 0, i32 0 // CHECK6-NEXT: store i32* [[CONV]], i32** [[TMP11]], align 8 // CHECK6-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[CLASS_ANON_0]], %class.anon.0* [[REF_TMP]], i32 0, i32 1 @@ -5116,11 +5116,11 @@ int main() { // CHECK13-NEXT: [[TMP1:%.*]] = load [2 x %struct.S]*, [2 x %struct.S]** [[S_ARR_ADDR]], align 8 // CHECK13-NEXT: [[TMP2:%.*]] = load %struct.S*, %struct.S** [[VAR_ADDR]], align 8 // CHECK13-NEXT: [[CONV1:%.*]] = bitcast i64* [[SIVAR_ADDR]] to i32* -// CHECK13-NEXT: [[TMP3:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK13-NEXT: [[TMP3:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK13-NEXT: [[CONV2:%.*]] = bitcast i64* [[T_VAR_CASTED]] to i32* // CHECK13-NEXT: store i32 [[TMP3]], i32* [[CONV2]], align 4 // CHECK13-NEXT: [[TMP4:%.*]] = load i64, i64* [[T_VAR_CASTED]], align 8 -// CHECK13-NEXT: [[TMP5:%.*]] = load i32, i32* [[CONV1]], align 8 +// CHECK13-NEXT: [[TMP5:%.*]] = load 
i32, i32* [[CONV1]], align 4 // CHECK13-NEXT: [[CONV3:%.*]] = bitcast i64* [[SIVAR_CASTED]] to i32* // CHECK13-NEXT: store i32 [[TMP5]], i32* [[CONV3]], align 4 // CHECK13-NEXT: [[TMP6:%.*]] = load i64, i64* [[SIVAR_CASTED]], align 8 @@ -5219,11 +5219,11 @@ int main() { // CHECK13-NEXT: [[TMP15:%.*]] = zext i32 [[TMP14]] to i64 // CHECK13-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK13-NEXT: [[TMP17:%.*]] = zext i32 [[TMP16]] to i64 -// CHECK13-NEXT: [[TMP18:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK13-NEXT: [[TMP18:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK13-NEXT: [[CONV8:%.*]] = bitcast i64* [[T_VAR_CASTED]] to i32* // CHECK13-NEXT: store i32 [[TMP18]], i32* [[CONV8]], align 4 // CHECK13-NEXT: [[TMP19:%.*]] = load i64, i64* [[T_VAR_CASTED]], align 8 -// CHECK13-NEXT: [[TMP20:%.*]] = load i32, i32* [[CONV1]], align 8 +// CHECK13-NEXT: [[TMP20:%.*]] = load i32, i32* [[CONV1]], align 4 // CHECK13-NEXT: [[CONV9:%.*]] = bitcast i64* [[SIVAR_CASTED]] to i32* // CHECK13-NEXT: store i32 [[TMP20]], i32* [[CONV9]], align 4 // CHECK13-NEXT: [[TMP21:%.*]] = load i64, i64* [[SIVAR_CASTED]], align 8 @@ -5387,7 +5387,7 @@ int main() { // CHECK13-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP16]], 1 // CHECK13-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK13-NEXT: store i32 [[ADD]], i32* [[I]], align 4 -// CHECK13-NEXT: [[TMP17:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK13-NEXT: [[TMP17:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK13-NEXT: [[TMP18:%.*]] = load i32, i32* [[I]], align 4 // CHECK13-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP18]] to i64 // CHECK13-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x i32], [2 x i32]* [[VEC4]], i64 0, i64 [[IDXPROM]] @@ -5399,9 +5399,9 @@ int main() { // CHECK13-NEXT: [[TMP21:%.*]] = bitcast %struct.S* [[VAR7]] to i8* // CHECK13-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 [[TMP20]], i8* align 4 [[TMP21]], i64 4, i1 false) // CHECK13-NEXT: [[TMP22:%.*]] = load i32, i32* [[I]], align 4 -// CHECK13-NEXT: [[TMP23:%.*]] = load i32, i32* [[CONV1]], align 8 +// CHECK13-NEXT: [[TMP23:%.*]] = load i32, i32* [[CONV1]], align 4 // CHECK13-NEXT: [[ADD12:%.*]] = add nsw i32 [[TMP23]], [[TMP22]] -// CHECK13-NEXT: store i32 [[ADD12]], i32* [[CONV1]], align 8 +// CHECK13-NEXT: store i32 [[ADD12]], i32* [[CONV1]], align 4 // CHECK13-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK13: omp.body.continue: // CHECK13-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] @@ -5458,7 +5458,7 @@ int main() { // CHECK13-NEXT: [[TMP1:%.*]] = load [2 x %struct.S.0]*, [2 x %struct.S.0]** [[S_ARR_ADDR]], align 8 // CHECK13-NEXT: [[TMP2:%.*]] = load %struct.S.0*, %struct.S.0** [[VAR_ADDR]], align 8 // CHECK13-NEXT: store %struct.S.0* [[TMP2]], %struct.S.0** [[TMP]], align 8 -// CHECK13-NEXT: [[TMP3:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK13-NEXT: [[TMP3:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK13-NEXT: [[CONV1:%.*]] = bitcast i64* [[T_VAR_CASTED]] to i32* // CHECK13-NEXT: store i32 [[TMP3]], i32* [[CONV1]], align 4 // CHECK13-NEXT: [[TMP4:%.*]] = load i64, i64* [[T_VAR_CASTED]], align 8 @@ -5559,7 +5559,7 @@ int main() { // CHECK13-NEXT: [[TMP16:%.*]] = zext i32 [[TMP15]] to i64 // CHECK13-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK13-NEXT: [[TMP18:%.*]] = zext i32 [[TMP17]] to i64 -// CHECK13-NEXT: [[TMP19:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK13-NEXT: [[TMP19:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK13-NEXT: [[CONV9:%.*]] = bitcast i64* [[T_VAR_CASTED]] to i32* // 
CHECK13-NEXT: store i32 [[TMP19]], i32* [[CONV9]], align 4 // CHECK13-NEXT: [[TMP20:%.*]] = load i64, i64* [[T_VAR_CASTED]], align 8 @@ -5706,7 +5706,7 @@ int main() { // CHECK13-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP17]], 1 // CHECK13-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK13-NEXT: store i32 [[ADD]], i32* [[I]], align 4 -// CHECK13-NEXT: [[TMP18:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK13-NEXT: [[TMP18:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK13-NEXT: [[TMP19:%.*]] = load i32, i32* [[I]], align 4 // CHECK13-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP19]] to i64 // CHECK13-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x i32], [2 x i32]* [[VEC4]], i64 0, i64 [[IDXPROM]] @@ -5855,11 +5855,11 @@ int main() { // CHECK14-NEXT: [[TMP1:%.*]] = load [2 x %struct.S]*, [2 x %struct.S]** [[S_ARR_ADDR]], align 8 // CHECK14-NEXT: [[TMP2:%.*]] = load %struct.S*, %struct.S** [[VAR_ADDR]], align 8 // CHECK14-NEXT: [[CONV1:%.*]] = bitcast i64* [[SIVAR_ADDR]] to i32* -// CHECK14-NEXT: [[TMP3:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK14-NEXT: [[TMP3:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK14-NEXT: [[CONV2:%.*]] = bitcast i64* [[T_VAR_CASTED]] to i32* // CHECK14-NEXT: store i32 [[TMP3]], i32* [[CONV2]], align 4 // CHECK14-NEXT: [[TMP4:%.*]] = load i64, i64* [[T_VAR_CASTED]], align 8 -// CHECK14-NEXT: [[TMP5:%.*]] = load i32, i32* [[CONV1]], align 8 +// CHECK14-NEXT: [[TMP5:%.*]] = load i32, i32* [[CONV1]], align 4 // CHECK14-NEXT: [[CONV3:%.*]] = bitcast i64* [[SIVAR_CASTED]] to i32* // CHECK14-NEXT: store i32 [[TMP5]], i32* [[CONV3]], align 4 // CHECK14-NEXT: [[TMP6:%.*]] = load i64, i64* [[SIVAR_CASTED]], align 8 @@ -5958,11 +5958,11 @@ int main() { // CHECK14-NEXT: [[TMP15:%.*]] = zext i32 [[TMP14]] to i64 // CHECK14-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK14-NEXT: [[TMP17:%.*]] = zext i32 [[TMP16]] to i64 -// CHECK14-NEXT: [[TMP18:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK14-NEXT: [[TMP18:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK14-NEXT: [[CONV8:%.*]] = bitcast i64* [[T_VAR_CASTED]] to i32* // CHECK14-NEXT: store i32 [[TMP18]], i32* [[CONV8]], align 4 // CHECK14-NEXT: [[TMP19:%.*]] = load i64, i64* [[T_VAR_CASTED]], align 8 -// CHECK14-NEXT: [[TMP20:%.*]] = load i32, i32* [[CONV1]], align 8 +// CHECK14-NEXT: [[TMP20:%.*]] = load i32, i32* [[CONV1]], align 4 // CHECK14-NEXT: [[CONV9:%.*]] = bitcast i64* [[SIVAR_CASTED]] to i32* // CHECK14-NEXT: store i32 [[TMP20]], i32* [[CONV9]], align 4 // CHECK14-NEXT: [[TMP21:%.*]] = load i64, i64* [[SIVAR_CASTED]], align 8 @@ -6126,7 +6126,7 @@ int main() { // CHECK14-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP16]], 1 // CHECK14-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK14-NEXT: store i32 [[ADD]], i32* [[I]], align 4 -// CHECK14-NEXT: [[TMP17:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK14-NEXT: [[TMP17:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK14-NEXT: [[TMP18:%.*]] = load i32, i32* [[I]], align 4 // CHECK14-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP18]] to i64 // CHECK14-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x i32], [2 x i32]* [[VEC4]], i64 0, i64 [[IDXPROM]] @@ -6138,9 +6138,9 @@ int main() { // CHECK14-NEXT: [[TMP21:%.*]] = bitcast %struct.S* [[VAR7]] to i8* // CHECK14-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 [[TMP20]], i8* align 4 [[TMP21]], i64 4, i1 false) // CHECK14-NEXT: [[TMP22:%.*]] = load i32, i32* [[I]], align 4 -// CHECK14-NEXT: [[TMP23:%.*]] = load i32, i32* [[CONV1]], align 8 +// CHECK14-NEXT: [[TMP23:%.*]] = load i32, 
i32* [[CONV1]], align 4 // CHECK14-NEXT: [[ADD12:%.*]] = add nsw i32 [[TMP23]], [[TMP22]] -// CHECK14-NEXT: store i32 [[ADD12]], i32* [[CONV1]], align 8 +// CHECK14-NEXT: store i32 [[ADD12]], i32* [[CONV1]], align 4 // CHECK14-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK14: omp.body.continue: // CHECK14-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] @@ -6197,7 +6197,7 @@ int main() { // CHECK14-NEXT: [[TMP1:%.*]] = load [2 x %struct.S.0]*, [2 x %struct.S.0]** [[S_ARR_ADDR]], align 8 // CHECK14-NEXT: [[TMP2:%.*]] = load %struct.S.0*, %struct.S.0** [[VAR_ADDR]], align 8 // CHECK14-NEXT: store %struct.S.0* [[TMP2]], %struct.S.0** [[TMP]], align 8 -// CHECK14-NEXT: [[TMP3:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK14-NEXT: [[TMP3:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK14-NEXT: [[CONV1:%.*]] = bitcast i64* [[T_VAR_CASTED]] to i32* // CHECK14-NEXT: store i32 [[TMP3]], i32* [[CONV1]], align 4 // CHECK14-NEXT: [[TMP4:%.*]] = load i64, i64* [[T_VAR_CASTED]], align 8 @@ -6298,7 +6298,7 @@ int main() { // CHECK14-NEXT: [[TMP16:%.*]] = zext i32 [[TMP15]] to i64 // CHECK14-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK14-NEXT: [[TMP18:%.*]] = zext i32 [[TMP17]] to i64 -// CHECK14-NEXT: [[TMP19:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK14-NEXT: [[TMP19:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK14-NEXT: [[CONV9:%.*]] = bitcast i64* [[T_VAR_CASTED]] to i32* // CHECK14-NEXT: store i32 [[TMP19]], i32* [[CONV9]], align 4 // CHECK14-NEXT: [[TMP20:%.*]] = load i64, i64* [[T_VAR_CASTED]], align 8 @@ -6445,7 +6445,7 @@ int main() { // CHECK14-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP17]], 1 // CHECK14-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK14-NEXT: store i32 [[ADD]], i32* [[I]], align 4 -// CHECK14-NEXT: [[TMP18:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK14-NEXT: [[TMP18:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK14-NEXT: [[TMP19:%.*]] = load i32, i32* [[I]], align 4 // CHECK14-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP19]] to i64 // CHECK14-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x i32], [2 x i32]* [[VEC4]], i64 0, i64 [[IDXPROM]] @@ -8015,7 +8015,7 @@ int main() { // CHECK17-NEXT: [[CONV1:%.*]] = bitcast i64* [[G1_ADDR]] to i32* // CHECK17-NEXT: [[CONV2:%.*]] = bitcast i64* [[SIVAR_ADDR]] to i32* // CHECK17-NEXT: store i32* [[CONV1]], i32** [[TMP]], align 8 -// CHECK17-NEXT: [[TMP0:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK17-NEXT: [[TMP0:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK17-NEXT: [[CONV3:%.*]] = bitcast i64* [[G_CASTED]] to i32* // CHECK17-NEXT: store i32 [[TMP0]], i32* [[CONV3]], align 4 // CHECK17-NEXT: [[TMP1:%.*]] = load i64, i64* [[G_CASTED]], align 8 @@ -8024,7 +8024,7 @@ int main() { // CHECK17-NEXT: [[CONV4:%.*]] = bitcast i64* [[G1_CASTED]] to i32* // CHECK17-NEXT: store i32 [[TMP3]], i32* [[CONV4]], align 4 // CHECK17-NEXT: [[TMP4:%.*]] = load i64, i64* [[G1_CASTED]], align 8 -// CHECK17-NEXT: [[TMP5:%.*]] = load i32, i32* [[CONV2]], align 8 +// CHECK17-NEXT: [[TMP5:%.*]] = load i32, i32* [[CONV2]], align 4 // CHECK17-NEXT: [[CONV5:%.*]] = bitcast i64* [[SIVAR_CASTED]] to i32* // CHECK17-NEXT: store i32 [[TMP5]], i32* [[CONV5]], align 4 // CHECK17-NEXT: [[TMP6:%.*]] = load i64, i64* [[SIVAR_CASTED]], align 8 @@ -8091,7 +8091,7 @@ int main() { // CHECK17-NEXT: [[TMP8:%.*]] = zext i32 [[TMP7]] to i64 // CHECK17-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK17-NEXT: [[TMP10:%.*]] = zext i32 [[TMP9]] to i64 -// CHECK17-NEXT: [[TMP11:%.*]] = load i32, i32* [[CONV]], align 
8 +// CHECK17-NEXT: [[TMP11:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK17-NEXT: [[CONV5:%.*]] = bitcast i64* [[G_CASTED]] to i32* // CHECK17-NEXT: store i32 [[TMP11]], i32* [[CONV5]], align 4 // CHECK17-NEXT: [[TMP12:%.*]] = load i64, i64* [[G_CASTED]], align 8 @@ -8100,7 +8100,7 @@ int main() { // CHECK17-NEXT: [[CONV6:%.*]] = bitcast i64* [[G1_CASTED]] to i32* // CHECK17-NEXT: store i32 [[TMP14]], i32* [[CONV6]], align 4 // CHECK17-NEXT: [[TMP15:%.*]] = load i64, i64* [[G1_CASTED]], align 8 -// CHECK17-NEXT: [[TMP16:%.*]] = load i32, i32* [[CONV2]], align 8 +// CHECK17-NEXT: [[TMP16:%.*]] = load i32, i32* [[CONV2]], align 4 // CHECK17-NEXT: [[CONV7:%.*]] = bitcast i64* [[SIVAR_CASTED]] to i32* // CHECK17-NEXT: store i32 [[TMP16]], i32* [[CONV7]], align 4 // CHECK17-NEXT: [[TMP17:%.*]] = load i64, i64* [[SIVAR_CASTED]], align 8 @@ -8186,10 +8186,10 @@ int main() { // CHECK17-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP9]], 1 // CHECK17-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK17-NEXT: store i32 [[ADD]], i32* [[I]], align 4 -// CHECK17-NEXT: store i32 1, i32* [[CONV]], align 8 +// CHECK17-NEXT: store i32 1, i32* [[CONV]], align 4 // CHECK17-NEXT: [[TMP10:%.*]] = load i32*, i32** [[TMP]], align 8 // CHECK17-NEXT: store volatile i32 1, i32* [[TMP10]], align 4 -// CHECK17-NEXT: store i32 2, i32* [[CONV2]], align 8 +// CHECK17-NEXT: store i32 2, i32* [[CONV2]], align 4 // CHECK17-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[CLASS_ANON]], %class.anon* [[REF_TMP]], i32 0, i32 0 // CHECK17-NEXT: store i32* [[CONV]], i32** [[TMP11]], align 8 // CHECK17-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[CLASS_ANON]], %class.anon* [[REF_TMP]], i32 0, i32 1 diff --git a/clang/test/OpenMP/target_teams_distribute_parallel_for_if_codegen.cpp b/clang/test/OpenMP/target_teams_distribute_parallel_for_if_codegen.cpp index 5c9b5be7444d..e17961e76ae3 100644 --- a/clang/test/OpenMP/target_teams_distribute_parallel_for_if_codegen.cpp +++ b/clang/test/OpenMP/target_teams_distribute_parallel_for_if_codegen.cpp @@ -763,7 +763,7 @@ int main() { // CHECK1-NEXT: [[DOTCAPTURE_EXPR__CASTED:%.*]] = alloca i64, align 8 // CHECK1-NEXT: store i64 [[DOTCAPTURE_EXPR_]], i64* [[DOTCAPTURE_EXPR__ADDR]], align 8 // CHECK1-NEXT: [[CONV:%.*]] = bitcast i64* [[DOTCAPTURE_EXPR__ADDR]] to i8* -// CHECK1-NEXT: [[TMP0:%.*]] = load i8, i8* [[CONV]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load i8, i8* [[CONV]], align 1 // CHECK1-NEXT: [[TOBOOL:%.*]] = trunc i8 [[TMP0]] to i1 // CHECK1-NEXT: [[CONV1:%.*]] = bitcast i64* [[DOTCAPTURE_EXPR__CASTED]] to i8* // CHECK1-NEXT: [[FROMBOOL:%.*]] = zext i1 [[TOBOOL]] to i8 @@ -822,7 +822,7 @@ int main() { // CHECK1-NEXT: [[TMP8:%.*]] = zext i32 [[TMP7]] to i64 // CHECK1-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK1-NEXT: [[TMP10:%.*]] = zext i32 [[TMP9]] to i64 -// CHECK1-NEXT: [[TMP11:%.*]] = load i8, i8* [[CONV]], align 8 +// CHECK1-NEXT: [[TMP11:%.*]] = load i8, i8* [[CONV]], align 1 // CHECK1-NEXT: [[TOBOOL:%.*]] = trunc i8 [[TMP11]] to i1 // CHECK1-NEXT: br i1 [[TOBOOL]], label [[OMP_IF_THEN:%.*]], label [[OMP_IF_ELSE:%.*]] // CHECK1: omp_if.then: @@ -1264,7 +1264,7 @@ int main() { // CHECK1-NEXT: [[DOTCAPTURE_EXPR__CASTED:%.*]] = alloca i64, align 8 // CHECK1-NEXT: store i64 [[DOTCAPTURE_EXPR_]], i64* [[DOTCAPTURE_EXPR__ADDR]], align 8 // CHECK1-NEXT: [[CONV:%.*]] = bitcast i64* [[DOTCAPTURE_EXPR__ADDR]] to i8* -// CHECK1-NEXT: [[TMP0:%.*]] = load i8, i8* [[CONV]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load i8, i8* [[CONV]], align 1 // CHECK1-NEXT: 
[[TOBOOL:%.*]] = trunc i8 [[TMP0]] to i1 // CHECK1-NEXT: [[CONV1:%.*]] = bitcast i64* [[DOTCAPTURE_EXPR__CASTED]] to i8* // CHECK1-NEXT: [[FROMBOOL:%.*]] = zext i1 [[TOBOOL]] to i8 @@ -1323,7 +1323,7 @@ int main() { // CHECK1-NEXT: [[TMP8:%.*]] = zext i32 [[TMP7]] to i64 // CHECK1-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK1-NEXT: [[TMP10:%.*]] = zext i32 [[TMP9]] to i64 -// CHECK1-NEXT: [[TMP11:%.*]] = load i8, i8* [[CONV]], align 8 +// CHECK1-NEXT: [[TMP11:%.*]] = load i8, i8* [[CONV]], align 1 // CHECK1-NEXT: [[TOBOOL:%.*]] = trunc i8 [[TMP11]] to i1 // CHECK1-NEXT: br i1 [[TOBOOL]], label [[OMP_IF_THEN:%.*]], label [[OMP_IF_ELSE:%.*]] // CHECK1: omp_if.then: @@ -2087,7 +2087,7 @@ int main() { // CHECK2-NEXT: [[DOTCAPTURE_EXPR__CASTED:%.*]] = alloca i64, align 8 // CHECK2-NEXT: store i64 [[DOTCAPTURE_EXPR_]], i64* [[DOTCAPTURE_EXPR__ADDR]], align 8 // CHECK2-NEXT: [[CONV:%.*]] = bitcast i64* [[DOTCAPTURE_EXPR__ADDR]] to i8* -// CHECK2-NEXT: [[TMP0:%.*]] = load i8, i8* [[CONV]], align 8 +// CHECK2-NEXT: [[TMP0:%.*]] = load i8, i8* [[CONV]], align 1 // CHECK2-NEXT: [[TOBOOL:%.*]] = trunc i8 [[TMP0]] to i1 // CHECK2-NEXT: [[CONV1:%.*]] = bitcast i64* [[DOTCAPTURE_EXPR__CASTED]] to i8* // CHECK2-NEXT: [[FROMBOOL:%.*]] = zext i1 [[TOBOOL]] to i8 @@ -2146,7 +2146,7 @@ int main() { // CHECK2-NEXT: [[TMP8:%.*]] = zext i32 [[TMP7]] to i64 // CHECK2-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK2-NEXT: [[TMP10:%.*]] = zext i32 [[TMP9]] to i64 -// CHECK2-NEXT: [[TMP11:%.*]] = load i8, i8* [[CONV]], align 8 +// CHECK2-NEXT: [[TMP11:%.*]] = load i8, i8* [[CONV]], align 1 // CHECK2-NEXT: [[TOBOOL:%.*]] = trunc i8 [[TMP11]] to i1 // CHECK2-NEXT: br i1 [[TOBOOL]], label [[OMP_IF_THEN:%.*]], label [[OMP_IF_ELSE:%.*]] // CHECK2: omp_if.then: @@ -2588,7 +2588,7 @@ int main() { // CHECK2-NEXT: [[DOTCAPTURE_EXPR__CASTED:%.*]] = alloca i64, align 8 // CHECK2-NEXT: store i64 [[DOTCAPTURE_EXPR_]], i64* [[DOTCAPTURE_EXPR__ADDR]], align 8 // CHECK2-NEXT: [[CONV:%.*]] = bitcast i64* [[DOTCAPTURE_EXPR__ADDR]] to i8* -// CHECK2-NEXT: [[TMP0:%.*]] = load i8, i8* [[CONV]], align 8 +// CHECK2-NEXT: [[TMP0:%.*]] = load i8, i8* [[CONV]], align 1 // CHECK2-NEXT: [[TOBOOL:%.*]] = trunc i8 [[TMP0]] to i1 // CHECK2-NEXT: [[CONV1:%.*]] = bitcast i64* [[DOTCAPTURE_EXPR__CASTED]] to i8* // CHECK2-NEXT: [[FROMBOOL:%.*]] = zext i1 [[TOBOOL]] to i8 @@ -2647,7 +2647,7 @@ int main() { // CHECK2-NEXT: [[TMP8:%.*]] = zext i32 [[TMP7]] to i64 // CHECK2-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK2-NEXT: [[TMP10:%.*]] = zext i32 [[TMP9]] to i64 -// CHECK2-NEXT: [[TMP11:%.*]] = load i8, i8* [[CONV]], align 8 +// CHECK2-NEXT: [[TMP11:%.*]] = load i8, i8* [[CONV]], align 1 // CHECK2-NEXT: [[TOBOOL:%.*]] = trunc i8 [[TMP11]] to i1 // CHECK2-NEXT: br i1 [[TOBOOL]], label [[OMP_IF_THEN:%.*]], label [[OMP_IF_ELSE:%.*]] // CHECK2: omp_if.then: @@ -3411,7 +3411,7 @@ int main() { // CHECK5-NEXT: [[DOTCAPTURE_EXPR__CASTED:%.*]] = alloca i64, align 8 // CHECK5-NEXT: store i64 [[DOTCAPTURE_EXPR_]], i64* [[DOTCAPTURE_EXPR__ADDR]], align 8 // CHECK5-NEXT: [[CONV:%.*]] = bitcast i64* [[DOTCAPTURE_EXPR__ADDR]] to i8* -// CHECK5-NEXT: [[TMP0:%.*]] = load i8, i8* [[CONV]], align 8 +// CHECK5-NEXT: [[TMP0:%.*]] = load i8, i8* [[CONV]], align 1 // CHECK5-NEXT: [[TOBOOL:%.*]] = trunc i8 [[TMP0]] to i1 // CHECK5-NEXT: [[CONV1:%.*]] = bitcast i64* [[DOTCAPTURE_EXPR__CASTED]] to i8* // CHECK5-NEXT: [[FROMBOOL:%.*]] = zext i1 [[TOBOOL]] to i8 @@ -3470,7 +3470,7 @@ int main() { // 
CHECK5-NEXT: [[TMP8:%.*]] = zext i32 [[TMP7]] to i64 // CHECK5-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK5-NEXT: [[TMP10:%.*]] = zext i32 [[TMP9]] to i64 -// CHECK5-NEXT: [[TMP11:%.*]] = load i8, i8* [[CONV]], align 8 +// CHECK5-NEXT: [[TMP11:%.*]] = load i8, i8* [[CONV]], align 1 // CHECK5-NEXT: [[TOBOOL:%.*]] = trunc i8 [[TMP11]] to i1 // CHECK5-NEXT: br i1 [[TOBOOL]], label [[OMP_IF_THEN:%.*]], label [[OMP_IF_ELSE:%.*]] // CHECK5: omp_if.then: @@ -3912,7 +3912,7 @@ int main() { // CHECK5-NEXT: [[DOTCAPTURE_EXPR__CASTED:%.*]] = alloca i64, align 8 // CHECK5-NEXT: store i64 [[DOTCAPTURE_EXPR_]], i64* [[DOTCAPTURE_EXPR__ADDR]], align 8 // CHECK5-NEXT: [[CONV:%.*]] = bitcast i64* [[DOTCAPTURE_EXPR__ADDR]] to i8* -// CHECK5-NEXT: [[TMP0:%.*]] = load i8, i8* [[CONV]], align 8 +// CHECK5-NEXT: [[TMP0:%.*]] = load i8, i8* [[CONV]], align 1 // CHECK5-NEXT: [[TOBOOL:%.*]] = trunc i8 [[TMP0]] to i1 // CHECK5-NEXT: [[CONV1:%.*]] = bitcast i64* [[DOTCAPTURE_EXPR__CASTED]] to i8* // CHECK5-NEXT: [[FROMBOOL:%.*]] = zext i1 [[TOBOOL]] to i8 @@ -3971,7 +3971,7 @@ int main() { // CHECK5-NEXT: [[TMP8:%.*]] = zext i32 [[TMP7]] to i64 // CHECK5-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK5-NEXT: [[TMP10:%.*]] = zext i32 [[TMP9]] to i64 -// CHECK5-NEXT: [[TMP11:%.*]] = load i8, i8* [[CONV]], align 8 +// CHECK5-NEXT: [[TMP11:%.*]] = load i8, i8* [[CONV]], align 1 // CHECK5-NEXT: [[TOBOOL:%.*]] = trunc i8 [[TMP11]] to i1 // CHECK5-NEXT: br i1 [[TOBOOL]], label [[OMP_IF_THEN:%.*]], label [[OMP_IF_ELSE:%.*]] // CHECK5: omp_if.then: @@ -4735,7 +4735,7 @@ int main() { // CHECK6-NEXT: [[DOTCAPTURE_EXPR__CASTED:%.*]] = alloca i64, align 8 // CHECK6-NEXT: store i64 [[DOTCAPTURE_EXPR_]], i64* [[DOTCAPTURE_EXPR__ADDR]], align 8 // CHECK6-NEXT: [[CONV:%.*]] = bitcast i64* [[DOTCAPTURE_EXPR__ADDR]] to i8* -// CHECK6-NEXT: [[TMP0:%.*]] = load i8, i8* [[CONV]], align 8 +// CHECK6-NEXT: [[TMP0:%.*]] = load i8, i8* [[CONV]], align 1 // CHECK6-NEXT: [[TOBOOL:%.*]] = trunc i8 [[TMP0]] to i1 // CHECK6-NEXT: [[CONV1:%.*]] = bitcast i64* [[DOTCAPTURE_EXPR__CASTED]] to i8* // CHECK6-NEXT: [[FROMBOOL:%.*]] = zext i1 [[TOBOOL]] to i8 @@ -4794,7 +4794,7 @@ int main() { // CHECK6-NEXT: [[TMP8:%.*]] = zext i32 [[TMP7]] to i64 // CHECK6-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK6-NEXT: [[TMP10:%.*]] = zext i32 [[TMP9]] to i64 -// CHECK6-NEXT: [[TMP11:%.*]] = load i8, i8* [[CONV]], align 8 +// CHECK6-NEXT: [[TMP11:%.*]] = load i8, i8* [[CONV]], align 1 // CHECK6-NEXT: [[TOBOOL:%.*]] = trunc i8 [[TMP11]] to i1 // CHECK6-NEXT: br i1 [[TOBOOL]], label [[OMP_IF_THEN:%.*]], label [[OMP_IF_ELSE:%.*]] // CHECK6: omp_if.then: @@ -5236,7 +5236,7 @@ int main() { // CHECK6-NEXT: [[DOTCAPTURE_EXPR__CASTED:%.*]] = alloca i64, align 8 // CHECK6-NEXT: store i64 [[DOTCAPTURE_EXPR_]], i64* [[DOTCAPTURE_EXPR__ADDR]], align 8 // CHECK6-NEXT: [[CONV:%.*]] = bitcast i64* [[DOTCAPTURE_EXPR__ADDR]] to i8* -// CHECK6-NEXT: [[TMP0:%.*]] = load i8, i8* [[CONV]], align 8 +// CHECK6-NEXT: [[TMP0:%.*]] = load i8, i8* [[CONV]], align 1 // CHECK6-NEXT: [[TOBOOL:%.*]] = trunc i8 [[TMP0]] to i1 // CHECK6-NEXT: [[CONV1:%.*]] = bitcast i64* [[DOTCAPTURE_EXPR__CASTED]] to i8* // CHECK6-NEXT: [[FROMBOOL:%.*]] = zext i1 [[TOBOOL]] to i8 @@ -5295,7 +5295,7 @@ int main() { // CHECK6-NEXT: [[TMP8:%.*]] = zext i32 [[TMP7]] to i64 // CHECK6-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK6-NEXT: [[TMP10:%.*]] = zext i32 [[TMP9]] to i64 -// CHECK6-NEXT: [[TMP11:%.*]] = 
load i8, i8* [[CONV]], align 8 +// CHECK6-NEXT: [[TMP11:%.*]] = load i8, i8* [[CONV]], align 1 // CHECK6-NEXT: [[TOBOOL:%.*]] = trunc i8 [[TMP11]] to i1 // CHECK6-NEXT: br i1 [[TOBOOL]], label [[OMP_IF_THEN:%.*]], label [[OMP_IF_ELSE:%.*]] // CHECK6: omp_if.then: @@ -6059,7 +6059,7 @@ int main() { // CHECK9-NEXT: [[DOTCAPTURE_EXPR__CASTED:%.*]] = alloca i64, align 8 // CHECK9-NEXT: store i64 [[DOTCAPTURE_EXPR_]], i64* [[DOTCAPTURE_EXPR__ADDR]], align 8 // CHECK9-NEXT: [[CONV:%.*]] = bitcast i64* [[DOTCAPTURE_EXPR__ADDR]] to i8* -// CHECK9-NEXT: [[TMP0:%.*]] = load i8, i8* [[CONV]], align 8 +// CHECK9-NEXT: [[TMP0:%.*]] = load i8, i8* [[CONV]], align 1 // CHECK9-NEXT: [[TOBOOL:%.*]] = trunc i8 [[TMP0]] to i1 // CHECK9-NEXT: [[CONV1:%.*]] = bitcast i64* [[DOTCAPTURE_EXPR__CASTED]] to i8* // CHECK9-NEXT: [[FROMBOOL:%.*]] = zext i1 [[TOBOOL]] to i8 @@ -6118,7 +6118,7 @@ int main() { // CHECK9-NEXT: [[TMP8:%.*]] = zext i32 [[TMP7]] to i64 // CHECK9-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK9-NEXT: [[TMP10:%.*]] = zext i32 [[TMP9]] to i64 -// CHECK9-NEXT: [[TMP11:%.*]] = load i8, i8* [[CONV]], align 8 +// CHECK9-NEXT: [[TMP11:%.*]] = load i8, i8* [[CONV]], align 1 // CHECK9-NEXT: [[TOBOOL:%.*]] = trunc i8 [[TMP11]] to i1 // CHECK9-NEXT: br i1 [[TOBOOL]], label [[OMP_IF_THEN:%.*]], label [[OMP_IF_ELSE:%.*]] // CHECK9: omp_if.then: @@ -6560,7 +6560,7 @@ int main() { // CHECK9-NEXT: [[DOTCAPTURE_EXPR__CASTED:%.*]] = alloca i64, align 8 // CHECK9-NEXT: store i64 [[DOTCAPTURE_EXPR_]], i64* [[DOTCAPTURE_EXPR__ADDR]], align 8 // CHECK9-NEXT: [[CONV:%.*]] = bitcast i64* [[DOTCAPTURE_EXPR__ADDR]] to i8* -// CHECK9-NEXT: [[TMP0:%.*]] = load i8, i8* [[CONV]], align 8 +// CHECK9-NEXT: [[TMP0:%.*]] = load i8, i8* [[CONV]], align 1 // CHECK9-NEXT: [[TOBOOL:%.*]] = trunc i8 [[TMP0]] to i1 // CHECK9-NEXT: [[CONV1:%.*]] = bitcast i64* [[DOTCAPTURE_EXPR__CASTED]] to i8* // CHECK9-NEXT: [[FROMBOOL:%.*]] = zext i1 [[TOBOOL]] to i8 @@ -6619,7 +6619,7 @@ int main() { // CHECK9-NEXT: [[TMP8:%.*]] = zext i32 [[TMP7]] to i64 // CHECK9-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK9-NEXT: [[TMP10:%.*]] = zext i32 [[TMP9]] to i64 -// CHECK9-NEXT: [[TMP11:%.*]] = load i8, i8* [[CONV]], align 8 +// CHECK9-NEXT: [[TMP11:%.*]] = load i8, i8* [[CONV]], align 1 // CHECK9-NEXT: [[TOBOOL:%.*]] = trunc i8 [[TMP11]] to i1 // CHECK9-NEXT: br i1 [[TOBOOL]], label [[OMP_IF_THEN:%.*]], label [[OMP_IF_ELSE:%.*]] // CHECK9: omp_if.then: @@ -7383,7 +7383,7 @@ int main() { // CHECK10-NEXT: [[DOTCAPTURE_EXPR__CASTED:%.*]] = alloca i64, align 8 // CHECK10-NEXT: store i64 [[DOTCAPTURE_EXPR_]], i64* [[DOTCAPTURE_EXPR__ADDR]], align 8 // CHECK10-NEXT: [[CONV:%.*]] = bitcast i64* [[DOTCAPTURE_EXPR__ADDR]] to i8* -// CHECK10-NEXT: [[TMP0:%.*]] = load i8, i8* [[CONV]], align 8 +// CHECK10-NEXT: [[TMP0:%.*]] = load i8, i8* [[CONV]], align 1 // CHECK10-NEXT: [[TOBOOL:%.*]] = trunc i8 [[TMP0]] to i1 // CHECK10-NEXT: [[CONV1:%.*]] = bitcast i64* [[DOTCAPTURE_EXPR__CASTED]] to i8* // CHECK10-NEXT: [[FROMBOOL:%.*]] = zext i1 [[TOBOOL]] to i8 @@ -7442,7 +7442,7 @@ int main() { // CHECK10-NEXT: [[TMP8:%.*]] = zext i32 [[TMP7]] to i64 // CHECK10-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK10-NEXT: [[TMP10:%.*]] = zext i32 [[TMP9]] to i64 -// CHECK10-NEXT: [[TMP11:%.*]] = load i8, i8* [[CONV]], align 8 +// CHECK10-NEXT: [[TMP11:%.*]] = load i8, i8* [[CONV]], align 1 // CHECK10-NEXT: [[TOBOOL:%.*]] = trunc i8 [[TMP11]] to i1 // CHECK10-NEXT: br i1 [[TOBOOL]], label 
[[OMP_IF_THEN:%.*]], label [[OMP_IF_ELSE:%.*]] // CHECK10: omp_if.then: @@ -7884,7 +7884,7 @@ int main() { // CHECK10-NEXT: [[DOTCAPTURE_EXPR__CASTED:%.*]] = alloca i64, align 8 // CHECK10-NEXT: store i64 [[DOTCAPTURE_EXPR_]], i64* [[DOTCAPTURE_EXPR__ADDR]], align 8 // CHECK10-NEXT: [[CONV:%.*]] = bitcast i64* [[DOTCAPTURE_EXPR__ADDR]] to i8* -// CHECK10-NEXT: [[TMP0:%.*]] = load i8, i8* [[CONV]], align 8 +// CHECK10-NEXT: [[TMP0:%.*]] = load i8, i8* [[CONV]], align 1 // CHECK10-NEXT: [[TOBOOL:%.*]] = trunc i8 [[TMP0]] to i1 // CHECK10-NEXT: [[CONV1:%.*]] = bitcast i64* [[DOTCAPTURE_EXPR__CASTED]] to i8* // CHECK10-NEXT: [[FROMBOOL:%.*]] = zext i1 [[TOBOOL]] to i8 @@ -7943,7 +7943,7 @@ int main() { // CHECK10-NEXT: [[TMP8:%.*]] = zext i32 [[TMP7]] to i64 // CHECK10-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK10-NEXT: [[TMP10:%.*]] = zext i32 [[TMP9]] to i64 -// CHECK10-NEXT: [[TMP11:%.*]] = load i8, i8* [[CONV]], align 8 +// CHECK10-NEXT: [[TMP11:%.*]] = load i8, i8* [[CONV]], align 1 // CHECK10-NEXT: [[TOBOOL:%.*]] = trunc i8 [[TMP11]] to i1 // CHECK10-NEXT: br i1 [[TOBOOL]], label [[OMP_IF_THEN:%.*]], label [[OMP_IF_ELSE:%.*]] // CHECK10: omp_if.then: @@ -8707,7 +8707,7 @@ int main() { // CHECK13-NEXT: [[DOTCAPTURE_EXPR__CASTED:%.*]] = alloca i64, align 8 // CHECK13-NEXT: store i64 [[DOTCAPTURE_EXPR_]], i64* [[DOTCAPTURE_EXPR__ADDR]], align 8 // CHECK13-NEXT: [[CONV:%.*]] = bitcast i64* [[DOTCAPTURE_EXPR__ADDR]] to i8* -// CHECK13-NEXT: [[TMP0:%.*]] = load i8, i8* [[CONV]], align 8 +// CHECK13-NEXT: [[TMP0:%.*]] = load i8, i8* [[CONV]], align 1 // CHECK13-NEXT: [[TOBOOL:%.*]] = trunc i8 [[TMP0]] to i1 // CHECK13-NEXT: [[CONV1:%.*]] = bitcast i64* [[DOTCAPTURE_EXPR__CASTED]] to i8* // CHECK13-NEXT: [[FROMBOOL:%.*]] = zext i1 [[TOBOOL]] to i8 @@ -8766,7 +8766,7 @@ int main() { // CHECK13-NEXT: [[TMP8:%.*]] = zext i32 [[TMP7]] to i64 // CHECK13-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK13-NEXT: [[TMP10:%.*]] = zext i32 [[TMP9]] to i64 -// CHECK13-NEXT: [[TMP11:%.*]] = load i8, i8* [[CONV]], align 8 +// CHECK13-NEXT: [[TMP11:%.*]] = load i8, i8* [[CONV]], align 1 // CHECK13-NEXT: [[TOBOOL:%.*]] = trunc i8 [[TMP11]] to i1 // CHECK13-NEXT: br i1 [[TOBOOL]], label [[OMP_IF_THEN:%.*]], label [[OMP_IF_ELSE:%.*]] // CHECK13: omp_if.then: @@ -9208,7 +9208,7 @@ int main() { // CHECK13-NEXT: [[DOTCAPTURE_EXPR__CASTED:%.*]] = alloca i64, align 8 // CHECK13-NEXT: store i64 [[DOTCAPTURE_EXPR_]], i64* [[DOTCAPTURE_EXPR__ADDR]], align 8 // CHECK13-NEXT: [[CONV:%.*]] = bitcast i64* [[DOTCAPTURE_EXPR__ADDR]] to i8* -// CHECK13-NEXT: [[TMP0:%.*]] = load i8, i8* [[CONV]], align 8 +// CHECK13-NEXT: [[TMP0:%.*]] = load i8, i8* [[CONV]], align 1 // CHECK13-NEXT: [[TOBOOL:%.*]] = trunc i8 [[TMP0]] to i1 // CHECK13-NEXT: [[CONV1:%.*]] = bitcast i64* [[DOTCAPTURE_EXPR__CASTED]] to i8* // CHECK13-NEXT: [[FROMBOOL:%.*]] = zext i1 [[TOBOOL]] to i8 @@ -9267,7 +9267,7 @@ int main() { // CHECK13-NEXT: [[TMP8:%.*]] = zext i32 [[TMP7]] to i64 // CHECK13-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK13-NEXT: [[TMP10:%.*]] = zext i32 [[TMP9]] to i64 -// CHECK13-NEXT: [[TMP11:%.*]] = load i8, i8* [[CONV]], align 8 +// CHECK13-NEXT: [[TMP11:%.*]] = load i8, i8* [[CONV]], align 1 // CHECK13-NEXT: [[TOBOOL:%.*]] = trunc i8 [[TMP11]] to i1 // CHECK13-NEXT: br i1 [[TOBOOL]], label [[OMP_IF_THEN:%.*]], label [[OMP_IF_ELSE:%.*]] // CHECK13: omp_if.then: @@ -10031,7 +10031,7 @@ int main() { // CHECK14-NEXT: [[DOTCAPTURE_EXPR__CASTED:%.*]] = alloca 
i64, align 8 // CHECK14-NEXT: store i64 [[DOTCAPTURE_EXPR_]], i64* [[DOTCAPTURE_EXPR__ADDR]], align 8 // CHECK14-NEXT: [[CONV:%.*]] = bitcast i64* [[DOTCAPTURE_EXPR__ADDR]] to i8* -// CHECK14-NEXT: [[TMP0:%.*]] = load i8, i8* [[CONV]], align 8 +// CHECK14-NEXT: [[TMP0:%.*]] = load i8, i8* [[CONV]], align 1 // CHECK14-NEXT: [[TOBOOL:%.*]] = trunc i8 [[TMP0]] to i1 // CHECK14-NEXT: [[CONV1:%.*]] = bitcast i64* [[DOTCAPTURE_EXPR__CASTED]] to i8* // CHECK14-NEXT: [[FROMBOOL:%.*]] = zext i1 [[TOBOOL]] to i8 @@ -10090,7 +10090,7 @@ int main() { // CHECK14-NEXT: [[TMP8:%.*]] = zext i32 [[TMP7]] to i64 // CHECK14-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK14-NEXT: [[TMP10:%.*]] = zext i32 [[TMP9]] to i64 -// CHECK14-NEXT: [[TMP11:%.*]] = load i8, i8* [[CONV]], align 8 +// CHECK14-NEXT: [[TMP11:%.*]] = load i8, i8* [[CONV]], align 1 // CHECK14-NEXT: [[TOBOOL:%.*]] = trunc i8 [[TMP11]] to i1 // CHECK14-NEXT: br i1 [[TOBOOL]], label [[OMP_IF_THEN:%.*]], label [[OMP_IF_ELSE:%.*]] // CHECK14: omp_if.then: @@ -10532,7 +10532,7 @@ int main() { // CHECK14-NEXT: [[DOTCAPTURE_EXPR__CASTED:%.*]] = alloca i64, align 8 // CHECK14-NEXT: store i64 [[DOTCAPTURE_EXPR_]], i64* [[DOTCAPTURE_EXPR__ADDR]], align 8 // CHECK14-NEXT: [[CONV:%.*]] = bitcast i64* [[DOTCAPTURE_EXPR__ADDR]] to i8* -// CHECK14-NEXT: [[TMP0:%.*]] = load i8, i8* [[CONV]], align 8 +// CHECK14-NEXT: [[TMP0:%.*]] = load i8, i8* [[CONV]], align 1 // CHECK14-NEXT: [[TOBOOL:%.*]] = trunc i8 [[TMP0]] to i1 // CHECK14-NEXT: [[CONV1:%.*]] = bitcast i64* [[DOTCAPTURE_EXPR__CASTED]] to i8* // CHECK14-NEXT: [[FROMBOOL:%.*]] = zext i1 [[TOBOOL]] to i8 @@ -10591,7 +10591,7 @@ int main() { // CHECK14-NEXT: [[TMP8:%.*]] = zext i32 [[TMP7]] to i64 // CHECK14-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK14-NEXT: [[TMP10:%.*]] = zext i32 [[TMP9]] to i64 -// CHECK14-NEXT: [[TMP11:%.*]] = load i8, i8* [[CONV]], align 8 +// CHECK14-NEXT: [[TMP11:%.*]] = load i8, i8* [[CONV]], align 1 // CHECK14-NEXT: [[TOBOOL:%.*]] = trunc i8 [[TMP11]] to i1 // CHECK14-NEXT: br i1 [[TOBOOL]], label [[OMP_IF_THEN:%.*]], label [[OMP_IF_ELSE:%.*]] // CHECK14: omp_if.then: diff --git a/clang/test/OpenMP/target_teams_distribute_parallel_for_lastprivate_codegen.cpp b/clang/test/OpenMP/target_teams_distribute_parallel_for_lastprivate_codegen.cpp index 470a213961f6..38d9f4673b0b 100644 --- a/clang/test/OpenMP/target_teams_distribute_parallel_for_lastprivate_codegen.cpp +++ b/clang/test/OpenMP/target_teams_distribute_parallel_for_lastprivate_codegen.cpp @@ -213,11 +213,11 @@ int main() { // CHECK1-NEXT: [[CONV4:%.*]] = bitcast i64* [[G1_CASTED]] to double* // CHECK1-NEXT: store double [[TMP1]], double* [[CONV4]], align 8 // CHECK1-NEXT: [[TMP2:%.*]] = load i64, i64* [[G1_CASTED]], align 8 -// CHECK1-NEXT: [[TMP3:%.*]] = load i32, i32* [[CONV1]], align 8 +// CHECK1-NEXT: [[TMP3:%.*]] = load i32, i32* [[CONV1]], align 4 // CHECK1-NEXT: [[CONV5:%.*]] = bitcast i64* [[SVAR_CASTED]] to i32* // CHECK1-NEXT: store i32 [[TMP3]], i32* [[CONV5]], align 4 // CHECK1-NEXT: [[TMP4:%.*]] = load i64, i64* [[SVAR_CASTED]], align 8 -// CHECK1-NEXT: [[TMP5:%.*]] = load float, float* [[CONV2]], align 8 +// CHECK1-NEXT: [[TMP5:%.*]] = load float, float* [[CONV2]], align 4 // CHECK1-NEXT: [[CONV6:%.*]] = bitcast i64* [[SFVAR_CASTED]] to float* // CHECK1-NEXT: store float [[TMP5]], float* [[CONV6]], align 4 // CHECK1-NEXT: [[TMP6:%.*]] = load i64, i64* [[SFVAR_CASTED]], align 8 @@ -338,9 +338,9 @@ int main() { // CHECK1-NEXT: [[TMP27:%.*]] = load double, 
double* [[TMP26]], align 8 // CHECK1-NEXT: store volatile double [[TMP27]], double* [[TMP0]], align 8 // CHECK1-NEXT: [[TMP28:%.*]] = load i32, i32* [[SVAR8]], align 4 -// CHECK1-NEXT: store i32 [[TMP28]], i32* [[CONV1]], align 8 +// CHECK1-NEXT: store i32 [[TMP28]], i32* [[CONV1]], align 4 // CHECK1-NEXT: [[TMP29:%.*]] = load float, float* [[SFVAR9]], align 4 -// CHECK1-NEXT: store float [[TMP29]], float* [[CONV2]], align 8 +// CHECK1-NEXT: store float [[TMP29]], float* [[CONV2]], align 4 // CHECK1-NEXT: br label [[DOTOMP_LASTPRIVATE_DONE]] // CHECK1: .omp.lastprivate.done: // CHECK1-NEXT: ret void @@ -459,9 +459,9 @@ int main() { // CHECK1-NEXT: [[TMP22:%.*]] = load double, double* [[TMP21]], align 8 // CHECK1-NEXT: store volatile double [[TMP22]], double* [[TMP2]], align 8 // CHECK1-NEXT: [[TMP23:%.*]] = load i32, i32* [[SVAR10]], align 4 -// CHECK1-NEXT: store i32 [[TMP23]], i32* [[CONV1]], align 8 +// CHECK1-NEXT: store i32 [[TMP23]], i32* [[CONV1]], align 4 // CHECK1-NEXT: [[TMP24:%.*]] = load float, float* [[SFVAR11]], align 4 -// CHECK1-NEXT: store float [[TMP24]], float* [[CONV2]], align 8 +// CHECK1-NEXT: store float [[TMP24]], float* [[CONV2]], align 4 // CHECK1-NEXT: br label [[DOTOMP_LASTPRIVATE_DONE]] // CHECK1: .omp.lastprivate.done: // CHECK1-NEXT: ret void @@ -518,11 +518,11 @@ int main() { // CHECK2-NEXT: [[CONV4:%.*]] = bitcast i64* [[G1_CASTED]] to double* // CHECK2-NEXT: store double [[TMP1]], double* [[CONV4]], align 8 // CHECK2-NEXT: [[TMP2:%.*]] = load i64, i64* [[G1_CASTED]], align 8 -// CHECK2-NEXT: [[TMP3:%.*]] = load i32, i32* [[CONV1]], align 8 +// CHECK2-NEXT: [[TMP3:%.*]] = load i32, i32* [[CONV1]], align 4 // CHECK2-NEXT: [[CONV5:%.*]] = bitcast i64* [[SVAR_CASTED]] to i32* // CHECK2-NEXT: store i32 [[TMP3]], i32* [[CONV5]], align 4 // CHECK2-NEXT: [[TMP4:%.*]] = load i64, i64* [[SVAR_CASTED]], align 8 -// CHECK2-NEXT: [[TMP5:%.*]] = load float, float* [[CONV2]], align 8 +// CHECK2-NEXT: [[TMP5:%.*]] = load float, float* [[CONV2]], align 4 // CHECK2-NEXT: [[CONV6:%.*]] = bitcast i64* [[SFVAR_CASTED]] to float* // CHECK2-NEXT: store float [[TMP5]], float* [[CONV6]], align 4 // CHECK2-NEXT: [[TMP6:%.*]] = load i64, i64* [[SFVAR_CASTED]], align 8 @@ -643,9 +643,9 @@ int main() { // CHECK2-NEXT: [[TMP27:%.*]] = load double, double* [[TMP26]], align 8 // CHECK2-NEXT: store volatile double [[TMP27]], double* [[TMP0]], align 8 // CHECK2-NEXT: [[TMP28:%.*]] = load i32, i32* [[SVAR8]], align 4 -// CHECK2-NEXT: store i32 [[TMP28]], i32* [[CONV1]], align 8 +// CHECK2-NEXT: store i32 [[TMP28]], i32* [[CONV1]], align 4 // CHECK2-NEXT: [[TMP29:%.*]] = load float, float* [[SFVAR9]], align 4 -// CHECK2-NEXT: store float [[TMP29]], float* [[CONV2]], align 8 +// CHECK2-NEXT: store float [[TMP29]], float* [[CONV2]], align 4 // CHECK2-NEXT: br label [[DOTOMP_LASTPRIVATE_DONE]] // CHECK2: .omp.lastprivate.done: // CHECK2-NEXT: ret void @@ -764,9 +764,9 @@ int main() { // CHECK2-NEXT: [[TMP22:%.*]] = load double, double* [[TMP21]], align 8 // CHECK2-NEXT: store volatile double [[TMP22]], double* [[TMP2]], align 8 // CHECK2-NEXT: [[TMP23:%.*]] = load i32, i32* [[SVAR10]], align 4 -// CHECK2-NEXT: store i32 [[TMP23]], i32* [[CONV1]], align 8 +// CHECK2-NEXT: store i32 [[TMP23]], i32* [[CONV1]], align 4 // CHECK2-NEXT: [[TMP24:%.*]] = load float, float* [[SFVAR11]], align 4 -// CHECK2-NEXT: store float [[TMP24]], float* [[CONV2]], align 8 +// CHECK2-NEXT: store float [[TMP24]], float* [[CONV2]], align 4 // CHECK2-NEXT: br label [[DOTOMP_LASTPRIVATE_DONE]] // CHECK2: 
.omp.lastprivate.done: // CHECK2-NEXT: ret void @@ -1484,12 +1484,12 @@ int main() { // CHECK5-NEXT: [[TMP2:%.*]] = load %struct.S*, %struct.S** [[VAR_ADDR]], align 8 // CHECK5-NEXT: [[CONV1:%.*]] = bitcast i64* [[SVAR_ADDR]] to i32* // CHECK5-NEXT: store %struct.S* [[TMP2]], %struct.S** [[TMP]], align 8 -// CHECK5-NEXT: [[TMP3:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK5-NEXT: [[TMP3:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK5-NEXT: [[CONV2:%.*]] = bitcast i64* [[T_VAR_CASTED]] to i32* // CHECK5-NEXT: store i32 [[TMP3]], i32* [[CONV2]], align 4 // CHECK5-NEXT: [[TMP4:%.*]] = load i64, i64* [[T_VAR_CASTED]], align 8 // CHECK5-NEXT: [[TMP5:%.*]] = load %struct.S*, %struct.S** [[TMP]], align 8 -// CHECK5-NEXT: [[TMP6:%.*]] = load i32, i32* [[CONV1]], align 8 +// CHECK5-NEXT: [[TMP6:%.*]] = load i32, i32* [[CONV1]], align 4 // CHECK5-NEXT: [[CONV3:%.*]] = bitcast i64* [[SVAR_CASTED]] to i32* // CHECK5-NEXT: store i32 [[TMP6]], i32* [[CONV3]], align 4 // CHECK5-NEXT: [[TMP7:%.*]] = load i64, i64* [[SVAR_CASTED]], align 8 @@ -1610,7 +1610,7 @@ int main() { // CHECK5-NEXT: br i1 [[TMP25]], label [[DOTOMP_LASTPRIVATE_THEN:%.*]], label [[DOTOMP_LASTPRIVATE_DONE:%.*]] // CHECK5: .omp.lastprivate.then: // CHECK5-NEXT: [[TMP26:%.*]] = load i32, i32* [[T_VAR3]], align 4 -// CHECK5-NEXT: store i32 [[TMP26]], i32* [[CONV]], align 8 +// CHECK5-NEXT: store i32 [[TMP26]], i32* [[CONV]], align 4 // CHECK5-NEXT: [[TMP27:%.*]] = bitcast [2 x i32]* [[TMP0]] to i8* // CHECK5-NEXT: [[TMP28:%.*]] = bitcast [2 x i32]* [[VEC4]] to i8* // CHECK5-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 [[TMP27]], i8* align 4 [[TMP28]], i64 8, i1 false) @@ -1635,7 +1635,7 @@ int main() { // CHECK5-NEXT: [[TMP35:%.*]] = bitcast %struct.S* [[TMP33]] to i8* // CHECK5-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 [[TMP34]], i8* align 4 [[TMP35]], i64 4, i1 false) // CHECK5-NEXT: [[TMP36:%.*]] = load i32, i32* [[SVAR8]], align 4 -// CHECK5-NEXT: store i32 [[TMP36]], i32* [[CONV1]], align 8 +// CHECK5-NEXT: store i32 [[TMP36]], i32* [[CONV1]], align 4 // CHECK5-NEXT: br label [[DOTOMP_LASTPRIVATE_DONE]] // CHECK5: .omp.lastprivate.done: // CHECK5-NEXT: call void @_ZN1SIfED1Ev(%struct.S* nonnull align 4 dereferenceable(4) [[VAR6]]) #[[ATTR4]] @@ -1776,7 +1776,7 @@ int main() { // CHECK5-NEXT: br i1 [[TMP24]], label [[DOTOMP_LASTPRIVATE_THEN:%.*]], label [[DOTOMP_LASTPRIVATE_DONE:%.*]] // CHECK5: .omp.lastprivate.then: // CHECK5-NEXT: [[TMP25:%.*]] = load i32, i32* [[T_VAR5]], align 4 -// CHECK5-NEXT: store i32 [[TMP25]], i32* [[CONV]], align 8 +// CHECK5-NEXT: store i32 [[TMP25]], i32* [[CONV]], align 4 // CHECK5-NEXT: [[TMP26:%.*]] = bitcast [2 x i32]* [[TMP0]] to i8* // CHECK5-NEXT: [[TMP27:%.*]] = bitcast [2 x i32]* [[VEC6]] to i8* // CHECK5-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 [[TMP26]], i8* align 4 [[TMP27]], i64 8, i1 false) @@ -1801,7 +1801,7 @@ int main() { // CHECK5-NEXT: [[TMP34:%.*]] = bitcast %struct.S* [[TMP32]] to i8* // CHECK5-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 [[TMP33]], i8* align 4 [[TMP34]], i64 4, i1 false) // CHECK5-NEXT: [[TMP35:%.*]] = load i32, i32* [[SVAR10]], align 4 -// CHECK5-NEXT: store i32 [[TMP35]], i32* [[CONV1]], align 8 +// CHECK5-NEXT: store i32 [[TMP35]], i32* [[CONV1]], align 4 // CHECK5-NEXT: br label [[DOTOMP_LASTPRIVATE_DONE]] // CHECK5: .omp.lastprivate.done: // CHECK5-NEXT: call void @_ZN1SIfED1Ev(%struct.S* nonnull align 4 dereferenceable(4) [[VAR8]]) #[[ATTR4]] @@ -1992,7 +1992,7 @@ int main() { // CHECK5-NEXT: [[TMP1:%.*]] 
= load [2 x %struct.S.0]*, [2 x %struct.S.0]** [[S_ARR_ADDR]], align 8 // CHECK5-NEXT: [[TMP2:%.*]] = load %struct.S.0*, %struct.S.0** [[VAR_ADDR]], align 8 // CHECK5-NEXT: store %struct.S.0* [[TMP2]], %struct.S.0** [[TMP]], align 8 -// CHECK5-NEXT: [[TMP3:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK5-NEXT: [[TMP3:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK5-NEXT: [[CONV1:%.*]] = bitcast i64* [[T_VAR_CASTED]] to i32* // CHECK5-NEXT: store i32 [[TMP3]], i32* [[CONV1]], align 4 // CHECK5-NEXT: [[TMP4:%.*]] = load i64, i64* [[T_VAR_CASTED]], align 8 @@ -2105,7 +2105,7 @@ int main() { // CHECK5-NEXT: br i1 [[TMP23]], label [[DOTOMP_LASTPRIVATE_THEN:%.*]], label [[DOTOMP_LASTPRIVATE_DONE:%.*]] // CHECK5: .omp.lastprivate.then: // CHECK5-NEXT: [[TMP24:%.*]] = load i32, i32* [[T_VAR2]], align 4 -// CHECK5-NEXT: store i32 [[TMP24]], i32* [[CONV]], align 8 +// CHECK5-NEXT: store i32 [[TMP24]], i32* [[CONV]], align 4 // CHECK5-NEXT: [[TMP25:%.*]] = bitcast [2 x i32]* [[TMP0]] to i8* // CHECK5-NEXT: [[TMP26:%.*]] = bitcast [2 x i32]* [[VEC3]] to i8* // CHECK5-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 [[TMP25]], i8* align 4 [[TMP26]], i64 8, i1 false) @@ -2265,7 +2265,7 @@ int main() { // CHECK5-NEXT: br i1 [[TMP24]], label [[DOTOMP_LASTPRIVATE_THEN:%.*]], label [[DOTOMP_LASTPRIVATE_DONE:%.*]] // CHECK5: .omp.lastprivate.then: // CHECK5-NEXT: [[TMP25:%.*]] = load i32, i32* [[T_VAR4]], align 4 -// CHECK5-NEXT: store i32 [[TMP25]], i32* [[CONV]], align 8 +// CHECK5-NEXT: store i32 [[TMP25]], i32* [[CONV]], align 4 // CHECK5-NEXT: [[TMP26:%.*]] = bitcast [2 x i32]* [[TMP0]] to i8* // CHECK5-NEXT: [[TMP27:%.*]] = bitcast [2 x i32]* [[VEC5]] to i8* // CHECK5-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 [[TMP26]], i8* align 4 [[TMP27]], i64 8, i1 false) @@ -2509,12 +2509,12 @@ int main() { // CHECK6-NEXT: [[TMP2:%.*]] = load %struct.S*, %struct.S** [[VAR_ADDR]], align 8 // CHECK6-NEXT: [[CONV1:%.*]] = bitcast i64* [[SVAR_ADDR]] to i32* // CHECK6-NEXT: store %struct.S* [[TMP2]], %struct.S** [[TMP]], align 8 -// CHECK6-NEXT: [[TMP3:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK6-NEXT: [[TMP3:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK6-NEXT: [[CONV2:%.*]] = bitcast i64* [[T_VAR_CASTED]] to i32* // CHECK6-NEXT: store i32 [[TMP3]], i32* [[CONV2]], align 4 // CHECK6-NEXT: [[TMP4:%.*]] = load i64, i64* [[T_VAR_CASTED]], align 8 // CHECK6-NEXT: [[TMP5:%.*]] = load %struct.S*, %struct.S** [[TMP]], align 8 -// CHECK6-NEXT: [[TMP6:%.*]] = load i32, i32* [[CONV1]], align 8 +// CHECK6-NEXT: [[TMP6:%.*]] = load i32, i32* [[CONV1]], align 4 // CHECK6-NEXT: [[CONV3:%.*]] = bitcast i64* [[SVAR_CASTED]] to i32* // CHECK6-NEXT: store i32 [[TMP6]], i32* [[CONV3]], align 4 // CHECK6-NEXT: [[TMP7:%.*]] = load i64, i64* [[SVAR_CASTED]], align 8 @@ -2635,7 +2635,7 @@ int main() { // CHECK6-NEXT: br i1 [[TMP25]], label [[DOTOMP_LASTPRIVATE_THEN:%.*]], label [[DOTOMP_LASTPRIVATE_DONE:%.*]] // CHECK6: .omp.lastprivate.then: // CHECK6-NEXT: [[TMP26:%.*]] = load i32, i32* [[T_VAR3]], align 4 -// CHECK6-NEXT: store i32 [[TMP26]], i32* [[CONV]], align 8 +// CHECK6-NEXT: store i32 [[TMP26]], i32* [[CONV]], align 4 // CHECK6-NEXT: [[TMP27:%.*]] = bitcast [2 x i32]* [[TMP0]] to i8* // CHECK6-NEXT: [[TMP28:%.*]] = bitcast [2 x i32]* [[VEC4]] to i8* // CHECK6-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 [[TMP27]], i8* align 4 [[TMP28]], i64 8, i1 false) @@ -2660,7 +2660,7 @@ int main() { // CHECK6-NEXT: [[TMP35:%.*]] = bitcast %struct.S* [[TMP33]] to i8* // CHECK6-NEXT: call void 
@llvm.memcpy.p0i8.p0i8.i64(i8* align 4 [[TMP34]], i8* align 4 [[TMP35]], i64 4, i1 false) // CHECK6-NEXT: [[TMP36:%.*]] = load i32, i32* [[SVAR8]], align 4 -// CHECK6-NEXT: store i32 [[TMP36]], i32* [[CONV1]], align 8 +// CHECK6-NEXT: store i32 [[TMP36]], i32* [[CONV1]], align 4 // CHECK6-NEXT: br label [[DOTOMP_LASTPRIVATE_DONE]] // CHECK6: .omp.lastprivate.done: // CHECK6-NEXT: call void @_ZN1SIfED1Ev(%struct.S* nonnull align 4 dereferenceable(4) [[VAR6]]) #[[ATTR4]] @@ -2801,7 +2801,7 @@ int main() { // CHECK6-NEXT: br i1 [[TMP24]], label [[DOTOMP_LASTPRIVATE_THEN:%.*]], label [[DOTOMP_LASTPRIVATE_DONE:%.*]] // CHECK6: .omp.lastprivate.then: // CHECK6-NEXT: [[TMP25:%.*]] = load i32, i32* [[T_VAR5]], align 4 -// CHECK6-NEXT: store i32 [[TMP25]], i32* [[CONV]], align 8 +// CHECK6-NEXT: store i32 [[TMP25]], i32* [[CONV]], align 4 // CHECK6-NEXT: [[TMP26:%.*]] = bitcast [2 x i32]* [[TMP0]] to i8* // CHECK6-NEXT: [[TMP27:%.*]] = bitcast [2 x i32]* [[VEC6]] to i8* // CHECK6-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 [[TMP26]], i8* align 4 [[TMP27]], i64 8, i1 false) @@ -2826,7 +2826,7 @@ int main() { // CHECK6-NEXT: [[TMP34:%.*]] = bitcast %struct.S* [[TMP32]] to i8* // CHECK6-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 [[TMP33]], i8* align 4 [[TMP34]], i64 4, i1 false) // CHECK6-NEXT: [[TMP35:%.*]] = load i32, i32* [[SVAR10]], align 4 -// CHECK6-NEXT: store i32 [[TMP35]], i32* [[CONV1]], align 8 +// CHECK6-NEXT: store i32 [[TMP35]], i32* [[CONV1]], align 4 // CHECK6-NEXT: br label [[DOTOMP_LASTPRIVATE_DONE]] // CHECK6: .omp.lastprivate.done: // CHECK6-NEXT: call void @_ZN1SIfED1Ev(%struct.S* nonnull align 4 dereferenceable(4) [[VAR8]]) #[[ATTR4]] @@ -3017,7 +3017,7 @@ int main() { // CHECK6-NEXT: [[TMP1:%.*]] = load [2 x %struct.S.0]*, [2 x %struct.S.0]** [[S_ARR_ADDR]], align 8 // CHECK6-NEXT: [[TMP2:%.*]] = load %struct.S.0*, %struct.S.0** [[VAR_ADDR]], align 8 // CHECK6-NEXT: store %struct.S.0* [[TMP2]], %struct.S.0** [[TMP]], align 8 -// CHECK6-NEXT: [[TMP3:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK6-NEXT: [[TMP3:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK6-NEXT: [[CONV1:%.*]] = bitcast i64* [[T_VAR_CASTED]] to i32* // CHECK6-NEXT: store i32 [[TMP3]], i32* [[CONV1]], align 4 // CHECK6-NEXT: [[TMP4:%.*]] = load i64, i64* [[T_VAR_CASTED]], align 8 @@ -3130,7 +3130,7 @@ int main() { // CHECK6-NEXT: br i1 [[TMP23]], label [[DOTOMP_LASTPRIVATE_THEN:%.*]], label [[DOTOMP_LASTPRIVATE_DONE:%.*]] // CHECK6: .omp.lastprivate.then: // CHECK6-NEXT: [[TMP24:%.*]] = load i32, i32* [[T_VAR2]], align 4 -// CHECK6-NEXT: store i32 [[TMP24]], i32* [[CONV]], align 8 +// CHECK6-NEXT: store i32 [[TMP24]], i32* [[CONV]], align 4 // CHECK6-NEXT: [[TMP25:%.*]] = bitcast [2 x i32]* [[TMP0]] to i8* // CHECK6-NEXT: [[TMP26:%.*]] = bitcast [2 x i32]* [[VEC3]] to i8* // CHECK6-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 [[TMP25]], i8* align 4 [[TMP26]], i64 8, i1 false) @@ -3290,7 +3290,7 @@ int main() { // CHECK6-NEXT: br i1 [[TMP24]], label [[DOTOMP_LASTPRIVATE_THEN:%.*]], label [[DOTOMP_LASTPRIVATE_DONE:%.*]] // CHECK6: .omp.lastprivate.then: // CHECK6-NEXT: [[TMP25:%.*]] = load i32, i32* [[T_VAR4]], align 4 -// CHECK6-NEXT: store i32 [[TMP25]], i32* [[CONV]], align 8 +// CHECK6-NEXT: store i32 [[TMP25]], i32* [[CONV]], align 4 // CHECK6-NEXT: [[TMP26:%.*]] = bitcast [2 x i32]* [[TMP0]] to i8* // CHECK6-NEXT: [[TMP27:%.*]] = bitcast [2 x i32]* [[VEC5]] to i8* // CHECK6-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 [[TMP26]], i8* align 4 [[TMP27]], i64 8, i1 
false) diff --git a/clang/test/OpenMP/target_teams_distribute_parallel_for_schedule_codegen.cpp b/clang/test/OpenMP/target_teams_distribute_parallel_for_schedule_codegen.cpp index 364dac905a3d..86235945dd3b 100644 --- a/clang/test/OpenMP/target_teams_distribute_parallel_for_schedule_codegen.cpp +++ b/clang/test/OpenMP/target_teams_distribute_parallel_for_schedule_codegen.cpp @@ -7797,7 +7797,7 @@ int main (int argc, char **argv) { // CHECK13-NEXT: [[CONV:%.*]] = bitcast i64* [[N_ADDR]] to i32* // CHECK13-NEXT: [[TMP0:%.*]] = load i64, i64* [[VLA_ADDR]], align 8 // CHECK13-NEXT: [[TMP1:%.*]] = load i32*, i32** [[A_ADDR]], align 8 -// CHECK13-NEXT: [[TMP2:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK13-NEXT: [[TMP2:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK13-NEXT: [[CONV1:%.*]] = bitcast i64* [[N_CASTED]] to i32* // CHECK13-NEXT: store i32 [[TMP2]], i32* [[CONV1]], align 4 // CHECK13-NEXT: [[TMP3:%.*]] = load i64, i64* [[N_CASTED]], align 8 @@ -7832,7 +7832,7 @@ int main (int argc, char **argv) { // CHECK13-NEXT: [[CONV:%.*]] = bitcast i64* [[N_ADDR]] to i32* // CHECK13-NEXT: [[TMP0:%.*]] = load i64, i64* [[VLA_ADDR]], align 8 // CHECK13-NEXT: [[TMP1:%.*]] = load i32*, i32** [[A_ADDR]], align 8 -// CHECK13-NEXT: [[TMP2:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK13-NEXT: [[TMP2:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK13-NEXT: store i32 [[TMP2]], i32* [[DOTCAPTURE_EXPR_]], align 4 // CHECK13-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 // CHECK13-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP3]], 0 @@ -7878,7 +7878,7 @@ int main (int argc, char **argv) { // CHECK13-NEXT: [[TMP16:%.*]] = zext i32 [[TMP15]] to i64 // CHECK13-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK13-NEXT: [[TMP18:%.*]] = zext i32 [[TMP17]] to i64 -// CHECK13-NEXT: [[TMP19:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK13-NEXT: [[TMP19:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK13-NEXT: [[CONV6:%.*]] = bitcast i64* [[N_CASTED]] to i32* // CHECK13-NEXT: store i32 [[TMP19]], i32* [[CONV6]], align 4 // CHECK13-NEXT: [[TMP20:%.*]] = load i64, i64* [[N_CASTED]], align 8 @@ -7931,7 +7931,7 @@ int main (int argc, char **argv) { // CHECK13-NEXT: [[CONV:%.*]] = bitcast i64* [[N_ADDR]] to i32* // CHECK13-NEXT: [[TMP0:%.*]] = load i64, i64* [[VLA_ADDR]], align 8 // CHECK13-NEXT: [[TMP1:%.*]] = load i32*, i32** [[A_ADDR]], align 8 -// CHECK13-NEXT: [[TMP2:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK13-NEXT: [[TMP2:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK13-NEXT: store i32 [[TMP2]], i32* [[DOTCAPTURE_EXPR_]], align 4 // CHECK13-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 // CHECK13-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP3]], 0 @@ -8019,7 +8019,7 @@ int main (int argc, char **argv) { // CHECK13-NEXT: [[CONV:%.*]] = bitcast i64* [[N_ADDR]] to i32* // CHECK13-NEXT: [[TMP0:%.*]] = load i64, i64* [[VLA_ADDR]], align 8 // CHECK13-NEXT: [[TMP1:%.*]] = load i32*, i32** [[A_ADDR]], align 8 -// CHECK13-NEXT: [[TMP2:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK13-NEXT: [[TMP2:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK13-NEXT: [[CONV1:%.*]] = bitcast i64* [[N_CASTED]] to i32* // CHECK13-NEXT: store i32 [[TMP2]], i32* [[CONV1]], align 4 // CHECK13-NEXT: [[TMP3:%.*]] = load i64, i64* [[N_CASTED]], align 8 @@ -8054,7 +8054,7 @@ int main (int argc, char **argv) { // CHECK13-NEXT: [[CONV:%.*]] = bitcast i64* [[N_ADDR]] to i32* // CHECK13-NEXT: [[TMP0:%.*]] = load i64, i64* [[VLA_ADDR]], align 8 // CHECK13-NEXT: [[TMP1:%.*]] = load 
i32*, i32** [[A_ADDR]], align 8 -// CHECK13-NEXT: [[TMP2:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK13-NEXT: [[TMP2:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK13-NEXT: store i32 [[TMP2]], i32* [[DOTCAPTURE_EXPR_]], align 4 // CHECK13-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 // CHECK13-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP3]], 0 @@ -8100,7 +8100,7 @@ int main (int argc, char **argv) { // CHECK13-NEXT: [[TMP16:%.*]] = zext i32 [[TMP15]] to i64 // CHECK13-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK13-NEXT: [[TMP18:%.*]] = zext i32 [[TMP17]] to i64 -// CHECK13-NEXT: [[TMP19:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK13-NEXT: [[TMP19:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK13-NEXT: [[CONV6:%.*]] = bitcast i64* [[N_CASTED]] to i32* // CHECK13-NEXT: store i32 [[TMP19]], i32* [[CONV6]], align 4 // CHECK13-NEXT: [[TMP20:%.*]] = load i64, i64* [[N_CASTED]], align 8 @@ -8153,7 +8153,7 @@ int main (int argc, char **argv) { // CHECK13-NEXT: [[CONV:%.*]] = bitcast i64* [[N_ADDR]] to i32* // CHECK13-NEXT: [[TMP0:%.*]] = load i64, i64* [[VLA_ADDR]], align 8 // CHECK13-NEXT: [[TMP1:%.*]] = load i32*, i32** [[A_ADDR]], align 8 -// CHECK13-NEXT: [[TMP2:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK13-NEXT: [[TMP2:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK13-NEXT: store i32 [[TMP2]], i32* [[DOTCAPTURE_EXPR_]], align 4 // CHECK13-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 // CHECK13-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP3]], 0 @@ -8245,11 +8245,11 @@ int main (int argc, char **argv) { // CHECK13-NEXT: [[TMP0:%.*]] = load i64, i64* [[VLA_ADDR]], align 8 // CHECK13-NEXT: [[TMP1:%.*]] = load i32*, i32** [[A_ADDR]], align 8 // CHECK13-NEXT: [[CONV1:%.*]] = bitcast i64* [[DOTCAPTURE_EXPR__ADDR]] to i32* -// CHECK13-NEXT: [[TMP2:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK13-NEXT: [[TMP2:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK13-NEXT: [[CONV2:%.*]] = bitcast i64* [[N_CASTED]] to i32* // CHECK13-NEXT: store i32 [[TMP2]], i32* [[CONV2]], align 4 // CHECK13-NEXT: [[TMP3:%.*]] = load i64, i64* [[N_CASTED]], align 8 -// CHECK13-NEXT: [[TMP4:%.*]] = load i32, i32* [[CONV1]], align 8 +// CHECK13-NEXT: [[TMP4:%.*]] = load i32, i32* [[CONV1]], align 4 // CHECK13-NEXT: [[CONV3:%.*]] = bitcast i64* [[DOTCAPTURE_EXPR__CASTED]] to i32* // CHECK13-NEXT: store i32 [[TMP4]], i32* [[CONV3]], align 4 // CHECK13-NEXT: [[TMP5:%.*]] = load i64, i64* [[DOTCAPTURE_EXPR__CASTED]], align 8 @@ -8288,7 +8288,7 @@ int main (int argc, char **argv) { // CHECK13-NEXT: [[TMP0:%.*]] = load i64, i64* [[VLA_ADDR]], align 8 // CHECK13-NEXT: [[TMP1:%.*]] = load i32*, i32** [[A_ADDR]], align 8 // CHECK13-NEXT: [[CONV1:%.*]] = bitcast i64* [[DOTCAPTURE_EXPR__ADDR]] to i32* -// CHECK13-NEXT: [[TMP2:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK13-NEXT: [[TMP2:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK13-NEXT: store i32 [[TMP2]], i32* [[DOTCAPTURE_EXPR_2]], align 4 // CHECK13-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_2]], align 4 // CHECK13-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP3]], 0 @@ -8305,7 +8305,7 @@ int main (int argc, char **argv) { // CHECK13-NEXT: store i32 [[TMP5]], i32* [[DOTOMP_COMB_UB]], align 4 // CHECK13-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK13-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK13-NEXT: [[TMP6:%.*]] = load i32, i32* [[CONV1]], align 8 +// CHECK13-NEXT: [[TMP6:%.*]] = load i32, i32* [[CONV1]], align 4 // CHECK13-NEXT: [[TMP7:%.*]] = load 
i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK13-NEXT: [[TMP8:%.*]] = load i32, i32* [[TMP7]], align 4 // CHECK13-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP8]], i32 91, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 [[TMP6]]) @@ -8336,11 +8336,11 @@ int main (int argc, char **argv) { // CHECK13-NEXT: [[TMP17:%.*]] = zext i32 [[TMP16]] to i64 // CHECK13-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK13-NEXT: [[TMP19:%.*]] = zext i32 [[TMP18]] to i64 -// CHECK13-NEXT: [[TMP20:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK13-NEXT: [[TMP20:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK13-NEXT: [[CONV8:%.*]] = bitcast i64* [[N_CASTED]] to i32* // CHECK13-NEXT: store i32 [[TMP20]], i32* [[CONV8]], align 4 // CHECK13-NEXT: [[TMP21:%.*]] = load i64, i64* [[N_CASTED]], align 8 -// CHECK13-NEXT: [[TMP22:%.*]] = load i32, i32* [[CONV1]], align 8 +// CHECK13-NEXT: [[TMP22:%.*]] = load i32, i32* [[CONV1]], align 4 // CHECK13-NEXT: [[CONV9:%.*]] = bitcast i64* [[DOTCAPTURE_EXPR__CASTED]] to i32* // CHECK13-NEXT: store i32 [[TMP22]], i32* [[CONV9]], align 4 // CHECK13-NEXT: [[TMP23:%.*]] = load i64, i64* [[DOTCAPTURE_EXPR__CASTED]], align 8 @@ -8419,7 +8419,7 @@ int main (int argc, char **argv) { // CHECK13-NEXT: [[TMP0:%.*]] = load i64, i64* [[VLA_ADDR]], align 8 // CHECK13-NEXT: [[TMP1:%.*]] = load i32*, i32** [[A_ADDR]], align 8 // CHECK13-NEXT: [[CONV1:%.*]] = bitcast i64* [[DOTCAPTURE_EXPR__ADDR]] to i32* -// CHECK13-NEXT: [[TMP2:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK13-NEXT: [[TMP2:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK13-NEXT: store i32 [[TMP2]], i32* [[DOTCAPTURE_EXPR_2]], align 4 // CHECK13-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_2]], align 4 // CHECK13-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP3]], 0 @@ -8507,7 +8507,7 @@ int main (int argc, char **argv) { // CHECK13-NEXT: [[CONV:%.*]] = bitcast i64* [[N_ADDR]] to i32* // CHECK13-NEXT: [[TMP0:%.*]] = load i64, i64* [[VLA_ADDR]], align 8 // CHECK13-NEXT: [[TMP1:%.*]] = load i32*, i32** [[A_ADDR]], align 8 -// CHECK13-NEXT: [[TMP2:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK13-NEXT: [[TMP2:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK13-NEXT: [[CONV1:%.*]] = bitcast i64* [[N_CASTED]] to i32* // CHECK13-NEXT: store i32 [[TMP2]], i32* [[CONV1]], align 4 // CHECK13-NEXT: [[TMP3:%.*]] = load i64, i64* [[N_CASTED]], align 8 @@ -8542,7 +8542,7 @@ int main (int argc, char **argv) { // CHECK13-NEXT: [[CONV:%.*]] = bitcast i64* [[N_ADDR]] to i32* // CHECK13-NEXT: [[TMP0:%.*]] = load i64, i64* [[VLA_ADDR]], align 8 // CHECK13-NEXT: [[TMP1:%.*]] = load i32*, i32** [[A_ADDR]], align 8 -// CHECK13-NEXT: [[TMP2:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK13-NEXT: [[TMP2:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK13-NEXT: store i32 [[TMP2]], i32* [[DOTCAPTURE_EXPR_]], align 4 // CHECK13-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 // CHECK13-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP3]], 0 @@ -8588,7 +8588,7 @@ int main (int argc, char **argv) { // CHECK13-NEXT: [[TMP16:%.*]] = zext i32 [[TMP15]] to i64 // CHECK13-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK13-NEXT: [[TMP18:%.*]] = zext i32 [[TMP17]] to i64 -// CHECK13-NEXT: [[TMP19:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK13-NEXT: [[TMP19:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK13-NEXT: [[CONV6:%.*]] = bitcast i64* [[N_CASTED]] to i32* // CHECK13-NEXT: 
store i32 [[TMP19]], i32* [[CONV6]], align 4 // CHECK13-NEXT: [[TMP20:%.*]] = load i64, i64* [[N_CASTED]], align 8 @@ -8641,7 +8641,7 @@ int main (int argc, char **argv) { // CHECK13-NEXT: [[CONV:%.*]] = bitcast i64* [[N_ADDR]] to i32* // CHECK13-NEXT: [[TMP0:%.*]] = load i64, i64* [[VLA_ADDR]], align 8 // CHECK13-NEXT: [[TMP1:%.*]] = load i32*, i32** [[A_ADDR]], align 8 -// CHECK13-NEXT: [[TMP2:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK13-NEXT: [[TMP2:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK13-NEXT: store i32 [[TMP2]], i32* [[DOTCAPTURE_EXPR_]], align 4 // CHECK13-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 // CHECK13-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP3]], 0 @@ -8729,11 +8729,11 @@ int main (int argc, char **argv) { // CHECK13-NEXT: [[TMP0:%.*]] = load i64, i64* [[VLA_ADDR]], align 8 // CHECK13-NEXT: [[TMP1:%.*]] = load i32*, i32** [[A_ADDR]], align 8 // CHECK13-NEXT: [[CONV1:%.*]] = bitcast i64* [[DOTCAPTURE_EXPR__ADDR]] to i32* -// CHECK13-NEXT: [[TMP2:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK13-NEXT: [[TMP2:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK13-NEXT: [[CONV2:%.*]] = bitcast i64* [[N_CASTED]] to i32* // CHECK13-NEXT: store i32 [[TMP2]], i32* [[CONV2]], align 4 // CHECK13-NEXT: [[TMP3:%.*]] = load i64, i64* [[N_CASTED]], align 8 -// CHECK13-NEXT: [[TMP4:%.*]] = load i32, i32* [[CONV1]], align 8 +// CHECK13-NEXT: [[TMP4:%.*]] = load i32, i32* [[CONV1]], align 4 // CHECK13-NEXT: [[CONV3:%.*]] = bitcast i64* [[DOTCAPTURE_EXPR__CASTED]] to i32* // CHECK13-NEXT: store i32 [[TMP4]], i32* [[CONV3]], align 4 // CHECK13-NEXT: [[TMP5:%.*]] = load i64, i64* [[DOTCAPTURE_EXPR__CASTED]], align 8 @@ -8772,7 +8772,7 @@ int main (int argc, char **argv) { // CHECK13-NEXT: [[TMP0:%.*]] = load i64, i64* [[VLA_ADDR]], align 8 // CHECK13-NEXT: [[TMP1:%.*]] = load i32*, i32** [[A_ADDR]], align 8 // CHECK13-NEXT: [[CONV1:%.*]] = bitcast i64* [[DOTCAPTURE_EXPR__ADDR]] to i32* -// CHECK13-NEXT: [[TMP2:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK13-NEXT: [[TMP2:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK13-NEXT: store i32 [[TMP2]], i32* [[DOTCAPTURE_EXPR_2]], align 4 // CHECK13-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_2]], align 4 // CHECK13-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP3]], 0 @@ -8818,11 +8818,11 @@ int main (int argc, char **argv) { // CHECK13-NEXT: [[TMP16:%.*]] = zext i32 [[TMP15]] to i64 // CHECK13-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK13-NEXT: [[TMP18:%.*]] = zext i32 [[TMP17]] to i64 -// CHECK13-NEXT: [[TMP19:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK13-NEXT: [[TMP19:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK13-NEXT: [[CONV8:%.*]] = bitcast i64* [[N_CASTED]] to i32* // CHECK13-NEXT: store i32 [[TMP19]], i32* [[CONV8]], align 4 // CHECK13-NEXT: [[TMP20:%.*]] = load i64, i64* [[N_CASTED]], align 8 -// CHECK13-NEXT: [[TMP21:%.*]] = load i32, i32* [[CONV1]], align 8 +// CHECK13-NEXT: [[TMP21:%.*]] = load i32, i32* [[CONV1]], align 4 // CHECK13-NEXT: [[CONV9:%.*]] = bitcast i64* [[DOTCAPTURE_EXPR__CASTED]] to i32* // CHECK13-NEXT: store i32 [[TMP21]], i32* [[CONV9]], align 4 // CHECK13-NEXT: [[TMP22:%.*]] = load i64, i64* [[DOTCAPTURE_EXPR__CASTED]], align 8 @@ -8878,7 +8878,7 @@ int main (int argc, char **argv) { // CHECK13-NEXT: [[TMP0:%.*]] = load i64, i64* [[VLA_ADDR]], align 8 // CHECK13-NEXT: [[TMP1:%.*]] = load i32*, i32** [[A_ADDR]], align 8 // CHECK13-NEXT: [[CONV1:%.*]] = bitcast i64* [[DOTCAPTURE_EXPR__ADDR]] to i32* -// CHECK13-NEXT: [[TMP2:%.*]] = load 
i32, i32* [[CONV]], align 8 +// CHECK13-NEXT: [[TMP2:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK13-NEXT: store i32 [[TMP2]], i32* [[DOTCAPTURE_EXPR_2]], align 4 // CHECK13-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_2]], align 4 // CHECK13-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP3]], 0 @@ -8901,7 +8901,7 @@ int main (int argc, char **argv) { // CHECK13-NEXT: store i32 [[CONV6]], i32* [[DOTOMP_UB]], align 4 // CHECK13-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK13-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK13-NEXT: [[TMP8:%.*]] = load i32, i32* [[CONV1]], align 8 +// CHECK13-NEXT: [[TMP8:%.*]] = load i32, i32* [[CONV1]], align 4 // CHECK13-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 // CHECK13-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 // CHECK13-NEXT: [[TMP11:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 @@ -9413,7 +9413,7 @@ int main (int argc, char **argv) { // CHECK13-NEXT: store i64 [[DOTCAPTURE_EXPR_]], i64* [[DOTCAPTURE_EXPR__ADDR]], align 8 // CHECK13-NEXT: [[TMP0:%.*]] = load [10 x i32]*, [10 x i32]** [[A_ADDR]], align 8 // CHECK13-NEXT: [[CONV:%.*]] = bitcast i64* [[DOTCAPTURE_EXPR__ADDR]] to i32* -// CHECK13-NEXT: [[TMP1:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK13-NEXT: [[TMP1:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK13-NEXT: [[CONV1:%.*]] = bitcast i64* [[DOTCAPTURE_EXPR__CASTED]] to i32* // CHECK13-NEXT: store i32 [[TMP1]], i32* [[CONV1]], align 4 // CHECK13-NEXT: [[TMP2:%.*]] = load i64, i64* [[DOTCAPTURE_EXPR__CASTED]], align 8 @@ -9473,7 +9473,7 @@ int main (int argc, char **argv) { // CHECK13-NEXT: [[TMP9:%.*]] = zext i32 [[TMP8]] to i64 // CHECK13-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK13-NEXT: [[TMP11:%.*]] = zext i32 [[TMP10]] to i64 -// CHECK13-NEXT: [[TMP12:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK13-NEXT: [[TMP12:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK13-NEXT: [[CONV2:%.*]] = bitcast i64* [[DOTCAPTURE_EXPR__CASTED]] to i32* // CHECK13-NEXT: store i32 [[TMP12]], i32* [[CONV2]], align 4 // CHECK13-NEXT: [[TMP13:%.*]] = load i64, i64* [[DOTCAPTURE_EXPR__CASTED]], align 8 @@ -9526,7 +9526,7 @@ int main (int argc, char **argv) { // CHECK13-NEXT: store i32 [[CONV2]], i32* [[DOTOMP_UB]], align 4 // CHECK13-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK13-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK13-NEXT: [[TMP3:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK13-NEXT: [[TMP3:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK13-NEXT: [[TMP4:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK13-NEXT: [[TMP5:%.*]] = load i32, i32* [[TMP4]], align 4 // CHECK13-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB2]], i32 [[TMP5]], i32 33, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 [[TMP3]]) @@ -9752,7 +9752,7 @@ int main (int argc, char **argv) { // CHECK13-NEXT: store i64 [[DOTCAPTURE_EXPR_]], i64* [[DOTCAPTURE_EXPR__ADDR]], align 8 // CHECK13-NEXT: [[TMP0:%.*]] = load [10 x i32]*, [10 x i32]** [[A_ADDR]], align 8 // CHECK13-NEXT: [[CONV:%.*]] = bitcast i64* [[DOTCAPTURE_EXPR__ADDR]] to i32* -// CHECK13-NEXT: [[TMP1:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK13-NEXT: [[TMP1:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK13-NEXT: [[CONV1:%.*]] = bitcast i64* [[DOTCAPTURE_EXPR__CASTED]] to i32* // CHECK13-NEXT: store i32 [[TMP1]], i32* [[CONV1]], align 4 // CHECK13-NEXT: [[TMP2:%.*]] = load i64, 
i64* [[DOTCAPTURE_EXPR__CASTED]], align 8 @@ -9812,7 +9812,7 @@ int main (int argc, char **argv) { // CHECK13-NEXT: [[TMP9:%.*]] = zext i32 [[TMP8]] to i64 // CHECK13-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK13-NEXT: [[TMP11:%.*]] = zext i32 [[TMP10]] to i64 -// CHECK13-NEXT: [[TMP12:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK13-NEXT: [[TMP12:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK13-NEXT: [[CONV2:%.*]] = bitcast i64* [[DOTCAPTURE_EXPR__CASTED]] to i32* // CHECK13-NEXT: store i32 [[TMP12]], i32* [[CONV2]], align 4 // CHECK13-NEXT: [[TMP13:%.*]] = load i64, i64* [[DOTCAPTURE_EXPR__CASTED]], align 8 @@ -9865,7 +9865,7 @@ int main (int argc, char **argv) { // CHECK13-NEXT: store i32 [[CONV2]], i32* [[DOTOMP_UB]], align 4 // CHECK13-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK13-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK13-NEXT: [[TMP3:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK13-NEXT: [[TMP3:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK13-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 // CHECK13-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 // CHECK13-NEXT: [[TMP6:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 @@ -10316,7 +10316,7 @@ int main (int argc, char **argv) { // CHECK14-NEXT: [[CONV:%.*]] = bitcast i64* [[N_ADDR]] to i32* // CHECK14-NEXT: [[TMP0:%.*]] = load i64, i64* [[VLA_ADDR]], align 8 // CHECK14-NEXT: [[TMP1:%.*]] = load i32*, i32** [[A_ADDR]], align 8 -// CHECK14-NEXT: [[TMP2:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK14-NEXT: [[TMP2:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK14-NEXT: [[CONV1:%.*]] = bitcast i64* [[N_CASTED]] to i32* // CHECK14-NEXT: store i32 [[TMP2]], i32* [[CONV1]], align 4 // CHECK14-NEXT: [[TMP3:%.*]] = load i64, i64* [[N_CASTED]], align 8 @@ -10351,7 +10351,7 @@ int main (int argc, char **argv) { // CHECK14-NEXT: [[CONV:%.*]] = bitcast i64* [[N_ADDR]] to i32* // CHECK14-NEXT: [[TMP0:%.*]] = load i64, i64* [[VLA_ADDR]], align 8 // CHECK14-NEXT: [[TMP1:%.*]] = load i32*, i32** [[A_ADDR]], align 8 -// CHECK14-NEXT: [[TMP2:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK14-NEXT: [[TMP2:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK14-NEXT: store i32 [[TMP2]], i32* [[DOTCAPTURE_EXPR_]], align 4 // CHECK14-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 // CHECK14-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP3]], 0 @@ -10397,7 +10397,7 @@ int main (int argc, char **argv) { // CHECK14-NEXT: [[TMP16:%.*]] = zext i32 [[TMP15]] to i64 // CHECK14-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK14-NEXT: [[TMP18:%.*]] = zext i32 [[TMP17]] to i64 -// CHECK14-NEXT: [[TMP19:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK14-NEXT: [[TMP19:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK14-NEXT: [[CONV6:%.*]] = bitcast i64* [[N_CASTED]] to i32* // CHECK14-NEXT: store i32 [[TMP19]], i32* [[CONV6]], align 4 // CHECK14-NEXT: [[TMP20:%.*]] = load i64, i64* [[N_CASTED]], align 8 @@ -10450,7 +10450,7 @@ int main (int argc, char **argv) { // CHECK14-NEXT: [[CONV:%.*]] = bitcast i64* [[N_ADDR]] to i32* // CHECK14-NEXT: [[TMP0:%.*]] = load i64, i64* [[VLA_ADDR]], align 8 // CHECK14-NEXT: [[TMP1:%.*]] = load i32*, i32** [[A_ADDR]], align 8 -// CHECK14-NEXT: [[TMP2:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK14-NEXT: [[TMP2:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK14-NEXT: store i32 [[TMP2]], i32* [[DOTCAPTURE_EXPR_]], align 4 // CHECK14-NEXT: [[TMP3:%.*]] = load i32, i32* 
[[DOTCAPTURE_EXPR_]], align 4 // CHECK14-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP3]], 0 @@ -10538,7 +10538,7 @@ int main (int argc, char **argv) { // CHECK14-NEXT: [[CONV:%.*]] = bitcast i64* [[N_ADDR]] to i32* // CHECK14-NEXT: [[TMP0:%.*]] = load i64, i64* [[VLA_ADDR]], align 8 // CHECK14-NEXT: [[TMP1:%.*]] = load i32*, i32** [[A_ADDR]], align 8 -// CHECK14-NEXT: [[TMP2:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK14-NEXT: [[TMP2:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK14-NEXT: [[CONV1:%.*]] = bitcast i64* [[N_CASTED]] to i32* // CHECK14-NEXT: store i32 [[TMP2]], i32* [[CONV1]], align 4 // CHECK14-NEXT: [[TMP3:%.*]] = load i64, i64* [[N_CASTED]], align 8 @@ -10573,7 +10573,7 @@ int main (int argc, char **argv) { // CHECK14-NEXT: [[CONV:%.*]] = bitcast i64* [[N_ADDR]] to i32* // CHECK14-NEXT: [[TMP0:%.*]] = load i64, i64* [[VLA_ADDR]], align 8 // CHECK14-NEXT: [[TMP1:%.*]] = load i32*, i32** [[A_ADDR]], align 8 -// CHECK14-NEXT: [[TMP2:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK14-NEXT: [[TMP2:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK14-NEXT: store i32 [[TMP2]], i32* [[DOTCAPTURE_EXPR_]], align 4 // CHECK14-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 // CHECK14-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP3]], 0 @@ -10619,7 +10619,7 @@ int main (int argc, char **argv) { // CHECK14-NEXT: [[TMP16:%.*]] = zext i32 [[TMP15]] to i64 // CHECK14-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK14-NEXT: [[TMP18:%.*]] = zext i32 [[TMP17]] to i64 -// CHECK14-NEXT: [[TMP19:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK14-NEXT: [[TMP19:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK14-NEXT: [[CONV6:%.*]] = bitcast i64* [[N_CASTED]] to i32* // CHECK14-NEXT: store i32 [[TMP19]], i32* [[CONV6]], align 4 // CHECK14-NEXT: [[TMP20:%.*]] = load i64, i64* [[N_CASTED]], align 8 @@ -10672,7 +10672,7 @@ int main (int argc, char **argv) { // CHECK14-NEXT: [[CONV:%.*]] = bitcast i64* [[N_ADDR]] to i32* // CHECK14-NEXT: [[TMP0:%.*]] = load i64, i64* [[VLA_ADDR]], align 8 // CHECK14-NEXT: [[TMP1:%.*]] = load i32*, i32** [[A_ADDR]], align 8 -// CHECK14-NEXT: [[TMP2:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK14-NEXT: [[TMP2:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK14-NEXT: store i32 [[TMP2]], i32* [[DOTCAPTURE_EXPR_]], align 4 // CHECK14-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 // CHECK14-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP3]], 0 @@ -10764,11 +10764,11 @@ int main (int argc, char **argv) { // CHECK14-NEXT: [[TMP0:%.*]] = load i64, i64* [[VLA_ADDR]], align 8 // CHECK14-NEXT: [[TMP1:%.*]] = load i32*, i32** [[A_ADDR]], align 8 // CHECK14-NEXT: [[CONV1:%.*]] = bitcast i64* [[DOTCAPTURE_EXPR__ADDR]] to i32* -// CHECK14-NEXT: [[TMP2:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK14-NEXT: [[TMP2:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK14-NEXT: [[CONV2:%.*]] = bitcast i64* [[N_CASTED]] to i32* // CHECK14-NEXT: store i32 [[TMP2]], i32* [[CONV2]], align 4 // CHECK14-NEXT: [[TMP3:%.*]] = load i64, i64* [[N_CASTED]], align 8 -// CHECK14-NEXT: [[TMP4:%.*]] = load i32, i32* [[CONV1]], align 8 +// CHECK14-NEXT: [[TMP4:%.*]] = load i32, i32* [[CONV1]], align 4 // CHECK14-NEXT: [[CONV3:%.*]] = bitcast i64* [[DOTCAPTURE_EXPR__CASTED]] to i32* // CHECK14-NEXT: store i32 [[TMP4]], i32* [[CONV3]], align 4 // CHECK14-NEXT: [[TMP5:%.*]] = load i64, i64* [[DOTCAPTURE_EXPR__CASTED]], align 8 @@ -10807,7 +10807,7 @@ int main (int argc, char **argv) { // CHECK14-NEXT: [[TMP0:%.*]] = load i64, i64* [[VLA_ADDR]], align 8 // 
CHECK14-NEXT: [[TMP1:%.*]] = load i32*, i32** [[A_ADDR]], align 8 // CHECK14-NEXT: [[CONV1:%.*]] = bitcast i64* [[DOTCAPTURE_EXPR__ADDR]] to i32* -// CHECK14-NEXT: [[TMP2:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK14-NEXT: [[TMP2:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK14-NEXT: store i32 [[TMP2]], i32* [[DOTCAPTURE_EXPR_2]], align 4 // CHECK14-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_2]], align 4 // CHECK14-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP3]], 0 @@ -10824,7 +10824,7 @@ int main (int argc, char **argv) { // CHECK14-NEXT: store i32 [[TMP5]], i32* [[DOTOMP_COMB_UB]], align 4 // CHECK14-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK14-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK14-NEXT: [[TMP6:%.*]] = load i32, i32* [[CONV1]], align 8 +// CHECK14-NEXT: [[TMP6:%.*]] = load i32, i32* [[CONV1]], align 4 // CHECK14-NEXT: [[TMP7:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK14-NEXT: [[TMP8:%.*]] = load i32, i32* [[TMP7]], align 4 // CHECK14-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP8]], i32 91, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 [[TMP6]]) @@ -10855,11 +10855,11 @@ int main (int argc, char **argv) { // CHECK14-NEXT: [[TMP17:%.*]] = zext i32 [[TMP16]] to i64 // CHECK14-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK14-NEXT: [[TMP19:%.*]] = zext i32 [[TMP18]] to i64 -// CHECK14-NEXT: [[TMP20:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK14-NEXT: [[TMP20:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK14-NEXT: [[CONV8:%.*]] = bitcast i64* [[N_CASTED]] to i32* // CHECK14-NEXT: store i32 [[TMP20]], i32* [[CONV8]], align 4 // CHECK14-NEXT: [[TMP21:%.*]] = load i64, i64* [[N_CASTED]], align 8 -// CHECK14-NEXT: [[TMP22:%.*]] = load i32, i32* [[CONV1]], align 8 +// CHECK14-NEXT: [[TMP22:%.*]] = load i32, i32* [[CONV1]], align 4 // CHECK14-NEXT: [[CONV9:%.*]] = bitcast i64* [[DOTCAPTURE_EXPR__CASTED]] to i32* // CHECK14-NEXT: store i32 [[TMP22]], i32* [[CONV9]], align 4 // CHECK14-NEXT: [[TMP23:%.*]] = load i64, i64* [[DOTCAPTURE_EXPR__CASTED]], align 8 @@ -10938,7 +10938,7 @@ int main (int argc, char **argv) { // CHECK14-NEXT: [[TMP0:%.*]] = load i64, i64* [[VLA_ADDR]], align 8 // CHECK14-NEXT: [[TMP1:%.*]] = load i32*, i32** [[A_ADDR]], align 8 // CHECK14-NEXT: [[CONV1:%.*]] = bitcast i64* [[DOTCAPTURE_EXPR__ADDR]] to i32* -// CHECK14-NEXT: [[TMP2:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK14-NEXT: [[TMP2:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK14-NEXT: store i32 [[TMP2]], i32* [[DOTCAPTURE_EXPR_2]], align 4 // CHECK14-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_2]], align 4 // CHECK14-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP3]], 0 @@ -11026,7 +11026,7 @@ int main (int argc, char **argv) { // CHECK14-NEXT: [[CONV:%.*]] = bitcast i64* [[N_ADDR]] to i32* // CHECK14-NEXT: [[TMP0:%.*]] = load i64, i64* [[VLA_ADDR]], align 8 // CHECK14-NEXT: [[TMP1:%.*]] = load i32*, i32** [[A_ADDR]], align 8 -// CHECK14-NEXT: [[TMP2:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK14-NEXT: [[TMP2:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK14-NEXT: [[CONV1:%.*]] = bitcast i64* [[N_CASTED]] to i32* // CHECK14-NEXT: store i32 [[TMP2]], i32* [[CONV1]], align 4 // CHECK14-NEXT: [[TMP3:%.*]] = load i64, i64* [[N_CASTED]], align 8 @@ -11061,7 +11061,7 @@ int main (int argc, char **argv) { // CHECK14-NEXT: [[CONV:%.*]] = bitcast i64* [[N_ADDR]] to i32* // CHECK14-NEXT: [[TMP0:%.*]] 
= load i64, i64* [[VLA_ADDR]], align 8 // CHECK14-NEXT: [[TMP1:%.*]] = load i32*, i32** [[A_ADDR]], align 8 -// CHECK14-NEXT: [[TMP2:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK14-NEXT: [[TMP2:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK14-NEXT: store i32 [[TMP2]], i32* [[DOTCAPTURE_EXPR_]], align 4 // CHECK14-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 // CHECK14-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP3]], 0 @@ -11107,7 +11107,7 @@ int main (int argc, char **argv) { // CHECK14-NEXT: [[TMP16:%.*]] = zext i32 [[TMP15]] to i64 // CHECK14-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK14-NEXT: [[TMP18:%.*]] = zext i32 [[TMP17]] to i64 -// CHECK14-NEXT: [[TMP19:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK14-NEXT: [[TMP19:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK14-NEXT: [[CONV6:%.*]] = bitcast i64* [[N_CASTED]] to i32* // CHECK14-NEXT: store i32 [[TMP19]], i32* [[CONV6]], align 4 // CHECK14-NEXT: [[TMP20:%.*]] = load i64, i64* [[N_CASTED]], align 8 @@ -11160,7 +11160,7 @@ int main (int argc, char **argv) { // CHECK14-NEXT: [[CONV:%.*]] = bitcast i64* [[N_ADDR]] to i32* // CHECK14-NEXT: [[TMP0:%.*]] = load i64, i64* [[VLA_ADDR]], align 8 // CHECK14-NEXT: [[TMP1:%.*]] = load i32*, i32** [[A_ADDR]], align 8 -// CHECK14-NEXT: [[TMP2:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK14-NEXT: [[TMP2:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK14-NEXT: store i32 [[TMP2]], i32* [[DOTCAPTURE_EXPR_]], align 4 // CHECK14-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 // CHECK14-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP3]], 0 @@ -11248,11 +11248,11 @@ int main (int argc, char **argv) { // CHECK14-NEXT: [[TMP0:%.*]] = load i64, i64* [[VLA_ADDR]], align 8 // CHECK14-NEXT: [[TMP1:%.*]] = load i32*, i32** [[A_ADDR]], align 8 // CHECK14-NEXT: [[CONV1:%.*]] = bitcast i64* [[DOTCAPTURE_EXPR__ADDR]] to i32* -// CHECK14-NEXT: [[TMP2:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK14-NEXT: [[TMP2:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK14-NEXT: [[CONV2:%.*]] = bitcast i64* [[N_CASTED]] to i32* // CHECK14-NEXT: store i32 [[TMP2]], i32* [[CONV2]], align 4 // CHECK14-NEXT: [[TMP3:%.*]] = load i64, i64* [[N_CASTED]], align 8 -// CHECK14-NEXT: [[TMP4:%.*]] = load i32, i32* [[CONV1]], align 8 +// CHECK14-NEXT: [[TMP4:%.*]] = load i32, i32* [[CONV1]], align 4 // CHECK14-NEXT: [[CONV3:%.*]] = bitcast i64* [[DOTCAPTURE_EXPR__CASTED]] to i32* // CHECK14-NEXT: store i32 [[TMP4]], i32* [[CONV3]], align 4 // CHECK14-NEXT: [[TMP5:%.*]] = load i64, i64* [[DOTCAPTURE_EXPR__CASTED]], align 8 @@ -11291,7 +11291,7 @@ int main (int argc, char **argv) { // CHECK14-NEXT: [[TMP0:%.*]] = load i64, i64* [[VLA_ADDR]], align 8 // CHECK14-NEXT: [[TMP1:%.*]] = load i32*, i32** [[A_ADDR]], align 8 // CHECK14-NEXT: [[CONV1:%.*]] = bitcast i64* [[DOTCAPTURE_EXPR__ADDR]] to i32* -// CHECK14-NEXT: [[TMP2:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK14-NEXT: [[TMP2:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK14-NEXT: store i32 [[TMP2]], i32* [[DOTCAPTURE_EXPR_2]], align 4 // CHECK14-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_2]], align 4 // CHECK14-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP3]], 0 @@ -11337,11 +11337,11 @@ int main (int argc, char **argv) { // CHECK14-NEXT: [[TMP16:%.*]] = zext i32 [[TMP15]] to i64 // CHECK14-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK14-NEXT: [[TMP18:%.*]] = zext i32 [[TMP17]] to i64 -// CHECK14-NEXT: [[TMP19:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK14-NEXT: 
[[TMP19:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK14-NEXT: [[CONV8:%.*]] = bitcast i64* [[N_CASTED]] to i32* // CHECK14-NEXT: store i32 [[TMP19]], i32* [[CONV8]], align 4 // CHECK14-NEXT: [[TMP20:%.*]] = load i64, i64* [[N_CASTED]], align 8 -// CHECK14-NEXT: [[TMP21:%.*]] = load i32, i32* [[CONV1]], align 8 +// CHECK14-NEXT: [[TMP21:%.*]] = load i32, i32* [[CONV1]], align 4 // CHECK14-NEXT: [[CONV9:%.*]] = bitcast i64* [[DOTCAPTURE_EXPR__CASTED]] to i32* // CHECK14-NEXT: store i32 [[TMP21]], i32* [[CONV9]], align 4 // CHECK14-NEXT: [[TMP22:%.*]] = load i64, i64* [[DOTCAPTURE_EXPR__CASTED]], align 8 @@ -11397,7 +11397,7 @@ int main (int argc, char **argv) { // CHECK14-NEXT: [[TMP0:%.*]] = load i64, i64* [[VLA_ADDR]], align 8 // CHECK14-NEXT: [[TMP1:%.*]] = load i32*, i32** [[A_ADDR]], align 8 // CHECK14-NEXT: [[CONV1:%.*]] = bitcast i64* [[DOTCAPTURE_EXPR__ADDR]] to i32* -// CHECK14-NEXT: [[TMP2:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK14-NEXT: [[TMP2:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK14-NEXT: store i32 [[TMP2]], i32* [[DOTCAPTURE_EXPR_2]], align 4 // CHECK14-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_2]], align 4 // CHECK14-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP3]], 0 @@ -11420,7 +11420,7 @@ int main (int argc, char **argv) { // CHECK14-NEXT: store i32 [[CONV6]], i32* [[DOTOMP_UB]], align 4 // CHECK14-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK14-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK14-NEXT: [[TMP8:%.*]] = load i32, i32* [[CONV1]], align 8 +// CHECK14-NEXT: [[TMP8:%.*]] = load i32, i32* [[CONV1]], align 4 // CHECK14-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 // CHECK14-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 // CHECK14-NEXT: [[TMP11:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 @@ -11932,7 +11932,7 @@ int main (int argc, char **argv) { // CHECK14-NEXT: store i64 [[DOTCAPTURE_EXPR_]], i64* [[DOTCAPTURE_EXPR__ADDR]], align 8 // CHECK14-NEXT: [[TMP0:%.*]] = load [10 x i32]*, [10 x i32]** [[A_ADDR]], align 8 // CHECK14-NEXT: [[CONV:%.*]] = bitcast i64* [[DOTCAPTURE_EXPR__ADDR]] to i32* -// CHECK14-NEXT: [[TMP1:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK14-NEXT: [[TMP1:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK14-NEXT: [[CONV1:%.*]] = bitcast i64* [[DOTCAPTURE_EXPR__CASTED]] to i32* // CHECK14-NEXT: store i32 [[TMP1]], i32* [[CONV1]], align 4 // CHECK14-NEXT: [[TMP2:%.*]] = load i64, i64* [[DOTCAPTURE_EXPR__CASTED]], align 8 @@ -11992,7 +11992,7 @@ int main (int argc, char **argv) { // CHECK14-NEXT: [[TMP9:%.*]] = zext i32 [[TMP8]] to i64 // CHECK14-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK14-NEXT: [[TMP11:%.*]] = zext i32 [[TMP10]] to i64 -// CHECK14-NEXT: [[TMP12:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK14-NEXT: [[TMP12:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK14-NEXT: [[CONV2:%.*]] = bitcast i64* [[DOTCAPTURE_EXPR__CASTED]] to i32* // CHECK14-NEXT: store i32 [[TMP12]], i32* [[CONV2]], align 4 // CHECK14-NEXT: [[TMP13:%.*]] = load i64, i64* [[DOTCAPTURE_EXPR__CASTED]], align 8 @@ -12045,7 +12045,7 @@ int main (int argc, char **argv) { // CHECK14-NEXT: store i32 [[CONV2]], i32* [[DOTOMP_UB]], align 4 // CHECK14-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK14-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK14-NEXT: [[TMP3:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK14-NEXT: [[TMP3:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK14-NEXT: [[TMP4:%.*]] = load i32*, i32** 
[[DOTGLOBAL_TID__ADDR]], align 8 // CHECK14-NEXT: [[TMP5:%.*]] = load i32, i32* [[TMP4]], align 4 // CHECK14-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB2]], i32 [[TMP5]], i32 33, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 [[TMP3]]) @@ -12271,7 +12271,7 @@ int main (int argc, char **argv) { // CHECK14-NEXT: store i64 [[DOTCAPTURE_EXPR_]], i64* [[DOTCAPTURE_EXPR__ADDR]], align 8 // CHECK14-NEXT: [[TMP0:%.*]] = load [10 x i32]*, [10 x i32]** [[A_ADDR]], align 8 // CHECK14-NEXT: [[CONV:%.*]] = bitcast i64* [[DOTCAPTURE_EXPR__ADDR]] to i32* -// CHECK14-NEXT: [[TMP1:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK14-NEXT: [[TMP1:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK14-NEXT: [[CONV1:%.*]] = bitcast i64* [[DOTCAPTURE_EXPR__CASTED]] to i32* // CHECK14-NEXT: store i32 [[TMP1]], i32* [[CONV1]], align 4 // CHECK14-NEXT: [[TMP2:%.*]] = load i64, i64* [[DOTCAPTURE_EXPR__CASTED]], align 8 @@ -12331,7 +12331,7 @@ int main (int argc, char **argv) { // CHECK14-NEXT: [[TMP9:%.*]] = zext i32 [[TMP8]] to i64 // CHECK14-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK14-NEXT: [[TMP11:%.*]] = zext i32 [[TMP10]] to i64 -// CHECK14-NEXT: [[TMP12:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK14-NEXT: [[TMP12:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK14-NEXT: [[CONV2:%.*]] = bitcast i64* [[DOTCAPTURE_EXPR__CASTED]] to i32* // CHECK14-NEXT: store i32 [[TMP12]], i32* [[CONV2]], align 4 // CHECK14-NEXT: [[TMP13:%.*]] = load i64, i64* [[DOTCAPTURE_EXPR__CASTED]], align 8 @@ -12384,7 +12384,7 @@ int main (int argc, char **argv) { // CHECK14-NEXT: store i32 [[CONV2]], i32* [[DOTOMP_UB]], align 4 // CHECK14-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK14-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK14-NEXT: [[TMP3:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK14-NEXT: [[TMP3:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK14-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 // CHECK14-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 // CHECK14-NEXT: [[TMP6:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 @@ -17669,7 +17669,7 @@ int main (int argc, char **argv) { // CHECK17-NEXT: [[CONV:%.*]] = bitcast i64* [[N_ADDR]] to i32* // CHECK17-NEXT: [[TMP0:%.*]] = load i64, i64* [[VLA_ADDR]], align 8 // CHECK17-NEXT: [[TMP1:%.*]] = load i32*, i32** [[A_ADDR]], align 8 -// CHECK17-NEXT: [[TMP2:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK17-NEXT: [[TMP2:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK17-NEXT: [[CONV1:%.*]] = bitcast i64* [[N_CASTED]] to i32* // CHECK17-NEXT: store i32 [[TMP2]], i32* [[CONV1]], align 4 // CHECK17-NEXT: [[TMP3:%.*]] = load i64, i64* [[N_CASTED]], align 8 @@ -17704,7 +17704,7 @@ int main (int argc, char **argv) { // CHECK17-NEXT: [[CONV:%.*]] = bitcast i64* [[N_ADDR]] to i32* // CHECK17-NEXT: [[TMP0:%.*]] = load i64, i64* [[VLA_ADDR]], align 8 // CHECK17-NEXT: [[TMP1:%.*]] = load i32*, i32** [[A_ADDR]], align 8 -// CHECK17-NEXT: [[TMP2:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK17-NEXT: [[TMP2:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK17-NEXT: store i32 [[TMP2]], i32* [[DOTCAPTURE_EXPR_]], align 4 // CHECK17-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 // CHECK17-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP3]], 0 @@ -17750,7 +17750,7 @@ int main (int argc, char **argv) { // CHECK17-NEXT: [[TMP16:%.*]] = zext i32 [[TMP15]] to i64 // CHECK17-NEXT: [[TMP17:%.*]] = load i32, i32* 
[[DOTOMP_COMB_UB]], align 4 // CHECK17-NEXT: [[TMP18:%.*]] = zext i32 [[TMP17]] to i64 -// CHECK17-NEXT: [[TMP19:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK17-NEXT: [[TMP19:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK17-NEXT: [[CONV6:%.*]] = bitcast i64* [[N_CASTED]] to i32* // CHECK17-NEXT: store i32 [[TMP19]], i32* [[CONV6]], align 4 // CHECK17-NEXT: [[TMP20:%.*]] = load i64, i64* [[N_CASTED]], align 8 @@ -17803,7 +17803,7 @@ int main (int argc, char **argv) { // CHECK17-NEXT: [[CONV:%.*]] = bitcast i64* [[N_ADDR]] to i32* // CHECK17-NEXT: [[TMP0:%.*]] = load i64, i64* [[VLA_ADDR]], align 8 // CHECK17-NEXT: [[TMP1:%.*]] = load i32*, i32** [[A_ADDR]], align 8 -// CHECK17-NEXT: [[TMP2:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK17-NEXT: [[TMP2:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK17-NEXT: store i32 [[TMP2]], i32* [[DOTCAPTURE_EXPR_]], align 4 // CHECK17-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 // CHECK17-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP3]], 0 @@ -17891,7 +17891,7 @@ int main (int argc, char **argv) { // CHECK17-NEXT: [[CONV:%.*]] = bitcast i64* [[N_ADDR]] to i32* // CHECK17-NEXT: [[TMP0:%.*]] = load i64, i64* [[VLA_ADDR]], align 8 // CHECK17-NEXT: [[TMP1:%.*]] = load i32*, i32** [[A_ADDR]], align 8 -// CHECK17-NEXT: [[TMP2:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK17-NEXT: [[TMP2:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK17-NEXT: [[CONV1:%.*]] = bitcast i64* [[N_CASTED]] to i32* // CHECK17-NEXT: store i32 [[TMP2]], i32* [[CONV1]], align 4 // CHECK17-NEXT: [[TMP3:%.*]] = load i64, i64* [[N_CASTED]], align 8 @@ -17926,7 +17926,7 @@ int main (int argc, char **argv) { // CHECK17-NEXT: [[CONV:%.*]] = bitcast i64* [[N_ADDR]] to i32* // CHECK17-NEXT: [[TMP0:%.*]] = load i64, i64* [[VLA_ADDR]], align 8 // CHECK17-NEXT: [[TMP1:%.*]] = load i32*, i32** [[A_ADDR]], align 8 -// CHECK17-NEXT: [[TMP2:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK17-NEXT: [[TMP2:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK17-NEXT: store i32 [[TMP2]], i32* [[DOTCAPTURE_EXPR_]], align 4 // CHECK17-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 // CHECK17-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP3]], 0 @@ -17972,7 +17972,7 @@ int main (int argc, char **argv) { // CHECK17-NEXT: [[TMP16:%.*]] = zext i32 [[TMP15]] to i64 // CHECK17-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK17-NEXT: [[TMP18:%.*]] = zext i32 [[TMP17]] to i64 -// CHECK17-NEXT: [[TMP19:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK17-NEXT: [[TMP19:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK17-NEXT: [[CONV6:%.*]] = bitcast i64* [[N_CASTED]] to i32* // CHECK17-NEXT: store i32 [[TMP19]], i32* [[CONV6]], align 4 // CHECK17-NEXT: [[TMP20:%.*]] = load i64, i64* [[N_CASTED]], align 8 @@ -18025,7 +18025,7 @@ int main (int argc, char **argv) { // CHECK17-NEXT: [[CONV:%.*]] = bitcast i64* [[N_ADDR]] to i32* // CHECK17-NEXT: [[TMP0:%.*]] = load i64, i64* [[VLA_ADDR]], align 8 // CHECK17-NEXT: [[TMP1:%.*]] = load i32*, i32** [[A_ADDR]], align 8 -// CHECK17-NEXT: [[TMP2:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK17-NEXT: [[TMP2:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK17-NEXT: store i32 [[TMP2]], i32* [[DOTCAPTURE_EXPR_]], align 4 // CHECK17-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 // CHECK17-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP3]], 0 @@ -18117,11 +18117,11 @@ int main (int argc, char **argv) { // CHECK17-NEXT: [[TMP0:%.*]] = load i64, i64* [[VLA_ADDR]], align 8 // CHECK17-NEXT: [[TMP1:%.*]] = load 
i32*, i32** [[A_ADDR]], align 8 // CHECK17-NEXT: [[CONV1:%.*]] = bitcast i64* [[DOTCAPTURE_EXPR__ADDR]] to i32* -// CHECK17-NEXT: [[TMP2:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK17-NEXT: [[TMP2:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK17-NEXT: [[CONV2:%.*]] = bitcast i64* [[N_CASTED]] to i32* // CHECK17-NEXT: store i32 [[TMP2]], i32* [[CONV2]], align 4 // CHECK17-NEXT: [[TMP3:%.*]] = load i64, i64* [[N_CASTED]], align 8 -// CHECK17-NEXT: [[TMP4:%.*]] = load i32, i32* [[CONV1]], align 8 +// CHECK17-NEXT: [[TMP4:%.*]] = load i32, i32* [[CONV1]], align 4 // CHECK17-NEXT: [[CONV3:%.*]] = bitcast i64* [[DOTCAPTURE_EXPR__CASTED]] to i32* // CHECK17-NEXT: store i32 [[TMP4]], i32* [[CONV3]], align 4 // CHECK17-NEXT: [[TMP5:%.*]] = load i64, i64* [[DOTCAPTURE_EXPR__CASTED]], align 8 @@ -18160,7 +18160,7 @@ int main (int argc, char **argv) { // CHECK17-NEXT: [[TMP0:%.*]] = load i64, i64* [[VLA_ADDR]], align 8 // CHECK17-NEXT: [[TMP1:%.*]] = load i32*, i32** [[A_ADDR]], align 8 // CHECK17-NEXT: [[CONV1:%.*]] = bitcast i64* [[DOTCAPTURE_EXPR__ADDR]] to i32* -// CHECK17-NEXT: [[TMP2:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK17-NEXT: [[TMP2:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK17-NEXT: store i32 [[TMP2]], i32* [[DOTCAPTURE_EXPR_2]], align 4 // CHECK17-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_2]], align 4 // CHECK17-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP3]], 0 @@ -18177,7 +18177,7 @@ int main (int argc, char **argv) { // CHECK17-NEXT: store i32 [[TMP5]], i32* [[DOTOMP_COMB_UB]], align 4 // CHECK17-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK17-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK17-NEXT: [[TMP6:%.*]] = load i32, i32* [[CONV1]], align 8 +// CHECK17-NEXT: [[TMP6:%.*]] = load i32, i32* [[CONV1]], align 4 // CHECK17-NEXT: [[TMP7:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK17-NEXT: [[TMP8:%.*]] = load i32, i32* [[TMP7]], align 4 // CHECK17-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP8]], i32 91, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 [[TMP6]]) @@ -18208,11 +18208,11 @@ int main (int argc, char **argv) { // CHECK17-NEXT: [[TMP17:%.*]] = zext i32 [[TMP16]] to i64 // CHECK17-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK17-NEXT: [[TMP19:%.*]] = zext i32 [[TMP18]] to i64 -// CHECK17-NEXT: [[TMP20:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK17-NEXT: [[TMP20:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK17-NEXT: [[CONV8:%.*]] = bitcast i64* [[N_CASTED]] to i32* // CHECK17-NEXT: store i32 [[TMP20]], i32* [[CONV8]], align 4 // CHECK17-NEXT: [[TMP21:%.*]] = load i64, i64* [[N_CASTED]], align 8 -// CHECK17-NEXT: [[TMP22:%.*]] = load i32, i32* [[CONV1]], align 8 +// CHECK17-NEXT: [[TMP22:%.*]] = load i32, i32* [[CONV1]], align 4 // CHECK17-NEXT: [[CONV9:%.*]] = bitcast i64* [[DOTCAPTURE_EXPR__CASTED]] to i32* // CHECK17-NEXT: store i32 [[TMP22]], i32* [[CONV9]], align 4 // CHECK17-NEXT: [[TMP23:%.*]] = load i64, i64* [[DOTCAPTURE_EXPR__CASTED]], align 8 @@ -18291,7 +18291,7 @@ int main (int argc, char **argv) { // CHECK17-NEXT: [[TMP0:%.*]] = load i64, i64* [[VLA_ADDR]], align 8 // CHECK17-NEXT: [[TMP1:%.*]] = load i32*, i32** [[A_ADDR]], align 8 // CHECK17-NEXT: [[CONV1:%.*]] = bitcast i64* [[DOTCAPTURE_EXPR__ADDR]] to i32* -// CHECK17-NEXT: [[TMP2:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK17-NEXT: [[TMP2:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK17-NEXT: 
store i32 [[TMP2]], i32* [[DOTCAPTURE_EXPR_2]], align 4 // CHECK17-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_2]], align 4 // CHECK17-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP3]], 0 @@ -18379,7 +18379,7 @@ int main (int argc, char **argv) { // CHECK17-NEXT: [[CONV:%.*]] = bitcast i64* [[N_ADDR]] to i32* // CHECK17-NEXT: [[TMP0:%.*]] = load i64, i64* [[VLA_ADDR]], align 8 // CHECK17-NEXT: [[TMP1:%.*]] = load i32*, i32** [[A_ADDR]], align 8 -// CHECK17-NEXT: [[TMP2:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK17-NEXT: [[TMP2:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK17-NEXT: [[CONV1:%.*]] = bitcast i64* [[N_CASTED]] to i32* // CHECK17-NEXT: store i32 [[TMP2]], i32* [[CONV1]], align 4 // CHECK17-NEXT: [[TMP3:%.*]] = load i64, i64* [[N_CASTED]], align 8 @@ -18414,7 +18414,7 @@ int main (int argc, char **argv) { // CHECK17-NEXT: [[CONV:%.*]] = bitcast i64* [[N_ADDR]] to i32* // CHECK17-NEXT: [[TMP0:%.*]] = load i64, i64* [[VLA_ADDR]], align 8 // CHECK17-NEXT: [[TMP1:%.*]] = load i32*, i32** [[A_ADDR]], align 8 -// CHECK17-NEXT: [[TMP2:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK17-NEXT: [[TMP2:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK17-NEXT: store i32 [[TMP2]], i32* [[DOTCAPTURE_EXPR_]], align 4 // CHECK17-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 // CHECK17-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP3]], 0 @@ -18460,7 +18460,7 @@ int main (int argc, char **argv) { // CHECK17-NEXT: [[TMP16:%.*]] = zext i32 [[TMP15]] to i64 // CHECK17-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK17-NEXT: [[TMP18:%.*]] = zext i32 [[TMP17]] to i64 -// CHECK17-NEXT: [[TMP19:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK17-NEXT: [[TMP19:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK17-NEXT: [[CONV6:%.*]] = bitcast i64* [[N_CASTED]] to i32* // CHECK17-NEXT: store i32 [[TMP19]], i32* [[CONV6]], align 4 // CHECK17-NEXT: [[TMP20:%.*]] = load i64, i64* [[N_CASTED]], align 8 @@ -18513,7 +18513,7 @@ int main (int argc, char **argv) { // CHECK17-NEXT: [[CONV:%.*]] = bitcast i64* [[N_ADDR]] to i32* // CHECK17-NEXT: [[TMP0:%.*]] = load i64, i64* [[VLA_ADDR]], align 8 // CHECK17-NEXT: [[TMP1:%.*]] = load i32*, i32** [[A_ADDR]], align 8 -// CHECK17-NEXT: [[TMP2:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK17-NEXT: [[TMP2:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK17-NEXT: store i32 [[TMP2]], i32* [[DOTCAPTURE_EXPR_]], align 4 // CHECK17-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 // CHECK17-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP3]], 0 @@ -18601,11 +18601,11 @@ int main (int argc, char **argv) { // CHECK17-NEXT: [[TMP0:%.*]] = load i64, i64* [[VLA_ADDR]], align 8 // CHECK17-NEXT: [[TMP1:%.*]] = load i32*, i32** [[A_ADDR]], align 8 // CHECK17-NEXT: [[CONV1:%.*]] = bitcast i64* [[DOTCAPTURE_EXPR__ADDR]] to i32* -// CHECK17-NEXT: [[TMP2:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK17-NEXT: [[TMP2:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK17-NEXT: [[CONV2:%.*]] = bitcast i64* [[N_CASTED]] to i32* // CHECK17-NEXT: store i32 [[TMP2]], i32* [[CONV2]], align 4 // CHECK17-NEXT: [[TMP3:%.*]] = load i64, i64* [[N_CASTED]], align 8 -// CHECK17-NEXT: [[TMP4:%.*]] = load i32, i32* [[CONV1]], align 8 +// CHECK17-NEXT: [[TMP4:%.*]] = load i32, i32* [[CONV1]], align 4 // CHECK17-NEXT: [[CONV3:%.*]] = bitcast i64* [[DOTCAPTURE_EXPR__CASTED]] to i32* // CHECK17-NEXT: store i32 [[TMP4]], i32* [[CONV3]], align 4 // CHECK17-NEXT: [[TMP5:%.*]] = load i64, i64* [[DOTCAPTURE_EXPR__CASTED]], align 8 @@ -18644,7 +18644,7 @@ 
int main (int argc, char **argv) { // CHECK17-NEXT: [[TMP0:%.*]] = load i64, i64* [[VLA_ADDR]], align 8 // CHECK17-NEXT: [[TMP1:%.*]] = load i32*, i32** [[A_ADDR]], align 8 // CHECK17-NEXT: [[CONV1:%.*]] = bitcast i64* [[DOTCAPTURE_EXPR__ADDR]] to i32* -// CHECK17-NEXT: [[TMP2:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK17-NEXT: [[TMP2:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK17-NEXT: store i32 [[TMP2]], i32* [[DOTCAPTURE_EXPR_2]], align 4 // CHECK17-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_2]], align 4 // CHECK17-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP3]], 0 @@ -18690,11 +18690,11 @@ int main (int argc, char **argv) { // CHECK17-NEXT: [[TMP16:%.*]] = zext i32 [[TMP15]] to i64 // CHECK17-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK17-NEXT: [[TMP18:%.*]] = zext i32 [[TMP17]] to i64 -// CHECK17-NEXT: [[TMP19:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK17-NEXT: [[TMP19:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK17-NEXT: [[CONV8:%.*]] = bitcast i64* [[N_CASTED]] to i32* // CHECK17-NEXT: store i32 [[TMP19]], i32* [[CONV8]], align 4 // CHECK17-NEXT: [[TMP20:%.*]] = load i64, i64* [[N_CASTED]], align 8 -// CHECK17-NEXT: [[TMP21:%.*]] = load i32, i32* [[CONV1]], align 8 +// CHECK17-NEXT: [[TMP21:%.*]] = load i32, i32* [[CONV1]], align 4 // CHECK17-NEXT: [[CONV9:%.*]] = bitcast i64* [[DOTCAPTURE_EXPR__CASTED]] to i32* // CHECK17-NEXT: store i32 [[TMP21]], i32* [[CONV9]], align 4 // CHECK17-NEXT: [[TMP22:%.*]] = load i64, i64* [[DOTCAPTURE_EXPR__CASTED]], align 8 @@ -18750,7 +18750,7 @@ int main (int argc, char **argv) { // CHECK17-NEXT: [[TMP0:%.*]] = load i64, i64* [[VLA_ADDR]], align 8 // CHECK17-NEXT: [[TMP1:%.*]] = load i32*, i32** [[A_ADDR]], align 8 // CHECK17-NEXT: [[CONV1:%.*]] = bitcast i64* [[DOTCAPTURE_EXPR__ADDR]] to i32* -// CHECK17-NEXT: [[TMP2:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK17-NEXT: [[TMP2:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK17-NEXT: store i32 [[TMP2]], i32* [[DOTCAPTURE_EXPR_2]], align 4 // CHECK17-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_2]], align 4 // CHECK17-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP3]], 0 @@ -18773,7 +18773,7 @@ int main (int argc, char **argv) { // CHECK17-NEXT: store i32 [[CONV6]], i32* [[DOTOMP_UB]], align 4 // CHECK17-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK17-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK17-NEXT: [[TMP8:%.*]] = load i32, i32* [[CONV1]], align 8 +// CHECK17-NEXT: [[TMP8:%.*]] = load i32, i32* [[CONV1]], align 4 // CHECK17-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 // CHECK17-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 // CHECK17-NEXT: [[TMP11:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 @@ -19285,7 +19285,7 @@ int main (int argc, char **argv) { // CHECK17-NEXT: store i64 [[DOTCAPTURE_EXPR_]], i64* [[DOTCAPTURE_EXPR__ADDR]], align 8 // CHECK17-NEXT: [[TMP0:%.*]] = load [10 x i32]*, [10 x i32]** [[A_ADDR]], align 8 // CHECK17-NEXT: [[CONV:%.*]] = bitcast i64* [[DOTCAPTURE_EXPR__ADDR]] to i32* -// CHECK17-NEXT: [[TMP1:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK17-NEXT: [[TMP1:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK17-NEXT: [[CONV1:%.*]] = bitcast i64* [[DOTCAPTURE_EXPR__CASTED]] to i32* // CHECK17-NEXT: store i32 [[TMP1]], i32* [[CONV1]], align 4 // CHECK17-NEXT: [[TMP2:%.*]] = load i64, i64* [[DOTCAPTURE_EXPR__CASTED]], align 8 @@ -19345,7 +19345,7 @@ int main (int argc, char **argv) { // CHECK17-NEXT: [[TMP9:%.*]] = zext i32 [[TMP8]] to i64 // 
CHECK17-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK17-NEXT: [[TMP11:%.*]] = zext i32 [[TMP10]] to i64 -// CHECK17-NEXT: [[TMP12:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK17-NEXT: [[TMP12:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK17-NEXT: [[CONV2:%.*]] = bitcast i64* [[DOTCAPTURE_EXPR__CASTED]] to i32* // CHECK17-NEXT: store i32 [[TMP12]], i32* [[CONV2]], align 4 // CHECK17-NEXT: [[TMP13:%.*]] = load i64, i64* [[DOTCAPTURE_EXPR__CASTED]], align 8 @@ -19398,7 +19398,7 @@ int main (int argc, char **argv) { // CHECK17-NEXT: store i32 [[CONV2]], i32* [[DOTOMP_UB]], align 4 // CHECK17-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK17-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK17-NEXT: [[TMP3:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK17-NEXT: [[TMP3:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK17-NEXT: [[TMP4:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK17-NEXT: [[TMP5:%.*]] = load i32, i32* [[TMP4]], align 4 // CHECK17-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB2]], i32 [[TMP5]], i32 33, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 [[TMP3]]) @@ -19624,7 +19624,7 @@ int main (int argc, char **argv) { // CHECK17-NEXT: store i64 [[DOTCAPTURE_EXPR_]], i64* [[DOTCAPTURE_EXPR__ADDR]], align 8 // CHECK17-NEXT: [[TMP0:%.*]] = load [10 x i32]*, [10 x i32]** [[A_ADDR]], align 8 // CHECK17-NEXT: [[CONV:%.*]] = bitcast i64* [[DOTCAPTURE_EXPR__ADDR]] to i32* -// CHECK17-NEXT: [[TMP1:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK17-NEXT: [[TMP1:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK17-NEXT: [[CONV1:%.*]] = bitcast i64* [[DOTCAPTURE_EXPR__CASTED]] to i32* // CHECK17-NEXT: store i32 [[TMP1]], i32* [[CONV1]], align 4 // CHECK17-NEXT: [[TMP2:%.*]] = load i64, i64* [[DOTCAPTURE_EXPR__CASTED]], align 8 @@ -19684,7 +19684,7 @@ int main (int argc, char **argv) { // CHECK17-NEXT: [[TMP9:%.*]] = zext i32 [[TMP8]] to i64 // CHECK17-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK17-NEXT: [[TMP11:%.*]] = zext i32 [[TMP10]] to i64 -// CHECK17-NEXT: [[TMP12:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK17-NEXT: [[TMP12:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK17-NEXT: [[CONV2:%.*]] = bitcast i64* [[DOTCAPTURE_EXPR__CASTED]] to i32* // CHECK17-NEXT: store i32 [[TMP12]], i32* [[CONV2]], align 4 // CHECK17-NEXT: [[TMP13:%.*]] = load i64, i64* [[DOTCAPTURE_EXPR__CASTED]], align 8 @@ -19737,7 +19737,7 @@ int main (int argc, char **argv) { // CHECK17-NEXT: store i32 [[CONV2]], i32* [[DOTOMP_UB]], align 4 // CHECK17-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK17-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK17-NEXT: [[TMP3:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK17-NEXT: [[TMP3:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK17-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 // CHECK17-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 // CHECK17-NEXT: [[TMP6:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 @@ -20188,7 +20188,7 @@ int main (int argc, char **argv) { // CHECK18-NEXT: [[CONV:%.*]] = bitcast i64* [[N_ADDR]] to i32* // CHECK18-NEXT: [[TMP0:%.*]] = load i64, i64* [[VLA_ADDR]], align 8 // CHECK18-NEXT: [[TMP1:%.*]] = load i32*, i32** [[A_ADDR]], align 8 -// CHECK18-NEXT: [[TMP2:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK18-NEXT: [[TMP2:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK18-NEXT: [[CONV1:%.*]] = 
bitcast i64* [[N_CASTED]] to i32* // CHECK18-NEXT: store i32 [[TMP2]], i32* [[CONV1]], align 4 // CHECK18-NEXT: [[TMP3:%.*]] = load i64, i64* [[N_CASTED]], align 8 @@ -20223,7 +20223,7 @@ int main (int argc, char **argv) { // CHECK18-NEXT: [[CONV:%.*]] = bitcast i64* [[N_ADDR]] to i32* // CHECK18-NEXT: [[TMP0:%.*]] = load i64, i64* [[VLA_ADDR]], align 8 // CHECK18-NEXT: [[TMP1:%.*]] = load i32*, i32** [[A_ADDR]], align 8 -// CHECK18-NEXT: [[TMP2:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK18-NEXT: [[TMP2:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK18-NEXT: store i32 [[TMP2]], i32* [[DOTCAPTURE_EXPR_]], align 4 // CHECK18-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 // CHECK18-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP3]], 0 @@ -20269,7 +20269,7 @@ int main (int argc, char **argv) { // CHECK18-NEXT: [[TMP16:%.*]] = zext i32 [[TMP15]] to i64 // CHECK18-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK18-NEXT: [[TMP18:%.*]] = zext i32 [[TMP17]] to i64 -// CHECK18-NEXT: [[TMP19:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK18-NEXT: [[TMP19:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK18-NEXT: [[CONV6:%.*]] = bitcast i64* [[N_CASTED]] to i32* // CHECK18-NEXT: store i32 [[TMP19]], i32* [[CONV6]], align 4 // CHECK18-NEXT: [[TMP20:%.*]] = load i64, i64* [[N_CASTED]], align 8 @@ -20322,7 +20322,7 @@ int main (int argc, char **argv) { // CHECK18-NEXT: [[CONV:%.*]] = bitcast i64* [[N_ADDR]] to i32* // CHECK18-NEXT: [[TMP0:%.*]] = load i64, i64* [[VLA_ADDR]], align 8 // CHECK18-NEXT: [[TMP1:%.*]] = load i32*, i32** [[A_ADDR]], align 8 -// CHECK18-NEXT: [[TMP2:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK18-NEXT: [[TMP2:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK18-NEXT: store i32 [[TMP2]], i32* [[DOTCAPTURE_EXPR_]], align 4 // CHECK18-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 // CHECK18-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP3]], 0 @@ -20410,7 +20410,7 @@ int main (int argc, char **argv) { // CHECK18-NEXT: [[CONV:%.*]] = bitcast i64* [[N_ADDR]] to i32* // CHECK18-NEXT: [[TMP0:%.*]] = load i64, i64* [[VLA_ADDR]], align 8 // CHECK18-NEXT: [[TMP1:%.*]] = load i32*, i32** [[A_ADDR]], align 8 -// CHECK18-NEXT: [[TMP2:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK18-NEXT: [[TMP2:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK18-NEXT: [[CONV1:%.*]] = bitcast i64* [[N_CASTED]] to i32* // CHECK18-NEXT: store i32 [[TMP2]], i32* [[CONV1]], align 4 // CHECK18-NEXT: [[TMP3:%.*]] = load i64, i64* [[N_CASTED]], align 8 @@ -20445,7 +20445,7 @@ int main (int argc, char **argv) { // CHECK18-NEXT: [[CONV:%.*]] = bitcast i64* [[N_ADDR]] to i32* // CHECK18-NEXT: [[TMP0:%.*]] = load i64, i64* [[VLA_ADDR]], align 8 // CHECK18-NEXT: [[TMP1:%.*]] = load i32*, i32** [[A_ADDR]], align 8 -// CHECK18-NEXT: [[TMP2:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK18-NEXT: [[TMP2:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK18-NEXT: store i32 [[TMP2]], i32* [[DOTCAPTURE_EXPR_]], align 4 // CHECK18-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 // CHECK18-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP3]], 0 @@ -20491,7 +20491,7 @@ int main (int argc, char **argv) { // CHECK18-NEXT: [[TMP16:%.*]] = zext i32 [[TMP15]] to i64 // CHECK18-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK18-NEXT: [[TMP18:%.*]] = zext i32 [[TMP17]] to i64 -// CHECK18-NEXT: [[TMP19:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK18-NEXT: [[TMP19:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK18-NEXT: [[CONV6:%.*]] = 
bitcast i64* [[N_CASTED]] to i32* // CHECK18-NEXT: store i32 [[TMP19]], i32* [[CONV6]], align 4 // CHECK18-NEXT: [[TMP20:%.*]] = load i64, i64* [[N_CASTED]], align 8 @@ -20544,7 +20544,7 @@ int main (int argc, char **argv) { // CHECK18-NEXT: [[CONV:%.*]] = bitcast i64* [[N_ADDR]] to i32* // CHECK18-NEXT: [[TMP0:%.*]] = load i64, i64* [[VLA_ADDR]], align 8 // CHECK18-NEXT: [[TMP1:%.*]] = load i32*, i32** [[A_ADDR]], align 8 -// CHECK18-NEXT: [[TMP2:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK18-NEXT: [[TMP2:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK18-NEXT: store i32 [[TMP2]], i32* [[DOTCAPTURE_EXPR_]], align 4 // CHECK18-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 // CHECK18-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP3]], 0 @@ -20636,11 +20636,11 @@ int main (int argc, char **argv) { // CHECK18-NEXT: [[TMP0:%.*]] = load i64, i64* [[VLA_ADDR]], align 8 // CHECK18-NEXT: [[TMP1:%.*]] = load i32*, i32** [[A_ADDR]], align 8 // CHECK18-NEXT: [[CONV1:%.*]] = bitcast i64* [[DOTCAPTURE_EXPR__ADDR]] to i32* -// CHECK18-NEXT: [[TMP2:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK18-NEXT: [[TMP2:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK18-NEXT: [[CONV2:%.*]] = bitcast i64* [[N_CASTED]] to i32* // CHECK18-NEXT: store i32 [[TMP2]], i32* [[CONV2]], align 4 // CHECK18-NEXT: [[TMP3:%.*]] = load i64, i64* [[N_CASTED]], align 8 -// CHECK18-NEXT: [[TMP4:%.*]] = load i32, i32* [[CONV1]], align 8 +// CHECK18-NEXT: [[TMP4:%.*]] = load i32, i32* [[CONV1]], align 4 // CHECK18-NEXT: [[CONV3:%.*]] = bitcast i64* [[DOTCAPTURE_EXPR__CASTED]] to i32* // CHECK18-NEXT: store i32 [[TMP4]], i32* [[CONV3]], align 4 // CHECK18-NEXT: [[TMP5:%.*]] = load i64, i64* [[DOTCAPTURE_EXPR__CASTED]], align 8 @@ -20679,7 +20679,7 @@ int main (int argc, char **argv) { // CHECK18-NEXT: [[TMP0:%.*]] = load i64, i64* [[VLA_ADDR]], align 8 // CHECK18-NEXT: [[TMP1:%.*]] = load i32*, i32** [[A_ADDR]], align 8 // CHECK18-NEXT: [[CONV1:%.*]] = bitcast i64* [[DOTCAPTURE_EXPR__ADDR]] to i32* -// CHECK18-NEXT: [[TMP2:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK18-NEXT: [[TMP2:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK18-NEXT: store i32 [[TMP2]], i32* [[DOTCAPTURE_EXPR_2]], align 4 // CHECK18-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_2]], align 4 // CHECK18-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP3]], 0 @@ -20696,7 +20696,7 @@ int main (int argc, char **argv) { // CHECK18-NEXT: store i32 [[TMP5]], i32* [[DOTOMP_COMB_UB]], align 4 // CHECK18-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK18-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK18-NEXT: [[TMP6:%.*]] = load i32, i32* [[CONV1]], align 8 +// CHECK18-NEXT: [[TMP6:%.*]] = load i32, i32* [[CONV1]], align 4 // CHECK18-NEXT: [[TMP7:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK18-NEXT: [[TMP8:%.*]] = load i32, i32* [[TMP7]], align 4 // CHECK18-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP8]], i32 91, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 [[TMP6]]) @@ -20727,11 +20727,11 @@ int main (int argc, char **argv) { // CHECK18-NEXT: [[TMP17:%.*]] = zext i32 [[TMP16]] to i64 // CHECK18-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK18-NEXT: [[TMP19:%.*]] = zext i32 [[TMP18]] to i64 -// CHECK18-NEXT: [[TMP20:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK18-NEXT: [[TMP20:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK18-NEXT: [[CONV8:%.*]] = bitcast i64* [[N_CASTED]] 
to i32* // CHECK18-NEXT: store i32 [[TMP20]], i32* [[CONV8]], align 4 // CHECK18-NEXT: [[TMP21:%.*]] = load i64, i64* [[N_CASTED]], align 8 -// CHECK18-NEXT: [[TMP22:%.*]] = load i32, i32* [[CONV1]], align 8 +// CHECK18-NEXT: [[TMP22:%.*]] = load i32, i32* [[CONV1]], align 4 // CHECK18-NEXT: [[CONV9:%.*]] = bitcast i64* [[DOTCAPTURE_EXPR__CASTED]] to i32* // CHECK18-NEXT: store i32 [[TMP22]], i32* [[CONV9]], align 4 // CHECK18-NEXT: [[TMP23:%.*]] = load i64, i64* [[DOTCAPTURE_EXPR__CASTED]], align 8 @@ -20810,7 +20810,7 @@ int main (int argc, char **argv) { // CHECK18-NEXT: [[TMP0:%.*]] = load i64, i64* [[VLA_ADDR]], align 8 // CHECK18-NEXT: [[TMP1:%.*]] = load i32*, i32** [[A_ADDR]], align 8 // CHECK18-NEXT: [[CONV1:%.*]] = bitcast i64* [[DOTCAPTURE_EXPR__ADDR]] to i32* -// CHECK18-NEXT: [[TMP2:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK18-NEXT: [[TMP2:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK18-NEXT: store i32 [[TMP2]], i32* [[DOTCAPTURE_EXPR_2]], align 4 // CHECK18-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_2]], align 4 // CHECK18-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP3]], 0 @@ -20898,7 +20898,7 @@ int main (int argc, char **argv) { // CHECK18-NEXT: [[CONV:%.*]] = bitcast i64* [[N_ADDR]] to i32* // CHECK18-NEXT: [[TMP0:%.*]] = load i64, i64* [[VLA_ADDR]], align 8 // CHECK18-NEXT: [[TMP1:%.*]] = load i32*, i32** [[A_ADDR]], align 8 -// CHECK18-NEXT: [[TMP2:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK18-NEXT: [[TMP2:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK18-NEXT: [[CONV1:%.*]] = bitcast i64* [[N_CASTED]] to i32* // CHECK18-NEXT: store i32 [[TMP2]], i32* [[CONV1]], align 4 // CHECK18-NEXT: [[TMP3:%.*]] = load i64, i64* [[N_CASTED]], align 8 @@ -20933,7 +20933,7 @@ int main (int argc, char **argv) { // CHECK18-NEXT: [[CONV:%.*]] = bitcast i64* [[N_ADDR]] to i32* // CHECK18-NEXT: [[TMP0:%.*]] = load i64, i64* [[VLA_ADDR]], align 8 // CHECK18-NEXT: [[TMP1:%.*]] = load i32*, i32** [[A_ADDR]], align 8 -// CHECK18-NEXT: [[TMP2:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK18-NEXT: [[TMP2:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK18-NEXT: store i32 [[TMP2]], i32* [[DOTCAPTURE_EXPR_]], align 4 // CHECK18-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 // CHECK18-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP3]], 0 @@ -20979,7 +20979,7 @@ int main (int argc, char **argv) { // CHECK18-NEXT: [[TMP16:%.*]] = zext i32 [[TMP15]] to i64 // CHECK18-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK18-NEXT: [[TMP18:%.*]] = zext i32 [[TMP17]] to i64 -// CHECK18-NEXT: [[TMP19:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK18-NEXT: [[TMP19:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK18-NEXT: [[CONV6:%.*]] = bitcast i64* [[N_CASTED]] to i32* // CHECK18-NEXT: store i32 [[TMP19]], i32* [[CONV6]], align 4 // CHECK18-NEXT: [[TMP20:%.*]] = load i64, i64* [[N_CASTED]], align 8 @@ -21032,7 +21032,7 @@ int main (int argc, char **argv) { // CHECK18-NEXT: [[CONV:%.*]] = bitcast i64* [[N_ADDR]] to i32* // CHECK18-NEXT: [[TMP0:%.*]] = load i64, i64* [[VLA_ADDR]], align 8 // CHECK18-NEXT: [[TMP1:%.*]] = load i32*, i32** [[A_ADDR]], align 8 -// CHECK18-NEXT: [[TMP2:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK18-NEXT: [[TMP2:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK18-NEXT: store i32 [[TMP2]], i32* [[DOTCAPTURE_EXPR_]], align 4 // CHECK18-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 // CHECK18-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP3]], 0 @@ -21120,11 +21120,11 @@ int main (int argc, char **argv) { // 
CHECK18-NEXT: [[TMP0:%.*]] = load i64, i64* [[VLA_ADDR]], align 8 // CHECK18-NEXT: [[TMP1:%.*]] = load i32*, i32** [[A_ADDR]], align 8 // CHECK18-NEXT: [[CONV1:%.*]] = bitcast i64* [[DOTCAPTURE_EXPR__ADDR]] to i32* -// CHECK18-NEXT: [[TMP2:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK18-NEXT: [[TMP2:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK18-NEXT: [[CONV2:%.*]] = bitcast i64* [[N_CASTED]] to i32* // CHECK18-NEXT: store i32 [[TMP2]], i32* [[CONV2]], align 4 // CHECK18-NEXT: [[TMP3:%.*]] = load i64, i64* [[N_CASTED]], align 8 -// CHECK18-NEXT: [[TMP4:%.*]] = load i32, i32* [[CONV1]], align 8 +// CHECK18-NEXT: [[TMP4:%.*]] = load i32, i32* [[CONV1]], align 4 // CHECK18-NEXT: [[CONV3:%.*]] = bitcast i64* [[DOTCAPTURE_EXPR__CASTED]] to i32* // CHECK18-NEXT: store i32 [[TMP4]], i32* [[CONV3]], align 4 // CHECK18-NEXT: [[TMP5:%.*]] = load i64, i64* [[DOTCAPTURE_EXPR__CASTED]], align 8 @@ -21163,7 +21163,7 @@ int main (int argc, char **argv) { // CHECK18-NEXT: [[TMP0:%.*]] = load i64, i64* [[VLA_ADDR]], align 8 // CHECK18-NEXT: [[TMP1:%.*]] = load i32*, i32** [[A_ADDR]], align 8 // CHECK18-NEXT: [[CONV1:%.*]] = bitcast i64* [[DOTCAPTURE_EXPR__ADDR]] to i32* -// CHECK18-NEXT: [[TMP2:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK18-NEXT: [[TMP2:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK18-NEXT: store i32 [[TMP2]], i32* [[DOTCAPTURE_EXPR_2]], align 4 // CHECK18-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_2]], align 4 // CHECK18-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP3]], 0 @@ -21209,11 +21209,11 @@ int main (int argc, char **argv) { // CHECK18-NEXT: [[TMP16:%.*]] = zext i32 [[TMP15]] to i64 // CHECK18-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK18-NEXT: [[TMP18:%.*]] = zext i32 [[TMP17]] to i64 -// CHECK18-NEXT: [[TMP19:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK18-NEXT: [[TMP19:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK18-NEXT: [[CONV8:%.*]] = bitcast i64* [[N_CASTED]] to i32* // CHECK18-NEXT: store i32 [[TMP19]], i32* [[CONV8]], align 4 // CHECK18-NEXT: [[TMP20:%.*]] = load i64, i64* [[N_CASTED]], align 8 -// CHECK18-NEXT: [[TMP21:%.*]] = load i32, i32* [[CONV1]], align 8 +// CHECK18-NEXT: [[TMP21:%.*]] = load i32, i32* [[CONV1]], align 4 // CHECK18-NEXT: [[CONV9:%.*]] = bitcast i64* [[DOTCAPTURE_EXPR__CASTED]] to i32* // CHECK18-NEXT: store i32 [[TMP21]], i32* [[CONV9]], align 4 // CHECK18-NEXT: [[TMP22:%.*]] = load i64, i64* [[DOTCAPTURE_EXPR__CASTED]], align 8 @@ -21269,7 +21269,7 @@ int main (int argc, char **argv) { // CHECK18-NEXT: [[TMP0:%.*]] = load i64, i64* [[VLA_ADDR]], align 8 // CHECK18-NEXT: [[TMP1:%.*]] = load i32*, i32** [[A_ADDR]], align 8 // CHECK18-NEXT: [[CONV1:%.*]] = bitcast i64* [[DOTCAPTURE_EXPR__ADDR]] to i32* -// CHECK18-NEXT: [[TMP2:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK18-NEXT: [[TMP2:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK18-NEXT: store i32 [[TMP2]], i32* [[DOTCAPTURE_EXPR_2]], align 4 // CHECK18-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_2]], align 4 // CHECK18-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP3]], 0 @@ -21292,7 +21292,7 @@ int main (int argc, char **argv) { // CHECK18-NEXT: store i32 [[CONV6]], i32* [[DOTOMP_UB]], align 4 // CHECK18-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK18-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK18-NEXT: [[TMP8:%.*]] = load i32, i32* [[CONV1]], align 8 +// CHECK18-NEXT: [[TMP8:%.*]] = load i32, i32* [[CONV1]], align 4 // CHECK18-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 // 
CHECK18-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 // CHECK18-NEXT: [[TMP11:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 @@ -21804,7 +21804,7 @@ int main (int argc, char **argv) { // CHECK18-NEXT: store i64 [[DOTCAPTURE_EXPR_]], i64* [[DOTCAPTURE_EXPR__ADDR]], align 8 // CHECK18-NEXT: [[TMP0:%.*]] = load [10 x i32]*, [10 x i32]** [[A_ADDR]], align 8 // CHECK18-NEXT: [[CONV:%.*]] = bitcast i64* [[DOTCAPTURE_EXPR__ADDR]] to i32* -// CHECK18-NEXT: [[TMP1:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK18-NEXT: [[TMP1:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK18-NEXT: [[CONV1:%.*]] = bitcast i64* [[DOTCAPTURE_EXPR__CASTED]] to i32* // CHECK18-NEXT: store i32 [[TMP1]], i32* [[CONV1]], align 4 // CHECK18-NEXT: [[TMP2:%.*]] = load i64, i64* [[DOTCAPTURE_EXPR__CASTED]], align 8 @@ -21864,7 +21864,7 @@ int main (int argc, char **argv) { // CHECK18-NEXT: [[TMP9:%.*]] = zext i32 [[TMP8]] to i64 // CHECK18-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK18-NEXT: [[TMP11:%.*]] = zext i32 [[TMP10]] to i64 -// CHECK18-NEXT: [[TMP12:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK18-NEXT: [[TMP12:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK18-NEXT: [[CONV2:%.*]] = bitcast i64* [[DOTCAPTURE_EXPR__CASTED]] to i32* // CHECK18-NEXT: store i32 [[TMP12]], i32* [[CONV2]], align 4 // CHECK18-NEXT: [[TMP13:%.*]] = load i64, i64* [[DOTCAPTURE_EXPR__CASTED]], align 8 @@ -21917,7 +21917,7 @@ int main (int argc, char **argv) { // CHECK18-NEXT: store i32 [[CONV2]], i32* [[DOTOMP_UB]], align 4 // CHECK18-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK18-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK18-NEXT: [[TMP3:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK18-NEXT: [[TMP3:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK18-NEXT: [[TMP4:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK18-NEXT: [[TMP5:%.*]] = load i32, i32* [[TMP4]], align 4 // CHECK18-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB2]], i32 [[TMP5]], i32 33, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 [[TMP3]]) @@ -22143,7 +22143,7 @@ int main (int argc, char **argv) { // CHECK18-NEXT: store i64 [[DOTCAPTURE_EXPR_]], i64* [[DOTCAPTURE_EXPR__ADDR]], align 8 // CHECK18-NEXT: [[TMP0:%.*]] = load [10 x i32]*, [10 x i32]** [[A_ADDR]], align 8 // CHECK18-NEXT: [[CONV:%.*]] = bitcast i64* [[DOTCAPTURE_EXPR__ADDR]] to i32* -// CHECK18-NEXT: [[TMP1:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK18-NEXT: [[TMP1:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK18-NEXT: [[CONV1:%.*]] = bitcast i64* [[DOTCAPTURE_EXPR__CASTED]] to i32* // CHECK18-NEXT: store i32 [[TMP1]], i32* [[CONV1]], align 4 // CHECK18-NEXT: [[TMP2:%.*]] = load i64, i64* [[DOTCAPTURE_EXPR__CASTED]], align 8 @@ -22203,7 +22203,7 @@ int main (int argc, char **argv) { // CHECK18-NEXT: [[TMP9:%.*]] = zext i32 [[TMP8]] to i64 // CHECK18-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK18-NEXT: [[TMP11:%.*]] = zext i32 [[TMP10]] to i64 -// CHECK18-NEXT: [[TMP12:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK18-NEXT: [[TMP12:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK18-NEXT: [[CONV2:%.*]] = bitcast i64* [[DOTCAPTURE_EXPR__CASTED]] to i32* // CHECK18-NEXT: store i32 [[TMP12]], i32* [[CONV2]], align 4 // CHECK18-NEXT: [[TMP13:%.*]] = load i64, i64* [[DOTCAPTURE_EXPR__CASTED]], align 8 @@ -22256,7 +22256,7 @@ int main (int argc, char **argv) { // CHECK18-NEXT: store i32 
[[CONV2]], i32* [[DOTOMP_UB]], align 4 // CHECK18-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK18-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK18-NEXT: [[TMP3:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK18-NEXT: [[TMP3:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK18-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 // CHECK18-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 // CHECK18-NEXT: [[TMP6:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 diff --git a/clang/test/OpenMP/target_teams_distribute_parallel_for_simd_codegen.cpp b/clang/test/OpenMP/target_teams_distribute_parallel_for_simd_codegen.cpp index ab2d571b7d60..bad61fd7a674 100644 --- a/clang/test/OpenMP/target_teams_distribute_parallel_for_simd_codegen.cpp +++ b/clang/test/OpenMP/target_teams_distribute_parallel_for_simd_codegen.cpp @@ -261,14 +261,14 @@ int target_teams_fun(int *g){ // CHECK1-NEXT: [[TMP1:%.*]] = load [1000 x i32]*, [1000 x i32]** [[A_ADDR]], align 8 // CHECK1-NEXT: [[CONV4:%.*]] = bitcast i64* [[DOTCAPTURE_EXPR__ADDR]] to i32* // CHECK1-NEXT: [[CONV5:%.*]] = bitcast i64* [[DOTCAPTURE_EXPR__ADDR2]] to i32* -// CHECK1-NEXT: [[TMP2:%.*]] = load i32, i32* [[CONV4]], align 8 -// CHECK1-NEXT: [[TMP3:%.*]] = load i32, i32* [[CONV5]], align 8 +// CHECK1-NEXT: [[TMP2:%.*]] = load i32, i32* [[CONV4]], align 4 +// CHECK1-NEXT: [[TMP3:%.*]] = load i32, i32* [[CONV5]], align 4 // CHECK1-NEXT: call void @__kmpc_push_num_teams(%struct.ident_t* @[[GLOB4]], i32 [[TMP0]], i32 [[TMP2]], i32 [[TMP3]]) -// CHECK1-NEXT: [[TMP4:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK1-NEXT: [[TMP4:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK1-NEXT: [[CONV6:%.*]] = bitcast i64* [[I_CASTED]] to i32* // CHECK1-NEXT: store i32 [[TMP4]], i32* [[CONV6]], align 4 // CHECK1-NEXT: [[TMP5:%.*]] = load i64, i64* [[I_CASTED]], align 8 -// CHECK1-NEXT: [[TMP6:%.*]] = load i32, i32* [[CONV3]], align 8 +// CHECK1-NEXT: [[TMP6:%.*]] = load i32, i32* [[CONV3]], align 4 // CHECK1-NEXT: [[CONV7:%.*]] = bitcast i64* [[N_CASTED]] to i32* // CHECK1-NEXT: store i32 [[TMP6]], i32* [[CONV7]], align 4 // CHECK1-NEXT: [[TMP7:%.*]] = load i64, i64* [[N_CASTED]], align 8 @@ -304,7 +304,7 @@ int target_teams_fun(int *g){ // CHECK1-NEXT: [[CONV:%.*]] = bitcast i64* [[I_ADDR]] to i32* // CHECK1-NEXT: [[CONV1:%.*]] = bitcast i64* [[N_ADDR]] to i32* // CHECK1-NEXT: [[TMP0:%.*]] = load [1000 x i32]*, [1000 x i32]** [[A_ADDR]], align 8 -// CHECK1-NEXT: [[TMP1:%.*]] = load i32, i32* [[CONV1]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = load i32, i32* [[CONV1]], align 4 // CHECK1-NEXT: store i32 [[TMP1]], i32* [[DOTCAPTURE_EXPR_]], align 4 // CHECK1-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 // CHECK1-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP2]], 0 @@ -356,7 +356,7 @@ int target_teams_fun(int *g){ // CHECK1-NEXT: [[CONV8:%.*]] = bitcast i64* [[I_CASTED]] to i32* // CHECK1-NEXT: store i32 [[TMP18]], i32* [[CONV8]], align 4 // CHECK1-NEXT: [[TMP19:%.*]] = load i64, i64* [[I_CASTED]], align 8 -// CHECK1-NEXT: [[TMP20:%.*]] = load i32, i32* [[CONV1]], align 8 +// CHECK1-NEXT: [[TMP20:%.*]] = load i32, i32* [[CONV1]], align 4 // CHECK1-NEXT: [[CONV9:%.*]] = bitcast i64* [[N_CASTED]] to i32* // CHECK1-NEXT: store i32 [[TMP20]], i32* [[CONV9]], align 4 // CHECK1-NEXT: [[TMP21:%.*]] = load i64, i64* [[N_CASTED]], align 8 @@ -383,7 +383,7 @@ int target_teams_fun(int *g){ // CHECK1-NEXT: [[DIV11:%.*]] = sdiv i32 [[SUB10]], 1 // CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[DIV11]], 1 // CHECK1-NEXT: 
[[ADD12:%.*]] = add nsw i32 0, [[MUL]] -// CHECK1-NEXT: store i32 [[ADD12]], i32* [[CONV]], align 8 +// CHECK1-NEXT: store i32 [[ADD12]], i32* [[CONV]], align 4 // CHECK1-NEXT: br label [[DOTOMP_FINAL_DONE]] // CHECK1: .omp.final.done: // CHECK1-NEXT: br label [[OMP_PRECOND_END]] @@ -424,7 +424,7 @@ int target_teams_fun(int *g){ // CHECK1-NEXT: [[CONV:%.*]] = bitcast i64* [[I_ADDR]] to i32* // CHECK1-NEXT: [[CONV1:%.*]] = bitcast i64* [[N_ADDR]] to i32* // CHECK1-NEXT: [[TMP0:%.*]] = load [1000 x i32]*, [1000 x i32]** [[A_ADDR]], align 8 -// CHECK1-NEXT: [[TMP1:%.*]] = load i32, i32* [[CONV1]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = load i32, i32* [[CONV1]], align 4 // CHECK1-NEXT: store i32 [[TMP1]], i32* [[DOTCAPTURE_EXPR_]], align 4 // CHECK1-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 // CHECK1-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP2]], 0 @@ -438,9 +438,9 @@ int target_teams_fun(int *g){ // CHECK1: omp.precond.then: // CHECK1-NEXT: [[ARRAYDECAY:%.*]] = getelementptr inbounds [1000 x i32], [1000 x i32]* [[TMP0]], i64 0, i64 0 // CHECK1-NEXT: call void @llvm.assume(i1 true) [ "align"(i32* [[ARRAYDECAY]], i64 8) ] -// CHECK1-NEXT: [[TMP4:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK1-NEXT: [[TMP4:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK1-NEXT: store i32 [[TMP4]], i32* [[DOTLINEAR_START]], align 4 -// CHECK1-NEXT: [[TMP5:%.*]] = load i32, i32* [[CONV1]], align 8 +// CHECK1-NEXT: [[TMP5:%.*]] = load i32, i32* [[CONV1]], align 4 // CHECK1-NEXT: store i32 [[TMP5]], i32* [[DOTLINEAR_STEP]], align 4 // CHECK1-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 // CHECK1-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_2]], align 4 @@ -512,7 +512,7 @@ int target_teams_fun(int *g){ // CHECK1-NEXT: [[DIV13:%.*]] = sdiv i32 [[SUB12]], 1 // CHECK1-NEXT: [[MUL14:%.*]] = mul nsw i32 [[DIV13]], 1 // CHECK1-NEXT: [[ADD15:%.*]] = add nsw i32 0, [[MUL14]] -// CHECK1-NEXT: store i32 [[ADD15]], i32* [[CONV]], align 8 +// CHECK1-NEXT: store i32 [[ADD15]], i32* [[CONV]], align 4 // CHECK1-NEXT: br label [[DOTOMP_FINAL_DONE]] // CHECK1: .omp.final.done: // CHECK1-NEXT: [[TMP28:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 @@ -538,7 +538,7 @@ int target_teams_fun(int *g){ // CHECK1-NEXT: store i32* [[G]], i32** [[G_ADDR]], align 8 // CHECK1-NEXT: [[CONV:%.*]] = bitcast i64* [[N_ADDR]] to i32* // CHECK1-NEXT: [[TMP0:%.*]] = load [1000 x i32]*, [1000 x i32]** [[A_ADDR]], align 8 -// CHECK1-NEXT: [[TMP1:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK1-NEXT: [[CONV1:%.*]] = bitcast i64* [[N_CASTED]] to i32* // CHECK1-NEXT: store i32 [[TMP1]], i32* [[CONV1]], align 4 // CHECK1-NEXT: [[TMP2:%.*]] = load i64, i64* [[N_CASTED]], align 8 @@ -573,7 +573,7 @@ int target_teams_fun(int *g){ // CHECK1-NEXT: store i32* [[G]], i32** [[G_ADDR]], align 8 // CHECK1-NEXT: [[CONV:%.*]] = bitcast i64* [[N_ADDR]] to i32* // CHECK1-NEXT: [[TMP0:%.*]] = load [1000 x i32]*, [1000 x i32]** [[A_ADDR]], align 8 -// CHECK1-NEXT: [[TMP1:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK1-NEXT: store i32 [[TMP1]], i32* [[DOTCAPTURE_EXPR_]], align 4 // CHECK1-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 // CHECK1-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP2]], 0 @@ -619,7 +619,7 @@ int target_teams_fun(int *g){ // CHECK1-NEXT: [[TMP15:%.*]] = zext i32 [[TMP14]] to i64 // CHECK1-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, 
!llvm.access.group !11 // CHECK1-NEXT: [[TMP17:%.*]] = zext i32 [[TMP16]] to i64 -// CHECK1-NEXT: [[TMP18:%.*]] = load i32, i32* [[CONV]], align 8, !llvm.access.group !11 +// CHECK1-NEXT: [[TMP18:%.*]] = load i32, i32* [[CONV]], align 4, !llvm.access.group !11 // CHECK1-NEXT: [[CONV6:%.*]] = bitcast i64* [[N_CASTED]] to i32* // CHECK1-NEXT: store i32 [[TMP18]], i32* [[CONV6]], align 4, !llvm.access.group !11 // CHECK1-NEXT: [[TMP19:%.*]] = load i64, i64* [[N_CASTED]], align 8, !llvm.access.group !11 @@ -684,7 +684,7 @@ int target_teams_fun(int *g){ // CHECK1-NEXT: store i32* [[G]], i32** [[G_ADDR]], align 8 // CHECK1-NEXT: [[CONV:%.*]] = bitcast i64* [[N_ADDR]] to i32* // CHECK1-NEXT: [[TMP0:%.*]] = load [1000 x i32]*, [1000 x i32]** [[A_ADDR]], align 8 -// CHECK1-NEXT: [[TMP1:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK1-NEXT: store i32 [[TMP1]], i32* [[DOTCAPTURE_EXPR_]], align 4 // CHECK1-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 // CHECK1-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP2]], 0 @@ -972,14 +972,14 @@ int target_teams_fun(int *g){ // CHECK2-NEXT: [[TMP1:%.*]] = load [1000 x i32]*, [1000 x i32]** [[A_ADDR]], align 8 // CHECK2-NEXT: [[CONV4:%.*]] = bitcast i64* [[DOTCAPTURE_EXPR__ADDR]] to i32* // CHECK2-NEXT: [[CONV5:%.*]] = bitcast i64* [[DOTCAPTURE_EXPR__ADDR2]] to i32* -// CHECK2-NEXT: [[TMP2:%.*]] = load i32, i32* [[CONV4]], align 8 -// CHECK2-NEXT: [[TMP3:%.*]] = load i32, i32* [[CONV5]], align 8 +// CHECK2-NEXT: [[TMP2:%.*]] = load i32, i32* [[CONV4]], align 4 +// CHECK2-NEXT: [[TMP3:%.*]] = load i32, i32* [[CONV5]], align 4 // CHECK2-NEXT: call void @__kmpc_push_num_teams(%struct.ident_t* @[[GLOB4]], i32 [[TMP0]], i32 [[TMP2]], i32 [[TMP3]]) -// CHECK2-NEXT: [[TMP4:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK2-NEXT: [[TMP4:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK2-NEXT: [[CONV6:%.*]] = bitcast i64* [[I_CASTED]] to i32* // CHECK2-NEXT: store i32 [[TMP4]], i32* [[CONV6]], align 4 // CHECK2-NEXT: [[TMP5:%.*]] = load i64, i64* [[I_CASTED]], align 8 -// CHECK2-NEXT: [[TMP6:%.*]] = load i32, i32* [[CONV3]], align 8 +// CHECK2-NEXT: [[TMP6:%.*]] = load i32, i32* [[CONV3]], align 4 // CHECK2-NEXT: [[CONV7:%.*]] = bitcast i64* [[N_CASTED]] to i32* // CHECK2-NEXT: store i32 [[TMP6]], i32* [[CONV7]], align 4 // CHECK2-NEXT: [[TMP7:%.*]] = load i64, i64* [[N_CASTED]], align 8 @@ -1015,7 +1015,7 @@ int target_teams_fun(int *g){ // CHECK2-NEXT: [[CONV:%.*]] = bitcast i64* [[I_ADDR]] to i32* // CHECK2-NEXT: [[CONV1:%.*]] = bitcast i64* [[N_ADDR]] to i32* // CHECK2-NEXT: [[TMP0:%.*]] = load [1000 x i32]*, [1000 x i32]** [[A_ADDR]], align 8 -// CHECK2-NEXT: [[TMP1:%.*]] = load i32, i32* [[CONV1]], align 8 +// CHECK2-NEXT: [[TMP1:%.*]] = load i32, i32* [[CONV1]], align 4 // CHECK2-NEXT: store i32 [[TMP1]], i32* [[DOTCAPTURE_EXPR_]], align 4 // CHECK2-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 // CHECK2-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP2]], 0 @@ -1067,7 +1067,7 @@ int target_teams_fun(int *g){ // CHECK2-NEXT: [[CONV8:%.*]] = bitcast i64* [[I_CASTED]] to i32* // CHECK2-NEXT: store i32 [[TMP18]], i32* [[CONV8]], align 4 // CHECK2-NEXT: [[TMP19:%.*]] = load i64, i64* [[I_CASTED]], align 8 -// CHECK2-NEXT: [[TMP20:%.*]] = load i32, i32* [[CONV1]], align 8 +// CHECK2-NEXT: [[TMP20:%.*]] = load i32, i32* [[CONV1]], align 4 // CHECK2-NEXT: [[CONV9:%.*]] = bitcast i64* [[N_CASTED]] to i32* // CHECK2-NEXT: store i32 [[TMP20]], i32* [[CONV9]], align 4 // CHECK2-NEXT: 
[[TMP21:%.*]] = load i64, i64* [[N_CASTED]], align 8 @@ -1094,7 +1094,7 @@ int target_teams_fun(int *g){ // CHECK2-NEXT: [[DIV11:%.*]] = sdiv i32 [[SUB10]], 1 // CHECK2-NEXT: [[MUL:%.*]] = mul nsw i32 [[DIV11]], 1 // CHECK2-NEXT: [[ADD12:%.*]] = add nsw i32 0, [[MUL]] -// CHECK2-NEXT: store i32 [[ADD12]], i32* [[CONV]], align 8 +// CHECK2-NEXT: store i32 [[ADD12]], i32* [[CONV]], align 4 // CHECK2-NEXT: br label [[DOTOMP_FINAL_DONE]] // CHECK2: .omp.final.done: // CHECK2-NEXT: br label [[OMP_PRECOND_END]] @@ -1135,7 +1135,7 @@ int target_teams_fun(int *g){ // CHECK2-NEXT: [[CONV:%.*]] = bitcast i64* [[I_ADDR]] to i32* // CHECK2-NEXT: [[CONV1:%.*]] = bitcast i64* [[N_ADDR]] to i32* // CHECK2-NEXT: [[TMP0:%.*]] = load [1000 x i32]*, [1000 x i32]** [[A_ADDR]], align 8 -// CHECK2-NEXT: [[TMP1:%.*]] = load i32, i32* [[CONV1]], align 8 +// CHECK2-NEXT: [[TMP1:%.*]] = load i32, i32* [[CONV1]], align 4 // CHECK2-NEXT: store i32 [[TMP1]], i32* [[DOTCAPTURE_EXPR_]], align 4 // CHECK2-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 // CHECK2-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP2]], 0 @@ -1149,9 +1149,9 @@ int target_teams_fun(int *g){ // CHECK2: omp.precond.then: // CHECK2-NEXT: [[ARRAYDECAY:%.*]] = getelementptr inbounds [1000 x i32], [1000 x i32]* [[TMP0]], i64 0, i64 0 // CHECK2-NEXT: call void @llvm.assume(i1 true) [ "align"(i32* [[ARRAYDECAY]], i64 8) ] -// CHECK2-NEXT: [[TMP4:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK2-NEXT: [[TMP4:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK2-NEXT: store i32 [[TMP4]], i32* [[DOTLINEAR_START]], align 4 -// CHECK2-NEXT: [[TMP5:%.*]] = load i32, i32* [[CONV1]], align 8 +// CHECK2-NEXT: [[TMP5:%.*]] = load i32, i32* [[CONV1]], align 4 // CHECK2-NEXT: store i32 [[TMP5]], i32* [[DOTLINEAR_STEP]], align 4 // CHECK2-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 // CHECK2-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_2]], align 4 @@ -1223,7 +1223,7 @@ int target_teams_fun(int *g){ // CHECK2-NEXT: [[DIV13:%.*]] = sdiv i32 [[SUB12]], 1 // CHECK2-NEXT: [[MUL14:%.*]] = mul nsw i32 [[DIV13]], 1 // CHECK2-NEXT: [[ADD15:%.*]] = add nsw i32 0, [[MUL14]] -// CHECK2-NEXT: store i32 [[ADD15]], i32* [[CONV]], align 8 +// CHECK2-NEXT: store i32 [[ADD15]], i32* [[CONV]], align 4 // CHECK2-NEXT: br label [[DOTOMP_FINAL_DONE]] // CHECK2: .omp.final.done: // CHECK2-NEXT: [[TMP28:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 @@ -1249,7 +1249,7 @@ int target_teams_fun(int *g){ // CHECK2-NEXT: store i32* [[G]], i32** [[G_ADDR]], align 8 // CHECK2-NEXT: [[CONV:%.*]] = bitcast i64* [[N_ADDR]] to i32* // CHECK2-NEXT: [[TMP0:%.*]] = load [1000 x i32]*, [1000 x i32]** [[A_ADDR]], align 8 -// CHECK2-NEXT: [[TMP1:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK2-NEXT: [[TMP1:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK2-NEXT: [[CONV1:%.*]] = bitcast i64* [[N_CASTED]] to i32* // CHECK2-NEXT: store i32 [[TMP1]], i32* [[CONV1]], align 4 // CHECK2-NEXT: [[TMP2:%.*]] = load i64, i64* [[N_CASTED]], align 8 @@ -1284,7 +1284,7 @@ int target_teams_fun(int *g){ // CHECK2-NEXT: store i32* [[G]], i32** [[G_ADDR]], align 8 // CHECK2-NEXT: [[CONV:%.*]] = bitcast i64* [[N_ADDR]] to i32* // CHECK2-NEXT: [[TMP0:%.*]] = load [1000 x i32]*, [1000 x i32]** [[A_ADDR]], align 8 -// CHECK2-NEXT: [[TMP1:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK2-NEXT: [[TMP1:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK2-NEXT: store i32 [[TMP1]], i32* [[DOTCAPTURE_EXPR_]], align 4 // CHECK2-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 // CHECK2-NEXT: 
[[SUB:%.*]] = sub nsw i32 [[TMP2]], 0 @@ -1330,7 +1330,7 @@ int target_teams_fun(int *g){ // CHECK2-NEXT: [[TMP15:%.*]] = zext i32 [[TMP14]] to i64 // CHECK2-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !11 // CHECK2-NEXT: [[TMP17:%.*]] = zext i32 [[TMP16]] to i64 -// CHECK2-NEXT: [[TMP18:%.*]] = load i32, i32* [[CONV]], align 8, !llvm.access.group !11 +// CHECK2-NEXT: [[TMP18:%.*]] = load i32, i32* [[CONV]], align 4, !llvm.access.group !11 // CHECK2-NEXT: [[CONV6:%.*]] = bitcast i64* [[N_CASTED]] to i32* // CHECK2-NEXT: store i32 [[TMP18]], i32* [[CONV6]], align 4, !llvm.access.group !11 // CHECK2-NEXT: [[TMP19:%.*]] = load i64, i64* [[N_CASTED]], align 8, !llvm.access.group !11 @@ -1395,7 +1395,7 @@ int target_teams_fun(int *g){ // CHECK2-NEXT: store i32* [[G]], i32** [[G_ADDR]], align 8 // CHECK2-NEXT: [[CONV:%.*]] = bitcast i64* [[N_ADDR]] to i32* // CHECK2-NEXT: [[TMP0:%.*]] = load [1000 x i32]*, [1000 x i32]** [[A_ADDR]], align 8 -// CHECK2-NEXT: [[TMP1:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK2-NEXT: [[TMP1:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK2-NEXT: store i32 [[TMP1]], i32* [[DOTCAPTURE_EXPR_]], align 4 // CHECK2-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 // CHECK2-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP2]], 0 @@ -3475,14 +3475,14 @@ int target_teams_fun(int *g){ // CHECK9-NEXT: [[TMP1:%.*]] = load [1000 x i32]*, [1000 x i32]** [[A_ADDR]], align 8 // CHECK9-NEXT: [[CONV4:%.*]] = bitcast i64* [[DOTCAPTURE_EXPR__ADDR]] to i32* // CHECK9-NEXT: [[CONV5:%.*]] = bitcast i64* [[DOTCAPTURE_EXPR__ADDR2]] to i32* -// CHECK9-NEXT: [[TMP2:%.*]] = load i32, i32* [[CONV4]], align 8 -// CHECK9-NEXT: [[TMP3:%.*]] = load i32, i32* [[CONV5]], align 8 +// CHECK9-NEXT: [[TMP2:%.*]] = load i32, i32* [[CONV4]], align 4 +// CHECK9-NEXT: [[TMP3:%.*]] = load i32, i32* [[CONV5]], align 4 // CHECK9-NEXT: call void @__kmpc_push_num_teams(%struct.ident_t* @[[GLOB4]], i32 [[TMP0]], i32 [[TMP2]], i32 [[TMP3]]) -// CHECK9-NEXT: [[TMP4:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK9-NEXT: [[TMP4:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK9-NEXT: [[CONV6:%.*]] = bitcast i64* [[I_CASTED]] to i32* // CHECK9-NEXT: store i32 [[TMP4]], i32* [[CONV6]], align 4 // CHECK9-NEXT: [[TMP5:%.*]] = load i64, i64* [[I_CASTED]], align 8 -// CHECK9-NEXT: [[TMP6:%.*]] = load i32, i32* [[CONV3]], align 8 +// CHECK9-NEXT: [[TMP6:%.*]] = load i32, i32* [[CONV3]], align 4 // CHECK9-NEXT: [[CONV7:%.*]] = bitcast i64* [[N_CASTED]] to i32* // CHECK9-NEXT: store i32 [[TMP6]], i32* [[CONV7]], align 4 // CHECK9-NEXT: [[TMP7:%.*]] = load i64, i64* [[N_CASTED]], align 8 @@ -3518,7 +3518,7 @@ int target_teams_fun(int *g){ // CHECK9-NEXT: [[CONV:%.*]] = bitcast i64* [[I_ADDR]] to i32* // CHECK9-NEXT: [[CONV1:%.*]] = bitcast i64* [[N_ADDR]] to i32* // CHECK9-NEXT: [[TMP0:%.*]] = load [1000 x i32]*, [1000 x i32]** [[A_ADDR]], align 8 -// CHECK9-NEXT: [[TMP1:%.*]] = load i32, i32* [[CONV1]], align 8 +// CHECK9-NEXT: [[TMP1:%.*]] = load i32, i32* [[CONV1]], align 4 // CHECK9-NEXT: store i32 [[TMP1]], i32* [[DOTCAPTURE_EXPR_]], align 4 // CHECK9-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 // CHECK9-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP2]], 0 @@ -3570,7 +3570,7 @@ int target_teams_fun(int *g){ // CHECK9-NEXT: [[CONV8:%.*]] = bitcast i64* [[I_CASTED]] to i32* // CHECK9-NEXT: store i32 [[TMP18]], i32* [[CONV8]], align 4 // CHECK9-NEXT: [[TMP19:%.*]] = load i64, i64* [[I_CASTED]], align 8 -// CHECK9-NEXT: [[TMP20:%.*]] = load i32, i32* 
[[CONV1]], align 8 +// CHECK9-NEXT: [[TMP20:%.*]] = load i32, i32* [[CONV1]], align 4 // CHECK9-NEXT: [[CONV9:%.*]] = bitcast i64* [[N_CASTED]] to i32* // CHECK9-NEXT: store i32 [[TMP20]], i32* [[CONV9]], align 4 // CHECK9-NEXT: [[TMP21:%.*]] = load i64, i64* [[N_CASTED]], align 8 @@ -3597,7 +3597,7 @@ int target_teams_fun(int *g){ // CHECK9-NEXT: [[DIV11:%.*]] = sdiv i32 [[SUB10]], 1 // CHECK9-NEXT: [[MUL:%.*]] = mul nsw i32 [[DIV11]], 1 // CHECK9-NEXT: [[ADD12:%.*]] = add nsw i32 0, [[MUL]] -// CHECK9-NEXT: store i32 [[ADD12]], i32* [[CONV]], align 8 +// CHECK9-NEXT: store i32 [[ADD12]], i32* [[CONV]], align 4 // CHECK9-NEXT: br label [[DOTOMP_FINAL_DONE]] // CHECK9: .omp.final.done: // CHECK9-NEXT: br label [[OMP_PRECOND_END]] @@ -3638,7 +3638,7 @@ int target_teams_fun(int *g){ // CHECK9-NEXT: [[CONV:%.*]] = bitcast i64* [[I_ADDR]] to i32* // CHECK9-NEXT: [[CONV1:%.*]] = bitcast i64* [[N_ADDR]] to i32* // CHECK9-NEXT: [[TMP0:%.*]] = load [1000 x i32]*, [1000 x i32]** [[A_ADDR]], align 8 -// CHECK9-NEXT: [[TMP1:%.*]] = load i32, i32* [[CONV1]], align 8 +// CHECK9-NEXT: [[TMP1:%.*]] = load i32, i32* [[CONV1]], align 4 // CHECK9-NEXT: store i32 [[TMP1]], i32* [[DOTCAPTURE_EXPR_]], align 4 // CHECK9-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 // CHECK9-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP2]], 0 @@ -3652,9 +3652,9 @@ int target_teams_fun(int *g){ // CHECK9: omp.precond.then: // CHECK9-NEXT: [[ARRAYDECAY:%.*]] = getelementptr inbounds [1000 x i32], [1000 x i32]* [[TMP0]], i64 0, i64 0 // CHECK9-NEXT: call void @llvm.assume(i1 true) [ "align"(i32* [[ARRAYDECAY]], i64 8) ] -// CHECK9-NEXT: [[TMP4:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK9-NEXT: [[TMP4:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK9-NEXT: store i32 [[TMP4]], i32* [[DOTLINEAR_START]], align 4 -// CHECK9-NEXT: [[TMP5:%.*]] = load i32, i32* [[CONV1]], align 8 +// CHECK9-NEXT: [[TMP5:%.*]] = load i32, i32* [[CONV1]], align 4 // CHECK9-NEXT: store i32 [[TMP5]], i32* [[DOTLINEAR_STEP]], align 4 // CHECK9-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 // CHECK9-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_2]], align 4 @@ -3726,7 +3726,7 @@ int target_teams_fun(int *g){ // CHECK9-NEXT: [[DIV13:%.*]] = sdiv i32 [[SUB12]], 1 // CHECK9-NEXT: [[MUL14:%.*]] = mul nsw i32 [[DIV13]], 1 // CHECK9-NEXT: [[ADD15:%.*]] = add nsw i32 0, [[MUL14]] -// CHECK9-NEXT: store i32 [[ADD15]], i32* [[CONV]], align 8 +// CHECK9-NEXT: store i32 [[ADD15]], i32* [[CONV]], align 4 // CHECK9-NEXT: br label [[DOTOMP_FINAL_DONE]] // CHECK9: .omp.final.done: // CHECK9-NEXT: [[TMP28:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 @@ -3752,7 +3752,7 @@ int target_teams_fun(int *g){ // CHECK9-NEXT: store i32* [[G]], i32** [[G_ADDR]], align 8 // CHECK9-NEXT: [[CONV:%.*]] = bitcast i64* [[N_ADDR]] to i32* // CHECK9-NEXT: [[TMP0:%.*]] = load [1000 x i32]*, [1000 x i32]** [[A_ADDR]], align 8 -// CHECK9-NEXT: [[TMP1:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK9-NEXT: [[TMP1:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK9-NEXT: [[CONV1:%.*]] = bitcast i64* [[N_CASTED]] to i32* // CHECK9-NEXT: store i32 [[TMP1]], i32* [[CONV1]], align 4 // CHECK9-NEXT: [[TMP2:%.*]] = load i64, i64* [[N_CASTED]], align 8 @@ -3787,7 +3787,7 @@ int target_teams_fun(int *g){ // CHECK9-NEXT: store i32* [[G]], i32** [[G_ADDR]], align 8 // CHECK9-NEXT: [[CONV:%.*]] = bitcast i64* [[N_ADDR]] to i32* // CHECK9-NEXT: [[TMP0:%.*]] = load [1000 x i32]*, [1000 x i32]** [[A_ADDR]], align 8 -// CHECK9-NEXT: [[TMP1:%.*]] = load i32, i32* [[CONV]], align 
8 +// CHECK9-NEXT: [[TMP1:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK9-NEXT: store i32 [[TMP1]], i32* [[DOTCAPTURE_EXPR_]], align 4 // CHECK9-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 // CHECK9-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP2]], 0 @@ -3833,7 +3833,7 @@ int target_teams_fun(int *g){ // CHECK9-NEXT: [[TMP15:%.*]] = zext i32 [[TMP14]] to i64 // CHECK9-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !12 // CHECK9-NEXT: [[TMP17:%.*]] = zext i32 [[TMP16]] to i64 -// CHECK9-NEXT: [[TMP18:%.*]] = load i32, i32* [[CONV]], align 8, !llvm.access.group !12 +// CHECK9-NEXT: [[TMP18:%.*]] = load i32, i32* [[CONV]], align 4, !llvm.access.group !12 // CHECK9-NEXT: [[CONV6:%.*]] = bitcast i64* [[N_CASTED]] to i32* // CHECK9-NEXT: store i32 [[TMP18]], i32* [[CONV6]], align 4, !llvm.access.group !12 // CHECK9-NEXT: [[TMP19:%.*]] = load i64, i64* [[N_CASTED]], align 8, !llvm.access.group !12 @@ -3898,7 +3898,7 @@ int target_teams_fun(int *g){ // CHECK9-NEXT: store i32* [[G]], i32** [[G_ADDR]], align 8 // CHECK9-NEXT: [[CONV:%.*]] = bitcast i64* [[N_ADDR]] to i32* // CHECK9-NEXT: [[TMP0:%.*]] = load [1000 x i32]*, [1000 x i32]** [[A_ADDR]], align 8 -// CHECK9-NEXT: [[TMP1:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK9-NEXT: [[TMP1:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK9-NEXT: store i32 [[TMP1]], i32* [[DOTCAPTURE_EXPR_]], align 4 // CHECK9-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 // CHECK9-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP2]], 0 @@ -4009,14 +4009,14 @@ int target_teams_fun(int *g){ // CHECK10-NEXT: [[TMP1:%.*]] = load [1000 x i32]*, [1000 x i32]** [[A_ADDR]], align 8 // CHECK10-NEXT: [[CONV4:%.*]] = bitcast i64* [[DOTCAPTURE_EXPR__ADDR]] to i32* // CHECK10-NEXT: [[CONV5:%.*]] = bitcast i64* [[DOTCAPTURE_EXPR__ADDR2]] to i32* -// CHECK10-NEXT: [[TMP2:%.*]] = load i32, i32* [[CONV4]], align 8 -// CHECK10-NEXT: [[TMP3:%.*]] = load i32, i32* [[CONV5]], align 8 +// CHECK10-NEXT: [[TMP2:%.*]] = load i32, i32* [[CONV4]], align 4 +// CHECK10-NEXT: [[TMP3:%.*]] = load i32, i32* [[CONV5]], align 4 // CHECK10-NEXT: call void @__kmpc_push_num_teams(%struct.ident_t* @[[GLOB4]], i32 [[TMP0]], i32 [[TMP2]], i32 [[TMP3]]) -// CHECK10-NEXT: [[TMP4:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK10-NEXT: [[TMP4:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK10-NEXT: [[CONV6:%.*]] = bitcast i64* [[I_CASTED]] to i32* // CHECK10-NEXT: store i32 [[TMP4]], i32* [[CONV6]], align 4 // CHECK10-NEXT: [[TMP5:%.*]] = load i64, i64* [[I_CASTED]], align 8 -// CHECK10-NEXT: [[TMP6:%.*]] = load i32, i32* [[CONV3]], align 8 +// CHECK10-NEXT: [[TMP6:%.*]] = load i32, i32* [[CONV3]], align 4 // CHECK10-NEXT: [[CONV7:%.*]] = bitcast i64* [[N_CASTED]] to i32* // CHECK10-NEXT: store i32 [[TMP6]], i32* [[CONV7]], align 4 // CHECK10-NEXT: [[TMP7:%.*]] = load i64, i64* [[N_CASTED]], align 8 @@ -4052,7 +4052,7 @@ int target_teams_fun(int *g){ // CHECK10-NEXT: [[CONV:%.*]] = bitcast i64* [[I_ADDR]] to i32* // CHECK10-NEXT: [[CONV1:%.*]] = bitcast i64* [[N_ADDR]] to i32* // CHECK10-NEXT: [[TMP0:%.*]] = load [1000 x i32]*, [1000 x i32]** [[A_ADDR]], align 8 -// CHECK10-NEXT: [[TMP1:%.*]] = load i32, i32* [[CONV1]], align 8 +// CHECK10-NEXT: [[TMP1:%.*]] = load i32, i32* [[CONV1]], align 4 // CHECK10-NEXT: store i32 [[TMP1]], i32* [[DOTCAPTURE_EXPR_]], align 4 // CHECK10-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 // CHECK10-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP2]], 0 @@ -4104,7 +4104,7 @@ int target_teams_fun(int 
*g){ // CHECK10-NEXT: [[CONV8:%.*]] = bitcast i64* [[I_CASTED]] to i32* // CHECK10-NEXT: store i32 [[TMP18]], i32* [[CONV8]], align 4 // CHECK10-NEXT: [[TMP19:%.*]] = load i64, i64* [[I_CASTED]], align 8 -// CHECK10-NEXT: [[TMP20:%.*]] = load i32, i32* [[CONV1]], align 8 +// CHECK10-NEXT: [[TMP20:%.*]] = load i32, i32* [[CONV1]], align 4 // CHECK10-NEXT: [[CONV9:%.*]] = bitcast i64* [[N_CASTED]] to i32* // CHECK10-NEXT: store i32 [[TMP20]], i32* [[CONV9]], align 4 // CHECK10-NEXT: [[TMP21:%.*]] = load i64, i64* [[N_CASTED]], align 8 @@ -4131,7 +4131,7 @@ int target_teams_fun(int *g){ // CHECK10-NEXT: [[DIV11:%.*]] = sdiv i32 [[SUB10]], 1 // CHECK10-NEXT: [[MUL:%.*]] = mul nsw i32 [[DIV11]], 1 // CHECK10-NEXT: [[ADD12:%.*]] = add nsw i32 0, [[MUL]] -// CHECK10-NEXT: store i32 [[ADD12]], i32* [[CONV]], align 8 +// CHECK10-NEXT: store i32 [[ADD12]], i32* [[CONV]], align 4 // CHECK10-NEXT: br label [[DOTOMP_FINAL_DONE]] // CHECK10: .omp.final.done: // CHECK10-NEXT: br label [[OMP_PRECOND_END]] @@ -4172,7 +4172,7 @@ int target_teams_fun(int *g){ // CHECK10-NEXT: [[CONV:%.*]] = bitcast i64* [[I_ADDR]] to i32* // CHECK10-NEXT: [[CONV1:%.*]] = bitcast i64* [[N_ADDR]] to i32* // CHECK10-NEXT: [[TMP0:%.*]] = load [1000 x i32]*, [1000 x i32]** [[A_ADDR]], align 8 -// CHECK10-NEXT: [[TMP1:%.*]] = load i32, i32* [[CONV1]], align 8 +// CHECK10-NEXT: [[TMP1:%.*]] = load i32, i32* [[CONV1]], align 4 // CHECK10-NEXT: store i32 [[TMP1]], i32* [[DOTCAPTURE_EXPR_]], align 4 // CHECK10-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 // CHECK10-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP2]], 0 @@ -4186,9 +4186,9 @@ int target_teams_fun(int *g){ // CHECK10: omp.precond.then: // CHECK10-NEXT: [[ARRAYDECAY:%.*]] = getelementptr inbounds [1000 x i32], [1000 x i32]* [[TMP0]], i64 0, i64 0 // CHECK10-NEXT: call void @llvm.assume(i1 true) [ "align"(i32* [[ARRAYDECAY]], i64 8) ] -// CHECK10-NEXT: [[TMP4:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK10-NEXT: [[TMP4:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK10-NEXT: store i32 [[TMP4]], i32* [[DOTLINEAR_START]], align 4 -// CHECK10-NEXT: [[TMP5:%.*]] = load i32, i32* [[CONV1]], align 8 +// CHECK10-NEXT: [[TMP5:%.*]] = load i32, i32* [[CONV1]], align 4 // CHECK10-NEXT: store i32 [[TMP5]], i32* [[DOTLINEAR_STEP]], align 4 // CHECK10-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 // CHECK10-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_2]], align 4 @@ -4260,7 +4260,7 @@ int target_teams_fun(int *g){ // CHECK10-NEXT: [[DIV13:%.*]] = sdiv i32 [[SUB12]], 1 // CHECK10-NEXT: [[MUL14:%.*]] = mul nsw i32 [[DIV13]], 1 // CHECK10-NEXT: [[ADD15:%.*]] = add nsw i32 0, [[MUL14]] -// CHECK10-NEXT: store i32 [[ADD15]], i32* [[CONV]], align 8 +// CHECK10-NEXT: store i32 [[ADD15]], i32* [[CONV]], align 4 // CHECK10-NEXT: br label [[DOTOMP_FINAL_DONE]] // CHECK10: .omp.final.done: // CHECK10-NEXT: [[TMP28:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 @@ -4286,7 +4286,7 @@ int target_teams_fun(int *g){ // CHECK10-NEXT: store i32* [[G]], i32** [[G_ADDR]], align 8 // CHECK10-NEXT: [[CONV:%.*]] = bitcast i64* [[N_ADDR]] to i32* // CHECK10-NEXT: [[TMP0:%.*]] = load [1000 x i32]*, [1000 x i32]** [[A_ADDR]], align 8 -// CHECK10-NEXT: [[TMP1:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK10-NEXT: [[TMP1:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK10-NEXT: [[CONV1:%.*]] = bitcast i64* [[N_CASTED]] to i32* // CHECK10-NEXT: store i32 [[TMP1]], i32* [[CONV1]], align 4 // CHECK10-NEXT: [[TMP2:%.*]] = load i64, i64* [[N_CASTED]], align 8 @@ -4321,7 +4321,7 @@ 
int target_teams_fun(int *g){ // CHECK10-NEXT: store i32* [[G]], i32** [[G_ADDR]], align 8 // CHECK10-NEXT: [[CONV:%.*]] = bitcast i64* [[N_ADDR]] to i32* // CHECK10-NEXT: [[TMP0:%.*]] = load [1000 x i32]*, [1000 x i32]** [[A_ADDR]], align 8 -// CHECK10-NEXT: [[TMP1:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK10-NEXT: [[TMP1:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK10-NEXT: store i32 [[TMP1]], i32* [[DOTCAPTURE_EXPR_]], align 4 // CHECK10-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 // CHECK10-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP2]], 0 @@ -4367,7 +4367,7 @@ int target_teams_fun(int *g){ // CHECK10-NEXT: [[TMP15:%.*]] = zext i32 [[TMP14]] to i64 // CHECK10-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !12 // CHECK10-NEXT: [[TMP17:%.*]] = zext i32 [[TMP16]] to i64 -// CHECK10-NEXT: [[TMP18:%.*]] = load i32, i32* [[CONV]], align 8, !llvm.access.group !12 +// CHECK10-NEXT: [[TMP18:%.*]] = load i32, i32* [[CONV]], align 4, !llvm.access.group !12 // CHECK10-NEXT: [[CONV6:%.*]] = bitcast i64* [[N_CASTED]] to i32* // CHECK10-NEXT: store i32 [[TMP18]], i32* [[CONV6]], align 4, !llvm.access.group !12 // CHECK10-NEXT: [[TMP19:%.*]] = load i64, i64* [[N_CASTED]], align 8, !llvm.access.group !12 @@ -4432,7 +4432,7 @@ int target_teams_fun(int *g){ // CHECK10-NEXT: store i32* [[G]], i32** [[G_ADDR]], align 8 // CHECK10-NEXT: [[CONV:%.*]] = bitcast i64* [[N_ADDR]] to i32* // CHECK10-NEXT: [[TMP0:%.*]] = load [1000 x i32]*, [1000 x i32]** [[A_ADDR]], align 8 -// CHECK10-NEXT: [[TMP1:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK10-NEXT: [[TMP1:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK10-NEXT: store i32 [[TMP1]], i32* [[DOTCAPTURE_EXPR_]], align 4 // CHECK10-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 // CHECK10-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP2]], 0 diff --git a/clang/test/OpenMP/target_teams_distribute_parallel_for_simd_collapse_codegen.cpp b/clang/test/OpenMP/target_teams_distribute_parallel_for_simd_collapse_codegen.cpp index b7b088652fdf..bdd4d313ede1 100644 --- a/clang/test/OpenMP/target_teams_distribute_parallel_for_simd_collapse_codegen.cpp +++ b/clang/test/OpenMP/target_teams_distribute_parallel_for_simd_collapse_codegen.cpp @@ -1461,11 +1461,11 @@ int main (int argc, char **argv) { // CHECK9-NEXT: [[TMP0:%.*]] = load i64, i64* [[VLA_ADDR]], align 8 // CHECK9-NEXT: [[TMP1:%.*]] = load i64, i64* [[VLA_ADDR2]], align 8 // CHECK9-NEXT: [[TMP2:%.*]] = load i32*, i32** [[A_ADDR]], align 8 -// CHECK9-NEXT: [[TMP3:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK9-NEXT: [[TMP3:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK9-NEXT: [[CONV4:%.*]] = bitcast i64* [[N_CASTED]] to i32* // CHECK9-NEXT: store i32 [[TMP3]], i32* [[CONV4]], align 4 // CHECK9-NEXT: [[TMP4:%.*]] = load i64, i64* [[N_CASTED]], align 8 -// CHECK9-NEXT: [[TMP5:%.*]] = load i32, i32* [[CONV3]], align 8 +// CHECK9-NEXT: [[TMP5:%.*]] = load i32, i32* [[CONV3]], align 4 // CHECK9-NEXT: [[CONV5:%.*]] = bitcast i64* [[M_CASTED]] to i32* // CHECK9-NEXT: store i32 [[TMP5]], i32* [[CONV5]], align 4 // CHECK9-NEXT: [[TMP6:%.*]] = load i64, i64* [[M_CASTED]], align 8 @@ -1511,9 +1511,9 @@ int main (int argc, char **argv) { // CHECK9-NEXT: [[TMP0:%.*]] = load i64, i64* [[VLA_ADDR]], align 8 // CHECK9-NEXT: [[TMP1:%.*]] = load i64, i64* [[VLA_ADDR2]], align 8 // CHECK9-NEXT: [[TMP2:%.*]] = load i32*, i32** [[A_ADDR]], align 8 -// CHECK9-NEXT: [[TMP3:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK9-NEXT: [[TMP3:%.*]] = load i32, 
i32* [[CONV]], align 4 // CHECK9-NEXT: store i32 [[TMP3]], i32* [[DOTCAPTURE_EXPR_]], align 4 -// CHECK9-NEXT: [[TMP4:%.*]] = load i32, i32* [[CONV3]], align 8 +// CHECK9-NEXT: [[TMP4:%.*]] = load i32, i32* [[CONV3]], align 4 // CHECK9-NEXT: store i32 [[TMP4]], i32* [[DOTCAPTURE_EXPR_5]], align 4 // CHECK9-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 // CHECK9-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP5]], 0 @@ -1568,11 +1568,11 @@ int main (int argc, char **argv) { // CHECK9: omp.inner.for.body: // CHECK9-NEXT: [[TMP19:%.*]] = load i64, i64* [[DOTOMP_COMB_LB]], align 8, !llvm.access.group !5 // CHECK9-NEXT: [[TMP20:%.*]] = load i64, i64* [[DOTOMP_COMB_UB]], align 8, !llvm.access.group !5 -// CHECK9-NEXT: [[TMP21:%.*]] = load i32, i32* [[CONV]], align 8, !llvm.access.group !5 +// CHECK9-NEXT: [[TMP21:%.*]] = load i32, i32* [[CONV]], align 4, !llvm.access.group !5 // CHECK9-NEXT: [[CONV17:%.*]] = bitcast i64* [[N_CASTED]] to i32* // CHECK9-NEXT: store i32 [[TMP21]], i32* [[CONV17]], align 4, !llvm.access.group !5 // CHECK9-NEXT: [[TMP22:%.*]] = load i64, i64* [[N_CASTED]], align 8, !llvm.access.group !5 -// CHECK9-NEXT: [[TMP23:%.*]] = load i32, i32* [[CONV3]], align 8, !llvm.access.group !5 +// CHECK9-NEXT: [[TMP23:%.*]] = load i32, i32* [[CONV3]], align 4, !llvm.access.group !5 // CHECK9-NEXT: [[CONV18:%.*]] = bitcast i64* [[M_CASTED]] to i32* // CHECK9-NEXT: store i32 [[TMP23]], i32* [[CONV18]], align 4, !llvm.access.group !5 // CHECK9-NEXT: [[TMP24:%.*]] = load i64, i64* [[M_CASTED]], align 8, !llvm.access.group !5 @@ -1653,9 +1653,9 @@ int main (int argc, char **argv) { // CHECK9-NEXT: [[TMP0:%.*]] = load i64, i64* [[VLA_ADDR]], align 8 // CHECK9-NEXT: [[TMP1:%.*]] = load i64, i64* [[VLA_ADDR2]], align 8 // CHECK9-NEXT: [[TMP2:%.*]] = load i32*, i32** [[A_ADDR]], align 8 -// CHECK9-NEXT: [[TMP3:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK9-NEXT: [[TMP3:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK9-NEXT: store i32 [[TMP3]], i32* [[DOTCAPTURE_EXPR_]], align 4 -// CHECK9-NEXT: [[TMP4:%.*]] = load i32, i32* [[CONV3]], align 8 +// CHECK9-NEXT: [[TMP4:%.*]] = load i32, i32* [[CONV3]], align 4 // CHECK9-NEXT: store i32 [[TMP4]], i32* [[DOTCAPTURE_EXPR_5]], align 4 // CHECK9-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 // CHECK9-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP5]], 0 @@ -2162,11 +2162,11 @@ int main (int argc, char **argv) { // CHECK10-NEXT: [[TMP0:%.*]] = load i64, i64* [[VLA_ADDR]], align 8 // CHECK10-NEXT: [[TMP1:%.*]] = load i64, i64* [[VLA_ADDR2]], align 8 // CHECK10-NEXT: [[TMP2:%.*]] = load i32*, i32** [[A_ADDR]], align 8 -// CHECK10-NEXT: [[TMP3:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK10-NEXT: [[TMP3:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK10-NEXT: [[CONV4:%.*]] = bitcast i64* [[N_CASTED]] to i32* // CHECK10-NEXT: store i32 [[TMP3]], i32* [[CONV4]], align 4 // CHECK10-NEXT: [[TMP4:%.*]] = load i64, i64* [[N_CASTED]], align 8 -// CHECK10-NEXT: [[TMP5:%.*]] = load i32, i32* [[CONV3]], align 8 +// CHECK10-NEXT: [[TMP5:%.*]] = load i32, i32* [[CONV3]], align 4 // CHECK10-NEXT: [[CONV5:%.*]] = bitcast i64* [[M_CASTED]] to i32* // CHECK10-NEXT: store i32 [[TMP5]], i32* [[CONV5]], align 4 // CHECK10-NEXT: [[TMP6:%.*]] = load i64, i64* [[M_CASTED]], align 8 @@ -2212,9 +2212,9 @@ int main (int argc, char **argv) { // CHECK10-NEXT: [[TMP0:%.*]] = load i64, i64* [[VLA_ADDR]], align 8 // CHECK10-NEXT: [[TMP1:%.*]] = load i64, i64* [[VLA_ADDR2]], align 8 // CHECK10-NEXT: [[TMP2:%.*]] = load i32*, i32** [[A_ADDR]], align 8 -// 
CHECK10-NEXT: [[TMP3:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK10-NEXT: [[TMP3:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK10-NEXT: store i32 [[TMP3]], i32* [[DOTCAPTURE_EXPR_]], align 4 -// CHECK10-NEXT: [[TMP4:%.*]] = load i32, i32* [[CONV3]], align 8 +// CHECK10-NEXT: [[TMP4:%.*]] = load i32, i32* [[CONV3]], align 4 // CHECK10-NEXT: store i32 [[TMP4]], i32* [[DOTCAPTURE_EXPR_5]], align 4 // CHECK10-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 // CHECK10-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP5]], 0 @@ -2269,11 +2269,11 @@ int main (int argc, char **argv) { // CHECK10: omp.inner.for.body: // CHECK10-NEXT: [[TMP19:%.*]] = load i64, i64* [[DOTOMP_COMB_LB]], align 8, !llvm.access.group !5 // CHECK10-NEXT: [[TMP20:%.*]] = load i64, i64* [[DOTOMP_COMB_UB]], align 8, !llvm.access.group !5 -// CHECK10-NEXT: [[TMP21:%.*]] = load i32, i32* [[CONV]], align 8, !llvm.access.group !5 +// CHECK10-NEXT: [[TMP21:%.*]] = load i32, i32* [[CONV]], align 4, !llvm.access.group !5 // CHECK10-NEXT: [[CONV17:%.*]] = bitcast i64* [[N_CASTED]] to i32* // CHECK10-NEXT: store i32 [[TMP21]], i32* [[CONV17]], align 4, !llvm.access.group !5 // CHECK10-NEXT: [[TMP22:%.*]] = load i64, i64* [[N_CASTED]], align 8, !llvm.access.group !5 -// CHECK10-NEXT: [[TMP23:%.*]] = load i32, i32* [[CONV3]], align 8, !llvm.access.group !5 +// CHECK10-NEXT: [[TMP23:%.*]] = load i32, i32* [[CONV3]], align 4, !llvm.access.group !5 // CHECK10-NEXT: [[CONV18:%.*]] = bitcast i64* [[M_CASTED]] to i32* // CHECK10-NEXT: store i32 [[TMP23]], i32* [[CONV18]], align 4, !llvm.access.group !5 // CHECK10-NEXT: [[TMP24:%.*]] = load i64, i64* [[M_CASTED]], align 8, !llvm.access.group !5 @@ -2354,9 +2354,9 @@ int main (int argc, char **argv) { // CHECK10-NEXT: [[TMP0:%.*]] = load i64, i64* [[VLA_ADDR]], align 8 // CHECK10-NEXT: [[TMP1:%.*]] = load i64, i64* [[VLA_ADDR2]], align 8 // CHECK10-NEXT: [[TMP2:%.*]] = load i32*, i32** [[A_ADDR]], align 8 -// CHECK10-NEXT: [[TMP3:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK10-NEXT: [[TMP3:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK10-NEXT: store i32 [[TMP3]], i32* [[DOTCAPTURE_EXPR_]], align 4 -// CHECK10-NEXT: [[TMP4:%.*]] = load i32, i32* [[CONV3]], align 8 +// CHECK10-NEXT: [[TMP4:%.*]] = load i32, i32* [[CONV3]], align 4 // CHECK10-NEXT: store i32 [[TMP4]], i32* [[DOTCAPTURE_EXPR_5]], align 4 // CHECK10-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 // CHECK10-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP5]], 0 diff --git a/clang/test/OpenMP/target_teams_distribute_parallel_for_simd_dist_schedule_codegen.cpp b/clang/test/OpenMP/target_teams_distribute_parallel_for_simd_dist_schedule_codegen.cpp index 73d2762a6c12..1fe8bf844358 100644 --- a/clang/test/OpenMP/target_teams_distribute_parallel_for_simd_dist_schedule_codegen.cpp +++ b/clang/test/OpenMP/target_teams_distribute_parallel_for_simd_dist_schedule_codegen.cpp @@ -3295,7 +3295,7 @@ int main (int argc, char **argv) { // CHECK9-NEXT: [[CONV:%.*]] = bitcast i64* [[N_ADDR]] to i32* // CHECK9-NEXT: [[TMP0:%.*]] = load i64, i64* [[VLA_ADDR]], align 8 // CHECK9-NEXT: [[TMP1:%.*]] = load i32*, i32** [[A_ADDR]], align 8 -// CHECK9-NEXT: [[TMP2:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK9-NEXT: [[TMP2:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK9-NEXT: [[CONV1:%.*]] = bitcast i64* [[N_CASTED]] to i32* // CHECK9-NEXT: store i32 [[TMP2]], i32* [[CONV1]], align 4 // CHECK9-NEXT: [[TMP3:%.*]] = load i64, i64* [[N_CASTED]], align 8 @@ -3330,7 +3330,7 @@ int main (int argc, char **argv) { // 
CHECK9-NEXT: [[CONV:%.*]] = bitcast i64* [[N_ADDR]] to i32* // CHECK9-NEXT: [[TMP0:%.*]] = load i64, i64* [[VLA_ADDR]], align 8 // CHECK9-NEXT: [[TMP1:%.*]] = load i32*, i32** [[A_ADDR]], align 8 -// CHECK9-NEXT: [[TMP2:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK9-NEXT: [[TMP2:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK9-NEXT: store i32 [[TMP2]], i32* [[DOTCAPTURE_EXPR_]], align 4 // CHECK9-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 // CHECK9-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP3]], 0 @@ -3376,7 +3376,7 @@ int main (int argc, char **argv) { // CHECK9-NEXT: [[TMP16:%.*]] = zext i32 [[TMP15]] to i64 // CHECK9-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !9 // CHECK9-NEXT: [[TMP18:%.*]] = zext i32 [[TMP17]] to i64 -// CHECK9-NEXT: [[TMP19:%.*]] = load i32, i32* [[CONV]], align 8, !llvm.access.group !9 +// CHECK9-NEXT: [[TMP19:%.*]] = load i32, i32* [[CONV]], align 4, !llvm.access.group !9 // CHECK9-NEXT: [[CONV6:%.*]] = bitcast i64* [[N_CASTED]] to i32* // CHECK9-NEXT: store i32 [[TMP19]], i32* [[CONV6]], align 4, !llvm.access.group !9 // CHECK9-NEXT: [[TMP20:%.*]] = load i64, i64* [[N_CASTED]], align 8, !llvm.access.group !9 @@ -3441,7 +3441,7 @@ int main (int argc, char **argv) { // CHECK9-NEXT: [[CONV:%.*]] = bitcast i64* [[N_ADDR]] to i32* // CHECK9-NEXT: [[TMP0:%.*]] = load i64, i64* [[VLA_ADDR]], align 8 // CHECK9-NEXT: [[TMP1:%.*]] = load i32*, i32** [[A_ADDR]], align 8 -// CHECK9-NEXT: [[TMP2:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK9-NEXT: [[TMP2:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK9-NEXT: store i32 [[TMP2]], i32* [[DOTCAPTURE_EXPR_]], align 4 // CHECK9-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 // CHECK9-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP3]], 0 @@ -3541,7 +3541,7 @@ int main (int argc, char **argv) { // CHECK9-NEXT: [[CONV:%.*]] = bitcast i64* [[N_ADDR]] to i32* // CHECK9-NEXT: [[TMP0:%.*]] = load i64, i64* [[VLA_ADDR]], align 8 // CHECK9-NEXT: [[TMP1:%.*]] = load i32*, i32** [[A_ADDR]], align 8 -// CHECK9-NEXT: [[TMP2:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK9-NEXT: [[TMP2:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK9-NEXT: [[CONV1:%.*]] = bitcast i64* [[N_CASTED]] to i32* // CHECK9-NEXT: store i32 [[TMP2]], i32* [[CONV1]], align 4 // CHECK9-NEXT: [[TMP3:%.*]] = load i64, i64* [[N_CASTED]], align 8 @@ -3576,7 +3576,7 @@ int main (int argc, char **argv) { // CHECK9-NEXT: [[CONV:%.*]] = bitcast i64* [[N_ADDR]] to i32* // CHECK9-NEXT: [[TMP0:%.*]] = load i64, i64* [[VLA_ADDR]], align 8 // CHECK9-NEXT: [[TMP1:%.*]] = load i32*, i32** [[A_ADDR]], align 8 -// CHECK9-NEXT: [[TMP2:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK9-NEXT: [[TMP2:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK9-NEXT: store i32 [[TMP2]], i32* [[DOTCAPTURE_EXPR_]], align 4 // CHECK9-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 // CHECK9-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP3]], 0 @@ -3622,7 +3622,7 @@ int main (int argc, char **argv) { // CHECK9-NEXT: [[TMP16:%.*]] = zext i32 [[TMP15]] to i64 // CHECK9-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !18 // CHECK9-NEXT: [[TMP18:%.*]] = zext i32 [[TMP17]] to i64 -// CHECK9-NEXT: [[TMP19:%.*]] = load i32, i32* [[CONV]], align 8, !llvm.access.group !18 +// CHECK9-NEXT: [[TMP19:%.*]] = load i32, i32* [[CONV]], align 4, !llvm.access.group !18 // CHECK9-NEXT: [[CONV6:%.*]] = bitcast i64* [[N_CASTED]] to i32* // CHECK9-NEXT: store i32 [[TMP19]], i32* [[CONV6]], align 4, 
!llvm.access.group !18 // CHECK9-NEXT: [[TMP20:%.*]] = load i64, i64* [[N_CASTED]], align 8, !llvm.access.group !18 @@ -3687,7 +3687,7 @@ int main (int argc, char **argv) { // CHECK9-NEXT: [[CONV:%.*]] = bitcast i64* [[N_ADDR]] to i32* // CHECK9-NEXT: [[TMP0:%.*]] = load i64, i64* [[VLA_ADDR]], align 8 // CHECK9-NEXT: [[TMP1:%.*]] = load i32*, i32** [[A_ADDR]], align 8 -// CHECK9-NEXT: [[TMP2:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK9-NEXT: [[TMP2:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK9-NEXT: store i32 [[TMP2]], i32* [[DOTCAPTURE_EXPR_]], align 4 // CHECK9-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 // CHECK9-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP3]], 0 @@ -3791,11 +3791,11 @@ int main (int argc, char **argv) { // CHECK9-NEXT: [[TMP0:%.*]] = load i64, i64* [[VLA_ADDR]], align 8 // CHECK9-NEXT: [[TMP1:%.*]] = load i32*, i32** [[A_ADDR]], align 8 // CHECK9-NEXT: [[CONV1:%.*]] = bitcast i64* [[DOTCAPTURE_EXPR__ADDR]] to i32* -// CHECK9-NEXT: [[TMP2:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK9-NEXT: [[TMP2:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK9-NEXT: [[CONV2:%.*]] = bitcast i64* [[N_CASTED]] to i32* // CHECK9-NEXT: store i32 [[TMP2]], i32* [[CONV2]], align 4 // CHECK9-NEXT: [[TMP3:%.*]] = load i64, i64* [[N_CASTED]], align 8 -// CHECK9-NEXT: [[TMP4:%.*]] = load i32, i32* [[CONV1]], align 8 +// CHECK9-NEXT: [[TMP4:%.*]] = load i32, i32* [[CONV1]], align 4 // CHECK9-NEXT: [[CONV3:%.*]] = bitcast i64* [[DOTCAPTURE_EXPR__CASTED]] to i32* // CHECK9-NEXT: store i32 [[TMP4]], i32* [[CONV3]], align 4 // CHECK9-NEXT: [[TMP5:%.*]] = load i64, i64* [[DOTCAPTURE_EXPR__CASTED]], align 8 @@ -3834,7 +3834,7 @@ int main (int argc, char **argv) { // CHECK9-NEXT: [[TMP0:%.*]] = load i64, i64* [[VLA_ADDR]], align 8 // CHECK9-NEXT: [[TMP1:%.*]] = load i32*, i32** [[A_ADDR]], align 8 // CHECK9-NEXT: [[CONV1:%.*]] = bitcast i64* [[DOTCAPTURE_EXPR__ADDR]] to i32* -// CHECK9-NEXT: [[TMP2:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK9-NEXT: [[TMP2:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK9-NEXT: store i32 [[TMP2]], i32* [[DOTCAPTURE_EXPR_2]], align 4 // CHECK9-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_2]], align 4 // CHECK9-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP3]], 0 @@ -3851,7 +3851,7 @@ int main (int argc, char **argv) { // CHECK9-NEXT: store i32 [[TMP5]], i32* [[DOTOMP_COMB_UB]], align 4 // CHECK9-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK9-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK9-NEXT: [[TMP6:%.*]] = load i32, i32* [[CONV1]], align 8 +// CHECK9-NEXT: [[TMP6:%.*]] = load i32, i32* [[CONV1]], align 4 // CHECK9-NEXT: [[TMP7:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK9-NEXT: [[TMP8:%.*]] = load i32, i32* [[TMP7]], align 4 // CHECK9-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP8]], i32 91, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 [[TMP6]]) @@ -3882,11 +3882,11 @@ int main (int argc, char **argv) { // CHECK9-NEXT: [[TMP17:%.*]] = zext i32 [[TMP16]] to i64 // CHECK9-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !24 // CHECK9-NEXT: [[TMP19:%.*]] = zext i32 [[TMP18]] to i64 -// CHECK9-NEXT: [[TMP20:%.*]] = load i32, i32* [[CONV]], align 8, !llvm.access.group !24 +// CHECK9-NEXT: [[TMP20:%.*]] = load i32, i32* [[CONV]], align 4, !llvm.access.group !24 // CHECK9-NEXT: [[CONV8:%.*]] = bitcast i64* [[N_CASTED]] to i32* // CHECK9-NEXT: store 
i32 [[TMP20]], i32* [[CONV8]], align 4, !llvm.access.group !24 // CHECK9-NEXT: [[TMP21:%.*]] = load i64, i64* [[N_CASTED]], align 8, !llvm.access.group !24 -// CHECK9-NEXT: [[TMP22:%.*]] = load i32, i32* [[CONV1]], align 8, !llvm.access.group !24 +// CHECK9-NEXT: [[TMP22:%.*]] = load i32, i32* [[CONV1]], align 4, !llvm.access.group !24 // CHECK9-NEXT: [[CONV9:%.*]] = bitcast i64* [[DOTCAPTURE_EXPR__CASTED]] to i32* // CHECK9-NEXT: store i32 [[TMP22]], i32* [[CONV9]], align 4, !llvm.access.group !24 // CHECK9-NEXT: [[TMP23:%.*]] = load i64, i64* [[DOTCAPTURE_EXPR__CASTED]], align 8, !llvm.access.group !24 @@ -3977,7 +3977,7 @@ int main (int argc, char **argv) { // CHECK9-NEXT: [[TMP0:%.*]] = load i64, i64* [[VLA_ADDR]], align 8 // CHECK9-NEXT: [[TMP1:%.*]] = load i32*, i32** [[A_ADDR]], align 8 // CHECK9-NEXT: [[CONV1:%.*]] = bitcast i64* [[DOTCAPTURE_EXPR__ADDR]] to i32* -// CHECK9-NEXT: [[TMP2:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK9-NEXT: [[TMP2:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK9-NEXT: store i32 [[TMP2]], i32* [[DOTCAPTURE_EXPR_2]], align 4 // CHECK9-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_2]], align 4 // CHECK9-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP3]], 0 @@ -4495,7 +4495,7 @@ int main (int argc, char **argv) { // CHECK9-NEXT: store i64 [[DOTCAPTURE_EXPR_]], i64* [[DOTCAPTURE_EXPR__ADDR]], align 8 // CHECK9-NEXT: [[TMP0:%.*]] = load [10 x i32]*, [10 x i32]** [[A_ADDR]], align 8 // CHECK9-NEXT: [[CONV:%.*]] = bitcast i64* [[DOTCAPTURE_EXPR__ADDR]] to i32* -// CHECK9-NEXT: [[TMP1:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK9-NEXT: [[TMP1:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK9-NEXT: [[CONV1:%.*]] = bitcast i64* [[DOTCAPTURE_EXPR__CASTED]] to i32* // CHECK9-NEXT: store i32 [[TMP1]], i32* [[CONV1]], align 4 // CHECK9-NEXT: [[TMP2:%.*]] = load i64, i64* [[DOTCAPTURE_EXPR__CASTED]], align 8 @@ -4528,7 +4528,7 @@ int main (int argc, char **argv) { // CHECK9-NEXT: store i32 9, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK9-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK9-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK9-NEXT: [[TMP1:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK9-NEXT: [[TMP1:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK9-NEXT: [[TMP2:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK9-NEXT: [[TMP3:%.*]] = load i32, i32* [[TMP2]], align 4 // CHECK9-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP3]], i32 91, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 [[TMP1]]) @@ -4555,7 +4555,7 @@ int main (int argc, char **argv) { // CHECK9-NEXT: [[TMP9:%.*]] = zext i32 [[TMP8]] to i64 // CHECK9-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !42 // CHECK9-NEXT: [[TMP11:%.*]] = zext i32 [[TMP10]] to i64 -// CHECK9-NEXT: [[TMP12:%.*]] = load i32, i32* [[CONV]], align 8, !llvm.access.group !42 +// CHECK9-NEXT: [[TMP12:%.*]] = load i32, i32* [[CONV]], align 4, !llvm.access.group !42 // CHECK9-NEXT: [[CONV2:%.*]] = bitcast i64* [[DOTCAPTURE_EXPR__CASTED]] to i32* // CHECK9-NEXT: store i32 [[TMP12]], i32* [[CONV2]], align 4, !llvm.access.group !42 // CHECK9-NEXT: [[TMP13:%.*]] = load i64, i64* [[DOTCAPTURE_EXPR__CASTED]], align 8, !llvm.access.group !42 @@ -4949,7 +4949,7 @@ int main (int argc, char **argv) { // CHECK10-NEXT: [[CONV:%.*]] = bitcast i64* [[N_ADDR]] to i32* // CHECK10-NEXT: [[TMP0:%.*]] = load i64, i64* [[VLA_ADDR]], align 8 // CHECK10-NEXT: 
[[TMP1:%.*]] = load i32*, i32** [[A_ADDR]], align 8 -// CHECK10-NEXT: [[TMP2:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK10-NEXT: [[TMP2:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK10-NEXT: [[CONV1:%.*]] = bitcast i64* [[N_CASTED]] to i32* // CHECK10-NEXT: store i32 [[TMP2]], i32* [[CONV1]], align 4 // CHECK10-NEXT: [[TMP3:%.*]] = load i64, i64* [[N_CASTED]], align 8 @@ -4984,7 +4984,7 @@ int main (int argc, char **argv) { // CHECK10-NEXT: [[CONV:%.*]] = bitcast i64* [[N_ADDR]] to i32* // CHECK10-NEXT: [[TMP0:%.*]] = load i64, i64* [[VLA_ADDR]], align 8 // CHECK10-NEXT: [[TMP1:%.*]] = load i32*, i32** [[A_ADDR]], align 8 -// CHECK10-NEXT: [[TMP2:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK10-NEXT: [[TMP2:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK10-NEXT: store i32 [[TMP2]], i32* [[DOTCAPTURE_EXPR_]], align 4 // CHECK10-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 // CHECK10-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP3]], 0 @@ -5030,7 +5030,7 @@ int main (int argc, char **argv) { // CHECK10-NEXT: [[TMP16:%.*]] = zext i32 [[TMP15]] to i64 // CHECK10-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !9 // CHECK10-NEXT: [[TMP18:%.*]] = zext i32 [[TMP17]] to i64 -// CHECK10-NEXT: [[TMP19:%.*]] = load i32, i32* [[CONV]], align 8, !llvm.access.group !9 +// CHECK10-NEXT: [[TMP19:%.*]] = load i32, i32* [[CONV]], align 4, !llvm.access.group !9 // CHECK10-NEXT: [[CONV6:%.*]] = bitcast i64* [[N_CASTED]] to i32* // CHECK10-NEXT: store i32 [[TMP19]], i32* [[CONV6]], align 4, !llvm.access.group !9 // CHECK10-NEXT: [[TMP20:%.*]] = load i64, i64* [[N_CASTED]], align 8, !llvm.access.group !9 @@ -5095,7 +5095,7 @@ int main (int argc, char **argv) { // CHECK10-NEXT: [[CONV:%.*]] = bitcast i64* [[N_ADDR]] to i32* // CHECK10-NEXT: [[TMP0:%.*]] = load i64, i64* [[VLA_ADDR]], align 8 // CHECK10-NEXT: [[TMP1:%.*]] = load i32*, i32** [[A_ADDR]], align 8 -// CHECK10-NEXT: [[TMP2:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK10-NEXT: [[TMP2:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK10-NEXT: store i32 [[TMP2]], i32* [[DOTCAPTURE_EXPR_]], align 4 // CHECK10-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 // CHECK10-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP3]], 0 @@ -5195,7 +5195,7 @@ int main (int argc, char **argv) { // CHECK10-NEXT: [[CONV:%.*]] = bitcast i64* [[N_ADDR]] to i32* // CHECK10-NEXT: [[TMP0:%.*]] = load i64, i64* [[VLA_ADDR]], align 8 // CHECK10-NEXT: [[TMP1:%.*]] = load i32*, i32** [[A_ADDR]], align 8 -// CHECK10-NEXT: [[TMP2:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK10-NEXT: [[TMP2:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK10-NEXT: [[CONV1:%.*]] = bitcast i64* [[N_CASTED]] to i32* // CHECK10-NEXT: store i32 [[TMP2]], i32* [[CONV1]], align 4 // CHECK10-NEXT: [[TMP3:%.*]] = load i64, i64* [[N_CASTED]], align 8 @@ -5230,7 +5230,7 @@ int main (int argc, char **argv) { // CHECK10-NEXT: [[CONV:%.*]] = bitcast i64* [[N_ADDR]] to i32* // CHECK10-NEXT: [[TMP0:%.*]] = load i64, i64* [[VLA_ADDR]], align 8 // CHECK10-NEXT: [[TMP1:%.*]] = load i32*, i32** [[A_ADDR]], align 8 -// CHECK10-NEXT: [[TMP2:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK10-NEXT: [[TMP2:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK10-NEXT: store i32 [[TMP2]], i32* [[DOTCAPTURE_EXPR_]], align 4 // CHECK10-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 // CHECK10-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP3]], 0 @@ -5276,7 +5276,7 @@ int main (int argc, char **argv) { // CHECK10-NEXT: [[TMP16:%.*]] = zext i32 
[[TMP15]] to i64 // CHECK10-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !18 // CHECK10-NEXT: [[TMP18:%.*]] = zext i32 [[TMP17]] to i64 -// CHECK10-NEXT: [[TMP19:%.*]] = load i32, i32* [[CONV]], align 8, !llvm.access.group !18 +// CHECK10-NEXT: [[TMP19:%.*]] = load i32, i32* [[CONV]], align 4, !llvm.access.group !18 // CHECK10-NEXT: [[CONV6:%.*]] = bitcast i64* [[N_CASTED]] to i32* // CHECK10-NEXT: store i32 [[TMP19]], i32* [[CONV6]], align 4, !llvm.access.group !18 // CHECK10-NEXT: [[TMP20:%.*]] = load i64, i64* [[N_CASTED]], align 8, !llvm.access.group !18 @@ -5341,7 +5341,7 @@ int main (int argc, char **argv) { // CHECK10-NEXT: [[CONV:%.*]] = bitcast i64* [[N_ADDR]] to i32* // CHECK10-NEXT: [[TMP0:%.*]] = load i64, i64* [[VLA_ADDR]], align 8 // CHECK10-NEXT: [[TMP1:%.*]] = load i32*, i32** [[A_ADDR]], align 8 -// CHECK10-NEXT: [[TMP2:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK10-NEXT: [[TMP2:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK10-NEXT: store i32 [[TMP2]], i32* [[DOTCAPTURE_EXPR_]], align 4 // CHECK10-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 // CHECK10-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP3]], 0 @@ -5445,11 +5445,11 @@ int main (int argc, char **argv) { // CHECK10-NEXT: [[TMP0:%.*]] = load i64, i64* [[VLA_ADDR]], align 8 // CHECK10-NEXT: [[TMP1:%.*]] = load i32*, i32** [[A_ADDR]], align 8 // CHECK10-NEXT: [[CONV1:%.*]] = bitcast i64* [[DOTCAPTURE_EXPR__ADDR]] to i32* -// CHECK10-NEXT: [[TMP2:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK10-NEXT: [[TMP2:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK10-NEXT: [[CONV2:%.*]] = bitcast i64* [[N_CASTED]] to i32* // CHECK10-NEXT: store i32 [[TMP2]], i32* [[CONV2]], align 4 // CHECK10-NEXT: [[TMP3:%.*]] = load i64, i64* [[N_CASTED]], align 8 -// CHECK10-NEXT: [[TMP4:%.*]] = load i32, i32* [[CONV1]], align 8 +// CHECK10-NEXT: [[TMP4:%.*]] = load i32, i32* [[CONV1]], align 4 // CHECK10-NEXT: [[CONV3:%.*]] = bitcast i64* [[DOTCAPTURE_EXPR__CASTED]] to i32* // CHECK10-NEXT: store i32 [[TMP4]], i32* [[CONV3]], align 4 // CHECK10-NEXT: [[TMP5:%.*]] = load i64, i64* [[DOTCAPTURE_EXPR__CASTED]], align 8 @@ -5488,7 +5488,7 @@ int main (int argc, char **argv) { // CHECK10-NEXT: [[TMP0:%.*]] = load i64, i64* [[VLA_ADDR]], align 8 // CHECK10-NEXT: [[TMP1:%.*]] = load i32*, i32** [[A_ADDR]], align 8 // CHECK10-NEXT: [[CONV1:%.*]] = bitcast i64* [[DOTCAPTURE_EXPR__ADDR]] to i32* -// CHECK10-NEXT: [[TMP2:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK10-NEXT: [[TMP2:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK10-NEXT: store i32 [[TMP2]], i32* [[DOTCAPTURE_EXPR_2]], align 4 // CHECK10-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_2]], align 4 // CHECK10-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP3]], 0 @@ -5505,7 +5505,7 @@ int main (int argc, char **argv) { // CHECK10-NEXT: store i32 [[TMP5]], i32* [[DOTOMP_COMB_UB]], align 4 // CHECK10-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK10-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK10-NEXT: [[TMP6:%.*]] = load i32, i32* [[CONV1]], align 8 +// CHECK10-NEXT: [[TMP6:%.*]] = load i32, i32* [[CONV1]], align 4 // CHECK10-NEXT: [[TMP7:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK10-NEXT: [[TMP8:%.*]] = load i32, i32* [[TMP7]], align 4 // CHECK10-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP8]], i32 91, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 [[TMP6]]) @@ -5536,11 
+5536,11 @@ int main (int argc, char **argv) { // CHECK10-NEXT: [[TMP17:%.*]] = zext i32 [[TMP16]] to i64 // CHECK10-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !24 // CHECK10-NEXT: [[TMP19:%.*]] = zext i32 [[TMP18]] to i64 -// CHECK10-NEXT: [[TMP20:%.*]] = load i32, i32* [[CONV]], align 8, !llvm.access.group !24 +// CHECK10-NEXT: [[TMP20:%.*]] = load i32, i32* [[CONV]], align 4, !llvm.access.group !24 // CHECK10-NEXT: [[CONV8:%.*]] = bitcast i64* [[N_CASTED]] to i32* // CHECK10-NEXT: store i32 [[TMP20]], i32* [[CONV8]], align 4, !llvm.access.group !24 // CHECK10-NEXT: [[TMP21:%.*]] = load i64, i64* [[N_CASTED]], align 8, !llvm.access.group !24 -// CHECK10-NEXT: [[TMP22:%.*]] = load i32, i32* [[CONV1]], align 8, !llvm.access.group !24 +// CHECK10-NEXT: [[TMP22:%.*]] = load i32, i32* [[CONV1]], align 4, !llvm.access.group !24 // CHECK10-NEXT: [[CONV9:%.*]] = bitcast i64* [[DOTCAPTURE_EXPR__CASTED]] to i32* // CHECK10-NEXT: store i32 [[TMP22]], i32* [[CONV9]], align 4, !llvm.access.group !24 // CHECK10-NEXT: [[TMP23:%.*]] = load i64, i64* [[DOTCAPTURE_EXPR__CASTED]], align 8, !llvm.access.group !24 @@ -5631,7 +5631,7 @@ int main (int argc, char **argv) { // CHECK10-NEXT: [[TMP0:%.*]] = load i64, i64* [[VLA_ADDR]], align 8 // CHECK10-NEXT: [[TMP1:%.*]] = load i32*, i32** [[A_ADDR]], align 8 // CHECK10-NEXT: [[CONV1:%.*]] = bitcast i64* [[DOTCAPTURE_EXPR__ADDR]] to i32* -// CHECK10-NEXT: [[TMP2:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK10-NEXT: [[TMP2:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK10-NEXT: store i32 [[TMP2]], i32* [[DOTCAPTURE_EXPR_2]], align 4 // CHECK10-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_2]], align 4 // CHECK10-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP3]], 0 @@ -6149,7 +6149,7 @@ int main (int argc, char **argv) { // CHECK10-NEXT: store i64 [[DOTCAPTURE_EXPR_]], i64* [[DOTCAPTURE_EXPR__ADDR]], align 8 // CHECK10-NEXT: [[TMP0:%.*]] = load [10 x i32]*, [10 x i32]** [[A_ADDR]], align 8 // CHECK10-NEXT: [[CONV:%.*]] = bitcast i64* [[DOTCAPTURE_EXPR__ADDR]] to i32* -// CHECK10-NEXT: [[TMP1:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK10-NEXT: [[TMP1:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK10-NEXT: [[CONV1:%.*]] = bitcast i64* [[DOTCAPTURE_EXPR__CASTED]] to i32* // CHECK10-NEXT: store i32 [[TMP1]], i32* [[CONV1]], align 4 // CHECK10-NEXT: [[TMP2:%.*]] = load i64, i64* [[DOTCAPTURE_EXPR__CASTED]], align 8 @@ -6182,7 +6182,7 @@ int main (int argc, char **argv) { // CHECK10-NEXT: store i32 9, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK10-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK10-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK10-NEXT: [[TMP1:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK10-NEXT: [[TMP1:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK10-NEXT: [[TMP2:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK10-NEXT: [[TMP3:%.*]] = load i32, i32* [[TMP2]], align 4 // CHECK10-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP3]], i32 91, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 [[TMP1]]) @@ -6209,7 +6209,7 @@ int main (int argc, char **argv) { // CHECK10-NEXT: [[TMP9:%.*]] = zext i32 [[TMP8]] to i64 // CHECK10-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !42 // CHECK10-NEXT: [[TMP11:%.*]] = zext i32 [[TMP10]] to i64 -// CHECK10-NEXT: [[TMP12:%.*]] = load i32, i32* [[CONV]], align 8, !llvm.access.group !42 +// 
CHECK10-NEXT: [[TMP12:%.*]] = load i32, i32* [[CONV]], align 4, !llvm.access.group !42 // CHECK10-NEXT: [[CONV2:%.*]] = bitcast i64* [[DOTCAPTURE_EXPR__CASTED]] to i32* // CHECK10-NEXT: store i32 [[TMP12]], i32* [[CONV2]], align 4, !llvm.access.group !42 // CHECK10-NEXT: [[TMP13:%.*]] = load i64, i64* [[DOTCAPTURE_EXPR__CASTED]], align 8, !llvm.access.group !42 diff --git a/clang/test/OpenMP/target_teams_distribute_parallel_for_simd_firstprivate_codegen.cpp b/clang/test/OpenMP/target_teams_distribute_parallel_for_simd_firstprivate_codegen.cpp index f1f72eee7f86..e900eb9a3323 100644 --- a/clang/test/OpenMP/target_teams_distribute_parallel_for_simd_firstprivate_codegen.cpp +++ b/clang/test/OpenMP/target_teams_distribute_parallel_for_simd_firstprivate_codegen.cpp @@ -404,11 +404,11 @@ int main() { // CHECK1-NEXT: [[TMP1:%.*]] = load [2 x %struct.S]*, [2 x %struct.S]** [[S_ARR_ADDR]], align 8 // CHECK1-NEXT: [[TMP2:%.*]] = load %struct.S*, %struct.S** [[VAR_ADDR]], align 8 // CHECK1-NEXT: [[CONV1:%.*]] = bitcast i64* [[SIVAR_ADDR]] to i32* -// CHECK1-NEXT: [[TMP3:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK1-NEXT: [[TMP3:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK1-NEXT: [[CONV2:%.*]] = bitcast i64* [[T_VAR_CASTED]] to i32* // CHECK1-NEXT: store i32 [[TMP3]], i32* [[CONV2]], align 4 // CHECK1-NEXT: [[TMP4:%.*]] = load i64, i64* [[T_VAR_CASTED]], align 8 -// CHECK1-NEXT: [[TMP5:%.*]] = load i32, i32* [[CONV1]], align 8 +// CHECK1-NEXT: [[TMP5:%.*]] = load i32, i32* [[CONV1]], align 4 // CHECK1-NEXT: [[CONV3:%.*]] = bitcast i64* [[SIVAR_CASTED]] to i32* // CHECK1-NEXT: store i32 [[TMP5]], i32* [[CONV3]], align 4 // CHECK1-NEXT: [[TMP6:%.*]] = load i64, i64* [[SIVAR_CASTED]], align 8 @@ -507,11 +507,11 @@ int main() { // CHECK1-NEXT: [[TMP15:%.*]] = zext i32 [[TMP14]] to i64 // CHECK1-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !5 // CHECK1-NEXT: [[TMP17:%.*]] = zext i32 [[TMP16]] to i64 -// CHECK1-NEXT: [[TMP18:%.*]] = load i32, i32* [[CONV]], align 8, !llvm.access.group !5 +// CHECK1-NEXT: [[TMP18:%.*]] = load i32, i32* [[CONV]], align 4, !llvm.access.group !5 // CHECK1-NEXT: [[CONV8:%.*]] = bitcast i64* [[T_VAR_CASTED]] to i32* // CHECK1-NEXT: store i32 [[TMP18]], i32* [[CONV8]], align 4, !llvm.access.group !5 // CHECK1-NEXT: [[TMP19:%.*]] = load i64, i64* [[T_VAR_CASTED]], align 8, !llvm.access.group !5 -// CHECK1-NEXT: [[TMP20:%.*]] = load i32, i32* [[CONV1]], align 8, !llvm.access.group !5 +// CHECK1-NEXT: [[TMP20:%.*]] = load i32, i32* [[CONV1]], align 4, !llvm.access.group !5 // CHECK1-NEXT: [[CONV9:%.*]] = bitcast i64* [[SIVAR_CASTED]] to i32* // CHECK1-NEXT: store i32 [[TMP20]], i32* [[CONV9]], align 4, !llvm.access.group !5 // CHECK1-NEXT: [[TMP21:%.*]] = load i64, i64* [[SIVAR_CASTED]], align 8, !llvm.access.group !5 @@ -682,7 +682,7 @@ int main() { // CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP16]], 1 // CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK1-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !9 -// CHECK1-NEXT: [[TMP17:%.*]] = load i32, i32* [[CONV]], align 8, !llvm.access.group !9 +// CHECK1-NEXT: [[TMP17:%.*]] = load i32, i32* [[CONV]], align 4, !llvm.access.group !9 // CHECK1-NEXT: [[TMP18:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !9 // CHECK1-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP18]] to i64 // CHECK1-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x i32], [2 x i32]* [[VEC4]], i64 0, i64 [[IDXPROM]] @@ -694,9 +694,9 @@ int main() { // CHECK1-NEXT: [[TMP21:%.*]] = bitcast 
%struct.S* [[VAR7]] to i8* // CHECK1-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 [[TMP20]], i8* align 4 [[TMP21]], i64 4, i1 false), !llvm.access.group !9 // CHECK1-NEXT: [[TMP22:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !9 -// CHECK1-NEXT: [[TMP23:%.*]] = load i32, i32* [[CONV1]], align 8, !llvm.access.group !9 +// CHECK1-NEXT: [[TMP23:%.*]] = load i32, i32* [[CONV1]], align 4, !llvm.access.group !9 // CHECK1-NEXT: [[ADD12:%.*]] = add nsw i32 [[TMP23]], [[TMP22]] -// CHECK1-NEXT: store i32 [[ADD12]], i32* [[CONV1]], align 8, !llvm.access.group !9 +// CHECK1-NEXT: store i32 [[ADD12]], i32* [[CONV1]], align 4, !llvm.access.group !9 // CHECK1-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK1: omp.body.continue: // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] @@ -904,7 +904,7 @@ int main() { // CHECK1-NEXT: [[TMP1:%.*]] = load [2 x %struct.S.0]*, [2 x %struct.S.0]** [[S_ARR_ADDR]], align 8 // CHECK1-NEXT: [[TMP2:%.*]] = load %struct.S.0*, %struct.S.0** [[VAR_ADDR]], align 8 // CHECK1-NEXT: store %struct.S.0* [[TMP2]], %struct.S.0** [[TMP]], align 8 -// CHECK1-NEXT: [[TMP3:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK1-NEXT: [[TMP3:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK1-NEXT: [[CONV1:%.*]] = bitcast i64* [[T_VAR_CASTED]] to i32* // CHECK1-NEXT: store i32 [[TMP3]], i32* [[CONV1]], align 4 // CHECK1-NEXT: [[TMP4:%.*]] = load i64, i64* [[T_VAR_CASTED]], align 8 @@ -1005,7 +1005,7 @@ int main() { // CHECK1-NEXT: [[TMP16:%.*]] = zext i32 [[TMP15]] to i64 // CHECK1-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !14 // CHECK1-NEXT: [[TMP18:%.*]] = zext i32 [[TMP17]] to i64 -// CHECK1-NEXT: [[TMP19:%.*]] = load i32, i32* [[CONV]], align 8, !llvm.access.group !14 +// CHECK1-NEXT: [[TMP19:%.*]] = load i32, i32* [[CONV]], align 4, !llvm.access.group !14 // CHECK1-NEXT: [[CONV9:%.*]] = bitcast i64* [[T_VAR_CASTED]] to i32* // CHECK1-NEXT: store i32 [[TMP19]], i32* [[CONV9]], align 4, !llvm.access.group !14 // CHECK1-NEXT: [[TMP20:%.*]] = load i64, i64* [[T_VAR_CASTED]], align 8, !llvm.access.group !14 @@ -1159,7 +1159,7 @@ int main() { // CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP17]], 1 // CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK1-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !17 -// CHECK1-NEXT: [[TMP18:%.*]] = load i32, i32* [[CONV]], align 8, !llvm.access.group !17 +// CHECK1-NEXT: [[TMP18:%.*]] = load i32, i32* [[CONV]], align 4, !llvm.access.group !17 // CHECK1-NEXT: [[TMP19:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !17 // CHECK1-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP19]] to i64 // CHECK1-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x i32], [2 x i32]* [[VEC4]], i64 0, i64 [[IDXPROM]] @@ -1494,11 +1494,11 @@ int main() { // CHECK2-NEXT: [[TMP1:%.*]] = load [2 x %struct.S]*, [2 x %struct.S]** [[S_ARR_ADDR]], align 8 // CHECK2-NEXT: [[TMP2:%.*]] = load %struct.S*, %struct.S** [[VAR_ADDR]], align 8 // CHECK2-NEXT: [[CONV1:%.*]] = bitcast i64* [[SIVAR_ADDR]] to i32* -// CHECK2-NEXT: [[TMP3:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK2-NEXT: [[TMP3:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK2-NEXT: [[CONV2:%.*]] = bitcast i64* [[T_VAR_CASTED]] to i32* // CHECK2-NEXT: store i32 [[TMP3]], i32* [[CONV2]], align 4 // CHECK2-NEXT: [[TMP4:%.*]] = load i64, i64* [[T_VAR_CASTED]], align 8 -// CHECK2-NEXT: [[TMP5:%.*]] = load i32, i32* [[CONV1]], align 8 +// CHECK2-NEXT: [[TMP5:%.*]] = load i32, i32* [[CONV1]], align 4 // CHECK2-NEXT: [[CONV3:%.*]] = bitcast 
i64* [[SIVAR_CASTED]] to i32* // CHECK2-NEXT: store i32 [[TMP5]], i32* [[CONV3]], align 4 // CHECK2-NEXT: [[TMP6:%.*]] = load i64, i64* [[SIVAR_CASTED]], align 8 @@ -1597,11 +1597,11 @@ int main() { // CHECK2-NEXT: [[TMP15:%.*]] = zext i32 [[TMP14]] to i64 // CHECK2-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !5 // CHECK2-NEXT: [[TMP17:%.*]] = zext i32 [[TMP16]] to i64 -// CHECK2-NEXT: [[TMP18:%.*]] = load i32, i32* [[CONV]], align 8, !llvm.access.group !5 +// CHECK2-NEXT: [[TMP18:%.*]] = load i32, i32* [[CONV]], align 4, !llvm.access.group !5 // CHECK2-NEXT: [[CONV8:%.*]] = bitcast i64* [[T_VAR_CASTED]] to i32* // CHECK2-NEXT: store i32 [[TMP18]], i32* [[CONV8]], align 4, !llvm.access.group !5 // CHECK2-NEXT: [[TMP19:%.*]] = load i64, i64* [[T_VAR_CASTED]], align 8, !llvm.access.group !5 -// CHECK2-NEXT: [[TMP20:%.*]] = load i32, i32* [[CONV1]], align 8, !llvm.access.group !5 +// CHECK2-NEXT: [[TMP20:%.*]] = load i32, i32* [[CONV1]], align 4, !llvm.access.group !5 // CHECK2-NEXT: [[CONV9:%.*]] = bitcast i64* [[SIVAR_CASTED]] to i32* // CHECK2-NEXT: store i32 [[TMP20]], i32* [[CONV9]], align 4, !llvm.access.group !5 // CHECK2-NEXT: [[TMP21:%.*]] = load i64, i64* [[SIVAR_CASTED]], align 8, !llvm.access.group !5 @@ -1772,7 +1772,7 @@ int main() { // CHECK2-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP16]], 1 // CHECK2-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK2-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !9 -// CHECK2-NEXT: [[TMP17:%.*]] = load i32, i32* [[CONV]], align 8, !llvm.access.group !9 +// CHECK2-NEXT: [[TMP17:%.*]] = load i32, i32* [[CONV]], align 4, !llvm.access.group !9 // CHECK2-NEXT: [[TMP18:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !9 // CHECK2-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP18]] to i64 // CHECK2-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x i32], [2 x i32]* [[VEC4]], i64 0, i64 [[IDXPROM]] @@ -1784,9 +1784,9 @@ int main() { // CHECK2-NEXT: [[TMP21:%.*]] = bitcast %struct.S* [[VAR7]] to i8* // CHECK2-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 [[TMP20]], i8* align 4 [[TMP21]], i64 4, i1 false), !llvm.access.group !9 // CHECK2-NEXT: [[TMP22:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !9 -// CHECK2-NEXT: [[TMP23:%.*]] = load i32, i32* [[CONV1]], align 8, !llvm.access.group !9 +// CHECK2-NEXT: [[TMP23:%.*]] = load i32, i32* [[CONV1]], align 4, !llvm.access.group !9 // CHECK2-NEXT: [[ADD12:%.*]] = add nsw i32 [[TMP23]], [[TMP22]] -// CHECK2-NEXT: store i32 [[ADD12]], i32* [[CONV1]], align 8, !llvm.access.group !9 +// CHECK2-NEXT: store i32 [[ADD12]], i32* [[CONV1]], align 4, !llvm.access.group !9 // CHECK2-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK2: omp.body.continue: // CHECK2-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] @@ -1994,7 +1994,7 @@ int main() { // CHECK2-NEXT: [[TMP1:%.*]] = load [2 x %struct.S.0]*, [2 x %struct.S.0]** [[S_ARR_ADDR]], align 8 // CHECK2-NEXT: [[TMP2:%.*]] = load %struct.S.0*, %struct.S.0** [[VAR_ADDR]], align 8 // CHECK2-NEXT: store %struct.S.0* [[TMP2]], %struct.S.0** [[TMP]], align 8 -// CHECK2-NEXT: [[TMP3:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK2-NEXT: [[TMP3:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK2-NEXT: [[CONV1:%.*]] = bitcast i64* [[T_VAR_CASTED]] to i32* // CHECK2-NEXT: store i32 [[TMP3]], i32* [[CONV1]], align 4 // CHECK2-NEXT: [[TMP4:%.*]] = load i64, i64* [[T_VAR_CASTED]], align 8 @@ -2095,7 +2095,7 @@ int main() { // CHECK2-NEXT: [[TMP16:%.*]] = zext i32 [[TMP15]] to i64 // CHECK2-NEXT: [[TMP17:%.*]] = 
load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !14 // CHECK2-NEXT: [[TMP18:%.*]] = zext i32 [[TMP17]] to i64 -// CHECK2-NEXT: [[TMP19:%.*]] = load i32, i32* [[CONV]], align 8, !llvm.access.group !14 +// CHECK2-NEXT: [[TMP19:%.*]] = load i32, i32* [[CONV]], align 4, !llvm.access.group !14 // CHECK2-NEXT: [[CONV9:%.*]] = bitcast i64* [[T_VAR_CASTED]] to i32* // CHECK2-NEXT: store i32 [[TMP19]], i32* [[CONV9]], align 4, !llvm.access.group !14 // CHECK2-NEXT: [[TMP20:%.*]] = load i64, i64* [[T_VAR_CASTED]], align 8, !llvm.access.group !14 @@ -2249,7 +2249,7 @@ int main() { // CHECK2-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP17]], 1 // CHECK2-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK2-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !17 -// CHECK2-NEXT: [[TMP18:%.*]] = load i32, i32* [[CONV]], align 8, !llvm.access.group !17 +// CHECK2-NEXT: [[TMP18:%.*]] = load i32, i32* [[CONV]], align 4, !llvm.access.group !17 // CHECK2-NEXT: [[TMP19:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !17 // CHECK2-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP19]] to i64 // CHECK2-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x i32], [2 x i32]* [[VEC4]], i64 0, i64 [[IDXPROM]] @@ -4638,7 +4638,7 @@ int main() { // CHECK5-NEXT: [[CONV1:%.*]] = bitcast i64* [[G1_ADDR]] to i32* // CHECK5-NEXT: [[CONV2:%.*]] = bitcast i64* [[SIVAR_ADDR]] to i32* // CHECK5-NEXT: store i32* [[CONV1]], i32** [[TMP]], align 8 -// CHECK5-NEXT: [[TMP0:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK5-NEXT: [[TMP0:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK5-NEXT: [[CONV3:%.*]] = bitcast i64* [[G_CASTED]] to i32* // CHECK5-NEXT: store i32 [[TMP0]], i32* [[CONV3]], align 4 // CHECK5-NEXT: [[TMP1:%.*]] = load i64, i64* [[G_CASTED]], align 8 @@ -4647,7 +4647,7 @@ int main() { // CHECK5-NEXT: [[CONV4:%.*]] = bitcast i64* [[G1_CASTED]] to i32* // CHECK5-NEXT: store i32 [[TMP3]], i32* [[CONV4]], align 4 // CHECK5-NEXT: [[TMP4:%.*]] = load i64, i64* [[G1_CASTED]], align 8 -// CHECK5-NEXT: [[TMP5:%.*]] = load i32, i32* [[CONV2]], align 8 +// CHECK5-NEXT: [[TMP5:%.*]] = load i32, i32* [[CONV2]], align 4 // CHECK5-NEXT: [[CONV5:%.*]] = bitcast i64* [[SIVAR_CASTED]] to i32* // CHECK5-NEXT: store i32 [[TMP5]], i32* [[CONV5]], align 4 // CHECK5-NEXT: [[TMP6:%.*]] = load i64, i64* [[SIVAR_CASTED]], align 8 @@ -4714,7 +4714,7 @@ int main() { // CHECK5-NEXT: [[TMP8:%.*]] = zext i32 [[TMP7]] to i64 // CHECK5-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !4 // CHECK5-NEXT: [[TMP10:%.*]] = zext i32 [[TMP9]] to i64 -// CHECK5-NEXT: [[TMP11:%.*]] = load i32, i32* [[CONV]], align 8, !llvm.access.group !4 +// CHECK5-NEXT: [[TMP11:%.*]] = load i32, i32* [[CONV]], align 4, !llvm.access.group !4 // CHECK5-NEXT: [[CONV5:%.*]] = bitcast i64* [[G_CASTED]] to i32* // CHECK5-NEXT: store i32 [[TMP11]], i32* [[CONV5]], align 4, !llvm.access.group !4 // CHECK5-NEXT: [[TMP12:%.*]] = load i64, i64* [[G_CASTED]], align 8, !llvm.access.group !4 @@ -4723,7 +4723,7 @@ int main() { // CHECK5-NEXT: [[CONV6:%.*]] = bitcast i64* [[G1_CASTED]] to i32* // CHECK5-NEXT: store i32 [[TMP14]], i32* [[CONV6]], align 4, !llvm.access.group !4 // CHECK5-NEXT: [[TMP15:%.*]] = load i64, i64* [[G1_CASTED]], align 8, !llvm.access.group !4 -// CHECK5-NEXT: [[TMP16:%.*]] = load i32, i32* [[CONV2]], align 8, !llvm.access.group !4 +// CHECK5-NEXT: [[TMP16:%.*]] = load i32, i32* [[CONV2]], align 4, !llvm.access.group !4 // CHECK5-NEXT: [[CONV7:%.*]] = bitcast i64* [[SIVAR_CASTED]] to i32* // CHECK5-NEXT: store 
i32 [[TMP16]], i32* [[CONV7]], align 4, !llvm.access.group !4 // CHECK5-NEXT: [[TMP17:%.*]] = load i64, i64* [[SIVAR_CASTED]], align 8, !llvm.access.group !4 @@ -4816,10 +4816,10 @@ int main() { // CHECK5-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP9]], 1 // CHECK5-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK5-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !8 -// CHECK5-NEXT: store i32 1, i32* [[CONV]], align 8, !llvm.access.group !8 +// CHECK5-NEXT: store i32 1, i32* [[CONV]], align 4, !llvm.access.group !8 // CHECK5-NEXT: [[TMP10:%.*]] = load i32*, i32** [[TMP]], align 8, !llvm.access.group !8 // CHECK5-NEXT: store volatile i32 1, i32* [[TMP10]], align 4, !llvm.access.group !8 -// CHECK5-NEXT: store i32 2, i32* [[CONV2]], align 8, !llvm.access.group !8 +// CHECK5-NEXT: store i32 2, i32* [[CONV2]], align 4, !llvm.access.group !8 // CHECK5-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[CLASS_ANON_0]], %class.anon.0* [[REF_TMP]], i32 0, i32 0 // CHECK5-NEXT: store i32* [[CONV]], i32** [[TMP11]], align 8, !llvm.access.group !8 // CHECK5-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[CLASS_ANON_0]], %class.anon.0* [[REF_TMP]], i32 0, i32 1 @@ -5006,7 +5006,7 @@ int main() { // CHECK6-NEXT: [[CONV1:%.*]] = bitcast i64* [[G1_ADDR]] to i32* // CHECK6-NEXT: [[CONV2:%.*]] = bitcast i64* [[SIVAR_ADDR]] to i32* // CHECK6-NEXT: store i32* [[CONV1]], i32** [[TMP]], align 8 -// CHECK6-NEXT: [[TMP0:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK6-NEXT: [[TMP0:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK6-NEXT: [[CONV3:%.*]] = bitcast i64* [[G_CASTED]] to i32* // CHECK6-NEXT: store i32 [[TMP0]], i32* [[CONV3]], align 4 // CHECK6-NEXT: [[TMP1:%.*]] = load i64, i64* [[G_CASTED]], align 8 @@ -5015,7 +5015,7 @@ int main() { // CHECK6-NEXT: [[CONV4:%.*]] = bitcast i64* [[G1_CASTED]] to i32* // CHECK6-NEXT: store i32 [[TMP3]], i32* [[CONV4]], align 4 // CHECK6-NEXT: [[TMP4:%.*]] = load i64, i64* [[G1_CASTED]], align 8 -// CHECK6-NEXT: [[TMP5:%.*]] = load i32, i32* [[CONV2]], align 8 +// CHECK6-NEXT: [[TMP5:%.*]] = load i32, i32* [[CONV2]], align 4 // CHECK6-NEXT: [[CONV5:%.*]] = bitcast i64* [[SIVAR_CASTED]] to i32* // CHECK6-NEXT: store i32 [[TMP5]], i32* [[CONV5]], align 4 // CHECK6-NEXT: [[TMP6:%.*]] = load i64, i64* [[SIVAR_CASTED]], align 8 @@ -5082,7 +5082,7 @@ int main() { // CHECK6-NEXT: [[TMP8:%.*]] = zext i32 [[TMP7]] to i64 // CHECK6-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !4 // CHECK6-NEXT: [[TMP10:%.*]] = zext i32 [[TMP9]] to i64 -// CHECK6-NEXT: [[TMP11:%.*]] = load i32, i32* [[CONV]], align 8, !llvm.access.group !4 +// CHECK6-NEXT: [[TMP11:%.*]] = load i32, i32* [[CONV]], align 4, !llvm.access.group !4 // CHECK6-NEXT: [[CONV5:%.*]] = bitcast i64* [[G_CASTED]] to i32* // CHECK6-NEXT: store i32 [[TMP11]], i32* [[CONV5]], align 4, !llvm.access.group !4 // CHECK6-NEXT: [[TMP12:%.*]] = load i64, i64* [[G_CASTED]], align 8, !llvm.access.group !4 @@ -5091,7 +5091,7 @@ int main() { // CHECK6-NEXT: [[CONV6:%.*]] = bitcast i64* [[G1_CASTED]] to i32* // CHECK6-NEXT: store i32 [[TMP14]], i32* [[CONV6]], align 4, !llvm.access.group !4 // CHECK6-NEXT: [[TMP15:%.*]] = load i64, i64* [[G1_CASTED]], align 8, !llvm.access.group !4 -// CHECK6-NEXT: [[TMP16:%.*]] = load i32, i32* [[CONV2]], align 8, !llvm.access.group !4 +// CHECK6-NEXT: [[TMP16:%.*]] = load i32, i32* [[CONV2]], align 4, !llvm.access.group !4 // CHECK6-NEXT: [[CONV7:%.*]] = bitcast i64* [[SIVAR_CASTED]] to i32* // CHECK6-NEXT: store i32 [[TMP16]], i32* [[CONV7]], align 4, 
!llvm.access.group !4 // CHECK6-NEXT: [[TMP17:%.*]] = load i64, i64* [[SIVAR_CASTED]], align 8, !llvm.access.group !4 @@ -5184,10 +5184,10 @@ int main() { // CHECK6-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP9]], 1 // CHECK6-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK6-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !8 -// CHECK6-NEXT: store i32 1, i32* [[CONV]], align 8, !llvm.access.group !8 +// CHECK6-NEXT: store i32 1, i32* [[CONV]], align 4, !llvm.access.group !8 // CHECK6-NEXT: [[TMP10:%.*]] = load i32*, i32** [[TMP]], align 8, !llvm.access.group !8 // CHECK6-NEXT: store volatile i32 1, i32* [[TMP10]], align 4, !llvm.access.group !8 -// CHECK6-NEXT: store i32 2, i32* [[CONV2]], align 8, !llvm.access.group !8 +// CHECK6-NEXT: store i32 2, i32* [[CONV2]], align 4, !llvm.access.group !8 // CHECK6-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[CLASS_ANON_0]], %class.anon.0* [[REF_TMP]], i32 0, i32 0 // CHECK6-NEXT: store i32* [[CONV]], i32** [[TMP11]], align 8, !llvm.access.group !8 // CHECK6-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[CLASS_ANON_0]], %class.anon.0* [[REF_TMP]], i32 0, i32 1 @@ -6818,11 +6818,11 @@ int main() { // CHECK13-NEXT: [[TMP1:%.*]] = load [2 x %struct.S]*, [2 x %struct.S]** [[S_ARR_ADDR]], align 8 // CHECK13-NEXT: [[TMP2:%.*]] = load %struct.S*, %struct.S** [[VAR_ADDR]], align 8 // CHECK13-NEXT: [[CONV1:%.*]] = bitcast i64* [[SIVAR_ADDR]] to i32* -// CHECK13-NEXT: [[TMP3:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK13-NEXT: [[TMP3:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK13-NEXT: [[CONV2:%.*]] = bitcast i64* [[T_VAR_CASTED]] to i32* // CHECK13-NEXT: store i32 [[TMP3]], i32* [[CONV2]], align 4 // CHECK13-NEXT: [[TMP4:%.*]] = load i64, i64* [[T_VAR_CASTED]], align 8 -// CHECK13-NEXT: [[TMP5:%.*]] = load i32, i32* [[CONV1]], align 8 +// CHECK13-NEXT: [[TMP5:%.*]] = load i32, i32* [[CONV1]], align 4 // CHECK13-NEXT: [[CONV3:%.*]] = bitcast i64* [[SIVAR_CASTED]] to i32* // CHECK13-NEXT: store i32 [[TMP5]], i32* [[CONV3]], align 4 // CHECK13-NEXT: [[TMP6:%.*]] = load i64, i64* [[SIVAR_CASTED]], align 8 @@ -6921,11 +6921,11 @@ int main() { // CHECK13-NEXT: [[TMP15:%.*]] = zext i32 [[TMP14]] to i64 // CHECK13-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !6 // CHECK13-NEXT: [[TMP17:%.*]] = zext i32 [[TMP16]] to i64 -// CHECK13-NEXT: [[TMP18:%.*]] = load i32, i32* [[CONV]], align 8, !llvm.access.group !6 +// CHECK13-NEXT: [[TMP18:%.*]] = load i32, i32* [[CONV]], align 4, !llvm.access.group !6 // CHECK13-NEXT: [[CONV8:%.*]] = bitcast i64* [[T_VAR_CASTED]] to i32* // CHECK13-NEXT: store i32 [[TMP18]], i32* [[CONV8]], align 4, !llvm.access.group !6 // CHECK13-NEXT: [[TMP19:%.*]] = load i64, i64* [[T_VAR_CASTED]], align 8, !llvm.access.group !6 -// CHECK13-NEXT: [[TMP20:%.*]] = load i32, i32* [[CONV1]], align 8, !llvm.access.group !6 +// CHECK13-NEXT: [[TMP20:%.*]] = load i32, i32* [[CONV1]], align 4, !llvm.access.group !6 // CHECK13-NEXT: [[CONV9:%.*]] = bitcast i64* [[SIVAR_CASTED]] to i32* // CHECK13-NEXT: store i32 [[TMP20]], i32* [[CONV9]], align 4, !llvm.access.group !6 // CHECK13-NEXT: [[TMP21:%.*]] = load i64, i64* [[SIVAR_CASTED]], align 8, !llvm.access.group !6 @@ -7096,7 +7096,7 @@ int main() { // CHECK13-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP16]], 1 // CHECK13-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK13-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !10 -// CHECK13-NEXT: [[TMP17:%.*]] = load i32, i32* [[CONV]], align 8, !llvm.access.group !10 +// CHECK13-NEXT: 
[[TMP17:%.*]] = load i32, i32* [[CONV]], align 4, !llvm.access.group !10 // CHECK13-NEXT: [[TMP18:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !10 // CHECK13-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP18]] to i64 // CHECK13-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x i32], [2 x i32]* [[VEC4]], i64 0, i64 [[IDXPROM]] @@ -7108,9 +7108,9 @@ int main() { // CHECK13-NEXT: [[TMP21:%.*]] = bitcast %struct.S* [[VAR7]] to i8* // CHECK13-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 [[TMP20]], i8* align 4 [[TMP21]], i64 4, i1 false), !llvm.access.group !10 // CHECK13-NEXT: [[TMP22:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !10 -// CHECK13-NEXT: [[TMP23:%.*]] = load i32, i32* [[CONV1]], align 8, !llvm.access.group !10 +// CHECK13-NEXT: [[TMP23:%.*]] = load i32, i32* [[CONV1]], align 4, !llvm.access.group !10 // CHECK13-NEXT: [[ADD12:%.*]] = add nsw i32 [[TMP23]], [[TMP22]] -// CHECK13-NEXT: store i32 [[ADD12]], i32* [[CONV1]], align 8, !llvm.access.group !10 +// CHECK13-NEXT: store i32 [[ADD12]], i32* [[CONV1]], align 4, !llvm.access.group !10 // CHECK13-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK13: omp.body.continue: // CHECK13-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] @@ -7174,7 +7174,7 @@ int main() { // CHECK13-NEXT: [[TMP1:%.*]] = load [2 x %struct.S.0]*, [2 x %struct.S.0]** [[S_ARR_ADDR]], align 8 // CHECK13-NEXT: [[TMP2:%.*]] = load %struct.S.0*, %struct.S.0** [[VAR_ADDR]], align 8 // CHECK13-NEXT: store %struct.S.0* [[TMP2]], %struct.S.0** [[TMP]], align 8 -// CHECK13-NEXT: [[TMP3:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK13-NEXT: [[TMP3:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK13-NEXT: [[CONV1:%.*]] = bitcast i64* [[T_VAR_CASTED]] to i32* // CHECK13-NEXT: store i32 [[TMP3]], i32* [[CONV1]], align 4 // CHECK13-NEXT: [[TMP4:%.*]] = load i64, i64* [[T_VAR_CASTED]], align 8 @@ -7275,7 +7275,7 @@ int main() { // CHECK13-NEXT: [[TMP16:%.*]] = zext i32 [[TMP15]] to i64 // CHECK13-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !15 // CHECK13-NEXT: [[TMP18:%.*]] = zext i32 [[TMP17]] to i64 -// CHECK13-NEXT: [[TMP19:%.*]] = load i32, i32* [[CONV]], align 8, !llvm.access.group !15 +// CHECK13-NEXT: [[TMP19:%.*]] = load i32, i32* [[CONV]], align 4, !llvm.access.group !15 // CHECK13-NEXT: [[CONV9:%.*]] = bitcast i64* [[T_VAR_CASTED]] to i32* // CHECK13-NEXT: store i32 [[TMP19]], i32* [[CONV9]], align 4, !llvm.access.group !15 // CHECK13-NEXT: [[TMP20:%.*]] = load i64, i64* [[T_VAR_CASTED]], align 8, !llvm.access.group !15 @@ -7429,7 +7429,7 @@ int main() { // CHECK13-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP17]], 1 // CHECK13-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK13-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !18 -// CHECK13-NEXT: [[TMP18:%.*]] = load i32, i32* [[CONV]], align 8, !llvm.access.group !18 +// CHECK13-NEXT: [[TMP18:%.*]] = load i32, i32* [[CONV]], align 4, !llvm.access.group !18 // CHECK13-NEXT: [[TMP19:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !18 // CHECK13-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP19]] to i64 // CHECK13-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x i32], [2 x i32]* [[VEC4]], i64 0, i64 [[IDXPROM]] @@ -7585,11 +7585,11 @@ int main() { // CHECK14-NEXT: [[TMP1:%.*]] = load [2 x %struct.S]*, [2 x %struct.S]** [[S_ARR_ADDR]], align 8 // CHECK14-NEXT: [[TMP2:%.*]] = load %struct.S*, %struct.S** [[VAR_ADDR]], align 8 // CHECK14-NEXT: [[CONV1:%.*]] = bitcast i64* [[SIVAR_ADDR]] to i32* -// CHECK14-NEXT: [[TMP3:%.*]] = load i32, i32* 
[[CONV]], align 8 +// CHECK14-NEXT: [[TMP3:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK14-NEXT: [[CONV2:%.*]] = bitcast i64* [[T_VAR_CASTED]] to i32* // CHECK14-NEXT: store i32 [[TMP3]], i32* [[CONV2]], align 4 // CHECK14-NEXT: [[TMP4:%.*]] = load i64, i64* [[T_VAR_CASTED]], align 8 -// CHECK14-NEXT: [[TMP5:%.*]] = load i32, i32* [[CONV1]], align 8 +// CHECK14-NEXT: [[TMP5:%.*]] = load i32, i32* [[CONV1]], align 4 // CHECK14-NEXT: [[CONV3:%.*]] = bitcast i64* [[SIVAR_CASTED]] to i32* // CHECK14-NEXT: store i32 [[TMP5]], i32* [[CONV3]], align 4 // CHECK14-NEXT: [[TMP6:%.*]] = load i64, i64* [[SIVAR_CASTED]], align 8 @@ -7688,11 +7688,11 @@ int main() { // CHECK14-NEXT: [[TMP15:%.*]] = zext i32 [[TMP14]] to i64 // CHECK14-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !6 // CHECK14-NEXT: [[TMP17:%.*]] = zext i32 [[TMP16]] to i64 -// CHECK14-NEXT: [[TMP18:%.*]] = load i32, i32* [[CONV]], align 8, !llvm.access.group !6 +// CHECK14-NEXT: [[TMP18:%.*]] = load i32, i32* [[CONV]], align 4, !llvm.access.group !6 // CHECK14-NEXT: [[CONV8:%.*]] = bitcast i64* [[T_VAR_CASTED]] to i32* // CHECK14-NEXT: store i32 [[TMP18]], i32* [[CONV8]], align 4, !llvm.access.group !6 // CHECK14-NEXT: [[TMP19:%.*]] = load i64, i64* [[T_VAR_CASTED]], align 8, !llvm.access.group !6 -// CHECK14-NEXT: [[TMP20:%.*]] = load i32, i32* [[CONV1]], align 8, !llvm.access.group !6 +// CHECK14-NEXT: [[TMP20:%.*]] = load i32, i32* [[CONV1]], align 4, !llvm.access.group !6 // CHECK14-NEXT: [[CONV9:%.*]] = bitcast i64* [[SIVAR_CASTED]] to i32* // CHECK14-NEXT: store i32 [[TMP20]], i32* [[CONV9]], align 4, !llvm.access.group !6 // CHECK14-NEXT: [[TMP21:%.*]] = load i64, i64* [[SIVAR_CASTED]], align 8, !llvm.access.group !6 @@ -7863,7 +7863,7 @@ int main() { // CHECK14-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP16]], 1 // CHECK14-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK14-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !10 -// CHECK14-NEXT: [[TMP17:%.*]] = load i32, i32* [[CONV]], align 8, !llvm.access.group !10 +// CHECK14-NEXT: [[TMP17:%.*]] = load i32, i32* [[CONV]], align 4, !llvm.access.group !10 // CHECK14-NEXT: [[TMP18:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !10 // CHECK14-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP18]] to i64 // CHECK14-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x i32], [2 x i32]* [[VEC4]], i64 0, i64 [[IDXPROM]] @@ -7875,9 +7875,9 @@ int main() { // CHECK14-NEXT: [[TMP21:%.*]] = bitcast %struct.S* [[VAR7]] to i8* // CHECK14-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 [[TMP20]], i8* align 4 [[TMP21]], i64 4, i1 false), !llvm.access.group !10 // CHECK14-NEXT: [[TMP22:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !10 -// CHECK14-NEXT: [[TMP23:%.*]] = load i32, i32* [[CONV1]], align 8, !llvm.access.group !10 +// CHECK14-NEXT: [[TMP23:%.*]] = load i32, i32* [[CONV1]], align 4, !llvm.access.group !10 // CHECK14-NEXT: [[ADD12:%.*]] = add nsw i32 [[TMP23]], [[TMP22]] -// CHECK14-NEXT: store i32 [[ADD12]], i32* [[CONV1]], align 8, !llvm.access.group !10 +// CHECK14-NEXT: store i32 [[ADD12]], i32* [[CONV1]], align 4, !llvm.access.group !10 // CHECK14-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK14: omp.body.continue: // CHECK14-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] @@ -7941,7 +7941,7 @@ int main() { // CHECK14-NEXT: [[TMP1:%.*]] = load [2 x %struct.S.0]*, [2 x %struct.S.0]** [[S_ARR_ADDR]], align 8 // CHECK14-NEXT: [[TMP2:%.*]] = load %struct.S.0*, %struct.S.0** [[VAR_ADDR]], align 8 // CHECK14-NEXT: store 
%struct.S.0* [[TMP2]], %struct.S.0** [[TMP]], align 8 -// CHECK14-NEXT: [[TMP3:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK14-NEXT: [[TMP3:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK14-NEXT: [[CONV1:%.*]] = bitcast i64* [[T_VAR_CASTED]] to i32* // CHECK14-NEXT: store i32 [[TMP3]], i32* [[CONV1]], align 4 // CHECK14-NEXT: [[TMP4:%.*]] = load i64, i64* [[T_VAR_CASTED]], align 8 @@ -8042,7 +8042,7 @@ int main() { // CHECK14-NEXT: [[TMP16:%.*]] = zext i32 [[TMP15]] to i64 // CHECK14-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !15 // CHECK14-NEXT: [[TMP18:%.*]] = zext i32 [[TMP17]] to i64 -// CHECK14-NEXT: [[TMP19:%.*]] = load i32, i32* [[CONV]], align 8, !llvm.access.group !15 +// CHECK14-NEXT: [[TMP19:%.*]] = load i32, i32* [[CONV]], align 4, !llvm.access.group !15 // CHECK14-NEXT: [[CONV9:%.*]] = bitcast i64* [[T_VAR_CASTED]] to i32* // CHECK14-NEXT: store i32 [[TMP19]], i32* [[CONV9]], align 4, !llvm.access.group !15 // CHECK14-NEXT: [[TMP20:%.*]] = load i64, i64* [[T_VAR_CASTED]], align 8, !llvm.access.group !15 @@ -8196,7 +8196,7 @@ int main() { // CHECK14-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP17]], 1 // CHECK14-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK14-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !18 -// CHECK14-NEXT: [[TMP18:%.*]] = load i32, i32* [[CONV]], align 8, !llvm.access.group !18 +// CHECK14-NEXT: [[TMP18:%.*]] = load i32, i32* [[CONV]], align 4, !llvm.access.group !18 // CHECK14-NEXT: [[TMP19:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !18 // CHECK14-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP19]] to i64 // CHECK14-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x i32], [2 x i32]* [[VEC4]], i64 0, i64 [[IDXPROM]] @@ -9829,7 +9829,7 @@ int main() { // CHECK17-NEXT: [[CONV1:%.*]] = bitcast i64* [[G1_ADDR]] to i32* // CHECK17-NEXT: [[CONV2:%.*]] = bitcast i64* [[SIVAR_ADDR]] to i32* // CHECK17-NEXT: store i32* [[CONV1]], i32** [[TMP]], align 8 -// CHECK17-NEXT: [[TMP0:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK17-NEXT: [[TMP0:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK17-NEXT: [[CONV3:%.*]] = bitcast i64* [[G_CASTED]] to i32* // CHECK17-NEXT: store i32 [[TMP0]], i32* [[CONV3]], align 4 // CHECK17-NEXT: [[TMP1:%.*]] = load i64, i64* [[G_CASTED]], align 8 @@ -9838,7 +9838,7 @@ int main() { // CHECK17-NEXT: [[CONV4:%.*]] = bitcast i64* [[G1_CASTED]] to i32* // CHECK17-NEXT: store i32 [[TMP3]], i32* [[CONV4]], align 4 // CHECK17-NEXT: [[TMP4:%.*]] = load i64, i64* [[G1_CASTED]], align 8 -// CHECK17-NEXT: [[TMP5:%.*]] = load i32, i32* [[CONV2]], align 8 +// CHECK17-NEXT: [[TMP5:%.*]] = load i32, i32* [[CONV2]], align 4 // CHECK17-NEXT: [[CONV5:%.*]] = bitcast i64* [[SIVAR_CASTED]] to i32* // CHECK17-NEXT: store i32 [[TMP5]], i32* [[CONV5]], align 4 // CHECK17-NEXT: [[TMP6:%.*]] = load i64, i64* [[SIVAR_CASTED]], align 8 @@ -9905,7 +9905,7 @@ int main() { // CHECK17-NEXT: [[TMP8:%.*]] = zext i32 [[TMP7]] to i64 // CHECK17-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !5 // CHECK17-NEXT: [[TMP10:%.*]] = zext i32 [[TMP9]] to i64 -// CHECK17-NEXT: [[TMP11:%.*]] = load i32, i32* [[CONV]], align 8, !llvm.access.group !5 +// CHECK17-NEXT: [[TMP11:%.*]] = load i32, i32* [[CONV]], align 4, !llvm.access.group !5 // CHECK17-NEXT: [[CONV5:%.*]] = bitcast i64* [[G_CASTED]] to i32* // CHECK17-NEXT: store i32 [[TMP11]], i32* [[CONV5]], align 4, !llvm.access.group !5 // CHECK17-NEXT: [[TMP12:%.*]] = load i64, i64* [[G_CASTED]], align 8, !llvm.access.group 
!5 @@ -9914,7 +9914,7 @@ int main() { // CHECK17-NEXT: [[CONV6:%.*]] = bitcast i64* [[G1_CASTED]] to i32* // CHECK17-NEXT: store i32 [[TMP14]], i32* [[CONV6]], align 4, !llvm.access.group !5 // CHECK17-NEXT: [[TMP15:%.*]] = load i64, i64* [[G1_CASTED]], align 8, !llvm.access.group !5 -// CHECK17-NEXT: [[TMP16:%.*]] = load i32, i32* [[CONV2]], align 8, !llvm.access.group !5 +// CHECK17-NEXT: [[TMP16:%.*]] = load i32, i32* [[CONV2]], align 4, !llvm.access.group !5 // CHECK17-NEXT: [[CONV7:%.*]] = bitcast i64* [[SIVAR_CASTED]] to i32* // CHECK17-NEXT: store i32 [[TMP16]], i32* [[CONV7]], align 4, !llvm.access.group !5 // CHECK17-NEXT: [[TMP17:%.*]] = load i64, i64* [[SIVAR_CASTED]], align 8, !llvm.access.group !5 @@ -10007,10 +10007,10 @@ int main() { // CHECK17-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP9]], 1 // CHECK17-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK17-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !9 -// CHECK17-NEXT: store i32 1, i32* [[CONV]], align 8, !llvm.access.group !9 +// CHECK17-NEXT: store i32 1, i32* [[CONV]], align 4, !llvm.access.group !9 // CHECK17-NEXT: [[TMP10:%.*]] = load i32*, i32** [[TMP]], align 8, !llvm.access.group !9 // CHECK17-NEXT: store volatile i32 1, i32* [[TMP10]], align 4, !llvm.access.group !9 -// CHECK17-NEXT: store i32 2, i32* [[CONV2]], align 8, !llvm.access.group !9 +// CHECK17-NEXT: store i32 2, i32* [[CONV2]], align 4, !llvm.access.group !9 // CHECK17-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[CLASS_ANON]], %class.anon* [[REF_TMP]], i32 0, i32 0 // CHECK17-NEXT: store i32* [[CONV]], i32** [[TMP11]], align 8, !llvm.access.group !9 // CHECK17-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[CLASS_ANON]], %class.anon* [[REF_TMP]], i32 0, i32 1 diff --git a/clang/test/OpenMP/target_teams_distribute_parallel_for_simd_if_codegen.cpp b/clang/test/OpenMP/target_teams_distribute_parallel_for_simd_if_codegen.cpp index ca9521864102..a2844f97aa93 100644 --- a/clang/test/OpenMP/target_teams_distribute_parallel_for_simd_if_codegen.cpp +++ b/clang/test/OpenMP/target_teams_distribute_parallel_for_simd_if_codegen.cpp @@ -157,7 +157,7 @@ int main() { // CHECK1-NEXT: [[ARG_CASTED:%.*]] = alloca i64, align 8 // CHECK1-NEXT: store i64 [[ARG]], i64* [[ARG_ADDR]], align 8 // CHECK1-NEXT: [[CONV:%.*]] = bitcast i64* [[ARG_ADDR]] to i32* -// CHECK1-NEXT: [[TMP0:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK1-NEXT: [[CONV1:%.*]] = bitcast i64* [[ARG_CASTED]] to i32* // CHECK1-NEXT: store i32 [[TMP0]], i32* [[CONV1]], align 4 // CHECK1-NEXT: [[TMP1:%.*]] = load i64, i64* [[ARG_CASTED]], align 8 @@ -214,7 +214,7 @@ int main() { // CHECK1-NEXT: [[TMP8:%.*]] = zext i32 [[TMP7]] to i64 // CHECK1-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !9 // CHECK1-NEXT: [[TMP10:%.*]] = zext i32 [[TMP9]] to i64 -// CHECK1-NEXT: [[TMP11:%.*]] = load i32, i32* [[CONV]], align 8, !llvm.access.group !9 +// CHECK1-NEXT: [[TMP11:%.*]] = load i32, i32* [[CONV]], align 4, !llvm.access.group !9 // CHECK1-NEXT: [[CONV2:%.*]] = bitcast i64* [[ARG_CASTED]] to i32* // CHECK1-NEXT: store i32 [[TMP11]], i32* [[CONV2]], align 4, !llvm.access.group !9 // CHECK1-NEXT: [[TMP12:%.*]] = load i64, i64* [[ARG_CASTED]], align 8, !llvm.access.group !9 @@ -298,7 +298,7 @@ int main() { // CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP9]], 1 // CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK1-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !13 -// 
CHECK1-NEXT: store i32 0, i32* [[CONV]], align 8, !llvm.access.group !13 +// CHECK1-NEXT: store i32 0, i32* [[CONV]], align 4, !llvm.access.group !13 // CHECK1-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK1: omp.body.continue: // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] @@ -860,7 +860,7 @@ int main() { // CHECK1-NEXT: [[DOTCAPTURE_EXPR__CASTED:%.*]] = alloca i64, align 8 // CHECK1-NEXT: store i64 [[DOTCAPTURE_EXPR_]], i64* [[DOTCAPTURE_EXPR__ADDR]], align 8 // CHECK1-NEXT: [[CONV:%.*]] = bitcast i64* [[DOTCAPTURE_EXPR__ADDR]] to i8* -// CHECK1-NEXT: [[TMP0:%.*]] = load i8, i8* [[CONV]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load i8, i8* [[CONV]], align 1 // CHECK1-NEXT: [[TOBOOL:%.*]] = trunc i8 [[TMP0]] to i1 // CHECK1-NEXT: [[CONV1:%.*]] = bitcast i64* [[DOTCAPTURE_EXPR__CASTED]] to i8* // CHECK1-NEXT: [[FROMBOOL:%.*]] = zext i1 [[TOBOOL]] to i8 @@ -919,7 +919,7 @@ int main() { // CHECK1-NEXT: [[TMP8:%.*]] = zext i32 [[TMP7]] to i64 // CHECK1-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !36 // CHECK1-NEXT: [[TMP10:%.*]] = zext i32 [[TMP9]] to i64 -// CHECK1-NEXT: [[TMP11:%.*]] = load i8, i8* [[CONV]], align 8, !llvm.access.group !36 +// CHECK1-NEXT: [[TMP11:%.*]] = load i8, i8* [[CONV]], align 1, !llvm.access.group !36 // CHECK1-NEXT: [[TOBOOL:%.*]] = trunc i8 [[TMP11]] to i1 // CHECK1-NEXT: br i1 [[TOBOOL]], label [[OMP_IF_THEN:%.*]], label [[OMP_IF_ELSE:%.*]] // CHECK1: omp_if.then: @@ -1403,7 +1403,7 @@ int main() { // CHECK1-NEXT: [[DOTCAPTURE_EXPR__CASTED:%.*]] = alloca i64, align 8 // CHECK1-NEXT: store i64 [[DOTCAPTURE_EXPR_]], i64* [[DOTCAPTURE_EXPR__ADDR]], align 8 // CHECK1-NEXT: [[CONV:%.*]] = bitcast i64* [[DOTCAPTURE_EXPR__ADDR]] to i8* -// CHECK1-NEXT: [[TMP0:%.*]] = load i8, i8* [[CONV]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load i8, i8* [[CONV]], align 1 // CHECK1-NEXT: [[TOBOOL:%.*]] = trunc i8 [[TMP0]] to i1 // CHECK1-NEXT: [[CONV1:%.*]] = bitcast i64* [[DOTCAPTURE_EXPR__CASTED]] to i8* // CHECK1-NEXT: [[FROMBOOL:%.*]] = zext i1 [[TOBOOL]] to i8 @@ -1462,7 +1462,7 @@ int main() { // CHECK1-NEXT: [[TMP8:%.*]] = zext i32 [[TMP7]] to i64 // CHECK1-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !54 // CHECK1-NEXT: [[TMP10:%.*]] = zext i32 [[TMP9]] to i64 -// CHECK1-NEXT: [[TMP11:%.*]] = load i8, i8* [[CONV]], align 8, !llvm.access.group !54 +// CHECK1-NEXT: [[TMP11:%.*]] = load i8, i8* [[CONV]], align 1, !llvm.access.group !54 // CHECK1-NEXT: [[TOBOOL:%.*]] = trunc i8 [[TMP11]] to i1 // CHECK1-NEXT: br i1 [[TOBOOL]], label [[OMP_IF_THEN:%.*]], label [[OMP_IF_ELSE:%.*]] // CHECK1: omp_if.then: @@ -1631,7 +1631,7 @@ int main() { // CHECK2-NEXT: [[ARG_CASTED:%.*]] = alloca i64, align 8 // CHECK2-NEXT: store i64 [[ARG]], i64* [[ARG_ADDR]], align 8 // CHECK2-NEXT: [[CONV:%.*]] = bitcast i64* [[ARG_ADDR]] to i32* -// CHECK2-NEXT: [[TMP0:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK2-NEXT: [[TMP0:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK2-NEXT: [[CONV1:%.*]] = bitcast i64* [[ARG_CASTED]] to i32* // CHECK2-NEXT: store i32 [[TMP0]], i32* [[CONV1]], align 4 // CHECK2-NEXT: [[TMP1:%.*]] = load i64, i64* [[ARG_CASTED]], align 8 @@ -1688,7 +1688,7 @@ int main() { // CHECK2-NEXT: [[TMP8:%.*]] = zext i32 [[TMP7]] to i64 // CHECK2-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !9 // CHECK2-NEXT: [[TMP10:%.*]] = zext i32 [[TMP9]] to i64 -// CHECK2-NEXT: [[TMP11:%.*]] = load i32, i32* [[CONV]], align 8, !llvm.access.group !9 +// CHECK2-NEXT: 
[[TMP11:%.*]] = load i32, i32* [[CONV]], align 4, !llvm.access.group !9 // CHECK2-NEXT: [[CONV2:%.*]] = bitcast i64* [[ARG_CASTED]] to i32* // CHECK2-NEXT: store i32 [[TMP11]], i32* [[CONV2]], align 4, !llvm.access.group !9 // CHECK2-NEXT: [[TMP12:%.*]] = load i64, i64* [[ARG_CASTED]], align 8, !llvm.access.group !9 @@ -1772,7 +1772,7 @@ int main() { // CHECK2-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP9]], 1 // CHECK2-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK2-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !13 -// CHECK2-NEXT: store i32 0, i32* [[CONV]], align 8, !llvm.access.group !13 +// CHECK2-NEXT: store i32 0, i32* [[CONV]], align 4, !llvm.access.group !13 // CHECK2-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK2: omp.body.continue: // CHECK2-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] @@ -2334,7 +2334,7 @@ int main() { // CHECK2-NEXT: [[DOTCAPTURE_EXPR__CASTED:%.*]] = alloca i64, align 8 // CHECK2-NEXT: store i64 [[DOTCAPTURE_EXPR_]], i64* [[DOTCAPTURE_EXPR__ADDR]], align 8 // CHECK2-NEXT: [[CONV:%.*]] = bitcast i64* [[DOTCAPTURE_EXPR__ADDR]] to i8* -// CHECK2-NEXT: [[TMP0:%.*]] = load i8, i8* [[CONV]], align 8 +// CHECK2-NEXT: [[TMP0:%.*]] = load i8, i8* [[CONV]], align 1 // CHECK2-NEXT: [[TOBOOL:%.*]] = trunc i8 [[TMP0]] to i1 // CHECK2-NEXT: [[CONV1:%.*]] = bitcast i64* [[DOTCAPTURE_EXPR__CASTED]] to i8* // CHECK2-NEXT: [[FROMBOOL:%.*]] = zext i1 [[TOBOOL]] to i8 @@ -2393,7 +2393,7 @@ int main() { // CHECK2-NEXT: [[TMP8:%.*]] = zext i32 [[TMP7]] to i64 // CHECK2-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !36 // CHECK2-NEXT: [[TMP10:%.*]] = zext i32 [[TMP9]] to i64 -// CHECK2-NEXT: [[TMP11:%.*]] = load i8, i8* [[CONV]], align 8, !llvm.access.group !36 +// CHECK2-NEXT: [[TMP11:%.*]] = load i8, i8* [[CONV]], align 1, !llvm.access.group !36 // CHECK2-NEXT: [[TOBOOL:%.*]] = trunc i8 [[TMP11]] to i1 // CHECK2-NEXT: br i1 [[TOBOOL]], label [[OMP_IF_THEN:%.*]], label [[OMP_IF_ELSE:%.*]] // CHECK2: omp_if.then: @@ -2877,7 +2877,7 @@ int main() { // CHECK2-NEXT: [[DOTCAPTURE_EXPR__CASTED:%.*]] = alloca i64, align 8 // CHECK2-NEXT: store i64 [[DOTCAPTURE_EXPR_]], i64* [[DOTCAPTURE_EXPR__ADDR]], align 8 // CHECK2-NEXT: [[CONV:%.*]] = bitcast i64* [[DOTCAPTURE_EXPR__ADDR]] to i8* -// CHECK2-NEXT: [[TMP0:%.*]] = load i8, i8* [[CONV]], align 8 +// CHECK2-NEXT: [[TMP0:%.*]] = load i8, i8* [[CONV]], align 1 // CHECK2-NEXT: [[TOBOOL:%.*]] = trunc i8 [[TMP0]] to i1 // CHECK2-NEXT: [[CONV1:%.*]] = bitcast i64* [[DOTCAPTURE_EXPR__CASTED]] to i8* // CHECK2-NEXT: [[FROMBOOL:%.*]] = zext i1 [[TOBOOL]] to i8 @@ -2936,7 +2936,7 @@ int main() { // CHECK2-NEXT: [[TMP8:%.*]] = zext i32 [[TMP7]] to i64 // CHECK2-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !54 // CHECK2-NEXT: [[TMP10:%.*]] = zext i32 [[TMP9]] to i64 -// CHECK2-NEXT: [[TMP11:%.*]] = load i8, i8* [[CONV]], align 8, !llvm.access.group !54 +// CHECK2-NEXT: [[TMP11:%.*]] = load i8, i8* [[CONV]], align 1, !llvm.access.group !54 // CHECK2-NEXT: [[TOBOOL:%.*]] = trunc i8 [[TMP11]] to i1 // CHECK2-NEXT: br i1 [[TOBOOL]], label [[OMP_IF_THEN:%.*]], label [[OMP_IF_ELSE:%.*]] // CHECK2: omp_if.then: @@ -3105,7 +3105,7 @@ int main() { // CHECK3-NEXT: [[ARG_CASTED:%.*]] = alloca i64, align 8 // CHECK3-NEXT: store i64 [[ARG]], i64* [[ARG_ADDR]], align 8 // CHECK3-NEXT: [[CONV:%.*]] = bitcast i64* [[ARG_ADDR]] to i32* -// CHECK3-NEXT: [[TMP0:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK3-NEXT: [[TMP0:%.*]] = load i32, i32* [[CONV]], align 4 // 
CHECK3-NEXT: [[CONV1:%.*]] = bitcast i64* [[ARG_CASTED]] to i32* // CHECK3-NEXT: store i32 [[TMP0]], i32* [[CONV1]], align 4 // CHECK3-NEXT: [[TMP1:%.*]] = load i64, i64* [[ARG_CASTED]], align 8 @@ -3162,7 +3162,7 @@ int main() { // CHECK3-NEXT: [[TMP8:%.*]] = zext i32 [[TMP7]] to i64 // CHECK3-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !9 // CHECK3-NEXT: [[TMP10:%.*]] = zext i32 [[TMP9]] to i64 -// CHECK3-NEXT: [[TMP11:%.*]] = load i32, i32* [[CONV]], align 8, !nontemporal !10, !llvm.access.group !9 +// CHECK3-NEXT: [[TMP11:%.*]] = load i32, i32* [[CONV]], align 4, !nontemporal !10, !llvm.access.group !9 // CHECK3-NEXT: [[CONV2:%.*]] = bitcast i64* [[ARG_CASTED]] to i32* // CHECK3-NEXT: store i32 [[TMP11]], i32* [[CONV2]], align 4, !llvm.access.group !9 // CHECK3-NEXT: [[TMP12:%.*]] = load i64, i64* [[ARG_CASTED]], align 8, !llvm.access.group !9 @@ -3246,7 +3246,7 @@ int main() { // CHECK3-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP9]], 1 // CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK3-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !14 -// CHECK3-NEXT: store i32 0, i32* [[CONV]], align 8, !nontemporal !10, !llvm.access.group !14 +// CHECK3-NEXT: store i32 0, i32* [[CONV]], align 4, !nontemporal !10, !llvm.access.group !14 // CHECK3-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK3: omp.body.continue: // CHECK3-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] @@ -3808,7 +3808,7 @@ int main() { // CHECK3-NEXT: [[DOTCAPTURE_EXPR__CASTED:%.*]] = alloca i64, align 8 // CHECK3-NEXT: store i64 [[DOTCAPTURE_EXPR_]], i64* [[DOTCAPTURE_EXPR__ADDR]], align 8 // CHECK3-NEXT: [[CONV:%.*]] = bitcast i64* [[DOTCAPTURE_EXPR__ADDR]] to i8* -// CHECK3-NEXT: [[TMP0:%.*]] = load i8, i8* [[CONV]], align 8 +// CHECK3-NEXT: [[TMP0:%.*]] = load i8, i8* [[CONV]], align 1 // CHECK3-NEXT: [[TOBOOL:%.*]] = trunc i8 [[TMP0]] to i1 // CHECK3-NEXT: [[CONV1:%.*]] = bitcast i64* [[DOTCAPTURE_EXPR__CASTED]] to i8* // CHECK3-NEXT: [[FROMBOOL:%.*]] = zext i1 [[TOBOOL]] to i8 @@ -3859,7 +3859,7 @@ int main() { // CHECK3-NEXT: store i32 [[COND]], i32* [[DOTOMP_COMB_UB]], align 4 // CHECK3-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 // CHECK3-NEXT: store i32 [[TMP4]], i32* [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[TMP5:%.*]] = load i8, i8* [[CONV]], align 8 +// CHECK3-NEXT: [[TMP5:%.*]] = load i8, i8* [[CONV]], align 1 // CHECK3-NEXT: [[TOBOOL:%.*]] = trunc i8 [[TMP5]] to i1 // CHECK3-NEXT: br i1 [[TOBOOL]], label [[OMP_IF_THEN:%.*]], label [[OMP_IF_ELSE6:%.*]] // CHECK3: omp_if.then: @@ -3874,13 +3874,13 @@ int main() { // CHECK3-NEXT: [[TMP9:%.*]] = zext i32 [[TMP8]] to i64 // CHECK3-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !34 // CHECK3-NEXT: [[TMP11:%.*]] = zext i32 [[TMP10]] to i64 -// CHECK3-NEXT: [[TMP12:%.*]] = load i8, i8* [[CONV]], align 8, !llvm.access.group !34 +// CHECK3-NEXT: [[TMP12:%.*]] = load i8, i8* [[CONV]], align 1, !llvm.access.group !34 // CHECK3-NEXT: [[TOBOOL2:%.*]] = trunc i8 [[TMP12]] to i1 // CHECK3-NEXT: [[CONV3:%.*]] = bitcast i64* [[DOTCAPTURE_EXPR__CASTED]] to i8* // CHECK3-NEXT: [[FROMBOOL:%.*]] = zext i1 [[TOBOOL2]] to i8 // CHECK3-NEXT: store i8 [[FROMBOOL]], i8* [[CONV3]], align 1, !llvm.access.group !34 // CHECK3-NEXT: [[TMP13:%.*]] = load i64, i64* [[DOTCAPTURE_EXPR__CASTED]], align 8, !llvm.access.group !34 -// CHECK3-NEXT: [[TMP14:%.*]] = load i8, i8* [[CONV]], align 8, !llvm.access.group !34 +// CHECK3-NEXT: [[TMP14:%.*]] = load i8, i8* [[CONV]], align 1, 
!llvm.access.group !34 // CHECK3-NEXT: [[TOBOOL4:%.*]] = trunc i8 [[TMP14]] to i1 // CHECK3-NEXT: br i1 [[TOBOOL4]], label [[OMP_IF_THEN5:%.*]], label [[OMP_IF_ELSE:%.*]] // CHECK3: omp_if.then5: @@ -3915,13 +3915,13 @@ int main() { // CHECK3-NEXT: [[TMP21:%.*]] = zext i32 [[TMP20]] to i64 // CHECK3-NEXT: [[TMP22:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK3-NEXT: [[TMP23:%.*]] = zext i32 [[TMP22]] to i64 -// CHECK3-NEXT: [[TMP24:%.*]] = load i8, i8* [[CONV]], align 8 +// CHECK3-NEXT: [[TMP24:%.*]] = load i8, i8* [[CONV]], align 1 // CHECK3-NEXT: [[TOBOOL10:%.*]] = trunc i8 [[TMP24]] to i1 // CHECK3-NEXT: [[CONV12:%.*]] = bitcast i64* [[DOTCAPTURE_EXPR__CASTED11]] to i8* // CHECK3-NEXT: [[FROMBOOL13:%.*]] = zext i1 [[TOBOOL10]] to i8 // CHECK3-NEXT: store i8 [[FROMBOOL13]], i8* [[CONV12]], align 1 // CHECK3-NEXT: [[TMP25:%.*]] = load i64, i64* [[DOTCAPTURE_EXPR__CASTED11]], align 8 -// CHECK3-NEXT: [[TMP26:%.*]] = load i8, i8* [[CONV]], align 8 +// CHECK3-NEXT: [[TMP26:%.*]] = load i8, i8* [[CONV]], align 1 // CHECK3-NEXT: [[TOBOOL14:%.*]] = trunc i8 [[TMP26]] to i1 // CHECK3-NEXT: br i1 [[TOBOOL14]], label [[OMP_IF_THEN15:%.*]], label [[OMP_IF_ELSE16:%.*]] // CHECK3: omp_if.then15: @@ -3989,7 +3989,7 @@ int main() { // CHECK3-NEXT: store i32 [[CONV2]], i32* [[DOTOMP_UB]], align 4 // CHECK3-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK3-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK3-NEXT: [[TMP2:%.*]] = load i8, i8* [[CONV]], align 8 +// CHECK3-NEXT: [[TMP2:%.*]] = load i8, i8* [[CONV]], align 1 // CHECK3-NEXT: [[TOBOOL:%.*]] = trunc i8 [[TMP2]] to i1 // CHECK3-NEXT: br i1 [[TOBOOL]], label [[OMP_IF_THEN:%.*]], label [[OMP_IF_ELSE:%.*]] // CHECK3: omp_if.then: @@ -4117,7 +4117,7 @@ int main() { // CHECK3-NEXT: store i32 [[CONV2]], i32* [[DOTOMP_UB]], align 4 // CHECK3-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK3-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK3-NEXT: [[TMP2:%.*]] = load i8, i8* [[CONV]], align 8 +// CHECK3-NEXT: [[TMP2:%.*]] = load i8, i8* [[CONV]], align 1 // CHECK3-NEXT: [[TOBOOL:%.*]] = trunc i8 [[TMP2]] to i1 // CHECK3-NEXT: br i1 [[TOBOOL]], label [[OMP_IF_THEN:%.*]], label [[OMP_IF_ELSE:%.*]] // CHECK3: omp_if.then: @@ -4585,7 +4585,7 @@ int main() { // CHECK3-NEXT: [[DOTCAPTURE_EXPR__CASTED:%.*]] = alloca i64, align 8 // CHECK3-NEXT: store i64 [[DOTCAPTURE_EXPR_]], i64* [[DOTCAPTURE_EXPR__ADDR]], align 8 // CHECK3-NEXT: [[CONV:%.*]] = bitcast i64* [[DOTCAPTURE_EXPR__ADDR]] to i8* -// CHECK3-NEXT: [[TMP0:%.*]] = load i8, i8* [[CONV]], align 8 +// CHECK3-NEXT: [[TMP0:%.*]] = load i8, i8* [[CONV]], align 1 // CHECK3-NEXT: [[TOBOOL:%.*]] = trunc i8 [[TMP0]] to i1 // CHECK3-NEXT: [[CONV1:%.*]] = bitcast i64* [[DOTCAPTURE_EXPR__CASTED]] to i8* // CHECK3-NEXT: [[FROMBOOL:%.*]] = zext i1 [[TOBOOL]] to i8 @@ -4644,7 +4644,7 @@ int main() { // CHECK3-NEXT: [[TMP8:%.*]] = zext i32 [[TMP7]] to i64 // CHECK3-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !54 // CHECK3-NEXT: [[TMP10:%.*]] = zext i32 [[TMP9]] to i64 -// CHECK3-NEXT: [[TMP11:%.*]] = load i8, i8* [[CONV]], align 8, !llvm.access.group !54 +// CHECK3-NEXT: [[TMP11:%.*]] = load i8, i8* [[CONV]], align 1, !llvm.access.group !54 // CHECK3-NEXT: [[TOBOOL:%.*]] = trunc i8 [[TMP11]] to i1 // CHECK3-NEXT: br i1 [[TOBOOL]], label [[OMP_IF_THEN:%.*]], label [[OMP_IF_ELSE:%.*]] // CHECK3: omp_if.then: @@ -4813,7 +4813,7 @@ int main() { // CHECK4-NEXT: [[ARG_CASTED:%.*]] = alloca i64, align 8 // CHECK4-NEXT: store i64 
[[ARG]], i64* [[ARG_ADDR]], align 8 // CHECK4-NEXT: [[CONV:%.*]] = bitcast i64* [[ARG_ADDR]] to i32* -// CHECK4-NEXT: [[TMP0:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK4-NEXT: [[TMP0:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK4-NEXT: [[CONV1:%.*]] = bitcast i64* [[ARG_CASTED]] to i32* // CHECK4-NEXT: store i32 [[TMP0]], i32* [[CONV1]], align 4 // CHECK4-NEXT: [[TMP1:%.*]] = load i64, i64* [[ARG_CASTED]], align 8 @@ -4870,7 +4870,7 @@ int main() { // CHECK4-NEXT: [[TMP8:%.*]] = zext i32 [[TMP7]] to i64 // CHECK4-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !9 // CHECK4-NEXT: [[TMP10:%.*]] = zext i32 [[TMP9]] to i64 -// CHECK4-NEXT: [[TMP11:%.*]] = load i32, i32* [[CONV]], align 8, !nontemporal !10, !llvm.access.group !9 +// CHECK4-NEXT: [[TMP11:%.*]] = load i32, i32* [[CONV]], align 4, !nontemporal !10, !llvm.access.group !9 // CHECK4-NEXT: [[CONV2:%.*]] = bitcast i64* [[ARG_CASTED]] to i32* // CHECK4-NEXT: store i32 [[TMP11]], i32* [[CONV2]], align 4, !llvm.access.group !9 // CHECK4-NEXT: [[TMP12:%.*]] = load i64, i64* [[ARG_CASTED]], align 8, !llvm.access.group !9 @@ -4954,7 +4954,7 @@ int main() { // CHECK4-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP9]], 1 // CHECK4-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK4-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !14 -// CHECK4-NEXT: store i32 0, i32* [[CONV]], align 8, !nontemporal !10, !llvm.access.group !14 +// CHECK4-NEXT: store i32 0, i32* [[CONV]], align 4, !nontemporal !10, !llvm.access.group !14 // CHECK4-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK4: omp.body.continue: // CHECK4-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] @@ -5516,7 +5516,7 @@ int main() { // CHECK4-NEXT: [[DOTCAPTURE_EXPR__CASTED:%.*]] = alloca i64, align 8 // CHECK4-NEXT: store i64 [[DOTCAPTURE_EXPR_]], i64* [[DOTCAPTURE_EXPR__ADDR]], align 8 // CHECK4-NEXT: [[CONV:%.*]] = bitcast i64* [[DOTCAPTURE_EXPR__ADDR]] to i8* -// CHECK4-NEXT: [[TMP0:%.*]] = load i8, i8* [[CONV]], align 8 +// CHECK4-NEXT: [[TMP0:%.*]] = load i8, i8* [[CONV]], align 1 // CHECK4-NEXT: [[TOBOOL:%.*]] = trunc i8 [[TMP0]] to i1 // CHECK4-NEXT: [[CONV1:%.*]] = bitcast i64* [[DOTCAPTURE_EXPR__CASTED]] to i8* // CHECK4-NEXT: [[FROMBOOL:%.*]] = zext i1 [[TOBOOL]] to i8 @@ -5567,7 +5567,7 @@ int main() { // CHECK4-NEXT: store i32 [[COND]], i32* [[DOTOMP_COMB_UB]], align 4 // CHECK4-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 // CHECK4-NEXT: store i32 [[TMP4]], i32* [[DOTOMP_IV]], align 4 -// CHECK4-NEXT: [[TMP5:%.*]] = load i8, i8* [[CONV]], align 8 +// CHECK4-NEXT: [[TMP5:%.*]] = load i8, i8* [[CONV]], align 1 // CHECK4-NEXT: [[TOBOOL:%.*]] = trunc i8 [[TMP5]] to i1 // CHECK4-NEXT: br i1 [[TOBOOL]], label [[OMP_IF_THEN:%.*]], label [[OMP_IF_ELSE6:%.*]] // CHECK4: omp_if.then: @@ -5582,13 +5582,13 @@ int main() { // CHECK4-NEXT: [[TMP9:%.*]] = zext i32 [[TMP8]] to i64 // CHECK4-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !34 // CHECK4-NEXT: [[TMP11:%.*]] = zext i32 [[TMP10]] to i64 -// CHECK4-NEXT: [[TMP12:%.*]] = load i8, i8* [[CONV]], align 8, !llvm.access.group !34 +// CHECK4-NEXT: [[TMP12:%.*]] = load i8, i8* [[CONV]], align 1, !llvm.access.group !34 // CHECK4-NEXT: [[TOBOOL2:%.*]] = trunc i8 [[TMP12]] to i1 // CHECK4-NEXT: [[CONV3:%.*]] = bitcast i64* [[DOTCAPTURE_EXPR__CASTED]] to i8* // CHECK4-NEXT: [[FROMBOOL:%.*]] = zext i1 [[TOBOOL2]] to i8 // CHECK4-NEXT: store i8 [[FROMBOOL]], i8* [[CONV3]], align 1, !llvm.access.group !34 // CHECK4-NEXT: [[TMP13:%.*]] = 
load i64, i64* [[DOTCAPTURE_EXPR__CASTED]], align 8, !llvm.access.group !34 -// CHECK4-NEXT: [[TMP14:%.*]] = load i8, i8* [[CONV]], align 8, !llvm.access.group !34 +// CHECK4-NEXT: [[TMP14:%.*]] = load i8, i8* [[CONV]], align 1, !llvm.access.group !34 // CHECK4-NEXT: [[TOBOOL4:%.*]] = trunc i8 [[TMP14]] to i1 // CHECK4-NEXT: br i1 [[TOBOOL4]], label [[OMP_IF_THEN5:%.*]], label [[OMP_IF_ELSE:%.*]] // CHECK4: omp_if.then5: @@ -5623,13 +5623,13 @@ int main() { // CHECK4-NEXT: [[TMP21:%.*]] = zext i32 [[TMP20]] to i64 // CHECK4-NEXT: [[TMP22:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK4-NEXT: [[TMP23:%.*]] = zext i32 [[TMP22]] to i64 -// CHECK4-NEXT: [[TMP24:%.*]] = load i8, i8* [[CONV]], align 8 +// CHECK4-NEXT: [[TMP24:%.*]] = load i8, i8* [[CONV]], align 1 // CHECK4-NEXT: [[TOBOOL10:%.*]] = trunc i8 [[TMP24]] to i1 // CHECK4-NEXT: [[CONV12:%.*]] = bitcast i64* [[DOTCAPTURE_EXPR__CASTED11]] to i8* // CHECK4-NEXT: [[FROMBOOL13:%.*]] = zext i1 [[TOBOOL10]] to i8 // CHECK4-NEXT: store i8 [[FROMBOOL13]], i8* [[CONV12]], align 1 // CHECK4-NEXT: [[TMP25:%.*]] = load i64, i64* [[DOTCAPTURE_EXPR__CASTED11]], align 8 -// CHECK4-NEXT: [[TMP26:%.*]] = load i8, i8* [[CONV]], align 8 +// CHECK4-NEXT: [[TMP26:%.*]] = load i8, i8* [[CONV]], align 1 // CHECK4-NEXT: [[TOBOOL14:%.*]] = trunc i8 [[TMP26]] to i1 // CHECK4-NEXT: br i1 [[TOBOOL14]], label [[OMP_IF_THEN15:%.*]], label [[OMP_IF_ELSE16:%.*]] // CHECK4: omp_if.then15: @@ -5697,7 +5697,7 @@ int main() { // CHECK4-NEXT: store i32 [[CONV2]], i32* [[DOTOMP_UB]], align 4 // CHECK4-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK4-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK4-NEXT: [[TMP2:%.*]] = load i8, i8* [[CONV]], align 8 +// CHECK4-NEXT: [[TMP2:%.*]] = load i8, i8* [[CONV]], align 1 // CHECK4-NEXT: [[TOBOOL:%.*]] = trunc i8 [[TMP2]] to i1 // CHECK4-NEXT: br i1 [[TOBOOL]], label [[OMP_IF_THEN:%.*]], label [[OMP_IF_ELSE:%.*]] // CHECK4: omp_if.then: @@ -5825,7 +5825,7 @@ int main() { // CHECK4-NEXT: store i32 [[CONV2]], i32* [[DOTOMP_UB]], align 4 // CHECK4-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK4-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK4-NEXT: [[TMP2:%.*]] = load i8, i8* [[CONV]], align 8 +// CHECK4-NEXT: [[TMP2:%.*]] = load i8, i8* [[CONV]], align 1 // CHECK4-NEXT: [[TOBOOL:%.*]] = trunc i8 [[TMP2]] to i1 // CHECK4-NEXT: br i1 [[TOBOOL]], label [[OMP_IF_THEN:%.*]], label [[OMP_IF_ELSE:%.*]] // CHECK4: omp_if.then: @@ -6293,7 +6293,7 @@ int main() { // CHECK4-NEXT: [[DOTCAPTURE_EXPR__CASTED:%.*]] = alloca i64, align 8 // CHECK4-NEXT: store i64 [[DOTCAPTURE_EXPR_]], i64* [[DOTCAPTURE_EXPR__ADDR]], align 8 // CHECK4-NEXT: [[CONV:%.*]] = bitcast i64* [[DOTCAPTURE_EXPR__ADDR]] to i8* -// CHECK4-NEXT: [[TMP0:%.*]] = load i8, i8* [[CONV]], align 8 +// CHECK4-NEXT: [[TMP0:%.*]] = load i8, i8* [[CONV]], align 1 // CHECK4-NEXT: [[TOBOOL:%.*]] = trunc i8 [[TMP0]] to i1 // CHECK4-NEXT: [[CONV1:%.*]] = bitcast i64* [[DOTCAPTURE_EXPR__CASTED]] to i8* // CHECK4-NEXT: [[FROMBOOL:%.*]] = zext i1 [[TOBOOL]] to i8 @@ -6352,7 +6352,7 @@ int main() { // CHECK4-NEXT: [[TMP8:%.*]] = zext i32 [[TMP7]] to i64 // CHECK4-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !54 // CHECK4-NEXT: [[TMP10:%.*]] = zext i32 [[TMP9]] to i64 -// CHECK4-NEXT: [[TMP11:%.*]] = load i8, i8* [[CONV]], align 8, !llvm.access.group !54 +// CHECK4-NEXT: [[TMP11:%.*]] = load i8, i8* [[CONV]], align 1, !llvm.access.group !54 // CHECK4-NEXT: [[TOBOOL:%.*]] = trunc i8 [[TMP11]] to i1 
// CHECK4-NEXT: br i1 [[TOBOOL]], label [[OMP_IF_THEN:%.*]], label [[OMP_IF_ELSE:%.*]] // CHECK4: omp_if.then: @@ -7707,7 +7707,7 @@ int main() { // CHECK9-NEXT: [[ARG_CASTED:%.*]] = alloca i64, align 8 // CHECK9-NEXT: store i64 [[ARG]], i64* [[ARG_ADDR]], align 8 // CHECK9-NEXT: [[CONV:%.*]] = bitcast i64* [[ARG_ADDR]] to i32* -// CHECK9-NEXT: [[TMP0:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK9-NEXT: [[TMP0:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK9-NEXT: [[CONV1:%.*]] = bitcast i64* [[ARG_CASTED]] to i32* // CHECK9-NEXT: store i32 [[TMP0]], i32* [[CONV1]], align 4 // CHECK9-NEXT: [[TMP1:%.*]] = load i64, i64* [[ARG_CASTED]], align 8 @@ -7764,7 +7764,7 @@ int main() { // CHECK9-NEXT: [[TMP8:%.*]] = zext i32 [[TMP7]] to i64 // CHECK9-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !13 // CHECK9-NEXT: [[TMP10:%.*]] = zext i32 [[TMP9]] to i64 -// CHECK9-NEXT: [[TMP11:%.*]] = load i32, i32* [[CONV]], align 8, !llvm.access.group !13 +// CHECK9-NEXT: [[TMP11:%.*]] = load i32, i32* [[CONV]], align 4, !llvm.access.group !13 // CHECK9-NEXT: [[CONV2:%.*]] = bitcast i64* [[ARG_CASTED]] to i32* // CHECK9-NEXT: store i32 [[TMP11]], i32* [[CONV2]], align 4, !llvm.access.group !13 // CHECK9-NEXT: [[TMP12:%.*]] = load i64, i64* [[ARG_CASTED]], align 8, !llvm.access.group !13 @@ -7848,7 +7848,7 @@ int main() { // CHECK9-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP9]], 1 // CHECK9-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK9-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !17 -// CHECK9-NEXT: store i32 0, i32* [[CONV]], align 8, !llvm.access.group !17 +// CHECK9-NEXT: store i32 0, i32* [[CONV]], align 4, !llvm.access.group !17 // CHECK9-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK9: omp.body.continue: // CHECK9-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] @@ -8410,7 +8410,7 @@ int main() { // CHECK9-NEXT: [[DOTCAPTURE_EXPR__CASTED:%.*]] = alloca i64, align 8 // CHECK9-NEXT: store i64 [[DOTCAPTURE_EXPR_]], i64* [[DOTCAPTURE_EXPR__ADDR]], align 8 // CHECK9-NEXT: [[CONV:%.*]] = bitcast i64* [[DOTCAPTURE_EXPR__ADDR]] to i8* -// CHECK9-NEXT: [[TMP0:%.*]] = load i8, i8* [[CONV]], align 8 +// CHECK9-NEXT: [[TMP0:%.*]] = load i8, i8* [[CONV]], align 1 // CHECK9-NEXT: [[TOBOOL:%.*]] = trunc i8 [[TMP0]] to i1 // CHECK9-NEXT: [[CONV1:%.*]] = bitcast i64* [[DOTCAPTURE_EXPR__CASTED]] to i8* // CHECK9-NEXT: [[FROMBOOL:%.*]] = zext i1 [[TOBOOL]] to i8 @@ -8469,7 +8469,7 @@ int main() { // CHECK9-NEXT: [[TMP8:%.*]] = zext i32 [[TMP7]] to i64 // CHECK9-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !40 // CHECK9-NEXT: [[TMP10:%.*]] = zext i32 [[TMP9]] to i64 -// CHECK9-NEXT: [[TMP11:%.*]] = load i8, i8* [[CONV]], align 8, !llvm.access.group !40 +// CHECK9-NEXT: [[TMP11:%.*]] = load i8, i8* [[CONV]], align 1, !llvm.access.group !40 // CHECK9-NEXT: [[TOBOOL:%.*]] = trunc i8 [[TMP11]] to i1 // CHECK9-NEXT: br i1 [[TOBOOL]], label [[OMP_IF_THEN:%.*]], label [[OMP_IF_ELSE:%.*]] // CHECK9: omp_if.then: @@ -8953,7 +8953,7 @@ int main() { // CHECK9-NEXT: [[DOTCAPTURE_EXPR__CASTED:%.*]] = alloca i64, align 8 // CHECK9-NEXT: store i64 [[DOTCAPTURE_EXPR_]], i64* [[DOTCAPTURE_EXPR__ADDR]], align 8 // CHECK9-NEXT: [[CONV:%.*]] = bitcast i64* [[DOTCAPTURE_EXPR__ADDR]] to i8* -// CHECK9-NEXT: [[TMP0:%.*]] = load i8, i8* [[CONV]], align 8 +// CHECK9-NEXT: [[TMP0:%.*]] = load i8, i8* [[CONV]], align 1 // CHECK9-NEXT: [[TOBOOL:%.*]] = trunc i8 [[TMP0]] to i1 // CHECK9-NEXT: [[CONV1:%.*]] = bitcast i64* [[DOTCAPTURE_EXPR__CASTED]] to 
i8* // CHECK9-NEXT: [[FROMBOOL:%.*]] = zext i1 [[TOBOOL]] to i8 @@ -9012,7 +9012,7 @@ int main() { // CHECK9-NEXT: [[TMP8:%.*]] = zext i32 [[TMP7]] to i64 // CHECK9-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !58 // CHECK9-NEXT: [[TMP10:%.*]] = zext i32 [[TMP9]] to i64 -// CHECK9-NEXT: [[TMP11:%.*]] = load i8, i8* [[CONV]], align 8, !llvm.access.group !58 +// CHECK9-NEXT: [[TMP11:%.*]] = load i8, i8* [[CONV]], align 1, !llvm.access.group !58 // CHECK9-NEXT: [[TOBOOL:%.*]] = trunc i8 [[TMP11]] to i1 // CHECK9-NEXT: br i1 [[TOBOOL]], label [[OMP_IF_THEN:%.*]], label [[OMP_IF_ELSE:%.*]] // CHECK9: omp_if.then: @@ -9181,7 +9181,7 @@ int main() { // CHECK10-NEXT: [[ARG_CASTED:%.*]] = alloca i64, align 8 // CHECK10-NEXT: store i64 [[ARG]], i64* [[ARG_ADDR]], align 8 // CHECK10-NEXT: [[CONV:%.*]] = bitcast i64* [[ARG_ADDR]] to i32* -// CHECK10-NEXT: [[TMP0:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK10-NEXT: [[TMP0:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK10-NEXT: [[CONV1:%.*]] = bitcast i64* [[ARG_CASTED]] to i32* // CHECK10-NEXT: store i32 [[TMP0]], i32* [[CONV1]], align 4 // CHECK10-NEXT: [[TMP1:%.*]] = load i64, i64* [[ARG_CASTED]], align 8 @@ -9238,7 +9238,7 @@ int main() { // CHECK10-NEXT: [[TMP8:%.*]] = zext i32 [[TMP7]] to i64 // CHECK10-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !13 // CHECK10-NEXT: [[TMP10:%.*]] = zext i32 [[TMP9]] to i64 -// CHECK10-NEXT: [[TMP11:%.*]] = load i32, i32* [[CONV]], align 8, !llvm.access.group !13 +// CHECK10-NEXT: [[TMP11:%.*]] = load i32, i32* [[CONV]], align 4, !llvm.access.group !13 // CHECK10-NEXT: [[CONV2:%.*]] = bitcast i64* [[ARG_CASTED]] to i32* // CHECK10-NEXT: store i32 [[TMP11]], i32* [[CONV2]], align 4, !llvm.access.group !13 // CHECK10-NEXT: [[TMP12:%.*]] = load i64, i64* [[ARG_CASTED]], align 8, !llvm.access.group !13 @@ -9322,7 +9322,7 @@ int main() { // CHECK10-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP9]], 1 // CHECK10-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK10-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !17 -// CHECK10-NEXT: store i32 0, i32* [[CONV]], align 8, !llvm.access.group !17 +// CHECK10-NEXT: store i32 0, i32* [[CONV]], align 4, !llvm.access.group !17 // CHECK10-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK10: omp.body.continue: // CHECK10-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] @@ -9884,7 +9884,7 @@ int main() { // CHECK10-NEXT: [[DOTCAPTURE_EXPR__CASTED:%.*]] = alloca i64, align 8 // CHECK10-NEXT: store i64 [[DOTCAPTURE_EXPR_]], i64* [[DOTCAPTURE_EXPR__ADDR]], align 8 // CHECK10-NEXT: [[CONV:%.*]] = bitcast i64* [[DOTCAPTURE_EXPR__ADDR]] to i8* -// CHECK10-NEXT: [[TMP0:%.*]] = load i8, i8* [[CONV]], align 8 +// CHECK10-NEXT: [[TMP0:%.*]] = load i8, i8* [[CONV]], align 1 // CHECK10-NEXT: [[TOBOOL:%.*]] = trunc i8 [[TMP0]] to i1 // CHECK10-NEXT: [[CONV1:%.*]] = bitcast i64* [[DOTCAPTURE_EXPR__CASTED]] to i8* // CHECK10-NEXT: [[FROMBOOL:%.*]] = zext i1 [[TOBOOL]] to i8 @@ -9943,7 +9943,7 @@ int main() { // CHECK10-NEXT: [[TMP8:%.*]] = zext i32 [[TMP7]] to i64 // CHECK10-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !40 // CHECK10-NEXT: [[TMP10:%.*]] = zext i32 [[TMP9]] to i64 -// CHECK10-NEXT: [[TMP11:%.*]] = load i8, i8* [[CONV]], align 8, !llvm.access.group !40 +// CHECK10-NEXT: [[TMP11:%.*]] = load i8, i8* [[CONV]], align 1, !llvm.access.group !40 // CHECK10-NEXT: [[TOBOOL:%.*]] = trunc i8 [[TMP11]] to i1 // CHECK10-NEXT: br i1 [[TOBOOL]], label 
[[OMP_IF_THEN:%.*]], label [[OMP_IF_ELSE:%.*]] // CHECK10: omp_if.then: @@ -10427,7 +10427,7 @@ int main() { // CHECK10-NEXT: [[DOTCAPTURE_EXPR__CASTED:%.*]] = alloca i64, align 8 // CHECK10-NEXT: store i64 [[DOTCAPTURE_EXPR_]], i64* [[DOTCAPTURE_EXPR__ADDR]], align 8 // CHECK10-NEXT: [[CONV:%.*]] = bitcast i64* [[DOTCAPTURE_EXPR__ADDR]] to i8* -// CHECK10-NEXT: [[TMP0:%.*]] = load i8, i8* [[CONV]], align 8 +// CHECK10-NEXT: [[TMP0:%.*]] = load i8, i8* [[CONV]], align 1 // CHECK10-NEXT: [[TOBOOL:%.*]] = trunc i8 [[TMP0]] to i1 // CHECK10-NEXT: [[CONV1:%.*]] = bitcast i64* [[DOTCAPTURE_EXPR__CASTED]] to i8* // CHECK10-NEXT: [[FROMBOOL:%.*]] = zext i1 [[TOBOOL]] to i8 @@ -10486,7 +10486,7 @@ int main() { // CHECK10-NEXT: [[TMP8:%.*]] = zext i32 [[TMP7]] to i64 // CHECK10-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !58 // CHECK10-NEXT: [[TMP10:%.*]] = zext i32 [[TMP9]] to i64 -// CHECK10-NEXT: [[TMP11:%.*]] = load i8, i8* [[CONV]], align 8, !llvm.access.group !58 +// CHECK10-NEXT: [[TMP11:%.*]] = load i8, i8* [[CONV]], align 1, !llvm.access.group !58 // CHECK10-NEXT: [[TOBOOL:%.*]] = trunc i8 [[TMP11]] to i1 // CHECK10-NEXT: br i1 [[TOBOOL]], label [[OMP_IF_THEN:%.*]], label [[OMP_IF_ELSE:%.*]] // CHECK10: omp_if.then: @@ -10655,7 +10655,7 @@ int main() { // CHECK11-NEXT: [[ARG_CASTED:%.*]] = alloca i64, align 8 // CHECK11-NEXT: store i64 [[ARG]], i64* [[ARG_ADDR]], align 8 // CHECK11-NEXT: [[CONV:%.*]] = bitcast i64* [[ARG_ADDR]] to i32* -// CHECK11-NEXT: [[TMP0:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK11-NEXT: [[TMP0:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK11-NEXT: [[CONV1:%.*]] = bitcast i64* [[ARG_CASTED]] to i32* // CHECK11-NEXT: store i32 [[TMP0]], i32* [[CONV1]], align 4 // CHECK11-NEXT: [[TMP1:%.*]] = load i64, i64* [[ARG_CASTED]], align 8 @@ -10712,7 +10712,7 @@ int main() { // CHECK11-NEXT: [[TMP8:%.*]] = zext i32 [[TMP7]] to i64 // CHECK11-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !13 // CHECK11-NEXT: [[TMP10:%.*]] = zext i32 [[TMP9]] to i64 -// CHECK11-NEXT: [[TMP11:%.*]] = load i32, i32* [[CONV]], align 8, !nontemporal !14, !llvm.access.group !13 +// CHECK11-NEXT: [[TMP11:%.*]] = load i32, i32* [[CONV]], align 4, !nontemporal !14, !llvm.access.group !13 // CHECK11-NEXT: [[CONV2:%.*]] = bitcast i64* [[ARG_CASTED]] to i32* // CHECK11-NEXT: store i32 [[TMP11]], i32* [[CONV2]], align 4, !llvm.access.group !13 // CHECK11-NEXT: [[TMP12:%.*]] = load i64, i64* [[ARG_CASTED]], align 8, !llvm.access.group !13 @@ -10796,7 +10796,7 @@ int main() { // CHECK11-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP9]], 1 // CHECK11-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK11-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !18 -// CHECK11-NEXT: store i32 0, i32* [[CONV]], align 8, !nontemporal !14, !llvm.access.group !18 +// CHECK11-NEXT: store i32 0, i32* [[CONV]], align 4, !nontemporal !14, !llvm.access.group !18 // CHECK11-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK11: omp.body.continue: // CHECK11-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] @@ -11358,7 +11358,7 @@ int main() { // CHECK11-NEXT: [[DOTCAPTURE_EXPR__CASTED:%.*]] = alloca i64, align 8 // CHECK11-NEXT: store i64 [[DOTCAPTURE_EXPR_]], i64* [[DOTCAPTURE_EXPR__ADDR]], align 8 // CHECK11-NEXT: [[CONV:%.*]] = bitcast i64* [[DOTCAPTURE_EXPR__ADDR]] to i8* -// CHECK11-NEXT: [[TMP0:%.*]] = load i8, i8* [[CONV]], align 8 +// CHECK11-NEXT: [[TMP0:%.*]] = load i8, i8* [[CONV]], align 1 // CHECK11-NEXT: [[TOBOOL:%.*]] = trunc i8 
[[TMP0]] to i1 // CHECK11-NEXT: [[CONV1:%.*]] = bitcast i64* [[DOTCAPTURE_EXPR__CASTED]] to i8* // CHECK11-NEXT: [[FROMBOOL:%.*]] = zext i1 [[TOBOOL]] to i8 @@ -11409,7 +11409,7 @@ int main() { // CHECK11-NEXT: store i32 [[COND]], i32* [[DOTOMP_COMB_UB]], align 4 // CHECK11-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 // CHECK11-NEXT: store i32 [[TMP4]], i32* [[DOTOMP_IV]], align 4 -// CHECK11-NEXT: [[TMP5:%.*]] = load i8, i8* [[CONV]], align 8 +// CHECK11-NEXT: [[TMP5:%.*]] = load i8, i8* [[CONV]], align 1 // CHECK11-NEXT: [[TOBOOL:%.*]] = trunc i8 [[TMP5]] to i1 // CHECK11-NEXT: br i1 [[TOBOOL]], label [[OMP_IF_THEN:%.*]], label [[OMP_IF_ELSE6:%.*]] // CHECK11: omp_if.then: @@ -11424,13 +11424,13 @@ int main() { // CHECK11-NEXT: [[TMP9:%.*]] = zext i32 [[TMP8]] to i64 // CHECK11-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !38 // CHECK11-NEXT: [[TMP11:%.*]] = zext i32 [[TMP10]] to i64 -// CHECK11-NEXT: [[TMP12:%.*]] = load i8, i8* [[CONV]], align 8, !llvm.access.group !38 +// CHECK11-NEXT: [[TMP12:%.*]] = load i8, i8* [[CONV]], align 1, !llvm.access.group !38 // CHECK11-NEXT: [[TOBOOL2:%.*]] = trunc i8 [[TMP12]] to i1 // CHECK11-NEXT: [[CONV3:%.*]] = bitcast i64* [[DOTCAPTURE_EXPR__CASTED]] to i8* // CHECK11-NEXT: [[FROMBOOL:%.*]] = zext i1 [[TOBOOL2]] to i8 // CHECK11-NEXT: store i8 [[FROMBOOL]], i8* [[CONV3]], align 1, !llvm.access.group !38 // CHECK11-NEXT: [[TMP13:%.*]] = load i64, i64* [[DOTCAPTURE_EXPR__CASTED]], align 8, !llvm.access.group !38 -// CHECK11-NEXT: [[TMP14:%.*]] = load i8, i8* [[CONV]], align 8, !llvm.access.group !38 +// CHECK11-NEXT: [[TMP14:%.*]] = load i8, i8* [[CONV]], align 1, !llvm.access.group !38 // CHECK11-NEXT: [[TOBOOL4:%.*]] = trunc i8 [[TMP14]] to i1 // CHECK11-NEXT: br i1 [[TOBOOL4]], label [[OMP_IF_THEN5:%.*]], label [[OMP_IF_ELSE:%.*]] // CHECK11: omp_if.then5: @@ -11465,13 +11465,13 @@ int main() { // CHECK11-NEXT: [[TMP21:%.*]] = zext i32 [[TMP20]] to i64 // CHECK11-NEXT: [[TMP22:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK11-NEXT: [[TMP23:%.*]] = zext i32 [[TMP22]] to i64 -// CHECK11-NEXT: [[TMP24:%.*]] = load i8, i8* [[CONV]], align 8 +// CHECK11-NEXT: [[TMP24:%.*]] = load i8, i8* [[CONV]], align 1 // CHECK11-NEXT: [[TOBOOL10:%.*]] = trunc i8 [[TMP24]] to i1 // CHECK11-NEXT: [[CONV12:%.*]] = bitcast i64* [[DOTCAPTURE_EXPR__CASTED11]] to i8* // CHECK11-NEXT: [[FROMBOOL13:%.*]] = zext i1 [[TOBOOL10]] to i8 // CHECK11-NEXT: store i8 [[FROMBOOL13]], i8* [[CONV12]], align 1 // CHECK11-NEXT: [[TMP25:%.*]] = load i64, i64* [[DOTCAPTURE_EXPR__CASTED11]], align 8 -// CHECK11-NEXT: [[TMP26:%.*]] = load i8, i8* [[CONV]], align 8 +// CHECK11-NEXT: [[TMP26:%.*]] = load i8, i8* [[CONV]], align 1 // CHECK11-NEXT: [[TOBOOL14:%.*]] = trunc i8 [[TMP26]] to i1 // CHECK11-NEXT: br i1 [[TOBOOL14]], label [[OMP_IF_THEN15:%.*]], label [[OMP_IF_ELSE16:%.*]] // CHECK11: omp_if.then15: @@ -11539,7 +11539,7 @@ int main() { // CHECK11-NEXT: store i32 [[CONV2]], i32* [[DOTOMP_UB]], align 4 // CHECK11-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK11-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK11-NEXT: [[TMP2:%.*]] = load i8, i8* [[CONV]], align 8 +// CHECK11-NEXT: [[TMP2:%.*]] = load i8, i8* [[CONV]], align 1 // CHECK11-NEXT: [[TOBOOL:%.*]] = trunc i8 [[TMP2]] to i1 // CHECK11-NEXT: br i1 [[TOBOOL]], label [[OMP_IF_THEN:%.*]], label [[OMP_IF_ELSE:%.*]] // CHECK11: omp_if.then: @@ -11667,7 +11667,7 @@ int main() { // CHECK11-NEXT: store i32 [[CONV2]], i32* [[DOTOMP_UB]], 
align 4 // CHECK11-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK11-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK11-NEXT: [[TMP2:%.*]] = load i8, i8* [[CONV]], align 8 +// CHECK11-NEXT: [[TMP2:%.*]] = load i8, i8* [[CONV]], align 1 // CHECK11-NEXT: [[TOBOOL:%.*]] = trunc i8 [[TMP2]] to i1 // CHECK11-NEXT: br i1 [[TOBOOL]], label [[OMP_IF_THEN:%.*]], label [[OMP_IF_ELSE:%.*]] // CHECK11: omp_if.then: @@ -12135,7 +12135,7 @@ int main() { // CHECK11-NEXT: [[DOTCAPTURE_EXPR__CASTED:%.*]] = alloca i64, align 8 // CHECK11-NEXT: store i64 [[DOTCAPTURE_EXPR_]], i64* [[DOTCAPTURE_EXPR__ADDR]], align 8 // CHECK11-NEXT: [[CONV:%.*]] = bitcast i64* [[DOTCAPTURE_EXPR__ADDR]] to i8* -// CHECK11-NEXT: [[TMP0:%.*]] = load i8, i8* [[CONV]], align 8 +// CHECK11-NEXT: [[TMP0:%.*]] = load i8, i8* [[CONV]], align 1 // CHECK11-NEXT: [[TOBOOL:%.*]] = trunc i8 [[TMP0]] to i1 // CHECK11-NEXT: [[CONV1:%.*]] = bitcast i64* [[DOTCAPTURE_EXPR__CASTED]] to i8* // CHECK11-NEXT: [[FROMBOOL:%.*]] = zext i1 [[TOBOOL]] to i8 @@ -12194,7 +12194,7 @@ int main() { // CHECK11-NEXT: [[TMP8:%.*]] = zext i32 [[TMP7]] to i64 // CHECK11-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !58 // CHECK11-NEXT: [[TMP10:%.*]] = zext i32 [[TMP9]] to i64 -// CHECK11-NEXT: [[TMP11:%.*]] = load i8, i8* [[CONV]], align 8, !llvm.access.group !58 +// CHECK11-NEXT: [[TMP11:%.*]] = load i8, i8* [[CONV]], align 1, !llvm.access.group !58 // CHECK11-NEXT: [[TOBOOL:%.*]] = trunc i8 [[TMP11]] to i1 // CHECK11-NEXT: br i1 [[TOBOOL]], label [[OMP_IF_THEN:%.*]], label [[OMP_IF_ELSE:%.*]] // CHECK11: omp_if.then: @@ -12363,7 +12363,7 @@ int main() { // CHECK12-NEXT: [[ARG_CASTED:%.*]] = alloca i64, align 8 // CHECK12-NEXT: store i64 [[ARG]], i64* [[ARG_ADDR]], align 8 // CHECK12-NEXT: [[CONV:%.*]] = bitcast i64* [[ARG_ADDR]] to i32* -// CHECK12-NEXT: [[TMP0:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK12-NEXT: [[TMP0:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK12-NEXT: [[CONV1:%.*]] = bitcast i64* [[ARG_CASTED]] to i32* // CHECK12-NEXT: store i32 [[TMP0]], i32* [[CONV1]], align 4 // CHECK12-NEXT: [[TMP1:%.*]] = load i64, i64* [[ARG_CASTED]], align 8 @@ -12420,7 +12420,7 @@ int main() { // CHECK12-NEXT: [[TMP8:%.*]] = zext i32 [[TMP7]] to i64 // CHECK12-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !13 // CHECK12-NEXT: [[TMP10:%.*]] = zext i32 [[TMP9]] to i64 -// CHECK12-NEXT: [[TMP11:%.*]] = load i32, i32* [[CONV]], align 8, !nontemporal !14, !llvm.access.group !13 +// CHECK12-NEXT: [[TMP11:%.*]] = load i32, i32* [[CONV]], align 4, !nontemporal !14, !llvm.access.group !13 // CHECK12-NEXT: [[CONV2:%.*]] = bitcast i64* [[ARG_CASTED]] to i32* // CHECK12-NEXT: store i32 [[TMP11]], i32* [[CONV2]], align 4, !llvm.access.group !13 // CHECK12-NEXT: [[TMP12:%.*]] = load i64, i64* [[ARG_CASTED]], align 8, !llvm.access.group !13 @@ -12504,7 +12504,7 @@ int main() { // CHECK12-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP9]], 1 // CHECK12-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK12-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !18 -// CHECK12-NEXT: store i32 0, i32* [[CONV]], align 8, !nontemporal !14, !llvm.access.group !18 +// CHECK12-NEXT: store i32 0, i32* [[CONV]], align 4, !nontemporal !14, !llvm.access.group !18 // CHECK12-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK12: omp.body.continue: // CHECK12-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] @@ -13066,7 +13066,7 @@ int main() { // CHECK12-NEXT: [[DOTCAPTURE_EXPR__CASTED:%.*]] = 
alloca i64, align 8 // CHECK12-NEXT: store i64 [[DOTCAPTURE_EXPR_]], i64* [[DOTCAPTURE_EXPR__ADDR]], align 8 // CHECK12-NEXT: [[CONV:%.*]] = bitcast i64* [[DOTCAPTURE_EXPR__ADDR]] to i8* -// CHECK12-NEXT: [[TMP0:%.*]] = load i8, i8* [[CONV]], align 8 +// CHECK12-NEXT: [[TMP0:%.*]] = load i8, i8* [[CONV]], align 1 // CHECK12-NEXT: [[TOBOOL:%.*]] = trunc i8 [[TMP0]] to i1 // CHECK12-NEXT: [[CONV1:%.*]] = bitcast i64* [[DOTCAPTURE_EXPR__CASTED]] to i8* // CHECK12-NEXT: [[FROMBOOL:%.*]] = zext i1 [[TOBOOL]] to i8 @@ -13117,7 +13117,7 @@ int main() { // CHECK12-NEXT: store i32 [[COND]], i32* [[DOTOMP_COMB_UB]], align 4 // CHECK12-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 // CHECK12-NEXT: store i32 [[TMP4]], i32* [[DOTOMP_IV]], align 4 -// CHECK12-NEXT: [[TMP5:%.*]] = load i8, i8* [[CONV]], align 8 +// CHECK12-NEXT: [[TMP5:%.*]] = load i8, i8* [[CONV]], align 1 // CHECK12-NEXT: [[TOBOOL:%.*]] = trunc i8 [[TMP5]] to i1 // CHECK12-NEXT: br i1 [[TOBOOL]], label [[OMP_IF_THEN:%.*]], label [[OMP_IF_ELSE6:%.*]] // CHECK12: omp_if.then: @@ -13132,13 +13132,13 @@ int main() { // CHECK12-NEXT: [[TMP9:%.*]] = zext i32 [[TMP8]] to i64 // CHECK12-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !38 // CHECK12-NEXT: [[TMP11:%.*]] = zext i32 [[TMP10]] to i64 -// CHECK12-NEXT: [[TMP12:%.*]] = load i8, i8* [[CONV]], align 8, !llvm.access.group !38 +// CHECK12-NEXT: [[TMP12:%.*]] = load i8, i8* [[CONV]], align 1, !llvm.access.group !38 // CHECK12-NEXT: [[TOBOOL2:%.*]] = trunc i8 [[TMP12]] to i1 // CHECK12-NEXT: [[CONV3:%.*]] = bitcast i64* [[DOTCAPTURE_EXPR__CASTED]] to i8* // CHECK12-NEXT: [[FROMBOOL:%.*]] = zext i1 [[TOBOOL2]] to i8 // CHECK12-NEXT: store i8 [[FROMBOOL]], i8* [[CONV3]], align 1, !llvm.access.group !38 // CHECK12-NEXT: [[TMP13:%.*]] = load i64, i64* [[DOTCAPTURE_EXPR__CASTED]], align 8, !llvm.access.group !38 -// CHECK12-NEXT: [[TMP14:%.*]] = load i8, i8* [[CONV]], align 8, !llvm.access.group !38 +// CHECK12-NEXT: [[TMP14:%.*]] = load i8, i8* [[CONV]], align 1, !llvm.access.group !38 // CHECK12-NEXT: [[TOBOOL4:%.*]] = trunc i8 [[TMP14]] to i1 // CHECK12-NEXT: br i1 [[TOBOOL4]], label [[OMP_IF_THEN5:%.*]], label [[OMP_IF_ELSE:%.*]] // CHECK12: omp_if.then5: @@ -13173,13 +13173,13 @@ int main() { // CHECK12-NEXT: [[TMP21:%.*]] = zext i32 [[TMP20]] to i64 // CHECK12-NEXT: [[TMP22:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK12-NEXT: [[TMP23:%.*]] = zext i32 [[TMP22]] to i64 -// CHECK12-NEXT: [[TMP24:%.*]] = load i8, i8* [[CONV]], align 8 +// CHECK12-NEXT: [[TMP24:%.*]] = load i8, i8* [[CONV]], align 1 // CHECK12-NEXT: [[TOBOOL10:%.*]] = trunc i8 [[TMP24]] to i1 // CHECK12-NEXT: [[CONV12:%.*]] = bitcast i64* [[DOTCAPTURE_EXPR__CASTED11]] to i8* // CHECK12-NEXT: [[FROMBOOL13:%.*]] = zext i1 [[TOBOOL10]] to i8 // CHECK12-NEXT: store i8 [[FROMBOOL13]], i8* [[CONV12]], align 1 // CHECK12-NEXT: [[TMP25:%.*]] = load i64, i64* [[DOTCAPTURE_EXPR__CASTED11]], align 8 -// CHECK12-NEXT: [[TMP26:%.*]] = load i8, i8* [[CONV]], align 8 +// CHECK12-NEXT: [[TMP26:%.*]] = load i8, i8* [[CONV]], align 1 // CHECK12-NEXT: [[TOBOOL14:%.*]] = trunc i8 [[TMP26]] to i1 // CHECK12-NEXT: br i1 [[TOBOOL14]], label [[OMP_IF_THEN15:%.*]], label [[OMP_IF_ELSE16:%.*]] // CHECK12: omp_if.then15: @@ -13247,7 +13247,7 @@ int main() { // CHECK12-NEXT: store i32 [[CONV2]], i32* [[DOTOMP_UB]], align 4 // CHECK12-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK12-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK12-NEXT: [[TMP2:%.*]] = load 
i8, i8* [[CONV]], align 8 +// CHECK12-NEXT: [[TMP2:%.*]] = load i8, i8* [[CONV]], align 1 // CHECK12-NEXT: [[TOBOOL:%.*]] = trunc i8 [[TMP2]] to i1 // CHECK12-NEXT: br i1 [[TOBOOL]], label [[OMP_IF_THEN:%.*]], label [[OMP_IF_ELSE:%.*]] // CHECK12: omp_if.then: @@ -13375,7 +13375,7 @@ int main() { // CHECK12-NEXT: store i32 [[CONV2]], i32* [[DOTOMP_UB]], align 4 // CHECK12-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK12-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK12-NEXT: [[TMP2:%.*]] = load i8, i8* [[CONV]], align 8 +// CHECK12-NEXT: [[TMP2:%.*]] = load i8, i8* [[CONV]], align 1 // CHECK12-NEXT: [[TOBOOL:%.*]] = trunc i8 [[TMP2]] to i1 // CHECK12-NEXT: br i1 [[TOBOOL]], label [[OMP_IF_THEN:%.*]], label [[OMP_IF_ELSE:%.*]] // CHECK12: omp_if.then: @@ -13843,7 +13843,7 @@ int main() { // CHECK12-NEXT: [[DOTCAPTURE_EXPR__CASTED:%.*]] = alloca i64, align 8 // CHECK12-NEXT: store i64 [[DOTCAPTURE_EXPR_]], i64* [[DOTCAPTURE_EXPR__ADDR]], align 8 // CHECK12-NEXT: [[CONV:%.*]] = bitcast i64* [[DOTCAPTURE_EXPR__ADDR]] to i8* -// CHECK12-NEXT: [[TMP0:%.*]] = load i8, i8* [[CONV]], align 8 +// CHECK12-NEXT: [[TMP0:%.*]] = load i8, i8* [[CONV]], align 1 // CHECK12-NEXT: [[TOBOOL:%.*]] = trunc i8 [[TMP0]] to i1 // CHECK12-NEXT: [[CONV1:%.*]] = bitcast i64* [[DOTCAPTURE_EXPR__CASTED]] to i8* // CHECK12-NEXT: [[FROMBOOL:%.*]] = zext i1 [[TOBOOL]] to i8 @@ -13902,7 +13902,7 @@ int main() { // CHECK12-NEXT: [[TMP8:%.*]] = zext i32 [[TMP7]] to i64 // CHECK12-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !58 // CHECK12-NEXT: [[TMP10:%.*]] = zext i32 [[TMP9]] to i64 -// CHECK12-NEXT: [[TMP11:%.*]] = load i8, i8* [[CONV]], align 8, !llvm.access.group !58 +// CHECK12-NEXT: [[TMP11:%.*]] = load i8, i8* [[CONV]], align 1, !llvm.access.group !58 // CHECK12-NEXT: [[TOBOOL:%.*]] = trunc i8 [[TMP11]] to i1 // CHECK12-NEXT: br i1 [[TOBOOL]], label [[OMP_IF_THEN:%.*]], label [[OMP_IF_ELSE:%.*]] // CHECK12: omp_if.then: diff --git a/clang/test/OpenMP/target_teams_distribute_parallel_for_simd_lastprivate_codegen.cpp b/clang/test/OpenMP/target_teams_distribute_parallel_for_simd_lastprivate_codegen.cpp index 1887c26c0d8c..03e4bae0c9bb 100644 --- a/clang/test/OpenMP/target_teams_distribute_parallel_for_simd_lastprivate_codegen.cpp +++ b/clang/test/OpenMP/target_teams_distribute_parallel_for_simd_lastprivate_codegen.cpp @@ -213,11 +213,11 @@ int main() { // CHECK1-NEXT: [[CONV4:%.*]] = bitcast i64* [[G1_CASTED]] to double* // CHECK1-NEXT: store double [[TMP1]], double* [[CONV4]], align 8 // CHECK1-NEXT: [[TMP2:%.*]] = load i64, i64* [[G1_CASTED]], align 8 -// CHECK1-NEXT: [[TMP3:%.*]] = load i32, i32* [[CONV1]], align 8 +// CHECK1-NEXT: [[TMP3:%.*]] = load i32, i32* [[CONV1]], align 4 // CHECK1-NEXT: [[CONV5:%.*]] = bitcast i64* [[SVAR_CASTED]] to i32* // CHECK1-NEXT: store i32 [[TMP3]], i32* [[CONV5]], align 4 // CHECK1-NEXT: [[TMP4:%.*]] = load i64, i64* [[SVAR_CASTED]], align 8 -// CHECK1-NEXT: [[TMP5:%.*]] = load float, float* [[CONV2]], align 8 +// CHECK1-NEXT: [[TMP5:%.*]] = load float, float* [[CONV2]], align 4 // CHECK1-NEXT: [[CONV6:%.*]] = bitcast i64* [[SFVAR_CASTED]] to float* // CHECK1-NEXT: store float [[TMP5]], float* [[CONV6]], align 4 // CHECK1-NEXT: [[TMP6:%.*]] = load i64, i64* [[SFVAR_CASTED]], align 8 @@ -345,9 +345,9 @@ int main() { // CHECK1-NEXT: [[TMP29:%.*]] = load double, double* [[TMP28]], align 8 // CHECK1-NEXT: store volatile double [[TMP29]], double* [[TMP0]], align 8 // CHECK1-NEXT: [[TMP30:%.*]] = load i32, i32* [[SVAR8]], 
align 4 -// CHECK1-NEXT: store i32 [[TMP30]], i32* [[CONV1]], align 8 +// CHECK1-NEXT: store i32 [[TMP30]], i32* [[CONV1]], align 4 // CHECK1-NEXT: [[TMP31:%.*]] = load float, float* [[SFVAR9]], align 4 -// CHECK1-NEXT: store float [[TMP31]], float* [[CONV2]], align 8 +// CHECK1-NEXT: store float [[TMP31]], float* [[CONV2]], align 4 // CHECK1-NEXT: br label [[DOTOMP_LASTPRIVATE_DONE]] // CHECK1: .omp.lastprivate.done: // CHECK1-NEXT: ret void @@ -473,9 +473,9 @@ int main() { // CHECK1-NEXT: [[TMP24:%.*]] = load double, double* [[TMP23]], align 8 // CHECK1-NEXT: store volatile double [[TMP24]], double* [[TMP2]], align 8 // CHECK1-NEXT: [[TMP25:%.*]] = load i32, i32* [[SVAR10]], align 4 -// CHECK1-NEXT: store i32 [[TMP25]], i32* [[CONV1]], align 8 +// CHECK1-NEXT: store i32 [[TMP25]], i32* [[CONV1]], align 4 // CHECK1-NEXT: [[TMP26:%.*]] = load float, float* [[SFVAR11]], align 4 -// CHECK1-NEXT: store float [[TMP26]], float* [[CONV2]], align 8 +// CHECK1-NEXT: store float [[TMP26]], float* [[CONV2]], align 4 // CHECK1-NEXT: br label [[DOTOMP_LASTPRIVATE_DONE]] // CHECK1: .omp.lastprivate.done: // CHECK1-NEXT: ret void @@ -532,11 +532,11 @@ int main() { // CHECK2-NEXT: [[CONV4:%.*]] = bitcast i64* [[G1_CASTED]] to double* // CHECK2-NEXT: store double [[TMP1]], double* [[CONV4]], align 8 // CHECK2-NEXT: [[TMP2:%.*]] = load i64, i64* [[G1_CASTED]], align 8 -// CHECK2-NEXT: [[TMP3:%.*]] = load i32, i32* [[CONV1]], align 8 +// CHECK2-NEXT: [[TMP3:%.*]] = load i32, i32* [[CONV1]], align 4 // CHECK2-NEXT: [[CONV5:%.*]] = bitcast i64* [[SVAR_CASTED]] to i32* // CHECK2-NEXT: store i32 [[TMP3]], i32* [[CONV5]], align 4 // CHECK2-NEXT: [[TMP4:%.*]] = load i64, i64* [[SVAR_CASTED]], align 8 -// CHECK2-NEXT: [[TMP5:%.*]] = load float, float* [[CONV2]], align 8 +// CHECK2-NEXT: [[TMP5:%.*]] = load float, float* [[CONV2]], align 4 // CHECK2-NEXT: [[CONV6:%.*]] = bitcast i64* [[SFVAR_CASTED]] to float* // CHECK2-NEXT: store float [[TMP5]], float* [[CONV6]], align 4 // CHECK2-NEXT: [[TMP6:%.*]] = load i64, i64* [[SFVAR_CASTED]], align 8 @@ -664,9 +664,9 @@ int main() { // CHECK2-NEXT: [[TMP29:%.*]] = load double, double* [[TMP28]], align 8 // CHECK2-NEXT: store volatile double [[TMP29]], double* [[TMP0]], align 8 // CHECK2-NEXT: [[TMP30:%.*]] = load i32, i32* [[SVAR8]], align 4 -// CHECK2-NEXT: store i32 [[TMP30]], i32* [[CONV1]], align 8 +// CHECK2-NEXT: store i32 [[TMP30]], i32* [[CONV1]], align 4 // CHECK2-NEXT: [[TMP31:%.*]] = load float, float* [[SFVAR9]], align 4 -// CHECK2-NEXT: store float [[TMP31]], float* [[CONV2]], align 8 +// CHECK2-NEXT: store float [[TMP31]], float* [[CONV2]], align 4 // CHECK2-NEXT: br label [[DOTOMP_LASTPRIVATE_DONE]] // CHECK2: .omp.lastprivate.done: // CHECK2-NEXT: ret void @@ -792,9 +792,9 @@ int main() { // CHECK2-NEXT: [[TMP24:%.*]] = load double, double* [[TMP23]], align 8 // CHECK2-NEXT: store volatile double [[TMP24]], double* [[TMP2]], align 8 // CHECK2-NEXT: [[TMP25:%.*]] = load i32, i32* [[SVAR10]], align 4 -// CHECK2-NEXT: store i32 [[TMP25]], i32* [[CONV1]], align 8 +// CHECK2-NEXT: store i32 [[TMP25]], i32* [[CONV1]], align 4 // CHECK2-NEXT: [[TMP26:%.*]] = load float, float* [[SFVAR11]], align 4 -// CHECK2-NEXT: store float [[TMP26]], float* [[CONV2]], align 8 +// CHECK2-NEXT: store float [[TMP26]], float* [[CONV2]], align 4 // CHECK2-NEXT: br label [[DOTOMP_LASTPRIVATE_DONE]] // CHECK2: .omp.lastprivate.done: // CHECK2-NEXT: ret void @@ -1540,12 +1540,12 @@ int main() { // CHECK5-NEXT: [[TMP2:%.*]] = load %struct.S*, %struct.S** [[VAR_ADDR]], align 8 // 
CHECK5-NEXT: [[CONV1:%.*]] = bitcast i64* [[SVAR_ADDR]] to i32* // CHECK5-NEXT: store %struct.S* [[TMP2]], %struct.S** [[TMP]], align 8 -// CHECK5-NEXT: [[TMP3:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK5-NEXT: [[TMP3:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK5-NEXT: [[CONV2:%.*]] = bitcast i64* [[T_VAR_CASTED]] to i32* // CHECK5-NEXT: store i32 [[TMP3]], i32* [[CONV2]], align 4 // CHECK5-NEXT: [[TMP4:%.*]] = load i64, i64* [[T_VAR_CASTED]], align 8 // CHECK5-NEXT: [[TMP5:%.*]] = load %struct.S*, %struct.S** [[TMP]], align 8 -// CHECK5-NEXT: [[TMP6:%.*]] = load i32, i32* [[CONV1]], align 8 +// CHECK5-NEXT: [[TMP6:%.*]] = load i32, i32* [[CONV1]], align 4 // CHECK5-NEXT: [[CONV3:%.*]] = bitcast i64* [[SVAR_CASTED]] to i32* // CHECK5-NEXT: store i32 [[TMP6]], i32* [[CONV3]], align 4 // CHECK5-NEXT: [[TMP7:%.*]] = load i64, i64* [[SVAR_CASTED]], align 8 @@ -1673,7 +1673,7 @@ int main() { // CHECK5-NEXT: br i1 [[TMP27]], label [[DOTOMP_LASTPRIVATE_THEN:%.*]], label [[DOTOMP_LASTPRIVATE_DONE:%.*]] // CHECK5: .omp.lastprivate.then: // CHECK5-NEXT: [[TMP28:%.*]] = load i32, i32* [[T_VAR3]], align 4 -// CHECK5-NEXT: store i32 [[TMP28]], i32* [[CONV]], align 8 +// CHECK5-NEXT: store i32 [[TMP28]], i32* [[CONV]], align 4 // CHECK5-NEXT: [[TMP29:%.*]] = bitcast [2 x i32]* [[TMP0]] to i8* // CHECK5-NEXT: [[TMP30:%.*]] = bitcast [2 x i32]* [[VEC4]] to i8* // CHECK5-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 [[TMP29]], i8* align 4 [[TMP30]], i64 8, i1 false) @@ -1698,7 +1698,7 @@ int main() { // CHECK5-NEXT: [[TMP37:%.*]] = bitcast %struct.S* [[TMP35]] to i8* // CHECK5-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 [[TMP36]], i8* align 4 [[TMP37]], i64 4, i1 false) // CHECK5-NEXT: [[TMP38:%.*]] = load i32, i32* [[SVAR8]], align 4 -// CHECK5-NEXT: store i32 [[TMP38]], i32* [[CONV1]], align 8 +// CHECK5-NEXT: store i32 [[TMP38]], i32* [[CONV1]], align 4 // CHECK5-NEXT: br label [[DOTOMP_LASTPRIVATE_DONE]] // CHECK5: .omp.lastprivate.done: // CHECK5-NEXT: call void @_ZN1SIfED1Ev(%struct.S* nonnull align 4 dereferenceable(4) [[VAR6]]) #[[ATTR4]] @@ -1846,7 +1846,7 @@ int main() { // CHECK5-NEXT: br i1 [[TMP26]], label [[DOTOMP_LASTPRIVATE_THEN:%.*]], label [[DOTOMP_LASTPRIVATE_DONE:%.*]] // CHECK5: .omp.lastprivate.then: // CHECK5-NEXT: [[TMP27:%.*]] = load i32, i32* [[T_VAR5]], align 4 -// CHECK5-NEXT: store i32 [[TMP27]], i32* [[CONV]], align 8 +// CHECK5-NEXT: store i32 [[TMP27]], i32* [[CONV]], align 4 // CHECK5-NEXT: [[TMP28:%.*]] = bitcast [2 x i32]* [[TMP0]] to i8* // CHECK5-NEXT: [[TMP29:%.*]] = bitcast [2 x i32]* [[VEC6]] to i8* // CHECK5-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 [[TMP28]], i8* align 4 [[TMP29]], i64 8, i1 false) @@ -1871,7 +1871,7 @@ int main() { // CHECK5-NEXT: [[TMP36:%.*]] = bitcast %struct.S* [[TMP34]] to i8* // CHECK5-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 [[TMP35]], i8* align 4 [[TMP36]], i64 4, i1 false) // CHECK5-NEXT: [[TMP37:%.*]] = load i32, i32* [[SVAR10]], align 4 -// CHECK5-NEXT: store i32 [[TMP37]], i32* [[CONV1]], align 8 +// CHECK5-NEXT: store i32 [[TMP37]], i32* [[CONV1]], align 4 // CHECK5-NEXT: br label [[DOTOMP_LASTPRIVATE_DONE]] // CHECK5: .omp.lastprivate.done: // CHECK5-NEXT: call void @_ZN1SIfED1Ev(%struct.S* nonnull align 4 dereferenceable(4) [[VAR8]]) #[[ATTR4]] @@ -2062,7 +2062,7 @@ int main() { // CHECK5-NEXT: [[TMP1:%.*]] = load [2 x %struct.S.0]*, [2 x %struct.S.0]** [[S_ARR_ADDR]], align 8 // CHECK5-NEXT: [[TMP2:%.*]] = load %struct.S.0*, %struct.S.0** [[VAR_ADDR]], align 8 // 
CHECK5-NEXT: store %struct.S.0* [[TMP2]], %struct.S.0** [[TMP]], align 8 -// CHECK5-NEXT: [[TMP3:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK5-NEXT: [[TMP3:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK5-NEXT: [[CONV1:%.*]] = bitcast i64* [[T_VAR_CASTED]] to i32* // CHECK5-NEXT: store i32 [[TMP3]], i32* [[CONV1]], align 4 // CHECK5-NEXT: [[TMP4:%.*]] = load i64, i64* [[T_VAR_CASTED]], align 8 @@ -2182,7 +2182,7 @@ int main() { // CHECK5-NEXT: br i1 [[TMP25]], label [[DOTOMP_LASTPRIVATE_THEN:%.*]], label [[DOTOMP_LASTPRIVATE_DONE:%.*]] // CHECK5: .omp.lastprivate.then: // CHECK5-NEXT: [[TMP26:%.*]] = load i32, i32* [[T_VAR2]], align 4 -// CHECK5-NEXT: store i32 [[TMP26]], i32* [[CONV]], align 8 +// CHECK5-NEXT: store i32 [[TMP26]], i32* [[CONV]], align 4 // CHECK5-NEXT: [[TMP27:%.*]] = bitcast [2 x i32]* [[TMP0]] to i8* // CHECK5-NEXT: [[TMP28:%.*]] = bitcast [2 x i32]* [[VEC3]] to i8* // CHECK5-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 [[TMP27]], i8* align 4 [[TMP28]], i64 8, i1 false) @@ -2349,7 +2349,7 @@ int main() { // CHECK5-NEXT: br i1 [[TMP26]], label [[DOTOMP_LASTPRIVATE_THEN:%.*]], label [[DOTOMP_LASTPRIVATE_DONE:%.*]] // CHECK5: .omp.lastprivate.then: // CHECK5-NEXT: [[TMP27:%.*]] = load i32, i32* [[T_VAR4]], align 4 -// CHECK5-NEXT: store i32 [[TMP27]], i32* [[CONV]], align 8 +// CHECK5-NEXT: store i32 [[TMP27]], i32* [[CONV]], align 4 // CHECK5-NEXT: [[TMP28:%.*]] = bitcast [2 x i32]* [[TMP0]] to i8* // CHECK5-NEXT: [[TMP29:%.*]] = bitcast [2 x i32]* [[VEC5]] to i8* // CHECK5-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 [[TMP28]], i8* align 4 [[TMP29]], i64 8, i1 false) @@ -2593,12 +2593,12 @@ int main() { // CHECK6-NEXT: [[TMP2:%.*]] = load %struct.S*, %struct.S** [[VAR_ADDR]], align 8 // CHECK6-NEXT: [[CONV1:%.*]] = bitcast i64* [[SVAR_ADDR]] to i32* // CHECK6-NEXT: store %struct.S* [[TMP2]], %struct.S** [[TMP]], align 8 -// CHECK6-NEXT: [[TMP3:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK6-NEXT: [[TMP3:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK6-NEXT: [[CONV2:%.*]] = bitcast i64* [[T_VAR_CASTED]] to i32* // CHECK6-NEXT: store i32 [[TMP3]], i32* [[CONV2]], align 4 // CHECK6-NEXT: [[TMP4:%.*]] = load i64, i64* [[T_VAR_CASTED]], align 8 // CHECK6-NEXT: [[TMP5:%.*]] = load %struct.S*, %struct.S** [[TMP]], align 8 -// CHECK6-NEXT: [[TMP6:%.*]] = load i32, i32* [[CONV1]], align 8 +// CHECK6-NEXT: [[TMP6:%.*]] = load i32, i32* [[CONV1]], align 4 // CHECK6-NEXT: [[CONV3:%.*]] = bitcast i64* [[SVAR_CASTED]] to i32* // CHECK6-NEXT: store i32 [[TMP6]], i32* [[CONV3]], align 4 // CHECK6-NEXT: [[TMP7:%.*]] = load i64, i64* [[SVAR_CASTED]], align 8 @@ -2726,7 +2726,7 @@ int main() { // CHECK6-NEXT: br i1 [[TMP27]], label [[DOTOMP_LASTPRIVATE_THEN:%.*]], label [[DOTOMP_LASTPRIVATE_DONE:%.*]] // CHECK6: .omp.lastprivate.then: // CHECK6-NEXT: [[TMP28:%.*]] = load i32, i32* [[T_VAR3]], align 4 -// CHECK6-NEXT: store i32 [[TMP28]], i32* [[CONV]], align 8 +// CHECK6-NEXT: store i32 [[TMP28]], i32* [[CONV]], align 4 // CHECK6-NEXT: [[TMP29:%.*]] = bitcast [2 x i32]* [[TMP0]] to i8* // CHECK6-NEXT: [[TMP30:%.*]] = bitcast [2 x i32]* [[VEC4]] to i8* // CHECK6-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 [[TMP29]], i8* align 4 [[TMP30]], i64 8, i1 false) @@ -2751,7 +2751,7 @@ int main() { // CHECK6-NEXT: [[TMP37:%.*]] = bitcast %struct.S* [[TMP35]] to i8* // CHECK6-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 [[TMP36]], i8* align 4 [[TMP37]], i64 4, i1 false) // CHECK6-NEXT: [[TMP38:%.*]] = load i32, i32* [[SVAR8]], align 4 -// 
CHECK6-NEXT: store i32 [[TMP38]], i32* [[CONV1]], align 8 +// CHECK6-NEXT: store i32 [[TMP38]], i32* [[CONV1]], align 4 // CHECK6-NEXT: br label [[DOTOMP_LASTPRIVATE_DONE]] // CHECK6: .omp.lastprivate.done: // CHECK6-NEXT: call void @_ZN1SIfED1Ev(%struct.S* nonnull align 4 dereferenceable(4) [[VAR6]]) #[[ATTR4]] @@ -2899,7 +2899,7 @@ int main() { // CHECK6-NEXT: br i1 [[TMP26]], label [[DOTOMP_LASTPRIVATE_THEN:%.*]], label [[DOTOMP_LASTPRIVATE_DONE:%.*]] // CHECK6: .omp.lastprivate.then: // CHECK6-NEXT: [[TMP27:%.*]] = load i32, i32* [[T_VAR5]], align 4 -// CHECK6-NEXT: store i32 [[TMP27]], i32* [[CONV]], align 8 +// CHECK6-NEXT: store i32 [[TMP27]], i32* [[CONV]], align 4 // CHECK6-NEXT: [[TMP28:%.*]] = bitcast [2 x i32]* [[TMP0]] to i8* // CHECK6-NEXT: [[TMP29:%.*]] = bitcast [2 x i32]* [[VEC6]] to i8* // CHECK6-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 [[TMP28]], i8* align 4 [[TMP29]], i64 8, i1 false) @@ -2924,7 +2924,7 @@ int main() { // CHECK6-NEXT: [[TMP36:%.*]] = bitcast %struct.S* [[TMP34]] to i8* // CHECK6-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 [[TMP35]], i8* align 4 [[TMP36]], i64 4, i1 false) // CHECK6-NEXT: [[TMP37:%.*]] = load i32, i32* [[SVAR10]], align 4 -// CHECK6-NEXT: store i32 [[TMP37]], i32* [[CONV1]], align 8 +// CHECK6-NEXT: store i32 [[TMP37]], i32* [[CONV1]], align 4 // CHECK6-NEXT: br label [[DOTOMP_LASTPRIVATE_DONE]] // CHECK6: .omp.lastprivate.done: // CHECK6-NEXT: call void @_ZN1SIfED1Ev(%struct.S* nonnull align 4 dereferenceable(4) [[VAR8]]) #[[ATTR4]] @@ -3115,7 +3115,7 @@ int main() { // CHECK6-NEXT: [[TMP1:%.*]] = load [2 x %struct.S.0]*, [2 x %struct.S.0]** [[S_ARR_ADDR]], align 8 // CHECK6-NEXT: [[TMP2:%.*]] = load %struct.S.0*, %struct.S.0** [[VAR_ADDR]], align 8 // CHECK6-NEXT: store %struct.S.0* [[TMP2]], %struct.S.0** [[TMP]], align 8 -// CHECK6-NEXT: [[TMP3:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK6-NEXT: [[TMP3:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK6-NEXT: [[CONV1:%.*]] = bitcast i64* [[T_VAR_CASTED]] to i32* // CHECK6-NEXT: store i32 [[TMP3]], i32* [[CONV1]], align 4 // CHECK6-NEXT: [[TMP4:%.*]] = load i64, i64* [[T_VAR_CASTED]], align 8 @@ -3235,7 +3235,7 @@ int main() { // CHECK6-NEXT: br i1 [[TMP25]], label [[DOTOMP_LASTPRIVATE_THEN:%.*]], label [[DOTOMP_LASTPRIVATE_DONE:%.*]] // CHECK6: .omp.lastprivate.then: // CHECK6-NEXT: [[TMP26:%.*]] = load i32, i32* [[T_VAR2]], align 4 -// CHECK6-NEXT: store i32 [[TMP26]], i32* [[CONV]], align 8 +// CHECK6-NEXT: store i32 [[TMP26]], i32* [[CONV]], align 4 // CHECK6-NEXT: [[TMP27:%.*]] = bitcast [2 x i32]* [[TMP0]] to i8* // CHECK6-NEXT: [[TMP28:%.*]] = bitcast [2 x i32]* [[VEC3]] to i8* // CHECK6-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 [[TMP27]], i8* align 4 [[TMP28]], i64 8, i1 false) @@ -3402,7 +3402,7 @@ int main() { // CHECK6-NEXT: br i1 [[TMP26]], label [[DOTOMP_LASTPRIVATE_THEN:%.*]], label [[DOTOMP_LASTPRIVATE_DONE:%.*]] // CHECK6: .omp.lastprivate.then: // CHECK6-NEXT: [[TMP27:%.*]] = load i32, i32* [[T_VAR4]], align 4 -// CHECK6-NEXT: store i32 [[TMP27]], i32* [[CONV]], align 8 +// CHECK6-NEXT: store i32 [[TMP27]], i32* [[CONV]], align 4 // CHECK6-NEXT: [[TMP28:%.*]] = bitcast [2 x i32]* [[TMP0]] to i8* // CHECK6-NEXT: [[TMP29:%.*]] = bitcast [2 x i32]* [[VEC5]] to i8* // CHECK6-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 [[TMP28]], i8* align 4 [[TMP29]], i64 8, i1 false) diff --git a/clang/test/OpenMP/target_teams_distribute_parallel_for_simd_schedule_codegen.cpp 
b/clang/test/OpenMP/target_teams_distribute_parallel_for_simd_schedule_codegen.cpp index da7a6b1712ce..c28b90653caa 100644 --- a/clang/test/OpenMP/target_teams_distribute_parallel_for_simd_schedule_codegen.cpp +++ b/clang/test/OpenMP/target_teams_distribute_parallel_for_simd_schedule_codegen.cpp @@ -9127,7 +9127,7 @@ int main (int argc, char **argv) { // CHECK13-NEXT: [[CONV:%.*]] = bitcast i64* [[N_ADDR]] to i32* // CHECK13-NEXT: [[TMP0:%.*]] = load i64, i64* [[VLA_ADDR]], align 8 // CHECK13-NEXT: [[TMP1:%.*]] = load i32*, i32** [[A_ADDR]], align 8 -// CHECK13-NEXT: [[TMP2:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK13-NEXT: [[TMP2:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK13-NEXT: [[CONV1:%.*]] = bitcast i64* [[N_CASTED]] to i32* // CHECK13-NEXT: store i32 [[TMP2]], i32* [[CONV1]], align 4 // CHECK13-NEXT: [[TMP3:%.*]] = load i64, i64* [[N_CASTED]], align 8 @@ -9162,7 +9162,7 @@ int main (int argc, char **argv) { // CHECK13-NEXT: [[CONV:%.*]] = bitcast i64* [[N_ADDR]] to i32* // CHECK13-NEXT: [[TMP0:%.*]] = load i64, i64* [[VLA_ADDR]], align 8 // CHECK13-NEXT: [[TMP1:%.*]] = load i32*, i32** [[A_ADDR]], align 8 -// CHECK13-NEXT: [[TMP2:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK13-NEXT: [[TMP2:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK13-NEXT: store i32 [[TMP2]], i32* [[DOTCAPTURE_EXPR_]], align 4 // CHECK13-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 // CHECK13-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP3]], 0 @@ -9208,7 +9208,7 @@ int main (int argc, char **argv) { // CHECK13-NEXT: [[TMP16:%.*]] = zext i32 [[TMP15]] to i64 // CHECK13-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !13 // CHECK13-NEXT: [[TMP18:%.*]] = zext i32 [[TMP17]] to i64 -// CHECK13-NEXT: [[TMP19:%.*]] = load i32, i32* [[CONV]], align 8, !llvm.access.group !13 +// CHECK13-NEXT: [[TMP19:%.*]] = load i32, i32* [[CONV]], align 4, !llvm.access.group !13 // CHECK13-NEXT: [[CONV6:%.*]] = bitcast i64* [[N_CASTED]] to i32* // CHECK13-NEXT: store i32 [[TMP19]], i32* [[CONV6]], align 4, !llvm.access.group !13 // CHECK13-NEXT: [[TMP20:%.*]] = load i64, i64* [[N_CASTED]], align 8, !llvm.access.group !13 @@ -9273,7 +9273,7 @@ int main (int argc, char **argv) { // CHECK13-NEXT: [[CONV:%.*]] = bitcast i64* [[N_ADDR]] to i32* // CHECK13-NEXT: [[TMP0:%.*]] = load i64, i64* [[VLA_ADDR]], align 8 // CHECK13-NEXT: [[TMP1:%.*]] = load i32*, i32** [[A_ADDR]], align 8 -// CHECK13-NEXT: [[TMP2:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK13-NEXT: [[TMP2:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK13-NEXT: store i32 [[TMP2]], i32* [[DOTCAPTURE_EXPR_]], align 4 // CHECK13-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 // CHECK13-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP3]], 0 @@ -9373,7 +9373,7 @@ int main (int argc, char **argv) { // CHECK13-NEXT: [[CONV:%.*]] = bitcast i64* [[N_ADDR]] to i32* // CHECK13-NEXT: [[TMP0:%.*]] = load i64, i64* [[VLA_ADDR]], align 8 // CHECK13-NEXT: [[TMP1:%.*]] = load i32*, i32** [[A_ADDR]], align 8 -// CHECK13-NEXT: [[TMP2:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK13-NEXT: [[TMP2:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK13-NEXT: [[CONV1:%.*]] = bitcast i64* [[N_CASTED]] to i32* // CHECK13-NEXT: store i32 [[TMP2]], i32* [[CONV1]], align 4 // CHECK13-NEXT: [[TMP3:%.*]] = load i64, i64* [[N_CASTED]], align 8 @@ -9408,7 +9408,7 @@ int main (int argc, char **argv) { // CHECK13-NEXT: [[CONV:%.*]] = bitcast i64* [[N_ADDR]] to i32* // CHECK13-NEXT: [[TMP0:%.*]] = load i64, i64* [[VLA_ADDR]], align 
8 // CHECK13-NEXT: [[TMP1:%.*]] = load i32*, i32** [[A_ADDR]], align 8 -// CHECK13-NEXT: [[TMP2:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK13-NEXT: [[TMP2:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK13-NEXT: store i32 [[TMP2]], i32* [[DOTCAPTURE_EXPR_]], align 4 // CHECK13-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 // CHECK13-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP3]], 0 @@ -9454,7 +9454,7 @@ int main (int argc, char **argv) { // CHECK13-NEXT: [[TMP16:%.*]] = zext i32 [[TMP15]] to i64 // CHECK13-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !22 // CHECK13-NEXT: [[TMP18:%.*]] = zext i32 [[TMP17]] to i64 -// CHECK13-NEXT: [[TMP19:%.*]] = load i32, i32* [[CONV]], align 8, !llvm.access.group !22 +// CHECK13-NEXT: [[TMP19:%.*]] = load i32, i32* [[CONV]], align 4, !llvm.access.group !22 // CHECK13-NEXT: [[CONV6:%.*]] = bitcast i64* [[N_CASTED]] to i32* // CHECK13-NEXT: store i32 [[TMP19]], i32* [[CONV6]], align 4, !llvm.access.group !22 // CHECK13-NEXT: [[TMP20:%.*]] = load i64, i64* [[N_CASTED]], align 8, !llvm.access.group !22 @@ -9519,7 +9519,7 @@ int main (int argc, char **argv) { // CHECK13-NEXT: [[CONV:%.*]] = bitcast i64* [[N_ADDR]] to i32* // CHECK13-NEXT: [[TMP0:%.*]] = load i64, i64* [[VLA_ADDR]], align 8 // CHECK13-NEXT: [[TMP1:%.*]] = load i32*, i32** [[A_ADDR]], align 8 -// CHECK13-NEXT: [[TMP2:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK13-NEXT: [[TMP2:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK13-NEXT: store i32 [[TMP2]], i32* [[DOTCAPTURE_EXPR_]], align 4 // CHECK13-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 // CHECK13-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP3]], 0 @@ -9623,11 +9623,11 @@ int main (int argc, char **argv) { // CHECK13-NEXT: [[TMP0:%.*]] = load i64, i64* [[VLA_ADDR]], align 8 // CHECK13-NEXT: [[TMP1:%.*]] = load i32*, i32** [[A_ADDR]], align 8 // CHECK13-NEXT: [[CONV1:%.*]] = bitcast i64* [[DOTCAPTURE_EXPR__ADDR]] to i32* -// CHECK13-NEXT: [[TMP2:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK13-NEXT: [[TMP2:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK13-NEXT: [[CONV2:%.*]] = bitcast i64* [[N_CASTED]] to i32* // CHECK13-NEXT: store i32 [[TMP2]], i32* [[CONV2]], align 4 // CHECK13-NEXT: [[TMP3:%.*]] = load i64, i64* [[N_CASTED]], align 8 -// CHECK13-NEXT: [[TMP4:%.*]] = load i32, i32* [[CONV1]], align 8 +// CHECK13-NEXT: [[TMP4:%.*]] = load i32, i32* [[CONV1]], align 4 // CHECK13-NEXT: [[CONV3:%.*]] = bitcast i64* [[DOTCAPTURE_EXPR__CASTED]] to i32* // CHECK13-NEXT: store i32 [[TMP4]], i32* [[CONV3]], align 4 // CHECK13-NEXT: [[TMP5:%.*]] = load i64, i64* [[DOTCAPTURE_EXPR__CASTED]], align 8 @@ -9666,7 +9666,7 @@ int main (int argc, char **argv) { // CHECK13-NEXT: [[TMP0:%.*]] = load i64, i64* [[VLA_ADDR]], align 8 // CHECK13-NEXT: [[TMP1:%.*]] = load i32*, i32** [[A_ADDR]], align 8 // CHECK13-NEXT: [[CONV1:%.*]] = bitcast i64* [[DOTCAPTURE_EXPR__ADDR]] to i32* -// CHECK13-NEXT: [[TMP2:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK13-NEXT: [[TMP2:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK13-NEXT: store i32 [[TMP2]], i32* [[DOTCAPTURE_EXPR_2]], align 4 // CHECK13-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_2]], align 4 // CHECK13-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP3]], 0 @@ -9683,7 +9683,7 @@ int main (int argc, char **argv) { // CHECK13-NEXT: store i32 [[TMP5]], i32* [[DOTOMP_COMB_UB]], align 4 // CHECK13-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK13-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// 
CHECK13-NEXT: [[TMP6:%.*]] = load i32, i32* [[CONV1]], align 8 +// CHECK13-NEXT: [[TMP6:%.*]] = load i32, i32* [[CONV1]], align 4 // CHECK13-NEXT: [[TMP7:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK13-NEXT: [[TMP8:%.*]] = load i32, i32* [[TMP7]], align 4 // CHECK13-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP8]], i32 91, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 [[TMP6]]) @@ -9714,11 +9714,11 @@ int main (int argc, char **argv) { // CHECK13-NEXT: [[TMP17:%.*]] = zext i32 [[TMP16]] to i64 // CHECK13-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !28 // CHECK13-NEXT: [[TMP19:%.*]] = zext i32 [[TMP18]] to i64 -// CHECK13-NEXT: [[TMP20:%.*]] = load i32, i32* [[CONV]], align 8, !llvm.access.group !28 +// CHECK13-NEXT: [[TMP20:%.*]] = load i32, i32* [[CONV]], align 4, !llvm.access.group !28 // CHECK13-NEXT: [[CONV8:%.*]] = bitcast i64* [[N_CASTED]] to i32* // CHECK13-NEXT: store i32 [[TMP20]], i32* [[CONV8]], align 4, !llvm.access.group !28 // CHECK13-NEXT: [[TMP21:%.*]] = load i64, i64* [[N_CASTED]], align 8, !llvm.access.group !28 -// CHECK13-NEXT: [[TMP22:%.*]] = load i32, i32* [[CONV1]], align 8, !llvm.access.group !28 +// CHECK13-NEXT: [[TMP22:%.*]] = load i32, i32* [[CONV1]], align 4, !llvm.access.group !28 // CHECK13-NEXT: [[CONV9:%.*]] = bitcast i64* [[DOTCAPTURE_EXPR__CASTED]] to i32* // CHECK13-NEXT: store i32 [[TMP22]], i32* [[CONV9]], align 4, !llvm.access.group !28 // CHECK13-NEXT: [[TMP23:%.*]] = load i64, i64* [[DOTCAPTURE_EXPR__CASTED]], align 8, !llvm.access.group !28 @@ -9809,7 +9809,7 @@ int main (int argc, char **argv) { // CHECK13-NEXT: [[TMP0:%.*]] = load i64, i64* [[VLA_ADDR]], align 8 // CHECK13-NEXT: [[TMP1:%.*]] = load i32*, i32** [[A_ADDR]], align 8 // CHECK13-NEXT: [[CONV1:%.*]] = bitcast i64* [[DOTCAPTURE_EXPR__ADDR]] to i32* -// CHECK13-NEXT: [[TMP2:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK13-NEXT: [[TMP2:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK13-NEXT: store i32 [[TMP2]], i32* [[DOTCAPTURE_EXPR_2]], align 4 // CHECK13-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_2]], align 4 // CHECK13-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP3]], 0 @@ -9909,7 +9909,7 @@ int main (int argc, char **argv) { // CHECK13-NEXT: [[CONV:%.*]] = bitcast i64* [[N_ADDR]] to i32* // CHECK13-NEXT: [[TMP0:%.*]] = load i64, i64* [[VLA_ADDR]], align 8 // CHECK13-NEXT: [[TMP1:%.*]] = load i32*, i32** [[A_ADDR]], align 8 -// CHECK13-NEXT: [[TMP2:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK13-NEXT: [[TMP2:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK13-NEXT: [[CONV1:%.*]] = bitcast i64* [[N_CASTED]] to i32* // CHECK13-NEXT: store i32 [[TMP2]], i32* [[CONV1]], align 4 // CHECK13-NEXT: [[TMP3:%.*]] = load i64, i64* [[N_CASTED]], align 8 @@ -9944,7 +9944,7 @@ int main (int argc, char **argv) { // CHECK13-NEXT: [[CONV:%.*]] = bitcast i64* [[N_ADDR]] to i32* // CHECK13-NEXT: [[TMP0:%.*]] = load i64, i64* [[VLA_ADDR]], align 8 // CHECK13-NEXT: [[TMP1:%.*]] = load i32*, i32** [[A_ADDR]], align 8 -// CHECK13-NEXT: [[TMP2:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK13-NEXT: [[TMP2:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK13-NEXT: store i32 [[TMP2]], i32* [[DOTCAPTURE_EXPR_]], align 4 // CHECK13-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 // CHECK13-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP3]], 0 @@ -9990,7 +9990,7 @@ int main (int argc, char **argv) { // CHECK13-NEXT: [[TMP16:%.*]] = 
zext i32 [[TMP15]] to i64 // CHECK13-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !34 // CHECK13-NEXT: [[TMP18:%.*]] = zext i32 [[TMP17]] to i64 -// CHECK13-NEXT: [[TMP19:%.*]] = load i32, i32* [[CONV]], align 8, !llvm.access.group !34 +// CHECK13-NEXT: [[TMP19:%.*]] = load i32, i32* [[CONV]], align 4, !llvm.access.group !34 // CHECK13-NEXT: [[CONV6:%.*]] = bitcast i64* [[N_CASTED]] to i32* // CHECK13-NEXT: store i32 [[TMP19]], i32* [[CONV6]], align 4, !llvm.access.group !34 // CHECK13-NEXT: [[TMP20:%.*]] = load i64, i64* [[N_CASTED]], align 8, !llvm.access.group !34 @@ -10055,7 +10055,7 @@ int main (int argc, char **argv) { // CHECK13-NEXT: [[CONV:%.*]] = bitcast i64* [[N_ADDR]] to i32* // CHECK13-NEXT: [[TMP0:%.*]] = load i64, i64* [[VLA_ADDR]], align 8 // CHECK13-NEXT: [[TMP1:%.*]] = load i32*, i32** [[A_ADDR]], align 8 -// CHECK13-NEXT: [[TMP2:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK13-NEXT: [[TMP2:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK13-NEXT: store i32 [[TMP2]], i32* [[DOTCAPTURE_EXPR_]], align 4 // CHECK13-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 // CHECK13-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP3]], 0 @@ -10155,11 +10155,11 @@ int main (int argc, char **argv) { // CHECK13-NEXT: [[TMP0:%.*]] = load i64, i64* [[VLA_ADDR]], align 8 // CHECK13-NEXT: [[TMP1:%.*]] = load i32*, i32** [[A_ADDR]], align 8 // CHECK13-NEXT: [[CONV1:%.*]] = bitcast i64* [[DOTCAPTURE_EXPR__ADDR]] to i32* -// CHECK13-NEXT: [[TMP2:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK13-NEXT: [[TMP2:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK13-NEXT: [[CONV2:%.*]] = bitcast i64* [[N_CASTED]] to i32* // CHECK13-NEXT: store i32 [[TMP2]], i32* [[CONV2]], align 4 // CHECK13-NEXT: [[TMP3:%.*]] = load i64, i64* [[N_CASTED]], align 8 -// CHECK13-NEXT: [[TMP4:%.*]] = load i32, i32* [[CONV1]], align 8 +// CHECK13-NEXT: [[TMP4:%.*]] = load i32, i32* [[CONV1]], align 4 // CHECK13-NEXT: [[CONV3:%.*]] = bitcast i64* [[DOTCAPTURE_EXPR__CASTED]] to i32* // CHECK13-NEXT: store i32 [[TMP4]], i32* [[CONV3]], align 4 // CHECK13-NEXT: [[TMP5:%.*]] = load i64, i64* [[DOTCAPTURE_EXPR__CASTED]], align 8 @@ -10198,7 +10198,7 @@ int main (int argc, char **argv) { // CHECK13-NEXT: [[TMP0:%.*]] = load i64, i64* [[VLA_ADDR]], align 8 // CHECK13-NEXT: [[TMP1:%.*]] = load i32*, i32** [[A_ADDR]], align 8 // CHECK13-NEXT: [[CONV1:%.*]] = bitcast i64* [[DOTCAPTURE_EXPR__ADDR]] to i32* -// CHECK13-NEXT: [[TMP2:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK13-NEXT: [[TMP2:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK13-NEXT: store i32 [[TMP2]], i32* [[DOTCAPTURE_EXPR_2]], align 4 // CHECK13-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_2]], align 4 // CHECK13-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP3]], 0 @@ -10244,11 +10244,11 @@ int main (int argc, char **argv) { // CHECK13-NEXT: [[TMP16:%.*]] = zext i32 [[TMP15]] to i64 // CHECK13-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !40 // CHECK13-NEXT: [[TMP18:%.*]] = zext i32 [[TMP17]] to i64 -// CHECK13-NEXT: [[TMP19:%.*]] = load i32, i32* [[CONV]], align 8, !llvm.access.group !40 +// CHECK13-NEXT: [[TMP19:%.*]] = load i32, i32* [[CONV]], align 4, !llvm.access.group !40 // CHECK13-NEXT: [[CONV8:%.*]] = bitcast i64* [[N_CASTED]] to i32* // CHECK13-NEXT: store i32 [[TMP19]], i32* [[CONV8]], align 4, !llvm.access.group !40 // CHECK13-NEXT: [[TMP20:%.*]] = load i64, i64* [[N_CASTED]], align 8, !llvm.access.group !40 -// CHECK13-NEXT: [[TMP21:%.*]] = load i32, i32* 
[[CONV1]], align 8, !llvm.access.group !40 +// CHECK13-NEXT: [[TMP21:%.*]] = load i32, i32* [[CONV1]], align 4, !llvm.access.group !40 // CHECK13-NEXT: [[CONV9:%.*]] = bitcast i64* [[DOTCAPTURE_EXPR__CASTED]] to i32* // CHECK13-NEXT: store i32 [[TMP21]], i32* [[CONV9]], align 4, !llvm.access.group !40 // CHECK13-NEXT: [[TMP22:%.*]] = load i64, i64* [[DOTCAPTURE_EXPR__CASTED]], align 8, !llvm.access.group !40 @@ -10316,7 +10316,7 @@ int main (int argc, char **argv) { // CHECK13-NEXT: [[TMP0:%.*]] = load i64, i64* [[VLA_ADDR]], align 8 // CHECK13-NEXT: [[TMP1:%.*]] = load i32*, i32** [[A_ADDR]], align 8 // CHECK13-NEXT: [[CONV1:%.*]] = bitcast i64* [[DOTCAPTURE_EXPR__ADDR]] to i32* -// CHECK13-NEXT: [[TMP2:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK13-NEXT: [[TMP2:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK13-NEXT: store i32 [[TMP2]], i32* [[DOTCAPTURE_EXPR_2]], align 4 // CHECK13-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_2]], align 4 // CHECK13-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP3]], 0 @@ -10339,7 +10339,7 @@ int main (int argc, char **argv) { // CHECK13-NEXT: store i32 [[CONV6]], i32* [[DOTOMP_UB]], align 4 // CHECK13-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK13-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK13-NEXT: [[TMP8:%.*]] = load i32, i32* [[CONV1]], align 8 +// CHECK13-NEXT: [[TMP8:%.*]] = load i32, i32* [[CONV1]], align 4 // CHECK13-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 // CHECK13-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 // CHECK13-NEXT: [[TMP11:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 @@ -10891,7 +10891,7 @@ int main (int argc, char **argv) { // CHECK13-NEXT: store i64 [[DOTCAPTURE_EXPR_]], i64* [[DOTCAPTURE_EXPR__ADDR]], align 8 // CHECK13-NEXT: [[TMP0:%.*]] = load [10 x i32]*, [10 x i32]** [[A_ADDR]], align 8 // CHECK13-NEXT: [[CONV:%.*]] = bitcast i64* [[DOTCAPTURE_EXPR__ADDR]] to i32* -// CHECK13-NEXT: [[TMP1:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK13-NEXT: [[TMP1:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK13-NEXT: [[CONV1:%.*]] = bitcast i64* [[DOTCAPTURE_EXPR__CASTED]] to i32* // CHECK13-NEXT: store i32 [[TMP1]], i32* [[CONV1]], align 4 // CHECK13-NEXT: [[TMP2:%.*]] = load i64, i64* [[DOTCAPTURE_EXPR__CASTED]], align 8 @@ -10951,7 +10951,7 @@ int main (int argc, char **argv) { // CHECK13-NEXT: [[TMP9:%.*]] = zext i32 [[TMP8]] to i64 // CHECK13-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !58 // CHECK13-NEXT: [[TMP11:%.*]] = zext i32 [[TMP10]] to i64 -// CHECK13-NEXT: [[TMP12:%.*]] = load i32, i32* [[CONV]], align 8, !llvm.access.group !58 +// CHECK13-NEXT: [[TMP12:%.*]] = load i32, i32* [[CONV]], align 4, !llvm.access.group !58 // CHECK13-NEXT: [[CONV2:%.*]] = bitcast i64* [[DOTCAPTURE_EXPR__CASTED]] to i32* // CHECK13-NEXT: store i32 [[TMP12]], i32* [[CONV2]], align 4, !llvm.access.group !58 // CHECK13-NEXT: [[TMP13:%.*]] = load i64, i64* [[DOTCAPTURE_EXPR__CASTED]], align 8, !llvm.access.group !58 @@ -11011,7 +11011,7 @@ int main (int argc, char **argv) { // CHECK13-NEXT: store i32 [[CONV2]], i32* [[DOTOMP_UB]], align 4 // CHECK13-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK13-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK13-NEXT: [[TMP3:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK13-NEXT: [[TMP3:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK13-NEXT: [[TMP4:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK13-NEXT: [[TMP5:%.*]] = load i32, i32* 
[[TMP4]], align 4 // CHECK13-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB2]], i32 [[TMP5]], i32 33, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 [[TMP3]]) @@ -11258,7 +11258,7 @@ int main (int argc, char **argv) { // CHECK13-NEXT: store i64 [[DOTCAPTURE_EXPR_]], i64* [[DOTCAPTURE_EXPR__ADDR]], align 8 // CHECK13-NEXT: [[TMP0:%.*]] = load [10 x i32]*, [10 x i32]** [[A_ADDR]], align 8 // CHECK13-NEXT: [[CONV:%.*]] = bitcast i64* [[DOTCAPTURE_EXPR__ADDR]] to i32* -// CHECK13-NEXT: [[TMP1:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK13-NEXT: [[TMP1:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK13-NEXT: [[CONV1:%.*]] = bitcast i64* [[DOTCAPTURE_EXPR__CASTED]] to i32* // CHECK13-NEXT: store i32 [[TMP1]], i32* [[CONV1]], align 4 // CHECK13-NEXT: [[TMP2:%.*]] = load i64, i64* [[DOTCAPTURE_EXPR__CASTED]], align 8 @@ -11318,7 +11318,7 @@ int main (int argc, char **argv) { // CHECK13-NEXT: [[TMP9:%.*]] = zext i32 [[TMP8]] to i64 // CHECK13-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !70 // CHECK13-NEXT: [[TMP11:%.*]] = zext i32 [[TMP10]] to i64 -// CHECK13-NEXT: [[TMP12:%.*]] = load i32, i32* [[CONV]], align 8, !llvm.access.group !70 +// CHECK13-NEXT: [[TMP12:%.*]] = load i32, i32* [[CONV]], align 4, !llvm.access.group !70 // CHECK13-NEXT: [[CONV2:%.*]] = bitcast i64* [[DOTCAPTURE_EXPR__CASTED]] to i32* // CHECK13-NEXT: store i32 [[TMP12]], i32* [[CONV2]], align 4, !llvm.access.group !70 // CHECK13-NEXT: [[TMP13:%.*]] = load i64, i64* [[DOTCAPTURE_EXPR__CASTED]], align 8, !llvm.access.group !70 @@ -11378,7 +11378,7 @@ int main (int argc, char **argv) { // CHECK13-NEXT: store i32 [[CONV2]], i32* [[DOTOMP_UB]], align 4 // CHECK13-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK13-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK13-NEXT: [[TMP3:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK13-NEXT: [[TMP3:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK13-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 // CHECK13-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 // CHECK13-NEXT: [[TMP6:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 @@ -11836,7 +11836,7 @@ int main (int argc, char **argv) { // CHECK14-NEXT: [[CONV:%.*]] = bitcast i64* [[N_ADDR]] to i32* // CHECK14-NEXT: [[TMP0:%.*]] = load i64, i64* [[VLA_ADDR]], align 8 // CHECK14-NEXT: [[TMP1:%.*]] = load i32*, i32** [[A_ADDR]], align 8 -// CHECK14-NEXT: [[TMP2:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK14-NEXT: [[TMP2:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK14-NEXT: [[CONV1:%.*]] = bitcast i64* [[N_CASTED]] to i32* // CHECK14-NEXT: store i32 [[TMP2]], i32* [[CONV1]], align 4 // CHECK14-NEXT: [[TMP3:%.*]] = load i64, i64* [[N_CASTED]], align 8 @@ -11871,7 +11871,7 @@ int main (int argc, char **argv) { // CHECK14-NEXT: [[CONV:%.*]] = bitcast i64* [[N_ADDR]] to i32* // CHECK14-NEXT: [[TMP0:%.*]] = load i64, i64* [[VLA_ADDR]], align 8 // CHECK14-NEXT: [[TMP1:%.*]] = load i32*, i32** [[A_ADDR]], align 8 -// CHECK14-NEXT: [[TMP2:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK14-NEXT: [[TMP2:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK14-NEXT: store i32 [[TMP2]], i32* [[DOTCAPTURE_EXPR_]], align 4 // CHECK14-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 // CHECK14-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP3]], 0 @@ -11917,7 +11917,7 @@ int main (int argc, char **argv) { // CHECK14-NEXT: [[TMP16:%.*]] = zext i32 [[TMP15]] to i64 // 
CHECK14-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !13 // CHECK14-NEXT: [[TMP18:%.*]] = zext i32 [[TMP17]] to i64 -// CHECK14-NEXT: [[TMP19:%.*]] = load i32, i32* [[CONV]], align 8, !llvm.access.group !13 +// CHECK14-NEXT: [[TMP19:%.*]] = load i32, i32* [[CONV]], align 4, !llvm.access.group !13 // CHECK14-NEXT: [[CONV6:%.*]] = bitcast i64* [[N_CASTED]] to i32* // CHECK14-NEXT: store i32 [[TMP19]], i32* [[CONV6]], align 4, !llvm.access.group !13 // CHECK14-NEXT: [[TMP20:%.*]] = load i64, i64* [[N_CASTED]], align 8, !llvm.access.group !13 @@ -11982,7 +11982,7 @@ int main (int argc, char **argv) { // CHECK14-NEXT: [[CONV:%.*]] = bitcast i64* [[N_ADDR]] to i32* // CHECK14-NEXT: [[TMP0:%.*]] = load i64, i64* [[VLA_ADDR]], align 8 // CHECK14-NEXT: [[TMP1:%.*]] = load i32*, i32** [[A_ADDR]], align 8 -// CHECK14-NEXT: [[TMP2:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK14-NEXT: [[TMP2:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK14-NEXT: store i32 [[TMP2]], i32* [[DOTCAPTURE_EXPR_]], align 4 // CHECK14-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 // CHECK14-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP3]], 0 @@ -12082,7 +12082,7 @@ int main (int argc, char **argv) { // CHECK14-NEXT: [[CONV:%.*]] = bitcast i64* [[N_ADDR]] to i32* // CHECK14-NEXT: [[TMP0:%.*]] = load i64, i64* [[VLA_ADDR]], align 8 // CHECK14-NEXT: [[TMP1:%.*]] = load i32*, i32** [[A_ADDR]], align 8 -// CHECK14-NEXT: [[TMP2:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK14-NEXT: [[TMP2:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK14-NEXT: [[CONV1:%.*]] = bitcast i64* [[N_CASTED]] to i32* // CHECK14-NEXT: store i32 [[TMP2]], i32* [[CONV1]], align 4 // CHECK14-NEXT: [[TMP3:%.*]] = load i64, i64* [[N_CASTED]], align 8 @@ -12117,7 +12117,7 @@ int main (int argc, char **argv) { // CHECK14-NEXT: [[CONV:%.*]] = bitcast i64* [[N_ADDR]] to i32* // CHECK14-NEXT: [[TMP0:%.*]] = load i64, i64* [[VLA_ADDR]], align 8 // CHECK14-NEXT: [[TMP1:%.*]] = load i32*, i32** [[A_ADDR]], align 8 -// CHECK14-NEXT: [[TMP2:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK14-NEXT: [[TMP2:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK14-NEXT: store i32 [[TMP2]], i32* [[DOTCAPTURE_EXPR_]], align 4 // CHECK14-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 // CHECK14-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP3]], 0 @@ -12163,7 +12163,7 @@ int main (int argc, char **argv) { // CHECK14-NEXT: [[TMP16:%.*]] = zext i32 [[TMP15]] to i64 // CHECK14-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !22 // CHECK14-NEXT: [[TMP18:%.*]] = zext i32 [[TMP17]] to i64 -// CHECK14-NEXT: [[TMP19:%.*]] = load i32, i32* [[CONV]], align 8, !llvm.access.group !22 +// CHECK14-NEXT: [[TMP19:%.*]] = load i32, i32* [[CONV]], align 4, !llvm.access.group !22 // CHECK14-NEXT: [[CONV6:%.*]] = bitcast i64* [[N_CASTED]] to i32* // CHECK14-NEXT: store i32 [[TMP19]], i32* [[CONV6]], align 4, !llvm.access.group !22 // CHECK14-NEXT: [[TMP20:%.*]] = load i64, i64* [[N_CASTED]], align 8, !llvm.access.group !22 @@ -12228,7 +12228,7 @@ int main (int argc, char **argv) { // CHECK14-NEXT: [[CONV:%.*]] = bitcast i64* [[N_ADDR]] to i32* // CHECK14-NEXT: [[TMP0:%.*]] = load i64, i64* [[VLA_ADDR]], align 8 // CHECK14-NEXT: [[TMP1:%.*]] = load i32*, i32** [[A_ADDR]], align 8 -// CHECK14-NEXT: [[TMP2:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK14-NEXT: [[TMP2:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK14-NEXT: store i32 [[TMP2]], i32* [[DOTCAPTURE_EXPR_]], align 4 // 
CHECK14-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 // CHECK14-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP3]], 0 @@ -12332,11 +12332,11 @@ int main (int argc, char **argv) { // CHECK14-NEXT: [[TMP0:%.*]] = load i64, i64* [[VLA_ADDR]], align 8 // CHECK14-NEXT: [[TMP1:%.*]] = load i32*, i32** [[A_ADDR]], align 8 // CHECK14-NEXT: [[CONV1:%.*]] = bitcast i64* [[DOTCAPTURE_EXPR__ADDR]] to i32* -// CHECK14-NEXT: [[TMP2:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK14-NEXT: [[TMP2:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK14-NEXT: [[CONV2:%.*]] = bitcast i64* [[N_CASTED]] to i32* // CHECK14-NEXT: store i32 [[TMP2]], i32* [[CONV2]], align 4 // CHECK14-NEXT: [[TMP3:%.*]] = load i64, i64* [[N_CASTED]], align 8 -// CHECK14-NEXT: [[TMP4:%.*]] = load i32, i32* [[CONV1]], align 8 +// CHECK14-NEXT: [[TMP4:%.*]] = load i32, i32* [[CONV1]], align 4 // CHECK14-NEXT: [[CONV3:%.*]] = bitcast i64* [[DOTCAPTURE_EXPR__CASTED]] to i32* // CHECK14-NEXT: store i32 [[TMP4]], i32* [[CONV3]], align 4 // CHECK14-NEXT: [[TMP5:%.*]] = load i64, i64* [[DOTCAPTURE_EXPR__CASTED]], align 8 @@ -12375,7 +12375,7 @@ int main (int argc, char **argv) { // CHECK14-NEXT: [[TMP0:%.*]] = load i64, i64* [[VLA_ADDR]], align 8 // CHECK14-NEXT: [[TMP1:%.*]] = load i32*, i32** [[A_ADDR]], align 8 // CHECK14-NEXT: [[CONV1:%.*]] = bitcast i64* [[DOTCAPTURE_EXPR__ADDR]] to i32* -// CHECK14-NEXT: [[TMP2:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK14-NEXT: [[TMP2:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK14-NEXT: store i32 [[TMP2]], i32* [[DOTCAPTURE_EXPR_2]], align 4 // CHECK14-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_2]], align 4 // CHECK14-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP3]], 0 @@ -12392,7 +12392,7 @@ int main (int argc, char **argv) { // CHECK14-NEXT: store i32 [[TMP5]], i32* [[DOTOMP_COMB_UB]], align 4 // CHECK14-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK14-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK14-NEXT: [[TMP6:%.*]] = load i32, i32* [[CONV1]], align 8 +// CHECK14-NEXT: [[TMP6:%.*]] = load i32, i32* [[CONV1]], align 4 // CHECK14-NEXT: [[TMP7:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK14-NEXT: [[TMP8:%.*]] = load i32, i32* [[TMP7]], align 4 // CHECK14-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP8]], i32 91, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 [[TMP6]]) @@ -12423,11 +12423,11 @@ int main (int argc, char **argv) { // CHECK14-NEXT: [[TMP17:%.*]] = zext i32 [[TMP16]] to i64 // CHECK14-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !28 // CHECK14-NEXT: [[TMP19:%.*]] = zext i32 [[TMP18]] to i64 -// CHECK14-NEXT: [[TMP20:%.*]] = load i32, i32* [[CONV]], align 8, !llvm.access.group !28 +// CHECK14-NEXT: [[TMP20:%.*]] = load i32, i32* [[CONV]], align 4, !llvm.access.group !28 // CHECK14-NEXT: [[CONV8:%.*]] = bitcast i64* [[N_CASTED]] to i32* // CHECK14-NEXT: store i32 [[TMP20]], i32* [[CONV8]], align 4, !llvm.access.group !28 // CHECK14-NEXT: [[TMP21:%.*]] = load i64, i64* [[N_CASTED]], align 8, !llvm.access.group !28 -// CHECK14-NEXT: [[TMP22:%.*]] = load i32, i32* [[CONV1]], align 8, !llvm.access.group !28 +// CHECK14-NEXT: [[TMP22:%.*]] = load i32, i32* [[CONV1]], align 4, !llvm.access.group !28 // CHECK14-NEXT: [[CONV9:%.*]] = bitcast i64* [[DOTCAPTURE_EXPR__CASTED]] to i32* // CHECK14-NEXT: store i32 [[TMP22]], i32* [[CONV9]], align 4, !llvm.access.group !28 // CHECK14-NEXT: 
[[TMP23:%.*]] = load i64, i64* [[DOTCAPTURE_EXPR__CASTED]], align 8, !llvm.access.group !28 @@ -12518,7 +12518,7 @@ int main (int argc, char **argv) { // CHECK14-NEXT: [[TMP0:%.*]] = load i64, i64* [[VLA_ADDR]], align 8 // CHECK14-NEXT: [[TMP1:%.*]] = load i32*, i32** [[A_ADDR]], align 8 // CHECK14-NEXT: [[CONV1:%.*]] = bitcast i64* [[DOTCAPTURE_EXPR__ADDR]] to i32* -// CHECK14-NEXT: [[TMP2:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK14-NEXT: [[TMP2:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK14-NEXT: store i32 [[TMP2]], i32* [[DOTCAPTURE_EXPR_2]], align 4 // CHECK14-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_2]], align 4 // CHECK14-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP3]], 0 @@ -12618,7 +12618,7 @@ int main (int argc, char **argv) { // CHECK14-NEXT: [[CONV:%.*]] = bitcast i64* [[N_ADDR]] to i32* // CHECK14-NEXT: [[TMP0:%.*]] = load i64, i64* [[VLA_ADDR]], align 8 // CHECK14-NEXT: [[TMP1:%.*]] = load i32*, i32** [[A_ADDR]], align 8 -// CHECK14-NEXT: [[TMP2:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK14-NEXT: [[TMP2:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK14-NEXT: [[CONV1:%.*]] = bitcast i64* [[N_CASTED]] to i32* // CHECK14-NEXT: store i32 [[TMP2]], i32* [[CONV1]], align 4 // CHECK14-NEXT: [[TMP3:%.*]] = load i64, i64* [[N_CASTED]], align 8 @@ -12653,7 +12653,7 @@ int main (int argc, char **argv) { // CHECK14-NEXT: [[CONV:%.*]] = bitcast i64* [[N_ADDR]] to i32* // CHECK14-NEXT: [[TMP0:%.*]] = load i64, i64* [[VLA_ADDR]], align 8 // CHECK14-NEXT: [[TMP1:%.*]] = load i32*, i32** [[A_ADDR]], align 8 -// CHECK14-NEXT: [[TMP2:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK14-NEXT: [[TMP2:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK14-NEXT: store i32 [[TMP2]], i32* [[DOTCAPTURE_EXPR_]], align 4 // CHECK14-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 // CHECK14-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP3]], 0 @@ -12699,7 +12699,7 @@ int main (int argc, char **argv) { // CHECK14-NEXT: [[TMP16:%.*]] = zext i32 [[TMP15]] to i64 // CHECK14-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !34 // CHECK14-NEXT: [[TMP18:%.*]] = zext i32 [[TMP17]] to i64 -// CHECK14-NEXT: [[TMP19:%.*]] = load i32, i32* [[CONV]], align 8, !llvm.access.group !34 +// CHECK14-NEXT: [[TMP19:%.*]] = load i32, i32* [[CONV]], align 4, !llvm.access.group !34 // CHECK14-NEXT: [[CONV6:%.*]] = bitcast i64* [[N_CASTED]] to i32* // CHECK14-NEXT: store i32 [[TMP19]], i32* [[CONV6]], align 4, !llvm.access.group !34 // CHECK14-NEXT: [[TMP20:%.*]] = load i64, i64* [[N_CASTED]], align 8, !llvm.access.group !34 @@ -12764,7 +12764,7 @@ int main (int argc, char **argv) { // CHECK14-NEXT: [[CONV:%.*]] = bitcast i64* [[N_ADDR]] to i32* // CHECK14-NEXT: [[TMP0:%.*]] = load i64, i64* [[VLA_ADDR]], align 8 // CHECK14-NEXT: [[TMP1:%.*]] = load i32*, i32** [[A_ADDR]], align 8 -// CHECK14-NEXT: [[TMP2:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK14-NEXT: [[TMP2:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK14-NEXT: store i32 [[TMP2]], i32* [[DOTCAPTURE_EXPR_]], align 4 // CHECK14-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 // CHECK14-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP3]], 0 @@ -12864,11 +12864,11 @@ int main (int argc, char **argv) { // CHECK14-NEXT: [[TMP0:%.*]] = load i64, i64* [[VLA_ADDR]], align 8 // CHECK14-NEXT: [[TMP1:%.*]] = load i32*, i32** [[A_ADDR]], align 8 // CHECK14-NEXT: [[CONV1:%.*]] = bitcast i64* [[DOTCAPTURE_EXPR__ADDR]] to i32* -// CHECK14-NEXT: [[TMP2:%.*]] = load i32, i32* [[CONV]], align 8 +// 
CHECK14-NEXT: [[TMP2:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK14-NEXT: [[CONV2:%.*]] = bitcast i64* [[N_CASTED]] to i32* // CHECK14-NEXT: store i32 [[TMP2]], i32* [[CONV2]], align 4 // CHECK14-NEXT: [[TMP3:%.*]] = load i64, i64* [[N_CASTED]], align 8 -// CHECK14-NEXT: [[TMP4:%.*]] = load i32, i32* [[CONV1]], align 8 +// CHECK14-NEXT: [[TMP4:%.*]] = load i32, i32* [[CONV1]], align 4 // CHECK14-NEXT: [[CONV3:%.*]] = bitcast i64* [[DOTCAPTURE_EXPR__CASTED]] to i32* // CHECK14-NEXT: store i32 [[TMP4]], i32* [[CONV3]], align 4 // CHECK14-NEXT: [[TMP5:%.*]] = load i64, i64* [[DOTCAPTURE_EXPR__CASTED]], align 8 @@ -12907,7 +12907,7 @@ int main (int argc, char **argv) { // CHECK14-NEXT: [[TMP0:%.*]] = load i64, i64* [[VLA_ADDR]], align 8 // CHECK14-NEXT: [[TMP1:%.*]] = load i32*, i32** [[A_ADDR]], align 8 // CHECK14-NEXT: [[CONV1:%.*]] = bitcast i64* [[DOTCAPTURE_EXPR__ADDR]] to i32* -// CHECK14-NEXT: [[TMP2:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK14-NEXT: [[TMP2:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK14-NEXT: store i32 [[TMP2]], i32* [[DOTCAPTURE_EXPR_2]], align 4 // CHECK14-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_2]], align 4 // CHECK14-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP3]], 0 @@ -12953,11 +12953,11 @@ int main (int argc, char **argv) { // CHECK14-NEXT: [[TMP16:%.*]] = zext i32 [[TMP15]] to i64 // CHECK14-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !40 // CHECK14-NEXT: [[TMP18:%.*]] = zext i32 [[TMP17]] to i64 -// CHECK14-NEXT: [[TMP19:%.*]] = load i32, i32* [[CONV]], align 8, !llvm.access.group !40 +// CHECK14-NEXT: [[TMP19:%.*]] = load i32, i32* [[CONV]], align 4, !llvm.access.group !40 // CHECK14-NEXT: [[CONV8:%.*]] = bitcast i64* [[N_CASTED]] to i32* // CHECK14-NEXT: store i32 [[TMP19]], i32* [[CONV8]], align 4, !llvm.access.group !40 // CHECK14-NEXT: [[TMP20:%.*]] = load i64, i64* [[N_CASTED]], align 8, !llvm.access.group !40 -// CHECK14-NEXT: [[TMP21:%.*]] = load i32, i32* [[CONV1]], align 8, !llvm.access.group !40 +// CHECK14-NEXT: [[TMP21:%.*]] = load i32, i32* [[CONV1]], align 4, !llvm.access.group !40 // CHECK14-NEXT: [[CONV9:%.*]] = bitcast i64* [[DOTCAPTURE_EXPR__CASTED]] to i32* // CHECK14-NEXT: store i32 [[TMP21]], i32* [[CONV9]], align 4, !llvm.access.group !40 // CHECK14-NEXT: [[TMP22:%.*]] = load i64, i64* [[DOTCAPTURE_EXPR__CASTED]], align 8, !llvm.access.group !40 @@ -13025,7 +13025,7 @@ int main (int argc, char **argv) { // CHECK14-NEXT: [[TMP0:%.*]] = load i64, i64* [[VLA_ADDR]], align 8 // CHECK14-NEXT: [[TMP1:%.*]] = load i32*, i32** [[A_ADDR]], align 8 // CHECK14-NEXT: [[CONV1:%.*]] = bitcast i64* [[DOTCAPTURE_EXPR__ADDR]] to i32* -// CHECK14-NEXT: [[TMP2:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK14-NEXT: [[TMP2:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK14-NEXT: store i32 [[TMP2]], i32* [[DOTCAPTURE_EXPR_2]], align 4 // CHECK14-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_2]], align 4 // CHECK14-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP3]], 0 @@ -13048,7 +13048,7 @@ int main (int argc, char **argv) { // CHECK14-NEXT: store i32 [[CONV6]], i32* [[DOTOMP_UB]], align 4 // CHECK14-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK14-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK14-NEXT: [[TMP8:%.*]] = load i32, i32* [[CONV1]], align 8 +// CHECK14-NEXT: [[TMP8:%.*]] = load i32, i32* [[CONV1]], align 4 // CHECK14-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 // CHECK14-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 // 
CHECK14-NEXT: [[TMP11:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 @@ -13600,7 +13600,7 @@ int main (int argc, char **argv) { // CHECK14-NEXT: store i64 [[DOTCAPTURE_EXPR_]], i64* [[DOTCAPTURE_EXPR__ADDR]], align 8 // CHECK14-NEXT: [[TMP0:%.*]] = load [10 x i32]*, [10 x i32]** [[A_ADDR]], align 8 // CHECK14-NEXT: [[CONV:%.*]] = bitcast i64* [[DOTCAPTURE_EXPR__ADDR]] to i32* -// CHECK14-NEXT: [[TMP1:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK14-NEXT: [[TMP1:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK14-NEXT: [[CONV1:%.*]] = bitcast i64* [[DOTCAPTURE_EXPR__CASTED]] to i32* // CHECK14-NEXT: store i32 [[TMP1]], i32* [[CONV1]], align 4 // CHECK14-NEXT: [[TMP2:%.*]] = load i64, i64* [[DOTCAPTURE_EXPR__CASTED]], align 8 @@ -13660,7 +13660,7 @@ int main (int argc, char **argv) { // CHECK14-NEXT: [[TMP9:%.*]] = zext i32 [[TMP8]] to i64 // CHECK14-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !58 // CHECK14-NEXT: [[TMP11:%.*]] = zext i32 [[TMP10]] to i64 -// CHECK14-NEXT: [[TMP12:%.*]] = load i32, i32* [[CONV]], align 8, !llvm.access.group !58 +// CHECK14-NEXT: [[TMP12:%.*]] = load i32, i32* [[CONV]], align 4, !llvm.access.group !58 // CHECK14-NEXT: [[CONV2:%.*]] = bitcast i64* [[DOTCAPTURE_EXPR__CASTED]] to i32* // CHECK14-NEXT: store i32 [[TMP12]], i32* [[CONV2]], align 4, !llvm.access.group !58 // CHECK14-NEXT: [[TMP13:%.*]] = load i64, i64* [[DOTCAPTURE_EXPR__CASTED]], align 8, !llvm.access.group !58 @@ -13720,7 +13720,7 @@ int main (int argc, char **argv) { // CHECK14-NEXT: store i32 [[CONV2]], i32* [[DOTOMP_UB]], align 4 // CHECK14-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK14-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK14-NEXT: [[TMP3:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK14-NEXT: [[TMP3:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK14-NEXT: [[TMP4:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK14-NEXT: [[TMP5:%.*]] = load i32, i32* [[TMP4]], align 4 // CHECK14-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB2]], i32 [[TMP5]], i32 33, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 [[TMP3]]) @@ -13967,7 +13967,7 @@ int main (int argc, char **argv) { // CHECK14-NEXT: store i64 [[DOTCAPTURE_EXPR_]], i64* [[DOTCAPTURE_EXPR__ADDR]], align 8 // CHECK14-NEXT: [[TMP0:%.*]] = load [10 x i32]*, [10 x i32]** [[A_ADDR]], align 8 // CHECK14-NEXT: [[CONV:%.*]] = bitcast i64* [[DOTCAPTURE_EXPR__ADDR]] to i32* -// CHECK14-NEXT: [[TMP1:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK14-NEXT: [[TMP1:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK14-NEXT: [[CONV1:%.*]] = bitcast i64* [[DOTCAPTURE_EXPR__CASTED]] to i32* // CHECK14-NEXT: store i32 [[TMP1]], i32* [[CONV1]], align 4 // CHECK14-NEXT: [[TMP2:%.*]] = load i64, i64* [[DOTCAPTURE_EXPR__CASTED]], align 8 @@ -14027,7 +14027,7 @@ int main (int argc, char **argv) { // CHECK14-NEXT: [[TMP9:%.*]] = zext i32 [[TMP8]] to i64 // CHECK14-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !70 // CHECK14-NEXT: [[TMP11:%.*]] = zext i32 [[TMP10]] to i64 -// CHECK14-NEXT: [[TMP12:%.*]] = load i32, i32* [[CONV]], align 8, !llvm.access.group !70 +// CHECK14-NEXT: [[TMP12:%.*]] = load i32, i32* [[CONV]], align 4, !llvm.access.group !70 // CHECK14-NEXT: [[CONV2:%.*]] = bitcast i64* [[DOTCAPTURE_EXPR__CASTED]] to i32* // CHECK14-NEXT: store i32 [[TMP12]], i32* [[CONV2]], align 4, !llvm.access.group !70 // CHECK14-NEXT: [[TMP13:%.*]] = 
load i64, i64* [[DOTCAPTURE_EXPR__CASTED]], align 8, !llvm.access.group !70 @@ -14087,7 +14087,7 @@ int main (int argc, char **argv) { // CHECK14-NEXT: store i32 [[CONV2]], i32* [[DOTOMP_UB]], align 4 // CHECK14-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK14-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK14-NEXT: [[TMP3:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK14-NEXT: [[TMP3:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK14-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 // CHECK14-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 // CHECK14-NEXT: [[TMP6:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 @@ -19759,7 +19759,7 @@ int main (int argc, char **argv) { // CHECK17-NEXT: [[CONV:%.*]] = bitcast i64* [[N_ADDR]] to i32* // CHECK17-NEXT: [[TMP0:%.*]] = load i64, i64* [[VLA_ADDR]], align 8 // CHECK17-NEXT: [[TMP1:%.*]] = load i32*, i32** [[A_ADDR]], align 8 -// CHECK17-NEXT: [[TMP2:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK17-NEXT: [[TMP2:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK17-NEXT: [[CONV1:%.*]] = bitcast i64* [[N_CASTED]] to i32* // CHECK17-NEXT: store i32 [[TMP2]], i32* [[CONV1]], align 4 // CHECK17-NEXT: [[TMP3:%.*]] = load i64, i64* [[N_CASTED]], align 8 @@ -19794,7 +19794,7 @@ int main (int argc, char **argv) { // CHECK17-NEXT: [[CONV:%.*]] = bitcast i64* [[N_ADDR]] to i32* // CHECK17-NEXT: [[TMP0:%.*]] = load i64, i64* [[VLA_ADDR]], align 8 // CHECK17-NEXT: [[TMP1:%.*]] = load i32*, i32** [[A_ADDR]], align 8 -// CHECK17-NEXT: [[TMP2:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK17-NEXT: [[TMP2:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK17-NEXT: store i32 [[TMP2]], i32* [[DOTCAPTURE_EXPR_]], align 4 // CHECK17-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 // CHECK17-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP3]], 0 @@ -19840,7 +19840,7 @@ int main (int argc, char **argv) { // CHECK17-NEXT: [[TMP16:%.*]] = zext i32 [[TMP15]] to i64 // CHECK17-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !13 // CHECK17-NEXT: [[TMP18:%.*]] = zext i32 [[TMP17]] to i64 -// CHECK17-NEXT: [[TMP19:%.*]] = load i32, i32* [[CONV]], align 8, !llvm.access.group !13 +// CHECK17-NEXT: [[TMP19:%.*]] = load i32, i32* [[CONV]], align 4, !llvm.access.group !13 // CHECK17-NEXT: [[CONV6:%.*]] = bitcast i64* [[N_CASTED]] to i32* // CHECK17-NEXT: store i32 [[TMP19]], i32* [[CONV6]], align 4, !llvm.access.group !13 // CHECK17-NEXT: [[TMP20:%.*]] = load i64, i64* [[N_CASTED]], align 8, !llvm.access.group !13 @@ -19905,7 +19905,7 @@ int main (int argc, char **argv) { // CHECK17-NEXT: [[CONV:%.*]] = bitcast i64* [[N_ADDR]] to i32* // CHECK17-NEXT: [[TMP0:%.*]] = load i64, i64* [[VLA_ADDR]], align 8 // CHECK17-NEXT: [[TMP1:%.*]] = load i32*, i32** [[A_ADDR]], align 8 -// CHECK17-NEXT: [[TMP2:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK17-NEXT: [[TMP2:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK17-NEXT: store i32 [[TMP2]], i32* [[DOTCAPTURE_EXPR_]], align 4 // CHECK17-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 // CHECK17-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP3]], 0 @@ -20005,7 +20005,7 @@ int main (int argc, char **argv) { // CHECK17-NEXT: [[CONV:%.*]] = bitcast i64* [[N_ADDR]] to i32* // CHECK17-NEXT: [[TMP0:%.*]] = load i64, i64* [[VLA_ADDR]], align 8 // CHECK17-NEXT: [[TMP1:%.*]] = load i32*, i32** [[A_ADDR]], align 8 -// CHECK17-NEXT: [[TMP2:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK17-NEXT: [[TMP2:%.*]] = load i32, i32* [[CONV]], 
align 4 // CHECK17-NEXT: [[CONV1:%.*]] = bitcast i64* [[N_CASTED]] to i32* // CHECK17-NEXT: store i32 [[TMP2]], i32* [[CONV1]], align 4 // CHECK17-NEXT: [[TMP3:%.*]] = load i64, i64* [[N_CASTED]], align 8 @@ -20040,7 +20040,7 @@ int main (int argc, char **argv) { // CHECK17-NEXT: [[CONV:%.*]] = bitcast i64* [[N_ADDR]] to i32* // CHECK17-NEXT: [[TMP0:%.*]] = load i64, i64* [[VLA_ADDR]], align 8 // CHECK17-NEXT: [[TMP1:%.*]] = load i32*, i32** [[A_ADDR]], align 8 -// CHECK17-NEXT: [[TMP2:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK17-NEXT: [[TMP2:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK17-NEXT: store i32 [[TMP2]], i32* [[DOTCAPTURE_EXPR_]], align 4 // CHECK17-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 // CHECK17-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP3]], 0 @@ -20086,7 +20086,7 @@ int main (int argc, char **argv) { // CHECK17-NEXT: [[TMP16:%.*]] = zext i32 [[TMP15]] to i64 // CHECK17-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !22 // CHECK17-NEXT: [[TMP18:%.*]] = zext i32 [[TMP17]] to i64 -// CHECK17-NEXT: [[TMP19:%.*]] = load i32, i32* [[CONV]], align 8, !llvm.access.group !22 +// CHECK17-NEXT: [[TMP19:%.*]] = load i32, i32* [[CONV]], align 4, !llvm.access.group !22 // CHECK17-NEXT: [[CONV6:%.*]] = bitcast i64* [[N_CASTED]] to i32* // CHECK17-NEXT: store i32 [[TMP19]], i32* [[CONV6]], align 4, !llvm.access.group !22 // CHECK17-NEXT: [[TMP20:%.*]] = load i64, i64* [[N_CASTED]], align 8, !llvm.access.group !22 @@ -20151,7 +20151,7 @@ int main (int argc, char **argv) { // CHECK17-NEXT: [[CONV:%.*]] = bitcast i64* [[N_ADDR]] to i32* // CHECK17-NEXT: [[TMP0:%.*]] = load i64, i64* [[VLA_ADDR]], align 8 // CHECK17-NEXT: [[TMP1:%.*]] = load i32*, i32** [[A_ADDR]], align 8 -// CHECK17-NEXT: [[TMP2:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK17-NEXT: [[TMP2:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK17-NEXT: store i32 [[TMP2]], i32* [[DOTCAPTURE_EXPR_]], align 4 // CHECK17-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 // CHECK17-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP3]], 0 @@ -20255,11 +20255,11 @@ int main (int argc, char **argv) { // CHECK17-NEXT: [[TMP0:%.*]] = load i64, i64* [[VLA_ADDR]], align 8 // CHECK17-NEXT: [[TMP1:%.*]] = load i32*, i32** [[A_ADDR]], align 8 // CHECK17-NEXT: [[CONV1:%.*]] = bitcast i64* [[DOTCAPTURE_EXPR__ADDR]] to i32* -// CHECK17-NEXT: [[TMP2:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK17-NEXT: [[TMP2:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK17-NEXT: [[CONV2:%.*]] = bitcast i64* [[N_CASTED]] to i32* // CHECK17-NEXT: store i32 [[TMP2]], i32* [[CONV2]], align 4 // CHECK17-NEXT: [[TMP3:%.*]] = load i64, i64* [[N_CASTED]], align 8 -// CHECK17-NEXT: [[TMP4:%.*]] = load i32, i32* [[CONV1]], align 8 +// CHECK17-NEXT: [[TMP4:%.*]] = load i32, i32* [[CONV1]], align 4 // CHECK17-NEXT: [[CONV3:%.*]] = bitcast i64* [[DOTCAPTURE_EXPR__CASTED]] to i32* // CHECK17-NEXT: store i32 [[TMP4]], i32* [[CONV3]], align 4 // CHECK17-NEXT: [[TMP5:%.*]] = load i64, i64* [[DOTCAPTURE_EXPR__CASTED]], align 8 @@ -20298,7 +20298,7 @@ int main (int argc, char **argv) { // CHECK17-NEXT: [[TMP0:%.*]] = load i64, i64* [[VLA_ADDR]], align 8 // CHECK17-NEXT: [[TMP1:%.*]] = load i32*, i32** [[A_ADDR]], align 8 // CHECK17-NEXT: [[CONV1:%.*]] = bitcast i64* [[DOTCAPTURE_EXPR__ADDR]] to i32* -// CHECK17-NEXT: [[TMP2:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK17-NEXT: [[TMP2:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK17-NEXT: store i32 [[TMP2]], i32* [[DOTCAPTURE_EXPR_2]], align 
4 // CHECK17-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_2]], align 4 // CHECK17-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP3]], 0 @@ -20315,7 +20315,7 @@ int main (int argc, char **argv) { // CHECK17-NEXT: store i32 [[TMP5]], i32* [[DOTOMP_COMB_UB]], align 4 // CHECK17-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK17-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK17-NEXT: [[TMP6:%.*]] = load i32, i32* [[CONV1]], align 8 +// CHECK17-NEXT: [[TMP6:%.*]] = load i32, i32* [[CONV1]], align 4 // CHECK17-NEXT: [[TMP7:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK17-NEXT: [[TMP8:%.*]] = load i32, i32* [[TMP7]], align 4 // CHECK17-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP8]], i32 91, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 [[TMP6]]) @@ -20346,11 +20346,11 @@ int main (int argc, char **argv) { // CHECK17-NEXT: [[TMP17:%.*]] = zext i32 [[TMP16]] to i64 // CHECK17-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !28 // CHECK17-NEXT: [[TMP19:%.*]] = zext i32 [[TMP18]] to i64 -// CHECK17-NEXT: [[TMP20:%.*]] = load i32, i32* [[CONV]], align 8, !llvm.access.group !28 +// CHECK17-NEXT: [[TMP20:%.*]] = load i32, i32* [[CONV]], align 4, !llvm.access.group !28 // CHECK17-NEXT: [[CONV8:%.*]] = bitcast i64* [[N_CASTED]] to i32* // CHECK17-NEXT: store i32 [[TMP20]], i32* [[CONV8]], align 4, !llvm.access.group !28 // CHECK17-NEXT: [[TMP21:%.*]] = load i64, i64* [[N_CASTED]], align 8, !llvm.access.group !28 -// CHECK17-NEXT: [[TMP22:%.*]] = load i32, i32* [[CONV1]], align 8, !llvm.access.group !28 +// CHECK17-NEXT: [[TMP22:%.*]] = load i32, i32* [[CONV1]], align 4, !llvm.access.group !28 // CHECK17-NEXT: [[CONV9:%.*]] = bitcast i64* [[DOTCAPTURE_EXPR__CASTED]] to i32* // CHECK17-NEXT: store i32 [[TMP22]], i32* [[CONV9]], align 4, !llvm.access.group !28 // CHECK17-NEXT: [[TMP23:%.*]] = load i64, i64* [[DOTCAPTURE_EXPR__CASTED]], align 8, !llvm.access.group !28 @@ -20441,7 +20441,7 @@ int main (int argc, char **argv) { // CHECK17-NEXT: [[TMP0:%.*]] = load i64, i64* [[VLA_ADDR]], align 8 // CHECK17-NEXT: [[TMP1:%.*]] = load i32*, i32** [[A_ADDR]], align 8 // CHECK17-NEXT: [[CONV1:%.*]] = bitcast i64* [[DOTCAPTURE_EXPR__ADDR]] to i32* -// CHECK17-NEXT: [[TMP2:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK17-NEXT: [[TMP2:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK17-NEXT: store i32 [[TMP2]], i32* [[DOTCAPTURE_EXPR_2]], align 4 // CHECK17-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_2]], align 4 // CHECK17-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP3]], 0 @@ -20541,7 +20541,7 @@ int main (int argc, char **argv) { // CHECK17-NEXT: [[CONV:%.*]] = bitcast i64* [[N_ADDR]] to i32* // CHECK17-NEXT: [[TMP0:%.*]] = load i64, i64* [[VLA_ADDR]], align 8 // CHECK17-NEXT: [[TMP1:%.*]] = load i32*, i32** [[A_ADDR]], align 8 -// CHECK17-NEXT: [[TMP2:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK17-NEXT: [[TMP2:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK17-NEXT: [[CONV1:%.*]] = bitcast i64* [[N_CASTED]] to i32* // CHECK17-NEXT: store i32 [[TMP2]], i32* [[CONV1]], align 4 // CHECK17-NEXT: [[TMP3:%.*]] = load i64, i64* [[N_CASTED]], align 8 @@ -20576,7 +20576,7 @@ int main (int argc, char **argv) { // CHECK17-NEXT: [[CONV:%.*]] = bitcast i64* [[N_ADDR]] to i32* // CHECK17-NEXT: [[TMP0:%.*]] = load i64, i64* [[VLA_ADDR]], align 8 // CHECK17-NEXT: [[TMP1:%.*]] = load i32*, i32** [[A_ADDR]], align 8 -// CHECK17-NEXT: 
[[TMP2:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK17-NEXT: [[TMP2:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK17-NEXT: store i32 [[TMP2]], i32* [[DOTCAPTURE_EXPR_]], align 4 // CHECK17-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 // CHECK17-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP3]], 0 @@ -20622,7 +20622,7 @@ int main (int argc, char **argv) { // CHECK17-NEXT: [[TMP16:%.*]] = zext i32 [[TMP15]] to i64 // CHECK17-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !34 // CHECK17-NEXT: [[TMP18:%.*]] = zext i32 [[TMP17]] to i64 -// CHECK17-NEXT: [[TMP19:%.*]] = load i32, i32* [[CONV]], align 8, !llvm.access.group !34 +// CHECK17-NEXT: [[TMP19:%.*]] = load i32, i32* [[CONV]], align 4, !llvm.access.group !34 // CHECK17-NEXT: [[CONV6:%.*]] = bitcast i64* [[N_CASTED]] to i32* // CHECK17-NEXT: store i32 [[TMP19]], i32* [[CONV6]], align 4, !llvm.access.group !34 // CHECK17-NEXT: [[TMP20:%.*]] = load i64, i64* [[N_CASTED]], align 8, !llvm.access.group !34 @@ -20687,7 +20687,7 @@ int main (int argc, char **argv) { // CHECK17-NEXT: [[CONV:%.*]] = bitcast i64* [[N_ADDR]] to i32* // CHECK17-NEXT: [[TMP0:%.*]] = load i64, i64* [[VLA_ADDR]], align 8 // CHECK17-NEXT: [[TMP1:%.*]] = load i32*, i32** [[A_ADDR]], align 8 -// CHECK17-NEXT: [[TMP2:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK17-NEXT: [[TMP2:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK17-NEXT: store i32 [[TMP2]], i32* [[DOTCAPTURE_EXPR_]], align 4 // CHECK17-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 // CHECK17-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP3]], 0 @@ -20787,11 +20787,11 @@ int main (int argc, char **argv) { // CHECK17-NEXT: [[TMP0:%.*]] = load i64, i64* [[VLA_ADDR]], align 8 // CHECK17-NEXT: [[TMP1:%.*]] = load i32*, i32** [[A_ADDR]], align 8 // CHECK17-NEXT: [[CONV1:%.*]] = bitcast i64* [[DOTCAPTURE_EXPR__ADDR]] to i32* -// CHECK17-NEXT: [[TMP2:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK17-NEXT: [[TMP2:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK17-NEXT: [[CONV2:%.*]] = bitcast i64* [[N_CASTED]] to i32* // CHECK17-NEXT: store i32 [[TMP2]], i32* [[CONV2]], align 4 // CHECK17-NEXT: [[TMP3:%.*]] = load i64, i64* [[N_CASTED]], align 8 -// CHECK17-NEXT: [[TMP4:%.*]] = load i32, i32* [[CONV1]], align 8 +// CHECK17-NEXT: [[TMP4:%.*]] = load i32, i32* [[CONV1]], align 4 // CHECK17-NEXT: [[CONV3:%.*]] = bitcast i64* [[DOTCAPTURE_EXPR__CASTED]] to i32* // CHECK17-NEXT: store i32 [[TMP4]], i32* [[CONV3]], align 4 // CHECK17-NEXT: [[TMP5:%.*]] = load i64, i64* [[DOTCAPTURE_EXPR__CASTED]], align 8 @@ -20830,7 +20830,7 @@ int main (int argc, char **argv) { // CHECK17-NEXT: [[TMP0:%.*]] = load i64, i64* [[VLA_ADDR]], align 8 // CHECK17-NEXT: [[TMP1:%.*]] = load i32*, i32** [[A_ADDR]], align 8 // CHECK17-NEXT: [[CONV1:%.*]] = bitcast i64* [[DOTCAPTURE_EXPR__ADDR]] to i32* -// CHECK17-NEXT: [[TMP2:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK17-NEXT: [[TMP2:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK17-NEXT: store i32 [[TMP2]], i32* [[DOTCAPTURE_EXPR_2]], align 4 // CHECK17-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_2]], align 4 // CHECK17-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP3]], 0 @@ -20876,11 +20876,11 @@ int main (int argc, char **argv) { // CHECK17-NEXT: [[TMP16:%.*]] = zext i32 [[TMP15]] to i64 // CHECK17-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !40 // CHECK17-NEXT: [[TMP18:%.*]] = zext i32 [[TMP17]] to i64 -// CHECK17-NEXT: [[TMP19:%.*]] = load i32, i32* [[CONV]], align 8, 
!llvm.access.group !40 +// CHECK17-NEXT: [[TMP19:%.*]] = load i32, i32* [[CONV]], align 4, !llvm.access.group !40 // CHECK17-NEXT: [[CONV8:%.*]] = bitcast i64* [[N_CASTED]] to i32* // CHECK17-NEXT: store i32 [[TMP19]], i32* [[CONV8]], align 4, !llvm.access.group !40 // CHECK17-NEXT: [[TMP20:%.*]] = load i64, i64* [[N_CASTED]], align 8, !llvm.access.group !40 -// CHECK17-NEXT: [[TMP21:%.*]] = load i32, i32* [[CONV1]], align 8, !llvm.access.group !40 +// CHECK17-NEXT: [[TMP21:%.*]] = load i32, i32* [[CONV1]], align 4, !llvm.access.group !40 // CHECK17-NEXT: [[CONV9:%.*]] = bitcast i64* [[DOTCAPTURE_EXPR__CASTED]] to i32* // CHECK17-NEXT: store i32 [[TMP21]], i32* [[CONV9]], align 4, !llvm.access.group !40 // CHECK17-NEXT: [[TMP22:%.*]] = load i64, i64* [[DOTCAPTURE_EXPR__CASTED]], align 8, !llvm.access.group !40 @@ -20948,7 +20948,7 @@ int main (int argc, char **argv) { // CHECK17-NEXT: [[TMP0:%.*]] = load i64, i64* [[VLA_ADDR]], align 8 // CHECK17-NEXT: [[TMP1:%.*]] = load i32*, i32** [[A_ADDR]], align 8 // CHECK17-NEXT: [[CONV1:%.*]] = bitcast i64* [[DOTCAPTURE_EXPR__ADDR]] to i32* -// CHECK17-NEXT: [[TMP2:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK17-NEXT: [[TMP2:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK17-NEXT: store i32 [[TMP2]], i32* [[DOTCAPTURE_EXPR_2]], align 4 // CHECK17-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_2]], align 4 // CHECK17-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP3]], 0 @@ -20971,7 +20971,7 @@ int main (int argc, char **argv) { // CHECK17-NEXT: store i32 [[CONV6]], i32* [[DOTOMP_UB]], align 4 // CHECK17-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK17-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK17-NEXT: [[TMP8:%.*]] = load i32, i32* [[CONV1]], align 8 +// CHECK17-NEXT: [[TMP8:%.*]] = load i32, i32* [[CONV1]], align 4 // CHECK17-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 // CHECK17-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 // CHECK17-NEXT: [[TMP11:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 @@ -21523,7 +21523,7 @@ int main (int argc, char **argv) { // CHECK17-NEXT: store i64 [[DOTCAPTURE_EXPR_]], i64* [[DOTCAPTURE_EXPR__ADDR]], align 8 // CHECK17-NEXT: [[TMP0:%.*]] = load [10 x i32]*, [10 x i32]** [[A_ADDR]], align 8 // CHECK17-NEXT: [[CONV:%.*]] = bitcast i64* [[DOTCAPTURE_EXPR__ADDR]] to i32* -// CHECK17-NEXT: [[TMP1:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK17-NEXT: [[TMP1:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK17-NEXT: [[CONV1:%.*]] = bitcast i64* [[DOTCAPTURE_EXPR__CASTED]] to i32* // CHECK17-NEXT: store i32 [[TMP1]], i32* [[CONV1]], align 4 // CHECK17-NEXT: [[TMP2:%.*]] = load i64, i64* [[DOTCAPTURE_EXPR__CASTED]], align 8 @@ -21583,7 +21583,7 @@ int main (int argc, char **argv) { // CHECK17-NEXT: [[TMP9:%.*]] = zext i32 [[TMP8]] to i64 // CHECK17-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !58 // CHECK17-NEXT: [[TMP11:%.*]] = zext i32 [[TMP10]] to i64 -// CHECK17-NEXT: [[TMP12:%.*]] = load i32, i32* [[CONV]], align 8, !llvm.access.group !58 +// CHECK17-NEXT: [[TMP12:%.*]] = load i32, i32* [[CONV]], align 4, !llvm.access.group !58 // CHECK17-NEXT: [[CONV2:%.*]] = bitcast i64* [[DOTCAPTURE_EXPR__CASTED]] to i32* // CHECK17-NEXT: store i32 [[TMP12]], i32* [[CONV2]], align 4, !llvm.access.group !58 // CHECK17-NEXT: [[TMP13:%.*]] = load i64, i64* [[DOTCAPTURE_EXPR__CASTED]], align 8, !llvm.access.group !58 @@ -21643,7 +21643,7 @@ int main (int argc, char **argv) { // CHECK17-NEXT: store i32 [[CONV2]], i32* 
[[DOTOMP_UB]], align 4 // CHECK17-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK17-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK17-NEXT: [[TMP3:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK17-NEXT: [[TMP3:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK17-NEXT: [[TMP4:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK17-NEXT: [[TMP5:%.*]] = load i32, i32* [[TMP4]], align 4 // CHECK17-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB2]], i32 [[TMP5]], i32 33, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 [[TMP3]]) @@ -21890,7 +21890,7 @@ int main (int argc, char **argv) { // CHECK17-NEXT: store i64 [[DOTCAPTURE_EXPR_]], i64* [[DOTCAPTURE_EXPR__ADDR]], align 8 // CHECK17-NEXT: [[TMP0:%.*]] = load [10 x i32]*, [10 x i32]** [[A_ADDR]], align 8 // CHECK17-NEXT: [[CONV:%.*]] = bitcast i64* [[DOTCAPTURE_EXPR__ADDR]] to i32* -// CHECK17-NEXT: [[TMP1:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK17-NEXT: [[TMP1:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK17-NEXT: [[CONV1:%.*]] = bitcast i64* [[DOTCAPTURE_EXPR__CASTED]] to i32* // CHECK17-NEXT: store i32 [[TMP1]], i32* [[CONV1]], align 4 // CHECK17-NEXT: [[TMP2:%.*]] = load i64, i64* [[DOTCAPTURE_EXPR__CASTED]], align 8 @@ -21950,7 +21950,7 @@ int main (int argc, char **argv) { // CHECK17-NEXT: [[TMP9:%.*]] = zext i32 [[TMP8]] to i64 // CHECK17-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !70 // CHECK17-NEXT: [[TMP11:%.*]] = zext i32 [[TMP10]] to i64 -// CHECK17-NEXT: [[TMP12:%.*]] = load i32, i32* [[CONV]], align 8, !llvm.access.group !70 +// CHECK17-NEXT: [[TMP12:%.*]] = load i32, i32* [[CONV]], align 4, !llvm.access.group !70 // CHECK17-NEXT: [[CONV2:%.*]] = bitcast i64* [[DOTCAPTURE_EXPR__CASTED]] to i32* // CHECK17-NEXT: store i32 [[TMP12]], i32* [[CONV2]], align 4, !llvm.access.group !70 // CHECK17-NEXT: [[TMP13:%.*]] = load i64, i64* [[DOTCAPTURE_EXPR__CASTED]], align 8, !llvm.access.group !70 @@ -22010,7 +22010,7 @@ int main (int argc, char **argv) { // CHECK17-NEXT: store i32 [[CONV2]], i32* [[DOTOMP_UB]], align 4 // CHECK17-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK17-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK17-NEXT: [[TMP3:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK17-NEXT: [[TMP3:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK17-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 // CHECK17-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 // CHECK17-NEXT: [[TMP6:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 @@ -22468,7 +22468,7 @@ int main (int argc, char **argv) { // CHECK18-NEXT: [[CONV:%.*]] = bitcast i64* [[N_ADDR]] to i32* // CHECK18-NEXT: [[TMP0:%.*]] = load i64, i64* [[VLA_ADDR]], align 8 // CHECK18-NEXT: [[TMP1:%.*]] = load i32*, i32** [[A_ADDR]], align 8 -// CHECK18-NEXT: [[TMP2:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK18-NEXT: [[TMP2:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK18-NEXT: [[CONV1:%.*]] = bitcast i64* [[N_CASTED]] to i32* // CHECK18-NEXT: store i32 [[TMP2]], i32* [[CONV1]], align 4 // CHECK18-NEXT: [[TMP3:%.*]] = load i64, i64* [[N_CASTED]], align 8 @@ -22503,7 +22503,7 @@ int main (int argc, char **argv) { // CHECK18-NEXT: [[CONV:%.*]] = bitcast i64* [[N_ADDR]] to i32* // CHECK18-NEXT: [[TMP0:%.*]] = load i64, i64* [[VLA_ADDR]], align 8 // CHECK18-NEXT: [[TMP1:%.*]] = load i32*, i32** [[A_ADDR]], align 8 -// CHECK18-NEXT: [[TMP2:%.*]] = load i32, i32* 
[[CONV]], align 8 +// CHECK18-NEXT: [[TMP2:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK18-NEXT: store i32 [[TMP2]], i32* [[DOTCAPTURE_EXPR_]], align 4 // CHECK18-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 // CHECK18-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP3]], 0 @@ -22549,7 +22549,7 @@ int main (int argc, char **argv) { // CHECK18-NEXT: [[TMP16:%.*]] = zext i32 [[TMP15]] to i64 // CHECK18-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !13 // CHECK18-NEXT: [[TMP18:%.*]] = zext i32 [[TMP17]] to i64 -// CHECK18-NEXT: [[TMP19:%.*]] = load i32, i32* [[CONV]], align 8, !llvm.access.group !13 +// CHECK18-NEXT: [[TMP19:%.*]] = load i32, i32* [[CONV]], align 4, !llvm.access.group !13 // CHECK18-NEXT: [[CONV6:%.*]] = bitcast i64* [[N_CASTED]] to i32* // CHECK18-NEXT: store i32 [[TMP19]], i32* [[CONV6]], align 4, !llvm.access.group !13 // CHECK18-NEXT: [[TMP20:%.*]] = load i64, i64* [[N_CASTED]], align 8, !llvm.access.group !13 @@ -22614,7 +22614,7 @@ int main (int argc, char **argv) { // CHECK18-NEXT: [[CONV:%.*]] = bitcast i64* [[N_ADDR]] to i32* // CHECK18-NEXT: [[TMP0:%.*]] = load i64, i64* [[VLA_ADDR]], align 8 // CHECK18-NEXT: [[TMP1:%.*]] = load i32*, i32** [[A_ADDR]], align 8 -// CHECK18-NEXT: [[TMP2:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK18-NEXT: [[TMP2:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK18-NEXT: store i32 [[TMP2]], i32* [[DOTCAPTURE_EXPR_]], align 4 // CHECK18-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 // CHECK18-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP3]], 0 @@ -22714,7 +22714,7 @@ int main (int argc, char **argv) { // CHECK18-NEXT: [[CONV:%.*]] = bitcast i64* [[N_ADDR]] to i32* // CHECK18-NEXT: [[TMP0:%.*]] = load i64, i64* [[VLA_ADDR]], align 8 // CHECK18-NEXT: [[TMP1:%.*]] = load i32*, i32** [[A_ADDR]], align 8 -// CHECK18-NEXT: [[TMP2:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK18-NEXT: [[TMP2:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK18-NEXT: [[CONV1:%.*]] = bitcast i64* [[N_CASTED]] to i32* // CHECK18-NEXT: store i32 [[TMP2]], i32* [[CONV1]], align 4 // CHECK18-NEXT: [[TMP3:%.*]] = load i64, i64* [[N_CASTED]], align 8 @@ -22749,7 +22749,7 @@ int main (int argc, char **argv) { // CHECK18-NEXT: [[CONV:%.*]] = bitcast i64* [[N_ADDR]] to i32* // CHECK18-NEXT: [[TMP0:%.*]] = load i64, i64* [[VLA_ADDR]], align 8 // CHECK18-NEXT: [[TMP1:%.*]] = load i32*, i32** [[A_ADDR]], align 8 -// CHECK18-NEXT: [[TMP2:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK18-NEXT: [[TMP2:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK18-NEXT: store i32 [[TMP2]], i32* [[DOTCAPTURE_EXPR_]], align 4 // CHECK18-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 // CHECK18-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP3]], 0 @@ -22795,7 +22795,7 @@ int main (int argc, char **argv) { // CHECK18-NEXT: [[TMP16:%.*]] = zext i32 [[TMP15]] to i64 // CHECK18-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !22 // CHECK18-NEXT: [[TMP18:%.*]] = zext i32 [[TMP17]] to i64 -// CHECK18-NEXT: [[TMP19:%.*]] = load i32, i32* [[CONV]], align 8, !llvm.access.group !22 +// CHECK18-NEXT: [[TMP19:%.*]] = load i32, i32* [[CONV]], align 4, !llvm.access.group !22 // CHECK18-NEXT: [[CONV6:%.*]] = bitcast i64* [[N_CASTED]] to i32* // CHECK18-NEXT: store i32 [[TMP19]], i32* [[CONV6]], align 4, !llvm.access.group !22 // CHECK18-NEXT: [[TMP20:%.*]] = load i64, i64* [[N_CASTED]], align 8, !llvm.access.group !22 @@ -22860,7 +22860,7 @@ int main (int argc, char **argv) { // 
CHECK18-NEXT: [[CONV:%.*]] = bitcast i64* [[N_ADDR]] to i32* // CHECK18-NEXT: [[TMP0:%.*]] = load i64, i64* [[VLA_ADDR]], align 8 // CHECK18-NEXT: [[TMP1:%.*]] = load i32*, i32** [[A_ADDR]], align 8 -// CHECK18-NEXT: [[TMP2:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK18-NEXT: [[TMP2:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK18-NEXT: store i32 [[TMP2]], i32* [[DOTCAPTURE_EXPR_]], align 4 // CHECK18-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 // CHECK18-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP3]], 0 @@ -22964,11 +22964,11 @@ int main (int argc, char **argv) { // CHECK18-NEXT: [[TMP0:%.*]] = load i64, i64* [[VLA_ADDR]], align 8 // CHECK18-NEXT: [[TMP1:%.*]] = load i32*, i32** [[A_ADDR]], align 8 // CHECK18-NEXT: [[CONV1:%.*]] = bitcast i64* [[DOTCAPTURE_EXPR__ADDR]] to i32* -// CHECK18-NEXT: [[TMP2:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK18-NEXT: [[TMP2:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK18-NEXT: [[CONV2:%.*]] = bitcast i64* [[N_CASTED]] to i32* // CHECK18-NEXT: store i32 [[TMP2]], i32* [[CONV2]], align 4 // CHECK18-NEXT: [[TMP3:%.*]] = load i64, i64* [[N_CASTED]], align 8 -// CHECK18-NEXT: [[TMP4:%.*]] = load i32, i32* [[CONV1]], align 8 +// CHECK18-NEXT: [[TMP4:%.*]] = load i32, i32* [[CONV1]], align 4 // CHECK18-NEXT: [[CONV3:%.*]] = bitcast i64* [[DOTCAPTURE_EXPR__CASTED]] to i32* // CHECK18-NEXT: store i32 [[TMP4]], i32* [[CONV3]], align 4 // CHECK18-NEXT: [[TMP5:%.*]] = load i64, i64* [[DOTCAPTURE_EXPR__CASTED]], align 8 @@ -23007,7 +23007,7 @@ int main (int argc, char **argv) { // CHECK18-NEXT: [[TMP0:%.*]] = load i64, i64* [[VLA_ADDR]], align 8 // CHECK18-NEXT: [[TMP1:%.*]] = load i32*, i32** [[A_ADDR]], align 8 // CHECK18-NEXT: [[CONV1:%.*]] = bitcast i64* [[DOTCAPTURE_EXPR__ADDR]] to i32* -// CHECK18-NEXT: [[TMP2:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK18-NEXT: [[TMP2:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK18-NEXT: store i32 [[TMP2]], i32* [[DOTCAPTURE_EXPR_2]], align 4 // CHECK18-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_2]], align 4 // CHECK18-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP3]], 0 @@ -23024,7 +23024,7 @@ int main (int argc, char **argv) { // CHECK18-NEXT: store i32 [[TMP5]], i32* [[DOTOMP_COMB_UB]], align 4 // CHECK18-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK18-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK18-NEXT: [[TMP6:%.*]] = load i32, i32* [[CONV1]], align 8 +// CHECK18-NEXT: [[TMP6:%.*]] = load i32, i32* [[CONV1]], align 4 // CHECK18-NEXT: [[TMP7:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK18-NEXT: [[TMP8:%.*]] = load i32, i32* [[TMP7]], align 4 // CHECK18-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP8]], i32 91, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 [[TMP6]]) @@ -23055,11 +23055,11 @@ int main (int argc, char **argv) { // CHECK18-NEXT: [[TMP17:%.*]] = zext i32 [[TMP16]] to i64 // CHECK18-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !28 // CHECK18-NEXT: [[TMP19:%.*]] = zext i32 [[TMP18]] to i64 -// CHECK18-NEXT: [[TMP20:%.*]] = load i32, i32* [[CONV]], align 8, !llvm.access.group !28 +// CHECK18-NEXT: [[TMP20:%.*]] = load i32, i32* [[CONV]], align 4, !llvm.access.group !28 // CHECK18-NEXT: [[CONV8:%.*]] = bitcast i64* [[N_CASTED]] to i32* // CHECK18-NEXT: store i32 [[TMP20]], i32* [[CONV8]], align 4, !llvm.access.group !28 // CHECK18-NEXT: [[TMP21:%.*]] = load i64, i64* [[N_CASTED]], 
align 8, !llvm.access.group !28 -// CHECK18-NEXT: [[TMP22:%.*]] = load i32, i32* [[CONV1]], align 8, !llvm.access.group !28 +// CHECK18-NEXT: [[TMP22:%.*]] = load i32, i32* [[CONV1]], align 4, !llvm.access.group !28 // CHECK18-NEXT: [[CONV9:%.*]] = bitcast i64* [[DOTCAPTURE_EXPR__CASTED]] to i32* // CHECK18-NEXT: store i32 [[TMP22]], i32* [[CONV9]], align 4, !llvm.access.group !28 // CHECK18-NEXT: [[TMP23:%.*]] = load i64, i64* [[DOTCAPTURE_EXPR__CASTED]], align 8, !llvm.access.group !28 @@ -23150,7 +23150,7 @@ int main (int argc, char **argv) { // CHECK18-NEXT: [[TMP0:%.*]] = load i64, i64* [[VLA_ADDR]], align 8 // CHECK18-NEXT: [[TMP1:%.*]] = load i32*, i32** [[A_ADDR]], align 8 // CHECK18-NEXT: [[CONV1:%.*]] = bitcast i64* [[DOTCAPTURE_EXPR__ADDR]] to i32* -// CHECK18-NEXT: [[TMP2:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK18-NEXT: [[TMP2:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK18-NEXT: store i32 [[TMP2]], i32* [[DOTCAPTURE_EXPR_2]], align 4 // CHECK18-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_2]], align 4 // CHECK18-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP3]], 0 @@ -23250,7 +23250,7 @@ int main (int argc, char **argv) { // CHECK18-NEXT: [[CONV:%.*]] = bitcast i64* [[N_ADDR]] to i32* // CHECK18-NEXT: [[TMP0:%.*]] = load i64, i64* [[VLA_ADDR]], align 8 // CHECK18-NEXT: [[TMP1:%.*]] = load i32*, i32** [[A_ADDR]], align 8 -// CHECK18-NEXT: [[TMP2:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK18-NEXT: [[TMP2:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK18-NEXT: [[CONV1:%.*]] = bitcast i64* [[N_CASTED]] to i32* // CHECK18-NEXT: store i32 [[TMP2]], i32* [[CONV1]], align 4 // CHECK18-NEXT: [[TMP3:%.*]] = load i64, i64* [[N_CASTED]], align 8 @@ -23285,7 +23285,7 @@ int main (int argc, char **argv) { // CHECK18-NEXT: [[CONV:%.*]] = bitcast i64* [[N_ADDR]] to i32* // CHECK18-NEXT: [[TMP0:%.*]] = load i64, i64* [[VLA_ADDR]], align 8 // CHECK18-NEXT: [[TMP1:%.*]] = load i32*, i32** [[A_ADDR]], align 8 -// CHECK18-NEXT: [[TMP2:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK18-NEXT: [[TMP2:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK18-NEXT: store i32 [[TMP2]], i32* [[DOTCAPTURE_EXPR_]], align 4 // CHECK18-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 // CHECK18-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP3]], 0 @@ -23331,7 +23331,7 @@ int main (int argc, char **argv) { // CHECK18-NEXT: [[TMP16:%.*]] = zext i32 [[TMP15]] to i64 // CHECK18-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !34 // CHECK18-NEXT: [[TMP18:%.*]] = zext i32 [[TMP17]] to i64 -// CHECK18-NEXT: [[TMP19:%.*]] = load i32, i32* [[CONV]], align 8, !llvm.access.group !34 +// CHECK18-NEXT: [[TMP19:%.*]] = load i32, i32* [[CONV]], align 4, !llvm.access.group !34 // CHECK18-NEXT: [[CONV6:%.*]] = bitcast i64* [[N_CASTED]] to i32* // CHECK18-NEXT: store i32 [[TMP19]], i32* [[CONV6]], align 4, !llvm.access.group !34 // CHECK18-NEXT: [[TMP20:%.*]] = load i64, i64* [[N_CASTED]], align 8, !llvm.access.group !34 @@ -23396,7 +23396,7 @@ int main (int argc, char **argv) { // CHECK18-NEXT: [[CONV:%.*]] = bitcast i64* [[N_ADDR]] to i32* // CHECK18-NEXT: [[TMP0:%.*]] = load i64, i64* [[VLA_ADDR]], align 8 // CHECK18-NEXT: [[TMP1:%.*]] = load i32*, i32** [[A_ADDR]], align 8 -// CHECK18-NEXT: [[TMP2:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK18-NEXT: [[TMP2:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK18-NEXT: store i32 [[TMP2]], i32* [[DOTCAPTURE_EXPR_]], align 4 // CHECK18-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 // 
CHECK18-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP3]], 0 @@ -23496,11 +23496,11 @@ int main (int argc, char **argv) { // CHECK18-NEXT: [[TMP0:%.*]] = load i64, i64* [[VLA_ADDR]], align 8 // CHECK18-NEXT: [[TMP1:%.*]] = load i32*, i32** [[A_ADDR]], align 8 // CHECK18-NEXT: [[CONV1:%.*]] = bitcast i64* [[DOTCAPTURE_EXPR__ADDR]] to i32* -// CHECK18-NEXT: [[TMP2:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK18-NEXT: [[TMP2:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK18-NEXT: [[CONV2:%.*]] = bitcast i64* [[N_CASTED]] to i32* // CHECK18-NEXT: store i32 [[TMP2]], i32* [[CONV2]], align 4 // CHECK18-NEXT: [[TMP3:%.*]] = load i64, i64* [[N_CASTED]], align 8 -// CHECK18-NEXT: [[TMP4:%.*]] = load i32, i32* [[CONV1]], align 8 +// CHECK18-NEXT: [[TMP4:%.*]] = load i32, i32* [[CONV1]], align 4 // CHECK18-NEXT: [[CONV3:%.*]] = bitcast i64* [[DOTCAPTURE_EXPR__CASTED]] to i32* // CHECK18-NEXT: store i32 [[TMP4]], i32* [[CONV3]], align 4 // CHECK18-NEXT: [[TMP5:%.*]] = load i64, i64* [[DOTCAPTURE_EXPR__CASTED]], align 8 @@ -23539,7 +23539,7 @@ int main (int argc, char **argv) { // CHECK18-NEXT: [[TMP0:%.*]] = load i64, i64* [[VLA_ADDR]], align 8 // CHECK18-NEXT: [[TMP1:%.*]] = load i32*, i32** [[A_ADDR]], align 8 // CHECK18-NEXT: [[CONV1:%.*]] = bitcast i64* [[DOTCAPTURE_EXPR__ADDR]] to i32* -// CHECK18-NEXT: [[TMP2:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK18-NEXT: [[TMP2:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK18-NEXT: store i32 [[TMP2]], i32* [[DOTCAPTURE_EXPR_2]], align 4 // CHECK18-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_2]], align 4 // CHECK18-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP3]], 0 @@ -23585,11 +23585,11 @@ int main (int argc, char **argv) { // CHECK18-NEXT: [[TMP16:%.*]] = zext i32 [[TMP15]] to i64 // CHECK18-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !40 // CHECK18-NEXT: [[TMP18:%.*]] = zext i32 [[TMP17]] to i64 -// CHECK18-NEXT: [[TMP19:%.*]] = load i32, i32* [[CONV]], align 8, !llvm.access.group !40 +// CHECK18-NEXT: [[TMP19:%.*]] = load i32, i32* [[CONV]], align 4, !llvm.access.group !40 // CHECK18-NEXT: [[CONV8:%.*]] = bitcast i64* [[N_CASTED]] to i32* // CHECK18-NEXT: store i32 [[TMP19]], i32* [[CONV8]], align 4, !llvm.access.group !40 // CHECK18-NEXT: [[TMP20:%.*]] = load i64, i64* [[N_CASTED]], align 8, !llvm.access.group !40 -// CHECK18-NEXT: [[TMP21:%.*]] = load i32, i32* [[CONV1]], align 8, !llvm.access.group !40 +// CHECK18-NEXT: [[TMP21:%.*]] = load i32, i32* [[CONV1]], align 4, !llvm.access.group !40 // CHECK18-NEXT: [[CONV9:%.*]] = bitcast i64* [[DOTCAPTURE_EXPR__CASTED]] to i32* // CHECK18-NEXT: store i32 [[TMP21]], i32* [[CONV9]], align 4, !llvm.access.group !40 // CHECK18-NEXT: [[TMP22:%.*]] = load i64, i64* [[DOTCAPTURE_EXPR__CASTED]], align 8, !llvm.access.group !40 @@ -23657,7 +23657,7 @@ int main (int argc, char **argv) { // CHECK18-NEXT: [[TMP0:%.*]] = load i64, i64* [[VLA_ADDR]], align 8 // CHECK18-NEXT: [[TMP1:%.*]] = load i32*, i32** [[A_ADDR]], align 8 // CHECK18-NEXT: [[CONV1:%.*]] = bitcast i64* [[DOTCAPTURE_EXPR__ADDR]] to i32* -// CHECK18-NEXT: [[TMP2:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK18-NEXT: [[TMP2:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK18-NEXT: store i32 [[TMP2]], i32* [[DOTCAPTURE_EXPR_2]], align 4 // CHECK18-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_2]], align 4 // CHECK18-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP3]], 0 @@ -23680,7 +23680,7 @@ int main (int argc, char **argv) { // CHECK18-NEXT: store i32 [[CONV6]], i32* [[DOTOMP_UB]], align 4 // 
CHECK18-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK18-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK18-NEXT: [[TMP8:%.*]] = load i32, i32* [[CONV1]], align 8 +// CHECK18-NEXT: [[TMP8:%.*]] = load i32, i32* [[CONV1]], align 4 // CHECK18-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 // CHECK18-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 // CHECK18-NEXT: [[TMP11:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 @@ -24232,7 +24232,7 @@ int main (int argc, char **argv) { // CHECK18-NEXT: store i64 [[DOTCAPTURE_EXPR_]], i64* [[DOTCAPTURE_EXPR__ADDR]], align 8 // CHECK18-NEXT: [[TMP0:%.*]] = load [10 x i32]*, [10 x i32]** [[A_ADDR]], align 8 // CHECK18-NEXT: [[CONV:%.*]] = bitcast i64* [[DOTCAPTURE_EXPR__ADDR]] to i32* -// CHECK18-NEXT: [[TMP1:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK18-NEXT: [[TMP1:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK18-NEXT: [[CONV1:%.*]] = bitcast i64* [[DOTCAPTURE_EXPR__CASTED]] to i32* // CHECK18-NEXT: store i32 [[TMP1]], i32* [[CONV1]], align 4 // CHECK18-NEXT: [[TMP2:%.*]] = load i64, i64* [[DOTCAPTURE_EXPR__CASTED]], align 8 @@ -24292,7 +24292,7 @@ int main (int argc, char **argv) { // CHECK18-NEXT: [[TMP9:%.*]] = zext i32 [[TMP8]] to i64 // CHECK18-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !58 // CHECK18-NEXT: [[TMP11:%.*]] = zext i32 [[TMP10]] to i64 -// CHECK18-NEXT: [[TMP12:%.*]] = load i32, i32* [[CONV]], align 8, !llvm.access.group !58 +// CHECK18-NEXT: [[TMP12:%.*]] = load i32, i32* [[CONV]], align 4, !llvm.access.group !58 // CHECK18-NEXT: [[CONV2:%.*]] = bitcast i64* [[DOTCAPTURE_EXPR__CASTED]] to i32* // CHECK18-NEXT: store i32 [[TMP12]], i32* [[CONV2]], align 4, !llvm.access.group !58 // CHECK18-NEXT: [[TMP13:%.*]] = load i64, i64* [[DOTCAPTURE_EXPR__CASTED]], align 8, !llvm.access.group !58 @@ -24352,7 +24352,7 @@ int main (int argc, char **argv) { // CHECK18-NEXT: store i32 [[CONV2]], i32* [[DOTOMP_UB]], align 4 // CHECK18-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK18-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK18-NEXT: [[TMP3:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK18-NEXT: [[TMP3:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK18-NEXT: [[TMP4:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK18-NEXT: [[TMP5:%.*]] = load i32, i32* [[TMP4]], align 4 // CHECK18-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB2]], i32 [[TMP5]], i32 33, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 [[TMP3]]) @@ -24599,7 +24599,7 @@ int main (int argc, char **argv) { // CHECK18-NEXT: store i64 [[DOTCAPTURE_EXPR_]], i64* [[DOTCAPTURE_EXPR__ADDR]], align 8 // CHECK18-NEXT: [[TMP0:%.*]] = load [10 x i32]*, [10 x i32]** [[A_ADDR]], align 8 // CHECK18-NEXT: [[CONV:%.*]] = bitcast i64* [[DOTCAPTURE_EXPR__ADDR]] to i32* -// CHECK18-NEXT: [[TMP1:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK18-NEXT: [[TMP1:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK18-NEXT: [[CONV1:%.*]] = bitcast i64* [[DOTCAPTURE_EXPR__CASTED]] to i32* // CHECK18-NEXT: store i32 [[TMP1]], i32* [[CONV1]], align 4 // CHECK18-NEXT: [[TMP2:%.*]] = load i64, i64* [[DOTCAPTURE_EXPR__CASTED]], align 8 @@ -24659,7 +24659,7 @@ int main (int argc, char **argv) { // CHECK18-NEXT: [[TMP9:%.*]] = zext i32 [[TMP8]] to i64 // CHECK18-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !70 // CHECK18-NEXT: [[TMP11:%.*]] = zext i32 
[[TMP10]] to i64 -// CHECK18-NEXT: [[TMP12:%.*]] = load i32, i32* [[CONV]], align 8, !llvm.access.group !70 +// CHECK18-NEXT: [[TMP12:%.*]] = load i32, i32* [[CONV]], align 4, !llvm.access.group !70 // CHECK18-NEXT: [[CONV2:%.*]] = bitcast i64* [[DOTCAPTURE_EXPR__CASTED]] to i32* // CHECK18-NEXT: store i32 [[TMP12]], i32* [[CONV2]], align 4, !llvm.access.group !70 // CHECK18-NEXT: [[TMP13:%.*]] = load i64, i64* [[DOTCAPTURE_EXPR__CASTED]], align 8, !llvm.access.group !70 @@ -24719,7 +24719,7 @@ int main (int argc, char **argv) { // CHECK18-NEXT: store i32 [[CONV2]], i32* [[DOTOMP_UB]], align 4 // CHECK18-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK18-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK18-NEXT: [[TMP3:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK18-NEXT: [[TMP3:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK18-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 // CHECK18-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 // CHECK18-NEXT: [[TMP6:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 diff --git a/clang/test/OpenMP/target_teams_distribute_simd_codegen.cpp b/clang/test/OpenMP/target_teams_distribute_simd_codegen.cpp index 04016eb544e0..5aa6962c06de 100644 --- a/clang/test/OpenMP/target_teams_distribute_simd_codegen.cpp +++ b/clang/test/OpenMP/target_teams_distribute_simd_codegen.cpp @@ -629,10 +629,10 @@ int bar(int n){ // CHECK1-NEXT: [[CONV:%.*]] = bitcast i64* [[AA_ADDR]] to i16* // CHECK1-NEXT: [[CONV3:%.*]] = bitcast i64* [[DOTCAPTURE_EXPR__ADDR]] to i32* // CHECK1-NEXT: [[CONV4:%.*]] = bitcast i64* [[DOTCAPTURE_EXPR__ADDR2]] to i32* -// CHECK1-NEXT: [[TMP1:%.*]] = load i32, i32* [[CONV3]], align 8 -// CHECK1-NEXT: [[TMP2:%.*]] = load i32, i32* [[CONV4]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = load i32, i32* [[CONV3]], align 4 +// CHECK1-NEXT: [[TMP2:%.*]] = load i32, i32* [[CONV4]], align 4 // CHECK1-NEXT: call void @__kmpc_push_num_teams(%struct.ident_t* @[[GLOB2]], i32 [[TMP0]], i32 [[TMP1]], i32 [[TMP2]]) -// CHECK1-NEXT: [[TMP3:%.*]] = load i16, i16* [[CONV]], align 8 +// CHECK1-NEXT: [[TMP3:%.*]] = load i16, i16* [[CONV]], align 2 // CHECK1-NEXT: [[CONV5:%.*]] = bitcast i64* [[AA_CASTED]] to i16* // CHECK1-NEXT: store i16 [[TMP3]], i16* [[CONV5]], align 2 // CHECK1-NEXT: [[TMP4:%.*]] = load i64, i64* [[AA_CASTED]], align 8 @@ -824,7 +824,7 @@ int bar(int n){ // CHECK1-NEXT: [[A_CASTED:%.*]] = alloca i64, align 8 // CHECK1-NEXT: store i64 [[A]], i64* [[A_ADDR]], align 8 // CHECK1-NEXT: [[CONV:%.*]] = bitcast i64* [[A_ADDR]] to i32* -// CHECK1-NEXT: [[TMP0:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK1-NEXT: [[CONV1:%.*]] = bitcast i64* [[A_CASTED]] to i32* // CHECK1-NEXT: store i32 [[TMP0]], i32* [[CONV1]], align 4 // CHECK1-NEXT: [[TMP1:%.*]] = load i64, i64* [[A_CASTED]], align 8 @@ -899,7 +899,7 @@ int bar(int n){ // CHECK1-NEXT: [[TMP11:%.*]] = icmp ne i32 [[TMP10]], 0 // CHECK1-NEXT: br i1 [[TMP11]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK1: .omp.final.then: -// CHECK1-NEXT: store i32 10, i32* [[CONV]], align 8 +// CHECK1-NEXT: store i32 10, i32* [[CONV]], align 4 // CHECK1-NEXT: br label [[DOTOMP_FINAL_DONE]] // CHECK1: .omp.final.done: // CHECK1-NEXT: ret void @@ -912,7 +912,7 @@ int bar(int n){ // CHECK1-NEXT: [[AA_CASTED:%.*]] = alloca i64, align 8 // CHECK1-NEXT: store i64 [[AA]], i64* [[AA_ADDR]], align 8 // CHECK1-NEXT: [[CONV:%.*]] = bitcast i64* [[AA_ADDR]] to i16* -// CHECK1-NEXT: 
[[TMP0:%.*]] = load i16, i16* [[CONV]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load i16, i16* [[CONV]], align 2 // CHECK1-NEXT: [[CONV1:%.*]] = bitcast i64* [[AA_CASTED]] to i16* // CHECK1-NEXT: store i16 [[TMP0]], i16* [[CONV1]], align 2 // CHECK1-NEXT: [[TMP1:%.*]] = load i64, i64* [[AA_CASTED]], align 8 @@ -968,11 +968,11 @@ int bar(int n){ // CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP7]], 1 // CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK1-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !29 -// CHECK1-NEXT: [[TMP8:%.*]] = load i16, i16* [[CONV]], align 8, !llvm.access.group !29 +// CHECK1-NEXT: [[TMP8:%.*]] = load i16, i16* [[CONV]], align 2, !llvm.access.group !29 // CHECK1-NEXT: [[CONV2:%.*]] = sext i16 [[TMP8]] to i32 // CHECK1-NEXT: [[ADD3:%.*]] = add nsw i32 [[CONV2]], 1 // CHECK1-NEXT: [[CONV4:%.*]] = trunc i32 [[ADD3]] to i16 -// CHECK1-NEXT: store i16 [[CONV4]], i16* [[CONV]], align 8, !llvm.access.group !29 +// CHECK1-NEXT: store i16 [[CONV4]], i16* [[CONV]], align 2, !llvm.access.group !29 // CHECK1-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK1: omp.body.continue: // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] @@ -1006,11 +1006,11 @@ int bar(int n){ // CHECK1-NEXT: store i64 [[AA]], i64* [[AA_ADDR]], align 8 // CHECK1-NEXT: [[CONV:%.*]] = bitcast i64* [[A_ADDR]] to i32* // CHECK1-NEXT: [[CONV1:%.*]] = bitcast i64* [[AA_ADDR]] to i16* -// CHECK1-NEXT: [[TMP0:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK1-NEXT: [[CONV2:%.*]] = bitcast i64* [[A_CASTED]] to i32* // CHECK1-NEXT: store i32 [[TMP0]], i32* [[CONV2]], align 4 // CHECK1-NEXT: [[TMP1:%.*]] = load i64, i64* [[A_CASTED]], align 8 -// CHECK1-NEXT: [[TMP2:%.*]] = load i16, i16* [[CONV1]], align 8 +// CHECK1-NEXT: [[TMP2:%.*]] = load i16, i16* [[CONV1]], align 2 // CHECK1-NEXT: [[CONV3:%.*]] = bitcast i64* [[AA_CASTED]] to i16* // CHECK1-NEXT: store i16 [[TMP2]], i16* [[CONV3]], align 2 // CHECK1-NEXT: [[TMP3:%.*]] = load i64, i64* [[AA_CASTED]], align 8 @@ -1069,14 +1069,14 @@ int bar(int n){ // CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP7]], 1 // CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK1-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !32 -// CHECK1-NEXT: [[TMP8:%.*]] = load i32, i32* [[CONV]], align 8, !llvm.access.group !32 +// CHECK1-NEXT: [[TMP8:%.*]] = load i32, i32* [[CONV]], align 4, !llvm.access.group !32 // CHECK1-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP8]], 1 -// CHECK1-NEXT: store i32 [[ADD3]], i32* [[CONV]], align 8, !llvm.access.group !32 -// CHECK1-NEXT: [[TMP9:%.*]] = load i16, i16* [[CONV1]], align 8, !llvm.access.group !32 +// CHECK1-NEXT: store i32 [[ADD3]], i32* [[CONV]], align 4, !llvm.access.group !32 +// CHECK1-NEXT: [[TMP9:%.*]] = load i16, i16* [[CONV1]], align 2, !llvm.access.group !32 // CHECK1-NEXT: [[CONV4:%.*]] = sext i16 [[TMP9]] to i32 // CHECK1-NEXT: [[ADD5:%.*]] = add nsw i32 [[CONV4]], 1 // CHECK1-NEXT: [[CONV6:%.*]] = trunc i32 [[ADD5]] to i16 -// CHECK1-NEXT: store i16 [[CONV6]], i16* [[CONV1]], align 8, !llvm.access.group !32 +// CHECK1-NEXT: store i16 [[CONV6]], i16* [[CONV1]], align 2, !llvm.access.group !32 // CHECK1-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK1: omp.body.continue: // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] @@ -1130,7 +1130,7 @@ int bar(int n){ // CHECK1-NEXT: [[TMP5:%.*]] = load i64, i64* [[VLA_ADDR4]], align 8 // CHECK1-NEXT: [[TMP6:%.*]] = load double*, double** [[CN_ADDR]], align 8 // CHECK1-NEXT: 
[[TMP7:%.*]] = load %struct.TT*, %struct.TT** [[D_ADDR]], align 8 -// CHECK1-NEXT: [[TMP8:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK1-NEXT: [[TMP8:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK1-NEXT: [[CONV5:%.*]] = bitcast i64* [[A_CASTED]] to i32* // CHECK1-NEXT: store i32 [[TMP8]], i32* [[CONV5]], align 4 // CHECK1-NEXT: [[TMP9:%.*]] = load i64, i64* [[A_CASTED]], align 8 @@ -1212,9 +1212,9 @@ int bar(int n){ // CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP15]], 1 // CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK1-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !35 -// CHECK1-NEXT: [[TMP16:%.*]] = load i32, i32* [[CONV]], align 8, !llvm.access.group !35 +// CHECK1-NEXT: [[TMP16:%.*]] = load i32, i32* [[CONV]], align 4, !llvm.access.group !35 // CHECK1-NEXT: [[ADD6:%.*]] = add nsw i32 [[TMP16]], 1 -// CHECK1-NEXT: store i32 [[ADD6]], i32* [[CONV]], align 8, !llvm.access.group !35 +// CHECK1-NEXT: store i32 [[ADD6]], i32* [[CONV]], align 4, !llvm.access.group !35 // CHECK1-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x float], [10 x float]* [[TMP0]], i64 0, i64 2 // CHECK1-NEXT: [[TMP17:%.*]] = load float, float* [[ARRAYIDX]], align 4, !llvm.access.group !35 // CHECK1-NEXT: [[CONV7:%.*]] = fpext float [[TMP17]] to double @@ -1625,7 +1625,7 @@ int bar(int n){ // CHECK1-NEXT: [[TMP1:%.*]] = load i64, i64* [[VLA_ADDR]], align 8 // CHECK1-NEXT: [[TMP2:%.*]] = load i64, i64* [[VLA_ADDR2]], align 8 // CHECK1-NEXT: [[TMP3:%.*]] = load i16*, i16** [[C_ADDR]], align 8 -// CHECK1-NEXT: [[TMP4:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK1-NEXT: [[TMP4:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK1-NEXT: [[CONV3:%.*]] = bitcast i64* [[B_CASTED]] to i32* // CHECK1-NEXT: store i32 [[TMP4]], i32* [[CONV3]], align 4 // CHECK1-NEXT: [[TMP5:%.*]] = load i64, i64* [[B_CASTED]], align 8 @@ -1693,7 +1693,7 @@ int bar(int n){ // CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP11]], 1 // CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK1-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !38 -// CHECK1-NEXT: [[TMP12:%.*]] = load i32, i32* [[CONV]], align 8, !llvm.access.group !38 +// CHECK1-NEXT: [[TMP12:%.*]] = load i32, i32* [[CONV]], align 4, !llvm.access.group !38 // CHECK1-NEXT: [[CONV4:%.*]] = sitofp i32 [[TMP12]] to double // CHECK1-NEXT: [[ADD5:%.*]] = fadd double [[CONV4]], 1.500000e+00 // CHECK1-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_S1:%.*]], %struct.S1* [[TMP0]], i32 0, i32 0 @@ -1751,19 +1751,19 @@ int bar(int n){ // CHECK1-NEXT: [[CONV2:%.*]] = bitcast i64* [[AA_ADDR]] to i16* // CHECK1-NEXT: [[CONV3:%.*]] = bitcast i64* [[AAA_ADDR]] to i8* // CHECK1-NEXT: [[TMP0:%.*]] = load [10 x i32]*, [10 x i32]** [[B_ADDR]], align 8 -// CHECK1-NEXT: [[TMP1:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK1-NEXT: [[CONV4:%.*]] = bitcast i64* [[A_CASTED]] to i32* // CHECK1-NEXT: store i32 [[TMP1]], i32* [[CONV4]], align 4 // CHECK1-NEXT: [[TMP2:%.*]] = load i64, i64* [[A_CASTED]], align 8 -// CHECK1-NEXT: [[TMP3:%.*]] = load i32, i32* [[CONV1]], align 8 +// CHECK1-NEXT: [[TMP3:%.*]] = load i32, i32* [[CONV1]], align 4 // CHECK1-NEXT: [[CONV5:%.*]] = bitcast i64* [[N_CASTED]] to i32* // CHECK1-NEXT: store i32 [[TMP3]], i32* [[CONV5]], align 4 // CHECK1-NEXT: [[TMP4:%.*]] = load i64, i64* [[N_CASTED]], align 8 -// CHECK1-NEXT: [[TMP5:%.*]] = load i16, i16* [[CONV2]], align 8 +// CHECK1-NEXT: [[TMP5:%.*]] = load i16, i16* [[CONV2]], align 2 // CHECK1-NEXT: 
[[CONV6:%.*]] = bitcast i64* [[AA_CASTED]] to i16* // CHECK1-NEXT: store i16 [[TMP5]], i16* [[CONV6]], align 2 // CHECK1-NEXT: [[TMP6:%.*]] = load i64, i64* [[AA_CASTED]], align 8 -// CHECK1-NEXT: [[TMP7:%.*]] = load i8, i8* [[CONV3]], align 8 +// CHECK1-NEXT: [[TMP7:%.*]] = load i8, i8* [[CONV3]], align 1 // CHECK1-NEXT: [[CONV7:%.*]] = bitcast i64* [[AAA_CASTED]] to i8* // CHECK1-NEXT: store i8 [[TMP7]], i8* [[CONV7]], align 1 // CHECK1-NEXT: [[TMP8:%.*]] = load i64, i64* [[AAA_CASTED]], align 8 @@ -1804,9 +1804,9 @@ int bar(int n){ // CHECK1-NEXT: [[CONV2:%.*]] = bitcast i64* [[AA_ADDR]] to i16* // CHECK1-NEXT: [[CONV3:%.*]] = bitcast i64* [[AAA_ADDR]] to i8* // CHECK1-NEXT: [[TMP0:%.*]] = load [10 x i32]*, [10 x i32]** [[B_ADDR]], align 8 -// CHECK1-NEXT: [[TMP1:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK1-NEXT: store i32 [[TMP1]], i32* [[DOTCAPTURE_EXPR_]], align 4 -// CHECK1-NEXT: [[TMP2:%.*]] = load i32, i32* [[CONV1]], align 8 +// CHECK1-NEXT: [[TMP2:%.*]] = load i32, i32* [[CONV1]], align 4 // CHECK1-NEXT: store i32 [[TMP2]], i32* [[DOTCAPTURE_EXPR_4]], align 4 // CHECK1-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_4]], align 4 // CHECK1-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 @@ -1859,19 +1859,19 @@ int bar(int n){ // CHECK1-NEXT: [[MUL:%.*]] = mul i32 [[TMP19]], 1 // CHECK1-NEXT: [[ADD12:%.*]] = add i32 [[TMP18]], [[MUL]] // CHECK1-NEXT: store i32 [[ADD12]], i32* [[I8]], align 4, !llvm.access.group !41 -// CHECK1-NEXT: [[TMP20:%.*]] = load i32, i32* [[CONV]], align 8, !llvm.access.group !41 +// CHECK1-NEXT: [[TMP20:%.*]] = load i32, i32* [[CONV]], align 4, !llvm.access.group !41 // CHECK1-NEXT: [[ADD13:%.*]] = add nsw i32 [[TMP20]], 1 -// CHECK1-NEXT: store i32 [[ADD13]], i32* [[CONV]], align 8, !llvm.access.group !41 -// CHECK1-NEXT: [[TMP21:%.*]] = load i16, i16* [[CONV2]], align 8, !llvm.access.group !41 +// CHECK1-NEXT: store i32 [[ADD13]], i32* [[CONV]], align 4, !llvm.access.group !41 +// CHECK1-NEXT: [[TMP21:%.*]] = load i16, i16* [[CONV2]], align 2, !llvm.access.group !41 // CHECK1-NEXT: [[CONV14:%.*]] = sext i16 [[TMP21]] to i32 // CHECK1-NEXT: [[ADD15:%.*]] = add nsw i32 [[CONV14]], 1 // CHECK1-NEXT: [[CONV16:%.*]] = trunc i32 [[ADD15]] to i16 -// CHECK1-NEXT: store i16 [[CONV16]], i16* [[CONV2]], align 8, !llvm.access.group !41 -// CHECK1-NEXT: [[TMP22:%.*]] = load i8, i8* [[CONV3]], align 8, !llvm.access.group !41 +// CHECK1-NEXT: store i16 [[CONV16]], i16* [[CONV2]], align 2, !llvm.access.group !41 +// CHECK1-NEXT: [[TMP22:%.*]] = load i8, i8* [[CONV3]], align 1, !llvm.access.group !41 // CHECK1-NEXT: [[CONV17:%.*]] = sext i8 [[TMP22]] to i32 // CHECK1-NEXT: [[ADD18:%.*]] = add nsw i32 [[CONV17]], 1 // CHECK1-NEXT: [[CONV19:%.*]] = trunc i32 [[ADD18]] to i8 -// CHECK1-NEXT: store i8 [[CONV19]], i8* [[CONV3]], align 8, !llvm.access.group !41 +// CHECK1-NEXT: store i8 [[CONV19]], i8* [[CONV3]], align 1, !llvm.access.group !41 // CHECK1-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], [10 x i32]* [[TMP0]], i64 0, i64 2 // CHECK1-NEXT: [[TMP23:%.*]] = load i32, i32* [[ARRAYIDX]], align 4, !llvm.access.group !41 // CHECK1-NEXT: [[ADD20:%.*]] = add nsw i32 [[TMP23]], 1 @@ -1925,11 +1925,11 @@ int bar(int n){ // CHECK1-NEXT: [[CONV:%.*]] = bitcast i64* [[A_ADDR]] to i32* // CHECK1-NEXT: [[CONV1:%.*]] = bitcast i64* [[AA_ADDR]] to i16* // CHECK1-NEXT: [[TMP0:%.*]] = load [10 x i32]*, [10 x i32]** [[B_ADDR]], align 8 -// CHECK1-NEXT: [[TMP1:%.*]] = load i32, 
i32* [[CONV]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK1-NEXT: [[CONV2:%.*]] = bitcast i64* [[A_CASTED]] to i32* // CHECK1-NEXT: store i32 [[TMP1]], i32* [[CONV2]], align 4 // CHECK1-NEXT: [[TMP2:%.*]] = load i64, i64* [[A_CASTED]], align 8 -// CHECK1-NEXT: [[TMP3:%.*]] = load i16, i16* [[CONV1]], align 8 +// CHECK1-NEXT: [[TMP3:%.*]] = load i16, i16* [[CONV1]], align 2 // CHECK1-NEXT: [[CONV3:%.*]] = bitcast i64* [[AA_CASTED]] to i16* // CHECK1-NEXT: store i16 [[TMP3]], i16* [[CONV3]], align 2 // CHECK1-NEXT: [[TMP4:%.*]] = load i64, i64* [[AA_CASTED]], align 8 @@ -1991,14 +1991,14 @@ int bar(int n){ // CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP8]], 1 // CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK1-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !44 -// CHECK1-NEXT: [[TMP9:%.*]] = load i32, i32* [[CONV]], align 8, !llvm.access.group !44 +// CHECK1-NEXT: [[TMP9:%.*]] = load i32, i32* [[CONV]], align 4, !llvm.access.group !44 // CHECK1-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP9]], 1 -// CHECK1-NEXT: store i32 [[ADD3]], i32* [[CONV]], align 8, !llvm.access.group !44 -// CHECK1-NEXT: [[TMP10:%.*]] = load i16, i16* [[CONV1]], align 8, !llvm.access.group !44 +// CHECK1-NEXT: store i32 [[ADD3]], i32* [[CONV]], align 4, !llvm.access.group !44 +// CHECK1-NEXT: [[TMP10:%.*]] = load i16, i16* [[CONV1]], align 2, !llvm.access.group !44 // CHECK1-NEXT: [[CONV4:%.*]] = sext i16 [[TMP10]] to i32 // CHECK1-NEXT: [[ADD5:%.*]] = add nsw i32 [[CONV4]], 1 // CHECK1-NEXT: [[CONV6:%.*]] = trunc i32 [[ADD5]] to i16 -// CHECK1-NEXT: store i16 [[CONV6]], i16* [[CONV1]], align 8, !llvm.access.group !44 +// CHECK1-NEXT: store i16 [[CONV6]], i16* [[CONV1]], align 2, !llvm.access.group !44 // CHECK1-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], [10 x i32]* [[TMP0]], i64 0, i64 2 // CHECK1-NEXT: [[TMP11:%.*]] = load i32, i32* [[ARRAYIDX]], align 4, !llvm.access.group !44 // CHECK1-NEXT: [[ADD7:%.*]] = add nsw i32 [[TMP11]], 1 @@ -2368,10 +2368,10 @@ int bar(int n){ // CHECK2-NEXT: [[CONV:%.*]] = bitcast i64* [[AA_ADDR]] to i16* // CHECK2-NEXT: [[CONV3:%.*]] = bitcast i64* [[DOTCAPTURE_EXPR__ADDR]] to i32* // CHECK2-NEXT: [[CONV4:%.*]] = bitcast i64* [[DOTCAPTURE_EXPR__ADDR2]] to i32* -// CHECK2-NEXT: [[TMP1:%.*]] = load i32, i32* [[CONV3]], align 8 -// CHECK2-NEXT: [[TMP2:%.*]] = load i32, i32* [[CONV4]], align 8 +// CHECK2-NEXT: [[TMP1:%.*]] = load i32, i32* [[CONV3]], align 4 +// CHECK2-NEXT: [[TMP2:%.*]] = load i32, i32* [[CONV4]], align 4 // CHECK2-NEXT: call void @__kmpc_push_num_teams(%struct.ident_t* @[[GLOB2]], i32 [[TMP0]], i32 [[TMP1]], i32 [[TMP2]]) -// CHECK2-NEXT: [[TMP3:%.*]] = load i16, i16* [[CONV]], align 8 +// CHECK2-NEXT: [[TMP3:%.*]] = load i16, i16* [[CONV]], align 2 // CHECK2-NEXT: [[CONV5:%.*]] = bitcast i64* [[AA_CASTED]] to i16* // CHECK2-NEXT: store i16 [[TMP3]], i16* [[CONV5]], align 2 // CHECK2-NEXT: [[TMP4:%.*]] = load i64, i64* [[AA_CASTED]], align 8 @@ -2563,7 +2563,7 @@ int bar(int n){ // CHECK2-NEXT: [[A_CASTED:%.*]] = alloca i64, align 8 // CHECK2-NEXT: store i64 [[A]], i64* [[A_ADDR]], align 8 // CHECK2-NEXT: [[CONV:%.*]] = bitcast i64* [[A_ADDR]] to i32* -// CHECK2-NEXT: [[TMP0:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK2-NEXT: [[TMP0:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK2-NEXT: [[CONV1:%.*]] = bitcast i64* [[A_CASTED]] to i32* // CHECK2-NEXT: store i32 [[TMP0]], i32* [[CONV1]], align 4 // CHECK2-NEXT: [[TMP1:%.*]] = load i64, i64* [[A_CASTED]], align 8 @@ -2638,7 +2638,7 @@ int 
bar(int n){ // CHECK2-NEXT: [[TMP11:%.*]] = icmp ne i32 [[TMP10]], 0 // CHECK2-NEXT: br i1 [[TMP11]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK2: .omp.final.then: -// CHECK2-NEXT: store i32 10, i32* [[CONV]], align 8 +// CHECK2-NEXT: store i32 10, i32* [[CONV]], align 4 // CHECK2-NEXT: br label [[DOTOMP_FINAL_DONE]] // CHECK2: .omp.final.done: // CHECK2-NEXT: ret void @@ -2651,7 +2651,7 @@ int bar(int n){ // CHECK2-NEXT: [[AA_CASTED:%.*]] = alloca i64, align 8 // CHECK2-NEXT: store i64 [[AA]], i64* [[AA_ADDR]], align 8 // CHECK2-NEXT: [[CONV:%.*]] = bitcast i64* [[AA_ADDR]] to i16* -// CHECK2-NEXT: [[TMP0:%.*]] = load i16, i16* [[CONV]], align 8 +// CHECK2-NEXT: [[TMP0:%.*]] = load i16, i16* [[CONV]], align 2 // CHECK2-NEXT: [[CONV1:%.*]] = bitcast i64* [[AA_CASTED]] to i16* // CHECK2-NEXT: store i16 [[TMP0]], i16* [[CONV1]], align 2 // CHECK2-NEXT: [[TMP1:%.*]] = load i64, i64* [[AA_CASTED]], align 8 @@ -2707,11 +2707,11 @@ int bar(int n){ // CHECK2-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP7]], 1 // CHECK2-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK2-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !29 -// CHECK2-NEXT: [[TMP8:%.*]] = load i16, i16* [[CONV]], align 8, !llvm.access.group !29 +// CHECK2-NEXT: [[TMP8:%.*]] = load i16, i16* [[CONV]], align 2, !llvm.access.group !29 // CHECK2-NEXT: [[CONV2:%.*]] = sext i16 [[TMP8]] to i32 // CHECK2-NEXT: [[ADD3:%.*]] = add nsw i32 [[CONV2]], 1 // CHECK2-NEXT: [[CONV4:%.*]] = trunc i32 [[ADD3]] to i16 -// CHECK2-NEXT: store i16 [[CONV4]], i16* [[CONV]], align 8, !llvm.access.group !29 +// CHECK2-NEXT: store i16 [[CONV4]], i16* [[CONV]], align 2, !llvm.access.group !29 // CHECK2-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK2: omp.body.continue: // CHECK2-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] @@ -2745,11 +2745,11 @@ int bar(int n){ // CHECK2-NEXT: store i64 [[AA]], i64* [[AA_ADDR]], align 8 // CHECK2-NEXT: [[CONV:%.*]] = bitcast i64* [[A_ADDR]] to i32* // CHECK2-NEXT: [[CONV1:%.*]] = bitcast i64* [[AA_ADDR]] to i16* -// CHECK2-NEXT: [[TMP0:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK2-NEXT: [[TMP0:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK2-NEXT: [[CONV2:%.*]] = bitcast i64* [[A_CASTED]] to i32* // CHECK2-NEXT: store i32 [[TMP0]], i32* [[CONV2]], align 4 // CHECK2-NEXT: [[TMP1:%.*]] = load i64, i64* [[A_CASTED]], align 8 -// CHECK2-NEXT: [[TMP2:%.*]] = load i16, i16* [[CONV1]], align 8 +// CHECK2-NEXT: [[TMP2:%.*]] = load i16, i16* [[CONV1]], align 2 // CHECK2-NEXT: [[CONV3:%.*]] = bitcast i64* [[AA_CASTED]] to i16* // CHECK2-NEXT: store i16 [[TMP2]], i16* [[CONV3]], align 2 // CHECK2-NEXT: [[TMP3:%.*]] = load i64, i64* [[AA_CASTED]], align 8 @@ -2808,14 +2808,14 @@ int bar(int n){ // CHECK2-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP7]], 1 // CHECK2-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK2-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !32 -// CHECK2-NEXT: [[TMP8:%.*]] = load i32, i32* [[CONV]], align 8, !llvm.access.group !32 +// CHECK2-NEXT: [[TMP8:%.*]] = load i32, i32* [[CONV]], align 4, !llvm.access.group !32 // CHECK2-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP8]], 1 -// CHECK2-NEXT: store i32 [[ADD3]], i32* [[CONV]], align 8, !llvm.access.group !32 -// CHECK2-NEXT: [[TMP9:%.*]] = load i16, i16* [[CONV1]], align 8, !llvm.access.group !32 +// CHECK2-NEXT: store i32 [[ADD3]], i32* [[CONV]], align 4, !llvm.access.group !32 +// CHECK2-NEXT: [[TMP9:%.*]] = load i16, i16* [[CONV1]], align 2, !llvm.access.group !32 // CHECK2-NEXT: [[CONV4:%.*]] = sext i16 
[[TMP9]] to i32 // CHECK2-NEXT: [[ADD5:%.*]] = add nsw i32 [[CONV4]], 1 // CHECK2-NEXT: [[CONV6:%.*]] = trunc i32 [[ADD5]] to i16 -// CHECK2-NEXT: store i16 [[CONV6]], i16* [[CONV1]], align 8, !llvm.access.group !32 +// CHECK2-NEXT: store i16 [[CONV6]], i16* [[CONV1]], align 2, !llvm.access.group !32 // CHECK2-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK2: omp.body.continue: // CHECK2-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] @@ -2869,7 +2869,7 @@ int bar(int n){ // CHECK2-NEXT: [[TMP5:%.*]] = load i64, i64* [[VLA_ADDR4]], align 8 // CHECK2-NEXT: [[TMP6:%.*]] = load double*, double** [[CN_ADDR]], align 8 // CHECK2-NEXT: [[TMP7:%.*]] = load %struct.TT*, %struct.TT** [[D_ADDR]], align 8 -// CHECK2-NEXT: [[TMP8:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK2-NEXT: [[TMP8:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK2-NEXT: [[CONV5:%.*]] = bitcast i64* [[A_CASTED]] to i32* // CHECK2-NEXT: store i32 [[TMP8]], i32* [[CONV5]], align 4 // CHECK2-NEXT: [[TMP9:%.*]] = load i64, i64* [[A_CASTED]], align 8 @@ -2951,9 +2951,9 @@ int bar(int n){ // CHECK2-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP15]], 1 // CHECK2-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK2-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !35 -// CHECK2-NEXT: [[TMP16:%.*]] = load i32, i32* [[CONV]], align 8, !llvm.access.group !35 +// CHECK2-NEXT: [[TMP16:%.*]] = load i32, i32* [[CONV]], align 4, !llvm.access.group !35 // CHECK2-NEXT: [[ADD6:%.*]] = add nsw i32 [[TMP16]], 1 -// CHECK2-NEXT: store i32 [[ADD6]], i32* [[CONV]], align 8, !llvm.access.group !35 +// CHECK2-NEXT: store i32 [[ADD6]], i32* [[CONV]], align 4, !llvm.access.group !35 // CHECK2-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x float], [10 x float]* [[TMP0]], i64 0, i64 2 // CHECK2-NEXT: [[TMP17:%.*]] = load float, float* [[ARRAYIDX]], align 4, !llvm.access.group !35 // CHECK2-NEXT: [[CONV7:%.*]] = fpext float [[TMP17]] to double @@ -3364,7 +3364,7 @@ int bar(int n){ // CHECK2-NEXT: [[TMP1:%.*]] = load i64, i64* [[VLA_ADDR]], align 8 // CHECK2-NEXT: [[TMP2:%.*]] = load i64, i64* [[VLA_ADDR2]], align 8 // CHECK2-NEXT: [[TMP3:%.*]] = load i16*, i16** [[C_ADDR]], align 8 -// CHECK2-NEXT: [[TMP4:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK2-NEXT: [[TMP4:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK2-NEXT: [[CONV3:%.*]] = bitcast i64* [[B_CASTED]] to i32* // CHECK2-NEXT: store i32 [[TMP4]], i32* [[CONV3]], align 4 // CHECK2-NEXT: [[TMP5:%.*]] = load i64, i64* [[B_CASTED]], align 8 @@ -3432,7 +3432,7 @@ int bar(int n){ // CHECK2-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP11]], 1 // CHECK2-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK2-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !38 -// CHECK2-NEXT: [[TMP12:%.*]] = load i32, i32* [[CONV]], align 8, !llvm.access.group !38 +// CHECK2-NEXT: [[TMP12:%.*]] = load i32, i32* [[CONV]], align 4, !llvm.access.group !38 // CHECK2-NEXT: [[CONV4:%.*]] = sitofp i32 [[TMP12]] to double // CHECK2-NEXT: [[ADD5:%.*]] = fadd double [[CONV4]], 1.500000e+00 // CHECK2-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_S1:%.*]], %struct.S1* [[TMP0]], i32 0, i32 0 @@ -3490,19 +3490,19 @@ int bar(int n){ // CHECK2-NEXT: [[CONV2:%.*]] = bitcast i64* [[AA_ADDR]] to i16* // CHECK2-NEXT: [[CONV3:%.*]] = bitcast i64* [[AAA_ADDR]] to i8* // CHECK2-NEXT: [[TMP0:%.*]] = load [10 x i32]*, [10 x i32]** [[B_ADDR]], align 8 -// CHECK2-NEXT: [[TMP1:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK2-NEXT: [[TMP1:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK2-NEXT: [[CONV4:%.*]] = bitcast i64* 
[[A_CASTED]] to i32* // CHECK2-NEXT: store i32 [[TMP1]], i32* [[CONV4]], align 4 // CHECK2-NEXT: [[TMP2:%.*]] = load i64, i64* [[A_CASTED]], align 8 -// CHECK2-NEXT: [[TMP3:%.*]] = load i32, i32* [[CONV1]], align 8 +// CHECK2-NEXT: [[TMP3:%.*]] = load i32, i32* [[CONV1]], align 4 // CHECK2-NEXT: [[CONV5:%.*]] = bitcast i64* [[N_CASTED]] to i32* // CHECK2-NEXT: store i32 [[TMP3]], i32* [[CONV5]], align 4 // CHECK2-NEXT: [[TMP4:%.*]] = load i64, i64* [[N_CASTED]], align 8 -// CHECK2-NEXT: [[TMP5:%.*]] = load i16, i16* [[CONV2]], align 8 +// CHECK2-NEXT: [[TMP5:%.*]] = load i16, i16* [[CONV2]], align 2 // CHECK2-NEXT: [[CONV6:%.*]] = bitcast i64* [[AA_CASTED]] to i16* // CHECK2-NEXT: store i16 [[TMP5]], i16* [[CONV6]], align 2 // CHECK2-NEXT: [[TMP6:%.*]] = load i64, i64* [[AA_CASTED]], align 8 -// CHECK2-NEXT: [[TMP7:%.*]] = load i8, i8* [[CONV3]], align 8 +// CHECK2-NEXT: [[TMP7:%.*]] = load i8, i8* [[CONV3]], align 1 // CHECK2-NEXT: [[CONV7:%.*]] = bitcast i64* [[AAA_CASTED]] to i8* // CHECK2-NEXT: store i8 [[TMP7]], i8* [[CONV7]], align 1 // CHECK2-NEXT: [[TMP8:%.*]] = load i64, i64* [[AAA_CASTED]], align 8 @@ -3543,9 +3543,9 @@ int bar(int n){ // CHECK2-NEXT: [[CONV2:%.*]] = bitcast i64* [[AA_ADDR]] to i16* // CHECK2-NEXT: [[CONV3:%.*]] = bitcast i64* [[AAA_ADDR]] to i8* // CHECK2-NEXT: [[TMP0:%.*]] = load [10 x i32]*, [10 x i32]** [[B_ADDR]], align 8 -// CHECK2-NEXT: [[TMP1:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK2-NEXT: [[TMP1:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK2-NEXT: store i32 [[TMP1]], i32* [[DOTCAPTURE_EXPR_]], align 4 -// CHECK2-NEXT: [[TMP2:%.*]] = load i32, i32* [[CONV1]], align 8 +// CHECK2-NEXT: [[TMP2:%.*]] = load i32, i32* [[CONV1]], align 4 // CHECK2-NEXT: store i32 [[TMP2]], i32* [[DOTCAPTURE_EXPR_4]], align 4 // CHECK2-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_4]], align 4 // CHECK2-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 @@ -3598,19 +3598,19 @@ int bar(int n){ // CHECK2-NEXT: [[MUL:%.*]] = mul i32 [[TMP19]], 1 // CHECK2-NEXT: [[ADD12:%.*]] = add i32 [[TMP18]], [[MUL]] // CHECK2-NEXT: store i32 [[ADD12]], i32* [[I8]], align 4, !llvm.access.group !41 -// CHECK2-NEXT: [[TMP20:%.*]] = load i32, i32* [[CONV]], align 8, !llvm.access.group !41 +// CHECK2-NEXT: [[TMP20:%.*]] = load i32, i32* [[CONV]], align 4, !llvm.access.group !41 // CHECK2-NEXT: [[ADD13:%.*]] = add nsw i32 [[TMP20]], 1 -// CHECK2-NEXT: store i32 [[ADD13]], i32* [[CONV]], align 8, !llvm.access.group !41 -// CHECK2-NEXT: [[TMP21:%.*]] = load i16, i16* [[CONV2]], align 8, !llvm.access.group !41 +// CHECK2-NEXT: store i32 [[ADD13]], i32* [[CONV]], align 4, !llvm.access.group !41 +// CHECK2-NEXT: [[TMP21:%.*]] = load i16, i16* [[CONV2]], align 2, !llvm.access.group !41 // CHECK2-NEXT: [[CONV14:%.*]] = sext i16 [[TMP21]] to i32 // CHECK2-NEXT: [[ADD15:%.*]] = add nsw i32 [[CONV14]], 1 // CHECK2-NEXT: [[CONV16:%.*]] = trunc i32 [[ADD15]] to i16 -// CHECK2-NEXT: store i16 [[CONV16]], i16* [[CONV2]], align 8, !llvm.access.group !41 -// CHECK2-NEXT: [[TMP22:%.*]] = load i8, i8* [[CONV3]], align 8, !llvm.access.group !41 +// CHECK2-NEXT: store i16 [[CONV16]], i16* [[CONV2]], align 2, !llvm.access.group !41 +// CHECK2-NEXT: [[TMP22:%.*]] = load i8, i8* [[CONV3]], align 1, !llvm.access.group !41 // CHECK2-NEXT: [[CONV17:%.*]] = sext i8 [[TMP22]] to i32 // CHECK2-NEXT: [[ADD18:%.*]] = add nsw i32 [[CONV17]], 1 // CHECK2-NEXT: [[CONV19:%.*]] = trunc i32 [[ADD18]] to i8 -// CHECK2-NEXT: store i8 [[CONV19]], i8* [[CONV3]], align 8, !llvm.access.group !41 +// 
CHECK2-NEXT: store i8 [[CONV19]], i8* [[CONV3]], align 1, !llvm.access.group !41 // CHECK2-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], [10 x i32]* [[TMP0]], i64 0, i64 2 // CHECK2-NEXT: [[TMP23:%.*]] = load i32, i32* [[ARRAYIDX]], align 4, !llvm.access.group !41 // CHECK2-NEXT: [[ADD20:%.*]] = add nsw i32 [[TMP23]], 1 @@ -3664,11 +3664,11 @@ int bar(int n){ // CHECK2-NEXT: [[CONV:%.*]] = bitcast i64* [[A_ADDR]] to i32* // CHECK2-NEXT: [[CONV1:%.*]] = bitcast i64* [[AA_ADDR]] to i16* // CHECK2-NEXT: [[TMP0:%.*]] = load [10 x i32]*, [10 x i32]** [[B_ADDR]], align 8 -// CHECK2-NEXT: [[TMP1:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK2-NEXT: [[TMP1:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK2-NEXT: [[CONV2:%.*]] = bitcast i64* [[A_CASTED]] to i32* // CHECK2-NEXT: store i32 [[TMP1]], i32* [[CONV2]], align 4 // CHECK2-NEXT: [[TMP2:%.*]] = load i64, i64* [[A_CASTED]], align 8 -// CHECK2-NEXT: [[TMP3:%.*]] = load i16, i16* [[CONV1]], align 8 +// CHECK2-NEXT: [[TMP3:%.*]] = load i16, i16* [[CONV1]], align 2 // CHECK2-NEXT: [[CONV3:%.*]] = bitcast i64* [[AA_CASTED]] to i16* // CHECK2-NEXT: store i16 [[TMP3]], i16* [[CONV3]], align 2 // CHECK2-NEXT: [[TMP4:%.*]] = load i64, i64* [[AA_CASTED]], align 8 @@ -3730,14 +3730,14 @@ int bar(int n){ // CHECK2-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP8]], 1 // CHECK2-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK2-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !44 -// CHECK2-NEXT: [[TMP9:%.*]] = load i32, i32* [[CONV]], align 8, !llvm.access.group !44 +// CHECK2-NEXT: [[TMP9:%.*]] = load i32, i32* [[CONV]], align 4, !llvm.access.group !44 // CHECK2-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP9]], 1 -// CHECK2-NEXT: store i32 [[ADD3]], i32* [[CONV]], align 8, !llvm.access.group !44 -// CHECK2-NEXT: [[TMP10:%.*]] = load i16, i16* [[CONV1]], align 8, !llvm.access.group !44 +// CHECK2-NEXT: store i32 [[ADD3]], i32* [[CONV]], align 4, !llvm.access.group !44 +// CHECK2-NEXT: [[TMP10:%.*]] = load i16, i16* [[CONV1]], align 2, !llvm.access.group !44 // CHECK2-NEXT: [[CONV4:%.*]] = sext i16 [[TMP10]] to i32 // CHECK2-NEXT: [[ADD5:%.*]] = add nsw i32 [[CONV4]], 1 // CHECK2-NEXT: [[CONV6:%.*]] = trunc i32 [[ADD5]] to i16 -// CHECK2-NEXT: store i16 [[CONV6]], i16* [[CONV1]], align 8, !llvm.access.group !44 +// CHECK2-NEXT: store i16 [[CONV6]], i16* [[CONV1]], align 2, !llvm.access.group !44 // CHECK2-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], [10 x i32]* [[TMP0]], i64 0, i64 2 // CHECK2-NEXT: [[TMP11:%.*]] = load i32, i32* [[ARRAYIDX]], align 4, !llvm.access.group !44 // CHECK2-NEXT: [[ADD7:%.*]] = add nsw i32 [[TMP11]], 1 @@ -4103,7 +4103,7 @@ int bar(int n){ // CHECK3-NEXT: [[TMP1:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR__ADDR]], align 4 // CHECK3-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR__ADDR2]], align 4 // CHECK3-NEXT: call void @__kmpc_push_num_teams(%struct.ident_t* @[[GLOB2]], i32 [[TMP0]], i32 [[TMP1]], i32 [[TMP2]]) -// CHECK3-NEXT: [[TMP3:%.*]] = load i16, i16* [[CONV]], align 4 +// CHECK3-NEXT: [[TMP3:%.*]] = load i16, i16* [[CONV]], align 2 // CHECK3-NEXT: [[CONV3:%.*]] = bitcast i32* [[AA_CASTED]] to i16* // CHECK3-NEXT: store i16 [[TMP3]], i16* [[CONV3]], align 2 // CHECK3-NEXT: [[TMP4:%.*]] = load i32, i32* [[AA_CASTED]], align 4 @@ -4378,7 +4378,7 @@ int bar(int n){ // CHECK3-NEXT: [[AA_CASTED:%.*]] = alloca i32, align 4 // CHECK3-NEXT: store i32 [[AA]], i32* [[AA_ADDR]], align 4 // CHECK3-NEXT: [[CONV:%.*]] = bitcast i32* [[AA_ADDR]] to i16* -// CHECK3-NEXT: [[TMP0:%.*]] = load i16, 
i16* [[CONV]], align 4 +// CHECK3-NEXT: [[TMP0:%.*]] = load i16, i16* [[CONV]], align 2 // CHECK3-NEXT: [[CONV1:%.*]] = bitcast i32* [[AA_CASTED]] to i16* // CHECK3-NEXT: store i16 [[TMP0]], i16* [[CONV1]], align 2 // CHECK3-NEXT: [[TMP1:%.*]] = load i32, i32* [[AA_CASTED]], align 4 @@ -4434,11 +4434,11 @@ int bar(int n){ // CHECK3-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP7]], 1 // CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK3-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !30 -// CHECK3-NEXT: [[TMP8:%.*]] = load i16, i16* [[CONV]], align 4, !llvm.access.group !30 +// CHECK3-NEXT: [[TMP8:%.*]] = load i16, i16* [[CONV]], align 2, !llvm.access.group !30 // CHECK3-NEXT: [[CONV2:%.*]] = sext i16 [[TMP8]] to i32 // CHECK3-NEXT: [[ADD3:%.*]] = add nsw i32 [[CONV2]], 1 // CHECK3-NEXT: [[CONV4:%.*]] = trunc i32 [[ADD3]] to i16 -// CHECK3-NEXT: store i16 [[CONV4]], i16* [[CONV]], align 4, !llvm.access.group !30 +// CHECK3-NEXT: store i16 [[CONV4]], i16* [[CONV]], align 2, !llvm.access.group !30 // CHECK3-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK3: omp.body.continue: // CHECK3-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] @@ -4474,7 +4474,7 @@ int bar(int n){ // CHECK3-NEXT: [[TMP0:%.*]] = load i32, i32* [[A_ADDR]], align 4 // CHECK3-NEXT: store i32 [[TMP0]], i32* [[A_CASTED]], align 4 // CHECK3-NEXT: [[TMP1:%.*]] = load i32, i32* [[A_CASTED]], align 4 -// CHECK3-NEXT: [[TMP2:%.*]] = load i16, i16* [[CONV]], align 4 +// CHECK3-NEXT: [[TMP2:%.*]] = load i16, i16* [[CONV]], align 2 // CHECK3-NEXT: [[CONV1:%.*]] = bitcast i32* [[AA_CASTED]] to i16* // CHECK3-NEXT: store i16 [[TMP2]], i16* [[CONV1]], align 2 // CHECK3-NEXT: [[TMP3:%.*]] = load i32, i32* [[AA_CASTED]], align 4 @@ -4535,11 +4535,11 @@ int bar(int n){ // CHECK3-NEXT: [[TMP8:%.*]] = load i32, i32* [[A_ADDR]], align 4, !llvm.access.group !33 // CHECK3-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP8]], 1 // CHECK3-NEXT: store i32 [[ADD2]], i32* [[A_ADDR]], align 4, !llvm.access.group !33 -// CHECK3-NEXT: [[TMP9:%.*]] = load i16, i16* [[CONV]], align 4, !llvm.access.group !33 +// CHECK3-NEXT: [[TMP9:%.*]] = load i16, i16* [[CONV]], align 2, !llvm.access.group !33 // CHECK3-NEXT: [[CONV3:%.*]] = sext i16 [[TMP9]] to i32 // CHECK3-NEXT: [[ADD4:%.*]] = add nsw i32 [[CONV3]], 1 // CHECK3-NEXT: [[CONV5:%.*]] = trunc i32 [[ADD4]] to i16 -// CHECK3-NEXT: store i16 [[CONV5]], i16* [[CONV]], align 4, !llvm.access.group !33 +// CHECK3-NEXT: store i16 [[CONV5]], i16* [[CONV]], align 2, !llvm.access.group !33 // CHECK3-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK3: omp.body.continue: // CHECK3-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] @@ -5208,11 +5208,11 @@ int bar(int n){ // CHECK3-NEXT: [[TMP3:%.*]] = load i32, i32* [[N_ADDR]], align 4 // CHECK3-NEXT: store i32 [[TMP3]], i32* [[N_CASTED]], align 4 // CHECK3-NEXT: [[TMP4:%.*]] = load i32, i32* [[N_CASTED]], align 4 -// CHECK3-NEXT: [[TMP5:%.*]] = load i16, i16* [[CONV]], align 4 +// CHECK3-NEXT: [[TMP5:%.*]] = load i16, i16* [[CONV]], align 2 // CHECK3-NEXT: [[CONV2:%.*]] = bitcast i32* [[AA_CASTED]] to i16* // CHECK3-NEXT: store i16 [[TMP5]], i16* [[CONV2]], align 2 // CHECK3-NEXT: [[TMP6:%.*]] = load i32, i32* [[AA_CASTED]], align 4 -// CHECK3-NEXT: [[TMP7:%.*]] = load i8, i8* [[CONV1]], align 4 +// CHECK3-NEXT: [[TMP7:%.*]] = load i8, i8* [[CONV1]], align 1 // CHECK3-NEXT: [[CONV3:%.*]] = bitcast i32* [[AAA_CASTED]] to i8* // CHECK3-NEXT: store i8 [[TMP7]], i8* [[CONV3]], align 1 // CHECK3-NEXT: [[TMP8:%.*]] = load i32, i32* [[AAA_CASTED]], align 4 @@ -5309,16 +5309,16 @@ 
int bar(int n){ // CHECK3-NEXT: [[TMP20:%.*]] = load i32, i32* [[A_ADDR]], align 4, !llvm.access.group !42 // CHECK3-NEXT: [[ADD11:%.*]] = add nsw i32 [[TMP20]], 1 // CHECK3-NEXT: store i32 [[ADD11]], i32* [[A_ADDR]], align 4, !llvm.access.group !42 -// CHECK3-NEXT: [[TMP21:%.*]] = load i16, i16* [[CONV]], align 4, !llvm.access.group !42 +// CHECK3-NEXT: [[TMP21:%.*]] = load i16, i16* [[CONV]], align 2, !llvm.access.group !42 // CHECK3-NEXT: [[CONV12:%.*]] = sext i16 [[TMP21]] to i32 // CHECK3-NEXT: [[ADD13:%.*]] = add nsw i32 [[CONV12]], 1 // CHECK3-NEXT: [[CONV14:%.*]] = trunc i32 [[ADD13]] to i16 -// CHECK3-NEXT: store i16 [[CONV14]], i16* [[CONV]], align 4, !llvm.access.group !42 -// CHECK3-NEXT: [[TMP22:%.*]] = load i8, i8* [[CONV1]], align 4, !llvm.access.group !42 +// CHECK3-NEXT: store i16 [[CONV14]], i16* [[CONV]], align 2, !llvm.access.group !42 +// CHECK3-NEXT: [[TMP22:%.*]] = load i8, i8* [[CONV1]], align 1, !llvm.access.group !42 // CHECK3-NEXT: [[CONV15:%.*]] = sext i8 [[TMP22]] to i32 // CHECK3-NEXT: [[ADD16:%.*]] = add nsw i32 [[CONV15]], 1 // CHECK3-NEXT: [[CONV17:%.*]] = trunc i32 [[ADD16]] to i8 -// CHECK3-NEXT: store i8 [[CONV17]], i8* [[CONV1]], align 4, !llvm.access.group !42 +// CHECK3-NEXT: store i8 [[CONV17]], i8* [[CONV1]], align 1, !llvm.access.group !42 // CHECK3-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], [10 x i32]* [[TMP0]], i32 0, i32 2 // CHECK3-NEXT: [[TMP23:%.*]] = load i32, i32* [[ARRAYIDX]], align 4, !llvm.access.group !42 // CHECK3-NEXT: [[ADD18:%.*]] = add nsw i32 [[TMP23]], 1 @@ -5374,7 +5374,7 @@ int bar(int n){ // CHECK3-NEXT: [[TMP1:%.*]] = load i32, i32* [[A_ADDR]], align 4 // CHECK3-NEXT: store i32 [[TMP1]], i32* [[A_CASTED]], align 4 // CHECK3-NEXT: [[TMP2:%.*]] = load i32, i32* [[A_CASTED]], align 4 -// CHECK3-NEXT: [[TMP3:%.*]] = load i16, i16* [[CONV]], align 4 +// CHECK3-NEXT: [[TMP3:%.*]] = load i16, i16* [[CONV]], align 2 // CHECK3-NEXT: [[CONV1:%.*]] = bitcast i32* [[AA_CASTED]] to i16* // CHECK3-NEXT: store i16 [[TMP3]], i16* [[CONV1]], align 2 // CHECK3-NEXT: [[TMP4:%.*]] = load i32, i32* [[AA_CASTED]], align 4 @@ -5438,11 +5438,11 @@ int bar(int n){ // CHECK3-NEXT: [[TMP9:%.*]] = load i32, i32* [[A_ADDR]], align 4, !llvm.access.group !45 // CHECK3-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP9]], 1 // CHECK3-NEXT: store i32 [[ADD2]], i32* [[A_ADDR]], align 4, !llvm.access.group !45 -// CHECK3-NEXT: [[TMP10:%.*]] = load i16, i16* [[CONV]], align 4, !llvm.access.group !45 +// CHECK3-NEXT: [[TMP10:%.*]] = load i16, i16* [[CONV]], align 2, !llvm.access.group !45 // CHECK3-NEXT: [[CONV3:%.*]] = sext i16 [[TMP10]] to i32 // CHECK3-NEXT: [[ADD4:%.*]] = add nsw i32 [[CONV3]], 1 // CHECK3-NEXT: [[CONV5:%.*]] = trunc i32 [[ADD4]] to i16 -// CHECK3-NEXT: store i16 [[CONV5]], i16* [[CONV]], align 4, !llvm.access.group !45 +// CHECK3-NEXT: store i16 [[CONV5]], i16* [[CONV]], align 2, !llvm.access.group !45 // CHECK3-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], [10 x i32]* [[TMP0]], i32 0, i32 2 // CHECK3-NEXT: [[TMP11:%.*]] = load i32, i32* [[ARRAYIDX]], align 4, !llvm.access.group !45 // CHECK3-NEXT: [[ADD6:%.*]] = add nsw i32 [[TMP11]], 1 @@ -5808,7 +5808,7 @@ int bar(int n){ // CHECK4-NEXT: [[TMP1:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR__ADDR]], align 4 // CHECK4-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR__ADDR2]], align 4 // CHECK4-NEXT: call void @__kmpc_push_num_teams(%struct.ident_t* @[[GLOB2]], i32 [[TMP0]], i32 [[TMP1]], i32 [[TMP2]]) -// CHECK4-NEXT: [[TMP3:%.*]] = load i16, i16* [[CONV]], align 4 +// 
CHECK4-NEXT: [[TMP3:%.*]] = load i16, i16* [[CONV]], align 2 // CHECK4-NEXT: [[CONV3:%.*]] = bitcast i32* [[AA_CASTED]] to i16* // CHECK4-NEXT: store i16 [[TMP3]], i16* [[CONV3]], align 2 // CHECK4-NEXT: [[TMP4:%.*]] = load i32, i32* [[AA_CASTED]], align 4 @@ -6083,7 +6083,7 @@ int bar(int n){ // CHECK4-NEXT: [[AA_CASTED:%.*]] = alloca i32, align 4 // CHECK4-NEXT: store i32 [[AA]], i32* [[AA_ADDR]], align 4 // CHECK4-NEXT: [[CONV:%.*]] = bitcast i32* [[AA_ADDR]] to i16* -// CHECK4-NEXT: [[TMP0:%.*]] = load i16, i16* [[CONV]], align 4 +// CHECK4-NEXT: [[TMP0:%.*]] = load i16, i16* [[CONV]], align 2 // CHECK4-NEXT: [[CONV1:%.*]] = bitcast i32* [[AA_CASTED]] to i16* // CHECK4-NEXT: store i16 [[TMP0]], i16* [[CONV1]], align 2 // CHECK4-NEXT: [[TMP1:%.*]] = load i32, i32* [[AA_CASTED]], align 4 @@ -6139,11 +6139,11 @@ int bar(int n){ // CHECK4-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP7]], 1 // CHECK4-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK4-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !30 -// CHECK4-NEXT: [[TMP8:%.*]] = load i16, i16* [[CONV]], align 4, !llvm.access.group !30 +// CHECK4-NEXT: [[TMP8:%.*]] = load i16, i16* [[CONV]], align 2, !llvm.access.group !30 // CHECK4-NEXT: [[CONV2:%.*]] = sext i16 [[TMP8]] to i32 // CHECK4-NEXT: [[ADD3:%.*]] = add nsw i32 [[CONV2]], 1 // CHECK4-NEXT: [[CONV4:%.*]] = trunc i32 [[ADD3]] to i16 -// CHECK4-NEXT: store i16 [[CONV4]], i16* [[CONV]], align 4, !llvm.access.group !30 +// CHECK4-NEXT: store i16 [[CONV4]], i16* [[CONV]], align 2, !llvm.access.group !30 // CHECK4-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK4: omp.body.continue: // CHECK4-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] @@ -6179,7 +6179,7 @@ int bar(int n){ // CHECK4-NEXT: [[TMP0:%.*]] = load i32, i32* [[A_ADDR]], align 4 // CHECK4-NEXT: store i32 [[TMP0]], i32* [[A_CASTED]], align 4 // CHECK4-NEXT: [[TMP1:%.*]] = load i32, i32* [[A_CASTED]], align 4 -// CHECK4-NEXT: [[TMP2:%.*]] = load i16, i16* [[CONV]], align 4 +// CHECK4-NEXT: [[TMP2:%.*]] = load i16, i16* [[CONV]], align 2 // CHECK4-NEXT: [[CONV1:%.*]] = bitcast i32* [[AA_CASTED]] to i16* // CHECK4-NEXT: store i16 [[TMP2]], i16* [[CONV1]], align 2 // CHECK4-NEXT: [[TMP3:%.*]] = load i32, i32* [[AA_CASTED]], align 4 @@ -6240,11 +6240,11 @@ int bar(int n){ // CHECK4-NEXT: [[TMP8:%.*]] = load i32, i32* [[A_ADDR]], align 4, !llvm.access.group !33 // CHECK4-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP8]], 1 // CHECK4-NEXT: store i32 [[ADD2]], i32* [[A_ADDR]], align 4, !llvm.access.group !33 -// CHECK4-NEXT: [[TMP9:%.*]] = load i16, i16* [[CONV]], align 4, !llvm.access.group !33 +// CHECK4-NEXT: [[TMP9:%.*]] = load i16, i16* [[CONV]], align 2, !llvm.access.group !33 // CHECK4-NEXT: [[CONV3:%.*]] = sext i16 [[TMP9]] to i32 // CHECK4-NEXT: [[ADD4:%.*]] = add nsw i32 [[CONV3]], 1 // CHECK4-NEXT: [[CONV5:%.*]] = trunc i32 [[ADD4]] to i16 -// CHECK4-NEXT: store i16 [[CONV5]], i16* [[CONV]], align 4, !llvm.access.group !33 +// CHECK4-NEXT: store i16 [[CONV5]], i16* [[CONV]], align 2, !llvm.access.group !33 // CHECK4-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK4: omp.body.continue: // CHECK4-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] @@ -6913,11 +6913,11 @@ int bar(int n){ // CHECK4-NEXT: [[TMP3:%.*]] = load i32, i32* [[N_ADDR]], align 4 // CHECK4-NEXT: store i32 [[TMP3]], i32* [[N_CASTED]], align 4 // CHECK4-NEXT: [[TMP4:%.*]] = load i32, i32* [[N_CASTED]], align 4 -// CHECK4-NEXT: [[TMP5:%.*]] = load i16, i16* [[CONV]], align 4 +// CHECK4-NEXT: [[TMP5:%.*]] = load i16, i16* [[CONV]], align 2 // CHECK4-NEXT: 
[[CONV2:%.*]] = bitcast i32* [[AA_CASTED]] to i16* // CHECK4-NEXT: store i16 [[TMP5]], i16* [[CONV2]], align 2 // CHECK4-NEXT: [[TMP6:%.*]] = load i32, i32* [[AA_CASTED]], align 4 -// CHECK4-NEXT: [[TMP7:%.*]] = load i8, i8* [[CONV1]], align 4 +// CHECK4-NEXT: [[TMP7:%.*]] = load i8, i8* [[CONV1]], align 1 // CHECK4-NEXT: [[CONV3:%.*]] = bitcast i32* [[AAA_CASTED]] to i8* // CHECK4-NEXT: store i8 [[TMP7]], i8* [[CONV3]], align 1 // CHECK4-NEXT: [[TMP8:%.*]] = load i32, i32* [[AAA_CASTED]], align 4 @@ -7014,16 +7014,16 @@ int bar(int n){ // CHECK4-NEXT: [[TMP20:%.*]] = load i32, i32* [[A_ADDR]], align 4, !llvm.access.group !42 // CHECK4-NEXT: [[ADD11:%.*]] = add nsw i32 [[TMP20]], 1 // CHECK4-NEXT: store i32 [[ADD11]], i32* [[A_ADDR]], align 4, !llvm.access.group !42 -// CHECK4-NEXT: [[TMP21:%.*]] = load i16, i16* [[CONV]], align 4, !llvm.access.group !42 +// CHECK4-NEXT: [[TMP21:%.*]] = load i16, i16* [[CONV]], align 2, !llvm.access.group !42 // CHECK4-NEXT: [[CONV12:%.*]] = sext i16 [[TMP21]] to i32 // CHECK4-NEXT: [[ADD13:%.*]] = add nsw i32 [[CONV12]], 1 // CHECK4-NEXT: [[CONV14:%.*]] = trunc i32 [[ADD13]] to i16 -// CHECK4-NEXT: store i16 [[CONV14]], i16* [[CONV]], align 4, !llvm.access.group !42 -// CHECK4-NEXT: [[TMP22:%.*]] = load i8, i8* [[CONV1]], align 4, !llvm.access.group !42 +// CHECK4-NEXT: store i16 [[CONV14]], i16* [[CONV]], align 2, !llvm.access.group !42 +// CHECK4-NEXT: [[TMP22:%.*]] = load i8, i8* [[CONV1]], align 1, !llvm.access.group !42 // CHECK4-NEXT: [[CONV15:%.*]] = sext i8 [[TMP22]] to i32 // CHECK4-NEXT: [[ADD16:%.*]] = add nsw i32 [[CONV15]], 1 // CHECK4-NEXT: [[CONV17:%.*]] = trunc i32 [[ADD16]] to i8 -// CHECK4-NEXT: store i8 [[CONV17]], i8* [[CONV1]], align 4, !llvm.access.group !42 +// CHECK4-NEXT: store i8 [[CONV17]], i8* [[CONV1]], align 1, !llvm.access.group !42 // CHECK4-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], [10 x i32]* [[TMP0]], i32 0, i32 2 // CHECK4-NEXT: [[TMP23:%.*]] = load i32, i32* [[ARRAYIDX]], align 4, !llvm.access.group !42 // CHECK4-NEXT: [[ADD18:%.*]] = add nsw i32 [[TMP23]], 1 @@ -7079,7 +7079,7 @@ int bar(int n){ // CHECK4-NEXT: [[TMP1:%.*]] = load i32, i32* [[A_ADDR]], align 4 // CHECK4-NEXT: store i32 [[TMP1]], i32* [[A_CASTED]], align 4 // CHECK4-NEXT: [[TMP2:%.*]] = load i32, i32* [[A_CASTED]], align 4 -// CHECK4-NEXT: [[TMP3:%.*]] = load i16, i16* [[CONV]], align 4 +// CHECK4-NEXT: [[TMP3:%.*]] = load i16, i16* [[CONV]], align 2 // CHECK4-NEXT: [[CONV1:%.*]] = bitcast i32* [[AA_CASTED]] to i16* // CHECK4-NEXT: store i16 [[TMP3]], i16* [[CONV1]], align 2 // CHECK4-NEXT: [[TMP4:%.*]] = load i32, i32* [[AA_CASTED]], align 4 @@ -7143,11 +7143,11 @@ int bar(int n){ // CHECK4-NEXT: [[TMP9:%.*]] = load i32, i32* [[A_ADDR]], align 4, !llvm.access.group !45 // CHECK4-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP9]], 1 // CHECK4-NEXT: store i32 [[ADD2]], i32* [[A_ADDR]], align 4, !llvm.access.group !45 -// CHECK4-NEXT: [[TMP10:%.*]] = load i16, i16* [[CONV]], align 4, !llvm.access.group !45 +// CHECK4-NEXT: [[TMP10:%.*]] = load i16, i16* [[CONV]], align 2, !llvm.access.group !45 // CHECK4-NEXT: [[CONV3:%.*]] = sext i16 [[TMP10]] to i32 // CHECK4-NEXT: [[ADD4:%.*]] = add nsw i32 [[CONV3]], 1 // CHECK4-NEXT: [[CONV5:%.*]] = trunc i32 [[ADD4]] to i16 -// CHECK4-NEXT: store i16 [[CONV5]], i16* [[CONV]], align 4, !llvm.access.group !45 +// CHECK4-NEXT: store i16 [[CONV5]], i16* [[CONV]], align 2, !llvm.access.group !45 // CHECK4-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], [10 x i32]* [[TMP0]], i32 0, i32 2 // 
CHECK4-NEXT: [[TMP11:%.*]] = load i32, i32* [[ARRAYIDX]], align 4, !llvm.access.group !45 // CHECK4-NEXT: [[ADD6:%.*]] = add nsw i32 [[TMP11]], 1 @@ -7517,10 +7517,10 @@ int bar(int n){ // CHECK5-NEXT: [[CONV:%.*]] = bitcast i64* [[AA_ADDR]] to i16* // CHECK5-NEXT: [[CONV3:%.*]] = bitcast i64* [[DOTCAPTURE_EXPR__ADDR]] to i32* // CHECK5-NEXT: [[CONV4:%.*]] = bitcast i64* [[DOTCAPTURE_EXPR__ADDR2]] to i32* -// CHECK5-NEXT: [[TMP1:%.*]] = load i32, i32* [[CONV3]], align 8 -// CHECK5-NEXT: [[TMP2:%.*]] = load i32, i32* [[CONV4]], align 8 +// CHECK5-NEXT: [[TMP1:%.*]] = load i32, i32* [[CONV3]], align 4 +// CHECK5-NEXT: [[TMP2:%.*]] = load i32, i32* [[CONV4]], align 4 // CHECK5-NEXT: call void @__kmpc_push_num_teams(%struct.ident_t* @[[GLOB2]], i32 [[TMP0]], i32 [[TMP1]], i32 [[TMP2]]) -// CHECK5-NEXT: [[TMP3:%.*]] = load i16, i16* [[CONV]], align 8 +// CHECK5-NEXT: [[TMP3:%.*]] = load i16, i16* [[CONV]], align 2 // CHECK5-NEXT: [[CONV5:%.*]] = bitcast i64* [[AA_CASTED]] to i16* // CHECK5-NEXT: store i16 [[TMP3]], i16* [[CONV5]], align 2 // CHECK5-NEXT: [[TMP4:%.*]] = load i64, i64* [[AA_CASTED]], align 8 @@ -7712,7 +7712,7 @@ int bar(int n){ // CHECK5-NEXT: [[A_CASTED:%.*]] = alloca i64, align 8 // CHECK5-NEXT: store i64 [[A]], i64* [[A_ADDR]], align 8 // CHECK5-NEXT: [[CONV:%.*]] = bitcast i64* [[A_ADDR]] to i32* -// CHECK5-NEXT: [[TMP0:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK5-NEXT: [[TMP0:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK5-NEXT: [[CONV1:%.*]] = bitcast i64* [[A_CASTED]] to i32* // CHECK5-NEXT: store i32 [[TMP0]], i32* [[CONV1]], align 4 // CHECK5-NEXT: [[TMP1:%.*]] = load i64, i64* [[A_CASTED]], align 8 @@ -7787,7 +7787,7 @@ int bar(int n){ // CHECK5-NEXT: [[TMP11:%.*]] = icmp ne i32 [[TMP10]], 0 // CHECK5-NEXT: br i1 [[TMP11]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK5: .omp.final.then: -// CHECK5-NEXT: store i32 10, i32* [[CONV]], align 8 +// CHECK5-NEXT: store i32 10, i32* [[CONV]], align 4 // CHECK5-NEXT: br label [[DOTOMP_FINAL_DONE]] // CHECK5: .omp.final.done: // CHECK5-NEXT: ret void @@ -7800,7 +7800,7 @@ int bar(int n){ // CHECK5-NEXT: [[AA_CASTED:%.*]] = alloca i64, align 8 // CHECK5-NEXT: store i64 [[AA]], i64* [[AA_ADDR]], align 8 // CHECK5-NEXT: [[CONV:%.*]] = bitcast i64* [[AA_ADDR]] to i16* -// CHECK5-NEXT: [[TMP0:%.*]] = load i16, i16* [[CONV]], align 8 +// CHECK5-NEXT: [[TMP0:%.*]] = load i16, i16* [[CONV]], align 2 // CHECK5-NEXT: [[CONV1:%.*]] = bitcast i64* [[AA_CASTED]] to i16* // CHECK5-NEXT: store i16 [[TMP0]], i16* [[CONV1]], align 2 // CHECK5-NEXT: [[TMP1:%.*]] = load i64, i64* [[AA_CASTED]], align 8 @@ -7856,11 +7856,11 @@ int bar(int n){ // CHECK5-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP7]], 1 // CHECK5-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK5-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !30 -// CHECK5-NEXT: [[TMP8:%.*]] = load i16, i16* [[CONV]], align 8, !llvm.access.group !30 +// CHECK5-NEXT: [[TMP8:%.*]] = load i16, i16* [[CONV]], align 2, !llvm.access.group !30 // CHECK5-NEXT: [[CONV2:%.*]] = sext i16 [[TMP8]] to i32 // CHECK5-NEXT: [[ADD3:%.*]] = add nsw i32 [[CONV2]], 1 // CHECK5-NEXT: [[CONV4:%.*]] = trunc i32 [[ADD3]] to i16 -// CHECK5-NEXT: store i16 [[CONV4]], i16* [[CONV]], align 8, !llvm.access.group !30 +// CHECK5-NEXT: store i16 [[CONV4]], i16* [[CONV]], align 2, !llvm.access.group !30 // CHECK5-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK5: omp.body.continue: // CHECK5-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] @@ -7894,11 +7894,11 @@ int bar(int n){ // 
CHECK5-NEXT: store i64 [[AA]], i64* [[AA_ADDR]], align 8 // CHECK5-NEXT: [[CONV:%.*]] = bitcast i64* [[A_ADDR]] to i32* // CHECK5-NEXT: [[CONV1:%.*]] = bitcast i64* [[AA_ADDR]] to i16* -// CHECK5-NEXT: [[TMP0:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK5-NEXT: [[TMP0:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK5-NEXT: [[CONV2:%.*]] = bitcast i64* [[A_CASTED]] to i32* // CHECK5-NEXT: store i32 [[TMP0]], i32* [[CONV2]], align 4 // CHECK5-NEXT: [[TMP1:%.*]] = load i64, i64* [[A_CASTED]], align 8 -// CHECK5-NEXT: [[TMP2:%.*]] = load i16, i16* [[CONV1]], align 8 +// CHECK5-NEXT: [[TMP2:%.*]] = load i16, i16* [[CONV1]], align 2 // CHECK5-NEXT: [[CONV3:%.*]] = bitcast i64* [[AA_CASTED]] to i16* // CHECK5-NEXT: store i16 [[TMP2]], i16* [[CONV3]], align 2 // CHECK5-NEXT: [[TMP3:%.*]] = load i64, i64* [[AA_CASTED]], align 8 @@ -7957,14 +7957,14 @@ int bar(int n){ // CHECK5-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP7]], 1 // CHECK5-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK5-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !33 -// CHECK5-NEXT: [[TMP8:%.*]] = load i32, i32* [[CONV]], align 8, !llvm.access.group !33 +// CHECK5-NEXT: [[TMP8:%.*]] = load i32, i32* [[CONV]], align 4, !llvm.access.group !33 // CHECK5-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP8]], 1 -// CHECK5-NEXT: store i32 [[ADD3]], i32* [[CONV]], align 8, !llvm.access.group !33 -// CHECK5-NEXT: [[TMP9:%.*]] = load i16, i16* [[CONV1]], align 8, !llvm.access.group !33 +// CHECK5-NEXT: store i32 [[ADD3]], i32* [[CONV]], align 4, !llvm.access.group !33 +// CHECK5-NEXT: [[TMP9:%.*]] = load i16, i16* [[CONV1]], align 2, !llvm.access.group !33 // CHECK5-NEXT: [[CONV4:%.*]] = sext i16 [[TMP9]] to i32 // CHECK5-NEXT: [[ADD5:%.*]] = add nsw i32 [[CONV4]], 1 // CHECK5-NEXT: [[CONV6:%.*]] = trunc i32 [[ADD5]] to i16 -// CHECK5-NEXT: store i16 [[CONV6]], i16* [[CONV1]], align 8, !llvm.access.group !33 +// CHECK5-NEXT: store i16 [[CONV6]], i16* [[CONV1]], align 2, !llvm.access.group !33 // CHECK5-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK5: omp.body.continue: // CHECK5-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] @@ -8018,7 +8018,7 @@ int bar(int n){ // CHECK5-NEXT: [[TMP5:%.*]] = load i64, i64* [[VLA_ADDR4]], align 8 // CHECK5-NEXT: [[TMP6:%.*]] = load double*, double** [[CN_ADDR]], align 8 // CHECK5-NEXT: [[TMP7:%.*]] = load %struct.TT*, %struct.TT** [[D_ADDR]], align 8 -// CHECK5-NEXT: [[TMP8:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK5-NEXT: [[TMP8:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK5-NEXT: [[CONV5:%.*]] = bitcast i64* [[A_CASTED]] to i32* // CHECK5-NEXT: store i32 [[TMP8]], i32* [[CONV5]], align 4 // CHECK5-NEXT: [[TMP9:%.*]] = load i64, i64* [[A_CASTED]], align 8 @@ -8100,9 +8100,9 @@ int bar(int n){ // CHECK5-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP15]], 1 // CHECK5-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK5-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !36 -// CHECK5-NEXT: [[TMP16:%.*]] = load i32, i32* [[CONV]], align 8, !llvm.access.group !36 +// CHECK5-NEXT: [[TMP16:%.*]] = load i32, i32* [[CONV]], align 4, !llvm.access.group !36 // CHECK5-NEXT: [[ADD6:%.*]] = add nsw i32 [[TMP16]], 1 -// CHECK5-NEXT: store i32 [[ADD6]], i32* [[CONV]], align 8, !llvm.access.group !36 +// CHECK5-NEXT: store i32 [[ADD6]], i32* [[CONV]], align 4, !llvm.access.group !36 // CHECK5-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x float], [10 x float]* [[TMP0]], i64 0, i64 2 // CHECK5-NEXT: [[TMP17:%.*]] = load float, float* [[ARRAYIDX]], align 4, !llvm.access.group !36 // 
CHECK5-NEXT: [[CONV7:%.*]] = fpext float [[TMP17]] to double @@ -8539,11 +8539,11 @@ int bar(int n){ // CHECK5-NEXT: [[TMP2:%.*]] = load i64, i64* [[VLA_ADDR2]], align 8 // CHECK5-NEXT: [[TMP3:%.*]] = load i16*, i16** [[C_ADDR]], align 8 // CHECK5-NEXT: [[CONV3:%.*]] = bitcast i64* [[DOTCAPTURE_EXPR__ADDR]] to i8* -// CHECK5-NEXT: [[TMP4:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK5-NEXT: [[TMP4:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK5-NEXT: [[CONV4:%.*]] = bitcast i64* [[B_CASTED]] to i32* // CHECK5-NEXT: store i32 [[TMP4]], i32* [[CONV4]], align 4 // CHECK5-NEXT: [[TMP5:%.*]] = load i64, i64* [[B_CASTED]], align 8 -// CHECK5-NEXT: [[TMP6:%.*]] = load i8, i8* [[CONV3]], align 8 +// CHECK5-NEXT: [[TMP6:%.*]] = load i8, i8* [[CONV3]], align 1 // CHECK5-NEXT: [[TOBOOL:%.*]] = trunc i8 [[TMP6]] to i1 // CHECK5-NEXT: [[CONV5:%.*]] = bitcast i64* [[DOTCAPTURE_EXPR__CASTED]] to i8* // CHECK5-NEXT: [[FROMBOOL:%.*]] = zext i1 [[TOBOOL]] to i8 @@ -8605,7 +8605,7 @@ int bar(int n){ // CHECK5-NEXT: store i32 [[COND]], i32* [[DOTOMP_UB]], align 4 // CHECK5-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 // CHECK5-NEXT: store i32 [[TMP8]], i32* [[DOTOMP_IV]], align 4 -// CHECK5-NEXT: [[TMP9:%.*]] = load i8, i8* [[CONV3]], align 8 +// CHECK5-NEXT: [[TMP9:%.*]] = load i8, i8* [[CONV3]], align 1 // CHECK5-NEXT: [[TOBOOL:%.*]] = trunc i8 [[TMP9]] to i1 // CHECK5-NEXT: br i1 [[TOBOOL]], label [[OMP_IF_THEN:%.*]], label [[OMP_IF_ELSE:%.*]] // CHECK5: omp_if.then: @@ -8620,7 +8620,7 @@ int bar(int n){ // CHECK5-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP12]], 1 // CHECK5-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK5-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !39 -// CHECK5-NEXT: [[TMP13:%.*]] = load i32, i32* [[CONV]], align 8, !llvm.access.group !39 +// CHECK5-NEXT: [[TMP13:%.*]] = load i32, i32* [[CONV]], align 4, !llvm.access.group !39 // CHECK5-NEXT: [[CONV5:%.*]] = sitofp i32 [[TMP13]] to double // CHECK5-NEXT: [[ADD6:%.*]] = fadd double [[CONV5]], 1.500000e+00 // CHECK5-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_S1:%.*]], %struct.S1* [[TMP0]], i32 0, i32 0 @@ -8656,7 +8656,7 @@ int bar(int n){ // CHECK5-NEXT: [[MUL14:%.*]] = mul nsw i32 [[TMP19]], 1 // CHECK5-NEXT: [[ADD15:%.*]] = add nsw i32 0, [[MUL14]] // CHECK5-NEXT: store i32 [[ADD15]], i32* [[I]], align 4 -// CHECK5-NEXT: [[TMP20:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK5-NEXT: [[TMP20:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK5-NEXT: [[CONV16:%.*]] = sitofp i32 [[TMP20]] to double // CHECK5-NEXT: [[ADD17:%.*]] = fadd double [[CONV16]], 1.500000e+00 // CHECK5-NEXT: [[A18:%.*]] = getelementptr inbounds [[STRUCT_S1]], %struct.S1* [[TMP0]], i32 0, i32 0 @@ -8716,19 +8716,19 @@ int bar(int n){ // CHECK5-NEXT: [[CONV2:%.*]] = bitcast i64* [[AA_ADDR]] to i16* // CHECK5-NEXT: [[CONV3:%.*]] = bitcast i64* [[AAA_ADDR]] to i8* // CHECK5-NEXT: [[TMP0:%.*]] = load [10 x i32]*, [10 x i32]** [[B_ADDR]], align 8 -// CHECK5-NEXT: [[TMP1:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK5-NEXT: [[TMP1:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK5-NEXT: [[CONV4:%.*]] = bitcast i64* [[A_CASTED]] to i32* // CHECK5-NEXT: store i32 [[TMP1]], i32* [[CONV4]], align 4 // CHECK5-NEXT: [[TMP2:%.*]] = load i64, i64* [[A_CASTED]], align 8 -// CHECK5-NEXT: [[TMP3:%.*]] = load i32, i32* [[CONV1]], align 8 +// CHECK5-NEXT: [[TMP3:%.*]] = load i32, i32* [[CONV1]], align 4 // CHECK5-NEXT: [[CONV5:%.*]] = bitcast i64* [[N_CASTED]] to i32* // CHECK5-NEXT: store i32 [[TMP3]], i32* [[CONV5]], align 4 // 
CHECK5-NEXT: [[TMP4:%.*]] = load i64, i64* [[N_CASTED]], align 8 -// CHECK5-NEXT: [[TMP5:%.*]] = load i16, i16* [[CONV2]], align 8 +// CHECK5-NEXT: [[TMP5:%.*]] = load i16, i16* [[CONV2]], align 2 // CHECK5-NEXT: [[CONV6:%.*]] = bitcast i64* [[AA_CASTED]] to i16* // CHECK5-NEXT: store i16 [[TMP5]], i16* [[CONV6]], align 2 // CHECK5-NEXT: [[TMP6:%.*]] = load i64, i64* [[AA_CASTED]], align 8 -// CHECK5-NEXT: [[TMP7:%.*]] = load i8, i8* [[CONV3]], align 8 +// CHECK5-NEXT: [[TMP7:%.*]] = load i8, i8* [[CONV3]], align 1 // CHECK5-NEXT: [[CONV7:%.*]] = bitcast i64* [[AAA_CASTED]] to i8* // CHECK5-NEXT: store i8 [[TMP7]], i8* [[CONV7]], align 1 // CHECK5-NEXT: [[TMP8:%.*]] = load i64, i64* [[AAA_CASTED]], align 8 @@ -8769,9 +8769,9 @@ int bar(int n){ // CHECK5-NEXT: [[CONV2:%.*]] = bitcast i64* [[AA_ADDR]] to i16* // CHECK5-NEXT: [[CONV3:%.*]] = bitcast i64* [[AAA_ADDR]] to i8* // CHECK5-NEXT: [[TMP0:%.*]] = load [10 x i32]*, [10 x i32]** [[B_ADDR]], align 8 -// CHECK5-NEXT: [[TMP1:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK5-NEXT: [[TMP1:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK5-NEXT: store i32 [[TMP1]], i32* [[DOTCAPTURE_EXPR_]], align 4 -// CHECK5-NEXT: [[TMP2:%.*]] = load i32, i32* [[CONV1]], align 8 +// CHECK5-NEXT: [[TMP2:%.*]] = load i32, i32* [[CONV1]], align 4 // CHECK5-NEXT: store i32 [[TMP2]], i32* [[DOTCAPTURE_EXPR_4]], align 4 // CHECK5-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_4]], align 4 // CHECK5-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 @@ -8824,19 +8824,19 @@ int bar(int n){ // CHECK5-NEXT: [[MUL:%.*]] = mul i32 [[TMP19]], 1 // CHECK5-NEXT: [[ADD12:%.*]] = add i32 [[TMP18]], [[MUL]] // CHECK5-NEXT: store i32 [[ADD12]], i32* [[I8]], align 4, !llvm.access.group !44 -// CHECK5-NEXT: [[TMP20:%.*]] = load i32, i32* [[CONV]], align 8, !llvm.access.group !44 +// CHECK5-NEXT: [[TMP20:%.*]] = load i32, i32* [[CONV]], align 4, !llvm.access.group !44 // CHECK5-NEXT: [[ADD13:%.*]] = add nsw i32 [[TMP20]], 1 -// CHECK5-NEXT: store i32 [[ADD13]], i32* [[CONV]], align 8, !llvm.access.group !44 -// CHECK5-NEXT: [[TMP21:%.*]] = load i16, i16* [[CONV2]], align 8, !llvm.access.group !44 +// CHECK5-NEXT: store i32 [[ADD13]], i32* [[CONV]], align 4, !llvm.access.group !44 +// CHECK5-NEXT: [[TMP21:%.*]] = load i16, i16* [[CONV2]], align 2, !llvm.access.group !44 // CHECK5-NEXT: [[CONV14:%.*]] = sext i16 [[TMP21]] to i32 // CHECK5-NEXT: [[ADD15:%.*]] = add nsw i32 [[CONV14]], 1 // CHECK5-NEXT: [[CONV16:%.*]] = trunc i32 [[ADD15]] to i16 -// CHECK5-NEXT: store i16 [[CONV16]], i16* [[CONV2]], align 8, !llvm.access.group !44 -// CHECK5-NEXT: [[TMP22:%.*]] = load i8, i8* [[CONV3]], align 8, !llvm.access.group !44 +// CHECK5-NEXT: store i16 [[CONV16]], i16* [[CONV2]], align 2, !llvm.access.group !44 +// CHECK5-NEXT: [[TMP22:%.*]] = load i8, i8* [[CONV3]], align 1, !llvm.access.group !44 // CHECK5-NEXT: [[CONV17:%.*]] = sext i8 [[TMP22]] to i32 // CHECK5-NEXT: [[ADD18:%.*]] = add nsw i32 [[CONV17]], 1 // CHECK5-NEXT: [[CONV19:%.*]] = trunc i32 [[ADD18]] to i8 -// CHECK5-NEXT: store i8 [[CONV19]], i8* [[CONV3]], align 8, !llvm.access.group !44 +// CHECK5-NEXT: store i8 [[CONV19]], i8* [[CONV3]], align 1, !llvm.access.group !44 // CHECK5-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], [10 x i32]* [[TMP0]], i64 0, i64 2 // CHECK5-NEXT: [[TMP23:%.*]] = load i32, i32* [[ARRAYIDX]], align 4, !llvm.access.group !44 // CHECK5-NEXT: [[ADD20:%.*]] = add nsw i32 [[TMP23]], 1 @@ -8890,11 +8890,11 @@ int bar(int n){ // CHECK5-NEXT: [[CONV:%.*]] = bitcast 
i64* [[A_ADDR]] to i32* // CHECK5-NEXT: [[CONV1:%.*]] = bitcast i64* [[AA_ADDR]] to i16* // CHECK5-NEXT: [[TMP0:%.*]] = load [10 x i32]*, [10 x i32]** [[B_ADDR]], align 8 -// CHECK5-NEXT: [[TMP1:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK5-NEXT: [[TMP1:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK5-NEXT: [[CONV2:%.*]] = bitcast i64* [[A_CASTED]] to i32* // CHECK5-NEXT: store i32 [[TMP1]], i32* [[CONV2]], align 4 // CHECK5-NEXT: [[TMP2:%.*]] = load i64, i64* [[A_CASTED]], align 8 -// CHECK5-NEXT: [[TMP3:%.*]] = load i16, i16* [[CONV1]], align 8 +// CHECK5-NEXT: [[TMP3:%.*]] = load i16, i16* [[CONV1]], align 2 // CHECK5-NEXT: [[CONV3:%.*]] = bitcast i64* [[AA_CASTED]] to i16* // CHECK5-NEXT: store i16 [[TMP3]], i16* [[CONV3]], align 2 // CHECK5-NEXT: [[TMP4:%.*]] = load i64, i64* [[AA_CASTED]], align 8 @@ -8956,14 +8956,14 @@ int bar(int n){ // CHECK5-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP8]], 1 // CHECK5-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK5-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !47 -// CHECK5-NEXT: [[TMP9:%.*]] = load i32, i32* [[CONV]], align 8, !llvm.access.group !47 +// CHECK5-NEXT: [[TMP9:%.*]] = load i32, i32* [[CONV]], align 4, !llvm.access.group !47 // CHECK5-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP9]], 1 -// CHECK5-NEXT: store i32 [[ADD3]], i32* [[CONV]], align 8, !llvm.access.group !47 -// CHECK5-NEXT: [[TMP10:%.*]] = load i16, i16* [[CONV1]], align 8, !llvm.access.group !47 +// CHECK5-NEXT: store i32 [[ADD3]], i32* [[CONV]], align 4, !llvm.access.group !47 +// CHECK5-NEXT: [[TMP10:%.*]] = load i16, i16* [[CONV1]], align 2, !llvm.access.group !47 // CHECK5-NEXT: [[CONV4:%.*]] = sext i16 [[TMP10]] to i32 // CHECK5-NEXT: [[ADD5:%.*]] = add nsw i32 [[CONV4]], 1 // CHECK5-NEXT: [[CONV6:%.*]] = trunc i32 [[ADD5]] to i16 -// CHECK5-NEXT: store i16 [[CONV6]], i16* [[CONV1]], align 8, !llvm.access.group !47 +// CHECK5-NEXT: store i16 [[CONV6]], i16* [[CONV1]], align 2, !llvm.access.group !47 // CHECK5-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], [10 x i32]* [[TMP0]], i64 0, i64 2 // CHECK5-NEXT: [[TMP11:%.*]] = load i32, i32* [[ARRAYIDX]], align 4, !llvm.access.group !47 // CHECK5-NEXT: [[ADD7:%.*]] = add nsw i32 [[TMP11]], 1 @@ -9333,10 +9333,10 @@ int bar(int n){ // CHECK6-NEXT: [[CONV:%.*]] = bitcast i64* [[AA_ADDR]] to i16* // CHECK6-NEXT: [[CONV3:%.*]] = bitcast i64* [[DOTCAPTURE_EXPR__ADDR]] to i32* // CHECK6-NEXT: [[CONV4:%.*]] = bitcast i64* [[DOTCAPTURE_EXPR__ADDR2]] to i32* -// CHECK6-NEXT: [[TMP1:%.*]] = load i32, i32* [[CONV3]], align 8 -// CHECK6-NEXT: [[TMP2:%.*]] = load i32, i32* [[CONV4]], align 8 +// CHECK6-NEXT: [[TMP1:%.*]] = load i32, i32* [[CONV3]], align 4 +// CHECK6-NEXT: [[TMP2:%.*]] = load i32, i32* [[CONV4]], align 4 // CHECK6-NEXT: call void @__kmpc_push_num_teams(%struct.ident_t* @[[GLOB2]], i32 [[TMP0]], i32 [[TMP1]], i32 [[TMP2]]) -// CHECK6-NEXT: [[TMP3:%.*]] = load i16, i16* [[CONV]], align 8 +// CHECK6-NEXT: [[TMP3:%.*]] = load i16, i16* [[CONV]], align 2 // CHECK6-NEXT: [[CONV5:%.*]] = bitcast i64* [[AA_CASTED]] to i16* // CHECK6-NEXT: store i16 [[TMP3]], i16* [[CONV5]], align 2 // CHECK6-NEXT: [[TMP4:%.*]] = load i64, i64* [[AA_CASTED]], align 8 @@ -9528,7 +9528,7 @@ int bar(int n){ // CHECK6-NEXT: [[A_CASTED:%.*]] = alloca i64, align 8 // CHECK6-NEXT: store i64 [[A]], i64* [[A_ADDR]], align 8 // CHECK6-NEXT: [[CONV:%.*]] = bitcast i64* [[A_ADDR]] to i32* -// CHECK6-NEXT: [[TMP0:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK6-NEXT: [[TMP0:%.*]] = load i32, i32* [[CONV]], align 4 // 
CHECK6-NEXT: [[CONV1:%.*]] = bitcast i64* [[A_CASTED]] to i32* // CHECK6-NEXT: store i32 [[TMP0]], i32* [[CONV1]], align 4 // CHECK6-NEXT: [[TMP1:%.*]] = load i64, i64* [[A_CASTED]], align 8 @@ -9603,7 +9603,7 @@ int bar(int n){ // CHECK6-NEXT: [[TMP11:%.*]] = icmp ne i32 [[TMP10]], 0 // CHECK6-NEXT: br i1 [[TMP11]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] // CHECK6: .omp.final.then: -// CHECK6-NEXT: store i32 10, i32* [[CONV]], align 8 +// CHECK6-NEXT: store i32 10, i32* [[CONV]], align 4 // CHECK6-NEXT: br label [[DOTOMP_FINAL_DONE]] // CHECK6: .omp.final.done: // CHECK6-NEXT: ret void @@ -9616,7 +9616,7 @@ int bar(int n){ // CHECK6-NEXT: [[AA_CASTED:%.*]] = alloca i64, align 8 // CHECK6-NEXT: store i64 [[AA]], i64* [[AA_ADDR]], align 8 // CHECK6-NEXT: [[CONV:%.*]] = bitcast i64* [[AA_ADDR]] to i16* -// CHECK6-NEXT: [[TMP0:%.*]] = load i16, i16* [[CONV]], align 8 +// CHECK6-NEXT: [[TMP0:%.*]] = load i16, i16* [[CONV]], align 2 // CHECK6-NEXT: [[CONV1:%.*]] = bitcast i64* [[AA_CASTED]] to i16* // CHECK6-NEXT: store i16 [[TMP0]], i16* [[CONV1]], align 2 // CHECK6-NEXT: [[TMP1:%.*]] = load i64, i64* [[AA_CASTED]], align 8 @@ -9672,11 +9672,11 @@ int bar(int n){ // CHECK6-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP7]], 1 // CHECK6-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK6-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !30 -// CHECK6-NEXT: [[TMP8:%.*]] = load i16, i16* [[CONV]], align 8, !llvm.access.group !30 +// CHECK6-NEXT: [[TMP8:%.*]] = load i16, i16* [[CONV]], align 2, !llvm.access.group !30 // CHECK6-NEXT: [[CONV2:%.*]] = sext i16 [[TMP8]] to i32 // CHECK6-NEXT: [[ADD3:%.*]] = add nsw i32 [[CONV2]], 1 // CHECK6-NEXT: [[CONV4:%.*]] = trunc i32 [[ADD3]] to i16 -// CHECK6-NEXT: store i16 [[CONV4]], i16* [[CONV]], align 8, !llvm.access.group !30 +// CHECK6-NEXT: store i16 [[CONV4]], i16* [[CONV]], align 2, !llvm.access.group !30 // CHECK6-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK6: omp.body.continue: // CHECK6-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] @@ -9710,11 +9710,11 @@ int bar(int n){ // CHECK6-NEXT: store i64 [[AA]], i64* [[AA_ADDR]], align 8 // CHECK6-NEXT: [[CONV:%.*]] = bitcast i64* [[A_ADDR]] to i32* // CHECK6-NEXT: [[CONV1:%.*]] = bitcast i64* [[AA_ADDR]] to i16* -// CHECK6-NEXT: [[TMP0:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK6-NEXT: [[TMP0:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK6-NEXT: [[CONV2:%.*]] = bitcast i64* [[A_CASTED]] to i32* // CHECK6-NEXT: store i32 [[TMP0]], i32* [[CONV2]], align 4 // CHECK6-NEXT: [[TMP1:%.*]] = load i64, i64* [[A_CASTED]], align 8 -// CHECK6-NEXT: [[TMP2:%.*]] = load i16, i16* [[CONV1]], align 8 +// CHECK6-NEXT: [[TMP2:%.*]] = load i16, i16* [[CONV1]], align 2 // CHECK6-NEXT: [[CONV3:%.*]] = bitcast i64* [[AA_CASTED]] to i16* // CHECK6-NEXT: store i16 [[TMP2]], i16* [[CONV3]], align 2 // CHECK6-NEXT: [[TMP3:%.*]] = load i64, i64* [[AA_CASTED]], align 8 @@ -9773,14 +9773,14 @@ int bar(int n){ // CHECK6-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP7]], 1 // CHECK6-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK6-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !33 -// CHECK6-NEXT: [[TMP8:%.*]] = load i32, i32* [[CONV]], align 8, !llvm.access.group !33 +// CHECK6-NEXT: [[TMP8:%.*]] = load i32, i32* [[CONV]], align 4, !llvm.access.group !33 // CHECK6-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP8]], 1 -// CHECK6-NEXT: store i32 [[ADD3]], i32* [[CONV]], align 8, !llvm.access.group !33 -// CHECK6-NEXT: [[TMP9:%.*]] = load i16, i16* [[CONV1]], align 8, !llvm.access.group !33 
+// CHECK6-NEXT: store i32 [[ADD3]], i32* [[CONV]], align 4, !llvm.access.group !33 +// CHECK6-NEXT: [[TMP9:%.*]] = load i16, i16* [[CONV1]], align 2, !llvm.access.group !33 // CHECK6-NEXT: [[CONV4:%.*]] = sext i16 [[TMP9]] to i32 // CHECK6-NEXT: [[ADD5:%.*]] = add nsw i32 [[CONV4]], 1 // CHECK6-NEXT: [[CONV6:%.*]] = trunc i32 [[ADD5]] to i16 -// CHECK6-NEXT: store i16 [[CONV6]], i16* [[CONV1]], align 8, !llvm.access.group !33 +// CHECK6-NEXT: store i16 [[CONV6]], i16* [[CONV1]], align 2, !llvm.access.group !33 // CHECK6-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK6: omp.body.continue: // CHECK6-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] @@ -9834,7 +9834,7 @@ int bar(int n){ // CHECK6-NEXT: [[TMP5:%.*]] = load i64, i64* [[VLA_ADDR4]], align 8 // CHECK6-NEXT: [[TMP6:%.*]] = load double*, double** [[CN_ADDR]], align 8 // CHECK6-NEXT: [[TMP7:%.*]] = load %struct.TT*, %struct.TT** [[D_ADDR]], align 8 -// CHECK6-NEXT: [[TMP8:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK6-NEXT: [[TMP8:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK6-NEXT: [[CONV5:%.*]] = bitcast i64* [[A_CASTED]] to i32* // CHECK6-NEXT: store i32 [[TMP8]], i32* [[CONV5]], align 4 // CHECK6-NEXT: [[TMP9:%.*]] = load i64, i64* [[A_CASTED]], align 8 @@ -9916,9 +9916,9 @@ int bar(int n){ // CHECK6-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP15]], 1 // CHECK6-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK6-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !36 -// CHECK6-NEXT: [[TMP16:%.*]] = load i32, i32* [[CONV]], align 8, !llvm.access.group !36 +// CHECK6-NEXT: [[TMP16:%.*]] = load i32, i32* [[CONV]], align 4, !llvm.access.group !36 // CHECK6-NEXT: [[ADD6:%.*]] = add nsw i32 [[TMP16]], 1 -// CHECK6-NEXT: store i32 [[ADD6]], i32* [[CONV]], align 8, !llvm.access.group !36 +// CHECK6-NEXT: store i32 [[ADD6]], i32* [[CONV]], align 4, !llvm.access.group !36 // CHECK6-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x float], [10 x float]* [[TMP0]], i64 0, i64 2 // CHECK6-NEXT: [[TMP17:%.*]] = load float, float* [[ARRAYIDX]], align 4, !llvm.access.group !36 // CHECK6-NEXT: [[CONV7:%.*]] = fpext float [[TMP17]] to double @@ -10355,11 +10355,11 @@ int bar(int n){ // CHECK6-NEXT: [[TMP2:%.*]] = load i64, i64* [[VLA_ADDR2]], align 8 // CHECK6-NEXT: [[TMP3:%.*]] = load i16*, i16** [[C_ADDR]], align 8 // CHECK6-NEXT: [[CONV3:%.*]] = bitcast i64* [[DOTCAPTURE_EXPR__ADDR]] to i8* -// CHECK6-NEXT: [[TMP4:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK6-NEXT: [[TMP4:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK6-NEXT: [[CONV4:%.*]] = bitcast i64* [[B_CASTED]] to i32* // CHECK6-NEXT: store i32 [[TMP4]], i32* [[CONV4]], align 4 // CHECK6-NEXT: [[TMP5:%.*]] = load i64, i64* [[B_CASTED]], align 8 -// CHECK6-NEXT: [[TMP6:%.*]] = load i8, i8* [[CONV3]], align 8 +// CHECK6-NEXT: [[TMP6:%.*]] = load i8, i8* [[CONV3]], align 1 // CHECK6-NEXT: [[TOBOOL:%.*]] = trunc i8 [[TMP6]] to i1 // CHECK6-NEXT: [[CONV5:%.*]] = bitcast i64* [[DOTCAPTURE_EXPR__CASTED]] to i8* // CHECK6-NEXT: [[FROMBOOL:%.*]] = zext i1 [[TOBOOL]] to i8 @@ -10421,7 +10421,7 @@ int bar(int n){ // CHECK6-NEXT: store i32 [[COND]], i32* [[DOTOMP_UB]], align 4 // CHECK6-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 // CHECK6-NEXT: store i32 [[TMP8]], i32* [[DOTOMP_IV]], align 4 -// CHECK6-NEXT: [[TMP9:%.*]] = load i8, i8* [[CONV3]], align 8 +// CHECK6-NEXT: [[TMP9:%.*]] = load i8, i8* [[CONV3]], align 1 // CHECK6-NEXT: [[TOBOOL:%.*]] = trunc i8 [[TMP9]] to i1 // CHECK6-NEXT: br i1 [[TOBOOL]], label [[OMP_IF_THEN:%.*]], label [[OMP_IF_ELSE:%.*]] // 
CHECK6: omp_if.then: @@ -10436,7 +10436,7 @@ int bar(int n){ // CHECK6-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP12]], 1 // CHECK6-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK6-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !39 -// CHECK6-NEXT: [[TMP13:%.*]] = load i32, i32* [[CONV]], align 8, !llvm.access.group !39 +// CHECK6-NEXT: [[TMP13:%.*]] = load i32, i32* [[CONV]], align 4, !llvm.access.group !39 // CHECK6-NEXT: [[CONV5:%.*]] = sitofp i32 [[TMP13]] to double // CHECK6-NEXT: [[ADD6:%.*]] = fadd double [[CONV5]], 1.500000e+00 // CHECK6-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_S1:%.*]], %struct.S1* [[TMP0]], i32 0, i32 0 @@ -10472,7 +10472,7 @@ int bar(int n){ // CHECK6-NEXT: [[MUL14:%.*]] = mul nsw i32 [[TMP19]], 1 // CHECK6-NEXT: [[ADD15:%.*]] = add nsw i32 0, [[MUL14]] // CHECK6-NEXT: store i32 [[ADD15]], i32* [[I]], align 4 -// CHECK6-NEXT: [[TMP20:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK6-NEXT: [[TMP20:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK6-NEXT: [[CONV16:%.*]] = sitofp i32 [[TMP20]] to double // CHECK6-NEXT: [[ADD17:%.*]] = fadd double [[CONV16]], 1.500000e+00 // CHECK6-NEXT: [[A18:%.*]] = getelementptr inbounds [[STRUCT_S1]], %struct.S1* [[TMP0]], i32 0, i32 0 @@ -10532,19 +10532,19 @@ int bar(int n){ // CHECK6-NEXT: [[CONV2:%.*]] = bitcast i64* [[AA_ADDR]] to i16* // CHECK6-NEXT: [[CONV3:%.*]] = bitcast i64* [[AAA_ADDR]] to i8* // CHECK6-NEXT: [[TMP0:%.*]] = load [10 x i32]*, [10 x i32]** [[B_ADDR]], align 8 -// CHECK6-NEXT: [[TMP1:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK6-NEXT: [[TMP1:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK6-NEXT: [[CONV4:%.*]] = bitcast i64* [[A_CASTED]] to i32* // CHECK6-NEXT: store i32 [[TMP1]], i32* [[CONV4]], align 4 // CHECK6-NEXT: [[TMP2:%.*]] = load i64, i64* [[A_CASTED]], align 8 -// CHECK6-NEXT: [[TMP3:%.*]] = load i32, i32* [[CONV1]], align 8 +// CHECK6-NEXT: [[TMP3:%.*]] = load i32, i32* [[CONV1]], align 4 // CHECK6-NEXT: [[CONV5:%.*]] = bitcast i64* [[N_CASTED]] to i32* // CHECK6-NEXT: store i32 [[TMP3]], i32* [[CONV5]], align 4 // CHECK6-NEXT: [[TMP4:%.*]] = load i64, i64* [[N_CASTED]], align 8 -// CHECK6-NEXT: [[TMP5:%.*]] = load i16, i16* [[CONV2]], align 8 +// CHECK6-NEXT: [[TMP5:%.*]] = load i16, i16* [[CONV2]], align 2 // CHECK6-NEXT: [[CONV6:%.*]] = bitcast i64* [[AA_CASTED]] to i16* // CHECK6-NEXT: store i16 [[TMP5]], i16* [[CONV6]], align 2 // CHECK6-NEXT: [[TMP6:%.*]] = load i64, i64* [[AA_CASTED]], align 8 -// CHECK6-NEXT: [[TMP7:%.*]] = load i8, i8* [[CONV3]], align 8 +// CHECK6-NEXT: [[TMP7:%.*]] = load i8, i8* [[CONV3]], align 1 // CHECK6-NEXT: [[CONV7:%.*]] = bitcast i64* [[AAA_CASTED]] to i8* // CHECK6-NEXT: store i8 [[TMP7]], i8* [[CONV7]], align 1 // CHECK6-NEXT: [[TMP8:%.*]] = load i64, i64* [[AAA_CASTED]], align 8 @@ -10585,9 +10585,9 @@ int bar(int n){ // CHECK6-NEXT: [[CONV2:%.*]] = bitcast i64* [[AA_ADDR]] to i16* // CHECK6-NEXT: [[CONV3:%.*]] = bitcast i64* [[AAA_ADDR]] to i8* // CHECK6-NEXT: [[TMP0:%.*]] = load [10 x i32]*, [10 x i32]** [[B_ADDR]], align 8 -// CHECK6-NEXT: [[TMP1:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK6-NEXT: [[TMP1:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK6-NEXT: store i32 [[TMP1]], i32* [[DOTCAPTURE_EXPR_]], align 4 -// CHECK6-NEXT: [[TMP2:%.*]] = load i32, i32* [[CONV1]], align 8 +// CHECK6-NEXT: [[TMP2:%.*]] = load i32, i32* [[CONV1]], align 4 // CHECK6-NEXT: store i32 [[TMP2]], i32* [[DOTCAPTURE_EXPR_4]], align 4 // CHECK6-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_4]], align 4 // CHECK6-NEXT: 
[[TMP4:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 @@ -10640,19 +10640,19 @@ int bar(int n){ // CHECK6-NEXT: [[MUL:%.*]] = mul i32 [[TMP19]], 1 // CHECK6-NEXT: [[ADD12:%.*]] = add i32 [[TMP18]], [[MUL]] // CHECK6-NEXT: store i32 [[ADD12]], i32* [[I8]], align 4, !llvm.access.group !44 -// CHECK6-NEXT: [[TMP20:%.*]] = load i32, i32* [[CONV]], align 8, !llvm.access.group !44 +// CHECK6-NEXT: [[TMP20:%.*]] = load i32, i32* [[CONV]], align 4, !llvm.access.group !44 // CHECK6-NEXT: [[ADD13:%.*]] = add nsw i32 [[TMP20]], 1 -// CHECK6-NEXT: store i32 [[ADD13]], i32* [[CONV]], align 8, !llvm.access.group !44 -// CHECK6-NEXT: [[TMP21:%.*]] = load i16, i16* [[CONV2]], align 8, !llvm.access.group !44 +// CHECK6-NEXT: store i32 [[ADD13]], i32* [[CONV]], align 4, !llvm.access.group !44 +// CHECK6-NEXT: [[TMP21:%.*]] = load i16, i16* [[CONV2]], align 2, !llvm.access.group !44 // CHECK6-NEXT: [[CONV14:%.*]] = sext i16 [[TMP21]] to i32 // CHECK6-NEXT: [[ADD15:%.*]] = add nsw i32 [[CONV14]], 1 // CHECK6-NEXT: [[CONV16:%.*]] = trunc i32 [[ADD15]] to i16 -// CHECK6-NEXT: store i16 [[CONV16]], i16* [[CONV2]], align 8, !llvm.access.group !44 -// CHECK6-NEXT: [[TMP22:%.*]] = load i8, i8* [[CONV3]], align 8, !llvm.access.group !44 +// CHECK6-NEXT: store i16 [[CONV16]], i16* [[CONV2]], align 2, !llvm.access.group !44 +// CHECK6-NEXT: [[TMP22:%.*]] = load i8, i8* [[CONV3]], align 1, !llvm.access.group !44 // CHECK6-NEXT: [[CONV17:%.*]] = sext i8 [[TMP22]] to i32 // CHECK6-NEXT: [[ADD18:%.*]] = add nsw i32 [[CONV17]], 1 // CHECK6-NEXT: [[CONV19:%.*]] = trunc i32 [[ADD18]] to i8 -// CHECK6-NEXT: store i8 [[CONV19]], i8* [[CONV3]], align 8, !llvm.access.group !44 +// CHECK6-NEXT: store i8 [[CONV19]], i8* [[CONV3]], align 1, !llvm.access.group !44 // CHECK6-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], [10 x i32]* [[TMP0]], i64 0, i64 2 // CHECK6-NEXT: [[TMP23:%.*]] = load i32, i32* [[ARRAYIDX]], align 4, !llvm.access.group !44 // CHECK6-NEXT: [[ADD20:%.*]] = add nsw i32 [[TMP23]], 1 @@ -10706,11 +10706,11 @@ int bar(int n){ // CHECK6-NEXT: [[CONV:%.*]] = bitcast i64* [[A_ADDR]] to i32* // CHECK6-NEXT: [[CONV1:%.*]] = bitcast i64* [[AA_ADDR]] to i16* // CHECK6-NEXT: [[TMP0:%.*]] = load [10 x i32]*, [10 x i32]** [[B_ADDR]], align 8 -// CHECK6-NEXT: [[TMP1:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK6-NEXT: [[TMP1:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK6-NEXT: [[CONV2:%.*]] = bitcast i64* [[A_CASTED]] to i32* // CHECK6-NEXT: store i32 [[TMP1]], i32* [[CONV2]], align 4 // CHECK6-NEXT: [[TMP2:%.*]] = load i64, i64* [[A_CASTED]], align 8 -// CHECK6-NEXT: [[TMP3:%.*]] = load i16, i16* [[CONV1]], align 8 +// CHECK6-NEXT: [[TMP3:%.*]] = load i16, i16* [[CONV1]], align 2 // CHECK6-NEXT: [[CONV3:%.*]] = bitcast i64* [[AA_CASTED]] to i16* // CHECK6-NEXT: store i16 [[TMP3]], i16* [[CONV3]], align 2 // CHECK6-NEXT: [[TMP4:%.*]] = load i64, i64* [[AA_CASTED]], align 8 @@ -10772,14 +10772,14 @@ int bar(int n){ // CHECK6-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP8]], 1 // CHECK6-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK6-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !47 -// CHECK6-NEXT: [[TMP9:%.*]] = load i32, i32* [[CONV]], align 8, !llvm.access.group !47 +// CHECK6-NEXT: [[TMP9:%.*]] = load i32, i32* [[CONV]], align 4, !llvm.access.group !47 // CHECK6-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP9]], 1 -// CHECK6-NEXT: store i32 [[ADD3]], i32* [[CONV]], align 8, !llvm.access.group !47 -// CHECK6-NEXT: [[TMP10:%.*]] = load i16, i16* [[CONV1]], align 8, !llvm.access.group !47 
+// CHECK6-NEXT: store i32 [[ADD3]], i32* [[CONV]], align 4, !llvm.access.group !47 +// CHECK6-NEXT: [[TMP10:%.*]] = load i16, i16* [[CONV1]], align 2, !llvm.access.group !47 // CHECK6-NEXT: [[CONV4:%.*]] = sext i16 [[TMP10]] to i32 // CHECK6-NEXT: [[ADD5:%.*]] = add nsw i32 [[CONV4]], 1 // CHECK6-NEXT: [[CONV6:%.*]] = trunc i32 [[ADD5]] to i16 -// CHECK6-NEXT: store i16 [[CONV6]], i16* [[CONV1]], align 8, !llvm.access.group !47 +// CHECK6-NEXT: store i16 [[CONV6]], i16* [[CONV1]], align 2, !llvm.access.group !47 // CHECK6-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], [10 x i32]* [[TMP0]], i64 0, i64 2 // CHECK6-NEXT: [[TMP11:%.*]] = load i32, i32* [[ARRAYIDX]], align 4, !llvm.access.group !47 // CHECK6-NEXT: [[ADD7:%.*]] = add nsw i32 [[TMP11]], 1 @@ -11145,7 +11145,7 @@ int bar(int n){ // CHECK7-NEXT: [[TMP1:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR__ADDR]], align 4 // CHECK7-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR__ADDR2]], align 4 // CHECK7-NEXT: call void @__kmpc_push_num_teams(%struct.ident_t* @[[GLOB2]], i32 [[TMP0]], i32 [[TMP1]], i32 [[TMP2]]) -// CHECK7-NEXT: [[TMP3:%.*]] = load i16, i16* [[CONV]], align 4 +// CHECK7-NEXT: [[TMP3:%.*]] = load i16, i16* [[CONV]], align 2 // CHECK7-NEXT: [[CONV3:%.*]] = bitcast i32* [[AA_CASTED]] to i16* // CHECK7-NEXT: store i16 [[TMP3]], i16* [[CONV3]], align 2 // CHECK7-NEXT: [[TMP4:%.*]] = load i32, i32* [[AA_CASTED]], align 4 @@ -11420,7 +11420,7 @@ int bar(int n){ // CHECK7-NEXT: [[AA_CASTED:%.*]] = alloca i32, align 4 // CHECK7-NEXT: store i32 [[AA]], i32* [[AA_ADDR]], align 4 // CHECK7-NEXT: [[CONV:%.*]] = bitcast i32* [[AA_ADDR]] to i16* -// CHECK7-NEXT: [[TMP0:%.*]] = load i16, i16* [[CONV]], align 4 +// CHECK7-NEXT: [[TMP0:%.*]] = load i16, i16* [[CONV]], align 2 // CHECK7-NEXT: [[CONV1:%.*]] = bitcast i32* [[AA_CASTED]] to i16* // CHECK7-NEXT: store i16 [[TMP0]], i16* [[CONV1]], align 2 // CHECK7-NEXT: [[TMP1:%.*]] = load i32, i32* [[AA_CASTED]], align 4 @@ -11476,11 +11476,11 @@ int bar(int n){ // CHECK7-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP7]], 1 // CHECK7-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK7-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !31 -// CHECK7-NEXT: [[TMP8:%.*]] = load i16, i16* [[CONV]], align 4, !llvm.access.group !31 +// CHECK7-NEXT: [[TMP8:%.*]] = load i16, i16* [[CONV]], align 2, !llvm.access.group !31 // CHECK7-NEXT: [[CONV2:%.*]] = sext i16 [[TMP8]] to i32 // CHECK7-NEXT: [[ADD3:%.*]] = add nsw i32 [[CONV2]], 1 // CHECK7-NEXT: [[CONV4:%.*]] = trunc i32 [[ADD3]] to i16 -// CHECK7-NEXT: store i16 [[CONV4]], i16* [[CONV]], align 4, !llvm.access.group !31 +// CHECK7-NEXT: store i16 [[CONV4]], i16* [[CONV]], align 2, !llvm.access.group !31 // CHECK7-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK7: omp.body.continue: // CHECK7-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] @@ -11516,7 +11516,7 @@ int bar(int n){ // CHECK7-NEXT: [[TMP0:%.*]] = load i32, i32* [[A_ADDR]], align 4 // CHECK7-NEXT: store i32 [[TMP0]], i32* [[A_CASTED]], align 4 // CHECK7-NEXT: [[TMP1:%.*]] = load i32, i32* [[A_CASTED]], align 4 -// CHECK7-NEXT: [[TMP2:%.*]] = load i16, i16* [[CONV]], align 4 +// CHECK7-NEXT: [[TMP2:%.*]] = load i16, i16* [[CONV]], align 2 // CHECK7-NEXT: [[CONV1:%.*]] = bitcast i32* [[AA_CASTED]] to i16* // CHECK7-NEXT: store i16 [[TMP2]], i16* [[CONV1]], align 2 // CHECK7-NEXT: [[TMP3:%.*]] = load i32, i32* [[AA_CASTED]], align 4 @@ -11577,11 +11577,11 @@ int bar(int n){ // CHECK7-NEXT: [[TMP8:%.*]] = load i32, i32* [[A_ADDR]], align 4, !llvm.access.group !34 // 
CHECK7-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP8]], 1 // CHECK7-NEXT: store i32 [[ADD2]], i32* [[A_ADDR]], align 4, !llvm.access.group !34 -// CHECK7-NEXT: [[TMP9:%.*]] = load i16, i16* [[CONV]], align 4, !llvm.access.group !34 +// CHECK7-NEXT: [[TMP9:%.*]] = load i16, i16* [[CONV]], align 2, !llvm.access.group !34 // CHECK7-NEXT: [[CONV3:%.*]] = sext i16 [[TMP9]] to i32 // CHECK7-NEXT: [[ADD4:%.*]] = add nsw i32 [[CONV3]], 1 // CHECK7-NEXT: [[CONV5:%.*]] = trunc i32 [[ADD4]] to i16 -// CHECK7-NEXT: store i16 [[CONV5]], i16* [[CONV]], align 4, !llvm.access.group !34 +// CHECK7-NEXT: store i16 [[CONV5]], i16* [[CONV]], align 2, !llvm.access.group !34 // CHECK7-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK7: omp.body.continue: // CHECK7-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] @@ -12151,7 +12151,7 @@ int bar(int n){ // CHECK7-NEXT: [[TMP4:%.*]] = load i32, i32* [[B_ADDR]], align 4 // CHECK7-NEXT: store i32 [[TMP4]], i32* [[B_CASTED]], align 4 // CHECK7-NEXT: [[TMP5:%.*]] = load i32, i32* [[B_CASTED]], align 4 -// CHECK7-NEXT: [[TMP6:%.*]] = load i8, i8* [[CONV]], align 4 +// CHECK7-NEXT: [[TMP6:%.*]] = load i8, i8* [[CONV]], align 1 // CHECK7-NEXT: [[TOBOOL:%.*]] = trunc i8 [[TMP6]] to i1 // CHECK7-NEXT: [[CONV3:%.*]] = bitcast i32* [[DOTCAPTURE_EXPR__CASTED]] to i8* // CHECK7-NEXT: [[FROMBOOL:%.*]] = zext i1 [[TOBOOL]] to i8 @@ -12212,7 +12212,7 @@ int bar(int n){ // CHECK7-NEXT: store i32 [[COND]], i32* [[DOTOMP_UB]], align 4 // CHECK7-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 // CHECK7-NEXT: store i32 [[TMP8]], i32* [[DOTOMP_IV]], align 4 -// CHECK7-NEXT: [[TMP9:%.*]] = load i8, i8* [[CONV]], align 4 +// CHECK7-NEXT: [[TMP9:%.*]] = load i8, i8* [[CONV]], align 1 // CHECK7-NEXT: [[TOBOOL:%.*]] = trunc i8 [[TMP9]] to i1 // CHECK7-NEXT: br i1 [[TOBOOL]], label [[OMP_IF_THEN:%.*]], label [[OMP_IF_ELSE:%.*]] // CHECK7: omp_if.then: @@ -12327,11 +12327,11 @@ int bar(int n){ // CHECK7-NEXT: [[TMP3:%.*]] = load i32, i32* [[N_ADDR]], align 4 // CHECK7-NEXT: store i32 [[TMP3]], i32* [[N_CASTED]], align 4 // CHECK7-NEXT: [[TMP4:%.*]] = load i32, i32* [[N_CASTED]], align 4 -// CHECK7-NEXT: [[TMP5:%.*]] = load i16, i16* [[CONV]], align 4 +// CHECK7-NEXT: [[TMP5:%.*]] = load i16, i16* [[CONV]], align 2 // CHECK7-NEXT: [[CONV2:%.*]] = bitcast i32* [[AA_CASTED]] to i16* // CHECK7-NEXT: store i16 [[TMP5]], i16* [[CONV2]], align 2 // CHECK7-NEXT: [[TMP6:%.*]] = load i32, i32* [[AA_CASTED]], align 4 -// CHECK7-NEXT: [[TMP7:%.*]] = load i8, i8* [[CONV1]], align 4 +// CHECK7-NEXT: [[TMP7:%.*]] = load i8, i8* [[CONV1]], align 1 // CHECK7-NEXT: [[CONV3:%.*]] = bitcast i32* [[AAA_CASTED]] to i8* // CHECK7-NEXT: store i8 [[TMP7]], i8* [[CONV3]], align 1 // CHECK7-NEXT: [[TMP8:%.*]] = load i32, i32* [[AAA_CASTED]], align 4 @@ -12428,16 +12428,16 @@ int bar(int n){ // CHECK7-NEXT: [[TMP20:%.*]] = load i32, i32* [[A_ADDR]], align 4, !llvm.access.group !45 // CHECK7-NEXT: [[ADD11:%.*]] = add nsw i32 [[TMP20]], 1 // CHECK7-NEXT: store i32 [[ADD11]], i32* [[A_ADDR]], align 4, !llvm.access.group !45 -// CHECK7-NEXT: [[TMP21:%.*]] = load i16, i16* [[CONV]], align 4, !llvm.access.group !45 +// CHECK7-NEXT: [[TMP21:%.*]] = load i16, i16* [[CONV]], align 2, !llvm.access.group !45 // CHECK7-NEXT: [[CONV12:%.*]] = sext i16 [[TMP21]] to i32 // CHECK7-NEXT: [[ADD13:%.*]] = add nsw i32 [[CONV12]], 1 // CHECK7-NEXT: [[CONV14:%.*]] = trunc i32 [[ADD13]] to i16 -// CHECK7-NEXT: store i16 [[CONV14]], i16* [[CONV]], align 4, !llvm.access.group !45 -// CHECK7-NEXT: [[TMP22:%.*]] = load i8, i8* [[CONV1]], align 4, 
!llvm.access.group !45 +// CHECK7-NEXT: store i16 [[CONV14]], i16* [[CONV]], align 2, !llvm.access.group !45 +// CHECK7-NEXT: [[TMP22:%.*]] = load i8, i8* [[CONV1]], align 1, !llvm.access.group !45 // CHECK7-NEXT: [[CONV15:%.*]] = sext i8 [[TMP22]] to i32 // CHECK7-NEXT: [[ADD16:%.*]] = add nsw i32 [[CONV15]], 1 // CHECK7-NEXT: [[CONV17:%.*]] = trunc i32 [[ADD16]] to i8 -// CHECK7-NEXT: store i8 [[CONV17]], i8* [[CONV1]], align 4, !llvm.access.group !45 +// CHECK7-NEXT: store i8 [[CONV17]], i8* [[CONV1]], align 1, !llvm.access.group !45 // CHECK7-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], [10 x i32]* [[TMP0]], i32 0, i32 2 // CHECK7-NEXT: [[TMP23:%.*]] = load i32, i32* [[ARRAYIDX]], align 4, !llvm.access.group !45 // CHECK7-NEXT: [[ADD18:%.*]] = add nsw i32 [[TMP23]], 1 @@ -12493,7 +12493,7 @@ int bar(int n){ // CHECK7-NEXT: [[TMP1:%.*]] = load i32, i32* [[A_ADDR]], align 4 // CHECK7-NEXT: store i32 [[TMP1]], i32* [[A_CASTED]], align 4 // CHECK7-NEXT: [[TMP2:%.*]] = load i32, i32* [[A_CASTED]], align 4 -// CHECK7-NEXT: [[TMP3:%.*]] = load i16, i16* [[CONV]], align 4 +// CHECK7-NEXT: [[TMP3:%.*]] = load i16, i16* [[CONV]], align 2 // CHECK7-NEXT: [[CONV1:%.*]] = bitcast i32* [[AA_CASTED]] to i16* // CHECK7-NEXT: store i16 [[TMP3]], i16* [[CONV1]], align 2 // CHECK7-NEXT: [[TMP4:%.*]] = load i32, i32* [[AA_CASTED]], align 4 @@ -12557,11 +12557,11 @@ int bar(int n){ // CHECK7-NEXT: [[TMP9:%.*]] = load i32, i32* [[A_ADDR]], align 4, !llvm.access.group !48 // CHECK7-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP9]], 1 // CHECK7-NEXT: store i32 [[ADD2]], i32* [[A_ADDR]], align 4, !llvm.access.group !48 -// CHECK7-NEXT: [[TMP10:%.*]] = load i16, i16* [[CONV]], align 4, !llvm.access.group !48 +// CHECK7-NEXT: [[TMP10:%.*]] = load i16, i16* [[CONV]], align 2, !llvm.access.group !48 // CHECK7-NEXT: [[CONV3:%.*]] = sext i16 [[TMP10]] to i32 // CHECK7-NEXT: [[ADD4:%.*]] = add nsw i32 [[CONV3]], 1 // CHECK7-NEXT: [[CONV5:%.*]] = trunc i32 [[ADD4]] to i16 -// CHECK7-NEXT: store i16 [[CONV5]], i16* [[CONV]], align 4, !llvm.access.group !48 +// CHECK7-NEXT: store i16 [[CONV5]], i16* [[CONV]], align 2, !llvm.access.group !48 // CHECK7-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], [10 x i32]* [[TMP0]], i32 0, i32 2 // CHECK7-NEXT: [[TMP11:%.*]] = load i32, i32* [[ARRAYIDX]], align 4, !llvm.access.group !48 // CHECK7-NEXT: [[ADD6:%.*]] = add nsw i32 [[TMP11]], 1 @@ -12927,7 +12927,7 @@ int bar(int n){ // CHECK8-NEXT: [[TMP1:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR__ADDR]], align 4 // CHECK8-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR__ADDR2]], align 4 // CHECK8-NEXT: call void @__kmpc_push_num_teams(%struct.ident_t* @[[GLOB2]], i32 [[TMP0]], i32 [[TMP1]], i32 [[TMP2]]) -// CHECK8-NEXT: [[TMP3:%.*]] = load i16, i16* [[CONV]], align 4 +// CHECK8-NEXT: [[TMP3:%.*]] = load i16, i16* [[CONV]], align 2 // CHECK8-NEXT: [[CONV3:%.*]] = bitcast i32* [[AA_CASTED]] to i16* // CHECK8-NEXT: store i16 [[TMP3]], i16* [[CONV3]], align 2 // CHECK8-NEXT: [[TMP4:%.*]] = load i32, i32* [[AA_CASTED]], align 4 @@ -13202,7 +13202,7 @@ int bar(int n){ // CHECK8-NEXT: [[AA_CASTED:%.*]] = alloca i32, align 4 // CHECK8-NEXT: store i32 [[AA]], i32* [[AA_ADDR]], align 4 // CHECK8-NEXT: [[CONV:%.*]] = bitcast i32* [[AA_ADDR]] to i16* -// CHECK8-NEXT: [[TMP0:%.*]] = load i16, i16* [[CONV]], align 4 +// CHECK8-NEXT: [[TMP0:%.*]] = load i16, i16* [[CONV]], align 2 // CHECK8-NEXT: [[CONV1:%.*]] = bitcast i32* [[AA_CASTED]] to i16* // CHECK8-NEXT: store i16 [[TMP0]], i16* [[CONV1]], align 2 // CHECK8-NEXT: 
[[TMP1:%.*]] = load i32, i32* [[AA_CASTED]], align 4 @@ -13258,11 +13258,11 @@ int bar(int n){ // CHECK8-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP7]], 1 // CHECK8-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK8-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !31 -// CHECK8-NEXT: [[TMP8:%.*]] = load i16, i16* [[CONV]], align 4, !llvm.access.group !31 +// CHECK8-NEXT: [[TMP8:%.*]] = load i16, i16* [[CONV]], align 2, !llvm.access.group !31 // CHECK8-NEXT: [[CONV2:%.*]] = sext i16 [[TMP8]] to i32 // CHECK8-NEXT: [[ADD3:%.*]] = add nsw i32 [[CONV2]], 1 // CHECK8-NEXT: [[CONV4:%.*]] = trunc i32 [[ADD3]] to i16 -// CHECK8-NEXT: store i16 [[CONV4]], i16* [[CONV]], align 4, !llvm.access.group !31 +// CHECK8-NEXT: store i16 [[CONV4]], i16* [[CONV]], align 2, !llvm.access.group !31 // CHECK8-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK8: omp.body.continue: // CHECK8-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] @@ -13298,7 +13298,7 @@ int bar(int n){ // CHECK8-NEXT: [[TMP0:%.*]] = load i32, i32* [[A_ADDR]], align 4 // CHECK8-NEXT: store i32 [[TMP0]], i32* [[A_CASTED]], align 4 // CHECK8-NEXT: [[TMP1:%.*]] = load i32, i32* [[A_CASTED]], align 4 -// CHECK8-NEXT: [[TMP2:%.*]] = load i16, i16* [[CONV]], align 4 +// CHECK8-NEXT: [[TMP2:%.*]] = load i16, i16* [[CONV]], align 2 // CHECK8-NEXT: [[CONV1:%.*]] = bitcast i32* [[AA_CASTED]] to i16* // CHECK8-NEXT: store i16 [[TMP2]], i16* [[CONV1]], align 2 // CHECK8-NEXT: [[TMP3:%.*]] = load i32, i32* [[AA_CASTED]], align 4 @@ -13359,11 +13359,11 @@ int bar(int n){ // CHECK8-NEXT: [[TMP8:%.*]] = load i32, i32* [[A_ADDR]], align 4, !llvm.access.group !34 // CHECK8-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP8]], 1 // CHECK8-NEXT: store i32 [[ADD2]], i32* [[A_ADDR]], align 4, !llvm.access.group !34 -// CHECK8-NEXT: [[TMP9:%.*]] = load i16, i16* [[CONV]], align 4, !llvm.access.group !34 +// CHECK8-NEXT: [[TMP9:%.*]] = load i16, i16* [[CONV]], align 2, !llvm.access.group !34 // CHECK8-NEXT: [[CONV3:%.*]] = sext i16 [[TMP9]] to i32 // CHECK8-NEXT: [[ADD4:%.*]] = add nsw i32 [[CONV3]], 1 // CHECK8-NEXT: [[CONV5:%.*]] = trunc i32 [[ADD4]] to i16 -// CHECK8-NEXT: store i16 [[CONV5]], i16* [[CONV]], align 4, !llvm.access.group !34 +// CHECK8-NEXT: store i16 [[CONV5]], i16* [[CONV]], align 2, !llvm.access.group !34 // CHECK8-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK8: omp.body.continue: // CHECK8-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] @@ -13933,7 +13933,7 @@ int bar(int n){ // CHECK8-NEXT: [[TMP4:%.*]] = load i32, i32* [[B_ADDR]], align 4 // CHECK8-NEXT: store i32 [[TMP4]], i32* [[B_CASTED]], align 4 // CHECK8-NEXT: [[TMP5:%.*]] = load i32, i32* [[B_CASTED]], align 4 -// CHECK8-NEXT: [[TMP6:%.*]] = load i8, i8* [[CONV]], align 4 +// CHECK8-NEXT: [[TMP6:%.*]] = load i8, i8* [[CONV]], align 1 // CHECK8-NEXT: [[TOBOOL:%.*]] = trunc i8 [[TMP6]] to i1 // CHECK8-NEXT: [[CONV3:%.*]] = bitcast i32* [[DOTCAPTURE_EXPR__CASTED]] to i8* // CHECK8-NEXT: [[FROMBOOL:%.*]] = zext i1 [[TOBOOL]] to i8 @@ -13994,7 +13994,7 @@ int bar(int n){ // CHECK8-NEXT: store i32 [[COND]], i32* [[DOTOMP_UB]], align 4 // CHECK8-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 // CHECK8-NEXT: store i32 [[TMP8]], i32* [[DOTOMP_IV]], align 4 -// CHECK8-NEXT: [[TMP9:%.*]] = load i8, i8* [[CONV]], align 4 +// CHECK8-NEXT: [[TMP9:%.*]] = load i8, i8* [[CONV]], align 1 // CHECK8-NEXT: [[TOBOOL:%.*]] = trunc i8 [[TMP9]] to i1 // CHECK8-NEXT: br i1 [[TOBOOL]], label [[OMP_IF_THEN:%.*]], label [[OMP_IF_ELSE:%.*]] // CHECK8: omp_if.then: @@ -14109,11 +14109,11 @@ int bar(int n){ // 
CHECK8-NEXT: [[TMP3:%.*]] = load i32, i32* [[N_ADDR]], align 4 // CHECK8-NEXT: store i32 [[TMP3]], i32* [[N_CASTED]], align 4 // CHECK8-NEXT: [[TMP4:%.*]] = load i32, i32* [[N_CASTED]], align 4 -// CHECK8-NEXT: [[TMP5:%.*]] = load i16, i16* [[CONV]], align 4 +// CHECK8-NEXT: [[TMP5:%.*]] = load i16, i16* [[CONV]], align 2 // CHECK8-NEXT: [[CONV2:%.*]] = bitcast i32* [[AA_CASTED]] to i16* // CHECK8-NEXT: store i16 [[TMP5]], i16* [[CONV2]], align 2 // CHECK8-NEXT: [[TMP6:%.*]] = load i32, i32* [[AA_CASTED]], align 4 -// CHECK8-NEXT: [[TMP7:%.*]] = load i8, i8* [[CONV1]], align 4 +// CHECK8-NEXT: [[TMP7:%.*]] = load i8, i8* [[CONV1]], align 1 // CHECK8-NEXT: [[CONV3:%.*]] = bitcast i32* [[AAA_CASTED]] to i8* // CHECK8-NEXT: store i8 [[TMP7]], i8* [[CONV3]], align 1 // CHECK8-NEXT: [[TMP8:%.*]] = load i32, i32* [[AAA_CASTED]], align 4 @@ -14210,16 +14210,16 @@ int bar(int n){ // CHECK8-NEXT: [[TMP20:%.*]] = load i32, i32* [[A_ADDR]], align 4, !llvm.access.group !45 // CHECK8-NEXT: [[ADD11:%.*]] = add nsw i32 [[TMP20]], 1 // CHECK8-NEXT: store i32 [[ADD11]], i32* [[A_ADDR]], align 4, !llvm.access.group !45 -// CHECK8-NEXT: [[TMP21:%.*]] = load i16, i16* [[CONV]], align 4, !llvm.access.group !45 +// CHECK8-NEXT: [[TMP21:%.*]] = load i16, i16* [[CONV]], align 2, !llvm.access.group !45 // CHECK8-NEXT: [[CONV12:%.*]] = sext i16 [[TMP21]] to i32 // CHECK8-NEXT: [[ADD13:%.*]] = add nsw i32 [[CONV12]], 1 // CHECK8-NEXT: [[CONV14:%.*]] = trunc i32 [[ADD13]] to i16 -// CHECK8-NEXT: store i16 [[CONV14]], i16* [[CONV]], align 4, !llvm.access.group !45 -// CHECK8-NEXT: [[TMP22:%.*]] = load i8, i8* [[CONV1]], align 4, !llvm.access.group !45 +// CHECK8-NEXT: store i16 [[CONV14]], i16* [[CONV]], align 2, !llvm.access.group !45 +// CHECK8-NEXT: [[TMP22:%.*]] = load i8, i8* [[CONV1]], align 1, !llvm.access.group !45 // CHECK8-NEXT: [[CONV15:%.*]] = sext i8 [[TMP22]] to i32 // CHECK8-NEXT: [[ADD16:%.*]] = add nsw i32 [[CONV15]], 1 // CHECK8-NEXT: [[CONV17:%.*]] = trunc i32 [[ADD16]] to i8 -// CHECK8-NEXT: store i8 [[CONV17]], i8* [[CONV1]], align 4, !llvm.access.group !45 +// CHECK8-NEXT: store i8 [[CONV17]], i8* [[CONV1]], align 1, !llvm.access.group !45 // CHECK8-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], [10 x i32]* [[TMP0]], i32 0, i32 2 // CHECK8-NEXT: [[TMP23:%.*]] = load i32, i32* [[ARRAYIDX]], align 4, !llvm.access.group !45 // CHECK8-NEXT: [[ADD18:%.*]] = add nsw i32 [[TMP23]], 1 @@ -14275,7 +14275,7 @@ int bar(int n){ // CHECK8-NEXT: [[TMP1:%.*]] = load i32, i32* [[A_ADDR]], align 4 // CHECK8-NEXT: store i32 [[TMP1]], i32* [[A_CASTED]], align 4 // CHECK8-NEXT: [[TMP2:%.*]] = load i32, i32* [[A_CASTED]], align 4 -// CHECK8-NEXT: [[TMP3:%.*]] = load i16, i16* [[CONV]], align 4 +// CHECK8-NEXT: [[TMP3:%.*]] = load i16, i16* [[CONV]], align 2 // CHECK8-NEXT: [[CONV1:%.*]] = bitcast i32* [[AA_CASTED]] to i16* // CHECK8-NEXT: store i16 [[TMP3]], i16* [[CONV1]], align 2 // CHECK8-NEXT: [[TMP4:%.*]] = load i32, i32* [[AA_CASTED]], align 4 @@ -14339,11 +14339,11 @@ int bar(int n){ // CHECK8-NEXT: [[TMP9:%.*]] = load i32, i32* [[A_ADDR]], align 4, !llvm.access.group !48 // CHECK8-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP9]], 1 // CHECK8-NEXT: store i32 [[ADD2]], i32* [[A_ADDR]], align 4, !llvm.access.group !48 -// CHECK8-NEXT: [[TMP10:%.*]] = load i16, i16* [[CONV]], align 4, !llvm.access.group !48 +// CHECK8-NEXT: [[TMP10:%.*]] = load i16, i16* [[CONV]], align 2, !llvm.access.group !48 // CHECK8-NEXT: [[CONV3:%.*]] = sext i16 [[TMP10]] to i32 // CHECK8-NEXT: [[ADD4:%.*]] = add nsw i32 [[CONV3]], 1 
// CHECK8-NEXT: [[CONV5:%.*]] = trunc i32 [[ADD4]] to i16 -// CHECK8-NEXT: store i16 [[CONV5]], i16* [[CONV]], align 4, !llvm.access.group !48 +// CHECK8-NEXT: store i16 [[CONV5]], i16* [[CONV]], align 2, !llvm.access.group !48 // CHECK8-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], [10 x i32]* [[TMP0]], i32 0, i32 2 // CHECK8-NEXT: [[TMP11:%.*]] = load i32, i32* [[ARRAYIDX]], align 4, !llvm.access.group !48 // CHECK8-NEXT: [[ADD6:%.*]] = add nsw i32 [[TMP11]], 1 @@ -18655,10 +18655,10 @@ int bar(int n){ // CHECK17-NEXT: [[CONV:%.*]] = bitcast i64* [[AA_ADDR]] to i16* // CHECK17-NEXT: [[CONV3:%.*]] = bitcast i64* [[DOTCAPTURE_EXPR__ADDR]] to i32* // CHECK17-NEXT: [[CONV4:%.*]] = bitcast i64* [[DOTCAPTURE_EXPR__ADDR2]] to i32* -// CHECK17-NEXT: [[TMP1:%.*]] = load i32, i32* [[CONV3]], align 8 -// CHECK17-NEXT: [[TMP2:%.*]] = load i32, i32* [[CONV4]], align 8 +// CHECK17-NEXT: [[TMP1:%.*]] = load i32, i32* [[CONV3]], align 4 +// CHECK17-NEXT: [[TMP2:%.*]] = load i32, i32* [[CONV4]], align 4 // CHECK17-NEXT: call void @__kmpc_push_num_teams(%struct.ident_t* @[[GLOB2]], i32 [[TMP0]], i32 [[TMP1]], i32 [[TMP2]]) -// CHECK17-NEXT: [[TMP3:%.*]] = load i16, i16* [[CONV]], align 8 +// CHECK17-NEXT: [[TMP3:%.*]] = load i16, i16* [[CONV]], align 2 // CHECK17-NEXT: [[CONV5:%.*]] = bitcast i64* [[AA_CASTED]] to i16* // CHECK17-NEXT: store i16 [[TMP3]], i16* [[CONV5]], align 2 // CHECK17-NEXT: [[TMP4:%.*]] = load i64, i64* [[AA_CASTED]], align 8 @@ -18743,7 +18743,7 @@ int bar(int n){ // CHECK17-NEXT: [[AA_CASTED:%.*]] = alloca i64, align 8 // CHECK17-NEXT: store i64 [[AA]], i64* [[AA_ADDR]], align 8 // CHECK17-NEXT: [[CONV:%.*]] = bitcast i64* [[AA_ADDR]] to i16* -// CHECK17-NEXT: [[TMP0:%.*]] = load i16, i16* [[CONV]], align 8 +// CHECK17-NEXT: [[TMP0:%.*]] = load i16, i16* [[CONV]], align 2 // CHECK17-NEXT: [[CONV1:%.*]] = bitcast i64* [[AA_CASTED]] to i16* // CHECK17-NEXT: store i16 [[TMP0]], i16* [[CONV1]], align 2 // CHECK17-NEXT: [[TMP1:%.*]] = load i64, i64* [[AA_CASTED]], align 8 @@ -18799,11 +18799,11 @@ int bar(int n){ // CHECK17-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP7]], 1 // CHECK17-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK17-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !18 -// CHECK17-NEXT: [[TMP8:%.*]] = load i16, i16* [[CONV]], align 8, !llvm.access.group !18 +// CHECK17-NEXT: [[TMP8:%.*]] = load i16, i16* [[CONV]], align 2, !llvm.access.group !18 // CHECK17-NEXT: [[CONV2:%.*]] = sext i16 [[TMP8]] to i32 // CHECK17-NEXT: [[ADD3:%.*]] = add nsw i32 [[CONV2]], 1 // CHECK17-NEXT: [[CONV4:%.*]] = trunc i32 [[ADD3]] to i16 -// CHECK17-NEXT: store i16 [[CONV4]], i16* [[CONV]], align 8, !llvm.access.group !18 +// CHECK17-NEXT: store i16 [[CONV4]], i16* [[CONV]], align 2, !llvm.access.group !18 // CHECK17-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK17: omp.body.continue: // CHECK17-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] @@ -18837,11 +18837,11 @@ int bar(int n){ // CHECK17-NEXT: store i64 [[AA]], i64* [[AA_ADDR]], align 8 // CHECK17-NEXT: [[CONV:%.*]] = bitcast i64* [[A_ADDR]] to i32* // CHECK17-NEXT: [[CONV1:%.*]] = bitcast i64* [[AA_ADDR]] to i16* -// CHECK17-NEXT: [[TMP0:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK17-NEXT: [[TMP0:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK17-NEXT: [[CONV2:%.*]] = bitcast i64* [[A_CASTED]] to i32* // CHECK17-NEXT: store i32 [[TMP0]], i32* [[CONV2]], align 4 // CHECK17-NEXT: [[TMP1:%.*]] = load i64, i64* [[A_CASTED]], align 8 -// CHECK17-NEXT: [[TMP2:%.*]] = load i16, i16* [[CONV1]], align 8 +// 
CHECK17-NEXT: [[TMP2:%.*]] = load i16, i16* [[CONV1]], align 2 // CHECK17-NEXT: [[CONV3:%.*]] = bitcast i64* [[AA_CASTED]] to i16* // CHECK17-NEXT: store i16 [[TMP2]], i16* [[CONV3]], align 2 // CHECK17-NEXT: [[TMP3:%.*]] = load i64, i64* [[AA_CASTED]], align 8 @@ -18900,14 +18900,14 @@ int bar(int n){ // CHECK17-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP7]], 1 // CHECK17-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK17-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !21 -// CHECK17-NEXT: [[TMP8:%.*]] = load i32, i32* [[CONV]], align 8, !llvm.access.group !21 +// CHECK17-NEXT: [[TMP8:%.*]] = load i32, i32* [[CONV]], align 4, !llvm.access.group !21 // CHECK17-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP8]], 1 -// CHECK17-NEXT: store i32 [[ADD3]], i32* [[CONV]], align 8, !llvm.access.group !21 -// CHECK17-NEXT: [[TMP9:%.*]] = load i16, i16* [[CONV1]], align 8, !llvm.access.group !21 +// CHECK17-NEXT: store i32 [[ADD3]], i32* [[CONV]], align 4, !llvm.access.group !21 +// CHECK17-NEXT: [[TMP9:%.*]] = load i16, i16* [[CONV1]], align 2, !llvm.access.group !21 // CHECK17-NEXT: [[CONV4:%.*]] = sext i16 [[TMP9]] to i32 // CHECK17-NEXT: [[ADD5:%.*]] = add nsw i32 [[CONV4]], 1 // CHECK17-NEXT: [[CONV6:%.*]] = trunc i32 [[ADD5]] to i16 -// CHECK17-NEXT: store i16 [[CONV6]], i16* [[CONV1]], align 8, !llvm.access.group !21 +// CHECK17-NEXT: store i16 [[CONV6]], i16* [[CONV1]], align 2, !llvm.access.group !21 // CHECK17-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK17: omp.body.continue: // CHECK17-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] @@ -18961,7 +18961,7 @@ int bar(int n){ // CHECK17-NEXT: [[TMP5:%.*]] = load i64, i64* [[VLA_ADDR4]], align 8 // CHECK17-NEXT: [[TMP6:%.*]] = load double*, double** [[CN_ADDR]], align 8 // CHECK17-NEXT: [[TMP7:%.*]] = load %struct.TT*, %struct.TT** [[D_ADDR]], align 8 -// CHECK17-NEXT: [[TMP8:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK17-NEXT: [[TMP8:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK17-NEXT: [[CONV5:%.*]] = bitcast i64* [[A_CASTED]] to i32* // CHECK17-NEXT: store i32 [[TMP8]], i32* [[CONV5]], align 4 // CHECK17-NEXT: [[TMP9:%.*]] = load i64, i64* [[A_CASTED]], align 8 @@ -19043,9 +19043,9 @@ int bar(int n){ // CHECK17-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP15]], 1 // CHECK17-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK17-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !24 -// CHECK17-NEXT: [[TMP16:%.*]] = load i32, i32* [[CONV]], align 8, !llvm.access.group !24 +// CHECK17-NEXT: [[TMP16:%.*]] = load i32, i32* [[CONV]], align 4, !llvm.access.group !24 // CHECK17-NEXT: [[ADD6:%.*]] = add nsw i32 [[TMP16]], 1 -// CHECK17-NEXT: store i32 [[ADD6]], i32* [[CONV]], align 8, !llvm.access.group !24 +// CHECK17-NEXT: store i32 [[ADD6]], i32* [[CONV]], align 4, !llvm.access.group !24 // CHECK17-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x float], [10 x float]* [[TMP0]], i64 0, i64 2 // CHECK17-NEXT: [[TMP17:%.*]] = load float, float* [[ARRAYIDX]], align 4, !llvm.access.group !24 // CHECK17-NEXT: [[CONV7:%.*]] = fpext float [[TMP17]] to double @@ -19123,19 +19123,19 @@ int bar(int n){ // CHECK17-NEXT: [[CONV2:%.*]] = bitcast i64* [[AA_ADDR]] to i16* // CHECK17-NEXT: [[CONV3:%.*]] = bitcast i64* [[AAA_ADDR]] to i8* // CHECK17-NEXT: [[TMP0:%.*]] = load [10 x i32]*, [10 x i32]** [[B_ADDR]], align 8 -// CHECK17-NEXT: [[TMP1:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK17-NEXT: [[TMP1:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK17-NEXT: [[CONV4:%.*]] = bitcast i64* [[A_CASTED]] to i32* // CHECK17-NEXT: store 
i32 [[TMP1]], i32* [[CONV4]], align 4 // CHECK17-NEXT: [[TMP2:%.*]] = load i64, i64* [[A_CASTED]], align 8 -// CHECK17-NEXT: [[TMP3:%.*]] = load i32, i32* [[CONV1]], align 8 +// CHECK17-NEXT: [[TMP3:%.*]] = load i32, i32* [[CONV1]], align 4 // CHECK17-NEXT: [[CONV5:%.*]] = bitcast i64* [[N_CASTED]] to i32* // CHECK17-NEXT: store i32 [[TMP3]], i32* [[CONV5]], align 4 // CHECK17-NEXT: [[TMP4:%.*]] = load i64, i64* [[N_CASTED]], align 8 -// CHECK17-NEXT: [[TMP5:%.*]] = load i16, i16* [[CONV2]], align 8 +// CHECK17-NEXT: [[TMP5:%.*]] = load i16, i16* [[CONV2]], align 2 // CHECK17-NEXT: [[CONV6:%.*]] = bitcast i64* [[AA_CASTED]] to i16* // CHECK17-NEXT: store i16 [[TMP5]], i16* [[CONV6]], align 2 // CHECK17-NEXT: [[TMP6:%.*]] = load i64, i64* [[AA_CASTED]], align 8 -// CHECK17-NEXT: [[TMP7:%.*]] = load i8, i8* [[CONV3]], align 8 +// CHECK17-NEXT: [[TMP7:%.*]] = load i8, i8* [[CONV3]], align 1 // CHECK17-NEXT: [[CONV7:%.*]] = bitcast i64* [[AAA_CASTED]] to i8* // CHECK17-NEXT: store i8 [[TMP7]], i8* [[CONV7]], align 1 // CHECK17-NEXT: [[TMP8:%.*]] = load i64, i64* [[AAA_CASTED]], align 8 @@ -19176,9 +19176,9 @@ int bar(int n){ // CHECK17-NEXT: [[CONV2:%.*]] = bitcast i64* [[AA_ADDR]] to i16* // CHECK17-NEXT: [[CONV3:%.*]] = bitcast i64* [[AAA_ADDR]] to i8* // CHECK17-NEXT: [[TMP0:%.*]] = load [10 x i32]*, [10 x i32]** [[B_ADDR]], align 8 -// CHECK17-NEXT: [[TMP1:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK17-NEXT: [[TMP1:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK17-NEXT: store i32 [[TMP1]], i32* [[DOTCAPTURE_EXPR_]], align 4 -// CHECK17-NEXT: [[TMP2:%.*]] = load i32, i32* [[CONV1]], align 8 +// CHECK17-NEXT: [[TMP2:%.*]] = load i32, i32* [[CONV1]], align 4 // CHECK17-NEXT: store i32 [[TMP2]], i32* [[DOTCAPTURE_EXPR_4]], align 4 // CHECK17-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_4]], align 4 // CHECK17-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 @@ -19231,19 +19231,19 @@ int bar(int n){ // CHECK17-NEXT: [[MUL:%.*]] = mul i32 [[TMP19]], 1 // CHECK17-NEXT: [[ADD12:%.*]] = add i32 [[TMP18]], [[MUL]] // CHECK17-NEXT: store i32 [[ADD12]], i32* [[I8]], align 4, !llvm.access.group !27 -// CHECK17-NEXT: [[TMP20:%.*]] = load i32, i32* [[CONV]], align 8, !llvm.access.group !27 +// CHECK17-NEXT: [[TMP20:%.*]] = load i32, i32* [[CONV]], align 4, !llvm.access.group !27 // CHECK17-NEXT: [[ADD13:%.*]] = add nsw i32 [[TMP20]], 1 -// CHECK17-NEXT: store i32 [[ADD13]], i32* [[CONV]], align 8, !llvm.access.group !27 -// CHECK17-NEXT: [[TMP21:%.*]] = load i16, i16* [[CONV2]], align 8, !llvm.access.group !27 +// CHECK17-NEXT: store i32 [[ADD13]], i32* [[CONV]], align 4, !llvm.access.group !27 +// CHECK17-NEXT: [[TMP21:%.*]] = load i16, i16* [[CONV2]], align 2, !llvm.access.group !27 // CHECK17-NEXT: [[CONV14:%.*]] = sext i16 [[TMP21]] to i32 // CHECK17-NEXT: [[ADD15:%.*]] = add nsw i32 [[CONV14]], 1 // CHECK17-NEXT: [[CONV16:%.*]] = trunc i32 [[ADD15]] to i16 -// CHECK17-NEXT: store i16 [[CONV16]], i16* [[CONV2]], align 8, !llvm.access.group !27 -// CHECK17-NEXT: [[TMP22:%.*]] = load i8, i8* [[CONV3]], align 8, !llvm.access.group !27 +// CHECK17-NEXT: store i16 [[CONV16]], i16* [[CONV2]], align 2, !llvm.access.group !27 +// CHECK17-NEXT: [[TMP22:%.*]] = load i8, i8* [[CONV3]], align 1, !llvm.access.group !27 // CHECK17-NEXT: [[CONV17:%.*]] = sext i8 [[TMP22]] to i32 // CHECK17-NEXT: [[ADD18:%.*]] = add nsw i32 [[CONV17]], 1 // CHECK17-NEXT: [[CONV19:%.*]] = trunc i32 [[ADD18]] to i8 -// CHECK17-NEXT: store i8 [[CONV19]], i8* [[CONV3]], align 8, !llvm.access.group !27 
+// CHECK17-NEXT: store i8 [[CONV19]], i8* [[CONV3]], align 1, !llvm.access.group !27 // CHECK17-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], [10 x i32]* [[TMP0]], i64 0, i64 2 // CHECK17-NEXT: [[TMP23:%.*]] = load i32, i32* [[ARRAYIDX]], align 4, !llvm.access.group !27 // CHECK17-NEXT: [[ADD20:%.*]] = add nsw i32 [[TMP23]], 1 @@ -19302,7 +19302,7 @@ int bar(int n){ // CHECK17-NEXT: [[TMP1:%.*]] = load i64, i64* [[VLA_ADDR]], align 8 // CHECK17-NEXT: [[TMP2:%.*]] = load i64, i64* [[VLA_ADDR2]], align 8 // CHECK17-NEXT: [[TMP3:%.*]] = load i16*, i16** [[C_ADDR]], align 8 -// CHECK17-NEXT: [[TMP4:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK17-NEXT: [[TMP4:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK17-NEXT: [[CONV3:%.*]] = bitcast i64* [[B_CASTED]] to i32* // CHECK17-NEXT: store i32 [[TMP4]], i32* [[CONV3]], align 4 // CHECK17-NEXT: [[TMP5:%.*]] = load i64, i64* [[B_CASTED]], align 8 @@ -19370,7 +19370,7 @@ int bar(int n){ // CHECK17-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP11]], 1 // CHECK17-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK17-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !30 -// CHECK17-NEXT: [[TMP12:%.*]] = load i32, i32* [[CONV]], align 8, !llvm.access.group !30 +// CHECK17-NEXT: [[TMP12:%.*]] = load i32, i32* [[CONV]], align 4, !llvm.access.group !30 // CHECK17-NEXT: [[CONV4:%.*]] = sitofp i32 [[TMP12]] to double // CHECK17-NEXT: [[ADD5:%.*]] = fadd double [[CONV4]], 1.500000e+00 // CHECK17-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_S1:%.*]], %struct.S1* [[TMP0]], i32 0, i32 0 @@ -19420,11 +19420,11 @@ int bar(int n){ // CHECK17-NEXT: [[CONV:%.*]] = bitcast i64* [[A_ADDR]] to i32* // CHECK17-NEXT: [[CONV1:%.*]] = bitcast i64* [[AA_ADDR]] to i16* // CHECK17-NEXT: [[TMP0:%.*]] = load [10 x i32]*, [10 x i32]** [[B_ADDR]], align 8 -// CHECK17-NEXT: [[TMP1:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK17-NEXT: [[TMP1:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK17-NEXT: [[CONV2:%.*]] = bitcast i64* [[A_CASTED]] to i32* // CHECK17-NEXT: store i32 [[TMP1]], i32* [[CONV2]], align 4 // CHECK17-NEXT: [[TMP2:%.*]] = load i64, i64* [[A_CASTED]], align 8 -// CHECK17-NEXT: [[TMP3:%.*]] = load i16, i16* [[CONV1]], align 8 +// CHECK17-NEXT: [[TMP3:%.*]] = load i16, i16* [[CONV1]], align 2 // CHECK17-NEXT: [[CONV3:%.*]] = bitcast i64* [[AA_CASTED]] to i16* // CHECK17-NEXT: store i16 [[TMP3]], i16* [[CONV3]], align 2 // CHECK17-NEXT: [[TMP4:%.*]] = load i64, i64* [[AA_CASTED]], align 8 @@ -19486,14 +19486,14 @@ int bar(int n){ // CHECK17-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP8]], 1 // CHECK17-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK17-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !33 -// CHECK17-NEXT: [[TMP9:%.*]] = load i32, i32* [[CONV]], align 8, !llvm.access.group !33 +// CHECK17-NEXT: [[TMP9:%.*]] = load i32, i32* [[CONV]], align 4, !llvm.access.group !33 // CHECK17-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP9]], 1 -// CHECK17-NEXT: store i32 [[ADD3]], i32* [[CONV]], align 8, !llvm.access.group !33 -// CHECK17-NEXT: [[TMP10:%.*]] = load i16, i16* [[CONV1]], align 8, !llvm.access.group !33 +// CHECK17-NEXT: store i32 [[ADD3]], i32* [[CONV]], align 4, !llvm.access.group !33 +// CHECK17-NEXT: [[TMP10:%.*]] = load i16, i16* [[CONV1]], align 2, !llvm.access.group !33 // CHECK17-NEXT: [[CONV4:%.*]] = sext i16 [[TMP10]] to i32 // CHECK17-NEXT: [[ADD5:%.*]] = add nsw i32 [[CONV4]], 1 // CHECK17-NEXT: [[CONV6:%.*]] = trunc i32 [[ADD5]] to i16 -// CHECK17-NEXT: store i16 [[CONV6]], i16* [[CONV1]], align 8, 
!llvm.access.group !33 +// CHECK17-NEXT: store i16 [[CONV6]], i16* [[CONV1]], align 2, !llvm.access.group !33 // CHECK17-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], [10 x i32]* [[TMP0]], i64 0, i64 2 // CHECK17-NEXT: [[TMP11:%.*]] = load i32, i32* [[ARRAYIDX]], align 4, !llvm.access.group !33 // CHECK17-NEXT: [[ADD7:%.*]] = add nsw i32 [[TMP11]], 1 @@ -19534,10 +19534,10 @@ int bar(int n){ // CHECK18-NEXT: [[CONV:%.*]] = bitcast i64* [[AA_ADDR]] to i16* // CHECK18-NEXT: [[CONV3:%.*]] = bitcast i64* [[DOTCAPTURE_EXPR__ADDR]] to i32* // CHECK18-NEXT: [[CONV4:%.*]] = bitcast i64* [[DOTCAPTURE_EXPR__ADDR2]] to i32* -// CHECK18-NEXT: [[TMP1:%.*]] = load i32, i32* [[CONV3]], align 8 -// CHECK18-NEXT: [[TMP2:%.*]] = load i32, i32* [[CONV4]], align 8 +// CHECK18-NEXT: [[TMP1:%.*]] = load i32, i32* [[CONV3]], align 4 +// CHECK18-NEXT: [[TMP2:%.*]] = load i32, i32* [[CONV4]], align 4 // CHECK18-NEXT: call void @__kmpc_push_num_teams(%struct.ident_t* @[[GLOB2]], i32 [[TMP0]], i32 [[TMP1]], i32 [[TMP2]]) -// CHECK18-NEXT: [[TMP3:%.*]] = load i16, i16* [[CONV]], align 8 +// CHECK18-NEXT: [[TMP3:%.*]] = load i16, i16* [[CONV]], align 2 // CHECK18-NEXT: [[CONV5:%.*]] = bitcast i64* [[AA_CASTED]] to i16* // CHECK18-NEXT: store i16 [[TMP3]], i16* [[CONV5]], align 2 // CHECK18-NEXT: [[TMP4:%.*]] = load i64, i64* [[AA_CASTED]], align 8 @@ -19622,7 +19622,7 @@ int bar(int n){ // CHECK18-NEXT: [[AA_CASTED:%.*]] = alloca i64, align 8 // CHECK18-NEXT: store i64 [[AA]], i64* [[AA_ADDR]], align 8 // CHECK18-NEXT: [[CONV:%.*]] = bitcast i64* [[AA_ADDR]] to i16* -// CHECK18-NEXT: [[TMP0:%.*]] = load i16, i16* [[CONV]], align 8 +// CHECK18-NEXT: [[TMP0:%.*]] = load i16, i16* [[CONV]], align 2 // CHECK18-NEXT: [[CONV1:%.*]] = bitcast i64* [[AA_CASTED]] to i16* // CHECK18-NEXT: store i16 [[TMP0]], i16* [[CONV1]], align 2 // CHECK18-NEXT: [[TMP1:%.*]] = load i64, i64* [[AA_CASTED]], align 8 @@ -19678,11 +19678,11 @@ int bar(int n){ // CHECK18-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP7]], 1 // CHECK18-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK18-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !18 -// CHECK18-NEXT: [[TMP8:%.*]] = load i16, i16* [[CONV]], align 8, !llvm.access.group !18 +// CHECK18-NEXT: [[TMP8:%.*]] = load i16, i16* [[CONV]], align 2, !llvm.access.group !18 // CHECK18-NEXT: [[CONV2:%.*]] = sext i16 [[TMP8]] to i32 // CHECK18-NEXT: [[ADD3:%.*]] = add nsw i32 [[CONV2]], 1 // CHECK18-NEXT: [[CONV4:%.*]] = trunc i32 [[ADD3]] to i16 -// CHECK18-NEXT: store i16 [[CONV4]], i16* [[CONV]], align 8, !llvm.access.group !18 +// CHECK18-NEXT: store i16 [[CONV4]], i16* [[CONV]], align 2, !llvm.access.group !18 // CHECK18-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK18: omp.body.continue: // CHECK18-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] @@ -19716,11 +19716,11 @@ int bar(int n){ // CHECK18-NEXT: store i64 [[AA]], i64* [[AA_ADDR]], align 8 // CHECK18-NEXT: [[CONV:%.*]] = bitcast i64* [[A_ADDR]] to i32* // CHECK18-NEXT: [[CONV1:%.*]] = bitcast i64* [[AA_ADDR]] to i16* -// CHECK18-NEXT: [[TMP0:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK18-NEXT: [[TMP0:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK18-NEXT: [[CONV2:%.*]] = bitcast i64* [[A_CASTED]] to i32* // CHECK18-NEXT: store i32 [[TMP0]], i32* [[CONV2]], align 4 // CHECK18-NEXT: [[TMP1:%.*]] = load i64, i64* [[A_CASTED]], align 8 -// CHECK18-NEXT: [[TMP2:%.*]] = load i16, i16* [[CONV1]], align 8 +// CHECK18-NEXT: [[TMP2:%.*]] = load i16, i16* [[CONV1]], align 2 // CHECK18-NEXT: [[CONV3:%.*]] = bitcast i64* 
[[AA_CASTED]] to i16* // CHECK18-NEXT: store i16 [[TMP2]], i16* [[CONV3]], align 2 // CHECK18-NEXT: [[TMP3:%.*]] = load i64, i64* [[AA_CASTED]], align 8 @@ -19779,14 +19779,14 @@ int bar(int n){ // CHECK18-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP7]], 1 // CHECK18-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK18-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !21 -// CHECK18-NEXT: [[TMP8:%.*]] = load i32, i32* [[CONV]], align 8, !llvm.access.group !21 +// CHECK18-NEXT: [[TMP8:%.*]] = load i32, i32* [[CONV]], align 4, !llvm.access.group !21 // CHECK18-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP8]], 1 -// CHECK18-NEXT: store i32 [[ADD3]], i32* [[CONV]], align 8, !llvm.access.group !21 -// CHECK18-NEXT: [[TMP9:%.*]] = load i16, i16* [[CONV1]], align 8, !llvm.access.group !21 +// CHECK18-NEXT: store i32 [[ADD3]], i32* [[CONV]], align 4, !llvm.access.group !21 +// CHECK18-NEXT: [[TMP9:%.*]] = load i16, i16* [[CONV1]], align 2, !llvm.access.group !21 // CHECK18-NEXT: [[CONV4:%.*]] = sext i16 [[TMP9]] to i32 // CHECK18-NEXT: [[ADD5:%.*]] = add nsw i32 [[CONV4]], 1 // CHECK18-NEXT: [[CONV6:%.*]] = trunc i32 [[ADD5]] to i16 -// CHECK18-NEXT: store i16 [[CONV6]], i16* [[CONV1]], align 8, !llvm.access.group !21 +// CHECK18-NEXT: store i16 [[CONV6]], i16* [[CONV1]], align 2, !llvm.access.group !21 // CHECK18-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK18: omp.body.continue: // CHECK18-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] @@ -19840,7 +19840,7 @@ int bar(int n){ // CHECK18-NEXT: [[TMP5:%.*]] = load i64, i64* [[VLA_ADDR4]], align 8 // CHECK18-NEXT: [[TMP6:%.*]] = load double*, double** [[CN_ADDR]], align 8 // CHECK18-NEXT: [[TMP7:%.*]] = load %struct.TT*, %struct.TT** [[D_ADDR]], align 8 -// CHECK18-NEXT: [[TMP8:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK18-NEXT: [[TMP8:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK18-NEXT: [[CONV5:%.*]] = bitcast i64* [[A_CASTED]] to i32* // CHECK18-NEXT: store i32 [[TMP8]], i32* [[CONV5]], align 4 // CHECK18-NEXT: [[TMP9:%.*]] = load i64, i64* [[A_CASTED]], align 8 @@ -19922,9 +19922,9 @@ int bar(int n){ // CHECK18-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP15]], 1 // CHECK18-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK18-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !24 -// CHECK18-NEXT: [[TMP16:%.*]] = load i32, i32* [[CONV]], align 8, !llvm.access.group !24 +// CHECK18-NEXT: [[TMP16:%.*]] = load i32, i32* [[CONV]], align 4, !llvm.access.group !24 // CHECK18-NEXT: [[ADD6:%.*]] = add nsw i32 [[TMP16]], 1 -// CHECK18-NEXT: store i32 [[ADD6]], i32* [[CONV]], align 8, !llvm.access.group !24 +// CHECK18-NEXT: store i32 [[ADD6]], i32* [[CONV]], align 4, !llvm.access.group !24 // CHECK18-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x float], [10 x float]* [[TMP0]], i64 0, i64 2 // CHECK18-NEXT: [[TMP17:%.*]] = load float, float* [[ARRAYIDX]], align 4, !llvm.access.group !24 // CHECK18-NEXT: [[CONV7:%.*]] = fpext float [[TMP17]] to double @@ -20002,19 +20002,19 @@ int bar(int n){ // CHECK18-NEXT: [[CONV2:%.*]] = bitcast i64* [[AA_ADDR]] to i16* // CHECK18-NEXT: [[CONV3:%.*]] = bitcast i64* [[AAA_ADDR]] to i8* // CHECK18-NEXT: [[TMP0:%.*]] = load [10 x i32]*, [10 x i32]** [[B_ADDR]], align 8 -// CHECK18-NEXT: [[TMP1:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK18-NEXT: [[TMP1:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK18-NEXT: [[CONV4:%.*]] = bitcast i64* [[A_CASTED]] to i32* // CHECK18-NEXT: store i32 [[TMP1]], i32* [[CONV4]], align 4 // CHECK18-NEXT: [[TMP2:%.*]] = load i64, i64* [[A_CASTED]], align 8 -// 
CHECK18-NEXT: [[TMP3:%.*]] = load i32, i32* [[CONV1]], align 8 +// CHECK18-NEXT: [[TMP3:%.*]] = load i32, i32* [[CONV1]], align 4 // CHECK18-NEXT: [[CONV5:%.*]] = bitcast i64* [[N_CASTED]] to i32* // CHECK18-NEXT: store i32 [[TMP3]], i32* [[CONV5]], align 4 // CHECK18-NEXT: [[TMP4:%.*]] = load i64, i64* [[N_CASTED]], align 8 -// CHECK18-NEXT: [[TMP5:%.*]] = load i16, i16* [[CONV2]], align 8 +// CHECK18-NEXT: [[TMP5:%.*]] = load i16, i16* [[CONV2]], align 2 // CHECK18-NEXT: [[CONV6:%.*]] = bitcast i64* [[AA_CASTED]] to i16* // CHECK18-NEXT: store i16 [[TMP5]], i16* [[CONV6]], align 2 // CHECK18-NEXT: [[TMP6:%.*]] = load i64, i64* [[AA_CASTED]], align 8 -// CHECK18-NEXT: [[TMP7:%.*]] = load i8, i8* [[CONV3]], align 8 +// CHECK18-NEXT: [[TMP7:%.*]] = load i8, i8* [[CONV3]], align 1 // CHECK18-NEXT: [[CONV7:%.*]] = bitcast i64* [[AAA_CASTED]] to i8* // CHECK18-NEXT: store i8 [[TMP7]], i8* [[CONV7]], align 1 // CHECK18-NEXT: [[TMP8:%.*]] = load i64, i64* [[AAA_CASTED]], align 8 @@ -20055,9 +20055,9 @@ int bar(int n){ // CHECK18-NEXT: [[CONV2:%.*]] = bitcast i64* [[AA_ADDR]] to i16* // CHECK18-NEXT: [[CONV3:%.*]] = bitcast i64* [[AAA_ADDR]] to i8* // CHECK18-NEXT: [[TMP0:%.*]] = load [10 x i32]*, [10 x i32]** [[B_ADDR]], align 8 -// CHECK18-NEXT: [[TMP1:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK18-NEXT: [[TMP1:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK18-NEXT: store i32 [[TMP1]], i32* [[DOTCAPTURE_EXPR_]], align 4 -// CHECK18-NEXT: [[TMP2:%.*]] = load i32, i32* [[CONV1]], align 8 +// CHECK18-NEXT: [[TMP2:%.*]] = load i32, i32* [[CONV1]], align 4 // CHECK18-NEXT: store i32 [[TMP2]], i32* [[DOTCAPTURE_EXPR_4]], align 4 // CHECK18-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_4]], align 4 // CHECK18-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 @@ -20110,19 +20110,19 @@ int bar(int n){ // CHECK18-NEXT: [[MUL:%.*]] = mul i32 [[TMP19]], 1 // CHECK18-NEXT: [[ADD12:%.*]] = add i32 [[TMP18]], [[MUL]] // CHECK18-NEXT: store i32 [[ADD12]], i32* [[I8]], align 4, !llvm.access.group !27 -// CHECK18-NEXT: [[TMP20:%.*]] = load i32, i32* [[CONV]], align 8, !llvm.access.group !27 +// CHECK18-NEXT: [[TMP20:%.*]] = load i32, i32* [[CONV]], align 4, !llvm.access.group !27 // CHECK18-NEXT: [[ADD13:%.*]] = add nsw i32 [[TMP20]], 1 -// CHECK18-NEXT: store i32 [[ADD13]], i32* [[CONV]], align 8, !llvm.access.group !27 -// CHECK18-NEXT: [[TMP21:%.*]] = load i16, i16* [[CONV2]], align 8, !llvm.access.group !27 +// CHECK18-NEXT: store i32 [[ADD13]], i32* [[CONV]], align 4, !llvm.access.group !27 +// CHECK18-NEXT: [[TMP21:%.*]] = load i16, i16* [[CONV2]], align 2, !llvm.access.group !27 // CHECK18-NEXT: [[CONV14:%.*]] = sext i16 [[TMP21]] to i32 // CHECK18-NEXT: [[ADD15:%.*]] = add nsw i32 [[CONV14]], 1 // CHECK18-NEXT: [[CONV16:%.*]] = trunc i32 [[ADD15]] to i16 -// CHECK18-NEXT: store i16 [[CONV16]], i16* [[CONV2]], align 8, !llvm.access.group !27 -// CHECK18-NEXT: [[TMP22:%.*]] = load i8, i8* [[CONV3]], align 8, !llvm.access.group !27 +// CHECK18-NEXT: store i16 [[CONV16]], i16* [[CONV2]], align 2, !llvm.access.group !27 +// CHECK18-NEXT: [[TMP22:%.*]] = load i8, i8* [[CONV3]], align 1, !llvm.access.group !27 // CHECK18-NEXT: [[CONV17:%.*]] = sext i8 [[TMP22]] to i32 // CHECK18-NEXT: [[ADD18:%.*]] = add nsw i32 [[CONV17]], 1 // CHECK18-NEXT: [[CONV19:%.*]] = trunc i32 [[ADD18]] to i8 -// CHECK18-NEXT: store i8 [[CONV19]], i8* [[CONV3]], align 8, !llvm.access.group !27 +// CHECK18-NEXT: store i8 [[CONV19]], i8* [[CONV3]], align 1, !llvm.access.group !27 // CHECK18-NEXT: 
[[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], [10 x i32]* [[TMP0]], i64 0, i64 2 // CHECK18-NEXT: [[TMP23:%.*]] = load i32, i32* [[ARRAYIDX]], align 4, !llvm.access.group !27 // CHECK18-NEXT: [[ADD20:%.*]] = add nsw i32 [[TMP23]], 1 @@ -20181,7 +20181,7 @@ int bar(int n){ // CHECK18-NEXT: [[TMP1:%.*]] = load i64, i64* [[VLA_ADDR]], align 8 // CHECK18-NEXT: [[TMP2:%.*]] = load i64, i64* [[VLA_ADDR2]], align 8 // CHECK18-NEXT: [[TMP3:%.*]] = load i16*, i16** [[C_ADDR]], align 8 -// CHECK18-NEXT: [[TMP4:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK18-NEXT: [[TMP4:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK18-NEXT: [[CONV3:%.*]] = bitcast i64* [[B_CASTED]] to i32* // CHECK18-NEXT: store i32 [[TMP4]], i32* [[CONV3]], align 4 // CHECK18-NEXT: [[TMP5:%.*]] = load i64, i64* [[B_CASTED]], align 8 @@ -20249,7 +20249,7 @@ int bar(int n){ // CHECK18-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP11]], 1 // CHECK18-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK18-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !30 -// CHECK18-NEXT: [[TMP12:%.*]] = load i32, i32* [[CONV]], align 8, !llvm.access.group !30 +// CHECK18-NEXT: [[TMP12:%.*]] = load i32, i32* [[CONV]], align 4, !llvm.access.group !30 // CHECK18-NEXT: [[CONV4:%.*]] = sitofp i32 [[TMP12]] to double // CHECK18-NEXT: [[ADD5:%.*]] = fadd double [[CONV4]], 1.500000e+00 // CHECK18-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_S1:%.*]], %struct.S1* [[TMP0]], i32 0, i32 0 @@ -20299,11 +20299,11 @@ int bar(int n){ // CHECK18-NEXT: [[CONV:%.*]] = bitcast i64* [[A_ADDR]] to i32* // CHECK18-NEXT: [[CONV1:%.*]] = bitcast i64* [[AA_ADDR]] to i16* // CHECK18-NEXT: [[TMP0:%.*]] = load [10 x i32]*, [10 x i32]** [[B_ADDR]], align 8 -// CHECK18-NEXT: [[TMP1:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK18-NEXT: [[TMP1:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK18-NEXT: [[CONV2:%.*]] = bitcast i64* [[A_CASTED]] to i32* // CHECK18-NEXT: store i32 [[TMP1]], i32* [[CONV2]], align 4 // CHECK18-NEXT: [[TMP2:%.*]] = load i64, i64* [[A_CASTED]], align 8 -// CHECK18-NEXT: [[TMP3:%.*]] = load i16, i16* [[CONV1]], align 8 +// CHECK18-NEXT: [[TMP3:%.*]] = load i16, i16* [[CONV1]], align 2 // CHECK18-NEXT: [[CONV3:%.*]] = bitcast i64* [[AA_CASTED]] to i16* // CHECK18-NEXT: store i16 [[TMP3]], i16* [[CONV3]], align 2 // CHECK18-NEXT: [[TMP4:%.*]] = load i64, i64* [[AA_CASTED]], align 8 @@ -20365,14 +20365,14 @@ int bar(int n){ // CHECK18-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP8]], 1 // CHECK18-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK18-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !33 -// CHECK18-NEXT: [[TMP9:%.*]] = load i32, i32* [[CONV]], align 8, !llvm.access.group !33 +// CHECK18-NEXT: [[TMP9:%.*]] = load i32, i32* [[CONV]], align 4, !llvm.access.group !33 // CHECK18-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP9]], 1 -// CHECK18-NEXT: store i32 [[ADD3]], i32* [[CONV]], align 8, !llvm.access.group !33 -// CHECK18-NEXT: [[TMP10:%.*]] = load i16, i16* [[CONV1]], align 8, !llvm.access.group !33 +// CHECK18-NEXT: store i32 [[ADD3]], i32* [[CONV]], align 4, !llvm.access.group !33 +// CHECK18-NEXT: [[TMP10:%.*]] = load i16, i16* [[CONV1]], align 2, !llvm.access.group !33 // CHECK18-NEXT: [[CONV4:%.*]] = sext i16 [[TMP10]] to i32 // CHECK18-NEXT: [[ADD5:%.*]] = add nsw i32 [[CONV4]], 1 // CHECK18-NEXT: [[CONV6:%.*]] = trunc i32 [[ADD5]] to i16 -// CHECK18-NEXT: store i16 [[CONV6]], i16* [[CONV1]], align 8, !llvm.access.group !33 +// CHECK18-NEXT: store i16 [[CONV6]], i16* [[CONV1]], align 2, !llvm.access.group !33 
// CHECK18-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], [10 x i32]* [[TMP0]], i64 0, i64 2 // CHECK18-NEXT: [[TMP11:%.*]] = load i32, i32* [[ARRAYIDX]], align 4, !llvm.access.group !33 // CHECK18-NEXT: [[ADD7:%.*]] = add nsw i32 [[TMP11]], 1 @@ -20414,7 +20414,7 @@ int bar(int n){ // CHECK19-NEXT: [[TMP1:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR__ADDR]], align 4 // CHECK19-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR__ADDR2]], align 4 // CHECK19-NEXT: call void @__kmpc_push_num_teams(%struct.ident_t* @[[GLOB2]], i32 [[TMP0]], i32 [[TMP1]], i32 [[TMP2]]) -// CHECK19-NEXT: [[TMP3:%.*]] = load i16, i16* [[CONV]], align 4 +// CHECK19-NEXT: [[TMP3:%.*]] = load i16, i16* [[CONV]], align 2 // CHECK19-NEXT: [[CONV3:%.*]] = bitcast i32* [[AA_CASTED]] to i16* // CHECK19-NEXT: store i16 [[TMP3]], i16* [[CONV3]], align 2 // CHECK19-NEXT: [[TMP4:%.*]] = load i32, i32* [[AA_CASTED]], align 4 @@ -20499,7 +20499,7 @@ int bar(int n){ // CHECK19-NEXT: [[AA_CASTED:%.*]] = alloca i32, align 4 // CHECK19-NEXT: store i32 [[AA]], i32* [[AA_ADDR]], align 4 // CHECK19-NEXT: [[CONV:%.*]] = bitcast i32* [[AA_ADDR]] to i16* -// CHECK19-NEXT: [[TMP0:%.*]] = load i16, i16* [[CONV]], align 4 +// CHECK19-NEXT: [[TMP0:%.*]] = load i16, i16* [[CONV]], align 2 // CHECK19-NEXT: [[CONV1:%.*]] = bitcast i32* [[AA_CASTED]] to i16* // CHECK19-NEXT: store i16 [[TMP0]], i16* [[CONV1]], align 2 // CHECK19-NEXT: [[TMP1:%.*]] = load i32, i32* [[AA_CASTED]], align 4 @@ -20555,11 +20555,11 @@ int bar(int n){ // CHECK19-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP7]], 1 // CHECK19-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK19-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !19 -// CHECK19-NEXT: [[TMP8:%.*]] = load i16, i16* [[CONV]], align 4, !llvm.access.group !19 +// CHECK19-NEXT: [[TMP8:%.*]] = load i16, i16* [[CONV]], align 2, !llvm.access.group !19 // CHECK19-NEXT: [[CONV2:%.*]] = sext i16 [[TMP8]] to i32 // CHECK19-NEXT: [[ADD3:%.*]] = add nsw i32 [[CONV2]], 1 // CHECK19-NEXT: [[CONV4:%.*]] = trunc i32 [[ADD3]] to i16 -// CHECK19-NEXT: store i16 [[CONV4]], i16* [[CONV]], align 4, !llvm.access.group !19 +// CHECK19-NEXT: store i16 [[CONV4]], i16* [[CONV]], align 2, !llvm.access.group !19 // CHECK19-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK19: omp.body.continue: // CHECK19-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] @@ -20595,7 +20595,7 @@ int bar(int n){ // CHECK19-NEXT: [[TMP0:%.*]] = load i32, i32* [[A_ADDR]], align 4 // CHECK19-NEXT: store i32 [[TMP0]], i32* [[A_CASTED]], align 4 // CHECK19-NEXT: [[TMP1:%.*]] = load i32, i32* [[A_CASTED]], align 4 -// CHECK19-NEXT: [[TMP2:%.*]] = load i16, i16* [[CONV]], align 4 +// CHECK19-NEXT: [[TMP2:%.*]] = load i16, i16* [[CONV]], align 2 // CHECK19-NEXT: [[CONV1:%.*]] = bitcast i32* [[AA_CASTED]] to i16* // CHECK19-NEXT: store i16 [[TMP2]], i16* [[CONV1]], align 2 // CHECK19-NEXT: [[TMP3:%.*]] = load i32, i32* [[AA_CASTED]], align 4 @@ -20656,11 +20656,11 @@ int bar(int n){ // CHECK19-NEXT: [[TMP8:%.*]] = load i32, i32* [[A_ADDR]], align 4, !llvm.access.group !22 // CHECK19-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP8]], 1 // CHECK19-NEXT: store i32 [[ADD2]], i32* [[A_ADDR]], align 4, !llvm.access.group !22 -// CHECK19-NEXT: [[TMP9:%.*]] = load i16, i16* [[CONV]], align 4, !llvm.access.group !22 +// CHECK19-NEXT: [[TMP9:%.*]] = load i16, i16* [[CONV]], align 2, !llvm.access.group !22 // CHECK19-NEXT: [[CONV3:%.*]] = sext i16 [[TMP9]] to i32 // CHECK19-NEXT: [[ADD4:%.*]] = add nsw i32 [[CONV3]], 1 // CHECK19-NEXT: [[CONV5:%.*]] = trunc i32 [[ADD4]] 
to i16 -// CHECK19-NEXT: store i16 [[CONV5]], i16* [[CONV]], align 4, !llvm.access.group !22 +// CHECK19-NEXT: store i16 [[CONV5]], i16* [[CONV]], align 2, !llvm.access.group !22 // CHECK19-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK19: omp.body.continue: // CHECK19-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] @@ -20877,11 +20877,11 @@ int bar(int n){ // CHECK19-NEXT: [[TMP3:%.*]] = load i32, i32* [[N_ADDR]], align 4 // CHECK19-NEXT: store i32 [[TMP3]], i32* [[N_CASTED]], align 4 // CHECK19-NEXT: [[TMP4:%.*]] = load i32, i32* [[N_CASTED]], align 4 -// CHECK19-NEXT: [[TMP5:%.*]] = load i16, i16* [[CONV]], align 4 +// CHECK19-NEXT: [[TMP5:%.*]] = load i16, i16* [[CONV]], align 2 // CHECK19-NEXT: [[CONV2:%.*]] = bitcast i32* [[AA_CASTED]] to i16* // CHECK19-NEXT: store i16 [[TMP5]], i16* [[CONV2]], align 2 // CHECK19-NEXT: [[TMP6:%.*]] = load i32, i32* [[AA_CASTED]], align 4 -// CHECK19-NEXT: [[TMP7:%.*]] = load i8, i8* [[CONV1]], align 4 +// CHECK19-NEXT: [[TMP7:%.*]] = load i8, i8* [[CONV1]], align 1 // CHECK19-NEXT: [[CONV3:%.*]] = bitcast i32* [[AAA_CASTED]] to i8* // CHECK19-NEXT: store i8 [[TMP7]], i8* [[CONV3]], align 1 // CHECK19-NEXT: [[TMP8:%.*]] = load i32, i32* [[AAA_CASTED]], align 4 @@ -20978,16 +20978,16 @@ int bar(int n){ // CHECK19-NEXT: [[TMP20:%.*]] = load i32, i32* [[A_ADDR]], align 4, !llvm.access.group !28 // CHECK19-NEXT: [[ADD11:%.*]] = add nsw i32 [[TMP20]], 1 // CHECK19-NEXT: store i32 [[ADD11]], i32* [[A_ADDR]], align 4, !llvm.access.group !28 -// CHECK19-NEXT: [[TMP21:%.*]] = load i16, i16* [[CONV]], align 4, !llvm.access.group !28 +// CHECK19-NEXT: [[TMP21:%.*]] = load i16, i16* [[CONV]], align 2, !llvm.access.group !28 // CHECK19-NEXT: [[CONV12:%.*]] = sext i16 [[TMP21]] to i32 // CHECK19-NEXT: [[ADD13:%.*]] = add nsw i32 [[CONV12]], 1 // CHECK19-NEXT: [[CONV14:%.*]] = trunc i32 [[ADD13]] to i16 -// CHECK19-NEXT: store i16 [[CONV14]], i16* [[CONV]], align 4, !llvm.access.group !28 -// CHECK19-NEXT: [[TMP22:%.*]] = load i8, i8* [[CONV1]], align 4, !llvm.access.group !28 +// CHECK19-NEXT: store i16 [[CONV14]], i16* [[CONV]], align 2, !llvm.access.group !28 +// CHECK19-NEXT: [[TMP22:%.*]] = load i8, i8* [[CONV1]], align 1, !llvm.access.group !28 // CHECK19-NEXT: [[CONV15:%.*]] = sext i8 [[TMP22]] to i32 // CHECK19-NEXT: [[ADD16:%.*]] = add nsw i32 [[CONV15]], 1 // CHECK19-NEXT: [[CONV17:%.*]] = trunc i32 [[ADD16]] to i8 -// CHECK19-NEXT: store i8 [[CONV17]], i8* [[CONV1]], align 4, !llvm.access.group !28 +// CHECK19-NEXT: store i8 [[CONV17]], i8* [[CONV1]], align 1, !llvm.access.group !28 // CHECK19-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], [10 x i32]* [[TMP0]], i32 0, i32 2 // CHECK19-NEXT: [[TMP23:%.*]] = load i32, i32* [[ARRAYIDX]], align 4, !llvm.access.group !28 // CHECK19-NEXT: [[ADD18:%.*]] = add nsw i32 [[TMP23]], 1 @@ -21163,7 +21163,7 @@ int bar(int n){ // CHECK19-NEXT: [[TMP1:%.*]] = load i32, i32* [[A_ADDR]], align 4 // CHECK19-NEXT: store i32 [[TMP1]], i32* [[A_CASTED]], align 4 // CHECK19-NEXT: [[TMP2:%.*]] = load i32, i32* [[A_CASTED]], align 4 -// CHECK19-NEXT: [[TMP3:%.*]] = load i16, i16* [[CONV]], align 4 +// CHECK19-NEXT: [[TMP3:%.*]] = load i16, i16* [[CONV]], align 2 // CHECK19-NEXT: [[CONV1:%.*]] = bitcast i32* [[AA_CASTED]] to i16* // CHECK19-NEXT: store i16 [[TMP3]], i16* [[CONV1]], align 2 // CHECK19-NEXT: [[TMP4:%.*]] = load i32, i32* [[AA_CASTED]], align 4 @@ -21227,11 +21227,11 @@ int bar(int n){ // CHECK19-NEXT: [[TMP9:%.*]] = load i32, i32* [[A_ADDR]], align 4, !llvm.access.group !34 // CHECK19-NEXT: [[ADD2:%.*]] = 
add nsw i32 [[TMP9]], 1 // CHECK19-NEXT: store i32 [[ADD2]], i32* [[A_ADDR]], align 4, !llvm.access.group !34 -// CHECK19-NEXT: [[TMP10:%.*]] = load i16, i16* [[CONV]], align 4, !llvm.access.group !34 +// CHECK19-NEXT: [[TMP10:%.*]] = load i16, i16* [[CONV]], align 2, !llvm.access.group !34 // CHECK19-NEXT: [[CONV3:%.*]] = sext i16 [[TMP10]] to i32 // CHECK19-NEXT: [[ADD4:%.*]] = add nsw i32 [[CONV3]], 1 // CHECK19-NEXT: [[CONV5:%.*]] = trunc i32 [[ADD4]] to i16 -// CHECK19-NEXT: store i16 [[CONV5]], i16* [[CONV]], align 4, !llvm.access.group !34 +// CHECK19-NEXT: store i16 [[CONV5]], i16* [[CONV]], align 2, !llvm.access.group !34 // CHECK19-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], [10 x i32]* [[TMP0]], i32 0, i32 2 // CHECK19-NEXT: [[TMP11:%.*]] = load i32, i32* [[ARRAYIDX]], align 4, !llvm.access.group !34 // CHECK19-NEXT: [[ADD6:%.*]] = add nsw i32 [[TMP11]], 1 @@ -21273,7 +21273,7 @@ int bar(int n){ // CHECK20-NEXT: [[TMP1:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR__ADDR]], align 4 // CHECK20-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR__ADDR2]], align 4 // CHECK20-NEXT: call void @__kmpc_push_num_teams(%struct.ident_t* @[[GLOB2]], i32 [[TMP0]], i32 [[TMP1]], i32 [[TMP2]]) -// CHECK20-NEXT: [[TMP3:%.*]] = load i16, i16* [[CONV]], align 4 +// CHECK20-NEXT: [[TMP3:%.*]] = load i16, i16* [[CONV]], align 2 // CHECK20-NEXT: [[CONV3:%.*]] = bitcast i32* [[AA_CASTED]] to i16* // CHECK20-NEXT: store i16 [[TMP3]], i16* [[CONV3]], align 2 // CHECK20-NEXT: [[TMP4:%.*]] = load i32, i32* [[AA_CASTED]], align 4 @@ -21358,7 +21358,7 @@ int bar(int n){ // CHECK20-NEXT: [[AA_CASTED:%.*]] = alloca i32, align 4 // CHECK20-NEXT: store i32 [[AA]], i32* [[AA_ADDR]], align 4 // CHECK20-NEXT: [[CONV:%.*]] = bitcast i32* [[AA_ADDR]] to i16* -// CHECK20-NEXT: [[TMP0:%.*]] = load i16, i16* [[CONV]], align 4 +// CHECK20-NEXT: [[TMP0:%.*]] = load i16, i16* [[CONV]], align 2 // CHECK20-NEXT: [[CONV1:%.*]] = bitcast i32* [[AA_CASTED]] to i16* // CHECK20-NEXT: store i16 [[TMP0]], i16* [[CONV1]], align 2 // CHECK20-NEXT: [[TMP1:%.*]] = load i32, i32* [[AA_CASTED]], align 4 @@ -21414,11 +21414,11 @@ int bar(int n){ // CHECK20-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP7]], 1 // CHECK20-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK20-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !19 -// CHECK20-NEXT: [[TMP8:%.*]] = load i16, i16* [[CONV]], align 4, !llvm.access.group !19 +// CHECK20-NEXT: [[TMP8:%.*]] = load i16, i16* [[CONV]], align 2, !llvm.access.group !19 // CHECK20-NEXT: [[CONV2:%.*]] = sext i16 [[TMP8]] to i32 // CHECK20-NEXT: [[ADD3:%.*]] = add nsw i32 [[CONV2]], 1 // CHECK20-NEXT: [[CONV4:%.*]] = trunc i32 [[ADD3]] to i16 -// CHECK20-NEXT: store i16 [[CONV4]], i16* [[CONV]], align 4, !llvm.access.group !19 +// CHECK20-NEXT: store i16 [[CONV4]], i16* [[CONV]], align 2, !llvm.access.group !19 // CHECK20-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK20: omp.body.continue: // CHECK20-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] @@ -21454,7 +21454,7 @@ int bar(int n){ // CHECK20-NEXT: [[TMP0:%.*]] = load i32, i32* [[A_ADDR]], align 4 // CHECK20-NEXT: store i32 [[TMP0]], i32* [[A_CASTED]], align 4 // CHECK20-NEXT: [[TMP1:%.*]] = load i32, i32* [[A_CASTED]], align 4 -// CHECK20-NEXT: [[TMP2:%.*]] = load i16, i16* [[CONV]], align 4 +// CHECK20-NEXT: [[TMP2:%.*]] = load i16, i16* [[CONV]], align 2 // CHECK20-NEXT: [[CONV1:%.*]] = bitcast i32* [[AA_CASTED]] to i16* // CHECK20-NEXT: store i16 [[TMP2]], i16* [[CONV1]], align 2 // CHECK20-NEXT: [[TMP3:%.*]] = load i32, i32* 
[[AA_CASTED]], align 4 @@ -21515,11 +21515,11 @@ int bar(int n){ // CHECK20-NEXT: [[TMP8:%.*]] = load i32, i32* [[A_ADDR]], align 4, !llvm.access.group !22 // CHECK20-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP8]], 1 // CHECK20-NEXT: store i32 [[ADD2]], i32* [[A_ADDR]], align 4, !llvm.access.group !22 -// CHECK20-NEXT: [[TMP9:%.*]] = load i16, i16* [[CONV]], align 4, !llvm.access.group !22 +// CHECK20-NEXT: [[TMP9:%.*]] = load i16, i16* [[CONV]], align 2, !llvm.access.group !22 // CHECK20-NEXT: [[CONV3:%.*]] = sext i16 [[TMP9]] to i32 // CHECK20-NEXT: [[ADD4:%.*]] = add nsw i32 [[CONV3]], 1 // CHECK20-NEXT: [[CONV5:%.*]] = trunc i32 [[ADD4]] to i16 -// CHECK20-NEXT: store i16 [[CONV5]], i16* [[CONV]], align 4, !llvm.access.group !22 +// CHECK20-NEXT: store i16 [[CONV5]], i16* [[CONV]], align 2, !llvm.access.group !22 // CHECK20-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK20: omp.body.continue: // CHECK20-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] @@ -21736,11 +21736,11 @@ int bar(int n){ // CHECK20-NEXT: [[TMP3:%.*]] = load i32, i32* [[N_ADDR]], align 4 // CHECK20-NEXT: store i32 [[TMP3]], i32* [[N_CASTED]], align 4 // CHECK20-NEXT: [[TMP4:%.*]] = load i32, i32* [[N_CASTED]], align 4 -// CHECK20-NEXT: [[TMP5:%.*]] = load i16, i16* [[CONV]], align 4 +// CHECK20-NEXT: [[TMP5:%.*]] = load i16, i16* [[CONV]], align 2 // CHECK20-NEXT: [[CONV2:%.*]] = bitcast i32* [[AA_CASTED]] to i16* // CHECK20-NEXT: store i16 [[TMP5]], i16* [[CONV2]], align 2 // CHECK20-NEXT: [[TMP6:%.*]] = load i32, i32* [[AA_CASTED]], align 4 -// CHECK20-NEXT: [[TMP7:%.*]] = load i8, i8* [[CONV1]], align 4 +// CHECK20-NEXT: [[TMP7:%.*]] = load i8, i8* [[CONV1]], align 1 // CHECK20-NEXT: [[CONV3:%.*]] = bitcast i32* [[AAA_CASTED]] to i8* // CHECK20-NEXT: store i8 [[TMP7]], i8* [[CONV3]], align 1 // CHECK20-NEXT: [[TMP8:%.*]] = load i32, i32* [[AAA_CASTED]], align 4 @@ -21837,16 +21837,16 @@ int bar(int n){ // CHECK20-NEXT: [[TMP20:%.*]] = load i32, i32* [[A_ADDR]], align 4, !llvm.access.group !28 // CHECK20-NEXT: [[ADD11:%.*]] = add nsw i32 [[TMP20]], 1 // CHECK20-NEXT: store i32 [[ADD11]], i32* [[A_ADDR]], align 4, !llvm.access.group !28 -// CHECK20-NEXT: [[TMP21:%.*]] = load i16, i16* [[CONV]], align 4, !llvm.access.group !28 +// CHECK20-NEXT: [[TMP21:%.*]] = load i16, i16* [[CONV]], align 2, !llvm.access.group !28 // CHECK20-NEXT: [[CONV12:%.*]] = sext i16 [[TMP21]] to i32 // CHECK20-NEXT: [[ADD13:%.*]] = add nsw i32 [[CONV12]], 1 // CHECK20-NEXT: [[CONV14:%.*]] = trunc i32 [[ADD13]] to i16 -// CHECK20-NEXT: store i16 [[CONV14]], i16* [[CONV]], align 4, !llvm.access.group !28 -// CHECK20-NEXT: [[TMP22:%.*]] = load i8, i8* [[CONV1]], align 4, !llvm.access.group !28 +// CHECK20-NEXT: store i16 [[CONV14]], i16* [[CONV]], align 2, !llvm.access.group !28 +// CHECK20-NEXT: [[TMP22:%.*]] = load i8, i8* [[CONV1]], align 1, !llvm.access.group !28 // CHECK20-NEXT: [[CONV15:%.*]] = sext i8 [[TMP22]] to i32 // CHECK20-NEXT: [[ADD16:%.*]] = add nsw i32 [[CONV15]], 1 // CHECK20-NEXT: [[CONV17:%.*]] = trunc i32 [[ADD16]] to i8 -// CHECK20-NEXT: store i8 [[CONV17]], i8* [[CONV1]], align 4, !llvm.access.group !28 +// CHECK20-NEXT: store i8 [[CONV17]], i8* [[CONV1]], align 1, !llvm.access.group !28 // CHECK20-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], [10 x i32]* [[TMP0]], i32 0, i32 2 // CHECK20-NEXT: [[TMP23:%.*]] = load i32, i32* [[ARRAYIDX]], align 4, !llvm.access.group !28 // CHECK20-NEXT: [[ADD18:%.*]] = add nsw i32 [[TMP23]], 1 @@ -22022,7 +22022,7 @@ int bar(int n){ // CHECK20-NEXT: [[TMP1:%.*]] = load i32, i32* 
[[A_ADDR]], align 4 // CHECK20-NEXT: store i32 [[TMP1]], i32* [[A_CASTED]], align 4 // CHECK20-NEXT: [[TMP2:%.*]] = load i32, i32* [[A_CASTED]], align 4 -// CHECK20-NEXT: [[TMP3:%.*]] = load i16, i16* [[CONV]], align 4 +// CHECK20-NEXT: [[TMP3:%.*]] = load i16, i16* [[CONV]], align 2 // CHECK20-NEXT: [[CONV1:%.*]] = bitcast i32* [[AA_CASTED]] to i16* // CHECK20-NEXT: store i16 [[TMP3]], i16* [[CONV1]], align 2 // CHECK20-NEXT: [[TMP4:%.*]] = load i32, i32* [[AA_CASTED]], align 4 @@ -22086,11 +22086,11 @@ int bar(int n){ // CHECK20-NEXT: [[TMP9:%.*]] = load i32, i32* [[A_ADDR]], align 4, !llvm.access.group !34 // CHECK20-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP9]], 1 // CHECK20-NEXT: store i32 [[ADD2]], i32* [[A_ADDR]], align 4, !llvm.access.group !34 -// CHECK20-NEXT: [[TMP10:%.*]] = load i16, i16* [[CONV]], align 4, !llvm.access.group !34 +// CHECK20-NEXT: [[TMP10:%.*]] = load i16, i16* [[CONV]], align 2, !llvm.access.group !34 // CHECK20-NEXT: [[CONV3:%.*]] = sext i16 [[TMP10]] to i32 // CHECK20-NEXT: [[ADD4:%.*]] = add nsw i32 [[CONV3]], 1 // CHECK20-NEXT: [[CONV5:%.*]] = trunc i32 [[ADD4]] to i16 -// CHECK20-NEXT: store i16 [[CONV5]], i16* [[CONV]], align 4, !llvm.access.group !34 +// CHECK20-NEXT: store i16 [[CONV5]], i16* [[CONV]], align 2, !llvm.access.group !34 // CHECK20-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], [10 x i32]* [[TMP0]], i32 0, i32 2 // CHECK20-NEXT: [[TMP11:%.*]] = load i32, i32* [[ARRAYIDX]], align 4, !llvm.access.group !34 // CHECK20-NEXT: [[ADD6:%.*]] = add nsw i32 [[TMP11]], 1 @@ -22131,10 +22131,10 @@ int bar(int n){ // CHECK21-NEXT: [[CONV:%.*]] = bitcast i64* [[AA_ADDR]] to i16* // CHECK21-NEXT: [[CONV3:%.*]] = bitcast i64* [[DOTCAPTURE_EXPR__ADDR]] to i32* // CHECK21-NEXT: [[CONV4:%.*]] = bitcast i64* [[DOTCAPTURE_EXPR__ADDR2]] to i32* -// CHECK21-NEXT: [[TMP1:%.*]] = load i32, i32* [[CONV3]], align 8 -// CHECK21-NEXT: [[TMP2:%.*]] = load i32, i32* [[CONV4]], align 8 +// CHECK21-NEXT: [[TMP1:%.*]] = load i32, i32* [[CONV3]], align 4 +// CHECK21-NEXT: [[TMP2:%.*]] = load i32, i32* [[CONV4]], align 4 // CHECK21-NEXT: call void @__kmpc_push_num_teams(%struct.ident_t* @[[GLOB2]], i32 [[TMP0]], i32 [[TMP1]], i32 [[TMP2]]) -// CHECK21-NEXT: [[TMP3:%.*]] = load i16, i16* [[CONV]], align 8 +// CHECK21-NEXT: [[TMP3:%.*]] = load i16, i16* [[CONV]], align 2 // CHECK21-NEXT: [[CONV5:%.*]] = bitcast i64* [[AA_CASTED]] to i16* // CHECK21-NEXT: store i16 [[TMP3]], i16* [[CONV5]], align 2 // CHECK21-NEXT: [[TMP4:%.*]] = load i64, i64* [[AA_CASTED]], align 8 @@ -22219,7 +22219,7 @@ int bar(int n){ // CHECK21-NEXT: [[AA_CASTED:%.*]] = alloca i64, align 8 // CHECK21-NEXT: store i64 [[AA]], i64* [[AA_ADDR]], align 8 // CHECK21-NEXT: [[CONV:%.*]] = bitcast i64* [[AA_ADDR]] to i16* -// CHECK21-NEXT: [[TMP0:%.*]] = load i16, i16* [[CONV]], align 8 +// CHECK21-NEXT: [[TMP0:%.*]] = load i16, i16* [[CONV]], align 2 // CHECK21-NEXT: [[CONV1:%.*]] = bitcast i64* [[AA_CASTED]] to i16* // CHECK21-NEXT: store i16 [[TMP0]], i16* [[CONV1]], align 2 // CHECK21-NEXT: [[TMP1:%.*]] = load i64, i64* [[AA_CASTED]], align 8 @@ -22275,11 +22275,11 @@ int bar(int n){ // CHECK21-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP7]], 1 // CHECK21-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK21-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !18 -// CHECK21-NEXT: [[TMP8:%.*]] = load i16, i16* [[CONV]], align 8, !llvm.access.group !18 +// CHECK21-NEXT: [[TMP8:%.*]] = load i16, i16* [[CONV]], align 2, !llvm.access.group !18 // CHECK21-NEXT: [[CONV2:%.*]] = sext i16 [[TMP8]] to 
i32 // CHECK21-NEXT: [[ADD3:%.*]] = add nsw i32 [[CONV2]], 1 // CHECK21-NEXT: [[CONV4:%.*]] = trunc i32 [[ADD3]] to i16 -// CHECK21-NEXT: store i16 [[CONV4]], i16* [[CONV]], align 8, !llvm.access.group !18 +// CHECK21-NEXT: store i16 [[CONV4]], i16* [[CONV]], align 2, !llvm.access.group !18 // CHECK21-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK21: omp.body.continue: // CHECK21-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] @@ -22313,11 +22313,11 @@ int bar(int n){ // CHECK21-NEXT: store i64 [[AA]], i64* [[AA_ADDR]], align 8 // CHECK21-NEXT: [[CONV:%.*]] = bitcast i64* [[A_ADDR]] to i32* // CHECK21-NEXT: [[CONV1:%.*]] = bitcast i64* [[AA_ADDR]] to i16* -// CHECK21-NEXT: [[TMP0:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK21-NEXT: [[TMP0:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK21-NEXT: [[CONV2:%.*]] = bitcast i64* [[A_CASTED]] to i32* // CHECK21-NEXT: store i32 [[TMP0]], i32* [[CONV2]], align 4 // CHECK21-NEXT: [[TMP1:%.*]] = load i64, i64* [[A_CASTED]], align 8 -// CHECK21-NEXT: [[TMP2:%.*]] = load i16, i16* [[CONV1]], align 8 +// CHECK21-NEXT: [[TMP2:%.*]] = load i16, i16* [[CONV1]], align 2 // CHECK21-NEXT: [[CONV3:%.*]] = bitcast i64* [[AA_CASTED]] to i16* // CHECK21-NEXT: store i16 [[TMP2]], i16* [[CONV3]], align 2 // CHECK21-NEXT: [[TMP3:%.*]] = load i64, i64* [[AA_CASTED]], align 8 @@ -22376,14 +22376,14 @@ int bar(int n){ // CHECK21-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP7]], 1 // CHECK21-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK21-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !21 -// CHECK21-NEXT: [[TMP8:%.*]] = load i32, i32* [[CONV]], align 8, !llvm.access.group !21 +// CHECK21-NEXT: [[TMP8:%.*]] = load i32, i32* [[CONV]], align 4, !llvm.access.group !21 // CHECK21-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP8]], 1 -// CHECK21-NEXT: store i32 [[ADD3]], i32* [[CONV]], align 8, !llvm.access.group !21 -// CHECK21-NEXT: [[TMP9:%.*]] = load i16, i16* [[CONV1]], align 8, !llvm.access.group !21 +// CHECK21-NEXT: store i32 [[ADD3]], i32* [[CONV]], align 4, !llvm.access.group !21 +// CHECK21-NEXT: [[TMP9:%.*]] = load i16, i16* [[CONV1]], align 2, !llvm.access.group !21 // CHECK21-NEXT: [[CONV4:%.*]] = sext i16 [[TMP9]] to i32 // CHECK21-NEXT: [[ADD5:%.*]] = add nsw i32 [[CONV4]], 1 // CHECK21-NEXT: [[CONV6:%.*]] = trunc i32 [[ADD5]] to i16 -// CHECK21-NEXT: store i16 [[CONV6]], i16* [[CONV1]], align 8, !llvm.access.group !21 +// CHECK21-NEXT: store i16 [[CONV6]], i16* [[CONV1]], align 2, !llvm.access.group !21 // CHECK21-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK21: omp.body.continue: // CHECK21-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] @@ -22437,7 +22437,7 @@ int bar(int n){ // CHECK21-NEXT: [[TMP5:%.*]] = load i64, i64* [[VLA_ADDR4]], align 8 // CHECK21-NEXT: [[TMP6:%.*]] = load double*, double** [[CN_ADDR]], align 8 // CHECK21-NEXT: [[TMP7:%.*]] = load %struct.TT*, %struct.TT** [[D_ADDR]], align 8 -// CHECK21-NEXT: [[TMP8:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK21-NEXT: [[TMP8:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK21-NEXT: [[CONV5:%.*]] = bitcast i64* [[A_CASTED]] to i32* // CHECK21-NEXT: store i32 [[TMP8]], i32* [[CONV5]], align 4 // CHECK21-NEXT: [[TMP9:%.*]] = load i64, i64* [[A_CASTED]], align 8 @@ -22519,9 +22519,9 @@ int bar(int n){ // CHECK21-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP15]], 1 // CHECK21-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK21-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !24 -// CHECK21-NEXT: [[TMP16:%.*]] = load i32, i32* [[CONV]], align 8, !llvm.access.group !24 +// 
CHECK21-NEXT: [[TMP16:%.*]] = load i32, i32* [[CONV]], align 4, !llvm.access.group !24 // CHECK21-NEXT: [[ADD6:%.*]] = add nsw i32 [[TMP16]], 1 -// CHECK21-NEXT: store i32 [[ADD6]], i32* [[CONV]], align 8, !llvm.access.group !24 +// CHECK21-NEXT: store i32 [[ADD6]], i32* [[CONV]], align 4, !llvm.access.group !24 // CHECK21-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x float], [10 x float]* [[TMP0]], i64 0, i64 2 // CHECK21-NEXT: [[TMP17:%.*]] = load float, float* [[ARRAYIDX]], align 4, !llvm.access.group !24 // CHECK21-NEXT: [[CONV7:%.*]] = fpext float [[TMP17]] to double @@ -22599,19 +22599,19 @@ int bar(int n){ // CHECK21-NEXT: [[CONV2:%.*]] = bitcast i64* [[AA_ADDR]] to i16* // CHECK21-NEXT: [[CONV3:%.*]] = bitcast i64* [[AAA_ADDR]] to i8* // CHECK21-NEXT: [[TMP0:%.*]] = load [10 x i32]*, [10 x i32]** [[B_ADDR]], align 8 -// CHECK21-NEXT: [[TMP1:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK21-NEXT: [[TMP1:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK21-NEXT: [[CONV4:%.*]] = bitcast i64* [[A_CASTED]] to i32* // CHECK21-NEXT: store i32 [[TMP1]], i32* [[CONV4]], align 4 // CHECK21-NEXT: [[TMP2:%.*]] = load i64, i64* [[A_CASTED]], align 8 -// CHECK21-NEXT: [[TMP3:%.*]] = load i32, i32* [[CONV1]], align 8 +// CHECK21-NEXT: [[TMP3:%.*]] = load i32, i32* [[CONV1]], align 4 // CHECK21-NEXT: [[CONV5:%.*]] = bitcast i64* [[N_CASTED]] to i32* // CHECK21-NEXT: store i32 [[TMP3]], i32* [[CONV5]], align 4 // CHECK21-NEXT: [[TMP4:%.*]] = load i64, i64* [[N_CASTED]], align 8 -// CHECK21-NEXT: [[TMP5:%.*]] = load i16, i16* [[CONV2]], align 8 +// CHECK21-NEXT: [[TMP5:%.*]] = load i16, i16* [[CONV2]], align 2 // CHECK21-NEXT: [[CONV6:%.*]] = bitcast i64* [[AA_CASTED]] to i16* // CHECK21-NEXT: store i16 [[TMP5]], i16* [[CONV6]], align 2 // CHECK21-NEXT: [[TMP6:%.*]] = load i64, i64* [[AA_CASTED]], align 8 -// CHECK21-NEXT: [[TMP7:%.*]] = load i8, i8* [[CONV3]], align 8 +// CHECK21-NEXT: [[TMP7:%.*]] = load i8, i8* [[CONV3]], align 1 // CHECK21-NEXT: [[CONV7:%.*]] = bitcast i64* [[AAA_CASTED]] to i8* // CHECK21-NEXT: store i8 [[TMP7]], i8* [[CONV7]], align 1 // CHECK21-NEXT: [[TMP8:%.*]] = load i64, i64* [[AAA_CASTED]], align 8 @@ -22652,9 +22652,9 @@ int bar(int n){ // CHECK21-NEXT: [[CONV2:%.*]] = bitcast i64* [[AA_ADDR]] to i16* // CHECK21-NEXT: [[CONV3:%.*]] = bitcast i64* [[AAA_ADDR]] to i8* // CHECK21-NEXT: [[TMP0:%.*]] = load [10 x i32]*, [10 x i32]** [[B_ADDR]], align 8 -// CHECK21-NEXT: [[TMP1:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK21-NEXT: [[TMP1:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK21-NEXT: store i32 [[TMP1]], i32* [[DOTCAPTURE_EXPR_]], align 4 -// CHECK21-NEXT: [[TMP2:%.*]] = load i32, i32* [[CONV1]], align 8 +// CHECK21-NEXT: [[TMP2:%.*]] = load i32, i32* [[CONV1]], align 4 // CHECK21-NEXT: store i32 [[TMP2]], i32* [[DOTCAPTURE_EXPR_4]], align 4 // CHECK21-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_4]], align 4 // CHECK21-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 @@ -22707,19 +22707,19 @@ int bar(int n){ // CHECK21-NEXT: [[MUL:%.*]] = mul i32 [[TMP19]], 1 // CHECK21-NEXT: [[ADD12:%.*]] = add i32 [[TMP18]], [[MUL]] // CHECK21-NEXT: store i32 [[ADD12]], i32* [[I8]], align 4, !llvm.access.group !27 -// CHECK21-NEXT: [[TMP20:%.*]] = load i32, i32* [[CONV]], align 8, !llvm.access.group !27 +// CHECK21-NEXT: [[TMP20:%.*]] = load i32, i32* [[CONV]], align 4, !llvm.access.group !27 // CHECK21-NEXT: [[ADD13:%.*]] = add nsw i32 [[TMP20]], 1 -// CHECK21-NEXT: store i32 [[ADD13]], i32* [[CONV]], align 8, !llvm.access.group !27 -// 
CHECK21-NEXT: [[TMP21:%.*]] = load i16, i16* [[CONV2]], align 8, !llvm.access.group !27 +// CHECK21-NEXT: store i32 [[ADD13]], i32* [[CONV]], align 4, !llvm.access.group !27 +// CHECK21-NEXT: [[TMP21:%.*]] = load i16, i16* [[CONV2]], align 2, !llvm.access.group !27 // CHECK21-NEXT: [[CONV14:%.*]] = sext i16 [[TMP21]] to i32 // CHECK21-NEXT: [[ADD15:%.*]] = add nsw i32 [[CONV14]], 1 // CHECK21-NEXT: [[CONV16:%.*]] = trunc i32 [[ADD15]] to i16 -// CHECK21-NEXT: store i16 [[CONV16]], i16* [[CONV2]], align 8, !llvm.access.group !27 -// CHECK21-NEXT: [[TMP22:%.*]] = load i8, i8* [[CONV3]], align 8, !llvm.access.group !27 +// CHECK21-NEXT: store i16 [[CONV16]], i16* [[CONV2]], align 2, !llvm.access.group !27 +// CHECK21-NEXT: [[TMP22:%.*]] = load i8, i8* [[CONV3]], align 1, !llvm.access.group !27 // CHECK21-NEXT: [[CONV17:%.*]] = sext i8 [[TMP22]] to i32 // CHECK21-NEXT: [[ADD18:%.*]] = add nsw i32 [[CONV17]], 1 // CHECK21-NEXT: [[CONV19:%.*]] = trunc i32 [[ADD18]] to i8 -// CHECK21-NEXT: store i8 [[CONV19]], i8* [[CONV3]], align 8, !llvm.access.group !27 +// CHECK21-NEXT: store i8 [[CONV19]], i8* [[CONV3]], align 1, !llvm.access.group !27 // CHECK21-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], [10 x i32]* [[TMP0]], i64 0, i64 2 // CHECK21-NEXT: [[TMP23:%.*]] = load i32, i32* [[ARRAYIDX]], align 4, !llvm.access.group !27 // CHECK21-NEXT: [[ADD20:%.*]] = add nsw i32 [[TMP23]], 1 @@ -22782,11 +22782,11 @@ int bar(int n){ // CHECK21-NEXT: [[TMP2:%.*]] = load i64, i64* [[VLA_ADDR2]], align 8 // CHECK21-NEXT: [[TMP3:%.*]] = load i16*, i16** [[C_ADDR]], align 8 // CHECK21-NEXT: [[CONV3:%.*]] = bitcast i64* [[DOTCAPTURE_EXPR__ADDR]] to i8* -// CHECK21-NEXT: [[TMP4:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK21-NEXT: [[TMP4:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK21-NEXT: [[CONV4:%.*]] = bitcast i64* [[B_CASTED]] to i32* // CHECK21-NEXT: store i32 [[TMP4]], i32* [[CONV4]], align 4 // CHECK21-NEXT: [[TMP5:%.*]] = load i64, i64* [[B_CASTED]], align 8 -// CHECK21-NEXT: [[TMP6:%.*]] = load i8, i8* [[CONV3]], align 8 +// CHECK21-NEXT: [[TMP6:%.*]] = load i8, i8* [[CONV3]], align 1 // CHECK21-NEXT: [[TOBOOL:%.*]] = trunc i8 [[TMP6]] to i1 // CHECK21-NEXT: [[CONV5:%.*]] = bitcast i64* [[DOTCAPTURE_EXPR__CASTED]] to i8* // CHECK21-NEXT: [[FROMBOOL:%.*]] = zext i1 [[TOBOOL]] to i8 @@ -22848,7 +22848,7 @@ int bar(int n){ // CHECK21-NEXT: store i32 [[COND]], i32* [[DOTOMP_UB]], align 4 // CHECK21-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 // CHECK21-NEXT: store i32 [[TMP8]], i32* [[DOTOMP_IV]], align 4 -// CHECK21-NEXT: [[TMP9:%.*]] = load i8, i8* [[CONV3]], align 8 +// CHECK21-NEXT: [[TMP9:%.*]] = load i8, i8* [[CONV3]], align 1 // CHECK21-NEXT: [[TOBOOL:%.*]] = trunc i8 [[TMP9]] to i1 // CHECK21-NEXT: br i1 [[TOBOOL]], label [[OMP_IF_THEN:%.*]], label [[OMP_IF_ELSE:%.*]] // CHECK21: omp_if.then: @@ -22863,7 +22863,7 @@ int bar(int n){ // CHECK21-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP12]], 1 // CHECK21-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK21-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !30 -// CHECK21-NEXT: [[TMP13:%.*]] = load i32, i32* [[CONV]], align 8, !llvm.access.group !30 +// CHECK21-NEXT: [[TMP13:%.*]] = load i32, i32* [[CONV]], align 4, !llvm.access.group !30 // CHECK21-NEXT: [[CONV5:%.*]] = sitofp i32 [[TMP13]] to double // CHECK21-NEXT: [[ADD6:%.*]] = fadd double [[CONV5]], 1.500000e+00 // CHECK21-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_S1:%.*]], %struct.S1* [[TMP0]], i32 0, i32 0 @@ -22899,7 +22899,7 @@ int 
bar(int n){ // CHECK21-NEXT: [[MUL14:%.*]] = mul nsw i32 [[TMP19]], 1 // CHECK21-NEXT: [[ADD15:%.*]] = add nsw i32 0, [[MUL14]] // CHECK21-NEXT: store i32 [[ADD15]], i32* [[I]], align 4 -// CHECK21-NEXT: [[TMP20:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK21-NEXT: [[TMP20:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK21-NEXT: [[CONV16:%.*]] = sitofp i32 [[TMP20]] to double // CHECK21-NEXT: [[ADD17:%.*]] = fadd double [[CONV16]], 1.500000e+00 // CHECK21-NEXT: [[A18:%.*]] = getelementptr inbounds [[STRUCT_S1]], %struct.S1* [[TMP0]], i32 0, i32 0 @@ -22951,11 +22951,11 @@ int bar(int n){ // CHECK21-NEXT: [[CONV:%.*]] = bitcast i64* [[A_ADDR]] to i32* // CHECK21-NEXT: [[CONV1:%.*]] = bitcast i64* [[AA_ADDR]] to i16* // CHECK21-NEXT: [[TMP0:%.*]] = load [10 x i32]*, [10 x i32]** [[B_ADDR]], align 8 -// CHECK21-NEXT: [[TMP1:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK21-NEXT: [[TMP1:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK21-NEXT: [[CONV2:%.*]] = bitcast i64* [[A_CASTED]] to i32* // CHECK21-NEXT: store i32 [[TMP1]], i32* [[CONV2]], align 4 // CHECK21-NEXT: [[TMP2:%.*]] = load i64, i64* [[A_CASTED]], align 8 -// CHECK21-NEXT: [[TMP3:%.*]] = load i16, i16* [[CONV1]], align 8 +// CHECK21-NEXT: [[TMP3:%.*]] = load i16, i16* [[CONV1]], align 2 // CHECK21-NEXT: [[CONV3:%.*]] = bitcast i64* [[AA_CASTED]] to i16* // CHECK21-NEXT: store i16 [[TMP3]], i16* [[CONV3]], align 2 // CHECK21-NEXT: [[TMP4:%.*]] = load i64, i64* [[AA_CASTED]], align 8 @@ -23017,14 +23017,14 @@ int bar(int n){ // CHECK21-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP8]], 1 // CHECK21-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK21-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !35 -// CHECK21-NEXT: [[TMP9:%.*]] = load i32, i32* [[CONV]], align 8, !llvm.access.group !35 +// CHECK21-NEXT: [[TMP9:%.*]] = load i32, i32* [[CONV]], align 4, !llvm.access.group !35 // CHECK21-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP9]], 1 -// CHECK21-NEXT: store i32 [[ADD3]], i32* [[CONV]], align 8, !llvm.access.group !35 -// CHECK21-NEXT: [[TMP10:%.*]] = load i16, i16* [[CONV1]], align 8, !llvm.access.group !35 +// CHECK21-NEXT: store i32 [[ADD3]], i32* [[CONV]], align 4, !llvm.access.group !35 +// CHECK21-NEXT: [[TMP10:%.*]] = load i16, i16* [[CONV1]], align 2, !llvm.access.group !35 // CHECK21-NEXT: [[CONV4:%.*]] = sext i16 [[TMP10]] to i32 // CHECK21-NEXT: [[ADD5:%.*]] = add nsw i32 [[CONV4]], 1 // CHECK21-NEXT: [[CONV6:%.*]] = trunc i32 [[ADD5]] to i16 -// CHECK21-NEXT: store i16 [[CONV6]], i16* [[CONV1]], align 8, !llvm.access.group !35 +// CHECK21-NEXT: store i16 [[CONV6]], i16* [[CONV1]], align 2, !llvm.access.group !35 // CHECK21-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], [10 x i32]* [[TMP0]], i64 0, i64 2 // CHECK21-NEXT: [[TMP11:%.*]] = load i32, i32* [[ARRAYIDX]], align 4, !llvm.access.group !35 // CHECK21-NEXT: [[ADD7:%.*]] = add nsw i32 [[TMP11]], 1 @@ -23065,10 +23065,10 @@ int bar(int n){ // CHECK22-NEXT: [[CONV:%.*]] = bitcast i64* [[AA_ADDR]] to i16* // CHECK22-NEXT: [[CONV3:%.*]] = bitcast i64* [[DOTCAPTURE_EXPR__ADDR]] to i32* // CHECK22-NEXT: [[CONV4:%.*]] = bitcast i64* [[DOTCAPTURE_EXPR__ADDR2]] to i32* -// CHECK22-NEXT: [[TMP1:%.*]] = load i32, i32* [[CONV3]], align 8 -// CHECK22-NEXT: [[TMP2:%.*]] = load i32, i32* [[CONV4]], align 8 +// CHECK22-NEXT: [[TMP1:%.*]] = load i32, i32* [[CONV3]], align 4 +// CHECK22-NEXT: [[TMP2:%.*]] = load i32, i32* [[CONV4]], align 4 // CHECK22-NEXT: call void @__kmpc_push_num_teams(%struct.ident_t* @[[GLOB2]], i32 [[TMP0]], i32 [[TMP1]], i32 
[[TMP2]]) -// CHECK22-NEXT: [[TMP3:%.*]] = load i16, i16* [[CONV]], align 8 +// CHECK22-NEXT: [[TMP3:%.*]] = load i16, i16* [[CONV]], align 2 // CHECK22-NEXT: [[CONV5:%.*]] = bitcast i64* [[AA_CASTED]] to i16* // CHECK22-NEXT: store i16 [[TMP3]], i16* [[CONV5]], align 2 // CHECK22-NEXT: [[TMP4:%.*]] = load i64, i64* [[AA_CASTED]], align 8 @@ -23153,7 +23153,7 @@ int bar(int n){ // CHECK22-NEXT: [[AA_CASTED:%.*]] = alloca i64, align 8 // CHECK22-NEXT: store i64 [[AA]], i64* [[AA_ADDR]], align 8 // CHECK22-NEXT: [[CONV:%.*]] = bitcast i64* [[AA_ADDR]] to i16* -// CHECK22-NEXT: [[TMP0:%.*]] = load i16, i16* [[CONV]], align 8 +// CHECK22-NEXT: [[TMP0:%.*]] = load i16, i16* [[CONV]], align 2 // CHECK22-NEXT: [[CONV1:%.*]] = bitcast i64* [[AA_CASTED]] to i16* // CHECK22-NEXT: store i16 [[TMP0]], i16* [[CONV1]], align 2 // CHECK22-NEXT: [[TMP1:%.*]] = load i64, i64* [[AA_CASTED]], align 8 @@ -23209,11 +23209,11 @@ int bar(int n){ // CHECK22-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP7]], 1 // CHECK22-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK22-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !18 -// CHECK22-NEXT: [[TMP8:%.*]] = load i16, i16* [[CONV]], align 8, !llvm.access.group !18 +// CHECK22-NEXT: [[TMP8:%.*]] = load i16, i16* [[CONV]], align 2, !llvm.access.group !18 // CHECK22-NEXT: [[CONV2:%.*]] = sext i16 [[TMP8]] to i32 // CHECK22-NEXT: [[ADD3:%.*]] = add nsw i32 [[CONV2]], 1 // CHECK22-NEXT: [[CONV4:%.*]] = trunc i32 [[ADD3]] to i16 -// CHECK22-NEXT: store i16 [[CONV4]], i16* [[CONV]], align 8, !llvm.access.group !18 +// CHECK22-NEXT: store i16 [[CONV4]], i16* [[CONV]], align 2, !llvm.access.group !18 // CHECK22-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK22: omp.body.continue: // CHECK22-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] @@ -23247,11 +23247,11 @@ int bar(int n){ // CHECK22-NEXT: store i64 [[AA]], i64* [[AA_ADDR]], align 8 // CHECK22-NEXT: [[CONV:%.*]] = bitcast i64* [[A_ADDR]] to i32* // CHECK22-NEXT: [[CONV1:%.*]] = bitcast i64* [[AA_ADDR]] to i16* -// CHECK22-NEXT: [[TMP0:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK22-NEXT: [[TMP0:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK22-NEXT: [[CONV2:%.*]] = bitcast i64* [[A_CASTED]] to i32* // CHECK22-NEXT: store i32 [[TMP0]], i32* [[CONV2]], align 4 // CHECK22-NEXT: [[TMP1:%.*]] = load i64, i64* [[A_CASTED]], align 8 -// CHECK22-NEXT: [[TMP2:%.*]] = load i16, i16* [[CONV1]], align 8 +// CHECK22-NEXT: [[TMP2:%.*]] = load i16, i16* [[CONV1]], align 2 // CHECK22-NEXT: [[CONV3:%.*]] = bitcast i64* [[AA_CASTED]] to i16* // CHECK22-NEXT: store i16 [[TMP2]], i16* [[CONV3]], align 2 // CHECK22-NEXT: [[TMP3:%.*]] = load i64, i64* [[AA_CASTED]], align 8 @@ -23310,14 +23310,14 @@ int bar(int n){ // CHECK22-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP7]], 1 // CHECK22-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK22-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !21 -// CHECK22-NEXT: [[TMP8:%.*]] = load i32, i32* [[CONV]], align 8, !llvm.access.group !21 +// CHECK22-NEXT: [[TMP8:%.*]] = load i32, i32* [[CONV]], align 4, !llvm.access.group !21 // CHECK22-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP8]], 1 -// CHECK22-NEXT: store i32 [[ADD3]], i32* [[CONV]], align 8, !llvm.access.group !21 -// CHECK22-NEXT: [[TMP9:%.*]] = load i16, i16* [[CONV1]], align 8, !llvm.access.group !21 +// CHECK22-NEXT: store i32 [[ADD3]], i32* [[CONV]], align 4, !llvm.access.group !21 +// CHECK22-NEXT: [[TMP9:%.*]] = load i16, i16* [[CONV1]], align 2, !llvm.access.group !21 // CHECK22-NEXT: [[CONV4:%.*]] = sext i16 [[TMP9]] to 
i32 // CHECK22-NEXT: [[ADD5:%.*]] = add nsw i32 [[CONV4]], 1 // CHECK22-NEXT: [[CONV6:%.*]] = trunc i32 [[ADD5]] to i16 -// CHECK22-NEXT: store i16 [[CONV6]], i16* [[CONV1]], align 8, !llvm.access.group !21 +// CHECK22-NEXT: store i16 [[CONV6]], i16* [[CONV1]], align 2, !llvm.access.group !21 // CHECK22-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK22: omp.body.continue: // CHECK22-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] @@ -23371,7 +23371,7 @@ int bar(int n){ // CHECK22-NEXT: [[TMP5:%.*]] = load i64, i64* [[VLA_ADDR4]], align 8 // CHECK22-NEXT: [[TMP6:%.*]] = load double*, double** [[CN_ADDR]], align 8 // CHECK22-NEXT: [[TMP7:%.*]] = load %struct.TT*, %struct.TT** [[D_ADDR]], align 8 -// CHECK22-NEXT: [[TMP8:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK22-NEXT: [[TMP8:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK22-NEXT: [[CONV5:%.*]] = bitcast i64* [[A_CASTED]] to i32* // CHECK22-NEXT: store i32 [[TMP8]], i32* [[CONV5]], align 4 // CHECK22-NEXT: [[TMP9:%.*]] = load i64, i64* [[A_CASTED]], align 8 @@ -23453,9 +23453,9 @@ int bar(int n){ // CHECK22-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP15]], 1 // CHECK22-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK22-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !24 -// CHECK22-NEXT: [[TMP16:%.*]] = load i32, i32* [[CONV]], align 8, !llvm.access.group !24 +// CHECK22-NEXT: [[TMP16:%.*]] = load i32, i32* [[CONV]], align 4, !llvm.access.group !24 // CHECK22-NEXT: [[ADD6:%.*]] = add nsw i32 [[TMP16]], 1 -// CHECK22-NEXT: store i32 [[ADD6]], i32* [[CONV]], align 8, !llvm.access.group !24 +// CHECK22-NEXT: store i32 [[ADD6]], i32* [[CONV]], align 4, !llvm.access.group !24 // CHECK22-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x float], [10 x float]* [[TMP0]], i64 0, i64 2 // CHECK22-NEXT: [[TMP17:%.*]] = load float, float* [[ARRAYIDX]], align 4, !llvm.access.group !24 // CHECK22-NEXT: [[CONV7:%.*]] = fpext float [[TMP17]] to double @@ -23533,19 +23533,19 @@ int bar(int n){ // CHECK22-NEXT: [[CONV2:%.*]] = bitcast i64* [[AA_ADDR]] to i16* // CHECK22-NEXT: [[CONV3:%.*]] = bitcast i64* [[AAA_ADDR]] to i8* // CHECK22-NEXT: [[TMP0:%.*]] = load [10 x i32]*, [10 x i32]** [[B_ADDR]], align 8 -// CHECK22-NEXT: [[TMP1:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK22-NEXT: [[TMP1:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK22-NEXT: [[CONV4:%.*]] = bitcast i64* [[A_CASTED]] to i32* // CHECK22-NEXT: store i32 [[TMP1]], i32* [[CONV4]], align 4 // CHECK22-NEXT: [[TMP2:%.*]] = load i64, i64* [[A_CASTED]], align 8 -// CHECK22-NEXT: [[TMP3:%.*]] = load i32, i32* [[CONV1]], align 8 +// CHECK22-NEXT: [[TMP3:%.*]] = load i32, i32* [[CONV1]], align 4 // CHECK22-NEXT: [[CONV5:%.*]] = bitcast i64* [[N_CASTED]] to i32* // CHECK22-NEXT: store i32 [[TMP3]], i32* [[CONV5]], align 4 // CHECK22-NEXT: [[TMP4:%.*]] = load i64, i64* [[N_CASTED]], align 8 -// CHECK22-NEXT: [[TMP5:%.*]] = load i16, i16* [[CONV2]], align 8 +// CHECK22-NEXT: [[TMP5:%.*]] = load i16, i16* [[CONV2]], align 2 // CHECK22-NEXT: [[CONV6:%.*]] = bitcast i64* [[AA_CASTED]] to i16* // CHECK22-NEXT: store i16 [[TMP5]], i16* [[CONV6]], align 2 // CHECK22-NEXT: [[TMP6:%.*]] = load i64, i64* [[AA_CASTED]], align 8 -// CHECK22-NEXT: [[TMP7:%.*]] = load i8, i8* [[CONV3]], align 8 +// CHECK22-NEXT: [[TMP7:%.*]] = load i8, i8* [[CONV3]], align 1 // CHECK22-NEXT: [[CONV7:%.*]] = bitcast i64* [[AAA_CASTED]] to i8* // CHECK22-NEXT: store i8 [[TMP7]], i8* [[CONV7]], align 1 // CHECK22-NEXT: [[TMP8:%.*]] = load i64, i64* [[AAA_CASTED]], align 8 @@ -23586,9 +23586,9 @@ int bar(int 
n){ // CHECK22-NEXT: [[CONV2:%.*]] = bitcast i64* [[AA_ADDR]] to i16* // CHECK22-NEXT: [[CONV3:%.*]] = bitcast i64* [[AAA_ADDR]] to i8* // CHECK22-NEXT: [[TMP0:%.*]] = load [10 x i32]*, [10 x i32]** [[B_ADDR]], align 8 -// CHECK22-NEXT: [[TMP1:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK22-NEXT: [[TMP1:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK22-NEXT: store i32 [[TMP1]], i32* [[DOTCAPTURE_EXPR_]], align 4 -// CHECK22-NEXT: [[TMP2:%.*]] = load i32, i32* [[CONV1]], align 8 +// CHECK22-NEXT: [[TMP2:%.*]] = load i32, i32* [[CONV1]], align 4 // CHECK22-NEXT: store i32 [[TMP2]], i32* [[DOTCAPTURE_EXPR_4]], align 4 // CHECK22-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_4]], align 4 // CHECK22-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 @@ -23641,19 +23641,19 @@ int bar(int n){ // CHECK22-NEXT: [[MUL:%.*]] = mul i32 [[TMP19]], 1 // CHECK22-NEXT: [[ADD12:%.*]] = add i32 [[TMP18]], [[MUL]] // CHECK22-NEXT: store i32 [[ADD12]], i32* [[I8]], align 4, !llvm.access.group !27 -// CHECK22-NEXT: [[TMP20:%.*]] = load i32, i32* [[CONV]], align 8, !llvm.access.group !27 +// CHECK22-NEXT: [[TMP20:%.*]] = load i32, i32* [[CONV]], align 4, !llvm.access.group !27 // CHECK22-NEXT: [[ADD13:%.*]] = add nsw i32 [[TMP20]], 1 -// CHECK22-NEXT: store i32 [[ADD13]], i32* [[CONV]], align 8, !llvm.access.group !27 -// CHECK22-NEXT: [[TMP21:%.*]] = load i16, i16* [[CONV2]], align 8, !llvm.access.group !27 +// CHECK22-NEXT: store i32 [[ADD13]], i32* [[CONV]], align 4, !llvm.access.group !27 +// CHECK22-NEXT: [[TMP21:%.*]] = load i16, i16* [[CONV2]], align 2, !llvm.access.group !27 // CHECK22-NEXT: [[CONV14:%.*]] = sext i16 [[TMP21]] to i32 // CHECK22-NEXT: [[ADD15:%.*]] = add nsw i32 [[CONV14]], 1 // CHECK22-NEXT: [[CONV16:%.*]] = trunc i32 [[ADD15]] to i16 -// CHECK22-NEXT: store i16 [[CONV16]], i16* [[CONV2]], align 8, !llvm.access.group !27 -// CHECK22-NEXT: [[TMP22:%.*]] = load i8, i8* [[CONV3]], align 8, !llvm.access.group !27 +// CHECK22-NEXT: store i16 [[CONV16]], i16* [[CONV2]], align 2, !llvm.access.group !27 +// CHECK22-NEXT: [[TMP22:%.*]] = load i8, i8* [[CONV3]], align 1, !llvm.access.group !27 // CHECK22-NEXT: [[CONV17:%.*]] = sext i8 [[TMP22]] to i32 // CHECK22-NEXT: [[ADD18:%.*]] = add nsw i32 [[CONV17]], 1 // CHECK22-NEXT: [[CONV19:%.*]] = trunc i32 [[ADD18]] to i8 -// CHECK22-NEXT: store i8 [[CONV19]], i8* [[CONV3]], align 8, !llvm.access.group !27 +// CHECK22-NEXT: store i8 [[CONV19]], i8* [[CONV3]], align 1, !llvm.access.group !27 // CHECK22-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], [10 x i32]* [[TMP0]], i64 0, i64 2 // CHECK22-NEXT: [[TMP23:%.*]] = load i32, i32* [[ARRAYIDX]], align 4, !llvm.access.group !27 // CHECK22-NEXT: [[ADD20:%.*]] = add nsw i32 [[TMP23]], 1 @@ -23716,11 +23716,11 @@ int bar(int n){ // CHECK22-NEXT: [[TMP2:%.*]] = load i64, i64* [[VLA_ADDR2]], align 8 // CHECK22-NEXT: [[TMP3:%.*]] = load i16*, i16** [[C_ADDR]], align 8 // CHECK22-NEXT: [[CONV3:%.*]] = bitcast i64* [[DOTCAPTURE_EXPR__ADDR]] to i8* -// CHECK22-NEXT: [[TMP4:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK22-NEXT: [[TMP4:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK22-NEXT: [[CONV4:%.*]] = bitcast i64* [[B_CASTED]] to i32* // CHECK22-NEXT: store i32 [[TMP4]], i32* [[CONV4]], align 4 // CHECK22-NEXT: [[TMP5:%.*]] = load i64, i64* [[B_CASTED]], align 8 -// CHECK22-NEXT: [[TMP6:%.*]] = load i8, i8* [[CONV3]], align 8 +// CHECK22-NEXT: [[TMP6:%.*]] = load i8, i8* [[CONV3]], align 1 // CHECK22-NEXT: [[TOBOOL:%.*]] = trunc i8 [[TMP6]] to i1 // 
CHECK22-NEXT: [[CONV5:%.*]] = bitcast i64* [[DOTCAPTURE_EXPR__CASTED]] to i8* // CHECK22-NEXT: [[FROMBOOL:%.*]] = zext i1 [[TOBOOL]] to i8 @@ -23782,7 +23782,7 @@ int bar(int n){ // CHECK22-NEXT: store i32 [[COND]], i32* [[DOTOMP_UB]], align 4 // CHECK22-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 // CHECK22-NEXT: store i32 [[TMP8]], i32* [[DOTOMP_IV]], align 4 -// CHECK22-NEXT: [[TMP9:%.*]] = load i8, i8* [[CONV3]], align 8 +// CHECK22-NEXT: [[TMP9:%.*]] = load i8, i8* [[CONV3]], align 1 // CHECK22-NEXT: [[TOBOOL:%.*]] = trunc i8 [[TMP9]] to i1 // CHECK22-NEXT: br i1 [[TOBOOL]], label [[OMP_IF_THEN:%.*]], label [[OMP_IF_ELSE:%.*]] // CHECK22: omp_if.then: @@ -23797,7 +23797,7 @@ int bar(int n){ // CHECK22-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP12]], 1 // CHECK22-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK22-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !30 -// CHECK22-NEXT: [[TMP13:%.*]] = load i32, i32* [[CONV]], align 8, !llvm.access.group !30 +// CHECK22-NEXT: [[TMP13:%.*]] = load i32, i32* [[CONV]], align 4, !llvm.access.group !30 // CHECK22-NEXT: [[CONV5:%.*]] = sitofp i32 [[TMP13]] to double // CHECK22-NEXT: [[ADD6:%.*]] = fadd double [[CONV5]], 1.500000e+00 // CHECK22-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_S1:%.*]], %struct.S1* [[TMP0]], i32 0, i32 0 @@ -23833,7 +23833,7 @@ int bar(int n){ // CHECK22-NEXT: [[MUL14:%.*]] = mul nsw i32 [[TMP19]], 1 // CHECK22-NEXT: [[ADD15:%.*]] = add nsw i32 0, [[MUL14]] // CHECK22-NEXT: store i32 [[ADD15]], i32* [[I]], align 4 -// CHECK22-NEXT: [[TMP20:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK22-NEXT: [[TMP20:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK22-NEXT: [[CONV16:%.*]] = sitofp i32 [[TMP20]] to double // CHECK22-NEXT: [[ADD17:%.*]] = fadd double [[CONV16]], 1.500000e+00 // CHECK22-NEXT: [[A18:%.*]] = getelementptr inbounds [[STRUCT_S1]], %struct.S1* [[TMP0]], i32 0, i32 0 @@ -23885,11 +23885,11 @@ int bar(int n){ // CHECK22-NEXT: [[CONV:%.*]] = bitcast i64* [[A_ADDR]] to i32* // CHECK22-NEXT: [[CONV1:%.*]] = bitcast i64* [[AA_ADDR]] to i16* // CHECK22-NEXT: [[TMP0:%.*]] = load [10 x i32]*, [10 x i32]** [[B_ADDR]], align 8 -// CHECK22-NEXT: [[TMP1:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK22-NEXT: [[TMP1:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK22-NEXT: [[CONV2:%.*]] = bitcast i64* [[A_CASTED]] to i32* // CHECK22-NEXT: store i32 [[TMP1]], i32* [[CONV2]], align 4 // CHECK22-NEXT: [[TMP2:%.*]] = load i64, i64* [[A_CASTED]], align 8 -// CHECK22-NEXT: [[TMP3:%.*]] = load i16, i16* [[CONV1]], align 8 +// CHECK22-NEXT: [[TMP3:%.*]] = load i16, i16* [[CONV1]], align 2 // CHECK22-NEXT: [[CONV3:%.*]] = bitcast i64* [[AA_CASTED]] to i16* // CHECK22-NEXT: store i16 [[TMP3]], i16* [[CONV3]], align 2 // CHECK22-NEXT: [[TMP4:%.*]] = load i64, i64* [[AA_CASTED]], align 8 @@ -23951,14 +23951,14 @@ int bar(int n){ // CHECK22-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP8]], 1 // CHECK22-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK22-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !35 -// CHECK22-NEXT: [[TMP9:%.*]] = load i32, i32* [[CONV]], align 8, !llvm.access.group !35 +// CHECK22-NEXT: [[TMP9:%.*]] = load i32, i32* [[CONV]], align 4, !llvm.access.group !35 // CHECK22-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP9]], 1 -// CHECK22-NEXT: store i32 [[ADD3]], i32* [[CONV]], align 8, !llvm.access.group !35 -// CHECK22-NEXT: [[TMP10:%.*]] = load i16, i16* [[CONV1]], align 8, !llvm.access.group !35 +// CHECK22-NEXT: store i32 [[ADD3]], i32* [[CONV]], align 4, 
!llvm.access.group !35 +// CHECK22-NEXT: [[TMP10:%.*]] = load i16, i16* [[CONV1]], align 2, !llvm.access.group !35 // CHECK22-NEXT: [[CONV4:%.*]] = sext i16 [[TMP10]] to i32 // CHECK22-NEXT: [[ADD5:%.*]] = add nsw i32 [[CONV4]], 1 // CHECK22-NEXT: [[CONV6:%.*]] = trunc i32 [[ADD5]] to i16 -// CHECK22-NEXT: store i16 [[CONV6]], i16* [[CONV1]], align 8, !llvm.access.group !35 +// CHECK22-NEXT: store i16 [[CONV6]], i16* [[CONV1]], align 2, !llvm.access.group !35 // CHECK22-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], [10 x i32]* [[TMP0]], i64 0, i64 2 // CHECK22-NEXT: [[TMP11:%.*]] = load i32, i32* [[ARRAYIDX]], align 4, !llvm.access.group !35 // CHECK22-NEXT: [[ADD7:%.*]] = add nsw i32 [[TMP11]], 1 @@ -24000,7 +24000,7 @@ int bar(int n){ // CHECK23-NEXT: [[TMP1:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR__ADDR]], align 4 // CHECK23-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR__ADDR2]], align 4 // CHECK23-NEXT: call void @__kmpc_push_num_teams(%struct.ident_t* @[[GLOB2]], i32 [[TMP0]], i32 [[TMP1]], i32 [[TMP2]]) -// CHECK23-NEXT: [[TMP3:%.*]] = load i16, i16* [[CONV]], align 4 +// CHECK23-NEXT: [[TMP3:%.*]] = load i16, i16* [[CONV]], align 2 // CHECK23-NEXT: [[CONV3:%.*]] = bitcast i32* [[AA_CASTED]] to i16* // CHECK23-NEXT: store i16 [[TMP3]], i16* [[CONV3]], align 2 // CHECK23-NEXT: [[TMP4:%.*]] = load i32, i32* [[AA_CASTED]], align 4 @@ -24085,7 +24085,7 @@ int bar(int n){ // CHECK23-NEXT: [[AA_CASTED:%.*]] = alloca i32, align 4 // CHECK23-NEXT: store i32 [[AA]], i32* [[AA_ADDR]], align 4 // CHECK23-NEXT: [[CONV:%.*]] = bitcast i32* [[AA_ADDR]] to i16* -// CHECK23-NEXT: [[TMP0:%.*]] = load i16, i16* [[CONV]], align 4 +// CHECK23-NEXT: [[TMP0:%.*]] = load i16, i16* [[CONV]], align 2 // CHECK23-NEXT: [[CONV1:%.*]] = bitcast i32* [[AA_CASTED]] to i16* // CHECK23-NEXT: store i16 [[TMP0]], i16* [[CONV1]], align 2 // CHECK23-NEXT: [[TMP1:%.*]] = load i32, i32* [[AA_CASTED]], align 4 @@ -24141,11 +24141,11 @@ int bar(int n){ // CHECK23-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP7]], 1 // CHECK23-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK23-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !19 -// CHECK23-NEXT: [[TMP8:%.*]] = load i16, i16* [[CONV]], align 4, !llvm.access.group !19 +// CHECK23-NEXT: [[TMP8:%.*]] = load i16, i16* [[CONV]], align 2, !llvm.access.group !19 // CHECK23-NEXT: [[CONV2:%.*]] = sext i16 [[TMP8]] to i32 // CHECK23-NEXT: [[ADD3:%.*]] = add nsw i32 [[CONV2]], 1 // CHECK23-NEXT: [[CONV4:%.*]] = trunc i32 [[ADD3]] to i16 -// CHECK23-NEXT: store i16 [[CONV4]], i16* [[CONV]], align 4, !llvm.access.group !19 +// CHECK23-NEXT: store i16 [[CONV4]], i16* [[CONV]], align 2, !llvm.access.group !19 // CHECK23-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK23: omp.body.continue: // CHECK23-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] @@ -24181,7 +24181,7 @@ int bar(int n){ // CHECK23-NEXT: [[TMP0:%.*]] = load i32, i32* [[A_ADDR]], align 4 // CHECK23-NEXT: store i32 [[TMP0]], i32* [[A_CASTED]], align 4 // CHECK23-NEXT: [[TMP1:%.*]] = load i32, i32* [[A_CASTED]], align 4 -// CHECK23-NEXT: [[TMP2:%.*]] = load i16, i16* [[CONV]], align 4 +// CHECK23-NEXT: [[TMP2:%.*]] = load i16, i16* [[CONV]], align 2 // CHECK23-NEXT: [[CONV1:%.*]] = bitcast i32* [[AA_CASTED]] to i16* // CHECK23-NEXT: store i16 [[TMP2]], i16* [[CONV1]], align 2 // CHECK23-NEXT: [[TMP3:%.*]] = load i32, i32* [[AA_CASTED]], align 4 @@ -24242,11 +24242,11 @@ int bar(int n){ // CHECK23-NEXT: [[TMP8:%.*]] = load i32, i32* [[A_ADDR]], align 4, !llvm.access.group !22 // CHECK23-NEXT: 
[[ADD2:%.*]] = add nsw i32 [[TMP8]], 1 // CHECK23-NEXT: store i32 [[ADD2]], i32* [[A_ADDR]], align 4, !llvm.access.group !22 -// CHECK23-NEXT: [[TMP9:%.*]] = load i16, i16* [[CONV]], align 4, !llvm.access.group !22 +// CHECK23-NEXT: [[TMP9:%.*]] = load i16, i16* [[CONV]], align 2, !llvm.access.group !22 // CHECK23-NEXT: [[CONV3:%.*]] = sext i16 [[TMP9]] to i32 // CHECK23-NEXT: [[ADD4:%.*]] = add nsw i32 [[CONV3]], 1 // CHECK23-NEXT: [[CONV5:%.*]] = trunc i32 [[ADD4]] to i16 -// CHECK23-NEXT: store i16 [[CONV5]], i16* [[CONV]], align 4, !llvm.access.group !22 +// CHECK23-NEXT: store i16 [[CONV5]], i16* [[CONV]], align 2, !llvm.access.group !22 // CHECK23-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK23: omp.body.continue: // CHECK23-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] @@ -24463,11 +24463,11 @@ int bar(int n){ // CHECK23-NEXT: [[TMP3:%.*]] = load i32, i32* [[N_ADDR]], align 4 // CHECK23-NEXT: store i32 [[TMP3]], i32* [[N_CASTED]], align 4 // CHECK23-NEXT: [[TMP4:%.*]] = load i32, i32* [[N_CASTED]], align 4 -// CHECK23-NEXT: [[TMP5:%.*]] = load i16, i16* [[CONV]], align 4 +// CHECK23-NEXT: [[TMP5:%.*]] = load i16, i16* [[CONV]], align 2 // CHECK23-NEXT: [[CONV2:%.*]] = bitcast i32* [[AA_CASTED]] to i16* // CHECK23-NEXT: store i16 [[TMP5]], i16* [[CONV2]], align 2 // CHECK23-NEXT: [[TMP6:%.*]] = load i32, i32* [[AA_CASTED]], align 4 -// CHECK23-NEXT: [[TMP7:%.*]] = load i8, i8* [[CONV1]], align 4 +// CHECK23-NEXT: [[TMP7:%.*]] = load i8, i8* [[CONV1]], align 1 // CHECK23-NEXT: [[CONV3:%.*]] = bitcast i32* [[AAA_CASTED]] to i8* // CHECK23-NEXT: store i8 [[TMP7]], i8* [[CONV3]], align 1 // CHECK23-NEXT: [[TMP8:%.*]] = load i32, i32* [[AAA_CASTED]], align 4 @@ -24564,16 +24564,16 @@ int bar(int n){ // CHECK23-NEXT: [[TMP20:%.*]] = load i32, i32* [[A_ADDR]], align 4, !llvm.access.group !28 // CHECK23-NEXT: [[ADD11:%.*]] = add nsw i32 [[TMP20]], 1 // CHECK23-NEXT: store i32 [[ADD11]], i32* [[A_ADDR]], align 4, !llvm.access.group !28 -// CHECK23-NEXT: [[TMP21:%.*]] = load i16, i16* [[CONV]], align 4, !llvm.access.group !28 +// CHECK23-NEXT: [[TMP21:%.*]] = load i16, i16* [[CONV]], align 2, !llvm.access.group !28 // CHECK23-NEXT: [[CONV12:%.*]] = sext i16 [[TMP21]] to i32 // CHECK23-NEXT: [[ADD13:%.*]] = add nsw i32 [[CONV12]], 1 // CHECK23-NEXT: [[CONV14:%.*]] = trunc i32 [[ADD13]] to i16 -// CHECK23-NEXT: store i16 [[CONV14]], i16* [[CONV]], align 4, !llvm.access.group !28 -// CHECK23-NEXT: [[TMP22:%.*]] = load i8, i8* [[CONV1]], align 4, !llvm.access.group !28 +// CHECK23-NEXT: store i16 [[CONV14]], i16* [[CONV]], align 2, !llvm.access.group !28 +// CHECK23-NEXT: [[TMP22:%.*]] = load i8, i8* [[CONV1]], align 1, !llvm.access.group !28 // CHECK23-NEXT: [[CONV15:%.*]] = sext i8 [[TMP22]] to i32 // CHECK23-NEXT: [[ADD16:%.*]] = add nsw i32 [[CONV15]], 1 // CHECK23-NEXT: [[CONV17:%.*]] = trunc i32 [[ADD16]] to i8 -// CHECK23-NEXT: store i8 [[CONV17]], i8* [[CONV1]], align 4, !llvm.access.group !28 +// CHECK23-NEXT: store i8 [[CONV17]], i8* [[CONV1]], align 1, !llvm.access.group !28 // CHECK23-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], [10 x i32]* [[TMP0]], i32 0, i32 2 // CHECK23-NEXT: [[TMP23:%.*]] = load i32, i32* [[ARRAYIDX]], align 4, !llvm.access.group !28 // CHECK23-NEXT: [[ADD18:%.*]] = add nsw i32 [[TMP23]], 1 @@ -24638,7 +24638,7 @@ int bar(int n){ // CHECK23-NEXT: [[TMP4:%.*]] = load i32, i32* [[B_ADDR]], align 4 // CHECK23-NEXT: store i32 [[TMP4]], i32* [[B_CASTED]], align 4 // CHECK23-NEXT: [[TMP5:%.*]] = load i32, i32* [[B_CASTED]], align 4 -// CHECK23-NEXT: 
[[TMP6:%.*]] = load i8, i8* [[CONV]], align 4 +// CHECK23-NEXT: [[TMP6:%.*]] = load i8, i8* [[CONV]], align 1 // CHECK23-NEXT: [[TOBOOL:%.*]] = trunc i8 [[TMP6]] to i1 // CHECK23-NEXT: [[CONV3:%.*]] = bitcast i32* [[DOTCAPTURE_EXPR__CASTED]] to i8* // CHECK23-NEXT: [[FROMBOOL:%.*]] = zext i1 [[TOBOOL]] to i8 @@ -24699,7 +24699,7 @@ int bar(int n){ // CHECK23-NEXT: store i32 [[COND]], i32* [[DOTOMP_UB]], align 4 // CHECK23-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 // CHECK23-NEXT: store i32 [[TMP8]], i32* [[DOTOMP_IV]], align 4 -// CHECK23-NEXT: [[TMP9:%.*]] = load i8, i8* [[CONV]], align 4 +// CHECK23-NEXT: [[TMP9:%.*]] = load i8, i8* [[CONV]], align 1 // CHECK23-NEXT: [[TOBOOL:%.*]] = trunc i8 [[TMP9]] to i1 // CHECK23-NEXT: br i1 [[TOBOOL]], label [[OMP_IF_THEN:%.*]], label [[OMP_IF_ELSE:%.*]] // CHECK23: omp_if.then: @@ -24804,7 +24804,7 @@ int bar(int n){ // CHECK23-NEXT: [[TMP1:%.*]] = load i32, i32* [[A_ADDR]], align 4 // CHECK23-NEXT: store i32 [[TMP1]], i32* [[A_CASTED]], align 4 // CHECK23-NEXT: [[TMP2:%.*]] = load i32, i32* [[A_CASTED]], align 4 -// CHECK23-NEXT: [[TMP3:%.*]] = load i16, i16* [[CONV]], align 4 +// CHECK23-NEXT: [[TMP3:%.*]] = load i16, i16* [[CONV]], align 2 // CHECK23-NEXT: [[CONV1:%.*]] = bitcast i32* [[AA_CASTED]] to i16* // CHECK23-NEXT: store i16 [[TMP3]], i16* [[CONV1]], align 2 // CHECK23-NEXT: [[TMP4:%.*]] = load i32, i32* [[AA_CASTED]], align 4 @@ -24868,11 +24868,11 @@ int bar(int n){ // CHECK23-NEXT: [[TMP9:%.*]] = load i32, i32* [[A_ADDR]], align 4, !llvm.access.group !36 // CHECK23-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP9]], 1 // CHECK23-NEXT: store i32 [[ADD2]], i32* [[A_ADDR]], align 4, !llvm.access.group !36 -// CHECK23-NEXT: [[TMP10:%.*]] = load i16, i16* [[CONV]], align 4, !llvm.access.group !36 +// CHECK23-NEXT: [[TMP10:%.*]] = load i16, i16* [[CONV]], align 2, !llvm.access.group !36 // CHECK23-NEXT: [[CONV3:%.*]] = sext i16 [[TMP10]] to i32 // CHECK23-NEXT: [[ADD4:%.*]] = add nsw i32 [[CONV3]], 1 // CHECK23-NEXT: [[CONV5:%.*]] = trunc i32 [[ADD4]] to i16 -// CHECK23-NEXT: store i16 [[CONV5]], i16* [[CONV]], align 4, !llvm.access.group !36 +// CHECK23-NEXT: store i16 [[CONV5]], i16* [[CONV]], align 2, !llvm.access.group !36 // CHECK23-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], [10 x i32]* [[TMP0]], i32 0, i32 2 // CHECK23-NEXT: [[TMP11:%.*]] = load i32, i32* [[ARRAYIDX]], align 4, !llvm.access.group !36 // CHECK23-NEXT: [[ADD6:%.*]] = add nsw i32 [[TMP11]], 1 @@ -24914,7 +24914,7 @@ int bar(int n){ // CHECK24-NEXT: [[TMP1:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR__ADDR]], align 4 // CHECK24-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR__ADDR2]], align 4 // CHECK24-NEXT: call void @__kmpc_push_num_teams(%struct.ident_t* @[[GLOB2]], i32 [[TMP0]], i32 [[TMP1]], i32 [[TMP2]]) -// CHECK24-NEXT: [[TMP3:%.*]] = load i16, i16* [[CONV]], align 4 +// CHECK24-NEXT: [[TMP3:%.*]] = load i16, i16* [[CONV]], align 2 // CHECK24-NEXT: [[CONV3:%.*]] = bitcast i32* [[AA_CASTED]] to i16* // CHECK24-NEXT: store i16 [[TMP3]], i16* [[CONV3]], align 2 // CHECK24-NEXT: [[TMP4:%.*]] = load i32, i32* [[AA_CASTED]], align 4 @@ -24999,7 +24999,7 @@ int bar(int n){ // CHECK24-NEXT: [[AA_CASTED:%.*]] = alloca i32, align 4 // CHECK24-NEXT: store i32 [[AA]], i32* [[AA_ADDR]], align 4 // CHECK24-NEXT: [[CONV:%.*]] = bitcast i32* [[AA_ADDR]] to i16* -// CHECK24-NEXT: [[TMP0:%.*]] = load i16, i16* [[CONV]], align 4 +// CHECK24-NEXT: [[TMP0:%.*]] = load i16, i16* [[CONV]], align 2 // CHECK24-NEXT: [[CONV1:%.*]] = bitcast i32* [[AA_CASTED]] 
to i16* // CHECK24-NEXT: store i16 [[TMP0]], i16* [[CONV1]], align 2 // CHECK24-NEXT: [[TMP1:%.*]] = load i32, i32* [[AA_CASTED]], align 4 @@ -25055,11 +25055,11 @@ int bar(int n){ // CHECK24-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP7]], 1 // CHECK24-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK24-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !19 -// CHECK24-NEXT: [[TMP8:%.*]] = load i16, i16* [[CONV]], align 4, !llvm.access.group !19 +// CHECK24-NEXT: [[TMP8:%.*]] = load i16, i16* [[CONV]], align 2, !llvm.access.group !19 // CHECK24-NEXT: [[CONV2:%.*]] = sext i16 [[TMP8]] to i32 // CHECK24-NEXT: [[ADD3:%.*]] = add nsw i32 [[CONV2]], 1 // CHECK24-NEXT: [[CONV4:%.*]] = trunc i32 [[ADD3]] to i16 -// CHECK24-NEXT: store i16 [[CONV4]], i16* [[CONV]], align 4, !llvm.access.group !19 +// CHECK24-NEXT: store i16 [[CONV4]], i16* [[CONV]], align 2, !llvm.access.group !19 // CHECK24-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK24: omp.body.continue: // CHECK24-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] @@ -25095,7 +25095,7 @@ int bar(int n){ // CHECK24-NEXT: [[TMP0:%.*]] = load i32, i32* [[A_ADDR]], align 4 // CHECK24-NEXT: store i32 [[TMP0]], i32* [[A_CASTED]], align 4 // CHECK24-NEXT: [[TMP1:%.*]] = load i32, i32* [[A_CASTED]], align 4 -// CHECK24-NEXT: [[TMP2:%.*]] = load i16, i16* [[CONV]], align 4 +// CHECK24-NEXT: [[TMP2:%.*]] = load i16, i16* [[CONV]], align 2 // CHECK24-NEXT: [[CONV1:%.*]] = bitcast i32* [[AA_CASTED]] to i16* // CHECK24-NEXT: store i16 [[TMP2]], i16* [[CONV1]], align 2 // CHECK24-NEXT: [[TMP3:%.*]] = load i32, i32* [[AA_CASTED]], align 4 @@ -25156,11 +25156,11 @@ int bar(int n){ // CHECK24-NEXT: [[TMP8:%.*]] = load i32, i32* [[A_ADDR]], align 4, !llvm.access.group !22 // CHECK24-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP8]], 1 // CHECK24-NEXT: store i32 [[ADD2]], i32* [[A_ADDR]], align 4, !llvm.access.group !22 -// CHECK24-NEXT: [[TMP9:%.*]] = load i16, i16* [[CONV]], align 4, !llvm.access.group !22 +// CHECK24-NEXT: [[TMP9:%.*]] = load i16, i16* [[CONV]], align 2, !llvm.access.group !22 // CHECK24-NEXT: [[CONV3:%.*]] = sext i16 [[TMP9]] to i32 // CHECK24-NEXT: [[ADD4:%.*]] = add nsw i32 [[CONV3]], 1 // CHECK24-NEXT: [[CONV5:%.*]] = trunc i32 [[ADD4]] to i16 -// CHECK24-NEXT: store i16 [[CONV5]], i16* [[CONV]], align 4, !llvm.access.group !22 +// CHECK24-NEXT: store i16 [[CONV5]], i16* [[CONV]], align 2, !llvm.access.group !22 // CHECK24-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK24: omp.body.continue: // CHECK24-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] @@ -25377,11 +25377,11 @@ int bar(int n){ // CHECK24-NEXT: [[TMP3:%.*]] = load i32, i32* [[N_ADDR]], align 4 // CHECK24-NEXT: store i32 [[TMP3]], i32* [[N_CASTED]], align 4 // CHECK24-NEXT: [[TMP4:%.*]] = load i32, i32* [[N_CASTED]], align 4 -// CHECK24-NEXT: [[TMP5:%.*]] = load i16, i16* [[CONV]], align 4 +// CHECK24-NEXT: [[TMP5:%.*]] = load i16, i16* [[CONV]], align 2 // CHECK24-NEXT: [[CONV2:%.*]] = bitcast i32* [[AA_CASTED]] to i16* // CHECK24-NEXT: store i16 [[TMP5]], i16* [[CONV2]], align 2 // CHECK24-NEXT: [[TMP6:%.*]] = load i32, i32* [[AA_CASTED]], align 4 -// CHECK24-NEXT: [[TMP7:%.*]] = load i8, i8* [[CONV1]], align 4 +// CHECK24-NEXT: [[TMP7:%.*]] = load i8, i8* [[CONV1]], align 1 // CHECK24-NEXT: [[CONV3:%.*]] = bitcast i32* [[AAA_CASTED]] to i8* // CHECK24-NEXT: store i8 [[TMP7]], i8* [[CONV3]], align 1 // CHECK24-NEXT: [[TMP8:%.*]] = load i32, i32* [[AAA_CASTED]], align 4 @@ -25478,16 +25478,16 @@ int bar(int n){ // CHECK24-NEXT: [[TMP20:%.*]] = load i32, i32* [[A_ADDR]], align 4, 
!llvm.access.group !28 // CHECK24-NEXT: [[ADD11:%.*]] = add nsw i32 [[TMP20]], 1 // CHECK24-NEXT: store i32 [[ADD11]], i32* [[A_ADDR]], align 4, !llvm.access.group !28 -// CHECK24-NEXT: [[TMP21:%.*]] = load i16, i16* [[CONV]], align 4, !llvm.access.group !28 +// CHECK24-NEXT: [[TMP21:%.*]] = load i16, i16* [[CONV]], align 2, !llvm.access.group !28 // CHECK24-NEXT: [[CONV12:%.*]] = sext i16 [[TMP21]] to i32 // CHECK24-NEXT: [[ADD13:%.*]] = add nsw i32 [[CONV12]], 1 // CHECK24-NEXT: [[CONV14:%.*]] = trunc i32 [[ADD13]] to i16 -// CHECK24-NEXT: store i16 [[CONV14]], i16* [[CONV]], align 4, !llvm.access.group !28 -// CHECK24-NEXT: [[TMP22:%.*]] = load i8, i8* [[CONV1]], align 4, !llvm.access.group !28 +// CHECK24-NEXT: store i16 [[CONV14]], i16* [[CONV]], align 2, !llvm.access.group !28 +// CHECK24-NEXT: [[TMP22:%.*]] = load i8, i8* [[CONV1]], align 1, !llvm.access.group !28 // CHECK24-NEXT: [[CONV15:%.*]] = sext i8 [[TMP22]] to i32 // CHECK24-NEXT: [[ADD16:%.*]] = add nsw i32 [[CONV15]], 1 // CHECK24-NEXT: [[CONV17:%.*]] = trunc i32 [[ADD16]] to i8 -// CHECK24-NEXT: store i8 [[CONV17]], i8* [[CONV1]], align 4, !llvm.access.group !28 +// CHECK24-NEXT: store i8 [[CONV17]], i8* [[CONV1]], align 1, !llvm.access.group !28 // CHECK24-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], [10 x i32]* [[TMP0]], i32 0, i32 2 // CHECK24-NEXT: [[TMP23:%.*]] = load i32, i32* [[ARRAYIDX]], align 4, !llvm.access.group !28 // CHECK24-NEXT: [[ADD18:%.*]] = add nsw i32 [[TMP23]], 1 @@ -25552,7 +25552,7 @@ int bar(int n){ // CHECK24-NEXT: [[TMP4:%.*]] = load i32, i32* [[B_ADDR]], align 4 // CHECK24-NEXT: store i32 [[TMP4]], i32* [[B_CASTED]], align 4 // CHECK24-NEXT: [[TMP5:%.*]] = load i32, i32* [[B_CASTED]], align 4 -// CHECK24-NEXT: [[TMP6:%.*]] = load i8, i8* [[CONV]], align 4 +// CHECK24-NEXT: [[TMP6:%.*]] = load i8, i8* [[CONV]], align 1 // CHECK24-NEXT: [[TOBOOL:%.*]] = trunc i8 [[TMP6]] to i1 // CHECK24-NEXT: [[CONV3:%.*]] = bitcast i32* [[DOTCAPTURE_EXPR__CASTED]] to i8* // CHECK24-NEXT: [[FROMBOOL:%.*]] = zext i1 [[TOBOOL]] to i8 @@ -25613,7 +25613,7 @@ int bar(int n){ // CHECK24-NEXT: store i32 [[COND]], i32* [[DOTOMP_UB]], align 4 // CHECK24-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 // CHECK24-NEXT: store i32 [[TMP8]], i32* [[DOTOMP_IV]], align 4 -// CHECK24-NEXT: [[TMP9:%.*]] = load i8, i8* [[CONV]], align 4 +// CHECK24-NEXT: [[TMP9:%.*]] = load i8, i8* [[CONV]], align 1 // CHECK24-NEXT: [[TOBOOL:%.*]] = trunc i8 [[TMP9]] to i1 // CHECK24-NEXT: br i1 [[TOBOOL]], label [[OMP_IF_THEN:%.*]], label [[OMP_IF_ELSE:%.*]] // CHECK24: omp_if.then: @@ -25718,7 +25718,7 @@ int bar(int n){ // CHECK24-NEXT: [[TMP1:%.*]] = load i32, i32* [[A_ADDR]], align 4 // CHECK24-NEXT: store i32 [[TMP1]], i32* [[A_CASTED]], align 4 // CHECK24-NEXT: [[TMP2:%.*]] = load i32, i32* [[A_CASTED]], align 4 -// CHECK24-NEXT: [[TMP3:%.*]] = load i16, i16* [[CONV]], align 4 +// CHECK24-NEXT: [[TMP3:%.*]] = load i16, i16* [[CONV]], align 2 // CHECK24-NEXT: [[CONV1:%.*]] = bitcast i32* [[AA_CASTED]] to i16* // CHECK24-NEXT: store i16 [[TMP3]], i16* [[CONV1]], align 2 // CHECK24-NEXT: [[TMP4:%.*]] = load i32, i32* [[AA_CASTED]], align 4 @@ -25782,11 +25782,11 @@ int bar(int n){ // CHECK24-NEXT: [[TMP9:%.*]] = load i32, i32* [[A_ADDR]], align 4, !llvm.access.group !36 // CHECK24-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP9]], 1 // CHECK24-NEXT: store i32 [[ADD2]], i32* [[A_ADDR]], align 4, !llvm.access.group !36 -// CHECK24-NEXT: [[TMP10:%.*]] = load i16, i16* [[CONV]], align 4, !llvm.access.group !36 +// CHECK24-NEXT: 
[[TMP10:%.*]] = load i16, i16* [[CONV]], align 2, !llvm.access.group !36 // CHECK24-NEXT: [[CONV3:%.*]] = sext i16 [[TMP10]] to i32 // CHECK24-NEXT: [[ADD4:%.*]] = add nsw i32 [[CONV3]], 1 // CHECK24-NEXT: [[CONV5:%.*]] = trunc i32 [[ADD4]] to i16 -// CHECK24-NEXT: store i16 [[CONV5]], i16* [[CONV]], align 4, !llvm.access.group !36 +// CHECK24-NEXT: store i16 [[CONV5]], i16* [[CONV]], align 2, !llvm.access.group !36 // CHECK24-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], [10 x i32]* [[TMP0]], i32 0, i32 2 // CHECK24-NEXT: [[TMP11:%.*]] = load i32, i32* [[ARRAYIDX]], align 4, !llvm.access.group !36 // CHECK24-NEXT: [[ADD6:%.*]] = add nsw i32 [[TMP11]], 1 diff --git a/clang/test/OpenMP/target_teams_distribute_simd_collapse_codegen.cpp b/clang/test/OpenMP/target_teams_distribute_simd_collapse_codegen.cpp index 89a4e1d65a4f..6e3c19d4421f 100644 --- a/clang/test/OpenMP/target_teams_distribute_simd_collapse_codegen.cpp +++ b/clang/test/OpenMP/target_teams_distribute_simd_collapse_codegen.cpp @@ -1132,11 +1132,11 @@ int main (int argc, char **argv) { // CHECK9-NEXT: [[TMP0:%.*]] = load i64, i64* [[VLA_ADDR]], align 8 // CHECK9-NEXT: [[TMP1:%.*]] = load i64, i64* [[VLA_ADDR2]], align 8 // CHECK9-NEXT: [[TMP2:%.*]] = load i32*, i32** [[A_ADDR]], align 8 -// CHECK9-NEXT: [[TMP3:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK9-NEXT: [[TMP3:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK9-NEXT: [[CONV4:%.*]] = bitcast i64* [[N_CASTED]] to i32* // CHECK9-NEXT: store i32 [[TMP3]], i32* [[CONV4]], align 4 // CHECK9-NEXT: [[TMP4:%.*]] = load i64, i64* [[N_CASTED]], align 8 -// CHECK9-NEXT: [[TMP5:%.*]] = load i32, i32* [[CONV3]], align 8 +// CHECK9-NEXT: [[TMP5:%.*]] = load i32, i32* [[CONV3]], align 4 // CHECK9-NEXT: [[CONV5:%.*]] = bitcast i64* [[M_CASTED]] to i32* // CHECK9-NEXT: store i32 [[TMP5]], i32* [[CONV5]], align 4 // CHECK9-NEXT: [[TMP6:%.*]] = load i64, i64* [[M_CASTED]], align 8 @@ -1180,9 +1180,9 @@ int main (int argc, char **argv) { // CHECK9-NEXT: [[TMP0:%.*]] = load i64, i64* [[VLA_ADDR]], align 8 // CHECK9-NEXT: [[TMP1:%.*]] = load i64, i64* [[VLA_ADDR2]], align 8 // CHECK9-NEXT: [[TMP2:%.*]] = load i32*, i32** [[A_ADDR]], align 8 -// CHECK9-NEXT: [[TMP3:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK9-NEXT: [[TMP3:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK9-NEXT: store i32 [[TMP3]], i32* [[DOTCAPTURE_EXPR_]], align 4 -// CHECK9-NEXT: [[TMP4:%.*]] = load i32, i32* [[CONV3]], align 8 +// CHECK9-NEXT: [[TMP4:%.*]] = load i32, i32* [[CONV3]], align 4 // CHECK9-NEXT: store i32 [[TMP4]], i32* [[DOTCAPTURE_EXPR_5]], align 4 // CHECK9-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 // CHECK9-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP5]], 0 @@ -1602,11 +1602,11 @@ int main (int argc, char **argv) { // CHECK10-NEXT: [[TMP0:%.*]] = load i64, i64* [[VLA_ADDR]], align 8 // CHECK10-NEXT: [[TMP1:%.*]] = load i64, i64* [[VLA_ADDR2]], align 8 // CHECK10-NEXT: [[TMP2:%.*]] = load i32*, i32** [[A_ADDR]], align 8 -// CHECK10-NEXT: [[TMP3:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK10-NEXT: [[TMP3:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK10-NEXT: [[CONV4:%.*]] = bitcast i64* [[N_CASTED]] to i32* // CHECK10-NEXT: store i32 [[TMP3]], i32* [[CONV4]], align 4 // CHECK10-NEXT: [[TMP4:%.*]] = load i64, i64* [[N_CASTED]], align 8 -// CHECK10-NEXT: [[TMP5:%.*]] = load i32, i32* [[CONV3]], align 8 +// CHECK10-NEXT: [[TMP5:%.*]] = load i32, i32* [[CONV3]], align 4 // CHECK10-NEXT: [[CONV5:%.*]] = bitcast i64* [[M_CASTED]] to i32* // CHECK10-NEXT: store i32 
[[TMP5]], i32* [[CONV5]], align 4 // CHECK10-NEXT: [[TMP6:%.*]] = load i64, i64* [[M_CASTED]], align 8 @@ -1650,9 +1650,9 @@ int main (int argc, char **argv) { // CHECK10-NEXT: [[TMP0:%.*]] = load i64, i64* [[VLA_ADDR]], align 8 // CHECK10-NEXT: [[TMP1:%.*]] = load i64, i64* [[VLA_ADDR2]], align 8 // CHECK10-NEXT: [[TMP2:%.*]] = load i32*, i32** [[A_ADDR]], align 8 -// CHECK10-NEXT: [[TMP3:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK10-NEXT: [[TMP3:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK10-NEXT: store i32 [[TMP3]], i32* [[DOTCAPTURE_EXPR_]], align 4 -// CHECK10-NEXT: [[TMP4:%.*]] = load i32, i32* [[CONV3]], align 8 +// CHECK10-NEXT: [[TMP4:%.*]] = load i32, i32* [[CONV3]], align 4 // CHECK10-NEXT: store i32 [[TMP4]], i32* [[DOTCAPTURE_EXPR_5]], align 4 // CHECK10-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 // CHECK10-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP5]], 0 diff --git a/clang/test/OpenMP/target_teams_distribute_simd_dist_schedule_codegen.cpp b/clang/test/OpenMP/target_teams_distribute_simd_dist_schedule_codegen.cpp index 1a8ace57e558..fb0cb638f50c 100644 --- a/clang/test/OpenMP/target_teams_distribute_simd_dist_schedule_codegen.cpp +++ b/clang/test/OpenMP/target_teams_distribute_simd_dist_schedule_codegen.cpp @@ -2333,7 +2333,7 @@ int main (int argc, char **argv) { // CHECK9-NEXT: [[CONV:%.*]] = bitcast i64* [[N_ADDR]] to i32* // CHECK9-NEXT: [[TMP0:%.*]] = load i64, i64* [[VLA_ADDR]], align 8 // CHECK9-NEXT: [[TMP1:%.*]] = load i32*, i32** [[A_ADDR]], align 8 -// CHECK9-NEXT: [[TMP2:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK9-NEXT: [[TMP2:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK9-NEXT: [[CONV1:%.*]] = bitcast i64* [[N_CASTED]] to i32* // CHECK9-NEXT: store i32 [[TMP2]], i32* [[CONV1]], align 4 // CHECK9-NEXT: [[TMP3:%.*]] = load i64, i64* [[N_CASTED]], align 8 @@ -2367,7 +2367,7 @@ int main (int argc, char **argv) { // CHECK9-NEXT: [[CONV:%.*]] = bitcast i64* [[N_ADDR]] to i32* // CHECK9-NEXT: [[TMP0:%.*]] = load i64, i64* [[VLA_ADDR]], align 8 // CHECK9-NEXT: [[TMP1:%.*]] = load i32*, i32** [[A_ADDR]], align 8 -// CHECK9-NEXT: [[TMP2:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK9-NEXT: [[TMP2:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK9-NEXT: store i32 [[TMP2]], i32* [[DOTCAPTURE_EXPR_]], align 4 // CHECK9-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 // CHECK9-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP3]], 0 @@ -2461,7 +2461,7 @@ int main (int argc, char **argv) { // CHECK9-NEXT: [[CONV:%.*]] = bitcast i64* [[N_ADDR]] to i32* // CHECK9-NEXT: [[TMP0:%.*]] = load i64, i64* [[VLA_ADDR]], align 8 // CHECK9-NEXT: [[TMP1:%.*]] = load i32*, i32** [[A_ADDR]], align 8 -// CHECK9-NEXT: [[TMP2:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK9-NEXT: [[TMP2:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK9-NEXT: [[CONV1:%.*]] = bitcast i64* [[N_CASTED]] to i32* // CHECK9-NEXT: store i32 [[TMP2]], i32* [[CONV1]], align 4 // CHECK9-NEXT: [[TMP3:%.*]] = load i64, i64* [[N_CASTED]], align 8 @@ -2495,7 +2495,7 @@ int main (int argc, char **argv) { // CHECK9-NEXT: [[CONV:%.*]] = bitcast i64* [[N_ADDR]] to i32* // CHECK9-NEXT: [[TMP0:%.*]] = load i64, i64* [[VLA_ADDR]], align 8 // CHECK9-NEXT: [[TMP1:%.*]] = load i32*, i32** [[A_ADDR]], align 8 -// CHECK9-NEXT: [[TMP2:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK9-NEXT: [[TMP2:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK9-NEXT: store i32 [[TMP2]], i32* [[DOTCAPTURE_EXPR_]], align 4 // CHECK9-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 // 
CHECK9-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP3]], 0 @@ -2593,11 +2593,11 @@ int main (int argc, char **argv) { // CHECK9-NEXT: [[TMP0:%.*]] = load i64, i64* [[VLA_ADDR]], align 8 // CHECK9-NEXT: [[TMP1:%.*]] = load i32*, i32** [[A_ADDR]], align 8 // CHECK9-NEXT: [[CONV1:%.*]] = bitcast i64* [[DOTCAPTURE_EXPR__ADDR]] to i32* -// CHECK9-NEXT: [[TMP2:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK9-NEXT: [[TMP2:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK9-NEXT: [[CONV2:%.*]] = bitcast i64* [[N_CASTED]] to i32* // CHECK9-NEXT: store i32 [[TMP2]], i32* [[CONV2]], align 4 // CHECK9-NEXT: [[TMP3:%.*]] = load i64, i64* [[N_CASTED]], align 8 -// CHECK9-NEXT: [[TMP4:%.*]] = load i32, i32* [[CONV1]], align 8 +// CHECK9-NEXT: [[TMP4:%.*]] = load i32, i32* [[CONV1]], align 4 // CHECK9-NEXT: [[CONV3:%.*]] = bitcast i64* [[DOTCAPTURE_EXPR__CASTED]] to i32* // CHECK9-NEXT: store i32 [[TMP4]], i32* [[CONV3]], align 4 // CHECK9-NEXT: [[TMP5:%.*]] = load i64, i64* [[DOTCAPTURE_EXPR__CASTED]], align 8 @@ -2634,7 +2634,7 @@ int main (int argc, char **argv) { // CHECK9-NEXT: [[TMP0:%.*]] = load i64, i64* [[VLA_ADDR]], align 8 // CHECK9-NEXT: [[TMP1:%.*]] = load i32*, i32** [[A_ADDR]], align 8 // CHECK9-NEXT: [[CONV1:%.*]] = bitcast i64* [[DOTCAPTURE_EXPR__ADDR]] to i32* -// CHECK9-NEXT: [[TMP2:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK9-NEXT: [[TMP2:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK9-NEXT: store i32 [[TMP2]], i32* [[DOTCAPTURE_EXPR_2]], align 4 // CHECK9-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_2]], align 4 // CHECK9-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP3]], 0 @@ -2651,7 +2651,7 @@ int main (int argc, char **argv) { // CHECK9-NEXT: store i32 [[TMP5]], i32* [[DOTOMP_UB]], align 4 // CHECK9-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK9-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK9-NEXT: [[TMP6:%.*]] = load i32, i32* [[CONV1]], align 8 +// CHECK9-NEXT: [[TMP6:%.*]] = load i32, i32* [[CONV1]], align 4 // CHECK9-NEXT: [[TMP7:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK9-NEXT: [[TMP8:%.*]] = load i32, i32* [[TMP7]], align 4 // CHECK9-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP8]], i32 91, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 [[TMP6]]) @@ -3335,7 +3335,7 @@ int main (int argc, char **argv) { // CHECK10-NEXT: [[CONV:%.*]] = bitcast i64* [[N_ADDR]] to i32* // CHECK10-NEXT: [[TMP0:%.*]] = load i64, i64* [[VLA_ADDR]], align 8 // CHECK10-NEXT: [[TMP1:%.*]] = load i32*, i32** [[A_ADDR]], align 8 -// CHECK10-NEXT: [[TMP2:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK10-NEXT: [[TMP2:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK10-NEXT: [[CONV1:%.*]] = bitcast i64* [[N_CASTED]] to i32* // CHECK10-NEXT: store i32 [[TMP2]], i32* [[CONV1]], align 4 // CHECK10-NEXT: [[TMP3:%.*]] = load i64, i64* [[N_CASTED]], align 8 @@ -3369,7 +3369,7 @@ int main (int argc, char **argv) { // CHECK10-NEXT: [[CONV:%.*]] = bitcast i64* [[N_ADDR]] to i32* // CHECK10-NEXT: [[TMP0:%.*]] = load i64, i64* [[VLA_ADDR]], align 8 // CHECK10-NEXT: [[TMP1:%.*]] = load i32*, i32** [[A_ADDR]], align 8 -// CHECK10-NEXT: [[TMP2:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK10-NEXT: [[TMP2:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK10-NEXT: store i32 [[TMP2]], i32* [[DOTCAPTURE_EXPR_]], align 4 // CHECK10-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 // CHECK10-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP3]], 0 @@ -3463,7 +3463,7 @@ int 
main (int argc, char **argv) { // CHECK10-NEXT: [[CONV:%.*]] = bitcast i64* [[N_ADDR]] to i32* // CHECK10-NEXT: [[TMP0:%.*]] = load i64, i64* [[VLA_ADDR]], align 8 // CHECK10-NEXT: [[TMP1:%.*]] = load i32*, i32** [[A_ADDR]], align 8 -// CHECK10-NEXT: [[TMP2:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK10-NEXT: [[TMP2:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK10-NEXT: [[CONV1:%.*]] = bitcast i64* [[N_CASTED]] to i32* // CHECK10-NEXT: store i32 [[TMP2]], i32* [[CONV1]], align 4 // CHECK10-NEXT: [[TMP3:%.*]] = load i64, i64* [[N_CASTED]], align 8 @@ -3497,7 +3497,7 @@ int main (int argc, char **argv) { // CHECK10-NEXT: [[CONV:%.*]] = bitcast i64* [[N_ADDR]] to i32* // CHECK10-NEXT: [[TMP0:%.*]] = load i64, i64* [[VLA_ADDR]], align 8 // CHECK10-NEXT: [[TMP1:%.*]] = load i32*, i32** [[A_ADDR]], align 8 -// CHECK10-NEXT: [[TMP2:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK10-NEXT: [[TMP2:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK10-NEXT: store i32 [[TMP2]], i32* [[DOTCAPTURE_EXPR_]], align 4 // CHECK10-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 // CHECK10-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP3]], 0 @@ -3595,11 +3595,11 @@ int main (int argc, char **argv) { // CHECK10-NEXT: [[TMP0:%.*]] = load i64, i64* [[VLA_ADDR]], align 8 // CHECK10-NEXT: [[TMP1:%.*]] = load i32*, i32** [[A_ADDR]], align 8 // CHECK10-NEXT: [[CONV1:%.*]] = bitcast i64* [[DOTCAPTURE_EXPR__ADDR]] to i32* -// CHECK10-NEXT: [[TMP2:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK10-NEXT: [[TMP2:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK10-NEXT: [[CONV2:%.*]] = bitcast i64* [[N_CASTED]] to i32* // CHECK10-NEXT: store i32 [[TMP2]], i32* [[CONV2]], align 4 // CHECK10-NEXT: [[TMP3:%.*]] = load i64, i64* [[N_CASTED]], align 8 -// CHECK10-NEXT: [[TMP4:%.*]] = load i32, i32* [[CONV1]], align 8 +// CHECK10-NEXT: [[TMP4:%.*]] = load i32, i32* [[CONV1]], align 4 // CHECK10-NEXT: [[CONV3:%.*]] = bitcast i64* [[DOTCAPTURE_EXPR__CASTED]] to i32* // CHECK10-NEXT: store i32 [[TMP4]], i32* [[CONV3]], align 4 // CHECK10-NEXT: [[TMP5:%.*]] = load i64, i64* [[DOTCAPTURE_EXPR__CASTED]], align 8 @@ -3636,7 +3636,7 @@ int main (int argc, char **argv) { // CHECK10-NEXT: [[TMP0:%.*]] = load i64, i64* [[VLA_ADDR]], align 8 // CHECK10-NEXT: [[TMP1:%.*]] = load i32*, i32** [[A_ADDR]], align 8 // CHECK10-NEXT: [[CONV1:%.*]] = bitcast i64* [[DOTCAPTURE_EXPR__ADDR]] to i32* -// CHECK10-NEXT: [[TMP2:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK10-NEXT: [[TMP2:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK10-NEXT: store i32 [[TMP2]], i32* [[DOTCAPTURE_EXPR_2]], align 4 // CHECK10-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_2]], align 4 // CHECK10-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP3]], 0 @@ -3653,7 +3653,7 @@ int main (int argc, char **argv) { // CHECK10-NEXT: store i32 [[TMP5]], i32* [[DOTOMP_UB]], align 4 // CHECK10-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK10-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK10-NEXT: [[TMP6:%.*]] = load i32, i32* [[CONV1]], align 8 +// CHECK10-NEXT: [[TMP6:%.*]] = load i32, i32* [[CONV1]], align 4 // CHECK10-NEXT: [[TMP7:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK10-NEXT: [[TMP8:%.*]] = load i32, i32* [[TMP7]], align 4 // CHECK10-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP8]], i32 91, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 [[TMP6]]) diff --git 
a/clang/test/OpenMP/target_teams_distribute_simd_firstprivate_codegen.cpp b/clang/test/OpenMP/target_teams_distribute_simd_firstprivate_codegen.cpp index ce209edf36ac..138ff30b199a 100644 --- a/clang/test/OpenMP/target_teams_distribute_simd_firstprivate_codegen.cpp +++ b/clang/test/OpenMP/target_teams_distribute_simd_firstprivate_codegen.cpp @@ -345,11 +345,11 @@ int main() { // CHECK1-NEXT: [[TMP1:%.*]] = load [2 x %struct.S]*, [2 x %struct.S]** [[S_ARR_ADDR]], align 8 // CHECK1-NEXT: [[TMP2:%.*]] = load %struct.S*, %struct.S** [[VAR_ADDR]], align 8 // CHECK1-NEXT: [[CONV1:%.*]] = bitcast i64* [[SIVAR_ADDR]] to i32* -// CHECK1-NEXT: [[TMP3:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK1-NEXT: [[TMP3:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK1-NEXT: [[CONV2:%.*]] = bitcast i64* [[T_VAR_CASTED]] to i32* // CHECK1-NEXT: store i32 [[TMP3]], i32* [[CONV2]], align 4 // CHECK1-NEXT: [[TMP4:%.*]] = load i64, i64* [[T_VAR_CASTED]], align 8 -// CHECK1-NEXT: [[TMP5:%.*]] = load i32, i32* [[CONV1]], align 8 +// CHECK1-NEXT: [[TMP5:%.*]] = load i32, i32* [[CONV1]], align 4 // CHECK1-NEXT: [[CONV3:%.*]] = bitcast i64* [[SIVAR_CASTED]] to i32* // CHECK1-NEXT: store i32 [[TMP5]], i32* [[CONV3]], align 4 // CHECK1-NEXT: [[TMP6:%.*]] = load i64, i64* [[SIVAR_CASTED]], align 8 @@ -446,7 +446,7 @@ int main() { // CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP14]], 1 // CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK1-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !5 -// CHECK1-NEXT: [[TMP15:%.*]] = load i32, i32* [[CONV]], align 8, !llvm.access.group !5 +// CHECK1-NEXT: [[TMP15:%.*]] = load i32, i32* [[CONV]], align 4, !llvm.access.group !5 // CHECK1-NEXT: [[TMP16:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !5 // CHECK1-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP16]] to i64 // CHECK1-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x i32], [2 x i32]* [[VEC2]], i64 0, i64 [[IDXPROM]] @@ -458,9 +458,9 @@ int main() { // CHECK1-NEXT: [[TMP19:%.*]] = bitcast %struct.S* [[VAR5]] to i8* // CHECK1-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 [[TMP18]], i8* align 4 [[TMP19]], i64 4, i1 false), !llvm.access.group !5 // CHECK1-NEXT: [[TMP20:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !5 -// CHECK1-NEXT: [[TMP21:%.*]] = load i32, i32* [[CONV1]], align 8, !llvm.access.group !5 +// CHECK1-NEXT: [[TMP21:%.*]] = load i32, i32* [[CONV1]], align 4, !llvm.access.group !5 // CHECK1-NEXT: [[ADD10:%.*]] = add nsw i32 [[TMP21]], [[TMP20]] -// CHECK1-NEXT: store i32 [[ADD10]], i32* [[CONV1]], align 8, !llvm.access.group !5 +// CHECK1-NEXT: store i32 [[ADD10]], i32* [[CONV1]], align 4, !llvm.access.group !5 // CHECK1-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK1: omp.body.continue: // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] @@ -701,7 +701,7 @@ int main() { // CHECK1-NEXT: [[TMP1:%.*]] = load [2 x %struct.S.0]*, [2 x %struct.S.0]** [[S_ARR_ADDR]], align 8 // CHECK1-NEXT: [[TMP2:%.*]] = load %struct.S.0*, %struct.S.0** [[VAR_ADDR]], align 8 // CHECK1-NEXT: store %struct.S.0* [[TMP2]], %struct.S.0** [[TMP]], align 8 -// CHECK1-NEXT: [[TMP3:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK1-NEXT: [[TMP3:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK1-NEXT: [[CONV1:%.*]] = bitcast i64* [[T_VAR_CASTED]] to i32* // CHECK1-NEXT: store i32 [[TMP3]], i32* [[CONV1]], align 4 // CHECK1-NEXT: [[TMP4:%.*]] = load i64, i64* [[T_VAR_CASTED]], align 8 @@ -801,7 +801,7 @@ int main() { // CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP15]], 1 // CHECK1-NEXT: 
[[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK1-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !11 -// CHECK1-NEXT: [[TMP16:%.*]] = load i32, i32* [[CONV]], align 8, !llvm.access.group !11 +// CHECK1-NEXT: [[TMP16:%.*]] = load i32, i32* [[CONV]], align 4, !llvm.access.group !11 // CHECK1-NEXT: [[TMP17:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !11 // CHECK1-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP17]] to i64 // CHECK1-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x i32], [2 x i32]* [[VEC2]], i64 0, i64 [[IDXPROM]] @@ -1149,11 +1149,11 @@ int main() { // CHECK2-NEXT: [[TMP1:%.*]] = load [2 x %struct.S]*, [2 x %struct.S]** [[S_ARR_ADDR]], align 8 // CHECK2-NEXT: [[TMP2:%.*]] = load %struct.S*, %struct.S** [[VAR_ADDR]], align 8 // CHECK2-NEXT: [[CONV1:%.*]] = bitcast i64* [[SIVAR_ADDR]] to i32* -// CHECK2-NEXT: [[TMP3:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK2-NEXT: [[TMP3:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK2-NEXT: [[CONV2:%.*]] = bitcast i64* [[T_VAR_CASTED]] to i32* // CHECK2-NEXT: store i32 [[TMP3]], i32* [[CONV2]], align 4 // CHECK2-NEXT: [[TMP4:%.*]] = load i64, i64* [[T_VAR_CASTED]], align 8 -// CHECK2-NEXT: [[TMP5:%.*]] = load i32, i32* [[CONV1]], align 8 +// CHECK2-NEXT: [[TMP5:%.*]] = load i32, i32* [[CONV1]], align 4 // CHECK2-NEXT: [[CONV3:%.*]] = bitcast i64* [[SIVAR_CASTED]] to i32* // CHECK2-NEXT: store i32 [[TMP5]], i32* [[CONV3]], align 4 // CHECK2-NEXT: [[TMP6:%.*]] = load i64, i64* [[SIVAR_CASTED]], align 8 @@ -1250,7 +1250,7 @@ int main() { // CHECK2-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP14]], 1 // CHECK2-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK2-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !5 -// CHECK2-NEXT: [[TMP15:%.*]] = load i32, i32* [[CONV]], align 8, !llvm.access.group !5 +// CHECK2-NEXT: [[TMP15:%.*]] = load i32, i32* [[CONV]], align 4, !llvm.access.group !5 // CHECK2-NEXT: [[TMP16:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !5 // CHECK2-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP16]] to i64 // CHECK2-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x i32], [2 x i32]* [[VEC2]], i64 0, i64 [[IDXPROM]] @@ -1262,9 +1262,9 @@ int main() { // CHECK2-NEXT: [[TMP19:%.*]] = bitcast %struct.S* [[VAR5]] to i8* // CHECK2-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 [[TMP18]], i8* align 4 [[TMP19]], i64 4, i1 false), !llvm.access.group !5 // CHECK2-NEXT: [[TMP20:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !5 -// CHECK2-NEXT: [[TMP21:%.*]] = load i32, i32* [[CONV1]], align 8, !llvm.access.group !5 +// CHECK2-NEXT: [[TMP21:%.*]] = load i32, i32* [[CONV1]], align 4, !llvm.access.group !5 // CHECK2-NEXT: [[ADD10:%.*]] = add nsw i32 [[TMP21]], [[TMP20]] -// CHECK2-NEXT: store i32 [[ADD10]], i32* [[CONV1]], align 8, !llvm.access.group !5 +// CHECK2-NEXT: store i32 [[ADD10]], i32* [[CONV1]], align 4, !llvm.access.group !5 // CHECK2-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK2: omp.body.continue: // CHECK2-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] @@ -1505,7 +1505,7 @@ int main() { // CHECK2-NEXT: [[TMP1:%.*]] = load [2 x %struct.S.0]*, [2 x %struct.S.0]** [[S_ARR_ADDR]], align 8 // CHECK2-NEXT: [[TMP2:%.*]] = load %struct.S.0*, %struct.S.0** [[VAR_ADDR]], align 8 // CHECK2-NEXT: store %struct.S.0* [[TMP2]], %struct.S.0** [[TMP]], align 8 -// CHECK2-NEXT: [[TMP3:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK2-NEXT: [[TMP3:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK2-NEXT: [[CONV1:%.*]] = bitcast i64* [[T_VAR_CASTED]] to i32* // CHECK2-NEXT: store 
i32 [[TMP3]], i32* [[CONV1]], align 4 // CHECK2-NEXT: [[TMP4:%.*]] = load i64, i64* [[T_VAR_CASTED]], align 8 @@ -1605,7 +1605,7 @@ int main() { // CHECK2-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP15]], 1 // CHECK2-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK2-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !11 -// CHECK2-NEXT: [[TMP16:%.*]] = load i32, i32* [[CONV]], align 8, !llvm.access.group !11 +// CHECK2-NEXT: [[TMP16:%.*]] = load i32, i32* [[CONV]], align 4, !llvm.access.group !11 // CHECK2-NEXT: [[TMP17:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !11 // CHECK2-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP17]] to i64 // CHECK2-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x i32], [2 x i32]* [[VEC2]], i64 0, i64 [[IDXPROM]] @@ -4759,7 +4759,7 @@ int main() { // CHECK9-NEXT: [[CONV1:%.*]] = bitcast i64* [[G1_ADDR]] to i32* // CHECK9-NEXT: [[CONV2:%.*]] = bitcast i64* [[SIVAR_ADDR]] to i32* // CHECK9-NEXT: store i32* [[CONV1]], i32** [[TMP]], align 8 -// CHECK9-NEXT: [[TMP0:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK9-NEXT: [[TMP0:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK9-NEXT: [[CONV3:%.*]] = bitcast i64* [[G_CASTED]] to i32* // CHECK9-NEXT: store i32 [[TMP0]], i32* [[CONV3]], align 4 // CHECK9-NEXT: [[TMP1:%.*]] = load i64, i64* [[G_CASTED]], align 8 @@ -4768,7 +4768,7 @@ int main() { // CHECK9-NEXT: [[CONV4:%.*]] = bitcast i64* [[G1_CASTED]] to i32* // CHECK9-NEXT: store i32 [[TMP3]], i32* [[CONV4]], align 4 // CHECK9-NEXT: [[TMP4:%.*]] = load i64, i64* [[G1_CASTED]], align 8 -// CHECK9-NEXT: [[TMP5:%.*]] = load i32, i32* [[CONV2]], align 8 +// CHECK9-NEXT: [[TMP5:%.*]] = load i32, i32* [[CONV2]], align 4 // CHECK9-NEXT: [[CONV5:%.*]] = bitcast i64* [[SIVAR_CASTED]] to i32* // CHECK9-NEXT: store i32 [[TMP5]], i32* [[CONV5]], align 4 // CHECK9-NEXT: [[TMP6:%.*]] = load i64, i64* [[SIVAR_CASTED]], align 8 @@ -4833,10 +4833,10 @@ int main() { // CHECK9-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP7]], 1 // CHECK9-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK9-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !4 -// CHECK9-NEXT: store i32 1, i32* [[CONV]], align 8, !llvm.access.group !4 +// CHECK9-NEXT: store i32 1, i32* [[CONV]], align 4, !llvm.access.group !4 // CHECK9-NEXT: [[TMP8:%.*]] = load i32*, i32** [[TMP]], align 8, !llvm.access.group !4 // CHECK9-NEXT: store volatile i32 1, i32* [[TMP8]], align 4, !llvm.access.group !4 -// CHECK9-NEXT: store i32 2, i32* [[CONV2]], align 8, !llvm.access.group !4 +// CHECK9-NEXT: store i32 2, i32* [[CONV2]], align 4, !llvm.access.group !4 // CHECK9-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[CLASS_ANON_0]], %class.anon.0* [[REF_TMP]], i32 0, i32 0 // CHECK9-NEXT: store i32* [[CONV]], i32** [[TMP9]], align 8, !llvm.access.group !4 // CHECK9-NEXT: [[TMP10:%.*]] = getelementptr inbounds [[CLASS_ANON_0]], %class.anon.0* [[REF_TMP]], i32 0, i32 1 @@ -5023,7 +5023,7 @@ int main() { // CHECK10-NEXT: [[CONV1:%.*]] = bitcast i64* [[G1_ADDR]] to i32* // CHECK10-NEXT: [[CONV2:%.*]] = bitcast i64* [[SIVAR_ADDR]] to i32* // CHECK10-NEXT: store i32* [[CONV1]], i32** [[TMP]], align 8 -// CHECK10-NEXT: [[TMP0:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK10-NEXT: [[TMP0:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK10-NEXT: [[CONV3:%.*]] = bitcast i64* [[G_CASTED]] to i32* // CHECK10-NEXT: store i32 [[TMP0]], i32* [[CONV3]], align 4 // CHECK10-NEXT: [[TMP1:%.*]] = load i64, i64* [[G_CASTED]], align 8 @@ -5032,7 +5032,7 @@ int main() { // CHECK10-NEXT: [[CONV4:%.*]] = bitcast i64* 
[[G1_CASTED]] to i32* // CHECK10-NEXT: store i32 [[TMP3]], i32* [[CONV4]], align 4 // CHECK10-NEXT: [[TMP4:%.*]] = load i64, i64* [[G1_CASTED]], align 8 -// CHECK10-NEXT: [[TMP5:%.*]] = load i32, i32* [[CONV2]], align 8 +// CHECK10-NEXT: [[TMP5:%.*]] = load i32, i32* [[CONV2]], align 4 // CHECK10-NEXT: [[CONV5:%.*]] = bitcast i64* [[SIVAR_CASTED]] to i32* // CHECK10-NEXT: store i32 [[TMP5]], i32* [[CONV5]], align 4 // CHECK10-NEXT: [[TMP6:%.*]] = load i64, i64* [[SIVAR_CASTED]], align 8 @@ -5097,10 +5097,10 @@ int main() { // CHECK10-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP7]], 1 // CHECK10-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK10-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !4 -// CHECK10-NEXT: store i32 1, i32* [[CONV]], align 8, !llvm.access.group !4 +// CHECK10-NEXT: store i32 1, i32* [[CONV]], align 4, !llvm.access.group !4 // CHECK10-NEXT: [[TMP8:%.*]] = load i32*, i32** [[TMP]], align 8, !llvm.access.group !4 // CHECK10-NEXT: store volatile i32 1, i32* [[TMP8]], align 4, !llvm.access.group !4 -// CHECK10-NEXT: store i32 2, i32* [[CONV2]], align 8, !llvm.access.group !4 +// CHECK10-NEXT: store i32 2, i32* [[CONV2]], align 4, !llvm.access.group !4 // CHECK10-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[CLASS_ANON_0]], %class.anon.0* [[REF_TMP]], i32 0, i32 0 // CHECK10-NEXT: store i32* [[CONV]], i32** [[TMP9]], align 8, !llvm.access.group !4 // CHECK10-NEXT: [[TMP10:%.*]] = getelementptr inbounds [[CLASS_ANON_0]], %class.anon.0* [[REF_TMP]], i32 0, i32 1 diff --git a/clang/test/OpenMP/target_teams_distribute_simd_lastprivate_codegen.cpp b/clang/test/OpenMP/target_teams_distribute_simd_lastprivate_codegen.cpp index db176f546888..f62acbff4b39 100644 --- a/clang/test/OpenMP/target_teams_distribute_simd_lastprivate_codegen.cpp +++ b/clang/test/OpenMP/target_teams_distribute_simd_lastprivate_codegen.cpp @@ -182,11 +182,11 @@ int main() { // CHECK1-NEXT: [[CONV5:%.*]] = bitcast i64* [[G1_CASTED]] to double* // CHECK1-NEXT: store double [[TMP3]], double* [[CONV5]], align 8 // CHECK1-NEXT: [[TMP4:%.*]] = load i64, i64* [[G1_CASTED]], align 8 -// CHECK1-NEXT: [[TMP5:%.*]] = load i32, i32* [[CONV2]], align 8 +// CHECK1-NEXT: [[TMP5:%.*]] = load i32, i32* [[CONV2]], align 4 // CHECK1-NEXT: [[CONV6:%.*]] = bitcast i64* [[SVAR_CASTED]] to i32* // CHECK1-NEXT: store i32 [[TMP5]], i32* [[CONV6]], align 4 // CHECK1-NEXT: [[TMP6:%.*]] = load i64, i64* [[SVAR_CASTED]], align 8 -// CHECK1-NEXT: [[TMP7:%.*]] = load float, float* [[CONV3]], align 8 +// CHECK1-NEXT: [[TMP7:%.*]] = load float, float* [[CONV3]], align 4 // CHECK1-NEXT: [[CONV7:%.*]] = bitcast i64* [[SFVAR_CASTED]] to float* // CHECK1-NEXT: store float [[TMP7]], float* [[CONV7]], align 4 // CHECK1-NEXT: [[TMP8:%.*]] = load i64, i64* [[SFVAR_CASTED]], align 8 @@ -305,9 +305,9 @@ int main() { // CHECK1-NEXT: [[TMP22:%.*]] = load double, double* [[TMP21]], align 8 // CHECK1-NEXT: store volatile double [[TMP22]], double* [[TMP0]], align 8 // CHECK1-NEXT: [[TMP23:%.*]] = load i32, i32* [[SVAR8]], align 4 -// CHECK1-NEXT: store i32 [[TMP23]], i32* [[CONV2]], align 8 +// CHECK1-NEXT: store i32 [[TMP23]], i32* [[CONV2]], align 4 // CHECK1-NEXT: [[TMP24:%.*]] = load float, float* [[SFVAR9]], align 4 -// CHECK1-NEXT: store float [[TMP24]], float* [[CONV3]], align 8 +// CHECK1-NEXT: store float [[TMP24]], float* [[CONV3]], align 4 // CHECK1-NEXT: br label [[DOTOMP_LASTPRIVATE_DONE]] // CHECK1: .omp.lastprivate.done: // CHECK1-NEXT: ret void @@ -368,11 +368,11 @@ int main() { // CHECK2-NEXT: [[CONV5:%.*]] = bitcast i64* 
[[G1_CASTED]] to double* // CHECK2-NEXT: store double [[TMP3]], double* [[CONV5]], align 8 // CHECK2-NEXT: [[TMP4:%.*]] = load i64, i64* [[G1_CASTED]], align 8 -// CHECK2-NEXT: [[TMP5:%.*]] = load i32, i32* [[CONV2]], align 8 +// CHECK2-NEXT: [[TMP5:%.*]] = load i32, i32* [[CONV2]], align 4 // CHECK2-NEXT: [[CONV6:%.*]] = bitcast i64* [[SVAR_CASTED]] to i32* // CHECK2-NEXT: store i32 [[TMP5]], i32* [[CONV6]], align 4 // CHECK2-NEXT: [[TMP6:%.*]] = load i64, i64* [[SVAR_CASTED]], align 8 -// CHECK2-NEXT: [[TMP7:%.*]] = load float, float* [[CONV3]], align 8 +// CHECK2-NEXT: [[TMP7:%.*]] = load float, float* [[CONV3]], align 4 // CHECK2-NEXT: [[CONV7:%.*]] = bitcast i64* [[SFVAR_CASTED]] to float* // CHECK2-NEXT: store float [[TMP7]], float* [[CONV7]], align 4 // CHECK2-NEXT: [[TMP8:%.*]] = load i64, i64* [[SFVAR_CASTED]], align 8 @@ -491,9 +491,9 @@ int main() { // CHECK2-NEXT: [[TMP22:%.*]] = load double, double* [[TMP21]], align 8 // CHECK2-NEXT: store volatile double [[TMP22]], double* [[TMP0]], align 8 // CHECK2-NEXT: [[TMP23:%.*]] = load i32, i32* [[SVAR8]], align 4 -// CHECK2-NEXT: store i32 [[TMP23]], i32* [[CONV2]], align 8 +// CHECK2-NEXT: store i32 [[TMP23]], i32* [[CONV2]], align 4 // CHECK2-NEXT: [[TMP24:%.*]] = load float, float* [[SFVAR9]], align 4 -// CHECK2-NEXT: store float [[TMP24]], float* [[CONV3]], align 8 +// CHECK2-NEXT: store float [[TMP24]], float* [[CONV3]], align 4 // CHECK2-NEXT: br label [[DOTOMP_LASTPRIVATE_DONE]] // CHECK2: .omp.lastprivate.done: // CHECK2-NEXT: ret void @@ -1077,12 +1077,12 @@ int main() { // CHECK9-NEXT: [[TMP2:%.*]] = load %struct.S*, %struct.S** [[VAR_ADDR]], align 8 // CHECK9-NEXT: [[CONV1:%.*]] = bitcast i64* [[SVAR_ADDR]] to i32* // CHECK9-NEXT: store %struct.S* [[TMP2]], %struct.S** [[TMP]], align 8 -// CHECK9-NEXT: [[TMP3:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK9-NEXT: [[TMP3:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK9-NEXT: [[CONV2:%.*]] = bitcast i64* [[T_VAR_CASTED]] to i32* // CHECK9-NEXT: store i32 [[TMP3]], i32* [[CONV2]], align 4 // CHECK9-NEXT: [[TMP4:%.*]] = load i64, i64* [[T_VAR_CASTED]], align 8 // CHECK9-NEXT: [[TMP5:%.*]] = load %struct.S*, %struct.S** [[TMP]], align 8 -// CHECK9-NEXT: [[TMP6:%.*]] = load i32, i32* [[CONV1]], align 8 +// CHECK9-NEXT: [[TMP6:%.*]] = load i32, i32* [[CONV1]], align 4 // CHECK9-NEXT: [[CONV3:%.*]] = bitcast i64* [[SVAR_CASTED]] to i32* // CHECK9-NEXT: store i32 [[TMP6]], i32* [[CONV3]], align 4 // CHECK9-NEXT: [[TMP7:%.*]] = load i64, i64* [[SVAR_CASTED]], align 8 @@ -1211,7 +1211,7 @@ int main() { // CHECK9-NEXT: br i1 [[TMP24]], label [[DOTOMP_LASTPRIVATE_THEN:%.*]], label [[DOTOMP_LASTPRIVATE_DONE:%.*]] // CHECK9: .omp.lastprivate.then: // CHECK9-NEXT: [[TMP25:%.*]] = load i32, i32* [[T_VAR3]], align 4 -// CHECK9-NEXT: store i32 [[TMP25]], i32* [[CONV]], align 8 +// CHECK9-NEXT: store i32 [[TMP25]], i32* [[CONV]], align 4 // CHECK9-NEXT: [[TMP26:%.*]] = bitcast [2 x i32]* [[TMP0]] to i8* // CHECK9-NEXT: [[TMP27:%.*]] = bitcast [2 x i32]* [[VEC4]] to i8* // CHECK9-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 [[TMP26]], i8* align 4 [[TMP27]], i64 8, i1 false) @@ -1236,7 +1236,7 @@ int main() { // CHECK9-NEXT: [[TMP34:%.*]] = bitcast %struct.S* [[TMP32]] to i8* // CHECK9-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 [[TMP33]], i8* align 4 [[TMP34]], i64 4, i1 false) // CHECK9-NEXT: [[TMP35:%.*]] = load i32, i32* [[SVAR8]], align 4 -// CHECK9-NEXT: store i32 [[TMP35]], i32* [[CONV1]], align 8 +// CHECK9-NEXT: store i32 [[TMP35]], i32* [[CONV1]], align 4 // 
CHECK9-NEXT: br label [[DOTOMP_LASTPRIVATE_DONE]] // CHECK9: .omp.lastprivate.done: // CHECK9-NEXT: call void @_ZN1SIfED1Ev(%struct.S* nonnull align 4 dereferenceable(4) [[VAR6]]) #[[ATTR5]] @@ -1427,7 +1427,7 @@ int main() { // CHECK9-NEXT: [[TMP1:%.*]] = load [2 x %struct.S.0]*, [2 x %struct.S.0]** [[S_ARR_ADDR]], align 8 // CHECK9-NEXT: [[TMP2:%.*]] = load %struct.S.0*, %struct.S.0** [[VAR_ADDR]], align 8 // CHECK9-NEXT: store %struct.S.0* [[TMP2]], %struct.S.0** [[TMP]], align 8 -// CHECK9-NEXT: [[TMP3:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK9-NEXT: [[TMP3:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK9-NEXT: [[CONV1:%.*]] = bitcast i64* [[T_VAR_CASTED]] to i32* // CHECK9-NEXT: store i32 [[TMP3]], i32* [[CONV1]], align 4 // CHECK9-NEXT: [[TMP4:%.*]] = load i64, i64* [[T_VAR_CASTED]], align 8 @@ -1553,7 +1553,7 @@ int main() { // CHECK9-NEXT: br i1 [[TMP24]], label [[DOTOMP_LASTPRIVATE_THEN:%.*]], label [[DOTOMP_LASTPRIVATE_DONE:%.*]] // CHECK9: .omp.lastprivate.then: // CHECK9-NEXT: [[TMP25:%.*]] = load i32, i32* [[T_VAR2]], align 4 -// CHECK9-NEXT: store i32 [[TMP25]], i32* [[CONV]], align 8 +// CHECK9-NEXT: store i32 [[TMP25]], i32* [[CONV]], align 4 // CHECK9-NEXT: [[TMP26:%.*]] = bitcast [2 x i32]* [[TMP0]] to i8* // CHECK9-NEXT: [[TMP27:%.*]] = bitcast [2 x i32]* [[VEC3]] to i8* // CHECK9-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 [[TMP26]], i8* align 4 [[TMP27]], i64 8, i1 false) @@ -1797,12 +1797,12 @@ int main() { // CHECK10-NEXT: [[TMP2:%.*]] = load %struct.S*, %struct.S** [[VAR_ADDR]], align 8 // CHECK10-NEXT: [[CONV1:%.*]] = bitcast i64* [[SVAR_ADDR]] to i32* // CHECK10-NEXT: store %struct.S* [[TMP2]], %struct.S** [[TMP]], align 8 -// CHECK10-NEXT: [[TMP3:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK10-NEXT: [[TMP3:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK10-NEXT: [[CONV2:%.*]] = bitcast i64* [[T_VAR_CASTED]] to i32* // CHECK10-NEXT: store i32 [[TMP3]], i32* [[CONV2]], align 4 // CHECK10-NEXT: [[TMP4:%.*]] = load i64, i64* [[T_VAR_CASTED]], align 8 // CHECK10-NEXT: [[TMP5:%.*]] = load %struct.S*, %struct.S** [[TMP]], align 8 -// CHECK10-NEXT: [[TMP6:%.*]] = load i32, i32* [[CONV1]], align 8 +// CHECK10-NEXT: [[TMP6:%.*]] = load i32, i32* [[CONV1]], align 4 // CHECK10-NEXT: [[CONV3:%.*]] = bitcast i64* [[SVAR_CASTED]] to i32* // CHECK10-NEXT: store i32 [[TMP6]], i32* [[CONV3]], align 4 // CHECK10-NEXT: [[TMP7:%.*]] = load i64, i64* [[SVAR_CASTED]], align 8 @@ -1931,7 +1931,7 @@ int main() { // CHECK10-NEXT: br i1 [[TMP24]], label [[DOTOMP_LASTPRIVATE_THEN:%.*]], label [[DOTOMP_LASTPRIVATE_DONE:%.*]] // CHECK10: .omp.lastprivate.then: // CHECK10-NEXT: [[TMP25:%.*]] = load i32, i32* [[T_VAR3]], align 4 -// CHECK10-NEXT: store i32 [[TMP25]], i32* [[CONV]], align 8 +// CHECK10-NEXT: store i32 [[TMP25]], i32* [[CONV]], align 4 // CHECK10-NEXT: [[TMP26:%.*]] = bitcast [2 x i32]* [[TMP0]] to i8* // CHECK10-NEXT: [[TMP27:%.*]] = bitcast [2 x i32]* [[VEC4]] to i8* // CHECK10-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 [[TMP26]], i8* align 4 [[TMP27]], i64 8, i1 false) @@ -1956,7 +1956,7 @@ int main() { // CHECK10-NEXT: [[TMP34:%.*]] = bitcast %struct.S* [[TMP32]] to i8* // CHECK10-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 [[TMP33]], i8* align 4 [[TMP34]], i64 4, i1 false) // CHECK10-NEXT: [[TMP35:%.*]] = load i32, i32* [[SVAR8]], align 4 -// CHECK10-NEXT: store i32 [[TMP35]], i32* [[CONV1]], align 8 +// CHECK10-NEXT: store i32 [[TMP35]], i32* [[CONV1]], align 4 // CHECK10-NEXT: br label [[DOTOMP_LASTPRIVATE_DONE]] // 
CHECK10: .omp.lastprivate.done: // CHECK10-NEXT: call void @_ZN1SIfED1Ev(%struct.S* nonnull align 4 dereferenceable(4) [[VAR6]]) #[[ATTR5]] @@ -2147,7 +2147,7 @@ int main() { // CHECK10-NEXT: [[TMP1:%.*]] = load [2 x %struct.S.0]*, [2 x %struct.S.0]** [[S_ARR_ADDR]], align 8 // CHECK10-NEXT: [[TMP2:%.*]] = load %struct.S.0*, %struct.S.0** [[VAR_ADDR]], align 8 // CHECK10-NEXT: store %struct.S.0* [[TMP2]], %struct.S.0** [[TMP]], align 8 -// CHECK10-NEXT: [[TMP3:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK10-NEXT: [[TMP3:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK10-NEXT: [[CONV1:%.*]] = bitcast i64* [[T_VAR_CASTED]] to i32* // CHECK10-NEXT: store i32 [[TMP3]], i32* [[CONV1]], align 4 // CHECK10-NEXT: [[TMP4:%.*]] = load i64, i64* [[T_VAR_CASTED]], align 8 @@ -2273,7 +2273,7 @@ int main() { // CHECK10-NEXT: br i1 [[TMP24]], label [[DOTOMP_LASTPRIVATE_THEN:%.*]], label [[DOTOMP_LASTPRIVATE_DONE:%.*]] // CHECK10: .omp.lastprivate.then: // CHECK10-NEXT: [[TMP25:%.*]] = load i32, i32* [[T_VAR2]], align 4 -// CHECK10-NEXT: store i32 [[TMP25]], i32* [[CONV]], align 8 +// CHECK10-NEXT: store i32 [[TMP25]], i32* [[CONV]], align 4 // CHECK10-NEXT: [[TMP26:%.*]] = bitcast [2 x i32]* [[TMP0]] to i8* // CHECK10-NEXT: [[TMP27:%.*]] = bitcast [2 x i32]* [[VEC3]] to i8* // CHECK10-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 [[TMP26]], i8* align 4 [[TMP27]], i64 8, i1 false) diff --git a/clang/test/OpenMP/target_teams_num_teams_codegen.cpp b/clang/test/OpenMP/target_teams_num_teams_codegen.cpp index 007685e1d25b..2508c7468749 100644 --- a/clang/test/OpenMP/target_teams_num_teams_codegen.cpp +++ b/clang/test/OpenMP/target_teams_num_teams_codegen.cpp @@ -450,9 +450,9 @@ int bar(int n){ // CHECK1-NEXT: [[TMP1:%.*]] = load %struct.S1*, %struct.S1** [[THIS_ADDR]], align 8 // CHECK1-NEXT: [[CONV:%.*]] = bitcast i64* [[B_ADDR]] to i32* // CHECK1-NEXT: [[CONV1:%.*]] = bitcast i64* [[DOTCAPTURE_EXPR__ADDR]] to i32* -// CHECK1-NEXT: [[TMP2:%.*]] = load i32, i32* [[CONV1]], align 8 +// CHECK1-NEXT: [[TMP2:%.*]] = load i32, i32* [[CONV1]], align 4 // CHECK1-NEXT: call void @__kmpc_push_num_teams(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]], i32 [[TMP2]], i32 0) -// CHECK1-NEXT: [[TMP3:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK1-NEXT: [[TMP3:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK1-NEXT: [[CONV2:%.*]] = bitcast i64* [[B_CASTED]] to i32* // CHECK1-NEXT: store i32 [[TMP3]], i32* [[CONV2]], align 4 // CHECK1-NEXT: [[TMP4:%.*]] = load i64, i64* [[B_CASTED]], align 8 @@ -473,7 +473,7 @@ int bar(int n){ // CHECK1-NEXT: store i64 [[B]], i64* [[B_ADDR]], align 8 // CHECK1-NEXT: [[TMP0:%.*]] = load %struct.S1*, %struct.S1** [[THIS_ADDR]], align 8 // CHECK1-NEXT: [[CONV:%.*]] = bitcast i64* [[B_ADDR]] to i32* -// CHECK1-NEXT: [[TMP1:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK1-NEXT: [[CONV1:%.*]] = sitofp i32 [[TMP1]] to double // CHECK1-NEXT: [[ADD:%.*]] = fadd double [[CONV1]], 1.500000e+00 // CHECK1-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_S1:%.*]], %struct.S1* [[TMP0]], i32 0, i32 0 @@ -515,7 +515,7 @@ int bar(int n){ // CHECK1-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) // CHECK1-NEXT: store i64 [[DOTCAPTURE_EXPR_]], i64* [[DOTCAPTURE_EXPR__ADDR]], align 8 // CHECK1-NEXT: [[CONV:%.*]] = bitcast i64* [[DOTCAPTURE_EXPR__ADDR]] to i32* -// CHECK1-NEXT: [[TMP1:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = load i32, i32* [[CONV]], align 4 // 
CHECK1-NEXT: call void @__kmpc_push_num_teams(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]], i32 [[TMP1]], i32 0) // CHECK1-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB1]], i32 0, void (i32*, i32*, ...)* bitcast (void (i32*, i32*)* @.omp_outlined..4 to void (i32*, i32*, ...)*)) // CHECK1-NEXT: ret void @@ -538,7 +538,7 @@ int bar(int n){ // CHECK1-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) // CHECK1-NEXT: store i64 [[DOTCAPTURE_EXPR_]], i64* [[DOTCAPTURE_EXPR__ADDR]], align 8 // CHECK1-NEXT: [[CONV:%.*]] = bitcast i64* [[DOTCAPTURE_EXPR__ADDR]] to i32* -// CHECK1-NEXT: [[TMP1:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK1-NEXT: call void @__kmpc_push_num_teams(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]], i32 [[TMP1]], i32 0) // CHECK1-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB1]], i32 0, void (i32*, i32*, ...)* bitcast (void (i32*, i32*)* @.omp_outlined..7 to void (i32*, i32*, ...)*)) // CHECK1-NEXT: ret void @@ -588,14 +588,14 @@ int bar(int n){ // CHECK1-NEXT: [[CONV:%.*]] = bitcast i64* [[A_ADDR]] to i32* // CHECK1-NEXT: [[CONV1:%.*]] = bitcast i64* [[B_ADDR]] to i16* // CHECK1-NEXT: [[CONV2:%.*]] = bitcast i64* [[DOTCAPTURE_EXPR__ADDR]] to i16* -// CHECK1-NEXT: [[TMP1:%.*]] = load i16, i16* [[CONV2]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = load i16, i16* [[CONV2]], align 2 // CHECK1-NEXT: [[TMP2:%.*]] = sext i16 [[TMP1]] to i32 // CHECK1-NEXT: call void @__kmpc_push_num_teams(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]], i32 [[TMP2]], i32 0) -// CHECK1-NEXT: [[TMP3:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK1-NEXT: [[TMP3:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK1-NEXT: [[CONV3:%.*]] = bitcast i64* [[A_CASTED]] to i32* // CHECK1-NEXT: store i32 [[TMP3]], i32* [[CONV3]], align 4 // CHECK1-NEXT: [[TMP4:%.*]] = load i64, i64* [[A_CASTED]], align 8 -// CHECK1-NEXT: [[TMP5:%.*]] = load i16, i16* [[CONV1]], align 8 +// CHECK1-NEXT: [[TMP5:%.*]] = load i16, i16* [[CONV1]], align 2 // CHECK1-NEXT: [[CONV4:%.*]] = bitcast i64* [[B_CASTED]] to i16* // CHECK1-NEXT: store i16 [[TMP5]], i16* [[CONV4]], align 2 // CHECK1-NEXT: [[TMP6:%.*]] = load i64, i64* [[B_CASTED]], align 8 @@ -616,11 +616,11 @@ int bar(int n){ // CHECK1-NEXT: store i64 [[B]], i64* [[B_ADDR]], align 8 // CHECK1-NEXT: [[CONV:%.*]] = bitcast i64* [[A_ADDR]] to i32* // CHECK1-NEXT: [[CONV1:%.*]] = bitcast i64* [[B_ADDR]] to i16* -// CHECK1-NEXT: [[TMP0:%.*]] = load i16, i16* [[CONV1]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load i16, i16* [[CONV1]], align 2 // CHECK1-NEXT: [[CONV2:%.*]] = sext i16 [[TMP0]] to i32 -// CHECK1-NEXT: [[TMP1:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP1]], [[CONV2]] -// CHECK1-NEXT: store i32 [[ADD]], i32* [[CONV]], align 8 +// CHECK1-NEXT: store i32 [[ADD]], i32* [[CONV]], align 4 // CHECK1-NEXT: ret void // // @@ -907,9 +907,9 @@ int bar(int n){ // CHECK2-NEXT: [[TMP1:%.*]] = load %struct.S1*, %struct.S1** [[THIS_ADDR]], align 8 // CHECK2-NEXT: [[CONV:%.*]] = bitcast i64* [[B_ADDR]] to i32* // CHECK2-NEXT: [[CONV1:%.*]] = bitcast i64* [[DOTCAPTURE_EXPR__ADDR]] to i32* -// CHECK2-NEXT: [[TMP2:%.*]] = load i32, i32* [[CONV1]], align 8 +// CHECK2-NEXT: [[TMP2:%.*]] = load i32, i32* [[CONV1]], align 4 // CHECK2-NEXT: call void 
@__kmpc_push_num_teams(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]], i32 [[TMP2]], i32 0) -// CHECK2-NEXT: [[TMP3:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK2-NEXT: [[TMP3:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK2-NEXT: [[CONV2:%.*]] = bitcast i64* [[B_CASTED]] to i32* // CHECK2-NEXT: store i32 [[TMP3]], i32* [[CONV2]], align 4 // CHECK2-NEXT: [[TMP4:%.*]] = load i64, i64* [[B_CASTED]], align 8 @@ -930,7 +930,7 @@ int bar(int n){ // CHECK2-NEXT: store i64 [[B]], i64* [[B_ADDR]], align 8 // CHECK2-NEXT: [[TMP0:%.*]] = load %struct.S1*, %struct.S1** [[THIS_ADDR]], align 8 // CHECK2-NEXT: [[CONV:%.*]] = bitcast i64* [[B_ADDR]] to i32* -// CHECK2-NEXT: [[TMP1:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK2-NEXT: [[TMP1:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK2-NEXT: [[CONV1:%.*]] = sitofp i32 [[TMP1]] to double // CHECK2-NEXT: [[ADD:%.*]] = fadd double [[CONV1]], 1.500000e+00 // CHECK2-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_S1:%.*]], %struct.S1* [[TMP0]], i32 0, i32 0 @@ -972,7 +972,7 @@ int bar(int n){ // CHECK2-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) // CHECK2-NEXT: store i64 [[DOTCAPTURE_EXPR_]], i64* [[DOTCAPTURE_EXPR__ADDR]], align 8 // CHECK2-NEXT: [[CONV:%.*]] = bitcast i64* [[DOTCAPTURE_EXPR__ADDR]] to i32* -// CHECK2-NEXT: [[TMP1:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK2-NEXT: [[TMP1:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK2-NEXT: call void @__kmpc_push_num_teams(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]], i32 [[TMP1]], i32 0) // CHECK2-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB1]], i32 0, void (i32*, i32*, ...)* bitcast (void (i32*, i32*)* @.omp_outlined..4 to void (i32*, i32*, ...)*)) // CHECK2-NEXT: ret void @@ -995,7 +995,7 @@ int bar(int n){ // CHECK2-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) // CHECK2-NEXT: store i64 [[DOTCAPTURE_EXPR_]], i64* [[DOTCAPTURE_EXPR__ADDR]], align 8 // CHECK2-NEXT: [[CONV:%.*]] = bitcast i64* [[DOTCAPTURE_EXPR__ADDR]] to i32* -// CHECK2-NEXT: [[TMP1:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK2-NEXT: [[TMP1:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK2-NEXT: call void @__kmpc_push_num_teams(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]], i32 [[TMP1]], i32 0) // CHECK2-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) 
@__kmpc_fork_teams(%struct.ident_t* @[[GLOB1]], i32 0, void (i32*, i32*, ...)* bitcast (void (i32*, i32*)* @.omp_outlined..7 to void (i32*, i32*, ...)*)) // CHECK2-NEXT: ret void @@ -1045,14 +1045,14 @@ int bar(int n){ // CHECK2-NEXT: [[CONV:%.*]] = bitcast i64* [[A_ADDR]] to i32* // CHECK2-NEXT: [[CONV1:%.*]] = bitcast i64* [[B_ADDR]] to i16* // CHECK2-NEXT: [[CONV2:%.*]] = bitcast i64* [[DOTCAPTURE_EXPR__ADDR]] to i16* -// CHECK2-NEXT: [[TMP1:%.*]] = load i16, i16* [[CONV2]], align 8 +// CHECK2-NEXT: [[TMP1:%.*]] = load i16, i16* [[CONV2]], align 2 // CHECK2-NEXT: [[TMP2:%.*]] = sext i16 [[TMP1]] to i32 // CHECK2-NEXT: call void @__kmpc_push_num_teams(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]], i32 [[TMP2]], i32 0) -// CHECK2-NEXT: [[TMP3:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK2-NEXT: [[TMP3:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK2-NEXT: [[CONV3:%.*]] = bitcast i64* [[A_CASTED]] to i32* // CHECK2-NEXT: store i32 [[TMP3]], i32* [[CONV3]], align 4 // CHECK2-NEXT: [[TMP4:%.*]] = load i64, i64* [[A_CASTED]], align 8 -// CHECK2-NEXT: [[TMP5:%.*]] = load i16, i16* [[CONV1]], align 8 +// CHECK2-NEXT: [[TMP5:%.*]] = load i16, i16* [[CONV1]], align 2 // CHECK2-NEXT: [[CONV4:%.*]] = bitcast i64* [[B_CASTED]] to i16* // CHECK2-NEXT: store i16 [[TMP5]], i16* [[CONV4]], align 2 // CHECK2-NEXT: [[TMP6:%.*]] = load i64, i64* [[B_CASTED]], align 8 @@ -1073,11 +1073,11 @@ int bar(int n){ // CHECK2-NEXT: store i64 [[B]], i64* [[B_ADDR]], align 8 // CHECK2-NEXT: [[CONV:%.*]] = bitcast i64* [[A_ADDR]] to i32* // CHECK2-NEXT: [[CONV1:%.*]] = bitcast i64* [[B_ADDR]] to i16* -// CHECK2-NEXT: [[TMP0:%.*]] = load i16, i16* [[CONV1]], align 8 +// CHECK2-NEXT: [[TMP0:%.*]] = load i16, i16* [[CONV1]], align 2 // CHECK2-NEXT: [[CONV2:%.*]] = sext i16 [[TMP0]] to i32 -// CHECK2-NEXT: [[TMP1:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK2-NEXT: [[TMP1:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK2-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP1]], [[CONV2]] -// CHECK2-NEXT: store i32 [[ADD]], i32* [[CONV]], align 8 +// CHECK2-NEXT: store i32 [[ADD]], i32* [[CONV]], align 4 // CHECK2-NEXT: ret void // // @@ -1490,13 +1490,13 @@ int bar(int n){ // CHECK3-NEXT: store i32 [[DOTCAPTURE_EXPR_]], i32* [[DOTCAPTURE_EXPR__ADDR]], align 4 // CHECK3-NEXT: [[CONV:%.*]] = bitcast i32* [[B_ADDR]] to i16* // CHECK3-NEXT: [[CONV1:%.*]] = bitcast i32* [[DOTCAPTURE_EXPR__ADDR]] to i16* -// CHECK3-NEXT: [[TMP1:%.*]] = load i16, i16* [[CONV1]], align 4 +// CHECK3-NEXT: [[TMP1:%.*]] = load i16, i16* [[CONV1]], align 2 // CHECK3-NEXT: [[TMP2:%.*]] = sext i16 [[TMP1]] to i32 // CHECK3-NEXT: call void @__kmpc_push_num_teams(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]], i32 [[TMP2]], i32 0) // CHECK3-NEXT: [[TMP3:%.*]] = load i32, i32* [[A_ADDR]], align 4 // CHECK3-NEXT: store i32 [[TMP3]], i32* [[A_CASTED]], align 4 // CHECK3-NEXT: [[TMP4:%.*]] = load i32, i32* [[A_CASTED]], align 4 -// CHECK3-NEXT: [[TMP5:%.*]] = load i16, i16* [[CONV]], align 4 +// CHECK3-NEXT: [[TMP5:%.*]] = load i16, i16* [[CONV]], align 2 // CHECK3-NEXT: [[CONV2:%.*]] = bitcast i32* [[B_CASTED]] to i16* // CHECK3-NEXT: store i16 [[TMP5]], i16* [[CONV2]], align 2 // CHECK3-NEXT: [[TMP6:%.*]] = load i32, i32* [[B_CASTED]], align 4 @@ -1516,7 +1516,7 @@ int bar(int n){ // CHECK3-NEXT: store i32 [[A]], i32* [[A_ADDR]], align 4 // CHECK3-NEXT: store i32 [[B]], i32* [[B_ADDR]], align 4 // CHECK3-NEXT: [[CONV:%.*]] = bitcast i32* [[B_ADDR]] to i16* -// CHECK3-NEXT: [[TMP0:%.*]] = load i16, i16* [[CONV]], align 4 +// CHECK3-NEXT: [[TMP0:%.*]] = load i16, i16* 
[[CONV]], align 2 // CHECK3-NEXT: [[CONV1:%.*]] = sext i16 [[TMP0]] to i32 // CHECK3-NEXT: [[TMP1:%.*]] = load i32, i32* [[A_ADDR]], align 4 // CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP1]], [[CONV1]] @@ -1933,13 +1933,13 @@ int bar(int n){ // CHECK4-NEXT: store i32 [[DOTCAPTURE_EXPR_]], i32* [[DOTCAPTURE_EXPR__ADDR]], align 4 // CHECK4-NEXT: [[CONV:%.*]] = bitcast i32* [[B_ADDR]] to i16* // CHECK4-NEXT: [[CONV1:%.*]] = bitcast i32* [[DOTCAPTURE_EXPR__ADDR]] to i16* -// CHECK4-NEXT: [[TMP1:%.*]] = load i16, i16* [[CONV1]], align 4 +// CHECK4-NEXT: [[TMP1:%.*]] = load i16, i16* [[CONV1]], align 2 // CHECK4-NEXT: [[TMP2:%.*]] = sext i16 [[TMP1]] to i32 // CHECK4-NEXT: call void @__kmpc_push_num_teams(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]], i32 [[TMP2]], i32 0) // CHECK4-NEXT: [[TMP3:%.*]] = load i32, i32* [[A_ADDR]], align 4 // CHECK4-NEXT: store i32 [[TMP3]], i32* [[A_CASTED]], align 4 // CHECK4-NEXT: [[TMP4:%.*]] = load i32, i32* [[A_CASTED]], align 4 -// CHECK4-NEXT: [[TMP5:%.*]] = load i16, i16* [[CONV]], align 4 +// CHECK4-NEXT: [[TMP5:%.*]] = load i16, i16* [[CONV]], align 2 // CHECK4-NEXT: [[CONV2:%.*]] = bitcast i32* [[B_CASTED]] to i16* // CHECK4-NEXT: store i16 [[TMP5]], i16* [[CONV2]], align 2 // CHECK4-NEXT: [[TMP6:%.*]] = load i32, i32* [[B_CASTED]], align 4 @@ -1959,7 +1959,7 @@ int bar(int n){ // CHECK4-NEXT: store i32 [[A]], i32* [[A_ADDR]], align 4 // CHECK4-NEXT: store i32 [[B]], i32* [[B_ADDR]], align 4 // CHECK4-NEXT: [[CONV:%.*]] = bitcast i32* [[B_ADDR]] to i16* -// CHECK4-NEXT: [[TMP0:%.*]] = load i16, i16* [[CONV]], align 4 +// CHECK4-NEXT: [[TMP0:%.*]] = load i16, i16* [[CONV]], align 2 // CHECK4-NEXT: [[CONV1:%.*]] = sext i16 [[TMP0]] to i32 // CHECK4-NEXT: [[TMP1:%.*]] = load i32, i32* [[A_ADDR]], align 4 // CHECK4-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP1]], [[CONV1]] @@ -1981,7 +1981,7 @@ int bar(int n){ // CHECK9-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1:[0-9]+]]) // CHECK9-NEXT: store i64 [[DOTCAPTURE_EXPR_]], i64* [[DOTCAPTURE_EXPR__ADDR]], align 8 // CHECK9-NEXT: [[CONV:%.*]] = bitcast i64* [[DOTCAPTURE_EXPR__ADDR]] to i32* -// CHECK9-NEXT: [[TMP1:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK9-NEXT: [[TMP1:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK9-NEXT: call void @__kmpc_push_num_teams(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]], i32 [[TMP1]], i32 0) // CHECK9-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB1]], i32 0, void (i32*, i32*, ...)* bitcast (void (i32*, i32*)* @.omp_outlined. to void (i32*, i32*, ...)*)) // CHECK9-NEXT: ret void @@ -2004,7 +2004,7 @@ int bar(int n){ // CHECK9-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) // CHECK9-NEXT: store i64 [[DOTCAPTURE_EXPR_]], i64* [[DOTCAPTURE_EXPR__ADDR]], align 8 // CHECK9-NEXT: [[CONV:%.*]] = bitcast i64* [[DOTCAPTURE_EXPR__ADDR]] to i32* -// CHECK9-NEXT: [[TMP1:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK9-NEXT: [[TMP1:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK9-NEXT: call void @__kmpc_push_num_teams(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]], i32 [[TMP1]], i32 0) // CHECK9-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) 
@__kmpc_fork_teams(%struct.ident_t* @[[GLOB1]], i32 0, void (i32*, i32*, ...)* bitcast (void (i32*, i32*)* @.omp_outlined..1 to void (i32*, i32*, ...)*)) // CHECK9-NEXT: ret void @@ -2034,9 +2034,9 @@ int bar(int n){ // CHECK9-NEXT: [[TMP1:%.*]] = load %struct.S1*, %struct.S1** [[THIS_ADDR]], align 8 // CHECK9-NEXT: [[CONV:%.*]] = bitcast i64* [[B_ADDR]] to i32* // CHECK9-NEXT: [[CONV1:%.*]] = bitcast i64* [[DOTCAPTURE_EXPR__ADDR]] to i32* -// CHECK9-NEXT: [[TMP2:%.*]] = load i32, i32* [[CONV1]], align 8 +// CHECK9-NEXT: [[TMP2:%.*]] = load i32, i32* [[CONV1]], align 4 // CHECK9-NEXT: call void @__kmpc_push_num_teams(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]], i32 [[TMP2]], i32 0) -// CHECK9-NEXT: [[TMP3:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK9-NEXT: [[TMP3:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK9-NEXT: [[CONV2:%.*]] = bitcast i64* [[B_CASTED]] to i32* // CHECK9-NEXT: store i32 [[TMP3]], i32* [[CONV2]], align 4 // CHECK9-NEXT: [[TMP4:%.*]] = load i64, i64* [[B_CASTED]], align 8 @@ -2057,7 +2057,7 @@ int bar(int n){ // CHECK9-NEXT: store i64 [[B]], i64* [[B_ADDR]], align 8 // CHECK9-NEXT: [[TMP0:%.*]] = load %struct.S1*, %struct.S1** [[THIS_ADDR]], align 8 // CHECK9-NEXT: [[CONV:%.*]] = bitcast i64* [[B_ADDR]] to i32* -// CHECK9-NEXT: [[TMP1:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK9-NEXT: [[TMP1:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK9-NEXT: [[CONV1:%.*]] = sitofp i32 [[TMP1]] to double // CHECK9-NEXT: [[ADD:%.*]] = fadd double [[CONV1]], 1.500000e+00 // CHECK9-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_S1:%.*]], %struct.S1* [[TMP0]], i32 0, i32 0 @@ -2126,14 +2126,14 @@ int bar(int n){ // CHECK9-NEXT: [[CONV:%.*]] = bitcast i64* [[A_ADDR]] to i32* // CHECK9-NEXT: [[CONV1:%.*]] = bitcast i64* [[B_ADDR]] to i16* // CHECK9-NEXT: [[CONV2:%.*]] = bitcast i64* [[DOTCAPTURE_EXPR__ADDR]] to i16* -// CHECK9-NEXT: [[TMP1:%.*]] = load i16, i16* [[CONV2]], align 8 +// CHECK9-NEXT: [[TMP1:%.*]] = load i16, i16* [[CONV2]], align 2 // CHECK9-NEXT: [[TMP2:%.*]] = sext i16 [[TMP1]] to i32 // CHECK9-NEXT: call void @__kmpc_push_num_teams(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]], i32 [[TMP2]], i32 0) -// CHECK9-NEXT: [[TMP3:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK9-NEXT: [[TMP3:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK9-NEXT: [[CONV3:%.*]] = bitcast i64* [[A_CASTED]] to i32* // CHECK9-NEXT: store i32 [[TMP3]], i32* [[CONV3]], align 4 // CHECK9-NEXT: [[TMP4:%.*]] = load i64, i64* [[A_CASTED]], align 8 -// CHECK9-NEXT: [[TMP5:%.*]] = load i16, i16* [[CONV1]], align 8 +// CHECK9-NEXT: [[TMP5:%.*]] = load i16, i16* [[CONV1]], align 2 // CHECK9-NEXT: [[CONV4:%.*]] = bitcast i64* [[B_CASTED]] to i16* // CHECK9-NEXT: store i16 [[TMP5]], i16* [[CONV4]], align 2 // CHECK9-NEXT: [[TMP6:%.*]] = load i64, i64* [[B_CASTED]], align 8 @@ -2154,11 +2154,11 @@ int bar(int n){ // CHECK9-NEXT: store i64 [[B]], i64* [[B_ADDR]], align 8 // CHECK9-NEXT: [[CONV:%.*]] = bitcast i64* [[A_ADDR]] to i32* // CHECK9-NEXT: [[CONV1:%.*]] = bitcast i64* [[B_ADDR]] to i16* -// CHECK9-NEXT: [[TMP0:%.*]] = load i16, i16* [[CONV1]], align 8 +// CHECK9-NEXT: [[TMP0:%.*]] = load i16, i16* [[CONV1]], align 2 // CHECK9-NEXT: [[CONV2:%.*]] = sext i16 [[TMP0]] to i32 -// CHECK9-NEXT: [[TMP1:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK9-NEXT: [[TMP1:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK9-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP1]], [[CONV2]] -// CHECK9-NEXT: store i32 [[ADD]], i32* [[CONV]], align 8 +// CHECK9-NEXT: store i32 [[ADD]], i32* [[CONV]], align 4 // 
CHECK9-NEXT: ret void // // @@ -2169,7 +2169,7 @@ int bar(int n){ // CHECK10-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1:[0-9]+]]) // CHECK10-NEXT: store i64 [[DOTCAPTURE_EXPR_]], i64* [[DOTCAPTURE_EXPR__ADDR]], align 8 // CHECK10-NEXT: [[CONV:%.*]] = bitcast i64* [[DOTCAPTURE_EXPR__ADDR]] to i32* -// CHECK10-NEXT: [[TMP1:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK10-NEXT: [[TMP1:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK10-NEXT: call void @__kmpc_push_num_teams(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]], i32 [[TMP1]], i32 0) // CHECK10-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB1]], i32 0, void (i32*, i32*, ...)* bitcast (void (i32*, i32*)* @.omp_outlined. to void (i32*, i32*, ...)*)) // CHECK10-NEXT: ret void @@ -2192,7 +2192,7 @@ int bar(int n){ // CHECK10-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) // CHECK10-NEXT: store i64 [[DOTCAPTURE_EXPR_]], i64* [[DOTCAPTURE_EXPR__ADDR]], align 8 // CHECK10-NEXT: [[CONV:%.*]] = bitcast i64* [[DOTCAPTURE_EXPR__ADDR]] to i32* -// CHECK10-NEXT: [[TMP1:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK10-NEXT: [[TMP1:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK10-NEXT: call void @__kmpc_push_num_teams(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]], i32 [[TMP1]], i32 0) // CHECK10-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB1]], i32 0, void (i32*, i32*, ...)* bitcast (void (i32*, i32*)* @.omp_outlined..1 to void (i32*, i32*, ...)*)) // CHECK10-NEXT: ret void @@ -2222,9 +2222,9 @@ int bar(int n){ // CHECK10-NEXT: [[TMP1:%.*]] = load %struct.S1*, %struct.S1** [[THIS_ADDR]], align 8 // CHECK10-NEXT: [[CONV:%.*]] = bitcast i64* [[B_ADDR]] to i32* // CHECK10-NEXT: [[CONV1:%.*]] = bitcast i64* [[DOTCAPTURE_EXPR__ADDR]] to i32* -// CHECK10-NEXT: [[TMP2:%.*]] = load i32, i32* [[CONV1]], align 8 +// CHECK10-NEXT: [[TMP2:%.*]] = load i32, i32* [[CONV1]], align 4 // CHECK10-NEXT: call void @__kmpc_push_num_teams(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]], i32 [[TMP2]], i32 0) -// CHECK10-NEXT: [[TMP3:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK10-NEXT: [[TMP3:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK10-NEXT: [[CONV2:%.*]] = bitcast i64* [[B_CASTED]] to i32* // CHECK10-NEXT: store i32 [[TMP3]], i32* [[CONV2]], align 4 // CHECK10-NEXT: [[TMP4:%.*]] = load i64, i64* [[B_CASTED]], align 8 @@ -2245,7 +2245,7 @@ int bar(int n){ // CHECK10-NEXT: store i64 [[B]], i64* [[B_ADDR]], align 8 // CHECK10-NEXT: [[TMP0:%.*]] = load %struct.S1*, %struct.S1** [[THIS_ADDR]], align 8 // CHECK10-NEXT: [[CONV:%.*]] = bitcast i64* [[B_ADDR]] to i32* -// CHECK10-NEXT: [[TMP1:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK10-NEXT: [[TMP1:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK10-NEXT: [[CONV1:%.*]] = sitofp i32 [[TMP1]] to double // CHECK10-NEXT: [[ADD:%.*]] = fadd double [[CONV1]], 1.500000e+00 // CHECK10-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_S1:%.*]], %struct.S1* [[TMP0]], i32 0, i32 0 @@ -2314,14 +2314,14 @@ int bar(int n){ // CHECK10-NEXT: [[CONV:%.*]] = bitcast i64* [[A_ADDR]] to i32* // CHECK10-NEXT: [[CONV1:%.*]] = bitcast i64* [[B_ADDR]] to i16* // CHECK10-NEXT: [[CONV2:%.*]] = bitcast i64* [[DOTCAPTURE_EXPR__ADDR]] to i16* -// CHECK10-NEXT: [[TMP1:%.*]] = load i16, i16* [[CONV2]], align 8 +// CHECK10-NEXT: [[TMP1:%.*]] = load i16, i16* [[CONV2]], align 2 // CHECK10-NEXT: [[TMP2:%.*]] = sext i16 [[TMP1]] to i32 // 
CHECK10-NEXT: call void @__kmpc_push_num_teams(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]], i32 [[TMP2]], i32 0) -// CHECK10-NEXT: [[TMP3:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK10-NEXT: [[TMP3:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK10-NEXT: [[CONV3:%.*]] = bitcast i64* [[A_CASTED]] to i32* // CHECK10-NEXT: store i32 [[TMP3]], i32* [[CONV3]], align 4 // CHECK10-NEXT: [[TMP4:%.*]] = load i64, i64* [[A_CASTED]], align 8 -// CHECK10-NEXT: [[TMP5:%.*]] = load i16, i16* [[CONV1]], align 8 +// CHECK10-NEXT: [[TMP5:%.*]] = load i16, i16* [[CONV1]], align 2 // CHECK10-NEXT: [[CONV4:%.*]] = bitcast i64* [[B_CASTED]] to i16* // CHECK10-NEXT: store i16 [[TMP5]], i16* [[CONV4]], align 2 // CHECK10-NEXT: [[TMP6:%.*]] = load i64, i64* [[B_CASTED]], align 8 @@ -2342,11 +2342,11 @@ int bar(int n){ // CHECK10-NEXT: store i64 [[B]], i64* [[B_ADDR]], align 8 // CHECK10-NEXT: [[CONV:%.*]] = bitcast i64* [[A_ADDR]] to i32* // CHECK10-NEXT: [[CONV1:%.*]] = bitcast i64* [[B_ADDR]] to i16* -// CHECK10-NEXT: [[TMP0:%.*]] = load i16, i16* [[CONV1]], align 8 +// CHECK10-NEXT: [[TMP0:%.*]] = load i16, i16* [[CONV1]], align 2 // CHECK10-NEXT: [[CONV2:%.*]] = sext i16 [[TMP0]] to i32 -// CHECK10-NEXT: [[TMP1:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK10-NEXT: [[TMP1:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK10-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP1]], [[CONV2]] -// CHECK10-NEXT: store i32 [[ADD]], i32* [[CONV]], align 8 +// CHECK10-NEXT: store i32 [[ADD]], i32* [[CONV]], align 4 // CHECK10-NEXT: ret void // // @@ -2495,13 +2495,13 @@ int bar(int n){ // CHECK11-NEXT: store i32 [[DOTCAPTURE_EXPR_]], i32* [[DOTCAPTURE_EXPR__ADDR]], align 4 // CHECK11-NEXT: [[CONV:%.*]] = bitcast i32* [[B_ADDR]] to i16* // CHECK11-NEXT: [[CONV1:%.*]] = bitcast i32* [[DOTCAPTURE_EXPR__ADDR]] to i16* -// CHECK11-NEXT: [[TMP1:%.*]] = load i16, i16* [[CONV1]], align 4 +// CHECK11-NEXT: [[TMP1:%.*]] = load i16, i16* [[CONV1]], align 2 // CHECK11-NEXT: [[TMP2:%.*]] = sext i16 [[TMP1]] to i32 // CHECK11-NEXT: call void @__kmpc_push_num_teams(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]], i32 [[TMP2]], i32 0) // CHECK11-NEXT: [[TMP3:%.*]] = load i32, i32* [[A_ADDR]], align 4 // CHECK11-NEXT: store i32 [[TMP3]], i32* [[A_CASTED]], align 4 // CHECK11-NEXT: [[TMP4:%.*]] = load i32, i32* [[A_CASTED]], align 4 -// CHECK11-NEXT: [[TMP5:%.*]] = load i16, i16* [[CONV]], align 4 +// CHECK11-NEXT: [[TMP5:%.*]] = load i16, i16* [[CONV]], align 2 // CHECK11-NEXT: [[CONV2:%.*]] = bitcast i32* [[B_CASTED]] to i16* // CHECK11-NEXT: store i16 [[TMP5]], i16* [[CONV2]], align 2 // CHECK11-NEXT: [[TMP6:%.*]] = load i32, i32* [[B_CASTED]], align 4 @@ -2521,7 +2521,7 @@ int bar(int n){ // CHECK11-NEXT: store i32 [[A]], i32* [[A_ADDR]], align 4 // CHECK11-NEXT: store i32 [[B]], i32* [[B_ADDR]], align 4 // CHECK11-NEXT: [[CONV:%.*]] = bitcast i32* [[B_ADDR]] to i16* -// CHECK11-NEXT: [[TMP0:%.*]] = load i16, i16* [[CONV]], align 4 +// CHECK11-NEXT: [[TMP0:%.*]] = load i16, i16* [[CONV]], align 2 // CHECK11-NEXT: [[CONV1:%.*]] = sext i16 [[TMP0]] to i32 // CHECK11-NEXT: [[TMP1:%.*]] = load i32, i32* [[A_ADDR]], align 4 // CHECK11-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP1]], [[CONV1]] @@ -2674,13 +2674,13 @@ int bar(int n){ // CHECK12-NEXT: store i32 [[DOTCAPTURE_EXPR_]], i32* [[DOTCAPTURE_EXPR__ADDR]], align 4 // CHECK12-NEXT: [[CONV:%.*]] = bitcast i32* [[B_ADDR]] to i16* // CHECK12-NEXT: [[CONV1:%.*]] = bitcast i32* [[DOTCAPTURE_EXPR__ADDR]] to i16* -// CHECK12-NEXT: [[TMP1:%.*]] = load i16, i16* [[CONV1]], align 4 +// CHECK12-NEXT: 
[[TMP1:%.*]] = load i16, i16* [[CONV1]], align 2 // CHECK12-NEXT: [[TMP2:%.*]] = sext i16 [[TMP1]] to i32 // CHECK12-NEXT: call void @__kmpc_push_num_teams(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]], i32 [[TMP2]], i32 0) // CHECK12-NEXT: [[TMP3:%.*]] = load i32, i32* [[A_ADDR]], align 4 // CHECK12-NEXT: store i32 [[TMP3]], i32* [[A_CASTED]], align 4 // CHECK12-NEXT: [[TMP4:%.*]] = load i32, i32* [[A_CASTED]], align 4 -// CHECK12-NEXT: [[TMP5:%.*]] = load i16, i16* [[CONV]], align 4 +// CHECK12-NEXT: [[TMP5:%.*]] = load i16, i16* [[CONV]], align 2 // CHECK12-NEXT: [[CONV2:%.*]] = bitcast i32* [[B_CASTED]] to i16* // CHECK12-NEXT: store i16 [[TMP5]], i16* [[CONV2]], align 2 // CHECK12-NEXT: [[TMP6:%.*]] = load i32, i32* [[B_CASTED]], align 4 @@ -2700,7 +2700,7 @@ int bar(int n){ // CHECK12-NEXT: store i32 [[A]], i32* [[A_ADDR]], align 4 // CHECK12-NEXT: store i32 [[B]], i32* [[B_ADDR]], align 4 // CHECK12-NEXT: [[CONV:%.*]] = bitcast i32* [[B_ADDR]] to i16* -// CHECK12-NEXT: [[TMP0:%.*]] = load i16, i16* [[CONV]], align 4 +// CHECK12-NEXT: [[TMP0:%.*]] = load i16, i16* [[CONV]], align 2 // CHECK12-NEXT: [[CONV1:%.*]] = sext i16 [[TMP0]] to i32 // CHECK12-NEXT: [[TMP1:%.*]] = load i32, i32* [[A_ADDR]], align 4 // CHECK12-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP1]], [[CONV1]] @@ -2984,9 +2984,9 @@ int bar(int n){ // CHECK17-NEXT: [[TMP1:%.*]] = load %struct.S1*, %struct.S1** [[THIS_ADDR]], align 8 // CHECK17-NEXT: [[CONV:%.*]] = bitcast i64* [[B_ADDR]] to i32* // CHECK17-NEXT: [[CONV1:%.*]] = bitcast i64* [[DOTCAPTURE_EXPR__ADDR]] to i32* -// CHECK17-NEXT: [[TMP2:%.*]] = load i32, i32* [[CONV1]], align 8 +// CHECK17-NEXT: [[TMP2:%.*]] = load i32, i32* [[CONV1]], align 4 // CHECK17-NEXT: call void @__kmpc_push_num_teams(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]], i32 [[TMP2]], i32 0) -// CHECK17-NEXT: [[TMP3:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK17-NEXT: [[TMP3:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK17-NEXT: [[CONV2:%.*]] = bitcast i64* [[B_CASTED]] to i32* // CHECK17-NEXT: store i32 [[TMP3]], i32* [[CONV2]], align 4 // CHECK17-NEXT: [[TMP4:%.*]] = load i64, i64* [[B_CASTED]], align 8 @@ -3007,7 +3007,7 @@ int bar(int n){ // CHECK17-NEXT: store i64 [[B]], i64* [[B_ADDR]], align 8 // CHECK17-NEXT: [[TMP0:%.*]] = load %struct.S1*, %struct.S1** [[THIS_ADDR]], align 8 // CHECK17-NEXT: [[CONV:%.*]] = bitcast i64* [[B_ADDR]] to i32* -// CHECK17-NEXT: [[TMP1:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK17-NEXT: [[TMP1:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK17-NEXT: [[CONV1:%.*]] = sitofp i32 [[TMP1]] to double // CHECK17-NEXT: [[ADD:%.*]] = fadd double [[CONV1]], 1.500000e+00 // CHECK17-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_S1:%.*]], %struct.S1* [[TMP0]], i32 0, i32 0 @@ -3049,7 +3049,7 @@ int bar(int n){ // CHECK17-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) // CHECK17-NEXT: store i64 [[DOTCAPTURE_EXPR_]], i64* [[DOTCAPTURE_EXPR__ADDR]], align 8 // CHECK17-NEXT: [[CONV:%.*]] = bitcast i64* [[DOTCAPTURE_EXPR__ADDR]] to i32* -// CHECK17-NEXT: [[TMP1:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK17-NEXT: [[TMP1:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK17-NEXT: call void @__kmpc_push_num_teams(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]], i32 [[TMP1]], i32 0) // CHECK17-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) 
@__kmpc_fork_teams(%struct.ident_t* @[[GLOB1]], i32 0, void (i32*, i32*, ...)* bitcast (void (i32*, i32*)* @.omp_outlined..4 to void (i32*, i32*, ...)*)) // CHECK17-NEXT: ret void @@ -3072,7 +3072,7 @@ int bar(int n){ // CHECK17-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) // CHECK17-NEXT: store i64 [[DOTCAPTURE_EXPR_]], i64* [[DOTCAPTURE_EXPR__ADDR]], align 8 // CHECK17-NEXT: [[CONV:%.*]] = bitcast i64* [[DOTCAPTURE_EXPR__ADDR]] to i32* -// CHECK17-NEXT: [[TMP1:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK17-NEXT: [[TMP1:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK17-NEXT: call void @__kmpc_push_num_teams(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]], i32 [[TMP1]], i32 0) // CHECK17-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB1]], i32 0, void (i32*, i32*, ...)* bitcast (void (i32*, i32*)* @.omp_outlined..7 to void (i32*, i32*, ...)*)) // CHECK17-NEXT: ret void @@ -3122,14 +3122,14 @@ int bar(int n){ // CHECK17-NEXT: [[CONV:%.*]] = bitcast i64* [[A_ADDR]] to i32* // CHECK17-NEXT: [[CONV1:%.*]] = bitcast i64* [[B_ADDR]] to i16* // CHECK17-NEXT: [[CONV2:%.*]] = bitcast i64* [[DOTCAPTURE_EXPR__ADDR]] to i16* -// CHECK17-NEXT: [[TMP1:%.*]] = load i16, i16* [[CONV2]], align 8 +// CHECK17-NEXT: [[TMP1:%.*]] = load i16, i16* [[CONV2]], align 2 // CHECK17-NEXT: [[TMP2:%.*]] = sext i16 [[TMP1]] to i32 // CHECK17-NEXT: call void @__kmpc_push_num_teams(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]], i32 [[TMP2]], i32 0) -// CHECK17-NEXT: [[TMP3:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK17-NEXT: [[TMP3:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK17-NEXT: [[CONV3:%.*]] = bitcast i64* [[A_CASTED]] to i32* // CHECK17-NEXT: store i32 [[TMP3]], i32* [[CONV3]], align 4 // CHECK17-NEXT: [[TMP4:%.*]] = load i64, i64* [[A_CASTED]], align 8 -// CHECK17-NEXT: [[TMP5:%.*]] = load i16, i16* [[CONV1]], align 8 +// CHECK17-NEXT: [[TMP5:%.*]] = load i16, i16* [[CONV1]], align 2 // CHECK17-NEXT: [[CONV4:%.*]] = bitcast i64* [[B_CASTED]] to i16* // CHECK17-NEXT: store i16 [[TMP5]], i16* [[CONV4]], align 2 // CHECK17-NEXT: [[TMP6:%.*]] = load i64, i64* [[B_CASTED]], align 8 @@ -3150,11 +3150,11 @@ int bar(int n){ // CHECK17-NEXT: store i64 [[B]], i64* [[B_ADDR]], align 8 // CHECK17-NEXT: [[CONV:%.*]] = bitcast i64* [[A_ADDR]] to i32* // CHECK17-NEXT: [[CONV1:%.*]] = bitcast i64* [[B_ADDR]] to i16* -// CHECK17-NEXT: [[TMP0:%.*]] = load i16, i16* [[CONV1]], align 8 +// CHECK17-NEXT: [[TMP0:%.*]] = load i16, i16* [[CONV1]], align 2 // CHECK17-NEXT: [[CONV2:%.*]] = sext i16 [[TMP0]] to i32 -// CHECK17-NEXT: [[TMP1:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK17-NEXT: [[TMP1:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK17-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP1]], [[CONV2]] -// CHECK17-NEXT: store i32 [[ADD]], i32* [[CONV]], align 8 +// CHECK17-NEXT: store i32 [[ADD]], i32* [[CONV]], align 4 // CHECK17-NEXT: ret void // // @@ -3441,9 +3441,9 @@ int bar(int n){ // CHECK18-NEXT: [[TMP1:%.*]] = load %struct.S1*, %struct.S1** [[THIS_ADDR]], align 8 // CHECK18-NEXT: [[CONV:%.*]] = bitcast i64* [[B_ADDR]] to i32* // CHECK18-NEXT: [[CONV1:%.*]] = bitcast i64* [[DOTCAPTURE_EXPR__ADDR]] to i32* -// CHECK18-NEXT: [[TMP2:%.*]] = load i32, i32* [[CONV1]], align 8 +// CHECK18-NEXT: [[TMP2:%.*]] = load i32, i32* [[CONV1]], align 4 // CHECK18-NEXT: call void @__kmpc_push_num_teams(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]], i32 [[TMP2]], i32 0) -// CHECK18-NEXT: [[TMP3:%.*]] = load i32, i32* [[CONV]], align 8 
+// CHECK18-NEXT: [[TMP3:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK18-NEXT: [[CONV2:%.*]] = bitcast i64* [[B_CASTED]] to i32* // CHECK18-NEXT: store i32 [[TMP3]], i32* [[CONV2]], align 4 // CHECK18-NEXT: [[TMP4:%.*]] = load i64, i64* [[B_CASTED]], align 8 @@ -3464,7 +3464,7 @@ int bar(int n){ // CHECK18-NEXT: store i64 [[B]], i64* [[B_ADDR]], align 8 // CHECK18-NEXT: [[TMP0:%.*]] = load %struct.S1*, %struct.S1** [[THIS_ADDR]], align 8 // CHECK18-NEXT: [[CONV:%.*]] = bitcast i64* [[B_ADDR]] to i32* -// CHECK18-NEXT: [[TMP1:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK18-NEXT: [[TMP1:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK18-NEXT: [[CONV1:%.*]] = sitofp i32 [[TMP1]] to double // CHECK18-NEXT: [[ADD:%.*]] = fadd double [[CONV1]], 1.500000e+00 // CHECK18-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_S1:%.*]], %struct.S1* [[TMP0]], i32 0, i32 0 @@ -3506,7 +3506,7 @@ int bar(int n){ // CHECK18-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) // CHECK18-NEXT: store i64 [[DOTCAPTURE_EXPR_]], i64* [[DOTCAPTURE_EXPR__ADDR]], align 8 // CHECK18-NEXT: [[CONV:%.*]] = bitcast i64* [[DOTCAPTURE_EXPR__ADDR]] to i32* -// CHECK18-NEXT: [[TMP1:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK18-NEXT: [[TMP1:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK18-NEXT: call void @__kmpc_push_num_teams(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]], i32 [[TMP1]], i32 0) // CHECK18-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB1]], i32 0, void (i32*, i32*, ...)* bitcast (void (i32*, i32*)* @.omp_outlined..4 to void (i32*, i32*, ...)*)) // CHECK18-NEXT: ret void @@ -3529,7 +3529,7 @@ int bar(int n){ // CHECK18-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) // CHECK18-NEXT: store i64 [[DOTCAPTURE_EXPR_]], i64* [[DOTCAPTURE_EXPR__ADDR]], align 8 // CHECK18-NEXT: [[CONV:%.*]] = bitcast i64* [[DOTCAPTURE_EXPR__ADDR]] to i32* -// CHECK18-NEXT: [[TMP1:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK18-NEXT: [[TMP1:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK18-NEXT: call void @__kmpc_push_num_teams(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]], i32 [[TMP1]], i32 0) // CHECK18-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) 
@__kmpc_fork_teams(%struct.ident_t* @[[GLOB1]], i32 0, void (i32*, i32*, ...)* bitcast (void (i32*, i32*)* @.omp_outlined..7 to void (i32*, i32*, ...)*)) // CHECK18-NEXT: ret void @@ -3579,14 +3579,14 @@ int bar(int n){ // CHECK18-NEXT: [[CONV:%.*]] = bitcast i64* [[A_ADDR]] to i32* // CHECK18-NEXT: [[CONV1:%.*]] = bitcast i64* [[B_ADDR]] to i16* // CHECK18-NEXT: [[CONV2:%.*]] = bitcast i64* [[DOTCAPTURE_EXPR__ADDR]] to i16* -// CHECK18-NEXT: [[TMP1:%.*]] = load i16, i16* [[CONV2]], align 8 +// CHECK18-NEXT: [[TMP1:%.*]] = load i16, i16* [[CONV2]], align 2 // CHECK18-NEXT: [[TMP2:%.*]] = sext i16 [[TMP1]] to i32 // CHECK18-NEXT: call void @__kmpc_push_num_teams(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]], i32 [[TMP2]], i32 0) -// CHECK18-NEXT: [[TMP3:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK18-NEXT: [[TMP3:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK18-NEXT: [[CONV3:%.*]] = bitcast i64* [[A_CASTED]] to i32* // CHECK18-NEXT: store i32 [[TMP3]], i32* [[CONV3]], align 4 // CHECK18-NEXT: [[TMP4:%.*]] = load i64, i64* [[A_CASTED]], align 8 -// CHECK18-NEXT: [[TMP5:%.*]] = load i16, i16* [[CONV1]], align 8 +// CHECK18-NEXT: [[TMP5:%.*]] = load i16, i16* [[CONV1]], align 2 // CHECK18-NEXT: [[CONV4:%.*]] = bitcast i64* [[B_CASTED]] to i16* // CHECK18-NEXT: store i16 [[TMP5]], i16* [[CONV4]], align 2 // CHECK18-NEXT: [[TMP6:%.*]] = load i64, i64* [[B_CASTED]], align 8 @@ -3607,11 +3607,11 @@ int bar(int n){ // CHECK18-NEXT: store i64 [[B]], i64* [[B_ADDR]], align 8 // CHECK18-NEXT: [[CONV:%.*]] = bitcast i64* [[A_ADDR]] to i32* // CHECK18-NEXT: [[CONV1:%.*]] = bitcast i64* [[B_ADDR]] to i16* -// CHECK18-NEXT: [[TMP0:%.*]] = load i16, i16* [[CONV1]], align 8 +// CHECK18-NEXT: [[TMP0:%.*]] = load i16, i16* [[CONV1]], align 2 // CHECK18-NEXT: [[CONV2:%.*]] = sext i16 [[TMP0]] to i32 -// CHECK18-NEXT: [[TMP1:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK18-NEXT: [[TMP1:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK18-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP1]], [[CONV2]] -// CHECK18-NEXT: store i32 [[ADD]], i32* [[CONV]], align 8 +// CHECK18-NEXT: store i32 [[ADD]], i32* [[CONV]], align 4 // CHECK18-NEXT: ret void // // @@ -4024,13 +4024,13 @@ int bar(int n){ // CHECK19-NEXT: store i32 [[DOTCAPTURE_EXPR_]], i32* [[DOTCAPTURE_EXPR__ADDR]], align 4 // CHECK19-NEXT: [[CONV:%.*]] = bitcast i32* [[B_ADDR]] to i16* // CHECK19-NEXT: [[CONV1:%.*]] = bitcast i32* [[DOTCAPTURE_EXPR__ADDR]] to i16* -// CHECK19-NEXT: [[TMP1:%.*]] = load i16, i16* [[CONV1]], align 4 +// CHECK19-NEXT: [[TMP1:%.*]] = load i16, i16* [[CONV1]], align 2 // CHECK19-NEXT: [[TMP2:%.*]] = sext i16 [[TMP1]] to i32 // CHECK19-NEXT: call void @__kmpc_push_num_teams(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]], i32 [[TMP2]], i32 0) // CHECK19-NEXT: [[TMP3:%.*]] = load i32, i32* [[A_ADDR]], align 4 // CHECK19-NEXT: store i32 [[TMP3]], i32* [[A_CASTED]], align 4 // CHECK19-NEXT: [[TMP4:%.*]] = load i32, i32* [[A_CASTED]], align 4 -// CHECK19-NEXT: [[TMP5:%.*]] = load i16, i16* [[CONV]], align 4 +// CHECK19-NEXT: [[TMP5:%.*]] = load i16, i16* [[CONV]], align 2 // CHECK19-NEXT: [[CONV2:%.*]] = bitcast i32* [[B_CASTED]] to i16* // CHECK19-NEXT: store i16 [[TMP5]], i16* [[CONV2]], align 2 // CHECK19-NEXT: [[TMP6:%.*]] = load i32, i32* [[B_CASTED]], align 4 @@ -4050,7 +4050,7 @@ int bar(int n){ // CHECK19-NEXT: store i32 [[A]], i32* [[A_ADDR]], align 4 // CHECK19-NEXT: store i32 [[B]], i32* [[B_ADDR]], align 4 // CHECK19-NEXT: [[CONV:%.*]] = bitcast i32* [[B_ADDR]] to i16* -// CHECK19-NEXT: [[TMP0:%.*]] = load i16, i16* [[CONV]], align 4 
+// CHECK19-NEXT: [[TMP0:%.*]] = load i16, i16* [[CONV]], align 2 // CHECK19-NEXT: [[CONV1:%.*]] = sext i16 [[TMP0]] to i32 // CHECK19-NEXT: [[TMP1:%.*]] = load i32, i32* [[A_ADDR]], align 4 // CHECK19-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP1]], [[CONV1]] @@ -4467,13 +4467,13 @@ int bar(int n){ // CHECK20-NEXT: store i32 [[DOTCAPTURE_EXPR_]], i32* [[DOTCAPTURE_EXPR__ADDR]], align 4 // CHECK20-NEXT: [[CONV:%.*]] = bitcast i32* [[B_ADDR]] to i16* // CHECK20-NEXT: [[CONV1:%.*]] = bitcast i32* [[DOTCAPTURE_EXPR__ADDR]] to i16* -// CHECK20-NEXT: [[TMP1:%.*]] = load i16, i16* [[CONV1]], align 4 +// CHECK20-NEXT: [[TMP1:%.*]] = load i16, i16* [[CONV1]], align 2 // CHECK20-NEXT: [[TMP2:%.*]] = sext i16 [[TMP1]] to i32 // CHECK20-NEXT: call void @__kmpc_push_num_teams(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]], i32 [[TMP2]], i32 0) // CHECK20-NEXT: [[TMP3:%.*]] = load i32, i32* [[A_ADDR]], align 4 // CHECK20-NEXT: store i32 [[TMP3]], i32* [[A_CASTED]], align 4 // CHECK20-NEXT: [[TMP4:%.*]] = load i32, i32* [[A_CASTED]], align 4 -// CHECK20-NEXT: [[TMP5:%.*]] = load i16, i16* [[CONV]], align 4 +// CHECK20-NEXT: [[TMP5:%.*]] = load i16, i16* [[CONV]], align 2 // CHECK20-NEXT: [[CONV2:%.*]] = bitcast i32* [[B_CASTED]] to i16* // CHECK20-NEXT: store i16 [[TMP5]], i16* [[CONV2]], align 2 // CHECK20-NEXT: [[TMP6:%.*]] = load i32, i32* [[B_CASTED]], align 4 @@ -4493,7 +4493,7 @@ int bar(int n){ // CHECK20-NEXT: store i32 [[A]], i32* [[A_ADDR]], align 4 // CHECK20-NEXT: store i32 [[B]], i32* [[B_ADDR]], align 4 // CHECK20-NEXT: [[CONV:%.*]] = bitcast i32* [[B_ADDR]] to i16* -// CHECK20-NEXT: [[TMP0:%.*]] = load i16, i16* [[CONV]], align 4 +// CHECK20-NEXT: [[TMP0:%.*]] = load i16, i16* [[CONV]], align 2 // CHECK20-NEXT: [[CONV1:%.*]] = sext i16 [[TMP0]] to i32 // CHECK20-NEXT: [[TMP1:%.*]] = load i32, i32* [[A_ADDR]], align 4 // CHECK20-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP1]], [[CONV1]] @@ -4515,7 +4515,7 @@ int bar(int n){ // CHECK25-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1:[0-9]+]]) // CHECK25-NEXT: store i64 [[DOTCAPTURE_EXPR_]], i64* [[DOTCAPTURE_EXPR__ADDR]], align 8 // CHECK25-NEXT: [[CONV:%.*]] = bitcast i64* [[DOTCAPTURE_EXPR__ADDR]] to i32* -// CHECK25-NEXT: [[TMP1:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK25-NEXT: [[TMP1:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK25-NEXT: call void @__kmpc_push_num_teams(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]], i32 [[TMP1]], i32 0) // CHECK25-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB1]], i32 0, void (i32*, i32*, ...)* bitcast (void (i32*, i32*)* @.omp_outlined. to void (i32*, i32*, ...)*)) // CHECK25-NEXT: ret void @@ -4538,7 +4538,7 @@ int bar(int n){ // CHECK25-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) // CHECK25-NEXT: store i64 [[DOTCAPTURE_EXPR_]], i64* [[DOTCAPTURE_EXPR__ADDR]], align 8 // CHECK25-NEXT: [[CONV:%.*]] = bitcast i64* [[DOTCAPTURE_EXPR__ADDR]] to i32* -// CHECK25-NEXT: [[TMP1:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK25-NEXT: [[TMP1:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK25-NEXT: call void @__kmpc_push_num_teams(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]], i32 [[TMP1]], i32 0) // CHECK25-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) 
@__kmpc_fork_teams(%struct.ident_t* @[[GLOB1]], i32 0, void (i32*, i32*, ...)* bitcast (void (i32*, i32*)* @.omp_outlined..1 to void (i32*, i32*, ...)*)) // CHECK25-NEXT: ret void @@ -4568,9 +4568,9 @@ int bar(int n){ // CHECK25-NEXT: [[TMP1:%.*]] = load %struct.S1*, %struct.S1** [[THIS_ADDR]], align 8 // CHECK25-NEXT: [[CONV:%.*]] = bitcast i64* [[B_ADDR]] to i32* // CHECK25-NEXT: [[CONV1:%.*]] = bitcast i64* [[DOTCAPTURE_EXPR__ADDR]] to i32* -// CHECK25-NEXT: [[TMP2:%.*]] = load i32, i32* [[CONV1]], align 8 +// CHECK25-NEXT: [[TMP2:%.*]] = load i32, i32* [[CONV1]], align 4 // CHECK25-NEXT: call void @__kmpc_push_num_teams(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]], i32 [[TMP2]], i32 0) -// CHECK25-NEXT: [[TMP3:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK25-NEXT: [[TMP3:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK25-NEXT: [[CONV2:%.*]] = bitcast i64* [[B_CASTED]] to i32* // CHECK25-NEXT: store i32 [[TMP3]], i32* [[CONV2]], align 4 // CHECK25-NEXT: [[TMP4:%.*]] = load i64, i64* [[B_CASTED]], align 8 @@ -4591,7 +4591,7 @@ int bar(int n){ // CHECK25-NEXT: store i64 [[B]], i64* [[B_ADDR]], align 8 // CHECK25-NEXT: [[TMP0:%.*]] = load %struct.S1*, %struct.S1** [[THIS_ADDR]], align 8 // CHECK25-NEXT: [[CONV:%.*]] = bitcast i64* [[B_ADDR]] to i32* -// CHECK25-NEXT: [[TMP1:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK25-NEXT: [[TMP1:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK25-NEXT: [[CONV1:%.*]] = sitofp i32 [[TMP1]] to double // CHECK25-NEXT: [[ADD:%.*]] = fadd double [[CONV1]], 1.500000e+00 // CHECK25-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_S1:%.*]], %struct.S1* [[TMP0]], i32 0, i32 0 @@ -4660,14 +4660,14 @@ int bar(int n){ // CHECK25-NEXT: [[CONV:%.*]] = bitcast i64* [[A_ADDR]] to i32* // CHECK25-NEXT: [[CONV1:%.*]] = bitcast i64* [[B_ADDR]] to i16* // CHECK25-NEXT: [[CONV2:%.*]] = bitcast i64* [[DOTCAPTURE_EXPR__ADDR]] to i16* -// CHECK25-NEXT: [[TMP1:%.*]] = load i16, i16* [[CONV2]], align 8 +// CHECK25-NEXT: [[TMP1:%.*]] = load i16, i16* [[CONV2]], align 2 // CHECK25-NEXT: [[TMP2:%.*]] = sext i16 [[TMP1]] to i32 // CHECK25-NEXT: call void @__kmpc_push_num_teams(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]], i32 [[TMP2]], i32 0) -// CHECK25-NEXT: [[TMP3:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK25-NEXT: [[TMP3:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK25-NEXT: [[CONV3:%.*]] = bitcast i64* [[A_CASTED]] to i32* // CHECK25-NEXT: store i32 [[TMP3]], i32* [[CONV3]], align 4 // CHECK25-NEXT: [[TMP4:%.*]] = load i64, i64* [[A_CASTED]], align 8 -// CHECK25-NEXT: [[TMP5:%.*]] = load i16, i16* [[CONV1]], align 8 +// CHECK25-NEXT: [[TMP5:%.*]] = load i16, i16* [[CONV1]], align 2 // CHECK25-NEXT: [[CONV4:%.*]] = bitcast i64* [[B_CASTED]] to i16* // CHECK25-NEXT: store i16 [[TMP5]], i16* [[CONV4]], align 2 // CHECK25-NEXT: [[TMP6:%.*]] = load i64, i64* [[B_CASTED]], align 8 @@ -4688,11 +4688,11 @@ int bar(int n){ // CHECK25-NEXT: store i64 [[B]], i64* [[B_ADDR]], align 8 // CHECK25-NEXT: [[CONV:%.*]] = bitcast i64* [[A_ADDR]] to i32* // CHECK25-NEXT: [[CONV1:%.*]] = bitcast i64* [[B_ADDR]] to i16* -// CHECK25-NEXT: [[TMP0:%.*]] = load i16, i16* [[CONV1]], align 8 +// CHECK25-NEXT: [[TMP0:%.*]] = load i16, i16* [[CONV1]], align 2 // CHECK25-NEXT: [[CONV2:%.*]] = sext i16 [[TMP0]] to i32 -// CHECK25-NEXT: [[TMP1:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK25-NEXT: [[TMP1:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK25-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP1]], [[CONV2]] -// CHECK25-NEXT: store i32 [[ADD]], i32* [[CONV]], align 8 +// CHECK25-NEXT: store 
i32 [[ADD]], i32* [[CONV]], align 4 // CHECK25-NEXT: ret void // // @@ -4703,7 +4703,7 @@ int bar(int n){ // CHECK26-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1:[0-9]+]]) // CHECK26-NEXT: store i64 [[DOTCAPTURE_EXPR_]], i64* [[DOTCAPTURE_EXPR__ADDR]], align 8 // CHECK26-NEXT: [[CONV:%.*]] = bitcast i64* [[DOTCAPTURE_EXPR__ADDR]] to i32* -// CHECK26-NEXT: [[TMP1:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK26-NEXT: [[TMP1:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK26-NEXT: call void @__kmpc_push_num_teams(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]], i32 [[TMP1]], i32 0) // CHECK26-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB1]], i32 0, void (i32*, i32*, ...)* bitcast (void (i32*, i32*)* @.omp_outlined. to void (i32*, i32*, ...)*)) // CHECK26-NEXT: ret void @@ -4726,7 +4726,7 @@ int bar(int n){ // CHECK26-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) // CHECK26-NEXT: store i64 [[DOTCAPTURE_EXPR_]], i64* [[DOTCAPTURE_EXPR__ADDR]], align 8 // CHECK26-NEXT: [[CONV:%.*]] = bitcast i64* [[DOTCAPTURE_EXPR__ADDR]] to i32* -// CHECK26-NEXT: [[TMP1:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK26-NEXT: [[TMP1:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK26-NEXT: call void @__kmpc_push_num_teams(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]], i32 [[TMP1]], i32 0) // CHECK26-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB1]], i32 0, void (i32*, i32*, ...)* bitcast (void (i32*, i32*)* @.omp_outlined..1 to void (i32*, i32*, ...)*)) // CHECK26-NEXT: ret void @@ -4756,9 +4756,9 @@ int bar(int n){ // CHECK26-NEXT: [[TMP1:%.*]] = load %struct.S1*, %struct.S1** [[THIS_ADDR]], align 8 // CHECK26-NEXT: [[CONV:%.*]] = bitcast i64* [[B_ADDR]] to i32* // CHECK26-NEXT: [[CONV1:%.*]] = bitcast i64* [[DOTCAPTURE_EXPR__ADDR]] to i32* -// CHECK26-NEXT: [[TMP2:%.*]] = load i32, i32* [[CONV1]], align 8 +// CHECK26-NEXT: [[TMP2:%.*]] = load i32, i32* [[CONV1]], align 4 // CHECK26-NEXT: call void @__kmpc_push_num_teams(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]], i32 [[TMP2]], i32 0) -// CHECK26-NEXT: [[TMP3:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK26-NEXT: [[TMP3:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK26-NEXT: [[CONV2:%.*]] = bitcast i64* [[B_CASTED]] to i32* // CHECK26-NEXT: store i32 [[TMP3]], i32* [[CONV2]], align 4 // CHECK26-NEXT: [[TMP4:%.*]] = load i64, i64* [[B_CASTED]], align 8 @@ -4779,7 +4779,7 @@ int bar(int n){ // CHECK26-NEXT: store i64 [[B]], i64* [[B_ADDR]], align 8 // CHECK26-NEXT: [[TMP0:%.*]] = load %struct.S1*, %struct.S1** [[THIS_ADDR]], align 8 // CHECK26-NEXT: [[CONV:%.*]] = bitcast i64* [[B_ADDR]] to i32* -// CHECK26-NEXT: [[TMP1:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK26-NEXT: [[TMP1:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK26-NEXT: [[CONV1:%.*]] = sitofp i32 [[TMP1]] to double // CHECK26-NEXT: [[ADD:%.*]] = fadd double [[CONV1]], 1.500000e+00 // CHECK26-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_S1:%.*]], %struct.S1* [[TMP0]], i32 0, i32 0 @@ -4848,14 +4848,14 @@ int bar(int n){ // CHECK26-NEXT: [[CONV:%.*]] = bitcast i64* [[A_ADDR]] to i32* // CHECK26-NEXT: [[CONV1:%.*]] = bitcast i64* [[B_ADDR]] to i16* // CHECK26-NEXT: [[CONV2:%.*]] = bitcast i64* [[DOTCAPTURE_EXPR__ADDR]] to i16* -// CHECK26-NEXT: [[TMP1:%.*]] = load i16, i16* [[CONV2]], align 8 +// CHECK26-NEXT: [[TMP1:%.*]] = load i16, i16* [[CONV2]], align 2 // CHECK26-NEXT: 
[[TMP2:%.*]] = sext i16 [[TMP1]] to i32 // CHECK26-NEXT: call void @__kmpc_push_num_teams(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]], i32 [[TMP2]], i32 0) -// CHECK26-NEXT: [[TMP3:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK26-NEXT: [[TMP3:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK26-NEXT: [[CONV3:%.*]] = bitcast i64* [[A_CASTED]] to i32* // CHECK26-NEXT: store i32 [[TMP3]], i32* [[CONV3]], align 4 // CHECK26-NEXT: [[TMP4:%.*]] = load i64, i64* [[A_CASTED]], align 8 -// CHECK26-NEXT: [[TMP5:%.*]] = load i16, i16* [[CONV1]], align 8 +// CHECK26-NEXT: [[TMP5:%.*]] = load i16, i16* [[CONV1]], align 2 // CHECK26-NEXT: [[CONV4:%.*]] = bitcast i64* [[B_CASTED]] to i16* // CHECK26-NEXT: store i16 [[TMP5]], i16* [[CONV4]], align 2 // CHECK26-NEXT: [[TMP6:%.*]] = load i64, i64* [[B_CASTED]], align 8 @@ -4876,11 +4876,11 @@ int bar(int n){ // CHECK26-NEXT: store i64 [[B]], i64* [[B_ADDR]], align 8 // CHECK26-NEXT: [[CONV:%.*]] = bitcast i64* [[A_ADDR]] to i32* // CHECK26-NEXT: [[CONV1:%.*]] = bitcast i64* [[B_ADDR]] to i16* -// CHECK26-NEXT: [[TMP0:%.*]] = load i16, i16* [[CONV1]], align 8 +// CHECK26-NEXT: [[TMP0:%.*]] = load i16, i16* [[CONV1]], align 2 // CHECK26-NEXT: [[CONV2:%.*]] = sext i16 [[TMP0]] to i32 -// CHECK26-NEXT: [[TMP1:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK26-NEXT: [[TMP1:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK26-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP1]], [[CONV2]] -// CHECK26-NEXT: store i32 [[ADD]], i32* [[CONV]], align 8 +// CHECK26-NEXT: store i32 [[ADD]], i32* [[CONV]], align 4 // CHECK26-NEXT: ret void // // @@ -5029,13 +5029,13 @@ int bar(int n){ // CHECK27-NEXT: store i32 [[DOTCAPTURE_EXPR_]], i32* [[DOTCAPTURE_EXPR__ADDR]], align 4 // CHECK27-NEXT: [[CONV:%.*]] = bitcast i32* [[B_ADDR]] to i16* // CHECK27-NEXT: [[CONV1:%.*]] = bitcast i32* [[DOTCAPTURE_EXPR__ADDR]] to i16* -// CHECK27-NEXT: [[TMP1:%.*]] = load i16, i16* [[CONV1]], align 4 +// CHECK27-NEXT: [[TMP1:%.*]] = load i16, i16* [[CONV1]], align 2 // CHECK27-NEXT: [[TMP2:%.*]] = sext i16 [[TMP1]] to i32 // CHECK27-NEXT: call void @__kmpc_push_num_teams(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]], i32 [[TMP2]], i32 0) // CHECK27-NEXT: [[TMP3:%.*]] = load i32, i32* [[A_ADDR]], align 4 // CHECK27-NEXT: store i32 [[TMP3]], i32* [[A_CASTED]], align 4 // CHECK27-NEXT: [[TMP4:%.*]] = load i32, i32* [[A_CASTED]], align 4 -// CHECK27-NEXT: [[TMP5:%.*]] = load i16, i16* [[CONV]], align 4 +// CHECK27-NEXT: [[TMP5:%.*]] = load i16, i16* [[CONV]], align 2 // CHECK27-NEXT: [[CONV2:%.*]] = bitcast i32* [[B_CASTED]] to i16* // CHECK27-NEXT: store i16 [[TMP5]], i16* [[CONV2]], align 2 // CHECK27-NEXT: [[TMP6:%.*]] = load i32, i32* [[B_CASTED]], align 4 @@ -5055,7 +5055,7 @@ int bar(int n){ // CHECK27-NEXT: store i32 [[A]], i32* [[A_ADDR]], align 4 // CHECK27-NEXT: store i32 [[B]], i32* [[B_ADDR]], align 4 // CHECK27-NEXT: [[CONV:%.*]] = bitcast i32* [[B_ADDR]] to i16* -// CHECK27-NEXT: [[TMP0:%.*]] = load i16, i16* [[CONV]], align 4 +// CHECK27-NEXT: [[TMP0:%.*]] = load i16, i16* [[CONV]], align 2 // CHECK27-NEXT: [[CONV1:%.*]] = sext i16 [[TMP0]] to i32 // CHECK27-NEXT: [[TMP1:%.*]] = load i32, i32* [[A_ADDR]], align 4 // CHECK27-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP1]], [[CONV1]] @@ -5208,13 +5208,13 @@ int bar(int n){ // CHECK28-NEXT: store i32 [[DOTCAPTURE_EXPR_]], i32* [[DOTCAPTURE_EXPR__ADDR]], align 4 // CHECK28-NEXT: [[CONV:%.*]] = bitcast i32* [[B_ADDR]] to i16* // CHECK28-NEXT: [[CONV1:%.*]] = bitcast i32* [[DOTCAPTURE_EXPR__ADDR]] to i16* -// CHECK28-NEXT: [[TMP1:%.*]] = load i16, i16* 
[[CONV1]], align 4 +// CHECK28-NEXT: [[TMP1:%.*]] = load i16, i16* [[CONV1]], align 2 // CHECK28-NEXT: [[TMP2:%.*]] = sext i16 [[TMP1]] to i32 // CHECK28-NEXT: call void @__kmpc_push_num_teams(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]], i32 [[TMP2]], i32 0) // CHECK28-NEXT: [[TMP3:%.*]] = load i32, i32* [[A_ADDR]], align 4 // CHECK28-NEXT: store i32 [[TMP3]], i32* [[A_CASTED]], align 4 // CHECK28-NEXT: [[TMP4:%.*]] = load i32, i32* [[A_CASTED]], align 4 -// CHECK28-NEXT: [[TMP5:%.*]] = load i16, i16* [[CONV]], align 4 +// CHECK28-NEXT: [[TMP5:%.*]] = load i16, i16* [[CONV]], align 2 // CHECK28-NEXT: [[CONV2:%.*]] = bitcast i32* [[B_CASTED]] to i16* // CHECK28-NEXT: store i16 [[TMP5]], i16* [[CONV2]], align 2 // CHECK28-NEXT: [[TMP6:%.*]] = load i32, i32* [[B_CASTED]], align 4 @@ -5234,7 +5234,7 @@ int bar(int n){ // CHECK28-NEXT: store i32 [[A]], i32* [[A_ADDR]], align 4 // CHECK28-NEXT: store i32 [[B]], i32* [[B_ADDR]], align 4 // CHECK28-NEXT: [[CONV:%.*]] = bitcast i32* [[B_ADDR]] to i16* -// CHECK28-NEXT: [[TMP0:%.*]] = load i16, i16* [[CONV]], align 4 +// CHECK28-NEXT: [[TMP0:%.*]] = load i16, i16* [[CONV]], align 2 // CHECK28-NEXT: [[CONV1:%.*]] = sext i16 [[TMP0]] to i32 // CHECK28-NEXT: [[TMP1:%.*]] = load i32, i32* [[A_ADDR]], align 4 // CHECK28-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP1]], [[CONV1]] diff --git a/clang/test/OpenMP/target_teams_thread_limit_codegen.cpp b/clang/test/OpenMP/target_teams_thread_limit_codegen.cpp index d80366e55beb..860b6ee596dc 100644 --- a/clang/test/OpenMP/target_teams_thread_limit_codegen.cpp +++ b/clang/test/OpenMP/target_teams_thread_limit_codegen.cpp @@ -468,9 +468,9 @@ int bar(int n){ // CHECK1-NEXT: [[TMP1:%.*]] = load %struct.S1*, %struct.S1** [[THIS_ADDR]], align 8 // CHECK1-NEXT: [[CONV:%.*]] = bitcast i64* [[B_ADDR]] to i32* // CHECK1-NEXT: [[CONV1:%.*]] = bitcast i64* [[DOTCAPTURE_EXPR__ADDR]] to i32* -// CHECK1-NEXT: [[TMP2:%.*]] = load i32, i32* [[CONV1]], align 8 +// CHECK1-NEXT: [[TMP2:%.*]] = load i32, i32* [[CONV1]], align 4 // CHECK1-NEXT: call void @__kmpc_push_num_teams(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]], i32 0, i32 [[TMP2]]) -// CHECK1-NEXT: [[TMP3:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK1-NEXT: [[TMP3:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK1-NEXT: [[CONV2:%.*]] = bitcast i64* [[B_CASTED]] to i32* // CHECK1-NEXT: store i32 [[TMP3]], i32* [[CONV2]], align 4 // CHECK1-NEXT: [[TMP4:%.*]] = load i64, i64* [[B_CASTED]], align 8 @@ -491,7 +491,7 @@ int bar(int n){ // CHECK1-NEXT: store i64 [[B]], i64* [[B_ADDR]], align 8 // CHECK1-NEXT: [[TMP0:%.*]] = load %struct.S1*, %struct.S1** [[THIS_ADDR]], align 8 // CHECK1-NEXT: [[CONV:%.*]] = bitcast i64* [[B_ADDR]] to i32* -// CHECK1-NEXT: [[TMP1:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK1-NEXT: [[CONV1:%.*]] = sitofp i32 [[TMP1]] to double // CHECK1-NEXT: [[ADD:%.*]] = fadd double [[CONV1]], 1.500000e+00 // CHECK1-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_S1:%.*]], %struct.S1* [[TMP0]], i32 0, i32 0 @@ -536,8 +536,8 @@ int bar(int n){ // CHECK1-NEXT: store i64 [[DOTCAPTURE_EXPR_1]], i64* [[DOTCAPTURE_EXPR__ADDR2]], align 8 // CHECK1-NEXT: [[CONV:%.*]] = bitcast i64* [[DOTCAPTURE_EXPR__ADDR]] to i32* // CHECK1-NEXT: [[CONV3:%.*]] = bitcast i64* [[DOTCAPTURE_EXPR__ADDR2]] to i32* -// CHECK1-NEXT: [[TMP1:%.*]] = load i32, i32* [[CONV]], align 8 -// CHECK1-NEXT: [[TMP2:%.*]] = load i32, i32* [[CONV3]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = load i32, i32* [[CONV]], align 4 +// CHECK1-NEXT: 
[[TMP2:%.*]] = load i32, i32* [[CONV3]], align 4 // CHECK1-NEXT: call void @__kmpc_push_num_teams(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]], i32 [[TMP1]], i32 [[TMP2]]) // CHECK1-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB1]], i32 0, void (i32*, i32*, ...)* bitcast (void (i32*, i32*)* @.omp_outlined..4 to void (i32*, i32*, ...)*)) // CHECK1-NEXT: ret void @@ -560,7 +560,7 @@ int bar(int n){ // CHECK1-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) // CHECK1-NEXT: store i64 [[DOTCAPTURE_EXPR_]], i64* [[DOTCAPTURE_EXPR__ADDR]], align 8 // CHECK1-NEXT: [[CONV:%.*]] = bitcast i64* [[DOTCAPTURE_EXPR__ADDR]] to i32* -// CHECK1-NEXT: [[TMP1:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK1-NEXT: call void @__kmpc_push_num_teams(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]], i32 0, i32 [[TMP1]]) // CHECK1-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB1]], i32 0, void (i32*, i32*, ...)* bitcast (void (i32*, i32*)* @.omp_outlined..7 to void (i32*, i32*, ...)*)) // CHECK1-NEXT: ret void @@ -610,14 +610,14 @@ int bar(int n){ // CHECK1-NEXT: [[CONV:%.*]] = bitcast i64* [[A_ADDR]] to i32* // CHECK1-NEXT: [[CONV1:%.*]] = bitcast i64* [[B_ADDR]] to i16* // CHECK1-NEXT: [[CONV2:%.*]] = bitcast i64* [[DOTCAPTURE_EXPR__ADDR]] to i16* -// CHECK1-NEXT: [[TMP1:%.*]] = load i16, i16* [[CONV2]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = load i16, i16* [[CONV2]], align 2 // CHECK1-NEXT: [[TMP2:%.*]] = sext i16 [[TMP1]] to i32 // CHECK1-NEXT: call void @__kmpc_push_num_teams(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]], i32 [[TMP2]], i32 1024) -// CHECK1-NEXT: [[TMP3:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK1-NEXT: [[TMP3:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK1-NEXT: [[CONV3:%.*]] = bitcast i64* [[A_CASTED]] to i32* // CHECK1-NEXT: store i32 [[TMP3]], i32* [[CONV3]], align 4 // CHECK1-NEXT: [[TMP4:%.*]] = load i64, i64* [[A_CASTED]], align 8 -// CHECK1-NEXT: [[TMP5:%.*]] = load i16, i16* [[CONV1]], align 8 +// CHECK1-NEXT: [[TMP5:%.*]] = load i16, i16* [[CONV1]], align 2 // CHECK1-NEXT: [[CONV4:%.*]] = bitcast i64* [[B_CASTED]] to i16* // CHECK1-NEXT: store i16 [[TMP5]], i16* [[CONV4]], align 2 // CHECK1-NEXT: [[TMP6:%.*]] = load i64, i64* [[B_CASTED]], align 8 @@ -638,11 +638,11 @@ int bar(int n){ // CHECK1-NEXT: store i64 [[B]], i64* [[B_ADDR]], align 8 // CHECK1-NEXT: [[CONV:%.*]] = bitcast i64* [[A_ADDR]] to i32* // CHECK1-NEXT: [[CONV1:%.*]] = bitcast i64* [[B_ADDR]] to i16* -// CHECK1-NEXT: [[TMP0:%.*]] = load i16, i16* [[CONV1]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load i16, i16* [[CONV1]], align 2 // CHECK1-NEXT: [[CONV2:%.*]] = sext i16 [[TMP0]] to i32 -// CHECK1-NEXT: [[TMP1:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP1]], [[CONV2]] -// CHECK1-NEXT: store i32 [[ADD]], i32* [[CONV]], align 8 +// CHECK1-NEXT: store i32 [[ADD]], i32* [[CONV]], align 4 // CHECK1-NEXT: ret void // // @@ -947,9 +947,9 @@ int bar(int n){ // CHECK2-NEXT: [[TMP1:%.*]] = load %struct.S1*, %struct.S1** [[THIS_ADDR]], align 8 // CHECK2-NEXT: [[CONV:%.*]] = bitcast i64* [[B_ADDR]] to i32* // CHECK2-NEXT: [[CONV1:%.*]] = bitcast i64* [[DOTCAPTURE_EXPR__ADDR]] to i32* -// CHECK2-NEXT: [[TMP2:%.*]] = load i32, i32* [[CONV1]], align 8 +// CHECK2-NEXT: [[TMP2:%.*]] = load i32, i32* 
[[CONV1]], align 4 // CHECK2-NEXT: call void @__kmpc_push_num_teams(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]], i32 0, i32 [[TMP2]]) -// CHECK2-NEXT: [[TMP3:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK2-NEXT: [[TMP3:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK2-NEXT: [[CONV2:%.*]] = bitcast i64* [[B_CASTED]] to i32* // CHECK2-NEXT: store i32 [[TMP3]], i32* [[CONV2]], align 4 // CHECK2-NEXT: [[TMP4:%.*]] = load i64, i64* [[B_CASTED]], align 8 @@ -970,7 +970,7 @@ int bar(int n){ // CHECK2-NEXT: store i64 [[B]], i64* [[B_ADDR]], align 8 // CHECK2-NEXT: [[TMP0:%.*]] = load %struct.S1*, %struct.S1** [[THIS_ADDR]], align 8 // CHECK2-NEXT: [[CONV:%.*]] = bitcast i64* [[B_ADDR]] to i32* -// CHECK2-NEXT: [[TMP1:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK2-NEXT: [[TMP1:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK2-NEXT: [[CONV1:%.*]] = sitofp i32 [[TMP1]] to double // CHECK2-NEXT: [[ADD:%.*]] = fadd double [[CONV1]], 1.500000e+00 // CHECK2-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_S1:%.*]], %struct.S1* [[TMP0]], i32 0, i32 0 @@ -1015,8 +1015,8 @@ int bar(int n){ // CHECK2-NEXT: store i64 [[DOTCAPTURE_EXPR_1]], i64* [[DOTCAPTURE_EXPR__ADDR2]], align 8 // CHECK2-NEXT: [[CONV:%.*]] = bitcast i64* [[DOTCAPTURE_EXPR__ADDR]] to i32* // CHECK2-NEXT: [[CONV3:%.*]] = bitcast i64* [[DOTCAPTURE_EXPR__ADDR2]] to i32* -// CHECK2-NEXT: [[TMP1:%.*]] = load i32, i32* [[CONV]], align 8 -// CHECK2-NEXT: [[TMP2:%.*]] = load i32, i32* [[CONV3]], align 8 +// CHECK2-NEXT: [[TMP1:%.*]] = load i32, i32* [[CONV]], align 4 +// CHECK2-NEXT: [[TMP2:%.*]] = load i32, i32* [[CONV3]], align 4 // CHECK2-NEXT: call void @__kmpc_push_num_teams(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]], i32 [[TMP1]], i32 [[TMP2]]) // CHECK2-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB1]], i32 0, void (i32*, i32*, ...)* bitcast (void (i32*, i32*)* @.omp_outlined..4 to void (i32*, i32*, ...)*)) // CHECK2-NEXT: ret void @@ -1039,7 +1039,7 @@ int bar(int n){ // CHECK2-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) // CHECK2-NEXT: store i64 [[DOTCAPTURE_EXPR_]], i64* [[DOTCAPTURE_EXPR__ADDR]], align 8 // CHECK2-NEXT: [[CONV:%.*]] = bitcast i64* [[DOTCAPTURE_EXPR__ADDR]] to i32* -// CHECK2-NEXT: [[TMP1:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK2-NEXT: [[TMP1:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK2-NEXT: call void @__kmpc_push_num_teams(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]], i32 0, i32 [[TMP1]]) // CHECK2-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) 
@__kmpc_fork_teams(%struct.ident_t* @[[GLOB1]], i32 0, void (i32*, i32*, ...)* bitcast (void (i32*, i32*)* @.omp_outlined..7 to void (i32*, i32*, ...)*)) // CHECK2-NEXT: ret void @@ -1089,14 +1089,14 @@ int bar(int n){ // CHECK2-NEXT: [[CONV:%.*]] = bitcast i64* [[A_ADDR]] to i32* // CHECK2-NEXT: [[CONV1:%.*]] = bitcast i64* [[B_ADDR]] to i16* // CHECK2-NEXT: [[CONV2:%.*]] = bitcast i64* [[DOTCAPTURE_EXPR__ADDR]] to i16* -// CHECK2-NEXT: [[TMP1:%.*]] = load i16, i16* [[CONV2]], align 8 +// CHECK2-NEXT: [[TMP1:%.*]] = load i16, i16* [[CONV2]], align 2 // CHECK2-NEXT: [[TMP2:%.*]] = sext i16 [[TMP1]] to i32 // CHECK2-NEXT: call void @__kmpc_push_num_teams(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]], i32 [[TMP2]], i32 1024) -// CHECK2-NEXT: [[TMP3:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK2-NEXT: [[TMP3:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK2-NEXT: [[CONV3:%.*]] = bitcast i64* [[A_CASTED]] to i32* // CHECK2-NEXT: store i32 [[TMP3]], i32* [[CONV3]], align 4 // CHECK2-NEXT: [[TMP4:%.*]] = load i64, i64* [[A_CASTED]], align 8 -// CHECK2-NEXT: [[TMP5:%.*]] = load i16, i16* [[CONV1]], align 8 +// CHECK2-NEXT: [[TMP5:%.*]] = load i16, i16* [[CONV1]], align 2 // CHECK2-NEXT: [[CONV4:%.*]] = bitcast i64* [[B_CASTED]] to i16* // CHECK2-NEXT: store i16 [[TMP5]], i16* [[CONV4]], align 2 // CHECK2-NEXT: [[TMP6:%.*]] = load i64, i64* [[B_CASTED]], align 8 @@ -1117,11 +1117,11 @@ int bar(int n){ // CHECK2-NEXT: store i64 [[B]], i64* [[B_ADDR]], align 8 // CHECK2-NEXT: [[CONV:%.*]] = bitcast i64* [[A_ADDR]] to i32* // CHECK2-NEXT: [[CONV1:%.*]] = bitcast i64* [[B_ADDR]] to i16* -// CHECK2-NEXT: [[TMP0:%.*]] = load i16, i16* [[CONV1]], align 8 +// CHECK2-NEXT: [[TMP0:%.*]] = load i16, i16* [[CONV1]], align 2 // CHECK2-NEXT: [[CONV2:%.*]] = sext i16 [[TMP0]] to i32 -// CHECK2-NEXT: [[TMP1:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK2-NEXT: [[TMP1:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK2-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP1]], [[CONV2]] -// CHECK2-NEXT: store i32 [[ADD]], i32* [[CONV]], align 8 +// CHECK2-NEXT: store i32 [[ADD]], i32* [[CONV]], align 4 // CHECK2-NEXT: ret void // // @@ -1554,13 +1554,13 @@ int bar(int n){ // CHECK3-NEXT: store i32 [[DOTCAPTURE_EXPR_]], i32* [[DOTCAPTURE_EXPR__ADDR]], align 4 // CHECK3-NEXT: [[CONV:%.*]] = bitcast i32* [[B_ADDR]] to i16* // CHECK3-NEXT: [[CONV1:%.*]] = bitcast i32* [[DOTCAPTURE_EXPR__ADDR]] to i16* -// CHECK3-NEXT: [[TMP1:%.*]] = load i16, i16* [[CONV1]], align 4 +// CHECK3-NEXT: [[TMP1:%.*]] = load i16, i16* [[CONV1]], align 2 // CHECK3-NEXT: [[TMP2:%.*]] = sext i16 [[TMP1]] to i32 // CHECK3-NEXT: call void @__kmpc_push_num_teams(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]], i32 [[TMP2]], i32 1024) // CHECK3-NEXT: [[TMP3:%.*]] = load i32, i32* [[A_ADDR]], align 4 // CHECK3-NEXT: store i32 [[TMP3]], i32* [[A_CASTED]], align 4 // CHECK3-NEXT: [[TMP4:%.*]] = load i32, i32* [[A_CASTED]], align 4 -// CHECK3-NEXT: [[TMP5:%.*]] = load i16, i16* [[CONV]], align 4 +// CHECK3-NEXT: [[TMP5:%.*]] = load i16, i16* [[CONV]], align 2 // CHECK3-NEXT: [[CONV2:%.*]] = bitcast i32* [[B_CASTED]] to i16* // CHECK3-NEXT: store i16 [[TMP5]], i16* [[CONV2]], align 2 // CHECK3-NEXT: [[TMP6:%.*]] = load i32, i32* [[B_CASTED]], align 4 @@ -1580,7 +1580,7 @@ int bar(int n){ // CHECK3-NEXT: store i32 [[A]], i32* [[A_ADDR]], align 4 // CHECK3-NEXT: store i32 [[B]], i32* [[B_ADDR]], align 4 // CHECK3-NEXT: [[CONV:%.*]] = bitcast i32* [[B_ADDR]] to i16* -// CHECK3-NEXT: [[TMP0:%.*]] = load i16, i16* [[CONV]], align 4 +// CHECK3-NEXT: [[TMP0:%.*]] = load i16, 
i16* [[CONV]], align 2 // CHECK3-NEXT: [[CONV1:%.*]] = sext i16 [[TMP0]] to i32 // CHECK3-NEXT: [[TMP1:%.*]] = load i32, i32* [[A_ADDR]], align 4 // CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP1]], [[CONV1]] @@ -2017,13 +2017,13 @@ int bar(int n){ // CHECK4-NEXT: store i32 [[DOTCAPTURE_EXPR_]], i32* [[DOTCAPTURE_EXPR__ADDR]], align 4 // CHECK4-NEXT: [[CONV:%.*]] = bitcast i32* [[B_ADDR]] to i16* // CHECK4-NEXT: [[CONV1:%.*]] = bitcast i32* [[DOTCAPTURE_EXPR__ADDR]] to i16* -// CHECK4-NEXT: [[TMP1:%.*]] = load i16, i16* [[CONV1]], align 4 +// CHECK4-NEXT: [[TMP1:%.*]] = load i16, i16* [[CONV1]], align 2 // CHECK4-NEXT: [[TMP2:%.*]] = sext i16 [[TMP1]] to i32 // CHECK4-NEXT: call void @__kmpc_push_num_teams(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]], i32 [[TMP2]], i32 1024) // CHECK4-NEXT: [[TMP3:%.*]] = load i32, i32* [[A_ADDR]], align 4 // CHECK4-NEXT: store i32 [[TMP3]], i32* [[A_CASTED]], align 4 // CHECK4-NEXT: [[TMP4:%.*]] = load i32, i32* [[A_CASTED]], align 4 -// CHECK4-NEXT: [[TMP5:%.*]] = load i16, i16* [[CONV]], align 4 +// CHECK4-NEXT: [[TMP5:%.*]] = load i16, i16* [[CONV]], align 2 // CHECK4-NEXT: [[CONV2:%.*]] = bitcast i32* [[B_CASTED]] to i16* // CHECK4-NEXT: store i16 [[TMP5]], i16* [[CONV2]], align 2 // CHECK4-NEXT: [[TMP6:%.*]] = load i32, i32* [[B_CASTED]], align 4 @@ -2043,7 +2043,7 @@ int bar(int n){ // CHECK4-NEXT: store i32 [[A]], i32* [[A_ADDR]], align 4 // CHECK4-NEXT: store i32 [[B]], i32* [[B_ADDR]], align 4 // CHECK4-NEXT: [[CONV:%.*]] = bitcast i32* [[B_ADDR]] to i16* -// CHECK4-NEXT: [[TMP0:%.*]] = load i16, i16* [[CONV]], align 4 +// CHECK4-NEXT: [[TMP0:%.*]] = load i16, i16* [[CONV]], align 2 // CHECK4-NEXT: [[CONV1:%.*]] = sext i16 [[TMP0]] to i32 // CHECK4-NEXT: [[TMP1:%.*]] = load i32, i32* [[A_ADDR]], align 4 // CHECK4-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP1]], [[CONV1]] @@ -2068,8 +2068,8 @@ int bar(int n){ // CHECK9-NEXT: store i64 [[DOTCAPTURE_EXPR_1]], i64* [[DOTCAPTURE_EXPR__ADDR2]], align 8 // CHECK9-NEXT: [[CONV:%.*]] = bitcast i64* [[DOTCAPTURE_EXPR__ADDR]] to i32* // CHECK9-NEXT: [[CONV3:%.*]] = bitcast i64* [[DOTCAPTURE_EXPR__ADDR2]] to i32* -// CHECK9-NEXT: [[TMP1:%.*]] = load i32, i32* [[CONV]], align 8 -// CHECK9-NEXT: [[TMP2:%.*]] = load i32, i32* [[CONV3]], align 8 +// CHECK9-NEXT: [[TMP1:%.*]] = load i32, i32* [[CONV]], align 4 +// CHECK9-NEXT: [[TMP2:%.*]] = load i32, i32* [[CONV3]], align 4 // CHECK9-NEXT: call void @__kmpc_push_num_teams(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]], i32 [[TMP1]], i32 [[TMP2]]) // CHECK9-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB1]], i32 0, void (i32*, i32*, ...)* bitcast (void (i32*, i32*)* @.omp_outlined. to void (i32*, i32*, ...)*)) // CHECK9-NEXT: ret void @@ -2092,7 +2092,7 @@ int bar(int n){ // CHECK9-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) // CHECK9-NEXT: store i64 [[DOTCAPTURE_EXPR_]], i64* [[DOTCAPTURE_EXPR__ADDR]], align 8 // CHECK9-NEXT: [[CONV:%.*]] = bitcast i64* [[DOTCAPTURE_EXPR__ADDR]] to i32* -// CHECK9-NEXT: [[TMP1:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK9-NEXT: [[TMP1:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK9-NEXT: call void @__kmpc_push_num_teams(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]], i32 0, i32 [[TMP1]]) // CHECK9-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) 
@__kmpc_fork_teams(%struct.ident_t* @[[GLOB1]], i32 0, void (i32*, i32*, ...)* bitcast (void (i32*, i32*)* @.omp_outlined..1 to void (i32*, i32*, ...)*)) // CHECK9-NEXT: ret void @@ -2122,9 +2122,9 @@ int bar(int n){ // CHECK9-NEXT: [[TMP1:%.*]] = load %struct.S1*, %struct.S1** [[THIS_ADDR]], align 8 // CHECK9-NEXT: [[CONV:%.*]] = bitcast i64* [[B_ADDR]] to i32* // CHECK9-NEXT: [[CONV1:%.*]] = bitcast i64* [[DOTCAPTURE_EXPR__ADDR]] to i32* -// CHECK9-NEXT: [[TMP2:%.*]] = load i32, i32* [[CONV1]], align 8 +// CHECK9-NEXT: [[TMP2:%.*]] = load i32, i32* [[CONV1]], align 4 // CHECK9-NEXT: call void @__kmpc_push_num_teams(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]], i32 0, i32 [[TMP2]]) -// CHECK9-NEXT: [[TMP3:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK9-NEXT: [[TMP3:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK9-NEXT: [[CONV2:%.*]] = bitcast i64* [[B_CASTED]] to i32* // CHECK9-NEXT: store i32 [[TMP3]], i32* [[CONV2]], align 4 // CHECK9-NEXT: [[TMP4:%.*]] = load i64, i64* [[B_CASTED]], align 8 @@ -2145,7 +2145,7 @@ int bar(int n){ // CHECK9-NEXT: store i64 [[B]], i64* [[B_ADDR]], align 8 // CHECK9-NEXT: [[TMP0:%.*]] = load %struct.S1*, %struct.S1** [[THIS_ADDR]], align 8 // CHECK9-NEXT: [[CONV:%.*]] = bitcast i64* [[B_ADDR]] to i32* -// CHECK9-NEXT: [[TMP1:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK9-NEXT: [[TMP1:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK9-NEXT: [[CONV1:%.*]] = sitofp i32 [[TMP1]] to double // CHECK9-NEXT: [[ADD:%.*]] = fadd double [[CONV1]], 1.500000e+00 // CHECK9-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_S1:%.*]], %struct.S1* [[TMP0]], i32 0, i32 0 @@ -2214,14 +2214,14 @@ int bar(int n){ // CHECK9-NEXT: [[CONV:%.*]] = bitcast i64* [[A_ADDR]] to i32* // CHECK9-NEXT: [[CONV1:%.*]] = bitcast i64* [[B_ADDR]] to i16* // CHECK9-NEXT: [[CONV2:%.*]] = bitcast i64* [[DOTCAPTURE_EXPR__ADDR]] to i16* -// CHECK9-NEXT: [[TMP1:%.*]] = load i16, i16* [[CONV2]], align 8 +// CHECK9-NEXT: [[TMP1:%.*]] = load i16, i16* [[CONV2]], align 2 // CHECK9-NEXT: [[TMP2:%.*]] = sext i16 [[TMP1]] to i32 // CHECK9-NEXT: call void @__kmpc_push_num_teams(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]], i32 [[TMP2]], i32 1024) -// CHECK9-NEXT: [[TMP3:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK9-NEXT: [[TMP3:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK9-NEXT: [[CONV3:%.*]] = bitcast i64* [[A_CASTED]] to i32* // CHECK9-NEXT: store i32 [[TMP3]], i32* [[CONV3]], align 4 // CHECK9-NEXT: [[TMP4:%.*]] = load i64, i64* [[A_CASTED]], align 8 -// CHECK9-NEXT: [[TMP5:%.*]] = load i16, i16* [[CONV1]], align 8 +// CHECK9-NEXT: [[TMP5:%.*]] = load i16, i16* [[CONV1]], align 2 // CHECK9-NEXT: [[CONV4:%.*]] = bitcast i64* [[B_CASTED]] to i16* // CHECK9-NEXT: store i16 [[TMP5]], i16* [[CONV4]], align 2 // CHECK9-NEXT: [[TMP6:%.*]] = load i64, i64* [[B_CASTED]], align 8 @@ -2242,11 +2242,11 @@ int bar(int n){ // CHECK9-NEXT: store i64 [[B]], i64* [[B_ADDR]], align 8 // CHECK9-NEXT: [[CONV:%.*]] = bitcast i64* [[A_ADDR]] to i32* // CHECK9-NEXT: [[CONV1:%.*]] = bitcast i64* [[B_ADDR]] to i16* -// CHECK9-NEXT: [[TMP0:%.*]] = load i16, i16* [[CONV1]], align 8 +// CHECK9-NEXT: [[TMP0:%.*]] = load i16, i16* [[CONV1]], align 2 // CHECK9-NEXT: [[CONV2:%.*]] = sext i16 [[TMP0]] to i32 -// CHECK9-NEXT: [[TMP1:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK9-NEXT: [[TMP1:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK9-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP1]], [[CONV2]] -// CHECK9-NEXT: store i32 [[ADD]], i32* [[CONV]], align 8 +// CHECK9-NEXT: store i32 [[ADD]], i32* [[CONV]], align 4 // 
CHECK9-NEXT: ret void // // @@ -2260,8 +2260,8 @@ int bar(int n){ // CHECK10-NEXT: store i64 [[DOTCAPTURE_EXPR_1]], i64* [[DOTCAPTURE_EXPR__ADDR2]], align 8 // CHECK10-NEXT: [[CONV:%.*]] = bitcast i64* [[DOTCAPTURE_EXPR__ADDR]] to i32* // CHECK10-NEXT: [[CONV3:%.*]] = bitcast i64* [[DOTCAPTURE_EXPR__ADDR2]] to i32* -// CHECK10-NEXT: [[TMP1:%.*]] = load i32, i32* [[CONV]], align 8 -// CHECK10-NEXT: [[TMP2:%.*]] = load i32, i32* [[CONV3]], align 8 +// CHECK10-NEXT: [[TMP1:%.*]] = load i32, i32* [[CONV]], align 4 +// CHECK10-NEXT: [[TMP2:%.*]] = load i32, i32* [[CONV3]], align 4 // CHECK10-NEXT: call void @__kmpc_push_num_teams(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]], i32 [[TMP1]], i32 [[TMP2]]) // CHECK10-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB1]], i32 0, void (i32*, i32*, ...)* bitcast (void (i32*, i32*)* @.omp_outlined. to void (i32*, i32*, ...)*)) // CHECK10-NEXT: ret void @@ -2284,7 +2284,7 @@ int bar(int n){ // CHECK10-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) // CHECK10-NEXT: store i64 [[DOTCAPTURE_EXPR_]], i64* [[DOTCAPTURE_EXPR__ADDR]], align 8 // CHECK10-NEXT: [[CONV:%.*]] = bitcast i64* [[DOTCAPTURE_EXPR__ADDR]] to i32* -// CHECK10-NEXT: [[TMP1:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK10-NEXT: [[TMP1:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK10-NEXT: call void @__kmpc_push_num_teams(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]], i32 0, i32 [[TMP1]]) // CHECK10-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB1]], i32 0, void (i32*, i32*, ...)* bitcast (void (i32*, i32*)* @.omp_outlined..1 to void (i32*, i32*, ...)*)) // CHECK10-NEXT: ret void @@ -2314,9 +2314,9 @@ int bar(int n){ // CHECK10-NEXT: [[TMP1:%.*]] = load %struct.S1*, %struct.S1** [[THIS_ADDR]], align 8 // CHECK10-NEXT: [[CONV:%.*]] = bitcast i64* [[B_ADDR]] to i32* // CHECK10-NEXT: [[CONV1:%.*]] = bitcast i64* [[DOTCAPTURE_EXPR__ADDR]] to i32* -// CHECK10-NEXT: [[TMP2:%.*]] = load i32, i32* [[CONV1]], align 8 +// CHECK10-NEXT: [[TMP2:%.*]] = load i32, i32* [[CONV1]], align 4 // CHECK10-NEXT: call void @__kmpc_push_num_teams(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]], i32 0, i32 [[TMP2]]) -// CHECK10-NEXT: [[TMP3:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK10-NEXT: [[TMP3:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK10-NEXT: [[CONV2:%.*]] = bitcast i64* [[B_CASTED]] to i32* // CHECK10-NEXT: store i32 [[TMP3]], i32* [[CONV2]], align 4 // CHECK10-NEXT: [[TMP4:%.*]] = load i64, i64* [[B_CASTED]], align 8 @@ -2337,7 +2337,7 @@ int bar(int n){ // CHECK10-NEXT: store i64 [[B]], i64* [[B_ADDR]], align 8 // CHECK10-NEXT: [[TMP0:%.*]] = load %struct.S1*, %struct.S1** [[THIS_ADDR]], align 8 // CHECK10-NEXT: [[CONV:%.*]] = bitcast i64* [[B_ADDR]] to i32* -// CHECK10-NEXT: [[TMP1:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK10-NEXT: [[TMP1:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK10-NEXT: [[CONV1:%.*]] = sitofp i32 [[TMP1]] to double // CHECK10-NEXT: [[ADD:%.*]] = fadd double [[CONV1]], 1.500000e+00 // CHECK10-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_S1:%.*]], %struct.S1* [[TMP0]], i32 0, i32 0 @@ -2406,14 +2406,14 @@ int bar(int n){ // CHECK10-NEXT: [[CONV:%.*]] = bitcast i64* [[A_ADDR]] to i32* // CHECK10-NEXT: [[CONV1:%.*]] = bitcast i64* [[B_ADDR]] to i16* // CHECK10-NEXT: [[CONV2:%.*]] = bitcast i64* [[DOTCAPTURE_EXPR__ADDR]] to i16* -// CHECK10-NEXT: [[TMP1:%.*]] = load i16, i16* [[CONV2]], align 8 +// 
CHECK10-NEXT: [[TMP1:%.*]] = load i16, i16* [[CONV2]], align 2 // CHECK10-NEXT: [[TMP2:%.*]] = sext i16 [[TMP1]] to i32 // CHECK10-NEXT: call void @__kmpc_push_num_teams(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]], i32 [[TMP2]], i32 1024) -// CHECK10-NEXT: [[TMP3:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK10-NEXT: [[TMP3:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK10-NEXT: [[CONV3:%.*]] = bitcast i64* [[A_CASTED]] to i32* // CHECK10-NEXT: store i32 [[TMP3]], i32* [[CONV3]], align 4 // CHECK10-NEXT: [[TMP4:%.*]] = load i64, i64* [[A_CASTED]], align 8 -// CHECK10-NEXT: [[TMP5:%.*]] = load i16, i16* [[CONV1]], align 8 +// CHECK10-NEXT: [[TMP5:%.*]] = load i16, i16* [[CONV1]], align 2 // CHECK10-NEXT: [[CONV4:%.*]] = bitcast i64* [[B_CASTED]] to i16* // CHECK10-NEXT: store i16 [[TMP5]], i16* [[CONV4]], align 2 // CHECK10-NEXT: [[TMP6:%.*]] = load i64, i64* [[B_CASTED]], align 8 @@ -2434,11 +2434,11 @@ int bar(int n){ // CHECK10-NEXT: store i64 [[B]], i64* [[B_ADDR]], align 8 // CHECK10-NEXT: [[CONV:%.*]] = bitcast i64* [[A_ADDR]] to i32* // CHECK10-NEXT: [[CONV1:%.*]] = bitcast i64* [[B_ADDR]] to i16* -// CHECK10-NEXT: [[TMP0:%.*]] = load i16, i16* [[CONV1]], align 8 +// CHECK10-NEXT: [[TMP0:%.*]] = load i16, i16* [[CONV1]], align 2 // CHECK10-NEXT: [[CONV2:%.*]] = sext i16 [[TMP0]] to i32 -// CHECK10-NEXT: [[TMP1:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK10-NEXT: [[TMP1:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK10-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP1]], [[CONV2]] -// CHECK10-NEXT: store i32 [[ADD]], i32* [[CONV]], align 8 +// CHECK10-NEXT: store i32 [[ADD]], i32* [[CONV]], align 4 // CHECK10-NEXT: ret void // // @@ -2590,13 +2590,13 @@ int bar(int n){ // CHECK11-NEXT: store i32 [[DOTCAPTURE_EXPR_]], i32* [[DOTCAPTURE_EXPR__ADDR]], align 4 // CHECK11-NEXT: [[CONV:%.*]] = bitcast i32* [[B_ADDR]] to i16* // CHECK11-NEXT: [[CONV1:%.*]] = bitcast i32* [[DOTCAPTURE_EXPR__ADDR]] to i16* -// CHECK11-NEXT: [[TMP1:%.*]] = load i16, i16* [[CONV1]], align 4 +// CHECK11-NEXT: [[TMP1:%.*]] = load i16, i16* [[CONV1]], align 2 // CHECK11-NEXT: [[TMP2:%.*]] = sext i16 [[TMP1]] to i32 // CHECK11-NEXT: call void @__kmpc_push_num_teams(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]], i32 [[TMP2]], i32 1024) // CHECK11-NEXT: [[TMP3:%.*]] = load i32, i32* [[A_ADDR]], align 4 // CHECK11-NEXT: store i32 [[TMP3]], i32* [[A_CASTED]], align 4 // CHECK11-NEXT: [[TMP4:%.*]] = load i32, i32* [[A_CASTED]], align 4 -// CHECK11-NEXT: [[TMP5:%.*]] = load i16, i16* [[CONV]], align 4 +// CHECK11-NEXT: [[TMP5:%.*]] = load i16, i16* [[CONV]], align 2 // CHECK11-NEXT: [[CONV2:%.*]] = bitcast i32* [[B_CASTED]] to i16* // CHECK11-NEXT: store i16 [[TMP5]], i16* [[CONV2]], align 2 // CHECK11-NEXT: [[TMP6:%.*]] = load i32, i32* [[B_CASTED]], align 4 @@ -2616,7 +2616,7 @@ int bar(int n){ // CHECK11-NEXT: store i32 [[A]], i32* [[A_ADDR]], align 4 // CHECK11-NEXT: store i32 [[B]], i32* [[B_ADDR]], align 4 // CHECK11-NEXT: [[CONV:%.*]] = bitcast i32* [[B_ADDR]] to i16* -// CHECK11-NEXT: [[TMP0:%.*]] = load i16, i16* [[CONV]], align 4 +// CHECK11-NEXT: [[TMP0:%.*]] = load i16, i16* [[CONV]], align 2 // CHECK11-NEXT: [[CONV1:%.*]] = sext i16 [[TMP0]] to i32 // CHECK11-NEXT: [[TMP1:%.*]] = load i32, i32* [[A_ADDR]], align 4 // CHECK11-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP1]], [[CONV1]] @@ -2772,13 +2772,13 @@ int bar(int n){ // CHECK12-NEXT: store i32 [[DOTCAPTURE_EXPR_]], i32* [[DOTCAPTURE_EXPR__ADDR]], align 4 // CHECK12-NEXT: [[CONV:%.*]] = bitcast i32* [[B_ADDR]] to i16* // CHECK12-NEXT: [[CONV1:%.*]] = bitcast i32* 
[[DOTCAPTURE_EXPR__ADDR]] to i16* -// CHECK12-NEXT: [[TMP1:%.*]] = load i16, i16* [[CONV1]], align 4 +// CHECK12-NEXT: [[TMP1:%.*]] = load i16, i16* [[CONV1]], align 2 // CHECK12-NEXT: [[TMP2:%.*]] = sext i16 [[TMP1]] to i32 // CHECK12-NEXT: call void @__kmpc_push_num_teams(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]], i32 [[TMP2]], i32 1024) // CHECK12-NEXT: [[TMP3:%.*]] = load i32, i32* [[A_ADDR]], align 4 // CHECK12-NEXT: store i32 [[TMP3]], i32* [[A_CASTED]], align 4 // CHECK12-NEXT: [[TMP4:%.*]] = load i32, i32* [[A_CASTED]], align 4 -// CHECK12-NEXT: [[TMP5:%.*]] = load i16, i16* [[CONV]], align 4 +// CHECK12-NEXT: [[TMP5:%.*]] = load i16, i16* [[CONV]], align 2 // CHECK12-NEXT: [[CONV2:%.*]] = bitcast i32* [[B_CASTED]] to i16* // CHECK12-NEXT: store i16 [[TMP5]], i16* [[CONV2]], align 2 // CHECK12-NEXT: [[TMP6:%.*]] = load i32, i32* [[B_CASTED]], align 4 @@ -2798,7 +2798,7 @@ int bar(int n){ // CHECK12-NEXT: store i32 [[A]], i32* [[A_ADDR]], align 4 // CHECK12-NEXT: store i32 [[B]], i32* [[B_ADDR]], align 4 // CHECK12-NEXT: [[CONV:%.*]] = bitcast i32* [[B_ADDR]] to i16* -// CHECK12-NEXT: [[TMP0:%.*]] = load i16, i16* [[CONV]], align 4 +// CHECK12-NEXT: [[TMP0:%.*]] = load i16, i16* [[CONV]], align 2 // CHECK12-NEXT: [[CONV1:%.*]] = sext i16 [[TMP0]] to i32 // CHECK12-NEXT: [[TMP1:%.*]] = load i32, i32* [[A_ADDR]], align 4 // CHECK12-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP1]], [[CONV1]] @@ -3100,9 +3100,9 @@ int bar(int n){ // CHECK17-NEXT: [[TMP1:%.*]] = load %struct.S1*, %struct.S1** [[THIS_ADDR]], align 8 // CHECK17-NEXT: [[CONV:%.*]] = bitcast i64* [[B_ADDR]] to i32* // CHECK17-NEXT: [[CONV1:%.*]] = bitcast i64* [[DOTCAPTURE_EXPR__ADDR]] to i32* -// CHECK17-NEXT: [[TMP2:%.*]] = load i32, i32* [[CONV1]], align 8 +// CHECK17-NEXT: [[TMP2:%.*]] = load i32, i32* [[CONV1]], align 4 // CHECK17-NEXT: call void @__kmpc_push_num_teams(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]], i32 0, i32 [[TMP2]]) -// CHECK17-NEXT: [[TMP3:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK17-NEXT: [[TMP3:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK17-NEXT: [[CONV2:%.*]] = bitcast i64* [[B_CASTED]] to i32* // CHECK17-NEXT: store i32 [[TMP3]], i32* [[CONV2]], align 4 // CHECK17-NEXT: [[TMP4:%.*]] = load i64, i64* [[B_CASTED]], align 8 @@ -3123,7 +3123,7 @@ int bar(int n){ // CHECK17-NEXT: store i64 [[B]], i64* [[B_ADDR]], align 8 // CHECK17-NEXT: [[TMP0:%.*]] = load %struct.S1*, %struct.S1** [[THIS_ADDR]], align 8 // CHECK17-NEXT: [[CONV:%.*]] = bitcast i64* [[B_ADDR]] to i32* -// CHECK17-NEXT: [[TMP1:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK17-NEXT: [[TMP1:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK17-NEXT: [[CONV1:%.*]] = sitofp i32 [[TMP1]] to double // CHECK17-NEXT: [[ADD:%.*]] = fadd double [[CONV1]], 1.500000e+00 // CHECK17-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_S1:%.*]], %struct.S1* [[TMP0]], i32 0, i32 0 @@ -3168,8 +3168,8 @@ int bar(int n){ // CHECK17-NEXT: store i64 [[DOTCAPTURE_EXPR_1]], i64* [[DOTCAPTURE_EXPR__ADDR2]], align 8 // CHECK17-NEXT: [[CONV:%.*]] = bitcast i64* [[DOTCAPTURE_EXPR__ADDR]] to i32* // CHECK17-NEXT: [[CONV3:%.*]] = bitcast i64* [[DOTCAPTURE_EXPR__ADDR2]] to i32* -// CHECK17-NEXT: [[TMP1:%.*]] = load i32, i32* [[CONV]], align 8 -// CHECK17-NEXT: [[TMP2:%.*]] = load i32, i32* [[CONV3]], align 8 +// CHECK17-NEXT: [[TMP1:%.*]] = load i32, i32* [[CONV]], align 4 +// CHECK17-NEXT: [[TMP2:%.*]] = load i32, i32* [[CONV3]], align 4 // CHECK17-NEXT: call void @__kmpc_push_num_teams(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]], i32 [[TMP1]], i32 [[TMP2]]) // 
CHECK17-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB1]], i32 0, void (i32*, i32*, ...)* bitcast (void (i32*, i32*)* @.omp_outlined..4 to void (i32*, i32*, ...)*)) // CHECK17-NEXT: ret void @@ -3192,7 +3192,7 @@ int bar(int n){ // CHECK17-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) // CHECK17-NEXT: store i64 [[DOTCAPTURE_EXPR_]], i64* [[DOTCAPTURE_EXPR__ADDR]], align 8 // CHECK17-NEXT: [[CONV:%.*]] = bitcast i64* [[DOTCAPTURE_EXPR__ADDR]] to i32* -// CHECK17-NEXT: [[TMP1:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK17-NEXT: [[TMP1:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK17-NEXT: call void @__kmpc_push_num_teams(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]], i32 0, i32 [[TMP1]]) // CHECK17-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB1]], i32 0, void (i32*, i32*, ...)* bitcast (void (i32*, i32*)* @.omp_outlined..7 to void (i32*, i32*, ...)*)) // CHECK17-NEXT: ret void @@ -3242,14 +3242,14 @@ int bar(int n){ // CHECK17-NEXT: [[CONV:%.*]] = bitcast i64* [[A_ADDR]] to i32* // CHECK17-NEXT: [[CONV1:%.*]] = bitcast i64* [[B_ADDR]] to i16* // CHECK17-NEXT: [[CONV2:%.*]] = bitcast i64* [[DOTCAPTURE_EXPR__ADDR]] to i16* -// CHECK17-NEXT: [[TMP1:%.*]] = load i16, i16* [[CONV2]], align 8 +// CHECK17-NEXT: [[TMP1:%.*]] = load i16, i16* [[CONV2]], align 2 // CHECK17-NEXT: [[TMP2:%.*]] = sext i16 [[TMP1]] to i32 // CHECK17-NEXT: call void @__kmpc_push_num_teams(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]], i32 [[TMP2]], i32 1024) -// CHECK17-NEXT: [[TMP3:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK17-NEXT: [[TMP3:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK17-NEXT: [[CONV3:%.*]] = bitcast i64* [[A_CASTED]] to i32* // CHECK17-NEXT: store i32 [[TMP3]], i32* [[CONV3]], align 4 // CHECK17-NEXT: [[TMP4:%.*]] = load i64, i64* [[A_CASTED]], align 8 -// CHECK17-NEXT: [[TMP5:%.*]] = load i16, i16* [[CONV1]], align 8 +// CHECK17-NEXT: [[TMP5:%.*]] = load i16, i16* [[CONV1]], align 2 // CHECK17-NEXT: [[CONV4:%.*]] = bitcast i64* [[B_CASTED]] to i16* // CHECK17-NEXT: store i16 [[TMP5]], i16* [[CONV4]], align 2 // CHECK17-NEXT: [[TMP6:%.*]] = load i64, i64* [[B_CASTED]], align 8 @@ -3270,11 +3270,11 @@ int bar(int n){ // CHECK17-NEXT: store i64 [[B]], i64* [[B_ADDR]], align 8 // CHECK17-NEXT: [[CONV:%.*]] = bitcast i64* [[A_ADDR]] to i32* // CHECK17-NEXT: [[CONV1:%.*]] = bitcast i64* [[B_ADDR]] to i16* -// CHECK17-NEXT: [[TMP0:%.*]] = load i16, i16* [[CONV1]], align 8 +// CHECK17-NEXT: [[TMP0:%.*]] = load i16, i16* [[CONV1]], align 2 // CHECK17-NEXT: [[CONV2:%.*]] = sext i16 [[TMP0]] to i32 -// CHECK17-NEXT: [[TMP1:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK17-NEXT: [[TMP1:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK17-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP1]], [[CONV2]] -// CHECK17-NEXT: store i32 [[ADD]], i32* [[CONV]], align 8 +// CHECK17-NEXT: store i32 [[ADD]], i32* [[CONV]], align 4 // CHECK17-NEXT: ret void // // @@ -3579,9 +3579,9 @@ int bar(int n){ // CHECK18-NEXT: [[TMP1:%.*]] = load %struct.S1*, %struct.S1** [[THIS_ADDR]], align 8 // CHECK18-NEXT: [[CONV:%.*]] = bitcast i64* [[B_ADDR]] to i32* // CHECK18-NEXT: [[CONV1:%.*]] = bitcast i64* [[DOTCAPTURE_EXPR__ADDR]] to i32* -// CHECK18-NEXT: [[TMP2:%.*]] = load i32, i32* [[CONV1]], align 8 +// CHECK18-NEXT: [[TMP2:%.*]] = load i32, i32* [[CONV1]], align 4 // CHECK18-NEXT: call void @__kmpc_push_num_teams(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]], i32 0, 
i32 [[TMP2]]) -// CHECK18-NEXT: [[TMP3:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK18-NEXT: [[TMP3:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK18-NEXT: [[CONV2:%.*]] = bitcast i64* [[B_CASTED]] to i32* // CHECK18-NEXT: store i32 [[TMP3]], i32* [[CONV2]], align 4 // CHECK18-NEXT: [[TMP4:%.*]] = load i64, i64* [[B_CASTED]], align 8 @@ -3602,7 +3602,7 @@ int bar(int n){ // CHECK18-NEXT: store i64 [[B]], i64* [[B_ADDR]], align 8 // CHECK18-NEXT: [[TMP0:%.*]] = load %struct.S1*, %struct.S1** [[THIS_ADDR]], align 8 // CHECK18-NEXT: [[CONV:%.*]] = bitcast i64* [[B_ADDR]] to i32* -// CHECK18-NEXT: [[TMP1:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK18-NEXT: [[TMP1:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK18-NEXT: [[CONV1:%.*]] = sitofp i32 [[TMP1]] to double // CHECK18-NEXT: [[ADD:%.*]] = fadd double [[CONV1]], 1.500000e+00 // CHECK18-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_S1:%.*]], %struct.S1* [[TMP0]], i32 0, i32 0 @@ -3647,8 +3647,8 @@ int bar(int n){ // CHECK18-NEXT: store i64 [[DOTCAPTURE_EXPR_1]], i64* [[DOTCAPTURE_EXPR__ADDR2]], align 8 // CHECK18-NEXT: [[CONV:%.*]] = bitcast i64* [[DOTCAPTURE_EXPR__ADDR]] to i32* // CHECK18-NEXT: [[CONV3:%.*]] = bitcast i64* [[DOTCAPTURE_EXPR__ADDR2]] to i32* -// CHECK18-NEXT: [[TMP1:%.*]] = load i32, i32* [[CONV]], align 8 -// CHECK18-NEXT: [[TMP2:%.*]] = load i32, i32* [[CONV3]], align 8 +// CHECK18-NEXT: [[TMP1:%.*]] = load i32, i32* [[CONV]], align 4 +// CHECK18-NEXT: [[TMP2:%.*]] = load i32, i32* [[CONV3]], align 4 // CHECK18-NEXT: call void @__kmpc_push_num_teams(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]], i32 [[TMP1]], i32 [[TMP2]]) // CHECK18-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB1]], i32 0, void (i32*, i32*, ...)* bitcast (void (i32*, i32*)* @.omp_outlined..4 to void (i32*, i32*, ...)*)) // CHECK18-NEXT: ret void @@ -3671,7 +3671,7 @@ int bar(int n){ // CHECK18-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) // CHECK18-NEXT: store i64 [[DOTCAPTURE_EXPR_]], i64* [[DOTCAPTURE_EXPR__ADDR]], align 8 // CHECK18-NEXT: [[CONV:%.*]] = bitcast i64* [[DOTCAPTURE_EXPR__ADDR]] to i32* -// CHECK18-NEXT: [[TMP1:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK18-NEXT: [[TMP1:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK18-NEXT: call void @__kmpc_push_num_teams(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]], i32 0, i32 [[TMP1]]) // CHECK18-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) 
@__kmpc_fork_teams(%struct.ident_t* @[[GLOB1]], i32 0, void (i32*, i32*, ...)* bitcast (void (i32*, i32*)* @.omp_outlined..7 to void (i32*, i32*, ...)*)) // CHECK18-NEXT: ret void @@ -3721,14 +3721,14 @@ int bar(int n){ // CHECK18-NEXT: [[CONV:%.*]] = bitcast i64* [[A_ADDR]] to i32* // CHECK18-NEXT: [[CONV1:%.*]] = bitcast i64* [[B_ADDR]] to i16* // CHECK18-NEXT: [[CONV2:%.*]] = bitcast i64* [[DOTCAPTURE_EXPR__ADDR]] to i16* -// CHECK18-NEXT: [[TMP1:%.*]] = load i16, i16* [[CONV2]], align 8 +// CHECK18-NEXT: [[TMP1:%.*]] = load i16, i16* [[CONV2]], align 2 // CHECK18-NEXT: [[TMP2:%.*]] = sext i16 [[TMP1]] to i32 // CHECK18-NEXT: call void @__kmpc_push_num_teams(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]], i32 [[TMP2]], i32 1024) -// CHECK18-NEXT: [[TMP3:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK18-NEXT: [[TMP3:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK18-NEXT: [[CONV3:%.*]] = bitcast i64* [[A_CASTED]] to i32* // CHECK18-NEXT: store i32 [[TMP3]], i32* [[CONV3]], align 4 // CHECK18-NEXT: [[TMP4:%.*]] = load i64, i64* [[A_CASTED]], align 8 -// CHECK18-NEXT: [[TMP5:%.*]] = load i16, i16* [[CONV1]], align 8 +// CHECK18-NEXT: [[TMP5:%.*]] = load i16, i16* [[CONV1]], align 2 // CHECK18-NEXT: [[CONV4:%.*]] = bitcast i64* [[B_CASTED]] to i16* // CHECK18-NEXT: store i16 [[TMP5]], i16* [[CONV4]], align 2 // CHECK18-NEXT: [[TMP6:%.*]] = load i64, i64* [[B_CASTED]], align 8 @@ -3749,11 +3749,11 @@ int bar(int n){ // CHECK18-NEXT: store i64 [[B]], i64* [[B_ADDR]], align 8 // CHECK18-NEXT: [[CONV:%.*]] = bitcast i64* [[A_ADDR]] to i32* // CHECK18-NEXT: [[CONV1:%.*]] = bitcast i64* [[B_ADDR]] to i16* -// CHECK18-NEXT: [[TMP0:%.*]] = load i16, i16* [[CONV1]], align 8 +// CHECK18-NEXT: [[TMP0:%.*]] = load i16, i16* [[CONV1]], align 2 // CHECK18-NEXT: [[CONV2:%.*]] = sext i16 [[TMP0]] to i32 -// CHECK18-NEXT: [[TMP1:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK18-NEXT: [[TMP1:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK18-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP1]], [[CONV2]] -// CHECK18-NEXT: store i32 [[ADD]], i32* [[CONV]], align 8 +// CHECK18-NEXT: store i32 [[ADD]], i32* [[CONV]], align 4 // CHECK18-NEXT: ret void // // @@ -4186,13 +4186,13 @@ int bar(int n){ // CHECK19-NEXT: store i32 [[DOTCAPTURE_EXPR_]], i32* [[DOTCAPTURE_EXPR__ADDR]], align 4 // CHECK19-NEXT: [[CONV:%.*]] = bitcast i32* [[B_ADDR]] to i16* // CHECK19-NEXT: [[CONV1:%.*]] = bitcast i32* [[DOTCAPTURE_EXPR__ADDR]] to i16* -// CHECK19-NEXT: [[TMP1:%.*]] = load i16, i16* [[CONV1]], align 4 +// CHECK19-NEXT: [[TMP1:%.*]] = load i16, i16* [[CONV1]], align 2 // CHECK19-NEXT: [[TMP2:%.*]] = sext i16 [[TMP1]] to i32 // CHECK19-NEXT: call void @__kmpc_push_num_teams(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]], i32 [[TMP2]], i32 1024) // CHECK19-NEXT: [[TMP3:%.*]] = load i32, i32* [[A_ADDR]], align 4 // CHECK19-NEXT: store i32 [[TMP3]], i32* [[A_CASTED]], align 4 // CHECK19-NEXT: [[TMP4:%.*]] = load i32, i32* [[A_CASTED]], align 4 -// CHECK19-NEXT: [[TMP5:%.*]] = load i16, i16* [[CONV]], align 4 +// CHECK19-NEXT: [[TMP5:%.*]] = load i16, i16* [[CONV]], align 2 // CHECK19-NEXT: [[CONV2:%.*]] = bitcast i32* [[B_CASTED]] to i16* // CHECK19-NEXT: store i16 [[TMP5]], i16* [[CONV2]], align 2 // CHECK19-NEXT: [[TMP6:%.*]] = load i32, i32* [[B_CASTED]], align 4 @@ -4212,7 +4212,7 @@ int bar(int n){ // CHECK19-NEXT: store i32 [[A]], i32* [[A_ADDR]], align 4 // CHECK19-NEXT: store i32 [[B]], i32* [[B_ADDR]], align 4 // CHECK19-NEXT: [[CONV:%.*]] = bitcast i32* [[B_ADDR]] to i16* -// CHECK19-NEXT: [[TMP0:%.*]] = load i16, i16* [[CONV]], 
align 4 +// CHECK19-NEXT: [[TMP0:%.*]] = load i16, i16* [[CONV]], align 2 // CHECK19-NEXT: [[CONV1:%.*]] = sext i16 [[TMP0]] to i32 // CHECK19-NEXT: [[TMP1:%.*]] = load i32, i32* [[A_ADDR]], align 4 // CHECK19-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP1]], [[CONV1]] @@ -4649,13 +4649,13 @@ int bar(int n){ // CHECK20-NEXT: store i32 [[DOTCAPTURE_EXPR_]], i32* [[DOTCAPTURE_EXPR__ADDR]], align 4 // CHECK20-NEXT: [[CONV:%.*]] = bitcast i32* [[B_ADDR]] to i16* // CHECK20-NEXT: [[CONV1:%.*]] = bitcast i32* [[DOTCAPTURE_EXPR__ADDR]] to i16* -// CHECK20-NEXT: [[TMP1:%.*]] = load i16, i16* [[CONV1]], align 4 +// CHECK20-NEXT: [[TMP1:%.*]] = load i16, i16* [[CONV1]], align 2 // CHECK20-NEXT: [[TMP2:%.*]] = sext i16 [[TMP1]] to i32 // CHECK20-NEXT: call void @__kmpc_push_num_teams(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]], i32 [[TMP2]], i32 1024) // CHECK20-NEXT: [[TMP3:%.*]] = load i32, i32* [[A_ADDR]], align 4 // CHECK20-NEXT: store i32 [[TMP3]], i32* [[A_CASTED]], align 4 // CHECK20-NEXT: [[TMP4:%.*]] = load i32, i32* [[A_CASTED]], align 4 -// CHECK20-NEXT: [[TMP5:%.*]] = load i16, i16* [[CONV]], align 4 +// CHECK20-NEXT: [[TMP5:%.*]] = load i16, i16* [[CONV]], align 2 // CHECK20-NEXT: [[CONV2:%.*]] = bitcast i32* [[B_CASTED]] to i16* // CHECK20-NEXT: store i16 [[TMP5]], i16* [[CONV2]], align 2 // CHECK20-NEXT: [[TMP6:%.*]] = load i32, i32* [[B_CASTED]], align 4 @@ -4675,7 +4675,7 @@ int bar(int n){ // CHECK20-NEXT: store i32 [[A]], i32* [[A_ADDR]], align 4 // CHECK20-NEXT: store i32 [[B]], i32* [[B_ADDR]], align 4 // CHECK20-NEXT: [[CONV:%.*]] = bitcast i32* [[B_ADDR]] to i16* -// CHECK20-NEXT: [[TMP0:%.*]] = load i16, i16* [[CONV]], align 4 +// CHECK20-NEXT: [[TMP0:%.*]] = load i16, i16* [[CONV]], align 2 // CHECK20-NEXT: [[CONV1:%.*]] = sext i16 [[TMP0]] to i32 // CHECK20-NEXT: [[TMP1:%.*]] = load i32, i32* [[A_ADDR]], align 4 // CHECK20-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP1]], [[CONV1]] @@ -4700,8 +4700,8 @@ int bar(int n){ // CHECK25-NEXT: store i64 [[DOTCAPTURE_EXPR_1]], i64* [[DOTCAPTURE_EXPR__ADDR2]], align 8 // CHECK25-NEXT: [[CONV:%.*]] = bitcast i64* [[DOTCAPTURE_EXPR__ADDR]] to i32* // CHECK25-NEXT: [[CONV3:%.*]] = bitcast i64* [[DOTCAPTURE_EXPR__ADDR2]] to i32* -// CHECK25-NEXT: [[TMP1:%.*]] = load i32, i32* [[CONV]], align 8 -// CHECK25-NEXT: [[TMP2:%.*]] = load i32, i32* [[CONV3]], align 8 +// CHECK25-NEXT: [[TMP1:%.*]] = load i32, i32* [[CONV]], align 4 +// CHECK25-NEXT: [[TMP2:%.*]] = load i32, i32* [[CONV3]], align 4 // CHECK25-NEXT: call void @__kmpc_push_num_teams(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]], i32 [[TMP1]], i32 [[TMP2]]) // CHECK25-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB1]], i32 0, void (i32*, i32*, ...)* bitcast (void (i32*, i32*)* @.omp_outlined. to void (i32*, i32*, ...)*)) // CHECK25-NEXT: ret void @@ -4724,7 +4724,7 @@ int bar(int n){ // CHECK25-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) // CHECK25-NEXT: store i64 [[DOTCAPTURE_EXPR_]], i64* [[DOTCAPTURE_EXPR__ADDR]], align 8 // CHECK25-NEXT: [[CONV:%.*]] = bitcast i64* [[DOTCAPTURE_EXPR__ADDR]] to i32* -// CHECK25-NEXT: [[TMP1:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK25-NEXT: [[TMP1:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK25-NEXT: call void @__kmpc_push_num_teams(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]], i32 0, i32 [[TMP1]]) // CHECK25-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) 
@__kmpc_fork_teams(%struct.ident_t* @[[GLOB1]], i32 0, void (i32*, i32*, ...)* bitcast (void (i32*, i32*)* @.omp_outlined..1 to void (i32*, i32*, ...)*)) // CHECK25-NEXT: ret void @@ -4754,9 +4754,9 @@ int bar(int n){ // CHECK25-NEXT: [[TMP1:%.*]] = load %struct.S1*, %struct.S1** [[THIS_ADDR]], align 8 // CHECK25-NEXT: [[CONV:%.*]] = bitcast i64* [[B_ADDR]] to i32* // CHECK25-NEXT: [[CONV1:%.*]] = bitcast i64* [[DOTCAPTURE_EXPR__ADDR]] to i32* -// CHECK25-NEXT: [[TMP2:%.*]] = load i32, i32* [[CONV1]], align 8 +// CHECK25-NEXT: [[TMP2:%.*]] = load i32, i32* [[CONV1]], align 4 // CHECK25-NEXT: call void @__kmpc_push_num_teams(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]], i32 0, i32 [[TMP2]]) -// CHECK25-NEXT: [[TMP3:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK25-NEXT: [[TMP3:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK25-NEXT: [[CONV2:%.*]] = bitcast i64* [[B_CASTED]] to i32* // CHECK25-NEXT: store i32 [[TMP3]], i32* [[CONV2]], align 4 // CHECK25-NEXT: [[TMP4:%.*]] = load i64, i64* [[B_CASTED]], align 8 @@ -4777,7 +4777,7 @@ int bar(int n){ // CHECK25-NEXT: store i64 [[B]], i64* [[B_ADDR]], align 8 // CHECK25-NEXT: [[TMP0:%.*]] = load %struct.S1*, %struct.S1** [[THIS_ADDR]], align 8 // CHECK25-NEXT: [[CONV:%.*]] = bitcast i64* [[B_ADDR]] to i32* -// CHECK25-NEXT: [[TMP1:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK25-NEXT: [[TMP1:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK25-NEXT: [[CONV1:%.*]] = sitofp i32 [[TMP1]] to double // CHECK25-NEXT: [[ADD:%.*]] = fadd double [[CONV1]], 1.500000e+00 // CHECK25-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_S1:%.*]], %struct.S1* [[TMP0]], i32 0, i32 0 @@ -4846,14 +4846,14 @@ int bar(int n){ // CHECK25-NEXT: [[CONV:%.*]] = bitcast i64* [[A_ADDR]] to i32* // CHECK25-NEXT: [[CONV1:%.*]] = bitcast i64* [[B_ADDR]] to i16* // CHECK25-NEXT: [[CONV2:%.*]] = bitcast i64* [[DOTCAPTURE_EXPR__ADDR]] to i16* -// CHECK25-NEXT: [[TMP1:%.*]] = load i16, i16* [[CONV2]], align 8 +// CHECK25-NEXT: [[TMP1:%.*]] = load i16, i16* [[CONV2]], align 2 // CHECK25-NEXT: [[TMP2:%.*]] = sext i16 [[TMP1]] to i32 // CHECK25-NEXT: call void @__kmpc_push_num_teams(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]], i32 [[TMP2]], i32 1024) -// CHECK25-NEXT: [[TMP3:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK25-NEXT: [[TMP3:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK25-NEXT: [[CONV3:%.*]] = bitcast i64* [[A_CASTED]] to i32* // CHECK25-NEXT: store i32 [[TMP3]], i32* [[CONV3]], align 4 // CHECK25-NEXT: [[TMP4:%.*]] = load i64, i64* [[A_CASTED]], align 8 -// CHECK25-NEXT: [[TMP5:%.*]] = load i16, i16* [[CONV1]], align 8 +// CHECK25-NEXT: [[TMP5:%.*]] = load i16, i16* [[CONV1]], align 2 // CHECK25-NEXT: [[CONV4:%.*]] = bitcast i64* [[B_CASTED]] to i16* // CHECK25-NEXT: store i16 [[TMP5]], i16* [[CONV4]], align 2 // CHECK25-NEXT: [[TMP6:%.*]] = load i64, i64* [[B_CASTED]], align 8 @@ -4874,11 +4874,11 @@ int bar(int n){ // CHECK25-NEXT: store i64 [[B]], i64* [[B_ADDR]], align 8 // CHECK25-NEXT: [[CONV:%.*]] = bitcast i64* [[A_ADDR]] to i32* // CHECK25-NEXT: [[CONV1:%.*]] = bitcast i64* [[B_ADDR]] to i16* -// CHECK25-NEXT: [[TMP0:%.*]] = load i16, i16* [[CONV1]], align 8 +// CHECK25-NEXT: [[TMP0:%.*]] = load i16, i16* [[CONV1]], align 2 // CHECK25-NEXT: [[CONV2:%.*]] = sext i16 [[TMP0]] to i32 -// CHECK25-NEXT: [[TMP1:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK25-NEXT: [[TMP1:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK25-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP1]], [[CONV2]] -// CHECK25-NEXT: store i32 [[ADD]], i32* [[CONV]], align 8 +// CHECK25-NEXT: 
store i32 [[ADD]], i32* [[CONV]], align 4 // CHECK25-NEXT: ret void // // @@ -4892,8 +4892,8 @@ int bar(int n){ // CHECK26-NEXT: store i64 [[DOTCAPTURE_EXPR_1]], i64* [[DOTCAPTURE_EXPR__ADDR2]], align 8 // CHECK26-NEXT: [[CONV:%.*]] = bitcast i64* [[DOTCAPTURE_EXPR__ADDR]] to i32* // CHECK26-NEXT: [[CONV3:%.*]] = bitcast i64* [[DOTCAPTURE_EXPR__ADDR2]] to i32* -// CHECK26-NEXT: [[TMP1:%.*]] = load i32, i32* [[CONV]], align 8 -// CHECK26-NEXT: [[TMP2:%.*]] = load i32, i32* [[CONV3]], align 8 +// CHECK26-NEXT: [[TMP1:%.*]] = load i32, i32* [[CONV]], align 4 +// CHECK26-NEXT: [[TMP2:%.*]] = load i32, i32* [[CONV3]], align 4 // CHECK26-NEXT: call void @__kmpc_push_num_teams(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]], i32 [[TMP1]], i32 [[TMP2]]) // CHECK26-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB1]], i32 0, void (i32*, i32*, ...)* bitcast (void (i32*, i32*)* @.omp_outlined. to void (i32*, i32*, ...)*)) // CHECK26-NEXT: ret void @@ -4916,7 +4916,7 @@ int bar(int n){ // CHECK26-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) // CHECK26-NEXT: store i64 [[DOTCAPTURE_EXPR_]], i64* [[DOTCAPTURE_EXPR__ADDR]], align 8 // CHECK26-NEXT: [[CONV:%.*]] = bitcast i64* [[DOTCAPTURE_EXPR__ADDR]] to i32* -// CHECK26-NEXT: [[TMP1:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK26-NEXT: [[TMP1:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK26-NEXT: call void @__kmpc_push_num_teams(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]], i32 0, i32 [[TMP1]]) // CHECK26-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB1]], i32 0, void (i32*, i32*, ...)* bitcast (void (i32*, i32*)* @.omp_outlined..1 to void (i32*, i32*, ...)*)) // CHECK26-NEXT: ret void @@ -4946,9 +4946,9 @@ int bar(int n){ // CHECK26-NEXT: [[TMP1:%.*]] = load %struct.S1*, %struct.S1** [[THIS_ADDR]], align 8 // CHECK26-NEXT: [[CONV:%.*]] = bitcast i64* [[B_ADDR]] to i32* // CHECK26-NEXT: [[CONV1:%.*]] = bitcast i64* [[DOTCAPTURE_EXPR__ADDR]] to i32* -// CHECK26-NEXT: [[TMP2:%.*]] = load i32, i32* [[CONV1]], align 8 +// CHECK26-NEXT: [[TMP2:%.*]] = load i32, i32* [[CONV1]], align 4 // CHECK26-NEXT: call void @__kmpc_push_num_teams(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]], i32 0, i32 [[TMP2]]) -// CHECK26-NEXT: [[TMP3:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK26-NEXT: [[TMP3:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK26-NEXT: [[CONV2:%.*]] = bitcast i64* [[B_CASTED]] to i32* // CHECK26-NEXT: store i32 [[TMP3]], i32* [[CONV2]], align 4 // CHECK26-NEXT: [[TMP4:%.*]] = load i64, i64* [[B_CASTED]], align 8 @@ -4969,7 +4969,7 @@ int bar(int n){ // CHECK26-NEXT: store i64 [[B]], i64* [[B_ADDR]], align 8 // CHECK26-NEXT: [[TMP0:%.*]] = load %struct.S1*, %struct.S1** [[THIS_ADDR]], align 8 // CHECK26-NEXT: [[CONV:%.*]] = bitcast i64* [[B_ADDR]] to i32* -// CHECK26-NEXT: [[TMP1:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK26-NEXT: [[TMP1:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK26-NEXT: [[CONV1:%.*]] = sitofp i32 [[TMP1]] to double // CHECK26-NEXT: [[ADD:%.*]] = fadd double [[CONV1]], 1.500000e+00 // CHECK26-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_S1:%.*]], %struct.S1* [[TMP0]], i32 0, i32 0 @@ -5038,14 +5038,14 @@ int bar(int n){ // CHECK26-NEXT: [[CONV:%.*]] = bitcast i64* [[A_ADDR]] to i32* // CHECK26-NEXT: [[CONV1:%.*]] = bitcast i64* [[B_ADDR]] to i16* // CHECK26-NEXT: [[CONV2:%.*]] = bitcast i64* [[DOTCAPTURE_EXPR__ADDR]] to i16* -// CHECK26-NEXT: 
[[TMP1:%.*]] = load i16, i16* [[CONV2]], align 8 +// CHECK26-NEXT: [[TMP1:%.*]] = load i16, i16* [[CONV2]], align 2 // CHECK26-NEXT: [[TMP2:%.*]] = sext i16 [[TMP1]] to i32 // CHECK26-NEXT: call void @__kmpc_push_num_teams(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]], i32 [[TMP2]], i32 1024) -// CHECK26-NEXT: [[TMP3:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK26-NEXT: [[TMP3:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK26-NEXT: [[CONV3:%.*]] = bitcast i64* [[A_CASTED]] to i32* // CHECK26-NEXT: store i32 [[TMP3]], i32* [[CONV3]], align 4 // CHECK26-NEXT: [[TMP4:%.*]] = load i64, i64* [[A_CASTED]], align 8 -// CHECK26-NEXT: [[TMP5:%.*]] = load i16, i16* [[CONV1]], align 8 +// CHECK26-NEXT: [[TMP5:%.*]] = load i16, i16* [[CONV1]], align 2 // CHECK26-NEXT: [[CONV4:%.*]] = bitcast i64* [[B_CASTED]] to i16* // CHECK26-NEXT: store i16 [[TMP5]], i16* [[CONV4]], align 2 // CHECK26-NEXT: [[TMP6:%.*]] = load i64, i64* [[B_CASTED]], align 8 @@ -5066,11 +5066,11 @@ int bar(int n){ // CHECK26-NEXT: store i64 [[B]], i64* [[B_ADDR]], align 8 // CHECK26-NEXT: [[CONV:%.*]] = bitcast i64* [[A_ADDR]] to i32* // CHECK26-NEXT: [[CONV1:%.*]] = bitcast i64* [[B_ADDR]] to i16* -// CHECK26-NEXT: [[TMP0:%.*]] = load i16, i16* [[CONV1]], align 8 +// CHECK26-NEXT: [[TMP0:%.*]] = load i16, i16* [[CONV1]], align 2 // CHECK26-NEXT: [[CONV2:%.*]] = sext i16 [[TMP0]] to i32 -// CHECK26-NEXT: [[TMP1:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK26-NEXT: [[TMP1:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK26-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP1]], [[CONV2]] -// CHECK26-NEXT: store i32 [[ADD]], i32* [[CONV]], align 8 +// CHECK26-NEXT: store i32 [[ADD]], i32* [[CONV]], align 4 // CHECK26-NEXT: ret void // // @@ -5222,13 +5222,13 @@ int bar(int n){ // CHECK27-NEXT: store i32 [[DOTCAPTURE_EXPR_]], i32* [[DOTCAPTURE_EXPR__ADDR]], align 4 // CHECK27-NEXT: [[CONV:%.*]] = bitcast i32* [[B_ADDR]] to i16* // CHECK27-NEXT: [[CONV1:%.*]] = bitcast i32* [[DOTCAPTURE_EXPR__ADDR]] to i16* -// CHECK27-NEXT: [[TMP1:%.*]] = load i16, i16* [[CONV1]], align 4 +// CHECK27-NEXT: [[TMP1:%.*]] = load i16, i16* [[CONV1]], align 2 // CHECK27-NEXT: [[TMP2:%.*]] = sext i16 [[TMP1]] to i32 // CHECK27-NEXT: call void @__kmpc_push_num_teams(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]], i32 [[TMP2]], i32 1024) // CHECK27-NEXT: [[TMP3:%.*]] = load i32, i32* [[A_ADDR]], align 4 // CHECK27-NEXT: store i32 [[TMP3]], i32* [[A_CASTED]], align 4 // CHECK27-NEXT: [[TMP4:%.*]] = load i32, i32* [[A_CASTED]], align 4 -// CHECK27-NEXT: [[TMP5:%.*]] = load i16, i16* [[CONV]], align 4 +// CHECK27-NEXT: [[TMP5:%.*]] = load i16, i16* [[CONV]], align 2 // CHECK27-NEXT: [[CONV2:%.*]] = bitcast i32* [[B_CASTED]] to i16* // CHECK27-NEXT: store i16 [[TMP5]], i16* [[CONV2]], align 2 // CHECK27-NEXT: [[TMP6:%.*]] = load i32, i32* [[B_CASTED]], align 4 @@ -5248,7 +5248,7 @@ int bar(int n){ // CHECK27-NEXT: store i32 [[A]], i32* [[A_ADDR]], align 4 // CHECK27-NEXT: store i32 [[B]], i32* [[B_ADDR]], align 4 // CHECK27-NEXT: [[CONV:%.*]] = bitcast i32* [[B_ADDR]] to i16* -// CHECK27-NEXT: [[TMP0:%.*]] = load i16, i16* [[CONV]], align 4 +// CHECK27-NEXT: [[TMP0:%.*]] = load i16, i16* [[CONV]], align 2 // CHECK27-NEXT: [[CONV1:%.*]] = sext i16 [[TMP0]] to i32 // CHECK27-NEXT: [[TMP1:%.*]] = load i32, i32* [[A_ADDR]], align 4 // CHECK27-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP1]], [[CONV1]] @@ -5404,13 +5404,13 @@ int bar(int n){ // CHECK28-NEXT: store i32 [[DOTCAPTURE_EXPR_]], i32* [[DOTCAPTURE_EXPR__ADDR]], align 4 // CHECK28-NEXT: [[CONV:%.*]] = bitcast i32* [[B_ADDR]] to 
i16* // CHECK28-NEXT: [[CONV1:%.*]] = bitcast i32* [[DOTCAPTURE_EXPR__ADDR]] to i16* -// CHECK28-NEXT: [[TMP1:%.*]] = load i16, i16* [[CONV1]], align 4 +// CHECK28-NEXT: [[TMP1:%.*]] = load i16, i16* [[CONV1]], align 2 // CHECK28-NEXT: [[TMP2:%.*]] = sext i16 [[TMP1]] to i32 // CHECK28-NEXT: call void @__kmpc_push_num_teams(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]], i32 [[TMP2]], i32 1024) // CHECK28-NEXT: [[TMP3:%.*]] = load i32, i32* [[A_ADDR]], align 4 // CHECK28-NEXT: store i32 [[TMP3]], i32* [[A_CASTED]], align 4 // CHECK28-NEXT: [[TMP4:%.*]] = load i32, i32* [[A_CASTED]], align 4 -// CHECK28-NEXT: [[TMP5:%.*]] = load i16, i16* [[CONV]], align 4 +// CHECK28-NEXT: [[TMP5:%.*]] = load i16, i16* [[CONV]], align 2 // CHECK28-NEXT: [[CONV2:%.*]] = bitcast i32* [[B_CASTED]] to i16* // CHECK28-NEXT: store i16 [[TMP5]], i16* [[CONV2]], align 2 // CHECK28-NEXT: [[TMP6:%.*]] = load i32, i32* [[B_CASTED]], align 4 @@ -5430,7 +5430,7 @@ int bar(int n){ // CHECK28-NEXT: store i32 [[A]], i32* [[A_ADDR]], align 4 // CHECK28-NEXT: store i32 [[B]], i32* [[B_ADDR]], align 4 // CHECK28-NEXT: [[CONV:%.*]] = bitcast i32* [[B_ADDR]] to i16* -// CHECK28-NEXT: [[TMP0:%.*]] = load i16, i16* [[CONV]], align 4 +// CHECK28-NEXT: [[TMP0:%.*]] = load i16, i16* [[CONV]], align 2 // CHECK28-NEXT: [[CONV1:%.*]] = sext i16 [[TMP0]] to i32 // CHECK28-NEXT: [[TMP1:%.*]] = load i32, i32* [[A_ADDR]], align 4 // CHECK28-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP1]], [[CONV1]] diff --git a/clang/test/OpenMP/teams_codegen.cpp b/clang/test/OpenMP/teams_codegen.cpp index efbb8d648c6c..40a815b8d352 100644 --- a/clang/test/OpenMP/teams_codegen.cpp +++ b/clang/test/OpenMP/teams_codegen.cpp @@ -638,7 +638,7 @@ void foo() { // CHECK1-NEXT: store i64 [[COMP]], i64* [[COMP_ADDR]], align 8 // CHECK1-NEXT: [[CONV:%.*]] = bitcast i64* [[LA_ADDR]] to i32* // CHECK1-NEXT: [[CONV1:%.*]] = bitcast i64* [[COMP_ADDR]] to i32* -// CHECK1-NEXT: [[TMP1:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK1-NEXT: call void @__kmpc_push_num_teams(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]], i32 [[TMP1]], i32 0) // CHECK1-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB1]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32*)* @.omp_outlined..4 to void (i32*, i32*, ...)*), i32* [[CONV1]]) // CHECK1-NEXT: ret void @@ -670,7 +670,7 @@ void foo() { // CHECK1-NEXT: store i64 [[COMP]], i64* [[COMP_ADDR]], align 8 // CHECK1-NEXT: [[CONV:%.*]] = bitcast i64* [[LA_ADDR]] to i32* // CHECK1-NEXT: [[CONV1:%.*]] = bitcast i64* [[COMP_ADDR]] to i32* -// CHECK1-NEXT: [[TMP1:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK1-NEXT: call void @__kmpc_push_num_teams(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]], i32 0, i32 [[TMP1]]) // CHECK1-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) 
@__kmpc_fork_teams(%struct.ident_t* @[[GLOB1]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32*)* @.omp_outlined..7 to void (i32*, i32*, ...)*), i32* [[CONV1]]) // CHECK1-NEXT: ret void @@ -710,11 +710,11 @@ void foo() { // CHECK1-NEXT: [[CONV1:%.*]] = bitcast i64* [[A_ADDR]] to i32* // CHECK1-NEXT: [[CONV2:%.*]] = bitcast i64* [[LC_ADDR]] to float* // CHECK1-NEXT: [[CONV3:%.*]] = bitcast i64* [[COMP_ADDR]] to i32* -// CHECK1-NEXT: [[TMP1:%.*]] = load i32, i32* [[CONV]], align 8 -// CHECK1-NEXT: [[TMP2:%.*]] = load i32, i32* [[CONV1]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = load i32, i32* [[CONV]], align 4 +// CHECK1-NEXT: [[TMP2:%.*]] = load i32, i32* [[CONV1]], align 4 // CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP1]], [[TMP2]] // CHECK1-NEXT: [[TMP3:%.*]] = load i64, i64* [[GBLB_ADDR]], align 8 -// CHECK1-NEXT: [[TMP4:%.*]] = load float, float* [[CONV2]], align 8 +// CHECK1-NEXT: [[TMP4:%.*]] = load float, float* [[CONV2]], align 4 // CHECK1-NEXT: [[CONV4:%.*]] = fptosi float [[TMP4]] to i64 // CHECK1-NEXT: [[ADD5:%.*]] = add nsw i64 [[TMP3]], [[CONV4]] // CHECK1-NEXT: [[TMP5:%.*]] = trunc i64 [[ADD5]] to i32 @@ -1130,7 +1130,7 @@ void foo() { // CHECK2-NEXT: store i64 [[COMP]], i64* [[COMP_ADDR]], align 8 // CHECK2-NEXT: [[CONV:%.*]] = bitcast i64* [[LA_ADDR]] to i32* // CHECK2-NEXT: [[CONV1:%.*]] = bitcast i64* [[COMP_ADDR]] to i32* -// CHECK2-NEXT: [[TMP1:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK2-NEXT: [[TMP1:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK2-NEXT: call void @__kmpc_push_num_teams(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]], i32 [[TMP1]], i32 0) // CHECK2-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB1]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32*)* @.omp_outlined..4 to void (i32*, i32*, ...)*), i32* [[CONV1]]) // CHECK2-NEXT: ret void @@ -1162,7 +1162,7 @@ void foo() { // CHECK2-NEXT: store i64 [[COMP]], i64* [[COMP_ADDR]], align 8 // CHECK2-NEXT: [[CONV:%.*]] = bitcast i64* [[LA_ADDR]] to i32* // CHECK2-NEXT: [[CONV1:%.*]] = bitcast i64* [[COMP_ADDR]] to i32* -// CHECK2-NEXT: [[TMP1:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK2-NEXT: [[TMP1:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK2-NEXT: call void @__kmpc_push_num_teams(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]], i32 0, i32 [[TMP1]]) // CHECK2-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) 
@__kmpc_fork_teams(%struct.ident_t* @[[GLOB1]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32*)* @.omp_outlined..7 to void (i32*, i32*, ...)*), i32* [[CONV1]]) // CHECK2-NEXT: ret void @@ -1202,11 +1202,11 @@ void foo() { // CHECK2-NEXT: [[CONV1:%.*]] = bitcast i64* [[A_ADDR]] to i32* // CHECK2-NEXT: [[CONV2:%.*]] = bitcast i64* [[LC_ADDR]] to float* // CHECK2-NEXT: [[CONV3:%.*]] = bitcast i64* [[COMP_ADDR]] to i32* -// CHECK2-NEXT: [[TMP1:%.*]] = load i32, i32* [[CONV]], align 8 -// CHECK2-NEXT: [[TMP2:%.*]] = load i32, i32* [[CONV1]], align 8 +// CHECK2-NEXT: [[TMP1:%.*]] = load i32, i32* [[CONV]], align 4 +// CHECK2-NEXT: [[TMP2:%.*]] = load i32, i32* [[CONV1]], align 4 // CHECK2-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP1]], [[TMP2]] // CHECK2-NEXT: [[TMP3:%.*]] = load i64, i64* [[GBLB_ADDR]], align 8 -// CHECK2-NEXT: [[TMP4:%.*]] = load float, float* [[CONV2]], align 8 +// CHECK2-NEXT: [[TMP4:%.*]] = load float, float* [[CONV2]], align 4 // CHECK2-NEXT: [[CONV4:%.*]] = fptosi float [[TMP4]] to i64 // CHECK2-NEXT: [[ADD5:%.*]] = add nsw i64 [[TMP3]], [[CONV4]] // CHECK2-NEXT: [[TMP5:%.*]] = trunc i64 [[ADD5]] to i32 @@ -3849,8 +3849,8 @@ void foo() { // CHECK33-NEXT: [[CONV:%.*]] = bitcast i64* [[A_ADDR]] to i32* // CHECK33-NEXT: [[CONV1:%.*]] = bitcast i64* [[B_ADDR]] to i32* // CHECK33-NEXT: [[CONV2:%.*]] = bitcast i64* [[ARGC_ADDR]] to i32* -// CHECK33-NEXT: [[TMP1:%.*]] = load i32, i32* [[CONV]], align 8 -// CHECK33-NEXT: [[TMP2:%.*]] = load i32, i32* [[CONV1]], align 8 +// CHECK33-NEXT: [[TMP1:%.*]] = load i32, i32* [[CONV]], align 4 +// CHECK33-NEXT: [[TMP2:%.*]] = load i32, i32* [[CONV1]], align 4 // CHECK33-NEXT: call void @__kmpc_push_num_teams(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]], i32 [[TMP1]], i32 [[TMP2]]) // CHECK33-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB1]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32*)* @.omp_outlined. to void (i32*, i32*, ...)*), i32* [[CONV2]]) // CHECK33-NEXT: ret void @@ -3882,8 +3882,8 @@ void foo() { // CHECK33-NEXT: store i8** [[ARGC]], i8*** [[ARGC_ADDR]], align 8 // CHECK33-NEXT: [[CONV:%.*]] = bitcast i64* [[A_ADDR]] to i32* // CHECK33-NEXT: [[CONV1:%.*]] = bitcast i64* [[B_ADDR]] to i32* -// CHECK33-NEXT: [[TMP1:%.*]] = load i32, i32* [[CONV]], align 8 -// CHECK33-NEXT: [[TMP2:%.*]] = load i32, i32* [[CONV1]], align 8 +// CHECK33-NEXT: [[TMP1:%.*]] = load i32, i32* [[CONV]], align 4 +// CHECK33-NEXT: [[TMP2:%.*]] = load i32, i32* [[CONV1]], align 4 // CHECK33-NEXT: call void @__kmpc_push_num_teams(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]], i32 [[TMP1]], i32 [[TMP2]]) // CHECK33-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) 
@__kmpc_fork_teams(%struct.ident_t* @[[GLOB1]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i8***)* @.omp_outlined..1 to void (i32*, i32*, ...)*), i8*** [[ARGC_ADDR]]) // CHECK33-NEXT: ret void @@ -3916,8 +3916,8 @@ void foo() { // CHECK34-NEXT: [[CONV:%.*]] = bitcast i64* [[A_ADDR]] to i32* // CHECK34-NEXT: [[CONV1:%.*]] = bitcast i64* [[B_ADDR]] to i32* // CHECK34-NEXT: [[CONV2:%.*]] = bitcast i64* [[ARGC_ADDR]] to i32* -// CHECK34-NEXT: [[TMP1:%.*]] = load i32, i32* [[CONV]], align 8 -// CHECK34-NEXT: [[TMP2:%.*]] = load i32, i32* [[CONV1]], align 8 +// CHECK34-NEXT: [[TMP1:%.*]] = load i32, i32* [[CONV]], align 4 +// CHECK34-NEXT: [[TMP2:%.*]] = load i32, i32* [[CONV1]], align 4 // CHECK34-NEXT: call void @__kmpc_push_num_teams(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]], i32 [[TMP1]], i32 [[TMP2]]) // CHECK34-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB1]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32*)* @.omp_outlined. to void (i32*, i32*, ...)*), i32* [[CONV2]]) // CHECK34-NEXT: ret void @@ -3949,8 +3949,8 @@ void foo() { // CHECK34-NEXT: store i8** [[ARGC]], i8*** [[ARGC_ADDR]], align 8 // CHECK34-NEXT: [[CONV:%.*]] = bitcast i64* [[A_ADDR]] to i32* // CHECK34-NEXT: [[CONV1:%.*]] = bitcast i64* [[B_ADDR]] to i32* -// CHECK34-NEXT: [[TMP1:%.*]] = load i32, i32* [[CONV]], align 8 -// CHECK34-NEXT: [[TMP2:%.*]] = load i32, i32* [[CONV1]], align 8 +// CHECK34-NEXT: [[TMP1:%.*]] = load i32, i32* [[CONV]], align 4 +// CHECK34-NEXT: [[TMP2:%.*]] = load i32, i32* [[CONV1]], align 4 // CHECK34-NEXT: call void @__kmpc_push_num_teams(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]], i32 [[TMP1]], i32 [[TMP2]]) // CHECK34-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB1]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i8***)* @.omp_outlined..1 to void (i32*, i32*, ...)*), i8*** [[ARGC_ADDR]]) // CHECK34-NEXT: ret void diff --git a/clang/test/OpenMP/teams_distribute_codegen.cpp b/clang/test/OpenMP/teams_distribute_codegen.cpp index 6124911226a9..24a4329d096b 100644 --- a/clang/test/OpenMP/teams_distribute_codegen.cpp +++ b/clang/test/OpenMP/teams_distribute_codegen.cpp @@ -328,8 +328,8 @@ int main (int argc, char **argv) { // CHECK1-NEXT: [[CONV1:%.*]] = bitcast i64* [[TH_ADDR]] to i32* // CHECK1-NEXT: [[CONV2:%.*]] = bitcast i64* [[N_ADDR]] to i32* // CHECK1-NEXT: [[TMP1:%.*]] = load [100 x i32]*, [100 x i32]** [[A_ADDR]], align 8 -// CHECK1-NEXT: [[TMP2:%.*]] = load i32, i32* [[CONV]], align 8 -// CHECK1-NEXT: [[TMP3:%.*]] = load i32, i32* [[CONV1]], align 8 +// CHECK1-NEXT: [[TMP2:%.*]] = load i32, i32* [[CONV]], align 4 +// CHECK1-NEXT: [[TMP3:%.*]] = load i32, i32* [[CONV1]], align 4 // CHECK1-NEXT: call void @__kmpc_push_num_teams(%struct.ident_t* @[[GLOB2]], i32 [[TMP0]], i32 [[TMP2]], i32 [[TMP3]]) // CHECK1-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB2]], i32 2, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32*, [100 x i32]*)* @.omp_outlined. 
to void (i32*, i32*, ...)*), i32* [[CONV2]], [100 x i32]* [[TMP1]]) // CHECK1-NEXT: ret void @@ -692,8 +692,8 @@ int main (int argc, char **argv) { // CHECK2-NEXT: [[CONV1:%.*]] = bitcast i64* [[TH_ADDR]] to i32* // CHECK2-NEXT: [[CONV2:%.*]] = bitcast i64* [[N_ADDR]] to i32* // CHECK2-NEXT: [[TMP1:%.*]] = load [100 x i32]*, [100 x i32]** [[A_ADDR]], align 8 -// CHECK2-NEXT: [[TMP2:%.*]] = load i32, i32* [[CONV]], align 8 -// CHECK2-NEXT: [[TMP3:%.*]] = load i32, i32* [[CONV1]], align 8 +// CHECK2-NEXT: [[TMP2:%.*]] = load i32, i32* [[CONV]], align 4 +// CHECK2-NEXT: [[TMP3:%.*]] = load i32, i32* [[CONV1]], align 4 // CHECK2-NEXT: call void @__kmpc_push_num_teams(%struct.ident_t* @[[GLOB2]], i32 [[TMP0]], i32 [[TMP2]], i32 [[TMP3]]) // CHECK2-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB2]], i32 2, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32*, [100 x i32]*)* @.omp_outlined. to void (i32*, i32*, ...)*), i32* [[CONV2]], [100 x i32]* [[TMP1]]) // CHECK2-NEXT: ret void @@ -3202,8 +3202,8 @@ int main (int argc, char **argv) { // CHECK25-NEXT: [[CONV:%.*]] = bitcast i64* [[TE_ADDR]] to i32* // CHECK25-NEXT: [[CONV1:%.*]] = bitcast i64* [[TH_ADDR]] to i32* // CHECK25-NEXT: [[TMP1:%.*]] = load [10 x i32]*, [10 x i32]** [[A_ADDR]], align 8 -// CHECK25-NEXT: [[TMP2:%.*]] = load i32, i32* [[CONV]], align 8 -// CHECK25-NEXT: [[TMP3:%.*]] = load i32, i32* [[CONV1]], align 8 +// CHECK25-NEXT: [[TMP2:%.*]] = load i32, i32* [[CONV]], align 4 +// CHECK25-NEXT: [[TMP3:%.*]] = load i32, i32* [[CONV1]], align 4 // CHECK25-NEXT: call void @__kmpc_push_num_teams(%struct.ident_t* @[[GLOB2]], i32 [[TMP0]], i32 [[TMP2]], i32 [[TMP3]]) // CHECK25-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB2]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, [10 x i32]*)* @.omp_outlined..1 to void (i32*, i32*, ...)*), [10 x i32]* [[TMP1]]) // CHECK25-NEXT: ret void @@ -3562,8 +3562,8 @@ int main (int argc, char **argv) { // CHECK26-NEXT: [[CONV:%.*]] = bitcast i64* [[TE_ADDR]] to i32* // CHECK26-NEXT: [[CONV1:%.*]] = bitcast i64* [[TH_ADDR]] to i32* // CHECK26-NEXT: [[TMP1:%.*]] = load [10 x i32]*, [10 x i32]** [[A_ADDR]], align 8 -// CHECK26-NEXT: [[TMP2:%.*]] = load i32, i32* [[CONV]], align 8 -// CHECK26-NEXT: [[TMP3:%.*]] = load i32, i32* [[CONV1]], align 8 +// CHECK26-NEXT: [[TMP2:%.*]] = load i32, i32* [[CONV]], align 4 +// CHECK26-NEXT: [[TMP3:%.*]] = load i32, i32* [[CONV1]], align 4 // CHECK26-NEXT: call void @__kmpc_push_num_teams(%struct.ident_t* @[[GLOB2]], i32 [[TMP0]], i32 [[TMP2]], i32 [[TMP3]]) // CHECK26-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) 
@__kmpc_fork_teams(%struct.ident_t* @[[GLOB2]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, [10 x i32]*)* @.omp_outlined..1 to void (i32*, i32*, ...)*), [10 x i32]* [[TMP1]]) // CHECK26-NEXT: ret void diff --git a/clang/test/OpenMP/teams_distribute_dist_schedule_codegen.cpp b/clang/test/OpenMP/teams_distribute_dist_schedule_codegen.cpp index a16577e47b56..4737bdf9885d 100644 --- a/clang/test/OpenMP/teams_distribute_dist_schedule_codegen.cpp +++ b/clang/test/OpenMP/teams_distribute_dist_schedule_codegen.cpp @@ -1969,7 +1969,7 @@ int main (int argc, char **argv) { // CHECK9-NEXT: [[CONV:%.*]] = bitcast i64* [[N_ADDR]] to i32* // CHECK9-NEXT: [[TMP0:%.*]] = load i64, i64* [[VLA_ADDR]], align 8 // CHECK9-NEXT: [[TMP1:%.*]] = load i32*, i32** [[A_ADDR]], align 8 -// CHECK9-NEXT: [[TMP2:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK9-NEXT: [[TMP2:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK9-NEXT: store i32 [[TMP2]], i32* [[DOTCAPTURE_EXPR_]], align 4 // CHECK9-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 // CHECK9-NEXT: [[CONV1:%.*]] = bitcast i64* [[DOTCAPTURE_EXPR__CASTED]] to i32* @@ -2025,7 +2025,7 @@ int main (int argc, char **argv) { // CHECK9-NEXT: store i32 [[TMP6]], i32* [[DOTOMP_UB]], align 4 // CHECK9-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK9-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK9-NEXT: [[TMP7:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK9-NEXT: [[TMP7:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK9-NEXT: [[TMP8:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK9-NEXT: [[TMP9:%.*]] = load i32, i32* [[TMP8]], align 4 // CHECK9-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP9]], i32 91, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 [[TMP7]]) @@ -2881,7 +2881,7 @@ int main (int argc, char **argv) { // CHECK10-NEXT: [[CONV:%.*]] = bitcast i64* [[N_ADDR]] to i32* // CHECK10-NEXT: [[TMP0:%.*]] = load i64, i64* [[VLA_ADDR]], align 8 // CHECK10-NEXT: [[TMP1:%.*]] = load i32*, i32** [[A_ADDR]], align 8 -// CHECK10-NEXT: [[TMP2:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK10-NEXT: [[TMP2:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK10-NEXT: store i32 [[TMP2]], i32* [[DOTCAPTURE_EXPR_]], align 4 // CHECK10-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 // CHECK10-NEXT: [[CONV1:%.*]] = bitcast i64* [[DOTCAPTURE_EXPR__CASTED]] to i32* @@ -2937,7 +2937,7 @@ int main (int argc, char **argv) { // CHECK10-NEXT: store i32 [[TMP6]], i32* [[DOTOMP_UB]], align 4 // CHECK10-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK10-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK10-NEXT: [[TMP7:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK10-NEXT: [[TMP7:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK10-NEXT: [[TMP8:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK10-NEXT: [[TMP9:%.*]] = load i32, i32* [[TMP8]], align 4 // CHECK10-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP9]], i32 91, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 [[TMP7]]) diff --git a/clang/test/OpenMP/teams_distribute_firstprivate_codegen.cpp b/clang/test/OpenMP/teams_distribute_firstprivate_codegen.cpp index f766b6257c24..d428cf999b1e 100644 --- a/clang/test/OpenMP/teams_distribute_firstprivate_codegen.cpp +++ b/clang/test/OpenMP/teams_distribute_firstprivate_codegen.cpp @@ -348,11 
+348,11 @@ int main() { // CHECK1-NEXT: [[TMP1:%.*]] = load [2 x %struct.S]*, [2 x %struct.S]** [[S_ARR_ADDR]], align 8 // CHECK1-NEXT: [[TMP2:%.*]] = load %struct.S*, %struct.S** [[VAR_ADDR]], align 8 // CHECK1-NEXT: [[CONV1:%.*]] = bitcast i64* [[SIVAR_ADDR]] to i32* -// CHECK1-NEXT: [[TMP3:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK1-NEXT: [[TMP3:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK1-NEXT: [[CONV2:%.*]] = bitcast i64* [[T_VAR_CASTED]] to i32* // CHECK1-NEXT: store i32 [[TMP3]], i32* [[CONV2]], align 4 // CHECK1-NEXT: [[TMP4:%.*]] = load i64, i64* [[T_VAR_CASTED]], align 8 -// CHECK1-NEXT: [[TMP5:%.*]] = load i32, i32* [[CONV1]], align 8 +// CHECK1-NEXT: [[TMP5:%.*]] = load i32, i32* [[CONV1]], align 4 // CHECK1-NEXT: [[CONV3:%.*]] = bitcast i64* [[SIVAR_CASTED]] to i32* // CHECK1-NEXT: store i32 [[TMP5]], i32* [[CONV3]], align 4 // CHECK1-NEXT: [[TMP6:%.*]] = load i64, i64* [[SIVAR_CASTED]], align 8 @@ -449,7 +449,7 @@ int main() { // CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP14]], 1 // CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK1-NEXT: store i32 [[ADD]], i32* [[I]], align 4 -// CHECK1-NEXT: [[TMP15:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK1-NEXT: [[TMP15:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK1-NEXT: [[TMP16:%.*]] = load i32, i32* [[I]], align 4 // CHECK1-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP16]] to i64 // CHECK1-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x i32], [2 x i32]* [[VEC2]], i64 0, i64 [[IDXPROM]] @@ -461,9 +461,9 @@ int main() { // CHECK1-NEXT: [[TMP19:%.*]] = bitcast %struct.S* [[VAR5]] to i8* // CHECK1-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 [[TMP18]], i8* align 4 [[TMP19]], i64 4, i1 false) // CHECK1-NEXT: [[TMP20:%.*]] = load i32, i32* [[I]], align 4 -// CHECK1-NEXT: [[TMP21:%.*]] = load i32, i32* [[CONV1]], align 8 +// CHECK1-NEXT: [[TMP21:%.*]] = load i32, i32* [[CONV1]], align 4 // CHECK1-NEXT: [[ADD10:%.*]] = add nsw i32 [[TMP21]], [[TMP20]] -// CHECK1-NEXT: store i32 [[ADD10]], i32* [[CONV1]], align 8 +// CHECK1-NEXT: store i32 [[ADD10]], i32* [[CONV1]], align 4 // CHECK1-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK1: omp.body.continue: // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] @@ -699,7 +699,7 @@ int main() { // CHECK1-NEXT: [[TMP1:%.*]] = load [2 x %struct.S.0]*, [2 x %struct.S.0]** [[S_ARR_ADDR]], align 8 // CHECK1-NEXT: [[TMP2:%.*]] = load %struct.S.0*, %struct.S.0** [[VAR_ADDR]], align 8 // CHECK1-NEXT: store %struct.S.0* [[TMP2]], %struct.S.0** [[TMP]], align 8 -// CHECK1-NEXT: [[TMP3:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK1-NEXT: [[TMP3:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK1-NEXT: [[CONV1:%.*]] = bitcast i64* [[T_VAR_CASTED]] to i32* // CHECK1-NEXT: store i32 [[TMP3]], i32* [[CONV1]], align 4 // CHECK1-NEXT: [[TMP4:%.*]] = load i64, i64* [[T_VAR_CASTED]], align 8 @@ -799,7 +799,7 @@ int main() { // CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP15]], 1 // CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK1-NEXT: store i32 [[ADD]], i32* [[I]], align 4 -// CHECK1-NEXT: [[TMP16:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK1-NEXT: [[TMP16:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK1-NEXT: [[TMP17:%.*]] = load i32, i32* [[I]], align 4 // CHECK1-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP17]] to i64 // CHECK1-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x i32], [2 x i32]* [[VEC2]], i64 0, i64 [[IDXPROM]] @@ -1140,11 +1140,11 @@ int main() { // CHECK2-NEXT: [[TMP1:%.*]] = load [2 x %struct.S]*, [2 x %struct.S]** [[S_ARR_ADDR]], align 8 
// CHECK2-NEXT: [[TMP2:%.*]] = load %struct.S*, %struct.S** [[VAR_ADDR]], align 8 // CHECK2-NEXT: [[CONV1:%.*]] = bitcast i64* [[SIVAR_ADDR]] to i32* -// CHECK2-NEXT: [[TMP3:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK2-NEXT: [[TMP3:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK2-NEXT: [[CONV2:%.*]] = bitcast i64* [[T_VAR_CASTED]] to i32* // CHECK2-NEXT: store i32 [[TMP3]], i32* [[CONV2]], align 4 // CHECK2-NEXT: [[TMP4:%.*]] = load i64, i64* [[T_VAR_CASTED]], align 8 -// CHECK2-NEXT: [[TMP5:%.*]] = load i32, i32* [[CONV1]], align 8 +// CHECK2-NEXT: [[TMP5:%.*]] = load i32, i32* [[CONV1]], align 4 // CHECK2-NEXT: [[CONV3:%.*]] = bitcast i64* [[SIVAR_CASTED]] to i32* // CHECK2-NEXT: store i32 [[TMP5]], i32* [[CONV3]], align 4 // CHECK2-NEXT: [[TMP6:%.*]] = load i64, i64* [[SIVAR_CASTED]], align 8 @@ -1241,7 +1241,7 @@ int main() { // CHECK2-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP14]], 1 // CHECK2-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK2-NEXT: store i32 [[ADD]], i32* [[I]], align 4 -// CHECK2-NEXT: [[TMP15:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK2-NEXT: [[TMP15:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK2-NEXT: [[TMP16:%.*]] = load i32, i32* [[I]], align 4 // CHECK2-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP16]] to i64 // CHECK2-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x i32], [2 x i32]* [[VEC2]], i64 0, i64 [[IDXPROM]] @@ -1253,9 +1253,9 @@ int main() { // CHECK2-NEXT: [[TMP19:%.*]] = bitcast %struct.S* [[VAR5]] to i8* // CHECK2-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 [[TMP18]], i8* align 4 [[TMP19]], i64 4, i1 false) // CHECK2-NEXT: [[TMP20:%.*]] = load i32, i32* [[I]], align 4 -// CHECK2-NEXT: [[TMP21:%.*]] = load i32, i32* [[CONV1]], align 8 +// CHECK2-NEXT: [[TMP21:%.*]] = load i32, i32* [[CONV1]], align 4 // CHECK2-NEXT: [[ADD10:%.*]] = add nsw i32 [[TMP21]], [[TMP20]] -// CHECK2-NEXT: store i32 [[ADD10]], i32* [[CONV1]], align 8 +// CHECK2-NEXT: store i32 [[ADD10]], i32* [[CONV1]], align 4 // CHECK2-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK2: omp.body.continue: // CHECK2-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] @@ -1491,7 +1491,7 @@ int main() { // CHECK2-NEXT: [[TMP1:%.*]] = load [2 x %struct.S.0]*, [2 x %struct.S.0]** [[S_ARR_ADDR]], align 8 // CHECK2-NEXT: [[TMP2:%.*]] = load %struct.S.0*, %struct.S.0** [[VAR_ADDR]], align 8 // CHECK2-NEXT: store %struct.S.0* [[TMP2]], %struct.S.0** [[TMP]], align 8 -// CHECK2-NEXT: [[TMP3:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK2-NEXT: [[TMP3:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK2-NEXT: [[CONV1:%.*]] = bitcast i64* [[T_VAR_CASTED]] to i32* // CHECK2-NEXT: store i32 [[TMP3]], i32* [[CONV1]], align 4 // CHECK2-NEXT: [[TMP4:%.*]] = load i64, i64* [[T_VAR_CASTED]], align 8 @@ -1591,7 +1591,7 @@ int main() { // CHECK2-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP15]], 1 // CHECK2-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK2-NEXT: store i32 [[ADD]], i32* [[I]], align 4 -// CHECK2-NEXT: [[TMP16:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK2-NEXT: [[TMP16:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK2-NEXT: [[TMP17:%.*]] = load i32, i32* [[I]], align 4 // CHECK2-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP17]] to i64 // CHECK2-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x i32], [2 x i32]* [[VEC2]], i64 0, i64 [[IDXPROM]] @@ -3418,7 +3418,7 @@ int main() { // CHECK9-NEXT: [[CONV1:%.*]] = bitcast i64* [[SIVAR_ADDR]] to i32* // CHECK9-NEXT: [[CONV2:%.*]] = bitcast i64* [[G1_ADDR]] to i32* // CHECK9-NEXT: store i32* [[CONV2]], i32** [[TMP]], align 8 -// 
CHECK9-NEXT: [[TMP0:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK9-NEXT: [[TMP0:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK9-NEXT: [[CONV3:%.*]] = bitcast i64* [[G_CASTED]] to i32* // CHECK9-NEXT: store i32 [[TMP0]], i32* [[CONV3]], align 4 // CHECK9-NEXT: [[TMP1:%.*]] = load i64, i64* [[G_CASTED]], align 8 @@ -3427,7 +3427,7 @@ int main() { // CHECK9-NEXT: [[CONV4:%.*]] = bitcast i64* [[G1_CASTED]] to i32* // CHECK9-NEXT: store i32 [[TMP3]], i32* [[CONV4]], align 4 // CHECK9-NEXT: [[TMP4:%.*]] = load i64, i64* [[G1_CASTED]], align 8 -// CHECK9-NEXT: [[TMP5:%.*]] = load i32, i32* [[CONV1]], align 8 +// CHECK9-NEXT: [[TMP5:%.*]] = load i32, i32* [[CONV1]], align 4 // CHECK9-NEXT: [[CONV5:%.*]] = bitcast i64* [[SIVAR_CASTED]] to i32* // CHECK9-NEXT: store i32 [[TMP5]], i32* [[CONV5]], align 4 // CHECK9-NEXT: [[TMP6:%.*]] = load i64, i64* [[SIVAR_CASTED]], align 8 @@ -3492,10 +3492,10 @@ int main() { // CHECK9-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP7]], 1 // CHECK9-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK9-NEXT: store i32 [[ADD]], i32* [[I]], align 4 -// CHECK9-NEXT: store i32 1, i32* [[CONV]], align 8 +// CHECK9-NEXT: store i32 1, i32* [[CONV]], align 4 // CHECK9-NEXT: [[TMP8:%.*]] = load i32*, i32** [[TMP]], align 8 // CHECK9-NEXT: store volatile i32 1, i32* [[TMP8]], align 4 -// CHECK9-NEXT: store i32 2, i32* [[CONV2]], align 8 +// CHECK9-NEXT: store i32 2, i32* [[CONV2]], align 4 // CHECK9-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[CLASS_ANON_0]], %class.anon.0* [[REF_TMP]], i32 0, i32 0 // CHECK9-NEXT: store i32* [[CONV]], i32** [[TMP9]], align 8 // CHECK9-NEXT: [[TMP10:%.*]] = getelementptr inbounds [[CLASS_ANON_0]], %class.anon.0* [[REF_TMP]], i32 0, i32 1 @@ -3675,7 +3675,7 @@ int main() { // CHECK10-NEXT: [[CONV1:%.*]] = bitcast i64* [[SIVAR_ADDR]] to i32* // CHECK10-NEXT: [[CONV2:%.*]] = bitcast i64* [[G1_ADDR]] to i32* // CHECK10-NEXT: store i32* [[CONV2]], i32** [[TMP]], align 8 -// CHECK10-NEXT: [[TMP0:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK10-NEXT: [[TMP0:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK10-NEXT: [[CONV3:%.*]] = bitcast i64* [[G_CASTED]] to i32* // CHECK10-NEXT: store i32 [[TMP0]], i32* [[CONV3]], align 4 // CHECK10-NEXT: [[TMP1:%.*]] = load i64, i64* [[G_CASTED]], align 8 @@ -3684,7 +3684,7 @@ int main() { // CHECK10-NEXT: [[CONV4:%.*]] = bitcast i64* [[G1_CASTED]] to i32* // CHECK10-NEXT: store i32 [[TMP3]], i32* [[CONV4]], align 4 // CHECK10-NEXT: [[TMP4:%.*]] = load i64, i64* [[G1_CASTED]], align 8 -// CHECK10-NEXT: [[TMP5:%.*]] = load i32, i32* [[CONV1]], align 8 +// CHECK10-NEXT: [[TMP5:%.*]] = load i32, i32* [[CONV1]], align 4 // CHECK10-NEXT: [[CONV5:%.*]] = bitcast i64* [[SIVAR_CASTED]] to i32* // CHECK10-NEXT: store i32 [[TMP5]], i32* [[CONV5]], align 4 // CHECK10-NEXT: [[TMP6:%.*]] = load i64, i64* [[SIVAR_CASTED]], align 8 @@ -3749,10 +3749,10 @@ int main() { // CHECK10-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP7]], 1 // CHECK10-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK10-NEXT: store i32 [[ADD]], i32* [[I]], align 4 -// CHECK10-NEXT: store i32 1, i32* [[CONV]], align 8 +// CHECK10-NEXT: store i32 1, i32* [[CONV]], align 4 // CHECK10-NEXT: [[TMP8:%.*]] = load i32*, i32** [[TMP]], align 8 // CHECK10-NEXT: store volatile i32 1, i32* [[TMP8]], align 4 -// CHECK10-NEXT: store i32 2, i32* [[CONV2]], align 8 +// CHECK10-NEXT: store i32 2, i32* [[CONV2]], align 4 // CHECK10-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[CLASS_ANON_0]], %class.anon.0* [[REF_TMP]], i32 0, i32 0 // CHECK10-NEXT: store i32* [[CONV]], i32** 
[[TMP9]], align 8 // CHECK10-NEXT: [[TMP10:%.*]] = getelementptr inbounds [[CLASS_ANON_0]], %class.anon.0* [[REF_TMP]], i32 0, i32 1 diff --git a/clang/test/OpenMP/teams_distribute_parallel_for_codegen.cpp b/clang/test/OpenMP/teams_distribute_parallel_for_codegen.cpp index c31965a4c645..61d18a0eb087 100644 --- a/clang/test/OpenMP/teams_distribute_parallel_for_codegen.cpp +++ b/clang/test/OpenMP/teams_distribute_parallel_for_codegen.cpp @@ -327,8 +327,8 @@ int main (int argc, char **argv) { // CHECK1-NEXT: [[CONV1:%.*]] = bitcast i64* [[TH_ADDR]] to i32* // CHECK1-NEXT: [[CONV2:%.*]] = bitcast i64* [[N_ADDR]] to i32* // CHECK1-NEXT: [[TMP1:%.*]] = load [100 x i32]*, [100 x i32]** [[A_ADDR]], align 8 -// CHECK1-NEXT: [[TMP2:%.*]] = load i32, i32* [[CONV]], align 8 -// CHECK1-NEXT: [[TMP3:%.*]] = load i32, i32* [[CONV1]], align 8 +// CHECK1-NEXT: [[TMP2:%.*]] = load i32, i32* [[CONV]], align 4 +// CHECK1-NEXT: [[TMP3:%.*]] = load i32, i32* [[CONV1]], align 4 // CHECK1-NEXT: call void @__kmpc_push_num_teams(%struct.ident_t* @[[GLOB3]], i32 [[TMP0]], i32 [[TMP2]], i32 [[TMP3]]) // CHECK1-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 2, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32*, [100 x i32]*)* @.omp_outlined. to void (i32*, i32*, ...)*), i32* [[CONV2]], [100 x i32]* [[TMP1]]) // CHECK1-NEXT: ret void @@ -901,8 +901,8 @@ int main (int argc, char **argv) { // CHECK2-NEXT: [[CONV1:%.*]] = bitcast i64* [[TH_ADDR]] to i32* // CHECK2-NEXT: [[CONV2:%.*]] = bitcast i64* [[N_ADDR]] to i32* // CHECK2-NEXT: [[TMP1:%.*]] = load [100 x i32]*, [100 x i32]** [[A_ADDR]], align 8 -// CHECK2-NEXT: [[TMP2:%.*]] = load i32, i32* [[CONV]], align 8 -// CHECK2-NEXT: [[TMP3:%.*]] = load i32, i32* [[CONV1]], align 8 +// CHECK2-NEXT: [[TMP2:%.*]] = load i32, i32* [[CONV]], align 4 +// CHECK2-NEXT: [[TMP3:%.*]] = load i32, i32* [[CONV1]], align 4 // CHECK2-NEXT: call void @__kmpc_push_num_teams(%struct.ident_t* @[[GLOB3]], i32 [[TMP0]], i32 [[TMP2]], i32 [[TMP3]]) // CHECK2-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 2, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32*, [100 x i32]*)* @.omp_outlined. to void (i32*, i32*, ...)*), i32* [[CONV2]], [100 x i32]* [[TMP1]]) // CHECK2-NEXT: ret void @@ -4806,8 +4806,8 @@ int main (int argc, char **argv) { // CHECK25-NEXT: [[CONV:%.*]] = bitcast i64* [[TE_ADDR]] to i32* // CHECK25-NEXT: [[CONV1:%.*]] = bitcast i64* [[TH_ADDR]] to i32* // CHECK25-NEXT: [[TMP1:%.*]] = load [10 x i32]*, [10 x i32]** [[A_ADDR]], align 8 -// CHECK25-NEXT: [[TMP2:%.*]] = load i32, i32* [[CONV]], align 8 -// CHECK25-NEXT: [[TMP3:%.*]] = load i32, i32* [[CONV1]], align 8 +// CHECK25-NEXT: [[TMP2:%.*]] = load i32, i32* [[CONV]], align 4 +// CHECK25-NEXT: [[TMP3:%.*]] = load i32, i32* [[CONV1]], align 4 // CHECK25-NEXT: call void @__kmpc_push_num_teams(%struct.ident_t* @[[GLOB3]], i32 [[TMP0]], i32 [[TMP2]], i32 [[TMP3]]) // CHECK25-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) 
@__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, [10 x i32]*)* @.omp_outlined..2 to void (i32*, i32*, ...)*), [10 x i32]* [[TMP1]]) // CHECK25-NEXT: ret void @@ -5339,8 +5339,8 @@ int main (int argc, char **argv) { // CHECK26-NEXT: [[CONV:%.*]] = bitcast i64* [[TE_ADDR]] to i32* // CHECK26-NEXT: [[CONV1:%.*]] = bitcast i64* [[TH_ADDR]] to i32* // CHECK26-NEXT: [[TMP1:%.*]] = load [10 x i32]*, [10 x i32]** [[A_ADDR]], align 8 -// CHECK26-NEXT: [[TMP2:%.*]] = load i32, i32* [[CONV]], align 8 -// CHECK26-NEXT: [[TMP3:%.*]] = load i32, i32* [[CONV1]], align 8 +// CHECK26-NEXT: [[TMP2:%.*]] = load i32, i32* [[CONV]], align 4 +// CHECK26-NEXT: [[TMP3:%.*]] = load i32, i32* [[CONV1]], align 4 // CHECK26-NEXT: call void @__kmpc_push_num_teams(%struct.ident_t* @[[GLOB3]], i32 [[TMP0]], i32 [[TMP2]], i32 [[TMP3]]) // CHECK26-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, [10 x i32]*)* @.omp_outlined..2 to void (i32*, i32*, ...)*), [10 x i32]* [[TMP1]]) // CHECK26-NEXT: ret void diff --git a/clang/test/OpenMP/teams_distribute_parallel_for_dist_schedule_codegen.cpp b/clang/test/OpenMP/teams_distribute_parallel_for_dist_schedule_codegen.cpp index 965e00ee1d5b..2be4e769c21d 100644 --- a/clang/test/OpenMP/teams_distribute_parallel_for_dist_schedule_codegen.cpp +++ b/clang/test/OpenMP/teams_distribute_parallel_for_dist_schedule_codegen.cpp @@ -3067,7 +3067,7 @@ int main (int argc, char **argv) { // CHECK9-NEXT: [[CONV1:%.*]] = bitcast i64* [[N_ADDR]] to i32* // CHECK9-NEXT: [[TMP0:%.*]] = load i64, i64* [[VLA_ADDR]], align 8 // CHECK9-NEXT: [[TMP1:%.*]] = load i32*, i32** [[A_ADDR]], align 8 -// CHECK9-NEXT: [[TMP2:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK9-NEXT: [[TMP2:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK9-NEXT: store i32 [[TMP2]], i32* [[DOTCAPTURE_EXPR_]], align 4 // CHECK9-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 // CHECK9-NEXT: [[CONV2:%.*]] = bitcast i64* [[DOTCAPTURE_EXPR__CASTED]] to i32* @@ -3124,7 +3124,7 @@ int main (int argc, char **argv) { // CHECK9-NEXT: store i32 [[TMP6]], i32* [[DOTOMP_COMB_UB]], align 4 // CHECK9-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK9-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK9-NEXT: [[TMP7:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK9-NEXT: [[TMP7:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK9-NEXT: [[TMP8:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK9-NEXT: [[TMP9:%.*]] = load i32, i32* [[TMP8]], align 4 // CHECK9-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP9]], i32 91, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 [[TMP7]]) @@ -3155,7 +3155,7 @@ int main (int argc, char **argv) { // CHECK9-NEXT: [[TMP18:%.*]] = zext i32 [[TMP17]] to i64 // CHECK9-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK9-NEXT: [[TMP20:%.*]] = zext i32 [[TMP19]] to i64 -// CHECK9-NEXT: [[TMP21:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK9-NEXT: [[TMP21:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK9-NEXT: [[CONV7:%.*]] = bitcast i64* [[DOTCAPTURE_EXPR__CASTED]] to i32* // CHECK9-NEXT: store i32 [[TMP21]], i32* [[CONV7]], align 4 // CHECK9-NEXT: [[TMP22:%.*]] = load i64, i64* [[DOTCAPTURE_EXPR__CASTED]], align 8 @@ -3710,7 +3710,7 @@ int main (int argc, char **argv) 
{ // CHECK9-NEXT: store [10 x i32]* [[A]], [10 x i32]** [[A_ADDR]], align 8 // CHECK9-NEXT: [[CONV:%.*]] = bitcast i64* [[M_ADDR]] to i32* // CHECK9-NEXT: [[TMP0:%.*]] = load [10 x i32]*, [10 x i32]** [[A_ADDR]], align 8 -// CHECK9-NEXT: [[TMP1:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK9-NEXT: [[TMP1:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK9-NEXT: store i32 [[TMP1]], i32* [[DOTCAPTURE_EXPR_]], align 4 // CHECK9-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 // CHECK9-NEXT: [[CONV1:%.*]] = bitcast i64* [[DOTCAPTURE_EXPR__CASTED]] to i32* @@ -3745,7 +3745,7 @@ int main (int argc, char **argv) { // CHECK9-NEXT: store i32 9, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK9-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK9-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK9-NEXT: [[TMP1:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK9-NEXT: [[TMP1:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK9-NEXT: [[TMP2:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK9-NEXT: [[TMP3:%.*]] = load i32, i32* [[TMP2]], align 4 // CHECK9-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP3]], i32 91, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 [[TMP1]]) @@ -3772,7 +3772,7 @@ int main (int argc, char **argv) { // CHECK9-NEXT: [[TMP9:%.*]] = zext i32 [[TMP8]] to i64 // CHECK9-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK9-NEXT: [[TMP11:%.*]] = zext i32 [[TMP10]] to i64 -// CHECK9-NEXT: [[TMP12:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK9-NEXT: [[TMP12:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK9-NEXT: [[CONV2:%.*]] = bitcast i64* [[DOTCAPTURE_EXPR__CASTED]] to i32* // CHECK9-NEXT: store i32 [[TMP12]], i32* [[CONV2]], align 4 // CHECK9-NEXT: [[TMP13:%.*]] = load i64, i64* [[DOTCAPTURE_EXPR__CASTED]], align 8 @@ -4577,7 +4577,7 @@ int main (int argc, char **argv) { // CHECK10-NEXT: [[CONV1:%.*]] = bitcast i64* [[N_ADDR]] to i32* // CHECK10-NEXT: [[TMP0:%.*]] = load i64, i64* [[VLA_ADDR]], align 8 // CHECK10-NEXT: [[TMP1:%.*]] = load i32*, i32** [[A_ADDR]], align 8 -// CHECK10-NEXT: [[TMP2:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK10-NEXT: [[TMP2:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK10-NEXT: store i32 [[TMP2]], i32* [[DOTCAPTURE_EXPR_]], align 4 // CHECK10-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 // CHECK10-NEXT: [[CONV2:%.*]] = bitcast i64* [[DOTCAPTURE_EXPR__CASTED]] to i32* @@ -4634,7 +4634,7 @@ int main (int argc, char **argv) { // CHECK10-NEXT: store i32 [[TMP6]], i32* [[DOTOMP_COMB_UB]], align 4 // CHECK10-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK10-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK10-NEXT: [[TMP7:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK10-NEXT: [[TMP7:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK10-NEXT: [[TMP8:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK10-NEXT: [[TMP9:%.*]] = load i32, i32* [[TMP8]], align 4 // CHECK10-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP9]], i32 91, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 [[TMP7]]) @@ -4665,7 +4665,7 @@ int main (int argc, char **argv) { // CHECK10-NEXT: [[TMP18:%.*]] = zext i32 [[TMP17]] to i64 // CHECK10-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK10-NEXT: [[TMP20:%.*]] = zext i32 [[TMP19]] to i64 
-// CHECK10-NEXT: [[TMP21:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK10-NEXT: [[TMP21:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK10-NEXT: [[CONV7:%.*]] = bitcast i64* [[DOTCAPTURE_EXPR__CASTED]] to i32* // CHECK10-NEXT: store i32 [[TMP21]], i32* [[CONV7]], align 4 // CHECK10-NEXT: [[TMP22:%.*]] = load i64, i64* [[DOTCAPTURE_EXPR__CASTED]], align 8 @@ -5220,7 +5220,7 @@ int main (int argc, char **argv) { // CHECK10-NEXT: store [10 x i32]* [[A]], [10 x i32]** [[A_ADDR]], align 8 // CHECK10-NEXT: [[CONV:%.*]] = bitcast i64* [[M_ADDR]] to i32* // CHECK10-NEXT: [[TMP0:%.*]] = load [10 x i32]*, [10 x i32]** [[A_ADDR]], align 8 -// CHECK10-NEXT: [[TMP1:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK10-NEXT: [[TMP1:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK10-NEXT: store i32 [[TMP1]], i32* [[DOTCAPTURE_EXPR_]], align 4 // CHECK10-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 // CHECK10-NEXT: [[CONV1:%.*]] = bitcast i64* [[DOTCAPTURE_EXPR__CASTED]] to i32* @@ -5255,7 +5255,7 @@ int main (int argc, char **argv) { // CHECK10-NEXT: store i32 9, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK10-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK10-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK10-NEXT: [[TMP1:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK10-NEXT: [[TMP1:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK10-NEXT: [[TMP2:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK10-NEXT: [[TMP3:%.*]] = load i32, i32* [[TMP2]], align 4 // CHECK10-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP3]], i32 91, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 [[TMP1]]) @@ -5282,7 +5282,7 @@ int main (int argc, char **argv) { // CHECK10-NEXT: [[TMP9:%.*]] = zext i32 [[TMP8]] to i64 // CHECK10-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK10-NEXT: [[TMP11:%.*]] = zext i32 [[TMP10]] to i64 -// CHECK10-NEXT: [[TMP12:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK10-NEXT: [[TMP12:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK10-NEXT: [[CONV2:%.*]] = bitcast i64* [[DOTCAPTURE_EXPR__CASTED]] to i32* // CHECK10-NEXT: store i32 [[TMP12]], i32* [[CONV2]], align 4 // CHECK10-NEXT: [[TMP13:%.*]] = load i64, i64* [[DOTCAPTURE_EXPR__CASTED]], align 8 diff --git a/clang/test/OpenMP/teams_distribute_parallel_for_firstprivate_codegen.cpp b/clang/test/OpenMP/teams_distribute_parallel_for_firstprivate_codegen.cpp index 22ba0d8197a0..bf2459ced3e8 100644 --- a/clang/test/OpenMP/teams_distribute_parallel_for_firstprivate_codegen.cpp +++ b/clang/test/OpenMP/teams_distribute_parallel_for_firstprivate_codegen.cpp @@ -382,11 +382,11 @@ int main() { // CHECK1-NEXT: [[TMP1:%.*]] = load [2 x %struct.S]*, [2 x %struct.S]** [[S_ARR_ADDR]], align 8 // CHECK1-NEXT: [[TMP2:%.*]] = load %struct.S*, %struct.S** [[VAR_ADDR]], align 8 // CHECK1-NEXT: [[CONV1:%.*]] = bitcast i64* [[SIVAR_ADDR]] to i32* -// CHECK1-NEXT: [[TMP3:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK1-NEXT: [[TMP3:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK1-NEXT: [[CONV2:%.*]] = bitcast i64* [[T_VAR_CASTED]] to i32* // CHECK1-NEXT: store i32 [[TMP3]], i32* [[CONV2]], align 4 // CHECK1-NEXT: [[TMP4:%.*]] = load i64, i64* [[T_VAR_CASTED]], align 8 -// CHECK1-NEXT: [[TMP5:%.*]] = load i32, i32* [[CONV1]], align 8 +// CHECK1-NEXT: [[TMP5:%.*]] = load i32, i32* [[CONV1]], align 4 // CHECK1-NEXT: [[CONV3:%.*]] = bitcast i64* [[SIVAR_CASTED]] to i32* // CHECK1-NEXT: 
store i32 [[TMP5]], i32* [[CONV3]], align 4 // CHECK1-NEXT: [[TMP6:%.*]] = load i64, i64* [[SIVAR_CASTED]], align 8 @@ -485,11 +485,11 @@ int main() { // CHECK1-NEXT: [[TMP15:%.*]] = zext i32 [[TMP14]] to i64 // CHECK1-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK1-NEXT: [[TMP17:%.*]] = zext i32 [[TMP16]] to i64 -// CHECK1-NEXT: [[TMP18:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK1-NEXT: [[TMP18:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK1-NEXT: [[CONV8:%.*]] = bitcast i64* [[T_VAR_CASTED]] to i32* // CHECK1-NEXT: store i32 [[TMP18]], i32* [[CONV8]], align 4 // CHECK1-NEXT: [[TMP19:%.*]] = load i64, i64* [[T_VAR_CASTED]], align 8 -// CHECK1-NEXT: [[TMP20:%.*]] = load i32, i32* [[CONV1]], align 8 +// CHECK1-NEXT: [[TMP20:%.*]] = load i32, i32* [[CONV1]], align 4 // CHECK1-NEXT: [[CONV9:%.*]] = bitcast i64* [[SIVAR_CASTED]] to i32* // CHECK1-NEXT: store i32 [[TMP20]], i32* [[CONV9]], align 4 // CHECK1-NEXT: [[TMP21:%.*]] = load i64, i64* [[SIVAR_CASTED]], align 8 @@ -653,7 +653,7 @@ int main() { // CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP16]], 1 // CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK1-NEXT: store i32 [[ADD]], i32* [[I]], align 4 -// CHECK1-NEXT: [[TMP17:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK1-NEXT: [[TMP17:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK1-NEXT: [[TMP18:%.*]] = load i32, i32* [[I]], align 4 // CHECK1-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP18]] to i64 // CHECK1-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x i32], [2 x i32]* [[VEC4]], i64 0, i64 [[IDXPROM]] @@ -665,9 +665,9 @@ int main() { // CHECK1-NEXT: [[TMP21:%.*]] = bitcast %struct.S* [[VAR7]] to i8* // CHECK1-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 [[TMP20]], i8* align 4 [[TMP21]], i64 4, i1 false) // CHECK1-NEXT: [[TMP22:%.*]] = load i32, i32* [[I]], align 4 -// CHECK1-NEXT: [[TMP23:%.*]] = load i32, i32* [[CONV1]], align 8 +// CHECK1-NEXT: [[TMP23:%.*]] = load i32, i32* [[CONV1]], align 4 // CHECK1-NEXT: [[ADD12:%.*]] = add nsw i32 [[TMP23]], [[TMP22]] -// CHECK1-NEXT: store i32 [[ADD12]], i32* [[CONV1]], align 8 +// CHECK1-NEXT: store i32 [[ADD12]], i32* [[CONV1]], align 4 // CHECK1-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK1: omp.body.continue: // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] @@ -870,7 +870,7 @@ int main() { // CHECK1-NEXT: [[TMP1:%.*]] = load [2 x %struct.S.0]*, [2 x %struct.S.0]** [[S_ARR_ADDR]], align 8 // CHECK1-NEXT: [[TMP2:%.*]] = load %struct.S.0*, %struct.S.0** [[VAR_ADDR]], align 8 // CHECK1-NEXT: store %struct.S.0* [[TMP2]], %struct.S.0** [[TMP]], align 8 -// CHECK1-NEXT: [[TMP3:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK1-NEXT: [[TMP3:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK1-NEXT: [[CONV1:%.*]] = bitcast i64* [[T_VAR_CASTED]] to i32* // CHECK1-NEXT: store i32 [[TMP3]], i32* [[CONV1]], align 4 // CHECK1-NEXT: [[TMP4:%.*]] = load i64, i64* [[T_VAR_CASTED]], align 8 @@ -971,7 +971,7 @@ int main() { // CHECK1-NEXT: [[TMP16:%.*]] = zext i32 [[TMP15]] to i64 // CHECK1-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK1-NEXT: [[TMP18:%.*]] = zext i32 [[TMP17]] to i64 -// CHECK1-NEXT: [[TMP19:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK1-NEXT: [[TMP19:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK1-NEXT: [[CONV9:%.*]] = bitcast i64* [[T_VAR_CASTED]] to i32* // CHECK1-NEXT: store i32 [[TMP19]], i32* [[CONV9]], align 4 // CHECK1-NEXT: [[TMP20:%.*]] = load i64, i64* [[T_VAR_CASTED]], align 8 @@ -1118,7 +1118,7 @@ int main() { // CHECK1-NEXT: 
[[MUL:%.*]] = mul nsw i32 [[TMP17]], 1 // CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK1-NEXT: store i32 [[ADD]], i32* [[I]], align 4 -// CHECK1-NEXT: [[TMP18:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK1-NEXT: [[TMP18:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK1-NEXT: [[TMP19:%.*]] = load i32, i32* [[I]], align 4 // CHECK1-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP19]] to i64 // CHECK1-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x i32], [2 x i32]* [[VEC4]], i64 0, i64 [[IDXPROM]] @@ -1446,11 +1446,11 @@ int main() { // CHECK2-NEXT: [[TMP1:%.*]] = load [2 x %struct.S]*, [2 x %struct.S]** [[S_ARR_ADDR]], align 8 // CHECK2-NEXT: [[TMP2:%.*]] = load %struct.S*, %struct.S** [[VAR_ADDR]], align 8 // CHECK2-NEXT: [[CONV1:%.*]] = bitcast i64* [[SIVAR_ADDR]] to i32* -// CHECK2-NEXT: [[TMP3:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK2-NEXT: [[TMP3:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK2-NEXT: [[CONV2:%.*]] = bitcast i64* [[T_VAR_CASTED]] to i32* // CHECK2-NEXT: store i32 [[TMP3]], i32* [[CONV2]], align 4 // CHECK2-NEXT: [[TMP4:%.*]] = load i64, i64* [[T_VAR_CASTED]], align 8 -// CHECK2-NEXT: [[TMP5:%.*]] = load i32, i32* [[CONV1]], align 8 +// CHECK2-NEXT: [[TMP5:%.*]] = load i32, i32* [[CONV1]], align 4 // CHECK2-NEXT: [[CONV3:%.*]] = bitcast i64* [[SIVAR_CASTED]] to i32* // CHECK2-NEXT: store i32 [[TMP5]], i32* [[CONV3]], align 4 // CHECK2-NEXT: [[TMP6:%.*]] = load i64, i64* [[SIVAR_CASTED]], align 8 @@ -1549,11 +1549,11 @@ int main() { // CHECK2-NEXT: [[TMP15:%.*]] = zext i32 [[TMP14]] to i64 // CHECK2-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK2-NEXT: [[TMP17:%.*]] = zext i32 [[TMP16]] to i64 -// CHECK2-NEXT: [[TMP18:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK2-NEXT: [[TMP18:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK2-NEXT: [[CONV8:%.*]] = bitcast i64* [[T_VAR_CASTED]] to i32* // CHECK2-NEXT: store i32 [[TMP18]], i32* [[CONV8]], align 4 // CHECK2-NEXT: [[TMP19:%.*]] = load i64, i64* [[T_VAR_CASTED]], align 8 -// CHECK2-NEXT: [[TMP20:%.*]] = load i32, i32* [[CONV1]], align 8 +// CHECK2-NEXT: [[TMP20:%.*]] = load i32, i32* [[CONV1]], align 4 // CHECK2-NEXT: [[CONV9:%.*]] = bitcast i64* [[SIVAR_CASTED]] to i32* // CHECK2-NEXT: store i32 [[TMP20]], i32* [[CONV9]], align 4 // CHECK2-NEXT: [[TMP21:%.*]] = load i64, i64* [[SIVAR_CASTED]], align 8 @@ -1717,7 +1717,7 @@ int main() { // CHECK2-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP16]], 1 // CHECK2-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK2-NEXT: store i32 [[ADD]], i32* [[I]], align 4 -// CHECK2-NEXT: [[TMP17:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK2-NEXT: [[TMP17:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK2-NEXT: [[TMP18:%.*]] = load i32, i32* [[I]], align 4 // CHECK2-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP18]] to i64 // CHECK2-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x i32], [2 x i32]* [[VEC4]], i64 0, i64 [[IDXPROM]] @@ -1729,9 +1729,9 @@ int main() { // CHECK2-NEXT: [[TMP21:%.*]] = bitcast %struct.S* [[VAR7]] to i8* // CHECK2-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 [[TMP20]], i8* align 4 [[TMP21]], i64 4, i1 false) // CHECK2-NEXT: [[TMP22:%.*]] = load i32, i32* [[I]], align 4 -// CHECK2-NEXT: [[TMP23:%.*]] = load i32, i32* [[CONV1]], align 8 +// CHECK2-NEXT: [[TMP23:%.*]] = load i32, i32* [[CONV1]], align 4 // CHECK2-NEXT: [[ADD12:%.*]] = add nsw i32 [[TMP23]], [[TMP22]] -// CHECK2-NEXT: store i32 [[ADD12]], i32* [[CONV1]], align 8 +// CHECK2-NEXT: store i32 [[ADD12]], i32* [[CONV1]], align 4 // CHECK2-NEXT: br 
label [[OMP_BODY_CONTINUE:%.*]] // CHECK2: omp.body.continue: // CHECK2-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] @@ -1934,7 +1934,7 @@ int main() { // CHECK2-NEXT: [[TMP1:%.*]] = load [2 x %struct.S.0]*, [2 x %struct.S.0]** [[S_ARR_ADDR]], align 8 // CHECK2-NEXT: [[TMP2:%.*]] = load %struct.S.0*, %struct.S.0** [[VAR_ADDR]], align 8 // CHECK2-NEXT: store %struct.S.0* [[TMP2]], %struct.S.0** [[TMP]], align 8 -// CHECK2-NEXT: [[TMP3:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK2-NEXT: [[TMP3:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK2-NEXT: [[CONV1:%.*]] = bitcast i64* [[T_VAR_CASTED]] to i32* // CHECK2-NEXT: store i32 [[TMP3]], i32* [[CONV1]], align 4 // CHECK2-NEXT: [[TMP4:%.*]] = load i64, i64* [[T_VAR_CASTED]], align 8 @@ -2035,7 +2035,7 @@ int main() { // CHECK2-NEXT: [[TMP16:%.*]] = zext i32 [[TMP15]] to i64 // CHECK2-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK2-NEXT: [[TMP18:%.*]] = zext i32 [[TMP17]] to i64 -// CHECK2-NEXT: [[TMP19:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK2-NEXT: [[TMP19:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK2-NEXT: [[CONV9:%.*]] = bitcast i64* [[T_VAR_CASTED]] to i32* // CHECK2-NEXT: store i32 [[TMP19]], i32* [[CONV9]], align 4 // CHECK2-NEXT: [[TMP20:%.*]] = load i64, i64* [[T_VAR_CASTED]], align 8 @@ -2182,7 +2182,7 @@ int main() { // CHECK2-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP17]], 1 // CHECK2-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK2-NEXT: store i32 [[ADD]], i32* [[I]], align 4 -// CHECK2-NEXT: [[TMP18:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK2-NEXT: [[TMP18:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK2-NEXT: [[TMP19:%.*]] = load i32, i32* [[I]], align 4 // CHECK2-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP19]] to i64 // CHECK2-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x i32], [2 x i32]* [[VEC4]], i64 0, i64 [[IDXPROM]] @@ -4512,7 +4512,7 @@ int main() { // CHECK9-NEXT: [[CONV1:%.*]] = bitcast i64* [[SIVAR_ADDR]] to i32* // CHECK9-NEXT: [[CONV2:%.*]] = bitcast i64* [[G1_ADDR]] to i32* // CHECK9-NEXT: store i32* [[CONV2]], i32** [[TMP]], align 8 -// CHECK9-NEXT: [[TMP0:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK9-NEXT: [[TMP0:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK9-NEXT: [[CONV3:%.*]] = bitcast i64* [[G_CASTED]] to i32* // CHECK9-NEXT: store i32 [[TMP0]], i32* [[CONV3]], align 4 // CHECK9-NEXT: [[TMP1:%.*]] = load i64, i64* [[G_CASTED]], align 8 @@ -4521,7 +4521,7 @@ int main() { // CHECK9-NEXT: [[CONV4:%.*]] = bitcast i64* [[G1_CASTED]] to i32* // CHECK9-NEXT: store i32 [[TMP3]], i32* [[CONV4]], align 4 // CHECK9-NEXT: [[TMP4:%.*]] = load i64, i64* [[G1_CASTED]], align 8 -// CHECK9-NEXT: [[TMP5:%.*]] = load i32, i32* [[CONV1]], align 8 +// CHECK9-NEXT: [[TMP5:%.*]] = load i32, i32* [[CONV1]], align 4 // CHECK9-NEXT: [[CONV5:%.*]] = bitcast i64* [[SIVAR_CASTED]] to i32* // CHECK9-NEXT: store i32 [[TMP5]], i32* [[CONV5]], align 4 // CHECK9-NEXT: [[TMP6:%.*]] = load i64, i64* [[SIVAR_CASTED]], align 8 @@ -4588,7 +4588,7 @@ int main() { // CHECK9-NEXT: [[TMP8:%.*]] = zext i32 [[TMP7]] to i64 // CHECK9-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK9-NEXT: [[TMP10:%.*]] = zext i32 [[TMP9]] to i64 -// CHECK9-NEXT: [[TMP11:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK9-NEXT: [[TMP11:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK9-NEXT: [[CONV5:%.*]] = bitcast i64* [[G_CASTED]] to i32* // CHECK9-NEXT: store i32 [[TMP11]], i32* [[CONV5]], align 4 // CHECK9-NEXT: [[TMP12:%.*]] = load i64, i64* [[G_CASTED]], align 8 @@ -4597,7 
+4597,7 @@ int main() { // CHECK9-NEXT: [[CONV6:%.*]] = bitcast i64* [[G1_CASTED]] to i32* // CHECK9-NEXT: store i32 [[TMP14]], i32* [[CONV6]], align 4 // CHECK9-NEXT: [[TMP15:%.*]] = load i64, i64* [[G1_CASTED]], align 8 -// CHECK9-NEXT: [[TMP16:%.*]] = load i32, i32* [[CONV2]], align 8 +// CHECK9-NEXT: [[TMP16:%.*]] = load i32, i32* [[CONV2]], align 4 // CHECK9-NEXT: [[CONV7:%.*]] = bitcast i64* [[SIVAR_CASTED]] to i32* // CHECK9-NEXT: store i32 [[TMP16]], i32* [[CONV7]], align 4 // CHECK9-NEXT: [[TMP17:%.*]] = load i64, i64* [[SIVAR_CASTED]], align 8 @@ -4683,10 +4683,10 @@ int main() { // CHECK9-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP9]], 1 // CHECK9-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK9-NEXT: store i32 [[ADD]], i32* [[I]], align 4 -// CHECK9-NEXT: store i32 1, i32* [[CONV]], align 8 +// CHECK9-NEXT: store i32 1, i32* [[CONV]], align 4 // CHECK9-NEXT: [[TMP10:%.*]] = load i32*, i32** [[TMP]], align 8 // CHECK9-NEXT: store volatile i32 1, i32* [[TMP10]], align 4 -// CHECK9-NEXT: store i32 2, i32* [[CONV2]], align 8 +// CHECK9-NEXT: store i32 2, i32* [[CONV2]], align 4 // CHECK9-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[CLASS_ANON_0]], %class.anon.0* [[REF_TMP]], i32 0, i32 0 // CHECK9-NEXT: store i32* [[CONV]], i32** [[TMP11]], align 8 // CHECK9-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[CLASS_ANON_0]], %class.anon.0* [[REF_TMP]], i32 0, i32 1 @@ -4866,7 +4866,7 @@ int main() { // CHECK10-NEXT: [[CONV1:%.*]] = bitcast i64* [[SIVAR_ADDR]] to i32* // CHECK10-NEXT: [[CONV2:%.*]] = bitcast i64* [[G1_ADDR]] to i32* // CHECK10-NEXT: store i32* [[CONV2]], i32** [[TMP]], align 8 -// CHECK10-NEXT: [[TMP0:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK10-NEXT: [[TMP0:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK10-NEXT: [[CONV3:%.*]] = bitcast i64* [[G_CASTED]] to i32* // CHECK10-NEXT: store i32 [[TMP0]], i32* [[CONV3]], align 4 // CHECK10-NEXT: [[TMP1:%.*]] = load i64, i64* [[G_CASTED]], align 8 @@ -4875,7 +4875,7 @@ int main() { // CHECK10-NEXT: [[CONV4:%.*]] = bitcast i64* [[G1_CASTED]] to i32* // CHECK10-NEXT: store i32 [[TMP3]], i32* [[CONV4]], align 4 // CHECK10-NEXT: [[TMP4:%.*]] = load i64, i64* [[G1_CASTED]], align 8 -// CHECK10-NEXT: [[TMP5:%.*]] = load i32, i32* [[CONV1]], align 8 +// CHECK10-NEXT: [[TMP5:%.*]] = load i32, i32* [[CONV1]], align 4 // CHECK10-NEXT: [[CONV5:%.*]] = bitcast i64* [[SIVAR_CASTED]] to i32* // CHECK10-NEXT: store i32 [[TMP5]], i32* [[CONV5]], align 4 // CHECK10-NEXT: [[TMP6:%.*]] = load i64, i64* [[SIVAR_CASTED]], align 8 @@ -4942,7 +4942,7 @@ int main() { // CHECK10-NEXT: [[TMP8:%.*]] = zext i32 [[TMP7]] to i64 // CHECK10-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK10-NEXT: [[TMP10:%.*]] = zext i32 [[TMP9]] to i64 -// CHECK10-NEXT: [[TMP11:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK10-NEXT: [[TMP11:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK10-NEXT: [[CONV5:%.*]] = bitcast i64* [[G_CASTED]] to i32* // CHECK10-NEXT: store i32 [[TMP11]], i32* [[CONV5]], align 4 // CHECK10-NEXT: [[TMP12:%.*]] = load i64, i64* [[G_CASTED]], align 8 @@ -4951,7 +4951,7 @@ int main() { // CHECK10-NEXT: [[CONV6:%.*]] = bitcast i64* [[G1_CASTED]] to i32* // CHECK10-NEXT: store i32 [[TMP14]], i32* [[CONV6]], align 4 // CHECK10-NEXT: [[TMP15:%.*]] = load i64, i64* [[G1_CASTED]], align 8 -// CHECK10-NEXT: [[TMP16:%.*]] = load i32, i32* [[CONV2]], align 8 +// CHECK10-NEXT: [[TMP16:%.*]] = load i32, i32* [[CONV2]], align 4 // CHECK10-NEXT: [[CONV7:%.*]] = bitcast i64* [[SIVAR_CASTED]] to i32* // CHECK10-NEXT: store i32 
[[TMP16]], i32* [[CONV7]], align 4 // CHECK10-NEXT: [[TMP17:%.*]] = load i64, i64* [[SIVAR_CASTED]], align 8 @@ -5037,10 +5037,10 @@ int main() { // CHECK10-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP9]], 1 // CHECK10-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK10-NEXT: store i32 [[ADD]], i32* [[I]], align 4 -// CHECK10-NEXT: store i32 1, i32* [[CONV]], align 8 +// CHECK10-NEXT: store i32 1, i32* [[CONV]], align 4 // CHECK10-NEXT: [[TMP10:%.*]] = load i32*, i32** [[TMP]], align 8 // CHECK10-NEXT: store volatile i32 1, i32* [[TMP10]], align 4 -// CHECK10-NEXT: store i32 2, i32* [[CONV2]], align 8 +// CHECK10-NEXT: store i32 2, i32* [[CONV2]], align 4 // CHECK10-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[CLASS_ANON_0]], %class.anon.0* [[REF_TMP]], i32 0, i32 0 // CHECK10-NEXT: store i32* [[CONV]], i32** [[TMP11]], align 8 // CHECK10-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[CLASS_ANON_0]], %class.anon.0* [[REF_TMP]], i32 0, i32 1 diff --git a/clang/test/OpenMP/teams_distribute_parallel_for_if_codegen.cpp b/clang/test/OpenMP/teams_distribute_parallel_for_if_codegen.cpp index 77700887dab5..a52665e07e52 100644 --- a/clang/test/OpenMP/teams_distribute_parallel_for_if_codegen.cpp +++ b/clang/test/OpenMP/teams_distribute_parallel_for_if_codegen.cpp @@ -769,7 +769,7 @@ int main() { // CHECK1-NEXT: [[DOTCAPTURE_EXPR__CASTED:%.*]] = alloca i64, align 8 // CHECK1-NEXT: store i64 [[ARG]], i64* [[ARG_ADDR]], align 8 // CHECK1-NEXT: [[CONV:%.*]] = bitcast i64* [[ARG_ADDR]] to i32* -// CHECK1-NEXT: [[TMP0:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK1-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[TMP0]], 0 // CHECK1-NEXT: [[FROMBOOL:%.*]] = zext i1 [[TOBOOL]] to i8 // CHECK1-NEXT: store i8 [[FROMBOOL]], i8* [[DOTCAPTURE_EXPR_]], align 1 @@ -832,7 +832,7 @@ int main() { // CHECK1-NEXT: [[TMP8:%.*]] = zext i32 [[TMP7]] to i64 // CHECK1-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK1-NEXT: [[TMP10:%.*]] = zext i32 [[TMP9]] to i64 -// CHECK1-NEXT: [[TMP11:%.*]] = load i8, i8* [[CONV]], align 8 +// CHECK1-NEXT: [[TMP11:%.*]] = load i8, i8* [[CONV]], align 1 // CHECK1-NEXT: [[TOBOOL:%.*]] = trunc i8 [[TMP11]] to i1 // CHECK1-NEXT: br i1 [[TOBOOL]], label [[OMP_IF_THEN:%.*]], label [[OMP_IF_ELSE:%.*]] // CHECK1: omp_if.then: @@ -1281,7 +1281,7 @@ int main() { // CHECK1-NEXT: [[DOTCAPTURE_EXPR__CASTED:%.*]] = alloca i64, align 8 // CHECK1-NEXT: store i64 [[ARG]], i64* [[ARG_ADDR]], align 8 // CHECK1-NEXT: [[CONV:%.*]] = bitcast i64* [[ARG_ADDR]] to i32* -// CHECK1-NEXT: [[TMP0:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK1-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[TMP0]], 0 // CHECK1-NEXT: [[FROMBOOL:%.*]] = zext i1 [[TOBOOL]] to i8 // CHECK1-NEXT: store i8 [[FROMBOOL]], i8* [[DOTCAPTURE_EXPR_]], align 1 @@ -1344,7 +1344,7 @@ int main() { // CHECK1-NEXT: [[TMP8:%.*]] = zext i32 [[TMP7]] to i64 // CHECK1-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK1-NEXT: [[TMP10:%.*]] = zext i32 [[TMP9]] to i64 -// CHECK1-NEXT: [[TMP11:%.*]] = load i8, i8* [[CONV]], align 8 +// CHECK1-NEXT: [[TMP11:%.*]] = load i8, i8* [[CONV]], align 1 // CHECK1-NEXT: [[TOBOOL:%.*]] = trunc i8 [[TMP11]] to i1 // CHECK1-NEXT: br i1 [[TOBOOL]], label [[OMP_IF_THEN:%.*]], label [[OMP_IF_ELSE:%.*]] // CHECK1: omp_if.then: @@ -2106,7 +2106,7 @@ int main() { // CHECK2-NEXT: [[DOTCAPTURE_EXPR__CASTED:%.*]] = alloca i64, align 8 // CHECK2-NEXT: store i64 [[ARG]], i64* 
[[ARG_ADDR]], align 8 // CHECK2-NEXT: [[CONV:%.*]] = bitcast i64* [[ARG_ADDR]] to i32* -// CHECK2-NEXT: [[TMP0:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK2-NEXT: [[TMP0:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK2-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[TMP0]], 0 // CHECK2-NEXT: [[FROMBOOL:%.*]] = zext i1 [[TOBOOL]] to i8 // CHECK2-NEXT: store i8 [[FROMBOOL]], i8* [[DOTCAPTURE_EXPR_]], align 1 @@ -2169,7 +2169,7 @@ int main() { // CHECK2-NEXT: [[TMP8:%.*]] = zext i32 [[TMP7]] to i64 // CHECK2-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK2-NEXT: [[TMP10:%.*]] = zext i32 [[TMP9]] to i64 -// CHECK2-NEXT: [[TMP11:%.*]] = load i8, i8* [[CONV]], align 8 +// CHECK2-NEXT: [[TMP11:%.*]] = load i8, i8* [[CONV]], align 1 // CHECK2-NEXT: [[TOBOOL:%.*]] = trunc i8 [[TMP11]] to i1 // CHECK2-NEXT: br i1 [[TOBOOL]], label [[OMP_IF_THEN:%.*]], label [[OMP_IF_ELSE:%.*]] // CHECK2: omp_if.then: @@ -2618,7 +2618,7 @@ int main() { // CHECK2-NEXT: [[DOTCAPTURE_EXPR__CASTED:%.*]] = alloca i64, align 8 // CHECK2-NEXT: store i64 [[ARG]], i64* [[ARG_ADDR]], align 8 // CHECK2-NEXT: [[CONV:%.*]] = bitcast i64* [[ARG_ADDR]] to i32* -// CHECK2-NEXT: [[TMP0:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK2-NEXT: [[TMP0:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK2-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[TMP0]], 0 // CHECK2-NEXT: [[FROMBOOL:%.*]] = zext i1 [[TOBOOL]] to i8 // CHECK2-NEXT: store i8 [[FROMBOOL]], i8* [[DOTCAPTURE_EXPR_]], align 1 @@ -2681,7 +2681,7 @@ int main() { // CHECK2-NEXT: [[TMP8:%.*]] = zext i32 [[TMP7]] to i64 // CHECK2-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK2-NEXT: [[TMP10:%.*]] = zext i32 [[TMP9]] to i64 -// CHECK2-NEXT: [[TMP11:%.*]] = load i8, i8* [[CONV]], align 8 +// CHECK2-NEXT: [[TMP11:%.*]] = load i8, i8* [[CONV]], align 1 // CHECK2-NEXT: [[TOBOOL:%.*]] = trunc i8 [[TMP11]] to i1 // CHECK2-NEXT: br i1 [[TOBOOL]], label [[OMP_IF_THEN:%.*]], label [[OMP_IF_ELSE:%.*]] // CHECK2: omp_if.then: @@ -3443,7 +3443,7 @@ int main() { // CHECK5-NEXT: [[DOTCAPTURE_EXPR__CASTED:%.*]] = alloca i64, align 8 // CHECK5-NEXT: store i64 [[ARG]], i64* [[ARG_ADDR]], align 8 // CHECK5-NEXT: [[CONV:%.*]] = bitcast i64* [[ARG_ADDR]] to i32* -// CHECK5-NEXT: [[TMP0:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK5-NEXT: [[TMP0:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK5-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[TMP0]], 0 // CHECK5-NEXT: [[FROMBOOL:%.*]] = zext i1 [[TOBOOL]] to i8 // CHECK5-NEXT: store i8 [[FROMBOOL]], i8* [[DOTCAPTURE_EXPR_]], align 1 @@ -3506,7 +3506,7 @@ int main() { // CHECK5-NEXT: [[TMP8:%.*]] = zext i32 [[TMP7]] to i64 // CHECK5-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK5-NEXT: [[TMP10:%.*]] = zext i32 [[TMP9]] to i64 -// CHECK5-NEXT: [[TMP11:%.*]] = load i8, i8* [[CONV]], align 8 +// CHECK5-NEXT: [[TMP11:%.*]] = load i8, i8* [[CONV]], align 1 // CHECK5-NEXT: [[TOBOOL:%.*]] = trunc i8 [[TMP11]] to i1 // CHECK5-NEXT: br i1 [[TOBOOL]], label [[OMP_IF_THEN:%.*]], label [[OMP_IF_ELSE:%.*]] // CHECK5: omp_if.then: @@ -3955,7 +3955,7 @@ int main() { // CHECK5-NEXT: [[DOTCAPTURE_EXPR__CASTED:%.*]] = alloca i64, align 8 // CHECK5-NEXT: store i64 [[ARG]], i64* [[ARG_ADDR]], align 8 // CHECK5-NEXT: [[CONV:%.*]] = bitcast i64* [[ARG_ADDR]] to i32* -// CHECK5-NEXT: [[TMP0:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK5-NEXT: [[TMP0:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK5-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[TMP0]], 0 // CHECK5-NEXT: [[FROMBOOL:%.*]] = zext i1 [[TOBOOL]] to i8 // 
CHECK5-NEXT: store i8 [[FROMBOOL]], i8* [[DOTCAPTURE_EXPR_]], align 1 @@ -4018,7 +4018,7 @@ int main() { // CHECK5-NEXT: [[TMP8:%.*]] = zext i32 [[TMP7]] to i64 // CHECK5-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK5-NEXT: [[TMP10:%.*]] = zext i32 [[TMP9]] to i64 -// CHECK5-NEXT: [[TMP11:%.*]] = load i8, i8* [[CONV]], align 8 +// CHECK5-NEXT: [[TMP11:%.*]] = load i8, i8* [[CONV]], align 1 // CHECK5-NEXT: [[TOBOOL:%.*]] = trunc i8 [[TMP11]] to i1 // CHECK5-NEXT: br i1 [[TOBOOL]], label [[OMP_IF_THEN:%.*]], label [[OMP_IF_ELSE:%.*]] // CHECK5: omp_if.then: @@ -4780,7 +4780,7 @@ int main() { // CHECK6-NEXT: [[DOTCAPTURE_EXPR__CASTED:%.*]] = alloca i64, align 8 // CHECK6-NEXT: store i64 [[ARG]], i64* [[ARG_ADDR]], align 8 // CHECK6-NEXT: [[CONV:%.*]] = bitcast i64* [[ARG_ADDR]] to i32* -// CHECK6-NEXT: [[TMP0:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK6-NEXT: [[TMP0:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK6-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[TMP0]], 0 // CHECK6-NEXT: [[FROMBOOL:%.*]] = zext i1 [[TOBOOL]] to i8 // CHECK6-NEXT: store i8 [[FROMBOOL]], i8* [[DOTCAPTURE_EXPR_]], align 1 @@ -4843,7 +4843,7 @@ int main() { // CHECK6-NEXT: [[TMP8:%.*]] = zext i32 [[TMP7]] to i64 // CHECK6-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK6-NEXT: [[TMP10:%.*]] = zext i32 [[TMP9]] to i64 -// CHECK6-NEXT: [[TMP11:%.*]] = load i8, i8* [[CONV]], align 8 +// CHECK6-NEXT: [[TMP11:%.*]] = load i8, i8* [[CONV]], align 1 // CHECK6-NEXT: [[TOBOOL:%.*]] = trunc i8 [[TMP11]] to i1 // CHECK6-NEXT: br i1 [[TOBOOL]], label [[OMP_IF_THEN:%.*]], label [[OMP_IF_ELSE:%.*]] // CHECK6: omp_if.then: @@ -5292,7 +5292,7 @@ int main() { // CHECK6-NEXT: [[DOTCAPTURE_EXPR__CASTED:%.*]] = alloca i64, align 8 // CHECK6-NEXT: store i64 [[ARG]], i64* [[ARG_ADDR]], align 8 // CHECK6-NEXT: [[CONV:%.*]] = bitcast i64* [[ARG_ADDR]] to i32* -// CHECK6-NEXT: [[TMP0:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK6-NEXT: [[TMP0:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK6-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[TMP0]], 0 // CHECK6-NEXT: [[FROMBOOL:%.*]] = zext i1 [[TOBOOL]] to i8 // CHECK6-NEXT: store i8 [[FROMBOOL]], i8* [[DOTCAPTURE_EXPR_]], align 1 @@ -5355,7 +5355,7 @@ int main() { // CHECK6-NEXT: [[TMP8:%.*]] = zext i32 [[TMP7]] to i64 // CHECK6-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK6-NEXT: [[TMP10:%.*]] = zext i32 [[TMP9]] to i64 -// CHECK6-NEXT: [[TMP11:%.*]] = load i8, i8* [[CONV]], align 8 +// CHECK6-NEXT: [[TMP11:%.*]] = load i8, i8* [[CONV]], align 1 // CHECK6-NEXT: [[TOBOOL:%.*]] = trunc i8 [[TMP11]] to i1 // CHECK6-NEXT: br i1 [[TOBOOL]], label [[OMP_IF_THEN:%.*]], label [[OMP_IF_ELSE:%.*]] // CHECK6: omp_if.then: @@ -6117,7 +6117,7 @@ int main() { // CHECK9-NEXT: [[DOTCAPTURE_EXPR__CASTED:%.*]] = alloca i64, align 8 // CHECK9-NEXT: store i64 [[ARG]], i64* [[ARG_ADDR]], align 8 // CHECK9-NEXT: [[CONV:%.*]] = bitcast i64* [[ARG_ADDR]] to i32* -// CHECK9-NEXT: [[TMP0:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK9-NEXT: [[TMP0:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK9-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[TMP0]], 0 // CHECK9-NEXT: [[FROMBOOL:%.*]] = zext i1 [[TOBOOL]] to i8 // CHECK9-NEXT: store i8 [[FROMBOOL]], i8* [[DOTCAPTURE_EXPR_]], align 1 @@ -6180,7 +6180,7 @@ int main() { // CHECK9-NEXT: [[TMP8:%.*]] = zext i32 [[TMP7]] to i64 // CHECK9-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK9-NEXT: [[TMP10:%.*]] = zext i32 [[TMP9]] to i64 -// CHECK9-NEXT: [[TMP11:%.*]] = load i8, i8* 
[[CONV]], align 8 +// CHECK9-NEXT: [[TMP11:%.*]] = load i8, i8* [[CONV]], align 1 // CHECK9-NEXT: [[TOBOOL:%.*]] = trunc i8 [[TMP11]] to i1 // CHECK9-NEXT: br i1 [[TOBOOL]], label [[OMP_IF_THEN:%.*]], label [[OMP_IF_ELSE:%.*]] // CHECK9: omp_if.then: @@ -6629,7 +6629,7 @@ int main() { // CHECK9-NEXT: [[DOTCAPTURE_EXPR__CASTED:%.*]] = alloca i64, align 8 // CHECK9-NEXT: store i64 [[ARG]], i64* [[ARG_ADDR]], align 8 // CHECK9-NEXT: [[CONV:%.*]] = bitcast i64* [[ARG_ADDR]] to i32* -// CHECK9-NEXT: [[TMP0:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK9-NEXT: [[TMP0:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK9-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[TMP0]], 0 // CHECK9-NEXT: [[FROMBOOL:%.*]] = zext i1 [[TOBOOL]] to i8 // CHECK9-NEXT: store i8 [[FROMBOOL]], i8* [[DOTCAPTURE_EXPR_]], align 1 @@ -6692,7 +6692,7 @@ int main() { // CHECK9-NEXT: [[TMP8:%.*]] = zext i32 [[TMP7]] to i64 // CHECK9-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK9-NEXT: [[TMP10:%.*]] = zext i32 [[TMP9]] to i64 -// CHECK9-NEXT: [[TMP11:%.*]] = load i8, i8* [[CONV]], align 8 +// CHECK9-NEXT: [[TMP11:%.*]] = load i8, i8* [[CONV]], align 1 // CHECK9-NEXT: [[TOBOOL:%.*]] = trunc i8 [[TMP11]] to i1 // CHECK9-NEXT: br i1 [[TOBOOL]], label [[OMP_IF_THEN:%.*]], label [[OMP_IF_ELSE:%.*]] // CHECK9: omp_if.then: @@ -7454,7 +7454,7 @@ int main() { // CHECK10-NEXT: [[DOTCAPTURE_EXPR__CASTED:%.*]] = alloca i64, align 8 // CHECK10-NEXT: store i64 [[ARG]], i64* [[ARG_ADDR]], align 8 // CHECK10-NEXT: [[CONV:%.*]] = bitcast i64* [[ARG_ADDR]] to i32* -// CHECK10-NEXT: [[TMP0:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK10-NEXT: [[TMP0:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK10-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[TMP0]], 0 // CHECK10-NEXT: [[FROMBOOL:%.*]] = zext i1 [[TOBOOL]] to i8 // CHECK10-NEXT: store i8 [[FROMBOOL]], i8* [[DOTCAPTURE_EXPR_]], align 1 @@ -7517,7 +7517,7 @@ int main() { // CHECK10-NEXT: [[TMP8:%.*]] = zext i32 [[TMP7]] to i64 // CHECK10-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK10-NEXT: [[TMP10:%.*]] = zext i32 [[TMP9]] to i64 -// CHECK10-NEXT: [[TMP11:%.*]] = load i8, i8* [[CONV]], align 8 +// CHECK10-NEXT: [[TMP11:%.*]] = load i8, i8* [[CONV]], align 1 // CHECK10-NEXT: [[TOBOOL:%.*]] = trunc i8 [[TMP11]] to i1 // CHECK10-NEXT: br i1 [[TOBOOL]], label [[OMP_IF_THEN:%.*]], label [[OMP_IF_ELSE:%.*]] // CHECK10: omp_if.then: @@ -7966,7 +7966,7 @@ int main() { // CHECK10-NEXT: [[DOTCAPTURE_EXPR__CASTED:%.*]] = alloca i64, align 8 // CHECK10-NEXT: store i64 [[ARG]], i64* [[ARG_ADDR]], align 8 // CHECK10-NEXT: [[CONV:%.*]] = bitcast i64* [[ARG_ADDR]] to i32* -// CHECK10-NEXT: [[TMP0:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK10-NEXT: [[TMP0:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK10-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[TMP0]], 0 // CHECK10-NEXT: [[FROMBOOL:%.*]] = zext i1 [[TOBOOL]] to i8 // CHECK10-NEXT: store i8 [[FROMBOOL]], i8* [[DOTCAPTURE_EXPR_]], align 1 @@ -8029,7 +8029,7 @@ int main() { // CHECK10-NEXT: [[TMP8:%.*]] = zext i32 [[TMP7]] to i64 // CHECK10-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK10-NEXT: [[TMP10:%.*]] = zext i32 [[TMP9]] to i64 -// CHECK10-NEXT: [[TMP11:%.*]] = load i8, i8* [[CONV]], align 8 +// CHECK10-NEXT: [[TMP11:%.*]] = load i8, i8* [[CONV]], align 1 // CHECK10-NEXT: [[TOBOOL:%.*]] = trunc i8 [[TMP11]] to i1 // CHECK10-NEXT: br i1 [[TOBOOL]], label [[OMP_IF_THEN:%.*]], label [[OMP_IF_ELSE:%.*]] // CHECK10: omp_if.then: @@ -8791,7 +8791,7 @@ int main() { // CHECK13-NEXT: 
[[DOTCAPTURE_EXPR__CASTED:%.*]] = alloca i64, align 8 // CHECK13-NEXT: store i64 [[ARG]], i64* [[ARG_ADDR]], align 8 // CHECK13-NEXT: [[CONV:%.*]] = bitcast i64* [[ARG_ADDR]] to i32* -// CHECK13-NEXT: [[TMP0:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK13-NEXT: [[TMP0:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK13-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[TMP0]], 0 // CHECK13-NEXT: [[FROMBOOL:%.*]] = zext i1 [[TOBOOL]] to i8 // CHECK13-NEXT: store i8 [[FROMBOOL]], i8* [[DOTCAPTURE_EXPR_]], align 1 @@ -8854,7 +8854,7 @@ int main() { // CHECK13-NEXT: [[TMP8:%.*]] = zext i32 [[TMP7]] to i64 // CHECK13-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK13-NEXT: [[TMP10:%.*]] = zext i32 [[TMP9]] to i64 -// CHECK13-NEXT: [[TMP11:%.*]] = load i8, i8* [[CONV]], align 8 +// CHECK13-NEXT: [[TMP11:%.*]] = load i8, i8* [[CONV]], align 1 // CHECK13-NEXT: [[TOBOOL:%.*]] = trunc i8 [[TMP11]] to i1 // CHECK13-NEXT: br i1 [[TOBOOL]], label [[OMP_IF_THEN:%.*]], label [[OMP_IF_ELSE:%.*]] // CHECK13: omp_if.then: @@ -9303,7 +9303,7 @@ int main() { // CHECK13-NEXT: [[DOTCAPTURE_EXPR__CASTED:%.*]] = alloca i64, align 8 // CHECK13-NEXT: store i64 [[ARG]], i64* [[ARG_ADDR]], align 8 // CHECK13-NEXT: [[CONV:%.*]] = bitcast i64* [[ARG_ADDR]] to i32* -// CHECK13-NEXT: [[TMP0:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK13-NEXT: [[TMP0:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK13-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[TMP0]], 0 // CHECK13-NEXT: [[FROMBOOL:%.*]] = zext i1 [[TOBOOL]] to i8 // CHECK13-NEXT: store i8 [[FROMBOOL]], i8* [[DOTCAPTURE_EXPR_]], align 1 @@ -9366,7 +9366,7 @@ int main() { // CHECK13-NEXT: [[TMP8:%.*]] = zext i32 [[TMP7]] to i64 // CHECK13-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK13-NEXT: [[TMP10:%.*]] = zext i32 [[TMP9]] to i64 -// CHECK13-NEXT: [[TMP11:%.*]] = load i8, i8* [[CONV]], align 8 +// CHECK13-NEXT: [[TMP11:%.*]] = load i8, i8* [[CONV]], align 1 // CHECK13-NEXT: [[TOBOOL:%.*]] = trunc i8 [[TMP11]] to i1 // CHECK13-NEXT: br i1 [[TOBOOL]], label [[OMP_IF_THEN:%.*]], label [[OMP_IF_ELSE:%.*]] // CHECK13: omp_if.then: @@ -10128,7 +10128,7 @@ int main() { // CHECK14-NEXT: [[DOTCAPTURE_EXPR__CASTED:%.*]] = alloca i64, align 8 // CHECK14-NEXT: store i64 [[ARG]], i64* [[ARG_ADDR]], align 8 // CHECK14-NEXT: [[CONV:%.*]] = bitcast i64* [[ARG_ADDR]] to i32* -// CHECK14-NEXT: [[TMP0:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK14-NEXT: [[TMP0:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK14-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[TMP0]], 0 // CHECK14-NEXT: [[FROMBOOL:%.*]] = zext i1 [[TOBOOL]] to i8 // CHECK14-NEXT: store i8 [[FROMBOOL]], i8* [[DOTCAPTURE_EXPR_]], align 1 @@ -10191,7 +10191,7 @@ int main() { // CHECK14-NEXT: [[TMP8:%.*]] = zext i32 [[TMP7]] to i64 // CHECK14-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK14-NEXT: [[TMP10:%.*]] = zext i32 [[TMP9]] to i64 -// CHECK14-NEXT: [[TMP11:%.*]] = load i8, i8* [[CONV]], align 8 +// CHECK14-NEXT: [[TMP11:%.*]] = load i8, i8* [[CONV]], align 1 // CHECK14-NEXT: [[TOBOOL:%.*]] = trunc i8 [[TMP11]] to i1 // CHECK14-NEXT: br i1 [[TOBOOL]], label [[OMP_IF_THEN:%.*]], label [[OMP_IF_ELSE:%.*]] // CHECK14: omp_if.then: @@ -10640,7 +10640,7 @@ int main() { // CHECK14-NEXT: [[DOTCAPTURE_EXPR__CASTED:%.*]] = alloca i64, align 8 // CHECK14-NEXT: store i64 [[ARG]], i64* [[ARG_ADDR]], align 8 // CHECK14-NEXT: [[CONV:%.*]] = bitcast i64* [[ARG_ADDR]] to i32* -// CHECK14-NEXT: [[TMP0:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK14-NEXT: [[TMP0:%.*]] = load 
i32, i32* [[CONV]], align 4 // CHECK14-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[TMP0]], 0 // CHECK14-NEXT: [[FROMBOOL:%.*]] = zext i1 [[TOBOOL]] to i8 // CHECK14-NEXT: store i8 [[FROMBOOL]], i8* [[DOTCAPTURE_EXPR_]], align 1 @@ -10703,7 +10703,7 @@ int main() { // CHECK14-NEXT: [[TMP8:%.*]] = zext i32 [[TMP7]] to i64 // CHECK14-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK14-NEXT: [[TMP10:%.*]] = zext i32 [[TMP9]] to i64 -// CHECK14-NEXT: [[TMP11:%.*]] = load i8, i8* [[CONV]], align 8 +// CHECK14-NEXT: [[TMP11:%.*]] = load i8, i8* [[CONV]], align 1 // CHECK14-NEXT: [[TOBOOL:%.*]] = trunc i8 [[TMP11]] to i1 // CHECK14-NEXT: br i1 [[TOBOOL]], label [[OMP_IF_THEN:%.*]], label [[OMP_IF_ELSE:%.*]] // CHECK14: omp_if.then: diff --git a/clang/test/OpenMP/teams_distribute_parallel_for_num_threads_codegen.cpp b/clang/test/OpenMP/teams_distribute_parallel_for_num_threads_codegen.cpp index 7e35b20b4bf9..0e06a7cfa116 100644 --- a/clang/test/OpenMP/teams_distribute_parallel_for_num_threads_codegen.cpp +++ b/clang/test/OpenMP/teams_distribute_parallel_for_num_threads_codegen.cpp @@ -346,7 +346,7 @@ int main() { // CHECK1-NEXT: [[DOTCAPTURE_EXPR__CASTED:%.*]] = alloca i64, align 8 // CHECK1-NEXT: store i64 [[A]], i64* [[A_ADDR]], align 8 // CHECK1-NEXT: [[CONV:%.*]] = bitcast i64* [[A_ADDR]] to i8* -// CHECK1-NEXT: [[TMP0:%.*]] = load i8, i8* [[CONV]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load i8, i8* [[CONV]], align 1 // CHECK1-NEXT: store i8 [[TMP0]], i8* [[DOTCAPTURE_EXPR_]], align 1 // CHECK1-NEXT: [[TMP1:%.*]] = load i8, i8* [[DOTCAPTURE_EXPR_]], align 1 // CHECK1-NEXT: [[CONV1:%.*]] = bitcast i64* [[DOTCAPTURE_EXPR__CASTED]] to i8* @@ -400,7 +400,7 @@ int main() { // CHECK1-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] // CHECK1-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: [[TMP7:%.*]] = load i8, i8* [[CONV]], align 8 +// CHECK1-NEXT: [[TMP7:%.*]] = load i8, i8* [[CONV]], align 1 // CHECK1-NEXT: [[TMP8:%.*]] = sext i8 [[TMP7]] to i32 // CHECK1-NEXT: call void @__kmpc_push_num_threads(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]], i32 [[TMP8]]) // CHECK1-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 @@ -1117,7 +1117,7 @@ int main() { // CHECK1-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] // CHECK1-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: [[TMP7:%.*]] = load i8, i8* [[CONV]], align 8 +// CHECK1-NEXT: [[TMP7:%.*]] = load i8, i8* [[CONV]], align 1 // CHECK1-NEXT: [[TMP8:%.*]] = sext i8 [[TMP7]] to i32 // CHECK1-NEXT: call void @__kmpc_push_num_threads(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]], i32 [[TMP8]]) // CHECK1-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 @@ -1498,7 +1498,7 @@ int main() { // CHECK2-NEXT: [[DOTCAPTURE_EXPR__CASTED:%.*]] = alloca i64, align 8 // CHECK2-NEXT: store i64 [[A]], i64* [[A_ADDR]], align 8 // CHECK2-NEXT: [[CONV:%.*]] = bitcast i64* [[A_ADDR]] to i8* -// CHECK2-NEXT: [[TMP0:%.*]] = load i8, i8* [[CONV]], align 8 +// CHECK2-NEXT: [[TMP0:%.*]] = load i8, i8* [[CONV]], align 1 // CHECK2-NEXT: store i8 [[TMP0]], i8* [[DOTCAPTURE_EXPR_]], align 1 // CHECK2-NEXT: [[TMP1:%.*]] = load i8, i8* [[DOTCAPTURE_EXPR_]], align 1 // CHECK2-NEXT: [[CONV1:%.*]] = bitcast i64* [[DOTCAPTURE_EXPR__CASTED]] to i8* @@ -1552,7 +1552,7 @@ int main() { // CHECK2-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] // CHECK2-NEXT: br i1 
[[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK2: omp.inner.for.body: -// CHECK2-NEXT: [[TMP7:%.*]] = load i8, i8* [[CONV]], align 8 +// CHECK2-NEXT: [[TMP7:%.*]] = load i8, i8* [[CONV]], align 1 // CHECK2-NEXT: [[TMP8:%.*]] = sext i8 [[TMP7]] to i32 // CHECK2-NEXT: call void @__kmpc_push_num_threads(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]], i32 [[TMP8]]) // CHECK2-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 @@ -2269,7 +2269,7 @@ int main() { // CHECK2-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] // CHECK2-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK2: omp.inner.for.body: -// CHECK2-NEXT: [[TMP7:%.*]] = load i8, i8* [[CONV]], align 8 +// CHECK2-NEXT: [[TMP7:%.*]] = load i8, i8* [[CONV]], align 1 // CHECK2-NEXT: [[TMP8:%.*]] = sext i8 [[TMP7]] to i32 // CHECK2-NEXT: call void @__kmpc_push_num_threads(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]], i32 [[TMP8]]) // CHECK2-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 @@ -2650,7 +2650,7 @@ int main() { // CHECK5-NEXT: [[DOTCAPTURE_EXPR__CASTED:%.*]] = alloca i64, align 8 // CHECK5-NEXT: store i64 [[A]], i64* [[A_ADDR]], align 8 // CHECK5-NEXT: [[CONV:%.*]] = bitcast i64* [[A_ADDR]] to i8* -// CHECK5-NEXT: [[TMP0:%.*]] = load i8, i8* [[CONV]], align 8 +// CHECK5-NEXT: [[TMP0:%.*]] = load i8, i8* [[CONV]], align 1 // CHECK5-NEXT: store i8 [[TMP0]], i8* [[DOTCAPTURE_EXPR_]], align 1 // CHECK5-NEXT: [[TMP1:%.*]] = load i8, i8* [[DOTCAPTURE_EXPR_]], align 1 // CHECK5-NEXT: [[CONV1:%.*]] = bitcast i64* [[DOTCAPTURE_EXPR__CASTED]] to i8* @@ -2704,7 +2704,7 @@ int main() { // CHECK5-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] // CHECK5-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK5: omp.inner.for.body: -// CHECK5-NEXT: [[TMP7:%.*]] = load i8, i8* [[CONV]], align 8 +// CHECK5-NEXT: [[TMP7:%.*]] = load i8, i8* [[CONV]], align 1 // CHECK5-NEXT: [[TMP8:%.*]] = sext i8 [[TMP7]] to i32 // CHECK5-NEXT: call void @__kmpc_push_num_threads(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]], i32 [[TMP8]]) // CHECK5-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 @@ -3412,7 +3412,7 @@ int main() { // CHECK5-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] // CHECK5-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK5: omp.inner.for.body: -// CHECK5-NEXT: [[TMP7:%.*]] = load i8, i8* [[CONV]], align 8 +// CHECK5-NEXT: [[TMP7:%.*]] = load i8, i8* [[CONV]], align 1 // CHECK5-NEXT: [[TMP8:%.*]] = sext i8 [[TMP7]] to i32 // CHECK5-NEXT: call void @__kmpc_push_num_threads(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]], i32 [[TMP8]]) // CHECK5-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 @@ -3802,7 +3802,7 @@ int main() { // CHECK6-NEXT: [[DOTCAPTURE_EXPR__CASTED:%.*]] = alloca i64, align 8 // CHECK6-NEXT: store i64 [[A]], i64* [[A_ADDR]], align 8 // CHECK6-NEXT: [[CONV:%.*]] = bitcast i64* [[A_ADDR]] to i8* -// CHECK6-NEXT: [[TMP0:%.*]] = load i8, i8* [[CONV]], align 8 +// CHECK6-NEXT: [[TMP0:%.*]] = load i8, i8* [[CONV]], align 1 // CHECK6-NEXT: store i8 [[TMP0]], i8* [[DOTCAPTURE_EXPR_]], align 1 // CHECK6-NEXT: [[TMP1:%.*]] = load i8, i8* [[DOTCAPTURE_EXPR_]], align 1 // CHECK6-NEXT: [[CONV1:%.*]] = bitcast i64* [[DOTCAPTURE_EXPR__CASTED]] to i8* @@ -3856,7 +3856,7 @@ int main() { // CHECK6-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] // CHECK6-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], 
label [[OMP_INNER_FOR_END:%.*]] // CHECK6: omp.inner.for.body: -// CHECK6-NEXT: [[TMP7:%.*]] = load i8, i8* [[CONV]], align 8 +// CHECK6-NEXT: [[TMP7:%.*]] = load i8, i8* [[CONV]], align 1 // CHECK6-NEXT: [[TMP8:%.*]] = sext i8 [[TMP7]] to i32 // CHECK6-NEXT: call void @__kmpc_push_num_threads(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]], i32 [[TMP8]]) // CHECK6-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 @@ -4564,7 +4564,7 @@ int main() { // CHECK6-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] // CHECK6-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK6: omp.inner.for.body: -// CHECK6-NEXT: [[TMP7:%.*]] = load i8, i8* [[CONV]], align 8 +// CHECK6-NEXT: [[TMP7:%.*]] = load i8, i8* [[CONV]], align 1 // CHECK6-NEXT: [[TMP8:%.*]] = sext i8 [[TMP7]] to i32 // CHECK6-NEXT: call void @__kmpc_push_num_threads(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]], i32 [[TMP8]]) // CHECK6-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 diff --git a/clang/test/OpenMP/teams_distribute_parallel_for_schedule_codegen.cpp b/clang/test/OpenMP/teams_distribute_parallel_for_schedule_codegen.cpp index b1a46f0dfbb5..0ea08bb4d412 100644 --- a/clang/test/OpenMP/teams_distribute_parallel_for_schedule_codegen.cpp +++ b/clang/test/OpenMP/teams_distribute_parallel_for_schedule_codegen.cpp @@ -8233,7 +8233,7 @@ int main (int argc, char **argv) { // CHECK13-NEXT: [[CONV1:%.*]] = bitcast i64* [[N_ADDR]] to i32* // CHECK13-NEXT: [[TMP0:%.*]] = load i64, i64* [[VLA_ADDR]], align 8 // CHECK13-NEXT: [[TMP1:%.*]] = load i32*, i32** [[A_ADDR]], align 8 -// CHECK13-NEXT: [[TMP2:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK13-NEXT: [[TMP2:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK13-NEXT: store i32 [[TMP2]], i32* [[DOTCAPTURE_EXPR_]], align 4 // CHECK13-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 // CHECK13-NEXT: [[CONV2:%.*]] = bitcast i64* [[DOTCAPTURE_EXPR__CASTED]] to i32* @@ -8290,7 +8290,7 @@ int main (int argc, char **argv) { // CHECK13-NEXT: store i32 [[TMP6]], i32* [[DOTOMP_COMB_UB]], align 4 // CHECK13-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK13-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK13-NEXT: [[TMP7:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK13-NEXT: [[TMP7:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK13-NEXT: [[TMP8:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK13-NEXT: [[TMP9:%.*]] = load i32, i32* [[TMP8]], align 4 // CHECK13-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP9]], i32 91, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 [[TMP7]]) @@ -8321,7 +8321,7 @@ int main (int argc, char **argv) { // CHECK13-NEXT: [[TMP18:%.*]] = zext i32 [[TMP17]] to i64 // CHECK13-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK13-NEXT: [[TMP20:%.*]] = zext i32 [[TMP19]] to i64 -// CHECK13-NEXT: [[TMP21:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK13-NEXT: [[TMP21:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK13-NEXT: [[CONV7:%.*]] = bitcast i64* [[DOTCAPTURE_EXPR__CASTED]] to i32* // CHECK13-NEXT: store i32 [[TMP21]], i32* [[CONV7]], align 4 // CHECK13-NEXT: [[TMP22:%.*]] = load i64, i64* [[DOTCAPTURE_EXPR__CASTED]], align 8 @@ -8700,7 +8700,7 @@ int main (int argc, char **argv) { // CHECK13-NEXT: [[CONV1:%.*]] = bitcast i64* [[N_ADDR]] to i32* // CHECK13-NEXT: [[TMP0:%.*]] = load i64, i64* [[VLA_ADDR]], align 8 // 
CHECK13-NEXT: [[TMP1:%.*]] = load i32*, i32** [[A_ADDR]], align 8 -// CHECK13-NEXT: [[TMP2:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK13-NEXT: [[TMP2:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK13-NEXT: store i32 [[TMP2]], i32* [[DOTCAPTURE_EXPR_]], align 4 // CHECK13-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 // CHECK13-NEXT: [[CONV2:%.*]] = bitcast i64* [[DOTCAPTURE_EXPR__CASTED]] to i32* @@ -8786,7 +8786,7 @@ int main (int argc, char **argv) { // CHECK13-NEXT: [[TMP17:%.*]] = zext i32 [[TMP16]] to i64 // CHECK13-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK13-NEXT: [[TMP19:%.*]] = zext i32 [[TMP18]] to i64 -// CHECK13-NEXT: [[TMP20:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK13-NEXT: [[TMP20:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK13-NEXT: [[CONV7:%.*]] = bitcast i64* [[DOTCAPTURE_EXPR__CASTED]] to i32* // CHECK13-NEXT: store i32 [[TMP20]], i32* [[CONV7]], align 4 // CHECK13-NEXT: [[TMP21:%.*]] = load i64, i64* [[DOTCAPTURE_EXPR__CASTED]], align 8 @@ -8865,7 +8865,7 @@ int main (int argc, char **argv) { // CHECK13-NEXT: store i32 [[CONV5]], i32* [[DOTOMP_UB]], align 4 // CHECK13-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK13-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK13-NEXT: [[TMP9:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK13-NEXT: [[TMP9:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK13-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 // CHECK13-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 // CHECK13-NEXT: [[TMP12:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 @@ -9372,7 +9372,7 @@ int main (int argc, char **argv) { // CHECK13-NEXT: store [10 x i32]* [[A]], [10 x i32]** [[A_ADDR]], align 8 // CHECK13-NEXT: [[CONV:%.*]] = bitcast i64* [[M_ADDR]] to i32* // CHECK13-NEXT: [[TMP0:%.*]] = load [10 x i32]*, [10 x i32]** [[A_ADDR]], align 8 -// CHECK13-NEXT: [[TMP1:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK13-NEXT: [[TMP1:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK13-NEXT: store i32 [[TMP1]], i32* [[DOTCAPTURE_EXPR_]], align 4 // CHECK13-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 // CHECK13-NEXT: [[CONV1:%.*]] = bitcast i64* [[DOTCAPTURE_EXPR__CASTED]] to i32* @@ -9434,7 +9434,7 @@ int main (int argc, char **argv) { // CHECK13-NEXT: [[TMP9:%.*]] = zext i32 [[TMP8]] to i64 // CHECK13-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK13-NEXT: [[TMP11:%.*]] = zext i32 [[TMP10]] to i64 -// CHECK13-NEXT: [[TMP12:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK13-NEXT: [[TMP12:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK13-NEXT: [[CONV2:%.*]] = bitcast i64* [[DOTCAPTURE_EXPR__CASTED]] to i32* // CHECK13-NEXT: store i32 [[TMP12]], i32* [[CONV2]], align 4 // CHECK13-NEXT: [[TMP13:%.*]] = load i64, i64* [[DOTCAPTURE_EXPR__CASTED]], align 8 @@ -9487,7 +9487,7 @@ int main (int argc, char **argv) { // CHECK13-NEXT: store i32 [[CONV2]], i32* [[DOTOMP_UB]], align 4 // CHECK13-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK13-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK13-NEXT: [[TMP3:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK13-NEXT: [[TMP3:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK13-NEXT: [[TMP4:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK13-NEXT: [[TMP5:%.*]] = load i32, i32* [[TMP4]], align 4 // CHECK13-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB2]], i32 [[TMP5]], i32 33, i32* 
[[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 [[TMP3]]) @@ -9714,7 +9714,7 @@ int main (int argc, char **argv) { // CHECK13-NEXT: store [10 x i32]* [[A]], [10 x i32]** [[A_ADDR]], align 8 // CHECK13-NEXT: [[CONV:%.*]] = bitcast i64* [[M_ADDR]] to i32* // CHECK13-NEXT: [[TMP0:%.*]] = load [10 x i32]*, [10 x i32]** [[A_ADDR]], align 8 -// CHECK13-NEXT: [[TMP1:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK13-NEXT: [[TMP1:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK13-NEXT: store i32 [[TMP1]], i32* [[DOTCAPTURE_EXPR_]], align 4 // CHECK13-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 // CHECK13-NEXT: [[CONV1:%.*]] = bitcast i64* [[DOTCAPTURE_EXPR__CASTED]] to i32* @@ -9776,7 +9776,7 @@ int main (int argc, char **argv) { // CHECK13-NEXT: [[TMP9:%.*]] = zext i32 [[TMP8]] to i64 // CHECK13-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK13-NEXT: [[TMP11:%.*]] = zext i32 [[TMP10]] to i64 -// CHECK13-NEXT: [[TMP12:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK13-NEXT: [[TMP12:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK13-NEXT: [[CONV2:%.*]] = bitcast i64* [[DOTCAPTURE_EXPR__CASTED]] to i32* // CHECK13-NEXT: store i32 [[TMP12]], i32* [[CONV2]], align 4 // CHECK13-NEXT: [[TMP13:%.*]] = load i64, i64* [[DOTCAPTURE_EXPR__CASTED]], align 8 @@ -9829,7 +9829,7 @@ int main (int argc, char **argv) { // CHECK13-NEXT: store i32 [[CONV2]], i32* [[DOTOMP_UB]], align 4 // CHECK13-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK13-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK13-NEXT: [[TMP3:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK13-NEXT: [[TMP3:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK13-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 // CHECK13-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 // CHECK13-NEXT: [[TMP6:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 @@ -10702,7 +10702,7 @@ int main (int argc, char **argv) { // CHECK14-NEXT: [[CONV1:%.*]] = bitcast i64* [[N_ADDR]] to i32* // CHECK14-NEXT: [[TMP0:%.*]] = load i64, i64* [[VLA_ADDR]], align 8 // CHECK14-NEXT: [[TMP1:%.*]] = load i32*, i32** [[A_ADDR]], align 8 -// CHECK14-NEXT: [[TMP2:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK14-NEXT: [[TMP2:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK14-NEXT: store i32 [[TMP2]], i32* [[DOTCAPTURE_EXPR_]], align 4 // CHECK14-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 // CHECK14-NEXT: [[CONV2:%.*]] = bitcast i64* [[DOTCAPTURE_EXPR__CASTED]] to i32* @@ -10759,7 +10759,7 @@ int main (int argc, char **argv) { // CHECK14-NEXT: store i32 [[TMP6]], i32* [[DOTOMP_COMB_UB]], align 4 // CHECK14-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK14-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK14-NEXT: [[TMP7:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK14-NEXT: [[TMP7:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK14-NEXT: [[TMP8:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK14-NEXT: [[TMP9:%.*]] = load i32, i32* [[TMP8]], align 4 // CHECK14-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP9]], i32 91, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 [[TMP7]]) @@ -10790,7 +10790,7 @@ int main (int argc, char **argv) { // CHECK14-NEXT: [[TMP18:%.*]] = zext i32 [[TMP17]] to i64 // CHECK14-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK14-NEXT: 
[[TMP20:%.*]] = zext i32 [[TMP19]] to i64 -// CHECK14-NEXT: [[TMP21:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK14-NEXT: [[TMP21:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK14-NEXT: [[CONV7:%.*]] = bitcast i64* [[DOTCAPTURE_EXPR__CASTED]] to i32* // CHECK14-NEXT: store i32 [[TMP21]], i32* [[CONV7]], align 4 // CHECK14-NEXT: [[TMP22:%.*]] = load i64, i64* [[DOTCAPTURE_EXPR__CASTED]], align 8 @@ -11169,7 +11169,7 @@ int main (int argc, char **argv) { // CHECK14-NEXT: [[CONV1:%.*]] = bitcast i64* [[N_ADDR]] to i32* // CHECK14-NEXT: [[TMP0:%.*]] = load i64, i64* [[VLA_ADDR]], align 8 // CHECK14-NEXT: [[TMP1:%.*]] = load i32*, i32** [[A_ADDR]], align 8 -// CHECK14-NEXT: [[TMP2:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK14-NEXT: [[TMP2:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK14-NEXT: store i32 [[TMP2]], i32* [[DOTCAPTURE_EXPR_]], align 4 // CHECK14-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 // CHECK14-NEXT: [[CONV2:%.*]] = bitcast i64* [[DOTCAPTURE_EXPR__CASTED]] to i32* @@ -11255,7 +11255,7 @@ int main (int argc, char **argv) { // CHECK14-NEXT: [[TMP17:%.*]] = zext i32 [[TMP16]] to i64 // CHECK14-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK14-NEXT: [[TMP19:%.*]] = zext i32 [[TMP18]] to i64 -// CHECK14-NEXT: [[TMP20:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK14-NEXT: [[TMP20:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK14-NEXT: [[CONV7:%.*]] = bitcast i64* [[DOTCAPTURE_EXPR__CASTED]] to i32* // CHECK14-NEXT: store i32 [[TMP20]], i32* [[CONV7]], align 4 // CHECK14-NEXT: [[TMP21:%.*]] = load i64, i64* [[DOTCAPTURE_EXPR__CASTED]], align 8 @@ -11334,7 +11334,7 @@ int main (int argc, char **argv) { // CHECK14-NEXT: store i32 [[CONV5]], i32* [[DOTOMP_UB]], align 4 // CHECK14-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK14-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK14-NEXT: [[TMP9:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK14-NEXT: [[TMP9:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK14-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 // CHECK14-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 // CHECK14-NEXT: [[TMP12:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 @@ -11841,7 +11841,7 @@ int main (int argc, char **argv) { // CHECK14-NEXT: store [10 x i32]* [[A]], [10 x i32]** [[A_ADDR]], align 8 // CHECK14-NEXT: [[CONV:%.*]] = bitcast i64* [[M_ADDR]] to i32* // CHECK14-NEXT: [[TMP0:%.*]] = load [10 x i32]*, [10 x i32]** [[A_ADDR]], align 8 -// CHECK14-NEXT: [[TMP1:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK14-NEXT: [[TMP1:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK14-NEXT: store i32 [[TMP1]], i32* [[DOTCAPTURE_EXPR_]], align 4 // CHECK14-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 // CHECK14-NEXT: [[CONV1:%.*]] = bitcast i64* [[DOTCAPTURE_EXPR__CASTED]] to i32* @@ -11903,7 +11903,7 @@ int main (int argc, char **argv) { // CHECK14-NEXT: [[TMP9:%.*]] = zext i32 [[TMP8]] to i64 // CHECK14-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK14-NEXT: [[TMP11:%.*]] = zext i32 [[TMP10]] to i64 -// CHECK14-NEXT: [[TMP12:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK14-NEXT: [[TMP12:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK14-NEXT: [[CONV2:%.*]] = bitcast i64* [[DOTCAPTURE_EXPR__CASTED]] to i32* // CHECK14-NEXT: store i32 [[TMP12]], i32* [[CONV2]], align 4 // CHECK14-NEXT: [[TMP13:%.*]] = load i64, i64* [[DOTCAPTURE_EXPR__CASTED]], align 8 @@ -11956,7 +11956,7 @@ int main (int argc, 
char **argv) { // CHECK14-NEXT: store i32 [[CONV2]], i32* [[DOTOMP_UB]], align 4 // CHECK14-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK14-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK14-NEXT: [[TMP3:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK14-NEXT: [[TMP3:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK14-NEXT: [[TMP4:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK14-NEXT: [[TMP5:%.*]] = load i32, i32* [[TMP4]], align 4 // CHECK14-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB2]], i32 [[TMP5]], i32 33, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 [[TMP3]]) @@ -12183,7 +12183,7 @@ int main (int argc, char **argv) { // CHECK14-NEXT: store [10 x i32]* [[A]], [10 x i32]** [[A_ADDR]], align 8 // CHECK14-NEXT: [[CONV:%.*]] = bitcast i64* [[M_ADDR]] to i32* // CHECK14-NEXT: [[TMP0:%.*]] = load [10 x i32]*, [10 x i32]** [[A_ADDR]], align 8 -// CHECK14-NEXT: [[TMP1:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK14-NEXT: [[TMP1:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK14-NEXT: store i32 [[TMP1]], i32* [[DOTCAPTURE_EXPR_]], align 4 // CHECK14-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 // CHECK14-NEXT: [[CONV1:%.*]] = bitcast i64* [[DOTCAPTURE_EXPR__CASTED]] to i32* @@ -12245,7 +12245,7 @@ int main (int argc, char **argv) { // CHECK14-NEXT: [[TMP9:%.*]] = zext i32 [[TMP8]] to i64 // CHECK14-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK14-NEXT: [[TMP11:%.*]] = zext i32 [[TMP10]] to i64 -// CHECK14-NEXT: [[TMP12:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK14-NEXT: [[TMP12:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK14-NEXT: [[CONV2:%.*]] = bitcast i64* [[DOTCAPTURE_EXPR__CASTED]] to i32* // CHECK14-NEXT: store i32 [[TMP12]], i32* [[CONV2]], align 4 // CHECK14-NEXT: [[TMP13:%.*]] = load i64, i64* [[DOTCAPTURE_EXPR__CASTED]], align 8 @@ -12298,7 +12298,7 @@ int main (int argc, char **argv) { // CHECK14-NEXT: store i32 [[CONV2]], i32* [[DOTOMP_UB]], align 4 // CHECK14-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK14-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK14-NEXT: [[TMP3:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK14-NEXT: [[TMP3:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK14-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 // CHECK14-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 // CHECK14-NEXT: [[TMP6:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 @@ -17945,7 +17945,7 @@ int main (int argc, char **argv) { // CHECK17-NEXT: [[CONV1:%.*]] = bitcast i64* [[N_ADDR]] to i32* // CHECK17-NEXT: [[TMP0:%.*]] = load i64, i64* [[VLA_ADDR]], align 8 // CHECK17-NEXT: [[TMP1:%.*]] = load i32*, i32** [[A_ADDR]], align 8 -// CHECK17-NEXT: [[TMP2:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK17-NEXT: [[TMP2:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK17-NEXT: store i32 [[TMP2]], i32* [[DOTCAPTURE_EXPR_]], align 4 // CHECK17-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 // CHECK17-NEXT: [[CONV2:%.*]] = bitcast i64* [[DOTCAPTURE_EXPR__CASTED]] to i32* @@ -18002,7 +18002,7 @@ int main (int argc, char **argv) { // CHECK17-NEXT: store i32 [[TMP6]], i32* [[DOTOMP_COMB_UB]], align 4 // CHECK17-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK17-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK17-NEXT: [[TMP7:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK17-NEXT: [[TMP7:%.*]] = load i32, i32* 
[[CONV]], align 4 // CHECK17-NEXT: [[TMP8:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK17-NEXT: [[TMP9:%.*]] = load i32, i32* [[TMP8]], align 4 // CHECK17-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP9]], i32 91, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 [[TMP7]]) @@ -18033,7 +18033,7 @@ int main (int argc, char **argv) { // CHECK17-NEXT: [[TMP18:%.*]] = zext i32 [[TMP17]] to i64 // CHECK17-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK17-NEXT: [[TMP20:%.*]] = zext i32 [[TMP19]] to i64 -// CHECK17-NEXT: [[TMP21:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK17-NEXT: [[TMP21:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK17-NEXT: [[CONV7:%.*]] = bitcast i64* [[DOTCAPTURE_EXPR__CASTED]] to i32* // CHECK17-NEXT: store i32 [[TMP21]], i32* [[CONV7]], align 4 // CHECK17-NEXT: [[TMP22:%.*]] = load i64, i64* [[DOTCAPTURE_EXPR__CASTED]], align 8 @@ -18412,7 +18412,7 @@ int main (int argc, char **argv) { // CHECK17-NEXT: [[CONV1:%.*]] = bitcast i64* [[N_ADDR]] to i32* // CHECK17-NEXT: [[TMP0:%.*]] = load i64, i64* [[VLA_ADDR]], align 8 // CHECK17-NEXT: [[TMP1:%.*]] = load i32*, i32** [[A_ADDR]], align 8 -// CHECK17-NEXT: [[TMP2:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK17-NEXT: [[TMP2:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK17-NEXT: store i32 [[TMP2]], i32* [[DOTCAPTURE_EXPR_]], align 4 // CHECK17-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 // CHECK17-NEXT: [[CONV2:%.*]] = bitcast i64* [[DOTCAPTURE_EXPR__CASTED]] to i32* @@ -18498,7 +18498,7 @@ int main (int argc, char **argv) { // CHECK17-NEXT: [[TMP17:%.*]] = zext i32 [[TMP16]] to i64 // CHECK17-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK17-NEXT: [[TMP19:%.*]] = zext i32 [[TMP18]] to i64 -// CHECK17-NEXT: [[TMP20:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK17-NEXT: [[TMP20:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK17-NEXT: [[CONV7:%.*]] = bitcast i64* [[DOTCAPTURE_EXPR__CASTED]] to i32* // CHECK17-NEXT: store i32 [[TMP20]], i32* [[CONV7]], align 4 // CHECK17-NEXT: [[TMP21:%.*]] = load i64, i64* [[DOTCAPTURE_EXPR__CASTED]], align 8 @@ -18577,7 +18577,7 @@ int main (int argc, char **argv) { // CHECK17-NEXT: store i32 [[CONV5]], i32* [[DOTOMP_UB]], align 4 // CHECK17-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK17-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK17-NEXT: [[TMP9:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK17-NEXT: [[TMP9:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK17-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 // CHECK17-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 // CHECK17-NEXT: [[TMP12:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 @@ -19084,7 +19084,7 @@ int main (int argc, char **argv) { // CHECK17-NEXT: store [10 x i32]* [[A]], [10 x i32]** [[A_ADDR]], align 8 // CHECK17-NEXT: [[CONV:%.*]] = bitcast i64* [[M_ADDR]] to i32* // CHECK17-NEXT: [[TMP0:%.*]] = load [10 x i32]*, [10 x i32]** [[A_ADDR]], align 8 -// CHECK17-NEXT: [[TMP1:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK17-NEXT: [[TMP1:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK17-NEXT: store i32 [[TMP1]], i32* [[DOTCAPTURE_EXPR_]], align 4 // CHECK17-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 // CHECK17-NEXT: [[CONV1:%.*]] = bitcast i64* [[DOTCAPTURE_EXPR__CASTED]] to i32* @@ -19146,7 +19146,7 @@ int main (int argc, char **argv) 
{ // CHECK17-NEXT: [[TMP9:%.*]] = zext i32 [[TMP8]] to i64 // CHECK17-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK17-NEXT: [[TMP11:%.*]] = zext i32 [[TMP10]] to i64 -// CHECK17-NEXT: [[TMP12:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK17-NEXT: [[TMP12:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK17-NEXT: [[CONV2:%.*]] = bitcast i64* [[DOTCAPTURE_EXPR__CASTED]] to i32* // CHECK17-NEXT: store i32 [[TMP12]], i32* [[CONV2]], align 4 // CHECK17-NEXT: [[TMP13:%.*]] = load i64, i64* [[DOTCAPTURE_EXPR__CASTED]], align 8 @@ -19199,7 +19199,7 @@ int main (int argc, char **argv) { // CHECK17-NEXT: store i32 [[CONV2]], i32* [[DOTOMP_UB]], align 4 // CHECK17-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK17-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK17-NEXT: [[TMP3:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK17-NEXT: [[TMP3:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK17-NEXT: [[TMP4:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK17-NEXT: [[TMP5:%.*]] = load i32, i32* [[TMP4]], align 4 // CHECK17-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB2]], i32 [[TMP5]], i32 33, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 [[TMP3]]) @@ -19426,7 +19426,7 @@ int main (int argc, char **argv) { // CHECK17-NEXT: store [10 x i32]* [[A]], [10 x i32]** [[A_ADDR]], align 8 // CHECK17-NEXT: [[CONV:%.*]] = bitcast i64* [[M_ADDR]] to i32* // CHECK17-NEXT: [[TMP0:%.*]] = load [10 x i32]*, [10 x i32]** [[A_ADDR]], align 8 -// CHECK17-NEXT: [[TMP1:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK17-NEXT: [[TMP1:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK17-NEXT: store i32 [[TMP1]], i32* [[DOTCAPTURE_EXPR_]], align 4 // CHECK17-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 // CHECK17-NEXT: [[CONV1:%.*]] = bitcast i64* [[DOTCAPTURE_EXPR__CASTED]] to i32* @@ -19488,7 +19488,7 @@ int main (int argc, char **argv) { // CHECK17-NEXT: [[TMP9:%.*]] = zext i32 [[TMP8]] to i64 // CHECK17-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK17-NEXT: [[TMP11:%.*]] = zext i32 [[TMP10]] to i64 -// CHECK17-NEXT: [[TMP12:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK17-NEXT: [[TMP12:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK17-NEXT: [[CONV2:%.*]] = bitcast i64* [[DOTCAPTURE_EXPR__CASTED]] to i32* // CHECK17-NEXT: store i32 [[TMP12]], i32* [[CONV2]], align 4 // CHECK17-NEXT: [[TMP13:%.*]] = load i64, i64* [[DOTCAPTURE_EXPR__CASTED]], align 8 @@ -19541,7 +19541,7 @@ int main (int argc, char **argv) { // CHECK17-NEXT: store i32 [[CONV2]], i32* [[DOTOMP_UB]], align 4 // CHECK17-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK17-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK17-NEXT: [[TMP3:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK17-NEXT: [[TMP3:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK17-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 // CHECK17-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 // CHECK17-NEXT: [[TMP6:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 @@ -20414,7 +20414,7 @@ int main (int argc, char **argv) { // CHECK18-NEXT: [[CONV1:%.*]] = bitcast i64* [[N_ADDR]] to i32* // CHECK18-NEXT: [[TMP0:%.*]] = load i64, i64* [[VLA_ADDR]], align 8 // CHECK18-NEXT: [[TMP1:%.*]] = load i32*, i32** [[A_ADDR]], align 8 -// CHECK18-NEXT: [[TMP2:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK18-NEXT: [[TMP2:%.*]] = load i32, i32* [[CONV]], align 
4 // CHECK18-NEXT: store i32 [[TMP2]], i32* [[DOTCAPTURE_EXPR_]], align 4 // CHECK18-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 // CHECK18-NEXT: [[CONV2:%.*]] = bitcast i64* [[DOTCAPTURE_EXPR__CASTED]] to i32* @@ -20471,7 +20471,7 @@ int main (int argc, char **argv) { // CHECK18-NEXT: store i32 [[TMP6]], i32* [[DOTOMP_COMB_UB]], align 4 // CHECK18-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK18-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK18-NEXT: [[TMP7:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK18-NEXT: [[TMP7:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK18-NEXT: [[TMP8:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK18-NEXT: [[TMP9:%.*]] = load i32, i32* [[TMP8]], align 4 // CHECK18-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP9]], i32 91, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 [[TMP7]]) @@ -20502,7 +20502,7 @@ int main (int argc, char **argv) { // CHECK18-NEXT: [[TMP18:%.*]] = zext i32 [[TMP17]] to i64 // CHECK18-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK18-NEXT: [[TMP20:%.*]] = zext i32 [[TMP19]] to i64 -// CHECK18-NEXT: [[TMP21:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK18-NEXT: [[TMP21:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK18-NEXT: [[CONV7:%.*]] = bitcast i64* [[DOTCAPTURE_EXPR__CASTED]] to i32* // CHECK18-NEXT: store i32 [[TMP21]], i32* [[CONV7]], align 4 // CHECK18-NEXT: [[TMP22:%.*]] = load i64, i64* [[DOTCAPTURE_EXPR__CASTED]], align 8 @@ -20881,7 +20881,7 @@ int main (int argc, char **argv) { // CHECK18-NEXT: [[CONV1:%.*]] = bitcast i64* [[N_ADDR]] to i32* // CHECK18-NEXT: [[TMP0:%.*]] = load i64, i64* [[VLA_ADDR]], align 8 // CHECK18-NEXT: [[TMP1:%.*]] = load i32*, i32** [[A_ADDR]], align 8 -// CHECK18-NEXT: [[TMP2:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK18-NEXT: [[TMP2:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK18-NEXT: store i32 [[TMP2]], i32* [[DOTCAPTURE_EXPR_]], align 4 // CHECK18-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 // CHECK18-NEXT: [[CONV2:%.*]] = bitcast i64* [[DOTCAPTURE_EXPR__CASTED]] to i32* @@ -20967,7 +20967,7 @@ int main (int argc, char **argv) { // CHECK18-NEXT: [[TMP17:%.*]] = zext i32 [[TMP16]] to i64 // CHECK18-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK18-NEXT: [[TMP19:%.*]] = zext i32 [[TMP18]] to i64 -// CHECK18-NEXT: [[TMP20:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK18-NEXT: [[TMP20:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK18-NEXT: [[CONV7:%.*]] = bitcast i64* [[DOTCAPTURE_EXPR__CASTED]] to i32* // CHECK18-NEXT: store i32 [[TMP20]], i32* [[CONV7]], align 4 // CHECK18-NEXT: [[TMP21:%.*]] = load i64, i64* [[DOTCAPTURE_EXPR__CASTED]], align 8 @@ -21046,7 +21046,7 @@ int main (int argc, char **argv) { // CHECK18-NEXT: store i32 [[CONV5]], i32* [[DOTOMP_UB]], align 4 // CHECK18-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK18-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK18-NEXT: [[TMP9:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK18-NEXT: [[TMP9:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK18-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 // CHECK18-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 // CHECK18-NEXT: [[TMP12:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 @@ -21553,7 +21553,7 @@ int main (int argc, char **argv) { // CHECK18-NEXT: store [10 x i32]* [[A]], 
[10 x i32]** [[A_ADDR]], align 8 // CHECK18-NEXT: [[CONV:%.*]] = bitcast i64* [[M_ADDR]] to i32* // CHECK18-NEXT: [[TMP0:%.*]] = load [10 x i32]*, [10 x i32]** [[A_ADDR]], align 8 -// CHECK18-NEXT: [[TMP1:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK18-NEXT: [[TMP1:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK18-NEXT: store i32 [[TMP1]], i32* [[DOTCAPTURE_EXPR_]], align 4 // CHECK18-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 // CHECK18-NEXT: [[CONV1:%.*]] = bitcast i64* [[DOTCAPTURE_EXPR__CASTED]] to i32* @@ -21615,7 +21615,7 @@ int main (int argc, char **argv) { // CHECK18-NEXT: [[TMP9:%.*]] = zext i32 [[TMP8]] to i64 // CHECK18-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK18-NEXT: [[TMP11:%.*]] = zext i32 [[TMP10]] to i64 -// CHECK18-NEXT: [[TMP12:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK18-NEXT: [[TMP12:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK18-NEXT: [[CONV2:%.*]] = bitcast i64* [[DOTCAPTURE_EXPR__CASTED]] to i32* // CHECK18-NEXT: store i32 [[TMP12]], i32* [[CONV2]], align 4 // CHECK18-NEXT: [[TMP13:%.*]] = load i64, i64* [[DOTCAPTURE_EXPR__CASTED]], align 8 @@ -21668,7 +21668,7 @@ int main (int argc, char **argv) { // CHECK18-NEXT: store i32 [[CONV2]], i32* [[DOTOMP_UB]], align 4 // CHECK18-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK18-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK18-NEXT: [[TMP3:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK18-NEXT: [[TMP3:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK18-NEXT: [[TMP4:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK18-NEXT: [[TMP5:%.*]] = load i32, i32* [[TMP4]], align 4 // CHECK18-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB2]], i32 [[TMP5]], i32 33, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 [[TMP3]]) @@ -21895,7 +21895,7 @@ int main (int argc, char **argv) { // CHECK18-NEXT: store [10 x i32]* [[A]], [10 x i32]** [[A_ADDR]], align 8 // CHECK18-NEXT: [[CONV:%.*]] = bitcast i64* [[M_ADDR]] to i32* // CHECK18-NEXT: [[TMP0:%.*]] = load [10 x i32]*, [10 x i32]** [[A_ADDR]], align 8 -// CHECK18-NEXT: [[TMP1:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK18-NEXT: [[TMP1:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK18-NEXT: store i32 [[TMP1]], i32* [[DOTCAPTURE_EXPR_]], align 4 // CHECK18-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 // CHECK18-NEXT: [[CONV1:%.*]] = bitcast i64* [[DOTCAPTURE_EXPR__CASTED]] to i32* @@ -21957,7 +21957,7 @@ int main (int argc, char **argv) { // CHECK18-NEXT: [[TMP9:%.*]] = zext i32 [[TMP8]] to i64 // CHECK18-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK18-NEXT: [[TMP11:%.*]] = zext i32 [[TMP10]] to i64 -// CHECK18-NEXT: [[TMP12:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK18-NEXT: [[TMP12:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK18-NEXT: [[CONV2:%.*]] = bitcast i64* [[DOTCAPTURE_EXPR__CASTED]] to i32* // CHECK18-NEXT: store i32 [[TMP12]], i32* [[CONV2]], align 4 // CHECK18-NEXT: [[TMP13:%.*]] = load i64, i64* [[DOTCAPTURE_EXPR__CASTED]], align 8 @@ -22010,7 +22010,7 @@ int main (int argc, char **argv) { // CHECK18-NEXT: store i32 [[CONV2]], i32* [[DOTOMP_UB]], align 4 // CHECK18-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK18-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK18-NEXT: [[TMP3:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK18-NEXT: [[TMP3:%.*]] = load i32, i32* [[CONV]], align 4 // 
CHECK18-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 // CHECK18-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 // CHECK18-NEXT: [[TMP6:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 diff --git a/clang/test/OpenMP/teams_distribute_parallel_for_simd_codegen.cpp b/clang/test/OpenMP/teams_distribute_parallel_for_simd_codegen.cpp index b3d27129b22d..4fa27a341fc5 100644 --- a/clang/test/OpenMP/teams_distribute_parallel_for_simd_codegen.cpp +++ b/clang/test/OpenMP/teams_distribute_parallel_for_simd_codegen.cpp @@ -347,8 +347,8 @@ int main (int argc, char **argv) { // CHECK1-NEXT: [[CONV1:%.*]] = bitcast i64* [[TH_ADDR]] to i32* // CHECK1-NEXT: [[CONV2:%.*]] = bitcast i64* [[N_ADDR]] to i32* // CHECK1-NEXT: [[TMP1:%.*]] = load [100 x i32]*, [100 x i32]** [[A_ADDR]], align 8 -// CHECK1-NEXT: [[TMP2:%.*]] = load i32, i32* [[CONV]], align 8 -// CHECK1-NEXT: [[TMP3:%.*]] = load i32, i32* [[CONV1]], align 8 +// CHECK1-NEXT: [[TMP2:%.*]] = load i32, i32* [[CONV]], align 4 +// CHECK1-NEXT: [[TMP3:%.*]] = load i32, i32* [[CONV1]], align 4 // CHECK1-NEXT: call void @__kmpc_push_num_teams(%struct.ident_t* @[[GLOB3]], i32 [[TMP0]], i32 [[TMP2]], i32 [[TMP3]]) // CHECK1-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 2, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32*, [100 x i32]*)* @.omp_outlined. to void (i32*, i32*, ...)*), i32* [[CONV2]], [100 x i32]* [[TMP1]]) // CHECK1-NEXT: ret void @@ -994,8 +994,8 @@ int main (int argc, char **argv) { // CHECK2-NEXT: [[CONV1:%.*]] = bitcast i64* [[TH_ADDR]] to i32* // CHECK2-NEXT: [[CONV2:%.*]] = bitcast i64* [[N_ADDR]] to i32* // CHECK2-NEXT: [[TMP1:%.*]] = load [100 x i32]*, [100 x i32]** [[A_ADDR]], align 8 -// CHECK2-NEXT: [[TMP2:%.*]] = load i32, i32* [[CONV]], align 8 -// CHECK2-NEXT: [[TMP3:%.*]] = load i32, i32* [[CONV1]], align 8 +// CHECK2-NEXT: [[TMP2:%.*]] = load i32, i32* [[CONV]], align 4 +// CHECK2-NEXT: [[TMP3:%.*]] = load i32, i32* [[CONV1]], align 4 // CHECK2-NEXT: call void @__kmpc_push_num_teams(%struct.ident_t* @[[GLOB3]], i32 [[TMP0]], i32 [[TMP2]], i32 [[TMP3]]) // CHECK2-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB3]], i32 2, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32*, [100 x i32]*)* @.omp_outlined. to void (i32*, i32*, ...)*), i32* [[CONV2]], [100 x i32]* [[TMP1]]) // CHECK2-NEXT: ret void @@ -6742,8 +6742,8 @@ int main (int argc, char **argv) { // CHECK25-NEXT: [[CONV:%.*]] = bitcast i64* [[TE_ADDR]] to i32* // CHECK25-NEXT: [[CONV1:%.*]] = bitcast i64* [[TH_ADDR]] to i32* // CHECK25-NEXT: [[TMP1:%.*]] = load [10 x i32]*, [10 x i32]** [[A_ADDR]], align 8 -// CHECK25-NEXT: [[TMP2:%.*]] = load i32, i32* [[CONV]], align 8 -// CHECK25-NEXT: [[TMP3:%.*]] = load i32, i32* [[CONV1]], align 8 +// CHECK25-NEXT: [[TMP2:%.*]] = load i32, i32* [[CONV]], align 4 +// CHECK25-NEXT: [[TMP3:%.*]] = load i32, i32* [[CONV1]], align 4 // CHECK25-NEXT: call void @__kmpc_push_num_teams(%struct.ident_t* @[[GLOB4]], i32 [[TMP0]], i32 [[TMP2]], i32 [[TMP3]]) // CHECK25-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) 
@__kmpc_fork_teams(%struct.ident_t* @[[GLOB4]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, [10 x i32]*)* @.omp_outlined..2 to void (i32*, i32*, ...)*), [10 x i32]* [[TMP1]]) // CHECK25-NEXT: ret void @@ -7353,8 +7353,8 @@ int main (int argc, char **argv) { // CHECK26-NEXT: [[CONV:%.*]] = bitcast i64* [[TE_ADDR]] to i32* // CHECK26-NEXT: [[CONV1:%.*]] = bitcast i64* [[TH_ADDR]] to i32* // CHECK26-NEXT: [[TMP1:%.*]] = load [10 x i32]*, [10 x i32]** [[A_ADDR]], align 8 -// CHECK26-NEXT: [[TMP2:%.*]] = load i32, i32* [[CONV]], align 8 -// CHECK26-NEXT: [[TMP3:%.*]] = load i32, i32* [[CONV1]], align 8 +// CHECK26-NEXT: [[TMP2:%.*]] = load i32, i32* [[CONV]], align 4 +// CHECK26-NEXT: [[TMP3:%.*]] = load i32, i32* [[CONV1]], align 4 // CHECK26-NEXT: call void @__kmpc_push_num_teams(%struct.ident_t* @[[GLOB4]], i32 [[TMP0]], i32 [[TMP2]], i32 [[TMP3]]) // CHECK26-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB4]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, [10 x i32]*)* @.omp_outlined..2 to void (i32*, i32*, ...)*), [10 x i32]* [[TMP1]]) // CHECK26-NEXT: ret void diff --git a/clang/test/OpenMP/teams_distribute_parallel_for_simd_dist_schedule_codegen.cpp b/clang/test/OpenMP/teams_distribute_parallel_for_simd_dist_schedule_codegen.cpp index 0080196f4da0..16d84deb0e2e 100644 --- a/clang/test/OpenMP/teams_distribute_parallel_for_simd_dist_schedule_codegen.cpp +++ b/clang/test/OpenMP/teams_distribute_parallel_for_simd_dist_schedule_codegen.cpp @@ -3780,7 +3780,7 @@ int main (int argc, char **argv) { // CHECK9-NEXT: [[CONV1:%.*]] = bitcast i64* [[N_ADDR]] to i32* // CHECK9-NEXT: [[TMP0:%.*]] = load i64, i64* [[VLA_ADDR]], align 8 // CHECK9-NEXT: [[TMP1:%.*]] = load i32*, i32** [[A_ADDR]], align 8 -// CHECK9-NEXT: [[TMP2:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK9-NEXT: [[TMP2:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK9-NEXT: store i32 [[TMP2]], i32* [[DOTCAPTURE_EXPR_]], align 4 // CHECK9-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 // CHECK9-NEXT: [[CONV2:%.*]] = bitcast i64* [[DOTCAPTURE_EXPR__CASTED]] to i32* @@ -3837,7 +3837,7 @@ int main (int argc, char **argv) { // CHECK9-NEXT: store i32 [[TMP6]], i32* [[DOTOMP_COMB_UB]], align 4 // CHECK9-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK9-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK9-NEXT: [[TMP7:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK9-NEXT: [[TMP7:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK9-NEXT: [[TMP8:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK9-NEXT: [[TMP9:%.*]] = load i32, i32* [[TMP8]], align 4 // CHECK9-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP9]], i32 91, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 [[TMP7]]) @@ -3868,7 +3868,7 @@ int main (int argc, char **argv) { // CHECK9-NEXT: [[TMP18:%.*]] = zext i32 [[TMP17]] to i64 // CHECK9-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !24 // CHECK9-NEXT: [[TMP20:%.*]] = zext i32 [[TMP19]] to i64 -// CHECK9-NEXT: [[TMP21:%.*]] = load i32, i32* [[CONV]], align 8, !llvm.access.group !24 +// CHECK9-NEXT: [[TMP21:%.*]] = load i32, i32* [[CONV]], align 4, !llvm.access.group !24 // CHECK9-NEXT: [[CONV7:%.*]] = bitcast i64* [[DOTCAPTURE_EXPR__CASTED]] to i32* // CHECK9-NEXT: store i32 [[TMP21]], i32* [[CONV7]], align 4, !llvm.access.group !24 // CHECK9-NEXT: 
[[TMP22:%.*]] = load i64, i64* [[DOTCAPTURE_EXPR__CASTED]], align 8, !llvm.access.group !24 @@ -4475,7 +4475,7 @@ int main (int argc, char **argv) { // CHECK9-NEXT: store [10 x i32]* [[A]], [10 x i32]** [[A_ADDR]], align 8 // CHECK9-NEXT: [[CONV:%.*]] = bitcast i64* [[M_ADDR]] to i32* // CHECK9-NEXT: [[TMP0:%.*]] = load [10 x i32]*, [10 x i32]** [[A_ADDR]], align 8 -// CHECK9-NEXT: [[TMP1:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK9-NEXT: [[TMP1:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK9-NEXT: store i32 [[TMP1]], i32* [[DOTCAPTURE_EXPR_]], align 4 // CHECK9-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 // CHECK9-NEXT: [[CONV1:%.*]] = bitcast i64* [[DOTCAPTURE_EXPR__CASTED]] to i32* @@ -4510,7 +4510,7 @@ int main (int argc, char **argv) { // CHECK9-NEXT: store i32 9, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK9-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK9-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK9-NEXT: [[TMP1:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK9-NEXT: [[TMP1:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK9-NEXT: [[TMP2:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK9-NEXT: [[TMP3:%.*]] = load i32, i32* [[TMP2]], align 4 // CHECK9-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP3]], i32 91, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 [[TMP1]]) @@ -4537,7 +4537,7 @@ int main (int argc, char **argv) { // CHECK9-NEXT: [[TMP9:%.*]] = zext i32 [[TMP8]] to i64 // CHECK9-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !42 // CHECK9-NEXT: [[TMP11:%.*]] = zext i32 [[TMP10]] to i64 -// CHECK9-NEXT: [[TMP12:%.*]] = load i32, i32* [[CONV]], align 8, !llvm.access.group !42 +// CHECK9-NEXT: [[TMP12:%.*]] = load i32, i32* [[CONV]], align 4, !llvm.access.group !42 // CHECK9-NEXT: [[CONV2:%.*]] = bitcast i64* [[DOTCAPTURE_EXPR__CASTED]] to i32* // CHECK9-NEXT: store i32 [[TMP12]], i32* [[CONV2]], align 4, !llvm.access.group !42 // CHECK9-NEXT: [[TMP13:%.*]] = load i64, i64* [[DOTCAPTURE_EXPR__CASTED]], align 8, !llvm.access.group !42 @@ -5404,7 +5404,7 @@ int main (int argc, char **argv) { // CHECK10-NEXT: [[CONV1:%.*]] = bitcast i64* [[N_ADDR]] to i32* // CHECK10-NEXT: [[TMP0:%.*]] = load i64, i64* [[VLA_ADDR]], align 8 // CHECK10-NEXT: [[TMP1:%.*]] = load i32*, i32** [[A_ADDR]], align 8 -// CHECK10-NEXT: [[TMP2:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK10-NEXT: [[TMP2:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK10-NEXT: store i32 [[TMP2]], i32* [[DOTCAPTURE_EXPR_]], align 4 // CHECK10-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 // CHECK10-NEXT: [[CONV2:%.*]] = bitcast i64* [[DOTCAPTURE_EXPR__CASTED]] to i32* @@ -5461,7 +5461,7 @@ int main (int argc, char **argv) { // CHECK10-NEXT: store i32 [[TMP6]], i32* [[DOTOMP_COMB_UB]], align 4 // CHECK10-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK10-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK10-NEXT: [[TMP7:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK10-NEXT: [[TMP7:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK10-NEXT: [[TMP8:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK10-NEXT: [[TMP9:%.*]] = load i32, i32* [[TMP8]], align 4 // CHECK10-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP9]], i32 91, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, 
i32 [[TMP7]]) @@ -5492,7 +5492,7 @@ int main (int argc, char **argv) { // CHECK10-NEXT: [[TMP18:%.*]] = zext i32 [[TMP17]] to i64 // CHECK10-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !24 // CHECK10-NEXT: [[TMP20:%.*]] = zext i32 [[TMP19]] to i64 -// CHECK10-NEXT: [[TMP21:%.*]] = load i32, i32* [[CONV]], align 8, !llvm.access.group !24 +// CHECK10-NEXT: [[TMP21:%.*]] = load i32, i32* [[CONV]], align 4, !llvm.access.group !24 // CHECK10-NEXT: [[CONV7:%.*]] = bitcast i64* [[DOTCAPTURE_EXPR__CASTED]] to i32* // CHECK10-NEXT: store i32 [[TMP21]], i32* [[CONV7]], align 4, !llvm.access.group !24 // CHECK10-NEXT: [[TMP22:%.*]] = load i64, i64* [[DOTCAPTURE_EXPR__CASTED]], align 8, !llvm.access.group !24 @@ -6099,7 +6099,7 @@ int main (int argc, char **argv) { // CHECK10-NEXT: store [10 x i32]* [[A]], [10 x i32]** [[A_ADDR]], align 8 // CHECK10-NEXT: [[CONV:%.*]] = bitcast i64* [[M_ADDR]] to i32* // CHECK10-NEXT: [[TMP0:%.*]] = load [10 x i32]*, [10 x i32]** [[A_ADDR]], align 8 -// CHECK10-NEXT: [[TMP1:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK10-NEXT: [[TMP1:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK10-NEXT: store i32 [[TMP1]], i32* [[DOTCAPTURE_EXPR_]], align 4 // CHECK10-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 // CHECK10-NEXT: [[CONV1:%.*]] = bitcast i64* [[DOTCAPTURE_EXPR__CASTED]] to i32* @@ -6134,7 +6134,7 @@ int main (int argc, char **argv) { // CHECK10-NEXT: store i32 9, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK10-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK10-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK10-NEXT: [[TMP1:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK10-NEXT: [[TMP1:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK10-NEXT: [[TMP2:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK10-NEXT: [[TMP3:%.*]] = load i32, i32* [[TMP2]], align 4 // CHECK10-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP3]], i32 91, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 [[TMP1]]) @@ -6161,7 +6161,7 @@ int main (int argc, char **argv) { // CHECK10-NEXT: [[TMP9:%.*]] = zext i32 [[TMP8]] to i64 // CHECK10-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !42 // CHECK10-NEXT: [[TMP11:%.*]] = zext i32 [[TMP10]] to i64 -// CHECK10-NEXT: [[TMP12:%.*]] = load i32, i32* [[CONV]], align 8, !llvm.access.group !42 +// CHECK10-NEXT: [[TMP12:%.*]] = load i32, i32* [[CONV]], align 4, !llvm.access.group !42 // CHECK10-NEXT: [[CONV2:%.*]] = bitcast i64* [[DOTCAPTURE_EXPR__CASTED]] to i32* // CHECK10-NEXT: store i32 [[TMP12]], i32* [[CONV2]], align 4, !llvm.access.group !42 // CHECK10-NEXT: [[TMP13:%.*]] = load i64, i64* [[DOTCAPTURE_EXPR__CASTED]], align 8, !llvm.access.group !42 diff --git a/clang/test/OpenMP/teams_distribute_parallel_for_simd_firstprivate_codegen.cpp b/clang/test/OpenMP/teams_distribute_parallel_for_simd_firstprivate_codegen.cpp index 6eb15b78e96f..b0a0e22909a8 100644 --- a/clang/test/OpenMP/teams_distribute_parallel_for_simd_firstprivate_codegen.cpp +++ b/clang/test/OpenMP/teams_distribute_parallel_for_simd_firstprivate_codegen.cpp @@ -385,11 +385,11 @@ int main() { // CHECK1-NEXT: [[TMP1:%.*]] = load [2 x %struct.S]*, [2 x %struct.S]** [[S_ARR_ADDR]], align 8 // CHECK1-NEXT: [[TMP2:%.*]] = load %struct.S*, %struct.S** [[VAR_ADDR]], align 8 // CHECK1-NEXT: [[CONV1:%.*]] = bitcast i64* [[SIVAR_ADDR]] to i32* -// CHECK1-NEXT: 
[[TMP3:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK1-NEXT: [[TMP3:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK1-NEXT: [[CONV2:%.*]] = bitcast i64* [[T_VAR_CASTED]] to i32* // CHECK1-NEXT: store i32 [[TMP3]], i32* [[CONV2]], align 4 // CHECK1-NEXT: [[TMP4:%.*]] = load i64, i64* [[T_VAR_CASTED]], align 8 -// CHECK1-NEXT: [[TMP5:%.*]] = load i32, i32* [[CONV1]], align 8 +// CHECK1-NEXT: [[TMP5:%.*]] = load i32, i32* [[CONV1]], align 4 // CHECK1-NEXT: [[CONV3:%.*]] = bitcast i64* [[SIVAR_CASTED]] to i32* // CHECK1-NEXT: store i32 [[TMP5]], i32* [[CONV3]], align 4 // CHECK1-NEXT: [[TMP6:%.*]] = load i64, i64* [[SIVAR_CASTED]], align 8 @@ -488,11 +488,11 @@ int main() { // CHECK1-NEXT: [[TMP15:%.*]] = zext i32 [[TMP14]] to i64 // CHECK1-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !5 // CHECK1-NEXT: [[TMP17:%.*]] = zext i32 [[TMP16]] to i64 -// CHECK1-NEXT: [[TMP18:%.*]] = load i32, i32* [[CONV]], align 8, !llvm.access.group !5 +// CHECK1-NEXT: [[TMP18:%.*]] = load i32, i32* [[CONV]], align 4, !llvm.access.group !5 // CHECK1-NEXT: [[CONV8:%.*]] = bitcast i64* [[T_VAR_CASTED]] to i32* // CHECK1-NEXT: store i32 [[TMP18]], i32* [[CONV8]], align 4, !llvm.access.group !5 // CHECK1-NEXT: [[TMP19:%.*]] = load i64, i64* [[T_VAR_CASTED]], align 8, !llvm.access.group !5 -// CHECK1-NEXT: [[TMP20:%.*]] = load i32, i32* [[CONV1]], align 8, !llvm.access.group !5 +// CHECK1-NEXT: [[TMP20:%.*]] = load i32, i32* [[CONV1]], align 4, !llvm.access.group !5 // CHECK1-NEXT: [[CONV9:%.*]] = bitcast i64* [[SIVAR_CASTED]] to i32* // CHECK1-NEXT: store i32 [[TMP20]], i32* [[CONV9]], align 4, !llvm.access.group !5 // CHECK1-NEXT: [[TMP21:%.*]] = load i64, i64* [[SIVAR_CASTED]], align 8, !llvm.access.group !5 @@ -663,7 +663,7 @@ int main() { // CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP16]], 1 // CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK1-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !9 -// CHECK1-NEXT: [[TMP17:%.*]] = load i32, i32* [[CONV]], align 8, !llvm.access.group !9 +// CHECK1-NEXT: [[TMP17:%.*]] = load i32, i32* [[CONV]], align 4, !llvm.access.group !9 // CHECK1-NEXT: [[TMP18:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !9 // CHECK1-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP18]] to i64 // CHECK1-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x i32], [2 x i32]* [[VEC4]], i64 0, i64 [[IDXPROM]] @@ -675,9 +675,9 @@ int main() { // CHECK1-NEXT: [[TMP21:%.*]] = bitcast %struct.S* [[VAR7]] to i8* // CHECK1-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 [[TMP20]], i8* align 4 [[TMP21]], i64 4, i1 false), !llvm.access.group !9 // CHECK1-NEXT: [[TMP22:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !9 -// CHECK1-NEXT: [[TMP23:%.*]] = load i32, i32* [[CONV1]], align 8, !llvm.access.group !9 +// CHECK1-NEXT: [[TMP23:%.*]] = load i32, i32* [[CONV1]], align 4, !llvm.access.group !9 // CHECK1-NEXT: [[ADD12:%.*]] = add nsw i32 [[TMP23]], [[TMP22]] -// CHECK1-NEXT: store i32 [[ADD12]], i32* [[CONV1]], align 8, !llvm.access.group !9 +// CHECK1-NEXT: store i32 [[ADD12]], i32* [[CONV1]], align 4, !llvm.access.group !9 // CHECK1-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK1: omp.body.continue: // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] @@ -887,7 +887,7 @@ int main() { // CHECK1-NEXT: [[TMP1:%.*]] = load [2 x %struct.S.0]*, [2 x %struct.S.0]** [[S_ARR_ADDR]], align 8 // CHECK1-NEXT: [[TMP2:%.*]] = load %struct.S.0*, %struct.S.0** [[VAR_ADDR]], align 8 // CHECK1-NEXT: store %struct.S.0* [[TMP2]], 
%struct.S.0** [[TMP]], align 8 -// CHECK1-NEXT: [[TMP3:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK1-NEXT: [[TMP3:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK1-NEXT: [[CONV1:%.*]] = bitcast i64* [[T_VAR_CASTED]] to i32* // CHECK1-NEXT: store i32 [[TMP3]], i32* [[CONV1]], align 4 // CHECK1-NEXT: [[TMP4:%.*]] = load i64, i64* [[T_VAR_CASTED]], align 8 @@ -988,7 +988,7 @@ int main() { // CHECK1-NEXT: [[TMP16:%.*]] = zext i32 [[TMP15]] to i64 // CHECK1-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !14 // CHECK1-NEXT: [[TMP18:%.*]] = zext i32 [[TMP17]] to i64 -// CHECK1-NEXT: [[TMP19:%.*]] = load i32, i32* [[CONV]], align 8, !llvm.access.group !14 +// CHECK1-NEXT: [[TMP19:%.*]] = load i32, i32* [[CONV]], align 4, !llvm.access.group !14 // CHECK1-NEXT: [[CONV9:%.*]] = bitcast i64* [[T_VAR_CASTED]] to i32* // CHECK1-NEXT: store i32 [[TMP19]], i32* [[CONV9]], align 4, !llvm.access.group !14 // CHECK1-NEXT: [[TMP20:%.*]] = load i64, i64* [[T_VAR_CASTED]], align 8, !llvm.access.group !14 @@ -1142,7 +1142,7 @@ int main() { // CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP17]], 1 // CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK1-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !17 -// CHECK1-NEXT: [[TMP18:%.*]] = load i32, i32* [[CONV]], align 8, !llvm.access.group !17 +// CHECK1-NEXT: [[TMP18:%.*]] = load i32, i32* [[CONV]], align 4, !llvm.access.group !17 // CHECK1-NEXT: [[TMP19:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !17 // CHECK1-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP19]] to i64 // CHECK1-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x i32], [2 x i32]* [[VEC4]], i64 0, i64 [[IDXPROM]] @@ -1477,11 +1477,11 @@ int main() { // CHECK2-NEXT: [[TMP1:%.*]] = load [2 x %struct.S]*, [2 x %struct.S]** [[S_ARR_ADDR]], align 8 // CHECK2-NEXT: [[TMP2:%.*]] = load %struct.S*, %struct.S** [[VAR_ADDR]], align 8 // CHECK2-NEXT: [[CONV1:%.*]] = bitcast i64* [[SIVAR_ADDR]] to i32* -// CHECK2-NEXT: [[TMP3:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK2-NEXT: [[TMP3:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK2-NEXT: [[CONV2:%.*]] = bitcast i64* [[T_VAR_CASTED]] to i32* // CHECK2-NEXT: store i32 [[TMP3]], i32* [[CONV2]], align 4 // CHECK2-NEXT: [[TMP4:%.*]] = load i64, i64* [[T_VAR_CASTED]], align 8 -// CHECK2-NEXT: [[TMP5:%.*]] = load i32, i32* [[CONV1]], align 8 +// CHECK2-NEXT: [[TMP5:%.*]] = load i32, i32* [[CONV1]], align 4 // CHECK2-NEXT: [[CONV3:%.*]] = bitcast i64* [[SIVAR_CASTED]] to i32* // CHECK2-NEXT: store i32 [[TMP5]], i32* [[CONV3]], align 4 // CHECK2-NEXT: [[TMP6:%.*]] = load i64, i64* [[SIVAR_CASTED]], align 8 @@ -1580,11 +1580,11 @@ int main() { // CHECK2-NEXT: [[TMP15:%.*]] = zext i32 [[TMP14]] to i64 // CHECK2-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !5 // CHECK2-NEXT: [[TMP17:%.*]] = zext i32 [[TMP16]] to i64 -// CHECK2-NEXT: [[TMP18:%.*]] = load i32, i32* [[CONV]], align 8, !llvm.access.group !5 +// CHECK2-NEXT: [[TMP18:%.*]] = load i32, i32* [[CONV]], align 4, !llvm.access.group !5 // CHECK2-NEXT: [[CONV8:%.*]] = bitcast i64* [[T_VAR_CASTED]] to i32* // CHECK2-NEXT: store i32 [[TMP18]], i32* [[CONV8]], align 4, !llvm.access.group !5 // CHECK2-NEXT: [[TMP19:%.*]] = load i64, i64* [[T_VAR_CASTED]], align 8, !llvm.access.group !5 -// CHECK2-NEXT: [[TMP20:%.*]] = load i32, i32* [[CONV1]], align 8, !llvm.access.group !5 +// CHECK2-NEXT: [[TMP20:%.*]] = load i32, i32* [[CONV1]], align 4, !llvm.access.group !5 // CHECK2-NEXT: [[CONV9:%.*]] = bitcast 
i64* [[SIVAR_CASTED]] to i32* // CHECK2-NEXT: store i32 [[TMP20]], i32* [[CONV9]], align 4, !llvm.access.group !5 // CHECK2-NEXT: [[TMP21:%.*]] = load i64, i64* [[SIVAR_CASTED]], align 8, !llvm.access.group !5 @@ -1755,7 +1755,7 @@ int main() { // CHECK2-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP16]], 1 // CHECK2-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK2-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !9 -// CHECK2-NEXT: [[TMP17:%.*]] = load i32, i32* [[CONV]], align 8, !llvm.access.group !9 +// CHECK2-NEXT: [[TMP17:%.*]] = load i32, i32* [[CONV]], align 4, !llvm.access.group !9 // CHECK2-NEXT: [[TMP18:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !9 // CHECK2-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP18]] to i64 // CHECK2-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x i32], [2 x i32]* [[VEC4]], i64 0, i64 [[IDXPROM]] @@ -1767,9 +1767,9 @@ int main() { // CHECK2-NEXT: [[TMP21:%.*]] = bitcast %struct.S* [[VAR7]] to i8* // CHECK2-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 [[TMP20]], i8* align 4 [[TMP21]], i64 4, i1 false), !llvm.access.group !9 // CHECK2-NEXT: [[TMP22:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !9 -// CHECK2-NEXT: [[TMP23:%.*]] = load i32, i32* [[CONV1]], align 8, !llvm.access.group !9 +// CHECK2-NEXT: [[TMP23:%.*]] = load i32, i32* [[CONV1]], align 4, !llvm.access.group !9 // CHECK2-NEXT: [[ADD12:%.*]] = add nsw i32 [[TMP23]], [[TMP22]] -// CHECK2-NEXT: store i32 [[ADD12]], i32* [[CONV1]], align 8, !llvm.access.group !9 +// CHECK2-NEXT: store i32 [[ADD12]], i32* [[CONV1]], align 4, !llvm.access.group !9 // CHECK2-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK2: omp.body.continue: // CHECK2-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] @@ -1979,7 +1979,7 @@ int main() { // CHECK2-NEXT: [[TMP1:%.*]] = load [2 x %struct.S.0]*, [2 x %struct.S.0]** [[S_ARR_ADDR]], align 8 // CHECK2-NEXT: [[TMP2:%.*]] = load %struct.S.0*, %struct.S.0** [[VAR_ADDR]], align 8 // CHECK2-NEXT: store %struct.S.0* [[TMP2]], %struct.S.0** [[TMP]], align 8 -// CHECK2-NEXT: [[TMP3:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK2-NEXT: [[TMP3:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK2-NEXT: [[CONV1:%.*]] = bitcast i64* [[T_VAR_CASTED]] to i32* // CHECK2-NEXT: store i32 [[TMP3]], i32* [[CONV1]], align 4 // CHECK2-NEXT: [[TMP4:%.*]] = load i64, i64* [[T_VAR_CASTED]], align 8 @@ -2080,7 +2080,7 @@ int main() { // CHECK2-NEXT: [[TMP16:%.*]] = zext i32 [[TMP15]] to i64 // CHECK2-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !14 // CHECK2-NEXT: [[TMP18:%.*]] = zext i32 [[TMP17]] to i64 -// CHECK2-NEXT: [[TMP19:%.*]] = load i32, i32* [[CONV]], align 8, !llvm.access.group !14 +// CHECK2-NEXT: [[TMP19:%.*]] = load i32, i32* [[CONV]], align 4, !llvm.access.group !14 // CHECK2-NEXT: [[CONV9:%.*]] = bitcast i64* [[T_VAR_CASTED]] to i32* // CHECK2-NEXT: store i32 [[TMP19]], i32* [[CONV9]], align 4, !llvm.access.group !14 // CHECK2-NEXT: [[TMP20:%.*]] = load i64, i64* [[T_VAR_CASTED]], align 8, !llvm.access.group !14 @@ -2234,7 +2234,7 @@ int main() { // CHECK2-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP17]], 1 // CHECK2-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK2-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !17 -// CHECK2-NEXT: [[TMP18:%.*]] = load i32, i32* [[CONV]], align 8, !llvm.access.group !17 +// CHECK2-NEXT: [[TMP18:%.*]] = load i32, i32* [[CONV]], align 4, !llvm.access.group !17 // CHECK2-NEXT: [[TMP19:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !17 // CHECK2-NEXT: 
[[IDXPROM:%.*]] = sext i32 [[TMP19]] to i64 // CHECK2-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x i32], [2 x i32]* [[VEC4]], i64 0, i64 [[IDXPROM]] @@ -5935,7 +5935,7 @@ int main() { // CHECK9-NEXT: [[CONV1:%.*]] = bitcast i64* [[SIVAR_ADDR]] to i32* // CHECK9-NEXT: [[CONV2:%.*]] = bitcast i64* [[G1_ADDR]] to i32* // CHECK9-NEXT: store i32* [[CONV2]], i32** [[TMP]], align 8 -// CHECK9-NEXT: [[TMP0:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK9-NEXT: [[TMP0:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK9-NEXT: [[CONV3:%.*]] = bitcast i64* [[G_CASTED]] to i32* // CHECK9-NEXT: store i32 [[TMP0]], i32* [[CONV3]], align 4 // CHECK9-NEXT: [[TMP1:%.*]] = load i64, i64* [[G_CASTED]], align 8 @@ -5944,7 +5944,7 @@ int main() { // CHECK9-NEXT: [[CONV4:%.*]] = bitcast i64* [[G1_CASTED]] to i32* // CHECK9-NEXT: store i32 [[TMP3]], i32* [[CONV4]], align 4 // CHECK9-NEXT: [[TMP4:%.*]] = load i64, i64* [[G1_CASTED]], align 8 -// CHECK9-NEXT: [[TMP5:%.*]] = load i32, i32* [[CONV1]], align 8 +// CHECK9-NEXT: [[TMP5:%.*]] = load i32, i32* [[CONV1]], align 4 // CHECK9-NEXT: [[CONV5:%.*]] = bitcast i64* [[SIVAR_CASTED]] to i32* // CHECK9-NEXT: store i32 [[TMP5]], i32* [[CONV5]], align 4 // CHECK9-NEXT: [[TMP6:%.*]] = load i64, i64* [[SIVAR_CASTED]], align 8 @@ -6011,7 +6011,7 @@ int main() { // CHECK9-NEXT: [[TMP8:%.*]] = zext i32 [[TMP7]] to i64 // CHECK9-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !4 // CHECK9-NEXT: [[TMP10:%.*]] = zext i32 [[TMP9]] to i64 -// CHECK9-NEXT: [[TMP11:%.*]] = load i32, i32* [[CONV]], align 8, !llvm.access.group !4 +// CHECK9-NEXT: [[TMP11:%.*]] = load i32, i32* [[CONV]], align 4, !llvm.access.group !4 // CHECK9-NEXT: [[CONV5:%.*]] = bitcast i64* [[G_CASTED]] to i32* // CHECK9-NEXT: store i32 [[TMP11]], i32* [[CONV5]], align 4, !llvm.access.group !4 // CHECK9-NEXT: [[TMP12:%.*]] = load i64, i64* [[G_CASTED]], align 8, !llvm.access.group !4 @@ -6020,7 +6020,7 @@ int main() { // CHECK9-NEXT: [[CONV6:%.*]] = bitcast i64* [[G1_CASTED]] to i32* // CHECK9-NEXT: store i32 [[TMP14]], i32* [[CONV6]], align 4, !llvm.access.group !4 // CHECK9-NEXT: [[TMP15:%.*]] = load i64, i64* [[G1_CASTED]], align 8, !llvm.access.group !4 -// CHECK9-NEXT: [[TMP16:%.*]] = load i32, i32* [[CONV2]], align 8, !llvm.access.group !4 +// CHECK9-NEXT: [[TMP16:%.*]] = load i32, i32* [[CONV2]], align 4, !llvm.access.group !4 // CHECK9-NEXT: [[CONV7:%.*]] = bitcast i64* [[SIVAR_CASTED]] to i32* // CHECK9-NEXT: store i32 [[TMP16]], i32* [[CONV7]], align 4, !llvm.access.group !4 // CHECK9-NEXT: [[TMP17:%.*]] = load i64, i64* [[SIVAR_CASTED]], align 8, !llvm.access.group !4 @@ -6113,10 +6113,10 @@ int main() { // CHECK9-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP9]], 1 // CHECK9-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK9-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !8 -// CHECK9-NEXT: store i32 1, i32* [[CONV]], align 8, !llvm.access.group !8 +// CHECK9-NEXT: store i32 1, i32* [[CONV]], align 4, !llvm.access.group !8 // CHECK9-NEXT: [[TMP10:%.*]] = load i32*, i32** [[TMP]], align 8, !llvm.access.group !8 // CHECK9-NEXT: store volatile i32 1, i32* [[TMP10]], align 4, !llvm.access.group !8 -// CHECK9-NEXT: store i32 2, i32* [[CONV2]], align 8, !llvm.access.group !8 +// CHECK9-NEXT: store i32 2, i32* [[CONV2]], align 4, !llvm.access.group !8 // CHECK9-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[CLASS_ANON_0]], %class.anon.0* [[REF_TMP]], i32 0, i32 0 // CHECK9-NEXT: store i32* [[CONV]], i32** [[TMP11]], align 8, !llvm.access.group !8 // 
CHECK9-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[CLASS_ANON_0]], %class.anon.0* [[REF_TMP]], i32 0, i32 1 @@ -6303,7 +6303,7 @@ int main() { // CHECK10-NEXT: [[CONV1:%.*]] = bitcast i64* [[SIVAR_ADDR]] to i32* // CHECK10-NEXT: [[CONV2:%.*]] = bitcast i64* [[G1_ADDR]] to i32* // CHECK10-NEXT: store i32* [[CONV2]], i32** [[TMP]], align 8 -// CHECK10-NEXT: [[TMP0:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK10-NEXT: [[TMP0:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK10-NEXT: [[CONV3:%.*]] = bitcast i64* [[G_CASTED]] to i32* // CHECK10-NEXT: store i32 [[TMP0]], i32* [[CONV3]], align 4 // CHECK10-NEXT: [[TMP1:%.*]] = load i64, i64* [[G_CASTED]], align 8 @@ -6312,7 +6312,7 @@ int main() { // CHECK10-NEXT: [[CONV4:%.*]] = bitcast i64* [[G1_CASTED]] to i32* // CHECK10-NEXT: store i32 [[TMP3]], i32* [[CONV4]], align 4 // CHECK10-NEXT: [[TMP4:%.*]] = load i64, i64* [[G1_CASTED]], align 8 -// CHECK10-NEXT: [[TMP5:%.*]] = load i32, i32* [[CONV1]], align 8 +// CHECK10-NEXT: [[TMP5:%.*]] = load i32, i32* [[CONV1]], align 4 // CHECK10-NEXT: [[CONV5:%.*]] = bitcast i64* [[SIVAR_CASTED]] to i32* // CHECK10-NEXT: store i32 [[TMP5]], i32* [[CONV5]], align 4 // CHECK10-NEXT: [[TMP6:%.*]] = load i64, i64* [[SIVAR_CASTED]], align 8 @@ -6379,7 +6379,7 @@ int main() { // CHECK10-NEXT: [[TMP8:%.*]] = zext i32 [[TMP7]] to i64 // CHECK10-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !4 // CHECK10-NEXT: [[TMP10:%.*]] = zext i32 [[TMP9]] to i64 -// CHECK10-NEXT: [[TMP11:%.*]] = load i32, i32* [[CONV]], align 8, !llvm.access.group !4 +// CHECK10-NEXT: [[TMP11:%.*]] = load i32, i32* [[CONV]], align 4, !llvm.access.group !4 // CHECK10-NEXT: [[CONV5:%.*]] = bitcast i64* [[G_CASTED]] to i32* // CHECK10-NEXT: store i32 [[TMP11]], i32* [[CONV5]], align 4, !llvm.access.group !4 // CHECK10-NEXT: [[TMP12:%.*]] = load i64, i64* [[G_CASTED]], align 8, !llvm.access.group !4 @@ -6388,7 +6388,7 @@ int main() { // CHECK10-NEXT: [[CONV6:%.*]] = bitcast i64* [[G1_CASTED]] to i32* // CHECK10-NEXT: store i32 [[TMP14]], i32* [[CONV6]], align 4, !llvm.access.group !4 // CHECK10-NEXT: [[TMP15:%.*]] = load i64, i64* [[G1_CASTED]], align 8, !llvm.access.group !4 -// CHECK10-NEXT: [[TMP16:%.*]] = load i32, i32* [[CONV2]], align 8, !llvm.access.group !4 +// CHECK10-NEXT: [[TMP16:%.*]] = load i32, i32* [[CONV2]], align 4, !llvm.access.group !4 // CHECK10-NEXT: [[CONV7:%.*]] = bitcast i64* [[SIVAR_CASTED]] to i32* // CHECK10-NEXT: store i32 [[TMP16]], i32* [[CONV7]], align 4, !llvm.access.group !4 // CHECK10-NEXT: [[TMP17:%.*]] = load i64, i64* [[SIVAR_CASTED]], align 8, !llvm.access.group !4 @@ -6481,10 +6481,10 @@ int main() { // CHECK10-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP9]], 1 // CHECK10-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK10-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !8 -// CHECK10-NEXT: store i32 1, i32* [[CONV]], align 8, !llvm.access.group !8 +// CHECK10-NEXT: store i32 1, i32* [[CONV]], align 4, !llvm.access.group !8 // CHECK10-NEXT: [[TMP10:%.*]] = load i32*, i32** [[TMP]], align 8, !llvm.access.group !8 // CHECK10-NEXT: store volatile i32 1, i32* [[TMP10]], align 4, !llvm.access.group !8 -// CHECK10-NEXT: store i32 2, i32* [[CONV2]], align 8, !llvm.access.group !8 +// CHECK10-NEXT: store i32 2, i32* [[CONV2]], align 4, !llvm.access.group !8 // CHECK10-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[CLASS_ANON_0]], %class.anon.0* [[REF_TMP]], i32 0, i32 0 // CHECK10-NEXT: store i32* [[CONV]], i32** [[TMP11]], align 8, !llvm.access.group !8 // 
CHECK10-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[CLASS_ANON_0]], %class.anon.0* [[REF_TMP]], i32 0, i32 1 diff --git a/clang/test/OpenMP/teams_distribute_parallel_for_simd_if_codegen.cpp b/clang/test/OpenMP/teams_distribute_parallel_for_simd_if_codegen.cpp index 083654213f5e..45182c712032 100644 --- a/clang/test/OpenMP/teams_distribute_parallel_for_simd_if_codegen.cpp +++ b/clang/test/OpenMP/teams_distribute_parallel_for_simd_if_codegen.cpp @@ -822,7 +822,7 @@ int main() { // CHECK1-NEXT: [[DOTCAPTURE_EXPR__CASTED:%.*]] = alloca i64, align 8 // CHECK1-NEXT: store i64 [[ARG]], i64* [[ARG_ADDR]], align 8 // CHECK1-NEXT: [[CONV:%.*]] = bitcast i64* [[ARG_ADDR]] to i32* -// CHECK1-NEXT: [[TMP0:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK1-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[TMP0]], 0 // CHECK1-NEXT: [[FROMBOOL:%.*]] = zext i1 [[TOBOOL]] to i8 // CHECK1-NEXT: store i8 [[FROMBOOL]], i8* [[DOTCAPTURE_EXPR_]], align 1 @@ -885,7 +885,7 @@ int main() { // CHECK1-NEXT: [[TMP8:%.*]] = zext i32 [[TMP7]] to i64 // CHECK1-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !38 // CHECK1-NEXT: [[TMP10:%.*]] = zext i32 [[TMP9]] to i64 -// CHECK1-NEXT: [[TMP11:%.*]] = load i8, i8* [[CONV]], align 8, !llvm.access.group !38 +// CHECK1-NEXT: [[TMP11:%.*]] = load i8, i8* [[CONV]], align 1, !llvm.access.group !38 // CHECK1-NEXT: [[TOBOOL:%.*]] = trunc i8 [[TMP11]] to i1 // CHECK1-NEXT: br i1 [[TOBOOL]], label [[OMP_IF_THEN:%.*]], label [[OMP_IF_ELSE:%.*]] // CHECK1: omp_if.then: @@ -1376,7 +1376,7 @@ int main() { // CHECK1-NEXT: [[DOTCAPTURE_EXPR__CASTED:%.*]] = alloca i64, align 8 // CHECK1-NEXT: store i64 [[ARG]], i64* [[ARG_ADDR]], align 8 // CHECK1-NEXT: [[CONV:%.*]] = bitcast i64* [[ARG_ADDR]] to i32* -// CHECK1-NEXT: [[TMP0:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK1-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[TMP0]], 0 // CHECK1-NEXT: [[FROMBOOL:%.*]] = zext i1 [[TOBOOL]] to i8 // CHECK1-NEXT: store i8 [[FROMBOOL]], i8* [[DOTCAPTURE_EXPR_]], align 1 @@ -1439,7 +1439,7 @@ int main() { // CHECK1-NEXT: [[TMP8:%.*]] = zext i32 [[TMP7]] to i64 // CHECK1-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !56 // CHECK1-NEXT: [[TMP10:%.*]] = zext i32 [[TMP9]] to i64 -// CHECK1-NEXT: [[TMP11:%.*]] = load i8, i8* [[CONV]], align 8, !llvm.access.group !56 +// CHECK1-NEXT: [[TMP11:%.*]] = load i8, i8* [[CONV]], align 1, !llvm.access.group !56 // CHECK1-NEXT: [[TOBOOL:%.*]] = trunc i8 [[TMP11]] to i1 // CHECK1-NEXT: br i1 [[TOBOOL]], label [[OMP_IF_THEN:%.*]], label [[OMP_IF_ELSE:%.*]] // CHECK1: omp_if.then: @@ -2271,7 +2271,7 @@ int main() { // CHECK2-NEXT: [[DOTCAPTURE_EXPR__CASTED:%.*]] = alloca i64, align 8 // CHECK2-NEXT: store i64 [[ARG]], i64* [[ARG_ADDR]], align 8 // CHECK2-NEXT: [[CONV:%.*]] = bitcast i64* [[ARG_ADDR]] to i32* -// CHECK2-NEXT: [[TMP0:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK2-NEXT: [[TMP0:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK2-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[TMP0]], 0 // CHECK2-NEXT: [[FROMBOOL:%.*]] = zext i1 [[TOBOOL]] to i8 // CHECK2-NEXT: store i8 [[FROMBOOL]], i8* [[DOTCAPTURE_EXPR_]], align 1 @@ -2334,7 +2334,7 @@ int main() { // CHECK2-NEXT: [[TMP8:%.*]] = zext i32 [[TMP7]] to i64 // CHECK2-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !38 // CHECK2-NEXT: [[TMP10:%.*]] = zext i32 [[TMP9]] to i64 -// CHECK2-NEXT: 
[[TMP11:%.*]] = load i8, i8* [[CONV]], align 8, !llvm.access.group !38 +// CHECK2-NEXT: [[TMP11:%.*]] = load i8, i8* [[CONV]], align 1, !llvm.access.group !38 // CHECK2-NEXT: [[TOBOOL:%.*]] = trunc i8 [[TMP11]] to i1 // CHECK2-NEXT: br i1 [[TOBOOL]], label [[OMP_IF_THEN:%.*]], label [[OMP_IF_ELSE:%.*]] // CHECK2: omp_if.then: @@ -2825,7 +2825,7 @@ int main() { // CHECK2-NEXT: [[DOTCAPTURE_EXPR__CASTED:%.*]] = alloca i64, align 8 // CHECK2-NEXT: store i64 [[ARG]], i64* [[ARG_ADDR]], align 8 // CHECK2-NEXT: [[CONV:%.*]] = bitcast i64* [[ARG_ADDR]] to i32* -// CHECK2-NEXT: [[TMP0:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK2-NEXT: [[TMP0:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK2-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[TMP0]], 0 // CHECK2-NEXT: [[FROMBOOL:%.*]] = zext i1 [[TOBOOL]] to i8 // CHECK2-NEXT: store i8 [[FROMBOOL]], i8* [[DOTCAPTURE_EXPR_]], align 1 @@ -2888,7 +2888,7 @@ int main() { // CHECK2-NEXT: [[TMP8:%.*]] = zext i32 [[TMP7]] to i64 // CHECK2-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !56 // CHECK2-NEXT: [[TMP10:%.*]] = zext i32 [[TMP9]] to i64 -// CHECK2-NEXT: [[TMP11:%.*]] = load i8, i8* [[CONV]], align 8, !llvm.access.group !56 +// CHECK2-NEXT: [[TMP11:%.*]] = load i8, i8* [[CONV]], align 1, !llvm.access.group !56 // CHECK2-NEXT: [[TOBOOL:%.*]] = trunc i8 [[TMP11]] to i1 // CHECK2-NEXT: br i1 [[TOBOOL]], label [[OMP_IF_THEN:%.*]], label [[OMP_IF_ELSE:%.*]] // CHECK2: omp_if.then: @@ -3720,7 +3720,7 @@ int main() { // CHECK3-NEXT: [[DOTCAPTURE_EXPR__CASTED:%.*]] = alloca i64, align 8 // CHECK3-NEXT: store i64 [[ARG]], i64* [[ARG_ADDR]], align 8 // CHECK3-NEXT: [[CONV:%.*]] = bitcast i64* [[ARG_ADDR]] to i32* -// CHECK3-NEXT: [[TMP0:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK3-NEXT: [[TMP0:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK3-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[TMP0]], 0 // CHECK3-NEXT: [[FROMBOOL:%.*]] = zext i1 [[TOBOOL]] to i8 // CHECK3-NEXT: store i8 [[FROMBOOL]], i8* [[DOTCAPTURE_EXPR_]], align 1 @@ -3775,7 +3775,7 @@ int main() { // CHECK3-NEXT: store i32 [[COND]], i32* [[DOTOMP_COMB_UB]], align 4 // CHECK3-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 // CHECK3-NEXT: store i32 [[TMP4]], i32* [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[TMP5:%.*]] = load i8, i8* [[CONV]], align 8 +// CHECK3-NEXT: [[TMP5:%.*]] = load i8, i8* [[CONV]], align 1 // CHECK3-NEXT: [[TOBOOL:%.*]] = trunc i8 [[TMP5]] to i1 // CHECK3-NEXT: br i1 [[TOBOOL]], label [[OMP_IF_THEN:%.*]], label [[OMP_IF_ELSE6:%.*]] // CHECK3: omp_if.then: @@ -3790,13 +3790,13 @@ int main() { // CHECK3-NEXT: [[TMP9:%.*]] = zext i32 [[TMP8]] to i64 // CHECK3-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !35 // CHECK3-NEXT: [[TMP11:%.*]] = zext i32 [[TMP10]] to i64 -// CHECK3-NEXT: [[TMP12:%.*]] = load i8, i8* [[CONV]], align 8, !llvm.access.group !35 +// CHECK3-NEXT: [[TMP12:%.*]] = load i8, i8* [[CONV]], align 1, !llvm.access.group !35 // CHECK3-NEXT: [[TOBOOL2:%.*]] = trunc i8 [[TMP12]] to i1 // CHECK3-NEXT: [[CONV3:%.*]] = bitcast i64* [[DOTCAPTURE_EXPR__CASTED]] to i8* // CHECK3-NEXT: [[FROMBOOL:%.*]] = zext i1 [[TOBOOL2]] to i8 // CHECK3-NEXT: store i8 [[FROMBOOL]], i8* [[CONV3]], align 1, !llvm.access.group !35 // CHECK3-NEXT: [[TMP13:%.*]] = load i64, i64* [[DOTCAPTURE_EXPR__CASTED]], align 8, !llvm.access.group !35 -// CHECK3-NEXT: [[TMP14:%.*]] = load i8, i8* [[CONV]], align 8, !llvm.access.group !35 +// CHECK3-NEXT: [[TMP14:%.*]] = load i8, i8* [[CONV]], align 1, !llvm.access.group 
!35 // CHECK3-NEXT: [[TOBOOL4:%.*]] = trunc i8 [[TMP14]] to i1 // CHECK3-NEXT: br i1 [[TOBOOL4]], label [[OMP_IF_THEN5:%.*]], label [[OMP_IF_ELSE:%.*]] // CHECK3: omp_if.then5: @@ -3831,13 +3831,13 @@ int main() { // CHECK3-NEXT: [[TMP21:%.*]] = zext i32 [[TMP20]] to i64 // CHECK3-NEXT: [[TMP22:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK3-NEXT: [[TMP23:%.*]] = zext i32 [[TMP22]] to i64 -// CHECK3-NEXT: [[TMP24:%.*]] = load i8, i8* [[CONV]], align 8 +// CHECK3-NEXT: [[TMP24:%.*]] = load i8, i8* [[CONV]], align 1 // CHECK3-NEXT: [[TOBOOL10:%.*]] = trunc i8 [[TMP24]] to i1 // CHECK3-NEXT: [[CONV12:%.*]] = bitcast i64* [[DOTCAPTURE_EXPR__CASTED11]] to i8* // CHECK3-NEXT: [[FROMBOOL13:%.*]] = zext i1 [[TOBOOL10]] to i8 // CHECK3-NEXT: store i8 [[FROMBOOL13]], i8* [[CONV12]], align 1 // CHECK3-NEXT: [[TMP25:%.*]] = load i64, i64* [[DOTCAPTURE_EXPR__CASTED11]], align 8 -// CHECK3-NEXT: [[TMP26:%.*]] = load i8, i8* [[CONV]], align 8 +// CHECK3-NEXT: [[TMP26:%.*]] = load i8, i8* [[CONV]], align 1 // CHECK3-NEXT: [[TOBOOL14:%.*]] = trunc i8 [[TMP26]] to i1 // CHECK3-NEXT: br i1 [[TOBOOL14]], label [[OMP_IF_THEN15:%.*]], label [[OMP_IF_ELSE16:%.*]] // CHECK3: omp_if.then15: @@ -3905,7 +3905,7 @@ int main() { // CHECK3-NEXT: store i32 [[CONV2]], i32* [[DOTOMP_UB]], align 4 // CHECK3-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK3-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK3-NEXT: [[TMP2:%.*]] = load i8, i8* [[CONV]], align 8 +// CHECK3-NEXT: [[TMP2:%.*]] = load i8, i8* [[CONV]], align 1 // CHECK3-NEXT: [[TOBOOL:%.*]] = trunc i8 [[TMP2]] to i1 // CHECK3-NEXT: br i1 [[TOBOOL]], label [[OMP_IF_THEN:%.*]], label [[OMP_IF_ELSE:%.*]] // CHECK3: omp_if.then: @@ -4033,7 +4033,7 @@ int main() { // CHECK3-NEXT: store i32 [[CONV2]], i32* [[DOTOMP_UB]], align 4 // CHECK3-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK3-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK3-NEXT: [[TMP2:%.*]] = load i8, i8* [[CONV]], align 8 +// CHECK3-NEXT: [[TMP2:%.*]] = load i8, i8* [[CONV]], align 1 // CHECK3-NEXT: [[TOBOOL:%.*]] = trunc i8 [[TMP2]] to i1 // CHECK3-NEXT: br i1 [[TOBOOL]], label [[OMP_IF_THEN:%.*]], label [[OMP_IF_ELSE:%.*]] // CHECK3: omp_if.then: @@ -4508,7 +4508,7 @@ int main() { // CHECK3-NEXT: [[DOTCAPTURE_EXPR__CASTED:%.*]] = alloca i64, align 8 // CHECK3-NEXT: store i64 [[ARG]], i64* [[ARG_ADDR]], align 8 // CHECK3-NEXT: [[CONV:%.*]] = bitcast i64* [[ARG_ADDR]] to i32* -// CHECK3-NEXT: [[TMP0:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK3-NEXT: [[TMP0:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK3-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[TMP0]], 0 // CHECK3-NEXT: [[FROMBOOL:%.*]] = zext i1 [[TOBOOL]] to i8 // CHECK3-NEXT: store i8 [[FROMBOOL]], i8* [[DOTCAPTURE_EXPR_]], align 1 @@ -4571,7 +4571,7 @@ int main() { // CHECK3-NEXT: [[TMP8:%.*]] = zext i32 [[TMP7]] to i64 // CHECK3-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !55 // CHECK3-NEXT: [[TMP10:%.*]] = zext i32 [[TMP9]] to i64 -// CHECK3-NEXT: [[TMP11:%.*]] = load i8, i8* [[CONV]], align 8, !llvm.access.group !55 +// CHECK3-NEXT: [[TMP11:%.*]] = load i8, i8* [[CONV]], align 1, !llvm.access.group !55 // CHECK3-NEXT: [[TOBOOL:%.*]] = trunc i8 [[TMP11]] to i1 // CHECK3-NEXT: br i1 [[TOBOOL]], label [[OMP_IF_THEN:%.*]], label [[OMP_IF_ELSE:%.*]] // CHECK3: omp_if.then: @@ -5403,7 +5403,7 @@ int main() { // CHECK4-NEXT: [[DOTCAPTURE_EXPR__CASTED:%.*]] = alloca i64, align 8 // CHECK4-NEXT: store i64 [[ARG]], i64* [[ARG_ADDR]], align 8 // 
CHECK4-NEXT: [[CONV:%.*]] = bitcast i64* [[ARG_ADDR]] to i32* -// CHECK4-NEXT: [[TMP0:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK4-NEXT: [[TMP0:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK4-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[TMP0]], 0 // CHECK4-NEXT: [[FROMBOOL:%.*]] = zext i1 [[TOBOOL]] to i8 // CHECK4-NEXT: store i8 [[FROMBOOL]], i8* [[DOTCAPTURE_EXPR_]], align 1 @@ -5458,7 +5458,7 @@ int main() { // CHECK4-NEXT: store i32 [[COND]], i32* [[DOTOMP_COMB_UB]], align 4 // CHECK4-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 // CHECK4-NEXT: store i32 [[TMP4]], i32* [[DOTOMP_IV]], align 4 -// CHECK4-NEXT: [[TMP5:%.*]] = load i8, i8* [[CONV]], align 8 +// CHECK4-NEXT: [[TMP5:%.*]] = load i8, i8* [[CONV]], align 1 // CHECK4-NEXT: [[TOBOOL:%.*]] = trunc i8 [[TMP5]] to i1 // CHECK4-NEXT: br i1 [[TOBOOL]], label [[OMP_IF_THEN:%.*]], label [[OMP_IF_ELSE6:%.*]] // CHECK4: omp_if.then: @@ -5473,13 +5473,13 @@ int main() { // CHECK4-NEXT: [[TMP9:%.*]] = zext i32 [[TMP8]] to i64 // CHECK4-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !35 // CHECK4-NEXT: [[TMP11:%.*]] = zext i32 [[TMP10]] to i64 -// CHECK4-NEXT: [[TMP12:%.*]] = load i8, i8* [[CONV]], align 8, !llvm.access.group !35 +// CHECK4-NEXT: [[TMP12:%.*]] = load i8, i8* [[CONV]], align 1, !llvm.access.group !35 // CHECK4-NEXT: [[TOBOOL2:%.*]] = trunc i8 [[TMP12]] to i1 // CHECK4-NEXT: [[CONV3:%.*]] = bitcast i64* [[DOTCAPTURE_EXPR__CASTED]] to i8* // CHECK4-NEXT: [[FROMBOOL:%.*]] = zext i1 [[TOBOOL2]] to i8 // CHECK4-NEXT: store i8 [[FROMBOOL]], i8* [[CONV3]], align 1, !llvm.access.group !35 // CHECK4-NEXT: [[TMP13:%.*]] = load i64, i64* [[DOTCAPTURE_EXPR__CASTED]], align 8, !llvm.access.group !35 -// CHECK4-NEXT: [[TMP14:%.*]] = load i8, i8* [[CONV]], align 8, !llvm.access.group !35 +// CHECK4-NEXT: [[TMP14:%.*]] = load i8, i8* [[CONV]], align 1, !llvm.access.group !35 // CHECK4-NEXT: [[TOBOOL4:%.*]] = trunc i8 [[TMP14]] to i1 // CHECK4-NEXT: br i1 [[TOBOOL4]], label [[OMP_IF_THEN5:%.*]], label [[OMP_IF_ELSE:%.*]] // CHECK4: omp_if.then5: @@ -5514,13 +5514,13 @@ int main() { // CHECK4-NEXT: [[TMP21:%.*]] = zext i32 [[TMP20]] to i64 // CHECK4-NEXT: [[TMP22:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK4-NEXT: [[TMP23:%.*]] = zext i32 [[TMP22]] to i64 -// CHECK4-NEXT: [[TMP24:%.*]] = load i8, i8* [[CONV]], align 8 +// CHECK4-NEXT: [[TMP24:%.*]] = load i8, i8* [[CONV]], align 1 // CHECK4-NEXT: [[TOBOOL10:%.*]] = trunc i8 [[TMP24]] to i1 // CHECK4-NEXT: [[CONV12:%.*]] = bitcast i64* [[DOTCAPTURE_EXPR__CASTED11]] to i8* // CHECK4-NEXT: [[FROMBOOL13:%.*]] = zext i1 [[TOBOOL10]] to i8 // CHECK4-NEXT: store i8 [[FROMBOOL13]], i8* [[CONV12]], align 1 // CHECK4-NEXT: [[TMP25:%.*]] = load i64, i64* [[DOTCAPTURE_EXPR__CASTED11]], align 8 -// CHECK4-NEXT: [[TMP26:%.*]] = load i8, i8* [[CONV]], align 8 +// CHECK4-NEXT: [[TMP26:%.*]] = load i8, i8* [[CONV]], align 1 // CHECK4-NEXT: [[TOBOOL14:%.*]] = trunc i8 [[TMP26]] to i1 // CHECK4-NEXT: br i1 [[TOBOOL14]], label [[OMP_IF_THEN15:%.*]], label [[OMP_IF_ELSE16:%.*]] // CHECK4: omp_if.then15: @@ -5588,7 +5588,7 @@ int main() { // CHECK4-NEXT: store i32 [[CONV2]], i32* [[DOTOMP_UB]], align 4 // CHECK4-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK4-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK4-NEXT: [[TMP2:%.*]] = load i8, i8* [[CONV]], align 8 +// CHECK4-NEXT: [[TMP2:%.*]] = load i8, i8* [[CONV]], align 1 // CHECK4-NEXT: [[TOBOOL:%.*]] = trunc i8 [[TMP2]] to i1 // CHECK4-NEXT: br i1 [[TOBOOL]], label 
[[OMP_IF_THEN:%.*]], label [[OMP_IF_ELSE:%.*]] // CHECK4: omp_if.then: @@ -5716,7 +5716,7 @@ int main() { // CHECK4-NEXT: store i32 [[CONV2]], i32* [[DOTOMP_UB]], align 4 // CHECK4-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK4-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK4-NEXT: [[TMP2:%.*]] = load i8, i8* [[CONV]], align 8 +// CHECK4-NEXT: [[TMP2:%.*]] = load i8, i8* [[CONV]], align 1 // CHECK4-NEXT: [[TOBOOL:%.*]] = trunc i8 [[TMP2]] to i1 // CHECK4-NEXT: br i1 [[TOBOOL]], label [[OMP_IF_THEN:%.*]], label [[OMP_IF_ELSE:%.*]] // CHECK4: omp_if.then: @@ -6191,7 +6191,7 @@ int main() { // CHECK4-NEXT: [[DOTCAPTURE_EXPR__CASTED:%.*]] = alloca i64, align 8 // CHECK4-NEXT: store i64 [[ARG]], i64* [[ARG_ADDR]], align 8 // CHECK4-NEXT: [[CONV:%.*]] = bitcast i64* [[ARG_ADDR]] to i32* -// CHECK4-NEXT: [[TMP0:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK4-NEXT: [[TMP0:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK4-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[TMP0]], 0 // CHECK4-NEXT: [[FROMBOOL:%.*]] = zext i1 [[TOBOOL]] to i8 // CHECK4-NEXT: store i8 [[FROMBOOL]], i8* [[DOTCAPTURE_EXPR_]], align 1 @@ -6254,7 +6254,7 @@ int main() { // CHECK4-NEXT: [[TMP8:%.*]] = zext i32 [[TMP7]] to i64 // CHECK4-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !55 // CHECK4-NEXT: [[TMP10:%.*]] = zext i32 [[TMP9]] to i64 -// CHECK4-NEXT: [[TMP11:%.*]] = load i8, i8* [[CONV]], align 8, !llvm.access.group !55 +// CHECK4-NEXT: [[TMP11:%.*]] = load i8, i8* [[CONV]], align 1, !llvm.access.group !55 // CHECK4-NEXT: [[TOBOOL:%.*]] = trunc i8 [[TMP11]] to i1 // CHECK4-NEXT: br i1 [[TOBOOL]], label [[OMP_IF_THEN:%.*]], label [[OMP_IF_ELSE:%.*]] // CHECK4: omp_if.then: @@ -8268,7 +8268,7 @@ int main() { // CHECK9-NEXT: [[DOTCAPTURE_EXPR__CASTED:%.*]] = alloca i64, align 8 // CHECK9-NEXT: store i64 [[ARG]], i64* [[ARG_ADDR]], align 8 // CHECK9-NEXT: [[CONV:%.*]] = bitcast i64* [[ARG_ADDR]] to i32* -// CHECK9-NEXT: [[TMP0:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK9-NEXT: [[TMP0:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK9-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[TMP0]], 0 // CHECK9-NEXT: [[FROMBOOL:%.*]] = zext i1 [[TOBOOL]] to i8 // CHECK9-NEXT: store i8 [[FROMBOOL]], i8* [[DOTCAPTURE_EXPR_]], align 1 @@ -8331,7 +8331,7 @@ int main() { // CHECK9-NEXT: [[TMP8:%.*]] = zext i32 [[TMP7]] to i64 // CHECK9-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !42 // CHECK9-NEXT: [[TMP10:%.*]] = zext i32 [[TMP9]] to i64 -// CHECK9-NEXT: [[TMP11:%.*]] = load i8, i8* [[CONV]], align 8, !llvm.access.group !42 +// CHECK9-NEXT: [[TMP11:%.*]] = load i8, i8* [[CONV]], align 1, !llvm.access.group !42 // CHECK9-NEXT: [[TOBOOL:%.*]] = trunc i8 [[TMP11]] to i1 // CHECK9-NEXT: br i1 [[TOBOOL]], label [[OMP_IF_THEN:%.*]], label [[OMP_IF_ELSE:%.*]] // CHECK9: omp_if.then: @@ -8822,7 +8822,7 @@ int main() { // CHECK9-NEXT: [[DOTCAPTURE_EXPR__CASTED:%.*]] = alloca i64, align 8 // CHECK9-NEXT: store i64 [[ARG]], i64* [[ARG_ADDR]], align 8 // CHECK9-NEXT: [[CONV:%.*]] = bitcast i64* [[ARG_ADDR]] to i32* -// CHECK9-NEXT: [[TMP0:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK9-NEXT: [[TMP0:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK9-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[TMP0]], 0 // CHECK9-NEXT: [[FROMBOOL:%.*]] = zext i1 [[TOBOOL]] to i8 // CHECK9-NEXT: store i8 [[FROMBOOL]], i8* [[DOTCAPTURE_EXPR_]], align 1 @@ -8885,7 +8885,7 @@ int main() { // CHECK9-NEXT: [[TMP8:%.*]] = zext i32 [[TMP7]] to i64 // CHECK9-NEXT: [[TMP9:%.*]] = load 
i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !60 // CHECK9-NEXT: [[TMP10:%.*]] = zext i32 [[TMP9]] to i64 -// CHECK9-NEXT: [[TMP11:%.*]] = load i8, i8* [[CONV]], align 8, !llvm.access.group !60 +// CHECK9-NEXT: [[TMP11:%.*]] = load i8, i8* [[CONV]], align 1, !llvm.access.group !60 // CHECK9-NEXT: [[TOBOOL:%.*]] = trunc i8 [[TMP11]] to i1 // CHECK9-NEXT: br i1 [[TOBOOL]], label [[OMP_IF_THEN:%.*]], label [[OMP_IF_ELSE:%.*]] // CHECK9: omp_if.then: @@ -9717,7 +9717,7 @@ int main() { // CHECK10-NEXT: [[DOTCAPTURE_EXPR__CASTED:%.*]] = alloca i64, align 8 // CHECK10-NEXT: store i64 [[ARG]], i64* [[ARG_ADDR]], align 8 // CHECK10-NEXT: [[CONV:%.*]] = bitcast i64* [[ARG_ADDR]] to i32* -// CHECK10-NEXT: [[TMP0:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK10-NEXT: [[TMP0:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK10-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[TMP0]], 0 // CHECK10-NEXT: [[FROMBOOL:%.*]] = zext i1 [[TOBOOL]] to i8 // CHECK10-NEXT: store i8 [[FROMBOOL]], i8* [[DOTCAPTURE_EXPR_]], align 1 @@ -9780,7 +9780,7 @@ int main() { // CHECK10-NEXT: [[TMP8:%.*]] = zext i32 [[TMP7]] to i64 // CHECK10-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !42 // CHECK10-NEXT: [[TMP10:%.*]] = zext i32 [[TMP9]] to i64 -// CHECK10-NEXT: [[TMP11:%.*]] = load i8, i8* [[CONV]], align 8, !llvm.access.group !42 +// CHECK10-NEXT: [[TMP11:%.*]] = load i8, i8* [[CONV]], align 1, !llvm.access.group !42 // CHECK10-NEXT: [[TOBOOL:%.*]] = trunc i8 [[TMP11]] to i1 // CHECK10-NEXT: br i1 [[TOBOOL]], label [[OMP_IF_THEN:%.*]], label [[OMP_IF_ELSE:%.*]] // CHECK10: omp_if.then: @@ -10271,7 +10271,7 @@ int main() { // CHECK10-NEXT: [[DOTCAPTURE_EXPR__CASTED:%.*]] = alloca i64, align 8 // CHECK10-NEXT: store i64 [[ARG]], i64* [[ARG_ADDR]], align 8 // CHECK10-NEXT: [[CONV:%.*]] = bitcast i64* [[ARG_ADDR]] to i32* -// CHECK10-NEXT: [[TMP0:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK10-NEXT: [[TMP0:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK10-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[TMP0]], 0 // CHECK10-NEXT: [[FROMBOOL:%.*]] = zext i1 [[TOBOOL]] to i8 // CHECK10-NEXT: store i8 [[FROMBOOL]], i8* [[DOTCAPTURE_EXPR_]], align 1 @@ -10334,7 +10334,7 @@ int main() { // CHECK10-NEXT: [[TMP8:%.*]] = zext i32 [[TMP7]] to i64 // CHECK10-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !60 // CHECK10-NEXT: [[TMP10:%.*]] = zext i32 [[TMP9]] to i64 -// CHECK10-NEXT: [[TMP11:%.*]] = load i8, i8* [[CONV]], align 8, !llvm.access.group !60 +// CHECK10-NEXT: [[TMP11:%.*]] = load i8, i8* [[CONV]], align 1, !llvm.access.group !60 // CHECK10-NEXT: [[TOBOOL:%.*]] = trunc i8 [[TMP11]] to i1 // CHECK10-NEXT: br i1 [[TOBOOL]], label [[OMP_IF_THEN:%.*]], label [[OMP_IF_ELSE:%.*]] // CHECK10: omp_if.then: @@ -11166,7 +11166,7 @@ int main() { // CHECK11-NEXT: [[DOTCAPTURE_EXPR__CASTED:%.*]] = alloca i64, align 8 // CHECK11-NEXT: store i64 [[ARG]], i64* [[ARG_ADDR]], align 8 // CHECK11-NEXT: [[CONV:%.*]] = bitcast i64* [[ARG_ADDR]] to i32* -// CHECK11-NEXT: [[TMP0:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK11-NEXT: [[TMP0:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK11-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[TMP0]], 0 // CHECK11-NEXT: [[FROMBOOL:%.*]] = zext i1 [[TOBOOL]] to i8 // CHECK11-NEXT: store i8 [[FROMBOOL]], i8* [[DOTCAPTURE_EXPR_]], align 1 @@ -11221,7 +11221,7 @@ int main() { // CHECK11-NEXT: store i32 [[COND]], i32* [[DOTOMP_COMB_UB]], align 4 // CHECK11-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 // CHECK11-NEXT: store 
i32 [[TMP4]], i32* [[DOTOMP_IV]], align 4 -// CHECK11-NEXT: [[TMP5:%.*]] = load i8, i8* [[CONV]], align 8 +// CHECK11-NEXT: [[TMP5:%.*]] = load i8, i8* [[CONV]], align 1 // CHECK11-NEXT: [[TOBOOL:%.*]] = trunc i8 [[TMP5]] to i1 // CHECK11-NEXT: br i1 [[TOBOOL]], label [[OMP_IF_THEN:%.*]], label [[OMP_IF_ELSE6:%.*]] // CHECK11: omp_if.then: @@ -11236,13 +11236,13 @@ int main() { // CHECK11-NEXT: [[TMP9:%.*]] = zext i32 [[TMP8]] to i64 // CHECK11-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !39 // CHECK11-NEXT: [[TMP11:%.*]] = zext i32 [[TMP10]] to i64 -// CHECK11-NEXT: [[TMP12:%.*]] = load i8, i8* [[CONV]], align 8, !llvm.access.group !39 +// CHECK11-NEXT: [[TMP12:%.*]] = load i8, i8* [[CONV]], align 1, !llvm.access.group !39 // CHECK11-NEXT: [[TOBOOL2:%.*]] = trunc i8 [[TMP12]] to i1 // CHECK11-NEXT: [[CONV3:%.*]] = bitcast i64* [[DOTCAPTURE_EXPR__CASTED]] to i8* // CHECK11-NEXT: [[FROMBOOL:%.*]] = zext i1 [[TOBOOL2]] to i8 // CHECK11-NEXT: store i8 [[FROMBOOL]], i8* [[CONV3]], align 1, !llvm.access.group !39 // CHECK11-NEXT: [[TMP13:%.*]] = load i64, i64* [[DOTCAPTURE_EXPR__CASTED]], align 8, !llvm.access.group !39 -// CHECK11-NEXT: [[TMP14:%.*]] = load i8, i8* [[CONV]], align 8, !llvm.access.group !39 +// CHECK11-NEXT: [[TMP14:%.*]] = load i8, i8* [[CONV]], align 1, !llvm.access.group !39 // CHECK11-NEXT: [[TOBOOL4:%.*]] = trunc i8 [[TMP14]] to i1 // CHECK11-NEXT: br i1 [[TOBOOL4]], label [[OMP_IF_THEN5:%.*]], label [[OMP_IF_ELSE:%.*]] // CHECK11: omp_if.then5: @@ -11277,13 +11277,13 @@ int main() { // CHECK11-NEXT: [[TMP21:%.*]] = zext i32 [[TMP20]] to i64 // CHECK11-NEXT: [[TMP22:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK11-NEXT: [[TMP23:%.*]] = zext i32 [[TMP22]] to i64 -// CHECK11-NEXT: [[TMP24:%.*]] = load i8, i8* [[CONV]], align 8 +// CHECK11-NEXT: [[TMP24:%.*]] = load i8, i8* [[CONV]], align 1 // CHECK11-NEXT: [[TOBOOL10:%.*]] = trunc i8 [[TMP24]] to i1 // CHECK11-NEXT: [[CONV12:%.*]] = bitcast i64* [[DOTCAPTURE_EXPR__CASTED11]] to i8* // CHECK11-NEXT: [[FROMBOOL13:%.*]] = zext i1 [[TOBOOL10]] to i8 // CHECK11-NEXT: store i8 [[FROMBOOL13]], i8* [[CONV12]], align 1 // CHECK11-NEXT: [[TMP25:%.*]] = load i64, i64* [[DOTCAPTURE_EXPR__CASTED11]], align 8 -// CHECK11-NEXT: [[TMP26:%.*]] = load i8, i8* [[CONV]], align 8 +// CHECK11-NEXT: [[TMP26:%.*]] = load i8, i8* [[CONV]], align 1 // CHECK11-NEXT: [[TOBOOL14:%.*]] = trunc i8 [[TMP26]] to i1 // CHECK11-NEXT: br i1 [[TOBOOL14]], label [[OMP_IF_THEN15:%.*]], label [[OMP_IF_ELSE16:%.*]] // CHECK11: omp_if.then15: @@ -11351,7 +11351,7 @@ int main() { // CHECK11-NEXT: store i32 [[CONV2]], i32* [[DOTOMP_UB]], align 4 // CHECK11-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK11-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK11-NEXT: [[TMP2:%.*]] = load i8, i8* [[CONV]], align 8 +// CHECK11-NEXT: [[TMP2:%.*]] = load i8, i8* [[CONV]], align 1 // CHECK11-NEXT: [[TOBOOL:%.*]] = trunc i8 [[TMP2]] to i1 // CHECK11-NEXT: br i1 [[TOBOOL]], label [[OMP_IF_THEN:%.*]], label [[OMP_IF_ELSE:%.*]] // CHECK11: omp_if.then: @@ -11479,7 +11479,7 @@ int main() { // CHECK11-NEXT: store i32 [[CONV2]], i32* [[DOTOMP_UB]], align 4 // CHECK11-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK11-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK11-NEXT: [[TMP2:%.*]] = load i8, i8* [[CONV]], align 8 +// CHECK11-NEXT: [[TMP2:%.*]] = load i8, i8* [[CONV]], align 1 // CHECK11-NEXT: [[TOBOOL:%.*]] = trunc i8 [[TMP2]] to i1 // CHECK11-NEXT: br i1 [[TOBOOL]], label 
[[OMP_IF_THEN:%.*]], label [[OMP_IF_ELSE:%.*]] // CHECK11: omp_if.then: @@ -11954,7 +11954,7 @@ int main() { // CHECK11-NEXT: [[DOTCAPTURE_EXPR__CASTED:%.*]] = alloca i64, align 8 // CHECK11-NEXT: store i64 [[ARG]], i64* [[ARG_ADDR]], align 8 // CHECK11-NEXT: [[CONV:%.*]] = bitcast i64* [[ARG_ADDR]] to i32* -// CHECK11-NEXT: [[TMP0:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK11-NEXT: [[TMP0:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK11-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[TMP0]], 0 // CHECK11-NEXT: [[FROMBOOL:%.*]] = zext i1 [[TOBOOL]] to i8 // CHECK11-NEXT: store i8 [[FROMBOOL]], i8* [[DOTCAPTURE_EXPR_]], align 1 @@ -12017,7 +12017,7 @@ int main() { // CHECK11-NEXT: [[TMP8:%.*]] = zext i32 [[TMP7]] to i64 // CHECK11-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !59 // CHECK11-NEXT: [[TMP10:%.*]] = zext i32 [[TMP9]] to i64 -// CHECK11-NEXT: [[TMP11:%.*]] = load i8, i8* [[CONV]], align 8, !llvm.access.group !59 +// CHECK11-NEXT: [[TMP11:%.*]] = load i8, i8* [[CONV]], align 1, !llvm.access.group !59 // CHECK11-NEXT: [[TOBOOL:%.*]] = trunc i8 [[TMP11]] to i1 // CHECK11-NEXT: br i1 [[TOBOOL]], label [[OMP_IF_THEN:%.*]], label [[OMP_IF_ELSE:%.*]] // CHECK11: omp_if.then: @@ -12849,7 +12849,7 @@ int main() { // CHECK12-NEXT: [[DOTCAPTURE_EXPR__CASTED:%.*]] = alloca i64, align 8 // CHECK12-NEXT: store i64 [[ARG]], i64* [[ARG_ADDR]], align 8 // CHECK12-NEXT: [[CONV:%.*]] = bitcast i64* [[ARG_ADDR]] to i32* -// CHECK12-NEXT: [[TMP0:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK12-NEXT: [[TMP0:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK12-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[TMP0]], 0 // CHECK12-NEXT: [[FROMBOOL:%.*]] = zext i1 [[TOBOOL]] to i8 // CHECK12-NEXT: store i8 [[FROMBOOL]], i8* [[DOTCAPTURE_EXPR_]], align 1 @@ -12904,7 +12904,7 @@ int main() { // CHECK12-NEXT: store i32 [[COND]], i32* [[DOTOMP_COMB_UB]], align 4 // CHECK12-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 // CHECK12-NEXT: store i32 [[TMP4]], i32* [[DOTOMP_IV]], align 4 -// CHECK12-NEXT: [[TMP5:%.*]] = load i8, i8* [[CONV]], align 8 +// CHECK12-NEXT: [[TMP5:%.*]] = load i8, i8* [[CONV]], align 1 // CHECK12-NEXT: [[TOBOOL:%.*]] = trunc i8 [[TMP5]] to i1 // CHECK12-NEXT: br i1 [[TOBOOL]], label [[OMP_IF_THEN:%.*]], label [[OMP_IF_ELSE6:%.*]] // CHECK12: omp_if.then: @@ -12919,13 +12919,13 @@ int main() { // CHECK12-NEXT: [[TMP9:%.*]] = zext i32 [[TMP8]] to i64 // CHECK12-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !39 // CHECK12-NEXT: [[TMP11:%.*]] = zext i32 [[TMP10]] to i64 -// CHECK12-NEXT: [[TMP12:%.*]] = load i8, i8* [[CONV]], align 8, !llvm.access.group !39 +// CHECK12-NEXT: [[TMP12:%.*]] = load i8, i8* [[CONV]], align 1, !llvm.access.group !39 // CHECK12-NEXT: [[TOBOOL2:%.*]] = trunc i8 [[TMP12]] to i1 // CHECK12-NEXT: [[CONV3:%.*]] = bitcast i64* [[DOTCAPTURE_EXPR__CASTED]] to i8* // CHECK12-NEXT: [[FROMBOOL:%.*]] = zext i1 [[TOBOOL2]] to i8 // CHECK12-NEXT: store i8 [[FROMBOOL]], i8* [[CONV3]], align 1, !llvm.access.group !39 // CHECK12-NEXT: [[TMP13:%.*]] = load i64, i64* [[DOTCAPTURE_EXPR__CASTED]], align 8, !llvm.access.group !39 -// CHECK12-NEXT: [[TMP14:%.*]] = load i8, i8* [[CONV]], align 8, !llvm.access.group !39 +// CHECK12-NEXT: [[TMP14:%.*]] = load i8, i8* [[CONV]], align 1, !llvm.access.group !39 // CHECK12-NEXT: [[TOBOOL4:%.*]] = trunc i8 [[TMP14]] to i1 // CHECK12-NEXT: br i1 [[TOBOOL4]], label [[OMP_IF_THEN5:%.*]], label [[OMP_IF_ELSE:%.*]] // CHECK12: omp_if.then5: @@ -12960,13 +12960,13 
@@ int main() { // CHECK12-NEXT: [[TMP21:%.*]] = zext i32 [[TMP20]] to i64 // CHECK12-NEXT: [[TMP22:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK12-NEXT: [[TMP23:%.*]] = zext i32 [[TMP22]] to i64 -// CHECK12-NEXT: [[TMP24:%.*]] = load i8, i8* [[CONV]], align 8 +// CHECK12-NEXT: [[TMP24:%.*]] = load i8, i8* [[CONV]], align 1 // CHECK12-NEXT: [[TOBOOL10:%.*]] = trunc i8 [[TMP24]] to i1 // CHECK12-NEXT: [[CONV12:%.*]] = bitcast i64* [[DOTCAPTURE_EXPR__CASTED11]] to i8* // CHECK12-NEXT: [[FROMBOOL13:%.*]] = zext i1 [[TOBOOL10]] to i8 // CHECK12-NEXT: store i8 [[FROMBOOL13]], i8* [[CONV12]], align 1 // CHECK12-NEXT: [[TMP25:%.*]] = load i64, i64* [[DOTCAPTURE_EXPR__CASTED11]], align 8 -// CHECK12-NEXT: [[TMP26:%.*]] = load i8, i8* [[CONV]], align 8 +// CHECK12-NEXT: [[TMP26:%.*]] = load i8, i8* [[CONV]], align 1 // CHECK12-NEXT: [[TOBOOL14:%.*]] = trunc i8 [[TMP26]] to i1 // CHECK12-NEXT: br i1 [[TOBOOL14]], label [[OMP_IF_THEN15:%.*]], label [[OMP_IF_ELSE16:%.*]] // CHECK12: omp_if.then15: @@ -13034,7 +13034,7 @@ int main() { // CHECK12-NEXT: store i32 [[CONV2]], i32* [[DOTOMP_UB]], align 4 // CHECK12-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK12-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK12-NEXT: [[TMP2:%.*]] = load i8, i8* [[CONV]], align 8 +// CHECK12-NEXT: [[TMP2:%.*]] = load i8, i8* [[CONV]], align 1 // CHECK12-NEXT: [[TOBOOL:%.*]] = trunc i8 [[TMP2]] to i1 // CHECK12-NEXT: br i1 [[TOBOOL]], label [[OMP_IF_THEN:%.*]], label [[OMP_IF_ELSE:%.*]] // CHECK12: omp_if.then: @@ -13162,7 +13162,7 @@ int main() { // CHECK12-NEXT: store i32 [[CONV2]], i32* [[DOTOMP_UB]], align 4 // CHECK12-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK12-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK12-NEXT: [[TMP2:%.*]] = load i8, i8* [[CONV]], align 8 +// CHECK12-NEXT: [[TMP2:%.*]] = load i8, i8* [[CONV]], align 1 // CHECK12-NEXT: [[TOBOOL:%.*]] = trunc i8 [[TMP2]] to i1 // CHECK12-NEXT: br i1 [[TOBOOL]], label [[OMP_IF_THEN:%.*]], label [[OMP_IF_ELSE:%.*]] // CHECK12: omp_if.then: @@ -13637,7 +13637,7 @@ int main() { // CHECK12-NEXT: [[DOTCAPTURE_EXPR__CASTED:%.*]] = alloca i64, align 8 // CHECK12-NEXT: store i64 [[ARG]], i64* [[ARG_ADDR]], align 8 // CHECK12-NEXT: [[CONV:%.*]] = bitcast i64* [[ARG_ADDR]] to i32* -// CHECK12-NEXT: [[TMP0:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK12-NEXT: [[TMP0:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK12-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[TMP0]], 0 // CHECK12-NEXT: [[FROMBOOL:%.*]] = zext i1 [[TOBOOL]] to i8 // CHECK12-NEXT: store i8 [[FROMBOOL]], i8* [[DOTCAPTURE_EXPR_]], align 1 @@ -13700,7 +13700,7 @@ int main() { // CHECK12-NEXT: [[TMP8:%.*]] = zext i32 [[TMP7]] to i64 // CHECK12-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !59 // CHECK12-NEXT: [[TMP10:%.*]] = zext i32 [[TMP9]] to i64 -// CHECK12-NEXT: [[TMP11:%.*]] = load i8, i8* [[CONV]], align 8, !llvm.access.group !59 +// CHECK12-NEXT: [[TMP11:%.*]] = load i8, i8* [[CONV]], align 1, !llvm.access.group !59 // CHECK12-NEXT: [[TOBOOL:%.*]] = trunc i8 [[TMP11]] to i1 // CHECK12-NEXT: br i1 [[TOBOOL]], label [[OMP_IF_THEN:%.*]], label [[OMP_IF_ELSE:%.*]] // CHECK12: omp_if.then: diff --git a/clang/test/OpenMP/teams_distribute_parallel_for_simd_num_threads_codegen.cpp b/clang/test/OpenMP/teams_distribute_parallel_for_simd_num_threads_codegen.cpp index 935fa2fdc3a1..59171d40b5d9 100644 --- a/clang/test/OpenMP/teams_distribute_parallel_for_simd_num_threads_codegen.cpp +++ 
b/clang/test/OpenMP/teams_distribute_parallel_for_simd_num_threads_codegen.cpp @@ -362,7 +362,7 @@ int main() { // CHECK1-NEXT: [[DOTCAPTURE_EXPR__CASTED:%.*]] = alloca i64, align 8 // CHECK1-NEXT: store i64 [[A]], i64* [[A_ADDR]], align 8 // CHECK1-NEXT: [[CONV:%.*]] = bitcast i64* [[A_ADDR]] to i8* -// CHECK1-NEXT: [[TMP0:%.*]] = load i8, i8* [[CONV]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load i8, i8* [[CONV]], align 1 // CHECK1-NEXT: store i8 [[TMP0]], i8* [[DOTCAPTURE_EXPR_]], align 1 // CHECK1-NEXT: [[TMP1:%.*]] = load i8, i8* [[DOTCAPTURE_EXPR_]], align 1 // CHECK1-NEXT: [[CONV1:%.*]] = bitcast i64* [[DOTCAPTURE_EXPR__CASTED]] to i8* @@ -416,7 +416,7 @@ int main() { // CHECK1-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] // CHECK1-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: [[TMP7:%.*]] = load i8, i8* [[CONV]], align 8, !llvm.access.group !18 +// CHECK1-NEXT: [[TMP7:%.*]] = load i8, i8* [[CONV]], align 1, !llvm.access.group !18 // CHECK1-NEXT: [[TMP8:%.*]] = sext i8 [[TMP7]] to i32 // CHECK1-NEXT: call void @__kmpc_push_num_threads(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]], i32 [[TMP8]]), !llvm.access.group !18 // CHECK1-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !18 @@ -1189,7 +1189,7 @@ int main() { // CHECK1-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] // CHECK1-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: [[TMP7:%.*]] = load i8, i8* [[CONV]], align 8, !llvm.access.group !42 +// CHECK1-NEXT: [[TMP7:%.*]] = load i8, i8* [[CONV]], align 1, !llvm.access.group !42 // CHECK1-NEXT: [[TMP8:%.*]] = sext i8 [[TMP7]] to i32 // CHECK1-NEXT: call void @__kmpc_push_num_threads(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]], i32 [[TMP8]]), !llvm.access.group !42 // CHECK1-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !42 @@ -1598,7 +1598,7 @@ int main() { // CHECK2-NEXT: [[DOTCAPTURE_EXPR__CASTED:%.*]] = alloca i64, align 8 // CHECK2-NEXT: store i64 [[A]], i64* [[A_ADDR]], align 8 // CHECK2-NEXT: [[CONV:%.*]] = bitcast i64* [[A_ADDR]] to i8* -// CHECK2-NEXT: [[TMP0:%.*]] = load i8, i8* [[CONV]], align 8 +// CHECK2-NEXT: [[TMP0:%.*]] = load i8, i8* [[CONV]], align 1 // CHECK2-NEXT: store i8 [[TMP0]], i8* [[DOTCAPTURE_EXPR_]], align 1 // CHECK2-NEXT: [[TMP1:%.*]] = load i8, i8* [[DOTCAPTURE_EXPR_]], align 1 // CHECK2-NEXT: [[CONV1:%.*]] = bitcast i64* [[DOTCAPTURE_EXPR__CASTED]] to i8* @@ -1652,7 +1652,7 @@ int main() { // CHECK2-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] // CHECK2-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK2: omp.inner.for.body: -// CHECK2-NEXT: [[TMP7:%.*]] = load i8, i8* [[CONV]], align 8, !llvm.access.group !18 +// CHECK2-NEXT: [[TMP7:%.*]] = load i8, i8* [[CONV]], align 1, !llvm.access.group !18 // CHECK2-NEXT: [[TMP8:%.*]] = sext i8 [[TMP7]] to i32 // CHECK2-NEXT: call void @__kmpc_push_num_threads(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]], i32 [[TMP8]]), !llvm.access.group !18 // CHECK2-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !18 @@ -2425,7 +2425,7 @@ int main() { // CHECK2-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] // CHECK2-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK2: omp.inner.for.body: -// CHECK2-NEXT: [[TMP7:%.*]] = load i8, i8* 
[[CONV]], align 8, !llvm.access.group !42 +// CHECK2-NEXT: [[TMP7:%.*]] = load i8, i8* [[CONV]], align 1, !llvm.access.group !42 // CHECK2-NEXT: [[TMP8:%.*]] = sext i8 [[TMP7]] to i32 // CHECK2-NEXT: call void @__kmpc_push_num_threads(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]], i32 [[TMP8]]), !llvm.access.group !42 // CHECK2-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !42 @@ -3532,7 +3532,7 @@ int main() { // CHECK5-NEXT: [[DOTCAPTURE_EXPR__CASTED:%.*]] = alloca i64, align 8 // CHECK5-NEXT: store i64 [[A]], i64* [[A_ADDR]], align 8 // CHECK5-NEXT: [[CONV:%.*]] = bitcast i64* [[A_ADDR]] to i8* -// CHECK5-NEXT: [[TMP0:%.*]] = load i8, i8* [[CONV]], align 8 +// CHECK5-NEXT: [[TMP0:%.*]] = load i8, i8* [[CONV]], align 1 // CHECK5-NEXT: store i8 [[TMP0]], i8* [[DOTCAPTURE_EXPR_]], align 1 // CHECK5-NEXT: [[TMP1:%.*]] = load i8, i8* [[DOTCAPTURE_EXPR_]], align 1 // CHECK5-NEXT: [[CONV1:%.*]] = bitcast i64* [[DOTCAPTURE_EXPR__CASTED]] to i8* @@ -3586,7 +3586,7 @@ int main() { // CHECK5-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] // CHECK5-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK5: omp.inner.for.body: -// CHECK5-NEXT: [[TMP7:%.*]] = load i8, i8* [[CONV]], align 8, !llvm.access.group !18 +// CHECK5-NEXT: [[TMP7:%.*]] = load i8, i8* [[CONV]], align 1, !llvm.access.group !18 // CHECK5-NEXT: [[TMP8:%.*]] = sext i8 [[TMP7]] to i32 // CHECK5-NEXT: call void @__kmpc_push_num_threads(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]], i32 [[TMP8]]), !llvm.access.group !18 // CHECK5-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !18 @@ -4350,7 +4350,7 @@ int main() { // CHECK5-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] // CHECK5-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK5: omp.inner.for.body: -// CHECK5-NEXT: [[TMP7:%.*]] = load i8, i8* [[CONV]], align 8, !llvm.access.group !42 +// CHECK5-NEXT: [[TMP7:%.*]] = load i8, i8* [[CONV]], align 1, !llvm.access.group !42 // CHECK5-NEXT: [[TMP8:%.*]] = sext i8 [[TMP7]] to i32 // CHECK5-NEXT: call void @__kmpc_push_num_threads(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]], i32 [[TMP8]]), !llvm.access.group !42 // CHECK5-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !42 @@ -4768,7 +4768,7 @@ int main() { // CHECK6-NEXT: [[DOTCAPTURE_EXPR__CASTED:%.*]] = alloca i64, align 8 // CHECK6-NEXT: store i64 [[A]], i64* [[A_ADDR]], align 8 // CHECK6-NEXT: [[CONV:%.*]] = bitcast i64* [[A_ADDR]] to i8* -// CHECK6-NEXT: [[TMP0:%.*]] = load i8, i8* [[CONV]], align 8 +// CHECK6-NEXT: [[TMP0:%.*]] = load i8, i8* [[CONV]], align 1 // CHECK6-NEXT: store i8 [[TMP0]], i8* [[DOTCAPTURE_EXPR_]], align 1 // CHECK6-NEXT: [[TMP1:%.*]] = load i8, i8* [[DOTCAPTURE_EXPR_]], align 1 // CHECK6-NEXT: [[CONV1:%.*]] = bitcast i64* [[DOTCAPTURE_EXPR__CASTED]] to i8* @@ -4822,7 +4822,7 @@ int main() { // CHECK6-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] // CHECK6-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK6: omp.inner.for.body: -// CHECK6-NEXT: [[TMP7:%.*]] = load i8, i8* [[CONV]], align 8, !llvm.access.group !18 +// CHECK6-NEXT: [[TMP7:%.*]] = load i8, i8* [[CONV]], align 1, !llvm.access.group !18 // CHECK6-NEXT: [[TMP8:%.*]] = sext i8 [[TMP7]] to i32 // CHECK6-NEXT: call void @__kmpc_push_num_threads(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]], i32 [[TMP8]]), !llvm.access.group !18 // CHECK6-NEXT: [[TMP9:%.*]] = 
load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !18 @@ -5586,7 +5586,7 @@ int main() { // CHECK6-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] // CHECK6-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK6: omp.inner.for.body: -// CHECK6-NEXT: [[TMP7:%.*]] = load i8, i8* [[CONV]], align 8, !llvm.access.group !42 +// CHECK6-NEXT: [[TMP7:%.*]] = load i8, i8* [[CONV]], align 1, !llvm.access.group !42 // CHECK6-NEXT: [[TMP8:%.*]] = sext i8 [[TMP7]] to i32 // CHECK6-NEXT: call void @__kmpc_push_num_threads(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]], i32 [[TMP8]]), !llvm.access.group !42 // CHECK6-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !42 diff --git a/clang/test/OpenMP/teams_distribute_parallel_for_simd_schedule_codegen.cpp b/clang/test/OpenMP/teams_distribute_parallel_for_simd_schedule_codegen.cpp index c62b4386594f..7d2b99806b86 100644 --- a/clang/test/OpenMP/teams_distribute_parallel_for_simd_schedule_codegen.cpp +++ b/clang/test/OpenMP/teams_distribute_parallel_for_simd_schedule_codegen.cpp @@ -9621,7 +9621,7 @@ int main (int argc, char **argv) { // CHECK13-NEXT: [[CONV1:%.*]] = bitcast i64* [[N_ADDR]] to i32* // CHECK13-NEXT: [[TMP0:%.*]] = load i64, i64* [[VLA_ADDR]], align 8 // CHECK13-NEXT: [[TMP1:%.*]] = load i32*, i32** [[A_ADDR]], align 8 -// CHECK13-NEXT: [[TMP2:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK13-NEXT: [[TMP2:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK13-NEXT: store i32 [[TMP2]], i32* [[DOTCAPTURE_EXPR_]], align 4 // CHECK13-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 // CHECK13-NEXT: [[CONV2:%.*]] = bitcast i64* [[DOTCAPTURE_EXPR__CASTED]] to i32* @@ -9678,7 +9678,7 @@ int main (int argc, char **argv) { // CHECK13-NEXT: store i32 [[TMP6]], i32* [[DOTOMP_COMB_UB]], align 4 // CHECK13-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK13-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK13-NEXT: [[TMP7:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK13-NEXT: [[TMP7:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK13-NEXT: [[TMP8:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK13-NEXT: [[TMP9:%.*]] = load i32, i32* [[TMP8]], align 4 // CHECK13-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP9]], i32 91, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 [[TMP7]]) @@ -9709,7 +9709,7 @@ int main (int argc, char **argv) { // CHECK13-NEXT: [[TMP18:%.*]] = zext i32 [[TMP17]] to i64 // CHECK13-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !28 // CHECK13-NEXT: [[TMP20:%.*]] = zext i32 [[TMP19]] to i64 -// CHECK13-NEXT: [[TMP21:%.*]] = load i32, i32* [[CONV]], align 8, !llvm.access.group !28 +// CHECK13-NEXT: [[TMP21:%.*]] = load i32, i32* [[CONV]], align 4, !llvm.access.group !28 // CHECK13-NEXT: [[CONV7:%.*]] = bitcast i64* [[DOTCAPTURE_EXPR__CASTED]] to i32* // CHECK13-NEXT: store i32 [[TMP21]], i32* [[CONV7]], align 4, !llvm.access.group !28 // CHECK13-NEXT: [[TMP22:%.*]] = load i64, i64* [[DOTCAPTURE_EXPR__CASTED]], align 8, !llvm.access.group !28 @@ -10136,7 +10136,7 @@ int main (int argc, char **argv) { // CHECK13-NEXT: [[CONV1:%.*]] = bitcast i64* [[N_ADDR]] to i32* // CHECK13-NEXT: [[TMP0:%.*]] = load i64, i64* [[VLA_ADDR]], align 8 // CHECK13-NEXT: [[TMP1:%.*]] = load i32*, i32** [[A_ADDR]], align 8 -// CHECK13-NEXT: [[TMP2:%.*]] = load i32, i32* [[CONV]], align 
8 +// CHECK13-NEXT: [[TMP2:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK13-NEXT: store i32 [[TMP2]], i32* [[DOTCAPTURE_EXPR_]], align 4 // CHECK13-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 // CHECK13-NEXT: [[CONV2:%.*]] = bitcast i64* [[DOTCAPTURE_EXPR__CASTED]] to i32* @@ -10222,7 +10222,7 @@ int main (int argc, char **argv) { // CHECK13-NEXT: [[TMP17:%.*]] = zext i32 [[TMP16]] to i64 // CHECK13-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !40 // CHECK13-NEXT: [[TMP19:%.*]] = zext i32 [[TMP18]] to i64 -// CHECK13-NEXT: [[TMP20:%.*]] = load i32, i32* [[CONV]], align 8, !llvm.access.group !40 +// CHECK13-NEXT: [[TMP20:%.*]] = load i32, i32* [[CONV]], align 4, !llvm.access.group !40 // CHECK13-NEXT: [[CONV7:%.*]] = bitcast i64* [[DOTCAPTURE_EXPR__CASTED]] to i32* // CHECK13-NEXT: store i32 [[TMP20]], i32* [[CONV7]], align 4, !llvm.access.group !40 // CHECK13-NEXT: [[TMP21:%.*]] = load i64, i64* [[DOTCAPTURE_EXPR__CASTED]], align 8, !llvm.access.group !40 @@ -10313,7 +10313,7 @@ int main (int argc, char **argv) { // CHECK13-NEXT: store i32 [[CONV5]], i32* [[DOTOMP_UB]], align 4 // CHECK13-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK13-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK13-NEXT: [[TMP9:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK13-NEXT: [[TMP9:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK13-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 // CHECK13-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 // CHECK13-NEXT: [[TMP12:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 @@ -10860,7 +10860,7 @@ int main (int argc, char **argv) { // CHECK13-NEXT: store [10 x i32]* [[A]], [10 x i32]** [[A_ADDR]], align 8 // CHECK13-NEXT: [[CONV:%.*]] = bitcast i64* [[M_ADDR]] to i32* // CHECK13-NEXT: [[TMP0:%.*]] = load [10 x i32]*, [10 x i32]** [[A_ADDR]], align 8 -// CHECK13-NEXT: [[TMP1:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK13-NEXT: [[TMP1:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK13-NEXT: store i32 [[TMP1]], i32* [[DOTCAPTURE_EXPR_]], align 4 // CHECK13-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 // CHECK13-NEXT: [[CONV1:%.*]] = bitcast i64* [[DOTCAPTURE_EXPR__CASTED]] to i32* @@ -10922,7 +10922,7 @@ int main (int argc, char **argv) { // CHECK13-NEXT: [[TMP9:%.*]] = zext i32 [[TMP8]] to i64 // CHECK13-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !58 // CHECK13-NEXT: [[TMP11:%.*]] = zext i32 [[TMP10]] to i64 -// CHECK13-NEXT: [[TMP12:%.*]] = load i32, i32* [[CONV]], align 8, !llvm.access.group !58 +// CHECK13-NEXT: [[TMP12:%.*]] = load i32, i32* [[CONV]], align 4, !llvm.access.group !58 // CHECK13-NEXT: [[CONV2:%.*]] = bitcast i64* [[DOTCAPTURE_EXPR__CASTED]] to i32* // CHECK13-NEXT: store i32 [[TMP12]], i32* [[CONV2]], align 4, !llvm.access.group !58 // CHECK13-NEXT: [[TMP13:%.*]] = load i64, i64* [[DOTCAPTURE_EXPR__CASTED]], align 8, !llvm.access.group !58 @@ -10982,7 +10982,7 @@ int main (int argc, char **argv) { // CHECK13-NEXT: store i32 [[CONV2]], i32* [[DOTOMP_UB]], align 4 // CHECK13-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK13-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK13-NEXT: [[TMP3:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK13-NEXT: [[TMP3:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK13-NEXT: [[TMP4:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK13-NEXT: [[TMP5:%.*]] = load i32, i32* [[TMP4]], align 4 // 
CHECK13-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB2]], i32 [[TMP5]], i32 33, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 [[TMP3]]) @@ -11230,7 +11230,7 @@ int main (int argc, char **argv) { // CHECK13-NEXT: store [10 x i32]* [[A]], [10 x i32]** [[A_ADDR]], align 8 // CHECK13-NEXT: [[CONV:%.*]] = bitcast i64* [[M_ADDR]] to i32* // CHECK13-NEXT: [[TMP0:%.*]] = load [10 x i32]*, [10 x i32]** [[A_ADDR]], align 8 -// CHECK13-NEXT: [[TMP1:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK13-NEXT: [[TMP1:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK13-NEXT: store i32 [[TMP1]], i32* [[DOTCAPTURE_EXPR_]], align 4 // CHECK13-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 // CHECK13-NEXT: [[CONV1:%.*]] = bitcast i64* [[DOTCAPTURE_EXPR__CASTED]] to i32* @@ -11292,7 +11292,7 @@ int main (int argc, char **argv) { // CHECK13-NEXT: [[TMP9:%.*]] = zext i32 [[TMP8]] to i64 // CHECK13-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !70 // CHECK13-NEXT: [[TMP11:%.*]] = zext i32 [[TMP10]] to i64 -// CHECK13-NEXT: [[TMP12:%.*]] = load i32, i32* [[CONV]], align 8, !llvm.access.group !70 +// CHECK13-NEXT: [[TMP12:%.*]] = load i32, i32* [[CONV]], align 4, !llvm.access.group !70 // CHECK13-NEXT: [[CONV2:%.*]] = bitcast i64* [[DOTCAPTURE_EXPR__CASTED]] to i32* // CHECK13-NEXT: store i32 [[TMP12]], i32* [[CONV2]], align 4, !llvm.access.group !70 // CHECK13-NEXT: [[TMP13:%.*]] = load i64, i64* [[DOTCAPTURE_EXPR__CASTED]], align 8, !llvm.access.group !70 @@ -11352,7 +11352,7 @@ int main (int argc, char **argv) { // CHECK13-NEXT: store i32 [[CONV2]], i32* [[DOTOMP_UB]], align 4 // CHECK13-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK13-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK13-NEXT: [[TMP3:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK13-NEXT: [[TMP3:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK13-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 // CHECK13-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 // CHECK13-NEXT: [[TMP6:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 @@ -12280,7 +12280,7 @@ int main (int argc, char **argv) { // CHECK14-NEXT: [[CONV1:%.*]] = bitcast i64* [[N_ADDR]] to i32* // CHECK14-NEXT: [[TMP0:%.*]] = load i64, i64* [[VLA_ADDR]], align 8 // CHECK14-NEXT: [[TMP1:%.*]] = load i32*, i32** [[A_ADDR]], align 8 -// CHECK14-NEXT: [[TMP2:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK14-NEXT: [[TMP2:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK14-NEXT: store i32 [[TMP2]], i32* [[DOTCAPTURE_EXPR_]], align 4 // CHECK14-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 // CHECK14-NEXT: [[CONV2:%.*]] = bitcast i64* [[DOTCAPTURE_EXPR__CASTED]] to i32* @@ -12337,7 +12337,7 @@ int main (int argc, char **argv) { // CHECK14-NEXT: store i32 [[TMP6]], i32* [[DOTOMP_COMB_UB]], align 4 // CHECK14-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK14-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK14-NEXT: [[TMP7:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK14-NEXT: [[TMP7:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK14-NEXT: [[TMP8:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK14-NEXT: [[TMP9:%.*]] = load i32, i32* [[TMP8]], align 4 // CHECK14-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP9]], i32 91, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], 
i32 1, i32 [[TMP7]]) @@ -12368,7 +12368,7 @@ int main (int argc, char **argv) { // CHECK14-NEXT: [[TMP18:%.*]] = zext i32 [[TMP17]] to i64 // CHECK14-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !28 // CHECK14-NEXT: [[TMP20:%.*]] = zext i32 [[TMP19]] to i64 -// CHECK14-NEXT: [[TMP21:%.*]] = load i32, i32* [[CONV]], align 8, !llvm.access.group !28 +// CHECK14-NEXT: [[TMP21:%.*]] = load i32, i32* [[CONV]], align 4, !llvm.access.group !28 // CHECK14-NEXT: [[CONV7:%.*]] = bitcast i64* [[DOTCAPTURE_EXPR__CASTED]] to i32* // CHECK14-NEXT: store i32 [[TMP21]], i32* [[CONV7]], align 4, !llvm.access.group !28 // CHECK14-NEXT: [[TMP22:%.*]] = load i64, i64* [[DOTCAPTURE_EXPR__CASTED]], align 8, !llvm.access.group !28 @@ -12795,7 +12795,7 @@ int main (int argc, char **argv) { // CHECK14-NEXT: [[CONV1:%.*]] = bitcast i64* [[N_ADDR]] to i32* // CHECK14-NEXT: [[TMP0:%.*]] = load i64, i64* [[VLA_ADDR]], align 8 // CHECK14-NEXT: [[TMP1:%.*]] = load i32*, i32** [[A_ADDR]], align 8 -// CHECK14-NEXT: [[TMP2:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK14-NEXT: [[TMP2:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK14-NEXT: store i32 [[TMP2]], i32* [[DOTCAPTURE_EXPR_]], align 4 // CHECK14-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 // CHECK14-NEXT: [[CONV2:%.*]] = bitcast i64* [[DOTCAPTURE_EXPR__CASTED]] to i32* @@ -12881,7 +12881,7 @@ int main (int argc, char **argv) { // CHECK14-NEXT: [[TMP17:%.*]] = zext i32 [[TMP16]] to i64 // CHECK14-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !40 // CHECK14-NEXT: [[TMP19:%.*]] = zext i32 [[TMP18]] to i64 -// CHECK14-NEXT: [[TMP20:%.*]] = load i32, i32* [[CONV]], align 8, !llvm.access.group !40 +// CHECK14-NEXT: [[TMP20:%.*]] = load i32, i32* [[CONV]], align 4, !llvm.access.group !40 // CHECK14-NEXT: [[CONV7:%.*]] = bitcast i64* [[DOTCAPTURE_EXPR__CASTED]] to i32* // CHECK14-NEXT: store i32 [[TMP20]], i32* [[CONV7]], align 4, !llvm.access.group !40 // CHECK14-NEXT: [[TMP21:%.*]] = load i64, i64* [[DOTCAPTURE_EXPR__CASTED]], align 8, !llvm.access.group !40 @@ -12972,7 +12972,7 @@ int main (int argc, char **argv) { // CHECK14-NEXT: store i32 [[CONV5]], i32* [[DOTOMP_UB]], align 4 // CHECK14-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK14-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK14-NEXT: [[TMP9:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK14-NEXT: [[TMP9:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK14-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 // CHECK14-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 // CHECK14-NEXT: [[TMP12:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 @@ -13519,7 +13519,7 @@ int main (int argc, char **argv) { // CHECK14-NEXT: store [10 x i32]* [[A]], [10 x i32]** [[A_ADDR]], align 8 // CHECK14-NEXT: [[CONV:%.*]] = bitcast i64* [[M_ADDR]] to i32* // CHECK14-NEXT: [[TMP0:%.*]] = load [10 x i32]*, [10 x i32]** [[A_ADDR]], align 8 -// CHECK14-NEXT: [[TMP1:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK14-NEXT: [[TMP1:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK14-NEXT: store i32 [[TMP1]], i32* [[DOTCAPTURE_EXPR_]], align 4 // CHECK14-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 // CHECK14-NEXT: [[CONV1:%.*]] = bitcast i64* [[DOTCAPTURE_EXPR__CASTED]] to i32* @@ -13581,7 +13581,7 @@ int main (int argc, char **argv) { // CHECK14-NEXT: [[TMP9:%.*]] = zext i32 [[TMP8]] to i64 // CHECK14-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], 
align 4, !llvm.access.group !58 // CHECK14-NEXT: [[TMP11:%.*]] = zext i32 [[TMP10]] to i64 -// CHECK14-NEXT: [[TMP12:%.*]] = load i32, i32* [[CONV]], align 8, !llvm.access.group !58 +// CHECK14-NEXT: [[TMP12:%.*]] = load i32, i32* [[CONV]], align 4, !llvm.access.group !58 // CHECK14-NEXT: [[CONV2:%.*]] = bitcast i64* [[DOTCAPTURE_EXPR__CASTED]] to i32* // CHECK14-NEXT: store i32 [[TMP12]], i32* [[CONV2]], align 4, !llvm.access.group !58 // CHECK14-NEXT: [[TMP13:%.*]] = load i64, i64* [[DOTCAPTURE_EXPR__CASTED]], align 8, !llvm.access.group !58 @@ -13641,7 +13641,7 @@ int main (int argc, char **argv) { // CHECK14-NEXT: store i32 [[CONV2]], i32* [[DOTOMP_UB]], align 4 // CHECK14-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK14-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK14-NEXT: [[TMP3:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK14-NEXT: [[TMP3:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK14-NEXT: [[TMP4:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK14-NEXT: [[TMP5:%.*]] = load i32, i32* [[TMP4]], align 4 // CHECK14-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB2]], i32 [[TMP5]], i32 33, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 [[TMP3]]) @@ -13889,7 +13889,7 @@ int main (int argc, char **argv) { // CHECK14-NEXT: store [10 x i32]* [[A]], [10 x i32]** [[A_ADDR]], align 8 // CHECK14-NEXT: [[CONV:%.*]] = bitcast i64* [[M_ADDR]] to i32* // CHECK14-NEXT: [[TMP0:%.*]] = load [10 x i32]*, [10 x i32]** [[A_ADDR]], align 8 -// CHECK14-NEXT: [[TMP1:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK14-NEXT: [[TMP1:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK14-NEXT: store i32 [[TMP1]], i32* [[DOTCAPTURE_EXPR_]], align 4 // CHECK14-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 // CHECK14-NEXT: [[CONV1:%.*]] = bitcast i64* [[DOTCAPTURE_EXPR__CASTED]] to i32* @@ -13951,7 +13951,7 @@ int main (int argc, char **argv) { // CHECK14-NEXT: [[TMP9:%.*]] = zext i32 [[TMP8]] to i64 // CHECK14-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !70 // CHECK14-NEXT: [[TMP11:%.*]] = zext i32 [[TMP10]] to i64 -// CHECK14-NEXT: [[TMP12:%.*]] = load i32, i32* [[CONV]], align 8, !llvm.access.group !70 +// CHECK14-NEXT: [[TMP12:%.*]] = load i32, i32* [[CONV]], align 4, !llvm.access.group !70 // CHECK14-NEXT: [[CONV2:%.*]] = bitcast i64* [[DOTCAPTURE_EXPR__CASTED]] to i32* // CHECK14-NEXT: store i32 [[TMP12]], i32* [[CONV2]], align 4, !llvm.access.group !70 // CHECK14-NEXT: [[TMP13:%.*]] = load i64, i64* [[DOTCAPTURE_EXPR__CASTED]], align 8, !llvm.access.group !70 @@ -14011,7 +14011,7 @@ int main (int argc, char **argv) { // CHECK14-NEXT: store i32 [[CONV2]], i32* [[DOTOMP_UB]], align 4 // CHECK14-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK14-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK14-NEXT: [[TMP3:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK14-NEXT: [[TMP3:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK14-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 // CHECK14-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 // CHECK14-NEXT: [[TMP6:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 @@ -14939,7 +14939,7 @@ int main (int argc, char **argv) { // CHECK15-NEXT: [[CONV1:%.*]] = bitcast i64* [[N_ADDR]] to i32* // CHECK15-NEXT: [[TMP0:%.*]] = load i64, i64* [[VLA_ADDR]], align 8 // CHECK15-NEXT: [[TMP1:%.*]] = load i32*, i32** [[A_ADDR]], align 8 -// CHECK15-NEXT: 
[[TMP2:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK15-NEXT: [[TMP2:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK15-NEXT: store i32 [[TMP2]], i32* [[DOTCAPTURE_EXPR_]], align 4 // CHECK15-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 // CHECK15-NEXT: [[CONV2:%.*]] = bitcast i64* [[DOTCAPTURE_EXPR__CASTED]] to i32* @@ -14996,7 +14996,7 @@ int main (int argc, char **argv) { // CHECK15-NEXT: store i32 [[TMP6]], i32* [[DOTOMP_COMB_UB]], align 4 // CHECK15-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK15-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK15-NEXT: [[TMP7:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK15-NEXT: [[TMP7:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK15-NEXT: [[TMP8:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK15-NEXT: [[TMP9:%.*]] = load i32, i32* [[TMP8]], align 4 // CHECK15-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP9]], i32 91, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 [[TMP7]]) @@ -15027,7 +15027,7 @@ int main (int argc, char **argv) { // CHECK15-NEXT: [[TMP18:%.*]] = zext i32 [[TMP17]] to i64 // CHECK15-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !28 // CHECK15-NEXT: [[TMP20:%.*]] = zext i32 [[TMP19]] to i64 -// CHECK15-NEXT: [[TMP21:%.*]] = load i32, i32* [[CONV]], align 8, !llvm.access.group !28 +// CHECK15-NEXT: [[TMP21:%.*]] = load i32, i32* [[CONV]], align 4, !llvm.access.group !28 // CHECK15-NEXT: [[CONV7:%.*]] = bitcast i64* [[DOTCAPTURE_EXPR__CASTED]] to i32* // CHECK15-NEXT: store i32 [[TMP21]], i32* [[CONV7]], align 4, !llvm.access.group !28 // CHECK15-NEXT: [[TMP22:%.*]] = load i64, i64* [[DOTCAPTURE_EXPR__CASTED]], align 8, !llvm.access.group !28 @@ -15454,7 +15454,7 @@ int main (int argc, char **argv) { // CHECK15-NEXT: [[CONV1:%.*]] = bitcast i64* [[N_ADDR]] to i32* // CHECK15-NEXT: [[TMP0:%.*]] = load i64, i64* [[VLA_ADDR]], align 8 // CHECK15-NEXT: [[TMP1:%.*]] = load i32*, i32** [[A_ADDR]], align 8 -// CHECK15-NEXT: [[TMP2:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK15-NEXT: [[TMP2:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK15-NEXT: store i32 [[TMP2]], i32* [[DOTCAPTURE_EXPR_]], align 4 // CHECK15-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 // CHECK15-NEXT: [[CONV2:%.*]] = bitcast i64* [[DOTCAPTURE_EXPR__CASTED]] to i32* @@ -15540,7 +15540,7 @@ int main (int argc, char **argv) { // CHECK15-NEXT: [[TMP17:%.*]] = zext i32 [[TMP16]] to i64 // CHECK15-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !40 // CHECK15-NEXT: [[TMP19:%.*]] = zext i32 [[TMP18]] to i64 -// CHECK15-NEXT: [[TMP20:%.*]] = load i32, i32* [[CONV]], align 8, !llvm.access.group !40 +// CHECK15-NEXT: [[TMP20:%.*]] = load i32, i32* [[CONV]], align 4, !llvm.access.group !40 // CHECK15-NEXT: [[CONV7:%.*]] = bitcast i64* [[DOTCAPTURE_EXPR__CASTED]] to i32* // CHECK15-NEXT: store i32 [[TMP20]], i32* [[CONV7]], align 4, !llvm.access.group !40 // CHECK15-NEXT: [[TMP21:%.*]] = load i64, i64* [[DOTCAPTURE_EXPR__CASTED]], align 8, !llvm.access.group !40 @@ -15631,7 +15631,7 @@ int main (int argc, char **argv) { // CHECK15-NEXT: store i32 [[CONV5]], i32* [[DOTOMP_UB]], align 4 // CHECK15-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK15-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK15-NEXT: [[TMP9:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK15-NEXT: [[TMP9:%.*]] = load 
i32, i32* [[CONV]], align 4 // CHECK15-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 // CHECK15-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 // CHECK15-NEXT: [[TMP12:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 @@ -16178,7 +16178,7 @@ int main (int argc, char **argv) { // CHECK15-NEXT: store [10 x i32]* [[A]], [10 x i32]** [[A_ADDR]], align 8 // CHECK15-NEXT: [[CONV:%.*]] = bitcast i64* [[M_ADDR]] to i32* // CHECK15-NEXT: [[TMP0:%.*]] = load [10 x i32]*, [10 x i32]** [[A_ADDR]], align 8 -// CHECK15-NEXT: [[TMP1:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK15-NEXT: [[TMP1:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK15-NEXT: store i32 [[TMP1]], i32* [[DOTCAPTURE_EXPR_]], align 4 // CHECK15-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 // CHECK15-NEXT: [[CONV1:%.*]] = bitcast i64* [[DOTCAPTURE_EXPR__CASTED]] to i32* @@ -16240,7 +16240,7 @@ int main (int argc, char **argv) { // CHECK15-NEXT: [[TMP9:%.*]] = zext i32 [[TMP8]] to i64 // CHECK15-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !58 // CHECK15-NEXT: [[TMP11:%.*]] = zext i32 [[TMP10]] to i64 -// CHECK15-NEXT: [[TMP12:%.*]] = load i32, i32* [[CONV]], align 8, !llvm.access.group !58 +// CHECK15-NEXT: [[TMP12:%.*]] = load i32, i32* [[CONV]], align 4, !llvm.access.group !58 // CHECK15-NEXT: [[CONV2:%.*]] = bitcast i64* [[DOTCAPTURE_EXPR__CASTED]] to i32* // CHECK15-NEXT: store i32 [[TMP12]], i32* [[CONV2]], align 4, !llvm.access.group !58 // CHECK15-NEXT: [[TMP13:%.*]] = load i64, i64* [[DOTCAPTURE_EXPR__CASTED]], align 8, !llvm.access.group !58 @@ -16300,7 +16300,7 @@ int main (int argc, char **argv) { // CHECK15-NEXT: store i32 [[CONV2]], i32* [[DOTOMP_UB]], align 4 // CHECK15-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK15-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK15-NEXT: [[TMP3:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK15-NEXT: [[TMP3:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK15-NEXT: [[TMP4:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK15-NEXT: [[TMP5:%.*]] = load i32, i32* [[TMP4]], align 4 // CHECK15-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB2]], i32 [[TMP5]], i32 33, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 [[TMP3]]) @@ -16548,7 +16548,7 @@ int main (int argc, char **argv) { // CHECK15-NEXT: store [10 x i32]* [[A]], [10 x i32]** [[A_ADDR]], align 8 // CHECK15-NEXT: [[CONV:%.*]] = bitcast i64* [[M_ADDR]] to i32* // CHECK15-NEXT: [[TMP0:%.*]] = load [10 x i32]*, [10 x i32]** [[A_ADDR]], align 8 -// CHECK15-NEXT: [[TMP1:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK15-NEXT: [[TMP1:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK15-NEXT: store i32 [[TMP1]], i32* [[DOTCAPTURE_EXPR_]], align 4 // CHECK15-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 // CHECK15-NEXT: [[CONV1:%.*]] = bitcast i64* [[DOTCAPTURE_EXPR__CASTED]] to i32* @@ -16610,7 +16610,7 @@ int main (int argc, char **argv) { // CHECK15-NEXT: [[TMP9:%.*]] = zext i32 [[TMP8]] to i64 // CHECK15-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !70 // CHECK15-NEXT: [[TMP11:%.*]] = zext i32 [[TMP10]] to i64 -// CHECK15-NEXT: [[TMP12:%.*]] = load i32, i32* [[CONV]], align 8, !llvm.access.group !70 +// CHECK15-NEXT: [[TMP12:%.*]] = load i32, i32* [[CONV]], align 4, !llvm.access.group !70 // CHECK15-NEXT: [[CONV2:%.*]] = bitcast i64* [[DOTCAPTURE_EXPR__CASTED]] to 
i32* // CHECK15-NEXT: store i32 [[TMP12]], i32* [[CONV2]], align 4, !llvm.access.group !70 // CHECK15-NEXT: [[TMP13:%.*]] = load i64, i64* [[DOTCAPTURE_EXPR__CASTED]], align 8, !llvm.access.group !70 @@ -16670,7 +16670,7 @@ int main (int argc, char **argv) { // CHECK15-NEXT: store i32 [[CONV2]], i32* [[DOTOMP_UB]], align 4 // CHECK15-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK15-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK15-NEXT: [[TMP3:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK15-NEXT: [[TMP3:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK15-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 // CHECK15-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 // CHECK15-NEXT: [[TMP6:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 @@ -17598,7 +17598,7 @@ int main (int argc, char **argv) { // CHECK16-NEXT: [[CONV1:%.*]] = bitcast i64* [[N_ADDR]] to i32* // CHECK16-NEXT: [[TMP0:%.*]] = load i64, i64* [[VLA_ADDR]], align 8 // CHECK16-NEXT: [[TMP1:%.*]] = load i32*, i32** [[A_ADDR]], align 8 -// CHECK16-NEXT: [[TMP2:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK16-NEXT: [[TMP2:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK16-NEXT: store i32 [[TMP2]], i32* [[DOTCAPTURE_EXPR_]], align 4 // CHECK16-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 // CHECK16-NEXT: [[CONV2:%.*]] = bitcast i64* [[DOTCAPTURE_EXPR__CASTED]] to i32* @@ -17655,7 +17655,7 @@ int main (int argc, char **argv) { // CHECK16-NEXT: store i32 [[TMP6]], i32* [[DOTOMP_COMB_UB]], align 4 // CHECK16-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK16-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK16-NEXT: [[TMP7:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK16-NEXT: [[TMP7:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK16-NEXT: [[TMP8:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK16-NEXT: [[TMP9:%.*]] = load i32, i32* [[TMP8]], align 4 // CHECK16-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP9]], i32 91, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 [[TMP7]]) @@ -17686,7 +17686,7 @@ int main (int argc, char **argv) { // CHECK16-NEXT: [[TMP18:%.*]] = zext i32 [[TMP17]] to i64 // CHECK16-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !28 // CHECK16-NEXT: [[TMP20:%.*]] = zext i32 [[TMP19]] to i64 -// CHECK16-NEXT: [[TMP21:%.*]] = load i32, i32* [[CONV]], align 8, !llvm.access.group !28 +// CHECK16-NEXT: [[TMP21:%.*]] = load i32, i32* [[CONV]], align 4, !llvm.access.group !28 // CHECK16-NEXT: [[CONV7:%.*]] = bitcast i64* [[DOTCAPTURE_EXPR__CASTED]] to i32* // CHECK16-NEXT: store i32 [[TMP21]], i32* [[CONV7]], align 4, !llvm.access.group !28 // CHECK16-NEXT: [[TMP22:%.*]] = load i64, i64* [[DOTCAPTURE_EXPR__CASTED]], align 8, !llvm.access.group !28 @@ -18113,7 +18113,7 @@ int main (int argc, char **argv) { // CHECK16-NEXT: [[CONV1:%.*]] = bitcast i64* [[N_ADDR]] to i32* // CHECK16-NEXT: [[TMP0:%.*]] = load i64, i64* [[VLA_ADDR]], align 8 // CHECK16-NEXT: [[TMP1:%.*]] = load i32*, i32** [[A_ADDR]], align 8 -// CHECK16-NEXT: [[TMP2:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK16-NEXT: [[TMP2:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK16-NEXT: store i32 [[TMP2]], i32* [[DOTCAPTURE_EXPR_]], align 4 // CHECK16-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 // CHECK16-NEXT: [[CONV2:%.*]] = bitcast i64* [[DOTCAPTURE_EXPR__CASTED]] to i32* @@ 
-18199,7 +18199,7 @@ int main (int argc, char **argv) { // CHECK16-NEXT: [[TMP17:%.*]] = zext i32 [[TMP16]] to i64 // CHECK16-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !40 // CHECK16-NEXT: [[TMP19:%.*]] = zext i32 [[TMP18]] to i64 -// CHECK16-NEXT: [[TMP20:%.*]] = load i32, i32* [[CONV]], align 8, !llvm.access.group !40 +// CHECK16-NEXT: [[TMP20:%.*]] = load i32, i32* [[CONV]], align 4, !llvm.access.group !40 // CHECK16-NEXT: [[CONV7:%.*]] = bitcast i64* [[DOTCAPTURE_EXPR__CASTED]] to i32* // CHECK16-NEXT: store i32 [[TMP20]], i32* [[CONV7]], align 4, !llvm.access.group !40 // CHECK16-NEXT: [[TMP21:%.*]] = load i64, i64* [[DOTCAPTURE_EXPR__CASTED]], align 8, !llvm.access.group !40 @@ -18290,7 +18290,7 @@ int main (int argc, char **argv) { // CHECK16-NEXT: store i32 [[CONV5]], i32* [[DOTOMP_UB]], align 4 // CHECK16-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK16-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK16-NEXT: [[TMP9:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK16-NEXT: [[TMP9:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK16-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 // CHECK16-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 // CHECK16-NEXT: [[TMP12:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 @@ -18837,7 +18837,7 @@ int main (int argc, char **argv) { // CHECK16-NEXT: store [10 x i32]* [[A]], [10 x i32]** [[A_ADDR]], align 8 // CHECK16-NEXT: [[CONV:%.*]] = bitcast i64* [[M_ADDR]] to i32* // CHECK16-NEXT: [[TMP0:%.*]] = load [10 x i32]*, [10 x i32]** [[A_ADDR]], align 8 -// CHECK16-NEXT: [[TMP1:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK16-NEXT: [[TMP1:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK16-NEXT: store i32 [[TMP1]], i32* [[DOTCAPTURE_EXPR_]], align 4 // CHECK16-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 // CHECK16-NEXT: [[CONV1:%.*]] = bitcast i64* [[DOTCAPTURE_EXPR__CASTED]] to i32* @@ -18899,7 +18899,7 @@ int main (int argc, char **argv) { // CHECK16-NEXT: [[TMP9:%.*]] = zext i32 [[TMP8]] to i64 // CHECK16-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !58 // CHECK16-NEXT: [[TMP11:%.*]] = zext i32 [[TMP10]] to i64 -// CHECK16-NEXT: [[TMP12:%.*]] = load i32, i32* [[CONV]], align 8, !llvm.access.group !58 +// CHECK16-NEXT: [[TMP12:%.*]] = load i32, i32* [[CONV]], align 4, !llvm.access.group !58 // CHECK16-NEXT: [[CONV2:%.*]] = bitcast i64* [[DOTCAPTURE_EXPR__CASTED]] to i32* // CHECK16-NEXT: store i32 [[TMP12]], i32* [[CONV2]], align 4, !llvm.access.group !58 // CHECK16-NEXT: [[TMP13:%.*]] = load i64, i64* [[DOTCAPTURE_EXPR__CASTED]], align 8, !llvm.access.group !58 @@ -18959,7 +18959,7 @@ int main (int argc, char **argv) { // CHECK16-NEXT: store i32 [[CONV2]], i32* [[DOTOMP_UB]], align 4 // CHECK16-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK16-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK16-NEXT: [[TMP3:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK16-NEXT: [[TMP3:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK16-NEXT: [[TMP4:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK16-NEXT: [[TMP5:%.*]] = load i32, i32* [[TMP4]], align 4 // CHECK16-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB2]], i32 [[TMP5]], i32 33, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 [[TMP3]]) @@ -19207,7 +19207,7 @@ int main (int argc, char **argv) { // CHECK16-NEXT: store [10 x i32]* 
[[A]], [10 x i32]** [[A_ADDR]], align 8 // CHECK16-NEXT: [[CONV:%.*]] = bitcast i64* [[M_ADDR]] to i32* // CHECK16-NEXT: [[TMP0:%.*]] = load [10 x i32]*, [10 x i32]** [[A_ADDR]], align 8 -// CHECK16-NEXT: [[TMP1:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK16-NEXT: [[TMP1:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK16-NEXT: store i32 [[TMP1]], i32* [[DOTCAPTURE_EXPR_]], align 4 // CHECK16-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 // CHECK16-NEXT: [[CONV1:%.*]] = bitcast i64* [[DOTCAPTURE_EXPR__CASTED]] to i32* @@ -19269,7 +19269,7 @@ int main (int argc, char **argv) { // CHECK16-NEXT: [[TMP9:%.*]] = zext i32 [[TMP8]] to i64 // CHECK16-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !70 // CHECK16-NEXT: [[TMP11:%.*]] = zext i32 [[TMP10]] to i64 -// CHECK16-NEXT: [[TMP12:%.*]] = load i32, i32* [[CONV]], align 8, !llvm.access.group !70 +// CHECK16-NEXT: [[TMP12:%.*]] = load i32, i32* [[CONV]], align 4, !llvm.access.group !70 // CHECK16-NEXT: [[CONV2:%.*]] = bitcast i64* [[DOTCAPTURE_EXPR__CASTED]] to i32* // CHECK16-NEXT: store i32 [[TMP12]], i32* [[CONV2]], align 4, !llvm.access.group !70 // CHECK16-NEXT: [[TMP13:%.*]] = load i64, i64* [[DOTCAPTURE_EXPR__CASTED]], align 8, !llvm.access.group !70 @@ -19329,7 +19329,7 @@ int main (int argc, char **argv) { // CHECK16-NEXT: store i32 [[CONV2]], i32* [[DOTOMP_UB]], align 4 // CHECK16-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK16-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK16-NEXT: [[TMP3:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK16-NEXT: [[TMP3:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK16-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 // CHECK16-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 // CHECK16-NEXT: [[TMP6:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 diff --git a/clang/test/OpenMP/teams_distribute_simd_codegen.cpp b/clang/test/OpenMP/teams_distribute_simd_codegen.cpp index 7e95a42396ea..245bcc0af26c 100644 --- a/clang/test/OpenMP/teams_distribute_simd_codegen.cpp +++ b/clang/test/OpenMP/teams_distribute_simd_codegen.cpp @@ -380,8 +380,8 @@ int main (int argc, char **argv) { // CHECK1-NEXT: [[TMP1:%.*]] = load [100 x i32]*, [100 x i32]** [[A_ADDR]], align 8 // CHECK1-NEXT: [[CONV2:%.*]] = bitcast i64* [[I_ADDR]] to i32* // CHECK1-NEXT: [[CONV3:%.*]] = bitcast i64* [[N_ADDR]] to i32* -// CHECK1-NEXT: [[TMP2:%.*]] = load i32, i32* [[CONV]], align 8 -// CHECK1-NEXT: [[TMP3:%.*]] = load i32, i32* [[CONV1]], align 8 +// CHECK1-NEXT: [[TMP2:%.*]] = load i32, i32* [[CONV]], align 4 +// CHECK1-NEXT: [[TMP3:%.*]] = load i32, i32* [[CONV1]], align 4 // CHECK1-NEXT: call void @__kmpc_push_num_teams(%struct.ident_t* @[[GLOB2]], i32 [[TMP0]], i32 [[TMP2]], i32 [[TMP3]]) // CHECK1-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB2]], i32 3, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32*, i32*, [100 x i32]*)* @.omp_outlined. 
to void (i32*, i32*, ...)*), i32* [[CONV2]], i32* [[CONV3]], [100 x i32]* [[TMP1]]) // CHECK1-NEXT: ret void @@ -789,8 +789,8 @@ int main (int argc, char **argv) { // CHECK2-NEXT: [[TMP1:%.*]] = load [100 x i32]*, [100 x i32]** [[A_ADDR]], align 8 // CHECK2-NEXT: [[CONV2:%.*]] = bitcast i64* [[I_ADDR]] to i32* // CHECK2-NEXT: [[CONV3:%.*]] = bitcast i64* [[N_ADDR]] to i32* -// CHECK2-NEXT: [[TMP2:%.*]] = load i32, i32* [[CONV]], align 8 -// CHECK2-NEXT: [[TMP3:%.*]] = load i32, i32* [[CONV1]], align 8 +// CHECK2-NEXT: [[TMP2:%.*]] = load i32, i32* [[CONV]], align 4 +// CHECK2-NEXT: [[TMP3:%.*]] = load i32, i32* [[CONV1]], align 4 // CHECK2-NEXT: call void @__kmpc_push_num_teams(%struct.ident_t* @[[GLOB2]], i32 [[TMP0]], i32 [[TMP2]], i32 [[TMP3]]) // CHECK2-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB2]], i32 3, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32*, i32*, [100 x i32]*)* @.omp_outlined. to void (i32*, i32*, ...)*), i32* [[CONV2]], i32* [[CONV3]], [100 x i32]* [[TMP1]]) // CHECK2-NEXT: ret void @@ -4329,7 +4329,7 @@ int main (int argc, char **argv) { // CHECK21-NEXT: store i32 [[COND]], i32* [[DOTOMP_UB]], align 4 // CHECK21-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 // CHECK21-NEXT: store i32 [[TMP5]], i32* [[DOTOMP_IV]], align 4 -// CHECK21-NEXT: [[TMP6:%.*]] = load i8, i8* [[CONV]], align 8 +// CHECK21-NEXT: [[TMP6:%.*]] = load i8, i8* [[CONV]], align 1 // CHECK21-NEXT: [[TOBOOL:%.*]] = trunc i8 [[TMP6]] to i1 // CHECK21-NEXT: br i1 [[TOBOOL]], label [[OMP_IF_THEN:%.*]], label [[OMP_IF_ELSE:%.*]] // CHECK21: omp_if.then: @@ -4551,7 +4551,7 @@ int main (int argc, char **argv) { // CHECK22-NEXT: store i32 [[COND]], i32* [[DOTOMP_UB]], align 4 // CHECK22-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 // CHECK22-NEXT: store i32 [[TMP5]], i32* [[DOTOMP_IV]], align 4 -// CHECK22-NEXT: [[TMP6:%.*]] = load i8, i8* [[CONV]], align 8 +// CHECK22-NEXT: [[TMP6:%.*]] = load i8, i8* [[CONV]], align 1 // CHECK22-NEXT: [[TOBOOL:%.*]] = trunc i8 [[TMP6]] to i1 // CHECK22-NEXT: br i1 [[TOBOOL]], label [[OMP_IF_THEN:%.*]], label [[OMP_IF_ELSE:%.*]] // CHECK22: omp_if.then: @@ -4773,7 +4773,7 @@ int main (int argc, char **argv) { // CHECK23-NEXT: store i32 [[COND]], i32* [[DOTOMP_UB]], align 4 // CHECK23-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 // CHECK23-NEXT: store i32 [[TMP5]], i32* [[DOTOMP_IV]], align 4 -// CHECK23-NEXT: [[TMP6:%.*]] = load i8, i8* [[CONV]], align 4 +// CHECK23-NEXT: [[TMP6:%.*]] = load i8, i8* [[CONV]], align 1 // CHECK23-NEXT: [[TOBOOL:%.*]] = trunc i8 [[TMP6]] to i1 // CHECK23-NEXT: br i1 [[TOBOOL]], label [[OMP_IF_THEN:%.*]], label [[OMP_IF_ELSE:%.*]] // CHECK23: omp_if.then: @@ -4993,7 +4993,7 @@ int main (int argc, char **argv) { // CHECK24-NEXT: store i32 [[COND]], i32* [[DOTOMP_UB]], align 4 // CHECK24-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 // CHECK24-NEXT: store i32 [[TMP5]], i32* [[DOTOMP_IV]], align 4 -// CHECK24-NEXT: [[TMP6:%.*]] = load i8, i8* [[CONV]], align 4 +// CHECK24-NEXT: [[TMP6:%.*]] = load i8, i8* [[CONV]], align 1 // CHECK24-NEXT: [[TOBOOL:%.*]] = trunc i8 [[TMP6]] to i1 // CHECK24-NEXT: br i1 [[TOBOOL]], label [[OMP_IF_THEN:%.*]], label [[OMP_IF_ELSE:%.*]] // CHECK24: omp_if.then: @@ -5991,8 +5991,8 @@ int main (int argc, char **argv) { // CHECK33-NEXT: [[CONV:%.*]] = bitcast i64* [[TE_ADDR]] to i32* // CHECK33-NEXT: [[CONV1:%.*]] = bitcast i64* [[TH_ADDR]] to i32* // CHECK33-NEXT: [[TMP1:%.*]] = load [10 x 
i32]*, [10 x i32]** [[A_ADDR]], align 8 -// CHECK33-NEXT: [[TMP2:%.*]] = load i32, i32* [[CONV]], align 8 -// CHECK33-NEXT: [[TMP3:%.*]] = load i32, i32* [[CONV1]], align 8 +// CHECK33-NEXT: [[TMP2:%.*]] = load i32, i32* [[CONV]], align 4 +// CHECK33-NEXT: [[TMP3:%.*]] = load i32, i32* [[CONV1]], align 4 // CHECK33-NEXT: call void @__kmpc_push_num_teams(%struct.ident_t* @[[GLOB2]], i32 [[TMP0]], i32 [[TMP2]], i32 [[TMP3]]) // CHECK33-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB2]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, [10 x i32]*)* @.omp_outlined..1 to void (i32*, i32*, ...)*), [10 x i32]* [[TMP1]]) // CHECK33-NEXT: ret void @@ -6369,8 +6369,8 @@ int main (int argc, char **argv) { // CHECK34-NEXT: [[CONV:%.*]] = bitcast i64* [[TE_ADDR]] to i32* // CHECK34-NEXT: [[CONV1:%.*]] = bitcast i64* [[TH_ADDR]] to i32* // CHECK34-NEXT: [[TMP1:%.*]] = load [10 x i32]*, [10 x i32]** [[A_ADDR]], align 8 -// CHECK34-NEXT: [[TMP2:%.*]] = load i32, i32* [[CONV]], align 8 -// CHECK34-NEXT: [[TMP3:%.*]] = load i32, i32* [[CONV1]], align 8 +// CHECK34-NEXT: [[TMP2:%.*]] = load i32, i32* [[CONV]], align 4 +// CHECK34-NEXT: [[TMP3:%.*]] = load i32, i32* [[CONV1]], align 4 // CHECK34-NEXT: call void @__kmpc_push_num_teams(%struct.ident_t* @[[GLOB2]], i32 [[TMP0]], i32 [[TMP2]], i32 [[TMP3]]) // CHECK34-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB2]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, [10 x i32]*)* @.omp_outlined..1 to void (i32*, i32*, ...)*), [10 x i32]* [[TMP1]]) // CHECK34-NEXT: ret void @@ -7321,7 +7321,7 @@ int main (int argc, char **argv) { // CHECK37-NEXT: [[CONV1:%.*]] = bitcast i64* [[N_ADDR]] to i32* // CHECK37-NEXT: [[TMP0:%.*]] = load i64, i64* [[VLA_ADDR]], align 8 // CHECK37-NEXT: [[TMP1:%.*]] = load i32*, i32** [[A_ADDR]], align 8 -// CHECK37-NEXT: [[TMP2:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK37-NEXT: [[TMP2:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK37-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[TMP2]], 0 // CHECK37-NEXT: [[FROMBOOL:%.*]] = zext i1 [[TOBOOL]] to i8 // CHECK37-NEXT: store i8 [[FROMBOOL]], i8* [[DOTCAPTURE_EXPR_]], align 1 @@ -7399,7 +7399,7 @@ int main (int argc, char **argv) { // CHECK37-NEXT: store i32 [[COND]], i32* [[DOTOMP_UB]], align 4 // CHECK37-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 // CHECK37-NEXT: store i32 [[TMP13]], i32* [[DOTOMP_IV]], align 4 -// CHECK37-NEXT: [[TMP14:%.*]] = load i8, i8* [[CONV]], align 8 +// CHECK37-NEXT: [[TMP14:%.*]] = load i8, i8* [[CONV]], align 1 // CHECK37-NEXT: [[TOBOOL:%.*]] = trunc i8 [[TMP14]] to i1 // CHECK37-NEXT: br i1 [[TOBOOL]], label [[OMP_IF_THEN:%.*]], label [[OMP_IF_ELSE:%.*]] // CHECK37: omp_if.then: @@ -7552,8 +7552,8 @@ int main (int argc, char **argv) { // CHECK37-NEXT: [[CONV:%.*]] = bitcast i64* [[TE_ADDR]] to i32* // CHECK37-NEXT: [[CONV1:%.*]] = bitcast i64* [[TH_ADDR]] to i32* // CHECK37-NEXT: [[TMP1:%.*]] = load [10 x i32]*, [10 x i32]** [[A_ADDR]], align 8 -// CHECK37-NEXT: [[TMP2:%.*]] = load i32, i32* [[CONV]], align 8 -// CHECK37-NEXT: [[TMP3:%.*]] = load i32, i32* [[CONV1]], align 8 +// CHECK37-NEXT: [[TMP2:%.*]] = load i32, i32* [[CONV]], align 4 +// CHECK37-NEXT: [[TMP3:%.*]] = load i32, i32* [[CONV1]], align 4 // CHECK37-NEXT: call void @__kmpc_push_num_teams(%struct.ident_t* @[[GLOB2]], i32 [[TMP0]], i32 [[TMP2]], i32 [[TMP3]]) // CHECK37-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, 
...)*, ...) @__kmpc_fork_teams(%struct.ident_t* @[[GLOB2]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, [10 x i32]*)* @.omp_outlined..1 to void (i32*, i32*, ...)*), [10 x i32]* [[TMP1]]) // CHECK37-NEXT: ret void @@ -7764,7 +7764,7 @@ int main (int argc, char **argv) { // CHECK38-NEXT: [[CONV1:%.*]] = bitcast i64* [[N_ADDR]] to i32* // CHECK38-NEXT: [[TMP0:%.*]] = load i64, i64* [[VLA_ADDR]], align 8 // CHECK38-NEXT: [[TMP1:%.*]] = load i32*, i32** [[A_ADDR]], align 8 -// CHECK38-NEXT: [[TMP2:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK38-NEXT: [[TMP2:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK38-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[TMP2]], 0 // CHECK38-NEXT: [[FROMBOOL:%.*]] = zext i1 [[TOBOOL]] to i8 // CHECK38-NEXT: store i8 [[FROMBOOL]], i8* [[DOTCAPTURE_EXPR_]], align 1 @@ -7842,7 +7842,7 @@ int main (int argc, char **argv) { // CHECK38-NEXT: store i32 [[COND]], i32* [[DOTOMP_UB]], align 4 // CHECK38-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 // CHECK38-NEXT: store i32 [[TMP13]], i32* [[DOTOMP_IV]], align 4 -// CHECK38-NEXT: [[TMP14:%.*]] = load i8, i8* [[CONV]], align 8 +// CHECK38-NEXT: [[TMP14:%.*]] = load i8, i8* [[CONV]], align 1 // CHECK38-NEXT: [[TOBOOL:%.*]] = trunc i8 [[TMP14]] to i1 // CHECK38-NEXT: br i1 [[TOBOOL]], label [[OMP_IF_THEN:%.*]], label [[OMP_IF_ELSE:%.*]] // CHECK38: omp_if.then: @@ -7995,8 +7995,8 @@ int main (int argc, char **argv) { // CHECK38-NEXT: [[CONV:%.*]] = bitcast i64* [[TE_ADDR]] to i32* // CHECK38-NEXT: [[CONV1:%.*]] = bitcast i64* [[TH_ADDR]] to i32* // CHECK38-NEXT: [[TMP1:%.*]] = load [10 x i32]*, [10 x i32]** [[A_ADDR]], align 8 -// CHECK38-NEXT: [[TMP2:%.*]] = load i32, i32* [[CONV]], align 8 -// CHECK38-NEXT: [[TMP3:%.*]] = load i32, i32* [[CONV1]], align 8 +// CHECK38-NEXT: [[TMP2:%.*]] = load i32, i32* [[CONV]], align 4 +// CHECK38-NEXT: [[TMP3:%.*]] = load i32, i32* [[CONV1]], align 4 // CHECK38-NEXT: call void @__kmpc_push_num_teams(%struct.ident_t* @[[GLOB2]], i32 [[TMP0]], i32 [[TMP2]], i32 [[TMP3]]) // CHECK38-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) 
@__kmpc_fork_teams(%struct.ident_t* @[[GLOB2]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, [10 x i32]*)* @.omp_outlined..1 to void (i32*, i32*, ...)*), [10 x i32]* [[TMP1]]) // CHECK38-NEXT: ret void @@ -8281,7 +8281,7 @@ int main (int argc, char **argv) { // CHECK39-NEXT: store i32 [[COND]], i32* [[DOTOMP_UB]], align 4 // CHECK39-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 // CHECK39-NEXT: store i32 [[TMP13]], i32* [[DOTOMP_IV]], align 4 -// CHECK39-NEXT: [[TMP14:%.*]] = load i8, i8* [[CONV]], align 4 +// CHECK39-NEXT: [[TMP14:%.*]] = load i8, i8* [[CONV]], align 1 // CHECK39-NEXT: [[TOBOOL:%.*]] = trunc i8 [[TMP14]] to i1 // CHECK39-NEXT: br i1 [[TOBOOL]], label [[OMP_IF_THEN:%.*]], label [[OMP_IF_ELSE:%.*]] // CHECK39: omp_if.then: @@ -8713,7 +8713,7 @@ int main (int argc, char **argv) { // CHECK40-NEXT: store i32 [[COND]], i32* [[DOTOMP_UB]], align 4 // CHECK40-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 // CHECK40-NEXT: store i32 [[TMP13]], i32* [[DOTOMP_IV]], align 4 -// CHECK40-NEXT: [[TMP14:%.*]] = load i8, i8* [[CONV]], align 4 +// CHECK40-NEXT: [[TMP14:%.*]] = load i8, i8* [[CONV]], align 1 // CHECK40-NEXT: [[TOBOOL:%.*]] = trunc i8 [[TMP14]] to i1 // CHECK40-NEXT: br i1 [[TOBOOL]], label [[OMP_IF_THEN:%.*]], label [[OMP_IF_ELSE:%.*]] // CHECK40: omp_if.then: diff --git a/clang/test/OpenMP/teams_distribute_simd_dist_schedule_codegen.cpp b/clang/test/OpenMP/teams_distribute_simd_dist_schedule_codegen.cpp index fbf312e5e312..ec700b36af04 100644 --- a/clang/test/OpenMP/teams_distribute_simd_dist_schedule_codegen.cpp +++ b/clang/test/OpenMP/teams_distribute_simd_dist_schedule_codegen.cpp @@ -2571,7 +2571,7 @@ int main (int argc, char **argv) { // CHECK9-NEXT: [[CONV:%.*]] = bitcast i64* [[N_ADDR]] to i32* // CHECK9-NEXT: [[TMP0:%.*]] = load i64, i64* [[VLA_ADDR]], align 8 // CHECK9-NEXT: [[TMP1:%.*]] = load i32*, i32** [[A_ADDR]], align 8 -// CHECK9-NEXT: [[TMP2:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK9-NEXT: [[TMP2:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK9-NEXT: store i32 [[TMP2]], i32* [[DOTCAPTURE_EXPR_]], align 4 // CHECK9-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 // CHECK9-NEXT: [[CONV1:%.*]] = bitcast i64* [[DOTCAPTURE_EXPR__CASTED]] to i32* @@ -2627,7 +2627,7 @@ int main (int argc, char **argv) { // CHECK9-NEXT: store i32 [[TMP6]], i32* [[DOTOMP_UB]], align 4 // CHECK9-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK9-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK9-NEXT: [[TMP7:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK9-NEXT: [[TMP7:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK9-NEXT: [[TMP8:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK9-NEXT: [[TMP9:%.*]] = load i32, i32* [[TMP8]], align 4 // CHECK9-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP9]], i32 91, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 [[TMP7]]) @@ -3540,7 +3540,7 @@ int main (int argc, char **argv) { // CHECK10-NEXT: [[CONV:%.*]] = bitcast i64* [[N_ADDR]] to i32* // CHECK10-NEXT: [[TMP0:%.*]] = load i64, i64* [[VLA_ADDR]], align 8 // CHECK10-NEXT: [[TMP1:%.*]] = load i32*, i32** [[A_ADDR]], align 8 -// CHECK10-NEXT: [[TMP2:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK10-NEXT: [[TMP2:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK10-NEXT: store i32 [[TMP2]], i32* [[DOTCAPTURE_EXPR_]], align 4 // CHECK10-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 
// CHECK10-NEXT: [[CONV1:%.*]] = bitcast i64* [[DOTCAPTURE_EXPR__CASTED]] to i32* @@ -3596,7 +3596,7 @@ int main (int argc, char **argv) { // CHECK10-NEXT: store i32 [[TMP6]], i32* [[DOTOMP_UB]], align 4 // CHECK10-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 // CHECK10-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK10-NEXT: [[TMP7:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK10-NEXT: [[TMP7:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK10-NEXT: [[TMP8:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK10-NEXT: [[TMP9:%.*]] = load i32, i32* [[TMP8]], align 4 // CHECK10-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP9]], i32 91, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 [[TMP7]]) diff --git a/clang/test/OpenMP/teams_distribute_simd_firstprivate_codegen.cpp b/clang/test/OpenMP/teams_distribute_simd_firstprivate_codegen.cpp index 36d65ef6e984..e04322d0eaaf 100644 --- a/clang/test/OpenMP/teams_distribute_simd_firstprivate_codegen.cpp +++ b/clang/test/OpenMP/teams_distribute_simd_firstprivate_codegen.cpp @@ -348,11 +348,11 @@ int main() { // CHECK1-NEXT: [[TMP1:%.*]] = load [2 x %struct.S]*, [2 x %struct.S]** [[S_ARR_ADDR]], align 8 // CHECK1-NEXT: [[TMP2:%.*]] = load %struct.S*, %struct.S** [[VAR_ADDR]], align 8 // CHECK1-NEXT: [[CONV1:%.*]] = bitcast i64* [[SIVAR_ADDR]] to i32* -// CHECK1-NEXT: [[TMP3:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK1-NEXT: [[TMP3:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK1-NEXT: [[CONV2:%.*]] = bitcast i64* [[T_VAR_CASTED]] to i32* // CHECK1-NEXT: store i32 [[TMP3]], i32* [[CONV2]], align 4 // CHECK1-NEXT: [[TMP4:%.*]] = load i64, i64* [[T_VAR_CASTED]], align 8 -// CHECK1-NEXT: [[TMP5:%.*]] = load i32, i32* [[CONV1]], align 8 +// CHECK1-NEXT: [[TMP5:%.*]] = load i32, i32* [[CONV1]], align 4 // CHECK1-NEXT: [[CONV3:%.*]] = bitcast i64* [[SIVAR_CASTED]] to i32* // CHECK1-NEXT: store i32 [[TMP5]], i32* [[CONV3]], align 4 // CHECK1-NEXT: [[TMP6:%.*]] = load i64, i64* [[SIVAR_CASTED]], align 8 @@ -449,7 +449,7 @@ int main() { // CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP14]], 1 // CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK1-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !5 -// CHECK1-NEXT: [[TMP15:%.*]] = load i32, i32* [[CONV]], align 8, !llvm.access.group !5 +// CHECK1-NEXT: [[TMP15:%.*]] = load i32, i32* [[CONV]], align 4, !llvm.access.group !5 // CHECK1-NEXT: [[TMP16:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !5 // CHECK1-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP16]] to i64 // CHECK1-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x i32], [2 x i32]* [[VEC2]], i64 0, i64 [[IDXPROM]] @@ -461,9 +461,9 @@ int main() { // CHECK1-NEXT: [[TMP19:%.*]] = bitcast %struct.S* [[VAR5]] to i8* // CHECK1-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 [[TMP18]], i8* align 4 [[TMP19]], i64 4, i1 false), !llvm.access.group !5 // CHECK1-NEXT: [[TMP20:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !5 -// CHECK1-NEXT: [[TMP21:%.*]] = load i32, i32* [[CONV1]], align 8, !llvm.access.group !5 +// CHECK1-NEXT: [[TMP21:%.*]] = load i32, i32* [[CONV1]], align 4, !llvm.access.group !5 // CHECK1-NEXT: [[ADD10:%.*]] = add nsw i32 [[TMP21]], [[TMP20]] -// CHECK1-NEXT: store i32 [[ADD10]], i32* [[CONV1]], align 8, !llvm.access.group !5 +// CHECK1-NEXT: store i32 [[ADD10]], i32* [[CONV1]], align 4, !llvm.access.group !5 // CHECK1-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK1: 
omp.body.continue: // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] @@ -706,7 +706,7 @@ int main() { // CHECK1-NEXT: [[TMP1:%.*]] = load [2 x %struct.S.0]*, [2 x %struct.S.0]** [[S_ARR_ADDR]], align 8 // CHECK1-NEXT: [[TMP2:%.*]] = load %struct.S.0*, %struct.S.0** [[VAR_ADDR]], align 8 // CHECK1-NEXT: store %struct.S.0* [[TMP2]], %struct.S.0** [[TMP]], align 8 -// CHECK1-NEXT: [[TMP3:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK1-NEXT: [[TMP3:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK1-NEXT: [[CONV1:%.*]] = bitcast i64* [[T_VAR_CASTED]] to i32* // CHECK1-NEXT: store i32 [[TMP3]], i32* [[CONV1]], align 4 // CHECK1-NEXT: [[TMP4:%.*]] = load i64, i64* [[T_VAR_CASTED]], align 8 @@ -806,7 +806,7 @@ int main() { // CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP15]], 1 // CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK1-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !11 -// CHECK1-NEXT: [[TMP16:%.*]] = load i32, i32* [[CONV]], align 8, !llvm.access.group !11 +// CHECK1-NEXT: [[TMP16:%.*]] = load i32, i32* [[CONV]], align 4, !llvm.access.group !11 // CHECK1-NEXT: [[TMP17:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !11 // CHECK1-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP17]] to i64 // CHECK1-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x i32], [2 x i32]* [[VEC2]], i64 0, i64 [[IDXPROM]] @@ -1154,11 +1154,11 @@ int main() { // CHECK2-NEXT: [[TMP1:%.*]] = load [2 x %struct.S]*, [2 x %struct.S]** [[S_ARR_ADDR]], align 8 // CHECK2-NEXT: [[TMP2:%.*]] = load %struct.S*, %struct.S** [[VAR_ADDR]], align 8 // CHECK2-NEXT: [[CONV1:%.*]] = bitcast i64* [[SIVAR_ADDR]] to i32* -// CHECK2-NEXT: [[TMP3:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK2-NEXT: [[TMP3:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK2-NEXT: [[CONV2:%.*]] = bitcast i64* [[T_VAR_CASTED]] to i32* // CHECK2-NEXT: store i32 [[TMP3]], i32* [[CONV2]], align 4 // CHECK2-NEXT: [[TMP4:%.*]] = load i64, i64* [[T_VAR_CASTED]], align 8 -// CHECK2-NEXT: [[TMP5:%.*]] = load i32, i32* [[CONV1]], align 8 +// CHECK2-NEXT: [[TMP5:%.*]] = load i32, i32* [[CONV1]], align 4 // CHECK2-NEXT: [[CONV3:%.*]] = bitcast i64* [[SIVAR_CASTED]] to i32* // CHECK2-NEXT: store i32 [[TMP5]], i32* [[CONV3]], align 4 // CHECK2-NEXT: [[TMP6:%.*]] = load i64, i64* [[SIVAR_CASTED]], align 8 @@ -1255,7 +1255,7 @@ int main() { // CHECK2-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP14]], 1 // CHECK2-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK2-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !5 -// CHECK2-NEXT: [[TMP15:%.*]] = load i32, i32* [[CONV]], align 8, !llvm.access.group !5 +// CHECK2-NEXT: [[TMP15:%.*]] = load i32, i32* [[CONV]], align 4, !llvm.access.group !5 // CHECK2-NEXT: [[TMP16:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !5 // CHECK2-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP16]] to i64 // CHECK2-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x i32], [2 x i32]* [[VEC2]], i64 0, i64 [[IDXPROM]] @@ -1267,9 +1267,9 @@ int main() { // CHECK2-NEXT: [[TMP19:%.*]] = bitcast %struct.S* [[VAR5]] to i8* // CHECK2-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 [[TMP18]], i8* align 4 [[TMP19]], i64 4, i1 false), !llvm.access.group !5 // CHECK2-NEXT: [[TMP20:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !5 -// CHECK2-NEXT: [[TMP21:%.*]] = load i32, i32* [[CONV1]], align 8, !llvm.access.group !5 +// CHECK2-NEXT: [[TMP21:%.*]] = load i32, i32* [[CONV1]], align 4, !llvm.access.group !5 // CHECK2-NEXT: [[ADD10:%.*]] = add nsw i32 [[TMP21]], [[TMP20]] -// CHECK2-NEXT: 
store i32 [[ADD10]], i32* [[CONV1]], align 8, !llvm.access.group !5 +// CHECK2-NEXT: store i32 [[ADD10]], i32* [[CONV1]], align 4, !llvm.access.group !5 // CHECK2-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK2: omp.body.continue: // CHECK2-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] @@ -1512,7 +1512,7 @@ int main() { // CHECK2-NEXT: [[TMP1:%.*]] = load [2 x %struct.S.0]*, [2 x %struct.S.0]** [[S_ARR_ADDR]], align 8 // CHECK2-NEXT: [[TMP2:%.*]] = load %struct.S.0*, %struct.S.0** [[VAR_ADDR]], align 8 // CHECK2-NEXT: store %struct.S.0* [[TMP2]], %struct.S.0** [[TMP]], align 8 -// CHECK2-NEXT: [[TMP3:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK2-NEXT: [[TMP3:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK2-NEXT: [[CONV1:%.*]] = bitcast i64* [[T_VAR_CASTED]] to i32* // CHECK2-NEXT: store i32 [[TMP3]], i32* [[CONV1]], align 4 // CHECK2-NEXT: [[TMP4:%.*]] = load i64, i64* [[T_VAR_CASTED]], align 8 @@ -1612,7 +1612,7 @@ int main() { // CHECK2-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP15]], 1 // CHECK2-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK2-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !11 -// CHECK2-NEXT: [[TMP16:%.*]] = load i32, i32* [[CONV]], align 8, !llvm.access.group !11 +// CHECK2-NEXT: [[TMP16:%.*]] = load i32, i32* [[CONV]], align 4, !llvm.access.group !11 // CHECK2-NEXT: [[TMP17:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !11 // CHECK2-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP17]] to i64 // CHECK2-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x i32], [2 x i32]* [[VEC2]], i64 0, i64 [[IDXPROM]] @@ -4778,7 +4778,7 @@ int main() { // CHECK9-NEXT: [[CONV1:%.*]] = bitcast i64* [[SIVAR_ADDR]] to i32* // CHECK9-NEXT: [[CONV2:%.*]] = bitcast i64* [[G1_ADDR]] to i32* // CHECK9-NEXT: store i32* [[CONV2]], i32** [[TMP]], align 8 -// CHECK9-NEXT: [[TMP0:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK9-NEXT: [[TMP0:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK9-NEXT: [[CONV3:%.*]] = bitcast i64* [[G_CASTED]] to i32* // CHECK9-NEXT: store i32 [[TMP0]], i32* [[CONV3]], align 4 // CHECK9-NEXT: [[TMP1:%.*]] = load i64, i64* [[G_CASTED]], align 8 @@ -4787,7 +4787,7 @@ int main() { // CHECK9-NEXT: [[CONV4:%.*]] = bitcast i64* [[G1_CASTED]] to i32* // CHECK9-NEXT: store i32 [[TMP3]], i32* [[CONV4]], align 4 // CHECK9-NEXT: [[TMP4:%.*]] = load i64, i64* [[G1_CASTED]], align 8 -// CHECK9-NEXT: [[TMP5:%.*]] = load i32, i32* [[CONV1]], align 8 +// CHECK9-NEXT: [[TMP5:%.*]] = load i32, i32* [[CONV1]], align 4 // CHECK9-NEXT: [[CONV5:%.*]] = bitcast i64* [[SIVAR_CASTED]] to i32* // CHECK9-NEXT: store i32 [[TMP5]], i32* [[CONV5]], align 4 // CHECK9-NEXT: [[TMP6:%.*]] = load i64, i64* [[SIVAR_CASTED]], align 8 @@ -4852,10 +4852,10 @@ int main() { // CHECK9-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP7]], 1 // CHECK9-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK9-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !4 -// CHECK9-NEXT: store i32 1, i32* [[CONV]], align 8, !llvm.access.group !4 +// CHECK9-NEXT: store i32 1, i32* [[CONV]], align 4, !llvm.access.group !4 // CHECK9-NEXT: [[TMP8:%.*]] = load i32*, i32** [[TMP]], align 8, !llvm.access.group !4 // CHECK9-NEXT: store volatile i32 1, i32* [[TMP8]], align 4, !llvm.access.group !4 -// CHECK9-NEXT: store i32 2, i32* [[CONV2]], align 8, !llvm.access.group !4 +// CHECK9-NEXT: store i32 2, i32* [[CONV2]], align 4, !llvm.access.group !4 // CHECK9-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[CLASS_ANON_0]], %class.anon.0* [[REF_TMP]], i32 0, i32 0 // CHECK9-NEXT: store i32* [[CONV]], i32** 
[[TMP9]], align 8, !llvm.access.group !4 // CHECK9-NEXT: [[TMP10:%.*]] = getelementptr inbounds [[CLASS_ANON_0]], %class.anon.0* [[REF_TMP]], i32 0, i32 1 @@ -5042,7 +5042,7 @@ int main() { // CHECK10-NEXT: [[CONV1:%.*]] = bitcast i64* [[SIVAR_ADDR]] to i32* // CHECK10-NEXT: [[CONV2:%.*]] = bitcast i64* [[G1_ADDR]] to i32* // CHECK10-NEXT: store i32* [[CONV2]], i32** [[TMP]], align 8 -// CHECK10-NEXT: [[TMP0:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK10-NEXT: [[TMP0:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK10-NEXT: [[CONV3:%.*]] = bitcast i64* [[G_CASTED]] to i32* // CHECK10-NEXT: store i32 [[TMP0]], i32* [[CONV3]], align 4 // CHECK10-NEXT: [[TMP1:%.*]] = load i64, i64* [[G_CASTED]], align 8 @@ -5051,7 +5051,7 @@ int main() { // CHECK10-NEXT: [[CONV4:%.*]] = bitcast i64* [[G1_CASTED]] to i32* // CHECK10-NEXT: store i32 [[TMP3]], i32* [[CONV4]], align 4 // CHECK10-NEXT: [[TMP4:%.*]] = load i64, i64* [[G1_CASTED]], align 8 -// CHECK10-NEXT: [[TMP5:%.*]] = load i32, i32* [[CONV1]], align 8 +// CHECK10-NEXT: [[TMP5:%.*]] = load i32, i32* [[CONV1]], align 4 // CHECK10-NEXT: [[CONV5:%.*]] = bitcast i64* [[SIVAR_CASTED]] to i32* // CHECK10-NEXT: store i32 [[TMP5]], i32* [[CONV5]], align 4 // CHECK10-NEXT: [[TMP6:%.*]] = load i64, i64* [[SIVAR_CASTED]], align 8 @@ -5116,10 +5116,10 @@ int main() { // CHECK10-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP7]], 1 // CHECK10-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] // CHECK10-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !4 -// CHECK10-NEXT: store i32 1, i32* [[CONV]], align 8, !llvm.access.group !4 +// CHECK10-NEXT: store i32 1, i32* [[CONV]], align 4, !llvm.access.group !4 // CHECK10-NEXT: [[TMP8:%.*]] = load i32*, i32** [[TMP]], align 8, !llvm.access.group !4 // CHECK10-NEXT: store volatile i32 1, i32* [[TMP8]], align 4, !llvm.access.group !4 -// CHECK10-NEXT: store i32 2, i32* [[CONV2]], align 8, !llvm.access.group !4 +// CHECK10-NEXT: store i32 2, i32* [[CONV2]], align 4, !llvm.access.group !4 // CHECK10-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[CLASS_ANON_0]], %class.anon.0* [[REF_TMP]], i32 0, i32 0 // CHECK10-NEXT: store i32* [[CONV]], i32** [[TMP9]], align 8, !llvm.access.group !4 // CHECK10-NEXT: [[TMP10:%.*]] = getelementptr inbounds [[CLASS_ANON_0]], %class.anon.0* [[REF_TMP]], i32 0, i32 1 diff --git a/clang/test/OpenMP/teams_firstprivate_codegen.cpp b/clang/test/OpenMP/teams_firstprivate_codegen.cpp index 7ec4418794e3..1df54320269a 100644 --- a/clang/test/OpenMP/teams_firstprivate_codegen.cpp +++ b/clang/test/OpenMP/teams_firstprivate_codegen.cpp @@ -180,7 +180,7 @@ void array_func(float a[3], St s[2], int n, long double vla1[n]) { // CHECK1-NEXT: [[CONV:%.*]] = bitcast i64* [[SIVAR_ADDR]] to i32* // CHECK1-NEXT: [[TMP1:%.*]] = load i32, i32* [[TMP0]], align 128 // CHECK1-NEXT: store i32 [[TMP1]], i32* [[G1]], align 128 -// CHECK1-NEXT: [[TMP2:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK1-NEXT: [[TMP2:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK1-NEXT: [[CONV2:%.*]] = bitcast i64* [[SIVAR_CASTED]] to i32* // CHECK1-NEXT: store i32 [[TMP2]], i32* [[CONV2]], align 4 // CHECK1-NEXT: [[TMP3:%.*]] = load i64, i64* [[SIVAR_CASTED]], align 8 @@ -206,7 +206,7 @@ void array_func(float a[3], St s[2], int n, long double vla1[n]) { // CHECK1-NEXT: [[TMP1:%.*]] = load i32, i32* [[TMP0]], align 128 // CHECK1-NEXT: store i32 [[TMP1]], i32* [[G1]], align 128 // CHECK1-NEXT: store i32 1, i32* [[G1]], align 128 -// CHECK1-NEXT: store i32 2, i32* [[CONV]], align 8 +// CHECK1-NEXT: store i32 2, i32* [[CONV]], align 
4 // CHECK1-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[CLASS_ANON_0]], %class.anon.0* [[REF_TMP]], i32 0, i32 0 // CHECK1-NEXT: store i32* [[G1]], i32** [[TMP2]], align 8 // CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[CLASS_ANON_0]], %class.anon.0* [[REF_TMP]], i32 0, i32 1 @@ -245,7 +245,7 @@ void array_func(float a[3], St s[2], int n, long double vla1[n]) { // CHECK2-NEXT: [[CONV:%.*]] = bitcast i64* [[SIVAR_ADDR]] to i32* // CHECK2-NEXT: [[TMP1:%.*]] = load i32, i32* [[TMP0]], align 128 // CHECK2-NEXT: store i32 [[TMP1]], i32* [[G1]], align 128 -// CHECK2-NEXT: [[TMP2:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK2-NEXT: [[TMP2:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK2-NEXT: [[CONV2:%.*]] = bitcast i64* [[SIVAR_CASTED]] to i32* // CHECK2-NEXT: store i32 [[TMP2]], i32* [[CONV2]], align 4 // CHECK2-NEXT: [[TMP3:%.*]] = load i64, i64* [[SIVAR_CASTED]], align 8 @@ -271,7 +271,7 @@ void array_func(float a[3], St s[2], int n, long double vla1[n]) { // CHECK2-NEXT: [[TMP1:%.*]] = load i32, i32* [[TMP0]], align 128 // CHECK2-NEXT: store i32 [[TMP1]], i32* [[G1]], align 128 // CHECK2-NEXT: store i32 1, i32* [[G1]], align 128 -// CHECK2-NEXT: store i32 2, i32* [[CONV]], align 8 +// CHECK2-NEXT: store i32 2, i32* [[CONV]], align 4 // CHECK2-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[CLASS_ANON_0]], %class.anon.0* [[REF_TMP]], i32 0, i32 0 // CHECK2-NEXT: store i32* [[G1]], i32** [[TMP2]], align 8 // CHECK2-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[CLASS_ANON_0]], %class.anon.0* [[REF_TMP]], i32 0, i32 1 @@ -578,11 +578,11 @@ void array_func(float a[3], St s[2], int n, long double vla1[n]) { // CHECK9-NEXT: [[TMP1:%.*]] = load [2 x %struct.S]*, [2 x %struct.S]** [[S_ARR_ADDR]], align 8 // CHECK9-NEXT: [[TMP2:%.*]] = load %struct.S*, %struct.S** [[VAR_ADDR]], align 8 // CHECK9-NEXT: [[CONV1:%.*]] = bitcast i64* [[SIVAR_ADDR]] to i32* -// CHECK9-NEXT: [[TMP3:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK9-NEXT: [[TMP3:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK9-NEXT: [[CONV2:%.*]] = bitcast i64* [[T_VAR_CASTED]] to i32* // CHECK9-NEXT: store i32 [[TMP3]], i32* [[CONV2]], align 4 // CHECK9-NEXT: [[TMP4:%.*]] = load i64, i64* [[T_VAR_CASTED]], align 8 -// CHECK9-NEXT: [[TMP5:%.*]] = load i32, i32* [[CONV1]], align 8 +// CHECK9-NEXT: [[TMP5:%.*]] = load i32, i32* [[CONV1]], align 4 // CHECK9-NEXT: [[CONV3:%.*]] = bitcast i64* [[SIVAR_CASTED]] to i32* // CHECK9-NEXT: store i32 [[TMP5]], i32* [[CONV3]], align 4 // CHECK9-NEXT: [[TMP6:%.*]] = load i64, i64* [[SIVAR_CASTED]], align 8 @@ -639,14 +639,14 @@ void array_func(float a[3], St s[2], int n, long double vla1[n]) { // CHECK9-NEXT: call void @_ZN2StC1Ev(%struct.St* nonnull align 4 dereferenceable(8) [[AGG_TMP6]]) // CHECK9-NEXT: call void @_ZN1SIfEC1ERKS0_2St(%struct.S* nonnull align 4 dereferenceable(4) [[VAR5]], %struct.S* nonnull align 4 dereferenceable(4) [[TMP2]], %struct.St* [[AGG_TMP6]]) // CHECK9-NEXT: call void @_ZN2StD1Ev(%struct.St* nonnull align 4 dereferenceable(8) [[AGG_TMP6]]) #[[ATTR4]] -// CHECK9-NEXT: [[TMP7:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK9-NEXT: [[TMP7:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK9-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x i32], [2 x i32]* [[VEC2]], i64 0, i64 0 // CHECK9-NEXT: store i32 [[TMP7]], i32* [[ARRAYIDX]], align 4 // CHECK9-NEXT: [[ARRAYIDX7:%.*]] = getelementptr inbounds [2 x %struct.S], [2 x %struct.S]* [[S_ARR3]], i64 0, i64 0 // CHECK9-NEXT: [[TMP8:%.*]] = bitcast %struct.S* [[ARRAYIDX7]] to i8* // CHECK9-NEXT: [[TMP9:%.*]] = 
bitcast %struct.S* [[VAR5]] to i8* // CHECK9-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 [[TMP8]], i8* align 4 [[TMP9]], i64 4, i1 false) -// CHECK9-NEXT: store i32 2, i32* [[CONV1]], align 8 +// CHECK9-NEXT: store i32 2, i32* [[CONV1]], align 4 // CHECK9-NEXT: call void @_ZN1SIfED1Ev(%struct.S* nonnull align 4 dereferenceable(4) [[VAR5]]) #[[ATTR4]] // CHECK9-NEXT: [[ARRAY_BEGIN8:%.*]] = getelementptr inbounds [2 x %struct.S], [2 x %struct.S]* [[S_ARR3]], i32 0, i32 0 // CHECK9-NEXT: [[TMP10:%.*]] = getelementptr inbounds [[STRUCT_S]], %struct.S* [[ARRAY_BEGIN8]], i64 2 @@ -711,7 +711,7 @@ void array_func(float a[3], St s[2], int n, long double vla1[n]) { // CHECK9-NEXT: [[T_VAR_CASTED:%.*]] = alloca i64, align 8 // CHECK9-NEXT: store i64 [[T_VAR]], i64* [[T_VAR_ADDR]], align 8 // CHECK9-NEXT: [[CONV:%.*]] = bitcast i64* [[T_VAR_ADDR]] to i32* -// CHECK9-NEXT: [[TMP0:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK9-NEXT: [[TMP0:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK9-NEXT: [[CONV1:%.*]] = bitcast i64* [[T_VAR_CASTED]] to i32* // CHECK9-NEXT: store i32 [[TMP0]], i32* [[CONV1]], align 4 // CHECK9-NEXT: [[TMP1:%.*]] = load i64, i64* [[T_VAR_CASTED]], align 8 @@ -1309,11 +1309,11 @@ void array_func(float a[3], St s[2], int n, long double vla1[n]) { // CHECK10-NEXT: [[TMP1:%.*]] = load [2 x %struct.S]*, [2 x %struct.S]** [[S_ARR_ADDR]], align 8 // CHECK10-NEXT: [[TMP2:%.*]] = load %struct.S*, %struct.S** [[VAR_ADDR]], align 8 // CHECK10-NEXT: [[CONV1:%.*]] = bitcast i64* [[SIVAR_ADDR]] to i32* -// CHECK10-NEXT: [[TMP3:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK10-NEXT: [[TMP3:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK10-NEXT: [[CONV2:%.*]] = bitcast i64* [[T_VAR_CASTED]] to i32* // CHECK10-NEXT: store i32 [[TMP3]], i32* [[CONV2]], align 4 // CHECK10-NEXT: [[TMP4:%.*]] = load i64, i64* [[T_VAR_CASTED]], align 8 -// CHECK10-NEXT: [[TMP5:%.*]] = load i32, i32* [[CONV1]], align 8 +// CHECK10-NEXT: [[TMP5:%.*]] = load i32, i32* [[CONV1]], align 4 // CHECK10-NEXT: [[CONV3:%.*]] = bitcast i64* [[SIVAR_CASTED]] to i32* // CHECK10-NEXT: store i32 [[TMP5]], i32* [[CONV3]], align 4 // CHECK10-NEXT: [[TMP6:%.*]] = load i64, i64* [[SIVAR_CASTED]], align 8 @@ -1370,14 +1370,14 @@ void array_func(float a[3], St s[2], int n, long double vla1[n]) { // CHECK10-NEXT: call void @_ZN2StC1Ev(%struct.St* nonnull align 4 dereferenceable(8) [[AGG_TMP6]]) // CHECK10-NEXT: call void @_ZN1SIfEC1ERKS0_2St(%struct.S* nonnull align 4 dereferenceable(4) [[VAR5]], %struct.S* nonnull align 4 dereferenceable(4) [[TMP2]], %struct.St* [[AGG_TMP6]]) // CHECK10-NEXT: call void @_ZN2StD1Ev(%struct.St* nonnull align 4 dereferenceable(8) [[AGG_TMP6]]) #[[ATTR4]] -// CHECK10-NEXT: [[TMP7:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK10-NEXT: [[TMP7:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK10-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x i32], [2 x i32]* [[VEC2]], i64 0, i64 0 // CHECK10-NEXT: store i32 [[TMP7]], i32* [[ARRAYIDX]], align 4 // CHECK10-NEXT: [[ARRAYIDX7:%.*]] = getelementptr inbounds [2 x %struct.S], [2 x %struct.S]* [[S_ARR3]], i64 0, i64 0 // CHECK10-NEXT: [[TMP8:%.*]] = bitcast %struct.S* [[ARRAYIDX7]] to i8* // CHECK10-NEXT: [[TMP9:%.*]] = bitcast %struct.S* [[VAR5]] to i8* // CHECK10-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 [[TMP8]], i8* align 4 [[TMP9]], i64 4, i1 false) -// CHECK10-NEXT: store i32 2, i32* [[CONV1]], align 8 +// CHECK10-NEXT: store i32 2, i32* [[CONV1]], align 4 // CHECK10-NEXT: call void @_ZN1SIfED1Ev(%struct.S* nonnull align 4 
dereferenceable(4) [[VAR5]]) #[[ATTR4]] // CHECK10-NEXT: [[ARRAY_BEGIN8:%.*]] = getelementptr inbounds [2 x %struct.S], [2 x %struct.S]* [[S_ARR3]], i32 0, i32 0 // CHECK10-NEXT: [[TMP10:%.*]] = getelementptr inbounds [[STRUCT_S]], %struct.S* [[ARRAY_BEGIN8]], i64 2 @@ -1442,7 +1442,7 @@ void array_func(float a[3], St s[2], int n, long double vla1[n]) { // CHECK10-NEXT: [[T_VAR_CASTED:%.*]] = alloca i64, align 8 // CHECK10-NEXT: store i64 [[T_VAR]], i64* [[T_VAR_ADDR]], align 8 // CHECK10-NEXT: [[CONV:%.*]] = bitcast i64* [[T_VAR_ADDR]] to i32* -// CHECK10-NEXT: [[TMP0:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK10-NEXT: [[TMP0:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK10-NEXT: [[CONV1:%.*]] = bitcast i64* [[T_VAR_CASTED]] to i32* // CHECK10-NEXT: store i32 [[TMP0]], i32* [[CONV1]], align 4 // CHECK10-NEXT: [[TMP1:%.*]] = load i64, i64* [[T_VAR_CASTED]], align 8 diff --git a/clang/test/utils/update_cc_test_checks/Inputs/generated-funcs-regex.c.expected b/clang/test/utils/update_cc_test_checks/Inputs/generated-funcs-regex.c.expected index 03e0f8ef60e0..7cc6b0dae4d4 100644 --- a/clang/test/utils/update_cc_test_checks/Inputs/generated-funcs-regex.c.expected +++ b/clang/test/utils/update_cc_test_checks/Inputs/generated-funcs-regex.c.expected @@ -32,7 +32,7 @@ void foo(int a) // CHECK-NEXT: [[A_ADDR:%.*]] = alloca i64, align 8 // CHECK-NEXT: store i64 [[A:%.*]], i64* [[A_ADDR]], align 8 // CHECK-NEXT: [[CONV:%.*]] = bitcast i64* [[A_ADDR]] to i32* -// CHECK-NEXT: [[TMP0:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK-NEXT: [[TMP0:%.*]] = load i32, i32* [[CONV]], align 4 // CHECK-NEXT: call void @use(i32 [[TMP0]]) // CHECK-NEXT: ret void // From 510d0e493365386b9eb082e91d72b62fe716d32e Mon Sep 17 00:00:00 2001 From: Nikita Popov Date: Wed, 22 Dec 2021 11:27:44 +0100 Subject: [PATCH 123/293] [IR] Add memory attributes for init.trampoline Based on my reading of https://llvm.org/docs/LangRef.html#llvm-init-trampoline-intrinsic, init.trampoline writes to the first argument, while the other two are readnone. These two arguments are only captured and written into the trampoline memory. This also matches what I see in the X86TargetLowering::LowerINIT_TRAMPOLINE() implementation. Differential Revision: https://reviews.llvm.org/D116149 --- llvm/include/llvm/IR/Intrinsics.td | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/llvm/include/llvm/IR/Intrinsics.td b/llvm/include/llvm/IR/Intrinsics.td index 138770030ebb..acea17bc7ab8 100644 --- a/llvm/include/llvm/IR/Intrinsics.td +++ b/llvm/include/llvm/IR/Intrinsics.td @@ -1019,7 +1019,10 @@ def int_codeview_annotation : DefaultAttrsIntrinsic<[], [llvm_metadata_ty], // def int_init_trampoline : Intrinsic<[], [llvm_ptr_ty, llvm_ptr_ty, llvm_ptr_ty], - [IntrArgMemOnly, NoCapture>]>, + [IntrArgMemOnly, NoCapture>, + WriteOnly>, + ReadNone>, + ReadNone>]>, GCCBuiltin<"__builtin_init_trampoline">; def int_adjust_trampoline : Intrinsic<[llvm_ptr_ty], [llvm_ptr_ty], From 55861c9681dd194ca08ccea8afaa5c3600f64e7b Mon Sep 17 00:00:00 2001 From: Nikita Popov Date: Thu, 23 Dec 2021 09:25:26 +0100 Subject: [PATCH 124/293] [IR] Use default attributes for trampoline intrinsics These intrinsics do not free, synchronize or diverge. 
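For context, a rough usage sketch of the two intrinsics these patches annotate, adapted from the LangRef trampoline example; the trampoline buffer size and alignment, the nested function @f, and the %nval operand are illustrative assumptions, not part of either patch:

    declare void @llvm.init.trampoline(i8*, i8*, i8*)
    declare i8* @llvm.adjust.trampoline(i8*)
    declare i32 @f(i8* nest, i32)          ; hypothetical nested function

    define i32 @call_closure(i8* %nval, i32 %x) {
      %tramp = alloca [10 x i8], align 4   ; size/alignment are target-specific assumptions
      %tramp.p = getelementptr [10 x i8], [10 x i8]* %tramp, i64 0, i64 0
      ; Only %tramp.p is written through; the @f and %nval pointers are merely
      ; captured into the trampoline memory (the WriteOnly/ReadNone markings
      ; added in the previous patch encode exactly that).
      call void @llvm.init.trampoline(i8* %tramp.p, i8* bitcast (i32 (i8*, i32)* @f to i8*), i8* %nval)
      ; adjust.trampoline only reads the argument memory.
      %adj = call i8* @llvm.adjust.trampoline(i8* %tramp.p)
      %fp = bitcast i8* %adj to i32 (i32)*
      %r = call i32 %fp(i32 %x)
      ret i32 %r
    }

Neither call frees memory, synchronizes, or diverges, which is why switching them to DefaultAttrsIntrinsic below is appropriate.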
--- llvm/include/llvm/IR/Intrinsics.td | 20 +++++++++----------- 1 file changed, 9 insertions(+), 11 deletions(-) diff --git a/llvm/include/llvm/IR/Intrinsics.td b/llvm/include/llvm/IR/Intrinsics.td index acea17bc7ab8..da580de3dbd3 100644 --- a/llvm/include/llvm/IR/Intrinsics.td +++ b/llvm/include/llvm/IR/Intrinsics.td @@ -1017,17 +1017,15 @@ def int_codeview_annotation : DefaultAttrsIntrinsic<[], [llvm_metadata_ty], //===------------------------ Trampoline Intrinsics -----------------------===// // -def int_init_trampoline : Intrinsic<[], - [llvm_ptr_ty, llvm_ptr_ty, llvm_ptr_ty], - [IntrArgMemOnly, NoCapture>, - WriteOnly>, - ReadNone>, - ReadNone>]>, - GCCBuiltin<"__builtin_init_trampoline">; - -def int_adjust_trampoline : Intrinsic<[llvm_ptr_ty], [llvm_ptr_ty], - [IntrReadMem, IntrArgMemOnly]>, - GCCBuiltin<"__builtin_adjust_trampoline">; +def int_init_trampoline : DefaultAttrsIntrinsic< + [], [llvm_ptr_ty, llvm_ptr_ty, llvm_ptr_ty], + [IntrArgMemOnly, NoCapture>, WriteOnly>, + ReadNone>, ReadNone>]>, + GCCBuiltin<"__builtin_init_trampoline">; + +def int_adjust_trampoline : DefaultAttrsIntrinsic< + [llvm_ptr_ty], [llvm_ptr_ty], [IntrReadMem, IntrArgMemOnly]>, + GCCBuiltin<"__builtin_adjust_trampoline">; //===------------------------ Overflow Intrinsics -------------------------===// // From f8042492fe2aa5f13457cb6eda2dc940ad7dc2e9 Mon Sep 17 00:00:00 2001 From: Nikita Popov Date: Thu, 23 Dec 2021 09:31:44 +0100 Subject: [PATCH 125/293] [DSE] Regenerate test checks (NFC) --- .../DeadStoreElimination/invariant.start.ll | 25 +++++++++++-------- 1 file changed, 14 insertions(+), 11 deletions(-) diff --git a/llvm/test/Transforms/DeadStoreElimination/invariant.start.ll b/llvm/test/Transforms/DeadStoreElimination/invariant.start.ll index 27400cd4ed16..f00001f967e6 100644 --- a/llvm/test/Transforms/DeadStoreElimination/invariant.start.ll +++ b/llvm/test/Transforms/DeadStoreElimination/invariant.start.ll @@ -1,3 +1,4 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py ; Test to make sure llvm.invariant.start calls are not treated as clobbers. ; RUN: opt < %s -basic-aa -dse -S | FileCheck %s @@ -6,29 +7,31 @@ declare {}* @llvm.invariant.start.p0i8(i64, i8* nocapture) nounwind readonly ; We cannot remove the store 1 to %p. ; FIXME: By the semantics of invariant.start, the store 3 to p is unreachable. define void @test(i8 *%p) { +; CHECK-LABEL: @test( +; CHECK-NEXT: store i8 1, i8* [[P:%.*]], align 4 +; CHECK-NEXT: [[I:%.*]] = call {}* @llvm.invariant.start.p0i8(i64 1, i8* [[P]]) +; CHECK-NEXT: store i8 3, i8* [[P]], align 4 +; CHECK-NEXT: ret void +; store i8 1, i8* %p, align 4 %i = call {}* @llvm.invariant.start.p0i8(i64 1, i8* %p) store i8 3, i8* %p, align 4 ret void -; CHECK-LABEL: @test( -; CHECK-NEXT: store i8 1, i8* %p, align 4 -; CHECK-NEXT: %i = call {}* @llvm.invariant.start.p0i8(i64 1, i8* %p) -; CHECK-NEXT: store i8 3, i8* %p, align 4 -; CHECK-NEXT: ret void } ; FIXME: We should be able to remove the first store to p, even though p and q ; may alias. 
define void @test2(i8* %p, i8* %q) { +; CHECK-LABEL: @test2( +; CHECK-NEXT: store i8 1, i8* [[P:%.*]], align 4 +; CHECK-NEXT: store i8 2, i8* [[Q:%.*]], align 4 +; CHECK-NEXT: [[I:%.*]] = call {}* @llvm.invariant.start.p0i8(i64 1, i8* [[Q]]) +; CHECK-NEXT: store i8 3, i8* [[P]], align 4 +; CHECK-NEXT: ret void +; store i8 1, i8* %p, align 4 store i8 2, i8* %q, align 4 %i = call {}* @llvm.invariant.start.p0i8(i64 1, i8* %q) store i8 3, i8* %p, align 4 ret void -; CHECK-LABEL: @test2( -; CHECK-NEXT: store i8 1, i8* %p, align 4 -; CHECK-NEXT: store i8 2, i8* %q, align 4 -; CHECK-NEXT: %i = call {}* @llvm.invariant.start.p0i8(i64 1, i8* %q) -; CHECK-NEXT: store i8 3, i8* %p, align 4 -; CHECK-NEXT: ret void } From 58ad3428d1d455d89866661461cf4cd272a599ab Mon Sep 17 00:00:00 2001 From: Nikita Popov Date: Thu, 23 Dec 2021 09:44:01 +0100 Subject: [PATCH 126/293] [DSE] Add test for matrix store (NFC) --- .../Transforms/DeadStoreElimination/simple.ll | 19 +++++++++++++++++-- 1 file changed, 17 insertions(+), 2 deletions(-) diff --git a/llvm/test/Transforms/DeadStoreElimination/simple.ll b/llvm/test/Transforms/DeadStoreElimination/simple.ll index 361b243f5121..eed7d535857d 100644 --- a/llvm/test/Transforms/DeadStoreElimination/simple.ll +++ b/llvm/test/Transforms/DeadStoreElimination/simple.ll @@ -8,6 +8,7 @@ declare void @llvm.memset.element.unordered.atomic.p0i8.i64(i8* nocapture, i8, i declare void @llvm.memcpy.p0i8.p0i8.i64(i8* nocapture, i8* nocapture, i64, i1) nounwind declare void @llvm.memcpy.element.unordered.atomic.p0i8.p0i8.i64(i8* nocapture, i8* nocapture, i64, i32) nounwind declare void @llvm.init.trampoline(i8*, i8*, i8*) +declare void @llvm.matrix.column.major.store(<6 x float>, float*, i64, i1, i32, i32) define void @test1(i32* %Q, i32* %P) { ; CHECK-LABEL: @test1( @@ -193,6 +194,20 @@ define void @test11() { ret void } +; TODO: Specialized store intrinsics should be removed if dead. +define void @test_matrix_store(i64 %stride) { +; CHECK-LABEL: @test_matrix_store( +; CHECK-NEXT: [[A:%.*]] = alloca [6 x float], align 4 +; CHECK-NEXT: [[CAST:%.*]] = bitcast [6 x float]* [[A]] to float* +; CHECK-NEXT: call void @llvm.matrix.column.major.store.v6f32.i64(<6 x float> zeroinitializer, float* [[CAST]], i64 [[STRIDE:%.*]], i1 false, i32 3, i32 2) +; CHECK-NEXT: ret void +; + %a = alloca [6 x float] + %cast = bitcast [6 x float]* %a to float* + call void @llvm.matrix.column.major.store(<6 x float> zeroinitializer, float* %cast, i64 %stride, i1 false, i32 3, i32 2) + ret void +} + ; %P doesn't escape, the DEAD instructions should be removed. 
declare void @test13f() define i32* @test13() { @@ -312,7 +327,7 @@ define noalias i8* @test23() nounwind uwtable ssp { ; CHECK-NEXT: store i8 97, i8* [[ARRAYIDX]], align 1 ; CHECK-NEXT: [[ARRAYIDX1:%.*]] = getelementptr inbounds [2 x i8], [2 x i8]* [[X]], i64 0, i64 1 ; CHECK-NEXT: store i8 0, i8* [[ARRAYIDX1]], align 1 -; CHECK-NEXT: [[CALL:%.*]] = call i8* @strdup(i8* [[ARRAYIDX]]) [[ATTR3:#.*]] +; CHECK-NEXT: [[CALL:%.*]] = call i8* @strdup(i8* [[ARRAYIDX]]) #[[ATTR5:[0-9]+]] ; CHECK-NEXT: ret i8* [[CALL]] ; %x = alloca [2 x i8], align 1 @@ -350,7 +365,7 @@ define i8* @test25(i8* %p) nounwind { ; CHECK-NEXT: [[P_4:%.*]] = getelementptr i8, i8* [[P:%.*]], i64 4 ; CHECK-NEXT: [[TMP:%.*]] = load i8, i8* [[P_4]], align 1 ; CHECK-NEXT: store i8 0, i8* [[P_4]], align 1 -; CHECK-NEXT: [[Q:%.*]] = call i8* @strdup(i8* [[P]]) [[ATTR6:#.*]] +; CHECK-NEXT: [[Q:%.*]] = call i8* @strdup(i8* [[P]]) #[[ATTR10:[0-9]+]] ; CHECK-NEXT: store i8 [[TMP]], i8* [[P_4]], align 1 ; CHECK-NEXT: ret i8* [[Q]] ; From 5d0be553fafefa775518df934916b2703e353599 Mon Sep 17 00:00:00 2001 From: Fangrui Song Date: Thu, 23 Dec 2021 00:59:29 -0800 Subject: [PATCH 127/293] [ELF] Optimize copyLocalSymbols. NFC --- lld/ELF/Writer.cpp | 12 +++--------- 1 file changed, 3 insertions(+), 9 deletions(-) diff --git a/lld/ELF/Writer.cpp b/lld/ELF/Writer.cpp index 653a2266b531..b593a2950e53 100644 --- a/lld/ELF/Writer.cpp +++ b/lld/ELF/Writer.cpp @@ -674,9 +674,6 @@ static bool shouldKeepInSymtab(const Defined &sym) { } static bool includeInSymtab(const Symbol &b) { - if (!b.isLocal() && !b.isUsedInRegularObj) - return false; - if (auto *d = dyn_cast(&b)) { // Always include absolute symbols. SectionBase *sec = d->section; @@ -712,11 +709,8 @@ template void Writer::copyLocalSymbols() { // No reason to keep local undefined symbol in symtab. if (!dr) continue; - if (!includeInSymtab(*b)) - continue; - if (!shouldKeepInSymtab(*dr)) - continue; - in.symTab->addSymbol(b); + if (includeInSymtab(*b) && shouldKeepInSymtab(*dr)) + in.symTab->addSymbol(b); } } } @@ -1985,7 +1979,7 @@ template void Writer::finalizeSections() { // Now that we have defined all possible global symbols including linker- // synthesized ones. Visit all symbols to give the finishing touches. for (Symbol *sym : symtab->symbols()) { - if (!includeInSymtab(*sym)) + if (!sym->isUsedInRegularObj || !includeInSymtab(*sym)) continue; if (in.symTab) in.symTab->addSymbol(sym); From f9607d45f399e2afc39ec16222ea68b4e0831564 Mon Sep 17 00:00:00 2001 From: Muhammad Omair Javaid Date: Thu, 23 Dec 2021 14:08:37 +0500 Subject: [PATCH 128/293] Revert "Revert "[DwarfDebug] Support emitting function-local declaration for a lexical block" & dependent patches" This has broke following LLDB buildbots: https://lab.llvm.org/buildbot/#/builders/17/builds/14984 https://lab.llvm.org/buildbot/#/builders/96/builds/15928 https://lab.llvm.org/buildbot/#/builders/68/builds/23600 This reverts commit 62a6b9e9ab3eb778111e90a34fee1e6a7c64db8a. 
--- .../CodeGen/AsmPrinter/DwarfCompileUnit.cpp | 132 ++++-- .../lib/CodeGen/AsmPrinter/DwarfCompileUnit.h | 41 +- llvm/lib/CodeGen/AsmPrinter/DwarfDebug.cpp | 60 ++- llvm/lib/CodeGen/AsmPrinter/DwarfDebug.h | 12 +- llvm/lib/CodeGen/AsmPrinter/DwarfFile.h | 7 +- llvm/lib/CodeGen/AsmPrinter/DwarfUnit.cpp | 2 +- llvm/lib/CodeGen/AsmPrinter/DwarfUnit.h | 2 +- .../Generic/import-inlined-declaration.ll | 72 +++ .../Generic/imported-name-inlined.ll | 10 +- .../DebugInfo/Generic/inlined-local-type.ll | 125 ++++++ .../DebugInfo/Generic/inlined-static-var.ll | 92 ++++ .../DebugInfo/Generic/lexical_block_static.ll | 141 ++++++ .../DebugInfo/Generic/lexical_block_types.ll | 421 ++++++++++++++++++ llvm/test/DebugInfo/Generic/namespace.ll | 33 +- 14 files changed, 1043 insertions(+), 107 deletions(-) create mode 100644 llvm/test/DebugInfo/Generic/import-inlined-declaration.ll create mode 100644 llvm/test/DebugInfo/Generic/inlined-local-type.ll create mode 100644 llvm/test/DebugInfo/Generic/inlined-static-var.ll create mode 100644 llvm/test/DebugInfo/Generic/lexical_block_static.ll create mode 100644 llvm/test/DebugInfo/Generic/lexical_block_types.ll diff --git a/llvm/lib/CodeGen/AsmPrinter/DwarfCompileUnit.cpp b/llvm/lib/CodeGen/AsmPrinter/DwarfCompileUnit.cpp index 9b73f0ab2f05..cf8c0c03772e 100644 --- a/llvm/lib/CodeGen/AsmPrinter/DwarfCompileUnit.cpp +++ b/llvm/lib/CodeGen/AsmPrinter/DwarfCompileUnit.cpp @@ -551,8 +551,18 @@ void DwarfCompileUnit::constructScopeDIE(LexicalScope *Scope, // Emit lexical blocks. DIE *ScopeDIE = constructLexicalScopeDIE(Scope); assert(ScopeDIE && "Scope DIE should not be null."); - ParentScopeDIE.addChild(ScopeDIE); + + // Track abstract and concrete lexical block scopes. + if (Scope->isAbstractScope()) { + assert(!getAbstractScopeDIEs().count(DS) && + "Abstract DIE for this scope exists!"); + getAbstractScopeDIEs()[DS] = ScopeDIE; + } else if (!Scope->getInlinedAt()) { + assert(!LocalScopeDIEs.count(DS) && "Concrete DIE for this scope exists!"); + LocalScopeDIEs[DS] = ScopeDIE; + } + createAndAddScopeChildren(Scope, *ScopeDIE); } @@ -646,7 +656,7 @@ DIE *DwarfCompileUnit::constructInlinedScopeDIE(LexicalScope *Scope) { auto *InlinedSP = getDISubprogram(DS); // Find the subprogram's DwarfCompileUnit in the SPMap in case the subprogram // was inlined from another compile unit. - DIE *OriginDIE = getAbstractSPDies()[InlinedSP]; + DIE *OriginDIE = getAbstractScopeDIEs()[InlinedSP]; assert(OriginDIE && "Unable to find original DIE for an inlined subprogram."); auto ScopeDIE = DIE::get(DIEValueAllocator, dwarf::DW_TAG_inlined_subroutine); @@ -1010,6 +1020,12 @@ DIE &DwarfCompileUnit::constructSubprogramScopeDIE(const DISubprogram *Sub, if (Scope) { assert(!Scope->getInlinedAt()); assert(!Scope->isAbstractScope()); + + // Remember the subrogram before creating child entities. + assert(!LocalScopeDIEs.count(Sub) && + "Concrete DIE for the subprogram exists!"); + LocalScopeDIEs[Sub] = &ScopeDIE; + // Collect lexical scope children first. // ObjectPointer might be a local (non-argument) local variable if it's a // block's synthetic this pointer. @@ -1045,27 +1061,18 @@ DIE *DwarfCompileUnit::createAndAddScopeChildren(LexicalScope *Scope, for (DbgVariable *DV : Locals) ScopeDIE.addChild(constructVariableDIE(*DV, *Scope, ObjectPointer)); - // Emit imported entities (skipped in gmlt-like data). - if (!includeMinimalInlineScopes()) { - for (const auto *IE : ImportedEntities[Scope->getScopeNode()]) - ScopeDIE.addChild(constructImportedEntityDIE(cast(IE))); - } - // Emit labels. 
for (DbgLabel *DL : DU->getScopeLabels().lookup(Scope)) ScopeDIE.addChild(constructLabelDIE(*DL, *Scope)); // Emit inner lexical scopes. - auto needToEmitLexicalScope = [this](LexicalScope *LS) { + auto needToEmitLexicalScope = [this](LexicalScope *LS) -> bool { if (isa(LS->getScopeNode())) return true; auto Vars = DU->getScopeVariables().lookup(LS); if (!Vars.Args.empty() || !Vars.Locals.empty()) return true; - if (!includeMinimalInlineScopes() && - !ImportedEntities[LS->getScopeNode()].empty()) - return true; - return false; + return LocalScopesWithLocalDecls.count(LS->getScopeNode()); }; for (LexicalScope *LS : Scope->getChildren()) { // If the lexical block doesn't have non-scope children, skip @@ -1081,11 +1088,10 @@ DIE *DwarfCompileUnit::createAndAddScopeChildren(LexicalScope *Scope, void DwarfCompileUnit::constructAbstractSubprogramScopeDIE( LexicalScope *Scope) { - DIE *&AbsDef = getAbstractSPDies()[Scope->getScopeNode()]; - if (AbsDef) - return; auto *SP = cast(Scope->getScopeNode()); + if (getAbstractScopeDIEs().count(SP)) + return; DIE *ContextDIE; DwarfCompileUnit *ContextCU = this; @@ -1109,14 +1115,19 @@ void DwarfCompileUnit::constructAbstractSubprogramScopeDIE( // Passing null as the associated node because the abstract definition // shouldn't be found by lookup. - AbsDef = &ContextCU->createAndAddDIE(dwarf::DW_TAG_subprogram, *ContextDIE, nullptr); - ContextCU->applySubprogramAttributesToDefinition(SP, *AbsDef); - ContextCU->addSInt(*AbsDef, dwarf::DW_AT_inline, + DIE &AbsDef = ContextCU->createAndAddDIE(dwarf::DW_TAG_subprogram, + *ContextDIE, nullptr); + + // Store the DIE before creating children. + getAbstractScopeDIEs()[SP] = &AbsDef; + + ContextCU->applySubprogramAttributesToDefinition(SP, AbsDef); + ContextCU->addSInt(AbsDef, dwarf::DW_AT_inline, DD->getDwarfVersion() <= 4 ? Optional() : dwarf::DW_FORM_implicit_const, dwarf::DW_INL_inlined); - if (DIE *ObjectPointer = ContextCU->createAndAddScopeChildren(Scope, *AbsDef)) - ContextCU->addDIEEntry(*AbsDef, dwarf::DW_AT_object_pointer, *ObjectPointer); + if (DIE *ObjectPointer = ContextCU->createAndAddScopeChildren(Scope, AbsDef)) + ContextCU->addDIEEntry(AbsDef, dwarf::DW_AT_object_pointer, *ObjectPointer); } bool DwarfCompileUnit::useGNUAnalogForDwarf5Feature() const { @@ -1250,47 +1261,61 @@ void DwarfCompileUnit::constructCallSiteParmEntryDIEs( } } -DIE *DwarfCompileUnit::constructImportedEntityDIE( - const DIImportedEntity *Module) { - DIE *IMDie = DIE::get(DIEValueAllocator, (dwarf::Tag)Module->getTag()); - insertDIE(Module, IMDie); +DIE *DwarfCompileUnit::createImportedEntityDIE(const DIImportedEntity *IE) { + DIE *IMDie = DIE::get(DIEValueAllocator, (dwarf::Tag)IE->getTag()); + insertDIE(IE, IMDie); + DIE *EntityDie; - auto *Entity = Module->getEntity(); + auto *Entity = IE->getEntity(); if (auto *NS = dyn_cast(Entity)) EntityDie = getOrCreateNameSpace(NS); else if (auto *M = dyn_cast(Entity)) EntityDie = getOrCreateModule(M); - else if (auto *SP = dyn_cast(Entity)) - EntityDie = getOrCreateSubprogramDIE(SP); - else if (auto *T = dyn_cast(Entity)) + else if (auto *SP = dyn_cast(Entity)) { + // If we have abstract subprogram created, refer it. 
+ if (auto *AbsSPDie = getAbstractScopeDIEs().lookup(SP)) + EntityDie = AbsSPDie; + else + EntityDie = getOrCreateSubprogramDIE(SP); + } else if (auto *T = dyn_cast(Entity)) EntityDie = getOrCreateTypeDIE(T); else if (auto *GV = dyn_cast(Entity)) EntityDie = getOrCreateGlobalVariableDIE(GV, {}); else EntityDie = getDIE(Entity); assert(EntityDie); - addSourceLine(*IMDie, Module->getLine(), Module->getFile()); + + addSourceLine(*IMDie, IE->getLine(), IE->getFile()); addDIEEntry(*IMDie, dwarf::DW_AT_import, *EntityDie); - StringRef Name = Module->getName(); + StringRef Name = IE->getName(); if (!Name.empty()) addString(*IMDie, dwarf::DW_AT_name, Name); // This is for imported module with renamed entities (such as variables and // subprograms). - DINodeArray Elements = Module->getElements(); + DINodeArray Elements = IE->getElements(); for (const auto *Element : Elements) { if (!Element) continue; - IMDie->addChild( - constructImportedEntityDIE(cast(Element))); + IMDie->addChild(createImportedEntityDIE(cast(Element))); } - return IMDie; } +void DwarfCompileUnit::createAndAddImportedEntityDIE( + const DIImportedEntity *IE) { + DIE *ContextDIE = getOrCreateContextDIE(IE->getScope()); + assert(ContextDIE && + "Could not get or create scope for the imported entity!"); + if (!ContextDIE) + return; + + ContextDIE->addChild(createImportedEntityDIE(IE)); +} + void DwarfCompileUnit::finishSubprogramDefinition(const DISubprogram *SP) { DIE *D = getDIE(SP); - if (DIE *AbsSPDIE = getAbstractSPDies().lookup(SP)) { + if (DIE *AbsSPDIE = getAbstractScopeDIEs().lookup(SP)) { if (D) // If this subprogram has an abstract definition, reference that addDIEEntry(*D, dwarf::DW_AT_abstract_origin, *AbsSPDIE); @@ -1580,3 +1605,38 @@ void DwarfCompileUnit::createBaseTypeDIEs() { Btr.Die = &Die; } } + +static DIE * +findLocalScopeDIE(const DILocalScope *LS, + DenseMap &ScopeDIEs) { + DIE *ScopeDIE = ScopeDIEs.lookup(LS); + if (isa(LS) && !ScopeDIE) + return nullptr; + if (!ScopeDIE) + return findLocalScopeDIE(cast(LS->getScope()), ScopeDIEs); + return ScopeDIE; +} + +DIE *DwarfCompileUnit::findLocalScopeDIE(const DIScope *S) { + auto *LScope = dyn_cast_or_null(S); + if (!LScope) + return nullptr; + + // Check if we have an abstract tree. + if (getAbstractScopeDIEs().count(LScope->getSubprogram())) + return ::findLocalScopeDIE(LScope, getAbstractScopeDIEs()); + + return ::findLocalScopeDIE(LScope, LocalScopeDIEs); +} + +DIE *DwarfCompileUnit::getOrCreateContextDIE(const DIScope *Context) { + if (auto *LScope = dyn_cast_or_null(Context)) { + if (DIE *ScopeDIE = findLocalScopeDIE(LScope)) + return ScopeDIE; + + // If nothing was found, fall back to DISubprogram and let + // DwarfUnit::getOrCreateContextDIE() create a new DIE for it. + Context = LScope->getSubprogram(); + } + return DwarfUnit::getOrCreateContextDIE(Context); +} diff --git a/llvm/lib/CodeGen/AsmPrinter/DwarfCompileUnit.h b/llvm/lib/CodeGen/AsmPrinter/DwarfCompileUnit.h index fb03982b5e4a..251bc4bea68b 100644 --- a/llvm/lib/CodeGen/AsmPrinter/DwarfCompileUnit.h +++ b/llvm/lib/CodeGen/AsmPrinter/DwarfCompileUnit.h @@ -62,11 +62,6 @@ class DwarfCompileUnit final : public DwarfUnit { /// The start of the unit macro info within macro section. MCSymbol *MacroLabelBegin; - using ImportedEntityList = SmallVector; - using ImportedEntityMap = DenseMap; - - ImportedEntityMap ImportedEntities; - /// GlobalNames - A map of globally visible named entities for this unit. 
StringMap GlobalNames; @@ -80,9 +75,14 @@ class DwarfCompileUnit final : public DwarfUnit { // ranges/locs. const MCSymbol *BaseAddress = nullptr; - DenseMap AbstractSPDies; + DenseMap LocalScopeDIEs; + DenseMap AbstractLocalScopeDIEs; DenseMap> AbstractEntities; + /// LocalScopesWithLocalDecls - A list of non-empty local scopes + /// (with declaraions of static locals, function-local types, or imports). + SmallPtrSet LocalScopesWithLocalDecls; + /// DWO ID for correlating skeleton and split units. uint64_t DWOId = 0; @@ -92,10 +92,10 @@ class DwarfCompileUnit final : public DwarfUnit { bool isDwoUnit() const override; - DenseMap &getAbstractSPDies() { + DenseMap &getAbstractScopeDIEs() { if (isDwoUnit() && !DD->shareAcrossDWOCUs()) - return AbstractSPDies; - return DU->getAbstractSPDies(); + return AbstractLocalScopeDIEs; + return DU->getAbstractScopeDIEs(); } DenseMap> &getAbstractEntities() { @@ -169,17 +169,6 @@ class DwarfCompileUnit final : public DwarfUnit { unsigned getOrCreateSourceID(const DIFile *File) override; - void addImportedEntity(const DIImportedEntity* IE) { - DIScope *Scope = IE->getScope(); - assert(Scope && "Invalid Scope encoding!"); - if (!isa(Scope)) - // No need to add imported enities that are not local declaration. - return; - - auto *LocalScope = cast(Scope)->getNonLexicalBlockFileScope(); - ImportedEntities[LocalScope].push_back(IE); - } - /// addRange - Add an address range to the list of ranges for this unit. void addRange(RangeSpan Range); @@ -221,6 +210,9 @@ class DwarfCompileUnit final : public DwarfUnit { void createBaseTypeDIEs(); + DIE *findLocalScopeDIE(const DIScope *S); + DIE *getOrCreateContextDIE(const DIScope *Ty) override; + /// Construct a DIE for this subprogram scope. DIE &constructSubprogramScopeDIE(const DISubprogram *Sub, LexicalScope *Scope); @@ -259,8 +251,9 @@ class DwarfCompileUnit final : public DwarfUnit { void constructCallSiteParmEntryDIEs(DIE &CallSiteDIE, SmallVector &Params); - /// Construct import_module DIE. - DIE *constructImportedEntityDIE(const DIImportedEntity *Module); + /// Construct DIE for an imported entity. + DIE *createImportedEntityDIE(const DIImportedEntity *IE); + void createAndAddImportedEntityDIE(const DIImportedEntity *IE); void finishSubprogramDefinition(const DISubprogram *SP); void finishEntityDefinition(const DbgEntity *Entity); @@ -357,6 +350,10 @@ class DwarfCompileUnit final : public DwarfUnit { bool hasDwarfPubSections() const; void addBaseTypeRef(DIEValueList &Die, int64_t Idx); + + void recordLocalScopeWithDecls(const DILocalScope *S) { + LocalScopesWithLocalDecls.insert(S); + } }; } // end namespace llvm diff --git a/llvm/lib/CodeGen/AsmPrinter/DwarfDebug.cpp b/llvm/lib/CodeGen/AsmPrinter/DwarfDebug.cpp index 0089753c0408..8f5bc3a07c54 100644 --- a/llvm/lib/CodeGen/AsmPrinter/DwarfDebug.cpp +++ b/llvm/lib/CodeGen/AsmPrinter/DwarfDebug.cpp @@ -516,7 +516,7 @@ void DwarfDebug::addSubprogramNames(const DICompileUnit &CU, // well into the name table. Only do that if we are going to actually emit // that name. 
if (SP->getLinkageName() != "" && SP->getName() != SP->getLinkageName() && - (useAllLinkageNames() || InfoHolder.getAbstractSPDies().lookup(SP))) + (useAllLinkageNames() || InfoHolder.getAbstractScopeDIEs().lookup(SP))) addAccelName(CU, SP->getLinkageName(), Die); // If this is an Objective-C selector name add it to the ObjC accelerator @@ -1067,6 +1067,45 @@ void DwarfDebug::finishUnitAttributes(const DICompileUnit *DIUnit, } } } + +// Collect local scopes that contain any local declarations +// (excluding local variables) to be sure they will be emitted +// (see DwarfCompileUnit::createAndAddScopeChildren() for details). +static void collectLocalScopesWithDeclsFromCU(const DICompileUnit *CUNode, + DwarfCompileUnit &CU) { + auto getLocalScope = [](const DIScope *S) -> const DILocalScope * { + if (!S) + return nullptr; + if (isa(S)) + S = S->getScope(); + if (const auto *LScope = dyn_cast_or_null(S)) + return LScope->getNonLexicalBlockFileScope(); + return nullptr; + }; + + for (auto *GVE : CUNode->getGlobalVariables()) + if (auto *LScope = getLocalScope(GVE->getVariable()->getScope())) + CU.recordLocalScopeWithDecls(LScope); + + for (auto *Ty : CUNode->getEnumTypes()) + if (auto *LScope = getLocalScope(Ty->getScope())) + CU.recordLocalScopeWithDecls(LScope); + + for (auto *Ty : CUNode->getRetainedTypes()) + if (DIType *RT = dyn_cast(Ty)) + if (auto *LScope = getLocalScope(RT->getScope())) + CU.recordLocalScopeWithDecls(LScope); + + for (auto *IE : CUNode->getImportedEntities()) + if (auto *LScope = getLocalScope(IE->getScope())) + CU.recordLocalScopeWithDecls(LScope); + + // FIXME: We know nothing about local records and typedefs here. + // since nothing but local variables (and members of local records) + // references them. So that they will be emitted in a first available + // parent scope DIE. +} + // Create new DwarfCompileUnit for the given metadata node with tag // DW_TAG_compile_unit. DwarfCompileUnit & @@ -1081,9 +1120,6 @@ DwarfDebug::getOrCreateDwarfCompileUnit(const DICompileUnit *DIUnit) { DwarfCompileUnit &NewCU = *OwnedUnit; InfoHolder.addUnit(std::move(OwnedUnit)); - for (auto *IE : DIUnit->getImportedEntities()) - NewCU.addImportedEntity(IE); - // LTO with assembly output shares a single line table amongst multiple CUs. // To avoid the compilation directory being ambiguous, let the line table // explicitly describe the directory of all files, never relying on the @@ -1103,15 +1139,12 @@ DwarfDebug::getOrCreateDwarfCompileUnit(const DICompileUnit *DIUnit) { CUMap.insert({DIUnit, &NewCU}); CUDieMap.insert({&NewCU.getUnitDie(), &NewCU}); - return NewCU; -} -void DwarfDebug::constructAndAddImportedEntityDIE(DwarfCompileUnit &TheCU, - const DIImportedEntity *N) { - if (isa(N->getScope())) - return; - if (DIE *D = TheCU.getOrCreateContextDIE(N->getScope())) - D->addChild(TheCU.constructImportedEntityDIE(N)); + // Record local scopes, that have some globals (static locals), + // imports or types declared within. + collectLocalScopesWithDeclsFromCU(DIUnit, NewCU); + + return NewCU; } // Emit all Dwarf sections that should come prior to the content. Create @@ -1369,6 +1402,7 @@ void DwarfDebug::endModule() { assert(CurFn == nullptr); assert(CurMI == nullptr); + // Collect global variables info. DenseMap> GVMap; for (const GlobalVariable &Global : MMI->getModule()->globals()) { @@ -1428,7 +1462,7 @@ void DwarfDebug::endModule() { // Emit imported entities last so that the relevant context // is already available. 
for (auto *IE : CUNode->getImportedEntities()) - constructAndAddImportedEntityDIE(*CU, IE); + CU->createAndAddImportedEntityDIE(IE); CU->createBaseTypeDIEs(); } diff --git a/llvm/lib/CodeGen/AsmPrinter/DwarfDebug.h b/llvm/lib/CodeGen/AsmPrinter/DwarfDebug.h index ff2589b3c953..79f8423af602 100644 --- a/llvm/lib/CodeGen/AsmPrinter/DwarfDebug.h +++ b/llvm/lib/CodeGen/AsmPrinter/DwarfDebug.h @@ -316,11 +316,13 @@ class DwarfDebug : public DebugHandlerBase { /// can refer to them in spite of insertions into this list. DebugLocStream DebugLocs; + using SubprogramSetVector = + SetVector, + SmallPtrSet>; + /// This is a collection of subprogram MDNodes that are processed to /// create DIEs. - SetVector, - SmallPtrSet> - ProcessedSPNodes; + SubprogramSetVector ProcessedSPNodes; /// If nonnull, stores the current machine function we're processing. const MachineFunction *CurFn = nullptr; @@ -598,10 +600,6 @@ class DwarfDebug : public DebugHandlerBase { void finishUnitAttributes(const DICompileUnit *DIUnit, DwarfCompileUnit &NewCU); - /// Construct imported_module or imported_declaration DIE. - void constructAndAddImportedEntityDIE(DwarfCompileUnit &TheCU, - const DIImportedEntity *N); - /// Register a source line with debug info. Returns the unique /// label that was emitted and which provides correspondence to the /// source line list. diff --git a/llvm/lib/CodeGen/AsmPrinter/DwarfFile.h b/llvm/lib/CodeGen/AsmPrinter/DwarfFile.h index 79a6ce7801b7..464f4f048016 100644 --- a/llvm/lib/CodeGen/AsmPrinter/DwarfFile.h +++ b/llvm/lib/CodeGen/AsmPrinter/DwarfFile.h @@ -26,6 +26,7 @@ class DbgEntity; class DbgVariable; class DbgLabel; class DINode; +class DILocalScope; class DwarfCompileUnit; class DwarfUnit; class LexicalScope; @@ -87,7 +88,7 @@ class DwarfFile { DenseMap ScopeLabels; // Collection of abstract subprogram DIEs. - DenseMap AbstractSPDies; + DenseMap AbstractLocalScopeDIEs; DenseMap> AbstractEntities; /// Maps MDNodes for type system with the corresponding DIEs. These DIEs can @@ -162,8 +163,8 @@ class DwarfFile { return ScopeLabels; } - DenseMap &getAbstractSPDies() { - return AbstractSPDies; + DenseMap &getAbstractScopeDIEs() { + return AbstractLocalScopeDIEs; } DenseMap> &getAbstractEntities() { diff --git a/llvm/lib/CodeGen/AsmPrinter/DwarfUnit.cpp b/llvm/lib/CodeGen/AsmPrinter/DwarfUnit.cpp index 6b6d63f14f87..7edf9be92eac 100644 --- a/llvm/lib/CodeGen/AsmPrinter/DwarfUnit.cpp +++ b/llvm/lib/CodeGen/AsmPrinter/DwarfUnit.cpp @@ -1219,7 +1219,7 @@ bool DwarfUnit::applySubprogramDefinitionAttributes(const DISubprogram *SP, "decl has a linkage name and it is different"); if (DeclLinkageName.empty() && // Always emit it for abstract subprograms. - (DD->useAllLinkageNames() || DU->getAbstractSPDies().lookup(SP))) + (DD->useAllLinkageNames() || DU->getAbstractScopeDIEs().lookup(SP))) addLinkageName(SPDie, LinkageName); if (!DeclDie) diff --git a/llvm/lib/CodeGen/AsmPrinter/DwarfUnit.h b/llvm/lib/CodeGen/AsmPrinter/DwarfUnit.h index 54b0079dd7ce..1643edf05b7a 100644 --- a/llvm/lib/CodeGen/AsmPrinter/DwarfUnit.h +++ b/llvm/lib/CodeGen/AsmPrinter/DwarfUnit.h @@ -250,7 +250,7 @@ class DwarfUnit : public DIEUnit { DIE *getOrCreateTypeDIE(const MDNode *TyNode); /// Get context owner's DIE. - DIE *getOrCreateContextDIE(const DIScope *Context); + virtual DIE *getOrCreateContextDIE(const DIScope *Context); /// Construct DIEs for types that contain vtables. 
void constructContainingTypeDIEs(); diff --git a/llvm/test/DebugInfo/Generic/import-inlined-declaration.ll b/llvm/test/DebugInfo/Generic/import-inlined-declaration.ll new file mode 100644 index 000000000000..471526b1395c --- /dev/null +++ b/llvm/test/DebugInfo/Generic/import-inlined-declaration.ll @@ -0,0 +1,72 @@ +; RUN: %llc_dwarf -O0 -filetype=obj < %s | llvm-dwarfdump - | FileCheck --implicit-check-not "{{DW_TAG|NULL}}" %s + +; namespace ns { +; inline __attribute__((always_inline)) +; void foo() { int a = 4; } +; } +; +; void goo() { +; using ns::foo; +; foo(); +; } + +; Ensure that imported declarations reference the correct subprograms even if +; those subprograms are inlined. + +; CHECK: DW_TAG_compile_unit +; CHECK: DW_TAG_namespace +; CHECK: DW_AT_name ("ns") +; CHECK: [[FOO:0x.*]]: DW_TAG_subprogram +; CHECK: DW_AT_name ("foo") +; CHECK: DW_TAG_variable +; CHECK: NULL +; CHECK: NULL +; CHECK: DW_TAG_base_type +; CHECK: DW_TAG_subprogram +; CHECK: DW_AT_name ("goo") +; CHECK: DW_TAG_inlined_subroutine +; CHECK: DW_AT_abstract_origin ([[FOO]] +; CHECK: DW_TAG_variable +; CHECK: NULL +; CHECK: DW_TAG_imported_declaration +; CHECK: DW_AT_import ([[FOO]]) +; CHECK: NULL +; CHECK: NULL + +; Function Attrs: mustprogress noinline optnone uwtable +define dso_local void @_Z3goov() !dbg !4 { +entry: + %a.i = alloca i32, align 4 + call void @llvm.dbg.declare(metadata i32* %a.i, metadata !16, metadata !DIExpression()), !dbg !18 + store i32 4, i32* %a.i, align 4, !dbg !18 + ret void, !dbg !20 +} + +; Function Attrs: nofree nosync nounwind readnone speculatable willreturn +declare void @llvm.dbg.declare(metadata, metadata, metadata) + +!llvm.dbg.cu = !{!0} +!llvm.module.flags = !{!10, !11, !12, !13, !14} +!llvm.ident = !{!15} + +!0 = distinct !DICompileUnit(language: DW_LANG_C_plus_plus_14, file: !1, producer: "clang version 14.0.0", isOptimized: false, runtimeVersion: 0, emissionKind: FullDebug, imports: !2, splitDebugInlining: false, nameTableKind: None) +!1 = !DIFile(filename: "imported-inlined-declaration.cpp", directory: "") +!2 = !{!3} +!3 = !DIImportedEntity(tag: DW_TAG_imported_declaration, scope: !4, entity: !8, file: !1, line: 7) +!4 = distinct !DISubprogram(name: "goo", linkageName: "_Z3goov", scope: !1, file: !1, line: 6, type: !5, scopeLine: 6, flags: DIFlagPrototyped, spFlags: DISPFlagDefinition, unit: !0, retainedNodes: !7) +!5 = !DISubroutineType(types: !6) +!6 = !{null} +!7 = !{} +!8 = distinct !DISubprogram(name: "foo", linkageName: "_ZN2ns3fooEv", scope: !9, file: !1, line: 3, type: !5, scopeLine: 3, flags: DIFlagPrototyped, spFlags: DISPFlagDefinition, unit: !0, retainedNodes: !7) +!9 = !DINamespace(name: "ns", scope: null) +!10 = !{i32 7, !"Dwarf Version", i32 4} +!11 = !{i32 2, !"Debug Info Version", i32 3} +!12 = !{i32 1, !"wchar_size", i32 4} +!13 = !{i32 7, !"uwtable", i32 1} +!14 = !{i32 7, !"frame-pointer", i32 2} +!15 = !{!"clang version 14.0.0"} +!16 = !DILocalVariable(name: "a", scope: !8, file: !1, line: 3, type: !17) +!17 = !DIBasicType(name: "int", size: 32, encoding: DW_ATE_signed) +!18 = !DILocation(line: 3, column: 18, scope: !8, inlinedAt: !19) +!19 = distinct !DILocation(line: 8, column: 2, scope: !4) +!20 = !DILocation(line: 9, column: 1, scope: !4) diff --git a/llvm/test/DebugInfo/Generic/imported-name-inlined.ll b/llvm/test/DebugInfo/Generic/imported-name-inlined.ll index a3aaaff66f85..a26b687b1f67 100644 --- a/llvm/test/DebugInfo/Generic/imported-name-inlined.ll +++ b/llvm/test/DebugInfo/Generic/imported-name-inlined.ll @@ -13,21 +13,17 @@ ; Ensure 
that top level imported declarations don't produce an extra degenerate ; concrete subprogram definition. -; FIXME: imported entities should only be emitted to the abstract origin if one is present - ; CHECK: DW_TAG_compile_unit ; CHECK: DW_TAG_subprogram ; CHECK: DW_AT_name ("f1") ; CHECK: DW_TAG_imported_declaration ; CHECK: NULL -; CHECK: DW_TAG_namespace -; CHECK: DW_TAG_subprogram -; CHECK: NULL ; CHECK: DW_TAG_subprogram ; CHECK: DW_AT_name ("f2") ; CHECK: DW_TAG_inlined_subroutine -; CHECK: DW_TAG_imported_declaration -; CHECK: NULL +; CHECK: NULL +; CHECK: DW_TAG_namespace +; CHECK: DW_TAG_subprogram ; CHECK: NULL ; CHECK: NULL diff --git a/llvm/test/DebugInfo/Generic/inlined-local-type.ll b/llvm/test/DebugInfo/Generic/inlined-local-type.ll new file mode 100644 index 000000000000..b3f3f80c07d2 --- /dev/null +++ b/llvm/test/DebugInfo/Generic/inlined-local-type.ll @@ -0,0 +1,125 @@ +; RUN: %llc_dwarf -O0 -filetype=obj < %s | llvm-dwarfdump -debug-info - | FileCheck --implicit-check-not "{{DW_TAG|NULL}}" %s + +; inline __attribute__((always_inline)) +; int removed() { struct A {int i;}; struct A a; return a.i++; } +; +; __attribute__((always_inline)) +; int not_removed() { struct B {int i;}; struct B b; return b.i++; } +; +; int foo() { return removed() + not_removed(); }} + +; Ensure that function-local types have the correct subprogram parent even if +; those subprograms are inlined. + +; CHECK: DW_TAG_compile_unit +; CHECK: DW_TAG_subprogram +; CHECK: DW_AT_abstract_origin ({{0x.*}} "not_removed") +; CHECK: DW_TAG_variable +; CHECK: NULL +; CHECK: DW_TAG_subprogram +; CHECK: DW_AT_name ("removed") +; CHECK: [[A:0x.*]]: DW_TAG_structure_type +; CHECK: DW_AT_name ("A") +; CHECK: DW_TAG_member +; CHECK: NULL +; CHECK: DW_TAG_variable +; CHECK: DW_AT_type ([[A]] "A") +; CHECK: NULL +; CHECK: DW_TAG_base_type +; CHECK: DW_TAG_subprogram +; CHECK: DW_AT_name ("not_removed") +; CHECK: [[B:0x.*]]: DW_TAG_structure_type +; CHECK: DW_AT_name ("B") +; CHECK: DW_TAG_member +; CHECK: NULL +; CHECK: DW_TAG_variable +; CHECK: DW_AT_type ([[B]] "B") +; CHECK: NULL +; CHECK: DW_TAG_subprogram +; CHECK: DW_TAG_inlined_subroutine +; CHECK: DW_TAG_variable +; CHECK: NULL +; CHECK: DW_TAG_inlined_subroutine +; CHECK: DW_TAG_variable +; CHECK: NULL +; CHECK: NULL +; CHECK: NULL + +%struct.B = type { i32 } +%struct.A = type { i32 } + +define dso_local i32 @not_removed() !dbg !12 { + %1 = alloca %struct.B, align 4 + call void @llvm.dbg.declare(metadata %struct.B* %1, metadata !18, metadata !DIExpression()), !dbg !22 + %2 = getelementptr inbounds %struct.B, %struct.B* %1, i32 0, i32 0, !dbg !23 + %3 = load i32, i32* %2, align 4, !dbg !24 + %4 = add nsw i32 %3, 1, !dbg !24 + store i32 %4, i32* %2, align 4, !dbg !24 + ret i32 %3, !dbg !25 +} + +declare void @llvm.dbg.declare(metadata, metadata, metadata) + +define dso_local i32 @foo() !dbg !26 { + %1 = alloca %struct.A, align 4 + %2 = alloca %struct.B, align 4 + call void @llvm.dbg.declare(metadata %struct.A* %1, metadata !27, metadata !DIExpression()), !dbg !32 + %3 = getelementptr inbounds %struct.A, %struct.A* %1, i32 0, i32 0, !dbg !34 + %4 = load i32, i32* %3, align 4, !dbg !35 + %5 = add nsw i32 %4, 1, !dbg !35 + store i32 %5, i32* %3, align 4, !dbg !35 + call void @llvm.dbg.declare(metadata %struct.B* %2, metadata !18, metadata !DIExpression()), !dbg !36 + %6 = getelementptr inbounds %struct.B, %struct.B* %2, i32 0, i32 0, !dbg !38 + %7 = load i32, i32* %6, align 4, !dbg !39 + %8 = add nsw i32 %7, 1, !dbg !39 + store i32 %8, i32* %6, align 4, !dbg !39 
+ %9 = add nsw i32 %4, %7, !dbg !40 + ret i32 %9, !dbg !41 +} + +!llvm.dbg.cu = !{!0} +!llvm.module.flags = !{!2, !3, !4, !5, !6, !7, !8, !9, !10} +!llvm.ident = !{!11} + +!0 = distinct !DICompileUnit(language: DW_LANG_C99, file: !1, producer: "clang version 14.0.0", isOptimized: false, runtimeVersion: 0, emissionKind: FullDebug, splitDebugInlining: false, nameTableKind: None) +!1 = !DIFile(filename: "inlined-local-type.cpp", directory: "") +!2 = !{i32 7, !"Dwarf Version", i32 4} +!3 = !{i32 2, !"Debug Info Version", i32 3} +!4 = !{i32 1, !"wchar_size", i32 4} +!5 = !{i32 1, !"branch-target-enforcement", i32 0} +!6 = !{i32 1, !"sign-return-address", i32 0} +!7 = !{i32 1, !"sign-return-address-all", i32 0} +!8 = !{i32 1, !"sign-return-address-with-bkey", i32 0} +!9 = !{i32 7, !"uwtable", i32 1} +!10 = !{i32 7, !"frame-pointer", i32 1} +!11 = !{!"clang version 14.0.0"} +!12 = distinct !DISubprogram(name: "not_removed", scope: !13, file: !13, line: 5, type: !14, scopeLine: 5, spFlags: DISPFlagDefinition, unit: !0, retainedNodes: !17) +!13 = !DIFile(filename: "inlined-local-type.cpp", directory: "") +!14 = !DISubroutineType(types: !15) +!15 = !{!16} +!16 = !DIBasicType(name: "int", size: 32, encoding: DW_ATE_signed) +!17 = !{} +!18 = !DILocalVariable(name: "b", scope: !12, file: !13, line: 5, type: !19) +!19 = distinct !DICompositeType(tag: DW_TAG_structure_type, name: "B", scope: !12, file: !13, line: 5, size: 32, elements: !20) +!20 = !{!21} +!21 = !DIDerivedType(tag: DW_TAG_member, name: "i", scope: !19, file: !13, line: 5, baseType: !16, size: 32) +!22 = !DILocation(line: 5, column: 49, scope: !12) +!23 = !DILocation(line: 5, column: 61, scope: !12) +!24 = !DILocation(line: 5, column: 62, scope: !12) +!25 = !DILocation(line: 5, column: 52, scope: !12) +!26 = distinct !DISubprogram(name: "foo", scope: !13, file: !13, line: 7, type: !14, scopeLine: 7, spFlags: DISPFlagDefinition, unit: !0, retainedNodes: !17) +!27 = !DILocalVariable(name: "a", scope: !28, file: !13, line: 2, type: !29) +!28 = distinct !DISubprogram(name: "removed", scope: !13, file: !13, line: 2, type: !14, scopeLine: 2, spFlags: DISPFlagDefinition, unit: !0, retainedNodes: !17) +!29 = distinct !DICompositeType(tag: DW_TAG_structure_type, name: "A", scope: !28, file: !13, line: 2, size: 32, elements: !30) +!30 = !{!31} +!31 = !DIDerivedType(tag: DW_TAG_member, name: "i", scope: !29, file: !13, line: 2, baseType: !16, size: 32) +!32 = !DILocation(line: 2, column: 45, scope: !28, inlinedAt: !33) +!33 = distinct !DILocation(line: 7, column: 20, scope: !26) +!34 = !DILocation(line: 2, column: 57, scope: !28, inlinedAt: !33) +!35 = !DILocation(line: 2, column: 58, scope: !28, inlinedAt: !33) +!36 = !DILocation(line: 5, column: 49, scope: !12, inlinedAt: !37) +!37 = distinct !DILocation(line: 7, column: 32, scope: !26) +!38 = !DILocation(line: 5, column: 61, scope: !12, inlinedAt: !37) +!39 = !DILocation(line: 5, column: 62, scope: !12, inlinedAt: !37) +!40 = !DILocation(line: 7, column: 30, scope: !26) +!41 = !DILocation(line: 7, column: 13, scope: !26) diff --git a/llvm/test/DebugInfo/Generic/inlined-static-var.ll b/llvm/test/DebugInfo/Generic/inlined-static-var.ll new file mode 100644 index 000000000000..60c986fc2b64 --- /dev/null +++ b/llvm/test/DebugInfo/Generic/inlined-static-var.ll @@ -0,0 +1,92 @@ +; RUN: %llc_dwarf -O0 -filetype=obj < %s | llvm-dwarfdump -debug-info - | FileCheck --implicit-check-not "{{DW_TAG|NULL}}" %s + +; inline __attribute__((always_inline)) +; int removed() { static int A; return A++; } +; +; 
__attribute__((always_inline)) +; int not_removed() { static int B; return B++; } +; +; int foo() { return removed() + not_removed(); } + +; Ensure that global variables belong to the correct subprograms even if those +; subprograms are inlined. + +; CHECK: DW_TAG_compile_unit +; CHECK: DW_TAG_subprogram +; CHECK: DW_AT_abstract_origin {{.*}} "_Z11not_removedv" +; CHECK: DW_TAG_subprogram +; CHECK: DW_AT_name ("removed") +; CHECK: DW_TAG_variable +; CHECK: DW_AT_name ("A") +; CHECK: NULL +; CHECK: DW_TAG_base_type +; CHECK: DW_TAG_subprogram +; CHECK: DW_AT_name ("not_removed") +; CHECK: DW_TAG_variable +; CHECK: DW_AT_name ("B") +; CHECK: NULL +; CHECK: DW_TAG_subprogram +; CHECK: DW_AT_name ("foo") +; CHECK: DW_TAG_inlined_subroutine +; CHECK: DW_TAG_inlined_subroutine +; CHECK: NULL +; CHECK: NULL + +@_ZZ11not_removedvE1A = internal global i32 0, align 4, !dbg !0 +@_ZZ7removedvE1A = linkonce_odr dso_local global i32 0, align 4, !dbg !10 + +define dso_local i32 @_Z11not_removedv() !dbg !2 { + %1 = load i32, i32* @_ZZ11not_removedvE1A, align 4, !dbg !24 + %2 = add nsw i32 %1, 1, !dbg !24 + store i32 %2, i32* @_ZZ11not_removedvE1A, align 4, !dbg !24 + ret i32 %1, !dbg !25 +} + +define dso_local i32 @_Z3foov() !dbg !26 { + %1 = load i32, i32* @_ZZ7removedvE1A, align 4, !dbg !27 + %2 = add nsw i32 %1, 1, !dbg !27 + store i32 %2, i32* @_ZZ7removedvE1A, align 4, !dbg !27 + %3 = load i32, i32* @_ZZ11not_removedvE1A, align 4, !dbg !29 + %4 = add nsw i32 %3, 1, !dbg !29 + store i32 %4, i32* @_ZZ11not_removedvE1A, align 4, !dbg !29 + %5 = add nsw i32 %1, %3, !dbg !31 + ret i32 %5, !dbg !32 +} + +!llvm.dbg.cu = !{!7} +!llvm.module.flags = !{!14, !15, !16, !17, !18, !19, !20, !21, !22} +!llvm.ident = !{!23} + +!0 = !DIGlobalVariableExpression(var: !1, expr: !DIExpression()) +!1 = distinct !DIGlobalVariable(name: "B", scope: !2, file: !3, line: 5, type: !6, isLocal: true, isDefinition: true) +!2 = distinct !DISubprogram(name: "not_removed", linkageName: "_Z11not_removedv", scope: !3, file: !3, line: 5, type: !4, scopeLine: 5, flags: DIFlagPrototyped, spFlags: DISPFlagDefinition, unit: !7, retainedNodes: !13) +!3 = !DIFile(filename: "example.cpp", directory: "") +!4 = !DISubroutineType(types: !5) +!5 = !{!6} +!6 = !DIBasicType(name: "int", size: 32, encoding: DW_ATE_signed) +!7 = distinct !DICompileUnit(language: DW_LANG_C_plus_plus_14, file: !8, producer: "clang version 14.0.0", isOptimized: false, runtimeVersion: 0, emissionKind: FullDebug, globals: !9, splitDebugInlining: false, nameTableKind: None) +!8 = !DIFile(filename: "example.cpp", directory: "") +!9 = !{!0, !10} +!10 = !DIGlobalVariableExpression(var: !11, expr: !DIExpression()) +!11 = distinct !DIGlobalVariable(name: "A", scope: !12, file: !3, line: 2, type: !6, isLocal: false, isDefinition: true) +!12 = distinct !DISubprogram(name: "removed", linkageName: "_Z7removedv", scope: !3, file: !3, line: 2, type: !4, scopeLine: 2, flags: DIFlagPrototyped, spFlags: DISPFlagDefinition, unit: !7, retainedNodes: !13) +!13 = !{} +!14 = !{i32 7, !"Dwarf Version", i32 4} +!15 = !{i32 2, !"Debug Info Version", i32 3} +!16 = !{i32 1, !"wchar_size", i32 4} +!17 = !{i32 1, !"branch-target-enforcement", i32 0} +!18 = !{i32 1, !"sign-return-address", i32 0} +!19 = !{i32 1, !"sign-return-address-all", i32 0} +!20 = !{i32 1, !"sign-return-address-with-bkey", i32 0} +!21 = !{i32 7, !"uwtable", i32 1} +!22 = !{i32 7, !"frame-pointer", i32 1} +!23 = !{!"clang version 14.0.0"} +!24 = !DILocation(line: 5, column: 43, scope: !2) +!25 = !DILocation(line: 5, column: 35, 
scope: !2) +!26 = distinct !DISubprogram(name: "foo", linkageName: "_Z3foov", scope: !3, file: !3, line: 7, type: !4, scopeLine: 7, flags: DIFlagPrototyped, spFlags: DISPFlagDefinition, unit: !7, retainedNodes: !13) +!27 = !DILocation(line: 2, column: 39, scope: !12, inlinedAt: !28) +!28 = distinct !DILocation(line: 7, column: 20, scope: !26) +!29 = !DILocation(line: 5, column: 43, scope: !2, inlinedAt: !30) +!30 = distinct !DILocation(line: 7, column: 32, scope: !26) +!31 = !DILocation(line: 7, column: 30, scope: !26) +!32 = !DILocation(line: 7, column: 13, scope: !26) diff --git a/llvm/test/DebugInfo/Generic/lexical_block_static.ll b/llvm/test/DebugInfo/Generic/lexical_block_static.ll new file mode 100644 index 000000000000..56327b27cdb1 --- /dev/null +++ b/llvm/test/DebugInfo/Generic/lexical_block_static.ll @@ -0,0 +1,141 @@ +; RUN: %llc_dwarf -O0 -filetype=obj < %s | llvm-dwarfdump -debug-info - | FileCheck --implicit-check-not "{{DW_TAG|NULL}}" %s + +; inline __attribute__((always_inline)) +; int removed() { +; { +; static int A; +; return A++; +; } +; } +; +; __attribute__((always_inline)) +; int not_removed() { +; { +; static int B; +; return B++; +; } +; } +; +; int foo() { +; { +; static int C; +; return ++C + removed() + not_removed(); +; } +; } + +; CHECK: DW_TAG_compile_unit + +; Out-of-line definition of `not_removed()`. +; The empty lexical block is created to match abstract origin. +; CHECK: DW_TAG_subprogram +; CHECK: DW_AT_abstract_origin {{.*}} "_Z11not_removedv" +; CHECK: DW_TAG_lexical_block +; CHECK: NULL + +; Abstract definition of `removed()` +; CHECK: DW_TAG_subprogram +; CHECK: DW_AT_name ("removed") +; CHECK: DW_AT_inline (DW_INL_inlined) +; CHECK: DW_TAG_lexical_block +; CHECK: DW_TAG_variable +; CHECK: DW_AT_name ("A") +; CHECK: DW_AT_location +; CHECK: NULL +; CHECK: NULL +; CHECK: DW_TAG_base_type + +; Abstract definition of `not_removed()` +; CHECK: DW_TAG_subprogram +; CHECK: DW_AT_name ("not_removed") +; CHECK: DW_AT_inline (DW_INL_inlined) +; CHECK: DW_TAG_lexical_block +; CHECK: DW_TAG_variable +; CHECK: DW_AT_name ("B") +; CHECK: DW_AT_location +; CHECK: NULL +; CHECK: NULL + +; Definition of foo(). 
+; CHECK: DW_TAG_subprogram +; CHECK: DW_AT_name ("foo") +; CHECK: DW_TAG_lexical_block +; CHECK: DW_TAG_inlined_subroutine +; CHECK: DW_TAG_lexical_block +; CHECK: NULL +; CHECK: DW_TAG_inlined_subroutine +; CHECK: DW_TAG_lexical_block +; CHECK: NULL +; CHECK: DW_TAG_variable +; CHECK: DW_AT_name ("C") +; CHECK: DW_AT_location +; CHECK: NULL +; CHECK: NULL +; CHECK: NULL + +@_ZZ11not_removedvE1B = internal global i32 0, align 4, !dbg !0 +@_ZZ3foovE1C = internal global i32 0, align 4, !dbg !10 +@_ZZ7removedvE1A = linkonce_odr dso_local global i32 0, align 4, !dbg !15 + +define dso_local i32 @_Z11not_removedv() !dbg !4 { +entry: + %0 = load i32, i32* @_ZZ11not_removedvE1B, align 4, !dbg !25 + %inc = add nsw i32 %0, 1, !dbg !25 + store i32 %inc, i32* @_ZZ11not_removedvE1B, align 4, !dbg !25 + ret i32 %0, !dbg !26 +} + +define dso_local i32 @_Z3foov() !dbg !13 { +entry: + %0 = load i32, i32* @_ZZ3foovE1C, align 4, !dbg !27 + %inc = add nsw i32 %0, 1, !dbg !27 + store i32 %inc, i32* @_ZZ3foovE1C, align 4, !dbg !27 + %1 = load i32, i32* @_ZZ7removedvE1A, align 4, !dbg !28 + %inc.i3 = add nsw i32 %1, 1, !dbg !28 + store i32 %inc.i3, i32* @_ZZ7removedvE1A, align 4, !dbg !28 + %add = add nsw i32 %inc, %1, !dbg !30 + %2 = load i32, i32* @_ZZ11not_removedvE1B, align 4, !dbg !31 + %inc.i = add nsw i32 %2, 1, !dbg !31 + store i32 %inc.i, i32* @_ZZ11not_removedvE1B, align 4, !dbg !31 + %add2 = add nsw i32 %add, %2, !dbg !33 + ret i32 %add2, !dbg !34 +} + +!llvm.dbg.cu = !{!8} +!llvm.module.flags = !{!19, !20, !21, !22, !23} +!llvm.ident = !{!24} + +!0 = !DIGlobalVariableExpression(var: !1, expr: !DIExpression()) +!1 = distinct !DIGlobalVariable(name: "B", scope: !2, file: !3, line: 13, type: !7, isLocal: true, isDefinition: true) +!2 = distinct !DILexicalBlock(scope: !4, file: !3, line: 12, column: 3) +!3 = !DIFile(filename: "test_static.cpp", directory: "/") +!4 = distinct !DISubprogram(name: "not_removed", linkageName: "_Z11not_removedv", scope: !3, file: !3, line: 11, type: !5, scopeLine: 11, flags: DIFlagPrototyped, spFlags: DISPFlagDefinition, unit: !8, retainedNodes: !14) +!5 = !DISubroutineType(types: !6) +!6 = !{!7} +!7 = !DIBasicType(name: "int", size: 32, encoding: DW_ATE_signed) +!8 = distinct !DICompileUnit(language: DW_LANG_C_plus_plus_14, file: !3, producer: "clang version 14.0.0 (git@github.com:llvm/llvm-project.git e1d09ac2d118825452bfc26e44565f7f4122fd6d)", isOptimized: false, runtimeVersion: 0, emissionKind: FullDebug, globals: !9, splitDebugInlining: false, nameTableKind: None) +!9 = !{!0, !10, !15} +!10 = !DIGlobalVariableExpression(var: !11, expr: !DIExpression()) +!11 = distinct !DIGlobalVariable(name: "C", scope: !12, file: !3, line: 20, type: !7, isLocal: true, isDefinition: true) +!12 = distinct !DILexicalBlock(scope: !13, file: !3, line: 19, column: 3) +!13 = distinct !DISubprogram(name: "foo", linkageName: "_Z3foov", scope: !3, file: !3, line: 18, type: !5, scopeLine: 18, flags: DIFlagPrototyped, spFlags: DISPFlagDefinition, unit: !8, retainedNodes: !14) +!14 = !{} +!15 = !DIGlobalVariableExpression(var: !16, expr: !DIExpression()) +!16 = distinct !DIGlobalVariable(name: "A", scope: !17, file: !3, line: 5, type: !7, isLocal: false, isDefinition: true) +!17 = distinct !DILexicalBlock(scope: !18, file: !3, line: 4, column: 3) +!18 = distinct !DISubprogram(name: "removed", linkageName: "_Z7removedv", scope: !3, file: !3, line: 3, type: !5, scopeLine: 3, flags: DIFlagPrototyped, spFlags: DISPFlagDefinition, unit: !8, retainedNodes: !14) +!19 = !{i32 7, !"Dwarf Version", i32 4} 
+!20 = !{i32 2, !"Debug Info Version", i32 3} +!21 = !{i32 1, !"wchar_size", i32 4} +!22 = !{i32 7, !"uwtable", i32 1} +!23 = !{i32 7, !"frame-pointer", i32 2} +!24 = !{!"clang version 14.0.0 (git@github.com:llvm/llvm-project.git)"} +!25 = !DILocation(line: 14, column: 13, scope: !2) +!26 = !DILocation(line: 14, column: 5, scope: !2) +!27 = !DILocation(line: 21, column: 12, scope: !12) +!28 = !DILocation(line: 6, column: 13, scope: !17, inlinedAt: !29) +!29 = distinct !DILocation(line: 21, column: 18, scope: !12) +!30 = !DILocation(line: 21, column: 16, scope: !12) +!31 = !DILocation(line: 14, column: 13, scope: !2, inlinedAt: !32) +!32 = distinct !DILocation(line: 21, column: 30, scope: !12) +!33 = !DILocation(line: 21, column: 28, scope: !12) +!34 = !DILocation(line: 21, column: 5, scope: !12) diff --git a/llvm/test/DebugInfo/Generic/lexical_block_types.ll b/llvm/test/DebugInfo/Generic/lexical_block_types.ll new file mode 100644 index 000000000000..1d4e9e7eb3e7 --- /dev/null +++ b/llvm/test/DebugInfo/Generic/lexical_block_types.ll @@ -0,0 +1,421 @@ +; RUN: %llc_dwarf -O0 -filetype=obj < %s | llvm-dwarfdump -debug-info - | FileCheck --implicit-check-not "{{DW_TAG|NULL}}" %s + +; inline __attribute__((always_inline)) +; void removed() { +; struct A1 { int i; }; +; typedef int Int1; +; { +; struct I1 { Int1 j; }; +; struct C1 { typedef char Char1; Char1 c; }; +; A1 a1; a1.i++; +; { +; I1 i1; i1.j++; +; C1 c1; c1.c++; +; } +; } +; } +; +; __attribute__((always_inline)) +; void not_removed() { +; struct A2 { int i; }; +; typedef int Int2; +; { +; struct I2 { Int2 j; }; +; struct C2 { typedef char Char2; Char2 c; }; +; A2 a2; a2.i++; +; { +; I2 i2; i2.j++; +; C2 c2; c2.c++; +; } +; } +; } +; +; void foo() { +; struct A3 { int i; }; +; typedef int Int3; +; { +; struct I3 { Int3 j; }; +; { +; struct C3 { typedef char Char3; Char3 c; }; +; A3 a3; a3.i++; +; { +; I3 i3; i3.j++; +; C3 c3; c3.c++; +; } +; } +; } +; removed(); +; not_removed(); +; } +; +; CHECK: DW_TAG_compile_unit + +; Out-of-line definition of `not_removed()` shouldn't contain any debug info for types. +; CHECK: DW_TAG_subprogram +; CHECK: DW_AT_abstract_origin {{.*}} "_Z11not_removedv" +; CHECK: DW_TAG_lexical_block +; CHECK: DW_TAG_variable +; CHECK: DW_AT_abstract_origin {{.*}} "a2" +; CHECK: DW_TAG_lexical_block +; CHECK: DW_TAG_variable +; CHECK: DW_AT_abstract_origin {{.*}} "i2" +; CHECK: DW_TAG_variable +; CHECK: DW_AT_abstract_origin {{.*}} "c2" +; CHECK: NULL +; CHECK: NULL +; CHECK: NULL + +; Abstract definition of `removed()`. +; CHECK: DW_TAG_subprogram +; CHECK: DW_AT_name ("removed") +; CHECK: DW_AT_inline (DW_INL_inlined) + +; I1 and C1 defined in the first lexical block, typedef Char1 is a child of C1. +; CHECK: DW_TAG_lexical_block +; CHECK: DW_TAG_variable +; CHECK: DW_AT_name ("a1") +; CHECK: DW_AT_type {{.*}} "A1" +; CHECK: DW_TAG_lexical_block +; CHECK: DW_TAG_variable +; CHECK: DW_AT_type {{.*}} "I1" +; CHECK: DW_TAG_variable +; CHECK: DW_AT_type {{.*}} "C1" +; CHECK: NULL +; CHECK: DW_TAG_structure_type +; CHECK: DW_AT_name ("I1") +; CHECK: DW_TAG_member +; CHECK: DW_AT_type {{.*}} "Int1" +; CHECK: NULL +; CHECK: DW_TAG_structure_type +; CHECK: DW_AT_name ("C1") +; CHECK: DW_TAG_member +; CHECK: DW_AT_type {{.*}} "C1::Char1" +; CHECK: DW_TAG_typedef +; CHECK: DW_AT_name ("Char1") +; CHECK: NULL +; CHECK: NULL + +; A1 and typedef Int1 defined in subprogram scope. 
+; CHECK: DW_TAG_structure_type +; CHECK: DW_AT_name ("A1") +; CHECK: DW_TAG_member +; CHECK: NULL +; CHECK: DW_TAG_typedef +; CHECK: DW_AT_name ("Int1") +; CHECK: NULL + +; CHECK: DW_TAG_base_type +; CHECK: DW_TAG_base_type + +; Abstract definition of `not_removed()`. +; CHECK: DW_TAG_subprogram +; CHECK: DW_AT_name ("not_removed") +; CHECK: DW_AT_inline (DW_INL_inlined) + +; I2 and C2 defined in the first lexical block, typedef Char2 is a child of C2. +; CHECK: DW_TAG_lexical_block +; CHECK: DW_TAG_variable +; CHECK: DW_AT_name ("a2") +; CHECK: DW_AT_type {{.*}} "A2" +; CHECK: DW_TAG_lexical_block +; CHECK: DW_TAG_variable +; CHECK: DW_AT_name ("i2") +; CHECK: DW_AT_type {{.*}} "I2" +; CHECK: DW_TAG_variable +; CHECK: DW_AT_name ("c2") +; CHECK: DW_AT_type {{.*}} "C2" +; CHECK: NULL +; CHECK: DW_TAG_structure_type +; CHECK: DW_AT_name ("I2") +; CHECK: DW_TAG_member +; CHECK: DW_AT_type {{.*}} "Int2" +; CHECK: NULL +; CHECK: DW_TAG_structure_type +; CHECK: DW_AT_name ("C2") +; CHECK: DW_TAG_member +; CHECK: DW_AT_type {{.*}} "C2::Char2" +; CHECK: DW_TAG_typedef +; CHECK: DW_AT_name ("Char2") +; CHECK: NULL +; CHECK: NULL + +; A2 and typedef Int2 defined in subprogram scope. +; CHECK: DW_TAG_structure_type +; CHECK: DW_AT_name ("A2") +; CHECK: DW_TAG_member +; CHECK: NULL +; CHECK: DW_TAG_typedef +; CHECK: DW_AT_name ("Int2") +; CHECK: NULL + +; Definition of `foo()`. +; CHECK: DW_TAG_subprogram +; CHECK: DW_AT_name ("foo") + +; CHECK: DW_TAG_lexical_block +; CHECK: DW_TAG_variable +; CHECK: DW_AT_name ("a3") +; CHECK: DW_AT_type {{.*}} "A3" +; CHECK: DW_TAG_lexical_block +; CHECK: DW_TAG_variable +; CHECK: DW_AT_name ("i3") +; CHECK: DW_AT_type {{.*}} "I3" +; CHECK: DW_TAG_variable +; CHECK: DW_AT_name ("c3") +; CHECK: DW_AT_type {{.*}} "C3" +; CHECK: NULL + +; C3 has the inner lexical block scope, typedef Char3 is a child of C3. +; CHECK: DW_TAG_structure_type +; CHECK: DW_AT_name ("C3") +; CHECK: DW_TAG_member +; CHECK: DW_AT_type {{.*}} "C3::Char3" +; CHECK: DW_TAG_typedef +; CHECK: DW_AT_name ("Char3") +; CHECK: NULL +; CHECK: NULL + +; CHECK: DW_TAG_inlined_subroutine +; CHECK: DW_AT_abstract_origin {{.*}} "_Z7removedv" +; CHECK: DW_TAG_lexical_block +; CHECK: DW_TAG_variable +; CHECK: DW_TAG_lexical_block +; CHECK: DW_TAG_variable +; CHECK: DW_TAG_variable +; CHECK: NULL +; CHECK: NULL +; CHECK: NULL + +; CHECK: DW_TAG_inlined_subroutine +; CHECK: DW_AT_abstract_origin {{.*}} "_Z11not_removedv" +; CHECK: DW_TAG_lexical_block +; CHECK: DW_TAG_variable +; CHECK: DW_TAG_lexical_block +; CHECK: DW_TAG_variable +; CHECK: DW_TAG_variable +; CHECK: NULL +; CHECK: NULL +; CHECK: NULL + +; A3 and typedef Int3 defined in the subprogram scope. +; FIXME: I3 has subprogram scope here, but should be in the outer lexical block +; (which is ommitted). It wasn't placed correctly, because it's the only non-scope +; entity in the block and it isn't listed in retainedTypes; we simply wasn't aware +; about it while deciding whether to create a lexical block or not. 
+; CHECK: DW_TAG_structure_type +; CHECK: DW_AT_name ("A3") +; CHECK: DW_TAG_member +; CHECK: NULL +; CHECK: DW_TAG_structure_type +; CHECK: DW_AT_name ("I3") +; CHECK: DW_TAG_member +; CHECK: DW_AT_type {{.*}} "Int3" +; CHECK: NULL +; CHECK: DW_TAG_typedef +; CHECK: DW_AT_name ("Int3") +; CHECK: NULL +; CHECK: NULL + +%struct.A2 = type { i32 } +%struct.I2 = type { i32 } +%struct.C2 = type { i8 } +%struct.A1 = type { i32 } +%struct.I1 = type { i32 } +%struct.C1 = type { i8 } +%struct.A3 = type { i32 } +%struct.I3 = type { i32 } +%struct.C3 = type { i8 } + +define dso_local void @_Z11not_removedv() !dbg !8 { +entry: + %a2 = alloca %struct.A2, align 4 + %i2 = alloca %struct.I2, align 4 + %c2 = alloca %struct.C2, align 1 + call void @llvm.dbg.declare(metadata %struct.A2* %a2, metadata !12, metadata !DIExpression()), !dbg !18 + %i = getelementptr inbounds %struct.A2, %struct.A2* %a2, i32 0, i32 0, !dbg !19 + %0 = load i32, i32* %i, align 4, !dbg !20 + %inc = add nsw i32 %0, 1, !dbg !20 + store i32 %inc, i32* %i, align 4, !dbg !20 + call void @llvm.dbg.declare(metadata %struct.I2* %i2, metadata !21, metadata !DIExpression()), !dbg !27 + %j = getelementptr inbounds %struct.I2, %struct.I2* %i2, i32 0, i32 0, !dbg !28 + %1 = load i32, i32* %j, align 4, !dbg !29 + %inc1 = add nsw i32 %1, 1, !dbg !29 + store i32 %inc1, i32* %j, align 4, !dbg !29 + call void @llvm.dbg.declare(metadata %struct.C2* %c2, metadata !30, metadata !DIExpression()), !dbg !36 + %c = getelementptr inbounds %struct.C2, %struct.C2* %c2, i32 0, i32 0, !dbg !37 + %2 = load i8, i8* %c, align 1, !dbg !38 + %inc2 = add i8 %2, 1, !dbg !38 + store i8 %inc2, i8* %c, align 1, !dbg !38 + ret void, !dbg !39 +} + +declare void @llvm.dbg.declare(metadata, metadata, metadata) + +define dso_local void @_Z3foov() !dbg !40 { +entry: + %a1.i = alloca %struct.A1, align 4 + %i1.i = alloca %struct.I1, align 4 + %c1.i = alloca %struct.C1, align 1 + %a2.i = alloca %struct.A2, align 4 + %i2.i = alloca %struct.I2, align 4 + %c2.i = alloca %struct.C2, align 1 + %a3 = alloca %struct.A3, align 4 + %i3 = alloca %struct.I3, align 4 + %c3 = alloca %struct.C3, align 1 + call void @llvm.dbg.declare(metadata %struct.A3* %a3, metadata !41, metadata !DIExpression()), !dbg !47 + %i = getelementptr inbounds %struct.A3, %struct.A3* %a3, i32 0, i32 0, !dbg !48 + %0 = load i32, i32* %i, align 4, !dbg !49 + %inc = add nsw i32 %0, 1, !dbg !49 + store i32 %inc, i32* %i, align 4, !dbg !49 + call void @llvm.dbg.declare(metadata %struct.I3* %i3, metadata !50, metadata !DIExpression()), !dbg !56 + %j = getelementptr inbounds %struct.I3, %struct.I3* %i3, i32 0, i32 0, !dbg !57 + %1 = load i32, i32* %j, align 4, !dbg !58 + %inc1 = add nsw i32 %1, 1, !dbg !58 + store i32 %inc1, i32* %j, align 4, !dbg !58 + call void @llvm.dbg.declare(metadata %struct.C3* %c3, metadata !59, metadata !DIExpression()), !dbg !64 + %c = getelementptr inbounds %struct.C3, %struct.C3* %c3, i32 0, i32 0, !dbg !65 + %2 = load i8, i8* %c, align 1, !dbg !66 + %inc2 = add i8 %2, 1, !dbg !66 + store i8 %inc2, i8* %c, align 1, !dbg !66 + call void @llvm.dbg.declare(metadata %struct.A1* %a1.i, metadata !67, metadata !DIExpression()), !dbg !73 + %i.i3 = getelementptr inbounds %struct.A1, %struct.A1* %a1.i, i32 0, i32 0, !dbg !75 + %3 = load i32, i32* %i.i3, align 4, !dbg !76 + %inc.i4 = add nsw i32 %3, 1, !dbg !76 + store i32 %inc.i4, i32* %i.i3, align 4, !dbg !76 + call void @llvm.dbg.declare(metadata %struct.I1* %i1.i, metadata !77, metadata !DIExpression()), !dbg !83 + %j.i5 = getelementptr inbounds 
%struct.I1, %struct.I1* %i1.i, i32 0, i32 0, !dbg !84 + %4 = load i32, i32* %j.i5, align 4, !dbg !85 + %inc1.i6 = add nsw i32 %4, 1, !dbg !85 + store i32 %inc1.i6, i32* %j.i5, align 4, !dbg !85 + call void @llvm.dbg.declare(metadata %struct.C1* %c1.i, metadata !86, metadata !DIExpression()), !dbg !91 + %c.i7 = getelementptr inbounds %struct.C1, %struct.C1* %c1.i, i32 0, i32 0, !dbg !92 + %5 = load i8, i8* %c.i7, align 1, !dbg !93 + %inc2.i8 = add i8 %5, 1, !dbg !93 + store i8 %inc2.i8, i8* %c.i7, align 1, !dbg !93 + call void @llvm.dbg.declare(metadata %struct.A2* %a2.i, metadata !12, metadata !DIExpression()), !dbg !94 + %i.i = getelementptr inbounds %struct.A2, %struct.A2* %a2.i, i32 0, i32 0, !dbg !96 + %6 = load i32, i32* %i.i, align 4, !dbg !97 + %inc.i = add nsw i32 %6, 1, !dbg !97 + store i32 %inc.i, i32* %i.i, align 4, !dbg !97 + call void @llvm.dbg.declare(metadata %struct.I2* %i2.i, metadata !21, metadata !DIExpression()), !dbg !98 + %j.i = getelementptr inbounds %struct.I2, %struct.I2* %i2.i, i32 0, i32 0, !dbg !99 + %7 = load i32, i32* %j.i, align 4, !dbg !100 + %inc1.i = add nsw i32 %7, 1, !dbg !100 + store i32 %inc1.i, i32* %j.i, align 4, !dbg !100 + call void @llvm.dbg.declare(metadata %struct.C2* %c2.i, metadata !30, metadata !DIExpression()), !dbg !101 + %c.i = getelementptr inbounds %struct.C2, %struct.C2* %c2.i, i32 0, i32 0, !dbg !102 + %8 = load i8, i8* %c.i, align 1, !dbg !103 + %inc2.i = add i8 %8, 1, !dbg !103 + store i8 %inc2.i, i8* %c.i, align 1, !dbg !103 + ret void, !dbg !104 +} + +!llvm.dbg.cu = !{!0} +!llvm.module.flags = !{!2, !3, !4, !5, !6} +!llvm.ident = !{!7} + +!0 = distinct !DICompileUnit(language: DW_LANG_C_plus_plus_14, file: !1, producer: "clang version 14.0.0", isOptimized: false, runtimeVersion: 0, emissionKind: FullDebug, splitDebugInlining: false, nameTableKind: None) +!1 = !DIFile(filename: "test.cpp", directory: "/") +!2 = !{i32 7, !"Dwarf Version", i32 4} +!3 = !{i32 2, !"Debug Info Version", i32 3} +!4 = !{i32 1, !"wchar_size", i32 4} +!5 = !{i32 7, !"uwtable", i32 1} +!6 = !{i32 7, !"frame-pointer", i32 2} +!7 = !{!"clang version 14.0.0"} +!8 = distinct !DISubprogram(name: "not_removed", linkageName: "_Z11not_removedv", scope: !1, file: !1, line: 17, type: !9, scopeLine: 17, flags: DIFlagPrototyped, spFlags: DISPFlagDefinition, unit: !0, retainedNodes: !11) +!9 = !DISubroutineType(types: !10) +!10 = !{null} +!11 = !{} +!12 = !DILocalVariable(name: "a2", scope: !13, file: !1, line: 23, type: !14) +!13 = distinct !DILexicalBlock(scope: !8, file: !1, line: 20, column: 3) +!14 = distinct !DICompositeType(tag: DW_TAG_structure_type, name: "A2", scope: !8, file: !1, line: 18, size: 32, flags: DIFlagTypePassByValue, elements: !15) +!15 = !{!16} +!16 = !DIDerivedType(tag: DW_TAG_member, name: "i", scope: !14, file: !1, line: 18, baseType: !17, size: 32) +!17 = !DIBasicType(name: "int", size: 32, encoding: DW_ATE_signed) +!18 = !DILocation(line: 23, column: 8, scope: !13) +!19 = !DILocation(line: 23, column: 15, scope: !13) +!20 = !DILocation(line: 23, column: 16, scope: !13) +!21 = !DILocalVariable(name: "i2", scope: !22, file: !1, line: 25, type: !23) +!22 = distinct !DILexicalBlock(scope: !13, file: !1, line: 24, column: 5) +!23 = distinct !DICompositeType(tag: DW_TAG_structure_type, name: "I2", scope: !13, file: !1, line: 21, size: 32, flags: DIFlagTypePassByValue, elements: !24) +!24 = !{!25} +!25 = !DIDerivedType(tag: DW_TAG_member, name: "j", scope: !23, file: !1, line: 21, baseType: !26, size: 32) +!26 = !DIDerivedType(tag: DW_TAG_typedef, 
name: "Int2", scope: !8, file: !1, line: 19, baseType: !17) +!27 = !DILocation(line: 25, column: 10, scope: !22) +!28 = !DILocation(line: 25, column: 17, scope: !22) +!29 = !DILocation(line: 25, column: 18, scope: !22) +!30 = !DILocalVariable(name: "c2", scope: !22, file: !1, line: 26, type: !31) +!31 = distinct !DICompositeType(tag: DW_TAG_structure_type, name: "C2", scope: !13, file: !1, line: 22, size: 8, flags: DIFlagTypePassByValue, elements: !32) +!32 = !{!33} +!33 = !DIDerivedType(tag: DW_TAG_member, name: "c", scope: !31, file: !1, line: 22, baseType: !34, size: 8) +!34 = !DIDerivedType(tag: DW_TAG_typedef, name: "Char2", scope: !31, file: !1, line: 22, baseType: !35) +!35 = !DIBasicType(name: "char", size: 8, encoding: DW_ATE_signed_char) +!36 = !DILocation(line: 26, column: 10, scope: !22) +!37 = !DILocation(line: 26, column: 17, scope: !22) +!38 = !DILocation(line: 26, column: 18, scope: !22) +!39 = !DILocation(line: 29, column: 1, scope: !8) +!40 = distinct !DISubprogram(name: "foo", linkageName: "_Z3foov", scope: !1, file: !1, line: 31, type: !9, scopeLine: 31, flags: DIFlagPrototyped, spFlags: DISPFlagDefinition, unit: !0, retainedNodes: !11) +!41 = !DILocalVariable(name: "a3", scope: !42, file: !1, line: 38, type: !44) +!42 = distinct !DILexicalBlock(scope: !43, file: !1, line: 36, column: 5) +!43 = distinct !DILexicalBlock(scope: !40, file: !1, line: 34, column: 3) +!44 = distinct !DICompositeType(tag: DW_TAG_structure_type, name: "A3", scope: !40, file: !1, line: 32, size: 32, flags: DIFlagTypePassByValue, elements: !45) +!45 = !{!46} +!46 = !DIDerivedType(tag: DW_TAG_member, name: "i", scope: !44, file: !1, line: 32, baseType: !17, size: 32) +!47 = !DILocation(line: 38, column: 10, scope: !42) +!48 = !DILocation(line: 38, column: 17, scope: !42) +!49 = !DILocation(line: 38, column: 18, scope: !42) +!50 = !DILocalVariable(name: "i3", scope: !51, file: !1, line: 40, type: !52) +!51 = distinct !DILexicalBlock(scope: !42, file: !1, line: 39, column: 7) +!52 = distinct !DICompositeType(tag: DW_TAG_structure_type, name: "I3", scope: !43, file: !1, line: 35, size: 32, flags: DIFlagTypePassByValue, elements: !53) +!53 = !{!54} +!54 = !DIDerivedType(tag: DW_TAG_member, name: "j", scope: !52, file: !1, line: 35, baseType: !55, size: 32) +!55 = !DIDerivedType(tag: DW_TAG_typedef, name: "Int3", scope: !40, file: !1, line: 33, baseType: !17) +!56 = !DILocation(line: 40, column: 12, scope: !51) +!57 = !DILocation(line: 40, column: 19, scope: !51) +!58 = !DILocation(line: 40, column: 20, scope: !51) +!59 = !DILocalVariable(name: "c3", scope: !51, file: !1, line: 41, type: !60) +!60 = distinct !DICompositeType(tag: DW_TAG_structure_type, name: "C3", scope: !42, file: !1, line: 37, size: 8, flags: DIFlagTypePassByValue, elements: !61) +!61 = !{!62} +!62 = !DIDerivedType(tag: DW_TAG_member, name: "c", scope: !60, file: !1, line: 37, baseType: !63, size: 8) +!63 = !DIDerivedType(tag: DW_TAG_typedef, name: "Char3", scope: !60, file: !1, line: 37, baseType: !35) +!64 = !DILocation(line: 41, column: 12, scope: !51) +!65 = !DILocation(line: 41, column: 19, scope: !51) +!66 = !DILocation(line: 41, column: 20, scope: !51) +!67 = !DILocalVariable(name: "a1", scope: !68, file: !1, line: 8, type: !70) +!68 = distinct !DILexicalBlock(scope: !69, file: !1, line: 5, column: 3) +!69 = distinct !DISubprogram(name: "removed", linkageName: "_Z7removedv", scope: !1, file: !1, line: 2, type: !9, scopeLine: 2, flags: DIFlagPrototyped, spFlags: DISPFlagDefinition, unit: !0, retainedNodes: !11) +!70 = distinct 
!DICompositeType(tag: DW_TAG_structure_type, name: "A1", scope: !69, file: !1, line: 3, size: 32, flags: DIFlagTypePassByValue, elements: !71, identifier: "_ZTSZ7removedvE2A1") +!71 = !{!72} +!72 = !DIDerivedType(tag: DW_TAG_member, name: "i", scope: !70, file: !1, line: 3, baseType: !17, size: 32) +!73 = !DILocation(line: 8, column: 8, scope: !68, inlinedAt: !74) +!74 = distinct !DILocation(line: 45, column: 3, scope: !40) +!75 = !DILocation(line: 8, column: 15, scope: !68, inlinedAt: !74) +!76 = !DILocation(line: 8, column: 16, scope: !68, inlinedAt: !74) +!77 = !DILocalVariable(name: "i1", scope: !78, file: !1, line: 10, type: !79) +!78 = distinct !DILexicalBlock(scope: !68, file: !1, line: 9, column: 5) +!79 = distinct !DICompositeType(tag: DW_TAG_structure_type, name: "I1", scope: !68, file: !1, line: 6, size: 32, flags: DIFlagTypePassByValue, elements: !80, identifier: "_ZTSZ7removedvE2I1") +!80 = !{!81} +!81 = !DIDerivedType(tag: DW_TAG_member, name: "j", scope: !79, file: !1, line: 6, baseType: !82, size: 32) +!82 = !DIDerivedType(tag: DW_TAG_typedef, name: "Int1", scope: !69, file: !1, line: 4, baseType: !17) +!83 = !DILocation(line: 10, column: 10, scope: !78, inlinedAt: !74) +!84 = !DILocation(line: 10, column: 17, scope: !78, inlinedAt: !74) +!85 = !DILocation(line: 10, column: 18, scope: !78, inlinedAt: !74) +!86 = !DILocalVariable(name: "c1", scope: !78, file: !1, line: 11, type: !87) +!87 = distinct !DICompositeType(tag: DW_TAG_structure_type, name: "C1", scope: !68, file: !1, line: 7, size: 8, flags: DIFlagTypePassByValue, elements: !88, identifier: "_ZTSZ7removedvE2C1") +!88 = !{!89} +!89 = !DIDerivedType(tag: DW_TAG_member, name: "c", scope: !87, file: !1, line: 7, baseType: !90, size: 8) +!90 = !DIDerivedType(tag: DW_TAG_typedef, name: "Char1", scope: !87, file: !1, line: 7, baseType: !35) +!91 = !DILocation(line: 11, column: 10, scope: !78, inlinedAt: !74) +!92 = !DILocation(line: 11, column: 17, scope: !78, inlinedAt: !74) +!93 = !DILocation(line: 11, column: 18, scope: !78, inlinedAt: !74) +!94 = !DILocation(line: 23, column: 8, scope: !13, inlinedAt: !95) +!95 = distinct !DILocation(line: 46, column: 3, scope: !40) +!96 = !DILocation(line: 23, column: 15, scope: !13, inlinedAt: !95) +!97 = !DILocation(line: 23, column: 16, scope: !13, inlinedAt: !95) +!98 = !DILocation(line: 25, column: 10, scope: !22, inlinedAt: !95) +!99 = !DILocation(line: 25, column: 17, scope: !22, inlinedAt: !95) +!100 = !DILocation(line: 25, column: 18, scope: !22, inlinedAt: !95) +!101 = !DILocation(line: 26, column: 10, scope: !22, inlinedAt: !95) +!102 = !DILocation(line: 26, column: 17, scope: !22, inlinedAt: !95) +!103 = !DILocation(line: 26, column: 18, scope: !22, inlinedAt: !95) +!104 = !DILocation(line: 47, column: 1, scope: !40) diff --git a/llvm/test/DebugInfo/Generic/namespace.ll b/llvm/test/DebugInfo/Generic/namespace.ll index 6998e69b33d9..7b9bf9b531b8 100644 --- a/llvm/test/DebugInfo/Generic/namespace.ll +++ b/llvm/test/DebugInfo/Generic/namespace.ll @@ -22,15 +22,21 @@ ; CHECK: DW_TAG_formal_parameter ; CHECK: NULL +; CHECK: [[FUNC_FWD:0x[0-9a-f]*]]:{{.*}}DW_TAG_subprogram +; CHECK: DW_AT_name ("func_fwd") +; CHECK-NOT: DW_AT_declaration + +; CHECK: [[I:0x[0-9a-f]*]]:{{ *}}DW_TAG_variable +; CHECK: DW_AT_name ("i") +; CHECK: [[VAR_FWD:0x[0-9a-f]*]]:{{ *}}DW_TAG_variable +; CHECK: DW_AT_name ("var_fwd") + ; CHECK: [[FOO:0x[0-9a-f]*]]:{{ *}}DW_TAG_structure_type ; CHECK: DW_AT_name ("foo") ; CHECK: DW_AT_declaration ; CHECK: [[BAR:0x[0-9a-f]*]]:{{ *}}DW_TAG_structure_type ; 
CHECK: DW_AT_name ("bar") -; CHECK: [[I:0x[0-9a-f]*]]:{{ *}}DW_TAG_variable -; CHECK: DW_AT_name ("i") - ; CHECK: [[BAZ:0x[0-9a-f]*]]:{{.*}}DW_TAG_typedef ; CHECK: DW_AT_name ("baz") @@ -41,13 +47,6 @@ ; CHECK: [[FUNC_DECL:0x[0-9a-f]*]]:{{.*}}DW_TAG_subprogram ; CHECK: DW_AT_name ("func_decl") ; CHECK: DW_AT_declaration - -; CHECK: [[VAR_FWD:0x[0-9a-f]*]]:{{ *}}DW_TAG_variable -; CHECK: DW_AT_name ("var_fwd") - -; CHECK: [[FUNC_FWD:0x[0-9a-f]*]]:{{.*}}DW_TAG_subprogram -; CHECK: DW_AT_name ("func_fwd") -; CHECK-NOT: DW_AT_declaration ; CHECK: NULL ; CHECK: DW_TAG_imported_module @@ -62,6 +61,12 @@ ; CHECK: DW_AT_MIPS_linkage_name ; CHECK: DW_AT_name ("func") ; CHECK: DW_TAG_formal_parameter +; CHECK: DW_TAG_lexical_block +; CHECK: DW_TAG_imported_module +; CHECK: DW_AT_decl_file ([[F2]]) +; CHECK: DW_AT_decl_line (23) +; CHECK: DW_AT_import {{.*}} +; CHECK: NULL ; CHECK: DW_TAG_imported_module ; CHECK: DW_AT_decl_file ([[F2:.*]]) ; CHECK: DW_AT_decl_line (26) @@ -112,16 +117,10 @@ ; CHECK: DW_AT_decl_file ([[F2]]) ; CHECK: DW_AT_decl_line (37) ; CHECK: DW_AT_import ([[FUNC_FWD]]) -; CHECK: DW_TAG_lexical_block -; CHECK: DW_TAG_imported_module -; CHECK: DW_AT_decl_file ([[F2]]) -; CHECK: DW_AT_decl_line (23) -; CHECK: DW_AT_import {{.*}} -; CHECK: NULL ; CHECK: NULL -; CHECK: DW_TAG_base_type ; CHECK: DW_TAG_subprogram +; CHECK: DW_TAG_base_type ; CHECK: DW_TAG_imported_module ; CHECK: DW_AT_decl_file ([[F2:.*]]) ; CHECK: DW_AT_decl_line (18) From 61312fd5aa1e193cfa41004ff0fe84e0a2061c6e Mon Sep 17 00:00:00 2001 From: Fangrui Song Date: Thu, 23 Dec 2021 01:24:32 -0800 Subject: [PATCH 129/293] [ELF] sortSections: delete unneeded outSecOff assignment Related to D45368 but outSecOff is unneeded because resolveShfLinkOrder uses stable_sort. --- lld/ELF/Writer.cpp | 7 ------- 1 file changed, 7 deletions(-) diff --git a/lld/ELF/Writer.cpp b/lld/ELF/Writer.cpp index b593a2950e53..221601d15c2f 100644 --- a/lld/ELF/Writer.cpp +++ b/lld/ELF/Writer.cpp @@ -1469,13 +1469,6 @@ template void Writer::sortSections() { if (!os) continue; os->sortRank = getSectionRank(os); - - // We want to assign rude approximation values to outSecOff fields - // to know the relative order of the input sections. We use it for - // sorting SHF_LINK_ORDER sections. See resolveShfLinkOrder(). - uint64_t i = 0; - for (InputSection *sec : getInputSections(os)) - sec->outSecOff = i++; } if (!script->hasSectionsCommand) { From 33319dde2a4f594ba763f82e89b7cf4024416ff8 Mon Sep 17 00:00:00 2001 From: Fangrui Song Date: Thu, 23 Dec 2021 01:52:54 -0800 Subject: [PATCH 130/293] [ELF] LTO: skip expensive usedStartStop initialization if bitcodeFiles.empty() This may cost 1.3+% of total link time. --- lld/ELF/LTO.cpp | 2 ++ 1 file changed, 2 insertions(+) diff --git a/lld/ELF/LTO.cpp b/lld/ELF/LTO.cpp index b1632b8bae8a..65b943c4a54c 100644 --- a/lld/ELF/LTO.cpp +++ b/lld/ELF/LTO.cpp @@ -204,6 +204,8 @@ BitcodeCompiler::BitcodeCompiler() { config->ltoPartitions); // Initialize usedStartStop. + if (bitcodeFiles.empty()) + return; for (Symbol *sym : symtab->symbols()) { StringRef s = sym->getName(); for (StringRef prefix : {"__start_", "__stop_"}) From 09669e6c5fa1e8db9c1091cc264640fb0377d6b6 Mon Sep 17 00:00:00 2001 From: Nikita Popov Date: Tue, 21 Dec 2021 15:28:45 +0100 Subject: [PATCH 131/293] [CodeGen] Avoid pointer element type access when creating LValue This required fixing two places that were passing the pointer type rather than the expected pointee type to the method. 
--- clang/lib/CodeGen/CGExpr.cpp | 3 ++- clang/lib/CodeGen/CGOpenMPRuntime.cpp | 2 +- clang/lib/CodeGen/CodeGenFunction.cpp | 4 ++-- 3 files changed, 5 insertions(+), 4 deletions(-) diff --git a/clang/lib/CodeGen/CGExpr.cpp b/clang/lib/CodeGen/CGExpr.cpp index 577252fdfeac..34b4951a7f72 100644 --- a/clang/lib/CodeGen/CGExpr.cpp +++ b/clang/lib/CodeGen/CGExpr.cpp @@ -1309,7 +1309,8 @@ LValue CodeGenFunction::EmitLValue(const Expr *E) { const ConstantExpr *CE = cast(E); if (llvm::Value *Result = ConstantEmitter(*this).tryEmitConstantExpr(CE)) { QualType RetType = cast(CE->getSubExpr()->IgnoreImplicit()) - ->getCallReturnType(getContext()); + ->getCallReturnType(getContext()) + ->getPointeeType(); return MakeNaturalAlignAddrLValue(Result, RetType); } return EmitLValue(cast(E)->getSubExpr()); diff --git a/clang/lib/CodeGen/CGOpenMPRuntime.cpp b/clang/lib/CodeGen/CGOpenMPRuntime.cpp index 7bd7d97da43a..8a75f06882bc 100644 --- a/clang/lib/CodeGen/CGOpenMPRuntime.cpp +++ b/clang/lib/CodeGen/CGOpenMPRuntime.cpp @@ -5945,7 +5945,7 @@ static llvm::Value *emitReduceInitFunction(CodeGenModule &CGM, } else { OrigLVal = CGF.MakeNaturalAlignAddrLValue( llvm::ConstantPointerNull::get(CGM.VoidPtrTy), - CGM.getContext().VoidPtrTy); + CGM.getContext().VoidTy); } // Emit the initializer: // %0 = bitcast void* %arg to * diff --git a/clang/lib/CodeGen/CodeGenFunction.cpp b/clang/lib/CodeGen/CodeGenFunction.cpp index b437ba01c676..e6adec6948af 100644 --- a/clang/lib/CodeGen/CodeGenFunction.cpp +++ b/clang/lib/CodeGen/CodeGenFunction.cpp @@ -188,8 +188,8 @@ LValue CodeGenFunction::MakeNaturalAlignAddrLValue(llvm::Value *V, QualType T) { LValueBaseInfo BaseInfo; TBAAAccessInfo TBAAInfo; CharUnits Alignment = CGM.getNaturalTypeAlignment(T, &BaseInfo, &TBAAInfo); - return LValue::MakeAddr(Address(V, Alignment), T, getContext(), BaseInfo, - TBAAInfo); + Address Addr(V, ConvertTypeForMem(T), Alignment); + return LValue::MakeAddr(Addr, T, getContext(), BaseInfo, TBAAInfo); } /// Given a value of type T* that may not be to a complete object, From 74ce7ff5dc5bad1d78fddd1ebbc63c66b7fa986b Mon Sep 17 00:00:00 2001 From: Jay Foad Date: Thu, 23 Dec 2021 10:19:27 +0000 Subject: [PATCH 132/293] [AMDGPU] Remove a TODO that was done by D98081 --- llvm/lib/Target/AMDGPU/SIInstructions.td | 1 - 1 file changed, 1 deletion(-) diff --git a/llvm/lib/Target/AMDGPU/SIInstructions.td b/llvm/lib/Target/AMDGPU/SIInstructions.td index 2f0792203d4f..95744b6390c5 100644 --- a/llvm/lib/Target/AMDGPU/SIInstructions.td +++ b/llvm/lib/Target/AMDGPU/SIInstructions.td @@ -2531,7 +2531,6 @@ def : GCNPat < (v2i16 (V_AND_OR_B32_e64 SReg_32:$src1, (i32 (V_MOV_B32_e32 (i32 0xffff0000))), (i32 (V_LSHRREV_B32_e64 (i32 16), SReg_32:$src0)))) >; -// TODO: Should source modifiers be matched to v_pack_b32_f16? def : GCNPat < (v2f16 (UniformBinFrag (f16 SReg_32:$src0), (f16 SReg_32:$src1))), (S_PACK_LL_B32_B16 SReg_32:$src0, SReg_32:$src1) From fcc0964ed43ed6430dec42ad89526463d08dc1d4 Mon Sep 17 00:00:00 2001 From: Nikolas Klauser Date: Thu, 23 Dec 2021 11:55:38 +0100 Subject: [PATCH 133/293] Revert "[libc++] Remove unused headers from " This reverts commit 352945dd36c64fd68b4c73fcc393ee5828da639a. 
--- libcxx/include/filesystem | 23 +++++++++++++++++++ libcxx/src/filesystem/directory_iterator.cpp | 3 +-- libcxx/src/filesystem/filesystem_common.h | 1 - .../filesystems/convert_file_time.pass.cpp | 9 ++++---- .../directory_entry.obs/comparisons.pass.cpp | 4 ++-- .../class.directory_iterator/types.pass.cpp | 3 +-- .../filesystem_error.members.pass.cpp | 4 +--- .../path.member/path.assign/copy.pass.cpp | 3 +-- .../path.member/path.assign/move.pass.cpp | 3 +-- .../path.member/path.construct/copy.pass.cpp | 3 +-- .../path.member/path.construct/move.pass.cpp | 3 +-- .../filesystems/class.path/synop.pass.cpp | 3 +-- libcxx/test/support/filesystem_test_helper.h | 3 +-- 13 files changed, 38 insertions(+), 27 deletions(-) diff --git a/libcxx/include/filesystem b/libcxx/include/filesystem index 6203b9a123cc..09d90614aa0d 100644 --- a/libcxx/include/filesystem +++ b/libcxx/include/filesystem @@ -254,9 +254,32 @@ inline constexpr bool std::ranges::enable_view #include<__filesystem/space_info.h> #include<__filesystem/u8path.h> + +#include <__availability> +#include <__config> +#include <__debug> +#include <__ranges/enable_borrowed_range.h> +#include <__ranges/enable_view.h> +#include <__utility/forward.h> +#include #include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include #include +#if !defined(_LIBCPP_HAS_NO_LOCALIZATION) +# include +# include // for quoted +#endif + #if defined(_LIBCPP_HAS_NO_FILESYSTEM_LIBRARY) # error "The Filesystem library is not supported since libc++ has been configured with LIBCXX_ENABLE_FILESYSTEM disabled" #endif diff --git a/libcxx/src/filesystem/directory_iterator.cpp b/libcxx/src/filesystem/directory_iterator.cpp index 90b255d9877f..6219ceafd36e 100644 --- a/libcxx/src/filesystem/directory_iterator.cpp +++ b/libcxx/src/filesystem/directory_iterator.cpp @@ -6,9 +6,8 @@ // //===----------------------------------------------------------------------===// -#include "__config" #include "filesystem" -#include "stack" +#include "__config" #if defined(_LIBCPP_WIN32API) #define WIN32_LEAN_AND_MEAN #define NOMINMAX diff --git a/libcxx/src/filesystem/filesystem_common.h b/libcxx/src/filesystem/filesystem_common.h index 717894e537d2..a2c340e61083 100644 --- a/libcxx/src/filesystem/filesystem_common.h +++ b/libcxx/src/filesystem/filesystem_common.h @@ -17,7 +17,6 @@ #include "cstdlib" #include "ctime" #include "filesystem" -#include "system_error" #if !defined(_LIBCPP_WIN32API) # include diff --git a/libcxx/test/libcxx/input.output/filesystems/convert_file_time.pass.cpp b/libcxx/test/libcxx/input.output/filesystems/convert_file_time.pass.cpp index bbeaedd51f22..f93737c2372d 100644 --- a/libcxx/test/libcxx/input.output/filesystems/convert_file_time.pass.cpp +++ b/libcxx/test/libcxx/input.output/filesystems/convert_file_time.pass.cpp @@ -14,13 +14,12 @@ // ADDITIONAL_COMPILE_FLAGS: -I %S/../../../../src/filesystem -#include -#include -#include #include -#include -#include +#include #include +#include +#include +#include #include "filesystem_common.h" diff --git a/libcxx/test/std/input.output/filesystems/class.directory_entry/directory_entry.obs/comparisons.pass.cpp b/libcxx/test/std/input.output/filesystems/class.directory_entry/directory_entry.obs/comparisons.pass.cpp index f7e5770a7cd7..e71d4e24664a 100644 --- a/libcxx/test/std/input.output/filesystems/class.directory_entry/directory_entry.obs/comparisons.pass.cpp +++ b/libcxx/test/std/input.output/filesystems/class.directory_entry/directory_entry.obs/comparisons.pass.cpp @@ -19,10 
+19,10 @@ // bool operator> (directory_entry const&) const noexcept; // bool operator>=(directory_entry const&) const noexcept; + #include "filesystem_include.h" -#include #include -#include +#include #include "test_macros.h" diff --git a/libcxx/test/std/input.output/filesystems/class.directory_iterator/types.pass.cpp b/libcxx/test/std/input.output/filesystems/class.directory_iterator/types.pass.cpp index 403d1ae338dd..97b5d23d9dbb 100644 --- a/libcxx/test/std/input.output/filesystems/class.directory_iterator/types.pass.cpp +++ b/libcxx/test/std/input.output/filesystems/class.directory_iterator/types.pass.cpp @@ -19,9 +19,8 @@ // typedef ... iterator_category #include "filesystem_include.h" -#include -#include #include +#include #include "test_macros.h" diff --git a/libcxx/test/std/input.output/filesystems/class.filesystem_error/filesystem_error.members.pass.cpp b/libcxx/test/std/input.output/filesystems/class.filesystem_error/filesystem_error.members.pass.cpp index 8fd23b60081b..3c0ae5d0886c 100644 --- a/libcxx/test/std/input.output/filesystems/class.filesystem_error/filesystem_error.members.pass.cpp +++ b/libcxx/test/std/input.output/filesystems/class.filesystem_error/filesystem_error.members.pass.cpp @@ -21,10 +21,8 @@ // const path& path2() const noexcept; #include "filesystem_include.h" -#include -#include -#include #include +#include #include "test_macros.h" diff --git a/libcxx/test/std/input.output/filesystems/class.path/path.member/path.assign/copy.pass.cpp b/libcxx/test/std/input.output/filesystems/class.path/path.member/path.assign/copy.pass.cpp index 9044b2f5a45e..a7182557d3f7 100644 --- a/libcxx/test/std/input.output/filesystems/class.path/path.member/path.assign/copy.pass.cpp +++ b/libcxx/test/std/input.output/filesystems/class.path/path.member/path.assign/copy.pass.cpp @@ -15,9 +15,8 @@ // path& operator=(path const&); #include "filesystem_include.h" -#include -#include #include +#include #include "test_macros.h" diff --git a/libcxx/test/std/input.output/filesystems/class.path/path.member/path.assign/move.pass.cpp b/libcxx/test/std/input.output/filesystems/class.path/path.member/path.assign/move.pass.cpp index 0f3e4745fd53..69dff085e58f 100644 --- a/libcxx/test/std/input.output/filesystems/class.path/path.member/path.assign/move.pass.cpp +++ b/libcxx/test/std/input.output/filesystems/class.path/path.member/path.assign/move.pass.cpp @@ -17,9 +17,8 @@ // path& operator=(path&&) noexcept #include "filesystem_include.h" -#include -#include #include +#include #include "test_macros.h" #include "count_new.h" diff --git a/libcxx/test/std/input.output/filesystems/class.path/path.member/path.construct/copy.pass.cpp b/libcxx/test/std/input.output/filesystems/class.path/path.member/path.construct/copy.pass.cpp index 1155b7c1b3b5..31d00ffd2cd3 100644 --- a/libcxx/test/std/input.output/filesystems/class.path/path.member/path.construct/copy.pass.cpp +++ b/libcxx/test/std/input.output/filesystems/class.path/path.member/path.construct/copy.pass.cpp @@ -15,9 +15,8 @@ // path(path const&) #include "filesystem_include.h" -#include -#include #include +#include #include "test_macros.h" diff --git a/libcxx/test/std/input.output/filesystems/class.path/path.member/path.construct/move.pass.cpp b/libcxx/test/std/input.output/filesystems/class.path/path.member/path.construct/move.pass.cpp index 9f575ba61329..40a8c54f802d 100644 --- a/libcxx/test/std/input.output/filesystems/class.path/path.member/path.construct/move.pass.cpp +++ 
b/libcxx/test/std/input.output/filesystems/class.path/path.member/path.construct/move.pass.cpp @@ -16,9 +16,8 @@ // path(path&&) noexcept #include "filesystem_include.h" -#include -#include #include +#include #include "test_macros.h" #include "count_new.h" diff --git a/libcxx/test/std/input.output/filesystems/class.path/synop.pass.cpp b/libcxx/test/std/input.output/filesystems/class.path/synop.pass.cpp index ab18399fd5d6..e9e0a990165a 100644 --- a/libcxx/test/std/input.output/filesystems/class.path/synop.pass.cpp +++ b/libcxx/test/std/input.output/filesystems/class.path/synop.pass.cpp @@ -17,9 +17,8 @@ // static constexpr value_type preferred_separator = ...; #include "filesystem_include.h" -#include -#include #include +#include #include "test_macros.h" diff --git a/libcxx/test/support/filesystem_test_helper.h b/libcxx/test/support/filesystem_test_helper.h index 44a01645dc49..00f4701e881a 100644 --- a/libcxx/test/support/filesystem_test_helper.h +++ b/libcxx/test/support/filesystem_test_helper.h @@ -14,10 +14,9 @@ #endif #include -#include #include // for printf #include -#include +#include #include #include "make_string.h" From 1d50cf98b56fd17e675eb990572befbc440bceab Mon Sep 17 00:00:00 2001 From: Nikolas Klauser Date: Thu, 23 Dec 2021 12:03:36 +0100 Subject: [PATCH 134/293] [libc++] IWYU in tests Add headers in `` tests that were transitively included through `` Reviewed as part of D116146 --- libcxx/src/filesystem/directory_iterator.cpp | 3 ++- libcxx/src/filesystem/filesystem_common.h | 1 + .../input.output/filesystems/convert_file_time.pass.cpp | 9 +++++---- .../directory_entry.obs/comparisons.pass.cpp | 4 ++-- .../filesystems/class.directory_iterator/types.pass.cpp | 3 ++- .../filesystem_error.members.pass.cpp | 4 +++- .../class.path/path.member/path.assign/copy.pass.cpp | 3 ++- .../class.path/path.member/path.assign/move.pass.cpp | 3 ++- .../class.path/path.member/path.construct/copy.pass.cpp | 3 ++- .../class.path/path.member/path.construct/move.pass.cpp | 3 ++- .../input.output/filesystems/class.path/synop.pass.cpp | 3 ++- libcxx/test/support/filesystem_test_helper.h | 3 ++- 12 files changed, 27 insertions(+), 15 deletions(-) diff --git a/libcxx/src/filesystem/directory_iterator.cpp b/libcxx/src/filesystem/directory_iterator.cpp index 6219ceafd36e..90b255d9877f 100644 --- a/libcxx/src/filesystem/directory_iterator.cpp +++ b/libcxx/src/filesystem/directory_iterator.cpp @@ -6,8 +6,9 @@ // //===----------------------------------------------------------------------===// -#include "filesystem" #include "__config" +#include "filesystem" +#include "stack" #if defined(_LIBCPP_WIN32API) #define WIN32_LEAN_AND_MEAN #define NOMINMAX diff --git a/libcxx/src/filesystem/filesystem_common.h b/libcxx/src/filesystem/filesystem_common.h index a2c340e61083..717894e537d2 100644 --- a/libcxx/src/filesystem/filesystem_common.h +++ b/libcxx/src/filesystem/filesystem_common.h @@ -17,6 +17,7 @@ #include "cstdlib" #include "ctime" #include "filesystem" +#include "system_error" #if !defined(_LIBCPP_WIN32API) # include diff --git a/libcxx/test/libcxx/input.output/filesystems/convert_file_time.pass.cpp b/libcxx/test/libcxx/input.output/filesystems/convert_file_time.pass.cpp index f93737c2372d..bbeaedd51f22 100644 --- a/libcxx/test/libcxx/input.output/filesystems/convert_file_time.pass.cpp +++ b/libcxx/test/libcxx/input.output/filesystems/convert_file_time.pass.cpp @@ -14,12 +14,13 @@ // ADDITIONAL_COMPILE_FLAGS: -I %S/../../../../src/filesystem -#include +#include #include -#include -#include #include 
-#include +#include +#include +#include +#include #include "filesystem_common.h" diff --git a/libcxx/test/std/input.output/filesystems/class.directory_entry/directory_entry.obs/comparisons.pass.cpp b/libcxx/test/std/input.output/filesystems/class.directory_entry/directory_entry.obs/comparisons.pass.cpp index e71d4e24664a..f7e5770a7cd7 100644 --- a/libcxx/test/std/input.output/filesystems/class.directory_entry/directory_entry.obs/comparisons.pass.cpp +++ b/libcxx/test/std/input.output/filesystems/class.directory_entry/directory_entry.obs/comparisons.pass.cpp @@ -19,10 +19,10 @@ // bool operator> (directory_entry const&) const noexcept; // bool operator>=(directory_entry const&) const noexcept; - #include "filesystem_include.h" -#include #include +#include +#include #include "test_macros.h" diff --git a/libcxx/test/std/input.output/filesystems/class.directory_iterator/types.pass.cpp b/libcxx/test/std/input.output/filesystems/class.directory_iterator/types.pass.cpp index 97b5d23d9dbb..403d1ae338dd 100644 --- a/libcxx/test/std/input.output/filesystems/class.directory_iterator/types.pass.cpp +++ b/libcxx/test/std/input.output/filesystems/class.directory_iterator/types.pass.cpp @@ -19,8 +19,9 @@ // typedef ... iterator_category #include "filesystem_include.h" -#include #include +#include +#include #include "test_macros.h" diff --git a/libcxx/test/std/input.output/filesystems/class.filesystem_error/filesystem_error.members.pass.cpp b/libcxx/test/std/input.output/filesystems/class.filesystem_error/filesystem_error.members.pass.cpp index 3c0ae5d0886c..8fd23b60081b 100644 --- a/libcxx/test/std/input.output/filesystems/class.filesystem_error/filesystem_error.members.pass.cpp +++ b/libcxx/test/std/input.output/filesystems/class.filesystem_error/filesystem_error.members.pass.cpp @@ -21,8 +21,10 @@ // const path& path2() const noexcept; #include "filesystem_include.h" -#include #include +#include +#include +#include #include "test_macros.h" diff --git a/libcxx/test/std/input.output/filesystems/class.path/path.member/path.assign/copy.pass.cpp b/libcxx/test/std/input.output/filesystems/class.path/path.member/path.assign/copy.pass.cpp index a7182557d3f7..9044b2f5a45e 100644 --- a/libcxx/test/std/input.output/filesystems/class.path/path.member/path.assign/copy.pass.cpp +++ b/libcxx/test/std/input.output/filesystems/class.path/path.member/path.assign/copy.pass.cpp @@ -15,8 +15,9 @@ // path& operator=(path const&); #include "filesystem_include.h" -#include #include +#include +#include #include "test_macros.h" diff --git a/libcxx/test/std/input.output/filesystems/class.path/path.member/path.assign/move.pass.cpp b/libcxx/test/std/input.output/filesystems/class.path/path.member/path.assign/move.pass.cpp index 69dff085e58f..0f3e4745fd53 100644 --- a/libcxx/test/std/input.output/filesystems/class.path/path.member/path.assign/move.pass.cpp +++ b/libcxx/test/std/input.output/filesystems/class.path/path.member/path.assign/move.pass.cpp @@ -17,8 +17,9 @@ // path& operator=(path&&) noexcept #include "filesystem_include.h" -#include #include +#include +#include #include "test_macros.h" #include "count_new.h" diff --git a/libcxx/test/std/input.output/filesystems/class.path/path.member/path.construct/copy.pass.cpp b/libcxx/test/std/input.output/filesystems/class.path/path.member/path.construct/copy.pass.cpp index 31d00ffd2cd3..1155b7c1b3b5 100644 --- a/libcxx/test/std/input.output/filesystems/class.path/path.member/path.construct/copy.pass.cpp +++ 
b/libcxx/test/std/input.output/filesystems/class.path/path.member/path.construct/copy.pass.cpp @@ -15,8 +15,9 @@ // path(path const&) #include "filesystem_include.h" -#include #include +#include +#include #include "test_macros.h" diff --git a/libcxx/test/std/input.output/filesystems/class.path/path.member/path.construct/move.pass.cpp b/libcxx/test/std/input.output/filesystems/class.path/path.member/path.construct/move.pass.cpp index 40a8c54f802d..9f575ba61329 100644 --- a/libcxx/test/std/input.output/filesystems/class.path/path.member/path.construct/move.pass.cpp +++ b/libcxx/test/std/input.output/filesystems/class.path/path.member/path.construct/move.pass.cpp @@ -16,8 +16,9 @@ // path(path&&) noexcept #include "filesystem_include.h" -#include #include +#include +#include #include "test_macros.h" #include "count_new.h" diff --git a/libcxx/test/std/input.output/filesystems/class.path/synop.pass.cpp b/libcxx/test/std/input.output/filesystems/class.path/synop.pass.cpp index e9e0a990165a..ab18399fd5d6 100644 --- a/libcxx/test/std/input.output/filesystems/class.path/synop.pass.cpp +++ b/libcxx/test/std/input.output/filesystems/class.path/synop.pass.cpp @@ -17,8 +17,9 @@ // static constexpr value_type preferred_separator = ...; #include "filesystem_include.h" -#include #include +#include +#include #include "test_macros.h" diff --git a/libcxx/test/support/filesystem_test_helper.h b/libcxx/test/support/filesystem_test_helper.h index 00f4701e881a..44a01645dc49 100644 --- a/libcxx/test/support/filesystem_test_helper.h +++ b/libcxx/test/support/filesystem_test_helper.h @@ -14,9 +14,10 @@ #endif #include +#include #include // for printf #include -#include +#include #include #include "make_string.h" From 53f0538181fd62f2a5efa9c73589f75e80454a8e Mon Sep 17 00:00:00 2001 From: Nikita Popov Date: Thu, 23 Dec 2021 13:00:46 +0100 Subject: [PATCH 135/293] [CodeGen] Use correct element type for store to sret sret is special in that it does not use the memory type representation. Manually construct the LValue using ConvertType instead of ConvertTypeForMem here. This fixes matrix-lowering-opt-levels.c on s390x. --- clang/lib/CodeGen/CGCall.cpp | 15 +++++++++++---- 1 file changed, 11 insertions(+), 4 deletions(-) diff --git a/clang/lib/CodeGen/CGCall.cpp b/clang/lib/CodeGen/CGCall.cpp index b202326cf757..d70f78fea6b4 100644 --- a/clang/lib/CodeGen/CGCall.cpp +++ b/clang/lib/CodeGen/CGCall.cpp @@ -3469,12 +3469,19 @@ void CodeGenFunction::EmitFunctionEpilog(const CGFunctionInfo &FI, case TEK_Aggregate: // Do nothing; aggregrates get evaluated directly into the destination. 
break; - case TEK_Scalar: - EmitStoreOfScalar(Builder.CreateLoad(ReturnValue), - MakeNaturalAlignAddrLValue(&*AI, RetTy), - /*isInit*/ true); + case TEK_Scalar: { + LValueBaseInfo BaseInfo; + TBAAAccessInfo TBAAInfo; + CharUnits Alignment = + CGM.getNaturalTypeAlignment(RetTy, &BaseInfo, &TBAAInfo); + Address ArgAddr(&*AI, ConvertType(RetTy), Alignment); + LValue ArgVal = + LValue::MakeAddr(ArgAddr, RetTy, getContext(), BaseInfo, TBAAInfo); + EmitStoreOfScalar( + Builder.CreateLoad(ReturnValue), ArgVal, /*isInit*/ true); break; } + } break; } From 23f1cd9e635757edd7a7877a80a36f5f9e53ae28 Mon Sep 17 00:00:00 2001 From: Nikolas Klauser Date: Thu, 23 Dec 2021 12:21:00 +0100 Subject: [PATCH 136/293] [libc++] Remove unused headers from Remove unused headers from `` Reviewed By: Quuxplusone, #libc, Mordante Spies: Mordante, libcxx-commits Differential Revision: https://reviews.llvm.org/D116146 --- libcxx/include/filesystem | 54 ++++++++++++--------------------------- 1 file changed, 16 insertions(+), 38 deletions(-) diff --git a/libcxx/include/filesystem b/libcxx/include/filesystem index 09d90614aa0d..9f5b42747b34 100644 --- a/libcxx/include/filesystem +++ b/libcxx/include/filesystem @@ -238,48 +238,26 @@ inline constexpr bool std::ranges::enable_view -#include<__filesystem/directory_entry.h> -#include<__filesystem/directory_iterator.h> -#include<__filesystem/directory_options.h> -#include<__filesystem/file_status.h> -#include<__filesystem/file_time_type.h> -#include<__filesystem/file_type.h> -#include<__filesystem/filesystem_error.h> -#include<__filesystem/operations.h> -#include<__filesystem/path_iterator.h> -#include<__filesystem/path.h> -#include<__filesystem/perm_options.h> -#include<__filesystem/perms.h> -#include<__filesystem/recursive_directory_iterator.h> -#include<__filesystem/space_info.h> -#include<__filesystem/u8path.h> - -#include <__availability> #include <__config> -#include <__debug> -#include <__ranges/enable_borrowed_range.h> -#include <__ranges/enable_view.h> -#include <__utility/forward.h> -#include +#include <__filesystem/copy_options.h> +#include <__filesystem/directory_entry.h> +#include <__filesystem/directory_iterator.h> +#include <__filesystem/directory_options.h> +#include <__filesystem/file_status.h> +#include <__filesystem/file_time_type.h> +#include <__filesystem/file_type.h> +#include <__filesystem/filesystem_error.h> +#include <__filesystem/operations.h> +#include <__filesystem/path_iterator.h> +#include <__filesystem/path.h> +#include <__filesystem/perm_options.h> +#include <__filesystem/perms.h> +#include <__filesystem/recursive_directory_iterator.h> +#include <__filesystem/space_info.h> +#include <__filesystem/u8path.h> #include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include #include -#if !defined(_LIBCPP_HAS_NO_LOCALIZATION) -# include -# include // for quoted -#endif - #if defined(_LIBCPP_HAS_NO_FILESYSTEM_LIBRARY) # error "The Filesystem library is not supported since libc++ has been configured with LIBCXX_ENABLE_FILESYSTEM disabled" #endif From 368318bcce66d9fef420fc34cca361b79d80cee5 Mon Sep 17 00:00:00 2001 From: Chuanqi Xu Date: Thu, 23 Dec 2021 20:47:28 +0800 Subject: [PATCH 137/293] [C++20] [Coroutines] Mark imported module as imported if not exported In C++20 Modules, imported module which doesn't get exported wouldn't be recorded. This patch would record such modules to avoid possible incorrect visibility problems. 
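
To make the scenario concrete, here is a minimal sketch of the kind of program this affects (illustrative only; generic names, not the exact contents of the test inputs referenced below):

  // a.cppm: interface unit that exports a template.
  export module a;
  export template <class T> T id(T x) { return x; }

  // b.cppm: imports a *without* re-exporting it, then uses the template
  // inside an exported template function.
  export module b;
  import a;                          // plain import, not `export import a;`
  export template <class T> T wrap(T x) { return id(x); }

  // consumer.cpp: only imports b. Instantiating wrap<int> here still needs
  // the definition of id from module a to be reachable, which is why the
  // plain (non-exported) import in b has to be recorded.
  import b;
  int f() { return wrap<int>(1); }

The updated test clang/test/Modules/module-transtive-instantiation.cpp exercises this pattern and now expects no diagnostics.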
Reviewed By: urnathan Differential Revision: https://reviews.llvm.org/D116098 --- clang/lib/Sema/SemaModule.cpp | 9 ++++++++- clang/test/Modules/module-transtive-instantiation.cpp | 6 ++---- 2 files changed, 10 insertions(+), 5 deletions(-) diff --git a/clang/lib/Sema/SemaModule.cpp b/clang/lib/Sema/SemaModule.cpp index f497199badc1..a4b9f3c242c1 100644 --- a/clang/lib/Sema/SemaModule.cpp +++ b/clang/lib/Sema/SemaModule.cpp @@ -383,11 +383,18 @@ DeclResult Sema::ActOnModuleImport(SourceLocation StartLoc, if (!ModuleScopes.empty()) Context.addModuleInitializer(ModuleScopes.back().Module, Import); - // Re-export the module if needed. if (!ModuleScopes.empty() && ModuleScopes.back().ModuleInterface) { + // Re-export the module if the imported module is exported. + // Note that we don't need to add re-exported module to Imports field + // since `Exports` implies the module is imported already. if (ExportLoc.isValid() || getEnclosingExportDecl(Import)) getCurrentModule()->Exports.emplace_back(Mod, false); + else + getCurrentModule()->Imports.insert(Mod); } else if (ExportLoc.isValid()) { + // [module.interface]p1: + // An export-declaration shall inhabit a namespace scope and appear in the + // purview of a module interface unit. Diag(ExportLoc, diag::err_export_not_in_module_interface); } diff --git a/clang/test/Modules/module-transtive-instantiation.cpp b/clang/test/Modules/module-transtive-instantiation.cpp index 2c5c7ead8ad5..b44f0bbfdf39 100644 --- a/clang/test/Modules/module-transtive-instantiation.cpp +++ b/clang/test/Modules/module-transtive-instantiation.cpp @@ -3,11 +3,9 @@ // RUN: %clang_cc1 -std=c++20 %S/Inputs/module-transtive-instantiation/Templ.cppm -emit-module-interface -o %t/Templ.pcm // RUN: %clang_cc1 -std=c++20 %S/Inputs/module-transtive-instantiation/bar.cppm -emit-module-interface -fprebuilt-module-path=%t -o %t/bar.pcm // RUN: %clang_cc1 -std=c++20 -fprebuilt-module-path=%t %s -fsyntax-only -verify +// expected-no-diagnostics import bar; int foo() { - // FIXME: It shouldn't be an error. Since the `G` is already imported in bar. - return bar(); // expected-error@Inputs/module-transtive-instantiation/bar.cppm:5 {{definition of 'G' must be imported from module 'Templ' before it is required}} - // expected-note@-1 {{in instantiation of function template specialization 'bar' requested here}} - // expected-note@Inputs/module-transtive-instantiation/Templ.cppm:3 {{definition here is not reachable}} + return bar(); } From 1e2c31c66be79b6ca6aeb42fc2835017935b8b27 Mon Sep 17 00:00:00 2001 From: Chuanqi Xu Date: Thu, 23 Dec 2021 21:10:07 +0800 Subject: [PATCH 138/293] Revert "[C++20] [Coroutines] Mark imported module as imported if not exported" This reverts commit 368318bcce66d9fef420fc34cca361b79d80cee5. The title is not right. It should be a patch about modules instead of coroutines. --- clang/lib/Sema/SemaModule.cpp | 9 +-------- clang/test/Modules/module-transtive-instantiation.cpp | 6 ++++-- 2 files changed, 5 insertions(+), 10 deletions(-) diff --git a/clang/lib/Sema/SemaModule.cpp b/clang/lib/Sema/SemaModule.cpp index a4b9f3c242c1..f497199badc1 100644 --- a/clang/lib/Sema/SemaModule.cpp +++ b/clang/lib/Sema/SemaModule.cpp @@ -383,18 +383,11 @@ DeclResult Sema::ActOnModuleImport(SourceLocation StartLoc, if (!ModuleScopes.empty()) Context.addModuleInitializer(ModuleScopes.back().Module, Import); + // Re-export the module if needed. if (!ModuleScopes.empty() && ModuleScopes.back().ModuleInterface) { - // Re-export the module if the imported module is exported. 
- // Note that we don't need to add re-exported module to Imports field - // since `Exports` implies the module is imported already. if (ExportLoc.isValid() || getEnclosingExportDecl(Import)) getCurrentModule()->Exports.emplace_back(Mod, false); - else - getCurrentModule()->Imports.insert(Mod); } else if (ExportLoc.isValid()) { - // [module.interface]p1: - // An export-declaration shall inhabit a namespace scope and appear in the - // purview of a module interface unit. Diag(ExportLoc, diag::err_export_not_in_module_interface); } diff --git a/clang/test/Modules/module-transtive-instantiation.cpp b/clang/test/Modules/module-transtive-instantiation.cpp index b44f0bbfdf39..2c5c7ead8ad5 100644 --- a/clang/test/Modules/module-transtive-instantiation.cpp +++ b/clang/test/Modules/module-transtive-instantiation.cpp @@ -3,9 +3,11 @@ // RUN: %clang_cc1 -std=c++20 %S/Inputs/module-transtive-instantiation/Templ.cppm -emit-module-interface -o %t/Templ.pcm // RUN: %clang_cc1 -std=c++20 %S/Inputs/module-transtive-instantiation/bar.cppm -emit-module-interface -fprebuilt-module-path=%t -o %t/bar.pcm // RUN: %clang_cc1 -std=c++20 -fprebuilt-module-path=%t %s -fsyntax-only -verify -// expected-no-diagnostics import bar; int foo() { - return bar(); + // FIXME: It shouldn't be an error. Since the `G` is already imported in bar. + return bar(); // expected-error@Inputs/module-transtive-instantiation/bar.cppm:5 {{definition of 'G' must be imported from module 'Templ' before it is required}} + // expected-note@-1 {{in instantiation of function template specialization 'bar' requested here}} + // expected-note@Inputs/module-transtive-instantiation/Templ.cppm:3 {{definition here is not reachable}} } From 2810c3403e421560ad3e97053ca7227f81596daf Mon Sep 17 00:00:00 2001 From: Simon Moll Date: Thu, 23 Dec 2021 14:12:34 +0100 Subject: [PATCH 139/293] [VE] Add VE support to update_llc_test_checks Add VE assembly scrubbing and triple support to update_llc_test_checks. Reviewed By: kaz7 Differential Revision: https://reviews.llvm.org/D116104 --- llvm/utils/UpdateTestChecks/asm.py | 18 ++++++++++++++++++ 1 file changed, 18 insertions(+) diff --git a/llvm/utils/UpdateTestChecks/asm.py b/llvm/utils/UpdateTestChecks/asm.py index a877fea8223e..f876b6649df5 100644 --- a/llvm/utils/UpdateTestChecks/asm.py +++ b/llvm/utils/UpdateTestChecks/asm.py @@ -166,6 +166,12 @@ class string: r'^\s*(\.Lfunc_end[0-9]+:\n|end_function)', flags=(re.M | re.S)) +ASM_FUNCTION_VE_RE = re.compile( + r'^_?(?P[^:]+):[ \t]*#+[ \t]*@(?P=func)\n' + r'(?P^##?[ \t]+[^:]+:.*?)\s*' + r'.Lfunc_end[0-9]+:\n', + flags=(re.M | re.S)) + SCRUB_X86_SHUFFLES_RE = ( re.compile( r'^(\s*\w+) [^#\n]+#+ ((?:[xyz]mm\d+|mem)( \{%k\d+\}( \{z\})?)? = .*)$', @@ -354,6 +360,16 @@ def scrub_asm_wasm32(asm, args): asm = common.SCRUB_TRAILING_WHITESPACE_RE.sub(r'', asm) return asm +def scrub_asm_ve(asm, args): + # Scrub runs of whitespace out of the assembly, but leave the leading + # whitespace in place. + asm = common.SCRUB_WHITESPACE_RE.sub(r' ', asm) + # Expand the tabs used for indentation. + asm = string.expandtabs(asm, 2) + # Strip trailing whitespace. 
+ asm = common.SCRUB_TRAILING_WHITESPACE_RE.sub(r'', asm) + return asm + def get_triple_from_march(march): triples = { 'amdgcn': 'amdgcn', @@ -361,6 +377,7 @@ def get_triple_from_march(march): 'mips': 'mips', 'sparc': 'sparc', 'hexagon': 'hexagon', + 've': 've', } for prefix, triple in triples.items(): if march.startswith(prefix): @@ -404,6 +421,7 @@ def get_run_handler(triple): 'sparc': (scrub_asm_sparc, ASM_FUNCTION_SPARC_RE), 's390x': (scrub_asm_systemz, ASM_FUNCTION_SYSTEMZ_RE), 'wasm32': (scrub_asm_wasm32, ASM_FUNCTION_WASM32_RE), + 've': (scrub_asm_ve, ASM_FUNCTION_VE_RE), } handler = None best_prefix = '' From 86b001a94172ac625f17f381c971dad3b9367278 Mon Sep 17 00:00:00 2001 From: Chuanqi Xu Date: Thu, 23 Dec 2021 21:12:28 +0800 Subject: [PATCH 140/293] [C++20] [Modules] Mark imported module as imported if not exported In C++20 Modules, imported module which doesn't get exported wouldn't be recorded. This patch would record such modules to avoid possible incorrect visibility problems. Reviewed By: urnathan Differential Revision: https://reviews.llvm.org/D116098 --- clang/lib/Sema/SemaModule.cpp | 9 ++++++++- clang/test/Modules/module-transtive-instantiation.cpp | 6 ++---- 2 files changed, 10 insertions(+), 5 deletions(-) diff --git a/clang/lib/Sema/SemaModule.cpp b/clang/lib/Sema/SemaModule.cpp index f497199badc1..a4b9f3c242c1 100644 --- a/clang/lib/Sema/SemaModule.cpp +++ b/clang/lib/Sema/SemaModule.cpp @@ -383,11 +383,18 @@ DeclResult Sema::ActOnModuleImport(SourceLocation StartLoc, if (!ModuleScopes.empty()) Context.addModuleInitializer(ModuleScopes.back().Module, Import); - // Re-export the module if needed. if (!ModuleScopes.empty() && ModuleScopes.back().ModuleInterface) { + // Re-export the module if the imported module is exported. + // Note that we don't need to add re-exported module to Imports field + // since `Exports` implies the module is imported already. if (ExportLoc.isValid() || getEnclosingExportDecl(Import)) getCurrentModule()->Exports.emplace_back(Mod, false); + else + getCurrentModule()->Imports.insert(Mod); } else if (ExportLoc.isValid()) { + // [module.interface]p1: + // An export-declaration shall inhabit a namespace scope and appear in the + // purview of a module interface unit. Diag(ExportLoc, diag::err_export_not_in_module_interface); } diff --git a/clang/test/Modules/module-transtive-instantiation.cpp b/clang/test/Modules/module-transtive-instantiation.cpp index 2c5c7ead8ad5..b44f0bbfdf39 100644 --- a/clang/test/Modules/module-transtive-instantiation.cpp +++ b/clang/test/Modules/module-transtive-instantiation.cpp @@ -3,11 +3,9 @@ // RUN: %clang_cc1 -std=c++20 %S/Inputs/module-transtive-instantiation/Templ.cppm -emit-module-interface -o %t/Templ.pcm // RUN: %clang_cc1 -std=c++20 %S/Inputs/module-transtive-instantiation/bar.cppm -emit-module-interface -fprebuilt-module-path=%t -o %t/bar.pcm // RUN: %clang_cc1 -std=c++20 -fprebuilt-module-path=%t %s -fsyntax-only -verify +// expected-no-diagnostics import bar; int foo() { - // FIXME: It shouldn't be an error. Since the `G` is already imported in bar. 
- return bar(); // expected-error@Inputs/module-transtive-instantiation/bar.cppm:5 {{definition of 'G' must be imported from module 'Templ' before it is required}} - // expected-note@-1 {{in instantiation of function template specialization 'bar' requested here}} - // expected-note@Inputs/module-transtive-instantiation/Templ.cppm:3 {{definition here is not reachable}} + return bar(); } From 3d36da77020b3770e59f829db2064dfa3c0c48f8 Mon Sep 17 00:00:00 2001 From: Simon Moll Date: Thu, 23 Dec 2021 14:14:29 +0100 Subject: [PATCH 141/293] [VE] Add manuals to CompilerWriterInfo Reference VE manual and documentations in CompilerWriterInfo. Reviewed By: kaz7 Differential Revision: https://reviews.llvm.org/D116212 --- llvm/docs/CompilerWriterInfo.rst | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/llvm/docs/CompilerWriterInfo.rst b/llvm/docs/CompilerWriterInfo.rst index 9184b852539d..0b02b40551a6 100644 --- a/llvm/docs/CompilerWriterInfo.rst +++ b/llvm/docs/CompilerWriterInfo.rst @@ -119,6 +119,12 @@ SystemZ * `z/Architecture Principles of Operation (registration required, free sign-up) `_ +VE +-- + +* `NEC SX-Aurora TSUBASA ISA Guide `_ +* `NEC SX-Aurora TSUBASA manuals and documentation `_ + X86 --- From d2863088ab8996169f1f4ff22b59066200c856e5 Mon Sep 17 00:00:00 2001 From: Petar Avramovic Date: Thu, 23 Dec 2021 12:39:38 +0100 Subject: [PATCH 142/293] GlobalISel: Regen vector mir tests, add tests for vector arg lowering Precommit for D114198 (Rework more/fewer elements for vectors). Regenerate auto-generated mir tests for vectors (use CHECK-NEXT instead of CHECK). Remove -global-isel-abort=0 where it is no longer needed. Add mir tests for different AMDGPU sub-targets and they way they lower function vector arguments (tests for legalization artifact combiner). 
--- .../GlobalISel/call-lowering-vectors.ll | 69 +- .../AArch64/GlobalISel/legalize-saddsat.mir | 259 +- .../GlobalISel/legalize-shuffle-vector.mir | 209 +- .../AArch64/GlobalISel/legalize-ssubsat.mir | 259 +- .../artifact-combiner-unmerge-values.mir | 2 +- .../AMDGPU/GlobalISel/function-returns.ll | 2027 +++---- .../AMDGPU/GlobalISel/legalize-and.mir | 582 +- .../AMDGPU/GlobalISel/legalize-bitcast.mir | 3310 +++++------ .../GlobalISel/legalize-concat-vectors.mir | 2 +- .../legalize-extract-vector-elt.mir | 2 +- .../AMDGPU/GlobalISel/legalize-fabs.mir | 8 +- .../AMDGPU/GlobalISel/legalize-fadd.mir | 8 +- .../GlobalISel/legalize-fcanonicalize.mir | 8 +- .../AMDGPU/GlobalISel/legalize-fcos.mir | 8 +- .../AMDGPU/GlobalISel/legalize-fdiv.mir | 3984 ++++++------- .../AMDGPU/GlobalISel/legalize-fexp.mir | 270 +- .../AMDGPU/GlobalISel/legalize-ffloor.mir | 588 +- .../AMDGPU/GlobalISel/legalize-fma.mir | 994 ++-- .../AMDGPU/GlobalISel/legalize-fmaxnum.mir | 1144 ++-- .../AMDGPU/GlobalISel/legalize-fminnum.mir | 1144 ++-- .../AMDGPU/GlobalISel/legalize-fneg.mir | 8 +- .../AMDGPU/GlobalISel/legalize-fpext.mir | 2 +- .../AMDGPU/GlobalISel/legalize-fshl.mir | 1808 +++--- .../AMDGPU/GlobalISel/legalize-fshr.mir | 2510 ++++---- .../AMDGPU/GlobalISel/legalize-fsin.mir | 8 +- .../AMDGPU/GlobalISel/legalize-fsqrt.mir | 510 +- .../AMDGPU/GlobalISel/legalize-fsub.mir | 8 +- .../GlobalISel/legalize-implicit-def.mir | 152 +- .../GlobalISel/legalize-intrinsic-round.mir | 1840 +++--- .../AMDGPU/GlobalISel/legalize-inttoptr.mir | 2 +- .../legalize-llvm.amdgcn.image.load.2d.d16.ll | 2706 ++++----- .../legalize-llvm.amdgcn.image.load.2d.ll | 2 +- ...legalize-llvm.amdgcn.image.store.2d.d16.ll | 692 +-- .../legalize-llvm.amdgcn.s.buffer.load.mir | 184 +- .../legalize-load-constant-32bit.mir | 2 +- .../AMDGPU/GlobalISel/legalize-load-flat.mir | 8 +- .../CodeGen/AMDGPU/GlobalISel/legalize-or.mir | 516 +- .../AMDGPU/GlobalISel/legalize-phi.mir | 1678 +++--- .../AMDGPU/GlobalISel/legalize-saddo.mir | 530 +- .../AMDGPU/GlobalISel/legalize-saddsat.mir | 1710 +++--- .../AMDGPU/GlobalISel/legalize-sbfx.mir | 6 +- .../AMDGPU/GlobalISel/legalize-sdiv.mir | 8 +- .../AMDGPU/GlobalISel/legalize-sext-inreg.mir | 8 +- .../AMDGPU/GlobalISel/legalize-smax.mir | 610 +- .../AMDGPU/GlobalISel/legalize-smin.mir | 610 +- .../AMDGPU/GlobalISel/legalize-srem.mir | 8 +- .../AMDGPU/GlobalISel/legalize-sshlsat.mir | 1814 +++--- .../AMDGPU/GlobalISel/legalize-ssubo.mir | 530 +- .../AMDGPU/GlobalISel/legalize-ssubsat.mir | 1710 +++--- .../AMDGPU/GlobalISel/legalize-trunc.mir | 2 +- .../AMDGPU/GlobalISel/legalize-uadde.mir | 2 +- .../AMDGPU/GlobalISel/legalize-uaddo.mir | 414 +- .../AMDGPU/GlobalISel/legalize-uaddsat.mir | 1112 ++-- .../AMDGPU/GlobalISel/legalize-ubfx.mir | 6 +- .../AMDGPU/GlobalISel/legalize-umax.mir | 620 +- .../AMDGPU/GlobalISel/legalize-umin.mir | 620 +- .../AMDGPU/GlobalISel/legalize-umulh.mir | 1100 ++-- .../AMDGPU/GlobalISel/legalize-ushlsat.mir | 1442 ++--- .../AMDGPU/GlobalISel/legalize-usube.mir | 2 +- .../AMDGPU/GlobalISel/legalize-usubo.mir | 456 +- .../AMDGPU/GlobalISel/legalize-usubsat.mir | 1060 ++-- .../GlobalISel/legalize-vector-args-gfx7.mir | 355 ++ .../legalize-vector-args-gfx8-plus.mir | 531 ++ .../regbankselect-amdgcn.s.buffer.load.ll | 5066 +++++++++-------- .../AMDGPU/GlobalISel/regbankselect-load.mir | 384 +- ...gbankselect-split-scalar-load-metadata.mir | 32 +- 66 files changed, 24807 insertions(+), 23454 deletions(-) create mode 100644 llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-vector-args-gfx7.mir 
create mode 100644 llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-vector-args-gfx8-plus.mir diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/call-lowering-vectors.ll b/llvm/test/CodeGen/AArch64/GlobalISel/call-lowering-vectors.ll index bee323ad69d4..988de92eeacb 100644 --- a/llvm/test/CodeGen/AArch64/GlobalISel/call-lowering-vectors.ll +++ b/llvm/test/CodeGen/AArch64/GlobalISel/call-lowering-vectors.ll @@ -4,12 +4,13 @@ define i8 @v1s8_add(<1 x i8> %a0) { ; CHECK-LABEL: name: v1s8_add ; CHECK: bb.1 (%ir-block.0): - ; CHECK: liveins: $d0 - ; CHECK: [[COPY:%[0-9]+]]:_(<8 x s8>) = COPY $d0 - ; CHECK: [[UV:%[0-9]+]]:_(s8), [[UV1:%[0-9]+]]:_(s8), [[UV2:%[0-9]+]]:_(s8), [[UV3:%[0-9]+]]:_(s8), [[UV4:%[0-9]+]]:_(s8), [[UV5:%[0-9]+]]:_(s8), [[UV6:%[0-9]+]]:_(s8), [[UV7:%[0-9]+]]:_(s8) = G_UNMERGE_VALUES [[COPY]](<8 x s8>) - ; CHECK: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[UV]](s8) - ; CHECK: $w0 = COPY [[ANYEXT]](s32) - ; CHECK: RET_ReallyLR implicit $w0 + ; CHECK-NEXT: liveins: $d0 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(<8 x s8>) = COPY $d0 + ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s8), [[UV1:%[0-9]+]]:_(s8), [[UV2:%[0-9]+]]:_(s8), [[UV3:%[0-9]+]]:_(s8), [[UV4:%[0-9]+]]:_(s8), [[UV5:%[0-9]+]]:_(s8), [[UV6:%[0-9]+]]:_(s8), [[UV7:%[0-9]+]]:_(s8) = G_UNMERGE_VALUES [[COPY]](<8 x s8>) + ; CHECK-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[UV]](s8) + ; CHECK-NEXT: $w0 = COPY [[ANYEXT]](s32) + ; CHECK-NEXT: RET_ReallyLR implicit $w0 %res = bitcast <1 x i8> %a0 to i8 ret i8 %res } @@ -17,16 +18,17 @@ define i8 @v1s8_add(<1 x i8> %a0) { define i24 @test_v3i8(<3 x i8> %a) { ; CHECK-LABEL: name: test_v3i8 ; CHECK: bb.1 (%ir-block.0): - ; CHECK: liveins: $w0, $w1, $w2 - ; CHECK: [[COPY:%[0-9]+]]:_(s32) = COPY $w0 - ; CHECK: [[COPY1:%[0-9]+]]:_(s32) = COPY $w1 - ; CHECK: [[COPY2:%[0-9]+]]:_(s32) = COPY $w2 - ; CHECK: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32) - ; CHECK: [[TRUNC:%[0-9]+]]:_(<3 x s8>) = G_TRUNC [[BUILD_VECTOR]](<3 x s32>) - ; CHECK: [[BITCAST:%[0-9]+]]:_(s24) = G_BITCAST [[TRUNC]](<3 x s8>) - ; CHECK: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[BITCAST]](s24) - ; CHECK: $w0 = COPY [[ANYEXT]](s32) - ; CHECK: RET_ReallyLR implicit $w0 + ; CHECK-NEXT: liveins: $w0, $w1, $w2 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $w0 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $w1 + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $w2 + ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32) + ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(<3 x s8>) = G_TRUNC [[BUILD_VECTOR]](<3 x s32>) + ; CHECK-NEXT: [[BITCAST:%[0-9]+]]:_(s24) = G_BITCAST [[TRUNC]](<3 x s8>) + ; CHECK-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[BITCAST]](s24) + ; CHECK-NEXT: $w0 = COPY [[ANYEXT]](s32) + ; CHECK-NEXT: RET_ReallyLR implicit $w0 %res = bitcast <3 x i8> %a to i24 ret i24 %res } @@ -35,12 +37,13 @@ define i24 @test_v3i8(<3 x i8> %a) { define <1 x half> @test_v1s16(<1 x float> %x) { ; CHECK-LABEL: name: test_v1s16 ; CHECK: bb.1 (%ir-block.0): - ; CHECK: liveins: $d0 - ; CHECK: [[COPY:%[0-9]+]]:_(<2 x s32>) = COPY $d0 - ; CHECK: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<2 x s32>) - ; CHECK: [[FPTRUNC:%[0-9]+]]:_(s16) = G_FPTRUNC [[UV]](s32) - ; CHECK: $h0 = COPY [[FPTRUNC]](s16) - ; CHECK: RET_ReallyLR implicit $h0 + ; CHECK-NEXT: liveins: $d0 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(<2 x s32>) = COPY $d0 + ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s32), 
[[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<2 x s32>) + ; CHECK-NEXT: [[FPTRUNC:%[0-9]+]]:_(s16) = G_FPTRUNC [[UV]](s32) + ; CHECK-NEXT: $h0 = COPY [[FPTRUNC]](s16) + ; CHECK-NEXT: RET_ReallyLR implicit $h0 %tmp = fptrunc <1 x float> %x to <1 x half> ret <1 x half> %tmp } @@ -49,17 +52,17 @@ declare <3 x float> @bar(float) define void @test_return_v3f32() { ; CHECK-LABEL: name: test_return_v3f32 ; CHECK: bb.1 (%ir-block.0): - ; CHECK: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF - ; CHECK: ADJCALLSTACKDOWN 0, 0, implicit-def $sp, implicit $sp - ; CHECK: $s0 = COPY [[DEF]](s32) - ; CHECK: BL @bar, csr_aarch64_aapcs, implicit-def $lr, implicit $sp, implicit $s0, implicit-def $q0 - ; CHECK: [[COPY:%[0-9]+]]:_(<2 x s64>) = COPY $q0 - ; CHECK: [[BITCAST:%[0-9]+]]:_(<4 x s32>) = G_BITCAST [[COPY]](<2 x s64>) - ; CHECK: [[DEF1:%[0-9]+]]:_(<4 x s32>) = G_IMPLICIT_DEF - ; CHECK: [[CONCAT_VECTORS:%[0-9]+]]:_(<12 x s32>) = G_CONCAT_VECTORS [[BITCAST]](<4 x s32>), [[DEF1]](<4 x s32>), [[DEF1]](<4 x s32>) - ; CHECK: [[UV:%[0-9]+]]:_(<3 x s32>), [[UV1:%[0-9]+]]:_(<3 x s32>), [[UV2:%[0-9]+]]:_(<3 x s32>), [[UV3:%[0-9]+]]:_(<3 x s32>) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<12 x s32>) - ; CHECK: ADJCALLSTACKUP 0, 0, implicit-def $sp, implicit $sp - ; CHECK: RET_ReallyLR + ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF + ; CHECK-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def $sp, implicit $sp + ; CHECK-NEXT: $s0 = COPY [[DEF]](s32) + ; CHECK-NEXT: BL @bar, csr_aarch64_aapcs, implicit-def $lr, implicit $sp, implicit $s0, implicit-def $q0 + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(<2 x s64>) = COPY $q0 + ; CHECK-NEXT: [[BITCAST:%[0-9]+]]:_(<4 x s32>) = G_BITCAST [[COPY]](<2 x s64>) + ; CHECK-NEXT: [[DEF1:%[0-9]+]]:_(<4 x s32>) = G_IMPLICIT_DEF + ; CHECK-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<12 x s32>) = G_CONCAT_VECTORS [[BITCAST]](<4 x s32>), [[DEF1]](<4 x s32>), [[DEF1]](<4 x s32>) + ; CHECK-NEXT: [[UV:%[0-9]+]]:_(<3 x s32>), [[UV1:%[0-9]+]]:_(<3 x s32>), [[UV2:%[0-9]+]]:_(<3 x s32>), [[UV3:%[0-9]+]]:_(<3 x s32>) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<12 x s32>) + ; CHECK-NEXT: ADJCALLSTACKUP 0, 0, implicit-def $sp, implicit $sp + ; CHECK-NEXT: RET_ReallyLR %call = call <3 x float> @bar(float undef) ret void } diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/legalize-saddsat.mir b/llvm/test/CodeGen/AArch64/GlobalISel/legalize-saddsat.mir index de94098e2e9c..860b5f75b1f8 100644 --- a/llvm/test/CodeGen/AArch64/GlobalISel/legalize-saddsat.mir +++ b/llvm/test/CodeGen/AArch64/GlobalISel/legalize-saddsat.mir @@ -10,16 +10,17 @@ body: | liveins: $w0, $w1 ; CHECK-LABEL: name: s32 ; CHECK: liveins: $w0, $w1 - ; CHECK: %x:_(s32) = COPY $w0 - ; CHECK: %y:_(s32) = COPY $w1 - ; CHECK: [[SADDO:%[0-9]+]]:_(s32), [[SADDO1:%[0-9]+]]:_(s1) = G_SADDO %x, %y - ; CHECK: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 31 - ; CHECK: [[ASHR:%[0-9]+]]:_(s32) = G_ASHR [[SADDO]], [[C]](s64) - ; CHECK: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 -2147483648 - ; CHECK: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[ASHR]], [[C1]] - ; CHECK: %saddsat:_(s32) = G_SELECT [[SADDO1]](s1), [[ADD]], [[SADDO]] - ; CHECK: $w0 = COPY %saddsat(s32) - ; CHECK: RET_ReallyLR implicit $w0 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: %x:_(s32) = COPY $w0 + ; CHECK-NEXT: %y:_(s32) = COPY $w1 + ; CHECK-NEXT: [[SADDO:%[0-9]+]]:_(s32), [[SADDO1:%[0-9]+]]:_(s1) = G_SADDO %x, %y + ; CHECK-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 31 + ; CHECK-NEXT: [[ASHR:%[0-9]+]]:_(s32) = G_ASHR [[SADDO]], [[C]](s64) + ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 -2147483648 + ; CHECK-NEXT: 
[[ADD:%[0-9]+]]:_(s32) = G_ADD [[ASHR]], [[C1]] + ; CHECK-NEXT: %saddsat:_(s32) = G_SELECT [[SADDO1]](s1), [[ADD]], [[SADDO]] + ; CHECK-NEXT: $w0 = COPY %saddsat(s32) + ; CHECK-NEXT: RET_ReallyLR implicit $w0 %x:_(s32) = COPY $w0 %y:_(s32) = COPY $w1 %saddsat:_(s32) = G_SADDSAT %x, %y @@ -35,16 +36,17 @@ body: | liveins: $x0, $x1 ; CHECK-LABEL: name: s64 ; CHECK: liveins: $x0, $x1 - ; CHECK: %x:_(s64) = COPY $x0 - ; CHECK: %y:_(s64) = COPY $x1 - ; CHECK: [[SADDO:%[0-9]+]]:_(s64), [[SADDO1:%[0-9]+]]:_(s1) = G_SADDO %x, %y - ; CHECK: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 63 - ; CHECK: [[ASHR:%[0-9]+]]:_(s64) = G_ASHR [[SADDO]], [[C]](s64) - ; CHECK: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 -9223372036854775808 - ; CHECK: [[ADD:%[0-9]+]]:_(s64) = G_ADD [[ASHR]], [[C1]] - ; CHECK: %saddsat:_(s64) = G_SELECT [[SADDO1]](s1), [[ADD]], [[SADDO]] - ; CHECK: $x0 = COPY %saddsat(s64) - ; CHECK: RET_ReallyLR implicit $x0 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: %x:_(s64) = COPY $x0 + ; CHECK-NEXT: %y:_(s64) = COPY $x1 + ; CHECK-NEXT: [[SADDO:%[0-9]+]]:_(s64), [[SADDO1:%[0-9]+]]:_(s1) = G_SADDO %x, %y + ; CHECK-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 63 + ; CHECK-NEXT: [[ASHR:%[0-9]+]]:_(s64) = G_ASHR [[SADDO]], [[C]](s64) + ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 -9223372036854775808 + ; CHECK-NEXT: [[ADD:%[0-9]+]]:_(s64) = G_ADD [[ASHR]], [[C1]] + ; CHECK-NEXT: %saddsat:_(s64) = G_SELECT [[SADDO1]](s1), [[ADD]], [[SADDO]] + ; CHECK-NEXT: $x0 = COPY %saddsat(s64) + ; CHECK-NEXT: RET_ReallyLR implicit $x0 %x:_(s64) = COPY $x0 %y:_(s64) = COPY $x1 %saddsat:_(s64) = G_SADDSAT %x, %y @@ -61,22 +63,23 @@ body: | ; CHECK-LABEL: name: s16 ; CHECK: liveins: $w0, $w1, $w2 - ; CHECK: %copy_1:_(s32) = COPY $w0 - ; CHECK: %copy_2:_(s32) = COPY $w1 - ; CHECK: [[SEXT_INREG:%[0-9]+]]:_(s32) = G_SEXT_INREG %copy_1, 16 - ; CHECK: [[SEXT_INREG1:%[0-9]+]]:_(s32) = G_SEXT_INREG %copy_2, 16 - ; CHECK: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[SEXT_INREG]], [[SEXT_INREG1]] - ; CHECK: [[SEXT_INREG2:%[0-9]+]]:_(s32) = G_SEXT_INREG [[ADD]], 16 - ; CHECK: [[ICMP:%[0-9]+]]:_(s32) = G_ICMP intpred(ne), [[ADD]](s32), [[SEXT_INREG2]] - ; CHECK: [[TRUNC:%[0-9]+]]:_(s1) = G_TRUNC [[ICMP]](s32) - ; CHECK: [[SEXT_INREG3:%[0-9]+]]:_(s32) = G_SEXT_INREG [[ADD]], 16 - ; CHECK: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 15 - ; CHECK: [[ASHR:%[0-9]+]]:_(s32) = G_ASHR [[SEXT_INREG3]], [[C]](s64) - ; CHECK: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 -32768 - ; CHECK: [[ADD1:%[0-9]+]]:_(s32) = G_ADD [[ASHR]], [[C1]] - ; CHECK: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[TRUNC]](s1), [[ADD1]], [[ADD]] - ; CHECK: $w0 = COPY [[SELECT]](s32) - ; CHECK: RET_ReallyLR implicit $w0 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: %copy_1:_(s32) = COPY $w0 + ; CHECK-NEXT: %copy_2:_(s32) = COPY $w1 + ; CHECK-NEXT: [[SEXT_INREG:%[0-9]+]]:_(s32) = G_SEXT_INREG %copy_1, 16 + ; CHECK-NEXT: [[SEXT_INREG1:%[0-9]+]]:_(s32) = G_SEXT_INREG %copy_2, 16 + ; CHECK-NEXT: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[SEXT_INREG]], [[SEXT_INREG1]] + ; CHECK-NEXT: [[SEXT_INREG2:%[0-9]+]]:_(s32) = G_SEXT_INREG [[ADD]], 16 + ; CHECK-NEXT: [[ICMP:%[0-9]+]]:_(s32) = G_ICMP intpred(ne), [[ADD]](s32), [[SEXT_INREG2]] + ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(s1) = G_TRUNC [[ICMP]](s32) + ; CHECK-NEXT: [[SEXT_INREG3:%[0-9]+]]:_(s32) = G_SEXT_INREG [[ADD]], 16 + ; CHECK-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 15 + ; CHECK-NEXT: [[ASHR:%[0-9]+]]:_(s32) = G_ASHR [[SEXT_INREG3]], [[C]](s64) + ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 -32768 + ; CHECK-NEXT: [[ADD1:%[0-9]+]]:_(s32) = G_ADD [[ASHR]], 
[[C1]] + ; CHECK-NEXT: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[TRUNC]](s1), [[ADD1]], [[ADD]] + ; CHECK-NEXT: $w0 = COPY [[SELECT]](s32) + ; CHECK-NEXT: RET_ReallyLR implicit $w0 %copy_1:_(s32) = COPY $w0 %x:_(s16) = G_TRUNC %copy_1(s32) %copy_2:_(s32) = COPY $w1 @@ -96,22 +99,23 @@ body: | ; CHECK-LABEL: name: s1 ; CHECK: liveins: $w0, $w1, $w2 - ; CHECK: %copy_1:_(s32) = COPY $w0 - ; CHECK: %copy_2:_(s32) = COPY $w1 - ; CHECK: [[SEXT_INREG:%[0-9]+]]:_(s32) = G_SEXT_INREG %copy_1, 1 - ; CHECK: [[SEXT_INREG1:%[0-9]+]]:_(s32) = G_SEXT_INREG %copy_2, 1 - ; CHECK: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[SEXT_INREG]], [[SEXT_INREG1]] - ; CHECK: [[SEXT_INREG2:%[0-9]+]]:_(s32) = G_SEXT_INREG [[ADD]], 1 - ; CHECK: [[ICMP:%[0-9]+]]:_(s32) = G_ICMP intpred(ne), [[ADD]](s32), [[SEXT_INREG2]] - ; CHECK: [[TRUNC:%[0-9]+]]:_(s1) = G_TRUNC [[ICMP]](s32) - ; CHECK: [[SEXT_INREG3:%[0-9]+]]:_(s32) = G_SEXT_INREG [[ADD]], 1 - ; CHECK: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 - ; CHECK: [[ASHR:%[0-9]+]]:_(s32) = G_ASHR [[SEXT_INREG3]], [[C]](s64) - ; CHECK: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 - ; CHECK: [[ADD1:%[0-9]+]]:_(s32) = G_ADD [[ASHR]], [[C1]] - ; CHECK: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[TRUNC]](s1), [[ADD1]], [[ADD]] - ; CHECK: $w0 = COPY [[SELECT]](s32) - ; CHECK: RET_ReallyLR implicit $w0 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: %copy_1:_(s32) = COPY $w0 + ; CHECK-NEXT: %copy_2:_(s32) = COPY $w1 + ; CHECK-NEXT: [[SEXT_INREG:%[0-9]+]]:_(s32) = G_SEXT_INREG %copy_1, 1 + ; CHECK-NEXT: [[SEXT_INREG1:%[0-9]+]]:_(s32) = G_SEXT_INREG %copy_2, 1 + ; CHECK-NEXT: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[SEXT_INREG]], [[SEXT_INREG1]] + ; CHECK-NEXT: [[SEXT_INREG2:%[0-9]+]]:_(s32) = G_SEXT_INREG [[ADD]], 1 + ; CHECK-NEXT: [[ICMP:%[0-9]+]]:_(s32) = G_ICMP intpred(ne), [[ADD]](s32), [[SEXT_INREG2]] + ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(s1) = G_TRUNC [[ICMP]](s32) + ; CHECK-NEXT: [[SEXT_INREG3:%[0-9]+]]:_(s32) = G_SEXT_INREG [[ADD]], 1 + ; CHECK-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 + ; CHECK-NEXT: [[ASHR:%[0-9]+]]:_(s32) = G_ASHR [[SEXT_INREG3]], [[C]](s64) + ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 + ; CHECK-NEXT: [[ADD1:%[0-9]+]]:_(s32) = G_ADD [[ASHR]], [[C1]] + ; CHECK-NEXT: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[TRUNC]](s1), [[ADD1]], [[ADD]] + ; CHECK-NEXT: $w0 = COPY [[SELECT]](s32) + ; CHECK-NEXT: RET_ReallyLR implicit $w0 %copy_1:_(s32) = COPY $w0 %x:_(s1) = G_TRUNC %copy_1(s32) %copy_2:_(s32) = COPY $w1 @@ -131,22 +135,23 @@ body: | ; CHECK-LABEL: name: s3 ; CHECK: liveins: $w0, $w1, $w2 - ; CHECK: %copy_1:_(s32) = COPY $w0 - ; CHECK: %copy_2:_(s32) = COPY $w1 - ; CHECK: [[SEXT_INREG:%[0-9]+]]:_(s32) = G_SEXT_INREG %copy_1, 3 - ; CHECK: [[SEXT_INREG1:%[0-9]+]]:_(s32) = G_SEXT_INREG %copy_2, 3 - ; CHECK: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[SEXT_INREG]], [[SEXT_INREG1]] - ; CHECK: [[SEXT_INREG2:%[0-9]+]]:_(s32) = G_SEXT_INREG [[ADD]], 3 - ; CHECK: [[ICMP:%[0-9]+]]:_(s32) = G_ICMP intpred(ne), [[ADD]](s32), [[SEXT_INREG2]] - ; CHECK: [[TRUNC:%[0-9]+]]:_(s1) = G_TRUNC [[ICMP]](s32) - ; CHECK: [[SEXT_INREG3:%[0-9]+]]:_(s32) = G_SEXT_INREG [[ADD]], 3 - ; CHECK: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 2 - ; CHECK: [[ASHR:%[0-9]+]]:_(s32) = G_ASHR [[SEXT_INREG3]], [[C]](s64) - ; CHECK: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 - ; CHECK: [[ADD1:%[0-9]+]]:_(s32) = G_ADD [[ASHR]], [[C1]] - ; CHECK: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[TRUNC]](s1), [[ADD1]], [[ADD]] - ; CHECK: $w0 = COPY [[SELECT]](s32) - ; CHECK: RET_ReallyLR implicit $w0 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: %copy_1:_(s32) = COPY $w0 + 
; CHECK-NEXT: %copy_2:_(s32) = COPY $w1 + ; CHECK-NEXT: [[SEXT_INREG:%[0-9]+]]:_(s32) = G_SEXT_INREG %copy_1, 3 + ; CHECK-NEXT: [[SEXT_INREG1:%[0-9]+]]:_(s32) = G_SEXT_INREG %copy_2, 3 + ; CHECK-NEXT: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[SEXT_INREG]], [[SEXT_INREG1]] + ; CHECK-NEXT: [[SEXT_INREG2:%[0-9]+]]:_(s32) = G_SEXT_INREG [[ADD]], 3 + ; CHECK-NEXT: [[ICMP:%[0-9]+]]:_(s32) = G_ICMP intpred(ne), [[ADD]](s32), [[SEXT_INREG2]] + ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(s1) = G_TRUNC [[ICMP]](s32) + ; CHECK-NEXT: [[SEXT_INREG3:%[0-9]+]]:_(s32) = G_SEXT_INREG [[ADD]], 3 + ; CHECK-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 2 + ; CHECK-NEXT: [[ASHR:%[0-9]+]]:_(s32) = G_ASHR [[SEXT_INREG3]], [[C]](s64) + ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 + ; CHECK-NEXT: [[ADD1:%[0-9]+]]:_(s32) = G_ADD [[ASHR]], [[C1]] + ; CHECK-NEXT: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[TRUNC]](s1), [[ADD1]], [[ADD]] + ; CHECK-NEXT: $w0 = COPY [[SELECT]](s32) + ; CHECK-NEXT: RET_ReallyLR implicit $w0 %copy_1:_(s32) = COPY $w0 %x:_(s3) = G_TRUNC %copy_1(s32) %copy_2:_(s32) = COPY $w1 @@ -166,23 +171,24 @@ body: | ; CHECK-LABEL: name: s36 ; CHECK: liveins: $x0, $x1 - ; CHECK: %copy_1:_(s64) = COPY $x0 - ; CHECK: %copy_2:_(s64) = COPY $x1 - ; CHECK: [[SEXT_INREG:%[0-9]+]]:_(s64) = G_SEXT_INREG %copy_1, 36 - ; CHECK: [[SEXT_INREG1:%[0-9]+]]:_(s64) = G_SEXT_INREG %copy_2, 36 - ; CHECK: [[ADD:%[0-9]+]]:_(s64) = G_ADD [[SEXT_INREG]], [[SEXT_INREG1]] - ; CHECK: [[SEXT_INREG2:%[0-9]+]]:_(s64) = G_SEXT_INREG [[ADD]], 36 - ; CHECK: [[ICMP:%[0-9]+]]:_(s32) = G_ICMP intpred(ne), [[ADD]](s64), [[SEXT_INREG2]] - ; CHECK: [[TRUNC:%[0-9]+]]:_(s1) = G_TRUNC [[ICMP]](s32) - ; CHECK: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 35 - ; CHECK: [[SEXT_INREG3:%[0-9]+]]:_(s64) = G_SEXT_INREG [[ADD]], 36 - ; CHECK: [[COPY:%[0-9]+]]:_(s64) = COPY [[C]](s64) - ; CHECK: [[ASHR:%[0-9]+]]:_(s64) = G_ASHR [[SEXT_INREG3]], [[COPY]](s64) - ; CHECK: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 34359738368 - ; CHECK: [[ADD1:%[0-9]+]]:_(s64) = G_ADD [[ASHR]], [[C1]] - ; CHECK: [[SELECT:%[0-9]+]]:_(s64) = G_SELECT [[TRUNC]](s1), [[ADD1]], [[ADD]] - ; CHECK: $x0 = COPY [[SELECT]](s64) - ; CHECK: RET_ReallyLR implicit $x0 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: %copy_1:_(s64) = COPY $x0 + ; CHECK-NEXT: %copy_2:_(s64) = COPY $x1 + ; CHECK-NEXT: [[SEXT_INREG:%[0-9]+]]:_(s64) = G_SEXT_INREG %copy_1, 36 + ; CHECK-NEXT: [[SEXT_INREG1:%[0-9]+]]:_(s64) = G_SEXT_INREG %copy_2, 36 + ; CHECK-NEXT: [[ADD:%[0-9]+]]:_(s64) = G_ADD [[SEXT_INREG]], [[SEXT_INREG1]] + ; CHECK-NEXT: [[SEXT_INREG2:%[0-9]+]]:_(s64) = G_SEXT_INREG [[ADD]], 36 + ; CHECK-NEXT: [[ICMP:%[0-9]+]]:_(s32) = G_ICMP intpred(ne), [[ADD]](s64), [[SEXT_INREG2]] + ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(s1) = G_TRUNC [[ICMP]](s32) + ; CHECK-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 35 + ; CHECK-NEXT: [[SEXT_INREG3:%[0-9]+]]:_(s64) = G_SEXT_INREG [[ADD]], 36 + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s64) = COPY [[C]](s64) + ; CHECK-NEXT: [[ASHR:%[0-9]+]]:_(s64) = G_ASHR [[SEXT_INREG3]], [[COPY]](s64) + ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 34359738368 + ; CHECK-NEXT: [[ADD1:%[0-9]+]]:_(s64) = G_ADD [[ASHR]], [[C1]] + ; CHECK-NEXT: [[SELECT:%[0-9]+]]:_(s64) = G_SELECT [[TRUNC]](s1), [[ADD1]], [[ADD]] + ; CHECK-NEXT: $x0 = COPY [[SELECT]](s64) + ; CHECK-NEXT: RET_ReallyLR implicit $x0 %copy_1:_(s64) = COPY $x0 %x:_(s36) = G_TRUNC %copy_1(s64) %copy_2:_(s64) = COPY $x1 @@ -201,47 +207,48 @@ body: | liveins: $q0, $q1, $x0 ; CHECK-LABEL: name: s88 ; CHECK: liveins: $q0, $q1, $x0 - ; CHECK: %copy_1:_(s128) = COPY $q0 - 
; CHECK: %copy_2:_(s128) = COPY $q1 - ; CHECK: [[EXTRACT:%[0-9]+]]:_(s64) = G_EXTRACT %copy_1(s128), 0 - ; CHECK: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 - ; CHECK: [[UV:%[0-9]+]]:_(s64), [[UV1:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES %copy_1(s128) - ; CHECK: [[EXTRACT1:%[0-9]+]]:_(s64) = G_EXTRACT %copy_2(s128), 0 - ; CHECK: [[UV2:%[0-9]+]]:_(s64), [[UV3:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES %copy_2(s128) - ; CHECK: [[UADDO:%[0-9]+]]:_(s64), [[UADDO1:%[0-9]+]]:_(s1) = G_UADDO [[EXTRACT]], [[EXTRACT1]] - ; CHECK: [[TRUNC:%[0-9]+]]:_(s32) = G_TRUNC [[UV1]](s64) - ; CHECK: [[SEXT_INREG:%[0-9]+]]:_(s32) = G_SEXT_INREG [[TRUNC]], 24 - ; CHECK: [[TRUNC1:%[0-9]+]]:_(s32) = G_TRUNC [[UV3]](s64) - ; CHECK: [[SEXT_INREG1:%[0-9]+]]:_(s32) = G_SEXT_INREG [[TRUNC1]], 24 - ; CHECK: [[UADDE:%[0-9]+]]:_(s32), [[UADDE1:%[0-9]+]]:_(s1) = G_UADDE [[SEXT_INREG]], [[SEXT_INREG1]], [[UADDO1]] - ; CHECK: [[SEXT_INREG2:%[0-9]+]]:_(s32) = G_SEXT_INREG [[UADDE]], 24 - ; CHECK: [[ICMP:%[0-9]+]]:_(s32) = G_ICMP intpred(ne), [[UADDE]](s32), [[SEXT_INREG2]] - ; CHECK: [[TRUNC2:%[0-9]+]]:_(s1) = G_TRUNC [[ICMP]](s32) - ; CHECK: [[UV4:%[0-9]+]]:_(s8), [[UV5:%[0-9]+]]:_(s8), [[UV6:%[0-9]+]]:_(s8), [[UV7:%[0-9]+]]:_(s8), [[UV8:%[0-9]+]]:_(s8), [[UV9:%[0-9]+]]:_(s8), [[UV10:%[0-9]+]]:_(s8), [[UV11:%[0-9]+]]:_(s8) = G_UNMERGE_VALUES [[UADDO]](s64) - ; CHECK: [[UV12:%[0-9]+]]:_(s8), [[UV13:%[0-9]+]]:_(s8), [[UV14:%[0-9]+]]:_(s8), [[UV15:%[0-9]+]]:_(s8) = G_UNMERGE_VALUES [[UADDE]](s32) - ; CHECK: [[DEF:%[0-9]+]]:_(s8) = G_IMPLICIT_DEF - ; CHECK: [[DEF1:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF - ; CHECK: [[UV16:%[0-9]+]]:_(s8), [[UV17:%[0-9]+]]:_(s8), [[UV18:%[0-9]+]]:_(s8), [[UV19:%[0-9]+]]:_(s8) = G_UNMERGE_VALUES [[DEF1]](s32) - ; CHECK: [[UV20:%[0-9]+]]:_(s8), [[UV21:%[0-9]+]]:_(s8), [[UV22:%[0-9]+]]:_(s8), [[UV23:%[0-9]+]]:_(s8) = G_UNMERGE_VALUES [[DEF1]](s32) - ; CHECK: [[MV:%[0-9]+]]:_(s32) = G_MERGE_VALUES [[UV4]](s8), [[UV5]](s8), [[UV6]](s8), [[UV7]](s8) - ; CHECK: [[MV1:%[0-9]+]]:_(s32) = G_MERGE_VALUES [[UV8]](s8), [[UV9]](s8), [[UV10]](s8), [[UV11]](s8) - ; CHECK: [[MV2:%[0-9]+]]:_(s32) = G_MERGE_VALUES [[UV12]](s8), [[UV13]](s8), [[UV14]](s8), [[DEF]](s8) - ; CHECK: [[MV3:%[0-9]+]]:_(s32) = G_MERGE_VALUES [[UV16]](s8), [[UV17]](s8), [[UV18]](s8), [[UV20]](s8) - ; CHECK: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 40 - ; CHECK: [[MV4:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[MV]](s32), [[MV1]](s32) - ; CHECK: [[MV5:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[MV2]](s32), [[MV3]](s32) - ; CHECK: [[SHL:%[0-9]+]]:_(s64) = G_SHL [[MV5]], [[C1]](s64) - ; CHECK: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 24 - ; CHECK: [[LSHR:%[0-9]+]]:_(s64) = G_LSHR [[MV4]], [[C2]](s64) - ; CHECK: [[OR:%[0-9]+]]:_(s64) = G_OR [[SHL]], [[LSHR]] - ; CHECK: [[ASHR:%[0-9]+]]:_(s64) = G_ASHR [[OR]], [[C1]](s64) - ; CHECK: [[C3:%[0-9]+]]:_(s64) = G_CONSTANT i64 23 - ; CHECK: [[ASHR1:%[0-9]+]]:_(s64) = G_ASHR [[ASHR]], [[C3]](s64) - ; CHECK: [[UADDO2:%[0-9]+]]:_(s64), [[UADDO3:%[0-9]+]]:_(s1) = G_UADDO [[ASHR1]], [[C]] - ; CHECK: [[MV6:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[MV]](s32), [[MV1]](s32) - ; CHECK: [[SELECT:%[0-9]+]]:_(s64) = G_SELECT [[TRUNC2]](s1), [[UADDO2]], [[MV6]] - ; CHECK: $x0 = COPY [[SELECT]](s64) - ; CHECK: RET_ReallyLR implicit $x0 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: %copy_1:_(s128) = COPY $q0 + ; CHECK-NEXT: %copy_2:_(s128) = COPY $q1 + ; CHECK-NEXT: [[EXTRACT:%[0-9]+]]:_(s64) = G_EXTRACT %copy_1(s128), 0 + ; CHECK-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 + ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s64), [[UV1:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES %copy_1(s128) + 
; CHECK-NEXT: [[EXTRACT1:%[0-9]+]]:_(s64) = G_EXTRACT %copy_2(s128), 0 + ; CHECK-NEXT: [[UV2:%[0-9]+]]:_(s64), [[UV3:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES %copy_2(s128) + ; CHECK-NEXT: [[UADDO:%[0-9]+]]:_(s64), [[UADDO1:%[0-9]+]]:_(s1) = G_UADDO [[EXTRACT]], [[EXTRACT1]] + ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(s32) = G_TRUNC [[UV1]](s64) + ; CHECK-NEXT: [[SEXT_INREG:%[0-9]+]]:_(s32) = G_SEXT_INREG [[TRUNC]], 24 + ; CHECK-NEXT: [[TRUNC1:%[0-9]+]]:_(s32) = G_TRUNC [[UV3]](s64) + ; CHECK-NEXT: [[SEXT_INREG1:%[0-9]+]]:_(s32) = G_SEXT_INREG [[TRUNC1]], 24 + ; CHECK-NEXT: [[UADDE:%[0-9]+]]:_(s32), [[UADDE1:%[0-9]+]]:_(s1) = G_UADDE [[SEXT_INREG]], [[SEXT_INREG1]], [[UADDO1]] + ; CHECK-NEXT: [[SEXT_INREG2:%[0-9]+]]:_(s32) = G_SEXT_INREG [[UADDE]], 24 + ; CHECK-NEXT: [[ICMP:%[0-9]+]]:_(s32) = G_ICMP intpred(ne), [[UADDE]](s32), [[SEXT_INREG2]] + ; CHECK-NEXT: [[TRUNC2:%[0-9]+]]:_(s1) = G_TRUNC [[ICMP]](s32) + ; CHECK-NEXT: [[UV4:%[0-9]+]]:_(s8), [[UV5:%[0-9]+]]:_(s8), [[UV6:%[0-9]+]]:_(s8), [[UV7:%[0-9]+]]:_(s8), [[UV8:%[0-9]+]]:_(s8), [[UV9:%[0-9]+]]:_(s8), [[UV10:%[0-9]+]]:_(s8), [[UV11:%[0-9]+]]:_(s8) = G_UNMERGE_VALUES [[UADDO]](s64) + ; CHECK-NEXT: [[UV12:%[0-9]+]]:_(s8), [[UV13:%[0-9]+]]:_(s8), [[UV14:%[0-9]+]]:_(s8), [[UV15:%[0-9]+]]:_(s8) = G_UNMERGE_VALUES [[UADDE]](s32) + ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(s8) = G_IMPLICIT_DEF + ; CHECK-NEXT: [[DEF1:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF + ; CHECK-NEXT: [[UV16:%[0-9]+]]:_(s8), [[UV17:%[0-9]+]]:_(s8), [[UV18:%[0-9]+]]:_(s8), [[UV19:%[0-9]+]]:_(s8) = G_UNMERGE_VALUES [[DEF1]](s32) + ; CHECK-NEXT: [[UV20:%[0-9]+]]:_(s8), [[UV21:%[0-9]+]]:_(s8), [[UV22:%[0-9]+]]:_(s8), [[UV23:%[0-9]+]]:_(s8) = G_UNMERGE_VALUES [[DEF1]](s32) + ; CHECK-NEXT: [[MV:%[0-9]+]]:_(s32) = G_MERGE_VALUES [[UV4]](s8), [[UV5]](s8), [[UV6]](s8), [[UV7]](s8) + ; CHECK-NEXT: [[MV1:%[0-9]+]]:_(s32) = G_MERGE_VALUES [[UV8]](s8), [[UV9]](s8), [[UV10]](s8), [[UV11]](s8) + ; CHECK-NEXT: [[MV2:%[0-9]+]]:_(s32) = G_MERGE_VALUES [[UV12]](s8), [[UV13]](s8), [[UV14]](s8), [[DEF]](s8) + ; CHECK-NEXT: [[MV3:%[0-9]+]]:_(s32) = G_MERGE_VALUES [[UV16]](s8), [[UV17]](s8), [[UV18]](s8), [[UV20]](s8) + ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 40 + ; CHECK-NEXT: [[MV4:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[MV]](s32), [[MV1]](s32) + ; CHECK-NEXT: [[MV5:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[MV2]](s32), [[MV3]](s32) + ; CHECK-NEXT: [[SHL:%[0-9]+]]:_(s64) = G_SHL [[MV5]], [[C1]](s64) + ; CHECK-NEXT: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 24 + ; CHECK-NEXT: [[LSHR:%[0-9]+]]:_(s64) = G_LSHR [[MV4]], [[C2]](s64) + ; CHECK-NEXT: [[OR:%[0-9]+]]:_(s64) = G_OR [[SHL]], [[LSHR]] + ; CHECK-NEXT: [[ASHR:%[0-9]+]]:_(s64) = G_ASHR [[OR]], [[C1]](s64) + ; CHECK-NEXT: [[C3:%[0-9]+]]:_(s64) = G_CONSTANT i64 23 + ; CHECK-NEXT: [[ASHR1:%[0-9]+]]:_(s64) = G_ASHR [[ASHR]], [[C3]](s64) + ; CHECK-NEXT: [[UADDO2:%[0-9]+]]:_(s64), [[UADDO3:%[0-9]+]]:_(s1) = G_UADDO [[ASHR1]], [[C]] + ; CHECK-NEXT: [[MV6:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[MV]](s32), [[MV1]](s32) + ; CHECK-NEXT: [[SELECT:%[0-9]+]]:_(s64) = G_SELECT [[TRUNC2]](s1), [[UADDO2]], [[MV6]] + ; CHECK-NEXT: $x0 = COPY [[SELECT]](s64) + ; CHECK-NEXT: RET_ReallyLR implicit $x0 %copy_1:_(s128) = COPY $q0 %x:_(s88) = G_TRUNC %copy_1(s128) %copy_2:_(s128) = COPY $q1 diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/legalize-shuffle-vector.mir b/llvm/test/CodeGen/AArch64/GlobalISel/legalize-shuffle-vector.mir index 9b5bfc13d50b..6ea81e73fc7e 100644 --- a/llvm/test/CodeGen/AArch64/GlobalISel/legalize-shuffle-vector.mir +++ 
b/llvm/test/CodeGen/AArch64/GlobalISel/legalize-shuffle-vector.mir @@ -10,11 +10,12 @@ body: | ; CHECK-LABEL: name: shuffle_v4i32 ; CHECK: liveins: $q0, $q1 - ; CHECK: [[COPY:%[0-9]+]]:_(<4 x s32>) = COPY $q0 - ; CHECK: [[COPY1:%[0-9]+]]:_(<4 x s32>) = COPY $q1 - ; CHECK: [[SHUF:%[0-9]+]]:_(<4 x s32>) = G_SHUFFLE_VECTOR [[COPY]](<4 x s32>), [[COPY1]], shufflemask(0, 0, 0, 0) - ; CHECK: $q0 = COPY [[SHUF]](<4 x s32>) - ; CHECK: RET_ReallyLR implicit $q0 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(<4 x s32>) = COPY $q0 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(<4 x s32>) = COPY $q1 + ; CHECK-NEXT: [[SHUF:%[0-9]+]]:_(<4 x s32>) = G_SHUFFLE_VECTOR [[COPY]](<4 x s32>), [[COPY1]], shufflemask(0, 0, 0, 0) + ; CHECK-NEXT: $q0 = COPY [[SHUF]](<4 x s32>) + ; CHECK-NEXT: RET_ReallyLR implicit $q0 %0:_(<4 x s32>) = COPY $q0 %1:_(<4 x s32>) = COPY $q1 %2:_(<4 x s32>) = G_SHUFFLE_VECTOR %0(<4 x s32>), %1, shufflemask(0, 0, 0, 0) @@ -32,11 +33,12 @@ body: | ; CHECK-LABEL: name: shuffle_v2i64 ; CHECK: liveins: $q0, $q1 - ; CHECK: [[COPY:%[0-9]+]]:_(<2 x s64>) = COPY $q0 - ; CHECK: [[COPY1:%[0-9]+]]:_(<2 x s64>) = COPY $q1 - ; CHECK: [[SHUF:%[0-9]+]]:_(<2 x s64>) = G_SHUFFLE_VECTOR [[COPY]](<2 x s64>), [[COPY1]], shufflemask(0, 0) - ; CHECK: $q0 = COPY [[SHUF]](<2 x s64>) - ; CHECK: RET_ReallyLR implicit $q0 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(<2 x s64>) = COPY $q0 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(<2 x s64>) = COPY $q1 + ; CHECK-NEXT: [[SHUF:%[0-9]+]]:_(<2 x s64>) = G_SHUFFLE_VECTOR [[COPY]](<2 x s64>), [[COPY1]], shufflemask(0, 0) + ; CHECK-NEXT: $q0 = COPY [[SHUF]](<2 x s64>) + ; CHECK-NEXT: RET_ReallyLR implicit $q0 %0:_(<2 x s64>) = COPY $q0 %1:_(<2 x s64>) = COPY $q1 %2:_(<2 x s64>) = G_SHUFFLE_VECTOR %0(<2 x s64>), %1, shufflemask(0, 0) @@ -54,11 +56,12 @@ body: | ; CHECK-LABEL: name: shuffle_v2p0 ; CHECK: liveins: $q0, $q1 - ; CHECK: [[COPY:%[0-9]+]]:_(<2 x p0>) = COPY $q0 - ; CHECK: [[COPY1:%[0-9]+]]:_(<2 x p0>) = COPY $q1 - ; CHECK: [[SHUF:%[0-9]+]]:_(<2 x p0>) = G_SHUFFLE_VECTOR [[COPY]](<2 x p0>), [[COPY1]], shufflemask(0, 0) - ; CHECK: $q0 = COPY [[SHUF]](<2 x p0>) - ; CHECK: RET_ReallyLR implicit $q0 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(<2 x p0>) = COPY $q0 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(<2 x p0>) = COPY $q1 + ; CHECK-NEXT: [[SHUF:%[0-9]+]]:_(<2 x p0>) = G_SHUFFLE_VECTOR [[COPY]](<2 x p0>), [[COPY1]], shufflemask(0, 0) + ; CHECK-NEXT: $q0 = COPY [[SHUF]](<2 x p0>) + ; CHECK-NEXT: RET_ReallyLR implicit $q0 %0:_(<2 x p0>) = COPY $q0 %1:_(<2 x p0>) = COPY $q1 %2:_(<2 x p0>) = G_SHUFFLE_VECTOR %0(<2 x p0>), %1, shufflemask(0, 0) @@ -76,11 +79,12 @@ body: | ; CHECK-LABEL: name: shuffle_v16i8 ; CHECK: liveins: $q0, $q1 - ; CHECK: [[COPY:%[0-9]+]]:_(<16 x s8>) = COPY $q0 - ; CHECK: [[COPY1:%[0-9]+]]:_(<16 x s8>) = COPY $q1 - ; CHECK: [[SHUF:%[0-9]+]]:_(<16 x s8>) = G_SHUFFLE_VECTOR [[COPY]](<16 x s8>), [[COPY1]], shufflemask(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0) - ; CHECK: $q0 = COPY [[SHUF]](<16 x s8>) - ; CHECK: RET_ReallyLR implicit $q0 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(<16 x s8>) = COPY $q0 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(<16 x s8>) = COPY $q1 + ; CHECK-NEXT: [[SHUF:%[0-9]+]]:_(<16 x s8>) = G_SHUFFLE_VECTOR [[COPY]](<16 x s8>), [[COPY1]], shufflemask(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0) + ; CHECK-NEXT: $q0 = COPY [[SHUF]](<16 x s8>) + ; CHECK-NEXT: RET_ReallyLR implicit $q0 %0:_(<16 x s8>) = COPY $q0 %1:_(<16 x s8>) = COPY $q1 %2:_(<16 x s8>) = G_SHUFFLE_VECTOR %0(<16 x s8>), %1, 
shufflemask(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0) @@ -98,11 +102,12 @@ body: | ; CHECK-LABEL: name: shuffle_v8i16 ; CHECK: liveins: $q0, $q1 - ; CHECK: [[COPY:%[0-9]+]]:_(<8 x s16>) = COPY $q0 - ; CHECK: [[COPY1:%[0-9]+]]:_(<8 x s16>) = COPY $q1 - ; CHECK: [[SHUF:%[0-9]+]]:_(<8 x s16>) = G_SHUFFLE_VECTOR [[COPY]](<8 x s16>), [[COPY1]], shufflemask(0, 0, 0, 0, 0, 0, 0, 0) - ; CHECK: $q0 = COPY [[SHUF]](<8 x s16>) - ; CHECK: RET_ReallyLR implicit $q0 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(<8 x s16>) = COPY $q0 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(<8 x s16>) = COPY $q1 + ; CHECK-NEXT: [[SHUF:%[0-9]+]]:_(<8 x s16>) = G_SHUFFLE_VECTOR [[COPY]](<8 x s16>), [[COPY1]], shufflemask(0, 0, 0, 0, 0, 0, 0, 0) + ; CHECK-NEXT: $q0 = COPY [[SHUF]](<8 x s16>) + ; CHECK-NEXT: RET_ReallyLR implicit $q0 %0:_(<8 x s16>) = COPY $q0 %1:_(<8 x s16>) = COPY $q1 %2:_(<8 x s16>) = G_SHUFFLE_VECTOR %0(<8 x s16>), %1, shufflemask(0, 0, 0, 0, 0, 0, 0, 0) @@ -120,13 +125,14 @@ body: | ; CHECK-LABEL: name: shuffle_1elt_mask ; CHECK: liveins: $d0, $d1 - ; CHECK: [[COPY:%[0-9]+]]:_(s64) = COPY $d0 - ; CHECK: [[COPY1:%[0-9]+]]:_(s64) = COPY $d1 - ; CHECK: [[COPY2:%[0-9]+]]:_(s64) = COPY [[COPY]](s64) - ; CHECK: [[COPY3:%[0-9]+]]:_(s64) = COPY [[COPY1]](s64) - ; CHECK: $d0 = COPY [[COPY2]](s64) - ; CHECK: $d1 = COPY [[COPY3]](s64) - ; CHECK: RET_ReallyLR implicit $d0, implicit $d1 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s64) = COPY $d0 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s64) = COPY $d1 + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(s64) = COPY [[COPY]](s64) + ; CHECK-NEXT: [[COPY3:%[0-9]+]]:_(s64) = COPY [[COPY1]](s64) + ; CHECK-NEXT: $d0 = COPY [[COPY2]](s64) + ; CHECK-NEXT: $d1 = COPY [[COPY3]](s64) + ; CHECK-NEXT: RET_ReallyLR implicit $d0, implicit $d1 %0:_(s64) = COPY $d0 %1:_(s64) = COPY $d1 %3:_(s64) = G_SHUFFLE_VECTOR %0:_(s64), %1:_, shufflemask(0) @@ -146,18 +152,19 @@ body: | ; CHECK-LABEL: name: oversize_shuffle_v4i64 ; CHECK: liveins: $q0, $q1, $q2, $q3, $x0 - ; CHECK: [[COPY:%[0-9]+]]:_(<2 x s64>) = COPY $q0 - ; CHECK: [[COPY1:%[0-9]+]]:_(<2 x s64>) = COPY $q1 - ; CHECK: [[COPY2:%[0-9]+]]:_(<2 x s64>) = COPY $q2 - ; CHECK: [[COPY3:%[0-9]+]]:_(<2 x s64>) = COPY $q3 - ; CHECK: [[COPY4:%[0-9]+]]:_(p0) = COPY $x0 - ; CHECK: [[SHUF:%[0-9]+]]:_(<2 x s64>) = G_SHUFFLE_VECTOR [[COPY1]](<2 x s64>), [[COPY2]], shufflemask(1, 2) - ; CHECK: [[SHUF1:%[0-9]+]]:_(<2 x s64>) = G_SHUFFLE_VECTOR [[COPY3]](<2 x s64>), [[COPY]], shufflemask(1, 2) - ; CHECK: G_STORE [[SHUF]](<2 x s64>), [[COPY4]](p0) :: (store (<2 x s64>), align 32) - ; CHECK: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 16 - ; CHECK: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY4]], [[C]](s64) - ; CHECK: G_STORE [[SHUF1]](<2 x s64>), [[PTR_ADD]](p0) :: (store (<2 x s64>) into unknown-address + 16) - ; CHECK: RET_ReallyLR + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(<2 x s64>) = COPY $q0 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(<2 x s64>) = COPY $q1 + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(<2 x s64>) = COPY $q2 + ; CHECK-NEXT: [[COPY3:%[0-9]+]]:_(<2 x s64>) = COPY $q3 + ; CHECK-NEXT: [[COPY4:%[0-9]+]]:_(p0) = COPY $x0 + ; CHECK-NEXT: [[SHUF:%[0-9]+]]:_(<2 x s64>) = G_SHUFFLE_VECTOR [[COPY1]](<2 x s64>), [[COPY2]], shufflemask(1, 2) + ; CHECK-NEXT: [[SHUF1:%[0-9]+]]:_(<2 x s64>) = G_SHUFFLE_VECTOR [[COPY3]](<2 x s64>), [[COPY]], shufflemask(1, 2) + ; CHECK-NEXT: G_STORE [[SHUF]](<2 x s64>), [[COPY4]](p0) :: (store (<2 x s64>), align 32) + ; CHECK-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 16 + ; CHECK-NEXT: [[PTR_ADD:%[0-9]+]]:_(p0) = 
G_PTR_ADD [[COPY4]], [[C]](s64) + ; CHECK-NEXT: G_STORE [[SHUF1]](<2 x s64>), [[PTR_ADD]](p0) :: (store (<2 x s64>) into unknown-address + 16) + ; CHECK-NEXT: RET_ReallyLR %3:_(<2 x s64>) = COPY $q0 %4:_(<2 x s64>) = COPY $q1 %0:_(<4 x s64>) = G_CONCAT_VECTORS %3(<2 x s64>), %4(<2 x s64>) @@ -180,26 +187,27 @@ body: | ; CHECK-LABEL: name: oversize_shuffle_v8i32_build_vector ; CHECK: liveins: $q0, $q1, $q2, $q3, $x0 - ; CHECK: [[COPY:%[0-9]+]]:_(<4 x s32>) = COPY $q0 - ; CHECK: [[COPY1:%[0-9]+]]:_(<4 x s32>) = COPY $q1 - ; CHECK: [[COPY2:%[0-9]+]]:_(<4 x s32>) = COPY $q2 - ; CHECK: [[COPY3:%[0-9]+]]:_(<4 x s32>) = COPY $q3 - ; CHECK: [[COPY4:%[0-9]+]]:_(p0) = COPY $x0 - ; CHECK: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 - ; CHECK: [[EVEC:%[0-9]+]]:_(s32) = G_EXTRACT_VECTOR_ELT [[COPY]](<4 x s32>), [[C]](s64) - ; CHECK: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 1 - ; CHECK: [[EVEC1:%[0-9]+]]:_(s32) = G_EXTRACT_VECTOR_ELT [[COPY1]](<4 x s32>), [[C1]](s64) - ; CHECK: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 2 - ; CHECK: [[EVEC2:%[0-9]+]]:_(s32) = G_EXTRACT_VECTOR_ELT [[COPY2]](<4 x s32>), [[C2]](s64) - ; CHECK: [[C3:%[0-9]+]]:_(s64) = G_CONSTANT i64 3 - ; CHECK: [[EVEC3:%[0-9]+]]:_(s32) = G_EXTRACT_VECTOR_ELT [[COPY3]](<4 x s32>), [[C3]](s64) - ; CHECK: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[EVEC]](s32), [[EVEC1]](s32), [[EVEC2]](s32), [[EVEC3]](s32) - ; CHECK: [[SHUF:%[0-9]+]]:_(<4 x s32>) = G_SHUFFLE_VECTOR [[COPY1]](<4 x s32>), [[COPY]], shufflemask(2, 6, 5, 3) - ; CHECK: G_STORE [[BUILD_VECTOR]](<4 x s32>), [[COPY4]](p0) :: (store (<4 x s32>), align 32) - ; CHECK: [[C4:%[0-9]+]]:_(s64) = G_CONSTANT i64 16 - ; CHECK: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY4]], [[C4]](s64) - ; CHECK: G_STORE [[SHUF]](<4 x s32>), [[PTR_ADD]](p0) :: (store (<4 x s32>) into unknown-address + 16) - ; CHECK: RET_ReallyLR + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(<4 x s32>) = COPY $q0 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(<4 x s32>) = COPY $q1 + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(<4 x s32>) = COPY $q2 + ; CHECK-NEXT: [[COPY3:%[0-9]+]]:_(<4 x s32>) = COPY $q3 + ; CHECK-NEXT: [[COPY4:%[0-9]+]]:_(p0) = COPY $x0 + ; CHECK-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 + ; CHECK-NEXT: [[EVEC:%[0-9]+]]:_(s32) = G_EXTRACT_VECTOR_ELT [[COPY]](<4 x s32>), [[C]](s64) + ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 1 + ; CHECK-NEXT: [[EVEC1:%[0-9]+]]:_(s32) = G_EXTRACT_VECTOR_ELT [[COPY1]](<4 x s32>), [[C1]](s64) + ; CHECK-NEXT: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 2 + ; CHECK-NEXT: [[EVEC2:%[0-9]+]]:_(s32) = G_EXTRACT_VECTOR_ELT [[COPY2]](<4 x s32>), [[C2]](s64) + ; CHECK-NEXT: [[C3:%[0-9]+]]:_(s64) = G_CONSTANT i64 3 + ; CHECK-NEXT: [[EVEC3:%[0-9]+]]:_(s32) = G_EXTRACT_VECTOR_ELT [[COPY3]](<4 x s32>), [[C3]](s64) + ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[EVEC]](s32), [[EVEC1]](s32), [[EVEC2]](s32), [[EVEC3]](s32) + ; CHECK-NEXT: [[SHUF:%[0-9]+]]:_(<4 x s32>) = G_SHUFFLE_VECTOR [[COPY1]](<4 x s32>), [[COPY]], shufflemask(2, 6, 5, 3) + ; CHECK-NEXT: G_STORE [[BUILD_VECTOR]](<4 x s32>), [[COPY4]](p0) :: (store (<4 x s32>), align 32) + ; CHECK-NEXT: [[C4:%[0-9]+]]:_(s64) = G_CONSTANT i64 16 + ; CHECK-NEXT: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY4]], [[C4]](s64) + ; CHECK-NEXT: G_STORE [[SHUF]](<4 x s32>), [[PTR_ADD]](p0) :: (store (<4 x s32>) into unknown-address + 16) + ; CHECK-NEXT: RET_ReallyLR %3:_(<4 x s32>) = COPY $q0 %4:_(<4 x s32>) = COPY $q1 %0:_(<8 x s32>) = G_CONCAT_VECTORS %3(<4 x s32>), %4(<4 x s32>) @@ -229,42 +237,43 @@ body: | ; 
CHECK-LABEL: name: oversize_shuffle_v6i64 ; CHECK: liveins: $d0, $d1, $d2, $d3, $d4, $d5, $d6, $d7, $x0 - ; CHECK: [[COPY:%[0-9]+]]:_(s64) = COPY $d0 - ; CHECK: [[COPY1:%[0-9]+]]:_(s64) = COPY $d1 - ; CHECK: [[COPY2:%[0-9]+]]:_(s64) = COPY $d2 - ; CHECK: [[COPY3:%[0-9]+]]:_(s64) = COPY $d3 - ; CHECK: [[COPY4:%[0-9]+]]:_(s64) = COPY $d4 - ; CHECK: [[COPY5:%[0-9]+]]:_(s64) = COPY $d5 - ; CHECK: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s64>) = G_BUILD_VECTOR [[COPY2]](s64), [[COPY3]](s64) - ; CHECK: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s64>) = G_BUILD_VECTOR [[COPY4]](s64), [[COPY5]](s64) - ; CHECK: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x s64>) = G_BUILD_VECTOR [[COPY]](s64), [[COPY1]](s64) - ; CHECK: [[BUILD_VECTOR3:%[0-9]+]]:_(<2 x s64>) = G_BUILD_VECTOR [[COPY]](s64), [[COPY1]](s64) - ; CHECK: [[COPY6:%[0-9]+]]:_(s64) = COPY $d6 - ; CHECK: [[COPY7:%[0-9]+]]:_(s64) = COPY $d7 - ; CHECK: [[FRAME_INDEX:%[0-9]+]]:_(p0) = G_FRAME_INDEX %fixed-stack.2 - ; CHECK: [[LOAD:%[0-9]+]]:_(s64) = G_LOAD [[FRAME_INDEX]](p0) :: (invariant load (s64) from %fixed-stack.2, align 16) - ; CHECK: [[FRAME_INDEX1:%[0-9]+]]:_(p0) = G_FRAME_INDEX %fixed-stack.3 - ; CHECK: [[LOAD1:%[0-9]+]]:_(s64) = G_LOAD [[FRAME_INDEX1]](p0) :: (invariant load (s64) from %fixed-stack.3) - ; CHECK: [[BUILD_VECTOR4:%[0-9]+]]:_(<2 x s64>) = G_BUILD_VECTOR [[COPY6]](s64), [[COPY7]](s64) - ; CHECK: [[BUILD_VECTOR5:%[0-9]+]]:_(<2 x s64>) = G_BUILD_VECTOR [[LOAD]](s64), [[LOAD1]](s64) - ; CHECK: [[COPY8:%[0-9]+]]:_(p0) = COPY $x0 - ; CHECK: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 1 - ; CHECK: [[EVEC:%[0-9]+]]:_(s64) = G_EXTRACT_VECTOR_ELT [[BUILD_VECTOR]](<2 x s64>), [[C]](s64) - ; CHECK: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 - ; CHECK: [[EVEC1:%[0-9]+]]:_(s64) = G_EXTRACT_VECTOR_ELT [[BUILD_VECTOR1]](<2 x s64>), [[C1]](s64) - ; CHECK: [[EVEC2:%[0-9]+]]:_(s64) = G_EXTRACT_VECTOR_ELT [[BUILD_VECTOR4]](<2 x s64>), [[C]](s64) - ; CHECK: [[EVEC3:%[0-9]+]]:_(s64) = G_EXTRACT_VECTOR_ELT [[BUILD_VECTOR2]](<2 x s64>), [[C1]](s64) - ; CHECK: [[BUILD_VECTOR6:%[0-9]+]]:_(<2 x s64>) = G_BUILD_VECTOR [[EVEC]](s64), [[EVEC1]](s64) - ; CHECK: [[BUILD_VECTOR7:%[0-9]+]]:_(<2 x s64>) = G_BUILD_VECTOR [[EVEC2]](s64), [[EVEC3]](s64) - ; CHECK: [[SHUF:%[0-9]+]]:_(<2 x s64>) = G_SHUFFLE_VECTOR [[BUILD_VECTOR3]](<2 x s64>), [[BUILD_VECTOR5]], shufflemask(1, 3) - ; CHECK: G_STORE [[BUILD_VECTOR6]](<2 x s64>), [[COPY8]](p0) :: (store (<2 x s64>), align 64) - ; CHECK: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 16 - ; CHECK: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY8]], [[C2]](s64) - ; CHECK: G_STORE [[BUILD_VECTOR7]](<2 x s64>), [[PTR_ADD]](p0) :: (store (<2 x s64>) into unknown-address + 16) - ; CHECK: [[C3:%[0-9]+]]:_(s64) = G_CONSTANT i64 32 - ; CHECK: [[PTR_ADD1:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY8]], [[C3]](s64) - ; CHECK: G_STORE [[SHUF]](<2 x s64>), [[PTR_ADD1]](p0) :: (store (<2 x s64>) into unknown-address + 32, align 32) - ; CHECK: RET_ReallyLR + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s64) = COPY $d0 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s64) = COPY $d1 + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(s64) = COPY $d2 + ; CHECK-NEXT: [[COPY3:%[0-9]+]]:_(s64) = COPY $d3 + ; CHECK-NEXT: [[COPY4:%[0-9]+]]:_(s64) = COPY $d4 + ; CHECK-NEXT: [[COPY5:%[0-9]+]]:_(s64) = COPY $d5 + ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s64>) = G_BUILD_VECTOR [[COPY2]](s64), [[COPY3]](s64) + ; CHECK-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s64>) = G_BUILD_VECTOR [[COPY4]](s64), [[COPY5]](s64) + ; CHECK-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x s64>) = G_BUILD_VECTOR [[COPY]](s64), [[COPY1]](s64) + 
; CHECK-NEXT: [[BUILD_VECTOR3:%[0-9]+]]:_(<2 x s64>) = G_BUILD_VECTOR [[COPY]](s64), [[COPY1]](s64) + ; CHECK-NEXT: [[COPY6:%[0-9]+]]:_(s64) = COPY $d6 + ; CHECK-NEXT: [[COPY7:%[0-9]+]]:_(s64) = COPY $d7 + ; CHECK-NEXT: [[FRAME_INDEX:%[0-9]+]]:_(p0) = G_FRAME_INDEX %fixed-stack.2 + ; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(s64) = G_LOAD [[FRAME_INDEX]](p0) :: (invariant load (s64) from %fixed-stack.2, align 16) + ; CHECK-NEXT: [[FRAME_INDEX1:%[0-9]+]]:_(p0) = G_FRAME_INDEX %fixed-stack.3 + ; CHECK-NEXT: [[LOAD1:%[0-9]+]]:_(s64) = G_LOAD [[FRAME_INDEX1]](p0) :: (invariant load (s64) from %fixed-stack.3) + ; CHECK-NEXT: [[BUILD_VECTOR4:%[0-9]+]]:_(<2 x s64>) = G_BUILD_VECTOR [[COPY6]](s64), [[COPY7]](s64) + ; CHECK-NEXT: [[BUILD_VECTOR5:%[0-9]+]]:_(<2 x s64>) = G_BUILD_VECTOR [[LOAD]](s64), [[LOAD1]](s64) + ; CHECK-NEXT: [[COPY8:%[0-9]+]]:_(p0) = COPY $x0 + ; CHECK-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 1 + ; CHECK-NEXT: [[EVEC:%[0-9]+]]:_(s64) = G_EXTRACT_VECTOR_ELT [[BUILD_VECTOR]](<2 x s64>), [[C]](s64) + ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 + ; CHECK-NEXT: [[EVEC1:%[0-9]+]]:_(s64) = G_EXTRACT_VECTOR_ELT [[BUILD_VECTOR1]](<2 x s64>), [[C1]](s64) + ; CHECK-NEXT: [[EVEC2:%[0-9]+]]:_(s64) = G_EXTRACT_VECTOR_ELT [[BUILD_VECTOR4]](<2 x s64>), [[C]](s64) + ; CHECK-NEXT: [[EVEC3:%[0-9]+]]:_(s64) = G_EXTRACT_VECTOR_ELT [[BUILD_VECTOR2]](<2 x s64>), [[C1]](s64) + ; CHECK-NEXT: [[BUILD_VECTOR6:%[0-9]+]]:_(<2 x s64>) = G_BUILD_VECTOR [[EVEC]](s64), [[EVEC1]](s64) + ; CHECK-NEXT: [[BUILD_VECTOR7:%[0-9]+]]:_(<2 x s64>) = G_BUILD_VECTOR [[EVEC2]](s64), [[EVEC3]](s64) + ; CHECK-NEXT: [[SHUF:%[0-9]+]]:_(<2 x s64>) = G_SHUFFLE_VECTOR [[BUILD_VECTOR3]](<2 x s64>), [[BUILD_VECTOR5]], shufflemask(1, 3) + ; CHECK-NEXT: G_STORE [[BUILD_VECTOR6]](<2 x s64>), [[COPY8]](p0) :: (store (<2 x s64>), align 64) + ; CHECK-NEXT: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 16 + ; CHECK-NEXT: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY8]], [[C2]](s64) + ; CHECK-NEXT: G_STORE [[BUILD_VECTOR7]](<2 x s64>), [[PTR_ADD]](p0) :: (store (<2 x s64>) into unknown-address + 16) + ; CHECK-NEXT: [[C3:%[0-9]+]]:_(s64) = G_CONSTANT i64 32 + ; CHECK-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY8]], [[C3]](s64) + ; CHECK-NEXT: G_STORE [[SHUF]](<2 x s64>), [[PTR_ADD1]](p0) :: (store (<2 x s64>) into unknown-address + 32, align 32) + ; CHECK-NEXT: RET_ReallyLR %3:_(s64) = COPY $d0 %4:_(s64) = COPY $d1 %5:_(s64) = COPY $d2 diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/legalize-ssubsat.mir b/llvm/test/CodeGen/AArch64/GlobalISel/legalize-ssubsat.mir index b1ab6d4cee5e..6c5c2b783a49 100644 --- a/llvm/test/CodeGen/AArch64/GlobalISel/legalize-ssubsat.mir +++ b/llvm/test/CodeGen/AArch64/GlobalISel/legalize-ssubsat.mir @@ -10,16 +10,17 @@ body: | liveins: $w0, $w1 ; CHECK-LABEL: name: s32 ; CHECK: liveins: $w0, $w1 - ; CHECK: %x:_(s32) = COPY $w0 - ; CHECK: %y:_(s32) = COPY $w1 - ; CHECK: [[SSUBO:%[0-9]+]]:_(s32), [[SSUBO1:%[0-9]+]]:_(s1) = G_SSUBO %x, %y - ; CHECK: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 31 - ; CHECK: [[ASHR:%[0-9]+]]:_(s32) = G_ASHR [[SSUBO]], [[C]](s64) - ; CHECK: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 -2147483648 - ; CHECK: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[ASHR]], [[C1]] - ; CHECK: %ssubsat:_(s32) = G_SELECT [[SSUBO1]](s1), [[ADD]], [[SSUBO]] - ; CHECK: $w0 = COPY %ssubsat(s32) - ; CHECK: RET_ReallyLR implicit $w0 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: %x:_(s32) = COPY $w0 + ; CHECK-NEXT: %y:_(s32) = COPY $w1 + ; CHECK-NEXT: [[SSUBO:%[0-9]+]]:_(s32), [[SSUBO1:%[0-9]+]]:_(s1) = G_SSUBO %x, %y + ; CHECK-NEXT: 
[[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 31 + ; CHECK-NEXT: [[ASHR:%[0-9]+]]:_(s32) = G_ASHR [[SSUBO]], [[C]](s64) + ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 -2147483648 + ; CHECK-NEXT: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[ASHR]], [[C1]] + ; CHECK-NEXT: %ssubsat:_(s32) = G_SELECT [[SSUBO1]](s1), [[ADD]], [[SSUBO]] + ; CHECK-NEXT: $w0 = COPY %ssubsat(s32) + ; CHECK-NEXT: RET_ReallyLR implicit $w0 %x:_(s32) = COPY $w0 %y:_(s32) = COPY $w1 %ssubsat:_(s32) = G_SSUBSAT %x, %y @@ -35,16 +36,17 @@ body: | liveins: $x0, $x1 ; CHECK-LABEL: name: s64 ; CHECK: liveins: $x0, $x1 - ; CHECK: %x:_(s64) = COPY $x0 - ; CHECK: %y:_(s64) = COPY $x1 - ; CHECK: [[SSUBO:%[0-9]+]]:_(s64), [[SSUBO1:%[0-9]+]]:_(s1) = G_SSUBO %x, %y - ; CHECK: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 63 - ; CHECK: [[ASHR:%[0-9]+]]:_(s64) = G_ASHR [[SSUBO]], [[C]](s64) - ; CHECK: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 -9223372036854775808 - ; CHECK: [[ADD:%[0-9]+]]:_(s64) = G_ADD [[ASHR]], [[C1]] - ; CHECK: %ssubsat:_(s64) = G_SELECT [[SSUBO1]](s1), [[ADD]], [[SSUBO]] - ; CHECK: $x0 = COPY %ssubsat(s64) - ; CHECK: RET_ReallyLR implicit $x0 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: %x:_(s64) = COPY $x0 + ; CHECK-NEXT: %y:_(s64) = COPY $x1 + ; CHECK-NEXT: [[SSUBO:%[0-9]+]]:_(s64), [[SSUBO1:%[0-9]+]]:_(s1) = G_SSUBO %x, %y + ; CHECK-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 63 + ; CHECK-NEXT: [[ASHR:%[0-9]+]]:_(s64) = G_ASHR [[SSUBO]], [[C]](s64) + ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 -9223372036854775808 + ; CHECK-NEXT: [[ADD:%[0-9]+]]:_(s64) = G_ADD [[ASHR]], [[C1]] + ; CHECK-NEXT: %ssubsat:_(s64) = G_SELECT [[SSUBO1]](s1), [[ADD]], [[SSUBO]] + ; CHECK-NEXT: $x0 = COPY %ssubsat(s64) + ; CHECK-NEXT: RET_ReallyLR implicit $x0 %x:_(s64) = COPY $x0 %y:_(s64) = COPY $x1 %ssubsat:_(s64) = G_SSUBSAT %x, %y @@ -61,22 +63,23 @@ body: | ; CHECK-LABEL: name: s16 ; CHECK: liveins: $w0, $w1, $w2 - ; CHECK: %copy_1:_(s32) = COPY $w0 - ; CHECK: %copy_2:_(s32) = COPY $w1 - ; CHECK: [[SEXT_INREG:%[0-9]+]]:_(s32) = G_SEXT_INREG %copy_1, 16 - ; CHECK: [[SEXT_INREG1:%[0-9]+]]:_(s32) = G_SEXT_INREG %copy_2, 16 - ; CHECK: [[SUB:%[0-9]+]]:_(s32) = G_SUB [[SEXT_INREG]], [[SEXT_INREG1]] - ; CHECK: [[SEXT_INREG2:%[0-9]+]]:_(s32) = G_SEXT_INREG [[SUB]], 16 - ; CHECK: [[ICMP:%[0-9]+]]:_(s32) = G_ICMP intpred(ne), [[SUB]](s32), [[SEXT_INREG2]] - ; CHECK: [[TRUNC:%[0-9]+]]:_(s1) = G_TRUNC [[ICMP]](s32) - ; CHECK: [[SEXT_INREG3:%[0-9]+]]:_(s32) = G_SEXT_INREG [[SUB]], 16 - ; CHECK: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 15 - ; CHECK: [[ASHR:%[0-9]+]]:_(s32) = G_ASHR [[SEXT_INREG3]], [[C]](s64) - ; CHECK: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 -32768 - ; CHECK: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[ASHR]], [[C1]] - ; CHECK: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[TRUNC]](s1), [[ADD]], [[SUB]] - ; CHECK: $w0 = COPY [[SELECT]](s32) - ; CHECK: RET_ReallyLR implicit $w0 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: %copy_1:_(s32) = COPY $w0 + ; CHECK-NEXT: %copy_2:_(s32) = COPY $w1 + ; CHECK-NEXT: [[SEXT_INREG:%[0-9]+]]:_(s32) = G_SEXT_INREG %copy_1, 16 + ; CHECK-NEXT: [[SEXT_INREG1:%[0-9]+]]:_(s32) = G_SEXT_INREG %copy_2, 16 + ; CHECK-NEXT: [[SUB:%[0-9]+]]:_(s32) = G_SUB [[SEXT_INREG]], [[SEXT_INREG1]] + ; CHECK-NEXT: [[SEXT_INREG2:%[0-9]+]]:_(s32) = G_SEXT_INREG [[SUB]], 16 + ; CHECK-NEXT: [[ICMP:%[0-9]+]]:_(s32) = G_ICMP intpred(ne), [[SUB]](s32), [[SEXT_INREG2]] + ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(s1) = G_TRUNC [[ICMP]](s32) + ; CHECK-NEXT: [[SEXT_INREG3:%[0-9]+]]:_(s32) = G_SEXT_INREG [[SUB]], 16 + ; CHECK-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 15 + ; 
CHECK-NEXT: [[ASHR:%[0-9]+]]:_(s32) = G_ASHR [[SEXT_INREG3]], [[C]](s64) + ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 -32768 + ; CHECK-NEXT: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[ASHR]], [[C1]] + ; CHECK-NEXT: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[TRUNC]](s1), [[ADD]], [[SUB]] + ; CHECK-NEXT: $w0 = COPY [[SELECT]](s32) + ; CHECK-NEXT: RET_ReallyLR implicit $w0 %copy_1:_(s32) = COPY $w0 %x:_(s16) = G_TRUNC %copy_1(s32) %copy_2:_(s32) = COPY $w1 @@ -96,22 +99,23 @@ body: | ; CHECK-LABEL: name: s1 ; CHECK: liveins: $w0, $w1, $w2 - ; CHECK: %copy_1:_(s32) = COPY $w0 - ; CHECK: %copy_2:_(s32) = COPY $w1 - ; CHECK: [[SEXT_INREG:%[0-9]+]]:_(s32) = G_SEXT_INREG %copy_1, 1 - ; CHECK: [[SEXT_INREG1:%[0-9]+]]:_(s32) = G_SEXT_INREG %copy_2, 1 - ; CHECK: [[SUB:%[0-9]+]]:_(s32) = G_SUB [[SEXT_INREG]], [[SEXT_INREG1]] - ; CHECK: [[SEXT_INREG2:%[0-9]+]]:_(s32) = G_SEXT_INREG [[SUB]], 1 - ; CHECK: [[ICMP:%[0-9]+]]:_(s32) = G_ICMP intpred(ne), [[SUB]](s32), [[SEXT_INREG2]] - ; CHECK: [[TRUNC:%[0-9]+]]:_(s1) = G_TRUNC [[ICMP]](s32) - ; CHECK: [[SEXT_INREG3:%[0-9]+]]:_(s32) = G_SEXT_INREG [[SUB]], 1 - ; CHECK: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 - ; CHECK: [[ASHR:%[0-9]+]]:_(s32) = G_ASHR [[SEXT_INREG3]], [[C]](s64) - ; CHECK: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 - ; CHECK: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[ASHR]], [[C1]] - ; CHECK: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[TRUNC]](s1), [[ADD]], [[SUB]] - ; CHECK: $w0 = COPY [[SELECT]](s32) - ; CHECK: RET_ReallyLR implicit $w0 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: %copy_1:_(s32) = COPY $w0 + ; CHECK-NEXT: %copy_2:_(s32) = COPY $w1 + ; CHECK-NEXT: [[SEXT_INREG:%[0-9]+]]:_(s32) = G_SEXT_INREG %copy_1, 1 + ; CHECK-NEXT: [[SEXT_INREG1:%[0-9]+]]:_(s32) = G_SEXT_INREG %copy_2, 1 + ; CHECK-NEXT: [[SUB:%[0-9]+]]:_(s32) = G_SUB [[SEXT_INREG]], [[SEXT_INREG1]] + ; CHECK-NEXT: [[SEXT_INREG2:%[0-9]+]]:_(s32) = G_SEXT_INREG [[SUB]], 1 + ; CHECK-NEXT: [[ICMP:%[0-9]+]]:_(s32) = G_ICMP intpred(ne), [[SUB]](s32), [[SEXT_INREG2]] + ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(s1) = G_TRUNC [[ICMP]](s32) + ; CHECK-NEXT: [[SEXT_INREG3:%[0-9]+]]:_(s32) = G_SEXT_INREG [[SUB]], 1 + ; CHECK-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 + ; CHECK-NEXT: [[ASHR:%[0-9]+]]:_(s32) = G_ASHR [[SEXT_INREG3]], [[C]](s64) + ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 + ; CHECK-NEXT: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[ASHR]], [[C1]] + ; CHECK-NEXT: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[TRUNC]](s1), [[ADD]], [[SUB]] + ; CHECK-NEXT: $w0 = COPY [[SELECT]](s32) + ; CHECK-NEXT: RET_ReallyLR implicit $w0 %copy_1:_(s32) = COPY $w0 %x:_(s1) = G_TRUNC %copy_1(s32) %copy_2:_(s32) = COPY $w1 @@ -131,22 +135,23 @@ body: | ; CHECK-LABEL: name: s3 ; CHECK: liveins: $w0, $w1, $w2 - ; CHECK: %copy_1:_(s32) = COPY $w0 - ; CHECK: %copy_2:_(s32) = COPY $w1 - ; CHECK: [[SEXT_INREG:%[0-9]+]]:_(s32) = G_SEXT_INREG %copy_1, 3 - ; CHECK: [[SEXT_INREG1:%[0-9]+]]:_(s32) = G_SEXT_INREG %copy_2, 3 - ; CHECK: [[SUB:%[0-9]+]]:_(s32) = G_SUB [[SEXT_INREG]], [[SEXT_INREG1]] - ; CHECK: [[SEXT_INREG2:%[0-9]+]]:_(s32) = G_SEXT_INREG [[SUB]], 3 - ; CHECK: [[ICMP:%[0-9]+]]:_(s32) = G_ICMP intpred(ne), [[SUB]](s32), [[SEXT_INREG2]] - ; CHECK: [[TRUNC:%[0-9]+]]:_(s1) = G_TRUNC [[ICMP]](s32) - ; CHECK: [[SEXT_INREG3:%[0-9]+]]:_(s32) = G_SEXT_INREG [[SUB]], 3 - ; CHECK: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 2 - ; CHECK: [[ASHR:%[0-9]+]]:_(s32) = G_ASHR [[SEXT_INREG3]], [[C]](s64) - ; CHECK: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 - ; CHECK: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[ASHR]], [[C1]] - ; CHECK: [[SELECT:%[0-9]+]]:_(s32) 
= G_SELECT [[TRUNC]](s1), [[ADD]], [[SUB]] - ; CHECK: $w0 = COPY [[SELECT]](s32) - ; CHECK: RET_ReallyLR implicit $w0 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: %copy_1:_(s32) = COPY $w0 + ; CHECK-NEXT: %copy_2:_(s32) = COPY $w1 + ; CHECK-NEXT: [[SEXT_INREG:%[0-9]+]]:_(s32) = G_SEXT_INREG %copy_1, 3 + ; CHECK-NEXT: [[SEXT_INREG1:%[0-9]+]]:_(s32) = G_SEXT_INREG %copy_2, 3 + ; CHECK-NEXT: [[SUB:%[0-9]+]]:_(s32) = G_SUB [[SEXT_INREG]], [[SEXT_INREG1]] + ; CHECK-NEXT: [[SEXT_INREG2:%[0-9]+]]:_(s32) = G_SEXT_INREG [[SUB]], 3 + ; CHECK-NEXT: [[ICMP:%[0-9]+]]:_(s32) = G_ICMP intpred(ne), [[SUB]](s32), [[SEXT_INREG2]] + ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(s1) = G_TRUNC [[ICMP]](s32) + ; CHECK-NEXT: [[SEXT_INREG3:%[0-9]+]]:_(s32) = G_SEXT_INREG [[SUB]], 3 + ; CHECK-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 2 + ; CHECK-NEXT: [[ASHR:%[0-9]+]]:_(s32) = G_ASHR [[SEXT_INREG3]], [[C]](s64) + ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 + ; CHECK-NEXT: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[ASHR]], [[C1]] + ; CHECK-NEXT: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[TRUNC]](s1), [[ADD]], [[SUB]] + ; CHECK-NEXT: $w0 = COPY [[SELECT]](s32) + ; CHECK-NEXT: RET_ReallyLR implicit $w0 %copy_1:_(s32) = COPY $w0 %x:_(s3) = G_TRUNC %copy_1(s32) %copy_2:_(s32) = COPY $w1 @@ -166,23 +171,24 @@ body: | ; CHECK-LABEL: name: s36 ; CHECK: liveins: $x0, $x1 - ; CHECK: %copy_1:_(s64) = COPY $x0 - ; CHECK: %copy_2:_(s64) = COPY $x1 - ; CHECK: [[SEXT_INREG:%[0-9]+]]:_(s64) = G_SEXT_INREG %copy_1, 36 - ; CHECK: [[SEXT_INREG1:%[0-9]+]]:_(s64) = G_SEXT_INREG %copy_2, 36 - ; CHECK: [[SUB:%[0-9]+]]:_(s64) = G_SUB [[SEXT_INREG]], [[SEXT_INREG1]] - ; CHECK: [[SEXT_INREG2:%[0-9]+]]:_(s64) = G_SEXT_INREG [[SUB]], 36 - ; CHECK: [[ICMP:%[0-9]+]]:_(s32) = G_ICMP intpred(ne), [[SUB]](s64), [[SEXT_INREG2]] - ; CHECK: [[TRUNC:%[0-9]+]]:_(s1) = G_TRUNC [[ICMP]](s32) - ; CHECK: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 35 - ; CHECK: [[SEXT_INREG3:%[0-9]+]]:_(s64) = G_SEXT_INREG [[SUB]], 36 - ; CHECK: [[COPY:%[0-9]+]]:_(s64) = COPY [[C]](s64) - ; CHECK: [[ASHR:%[0-9]+]]:_(s64) = G_ASHR [[SEXT_INREG3]], [[COPY]](s64) - ; CHECK: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 34359738368 - ; CHECK: [[ADD:%[0-9]+]]:_(s64) = G_ADD [[ASHR]], [[C1]] - ; CHECK: [[SELECT:%[0-9]+]]:_(s64) = G_SELECT [[TRUNC]](s1), [[ADD]], [[SUB]] - ; CHECK: $x0 = COPY [[SELECT]](s64) - ; CHECK: RET_ReallyLR implicit $x0 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: %copy_1:_(s64) = COPY $x0 + ; CHECK-NEXT: %copy_2:_(s64) = COPY $x1 + ; CHECK-NEXT: [[SEXT_INREG:%[0-9]+]]:_(s64) = G_SEXT_INREG %copy_1, 36 + ; CHECK-NEXT: [[SEXT_INREG1:%[0-9]+]]:_(s64) = G_SEXT_INREG %copy_2, 36 + ; CHECK-NEXT: [[SUB:%[0-9]+]]:_(s64) = G_SUB [[SEXT_INREG]], [[SEXT_INREG1]] + ; CHECK-NEXT: [[SEXT_INREG2:%[0-9]+]]:_(s64) = G_SEXT_INREG [[SUB]], 36 + ; CHECK-NEXT: [[ICMP:%[0-9]+]]:_(s32) = G_ICMP intpred(ne), [[SUB]](s64), [[SEXT_INREG2]] + ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(s1) = G_TRUNC [[ICMP]](s32) + ; CHECK-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 35 + ; CHECK-NEXT: [[SEXT_INREG3:%[0-9]+]]:_(s64) = G_SEXT_INREG [[SUB]], 36 + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s64) = COPY [[C]](s64) + ; CHECK-NEXT: [[ASHR:%[0-9]+]]:_(s64) = G_ASHR [[SEXT_INREG3]], [[COPY]](s64) + ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 34359738368 + ; CHECK-NEXT: [[ADD:%[0-9]+]]:_(s64) = G_ADD [[ASHR]], [[C1]] + ; CHECK-NEXT: [[SELECT:%[0-9]+]]:_(s64) = G_SELECT [[TRUNC]](s1), [[ADD]], [[SUB]] + ; CHECK-NEXT: $x0 = COPY [[SELECT]](s64) + ; CHECK-NEXT: RET_ReallyLR implicit $x0 %copy_1:_(s64) = COPY $x0 %x:_(s36) = G_TRUNC 
%copy_1(s64) %copy_2:_(s64) = COPY $x1 @@ -201,47 +207,48 @@ body: | liveins: $q0, $q1, $x0 ; CHECK-LABEL: name: s88 ; CHECK: liveins: $q0, $q1, $x0 - ; CHECK: %copy_1:_(s128) = COPY $q0 - ; CHECK: %copy_2:_(s128) = COPY $q1 - ; CHECK: [[EXTRACT:%[0-9]+]]:_(s64) = G_EXTRACT %copy_1(s128), 0 - ; CHECK: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 - ; CHECK: [[UV:%[0-9]+]]:_(s64), [[UV1:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES %copy_1(s128) - ; CHECK: [[EXTRACT1:%[0-9]+]]:_(s64) = G_EXTRACT %copy_2(s128), 0 - ; CHECK: [[UV2:%[0-9]+]]:_(s64), [[UV3:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES %copy_2(s128) - ; CHECK: [[USUBO:%[0-9]+]]:_(s64), [[USUBO1:%[0-9]+]]:_(s1) = G_USUBO [[EXTRACT]], [[EXTRACT1]] - ; CHECK: [[TRUNC:%[0-9]+]]:_(s32) = G_TRUNC [[UV1]](s64) - ; CHECK: [[SEXT_INREG:%[0-9]+]]:_(s32) = G_SEXT_INREG [[TRUNC]], 24 - ; CHECK: [[TRUNC1:%[0-9]+]]:_(s32) = G_TRUNC [[UV3]](s64) - ; CHECK: [[SEXT_INREG1:%[0-9]+]]:_(s32) = G_SEXT_INREG [[TRUNC1]], 24 - ; CHECK: [[USUBE:%[0-9]+]]:_(s32), [[USUBE1:%[0-9]+]]:_(s1) = G_USUBE [[SEXT_INREG]], [[SEXT_INREG1]], [[USUBO1]] - ; CHECK: [[SEXT_INREG2:%[0-9]+]]:_(s32) = G_SEXT_INREG [[USUBE]], 24 - ; CHECK: [[ICMP:%[0-9]+]]:_(s32) = G_ICMP intpred(ne), [[USUBE]](s32), [[SEXT_INREG2]] - ; CHECK: [[TRUNC2:%[0-9]+]]:_(s1) = G_TRUNC [[ICMP]](s32) - ; CHECK: [[UV4:%[0-9]+]]:_(s8), [[UV5:%[0-9]+]]:_(s8), [[UV6:%[0-9]+]]:_(s8), [[UV7:%[0-9]+]]:_(s8), [[UV8:%[0-9]+]]:_(s8), [[UV9:%[0-9]+]]:_(s8), [[UV10:%[0-9]+]]:_(s8), [[UV11:%[0-9]+]]:_(s8) = G_UNMERGE_VALUES [[USUBO]](s64) - ; CHECK: [[UV12:%[0-9]+]]:_(s8), [[UV13:%[0-9]+]]:_(s8), [[UV14:%[0-9]+]]:_(s8), [[UV15:%[0-9]+]]:_(s8) = G_UNMERGE_VALUES [[USUBE]](s32) - ; CHECK: [[DEF:%[0-9]+]]:_(s8) = G_IMPLICIT_DEF - ; CHECK: [[DEF1:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF - ; CHECK: [[UV16:%[0-9]+]]:_(s8), [[UV17:%[0-9]+]]:_(s8), [[UV18:%[0-9]+]]:_(s8), [[UV19:%[0-9]+]]:_(s8) = G_UNMERGE_VALUES [[DEF1]](s32) - ; CHECK: [[UV20:%[0-9]+]]:_(s8), [[UV21:%[0-9]+]]:_(s8), [[UV22:%[0-9]+]]:_(s8), [[UV23:%[0-9]+]]:_(s8) = G_UNMERGE_VALUES [[DEF1]](s32) - ; CHECK: [[MV:%[0-9]+]]:_(s32) = G_MERGE_VALUES [[UV4]](s8), [[UV5]](s8), [[UV6]](s8), [[UV7]](s8) - ; CHECK: [[MV1:%[0-9]+]]:_(s32) = G_MERGE_VALUES [[UV8]](s8), [[UV9]](s8), [[UV10]](s8), [[UV11]](s8) - ; CHECK: [[MV2:%[0-9]+]]:_(s32) = G_MERGE_VALUES [[UV12]](s8), [[UV13]](s8), [[UV14]](s8), [[DEF]](s8) - ; CHECK: [[MV3:%[0-9]+]]:_(s32) = G_MERGE_VALUES [[UV16]](s8), [[UV17]](s8), [[UV18]](s8), [[UV20]](s8) - ; CHECK: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 40 - ; CHECK: [[MV4:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[MV]](s32), [[MV1]](s32) - ; CHECK: [[MV5:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[MV2]](s32), [[MV3]](s32) - ; CHECK: [[SHL:%[0-9]+]]:_(s64) = G_SHL [[MV5]], [[C1]](s64) - ; CHECK: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 24 - ; CHECK: [[LSHR:%[0-9]+]]:_(s64) = G_LSHR [[MV4]], [[C2]](s64) - ; CHECK: [[OR:%[0-9]+]]:_(s64) = G_OR [[SHL]], [[LSHR]] - ; CHECK: [[ASHR:%[0-9]+]]:_(s64) = G_ASHR [[OR]], [[C1]](s64) - ; CHECK: [[C3:%[0-9]+]]:_(s64) = G_CONSTANT i64 23 - ; CHECK: [[ASHR1:%[0-9]+]]:_(s64) = G_ASHR [[ASHR]], [[C3]](s64) - ; CHECK: [[UADDO:%[0-9]+]]:_(s64), [[UADDO1:%[0-9]+]]:_(s1) = G_UADDO [[ASHR1]], [[C]] - ; CHECK: [[MV6:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[MV]](s32), [[MV1]](s32) - ; CHECK: [[SELECT:%[0-9]+]]:_(s64) = G_SELECT [[TRUNC2]](s1), [[UADDO]], [[MV6]] - ; CHECK: $x0 = COPY [[SELECT]](s64) - ; CHECK: RET_ReallyLR implicit $x0 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: %copy_1:_(s128) = COPY $q0 + ; CHECK-NEXT: %copy_2:_(s128) = COPY $q1 + ; CHECK-NEXT: 
[[EXTRACT:%[0-9]+]]:_(s64) = G_EXTRACT %copy_1(s128), 0 + ; CHECK-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 + ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s64), [[UV1:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES %copy_1(s128) + ; CHECK-NEXT: [[EXTRACT1:%[0-9]+]]:_(s64) = G_EXTRACT %copy_2(s128), 0 + ; CHECK-NEXT: [[UV2:%[0-9]+]]:_(s64), [[UV3:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES %copy_2(s128) + ; CHECK-NEXT: [[USUBO:%[0-9]+]]:_(s64), [[USUBO1:%[0-9]+]]:_(s1) = G_USUBO [[EXTRACT]], [[EXTRACT1]] + ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(s32) = G_TRUNC [[UV1]](s64) + ; CHECK-NEXT: [[SEXT_INREG:%[0-9]+]]:_(s32) = G_SEXT_INREG [[TRUNC]], 24 + ; CHECK-NEXT: [[TRUNC1:%[0-9]+]]:_(s32) = G_TRUNC [[UV3]](s64) + ; CHECK-NEXT: [[SEXT_INREG1:%[0-9]+]]:_(s32) = G_SEXT_INREG [[TRUNC1]], 24 + ; CHECK-NEXT: [[USUBE:%[0-9]+]]:_(s32), [[USUBE1:%[0-9]+]]:_(s1) = G_USUBE [[SEXT_INREG]], [[SEXT_INREG1]], [[USUBO1]] + ; CHECK-NEXT: [[SEXT_INREG2:%[0-9]+]]:_(s32) = G_SEXT_INREG [[USUBE]], 24 + ; CHECK-NEXT: [[ICMP:%[0-9]+]]:_(s32) = G_ICMP intpred(ne), [[USUBE]](s32), [[SEXT_INREG2]] + ; CHECK-NEXT: [[TRUNC2:%[0-9]+]]:_(s1) = G_TRUNC [[ICMP]](s32) + ; CHECK-NEXT: [[UV4:%[0-9]+]]:_(s8), [[UV5:%[0-9]+]]:_(s8), [[UV6:%[0-9]+]]:_(s8), [[UV7:%[0-9]+]]:_(s8), [[UV8:%[0-9]+]]:_(s8), [[UV9:%[0-9]+]]:_(s8), [[UV10:%[0-9]+]]:_(s8), [[UV11:%[0-9]+]]:_(s8) = G_UNMERGE_VALUES [[USUBO]](s64) + ; CHECK-NEXT: [[UV12:%[0-9]+]]:_(s8), [[UV13:%[0-9]+]]:_(s8), [[UV14:%[0-9]+]]:_(s8), [[UV15:%[0-9]+]]:_(s8) = G_UNMERGE_VALUES [[USUBE]](s32) + ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(s8) = G_IMPLICIT_DEF + ; CHECK-NEXT: [[DEF1:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF + ; CHECK-NEXT: [[UV16:%[0-9]+]]:_(s8), [[UV17:%[0-9]+]]:_(s8), [[UV18:%[0-9]+]]:_(s8), [[UV19:%[0-9]+]]:_(s8) = G_UNMERGE_VALUES [[DEF1]](s32) + ; CHECK-NEXT: [[UV20:%[0-9]+]]:_(s8), [[UV21:%[0-9]+]]:_(s8), [[UV22:%[0-9]+]]:_(s8), [[UV23:%[0-9]+]]:_(s8) = G_UNMERGE_VALUES [[DEF1]](s32) + ; CHECK-NEXT: [[MV:%[0-9]+]]:_(s32) = G_MERGE_VALUES [[UV4]](s8), [[UV5]](s8), [[UV6]](s8), [[UV7]](s8) + ; CHECK-NEXT: [[MV1:%[0-9]+]]:_(s32) = G_MERGE_VALUES [[UV8]](s8), [[UV9]](s8), [[UV10]](s8), [[UV11]](s8) + ; CHECK-NEXT: [[MV2:%[0-9]+]]:_(s32) = G_MERGE_VALUES [[UV12]](s8), [[UV13]](s8), [[UV14]](s8), [[DEF]](s8) + ; CHECK-NEXT: [[MV3:%[0-9]+]]:_(s32) = G_MERGE_VALUES [[UV16]](s8), [[UV17]](s8), [[UV18]](s8), [[UV20]](s8) + ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 40 + ; CHECK-NEXT: [[MV4:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[MV]](s32), [[MV1]](s32) + ; CHECK-NEXT: [[MV5:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[MV2]](s32), [[MV3]](s32) + ; CHECK-NEXT: [[SHL:%[0-9]+]]:_(s64) = G_SHL [[MV5]], [[C1]](s64) + ; CHECK-NEXT: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 24 + ; CHECK-NEXT: [[LSHR:%[0-9]+]]:_(s64) = G_LSHR [[MV4]], [[C2]](s64) + ; CHECK-NEXT: [[OR:%[0-9]+]]:_(s64) = G_OR [[SHL]], [[LSHR]] + ; CHECK-NEXT: [[ASHR:%[0-9]+]]:_(s64) = G_ASHR [[OR]], [[C1]](s64) + ; CHECK-NEXT: [[C3:%[0-9]+]]:_(s64) = G_CONSTANT i64 23 + ; CHECK-NEXT: [[ASHR1:%[0-9]+]]:_(s64) = G_ASHR [[ASHR]], [[C3]](s64) + ; CHECK-NEXT: [[UADDO:%[0-9]+]]:_(s64), [[UADDO1:%[0-9]+]]:_(s1) = G_UADDO [[ASHR1]], [[C]] + ; CHECK-NEXT: [[MV6:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[MV]](s32), [[MV1]](s32) + ; CHECK-NEXT: [[SELECT:%[0-9]+]]:_(s64) = G_SELECT [[TRUNC2]](s1), [[UADDO]], [[MV6]] + ; CHECK-NEXT: $x0 = COPY [[SELECT]](s64) + ; CHECK-NEXT: RET_ReallyLR implicit $x0 %copy_1:_(s128) = COPY $q0 %x:_(s88) = G_TRUNC %copy_1(s128) %copy_2:_(s128) = COPY $q1 diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/artifact-combiner-unmerge-values.mir 
b/llvm/test/CodeGen/AMDGPU/GlobalISel/artifact-combiner-unmerge-values.mir index 2a7db868d005..507961fcffe8 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/artifact-combiner-unmerge-values.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/artifact-combiner-unmerge-values.mir @@ -1,5 +1,5 @@ # NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py -# RUN: llc -O0 -mtriple=amdgcn-mesa-mesa3d -mcpu=tahiti -run-pass=legalizer -global-isel-abort=0 -o - %s | FileCheck %s +# RUN: llc -O0 -mtriple=amdgcn-mesa-mesa3d -mcpu=tahiti -run-pass=legalizer -o - %s | FileCheck %s --- name: test_unmerge_values_s1_trunc_v2s1_of_build_vector_v2s32 diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/function-returns.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/function-returns.ll index 3dfd3ab5a15a..147d60fdac64 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/function-returns.ll +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/function-returns.ll @@ -1,19 +1,20 @@ ; NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py -; RUN: llc -global-isel -global-isel-abort=0 -stop-after=irtranslator -mtriple=amdgcn-mesa-mesa3d -mcpu=fiji -verify-machineinstrs -o - %s | FileCheck -enable-var-scope %s +; RUN: llc -global-isel -stop-after=irtranslator -mtriple=amdgcn-mesa-mesa3d -mcpu=fiji -verify-machineinstrs -o - %s | FileCheck -enable-var-scope %s ; FIXME: Also test with a pre-gfx8 target. define i1 @i1_func_void() #0 { ; CHECK-LABEL: name: i1_func_void ; CHECK: bb.1 (%ir-block.0): - ; CHECK: liveins: $sgpr30_sgpr31 - ; CHECK: [[COPY:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 - ; CHECK: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF - ; CHECK: [[LOAD:%[0-9]+]]:_(s1) = G_LOAD [[DEF]](p1) :: (load (s1) from `i1 addrspace(1)* undef`, addrspace 1) - ; CHECK: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[LOAD]](s1) - ; CHECK: $vgpr0 = COPY [[ANYEXT]](s32) - ; CHECK: [[COPY1:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY]] - ; CHECK: S_SETPC_B64_return [[COPY1]], implicit $vgpr0 + ; CHECK-NEXT: liveins: $sgpr30_sgpr31 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 + ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF + ; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(s1) = G_LOAD [[DEF]](p1) :: (load (s1) from `i1 addrspace(1)* undef`, addrspace 1) + ; CHECK-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[LOAD]](s1) + ; CHECK-NEXT: $vgpr0 = COPY [[ANYEXT]](s32) + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY]] + ; CHECK-NEXT: S_SETPC_B64_return [[COPY1]], implicit $vgpr0 %val = load i1, i1 addrspace(1)* undef ret i1 %val } @@ -21,14 +22,15 @@ define i1 @i1_func_void() #0 { define zeroext i1 @i1_zeroext_func_void() #0 { ; CHECK-LABEL: name: i1_zeroext_func_void ; CHECK: bb.1 (%ir-block.0): - ; CHECK: liveins: $sgpr30_sgpr31 - ; CHECK: [[COPY:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 - ; CHECK: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF - ; CHECK: [[LOAD:%[0-9]+]]:_(s1) = G_LOAD [[DEF]](p1) :: (load (s1) from `i1 addrspace(1)* undef`, addrspace 1) - ; CHECK: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[LOAD]](s1) - ; CHECK: $vgpr0 = COPY [[ZEXT]](s32) - ; CHECK: [[COPY1:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY]] - ; CHECK: S_SETPC_B64_return [[COPY1]], implicit $vgpr0 + ; CHECK-NEXT: liveins: $sgpr30_sgpr31 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 + ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF + ; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(s1) = G_LOAD [[DEF]](p1) :: (load (s1) from `i1 addrspace(1)* undef`, addrspace 1) + ; CHECK-NEXT: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT 
[[LOAD]](s1) + ; CHECK-NEXT: $vgpr0 = COPY [[ZEXT]](s32) + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY]] + ; CHECK-NEXT: S_SETPC_B64_return [[COPY1]], implicit $vgpr0 %val = load i1, i1 addrspace(1)* undef ret i1 %val } @@ -36,14 +38,15 @@ define zeroext i1 @i1_zeroext_func_void() #0 { define signext i1 @i1_signext_func_void() #0 { ; CHECK-LABEL: name: i1_signext_func_void ; CHECK: bb.1 (%ir-block.0): - ; CHECK: liveins: $sgpr30_sgpr31 - ; CHECK: [[COPY:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 - ; CHECK: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF - ; CHECK: [[LOAD:%[0-9]+]]:_(s1) = G_LOAD [[DEF]](p1) :: (load (s1) from `i1 addrspace(1)* undef`, addrspace 1) - ; CHECK: [[SEXT:%[0-9]+]]:_(s32) = G_SEXT [[LOAD]](s1) - ; CHECK: $vgpr0 = COPY [[SEXT]](s32) - ; CHECK: [[COPY1:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY]] - ; CHECK: S_SETPC_B64_return [[COPY1]], implicit $vgpr0 + ; CHECK-NEXT: liveins: $sgpr30_sgpr31 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 + ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF + ; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(s1) = G_LOAD [[DEF]](p1) :: (load (s1) from `i1 addrspace(1)* undef`, addrspace 1) + ; CHECK-NEXT: [[SEXT:%[0-9]+]]:_(s32) = G_SEXT [[LOAD]](s1) + ; CHECK-NEXT: $vgpr0 = COPY [[SEXT]](s32) + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY]] + ; CHECK-NEXT: S_SETPC_B64_return [[COPY1]], implicit $vgpr0 %val = load i1, i1 addrspace(1)* undef ret i1 %val } @@ -51,14 +54,15 @@ define signext i1 @i1_signext_func_void() #0 { define i7 @i7_func_void() #0 { ; CHECK-LABEL: name: i7_func_void ; CHECK: bb.1 (%ir-block.0): - ; CHECK: liveins: $sgpr30_sgpr31 - ; CHECK: [[COPY:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 - ; CHECK: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF - ; CHECK: [[LOAD:%[0-9]+]]:_(s7) = G_LOAD [[DEF]](p1) :: (load (s7) from `i7 addrspace(1)* undef`, addrspace 1) - ; CHECK: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[LOAD]](s7) - ; CHECK: $vgpr0 = COPY [[ANYEXT]](s32) - ; CHECK: [[COPY1:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY]] - ; CHECK: S_SETPC_B64_return [[COPY1]], implicit $vgpr0 + ; CHECK-NEXT: liveins: $sgpr30_sgpr31 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 + ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF + ; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(s7) = G_LOAD [[DEF]](p1) :: (load (s7) from `i7 addrspace(1)* undef`, addrspace 1) + ; CHECK-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[LOAD]](s7) + ; CHECK-NEXT: $vgpr0 = COPY [[ANYEXT]](s32) + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY]] + ; CHECK-NEXT: S_SETPC_B64_return [[COPY1]], implicit $vgpr0 %val = load i7, i7 addrspace(1)* undef ret i7 %val } @@ -66,14 +70,15 @@ define i7 @i7_func_void() #0 { define zeroext i7 @i7_zeroext_func_void() #0 { ; CHECK-LABEL: name: i7_zeroext_func_void ; CHECK: bb.1 (%ir-block.0): - ; CHECK: liveins: $sgpr30_sgpr31 - ; CHECK: [[COPY:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 - ; CHECK: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF - ; CHECK: [[LOAD:%[0-9]+]]:_(s7) = G_LOAD [[DEF]](p1) :: (load (s7) from `i7 addrspace(1)* undef`, addrspace 1) - ; CHECK: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[LOAD]](s7) - ; CHECK: $vgpr0 = COPY [[ZEXT]](s32) - ; CHECK: [[COPY1:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY]] - ; CHECK: S_SETPC_B64_return [[COPY1]], implicit $vgpr0 + ; CHECK-NEXT: liveins: $sgpr30_sgpr31 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 + ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF + ; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(s7) = G_LOAD 
[[DEF]](p1) :: (load (s7) from `i7 addrspace(1)* undef`, addrspace 1) + ; CHECK-NEXT: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[LOAD]](s7) + ; CHECK-NEXT: $vgpr0 = COPY [[ZEXT]](s32) + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY]] + ; CHECK-NEXT: S_SETPC_B64_return [[COPY1]], implicit $vgpr0 %val = load i7, i7 addrspace(1)* undef ret i7 %val } @@ -81,14 +86,15 @@ define zeroext i7 @i7_zeroext_func_void() #0 { define signext i7 @i7_signext_func_void() #0 { ; CHECK-LABEL: name: i7_signext_func_void ; CHECK: bb.1 (%ir-block.0): - ; CHECK: liveins: $sgpr30_sgpr31 - ; CHECK: [[COPY:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 - ; CHECK: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF - ; CHECK: [[LOAD:%[0-9]+]]:_(s7) = G_LOAD [[DEF]](p1) :: (load (s7) from `i7 addrspace(1)* undef`, addrspace 1) - ; CHECK: [[SEXT:%[0-9]+]]:_(s32) = G_SEXT [[LOAD]](s7) - ; CHECK: $vgpr0 = COPY [[SEXT]](s32) - ; CHECK: [[COPY1:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY]] - ; CHECK: S_SETPC_B64_return [[COPY1]], implicit $vgpr0 + ; CHECK-NEXT: liveins: $sgpr30_sgpr31 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 + ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF + ; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(s7) = G_LOAD [[DEF]](p1) :: (load (s7) from `i7 addrspace(1)* undef`, addrspace 1) + ; CHECK-NEXT: [[SEXT:%[0-9]+]]:_(s32) = G_SEXT [[LOAD]](s7) + ; CHECK-NEXT: $vgpr0 = COPY [[SEXT]](s32) + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY]] + ; CHECK-NEXT: S_SETPC_B64_return [[COPY1]], implicit $vgpr0 %val = load i7, i7 addrspace(1)* undef ret i7 %val } @@ -96,14 +102,15 @@ define signext i7 @i7_signext_func_void() #0 { define i8 @i8_func_void() #0 { ; CHECK-LABEL: name: i8_func_void ; CHECK: bb.1 (%ir-block.0): - ; CHECK: liveins: $sgpr30_sgpr31 - ; CHECK: [[COPY:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 - ; CHECK: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF - ; CHECK: [[LOAD:%[0-9]+]]:_(s8) = G_LOAD [[DEF]](p1) :: (load (s8) from `i8 addrspace(1)* undef`, addrspace 1) - ; CHECK: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[LOAD]](s8) - ; CHECK: $vgpr0 = COPY [[ANYEXT]](s32) - ; CHECK: [[COPY1:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY]] - ; CHECK: S_SETPC_B64_return [[COPY1]], implicit $vgpr0 + ; CHECK-NEXT: liveins: $sgpr30_sgpr31 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 + ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF + ; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(s8) = G_LOAD [[DEF]](p1) :: (load (s8) from `i8 addrspace(1)* undef`, addrspace 1) + ; CHECK-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[LOAD]](s8) + ; CHECK-NEXT: $vgpr0 = COPY [[ANYEXT]](s32) + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY]] + ; CHECK-NEXT: S_SETPC_B64_return [[COPY1]], implicit $vgpr0 %val = load i8, i8 addrspace(1)* undef ret i8 %val } @@ -111,14 +118,15 @@ define i8 @i8_func_void() #0 { define zeroext i8 @i8_zeroext_func_void() #0 { ; CHECK-LABEL: name: i8_zeroext_func_void ; CHECK: bb.1 (%ir-block.0): - ; CHECK: liveins: $sgpr30_sgpr31 - ; CHECK: [[COPY:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 - ; CHECK: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF - ; CHECK: [[LOAD:%[0-9]+]]:_(s8) = G_LOAD [[DEF]](p1) :: (load (s8) from `i8 addrspace(1)* undef`, addrspace 1) - ; CHECK: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[LOAD]](s8) - ; CHECK: $vgpr0 = COPY [[ZEXT]](s32) - ; CHECK: [[COPY1:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY]] - ; CHECK: S_SETPC_B64_return [[COPY1]], implicit $vgpr0 + ; CHECK-NEXT: liveins: $sgpr30_sgpr31 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr_64 = 
COPY $sgpr30_sgpr31 + ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF + ; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(s8) = G_LOAD [[DEF]](p1) :: (load (s8) from `i8 addrspace(1)* undef`, addrspace 1) + ; CHECK-NEXT: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[LOAD]](s8) + ; CHECK-NEXT: $vgpr0 = COPY [[ZEXT]](s32) + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY]] + ; CHECK-NEXT: S_SETPC_B64_return [[COPY1]], implicit $vgpr0 %val = load i8, i8 addrspace(1)* undef ret i8 %val } @@ -126,14 +134,15 @@ define zeroext i8 @i8_zeroext_func_void() #0 { define signext i8 @i8_signext_func_void() #0 { ; CHECK-LABEL: name: i8_signext_func_void ; CHECK: bb.1 (%ir-block.0): - ; CHECK: liveins: $sgpr30_sgpr31 - ; CHECK: [[COPY:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 - ; CHECK: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF - ; CHECK: [[LOAD:%[0-9]+]]:_(s8) = G_LOAD [[DEF]](p1) :: (load (s8) from `i8 addrspace(1)* undef`, addrspace 1) - ; CHECK: [[SEXT:%[0-9]+]]:_(s32) = G_SEXT [[LOAD]](s8) - ; CHECK: $vgpr0 = COPY [[SEXT]](s32) - ; CHECK: [[COPY1:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY]] - ; CHECK: S_SETPC_B64_return [[COPY1]], implicit $vgpr0 + ; CHECK-NEXT: liveins: $sgpr30_sgpr31 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 + ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF + ; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(s8) = G_LOAD [[DEF]](p1) :: (load (s8) from `i8 addrspace(1)* undef`, addrspace 1) + ; CHECK-NEXT: [[SEXT:%[0-9]+]]:_(s32) = G_SEXT [[LOAD]](s8) + ; CHECK-NEXT: $vgpr0 = COPY [[SEXT]](s32) + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY]] + ; CHECK-NEXT: S_SETPC_B64_return [[COPY1]], implicit $vgpr0 %val = load i8, i8 addrspace(1)* undef ret i8 %val } @@ -141,14 +150,15 @@ define signext i8 @i8_signext_func_void() #0 { define i16 @i16_func_void() #0 { ; CHECK-LABEL: name: i16_func_void ; CHECK: bb.1 (%ir-block.0): - ; CHECK: liveins: $sgpr30_sgpr31 - ; CHECK: [[COPY:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 - ; CHECK: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF - ; CHECK: [[LOAD:%[0-9]+]]:_(s16) = G_LOAD [[DEF]](p1) :: (load (s16) from `i16 addrspace(1)* undef`, addrspace 1) - ; CHECK: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[LOAD]](s16) - ; CHECK: $vgpr0 = COPY [[ANYEXT]](s32) - ; CHECK: [[COPY1:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY]] - ; CHECK: S_SETPC_B64_return [[COPY1]], implicit $vgpr0 + ; CHECK-NEXT: liveins: $sgpr30_sgpr31 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 + ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF + ; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(s16) = G_LOAD [[DEF]](p1) :: (load (s16) from `i16 addrspace(1)* undef`, addrspace 1) + ; CHECK-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[LOAD]](s16) + ; CHECK-NEXT: $vgpr0 = COPY [[ANYEXT]](s32) + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY]] + ; CHECK-NEXT: S_SETPC_B64_return [[COPY1]], implicit $vgpr0 %val = load i16, i16 addrspace(1)* undef ret i16 %val } @@ -156,14 +166,15 @@ define i16 @i16_func_void() #0 { define zeroext i16 @i16_zeroext_func_void() #0 { ; CHECK-LABEL: name: i16_zeroext_func_void ; CHECK: bb.1 (%ir-block.0): - ; CHECK: liveins: $sgpr30_sgpr31 - ; CHECK: [[COPY:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 - ; CHECK: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF - ; CHECK: [[LOAD:%[0-9]+]]:_(s16) = G_LOAD [[DEF]](p1) :: (load (s16) from `i16 addrspace(1)* undef`, addrspace 1) - ; CHECK: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[LOAD]](s16) - ; CHECK: $vgpr0 = COPY [[ZEXT]](s32) - ; CHECK: [[COPY1:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY]] - ; CHECK: 
S_SETPC_B64_return [[COPY1]], implicit $vgpr0 + ; CHECK-NEXT: liveins: $sgpr30_sgpr31 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 + ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF + ; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(s16) = G_LOAD [[DEF]](p1) :: (load (s16) from `i16 addrspace(1)* undef`, addrspace 1) + ; CHECK-NEXT: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[LOAD]](s16) + ; CHECK-NEXT: $vgpr0 = COPY [[ZEXT]](s32) + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY]] + ; CHECK-NEXT: S_SETPC_B64_return [[COPY1]], implicit $vgpr0 %val = load i16, i16 addrspace(1)* undef ret i16 %val } @@ -171,14 +182,15 @@ define zeroext i16 @i16_zeroext_func_void() #0 { define signext i16 @i16_signext_func_void() #0 { ; CHECK-LABEL: name: i16_signext_func_void ; CHECK: bb.1 (%ir-block.0): - ; CHECK: liveins: $sgpr30_sgpr31 - ; CHECK: [[COPY:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 - ; CHECK: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF - ; CHECK: [[LOAD:%[0-9]+]]:_(s16) = G_LOAD [[DEF]](p1) :: (load (s16) from `i16 addrspace(1)* undef`, addrspace 1) - ; CHECK: [[SEXT:%[0-9]+]]:_(s32) = G_SEXT [[LOAD]](s16) - ; CHECK: $vgpr0 = COPY [[SEXT]](s32) - ; CHECK: [[COPY1:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY]] - ; CHECK: S_SETPC_B64_return [[COPY1]], implicit $vgpr0 + ; CHECK-NEXT: liveins: $sgpr30_sgpr31 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 + ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF + ; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(s16) = G_LOAD [[DEF]](p1) :: (load (s16) from `i16 addrspace(1)* undef`, addrspace 1) + ; CHECK-NEXT: [[SEXT:%[0-9]+]]:_(s32) = G_SEXT [[LOAD]](s16) + ; CHECK-NEXT: $vgpr0 = COPY [[SEXT]](s32) + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY]] + ; CHECK-NEXT: S_SETPC_B64_return [[COPY1]], implicit $vgpr0 %val = load i16, i16 addrspace(1)* undef ret i16 %val } @@ -186,14 +198,15 @@ define signext i16 @i16_signext_func_void() #0 { define half @f16_func_void() #0 { ; CHECK-LABEL: name: f16_func_void ; CHECK: bb.1 (%ir-block.0): - ; CHECK: liveins: $sgpr30_sgpr31 - ; CHECK: [[COPY:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 - ; CHECK: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF - ; CHECK: [[LOAD:%[0-9]+]]:_(s16) = G_LOAD [[DEF]](p1) :: (load (s16) from `half addrspace(1)* undef`, addrspace 1) - ; CHECK: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[LOAD]](s16) - ; CHECK: $vgpr0 = COPY [[ANYEXT]](s32) - ; CHECK: [[COPY1:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY]] - ; CHECK: S_SETPC_B64_return [[COPY1]], implicit $vgpr0 + ; CHECK-NEXT: liveins: $sgpr30_sgpr31 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 + ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF + ; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(s16) = G_LOAD [[DEF]](p1) :: (load (s16) from `half addrspace(1)* undef`, addrspace 1) + ; CHECK-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[LOAD]](s16) + ; CHECK-NEXT: $vgpr0 = COPY [[ANYEXT]](s32) + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY]] + ; CHECK-NEXT: S_SETPC_B64_return [[COPY1]], implicit $vgpr0 %val = load half, half addrspace(1)* undef ret half %val } @@ -201,14 +214,15 @@ define half @f16_func_void() #0 { define i24 @i24_func_void() #0 { ; CHECK-LABEL: name: i24_func_void ; CHECK: bb.1 (%ir-block.0): - ; CHECK: liveins: $sgpr30_sgpr31 - ; CHECK: [[COPY:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 - ; CHECK: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF - ; CHECK: [[LOAD:%[0-9]+]]:_(s24) = G_LOAD [[DEF]](p1) :: (load (s24) from `i24 addrspace(1)* undef`, align 4, addrspace 1) - 
; CHECK: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[LOAD]](s24) - ; CHECK: $vgpr0 = COPY [[ANYEXT]](s32) - ; CHECK: [[COPY1:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY]] - ; CHECK: S_SETPC_B64_return [[COPY1]], implicit $vgpr0 + ; CHECK-NEXT: liveins: $sgpr30_sgpr31 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 + ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF + ; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(s24) = G_LOAD [[DEF]](p1) :: (load (s24) from `i24 addrspace(1)* undef`, align 4, addrspace 1) + ; CHECK-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[LOAD]](s24) + ; CHECK-NEXT: $vgpr0 = COPY [[ANYEXT]](s32) + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY]] + ; CHECK-NEXT: S_SETPC_B64_return [[COPY1]], implicit $vgpr0 %val = load i24, i24 addrspace(1)* undef ret i24 %val } @@ -216,14 +230,15 @@ define i24 @i24_func_void() #0 { define zeroext i24 @i24_zeroext_func_void() #0 { ; CHECK-LABEL: name: i24_zeroext_func_void ; CHECK: bb.1 (%ir-block.0): - ; CHECK: liveins: $sgpr30_sgpr31 - ; CHECK: [[COPY:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 - ; CHECK: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF - ; CHECK: [[LOAD:%[0-9]+]]:_(s24) = G_LOAD [[DEF]](p1) :: (load (s24) from `i24 addrspace(1)* undef`, align 4, addrspace 1) - ; CHECK: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[LOAD]](s24) - ; CHECK: $vgpr0 = COPY [[ZEXT]](s32) - ; CHECK: [[COPY1:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY]] - ; CHECK: S_SETPC_B64_return [[COPY1]], implicit $vgpr0 + ; CHECK-NEXT: liveins: $sgpr30_sgpr31 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 + ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF + ; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(s24) = G_LOAD [[DEF]](p1) :: (load (s24) from `i24 addrspace(1)* undef`, align 4, addrspace 1) + ; CHECK-NEXT: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[LOAD]](s24) + ; CHECK-NEXT: $vgpr0 = COPY [[ZEXT]](s32) + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY]] + ; CHECK-NEXT: S_SETPC_B64_return [[COPY1]], implicit $vgpr0 %val = load i24, i24 addrspace(1)* undef ret i24 %val } @@ -231,14 +246,15 @@ define zeroext i24 @i24_zeroext_func_void() #0 { define signext i24 @i24_signext_func_void() #0 { ; CHECK-LABEL: name: i24_signext_func_void ; CHECK: bb.1 (%ir-block.0): - ; CHECK: liveins: $sgpr30_sgpr31 - ; CHECK: [[COPY:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 - ; CHECK: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF - ; CHECK: [[LOAD:%[0-9]+]]:_(s24) = G_LOAD [[DEF]](p1) :: (load (s24) from `i24 addrspace(1)* undef`, align 4, addrspace 1) - ; CHECK: [[SEXT:%[0-9]+]]:_(s32) = G_SEXT [[LOAD]](s24) - ; CHECK: $vgpr0 = COPY [[SEXT]](s32) - ; CHECK: [[COPY1:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY]] - ; CHECK: S_SETPC_B64_return [[COPY1]], implicit $vgpr0 + ; CHECK-NEXT: liveins: $sgpr30_sgpr31 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 + ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF + ; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(s24) = G_LOAD [[DEF]](p1) :: (load (s24) from `i24 addrspace(1)* undef`, align 4, addrspace 1) + ; CHECK-NEXT: [[SEXT:%[0-9]+]]:_(s32) = G_SEXT [[LOAD]](s24) + ; CHECK-NEXT: $vgpr0 = COPY [[SEXT]](s32) + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY]] + ; CHECK-NEXT: S_SETPC_B64_return [[COPY1]], implicit $vgpr0 %val = load i24, i24 addrspace(1)* undef ret i24 %val } @@ -246,17 +262,18 @@ define signext i24 @i24_signext_func_void() #0 { define <2 x i24> @v2i24_func_void() #0 { ; CHECK-LABEL: name: v2i24_func_void ; CHECK: bb.1 (%ir-block.0): - ; CHECK: liveins: $sgpr30_sgpr31 
- ; CHECK: [[COPY:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 - ; CHECK: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF - ; CHECK: [[LOAD:%[0-9]+]]:_(<2 x s24>) = G_LOAD [[DEF]](p1) :: (load (<2 x s24>) from `<2 x i24> addrspace(1)* undef`, align 8, addrspace 1) - ; CHECK: [[UV:%[0-9]+]]:_(s24), [[UV1:%[0-9]+]]:_(s24) = G_UNMERGE_VALUES [[LOAD]](<2 x s24>) - ; CHECK: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[UV]](s24) - ; CHECK: [[ANYEXT1:%[0-9]+]]:_(s32) = G_ANYEXT [[UV1]](s24) - ; CHECK: $vgpr0 = COPY [[ANYEXT]](s32) - ; CHECK: $vgpr1 = COPY [[ANYEXT1]](s32) - ; CHECK: [[COPY1:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY]] - ; CHECK: S_SETPC_B64_return [[COPY1]], implicit $vgpr0, implicit $vgpr1 + ; CHECK-NEXT: liveins: $sgpr30_sgpr31 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 + ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF + ; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(<2 x s24>) = G_LOAD [[DEF]](p1) :: (load (<2 x s24>) from `<2 x i24> addrspace(1)* undef`, align 8, addrspace 1) + ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s24), [[UV1:%[0-9]+]]:_(s24) = G_UNMERGE_VALUES [[LOAD]](<2 x s24>) + ; CHECK-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[UV]](s24) + ; CHECK-NEXT: [[ANYEXT1:%[0-9]+]]:_(s32) = G_ANYEXT [[UV1]](s24) + ; CHECK-NEXT: $vgpr0 = COPY [[ANYEXT]](s32) + ; CHECK-NEXT: $vgpr1 = COPY [[ANYEXT1]](s32) + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY]] + ; CHECK-NEXT: S_SETPC_B64_return [[COPY1]], implicit $vgpr0, implicit $vgpr1 %val = load <2 x i24>, <2 x i24> addrspace(1)* undef ret <2 x i24> %val } @@ -264,19 +281,20 @@ define <2 x i24> @v2i24_func_void() #0 { define <3 x i24> @v3i24_func_void() #0 { ; CHECK-LABEL: name: v3i24_func_void ; CHECK: bb.1 (%ir-block.0): - ; CHECK: liveins: $sgpr30_sgpr31 - ; CHECK: [[COPY:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 - ; CHECK: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF - ; CHECK: [[LOAD:%[0-9]+]]:_(<3 x s24>) = G_LOAD [[DEF]](p1) :: (load (<3 x s24>) from `<3 x i24> addrspace(1)* undef`, align 16, addrspace 1) - ; CHECK: [[UV:%[0-9]+]]:_(s24), [[UV1:%[0-9]+]]:_(s24), [[UV2:%[0-9]+]]:_(s24) = G_UNMERGE_VALUES [[LOAD]](<3 x s24>) - ; CHECK: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[UV]](s24) - ; CHECK: [[ANYEXT1:%[0-9]+]]:_(s32) = G_ANYEXT [[UV1]](s24) - ; CHECK: [[ANYEXT2:%[0-9]+]]:_(s32) = G_ANYEXT [[UV2]](s24) - ; CHECK: $vgpr0 = COPY [[ANYEXT]](s32) - ; CHECK: $vgpr1 = COPY [[ANYEXT1]](s32) - ; CHECK: $vgpr2 = COPY [[ANYEXT2]](s32) - ; CHECK: [[COPY1:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY]] - ; CHECK: S_SETPC_B64_return [[COPY1]], implicit $vgpr0, implicit $vgpr1, implicit $vgpr2 + ; CHECK-NEXT: liveins: $sgpr30_sgpr31 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 + ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF + ; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(<3 x s24>) = G_LOAD [[DEF]](p1) :: (load (<3 x s24>) from `<3 x i24> addrspace(1)* undef`, align 16, addrspace 1) + ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s24), [[UV1:%[0-9]+]]:_(s24), [[UV2:%[0-9]+]]:_(s24) = G_UNMERGE_VALUES [[LOAD]](<3 x s24>) + ; CHECK-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[UV]](s24) + ; CHECK-NEXT: [[ANYEXT1:%[0-9]+]]:_(s32) = G_ANYEXT [[UV1]](s24) + ; CHECK-NEXT: [[ANYEXT2:%[0-9]+]]:_(s32) = G_ANYEXT [[UV2]](s24) + ; CHECK-NEXT: $vgpr0 = COPY [[ANYEXT]](s32) + ; CHECK-NEXT: $vgpr1 = COPY [[ANYEXT1]](s32) + ; CHECK-NEXT: $vgpr2 = COPY [[ANYEXT2]](s32) + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY]] + ; CHECK-NEXT: S_SETPC_B64_return [[COPY1]], implicit $vgpr0, implicit $vgpr1, implicit $vgpr2 %val = load <3 
x i24>, <3 x i24> addrspace(1)* undef ret <3 x i24> %val } @@ -284,13 +302,14 @@ define <3 x i24> @v3i24_func_void() #0 { define i32 @i32_func_void() #0 { ; CHECK-LABEL: name: i32_func_void ; CHECK: bb.1 (%ir-block.0): - ; CHECK: liveins: $sgpr30_sgpr31 - ; CHECK: [[COPY:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 - ; CHECK: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF - ; CHECK: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[DEF]](p1) :: (load (s32) from `i32 addrspace(1)* undef`, addrspace 1) - ; CHECK: $vgpr0 = COPY [[LOAD]](s32) - ; CHECK: [[COPY1:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY]] - ; CHECK: S_SETPC_B64_return [[COPY1]], implicit $vgpr0 + ; CHECK-NEXT: liveins: $sgpr30_sgpr31 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 + ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF + ; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[DEF]](p1) :: (load (s32) from `i32 addrspace(1)* undef`, addrspace 1) + ; CHECK-NEXT: $vgpr0 = COPY [[LOAD]](s32) + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY]] + ; CHECK-NEXT: S_SETPC_B64_return [[COPY1]], implicit $vgpr0 %val = load i32, i32 addrspace(1)* undef ret i32 %val } @@ -298,16 +317,17 @@ define i32 @i32_func_void() #0 { define i48 @i48_func_void() #0 { ; CHECK-LABEL: name: i48_func_void ; CHECK: bb.1 (%ir-block.0): - ; CHECK: liveins: $sgpr30_sgpr31 - ; CHECK: [[COPY:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 - ; CHECK: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF - ; CHECK: [[LOAD:%[0-9]+]]:_(s48) = G_LOAD [[DEF]](p1) :: (load (s48) from `i48 addrspace(1)* undef`, align 8, addrspace 1) - ; CHECK: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[LOAD]](s48) - ; CHECK: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[ANYEXT]](s64) - ; CHECK: $vgpr0 = COPY [[UV]](s32) - ; CHECK: $vgpr1 = COPY [[UV1]](s32) - ; CHECK: [[COPY1:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY]] - ; CHECK: S_SETPC_B64_return [[COPY1]], implicit $vgpr0, implicit $vgpr1 + ; CHECK-NEXT: liveins: $sgpr30_sgpr31 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 + ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF + ; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(s48) = G_LOAD [[DEF]](p1) :: (load (s48) from `i48 addrspace(1)* undef`, align 8, addrspace 1) + ; CHECK-NEXT: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[LOAD]](s48) + ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[ANYEXT]](s64) + ; CHECK-NEXT: $vgpr0 = COPY [[UV]](s32) + ; CHECK-NEXT: $vgpr1 = COPY [[UV1]](s32) + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY]] + ; CHECK-NEXT: S_SETPC_B64_return [[COPY1]], implicit $vgpr0, implicit $vgpr1 %val = load i48, i48 addrspace(1)* undef, align 8 ret i48 %val } @@ -315,16 +335,17 @@ define i48 @i48_func_void() #0 { define signext i48 @i48_signext_func_void() #0 { ; CHECK-LABEL: name: i48_signext_func_void ; CHECK: bb.1 (%ir-block.0): - ; CHECK: liveins: $sgpr30_sgpr31 - ; CHECK: [[COPY:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 - ; CHECK: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF - ; CHECK: [[LOAD:%[0-9]+]]:_(s48) = G_LOAD [[DEF]](p1) :: (load (s48) from `i48 addrspace(1)* undef`, align 8, addrspace 1) - ; CHECK: [[SEXT:%[0-9]+]]:_(s64) = G_SEXT [[LOAD]](s48) - ; CHECK: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[SEXT]](s64) - ; CHECK: $vgpr0 = COPY [[UV]](s32) - ; CHECK: $vgpr1 = COPY [[UV1]](s32) - ; CHECK: [[COPY1:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY]] - ; CHECK: S_SETPC_B64_return [[COPY1]], implicit $vgpr0, implicit $vgpr1 + ; CHECK-NEXT: liveins: $sgpr30_sgpr31 + ; 
CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 + ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF + ; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(s48) = G_LOAD [[DEF]](p1) :: (load (s48) from `i48 addrspace(1)* undef`, align 8, addrspace 1) + ; CHECK-NEXT: [[SEXT:%[0-9]+]]:_(s64) = G_SEXT [[LOAD]](s48) + ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[SEXT]](s64) + ; CHECK-NEXT: $vgpr0 = COPY [[UV]](s32) + ; CHECK-NEXT: $vgpr1 = COPY [[UV1]](s32) + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY]] + ; CHECK-NEXT: S_SETPC_B64_return [[COPY1]], implicit $vgpr0, implicit $vgpr1 %val = load i48, i48 addrspace(1)* undef, align 8 ret i48 %val } @@ -332,16 +353,17 @@ define signext i48 @i48_signext_func_void() #0 { define zeroext i48 @i48_zeroext_func_void() #0 { ; CHECK-LABEL: name: i48_zeroext_func_void ; CHECK: bb.1 (%ir-block.0): - ; CHECK: liveins: $sgpr30_sgpr31 - ; CHECK: [[COPY:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 - ; CHECK: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF - ; CHECK: [[LOAD:%[0-9]+]]:_(s48) = G_LOAD [[DEF]](p1) :: (load (s48) from `i48 addrspace(1)* undef`, align 8, addrspace 1) - ; CHECK: [[ZEXT:%[0-9]+]]:_(s64) = G_ZEXT [[LOAD]](s48) - ; CHECK: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[ZEXT]](s64) - ; CHECK: $vgpr0 = COPY [[UV]](s32) - ; CHECK: $vgpr1 = COPY [[UV1]](s32) - ; CHECK: [[COPY1:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY]] - ; CHECK: S_SETPC_B64_return [[COPY1]], implicit $vgpr0, implicit $vgpr1 + ; CHECK-NEXT: liveins: $sgpr30_sgpr31 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 + ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF + ; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(s48) = G_LOAD [[DEF]](p1) :: (load (s48) from `i48 addrspace(1)* undef`, align 8, addrspace 1) + ; CHECK-NEXT: [[ZEXT:%[0-9]+]]:_(s64) = G_ZEXT [[LOAD]](s48) + ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[ZEXT]](s64) + ; CHECK-NEXT: $vgpr0 = COPY [[UV]](s32) + ; CHECK-NEXT: $vgpr1 = COPY [[UV1]](s32) + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY]] + ; CHECK-NEXT: S_SETPC_B64_return [[COPY1]], implicit $vgpr0, implicit $vgpr1 %val = load i48, i48 addrspace(1)* undef, align 8 ret i48 %val } @@ -349,15 +371,16 @@ define zeroext i48 @i48_zeroext_func_void() #0 { define i64 @i64_func_void() #0 { ; CHECK-LABEL: name: i64_func_void ; CHECK: bb.1 (%ir-block.0): - ; CHECK: liveins: $sgpr30_sgpr31 - ; CHECK: [[COPY:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 - ; CHECK: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF - ; CHECK: [[LOAD:%[0-9]+]]:_(s64) = G_LOAD [[DEF]](p1) :: (load (s64) from `i64 addrspace(1)* undef`, addrspace 1) - ; CHECK: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[LOAD]](s64) - ; CHECK: $vgpr0 = COPY [[UV]](s32) - ; CHECK: $vgpr1 = COPY [[UV1]](s32) - ; CHECK: [[COPY1:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY]] - ; CHECK: S_SETPC_B64_return [[COPY1]], implicit $vgpr0, implicit $vgpr1 + ; CHECK-NEXT: liveins: $sgpr30_sgpr31 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 + ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF + ; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(s64) = G_LOAD [[DEF]](p1) :: (load (s64) from `i64 addrspace(1)* undef`, addrspace 1) + ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[LOAD]](s64) + ; CHECK-NEXT: $vgpr0 = COPY [[UV]](s32) + ; CHECK-NEXT: $vgpr1 = COPY [[UV1]](s32) + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:ccr_sgpr_64 = COPY 
[[COPY]] + ; CHECK-NEXT: S_SETPC_B64_return [[COPY1]], implicit $vgpr0, implicit $vgpr1 %val = load i64, i64 addrspace(1)* undef ret i64 %val } @@ -365,17 +388,18 @@ define i64 @i64_func_void() #0 { define i65 @i65_func_void() #0 { ; CHECK-LABEL: name: i65_func_void ; CHECK: bb.1 (%ir-block.0): - ; CHECK: liveins: $sgpr30_sgpr31 - ; CHECK: [[COPY:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 - ; CHECK: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF - ; CHECK: [[LOAD:%[0-9]+]]:_(s65) = G_LOAD [[DEF]](p1) :: (load (s65) from `i65 addrspace(1)* undef`, align 8, addrspace 1) - ; CHECK: [[ANYEXT:%[0-9]+]]:_(s96) = G_ANYEXT [[LOAD]](s65) - ; CHECK: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[ANYEXT]](s96) - ; CHECK: $vgpr0 = COPY [[UV]](s32) - ; CHECK: $vgpr1 = COPY [[UV1]](s32) - ; CHECK: $vgpr2 = COPY [[UV2]](s32) - ; CHECK: [[COPY1:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY]] - ; CHECK: S_SETPC_B64_return [[COPY1]], implicit $vgpr0, implicit $vgpr1, implicit $vgpr2 + ; CHECK-NEXT: liveins: $sgpr30_sgpr31 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 + ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF + ; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(s65) = G_LOAD [[DEF]](p1) :: (load (s65) from `i65 addrspace(1)* undef`, align 8, addrspace 1) + ; CHECK-NEXT: [[ANYEXT:%[0-9]+]]:_(s96) = G_ANYEXT [[LOAD]](s65) + ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[ANYEXT]](s96) + ; CHECK-NEXT: $vgpr0 = COPY [[UV]](s32) + ; CHECK-NEXT: $vgpr1 = COPY [[UV1]](s32) + ; CHECK-NEXT: $vgpr2 = COPY [[UV2]](s32) + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY]] + ; CHECK-NEXT: S_SETPC_B64_return [[COPY1]], implicit $vgpr0, implicit $vgpr1, implicit $vgpr2 %val = load i65, i65 addrspace(1)* undef ret i65 %val } @@ -383,17 +407,18 @@ define i65 @i65_func_void() #0 { define signext i65 @i65_signext_func_void() #0 { ; CHECK-LABEL: name: i65_signext_func_void ; CHECK: bb.1 (%ir-block.0): - ; CHECK: liveins: $sgpr30_sgpr31 - ; CHECK: [[COPY:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 - ; CHECK: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF - ; CHECK: [[LOAD:%[0-9]+]]:_(s65) = G_LOAD [[DEF]](p1) :: (load (s65) from `i65 addrspace(1)* undef`, align 8, addrspace 1) - ; CHECK: [[SEXT:%[0-9]+]]:_(s96) = G_SEXT [[LOAD]](s65) - ; CHECK: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[SEXT]](s96) - ; CHECK: $vgpr0 = COPY [[UV]](s32) - ; CHECK: $vgpr1 = COPY [[UV1]](s32) - ; CHECK: $vgpr2 = COPY [[UV2]](s32) - ; CHECK: [[COPY1:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY]] - ; CHECK: S_SETPC_B64_return [[COPY1]], implicit $vgpr0, implicit $vgpr1, implicit $vgpr2 + ; CHECK-NEXT: liveins: $sgpr30_sgpr31 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 + ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF + ; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(s65) = G_LOAD [[DEF]](p1) :: (load (s65) from `i65 addrspace(1)* undef`, align 8, addrspace 1) + ; CHECK-NEXT: [[SEXT:%[0-9]+]]:_(s96) = G_SEXT [[LOAD]](s65) + ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[SEXT]](s96) + ; CHECK-NEXT: $vgpr0 = COPY [[UV]](s32) + ; CHECK-NEXT: $vgpr1 = COPY [[UV1]](s32) + ; CHECK-NEXT: $vgpr2 = COPY [[UV2]](s32) + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY]] + ; CHECK-NEXT: S_SETPC_B64_return [[COPY1]], implicit $vgpr0, implicit $vgpr1, implicit $vgpr2 %val = load i65, i65 addrspace(1)* undef ret i65 %val 
} @@ -401,17 +426,18 @@ define signext i65 @i65_signext_func_void() #0 { define zeroext i65 @i65_zeroext_func_void() #0 { ; CHECK-LABEL: name: i65_zeroext_func_void ; CHECK: bb.1 (%ir-block.0): - ; CHECK: liveins: $sgpr30_sgpr31 - ; CHECK: [[COPY:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 - ; CHECK: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF - ; CHECK: [[LOAD:%[0-9]+]]:_(s65) = G_LOAD [[DEF]](p1) :: (load (s65) from `i65 addrspace(1)* undef`, align 8, addrspace 1) - ; CHECK: [[ZEXT:%[0-9]+]]:_(s96) = G_ZEXT [[LOAD]](s65) - ; CHECK: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[ZEXT]](s96) - ; CHECK: $vgpr0 = COPY [[UV]](s32) - ; CHECK: $vgpr1 = COPY [[UV1]](s32) - ; CHECK: $vgpr2 = COPY [[UV2]](s32) - ; CHECK: [[COPY1:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY]] - ; CHECK: S_SETPC_B64_return [[COPY1]], implicit $vgpr0, implicit $vgpr1, implicit $vgpr2 + ; CHECK-NEXT: liveins: $sgpr30_sgpr31 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 + ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF + ; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(s65) = G_LOAD [[DEF]](p1) :: (load (s65) from `i65 addrspace(1)* undef`, align 8, addrspace 1) + ; CHECK-NEXT: [[ZEXT:%[0-9]+]]:_(s96) = G_ZEXT [[LOAD]](s65) + ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[ZEXT]](s96) + ; CHECK-NEXT: $vgpr0 = COPY [[UV]](s32) + ; CHECK-NEXT: $vgpr1 = COPY [[UV1]](s32) + ; CHECK-NEXT: $vgpr2 = COPY [[UV2]](s32) + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY]] + ; CHECK-NEXT: S_SETPC_B64_return [[COPY1]], implicit $vgpr0, implicit $vgpr1, implicit $vgpr2 %val = load i65, i65 addrspace(1)* undef ret i65 %val } @@ -419,13 +445,14 @@ define zeroext i65 @i65_zeroext_func_void() #0 { define float @f32_func_void() #0 { ; CHECK-LABEL: name: f32_func_void ; CHECK: bb.1 (%ir-block.0): - ; CHECK: liveins: $sgpr30_sgpr31 - ; CHECK: [[COPY:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 - ; CHECK: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF - ; CHECK: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[DEF]](p1) :: (load (s32) from `float addrspace(1)* undef`, addrspace 1) - ; CHECK: $vgpr0 = COPY [[LOAD]](s32) - ; CHECK: [[COPY1:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY]] - ; CHECK: S_SETPC_B64_return [[COPY1]], implicit $vgpr0 + ; CHECK-NEXT: liveins: $sgpr30_sgpr31 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 + ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF + ; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[DEF]](p1) :: (load (s32) from `float addrspace(1)* undef`, addrspace 1) + ; CHECK-NEXT: $vgpr0 = COPY [[LOAD]](s32) + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY]] + ; CHECK-NEXT: S_SETPC_B64_return [[COPY1]], implicit $vgpr0 %val = load float, float addrspace(1)* undef ret float %val } @@ -433,15 +460,16 @@ define float @f32_func_void() #0 { define double @f64_func_void() #0 { ; CHECK-LABEL: name: f64_func_void ; CHECK: bb.1 (%ir-block.0): - ; CHECK: liveins: $sgpr30_sgpr31 - ; CHECK: [[COPY:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 - ; CHECK: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF - ; CHECK: [[LOAD:%[0-9]+]]:_(s64) = G_LOAD [[DEF]](p1) :: (load (s64) from `double addrspace(1)* undef`, addrspace 1) - ; CHECK: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[LOAD]](s64) - ; CHECK: $vgpr0 = COPY [[UV]](s32) - ; CHECK: $vgpr1 = COPY [[UV1]](s32) - ; CHECK: [[COPY1:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY]] - ; CHECK: S_SETPC_B64_return [[COPY1]], implicit $vgpr0, 
implicit $vgpr1 + ; CHECK-NEXT: liveins: $sgpr30_sgpr31 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 + ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF + ; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(s64) = G_LOAD [[DEF]](p1) :: (load (s64) from `double addrspace(1)* undef`, addrspace 1) + ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[LOAD]](s64) + ; CHECK-NEXT: $vgpr0 = COPY [[UV]](s32) + ; CHECK-NEXT: $vgpr1 = COPY [[UV1]](s32) + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY]] + ; CHECK-NEXT: S_SETPC_B64_return [[COPY1]], implicit $vgpr0, implicit $vgpr1 %val = load double, double addrspace(1)* undef ret double %val } @@ -449,17 +477,18 @@ define double @f64_func_void() #0 { define <2 x double> @v2f64_func_void() #0 { ; CHECK-LABEL: name: v2f64_func_void ; CHECK: bb.1 (%ir-block.0): - ; CHECK: liveins: $sgpr30_sgpr31 - ; CHECK: [[COPY:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 - ; CHECK: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF - ; CHECK: [[LOAD:%[0-9]+]]:_(<2 x s64>) = G_LOAD [[DEF]](p1) :: (load (<2 x s64>) from `<2 x double> addrspace(1)* undef`, addrspace 1) - ; CHECK: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[LOAD]](<2 x s64>) - ; CHECK: $vgpr0 = COPY [[UV]](s32) - ; CHECK: $vgpr1 = COPY [[UV1]](s32) - ; CHECK: $vgpr2 = COPY [[UV2]](s32) - ; CHECK: $vgpr3 = COPY [[UV3]](s32) - ; CHECK: [[COPY1:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY]] - ; CHECK: S_SETPC_B64_return [[COPY1]], implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3 + ; CHECK-NEXT: liveins: $sgpr30_sgpr31 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 + ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF + ; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(<2 x s64>) = G_LOAD [[DEF]](p1) :: (load (<2 x s64>) from `<2 x double> addrspace(1)* undef`, addrspace 1) + ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[LOAD]](<2 x s64>) + ; CHECK-NEXT: $vgpr0 = COPY [[UV]](s32) + ; CHECK-NEXT: $vgpr1 = COPY [[UV1]](s32) + ; CHECK-NEXT: $vgpr2 = COPY [[UV2]](s32) + ; CHECK-NEXT: $vgpr3 = COPY [[UV3]](s32) + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY]] + ; CHECK-NEXT: S_SETPC_B64_return [[COPY1]], implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3 %val = load <2 x double>, <2 x double> addrspace(1)* undef ret <2 x double> %val } @@ -467,15 +496,16 @@ define <2 x double> @v2f64_func_void() #0 { define <2 x i32> @v2i32_func_void() #0 { ; CHECK-LABEL: name: v2i32_func_void ; CHECK: bb.1 (%ir-block.0): - ; CHECK: liveins: $sgpr30_sgpr31 - ; CHECK: [[COPY:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 - ; CHECK: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF - ; CHECK: [[LOAD:%[0-9]+]]:_(<2 x s32>) = G_LOAD [[DEF]](p1) :: (load (<2 x s32>) from `<2 x i32> addrspace(1)* undef`, addrspace 1) - ; CHECK: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[LOAD]](<2 x s32>) - ; CHECK: $vgpr0 = COPY [[UV]](s32) - ; CHECK: $vgpr1 = COPY [[UV1]](s32) - ; CHECK: [[COPY1:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY]] - ; CHECK: S_SETPC_B64_return [[COPY1]], implicit $vgpr0, implicit $vgpr1 + ; CHECK-NEXT: liveins: $sgpr30_sgpr31 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 + ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF + ; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(<2 x s32>) = G_LOAD [[DEF]](p1) :: (load (<2 x s32>) from `<2 x i32> 
addrspace(1)* undef`, addrspace 1) + ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[LOAD]](<2 x s32>) + ; CHECK-NEXT: $vgpr0 = COPY [[UV]](s32) + ; CHECK-NEXT: $vgpr1 = COPY [[UV1]](s32) + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY]] + ; CHECK-NEXT: S_SETPC_B64_return [[COPY1]], implicit $vgpr0, implicit $vgpr1 %val = load <2 x i32>, <2 x i32> addrspace(1)* undef ret <2 x i32> %val } @@ -483,16 +513,17 @@ define <2 x i32> @v2i32_func_void() #0 { define <3 x i32> @v3i32_func_void() #0 { ; CHECK-LABEL: name: v3i32_func_void ; CHECK: bb.1 (%ir-block.0): - ; CHECK: liveins: $sgpr30_sgpr31 - ; CHECK: [[COPY:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 - ; CHECK: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF - ; CHECK: [[LOAD:%[0-9]+]]:_(<3 x s32>) = G_LOAD [[DEF]](p1) :: (load (<3 x s32>) from `<3 x i32> addrspace(1)* undef`, align 16, addrspace 1) - ; CHECK: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[LOAD]](<3 x s32>) - ; CHECK: $vgpr0 = COPY [[UV]](s32) - ; CHECK: $vgpr1 = COPY [[UV1]](s32) - ; CHECK: $vgpr2 = COPY [[UV2]](s32) - ; CHECK: [[COPY1:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY]] - ; CHECK: S_SETPC_B64_return [[COPY1]], implicit $vgpr0, implicit $vgpr1, implicit $vgpr2 + ; CHECK-NEXT: liveins: $sgpr30_sgpr31 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 + ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF + ; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(<3 x s32>) = G_LOAD [[DEF]](p1) :: (load (<3 x s32>) from `<3 x i32> addrspace(1)* undef`, align 16, addrspace 1) + ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[LOAD]](<3 x s32>) + ; CHECK-NEXT: $vgpr0 = COPY [[UV]](s32) + ; CHECK-NEXT: $vgpr1 = COPY [[UV1]](s32) + ; CHECK-NEXT: $vgpr2 = COPY [[UV2]](s32) + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY]] + ; CHECK-NEXT: S_SETPC_B64_return [[COPY1]], implicit $vgpr0, implicit $vgpr1, implicit $vgpr2 %val = load <3 x i32>, <3 x i32> addrspace(1)* undef ret <3 x i32> %val } @@ -500,17 +531,18 @@ define <3 x i32> @v3i32_func_void() #0 { define <4 x i32> @v4i32_func_void() #0 { ; CHECK-LABEL: name: v4i32_func_void ; CHECK: bb.1 (%ir-block.0): - ; CHECK: liveins: $sgpr30_sgpr31 - ; CHECK: [[COPY:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 - ; CHECK: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF - ; CHECK: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[DEF]](p1) :: (load (<4 x s32>) from `<4 x i32> addrspace(1)* undef`, addrspace 1) - ; CHECK: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[LOAD]](<4 x s32>) - ; CHECK: $vgpr0 = COPY [[UV]](s32) - ; CHECK: $vgpr1 = COPY [[UV1]](s32) - ; CHECK: $vgpr2 = COPY [[UV2]](s32) - ; CHECK: $vgpr3 = COPY [[UV3]](s32) - ; CHECK: [[COPY1:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY]] - ; CHECK: S_SETPC_B64_return [[COPY1]], implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3 + ; CHECK-NEXT: liveins: $sgpr30_sgpr31 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 + ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF + ; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[DEF]](p1) :: (load (<4 x s32>) from `<4 x i32> addrspace(1)* undef`, addrspace 1) + ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[LOAD]](<4 x s32>) + ; CHECK-NEXT: $vgpr0 = COPY [[UV]](s32) + ; CHECK-NEXT: $vgpr1 = COPY [[UV1]](s32) + ; 
CHECK-NEXT: $vgpr2 = COPY [[UV2]](s32) + ; CHECK-NEXT: $vgpr3 = COPY [[UV3]](s32) + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY]] + ; CHECK-NEXT: S_SETPC_B64_return [[COPY1]], implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3 %val = load <4 x i32>, <4 x i32> addrspace(1)* undef ret <4 x i32> %val } @@ -518,18 +550,19 @@ define <4 x i32> @v4i32_func_void() #0 { define <5 x i32> @v5i32_func_void() #0 { ; CHECK-LABEL: name: v5i32_func_void ; CHECK: bb.1 (%ir-block.0): - ; CHECK: liveins: $sgpr30_sgpr31 - ; CHECK: [[COPY:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 - ; CHECK: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF - ; CHECK: [[LOAD:%[0-9]+]]:_(<5 x s32>) = G_LOAD [[DEF]](p1) :: (volatile load (<5 x s32>) from `<5 x i32> addrspace(1)* undef`, align 32, addrspace 1) - ; CHECK: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[LOAD]](<5 x s32>) - ; CHECK: $vgpr0 = COPY [[UV]](s32) - ; CHECK: $vgpr1 = COPY [[UV1]](s32) - ; CHECK: $vgpr2 = COPY [[UV2]](s32) - ; CHECK: $vgpr3 = COPY [[UV3]](s32) - ; CHECK: $vgpr4 = COPY [[UV4]](s32) - ; CHECK: [[COPY1:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY]] - ; CHECK: S_SETPC_B64_return [[COPY1]], implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4 + ; CHECK-NEXT: liveins: $sgpr30_sgpr31 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 + ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF + ; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(<5 x s32>) = G_LOAD [[DEF]](p1) :: (volatile load (<5 x s32>) from `<5 x i32> addrspace(1)* undef`, align 32, addrspace 1) + ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[LOAD]](<5 x s32>) + ; CHECK-NEXT: $vgpr0 = COPY [[UV]](s32) + ; CHECK-NEXT: $vgpr1 = COPY [[UV1]](s32) + ; CHECK-NEXT: $vgpr2 = COPY [[UV2]](s32) + ; CHECK-NEXT: $vgpr3 = COPY [[UV3]](s32) + ; CHECK-NEXT: $vgpr4 = COPY [[UV4]](s32) + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY]] + ; CHECK-NEXT: S_SETPC_B64_return [[COPY1]], implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4 %val = load volatile <5 x i32>, <5 x i32> addrspace(1)* undef ret <5 x i32> %val } @@ -537,22 +570,23 @@ define <5 x i32> @v5i32_func_void() #0 { define <8 x i32> @v8i32_func_void() #0 { ; CHECK-LABEL: name: v8i32_func_void ; CHECK: bb.1 (%ir-block.0): - ; CHECK: liveins: $sgpr30_sgpr31 - ; CHECK: [[COPY:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 - ; CHECK: [[DEF:%[0-9]+]]:_(p4) = G_IMPLICIT_DEF - ; CHECK: [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[DEF]](p4) :: (volatile load (p1) from `<8 x i32> addrspace(1)* addrspace(4)* undef`, addrspace 4) - ; CHECK: [[LOAD1:%[0-9]+]]:_(<8 x s32>) = G_LOAD [[LOAD]](p1) :: (load (<8 x s32>) from %ir.ptr, addrspace 1) - ; CHECK: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32), [[UV6:%[0-9]+]]:_(s32), [[UV7:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[LOAD1]](<8 x s32>) - ; CHECK: $vgpr0 = COPY [[UV]](s32) - ; CHECK: $vgpr1 = COPY [[UV1]](s32) - ; CHECK: $vgpr2 = COPY [[UV2]](s32) - ; CHECK: $vgpr3 = COPY [[UV3]](s32) - ; CHECK: $vgpr4 = COPY [[UV4]](s32) - ; CHECK: $vgpr5 = COPY [[UV5]](s32) - ; CHECK: $vgpr6 = COPY [[UV6]](s32) - ; CHECK: $vgpr7 = COPY [[UV7]](s32) - ; CHECK: [[COPY1:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY]] - ; CHECK: S_SETPC_B64_return [[COPY1]], 
implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7 + ; CHECK-NEXT: liveins: $sgpr30_sgpr31 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 + ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p4) = G_IMPLICIT_DEF + ; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[DEF]](p4) :: (volatile load (p1) from `<8 x i32> addrspace(1)* addrspace(4)* undef`, addrspace 4) + ; CHECK-NEXT: [[LOAD1:%[0-9]+]]:_(<8 x s32>) = G_LOAD [[LOAD]](p1) :: (load (<8 x s32>) from %ir.ptr, addrspace 1) + ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32), [[UV6:%[0-9]+]]:_(s32), [[UV7:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[LOAD1]](<8 x s32>) + ; CHECK-NEXT: $vgpr0 = COPY [[UV]](s32) + ; CHECK-NEXT: $vgpr1 = COPY [[UV1]](s32) + ; CHECK-NEXT: $vgpr2 = COPY [[UV2]](s32) + ; CHECK-NEXT: $vgpr3 = COPY [[UV3]](s32) + ; CHECK-NEXT: $vgpr4 = COPY [[UV4]](s32) + ; CHECK-NEXT: $vgpr5 = COPY [[UV5]](s32) + ; CHECK-NEXT: $vgpr6 = COPY [[UV6]](s32) + ; CHECK-NEXT: $vgpr7 = COPY [[UV7]](s32) + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY]] + ; CHECK-NEXT: S_SETPC_B64_return [[COPY1]], implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7 %ptr = load volatile <8 x i32> addrspace(1)*, <8 x i32> addrspace(1)* addrspace(4)* undef %val = load <8 x i32>, <8 x i32> addrspace(1)* %ptr ret <8 x i32> %val @@ -561,30 +595,31 @@ define <8 x i32> @v8i32_func_void() #0 { define <16 x i32> @v16i32_func_void() #0 { ; CHECK-LABEL: name: v16i32_func_void ; CHECK: bb.1 (%ir-block.0): - ; CHECK: liveins: $sgpr30_sgpr31 - ; CHECK: [[COPY:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 - ; CHECK: [[DEF:%[0-9]+]]:_(p4) = G_IMPLICIT_DEF - ; CHECK: [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[DEF]](p4) :: (volatile load (p1) from `<16 x i32> addrspace(1)* addrspace(4)* undef`, addrspace 4) - ; CHECK: [[LOAD1:%[0-9]+]]:_(<16 x s32>) = G_LOAD [[LOAD]](p1) :: (load (<16 x s32>) from %ir.ptr, addrspace 1) - ; CHECK: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32), [[UV6:%[0-9]+]]:_(s32), [[UV7:%[0-9]+]]:_(s32), [[UV8:%[0-9]+]]:_(s32), [[UV9:%[0-9]+]]:_(s32), [[UV10:%[0-9]+]]:_(s32), [[UV11:%[0-9]+]]:_(s32), [[UV12:%[0-9]+]]:_(s32), [[UV13:%[0-9]+]]:_(s32), [[UV14:%[0-9]+]]:_(s32), [[UV15:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[LOAD1]](<16 x s32>) - ; CHECK: $vgpr0 = COPY [[UV]](s32) - ; CHECK: $vgpr1 = COPY [[UV1]](s32) - ; CHECK: $vgpr2 = COPY [[UV2]](s32) - ; CHECK: $vgpr3 = COPY [[UV3]](s32) - ; CHECK: $vgpr4 = COPY [[UV4]](s32) - ; CHECK: $vgpr5 = COPY [[UV5]](s32) - ; CHECK: $vgpr6 = COPY [[UV6]](s32) - ; CHECK: $vgpr7 = COPY [[UV7]](s32) - ; CHECK: $vgpr8 = COPY [[UV8]](s32) - ; CHECK: $vgpr9 = COPY [[UV9]](s32) - ; CHECK: $vgpr10 = COPY [[UV10]](s32) - ; CHECK: $vgpr11 = COPY [[UV11]](s32) - ; CHECK: $vgpr12 = COPY [[UV12]](s32) - ; CHECK: $vgpr13 = COPY [[UV13]](s32) - ; CHECK: $vgpr14 = COPY [[UV14]](s32) - ; CHECK: $vgpr15 = COPY [[UV15]](s32) - ; CHECK: [[COPY1:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY]] - ; CHECK: S_SETPC_B64_return [[COPY1]], implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7, implicit $vgpr8, implicit $vgpr9, implicit $vgpr10, implicit $vgpr11, implicit $vgpr12, implicit $vgpr13, 
implicit $vgpr14, implicit $vgpr15 + ; CHECK-NEXT: liveins: $sgpr30_sgpr31 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 + ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p4) = G_IMPLICIT_DEF + ; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[DEF]](p4) :: (volatile load (p1) from `<16 x i32> addrspace(1)* addrspace(4)* undef`, addrspace 4) + ; CHECK-NEXT: [[LOAD1:%[0-9]+]]:_(<16 x s32>) = G_LOAD [[LOAD]](p1) :: (load (<16 x s32>) from %ir.ptr, addrspace 1) + ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32), [[UV6:%[0-9]+]]:_(s32), [[UV7:%[0-9]+]]:_(s32), [[UV8:%[0-9]+]]:_(s32), [[UV9:%[0-9]+]]:_(s32), [[UV10:%[0-9]+]]:_(s32), [[UV11:%[0-9]+]]:_(s32), [[UV12:%[0-9]+]]:_(s32), [[UV13:%[0-9]+]]:_(s32), [[UV14:%[0-9]+]]:_(s32), [[UV15:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[LOAD1]](<16 x s32>) + ; CHECK-NEXT: $vgpr0 = COPY [[UV]](s32) + ; CHECK-NEXT: $vgpr1 = COPY [[UV1]](s32) + ; CHECK-NEXT: $vgpr2 = COPY [[UV2]](s32) + ; CHECK-NEXT: $vgpr3 = COPY [[UV3]](s32) + ; CHECK-NEXT: $vgpr4 = COPY [[UV4]](s32) + ; CHECK-NEXT: $vgpr5 = COPY [[UV5]](s32) + ; CHECK-NEXT: $vgpr6 = COPY [[UV6]](s32) + ; CHECK-NEXT: $vgpr7 = COPY [[UV7]](s32) + ; CHECK-NEXT: $vgpr8 = COPY [[UV8]](s32) + ; CHECK-NEXT: $vgpr9 = COPY [[UV9]](s32) + ; CHECK-NEXT: $vgpr10 = COPY [[UV10]](s32) + ; CHECK-NEXT: $vgpr11 = COPY [[UV11]](s32) + ; CHECK-NEXT: $vgpr12 = COPY [[UV12]](s32) + ; CHECK-NEXT: $vgpr13 = COPY [[UV13]](s32) + ; CHECK-NEXT: $vgpr14 = COPY [[UV14]](s32) + ; CHECK-NEXT: $vgpr15 = COPY [[UV15]](s32) + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY]] + ; CHECK-NEXT: S_SETPC_B64_return [[COPY1]], implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7, implicit $vgpr8, implicit $vgpr9, implicit $vgpr10, implicit $vgpr11, implicit $vgpr12, implicit $vgpr13, implicit $vgpr14, implicit $vgpr15 %ptr = load volatile <16 x i32> addrspace(1)*, <16 x i32> addrspace(1)* addrspace(4)* undef %val = load <16 x i32>, <16 x i32> addrspace(1)* %ptr ret <16 x i32> %val @@ -593,46 +628,47 @@ define <16 x i32> @v16i32_func_void() #0 { define <32 x i32> @v32i32_func_void() #0 { ; CHECK-LABEL: name: v32i32_func_void ; CHECK: bb.1 (%ir-block.0): - ; CHECK: liveins: $sgpr30_sgpr31 - ; CHECK: [[COPY:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 - ; CHECK: [[DEF:%[0-9]+]]:_(p4) = G_IMPLICIT_DEF - ; CHECK: [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[DEF]](p4) :: (volatile load (p1) from `<32 x i32> addrspace(1)* addrspace(4)* undef`, addrspace 4) - ; CHECK: [[LOAD1:%[0-9]+]]:_(<32 x s32>) = G_LOAD [[LOAD]](p1) :: (load (<32 x s32>) from %ir.ptr, addrspace 1) - ; CHECK: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32), [[UV6:%[0-9]+]]:_(s32), [[UV7:%[0-9]+]]:_(s32), [[UV8:%[0-9]+]]:_(s32), [[UV9:%[0-9]+]]:_(s32), [[UV10:%[0-9]+]]:_(s32), [[UV11:%[0-9]+]]:_(s32), [[UV12:%[0-9]+]]:_(s32), [[UV13:%[0-9]+]]:_(s32), [[UV14:%[0-9]+]]:_(s32), [[UV15:%[0-9]+]]:_(s32), [[UV16:%[0-9]+]]:_(s32), [[UV17:%[0-9]+]]:_(s32), [[UV18:%[0-9]+]]:_(s32), [[UV19:%[0-9]+]]:_(s32), [[UV20:%[0-9]+]]:_(s32), [[UV21:%[0-9]+]]:_(s32), [[UV22:%[0-9]+]]:_(s32), [[UV23:%[0-9]+]]:_(s32), [[UV24:%[0-9]+]]:_(s32), [[UV25:%[0-9]+]]:_(s32), [[UV26:%[0-9]+]]:_(s32), [[UV27:%[0-9]+]]:_(s32), [[UV28:%[0-9]+]]:_(s32), [[UV29:%[0-9]+]]:_(s32), [[UV30:%[0-9]+]]:_(s32), [[UV31:%[0-9]+]]:_(s32) 
= G_UNMERGE_VALUES [[LOAD1]](<32 x s32>) - ; CHECK: $vgpr0 = COPY [[UV]](s32) - ; CHECK: $vgpr1 = COPY [[UV1]](s32) - ; CHECK: $vgpr2 = COPY [[UV2]](s32) - ; CHECK: $vgpr3 = COPY [[UV3]](s32) - ; CHECK: $vgpr4 = COPY [[UV4]](s32) - ; CHECK: $vgpr5 = COPY [[UV5]](s32) - ; CHECK: $vgpr6 = COPY [[UV6]](s32) - ; CHECK: $vgpr7 = COPY [[UV7]](s32) - ; CHECK: $vgpr8 = COPY [[UV8]](s32) - ; CHECK: $vgpr9 = COPY [[UV9]](s32) - ; CHECK: $vgpr10 = COPY [[UV10]](s32) - ; CHECK: $vgpr11 = COPY [[UV11]](s32) - ; CHECK: $vgpr12 = COPY [[UV12]](s32) - ; CHECK: $vgpr13 = COPY [[UV13]](s32) - ; CHECK: $vgpr14 = COPY [[UV14]](s32) - ; CHECK: $vgpr15 = COPY [[UV15]](s32) - ; CHECK: $vgpr16 = COPY [[UV16]](s32) - ; CHECK: $vgpr17 = COPY [[UV17]](s32) - ; CHECK: $vgpr18 = COPY [[UV18]](s32) - ; CHECK: $vgpr19 = COPY [[UV19]](s32) - ; CHECK: $vgpr20 = COPY [[UV20]](s32) - ; CHECK: $vgpr21 = COPY [[UV21]](s32) - ; CHECK: $vgpr22 = COPY [[UV22]](s32) - ; CHECK: $vgpr23 = COPY [[UV23]](s32) - ; CHECK: $vgpr24 = COPY [[UV24]](s32) - ; CHECK: $vgpr25 = COPY [[UV25]](s32) - ; CHECK: $vgpr26 = COPY [[UV26]](s32) - ; CHECK: $vgpr27 = COPY [[UV27]](s32) - ; CHECK: $vgpr28 = COPY [[UV28]](s32) - ; CHECK: $vgpr29 = COPY [[UV29]](s32) - ; CHECK: $vgpr30 = COPY [[UV30]](s32) - ; CHECK: $vgpr31 = COPY [[UV31]](s32) - ; CHECK: [[COPY1:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY]] - ; CHECK: S_SETPC_B64_return [[COPY1]], implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7, implicit $vgpr8, implicit $vgpr9, implicit $vgpr10, implicit $vgpr11, implicit $vgpr12, implicit $vgpr13, implicit $vgpr14, implicit $vgpr15, implicit $vgpr16, implicit $vgpr17, implicit $vgpr18, implicit $vgpr19, implicit $vgpr20, implicit $vgpr21, implicit $vgpr22, implicit $vgpr23, implicit $vgpr24, implicit $vgpr25, implicit $vgpr26, implicit $vgpr27, implicit $vgpr28, implicit $vgpr29, implicit $vgpr30, implicit $vgpr31 + ; CHECK-NEXT: liveins: $sgpr30_sgpr31 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 + ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p4) = G_IMPLICIT_DEF + ; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[DEF]](p4) :: (volatile load (p1) from `<32 x i32> addrspace(1)* addrspace(4)* undef`, addrspace 4) + ; CHECK-NEXT: [[LOAD1:%[0-9]+]]:_(<32 x s32>) = G_LOAD [[LOAD]](p1) :: (load (<32 x s32>) from %ir.ptr, addrspace 1) + ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32), [[UV6:%[0-9]+]]:_(s32), [[UV7:%[0-9]+]]:_(s32), [[UV8:%[0-9]+]]:_(s32), [[UV9:%[0-9]+]]:_(s32), [[UV10:%[0-9]+]]:_(s32), [[UV11:%[0-9]+]]:_(s32), [[UV12:%[0-9]+]]:_(s32), [[UV13:%[0-9]+]]:_(s32), [[UV14:%[0-9]+]]:_(s32), [[UV15:%[0-9]+]]:_(s32), [[UV16:%[0-9]+]]:_(s32), [[UV17:%[0-9]+]]:_(s32), [[UV18:%[0-9]+]]:_(s32), [[UV19:%[0-9]+]]:_(s32), [[UV20:%[0-9]+]]:_(s32), [[UV21:%[0-9]+]]:_(s32), [[UV22:%[0-9]+]]:_(s32), [[UV23:%[0-9]+]]:_(s32), [[UV24:%[0-9]+]]:_(s32), [[UV25:%[0-9]+]]:_(s32), [[UV26:%[0-9]+]]:_(s32), [[UV27:%[0-9]+]]:_(s32), [[UV28:%[0-9]+]]:_(s32), [[UV29:%[0-9]+]]:_(s32), [[UV30:%[0-9]+]]:_(s32), [[UV31:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[LOAD1]](<32 x s32>) + ; CHECK-NEXT: $vgpr0 = COPY [[UV]](s32) + ; CHECK-NEXT: $vgpr1 = COPY [[UV1]](s32) + ; CHECK-NEXT: $vgpr2 = COPY [[UV2]](s32) + ; CHECK-NEXT: $vgpr3 = COPY [[UV3]](s32) + ; CHECK-NEXT: $vgpr4 = COPY [[UV4]](s32) + ; CHECK-NEXT: $vgpr5 = COPY [[UV5]](s32) + ; CHECK-NEXT: $vgpr6 = COPY 
[[UV6]](s32) + ; CHECK-NEXT: $vgpr7 = COPY [[UV7]](s32) + ; CHECK-NEXT: $vgpr8 = COPY [[UV8]](s32) + ; CHECK-NEXT: $vgpr9 = COPY [[UV9]](s32) + ; CHECK-NEXT: $vgpr10 = COPY [[UV10]](s32) + ; CHECK-NEXT: $vgpr11 = COPY [[UV11]](s32) + ; CHECK-NEXT: $vgpr12 = COPY [[UV12]](s32) + ; CHECK-NEXT: $vgpr13 = COPY [[UV13]](s32) + ; CHECK-NEXT: $vgpr14 = COPY [[UV14]](s32) + ; CHECK-NEXT: $vgpr15 = COPY [[UV15]](s32) + ; CHECK-NEXT: $vgpr16 = COPY [[UV16]](s32) + ; CHECK-NEXT: $vgpr17 = COPY [[UV17]](s32) + ; CHECK-NEXT: $vgpr18 = COPY [[UV18]](s32) + ; CHECK-NEXT: $vgpr19 = COPY [[UV19]](s32) + ; CHECK-NEXT: $vgpr20 = COPY [[UV20]](s32) + ; CHECK-NEXT: $vgpr21 = COPY [[UV21]](s32) + ; CHECK-NEXT: $vgpr22 = COPY [[UV22]](s32) + ; CHECK-NEXT: $vgpr23 = COPY [[UV23]](s32) + ; CHECK-NEXT: $vgpr24 = COPY [[UV24]](s32) + ; CHECK-NEXT: $vgpr25 = COPY [[UV25]](s32) + ; CHECK-NEXT: $vgpr26 = COPY [[UV26]](s32) + ; CHECK-NEXT: $vgpr27 = COPY [[UV27]](s32) + ; CHECK-NEXT: $vgpr28 = COPY [[UV28]](s32) + ; CHECK-NEXT: $vgpr29 = COPY [[UV29]](s32) + ; CHECK-NEXT: $vgpr30 = COPY [[UV30]](s32) + ; CHECK-NEXT: $vgpr31 = COPY [[UV31]](s32) + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY]] + ; CHECK-NEXT: S_SETPC_B64_return [[COPY1]], implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7, implicit $vgpr8, implicit $vgpr9, implicit $vgpr10, implicit $vgpr11, implicit $vgpr12, implicit $vgpr13, implicit $vgpr14, implicit $vgpr15, implicit $vgpr16, implicit $vgpr17, implicit $vgpr18, implicit $vgpr19, implicit $vgpr20, implicit $vgpr21, implicit $vgpr22, implicit $vgpr23, implicit $vgpr24, implicit $vgpr25, implicit $vgpr26, implicit $vgpr27, implicit $vgpr28, implicit $vgpr29, implicit $vgpr30, implicit $vgpr31 %ptr = load volatile <32 x i32> addrspace(1)*, <32 x i32> addrspace(1)* addrspace(4)* undef %val = load <32 x i32>, <32 x i32> addrspace(1)* %ptr ret <32 x i32> %val @@ -641,17 +677,18 @@ define <32 x i32> @v32i32_func_void() #0 { define <2 x i64> @v2i64_func_void() #0 { ; CHECK-LABEL: name: v2i64_func_void ; CHECK: bb.1 (%ir-block.0): - ; CHECK: liveins: $sgpr30_sgpr31 - ; CHECK: [[COPY:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 - ; CHECK: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF - ; CHECK: [[LOAD:%[0-9]+]]:_(<2 x s64>) = G_LOAD [[DEF]](p1) :: (load (<2 x s64>) from `<2 x i64> addrspace(1)* undef`, addrspace 1) - ; CHECK: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[LOAD]](<2 x s64>) - ; CHECK: $vgpr0 = COPY [[UV]](s32) - ; CHECK: $vgpr1 = COPY [[UV1]](s32) - ; CHECK: $vgpr2 = COPY [[UV2]](s32) - ; CHECK: $vgpr3 = COPY [[UV3]](s32) - ; CHECK: [[COPY1:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY]] - ; CHECK: S_SETPC_B64_return [[COPY1]], implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3 + ; CHECK-NEXT: liveins: $sgpr30_sgpr31 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 + ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF + ; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(<2 x s64>) = G_LOAD [[DEF]](p1) :: (load (<2 x s64>) from `<2 x i64> addrspace(1)* undef`, addrspace 1) + ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[LOAD]](<2 x s64>) + ; CHECK-NEXT: $vgpr0 = COPY [[UV]](s32) + ; CHECK-NEXT: $vgpr1 = COPY [[UV1]](s32) + ; CHECK-NEXT: $vgpr2 = COPY [[UV2]](s32) + ; CHECK-NEXT: $vgpr3 = COPY [[UV3]](s32) + ; CHECK-NEXT: 
[[COPY1:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY]] + ; CHECK-NEXT: S_SETPC_B64_return [[COPY1]], implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3 %val = load <2 x i64>, <2 x i64> addrspace(1)* undef ret <2 x i64> %val } @@ -659,20 +696,21 @@ define <2 x i64> @v2i64_func_void() #0 { define <3 x i64> @v3i64_func_void() #0 { ; CHECK-LABEL: name: v3i64_func_void ; CHECK: bb.1 (%ir-block.0): - ; CHECK: liveins: $sgpr30_sgpr31 - ; CHECK: [[COPY:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 - ; CHECK: [[DEF:%[0-9]+]]:_(p4) = G_IMPLICIT_DEF - ; CHECK: [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[DEF]](p4) :: (volatile load (p1) from `<3 x i64> addrspace(1)* addrspace(4)* undef`, addrspace 4) - ; CHECK: [[LOAD1:%[0-9]+]]:_(<3 x s64>) = G_LOAD [[LOAD]](p1) :: (load (<3 x s64>) from %ir.ptr, align 32, addrspace 1) - ; CHECK: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[LOAD1]](<3 x s64>) - ; CHECK: $vgpr0 = COPY [[UV]](s32) - ; CHECK: $vgpr1 = COPY [[UV1]](s32) - ; CHECK: $vgpr2 = COPY [[UV2]](s32) - ; CHECK: $vgpr3 = COPY [[UV3]](s32) - ; CHECK: $vgpr4 = COPY [[UV4]](s32) - ; CHECK: $vgpr5 = COPY [[UV5]](s32) - ; CHECK: [[COPY1:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY]] - ; CHECK: S_SETPC_B64_return [[COPY1]], implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5 + ; CHECK-NEXT: liveins: $sgpr30_sgpr31 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 + ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p4) = G_IMPLICIT_DEF + ; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[DEF]](p4) :: (volatile load (p1) from `<3 x i64> addrspace(1)* addrspace(4)* undef`, addrspace 4) + ; CHECK-NEXT: [[LOAD1:%[0-9]+]]:_(<3 x s64>) = G_LOAD [[LOAD]](p1) :: (load (<3 x s64>) from %ir.ptr, align 32, addrspace 1) + ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[LOAD1]](<3 x s64>) + ; CHECK-NEXT: $vgpr0 = COPY [[UV]](s32) + ; CHECK-NEXT: $vgpr1 = COPY [[UV1]](s32) + ; CHECK-NEXT: $vgpr2 = COPY [[UV2]](s32) + ; CHECK-NEXT: $vgpr3 = COPY [[UV3]](s32) + ; CHECK-NEXT: $vgpr4 = COPY [[UV4]](s32) + ; CHECK-NEXT: $vgpr5 = COPY [[UV5]](s32) + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY]] + ; CHECK-NEXT: S_SETPC_B64_return [[COPY1]], implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5 %ptr = load volatile <3 x i64> addrspace(1)*, <3 x i64> addrspace(1)* addrspace(4)* undef %val = load <3 x i64>, <3 x i64> addrspace(1)* %ptr ret <3 x i64> %val @@ -681,22 +719,23 @@ define <3 x i64> @v3i64_func_void() #0 { define <4 x i64> @v4i64_func_void() #0 { ; CHECK-LABEL: name: v4i64_func_void ; CHECK: bb.1 (%ir-block.0): - ; CHECK: liveins: $sgpr30_sgpr31 - ; CHECK: [[COPY:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 - ; CHECK: [[DEF:%[0-9]+]]:_(p4) = G_IMPLICIT_DEF - ; CHECK: [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[DEF]](p4) :: (volatile load (p1) from `<4 x i64> addrspace(1)* addrspace(4)* undef`, addrspace 4) - ; CHECK: [[LOAD1:%[0-9]+]]:_(<4 x s64>) = G_LOAD [[LOAD]](p1) :: (load (<4 x s64>) from %ir.ptr, addrspace 1) - ; CHECK: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32), [[UV6:%[0-9]+]]:_(s32), [[UV7:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[LOAD1]](<4 x s64>) - ; CHECK: $vgpr0 = COPY 
[[UV]](s32) - ; CHECK: $vgpr1 = COPY [[UV1]](s32) - ; CHECK: $vgpr2 = COPY [[UV2]](s32) - ; CHECK: $vgpr3 = COPY [[UV3]](s32) - ; CHECK: $vgpr4 = COPY [[UV4]](s32) - ; CHECK: $vgpr5 = COPY [[UV5]](s32) - ; CHECK: $vgpr6 = COPY [[UV6]](s32) - ; CHECK: $vgpr7 = COPY [[UV7]](s32) - ; CHECK: [[COPY1:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY]] - ; CHECK: S_SETPC_B64_return [[COPY1]], implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7 + ; CHECK-NEXT: liveins: $sgpr30_sgpr31 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 + ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p4) = G_IMPLICIT_DEF + ; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[DEF]](p4) :: (volatile load (p1) from `<4 x i64> addrspace(1)* addrspace(4)* undef`, addrspace 4) + ; CHECK-NEXT: [[LOAD1:%[0-9]+]]:_(<4 x s64>) = G_LOAD [[LOAD]](p1) :: (load (<4 x s64>) from %ir.ptr, addrspace 1) + ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32), [[UV6:%[0-9]+]]:_(s32), [[UV7:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[LOAD1]](<4 x s64>) + ; CHECK-NEXT: $vgpr0 = COPY [[UV]](s32) + ; CHECK-NEXT: $vgpr1 = COPY [[UV1]](s32) + ; CHECK-NEXT: $vgpr2 = COPY [[UV2]](s32) + ; CHECK-NEXT: $vgpr3 = COPY [[UV3]](s32) + ; CHECK-NEXT: $vgpr4 = COPY [[UV4]](s32) + ; CHECK-NEXT: $vgpr5 = COPY [[UV5]](s32) + ; CHECK-NEXT: $vgpr6 = COPY [[UV6]](s32) + ; CHECK-NEXT: $vgpr7 = COPY [[UV7]](s32) + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY]] + ; CHECK-NEXT: S_SETPC_B64_return [[COPY1]], implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7 %ptr = load volatile <4 x i64> addrspace(1)*, <4 x i64> addrspace(1)* addrspace(4)* undef %val = load <4 x i64>, <4 x i64> addrspace(1)* %ptr ret <4 x i64> %val @@ -705,24 +744,25 @@ define <4 x i64> @v4i64_func_void() #0 { define <5 x i64> @v5i64_func_void() #0 { ; CHECK-LABEL: name: v5i64_func_void ; CHECK: bb.1 (%ir-block.0): - ; CHECK: liveins: $sgpr30_sgpr31 - ; CHECK: [[COPY:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 - ; CHECK: [[DEF:%[0-9]+]]:_(p4) = G_IMPLICIT_DEF - ; CHECK: [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[DEF]](p4) :: (volatile load (p1) from `<5 x i64> addrspace(1)* addrspace(4)* undef`, addrspace 4) - ; CHECK: [[LOAD1:%[0-9]+]]:_(<5 x s64>) = G_LOAD [[LOAD]](p1) :: (load (<5 x s64>) from %ir.ptr, align 64, addrspace 1) - ; CHECK: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32), [[UV6:%[0-9]+]]:_(s32), [[UV7:%[0-9]+]]:_(s32), [[UV8:%[0-9]+]]:_(s32), [[UV9:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[LOAD1]](<5 x s64>) - ; CHECK: $vgpr0 = COPY [[UV]](s32) - ; CHECK: $vgpr1 = COPY [[UV1]](s32) - ; CHECK: $vgpr2 = COPY [[UV2]](s32) - ; CHECK: $vgpr3 = COPY [[UV3]](s32) - ; CHECK: $vgpr4 = COPY [[UV4]](s32) - ; CHECK: $vgpr5 = COPY [[UV5]](s32) - ; CHECK: $vgpr6 = COPY [[UV6]](s32) - ; CHECK: $vgpr7 = COPY [[UV7]](s32) - ; CHECK: $vgpr8 = COPY [[UV8]](s32) - ; CHECK: $vgpr9 = COPY [[UV9]](s32) - ; CHECK: [[COPY1:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY]] - ; CHECK: S_SETPC_B64_return [[COPY1]], implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7, implicit $vgpr8, implicit $vgpr9 + ; CHECK-NEXT: liveins: $sgpr30_sgpr31 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: 
[[COPY:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 + ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p4) = G_IMPLICIT_DEF + ; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[DEF]](p4) :: (volatile load (p1) from `<5 x i64> addrspace(1)* addrspace(4)* undef`, addrspace 4) + ; CHECK-NEXT: [[LOAD1:%[0-9]+]]:_(<5 x s64>) = G_LOAD [[LOAD]](p1) :: (load (<5 x s64>) from %ir.ptr, align 64, addrspace 1) + ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32), [[UV6:%[0-9]+]]:_(s32), [[UV7:%[0-9]+]]:_(s32), [[UV8:%[0-9]+]]:_(s32), [[UV9:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[LOAD1]](<5 x s64>) + ; CHECK-NEXT: $vgpr0 = COPY [[UV]](s32) + ; CHECK-NEXT: $vgpr1 = COPY [[UV1]](s32) + ; CHECK-NEXT: $vgpr2 = COPY [[UV2]](s32) + ; CHECK-NEXT: $vgpr3 = COPY [[UV3]](s32) + ; CHECK-NEXT: $vgpr4 = COPY [[UV4]](s32) + ; CHECK-NEXT: $vgpr5 = COPY [[UV5]](s32) + ; CHECK-NEXT: $vgpr6 = COPY [[UV6]](s32) + ; CHECK-NEXT: $vgpr7 = COPY [[UV7]](s32) + ; CHECK-NEXT: $vgpr8 = COPY [[UV8]](s32) + ; CHECK-NEXT: $vgpr9 = COPY [[UV9]](s32) + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY]] + ; CHECK-NEXT: S_SETPC_B64_return [[COPY1]], implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7, implicit $vgpr8, implicit $vgpr9 %ptr = load volatile <5 x i64> addrspace(1)*, <5 x i64> addrspace(1)* addrspace(4)* undef %val = load <5 x i64>, <5 x i64> addrspace(1)* %ptr ret <5 x i64> %val @@ -731,30 +771,31 @@ define <5 x i64> @v5i64_func_void() #0 { define <8 x i64> @v8i64_func_void() #0 { ; CHECK-LABEL: name: v8i64_func_void ; CHECK: bb.1 (%ir-block.0): - ; CHECK: liveins: $sgpr30_sgpr31 - ; CHECK: [[COPY:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 - ; CHECK: [[DEF:%[0-9]+]]:_(p4) = G_IMPLICIT_DEF - ; CHECK: [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[DEF]](p4) :: (volatile load (p1) from `<8 x i64> addrspace(1)* addrspace(4)* undef`, addrspace 4) - ; CHECK: [[LOAD1:%[0-9]+]]:_(<8 x s64>) = G_LOAD [[LOAD]](p1) :: (load (<8 x s64>) from %ir.ptr, addrspace 1) - ; CHECK: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32), [[UV6:%[0-9]+]]:_(s32), [[UV7:%[0-9]+]]:_(s32), [[UV8:%[0-9]+]]:_(s32), [[UV9:%[0-9]+]]:_(s32), [[UV10:%[0-9]+]]:_(s32), [[UV11:%[0-9]+]]:_(s32), [[UV12:%[0-9]+]]:_(s32), [[UV13:%[0-9]+]]:_(s32), [[UV14:%[0-9]+]]:_(s32), [[UV15:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[LOAD1]](<8 x s64>) - ; CHECK: $vgpr0 = COPY [[UV]](s32) - ; CHECK: $vgpr1 = COPY [[UV1]](s32) - ; CHECK: $vgpr2 = COPY [[UV2]](s32) - ; CHECK: $vgpr3 = COPY [[UV3]](s32) - ; CHECK: $vgpr4 = COPY [[UV4]](s32) - ; CHECK: $vgpr5 = COPY [[UV5]](s32) - ; CHECK: $vgpr6 = COPY [[UV6]](s32) - ; CHECK: $vgpr7 = COPY [[UV7]](s32) - ; CHECK: $vgpr8 = COPY [[UV8]](s32) - ; CHECK: $vgpr9 = COPY [[UV9]](s32) - ; CHECK: $vgpr10 = COPY [[UV10]](s32) - ; CHECK: $vgpr11 = COPY [[UV11]](s32) - ; CHECK: $vgpr12 = COPY [[UV12]](s32) - ; CHECK: $vgpr13 = COPY [[UV13]](s32) - ; CHECK: $vgpr14 = COPY [[UV14]](s32) - ; CHECK: $vgpr15 = COPY [[UV15]](s32) - ; CHECK: [[COPY1:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY]] - ; CHECK: S_SETPC_B64_return [[COPY1]], implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7, implicit $vgpr8, implicit $vgpr9, implicit $vgpr10, implicit $vgpr11, implicit $vgpr12, implicit $vgpr13, implicit $vgpr14, implicit $vgpr15 + ; CHECK-NEXT: 
liveins: $sgpr30_sgpr31 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 + ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p4) = G_IMPLICIT_DEF + ; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[DEF]](p4) :: (volatile load (p1) from `<8 x i64> addrspace(1)* addrspace(4)* undef`, addrspace 4) + ; CHECK-NEXT: [[LOAD1:%[0-9]+]]:_(<8 x s64>) = G_LOAD [[LOAD]](p1) :: (load (<8 x s64>) from %ir.ptr, addrspace 1) + ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32), [[UV6:%[0-9]+]]:_(s32), [[UV7:%[0-9]+]]:_(s32), [[UV8:%[0-9]+]]:_(s32), [[UV9:%[0-9]+]]:_(s32), [[UV10:%[0-9]+]]:_(s32), [[UV11:%[0-9]+]]:_(s32), [[UV12:%[0-9]+]]:_(s32), [[UV13:%[0-9]+]]:_(s32), [[UV14:%[0-9]+]]:_(s32), [[UV15:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[LOAD1]](<8 x s64>) + ; CHECK-NEXT: $vgpr0 = COPY [[UV]](s32) + ; CHECK-NEXT: $vgpr1 = COPY [[UV1]](s32) + ; CHECK-NEXT: $vgpr2 = COPY [[UV2]](s32) + ; CHECK-NEXT: $vgpr3 = COPY [[UV3]](s32) + ; CHECK-NEXT: $vgpr4 = COPY [[UV4]](s32) + ; CHECK-NEXT: $vgpr5 = COPY [[UV5]](s32) + ; CHECK-NEXT: $vgpr6 = COPY [[UV6]](s32) + ; CHECK-NEXT: $vgpr7 = COPY [[UV7]](s32) + ; CHECK-NEXT: $vgpr8 = COPY [[UV8]](s32) + ; CHECK-NEXT: $vgpr9 = COPY [[UV9]](s32) + ; CHECK-NEXT: $vgpr10 = COPY [[UV10]](s32) + ; CHECK-NEXT: $vgpr11 = COPY [[UV11]](s32) + ; CHECK-NEXT: $vgpr12 = COPY [[UV12]](s32) + ; CHECK-NEXT: $vgpr13 = COPY [[UV13]](s32) + ; CHECK-NEXT: $vgpr14 = COPY [[UV14]](s32) + ; CHECK-NEXT: $vgpr15 = COPY [[UV15]](s32) + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY]] + ; CHECK-NEXT: S_SETPC_B64_return [[COPY1]], implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7, implicit $vgpr8, implicit $vgpr9, implicit $vgpr10, implicit $vgpr11, implicit $vgpr12, implicit $vgpr13, implicit $vgpr14, implicit $vgpr15 %ptr = load volatile <8 x i64> addrspace(1)*, <8 x i64> addrspace(1)* addrspace(4)* undef %val = load <8 x i64>, <8 x i64> addrspace(1)* %ptr ret <8 x i64> %val @@ -763,46 +804,47 @@ define <8 x i64> @v8i64_func_void() #0 { define <16 x i64> @v16i64_func_void() #0 { ; CHECK-LABEL: name: v16i64_func_void ; CHECK: bb.1 (%ir-block.0): - ; CHECK: liveins: $sgpr30_sgpr31 - ; CHECK: [[COPY:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 - ; CHECK: [[DEF:%[0-9]+]]:_(p4) = G_IMPLICIT_DEF - ; CHECK: [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[DEF]](p4) :: (volatile load (p1) from `<16 x i64> addrspace(1)* addrspace(4)* undef`, addrspace 4) - ; CHECK: [[LOAD1:%[0-9]+]]:_(<16 x s64>) = G_LOAD [[LOAD]](p1) :: (load (<16 x s64>) from %ir.ptr, addrspace 1) - ; CHECK: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32), [[UV6:%[0-9]+]]:_(s32), [[UV7:%[0-9]+]]:_(s32), [[UV8:%[0-9]+]]:_(s32), [[UV9:%[0-9]+]]:_(s32), [[UV10:%[0-9]+]]:_(s32), [[UV11:%[0-9]+]]:_(s32), [[UV12:%[0-9]+]]:_(s32), [[UV13:%[0-9]+]]:_(s32), [[UV14:%[0-9]+]]:_(s32), [[UV15:%[0-9]+]]:_(s32), [[UV16:%[0-9]+]]:_(s32), [[UV17:%[0-9]+]]:_(s32), [[UV18:%[0-9]+]]:_(s32), [[UV19:%[0-9]+]]:_(s32), [[UV20:%[0-9]+]]:_(s32), [[UV21:%[0-9]+]]:_(s32), [[UV22:%[0-9]+]]:_(s32), [[UV23:%[0-9]+]]:_(s32), [[UV24:%[0-9]+]]:_(s32), [[UV25:%[0-9]+]]:_(s32), [[UV26:%[0-9]+]]:_(s32), [[UV27:%[0-9]+]]:_(s32), [[UV28:%[0-9]+]]:_(s32), [[UV29:%[0-9]+]]:_(s32), [[UV30:%[0-9]+]]:_(s32), [[UV31:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[LOAD1]](<16 x s64>) - ; CHECK: $vgpr0 = 
COPY [[UV]](s32) - ; CHECK: $vgpr1 = COPY [[UV1]](s32) - ; CHECK: $vgpr2 = COPY [[UV2]](s32) - ; CHECK: $vgpr3 = COPY [[UV3]](s32) - ; CHECK: $vgpr4 = COPY [[UV4]](s32) - ; CHECK: $vgpr5 = COPY [[UV5]](s32) - ; CHECK: $vgpr6 = COPY [[UV6]](s32) - ; CHECK: $vgpr7 = COPY [[UV7]](s32) - ; CHECK: $vgpr8 = COPY [[UV8]](s32) - ; CHECK: $vgpr9 = COPY [[UV9]](s32) - ; CHECK: $vgpr10 = COPY [[UV10]](s32) - ; CHECK: $vgpr11 = COPY [[UV11]](s32) - ; CHECK: $vgpr12 = COPY [[UV12]](s32) - ; CHECK: $vgpr13 = COPY [[UV13]](s32) - ; CHECK: $vgpr14 = COPY [[UV14]](s32) - ; CHECK: $vgpr15 = COPY [[UV15]](s32) - ; CHECK: $vgpr16 = COPY [[UV16]](s32) - ; CHECK: $vgpr17 = COPY [[UV17]](s32) - ; CHECK: $vgpr18 = COPY [[UV18]](s32) - ; CHECK: $vgpr19 = COPY [[UV19]](s32) - ; CHECK: $vgpr20 = COPY [[UV20]](s32) - ; CHECK: $vgpr21 = COPY [[UV21]](s32) - ; CHECK: $vgpr22 = COPY [[UV22]](s32) - ; CHECK: $vgpr23 = COPY [[UV23]](s32) - ; CHECK: $vgpr24 = COPY [[UV24]](s32) - ; CHECK: $vgpr25 = COPY [[UV25]](s32) - ; CHECK: $vgpr26 = COPY [[UV26]](s32) - ; CHECK: $vgpr27 = COPY [[UV27]](s32) - ; CHECK: $vgpr28 = COPY [[UV28]](s32) - ; CHECK: $vgpr29 = COPY [[UV29]](s32) - ; CHECK: $vgpr30 = COPY [[UV30]](s32) - ; CHECK: $vgpr31 = COPY [[UV31]](s32) - ; CHECK: [[COPY1:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY]] - ; CHECK: S_SETPC_B64_return [[COPY1]], implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7, implicit $vgpr8, implicit $vgpr9, implicit $vgpr10, implicit $vgpr11, implicit $vgpr12, implicit $vgpr13, implicit $vgpr14, implicit $vgpr15, implicit $vgpr16, implicit $vgpr17, implicit $vgpr18, implicit $vgpr19, implicit $vgpr20, implicit $vgpr21, implicit $vgpr22, implicit $vgpr23, implicit $vgpr24, implicit $vgpr25, implicit $vgpr26, implicit $vgpr27, implicit $vgpr28, implicit $vgpr29, implicit $vgpr30, implicit $vgpr31 + ; CHECK-NEXT: liveins: $sgpr30_sgpr31 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 + ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p4) = G_IMPLICIT_DEF + ; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[DEF]](p4) :: (volatile load (p1) from `<16 x i64> addrspace(1)* addrspace(4)* undef`, addrspace 4) + ; CHECK-NEXT: [[LOAD1:%[0-9]+]]:_(<16 x s64>) = G_LOAD [[LOAD]](p1) :: (load (<16 x s64>) from %ir.ptr, addrspace 1) + ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32), [[UV6:%[0-9]+]]:_(s32), [[UV7:%[0-9]+]]:_(s32), [[UV8:%[0-9]+]]:_(s32), [[UV9:%[0-9]+]]:_(s32), [[UV10:%[0-9]+]]:_(s32), [[UV11:%[0-9]+]]:_(s32), [[UV12:%[0-9]+]]:_(s32), [[UV13:%[0-9]+]]:_(s32), [[UV14:%[0-9]+]]:_(s32), [[UV15:%[0-9]+]]:_(s32), [[UV16:%[0-9]+]]:_(s32), [[UV17:%[0-9]+]]:_(s32), [[UV18:%[0-9]+]]:_(s32), [[UV19:%[0-9]+]]:_(s32), [[UV20:%[0-9]+]]:_(s32), [[UV21:%[0-9]+]]:_(s32), [[UV22:%[0-9]+]]:_(s32), [[UV23:%[0-9]+]]:_(s32), [[UV24:%[0-9]+]]:_(s32), [[UV25:%[0-9]+]]:_(s32), [[UV26:%[0-9]+]]:_(s32), [[UV27:%[0-9]+]]:_(s32), [[UV28:%[0-9]+]]:_(s32), [[UV29:%[0-9]+]]:_(s32), [[UV30:%[0-9]+]]:_(s32), [[UV31:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[LOAD1]](<16 x s64>) + ; CHECK-NEXT: $vgpr0 = COPY [[UV]](s32) + ; CHECK-NEXT: $vgpr1 = COPY [[UV1]](s32) + ; CHECK-NEXT: $vgpr2 = COPY [[UV2]](s32) + ; CHECK-NEXT: $vgpr3 = COPY [[UV3]](s32) + ; CHECK-NEXT: $vgpr4 = COPY [[UV4]](s32) + ; CHECK-NEXT: $vgpr5 = COPY [[UV5]](s32) + ; CHECK-NEXT: $vgpr6 = COPY [[UV6]](s32) + ; CHECK-NEXT: $vgpr7 = COPY [[UV7]](s32) + ; CHECK-NEXT: 
$vgpr8 = COPY [[UV8]](s32) + ; CHECK-NEXT: $vgpr9 = COPY [[UV9]](s32) + ; CHECK-NEXT: $vgpr10 = COPY [[UV10]](s32) + ; CHECK-NEXT: $vgpr11 = COPY [[UV11]](s32) + ; CHECK-NEXT: $vgpr12 = COPY [[UV12]](s32) + ; CHECK-NEXT: $vgpr13 = COPY [[UV13]](s32) + ; CHECK-NEXT: $vgpr14 = COPY [[UV14]](s32) + ; CHECK-NEXT: $vgpr15 = COPY [[UV15]](s32) + ; CHECK-NEXT: $vgpr16 = COPY [[UV16]](s32) + ; CHECK-NEXT: $vgpr17 = COPY [[UV17]](s32) + ; CHECK-NEXT: $vgpr18 = COPY [[UV18]](s32) + ; CHECK-NEXT: $vgpr19 = COPY [[UV19]](s32) + ; CHECK-NEXT: $vgpr20 = COPY [[UV20]](s32) + ; CHECK-NEXT: $vgpr21 = COPY [[UV21]](s32) + ; CHECK-NEXT: $vgpr22 = COPY [[UV22]](s32) + ; CHECK-NEXT: $vgpr23 = COPY [[UV23]](s32) + ; CHECK-NEXT: $vgpr24 = COPY [[UV24]](s32) + ; CHECK-NEXT: $vgpr25 = COPY [[UV25]](s32) + ; CHECK-NEXT: $vgpr26 = COPY [[UV26]](s32) + ; CHECK-NEXT: $vgpr27 = COPY [[UV27]](s32) + ; CHECK-NEXT: $vgpr28 = COPY [[UV28]](s32) + ; CHECK-NEXT: $vgpr29 = COPY [[UV29]](s32) + ; CHECK-NEXT: $vgpr30 = COPY [[UV30]](s32) + ; CHECK-NEXT: $vgpr31 = COPY [[UV31]](s32) + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY]] + ; CHECK-NEXT: S_SETPC_B64_return [[COPY1]], implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7, implicit $vgpr8, implicit $vgpr9, implicit $vgpr10, implicit $vgpr11, implicit $vgpr12, implicit $vgpr13, implicit $vgpr14, implicit $vgpr15, implicit $vgpr16, implicit $vgpr17, implicit $vgpr18, implicit $vgpr19, implicit $vgpr20, implicit $vgpr21, implicit $vgpr22, implicit $vgpr23, implicit $vgpr24, implicit $vgpr25, implicit $vgpr26, implicit $vgpr27, implicit $vgpr28, implicit $vgpr29, implicit $vgpr30, implicit $vgpr31 %ptr = load volatile <16 x i64> addrspace(1)*, <16 x i64> addrspace(1)* addrspace(4)* undef %val = load <16 x i64>, <16 x i64> addrspace(1)* %ptr ret <16 x i64> %val @@ -811,13 +853,14 @@ define <16 x i64> @v16i64_func_void() #0 { define <2 x i16> @v2i16_func_void() #0 { ; CHECK-LABEL: name: v2i16_func_void ; CHECK: bb.1 (%ir-block.0): - ; CHECK: liveins: $sgpr30_sgpr31 - ; CHECK: [[COPY:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 - ; CHECK: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF - ; CHECK: [[LOAD:%[0-9]+]]:_(<2 x s16>) = G_LOAD [[DEF]](p1) :: (load (<2 x s16>) from `<2 x i16> addrspace(1)* undef`, addrspace 1) - ; CHECK: $vgpr0 = COPY [[LOAD]](<2 x s16>) - ; CHECK: [[COPY1:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY]] - ; CHECK: S_SETPC_B64_return [[COPY1]], implicit $vgpr0 + ; CHECK-NEXT: liveins: $sgpr30_sgpr31 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 + ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF + ; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(<2 x s16>) = G_LOAD [[DEF]](p1) :: (load (<2 x s16>) from `<2 x i16> addrspace(1)* undef`, addrspace 1) + ; CHECK-NEXT: $vgpr0 = COPY [[LOAD]](<2 x s16>) + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY]] + ; CHECK-NEXT: S_SETPC_B64_return [[COPY1]], implicit $vgpr0 %val = load <2 x i16>, <2 x i16> addrspace(1)* undef ret <2 x i16> %val } @@ -825,13 +868,14 @@ define <2 x i16> @v2i16_func_void() #0 { define <2 x half> @v2f16_func_void() #0 { ; CHECK-LABEL: name: v2f16_func_void ; CHECK: bb.1 (%ir-block.0): - ; CHECK: liveins: $sgpr30_sgpr31 - ; CHECK: [[COPY:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 - ; CHECK: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF - ; CHECK: [[LOAD:%[0-9]+]]:_(<2 x s16>) = G_LOAD [[DEF]](p1) :: (load (<2 x s16>) from `<2 x half> addrspace(1)* undef`, addrspace 1) - ; CHECK: $vgpr0 = COPY 
[[LOAD]](<2 x s16>) - ; CHECK: [[COPY1:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY]] - ; CHECK: S_SETPC_B64_return [[COPY1]], implicit $vgpr0 + ; CHECK-NEXT: liveins: $sgpr30_sgpr31 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 + ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF + ; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(<2 x s16>) = G_LOAD [[DEF]](p1) :: (load (<2 x s16>) from `<2 x half> addrspace(1)* undef`, addrspace 1) + ; CHECK-NEXT: $vgpr0 = COPY [[LOAD]](<2 x s16>) + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY]] + ; CHECK-NEXT: S_SETPC_B64_return [[COPY1]], implicit $vgpr0 %val = load <2 x half>, <2 x half> addrspace(1)* undef ret <2 x half> %val } @@ -839,17 +883,18 @@ define <2 x half> @v2f16_func_void() #0 { define <3 x i16> @v3i16_func_void() #0 { ; CHECK-LABEL: name: v3i16_func_void ; CHECK: bb.1 (%ir-block.0): - ; CHECK: liveins: $sgpr30_sgpr31 - ; CHECK: [[COPY:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 - ; CHECK: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF - ; CHECK: [[LOAD:%[0-9]+]]:_(<3 x s16>) = G_LOAD [[DEF]](p1) :: (load (<3 x s16>) from `<3 x i16> addrspace(1)* undef`, align 8, addrspace 1) - ; CHECK: [[DEF1:%[0-9]+]]:_(<3 x s16>) = G_IMPLICIT_DEF - ; CHECK: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[LOAD]](<3 x s16>), [[DEF1]](<3 x s16>) - ; CHECK: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>), [[UV2:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<6 x s16>) - ; CHECK: $vgpr0 = COPY [[UV]](<2 x s16>) - ; CHECK: $vgpr1 = COPY [[UV1]](<2 x s16>) - ; CHECK: [[COPY1:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY]] - ; CHECK: S_SETPC_B64_return [[COPY1]], implicit $vgpr0, implicit $vgpr1 + ; CHECK-NEXT: liveins: $sgpr30_sgpr31 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 + ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF + ; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(<3 x s16>) = G_LOAD [[DEF]](p1) :: (load (<3 x s16>) from `<3 x i16> addrspace(1)* undef`, align 8, addrspace 1) + ; CHECK-NEXT: [[DEF1:%[0-9]+]]:_(<3 x s16>) = G_IMPLICIT_DEF + ; CHECK-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[LOAD]](<3 x s16>), [[DEF1]](<3 x s16>) + ; CHECK-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>), [[UV2:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<6 x s16>) + ; CHECK-NEXT: $vgpr0 = COPY [[UV]](<2 x s16>) + ; CHECK-NEXT: $vgpr1 = COPY [[UV1]](<2 x s16>) + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY]] + ; CHECK-NEXT: S_SETPC_B64_return [[COPY1]], implicit $vgpr0, implicit $vgpr1 %val = load <3 x i16>, <3 x i16> addrspace(1)* undef ret <3 x i16> %val } @@ -857,15 +902,16 @@ define <3 x i16> @v3i16_func_void() #0 { define <4 x i16> @v4i16_func_void() #0 { ; CHECK-LABEL: name: v4i16_func_void ; CHECK: bb.1 (%ir-block.0): - ; CHECK: liveins: $sgpr30_sgpr31 - ; CHECK: [[COPY:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 - ; CHECK: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF - ; CHECK: [[LOAD:%[0-9]+]]:_(<4 x s16>) = G_LOAD [[DEF]](p1) :: (load (<4 x s16>) from `<4 x i16> addrspace(1)* undef`, addrspace 1) - ; CHECK: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[LOAD]](<4 x s16>) - ; CHECK: $vgpr0 = COPY [[UV]](<2 x s16>) - ; CHECK: $vgpr1 = COPY [[UV1]](<2 x s16>) - ; CHECK: [[COPY1:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY]] - ; CHECK: S_SETPC_B64_return [[COPY1]], implicit $vgpr0, implicit $vgpr1 + ; CHECK-NEXT: liveins: $sgpr30_sgpr31 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr_64 = COPY 
$sgpr30_sgpr31 + ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF + ; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(<4 x s16>) = G_LOAD [[DEF]](p1) :: (load (<4 x s16>) from `<4 x i16> addrspace(1)* undef`, addrspace 1) + ; CHECK-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[LOAD]](<4 x s16>) + ; CHECK-NEXT: $vgpr0 = COPY [[UV]](<2 x s16>) + ; CHECK-NEXT: $vgpr1 = COPY [[UV1]](<2 x s16>) + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY]] + ; CHECK-NEXT: S_SETPC_B64_return [[COPY1]], implicit $vgpr0, implicit $vgpr1 %val = load <4 x i16>, <4 x i16> addrspace(1)* undef ret <4 x i16> %val } @@ -873,15 +919,16 @@ define <4 x i16> @v4i16_func_void() #0 { define <4 x half> @v4f16_func_void() #0 { ; CHECK-LABEL: name: v4f16_func_void ; CHECK: bb.1 (%ir-block.0): - ; CHECK: liveins: $sgpr30_sgpr31 - ; CHECK: [[COPY:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 - ; CHECK: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF - ; CHECK: [[LOAD:%[0-9]+]]:_(<4 x s16>) = G_LOAD [[DEF]](p1) :: (load (<4 x s16>) from `<4 x half> addrspace(1)* undef`, addrspace 1) - ; CHECK: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[LOAD]](<4 x s16>) - ; CHECK: $vgpr0 = COPY [[UV]](<2 x s16>) - ; CHECK: $vgpr1 = COPY [[UV1]](<2 x s16>) - ; CHECK: [[COPY1:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY]] - ; CHECK: S_SETPC_B64_return [[COPY1]], implicit $vgpr0, implicit $vgpr1 + ; CHECK-NEXT: liveins: $sgpr30_sgpr31 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 + ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF + ; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(<4 x s16>) = G_LOAD [[DEF]](p1) :: (load (<4 x s16>) from `<4 x half> addrspace(1)* undef`, addrspace 1) + ; CHECK-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[LOAD]](<4 x s16>) + ; CHECK-NEXT: $vgpr0 = COPY [[UV]](<2 x s16>) + ; CHECK-NEXT: $vgpr1 = COPY [[UV1]](<2 x s16>) + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY]] + ; CHECK-NEXT: S_SETPC_B64_return [[COPY1]], implicit $vgpr0, implicit $vgpr1 %val = load <4 x half>, <4 x half> addrspace(1)* undef ret <4 x half> %val } @@ -889,19 +936,20 @@ define <4 x half> @v4f16_func_void() #0 { define <5 x i16> @v5i16_func_void() #0 { ; CHECK-LABEL: name: v5i16_func_void ; CHECK: bb.1 (%ir-block.0): - ; CHECK: liveins: $sgpr30_sgpr31 - ; CHECK: [[COPY:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 - ; CHECK: [[DEF:%[0-9]+]]:_(p4) = G_IMPLICIT_DEF - ; CHECK: [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[DEF]](p4) :: (volatile load (p1) from `<5 x i16> addrspace(1)* addrspace(4)* undef`, addrspace 4) - ; CHECK: [[LOAD1:%[0-9]+]]:_(<5 x s16>) = G_LOAD [[LOAD]](p1) :: (load (<5 x s16>) from %ir.ptr, align 16, addrspace 1) - ; CHECK: [[DEF1:%[0-9]+]]:_(<5 x s16>) = G_IMPLICIT_DEF - ; CHECK: [[CONCAT_VECTORS:%[0-9]+]]:_(<10 x s16>) = G_CONCAT_VECTORS [[LOAD1]](<5 x s16>), [[DEF1]](<5 x s16>) - ; CHECK: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>), [[UV2:%[0-9]+]]:_(<2 x s16>), [[UV3:%[0-9]+]]:_(<2 x s16>), [[UV4:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<10 x s16>) - ; CHECK: $vgpr0 = COPY [[UV]](<2 x s16>) - ; CHECK: $vgpr1 = COPY [[UV1]](<2 x s16>) - ; CHECK: $vgpr2 = COPY [[UV2]](<2 x s16>) - ; CHECK: [[COPY1:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY]] - ; CHECK: S_SETPC_B64_return [[COPY1]], implicit $vgpr0, implicit $vgpr1, implicit $vgpr2 + ; CHECK-NEXT: liveins: $sgpr30_sgpr31 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 + ; CHECK-NEXT: 
[[DEF:%[0-9]+]]:_(p4) = G_IMPLICIT_DEF + ; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[DEF]](p4) :: (volatile load (p1) from `<5 x i16> addrspace(1)* addrspace(4)* undef`, addrspace 4) + ; CHECK-NEXT: [[LOAD1:%[0-9]+]]:_(<5 x s16>) = G_LOAD [[LOAD]](p1) :: (load (<5 x s16>) from %ir.ptr, align 16, addrspace 1) + ; CHECK-NEXT: [[DEF1:%[0-9]+]]:_(<5 x s16>) = G_IMPLICIT_DEF + ; CHECK-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<10 x s16>) = G_CONCAT_VECTORS [[LOAD1]](<5 x s16>), [[DEF1]](<5 x s16>) + ; CHECK-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>), [[UV2:%[0-9]+]]:_(<2 x s16>), [[UV3:%[0-9]+]]:_(<2 x s16>), [[UV4:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<10 x s16>) + ; CHECK-NEXT: $vgpr0 = COPY [[UV]](<2 x s16>) + ; CHECK-NEXT: $vgpr1 = COPY [[UV1]](<2 x s16>) + ; CHECK-NEXT: $vgpr2 = COPY [[UV2]](<2 x s16>) + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY]] + ; CHECK-NEXT: S_SETPC_B64_return [[COPY1]], implicit $vgpr0, implicit $vgpr1, implicit $vgpr2 %ptr = load volatile <5 x i16> addrspace(1)*, <5 x i16> addrspace(1)* addrspace(4)* undef %val = load <5 x i16>, <5 x i16> addrspace(1)* %ptr ret <5 x i16> %val @@ -910,18 +958,19 @@ define <5 x i16> @v5i16_func_void() #0 { define <8 x i16> @v8i16_func_void() #0 { ; CHECK-LABEL: name: v8i16_func_void ; CHECK: bb.1 (%ir-block.0): - ; CHECK: liveins: $sgpr30_sgpr31 - ; CHECK: [[COPY:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 - ; CHECK: [[DEF:%[0-9]+]]:_(p4) = G_IMPLICIT_DEF - ; CHECK: [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[DEF]](p4) :: (volatile load (p1) from `<8 x i16> addrspace(1)* addrspace(4)* undef`, addrspace 4) - ; CHECK: [[LOAD1:%[0-9]+]]:_(<8 x s16>) = G_LOAD [[LOAD]](p1) :: (load (<8 x s16>) from %ir.ptr, addrspace 1) - ; CHECK: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>), [[UV2:%[0-9]+]]:_(<2 x s16>), [[UV3:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[LOAD1]](<8 x s16>) - ; CHECK: $vgpr0 = COPY [[UV]](<2 x s16>) - ; CHECK: $vgpr1 = COPY [[UV1]](<2 x s16>) - ; CHECK: $vgpr2 = COPY [[UV2]](<2 x s16>) - ; CHECK: $vgpr3 = COPY [[UV3]](<2 x s16>) - ; CHECK: [[COPY1:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY]] - ; CHECK: S_SETPC_B64_return [[COPY1]], implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3 + ; CHECK-NEXT: liveins: $sgpr30_sgpr31 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 + ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p4) = G_IMPLICIT_DEF + ; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[DEF]](p4) :: (volatile load (p1) from `<8 x i16> addrspace(1)* addrspace(4)* undef`, addrspace 4) + ; CHECK-NEXT: [[LOAD1:%[0-9]+]]:_(<8 x s16>) = G_LOAD [[LOAD]](p1) :: (load (<8 x s16>) from %ir.ptr, addrspace 1) + ; CHECK-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>), [[UV2:%[0-9]+]]:_(<2 x s16>), [[UV3:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[LOAD1]](<8 x s16>) + ; CHECK-NEXT: $vgpr0 = COPY [[UV]](<2 x s16>) + ; CHECK-NEXT: $vgpr1 = COPY [[UV1]](<2 x s16>) + ; CHECK-NEXT: $vgpr2 = COPY [[UV2]](<2 x s16>) + ; CHECK-NEXT: $vgpr3 = COPY [[UV3]](<2 x s16>) + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY]] + ; CHECK-NEXT: S_SETPC_B64_return [[COPY1]], implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3 %ptr = load volatile <8 x i16> addrspace(1)*, <8 x i16> addrspace(1)* addrspace(4)* undef %val = load <8 x i16>, <8 x i16> addrspace(1)* %ptr ret <8 x i16> %val @@ -930,22 +979,23 @@ define <8 x i16> @v8i16_func_void() #0 { define <16 x i16> @v16i16_func_void() #0 { ; CHECK-LABEL: name: v16i16_func_void ; CHECK: 
bb.1 (%ir-block.0): - ; CHECK: liveins: $sgpr30_sgpr31 - ; CHECK: [[COPY:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 - ; CHECK: [[DEF:%[0-9]+]]:_(p4) = G_IMPLICIT_DEF - ; CHECK: [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[DEF]](p4) :: (volatile load (p1) from `<16 x i16> addrspace(1)* addrspace(4)* undef`, addrspace 4) - ; CHECK: [[LOAD1:%[0-9]+]]:_(<16 x s16>) = G_LOAD [[LOAD]](p1) :: (load (<16 x s16>) from %ir.ptr, addrspace 1) - ; CHECK: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>), [[UV2:%[0-9]+]]:_(<2 x s16>), [[UV3:%[0-9]+]]:_(<2 x s16>), [[UV4:%[0-9]+]]:_(<2 x s16>), [[UV5:%[0-9]+]]:_(<2 x s16>), [[UV6:%[0-9]+]]:_(<2 x s16>), [[UV7:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[LOAD1]](<16 x s16>) - ; CHECK: $vgpr0 = COPY [[UV]](<2 x s16>) - ; CHECK: $vgpr1 = COPY [[UV1]](<2 x s16>) - ; CHECK: $vgpr2 = COPY [[UV2]](<2 x s16>) - ; CHECK: $vgpr3 = COPY [[UV3]](<2 x s16>) - ; CHECK: $vgpr4 = COPY [[UV4]](<2 x s16>) - ; CHECK: $vgpr5 = COPY [[UV5]](<2 x s16>) - ; CHECK: $vgpr6 = COPY [[UV6]](<2 x s16>) - ; CHECK: $vgpr7 = COPY [[UV7]](<2 x s16>) - ; CHECK: [[COPY1:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY]] - ; CHECK: S_SETPC_B64_return [[COPY1]], implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7 + ; CHECK-NEXT: liveins: $sgpr30_sgpr31 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 + ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p4) = G_IMPLICIT_DEF + ; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[DEF]](p4) :: (volatile load (p1) from `<16 x i16> addrspace(1)* addrspace(4)* undef`, addrspace 4) + ; CHECK-NEXT: [[LOAD1:%[0-9]+]]:_(<16 x s16>) = G_LOAD [[LOAD]](p1) :: (load (<16 x s16>) from %ir.ptr, addrspace 1) + ; CHECK-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>), [[UV2:%[0-9]+]]:_(<2 x s16>), [[UV3:%[0-9]+]]:_(<2 x s16>), [[UV4:%[0-9]+]]:_(<2 x s16>), [[UV5:%[0-9]+]]:_(<2 x s16>), [[UV6:%[0-9]+]]:_(<2 x s16>), [[UV7:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[LOAD1]](<16 x s16>) + ; CHECK-NEXT: $vgpr0 = COPY [[UV]](<2 x s16>) + ; CHECK-NEXT: $vgpr1 = COPY [[UV1]](<2 x s16>) + ; CHECK-NEXT: $vgpr2 = COPY [[UV2]](<2 x s16>) + ; CHECK-NEXT: $vgpr3 = COPY [[UV3]](<2 x s16>) + ; CHECK-NEXT: $vgpr4 = COPY [[UV4]](<2 x s16>) + ; CHECK-NEXT: $vgpr5 = COPY [[UV5]](<2 x s16>) + ; CHECK-NEXT: $vgpr6 = COPY [[UV6]](<2 x s16>) + ; CHECK-NEXT: $vgpr7 = COPY [[UV7]](<2 x s16>) + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY]] + ; CHECK-NEXT: S_SETPC_B64_return [[COPY1]], implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7 %ptr = load volatile <16 x i16> addrspace(1)*, <16 x i16> addrspace(1)* addrspace(4)* undef %val = load <16 x i16>, <16 x i16> addrspace(1)* %ptr ret <16 x i16> %val @@ -954,62 +1004,63 @@ define <16 x i16> @v16i16_func_void() #0 { define <16 x i8> @v16i8_func_void() #0 { ; CHECK-LABEL: name: v16i8_func_void ; CHECK: bb.1 (%ir-block.0): - ; CHECK: liveins: $sgpr30_sgpr31 - ; CHECK: [[COPY:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 - ; CHECK: [[DEF:%[0-9]+]]:_(p4) = G_IMPLICIT_DEF - ; CHECK: [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[DEF]](p4) :: (volatile load (p1) from `<16 x i8> addrspace(1)* addrspace(4)* undef`, addrspace 4) - ; CHECK: [[LOAD1:%[0-9]+]]:_(<16 x s8>) = G_LOAD [[LOAD]](p1) :: (load (<16 x s8>) from %ir.ptr, addrspace 1) - ; CHECK: [[UV:%[0-9]+]]:_(s8), [[UV1:%[0-9]+]]:_(s8), [[UV2:%[0-9]+]]:_(s8), [[UV3:%[0-9]+]]:_(s8), [[UV4:%[0-9]+]]:_(s8), 
[[UV5:%[0-9]+]]:_(s8), [[UV6:%[0-9]+]]:_(s8), [[UV7:%[0-9]+]]:_(s8), [[UV8:%[0-9]+]]:_(s8), [[UV9:%[0-9]+]]:_(s8), [[UV10:%[0-9]+]]:_(s8), [[UV11:%[0-9]+]]:_(s8), [[UV12:%[0-9]+]]:_(s8), [[UV13:%[0-9]+]]:_(s8), [[UV14:%[0-9]+]]:_(s8), [[UV15:%[0-9]+]]:_(s8) = G_UNMERGE_VALUES [[LOAD1]](<16 x s8>) - ; CHECK: [[ANYEXT:%[0-9]+]]:_(s16) = G_ANYEXT [[UV]](s8) - ; CHECK: [[ANYEXT1:%[0-9]+]]:_(s16) = G_ANYEXT [[UV1]](s8) - ; CHECK: [[ANYEXT2:%[0-9]+]]:_(s16) = G_ANYEXT [[UV2]](s8) - ; CHECK: [[ANYEXT3:%[0-9]+]]:_(s16) = G_ANYEXT [[UV3]](s8) - ; CHECK: [[ANYEXT4:%[0-9]+]]:_(s16) = G_ANYEXT [[UV4]](s8) - ; CHECK: [[ANYEXT5:%[0-9]+]]:_(s16) = G_ANYEXT [[UV5]](s8) - ; CHECK: [[ANYEXT6:%[0-9]+]]:_(s16) = G_ANYEXT [[UV6]](s8) - ; CHECK: [[ANYEXT7:%[0-9]+]]:_(s16) = G_ANYEXT [[UV7]](s8) - ; CHECK: [[ANYEXT8:%[0-9]+]]:_(s16) = G_ANYEXT [[UV8]](s8) - ; CHECK: [[ANYEXT9:%[0-9]+]]:_(s16) = G_ANYEXT [[UV9]](s8) - ; CHECK: [[ANYEXT10:%[0-9]+]]:_(s16) = G_ANYEXT [[UV10]](s8) - ; CHECK: [[ANYEXT11:%[0-9]+]]:_(s16) = G_ANYEXT [[UV11]](s8) - ; CHECK: [[ANYEXT12:%[0-9]+]]:_(s16) = G_ANYEXT [[UV12]](s8) - ; CHECK: [[ANYEXT13:%[0-9]+]]:_(s16) = G_ANYEXT [[UV13]](s8) - ; CHECK: [[ANYEXT14:%[0-9]+]]:_(s16) = G_ANYEXT [[UV14]](s8) - ; CHECK: [[ANYEXT15:%[0-9]+]]:_(s16) = G_ANYEXT [[UV15]](s8) - ; CHECK: [[ANYEXT16:%[0-9]+]]:_(s32) = G_ANYEXT [[ANYEXT]](s16) - ; CHECK: $vgpr0 = COPY [[ANYEXT16]](s32) - ; CHECK: [[ANYEXT17:%[0-9]+]]:_(s32) = G_ANYEXT [[ANYEXT1]](s16) - ; CHECK: $vgpr1 = COPY [[ANYEXT17]](s32) - ; CHECK: [[ANYEXT18:%[0-9]+]]:_(s32) = G_ANYEXT [[ANYEXT2]](s16) - ; CHECK: $vgpr2 = COPY [[ANYEXT18]](s32) - ; CHECK: [[ANYEXT19:%[0-9]+]]:_(s32) = G_ANYEXT [[ANYEXT3]](s16) - ; CHECK: $vgpr3 = COPY [[ANYEXT19]](s32) - ; CHECK: [[ANYEXT20:%[0-9]+]]:_(s32) = G_ANYEXT [[ANYEXT4]](s16) - ; CHECK: $vgpr4 = COPY [[ANYEXT20]](s32) - ; CHECK: [[ANYEXT21:%[0-9]+]]:_(s32) = G_ANYEXT [[ANYEXT5]](s16) - ; CHECK: $vgpr5 = COPY [[ANYEXT21]](s32) - ; CHECK: [[ANYEXT22:%[0-9]+]]:_(s32) = G_ANYEXT [[ANYEXT6]](s16) - ; CHECK: $vgpr6 = COPY [[ANYEXT22]](s32) - ; CHECK: [[ANYEXT23:%[0-9]+]]:_(s32) = G_ANYEXT [[ANYEXT7]](s16) - ; CHECK: $vgpr7 = COPY [[ANYEXT23]](s32) - ; CHECK: [[ANYEXT24:%[0-9]+]]:_(s32) = G_ANYEXT [[ANYEXT8]](s16) - ; CHECK: $vgpr8 = COPY [[ANYEXT24]](s32) - ; CHECK: [[ANYEXT25:%[0-9]+]]:_(s32) = G_ANYEXT [[ANYEXT9]](s16) - ; CHECK: $vgpr9 = COPY [[ANYEXT25]](s32) - ; CHECK: [[ANYEXT26:%[0-9]+]]:_(s32) = G_ANYEXT [[ANYEXT10]](s16) - ; CHECK: $vgpr10 = COPY [[ANYEXT26]](s32) - ; CHECK: [[ANYEXT27:%[0-9]+]]:_(s32) = G_ANYEXT [[ANYEXT11]](s16) - ; CHECK: $vgpr11 = COPY [[ANYEXT27]](s32) - ; CHECK: [[ANYEXT28:%[0-9]+]]:_(s32) = G_ANYEXT [[ANYEXT12]](s16) - ; CHECK: $vgpr12 = COPY [[ANYEXT28]](s32) - ; CHECK: [[ANYEXT29:%[0-9]+]]:_(s32) = G_ANYEXT [[ANYEXT13]](s16) - ; CHECK: $vgpr13 = COPY [[ANYEXT29]](s32) - ; CHECK: [[ANYEXT30:%[0-9]+]]:_(s32) = G_ANYEXT [[ANYEXT14]](s16) - ; CHECK: $vgpr14 = COPY [[ANYEXT30]](s32) - ; CHECK: [[ANYEXT31:%[0-9]+]]:_(s32) = G_ANYEXT [[ANYEXT15]](s16) - ; CHECK: $vgpr15 = COPY [[ANYEXT31]](s32) - ; CHECK: [[COPY1:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY]] - ; CHECK: S_SETPC_B64_return [[COPY1]], implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7, implicit $vgpr8, implicit $vgpr9, implicit $vgpr10, implicit $vgpr11, implicit $vgpr12, implicit $vgpr13, implicit $vgpr14, implicit $vgpr15 + ; CHECK-NEXT: liveins: $sgpr30_sgpr31 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr_64 = COPY 
$sgpr30_sgpr31 + ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p4) = G_IMPLICIT_DEF + ; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[DEF]](p4) :: (volatile load (p1) from `<16 x i8> addrspace(1)* addrspace(4)* undef`, addrspace 4) + ; CHECK-NEXT: [[LOAD1:%[0-9]+]]:_(<16 x s8>) = G_LOAD [[LOAD]](p1) :: (load (<16 x s8>) from %ir.ptr, addrspace 1) + ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s8), [[UV1:%[0-9]+]]:_(s8), [[UV2:%[0-9]+]]:_(s8), [[UV3:%[0-9]+]]:_(s8), [[UV4:%[0-9]+]]:_(s8), [[UV5:%[0-9]+]]:_(s8), [[UV6:%[0-9]+]]:_(s8), [[UV7:%[0-9]+]]:_(s8), [[UV8:%[0-9]+]]:_(s8), [[UV9:%[0-9]+]]:_(s8), [[UV10:%[0-9]+]]:_(s8), [[UV11:%[0-9]+]]:_(s8), [[UV12:%[0-9]+]]:_(s8), [[UV13:%[0-9]+]]:_(s8), [[UV14:%[0-9]+]]:_(s8), [[UV15:%[0-9]+]]:_(s8) = G_UNMERGE_VALUES [[LOAD1]](<16 x s8>) + ; CHECK-NEXT: [[ANYEXT:%[0-9]+]]:_(s16) = G_ANYEXT [[UV]](s8) + ; CHECK-NEXT: [[ANYEXT1:%[0-9]+]]:_(s16) = G_ANYEXT [[UV1]](s8) + ; CHECK-NEXT: [[ANYEXT2:%[0-9]+]]:_(s16) = G_ANYEXT [[UV2]](s8) + ; CHECK-NEXT: [[ANYEXT3:%[0-9]+]]:_(s16) = G_ANYEXT [[UV3]](s8) + ; CHECK-NEXT: [[ANYEXT4:%[0-9]+]]:_(s16) = G_ANYEXT [[UV4]](s8) + ; CHECK-NEXT: [[ANYEXT5:%[0-9]+]]:_(s16) = G_ANYEXT [[UV5]](s8) + ; CHECK-NEXT: [[ANYEXT6:%[0-9]+]]:_(s16) = G_ANYEXT [[UV6]](s8) + ; CHECK-NEXT: [[ANYEXT7:%[0-9]+]]:_(s16) = G_ANYEXT [[UV7]](s8) + ; CHECK-NEXT: [[ANYEXT8:%[0-9]+]]:_(s16) = G_ANYEXT [[UV8]](s8) + ; CHECK-NEXT: [[ANYEXT9:%[0-9]+]]:_(s16) = G_ANYEXT [[UV9]](s8) + ; CHECK-NEXT: [[ANYEXT10:%[0-9]+]]:_(s16) = G_ANYEXT [[UV10]](s8) + ; CHECK-NEXT: [[ANYEXT11:%[0-9]+]]:_(s16) = G_ANYEXT [[UV11]](s8) + ; CHECK-NEXT: [[ANYEXT12:%[0-9]+]]:_(s16) = G_ANYEXT [[UV12]](s8) + ; CHECK-NEXT: [[ANYEXT13:%[0-9]+]]:_(s16) = G_ANYEXT [[UV13]](s8) + ; CHECK-NEXT: [[ANYEXT14:%[0-9]+]]:_(s16) = G_ANYEXT [[UV14]](s8) + ; CHECK-NEXT: [[ANYEXT15:%[0-9]+]]:_(s16) = G_ANYEXT [[UV15]](s8) + ; CHECK-NEXT: [[ANYEXT16:%[0-9]+]]:_(s32) = G_ANYEXT [[ANYEXT]](s16) + ; CHECK-NEXT: $vgpr0 = COPY [[ANYEXT16]](s32) + ; CHECK-NEXT: [[ANYEXT17:%[0-9]+]]:_(s32) = G_ANYEXT [[ANYEXT1]](s16) + ; CHECK-NEXT: $vgpr1 = COPY [[ANYEXT17]](s32) + ; CHECK-NEXT: [[ANYEXT18:%[0-9]+]]:_(s32) = G_ANYEXT [[ANYEXT2]](s16) + ; CHECK-NEXT: $vgpr2 = COPY [[ANYEXT18]](s32) + ; CHECK-NEXT: [[ANYEXT19:%[0-9]+]]:_(s32) = G_ANYEXT [[ANYEXT3]](s16) + ; CHECK-NEXT: $vgpr3 = COPY [[ANYEXT19]](s32) + ; CHECK-NEXT: [[ANYEXT20:%[0-9]+]]:_(s32) = G_ANYEXT [[ANYEXT4]](s16) + ; CHECK-NEXT: $vgpr4 = COPY [[ANYEXT20]](s32) + ; CHECK-NEXT: [[ANYEXT21:%[0-9]+]]:_(s32) = G_ANYEXT [[ANYEXT5]](s16) + ; CHECK-NEXT: $vgpr5 = COPY [[ANYEXT21]](s32) + ; CHECK-NEXT: [[ANYEXT22:%[0-9]+]]:_(s32) = G_ANYEXT [[ANYEXT6]](s16) + ; CHECK-NEXT: $vgpr6 = COPY [[ANYEXT22]](s32) + ; CHECK-NEXT: [[ANYEXT23:%[0-9]+]]:_(s32) = G_ANYEXT [[ANYEXT7]](s16) + ; CHECK-NEXT: $vgpr7 = COPY [[ANYEXT23]](s32) + ; CHECK-NEXT: [[ANYEXT24:%[0-9]+]]:_(s32) = G_ANYEXT [[ANYEXT8]](s16) + ; CHECK-NEXT: $vgpr8 = COPY [[ANYEXT24]](s32) + ; CHECK-NEXT: [[ANYEXT25:%[0-9]+]]:_(s32) = G_ANYEXT [[ANYEXT9]](s16) + ; CHECK-NEXT: $vgpr9 = COPY [[ANYEXT25]](s32) + ; CHECK-NEXT: [[ANYEXT26:%[0-9]+]]:_(s32) = G_ANYEXT [[ANYEXT10]](s16) + ; CHECK-NEXT: $vgpr10 = COPY [[ANYEXT26]](s32) + ; CHECK-NEXT: [[ANYEXT27:%[0-9]+]]:_(s32) = G_ANYEXT [[ANYEXT11]](s16) + ; CHECK-NEXT: $vgpr11 = COPY [[ANYEXT27]](s32) + ; CHECK-NEXT: [[ANYEXT28:%[0-9]+]]:_(s32) = G_ANYEXT [[ANYEXT12]](s16) + ; CHECK-NEXT: $vgpr12 = COPY [[ANYEXT28]](s32) + ; CHECK-NEXT: [[ANYEXT29:%[0-9]+]]:_(s32) = G_ANYEXT [[ANYEXT13]](s16) + ; CHECK-NEXT: $vgpr13 = COPY [[ANYEXT29]](s32) + ; CHECK-NEXT: 
[[ANYEXT30:%[0-9]+]]:_(s32) = G_ANYEXT [[ANYEXT14]](s16) + ; CHECK-NEXT: $vgpr14 = COPY [[ANYEXT30]](s32) + ; CHECK-NEXT: [[ANYEXT31:%[0-9]+]]:_(s32) = G_ANYEXT [[ANYEXT15]](s16) + ; CHECK-NEXT: $vgpr15 = COPY [[ANYEXT31]](s32) + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY]] + ; CHECK-NEXT: S_SETPC_B64_return [[COPY1]], implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7, implicit $vgpr8, implicit $vgpr9, implicit $vgpr10, implicit $vgpr11, implicit $vgpr12, implicit $vgpr13, implicit $vgpr14, implicit $vgpr15 %ptr = load volatile <16 x i8> addrspace(1)*, <16 x i8> addrspace(1)* addrspace(4)* undef %val = load <16 x i8>, <16 x i8> addrspace(1)* %ptr ret <16 x i8> %val @@ -1018,19 +1069,20 @@ define <16 x i8> @v16i8_func_void() #0 { define <2 x i8> @v2i8_func_void() #0 { ; CHECK-LABEL: name: v2i8_func_void ; CHECK: bb.1 (%ir-block.0): - ; CHECK: liveins: $sgpr30_sgpr31 - ; CHECK: [[COPY:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 - ; CHECK: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF - ; CHECK: [[LOAD:%[0-9]+]]:_(<2 x s8>) = G_LOAD [[DEF]](p1) :: (load (<2 x s8>) from `<2 x i8> addrspace(1)* undef`, addrspace 1) - ; CHECK: [[UV:%[0-9]+]]:_(s8), [[UV1:%[0-9]+]]:_(s8) = G_UNMERGE_VALUES [[LOAD]](<2 x s8>) - ; CHECK: [[ANYEXT:%[0-9]+]]:_(s16) = G_ANYEXT [[UV]](s8) - ; CHECK: [[ANYEXT1:%[0-9]+]]:_(s16) = G_ANYEXT [[UV1]](s8) - ; CHECK: [[ANYEXT2:%[0-9]+]]:_(s32) = G_ANYEXT [[ANYEXT]](s16) - ; CHECK: $vgpr0 = COPY [[ANYEXT2]](s32) - ; CHECK: [[ANYEXT3:%[0-9]+]]:_(s32) = G_ANYEXT [[ANYEXT1]](s16) - ; CHECK: $vgpr1 = COPY [[ANYEXT3]](s32) - ; CHECK: [[COPY1:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY]] - ; CHECK: S_SETPC_B64_return [[COPY1]], implicit $vgpr0, implicit $vgpr1 + ; CHECK-NEXT: liveins: $sgpr30_sgpr31 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 + ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF + ; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(<2 x s8>) = G_LOAD [[DEF]](p1) :: (load (<2 x s8>) from `<2 x i8> addrspace(1)* undef`, addrspace 1) + ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s8), [[UV1:%[0-9]+]]:_(s8) = G_UNMERGE_VALUES [[LOAD]](<2 x s8>) + ; CHECK-NEXT: [[ANYEXT:%[0-9]+]]:_(s16) = G_ANYEXT [[UV]](s8) + ; CHECK-NEXT: [[ANYEXT1:%[0-9]+]]:_(s16) = G_ANYEXT [[UV1]](s8) + ; CHECK-NEXT: [[ANYEXT2:%[0-9]+]]:_(s32) = G_ANYEXT [[ANYEXT]](s16) + ; CHECK-NEXT: $vgpr0 = COPY [[ANYEXT2]](s32) + ; CHECK-NEXT: [[ANYEXT3:%[0-9]+]]:_(s32) = G_ANYEXT [[ANYEXT1]](s16) + ; CHECK-NEXT: $vgpr1 = COPY [[ANYEXT3]](s32) + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY]] + ; CHECK-NEXT: S_SETPC_B64_return [[COPY1]], implicit $vgpr0, implicit $vgpr1 %val = load <2 x i8>, <2 x i8> addrspace(1)* undef ret <2 x i8> %val } @@ -1038,22 +1090,23 @@ define <2 x i8> @v2i8_func_void() #0 { define <3 x i8> @v3i8_func_void() #0 { ; CHECK-LABEL: name: v3i8_func_void ; CHECK: bb.1 (%ir-block.0): - ; CHECK: liveins: $sgpr30_sgpr31 - ; CHECK: [[COPY:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 - ; CHECK: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF - ; CHECK: [[LOAD:%[0-9]+]]:_(<3 x s8>) = G_LOAD [[DEF]](p1) :: (load (<3 x s8>) from `<3 x i8> addrspace(1)* undef`, align 4, addrspace 1) - ; CHECK: [[UV:%[0-9]+]]:_(s8), [[UV1:%[0-9]+]]:_(s8), [[UV2:%[0-9]+]]:_(s8) = G_UNMERGE_VALUES [[LOAD]](<3 x s8>) - ; CHECK: [[ANYEXT:%[0-9]+]]:_(s16) = G_ANYEXT [[UV]](s8) - ; CHECK: [[ANYEXT1:%[0-9]+]]:_(s16) = G_ANYEXT [[UV1]](s8) - ; CHECK: [[ANYEXT2:%[0-9]+]]:_(s16) = G_ANYEXT [[UV2]](s8) - ; CHECK: [[ANYEXT3:%[0-9]+]]:_(s32) = 
G_ANYEXT [[ANYEXT]](s16) - ; CHECK: $vgpr0 = COPY [[ANYEXT3]](s32) - ; CHECK: [[ANYEXT4:%[0-9]+]]:_(s32) = G_ANYEXT [[ANYEXT1]](s16) - ; CHECK: $vgpr1 = COPY [[ANYEXT4]](s32) - ; CHECK: [[ANYEXT5:%[0-9]+]]:_(s32) = G_ANYEXT [[ANYEXT2]](s16) - ; CHECK: $vgpr2 = COPY [[ANYEXT5]](s32) - ; CHECK: [[COPY1:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY]] - ; CHECK: S_SETPC_B64_return [[COPY1]], implicit $vgpr0, implicit $vgpr1, implicit $vgpr2 + ; CHECK-NEXT: liveins: $sgpr30_sgpr31 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 + ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF + ; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(<3 x s8>) = G_LOAD [[DEF]](p1) :: (load (<3 x s8>) from `<3 x i8> addrspace(1)* undef`, align 4, addrspace 1) + ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s8), [[UV1:%[0-9]+]]:_(s8), [[UV2:%[0-9]+]]:_(s8) = G_UNMERGE_VALUES [[LOAD]](<3 x s8>) + ; CHECK-NEXT: [[ANYEXT:%[0-9]+]]:_(s16) = G_ANYEXT [[UV]](s8) + ; CHECK-NEXT: [[ANYEXT1:%[0-9]+]]:_(s16) = G_ANYEXT [[UV1]](s8) + ; CHECK-NEXT: [[ANYEXT2:%[0-9]+]]:_(s16) = G_ANYEXT [[UV2]](s8) + ; CHECK-NEXT: [[ANYEXT3:%[0-9]+]]:_(s32) = G_ANYEXT [[ANYEXT]](s16) + ; CHECK-NEXT: $vgpr0 = COPY [[ANYEXT3]](s32) + ; CHECK-NEXT: [[ANYEXT4:%[0-9]+]]:_(s32) = G_ANYEXT [[ANYEXT1]](s16) + ; CHECK-NEXT: $vgpr1 = COPY [[ANYEXT4]](s32) + ; CHECK-NEXT: [[ANYEXT5:%[0-9]+]]:_(s32) = G_ANYEXT [[ANYEXT2]](s16) + ; CHECK-NEXT: $vgpr2 = COPY [[ANYEXT5]](s32) + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY]] + ; CHECK-NEXT: S_SETPC_B64_return [[COPY1]], implicit $vgpr0, implicit $vgpr1, implicit $vgpr2 %val = load <3 x i8>, <3 x i8> addrspace(1)* undef ret <3 x i8> %val } @@ -1061,26 +1114,27 @@ define <3 x i8> @v3i8_func_void() #0 { define <4 x i8> @v4i8_func_void() #0 { ; CHECK-LABEL: name: v4i8_func_void ; CHECK: bb.1 (%ir-block.0): - ; CHECK: liveins: $sgpr30_sgpr31 - ; CHECK: [[COPY:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 - ; CHECK: [[DEF:%[0-9]+]]:_(p4) = G_IMPLICIT_DEF - ; CHECK: [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[DEF]](p4) :: (volatile load (p1) from `<4 x i8> addrspace(1)* addrspace(4)* undef`, addrspace 4) - ; CHECK: [[LOAD1:%[0-9]+]]:_(<4 x s8>) = G_LOAD [[LOAD]](p1) :: (load (<4 x s8>) from %ir.ptr, addrspace 1) - ; CHECK: [[UV:%[0-9]+]]:_(s8), [[UV1:%[0-9]+]]:_(s8), [[UV2:%[0-9]+]]:_(s8), [[UV3:%[0-9]+]]:_(s8) = G_UNMERGE_VALUES [[LOAD1]](<4 x s8>) - ; CHECK: [[ANYEXT:%[0-9]+]]:_(s16) = G_ANYEXT [[UV]](s8) - ; CHECK: [[ANYEXT1:%[0-9]+]]:_(s16) = G_ANYEXT [[UV1]](s8) - ; CHECK: [[ANYEXT2:%[0-9]+]]:_(s16) = G_ANYEXT [[UV2]](s8) - ; CHECK: [[ANYEXT3:%[0-9]+]]:_(s16) = G_ANYEXT [[UV3]](s8) - ; CHECK: [[ANYEXT4:%[0-9]+]]:_(s32) = G_ANYEXT [[ANYEXT]](s16) - ; CHECK: $vgpr0 = COPY [[ANYEXT4]](s32) - ; CHECK: [[ANYEXT5:%[0-9]+]]:_(s32) = G_ANYEXT [[ANYEXT1]](s16) - ; CHECK: $vgpr1 = COPY [[ANYEXT5]](s32) - ; CHECK: [[ANYEXT6:%[0-9]+]]:_(s32) = G_ANYEXT [[ANYEXT2]](s16) - ; CHECK: $vgpr2 = COPY [[ANYEXT6]](s32) - ; CHECK: [[ANYEXT7:%[0-9]+]]:_(s32) = G_ANYEXT [[ANYEXT3]](s16) - ; CHECK: $vgpr3 = COPY [[ANYEXT7]](s32) - ; CHECK: [[COPY1:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY]] - ; CHECK: S_SETPC_B64_return [[COPY1]], implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3 + ; CHECK-NEXT: liveins: $sgpr30_sgpr31 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 + ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p4) = G_IMPLICIT_DEF + ; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[DEF]](p4) :: (volatile load (p1) from `<4 x i8> addrspace(1)* addrspace(4)* undef`, addrspace 4) + ; CHECK-NEXT: 
[[LOAD1:%[0-9]+]]:_(<4 x s8>) = G_LOAD [[LOAD]](p1) :: (load (<4 x s8>) from %ir.ptr, addrspace 1) + ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s8), [[UV1:%[0-9]+]]:_(s8), [[UV2:%[0-9]+]]:_(s8), [[UV3:%[0-9]+]]:_(s8) = G_UNMERGE_VALUES [[LOAD1]](<4 x s8>) + ; CHECK-NEXT: [[ANYEXT:%[0-9]+]]:_(s16) = G_ANYEXT [[UV]](s8) + ; CHECK-NEXT: [[ANYEXT1:%[0-9]+]]:_(s16) = G_ANYEXT [[UV1]](s8) + ; CHECK-NEXT: [[ANYEXT2:%[0-9]+]]:_(s16) = G_ANYEXT [[UV2]](s8) + ; CHECK-NEXT: [[ANYEXT3:%[0-9]+]]:_(s16) = G_ANYEXT [[UV3]](s8) + ; CHECK-NEXT: [[ANYEXT4:%[0-9]+]]:_(s32) = G_ANYEXT [[ANYEXT]](s16) + ; CHECK-NEXT: $vgpr0 = COPY [[ANYEXT4]](s32) + ; CHECK-NEXT: [[ANYEXT5:%[0-9]+]]:_(s32) = G_ANYEXT [[ANYEXT1]](s16) + ; CHECK-NEXT: $vgpr1 = COPY [[ANYEXT5]](s32) + ; CHECK-NEXT: [[ANYEXT6:%[0-9]+]]:_(s32) = G_ANYEXT [[ANYEXT2]](s16) + ; CHECK-NEXT: $vgpr2 = COPY [[ANYEXT6]](s32) + ; CHECK-NEXT: [[ANYEXT7:%[0-9]+]]:_(s32) = G_ANYEXT [[ANYEXT3]](s16) + ; CHECK-NEXT: $vgpr3 = COPY [[ANYEXT7]](s32) + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY]] + ; CHECK-NEXT: S_SETPC_B64_return [[COPY1]], implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3 %ptr = load volatile <4 x i8> addrspace(1)*, <4 x i8> addrspace(1)* addrspace(4)* undef %val = load <4 x i8>, <4 x i8> addrspace(1)* %ptr ret <4 x i8> %val @@ -1089,18 +1143,19 @@ define <4 x i8> @v4i8_func_void() #0 { define {i8, i32} @struct_i8_i32_func_void() #0 { ; CHECK-LABEL: name: struct_i8_i32_func_void ; CHECK: bb.1 (%ir-block.0): - ; CHECK: liveins: $sgpr30_sgpr31 - ; CHECK: [[COPY:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 - ; CHECK: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF - ; CHECK: [[LOAD:%[0-9]+]]:_(s8) = G_LOAD [[DEF]](p1) :: (load (s8) from `{ i8, i32 } addrspace(1)* undef`, align 4, addrspace 1) - ; CHECK: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 4 - ; CHECK: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[DEF]], [[C]](s64) - ; CHECK: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p1) :: (load (s32) from `{ i8, i32 } addrspace(1)* undef` + 4, addrspace 1) - ; CHECK: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[LOAD]](s8) - ; CHECK: $vgpr0 = COPY [[ANYEXT]](s32) - ; CHECK: $vgpr1 = COPY [[LOAD1]](s32) - ; CHECK: [[COPY1:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY]] - ; CHECK: S_SETPC_B64_return [[COPY1]], implicit $vgpr0, implicit $vgpr1 + ; CHECK-NEXT: liveins: $sgpr30_sgpr31 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 + ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF + ; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(s8) = G_LOAD [[DEF]](p1) :: (load (s8) from `{ i8, i32 } addrspace(1)* undef`, align 4, addrspace 1) + ; CHECK-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 4 + ; CHECK-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[DEF]], [[C]](s64) + ; CHECK-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p1) :: (load (s32) from `{ i8, i32 } addrspace(1)* undef` + 4, addrspace 1) + ; CHECK-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[LOAD]](s8) + ; CHECK-NEXT: $vgpr0 = COPY [[ANYEXT]](s32) + ; CHECK-NEXT: $vgpr1 = COPY [[LOAD1]](s32) + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY]] + ; CHECK-NEXT: S_SETPC_B64_return [[COPY1]], implicit $vgpr0, implicit $vgpr1 %val = load { i8, i32 }, { i8, i32 } addrspace(1)* undef ret { i8, i32 } %val } @@ -1108,19 +1163,20 @@ define {i8, i32} @struct_i8_i32_func_void() #0 { define void @void_func_sret_struct_i8_i32({ i8, i32 } addrspace(5)* sret({ i8, i32 }) %arg0) #0 { ; CHECK-LABEL: name: void_func_sret_struct_i8_i32 ; CHECK: bb.1 (%ir-block.0): - ; CHECK: liveins: $vgpr0, $sgpr30_sgpr31 - ; 
CHECK: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 - ; CHECK: [[COPY1:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 - ; CHECK: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF - ; CHECK: [[COPY2:%[0-9]+]]:_(p1) = COPY [[DEF]](p1) - ; CHECK: [[LOAD:%[0-9]+]]:_(s8) = G_LOAD [[DEF]](p1) :: (volatile load (s8) from `i8 addrspace(1)* undef`, addrspace 1) - ; CHECK: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[COPY2]](p1) :: (volatile load (s32) from `i32 addrspace(1)* undef`, addrspace 1) - ; CHECK: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 - ; CHECK: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](s32) - ; CHECK: G_STORE [[LOAD]](s8), [[COPY]](p5) :: (store (s8) into %ir.gep01, addrspace 5) - ; CHECK: G_STORE [[LOAD1]](s32), [[PTR_ADD]](p5) :: (store (s32) into %ir.gep1, addrspace 5) - ; CHECK: [[COPY3:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY1]] - ; CHECK: S_SETPC_B64_return [[COPY3]] + ; CHECK-NEXT: liveins: $vgpr0, $sgpr30_sgpr31 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 + ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(p1) = COPY [[DEF]](p1) + ; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(s8) = G_LOAD [[DEF]](p1) :: (volatile load (s8) from `i8 addrspace(1)* undef`, addrspace 1) + ; CHECK-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[COPY2]](p1) :: (volatile load (s32) from `i32 addrspace(1)* undef`, addrspace 1) + ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 + ; CHECK-NEXT: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](s32) + ; CHECK-NEXT: G_STORE [[LOAD]](s8), [[COPY]](p5) :: (store (s8) into %ir.gep01, addrspace 5) + ; CHECK-NEXT: G_STORE [[LOAD1]](s32), [[PTR_ADD]](p5) :: (store (s32) into %ir.gep1, addrspace 5) + ; CHECK-NEXT: [[COPY3:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY1]] + ; CHECK-NEXT: S_SETPC_B64_return [[COPY3]] %val0 = load volatile i8, i8 addrspace(1)* undef %val1 = load volatile i32, i32 addrspace(1)* undef %gep0 = getelementptr inbounds { i8, i32 }, { i8, i32 } addrspace(5)* %arg0, i32 0, i32 0 @@ -1137,15 +1193,16 @@ define void @void_func_sret_struct_i8_i32({ i8, i32 } addrspace(5)* sret({ i8, i define <33 x i32> @v33i32_func_void() #0 { ; CHECK-LABEL: name: v33i32_func_void ; CHECK: bb.1 (%ir-block.0): - ; CHECK: liveins: $vgpr0, $sgpr30_sgpr31 - ; CHECK: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 - ; CHECK: [[COPY1:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 - ; CHECK: [[DEF:%[0-9]+]]:_(p4) = G_IMPLICIT_DEF - ; CHECK: [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[DEF]](p4) :: (volatile load (p1) from `<33 x i32> addrspace(1)* addrspace(4)* undef`, addrspace 4) - ; CHECK: [[LOAD1:%[0-9]+]]:_(<33 x s32>) = G_LOAD [[LOAD]](p1) :: (load (<33 x s32>) from %ir.ptr, align 256, addrspace 1) - ; CHECK: G_STORE [[LOAD1]](<33 x s32>), [[COPY]](p5) :: (store (<33 x s32>), align 256, addrspace 5) - ; CHECK: [[COPY2:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY1]] - ; CHECK: S_SETPC_B64_return [[COPY2]] + ; CHECK-NEXT: liveins: $vgpr0, $sgpr30_sgpr31 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 + ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p4) = G_IMPLICIT_DEF + ; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[DEF]](p4) :: (volatile load (p1) from `<33 x i32> addrspace(1)* addrspace(4)* undef`, addrspace 4) + ; CHECK-NEXT: [[LOAD1:%[0-9]+]]:_(<33 x s32>) = G_LOAD [[LOAD]](p1) :: (load (<33 x s32>) from %ir.ptr, align 256, addrspace 1) + ; CHECK-NEXT: G_STORE [[LOAD1]](<33 x s32>), [[COPY]](p5) :: (store (<33 x s32>), align 256, 
addrspace 5) + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY1]] + ; CHECK-NEXT: S_SETPC_B64_return [[COPY2]] %ptr = load volatile <33 x i32> addrspace(1)*, <33 x i32> addrspace(1)* addrspace(4)* undef %val = load <33 x i32>, <33 x i32> addrspace(1)* %ptr ret <33 x i32> %val @@ -1154,22 +1211,23 @@ define <33 x i32> @v33i32_func_void() #0 { define <33 x i32> @v33i32_func_v33i32_i32(<33 x i32> addrspace(1)* %p, i32 %idx) #0 { ; CHECK-LABEL: name: v33i32_func_v33i32_i32 ; CHECK: bb.1 (%ir-block.0): - ; CHECK: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $sgpr30_sgpr31 - ; CHECK: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 - ; CHECK: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; CHECK: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; CHECK: [[MV:%[0-9]+]]:_(p1) = G_MERGE_VALUES [[COPY1]](s32), [[COPY2]](s32) - ; CHECK: [[COPY3:%[0-9]+]]:_(s32) = COPY $vgpr3 - ; CHECK: [[COPY4:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 - ; CHECK: [[SEXT:%[0-9]+]]:_(s64) = G_SEXT [[COPY3]](s32) - ; CHECK: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 256 - ; CHECK: [[MUL:%[0-9]+]]:_(s64) = G_MUL [[SEXT]], [[C]] - ; CHECK: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[MV]], [[MUL]](s64) - ; CHECK: [[COPY5:%[0-9]+]]:_(p1) = COPY [[PTR_ADD]](p1) - ; CHECK: [[LOAD:%[0-9]+]]:_(<33 x s32>) = G_LOAD [[COPY5]](p1) :: (load (<33 x s32>) from %ir.gep, align 256, addrspace 1) - ; CHECK: G_STORE [[LOAD]](<33 x s32>), [[COPY]](p5) :: (store (<33 x s32>), align 256, addrspace 5) - ; CHECK: [[COPY6:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY4]] - ; CHECK: S_SETPC_B64_return [[COPY6]] + ; CHECK-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $sgpr30_sgpr31 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 + ; CHECK-NEXT: [[MV:%[0-9]+]]:_(p1) = G_MERGE_VALUES [[COPY1]](s32), [[COPY2]](s32) + ; CHECK-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $vgpr3 + ; CHECK-NEXT: [[COPY4:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 + ; CHECK-NEXT: [[SEXT:%[0-9]+]]:_(s64) = G_SEXT [[COPY3]](s32) + ; CHECK-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 256 + ; CHECK-NEXT: [[MUL:%[0-9]+]]:_(s64) = G_MUL [[SEXT]], [[C]] + ; CHECK-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[MV]], [[MUL]](s64) + ; CHECK-NEXT: [[COPY5:%[0-9]+]]:_(p1) = COPY [[PTR_ADD]](p1) + ; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(<33 x s32>) = G_LOAD [[COPY5]](p1) :: (load (<33 x s32>) from %ir.gep, align 256, addrspace 1) + ; CHECK-NEXT: G_STORE [[LOAD]](<33 x s32>), [[COPY]](p5) :: (store (<33 x s32>), align 256, addrspace 5) + ; CHECK-NEXT: [[COPY6:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY4]] + ; CHECK-NEXT: S_SETPC_B64_return [[COPY6]] %gep = getelementptr inbounds <33 x i32>, <33 x i32> addrspace(1)* %p, i32 %idx %val = load <33 x i32>, <33 x i32> addrspace(1)* %gep ret <33 x i32> %val @@ -1178,21 +1236,22 @@ define <33 x i32> @v33i32_func_v33i32_i32(<33 x i32> addrspace(1)* %p, i32 %idx) define { <32 x i32>, i32 } @struct_v32i32_i32_func_void() #0 { ; CHECK-LABEL: name: struct_v32i32_i32_func_void ; CHECK: bb.1 (%ir-block.0): - ; CHECK: liveins: $vgpr0, $sgpr30_sgpr31 - ; CHECK: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 - ; CHECK: [[COPY1:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 - ; CHECK: [[DEF:%[0-9]+]]:_(p4) = G_IMPLICIT_DEF - ; CHECK: [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[DEF]](p4) :: (volatile load (p1) from `{ <32 x i32>, i32 } addrspace(1)* addrspace(4)* undef`, addrspace 4) - ; CHECK: [[LOAD1:%[0-9]+]]:_(<32 x s32>) = G_LOAD [[LOAD]](p1) :: (load (<32 x s32>) from %ir.ptr, addrspace 1) - ; CHECK: 
[[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 128 - ; CHECK: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[LOAD]], [[C]](s64) - ; CHECK: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p1) :: (load (s32) from %ir.ptr + 128, align 128, addrspace 1) - ; CHECK: G_STORE [[LOAD1]](<32 x s32>), [[COPY]](p5) :: (store (<32 x s32>), addrspace 5) - ; CHECK: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 128 - ; CHECK: [[PTR_ADD1:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C1]](s32) - ; CHECK: G_STORE [[LOAD2]](s32), [[PTR_ADD1]](p5) :: (store (s32), align 128, addrspace 5) - ; CHECK: [[COPY2:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY1]] - ; CHECK: S_SETPC_B64_return [[COPY2]] + ; CHECK-NEXT: liveins: $vgpr0, $sgpr30_sgpr31 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 + ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p4) = G_IMPLICIT_DEF + ; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[DEF]](p4) :: (volatile load (p1) from `{ <32 x i32>, i32 } addrspace(1)* addrspace(4)* undef`, addrspace 4) + ; CHECK-NEXT: [[LOAD1:%[0-9]+]]:_(<32 x s32>) = G_LOAD [[LOAD]](p1) :: (load (<32 x s32>) from %ir.ptr, addrspace 1) + ; CHECK-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 128 + ; CHECK-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[LOAD]], [[C]](s64) + ; CHECK-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p1) :: (load (s32) from %ir.ptr + 128, align 128, addrspace 1) + ; CHECK-NEXT: G_STORE [[LOAD1]](<32 x s32>), [[COPY]](p5) :: (store (<32 x s32>), addrspace 5) + ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 128 + ; CHECK-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C1]](s32) + ; CHECK-NEXT: G_STORE [[LOAD2]](s32), [[PTR_ADD1]](p5) :: (store (s32), align 128, addrspace 5) + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY1]] + ; CHECK-NEXT: S_SETPC_B64_return [[COPY2]] %ptr = load volatile { <32 x i32>, i32 } addrspace(1)*, { <32 x i32>, i32 } addrspace(1)* addrspace(4)* undef %val = load { <32 x i32>, i32 }, { <32 x i32>, i32 } addrspace(1)* %ptr ret { <32 x i32>, i32 }%val @@ -1201,21 +1260,22 @@ define { <32 x i32>, i32 } @struct_v32i32_i32_func_void() #0 { define { i32, <32 x i32> } @struct_i32_v32i32_func_void() #0 { ; CHECK-LABEL: name: struct_i32_v32i32_func_void ; CHECK: bb.1 (%ir-block.0): - ; CHECK: liveins: $vgpr0, $sgpr30_sgpr31 - ; CHECK: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 - ; CHECK: [[COPY1:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 - ; CHECK: [[DEF:%[0-9]+]]:_(p4) = G_IMPLICIT_DEF - ; CHECK: [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[DEF]](p4) :: (volatile load (p1) from `{ i32, <32 x i32> } addrspace(1)* addrspace(4)* undef`, addrspace 4) - ; CHECK: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[LOAD]](p1) :: (load (s32) from %ir.ptr, align 128, addrspace 1) - ; CHECK: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 128 - ; CHECK: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[LOAD]], [[C]](s64) - ; CHECK: [[LOAD2:%[0-9]+]]:_(<32 x s32>) = G_LOAD [[PTR_ADD]](p1) :: (load (<32 x s32>) from %ir.ptr + 128, addrspace 1) - ; CHECK: G_STORE [[LOAD1]](s32), [[COPY]](p5) :: (store (s32), align 128, addrspace 5) - ; CHECK: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 128 - ; CHECK: [[PTR_ADD1:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C1]](s32) - ; CHECK: G_STORE [[LOAD2]](<32 x s32>), [[PTR_ADD1]](p5) :: (store (<32 x s32>), addrspace 5) - ; CHECK: [[COPY2:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY1]] - ; CHECK: S_SETPC_B64_return [[COPY2]] + ; CHECK-NEXT: liveins: $vgpr0, $sgpr30_sgpr31 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 + ; CHECK-NEXT: 
[[COPY1:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 + ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p4) = G_IMPLICIT_DEF + ; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[DEF]](p4) :: (volatile load (p1) from `{ i32, <32 x i32> } addrspace(1)* addrspace(4)* undef`, addrspace 4) + ; CHECK-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[LOAD]](p1) :: (load (s32) from %ir.ptr, align 128, addrspace 1) + ; CHECK-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 128 + ; CHECK-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[LOAD]], [[C]](s64) + ; CHECK-NEXT: [[LOAD2:%[0-9]+]]:_(<32 x s32>) = G_LOAD [[PTR_ADD]](p1) :: (load (<32 x s32>) from %ir.ptr + 128, addrspace 1) + ; CHECK-NEXT: G_STORE [[LOAD1]](s32), [[COPY]](p5) :: (store (s32), align 128, addrspace 5) + ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 128 + ; CHECK-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C1]](s32) + ; CHECK-NEXT: G_STORE [[LOAD2]](<32 x s32>), [[PTR_ADD1]](p5) :: (store (<32 x s32>), addrspace 5) + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY1]] + ; CHECK-NEXT: S_SETPC_B64_return [[COPY2]] %ptr = load volatile { i32, <32 x i32> } addrspace(1)*, { i32, <32 x i32> } addrspace(1)* addrspace(4)* undef %val = load { i32, <32 x i32> }, { i32, <32 x i32> } addrspace(1)* %ptr ret { i32, <32 x i32> }%val @@ -1225,28 +1285,29 @@ define { i32, <32 x i32> } @struct_i32_v32i32_func_void() #0 { define { <3 x i32>, i32 } @v3i32_struct_func_void_wasted_reg() #0 { ; CHECK-LABEL: name: v3i32_struct_func_void_wasted_reg ; CHECK: bb.1 (%ir-block.0): - ; CHECK: liveins: $sgpr30_sgpr31 - ; CHECK: [[COPY:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 - ; CHECK: [[DEF:%[0-9]+]]:_(p3) = G_IMPLICIT_DEF - ; CHECK: [[DEF1:%[0-9]+]]:_(<3 x s32>) = G_IMPLICIT_DEF - ; CHECK: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 - ; CHECK: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 - ; CHECK: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 2 - ; CHECK: [[DEF2:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF - ; CHECK: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[DEF]](p3) :: (volatile load (s32) from `i32 addrspace(3)* undef`, addrspace 3) - ; CHECK: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[DEF]](p3) :: (volatile load (s32) from `i32 addrspace(3)* undef`, addrspace 3) - ; CHECK: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[DEF]](p3) :: (volatile load (s32) from `i32 addrspace(3)* undef`, addrspace 3) - ; CHECK: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[DEF]](p3) :: (volatile load (s32) from `i32 addrspace(3)* undef`, addrspace 3) - ; CHECK: [[IVEC:%[0-9]+]]:_(<3 x s32>) = G_INSERT_VECTOR_ELT [[DEF1]], [[LOAD]](s32), [[C]](s32) - ; CHECK: [[IVEC1:%[0-9]+]]:_(<3 x s32>) = G_INSERT_VECTOR_ELT [[IVEC]], [[LOAD1]](s32), [[C1]](s32) - ; CHECK: [[IVEC2:%[0-9]+]]:_(<3 x s32>) = G_INSERT_VECTOR_ELT [[IVEC1]], [[LOAD2]](s32), [[C2]](s32) - ; CHECK: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[IVEC2]](<3 x s32>) - ; CHECK: $vgpr0 = COPY [[UV]](s32) - ; CHECK: $vgpr1 = COPY [[UV1]](s32) - ; CHECK: $vgpr2 = COPY [[UV2]](s32) - ; CHECK: $vgpr3 = COPY [[LOAD3]](s32) - ; CHECK: [[COPY1:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY]] - ; CHECK: S_SETPC_B64_return [[COPY1]], implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3 + ; CHECK-NEXT: liveins: $sgpr30_sgpr31 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 + ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p3) = G_IMPLICIT_DEF + ; CHECK-NEXT: [[DEF1:%[0-9]+]]:_(<3 x s32>) = G_IMPLICIT_DEF + ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 + ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 + ; CHECK-NEXT: 
[[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 2 + ; CHECK-NEXT: [[DEF2:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF + ; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[DEF]](p3) :: (volatile load (s32) from `i32 addrspace(3)* undef`, addrspace 3) + ; CHECK-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[DEF]](p3) :: (volatile load (s32) from `i32 addrspace(3)* undef`, addrspace 3) + ; CHECK-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[DEF]](p3) :: (volatile load (s32) from `i32 addrspace(3)* undef`, addrspace 3) + ; CHECK-NEXT: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[DEF]](p3) :: (volatile load (s32) from `i32 addrspace(3)* undef`, addrspace 3) + ; CHECK-NEXT: [[IVEC:%[0-9]+]]:_(<3 x s32>) = G_INSERT_VECTOR_ELT [[DEF1]], [[LOAD]](s32), [[C]](s32) + ; CHECK-NEXT: [[IVEC1:%[0-9]+]]:_(<3 x s32>) = G_INSERT_VECTOR_ELT [[IVEC]], [[LOAD1]](s32), [[C1]](s32) + ; CHECK-NEXT: [[IVEC2:%[0-9]+]]:_(<3 x s32>) = G_INSERT_VECTOR_ELT [[IVEC1]], [[LOAD2]](s32), [[C2]](s32) + ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[IVEC2]](<3 x s32>) + ; CHECK-NEXT: $vgpr0 = COPY [[UV]](s32) + ; CHECK-NEXT: $vgpr1 = COPY [[UV1]](s32) + ; CHECK-NEXT: $vgpr2 = COPY [[UV2]](s32) + ; CHECK-NEXT: $vgpr3 = COPY [[LOAD3]](s32) + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY]] + ; CHECK-NEXT: S_SETPC_B64_return [[COPY1]], implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3 %load0 = load volatile i32, i32 addrspace(3)* undef %load1 = load volatile i32, i32 addrspace(3)* undef %load2 = load volatile i32, i32 addrspace(3)* undef @@ -1263,29 +1324,30 @@ define { <3 x i32>, i32 } @v3i32_struct_func_void_wasted_reg() #0 { define { <3 x float>, i32 } @v3f32_struct_func_void_wasted_reg() #0 { ; CHECK-LABEL: name: v3f32_struct_func_void_wasted_reg ; CHECK: bb.1 (%ir-block.0): - ; CHECK: liveins: $sgpr30_sgpr31 - ; CHECK: [[COPY:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 - ; CHECK: [[DEF:%[0-9]+]]:_(p3) = G_IMPLICIT_DEF - ; CHECK: [[COPY1:%[0-9]+]]:_(p3) = COPY [[DEF]](p3) - ; CHECK: [[DEF1:%[0-9]+]]:_(<3 x s32>) = G_IMPLICIT_DEF - ; CHECK: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 - ; CHECK: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 - ; CHECK: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 2 - ; CHECK: [[DEF2:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF - ; CHECK: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[DEF]](p3) :: (volatile load (s32) from `float addrspace(3)* undef`, addrspace 3) - ; CHECK: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[DEF]](p3) :: (volatile load (s32) from `float addrspace(3)* undef`, addrspace 3) - ; CHECK: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[DEF]](p3) :: (volatile load (s32) from `float addrspace(3)* undef`, addrspace 3) - ; CHECK: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[COPY1]](p3) :: (volatile load (s32) from `i32 addrspace(3)* undef`, addrspace 3) - ; CHECK: [[IVEC:%[0-9]+]]:_(<3 x s32>) = G_INSERT_VECTOR_ELT [[DEF1]], [[LOAD]](s32), [[C]](s32) - ; CHECK: [[IVEC1:%[0-9]+]]:_(<3 x s32>) = G_INSERT_VECTOR_ELT [[IVEC]], [[LOAD1]](s32), [[C1]](s32) - ; CHECK: [[IVEC2:%[0-9]+]]:_(<3 x s32>) = G_INSERT_VECTOR_ELT [[IVEC1]], [[LOAD2]](s32), [[C2]](s32) - ; CHECK: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[IVEC2]](<3 x s32>) - ; CHECK: $vgpr0 = COPY [[UV]](s32) - ; CHECK: $vgpr1 = COPY [[UV1]](s32) - ; CHECK: $vgpr2 = COPY [[UV2]](s32) - ; CHECK: $vgpr3 = COPY [[LOAD3]](s32) - ; CHECK: [[COPY2:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY]] - ; CHECK: S_SETPC_B64_return [[COPY2]], implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3 + ; CHECK-NEXT: 
liveins: $sgpr30_sgpr31 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 + ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p3) = G_IMPLICIT_DEF + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(p3) = COPY [[DEF]](p3) + ; CHECK-NEXT: [[DEF1:%[0-9]+]]:_(<3 x s32>) = G_IMPLICIT_DEF + ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 + ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 + ; CHECK-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 2 + ; CHECK-NEXT: [[DEF2:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF + ; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[DEF]](p3) :: (volatile load (s32) from `float addrspace(3)* undef`, addrspace 3) + ; CHECK-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[DEF]](p3) :: (volatile load (s32) from `float addrspace(3)* undef`, addrspace 3) + ; CHECK-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[DEF]](p3) :: (volatile load (s32) from `float addrspace(3)* undef`, addrspace 3) + ; CHECK-NEXT: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[COPY1]](p3) :: (volatile load (s32) from `i32 addrspace(3)* undef`, addrspace 3) + ; CHECK-NEXT: [[IVEC:%[0-9]+]]:_(<3 x s32>) = G_INSERT_VECTOR_ELT [[DEF1]], [[LOAD]](s32), [[C]](s32) + ; CHECK-NEXT: [[IVEC1:%[0-9]+]]:_(<3 x s32>) = G_INSERT_VECTOR_ELT [[IVEC]], [[LOAD1]](s32), [[C1]](s32) + ; CHECK-NEXT: [[IVEC2:%[0-9]+]]:_(<3 x s32>) = G_INSERT_VECTOR_ELT [[IVEC1]], [[LOAD2]](s32), [[C2]](s32) + ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[IVEC2]](<3 x s32>) + ; CHECK-NEXT: $vgpr0 = COPY [[UV]](s32) + ; CHECK-NEXT: $vgpr1 = COPY [[UV1]](s32) + ; CHECK-NEXT: $vgpr2 = COPY [[UV2]](s32) + ; CHECK-NEXT: $vgpr3 = COPY [[LOAD3]](s32) + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY]] + ; CHECK-NEXT: S_SETPC_B64_return [[COPY2]], implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3 %load0 = load volatile float, float addrspace(3)* undef %load1 = load volatile float, float addrspace(3)* undef %load2 = load volatile float, float addrspace(3)* undef @@ -1302,22 +1364,23 @@ define { <3 x float>, i32 } @v3f32_struct_func_void_wasted_reg() #0 { define void @void_func_sret_max_known_zero_bits(i8 addrspace(5)* sret(i8) %arg0) #0 { ; CHECK-LABEL: name: void_func_sret_max_known_zero_bits ; CHECK: bb.1 (%ir-block.0): - ; CHECK: liveins: $vgpr0, $sgpr30_sgpr31 - ; CHECK: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 - ; CHECK: [[COPY1:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 - ; CHECK: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; CHECK: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 17 - ; CHECK: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 18 - ; CHECK: [[DEF:%[0-9]+]]:_(p3) = G_IMPLICIT_DEF - ; CHECK: [[PTRTOINT:%[0-9]+]]:_(s32) = G_PTRTOINT [[COPY]](p5) - ; CHECK: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[PTRTOINT]], [[C]](s32) - ; CHECK: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[PTRTOINT]], [[C1]](s32) - ; CHECK: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[PTRTOINT]], [[C2]](s32) - ; CHECK: G_STORE [[LSHR]](s32), [[DEF]](p3) :: (volatile store (s32) into `i32 addrspace(3)* undef`, addrspace 3) - ; CHECK: G_STORE [[LSHR1]](s32), [[DEF]](p3) :: (volatile store (s32) into `i32 addrspace(3)* undef`, addrspace 3) - ; CHECK: G_STORE [[LSHR2]](s32), [[DEF]](p3) :: (volatile store (s32) into `i32 addrspace(3)* undef`, addrspace 3) - ; CHECK: [[COPY2:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY1]] - ; CHECK: S_SETPC_B64_return [[COPY2]] + ; CHECK-NEXT: liveins: $vgpr0, $sgpr30_sgpr31 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 + ; 
CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 17 + ; CHECK-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 18 + ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p3) = G_IMPLICIT_DEF + ; CHECK-NEXT: [[PTRTOINT:%[0-9]+]]:_(s32) = G_PTRTOINT [[COPY]](p5) + ; CHECK-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[PTRTOINT]], [[C]](s32) + ; CHECK-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[PTRTOINT]], [[C1]](s32) + ; CHECK-NEXT: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[PTRTOINT]], [[C2]](s32) + ; CHECK-NEXT: G_STORE [[LSHR]](s32), [[DEF]](p3) :: (volatile store (s32) into `i32 addrspace(3)* undef`, addrspace 3) + ; CHECK-NEXT: G_STORE [[LSHR1]](s32), [[DEF]](p3) :: (volatile store (s32) into `i32 addrspace(3)* undef`, addrspace 3) + ; CHECK-NEXT: G_STORE [[LSHR2]](s32), [[DEF]](p3) :: (volatile store (s32) into `i32 addrspace(3)* undef`, addrspace 3) + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY1]] + ; CHECK-NEXT: S_SETPC_B64_return [[COPY2]] %arg0.int = ptrtoint i8 addrspace(5)* %arg0 to i32 %lshr0 = lshr i32 %arg0.int, 16 @@ -1333,46 +1396,47 @@ define void @void_func_sret_max_known_zero_bits(i8 addrspace(5)* sret(i8) %arg0) define i1022 @i1022_func_void() #0 { ; CHECK-LABEL: name: i1022_func_void ; CHECK: bb.1 (%ir-block.0): - ; CHECK: liveins: $sgpr30_sgpr31 - ; CHECK: [[COPY:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 - ; CHECK: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF - ; CHECK: [[LOAD:%[0-9]+]]:_(s1022) = G_LOAD [[DEF]](p1) :: (load (s1022) from `i1022 addrspace(1)* undef`, align 8, addrspace 1) - ; CHECK: [[ANYEXT:%[0-9]+]]:_(s1024) = G_ANYEXT [[LOAD]](s1022) - ; CHECK: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32), [[UV6:%[0-9]+]]:_(s32), [[UV7:%[0-9]+]]:_(s32), [[UV8:%[0-9]+]]:_(s32), [[UV9:%[0-9]+]]:_(s32), [[UV10:%[0-9]+]]:_(s32), [[UV11:%[0-9]+]]:_(s32), [[UV12:%[0-9]+]]:_(s32), [[UV13:%[0-9]+]]:_(s32), [[UV14:%[0-9]+]]:_(s32), [[UV15:%[0-9]+]]:_(s32), [[UV16:%[0-9]+]]:_(s32), [[UV17:%[0-9]+]]:_(s32), [[UV18:%[0-9]+]]:_(s32), [[UV19:%[0-9]+]]:_(s32), [[UV20:%[0-9]+]]:_(s32), [[UV21:%[0-9]+]]:_(s32), [[UV22:%[0-9]+]]:_(s32), [[UV23:%[0-9]+]]:_(s32), [[UV24:%[0-9]+]]:_(s32), [[UV25:%[0-9]+]]:_(s32), [[UV26:%[0-9]+]]:_(s32), [[UV27:%[0-9]+]]:_(s32), [[UV28:%[0-9]+]]:_(s32), [[UV29:%[0-9]+]]:_(s32), [[UV30:%[0-9]+]]:_(s32), [[UV31:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[ANYEXT]](s1024) - ; CHECK: $vgpr0 = COPY [[UV]](s32) - ; CHECK: $vgpr1 = COPY [[UV1]](s32) - ; CHECK: $vgpr2 = COPY [[UV2]](s32) - ; CHECK: $vgpr3 = COPY [[UV3]](s32) - ; CHECK: $vgpr4 = COPY [[UV4]](s32) - ; CHECK: $vgpr5 = COPY [[UV5]](s32) - ; CHECK: $vgpr6 = COPY [[UV6]](s32) - ; CHECK: $vgpr7 = COPY [[UV7]](s32) - ; CHECK: $vgpr8 = COPY [[UV8]](s32) - ; CHECK: $vgpr9 = COPY [[UV9]](s32) - ; CHECK: $vgpr10 = COPY [[UV10]](s32) - ; CHECK: $vgpr11 = COPY [[UV11]](s32) - ; CHECK: $vgpr12 = COPY [[UV12]](s32) - ; CHECK: $vgpr13 = COPY [[UV13]](s32) - ; CHECK: $vgpr14 = COPY [[UV14]](s32) - ; CHECK: $vgpr15 = COPY [[UV15]](s32) - ; CHECK: $vgpr16 = COPY [[UV16]](s32) - ; CHECK: $vgpr17 = COPY [[UV17]](s32) - ; CHECK: $vgpr18 = COPY [[UV18]](s32) - ; CHECK: $vgpr19 = COPY [[UV19]](s32) - ; CHECK: $vgpr20 = COPY [[UV20]](s32) - ; CHECK: $vgpr21 = COPY [[UV21]](s32) - ; CHECK: $vgpr22 = COPY [[UV22]](s32) - ; CHECK: $vgpr23 = COPY [[UV23]](s32) - ; CHECK: $vgpr24 = COPY [[UV24]](s32) - ; CHECK: $vgpr25 = COPY [[UV25]](s32) - ; CHECK: $vgpr26 = COPY [[UV26]](s32) - ; CHECK: $vgpr27 = COPY 
[[UV27]](s32) - ; CHECK: $vgpr28 = COPY [[UV28]](s32) - ; CHECK: $vgpr29 = COPY [[UV29]](s32) - ; CHECK: $vgpr30 = COPY [[UV30]](s32) - ; CHECK: $vgpr31 = COPY [[UV31]](s32) - ; CHECK: [[COPY1:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY]] - ; CHECK: S_SETPC_B64_return [[COPY1]], implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7, implicit $vgpr8, implicit $vgpr9, implicit $vgpr10, implicit $vgpr11, implicit $vgpr12, implicit $vgpr13, implicit $vgpr14, implicit $vgpr15, implicit $vgpr16, implicit $vgpr17, implicit $vgpr18, implicit $vgpr19, implicit $vgpr20, implicit $vgpr21, implicit $vgpr22, implicit $vgpr23, implicit $vgpr24, implicit $vgpr25, implicit $vgpr26, implicit $vgpr27, implicit $vgpr28, implicit $vgpr29, implicit $vgpr30, implicit $vgpr31 + ; CHECK-NEXT: liveins: $sgpr30_sgpr31 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 + ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF + ; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(s1022) = G_LOAD [[DEF]](p1) :: (load (s1022) from `i1022 addrspace(1)* undef`, align 8, addrspace 1) + ; CHECK-NEXT: [[ANYEXT:%[0-9]+]]:_(s1024) = G_ANYEXT [[LOAD]](s1022) + ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32), [[UV6:%[0-9]+]]:_(s32), [[UV7:%[0-9]+]]:_(s32), [[UV8:%[0-9]+]]:_(s32), [[UV9:%[0-9]+]]:_(s32), [[UV10:%[0-9]+]]:_(s32), [[UV11:%[0-9]+]]:_(s32), [[UV12:%[0-9]+]]:_(s32), [[UV13:%[0-9]+]]:_(s32), [[UV14:%[0-9]+]]:_(s32), [[UV15:%[0-9]+]]:_(s32), [[UV16:%[0-9]+]]:_(s32), [[UV17:%[0-9]+]]:_(s32), [[UV18:%[0-9]+]]:_(s32), [[UV19:%[0-9]+]]:_(s32), [[UV20:%[0-9]+]]:_(s32), [[UV21:%[0-9]+]]:_(s32), [[UV22:%[0-9]+]]:_(s32), [[UV23:%[0-9]+]]:_(s32), [[UV24:%[0-9]+]]:_(s32), [[UV25:%[0-9]+]]:_(s32), [[UV26:%[0-9]+]]:_(s32), [[UV27:%[0-9]+]]:_(s32), [[UV28:%[0-9]+]]:_(s32), [[UV29:%[0-9]+]]:_(s32), [[UV30:%[0-9]+]]:_(s32), [[UV31:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[ANYEXT]](s1024) + ; CHECK-NEXT: $vgpr0 = COPY [[UV]](s32) + ; CHECK-NEXT: $vgpr1 = COPY [[UV1]](s32) + ; CHECK-NEXT: $vgpr2 = COPY [[UV2]](s32) + ; CHECK-NEXT: $vgpr3 = COPY [[UV3]](s32) + ; CHECK-NEXT: $vgpr4 = COPY [[UV4]](s32) + ; CHECK-NEXT: $vgpr5 = COPY [[UV5]](s32) + ; CHECK-NEXT: $vgpr6 = COPY [[UV6]](s32) + ; CHECK-NEXT: $vgpr7 = COPY [[UV7]](s32) + ; CHECK-NEXT: $vgpr8 = COPY [[UV8]](s32) + ; CHECK-NEXT: $vgpr9 = COPY [[UV9]](s32) + ; CHECK-NEXT: $vgpr10 = COPY [[UV10]](s32) + ; CHECK-NEXT: $vgpr11 = COPY [[UV11]](s32) + ; CHECK-NEXT: $vgpr12 = COPY [[UV12]](s32) + ; CHECK-NEXT: $vgpr13 = COPY [[UV13]](s32) + ; CHECK-NEXT: $vgpr14 = COPY [[UV14]](s32) + ; CHECK-NEXT: $vgpr15 = COPY [[UV15]](s32) + ; CHECK-NEXT: $vgpr16 = COPY [[UV16]](s32) + ; CHECK-NEXT: $vgpr17 = COPY [[UV17]](s32) + ; CHECK-NEXT: $vgpr18 = COPY [[UV18]](s32) + ; CHECK-NEXT: $vgpr19 = COPY [[UV19]](s32) + ; CHECK-NEXT: $vgpr20 = COPY [[UV20]](s32) + ; CHECK-NEXT: $vgpr21 = COPY [[UV21]](s32) + ; CHECK-NEXT: $vgpr22 = COPY [[UV22]](s32) + ; CHECK-NEXT: $vgpr23 = COPY [[UV23]](s32) + ; CHECK-NEXT: $vgpr24 = COPY [[UV24]](s32) + ; CHECK-NEXT: $vgpr25 = COPY [[UV25]](s32) + ; CHECK-NEXT: $vgpr26 = COPY [[UV26]](s32) + ; CHECK-NEXT: $vgpr27 = COPY [[UV27]](s32) + ; CHECK-NEXT: $vgpr28 = COPY [[UV28]](s32) + ; CHECK-NEXT: $vgpr29 = COPY [[UV29]](s32) + ; CHECK-NEXT: $vgpr30 = COPY [[UV30]](s32) + ; CHECK-NEXT: $vgpr31 = COPY [[UV31]](s32) + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY]] + ; 
CHECK-NEXT: S_SETPC_B64_return [[COPY1]], implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7, implicit $vgpr8, implicit $vgpr9, implicit $vgpr10, implicit $vgpr11, implicit $vgpr12, implicit $vgpr13, implicit $vgpr14, implicit $vgpr15, implicit $vgpr16, implicit $vgpr17, implicit $vgpr18, implicit $vgpr19, implicit $vgpr20, implicit $vgpr21, implicit $vgpr22, implicit $vgpr23, implicit $vgpr24, implicit $vgpr25, implicit $vgpr26, implicit $vgpr27, implicit $vgpr28, implicit $vgpr29, implicit $vgpr30, implicit $vgpr31 %val = load i1022, i1022 addrspace(1)* undef ret i1022 %val } @@ -1380,46 +1444,47 @@ define i1022 @i1022_func_void() #0 { define signext i1022 @i1022_signext_func_void() #0 { ; CHECK-LABEL: name: i1022_signext_func_void ; CHECK: bb.1 (%ir-block.0): - ; CHECK: liveins: $sgpr30_sgpr31 - ; CHECK: [[COPY:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 - ; CHECK: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF - ; CHECK: [[LOAD:%[0-9]+]]:_(s1022) = G_LOAD [[DEF]](p1) :: (load (s1022) from `i1022 addrspace(1)* undef`, align 8, addrspace 1) - ; CHECK: [[SEXT:%[0-9]+]]:_(s1024) = G_SEXT [[LOAD]](s1022) - ; CHECK: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32), [[UV6:%[0-9]+]]:_(s32), [[UV7:%[0-9]+]]:_(s32), [[UV8:%[0-9]+]]:_(s32), [[UV9:%[0-9]+]]:_(s32), [[UV10:%[0-9]+]]:_(s32), [[UV11:%[0-9]+]]:_(s32), [[UV12:%[0-9]+]]:_(s32), [[UV13:%[0-9]+]]:_(s32), [[UV14:%[0-9]+]]:_(s32), [[UV15:%[0-9]+]]:_(s32), [[UV16:%[0-9]+]]:_(s32), [[UV17:%[0-9]+]]:_(s32), [[UV18:%[0-9]+]]:_(s32), [[UV19:%[0-9]+]]:_(s32), [[UV20:%[0-9]+]]:_(s32), [[UV21:%[0-9]+]]:_(s32), [[UV22:%[0-9]+]]:_(s32), [[UV23:%[0-9]+]]:_(s32), [[UV24:%[0-9]+]]:_(s32), [[UV25:%[0-9]+]]:_(s32), [[UV26:%[0-9]+]]:_(s32), [[UV27:%[0-9]+]]:_(s32), [[UV28:%[0-9]+]]:_(s32), [[UV29:%[0-9]+]]:_(s32), [[UV30:%[0-9]+]]:_(s32), [[UV31:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[SEXT]](s1024) - ; CHECK: $vgpr0 = COPY [[UV]](s32) - ; CHECK: $vgpr1 = COPY [[UV1]](s32) - ; CHECK: $vgpr2 = COPY [[UV2]](s32) - ; CHECK: $vgpr3 = COPY [[UV3]](s32) - ; CHECK: $vgpr4 = COPY [[UV4]](s32) - ; CHECK: $vgpr5 = COPY [[UV5]](s32) - ; CHECK: $vgpr6 = COPY [[UV6]](s32) - ; CHECK: $vgpr7 = COPY [[UV7]](s32) - ; CHECK: $vgpr8 = COPY [[UV8]](s32) - ; CHECK: $vgpr9 = COPY [[UV9]](s32) - ; CHECK: $vgpr10 = COPY [[UV10]](s32) - ; CHECK: $vgpr11 = COPY [[UV11]](s32) - ; CHECK: $vgpr12 = COPY [[UV12]](s32) - ; CHECK: $vgpr13 = COPY [[UV13]](s32) - ; CHECK: $vgpr14 = COPY [[UV14]](s32) - ; CHECK: $vgpr15 = COPY [[UV15]](s32) - ; CHECK: $vgpr16 = COPY [[UV16]](s32) - ; CHECK: $vgpr17 = COPY [[UV17]](s32) - ; CHECK: $vgpr18 = COPY [[UV18]](s32) - ; CHECK: $vgpr19 = COPY [[UV19]](s32) - ; CHECK: $vgpr20 = COPY [[UV20]](s32) - ; CHECK: $vgpr21 = COPY [[UV21]](s32) - ; CHECK: $vgpr22 = COPY [[UV22]](s32) - ; CHECK: $vgpr23 = COPY [[UV23]](s32) - ; CHECK: $vgpr24 = COPY [[UV24]](s32) - ; CHECK: $vgpr25 = COPY [[UV25]](s32) - ; CHECK: $vgpr26 = COPY [[UV26]](s32) - ; CHECK: $vgpr27 = COPY [[UV27]](s32) - ; CHECK: $vgpr28 = COPY [[UV28]](s32) - ; CHECK: $vgpr29 = COPY [[UV29]](s32) - ; CHECK: $vgpr30 = COPY [[UV30]](s32) - ; CHECK: $vgpr31 = COPY [[UV31]](s32) - ; CHECK: [[COPY1:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY]] - ; CHECK: S_SETPC_B64_return [[COPY1]], implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7, implicit $vgpr8, implicit $vgpr9, 
implicit $vgpr10, implicit $vgpr11, implicit $vgpr12, implicit $vgpr13, implicit $vgpr14, implicit $vgpr15, implicit $vgpr16, implicit $vgpr17, implicit $vgpr18, implicit $vgpr19, implicit $vgpr20, implicit $vgpr21, implicit $vgpr22, implicit $vgpr23, implicit $vgpr24, implicit $vgpr25, implicit $vgpr26, implicit $vgpr27, implicit $vgpr28, implicit $vgpr29, implicit $vgpr30, implicit $vgpr31 + ; CHECK-NEXT: liveins: $sgpr30_sgpr31 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 + ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF + ; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(s1022) = G_LOAD [[DEF]](p1) :: (load (s1022) from `i1022 addrspace(1)* undef`, align 8, addrspace 1) + ; CHECK-NEXT: [[SEXT:%[0-9]+]]:_(s1024) = G_SEXT [[LOAD]](s1022) + ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32), [[UV6:%[0-9]+]]:_(s32), [[UV7:%[0-9]+]]:_(s32), [[UV8:%[0-9]+]]:_(s32), [[UV9:%[0-9]+]]:_(s32), [[UV10:%[0-9]+]]:_(s32), [[UV11:%[0-9]+]]:_(s32), [[UV12:%[0-9]+]]:_(s32), [[UV13:%[0-9]+]]:_(s32), [[UV14:%[0-9]+]]:_(s32), [[UV15:%[0-9]+]]:_(s32), [[UV16:%[0-9]+]]:_(s32), [[UV17:%[0-9]+]]:_(s32), [[UV18:%[0-9]+]]:_(s32), [[UV19:%[0-9]+]]:_(s32), [[UV20:%[0-9]+]]:_(s32), [[UV21:%[0-9]+]]:_(s32), [[UV22:%[0-9]+]]:_(s32), [[UV23:%[0-9]+]]:_(s32), [[UV24:%[0-9]+]]:_(s32), [[UV25:%[0-9]+]]:_(s32), [[UV26:%[0-9]+]]:_(s32), [[UV27:%[0-9]+]]:_(s32), [[UV28:%[0-9]+]]:_(s32), [[UV29:%[0-9]+]]:_(s32), [[UV30:%[0-9]+]]:_(s32), [[UV31:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[SEXT]](s1024) + ; CHECK-NEXT: $vgpr0 = COPY [[UV]](s32) + ; CHECK-NEXT: $vgpr1 = COPY [[UV1]](s32) + ; CHECK-NEXT: $vgpr2 = COPY [[UV2]](s32) + ; CHECK-NEXT: $vgpr3 = COPY [[UV3]](s32) + ; CHECK-NEXT: $vgpr4 = COPY [[UV4]](s32) + ; CHECK-NEXT: $vgpr5 = COPY [[UV5]](s32) + ; CHECK-NEXT: $vgpr6 = COPY [[UV6]](s32) + ; CHECK-NEXT: $vgpr7 = COPY [[UV7]](s32) + ; CHECK-NEXT: $vgpr8 = COPY [[UV8]](s32) + ; CHECK-NEXT: $vgpr9 = COPY [[UV9]](s32) + ; CHECK-NEXT: $vgpr10 = COPY [[UV10]](s32) + ; CHECK-NEXT: $vgpr11 = COPY [[UV11]](s32) + ; CHECK-NEXT: $vgpr12 = COPY [[UV12]](s32) + ; CHECK-NEXT: $vgpr13 = COPY [[UV13]](s32) + ; CHECK-NEXT: $vgpr14 = COPY [[UV14]](s32) + ; CHECK-NEXT: $vgpr15 = COPY [[UV15]](s32) + ; CHECK-NEXT: $vgpr16 = COPY [[UV16]](s32) + ; CHECK-NEXT: $vgpr17 = COPY [[UV17]](s32) + ; CHECK-NEXT: $vgpr18 = COPY [[UV18]](s32) + ; CHECK-NEXT: $vgpr19 = COPY [[UV19]](s32) + ; CHECK-NEXT: $vgpr20 = COPY [[UV20]](s32) + ; CHECK-NEXT: $vgpr21 = COPY [[UV21]](s32) + ; CHECK-NEXT: $vgpr22 = COPY [[UV22]](s32) + ; CHECK-NEXT: $vgpr23 = COPY [[UV23]](s32) + ; CHECK-NEXT: $vgpr24 = COPY [[UV24]](s32) + ; CHECK-NEXT: $vgpr25 = COPY [[UV25]](s32) + ; CHECK-NEXT: $vgpr26 = COPY [[UV26]](s32) + ; CHECK-NEXT: $vgpr27 = COPY [[UV27]](s32) + ; CHECK-NEXT: $vgpr28 = COPY [[UV28]](s32) + ; CHECK-NEXT: $vgpr29 = COPY [[UV29]](s32) + ; CHECK-NEXT: $vgpr30 = COPY [[UV30]](s32) + ; CHECK-NEXT: $vgpr31 = COPY [[UV31]](s32) + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY]] + ; CHECK-NEXT: S_SETPC_B64_return [[COPY1]], implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7, implicit $vgpr8, implicit $vgpr9, implicit $vgpr10, implicit $vgpr11, implicit $vgpr12, implicit $vgpr13, implicit $vgpr14, implicit $vgpr15, implicit $vgpr16, implicit $vgpr17, implicit $vgpr18, implicit $vgpr19, implicit $vgpr20, implicit $vgpr21, implicit $vgpr22, 
implicit $vgpr23, implicit $vgpr24, implicit $vgpr25, implicit $vgpr26, implicit $vgpr27, implicit $vgpr28, implicit $vgpr29, implicit $vgpr30, implicit $vgpr31 %val = load i1022, i1022 addrspace(1)* undef ret i1022 %val } @@ -1427,46 +1492,47 @@ define signext i1022 @i1022_signext_func_void() #0 { define zeroext i1022 @i1022_zeroext_func_void() #0 { ; CHECK-LABEL: name: i1022_zeroext_func_void ; CHECK: bb.1 (%ir-block.0): - ; CHECK: liveins: $sgpr30_sgpr31 - ; CHECK: [[COPY:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 - ; CHECK: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF - ; CHECK: [[LOAD:%[0-9]+]]:_(s1022) = G_LOAD [[DEF]](p1) :: (load (s1022) from `i1022 addrspace(1)* undef`, align 8, addrspace 1) - ; CHECK: [[ZEXT:%[0-9]+]]:_(s1024) = G_ZEXT [[LOAD]](s1022) - ; CHECK: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32), [[UV6:%[0-9]+]]:_(s32), [[UV7:%[0-9]+]]:_(s32), [[UV8:%[0-9]+]]:_(s32), [[UV9:%[0-9]+]]:_(s32), [[UV10:%[0-9]+]]:_(s32), [[UV11:%[0-9]+]]:_(s32), [[UV12:%[0-9]+]]:_(s32), [[UV13:%[0-9]+]]:_(s32), [[UV14:%[0-9]+]]:_(s32), [[UV15:%[0-9]+]]:_(s32), [[UV16:%[0-9]+]]:_(s32), [[UV17:%[0-9]+]]:_(s32), [[UV18:%[0-9]+]]:_(s32), [[UV19:%[0-9]+]]:_(s32), [[UV20:%[0-9]+]]:_(s32), [[UV21:%[0-9]+]]:_(s32), [[UV22:%[0-9]+]]:_(s32), [[UV23:%[0-9]+]]:_(s32), [[UV24:%[0-9]+]]:_(s32), [[UV25:%[0-9]+]]:_(s32), [[UV26:%[0-9]+]]:_(s32), [[UV27:%[0-9]+]]:_(s32), [[UV28:%[0-9]+]]:_(s32), [[UV29:%[0-9]+]]:_(s32), [[UV30:%[0-9]+]]:_(s32), [[UV31:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[ZEXT]](s1024) - ; CHECK: $vgpr0 = COPY [[UV]](s32) - ; CHECK: $vgpr1 = COPY [[UV1]](s32) - ; CHECK: $vgpr2 = COPY [[UV2]](s32) - ; CHECK: $vgpr3 = COPY [[UV3]](s32) - ; CHECK: $vgpr4 = COPY [[UV4]](s32) - ; CHECK: $vgpr5 = COPY [[UV5]](s32) - ; CHECK: $vgpr6 = COPY [[UV6]](s32) - ; CHECK: $vgpr7 = COPY [[UV7]](s32) - ; CHECK: $vgpr8 = COPY [[UV8]](s32) - ; CHECK: $vgpr9 = COPY [[UV9]](s32) - ; CHECK: $vgpr10 = COPY [[UV10]](s32) - ; CHECK: $vgpr11 = COPY [[UV11]](s32) - ; CHECK: $vgpr12 = COPY [[UV12]](s32) - ; CHECK: $vgpr13 = COPY [[UV13]](s32) - ; CHECK: $vgpr14 = COPY [[UV14]](s32) - ; CHECK: $vgpr15 = COPY [[UV15]](s32) - ; CHECK: $vgpr16 = COPY [[UV16]](s32) - ; CHECK: $vgpr17 = COPY [[UV17]](s32) - ; CHECK: $vgpr18 = COPY [[UV18]](s32) - ; CHECK: $vgpr19 = COPY [[UV19]](s32) - ; CHECK: $vgpr20 = COPY [[UV20]](s32) - ; CHECK: $vgpr21 = COPY [[UV21]](s32) - ; CHECK: $vgpr22 = COPY [[UV22]](s32) - ; CHECK: $vgpr23 = COPY [[UV23]](s32) - ; CHECK: $vgpr24 = COPY [[UV24]](s32) - ; CHECK: $vgpr25 = COPY [[UV25]](s32) - ; CHECK: $vgpr26 = COPY [[UV26]](s32) - ; CHECK: $vgpr27 = COPY [[UV27]](s32) - ; CHECK: $vgpr28 = COPY [[UV28]](s32) - ; CHECK: $vgpr29 = COPY [[UV29]](s32) - ; CHECK: $vgpr30 = COPY [[UV30]](s32) - ; CHECK: $vgpr31 = COPY [[UV31]](s32) - ; CHECK: [[COPY1:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY]] - ; CHECK: S_SETPC_B64_return [[COPY1]], implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7, implicit $vgpr8, implicit $vgpr9, implicit $vgpr10, implicit $vgpr11, implicit $vgpr12, implicit $vgpr13, implicit $vgpr14, implicit $vgpr15, implicit $vgpr16, implicit $vgpr17, implicit $vgpr18, implicit $vgpr19, implicit $vgpr20, implicit $vgpr21, implicit $vgpr22, implicit $vgpr23, implicit $vgpr24, implicit $vgpr25, implicit $vgpr26, implicit $vgpr27, implicit $vgpr28, implicit $vgpr29, implicit $vgpr30, implicit $vgpr31 + ; CHECK-NEXT: liveins: 
$sgpr30_sgpr31 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 + ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF + ; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(s1022) = G_LOAD [[DEF]](p1) :: (load (s1022) from `i1022 addrspace(1)* undef`, align 8, addrspace 1) + ; CHECK-NEXT: [[ZEXT:%[0-9]+]]:_(s1024) = G_ZEXT [[LOAD]](s1022) + ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32), [[UV6:%[0-9]+]]:_(s32), [[UV7:%[0-9]+]]:_(s32), [[UV8:%[0-9]+]]:_(s32), [[UV9:%[0-9]+]]:_(s32), [[UV10:%[0-9]+]]:_(s32), [[UV11:%[0-9]+]]:_(s32), [[UV12:%[0-9]+]]:_(s32), [[UV13:%[0-9]+]]:_(s32), [[UV14:%[0-9]+]]:_(s32), [[UV15:%[0-9]+]]:_(s32), [[UV16:%[0-9]+]]:_(s32), [[UV17:%[0-9]+]]:_(s32), [[UV18:%[0-9]+]]:_(s32), [[UV19:%[0-9]+]]:_(s32), [[UV20:%[0-9]+]]:_(s32), [[UV21:%[0-9]+]]:_(s32), [[UV22:%[0-9]+]]:_(s32), [[UV23:%[0-9]+]]:_(s32), [[UV24:%[0-9]+]]:_(s32), [[UV25:%[0-9]+]]:_(s32), [[UV26:%[0-9]+]]:_(s32), [[UV27:%[0-9]+]]:_(s32), [[UV28:%[0-9]+]]:_(s32), [[UV29:%[0-9]+]]:_(s32), [[UV30:%[0-9]+]]:_(s32), [[UV31:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[ZEXT]](s1024) + ; CHECK-NEXT: $vgpr0 = COPY [[UV]](s32) + ; CHECK-NEXT: $vgpr1 = COPY [[UV1]](s32) + ; CHECK-NEXT: $vgpr2 = COPY [[UV2]](s32) + ; CHECK-NEXT: $vgpr3 = COPY [[UV3]](s32) + ; CHECK-NEXT: $vgpr4 = COPY [[UV4]](s32) + ; CHECK-NEXT: $vgpr5 = COPY [[UV5]](s32) + ; CHECK-NEXT: $vgpr6 = COPY [[UV6]](s32) + ; CHECK-NEXT: $vgpr7 = COPY [[UV7]](s32) + ; CHECK-NEXT: $vgpr8 = COPY [[UV8]](s32) + ; CHECK-NEXT: $vgpr9 = COPY [[UV9]](s32) + ; CHECK-NEXT: $vgpr10 = COPY [[UV10]](s32) + ; CHECK-NEXT: $vgpr11 = COPY [[UV11]](s32) + ; CHECK-NEXT: $vgpr12 = COPY [[UV12]](s32) + ; CHECK-NEXT: $vgpr13 = COPY [[UV13]](s32) + ; CHECK-NEXT: $vgpr14 = COPY [[UV14]](s32) + ; CHECK-NEXT: $vgpr15 = COPY [[UV15]](s32) + ; CHECK-NEXT: $vgpr16 = COPY [[UV16]](s32) + ; CHECK-NEXT: $vgpr17 = COPY [[UV17]](s32) + ; CHECK-NEXT: $vgpr18 = COPY [[UV18]](s32) + ; CHECK-NEXT: $vgpr19 = COPY [[UV19]](s32) + ; CHECK-NEXT: $vgpr20 = COPY [[UV20]](s32) + ; CHECK-NEXT: $vgpr21 = COPY [[UV21]](s32) + ; CHECK-NEXT: $vgpr22 = COPY [[UV22]](s32) + ; CHECK-NEXT: $vgpr23 = COPY [[UV23]](s32) + ; CHECK-NEXT: $vgpr24 = COPY [[UV24]](s32) + ; CHECK-NEXT: $vgpr25 = COPY [[UV25]](s32) + ; CHECK-NEXT: $vgpr26 = COPY [[UV26]](s32) + ; CHECK-NEXT: $vgpr27 = COPY [[UV27]](s32) + ; CHECK-NEXT: $vgpr28 = COPY [[UV28]](s32) + ; CHECK-NEXT: $vgpr29 = COPY [[UV29]](s32) + ; CHECK-NEXT: $vgpr30 = COPY [[UV30]](s32) + ; CHECK-NEXT: $vgpr31 = COPY [[UV31]](s32) + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY]] + ; CHECK-NEXT: S_SETPC_B64_return [[COPY1]], implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7, implicit $vgpr8, implicit $vgpr9, implicit $vgpr10, implicit $vgpr11, implicit $vgpr12, implicit $vgpr13, implicit $vgpr14, implicit $vgpr15, implicit $vgpr16, implicit $vgpr17, implicit $vgpr18, implicit $vgpr19, implicit $vgpr20, implicit $vgpr21, implicit $vgpr22, implicit $vgpr23, implicit $vgpr24, implicit $vgpr25, implicit $vgpr26, implicit $vgpr27, implicit $vgpr28, implicit $vgpr29, implicit $vgpr30, implicit $vgpr31 %val = load i1022, i1022 addrspace(1)* undef ret i1022 %val } @@ -1476,32 +1542,33 @@ define zeroext i1022 @i1022_zeroext_func_void() #0 { define %struct.with.ptrs @ptr_in_struct_func_void() #0 { ; CHECK-LABEL: name: ptr_in_struct_func_void ; CHECK: bb.1 
(%ir-block.0): - ; CHECK: liveins: $vgpr0, $sgpr30_sgpr31 - ; CHECK: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 - ; CHECK: [[COPY1:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 - ; CHECK: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF - ; CHECK: [[LOAD:%[0-9]+]]:_(<32 x s32>) = G_LOAD [[DEF]](p1) :: (volatile load (<32 x s32>) from `%struct.with.ptrs addrspace(1)* undef`, addrspace 1) - ; CHECK: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 128 - ; CHECK: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[DEF]], [[C]](s64) - ; CHECK: [[LOAD1:%[0-9]+]]:_(p3) = G_LOAD [[PTR_ADD]](p1) :: (volatile load (p3) from `%struct.with.ptrs addrspace(1)* undef` + 128, align 128, addrspace 1) - ; CHECK: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 136 - ; CHECK: [[PTR_ADD1:%[0-9]+]]:_(p1) = G_PTR_ADD [[DEF]], [[C1]](s64) - ; CHECK: [[LOAD2:%[0-9]+]]:_(p1) = G_LOAD [[PTR_ADD1]](p1) :: (volatile load (p1) from `%struct.with.ptrs addrspace(1)* undef` + 136, addrspace 1) - ; CHECK: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 144 - ; CHECK: [[PTR_ADD2:%[0-9]+]]:_(p1) = G_PTR_ADD [[DEF]], [[C2]](s64) - ; CHECK: [[LOAD3:%[0-9]+]]:_(<2 x p1>) = G_LOAD [[PTR_ADD2]](p1) :: (volatile load (<2 x p1>) from `%struct.with.ptrs addrspace(1)* undef` + 144, addrspace 1) - ; CHECK: G_STORE [[LOAD]](<32 x s32>), [[COPY]](p5) :: (store (<32 x s32>), addrspace 5) - ; CHECK: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 128 - ; CHECK: [[PTR_ADD3:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C3]](s32) - ; CHECK: G_STORE [[LOAD1]](p3), [[PTR_ADD3]](p5) :: (store (p3), align 128, addrspace 5) - ; CHECK: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 136 - ; CHECK: [[PTR_ADD4:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C4]](s32) - ; CHECK: G_STORE [[LOAD2]](p1), [[PTR_ADD4]](p5) :: (store (p1), addrspace 5) - ; CHECK: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 144 - ; CHECK: [[PTR_ADD5:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C5]](s32) - ; CHECK: G_STORE [[LOAD3]](<2 x p1>), [[PTR_ADD5]](p5) :: (store (<2 x p1>), addrspace 5) - ; CHECK: [[COPY2:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY1]] - ; CHECK: S_SETPC_B64_return [[COPY2]] + ; CHECK-NEXT: liveins: $vgpr0, $sgpr30_sgpr31 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 + ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF + ; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(<32 x s32>) = G_LOAD [[DEF]](p1) :: (volatile load (<32 x s32>) from `%struct.with.ptrs addrspace(1)* undef`, addrspace 1) + ; CHECK-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 128 + ; CHECK-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[DEF]], [[C]](s64) + ; CHECK-NEXT: [[LOAD1:%[0-9]+]]:_(p3) = G_LOAD [[PTR_ADD]](p1) :: (volatile load (p3) from `%struct.with.ptrs addrspace(1)* undef` + 128, align 128, addrspace 1) + ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 136 + ; CHECK-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p1) = G_PTR_ADD [[DEF]], [[C1]](s64) + ; CHECK-NEXT: [[LOAD2:%[0-9]+]]:_(p1) = G_LOAD [[PTR_ADD1]](p1) :: (volatile load (p1) from `%struct.with.ptrs addrspace(1)* undef` + 136, addrspace 1) + ; CHECK-NEXT: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 144 + ; CHECK-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p1) = G_PTR_ADD [[DEF]], [[C2]](s64) + ; CHECK-NEXT: [[LOAD3:%[0-9]+]]:_(<2 x p1>) = G_LOAD [[PTR_ADD2]](p1) :: (volatile load (<2 x p1>) from `%struct.with.ptrs addrspace(1)* undef` + 144, addrspace 1) + ; CHECK-NEXT: G_STORE [[LOAD]](<32 x s32>), [[COPY]](p5) :: (store (<32 x s32>), addrspace 5) + ; CHECK-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 128 + ; CHECK-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], 
[[C3]](s32) + ; CHECK-NEXT: G_STORE [[LOAD1]](p3), [[PTR_ADD3]](p5) :: (store (p3), align 128, addrspace 5) + ; CHECK-NEXT: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 136 + ; CHECK-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C4]](s32) + ; CHECK-NEXT: G_STORE [[LOAD2]](p1), [[PTR_ADD4]](p5) :: (store (p1), addrspace 5) + ; CHECK-NEXT: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 144 + ; CHECK-NEXT: [[PTR_ADD5:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C5]](s32) + ; CHECK-NEXT: G_STORE [[LOAD3]](<2 x p1>), [[PTR_ADD5]](p5) :: (store (<2 x p1>), addrspace 5) + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY1]] + ; CHECK-NEXT: S_SETPC_B64_return [[COPY2]] %val = load volatile %struct.with.ptrs, %struct.with.ptrs addrspace(1)* undef ret %struct.with.ptrs %val } diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-and.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-and.mir index a8ea15ced70a..6d67456bf6f5 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-and.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-and.mir @@ -9,9 +9,9 @@ body: | ; CHECK-LABEL: name: test_and_s32 ; CHECK: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; CHECK: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; CHECK: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY]], [[COPY1]] - ; CHECK: $vgpr0 = COPY [[AND]](s32) + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; CHECK-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY]], [[COPY1]] + ; CHECK-NEXT: $vgpr0 = COPY [[AND]](s32) %0:_(s32) = COPY $vgpr0 %1:_(s32) = COPY $vgpr1 %2:_(s32) = G_AND %0, %1 @@ -26,9 +26,9 @@ body: | ; CHECK-LABEL: name: test_and_s1 ; CHECK: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; CHECK: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; CHECK: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY]], [[COPY1]] - ; CHECK: S_NOP 0, implicit [[AND]](s32) + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; CHECK-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY]], [[COPY1]] + ; CHECK-NEXT: S_NOP 0, implicit [[AND]](s32) %0:_(s32) = COPY $vgpr0 %1:_(s32) = COPY $vgpr1 %2:_(s32) = G_CONSTANT i32 0 @@ -46,22 +46,22 @@ body: | ; CHECK-LABEL: name: test_and_v2s1 ; CHECK: [[COPY:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr0_vgpr1 - ; CHECK: [[COPY1:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr2_vgpr3 - ; CHECK: [[COPY2:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr4_vgpr5 - ; CHECK: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<2 x s32>) - ; CHECK: [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](<2 x s32>) - ; CHECK: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[UV]](s32), [[UV2]] - ; CHECK: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[UV1]](s32), [[UV3]] - ; CHECK: [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<2 x s32>) - ; CHECK: [[UV6:%[0-9]+]]:_(s32), [[UV7:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY2]](<2 x s32>) - ; CHECK: [[ICMP2:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[UV4]](s32), [[UV6]] - ; CHECK: [[ICMP3:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[UV5]](s32), [[UV7]] - ; CHECK: [[AND:%[0-9]+]]:_(s1) = G_AND [[ICMP]], [[ICMP2]] - ; CHECK: [[AND1:%[0-9]+]]:_(s1) = G_AND [[ICMP1]], [[ICMP3]] - ; CHECK: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[AND]](s1) - ; CHECK: [[ANYEXT1:%[0-9]+]]:_(s32) = G_ANYEXT [[AND1]](s1) - ; CHECK: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[ANYEXT]](s32), [[ANYEXT1]](s32) - ; CHECK: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x s32>) + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr2_vgpr3 + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr4_vgpr5 + ; 
CHECK-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<2 x s32>) + ; CHECK-NEXT: [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](<2 x s32>) + ; CHECK-NEXT: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[UV]](s32), [[UV2]] + ; CHECK-NEXT: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[UV1]](s32), [[UV3]] + ; CHECK-NEXT: [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<2 x s32>) + ; CHECK-NEXT: [[UV6:%[0-9]+]]:_(s32), [[UV7:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY2]](<2 x s32>) + ; CHECK-NEXT: [[ICMP2:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[UV4]](s32), [[UV6]] + ; CHECK-NEXT: [[ICMP3:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[UV5]](s32), [[UV7]] + ; CHECK-NEXT: [[AND:%[0-9]+]]:_(s1) = G_AND [[ICMP]], [[ICMP2]] + ; CHECK-NEXT: [[AND1:%[0-9]+]]:_(s1) = G_AND [[ICMP1]], [[ICMP3]] + ; CHECK-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[AND]](s1) + ; CHECK-NEXT: [[ANYEXT1:%[0-9]+]]:_(s32) = G_ANYEXT [[AND1]](s1) + ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[ANYEXT]](s32), [[ANYEXT1]](s32) + ; CHECK-NEXT: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x s32>) %0:_(<2 x s32>) = COPY $vgpr0_vgpr1 %1:_(<2 x s32>) = COPY $vgpr2_vgpr3 %2:_(<2 x s32>) = COPY $vgpr4_vgpr5 @@ -80,26 +80,26 @@ body: | ; CHECK-LABEL: name: test_and_v3s1 ; CHECK: [[COPY:%[0-9]+]]:_(<3 x s32>) = COPY $vgpr0_vgpr1_vgpr2 - ; CHECK: [[COPY1:%[0-9]+]]:_(<3 x s32>) = COPY $vgpr3_vgpr4_vgpr5 - ; CHECK: [[COPY2:%[0-9]+]]:_(<3 x s32>) = COPY $vgpr6_vgpr7_vgpr8 - ; CHECK: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<3 x s32>) - ; CHECK: [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](<3 x s32>) - ; CHECK: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[UV]](s32), [[UV3]] - ; CHECK: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[UV1]](s32), [[UV4]] - ; CHECK: [[ICMP2:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[UV2]](s32), [[UV5]] - ; CHECK: [[UV6:%[0-9]+]]:_(s32), [[UV7:%[0-9]+]]:_(s32), [[UV8:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<3 x s32>) - ; CHECK: [[UV9:%[0-9]+]]:_(s32), [[UV10:%[0-9]+]]:_(s32), [[UV11:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY2]](<3 x s32>) - ; CHECK: [[ICMP3:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[UV6]](s32), [[UV9]] - ; CHECK: [[ICMP4:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[UV7]](s32), [[UV10]] - ; CHECK: [[ICMP5:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[UV8]](s32), [[UV11]] - ; CHECK: [[AND:%[0-9]+]]:_(s1) = G_AND [[ICMP]], [[ICMP3]] - ; CHECK: [[AND1:%[0-9]+]]:_(s1) = G_AND [[ICMP1]], [[ICMP4]] - ; CHECK: [[AND2:%[0-9]+]]:_(s1) = G_AND [[ICMP2]], [[ICMP5]] - ; CHECK: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[AND]](s1) - ; CHECK: [[ANYEXT1:%[0-9]+]]:_(s32) = G_ANYEXT [[AND1]](s1) - ; CHECK: [[ANYEXT2:%[0-9]+]]:_(s32) = G_ANYEXT [[AND2]](s1) - ; CHECK: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[ANYEXT]](s32), [[ANYEXT1]](s32), [[ANYEXT2]](s32) - ; CHECK: $vgpr0_vgpr1_vgpr2 = COPY [[BUILD_VECTOR]](<3 x s32>) + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(<3 x s32>) = COPY $vgpr3_vgpr4_vgpr5 + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(<3 x s32>) = COPY $vgpr6_vgpr7_vgpr8 + ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<3 x s32>) + ; CHECK-NEXT: [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](<3 x s32>) + ; CHECK-NEXT: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[UV]](s32), [[UV3]] + ; 
CHECK-NEXT: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[UV1]](s32), [[UV4]] + ; CHECK-NEXT: [[ICMP2:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[UV2]](s32), [[UV5]] + ; CHECK-NEXT: [[UV6:%[0-9]+]]:_(s32), [[UV7:%[0-9]+]]:_(s32), [[UV8:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<3 x s32>) + ; CHECK-NEXT: [[UV9:%[0-9]+]]:_(s32), [[UV10:%[0-9]+]]:_(s32), [[UV11:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY2]](<3 x s32>) + ; CHECK-NEXT: [[ICMP3:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[UV6]](s32), [[UV9]] + ; CHECK-NEXT: [[ICMP4:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[UV7]](s32), [[UV10]] + ; CHECK-NEXT: [[ICMP5:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[UV8]](s32), [[UV11]] + ; CHECK-NEXT: [[AND:%[0-9]+]]:_(s1) = G_AND [[ICMP]], [[ICMP3]] + ; CHECK-NEXT: [[AND1:%[0-9]+]]:_(s1) = G_AND [[ICMP1]], [[ICMP4]] + ; CHECK-NEXT: [[AND2:%[0-9]+]]:_(s1) = G_AND [[ICMP2]], [[ICMP5]] + ; CHECK-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[AND]](s1) + ; CHECK-NEXT: [[ANYEXT1:%[0-9]+]]:_(s32) = G_ANYEXT [[AND1]](s1) + ; CHECK-NEXT: [[ANYEXT2:%[0-9]+]]:_(s32) = G_ANYEXT [[AND2]](s1) + ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[ANYEXT]](s32), [[ANYEXT1]](s32), [[ANYEXT2]](s32) + ; CHECK-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[BUILD_VECTOR]](<3 x s32>) %0:_(<3 x s32>) = COPY $vgpr0_vgpr1_vgpr2 %1:_(<3 x s32>) = COPY $vgpr3_vgpr4_vgpr5 %2:_(<3 x s32>) = COPY $vgpr6_vgpr7_vgpr8 @@ -118,9 +118,9 @@ body: | ; CHECK-LABEL: name: test_and_s64 ; CHECK: [[COPY:%[0-9]+]]:_(s64) = COPY $vgpr0_vgpr1 - ; CHECK: [[COPY1:%[0-9]+]]:_(s64) = COPY $vgpr2_vgpr3 - ; CHECK: [[AND:%[0-9]+]]:_(s64) = G_AND [[COPY]], [[COPY1]] - ; CHECK: $vgpr0_vgpr1 = COPY [[AND]](s64) + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s64) = COPY $vgpr2_vgpr3 + ; CHECK-NEXT: [[AND:%[0-9]+]]:_(s64) = G_AND [[COPY]], [[COPY1]] + ; CHECK-NEXT: $vgpr0_vgpr1 = COPY [[AND]](s64) %0:_(s64) = COPY $vgpr0_vgpr1 %1:_(s64) = COPY $vgpr2_vgpr3 %2:_(s64) = G_AND %0, %1 @@ -135,16 +135,16 @@ body: | ; CHECK-LABEL: name: test_and_s96 ; CHECK: [[COPY:%[0-9]+]]:_(s96) = COPY $vgpr0_vgpr1_vgpr2 - ; CHECK: [[COPY1:%[0-9]+]]:_(s96) = COPY $vgpr3_vgpr4_vgpr5 - ; CHECK: [[EXTRACT:%[0-9]+]]:_(s64) = G_EXTRACT [[COPY]](s96), 0 - ; CHECK: [[EXTRACT1:%[0-9]+]]:_(s32) = G_EXTRACT [[COPY]](s96), 64 - ; CHECK: [[EXTRACT2:%[0-9]+]]:_(s64) = G_EXTRACT [[COPY1]](s96), 0 - ; CHECK: [[EXTRACT3:%[0-9]+]]:_(s32) = G_EXTRACT [[COPY1]](s96), 64 - ; CHECK: [[AND:%[0-9]+]]:_(s64) = G_AND [[EXTRACT]], [[EXTRACT2]] - ; CHECK: [[AND1:%[0-9]+]]:_(s32) = G_AND [[EXTRACT1]], [[EXTRACT3]] - ; CHECK: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AND]](s64) - ; CHECK: [[MV:%[0-9]+]]:_(s96) = G_MERGE_VALUES [[UV]](s32), [[UV1]](s32), [[AND1]](s32) - ; CHECK: $vgpr0_vgpr1_vgpr2 = COPY [[MV]](s96) + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s96) = COPY $vgpr3_vgpr4_vgpr5 + ; CHECK-NEXT: [[EXTRACT:%[0-9]+]]:_(s64) = G_EXTRACT [[COPY]](s96), 0 + ; CHECK-NEXT: [[EXTRACT1:%[0-9]+]]:_(s32) = G_EXTRACT [[COPY]](s96), 64 + ; CHECK-NEXT: [[EXTRACT2:%[0-9]+]]:_(s64) = G_EXTRACT [[COPY1]](s96), 0 + ; CHECK-NEXT: [[EXTRACT3:%[0-9]+]]:_(s32) = G_EXTRACT [[COPY1]](s96), 64 + ; CHECK-NEXT: [[AND:%[0-9]+]]:_(s64) = G_AND [[EXTRACT]], [[EXTRACT2]] + ; CHECK-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[EXTRACT1]], [[EXTRACT3]] + ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AND]](s64) + ; CHECK-NEXT: [[MV:%[0-9]+]]:_(s96) = G_MERGE_VALUES [[UV]](s32), [[UV1]](s32), [[AND1]](s32) + ; CHECK-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[MV]](s96) %0:_(s96) = COPY $vgpr0_vgpr1_vgpr2 
%1:_(s96) = COPY $vgpr3_vgpr4_vgpr5 %2:_(s96) = G_AND %0, %1 @@ -159,13 +159,13 @@ body: | ; CHECK-LABEL: name: test_and_128 ; CHECK: [[COPY:%[0-9]+]]:_(s128) = COPY $vgpr0_vgpr1_vgpr2_vgpr3 - ; CHECK: [[COPY1:%[0-9]+]]:_(s128) = COPY $vgpr4_vgpr5_vgpr6_vgpr7 - ; CHECK: [[UV:%[0-9]+]]:_(s64), [[UV1:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[COPY]](s128) - ; CHECK: [[UV2:%[0-9]+]]:_(s64), [[UV3:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[COPY1]](s128) - ; CHECK: [[AND:%[0-9]+]]:_(s64) = G_AND [[UV]], [[UV2]] - ; CHECK: [[AND1:%[0-9]+]]:_(s64) = G_AND [[UV1]], [[UV3]] - ; CHECK: [[MV:%[0-9]+]]:_(s128) = G_MERGE_VALUES [[AND]](s64), [[AND1]](s64) - ; CHECK: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[MV]](s128) + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s128) = COPY $vgpr4_vgpr5_vgpr6_vgpr7 + ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s64), [[UV1:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[COPY]](s128) + ; CHECK-NEXT: [[UV2:%[0-9]+]]:_(s64), [[UV3:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[COPY1]](s128) + ; CHECK-NEXT: [[AND:%[0-9]+]]:_(s64) = G_AND [[UV]], [[UV2]] + ; CHECK-NEXT: [[AND1:%[0-9]+]]:_(s64) = G_AND [[UV1]], [[UV3]] + ; CHECK-NEXT: [[MV:%[0-9]+]]:_(s128) = G_MERGE_VALUES [[AND]](s64), [[AND1]](s64) + ; CHECK-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[MV]](s128) %0:_(s128) = COPY $vgpr0_vgpr1_vgpr2_vgpr3 %1:_(s128) = COPY $vgpr4_vgpr5_vgpr6_vgpr7 %2:_(s128) = G_AND %0, %1 @@ -180,9 +180,9 @@ body: | ; CHECK-LABEL: name: test_and_s7 ; CHECK: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; CHECK: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; CHECK: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY]], [[COPY1]] - ; CHECK: $vgpr0 = COPY [[AND]](s32) + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; CHECK-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY]], [[COPY1]] + ; CHECK-NEXT: $vgpr0 = COPY [[AND]](s32) %0:_(s32) = COPY $vgpr0 %1:_(s32) = COPY $vgpr1 %2:_(s7) = G_TRUNC %0 @@ -200,9 +200,9 @@ body: | ; CHECK-LABEL: name: test_and_s8 ; CHECK: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; CHECK: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; CHECK: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY]], [[COPY1]] - ; CHECK: $vgpr0 = COPY [[AND]](s32) + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; CHECK-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY]], [[COPY1]] + ; CHECK-NEXT: $vgpr0 = COPY [[AND]](s32) %0:_(s32) = COPY $vgpr0 %1:_(s32) = COPY $vgpr1 %2:_(s8) = G_TRUNC %0 @@ -220,12 +220,12 @@ body: | ; CHECK-LABEL: name: test_and_s16 ; CHECK: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; CHECK: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; CHECK: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY]](s32) - ; CHECK: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY1]](s32) - ; CHECK: [[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC]], [[TRUNC1]] - ; CHECK: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[AND]](s16) - ; CHECK: $vgpr0 = COPY [[ANYEXT]](s32) + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY]](s32) + ; CHECK-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY1]](s32) + ; CHECK-NEXT: [[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC]], [[TRUNC1]] + ; CHECK-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[AND]](s16) + ; CHECK-NEXT: $vgpr0 = COPY [[ANYEXT]](s32) %0:_(s32) = COPY $vgpr0 %1:_(s32) = COPY $vgpr1 %2:_(s16) = G_TRUNC %0 @@ -243,9 +243,9 @@ body: | ; CHECK-LABEL: name: test_and_s24 ; CHECK: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; CHECK: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; CHECK: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY]], [[COPY1]] - ; CHECK: $vgpr0 = COPY [[AND]](s32) + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; CHECK-NEXT: 
[[AND:%[0-9]+]]:_(s32) = G_AND [[COPY]], [[COPY1]] + ; CHECK-NEXT: $vgpr0 = COPY [[AND]](s32) %0:_(s32) = COPY $vgpr0 %1:_(s32) = COPY $vgpr1 %2:_(s24) = G_TRUNC %0 @@ -263,9 +263,9 @@ body: | ; CHECK-LABEL: name: test_and_s48 ; CHECK: [[COPY:%[0-9]+]]:_(s64) = COPY $vgpr0_vgpr1 - ; CHECK: [[COPY1:%[0-9]+]]:_(s64) = COPY $vgpr2_vgpr3 - ; CHECK: [[AND:%[0-9]+]]:_(s64) = G_AND [[COPY]], [[COPY1]] - ; CHECK: $vgpr0_vgpr1 = COPY [[AND]](s64) + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s64) = COPY $vgpr2_vgpr3 + ; CHECK-NEXT: [[AND:%[0-9]+]]:_(s64) = G_AND [[COPY]], [[COPY1]] + ; CHECK-NEXT: $vgpr0_vgpr1 = COPY [[AND]](s64) %0:_(s64) = COPY $vgpr0_vgpr1 %1:_(s64) = COPY $vgpr2_vgpr3 %2:_(s48) = G_TRUNC %0 @@ -283,9 +283,9 @@ body: | ; CHECK-LABEL: name: test_and_v2s32 ; CHECK: [[COPY:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr0_vgpr1 - ; CHECK: [[COPY1:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr2_vgpr3 - ; CHECK: [[AND:%[0-9]+]]:_(<2 x s32>) = G_AND [[COPY]], [[COPY1]] - ; CHECK: $vgpr0_vgpr1 = COPY [[AND]](<2 x s32>) + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr2_vgpr3 + ; CHECK-NEXT: [[AND:%[0-9]+]]:_(<2 x s32>) = G_AND [[COPY]], [[COPY1]] + ; CHECK-NEXT: $vgpr0_vgpr1 = COPY [[AND]](<2 x s32>) %0:_(<2 x s32>) = COPY $vgpr0_vgpr1 %1:_(<2 x s32>) = COPY $vgpr2_vgpr3 %2:_(<2 x s32>) = G_AND %0, %1 @@ -300,20 +300,20 @@ body: | ; CHECK-LABEL: name: test_and_v3s32 ; CHECK: [[COPY:%[0-9]+]]:_(<3 x s32>) = COPY $vgpr0_vgpr1_vgpr2 - ; CHECK: [[COPY1:%[0-9]+]]:_(<3 x s32>) = COPY $vgpr3_vgpr4_vgpr5 - ; CHECK: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<3 x s32>) - ; CHECK: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF - ; CHECK: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[UV]](s32), [[UV1]](s32) - ; CHECK: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[UV2]](s32), [[DEF]](s32) - ; CHECK: [[DEF1:%[0-9]+]]:_(<2 x s32>) = G_IMPLICIT_DEF - ; CHECK: [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](<3 x s32>) - ; CHECK: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[UV3]](s32), [[UV4]](s32) - ; CHECK: [[BUILD_VECTOR3:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[UV5]](s32), [[DEF]](s32) - ; CHECK: [[AND:%[0-9]+]]:_(<2 x s32>) = G_AND [[BUILD_VECTOR]], [[BUILD_VECTOR2]] - ; CHECK: [[AND1:%[0-9]+]]:_(<2 x s32>) = G_AND [[BUILD_VECTOR1]], [[BUILD_VECTOR3]] - ; CHECK: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s32>) = G_CONCAT_VECTORS [[AND]](<2 x s32>), [[AND1]](<2 x s32>), [[DEF1]](<2 x s32>) - ; CHECK: [[UV6:%[0-9]+]]:_(<3 x s32>), [[UV7:%[0-9]+]]:_(<3 x s32>) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<6 x s32>) - ; CHECK: $vgpr0_vgpr1_vgpr2 = COPY [[UV6]](<3 x s32>) + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(<3 x s32>) = COPY $vgpr3_vgpr4_vgpr5 + ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<3 x s32>) + ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF + ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[UV]](s32), [[UV1]](s32) + ; CHECK-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[UV2]](s32), [[DEF]](s32) + ; CHECK-NEXT: [[DEF1:%[0-9]+]]:_(<2 x s32>) = G_IMPLICIT_DEF + ; CHECK-NEXT: [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](<3 x s32>) + ; CHECK-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[UV3]](s32), [[UV4]](s32) + ; CHECK-NEXT: [[BUILD_VECTOR3:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[UV5]](s32), [[DEF]](s32) + ; 
CHECK-NEXT: [[AND:%[0-9]+]]:_(<2 x s32>) = G_AND [[BUILD_VECTOR]], [[BUILD_VECTOR2]] + ; CHECK-NEXT: [[AND1:%[0-9]+]]:_(<2 x s32>) = G_AND [[BUILD_VECTOR1]], [[BUILD_VECTOR3]] + ; CHECK-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s32>) = G_CONCAT_VECTORS [[AND]](<2 x s32>), [[AND1]](<2 x s32>), [[DEF1]](<2 x s32>) + ; CHECK-NEXT: [[UV6:%[0-9]+]]:_(<3 x s32>), [[UV7:%[0-9]+]]:_(<3 x s32>) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<6 x s32>) + ; CHECK-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[UV6]](<3 x s32>) %0:_(<3 x s32>) = COPY $vgpr0_vgpr1_vgpr2 %1:_(<3 x s32>) = COPY $vgpr3_vgpr4_vgpr5 %2:_(<3 x s32>) = G_AND %0, %1 @@ -328,13 +328,13 @@ body: | ; CHECK-LABEL: name: test_and_v4s32 ; CHECK: [[COPY:%[0-9]+]]:_(<4 x s32>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3 - ; CHECK: [[COPY1:%[0-9]+]]:_(<4 x s32>) = COPY $vgpr4_vgpr5_vgpr6_vgpr7 - ; CHECK: [[UV:%[0-9]+]]:_(<2 x s32>), [[UV1:%[0-9]+]]:_(<2 x s32>) = G_UNMERGE_VALUES [[COPY]](<4 x s32>) - ; CHECK: [[UV2:%[0-9]+]]:_(<2 x s32>), [[UV3:%[0-9]+]]:_(<2 x s32>) = G_UNMERGE_VALUES [[COPY1]](<4 x s32>) - ; CHECK: [[AND:%[0-9]+]]:_(<2 x s32>) = G_AND [[UV]], [[UV2]] - ; CHECK: [[AND1:%[0-9]+]]:_(<2 x s32>) = G_AND [[UV1]], [[UV3]] - ; CHECK: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s32>) = G_CONCAT_VECTORS [[AND]](<2 x s32>), [[AND1]](<2 x s32>) - ; CHECK: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[CONCAT_VECTORS]](<4 x s32>) + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(<4 x s32>) = COPY $vgpr4_vgpr5_vgpr6_vgpr7 + ; CHECK-NEXT: [[UV:%[0-9]+]]:_(<2 x s32>), [[UV1:%[0-9]+]]:_(<2 x s32>) = G_UNMERGE_VALUES [[COPY]](<4 x s32>) + ; CHECK-NEXT: [[UV2:%[0-9]+]]:_(<2 x s32>), [[UV3:%[0-9]+]]:_(<2 x s32>) = G_UNMERGE_VALUES [[COPY1]](<4 x s32>) + ; CHECK-NEXT: [[AND:%[0-9]+]]:_(<2 x s32>) = G_AND [[UV]], [[UV2]] + ; CHECK-NEXT: [[AND1:%[0-9]+]]:_(<2 x s32>) = G_AND [[UV1]], [[UV3]] + ; CHECK-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s32>) = G_CONCAT_VECTORS [[AND]](<2 x s32>), [[AND1]](<2 x s32>) + ; CHECK-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[CONCAT_VECTORS]](<4 x s32>) %0:_(<4 x s32>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3 %1:_(<4 x s32>) = COPY $vgpr4_vgpr5_vgpr6_vgpr7 %2:_(<4 x s32>) = G_AND %0, %1 @@ -348,25 +348,25 @@ body: | ; CHECK-LABEL: name: test_and_v5s32 ; CHECK: [[DEF:%[0-9]+]]:_(<5 x s32>) = G_IMPLICIT_DEF - ; CHECK: [[DEF1:%[0-9]+]]:_(<5 x s32>) = G_IMPLICIT_DEF - ; CHECK: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[DEF]](<5 x s32>) - ; CHECK: [[DEF2:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF - ; CHECK: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[UV]](s32), [[UV1]](s32) - ; CHECK: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[UV2]](s32), [[UV3]](s32) - ; CHECK: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[UV4]](s32), [[DEF2]](s32) - ; CHECK: [[DEF3:%[0-9]+]]:_(<2 x s32>) = G_IMPLICIT_DEF - ; CHECK: [[UV5:%[0-9]+]]:_(s32), [[UV6:%[0-9]+]]:_(s32), [[UV7:%[0-9]+]]:_(s32), [[UV8:%[0-9]+]]:_(s32), [[UV9:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[DEF1]](<5 x s32>) - ; CHECK: [[BUILD_VECTOR3:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[UV5]](s32), [[UV6]](s32) - ; CHECK: [[BUILD_VECTOR4:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[UV7]](s32), [[UV8]](s32) - ; CHECK: [[BUILD_VECTOR5:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[UV9]](s32), [[DEF2]](s32) - ; CHECK: [[AND:%[0-9]+]]:_(<2 x s32>) = G_AND [[BUILD_VECTOR]], [[BUILD_VECTOR3]] - ; CHECK: [[AND1:%[0-9]+]]:_(<2 x s32>) = G_AND [[BUILD_VECTOR1]], [[BUILD_VECTOR4]] - ; CHECK: [[AND2:%[0-9]+]]:_(<2 x s32>) = G_AND [[BUILD_VECTOR2]], 
[[BUILD_VECTOR5]] - ; CHECK: [[CONCAT_VECTORS:%[0-9]+]]:_(<10 x s32>) = G_CONCAT_VECTORS [[AND]](<2 x s32>), [[AND1]](<2 x s32>), [[AND2]](<2 x s32>), [[DEF3]](<2 x s32>), [[DEF3]](<2 x s32>) - ; CHECK: [[UV10:%[0-9]+]]:_(<5 x s32>), [[UV11:%[0-9]+]]:_(<5 x s32>) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<10 x s32>) - ; CHECK: [[DEF4:%[0-9]+]]:_(<8 x s32>) = G_IMPLICIT_DEF - ; CHECK: [[INSERT:%[0-9]+]]:_(<8 x s32>) = G_INSERT [[DEF4]], [[UV10]](<5 x s32>), 0 - ; CHECK: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[INSERT]](<8 x s32>) + ; CHECK-NEXT: [[DEF1:%[0-9]+]]:_(<5 x s32>) = G_IMPLICIT_DEF + ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[DEF]](<5 x s32>) + ; CHECK-NEXT: [[DEF2:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF + ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[UV]](s32), [[UV1]](s32) + ; CHECK-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[UV2]](s32), [[UV3]](s32) + ; CHECK-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[UV4]](s32), [[DEF2]](s32) + ; CHECK-NEXT: [[DEF3:%[0-9]+]]:_(<2 x s32>) = G_IMPLICIT_DEF + ; CHECK-NEXT: [[UV5:%[0-9]+]]:_(s32), [[UV6:%[0-9]+]]:_(s32), [[UV7:%[0-9]+]]:_(s32), [[UV8:%[0-9]+]]:_(s32), [[UV9:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[DEF1]](<5 x s32>) + ; CHECK-NEXT: [[BUILD_VECTOR3:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[UV5]](s32), [[UV6]](s32) + ; CHECK-NEXT: [[BUILD_VECTOR4:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[UV7]](s32), [[UV8]](s32) + ; CHECK-NEXT: [[BUILD_VECTOR5:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[UV9]](s32), [[DEF2]](s32) + ; CHECK-NEXT: [[AND:%[0-9]+]]:_(<2 x s32>) = G_AND [[BUILD_VECTOR]], [[BUILD_VECTOR3]] + ; CHECK-NEXT: [[AND1:%[0-9]+]]:_(<2 x s32>) = G_AND [[BUILD_VECTOR1]], [[BUILD_VECTOR4]] + ; CHECK-NEXT: [[AND2:%[0-9]+]]:_(<2 x s32>) = G_AND [[BUILD_VECTOR2]], [[BUILD_VECTOR5]] + ; CHECK-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<10 x s32>) = G_CONCAT_VECTORS [[AND]](<2 x s32>), [[AND1]](<2 x s32>), [[AND2]](<2 x s32>), [[DEF3]](<2 x s32>), [[DEF3]](<2 x s32>) + ; CHECK-NEXT: [[UV10:%[0-9]+]]:_(<5 x s32>), [[UV11:%[0-9]+]]:_(<5 x s32>) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<10 x s32>) + ; CHECK-NEXT: [[DEF4:%[0-9]+]]:_(<8 x s32>) = G_IMPLICIT_DEF + ; CHECK-NEXT: [[INSERT:%[0-9]+]]:_(<8 x s32>) = G_INSERT [[DEF4]], [[UV10]](<5 x s32>), 0 + ; CHECK-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[INSERT]](<8 x s32>) %0:_(<5 x s32>) = G_IMPLICIT_DEF %1:_(<5 x s32>) = G_IMPLICIT_DEF %2:_(<5 x s32>) = G_AND %0, %1 @@ -383,13 +383,13 @@ body: | ; CHECK-LABEL: name: test_and_v2s64 ; CHECK: [[COPY:%[0-9]+]]:_(<2 x s64>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3 - ; CHECK: [[COPY1:%[0-9]+]]:_(<2 x s64>) = COPY $vgpr4_vgpr5_vgpr6_vgpr7 - ; CHECK: [[UV:%[0-9]+]]:_(s64), [[UV1:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[COPY]](<2 x s64>) - ; CHECK: [[UV2:%[0-9]+]]:_(s64), [[UV3:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[COPY1]](<2 x s64>) - ; CHECK: [[AND:%[0-9]+]]:_(s64) = G_AND [[UV]], [[UV2]] - ; CHECK: [[AND1:%[0-9]+]]:_(s64) = G_AND [[UV1]], [[UV3]] - ; CHECK: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s64>) = G_BUILD_VECTOR [[AND]](s64), [[AND1]](s64) - ; CHECK: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<2 x s64>) + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(<2 x s64>) = COPY $vgpr4_vgpr5_vgpr6_vgpr7 + ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s64), [[UV1:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[COPY]](<2 x s64>) + ; CHECK-NEXT: [[UV2:%[0-9]+]]:_(s64), [[UV3:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES 
[[COPY1]](<2 x s64>) + ; CHECK-NEXT: [[AND:%[0-9]+]]:_(s64) = G_AND [[UV]], [[UV2]] + ; CHECK-NEXT: [[AND1:%[0-9]+]]:_(s64) = G_AND [[UV1]], [[UV3]] + ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s64>) = G_BUILD_VECTOR [[AND]](s64), [[AND1]](s64) + ; CHECK-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<2 x s64>) %0:_(<2 x s64>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3 %1:_(<2 x s64>) = COPY $vgpr4_vgpr5_vgpr6_vgpr7 %2:_(<2 x s64>) = G_AND %0, %1 @@ -404,9 +404,9 @@ body: | ; CHECK-LABEL: name: test_and_v2s16 ; CHECK: [[COPY:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0 - ; CHECK: [[COPY1:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr1 - ; CHECK: [[AND:%[0-9]+]]:_(<2 x s16>) = G_AND [[COPY]], [[COPY1]] - ; CHECK: $vgpr0 = COPY [[AND]](<2 x s16>) + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr1 + ; CHECK-NEXT: [[AND:%[0-9]+]]:_(<2 x s16>) = G_AND [[COPY]], [[COPY1]] + ; CHECK-NEXT: $vgpr0 = COPY [[AND]](<2 x s16>) %0:_(<2 x s16>) = COPY $vgpr0 %1:_(<2 x s16>) = COPY $vgpr1 %2:_(<2 x s16>) = G_AND %0, %1 @@ -420,41 +420,41 @@ body: | liveins: $vgpr0_vgpr1_vgpr2, $vgpr3_vgpr4_vgpr5 ; CHECK-LABEL: name: test_and_v3s16 ; CHECK: [[COPY:%[0-9]+]]:_(<6 x s16>) = COPY $vgpr0_vgpr1_vgpr2 - ; CHECK: [[COPY1:%[0-9]+]]:_(<6 x s16>) = COPY $vgpr3_vgpr4_vgpr5 - ; CHECK: [[UV:%[0-9]+]]:_(<3 x s16>), [[UV1:%[0-9]+]]:_(<3 x s16>) = G_UNMERGE_VALUES [[COPY]](<6 x s16>) - ; CHECK: [[UV2:%[0-9]+]]:_(<3 x s16>), [[UV3:%[0-9]+]]:_(<3 x s16>) = G_UNMERGE_VALUES [[COPY1]](<6 x s16>) - ; CHECK: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF - ; CHECK: [[INSERT:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF]], [[UV]](<3 x s16>), 0 - ; CHECK: [[INSERT1:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF]], [[UV2]](<3 x s16>), 0 - ; CHECK: [[AND:%[0-9]+]]:_(<4 x s16>) = G_AND [[INSERT]], [[INSERT1]] - ; CHECK: [[DEF1:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF - ; CHECK: [[UV4:%[0-9]+]]:_(<2 x s16>), [[UV5:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[AND]](<4 x s16>) - ; CHECK: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV4]](<2 x s16>) - ; CHECK: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; CHECK: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) - ; CHECK: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV5]](<2 x s16>) - ; CHECK: [[UV6:%[0-9]+]]:_(<2 x s16>), [[UV7:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF1]](<4 x s16>) - ; CHECK: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[UV6]](<2 x s16>) - ; CHECK: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C]](s32) - ; CHECK: [[BITCAST3:%[0-9]+]]:_(s32) = G_BITCAST [[UV7]](<2 x s16>) - ; CHECK: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 - ; CHECK: [[AND1:%[0-9]+]]:_(s32) = G_AND [[BITCAST]], [[C1]] - ; CHECK: [[AND2:%[0-9]+]]:_(s32) = G_AND [[LSHR]], [[C1]] - ; CHECK: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND2]], [[C]](s32) - ; CHECK: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND1]], [[SHL]] - ; CHECK: [[BITCAST4:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) - ; CHECK: [[AND3:%[0-9]+]]:_(s32) = G_AND [[BITCAST1]], [[C1]] - ; CHECK: [[AND4:%[0-9]+]]:_(s32) = G_AND [[BITCAST2]], [[C1]] - ; CHECK: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND4]], [[C]](s32) - ; CHECK: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND3]], [[SHL1]] - ; CHECK: [[BITCAST5:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32) - ; CHECK: [[AND5:%[0-9]+]]:_(s32) = G_AND [[LSHR1]], [[C1]] - ; CHECK: [[AND6:%[0-9]+]]:_(s32) = G_AND [[BITCAST3]], [[C1]] - ; CHECK: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND6]], [[C]](s32) - ; CHECK: [[OR2:%[0-9]+]]:_(s32) = G_OR [[AND5]], [[SHL2]] - ; CHECK: [[BITCAST6:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR2]](s32) - ; CHECK: 
[[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BITCAST4]](<2 x s16>), [[BITCAST5]](<2 x s16>), [[BITCAST6]](<2 x s16>) - ; CHECK: $vgpr0_vgpr1_vgpr2 = COPY [[CONCAT_VECTORS]](<6 x s16>) + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(<6 x s16>) = COPY $vgpr3_vgpr4_vgpr5 + ; CHECK-NEXT: [[UV:%[0-9]+]]:_(<3 x s16>), [[UV1:%[0-9]+]]:_(<3 x s16>) = G_UNMERGE_VALUES [[COPY]](<6 x s16>) + ; CHECK-NEXT: [[UV2:%[0-9]+]]:_(<3 x s16>), [[UV3:%[0-9]+]]:_(<3 x s16>) = G_UNMERGE_VALUES [[COPY1]](<6 x s16>) + ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF + ; CHECK-NEXT: [[INSERT:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF]], [[UV]](<3 x s16>), 0 + ; CHECK-NEXT: [[INSERT1:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF]], [[UV2]](<3 x s16>), 0 + ; CHECK-NEXT: [[AND:%[0-9]+]]:_(<4 x s16>) = G_AND [[INSERT]], [[INSERT1]] + ; CHECK-NEXT: [[DEF1:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF + ; CHECK-NEXT: [[UV4:%[0-9]+]]:_(<2 x s16>), [[UV5:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[AND]](<4 x s16>) + ; CHECK-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV4]](<2 x s16>) + ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; CHECK-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) + ; CHECK-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV5]](<2 x s16>) + ; CHECK-NEXT: [[UV6:%[0-9]+]]:_(<2 x s16>), [[UV7:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF1]](<4 x s16>) + ; CHECK-NEXT: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[UV6]](<2 x s16>) + ; CHECK-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C]](s32) + ; CHECK-NEXT: [[BITCAST3:%[0-9]+]]:_(s32) = G_BITCAST [[UV7]](<2 x s16>) + ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 + ; CHECK-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[BITCAST]], [[C1]] + ; CHECK-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[LSHR]], [[C1]] + ; CHECK-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND2]], [[C]](s32) + ; CHECK-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND1]], [[SHL]] + ; CHECK-NEXT: [[BITCAST4:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) + ; CHECK-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[BITCAST1]], [[C1]] + ; CHECK-NEXT: [[AND4:%[0-9]+]]:_(s32) = G_AND [[BITCAST2]], [[C1]] + ; CHECK-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND4]], [[C]](s32) + ; CHECK-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND3]], [[SHL1]] + ; CHECK-NEXT: [[BITCAST5:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32) + ; CHECK-NEXT: [[AND5:%[0-9]+]]:_(s32) = G_AND [[LSHR1]], [[C1]] + ; CHECK-NEXT: [[AND6:%[0-9]+]]:_(s32) = G_AND [[BITCAST3]], [[C1]] + ; CHECK-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND6]], [[C]](s32) + ; CHECK-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[AND5]], [[SHL2]] + ; CHECK-NEXT: [[BITCAST6:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR2]](s32) + ; CHECK-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BITCAST4]](<2 x s16>), [[BITCAST5]](<2 x s16>), [[BITCAST6]](<2 x s16>) + ; CHECK-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[CONCAT_VECTORS]](<6 x s16>) %0:_(<6 x s16>) = COPY $vgpr0_vgpr1_vgpr2 %1:_(<6 x s16>) = COPY $vgpr3_vgpr4_vgpr5 %2:_(<3 x s16>), %3:_(<3 x s16>) = G_UNMERGE_VALUES %0 @@ -474,9 +474,9 @@ body: | ; CHECK-LABEL: name: test_and_v4s16 ; CHECK: [[COPY:%[0-9]+]]:_(<4 x s16>) = COPY $vgpr0_vgpr1 - ; CHECK: [[COPY1:%[0-9]+]]:_(<4 x s16>) = COPY $vgpr2_vgpr3 - ; CHECK: [[AND:%[0-9]+]]:_(<4 x s16>) = G_AND [[COPY]], [[COPY1]] - ; CHECK: $vgpr0_vgpr1 = COPY [[AND]](<4 x s16>) + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(<4 x s16>) = COPY $vgpr2_vgpr3 + ; CHECK-NEXT: [[AND:%[0-9]+]]:_(<4 x s16>) = G_AND [[COPY]], [[COPY1]] + ; CHECK-NEXT: $vgpr0_vgpr1 = COPY [[AND]](<4 
x s16>) %0:_(<4 x s16>) = COPY $vgpr0_vgpr1 %1:_(<4 x s16>) = COPY $vgpr2_vgpr3 %2:_(<4 x s16>) = G_AND %0, %1 @@ -490,54 +490,54 @@ body: | ; CHECK-LABEL: name: test_and_v5s16 ; CHECK: [[DEF:%[0-9]+]]:_(<6 x s16>) = G_IMPLICIT_DEF - ; CHECK: [[DEF1:%[0-9]+]]:_(<6 x s16>) = G_IMPLICIT_DEF - ; CHECK: [[CONCAT_VECTORS:%[0-9]+]]:_(<30 x s16>) = G_CONCAT_VECTORS [[DEF]](<6 x s16>), [[DEF1]](<6 x s16>), [[DEF1]](<6 x s16>), [[DEF1]](<6 x s16>), [[DEF1]](<6 x s16>) - ; CHECK: [[UV:%[0-9]+]]:_(<5 x s16>), [[UV1:%[0-9]+]]:_(<5 x s16>), [[UV2:%[0-9]+]]:_(<5 x s16>), [[UV3:%[0-9]+]]:_(<5 x s16>), [[UV4:%[0-9]+]]:_(<5 x s16>), [[UV5:%[0-9]+]]:_(<5 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<30 x s16>) - ; CHECK: [[DEF2:%[0-9]+]]:_(<6 x s16>) = G_IMPLICIT_DEF - ; CHECK: [[CONCAT_VECTORS1:%[0-9]+]]:_(<30 x s16>) = G_CONCAT_VECTORS [[DEF2]](<6 x s16>), [[DEF1]](<6 x s16>), [[DEF1]](<6 x s16>), [[DEF1]](<6 x s16>), [[DEF1]](<6 x s16>) - ; CHECK: [[UV6:%[0-9]+]]:_(<5 x s16>), [[UV7:%[0-9]+]]:_(<5 x s16>), [[UV8:%[0-9]+]]:_(<5 x s16>), [[UV9:%[0-9]+]]:_(<5 x s16>), [[UV10:%[0-9]+]]:_(<5 x s16>), [[UV11:%[0-9]+]]:_(<5 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS1]](<30 x s16>) - ; CHECK: [[INSERT:%[0-9]+]]:_(<6 x s16>) = G_INSERT [[DEF1]], [[UV]](<5 x s16>), 0 - ; CHECK: [[INSERT1:%[0-9]+]]:_(<6 x s16>) = G_INSERT [[DEF1]], [[UV6]](<5 x s16>), 0 - ; CHECK: [[UV12:%[0-9]+]]:_(<3 x s16>), [[UV13:%[0-9]+]]:_(<3 x s16>) = G_UNMERGE_VALUES [[INSERT]](<6 x s16>) - ; CHECK: [[UV14:%[0-9]+]]:_(<3 x s16>), [[UV15:%[0-9]+]]:_(<3 x s16>) = G_UNMERGE_VALUES [[INSERT1]](<6 x s16>) - ; CHECK: [[DEF3:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF - ; CHECK: [[INSERT2:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF3]], [[UV12]](<3 x s16>), 0 - ; CHECK: [[INSERT3:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF3]], [[UV14]](<3 x s16>), 0 - ; CHECK: [[AND:%[0-9]+]]:_(<4 x s16>) = G_AND [[INSERT2]], [[INSERT3]] - ; CHECK: [[INSERT4:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF3]], [[UV13]](<3 x s16>), 0 - ; CHECK: [[INSERT5:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF3]], [[UV15]](<3 x s16>), 0 - ; CHECK: [[AND1:%[0-9]+]]:_(<4 x s16>) = G_AND [[INSERT4]], [[INSERT5]] - ; CHECK: [[UV16:%[0-9]+]]:_(<2 x s16>), [[UV17:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[AND]](<4 x s16>) - ; CHECK: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV16]](<2 x s16>) - ; CHECK: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; CHECK: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) - ; CHECK: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV17]](<2 x s16>) - ; CHECK: [[UV18:%[0-9]+]]:_(<2 x s16>), [[UV19:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[AND1]](<4 x s16>) - ; CHECK: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[UV18]](<2 x s16>) - ; CHECK: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C]](s32) - ; CHECK: [[BITCAST3:%[0-9]+]]:_(s32) = G_BITCAST [[UV19]](<2 x s16>) - ; CHECK: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 - ; CHECK: [[AND2:%[0-9]+]]:_(s32) = G_AND [[BITCAST]], [[C1]] - ; CHECK: [[AND3:%[0-9]+]]:_(s32) = G_AND [[LSHR]], [[C1]] - ; CHECK: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C]](s32) - ; CHECK: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL]] - ; CHECK: [[BITCAST4:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) - ; CHECK: [[AND4:%[0-9]+]]:_(s32) = G_AND [[BITCAST1]], [[C1]] - ; CHECK: [[AND5:%[0-9]+]]:_(s32) = G_AND [[BITCAST2]], [[C1]] - ; CHECK: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[C]](s32) - ; CHECK: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND4]], [[SHL1]] - ; CHECK: [[BITCAST5:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32) - ; CHECK: 
[[AND6:%[0-9]+]]:_(s32) = G_AND [[LSHR1]], [[C1]] - ; CHECK: [[AND7:%[0-9]+]]:_(s32) = G_AND [[BITCAST3]], [[C1]] - ; CHECK: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND7]], [[C]](s32) - ; CHECK: [[OR2:%[0-9]+]]:_(s32) = G_OR [[AND6]], [[SHL2]] - ; CHECK: [[BITCAST6:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR2]](s32) - ; CHECK: [[CONCAT_VECTORS2:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BITCAST4]](<2 x s16>), [[BITCAST5]](<2 x s16>), [[BITCAST6]](<2 x s16>) - ; CHECK: [[CONCAT_VECTORS3:%[0-9]+]]:_(<30 x s16>) = G_CONCAT_VECTORS [[CONCAT_VECTORS2]](<6 x s16>), [[DEF1]](<6 x s16>), [[DEF1]](<6 x s16>), [[DEF1]](<6 x s16>), [[DEF1]](<6 x s16>) - ; CHECK: [[UV20:%[0-9]+]]:_(<5 x s16>), [[UV21:%[0-9]+]]:_(<5 x s16>), [[UV22:%[0-9]+]]:_(<5 x s16>), [[UV23:%[0-9]+]]:_(<5 x s16>), [[UV24:%[0-9]+]]:_(<5 x s16>), [[UV25:%[0-9]+]]:_(<5 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS3]](<30 x s16>) - ; CHECK: [[DEF4:%[0-9]+]]:_(<8 x s16>) = G_IMPLICIT_DEF - ; CHECK: [[INSERT6:%[0-9]+]]:_(<8 x s16>) = G_INSERT [[DEF4]], [[UV20]](<5 x s16>), 0 - ; CHECK: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[INSERT6]](<8 x s16>) + ; CHECK-NEXT: [[DEF1:%[0-9]+]]:_(<6 x s16>) = G_IMPLICIT_DEF + ; CHECK-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<30 x s16>) = G_CONCAT_VECTORS [[DEF]](<6 x s16>), [[DEF1]](<6 x s16>), [[DEF1]](<6 x s16>), [[DEF1]](<6 x s16>), [[DEF1]](<6 x s16>) + ; CHECK-NEXT: [[UV:%[0-9]+]]:_(<5 x s16>), [[UV1:%[0-9]+]]:_(<5 x s16>), [[UV2:%[0-9]+]]:_(<5 x s16>), [[UV3:%[0-9]+]]:_(<5 x s16>), [[UV4:%[0-9]+]]:_(<5 x s16>), [[UV5:%[0-9]+]]:_(<5 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<30 x s16>) + ; CHECK-NEXT: [[DEF2:%[0-9]+]]:_(<6 x s16>) = G_IMPLICIT_DEF + ; CHECK-NEXT: [[CONCAT_VECTORS1:%[0-9]+]]:_(<30 x s16>) = G_CONCAT_VECTORS [[DEF2]](<6 x s16>), [[DEF1]](<6 x s16>), [[DEF1]](<6 x s16>), [[DEF1]](<6 x s16>), [[DEF1]](<6 x s16>) + ; CHECK-NEXT: [[UV6:%[0-9]+]]:_(<5 x s16>), [[UV7:%[0-9]+]]:_(<5 x s16>), [[UV8:%[0-9]+]]:_(<5 x s16>), [[UV9:%[0-9]+]]:_(<5 x s16>), [[UV10:%[0-9]+]]:_(<5 x s16>), [[UV11:%[0-9]+]]:_(<5 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS1]](<30 x s16>) + ; CHECK-NEXT: [[INSERT:%[0-9]+]]:_(<6 x s16>) = G_INSERT [[DEF1]], [[UV]](<5 x s16>), 0 + ; CHECK-NEXT: [[INSERT1:%[0-9]+]]:_(<6 x s16>) = G_INSERT [[DEF1]], [[UV6]](<5 x s16>), 0 + ; CHECK-NEXT: [[UV12:%[0-9]+]]:_(<3 x s16>), [[UV13:%[0-9]+]]:_(<3 x s16>) = G_UNMERGE_VALUES [[INSERT]](<6 x s16>) + ; CHECK-NEXT: [[UV14:%[0-9]+]]:_(<3 x s16>), [[UV15:%[0-9]+]]:_(<3 x s16>) = G_UNMERGE_VALUES [[INSERT1]](<6 x s16>) + ; CHECK-NEXT: [[DEF3:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF + ; CHECK-NEXT: [[INSERT2:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF3]], [[UV12]](<3 x s16>), 0 + ; CHECK-NEXT: [[INSERT3:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF3]], [[UV14]](<3 x s16>), 0 + ; CHECK-NEXT: [[AND:%[0-9]+]]:_(<4 x s16>) = G_AND [[INSERT2]], [[INSERT3]] + ; CHECK-NEXT: [[INSERT4:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF3]], [[UV13]](<3 x s16>), 0 + ; CHECK-NEXT: [[INSERT5:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF3]], [[UV15]](<3 x s16>), 0 + ; CHECK-NEXT: [[AND1:%[0-9]+]]:_(<4 x s16>) = G_AND [[INSERT4]], [[INSERT5]] + ; CHECK-NEXT: [[UV16:%[0-9]+]]:_(<2 x s16>), [[UV17:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[AND]](<4 x s16>) + ; CHECK-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV16]](<2 x s16>) + ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; CHECK-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) + ; CHECK-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV17]](<2 x s16>) + ; CHECK-NEXT: [[UV18:%[0-9]+]]:_(<2 x s16>), [[UV19:%[0-9]+]]:_(<2 x 
s16>) = G_UNMERGE_VALUES [[AND1]](<4 x s16>) + ; CHECK-NEXT: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[UV18]](<2 x s16>) + ; CHECK-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C]](s32) + ; CHECK-NEXT: [[BITCAST3:%[0-9]+]]:_(s32) = G_BITCAST [[UV19]](<2 x s16>) + ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 + ; CHECK-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[BITCAST]], [[C1]] + ; CHECK-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[LSHR]], [[C1]] + ; CHECK-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C]](s32) + ; CHECK-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL]] + ; CHECK-NEXT: [[BITCAST4:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) + ; CHECK-NEXT: [[AND4:%[0-9]+]]:_(s32) = G_AND [[BITCAST1]], [[C1]] + ; CHECK-NEXT: [[AND5:%[0-9]+]]:_(s32) = G_AND [[BITCAST2]], [[C1]] + ; CHECK-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[C]](s32) + ; CHECK-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND4]], [[SHL1]] + ; CHECK-NEXT: [[BITCAST5:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32) + ; CHECK-NEXT: [[AND6:%[0-9]+]]:_(s32) = G_AND [[LSHR1]], [[C1]] + ; CHECK-NEXT: [[AND7:%[0-9]+]]:_(s32) = G_AND [[BITCAST3]], [[C1]] + ; CHECK-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND7]], [[C]](s32) + ; CHECK-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[AND6]], [[SHL2]] + ; CHECK-NEXT: [[BITCAST6:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR2]](s32) + ; CHECK-NEXT: [[CONCAT_VECTORS2:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BITCAST4]](<2 x s16>), [[BITCAST5]](<2 x s16>), [[BITCAST6]](<2 x s16>) + ; CHECK-NEXT: [[CONCAT_VECTORS3:%[0-9]+]]:_(<30 x s16>) = G_CONCAT_VECTORS [[CONCAT_VECTORS2]](<6 x s16>), [[DEF1]](<6 x s16>), [[DEF1]](<6 x s16>), [[DEF1]](<6 x s16>), [[DEF1]](<6 x s16>) + ; CHECK-NEXT: [[UV20:%[0-9]+]]:_(<5 x s16>), [[UV21:%[0-9]+]]:_(<5 x s16>), [[UV22:%[0-9]+]]:_(<5 x s16>), [[UV23:%[0-9]+]]:_(<5 x s16>), [[UV24:%[0-9]+]]:_(<5 x s16>), [[UV25:%[0-9]+]]:_(<5 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS3]](<30 x s16>) + ; CHECK-NEXT: [[DEF4:%[0-9]+]]:_(<8 x s16>) = G_IMPLICIT_DEF + ; CHECK-NEXT: [[INSERT6:%[0-9]+]]:_(<8 x s16>) = G_INSERT [[DEF4]], [[UV20]](<5 x s16>), 0 + ; CHECK-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[INSERT6]](<8 x s16>) %0:_(<5 x s16>) = G_IMPLICIT_DEF %1:_(<5 x s16>) = G_IMPLICIT_DEF %2:_(<5 x s16>) = G_AND %0, %1 @@ -553,33 +553,33 @@ body: | ; CHECK-LABEL: name: test_and_v3s8 ; CHECK: [[DEF:%[0-9]+]]:_(<3 x s8>) = G_IMPLICIT_DEF - ; CHECK: [[DEF1:%[0-9]+]]:_(<3 x s8>) = G_IMPLICIT_DEF - ; CHECK: [[DEF2:%[0-9]+]]:_(<4 x s8>) = G_IMPLICIT_DEF - ; CHECK: [[INSERT:%[0-9]+]]:_(<4 x s8>) = G_INSERT [[DEF2]], [[DEF]](<3 x s8>), 0 - ; CHECK: [[INSERT1:%[0-9]+]]:_(<4 x s8>) = G_INSERT [[DEF2]], [[DEF1]](<3 x s8>), 0 - ; CHECK: [[UV:%[0-9]+]]:_(s8), [[UV1:%[0-9]+]]:_(s8), [[UV2:%[0-9]+]]:_(s8), [[UV3:%[0-9]+]]:_(s8) = G_UNMERGE_VALUES [[INSERT]](<4 x s8>) - ; CHECK: [[UV4:%[0-9]+]]:_(s8), [[UV5:%[0-9]+]]:_(s8), [[UV6:%[0-9]+]]:_(s8), [[UV7:%[0-9]+]]:_(s8) = G_UNMERGE_VALUES [[INSERT1]](<4 x s8>) - ; CHECK: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[UV]](s8) - ; CHECK: [[ANYEXT1:%[0-9]+]]:_(s32) = G_ANYEXT [[UV4]](s8) - ; CHECK: [[AND:%[0-9]+]]:_(s32) = G_AND [[ANYEXT]], [[ANYEXT1]] - ; CHECK: [[TRUNC:%[0-9]+]]:_(s8) = G_TRUNC [[AND]](s32) - ; CHECK: [[ANYEXT2:%[0-9]+]]:_(s32) = G_ANYEXT [[UV1]](s8) - ; CHECK: [[ANYEXT3:%[0-9]+]]:_(s32) = G_ANYEXT [[UV5]](s8) - ; CHECK: [[AND1:%[0-9]+]]:_(s32) = G_AND [[ANYEXT2]], [[ANYEXT3]] - ; CHECK: [[TRUNC1:%[0-9]+]]:_(s8) = G_TRUNC [[AND1]](s32) - ; CHECK: [[ANYEXT4:%[0-9]+]]:_(s32) = G_ANYEXT [[UV2]](s8) - ; CHECK: 
[[ANYEXT5:%[0-9]+]]:_(s32) = G_ANYEXT [[UV6]](s8) - ; CHECK: [[AND2:%[0-9]+]]:_(s32) = G_AND [[ANYEXT4]], [[ANYEXT5]] - ; CHECK: [[TRUNC2:%[0-9]+]]:_(s8) = G_TRUNC [[AND2]](s32) - ; CHECK: [[ANYEXT6:%[0-9]+]]:_(s32) = G_ANYEXT [[UV3]](s8) - ; CHECK: [[ANYEXT7:%[0-9]+]]:_(s32) = G_ANYEXT [[UV7]](s8) - ; CHECK: [[AND3:%[0-9]+]]:_(s32) = G_AND [[ANYEXT6]], [[ANYEXT7]] - ; CHECK: [[TRUNC3:%[0-9]+]]:_(s8) = G_TRUNC [[AND3]](s32) - ; CHECK: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s8>) = G_BUILD_VECTOR [[TRUNC]](s8), [[TRUNC1]](s8), [[TRUNC2]](s8), [[TRUNC3]](s8) - ; CHECK: [[CONCAT_VECTORS:%[0-9]+]]:_(<12 x s8>) = G_CONCAT_VECTORS [[BUILD_VECTOR]](<4 x s8>), [[DEF2]](<4 x s8>), [[DEF2]](<4 x s8>) - ; CHECK: [[UV8:%[0-9]+]]:_(<3 x s8>), [[UV9:%[0-9]+]]:_(<3 x s8>), [[UV10:%[0-9]+]]:_(<3 x s8>), [[UV11:%[0-9]+]]:_(<3 x s8>) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<12 x s8>) - ; CHECK: [[ANYEXT8:%[0-9]+]]:_(<3 x s32>) = G_ANYEXT [[UV8]](<3 x s8>) - ; CHECK: $vgpr0_vgpr1_vgpr2 = COPY [[ANYEXT8]](<3 x s32>) + ; CHECK-NEXT: [[DEF1:%[0-9]+]]:_(<3 x s8>) = G_IMPLICIT_DEF + ; CHECK-NEXT: [[DEF2:%[0-9]+]]:_(<4 x s8>) = G_IMPLICIT_DEF + ; CHECK-NEXT: [[INSERT:%[0-9]+]]:_(<4 x s8>) = G_INSERT [[DEF2]], [[DEF]](<3 x s8>), 0 + ; CHECK-NEXT: [[INSERT1:%[0-9]+]]:_(<4 x s8>) = G_INSERT [[DEF2]], [[DEF1]](<3 x s8>), 0 + ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s8), [[UV1:%[0-9]+]]:_(s8), [[UV2:%[0-9]+]]:_(s8), [[UV3:%[0-9]+]]:_(s8) = G_UNMERGE_VALUES [[INSERT]](<4 x s8>) + ; CHECK-NEXT: [[UV4:%[0-9]+]]:_(s8), [[UV5:%[0-9]+]]:_(s8), [[UV6:%[0-9]+]]:_(s8), [[UV7:%[0-9]+]]:_(s8) = G_UNMERGE_VALUES [[INSERT1]](<4 x s8>) + ; CHECK-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[UV]](s8) + ; CHECK-NEXT: [[ANYEXT1:%[0-9]+]]:_(s32) = G_ANYEXT [[UV4]](s8) + ; CHECK-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[ANYEXT]], [[ANYEXT1]] + ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(s8) = G_TRUNC [[AND]](s32) + ; CHECK-NEXT: [[ANYEXT2:%[0-9]+]]:_(s32) = G_ANYEXT [[UV1]](s8) + ; CHECK-NEXT: [[ANYEXT3:%[0-9]+]]:_(s32) = G_ANYEXT [[UV5]](s8) + ; CHECK-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[ANYEXT2]], [[ANYEXT3]] + ; CHECK-NEXT: [[TRUNC1:%[0-9]+]]:_(s8) = G_TRUNC [[AND1]](s32) + ; CHECK-NEXT: [[ANYEXT4:%[0-9]+]]:_(s32) = G_ANYEXT [[UV2]](s8) + ; CHECK-NEXT: [[ANYEXT5:%[0-9]+]]:_(s32) = G_ANYEXT [[UV6]](s8) + ; CHECK-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[ANYEXT4]], [[ANYEXT5]] + ; CHECK-NEXT: [[TRUNC2:%[0-9]+]]:_(s8) = G_TRUNC [[AND2]](s32) + ; CHECK-NEXT: [[ANYEXT6:%[0-9]+]]:_(s32) = G_ANYEXT [[UV3]](s8) + ; CHECK-NEXT: [[ANYEXT7:%[0-9]+]]:_(s32) = G_ANYEXT [[UV7]](s8) + ; CHECK-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[ANYEXT6]], [[ANYEXT7]] + ; CHECK-NEXT: [[TRUNC3:%[0-9]+]]:_(s8) = G_TRUNC [[AND3]](s32) + ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s8>) = G_BUILD_VECTOR [[TRUNC]](s8), [[TRUNC1]](s8), [[TRUNC2]](s8), [[TRUNC3]](s8) + ; CHECK-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<12 x s8>) = G_CONCAT_VECTORS [[BUILD_VECTOR]](<4 x s8>), [[DEF2]](<4 x s8>), [[DEF2]](<4 x s8>) + ; CHECK-NEXT: [[UV8:%[0-9]+]]:_(<3 x s8>), [[UV9:%[0-9]+]]:_(<3 x s8>), [[UV10:%[0-9]+]]:_(<3 x s8>), [[UV11:%[0-9]+]]:_(<3 x s8>) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<12 x s8>) + ; CHECK-NEXT: [[ANYEXT8:%[0-9]+]]:_(<3 x s32>) = G_ANYEXT [[UV8]](<3 x s8>) + ; CHECK-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[ANYEXT8]](<3 x s32>) %0:_(<3 x s8>) = G_IMPLICIT_DEF %1:_(<3 x s8>) = G_IMPLICIT_DEF %2:_(<3 x s8>) = G_AND %0, %1 @@ -594,15 +594,15 @@ body: | ; CHECK-LABEL: name: test_and_v4s8 ; CHECK: [[DEF:%[0-9]+]]:_(<4 x s32>) = G_IMPLICIT_DEF - ; CHECK: [[DEF1:%[0-9]+]]:_(<4 x s32>) = G_IMPLICIT_DEF - ; 
CHECK: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[DEF]](<4 x s32>) - ; CHECK: [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32), [[UV6:%[0-9]+]]:_(s32), [[UV7:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[DEF1]](<4 x s32>) - ; CHECK: [[AND:%[0-9]+]]:_(s32) = G_AND [[UV]], [[UV4]] - ; CHECK: [[AND1:%[0-9]+]]:_(s32) = G_AND [[UV1]], [[UV5]] - ; CHECK: [[AND2:%[0-9]+]]:_(s32) = G_AND [[UV2]], [[UV6]] - ; CHECK: [[AND3:%[0-9]+]]:_(s32) = G_AND [[UV3]], [[UV7]] - ; CHECK: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[AND]](s32), [[AND1]](s32), [[AND2]](s32), [[AND3]](s32) - ; CHECK: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<4 x s32>) + ; CHECK-NEXT: [[DEF1:%[0-9]+]]:_(<4 x s32>) = G_IMPLICIT_DEF + ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[DEF]](<4 x s32>) + ; CHECK-NEXT: [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32), [[UV6:%[0-9]+]]:_(s32), [[UV7:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[DEF1]](<4 x s32>) + ; CHECK-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[UV]], [[UV4]] + ; CHECK-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[UV1]], [[UV5]] + ; CHECK-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[UV2]], [[UV6]] + ; CHECK-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[UV3]], [[UV7]] + ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[AND]](s32), [[AND1]](s32), [[AND2]](s32), [[AND3]](s32) + ; CHECK-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<4 x s32>) %0:_(<4 x s8>) = G_IMPLICIT_DEF %1:_(<4 x s8>) = G_IMPLICIT_DEF %2:_(<4 x s8>) = G_AND %0, %1 @@ -617,19 +617,19 @@ body: | ; CHECK-LABEL: name: test_and_v8s8 ; CHECK: [[DEF:%[0-9]+]]:_(<8 x s32>) = G_IMPLICIT_DEF - ; CHECK: [[DEF1:%[0-9]+]]:_(<8 x s32>) = G_IMPLICIT_DEF - ; CHECK: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32), [[UV6:%[0-9]+]]:_(s32), [[UV7:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[DEF]](<8 x s32>) - ; CHECK: [[UV8:%[0-9]+]]:_(s32), [[UV9:%[0-9]+]]:_(s32), [[UV10:%[0-9]+]]:_(s32), [[UV11:%[0-9]+]]:_(s32), [[UV12:%[0-9]+]]:_(s32), [[UV13:%[0-9]+]]:_(s32), [[UV14:%[0-9]+]]:_(s32), [[UV15:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[DEF1]](<8 x s32>) - ; CHECK: [[AND:%[0-9]+]]:_(s32) = G_AND [[UV]], [[UV8]] - ; CHECK: [[AND1:%[0-9]+]]:_(s32) = G_AND [[UV1]], [[UV9]] - ; CHECK: [[AND2:%[0-9]+]]:_(s32) = G_AND [[UV2]], [[UV10]] - ; CHECK: [[AND3:%[0-9]+]]:_(s32) = G_AND [[UV3]], [[UV11]] - ; CHECK: [[AND4:%[0-9]+]]:_(s32) = G_AND [[UV4]], [[UV12]] - ; CHECK: [[AND5:%[0-9]+]]:_(s32) = G_AND [[UV5]], [[UV13]] - ; CHECK: [[AND6:%[0-9]+]]:_(s32) = G_AND [[UV6]], [[UV14]] - ; CHECK: [[AND7:%[0-9]+]]:_(s32) = G_AND [[UV7]], [[UV15]] - ; CHECK: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[AND]](s32), [[AND1]](s32), [[AND2]](s32), [[AND3]](s32), [[AND4]](s32), [[AND5]](s32), [[AND6]](s32), [[AND7]](s32) - ; CHECK: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[BUILD_VECTOR]](<8 x s32>) + ; CHECK-NEXT: [[DEF1:%[0-9]+]]:_(<8 x s32>) = G_IMPLICIT_DEF + ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32), [[UV6:%[0-9]+]]:_(s32), [[UV7:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[DEF]](<8 x s32>) + ; CHECK-NEXT: [[UV8:%[0-9]+]]:_(s32), [[UV9:%[0-9]+]]:_(s32), [[UV10:%[0-9]+]]:_(s32), [[UV11:%[0-9]+]]:_(s32), [[UV12:%[0-9]+]]:_(s32), [[UV13:%[0-9]+]]:_(s32), 
[[UV14:%[0-9]+]]:_(s32), [[UV15:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[DEF1]](<8 x s32>) + ; CHECK-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[UV]], [[UV8]] + ; CHECK-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[UV1]], [[UV9]] + ; CHECK-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[UV2]], [[UV10]] + ; CHECK-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[UV3]], [[UV11]] + ; CHECK-NEXT: [[AND4:%[0-9]+]]:_(s32) = G_AND [[UV4]], [[UV12]] + ; CHECK-NEXT: [[AND5:%[0-9]+]]:_(s32) = G_AND [[UV5]], [[UV13]] + ; CHECK-NEXT: [[AND6:%[0-9]+]]:_(s32) = G_AND [[UV6]], [[UV14]] + ; CHECK-NEXT: [[AND7:%[0-9]+]]:_(s32) = G_AND [[UV7]], [[UV15]] + ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[AND]](s32), [[AND1]](s32), [[AND2]](s32), [[AND3]](s32), [[AND4]](s32), [[AND5]](s32), [[AND6]](s32), [[AND7]](s32) + ; CHECK-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[BUILD_VECTOR]](<8 x s32>) %0:_(<8 x s8>) = G_IMPLICIT_DEF %1:_(<8 x s8>) = G_IMPLICIT_DEF %2:_(<8 x s8>) = G_AND %0, %1 @@ -644,29 +644,29 @@ body: | ; CHECK-LABEL: name: test_and_v16s8 ; CHECK: [[DEF:%[0-9]+]]:_(<16 x s32>) = G_IMPLICIT_DEF - ; CHECK: [[DEF1:%[0-9]+]]:_(<16 x s32>) = G_IMPLICIT_DEF - ; CHECK: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32), [[UV6:%[0-9]+]]:_(s32), [[UV7:%[0-9]+]]:_(s32), [[UV8:%[0-9]+]]:_(s32), [[UV9:%[0-9]+]]:_(s32), [[UV10:%[0-9]+]]:_(s32), [[UV11:%[0-9]+]]:_(s32), [[UV12:%[0-9]+]]:_(s32), [[UV13:%[0-9]+]]:_(s32), [[UV14:%[0-9]+]]:_(s32), [[UV15:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[DEF]](<16 x s32>) - ; CHECK: [[UV16:%[0-9]+]]:_(s32), [[UV17:%[0-9]+]]:_(s32), [[UV18:%[0-9]+]]:_(s32), [[UV19:%[0-9]+]]:_(s32), [[UV20:%[0-9]+]]:_(s32), [[UV21:%[0-9]+]]:_(s32), [[UV22:%[0-9]+]]:_(s32), [[UV23:%[0-9]+]]:_(s32), [[UV24:%[0-9]+]]:_(s32), [[UV25:%[0-9]+]]:_(s32), [[UV26:%[0-9]+]]:_(s32), [[UV27:%[0-9]+]]:_(s32), [[UV28:%[0-9]+]]:_(s32), [[UV29:%[0-9]+]]:_(s32), [[UV30:%[0-9]+]]:_(s32), [[UV31:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[DEF1]](<16 x s32>) - ; CHECK: [[AND:%[0-9]+]]:_(s32) = G_AND [[UV]], [[UV16]] - ; CHECK: [[AND1:%[0-9]+]]:_(s32) = G_AND [[UV1]], [[UV17]] - ; CHECK: [[AND2:%[0-9]+]]:_(s32) = G_AND [[UV2]], [[UV18]] - ; CHECK: [[AND3:%[0-9]+]]:_(s32) = G_AND [[UV3]], [[UV19]] - ; CHECK: [[AND4:%[0-9]+]]:_(s32) = G_AND [[UV4]], [[UV20]] - ; CHECK: [[AND5:%[0-9]+]]:_(s32) = G_AND [[UV5]], [[UV21]] - ; CHECK: [[AND6:%[0-9]+]]:_(s32) = G_AND [[UV6]], [[UV22]] - ; CHECK: [[AND7:%[0-9]+]]:_(s32) = G_AND [[UV7]], [[UV23]] - ; CHECK: [[UV32:%[0-9]+]]:_(s32), [[UV33:%[0-9]+]]:_(s32), [[UV34:%[0-9]+]]:_(s32), [[UV35:%[0-9]+]]:_(s32), [[UV36:%[0-9]+]]:_(s32), [[UV37:%[0-9]+]]:_(s32), [[UV38:%[0-9]+]]:_(s32), [[UV39:%[0-9]+]]:_(s32), [[UV40:%[0-9]+]]:_(s32), [[UV41:%[0-9]+]]:_(s32), [[UV42:%[0-9]+]]:_(s32), [[UV43:%[0-9]+]]:_(s32), [[UV44:%[0-9]+]]:_(s32), [[UV45:%[0-9]+]]:_(s32), [[UV46:%[0-9]+]]:_(s32), [[UV47:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[DEF]](<16 x s32>) - ; CHECK: [[UV48:%[0-9]+]]:_(s32), [[UV49:%[0-9]+]]:_(s32), [[UV50:%[0-9]+]]:_(s32), [[UV51:%[0-9]+]]:_(s32), [[UV52:%[0-9]+]]:_(s32), [[UV53:%[0-9]+]]:_(s32), [[UV54:%[0-9]+]]:_(s32), [[UV55:%[0-9]+]]:_(s32), [[UV56:%[0-9]+]]:_(s32), [[UV57:%[0-9]+]]:_(s32), [[UV58:%[0-9]+]]:_(s32), [[UV59:%[0-9]+]]:_(s32), [[UV60:%[0-9]+]]:_(s32), [[UV61:%[0-9]+]]:_(s32), [[UV62:%[0-9]+]]:_(s32), [[UV63:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[DEF1]](<16 x s32>) - ; CHECK: [[AND8:%[0-9]+]]:_(s32) = G_AND [[UV40]], [[UV56]] - ; CHECK: [[AND9:%[0-9]+]]:_(s32) = G_AND 
[[UV41]], [[UV57]] - ; CHECK: [[AND10:%[0-9]+]]:_(s32) = G_AND [[UV42]], [[UV58]] - ; CHECK: [[AND11:%[0-9]+]]:_(s32) = G_AND [[UV43]], [[UV59]] - ; CHECK: [[AND12:%[0-9]+]]:_(s32) = G_AND [[UV44]], [[UV60]] - ; CHECK: [[AND13:%[0-9]+]]:_(s32) = G_AND [[UV45]], [[UV61]] - ; CHECK: [[AND14:%[0-9]+]]:_(s32) = G_AND [[UV46]], [[UV62]] - ; CHECK: [[AND15:%[0-9]+]]:_(s32) = G_AND [[UV47]], [[UV63]] - ; CHECK: [[BUILD_VECTOR:%[0-9]+]]:_(<16 x s32>) = G_BUILD_VECTOR [[AND]](s32), [[AND1]](s32), [[AND2]](s32), [[AND3]](s32), [[AND4]](s32), [[AND5]](s32), [[AND6]](s32), [[AND7]](s32), [[AND8]](s32), [[AND9]](s32), [[AND10]](s32), [[AND11]](s32), [[AND12]](s32), [[AND13]](s32), [[AND14]](s32), [[AND15]](s32) - ; CHECK: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 = COPY [[BUILD_VECTOR]](<16 x s32>) + ; CHECK-NEXT: [[DEF1:%[0-9]+]]:_(<16 x s32>) = G_IMPLICIT_DEF + ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32), [[UV6:%[0-9]+]]:_(s32), [[UV7:%[0-9]+]]:_(s32), [[UV8:%[0-9]+]]:_(s32), [[UV9:%[0-9]+]]:_(s32), [[UV10:%[0-9]+]]:_(s32), [[UV11:%[0-9]+]]:_(s32), [[UV12:%[0-9]+]]:_(s32), [[UV13:%[0-9]+]]:_(s32), [[UV14:%[0-9]+]]:_(s32), [[UV15:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[DEF]](<16 x s32>) + ; CHECK-NEXT: [[UV16:%[0-9]+]]:_(s32), [[UV17:%[0-9]+]]:_(s32), [[UV18:%[0-9]+]]:_(s32), [[UV19:%[0-9]+]]:_(s32), [[UV20:%[0-9]+]]:_(s32), [[UV21:%[0-9]+]]:_(s32), [[UV22:%[0-9]+]]:_(s32), [[UV23:%[0-9]+]]:_(s32), [[UV24:%[0-9]+]]:_(s32), [[UV25:%[0-9]+]]:_(s32), [[UV26:%[0-9]+]]:_(s32), [[UV27:%[0-9]+]]:_(s32), [[UV28:%[0-9]+]]:_(s32), [[UV29:%[0-9]+]]:_(s32), [[UV30:%[0-9]+]]:_(s32), [[UV31:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[DEF1]](<16 x s32>) + ; CHECK-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[UV]], [[UV16]] + ; CHECK-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[UV1]], [[UV17]] + ; CHECK-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[UV2]], [[UV18]] + ; CHECK-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[UV3]], [[UV19]] + ; CHECK-NEXT: [[AND4:%[0-9]+]]:_(s32) = G_AND [[UV4]], [[UV20]] + ; CHECK-NEXT: [[AND5:%[0-9]+]]:_(s32) = G_AND [[UV5]], [[UV21]] + ; CHECK-NEXT: [[AND6:%[0-9]+]]:_(s32) = G_AND [[UV6]], [[UV22]] + ; CHECK-NEXT: [[AND7:%[0-9]+]]:_(s32) = G_AND [[UV7]], [[UV23]] + ; CHECK-NEXT: [[UV32:%[0-9]+]]:_(s32), [[UV33:%[0-9]+]]:_(s32), [[UV34:%[0-9]+]]:_(s32), [[UV35:%[0-9]+]]:_(s32), [[UV36:%[0-9]+]]:_(s32), [[UV37:%[0-9]+]]:_(s32), [[UV38:%[0-9]+]]:_(s32), [[UV39:%[0-9]+]]:_(s32), [[UV40:%[0-9]+]]:_(s32), [[UV41:%[0-9]+]]:_(s32), [[UV42:%[0-9]+]]:_(s32), [[UV43:%[0-9]+]]:_(s32), [[UV44:%[0-9]+]]:_(s32), [[UV45:%[0-9]+]]:_(s32), [[UV46:%[0-9]+]]:_(s32), [[UV47:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[DEF]](<16 x s32>) + ; CHECK-NEXT: [[UV48:%[0-9]+]]:_(s32), [[UV49:%[0-9]+]]:_(s32), [[UV50:%[0-9]+]]:_(s32), [[UV51:%[0-9]+]]:_(s32), [[UV52:%[0-9]+]]:_(s32), [[UV53:%[0-9]+]]:_(s32), [[UV54:%[0-9]+]]:_(s32), [[UV55:%[0-9]+]]:_(s32), [[UV56:%[0-9]+]]:_(s32), [[UV57:%[0-9]+]]:_(s32), [[UV58:%[0-9]+]]:_(s32), [[UV59:%[0-9]+]]:_(s32), [[UV60:%[0-9]+]]:_(s32), [[UV61:%[0-9]+]]:_(s32), [[UV62:%[0-9]+]]:_(s32), [[UV63:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[DEF1]](<16 x s32>) + ; CHECK-NEXT: [[AND8:%[0-9]+]]:_(s32) = G_AND [[UV40]], [[UV56]] + ; CHECK-NEXT: [[AND9:%[0-9]+]]:_(s32) = G_AND [[UV41]], [[UV57]] + ; CHECK-NEXT: [[AND10:%[0-9]+]]:_(s32) = G_AND [[UV42]], [[UV58]] + ; CHECK-NEXT: [[AND11:%[0-9]+]]:_(s32) = G_AND [[UV43]], [[UV59]] + ; CHECK-NEXT: 
[[AND12:%[0-9]+]]:_(s32) = G_AND [[UV44]], [[UV60]] + ; CHECK-NEXT: [[AND13:%[0-9]+]]:_(s32) = G_AND [[UV45]], [[UV61]] + ; CHECK-NEXT: [[AND14:%[0-9]+]]:_(s32) = G_AND [[UV46]], [[UV62]] + ; CHECK-NEXT: [[AND15:%[0-9]+]]:_(s32) = G_AND [[UV47]], [[UV63]] + ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<16 x s32>) = G_BUILD_VECTOR [[AND]](s32), [[AND1]](s32), [[AND2]](s32), [[AND3]](s32), [[AND4]](s32), [[AND5]](s32), [[AND6]](s32), [[AND7]](s32), [[AND8]](s32), [[AND9]](s32), [[AND10]](s32), [[AND11]](s32), [[AND12]](s32), [[AND13]](s32), [[AND14]](s32), [[AND15]](s32) + ; CHECK-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 = COPY [[BUILD_VECTOR]](<16 x s32>) %0:_(<16 x s8>) = G_IMPLICIT_DEF %1:_(<16 x s8>) = G_IMPLICIT_DEF %2:_(<16 x s8>) = G_AND %0, %1 diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-bitcast.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-bitcast.mir index 6188cb9b4e9e..ab6796076810 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-bitcast.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-bitcast.mir @@ -9,8 +9,8 @@ body: | ; CHECK-LABEL: name: test_bitcast_s32_to_v2s16 ; CHECK: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; CHECK: [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[COPY]](s32) - ; CHECK: $vgpr0 = COPY [[BITCAST]](<2 x s16>) + ; CHECK-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[COPY]](s32) + ; CHECK-NEXT: $vgpr0 = COPY [[BITCAST]](<2 x s16>) %0:_(s32) = COPY $vgpr0 %1:_(<2 x s16>) = G_BITCAST %0 $vgpr0 = COPY %1 @@ -24,8 +24,8 @@ body: | ; CHECK-LABEL: name: test_bitcast_v2s16_to_s32 ; CHECK: [[COPY:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0 - ; CHECK: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY]](<2 x s16>) - ; CHECK: $vgpr0 = COPY [[BITCAST]](s32) + ; CHECK-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY]](<2 x s16>) + ; CHECK-NEXT: $vgpr0 = COPY [[BITCAST]](s32) %0:_(<2 x s16>) = COPY $vgpr0 %1:_(s32) = G_BITCAST %0 $vgpr0 = COPY %1 @@ -39,8 +39,8 @@ body: | ; CHECK-LABEL: name: test_bitcast_v2s32_to_s64 ; CHECK: [[COPY:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr0_vgpr1 - ; CHECK: [[BITCAST:%[0-9]+]]:_(s64) = G_BITCAST [[COPY]](<2 x s32>) - ; CHECK: $vgpr0_vgpr1 = COPY [[BITCAST]](s64) + ; CHECK-NEXT: [[BITCAST:%[0-9]+]]:_(s64) = G_BITCAST [[COPY]](<2 x s32>) + ; CHECK-NEXT: $vgpr0_vgpr1 = COPY [[BITCAST]](s64) %0:_(<2 x s32>) = COPY $vgpr0_vgpr1 %1:_(s64) = G_BITCAST %0 $vgpr0_vgpr1 = COPY %1 @@ -54,8 +54,8 @@ body: | ; CHECK-LABEL: name: test_bitcast_s64_to_v2s32 ; CHECK: [[COPY:%[0-9]+]]:_(s64) = COPY $vgpr0_vgpr1 - ; CHECK: [[BITCAST:%[0-9]+]]:_(<2 x s32>) = G_BITCAST [[COPY]](s64) - ; CHECK: $vgpr0_vgpr1 = COPY [[BITCAST]](<2 x s32>) + ; CHECK-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x s32>) = G_BITCAST [[COPY]](s64) + ; CHECK-NEXT: $vgpr0_vgpr1 = COPY [[BITCAST]](<2 x s32>) %0:_(s64) = COPY $vgpr0_vgpr1 %1:_(<2 x s32>) = G_BITCAST %0 $vgpr0_vgpr1 = COPY %1 @@ -69,8 +69,8 @@ body: | ; CHECK-LABEL: name: test_bitcast_v2s64_to_v4s32 ; CHECK: [[COPY:%[0-9]+]]:_(<2 x s64>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3 - ; CHECK: [[BITCAST:%[0-9]+]]:_(<4 x s32>) = G_BITCAST [[COPY]](<2 x s64>) - ; CHECK: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BITCAST]](<4 x s32>) + ; CHECK-NEXT: [[BITCAST:%[0-9]+]]:_(<4 x s32>) = G_BITCAST [[COPY]](<2 x s64>) + ; CHECK-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BITCAST]](<4 x s32>) %0:_(<2 x s64>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3 %1:_(<4 x s32>) = G_BITCAST %0 $vgpr0_vgpr1_vgpr2_vgpr3 = COPY %1 @@ -84,8 +84,8 @@ body: | ; CHECK-LABEL: name: test_bitcast_v4s32_to_v2s64 ; CHECK: 
[[COPY:%[0-9]+]]:_(<4 x s32>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3 - ; CHECK: [[BITCAST:%[0-9]+]]:_(<2 x s64>) = G_BITCAST [[COPY]](<4 x s32>) - ; CHECK: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BITCAST]](<2 x s64>) + ; CHECK-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x s64>) = G_BITCAST [[COPY]](<4 x s32>) + ; CHECK-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BITCAST]](<2 x s64>) %0:_(<4 x s32>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3 %1:_(<2 x s64>) = G_BITCAST %0 $vgpr0_vgpr1_vgpr2_vgpr3 = COPY %1 @@ -99,8 +99,8 @@ body: | ; CHECK-LABEL: name: test_bitcast_s128_to_v4s32 ; CHECK: [[COPY:%[0-9]+]]:_(s128) = COPY $vgpr0_vgpr1_vgpr2_vgpr3 - ; CHECK: [[BITCAST:%[0-9]+]]:_(<4 x s32>) = G_BITCAST [[COPY]](s128) - ; CHECK: S_ENDPGM 0, implicit [[BITCAST]](<4 x s32>) + ; CHECK-NEXT: [[BITCAST:%[0-9]+]]:_(<4 x s32>) = G_BITCAST [[COPY]](s128) + ; CHECK-NEXT: S_ENDPGM 0, implicit [[BITCAST]](<4 x s32>) %0:_(s128) = COPY $vgpr0_vgpr1_vgpr2_vgpr3 %1:_(<4 x s32>) = G_BITCAST %0 S_ENDPGM 0, implicit %1 @@ -114,8 +114,8 @@ body: | ; CHECK-LABEL: name: test_bitcast_v4s32_to_s128 ; CHECK: [[COPY:%[0-9]+]]:_(<4 x s32>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3 - ; CHECK: [[BITCAST:%[0-9]+]]:_(s128) = G_BITCAST [[COPY]](<4 x s32>) - ; CHECK: S_ENDPGM 0, implicit [[BITCAST]](s128) + ; CHECK-NEXT: [[BITCAST:%[0-9]+]]:_(s128) = G_BITCAST [[COPY]](<4 x s32>) + ; CHECK-NEXT: S_ENDPGM 0, implicit [[BITCAST]](s128) %0:_(<4 x s32>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3 %1:_(s128) = G_BITCAST %0 S_ENDPGM 0, implicit %1 @@ -129,8 +129,8 @@ body: | ; CHECK-LABEL: name: test_bitcast_v4s16_to_s64 ; CHECK: [[COPY:%[0-9]+]]:_(<4 x s16>) = COPY $vgpr0_vgpr1 - ; CHECK: [[BITCAST:%[0-9]+]]:_(s64) = G_BITCAST [[COPY]](<4 x s16>) - ; CHECK: $vgpr0_vgpr1 = COPY [[BITCAST]](s64) + ; CHECK-NEXT: [[BITCAST:%[0-9]+]]:_(s64) = G_BITCAST [[COPY]](<4 x s16>) + ; CHECK-NEXT: $vgpr0_vgpr1 = COPY [[BITCAST]](s64) %0:_(<4 x s16>) = COPY $vgpr0_vgpr1 %1:_(s64) = G_BITCAST %0 $vgpr0_vgpr1 = COPY %1 @@ -144,8 +144,8 @@ body: | ; CHECK-LABEL: name: test_bitcast_s64_to_v4s16 ; CHECK: [[COPY:%[0-9]+]]:_(s64) = COPY $vgpr0_vgpr1 - ; CHECK: [[BITCAST:%[0-9]+]]:_(<4 x s16>) = G_BITCAST [[COPY]](s64) - ; CHECK: $vgpr0_vgpr1 = COPY [[BITCAST]](<4 x s16>) + ; CHECK-NEXT: [[BITCAST:%[0-9]+]]:_(<4 x s16>) = G_BITCAST [[COPY]](s64) + ; CHECK-NEXT: $vgpr0_vgpr1 = COPY [[BITCAST]](<4 x s16>) %0:_(s64) = COPY $vgpr0_vgpr1 %1:_(<4 x s16>) = G_BITCAST %0 $vgpr0_vgpr1 = COPY %1 @@ -159,8 +159,8 @@ body: | ; CHECK-LABEL: name: test_bitcast_v2s64_to_v8s16 ; CHECK: [[COPY:%[0-9]+]]:_(<2 x s64>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3 - ; CHECK: [[BITCAST:%[0-9]+]]:_(<8 x s16>) = G_BITCAST [[COPY]](<2 x s64>) - ; CHECK: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BITCAST]](<8 x s16>) + ; CHECK-NEXT: [[BITCAST:%[0-9]+]]:_(<8 x s16>) = G_BITCAST [[COPY]](<2 x s64>) + ; CHECK-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BITCAST]](<8 x s16>) %0:_(<2 x s64>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3 %1:_(<8 x s16>) = G_BITCAST %0 $vgpr0_vgpr1_vgpr2_vgpr3 = COPY %1 @@ -174,8 +174,8 @@ body: | ; CHECK-LABEL: name: test_bitcast_v8s16_to_v2s64 ; CHECK: [[COPY:%[0-9]+]]:_(<8 x s16>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3 - ; CHECK: [[BITCAST:%[0-9]+]]:_(<2 x s64>) = G_BITCAST [[COPY]](<8 x s16>) - ; CHECK: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BITCAST]](<2 x s64>) + ; CHECK-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x s64>) = G_BITCAST [[COPY]](<8 x s16>) + ; CHECK-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BITCAST]](<2 x s64>) %0:_(<8 x s16>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3 %1:_(<2 x s64>) = G_BITCAST %0 $vgpr0_vgpr1_vgpr2_vgpr3 = COPY %1 @@ -189,8 +189,8 @@ body: | ; 
CHECK-LABEL: name: test_bitcast_p0_to_p1 ; CHECK: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1 - ; CHECK: [[BITCAST:%[0-9]+]]:_(p1) = G_BITCAST [[COPY]](p0) - ; CHECK: $vgpr0_vgpr1 = COPY [[BITCAST]](p1) + ; CHECK-NEXT: [[BITCAST:%[0-9]+]]:_(p1) = G_BITCAST [[COPY]](p0) + ; CHECK-NEXT: $vgpr0_vgpr1 = COPY [[BITCAST]](p1) %0:_(p0) = COPY $vgpr0_vgpr1 %1:_(p1) = G_BITCAST %0 $vgpr0_vgpr1 = COPY %1 @@ -204,8 +204,8 @@ body: | ; CHECK-LABEL: name: test_bitcast_p1_to_p0 ; CHECK: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; CHECK: [[BITCAST:%[0-9]+]]:_(p0) = G_BITCAST [[COPY]](p1) - ; CHECK: $vgpr0_vgpr1 = COPY [[BITCAST]](p0) + ; CHECK-NEXT: [[BITCAST:%[0-9]+]]:_(p0) = G_BITCAST [[COPY]](p1) + ; CHECK-NEXT: $vgpr0_vgpr1 = COPY [[BITCAST]](p0) %0:_(p1) = COPY $vgpr0_vgpr1 %1:_(p0) = G_BITCAST %0 $vgpr0_vgpr1 = COPY %1 @@ -219,8 +219,8 @@ body: | ; CHECK-LABEL: name: test_bitcast_p999_to_p0 ; CHECK: [[COPY:%[0-9]+]]:_(p999) = COPY $vgpr0_vgpr1 - ; CHECK: [[BITCAST:%[0-9]+]]:_(p0) = G_BITCAST [[COPY]](p999) - ; CHECK: $vgpr0_vgpr1 = COPY [[BITCAST]](p0) + ; CHECK-NEXT: [[BITCAST:%[0-9]+]]:_(p0) = G_BITCAST [[COPY]](p999) + ; CHECK-NEXT: $vgpr0_vgpr1 = COPY [[BITCAST]](p0) %0:_(p999) = COPY $vgpr0_vgpr1 %1:_(p0) = G_BITCAST %0 $vgpr0_vgpr1 = COPY %1 @@ -234,8 +234,8 @@ body: | ; CHECK-LABEL: name: test_bitcast_p123_to_p999 ; CHECK: [[COPY:%[0-9]+]]:_(p123) = COPY $vgpr0_vgpr1 - ; CHECK: [[BITCAST:%[0-9]+]]:_(p999) = G_BITCAST [[COPY]](p123) - ; CHECK: $vgpr0_vgpr1 = COPY [[BITCAST]](p999) + ; CHECK-NEXT: [[BITCAST:%[0-9]+]]:_(p999) = G_BITCAST [[COPY]](p123) + ; CHECK-NEXT: $vgpr0_vgpr1 = COPY [[BITCAST]](p999) %0:_(p123) = COPY $vgpr0_vgpr1 %1:_(p999) = G_BITCAST %0 $vgpr0_vgpr1 = COPY %1 @@ -251,9 +251,10 @@ body: | ; CHECK-LABEL: name: test_bitcast_v4s64_to_v8s32 ; CHECK: liveins: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 - ; CHECK: [[COPY:%[0-9]+]]:_(<4 x s64>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 - ; CHECK: [[BITCAST:%[0-9]+]]:_(<8 x s32>) = G_BITCAST [[COPY]](<4 x s64>) - ; CHECK: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[BITCAST]](<8 x s32>) + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(<4 x s64>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 + ; CHECK-NEXT: [[BITCAST:%[0-9]+]]:_(<8 x s32>) = G_BITCAST [[COPY]](<4 x s64>) + ; CHECK-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[BITCAST]](<8 x s32>) %0:_(<4 x s64>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 %1:_(<8 x s32>) = G_BITCAST %0 $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY %1 @@ -269,9 +270,10 @@ body: | ; CHECK-LABEL: name: test_bitcast_v8s32_to_v4s64 ; CHECK: liveins: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 - ; CHECK: [[COPY:%[0-9]+]]:_(<8 x s32>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 - ; CHECK: [[BITCAST:%[0-9]+]]:_(<4 x s64>) = G_BITCAST [[COPY]](<8 x s32>) - ; CHECK: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[BITCAST]](<4 x s64>) + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(<8 x s32>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 + ; CHECK-NEXT: [[BITCAST:%[0-9]+]]:_(<4 x s64>) = G_BITCAST [[COPY]](<8 x s32>) + ; CHECK-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[BITCAST]](<4 x s64>) %0:_(<8 x s32>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 %1:_(<4 x s64>) = G_BITCAST %0 $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY %1 @@ -287,9 +289,10 @@ body: | ; CHECK-LABEL: name: test_bitcast_v8s64_to_v16s32 ; CHECK: liveins: 
$vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 - ; CHECK: [[COPY:%[0-9]+]]:_(<8 x s64>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 - ; CHECK: [[BITCAST:%[0-9]+]]:_(<16 x s32>) = G_BITCAST [[COPY]](<8 x s64>) - ; CHECK: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 = COPY [[BITCAST]](<16 x s32>) + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(<8 x s64>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 + ; CHECK-NEXT: [[BITCAST:%[0-9]+]]:_(<16 x s32>) = G_BITCAST [[COPY]](<8 x s64>) + ; CHECK-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 = COPY [[BITCAST]](<16 x s32>) %0:_(<8 x s64>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 %1:_(<16 x s32>) = G_BITCAST %0 $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 = COPY %1 @@ -305,9 +308,10 @@ body: | ; CHECK-LABEL: name: test_bitcast_v16s32_to_v8s64 ; CHECK: liveins: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 - ; CHECK: [[COPY:%[0-9]+]]:_(<16 x s32>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 - ; CHECK: [[BITCAST:%[0-9]+]]:_(<8 x s64>) = G_BITCAST [[COPY]](<16 x s32>) - ; CHECK: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 = COPY [[BITCAST]](<8 x s64>) + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(<16 x s32>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 + ; CHECK-NEXT: [[BITCAST:%[0-9]+]]:_(<8 x s64>) = G_BITCAST [[COPY]](<16 x s32>) + ; CHECK-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 = COPY [[BITCAST]](<8 x s64>) %0:_(<16 x s32>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 %1:_(<8 x s64>) = G_BITCAST %0 $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 = COPY %1 @@ -321,8 +325,8 @@ body: | ; CHECK-LABEL: name: test_bitcast_v32s32_to_v16s64 ; CHECK: [[COPY:%[0-9]+]]:_(<32 x s32>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 - ; CHECK: [[BITCAST:%[0-9]+]]:_(<16 x s64>) = G_BITCAST [[COPY]](<32 x s32>) - ; CHECK: S_ENDPGM 0, implicit [[BITCAST]](<16 x s64>) + ; CHECK-NEXT: [[BITCAST:%[0-9]+]]:_(<16 x s64>) = G_BITCAST [[COPY]](<32 x s32>) + ; CHECK-NEXT: S_ENDPGM 0, implicit [[BITCAST]](<16 x s64>) %0:_(<32 x s32>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 %1:_(<16 x s64>) = G_BITCAST %0 S_ENDPGM 0, implicit %1 @@ -336,8 +340,8 @@ body: | ; CHECK-LABEL: name: test_bitcast_v16s64_to_v32s32 ; CHECK: [[COPY:%[0-9]+]]:_(<16 x s64>) = COPY 
$vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 - ; CHECK: [[BITCAST:%[0-9]+]]:_(<32 x s32>) = G_BITCAST [[COPY]](<16 x s64>) - ; CHECK: S_ENDPGM 0, implicit [[BITCAST]](<32 x s32>) + ; CHECK-NEXT: [[BITCAST:%[0-9]+]]:_(<32 x s32>) = G_BITCAST [[COPY]](<16 x s64>) + ; CHECK-NEXT: S_ENDPGM 0, implicit [[BITCAST]](<32 x s32>) %0:_(<16 x s64>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 %1:_(<32 x s32>) = G_BITCAST %0 S_ENDPGM 0, implicit %1 @@ -351,12 +355,12 @@ body: | ; CHECK-LABEL: name: test_bitcast_s24_to_v3s8 ; CHECK: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; CHECK: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 - ; CHECK: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[COPY]], [[C]](s32) - ; CHECK: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; CHECK: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[COPY]], [[C1]](s32) - ; CHECK: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[LSHR]](s32), [[LSHR1]](s32) - ; CHECK: $vgpr0_vgpr1_vgpr2 = COPY [[BUILD_VECTOR]](<3 x s32>) + ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 + ; CHECK-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[COPY]], [[C]](s32) + ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; CHECK-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[COPY]], [[C1]](s32) + ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[LSHR]](s32), [[LSHR1]](s32) + ; CHECK-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[BUILD_VECTOR]](<3 x s32>) %0:_(s32) = COPY $vgpr0 %1:_(s24) = G_TRUNC %0 %2:_(<3 x s8>) = G_BITCAST %1 @@ -372,11 +376,11 @@ body: | ; CHECK-LABEL: name: test_bitcast_s48_to_v3s16 ; CHECK: [[COPY:%[0-9]+]]:_(s64) = COPY $vgpr0_vgpr1 - ; CHECK: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](s64) - ; CHECK: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; CHECK: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[UV]], [[C]](s32) - ; CHECK: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[UV]](s32), [[LSHR]](s32), [[UV1]](s32) - ; CHECK: $vgpr0_vgpr1_vgpr2 = COPY [[BUILD_VECTOR]](<3 x s32>) + ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](s64) + ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; CHECK-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[UV]], [[C]](s32) + ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[UV]](s32), [[LSHR]](s32), [[UV1]](s32) + ; CHECK-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[BUILD_VECTOR]](<3 x s32>) %0:_(s64) = COPY $vgpr0_vgpr1 %1:_(s48) = G_TRUNC %0 %2:_(<3 x s16>) = G_BITCAST %1 @@ -392,28 +396,28 @@ body: | ; CHECK-LABEL: name: test_bitcast_v3s8_to_s24 ; CHECK: [[COPY:%[0-9]+]]:_(<3 x s32>) = COPY $vgpr0_vgpr1_vgpr2 - ; CHECK: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<3 x s32>) - ; CHECK: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF - ; CHECK: [[C:%[0-9]+]]:_(s16) = G_CONSTANT i16 255 - ; CHECK: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[UV]](s32) - ; CHECK: [[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC]], [[C]] - ; CHECK: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[UV1]](s32) - ; CHECK: [[AND1:%[0-9]+]]:_(s16) = G_AND [[TRUNC1]], [[C]] - ; CHECK: [[C1:%[0-9]+]]:_(s16) = G_CONSTANT i16 8 - ; CHECK: [[SHL:%[0-9]+]]:_(s16) = G_SHL [[AND1]], [[C1]](s16) 
- ; CHECK: [[OR:%[0-9]+]]:_(s16) = G_OR [[AND]], [[SHL]] - ; CHECK: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[UV2]](s32) - ; CHECK: [[AND2:%[0-9]+]]:_(s16) = G_AND [[TRUNC2]], [[C]] - ; CHECK: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[DEF]](s32) - ; CHECK: [[AND3:%[0-9]+]]:_(s16) = G_AND [[TRUNC3]], [[C]] - ; CHECK: [[SHL1:%[0-9]+]]:_(s16) = G_SHL [[AND3]], [[C1]](s16) - ; CHECK: [[OR1:%[0-9]+]]:_(s16) = G_OR [[AND2]], [[SHL1]] - ; CHECK: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[OR]](s16) - ; CHECK: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[OR1]](s16) - ; CHECK: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; CHECK: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C2]](s32) - ; CHECK: [[OR2:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL2]] - ; CHECK: $vgpr0 = COPY [[OR2]](s32) + ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<3 x s32>) + ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF + ; CHECK-NEXT: [[C:%[0-9]+]]:_(s16) = G_CONSTANT i16 255 + ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[UV]](s32) + ; CHECK-NEXT: [[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC]], [[C]] + ; CHECK-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[UV1]](s32) + ; CHECK-NEXT: [[AND1:%[0-9]+]]:_(s16) = G_AND [[TRUNC1]], [[C]] + ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s16) = G_CONSTANT i16 8 + ; CHECK-NEXT: [[SHL:%[0-9]+]]:_(s16) = G_SHL [[AND1]], [[C1]](s16) + ; CHECK-NEXT: [[OR:%[0-9]+]]:_(s16) = G_OR [[AND]], [[SHL]] + ; CHECK-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[UV2]](s32) + ; CHECK-NEXT: [[AND2:%[0-9]+]]:_(s16) = G_AND [[TRUNC2]], [[C]] + ; CHECK-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[DEF]](s32) + ; CHECK-NEXT: [[AND3:%[0-9]+]]:_(s16) = G_AND [[TRUNC3]], [[C]] + ; CHECK-NEXT: [[SHL1:%[0-9]+]]:_(s16) = G_SHL [[AND3]], [[C1]](s16) + ; CHECK-NEXT: [[OR1:%[0-9]+]]:_(s16) = G_OR [[AND2]], [[SHL1]] + ; CHECK-NEXT: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[OR]](s16) + ; CHECK-NEXT: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[OR1]](s16) + ; CHECK-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; CHECK-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C2]](s32) + ; CHECK-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL2]] + ; CHECK-NEXT: $vgpr0 = COPY [[OR2]](s32) %0:_(<3 x s32>) = COPY $vgpr0_vgpr1_vgpr2 %1:_(<3 x s8>) = G_TRUNC %0 %2:_(s24) = G_BITCAST %1 @@ -429,19 +433,19 @@ body: | ; CHECK-LABEL: name: test_bitcast_v3s16_to_s48 ; CHECK: [[COPY:%[0-9]+]]:_(<3 x s32>) = COPY $vgpr0_vgpr1_vgpr2 - ; CHECK: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<3 x s32>) - ; CHECK: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 - ; CHECK: [[AND:%[0-9]+]]:_(s32) = G_AND [[UV]], [[C]] - ; CHECK: [[AND1:%[0-9]+]]:_(s32) = G_AND [[UV1]], [[C]] - ; CHECK: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; CHECK: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C1]](s32) - ; CHECK: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]] - ; CHECK: [[AND2:%[0-9]+]]:_(s32) = G_AND [[UV2]], [[C]] - ; CHECK: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 - ; CHECK: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[C2]], [[C1]](s32) - ; CHECK: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL1]] - ; CHECK: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[OR]](s32), [[OR1]](s32) - ; CHECK: $vgpr0_vgpr1 = COPY [[MV]](s64) + ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<3 x s32>) + ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 + ; CHECK-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[UV]], [[C]] + ; CHECK-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[UV1]], 
[[C]] + ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; CHECK-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C1]](s32) + ; CHECK-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]] + ; CHECK-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[UV2]], [[C]] + ; CHECK-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 + ; CHECK-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[C2]], [[C1]](s32) + ; CHECK-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL1]] + ; CHECK-NEXT: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[OR]](s32), [[OR1]](s32) + ; CHECK-NEXT: $vgpr0_vgpr1 = COPY [[MV]](s64) %0:_(<3 x s32>) = COPY $vgpr0_vgpr1_vgpr2 %1:_(<3 x s16>) = G_TRUNC %0 %2:_(s48) = G_BITCAST %1 @@ -457,10 +461,10 @@ body: | ; CHECK-LABEL: name: test_bitcast_s16_to_v2s8 ; CHECK: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; CHECK: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 - ; CHECK: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[COPY]], [[C]](s32) - ; CHECK: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[LSHR]](s32) - ; CHECK: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x s32>) + ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 + ; CHECK-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[COPY]], [[C]](s32) + ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[LSHR]](s32) + ; CHECK-NEXT: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x s32>) %0:_(s32) = COPY $vgpr0 %1:_(s16) = G_TRUNC %0 %2:_(<2 x s8>) = G_BITCAST %1 @@ -476,17 +480,17 @@ body: | ; CHECK-LABEL: name: test_bitcast_v2s8_to_s16 ; CHECK: [[COPY:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr0_vgpr1 - ; CHECK: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<2 x s32>) - ; CHECK: [[C:%[0-9]+]]:_(s16) = G_CONSTANT i16 255 - ; CHECK: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[UV]](s32) - ; CHECK: [[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC]], [[C]] - ; CHECK: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[UV1]](s32) - ; CHECK: [[AND1:%[0-9]+]]:_(s16) = G_AND [[TRUNC1]], [[C]] - ; CHECK: [[C1:%[0-9]+]]:_(s16) = G_CONSTANT i16 8 - ; CHECK: [[SHL:%[0-9]+]]:_(s16) = G_SHL [[AND1]], [[C1]](s16) - ; CHECK: [[OR:%[0-9]+]]:_(s16) = G_OR [[AND]], [[SHL]] - ; CHECK: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[OR]](s16) - ; CHECK: $vgpr0 = COPY [[ANYEXT]](s32) + ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<2 x s32>) + ; CHECK-NEXT: [[C:%[0-9]+]]:_(s16) = G_CONSTANT i16 255 + ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[UV]](s32) + ; CHECK-NEXT: [[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC]], [[C]] + ; CHECK-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[UV1]](s32) + ; CHECK-NEXT: [[AND1:%[0-9]+]]:_(s16) = G_AND [[TRUNC1]], [[C]] + ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s16) = G_CONSTANT i16 8 + ; CHECK-NEXT: [[SHL:%[0-9]+]]:_(s16) = G_SHL [[AND1]], [[C1]](s16) + ; CHECK-NEXT: [[OR:%[0-9]+]]:_(s16) = G_OR [[AND]], [[SHL]] + ; CHECK-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[OR]](s16) + ; CHECK-NEXT: $vgpr0 = COPY [[ANYEXT]](s32) %0:_(<2 x s32>) = COPY $vgpr0_vgpr1 %1:_(<2 x s8>) = G_TRUNC %0 %2:_(s16) = G_BITCAST %1 @@ -503,32 +507,32 @@ body: | ; CHECK-LABEL: name: test_bitcast_v2s16_to_v4s8 ; CHECK: [[COPY:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0 - ; CHECK: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY]](<2 x s16>) - ; CHECK: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 - ; CHECK: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) - ; CHECK: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; CHECK: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 24 - ; CHECK: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[COPY]](<2 x s16>) - ; CHECK: 
[[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C1]](s32) - ; CHECK: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C2]](s32) - ; CHECK: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST]](s32) - ; CHECK: [[COPY1:%[0-9]+]]:_(s16) = COPY [[TRUNC]](s16) - ; CHECK: [[ADD:%[0-9]+]]:_(s16) = G_ADD [[TRUNC]], [[COPY1]] - ; CHECK: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) - ; CHECK: [[COPY2:%[0-9]+]]:_(s16) = COPY [[TRUNC1]](s16) - ; CHECK: [[ADD1:%[0-9]+]]:_(s16) = G_ADD [[TRUNC1]], [[COPY2]] - ; CHECK: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR1]](s32) - ; CHECK: [[COPY3:%[0-9]+]]:_(s16) = COPY [[TRUNC2]](s16) - ; CHECK: [[ADD2:%[0-9]+]]:_(s16) = G_ADD [[TRUNC2]], [[COPY3]] - ; CHECK: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR2]](s32) - ; CHECK: [[COPY4:%[0-9]+]]:_(s16) = COPY [[TRUNC3]](s16) - ; CHECK: [[ADD3:%[0-9]+]]:_(s16) = G_ADD [[TRUNC3]], [[COPY4]] - ; CHECK: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[ADD]](s16) - ; CHECK: [[ANYEXT1:%[0-9]+]]:_(s32) = G_ANYEXT [[ADD1]](s16) - ; CHECK: [[ANYEXT2:%[0-9]+]]:_(s32) = G_ANYEXT [[ADD2]](s16) - ; CHECK: [[ANYEXT3:%[0-9]+]]:_(s32) = G_ANYEXT [[ADD3]](s16) - ; CHECK: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[ANYEXT]](s32), [[ANYEXT1]](s32), [[ANYEXT2]](s32), [[ANYEXT3]](s32) - ; CHECK: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<4 x s32>) + ; CHECK-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY]](<2 x s16>) + ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 + ; CHECK-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) + ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; CHECK-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 24 + ; CHECK-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[COPY]](<2 x s16>) + ; CHECK-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C1]](s32) + ; CHECK-NEXT: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C2]](s32) + ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST]](s32) + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s16) = COPY [[TRUNC]](s16) + ; CHECK-NEXT: [[ADD:%[0-9]+]]:_(s16) = G_ADD [[TRUNC]], [[COPY1]] + ; CHECK-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(s16) = COPY [[TRUNC1]](s16) + ; CHECK-NEXT: [[ADD1:%[0-9]+]]:_(s16) = G_ADD [[TRUNC1]], [[COPY2]] + ; CHECK-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR1]](s32) + ; CHECK-NEXT: [[COPY3:%[0-9]+]]:_(s16) = COPY [[TRUNC2]](s16) + ; CHECK-NEXT: [[ADD2:%[0-9]+]]:_(s16) = G_ADD [[TRUNC2]], [[COPY3]] + ; CHECK-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR2]](s32) + ; CHECK-NEXT: [[COPY4:%[0-9]+]]:_(s16) = COPY [[TRUNC3]](s16) + ; CHECK-NEXT: [[ADD3:%[0-9]+]]:_(s16) = G_ADD [[TRUNC3]], [[COPY4]] + ; CHECK-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[ADD]](s16) + ; CHECK-NEXT: [[ANYEXT1:%[0-9]+]]:_(s32) = G_ANYEXT [[ADD1]](s16) + ; CHECK-NEXT: [[ANYEXT2:%[0-9]+]]:_(s32) = G_ANYEXT [[ADD2]](s16) + ; CHECK-NEXT: [[ANYEXT3:%[0-9]+]]:_(s32) = G_ANYEXT [[ADD3]](s16) + ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[ANYEXT]](s32), [[ANYEXT1]](s32), [[ANYEXT2]](s32), [[ANYEXT3]](s32) + ; CHECK-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<4 x s32>) %0:_(<2 x s16>) = COPY $vgpr0 %1:_(<4 x s8>) = G_BITCAST %0 %2:_(<4 x s8>) = G_ADD %1, %1 @@ -544,37 +548,37 @@ body: | ; CHECK-LABEL: name: test_bitcast_v4s8_to_v2s16 ; CHECK: [[COPY:%[0-9]+]]:_(<4 x s32>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3 - ; CHECK: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<4 x s32>) - ; 
CHECK: [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32), [[UV6:%[0-9]+]]:_(s32), [[UV7:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<4 x s32>) - ; CHECK: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[UV]](s32) - ; CHECK: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[UV4]](s32) - ; CHECK: [[ADD:%[0-9]+]]:_(s16) = G_ADD [[TRUNC]], [[TRUNC1]] - ; CHECK: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[UV1]](s32) - ; CHECK: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[UV5]](s32) - ; CHECK: [[ADD1:%[0-9]+]]:_(s16) = G_ADD [[TRUNC2]], [[TRUNC3]] - ; CHECK: [[TRUNC4:%[0-9]+]]:_(s16) = G_TRUNC [[UV2]](s32) - ; CHECK: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[UV6]](s32) - ; CHECK: [[ADD2:%[0-9]+]]:_(s16) = G_ADD [[TRUNC4]], [[TRUNC5]] - ; CHECK: [[TRUNC6:%[0-9]+]]:_(s16) = G_TRUNC [[UV3]](s32) - ; CHECK: [[TRUNC7:%[0-9]+]]:_(s16) = G_TRUNC [[UV7]](s32) - ; CHECK: [[ADD3:%[0-9]+]]:_(s16) = G_ADD [[TRUNC6]], [[TRUNC7]] - ; CHECK: [[C:%[0-9]+]]:_(s16) = G_CONSTANT i16 255 - ; CHECK: [[AND:%[0-9]+]]:_(s16) = G_AND [[ADD]], [[C]] - ; CHECK: [[AND1:%[0-9]+]]:_(s16) = G_AND [[ADD1]], [[C]] - ; CHECK: [[C1:%[0-9]+]]:_(s16) = G_CONSTANT i16 8 - ; CHECK: [[SHL:%[0-9]+]]:_(s16) = G_SHL [[AND1]], [[C1]](s16) - ; CHECK: [[OR:%[0-9]+]]:_(s16) = G_OR [[AND]], [[SHL]] - ; CHECK: [[AND2:%[0-9]+]]:_(s16) = G_AND [[ADD2]], [[C]] - ; CHECK: [[AND3:%[0-9]+]]:_(s16) = G_AND [[ADD3]], [[C]] - ; CHECK: [[SHL1:%[0-9]+]]:_(s16) = G_SHL [[AND3]], [[C1]](s16) - ; CHECK: [[OR1:%[0-9]+]]:_(s16) = G_OR [[AND2]], [[SHL1]] - ; CHECK: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[OR]](s16) - ; CHECK: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[OR1]](s16) - ; CHECK: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; CHECK: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C2]](s32) - ; CHECK: [[OR2:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL2]] - ; CHECK: [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR2]](s32) - ; CHECK: $vgpr0 = COPY [[BITCAST]](<2 x s16>) + ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<4 x s32>) + ; CHECK-NEXT: [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32), [[UV6:%[0-9]+]]:_(s32), [[UV7:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<4 x s32>) + ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[UV]](s32) + ; CHECK-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[UV4]](s32) + ; CHECK-NEXT: [[ADD:%[0-9]+]]:_(s16) = G_ADD [[TRUNC]], [[TRUNC1]] + ; CHECK-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[UV1]](s32) + ; CHECK-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[UV5]](s32) + ; CHECK-NEXT: [[ADD1:%[0-9]+]]:_(s16) = G_ADD [[TRUNC2]], [[TRUNC3]] + ; CHECK-NEXT: [[TRUNC4:%[0-9]+]]:_(s16) = G_TRUNC [[UV2]](s32) + ; CHECK-NEXT: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[UV6]](s32) + ; CHECK-NEXT: [[ADD2:%[0-9]+]]:_(s16) = G_ADD [[TRUNC4]], [[TRUNC5]] + ; CHECK-NEXT: [[TRUNC6:%[0-9]+]]:_(s16) = G_TRUNC [[UV3]](s32) + ; CHECK-NEXT: [[TRUNC7:%[0-9]+]]:_(s16) = G_TRUNC [[UV7]](s32) + ; CHECK-NEXT: [[ADD3:%[0-9]+]]:_(s16) = G_ADD [[TRUNC6]], [[TRUNC7]] + ; CHECK-NEXT: [[C:%[0-9]+]]:_(s16) = G_CONSTANT i16 255 + ; CHECK-NEXT: [[AND:%[0-9]+]]:_(s16) = G_AND [[ADD]], [[C]] + ; CHECK-NEXT: [[AND1:%[0-9]+]]:_(s16) = G_AND [[ADD1]], [[C]] + ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s16) = G_CONSTANT i16 8 + ; CHECK-NEXT: [[SHL:%[0-9]+]]:_(s16) = G_SHL [[AND1]], [[C1]](s16) + ; CHECK-NEXT: [[OR:%[0-9]+]]:_(s16) = G_OR [[AND]], [[SHL]] + ; CHECK-NEXT: [[AND2:%[0-9]+]]:_(s16) = G_AND [[ADD2]], [[C]] + ; CHECK-NEXT: [[AND3:%[0-9]+]]:_(s16) = G_AND [[ADD3]], [[C]] + ; CHECK-NEXT: [[SHL1:%[0-9]+]]:_(s16) = G_SHL [[AND3]], [[C1]](s16) + ; 
CHECK-NEXT: [[OR1:%[0-9]+]]:_(s16) = G_OR [[AND2]], [[SHL1]] + ; CHECK-NEXT: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[OR]](s16) + ; CHECK-NEXT: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[OR1]](s16) + ; CHECK-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; CHECK-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C2]](s32) + ; CHECK-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL2]] + ; CHECK-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR2]](s32) + ; CHECK-NEXT: $vgpr0 = COPY [[BITCAST]](<2 x s16>) %0:_(<4 x s32>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3 %1:_(<4 x s8>) = G_TRUNC %0 %2:_(<4 x s8>) = G_ADD %1, %1 @@ -590,56 +594,56 @@ body: | ; CHECK-LABEL: name: test_bitcast_v2s16_to_v8s4 ; CHECK: [[COPY:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0 - ; CHECK: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY]](<2 x s16>) - ; CHECK: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 - ; CHECK: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) - ; CHECK: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 - ; CHECK: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C1]](s32) - ; CHECK: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 12 - ; CHECK: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C2]](s32) - ; CHECK: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; CHECK: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 20 - ; CHECK: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 24 - ; CHECK: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 28 - ; CHECK: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[COPY]](<2 x s16>) - ; CHECK: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C3]](s32) - ; CHECK: [[LSHR4:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C4]](s32) - ; CHECK: [[LSHR5:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C5]](s32) - ; CHECK: [[LSHR6:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C6]](s32) - ; CHECK: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST]](s32) - ; CHECK: [[COPY1:%[0-9]+]]:_(s16) = COPY [[TRUNC]](s16) - ; CHECK: [[ADD:%[0-9]+]]:_(s16) = G_ADD [[TRUNC]], [[COPY1]] - ; CHECK: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) - ; CHECK: [[COPY2:%[0-9]+]]:_(s16) = COPY [[TRUNC1]](s16) - ; CHECK: [[ADD1:%[0-9]+]]:_(s16) = G_ADD [[TRUNC1]], [[COPY2]] - ; CHECK: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR1]](s32) - ; CHECK: [[COPY3:%[0-9]+]]:_(s16) = COPY [[TRUNC2]](s16) - ; CHECK: [[ADD2:%[0-9]+]]:_(s16) = G_ADD [[TRUNC2]], [[COPY3]] - ; CHECK: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR2]](s32) - ; CHECK: [[COPY4:%[0-9]+]]:_(s16) = COPY [[TRUNC3]](s16) - ; CHECK: [[ADD3:%[0-9]+]]:_(s16) = G_ADD [[TRUNC3]], [[COPY4]] - ; CHECK: [[TRUNC4:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR3]](s32) - ; CHECK: [[COPY5:%[0-9]+]]:_(s16) = COPY [[TRUNC4]](s16) - ; CHECK: [[ADD4:%[0-9]+]]:_(s16) = G_ADD [[TRUNC4]], [[COPY5]] - ; CHECK: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR4]](s32) - ; CHECK: [[COPY6:%[0-9]+]]:_(s16) = COPY [[TRUNC5]](s16) - ; CHECK: [[ADD5:%[0-9]+]]:_(s16) = G_ADD [[TRUNC5]], [[COPY6]] - ; CHECK: [[TRUNC6:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR5]](s32) - ; CHECK: [[COPY7:%[0-9]+]]:_(s16) = COPY [[TRUNC6]](s16) - ; CHECK: [[ADD6:%[0-9]+]]:_(s16) = G_ADD [[TRUNC6]], [[COPY7]] - ; CHECK: [[TRUNC7:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR6]](s32) - ; CHECK: [[COPY8:%[0-9]+]]:_(s16) = COPY [[TRUNC7]](s16) - ; CHECK: [[ADD7:%[0-9]+]]:_(s16) = G_ADD [[TRUNC7]], [[COPY8]] - ; CHECK: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[ADD]](s16) - ; CHECK: [[ANYEXT1:%[0-9]+]]:_(s32) = G_ANYEXT [[ADD1]](s16) - ; CHECK: [[ANYEXT2:%[0-9]+]]:_(s32) = G_ANYEXT [[ADD2]](s16) - ; CHECK: [[ANYEXT3:%[0-9]+]]:_(s32) = G_ANYEXT [[ADD3]](s16) - ; CHECK: [[ANYEXT4:%[0-9]+]]:_(s32) = G_ANYEXT [[ADD4]](s16) - ; CHECK: 
[[ANYEXT5:%[0-9]+]]:_(s32) = G_ANYEXT [[ADD5]](s16) - ; CHECK: [[ANYEXT6:%[0-9]+]]:_(s32) = G_ANYEXT [[ADD6]](s16) - ; CHECK: [[ANYEXT7:%[0-9]+]]:_(s32) = G_ANYEXT [[ADD7]](s16) - ; CHECK: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[ANYEXT]](s32), [[ANYEXT1]](s32), [[ANYEXT2]](s32), [[ANYEXT3]](s32), [[ANYEXT4]](s32), [[ANYEXT5]](s32), [[ANYEXT6]](s32), [[ANYEXT7]](s32) - ; CHECK: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[BUILD_VECTOR]](<8 x s32>) + ; CHECK-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY]](<2 x s16>) + ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 + ; CHECK-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) + ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 + ; CHECK-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C1]](s32) + ; CHECK-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 12 + ; CHECK-NEXT: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C2]](s32) + ; CHECK-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; CHECK-NEXT: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 20 + ; CHECK-NEXT: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 24 + ; CHECK-NEXT: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 28 + ; CHECK-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[COPY]](<2 x s16>) + ; CHECK-NEXT: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C3]](s32) + ; CHECK-NEXT: [[LSHR4:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C4]](s32) + ; CHECK-NEXT: [[LSHR5:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C5]](s32) + ; CHECK-NEXT: [[LSHR6:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C6]](s32) + ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST]](s32) + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s16) = COPY [[TRUNC]](s16) + ; CHECK-NEXT: [[ADD:%[0-9]+]]:_(s16) = G_ADD [[TRUNC]], [[COPY1]] + ; CHECK-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(s16) = COPY [[TRUNC1]](s16) + ; CHECK-NEXT: [[ADD1:%[0-9]+]]:_(s16) = G_ADD [[TRUNC1]], [[COPY2]] + ; CHECK-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR1]](s32) + ; CHECK-NEXT: [[COPY3:%[0-9]+]]:_(s16) = COPY [[TRUNC2]](s16) + ; CHECK-NEXT: [[ADD2:%[0-9]+]]:_(s16) = G_ADD [[TRUNC2]], [[COPY3]] + ; CHECK-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR2]](s32) + ; CHECK-NEXT: [[COPY4:%[0-9]+]]:_(s16) = COPY [[TRUNC3]](s16) + ; CHECK-NEXT: [[ADD3:%[0-9]+]]:_(s16) = G_ADD [[TRUNC3]], [[COPY4]] + ; CHECK-NEXT: [[TRUNC4:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR3]](s32) + ; CHECK-NEXT: [[COPY5:%[0-9]+]]:_(s16) = COPY [[TRUNC4]](s16) + ; CHECK-NEXT: [[ADD4:%[0-9]+]]:_(s16) = G_ADD [[TRUNC4]], [[COPY5]] + ; CHECK-NEXT: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR4]](s32) + ; CHECK-NEXT: [[COPY6:%[0-9]+]]:_(s16) = COPY [[TRUNC5]](s16) + ; CHECK-NEXT: [[ADD5:%[0-9]+]]:_(s16) = G_ADD [[TRUNC5]], [[COPY6]] + ; CHECK-NEXT: [[TRUNC6:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR5]](s32) + ; CHECK-NEXT: [[COPY7:%[0-9]+]]:_(s16) = COPY [[TRUNC6]](s16) + ; CHECK-NEXT: [[ADD6:%[0-9]+]]:_(s16) = G_ADD [[TRUNC6]], [[COPY7]] + ; CHECK-NEXT: [[TRUNC7:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR6]](s32) + ; CHECK-NEXT: [[COPY8:%[0-9]+]]:_(s16) = COPY [[TRUNC7]](s16) + ; CHECK-NEXT: [[ADD7:%[0-9]+]]:_(s16) = G_ADD [[TRUNC7]], [[COPY8]] + ; CHECK-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[ADD]](s16) + ; CHECK-NEXT: [[ANYEXT1:%[0-9]+]]:_(s32) = G_ANYEXT [[ADD1]](s16) + ; CHECK-NEXT: [[ANYEXT2:%[0-9]+]]:_(s32) = G_ANYEXT [[ADD2]](s16) + ; CHECK-NEXT: [[ANYEXT3:%[0-9]+]]:_(s32) = G_ANYEXT [[ADD3]](s16) + ; CHECK-NEXT: [[ANYEXT4:%[0-9]+]]:_(s32) = G_ANYEXT [[ADD4]](s16) + ; CHECK-NEXT: [[ANYEXT5:%[0-9]+]]:_(s32) = 
G_ANYEXT [[ADD5]](s16) + ; CHECK-NEXT: [[ANYEXT6:%[0-9]+]]:_(s32) = G_ANYEXT [[ADD6]](s16) + ; CHECK-NEXT: [[ANYEXT7:%[0-9]+]]:_(s32) = G_ANYEXT [[ADD7]](s16) + ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[ANYEXT]](s32), [[ANYEXT1]](s32), [[ANYEXT2]](s32), [[ANYEXT3]](s32), [[ANYEXT4]](s32), [[ANYEXT5]](s32), [[ANYEXT6]](s32), [[ANYEXT7]](s32) + ; CHECK-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[BUILD_VECTOR]](<8 x s32>) %0:_(<2 x s16>) = COPY $vgpr0 %1:_(<8 x s4>) = G_BITCAST %0 %2:_(<8 x s4>) = G_ADD %1, %1 @@ -655,63 +659,63 @@ body: | ; CHECK-LABEL: name: test_bitcast_v8s4_to_v2s16 ; CHECK: [[COPY:%[0-9]+]]:_(<8 x s32>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 - ; CHECK: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32), [[UV6:%[0-9]+]]:_(s32), [[UV7:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<8 x s32>) - ; CHECK: [[UV8:%[0-9]+]]:_(s32), [[UV9:%[0-9]+]]:_(s32), [[UV10:%[0-9]+]]:_(s32), [[UV11:%[0-9]+]]:_(s32), [[UV12:%[0-9]+]]:_(s32), [[UV13:%[0-9]+]]:_(s32), [[UV14:%[0-9]+]]:_(s32), [[UV15:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<8 x s32>) - ; CHECK: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[UV]](s32) - ; CHECK: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[UV8]](s32) - ; CHECK: [[ADD:%[0-9]+]]:_(s16) = G_ADD [[TRUNC]], [[TRUNC1]] - ; CHECK: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[UV1]](s32) - ; CHECK: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[UV9]](s32) - ; CHECK: [[ADD1:%[0-9]+]]:_(s16) = G_ADD [[TRUNC2]], [[TRUNC3]] - ; CHECK: [[TRUNC4:%[0-9]+]]:_(s16) = G_TRUNC [[UV2]](s32) - ; CHECK: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[UV10]](s32) - ; CHECK: [[ADD2:%[0-9]+]]:_(s16) = G_ADD [[TRUNC4]], [[TRUNC5]] - ; CHECK: [[TRUNC6:%[0-9]+]]:_(s16) = G_TRUNC [[UV3]](s32) - ; CHECK: [[TRUNC7:%[0-9]+]]:_(s16) = G_TRUNC [[UV11]](s32) - ; CHECK: [[ADD3:%[0-9]+]]:_(s16) = G_ADD [[TRUNC6]], [[TRUNC7]] - ; CHECK: [[TRUNC8:%[0-9]+]]:_(s16) = G_TRUNC [[UV4]](s32) - ; CHECK: [[TRUNC9:%[0-9]+]]:_(s16) = G_TRUNC [[UV12]](s32) - ; CHECK: [[ADD4:%[0-9]+]]:_(s16) = G_ADD [[TRUNC8]], [[TRUNC9]] - ; CHECK: [[TRUNC10:%[0-9]+]]:_(s16) = G_TRUNC [[UV5]](s32) - ; CHECK: [[TRUNC11:%[0-9]+]]:_(s16) = G_TRUNC [[UV13]](s32) - ; CHECK: [[ADD5:%[0-9]+]]:_(s16) = G_ADD [[TRUNC10]], [[TRUNC11]] - ; CHECK: [[TRUNC12:%[0-9]+]]:_(s16) = G_TRUNC [[UV6]](s32) - ; CHECK: [[TRUNC13:%[0-9]+]]:_(s16) = G_TRUNC [[UV14]](s32) - ; CHECK: [[ADD6:%[0-9]+]]:_(s16) = G_ADD [[TRUNC12]], [[TRUNC13]] - ; CHECK: [[TRUNC14:%[0-9]+]]:_(s16) = G_TRUNC [[UV7]](s32) - ; CHECK: [[TRUNC15:%[0-9]+]]:_(s16) = G_TRUNC [[UV15]](s32) - ; CHECK: [[ADD7:%[0-9]+]]:_(s16) = G_ADD [[TRUNC14]], [[TRUNC15]] - ; CHECK: [[C:%[0-9]+]]:_(s16) = G_CONSTANT i16 15 - ; CHECK: [[AND:%[0-9]+]]:_(s16) = G_AND [[ADD]], [[C]] - ; CHECK: [[AND1:%[0-9]+]]:_(s16) = G_AND [[ADD1]], [[C]] - ; CHECK: [[C1:%[0-9]+]]:_(s16) = G_CONSTANT i16 4 - ; CHECK: [[SHL:%[0-9]+]]:_(s16) = G_SHL [[AND1]], [[C1]](s16) - ; CHECK: [[OR:%[0-9]+]]:_(s16) = G_OR [[AND]], [[SHL]] - ; CHECK: [[AND2:%[0-9]+]]:_(s16) = G_AND [[ADD2]], [[C]] - ; CHECK: [[C2:%[0-9]+]]:_(s16) = G_CONSTANT i16 8 - ; CHECK: [[SHL1:%[0-9]+]]:_(s16) = G_SHL [[AND2]], [[C2]](s16) - ; CHECK: [[OR1:%[0-9]+]]:_(s16) = G_OR [[OR]], [[SHL1]] - ; CHECK: [[AND3:%[0-9]+]]:_(s16) = G_AND [[ADD3]], [[C]] - ; CHECK: [[C3:%[0-9]+]]:_(s16) = G_CONSTANT i16 12 - ; CHECK: [[SHL2:%[0-9]+]]:_(s16) = G_SHL [[AND3]], [[C3]](s16) - ; CHECK: [[OR2:%[0-9]+]]:_(s16) = G_OR [[OR1]], [[SHL2]] - ; CHECK: 
[[AND4:%[0-9]+]]:_(s16) = G_AND [[ADD4]], [[C]] - ; CHECK: [[AND5:%[0-9]+]]:_(s16) = G_AND [[ADD5]], [[C]] - ; CHECK: [[SHL3:%[0-9]+]]:_(s16) = G_SHL [[AND5]], [[C1]](s16) - ; CHECK: [[OR3:%[0-9]+]]:_(s16) = G_OR [[AND4]], [[SHL3]] - ; CHECK: [[AND6:%[0-9]+]]:_(s16) = G_AND [[ADD6]], [[C]] - ; CHECK: [[SHL4:%[0-9]+]]:_(s16) = G_SHL [[AND6]], [[C2]](s16) - ; CHECK: [[OR4:%[0-9]+]]:_(s16) = G_OR [[OR3]], [[SHL4]] - ; CHECK: [[AND7:%[0-9]+]]:_(s16) = G_AND [[ADD7]], [[C]] - ; CHECK: [[SHL5:%[0-9]+]]:_(s16) = G_SHL [[AND7]], [[C3]](s16) - ; CHECK: [[OR5:%[0-9]+]]:_(s16) = G_OR [[OR4]], [[SHL5]] - ; CHECK: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[OR2]](s16) - ; CHECK: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[OR5]](s16) - ; CHECK: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; CHECK: [[SHL6:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C4]](s32) - ; CHECK: [[OR6:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL6]] - ; CHECK: [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR6]](s32) - ; CHECK: $vgpr0 = COPY [[BITCAST]](<2 x s16>) + ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32), [[UV6:%[0-9]+]]:_(s32), [[UV7:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<8 x s32>) + ; CHECK-NEXT: [[UV8:%[0-9]+]]:_(s32), [[UV9:%[0-9]+]]:_(s32), [[UV10:%[0-9]+]]:_(s32), [[UV11:%[0-9]+]]:_(s32), [[UV12:%[0-9]+]]:_(s32), [[UV13:%[0-9]+]]:_(s32), [[UV14:%[0-9]+]]:_(s32), [[UV15:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<8 x s32>) + ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[UV]](s32) + ; CHECK-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[UV8]](s32) + ; CHECK-NEXT: [[ADD:%[0-9]+]]:_(s16) = G_ADD [[TRUNC]], [[TRUNC1]] + ; CHECK-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[UV1]](s32) + ; CHECK-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[UV9]](s32) + ; CHECK-NEXT: [[ADD1:%[0-9]+]]:_(s16) = G_ADD [[TRUNC2]], [[TRUNC3]] + ; CHECK-NEXT: [[TRUNC4:%[0-9]+]]:_(s16) = G_TRUNC [[UV2]](s32) + ; CHECK-NEXT: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[UV10]](s32) + ; CHECK-NEXT: [[ADD2:%[0-9]+]]:_(s16) = G_ADD [[TRUNC4]], [[TRUNC5]] + ; CHECK-NEXT: [[TRUNC6:%[0-9]+]]:_(s16) = G_TRUNC [[UV3]](s32) + ; CHECK-NEXT: [[TRUNC7:%[0-9]+]]:_(s16) = G_TRUNC [[UV11]](s32) + ; CHECK-NEXT: [[ADD3:%[0-9]+]]:_(s16) = G_ADD [[TRUNC6]], [[TRUNC7]] + ; CHECK-NEXT: [[TRUNC8:%[0-9]+]]:_(s16) = G_TRUNC [[UV4]](s32) + ; CHECK-NEXT: [[TRUNC9:%[0-9]+]]:_(s16) = G_TRUNC [[UV12]](s32) + ; CHECK-NEXT: [[ADD4:%[0-9]+]]:_(s16) = G_ADD [[TRUNC8]], [[TRUNC9]] + ; CHECK-NEXT: [[TRUNC10:%[0-9]+]]:_(s16) = G_TRUNC [[UV5]](s32) + ; CHECK-NEXT: [[TRUNC11:%[0-9]+]]:_(s16) = G_TRUNC [[UV13]](s32) + ; CHECK-NEXT: [[ADD5:%[0-9]+]]:_(s16) = G_ADD [[TRUNC10]], [[TRUNC11]] + ; CHECK-NEXT: [[TRUNC12:%[0-9]+]]:_(s16) = G_TRUNC [[UV6]](s32) + ; CHECK-NEXT: [[TRUNC13:%[0-9]+]]:_(s16) = G_TRUNC [[UV14]](s32) + ; CHECK-NEXT: [[ADD6:%[0-9]+]]:_(s16) = G_ADD [[TRUNC12]], [[TRUNC13]] + ; CHECK-NEXT: [[TRUNC14:%[0-9]+]]:_(s16) = G_TRUNC [[UV7]](s32) + ; CHECK-NEXT: [[TRUNC15:%[0-9]+]]:_(s16) = G_TRUNC [[UV15]](s32) + ; CHECK-NEXT: [[ADD7:%[0-9]+]]:_(s16) = G_ADD [[TRUNC14]], [[TRUNC15]] + ; CHECK-NEXT: [[C:%[0-9]+]]:_(s16) = G_CONSTANT i16 15 + ; CHECK-NEXT: [[AND:%[0-9]+]]:_(s16) = G_AND [[ADD]], [[C]] + ; CHECK-NEXT: [[AND1:%[0-9]+]]:_(s16) = G_AND [[ADD1]], [[C]] + ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s16) = G_CONSTANT i16 4 + ; CHECK-NEXT: [[SHL:%[0-9]+]]:_(s16) = G_SHL [[AND1]], [[C1]](s16) + ; CHECK-NEXT: [[OR:%[0-9]+]]:_(s16) = G_OR [[AND]], [[SHL]] + ; CHECK-NEXT: [[AND2:%[0-9]+]]:_(s16) = G_AND 
[[ADD2]], [[C]] + ; CHECK-NEXT: [[C2:%[0-9]+]]:_(s16) = G_CONSTANT i16 8 + ; CHECK-NEXT: [[SHL1:%[0-9]+]]:_(s16) = G_SHL [[AND2]], [[C2]](s16) + ; CHECK-NEXT: [[OR1:%[0-9]+]]:_(s16) = G_OR [[OR]], [[SHL1]] + ; CHECK-NEXT: [[AND3:%[0-9]+]]:_(s16) = G_AND [[ADD3]], [[C]] + ; CHECK-NEXT: [[C3:%[0-9]+]]:_(s16) = G_CONSTANT i16 12 + ; CHECK-NEXT: [[SHL2:%[0-9]+]]:_(s16) = G_SHL [[AND3]], [[C3]](s16) + ; CHECK-NEXT: [[OR2:%[0-9]+]]:_(s16) = G_OR [[OR1]], [[SHL2]] + ; CHECK-NEXT: [[AND4:%[0-9]+]]:_(s16) = G_AND [[ADD4]], [[C]] + ; CHECK-NEXT: [[AND5:%[0-9]+]]:_(s16) = G_AND [[ADD5]], [[C]] + ; CHECK-NEXT: [[SHL3:%[0-9]+]]:_(s16) = G_SHL [[AND5]], [[C1]](s16) + ; CHECK-NEXT: [[OR3:%[0-9]+]]:_(s16) = G_OR [[AND4]], [[SHL3]] + ; CHECK-NEXT: [[AND6:%[0-9]+]]:_(s16) = G_AND [[ADD6]], [[C]] + ; CHECK-NEXT: [[SHL4:%[0-9]+]]:_(s16) = G_SHL [[AND6]], [[C2]](s16) + ; CHECK-NEXT: [[OR4:%[0-9]+]]:_(s16) = G_OR [[OR3]], [[SHL4]] + ; CHECK-NEXT: [[AND7:%[0-9]+]]:_(s16) = G_AND [[ADD7]], [[C]] + ; CHECK-NEXT: [[SHL5:%[0-9]+]]:_(s16) = G_SHL [[AND7]], [[C3]](s16) + ; CHECK-NEXT: [[OR5:%[0-9]+]]:_(s16) = G_OR [[OR4]], [[SHL5]] + ; CHECK-NEXT: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[OR2]](s16) + ; CHECK-NEXT: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[OR5]](s16) + ; CHECK-NEXT: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; CHECK-NEXT: [[SHL6:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C4]](s32) + ; CHECK-NEXT: [[OR6:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL6]] + ; CHECK-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR6]](s32) + ; CHECK-NEXT: $vgpr0 = COPY [[BITCAST]](<2 x s16>) %0:_(<8 x s32>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 %1:_(<8 x s4>) = G_TRUNC %0 %2:_(<8 x s4>) = G_ADD %1, %1 @@ -727,8 +731,8 @@ body: | ; CHECK-LABEL: name: test_bitcast_v4s16_to_v2s32 ; CHECK: [[COPY:%[0-9]+]]:_(<4 x s16>) = COPY $vgpr0_vgpr1 - ; CHECK: [[BITCAST:%[0-9]+]]:_(<2 x s32>) = G_BITCAST [[COPY]](<4 x s16>) - ; CHECK: S_ENDPGM 0, implicit [[BITCAST]](<2 x s32>) + ; CHECK-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x s32>) = G_BITCAST [[COPY]](<4 x s16>) + ; CHECK-NEXT: S_ENDPGM 0, implicit [[BITCAST]](<2 x s32>) %0:_(<4 x s16>) = COPY $vgpr0_vgpr1 %1:_(<2 x s32>) = G_BITCAST %0 S_ENDPGM 0, implicit %1 @@ -742,42 +746,42 @@ body: | ; CHECK-LABEL: name: test_bitcast_v2s32_to_v4s16 ; CHECK: [[COPY:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr0_vgpr1 - ; CHECK: [[BITCAST:%[0-9]+]]:_(<4 x s16>) = G_BITCAST [[COPY]](<2 x s32>) - ; CHECK: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[BITCAST]](<4 x s16>) - ; CHECK: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>) - ; CHECK: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST1]](s32) - ; CHECK: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; CHECK: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C]](s32) - ; CHECK: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) - ; CHECK: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>) - ; CHECK: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST2]](s32) - ; CHECK: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C]](s32) - ; CHECK: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR1]](s32) - ; CHECK: [[UV2:%[0-9]+]]:_(<2 x s16>), [[UV3:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[BITCAST]](<4 x s16>) - ; CHECK: [[BITCAST3:%[0-9]+]]:_(s32) = G_BITCAST [[UV2]](<2 x s16>) - ; CHECK: [[TRUNC4:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST3]](s32) - ; CHECK: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST3]], [[C]](s32) - ; CHECK: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR2]](s32) - ; CHECK: [[BITCAST4:%[0-9]+]]:_(s32) = G_BITCAST [[UV3]](<2 x s16>) - 
; CHECK: [[TRUNC6:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST4]](s32) - ; CHECK: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST4]], [[C]](s32) - ; CHECK: [[TRUNC7:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR3]](s32) - ; CHECK: [[ADD:%[0-9]+]]:_(s16) = G_ADD [[TRUNC]], [[TRUNC4]] - ; CHECK: [[ADD1:%[0-9]+]]:_(s16) = G_ADD [[TRUNC1]], [[TRUNC5]] - ; CHECK: [[ADD2:%[0-9]+]]:_(s16) = G_ADD [[TRUNC2]], [[TRUNC6]] - ; CHECK: [[ADD3:%[0-9]+]]:_(s16) = G_ADD [[TRUNC3]], [[TRUNC7]] - ; CHECK: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[ADD]](s16) - ; CHECK: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[ADD1]](s16) - ; CHECK: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C]](s32) - ; CHECK: [[OR:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL]] - ; CHECK: [[BITCAST5:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) - ; CHECK: [[ZEXT2:%[0-9]+]]:_(s32) = G_ZEXT [[ADD2]](s16) - ; CHECK: [[ZEXT3:%[0-9]+]]:_(s32) = G_ZEXT [[ADD3]](s16) - ; CHECK: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[ZEXT3]], [[C]](s32) - ; CHECK: [[OR1:%[0-9]+]]:_(s32) = G_OR [[ZEXT2]], [[SHL1]] - ; CHECK: [[BITCAST6:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32) - ; CHECK: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BITCAST5]](<2 x s16>), [[BITCAST6]](<2 x s16>) - ; CHECK: S_ENDPGM 0, implicit [[CONCAT_VECTORS]](<4 x s16>) + ; CHECK-NEXT: [[BITCAST:%[0-9]+]]:_(<4 x s16>) = G_BITCAST [[COPY]](<2 x s32>) + ; CHECK-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[BITCAST]](<4 x s16>) + ; CHECK-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>) + ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST1]](s32) + ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; CHECK-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C]](s32) + ; CHECK-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) + ; CHECK-NEXT: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>) + ; CHECK-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST2]](s32) + ; CHECK-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C]](s32) + ; CHECK-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR1]](s32) + ; CHECK-NEXT: [[UV2:%[0-9]+]]:_(<2 x s16>), [[UV3:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[BITCAST]](<4 x s16>) + ; CHECK-NEXT: [[BITCAST3:%[0-9]+]]:_(s32) = G_BITCAST [[UV2]](<2 x s16>) + ; CHECK-NEXT: [[TRUNC4:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST3]](s32) + ; CHECK-NEXT: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST3]], [[C]](s32) + ; CHECK-NEXT: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR2]](s32) + ; CHECK-NEXT: [[BITCAST4:%[0-9]+]]:_(s32) = G_BITCAST [[UV3]](<2 x s16>) + ; CHECK-NEXT: [[TRUNC6:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST4]](s32) + ; CHECK-NEXT: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST4]], [[C]](s32) + ; CHECK-NEXT: [[TRUNC7:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR3]](s32) + ; CHECK-NEXT: [[ADD:%[0-9]+]]:_(s16) = G_ADD [[TRUNC]], [[TRUNC4]] + ; CHECK-NEXT: [[ADD1:%[0-9]+]]:_(s16) = G_ADD [[TRUNC1]], [[TRUNC5]] + ; CHECK-NEXT: [[ADD2:%[0-9]+]]:_(s16) = G_ADD [[TRUNC2]], [[TRUNC6]] + ; CHECK-NEXT: [[ADD3:%[0-9]+]]:_(s16) = G_ADD [[TRUNC3]], [[TRUNC7]] + ; CHECK-NEXT: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[ADD]](s16) + ; CHECK-NEXT: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[ADD1]](s16) + ; CHECK-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C]](s32) + ; CHECK-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL]] + ; CHECK-NEXT: [[BITCAST5:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) + ; CHECK-NEXT: [[ZEXT2:%[0-9]+]]:_(s32) = G_ZEXT [[ADD2]](s16) + ; CHECK-NEXT: [[ZEXT3:%[0-9]+]]:_(s32) = G_ZEXT [[ADD3]](s16) + ; CHECK-NEXT: 
[[SHL1:%[0-9]+]]:_(s32) = G_SHL [[ZEXT3]], [[C]](s32) + ; CHECK-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[ZEXT2]], [[SHL1]] + ; CHECK-NEXT: [[BITCAST6:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32) + ; CHECK-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BITCAST5]](<2 x s16>), [[BITCAST6]](<2 x s16>) + ; CHECK-NEXT: S_ENDPGM 0, implicit [[CONCAT_VECTORS]](<4 x s16>) %0:_(<2 x s32>) = COPY $vgpr0_vgpr1 %1:_(<4 x s16>) = G_BITCAST %0 %2:_(<4 x s16>) = G_ADD %1, %1 @@ -793,51 +797,51 @@ body: | ; CHECK-LABEL: name: test_bitcast_v2s32_to_v8s8 ; CHECK: [[COPY:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr0_vgpr1 - ; CHECK: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<2 x s32>) - ; CHECK: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 - ; CHECK: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[UV]], [[C]](s32) - ; CHECK: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; CHECK: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[UV]], [[C1]](s32) - ; CHECK: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 24 - ; CHECK: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[UV]], [[C2]](s32) - ; CHECK: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[UV1]], [[C]](s32) - ; CHECK: [[LSHR4:%[0-9]+]]:_(s32) = G_LSHR [[UV1]], [[C1]](s32) - ; CHECK: [[LSHR5:%[0-9]+]]:_(s32) = G_LSHR [[UV1]], [[C2]](s32) - ; CHECK: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[UV]](s32) - ; CHECK: [[COPY1:%[0-9]+]]:_(s16) = COPY [[TRUNC]](s16) - ; CHECK: [[ADD:%[0-9]+]]:_(s16) = G_ADD [[TRUNC]], [[COPY1]] - ; CHECK: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) - ; CHECK: [[COPY2:%[0-9]+]]:_(s16) = COPY [[TRUNC1]](s16) - ; CHECK: [[ADD1:%[0-9]+]]:_(s16) = G_ADD [[TRUNC1]], [[COPY2]] - ; CHECK: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR1]](s32) - ; CHECK: [[COPY3:%[0-9]+]]:_(s16) = COPY [[TRUNC2]](s16) - ; CHECK: [[ADD2:%[0-9]+]]:_(s16) = G_ADD [[TRUNC2]], [[COPY3]] - ; CHECK: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR2]](s32) - ; CHECK: [[COPY4:%[0-9]+]]:_(s16) = COPY [[TRUNC3]](s16) - ; CHECK: [[ADD3:%[0-9]+]]:_(s16) = G_ADD [[TRUNC3]], [[COPY4]] - ; CHECK: [[TRUNC4:%[0-9]+]]:_(s16) = G_TRUNC [[UV1]](s32) - ; CHECK: [[COPY5:%[0-9]+]]:_(s16) = COPY [[TRUNC4]](s16) - ; CHECK: [[ADD4:%[0-9]+]]:_(s16) = G_ADD [[TRUNC4]], [[COPY5]] - ; CHECK: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR3]](s32) - ; CHECK: [[COPY6:%[0-9]+]]:_(s16) = COPY [[TRUNC5]](s16) - ; CHECK: [[ADD5:%[0-9]+]]:_(s16) = G_ADD [[TRUNC5]], [[COPY6]] - ; CHECK: [[TRUNC6:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR4]](s32) - ; CHECK: [[COPY7:%[0-9]+]]:_(s16) = COPY [[TRUNC6]](s16) - ; CHECK: [[ADD6:%[0-9]+]]:_(s16) = G_ADD [[TRUNC6]], [[COPY7]] - ; CHECK: [[TRUNC7:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR5]](s32) - ; CHECK: [[COPY8:%[0-9]+]]:_(s16) = COPY [[TRUNC7]](s16) - ; CHECK: [[ADD7:%[0-9]+]]:_(s16) = G_ADD [[TRUNC7]], [[COPY8]] - ; CHECK: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[ADD]](s16) - ; CHECK: [[ANYEXT1:%[0-9]+]]:_(s32) = G_ANYEXT [[ADD1]](s16) - ; CHECK: [[ANYEXT2:%[0-9]+]]:_(s32) = G_ANYEXT [[ADD2]](s16) - ; CHECK: [[ANYEXT3:%[0-9]+]]:_(s32) = G_ANYEXT [[ADD3]](s16) - ; CHECK: [[ANYEXT4:%[0-9]+]]:_(s32) = G_ANYEXT [[ADD4]](s16) - ; CHECK: [[ANYEXT5:%[0-9]+]]:_(s32) = G_ANYEXT [[ADD5]](s16) - ; CHECK: [[ANYEXT6:%[0-9]+]]:_(s32) = G_ANYEXT [[ADD6]](s16) - ; CHECK: [[ANYEXT7:%[0-9]+]]:_(s32) = G_ANYEXT [[ADD7]](s16) - ; CHECK: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[ANYEXT]](s32), [[ANYEXT1]](s32), [[ANYEXT2]](s32), [[ANYEXT3]](s32), [[ANYEXT4]](s32), [[ANYEXT5]](s32), [[ANYEXT6]](s32), [[ANYEXT7]](s32) - ; CHECK: [[TRUNC8:%[0-9]+]]:_(<8 x s8>) = G_TRUNC [[BUILD_VECTOR]](<8 x s32>) - ; CHECK: S_ENDPGM 0, 
implicit [[TRUNC8]](<8 x s8>) + ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<2 x s32>) + ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 + ; CHECK-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[UV]], [[C]](s32) + ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; CHECK-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[UV]], [[C1]](s32) + ; CHECK-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 24 + ; CHECK-NEXT: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[UV]], [[C2]](s32) + ; CHECK-NEXT: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[UV1]], [[C]](s32) + ; CHECK-NEXT: [[LSHR4:%[0-9]+]]:_(s32) = G_LSHR [[UV1]], [[C1]](s32) + ; CHECK-NEXT: [[LSHR5:%[0-9]+]]:_(s32) = G_LSHR [[UV1]], [[C2]](s32) + ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[UV]](s32) + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s16) = COPY [[TRUNC]](s16) + ; CHECK-NEXT: [[ADD:%[0-9]+]]:_(s16) = G_ADD [[TRUNC]], [[COPY1]] + ; CHECK-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(s16) = COPY [[TRUNC1]](s16) + ; CHECK-NEXT: [[ADD1:%[0-9]+]]:_(s16) = G_ADD [[TRUNC1]], [[COPY2]] + ; CHECK-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR1]](s32) + ; CHECK-NEXT: [[COPY3:%[0-9]+]]:_(s16) = COPY [[TRUNC2]](s16) + ; CHECK-NEXT: [[ADD2:%[0-9]+]]:_(s16) = G_ADD [[TRUNC2]], [[COPY3]] + ; CHECK-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR2]](s32) + ; CHECK-NEXT: [[COPY4:%[0-9]+]]:_(s16) = COPY [[TRUNC3]](s16) + ; CHECK-NEXT: [[ADD3:%[0-9]+]]:_(s16) = G_ADD [[TRUNC3]], [[COPY4]] + ; CHECK-NEXT: [[TRUNC4:%[0-9]+]]:_(s16) = G_TRUNC [[UV1]](s32) + ; CHECK-NEXT: [[COPY5:%[0-9]+]]:_(s16) = COPY [[TRUNC4]](s16) + ; CHECK-NEXT: [[ADD4:%[0-9]+]]:_(s16) = G_ADD [[TRUNC4]], [[COPY5]] + ; CHECK-NEXT: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR3]](s32) + ; CHECK-NEXT: [[COPY6:%[0-9]+]]:_(s16) = COPY [[TRUNC5]](s16) + ; CHECK-NEXT: [[ADD5:%[0-9]+]]:_(s16) = G_ADD [[TRUNC5]], [[COPY6]] + ; CHECK-NEXT: [[TRUNC6:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR4]](s32) + ; CHECK-NEXT: [[COPY7:%[0-9]+]]:_(s16) = COPY [[TRUNC6]](s16) + ; CHECK-NEXT: [[ADD6:%[0-9]+]]:_(s16) = G_ADD [[TRUNC6]], [[COPY7]] + ; CHECK-NEXT: [[TRUNC7:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR5]](s32) + ; CHECK-NEXT: [[COPY8:%[0-9]+]]:_(s16) = COPY [[TRUNC7]](s16) + ; CHECK-NEXT: [[ADD7:%[0-9]+]]:_(s16) = G_ADD [[TRUNC7]], [[COPY8]] + ; CHECK-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[ADD]](s16) + ; CHECK-NEXT: [[ANYEXT1:%[0-9]+]]:_(s32) = G_ANYEXT [[ADD1]](s16) + ; CHECK-NEXT: [[ANYEXT2:%[0-9]+]]:_(s32) = G_ANYEXT [[ADD2]](s16) + ; CHECK-NEXT: [[ANYEXT3:%[0-9]+]]:_(s32) = G_ANYEXT [[ADD3]](s16) + ; CHECK-NEXT: [[ANYEXT4:%[0-9]+]]:_(s32) = G_ANYEXT [[ADD4]](s16) + ; CHECK-NEXT: [[ANYEXT5:%[0-9]+]]:_(s32) = G_ANYEXT [[ADD5]](s16) + ; CHECK-NEXT: [[ANYEXT6:%[0-9]+]]:_(s32) = G_ANYEXT [[ADD6]](s16) + ; CHECK-NEXT: [[ANYEXT7:%[0-9]+]]:_(s32) = G_ANYEXT [[ADD7]](s16) + ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[ANYEXT]](s32), [[ANYEXT1]](s32), [[ANYEXT2]](s32), [[ANYEXT3]](s32), [[ANYEXT4]](s32), [[ANYEXT5]](s32), [[ANYEXT6]](s32), [[ANYEXT7]](s32) + ; CHECK-NEXT: [[TRUNC8:%[0-9]+]]:_(<8 x s8>) = G_TRUNC [[BUILD_VECTOR]](<8 x s32>) + ; CHECK-NEXT: S_ENDPGM 0, implicit [[TRUNC8]](<8 x s8>) %0:_(<2 x s32>) = COPY $vgpr0_vgpr1 %1:_(<8 x s8>) = G_BITCAST %0 %2:_(<8 x s8>) = G_ADD %1, %1 @@ -853,34 +857,34 @@ body: | ; CHECK-LABEL: name: test_bitcast_v8s8_to_v2s32 ; CHECK: [[COPY:%[0-9]+]]:_(<8 x s32>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 - ; CHECK: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), 
[[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32), [[UV6:%[0-9]+]]:_(s32), [[UV7:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<8 x s32>) - ; CHECK: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 255 - ; CHECK: [[AND:%[0-9]+]]:_(s32) = G_AND [[UV]], [[C]] - ; CHECK: [[AND1:%[0-9]+]]:_(s32) = G_AND [[UV1]], [[C]] - ; CHECK: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 - ; CHECK: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C1]](s32) - ; CHECK: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]] - ; CHECK: [[AND2:%[0-9]+]]:_(s32) = G_AND [[UV2]], [[C]] - ; CHECK: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; CHECK: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND2]], [[C2]](s32) - ; CHECK: [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]] - ; CHECK: [[AND3:%[0-9]+]]:_(s32) = G_AND [[UV3]], [[C]] - ; CHECK: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 24 - ; CHECK: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C3]](s32) - ; CHECK: [[OR2:%[0-9]+]]:_(s32) = G_OR [[OR1]], [[SHL2]] - ; CHECK: [[UV8:%[0-9]+]]:_(s32), [[UV9:%[0-9]+]]:_(s32), [[UV10:%[0-9]+]]:_(s32), [[UV11:%[0-9]+]]:_(s32), [[UV12:%[0-9]+]]:_(s32), [[UV13:%[0-9]+]]:_(s32), [[UV14:%[0-9]+]]:_(s32), [[UV15:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<8 x s32>) - ; CHECK: [[AND4:%[0-9]+]]:_(s32) = G_AND [[UV12]], [[C]] - ; CHECK: [[AND5:%[0-9]+]]:_(s32) = G_AND [[UV13]], [[C]] - ; CHECK: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[C1]](s32) - ; CHECK: [[OR3:%[0-9]+]]:_(s32) = G_OR [[AND4]], [[SHL3]] - ; CHECK: [[AND6:%[0-9]+]]:_(s32) = G_AND [[UV14]], [[C]] - ; CHECK: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[AND6]], [[C2]](s32) - ; CHECK: [[OR4:%[0-9]+]]:_(s32) = G_OR [[OR3]], [[SHL4]] - ; CHECK: [[AND7:%[0-9]+]]:_(s32) = G_AND [[UV15]], [[C]] - ; CHECK: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[AND7]], [[C3]](s32) - ; CHECK: [[OR5:%[0-9]+]]:_(s32) = G_OR [[OR4]], [[SHL5]] - ; CHECK: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[OR2]](s32), [[OR5]](s32) - ; CHECK: S_ENDPGM 0, implicit [[BUILD_VECTOR]](<2 x s32>) + ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32), [[UV6:%[0-9]+]]:_(s32), [[UV7:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<8 x s32>) + ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 255 + ; CHECK-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[UV]], [[C]] + ; CHECK-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[UV1]], [[C]] + ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 + ; CHECK-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C1]](s32) + ; CHECK-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]] + ; CHECK-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[UV2]], [[C]] + ; CHECK-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; CHECK-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND2]], [[C2]](s32) + ; CHECK-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]] + ; CHECK-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[UV3]], [[C]] + ; CHECK-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 24 + ; CHECK-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C3]](s32) + ; CHECK-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[OR1]], [[SHL2]] + ; CHECK-NEXT: [[UV8:%[0-9]+]]:_(s32), [[UV9:%[0-9]+]]:_(s32), [[UV10:%[0-9]+]]:_(s32), [[UV11:%[0-9]+]]:_(s32), [[UV12:%[0-9]+]]:_(s32), [[UV13:%[0-9]+]]:_(s32), [[UV14:%[0-9]+]]:_(s32), [[UV15:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<8 x s32>) + ; CHECK-NEXT: [[AND4:%[0-9]+]]:_(s32) = G_AND [[UV12]], [[C]] + ; CHECK-NEXT: [[AND5:%[0-9]+]]:_(s32) = G_AND [[UV13]], [[C]] + ; CHECK-NEXT: [[SHL3:%[0-9]+]]:_(s32) = G_SHL 
[[AND5]], [[C1]](s32) + ; CHECK-NEXT: [[OR3:%[0-9]+]]:_(s32) = G_OR [[AND4]], [[SHL3]] + ; CHECK-NEXT: [[AND6:%[0-9]+]]:_(s32) = G_AND [[UV14]], [[C]] + ; CHECK-NEXT: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[AND6]], [[C2]](s32) + ; CHECK-NEXT: [[OR4:%[0-9]+]]:_(s32) = G_OR [[OR3]], [[SHL4]] + ; CHECK-NEXT: [[AND7:%[0-9]+]]:_(s32) = G_AND [[UV15]], [[C]] + ; CHECK-NEXT: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[AND7]], [[C3]](s32) + ; CHECK-NEXT: [[OR5:%[0-9]+]]:_(s32) = G_OR [[OR4]], [[SHL5]] + ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[OR2]](s32), [[OR5]](s32) + ; CHECK-NEXT: S_ENDPGM 0, implicit [[BUILD_VECTOR]](<2 x s32>) %0:_(<8 x s32>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 %1:_(<8 x s8>) = G_TRUNC %0 %2:_(<2 x s32>) = G_BITCAST %1 @@ -895,44 +899,44 @@ body: | ; CHECK-LABEL: name: test_bitcast_v8s8_to_s64 ; CHECK: [[COPY:%[0-9]+]]:_(<8 x s32>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 - ; CHECK: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32), [[UV6:%[0-9]+]]:_(s32), [[UV7:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<8 x s32>) - ; CHECK: [[C:%[0-9]+]]:_(s16) = G_CONSTANT i16 255 - ; CHECK: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[UV]](s32) - ; CHECK: [[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC]], [[C]] - ; CHECK: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[UV1]](s32) - ; CHECK: [[AND1:%[0-9]+]]:_(s16) = G_AND [[TRUNC1]], [[C]] - ; CHECK: [[C1:%[0-9]+]]:_(s16) = G_CONSTANT i16 8 - ; CHECK: [[SHL:%[0-9]+]]:_(s16) = G_SHL [[AND1]], [[C1]](s16) - ; CHECK: [[OR:%[0-9]+]]:_(s16) = G_OR [[AND]], [[SHL]] - ; CHECK: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[UV2]](s32) - ; CHECK: [[AND2:%[0-9]+]]:_(s16) = G_AND [[TRUNC2]], [[C]] - ; CHECK: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[UV3]](s32) - ; CHECK: [[AND3:%[0-9]+]]:_(s16) = G_AND [[TRUNC3]], [[C]] - ; CHECK: [[SHL1:%[0-9]+]]:_(s16) = G_SHL [[AND3]], [[C1]](s16) - ; CHECK: [[OR1:%[0-9]+]]:_(s16) = G_OR [[AND2]], [[SHL1]] - ; CHECK: [[TRUNC4:%[0-9]+]]:_(s16) = G_TRUNC [[UV4]](s32) - ; CHECK: [[AND4:%[0-9]+]]:_(s16) = G_AND [[TRUNC4]], [[C]] - ; CHECK: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[UV5]](s32) - ; CHECK: [[AND5:%[0-9]+]]:_(s16) = G_AND [[TRUNC5]], [[C]] - ; CHECK: [[SHL2:%[0-9]+]]:_(s16) = G_SHL [[AND5]], [[C1]](s16) - ; CHECK: [[OR2:%[0-9]+]]:_(s16) = G_OR [[AND4]], [[SHL2]] - ; CHECK: [[TRUNC6:%[0-9]+]]:_(s16) = G_TRUNC [[UV6]](s32) - ; CHECK: [[AND6:%[0-9]+]]:_(s16) = G_AND [[TRUNC6]], [[C]] - ; CHECK: [[TRUNC7:%[0-9]+]]:_(s16) = G_TRUNC [[UV7]](s32) - ; CHECK: [[AND7:%[0-9]+]]:_(s16) = G_AND [[TRUNC7]], [[C]] - ; CHECK: [[SHL3:%[0-9]+]]:_(s16) = G_SHL [[AND7]], [[C1]](s16) - ; CHECK: [[OR3:%[0-9]+]]:_(s16) = G_OR [[AND6]], [[SHL3]] - ; CHECK: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[OR]](s16) - ; CHECK: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[OR1]](s16) - ; CHECK: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; CHECK: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C2]](s32) - ; CHECK: [[OR4:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL4]] - ; CHECK: [[ZEXT2:%[0-9]+]]:_(s32) = G_ZEXT [[OR2]](s16) - ; CHECK: [[ZEXT3:%[0-9]+]]:_(s32) = G_ZEXT [[OR3]](s16) - ; CHECK: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[ZEXT3]], [[C2]](s32) - ; CHECK: [[OR5:%[0-9]+]]:_(s32) = G_OR [[ZEXT2]], [[SHL5]] - ; CHECK: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[OR4]](s32), [[OR5]](s32) - ; CHECK: S_ENDPGM 0, implicit [[MV]](s64) + ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32), 
[[UV5:%[0-9]+]]:_(s32), [[UV6:%[0-9]+]]:_(s32), [[UV7:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<8 x s32>) + ; CHECK-NEXT: [[C:%[0-9]+]]:_(s16) = G_CONSTANT i16 255 + ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[UV]](s32) + ; CHECK-NEXT: [[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC]], [[C]] + ; CHECK-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[UV1]](s32) + ; CHECK-NEXT: [[AND1:%[0-9]+]]:_(s16) = G_AND [[TRUNC1]], [[C]] + ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s16) = G_CONSTANT i16 8 + ; CHECK-NEXT: [[SHL:%[0-9]+]]:_(s16) = G_SHL [[AND1]], [[C1]](s16) + ; CHECK-NEXT: [[OR:%[0-9]+]]:_(s16) = G_OR [[AND]], [[SHL]] + ; CHECK-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[UV2]](s32) + ; CHECK-NEXT: [[AND2:%[0-9]+]]:_(s16) = G_AND [[TRUNC2]], [[C]] + ; CHECK-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[UV3]](s32) + ; CHECK-NEXT: [[AND3:%[0-9]+]]:_(s16) = G_AND [[TRUNC3]], [[C]] + ; CHECK-NEXT: [[SHL1:%[0-9]+]]:_(s16) = G_SHL [[AND3]], [[C1]](s16) + ; CHECK-NEXT: [[OR1:%[0-9]+]]:_(s16) = G_OR [[AND2]], [[SHL1]] + ; CHECK-NEXT: [[TRUNC4:%[0-9]+]]:_(s16) = G_TRUNC [[UV4]](s32) + ; CHECK-NEXT: [[AND4:%[0-9]+]]:_(s16) = G_AND [[TRUNC4]], [[C]] + ; CHECK-NEXT: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[UV5]](s32) + ; CHECK-NEXT: [[AND5:%[0-9]+]]:_(s16) = G_AND [[TRUNC5]], [[C]] + ; CHECK-NEXT: [[SHL2:%[0-9]+]]:_(s16) = G_SHL [[AND5]], [[C1]](s16) + ; CHECK-NEXT: [[OR2:%[0-9]+]]:_(s16) = G_OR [[AND4]], [[SHL2]] + ; CHECK-NEXT: [[TRUNC6:%[0-9]+]]:_(s16) = G_TRUNC [[UV6]](s32) + ; CHECK-NEXT: [[AND6:%[0-9]+]]:_(s16) = G_AND [[TRUNC6]], [[C]] + ; CHECK-NEXT: [[TRUNC7:%[0-9]+]]:_(s16) = G_TRUNC [[UV7]](s32) + ; CHECK-NEXT: [[AND7:%[0-9]+]]:_(s16) = G_AND [[TRUNC7]], [[C]] + ; CHECK-NEXT: [[SHL3:%[0-9]+]]:_(s16) = G_SHL [[AND7]], [[C1]](s16) + ; CHECK-NEXT: [[OR3:%[0-9]+]]:_(s16) = G_OR [[AND6]], [[SHL3]] + ; CHECK-NEXT: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[OR]](s16) + ; CHECK-NEXT: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[OR1]](s16) + ; CHECK-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; CHECK-NEXT: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C2]](s32) + ; CHECK-NEXT: [[OR4:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL4]] + ; CHECK-NEXT: [[ZEXT2:%[0-9]+]]:_(s32) = G_ZEXT [[OR2]](s16) + ; CHECK-NEXT: [[ZEXT3:%[0-9]+]]:_(s32) = G_ZEXT [[OR3]](s16) + ; CHECK-NEXT: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[ZEXT3]], [[C2]](s32) + ; CHECK-NEXT: [[OR5:%[0-9]+]]:_(s32) = G_OR [[ZEXT2]], [[SHL5]] + ; CHECK-NEXT: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[OR4]](s32), [[OR5]](s32) + ; CHECK-NEXT: S_ENDPGM 0, implicit [[MV]](s64) %0:_(<8 x s32>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 %1:_(<8 x s8>) = G_TRUNC %0 %2:_(s64) = G_BITCAST %1 @@ -947,135 +951,135 @@ body: | ; CHECK-LABEL: name: test_bitcast_v2s32_to_v16s4 ; CHECK: [[COPY:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr0_vgpr1 - ; CHECK: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<2 x s32>) - ; CHECK: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 - ; CHECK: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[UV]], [[C]](s32) - ; CHECK: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 - ; CHECK: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[UV]], [[C1]](s32) - ; CHECK: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 12 - ; CHECK: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[UV]], [[C2]](s32) - ; CHECK: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; CHECK: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[UV]], [[C3]](s32) - ; CHECK: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 20 - ; CHECK: [[LSHR4:%[0-9]+]]:_(s32) = G_LSHR [[UV]], [[C4]](s32) - ; CHECK: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 24 - ; CHECK: [[LSHR5:%[0-9]+]]:_(s32) = 
G_LSHR [[UV]], [[C5]](s32) - ; CHECK: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 28 - ; CHECK: [[LSHR6:%[0-9]+]]:_(s32) = G_LSHR [[UV]], [[C6]](s32) - ; CHECK: [[LSHR7:%[0-9]+]]:_(s32) = G_LSHR [[UV1]], [[C]](s32) - ; CHECK: [[LSHR8:%[0-9]+]]:_(s32) = G_LSHR [[UV1]], [[C1]](s32) - ; CHECK: [[LSHR9:%[0-9]+]]:_(s32) = G_LSHR [[UV1]], [[C2]](s32) - ; CHECK: [[LSHR10:%[0-9]+]]:_(s32) = G_LSHR [[UV1]], [[C3]](s32) - ; CHECK: [[LSHR11:%[0-9]+]]:_(s32) = G_LSHR [[UV1]], [[C4]](s32) - ; CHECK: [[LSHR12:%[0-9]+]]:_(s32) = G_LSHR [[UV1]], [[C5]](s32) - ; CHECK: [[LSHR13:%[0-9]+]]:_(s32) = G_LSHR [[UV1]], [[C6]](s32) - ; CHECK: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[UV]](s32) - ; CHECK: [[COPY1:%[0-9]+]]:_(s16) = COPY [[TRUNC]](s16) - ; CHECK: [[ADD:%[0-9]+]]:_(s16) = G_ADD [[TRUNC]], [[COPY1]] - ; CHECK: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) - ; CHECK: [[COPY2:%[0-9]+]]:_(s16) = COPY [[TRUNC1]](s16) - ; CHECK: [[ADD1:%[0-9]+]]:_(s16) = G_ADD [[TRUNC1]], [[COPY2]] - ; CHECK: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR1]](s32) - ; CHECK: [[COPY3:%[0-9]+]]:_(s16) = COPY [[TRUNC2]](s16) - ; CHECK: [[ADD2:%[0-9]+]]:_(s16) = G_ADD [[TRUNC2]], [[COPY3]] - ; CHECK: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR2]](s32) - ; CHECK: [[COPY4:%[0-9]+]]:_(s16) = COPY [[TRUNC3]](s16) - ; CHECK: [[ADD3:%[0-9]+]]:_(s16) = G_ADD [[TRUNC3]], [[COPY4]] - ; CHECK: [[TRUNC4:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR3]](s32) - ; CHECK: [[COPY5:%[0-9]+]]:_(s16) = COPY [[TRUNC4]](s16) - ; CHECK: [[ADD4:%[0-9]+]]:_(s16) = G_ADD [[TRUNC4]], [[COPY5]] - ; CHECK: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR4]](s32) - ; CHECK: [[COPY6:%[0-9]+]]:_(s16) = COPY [[TRUNC5]](s16) - ; CHECK: [[ADD5:%[0-9]+]]:_(s16) = G_ADD [[TRUNC5]], [[COPY6]] - ; CHECK: [[TRUNC6:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR5]](s32) - ; CHECK: [[COPY7:%[0-9]+]]:_(s16) = COPY [[TRUNC6]](s16) - ; CHECK: [[ADD6:%[0-9]+]]:_(s16) = G_ADD [[TRUNC6]], [[COPY7]] - ; CHECK: [[TRUNC7:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR6]](s32) - ; CHECK: [[COPY8:%[0-9]+]]:_(s16) = COPY [[TRUNC7]](s16) - ; CHECK: [[ADD7:%[0-9]+]]:_(s16) = G_ADD [[TRUNC7]], [[COPY8]] - ; CHECK: [[TRUNC8:%[0-9]+]]:_(s16) = G_TRUNC [[UV1]](s32) - ; CHECK: [[COPY9:%[0-9]+]]:_(s16) = COPY [[TRUNC8]](s16) - ; CHECK: [[ADD8:%[0-9]+]]:_(s16) = G_ADD [[TRUNC8]], [[COPY9]] - ; CHECK: [[TRUNC9:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR7]](s32) - ; CHECK: [[COPY10:%[0-9]+]]:_(s16) = COPY [[TRUNC9]](s16) - ; CHECK: [[ADD9:%[0-9]+]]:_(s16) = G_ADD [[TRUNC9]], [[COPY10]] - ; CHECK: [[TRUNC10:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR8]](s32) - ; CHECK: [[COPY11:%[0-9]+]]:_(s16) = COPY [[TRUNC10]](s16) - ; CHECK: [[ADD10:%[0-9]+]]:_(s16) = G_ADD [[TRUNC10]], [[COPY11]] - ; CHECK: [[TRUNC11:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR9]](s32) - ; CHECK: [[COPY12:%[0-9]+]]:_(s16) = COPY [[TRUNC11]](s16) - ; CHECK: [[ADD11:%[0-9]+]]:_(s16) = G_ADD [[TRUNC11]], [[COPY12]] - ; CHECK: [[TRUNC12:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR10]](s32) - ; CHECK: [[COPY13:%[0-9]+]]:_(s16) = COPY [[TRUNC12]](s16) - ; CHECK: [[ADD12:%[0-9]+]]:_(s16) = G_ADD [[TRUNC12]], [[COPY13]] - ; CHECK: [[TRUNC13:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR11]](s32) - ; CHECK: [[COPY14:%[0-9]+]]:_(s16) = COPY [[TRUNC13]](s16) - ; CHECK: [[ADD13:%[0-9]+]]:_(s16) = G_ADD [[TRUNC13]], [[COPY14]] - ; CHECK: [[TRUNC14:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR12]](s32) - ; CHECK: [[COPY15:%[0-9]+]]:_(s16) = COPY [[TRUNC14]](s16) - ; CHECK: [[ADD14:%[0-9]+]]:_(s16) = G_ADD [[TRUNC14]], [[COPY15]] - ; CHECK: [[TRUNC15:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR13]](s32) - ; CHECK: [[COPY16:%[0-9]+]]:_(s16) = COPY [[TRUNC15]](s16) - ; 
CHECK: [[ADD15:%[0-9]+]]:_(s16) = G_ADD [[TRUNC15]], [[COPY16]] - ; CHECK: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[ADD]](s16) - ; CHECK: [[ANYEXT1:%[0-9]+]]:_(s32) = G_ANYEXT [[ADD1]](s16) - ; CHECK: [[ANYEXT2:%[0-9]+]]:_(s32) = G_ANYEXT [[ADD2]](s16) - ; CHECK: [[ANYEXT3:%[0-9]+]]:_(s32) = G_ANYEXT [[ADD3]](s16) - ; CHECK: [[ANYEXT4:%[0-9]+]]:_(s32) = G_ANYEXT [[ADD4]](s16) - ; CHECK: [[ANYEXT5:%[0-9]+]]:_(s32) = G_ANYEXT [[ADD5]](s16) - ; CHECK: [[ANYEXT6:%[0-9]+]]:_(s32) = G_ANYEXT [[ADD6]](s16) - ; CHECK: [[ANYEXT7:%[0-9]+]]:_(s32) = G_ANYEXT [[ADD7]](s16) - ; CHECK: [[ANYEXT8:%[0-9]+]]:_(s32) = G_ANYEXT [[ADD8]](s16) - ; CHECK: [[ANYEXT9:%[0-9]+]]:_(s32) = G_ANYEXT [[ADD9]](s16) - ; CHECK: [[ANYEXT10:%[0-9]+]]:_(s32) = G_ANYEXT [[ADD10]](s16) - ; CHECK: [[ANYEXT11:%[0-9]+]]:_(s32) = G_ANYEXT [[ADD11]](s16) - ; CHECK: [[ANYEXT12:%[0-9]+]]:_(s32) = G_ANYEXT [[ADD12]](s16) - ; CHECK: [[ANYEXT13:%[0-9]+]]:_(s32) = G_ANYEXT [[ADD13]](s16) - ; CHECK: [[ANYEXT14:%[0-9]+]]:_(s32) = G_ANYEXT [[ADD14]](s16) - ; CHECK: [[ANYEXT15:%[0-9]+]]:_(s32) = G_ANYEXT [[ADD15]](s16) - ; CHECK: [[C7:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 - ; CHECK: [[AND:%[0-9]+]]:_(s32) = G_AND [[ANYEXT]], [[C7]] - ; CHECK: [[AND1:%[0-9]+]]:_(s32) = G_AND [[ANYEXT1]], [[C7]] - ; CHECK: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C3]](s32) - ; CHECK: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]] - ; CHECK: [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) - ; CHECK: [[AND2:%[0-9]+]]:_(s32) = G_AND [[ANYEXT2]], [[C7]] - ; CHECK: [[AND3:%[0-9]+]]:_(s32) = G_AND [[ANYEXT3]], [[C7]] - ; CHECK: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C3]](s32) - ; CHECK: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL1]] - ; CHECK: [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32) - ; CHECK: [[AND4:%[0-9]+]]:_(s32) = G_AND [[ANYEXT4]], [[C7]] - ; CHECK: [[AND5:%[0-9]+]]:_(s32) = G_AND [[ANYEXT5]], [[C7]] - ; CHECK: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[C3]](s32) - ; CHECK: [[OR2:%[0-9]+]]:_(s32) = G_OR [[AND4]], [[SHL2]] - ; CHECK: [[BITCAST2:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR2]](s32) - ; CHECK: [[AND6:%[0-9]+]]:_(s32) = G_AND [[ANYEXT6]], [[C7]] - ; CHECK: [[AND7:%[0-9]+]]:_(s32) = G_AND [[ANYEXT7]], [[C7]] - ; CHECK: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[AND7]], [[C3]](s32) - ; CHECK: [[OR3:%[0-9]+]]:_(s32) = G_OR [[AND6]], [[SHL3]] - ; CHECK: [[BITCAST3:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR3]](s32) - ; CHECK: [[AND8:%[0-9]+]]:_(s32) = G_AND [[ANYEXT8]], [[C7]] - ; CHECK: [[AND9:%[0-9]+]]:_(s32) = G_AND [[ANYEXT9]], [[C7]] - ; CHECK: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[AND9]], [[C3]](s32) - ; CHECK: [[OR4:%[0-9]+]]:_(s32) = G_OR [[AND8]], [[SHL4]] - ; CHECK: [[BITCAST4:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR4]](s32) - ; CHECK: [[AND10:%[0-9]+]]:_(s32) = G_AND [[ANYEXT10]], [[C7]] - ; CHECK: [[AND11:%[0-9]+]]:_(s32) = G_AND [[ANYEXT11]], [[C7]] - ; CHECK: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[AND11]], [[C3]](s32) - ; CHECK: [[OR5:%[0-9]+]]:_(s32) = G_OR [[AND10]], [[SHL5]] - ; CHECK: [[BITCAST5:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR5]](s32) - ; CHECK: [[AND12:%[0-9]+]]:_(s32) = G_AND [[ANYEXT12]], [[C7]] - ; CHECK: [[AND13:%[0-9]+]]:_(s32) = G_AND [[ANYEXT13]], [[C7]] - ; CHECK: [[SHL6:%[0-9]+]]:_(s32) = G_SHL [[AND13]], [[C3]](s32) - ; CHECK: [[OR6:%[0-9]+]]:_(s32) = G_OR [[AND12]], [[SHL6]] - ; CHECK: [[BITCAST6:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR6]](s32) - ; CHECK: [[AND14:%[0-9]+]]:_(s32) = G_AND [[ANYEXT14]], [[C7]] - ; CHECK: [[AND15:%[0-9]+]]:_(s32) = G_AND [[ANYEXT15]], [[C7]] - ; CHECK: [[SHL7:%[0-9]+]]:_(s32) 
= G_SHL [[AND15]], [[C3]](s32) - ; CHECK: [[OR7:%[0-9]+]]:_(s32) = G_OR [[AND14]], [[SHL7]] - ; CHECK: [[BITCAST7:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR7]](s32) - ; CHECK: [[CONCAT_VECTORS:%[0-9]+]]:_(<16 x s16>) = G_CONCAT_VECTORS [[BITCAST]](<2 x s16>), [[BITCAST1]](<2 x s16>), [[BITCAST2]](<2 x s16>), [[BITCAST3]](<2 x s16>), [[BITCAST4]](<2 x s16>), [[BITCAST5]](<2 x s16>), [[BITCAST6]](<2 x s16>), [[BITCAST7]](<2 x s16>) - ; CHECK: S_ENDPGM 0, implicit [[CONCAT_VECTORS]](<16 x s16>) + ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<2 x s32>) + ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 + ; CHECK-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[UV]], [[C]](s32) + ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 + ; CHECK-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[UV]], [[C1]](s32) + ; CHECK-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 12 + ; CHECK-NEXT: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[UV]], [[C2]](s32) + ; CHECK-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; CHECK-NEXT: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[UV]], [[C3]](s32) + ; CHECK-NEXT: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 20 + ; CHECK-NEXT: [[LSHR4:%[0-9]+]]:_(s32) = G_LSHR [[UV]], [[C4]](s32) + ; CHECK-NEXT: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 24 + ; CHECK-NEXT: [[LSHR5:%[0-9]+]]:_(s32) = G_LSHR [[UV]], [[C5]](s32) + ; CHECK-NEXT: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 28 + ; CHECK-NEXT: [[LSHR6:%[0-9]+]]:_(s32) = G_LSHR [[UV]], [[C6]](s32) + ; CHECK-NEXT: [[LSHR7:%[0-9]+]]:_(s32) = G_LSHR [[UV1]], [[C]](s32) + ; CHECK-NEXT: [[LSHR8:%[0-9]+]]:_(s32) = G_LSHR [[UV1]], [[C1]](s32) + ; CHECK-NEXT: [[LSHR9:%[0-9]+]]:_(s32) = G_LSHR [[UV1]], [[C2]](s32) + ; CHECK-NEXT: [[LSHR10:%[0-9]+]]:_(s32) = G_LSHR [[UV1]], [[C3]](s32) + ; CHECK-NEXT: [[LSHR11:%[0-9]+]]:_(s32) = G_LSHR [[UV1]], [[C4]](s32) + ; CHECK-NEXT: [[LSHR12:%[0-9]+]]:_(s32) = G_LSHR [[UV1]], [[C5]](s32) + ; CHECK-NEXT: [[LSHR13:%[0-9]+]]:_(s32) = G_LSHR [[UV1]], [[C6]](s32) + ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[UV]](s32) + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s16) = COPY [[TRUNC]](s16) + ; CHECK-NEXT: [[ADD:%[0-9]+]]:_(s16) = G_ADD [[TRUNC]], [[COPY1]] + ; CHECK-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(s16) = COPY [[TRUNC1]](s16) + ; CHECK-NEXT: [[ADD1:%[0-9]+]]:_(s16) = G_ADD [[TRUNC1]], [[COPY2]] + ; CHECK-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR1]](s32) + ; CHECK-NEXT: [[COPY3:%[0-9]+]]:_(s16) = COPY [[TRUNC2]](s16) + ; CHECK-NEXT: [[ADD2:%[0-9]+]]:_(s16) = G_ADD [[TRUNC2]], [[COPY3]] + ; CHECK-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR2]](s32) + ; CHECK-NEXT: [[COPY4:%[0-9]+]]:_(s16) = COPY [[TRUNC3]](s16) + ; CHECK-NEXT: [[ADD3:%[0-9]+]]:_(s16) = G_ADD [[TRUNC3]], [[COPY4]] + ; CHECK-NEXT: [[TRUNC4:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR3]](s32) + ; CHECK-NEXT: [[COPY5:%[0-9]+]]:_(s16) = COPY [[TRUNC4]](s16) + ; CHECK-NEXT: [[ADD4:%[0-9]+]]:_(s16) = G_ADD [[TRUNC4]], [[COPY5]] + ; CHECK-NEXT: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR4]](s32) + ; CHECK-NEXT: [[COPY6:%[0-9]+]]:_(s16) = COPY [[TRUNC5]](s16) + ; CHECK-NEXT: [[ADD5:%[0-9]+]]:_(s16) = G_ADD [[TRUNC5]], [[COPY6]] + ; CHECK-NEXT: [[TRUNC6:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR5]](s32) + ; CHECK-NEXT: [[COPY7:%[0-9]+]]:_(s16) = COPY [[TRUNC6]](s16) + ; CHECK-NEXT: [[ADD6:%[0-9]+]]:_(s16) = G_ADD [[TRUNC6]], [[COPY7]] + ; CHECK-NEXT: [[TRUNC7:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR6]](s32) + ; CHECK-NEXT: [[COPY8:%[0-9]+]]:_(s16) = COPY [[TRUNC7]](s16) + ; CHECK-NEXT: 
[[ADD7:%[0-9]+]]:_(s16) = G_ADD [[TRUNC7]], [[COPY8]] + ; CHECK-NEXT: [[TRUNC8:%[0-9]+]]:_(s16) = G_TRUNC [[UV1]](s32) + ; CHECK-NEXT: [[COPY9:%[0-9]+]]:_(s16) = COPY [[TRUNC8]](s16) + ; CHECK-NEXT: [[ADD8:%[0-9]+]]:_(s16) = G_ADD [[TRUNC8]], [[COPY9]] + ; CHECK-NEXT: [[TRUNC9:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR7]](s32) + ; CHECK-NEXT: [[COPY10:%[0-9]+]]:_(s16) = COPY [[TRUNC9]](s16) + ; CHECK-NEXT: [[ADD9:%[0-9]+]]:_(s16) = G_ADD [[TRUNC9]], [[COPY10]] + ; CHECK-NEXT: [[TRUNC10:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR8]](s32) + ; CHECK-NEXT: [[COPY11:%[0-9]+]]:_(s16) = COPY [[TRUNC10]](s16) + ; CHECK-NEXT: [[ADD10:%[0-9]+]]:_(s16) = G_ADD [[TRUNC10]], [[COPY11]] + ; CHECK-NEXT: [[TRUNC11:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR9]](s32) + ; CHECK-NEXT: [[COPY12:%[0-9]+]]:_(s16) = COPY [[TRUNC11]](s16) + ; CHECK-NEXT: [[ADD11:%[0-9]+]]:_(s16) = G_ADD [[TRUNC11]], [[COPY12]] + ; CHECK-NEXT: [[TRUNC12:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR10]](s32) + ; CHECK-NEXT: [[COPY13:%[0-9]+]]:_(s16) = COPY [[TRUNC12]](s16) + ; CHECK-NEXT: [[ADD12:%[0-9]+]]:_(s16) = G_ADD [[TRUNC12]], [[COPY13]] + ; CHECK-NEXT: [[TRUNC13:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR11]](s32) + ; CHECK-NEXT: [[COPY14:%[0-9]+]]:_(s16) = COPY [[TRUNC13]](s16) + ; CHECK-NEXT: [[ADD13:%[0-9]+]]:_(s16) = G_ADD [[TRUNC13]], [[COPY14]] + ; CHECK-NEXT: [[TRUNC14:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR12]](s32) + ; CHECK-NEXT: [[COPY15:%[0-9]+]]:_(s16) = COPY [[TRUNC14]](s16) + ; CHECK-NEXT: [[ADD14:%[0-9]+]]:_(s16) = G_ADD [[TRUNC14]], [[COPY15]] + ; CHECK-NEXT: [[TRUNC15:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR13]](s32) + ; CHECK-NEXT: [[COPY16:%[0-9]+]]:_(s16) = COPY [[TRUNC15]](s16) + ; CHECK-NEXT: [[ADD15:%[0-9]+]]:_(s16) = G_ADD [[TRUNC15]], [[COPY16]] + ; CHECK-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[ADD]](s16) + ; CHECK-NEXT: [[ANYEXT1:%[0-9]+]]:_(s32) = G_ANYEXT [[ADD1]](s16) + ; CHECK-NEXT: [[ANYEXT2:%[0-9]+]]:_(s32) = G_ANYEXT [[ADD2]](s16) + ; CHECK-NEXT: [[ANYEXT3:%[0-9]+]]:_(s32) = G_ANYEXT [[ADD3]](s16) + ; CHECK-NEXT: [[ANYEXT4:%[0-9]+]]:_(s32) = G_ANYEXT [[ADD4]](s16) + ; CHECK-NEXT: [[ANYEXT5:%[0-9]+]]:_(s32) = G_ANYEXT [[ADD5]](s16) + ; CHECK-NEXT: [[ANYEXT6:%[0-9]+]]:_(s32) = G_ANYEXT [[ADD6]](s16) + ; CHECK-NEXT: [[ANYEXT7:%[0-9]+]]:_(s32) = G_ANYEXT [[ADD7]](s16) + ; CHECK-NEXT: [[ANYEXT8:%[0-9]+]]:_(s32) = G_ANYEXT [[ADD8]](s16) + ; CHECK-NEXT: [[ANYEXT9:%[0-9]+]]:_(s32) = G_ANYEXT [[ADD9]](s16) + ; CHECK-NEXT: [[ANYEXT10:%[0-9]+]]:_(s32) = G_ANYEXT [[ADD10]](s16) + ; CHECK-NEXT: [[ANYEXT11:%[0-9]+]]:_(s32) = G_ANYEXT [[ADD11]](s16) + ; CHECK-NEXT: [[ANYEXT12:%[0-9]+]]:_(s32) = G_ANYEXT [[ADD12]](s16) + ; CHECK-NEXT: [[ANYEXT13:%[0-9]+]]:_(s32) = G_ANYEXT [[ADD13]](s16) + ; CHECK-NEXT: [[ANYEXT14:%[0-9]+]]:_(s32) = G_ANYEXT [[ADD14]](s16) + ; CHECK-NEXT: [[ANYEXT15:%[0-9]+]]:_(s32) = G_ANYEXT [[ADD15]](s16) + ; CHECK-NEXT: [[C7:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 + ; CHECK-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[ANYEXT]], [[C7]] + ; CHECK-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[ANYEXT1]], [[C7]] + ; CHECK-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C3]](s32) + ; CHECK-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]] + ; CHECK-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) + ; CHECK-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[ANYEXT2]], [[C7]] + ; CHECK-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[ANYEXT3]], [[C7]] + ; CHECK-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C3]](s32) + ; CHECK-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL1]] + ; CHECK-NEXT: [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32) + ; 
CHECK-NEXT: [[AND4:%[0-9]+]]:_(s32) = G_AND [[ANYEXT4]], [[C7]] + ; CHECK-NEXT: [[AND5:%[0-9]+]]:_(s32) = G_AND [[ANYEXT5]], [[C7]] + ; CHECK-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[C3]](s32) + ; CHECK-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[AND4]], [[SHL2]] + ; CHECK-NEXT: [[BITCAST2:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR2]](s32) + ; CHECK-NEXT: [[AND6:%[0-9]+]]:_(s32) = G_AND [[ANYEXT6]], [[C7]] + ; CHECK-NEXT: [[AND7:%[0-9]+]]:_(s32) = G_AND [[ANYEXT7]], [[C7]] + ; CHECK-NEXT: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[AND7]], [[C3]](s32) + ; CHECK-NEXT: [[OR3:%[0-9]+]]:_(s32) = G_OR [[AND6]], [[SHL3]] + ; CHECK-NEXT: [[BITCAST3:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR3]](s32) + ; CHECK-NEXT: [[AND8:%[0-9]+]]:_(s32) = G_AND [[ANYEXT8]], [[C7]] + ; CHECK-NEXT: [[AND9:%[0-9]+]]:_(s32) = G_AND [[ANYEXT9]], [[C7]] + ; CHECK-NEXT: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[AND9]], [[C3]](s32) + ; CHECK-NEXT: [[OR4:%[0-9]+]]:_(s32) = G_OR [[AND8]], [[SHL4]] + ; CHECK-NEXT: [[BITCAST4:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR4]](s32) + ; CHECK-NEXT: [[AND10:%[0-9]+]]:_(s32) = G_AND [[ANYEXT10]], [[C7]] + ; CHECK-NEXT: [[AND11:%[0-9]+]]:_(s32) = G_AND [[ANYEXT11]], [[C7]] + ; CHECK-NEXT: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[AND11]], [[C3]](s32) + ; CHECK-NEXT: [[OR5:%[0-9]+]]:_(s32) = G_OR [[AND10]], [[SHL5]] + ; CHECK-NEXT: [[BITCAST5:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR5]](s32) + ; CHECK-NEXT: [[AND12:%[0-9]+]]:_(s32) = G_AND [[ANYEXT12]], [[C7]] + ; CHECK-NEXT: [[AND13:%[0-9]+]]:_(s32) = G_AND [[ANYEXT13]], [[C7]] + ; CHECK-NEXT: [[SHL6:%[0-9]+]]:_(s32) = G_SHL [[AND13]], [[C3]](s32) + ; CHECK-NEXT: [[OR6:%[0-9]+]]:_(s32) = G_OR [[AND12]], [[SHL6]] + ; CHECK-NEXT: [[BITCAST6:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR6]](s32) + ; CHECK-NEXT: [[AND14:%[0-9]+]]:_(s32) = G_AND [[ANYEXT14]], [[C7]] + ; CHECK-NEXT: [[AND15:%[0-9]+]]:_(s32) = G_AND [[ANYEXT15]], [[C7]] + ; CHECK-NEXT: [[SHL7:%[0-9]+]]:_(s32) = G_SHL [[AND15]], [[C3]](s32) + ; CHECK-NEXT: [[OR7:%[0-9]+]]:_(s32) = G_OR [[AND14]], [[SHL7]] + ; CHECK-NEXT: [[BITCAST7:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR7]](s32) + ; CHECK-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<16 x s16>) = G_CONCAT_VECTORS [[BITCAST]](<2 x s16>), [[BITCAST1]](<2 x s16>), [[BITCAST2]](<2 x s16>), [[BITCAST3]](<2 x s16>), [[BITCAST4]](<2 x s16>), [[BITCAST5]](<2 x s16>), [[BITCAST6]](<2 x s16>), [[BITCAST7]](<2 x s16>) + ; CHECK-NEXT: S_ENDPGM 0, implicit [[CONCAT_VECTORS]](<16 x s16>) %0:_(<2 x s32>) = COPY $vgpr0_vgpr1 %1:_(<16 x s4>) = G_BITCAST %0 %2:_(<16 x s4>) = G_ADD %1, %1 @@ -1092,78 +1096,78 @@ body: | ; CHECK-LABEL: name: test_bitcast_v16s4_to_v2s32 ; CHECK: [[COPY:%[0-9]+]]:_(<16 x s16>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 - ; CHECK: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>), [[UV2:%[0-9]+]]:_(<2 x s16>), [[UV3:%[0-9]+]]:_(<2 x s16>), [[UV4:%[0-9]+]]:_(<2 x s16>), [[UV5:%[0-9]+]]:_(<2 x s16>), [[UV6:%[0-9]+]]:_(<2 x s16>), [[UV7:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[COPY]](<16 x s16>) - ; CHECK: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>) - ; CHECK: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; CHECK: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) - ; CHECK: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>) - ; CHECK: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C]](s32) - ; CHECK: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[UV2]](<2 x s16>) - ; CHECK: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C]](s32) - ; CHECK: [[BITCAST3:%[0-9]+]]:_(s32) = G_BITCAST [[UV3]](<2 x s16>) - ; CHECK: 
[[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST3]], [[C]](s32) - ; CHECK: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 15 - ; CHECK: [[AND:%[0-9]+]]:_(s32) = G_AND [[BITCAST]], [[C1]] - ; CHECK: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LSHR]], [[C1]] - ; CHECK: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 - ; CHECK: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C2]](s32) - ; CHECK: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]] - ; CHECK: [[AND2:%[0-9]+]]:_(s32) = G_AND [[BITCAST1]], [[C1]] - ; CHECK: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 - ; CHECK: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND2]], [[C3]](s32) - ; CHECK: [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]] - ; CHECK: [[AND3:%[0-9]+]]:_(s32) = G_AND [[LSHR1]], [[C1]] - ; CHECK: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 12 - ; CHECK: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C4]](s32) - ; CHECK: [[OR2:%[0-9]+]]:_(s32) = G_OR [[OR1]], [[SHL2]] - ; CHECK: [[AND4:%[0-9]+]]:_(s32) = G_AND [[BITCAST2]], [[C1]] - ; CHECK: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[AND4]], [[C]](s32) - ; CHECK: [[OR3:%[0-9]+]]:_(s32) = G_OR [[OR2]], [[SHL3]] - ; CHECK: [[AND5:%[0-9]+]]:_(s32) = G_AND [[LSHR2]], [[C1]] - ; CHECK: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 20 - ; CHECK: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[C5]](s32) - ; CHECK: [[OR4:%[0-9]+]]:_(s32) = G_OR [[OR3]], [[SHL4]] - ; CHECK: [[AND6:%[0-9]+]]:_(s32) = G_AND [[BITCAST3]], [[C1]] - ; CHECK: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 24 - ; CHECK: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[AND6]], [[C6]](s32) - ; CHECK: [[OR5:%[0-9]+]]:_(s32) = G_OR [[OR4]], [[SHL5]] - ; CHECK: [[AND7:%[0-9]+]]:_(s32) = G_AND [[LSHR3]], [[C1]] - ; CHECK: [[C7:%[0-9]+]]:_(s32) = G_CONSTANT i32 28 - ; CHECK: [[SHL6:%[0-9]+]]:_(s32) = G_SHL [[AND7]], [[C7]](s32) - ; CHECK: [[OR6:%[0-9]+]]:_(s32) = G_OR [[OR5]], [[SHL6]] - ; CHECK: [[UV8:%[0-9]+]]:_(<2 x s16>), [[UV9:%[0-9]+]]:_(<2 x s16>), [[UV10:%[0-9]+]]:_(<2 x s16>), [[UV11:%[0-9]+]]:_(<2 x s16>), [[UV12:%[0-9]+]]:_(<2 x s16>), [[UV13:%[0-9]+]]:_(<2 x s16>), [[UV14:%[0-9]+]]:_(<2 x s16>), [[UV15:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[COPY]](<16 x s16>) - ; CHECK: [[BITCAST4:%[0-9]+]]:_(s32) = G_BITCAST [[UV12]](<2 x s16>) - ; CHECK: [[LSHR4:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST4]], [[C]](s32) - ; CHECK: [[BITCAST5:%[0-9]+]]:_(s32) = G_BITCAST [[UV13]](<2 x s16>) - ; CHECK: [[LSHR5:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST5]], [[C]](s32) - ; CHECK: [[BITCAST6:%[0-9]+]]:_(s32) = G_BITCAST [[UV14]](<2 x s16>) - ; CHECK: [[LSHR6:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST6]], [[C]](s32) - ; CHECK: [[BITCAST7:%[0-9]+]]:_(s32) = G_BITCAST [[UV15]](<2 x s16>) - ; CHECK: [[LSHR7:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST7]], [[C]](s32) - ; CHECK: [[AND8:%[0-9]+]]:_(s32) = G_AND [[BITCAST4]], [[C1]] - ; CHECK: [[AND9:%[0-9]+]]:_(s32) = G_AND [[LSHR4]], [[C1]] - ; CHECK: [[SHL7:%[0-9]+]]:_(s32) = G_SHL [[AND9]], [[C2]](s32) - ; CHECK: [[OR7:%[0-9]+]]:_(s32) = G_OR [[AND8]], [[SHL7]] - ; CHECK: [[AND10:%[0-9]+]]:_(s32) = G_AND [[BITCAST5]], [[C1]] - ; CHECK: [[SHL8:%[0-9]+]]:_(s32) = G_SHL [[AND10]], [[C3]](s32) - ; CHECK: [[OR8:%[0-9]+]]:_(s32) = G_OR [[OR7]], [[SHL8]] - ; CHECK: [[AND11:%[0-9]+]]:_(s32) = G_AND [[LSHR5]], [[C1]] - ; CHECK: [[SHL9:%[0-9]+]]:_(s32) = G_SHL [[AND11]], [[C4]](s32) - ; CHECK: [[OR9:%[0-9]+]]:_(s32) = G_OR [[OR8]], [[SHL9]] - ; CHECK: [[AND12:%[0-9]+]]:_(s32) = G_AND [[BITCAST6]], [[C1]] - ; CHECK: [[SHL10:%[0-9]+]]:_(s32) = G_SHL [[AND12]], [[C]](s32) - ; CHECK: [[OR10:%[0-9]+]]:_(s32) = G_OR [[OR9]], [[SHL10]] - ; CHECK: [[AND13:%[0-9]+]]:_(s32) = G_AND [[LSHR6]], [[C1]] - ; CHECK: 
[[SHL11:%[0-9]+]]:_(s32) = G_SHL [[AND13]], [[C5]](s32) - ; CHECK: [[OR11:%[0-9]+]]:_(s32) = G_OR [[OR10]], [[SHL11]] - ; CHECK: [[AND14:%[0-9]+]]:_(s32) = G_AND [[BITCAST7]], [[C1]] - ; CHECK: [[SHL12:%[0-9]+]]:_(s32) = G_SHL [[AND14]], [[C6]](s32) - ; CHECK: [[OR12:%[0-9]+]]:_(s32) = G_OR [[OR11]], [[SHL12]] - ; CHECK: [[AND15:%[0-9]+]]:_(s32) = G_AND [[LSHR7]], [[C1]] - ; CHECK: [[SHL13:%[0-9]+]]:_(s32) = G_SHL [[AND15]], [[C7]](s32) - ; CHECK: [[OR13:%[0-9]+]]:_(s32) = G_OR [[OR12]], [[SHL13]] - ; CHECK: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[OR6]](s32), [[OR13]](s32) - ; CHECK: S_ENDPGM 0, implicit [[BUILD_VECTOR]](<2 x s32>) + ; CHECK-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>), [[UV2:%[0-9]+]]:_(<2 x s16>), [[UV3:%[0-9]+]]:_(<2 x s16>), [[UV4:%[0-9]+]]:_(<2 x s16>), [[UV5:%[0-9]+]]:_(<2 x s16>), [[UV6:%[0-9]+]]:_(<2 x s16>), [[UV7:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[COPY]](<16 x s16>) + ; CHECK-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>) + ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; CHECK-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) + ; CHECK-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>) + ; CHECK-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C]](s32) + ; CHECK-NEXT: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[UV2]](<2 x s16>) + ; CHECK-NEXT: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C]](s32) + ; CHECK-NEXT: [[BITCAST3:%[0-9]+]]:_(s32) = G_BITCAST [[UV3]](<2 x s16>) + ; CHECK-NEXT: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST3]], [[C]](s32) + ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 15 + ; CHECK-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[BITCAST]], [[C1]] + ; CHECK-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LSHR]], [[C1]] + ; CHECK-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 + ; CHECK-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C2]](s32) + ; CHECK-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]] + ; CHECK-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[BITCAST1]], [[C1]] + ; CHECK-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 + ; CHECK-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND2]], [[C3]](s32) + ; CHECK-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]] + ; CHECK-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[LSHR1]], [[C1]] + ; CHECK-NEXT: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 12 + ; CHECK-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C4]](s32) + ; CHECK-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[OR1]], [[SHL2]] + ; CHECK-NEXT: [[AND4:%[0-9]+]]:_(s32) = G_AND [[BITCAST2]], [[C1]] + ; CHECK-NEXT: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[AND4]], [[C]](s32) + ; CHECK-NEXT: [[OR3:%[0-9]+]]:_(s32) = G_OR [[OR2]], [[SHL3]] + ; CHECK-NEXT: [[AND5:%[0-9]+]]:_(s32) = G_AND [[LSHR2]], [[C1]] + ; CHECK-NEXT: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 20 + ; CHECK-NEXT: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[C5]](s32) + ; CHECK-NEXT: [[OR4:%[0-9]+]]:_(s32) = G_OR [[OR3]], [[SHL4]] + ; CHECK-NEXT: [[AND6:%[0-9]+]]:_(s32) = G_AND [[BITCAST3]], [[C1]] + ; CHECK-NEXT: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 24 + ; CHECK-NEXT: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[AND6]], [[C6]](s32) + ; CHECK-NEXT: [[OR5:%[0-9]+]]:_(s32) = G_OR [[OR4]], [[SHL5]] + ; CHECK-NEXT: [[AND7:%[0-9]+]]:_(s32) = G_AND [[LSHR3]], [[C1]] + ; CHECK-NEXT: [[C7:%[0-9]+]]:_(s32) = G_CONSTANT i32 28 + ; CHECK-NEXT: [[SHL6:%[0-9]+]]:_(s32) = G_SHL [[AND7]], [[C7]](s32) + ; CHECK-NEXT: [[OR6:%[0-9]+]]:_(s32) = G_OR [[OR5]], [[SHL6]] + ; CHECK-NEXT: [[UV8:%[0-9]+]]:_(<2 x s16>), [[UV9:%[0-9]+]]:_(<2 x s16>), 
[[UV10:%[0-9]+]]:_(<2 x s16>), [[UV11:%[0-9]+]]:_(<2 x s16>), [[UV12:%[0-9]+]]:_(<2 x s16>), [[UV13:%[0-9]+]]:_(<2 x s16>), [[UV14:%[0-9]+]]:_(<2 x s16>), [[UV15:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[COPY]](<16 x s16>) + ; CHECK-NEXT: [[BITCAST4:%[0-9]+]]:_(s32) = G_BITCAST [[UV12]](<2 x s16>) + ; CHECK-NEXT: [[LSHR4:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST4]], [[C]](s32) + ; CHECK-NEXT: [[BITCAST5:%[0-9]+]]:_(s32) = G_BITCAST [[UV13]](<2 x s16>) + ; CHECK-NEXT: [[LSHR5:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST5]], [[C]](s32) + ; CHECK-NEXT: [[BITCAST6:%[0-9]+]]:_(s32) = G_BITCAST [[UV14]](<2 x s16>) + ; CHECK-NEXT: [[LSHR6:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST6]], [[C]](s32) + ; CHECK-NEXT: [[BITCAST7:%[0-9]+]]:_(s32) = G_BITCAST [[UV15]](<2 x s16>) + ; CHECK-NEXT: [[LSHR7:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST7]], [[C]](s32) + ; CHECK-NEXT: [[AND8:%[0-9]+]]:_(s32) = G_AND [[BITCAST4]], [[C1]] + ; CHECK-NEXT: [[AND9:%[0-9]+]]:_(s32) = G_AND [[LSHR4]], [[C1]] + ; CHECK-NEXT: [[SHL7:%[0-9]+]]:_(s32) = G_SHL [[AND9]], [[C2]](s32) + ; CHECK-NEXT: [[OR7:%[0-9]+]]:_(s32) = G_OR [[AND8]], [[SHL7]] + ; CHECK-NEXT: [[AND10:%[0-9]+]]:_(s32) = G_AND [[BITCAST5]], [[C1]] + ; CHECK-NEXT: [[SHL8:%[0-9]+]]:_(s32) = G_SHL [[AND10]], [[C3]](s32) + ; CHECK-NEXT: [[OR8:%[0-9]+]]:_(s32) = G_OR [[OR7]], [[SHL8]] + ; CHECK-NEXT: [[AND11:%[0-9]+]]:_(s32) = G_AND [[LSHR5]], [[C1]] + ; CHECK-NEXT: [[SHL9:%[0-9]+]]:_(s32) = G_SHL [[AND11]], [[C4]](s32) + ; CHECK-NEXT: [[OR9:%[0-9]+]]:_(s32) = G_OR [[OR8]], [[SHL9]] + ; CHECK-NEXT: [[AND12:%[0-9]+]]:_(s32) = G_AND [[BITCAST6]], [[C1]] + ; CHECK-NEXT: [[SHL10:%[0-9]+]]:_(s32) = G_SHL [[AND12]], [[C]](s32) + ; CHECK-NEXT: [[OR10:%[0-9]+]]:_(s32) = G_OR [[OR9]], [[SHL10]] + ; CHECK-NEXT: [[AND13:%[0-9]+]]:_(s32) = G_AND [[LSHR6]], [[C1]] + ; CHECK-NEXT: [[SHL11:%[0-9]+]]:_(s32) = G_SHL [[AND13]], [[C5]](s32) + ; CHECK-NEXT: [[OR11:%[0-9]+]]:_(s32) = G_OR [[OR10]], [[SHL11]] + ; CHECK-NEXT: [[AND14:%[0-9]+]]:_(s32) = G_AND [[BITCAST7]], [[C1]] + ; CHECK-NEXT: [[SHL12:%[0-9]+]]:_(s32) = G_SHL [[AND14]], [[C6]](s32) + ; CHECK-NEXT: [[OR12:%[0-9]+]]:_(s32) = G_OR [[OR11]], [[SHL12]] + ; CHECK-NEXT: [[AND15:%[0-9]+]]:_(s32) = G_AND [[LSHR7]], [[C1]] + ; CHECK-NEXT: [[SHL13:%[0-9]+]]:_(s32) = G_SHL [[AND15]], [[C7]](s32) + ; CHECK-NEXT: [[OR13:%[0-9]+]]:_(s32) = G_OR [[OR12]], [[SHL13]] + ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[OR6]](s32), [[OR13]](s32) + ; CHECK-NEXT: S_ENDPGM 0, implicit [[BUILD_VECTOR]](<2 x s32>) %0:_(<16 x s16>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 %1:_(<16 x s4>) = G_TRUNC %0 %2:_(<2 x s32>) = G_BITCAST %1 @@ -1178,38 +1182,38 @@ body: | ; CHECK-LABEL: name: test_bitcast_s64_to_v8s8 ; CHECK: [[COPY:%[0-9]+]]:_(s64) = COPY $vgpr0_vgpr1 - ; CHECK: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](s64) - ; CHECK: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[UV]](s32) - ; CHECK: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; CHECK: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[UV]], [[C]](s32) - ; CHECK: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) - ; CHECK: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[UV1]](s32) - ; CHECK: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[UV1]], [[C]](s32) - ; CHECK: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR1]](s32) - ; CHECK: [[C1:%[0-9]+]]:_(s16) = G_CONSTANT i16 8 - ; CHECK: [[LSHR2:%[0-9]+]]:_(s16) = G_LSHR [[TRUNC]], [[C1]](s16) - ; CHECK: [[LSHR3:%[0-9]+]]:_(s16) = G_LSHR [[TRUNC1]], [[C1]](s16) - ; CHECK: [[LSHR4:%[0-9]+]]:_(s16) = G_LSHR [[TRUNC2]], [[C1]](s16) - ; CHECK: 
[[LSHR5:%[0-9]+]]:_(s16) = G_LSHR [[TRUNC3]], [[C1]](s16) - ; CHECK: [[ADD:%[0-9]+]]:_(s16) = G_ADD [[TRUNC]], [[TRUNC]] - ; CHECK: [[ADD1:%[0-9]+]]:_(s16) = G_ADD [[LSHR2]], [[LSHR2]] - ; CHECK: [[ADD2:%[0-9]+]]:_(s16) = G_ADD [[TRUNC1]], [[TRUNC1]] - ; CHECK: [[ADD3:%[0-9]+]]:_(s16) = G_ADD [[LSHR3]], [[LSHR3]] - ; CHECK: [[ADD4:%[0-9]+]]:_(s16) = G_ADD [[TRUNC2]], [[TRUNC2]] - ; CHECK: [[ADD5:%[0-9]+]]:_(s16) = G_ADD [[LSHR4]], [[LSHR4]] - ; CHECK: [[ADD6:%[0-9]+]]:_(s16) = G_ADD [[TRUNC3]], [[TRUNC3]] - ; CHECK: [[ADD7:%[0-9]+]]:_(s16) = G_ADD [[LSHR5]], [[LSHR5]] - ; CHECK: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[ADD]](s16) - ; CHECK: [[ANYEXT1:%[0-9]+]]:_(s32) = G_ANYEXT [[ADD1]](s16) - ; CHECK: [[ANYEXT2:%[0-9]+]]:_(s32) = G_ANYEXT [[ADD2]](s16) - ; CHECK: [[ANYEXT3:%[0-9]+]]:_(s32) = G_ANYEXT [[ADD3]](s16) - ; CHECK: [[ANYEXT4:%[0-9]+]]:_(s32) = G_ANYEXT [[ADD4]](s16) - ; CHECK: [[ANYEXT5:%[0-9]+]]:_(s32) = G_ANYEXT [[ADD5]](s16) - ; CHECK: [[ANYEXT6:%[0-9]+]]:_(s32) = G_ANYEXT [[ADD6]](s16) - ; CHECK: [[ANYEXT7:%[0-9]+]]:_(s32) = G_ANYEXT [[ADD7]](s16) - ; CHECK: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[ANYEXT]](s32), [[ANYEXT1]](s32), [[ANYEXT2]](s32), [[ANYEXT3]](s32), [[ANYEXT4]](s32), [[ANYEXT5]](s32), [[ANYEXT6]](s32), [[ANYEXT7]](s32) - ; CHECK: [[TRUNC4:%[0-9]+]]:_(<8 x s8>) = G_TRUNC [[BUILD_VECTOR]](<8 x s32>) - ; CHECK: S_ENDPGM 0, implicit [[TRUNC4]](<8 x s8>) + ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](s64) + ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[UV]](s32) + ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; CHECK-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[UV]], [[C]](s32) + ; CHECK-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) + ; CHECK-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[UV1]](s32) + ; CHECK-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[UV1]], [[C]](s32) + ; CHECK-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR1]](s32) + ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s16) = G_CONSTANT i16 8 + ; CHECK-NEXT: [[LSHR2:%[0-9]+]]:_(s16) = G_LSHR [[TRUNC]], [[C1]](s16) + ; CHECK-NEXT: [[LSHR3:%[0-9]+]]:_(s16) = G_LSHR [[TRUNC1]], [[C1]](s16) + ; CHECK-NEXT: [[LSHR4:%[0-9]+]]:_(s16) = G_LSHR [[TRUNC2]], [[C1]](s16) + ; CHECK-NEXT: [[LSHR5:%[0-9]+]]:_(s16) = G_LSHR [[TRUNC3]], [[C1]](s16) + ; CHECK-NEXT: [[ADD:%[0-9]+]]:_(s16) = G_ADD [[TRUNC]], [[TRUNC]] + ; CHECK-NEXT: [[ADD1:%[0-9]+]]:_(s16) = G_ADD [[LSHR2]], [[LSHR2]] + ; CHECK-NEXT: [[ADD2:%[0-9]+]]:_(s16) = G_ADD [[TRUNC1]], [[TRUNC1]] + ; CHECK-NEXT: [[ADD3:%[0-9]+]]:_(s16) = G_ADD [[LSHR3]], [[LSHR3]] + ; CHECK-NEXT: [[ADD4:%[0-9]+]]:_(s16) = G_ADD [[TRUNC2]], [[TRUNC2]] + ; CHECK-NEXT: [[ADD5:%[0-9]+]]:_(s16) = G_ADD [[LSHR4]], [[LSHR4]] + ; CHECK-NEXT: [[ADD6:%[0-9]+]]:_(s16) = G_ADD [[TRUNC3]], [[TRUNC3]] + ; CHECK-NEXT: [[ADD7:%[0-9]+]]:_(s16) = G_ADD [[LSHR5]], [[LSHR5]] + ; CHECK-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[ADD]](s16) + ; CHECK-NEXT: [[ANYEXT1:%[0-9]+]]:_(s32) = G_ANYEXT [[ADD1]](s16) + ; CHECK-NEXT: [[ANYEXT2:%[0-9]+]]:_(s32) = G_ANYEXT [[ADD2]](s16) + ; CHECK-NEXT: [[ANYEXT3:%[0-9]+]]:_(s32) = G_ANYEXT [[ADD3]](s16) + ; CHECK-NEXT: [[ANYEXT4:%[0-9]+]]:_(s32) = G_ANYEXT [[ADD4]](s16) + ; CHECK-NEXT: [[ANYEXT5:%[0-9]+]]:_(s32) = G_ANYEXT [[ADD5]](s16) + ; CHECK-NEXT: [[ANYEXT6:%[0-9]+]]:_(s32) = G_ANYEXT [[ADD6]](s16) + ; CHECK-NEXT: [[ANYEXT7:%[0-9]+]]:_(s32) = G_ANYEXT [[ADD7]](s16) + ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[ANYEXT]](s32), [[ANYEXT1]](s32), [[ANYEXT2]](s32), 
[[ANYEXT3]](s32), [[ANYEXT4]](s32), [[ANYEXT5]](s32), [[ANYEXT6]](s32), [[ANYEXT7]](s32) + ; CHECK-NEXT: [[TRUNC4:%[0-9]+]]:_(<8 x s8>) = G_TRUNC [[BUILD_VECTOR]](<8 x s32>) + ; CHECK-NEXT: S_ENDPGM 0, implicit [[TRUNC4]](<8 x s8>) %0:_(s64) = COPY $vgpr0_vgpr1 %1:_(<8 x s8>) = G_BITCAST %0 %2:_(<8 x s8>) = G_ADD %1, %1 @@ -1225,70 +1229,70 @@ body: | ; CHECK-LABEL: name: test_bitcast_v3s32_to_v12s8 ; CHECK: [[COPY:%[0-9]+]]:_(<3 x s32>) = COPY $vgpr0_vgpr1_vgpr2 - ; CHECK: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<3 x s32>) - ; CHECK: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 - ; CHECK: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[UV]], [[C]](s32) - ; CHECK: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; CHECK: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[UV]], [[C1]](s32) - ; CHECK: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 24 - ; CHECK: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[UV]], [[C2]](s32) - ; CHECK: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[UV1]], [[C]](s32) - ; CHECK: [[LSHR4:%[0-9]+]]:_(s32) = G_LSHR [[UV1]], [[C1]](s32) - ; CHECK: [[LSHR5:%[0-9]+]]:_(s32) = G_LSHR [[UV1]], [[C2]](s32) - ; CHECK: [[LSHR6:%[0-9]+]]:_(s32) = G_LSHR [[UV2]], [[C]](s32) - ; CHECK: [[LSHR7:%[0-9]+]]:_(s32) = G_LSHR [[UV2]], [[C1]](s32) - ; CHECK: [[LSHR8:%[0-9]+]]:_(s32) = G_LSHR [[UV2]], [[C2]](s32) - ; CHECK: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[UV]](s32) - ; CHECK: [[COPY1:%[0-9]+]]:_(s16) = COPY [[TRUNC]](s16) - ; CHECK: [[ADD:%[0-9]+]]:_(s16) = G_ADD [[TRUNC]], [[COPY1]] - ; CHECK: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) - ; CHECK: [[COPY2:%[0-9]+]]:_(s16) = COPY [[TRUNC1]](s16) - ; CHECK: [[ADD1:%[0-9]+]]:_(s16) = G_ADD [[TRUNC1]], [[COPY2]] - ; CHECK: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR1]](s32) - ; CHECK: [[COPY3:%[0-9]+]]:_(s16) = COPY [[TRUNC2]](s16) - ; CHECK: [[ADD2:%[0-9]+]]:_(s16) = G_ADD [[TRUNC2]], [[COPY3]] - ; CHECK: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR2]](s32) - ; CHECK: [[COPY4:%[0-9]+]]:_(s16) = COPY [[TRUNC3]](s16) - ; CHECK: [[ADD3:%[0-9]+]]:_(s16) = G_ADD [[TRUNC3]], [[COPY4]] - ; CHECK: [[TRUNC4:%[0-9]+]]:_(s16) = G_TRUNC [[UV1]](s32) - ; CHECK: [[COPY5:%[0-9]+]]:_(s16) = COPY [[TRUNC4]](s16) - ; CHECK: [[ADD4:%[0-9]+]]:_(s16) = G_ADD [[TRUNC4]], [[COPY5]] - ; CHECK: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR3]](s32) - ; CHECK: [[COPY6:%[0-9]+]]:_(s16) = COPY [[TRUNC5]](s16) - ; CHECK: [[ADD5:%[0-9]+]]:_(s16) = G_ADD [[TRUNC5]], [[COPY6]] - ; CHECK: [[TRUNC6:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR4]](s32) - ; CHECK: [[COPY7:%[0-9]+]]:_(s16) = COPY [[TRUNC6]](s16) - ; CHECK: [[ADD6:%[0-9]+]]:_(s16) = G_ADD [[TRUNC6]], [[COPY7]] - ; CHECK: [[TRUNC7:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR5]](s32) - ; CHECK: [[COPY8:%[0-9]+]]:_(s16) = COPY [[TRUNC7]](s16) - ; CHECK: [[ADD7:%[0-9]+]]:_(s16) = G_ADD [[TRUNC7]], [[COPY8]] - ; CHECK: [[TRUNC8:%[0-9]+]]:_(s16) = G_TRUNC [[UV2]](s32) - ; CHECK: [[COPY9:%[0-9]+]]:_(s16) = COPY [[TRUNC8]](s16) - ; CHECK: [[ADD8:%[0-9]+]]:_(s16) = G_ADD [[TRUNC8]], [[COPY9]] - ; CHECK: [[TRUNC9:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR6]](s32) - ; CHECK: [[COPY10:%[0-9]+]]:_(s16) = COPY [[TRUNC9]](s16) - ; CHECK: [[ADD9:%[0-9]+]]:_(s16) = G_ADD [[TRUNC9]], [[COPY10]] - ; CHECK: [[TRUNC10:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR7]](s32) - ; CHECK: [[COPY11:%[0-9]+]]:_(s16) = COPY [[TRUNC10]](s16) - ; CHECK: [[ADD10:%[0-9]+]]:_(s16) = G_ADD [[TRUNC10]], [[COPY11]] - ; CHECK: [[TRUNC11:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR8]](s32) - ; CHECK: [[COPY12:%[0-9]+]]:_(s16) = COPY [[TRUNC11]](s16) - ; CHECK: [[ADD11:%[0-9]+]]:_(s16) = G_ADD [[TRUNC11]], 
[[COPY12]] - ; CHECK: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[ADD]](s16) - ; CHECK: [[ANYEXT1:%[0-9]+]]:_(s32) = G_ANYEXT [[ADD1]](s16) - ; CHECK: [[ANYEXT2:%[0-9]+]]:_(s32) = G_ANYEXT [[ADD2]](s16) - ; CHECK: [[ANYEXT3:%[0-9]+]]:_(s32) = G_ANYEXT [[ADD3]](s16) - ; CHECK: [[ANYEXT4:%[0-9]+]]:_(s32) = G_ANYEXT [[ADD4]](s16) - ; CHECK: [[ANYEXT5:%[0-9]+]]:_(s32) = G_ANYEXT [[ADD5]](s16) - ; CHECK: [[ANYEXT6:%[0-9]+]]:_(s32) = G_ANYEXT [[ADD6]](s16) - ; CHECK: [[ANYEXT7:%[0-9]+]]:_(s32) = G_ANYEXT [[ADD7]](s16) - ; CHECK: [[ANYEXT8:%[0-9]+]]:_(s32) = G_ANYEXT [[ADD8]](s16) - ; CHECK: [[ANYEXT9:%[0-9]+]]:_(s32) = G_ANYEXT [[ADD9]](s16) - ; CHECK: [[ANYEXT10:%[0-9]+]]:_(s32) = G_ANYEXT [[ADD10]](s16) - ; CHECK: [[ANYEXT11:%[0-9]+]]:_(s32) = G_ANYEXT [[ADD11]](s16) - ; CHECK: [[BUILD_VECTOR:%[0-9]+]]:_(<12 x s32>) = G_BUILD_VECTOR [[ANYEXT]](s32), [[ANYEXT1]](s32), [[ANYEXT2]](s32), [[ANYEXT3]](s32), [[ANYEXT4]](s32), [[ANYEXT5]](s32), [[ANYEXT6]](s32), [[ANYEXT7]](s32), [[ANYEXT8]](s32), [[ANYEXT9]](s32), [[ANYEXT10]](s32), [[ANYEXT11]](s32) - ; CHECK: [[TRUNC12:%[0-9]+]]:_(<12 x s8>) = G_TRUNC [[BUILD_VECTOR]](<12 x s32>) - ; CHECK: S_ENDPGM 0, implicit [[TRUNC12]](<12 x s8>) + ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<3 x s32>) + ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 + ; CHECK-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[UV]], [[C]](s32) + ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; CHECK-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[UV]], [[C1]](s32) + ; CHECK-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 24 + ; CHECK-NEXT: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[UV]], [[C2]](s32) + ; CHECK-NEXT: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[UV1]], [[C]](s32) + ; CHECK-NEXT: [[LSHR4:%[0-9]+]]:_(s32) = G_LSHR [[UV1]], [[C1]](s32) + ; CHECK-NEXT: [[LSHR5:%[0-9]+]]:_(s32) = G_LSHR [[UV1]], [[C2]](s32) + ; CHECK-NEXT: [[LSHR6:%[0-9]+]]:_(s32) = G_LSHR [[UV2]], [[C]](s32) + ; CHECK-NEXT: [[LSHR7:%[0-9]+]]:_(s32) = G_LSHR [[UV2]], [[C1]](s32) + ; CHECK-NEXT: [[LSHR8:%[0-9]+]]:_(s32) = G_LSHR [[UV2]], [[C2]](s32) + ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[UV]](s32) + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s16) = COPY [[TRUNC]](s16) + ; CHECK-NEXT: [[ADD:%[0-9]+]]:_(s16) = G_ADD [[TRUNC]], [[COPY1]] + ; CHECK-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(s16) = COPY [[TRUNC1]](s16) + ; CHECK-NEXT: [[ADD1:%[0-9]+]]:_(s16) = G_ADD [[TRUNC1]], [[COPY2]] + ; CHECK-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR1]](s32) + ; CHECK-NEXT: [[COPY3:%[0-9]+]]:_(s16) = COPY [[TRUNC2]](s16) + ; CHECK-NEXT: [[ADD2:%[0-9]+]]:_(s16) = G_ADD [[TRUNC2]], [[COPY3]] + ; CHECK-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR2]](s32) + ; CHECK-NEXT: [[COPY4:%[0-9]+]]:_(s16) = COPY [[TRUNC3]](s16) + ; CHECK-NEXT: [[ADD3:%[0-9]+]]:_(s16) = G_ADD [[TRUNC3]], [[COPY4]] + ; CHECK-NEXT: [[TRUNC4:%[0-9]+]]:_(s16) = G_TRUNC [[UV1]](s32) + ; CHECK-NEXT: [[COPY5:%[0-9]+]]:_(s16) = COPY [[TRUNC4]](s16) + ; CHECK-NEXT: [[ADD4:%[0-9]+]]:_(s16) = G_ADD [[TRUNC4]], [[COPY5]] + ; CHECK-NEXT: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR3]](s32) + ; CHECK-NEXT: [[COPY6:%[0-9]+]]:_(s16) = COPY [[TRUNC5]](s16) + ; CHECK-NEXT: [[ADD5:%[0-9]+]]:_(s16) = G_ADD [[TRUNC5]], [[COPY6]] + ; CHECK-NEXT: [[TRUNC6:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR4]](s32) + ; CHECK-NEXT: [[COPY7:%[0-9]+]]:_(s16) = COPY [[TRUNC6]](s16) + ; CHECK-NEXT: [[ADD6:%[0-9]+]]:_(s16) = G_ADD [[TRUNC6]], [[COPY7]] + ; CHECK-NEXT: 
[[TRUNC7:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR5]](s32) + ; CHECK-NEXT: [[COPY8:%[0-9]+]]:_(s16) = COPY [[TRUNC7]](s16) + ; CHECK-NEXT: [[ADD7:%[0-9]+]]:_(s16) = G_ADD [[TRUNC7]], [[COPY8]] + ; CHECK-NEXT: [[TRUNC8:%[0-9]+]]:_(s16) = G_TRUNC [[UV2]](s32) + ; CHECK-NEXT: [[COPY9:%[0-9]+]]:_(s16) = COPY [[TRUNC8]](s16) + ; CHECK-NEXT: [[ADD8:%[0-9]+]]:_(s16) = G_ADD [[TRUNC8]], [[COPY9]] + ; CHECK-NEXT: [[TRUNC9:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR6]](s32) + ; CHECK-NEXT: [[COPY10:%[0-9]+]]:_(s16) = COPY [[TRUNC9]](s16) + ; CHECK-NEXT: [[ADD9:%[0-9]+]]:_(s16) = G_ADD [[TRUNC9]], [[COPY10]] + ; CHECK-NEXT: [[TRUNC10:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR7]](s32) + ; CHECK-NEXT: [[COPY11:%[0-9]+]]:_(s16) = COPY [[TRUNC10]](s16) + ; CHECK-NEXT: [[ADD10:%[0-9]+]]:_(s16) = G_ADD [[TRUNC10]], [[COPY11]] + ; CHECK-NEXT: [[TRUNC11:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR8]](s32) + ; CHECK-NEXT: [[COPY12:%[0-9]+]]:_(s16) = COPY [[TRUNC11]](s16) + ; CHECK-NEXT: [[ADD11:%[0-9]+]]:_(s16) = G_ADD [[TRUNC11]], [[COPY12]] + ; CHECK-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[ADD]](s16) + ; CHECK-NEXT: [[ANYEXT1:%[0-9]+]]:_(s32) = G_ANYEXT [[ADD1]](s16) + ; CHECK-NEXT: [[ANYEXT2:%[0-9]+]]:_(s32) = G_ANYEXT [[ADD2]](s16) + ; CHECK-NEXT: [[ANYEXT3:%[0-9]+]]:_(s32) = G_ANYEXT [[ADD3]](s16) + ; CHECK-NEXT: [[ANYEXT4:%[0-9]+]]:_(s32) = G_ANYEXT [[ADD4]](s16) + ; CHECK-NEXT: [[ANYEXT5:%[0-9]+]]:_(s32) = G_ANYEXT [[ADD5]](s16) + ; CHECK-NEXT: [[ANYEXT6:%[0-9]+]]:_(s32) = G_ANYEXT [[ADD6]](s16) + ; CHECK-NEXT: [[ANYEXT7:%[0-9]+]]:_(s32) = G_ANYEXT [[ADD7]](s16) + ; CHECK-NEXT: [[ANYEXT8:%[0-9]+]]:_(s32) = G_ANYEXT [[ADD8]](s16) + ; CHECK-NEXT: [[ANYEXT9:%[0-9]+]]:_(s32) = G_ANYEXT [[ADD9]](s16) + ; CHECK-NEXT: [[ANYEXT10:%[0-9]+]]:_(s32) = G_ANYEXT [[ADD10]](s16) + ; CHECK-NEXT: [[ANYEXT11:%[0-9]+]]:_(s32) = G_ANYEXT [[ADD11]](s16) + ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<12 x s32>) = G_BUILD_VECTOR [[ANYEXT]](s32), [[ANYEXT1]](s32), [[ANYEXT2]](s32), [[ANYEXT3]](s32), [[ANYEXT4]](s32), [[ANYEXT5]](s32), [[ANYEXT6]](s32), [[ANYEXT7]](s32), [[ANYEXT8]](s32), [[ANYEXT9]](s32), [[ANYEXT10]](s32), [[ANYEXT11]](s32) + ; CHECK-NEXT: [[TRUNC12:%[0-9]+]]:_(<12 x s8>) = G_TRUNC [[BUILD_VECTOR]](<12 x s32>) + ; CHECK-NEXT: S_ENDPGM 0, implicit [[TRUNC12]](<12 x s8>) %0:_(<3 x s32>) = COPY $vgpr0_vgpr1_vgpr2 %1:_(<12 x s8>) = G_BITCAST %0 %2:_(<12 x s8>) = G_ADD %1, %1 @@ -1304,47 +1308,47 @@ body: | ; CHECK-LABEL: name: test_bitcast_v12s8_to_v3s32 ; CHECK: [[COPY:%[0-9]+]]:_(<4 x s32>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3 - ; CHECK: [[COPY1:%[0-9]+]]:_(<4 x s32>) = COPY $vgpr4_vgpr5_vgpr6_vgpr7 - ; CHECK: [[COPY2:%[0-9]+]]:_(<4 x s32>) = COPY $vgpr8_vgpr9_vgpr10_vgpr11 - ; CHECK: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<4 x s32>) - ; CHECK: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 255 - ; CHECK: [[AND:%[0-9]+]]:_(s32) = G_AND [[UV]], [[C]] - ; CHECK: [[AND1:%[0-9]+]]:_(s32) = G_AND [[UV1]], [[C]] - ; CHECK: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 - ; CHECK: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C1]](s32) - ; CHECK: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]] - ; CHECK: [[AND2:%[0-9]+]]:_(s32) = G_AND [[UV2]], [[C]] - ; CHECK: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; CHECK: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND2]], [[C2]](s32) - ; CHECK: [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]] - ; CHECK: [[AND3:%[0-9]+]]:_(s32) = G_AND [[UV3]], [[C]] - ; CHECK: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 24 - ; CHECK: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C3]](s32) 
- ; CHECK: [[OR2:%[0-9]+]]:_(s32) = G_OR [[OR1]], [[SHL2]] - ; CHECK: [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32), [[UV6:%[0-9]+]]:_(s32), [[UV7:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](<4 x s32>) - ; CHECK: [[AND4:%[0-9]+]]:_(s32) = G_AND [[UV4]], [[C]] - ; CHECK: [[AND5:%[0-9]+]]:_(s32) = G_AND [[UV5]], [[C]] - ; CHECK: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[C1]](s32) - ; CHECK: [[OR3:%[0-9]+]]:_(s32) = G_OR [[AND4]], [[SHL3]] - ; CHECK: [[AND6:%[0-9]+]]:_(s32) = G_AND [[UV6]], [[C]] - ; CHECK: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[AND6]], [[C2]](s32) - ; CHECK: [[OR4:%[0-9]+]]:_(s32) = G_OR [[OR3]], [[SHL4]] - ; CHECK: [[AND7:%[0-9]+]]:_(s32) = G_AND [[UV7]], [[C]] - ; CHECK: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[AND7]], [[C3]](s32) - ; CHECK: [[OR5:%[0-9]+]]:_(s32) = G_OR [[OR4]], [[SHL5]] - ; CHECK: [[UV8:%[0-9]+]]:_(s32), [[UV9:%[0-9]+]]:_(s32), [[UV10:%[0-9]+]]:_(s32), [[UV11:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY2]](<4 x s32>) - ; CHECK: [[AND8:%[0-9]+]]:_(s32) = G_AND [[UV8]], [[C]] - ; CHECK: [[AND9:%[0-9]+]]:_(s32) = G_AND [[UV9]], [[C]] - ; CHECK: [[SHL6:%[0-9]+]]:_(s32) = G_SHL [[AND9]], [[C1]](s32) - ; CHECK: [[OR6:%[0-9]+]]:_(s32) = G_OR [[AND8]], [[SHL6]] - ; CHECK: [[AND10:%[0-9]+]]:_(s32) = G_AND [[UV10]], [[C]] - ; CHECK: [[SHL7:%[0-9]+]]:_(s32) = G_SHL [[AND10]], [[C2]](s32) - ; CHECK: [[OR7:%[0-9]+]]:_(s32) = G_OR [[OR6]], [[SHL7]] - ; CHECK: [[AND11:%[0-9]+]]:_(s32) = G_AND [[UV11]], [[C]] - ; CHECK: [[SHL8:%[0-9]+]]:_(s32) = G_SHL [[AND11]], [[C3]](s32) - ; CHECK: [[OR8:%[0-9]+]]:_(s32) = G_OR [[OR7]], [[SHL8]] - ; CHECK: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[OR2]](s32), [[OR5]](s32), [[OR8]](s32) - ; CHECK: S_ENDPGM 0, implicit [[BUILD_VECTOR]](<3 x s32>) + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(<4 x s32>) = COPY $vgpr4_vgpr5_vgpr6_vgpr7 + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(<4 x s32>) = COPY $vgpr8_vgpr9_vgpr10_vgpr11 + ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<4 x s32>) + ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 255 + ; CHECK-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[UV]], [[C]] + ; CHECK-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[UV1]], [[C]] + ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 + ; CHECK-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C1]](s32) + ; CHECK-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]] + ; CHECK-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[UV2]], [[C]] + ; CHECK-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; CHECK-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND2]], [[C2]](s32) + ; CHECK-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]] + ; CHECK-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[UV3]], [[C]] + ; CHECK-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 24 + ; CHECK-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C3]](s32) + ; CHECK-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[OR1]], [[SHL2]] + ; CHECK-NEXT: [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32), [[UV6:%[0-9]+]]:_(s32), [[UV7:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](<4 x s32>) + ; CHECK-NEXT: [[AND4:%[0-9]+]]:_(s32) = G_AND [[UV4]], [[C]] + ; CHECK-NEXT: [[AND5:%[0-9]+]]:_(s32) = G_AND [[UV5]], [[C]] + ; CHECK-NEXT: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[C1]](s32) + ; CHECK-NEXT: [[OR3:%[0-9]+]]:_(s32) = G_OR [[AND4]], [[SHL3]] + ; CHECK-NEXT: [[AND6:%[0-9]+]]:_(s32) = G_AND [[UV6]], [[C]] + ; CHECK-NEXT: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[AND6]], [[C2]](s32) + ; CHECK-NEXT: [[OR4:%[0-9]+]]:_(s32) = G_OR [[OR3]], [[SHL4]] + ; CHECK-NEXT: 
[[AND7:%[0-9]+]]:_(s32) = G_AND [[UV7]], [[C]] + ; CHECK-NEXT: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[AND7]], [[C3]](s32) + ; CHECK-NEXT: [[OR5:%[0-9]+]]:_(s32) = G_OR [[OR4]], [[SHL5]] + ; CHECK-NEXT: [[UV8:%[0-9]+]]:_(s32), [[UV9:%[0-9]+]]:_(s32), [[UV10:%[0-9]+]]:_(s32), [[UV11:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY2]](<4 x s32>) + ; CHECK-NEXT: [[AND8:%[0-9]+]]:_(s32) = G_AND [[UV8]], [[C]] + ; CHECK-NEXT: [[AND9:%[0-9]+]]:_(s32) = G_AND [[UV9]], [[C]] + ; CHECK-NEXT: [[SHL6:%[0-9]+]]:_(s32) = G_SHL [[AND9]], [[C1]](s32) + ; CHECK-NEXT: [[OR6:%[0-9]+]]:_(s32) = G_OR [[AND8]], [[SHL6]] + ; CHECK-NEXT: [[AND10:%[0-9]+]]:_(s32) = G_AND [[UV10]], [[C]] + ; CHECK-NEXT: [[SHL7:%[0-9]+]]:_(s32) = G_SHL [[AND10]], [[C2]](s32) + ; CHECK-NEXT: [[OR7:%[0-9]+]]:_(s32) = G_OR [[OR6]], [[SHL7]] + ; CHECK-NEXT: [[AND11:%[0-9]+]]:_(s32) = G_AND [[UV11]], [[C]] + ; CHECK-NEXT: [[SHL8:%[0-9]+]]:_(s32) = G_SHL [[AND11]], [[C3]](s32) + ; CHECK-NEXT: [[OR8:%[0-9]+]]:_(s32) = G_OR [[OR7]], [[SHL8]] + ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[OR2]](s32), [[OR5]](s32), [[OR8]](s32) + ; CHECK-NEXT: S_ENDPGM 0, implicit [[BUILD_VECTOR]](<3 x s32>) %0:_(<4 x s32>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3 %1:_(<4 x s32>) = COPY $vgpr4_vgpr5_vgpr6_vgpr7 %2:_(<4 x s32>) = COPY $vgpr8_vgpr9_vgpr10_vgpr11 @@ -1362,55 +1366,55 @@ body: | ; CHECK-LABEL: name: test_bitcast_v6s8_to_v3s16 ; CHECK: [[COPY:%[0-9]+]]:_(<6 x s32>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5 - ; CHECK: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<6 x s32>) - ; CHECK: [[UV6:%[0-9]+]]:_(s32), [[UV7:%[0-9]+]]:_(s32), [[UV8:%[0-9]+]]:_(s32), [[UV9:%[0-9]+]]:_(s32), [[UV10:%[0-9]+]]:_(s32), [[UV11:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<6 x s32>) - ; CHECK: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[UV]](s32) - ; CHECK: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[UV6]](s32) - ; CHECK: [[ADD:%[0-9]+]]:_(s16) = G_ADD [[TRUNC]], [[TRUNC1]] - ; CHECK: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[UV1]](s32) - ; CHECK: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[UV7]](s32) - ; CHECK: [[ADD1:%[0-9]+]]:_(s16) = G_ADD [[TRUNC2]], [[TRUNC3]] - ; CHECK: [[TRUNC4:%[0-9]+]]:_(s16) = G_TRUNC [[UV2]](s32) - ; CHECK: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[UV8]](s32) - ; CHECK: [[ADD2:%[0-9]+]]:_(s16) = G_ADD [[TRUNC4]], [[TRUNC5]] - ; CHECK: [[TRUNC6:%[0-9]+]]:_(s16) = G_TRUNC [[UV3]](s32) - ; CHECK: [[TRUNC7:%[0-9]+]]:_(s16) = G_TRUNC [[UV9]](s32) - ; CHECK: [[ADD3:%[0-9]+]]:_(s16) = G_ADD [[TRUNC6]], [[TRUNC7]] - ; CHECK: [[TRUNC8:%[0-9]+]]:_(s16) = G_TRUNC [[UV4]](s32) - ; CHECK: [[TRUNC9:%[0-9]+]]:_(s16) = G_TRUNC [[UV10]](s32) - ; CHECK: [[ADD4:%[0-9]+]]:_(s16) = G_ADD [[TRUNC8]], [[TRUNC9]] - ; CHECK: [[TRUNC10:%[0-9]+]]:_(s16) = G_TRUNC [[UV5]](s32) - ; CHECK: [[TRUNC11:%[0-9]+]]:_(s16) = G_TRUNC [[UV11]](s32) - ; CHECK: [[ADD5:%[0-9]+]]:_(s16) = G_ADD [[TRUNC10]], [[TRUNC11]] - ; CHECK: [[C:%[0-9]+]]:_(s16) = G_CONSTANT i16 255 - ; CHECK: [[AND:%[0-9]+]]:_(s16) = G_AND [[ADD]], [[C]] - ; CHECK: [[AND1:%[0-9]+]]:_(s16) = G_AND [[ADD1]], [[C]] - ; CHECK: [[C1:%[0-9]+]]:_(s16) = G_CONSTANT i16 8 - ; CHECK: [[SHL:%[0-9]+]]:_(s16) = G_SHL [[AND1]], [[C1]](s16) - ; CHECK: [[OR:%[0-9]+]]:_(s16) = G_OR [[AND]], [[SHL]] - ; CHECK: [[AND2:%[0-9]+]]:_(s16) = G_AND [[ADD2]], [[C]] - ; CHECK: [[AND3:%[0-9]+]]:_(s16) = G_AND [[ADD3]], [[C]] - ; CHECK: [[SHL1:%[0-9]+]]:_(s16) = G_SHL [[AND3]], [[C1]](s16) - ; CHECK: [[OR1:%[0-9]+]]:_(s16) = G_OR 
[[AND2]], [[SHL1]] - ; CHECK: [[AND4:%[0-9]+]]:_(s16) = G_AND [[ADD4]], [[C]] - ; CHECK: [[AND5:%[0-9]+]]:_(s16) = G_AND [[ADD5]], [[C]] - ; CHECK: [[SHL2:%[0-9]+]]:_(s16) = G_SHL [[AND5]], [[C1]](s16) - ; CHECK: [[OR2:%[0-9]+]]:_(s16) = G_OR [[AND4]], [[SHL2]] - ; CHECK: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[OR]](s16) - ; CHECK: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[OR1]](s16) - ; CHECK: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; CHECK: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C2]](s32) - ; CHECK: [[OR3:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL3]] - ; CHECK: [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR3]](s32) - ; CHECK: [[ZEXT2:%[0-9]+]]:_(s32) = G_ZEXT [[OR2]](s16) - ; CHECK: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[ZEXT]], [[C2]](s32) - ; CHECK: [[OR4:%[0-9]+]]:_(s32) = G_OR [[ZEXT2]], [[SHL4]] - ; CHECK: [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR4]](s32) - ; CHECK: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[ZEXT2]], [[C2]](s32) - ; CHECK: [[OR5:%[0-9]+]]:_(s32) = G_OR [[ZEXT1]], [[SHL5]] - ; CHECK: [[BITCAST2:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR5]](s32) - ; CHECK: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BITCAST]](<2 x s16>), [[BITCAST1]](<2 x s16>), [[BITCAST2]](<2 x s16>) - ; CHECK: S_ENDPGM 0, implicit [[CONCAT_VECTORS]](<6 x s16>) + ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<6 x s32>) + ; CHECK-NEXT: [[UV6:%[0-9]+]]:_(s32), [[UV7:%[0-9]+]]:_(s32), [[UV8:%[0-9]+]]:_(s32), [[UV9:%[0-9]+]]:_(s32), [[UV10:%[0-9]+]]:_(s32), [[UV11:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<6 x s32>) + ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[UV]](s32) + ; CHECK-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[UV6]](s32) + ; CHECK-NEXT: [[ADD:%[0-9]+]]:_(s16) = G_ADD [[TRUNC]], [[TRUNC1]] + ; CHECK-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[UV1]](s32) + ; CHECK-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[UV7]](s32) + ; CHECK-NEXT: [[ADD1:%[0-9]+]]:_(s16) = G_ADD [[TRUNC2]], [[TRUNC3]] + ; CHECK-NEXT: [[TRUNC4:%[0-9]+]]:_(s16) = G_TRUNC [[UV2]](s32) + ; CHECK-NEXT: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[UV8]](s32) + ; CHECK-NEXT: [[ADD2:%[0-9]+]]:_(s16) = G_ADD [[TRUNC4]], [[TRUNC5]] + ; CHECK-NEXT: [[TRUNC6:%[0-9]+]]:_(s16) = G_TRUNC [[UV3]](s32) + ; CHECK-NEXT: [[TRUNC7:%[0-9]+]]:_(s16) = G_TRUNC [[UV9]](s32) + ; CHECK-NEXT: [[ADD3:%[0-9]+]]:_(s16) = G_ADD [[TRUNC6]], [[TRUNC7]] + ; CHECK-NEXT: [[TRUNC8:%[0-9]+]]:_(s16) = G_TRUNC [[UV4]](s32) + ; CHECK-NEXT: [[TRUNC9:%[0-9]+]]:_(s16) = G_TRUNC [[UV10]](s32) + ; CHECK-NEXT: [[ADD4:%[0-9]+]]:_(s16) = G_ADD [[TRUNC8]], [[TRUNC9]] + ; CHECK-NEXT: [[TRUNC10:%[0-9]+]]:_(s16) = G_TRUNC [[UV5]](s32) + ; CHECK-NEXT: [[TRUNC11:%[0-9]+]]:_(s16) = G_TRUNC [[UV11]](s32) + ; CHECK-NEXT: [[ADD5:%[0-9]+]]:_(s16) = G_ADD [[TRUNC10]], [[TRUNC11]] + ; CHECK-NEXT: [[C:%[0-9]+]]:_(s16) = G_CONSTANT i16 255 + ; CHECK-NEXT: [[AND:%[0-9]+]]:_(s16) = G_AND [[ADD]], [[C]] + ; CHECK-NEXT: [[AND1:%[0-9]+]]:_(s16) = G_AND [[ADD1]], [[C]] + ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s16) = G_CONSTANT i16 8 + ; CHECK-NEXT: [[SHL:%[0-9]+]]:_(s16) = G_SHL [[AND1]], [[C1]](s16) + ; CHECK-NEXT: [[OR:%[0-9]+]]:_(s16) = G_OR [[AND]], [[SHL]] + ; CHECK-NEXT: [[AND2:%[0-9]+]]:_(s16) = G_AND [[ADD2]], [[C]] + ; CHECK-NEXT: [[AND3:%[0-9]+]]:_(s16) = G_AND [[ADD3]], [[C]] + ; CHECK-NEXT: [[SHL1:%[0-9]+]]:_(s16) = G_SHL [[AND3]], [[C1]](s16) + ; CHECK-NEXT: [[OR1:%[0-9]+]]:_(s16) = G_OR [[AND2]], [[SHL1]] + ; CHECK-NEXT: 
[[AND4:%[0-9]+]]:_(s16) = G_AND [[ADD4]], [[C]] + ; CHECK-NEXT: [[AND5:%[0-9]+]]:_(s16) = G_AND [[ADD5]], [[C]] + ; CHECK-NEXT: [[SHL2:%[0-9]+]]:_(s16) = G_SHL [[AND5]], [[C1]](s16) + ; CHECK-NEXT: [[OR2:%[0-9]+]]:_(s16) = G_OR [[AND4]], [[SHL2]] + ; CHECK-NEXT: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[OR]](s16) + ; CHECK-NEXT: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[OR1]](s16) + ; CHECK-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; CHECK-NEXT: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C2]](s32) + ; CHECK-NEXT: [[OR3:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL3]] + ; CHECK-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR3]](s32) + ; CHECK-NEXT: [[ZEXT2:%[0-9]+]]:_(s32) = G_ZEXT [[OR2]](s16) + ; CHECK-NEXT: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[ZEXT]], [[C2]](s32) + ; CHECK-NEXT: [[OR4:%[0-9]+]]:_(s32) = G_OR [[ZEXT2]], [[SHL4]] + ; CHECK-NEXT: [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR4]](s32) + ; CHECK-NEXT: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[ZEXT2]], [[C2]](s32) + ; CHECK-NEXT: [[OR5:%[0-9]+]]:_(s32) = G_OR [[ZEXT1]], [[SHL5]] + ; CHECK-NEXT: [[BITCAST2:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR5]](s32) + ; CHECK-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BITCAST]](<2 x s16>), [[BITCAST1]](<2 x s16>), [[BITCAST2]](<2 x s16>) + ; CHECK-NEXT: S_ENDPGM 0, implicit [[CONCAT_VECTORS]](<6 x s16>) %0:_(<6 x s32>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5 %1:_(<6 x s8>) = G_TRUNC %0 %2:_(<6 x s8>) = G_ADD %1, %1 @@ -1427,46 +1431,46 @@ body: | ; CHECK-LABEL: name: test_bitcast_v3s16_to_v6s8 ; CHECK: [[COPY:%[0-9]+]]:_(<3 x s32>) = COPY $vgpr0_vgpr1_vgpr2 - ; CHECK: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<3 x s32>) - ; CHECK: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[UV]](s32) - ; CHECK: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[UV1]](s32) - ; CHECK: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[UV2]](s32) - ; CHECK: [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<3 x s32>) - ; CHECK: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[UV3]](s32) - ; CHECK: [[TRUNC4:%[0-9]+]]:_(s16) = G_TRUNC [[UV4]](s32) - ; CHECK: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[UV5]](s32) - ; CHECK: [[ADD:%[0-9]+]]:_(s16) = G_ADD [[TRUNC]], [[TRUNC3]] - ; CHECK: [[ADD1:%[0-9]+]]:_(s16) = G_ADD [[TRUNC1]], [[TRUNC4]] - ; CHECK: [[ADD2:%[0-9]+]]:_(s16) = G_ADD [[TRUNC2]], [[TRUNC5]] - ; CHECK: [[C:%[0-9]+]]:_(s16) = G_CONSTANT i16 8 - ; CHECK: [[LSHR:%[0-9]+]]:_(s16) = G_LSHR [[ADD]], [[C]](s16) - ; CHECK: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[ADD]](s16) - ; CHECK: [[ANYEXT1:%[0-9]+]]:_(s32) = G_ANYEXT [[LSHR]](s16) - ; CHECK: [[LSHR1:%[0-9]+]]:_(s16) = G_LSHR [[ADD1]], [[C]](s16) - ; CHECK: [[ANYEXT2:%[0-9]+]]:_(s32) = G_ANYEXT [[ADD1]](s16) - ; CHECK: [[ANYEXT3:%[0-9]+]]:_(s32) = G_ANYEXT [[LSHR1]](s16) - ; CHECK: [[LSHR2:%[0-9]+]]:_(s16) = G_LSHR [[ADD2]], [[C]](s16) - ; CHECK: [[ANYEXT4:%[0-9]+]]:_(s32) = G_ANYEXT [[ADD2]](s16) - ; CHECK: [[ANYEXT5:%[0-9]+]]:_(s32) = G_ANYEXT [[LSHR2]](s16) - ; CHECK: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 - ; CHECK: [[AND:%[0-9]+]]:_(s32) = G_AND [[ANYEXT]], [[C1]] - ; CHECK: [[AND1:%[0-9]+]]:_(s32) = G_AND [[ANYEXT1]], [[C1]] - ; CHECK: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; CHECK: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C2]](s32) - ; CHECK: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]] - ; CHECK: [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) - ; CHECK: [[AND2:%[0-9]+]]:_(s32) = G_AND [[ANYEXT2]], [[C1]] - ; CHECK: [[AND3:%[0-9]+]]:_(s32) = G_AND 
[[ANYEXT3]], [[C1]] - ; CHECK: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C2]](s32) - ; CHECK: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL1]] - ; CHECK: [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32) - ; CHECK: [[AND4:%[0-9]+]]:_(s32) = G_AND [[ANYEXT4]], [[C1]] - ; CHECK: [[AND5:%[0-9]+]]:_(s32) = G_AND [[ANYEXT5]], [[C1]] - ; CHECK: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[C2]](s32) - ; CHECK: [[OR2:%[0-9]+]]:_(s32) = G_OR [[AND4]], [[SHL2]] - ; CHECK: [[BITCAST2:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR2]](s32) - ; CHECK: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BITCAST]](<2 x s16>), [[BITCAST1]](<2 x s16>), [[BITCAST2]](<2 x s16>) - ; CHECK: S_ENDPGM 0, implicit [[CONCAT_VECTORS]](<6 x s16>) + ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<3 x s32>) + ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[UV]](s32) + ; CHECK-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[UV1]](s32) + ; CHECK-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[UV2]](s32) + ; CHECK-NEXT: [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<3 x s32>) + ; CHECK-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[UV3]](s32) + ; CHECK-NEXT: [[TRUNC4:%[0-9]+]]:_(s16) = G_TRUNC [[UV4]](s32) + ; CHECK-NEXT: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[UV5]](s32) + ; CHECK-NEXT: [[ADD:%[0-9]+]]:_(s16) = G_ADD [[TRUNC]], [[TRUNC3]] + ; CHECK-NEXT: [[ADD1:%[0-9]+]]:_(s16) = G_ADD [[TRUNC1]], [[TRUNC4]] + ; CHECK-NEXT: [[ADD2:%[0-9]+]]:_(s16) = G_ADD [[TRUNC2]], [[TRUNC5]] + ; CHECK-NEXT: [[C:%[0-9]+]]:_(s16) = G_CONSTANT i16 8 + ; CHECK-NEXT: [[LSHR:%[0-9]+]]:_(s16) = G_LSHR [[ADD]], [[C]](s16) + ; CHECK-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[ADD]](s16) + ; CHECK-NEXT: [[ANYEXT1:%[0-9]+]]:_(s32) = G_ANYEXT [[LSHR]](s16) + ; CHECK-NEXT: [[LSHR1:%[0-9]+]]:_(s16) = G_LSHR [[ADD1]], [[C]](s16) + ; CHECK-NEXT: [[ANYEXT2:%[0-9]+]]:_(s32) = G_ANYEXT [[ADD1]](s16) + ; CHECK-NEXT: [[ANYEXT3:%[0-9]+]]:_(s32) = G_ANYEXT [[LSHR1]](s16) + ; CHECK-NEXT: [[LSHR2:%[0-9]+]]:_(s16) = G_LSHR [[ADD2]], [[C]](s16) + ; CHECK-NEXT: [[ANYEXT4:%[0-9]+]]:_(s32) = G_ANYEXT [[ADD2]](s16) + ; CHECK-NEXT: [[ANYEXT5:%[0-9]+]]:_(s32) = G_ANYEXT [[LSHR2]](s16) + ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 + ; CHECK-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[ANYEXT]], [[C1]] + ; CHECK-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[ANYEXT1]], [[C1]] + ; CHECK-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; CHECK-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C2]](s32) + ; CHECK-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]] + ; CHECK-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) + ; CHECK-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[ANYEXT2]], [[C1]] + ; CHECK-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[ANYEXT3]], [[C1]] + ; CHECK-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C2]](s32) + ; CHECK-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL1]] + ; CHECK-NEXT: [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32) + ; CHECK-NEXT: [[AND4:%[0-9]+]]:_(s32) = G_AND [[ANYEXT4]], [[C1]] + ; CHECK-NEXT: [[AND5:%[0-9]+]]:_(s32) = G_AND [[ANYEXT5]], [[C1]] + ; CHECK-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[C2]](s32) + ; CHECK-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[AND4]], [[SHL2]] + ; CHECK-NEXT: [[BITCAST2:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR2]](s32) + ; CHECK-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BITCAST]](<2 x s16>), [[BITCAST1]](<2 x s16>), [[BITCAST2]](<2 
x s16>) + ; CHECK-NEXT: S_ENDPGM 0, implicit [[CONCAT_VECTORS]](<6 x s16>) %0:_(<3 x s32>) = COPY $vgpr0_vgpr1_vgpr2 %1:_(<3 x s16>) = G_TRUNC %0 %2:_(<3 x s16>) = G_ADD %1, %1 @@ -1483,65 +1487,65 @@ body: | ; CHECK-LABEL: name: test_bitcast_v2s64_to_v16s8 ; CHECK: [[COPY:%[0-9]+]]:_(<2 x s64>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3 - ; CHECK: [[UV:%[0-9]+]]:_(s64), [[UV1:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[COPY]](<2 x s64>) - ; CHECK: [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[UV]](s64) - ; CHECK: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[UV2]](s32) - ; CHECK: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; CHECK: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[UV2]], [[C]](s32) - ; CHECK: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) - ; CHECK: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[UV3]](s32) - ; CHECK: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[UV3]], [[C]](s32) - ; CHECK: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR1]](s32) - ; CHECK: [[C1:%[0-9]+]]:_(s16) = G_CONSTANT i16 8 - ; CHECK: [[LSHR2:%[0-9]+]]:_(s16) = G_LSHR [[TRUNC]], [[C1]](s16) - ; CHECK: [[LSHR3:%[0-9]+]]:_(s16) = G_LSHR [[TRUNC1]], [[C1]](s16) - ; CHECK: [[LSHR4:%[0-9]+]]:_(s16) = G_LSHR [[TRUNC2]], [[C1]](s16) - ; CHECK: [[LSHR5:%[0-9]+]]:_(s16) = G_LSHR [[TRUNC3]], [[C1]](s16) - ; CHECK: [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[UV1]](s64) - ; CHECK: [[TRUNC4:%[0-9]+]]:_(s16) = G_TRUNC [[UV4]](s32) - ; CHECK: [[LSHR6:%[0-9]+]]:_(s32) = G_LSHR [[UV4]], [[C]](s32) - ; CHECK: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR6]](s32) - ; CHECK: [[TRUNC6:%[0-9]+]]:_(s16) = G_TRUNC [[UV5]](s32) - ; CHECK: [[LSHR7:%[0-9]+]]:_(s32) = G_LSHR [[UV5]], [[C]](s32) - ; CHECK: [[TRUNC7:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR7]](s32) - ; CHECK: [[LSHR8:%[0-9]+]]:_(s16) = G_LSHR [[TRUNC4]], [[C1]](s16) - ; CHECK: [[LSHR9:%[0-9]+]]:_(s16) = G_LSHR [[TRUNC5]], [[C1]](s16) - ; CHECK: [[LSHR10:%[0-9]+]]:_(s16) = G_LSHR [[TRUNC6]], [[C1]](s16) - ; CHECK: [[LSHR11:%[0-9]+]]:_(s16) = G_LSHR [[TRUNC7]], [[C1]](s16) - ; CHECK: [[ADD:%[0-9]+]]:_(s16) = G_ADD [[TRUNC]], [[TRUNC]] - ; CHECK: [[ADD1:%[0-9]+]]:_(s16) = G_ADD [[LSHR2]], [[LSHR2]] - ; CHECK: [[ADD2:%[0-9]+]]:_(s16) = G_ADD [[TRUNC1]], [[TRUNC1]] - ; CHECK: [[ADD3:%[0-9]+]]:_(s16) = G_ADD [[LSHR3]], [[LSHR3]] - ; CHECK: [[ADD4:%[0-9]+]]:_(s16) = G_ADD [[TRUNC2]], [[TRUNC2]] - ; CHECK: [[ADD5:%[0-9]+]]:_(s16) = G_ADD [[LSHR4]], [[LSHR4]] - ; CHECK: [[ADD6:%[0-9]+]]:_(s16) = G_ADD [[TRUNC3]], [[TRUNC3]] - ; CHECK: [[ADD7:%[0-9]+]]:_(s16) = G_ADD [[LSHR5]], [[LSHR5]] - ; CHECK: [[ADD8:%[0-9]+]]:_(s16) = G_ADD [[TRUNC4]], [[TRUNC4]] - ; CHECK: [[ADD9:%[0-9]+]]:_(s16) = G_ADD [[LSHR8]], [[LSHR8]] - ; CHECK: [[ADD10:%[0-9]+]]:_(s16) = G_ADD [[TRUNC5]], [[TRUNC5]] - ; CHECK: [[ADD11:%[0-9]+]]:_(s16) = G_ADD [[LSHR9]], [[LSHR9]] - ; CHECK: [[ADD12:%[0-9]+]]:_(s16) = G_ADD [[TRUNC6]], [[TRUNC6]] - ; CHECK: [[ADD13:%[0-9]+]]:_(s16) = G_ADD [[LSHR10]], [[LSHR10]] - ; CHECK: [[ADD14:%[0-9]+]]:_(s16) = G_ADD [[TRUNC7]], [[TRUNC7]] - ; CHECK: [[ADD15:%[0-9]+]]:_(s16) = G_ADD [[LSHR11]], [[LSHR11]] - ; CHECK: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[ADD]](s16) - ; CHECK: [[ANYEXT1:%[0-9]+]]:_(s32) = G_ANYEXT [[ADD1]](s16) - ; CHECK: [[ANYEXT2:%[0-9]+]]:_(s32) = G_ANYEXT [[ADD2]](s16) - ; CHECK: [[ANYEXT3:%[0-9]+]]:_(s32) = G_ANYEXT [[ADD3]](s16) - ; CHECK: [[ANYEXT4:%[0-9]+]]:_(s32) = G_ANYEXT [[ADD4]](s16) - ; CHECK: [[ANYEXT5:%[0-9]+]]:_(s32) = G_ANYEXT [[ADD5]](s16) - ; CHECK: [[ANYEXT6:%[0-9]+]]:_(s32) = G_ANYEXT [[ADD6]](s16) - ; CHECK: [[ANYEXT7:%[0-9]+]]:_(s32) = G_ANYEXT [[ADD7]](s16) 
- ; CHECK: [[ANYEXT8:%[0-9]+]]:_(s32) = G_ANYEXT [[ADD8]](s16) - ; CHECK: [[ANYEXT9:%[0-9]+]]:_(s32) = G_ANYEXT [[ADD9]](s16) - ; CHECK: [[ANYEXT10:%[0-9]+]]:_(s32) = G_ANYEXT [[ADD10]](s16) - ; CHECK: [[ANYEXT11:%[0-9]+]]:_(s32) = G_ANYEXT [[ADD11]](s16) - ; CHECK: [[ANYEXT12:%[0-9]+]]:_(s32) = G_ANYEXT [[ADD12]](s16) - ; CHECK: [[ANYEXT13:%[0-9]+]]:_(s32) = G_ANYEXT [[ADD13]](s16) - ; CHECK: [[ANYEXT14:%[0-9]+]]:_(s32) = G_ANYEXT [[ADD14]](s16) - ; CHECK: [[ANYEXT15:%[0-9]+]]:_(s32) = G_ANYEXT [[ADD15]](s16) - ; CHECK: [[BUILD_VECTOR:%[0-9]+]]:_(<16 x s32>) = G_BUILD_VECTOR [[ANYEXT]](s32), [[ANYEXT1]](s32), [[ANYEXT2]](s32), [[ANYEXT3]](s32), [[ANYEXT4]](s32), [[ANYEXT5]](s32), [[ANYEXT6]](s32), [[ANYEXT7]](s32), [[ANYEXT8]](s32), [[ANYEXT9]](s32), [[ANYEXT10]](s32), [[ANYEXT11]](s32), [[ANYEXT12]](s32), [[ANYEXT13]](s32), [[ANYEXT14]](s32), [[ANYEXT15]](s32) - ; CHECK: S_ENDPGM 0, implicit [[BUILD_VECTOR]](<16 x s32>) + ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s64), [[UV1:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[COPY]](<2 x s64>) + ; CHECK-NEXT: [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[UV]](s64) + ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[UV2]](s32) + ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; CHECK-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[UV2]], [[C]](s32) + ; CHECK-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) + ; CHECK-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[UV3]](s32) + ; CHECK-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[UV3]], [[C]](s32) + ; CHECK-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR1]](s32) + ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s16) = G_CONSTANT i16 8 + ; CHECK-NEXT: [[LSHR2:%[0-9]+]]:_(s16) = G_LSHR [[TRUNC]], [[C1]](s16) + ; CHECK-NEXT: [[LSHR3:%[0-9]+]]:_(s16) = G_LSHR [[TRUNC1]], [[C1]](s16) + ; CHECK-NEXT: [[LSHR4:%[0-9]+]]:_(s16) = G_LSHR [[TRUNC2]], [[C1]](s16) + ; CHECK-NEXT: [[LSHR5:%[0-9]+]]:_(s16) = G_LSHR [[TRUNC3]], [[C1]](s16) + ; CHECK-NEXT: [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[UV1]](s64) + ; CHECK-NEXT: [[TRUNC4:%[0-9]+]]:_(s16) = G_TRUNC [[UV4]](s32) + ; CHECK-NEXT: [[LSHR6:%[0-9]+]]:_(s32) = G_LSHR [[UV4]], [[C]](s32) + ; CHECK-NEXT: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR6]](s32) + ; CHECK-NEXT: [[TRUNC6:%[0-9]+]]:_(s16) = G_TRUNC [[UV5]](s32) + ; CHECK-NEXT: [[LSHR7:%[0-9]+]]:_(s32) = G_LSHR [[UV5]], [[C]](s32) + ; CHECK-NEXT: [[TRUNC7:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR7]](s32) + ; CHECK-NEXT: [[LSHR8:%[0-9]+]]:_(s16) = G_LSHR [[TRUNC4]], [[C1]](s16) + ; CHECK-NEXT: [[LSHR9:%[0-9]+]]:_(s16) = G_LSHR [[TRUNC5]], [[C1]](s16) + ; CHECK-NEXT: [[LSHR10:%[0-9]+]]:_(s16) = G_LSHR [[TRUNC6]], [[C1]](s16) + ; CHECK-NEXT: [[LSHR11:%[0-9]+]]:_(s16) = G_LSHR [[TRUNC7]], [[C1]](s16) + ; CHECK-NEXT: [[ADD:%[0-9]+]]:_(s16) = G_ADD [[TRUNC]], [[TRUNC]] + ; CHECK-NEXT: [[ADD1:%[0-9]+]]:_(s16) = G_ADD [[LSHR2]], [[LSHR2]] + ; CHECK-NEXT: [[ADD2:%[0-9]+]]:_(s16) = G_ADD [[TRUNC1]], [[TRUNC1]] + ; CHECK-NEXT: [[ADD3:%[0-9]+]]:_(s16) = G_ADD [[LSHR3]], [[LSHR3]] + ; CHECK-NEXT: [[ADD4:%[0-9]+]]:_(s16) = G_ADD [[TRUNC2]], [[TRUNC2]] + ; CHECK-NEXT: [[ADD5:%[0-9]+]]:_(s16) = G_ADD [[LSHR4]], [[LSHR4]] + ; CHECK-NEXT: [[ADD6:%[0-9]+]]:_(s16) = G_ADD [[TRUNC3]], [[TRUNC3]] + ; CHECK-NEXT: [[ADD7:%[0-9]+]]:_(s16) = G_ADD [[LSHR5]], [[LSHR5]] + ; CHECK-NEXT: [[ADD8:%[0-9]+]]:_(s16) = G_ADD [[TRUNC4]], [[TRUNC4]] + ; CHECK-NEXT: [[ADD9:%[0-9]+]]:_(s16) = G_ADD [[LSHR8]], [[LSHR8]] + ; CHECK-NEXT: [[ADD10:%[0-9]+]]:_(s16) = G_ADD [[TRUNC5]], [[TRUNC5]] + ; CHECK-NEXT: [[ADD11:%[0-9]+]]:_(s16) = 
G_ADD [[LSHR9]], [[LSHR9]] + ; CHECK-NEXT: [[ADD12:%[0-9]+]]:_(s16) = G_ADD [[TRUNC6]], [[TRUNC6]] + ; CHECK-NEXT: [[ADD13:%[0-9]+]]:_(s16) = G_ADD [[LSHR10]], [[LSHR10]] + ; CHECK-NEXT: [[ADD14:%[0-9]+]]:_(s16) = G_ADD [[TRUNC7]], [[TRUNC7]] + ; CHECK-NEXT: [[ADD15:%[0-9]+]]:_(s16) = G_ADD [[LSHR11]], [[LSHR11]] + ; CHECK-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[ADD]](s16) + ; CHECK-NEXT: [[ANYEXT1:%[0-9]+]]:_(s32) = G_ANYEXT [[ADD1]](s16) + ; CHECK-NEXT: [[ANYEXT2:%[0-9]+]]:_(s32) = G_ANYEXT [[ADD2]](s16) + ; CHECK-NEXT: [[ANYEXT3:%[0-9]+]]:_(s32) = G_ANYEXT [[ADD3]](s16) + ; CHECK-NEXT: [[ANYEXT4:%[0-9]+]]:_(s32) = G_ANYEXT [[ADD4]](s16) + ; CHECK-NEXT: [[ANYEXT5:%[0-9]+]]:_(s32) = G_ANYEXT [[ADD5]](s16) + ; CHECK-NEXT: [[ANYEXT6:%[0-9]+]]:_(s32) = G_ANYEXT [[ADD6]](s16) + ; CHECK-NEXT: [[ANYEXT7:%[0-9]+]]:_(s32) = G_ANYEXT [[ADD7]](s16) + ; CHECK-NEXT: [[ANYEXT8:%[0-9]+]]:_(s32) = G_ANYEXT [[ADD8]](s16) + ; CHECK-NEXT: [[ANYEXT9:%[0-9]+]]:_(s32) = G_ANYEXT [[ADD9]](s16) + ; CHECK-NEXT: [[ANYEXT10:%[0-9]+]]:_(s32) = G_ANYEXT [[ADD10]](s16) + ; CHECK-NEXT: [[ANYEXT11:%[0-9]+]]:_(s32) = G_ANYEXT [[ADD11]](s16) + ; CHECK-NEXT: [[ANYEXT12:%[0-9]+]]:_(s32) = G_ANYEXT [[ADD12]](s16) + ; CHECK-NEXT: [[ANYEXT13:%[0-9]+]]:_(s32) = G_ANYEXT [[ADD13]](s16) + ; CHECK-NEXT: [[ANYEXT14:%[0-9]+]]:_(s32) = G_ANYEXT [[ADD14]](s16) + ; CHECK-NEXT: [[ANYEXT15:%[0-9]+]]:_(s32) = G_ANYEXT [[ADD15]](s16) + ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<16 x s32>) = G_BUILD_VECTOR [[ANYEXT]](s32), [[ANYEXT1]](s32), [[ANYEXT2]](s32), [[ANYEXT3]](s32), [[ANYEXT4]](s32), [[ANYEXT5]](s32), [[ANYEXT6]](s32), [[ANYEXT7]](s32), [[ANYEXT8]](s32), [[ANYEXT9]](s32), [[ANYEXT10]](s32), [[ANYEXT11]](s32), [[ANYEXT12]](s32), [[ANYEXT13]](s32), [[ANYEXT14]](s32), [[ANYEXT15]](s32) + ; CHECK-NEXT: S_ENDPGM 0, implicit [[BUILD_VECTOR]](<16 x s32>) %0:_(<2 x s64>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3 %1:_(<16 x s8>) = G_BITCAST %0 %2:_(<16 x s8>) = G_ADD %1, %1 @@ -1557,79 +1561,79 @@ body: | ; CHECK-LABEL: name: test_bitcast_v16s8_to_v2s64 ; CHECK: [[COPY:%[0-9]+]]:_(<16 x s32>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 - ; CHECK: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32), [[UV6:%[0-9]+]]:_(s32), [[UV7:%[0-9]+]]:_(s32), [[UV8:%[0-9]+]]:_(s32), [[UV9:%[0-9]+]]:_(s32), [[UV10:%[0-9]+]]:_(s32), [[UV11:%[0-9]+]]:_(s32), [[UV12:%[0-9]+]]:_(s32), [[UV13:%[0-9]+]]:_(s32), [[UV14:%[0-9]+]]:_(s32), [[UV15:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<16 x s32>) - ; CHECK: [[C:%[0-9]+]]:_(s16) = G_CONSTANT i16 255 - ; CHECK: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[UV]](s32) - ; CHECK: [[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC]], [[C]] - ; CHECK: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[UV1]](s32) - ; CHECK: [[AND1:%[0-9]+]]:_(s16) = G_AND [[TRUNC1]], [[C]] - ; CHECK: [[C1:%[0-9]+]]:_(s16) = G_CONSTANT i16 8 - ; CHECK: [[SHL:%[0-9]+]]:_(s16) = G_SHL [[AND1]], [[C1]](s16) - ; CHECK: [[OR:%[0-9]+]]:_(s16) = G_OR [[AND]], [[SHL]] - ; CHECK: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[UV2]](s32) - ; CHECK: [[AND2:%[0-9]+]]:_(s16) = G_AND [[TRUNC2]], [[C]] - ; CHECK: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[UV3]](s32) - ; CHECK: [[AND3:%[0-9]+]]:_(s16) = G_AND [[TRUNC3]], [[C]] - ; CHECK: [[SHL1:%[0-9]+]]:_(s16) = G_SHL [[AND3]], [[C1]](s16) - ; CHECK: [[OR1:%[0-9]+]]:_(s16) = G_OR [[AND2]], [[SHL1]] - ; CHECK: [[TRUNC4:%[0-9]+]]:_(s16) = G_TRUNC [[UV4]](s32) - ; CHECK: [[AND4:%[0-9]+]]:_(s16) = 
G_AND [[TRUNC4]], [[C]] - ; CHECK: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[UV5]](s32) - ; CHECK: [[AND5:%[0-9]+]]:_(s16) = G_AND [[TRUNC5]], [[C]] - ; CHECK: [[SHL2:%[0-9]+]]:_(s16) = G_SHL [[AND5]], [[C1]](s16) - ; CHECK: [[OR2:%[0-9]+]]:_(s16) = G_OR [[AND4]], [[SHL2]] - ; CHECK: [[TRUNC6:%[0-9]+]]:_(s16) = G_TRUNC [[UV6]](s32) - ; CHECK: [[AND6:%[0-9]+]]:_(s16) = G_AND [[TRUNC6]], [[C]] - ; CHECK: [[TRUNC7:%[0-9]+]]:_(s16) = G_TRUNC [[UV7]](s32) - ; CHECK: [[AND7:%[0-9]+]]:_(s16) = G_AND [[TRUNC7]], [[C]] - ; CHECK: [[SHL3:%[0-9]+]]:_(s16) = G_SHL [[AND7]], [[C1]](s16) - ; CHECK: [[OR3:%[0-9]+]]:_(s16) = G_OR [[AND6]], [[SHL3]] - ; CHECK: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[OR]](s16) - ; CHECK: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[OR1]](s16) - ; CHECK: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; CHECK: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C2]](s32) - ; CHECK: [[OR4:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL4]] - ; CHECK: [[ZEXT2:%[0-9]+]]:_(s32) = G_ZEXT [[OR2]](s16) - ; CHECK: [[ZEXT3:%[0-9]+]]:_(s32) = G_ZEXT [[OR3]](s16) - ; CHECK: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[ZEXT3]], [[C2]](s32) - ; CHECK: [[OR5:%[0-9]+]]:_(s32) = G_OR [[ZEXT2]], [[SHL5]] - ; CHECK: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[OR4]](s32), [[OR5]](s32) - ; CHECK: [[UV16:%[0-9]+]]:_(s32), [[UV17:%[0-9]+]]:_(s32), [[UV18:%[0-9]+]]:_(s32), [[UV19:%[0-9]+]]:_(s32), [[UV20:%[0-9]+]]:_(s32), [[UV21:%[0-9]+]]:_(s32), [[UV22:%[0-9]+]]:_(s32), [[UV23:%[0-9]+]]:_(s32), [[UV24:%[0-9]+]]:_(s32), [[UV25:%[0-9]+]]:_(s32), [[UV26:%[0-9]+]]:_(s32), [[UV27:%[0-9]+]]:_(s32), [[UV28:%[0-9]+]]:_(s32), [[UV29:%[0-9]+]]:_(s32), [[UV30:%[0-9]+]]:_(s32), [[UV31:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<16 x s32>) - ; CHECK: [[TRUNC8:%[0-9]+]]:_(s16) = G_TRUNC [[UV24]](s32) - ; CHECK: [[AND8:%[0-9]+]]:_(s16) = G_AND [[TRUNC8]], [[C]] - ; CHECK: [[TRUNC9:%[0-9]+]]:_(s16) = G_TRUNC [[UV25]](s32) - ; CHECK: [[AND9:%[0-9]+]]:_(s16) = G_AND [[TRUNC9]], [[C]] - ; CHECK: [[SHL6:%[0-9]+]]:_(s16) = G_SHL [[AND9]], [[C1]](s16) - ; CHECK: [[OR6:%[0-9]+]]:_(s16) = G_OR [[AND8]], [[SHL6]] - ; CHECK: [[TRUNC10:%[0-9]+]]:_(s16) = G_TRUNC [[UV26]](s32) - ; CHECK: [[AND10:%[0-9]+]]:_(s16) = G_AND [[TRUNC10]], [[C]] - ; CHECK: [[TRUNC11:%[0-9]+]]:_(s16) = G_TRUNC [[UV27]](s32) - ; CHECK: [[AND11:%[0-9]+]]:_(s16) = G_AND [[TRUNC11]], [[C]] - ; CHECK: [[SHL7:%[0-9]+]]:_(s16) = G_SHL [[AND11]], [[C1]](s16) - ; CHECK: [[OR7:%[0-9]+]]:_(s16) = G_OR [[AND10]], [[SHL7]] - ; CHECK: [[TRUNC12:%[0-9]+]]:_(s16) = G_TRUNC [[UV28]](s32) - ; CHECK: [[AND12:%[0-9]+]]:_(s16) = G_AND [[TRUNC12]], [[C]] - ; CHECK: [[TRUNC13:%[0-9]+]]:_(s16) = G_TRUNC [[UV29]](s32) - ; CHECK: [[AND13:%[0-9]+]]:_(s16) = G_AND [[TRUNC13]], [[C]] - ; CHECK: [[SHL8:%[0-9]+]]:_(s16) = G_SHL [[AND13]], [[C1]](s16) - ; CHECK: [[OR8:%[0-9]+]]:_(s16) = G_OR [[AND12]], [[SHL8]] - ; CHECK: [[TRUNC14:%[0-9]+]]:_(s16) = G_TRUNC [[UV30]](s32) - ; CHECK: [[AND14:%[0-9]+]]:_(s16) = G_AND [[TRUNC14]], [[C]] - ; CHECK: [[TRUNC15:%[0-9]+]]:_(s16) = G_TRUNC [[UV31]](s32) - ; CHECK: [[AND15:%[0-9]+]]:_(s16) = G_AND [[TRUNC15]], [[C]] - ; CHECK: [[SHL9:%[0-9]+]]:_(s16) = G_SHL [[AND15]], [[C1]](s16) - ; CHECK: [[OR9:%[0-9]+]]:_(s16) = G_OR [[AND14]], [[SHL9]] - ; CHECK: [[ZEXT4:%[0-9]+]]:_(s32) = G_ZEXT [[OR6]](s16) - ; CHECK: [[ZEXT5:%[0-9]+]]:_(s32) = G_ZEXT [[OR7]](s16) - ; CHECK: [[SHL10:%[0-9]+]]:_(s32) = G_SHL [[ZEXT5]], [[C2]](s32) - ; CHECK: [[OR10:%[0-9]+]]:_(s32) = G_OR [[ZEXT4]], [[SHL10]] - ; CHECK: [[ZEXT6:%[0-9]+]]:_(s32) = G_ZEXT [[OR8]](s16) - ; CHECK: [[ZEXT7:%[0-9]+]]:_(s32) = G_ZEXT 
[[OR9]](s16) - ; CHECK: [[SHL11:%[0-9]+]]:_(s32) = G_SHL [[ZEXT7]], [[C2]](s32) - ; CHECK: [[OR11:%[0-9]+]]:_(s32) = G_OR [[ZEXT6]], [[SHL11]] - ; CHECK: [[MV1:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[OR10]](s32), [[OR11]](s32) - ; CHECK: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s64>) = G_BUILD_VECTOR [[MV]](s64), [[MV1]](s64) - ; CHECK: S_ENDPGM 0, implicit [[BUILD_VECTOR]](<2 x s64>) + ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32), [[UV6:%[0-9]+]]:_(s32), [[UV7:%[0-9]+]]:_(s32), [[UV8:%[0-9]+]]:_(s32), [[UV9:%[0-9]+]]:_(s32), [[UV10:%[0-9]+]]:_(s32), [[UV11:%[0-9]+]]:_(s32), [[UV12:%[0-9]+]]:_(s32), [[UV13:%[0-9]+]]:_(s32), [[UV14:%[0-9]+]]:_(s32), [[UV15:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<16 x s32>) + ; CHECK-NEXT: [[C:%[0-9]+]]:_(s16) = G_CONSTANT i16 255 + ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[UV]](s32) + ; CHECK-NEXT: [[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC]], [[C]] + ; CHECK-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[UV1]](s32) + ; CHECK-NEXT: [[AND1:%[0-9]+]]:_(s16) = G_AND [[TRUNC1]], [[C]] + ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s16) = G_CONSTANT i16 8 + ; CHECK-NEXT: [[SHL:%[0-9]+]]:_(s16) = G_SHL [[AND1]], [[C1]](s16) + ; CHECK-NEXT: [[OR:%[0-9]+]]:_(s16) = G_OR [[AND]], [[SHL]] + ; CHECK-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[UV2]](s32) + ; CHECK-NEXT: [[AND2:%[0-9]+]]:_(s16) = G_AND [[TRUNC2]], [[C]] + ; CHECK-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[UV3]](s32) + ; CHECK-NEXT: [[AND3:%[0-9]+]]:_(s16) = G_AND [[TRUNC3]], [[C]] + ; CHECK-NEXT: [[SHL1:%[0-9]+]]:_(s16) = G_SHL [[AND3]], [[C1]](s16) + ; CHECK-NEXT: [[OR1:%[0-9]+]]:_(s16) = G_OR [[AND2]], [[SHL1]] + ; CHECK-NEXT: [[TRUNC4:%[0-9]+]]:_(s16) = G_TRUNC [[UV4]](s32) + ; CHECK-NEXT: [[AND4:%[0-9]+]]:_(s16) = G_AND [[TRUNC4]], [[C]] + ; CHECK-NEXT: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[UV5]](s32) + ; CHECK-NEXT: [[AND5:%[0-9]+]]:_(s16) = G_AND [[TRUNC5]], [[C]] + ; CHECK-NEXT: [[SHL2:%[0-9]+]]:_(s16) = G_SHL [[AND5]], [[C1]](s16) + ; CHECK-NEXT: [[OR2:%[0-9]+]]:_(s16) = G_OR [[AND4]], [[SHL2]] + ; CHECK-NEXT: [[TRUNC6:%[0-9]+]]:_(s16) = G_TRUNC [[UV6]](s32) + ; CHECK-NEXT: [[AND6:%[0-9]+]]:_(s16) = G_AND [[TRUNC6]], [[C]] + ; CHECK-NEXT: [[TRUNC7:%[0-9]+]]:_(s16) = G_TRUNC [[UV7]](s32) + ; CHECK-NEXT: [[AND7:%[0-9]+]]:_(s16) = G_AND [[TRUNC7]], [[C]] + ; CHECK-NEXT: [[SHL3:%[0-9]+]]:_(s16) = G_SHL [[AND7]], [[C1]](s16) + ; CHECK-NEXT: [[OR3:%[0-9]+]]:_(s16) = G_OR [[AND6]], [[SHL3]] + ; CHECK-NEXT: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[OR]](s16) + ; CHECK-NEXT: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[OR1]](s16) + ; CHECK-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; CHECK-NEXT: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C2]](s32) + ; CHECK-NEXT: [[OR4:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL4]] + ; CHECK-NEXT: [[ZEXT2:%[0-9]+]]:_(s32) = G_ZEXT [[OR2]](s16) + ; CHECK-NEXT: [[ZEXT3:%[0-9]+]]:_(s32) = G_ZEXT [[OR3]](s16) + ; CHECK-NEXT: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[ZEXT3]], [[C2]](s32) + ; CHECK-NEXT: [[OR5:%[0-9]+]]:_(s32) = G_OR [[ZEXT2]], [[SHL5]] + ; CHECK-NEXT: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[OR4]](s32), [[OR5]](s32) + ; CHECK-NEXT: [[UV16:%[0-9]+]]:_(s32), [[UV17:%[0-9]+]]:_(s32), [[UV18:%[0-9]+]]:_(s32), [[UV19:%[0-9]+]]:_(s32), [[UV20:%[0-9]+]]:_(s32), [[UV21:%[0-9]+]]:_(s32), [[UV22:%[0-9]+]]:_(s32), [[UV23:%[0-9]+]]:_(s32), [[UV24:%[0-9]+]]:_(s32), [[UV25:%[0-9]+]]:_(s32), [[UV26:%[0-9]+]]:_(s32), [[UV27:%[0-9]+]]:_(s32), [[UV28:%[0-9]+]]:_(s32), [[UV29:%[0-9]+]]:_(s32), 
[[UV30:%[0-9]+]]:_(s32), [[UV31:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<16 x s32>) + ; CHECK-NEXT: [[TRUNC8:%[0-9]+]]:_(s16) = G_TRUNC [[UV24]](s32) + ; CHECK-NEXT: [[AND8:%[0-9]+]]:_(s16) = G_AND [[TRUNC8]], [[C]] + ; CHECK-NEXT: [[TRUNC9:%[0-9]+]]:_(s16) = G_TRUNC [[UV25]](s32) + ; CHECK-NEXT: [[AND9:%[0-9]+]]:_(s16) = G_AND [[TRUNC9]], [[C]] + ; CHECK-NEXT: [[SHL6:%[0-9]+]]:_(s16) = G_SHL [[AND9]], [[C1]](s16) + ; CHECK-NEXT: [[OR6:%[0-9]+]]:_(s16) = G_OR [[AND8]], [[SHL6]] + ; CHECK-NEXT: [[TRUNC10:%[0-9]+]]:_(s16) = G_TRUNC [[UV26]](s32) + ; CHECK-NEXT: [[AND10:%[0-9]+]]:_(s16) = G_AND [[TRUNC10]], [[C]] + ; CHECK-NEXT: [[TRUNC11:%[0-9]+]]:_(s16) = G_TRUNC [[UV27]](s32) + ; CHECK-NEXT: [[AND11:%[0-9]+]]:_(s16) = G_AND [[TRUNC11]], [[C]] + ; CHECK-NEXT: [[SHL7:%[0-9]+]]:_(s16) = G_SHL [[AND11]], [[C1]](s16) + ; CHECK-NEXT: [[OR7:%[0-9]+]]:_(s16) = G_OR [[AND10]], [[SHL7]] + ; CHECK-NEXT: [[TRUNC12:%[0-9]+]]:_(s16) = G_TRUNC [[UV28]](s32) + ; CHECK-NEXT: [[AND12:%[0-9]+]]:_(s16) = G_AND [[TRUNC12]], [[C]] + ; CHECK-NEXT: [[TRUNC13:%[0-9]+]]:_(s16) = G_TRUNC [[UV29]](s32) + ; CHECK-NEXT: [[AND13:%[0-9]+]]:_(s16) = G_AND [[TRUNC13]], [[C]] + ; CHECK-NEXT: [[SHL8:%[0-9]+]]:_(s16) = G_SHL [[AND13]], [[C1]](s16) + ; CHECK-NEXT: [[OR8:%[0-9]+]]:_(s16) = G_OR [[AND12]], [[SHL8]] + ; CHECK-NEXT: [[TRUNC14:%[0-9]+]]:_(s16) = G_TRUNC [[UV30]](s32) + ; CHECK-NEXT: [[AND14:%[0-9]+]]:_(s16) = G_AND [[TRUNC14]], [[C]] + ; CHECK-NEXT: [[TRUNC15:%[0-9]+]]:_(s16) = G_TRUNC [[UV31]](s32) + ; CHECK-NEXT: [[AND15:%[0-9]+]]:_(s16) = G_AND [[TRUNC15]], [[C]] + ; CHECK-NEXT: [[SHL9:%[0-9]+]]:_(s16) = G_SHL [[AND15]], [[C1]](s16) + ; CHECK-NEXT: [[OR9:%[0-9]+]]:_(s16) = G_OR [[AND14]], [[SHL9]] + ; CHECK-NEXT: [[ZEXT4:%[0-9]+]]:_(s32) = G_ZEXT [[OR6]](s16) + ; CHECK-NEXT: [[ZEXT5:%[0-9]+]]:_(s32) = G_ZEXT [[OR7]](s16) + ; CHECK-NEXT: [[SHL10:%[0-9]+]]:_(s32) = G_SHL [[ZEXT5]], [[C2]](s32) + ; CHECK-NEXT: [[OR10:%[0-9]+]]:_(s32) = G_OR [[ZEXT4]], [[SHL10]] + ; CHECK-NEXT: [[ZEXT6:%[0-9]+]]:_(s32) = G_ZEXT [[OR8]](s16) + ; CHECK-NEXT: [[ZEXT7:%[0-9]+]]:_(s32) = G_ZEXT [[OR9]](s16) + ; CHECK-NEXT: [[SHL11:%[0-9]+]]:_(s32) = G_SHL [[ZEXT7]], [[C2]](s32) + ; CHECK-NEXT: [[OR11:%[0-9]+]]:_(s32) = G_OR [[ZEXT6]], [[SHL11]] + ; CHECK-NEXT: [[MV1:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[OR10]](s32), [[OR11]](s32) + ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s64>) = G_BUILD_VECTOR [[MV]](s64), [[MV1]](s64) + ; CHECK-NEXT: S_ENDPGM 0, implicit [[BUILD_VECTOR]](<2 x s64>) %0:_(<16 x s32>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 %1:_(<16 x s8>) = G_TRUNC %0 %2:_(<2 x s64>) = G_BITCAST %1 @@ -1643,88 +1647,88 @@ body: | liveins: $vgpr0_vgpr1_vgpr2_vgpr3 ; CHECK-LABEL: name: test_bitcast_v4s32_to_v16s8 ; CHECK: [[COPY:%[0-9]+]]:_(<4 x s32>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3 - ; CHECK: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<4 x s32>) - ; CHECK: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 - ; CHECK: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[UV]], [[C]](s32) - ; CHECK: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; CHECK: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[UV]], [[C1]](s32) - ; CHECK: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 24 - ; CHECK: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[UV]], [[C2]](s32) - ; CHECK: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[UV1]], [[C]](s32) - ; CHECK: [[LSHR4:%[0-9]+]]:_(s32) = G_LSHR [[UV1]], [[C1]](s32) - ; CHECK: [[LSHR5:%[0-9]+]]:_(s32) = G_LSHR [[UV1]], [[C2]](s32) - 
; CHECK: [[LSHR6:%[0-9]+]]:_(s32) = G_LSHR [[UV2]], [[C]](s32) - ; CHECK: [[LSHR7:%[0-9]+]]:_(s32) = G_LSHR [[UV2]], [[C1]](s32) - ; CHECK: [[LSHR8:%[0-9]+]]:_(s32) = G_LSHR [[UV2]], [[C2]](s32) - ; CHECK: [[LSHR9:%[0-9]+]]:_(s32) = G_LSHR [[UV3]], [[C]](s32) - ; CHECK: [[LSHR10:%[0-9]+]]:_(s32) = G_LSHR [[UV3]], [[C1]](s32) - ; CHECK: [[LSHR11:%[0-9]+]]:_(s32) = G_LSHR [[UV3]], [[C2]](s32) - ; CHECK: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[UV]](s32) - ; CHECK: [[COPY1:%[0-9]+]]:_(s16) = COPY [[TRUNC]](s16) - ; CHECK: [[ADD:%[0-9]+]]:_(s16) = G_ADD [[TRUNC]], [[COPY1]] - ; CHECK: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) - ; CHECK: [[COPY2:%[0-9]+]]:_(s16) = COPY [[TRUNC1]](s16) - ; CHECK: [[ADD1:%[0-9]+]]:_(s16) = G_ADD [[TRUNC1]], [[COPY2]] - ; CHECK: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR1]](s32) - ; CHECK: [[COPY3:%[0-9]+]]:_(s16) = COPY [[TRUNC2]](s16) - ; CHECK: [[ADD2:%[0-9]+]]:_(s16) = G_ADD [[TRUNC2]], [[COPY3]] - ; CHECK: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR2]](s32) - ; CHECK: [[COPY4:%[0-9]+]]:_(s16) = COPY [[TRUNC3]](s16) - ; CHECK: [[ADD3:%[0-9]+]]:_(s16) = G_ADD [[TRUNC3]], [[COPY4]] - ; CHECK: [[TRUNC4:%[0-9]+]]:_(s16) = G_TRUNC [[UV1]](s32) - ; CHECK: [[COPY5:%[0-9]+]]:_(s16) = COPY [[TRUNC4]](s16) - ; CHECK: [[ADD4:%[0-9]+]]:_(s16) = G_ADD [[TRUNC4]], [[COPY5]] - ; CHECK: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR3]](s32) - ; CHECK: [[COPY6:%[0-9]+]]:_(s16) = COPY [[TRUNC5]](s16) - ; CHECK: [[ADD5:%[0-9]+]]:_(s16) = G_ADD [[TRUNC5]], [[COPY6]] - ; CHECK: [[TRUNC6:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR4]](s32) - ; CHECK: [[COPY7:%[0-9]+]]:_(s16) = COPY [[TRUNC6]](s16) - ; CHECK: [[ADD6:%[0-9]+]]:_(s16) = G_ADD [[TRUNC6]], [[COPY7]] - ; CHECK: [[TRUNC7:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR5]](s32) - ; CHECK: [[COPY8:%[0-9]+]]:_(s16) = COPY [[TRUNC7]](s16) - ; CHECK: [[ADD7:%[0-9]+]]:_(s16) = G_ADD [[TRUNC7]], [[COPY8]] - ; CHECK: [[TRUNC8:%[0-9]+]]:_(s16) = G_TRUNC [[UV2]](s32) - ; CHECK: [[COPY9:%[0-9]+]]:_(s16) = COPY [[TRUNC8]](s16) - ; CHECK: [[ADD8:%[0-9]+]]:_(s16) = G_ADD [[TRUNC8]], [[COPY9]] - ; CHECK: [[TRUNC9:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR6]](s32) - ; CHECK: [[COPY10:%[0-9]+]]:_(s16) = COPY [[TRUNC9]](s16) - ; CHECK: [[ADD9:%[0-9]+]]:_(s16) = G_ADD [[TRUNC9]], [[COPY10]] - ; CHECK: [[TRUNC10:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR7]](s32) - ; CHECK: [[COPY11:%[0-9]+]]:_(s16) = COPY [[TRUNC10]](s16) - ; CHECK: [[ADD10:%[0-9]+]]:_(s16) = G_ADD [[TRUNC10]], [[COPY11]] - ; CHECK: [[TRUNC11:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR8]](s32) - ; CHECK: [[COPY12:%[0-9]+]]:_(s16) = COPY [[TRUNC11]](s16) - ; CHECK: [[ADD11:%[0-9]+]]:_(s16) = G_ADD [[TRUNC11]], [[COPY12]] - ; CHECK: [[TRUNC12:%[0-9]+]]:_(s16) = G_TRUNC [[UV3]](s32) - ; CHECK: [[COPY13:%[0-9]+]]:_(s16) = COPY [[TRUNC12]](s16) - ; CHECK: [[ADD12:%[0-9]+]]:_(s16) = G_ADD [[TRUNC12]], [[COPY13]] - ; CHECK: [[TRUNC13:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR9]](s32) - ; CHECK: [[COPY14:%[0-9]+]]:_(s16) = COPY [[TRUNC13]](s16) - ; CHECK: [[ADD13:%[0-9]+]]:_(s16) = G_ADD [[TRUNC13]], [[COPY14]] - ; CHECK: [[TRUNC14:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR10]](s32) - ; CHECK: [[COPY15:%[0-9]+]]:_(s16) = COPY [[TRUNC14]](s16) - ; CHECK: [[ADD14:%[0-9]+]]:_(s16) = G_ADD [[TRUNC14]], [[COPY15]] - ; CHECK: [[TRUNC15:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR11]](s32) - ; CHECK: [[COPY16:%[0-9]+]]:_(s16) = COPY [[TRUNC15]](s16) - ; CHECK: [[ADD15:%[0-9]+]]:_(s16) = G_ADD [[TRUNC15]], [[COPY16]] - ; CHECK: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[ADD]](s16) - ; CHECK: [[ANYEXT1:%[0-9]+]]:_(s32) = G_ANYEXT [[ADD1]](s16) - ; CHECK: 
[[ANYEXT2:%[0-9]+]]:_(s32) = G_ANYEXT [[ADD2]](s16) - ; CHECK: [[ANYEXT3:%[0-9]+]]:_(s32) = G_ANYEXT [[ADD3]](s16) - ; CHECK: [[ANYEXT4:%[0-9]+]]:_(s32) = G_ANYEXT [[ADD4]](s16) - ; CHECK: [[ANYEXT5:%[0-9]+]]:_(s32) = G_ANYEXT [[ADD5]](s16) - ; CHECK: [[ANYEXT6:%[0-9]+]]:_(s32) = G_ANYEXT [[ADD6]](s16) - ; CHECK: [[ANYEXT7:%[0-9]+]]:_(s32) = G_ANYEXT [[ADD7]](s16) - ; CHECK: [[ANYEXT8:%[0-9]+]]:_(s32) = G_ANYEXT [[ADD8]](s16) - ; CHECK: [[ANYEXT9:%[0-9]+]]:_(s32) = G_ANYEXT [[ADD9]](s16) - ; CHECK: [[ANYEXT10:%[0-9]+]]:_(s32) = G_ANYEXT [[ADD10]](s16) - ; CHECK: [[ANYEXT11:%[0-9]+]]:_(s32) = G_ANYEXT [[ADD11]](s16) - ; CHECK: [[ANYEXT12:%[0-9]+]]:_(s32) = G_ANYEXT [[ADD12]](s16) - ; CHECK: [[ANYEXT13:%[0-9]+]]:_(s32) = G_ANYEXT [[ADD13]](s16) - ; CHECK: [[ANYEXT14:%[0-9]+]]:_(s32) = G_ANYEXT [[ADD14]](s16) - ; CHECK: [[ANYEXT15:%[0-9]+]]:_(s32) = G_ANYEXT [[ADD15]](s16) - ; CHECK: [[BUILD_VECTOR:%[0-9]+]]:_(<16 x s32>) = G_BUILD_VECTOR [[ANYEXT]](s32), [[ANYEXT1]](s32), [[ANYEXT2]](s32), [[ANYEXT3]](s32), [[ANYEXT4]](s32), [[ANYEXT5]](s32), [[ANYEXT6]](s32), [[ANYEXT7]](s32), [[ANYEXT8]](s32), [[ANYEXT9]](s32), [[ANYEXT10]](s32), [[ANYEXT11]](s32), [[ANYEXT12]](s32), [[ANYEXT13]](s32), [[ANYEXT14]](s32), [[ANYEXT15]](s32) - ; CHECK: S_ENDPGM 0, implicit [[BUILD_VECTOR]](<16 x s32>) + ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<4 x s32>) + ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 + ; CHECK-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[UV]], [[C]](s32) + ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; CHECK-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[UV]], [[C1]](s32) + ; CHECK-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 24 + ; CHECK-NEXT: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[UV]], [[C2]](s32) + ; CHECK-NEXT: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[UV1]], [[C]](s32) + ; CHECK-NEXT: [[LSHR4:%[0-9]+]]:_(s32) = G_LSHR [[UV1]], [[C1]](s32) + ; CHECK-NEXT: [[LSHR5:%[0-9]+]]:_(s32) = G_LSHR [[UV1]], [[C2]](s32) + ; CHECK-NEXT: [[LSHR6:%[0-9]+]]:_(s32) = G_LSHR [[UV2]], [[C]](s32) + ; CHECK-NEXT: [[LSHR7:%[0-9]+]]:_(s32) = G_LSHR [[UV2]], [[C1]](s32) + ; CHECK-NEXT: [[LSHR8:%[0-9]+]]:_(s32) = G_LSHR [[UV2]], [[C2]](s32) + ; CHECK-NEXT: [[LSHR9:%[0-9]+]]:_(s32) = G_LSHR [[UV3]], [[C]](s32) + ; CHECK-NEXT: [[LSHR10:%[0-9]+]]:_(s32) = G_LSHR [[UV3]], [[C1]](s32) + ; CHECK-NEXT: [[LSHR11:%[0-9]+]]:_(s32) = G_LSHR [[UV3]], [[C2]](s32) + ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[UV]](s32) + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s16) = COPY [[TRUNC]](s16) + ; CHECK-NEXT: [[ADD:%[0-9]+]]:_(s16) = G_ADD [[TRUNC]], [[COPY1]] + ; CHECK-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(s16) = COPY [[TRUNC1]](s16) + ; CHECK-NEXT: [[ADD1:%[0-9]+]]:_(s16) = G_ADD [[TRUNC1]], [[COPY2]] + ; CHECK-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR1]](s32) + ; CHECK-NEXT: [[COPY3:%[0-9]+]]:_(s16) = COPY [[TRUNC2]](s16) + ; CHECK-NEXT: [[ADD2:%[0-9]+]]:_(s16) = G_ADD [[TRUNC2]], [[COPY3]] + ; CHECK-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR2]](s32) + ; CHECK-NEXT: [[COPY4:%[0-9]+]]:_(s16) = COPY [[TRUNC3]](s16) + ; CHECK-NEXT: [[ADD3:%[0-9]+]]:_(s16) = G_ADD [[TRUNC3]], [[COPY4]] + ; CHECK-NEXT: [[TRUNC4:%[0-9]+]]:_(s16) = G_TRUNC [[UV1]](s32) + ; CHECK-NEXT: [[COPY5:%[0-9]+]]:_(s16) = COPY [[TRUNC4]](s16) + ; CHECK-NEXT: [[ADD4:%[0-9]+]]:_(s16) = G_ADD [[TRUNC4]], [[COPY5]] + ; CHECK-NEXT: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR3]](s32) + ; CHECK-NEXT: 
[[COPY6:%[0-9]+]]:_(s16) = COPY [[TRUNC5]](s16) + ; CHECK-NEXT: [[ADD5:%[0-9]+]]:_(s16) = G_ADD [[TRUNC5]], [[COPY6]] + ; CHECK-NEXT: [[TRUNC6:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR4]](s32) + ; CHECK-NEXT: [[COPY7:%[0-9]+]]:_(s16) = COPY [[TRUNC6]](s16) + ; CHECK-NEXT: [[ADD6:%[0-9]+]]:_(s16) = G_ADD [[TRUNC6]], [[COPY7]] + ; CHECK-NEXT: [[TRUNC7:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR5]](s32) + ; CHECK-NEXT: [[COPY8:%[0-9]+]]:_(s16) = COPY [[TRUNC7]](s16) + ; CHECK-NEXT: [[ADD7:%[0-9]+]]:_(s16) = G_ADD [[TRUNC7]], [[COPY8]] + ; CHECK-NEXT: [[TRUNC8:%[0-9]+]]:_(s16) = G_TRUNC [[UV2]](s32) + ; CHECK-NEXT: [[COPY9:%[0-9]+]]:_(s16) = COPY [[TRUNC8]](s16) + ; CHECK-NEXT: [[ADD8:%[0-9]+]]:_(s16) = G_ADD [[TRUNC8]], [[COPY9]] + ; CHECK-NEXT: [[TRUNC9:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR6]](s32) + ; CHECK-NEXT: [[COPY10:%[0-9]+]]:_(s16) = COPY [[TRUNC9]](s16) + ; CHECK-NEXT: [[ADD9:%[0-9]+]]:_(s16) = G_ADD [[TRUNC9]], [[COPY10]] + ; CHECK-NEXT: [[TRUNC10:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR7]](s32) + ; CHECK-NEXT: [[COPY11:%[0-9]+]]:_(s16) = COPY [[TRUNC10]](s16) + ; CHECK-NEXT: [[ADD10:%[0-9]+]]:_(s16) = G_ADD [[TRUNC10]], [[COPY11]] + ; CHECK-NEXT: [[TRUNC11:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR8]](s32) + ; CHECK-NEXT: [[COPY12:%[0-9]+]]:_(s16) = COPY [[TRUNC11]](s16) + ; CHECK-NEXT: [[ADD11:%[0-9]+]]:_(s16) = G_ADD [[TRUNC11]], [[COPY12]] + ; CHECK-NEXT: [[TRUNC12:%[0-9]+]]:_(s16) = G_TRUNC [[UV3]](s32) + ; CHECK-NEXT: [[COPY13:%[0-9]+]]:_(s16) = COPY [[TRUNC12]](s16) + ; CHECK-NEXT: [[ADD12:%[0-9]+]]:_(s16) = G_ADD [[TRUNC12]], [[COPY13]] + ; CHECK-NEXT: [[TRUNC13:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR9]](s32) + ; CHECK-NEXT: [[COPY14:%[0-9]+]]:_(s16) = COPY [[TRUNC13]](s16) + ; CHECK-NEXT: [[ADD13:%[0-9]+]]:_(s16) = G_ADD [[TRUNC13]], [[COPY14]] + ; CHECK-NEXT: [[TRUNC14:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR10]](s32) + ; CHECK-NEXT: [[COPY15:%[0-9]+]]:_(s16) = COPY [[TRUNC14]](s16) + ; CHECK-NEXT: [[ADD14:%[0-9]+]]:_(s16) = G_ADD [[TRUNC14]], [[COPY15]] + ; CHECK-NEXT: [[TRUNC15:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR11]](s32) + ; CHECK-NEXT: [[COPY16:%[0-9]+]]:_(s16) = COPY [[TRUNC15]](s16) + ; CHECK-NEXT: [[ADD15:%[0-9]+]]:_(s16) = G_ADD [[TRUNC15]], [[COPY16]] + ; CHECK-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[ADD]](s16) + ; CHECK-NEXT: [[ANYEXT1:%[0-9]+]]:_(s32) = G_ANYEXT [[ADD1]](s16) + ; CHECK-NEXT: [[ANYEXT2:%[0-9]+]]:_(s32) = G_ANYEXT [[ADD2]](s16) + ; CHECK-NEXT: [[ANYEXT3:%[0-9]+]]:_(s32) = G_ANYEXT [[ADD3]](s16) + ; CHECK-NEXT: [[ANYEXT4:%[0-9]+]]:_(s32) = G_ANYEXT [[ADD4]](s16) + ; CHECK-NEXT: [[ANYEXT5:%[0-9]+]]:_(s32) = G_ANYEXT [[ADD5]](s16) + ; CHECK-NEXT: [[ANYEXT6:%[0-9]+]]:_(s32) = G_ANYEXT [[ADD6]](s16) + ; CHECK-NEXT: [[ANYEXT7:%[0-9]+]]:_(s32) = G_ANYEXT [[ADD7]](s16) + ; CHECK-NEXT: [[ANYEXT8:%[0-9]+]]:_(s32) = G_ANYEXT [[ADD8]](s16) + ; CHECK-NEXT: [[ANYEXT9:%[0-9]+]]:_(s32) = G_ANYEXT [[ADD9]](s16) + ; CHECK-NEXT: [[ANYEXT10:%[0-9]+]]:_(s32) = G_ANYEXT [[ADD10]](s16) + ; CHECK-NEXT: [[ANYEXT11:%[0-9]+]]:_(s32) = G_ANYEXT [[ADD11]](s16) + ; CHECK-NEXT: [[ANYEXT12:%[0-9]+]]:_(s32) = G_ANYEXT [[ADD12]](s16) + ; CHECK-NEXT: [[ANYEXT13:%[0-9]+]]:_(s32) = G_ANYEXT [[ADD13]](s16) + ; CHECK-NEXT: [[ANYEXT14:%[0-9]+]]:_(s32) = G_ANYEXT [[ADD14]](s16) + ; CHECK-NEXT: [[ANYEXT15:%[0-9]+]]:_(s32) = G_ANYEXT [[ADD15]](s16) + ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<16 x s32>) = G_BUILD_VECTOR [[ANYEXT]](s32), [[ANYEXT1]](s32), [[ANYEXT2]](s32), [[ANYEXT3]](s32), [[ANYEXT4]](s32), [[ANYEXT5]](s32), [[ANYEXT6]](s32), [[ANYEXT7]](s32), [[ANYEXT8]](s32), [[ANYEXT9]](s32), [[ANYEXT10]](s32), [[ANYEXT11]](s32), 
[[ANYEXT12]](s32), [[ANYEXT13]](s32), [[ANYEXT14]](s32), [[ANYEXT15]](s32) + ; CHECK-NEXT: S_ENDPGM 0, implicit [[BUILD_VECTOR]](<16 x s32>) %0:_(<4 x s32>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3 %1:_(<16 x s8>) = G_BITCAST %0 %2:_(<16 x s8>) = G_ADD %1, %1 @@ -1740,56 +1744,56 @@ body: | ; CHECK-LABEL: name: test_bitcast_v16s8_to_v4s32 ; CHECK: [[COPY:%[0-9]+]]:_(<16 x s32>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 - ; CHECK: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32), [[UV6:%[0-9]+]]:_(s32), [[UV7:%[0-9]+]]:_(s32), [[UV8:%[0-9]+]]:_(s32), [[UV9:%[0-9]+]]:_(s32), [[UV10:%[0-9]+]]:_(s32), [[UV11:%[0-9]+]]:_(s32), [[UV12:%[0-9]+]]:_(s32), [[UV13:%[0-9]+]]:_(s32), [[UV14:%[0-9]+]]:_(s32), [[UV15:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<16 x s32>) - ; CHECK: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 255 - ; CHECK: [[AND:%[0-9]+]]:_(s32) = G_AND [[UV]], [[C]] - ; CHECK: [[AND1:%[0-9]+]]:_(s32) = G_AND [[UV1]], [[C]] - ; CHECK: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 - ; CHECK: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C1]](s32) - ; CHECK: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]] - ; CHECK: [[AND2:%[0-9]+]]:_(s32) = G_AND [[UV2]], [[C]] - ; CHECK: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; CHECK: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND2]], [[C2]](s32) - ; CHECK: [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]] - ; CHECK: [[AND3:%[0-9]+]]:_(s32) = G_AND [[UV3]], [[C]] - ; CHECK: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 24 - ; CHECK: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C3]](s32) - ; CHECK: [[OR2:%[0-9]+]]:_(s32) = G_OR [[OR1]], [[SHL2]] - ; CHECK: [[UV16:%[0-9]+]]:_(s32), [[UV17:%[0-9]+]]:_(s32), [[UV18:%[0-9]+]]:_(s32), [[UV19:%[0-9]+]]:_(s32), [[UV20:%[0-9]+]]:_(s32), [[UV21:%[0-9]+]]:_(s32), [[UV22:%[0-9]+]]:_(s32), [[UV23:%[0-9]+]]:_(s32), [[UV24:%[0-9]+]]:_(s32), [[UV25:%[0-9]+]]:_(s32), [[UV26:%[0-9]+]]:_(s32), [[UV27:%[0-9]+]]:_(s32), [[UV28:%[0-9]+]]:_(s32), [[UV29:%[0-9]+]]:_(s32), [[UV30:%[0-9]+]]:_(s32), [[UV31:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<16 x s32>) - ; CHECK: [[AND4:%[0-9]+]]:_(s32) = G_AND [[UV20]], [[C]] - ; CHECK: [[AND5:%[0-9]+]]:_(s32) = G_AND [[UV21]], [[C]] - ; CHECK: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[C1]](s32) - ; CHECK: [[OR3:%[0-9]+]]:_(s32) = G_OR [[AND4]], [[SHL3]] - ; CHECK: [[AND6:%[0-9]+]]:_(s32) = G_AND [[UV22]], [[C]] - ; CHECK: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[AND6]], [[C2]](s32) - ; CHECK: [[OR4:%[0-9]+]]:_(s32) = G_OR [[OR3]], [[SHL4]] - ; CHECK: [[AND7:%[0-9]+]]:_(s32) = G_AND [[UV23]], [[C]] - ; CHECK: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[AND7]], [[C3]](s32) - ; CHECK: [[OR5:%[0-9]+]]:_(s32) = G_OR [[OR4]], [[SHL5]] - ; CHECK: [[UV32:%[0-9]+]]:_(s32), [[UV33:%[0-9]+]]:_(s32), [[UV34:%[0-9]+]]:_(s32), [[UV35:%[0-9]+]]:_(s32), [[UV36:%[0-9]+]]:_(s32), [[UV37:%[0-9]+]]:_(s32), [[UV38:%[0-9]+]]:_(s32), [[UV39:%[0-9]+]]:_(s32), [[UV40:%[0-9]+]]:_(s32), [[UV41:%[0-9]+]]:_(s32), [[UV42:%[0-9]+]]:_(s32), [[UV43:%[0-9]+]]:_(s32), [[UV44:%[0-9]+]]:_(s32), [[UV45:%[0-9]+]]:_(s32), [[UV46:%[0-9]+]]:_(s32), [[UV47:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<16 x s32>) - ; CHECK: [[AND8:%[0-9]+]]:_(s32) = G_AND [[UV40]], [[C]] - ; CHECK: [[AND9:%[0-9]+]]:_(s32) = G_AND [[UV41]], [[C]] - ; CHECK: [[SHL6:%[0-9]+]]:_(s32) = G_SHL [[AND9]], [[C1]](s32) - ; CHECK: [[OR6:%[0-9]+]]:_(s32) = G_OR [[AND8]], [[SHL6]] - ; CHECK: [[AND10:%[0-9]+]]:_(s32) = G_AND [[UV42]], [[C]] - ; CHECK: 
[[SHL7:%[0-9]+]]:_(s32) = G_SHL [[AND10]], [[C2]](s32) - ; CHECK: [[OR7:%[0-9]+]]:_(s32) = G_OR [[OR6]], [[SHL7]] - ; CHECK: [[AND11:%[0-9]+]]:_(s32) = G_AND [[UV43]], [[C]] - ; CHECK: [[SHL8:%[0-9]+]]:_(s32) = G_SHL [[AND11]], [[C3]](s32) - ; CHECK: [[OR8:%[0-9]+]]:_(s32) = G_OR [[OR7]], [[SHL8]] - ; CHECK: [[UV48:%[0-9]+]]:_(s32), [[UV49:%[0-9]+]]:_(s32), [[UV50:%[0-9]+]]:_(s32), [[UV51:%[0-9]+]]:_(s32), [[UV52:%[0-9]+]]:_(s32), [[UV53:%[0-9]+]]:_(s32), [[UV54:%[0-9]+]]:_(s32), [[UV55:%[0-9]+]]:_(s32), [[UV56:%[0-9]+]]:_(s32), [[UV57:%[0-9]+]]:_(s32), [[UV58:%[0-9]+]]:_(s32), [[UV59:%[0-9]+]]:_(s32), [[UV60:%[0-9]+]]:_(s32), [[UV61:%[0-9]+]]:_(s32), [[UV62:%[0-9]+]]:_(s32), [[UV63:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<16 x s32>) - ; CHECK: [[AND12:%[0-9]+]]:_(s32) = G_AND [[UV60]], [[C]] - ; CHECK: [[AND13:%[0-9]+]]:_(s32) = G_AND [[UV61]], [[C]] - ; CHECK: [[SHL9:%[0-9]+]]:_(s32) = G_SHL [[AND13]], [[C1]](s32) - ; CHECK: [[OR9:%[0-9]+]]:_(s32) = G_OR [[AND12]], [[SHL9]] - ; CHECK: [[AND14:%[0-9]+]]:_(s32) = G_AND [[UV62]], [[C]] - ; CHECK: [[SHL10:%[0-9]+]]:_(s32) = G_SHL [[AND14]], [[C2]](s32) - ; CHECK: [[OR10:%[0-9]+]]:_(s32) = G_OR [[OR9]], [[SHL10]] - ; CHECK: [[AND15:%[0-9]+]]:_(s32) = G_AND [[UV63]], [[C]] - ; CHECK: [[SHL11:%[0-9]+]]:_(s32) = G_SHL [[AND15]], [[C3]](s32) - ; CHECK: [[OR11:%[0-9]+]]:_(s32) = G_OR [[OR10]], [[SHL11]] - ; CHECK: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[OR2]](s32), [[OR5]](s32), [[OR8]](s32), [[OR11]](s32) - ; CHECK: S_ENDPGM 0, implicit [[BUILD_VECTOR]](<4 x s32>) + ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32), [[UV6:%[0-9]+]]:_(s32), [[UV7:%[0-9]+]]:_(s32), [[UV8:%[0-9]+]]:_(s32), [[UV9:%[0-9]+]]:_(s32), [[UV10:%[0-9]+]]:_(s32), [[UV11:%[0-9]+]]:_(s32), [[UV12:%[0-9]+]]:_(s32), [[UV13:%[0-9]+]]:_(s32), [[UV14:%[0-9]+]]:_(s32), [[UV15:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<16 x s32>) + ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 255 + ; CHECK-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[UV]], [[C]] + ; CHECK-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[UV1]], [[C]] + ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 + ; CHECK-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C1]](s32) + ; CHECK-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]] + ; CHECK-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[UV2]], [[C]] + ; CHECK-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; CHECK-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND2]], [[C2]](s32) + ; CHECK-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]] + ; CHECK-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[UV3]], [[C]] + ; CHECK-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 24 + ; CHECK-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C3]](s32) + ; CHECK-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[OR1]], [[SHL2]] + ; CHECK-NEXT: [[UV16:%[0-9]+]]:_(s32), [[UV17:%[0-9]+]]:_(s32), [[UV18:%[0-9]+]]:_(s32), [[UV19:%[0-9]+]]:_(s32), [[UV20:%[0-9]+]]:_(s32), [[UV21:%[0-9]+]]:_(s32), [[UV22:%[0-9]+]]:_(s32), [[UV23:%[0-9]+]]:_(s32), [[UV24:%[0-9]+]]:_(s32), [[UV25:%[0-9]+]]:_(s32), [[UV26:%[0-9]+]]:_(s32), [[UV27:%[0-9]+]]:_(s32), [[UV28:%[0-9]+]]:_(s32), [[UV29:%[0-9]+]]:_(s32), [[UV30:%[0-9]+]]:_(s32), [[UV31:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<16 x s32>) + ; CHECK-NEXT: [[AND4:%[0-9]+]]:_(s32) = G_AND [[UV20]], [[C]] + ; CHECK-NEXT: [[AND5:%[0-9]+]]:_(s32) = G_AND [[UV21]], [[C]] + ; CHECK-NEXT: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[C1]](s32) + ; CHECK-NEXT: 
[[OR3:%[0-9]+]]:_(s32) = G_OR [[AND4]], [[SHL3]] + ; CHECK-NEXT: [[AND6:%[0-9]+]]:_(s32) = G_AND [[UV22]], [[C]] + ; CHECK-NEXT: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[AND6]], [[C2]](s32) + ; CHECK-NEXT: [[OR4:%[0-9]+]]:_(s32) = G_OR [[OR3]], [[SHL4]] + ; CHECK-NEXT: [[AND7:%[0-9]+]]:_(s32) = G_AND [[UV23]], [[C]] + ; CHECK-NEXT: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[AND7]], [[C3]](s32) + ; CHECK-NEXT: [[OR5:%[0-9]+]]:_(s32) = G_OR [[OR4]], [[SHL5]] + ; CHECK-NEXT: [[UV32:%[0-9]+]]:_(s32), [[UV33:%[0-9]+]]:_(s32), [[UV34:%[0-9]+]]:_(s32), [[UV35:%[0-9]+]]:_(s32), [[UV36:%[0-9]+]]:_(s32), [[UV37:%[0-9]+]]:_(s32), [[UV38:%[0-9]+]]:_(s32), [[UV39:%[0-9]+]]:_(s32), [[UV40:%[0-9]+]]:_(s32), [[UV41:%[0-9]+]]:_(s32), [[UV42:%[0-9]+]]:_(s32), [[UV43:%[0-9]+]]:_(s32), [[UV44:%[0-9]+]]:_(s32), [[UV45:%[0-9]+]]:_(s32), [[UV46:%[0-9]+]]:_(s32), [[UV47:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<16 x s32>) + ; CHECK-NEXT: [[AND8:%[0-9]+]]:_(s32) = G_AND [[UV40]], [[C]] + ; CHECK-NEXT: [[AND9:%[0-9]+]]:_(s32) = G_AND [[UV41]], [[C]] + ; CHECK-NEXT: [[SHL6:%[0-9]+]]:_(s32) = G_SHL [[AND9]], [[C1]](s32) + ; CHECK-NEXT: [[OR6:%[0-9]+]]:_(s32) = G_OR [[AND8]], [[SHL6]] + ; CHECK-NEXT: [[AND10:%[0-9]+]]:_(s32) = G_AND [[UV42]], [[C]] + ; CHECK-NEXT: [[SHL7:%[0-9]+]]:_(s32) = G_SHL [[AND10]], [[C2]](s32) + ; CHECK-NEXT: [[OR7:%[0-9]+]]:_(s32) = G_OR [[OR6]], [[SHL7]] + ; CHECK-NEXT: [[AND11:%[0-9]+]]:_(s32) = G_AND [[UV43]], [[C]] + ; CHECK-NEXT: [[SHL8:%[0-9]+]]:_(s32) = G_SHL [[AND11]], [[C3]](s32) + ; CHECK-NEXT: [[OR8:%[0-9]+]]:_(s32) = G_OR [[OR7]], [[SHL8]] + ; CHECK-NEXT: [[UV48:%[0-9]+]]:_(s32), [[UV49:%[0-9]+]]:_(s32), [[UV50:%[0-9]+]]:_(s32), [[UV51:%[0-9]+]]:_(s32), [[UV52:%[0-9]+]]:_(s32), [[UV53:%[0-9]+]]:_(s32), [[UV54:%[0-9]+]]:_(s32), [[UV55:%[0-9]+]]:_(s32), [[UV56:%[0-9]+]]:_(s32), [[UV57:%[0-9]+]]:_(s32), [[UV58:%[0-9]+]]:_(s32), [[UV59:%[0-9]+]]:_(s32), [[UV60:%[0-9]+]]:_(s32), [[UV61:%[0-9]+]]:_(s32), [[UV62:%[0-9]+]]:_(s32), [[UV63:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<16 x s32>) + ; CHECK-NEXT: [[AND12:%[0-9]+]]:_(s32) = G_AND [[UV60]], [[C]] + ; CHECK-NEXT: [[AND13:%[0-9]+]]:_(s32) = G_AND [[UV61]], [[C]] + ; CHECK-NEXT: [[SHL9:%[0-9]+]]:_(s32) = G_SHL [[AND13]], [[C1]](s32) + ; CHECK-NEXT: [[OR9:%[0-9]+]]:_(s32) = G_OR [[AND12]], [[SHL9]] + ; CHECK-NEXT: [[AND14:%[0-9]+]]:_(s32) = G_AND [[UV62]], [[C]] + ; CHECK-NEXT: [[SHL10:%[0-9]+]]:_(s32) = G_SHL [[AND14]], [[C2]](s32) + ; CHECK-NEXT: [[OR10:%[0-9]+]]:_(s32) = G_OR [[OR9]], [[SHL10]] + ; CHECK-NEXT: [[AND15:%[0-9]+]]:_(s32) = G_AND [[UV63]], [[C]] + ; CHECK-NEXT: [[SHL11:%[0-9]+]]:_(s32) = G_SHL [[AND15]], [[C3]](s32) + ; CHECK-NEXT: [[OR11:%[0-9]+]]:_(s32) = G_OR [[OR10]], [[SHL11]] + ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[OR2]](s32), [[OR5]](s32), [[OR8]](s32), [[OR11]](s32) + ; CHECK-NEXT: S_ENDPGM 0, implicit [[BUILD_VECTOR]](<4 x s32>) %0:_(<16 x s32>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 %1:_(<16 x s8>) = G_TRUNC %0 %2:_(<4 x s32>) = G_BITCAST %1 @@ -1804,67 +1808,67 @@ body: | ; CHECK-LABEL: name: test_bitcast_v8s16_to_v16s8 ; CHECK: [[COPY:%[0-9]+]]:_(<8 x s16>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3 - ; CHECK: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>), [[UV2:%[0-9]+]]:_(<2 x s16>), [[UV3:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[COPY]](<8 x s16>) - ; CHECK: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>) - ; CHECK: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST]](s32) - ; CHECK: [[C:%[0-9]+]]:_(s32) 
= G_CONSTANT i32 16 - ; CHECK: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) - ; CHECK: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) - ; CHECK: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>) - ; CHECK: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST1]](s32) - ; CHECK: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C]](s32) - ; CHECK: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR1]](s32) - ; CHECK: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[UV2]](<2 x s16>) - ; CHECK: [[TRUNC4:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST2]](s32) - ; CHECK: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C]](s32) - ; CHECK: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR2]](s32) - ; CHECK: [[BITCAST3:%[0-9]+]]:_(s32) = G_BITCAST [[UV3]](<2 x s16>) - ; CHECK: [[TRUNC6:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST3]](s32) - ; CHECK: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST3]], [[C]](s32) - ; CHECK: [[TRUNC7:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR3]](s32) - ; CHECK: [[C1:%[0-9]+]]:_(s16) = G_CONSTANT i16 8 - ; CHECK: [[LSHR4:%[0-9]+]]:_(s16) = G_LSHR [[TRUNC]], [[C1]](s16) - ; CHECK: [[LSHR5:%[0-9]+]]:_(s16) = G_LSHR [[TRUNC1]], [[C1]](s16) - ; CHECK: [[LSHR6:%[0-9]+]]:_(s16) = G_LSHR [[TRUNC2]], [[C1]](s16) - ; CHECK: [[LSHR7:%[0-9]+]]:_(s16) = G_LSHR [[TRUNC3]], [[C1]](s16) - ; CHECK: [[LSHR8:%[0-9]+]]:_(s16) = G_LSHR [[TRUNC4]], [[C1]](s16) - ; CHECK: [[LSHR9:%[0-9]+]]:_(s16) = G_LSHR [[TRUNC5]], [[C1]](s16) - ; CHECK: [[LSHR10:%[0-9]+]]:_(s16) = G_LSHR [[TRUNC6]], [[C1]](s16) - ; CHECK: [[LSHR11:%[0-9]+]]:_(s16) = G_LSHR [[TRUNC7]], [[C1]](s16) - ; CHECK: [[ADD:%[0-9]+]]:_(s16) = G_ADD [[TRUNC]], [[TRUNC]] - ; CHECK: [[ADD1:%[0-9]+]]:_(s16) = G_ADD [[LSHR4]], [[LSHR4]] - ; CHECK: [[ADD2:%[0-9]+]]:_(s16) = G_ADD [[TRUNC1]], [[TRUNC1]] - ; CHECK: [[ADD3:%[0-9]+]]:_(s16) = G_ADD [[LSHR5]], [[LSHR5]] - ; CHECK: [[ADD4:%[0-9]+]]:_(s16) = G_ADD [[TRUNC2]], [[TRUNC2]] - ; CHECK: [[ADD5:%[0-9]+]]:_(s16) = G_ADD [[LSHR6]], [[LSHR6]] - ; CHECK: [[ADD6:%[0-9]+]]:_(s16) = G_ADD [[TRUNC3]], [[TRUNC3]] - ; CHECK: [[ADD7:%[0-9]+]]:_(s16) = G_ADD [[LSHR7]], [[LSHR7]] - ; CHECK: [[ADD8:%[0-9]+]]:_(s16) = G_ADD [[TRUNC4]], [[TRUNC4]] - ; CHECK: [[ADD9:%[0-9]+]]:_(s16) = G_ADD [[LSHR8]], [[LSHR8]] - ; CHECK: [[ADD10:%[0-9]+]]:_(s16) = G_ADD [[TRUNC5]], [[TRUNC5]] - ; CHECK: [[ADD11:%[0-9]+]]:_(s16) = G_ADD [[LSHR9]], [[LSHR9]] - ; CHECK: [[ADD12:%[0-9]+]]:_(s16) = G_ADD [[TRUNC6]], [[TRUNC6]] - ; CHECK: [[ADD13:%[0-9]+]]:_(s16) = G_ADD [[LSHR10]], [[LSHR10]] - ; CHECK: [[ADD14:%[0-9]+]]:_(s16) = G_ADD [[TRUNC7]], [[TRUNC7]] - ; CHECK: [[ADD15:%[0-9]+]]:_(s16) = G_ADD [[LSHR11]], [[LSHR11]] - ; CHECK: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[ADD]](s16) - ; CHECK: [[ANYEXT1:%[0-9]+]]:_(s32) = G_ANYEXT [[ADD1]](s16) - ; CHECK: [[ANYEXT2:%[0-9]+]]:_(s32) = G_ANYEXT [[ADD2]](s16) - ; CHECK: [[ANYEXT3:%[0-9]+]]:_(s32) = G_ANYEXT [[ADD3]](s16) - ; CHECK: [[ANYEXT4:%[0-9]+]]:_(s32) = G_ANYEXT [[ADD4]](s16) - ; CHECK: [[ANYEXT5:%[0-9]+]]:_(s32) = G_ANYEXT [[ADD5]](s16) - ; CHECK: [[ANYEXT6:%[0-9]+]]:_(s32) = G_ANYEXT [[ADD6]](s16) - ; CHECK: [[ANYEXT7:%[0-9]+]]:_(s32) = G_ANYEXT [[ADD7]](s16) - ; CHECK: [[ANYEXT8:%[0-9]+]]:_(s32) = G_ANYEXT [[ADD8]](s16) - ; CHECK: [[ANYEXT9:%[0-9]+]]:_(s32) = G_ANYEXT [[ADD9]](s16) - ; CHECK: [[ANYEXT10:%[0-9]+]]:_(s32) = G_ANYEXT [[ADD10]](s16) - ; CHECK: [[ANYEXT11:%[0-9]+]]:_(s32) = G_ANYEXT [[ADD11]](s16) - ; CHECK: [[ANYEXT12:%[0-9]+]]:_(s32) = G_ANYEXT [[ADD12]](s16) - ; CHECK: [[ANYEXT13:%[0-9]+]]:_(s32) = G_ANYEXT [[ADD13]](s16) - ; CHECK: [[ANYEXT14:%[0-9]+]]:_(s32) = G_ANYEXT [[ADD14]](s16) 
- ; CHECK: [[ANYEXT15:%[0-9]+]]:_(s32) = G_ANYEXT [[ADD15]](s16) - ; CHECK: [[BUILD_VECTOR:%[0-9]+]]:_(<16 x s32>) = G_BUILD_VECTOR [[ANYEXT]](s32), [[ANYEXT1]](s32), [[ANYEXT2]](s32), [[ANYEXT3]](s32), [[ANYEXT4]](s32), [[ANYEXT5]](s32), [[ANYEXT6]](s32), [[ANYEXT7]](s32), [[ANYEXT8]](s32), [[ANYEXT9]](s32), [[ANYEXT10]](s32), [[ANYEXT11]](s32), [[ANYEXT12]](s32), [[ANYEXT13]](s32), [[ANYEXT14]](s32), [[ANYEXT15]](s32) - ; CHECK: S_ENDPGM 0, implicit [[BUILD_VECTOR]](<16 x s32>) + ; CHECK-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>), [[UV2:%[0-9]+]]:_(<2 x s16>), [[UV3:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[COPY]](<8 x s16>) + ; CHECK-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>) + ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST]](s32) + ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; CHECK-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) + ; CHECK-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) + ; CHECK-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>) + ; CHECK-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST1]](s32) + ; CHECK-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C]](s32) + ; CHECK-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR1]](s32) + ; CHECK-NEXT: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[UV2]](<2 x s16>) + ; CHECK-NEXT: [[TRUNC4:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST2]](s32) + ; CHECK-NEXT: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C]](s32) + ; CHECK-NEXT: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR2]](s32) + ; CHECK-NEXT: [[BITCAST3:%[0-9]+]]:_(s32) = G_BITCAST [[UV3]](<2 x s16>) + ; CHECK-NEXT: [[TRUNC6:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST3]](s32) + ; CHECK-NEXT: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST3]], [[C]](s32) + ; CHECK-NEXT: [[TRUNC7:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR3]](s32) + ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s16) = G_CONSTANT i16 8 + ; CHECK-NEXT: [[LSHR4:%[0-9]+]]:_(s16) = G_LSHR [[TRUNC]], [[C1]](s16) + ; CHECK-NEXT: [[LSHR5:%[0-9]+]]:_(s16) = G_LSHR [[TRUNC1]], [[C1]](s16) + ; CHECK-NEXT: [[LSHR6:%[0-9]+]]:_(s16) = G_LSHR [[TRUNC2]], [[C1]](s16) + ; CHECK-NEXT: [[LSHR7:%[0-9]+]]:_(s16) = G_LSHR [[TRUNC3]], [[C1]](s16) + ; CHECK-NEXT: [[LSHR8:%[0-9]+]]:_(s16) = G_LSHR [[TRUNC4]], [[C1]](s16) + ; CHECK-NEXT: [[LSHR9:%[0-9]+]]:_(s16) = G_LSHR [[TRUNC5]], [[C1]](s16) + ; CHECK-NEXT: [[LSHR10:%[0-9]+]]:_(s16) = G_LSHR [[TRUNC6]], [[C1]](s16) + ; CHECK-NEXT: [[LSHR11:%[0-9]+]]:_(s16) = G_LSHR [[TRUNC7]], [[C1]](s16) + ; CHECK-NEXT: [[ADD:%[0-9]+]]:_(s16) = G_ADD [[TRUNC]], [[TRUNC]] + ; CHECK-NEXT: [[ADD1:%[0-9]+]]:_(s16) = G_ADD [[LSHR4]], [[LSHR4]] + ; CHECK-NEXT: [[ADD2:%[0-9]+]]:_(s16) = G_ADD [[TRUNC1]], [[TRUNC1]] + ; CHECK-NEXT: [[ADD3:%[0-9]+]]:_(s16) = G_ADD [[LSHR5]], [[LSHR5]] + ; CHECK-NEXT: [[ADD4:%[0-9]+]]:_(s16) = G_ADD [[TRUNC2]], [[TRUNC2]] + ; CHECK-NEXT: [[ADD5:%[0-9]+]]:_(s16) = G_ADD [[LSHR6]], [[LSHR6]] + ; CHECK-NEXT: [[ADD6:%[0-9]+]]:_(s16) = G_ADD [[TRUNC3]], [[TRUNC3]] + ; CHECK-NEXT: [[ADD7:%[0-9]+]]:_(s16) = G_ADD [[LSHR7]], [[LSHR7]] + ; CHECK-NEXT: [[ADD8:%[0-9]+]]:_(s16) = G_ADD [[TRUNC4]], [[TRUNC4]] + ; CHECK-NEXT: [[ADD9:%[0-9]+]]:_(s16) = G_ADD [[LSHR8]], [[LSHR8]] + ; CHECK-NEXT: [[ADD10:%[0-9]+]]:_(s16) = G_ADD [[TRUNC5]], [[TRUNC5]] + ; CHECK-NEXT: [[ADD11:%[0-9]+]]:_(s16) = G_ADD [[LSHR9]], [[LSHR9]] + ; CHECK-NEXT: [[ADD12:%[0-9]+]]:_(s16) = G_ADD [[TRUNC6]], [[TRUNC6]] + ; CHECK-NEXT: [[ADD13:%[0-9]+]]:_(s16) = G_ADD [[LSHR10]], [[LSHR10]] + ; CHECK-NEXT: [[ADD14:%[0-9]+]]:_(s16) = G_ADD [[TRUNC7]], 
[[TRUNC7]] + ; CHECK-NEXT: [[ADD15:%[0-9]+]]:_(s16) = G_ADD [[LSHR11]], [[LSHR11]] + ; CHECK-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[ADD]](s16) + ; CHECK-NEXT: [[ANYEXT1:%[0-9]+]]:_(s32) = G_ANYEXT [[ADD1]](s16) + ; CHECK-NEXT: [[ANYEXT2:%[0-9]+]]:_(s32) = G_ANYEXT [[ADD2]](s16) + ; CHECK-NEXT: [[ANYEXT3:%[0-9]+]]:_(s32) = G_ANYEXT [[ADD3]](s16) + ; CHECK-NEXT: [[ANYEXT4:%[0-9]+]]:_(s32) = G_ANYEXT [[ADD4]](s16) + ; CHECK-NEXT: [[ANYEXT5:%[0-9]+]]:_(s32) = G_ANYEXT [[ADD5]](s16) + ; CHECK-NEXT: [[ANYEXT6:%[0-9]+]]:_(s32) = G_ANYEXT [[ADD6]](s16) + ; CHECK-NEXT: [[ANYEXT7:%[0-9]+]]:_(s32) = G_ANYEXT [[ADD7]](s16) + ; CHECK-NEXT: [[ANYEXT8:%[0-9]+]]:_(s32) = G_ANYEXT [[ADD8]](s16) + ; CHECK-NEXT: [[ANYEXT9:%[0-9]+]]:_(s32) = G_ANYEXT [[ADD9]](s16) + ; CHECK-NEXT: [[ANYEXT10:%[0-9]+]]:_(s32) = G_ANYEXT [[ADD10]](s16) + ; CHECK-NEXT: [[ANYEXT11:%[0-9]+]]:_(s32) = G_ANYEXT [[ADD11]](s16) + ; CHECK-NEXT: [[ANYEXT12:%[0-9]+]]:_(s32) = G_ANYEXT [[ADD12]](s16) + ; CHECK-NEXT: [[ANYEXT13:%[0-9]+]]:_(s32) = G_ANYEXT [[ADD13]](s16) + ; CHECK-NEXT: [[ANYEXT14:%[0-9]+]]:_(s32) = G_ANYEXT [[ADD14]](s16) + ; CHECK-NEXT: [[ANYEXT15:%[0-9]+]]:_(s32) = G_ANYEXT [[ADD15]](s16) + ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<16 x s32>) = G_BUILD_VECTOR [[ANYEXT]](s32), [[ANYEXT1]](s32), [[ANYEXT2]](s32), [[ANYEXT3]](s32), [[ANYEXT4]](s32), [[ANYEXT5]](s32), [[ANYEXT6]](s32), [[ANYEXT7]](s32), [[ANYEXT8]](s32), [[ANYEXT9]](s32), [[ANYEXT10]](s32), [[ANYEXT11]](s32), [[ANYEXT12]](s32), [[ANYEXT13]](s32), [[ANYEXT14]](s32), [[ANYEXT15]](s32) + ; CHECK-NEXT: S_ENDPGM 0, implicit [[BUILD_VECTOR]](<16 x s32>) %0:_(<8 x s16>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3 %1:_(<16 x s8>) = G_BITCAST %0 %2:_(<16 x s8>) = G_ADD %1, %1 @@ -1880,87 +1884,87 @@ body: | ; CHECK-LABEL: name: test_bitcast_v16s8_to_v8s16 ; CHECK: [[COPY:%[0-9]+]]:_(<16 x s32>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 - ; CHECK: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32), [[UV6:%[0-9]+]]:_(s32), [[UV7:%[0-9]+]]:_(s32), [[UV8:%[0-9]+]]:_(s32), [[UV9:%[0-9]+]]:_(s32), [[UV10:%[0-9]+]]:_(s32), [[UV11:%[0-9]+]]:_(s32), [[UV12:%[0-9]+]]:_(s32), [[UV13:%[0-9]+]]:_(s32), [[UV14:%[0-9]+]]:_(s32), [[UV15:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<16 x s32>) - ; CHECK: [[C:%[0-9]+]]:_(s16) = G_CONSTANT i16 255 - ; CHECK: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[UV]](s32) - ; CHECK: [[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC]], [[C]] - ; CHECK: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[UV1]](s32) - ; CHECK: [[AND1:%[0-9]+]]:_(s16) = G_AND [[TRUNC1]], [[C]] - ; CHECK: [[C1:%[0-9]+]]:_(s16) = G_CONSTANT i16 8 - ; CHECK: [[SHL:%[0-9]+]]:_(s16) = G_SHL [[AND1]], [[C1]](s16) - ; CHECK: [[OR:%[0-9]+]]:_(s16) = G_OR [[AND]], [[SHL]] - ; CHECK: [[UV16:%[0-9]+]]:_(s32), [[UV17:%[0-9]+]]:_(s32), [[UV18:%[0-9]+]]:_(s32), [[UV19:%[0-9]+]]:_(s32), [[UV20:%[0-9]+]]:_(s32), [[UV21:%[0-9]+]]:_(s32), [[UV22:%[0-9]+]]:_(s32), [[UV23:%[0-9]+]]:_(s32), [[UV24:%[0-9]+]]:_(s32), [[UV25:%[0-9]+]]:_(s32), [[UV26:%[0-9]+]]:_(s32), [[UV27:%[0-9]+]]:_(s32), [[UV28:%[0-9]+]]:_(s32), [[UV29:%[0-9]+]]:_(s32), [[UV30:%[0-9]+]]:_(s32), [[UV31:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<16 x s32>) - ; CHECK: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[UV18]](s32) - ; CHECK: [[AND2:%[0-9]+]]:_(s16) = G_AND [[TRUNC2]], [[C]] - ; CHECK: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[UV19]](s32) - ; CHECK: [[AND3:%[0-9]+]]:_(s16) = G_AND [[TRUNC3]], [[C]] - ; 
CHECK: [[SHL1:%[0-9]+]]:_(s16) = G_SHL [[AND3]], [[C1]](s16) - ; CHECK: [[OR1:%[0-9]+]]:_(s16) = G_OR [[AND2]], [[SHL1]] - ; CHECK: [[UV32:%[0-9]+]]:_(s32), [[UV33:%[0-9]+]]:_(s32), [[UV34:%[0-9]+]]:_(s32), [[UV35:%[0-9]+]]:_(s32), [[UV36:%[0-9]+]]:_(s32), [[UV37:%[0-9]+]]:_(s32), [[UV38:%[0-9]+]]:_(s32), [[UV39:%[0-9]+]]:_(s32), [[UV40:%[0-9]+]]:_(s32), [[UV41:%[0-9]+]]:_(s32), [[UV42:%[0-9]+]]:_(s32), [[UV43:%[0-9]+]]:_(s32), [[UV44:%[0-9]+]]:_(s32), [[UV45:%[0-9]+]]:_(s32), [[UV46:%[0-9]+]]:_(s32), [[UV47:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<16 x s32>) - ; CHECK: [[TRUNC4:%[0-9]+]]:_(s16) = G_TRUNC [[UV36]](s32) - ; CHECK: [[AND4:%[0-9]+]]:_(s16) = G_AND [[TRUNC4]], [[C]] - ; CHECK: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[UV37]](s32) - ; CHECK: [[AND5:%[0-9]+]]:_(s16) = G_AND [[TRUNC5]], [[C]] - ; CHECK: [[SHL2:%[0-9]+]]:_(s16) = G_SHL [[AND5]], [[C1]](s16) - ; CHECK: [[OR2:%[0-9]+]]:_(s16) = G_OR [[AND4]], [[SHL2]] - ; CHECK: [[UV48:%[0-9]+]]:_(s32), [[UV49:%[0-9]+]]:_(s32), [[UV50:%[0-9]+]]:_(s32), [[UV51:%[0-9]+]]:_(s32), [[UV52:%[0-9]+]]:_(s32), [[UV53:%[0-9]+]]:_(s32), [[UV54:%[0-9]+]]:_(s32), [[UV55:%[0-9]+]]:_(s32), [[UV56:%[0-9]+]]:_(s32), [[UV57:%[0-9]+]]:_(s32), [[UV58:%[0-9]+]]:_(s32), [[UV59:%[0-9]+]]:_(s32), [[UV60:%[0-9]+]]:_(s32), [[UV61:%[0-9]+]]:_(s32), [[UV62:%[0-9]+]]:_(s32), [[UV63:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<16 x s32>) - ; CHECK: [[TRUNC6:%[0-9]+]]:_(s16) = G_TRUNC [[UV54]](s32) - ; CHECK: [[AND6:%[0-9]+]]:_(s16) = G_AND [[TRUNC6]], [[C]] - ; CHECK: [[TRUNC7:%[0-9]+]]:_(s16) = G_TRUNC [[UV55]](s32) - ; CHECK: [[AND7:%[0-9]+]]:_(s16) = G_AND [[TRUNC7]], [[C]] - ; CHECK: [[SHL3:%[0-9]+]]:_(s16) = G_SHL [[AND7]], [[C1]](s16) - ; CHECK: [[OR3:%[0-9]+]]:_(s16) = G_OR [[AND6]], [[SHL3]] - ; CHECK: [[UV64:%[0-9]+]]:_(s32), [[UV65:%[0-9]+]]:_(s32), [[UV66:%[0-9]+]]:_(s32), [[UV67:%[0-9]+]]:_(s32), [[UV68:%[0-9]+]]:_(s32), [[UV69:%[0-9]+]]:_(s32), [[UV70:%[0-9]+]]:_(s32), [[UV71:%[0-9]+]]:_(s32), [[UV72:%[0-9]+]]:_(s32), [[UV73:%[0-9]+]]:_(s32), [[UV74:%[0-9]+]]:_(s32), [[UV75:%[0-9]+]]:_(s32), [[UV76:%[0-9]+]]:_(s32), [[UV77:%[0-9]+]]:_(s32), [[UV78:%[0-9]+]]:_(s32), [[UV79:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<16 x s32>) - ; CHECK: [[TRUNC8:%[0-9]+]]:_(s16) = G_TRUNC [[UV72]](s32) - ; CHECK: [[AND8:%[0-9]+]]:_(s16) = G_AND [[TRUNC8]], [[C]] - ; CHECK: [[TRUNC9:%[0-9]+]]:_(s16) = G_TRUNC [[UV73]](s32) - ; CHECK: [[AND9:%[0-9]+]]:_(s16) = G_AND [[TRUNC9]], [[C]] - ; CHECK: [[SHL4:%[0-9]+]]:_(s16) = G_SHL [[AND9]], [[C1]](s16) - ; CHECK: [[OR4:%[0-9]+]]:_(s16) = G_OR [[AND8]], [[SHL4]] - ; CHECK: [[UV80:%[0-9]+]]:_(s32), [[UV81:%[0-9]+]]:_(s32), [[UV82:%[0-9]+]]:_(s32), [[UV83:%[0-9]+]]:_(s32), [[UV84:%[0-9]+]]:_(s32), [[UV85:%[0-9]+]]:_(s32), [[UV86:%[0-9]+]]:_(s32), [[UV87:%[0-9]+]]:_(s32), [[UV88:%[0-9]+]]:_(s32), [[UV89:%[0-9]+]]:_(s32), [[UV90:%[0-9]+]]:_(s32), [[UV91:%[0-9]+]]:_(s32), [[UV92:%[0-9]+]]:_(s32), [[UV93:%[0-9]+]]:_(s32), [[UV94:%[0-9]+]]:_(s32), [[UV95:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<16 x s32>) - ; CHECK: [[TRUNC10:%[0-9]+]]:_(s16) = G_TRUNC [[UV90]](s32) - ; CHECK: [[AND10:%[0-9]+]]:_(s16) = G_AND [[TRUNC10]], [[C]] - ; CHECK: [[TRUNC11:%[0-9]+]]:_(s16) = G_TRUNC [[UV91]](s32) - ; CHECK: [[AND11:%[0-9]+]]:_(s16) = G_AND [[TRUNC11]], [[C]] - ; CHECK: [[SHL5:%[0-9]+]]:_(s16) = G_SHL [[AND11]], [[C1]](s16) - ; CHECK: [[OR5:%[0-9]+]]:_(s16) = G_OR [[AND10]], [[SHL5]] - ; CHECK: [[UV96:%[0-9]+]]:_(s32), [[UV97:%[0-9]+]]:_(s32), [[UV98:%[0-9]+]]:_(s32), [[UV99:%[0-9]+]]:_(s32), [[UV100:%[0-9]+]]:_(s32), 
[[UV101:%[0-9]+]]:_(s32), [[UV102:%[0-9]+]]:_(s32), [[UV103:%[0-9]+]]:_(s32), [[UV104:%[0-9]+]]:_(s32), [[UV105:%[0-9]+]]:_(s32), [[UV106:%[0-9]+]]:_(s32), [[UV107:%[0-9]+]]:_(s32), [[UV108:%[0-9]+]]:_(s32), [[UV109:%[0-9]+]]:_(s32), [[UV110:%[0-9]+]]:_(s32), [[UV111:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<16 x s32>) - ; CHECK: [[TRUNC12:%[0-9]+]]:_(s16) = G_TRUNC [[UV108]](s32) - ; CHECK: [[AND12:%[0-9]+]]:_(s16) = G_AND [[TRUNC12]], [[C]] - ; CHECK: [[TRUNC13:%[0-9]+]]:_(s16) = G_TRUNC [[UV109]](s32) - ; CHECK: [[AND13:%[0-9]+]]:_(s16) = G_AND [[TRUNC13]], [[C]] - ; CHECK: [[SHL6:%[0-9]+]]:_(s16) = G_SHL [[AND13]], [[C1]](s16) - ; CHECK: [[OR6:%[0-9]+]]:_(s16) = G_OR [[AND12]], [[SHL6]] - ; CHECK: [[UV112:%[0-9]+]]:_(s32), [[UV113:%[0-9]+]]:_(s32), [[UV114:%[0-9]+]]:_(s32), [[UV115:%[0-9]+]]:_(s32), [[UV116:%[0-9]+]]:_(s32), [[UV117:%[0-9]+]]:_(s32), [[UV118:%[0-9]+]]:_(s32), [[UV119:%[0-9]+]]:_(s32), [[UV120:%[0-9]+]]:_(s32), [[UV121:%[0-9]+]]:_(s32), [[UV122:%[0-9]+]]:_(s32), [[UV123:%[0-9]+]]:_(s32), [[UV124:%[0-9]+]]:_(s32), [[UV125:%[0-9]+]]:_(s32), [[UV126:%[0-9]+]]:_(s32), [[UV127:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<16 x s32>) - ; CHECK: [[TRUNC14:%[0-9]+]]:_(s16) = G_TRUNC [[UV126]](s32) - ; CHECK: [[AND14:%[0-9]+]]:_(s16) = G_AND [[TRUNC14]], [[C]] - ; CHECK: [[TRUNC15:%[0-9]+]]:_(s16) = G_TRUNC [[UV127]](s32) - ; CHECK: [[AND15:%[0-9]+]]:_(s16) = G_AND [[TRUNC15]], [[C]] - ; CHECK: [[SHL7:%[0-9]+]]:_(s16) = G_SHL [[AND15]], [[C1]](s16) - ; CHECK: [[OR7:%[0-9]+]]:_(s16) = G_OR [[AND14]], [[SHL7]] - ; CHECK: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[OR]](s16) - ; CHECK: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[OR1]](s16) - ; CHECK: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; CHECK: [[SHL8:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C2]](s32) - ; CHECK: [[OR8:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL8]] - ; CHECK: [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR8]](s32) - ; CHECK: [[ZEXT2:%[0-9]+]]:_(s32) = G_ZEXT [[OR2]](s16) - ; CHECK: [[ZEXT3:%[0-9]+]]:_(s32) = G_ZEXT [[OR3]](s16) - ; CHECK: [[SHL9:%[0-9]+]]:_(s32) = G_SHL [[ZEXT3]], [[C2]](s32) - ; CHECK: [[OR9:%[0-9]+]]:_(s32) = G_OR [[ZEXT2]], [[SHL9]] - ; CHECK: [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR9]](s32) - ; CHECK: [[ZEXT4:%[0-9]+]]:_(s32) = G_ZEXT [[OR4]](s16) - ; CHECK: [[ZEXT5:%[0-9]+]]:_(s32) = G_ZEXT [[OR5]](s16) - ; CHECK: [[SHL10:%[0-9]+]]:_(s32) = G_SHL [[ZEXT5]], [[C2]](s32) - ; CHECK: [[OR10:%[0-9]+]]:_(s32) = G_OR [[ZEXT4]], [[SHL10]] - ; CHECK: [[BITCAST2:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR10]](s32) - ; CHECK: [[ZEXT6:%[0-9]+]]:_(s32) = G_ZEXT [[OR6]](s16) - ; CHECK: [[ZEXT7:%[0-9]+]]:_(s32) = G_ZEXT [[OR7]](s16) - ; CHECK: [[SHL11:%[0-9]+]]:_(s32) = G_SHL [[ZEXT7]], [[C2]](s32) - ; CHECK: [[OR11:%[0-9]+]]:_(s32) = G_OR [[ZEXT6]], [[SHL11]] - ; CHECK: [[BITCAST3:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR11]](s32) - ; CHECK: [[CONCAT_VECTORS:%[0-9]+]]:_(<8 x s16>) = G_CONCAT_VECTORS [[BITCAST]](<2 x s16>), [[BITCAST1]](<2 x s16>), [[BITCAST2]](<2 x s16>), [[BITCAST3]](<2 x s16>) - ; CHECK: S_ENDPGM 0, implicit [[CONCAT_VECTORS]](<8 x s16>) + ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32), [[UV6:%[0-9]+]]:_(s32), [[UV7:%[0-9]+]]:_(s32), [[UV8:%[0-9]+]]:_(s32), [[UV9:%[0-9]+]]:_(s32), [[UV10:%[0-9]+]]:_(s32), [[UV11:%[0-9]+]]:_(s32), [[UV12:%[0-9]+]]:_(s32), [[UV13:%[0-9]+]]:_(s32), [[UV14:%[0-9]+]]:_(s32), [[UV15:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<16 x s32>) + ; CHECK-NEXT: 
[[C:%[0-9]+]]:_(s16) = G_CONSTANT i16 255 + ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[UV]](s32) + ; CHECK-NEXT: [[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC]], [[C]] + ; CHECK-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[UV1]](s32) + ; CHECK-NEXT: [[AND1:%[0-9]+]]:_(s16) = G_AND [[TRUNC1]], [[C]] + ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s16) = G_CONSTANT i16 8 + ; CHECK-NEXT: [[SHL:%[0-9]+]]:_(s16) = G_SHL [[AND1]], [[C1]](s16) + ; CHECK-NEXT: [[OR:%[0-9]+]]:_(s16) = G_OR [[AND]], [[SHL]] + ; CHECK-NEXT: [[UV16:%[0-9]+]]:_(s32), [[UV17:%[0-9]+]]:_(s32), [[UV18:%[0-9]+]]:_(s32), [[UV19:%[0-9]+]]:_(s32), [[UV20:%[0-9]+]]:_(s32), [[UV21:%[0-9]+]]:_(s32), [[UV22:%[0-9]+]]:_(s32), [[UV23:%[0-9]+]]:_(s32), [[UV24:%[0-9]+]]:_(s32), [[UV25:%[0-9]+]]:_(s32), [[UV26:%[0-9]+]]:_(s32), [[UV27:%[0-9]+]]:_(s32), [[UV28:%[0-9]+]]:_(s32), [[UV29:%[0-9]+]]:_(s32), [[UV30:%[0-9]+]]:_(s32), [[UV31:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<16 x s32>) + ; CHECK-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[UV18]](s32) + ; CHECK-NEXT: [[AND2:%[0-9]+]]:_(s16) = G_AND [[TRUNC2]], [[C]] + ; CHECK-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[UV19]](s32) + ; CHECK-NEXT: [[AND3:%[0-9]+]]:_(s16) = G_AND [[TRUNC3]], [[C]] + ; CHECK-NEXT: [[SHL1:%[0-9]+]]:_(s16) = G_SHL [[AND3]], [[C1]](s16) + ; CHECK-NEXT: [[OR1:%[0-9]+]]:_(s16) = G_OR [[AND2]], [[SHL1]] + ; CHECK-NEXT: [[UV32:%[0-9]+]]:_(s32), [[UV33:%[0-9]+]]:_(s32), [[UV34:%[0-9]+]]:_(s32), [[UV35:%[0-9]+]]:_(s32), [[UV36:%[0-9]+]]:_(s32), [[UV37:%[0-9]+]]:_(s32), [[UV38:%[0-9]+]]:_(s32), [[UV39:%[0-9]+]]:_(s32), [[UV40:%[0-9]+]]:_(s32), [[UV41:%[0-9]+]]:_(s32), [[UV42:%[0-9]+]]:_(s32), [[UV43:%[0-9]+]]:_(s32), [[UV44:%[0-9]+]]:_(s32), [[UV45:%[0-9]+]]:_(s32), [[UV46:%[0-9]+]]:_(s32), [[UV47:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<16 x s32>) + ; CHECK-NEXT: [[TRUNC4:%[0-9]+]]:_(s16) = G_TRUNC [[UV36]](s32) + ; CHECK-NEXT: [[AND4:%[0-9]+]]:_(s16) = G_AND [[TRUNC4]], [[C]] + ; CHECK-NEXT: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[UV37]](s32) + ; CHECK-NEXT: [[AND5:%[0-9]+]]:_(s16) = G_AND [[TRUNC5]], [[C]] + ; CHECK-NEXT: [[SHL2:%[0-9]+]]:_(s16) = G_SHL [[AND5]], [[C1]](s16) + ; CHECK-NEXT: [[OR2:%[0-9]+]]:_(s16) = G_OR [[AND4]], [[SHL2]] + ; CHECK-NEXT: [[UV48:%[0-9]+]]:_(s32), [[UV49:%[0-9]+]]:_(s32), [[UV50:%[0-9]+]]:_(s32), [[UV51:%[0-9]+]]:_(s32), [[UV52:%[0-9]+]]:_(s32), [[UV53:%[0-9]+]]:_(s32), [[UV54:%[0-9]+]]:_(s32), [[UV55:%[0-9]+]]:_(s32), [[UV56:%[0-9]+]]:_(s32), [[UV57:%[0-9]+]]:_(s32), [[UV58:%[0-9]+]]:_(s32), [[UV59:%[0-9]+]]:_(s32), [[UV60:%[0-9]+]]:_(s32), [[UV61:%[0-9]+]]:_(s32), [[UV62:%[0-9]+]]:_(s32), [[UV63:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<16 x s32>) + ; CHECK-NEXT: [[TRUNC6:%[0-9]+]]:_(s16) = G_TRUNC [[UV54]](s32) + ; CHECK-NEXT: [[AND6:%[0-9]+]]:_(s16) = G_AND [[TRUNC6]], [[C]] + ; CHECK-NEXT: [[TRUNC7:%[0-9]+]]:_(s16) = G_TRUNC [[UV55]](s32) + ; CHECK-NEXT: [[AND7:%[0-9]+]]:_(s16) = G_AND [[TRUNC7]], [[C]] + ; CHECK-NEXT: [[SHL3:%[0-9]+]]:_(s16) = G_SHL [[AND7]], [[C1]](s16) + ; CHECK-NEXT: [[OR3:%[0-9]+]]:_(s16) = G_OR [[AND6]], [[SHL3]] + ; CHECK-NEXT: [[UV64:%[0-9]+]]:_(s32), [[UV65:%[0-9]+]]:_(s32), [[UV66:%[0-9]+]]:_(s32), [[UV67:%[0-9]+]]:_(s32), [[UV68:%[0-9]+]]:_(s32), [[UV69:%[0-9]+]]:_(s32), [[UV70:%[0-9]+]]:_(s32), [[UV71:%[0-9]+]]:_(s32), [[UV72:%[0-9]+]]:_(s32), [[UV73:%[0-9]+]]:_(s32), [[UV74:%[0-9]+]]:_(s32), [[UV75:%[0-9]+]]:_(s32), [[UV76:%[0-9]+]]:_(s32), [[UV77:%[0-9]+]]:_(s32), [[UV78:%[0-9]+]]:_(s32), [[UV79:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<16 x s32>) + ; CHECK-NEXT: [[TRUNC8:%[0-9]+]]:_(s16) = 
G_TRUNC [[UV72]](s32) + ; CHECK-NEXT: [[AND8:%[0-9]+]]:_(s16) = G_AND [[TRUNC8]], [[C]] + ; CHECK-NEXT: [[TRUNC9:%[0-9]+]]:_(s16) = G_TRUNC [[UV73]](s32) + ; CHECK-NEXT: [[AND9:%[0-9]+]]:_(s16) = G_AND [[TRUNC9]], [[C]] + ; CHECK-NEXT: [[SHL4:%[0-9]+]]:_(s16) = G_SHL [[AND9]], [[C1]](s16) + ; CHECK-NEXT: [[OR4:%[0-9]+]]:_(s16) = G_OR [[AND8]], [[SHL4]] + ; CHECK-NEXT: [[UV80:%[0-9]+]]:_(s32), [[UV81:%[0-9]+]]:_(s32), [[UV82:%[0-9]+]]:_(s32), [[UV83:%[0-9]+]]:_(s32), [[UV84:%[0-9]+]]:_(s32), [[UV85:%[0-9]+]]:_(s32), [[UV86:%[0-9]+]]:_(s32), [[UV87:%[0-9]+]]:_(s32), [[UV88:%[0-9]+]]:_(s32), [[UV89:%[0-9]+]]:_(s32), [[UV90:%[0-9]+]]:_(s32), [[UV91:%[0-9]+]]:_(s32), [[UV92:%[0-9]+]]:_(s32), [[UV93:%[0-9]+]]:_(s32), [[UV94:%[0-9]+]]:_(s32), [[UV95:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<16 x s32>) + ; CHECK-NEXT: [[TRUNC10:%[0-9]+]]:_(s16) = G_TRUNC [[UV90]](s32) + ; CHECK-NEXT: [[AND10:%[0-9]+]]:_(s16) = G_AND [[TRUNC10]], [[C]] + ; CHECK-NEXT: [[TRUNC11:%[0-9]+]]:_(s16) = G_TRUNC [[UV91]](s32) + ; CHECK-NEXT: [[AND11:%[0-9]+]]:_(s16) = G_AND [[TRUNC11]], [[C]] + ; CHECK-NEXT: [[SHL5:%[0-9]+]]:_(s16) = G_SHL [[AND11]], [[C1]](s16) + ; CHECK-NEXT: [[OR5:%[0-9]+]]:_(s16) = G_OR [[AND10]], [[SHL5]] + ; CHECK-NEXT: [[UV96:%[0-9]+]]:_(s32), [[UV97:%[0-9]+]]:_(s32), [[UV98:%[0-9]+]]:_(s32), [[UV99:%[0-9]+]]:_(s32), [[UV100:%[0-9]+]]:_(s32), [[UV101:%[0-9]+]]:_(s32), [[UV102:%[0-9]+]]:_(s32), [[UV103:%[0-9]+]]:_(s32), [[UV104:%[0-9]+]]:_(s32), [[UV105:%[0-9]+]]:_(s32), [[UV106:%[0-9]+]]:_(s32), [[UV107:%[0-9]+]]:_(s32), [[UV108:%[0-9]+]]:_(s32), [[UV109:%[0-9]+]]:_(s32), [[UV110:%[0-9]+]]:_(s32), [[UV111:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<16 x s32>) + ; CHECK-NEXT: [[TRUNC12:%[0-9]+]]:_(s16) = G_TRUNC [[UV108]](s32) + ; CHECK-NEXT: [[AND12:%[0-9]+]]:_(s16) = G_AND [[TRUNC12]], [[C]] + ; CHECK-NEXT: [[TRUNC13:%[0-9]+]]:_(s16) = G_TRUNC [[UV109]](s32) + ; CHECK-NEXT: [[AND13:%[0-9]+]]:_(s16) = G_AND [[TRUNC13]], [[C]] + ; CHECK-NEXT: [[SHL6:%[0-9]+]]:_(s16) = G_SHL [[AND13]], [[C1]](s16) + ; CHECK-NEXT: [[OR6:%[0-9]+]]:_(s16) = G_OR [[AND12]], [[SHL6]] + ; CHECK-NEXT: [[UV112:%[0-9]+]]:_(s32), [[UV113:%[0-9]+]]:_(s32), [[UV114:%[0-9]+]]:_(s32), [[UV115:%[0-9]+]]:_(s32), [[UV116:%[0-9]+]]:_(s32), [[UV117:%[0-9]+]]:_(s32), [[UV118:%[0-9]+]]:_(s32), [[UV119:%[0-9]+]]:_(s32), [[UV120:%[0-9]+]]:_(s32), [[UV121:%[0-9]+]]:_(s32), [[UV122:%[0-9]+]]:_(s32), [[UV123:%[0-9]+]]:_(s32), [[UV124:%[0-9]+]]:_(s32), [[UV125:%[0-9]+]]:_(s32), [[UV126:%[0-9]+]]:_(s32), [[UV127:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<16 x s32>) + ; CHECK-NEXT: [[TRUNC14:%[0-9]+]]:_(s16) = G_TRUNC [[UV126]](s32) + ; CHECK-NEXT: [[AND14:%[0-9]+]]:_(s16) = G_AND [[TRUNC14]], [[C]] + ; CHECK-NEXT: [[TRUNC15:%[0-9]+]]:_(s16) = G_TRUNC [[UV127]](s32) + ; CHECK-NEXT: [[AND15:%[0-9]+]]:_(s16) = G_AND [[TRUNC15]], [[C]] + ; CHECK-NEXT: [[SHL7:%[0-9]+]]:_(s16) = G_SHL [[AND15]], [[C1]](s16) + ; CHECK-NEXT: [[OR7:%[0-9]+]]:_(s16) = G_OR [[AND14]], [[SHL7]] + ; CHECK-NEXT: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[OR]](s16) + ; CHECK-NEXT: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[OR1]](s16) + ; CHECK-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; CHECK-NEXT: [[SHL8:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C2]](s32) + ; CHECK-NEXT: [[OR8:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL8]] + ; CHECK-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR8]](s32) + ; CHECK-NEXT: [[ZEXT2:%[0-9]+]]:_(s32) = G_ZEXT [[OR2]](s16) + ; CHECK-NEXT: [[ZEXT3:%[0-9]+]]:_(s32) = G_ZEXT [[OR3]](s16) + ; CHECK-NEXT: [[SHL9:%[0-9]+]]:_(s32) = G_SHL [[ZEXT3]], 
[[C2]](s32) + ; CHECK-NEXT: [[OR9:%[0-9]+]]:_(s32) = G_OR [[ZEXT2]], [[SHL9]] + ; CHECK-NEXT: [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR9]](s32) + ; CHECK-NEXT: [[ZEXT4:%[0-9]+]]:_(s32) = G_ZEXT [[OR4]](s16) + ; CHECK-NEXT: [[ZEXT5:%[0-9]+]]:_(s32) = G_ZEXT [[OR5]](s16) + ; CHECK-NEXT: [[SHL10:%[0-9]+]]:_(s32) = G_SHL [[ZEXT5]], [[C2]](s32) + ; CHECK-NEXT: [[OR10:%[0-9]+]]:_(s32) = G_OR [[ZEXT4]], [[SHL10]] + ; CHECK-NEXT: [[BITCAST2:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR10]](s32) + ; CHECK-NEXT: [[ZEXT6:%[0-9]+]]:_(s32) = G_ZEXT [[OR6]](s16) + ; CHECK-NEXT: [[ZEXT7:%[0-9]+]]:_(s32) = G_ZEXT [[OR7]](s16) + ; CHECK-NEXT: [[SHL11:%[0-9]+]]:_(s32) = G_SHL [[ZEXT7]], [[C2]](s32) + ; CHECK-NEXT: [[OR11:%[0-9]+]]:_(s32) = G_OR [[ZEXT6]], [[SHL11]] + ; CHECK-NEXT: [[BITCAST3:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR11]](s32) + ; CHECK-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<8 x s16>) = G_CONCAT_VECTORS [[BITCAST]](<2 x s16>), [[BITCAST1]](<2 x s16>), [[BITCAST2]](<2 x s16>), [[BITCAST3]](<2 x s16>) + ; CHECK-NEXT: S_ENDPGM 0, implicit [[CONCAT_VECTORS]](<8 x s16>) %0:_(<16 x s32>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 %1:_(<16 x s8>) = G_TRUNC %0 %2:_(<8 x s16>) = G_BITCAST %1 @@ -1975,8 +1979,8 @@ body: | ; CHECK-LABEL: name: test_bitcast_v3s64_to_v6s32 ; CHECK: [[COPY:%[0-9]+]]:_(<3 x s64>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5 - ; CHECK: [[BITCAST:%[0-9]+]]:_(<6 x s32>) = G_BITCAST [[COPY]](<3 x s64>) - ; CHECK: S_ENDPGM 0, implicit [[BITCAST]](<6 x s32>) + ; CHECK-NEXT: [[BITCAST:%[0-9]+]]:_(<6 x s32>) = G_BITCAST [[COPY]](<3 x s64>) + ; CHECK-NEXT: S_ENDPGM 0, implicit [[BITCAST]](<6 x s32>) %0:_(<3 x s64>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5 %1:_(<6 x s32>) = G_BITCAST %0 S_ENDPGM 0, implicit %1 @@ -1990,8 +1994,8 @@ body: | ; CHECK-LABEL: name: test_bitcast_v6s32_to_v3s64 ; CHECK: [[COPY:%[0-9]+]]:_(<6 x s32>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5 - ; CHECK: [[BITCAST:%[0-9]+]]:_(<3 x s64>) = G_BITCAST [[COPY]](<6 x s32>) - ; CHECK: S_ENDPGM 0, implicit [[BITCAST]](<3 x s64>) + ; CHECK-NEXT: [[BITCAST:%[0-9]+]]:_(<3 x s64>) = G_BITCAST [[COPY]](<6 x s32>) + ; CHECK-NEXT: S_ENDPGM 0, implicit [[BITCAST]](<3 x s64>) %0:_(<6 x s32>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5 %1:_(<3 x s64>) = G_BITCAST %0 S_ENDPGM 0, implicit %1 @@ -2005,8 +2009,8 @@ body: | ; CHECK-LABEL: name: test_bitcast_v3s64_to_v12s16 ; CHECK: [[COPY:%[0-9]+]]:_(<3 x s64>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5 - ; CHECK: [[BITCAST:%[0-9]+]]:_(<12 x s16>) = G_BITCAST [[COPY]](<3 x s64>) - ; CHECK: S_ENDPGM 0, implicit [[BITCAST]](<12 x s16>) + ; CHECK-NEXT: [[BITCAST:%[0-9]+]]:_(<12 x s16>) = G_BITCAST [[COPY]](<3 x s64>) + ; CHECK-NEXT: S_ENDPGM 0, implicit [[BITCAST]](<12 x s16>) %0:_(<3 x s64>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5 %1:_(<12 x s16>) = G_BITCAST %0 S_ENDPGM 0, implicit %1 @@ -2020,8 +2024,8 @@ body: | ; CHECK-LABEL: name: test_bitcast_v12s16_to_v3s64 ; CHECK: [[COPY:%[0-9]+]]:_(<12 x s16>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5 - ; CHECK: [[BITCAST:%[0-9]+]]:_(<3 x s64>) = G_BITCAST [[COPY]](<12 x s16>) - ; CHECK: S_ENDPGM 0, implicit [[BITCAST]](<3 x s64>) + ; CHECK-NEXT: [[BITCAST:%[0-9]+]]:_(<3 x s64>) = G_BITCAST [[COPY]](<12 x s16>) + ; CHECK-NEXT: S_ENDPGM 0, implicit [[BITCAST]](<3 x s64>) %0:_(<12 x s16>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5 %1:_(<3 x s64>) = G_BITCAST %0 S_ENDPGM 0, implicit %1 @@ -2035,117 +2039,117 @@ body: | ; CHECK-LABEL: name: test_bitcast_v3s64_to_v24s8 
; CHECK: [[COPY:%[0-9]+]]:_(<3 x s64>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5 - ; CHECK: [[UV:%[0-9]+]]:_(s64), [[UV1:%[0-9]+]]:_(s64), [[UV2:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[COPY]](<3 x s64>) - ; CHECK: [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[UV]](s64) - ; CHECK: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[UV3]](s32) - ; CHECK: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; CHECK: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[UV3]], [[C]](s32) - ; CHECK: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) - ; CHECK: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[UV4]](s32) - ; CHECK: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[UV4]], [[C]](s32) - ; CHECK: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR1]](s32) - ; CHECK: [[C1:%[0-9]+]]:_(s16) = G_CONSTANT i16 8 - ; CHECK: [[LSHR2:%[0-9]+]]:_(s16) = G_LSHR [[TRUNC]], [[C1]](s16) - ; CHECK: [[LSHR3:%[0-9]+]]:_(s16) = G_LSHR [[TRUNC1]], [[C1]](s16) - ; CHECK: [[LSHR4:%[0-9]+]]:_(s16) = G_LSHR [[TRUNC2]], [[C1]](s16) - ; CHECK: [[LSHR5:%[0-9]+]]:_(s16) = G_LSHR [[TRUNC3]], [[C1]](s16) - ; CHECK: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[LSHR2]](s16) - ; CHECK: [[ANYEXT1:%[0-9]+]]:_(s32) = G_ANYEXT [[LSHR3]](s16) - ; CHECK: [[ANYEXT2:%[0-9]+]]:_(s32) = G_ANYEXT [[LSHR4]](s16) - ; CHECK: [[ANYEXT3:%[0-9]+]]:_(s32) = G_ANYEXT [[LSHR5]](s16) - ; CHECK: [[UV5:%[0-9]+]]:_(s32), [[UV6:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[UV1]](s64) - ; CHECK: [[TRUNC4:%[0-9]+]]:_(s16) = G_TRUNC [[UV5]](s32) - ; CHECK: [[LSHR6:%[0-9]+]]:_(s32) = G_LSHR [[UV5]], [[C]](s32) - ; CHECK: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR6]](s32) - ; CHECK: [[TRUNC6:%[0-9]+]]:_(s16) = G_TRUNC [[UV6]](s32) - ; CHECK: [[LSHR7:%[0-9]+]]:_(s32) = G_LSHR [[UV6]], [[C]](s32) - ; CHECK: [[TRUNC7:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR7]](s32) - ; CHECK: [[LSHR8:%[0-9]+]]:_(s16) = G_LSHR [[TRUNC4]], [[C1]](s16) - ; CHECK: [[LSHR9:%[0-9]+]]:_(s16) = G_LSHR [[TRUNC5]], [[C1]](s16) - ; CHECK: [[LSHR10:%[0-9]+]]:_(s16) = G_LSHR [[TRUNC6]], [[C1]](s16) - ; CHECK: [[LSHR11:%[0-9]+]]:_(s16) = G_LSHR [[TRUNC7]], [[C1]](s16) - ; CHECK: [[ANYEXT4:%[0-9]+]]:_(s32) = G_ANYEXT [[LSHR8]](s16) - ; CHECK: [[ANYEXT5:%[0-9]+]]:_(s32) = G_ANYEXT [[LSHR9]](s16) - ; CHECK: [[ANYEXT6:%[0-9]+]]:_(s32) = G_ANYEXT [[LSHR10]](s16) - ; CHECK: [[ANYEXT7:%[0-9]+]]:_(s32) = G_ANYEXT [[LSHR11]](s16) - ; CHECK: [[UV7:%[0-9]+]]:_(s32), [[UV8:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[UV2]](s64) - ; CHECK: [[TRUNC8:%[0-9]+]]:_(s16) = G_TRUNC [[UV7]](s32) - ; CHECK: [[LSHR12:%[0-9]+]]:_(s32) = G_LSHR [[UV7]], [[C]](s32) - ; CHECK: [[TRUNC9:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR12]](s32) - ; CHECK: [[TRUNC10:%[0-9]+]]:_(s16) = G_TRUNC [[UV8]](s32) - ; CHECK: [[LSHR13:%[0-9]+]]:_(s32) = G_LSHR [[UV8]], [[C]](s32) - ; CHECK: [[TRUNC11:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR13]](s32) - ; CHECK: [[LSHR14:%[0-9]+]]:_(s16) = G_LSHR [[TRUNC8]], [[C1]](s16) - ; CHECK: [[LSHR15:%[0-9]+]]:_(s16) = G_LSHR [[TRUNC9]], [[C1]](s16) - ; CHECK: [[LSHR16:%[0-9]+]]:_(s16) = G_LSHR [[TRUNC10]], [[C1]](s16) - ; CHECK: [[LSHR17:%[0-9]+]]:_(s16) = G_LSHR [[TRUNC11]], [[C1]](s16) - ; CHECK: [[ANYEXT8:%[0-9]+]]:_(s32) = G_ANYEXT [[LSHR14]](s16) - ; CHECK: [[ANYEXT9:%[0-9]+]]:_(s32) = G_ANYEXT [[LSHR15]](s16) - ; CHECK: [[ANYEXT10:%[0-9]+]]:_(s32) = G_ANYEXT [[LSHR16]](s16) - ; CHECK: [[ANYEXT11:%[0-9]+]]:_(s32) = G_ANYEXT [[LSHR17]](s16) - ; CHECK: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 - ; CHECK: [[AND:%[0-9]+]]:_(s32) = G_AND [[UV3]], [[C2]] - ; CHECK: [[AND1:%[0-9]+]]:_(s32) = G_AND [[ANYEXT]], [[C2]] - ; CHECK: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C]](s32) - ; 
CHECK: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]] - ; CHECK: [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) - ; CHECK: [[AND2:%[0-9]+]]:_(s32) = G_AND [[LSHR]], [[C2]] - ; CHECK: [[AND3:%[0-9]+]]:_(s32) = G_AND [[ANYEXT1]], [[C2]] - ; CHECK: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C]](s32) - ; CHECK: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL1]] - ; CHECK: [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32) - ; CHECK: [[AND4:%[0-9]+]]:_(s32) = G_AND [[UV4]], [[C2]] - ; CHECK: [[AND5:%[0-9]+]]:_(s32) = G_AND [[ANYEXT2]], [[C2]] - ; CHECK: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[C]](s32) - ; CHECK: [[OR2:%[0-9]+]]:_(s32) = G_OR [[AND4]], [[SHL2]] - ; CHECK: [[BITCAST2:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR2]](s32) - ; CHECK: [[AND6:%[0-9]+]]:_(s32) = G_AND [[LSHR1]], [[C2]] - ; CHECK: [[AND7:%[0-9]+]]:_(s32) = G_AND [[ANYEXT3]], [[C2]] - ; CHECK: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[AND7]], [[C]](s32) - ; CHECK: [[OR3:%[0-9]+]]:_(s32) = G_OR [[AND6]], [[SHL3]] - ; CHECK: [[BITCAST3:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR3]](s32) - ; CHECK: [[AND8:%[0-9]+]]:_(s32) = G_AND [[UV5]], [[C2]] - ; CHECK: [[AND9:%[0-9]+]]:_(s32) = G_AND [[ANYEXT4]], [[C2]] - ; CHECK: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[AND9]], [[C]](s32) - ; CHECK: [[OR4:%[0-9]+]]:_(s32) = G_OR [[AND8]], [[SHL4]] - ; CHECK: [[BITCAST4:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR4]](s32) - ; CHECK: [[AND10:%[0-9]+]]:_(s32) = G_AND [[LSHR6]], [[C2]] - ; CHECK: [[AND11:%[0-9]+]]:_(s32) = G_AND [[ANYEXT5]], [[C2]] - ; CHECK: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[AND11]], [[C]](s32) - ; CHECK: [[OR5:%[0-9]+]]:_(s32) = G_OR [[AND10]], [[SHL5]] - ; CHECK: [[BITCAST5:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR5]](s32) - ; CHECK: [[AND12:%[0-9]+]]:_(s32) = G_AND [[UV6]], [[C2]] - ; CHECK: [[AND13:%[0-9]+]]:_(s32) = G_AND [[ANYEXT6]], [[C2]] - ; CHECK: [[SHL6:%[0-9]+]]:_(s32) = G_SHL [[AND13]], [[C]](s32) - ; CHECK: [[OR6:%[0-9]+]]:_(s32) = G_OR [[AND12]], [[SHL6]] - ; CHECK: [[BITCAST6:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR6]](s32) - ; CHECK: [[AND14:%[0-9]+]]:_(s32) = G_AND [[LSHR7]], [[C2]] - ; CHECK: [[AND15:%[0-9]+]]:_(s32) = G_AND [[ANYEXT7]], [[C2]] - ; CHECK: [[SHL7:%[0-9]+]]:_(s32) = G_SHL [[AND15]], [[C]](s32) - ; CHECK: [[OR7:%[0-9]+]]:_(s32) = G_OR [[AND14]], [[SHL7]] - ; CHECK: [[BITCAST7:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR7]](s32) - ; CHECK: [[AND16:%[0-9]+]]:_(s32) = G_AND [[UV7]], [[C2]] - ; CHECK: [[AND17:%[0-9]+]]:_(s32) = G_AND [[ANYEXT8]], [[C2]] - ; CHECK: [[SHL8:%[0-9]+]]:_(s32) = G_SHL [[AND17]], [[C]](s32) - ; CHECK: [[OR8:%[0-9]+]]:_(s32) = G_OR [[AND16]], [[SHL8]] - ; CHECK: [[BITCAST8:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR8]](s32) - ; CHECK: [[AND18:%[0-9]+]]:_(s32) = G_AND [[LSHR12]], [[C2]] - ; CHECK: [[AND19:%[0-9]+]]:_(s32) = G_AND [[ANYEXT9]], [[C2]] - ; CHECK: [[SHL9:%[0-9]+]]:_(s32) = G_SHL [[AND19]], [[C]](s32) - ; CHECK: [[OR9:%[0-9]+]]:_(s32) = G_OR [[AND18]], [[SHL9]] - ; CHECK: [[BITCAST9:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR9]](s32) - ; CHECK: [[AND20:%[0-9]+]]:_(s32) = G_AND [[UV8]], [[C2]] - ; CHECK: [[AND21:%[0-9]+]]:_(s32) = G_AND [[ANYEXT10]], [[C2]] - ; CHECK: [[SHL10:%[0-9]+]]:_(s32) = G_SHL [[AND21]], [[C]](s32) - ; CHECK: [[OR10:%[0-9]+]]:_(s32) = G_OR [[AND20]], [[SHL10]] - ; CHECK: [[BITCAST10:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR10]](s32) - ; CHECK: [[AND22:%[0-9]+]]:_(s32) = G_AND [[LSHR13]], [[C2]] - ; CHECK: [[AND23:%[0-9]+]]:_(s32) = G_AND [[ANYEXT11]], [[C2]] - ; CHECK: [[SHL11:%[0-9]+]]:_(s32) = G_SHL [[AND23]], [[C]](s32) - ; CHECK: [[OR11:%[0-9]+]]:_(s32) = 
G_OR [[AND22]], [[SHL11]] - ; CHECK: [[BITCAST11:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR11]](s32) - ; CHECK: [[CONCAT_VECTORS:%[0-9]+]]:_(<24 x s16>) = G_CONCAT_VECTORS [[BITCAST]](<2 x s16>), [[BITCAST1]](<2 x s16>), [[BITCAST2]](<2 x s16>), [[BITCAST3]](<2 x s16>), [[BITCAST4]](<2 x s16>), [[BITCAST5]](<2 x s16>), [[BITCAST6]](<2 x s16>), [[BITCAST7]](<2 x s16>), [[BITCAST8]](<2 x s16>), [[BITCAST9]](<2 x s16>), [[BITCAST10]](<2 x s16>), [[BITCAST11]](<2 x s16>) - ; CHECK: S_ENDPGM 0, implicit [[CONCAT_VECTORS]](<24 x s16>) + ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s64), [[UV1:%[0-9]+]]:_(s64), [[UV2:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[COPY]](<3 x s64>) + ; CHECK-NEXT: [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[UV]](s64) + ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[UV3]](s32) + ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; CHECK-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[UV3]], [[C]](s32) + ; CHECK-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) + ; CHECK-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[UV4]](s32) + ; CHECK-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[UV4]], [[C]](s32) + ; CHECK-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR1]](s32) + ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s16) = G_CONSTANT i16 8 + ; CHECK-NEXT: [[LSHR2:%[0-9]+]]:_(s16) = G_LSHR [[TRUNC]], [[C1]](s16) + ; CHECK-NEXT: [[LSHR3:%[0-9]+]]:_(s16) = G_LSHR [[TRUNC1]], [[C1]](s16) + ; CHECK-NEXT: [[LSHR4:%[0-9]+]]:_(s16) = G_LSHR [[TRUNC2]], [[C1]](s16) + ; CHECK-NEXT: [[LSHR5:%[0-9]+]]:_(s16) = G_LSHR [[TRUNC3]], [[C1]](s16) + ; CHECK-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[LSHR2]](s16) + ; CHECK-NEXT: [[ANYEXT1:%[0-9]+]]:_(s32) = G_ANYEXT [[LSHR3]](s16) + ; CHECK-NEXT: [[ANYEXT2:%[0-9]+]]:_(s32) = G_ANYEXT [[LSHR4]](s16) + ; CHECK-NEXT: [[ANYEXT3:%[0-9]+]]:_(s32) = G_ANYEXT [[LSHR5]](s16) + ; CHECK-NEXT: [[UV5:%[0-9]+]]:_(s32), [[UV6:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[UV1]](s64) + ; CHECK-NEXT: [[TRUNC4:%[0-9]+]]:_(s16) = G_TRUNC [[UV5]](s32) + ; CHECK-NEXT: [[LSHR6:%[0-9]+]]:_(s32) = G_LSHR [[UV5]], [[C]](s32) + ; CHECK-NEXT: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR6]](s32) + ; CHECK-NEXT: [[TRUNC6:%[0-9]+]]:_(s16) = G_TRUNC [[UV6]](s32) + ; CHECK-NEXT: [[LSHR7:%[0-9]+]]:_(s32) = G_LSHR [[UV6]], [[C]](s32) + ; CHECK-NEXT: [[TRUNC7:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR7]](s32) + ; CHECK-NEXT: [[LSHR8:%[0-9]+]]:_(s16) = G_LSHR [[TRUNC4]], [[C1]](s16) + ; CHECK-NEXT: [[LSHR9:%[0-9]+]]:_(s16) = G_LSHR [[TRUNC5]], [[C1]](s16) + ; CHECK-NEXT: [[LSHR10:%[0-9]+]]:_(s16) = G_LSHR [[TRUNC6]], [[C1]](s16) + ; CHECK-NEXT: [[LSHR11:%[0-9]+]]:_(s16) = G_LSHR [[TRUNC7]], [[C1]](s16) + ; CHECK-NEXT: [[ANYEXT4:%[0-9]+]]:_(s32) = G_ANYEXT [[LSHR8]](s16) + ; CHECK-NEXT: [[ANYEXT5:%[0-9]+]]:_(s32) = G_ANYEXT [[LSHR9]](s16) + ; CHECK-NEXT: [[ANYEXT6:%[0-9]+]]:_(s32) = G_ANYEXT [[LSHR10]](s16) + ; CHECK-NEXT: [[ANYEXT7:%[0-9]+]]:_(s32) = G_ANYEXT [[LSHR11]](s16) + ; CHECK-NEXT: [[UV7:%[0-9]+]]:_(s32), [[UV8:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[UV2]](s64) + ; CHECK-NEXT: [[TRUNC8:%[0-9]+]]:_(s16) = G_TRUNC [[UV7]](s32) + ; CHECK-NEXT: [[LSHR12:%[0-9]+]]:_(s32) = G_LSHR [[UV7]], [[C]](s32) + ; CHECK-NEXT: [[TRUNC9:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR12]](s32) + ; CHECK-NEXT: [[TRUNC10:%[0-9]+]]:_(s16) = G_TRUNC [[UV8]](s32) + ; CHECK-NEXT: [[LSHR13:%[0-9]+]]:_(s32) = G_LSHR [[UV8]], [[C]](s32) + ; CHECK-NEXT: [[TRUNC11:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR13]](s32) + ; CHECK-NEXT: [[LSHR14:%[0-9]+]]:_(s16) = G_LSHR [[TRUNC8]], [[C1]](s16) + ; CHECK-NEXT: [[LSHR15:%[0-9]+]]:_(s16) = G_LSHR [[TRUNC9]], 
[[C1]](s16) + ; CHECK-NEXT: [[LSHR16:%[0-9]+]]:_(s16) = G_LSHR [[TRUNC10]], [[C1]](s16) + ; CHECK-NEXT: [[LSHR17:%[0-9]+]]:_(s16) = G_LSHR [[TRUNC11]], [[C1]](s16) + ; CHECK-NEXT: [[ANYEXT8:%[0-9]+]]:_(s32) = G_ANYEXT [[LSHR14]](s16) + ; CHECK-NEXT: [[ANYEXT9:%[0-9]+]]:_(s32) = G_ANYEXT [[LSHR15]](s16) + ; CHECK-NEXT: [[ANYEXT10:%[0-9]+]]:_(s32) = G_ANYEXT [[LSHR16]](s16) + ; CHECK-NEXT: [[ANYEXT11:%[0-9]+]]:_(s32) = G_ANYEXT [[LSHR17]](s16) + ; CHECK-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 + ; CHECK-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[UV3]], [[C2]] + ; CHECK-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[ANYEXT]], [[C2]] + ; CHECK-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C]](s32) + ; CHECK-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]] + ; CHECK-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) + ; CHECK-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[LSHR]], [[C2]] + ; CHECK-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[ANYEXT1]], [[C2]] + ; CHECK-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C]](s32) + ; CHECK-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL1]] + ; CHECK-NEXT: [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32) + ; CHECK-NEXT: [[AND4:%[0-9]+]]:_(s32) = G_AND [[UV4]], [[C2]] + ; CHECK-NEXT: [[AND5:%[0-9]+]]:_(s32) = G_AND [[ANYEXT2]], [[C2]] + ; CHECK-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[C]](s32) + ; CHECK-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[AND4]], [[SHL2]] + ; CHECK-NEXT: [[BITCAST2:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR2]](s32) + ; CHECK-NEXT: [[AND6:%[0-9]+]]:_(s32) = G_AND [[LSHR1]], [[C2]] + ; CHECK-NEXT: [[AND7:%[0-9]+]]:_(s32) = G_AND [[ANYEXT3]], [[C2]] + ; CHECK-NEXT: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[AND7]], [[C]](s32) + ; CHECK-NEXT: [[OR3:%[0-9]+]]:_(s32) = G_OR [[AND6]], [[SHL3]] + ; CHECK-NEXT: [[BITCAST3:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR3]](s32) + ; CHECK-NEXT: [[AND8:%[0-9]+]]:_(s32) = G_AND [[UV5]], [[C2]] + ; CHECK-NEXT: [[AND9:%[0-9]+]]:_(s32) = G_AND [[ANYEXT4]], [[C2]] + ; CHECK-NEXT: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[AND9]], [[C]](s32) + ; CHECK-NEXT: [[OR4:%[0-9]+]]:_(s32) = G_OR [[AND8]], [[SHL4]] + ; CHECK-NEXT: [[BITCAST4:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR4]](s32) + ; CHECK-NEXT: [[AND10:%[0-9]+]]:_(s32) = G_AND [[LSHR6]], [[C2]] + ; CHECK-NEXT: [[AND11:%[0-9]+]]:_(s32) = G_AND [[ANYEXT5]], [[C2]] + ; CHECK-NEXT: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[AND11]], [[C]](s32) + ; CHECK-NEXT: [[OR5:%[0-9]+]]:_(s32) = G_OR [[AND10]], [[SHL5]] + ; CHECK-NEXT: [[BITCAST5:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR5]](s32) + ; CHECK-NEXT: [[AND12:%[0-9]+]]:_(s32) = G_AND [[UV6]], [[C2]] + ; CHECK-NEXT: [[AND13:%[0-9]+]]:_(s32) = G_AND [[ANYEXT6]], [[C2]] + ; CHECK-NEXT: [[SHL6:%[0-9]+]]:_(s32) = G_SHL [[AND13]], [[C]](s32) + ; CHECK-NEXT: [[OR6:%[0-9]+]]:_(s32) = G_OR [[AND12]], [[SHL6]] + ; CHECK-NEXT: [[BITCAST6:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR6]](s32) + ; CHECK-NEXT: [[AND14:%[0-9]+]]:_(s32) = G_AND [[LSHR7]], [[C2]] + ; CHECK-NEXT: [[AND15:%[0-9]+]]:_(s32) = G_AND [[ANYEXT7]], [[C2]] + ; CHECK-NEXT: [[SHL7:%[0-9]+]]:_(s32) = G_SHL [[AND15]], [[C]](s32) + ; CHECK-NEXT: [[OR7:%[0-9]+]]:_(s32) = G_OR [[AND14]], [[SHL7]] + ; CHECK-NEXT: [[BITCAST7:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR7]](s32) + ; CHECK-NEXT: [[AND16:%[0-9]+]]:_(s32) = G_AND [[UV7]], [[C2]] + ; CHECK-NEXT: [[AND17:%[0-9]+]]:_(s32) = G_AND [[ANYEXT8]], [[C2]] + ; CHECK-NEXT: [[SHL8:%[0-9]+]]:_(s32) = G_SHL [[AND17]], [[C]](s32) + ; CHECK-NEXT: [[OR8:%[0-9]+]]:_(s32) = G_OR [[AND16]], [[SHL8]] + ; CHECK-NEXT: 
[[BITCAST8:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR8]](s32) + ; CHECK-NEXT: [[AND18:%[0-9]+]]:_(s32) = G_AND [[LSHR12]], [[C2]] + ; CHECK-NEXT: [[AND19:%[0-9]+]]:_(s32) = G_AND [[ANYEXT9]], [[C2]] + ; CHECK-NEXT: [[SHL9:%[0-9]+]]:_(s32) = G_SHL [[AND19]], [[C]](s32) + ; CHECK-NEXT: [[OR9:%[0-9]+]]:_(s32) = G_OR [[AND18]], [[SHL9]] + ; CHECK-NEXT: [[BITCAST9:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR9]](s32) + ; CHECK-NEXT: [[AND20:%[0-9]+]]:_(s32) = G_AND [[UV8]], [[C2]] + ; CHECK-NEXT: [[AND21:%[0-9]+]]:_(s32) = G_AND [[ANYEXT10]], [[C2]] + ; CHECK-NEXT: [[SHL10:%[0-9]+]]:_(s32) = G_SHL [[AND21]], [[C]](s32) + ; CHECK-NEXT: [[OR10:%[0-9]+]]:_(s32) = G_OR [[AND20]], [[SHL10]] + ; CHECK-NEXT: [[BITCAST10:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR10]](s32) + ; CHECK-NEXT: [[AND22:%[0-9]+]]:_(s32) = G_AND [[LSHR13]], [[C2]] + ; CHECK-NEXT: [[AND23:%[0-9]+]]:_(s32) = G_AND [[ANYEXT11]], [[C2]] + ; CHECK-NEXT: [[SHL11:%[0-9]+]]:_(s32) = G_SHL [[AND23]], [[C]](s32) + ; CHECK-NEXT: [[OR11:%[0-9]+]]:_(s32) = G_OR [[AND22]], [[SHL11]] + ; CHECK-NEXT: [[BITCAST11:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR11]](s32) + ; CHECK-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<24 x s16>) = G_CONCAT_VECTORS [[BITCAST]](<2 x s16>), [[BITCAST1]](<2 x s16>), [[BITCAST2]](<2 x s16>), [[BITCAST3]](<2 x s16>), [[BITCAST4]](<2 x s16>), [[BITCAST5]](<2 x s16>), [[BITCAST6]](<2 x s16>), [[BITCAST7]](<2 x s16>), [[BITCAST8]](<2 x s16>), [[BITCAST9]](<2 x s16>), [[BITCAST10]](<2 x s16>), [[BITCAST11]](<2 x s16>) + ; CHECK-NEXT: S_ENDPGM 0, implicit [[CONCAT_VECTORS]](<24 x s16>) %0:_(<3 x s64>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5 %1:_(<24 x s8>) = G_BITCAST %0 %2:_(<24 x s16>) = G_ANYEXT %1 @@ -2160,139 +2164,139 @@ body: | ; CHECK-LABEL: name: test_bitcast_v24s8_to_v3s64 ; CHECK: [[COPY:%[0-9]+]]:_(<12 x s16>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5 - ; CHECK: [[COPY1:%[0-9]+]]:_(<12 x s16>) = COPY $vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11 - ; CHECK: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>), [[UV2:%[0-9]+]]:_(<2 x s16>), [[UV3:%[0-9]+]]:_(<2 x s16>), [[UV4:%[0-9]+]]:_(<2 x s16>), [[UV5:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[COPY]](<12 x s16>) - ; CHECK: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>) - ; CHECK: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST]](s32) - ; CHECK: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; CHECK: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) - ; CHECK: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) - ; CHECK: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>) - ; CHECK: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST1]](s32) - ; CHECK: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C]](s32) - ; CHECK: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR1]](s32) - ; CHECK: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[UV2]](<2 x s16>) - ; CHECK: [[TRUNC4:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST2]](s32) - ; CHECK: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C]](s32) - ; CHECK: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR2]](s32) - ; CHECK: [[BITCAST3:%[0-9]+]]:_(s32) = G_BITCAST [[UV3]](<2 x s16>) - ; CHECK: [[TRUNC6:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST3]](s32) - ; CHECK: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST3]], [[C]](s32) - ; CHECK: [[TRUNC7:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR3]](s32) - ; CHECK: [[C1:%[0-9]+]]:_(s16) = G_CONSTANT i16 255 - ; CHECK: [[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC]], [[C1]] - ; CHECK: [[AND1:%[0-9]+]]:_(s16) = G_AND [[TRUNC1]], [[C1]] - ; CHECK: [[C2:%[0-9]+]]:_(s16) = G_CONSTANT i16 8 - ; CHECK: [[SHL:%[0-9]+]]:_(s16) = G_SHL 
[[AND1]], [[C2]](s16) - ; CHECK: [[OR:%[0-9]+]]:_(s16) = G_OR [[AND]], [[SHL]] - ; CHECK: [[AND2:%[0-9]+]]:_(s16) = G_AND [[TRUNC2]], [[C1]] - ; CHECK: [[AND3:%[0-9]+]]:_(s16) = G_AND [[TRUNC3]], [[C1]] - ; CHECK: [[SHL1:%[0-9]+]]:_(s16) = G_SHL [[AND3]], [[C2]](s16) - ; CHECK: [[OR1:%[0-9]+]]:_(s16) = G_OR [[AND2]], [[SHL1]] - ; CHECK: [[AND4:%[0-9]+]]:_(s16) = G_AND [[TRUNC4]], [[C1]] - ; CHECK: [[AND5:%[0-9]+]]:_(s16) = G_AND [[TRUNC5]], [[C1]] - ; CHECK: [[SHL2:%[0-9]+]]:_(s16) = G_SHL [[AND5]], [[C2]](s16) - ; CHECK: [[OR2:%[0-9]+]]:_(s16) = G_OR [[AND4]], [[SHL2]] - ; CHECK: [[AND6:%[0-9]+]]:_(s16) = G_AND [[TRUNC6]], [[C1]] - ; CHECK: [[AND7:%[0-9]+]]:_(s16) = G_AND [[TRUNC7]], [[C1]] - ; CHECK: [[SHL3:%[0-9]+]]:_(s16) = G_SHL [[AND7]], [[C2]](s16) - ; CHECK: [[OR3:%[0-9]+]]:_(s16) = G_OR [[AND6]], [[SHL3]] - ; CHECK: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[OR]](s16) - ; CHECK: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[OR1]](s16) - ; CHECK: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C]](s32) - ; CHECK: [[OR4:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL4]] - ; CHECK: [[ZEXT2:%[0-9]+]]:_(s32) = G_ZEXT [[OR2]](s16) - ; CHECK: [[ZEXT3:%[0-9]+]]:_(s32) = G_ZEXT [[OR3]](s16) - ; CHECK: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[ZEXT3]], [[C]](s32) - ; CHECK: [[OR5:%[0-9]+]]:_(s32) = G_OR [[ZEXT2]], [[SHL5]] - ; CHECK: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[OR4]](s32), [[OR5]](s32) - ; CHECK: [[UV6:%[0-9]+]]:_(<2 x s16>), [[UV7:%[0-9]+]]:_(<2 x s16>), [[UV8:%[0-9]+]]:_(<2 x s16>), [[UV9:%[0-9]+]]:_(<2 x s16>), [[UV10:%[0-9]+]]:_(<2 x s16>), [[UV11:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[COPY]](<12 x s16>) - ; CHECK: [[BITCAST4:%[0-9]+]]:_(s32) = G_BITCAST [[UV10]](<2 x s16>) - ; CHECK: [[TRUNC8:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST4]](s32) - ; CHECK: [[LSHR4:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST4]], [[C]](s32) - ; CHECK: [[TRUNC9:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR4]](s32) - ; CHECK: [[BITCAST5:%[0-9]+]]:_(s32) = G_BITCAST [[UV11]](<2 x s16>) - ; CHECK: [[TRUNC10:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST5]](s32) - ; CHECK: [[LSHR5:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST5]], [[C]](s32) - ; CHECK: [[TRUNC11:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR5]](s32) - ; CHECK: [[UV12:%[0-9]+]]:_(<2 x s16>), [[UV13:%[0-9]+]]:_(<2 x s16>), [[UV14:%[0-9]+]]:_(<2 x s16>), [[UV15:%[0-9]+]]:_(<2 x s16>), [[UV16:%[0-9]+]]:_(<2 x s16>), [[UV17:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[COPY1]](<12 x s16>) - ; CHECK: [[BITCAST6:%[0-9]+]]:_(s32) = G_BITCAST [[UV12]](<2 x s16>) - ; CHECK: [[TRUNC12:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST6]](s32) - ; CHECK: [[LSHR6:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST6]], [[C]](s32) - ; CHECK: [[TRUNC13:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR6]](s32) - ; CHECK: [[BITCAST7:%[0-9]+]]:_(s32) = G_BITCAST [[UV13]](<2 x s16>) - ; CHECK: [[TRUNC14:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST7]](s32) - ; CHECK: [[LSHR7:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST7]], [[C]](s32) - ; CHECK: [[TRUNC15:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR7]](s32) - ; CHECK: [[AND8:%[0-9]+]]:_(s16) = G_AND [[TRUNC8]], [[C1]] - ; CHECK: [[AND9:%[0-9]+]]:_(s16) = G_AND [[TRUNC9]], [[C1]] - ; CHECK: [[SHL6:%[0-9]+]]:_(s16) = G_SHL [[AND9]], [[C2]](s16) - ; CHECK: [[OR6:%[0-9]+]]:_(s16) = G_OR [[AND8]], [[SHL6]] - ; CHECK: [[AND10:%[0-9]+]]:_(s16) = G_AND [[TRUNC10]], [[C1]] - ; CHECK: [[AND11:%[0-9]+]]:_(s16) = G_AND [[TRUNC11]], [[C1]] - ; CHECK: [[SHL7:%[0-9]+]]:_(s16) = G_SHL [[AND11]], [[C2]](s16) - ; CHECK: [[OR7:%[0-9]+]]:_(s16) = G_OR [[AND10]], [[SHL7]] - ; CHECK: [[AND12:%[0-9]+]]:_(s16) = G_AND [[TRUNC12]], [[C1]] - ; CHECK: [[AND13:%[0-9]+]]:_(s16) = G_AND 
[[TRUNC13]], [[C1]] - ; CHECK: [[SHL8:%[0-9]+]]:_(s16) = G_SHL [[AND13]], [[C2]](s16) - ; CHECK: [[OR8:%[0-9]+]]:_(s16) = G_OR [[AND12]], [[SHL8]] - ; CHECK: [[AND14:%[0-9]+]]:_(s16) = G_AND [[TRUNC14]], [[C1]] - ; CHECK: [[AND15:%[0-9]+]]:_(s16) = G_AND [[TRUNC15]], [[C1]] - ; CHECK: [[SHL9:%[0-9]+]]:_(s16) = G_SHL [[AND15]], [[C2]](s16) - ; CHECK: [[OR9:%[0-9]+]]:_(s16) = G_OR [[AND14]], [[SHL9]] - ; CHECK: [[ZEXT4:%[0-9]+]]:_(s32) = G_ZEXT [[OR6]](s16) - ; CHECK: [[ZEXT5:%[0-9]+]]:_(s32) = G_ZEXT [[OR7]](s16) - ; CHECK: [[SHL10:%[0-9]+]]:_(s32) = G_SHL [[ZEXT5]], [[C]](s32) - ; CHECK: [[OR10:%[0-9]+]]:_(s32) = G_OR [[ZEXT4]], [[SHL10]] - ; CHECK: [[ZEXT6:%[0-9]+]]:_(s32) = G_ZEXT [[OR8]](s16) - ; CHECK: [[ZEXT7:%[0-9]+]]:_(s32) = G_ZEXT [[OR9]](s16) - ; CHECK: [[SHL11:%[0-9]+]]:_(s32) = G_SHL [[ZEXT7]], [[C]](s32) - ; CHECK: [[OR11:%[0-9]+]]:_(s32) = G_OR [[ZEXT6]], [[SHL11]] - ; CHECK: [[MV1:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[OR10]](s32), [[OR11]](s32) - ; CHECK: [[UV18:%[0-9]+]]:_(<2 x s16>), [[UV19:%[0-9]+]]:_(<2 x s16>), [[UV20:%[0-9]+]]:_(<2 x s16>), [[UV21:%[0-9]+]]:_(<2 x s16>), [[UV22:%[0-9]+]]:_(<2 x s16>), [[UV23:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[COPY1]](<12 x s16>) - ; CHECK: [[BITCAST8:%[0-9]+]]:_(s32) = G_BITCAST [[UV20]](<2 x s16>) - ; CHECK: [[TRUNC16:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST8]](s32) - ; CHECK: [[LSHR8:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST8]], [[C]](s32) - ; CHECK: [[TRUNC17:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR8]](s32) - ; CHECK: [[BITCAST9:%[0-9]+]]:_(s32) = G_BITCAST [[UV21]](<2 x s16>) - ; CHECK: [[TRUNC18:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST9]](s32) - ; CHECK: [[LSHR9:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST9]], [[C]](s32) - ; CHECK: [[TRUNC19:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR9]](s32) - ; CHECK: [[BITCAST10:%[0-9]+]]:_(s32) = G_BITCAST [[UV22]](<2 x s16>) - ; CHECK: [[TRUNC20:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST10]](s32) - ; CHECK: [[LSHR10:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST10]], [[C]](s32) - ; CHECK: [[TRUNC21:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR10]](s32) - ; CHECK: [[BITCAST11:%[0-9]+]]:_(s32) = G_BITCAST [[UV23]](<2 x s16>) - ; CHECK: [[TRUNC22:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST11]](s32) - ; CHECK: [[LSHR11:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST11]], [[C]](s32) - ; CHECK: [[TRUNC23:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR11]](s32) - ; CHECK: [[AND16:%[0-9]+]]:_(s16) = G_AND [[TRUNC16]], [[C1]] - ; CHECK: [[AND17:%[0-9]+]]:_(s16) = G_AND [[TRUNC17]], [[C1]] - ; CHECK: [[SHL12:%[0-9]+]]:_(s16) = G_SHL [[AND17]], [[C2]](s16) - ; CHECK: [[OR12:%[0-9]+]]:_(s16) = G_OR [[AND16]], [[SHL12]] - ; CHECK: [[AND18:%[0-9]+]]:_(s16) = G_AND [[TRUNC18]], [[C1]] - ; CHECK: [[AND19:%[0-9]+]]:_(s16) = G_AND [[TRUNC19]], [[C1]] - ; CHECK: [[SHL13:%[0-9]+]]:_(s16) = G_SHL [[AND19]], [[C2]](s16) - ; CHECK: [[OR13:%[0-9]+]]:_(s16) = G_OR [[AND18]], [[SHL13]] - ; CHECK: [[AND20:%[0-9]+]]:_(s16) = G_AND [[TRUNC20]], [[C1]] - ; CHECK: [[AND21:%[0-9]+]]:_(s16) = G_AND [[TRUNC21]], [[C1]] - ; CHECK: [[SHL14:%[0-9]+]]:_(s16) = G_SHL [[AND21]], [[C2]](s16) - ; CHECK: [[OR14:%[0-9]+]]:_(s16) = G_OR [[AND20]], [[SHL14]] - ; CHECK: [[AND22:%[0-9]+]]:_(s16) = G_AND [[TRUNC22]], [[C1]] - ; CHECK: [[AND23:%[0-9]+]]:_(s16) = G_AND [[TRUNC23]], [[C1]] - ; CHECK: [[SHL15:%[0-9]+]]:_(s16) = G_SHL [[AND23]], [[C2]](s16) - ; CHECK: [[OR15:%[0-9]+]]:_(s16) = G_OR [[AND22]], [[SHL15]] - ; CHECK: [[ZEXT8:%[0-9]+]]:_(s32) = G_ZEXT [[OR12]](s16) - ; CHECK: [[ZEXT9:%[0-9]+]]:_(s32) = G_ZEXT [[OR13]](s16) - ; CHECK: [[SHL16:%[0-9]+]]:_(s32) = G_SHL [[ZEXT9]], [[C]](s32) - ; CHECK: [[OR16:%[0-9]+]]:_(s32) = 
G_OR [[ZEXT8]], [[SHL16]] - ; CHECK: [[ZEXT10:%[0-9]+]]:_(s32) = G_ZEXT [[OR14]](s16) - ; CHECK: [[ZEXT11:%[0-9]+]]:_(s32) = G_ZEXT [[OR15]](s16) - ; CHECK: [[SHL17:%[0-9]+]]:_(s32) = G_SHL [[ZEXT11]], [[C]](s32) - ; CHECK: [[OR17:%[0-9]+]]:_(s32) = G_OR [[ZEXT10]], [[SHL17]] - ; CHECK: [[MV2:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[OR16]](s32), [[OR17]](s32) - ; CHECK: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s64>) = G_BUILD_VECTOR [[MV]](s64), [[MV1]](s64), [[MV2]](s64) - ; CHECK: S_ENDPGM 0, implicit [[BUILD_VECTOR]](<3 x s64>) + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(<12 x s16>) = COPY $vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11 + ; CHECK-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>), [[UV2:%[0-9]+]]:_(<2 x s16>), [[UV3:%[0-9]+]]:_(<2 x s16>), [[UV4:%[0-9]+]]:_(<2 x s16>), [[UV5:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[COPY]](<12 x s16>) + ; CHECK-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>) + ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST]](s32) + ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; CHECK-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) + ; CHECK-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) + ; CHECK-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>) + ; CHECK-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST1]](s32) + ; CHECK-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C]](s32) + ; CHECK-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR1]](s32) + ; CHECK-NEXT: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[UV2]](<2 x s16>) + ; CHECK-NEXT: [[TRUNC4:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST2]](s32) + ; CHECK-NEXT: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C]](s32) + ; CHECK-NEXT: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR2]](s32) + ; CHECK-NEXT: [[BITCAST3:%[0-9]+]]:_(s32) = G_BITCAST [[UV3]](<2 x s16>) + ; CHECK-NEXT: [[TRUNC6:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST3]](s32) + ; CHECK-NEXT: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST3]], [[C]](s32) + ; CHECK-NEXT: [[TRUNC7:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR3]](s32) + ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s16) = G_CONSTANT i16 255 + ; CHECK-NEXT: [[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC]], [[C1]] + ; CHECK-NEXT: [[AND1:%[0-9]+]]:_(s16) = G_AND [[TRUNC1]], [[C1]] + ; CHECK-NEXT: [[C2:%[0-9]+]]:_(s16) = G_CONSTANT i16 8 + ; CHECK-NEXT: [[SHL:%[0-9]+]]:_(s16) = G_SHL [[AND1]], [[C2]](s16) + ; CHECK-NEXT: [[OR:%[0-9]+]]:_(s16) = G_OR [[AND]], [[SHL]] + ; CHECK-NEXT: [[AND2:%[0-9]+]]:_(s16) = G_AND [[TRUNC2]], [[C1]] + ; CHECK-NEXT: [[AND3:%[0-9]+]]:_(s16) = G_AND [[TRUNC3]], [[C1]] + ; CHECK-NEXT: [[SHL1:%[0-9]+]]:_(s16) = G_SHL [[AND3]], [[C2]](s16) + ; CHECK-NEXT: [[OR1:%[0-9]+]]:_(s16) = G_OR [[AND2]], [[SHL1]] + ; CHECK-NEXT: [[AND4:%[0-9]+]]:_(s16) = G_AND [[TRUNC4]], [[C1]] + ; CHECK-NEXT: [[AND5:%[0-9]+]]:_(s16) = G_AND [[TRUNC5]], [[C1]] + ; CHECK-NEXT: [[SHL2:%[0-9]+]]:_(s16) = G_SHL [[AND5]], [[C2]](s16) + ; CHECK-NEXT: [[OR2:%[0-9]+]]:_(s16) = G_OR [[AND4]], [[SHL2]] + ; CHECK-NEXT: [[AND6:%[0-9]+]]:_(s16) = G_AND [[TRUNC6]], [[C1]] + ; CHECK-NEXT: [[AND7:%[0-9]+]]:_(s16) = G_AND [[TRUNC7]], [[C1]] + ; CHECK-NEXT: [[SHL3:%[0-9]+]]:_(s16) = G_SHL [[AND7]], [[C2]](s16) + ; CHECK-NEXT: [[OR3:%[0-9]+]]:_(s16) = G_OR [[AND6]], [[SHL3]] + ; CHECK-NEXT: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[OR]](s16) + ; CHECK-NEXT: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[OR1]](s16) + ; CHECK-NEXT: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C]](s32) + ; CHECK-NEXT: [[OR4:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL4]] + ; CHECK-NEXT: [[ZEXT2:%[0-9]+]]:_(s32) = G_ZEXT 
[[OR2]](s16) + ; CHECK-NEXT: [[ZEXT3:%[0-9]+]]:_(s32) = G_ZEXT [[OR3]](s16) + ; CHECK-NEXT: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[ZEXT3]], [[C]](s32) + ; CHECK-NEXT: [[OR5:%[0-9]+]]:_(s32) = G_OR [[ZEXT2]], [[SHL5]] + ; CHECK-NEXT: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[OR4]](s32), [[OR5]](s32) + ; CHECK-NEXT: [[UV6:%[0-9]+]]:_(<2 x s16>), [[UV7:%[0-9]+]]:_(<2 x s16>), [[UV8:%[0-9]+]]:_(<2 x s16>), [[UV9:%[0-9]+]]:_(<2 x s16>), [[UV10:%[0-9]+]]:_(<2 x s16>), [[UV11:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[COPY]](<12 x s16>) + ; CHECK-NEXT: [[BITCAST4:%[0-9]+]]:_(s32) = G_BITCAST [[UV10]](<2 x s16>) + ; CHECK-NEXT: [[TRUNC8:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST4]](s32) + ; CHECK-NEXT: [[LSHR4:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST4]], [[C]](s32) + ; CHECK-NEXT: [[TRUNC9:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR4]](s32) + ; CHECK-NEXT: [[BITCAST5:%[0-9]+]]:_(s32) = G_BITCAST [[UV11]](<2 x s16>) + ; CHECK-NEXT: [[TRUNC10:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST5]](s32) + ; CHECK-NEXT: [[LSHR5:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST5]], [[C]](s32) + ; CHECK-NEXT: [[TRUNC11:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR5]](s32) + ; CHECK-NEXT: [[UV12:%[0-9]+]]:_(<2 x s16>), [[UV13:%[0-9]+]]:_(<2 x s16>), [[UV14:%[0-9]+]]:_(<2 x s16>), [[UV15:%[0-9]+]]:_(<2 x s16>), [[UV16:%[0-9]+]]:_(<2 x s16>), [[UV17:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[COPY1]](<12 x s16>) + ; CHECK-NEXT: [[BITCAST6:%[0-9]+]]:_(s32) = G_BITCAST [[UV12]](<2 x s16>) + ; CHECK-NEXT: [[TRUNC12:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST6]](s32) + ; CHECK-NEXT: [[LSHR6:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST6]], [[C]](s32) + ; CHECK-NEXT: [[TRUNC13:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR6]](s32) + ; CHECK-NEXT: [[BITCAST7:%[0-9]+]]:_(s32) = G_BITCAST [[UV13]](<2 x s16>) + ; CHECK-NEXT: [[TRUNC14:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST7]](s32) + ; CHECK-NEXT: [[LSHR7:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST7]], [[C]](s32) + ; CHECK-NEXT: [[TRUNC15:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR7]](s32) + ; CHECK-NEXT: [[AND8:%[0-9]+]]:_(s16) = G_AND [[TRUNC8]], [[C1]] + ; CHECK-NEXT: [[AND9:%[0-9]+]]:_(s16) = G_AND [[TRUNC9]], [[C1]] + ; CHECK-NEXT: [[SHL6:%[0-9]+]]:_(s16) = G_SHL [[AND9]], [[C2]](s16) + ; CHECK-NEXT: [[OR6:%[0-9]+]]:_(s16) = G_OR [[AND8]], [[SHL6]] + ; CHECK-NEXT: [[AND10:%[0-9]+]]:_(s16) = G_AND [[TRUNC10]], [[C1]] + ; CHECK-NEXT: [[AND11:%[0-9]+]]:_(s16) = G_AND [[TRUNC11]], [[C1]] + ; CHECK-NEXT: [[SHL7:%[0-9]+]]:_(s16) = G_SHL [[AND11]], [[C2]](s16) + ; CHECK-NEXT: [[OR7:%[0-9]+]]:_(s16) = G_OR [[AND10]], [[SHL7]] + ; CHECK-NEXT: [[AND12:%[0-9]+]]:_(s16) = G_AND [[TRUNC12]], [[C1]] + ; CHECK-NEXT: [[AND13:%[0-9]+]]:_(s16) = G_AND [[TRUNC13]], [[C1]] + ; CHECK-NEXT: [[SHL8:%[0-9]+]]:_(s16) = G_SHL [[AND13]], [[C2]](s16) + ; CHECK-NEXT: [[OR8:%[0-9]+]]:_(s16) = G_OR [[AND12]], [[SHL8]] + ; CHECK-NEXT: [[AND14:%[0-9]+]]:_(s16) = G_AND [[TRUNC14]], [[C1]] + ; CHECK-NEXT: [[AND15:%[0-9]+]]:_(s16) = G_AND [[TRUNC15]], [[C1]] + ; CHECK-NEXT: [[SHL9:%[0-9]+]]:_(s16) = G_SHL [[AND15]], [[C2]](s16) + ; CHECK-NEXT: [[OR9:%[0-9]+]]:_(s16) = G_OR [[AND14]], [[SHL9]] + ; CHECK-NEXT: [[ZEXT4:%[0-9]+]]:_(s32) = G_ZEXT [[OR6]](s16) + ; CHECK-NEXT: [[ZEXT5:%[0-9]+]]:_(s32) = G_ZEXT [[OR7]](s16) + ; CHECK-NEXT: [[SHL10:%[0-9]+]]:_(s32) = G_SHL [[ZEXT5]], [[C]](s32) + ; CHECK-NEXT: [[OR10:%[0-9]+]]:_(s32) = G_OR [[ZEXT4]], [[SHL10]] + ; CHECK-NEXT: [[ZEXT6:%[0-9]+]]:_(s32) = G_ZEXT [[OR8]](s16) + ; CHECK-NEXT: [[ZEXT7:%[0-9]+]]:_(s32) = G_ZEXT [[OR9]](s16) + ; CHECK-NEXT: [[SHL11:%[0-9]+]]:_(s32) = G_SHL [[ZEXT7]], [[C]](s32) + ; CHECK-NEXT: [[OR11:%[0-9]+]]:_(s32) = G_OR [[ZEXT6]], [[SHL11]] 
+ ; CHECK-NEXT: [[MV1:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[OR10]](s32), [[OR11]](s32) + ; CHECK-NEXT: [[UV18:%[0-9]+]]:_(<2 x s16>), [[UV19:%[0-9]+]]:_(<2 x s16>), [[UV20:%[0-9]+]]:_(<2 x s16>), [[UV21:%[0-9]+]]:_(<2 x s16>), [[UV22:%[0-9]+]]:_(<2 x s16>), [[UV23:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[COPY1]](<12 x s16>) + ; CHECK-NEXT: [[BITCAST8:%[0-9]+]]:_(s32) = G_BITCAST [[UV20]](<2 x s16>) + ; CHECK-NEXT: [[TRUNC16:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST8]](s32) + ; CHECK-NEXT: [[LSHR8:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST8]], [[C]](s32) + ; CHECK-NEXT: [[TRUNC17:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR8]](s32) + ; CHECK-NEXT: [[BITCAST9:%[0-9]+]]:_(s32) = G_BITCAST [[UV21]](<2 x s16>) + ; CHECK-NEXT: [[TRUNC18:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST9]](s32) + ; CHECK-NEXT: [[LSHR9:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST9]], [[C]](s32) + ; CHECK-NEXT: [[TRUNC19:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR9]](s32) + ; CHECK-NEXT: [[BITCAST10:%[0-9]+]]:_(s32) = G_BITCAST [[UV22]](<2 x s16>) + ; CHECK-NEXT: [[TRUNC20:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST10]](s32) + ; CHECK-NEXT: [[LSHR10:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST10]], [[C]](s32) + ; CHECK-NEXT: [[TRUNC21:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR10]](s32) + ; CHECK-NEXT: [[BITCAST11:%[0-9]+]]:_(s32) = G_BITCAST [[UV23]](<2 x s16>) + ; CHECK-NEXT: [[TRUNC22:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST11]](s32) + ; CHECK-NEXT: [[LSHR11:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST11]], [[C]](s32) + ; CHECK-NEXT: [[TRUNC23:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR11]](s32) + ; CHECK-NEXT: [[AND16:%[0-9]+]]:_(s16) = G_AND [[TRUNC16]], [[C1]] + ; CHECK-NEXT: [[AND17:%[0-9]+]]:_(s16) = G_AND [[TRUNC17]], [[C1]] + ; CHECK-NEXT: [[SHL12:%[0-9]+]]:_(s16) = G_SHL [[AND17]], [[C2]](s16) + ; CHECK-NEXT: [[OR12:%[0-9]+]]:_(s16) = G_OR [[AND16]], [[SHL12]] + ; CHECK-NEXT: [[AND18:%[0-9]+]]:_(s16) = G_AND [[TRUNC18]], [[C1]] + ; CHECK-NEXT: [[AND19:%[0-9]+]]:_(s16) = G_AND [[TRUNC19]], [[C1]] + ; CHECK-NEXT: [[SHL13:%[0-9]+]]:_(s16) = G_SHL [[AND19]], [[C2]](s16) + ; CHECK-NEXT: [[OR13:%[0-9]+]]:_(s16) = G_OR [[AND18]], [[SHL13]] + ; CHECK-NEXT: [[AND20:%[0-9]+]]:_(s16) = G_AND [[TRUNC20]], [[C1]] + ; CHECK-NEXT: [[AND21:%[0-9]+]]:_(s16) = G_AND [[TRUNC21]], [[C1]] + ; CHECK-NEXT: [[SHL14:%[0-9]+]]:_(s16) = G_SHL [[AND21]], [[C2]](s16) + ; CHECK-NEXT: [[OR14:%[0-9]+]]:_(s16) = G_OR [[AND20]], [[SHL14]] + ; CHECK-NEXT: [[AND22:%[0-9]+]]:_(s16) = G_AND [[TRUNC22]], [[C1]] + ; CHECK-NEXT: [[AND23:%[0-9]+]]:_(s16) = G_AND [[TRUNC23]], [[C1]] + ; CHECK-NEXT: [[SHL15:%[0-9]+]]:_(s16) = G_SHL [[AND23]], [[C2]](s16) + ; CHECK-NEXT: [[OR15:%[0-9]+]]:_(s16) = G_OR [[AND22]], [[SHL15]] + ; CHECK-NEXT: [[ZEXT8:%[0-9]+]]:_(s32) = G_ZEXT [[OR12]](s16) + ; CHECK-NEXT: [[ZEXT9:%[0-9]+]]:_(s32) = G_ZEXT [[OR13]](s16) + ; CHECK-NEXT: [[SHL16:%[0-9]+]]:_(s32) = G_SHL [[ZEXT9]], [[C]](s32) + ; CHECK-NEXT: [[OR16:%[0-9]+]]:_(s32) = G_OR [[ZEXT8]], [[SHL16]] + ; CHECK-NEXT: [[ZEXT10:%[0-9]+]]:_(s32) = G_ZEXT [[OR14]](s16) + ; CHECK-NEXT: [[ZEXT11:%[0-9]+]]:_(s32) = G_ZEXT [[OR15]](s16) + ; CHECK-NEXT: [[SHL17:%[0-9]+]]:_(s32) = G_SHL [[ZEXT11]], [[C]](s32) + ; CHECK-NEXT: [[OR17:%[0-9]+]]:_(s32) = G_OR [[ZEXT10]], [[SHL17]] + ; CHECK-NEXT: [[MV2:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[OR16]](s32), [[OR17]](s32) + ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s64>) = G_BUILD_VECTOR [[MV]](s64), [[MV1]](s64), [[MV2]](s64) + ; CHECK-NEXT: S_ENDPGM 0, implicit [[BUILD_VECTOR]](<3 x s64>) %0:_(<12 x s16>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5 %1:_(<12 x s16>) = COPY $vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11 %2:_(<24 x s16>) = 
G_CONCAT_VECTORS %0, %1 @@ -2309,60 +2313,60 @@ body: | ; CHECK-LABEL: name: test_bitcast_v4s16_to_v8s8 ; CHECK: [[COPY:%[0-9]+]]:_(<4 x s16>) = COPY $vgpr0_vgpr1 - ; CHECK: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[COPY]](<4 x s16>) - ; CHECK: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>) - ; CHECK: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST]](s32) - ; CHECK: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; CHECK: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) - ; CHECK: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) - ; CHECK: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>) - ; CHECK: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST1]](s32) - ; CHECK: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C]](s32) - ; CHECK: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR1]](s32) - ; CHECK: [[C1:%[0-9]+]]:_(s16) = G_CONSTANT i16 8 - ; CHECK: [[LSHR2:%[0-9]+]]:_(s16) = G_LSHR [[TRUNC]], [[C1]](s16) - ; CHECK: [[LSHR3:%[0-9]+]]:_(s16) = G_LSHR [[TRUNC1]], [[C1]](s16) - ; CHECK: [[LSHR4:%[0-9]+]]:_(s16) = G_LSHR [[TRUNC2]], [[C1]](s16) - ; CHECK: [[LSHR5:%[0-9]+]]:_(s16) = G_LSHR [[TRUNC3]], [[C1]](s16) - ; CHECK: [[ADD:%[0-9]+]]:_(s16) = G_ADD [[TRUNC]], [[TRUNC]] - ; CHECK: [[ADD1:%[0-9]+]]:_(s16) = G_ADD [[LSHR2]], [[LSHR2]] - ; CHECK: [[ADD2:%[0-9]+]]:_(s16) = G_ADD [[TRUNC1]], [[TRUNC1]] - ; CHECK: [[ADD3:%[0-9]+]]:_(s16) = G_ADD [[LSHR3]], [[LSHR3]] - ; CHECK: [[ADD4:%[0-9]+]]:_(s16) = G_ADD [[TRUNC2]], [[TRUNC2]] - ; CHECK: [[ADD5:%[0-9]+]]:_(s16) = G_ADD [[LSHR4]], [[LSHR4]] - ; CHECK: [[ADD6:%[0-9]+]]:_(s16) = G_ADD [[TRUNC3]], [[TRUNC3]] - ; CHECK: [[ADD7:%[0-9]+]]:_(s16) = G_ADD [[LSHR5]], [[LSHR5]] - ; CHECK: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[ADD]](s16) - ; CHECK: [[ANYEXT1:%[0-9]+]]:_(s32) = G_ANYEXT [[ADD1]](s16) - ; CHECK: [[ANYEXT2:%[0-9]+]]:_(s32) = G_ANYEXT [[ADD2]](s16) - ; CHECK: [[ANYEXT3:%[0-9]+]]:_(s32) = G_ANYEXT [[ADD3]](s16) - ; CHECK: [[ANYEXT4:%[0-9]+]]:_(s32) = G_ANYEXT [[ADD4]](s16) - ; CHECK: [[ANYEXT5:%[0-9]+]]:_(s32) = G_ANYEXT [[ADD5]](s16) - ; CHECK: [[ANYEXT6:%[0-9]+]]:_(s32) = G_ANYEXT [[ADD6]](s16) - ; CHECK: [[ANYEXT7:%[0-9]+]]:_(s32) = G_ANYEXT [[ADD7]](s16) - ; CHECK: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 - ; CHECK: [[AND:%[0-9]+]]:_(s32) = G_AND [[ANYEXT]], [[C2]] - ; CHECK: [[AND1:%[0-9]+]]:_(s32) = G_AND [[ANYEXT1]], [[C2]] - ; CHECK: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C]](s32) - ; CHECK: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]] - ; CHECK: [[BITCAST2:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) - ; CHECK: [[AND2:%[0-9]+]]:_(s32) = G_AND [[ANYEXT2]], [[C2]] - ; CHECK: [[AND3:%[0-9]+]]:_(s32) = G_AND [[ANYEXT3]], [[C2]] - ; CHECK: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C]](s32) - ; CHECK: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL1]] - ; CHECK: [[BITCAST3:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32) - ; CHECK: [[AND4:%[0-9]+]]:_(s32) = G_AND [[ANYEXT4]], [[C2]] - ; CHECK: [[AND5:%[0-9]+]]:_(s32) = G_AND [[ANYEXT5]], [[C2]] - ; CHECK: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[C]](s32) - ; CHECK: [[OR2:%[0-9]+]]:_(s32) = G_OR [[AND4]], [[SHL2]] - ; CHECK: [[BITCAST4:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR2]](s32) - ; CHECK: [[AND6:%[0-9]+]]:_(s32) = G_AND [[ANYEXT6]], [[C2]] - ; CHECK: [[AND7:%[0-9]+]]:_(s32) = G_AND [[ANYEXT7]], [[C2]] - ; CHECK: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[AND7]], [[C]](s32) - ; CHECK: [[OR3:%[0-9]+]]:_(s32) = G_OR [[AND6]], [[SHL3]] - ; CHECK: [[BITCAST5:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR3]](s32) - ; CHECK: 
[[CONCAT_VECTORS:%[0-9]+]]:_(<8 x s16>) = G_CONCAT_VECTORS [[BITCAST2]](<2 x s16>), [[BITCAST3]](<2 x s16>), [[BITCAST4]](<2 x s16>), [[BITCAST5]](<2 x s16>) - ; CHECK: S_ENDPGM 0, implicit [[CONCAT_VECTORS]](<8 x s16>) + ; CHECK-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[COPY]](<4 x s16>) + ; CHECK-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>) + ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST]](s32) + ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; CHECK-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) + ; CHECK-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) + ; CHECK-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>) + ; CHECK-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST1]](s32) + ; CHECK-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C]](s32) + ; CHECK-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR1]](s32) + ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s16) = G_CONSTANT i16 8 + ; CHECK-NEXT: [[LSHR2:%[0-9]+]]:_(s16) = G_LSHR [[TRUNC]], [[C1]](s16) + ; CHECK-NEXT: [[LSHR3:%[0-9]+]]:_(s16) = G_LSHR [[TRUNC1]], [[C1]](s16) + ; CHECK-NEXT: [[LSHR4:%[0-9]+]]:_(s16) = G_LSHR [[TRUNC2]], [[C1]](s16) + ; CHECK-NEXT: [[LSHR5:%[0-9]+]]:_(s16) = G_LSHR [[TRUNC3]], [[C1]](s16) + ; CHECK-NEXT: [[ADD:%[0-9]+]]:_(s16) = G_ADD [[TRUNC]], [[TRUNC]] + ; CHECK-NEXT: [[ADD1:%[0-9]+]]:_(s16) = G_ADD [[LSHR2]], [[LSHR2]] + ; CHECK-NEXT: [[ADD2:%[0-9]+]]:_(s16) = G_ADD [[TRUNC1]], [[TRUNC1]] + ; CHECK-NEXT: [[ADD3:%[0-9]+]]:_(s16) = G_ADD [[LSHR3]], [[LSHR3]] + ; CHECK-NEXT: [[ADD4:%[0-9]+]]:_(s16) = G_ADD [[TRUNC2]], [[TRUNC2]] + ; CHECK-NEXT: [[ADD5:%[0-9]+]]:_(s16) = G_ADD [[LSHR4]], [[LSHR4]] + ; CHECK-NEXT: [[ADD6:%[0-9]+]]:_(s16) = G_ADD [[TRUNC3]], [[TRUNC3]] + ; CHECK-NEXT: [[ADD7:%[0-9]+]]:_(s16) = G_ADD [[LSHR5]], [[LSHR5]] + ; CHECK-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[ADD]](s16) + ; CHECK-NEXT: [[ANYEXT1:%[0-9]+]]:_(s32) = G_ANYEXT [[ADD1]](s16) + ; CHECK-NEXT: [[ANYEXT2:%[0-9]+]]:_(s32) = G_ANYEXT [[ADD2]](s16) + ; CHECK-NEXT: [[ANYEXT3:%[0-9]+]]:_(s32) = G_ANYEXT [[ADD3]](s16) + ; CHECK-NEXT: [[ANYEXT4:%[0-9]+]]:_(s32) = G_ANYEXT [[ADD4]](s16) + ; CHECK-NEXT: [[ANYEXT5:%[0-9]+]]:_(s32) = G_ANYEXT [[ADD5]](s16) + ; CHECK-NEXT: [[ANYEXT6:%[0-9]+]]:_(s32) = G_ANYEXT [[ADD6]](s16) + ; CHECK-NEXT: [[ANYEXT7:%[0-9]+]]:_(s32) = G_ANYEXT [[ADD7]](s16) + ; CHECK-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 + ; CHECK-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[ANYEXT]], [[C2]] + ; CHECK-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[ANYEXT1]], [[C2]] + ; CHECK-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C]](s32) + ; CHECK-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]] + ; CHECK-NEXT: [[BITCAST2:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) + ; CHECK-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[ANYEXT2]], [[C2]] + ; CHECK-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[ANYEXT3]], [[C2]] + ; CHECK-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C]](s32) + ; CHECK-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL1]] + ; CHECK-NEXT: [[BITCAST3:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32) + ; CHECK-NEXT: [[AND4:%[0-9]+]]:_(s32) = G_AND [[ANYEXT4]], [[C2]] + ; CHECK-NEXT: [[AND5:%[0-9]+]]:_(s32) = G_AND [[ANYEXT5]], [[C2]] + ; CHECK-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[C]](s32) + ; CHECK-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[AND4]], [[SHL2]] + ; CHECK-NEXT: [[BITCAST4:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR2]](s32) + ; CHECK-NEXT: [[AND6:%[0-9]+]]:_(s32) = G_AND 
[[ANYEXT6]], [[C2]] + ; CHECK-NEXT: [[AND7:%[0-9]+]]:_(s32) = G_AND [[ANYEXT7]], [[C2]] + ; CHECK-NEXT: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[AND7]], [[C]](s32) + ; CHECK-NEXT: [[OR3:%[0-9]+]]:_(s32) = G_OR [[AND6]], [[SHL3]] + ; CHECK-NEXT: [[BITCAST5:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR3]](s32) + ; CHECK-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<8 x s16>) = G_CONCAT_VECTORS [[BITCAST2]](<2 x s16>), [[BITCAST3]](<2 x s16>), [[BITCAST4]](<2 x s16>), [[BITCAST5]](<2 x s16>) + ; CHECK-NEXT: S_ENDPGM 0, implicit [[CONCAT_VECTORS]](<8 x s16>) %0:_(<4 x s16>) = COPY $vgpr0_vgpr1 %1:_(<8 x s8>) = G_BITCAST %0 %2:_(<8 x s8>) = G_ADD %1, %1 @@ -2378,61 +2382,61 @@ body: | ; CHECK-LABEL: name: test_bitcast_v8s8_to_v4s16 ; CHECK: [[COPY:%[0-9]+]]:_(<8 x s16>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3 - ; CHECK: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>), [[UV2:%[0-9]+]]:_(<2 x s16>), [[UV3:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[COPY]](<8 x s16>) - ; CHECK: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>) - ; CHECK: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST]](s32) - ; CHECK: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; CHECK: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) - ; CHECK: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) - ; CHECK: [[C1:%[0-9]+]]:_(s16) = G_CONSTANT i16 255 - ; CHECK: [[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC]], [[C1]] - ; CHECK: [[AND1:%[0-9]+]]:_(s16) = G_AND [[TRUNC1]], [[C1]] - ; CHECK: [[C2:%[0-9]+]]:_(s16) = G_CONSTANT i16 8 - ; CHECK: [[SHL:%[0-9]+]]:_(s16) = G_SHL [[AND1]], [[C2]](s16) - ; CHECK: [[OR:%[0-9]+]]:_(s16) = G_OR [[AND]], [[SHL]] - ; CHECK: [[UV4:%[0-9]+]]:_(<2 x s16>), [[UV5:%[0-9]+]]:_(<2 x s16>), [[UV6:%[0-9]+]]:_(<2 x s16>), [[UV7:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[COPY]](<8 x s16>) - ; CHECK: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV5]](<2 x s16>) - ; CHECK: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST1]](s32) - ; CHECK: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C]](s32) - ; CHECK: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR1]](s32) - ; CHECK: [[AND2:%[0-9]+]]:_(s16) = G_AND [[TRUNC2]], [[C1]] - ; CHECK: [[AND3:%[0-9]+]]:_(s16) = G_AND [[TRUNC3]], [[C1]] - ; CHECK: [[SHL1:%[0-9]+]]:_(s16) = G_SHL [[AND3]], [[C2]](s16) - ; CHECK: [[OR1:%[0-9]+]]:_(s16) = G_OR [[AND2]], [[SHL1]] - ; CHECK: [[UV8:%[0-9]+]]:_(<2 x s16>), [[UV9:%[0-9]+]]:_(<2 x s16>), [[UV10:%[0-9]+]]:_(<2 x s16>), [[UV11:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[COPY]](<8 x s16>) - ; CHECK: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[UV10]](<2 x s16>) - ; CHECK: [[TRUNC4:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST2]](s32) - ; CHECK: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C]](s32) - ; CHECK: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR2]](s32) - ; CHECK: [[AND4:%[0-9]+]]:_(s16) = G_AND [[TRUNC4]], [[C1]] - ; CHECK: [[AND5:%[0-9]+]]:_(s16) = G_AND [[TRUNC5]], [[C1]] - ; CHECK: [[SHL2:%[0-9]+]]:_(s16) = G_SHL [[AND5]], [[C2]](s16) - ; CHECK: [[OR2:%[0-9]+]]:_(s16) = G_OR [[AND4]], [[SHL2]] - ; CHECK: [[UV12:%[0-9]+]]:_(<2 x s16>), [[UV13:%[0-9]+]]:_(<2 x s16>), [[UV14:%[0-9]+]]:_(<2 x s16>), [[UV15:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[COPY]](<8 x s16>) - ; CHECK: [[BITCAST3:%[0-9]+]]:_(s32) = G_BITCAST [[UV15]](<2 x s16>) - ; CHECK: [[TRUNC6:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST3]](s32) - ; CHECK: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST3]], [[C]](s32) - ; CHECK: [[TRUNC7:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR3]](s32) - ; CHECK: [[AND6:%[0-9]+]]:_(s16) = G_AND [[TRUNC6]], [[C1]] - ; CHECK: [[AND7:%[0-9]+]]:_(s16) = G_AND [[TRUNC7]], [[C1]] - 
; CHECK: [[SHL3:%[0-9]+]]:_(s16) = G_SHL [[AND7]], [[C2]](s16) - ; CHECK: [[OR3:%[0-9]+]]:_(s16) = G_OR [[AND6]], [[SHL3]] - ; CHECK: [[ADD:%[0-9]+]]:_(s16) = G_ADD [[OR]], [[OR]] - ; CHECK: [[ADD1:%[0-9]+]]:_(s16) = G_ADD [[OR1]], [[OR1]] - ; CHECK: [[ADD2:%[0-9]+]]:_(s16) = G_ADD [[OR2]], [[OR2]] - ; CHECK: [[ADD3:%[0-9]+]]:_(s16) = G_ADD [[OR3]], [[OR3]] - ; CHECK: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[ADD]](s16) - ; CHECK: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[ADD1]](s16) - ; CHECK: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C]](s32) - ; CHECK: [[OR4:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL4]] - ; CHECK: [[BITCAST4:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR4]](s32) - ; CHECK: [[ZEXT2:%[0-9]+]]:_(s32) = G_ZEXT [[ADD2]](s16) - ; CHECK: [[ZEXT3:%[0-9]+]]:_(s32) = G_ZEXT [[ADD3]](s16) - ; CHECK: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[ZEXT3]], [[C]](s32) - ; CHECK: [[OR5:%[0-9]+]]:_(s32) = G_OR [[ZEXT2]], [[SHL5]] - ; CHECK: [[BITCAST5:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR5]](s32) - ; CHECK: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BITCAST4]](<2 x s16>), [[BITCAST5]](<2 x s16>) - ; CHECK: S_ENDPGM 0, implicit [[CONCAT_VECTORS]](<4 x s16>) + ; CHECK-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>), [[UV2:%[0-9]+]]:_(<2 x s16>), [[UV3:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[COPY]](<8 x s16>) + ; CHECK-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>) + ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST]](s32) + ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; CHECK-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) + ; CHECK-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) + ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s16) = G_CONSTANT i16 255 + ; CHECK-NEXT: [[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC]], [[C1]] + ; CHECK-NEXT: [[AND1:%[0-9]+]]:_(s16) = G_AND [[TRUNC1]], [[C1]] + ; CHECK-NEXT: [[C2:%[0-9]+]]:_(s16) = G_CONSTANT i16 8 + ; CHECK-NEXT: [[SHL:%[0-9]+]]:_(s16) = G_SHL [[AND1]], [[C2]](s16) + ; CHECK-NEXT: [[OR:%[0-9]+]]:_(s16) = G_OR [[AND]], [[SHL]] + ; CHECK-NEXT: [[UV4:%[0-9]+]]:_(<2 x s16>), [[UV5:%[0-9]+]]:_(<2 x s16>), [[UV6:%[0-9]+]]:_(<2 x s16>), [[UV7:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[COPY]](<8 x s16>) + ; CHECK-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV5]](<2 x s16>) + ; CHECK-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST1]](s32) + ; CHECK-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C]](s32) + ; CHECK-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR1]](s32) + ; CHECK-NEXT: [[AND2:%[0-9]+]]:_(s16) = G_AND [[TRUNC2]], [[C1]] + ; CHECK-NEXT: [[AND3:%[0-9]+]]:_(s16) = G_AND [[TRUNC3]], [[C1]] + ; CHECK-NEXT: [[SHL1:%[0-9]+]]:_(s16) = G_SHL [[AND3]], [[C2]](s16) + ; CHECK-NEXT: [[OR1:%[0-9]+]]:_(s16) = G_OR [[AND2]], [[SHL1]] + ; CHECK-NEXT: [[UV8:%[0-9]+]]:_(<2 x s16>), [[UV9:%[0-9]+]]:_(<2 x s16>), [[UV10:%[0-9]+]]:_(<2 x s16>), [[UV11:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[COPY]](<8 x s16>) + ; CHECK-NEXT: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[UV10]](<2 x s16>) + ; CHECK-NEXT: [[TRUNC4:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST2]](s32) + ; CHECK-NEXT: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C]](s32) + ; CHECK-NEXT: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR2]](s32) + ; CHECK-NEXT: [[AND4:%[0-9]+]]:_(s16) = G_AND [[TRUNC4]], [[C1]] + ; CHECK-NEXT: [[AND5:%[0-9]+]]:_(s16) = G_AND [[TRUNC5]], [[C1]] + ; CHECK-NEXT: [[SHL2:%[0-9]+]]:_(s16) = G_SHL [[AND5]], [[C2]](s16) + ; CHECK-NEXT: [[OR2:%[0-9]+]]:_(s16) = G_OR [[AND4]], [[SHL2]] + ; CHECK-NEXT: 
[[UV12:%[0-9]+]]:_(<2 x s16>), [[UV13:%[0-9]+]]:_(<2 x s16>), [[UV14:%[0-9]+]]:_(<2 x s16>), [[UV15:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[COPY]](<8 x s16>) + ; CHECK-NEXT: [[BITCAST3:%[0-9]+]]:_(s32) = G_BITCAST [[UV15]](<2 x s16>) + ; CHECK-NEXT: [[TRUNC6:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST3]](s32) + ; CHECK-NEXT: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST3]], [[C]](s32) + ; CHECK-NEXT: [[TRUNC7:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR3]](s32) + ; CHECK-NEXT: [[AND6:%[0-9]+]]:_(s16) = G_AND [[TRUNC6]], [[C1]] + ; CHECK-NEXT: [[AND7:%[0-9]+]]:_(s16) = G_AND [[TRUNC7]], [[C1]] + ; CHECK-NEXT: [[SHL3:%[0-9]+]]:_(s16) = G_SHL [[AND7]], [[C2]](s16) + ; CHECK-NEXT: [[OR3:%[0-9]+]]:_(s16) = G_OR [[AND6]], [[SHL3]] + ; CHECK-NEXT: [[ADD:%[0-9]+]]:_(s16) = G_ADD [[OR]], [[OR]] + ; CHECK-NEXT: [[ADD1:%[0-9]+]]:_(s16) = G_ADD [[OR1]], [[OR1]] + ; CHECK-NEXT: [[ADD2:%[0-9]+]]:_(s16) = G_ADD [[OR2]], [[OR2]] + ; CHECK-NEXT: [[ADD3:%[0-9]+]]:_(s16) = G_ADD [[OR3]], [[OR3]] + ; CHECK-NEXT: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[ADD]](s16) + ; CHECK-NEXT: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[ADD1]](s16) + ; CHECK-NEXT: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C]](s32) + ; CHECK-NEXT: [[OR4:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL4]] + ; CHECK-NEXT: [[BITCAST4:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR4]](s32) + ; CHECK-NEXT: [[ZEXT2:%[0-9]+]]:_(s32) = G_ZEXT [[ADD2]](s16) + ; CHECK-NEXT: [[ZEXT3:%[0-9]+]]:_(s32) = G_ZEXT [[ADD3]](s16) + ; CHECK-NEXT: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[ZEXT3]], [[C]](s32) + ; CHECK-NEXT: [[OR5:%[0-9]+]]:_(s32) = G_OR [[ZEXT2]], [[SHL5]] + ; CHECK-NEXT: [[BITCAST5:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR5]](s32) + ; CHECK-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BITCAST4]](<2 x s16>), [[BITCAST5]](<2 x s16>) + ; CHECK-NEXT: S_ENDPGM 0, implicit [[CONCAT_VECTORS]](<4 x s16>) %0:_(<8 x s16>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3 %1:_(<8 x s8>) = G_TRUNC %0 %2:_(<4 x s16>) = G_BITCAST %1 @@ -2448,43 +2452,43 @@ body: | ; CHECK-LABEL: name: test_bitcast_v64s32_to_v32s64 ; CHECK: [[COPY:%[0-9]+]]:_(<32 x s32>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 - ; CHECK: [[UV:%[0-9]+]]:_(<2 x s32>), [[UV1:%[0-9]+]]:_(<2 x s32>), [[UV2:%[0-9]+]]:_(<2 x s32>), [[UV3:%[0-9]+]]:_(<2 x s32>), [[UV4:%[0-9]+]]:_(<2 x s32>), [[UV5:%[0-9]+]]:_(<2 x s32>), [[UV6:%[0-9]+]]:_(<2 x s32>), [[UV7:%[0-9]+]]:_(<2 x s32>), [[UV8:%[0-9]+]]:_(<2 x s32>), [[UV9:%[0-9]+]]:_(<2 x s32>), [[UV10:%[0-9]+]]:_(<2 x s32>), [[UV11:%[0-9]+]]:_(<2 x s32>), [[UV12:%[0-9]+]]:_(<2 x s32>), [[UV13:%[0-9]+]]:_(<2 x s32>), [[UV14:%[0-9]+]]:_(<2 x s32>), [[UV15:%[0-9]+]]:_(<2 x s32>) = G_UNMERGE_VALUES [[COPY]](<32 x s32>) - ; CHECK: [[UV16:%[0-9]+]]:_(<2 x s32>), [[UV17:%[0-9]+]]:_(<2 x s32>), [[UV18:%[0-9]+]]:_(<2 x s32>), [[UV19:%[0-9]+]]:_(<2 x s32>), [[UV20:%[0-9]+]]:_(<2 x s32>), [[UV21:%[0-9]+]]:_(<2 x s32>), [[UV22:%[0-9]+]]:_(<2 x s32>), [[UV23:%[0-9]+]]:_(<2 x s32>), [[UV24:%[0-9]+]]:_(<2 x s32>), [[UV25:%[0-9]+]]:_(<2 x s32>), [[UV26:%[0-9]+]]:_(<2 x s32>), [[UV27:%[0-9]+]]:_(<2 x s32>), [[UV28:%[0-9]+]]:_(<2 x s32>), [[UV29:%[0-9]+]]:_(<2 x s32>), [[UV30:%[0-9]+]]:_(<2 x s32>), [[UV31:%[0-9]+]]:_(<2 x s32>) = G_UNMERGE_VALUES [[COPY]](<32 x s32>) - ; CHECK: [[BITCAST:%[0-9]+]]:_(s64) = G_BITCAST [[UV]](<2 x s32>) - ; CHECK: [[BITCAST1:%[0-9]+]]:_(s64) = G_BITCAST [[UV1]](<2 x s32>) - ; CHECK: [[BITCAST2:%[0-9]+]]:_(s64) = 
G_BITCAST [[UV2]](<2 x s32>) - ; CHECK: [[BITCAST3:%[0-9]+]]:_(s64) = G_BITCAST [[UV3]](<2 x s32>) - ; CHECK: [[BITCAST4:%[0-9]+]]:_(s64) = G_BITCAST [[UV4]](<2 x s32>) - ; CHECK: [[BITCAST5:%[0-9]+]]:_(s64) = G_BITCAST [[UV5]](<2 x s32>) - ; CHECK: [[BITCAST6:%[0-9]+]]:_(s64) = G_BITCAST [[UV6]](<2 x s32>) - ; CHECK: [[BITCAST7:%[0-9]+]]:_(s64) = G_BITCAST [[UV7]](<2 x s32>) - ; CHECK: [[BITCAST8:%[0-9]+]]:_(s64) = G_BITCAST [[UV8]](<2 x s32>) - ; CHECK: [[BITCAST9:%[0-9]+]]:_(s64) = G_BITCAST [[UV9]](<2 x s32>) - ; CHECK: [[BITCAST10:%[0-9]+]]:_(s64) = G_BITCAST [[UV10]](<2 x s32>) - ; CHECK: [[BITCAST11:%[0-9]+]]:_(s64) = G_BITCAST [[UV11]](<2 x s32>) - ; CHECK: [[BITCAST12:%[0-9]+]]:_(s64) = G_BITCAST [[UV12]](<2 x s32>) - ; CHECK: [[BITCAST13:%[0-9]+]]:_(s64) = G_BITCAST [[UV13]](<2 x s32>) - ; CHECK: [[BITCAST14:%[0-9]+]]:_(s64) = G_BITCAST [[UV14]](<2 x s32>) - ; CHECK: [[BITCAST15:%[0-9]+]]:_(s64) = G_BITCAST [[UV15]](<2 x s32>) - ; CHECK: [[BITCAST16:%[0-9]+]]:_(s64) = G_BITCAST [[UV16]](<2 x s32>) - ; CHECK: [[BITCAST17:%[0-9]+]]:_(s64) = G_BITCAST [[UV17]](<2 x s32>) - ; CHECK: [[BITCAST18:%[0-9]+]]:_(s64) = G_BITCAST [[UV18]](<2 x s32>) - ; CHECK: [[BITCAST19:%[0-9]+]]:_(s64) = G_BITCAST [[UV19]](<2 x s32>) - ; CHECK: [[BITCAST20:%[0-9]+]]:_(s64) = G_BITCAST [[UV20]](<2 x s32>) - ; CHECK: [[BITCAST21:%[0-9]+]]:_(s64) = G_BITCAST [[UV21]](<2 x s32>) - ; CHECK: [[BITCAST22:%[0-9]+]]:_(s64) = G_BITCAST [[UV22]](<2 x s32>) - ; CHECK: [[BITCAST23:%[0-9]+]]:_(s64) = G_BITCAST [[UV23]](<2 x s32>) - ; CHECK: [[BITCAST24:%[0-9]+]]:_(s64) = G_BITCAST [[UV24]](<2 x s32>) - ; CHECK: [[BITCAST25:%[0-9]+]]:_(s64) = G_BITCAST [[UV25]](<2 x s32>) - ; CHECK: [[BITCAST26:%[0-9]+]]:_(s64) = G_BITCAST [[UV26]](<2 x s32>) - ; CHECK: [[BITCAST27:%[0-9]+]]:_(s64) = G_BITCAST [[UV27]](<2 x s32>) - ; CHECK: [[BITCAST28:%[0-9]+]]:_(s64) = G_BITCAST [[UV28]](<2 x s32>) - ; CHECK: [[BITCAST29:%[0-9]+]]:_(s64) = G_BITCAST [[UV29]](<2 x s32>) - ; CHECK: [[BITCAST30:%[0-9]+]]:_(s64) = G_BITCAST [[UV30]](<2 x s32>) - ; CHECK: [[BITCAST31:%[0-9]+]]:_(s64) = G_BITCAST [[UV31]](<2 x s32>) - ; CHECK: [[BUILD_VECTOR:%[0-9]+]]:_(<16 x s64>) = G_BUILD_VECTOR [[BITCAST]](s64), [[BITCAST1]](s64), [[BITCAST2]](s64), [[BITCAST3]](s64), [[BITCAST4]](s64), [[BITCAST5]](s64), [[BITCAST6]](s64), [[BITCAST7]](s64), [[BITCAST8]](s64), [[BITCAST9]](s64), [[BITCAST10]](s64), [[BITCAST11]](s64), [[BITCAST12]](s64), [[BITCAST13]](s64), [[BITCAST14]](s64), [[BITCAST15]](s64) - ; CHECK: [[BUILD_VECTOR1:%[0-9]+]]:_(<16 x s64>) = G_BUILD_VECTOR [[BITCAST16]](s64), [[BITCAST17]](s64), [[BITCAST18]](s64), [[BITCAST19]](s64), [[BITCAST20]](s64), [[BITCAST21]](s64), [[BITCAST22]](s64), [[BITCAST23]](s64), [[BITCAST24]](s64), [[BITCAST25]](s64), [[BITCAST26]](s64), [[BITCAST27]](s64), [[BITCAST28]](s64), [[BITCAST29]](s64), [[BITCAST30]](s64), [[BITCAST31]](s64) - ; CHECK: S_ENDPGM 0, implicit [[BUILD_VECTOR]](<16 x s64>), implicit [[BUILD_VECTOR1]](<16 x s64>) + ; CHECK-NEXT: [[UV:%[0-9]+]]:_(<2 x s32>), [[UV1:%[0-9]+]]:_(<2 x s32>), [[UV2:%[0-9]+]]:_(<2 x s32>), [[UV3:%[0-9]+]]:_(<2 x s32>), [[UV4:%[0-9]+]]:_(<2 x s32>), [[UV5:%[0-9]+]]:_(<2 x s32>), [[UV6:%[0-9]+]]:_(<2 x s32>), [[UV7:%[0-9]+]]:_(<2 x s32>), [[UV8:%[0-9]+]]:_(<2 x s32>), [[UV9:%[0-9]+]]:_(<2 x s32>), [[UV10:%[0-9]+]]:_(<2 x s32>), [[UV11:%[0-9]+]]:_(<2 x s32>), [[UV12:%[0-9]+]]:_(<2 x s32>), [[UV13:%[0-9]+]]:_(<2 x s32>), [[UV14:%[0-9]+]]:_(<2 x s32>), [[UV15:%[0-9]+]]:_(<2 x s32>) = G_UNMERGE_VALUES [[COPY]](<32 x s32>) + ; CHECK-NEXT: [[UV16:%[0-9]+]]:_(<2 x s32>), 
[[UV17:%[0-9]+]]:_(<2 x s32>), [[UV18:%[0-9]+]]:_(<2 x s32>), [[UV19:%[0-9]+]]:_(<2 x s32>), [[UV20:%[0-9]+]]:_(<2 x s32>), [[UV21:%[0-9]+]]:_(<2 x s32>), [[UV22:%[0-9]+]]:_(<2 x s32>), [[UV23:%[0-9]+]]:_(<2 x s32>), [[UV24:%[0-9]+]]:_(<2 x s32>), [[UV25:%[0-9]+]]:_(<2 x s32>), [[UV26:%[0-9]+]]:_(<2 x s32>), [[UV27:%[0-9]+]]:_(<2 x s32>), [[UV28:%[0-9]+]]:_(<2 x s32>), [[UV29:%[0-9]+]]:_(<2 x s32>), [[UV30:%[0-9]+]]:_(<2 x s32>), [[UV31:%[0-9]+]]:_(<2 x s32>) = G_UNMERGE_VALUES [[COPY]](<32 x s32>) + ; CHECK-NEXT: [[BITCAST:%[0-9]+]]:_(s64) = G_BITCAST [[UV]](<2 x s32>) + ; CHECK-NEXT: [[BITCAST1:%[0-9]+]]:_(s64) = G_BITCAST [[UV1]](<2 x s32>) + ; CHECK-NEXT: [[BITCAST2:%[0-9]+]]:_(s64) = G_BITCAST [[UV2]](<2 x s32>) + ; CHECK-NEXT: [[BITCAST3:%[0-9]+]]:_(s64) = G_BITCAST [[UV3]](<2 x s32>) + ; CHECK-NEXT: [[BITCAST4:%[0-9]+]]:_(s64) = G_BITCAST [[UV4]](<2 x s32>) + ; CHECK-NEXT: [[BITCAST5:%[0-9]+]]:_(s64) = G_BITCAST [[UV5]](<2 x s32>) + ; CHECK-NEXT: [[BITCAST6:%[0-9]+]]:_(s64) = G_BITCAST [[UV6]](<2 x s32>) + ; CHECK-NEXT: [[BITCAST7:%[0-9]+]]:_(s64) = G_BITCAST [[UV7]](<2 x s32>) + ; CHECK-NEXT: [[BITCAST8:%[0-9]+]]:_(s64) = G_BITCAST [[UV8]](<2 x s32>) + ; CHECK-NEXT: [[BITCAST9:%[0-9]+]]:_(s64) = G_BITCAST [[UV9]](<2 x s32>) + ; CHECK-NEXT: [[BITCAST10:%[0-9]+]]:_(s64) = G_BITCAST [[UV10]](<2 x s32>) + ; CHECK-NEXT: [[BITCAST11:%[0-9]+]]:_(s64) = G_BITCAST [[UV11]](<2 x s32>) + ; CHECK-NEXT: [[BITCAST12:%[0-9]+]]:_(s64) = G_BITCAST [[UV12]](<2 x s32>) + ; CHECK-NEXT: [[BITCAST13:%[0-9]+]]:_(s64) = G_BITCAST [[UV13]](<2 x s32>) + ; CHECK-NEXT: [[BITCAST14:%[0-9]+]]:_(s64) = G_BITCAST [[UV14]](<2 x s32>) + ; CHECK-NEXT: [[BITCAST15:%[0-9]+]]:_(s64) = G_BITCAST [[UV15]](<2 x s32>) + ; CHECK-NEXT: [[BITCAST16:%[0-9]+]]:_(s64) = G_BITCAST [[UV16]](<2 x s32>) + ; CHECK-NEXT: [[BITCAST17:%[0-9]+]]:_(s64) = G_BITCAST [[UV17]](<2 x s32>) + ; CHECK-NEXT: [[BITCAST18:%[0-9]+]]:_(s64) = G_BITCAST [[UV18]](<2 x s32>) + ; CHECK-NEXT: [[BITCAST19:%[0-9]+]]:_(s64) = G_BITCAST [[UV19]](<2 x s32>) + ; CHECK-NEXT: [[BITCAST20:%[0-9]+]]:_(s64) = G_BITCAST [[UV20]](<2 x s32>) + ; CHECK-NEXT: [[BITCAST21:%[0-9]+]]:_(s64) = G_BITCAST [[UV21]](<2 x s32>) + ; CHECK-NEXT: [[BITCAST22:%[0-9]+]]:_(s64) = G_BITCAST [[UV22]](<2 x s32>) + ; CHECK-NEXT: [[BITCAST23:%[0-9]+]]:_(s64) = G_BITCAST [[UV23]](<2 x s32>) + ; CHECK-NEXT: [[BITCAST24:%[0-9]+]]:_(s64) = G_BITCAST [[UV24]](<2 x s32>) + ; CHECK-NEXT: [[BITCAST25:%[0-9]+]]:_(s64) = G_BITCAST [[UV25]](<2 x s32>) + ; CHECK-NEXT: [[BITCAST26:%[0-9]+]]:_(s64) = G_BITCAST [[UV26]](<2 x s32>) + ; CHECK-NEXT: [[BITCAST27:%[0-9]+]]:_(s64) = G_BITCAST [[UV27]](<2 x s32>) + ; CHECK-NEXT: [[BITCAST28:%[0-9]+]]:_(s64) = G_BITCAST [[UV28]](<2 x s32>) + ; CHECK-NEXT: [[BITCAST29:%[0-9]+]]:_(s64) = G_BITCAST [[UV29]](<2 x s32>) + ; CHECK-NEXT: [[BITCAST30:%[0-9]+]]:_(s64) = G_BITCAST [[UV30]](<2 x s32>) + ; CHECK-NEXT: [[BITCAST31:%[0-9]+]]:_(s64) = G_BITCAST [[UV31]](<2 x s32>) + ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<16 x s64>) = G_BUILD_VECTOR [[BITCAST]](s64), [[BITCAST1]](s64), [[BITCAST2]](s64), [[BITCAST3]](s64), [[BITCAST4]](s64), [[BITCAST5]](s64), [[BITCAST6]](s64), [[BITCAST7]](s64), [[BITCAST8]](s64), [[BITCAST9]](s64), [[BITCAST10]](s64), [[BITCAST11]](s64), [[BITCAST12]](s64), [[BITCAST13]](s64), [[BITCAST14]](s64), [[BITCAST15]](s64) + ; CHECK-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<16 x s64>) = G_BUILD_VECTOR [[BITCAST16]](s64), [[BITCAST17]](s64), [[BITCAST18]](s64), [[BITCAST19]](s64), [[BITCAST20]](s64), [[BITCAST21]](s64), [[BITCAST22]](s64), 
[[BITCAST23]](s64), [[BITCAST24]](s64), [[BITCAST25]](s64), [[BITCAST26]](s64), [[BITCAST27]](s64), [[BITCAST28]](s64), [[BITCAST29]](s64), [[BITCAST30]](s64), [[BITCAST31]](s64) + ; CHECK-NEXT: S_ENDPGM 0, implicit [[BUILD_VECTOR]](<16 x s64>), implicit [[BUILD_VECTOR1]](<16 x s64>) %0:_(<32 x s32>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 %1:_(<64 x s32>) = G_CONCAT_VECTORS %0, %0 %2:_(<32 x s64>) = G_BITCAST %1 diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-concat-vectors.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-concat-vectors.mir index 792d064567e6..846e528f9527 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-concat-vectors.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-concat-vectors.mir @@ -1,5 +1,5 @@ # NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py -# RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=fiji -O0 -run-pass=legalizer -global-isel-abort=0 %s -o - | FileCheck %s +# RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=fiji -O0 -run-pass=legalizer %s -o - | FileCheck %s --- name: concat_vectors_v2s32_v2s32 diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-extract-vector-elt.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-extract-vector-elt.mir index e93b5d047146..55dfdbcc9ec0 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-extract-vector-elt.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-extract-vector-elt.mir @@ -1,5 +1,5 @@ # NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py -# RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=fiji -run-pass=legalizer -global-isel-abort=0 %s -o - | FileCheck %s +# RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=fiji -run-pass=legalizer %s -o - | FileCheck %s --- name: extract_vector_elt_0_v2i32 diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-fabs.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-fabs.mir index 5f772be31f72..df4112042a13 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-fabs.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-fabs.mir @@ -1,8 +1,8 @@ # NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py -# RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=tahiti -run-pass=legalizer -global-isel-abort=0 -o - %s | FileCheck -check-prefix=SI %s -# RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=fiji -run-pass=legalizer -global-isel-abort=0 -o - %s | FileCheck -check-prefix=VI %s -# RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx900 -run-pass=legalizer -global-isel-abort=0 -o - %s | FileCheck -check-prefix=GFX9 %s -# RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx1010 -run-pass=legalizer -global-isel-abort=0 -o - %s | FileCheck -check-prefix=GFX9 %s +# RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=tahiti -run-pass=legalizer -o - %s | FileCheck -check-prefix=SI %s +# RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=fiji -run-pass=legalizer -o - %s | FileCheck -check-prefix=VI %s +# RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx900 -run-pass=legalizer -o - %s | FileCheck -check-prefix=GFX9 %s +# RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx1010 -run-pass=legalizer -o - %s | FileCheck -check-prefix=GFX9 %s --- diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-fadd.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-fadd.mir index 497d0ceeb620..e500e59bff7d 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-fadd.mir +++ 
b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-fadd.mir @@ -1,8 +1,8 @@ # NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py -# RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=tahiti -run-pass=legalizer -global-isel-abort=0 %s -o - | FileCheck -check-prefix=SI %s -# RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=fiji -run-pass=legalizer -global-isel-abort=0 %s -o - | FileCheck -check-prefix=VI %s -# RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx900 -run-pass=legalizer -global-isel-abort=0 %s -o - | FileCheck -check-prefix=GFX9 %s -# RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx1010 -run-pass=legalizer -global-isel-abort=0 %s -o - | FileCheck -check-prefix=GFX9 %s +# RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=tahiti -run-pass=legalizer %s -o - | FileCheck -check-prefix=SI %s +# RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=fiji -run-pass=legalizer %s -o - | FileCheck -check-prefix=VI %s +# RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx900 -run-pass=legalizer %s -o - | FileCheck -check-prefix=GFX9 %s +# RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx1010 -run-pass=legalizer %s -o - | FileCheck -check-prefix=GFX9 %s --- name: test_fadd_s32 diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-fcanonicalize.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-fcanonicalize.mir index fc5d260bd826..6631fbf4ec7f 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-fcanonicalize.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-fcanonicalize.mir @@ -1,8 +1,8 @@ # NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py -# RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=tahiti -run-pass=legalizer -global-isel-abort=0 %s -o - | FileCheck -check-prefix=SI %s -# RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=fiji -run-pass=legalizer -global-isel-abort=0 %s -o - | FileCheck -check-prefix=VI %s -# RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx900 -run-pass=legalizer -global-isel-abort=0 %s -o - | FileCheck -check-prefix=GFX9 %s -# RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx1010 -run-pass=legalizer -global-isel-abort=0 %s -o - | FileCheck -check-prefix=GFX9 %s +# RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=tahiti -run-pass=legalizer %s -o - | FileCheck -check-prefix=SI %s +# RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=fiji -run-pass=legalizer %s -o - | FileCheck -check-prefix=VI %s +# RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx900 -run-pass=legalizer %s -o - | FileCheck -check-prefix=GFX9 %s +# RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx1010 -run-pass=legalizer %s -o - | FileCheck -check-prefix=GFX9 %s --- diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-fcos.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-fcos.mir index 61ee24f03151..6c7b5d73a016 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-fcos.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-fcos.mir @@ -1,8 +1,8 @@ # NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py -# RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=tahiti -run-pass=legalizer -global-isel-abort=0 -o - %s | FileCheck -check-prefix=SI %s -# RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=fiji -run-pass=legalizer -global-isel-abort=0 -o - %s | FileCheck -check-prefix=VI %s -# RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx900 -run-pass=legalizer -global-isel-abort=0 -o - %s | FileCheck -check-prefix=GFX9 %s -# RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx1010 -run-pass=legalizer -global-isel-abort=0 -o - %s | FileCheck -check-prefix=GFX9 %s +# RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=tahiti 
-run-pass=legalizer -o - %s | FileCheck -check-prefix=SI %s +# RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=fiji -run-pass=legalizer -o - %s | FileCheck -check-prefix=VI %s +# RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx900 -run-pass=legalizer -o - %s | FileCheck -check-prefix=GFX9 %s +# RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx1010 -run-pass=legalizer -o - %s | FileCheck -check-prefix=GFX9 %s --- name: test_fcos_s32 diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-fdiv.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-fdiv.mir index c25666e8cced..4e3a1a74a38c 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-fdiv.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-fdiv.mir @@ -1,9 +1,9 @@ # NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py -# RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=tahiti -run-pass=legalizer -global-isel-abort=0 %s -o - | FileCheck -check-prefix=SI %s -# RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=fiji -run-pass=legalizer -global-isel-abort=0 %s -o - | FileCheck -check-prefix=VI %s -# RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx900 -run-pass=legalizer -global-isel-abort=0 -o - %s | FileCheck -check-prefix=GFX9 %s -# RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx900 -run-pass=legalizer -global-isel-abort=0 -enable-unsafe-fp-math -o - %s | FileCheck -check-prefix=GFX9-UNSAFE %s -# RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx1010 -run-pass=legalizer -global-isel-abort=0 -o - %s | FileCheck -check-prefix=GFX10 %s +# RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=tahiti -run-pass=legalizer %s -o - | FileCheck -check-prefix=SI %s +# RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=fiji -run-pass=legalizer %s -o - | FileCheck -check-prefix=VI %s +# RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx900 -run-pass=legalizer -o - %s | FileCheck -check-prefix=GFX9 %s +# RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx900 -run-pass=legalizer -enable-unsafe-fp-math -o - %s | FileCheck -check-prefix=GFX9-UNSAFE %s +# RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx1010 -run-pass=legalizer -o - %s | FileCheck -check-prefix=GFX10 %s --- name: test_fdiv_s16 @@ -20,75 +20,75 @@ body: | ; SI-LABEL: name: test_fdiv_s16 ; SI: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; SI: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; SI: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY]](s32) - ; SI: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY1]](s32) - ; SI: [[FPEXT:%[0-9]+]]:_(s32) = G_FPEXT [[TRUNC]](s16) - ; SI: [[FPEXT1:%[0-9]+]]:_(s32) = G_FPEXT [[TRUNC1]](s16) - ; SI: [[C:%[0-9]+]]:_(s32) = G_FCONSTANT float 1.000000e+00 - ; SI: [[INT:%[0-9]+]]:_(s32), [[INT1:%[0-9]+]]:_(s1) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.scale), [[FPEXT]](s32), [[FPEXT1]](s32), 0 - ; SI: [[INT2:%[0-9]+]]:_(s32), [[INT3:%[0-9]+]]:_(s1) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.scale), [[FPEXT]](s32), [[FPEXT1]](s32), 1 - ; SI: [[INT4:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.rcp), [[INT]](s32) - ; SI: [[FNEG:%[0-9]+]]:_(s32) = G_FNEG [[INT]] - ; SI: [[FMA:%[0-9]+]]:_(s32) = G_FMA [[FNEG]], [[INT4]], [[C]] - ; SI: [[FMA1:%[0-9]+]]:_(s32) = G_FMA [[FMA]], [[INT4]], [[INT4]] - ; SI: [[FMUL:%[0-9]+]]:_(s32) = G_FMUL [[INT2]], [[FMA1]] - ; SI: [[FMA2:%[0-9]+]]:_(s32) = G_FMA [[FNEG]], [[FMUL]], [[INT2]] - ; SI: [[FMA3:%[0-9]+]]:_(s32) = G_FMA [[FMA2]], [[FMA1]], [[FMUL]] - ; SI: [[FMA4:%[0-9]+]]:_(s32) = G_FMA [[FNEG]], [[FMA3]], [[INT2]] - ; SI: [[INT5:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.fmas), [[FMA4]](s32), [[FMA1]](s32), [[FMA3]](s32), [[INT3]](s1) - ; SI: 
[[INT6:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.fixup), [[INT5]](s32), [[FPEXT1]](s32), [[FPEXT]](s32) - ; SI: [[FPTRUNC:%[0-9]+]]:_(s16) = G_FPTRUNC [[INT6]](s32) - ; SI: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[FPTRUNC]](s16) - ; SI: $vgpr0 = COPY [[ANYEXT]](s32) + ; SI-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; SI-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY]](s32) + ; SI-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY1]](s32) + ; SI-NEXT: [[FPEXT:%[0-9]+]]:_(s32) = G_FPEXT [[TRUNC]](s16) + ; SI-NEXT: [[FPEXT1:%[0-9]+]]:_(s32) = G_FPEXT [[TRUNC1]](s16) + ; SI-NEXT: [[C:%[0-9]+]]:_(s32) = G_FCONSTANT float 1.000000e+00 + ; SI-NEXT: [[INT:%[0-9]+]]:_(s32), [[INT1:%[0-9]+]]:_(s1) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.scale), [[FPEXT]](s32), [[FPEXT1]](s32), 0 + ; SI-NEXT: [[INT2:%[0-9]+]]:_(s32), [[INT3:%[0-9]+]]:_(s1) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.scale), [[FPEXT]](s32), [[FPEXT1]](s32), 1 + ; SI-NEXT: [[INT4:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.rcp), [[INT]](s32) + ; SI-NEXT: [[FNEG:%[0-9]+]]:_(s32) = G_FNEG [[INT]] + ; SI-NEXT: [[FMA:%[0-9]+]]:_(s32) = G_FMA [[FNEG]], [[INT4]], [[C]] + ; SI-NEXT: [[FMA1:%[0-9]+]]:_(s32) = G_FMA [[FMA]], [[INT4]], [[INT4]] + ; SI-NEXT: [[FMUL:%[0-9]+]]:_(s32) = G_FMUL [[INT2]], [[FMA1]] + ; SI-NEXT: [[FMA2:%[0-9]+]]:_(s32) = G_FMA [[FNEG]], [[FMUL]], [[INT2]] + ; SI-NEXT: [[FMA3:%[0-9]+]]:_(s32) = G_FMA [[FMA2]], [[FMA1]], [[FMUL]] + ; SI-NEXT: [[FMA4:%[0-9]+]]:_(s32) = G_FMA [[FNEG]], [[FMA3]], [[INT2]] + ; SI-NEXT: [[INT5:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.fmas), [[FMA4]](s32), [[FMA1]](s32), [[FMA3]](s32), [[INT3]](s1) + ; SI-NEXT: [[INT6:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.fixup), [[INT5]](s32), [[FPEXT1]](s32), [[FPEXT]](s32) + ; SI-NEXT: [[FPTRUNC:%[0-9]+]]:_(s16) = G_FPTRUNC [[INT6]](s32) + ; SI-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[FPTRUNC]](s16) + ; SI-NEXT: $vgpr0 = COPY [[ANYEXT]](s32) ; VI-LABEL: name: test_fdiv_s16 ; VI: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; VI: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; VI: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY]](s32) - ; VI: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY1]](s32) - ; VI: [[FPEXT:%[0-9]+]]:_(s32) = G_FPEXT [[TRUNC]](s16) - ; VI: [[FPEXT1:%[0-9]+]]:_(s32) = G_FPEXT [[TRUNC1]](s16) - ; VI: [[INT:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.rcp), [[FPEXT1]](s32) - ; VI: [[FMUL:%[0-9]+]]:_(s32) = G_FMUL [[FPEXT]], [[INT]] - ; VI: [[FPTRUNC:%[0-9]+]]:_(s16) = G_FPTRUNC [[FMUL]](s32) - ; VI: [[INT1:%[0-9]+]]:_(s16) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.fixup), [[FPTRUNC]](s16), [[TRUNC1]](s16), [[TRUNC]](s16) - ; VI: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[INT1]](s16) - ; VI: $vgpr0 = COPY [[ANYEXT]](s32) + ; VI-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; VI-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY]](s32) + ; VI-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY1]](s32) + ; VI-NEXT: [[FPEXT:%[0-9]+]]:_(s32) = G_FPEXT [[TRUNC]](s16) + ; VI-NEXT: [[FPEXT1:%[0-9]+]]:_(s32) = G_FPEXT [[TRUNC1]](s16) + ; VI-NEXT: [[INT:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.rcp), [[FPEXT1]](s32) + ; VI-NEXT: [[FMUL:%[0-9]+]]:_(s32) = G_FMUL [[FPEXT]], [[INT]] + ; VI-NEXT: [[FPTRUNC:%[0-9]+]]:_(s16) = G_FPTRUNC [[FMUL]](s32) + ; VI-NEXT: [[INT1:%[0-9]+]]:_(s16) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.fixup), [[FPTRUNC]](s16), [[TRUNC1]](s16), [[TRUNC]](s16) + ; VI-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[INT1]](s16) + ; VI-NEXT: $vgpr0 = COPY [[ANYEXT]](s32) ; 
GFX9-LABEL: name: test_fdiv_s16 ; GFX9: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX9: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX9: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY]](s32) - ; GFX9: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY1]](s32) - ; GFX9: [[FPEXT:%[0-9]+]]:_(s32) = G_FPEXT [[TRUNC]](s16) - ; GFX9: [[FPEXT1:%[0-9]+]]:_(s32) = G_FPEXT [[TRUNC1]](s16) - ; GFX9: [[INT:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.rcp), [[FPEXT1]](s32) - ; GFX9: [[FMUL:%[0-9]+]]:_(s32) = G_FMUL [[FPEXT]], [[INT]] - ; GFX9: [[FPTRUNC:%[0-9]+]]:_(s16) = G_FPTRUNC [[FMUL]](s32) - ; GFX9: [[INT1:%[0-9]+]]:_(s16) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.fixup), [[FPTRUNC]](s16), [[TRUNC1]](s16), [[TRUNC]](s16) - ; GFX9: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[INT1]](s16) - ; GFX9: $vgpr0 = COPY [[ANYEXT]](s32) + ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; GFX9-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY]](s32) + ; GFX9-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY1]](s32) + ; GFX9-NEXT: [[FPEXT:%[0-9]+]]:_(s32) = G_FPEXT [[TRUNC]](s16) + ; GFX9-NEXT: [[FPEXT1:%[0-9]+]]:_(s32) = G_FPEXT [[TRUNC1]](s16) + ; GFX9-NEXT: [[INT:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.rcp), [[FPEXT1]](s32) + ; GFX9-NEXT: [[FMUL:%[0-9]+]]:_(s32) = G_FMUL [[FPEXT]], [[INT]] + ; GFX9-NEXT: [[FPTRUNC:%[0-9]+]]:_(s16) = G_FPTRUNC [[FMUL]](s32) + ; GFX9-NEXT: [[INT1:%[0-9]+]]:_(s16) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.fixup), [[FPTRUNC]](s16), [[TRUNC1]](s16), [[TRUNC]](s16) + ; GFX9-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[INT1]](s16) + ; GFX9-NEXT: $vgpr0 = COPY [[ANYEXT]](s32) ; GFX9-UNSAFE-LABEL: name: test_fdiv_s16 ; GFX9-UNSAFE: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX9-UNSAFE: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX9-UNSAFE: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY]](s32) - ; GFX9-UNSAFE: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY1]](s32) - ; GFX9-UNSAFE: [[INT:%[0-9]+]]:_(s16) = G_INTRINSIC intrinsic(@llvm.amdgcn.rcp), [[TRUNC1]](s16) - ; GFX9-UNSAFE: [[FMUL:%[0-9]+]]:_(s16) = G_FMUL [[TRUNC]], [[INT]] - ; GFX9-UNSAFE: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[FMUL]](s16) - ; GFX9-UNSAFE: $vgpr0 = COPY [[ANYEXT]](s32) + ; GFX9-UNSAFE-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; GFX9-UNSAFE-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY]](s32) + ; GFX9-UNSAFE-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY1]](s32) + ; GFX9-UNSAFE-NEXT: [[INT:%[0-9]+]]:_(s16) = G_INTRINSIC intrinsic(@llvm.amdgcn.rcp), [[TRUNC1]](s16) + ; GFX9-UNSAFE-NEXT: [[FMUL:%[0-9]+]]:_(s16) = G_FMUL [[TRUNC]], [[INT]] + ; GFX9-UNSAFE-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[FMUL]](s16) + ; GFX9-UNSAFE-NEXT: $vgpr0 = COPY [[ANYEXT]](s32) ; GFX10-LABEL: name: test_fdiv_s16 ; GFX10: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX10: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX10: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY]](s32) - ; GFX10: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY1]](s32) - ; GFX10: [[FPEXT:%[0-9]+]]:_(s32) = G_FPEXT [[TRUNC]](s16) - ; GFX10: [[FPEXT1:%[0-9]+]]:_(s32) = G_FPEXT [[TRUNC1]](s16) - ; GFX10: [[INT:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.rcp), [[FPEXT1]](s32) - ; GFX10: [[FMUL:%[0-9]+]]:_(s32) = G_FMUL [[FPEXT]], [[INT]] - ; GFX10: [[FPTRUNC:%[0-9]+]]:_(s16) = G_FPTRUNC [[FMUL]](s32) - ; GFX10: [[INT1:%[0-9]+]]:_(s16) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.fixup), [[FPTRUNC]](s16), [[TRUNC1]](s16), [[TRUNC]](s16) - ; GFX10: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[INT1]](s16) - ; GFX10: $vgpr0 = COPY [[ANYEXT]](s32) + ; GFX10-NEXT: 
[[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; GFX10-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY]](s32) + ; GFX10-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY1]](s32) + ; GFX10-NEXT: [[FPEXT:%[0-9]+]]:_(s32) = G_FPEXT [[TRUNC]](s16) + ; GFX10-NEXT: [[FPEXT1:%[0-9]+]]:_(s32) = G_FPEXT [[TRUNC1]](s16) + ; GFX10-NEXT: [[INT:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.rcp), [[FPEXT1]](s32) + ; GFX10-NEXT: [[FMUL:%[0-9]+]]:_(s32) = G_FMUL [[FPEXT]], [[INT]] + ; GFX10-NEXT: [[FPTRUNC:%[0-9]+]]:_(s16) = G_FPTRUNC [[FMUL]](s32) + ; GFX10-NEXT: [[INT1:%[0-9]+]]:_(s16) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.fixup), [[FPTRUNC]](s16), [[TRUNC1]](s16), [[TRUNC]](s16) + ; GFX10-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[INT1]](s16) + ; GFX10-NEXT: $vgpr0 = COPY [[ANYEXT]](s32) %0:_(s32) = COPY $vgpr0 %1:_(s32) = COPY $vgpr1 %2:_(s16) = G_TRUNC %0 @@ -113,78 +113,78 @@ body: | ; SI-LABEL: name: test_fdiv_s32_denorms_on ; SI: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; SI: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; SI: [[C:%[0-9]+]]:_(s32) = G_FCONSTANT float 1.000000e+00 - ; SI: [[INT:%[0-9]+]]:_(s32), [[INT1:%[0-9]+]]:_(s1) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.scale), [[COPY]](s32), [[COPY1]](s32), 0 - ; SI: [[INT2:%[0-9]+]]:_(s32), [[INT3:%[0-9]+]]:_(s1) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.scale), [[COPY]](s32), [[COPY1]](s32), 1 - ; SI: [[INT4:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.rcp), [[INT]](s32) - ; SI: [[FNEG:%[0-9]+]]:_(s32) = G_FNEG [[INT]] - ; SI: [[FMA:%[0-9]+]]:_(s32) = G_FMA [[FNEG]], [[INT4]], [[C]] - ; SI: [[FMA1:%[0-9]+]]:_(s32) = G_FMA [[FMA]], [[INT4]], [[INT4]] - ; SI: [[FMUL:%[0-9]+]]:_(s32) = G_FMUL [[INT2]], [[FMA1]] - ; SI: [[FMA2:%[0-9]+]]:_(s32) = G_FMA [[FNEG]], [[FMUL]], [[INT2]] - ; SI: [[FMA3:%[0-9]+]]:_(s32) = G_FMA [[FMA2]], [[FMA1]], [[FMUL]] - ; SI: [[FMA4:%[0-9]+]]:_(s32) = G_FMA [[FNEG]], [[FMA3]], [[INT2]] - ; SI: [[INT5:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.fmas), [[FMA4]](s32), [[FMA1]](s32), [[FMA3]](s32), [[INT3]](s1) - ; SI: [[INT6:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.fixup), [[INT5]](s32), [[COPY1]](s32), [[COPY]](s32) - ; SI: $vgpr0 = COPY [[INT6]](s32) + ; SI-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; SI-NEXT: [[C:%[0-9]+]]:_(s32) = G_FCONSTANT float 1.000000e+00 + ; SI-NEXT: [[INT:%[0-9]+]]:_(s32), [[INT1:%[0-9]+]]:_(s1) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.scale), [[COPY]](s32), [[COPY1]](s32), 0 + ; SI-NEXT: [[INT2:%[0-9]+]]:_(s32), [[INT3:%[0-9]+]]:_(s1) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.scale), [[COPY]](s32), [[COPY1]](s32), 1 + ; SI-NEXT: [[INT4:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.rcp), [[INT]](s32) + ; SI-NEXT: [[FNEG:%[0-9]+]]:_(s32) = G_FNEG [[INT]] + ; SI-NEXT: [[FMA:%[0-9]+]]:_(s32) = G_FMA [[FNEG]], [[INT4]], [[C]] + ; SI-NEXT: [[FMA1:%[0-9]+]]:_(s32) = G_FMA [[FMA]], [[INT4]], [[INT4]] + ; SI-NEXT: [[FMUL:%[0-9]+]]:_(s32) = G_FMUL [[INT2]], [[FMA1]] + ; SI-NEXT: [[FMA2:%[0-9]+]]:_(s32) = G_FMA [[FNEG]], [[FMUL]], [[INT2]] + ; SI-NEXT: [[FMA3:%[0-9]+]]:_(s32) = G_FMA [[FMA2]], [[FMA1]], [[FMUL]] + ; SI-NEXT: [[FMA4:%[0-9]+]]:_(s32) = G_FMA [[FNEG]], [[FMA3]], [[INT2]] + ; SI-NEXT: [[INT5:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.fmas), [[FMA4]](s32), [[FMA1]](s32), [[FMA3]](s32), [[INT3]](s1) + ; SI-NEXT: [[INT6:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.fixup), [[INT5]](s32), [[COPY1]](s32), [[COPY]](s32) + ; SI-NEXT: $vgpr0 = COPY [[INT6]](s32) ; VI-LABEL: name: test_fdiv_s32_denorms_on ; VI: 
[[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; VI: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; VI: [[C:%[0-9]+]]:_(s32) = G_FCONSTANT float 1.000000e+00 - ; VI: [[INT:%[0-9]+]]:_(s32), [[INT1:%[0-9]+]]:_(s1) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.scale), [[COPY]](s32), [[COPY1]](s32), 0 - ; VI: [[INT2:%[0-9]+]]:_(s32), [[INT3:%[0-9]+]]:_(s1) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.scale), [[COPY]](s32), [[COPY1]](s32), 1 - ; VI: [[INT4:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.rcp), [[INT]](s32) - ; VI: [[FNEG:%[0-9]+]]:_(s32) = G_FNEG [[INT]] - ; VI: [[FMA:%[0-9]+]]:_(s32) = G_FMA [[FNEG]], [[INT4]], [[C]] - ; VI: [[FMA1:%[0-9]+]]:_(s32) = G_FMA [[FMA]], [[INT4]], [[INT4]] - ; VI: [[FMUL:%[0-9]+]]:_(s32) = G_FMUL [[INT2]], [[FMA1]] - ; VI: [[FMA2:%[0-9]+]]:_(s32) = G_FMA [[FNEG]], [[FMUL]], [[INT2]] - ; VI: [[FMA3:%[0-9]+]]:_(s32) = G_FMA [[FMA2]], [[FMA1]], [[FMUL]] - ; VI: [[FMA4:%[0-9]+]]:_(s32) = G_FMA [[FNEG]], [[FMA3]], [[INT2]] - ; VI: [[INT5:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.fmas), [[FMA4]](s32), [[FMA1]](s32), [[FMA3]](s32), [[INT3]](s1) - ; VI: [[INT6:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.fixup), [[INT5]](s32), [[COPY1]](s32), [[COPY]](s32) - ; VI: $vgpr0 = COPY [[INT6]](s32) + ; VI-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; VI-NEXT: [[C:%[0-9]+]]:_(s32) = G_FCONSTANT float 1.000000e+00 + ; VI-NEXT: [[INT:%[0-9]+]]:_(s32), [[INT1:%[0-9]+]]:_(s1) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.scale), [[COPY]](s32), [[COPY1]](s32), 0 + ; VI-NEXT: [[INT2:%[0-9]+]]:_(s32), [[INT3:%[0-9]+]]:_(s1) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.scale), [[COPY]](s32), [[COPY1]](s32), 1 + ; VI-NEXT: [[INT4:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.rcp), [[INT]](s32) + ; VI-NEXT: [[FNEG:%[0-9]+]]:_(s32) = G_FNEG [[INT]] + ; VI-NEXT: [[FMA:%[0-9]+]]:_(s32) = G_FMA [[FNEG]], [[INT4]], [[C]] + ; VI-NEXT: [[FMA1:%[0-9]+]]:_(s32) = G_FMA [[FMA]], [[INT4]], [[INT4]] + ; VI-NEXT: [[FMUL:%[0-9]+]]:_(s32) = G_FMUL [[INT2]], [[FMA1]] + ; VI-NEXT: [[FMA2:%[0-9]+]]:_(s32) = G_FMA [[FNEG]], [[FMUL]], [[INT2]] + ; VI-NEXT: [[FMA3:%[0-9]+]]:_(s32) = G_FMA [[FMA2]], [[FMA1]], [[FMUL]] + ; VI-NEXT: [[FMA4:%[0-9]+]]:_(s32) = G_FMA [[FNEG]], [[FMA3]], [[INT2]] + ; VI-NEXT: [[INT5:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.fmas), [[FMA4]](s32), [[FMA1]](s32), [[FMA3]](s32), [[INT3]](s1) + ; VI-NEXT: [[INT6:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.fixup), [[INT5]](s32), [[COPY1]](s32), [[COPY]](s32) + ; VI-NEXT: $vgpr0 = COPY [[INT6]](s32) ; GFX9-LABEL: name: test_fdiv_s32_denorms_on ; GFX9: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX9: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX9: [[C:%[0-9]+]]:_(s32) = G_FCONSTANT float 1.000000e+00 - ; GFX9: [[INT:%[0-9]+]]:_(s32), [[INT1:%[0-9]+]]:_(s1) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.scale), [[COPY]](s32), [[COPY1]](s32), 0 - ; GFX9: [[INT2:%[0-9]+]]:_(s32), [[INT3:%[0-9]+]]:_(s1) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.scale), [[COPY]](s32), [[COPY1]](s32), 1 - ; GFX9: [[INT4:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.rcp), [[INT]](s32) - ; GFX9: [[FNEG:%[0-9]+]]:_(s32) = G_FNEG [[INT]] - ; GFX9: [[FMA:%[0-9]+]]:_(s32) = G_FMA [[FNEG]], [[INT4]], [[C]] - ; GFX9: [[FMA1:%[0-9]+]]:_(s32) = G_FMA [[FMA]], [[INT4]], [[INT4]] - ; GFX9: [[FMUL:%[0-9]+]]:_(s32) = G_FMUL [[INT2]], [[FMA1]] - ; GFX9: [[FMA2:%[0-9]+]]:_(s32) = G_FMA [[FNEG]], [[FMUL]], [[INT2]] - ; GFX9: [[FMA3:%[0-9]+]]:_(s32) = G_FMA [[FMA2]], [[FMA1]], [[FMUL]] - ; GFX9: [[FMA4:%[0-9]+]]:_(s32) 
= G_FMA [[FNEG]], [[FMA3]], [[INT2]] - ; GFX9: [[INT5:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.fmas), [[FMA4]](s32), [[FMA1]](s32), [[FMA3]](s32), [[INT3]](s1) - ; GFX9: [[INT6:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.fixup), [[INT5]](s32), [[COPY1]](s32), [[COPY]](s32) - ; GFX9: $vgpr0 = COPY [[INT6]](s32) + ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; GFX9-NEXT: [[C:%[0-9]+]]:_(s32) = G_FCONSTANT float 1.000000e+00 + ; GFX9-NEXT: [[INT:%[0-9]+]]:_(s32), [[INT1:%[0-9]+]]:_(s1) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.scale), [[COPY]](s32), [[COPY1]](s32), 0 + ; GFX9-NEXT: [[INT2:%[0-9]+]]:_(s32), [[INT3:%[0-9]+]]:_(s1) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.scale), [[COPY]](s32), [[COPY1]](s32), 1 + ; GFX9-NEXT: [[INT4:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.rcp), [[INT]](s32) + ; GFX9-NEXT: [[FNEG:%[0-9]+]]:_(s32) = G_FNEG [[INT]] + ; GFX9-NEXT: [[FMA:%[0-9]+]]:_(s32) = G_FMA [[FNEG]], [[INT4]], [[C]] + ; GFX9-NEXT: [[FMA1:%[0-9]+]]:_(s32) = G_FMA [[FMA]], [[INT4]], [[INT4]] + ; GFX9-NEXT: [[FMUL:%[0-9]+]]:_(s32) = G_FMUL [[INT2]], [[FMA1]] + ; GFX9-NEXT: [[FMA2:%[0-9]+]]:_(s32) = G_FMA [[FNEG]], [[FMUL]], [[INT2]] + ; GFX9-NEXT: [[FMA3:%[0-9]+]]:_(s32) = G_FMA [[FMA2]], [[FMA1]], [[FMUL]] + ; GFX9-NEXT: [[FMA4:%[0-9]+]]:_(s32) = G_FMA [[FNEG]], [[FMA3]], [[INT2]] + ; GFX9-NEXT: [[INT5:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.fmas), [[FMA4]](s32), [[FMA1]](s32), [[FMA3]](s32), [[INT3]](s1) + ; GFX9-NEXT: [[INT6:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.fixup), [[INT5]](s32), [[COPY1]](s32), [[COPY]](s32) + ; GFX9-NEXT: $vgpr0 = COPY [[INT6]](s32) ; GFX9-UNSAFE-LABEL: name: test_fdiv_s32_denorms_on ; GFX9-UNSAFE: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX9-UNSAFE: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX9-UNSAFE: [[INT:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.rcp), [[COPY1]](s32) - ; GFX9-UNSAFE: [[FMUL:%[0-9]+]]:_(s32) = G_FMUL [[COPY]], [[INT]] - ; GFX9-UNSAFE: $vgpr0 = COPY [[FMUL]](s32) + ; GFX9-UNSAFE-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; GFX9-UNSAFE-NEXT: [[INT:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.rcp), [[COPY1]](s32) + ; GFX9-UNSAFE-NEXT: [[FMUL:%[0-9]+]]:_(s32) = G_FMUL [[COPY]], [[INT]] + ; GFX9-UNSAFE-NEXT: $vgpr0 = COPY [[FMUL]](s32) ; GFX10-LABEL: name: test_fdiv_s32_denorms_on ; GFX10: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX10: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX10: [[C:%[0-9]+]]:_(s32) = G_FCONSTANT float 1.000000e+00 - ; GFX10: [[INT:%[0-9]+]]:_(s32), [[INT1:%[0-9]+]]:_(s1) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.scale), [[COPY]](s32), [[COPY1]](s32), 0 - ; GFX10: [[INT2:%[0-9]+]]:_(s32), [[INT3:%[0-9]+]]:_(s1) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.scale), [[COPY]](s32), [[COPY1]](s32), 1 - ; GFX10: [[INT4:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.rcp), [[INT]](s32) - ; GFX10: [[FNEG:%[0-9]+]]:_(s32) = G_FNEG [[INT]] - ; GFX10: [[FMA:%[0-9]+]]:_(s32) = G_FMA [[FNEG]], [[INT4]], [[C]] - ; GFX10: [[FMA1:%[0-9]+]]:_(s32) = G_FMA [[FMA]], [[INT4]], [[INT4]] - ; GFX10: [[FMUL:%[0-9]+]]:_(s32) = G_FMUL [[INT2]], [[FMA1]] - ; GFX10: [[FMA2:%[0-9]+]]:_(s32) = G_FMA [[FNEG]], [[FMUL]], [[INT2]] - ; GFX10: [[FMA3:%[0-9]+]]:_(s32) = G_FMA [[FMA2]], [[FMA1]], [[FMUL]] - ; GFX10: [[FMA4:%[0-9]+]]:_(s32) = G_FMA [[FNEG]], [[FMA3]], [[INT2]] - ; GFX10: [[INT5:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.fmas), [[FMA4]](s32), [[FMA1]](s32), [[FMA3]](s32), [[INT3]](s1) - ; GFX10: [[INT6:%[0-9]+]]:_(s32) 
= G_INTRINSIC intrinsic(@llvm.amdgcn.div.fixup), [[INT5]](s32), [[COPY1]](s32), [[COPY]](s32) - ; GFX10: $vgpr0 = COPY [[INT6]](s32) + ; GFX10-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; GFX10-NEXT: [[C:%[0-9]+]]:_(s32) = G_FCONSTANT float 1.000000e+00 + ; GFX10-NEXT: [[INT:%[0-9]+]]:_(s32), [[INT1:%[0-9]+]]:_(s1) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.scale), [[COPY]](s32), [[COPY1]](s32), 0 + ; GFX10-NEXT: [[INT2:%[0-9]+]]:_(s32), [[INT3:%[0-9]+]]:_(s1) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.scale), [[COPY]](s32), [[COPY1]](s32), 1 + ; GFX10-NEXT: [[INT4:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.rcp), [[INT]](s32) + ; GFX10-NEXT: [[FNEG:%[0-9]+]]:_(s32) = G_FNEG [[INT]] + ; GFX10-NEXT: [[FMA:%[0-9]+]]:_(s32) = G_FMA [[FNEG]], [[INT4]], [[C]] + ; GFX10-NEXT: [[FMA1:%[0-9]+]]:_(s32) = G_FMA [[FMA]], [[INT4]], [[INT4]] + ; GFX10-NEXT: [[FMUL:%[0-9]+]]:_(s32) = G_FMUL [[INT2]], [[FMA1]] + ; GFX10-NEXT: [[FMA2:%[0-9]+]]:_(s32) = G_FMA [[FNEG]], [[FMUL]], [[INT2]] + ; GFX10-NEXT: [[FMA3:%[0-9]+]]:_(s32) = G_FMA [[FMA2]], [[FMA1]], [[FMUL]] + ; GFX10-NEXT: [[FMA4:%[0-9]+]]:_(s32) = G_FMA [[FNEG]], [[FMA3]], [[INT2]] + ; GFX10-NEXT: [[INT5:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.fmas), [[FMA4]](s32), [[FMA1]](s32), [[FMA3]](s32), [[INT3]](s1) + ; GFX10-NEXT: [[INT6:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.fixup), [[INT5]](s32), [[COPY1]](s32), [[COPY]](s32) + ; GFX10-NEXT: $vgpr0 = COPY [[INT6]](s32) %0:_(s32) = COPY $vgpr0 %1:_(s32) = COPY $vgpr1 %2:_(s32) = G_FDIV %0, %1 @@ -206,86 +206,86 @@ body: | ; SI-LABEL: name: test_fdiv_s32_denorms_off ; SI: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; SI: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; SI: [[C:%[0-9]+]]:_(s32) = G_FCONSTANT float 1.000000e+00 - ; SI: [[INT:%[0-9]+]]:_(s32), [[INT1:%[0-9]+]]:_(s1) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.scale), [[COPY]](s32), [[COPY1]](s32), 0 - ; SI: [[INT2:%[0-9]+]]:_(s32), [[INT3:%[0-9]+]]:_(s1) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.scale), [[COPY]](s32), [[COPY1]](s32), 1 - ; SI: [[INT4:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.rcp), [[INT]](s32) - ; SI: [[FNEG:%[0-9]+]]:_(s32) = G_FNEG [[INT]] - ; SI: S_SETREG_IMM32_B32 3, 2305, implicit-def $mode, implicit $mode - ; SI: [[FMA:%[0-9]+]]:_(s32) = G_FMA [[FNEG]], [[INT4]], [[C]] - ; SI: [[FMA1:%[0-9]+]]:_(s32) = G_FMA [[FMA]], [[INT4]], [[INT4]] - ; SI: [[FMUL:%[0-9]+]]:_(s32) = G_FMUL [[INT2]], [[FMA1]] - ; SI: [[FMA2:%[0-9]+]]:_(s32) = G_FMA [[FNEG]], [[FMUL]], [[INT2]] - ; SI: [[FMA3:%[0-9]+]]:_(s32) = G_FMA [[FMA2]], [[FMA1]], [[FMUL]] - ; SI: [[FMA4:%[0-9]+]]:_(s32) = G_FMA [[FNEG]], [[FMA3]], [[INT2]] - ; SI: S_SETREG_IMM32_B32 0, 2305, implicit-def $mode, implicit $mode - ; SI: [[INT5:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.fmas), [[FMA4]](s32), [[FMA1]](s32), [[FMA3]](s32), [[INT3]](s1) - ; SI: [[INT6:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.fixup), [[INT5]](s32), [[COPY1]](s32), [[COPY]](s32) - ; SI: $vgpr0 = COPY [[INT6]](s32) + ; SI-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; SI-NEXT: [[C:%[0-9]+]]:_(s32) = G_FCONSTANT float 1.000000e+00 + ; SI-NEXT: [[INT:%[0-9]+]]:_(s32), [[INT1:%[0-9]+]]:_(s1) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.scale), [[COPY]](s32), [[COPY1]](s32), 0 + ; SI-NEXT: [[INT2:%[0-9]+]]:_(s32), [[INT3:%[0-9]+]]:_(s1) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.scale), [[COPY]](s32), [[COPY1]](s32), 1 + ; SI-NEXT: [[INT4:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.rcp), [[INT]](s32) + ; SI-NEXT: 
[[FNEG:%[0-9]+]]:_(s32) = G_FNEG [[INT]] + ; SI-NEXT: S_SETREG_IMM32_B32 3, 2305, implicit-def $mode, implicit $mode + ; SI-NEXT: [[FMA:%[0-9]+]]:_(s32) = G_FMA [[FNEG]], [[INT4]], [[C]] + ; SI-NEXT: [[FMA1:%[0-9]+]]:_(s32) = G_FMA [[FMA]], [[INT4]], [[INT4]] + ; SI-NEXT: [[FMUL:%[0-9]+]]:_(s32) = G_FMUL [[INT2]], [[FMA1]] + ; SI-NEXT: [[FMA2:%[0-9]+]]:_(s32) = G_FMA [[FNEG]], [[FMUL]], [[INT2]] + ; SI-NEXT: [[FMA3:%[0-9]+]]:_(s32) = G_FMA [[FMA2]], [[FMA1]], [[FMUL]] + ; SI-NEXT: [[FMA4:%[0-9]+]]:_(s32) = G_FMA [[FNEG]], [[FMA3]], [[INT2]] + ; SI-NEXT: S_SETREG_IMM32_B32 0, 2305, implicit-def $mode, implicit $mode + ; SI-NEXT: [[INT5:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.fmas), [[FMA4]](s32), [[FMA1]](s32), [[FMA3]](s32), [[INT3]](s1) + ; SI-NEXT: [[INT6:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.fixup), [[INT5]](s32), [[COPY1]](s32), [[COPY]](s32) + ; SI-NEXT: $vgpr0 = COPY [[INT6]](s32) ; VI-LABEL: name: test_fdiv_s32_denorms_off ; VI: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; VI: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; VI: [[C:%[0-9]+]]:_(s32) = G_FCONSTANT float 1.000000e+00 - ; VI: [[INT:%[0-9]+]]:_(s32), [[INT1:%[0-9]+]]:_(s1) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.scale), [[COPY]](s32), [[COPY1]](s32), 0 - ; VI: [[INT2:%[0-9]+]]:_(s32), [[INT3:%[0-9]+]]:_(s1) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.scale), [[COPY]](s32), [[COPY1]](s32), 1 - ; VI: [[INT4:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.rcp), [[INT]](s32) - ; VI: [[FNEG:%[0-9]+]]:_(s32) = G_FNEG [[INT]] - ; VI: S_SETREG_IMM32_B32 3, 2305, implicit-def $mode, implicit $mode - ; VI: [[FMA:%[0-9]+]]:_(s32) = G_FMA [[FNEG]], [[INT4]], [[C]] - ; VI: [[FMA1:%[0-9]+]]:_(s32) = G_FMA [[FMA]], [[INT4]], [[INT4]] - ; VI: [[FMUL:%[0-9]+]]:_(s32) = G_FMUL [[INT2]], [[FMA1]] - ; VI: [[FMA2:%[0-9]+]]:_(s32) = G_FMA [[FNEG]], [[FMUL]], [[INT2]] - ; VI: [[FMA3:%[0-9]+]]:_(s32) = G_FMA [[FMA2]], [[FMA1]], [[FMUL]] - ; VI: [[FMA4:%[0-9]+]]:_(s32) = G_FMA [[FNEG]], [[FMA3]], [[INT2]] - ; VI: S_SETREG_IMM32_B32 0, 2305, implicit-def $mode, implicit $mode - ; VI: [[INT5:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.fmas), [[FMA4]](s32), [[FMA1]](s32), [[FMA3]](s32), [[INT3]](s1) - ; VI: [[INT6:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.fixup), [[INT5]](s32), [[COPY1]](s32), [[COPY]](s32) - ; VI: $vgpr0 = COPY [[INT6]](s32) + ; VI-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; VI-NEXT: [[C:%[0-9]+]]:_(s32) = G_FCONSTANT float 1.000000e+00 + ; VI-NEXT: [[INT:%[0-9]+]]:_(s32), [[INT1:%[0-9]+]]:_(s1) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.scale), [[COPY]](s32), [[COPY1]](s32), 0 + ; VI-NEXT: [[INT2:%[0-9]+]]:_(s32), [[INT3:%[0-9]+]]:_(s1) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.scale), [[COPY]](s32), [[COPY1]](s32), 1 + ; VI-NEXT: [[INT4:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.rcp), [[INT]](s32) + ; VI-NEXT: [[FNEG:%[0-9]+]]:_(s32) = G_FNEG [[INT]] + ; VI-NEXT: S_SETREG_IMM32_B32 3, 2305, implicit-def $mode, implicit $mode + ; VI-NEXT: [[FMA:%[0-9]+]]:_(s32) = G_FMA [[FNEG]], [[INT4]], [[C]] + ; VI-NEXT: [[FMA1:%[0-9]+]]:_(s32) = G_FMA [[FMA]], [[INT4]], [[INT4]] + ; VI-NEXT: [[FMUL:%[0-9]+]]:_(s32) = G_FMUL [[INT2]], [[FMA1]] + ; VI-NEXT: [[FMA2:%[0-9]+]]:_(s32) = G_FMA [[FNEG]], [[FMUL]], [[INT2]] + ; VI-NEXT: [[FMA3:%[0-9]+]]:_(s32) = G_FMA [[FMA2]], [[FMA1]], [[FMUL]] + ; VI-NEXT: [[FMA4:%[0-9]+]]:_(s32) = G_FMA [[FNEG]], [[FMA3]], [[INT2]] + ; VI-NEXT: S_SETREG_IMM32_B32 0, 2305, implicit-def $mode, implicit $mode + ; VI-NEXT: 
[[INT5:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.fmas), [[FMA4]](s32), [[FMA1]](s32), [[FMA3]](s32), [[INT3]](s1) + ; VI-NEXT: [[INT6:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.fixup), [[INT5]](s32), [[COPY1]](s32), [[COPY]](s32) + ; VI-NEXT: $vgpr0 = COPY [[INT6]](s32) ; GFX9-LABEL: name: test_fdiv_s32_denorms_off ; GFX9: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX9: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX9: [[C:%[0-9]+]]:_(s32) = G_FCONSTANT float 1.000000e+00 - ; GFX9: [[INT:%[0-9]+]]:_(s32), [[INT1:%[0-9]+]]:_(s1) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.scale), [[COPY]](s32), [[COPY1]](s32), 0 - ; GFX9: [[INT2:%[0-9]+]]:_(s32), [[INT3:%[0-9]+]]:_(s1) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.scale), [[COPY]](s32), [[COPY1]](s32), 1 - ; GFX9: [[INT4:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.rcp), [[INT]](s32) - ; GFX9: [[FNEG:%[0-9]+]]:_(s32) = G_FNEG [[INT]] - ; GFX9: S_SETREG_IMM32_B32 3, 2305, implicit-def $mode, implicit $mode - ; GFX9: [[FMA:%[0-9]+]]:_(s32) = G_FMA [[FNEG]], [[INT4]], [[C]] - ; GFX9: [[FMA1:%[0-9]+]]:_(s32) = G_FMA [[FMA]], [[INT4]], [[INT4]] - ; GFX9: [[FMUL:%[0-9]+]]:_(s32) = G_FMUL [[INT2]], [[FMA1]] - ; GFX9: [[FMA2:%[0-9]+]]:_(s32) = G_FMA [[FNEG]], [[FMUL]], [[INT2]] - ; GFX9: [[FMA3:%[0-9]+]]:_(s32) = G_FMA [[FMA2]], [[FMA1]], [[FMUL]] - ; GFX9: [[FMA4:%[0-9]+]]:_(s32) = G_FMA [[FNEG]], [[FMA3]], [[INT2]] - ; GFX9: S_SETREG_IMM32_B32 0, 2305, implicit-def $mode, implicit $mode - ; GFX9: [[INT5:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.fmas), [[FMA4]](s32), [[FMA1]](s32), [[FMA3]](s32), [[INT3]](s1) - ; GFX9: [[INT6:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.fixup), [[INT5]](s32), [[COPY1]](s32), [[COPY]](s32) - ; GFX9: $vgpr0 = COPY [[INT6]](s32) + ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; GFX9-NEXT: [[C:%[0-9]+]]:_(s32) = G_FCONSTANT float 1.000000e+00 + ; GFX9-NEXT: [[INT:%[0-9]+]]:_(s32), [[INT1:%[0-9]+]]:_(s1) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.scale), [[COPY]](s32), [[COPY1]](s32), 0 + ; GFX9-NEXT: [[INT2:%[0-9]+]]:_(s32), [[INT3:%[0-9]+]]:_(s1) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.scale), [[COPY]](s32), [[COPY1]](s32), 1 + ; GFX9-NEXT: [[INT4:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.rcp), [[INT]](s32) + ; GFX9-NEXT: [[FNEG:%[0-9]+]]:_(s32) = G_FNEG [[INT]] + ; GFX9-NEXT: S_SETREG_IMM32_B32 3, 2305, implicit-def $mode, implicit $mode + ; GFX9-NEXT: [[FMA:%[0-9]+]]:_(s32) = G_FMA [[FNEG]], [[INT4]], [[C]] + ; GFX9-NEXT: [[FMA1:%[0-9]+]]:_(s32) = G_FMA [[FMA]], [[INT4]], [[INT4]] + ; GFX9-NEXT: [[FMUL:%[0-9]+]]:_(s32) = G_FMUL [[INT2]], [[FMA1]] + ; GFX9-NEXT: [[FMA2:%[0-9]+]]:_(s32) = G_FMA [[FNEG]], [[FMUL]], [[INT2]] + ; GFX9-NEXT: [[FMA3:%[0-9]+]]:_(s32) = G_FMA [[FMA2]], [[FMA1]], [[FMUL]] + ; GFX9-NEXT: [[FMA4:%[0-9]+]]:_(s32) = G_FMA [[FNEG]], [[FMA3]], [[INT2]] + ; GFX9-NEXT: S_SETREG_IMM32_B32 0, 2305, implicit-def $mode, implicit $mode + ; GFX9-NEXT: [[INT5:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.fmas), [[FMA4]](s32), [[FMA1]](s32), [[FMA3]](s32), [[INT3]](s1) + ; GFX9-NEXT: [[INT6:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.fixup), [[INT5]](s32), [[COPY1]](s32), [[COPY]](s32) + ; GFX9-NEXT: $vgpr0 = COPY [[INT6]](s32) ; GFX9-UNSAFE-LABEL: name: test_fdiv_s32_denorms_off ; GFX9-UNSAFE: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX9-UNSAFE: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX9-UNSAFE: [[INT:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.rcp), [[COPY1]](s32) - ; GFX9-UNSAFE: 
[[FMUL:%[0-9]+]]:_(s32) = G_FMUL [[COPY]], [[INT]] - ; GFX9-UNSAFE: $vgpr0 = COPY [[FMUL]](s32) + ; GFX9-UNSAFE-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; GFX9-UNSAFE-NEXT: [[INT:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.rcp), [[COPY1]](s32) + ; GFX9-UNSAFE-NEXT: [[FMUL:%[0-9]+]]:_(s32) = G_FMUL [[COPY]], [[INT]] + ; GFX9-UNSAFE-NEXT: $vgpr0 = COPY [[FMUL]](s32) ; GFX10-LABEL: name: test_fdiv_s32_denorms_off ; GFX10: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX10: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX10: [[C:%[0-9]+]]:_(s32) = G_FCONSTANT float 1.000000e+00 - ; GFX10: [[INT:%[0-9]+]]:_(s32), [[INT1:%[0-9]+]]:_(s1) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.scale), [[COPY]](s32), [[COPY1]](s32), 0 - ; GFX10: [[INT2:%[0-9]+]]:_(s32), [[INT3:%[0-9]+]]:_(s1) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.scale), [[COPY]](s32), [[COPY1]](s32), 1 - ; GFX10: [[INT4:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.rcp), [[INT]](s32) - ; GFX10: [[FNEG:%[0-9]+]]:_(s32) = G_FNEG [[INT]] - ; GFX10: S_DENORM_MODE 15, implicit-def $mode, implicit $mode - ; GFX10: [[FMA:%[0-9]+]]:_(s32) = G_FMA [[FNEG]], [[INT4]], [[C]] - ; GFX10: [[FMA1:%[0-9]+]]:_(s32) = G_FMA [[FMA]], [[INT4]], [[INT4]] - ; GFX10: [[FMUL:%[0-9]+]]:_(s32) = G_FMUL [[INT2]], [[FMA1]] - ; GFX10: [[FMA2:%[0-9]+]]:_(s32) = G_FMA [[FNEG]], [[FMUL]], [[INT2]] - ; GFX10: [[FMA3:%[0-9]+]]:_(s32) = G_FMA [[FMA2]], [[FMA1]], [[FMUL]] - ; GFX10: [[FMA4:%[0-9]+]]:_(s32) = G_FMA [[FNEG]], [[FMA3]], [[INT2]] - ; GFX10: S_DENORM_MODE 12, implicit-def $mode, implicit $mode - ; GFX10: [[INT5:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.fmas), [[FMA4]](s32), [[FMA1]](s32), [[FMA3]](s32), [[INT3]](s1) - ; GFX10: [[INT6:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.fixup), [[INT5]](s32), [[COPY1]](s32), [[COPY]](s32) - ; GFX10: $vgpr0 = COPY [[INT6]](s32) + ; GFX10-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; GFX10-NEXT: [[C:%[0-9]+]]:_(s32) = G_FCONSTANT float 1.000000e+00 + ; GFX10-NEXT: [[INT:%[0-9]+]]:_(s32), [[INT1:%[0-9]+]]:_(s1) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.scale), [[COPY]](s32), [[COPY1]](s32), 0 + ; GFX10-NEXT: [[INT2:%[0-9]+]]:_(s32), [[INT3:%[0-9]+]]:_(s1) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.scale), [[COPY]](s32), [[COPY1]](s32), 1 + ; GFX10-NEXT: [[INT4:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.rcp), [[INT]](s32) + ; GFX10-NEXT: [[FNEG:%[0-9]+]]:_(s32) = G_FNEG [[INT]] + ; GFX10-NEXT: S_DENORM_MODE 15, implicit-def $mode, implicit $mode + ; GFX10-NEXT: [[FMA:%[0-9]+]]:_(s32) = G_FMA [[FNEG]], [[INT4]], [[C]] + ; GFX10-NEXT: [[FMA1:%[0-9]+]]:_(s32) = G_FMA [[FMA]], [[INT4]], [[INT4]] + ; GFX10-NEXT: [[FMUL:%[0-9]+]]:_(s32) = G_FMUL [[INT2]], [[FMA1]] + ; GFX10-NEXT: [[FMA2:%[0-9]+]]:_(s32) = G_FMA [[FNEG]], [[FMUL]], [[INT2]] + ; GFX10-NEXT: [[FMA3:%[0-9]+]]:_(s32) = G_FMA [[FMA2]], [[FMA1]], [[FMUL]] + ; GFX10-NEXT: [[FMA4:%[0-9]+]]:_(s32) = G_FMA [[FNEG]], [[FMA3]], [[INT2]] + ; GFX10-NEXT: S_DENORM_MODE 12, implicit-def $mode, implicit $mode + ; GFX10-NEXT: [[INT5:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.fmas), [[FMA4]](s32), [[FMA1]](s32), [[FMA3]](s32), [[INT3]](s1) + ; GFX10-NEXT: [[INT6:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.fixup), [[INT5]](s32), [[COPY1]](s32), [[COPY]](s32) + ; GFX10-NEXT: $vgpr0 = COPY [[INT6]](s32) %0:_(s32) = COPY $vgpr0 %1:_(s32) = COPY $vgpr1 %2:_(s32) = G_FDIV %0, %1 @@ -307,86 +307,86 @@ body: | ; SI-LABEL: name: test_fdiv_s32_denorms_off_arcp ; SI: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; SI: 
[[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; SI: [[C:%[0-9]+]]:_(s32) = G_FCONSTANT float 1.000000e+00 - ; SI: [[INT:%[0-9]+]]:_(s32), [[INT1:%[0-9]+]]:_(s1) = arcp G_INTRINSIC intrinsic(@llvm.amdgcn.div.scale), [[COPY]](s32), [[COPY1]](s32), 0 - ; SI: [[INT2:%[0-9]+]]:_(s32), [[INT3:%[0-9]+]]:_(s1) = arcp G_INTRINSIC intrinsic(@llvm.amdgcn.div.scale), [[COPY]](s32), [[COPY1]](s32), 1 - ; SI: [[INT4:%[0-9]+]]:_(s32) = arcp G_INTRINSIC intrinsic(@llvm.amdgcn.rcp), [[INT]](s32) - ; SI: [[FNEG:%[0-9]+]]:_(s32) = arcp G_FNEG [[INT]] - ; SI: S_SETREG_IMM32_B32 3, 2305, implicit-def $mode, implicit $mode - ; SI: [[FMA:%[0-9]+]]:_(s32) = arcp G_FMA [[FNEG]], [[INT4]], [[C]] - ; SI: [[FMA1:%[0-9]+]]:_(s32) = arcp G_FMA [[FMA]], [[INT4]], [[INT4]] - ; SI: [[FMUL:%[0-9]+]]:_(s32) = arcp G_FMUL [[INT2]], [[FMA1]] - ; SI: [[FMA2:%[0-9]+]]:_(s32) = arcp G_FMA [[FNEG]], [[FMUL]], [[INT2]] - ; SI: [[FMA3:%[0-9]+]]:_(s32) = arcp G_FMA [[FMA2]], [[FMA1]], [[FMUL]] - ; SI: [[FMA4:%[0-9]+]]:_(s32) = arcp G_FMA [[FNEG]], [[FMA3]], [[INT2]] - ; SI: S_SETREG_IMM32_B32 0, 2305, implicit-def $mode, implicit $mode - ; SI: [[INT5:%[0-9]+]]:_(s32) = arcp G_INTRINSIC intrinsic(@llvm.amdgcn.div.fmas), [[FMA4]](s32), [[FMA1]](s32), [[FMA3]](s32), [[INT3]](s1) - ; SI: [[INT6:%[0-9]+]]:_(s32) = arcp G_INTRINSIC intrinsic(@llvm.amdgcn.div.fixup), [[INT5]](s32), [[COPY1]](s32), [[COPY]](s32) - ; SI: $vgpr0 = COPY [[INT6]](s32) + ; SI-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; SI-NEXT: [[C:%[0-9]+]]:_(s32) = G_FCONSTANT float 1.000000e+00 + ; SI-NEXT: [[INT:%[0-9]+]]:_(s32), [[INT1:%[0-9]+]]:_(s1) = arcp G_INTRINSIC intrinsic(@llvm.amdgcn.div.scale), [[COPY]](s32), [[COPY1]](s32), 0 + ; SI-NEXT: [[INT2:%[0-9]+]]:_(s32), [[INT3:%[0-9]+]]:_(s1) = arcp G_INTRINSIC intrinsic(@llvm.amdgcn.div.scale), [[COPY]](s32), [[COPY1]](s32), 1 + ; SI-NEXT: [[INT4:%[0-9]+]]:_(s32) = arcp G_INTRINSIC intrinsic(@llvm.amdgcn.rcp), [[INT]](s32) + ; SI-NEXT: [[FNEG:%[0-9]+]]:_(s32) = arcp G_FNEG [[INT]] + ; SI-NEXT: S_SETREG_IMM32_B32 3, 2305, implicit-def $mode, implicit $mode + ; SI-NEXT: [[FMA:%[0-9]+]]:_(s32) = arcp G_FMA [[FNEG]], [[INT4]], [[C]] + ; SI-NEXT: [[FMA1:%[0-9]+]]:_(s32) = arcp G_FMA [[FMA]], [[INT4]], [[INT4]] + ; SI-NEXT: [[FMUL:%[0-9]+]]:_(s32) = arcp G_FMUL [[INT2]], [[FMA1]] + ; SI-NEXT: [[FMA2:%[0-9]+]]:_(s32) = arcp G_FMA [[FNEG]], [[FMUL]], [[INT2]] + ; SI-NEXT: [[FMA3:%[0-9]+]]:_(s32) = arcp G_FMA [[FMA2]], [[FMA1]], [[FMUL]] + ; SI-NEXT: [[FMA4:%[0-9]+]]:_(s32) = arcp G_FMA [[FNEG]], [[FMA3]], [[INT2]] + ; SI-NEXT: S_SETREG_IMM32_B32 0, 2305, implicit-def $mode, implicit $mode + ; SI-NEXT: [[INT5:%[0-9]+]]:_(s32) = arcp G_INTRINSIC intrinsic(@llvm.amdgcn.div.fmas), [[FMA4]](s32), [[FMA1]](s32), [[FMA3]](s32), [[INT3]](s1) + ; SI-NEXT: [[INT6:%[0-9]+]]:_(s32) = arcp G_INTRINSIC intrinsic(@llvm.amdgcn.div.fixup), [[INT5]](s32), [[COPY1]](s32), [[COPY]](s32) + ; SI-NEXT: $vgpr0 = COPY [[INT6]](s32) ; VI-LABEL: name: test_fdiv_s32_denorms_off_arcp ; VI: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; VI: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; VI: [[C:%[0-9]+]]:_(s32) = G_FCONSTANT float 1.000000e+00 - ; VI: [[INT:%[0-9]+]]:_(s32), [[INT1:%[0-9]+]]:_(s1) = arcp G_INTRINSIC intrinsic(@llvm.amdgcn.div.scale), [[COPY]](s32), [[COPY1]](s32), 0 - ; VI: [[INT2:%[0-9]+]]:_(s32), [[INT3:%[0-9]+]]:_(s1) = arcp G_INTRINSIC intrinsic(@llvm.amdgcn.div.scale), [[COPY]](s32), [[COPY1]](s32), 1 - ; VI: [[INT4:%[0-9]+]]:_(s32) = arcp G_INTRINSIC intrinsic(@llvm.amdgcn.rcp), [[INT]](s32) - ; VI: [[FNEG:%[0-9]+]]:_(s32) = arcp G_FNEG 
[[INT]] - ; VI: S_SETREG_IMM32_B32 3, 2305, implicit-def $mode, implicit $mode - ; VI: [[FMA:%[0-9]+]]:_(s32) = arcp G_FMA [[FNEG]], [[INT4]], [[C]] - ; VI: [[FMA1:%[0-9]+]]:_(s32) = arcp G_FMA [[FMA]], [[INT4]], [[INT4]] - ; VI: [[FMUL:%[0-9]+]]:_(s32) = arcp G_FMUL [[INT2]], [[FMA1]] - ; VI: [[FMA2:%[0-9]+]]:_(s32) = arcp G_FMA [[FNEG]], [[FMUL]], [[INT2]] - ; VI: [[FMA3:%[0-9]+]]:_(s32) = arcp G_FMA [[FMA2]], [[FMA1]], [[FMUL]] - ; VI: [[FMA4:%[0-9]+]]:_(s32) = arcp G_FMA [[FNEG]], [[FMA3]], [[INT2]] - ; VI: S_SETREG_IMM32_B32 0, 2305, implicit-def $mode, implicit $mode - ; VI: [[INT5:%[0-9]+]]:_(s32) = arcp G_INTRINSIC intrinsic(@llvm.amdgcn.div.fmas), [[FMA4]](s32), [[FMA1]](s32), [[FMA3]](s32), [[INT3]](s1) - ; VI: [[INT6:%[0-9]+]]:_(s32) = arcp G_INTRINSIC intrinsic(@llvm.amdgcn.div.fixup), [[INT5]](s32), [[COPY1]](s32), [[COPY]](s32) - ; VI: $vgpr0 = COPY [[INT6]](s32) + ; VI-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; VI-NEXT: [[C:%[0-9]+]]:_(s32) = G_FCONSTANT float 1.000000e+00 + ; VI-NEXT: [[INT:%[0-9]+]]:_(s32), [[INT1:%[0-9]+]]:_(s1) = arcp G_INTRINSIC intrinsic(@llvm.amdgcn.div.scale), [[COPY]](s32), [[COPY1]](s32), 0 + ; VI-NEXT: [[INT2:%[0-9]+]]:_(s32), [[INT3:%[0-9]+]]:_(s1) = arcp G_INTRINSIC intrinsic(@llvm.amdgcn.div.scale), [[COPY]](s32), [[COPY1]](s32), 1 + ; VI-NEXT: [[INT4:%[0-9]+]]:_(s32) = arcp G_INTRINSIC intrinsic(@llvm.amdgcn.rcp), [[INT]](s32) + ; VI-NEXT: [[FNEG:%[0-9]+]]:_(s32) = arcp G_FNEG [[INT]] + ; VI-NEXT: S_SETREG_IMM32_B32 3, 2305, implicit-def $mode, implicit $mode + ; VI-NEXT: [[FMA:%[0-9]+]]:_(s32) = arcp G_FMA [[FNEG]], [[INT4]], [[C]] + ; VI-NEXT: [[FMA1:%[0-9]+]]:_(s32) = arcp G_FMA [[FMA]], [[INT4]], [[INT4]] + ; VI-NEXT: [[FMUL:%[0-9]+]]:_(s32) = arcp G_FMUL [[INT2]], [[FMA1]] + ; VI-NEXT: [[FMA2:%[0-9]+]]:_(s32) = arcp G_FMA [[FNEG]], [[FMUL]], [[INT2]] + ; VI-NEXT: [[FMA3:%[0-9]+]]:_(s32) = arcp G_FMA [[FMA2]], [[FMA1]], [[FMUL]] + ; VI-NEXT: [[FMA4:%[0-9]+]]:_(s32) = arcp G_FMA [[FNEG]], [[FMA3]], [[INT2]] + ; VI-NEXT: S_SETREG_IMM32_B32 0, 2305, implicit-def $mode, implicit $mode + ; VI-NEXT: [[INT5:%[0-9]+]]:_(s32) = arcp G_INTRINSIC intrinsic(@llvm.amdgcn.div.fmas), [[FMA4]](s32), [[FMA1]](s32), [[FMA3]](s32), [[INT3]](s1) + ; VI-NEXT: [[INT6:%[0-9]+]]:_(s32) = arcp G_INTRINSIC intrinsic(@llvm.amdgcn.div.fixup), [[INT5]](s32), [[COPY1]](s32), [[COPY]](s32) + ; VI-NEXT: $vgpr0 = COPY [[INT6]](s32) ; GFX9-LABEL: name: test_fdiv_s32_denorms_off_arcp ; GFX9: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX9: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX9: [[C:%[0-9]+]]:_(s32) = G_FCONSTANT float 1.000000e+00 - ; GFX9: [[INT:%[0-9]+]]:_(s32), [[INT1:%[0-9]+]]:_(s1) = arcp G_INTRINSIC intrinsic(@llvm.amdgcn.div.scale), [[COPY]](s32), [[COPY1]](s32), 0 - ; GFX9: [[INT2:%[0-9]+]]:_(s32), [[INT3:%[0-9]+]]:_(s1) = arcp G_INTRINSIC intrinsic(@llvm.amdgcn.div.scale), [[COPY]](s32), [[COPY1]](s32), 1 - ; GFX9: [[INT4:%[0-9]+]]:_(s32) = arcp G_INTRINSIC intrinsic(@llvm.amdgcn.rcp), [[INT]](s32) - ; GFX9: [[FNEG:%[0-9]+]]:_(s32) = arcp G_FNEG [[INT]] - ; GFX9: S_SETREG_IMM32_B32 3, 2305, implicit-def $mode, implicit $mode - ; GFX9: [[FMA:%[0-9]+]]:_(s32) = arcp G_FMA [[FNEG]], [[INT4]], [[C]] - ; GFX9: [[FMA1:%[0-9]+]]:_(s32) = arcp G_FMA [[FMA]], [[INT4]], [[INT4]] - ; GFX9: [[FMUL:%[0-9]+]]:_(s32) = arcp G_FMUL [[INT2]], [[FMA1]] - ; GFX9: [[FMA2:%[0-9]+]]:_(s32) = arcp G_FMA [[FNEG]], [[FMUL]], [[INT2]] - ; GFX9: [[FMA3:%[0-9]+]]:_(s32) = arcp G_FMA [[FMA2]], [[FMA1]], [[FMUL]] - ; GFX9: [[FMA4:%[0-9]+]]:_(s32) = arcp G_FMA [[FNEG]], [[FMA3]], 
[[INT2]] - ; GFX9: S_SETREG_IMM32_B32 0, 2305, implicit-def $mode, implicit $mode - ; GFX9: [[INT5:%[0-9]+]]:_(s32) = arcp G_INTRINSIC intrinsic(@llvm.amdgcn.div.fmas), [[FMA4]](s32), [[FMA1]](s32), [[FMA3]](s32), [[INT3]](s1) - ; GFX9: [[INT6:%[0-9]+]]:_(s32) = arcp G_INTRINSIC intrinsic(@llvm.amdgcn.div.fixup), [[INT5]](s32), [[COPY1]](s32), [[COPY]](s32) - ; GFX9: $vgpr0 = COPY [[INT6]](s32) + ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; GFX9-NEXT: [[C:%[0-9]+]]:_(s32) = G_FCONSTANT float 1.000000e+00 + ; GFX9-NEXT: [[INT:%[0-9]+]]:_(s32), [[INT1:%[0-9]+]]:_(s1) = arcp G_INTRINSIC intrinsic(@llvm.amdgcn.div.scale), [[COPY]](s32), [[COPY1]](s32), 0 + ; GFX9-NEXT: [[INT2:%[0-9]+]]:_(s32), [[INT3:%[0-9]+]]:_(s1) = arcp G_INTRINSIC intrinsic(@llvm.amdgcn.div.scale), [[COPY]](s32), [[COPY1]](s32), 1 + ; GFX9-NEXT: [[INT4:%[0-9]+]]:_(s32) = arcp G_INTRINSIC intrinsic(@llvm.amdgcn.rcp), [[INT]](s32) + ; GFX9-NEXT: [[FNEG:%[0-9]+]]:_(s32) = arcp G_FNEG [[INT]] + ; GFX9-NEXT: S_SETREG_IMM32_B32 3, 2305, implicit-def $mode, implicit $mode + ; GFX9-NEXT: [[FMA:%[0-9]+]]:_(s32) = arcp G_FMA [[FNEG]], [[INT4]], [[C]] + ; GFX9-NEXT: [[FMA1:%[0-9]+]]:_(s32) = arcp G_FMA [[FMA]], [[INT4]], [[INT4]] + ; GFX9-NEXT: [[FMUL:%[0-9]+]]:_(s32) = arcp G_FMUL [[INT2]], [[FMA1]] + ; GFX9-NEXT: [[FMA2:%[0-9]+]]:_(s32) = arcp G_FMA [[FNEG]], [[FMUL]], [[INT2]] + ; GFX9-NEXT: [[FMA3:%[0-9]+]]:_(s32) = arcp G_FMA [[FMA2]], [[FMA1]], [[FMUL]] + ; GFX9-NEXT: [[FMA4:%[0-9]+]]:_(s32) = arcp G_FMA [[FNEG]], [[FMA3]], [[INT2]] + ; GFX9-NEXT: S_SETREG_IMM32_B32 0, 2305, implicit-def $mode, implicit $mode + ; GFX9-NEXT: [[INT5:%[0-9]+]]:_(s32) = arcp G_INTRINSIC intrinsic(@llvm.amdgcn.div.fmas), [[FMA4]](s32), [[FMA1]](s32), [[FMA3]](s32), [[INT3]](s1) + ; GFX9-NEXT: [[INT6:%[0-9]+]]:_(s32) = arcp G_INTRINSIC intrinsic(@llvm.amdgcn.div.fixup), [[INT5]](s32), [[COPY1]](s32), [[COPY]](s32) + ; GFX9-NEXT: $vgpr0 = COPY [[INT6]](s32) ; GFX9-UNSAFE-LABEL: name: test_fdiv_s32_denorms_off_arcp ; GFX9-UNSAFE: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX9-UNSAFE: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX9-UNSAFE: [[INT:%[0-9]+]]:_(s32) = arcp G_INTRINSIC intrinsic(@llvm.amdgcn.rcp), [[COPY1]](s32) - ; GFX9-UNSAFE: [[FMUL:%[0-9]+]]:_(s32) = arcp G_FMUL [[COPY]], [[INT]] - ; GFX9-UNSAFE: $vgpr0 = COPY [[FMUL]](s32) + ; GFX9-UNSAFE-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; GFX9-UNSAFE-NEXT: [[INT:%[0-9]+]]:_(s32) = arcp G_INTRINSIC intrinsic(@llvm.amdgcn.rcp), [[COPY1]](s32) + ; GFX9-UNSAFE-NEXT: [[FMUL:%[0-9]+]]:_(s32) = arcp G_FMUL [[COPY]], [[INT]] + ; GFX9-UNSAFE-NEXT: $vgpr0 = COPY [[FMUL]](s32) ; GFX10-LABEL: name: test_fdiv_s32_denorms_off_arcp ; GFX10: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX10: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX10: [[C:%[0-9]+]]:_(s32) = G_FCONSTANT float 1.000000e+00 - ; GFX10: [[INT:%[0-9]+]]:_(s32), [[INT1:%[0-9]+]]:_(s1) = arcp G_INTRINSIC intrinsic(@llvm.amdgcn.div.scale), [[COPY]](s32), [[COPY1]](s32), 0 - ; GFX10: [[INT2:%[0-9]+]]:_(s32), [[INT3:%[0-9]+]]:_(s1) = arcp G_INTRINSIC intrinsic(@llvm.amdgcn.div.scale), [[COPY]](s32), [[COPY1]](s32), 1 - ; GFX10: [[INT4:%[0-9]+]]:_(s32) = arcp G_INTRINSIC intrinsic(@llvm.amdgcn.rcp), [[INT]](s32) - ; GFX10: [[FNEG:%[0-9]+]]:_(s32) = arcp G_FNEG [[INT]] - ; GFX10: S_DENORM_MODE 15, implicit-def $mode, implicit $mode - ; GFX10: [[FMA:%[0-9]+]]:_(s32) = arcp G_FMA [[FNEG]], [[INT4]], [[C]] - ; GFX10: [[FMA1:%[0-9]+]]:_(s32) = arcp G_FMA [[FMA]], [[INT4]], [[INT4]] - ; GFX10: [[FMUL:%[0-9]+]]:_(s32) = arcp G_FMUL [[INT2]], 
[[FMA1]] - ; GFX10: [[FMA2:%[0-9]+]]:_(s32) = arcp G_FMA [[FNEG]], [[FMUL]], [[INT2]] - ; GFX10: [[FMA3:%[0-9]+]]:_(s32) = arcp G_FMA [[FMA2]], [[FMA1]], [[FMUL]] - ; GFX10: [[FMA4:%[0-9]+]]:_(s32) = arcp G_FMA [[FNEG]], [[FMA3]], [[INT2]] - ; GFX10: S_DENORM_MODE 12, implicit-def $mode, implicit $mode - ; GFX10: [[INT5:%[0-9]+]]:_(s32) = arcp G_INTRINSIC intrinsic(@llvm.amdgcn.div.fmas), [[FMA4]](s32), [[FMA1]](s32), [[FMA3]](s32), [[INT3]](s1) - ; GFX10: [[INT6:%[0-9]+]]:_(s32) = arcp G_INTRINSIC intrinsic(@llvm.amdgcn.div.fixup), [[INT5]](s32), [[COPY1]](s32), [[COPY]](s32) - ; GFX10: $vgpr0 = COPY [[INT6]](s32) + ; GFX10-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; GFX10-NEXT: [[C:%[0-9]+]]:_(s32) = G_FCONSTANT float 1.000000e+00 + ; GFX10-NEXT: [[INT:%[0-9]+]]:_(s32), [[INT1:%[0-9]+]]:_(s1) = arcp G_INTRINSIC intrinsic(@llvm.amdgcn.div.scale), [[COPY]](s32), [[COPY1]](s32), 0 + ; GFX10-NEXT: [[INT2:%[0-9]+]]:_(s32), [[INT3:%[0-9]+]]:_(s1) = arcp G_INTRINSIC intrinsic(@llvm.amdgcn.div.scale), [[COPY]](s32), [[COPY1]](s32), 1 + ; GFX10-NEXT: [[INT4:%[0-9]+]]:_(s32) = arcp G_INTRINSIC intrinsic(@llvm.amdgcn.rcp), [[INT]](s32) + ; GFX10-NEXT: [[FNEG:%[0-9]+]]:_(s32) = arcp G_FNEG [[INT]] + ; GFX10-NEXT: S_DENORM_MODE 15, implicit-def $mode, implicit $mode + ; GFX10-NEXT: [[FMA:%[0-9]+]]:_(s32) = arcp G_FMA [[FNEG]], [[INT4]], [[C]] + ; GFX10-NEXT: [[FMA1:%[0-9]+]]:_(s32) = arcp G_FMA [[FMA]], [[INT4]], [[INT4]] + ; GFX10-NEXT: [[FMUL:%[0-9]+]]:_(s32) = arcp G_FMUL [[INT2]], [[FMA1]] + ; GFX10-NEXT: [[FMA2:%[0-9]+]]:_(s32) = arcp G_FMA [[FNEG]], [[FMUL]], [[INT2]] + ; GFX10-NEXT: [[FMA3:%[0-9]+]]:_(s32) = arcp G_FMA [[FMA2]], [[FMA1]], [[FMUL]] + ; GFX10-NEXT: [[FMA4:%[0-9]+]]:_(s32) = arcp G_FMA [[FNEG]], [[FMA3]], [[INT2]] + ; GFX10-NEXT: S_DENORM_MODE 12, implicit-def $mode, implicit $mode + ; GFX10-NEXT: [[INT5:%[0-9]+]]:_(s32) = arcp G_INTRINSIC intrinsic(@llvm.amdgcn.div.fmas), [[FMA4]](s32), [[FMA1]](s32), [[FMA3]](s32), [[INT3]](s1) + ; GFX10-NEXT: [[INT6:%[0-9]+]]:_(s32) = arcp G_INTRINSIC intrinsic(@llvm.amdgcn.div.fixup), [[INT5]](s32), [[COPY1]](s32), [[COPY]](s32) + ; GFX10-NEXT: $vgpr0 = COPY [[INT6]](s32) %0:_(s32) = COPY $vgpr0 %1:_(s32) = COPY $vgpr1 %2:_(s32) = arcp G_FDIV %0, %1 @@ -408,93 +408,93 @@ body: | ; SI-LABEL: name: test_fdiv_s64 ; SI: [[COPY:%[0-9]+]]:_(s64) = COPY $vgpr0_vgpr1 - ; SI: [[COPY1:%[0-9]+]]:_(s64) = COPY $vgpr2_vgpr3 - ; SI: [[C:%[0-9]+]]:_(s64) = G_FCONSTANT double 1.000000e+00 - ; SI: [[INT:%[0-9]+]]:_(s64), [[INT1:%[0-9]+]]:_(s1) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.scale), [[COPY]](s64), [[COPY1]](s64), 0 - ; SI: [[FNEG:%[0-9]+]]:_(s64) = G_FNEG [[INT]] - ; SI: [[INT2:%[0-9]+]]:_(s64) = G_INTRINSIC intrinsic(@llvm.amdgcn.rcp), [[INT]](s64) - ; SI: [[FMA:%[0-9]+]]:_(s64) = G_FMA [[FNEG]], [[INT2]], [[C]] - ; SI: [[FMA1:%[0-9]+]]:_(s64) = G_FMA [[INT2]], [[FMA]], [[INT2]] - ; SI: [[FMA2:%[0-9]+]]:_(s64) = G_FMA [[FNEG]], [[FMA1]], [[C]] - ; SI: [[INT3:%[0-9]+]]:_(s64), [[INT4:%[0-9]+]]:_(s1) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.scale), [[COPY]](s64), [[COPY1]](s64), 1 - ; SI: [[FMA3:%[0-9]+]]:_(s64) = G_FMA [[FMA1]], [[FMA2]], [[FMA1]] - ; SI: [[FMUL:%[0-9]+]]:_(s64) = G_FMUL [[INT3]], [[FMA3]] - ; SI: [[FMA4:%[0-9]+]]:_(s64) = G_FMA [[FNEG]], [[FMUL]], [[INT3]] - ; SI: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](s64) - ; SI: [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](s64) - ; SI: [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[INT]](s64) - ; SI: 
[[UV6:%[0-9]+]]:_(s32), [[UV7:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[INT3]](s64) - ; SI: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[UV1]](s32), [[UV7]] - ; SI: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[UV3]](s32), [[UV5]] - ; SI: [[XOR:%[0-9]+]]:_(s1) = G_XOR [[ICMP]], [[ICMP1]] - ; SI: [[INT5:%[0-9]+]]:_(s64) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.fmas), [[FMA4]](s64), [[FMA3]](s64), [[FMUL]](s64), [[XOR]](s1) - ; SI: [[INT6:%[0-9]+]]:_(s64) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.fixup), [[INT5]](s64), [[COPY1]](s64), [[COPY]](s64) - ; SI: $vgpr0_vgpr1 = COPY [[INT6]](s64) + ; SI-NEXT: [[COPY1:%[0-9]+]]:_(s64) = COPY $vgpr2_vgpr3 + ; SI-NEXT: [[C:%[0-9]+]]:_(s64) = G_FCONSTANT double 1.000000e+00 + ; SI-NEXT: [[INT:%[0-9]+]]:_(s64), [[INT1:%[0-9]+]]:_(s1) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.scale), [[COPY]](s64), [[COPY1]](s64), 0 + ; SI-NEXT: [[FNEG:%[0-9]+]]:_(s64) = G_FNEG [[INT]] + ; SI-NEXT: [[INT2:%[0-9]+]]:_(s64) = G_INTRINSIC intrinsic(@llvm.amdgcn.rcp), [[INT]](s64) + ; SI-NEXT: [[FMA:%[0-9]+]]:_(s64) = G_FMA [[FNEG]], [[INT2]], [[C]] + ; SI-NEXT: [[FMA1:%[0-9]+]]:_(s64) = G_FMA [[INT2]], [[FMA]], [[INT2]] + ; SI-NEXT: [[FMA2:%[0-9]+]]:_(s64) = G_FMA [[FNEG]], [[FMA1]], [[C]] + ; SI-NEXT: [[INT3:%[0-9]+]]:_(s64), [[INT4:%[0-9]+]]:_(s1) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.scale), [[COPY]](s64), [[COPY1]](s64), 1 + ; SI-NEXT: [[FMA3:%[0-9]+]]:_(s64) = G_FMA [[FMA1]], [[FMA2]], [[FMA1]] + ; SI-NEXT: [[FMUL:%[0-9]+]]:_(s64) = G_FMUL [[INT3]], [[FMA3]] + ; SI-NEXT: [[FMA4:%[0-9]+]]:_(s64) = G_FMA [[FNEG]], [[FMUL]], [[INT3]] + ; SI-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](s64) + ; SI-NEXT: [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](s64) + ; SI-NEXT: [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[INT]](s64) + ; SI-NEXT: [[UV6:%[0-9]+]]:_(s32), [[UV7:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[INT3]](s64) + ; SI-NEXT: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[UV1]](s32), [[UV7]] + ; SI-NEXT: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[UV3]](s32), [[UV5]] + ; SI-NEXT: [[XOR:%[0-9]+]]:_(s1) = G_XOR [[ICMP]], [[ICMP1]] + ; SI-NEXT: [[INT5:%[0-9]+]]:_(s64) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.fmas), [[FMA4]](s64), [[FMA3]](s64), [[FMUL]](s64), [[XOR]](s1) + ; SI-NEXT: [[INT6:%[0-9]+]]:_(s64) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.fixup), [[INT5]](s64), [[COPY1]](s64), [[COPY]](s64) + ; SI-NEXT: $vgpr0_vgpr1 = COPY [[INT6]](s64) ; VI-LABEL: name: test_fdiv_s64 ; VI: [[COPY:%[0-9]+]]:_(s64) = COPY $vgpr0_vgpr1 - ; VI: [[COPY1:%[0-9]+]]:_(s64) = COPY $vgpr2_vgpr3 - ; VI: [[C:%[0-9]+]]:_(s64) = G_FCONSTANT double 1.000000e+00 - ; VI: [[INT:%[0-9]+]]:_(s64), [[INT1:%[0-9]+]]:_(s1) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.scale), [[COPY]](s64), [[COPY1]](s64), 0 - ; VI: [[FNEG:%[0-9]+]]:_(s64) = G_FNEG [[INT]] - ; VI: [[INT2:%[0-9]+]]:_(s64) = G_INTRINSIC intrinsic(@llvm.amdgcn.rcp), [[INT]](s64) - ; VI: [[FMA:%[0-9]+]]:_(s64) = G_FMA [[FNEG]], [[INT2]], [[C]] - ; VI: [[FMA1:%[0-9]+]]:_(s64) = G_FMA [[INT2]], [[FMA]], [[INT2]] - ; VI: [[FMA2:%[0-9]+]]:_(s64) = G_FMA [[FNEG]], [[FMA1]], [[C]] - ; VI: [[INT3:%[0-9]+]]:_(s64), [[INT4:%[0-9]+]]:_(s1) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.scale), [[COPY]](s64), [[COPY1]](s64), 1 - ; VI: [[FMA3:%[0-9]+]]:_(s64) = G_FMA [[FMA1]], [[FMA2]], [[FMA1]] - ; VI: [[FMUL:%[0-9]+]]:_(s64) = G_FMUL [[INT3]], [[FMA3]] - ; VI: [[FMA4:%[0-9]+]]:_(s64) = G_FMA [[FNEG]], [[FMUL]], [[INT3]] - ; VI: [[INT5:%[0-9]+]]:_(s64) = G_INTRINSIC 
intrinsic(@llvm.amdgcn.div.fmas), [[FMA4]](s64), [[FMA3]](s64), [[FMUL]](s64), [[INT4]](s1) - ; VI: [[INT6:%[0-9]+]]:_(s64) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.fixup), [[INT5]](s64), [[COPY1]](s64), [[COPY]](s64) - ; VI: $vgpr0_vgpr1 = COPY [[INT6]](s64) + ; VI-NEXT: [[COPY1:%[0-9]+]]:_(s64) = COPY $vgpr2_vgpr3 + ; VI-NEXT: [[C:%[0-9]+]]:_(s64) = G_FCONSTANT double 1.000000e+00 + ; VI-NEXT: [[INT:%[0-9]+]]:_(s64), [[INT1:%[0-9]+]]:_(s1) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.scale), [[COPY]](s64), [[COPY1]](s64), 0 + ; VI-NEXT: [[FNEG:%[0-9]+]]:_(s64) = G_FNEG [[INT]] + ; VI-NEXT: [[INT2:%[0-9]+]]:_(s64) = G_INTRINSIC intrinsic(@llvm.amdgcn.rcp), [[INT]](s64) + ; VI-NEXT: [[FMA:%[0-9]+]]:_(s64) = G_FMA [[FNEG]], [[INT2]], [[C]] + ; VI-NEXT: [[FMA1:%[0-9]+]]:_(s64) = G_FMA [[INT2]], [[FMA]], [[INT2]] + ; VI-NEXT: [[FMA2:%[0-9]+]]:_(s64) = G_FMA [[FNEG]], [[FMA1]], [[C]] + ; VI-NEXT: [[INT3:%[0-9]+]]:_(s64), [[INT4:%[0-9]+]]:_(s1) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.scale), [[COPY]](s64), [[COPY1]](s64), 1 + ; VI-NEXT: [[FMA3:%[0-9]+]]:_(s64) = G_FMA [[FMA1]], [[FMA2]], [[FMA1]] + ; VI-NEXT: [[FMUL:%[0-9]+]]:_(s64) = G_FMUL [[INT3]], [[FMA3]] + ; VI-NEXT: [[FMA4:%[0-9]+]]:_(s64) = G_FMA [[FNEG]], [[FMUL]], [[INT3]] + ; VI-NEXT: [[INT5:%[0-9]+]]:_(s64) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.fmas), [[FMA4]](s64), [[FMA3]](s64), [[FMUL]](s64), [[INT4]](s1) + ; VI-NEXT: [[INT6:%[0-9]+]]:_(s64) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.fixup), [[INT5]](s64), [[COPY1]](s64), [[COPY]](s64) + ; VI-NEXT: $vgpr0_vgpr1 = COPY [[INT6]](s64) ; GFX9-LABEL: name: test_fdiv_s64 ; GFX9: [[COPY:%[0-9]+]]:_(s64) = COPY $vgpr0_vgpr1 - ; GFX9: [[COPY1:%[0-9]+]]:_(s64) = COPY $vgpr2_vgpr3 - ; GFX9: [[C:%[0-9]+]]:_(s64) = G_FCONSTANT double 1.000000e+00 - ; GFX9: [[INT:%[0-9]+]]:_(s64), [[INT1:%[0-9]+]]:_(s1) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.scale), [[COPY]](s64), [[COPY1]](s64), 0 - ; GFX9: [[FNEG:%[0-9]+]]:_(s64) = G_FNEG [[INT]] - ; GFX9: [[INT2:%[0-9]+]]:_(s64) = G_INTRINSIC intrinsic(@llvm.amdgcn.rcp), [[INT]](s64) - ; GFX9: [[FMA:%[0-9]+]]:_(s64) = G_FMA [[FNEG]], [[INT2]], [[C]] - ; GFX9: [[FMA1:%[0-9]+]]:_(s64) = G_FMA [[INT2]], [[FMA]], [[INT2]] - ; GFX9: [[FMA2:%[0-9]+]]:_(s64) = G_FMA [[FNEG]], [[FMA1]], [[C]] - ; GFX9: [[INT3:%[0-9]+]]:_(s64), [[INT4:%[0-9]+]]:_(s1) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.scale), [[COPY]](s64), [[COPY1]](s64), 1 - ; GFX9: [[FMA3:%[0-9]+]]:_(s64) = G_FMA [[FMA1]], [[FMA2]], [[FMA1]] - ; GFX9: [[FMUL:%[0-9]+]]:_(s64) = G_FMUL [[INT3]], [[FMA3]] - ; GFX9: [[FMA4:%[0-9]+]]:_(s64) = G_FMA [[FNEG]], [[FMUL]], [[INT3]] - ; GFX9: [[INT5:%[0-9]+]]:_(s64) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.fmas), [[FMA4]](s64), [[FMA3]](s64), [[FMUL]](s64), [[INT4]](s1) - ; GFX9: [[INT6:%[0-9]+]]:_(s64) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.fixup), [[INT5]](s64), [[COPY1]](s64), [[COPY]](s64) - ; GFX9: $vgpr0_vgpr1 = COPY [[INT6]](s64) + ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(s64) = COPY $vgpr2_vgpr3 + ; GFX9-NEXT: [[C:%[0-9]+]]:_(s64) = G_FCONSTANT double 1.000000e+00 + ; GFX9-NEXT: [[INT:%[0-9]+]]:_(s64), [[INT1:%[0-9]+]]:_(s1) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.scale), [[COPY]](s64), [[COPY1]](s64), 0 + ; GFX9-NEXT: [[FNEG:%[0-9]+]]:_(s64) = G_FNEG [[INT]] + ; GFX9-NEXT: [[INT2:%[0-9]+]]:_(s64) = G_INTRINSIC intrinsic(@llvm.amdgcn.rcp), [[INT]](s64) + ; GFX9-NEXT: [[FMA:%[0-9]+]]:_(s64) = G_FMA [[FNEG]], [[INT2]], [[C]] + ; GFX9-NEXT: [[FMA1:%[0-9]+]]:_(s64) = G_FMA [[INT2]], [[FMA]], [[INT2]] + ; GFX9-NEXT: [[FMA2:%[0-9]+]]:_(s64) = G_FMA [[FNEG]], [[FMA1]], 
[[C]] + ; GFX9-NEXT: [[INT3:%[0-9]+]]:_(s64), [[INT4:%[0-9]+]]:_(s1) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.scale), [[COPY]](s64), [[COPY1]](s64), 1 + ; GFX9-NEXT: [[FMA3:%[0-9]+]]:_(s64) = G_FMA [[FMA1]], [[FMA2]], [[FMA1]] + ; GFX9-NEXT: [[FMUL:%[0-9]+]]:_(s64) = G_FMUL [[INT3]], [[FMA3]] + ; GFX9-NEXT: [[FMA4:%[0-9]+]]:_(s64) = G_FMA [[FNEG]], [[FMUL]], [[INT3]] + ; GFX9-NEXT: [[INT5:%[0-9]+]]:_(s64) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.fmas), [[FMA4]](s64), [[FMA3]](s64), [[FMUL]](s64), [[INT4]](s1) + ; GFX9-NEXT: [[INT6:%[0-9]+]]:_(s64) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.fixup), [[INT5]](s64), [[COPY1]](s64), [[COPY]](s64) + ; GFX9-NEXT: $vgpr0_vgpr1 = COPY [[INT6]](s64) ; GFX9-UNSAFE-LABEL: name: test_fdiv_s64 ; GFX9-UNSAFE: [[COPY:%[0-9]+]]:_(s64) = COPY $vgpr0_vgpr1 - ; GFX9-UNSAFE: [[COPY1:%[0-9]+]]:_(s64) = COPY $vgpr2_vgpr3 - ; GFX9-UNSAFE: [[FNEG:%[0-9]+]]:_(s64) = G_FNEG [[COPY1]] - ; GFX9-UNSAFE: [[C:%[0-9]+]]:_(s64) = G_FCONSTANT double 1.000000e+00 - ; GFX9-UNSAFE: [[INT:%[0-9]+]]:_(s64) = G_INTRINSIC intrinsic(@llvm.amdgcn.rcp), [[COPY1]](s64) - ; GFX9-UNSAFE: [[FMA:%[0-9]+]]:_(s64) = G_FMA [[FNEG]], [[INT]], [[C]] - ; GFX9-UNSAFE: [[FMA1:%[0-9]+]]:_(s64) = G_FMA [[FMA]], [[INT]], [[INT]] - ; GFX9-UNSAFE: [[FMA2:%[0-9]+]]:_(s64) = G_FMA [[FNEG]], [[FMA1]], [[C]] - ; GFX9-UNSAFE: [[FMA3:%[0-9]+]]:_(s64) = G_FMA [[FMA2]], [[FMA1]], [[FMA1]] - ; GFX9-UNSAFE: [[FMUL:%[0-9]+]]:_(s64) = G_FMUL [[COPY]], [[FMA3]] - ; GFX9-UNSAFE: [[FMA4:%[0-9]+]]:_(s64) = G_FMA [[FNEG]], [[FMUL]], [[COPY]] - ; GFX9-UNSAFE: [[FMA5:%[0-9]+]]:_(s64) = G_FMA [[FMA4]], [[FMA3]], [[FMUL]] - ; GFX9-UNSAFE: $vgpr0_vgpr1 = COPY [[FMA5]](s64) + ; GFX9-UNSAFE-NEXT: [[COPY1:%[0-9]+]]:_(s64) = COPY $vgpr2_vgpr3 + ; GFX9-UNSAFE-NEXT: [[FNEG:%[0-9]+]]:_(s64) = G_FNEG [[COPY1]] + ; GFX9-UNSAFE-NEXT: [[C:%[0-9]+]]:_(s64) = G_FCONSTANT double 1.000000e+00 + ; GFX9-UNSAFE-NEXT: [[INT:%[0-9]+]]:_(s64) = G_INTRINSIC intrinsic(@llvm.amdgcn.rcp), [[COPY1]](s64) + ; GFX9-UNSAFE-NEXT: [[FMA:%[0-9]+]]:_(s64) = G_FMA [[FNEG]], [[INT]], [[C]] + ; GFX9-UNSAFE-NEXT: [[FMA1:%[0-9]+]]:_(s64) = G_FMA [[FMA]], [[INT]], [[INT]] + ; GFX9-UNSAFE-NEXT: [[FMA2:%[0-9]+]]:_(s64) = G_FMA [[FNEG]], [[FMA1]], [[C]] + ; GFX9-UNSAFE-NEXT: [[FMA3:%[0-9]+]]:_(s64) = G_FMA [[FMA2]], [[FMA1]], [[FMA1]] + ; GFX9-UNSAFE-NEXT: [[FMUL:%[0-9]+]]:_(s64) = G_FMUL [[COPY]], [[FMA3]] + ; GFX9-UNSAFE-NEXT: [[FMA4:%[0-9]+]]:_(s64) = G_FMA [[FNEG]], [[FMUL]], [[COPY]] + ; GFX9-UNSAFE-NEXT: [[FMA5:%[0-9]+]]:_(s64) = G_FMA [[FMA4]], [[FMA3]], [[FMUL]] + ; GFX9-UNSAFE-NEXT: $vgpr0_vgpr1 = COPY [[FMA5]](s64) ; GFX10-LABEL: name: test_fdiv_s64 ; GFX10: [[COPY:%[0-9]+]]:_(s64) = COPY $vgpr0_vgpr1 - ; GFX10: [[COPY1:%[0-9]+]]:_(s64) = COPY $vgpr2_vgpr3 - ; GFX10: [[C:%[0-9]+]]:_(s64) = G_FCONSTANT double 1.000000e+00 - ; GFX10: [[INT:%[0-9]+]]:_(s64), [[INT1:%[0-9]+]]:_(s1) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.scale), [[COPY]](s64), [[COPY1]](s64), 0 - ; GFX10: [[FNEG:%[0-9]+]]:_(s64) = G_FNEG [[INT]] - ; GFX10: [[INT2:%[0-9]+]]:_(s64) = G_INTRINSIC intrinsic(@llvm.amdgcn.rcp), [[INT]](s64) - ; GFX10: [[FMA:%[0-9]+]]:_(s64) = G_FMA [[FNEG]], [[INT2]], [[C]] - ; GFX10: [[FMA1:%[0-9]+]]:_(s64) = G_FMA [[INT2]], [[FMA]], [[INT2]] - ; GFX10: [[FMA2:%[0-9]+]]:_(s64) = G_FMA [[FNEG]], [[FMA1]], [[C]] - ; GFX10: [[INT3:%[0-9]+]]:_(s64), [[INT4:%[0-9]+]]:_(s1) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.scale), [[COPY]](s64), [[COPY1]](s64), 1 - ; GFX10: [[FMA3:%[0-9]+]]:_(s64) = G_FMA [[FMA1]], [[FMA2]], [[FMA1]] - ; GFX10: [[FMUL:%[0-9]+]]:_(s64) = G_FMUL 
[[INT3]], [[FMA3]] - ; GFX10: [[FMA4:%[0-9]+]]:_(s64) = G_FMA [[FNEG]], [[FMUL]], [[INT3]] - ; GFX10: [[INT5:%[0-9]+]]:_(s64) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.fmas), [[FMA4]](s64), [[FMA3]](s64), [[FMUL]](s64), [[INT4]](s1) - ; GFX10: [[INT6:%[0-9]+]]:_(s64) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.fixup), [[INT5]](s64), [[COPY1]](s64), [[COPY]](s64) - ; GFX10: $vgpr0_vgpr1 = COPY [[INT6]](s64) + ; GFX10-NEXT: [[COPY1:%[0-9]+]]:_(s64) = COPY $vgpr2_vgpr3 + ; GFX10-NEXT: [[C:%[0-9]+]]:_(s64) = G_FCONSTANT double 1.000000e+00 + ; GFX10-NEXT: [[INT:%[0-9]+]]:_(s64), [[INT1:%[0-9]+]]:_(s1) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.scale), [[COPY]](s64), [[COPY1]](s64), 0 + ; GFX10-NEXT: [[FNEG:%[0-9]+]]:_(s64) = G_FNEG [[INT]] + ; GFX10-NEXT: [[INT2:%[0-9]+]]:_(s64) = G_INTRINSIC intrinsic(@llvm.amdgcn.rcp), [[INT]](s64) + ; GFX10-NEXT: [[FMA:%[0-9]+]]:_(s64) = G_FMA [[FNEG]], [[INT2]], [[C]] + ; GFX10-NEXT: [[FMA1:%[0-9]+]]:_(s64) = G_FMA [[INT2]], [[FMA]], [[INT2]] + ; GFX10-NEXT: [[FMA2:%[0-9]+]]:_(s64) = G_FMA [[FNEG]], [[FMA1]], [[C]] + ; GFX10-NEXT: [[INT3:%[0-9]+]]:_(s64), [[INT4:%[0-9]+]]:_(s1) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.scale), [[COPY]](s64), [[COPY1]](s64), 1 + ; GFX10-NEXT: [[FMA3:%[0-9]+]]:_(s64) = G_FMA [[FMA1]], [[FMA2]], [[FMA1]] + ; GFX10-NEXT: [[FMUL:%[0-9]+]]:_(s64) = G_FMUL [[INT3]], [[FMA3]] + ; GFX10-NEXT: [[FMA4:%[0-9]+]]:_(s64) = G_FMA [[FNEG]], [[FMUL]], [[INT3]] + ; GFX10-NEXT: [[INT5:%[0-9]+]]:_(s64) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.fmas), [[FMA4]](s64), [[FMA3]](s64), [[FMUL]](s64), [[INT4]](s1) + ; GFX10-NEXT: [[INT6:%[0-9]+]]:_(s64) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.fixup), [[INT5]](s64), [[COPY1]](s64), [[COPY]](s64) + ; GFX10-NEXT: $vgpr0_vgpr1 = COPY [[INT6]](s64) %0:_(s64) = COPY $vgpr0_vgpr1 %1:_(s64) = COPY $vgpr2_vgpr3 %2:_(s64) = G_FDIV %0, %1 @@ -516,159 +516,159 @@ body: | ; SI-LABEL: name: test_fdiv_v2s32 ; SI: [[COPY:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr0_vgpr1 - ; SI: [[COPY1:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr2_vgpr3 - ; SI: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<2 x s32>) - ; SI: [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](<2 x s32>) - ; SI: [[C:%[0-9]+]]:_(s32) = G_FCONSTANT float 1.000000e+00 - ; SI: [[INT:%[0-9]+]]:_(s32), [[INT1:%[0-9]+]]:_(s1) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.scale), [[UV]](s32), [[UV2]](s32), 0 - ; SI: [[INT2:%[0-9]+]]:_(s32), [[INT3:%[0-9]+]]:_(s1) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.scale), [[UV]](s32), [[UV2]](s32), 1 - ; SI: [[INT4:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.rcp), [[INT]](s32) - ; SI: [[FNEG:%[0-9]+]]:_(s32) = G_FNEG [[INT]] - ; SI: S_SETREG_IMM32_B32 3, 2305, implicit-def $mode, implicit $mode - ; SI: [[FMA:%[0-9]+]]:_(s32) = G_FMA [[FNEG]], [[INT4]], [[C]] - ; SI: [[FMA1:%[0-9]+]]:_(s32) = G_FMA [[FMA]], [[INT4]], [[INT4]] - ; SI: [[FMUL:%[0-9]+]]:_(s32) = G_FMUL [[INT2]], [[FMA1]] - ; SI: [[FMA2:%[0-9]+]]:_(s32) = G_FMA [[FNEG]], [[FMUL]], [[INT2]] - ; SI: [[FMA3:%[0-9]+]]:_(s32) = G_FMA [[FMA2]], [[FMA1]], [[FMUL]] - ; SI: [[FMA4:%[0-9]+]]:_(s32) = G_FMA [[FNEG]], [[FMA3]], [[INT2]] - ; SI: S_SETREG_IMM32_B32 0, 2305, implicit-def $mode, implicit $mode - ; SI: [[INT5:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.fmas), [[FMA4]](s32), [[FMA1]](s32), [[FMA3]](s32), [[INT3]](s1) - ; SI: [[INT6:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.fixup), [[INT5]](s32), [[UV2]](s32), [[UV]](s32) - ; SI: [[INT7:%[0-9]+]]:_(s32), [[INT8:%[0-9]+]]:_(s1) = G_INTRINSIC 
intrinsic(@llvm.amdgcn.div.scale), [[UV1]](s32), [[UV3]](s32), 0 - ; SI: [[INT9:%[0-9]+]]:_(s32), [[INT10:%[0-9]+]]:_(s1) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.scale), [[UV1]](s32), [[UV3]](s32), 1 - ; SI: [[INT11:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.rcp), [[INT7]](s32) - ; SI: [[FNEG1:%[0-9]+]]:_(s32) = G_FNEG [[INT7]] - ; SI: S_SETREG_IMM32_B32 3, 2305, implicit-def $mode, implicit $mode - ; SI: [[FMA5:%[0-9]+]]:_(s32) = G_FMA [[FNEG1]], [[INT11]], [[C]] - ; SI: [[FMA6:%[0-9]+]]:_(s32) = G_FMA [[FMA5]], [[INT11]], [[INT11]] - ; SI: [[FMUL1:%[0-9]+]]:_(s32) = G_FMUL [[INT9]], [[FMA6]] - ; SI: [[FMA7:%[0-9]+]]:_(s32) = G_FMA [[FNEG1]], [[FMUL1]], [[INT9]] - ; SI: [[FMA8:%[0-9]+]]:_(s32) = G_FMA [[FMA7]], [[FMA6]], [[FMUL1]] - ; SI: [[FMA9:%[0-9]+]]:_(s32) = G_FMA [[FNEG1]], [[FMA8]], [[INT9]] - ; SI: S_SETREG_IMM32_B32 0, 2305, implicit-def $mode, implicit $mode - ; SI: [[INT12:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.fmas), [[FMA9]](s32), [[FMA6]](s32), [[FMA8]](s32), [[INT10]](s1) - ; SI: [[INT13:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.fixup), [[INT12]](s32), [[UV3]](s32), [[UV1]](s32) - ; SI: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[INT6]](s32), [[INT13]](s32) - ; SI: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x s32>) + ; SI-NEXT: [[COPY1:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr2_vgpr3 + ; SI-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<2 x s32>) + ; SI-NEXT: [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](<2 x s32>) + ; SI-NEXT: [[C:%[0-9]+]]:_(s32) = G_FCONSTANT float 1.000000e+00 + ; SI-NEXT: [[INT:%[0-9]+]]:_(s32), [[INT1:%[0-9]+]]:_(s1) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.scale), [[UV]](s32), [[UV2]](s32), 0 + ; SI-NEXT: [[INT2:%[0-9]+]]:_(s32), [[INT3:%[0-9]+]]:_(s1) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.scale), [[UV]](s32), [[UV2]](s32), 1 + ; SI-NEXT: [[INT4:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.rcp), [[INT]](s32) + ; SI-NEXT: [[FNEG:%[0-9]+]]:_(s32) = G_FNEG [[INT]] + ; SI-NEXT: S_SETREG_IMM32_B32 3, 2305, implicit-def $mode, implicit $mode + ; SI-NEXT: [[FMA:%[0-9]+]]:_(s32) = G_FMA [[FNEG]], [[INT4]], [[C]] + ; SI-NEXT: [[FMA1:%[0-9]+]]:_(s32) = G_FMA [[FMA]], [[INT4]], [[INT4]] + ; SI-NEXT: [[FMUL:%[0-9]+]]:_(s32) = G_FMUL [[INT2]], [[FMA1]] + ; SI-NEXT: [[FMA2:%[0-9]+]]:_(s32) = G_FMA [[FNEG]], [[FMUL]], [[INT2]] + ; SI-NEXT: [[FMA3:%[0-9]+]]:_(s32) = G_FMA [[FMA2]], [[FMA1]], [[FMUL]] + ; SI-NEXT: [[FMA4:%[0-9]+]]:_(s32) = G_FMA [[FNEG]], [[FMA3]], [[INT2]] + ; SI-NEXT: S_SETREG_IMM32_B32 0, 2305, implicit-def $mode, implicit $mode + ; SI-NEXT: [[INT5:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.fmas), [[FMA4]](s32), [[FMA1]](s32), [[FMA3]](s32), [[INT3]](s1) + ; SI-NEXT: [[INT6:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.fixup), [[INT5]](s32), [[UV2]](s32), [[UV]](s32) + ; SI-NEXT: [[INT7:%[0-9]+]]:_(s32), [[INT8:%[0-9]+]]:_(s1) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.scale), [[UV1]](s32), [[UV3]](s32), 0 + ; SI-NEXT: [[INT9:%[0-9]+]]:_(s32), [[INT10:%[0-9]+]]:_(s1) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.scale), [[UV1]](s32), [[UV3]](s32), 1 + ; SI-NEXT: [[INT11:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.rcp), [[INT7]](s32) + ; SI-NEXT: [[FNEG1:%[0-9]+]]:_(s32) = G_FNEG [[INT7]] + ; SI-NEXT: S_SETREG_IMM32_B32 3, 2305, implicit-def $mode, implicit $mode + ; SI-NEXT: [[FMA5:%[0-9]+]]:_(s32) = G_FMA [[FNEG1]], [[INT11]], [[C]] + ; SI-NEXT: [[FMA6:%[0-9]+]]:_(s32) = G_FMA [[FMA5]], 
[[INT11]], [[INT11]] + ; SI-NEXT: [[FMUL1:%[0-9]+]]:_(s32) = G_FMUL [[INT9]], [[FMA6]] + ; SI-NEXT: [[FMA7:%[0-9]+]]:_(s32) = G_FMA [[FNEG1]], [[FMUL1]], [[INT9]] + ; SI-NEXT: [[FMA8:%[0-9]+]]:_(s32) = G_FMA [[FMA7]], [[FMA6]], [[FMUL1]] + ; SI-NEXT: [[FMA9:%[0-9]+]]:_(s32) = G_FMA [[FNEG1]], [[FMA8]], [[INT9]] + ; SI-NEXT: S_SETREG_IMM32_B32 0, 2305, implicit-def $mode, implicit $mode + ; SI-NEXT: [[INT12:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.fmas), [[FMA9]](s32), [[FMA6]](s32), [[FMA8]](s32), [[INT10]](s1) + ; SI-NEXT: [[INT13:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.fixup), [[INT12]](s32), [[UV3]](s32), [[UV1]](s32) + ; SI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[INT6]](s32), [[INT13]](s32) + ; SI-NEXT: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x s32>) ; VI-LABEL: name: test_fdiv_v2s32 ; VI: [[COPY:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr0_vgpr1 - ; VI: [[COPY1:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr2_vgpr3 - ; VI: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<2 x s32>) - ; VI: [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](<2 x s32>) - ; VI: [[C:%[0-9]+]]:_(s32) = G_FCONSTANT float 1.000000e+00 - ; VI: [[INT:%[0-9]+]]:_(s32), [[INT1:%[0-9]+]]:_(s1) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.scale), [[UV]](s32), [[UV2]](s32), 0 - ; VI: [[INT2:%[0-9]+]]:_(s32), [[INT3:%[0-9]+]]:_(s1) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.scale), [[UV]](s32), [[UV2]](s32), 1 - ; VI: [[INT4:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.rcp), [[INT]](s32) - ; VI: [[FNEG:%[0-9]+]]:_(s32) = G_FNEG [[INT]] - ; VI: S_SETREG_IMM32_B32 3, 2305, implicit-def $mode, implicit $mode - ; VI: [[FMA:%[0-9]+]]:_(s32) = G_FMA [[FNEG]], [[INT4]], [[C]] - ; VI: [[FMA1:%[0-9]+]]:_(s32) = G_FMA [[FMA]], [[INT4]], [[INT4]] - ; VI: [[FMUL:%[0-9]+]]:_(s32) = G_FMUL [[INT2]], [[FMA1]] - ; VI: [[FMA2:%[0-9]+]]:_(s32) = G_FMA [[FNEG]], [[FMUL]], [[INT2]] - ; VI: [[FMA3:%[0-9]+]]:_(s32) = G_FMA [[FMA2]], [[FMA1]], [[FMUL]] - ; VI: [[FMA4:%[0-9]+]]:_(s32) = G_FMA [[FNEG]], [[FMA3]], [[INT2]] - ; VI: S_SETREG_IMM32_B32 0, 2305, implicit-def $mode, implicit $mode - ; VI: [[INT5:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.fmas), [[FMA4]](s32), [[FMA1]](s32), [[FMA3]](s32), [[INT3]](s1) - ; VI: [[INT6:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.fixup), [[INT5]](s32), [[UV2]](s32), [[UV]](s32) - ; VI: [[INT7:%[0-9]+]]:_(s32), [[INT8:%[0-9]+]]:_(s1) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.scale), [[UV1]](s32), [[UV3]](s32), 0 - ; VI: [[INT9:%[0-9]+]]:_(s32), [[INT10:%[0-9]+]]:_(s1) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.scale), [[UV1]](s32), [[UV3]](s32), 1 - ; VI: [[INT11:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.rcp), [[INT7]](s32) - ; VI: [[FNEG1:%[0-9]+]]:_(s32) = G_FNEG [[INT7]] - ; VI: S_SETREG_IMM32_B32 3, 2305, implicit-def $mode, implicit $mode - ; VI: [[FMA5:%[0-9]+]]:_(s32) = G_FMA [[FNEG1]], [[INT11]], [[C]] - ; VI: [[FMA6:%[0-9]+]]:_(s32) = G_FMA [[FMA5]], [[INT11]], [[INT11]] - ; VI: [[FMUL1:%[0-9]+]]:_(s32) = G_FMUL [[INT9]], [[FMA6]] - ; VI: [[FMA7:%[0-9]+]]:_(s32) = G_FMA [[FNEG1]], [[FMUL1]], [[INT9]] - ; VI: [[FMA8:%[0-9]+]]:_(s32) = G_FMA [[FMA7]], [[FMA6]], [[FMUL1]] - ; VI: [[FMA9:%[0-9]+]]:_(s32) = G_FMA [[FNEG1]], [[FMA8]], [[INT9]] - ; VI: S_SETREG_IMM32_B32 0, 2305, implicit-def $mode, implicit $mode - ; VI: [[INT12:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.fmas), [[FMA9]](s32), [[FMA6]](s32), [[FMA8]](s32), [[INT10]](s1) - ; VI: 
[[INT13:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.fixup), [[INT12]](s32), [[UV3]](s32), [[UV1]](s32) - ; VI: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[INT6]](s32), [[INT13]](s32) - ; VI: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x s32>) + ; VI-NEXT: [[COPY1:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr2_vgpr3 + ; VI-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<2 x s32>) + ; VI-NEXT: [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](<2 x s32>) + ; VI-NEXT: [[C:%[0-9]+]]:_(s32) = G_FCONSTANT float 1.000000e+00 + ; VI-NEXT: [[INT:%[0-9]+]]:_(s32), [[INT1:%[0-9]+]]:_(s1) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.scale), [[UV]](s32), [[UV2]](s32), 0 + ; VI-NEXT: [[INT2:%[0-9]+]]:_(s32), [[INT3:%[0-9]+]]:_(s1) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.scale), [[UV]](s32), [[UV2]](s32), 1 + ; VI-NEXT: [[INT4:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.rcp), [[INT]](s32) + ; VI-NEXT: [[FNEG:%[0-9]+]]:_(s32) = G_FNEG [[INT]] + ; VI-NEXT: S_SETREG_IMM32_B32 3, 2305, implicit-def $mode, implicit $mode + ; VI-NEXT: [[FMA:%[0-9]+]]:_(s32) = G_FMA [[FNEG]], [[INT4]], [[C]] + ; VI-NEXT: [[FMA1:%[0-9]+]]:_(s32) = G_FMA [[FMA]], [[INT4]], [[INT4]] + ; VI-NEXT: [[FMUL:%[0-9]+]]:_(s32) = G_FMUL [[INT2]], [[FMA1]] + ; VI-NEXT: [[FMA2:%[0-9]+]]:_(s32) = G_FMA [[FNEG]], [[FMUL]], [[INT2]] + ; VI-NEXT: [[FMA3:%[0-9]+]]:_(s32) = G_FMA [[FMA2]], [[FMA1]], [[FMUL]] + ; VI-NEXT: [[FMA4:%[0-9]+]]:_(s32) = G_FMA [[FNEG]], [[FMA3]], [[INT2]] + ; VI-NEXT: S_SETREG_IMM32_B32 0, 2305, implicit-def $mode, implicit $mode + ; VI-NEXT: [[INT5:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.fmas), [[FMA4]](s32), [[FMA1]](s32), [[FMA3]](s32), [[INT3]](s1) + ; VI-NEXT: [[INT6:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.fixup), [[INT5]](s32), [[UV2]](s32), [[UV]](s32) + ; VI-NEXT: [[INT7:%[0-9]+]]:_(s32), [[INT8:%[0-9]+]]:_(s1) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.scale), [[UV1]](s32), [[UV3]](s32), 0 + ; VI-NEXT: [[INT9:%[0-9]+]]:_(s32), [[INT10:%[0-9]+]]:_(s1) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.scale), [[UV1]](s32), [[UV3]](s32), 1 + ; VI-NEXT: [[INT11:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.rcp), [[INT7]](s32) + ; VI-NEXT: [[FNEG1:%[0-9]+]]:_(s32) = G_FNEG [[INT7]] + ; VI-NEXT: S_SETREG_IMM32_B32 3, 2305, implicit-def $mode, implicit $mode + ; VI-NEXT: [[FMA5:%[0-9]+]]:_(s32) = G_FMA [[FNEG1]], [[INT11]], [[C]] + ; VI-NEXT: [[FMA6:%[0-9]+]]:_(s32) = G_FMA [[FMA5]], [[INT11]], [[INT11]] + ; VI-NEXT: [[FMUL1:%[0-9]+]]:_(s32) = G_FMUL [[INT9]], [[FMA6]] + ; VI-NEXT: [[FMA7:%[0-9]+]]:_(s32) = G_FMA [[FNEG1]], [[FMUL1]], [[INT9]] + ; VI-NEXT: [[FMA8:%[0-9]+]]:_(s32) = G_FMA [[FMA7]], [[FMA6]], [[FMUL1]] + ; VI-NEXT: [[FMA9:%[0-9]+]]:_(s32) = G_FMA [[FNEG1]], [[FMA8]], [[INT9]] + ; VI-NEXT: S_SETREG_IMM32_B32 0, 2305, implicit-def $mode, implicit $mode + ; VI-NEXT: [[INT12:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.fmas), [[FMA9]](s32), [[FMA6]](s32), [[FMA8]](s32), [[INT10]](s1) + ; VI-NEXT: [[INT13:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.fixup), [[INT12]](s32), [[UV3]](s32), [[UV1]](s32) + ; VI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[INT6]](s32), [[INT13]](s32) + ; VI-NEXT: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x s32>) ; GFX9-LABEL: name: test_fdiv_v2s32 ; GFX9: [[COPY:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr0_vgpr1 - ; GFX9: [[COPY1:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr2_vgpr3 - ; GFX9: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = 
G_UNMERGE_VALUES [[COPY]](<2 x s32>) - ; GFX9: [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](<2 x s32>) - ; GFX9: [[C:%[0-9]+]]:_(s32) = G_FCONSTANT float 1.000000e+00 - ; GFX9: [[INT:%[0-9]+]]:_(s32), [[INT1:%[0-9]+]]:_(s1) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.scale), [[UV]](s32), [[UV2]](s32), 0 - ; GFX9: [[INT2:%[0-9]+]]:_(s32), [[INT3:%[0-9]+]]:_(s1) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.scale), [[UV]](s32), [[UV2]](s32), 1 - ; GFX9: [[INT4:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.rcp), [[INT]](s32) - ; GFX9: [[FNEG:%[0-9]+]]:_(s32) = G_FNEG [[INT]] - ; GFX9: S_SETREG_IMM32_B32 3, 2305, implicit-def $mode, implicit $mode - ; GFX9: [[FMA:%[0-9]+]]:_(s32) = G_FMA [[FNEG]], [[INT4]], [[C]] - ; GFX9: [[FMA1:%[0-9]+]]:_(s32) = G_FMA [[FMA]], [[INT4]], [[INT4]] - ; GFX9: [[FMUL:%[0-9]+]]:_(s32) = G_FMUL [[INT2]], [[FMA1]] - ; GFX9: [[FMA2:%[0-9]+]]:_(s32) = G_FMA [[FNEG]], [[FMUL]], [[INT2]] - ; GFX9: [[FMA3:%[0-9]+]]:_(s32) = G_FMA [[FMA2]], [[FMA1]], [[FMUL]] - ; GFX9: [[FMA4:%[0-9]+]]:_(s32) = G_FMA [[FNEG]], [[FMA3]], [[INT2]] - ; GFX9: S_SETREG_IMM32_B32 0, 2305, implicit-def $mode, implicit $mode - ; GFX9: [[INT5:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.fmas), [[FMA4]](s32), [[FMA1]](s32), [[FMA3]](s32), [[INT3]](s1) - ; GFX9: [[INT6:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.fixup), [[INT5]](s32), [[UV2]](s32), [[UV]](s32) - ; GFX9: [[INT7:%[0-9]+]]:_(s32), [[INT8:%[0-9]+]]:_(s1) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.scale), [[UV1]](s32), [[UV3]](s32), 0 - ; GFX9: [[INT9:%[0-9]+]]:_(s32), [[INT10:%[0-9]+]]:_(s1) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.scale), [[UV1]](s32), [[UV3]](s32), 1 - ; GFX9: [[INT11:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.rcp), [[INT7]](s32) - ; GFX9: [[FNEG1:%[0-9]+]]:_(s32) = G_FNEG [[INT7]] - ; GFX9: S_SETREG_IMM32_B32 3, 2305, implicit-def $mode, implicit $mode - ; GFX9: [[FMA5:%[0-9]+]]:_(s32) = G_FMA [[FNEG1]], [[INT11]], [[C]] - ; GFX9: [[FMA6:%[0-9]+]]:_(s32) = G_FMA [[FMA5]], [[INT11]], [[INT11]] - ; GFX9: [[FMUL1:%[0-9]+]]:_(s32) = G_FMUL [[INT9]], [[FMA6]] - ; GFX9: [[FMA7:%[0-9]+]]:_(s32) = G_FMA [[FNEG1]], [[FMUL1]], [[INT9]] - ; GFX9: [[FMA8:%[0-9]+]]:_(s32) = G_FMA [[FMA7]], [[FMA6]], [[FMUL1]] - ; GFX9: [[FMA9:%[0-9]+]]:_(s32) = G_FMA [[FNEG1]], [[FMA8]], [[INT9]] - ; GFX9: S_SETREG_IMM32_B32 0, 2305, implicit-def $mode, implicit $mode - ; GFX9: [[INT12:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.fmas), [[FMA9]](s32), [[FMA6]](s32), [[FMA8]](s32), [[INT10]](s1) - ; GFX9: [[INT13:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.fixup), [[INT12]](s32), [[UV3]](s32), [[UV1]](s32) - ; GFX9: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[INT6]](s32), [[INT13]](s32) - ; GFX9: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x s32>) + ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr2_vgpr3 + ; GFX9-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<2 x s32>) + ; GFX9-NEXT: [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](<2 x s32>) + ; GFX9-NEXT: [[C:%[0-9]+]]:_(s32) = G_FCONSTANT float 1.000000e+00 + ; GFX9-NEXT: [[INT:%[0-9]+]]:_(s32), [[INT1:%[0-9]+]]:_(s1) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.scale), [[UV]](s32), [[UV2]](s32), 0 + ; GFX9-NEXT: [[INT2:%[0-9]+]]:_(s32), [[INT3:%[0-9]+]]:_(s1) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.scale), [[UV]](s32), [[UV2]](s32), 1 + ; GFX9-NEXT: [[INT4:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.rcp), 
[[INT]](s32) + ; GFX9-NEXT: [[FNEG:%[0-9]+]]:_(s32) = G_FNEG [[INT]] + ; GFX9-NEXT: S_SETREG_IMM32_B32 3, 2305, implicit-def $mode, implicit $mode + ; GFX9-NEXT: [[FMA:%[0-9]+]]:_(s32) = G_FMA [[FNEG]], [[INT4]], [[C]] + ; GFX9-NEXT: [[FMA1:%[0-9]+]]:_(s32) = G_FMA [[FMA]], [[INT4]], [[INT4]] + ; GFX9-NEXT: [[FMUL:%[0-9]+]]:_(s32) = G_FMUL [[INT2]], [[FMA1]] + ; GFX9-NEXT: [[FMA2:%[0-9]+]]:_(s32) = G_FMA [[FNEG]], [[FMUL]], [[INT2]] + ; GFX9-NEXT: [[FMA3:%[0-9]+]]:_(s32) = G_FMA [[FMA2]], [[FMA1]], [[FMUL]] + ; GFX9-NEXT: [[FMA4:%[0-9]+]]:_(s32) = G_FMA [[FNEG]], [[FMA3]], [[INT2]] + ; GFX9-NEXT: S_SETREG_IMM32_B32 0, 2305, implicit-def $mode, implicit $mode + ; GFX9-NEXT: [[INT5:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.fmas), [[FMA4]](s32), [[FMA1]](s32), [[FMA3]](s32), [[INT3]](s1) + ; GFX9-NEXT: [[INT6:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.fixup), [[INT5]](s32), [[UV2]](s32), [[UV]](s32) + ; GFX9-NEXT: [[INT7:%[0-9]+]]:_(s32), [[INT8:%[0-9]+]]:_(s1) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.scale), [[UV1]](s32), [[UV3]](s32), 0 + ; GFX9-NEXT: [[INT9:%[0-9]+]]:_(s32), [[INT10:%[0-9]+]]:_(s1) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.scale), [[UV1]](s32), [[UV3]](s32), 1 + ; GFX9-NEXT: [[INT11:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.rcp), [[INT7]](s32) + ; GFX9-NEXT: [[FNEG1:%[0-9]+]]:_(s32) = G_FNEG [[INT7]] + ; GFX9-NEXT: S_SETREG_IMM32_B32 3, 2305, implicit-def $mode, implicit $mode + ; GFX9-NEXT: [[FMA5:%[0-9]+]]:_(s32) = G_FMA [[FNEG1]], [[INT11]], [[C]] + ; GFX9-NEXT: [[FMA6:%[0-9]+]]:_(s32) = G_FMA [[FMA5]], [[INT11]], [[INT11]] + ; GFX9-NEXT: [[FMUL1:%[0-9]+]]:_(s32) = G_FMUL [[INT9]], [[FMA6]] + ; GFX9-NEXT: [[FMA7:%[0-9]+]]:_(s32) = G_FMA [[FNEG1]], [[FMUL1]], [[INT9]] + ; GFX9-NEXT: [[FMA8:%[0-9]+]]:_(s32) = G_FMA [[FMA7]], [[FMA6]], [[FMUL1]] + ; GFX9-NEXT: [[FMA9:%[0-9]+]]:_(s32) = G_FMA [[FNEG1]], [[FMA8]], [[INT9]] + ; GFX9-NEXT: S_SETREG_IMM32_B32 0, 2305, implicit-def $mode, implicit $mode + ; GFX9-NEXT: [[INT12:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.fmas), [[FMA9]](s32), [[FMA6]](s32), [[FMA8]](s32), [[INT10]](s1) + ; GFX9-NEXT: [[INT13:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.fixup), [[INT12]](s32), [[UV3]](s32), [[UV1]](s32) + ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[INT6]](s32), [[INT13]](s32) + ; GFX9-NEXT: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x s32>) ; GFX9-UNSAFE-LABEL: name: test_fdiv_v2s32 ; GFX9-UNSAFE: [[COPY:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr0_vgpr1 - ; GFX9-UNSAFE: [[COPY1:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr2_vgpr3 - ; GFX9-UNSAFE: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<2 x s32>) - ; GFX9-UNSAFE: [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](<2 x s32>) - ; GFX9-UNSAFE: [[INT:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.rcp), [[UV2]](s32) - ; GFX9-UNSAFE: [[FMUL:%[0-9]+]]:_(s32) = G_FMUL [[UV]], [[INT]] - ; GFX9-UNSAFE: [[INT1:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.rcp), [[UV3]](s32) - ; GFX9-UNSAFE: [[FMUL1:%[0-9]+]]:_(s32) = G_FMUL [[UV1]], [[INT1]] - ; GFX9-UNSAFE: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[FMUL]](s32), [[FMUL1]](s32) - ; GFX9-UNSAFE: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x s32>) + ; GFX9-UNSAFE-NEXT: [[COPY1:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr2_vgpr3 + ; GFX9-UNSAFE-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<2 x s32>) + ; GFX9-UNSAFE-NEXT: [[UV2:%[0-9]+]]:_(s32), 
[[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](<2 x s32>) + ; GFX9-UNSAFE-NEXT: [[INT:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.rcp), [[UV2]](s32) + ; GFX9-UNSAFE-NEXT: [[FMUL:%[0-9]+]]:_(s32) = G_FMUL [[UV]], [[INT]] + ; GFX9-UNSAFE-NEXT: [[INT1:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.rcp), [[UV3]](s32) + ; GFX9-UNSAFE-NEXT: [[FMUL1:%[0-9]+]]:_(s32) = G_FMUL [[UV1]], [[INT1]] + ; GFX9-UNSAFE-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[FMUL]](s32), [[FMUL1]](s32) + ; GFX9-UNSAFE-NEXT: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x s32>) ; GFX10-LABEL: name: test_fdiv_v2s32 ; GFX10: [[COPY:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr0_vgpr1 - ; GFX10: [[COPY1:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr2_vgpr3 - ; GFX10: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<2 x s32>) - ; GFX10: [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](<2 x s32>) - ; GFX10: [[C:%[0-9]+]]:_(s32) = G_FCONSTANT float 1.000000e+00 - ; GFX10: [[INT:%[0-9]+]]:_(s32), [[INT1:%[0-9]+]]:_(s1) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.scale), [[UV]](s32), [[UV2]](s32), 0 - ; GFX10: [[INT2:%[0-9]+]]:_(s32), [[INT3:%[0-9]+]]:_(s1) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.scale), [[UV]](s32), [[UV2]](s32), 1 - ; GFX10: [[INT4:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.rcp), [[INT]](s32) - ; GFX10: [[FNEG:%[0-9]+]]:_(s32) = G_FNEG [[INT]] - ; GFX10: S_DENORM_MODE 15, implicit-def $mode, implicit $mode - ; GFX10: [[FMA:%[0-9]+]]:_(s32) = G_FMA [[FNEG]], [[INT4]], [[C]] - ; GFX10: [[FMA1:%[0-9]+]]:_(s32) = G_FMA [[FMA]], [[INT4]], [[INT4]] - ; GFX10: [[FMUL:%[0-9]+]]:_(s32) = G_FMUL [[INT2]], [[FMA1]] - ; GFX10: [[FMA2:%[0-9]+]]:_(s32) = G_FMA [[FNEG]], [[FMUL]], [[INT2]] - ; GFX10: [[FMA3:%[0-9]+]]:_(s32) = G_FMA [[FMA2]], [[FMA1]], [[FMUL]] - ; GFX10: [[FMA4:%[0-9]+]]:_(s32) = G_FMA [[FNEG]], [[FMA3]], [[INT2]] - ; GFX10: S_DENORM_MODE 12, implicit-def $mode, implicit $mode - ; GFX10: [[INT5:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.fmas), [[FMA4]](s32), [[FMA1]](s32), [[FMA3]](s32), [[INT3]](s1) - ; GFX10: [[INT6:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.fixup), [[INT5]](s32), [[UV2]](s32), [[UV]](s32) - ; GFX10: [[INT7:%[0-9]+]]:_(s32), [[INT8:%[0-9]+]]:_(s1) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.scale), [[UV1]](s32), [[UV3]](s32), 0 - ; GFX10: [[INT9:%[0-9]+]]:_(s32), [[INT10:%[0-9]+]]:_(s1) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.scale), [[UV1]](s32), [[UV3]](s32), 1 - ; GFX10: [[INT11:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.rcp), [[INT7]](s32) - ; GFX10: [[FNEG1:%[0-9]+]]:_(s32) = G_FNEG [[INT7]] - ; GFX10: S_DENORM_MODE 15, implicit-def $mode, implicit $mode - ; GFX10: [[FMA5:%[0-9]+]]:_(s32) = G_FMA [[FNEG1]], [[INT11]], [[C]] - ; GFX10: [[FMA6:%[0-9]+]]:_(s32) = G_FMA [[FMA5]], [[INT11]], [[INT11]] - ; GFX10: [[FMUL1:%[0-9]+]]:_(s32) = G_FMUL [[INT9]], [[FMA6]] - ; GFX10: [[FMA7:%[0-9]+]]:_(s32) = G_FMA [[FNEG1]], [[FMUL1]], [[INT9]] - ; GFX10: [[FMA8:%[0-9]+]]:_(s32) = G_FMA [[FMA7]], [[FMA6]], [[FMUL1]] - ; GFX10: [[FMA9:%[0-9]+]]:_(s32) = G_FMA [[FNEG1]], [[FMA8]], [[INT9]] - ; GFX10: S_DENORM_MODE 12, implicit-def $mode, implicit $mode - ; GFX10: [[INT12:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.fmas), [[FMA9]](s32), [[FMA6]](s32), [[FMA8]](s32), [[INT10]](s1) - ; GFX10: [[INT13:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.fixup), [[INT12]](s32), [[UV3]](s32), [[UV1]](s32) - ; GFX10: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR 
[[INT6]](s32), [[INT13]](s32) - ; GFX10: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x s32>) + ; GFX10-NEXT: [[COPY1:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr2_vgpr3 + ; GFX10-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<2 x s32>) + ; GFX10-NEXT: [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](<2 x s32>) + ; GFX10-NEXT: [[C:%[0-9]+]]:_(s32) = G_FCONSTANT float 1.000000e+00 + ; GFX10-NEXT: [[INT:%[0-9]+]]:_(s32), [[INT1:%[0-9]+]]:_(s1) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.scale), [[UV]](s32), [[UV2]](s32), 0 + ; GFX10-NEXT: [[INT2:%[0-9]+]]:_(s32), [[INT3:%[0-9]+]]:_(s1) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.scale), [[UV]](s32), [[UV2]](s32), 1 + ; GFX10-NEXT: [[INT4:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.rcp), [[INT]](s32) + ; GFX10-NEXT: [[FNEG:%[0-9]+]]:_(s32) = G_FNEG [[INT]] + ; GFX10-NEXT: S_DENORM_MODE 15, implicit-def $mode, implicit $mode + ; GFX10-NEXT: [[FMA:%[0-9]+]]:_(s32) = G_FMA [[FNEG]], [[INT4]], [[C]] + ; GFX10-NEXT: [[FMA1:%[0-9]+]]:_(s32) = G_FMA [[FMA]], [[INT4]], [[INT4]] + ; GFX10-NEXT: [[FMUL:%[0-9]+]]:_(s32) = G_FMUL [[INT2]], [[FMA1]] + ; GFX10-NEXT: [[FMA2:%[0-9]+]]:_(s32) = G_FMA [[FNEG]], [[FMUL]], [[INT2]] + ; GFX10-NEXT: [[FMA3:%[0-9]+]]:_(s32) = G_FMA [[FMA2]], [[FMA1]], [[FMUL]] + ; GFX10-NEXT: [[FMA4:%[0-9]+]]:_(s32) = G_FMA [[FNEG]], [[FMA3]], [[INT2]] + ; GFX10-NEXT: S_DENORM_MODE 12, implicit-def $mode, implicit $mode + ; GFX10-NEXT: [[INT5:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.fmas), [[FMA4]](s32), [[FMA1]](s32), [[FMA3]](s32), [[INT3]](s1) + ; GFX10-NEXT: [[INT6:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.fixup), [[INT5]](s32), [[UV2]](s32), [[UV]](s32) + ; GFX10-NEXT: [[INT7:%[0-9]+]]:_(s32), [[INT8:%[0-9]+]]:_(s1) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.scale), [[UV1]](s32), [[UV3]](s32), 0 + ; GFX10-NEXT: [[INT9:%[0-9]+]]:_(s32), [[INT10:%[0-9]+]]:_(s1) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.scale), [[UV1]](s32), [[UV3]](s32), 1 + ; GFX10-NEXT: [[INT11:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.rcp), [[INT7]](s32) + ; GFX10-NEXT: [[FNEG1:%[0-9]+]]:_(s32) = G_FNEG [[INT7]] + ; GFX10-NEXT: S_DENORM_MODE 15, implicit-def $mode, implicit $mode + ; GFX10-NEXT: [[FMA5:%[0-9]+]]:_(s32) = G_FMA [[FNEG1]], [[INT11]], [[C]] + ; GFX10-NEXT: [[FMA6:%[0-9]+]]:_(s32) = G_FMA [[FMA5]], [[INT11]], [[INT11]] + ; GFX10-NEXT: [[FMUL1:%[0-9]+]]:_(s32) = G_FMUL [[INT9]], [[FMA6]] + ; GFX10-NEXT: [[FMA7:%[0-9]+]]:_(s32) = G_FMA [[FNEG1]], [[FMUL1]], [[INT9]] + ; GFX10-NEXT: [[FMA8:%[0-9]+]]:_(s32) = G_FMA [[FMA7]], [[FMA6]], [[FMUL1]] + ; GFX10-NEXT: [[FMA9:%[0-9]+]]:_(s32) = G_FMA [[FNEG1]], [[FMA8]], [[INT9]] + ; GFX10-NEXT: S_DENORM_MODE 12, implicit-def $mode, implicit $mode + ; GFX10-NEXT: [[INT12:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.fmas), [[FMA9]](s32), [[FMA6]](s32), [[FMA8]](s32), [[INT10]](s1) + ; GFX10-NEXT: [[INT13:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.fixup), [[INT12]](s32), [[UV3]](s32), [[UV1]](s32) + ; GFX10-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[INT6]](s32), [[INT13]](s32) + ; GFX10-NEXT: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x s32>) %0:_(<2 x s32>) = COPY $vgpr0_vgpr1 %1:_(<2 x s32>) = COPY $vgpr2_vgpr3 %2:_(<2 x s32>) = G_FDIV %0, %1 @@ -683,143 +683,143 @@ body: | ; SI-LABEL: name: test_fdiv_v2s32_flags ; SI: [[COPY:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr0_vgpr1 - ; SI: [[COPY1:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr2_vgpr3 - ; SI: [[UV:%[0-9]+]]:_(s32), 
[[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<2 x s32>) - ; SI: [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](<2 x s32>) - ; SI: [[C:%[0-9]+]]:_(s32) = G_FCONSTANT float 1.000000e+00 - ; SI: [[INT:%[0-9]+]]:_(s32), [[INT1:%[0-9]+]]:_(s1) = nnan G_INTRINSIC intrinsic(@llvm.amdgcn.div.scale), [[UV]](s32), [[UV2]](s32), 0 - ; SI: [[INT2:%[0-9]+]]:_(s32), [[INT3:%[0-9]+]]:_(s1) = nnan G_INTRINSIC intrinsic(@llvm.amdgcn.div.scale), [[UV]](s32), [[UV2]](s32), 1 - ; SI: [[INT4:%[0-9]+]]:_(s32) = nnan G_INTRINSIC intrinsic(@llvm.amdgcn.rcp), [[INT]](s32) - ; SI: [[FNEG:%[0-9]+]]:_(s32) = nnan G_FNEG [[INT]] - ; SI: [[FMA:%[0-9]+]]:_(s32) = nnan G_FMA [[FNEG]], [[INT4]], [[C]] - ; SI: [[FMA1:%[0-9]+]]:_(s32) = nnan G_FMA [[FMA]], [[INT4]], [[INT4]] - ; SI: [[FMUL:%[0-9]+]]:_(s32) = nnan G_FMUL [[INT2]], [[FMA1]] - ; SI: [[FMA2:%[0-9]+]]:_(s32) = nnan G_FMA [[FNEG]], [[FMUL]], [[INT2]] - ; SI: [[FMA3:%[0-9]+]]:_(s32) = nnan G_FMA [[FMA2]], [[FMA1]], [[FMUL]] - ; SI: [[FMA4:%[0-9]+]]:_(s32) = nnan G_FMA [[FNEG]], [[FMA3]], [[INT2]] - ; SI: [[INT5:%[0-9]+]]:_(s32) = nnan G_INTRINSIC intrinsic(@llvm.amdgcn.div.fmas), [[FMA4]](s32), [[FMA1]](s32), [[FMA3]](s32), [[INT3]](s1) - ; SI: [[INT6:%[0-9]+]]:_(s32) = nnan G_INTRINSIC intrinsic(@llvm.amdgcn.div.fixup), [[INT5]](s32), [[UV2]](s32), [[UV]](s32) - ; SI: [[INT7:%[0-9]+]]:_(s32), [[INT8:%[0-9]+]]:_(s1) = nnan G_INTRINSIC intrinsic(@llvm.amdgcn.div.scale), [[UV1]](s32), [[UV3]](s32), 0 - ; SI: [[INT9:%[0-9]+]]:_(s32), [[INT10:%[0-9]+]]:_(s1) = nnan G_INTRINSIC intrinsic(@llvm.amdgcn.div.scale), [[UV1]](s32), [[UV3]](s32), 1 - ; SI: [[INT11:%[0-9]+]]:_(s32) = nnan G_INTRINSIC intrinsic(@llvm.amdgcn.rcp), [[INT7]](s32) - ; SI: [[FNEG1:%[0-9]+]]:_(s32) = nnan G_FNEG [[INT7]] - ; SI: [[FMA5:%[0-9]+]]:_(s32) = nnan G_FMA [[FNEG1]], [[INT11]], [[C]] - ; SI: [[FMA6:%[0-9]+]]:_(s32) = nnan G_FMA [[FMA5]], [[INT11]], [[INT11]] - ; SI: [[FMUL1:%[0-9]+]]:_(s32) = nnan G_FMUL [[INT9]], [[FMA6]] - ; SI: [[FMA7:%[0-9]+]]:_(s32) = nnan G_FMA [[FNEG1]], [[FMUL1]], [[INT9]] - ; SI: [[FMA8:%[0-9]+]]:_(s32) = nnan G_FMA [[FMA7]], [[FMA6]], [[FMUL1]] - ; SI: [[FMA9:%[0-9]+]]:_(s32) = nnan G_FMA [[FNEG1]], [[FMA8]], [[INT9]] - ; SI: [[INT12:%[0-9]+]]:_(s32) = nnan G_INTRINSIC intrinsic(@llvm.amdgcn.div.fmas), [[FMA9]](s32), [[FMA6]](s32), [[FMA8]](s32), [[INT10]](s1) - ; SI: [[INT13:%[0-9]+]]:_(s32) = nnan G_INTRINSIC intrinsic(@llvm.amdgcn.div.fixup), [[INT12]](s32), [[UV3]](s32), [[UV1]](s32) - ; SI: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[INT6]](s32), [[INT13]](s32) - ; SI: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x s32>) + ; SI-NEXT: [[COPY1:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr2_vgpr3 + ; SI-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<2 x s32>) + ; SI-NEXT: [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](<2 x s32>) + ; SI-NEXT: [[C:%[0-9]+]]:_(s32) = G_FCONSTANT float 1.000000e+00 + ; SI-NEXT: [[INT:%[0-9]+]]:_(s32), [[INT1:%[0-9]+]]:_(s1) = nnan G_INTRINSIC intrinsic(@llvm.amdgcn.div.scale), [[UV]](s32), [[UV2]](s32), 0 + ; SI-NEXT: [[INT2:%[0-9]+]]:_(s32), [[INT3:%[0-9]+]]:_(s1) = nnan G_INTRINSIC intrinsic(@llvm.amdgcn.div.scale), [[UV]](s32), [[UV2]](s32), 1 + ; SI-NEXT: [[INT4:%[0-9]+]]:_(s32) = nnan G_INTRINSIC intrinsic(@llvm.amdgcn.rcp), [[INT]](s32) + ; SI-NEXT: [[FNEG:%[0-9]+]]:_(s32) = nnan G_FNEG [[INT]] + ; SI-NEXT: [[FMA:%[0-9]+]]:_(s32) = nnan G_FMA [[FNEG]], [[INT4]], [[C]] + ; SI-NEXT: [[FMA1:%[0-9]+]]:_(s32) = nnan G_FMA [[FMA]], 
[[INT4]], [[INT4]] + ; SI-NEXT: [[FMUL:%[0-9]+]]:_(s32) = nnan G_FMUL [[INT2]], [[FMA1]] + ; SI-NEXT: [[FMA2:%[0-9]+]]:_(s32) = nnan G_FMA [[FNEG]], [[FMUL]], [[INT2]] + ; SI-NEXT: [[FMA3:%[0-9]+]]:_(s32) = nnan G_FMA [[FMA2]], [[FMA1]], [[FMUL]] + ; SI-NEXT: [[FMA4:%[0-9]+]]:_(s32) = nnan G_FMA [[FNEG]], [[FMA3]], [[INT2]] + ; SI-NEXT: [[INT5:%[0-9]+]]:_(s32) = nnan G_INTRINSIC intrinsic(@llvm.amdgcn.div.fmas), [[FMA4]](s32), [[FMA1]](s32), [[FMA3]](s32), [[INT3]](s1) + ; SI-NEXT: [[INT6:%[0-9]+]]:_(s32) = nnan G_INTRINSIC intrinsic(@llvm.amdgcn.div.fixup), [[INT5]](s32), [[UV2]](s32), [[UV]](s32) + ; SI-NEXT: [[INT7:%[0-9]+]]:_(s32), [[INT8:%[0-9]+]]:_(s1) = nnan G_INTRINSIC intrinsic(@llvm.amdgcn.div.scale), [[UV1]](s32), [[UV3]](s32), 0 + ; SI-NEXT: [[INT9:%[0-9]+]]:_(s32), [[INT10:%[0-9]+]]:_(s1) = nnan G_INTRINSIC intrinsic(@llvm.amdgcn.div.scale), [[UV1]](s32), [[UV3]](s32), 1 + ; SI-NEXT: [[INT11:%[0-9]+]]:_(s32) = nnan G_INTRINSIC intrinsic(@llvm.amdgcn.rcp), [[INT7]](s32) + ; SI-NEXT: [[FNEG1:%[0-9]+]]:_(s32) = nnan G_FNEG [[INT7]] + ; SI-NEXT: [[FMA5:%[0-9]+]]:_(s32) = nnan G_FMA [[FNEG1]], [[INT11]], [[C]] + ; SI-NEXT: [[FMA6:%[0-9]+]]:_(s32) = nnan G_FMA [[FMA5]], [[INT11]], [[INT11]] + ; SI-NEXT: [[FMUL1:%[0-9]+]]:_(s32) = nnan G_FMUL [[INT9]], [[FMA6]] + ; SI-NEXT: [[FMA7:%[0-9]+]]:_(s32) = nnan G_FMA [[FNEG1]], [[FMUL1]], [[INT9]] + ; SI-NEXT: [[FMA8:%[0-9]+]]:_(s32) = nnan G_FMA [[FMA7]], [[FMA6]], [[FMUL1]] + ; SI-NEXT: [[FMA9:%[0-9]+]]:_(s32) = nnan G_FMA [[FNEG1]], [[FMA8]], [[INT9]] + ; SI-NEXT: [[INT12:%[0-9]+]]:_(s32) = nnan G_INTRINSIC intrinsic(@llvm.amdgcn.div.fmas), [[FMA9]](s32), [[FMA6]](s32), [[FMA8]](s32), [[INT10]](s1) + ; SI-NEXT: [[INT13:%[0-9]+]]:_(s32) = nnan G_INTRINSIC intrinsic(@llvm.amdgcn.div.fixup), [[INT12]](s32), [[UV3]](s32), [[UV1]](s32) + ; SI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[INT6]](s32), [[INT13]](s32) + ; SI-NEXT: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x s32>) ; VI-LABEL: name: test_fdiv_v2s32_flags ; VI: [[COPY:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr0_vgpr1 - ; VI: [[COPY1:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr2_vgpr3 - ; VI: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<2 x s32>) - ; VI: [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](<2 x s32>) - ; VI: [[C:%[0-9]+]]:_(s32) = G_FCONSTANT float 1.000000e+00 - ; VI: [[INT:%[0-9]+]]:_(s32), [[INT1:%[0-9]+]]:_(s1) = nnan G_INTRINSIC intrinsic(@llvm.amdgcn.div.scale), [[UV]](s32), [[UV2]](s32), 0 - ; VI: [[INT2:%[0-9]+]]:_(s32), [[INT3:%[0-9]+]]:_(s1) = nnan G_INTRINSIC intrinsic(@llvm.amdgcn.div.scale), [[UV]](s32), [[UV2]](s32), 1 - ; VI: [[INT4:%[0-9]+]]:_(s32) = nnan G_INTRINSIC intrinsic(@llvm.amdgcn.rcp), [[INT]](s32) - ; VI: [[FNEG:%[0-9]+]]:_(s32) = nnan G_FNEG [[INT]] - ; VI: [[FMA:%[0-9]+]]:_(s32) = nnan G_FMA [[FNEG]], [[INT4]], [[C]] - ; VI: [[FMA1:%[0-9]+]]:_(s32) = nnan G_FMA [[FMA]], [[INT4]], [[INT4]] - ; VI: [[FMUL:%[0-9]+]]:_(s32) = nnan G_FMUL [[INT2]], [[FMA1]] - ; VI: [[FMA2:%[0-9]+]]:_(s32) = nnan G_FMA [[FNEG]], [[FMUL]], [[INT2]] - ; VI: [[FMA3:%[0-9]+]]:_(s32) = nnan G_FMA [[FMA2]], [[FMA1]], [[FMUL]] - ; VI: [[FMA4:%[0-9]+]]:_(s32) = nnan G_FMA [[FNEG]], [[FMA3]], [[INT2]] - ; VI: [[INT5:%[0-9]+]]:_(s32) = nnan G_INTRINSIC intrinsic(@llvm.amdgcn.div.fmas), [[FMA4]](s32), [[FMA1]](s32), [[FMA3]](s32), [[INT3]](s1) - ; VI: [[INT6:%[0-9]+]]:_(s32) = nnan G_INTRINSIC intrinsic(@llvm.amdgcn.div.fixup), [[INT5]](s32), [[UV2]](s32), [[UV]](s32) - ; VI: [[INT7:%[0-9]+]]:_(s32), 
[[INT8:%[0-9]+]]:_(s1) = nnan G_INTRINSIC intrinsic(@llvm.amdgcn.div.scale), [[UV1]](s32), [[UV3]](s32), 0 - ; VI: [[INT9:%[0-9]+]]:_(s32), [[INT10:%[0-9]+]]:_(s1) = nnan G_INTRINSIC intrinsic(@llvm.amdgcn.div.scale), [[UV1]](s32), [[UV3]](s32), 1 - ; VI: [[INT11:%[0-9]+]]:_(s32) = nnan G_INTRINSIC intrinsic(@llvm.amdgcn.rcp), [[INT7]](s32) - ; VI: [[FNEG1:%[0-9]+]]:_(s32) = nnan G_FNEG [[INT7]] - ; VI: [[FMA5:%[0-9]+]]:_(s32) = nnan G_FMA [[FNEG1]], [[INT11]], [[C]] - ; VI: [[FMA6:%[0-9]+]]:_(s32) = nnan G_FMA [[FMA5]], [[INT11]], [[INT11]] - ; VI: [[FMUL1:%[0-9]+]]:_(s32) = nnan G_FMUL [[INT9]], [[FMA6]] - ; VI: [[FMA7:%[0-9]+]]:_(s32) = nnan G_FMA [[FNEG1]], [[FMUL1]], [[INT9]] - ; VI: [[FMA8:%[0-9]+]]:_(s32) = nnan G_FMA [[FMA7]], [[FMA6]], [[FMUL1]] - ; VI: [[FMA9:%[0-9]+]]:_(s32) = nnan G_FMA [[FNEG1]], [[FMA8]], [[INT9]] - ; VI: [[INT12:%[0-9]+]]:_(s32) = nnan G_INTRINSIC intrinsic(@llvm.amdgcn.div.fmas), [[FMA9]](s32), [[FMA6]](s32), [[FMA8]](s32), [[INT10]](s1) - ; VI: [[INT13:%[0-9]+]]:_(s32) = nnan G_INTRINSIC intrinsic(@llvm.amdgcn.div.fixup), [[INT12]](s32), [[UV3]](s32), [[UV1]](s32) - ; VI: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[INT6]](s32), [[INT13]](s32) - ; VI: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x s32>) + ; VI-NEXT: [[COPY1:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr2_vgpr3 + ; VI-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<2 x s32>) + ; VI-NEXT: [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](<2 x s32>) + ; VI-NEXT: [[C:%[0-9]+]]:_(s32) = G_FCONSTANT float 1.000000e+00 + ; VI-NEXT: [[INT:%[0-9]+]]:_(s32), [[INT1:%[0-9]+]]:_(s1) = nnan G_INTRINSIC intrinsic(@llvm.amdgcn.div.scale), [[UV]](s32), [[UV2]](s32), 0 + ; VI-NEXT: [[INT2:%[0-9]+]]:_(s32), [[INT3:%[0-9]+]]:_(s1) = nnan G_INTRINSIC intrinsic(@llvm.amdgcn.div.scale), [[UV]](s32), [[UV2]](s32), 1 + ; VI-NEXT: [[INT4:%[0-9]+]]:_(s32) = nnan G_INTRINSIC intrinsic(@llvm.amdgcn.rcp), [[INT]](s32) + ; VI-NEXT: [[FNEG:%[0-9]+]]:_(s32) = nnan G_FNEG [[INT]] + ; VI-NEXT: [[FMA:%[0-9]+]]:_(s32) = nnan G_FMA [[FNEG]], [[INT4]], [[C]] + ; VI-NEXT: [[FMA1:%[0-9]+]]:_(s32) = nnan G_FMA [[FMA]], [[INT4]], [[INT4]] + ; VI-NEXT: [[FMUL:%[0-9]+]]:_(s32) = nnan G_FMUL [[INT2]], [[FMA1]] + ; VI-NEXT: [[FMA2:%[0-9]+]]:_(s32) = nnan G_FMA [[FNEG]], [[FMUL]], [[INT2]] + ; VI-NEXT: [[FMA3:%[0-9]+]]:_(s32) = nnan G_FMA [[FMA2]], [[FMA1]], [[FMUL]] + ; VI-NEXT: [[FMA4:%[0-9]+]]:_(s32) = nnan G_FMA [[FNEG]], [[FMA3]], [[INT2]] + ; VI-NEXT: [[INT5:%[0-9]+]]:_(s32) = nnan G_INTRINSIC intrinsic(@llvm.amdgcn.div.fmas), [[FMA4]](s32), [[FMA1]](s32), [[FMA3]](s32), [[INT3]](s1) + ; VI-NEXT: [[INT6:%[0-9]+]]:_(s32) = nnan G_INTRINSIC intrinsic(@llvm.amdgcn.div.fixup), [[INT5]](s32), [[UV2]](s32), [[UV]](s32) + ; VI-NEXT: [[INT7:%[0-9]+]]:_(s32), [[INT8:%[0-9]+]]:_(s1) = nnan G_INTRINSIC intrinsic(@llvm.amdgcn.div.scale), [[UV1]](s32), [[UV3]](s32), 0 + ; VI-NEXT: [[INT9:%[0-9]+]]:_(s32), [[INT10:%[0-9]+]]:_(s1) = nnan G_INTRINSIC intrinsic(@llvm.amdgcn.div.scale), [[UV1]](s32), [[UV3]](s32), 1 + ; VI-NEXT: [[INT11:%[0-9]+]]:_(s32) = nnan G_INTRINSIC intrinsic(@llvm.amdgcn.rcp), [[INT7]](s32) + ; VI-NEXT: [[FNEG1:%[0-9]+]]:_(s32) = nnan G_FNEG [[INT7]] + ; VI-NEXT: [[FMA5:%[0-9]+]]:_(s32) = nnan G_FMA [[FNEG1]], [[INT11]], [[C]] + ; VI-NEXT: [[FMA6:%[0-9]+]]:_(s32) = nnan G_FMA [[FMA5]], [[INT11]], [[INT11]] + ; VI-NEXT: [[FMUL1:%[0-9]+]]:_(s32) = nnan G_FMUL [[INT9]], [[FMA6]] + ; VI-NEXT: [[FMA7:%[0-9]+]]:_(s32) = nnan G_FMA [[FNEG1]], [[FMUL1]], [[INT9]] + ; VI-NEXT: 
[[FMA8:%[0-9]+]]:_(s32) = nnan G_FMA [[FMA7]], [[FMA6]], [[FMUL1]] + ; VI-NEXT: [[FMA9:%[0-9]+]]:_(s32) = nnan G_FMA [[FNEG1]], [[FMA8]], [[INT9]] + ; VI-NEXT: [[INT12:%[0-9]+]]:_(s32) = nnan G_INTRINSIC intrinsic(@llvm.amdgcn.div.fmas), [[FMA9]](s32), [[FMA6]](s32), [[FMA8]](s32), [[INT10]](s1) + ; VI-NEXT: [[INT13:%[0-9]+]]:_(s32) = nnan G_INTRINSIC intrinsic(@llvm.amdgcn.div.fixup), [[INT12]](s32), [[UV3]](s32), [[UV1]](s32) + ; VI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[INT6]](s32), [[INT13]](s32) + ; VI-NEXT: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x s32>) ; GFX9-LABEL: name: test_fdiv_v2s32_flags ; GFX9: [[COPY:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr0_vgpr1 - ; GFX9: [[COPY1:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr2_vgpr3 - ; GFX9: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<2 x s32>) - ; GFX9: [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](<2 x s32>) - ; GFX9: [[C:%[0-9]+]]:_(s32) = G_FCONSTANT float 1.000000e+00 - ; GFX9: [[INT:%[0-9]+]]:_(s32), [[INT1:%[0-9]+]]:_(s1) = nnan G_INTRINSIC intrinsic(@llvm.amdgcn.div.scale), [[UV]](s32), [[UV2]](s32), 0 - ; GFX9: [[INT2:%[0-9]+]]:_(s32), [[INT3:%[0-9]+]]:_(s1) = nnan G_INTRINSIC intrinsic(@llvm.amdgcn.div.scale), [[UV]](s32), [[UV2]](s32), 1 - ; GFX9: [[INT4:%[0-9]+]]:_(s32) = nnan G_INTRINSIC intrinsic(@llvm.amdgcn.rcp), [[INT]](s32) - ; GFX9: [[FNEG:%[0-9]+]]:_(s32) = nnan G_FNEG [[INT]] - ; GFX9: [[FMA:%[0-9]+]]:_(s32) = nnan G_FMA [[FNEG]], [[INT4]], [[C]] - ; GFX9: [[FMA1:%[0-9]+]]:_(s32) = nnan G_FMA [[FMA]], [[INT4]], [[INT4]] - ; GFX9: [[FMUL:%[0-9]+]]:_(s32) = nnan G_FMUL [[INT2]], [[FMA1]] - ; GFX9: [[FMA2:%[0-9]+]]:_(s32) = nnan G_FMA [[FNEG]], [[FMUL]], [[INT2]] - ; GFX9: [[FMA3:%[0-9]+]]:_(s32) = nnan G_FMA [[FMA2]], [[FMA1]], [[FMUL]] - ; GFX9: [[FMA4:%[0-9]+]]:_(s32) = nnan G_FMA [[FNEG]], [[FMA3]], [[INT2]] - ; GFX9: [[INT5:%[0-9]+]]:_(s32) = nnan G_INTRINSIC intrinsic(@llvm.amdgcn.div.fmas), [[FMA4]](s32), [[FMA1]](s32), [[FMA3]](s32), [[INT3]](s1) - ; GFX9: [[INT6:%[0-9]+]]:_(s32) = nnan G_INTRINSIC intrinsic(@llvm.amdgcn.div.fixup), [[INT5]](s32), [[UV2]](s32), [[UV]](s32) - ; GFX9: [[INT7:%[0-9]+]]:_(s32), [[INT8:%[0-9]+]]:_(s1) = nnan G_INTRINSIC intrinsic(@llvm.amdgcn.div.scale), [[UV1]](s32), [[UV3]](s32), 0 - ; GFX9: [[INT9:%[0-9]+]]:_(s32), [[INT10:%[0-9]+]]:_(s1) = nnan G_INTRINSIC intrinsic(@llvm.amdgcn.div.scale), [[UV1]](s32), [[UV3]](s32), 1 - ; GFX9: [[INT11:%[0-9]+]]:_(s32) = nnan G_INTRINSIC intrinsic(@llvm.amdgcn.rcp), [[INT7]](s32) - ; GFX9: [[FNEG1:%[0-9]+]]:_(s32) = nnan G_FNEG [[INT7]] - ; GFX9: [[FMA5:%[0-9]+]]:_(s32) = nnan G_FMA [[FNEG1]], [[INT11]], [[C]] - ; GFX9: [[FMA6:%[0-9]+]]:_(s32) = nnan G_FMA [[FMA5]], [[INT11]], [[INT11]] - ; GFX9: [[FMUL1:%[0-9]+]]:_(s32) = nnan G_FMUL [[INT9]], [[FMA6]] - ; GFX9: [[FMA7:%[0-9]+]]:_(s32) = nnan G_FMA [[FNEG1]], [[FMUL1]], [[INT9]] - ; GFX9: [[FMA8:%[0-9]+]]:_(s32) = nnan G_FMA [[FMA7]], [[FMA6]], [[FMUL1]] - ; GFX9: [[FMA9:%[0-9]+]]:_(s32) = nnan G_FMA [[FNEG1]], [[FMA8]], [[INT9]] - ; GFX9: [[INT12:%[0-9]+]]:_(s32) = nnan G_INTRINSIC intrinsic(@llvm.amdgcn.div.fmas), [[FMA9]](s32), [[FMA6]](s32), [[FMA8]](s32), [[INT10]](s1) - ; GFX9: [[INT13:%[0-9]+]]:_(s32) = nnan G_INTRINSIC intrinsic(@llvm.amdgcn.div.fixup), [[INT12]](s32), [[UV3]](s32), [[UV1]](s32) - ; GFX9: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[INT6]](s32), [[INT13]](s32) - ; GFX9: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x s32>) + ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr2_vgpr3 + ; 
GFX9-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<2 x s32>) + ; GFX9-NEXT: [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](<2 x s32>) + ; GFX9-NEXT: [[C:%[0-9]+]]:_(s32) = G_FCONSTANT float 1.000000e+00 + ; GFX9-NEXT: [[INT:%[0-9]+]]:_(s32), [[INT1:%[0-9]+]]:_(s1) = nnan G_INTRINSIC intrinsic(@llvm.amdgcn.div.scale), [[UV]](s32), [[UV2]](s32), 0 + ; GFX9-NEXT: [[INT2:%[0-9]+]]:_(s32), [[INT3:%[0-9]+]]:_(s1) = nnan G_INTRINSIC intrinsic(@llvm.amdgcn.div.scale), [[UV]](s32), [[UV2]](s32), 1 + ; GFX9-NEXT: [[INT4:%[0-9]+]]:_(s32) = nnan G_INTRINSIC intrinsic(@llvm.amdgcn.rcp), [[INT]](s32) + ; GFX9-NEXT: [[FNEG:%[0-9]+]]:_(s32) = nnan G_FNEG [[INT]] + ; GFX9-NEXT: [[FMA:%[0-9]+]]:_(s32) = nnan G_FMA [[FNEG]], [[INT4]], [[C]] + ; GFX9-NEXT: [[FMA1:%[0-9]+]]:_(s32) = nnan G_FMA [[FMA]], [[INT4]], [[INT4]] + ; GFX9-NEXT: [[FMUL:%[0-9]+]]:_(s32) = nnan G_FMUL [[INT2]], [[FMA1]] + ; GFX9-NEXT: [[FMA2:%[0-9]+]]:_(s32) = nnan G_FMA [[FNEG]], [[FMUL]], [[INT2]] + ; GFX9-NEXT: [[FMA3:%[0-9]+]]:_(s32) = nnan G_FMA [[FMA2]], [[FMA1]], [[FMUL]] + ; GFX9-NEXT: [[FMA4:%[0-9]+]]:_(s32) = nnan G_FMA [[FNEG]], [[FMA3]], [[INT2]] + ; GFX9-NEXT: [[INT5:%[0-9]+]]:_(s32) = nnan G_INTRINSIC intrinsic(@llvm.amdgcn.div.fmas), [[FMA4]](s32), [[FMA1]](s32), [[FMA3]](s32), [[INT3]](s1) + ; GFX9-NEXT: [[INT6:%[0-9]+]]:_(s32) = nnan G_INTRINSIC intrinsic(@llvm.amdgcn.div.fixup), [[INT5]](s32), [[UV2]](s32), [[UV]](s32) + ; GFX9-NEXT: [[INT7:%[0-9]+]]:_(s32), [[INT8:%[0-9]+]]:_(s1) = nnan G_INTRINSIC intrinsic(@llvm.amdgcn.div.scale), [[UV1]](s32), [[UV3]](s32), 0 + ; GFX9-NEXT: [[INT9:%[0-9]+]]:_(s32), [[INT10:%[0-9]+]]:_(s1) = nnan G_INTRINSIC intrinsic(@llvm.amdgcn.div.scale), [[UV1]](s32), [[UV3]](s32), 1 + ; GFX9-NEXT: [[INT11:%[0-9]+]]:_(s32) = nnan G_INTRINSIC intrinsic(@llvm.amdgcn.rcp), [[INT7]](s32) + ; GFX9-NEXT: [[FNEG1:%[0-9]+]]:_(s32) = nnan G_FNEG [[INT7]] + ; GFX9-NEXT: [[FMA5:%[0-9]+]]:_(s32) = nnan G_FMA [[FNEG1]], [[INT11]], [[C]] + ; GFX9-NEXT: [[FMA6:%[0-9]+]]:_(s32) = nnan G_FMA [[FMA5]], [[INT11]], [[INT11]] + ; GFX9-NEXT: [[FMUL1:%[0-9]+]]:_(s32) = nnan G_FMUL [[INT9]], [[FMA6]] + ; GFX9-NEXT: [[FMA7:%[0-9]+]]:_(s32) = nnan G_FMA [[FNEG1]], [[FMUL1]], [[INT9]] + ; GFX9-NEXT: [[FMA8:%[0-9]+]]:_(s32) = nnan G_FMA [[FMA7]], [[FMA6]], [[FMUL1]] + ; GFX9-NEXT: [[FMA9:%[0-9]+]]:_(s32) = nnan G_FMA [[FNEG1]], [[FMA8]], [[INT9]] + ; GFX9-NEXT: [[INT12:%[0-9]+]]:_(s32) = nnan G_INTRINSIC intrinsic(@llvm.amdgcn.div.fmas), [[FMA9]](s32), [[FMA6]](s32), [[FMA8]](s32), [[INT10]](s1) + ; GFX9-NEXT: [[INT13:%[0-9]+]]:_(s32) = nnan G_INTRINSIC intrinsic(@llvm.amdgcn.div.fixup), [[INT12]](s32), [[UV3]](s32), [[UV1]](s32) + ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[INT6]](s32), [[INT13]](s32) + ; GFX9-NEXT: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x s32>) ; GFX9-UNSAFE-LABEL: name: test_fdiv_v2s32_flags ; GFX9-UNSAFE: [[COPY:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr0_vgpr1 - ; GFX9-UNSAFE: [[COPY1:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr2_vgpr3 - ; GFX9-UNSAFE: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<2 x s32>) - ; GFX9-UNSAFE: [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](<2 x s32>) - ; GFX9-UNSAFE: [[INT:%[0-9]+]]:_(s32) = nnan G_INTRINSIC intrinsic(@llvm.amdgcn.rcp), [[UV2]](s32) - ; GFX9-UNSAFE: [[FMUL:%[0-9]+]]:_(s32) = nnan G_FMUL [[UV]], [[INT]] - ; GFX9-UNSAFE: [[INT1:%[0-9]+]]:_(s32) = nnan G_INTRINSIC intrinsic(@llvm.amdgcn.rcp), [[UV3]](s32) - ; GFX9-UNSAFE: 
[[FMUL1:%[0-9]+]]:_(s32) = nnan G_FMUL [[UV1]], [[INT1]] - ; GFX9-UNSAFE: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[FMUL]](s32), [[FMUL1]](s32) - ; GFX9-UNSAFE: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x s32>) + ; GFX9-UNSAFE-NEXT: [[COPY1:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr2_vgpr3 + ; GFX9-UNSAFE-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<2 x s32>) + ; GFX9-UNSAFE-NEXT: [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](<2 x s32>) + ; GFX9-UNSAFE-NEXT: [[INT:%[0-9]+]]:_(s32) = nnan G_INTRINSIC intrinsic(@llvm.amdgcn.rcp), [[UV2]](s32) + ; GFX9-UNSAFE-NEXT: [[FMUL:%[0-9]+]]:_(s32) = nnan G_FMUL [[UV]], [[INT]] + ; GFX9-UNSAFE-NEXT: [[INT1:%[0-9]+]]:_(s32) = nnan G_INTRINSIC intrinsic(@llvm.amdgcn.rcp), [[UV3]](s32) + ; GFX9-UNSAFE-NEXT: [[FMUL1:%[0-9]+]]:_(s32) = nnan G_FMUL [[UV1]], [[INT1]] + ; GFX9-UNSAFE-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[FMUL]](s32), [[FMUL1]](s32) + ; GFX9-UNSAFE-NEXT: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x s32>) ; GFX10-LABEL: name: test_fdiv_v2s32_flags ; GFX10: [[COPY:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr0_vgpr1 - ; GFX10: [[COPY1:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr2_vgpr3 - ; GFX10: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<2 x s32>) - ; GFX10: [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](<2 x s32>) - ; GFX10: [[C:%[0-9]+]]:_(s32) = G_FCONSTANT float 1.000000e+00 - ; GFX10: [[INT:%[0-9]+]]:_(s32), [[INT1:%[0-9]+]]:_(s1) = nnan G_INTRINSIC intrinsic(@llvm.amdgcn.div.scale), [[UV]](s32), [[UV2]](s32), 0 - ; GFX10: [[INT2:%[0-9]+]]:_(s32), [[INT3:%[0-9]+]]:_(s1) = nnan G_INTRINSIC intrinsic(@llvm.amdgcn.div.scale), [[UV]](s32), [[UV2]](s32), 1 - ; GFX10: [[INT4:%[0-9]+]]:_(s32) = nnan G_INTRINSIC intrinsic(@llvm.amdgcn.rcp), [[INT]](s32) - ; GFX10: [[FNEG:%[0-9]+]]:_(s32) = nnan G_FNEG [[INT]] - ; GFX10: [[FMA:%[0-9]+]]:_(s32) = nnan G_FMA [[FNEG]], [[INT4]], [[C]] - ; GFX10: [[FMA1:%[0-9]+]]:_(s32) = nnan G_FMA [[FMA]], [[INT4]], [[INT4]] - ; GFX10: [[FMUL:%[0-9]+]]:_(s32) = nnan G_FMUL [[INT2]], [[FMA1]] - ; GFX10: [[FMA2:%[0-9]+]]:_(s32) = nnan G_FMA [[FNEG]], [[FMUL]], [[INT2]] - ; GFX10: [[FMA3:%[0-9]+]]:_(s32) = nnan G_FMA [[FMA2]], [[FMA1]], [[FMUL]] - ; GFX10: [[FMA4:%[0-9]+]]:_(s32) = nnan G_FMA [[FNEG]], [[FMA3]], [[INT2]] - ; GFX10: [[INT5:%[0-9]+]]:_(s32) = nnan G_INTRINSIC intrinsic(@llvm.amdgcn.div.fmas), [[FMA4]](s32), [[FMA1]](s32), [[FMA3]](s32), [[INT3]](s1) - ; GFX10: [[INT6:%[0-9]+]]:_(s32) = nnan G_INTRINSIC intrinsic(@llvm.amdgcn.div.fixup), [[INT5]](s32), [[UV2]](s32), [[UV]](s32) - ; GFX10: [[INT7:%[0-9]+]]:_(s32), [[INT8:%[0-9]+]]:_(s1) = nnan G_INTRINSIC intrinsic(@llvm.amdgcn.div.scale), [[UV1]](s32), [[UV3]](s32), 0 - ; GFX10: [[INT9:%[0-9]+]]:_(s32), [[INT10:%[0-9]+]]:_(s1) = nnan G_INTRINSIC intrinsic(@llvm.amdgcn.div.scale), [[UV1]](s32), [[UV3]](s32), 1 - ; GFX10: [[INT11:%[0-9]+]]:_(s32) = nnan G_INTRINSIC intrinsic(@llvm.amdgcn.rcp), [[INT7]](s32) - ; GFX10: [[FNEG1:%[0-9]+]]:_(s32) = nnan G_FNEG [[INT7]] - ; GFX10: [[FMA5:%[0-9]+]]:_(s32) = nnan G_FMA [[FNEG1]], [[INT11]], [[C]] - ; GFX10: [[FMA6:%[0-9]+]]:_(s32) = nnan G_FMA [[FMA5]], [[INT11]], [[INT11]] - ; GFX10: [[FMUL1:%[0-9]+]]:_(s32) = nnan G_FMUL [[INT9]], [[FMA6]] - ; GFX10: [[FMA7:%[0-9]+]]:_(s32) = nnan G_FMA [[FNEG1]], [[FMUL1]], [[INT9]] - ; GFX10: [[FMA8:%[0-9]+]]:_(s32) = nnan G_FMA [[FMA7]], [[FMA6]], [[FMUL1]] - ; GFX10: [[FMA9:%[0-9]+]]:_(s32) = nnan G_FMA [[FNEG1]], [[FMA8]], [[INT9]] - ; GFX10: 
[[INT12:%[0-9]+]]:_(s32) = nnan G_INTRINSIC intrinsic(@llvm.amdgcn.div.fmas), [[FMA9]](s32), [[FMA6]](s32), [[FMA8]](s32), [[INT10]](s1) - ; GFX10: [[INT13:%[0-9]+]]:_(s32) = nnan G_INTRINSIC intrinsic(@llvm.amdgcn.div.fixup), [[INT12]](s32), [[UV3]](s32), [[UV1]](s32) - ; GFX10: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[INT6]](s32), [[INT13]](s32) - ; GFX10: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x s32>) + ; GFX10-NEXT: [[COPY1:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr2_vgpr3 + ; GFX10-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<2 x s32>) + ; GFX10-NEXT: [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](<2 x s32>) + ; GFX10-NEXT: [[C:%[0-9]+]]:_(s32) = G_FCONSTANT float 1.000000e+00 + ; GFX10-NEXT: [[INT:%[0-9]+]]:_(s32), [[INT1:%[0-9]+]]:_(s1) = nnan G_INTRINSIC intrinsic(@llvm.amdgcn.div.scale), [[UV]](s32), [[UV2]](s32), 0 + ; GFX10-NEXT: [[INT2:%[0-9]+]]:_(s32), [[INT3:%[0-9]+]]:_(s1) = nnan G_INTRINSIC intrinsic(@llvm.amdgcn.div.scale), [[UV]](s32), [[UV2]](s32), 1 + ; GFX10-NEXT: [[INT4:%[0-9]+]]:_(s32) = nnan G_INTRINSIC intrinsic(@llvm.amdgcn.rcp), [[INT]](s32) + ; GFX10-NEXT: [[FNEG:%[0-9]+]]:_(s32) = nnan G_FNEG [[INT]] + ; GFX10-NEXT: [[FMA:%[0-9]+]]:_(s32) = nnan G_FMA [[FNEG]], [[INT4]], [[C]] + ; GFX10-NEXT: [[FMA1:%[0-9]+]]:_(s32) = nnan G_FMA [[FMA]], [[INT4]], [[INT4]] + ; GFX10-NEXT: [[FMUL:%[0-9]+]]:_(s32) = nnan G_FMUL [[INT2]], [[FMA1]] + ; GFX10-NEXT: [[FMA2:%[0-9]+]]:_(s32) = nnan G_FMA [[FNEG]], [[FMUL]], [[INT2]] + ; GFX10-NEXT: [[FMA3:%[0-9]+]]:_(s32) = nnan G_FMA [[FMA2]], [[FMA1]], [[FMUL]] + ; GFX10-NEXT: [[FMA4:%[0-9]+]]:_(s32) = nnan G_FMA [[FNEG]], [[FMA3]], [[INT2]] + ; GFX10-NEXT: [[INT5:%[0-9]+]]:_(s32) = nnan G_INTRINSIC intrinsic(@llvm.amdgcn.div.fmas), [[FMA4]](s32), [[FMA1]](s32), [[FMA3]](s32), [[INT3]](s1) + ; GFX10-NEXT: [[INT6:%[0-9]+]]:_(s32) = nnan G_INTRINSIC intrinsic(@llvm.amdgcn.div.fixup), [[INT5]](s32), [[UV2]](s32), [[UV]](s32) + ; GFX10-NEXT: [[INT7:%[0-9]+]]:_(s32), [[INT8:%[0-9]+]]:_(s1) = nnan G_INTRINSIC intrinsic(@llvm.amdgcn.div.scale), [[UV1]](s32), [[UV3]](s32), 0 + ; GFX10-NEXT: [[INT9:%[0-9]+]]:_(s32), [[INT10:%[0-9]+]]:_(s1) = nnan G_INTRINSIC intrinsic(@llvm.amdgcn.div.scale), [[UV1]](s32), [[UV3]](s32), 1 + ; GFX10-NEXT: [[INT11:%[0-9]+]]:_(s32) = nnan G_INTRINSIC intrinsic(@llvm.amdgcn.rcp), [[INT7]](s32) + ; GFX10-NEXT: [[FNEG1:%[0-9]+]]:_(s32) = nnan G_FNEG [[INT7]] + ; GFX10-NEXT: [[FMA5:%[0-9]+]]:_(s32) = nnan G_FMA [[FNEG1]], [[INT11]], [[C]] + ; GFX10-NEXT: [[FMA6:%[0-9]+]]:_(s32) = nnan G_FMA [[FMA5]], [[INT11]], [[INT11]] + ; GFX10-NEXT: [[FMUL1:%[0-9]+]]:_(s32) = nnan G_FMUL [[INT9]], [[FMA6]] + ; GFX10-NEXT: [[FMA7:%[0-9]+]]:_(s32) = nnan G_FMA [[FNEG1]], [[FMUL1]], [[INT9]] + ; GFX10-NEXT: [[FMA8:%[0-9]+]]:_(s32) = nnan G_FMA [[FMA7]], [[FMA6]], [[FMUL1]] + ; GFX10-NEXT: [[FMA9:%[0-9]+]]:_(s32) = nnan G_FMA [[FNEG1]], [[FMA8]], [[INT9]] + ; GFX10-NEXT: [[INT12:%[0-9]+]]:_(s32) = nnan G_INTRINSIC intrinsic(@llvm.amdgcn.div.fmas), [[FMA9]](s32), [[FMA6]](s32), [[FMA8]](s32), [[INT10]](s1) + ; GFX10-NEXT: [[INT13:%[0-9]+]]:_(s32) = nnan G_INTRINSIC intrinsic(@llvm.amdgcn.div.fixup), [[INT12]](s32), [[UV3]](s32), [[UV1]](s32) + ; GFX10-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[INT6]](s32), [[INT13]](s32) + ; GFX10-NEXT: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x s32>) %0:_(<2 x s32>) = COPY $vgpr0_vgpr1 %1:_(<2 x s32>) = COPY $vgpr2_vgpr3 %2:_(<2 x s32>) = nnan G_FDIV %0, %1 @@ -834,193 +834,193 @@ body: | ; SI-LABEL: name: 
test_fdiv_v3s32 ; SI: [[COPY:%[0-9]+]]:_(<3 x s32>) = COPY $vgpr0_vgpr1_vgpr2 - ; SI: [[COPY1:%[0-9]+]]:_(<3 x s32>) = COPY $vgpr3_vgpr4_vgpr5 - ; SI: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<3 x s32>) - ; SI: [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](<3 x s32>) - ; SI: [[C:%[0-9]+]]:_(s32) = G_FCONSTANT float 1.000000e+00 - ; SI: [[INT:%[0-9]+]]:_(s32), [[INT1:%[0-9]+]]:_(s1) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.scale), [[UV]](s32), [[UV3]](s32), 0 - ; SI: [[INT2:%[0-9]+]]:_(s32), [[INT3:%[0-9]+]]:_(s1) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.scale), [[UV]](s32), [[UV3]](s32), 1 - ; SI: [[INT4:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.rcp), [[INT]](s32) - ; SI: [[FNEG:%[0-9]+]]:_(s32) = G_FNEG [[INT]] - ; SI: [[FMA:%[0-9]+]]:_(s32) = G_FMA [[FNEG]], [[INT4]], [[C]] - ; SI: [[FMA1:%[0-9]+]]:_(s32) = G_FMA [[FMA]], [[INT4]], [[INT4]] - ; SI: [[FMUL:%[0-9]+]]:_(s32) = G_FMUL [[INT2]], [[FMA1]] - ; SI: [[FMA2:%[0-9]+]]:_(s32) = G_FMA [[FNEG]], [[FMUL]], [[INT2]] - ; SI: [[FMA3:%[0-9]+]]:_(s32) = G_FMA [[FMA2]], [[FMA1]], [[FMUL]] - ; SI: [[FMA4:%[0-9]+]]:_(s32) = G_FMA [[FNEG]], [[FMA3]], [[INT2]] - ; SI: [[INT5:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.fmas), [[FMA4]](s32), [[FMA1]](s32), [[FMA3]](s32), [[INT3]](s1) - ; SI: [[INT6:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.fixup), [[INT5]](s32), [[UV3]](s32), [[UV]](s32) - ; SI: [[INT7:%[0-9]+]]:_(s32), [[INT8:%[0-9]+]]:_(s1) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.scale), [[UV1]](s32), [[UV4]](s32), 0 - ; SI: [[INT9:%[0-9]+]]:_(s32), [[INT10:%[0-9]+]]:_(s1) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.scale), [[UV1]](s32), [[UV4]](s32), 1 - ; SI: [[INT11:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.rcp), [[INT7]](s32) - ; SI: [[FNEG1:%[0-9]+]]:_(s32) = G_FNEG [[INT7]] - ; SI: [[FMA5:%[0-9]+]]:_(s32) = G_FMA [[FNEG1]], [[INT11]], [[C]] - ; SI: [[FMA6:%[0-9]+]]:_(s32) = G_FMA [[FMA5]], [[INT11]], [[INT11]] - ; SI: [[FMUL1:%[0-9]+]]:_(s32) = G_FMUL [[INT9]], [[FMA6]] - ; SI: [[FMA7:%[0-9]+]]:_(s32) = G_FMA [[FNEG1]], [[FMUL1]], [[INT9]] - ; SI: [[FMA8:%[0-9]+]]:_(s32) = G_FMA [[FMA7]], [[FMA6]], [[FMUL1]] - ; SI: [[FMA9:%[0-9]+]]:_(s32) = G_FMA [[FNEG1]], [[FMA8]], [[INT9]] - ; SI: [[INT12:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.fmas), [[FMA9]](s32), [[FMA6]](s32), [[FMA8]](s32), [[INT10]](s1) - ; SI: [[INT13:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.fixup), [[INT12]](s32), [[UV4]](s32), [[UV1]](s32) - ; SI: [[INT14:%[0-9]+]]:_(s32), [[INT15:%[0-9]+]]:_(s1) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.scale), [[UV2]](s32), [[UV5]](s32), 0 - ; SI: [[INT16:%[0-9]+]]:_(s32), [[INT17:%[0-9]+]]:_(s1) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.scale), [[UV2]](s32), [[UV5]](s32), 1 - ; SI: [[INT18:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.rcp), [[INT14]](s32) - ; SI: [[FNEG2:%[0-9]+]]:_(s32) = G_FNEG [[INT14]] - ; SI: [[FMA10:%[0-9]+]]:_(s32) = G_FMA [[FNEG2]], [[INT18]], [[C]] - ; SI: [[FMA11:%[0-9]+]]:_(s32) = G_FMA [[FMA10]], [[INT18]], [[INT18]] - ; SI: [[FMUL2:%[0-9]+]]:_(s32) = G_FMUL [[INT16]], [[FMA11]] - ; SI: [[FMA12:%[0-9]+]]:_(s32) = G_FMA [[FNEG2]], [[FMUL2]], [[INT16]] - ; SI: [[FMA13:%[0-9]+]]:_(s32) = G_FMA [[FMA12]], [[FMA11]], [[FMUL2]] - ; SI: [[FMA14:%[0-9]+]]:_(s32) = G_FMA [[FNEG2]], [[FMA13]], [[INT16]] - ; SI: [[INT19:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.fmas), [[FMA14]](s32), [[FMA11]](s32), 
[[FMA13]](s32), [[INT17]](s1) - ; SI: [[INT20:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.fixup), [[INT19]](s32), [[UV5]](s32), [[UV2]](s32) - ; SI: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[INT6]](s32), [[INT13]](s32), [[INT20]](s32) - ; SI: $vgpr0_vgpr1_vgpr2 = COPY [[BUILD_VECTOR]](<3 x s32>) + ; SI-NEXT: [[COPY1:%[0-9]+]]:_(<3 x s32>) = COPY $vgpr3_vgpr4_vgpr5 + ; SI-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<3 x s32>) + ; SI-NEXT: [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](<3 x s32>) + ; SI-NEXT: [[C:%[0-9]+]]:_(s32) = G_FCONSTANT float 1.000000e+00 + ; SI-NEXT: [[INT:%[0-9]+]]:_(s32), [[INT1:%[0-9]+]]:_(s1) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.scale), [[UV]](s32), [[UV3]](s32), 0 + ; SI-NEXT: [[INT2:%[0-9]+]]:_(s32), [[INT3:%[0-9]+]]:_(s1) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.scale), [[UV]](s32), [[UV3]](s32), 1 + ; SI-NEXT: [[INT4:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.rcp), [[INT]](s32) + ; SI-NEXT: [[FNEG:%[0-9]+]]:_(s32) = G_FNEG [[INT]] + ; SI-NEXT: [[FMA:%[0-9]+]]:_(s32) = G_FMA [[FNEG]], [[INT4]], [[C]] + ; SI-NEXT: [[FMA1:%[0-9]+]]:_(s32) = G_FMA [[FMA]], [[INT4]], [[INT4]] + ; SI-NEXT: [[FMUL:%[0-9]+]]:_(s32) = G_FMUL [[INT2]], [[FMA1]] + ; SI-NEXT: [[FMA2:%[0-9]+]]:_(s32) = G_FMA [[FNEG]], [[FMUL]], [[INT2]] + ; SI-NEXT: [[FMA3:%[0-9]+]]:_(s32) = G_FMA [[FMA2]], [[FMA1]], [[FMUL]] + ; SI-NEXT: [[FMA4:%[0-9]+]]:_(s32) = G_FMA [[FNEG]], [[FMA3]], [[INT2]] + ; SI-NEXT: [[INT5:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.fmas), [[FMA4]](s32), [[FMA1]](s32), [[FMA3]](s32), [[INT3]](s1) + ; SI-NEXT: [[INT6:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.fixup), [[INT5]](s32), [[UV3]](s32), [[UV]](s32) + ; SI-NEXT: [[INT7:%[0-9]+]]:_(s32), [[INT8:%[0-9]+]]:_(s1) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.scale), [[UV1]](s32), [[UV4]](s32), 0 + ; SI-NEXT: [[INT9:%[0-9]+]]:_(s32), [[INT10:%[0-9]+]]:_(s1) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.scale), [[UV1]](s32), [[UV4]](s32), 1 + ; SI-NEXT: [[INT11:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.rcp), [[INT7]](s32) + ; SI-NEXT: [[FNEG1:%[0-9]+]]:_(s32) = G_FNEG [[INT7]] + ; SI-NEXT: [[FMA5:%[0-9]+]]:_(s32) = G_FMA [[FNEG1]], [[INT11]], [[C]] + ; SI-NEXT: [[FMA6:%[0-9]+]]:_(s32) = G_FMA [[FMA5]], [[INT11]], [[INT11]] + ; SI-NEXT: [[FMUL1:%[0-9]+]]:_(s32) = G_FMUL [[INT9]], [[FMA6]] + ; SI-NEXT: [[FMA7:%[0-9]+]]:_(s32) = G_FMA [[FNEG1]], [[FMUL1]], [[INT9]] + ; SI-NEXT: [[FMA8:%[0-9]+]]:_(s32) = G_FMA [[FMA7]], [[FMA6]], [[FMUL1]] + ; SI-NEXT: [[FMA9:%[0-9]+]]:_(s32) = G_FMA [[FNEG1]], [[FMA8]], [[INT9]] + ; SI-NEXT: [[INT12:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.fmas), [[FMA9]](s32), [[FMA6]](s32), [[FMA8]](s32), [[INT10]](s1) + ; SI-NEXT: [[INT13:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.fixup), [[INT12]](s32), [[UV4]](s32), [[UV1]](s32) + ; SI-NEXT: [[INT14:%[0-9]+]]:_(s32), [[INT15:%[0-9]+]]:_(s1) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.scale), [[UV2]](s32), [[UV5]](s32), 0 + ; SI-NEXT: [[INT16:%[0-9]+]]:_(s32), [[INT17:%[0-9]+]]:_(s1) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.scale), [[UV2]](s32), [[UV5]](s32), 1 + ; SI-NEXT: [[INT18:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.rcp), [[INT14]](s32) + ; SI-NEXT: [[FNEG2:%[0-9]+]]:_(s32) = G_FNEG [[INT14]] + ; SI-NEXT: [[FMA10:%[0-9]+]]:_(s32) = G_FMA [[FNEG2]], [[INT18]], [[C]] + ; SI-NEXT: [[FMA11:%[0-9]+]]:_(s32) = G_FMA [[FMA10]], 
[[INT18]], [[INT18]] + ; SI-NEXT: [[FMUL2:%[0-9]+]]:_(s32) = G_FMUL [[INT16]], [[FMA11]] + ; SI-NEXT: [[FMA12:%[0-9]+]]:_(s32) = G_FMA [[FNEG2]], [[FMUL2]], [[INT16]] + ; SI-NEXT: [[FMA13:%[0-9]+]]:_(s32) = G_FMA [[FMA12]], [[FMA11]], [[FMUL2]] + ; SI-NEXT: [[FMA14:%[0-9]+]]:_(s32) = G_FMA [[FNEG2]], [[FMA13]], [[INT16]] + ; SI-NEXT: [[INT19:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.fmas), [[FMA14]](s32), [[FMA11]](s32), [[FMA13]](s32), [[INT17]](s1) + ; SI-NEXT: [[INT20:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.fixup), [[INT19]](s32), [[UV5]](s32), [[UV2]](s32) + ; SI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[INT6]](s32), [[INT13]](s32), [[INT20]](s32) + ; SI-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[BUILD_VECTOR]](<3 x s32>) ; VI-LABEL: name: test_fdiv_v3s32 ; VI: [[COPY:%[0-9]+]]:_(<3 x s32>) = COPY $vgpr0_vgpr1_vgpr2 - ; VI: [[COPY1:%[0-9]+]]:_(<3 x s32>) = COPY $vgpr3_vgpr4_vgpr5 - ; VI: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<3 x s32>) - ; VI: [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](<3 x s32>) - ; VI: [[C:%[0-9]+]]:_(s32) = G_FCONSTANT float 1.000000e+00 - ; VI: [[INT:%[0-9]+]]:_(s32), [[INT1:%[0-9]+]]:_(s1) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.scale), [[UV]](s32), [[UV3]](s32), 0 - ; VI: [[INT2:%[0-9]+]]:_(s32), [[INT3:%[0-9]+]]:_(s1) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.scale), [[UV]](s32), [[UV3]](s32), 1 - ; VI: [[INT4:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.rcp), [[INT]](s32) - ; VI: [[FNEG:%[0-9]+]]:_(s32) = G_FNEG [[INT]] - ; VI: [[FMA:%[0-9]+]]:_(s32) = G_FMA [[FNEG]], [[INT4]], [[C]] - ; VI: [[FMA1:%[0-9]+]]:_(s32) = G_FMA [[FMA]], [[INT4]], [[INT4]] - ; VI: [[FMUL:%[0-9]+]]:_(s32) = G_FMUL [[INT2]], [[FMA1]] - ; VI: [[FMA2:%[0-9]+]]:_(s32) = G_FMA [[FNEG]], [[FMUL]], [[INT2]] - ; VI: [[FMA3:%[0-9]+]]:_(s32) = G_FMA [[FMA2]], [[FMA1]], [[FMUL]] - ; VI: [[FMA4:%[0-9]+]]:_(s32) = G_FMA [[FNEG]], [[FMA3]], [[INT2]] - ; VI: [[INT5:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.fmas), [[FMA4]](s32), [[FMA1]](s32), [[FMA3]](s32), [[INT3]](s1) - ; VI: [[INT6:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.fixup), [[INT5]](s32), [[UV3]](s32), [[UV]](s32) - ; VI: [[INT7:%[0-9]+]]:_(s32), [[INT8:%[0-9]+]]:_(s1) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.scale), [[UV1]](s32), [[UV4]](s32), 0 - ; VI: [[INT9:%[0-9]+]]:_(s32), [[INT10:%[0-9]+]]:_(s1) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.scale), [[UV1]](s32), [[UV4]](s32), 1 - ; VI: [[INT11:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.rcp), [[INT7]](s32) - ; VI: [[FNEG1:%[0-9]+]]:_(s32) = G_FNEG [[INT7]] - ; VI: [[FMA5:%[0-9]+]]:_(s32) = G_FMA [[FNEG1]], [[INT11]], [[C]] - ; VI: [[FMA6:%[0-9]+]]:_(s32) = G_FMA [[FMA5]], [[INT11]], [[INT11]] - ; VI: [[FMUL1:%[0-9]+]]:_(s32) = G_FMUL [[INT9]], [[FMA6]] - ; VI: [[FMA7:%[0-9]+]]:_(s32) = G_FMA [[FNEG1]], [[FMUL1]], [[INT9]] - ; VI: [[FMA8:%[0-9]+]]:_(s32) = G_FMA [[FMA7]], [[FMA6]], [[FMUL1]] - ; VI: [[FMA9:%[0-9]+]]:_(s32) = G_FMA [[FNEG1]], [[FMA8]], [[INT9]] - ; VI: [[INT12:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.fmas), [[FMA9]](s32), [[FMA6]](s32), [[FMA8]](s32), [[INT10]](s1) - ; VI: [[INT13:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.fixup), [[INT12]](s32), [[UV4]](s32), [[UV1]](s32) - ; VI: [[INT14:%[0-9]+]]:_(s32), [[INT15:%[0-9]+]]:_(s1) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.scale), [[UV2]](s32), [[UV5]](s32), 0 - ; VI: 
[[INT16:%[0-9]+]]:_(s32), [[INT17:%[0-9]+]]:_(s1) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.scale), [[UV2]](s32), [[UV5]](s32), 1 - ; VI: [[INT18:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.rcp), [[INT14]](s32) - ; VI: [[FNEG2:%[0-9]+]]:_(s32) = G_FNEG [[INT14]] - ; VI: [[FMA10:%[0-9]+]]:_(s32) = G_FMA [[FNEG2]], [[INT18]], [[C]] - ; VI: [[FMA11:%[0-9]+]]:_(s32) = G_FMA [[FMA10]], [[INT18]], [[INT18]] - ; VI: [[FMUL2:%[0-9]+]]:_(s32) = G_FMUL [[INT16]], [[FMA11]] - ; VI: [[FMA12:%[0-9]+]]:_(s32) = G_FMA [[FNEG2]], [[FMUL2]], [[INT16]] - ; VI: [[FMA13:%[0-9]+]]:_(s32) = G_FMA [[FMA12]], [[FMA11]], [[FMUL2]] - ; VI: [[FMA14:%[0-9]+]]:_(s32) = G_FMA [[FNEG2]], [[FMA13]], [[INT16]] - ; VI: [[INT19:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.fmas), [[FMA14]](s32), [[FMA11]](s32), [[FMA13]](s32), [[INT17]](s1) - ; VI: [[INT20:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.fixup), [[INT19]](s32), [[UV5]](s32), [[UV2]](s32) - ; VI: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[INT6]](s32), [[INT13]](s32), [[INT20]](s32) - ; VI: $vgpr0_vgpr1_vgpr2 = COPY [[BUILD_VECTOR]](<3 x s32>) + ; VI-NEXT: [[COPY1:%[0-9]+]]:_(<3 x s32>) = COPY $vgpr3_vgpr4_vgpr5 + ; VI-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<3 x s32>) + ; VI-NEXT: [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](<3 x s32>) + ; VI-NEXT: [[C:%[0-9]+]]:_(s32) = G_FCONSTANT float 1.000000e+00 + ; VI-NEXT: [[INT:%[0-9]+]]:_(s32), [[INT1:%[0-9]+]]:_(s1) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.scale), [[UV]](s32), [[UV3]](s32), 0 + ; VI-NEXT: [[INT2:%[0-9]+]]:_(s32), [[INT3:%[0-9]+]]:_(s1) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.scale), [[UV]](s32), [[UV3]](s32), 1 + ; VI-NEXT: [[INT4:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.rcp), [[INT]](s32) + ; VI-NEXT: [[FNEG:%[0-9]+]]:_(s32) = G_FNEG [[INT]] + ; VI-NEXT: [[FMA:%[0-9]+]]:_(s32) = G_FMA [[FNEG]], [[INT4]], [[C]] + ; VI-NEXT: [[FMA1:%[0-9]+]]:_(s32) = G_FMA [[FMA]], [[INT4]], [[INT4]] + ; VI-NEXT: [[FMUL:%[0-9]+]]:_(s32) = G_FMUL [[INT2]], [[FMA1]] + ; VI-NEXT: [[FMA2:%[0-9]+]]:_(s32) = G_FMA [[FNEG]], [[FMUL]], [[INT2]] + ; VI-NEXT: [[FMA3:%[0-9]+]]:_(s32) = G_FMA [[FMA2]], [[FMA1]], [[FMUL]] + ; VI-NEXT: [[FMA4:%[0-9]+]]:_(s32) = G_FMA [[FNEG]], [[FMA3]], [[INT2]] + ; VI-NEXT: [[INT5:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.fmas), [[FMA4]](s32), [[FMA1]](s32), [[FMA3]](s32), [[INT3]](s1) + ; VI-NEXT: [[INT6:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.fixup), [[INT5]](s32), [[UV3]](s32), [[UV]](s32) + ; VI-NEXT: [[INT7:%[0-9]+]]:_(s32), [[INT8:%[0-9]+]]:_(s1) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.scale), [[UV1]](s32), [[UV4]](s32), 0 + ; VI-NEXT: [[INT9:%[0-9]+]]:_(s32), [[INT10:%[0-9]+]]:_(s1) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.scale), [[UV1]](s32), [[UV4]](s32), 1 + ; VI-NEXT: [[INT11:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.rcp), [[INT7]](s32) + ; VI-NEXT: [[FNEG1:%[0-9]+]]:_(s32) = G_FNEG [[INT7]] + ; VI-NEXT: [[FMA5:%[0-9]+]]:_(s32) = G_FMA [[FNEG1]], [[INT11]], [[C]] + ; VI-NEXT: [[FMA6:%[0-9]+]]:_(s32) = G_FMA [[FMA5]], [[INT11]], [[INT11]] + ; VI-NEXT: [[FMUL1:%[0-9]+]]:_(s32) = G_FMUL [[INT9]], [[FMA6]] + ; VI-NEXT: [[FMA7:%[0-9]+]]:_(s32) = G_FMA [[FNEG1]], [[FMUL1]], [[INT9]] + ; VI-NEXT: [[FMA8:%[0-9]+]]:_(s32) = G_FMA [[FMA7]], [[FMA6]], [[FMUL1]] + ; VI-NEXT: [[FMA9:%[0-9]+]]:_(s32) = G_FMA [[FNEG1]], [[FMA8]], [[INT9]] + ; VI-NEXT: [[INT12:%[0-9]+]]:_(s32) = 
G_INTRINSIC intrinsic(@llvm.amdgcn.div.fmas), [[FMA9]](s32), [[FMA6]](s32), [[FMA8]](s32), [[INT10]](s1) + ; VI-NEXT: [[INT13:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.fixup), [[INT12]](s32), [[UV4]](s32), [[UV1]](s32) + ; VI-NEXT: [[INT14:%[0-9]+]]:_(s32), [[INT15:%[0-9]+]]:_(s1) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.scale), [[UV2]](s32), [[UV5]](s32), 0 + ; VI-NEXT: [[INT16:%[0-9]+]]:_(s32), [[INT17:%[0-9]+]]:_(s1) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.scale), [[UV2]](s32), [[UV5]](s32), 1 + ; VI-NEXT: [[INT18:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.rcp), [[INT14]](s32) + ; VI-NEXT: [[FNEG2:%[0-9]+]]:_(s32) = G_FNEG [[INT14]] + ; VI-NEXT: [[FMA10:%[0-9]+]]:_(s32) = G_FMA [[FNEG2]], [[INT18]], [[C]] + ; VI-NEXT: [[FMA11:%[0-9]+]]:_(s32) = G_FMA [[FMA10]], [[INT18]], [[INT18]] + ; VI-NEXT: [[FMUL2:%[0-9]+]]:_(s32) = G_FMUL [[INT16]], [[FMA11]] + ; VI-NEXT: [[FMA12:%[0-9]+]]:_(s32) = G_FMA [[FNEG2]], [[FMUL2]], [[INT16]] + ; VI-NEXT: [[FMA13:%[0-9]+]]:_(s32) = G_FMA [[FMA12]], [[FMA11]], [[FMUL2]] + ; VI-NEXT: [[FMA14:%[0-9]+]]:_(s32) = G_FMA [[FNEG2]], [[FMA13]], [[INT16]] + ; VI-NEXT: [[INT19:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.fmas), [[FMA14]](s32), [[FMA11]](s32), [[FMA13]](s32), [[INT17]](s1) + ; VI-NEXT: [[INT20:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.fixup), [[INT19]](s32), [[UV5]](s32), [[UV2]](s32) + ; VI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[INT6]](s32), [[INT13]](s32), [[INT20]](s32) + ; VI-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[BUILD_VECTOR]](<3 x s32>) ; GFX9-LABEL: name: test_fdiv_v3s32 ; GFX9: [[COPY:%[0-9]+]]:_(<3 x s32>) = COPY $vgpr0_vgpr1_vgpr2 - ; GFX9: [[COPY1:%[0-9]+]]:_(<3 x s32>) = COPY $vgpr3_vgpr4_vgpr5 - ; GFX9: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<3 x s32>) - ; GFX9: [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](<3 x s32>) - ; GFX9: [[C:%[0-9]+]]:_(s32) = G_FCONSTANT float 1.000000e+00 - ; GFX9: [[INT:%[0-9]+]]:_(s32), [[INT1:%[0-9]+]]:_(s1) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.scale), [[UV]](s32), [[UV3]](s32), 0 - ; GFX9: [[INT2:%[0-9]+]]:_(s32), [[INT3:%[0-9]+]]:_(s1) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.scale), [[UV]](s32), [[UV3]](s32), 1 - ; GFX9: [[INT4:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.rcp), [[INT]](s32) - ; GFX9: [[FNEG:%[0-9]+]]:_(s32) = G_FNEG [[INT]] - ; GFX9: [[FMA:%[0-9]+]]:_(s32) = G_FMA [[FNEG]], [[INT4]], [[C]] - ; GFX9: [[FMA1:%[0-9]+]]:_(s32) = G_FMA [[FMA]], [[INT4]], [[INT4]] - ; GFX9: [[FMUL:%[0-9]+]]:_(s32) = G_FMUL [[INT2]], [[FMA1]] - ; GFX9: [[FMA2:%[0-9]+]]:_(s32) = G_FMA [[FNEG]], [[FMUL]], [[INT2]] - ; GFX9: [[FMA3:%[0-9]+]]:_(s32) = G_FMA [[FMA2]], [[FMA1]], [[FMUL]] - ; GFX9: [[FMA4:%[0-9]+]]:_(s32) = G_FMA [[FNEG]], [[FMA3]], [[INT2]] - ; GFX9: [[INT5:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.fmas), [[FMA4]](s32), [[FMA1]](s32), [[FMA3]](s32), [[INT3]](s1) - ; GFX9: [[INT6:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.fixup), [[INT5]](s32), [[UV3]](s32), [[UV]](s32) - ; GFX9: [[INT7:%[0-9]+]]:_(s32), [[INT8:%[0-9]+]]:_(s1) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.scale), [[UV1]](s32), [[UV4]](s32), 0 - ; GFX9: [[INT9:%[0-9]+]]:_(s32), [[INT10:%[0-9]+]]:_(s1) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.scale), [[UV1]](s32), [[UV4]](s32), 1 - ; GFX9: [[INT11:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.rcp), [[INT7]](s32) - ; GFX9: [[FNEG1:%[0-9]+]]:_(s32) = G_FNEG 
[[INT7]] - ; GFX9: [[FMA5:%[0-9]+]]:_(s32) = G_FMA [[FNEG1]], [[INT11]], [[C]] - ; GFX9: [[FMA6:%[0-9]+]]:_(s32) = G_FMA [[FMA5]], [[INT11]], [[INT11]] - ; GFX9: [[FMUL1:%[0-9]+]]:_(s32) = G_FMUL [[INT9]], [[FMA6]] - ; GFX9: [[FMA7:%[0-9]+]]:_(s32) = G_FMA [[FNEG1]], [[FMUL1]], [[INT9]] - ; GFX9: [[FMA8:%[0-9]+]]:_(s32) = G_FMA [[FMA7]], [[FMA6]], [[FMUL1]] - ; GFX9: [[FMA9:%[0-9]+]]:_(s32) = G_FMA [[FNEG1]], [[FMA8]], [[INT9]] - ; GFX9: [[INT12:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.fmas), [[FMA9]](s32), [[FMA6]](s32), [[FMA8]](s32), [[INT10]](s1) - ; GFX9: [[INT13:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.fixup), [[INT12]](s32), [[UV4]](s32), [[UV1]](s32) - ; GFX9: [[INT14:%[0-9]+]]:_(s32), [[INT15:%[0-9]+]]:_(s1) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.scale), [[UV2]](s32), [[UV5]](s32), 0 - ; GFX9: [[INT16:%[0-9]+]]:_(s32), [[INT17:%[0-9]+]]:_(s1) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.scale), [[UV2]](s32), [[UV5]](s32), 1 - ; GFX9: [[INT18:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.rcp), [[INT14]](s32) - ; GFX9: [[FNEG2:%[0-9]+]]:_(s32) = G_FNEG [[INT14]] - ; GFX9: [[FMA10:%[0-9]+]]:_(s32) = G_FMA [[FNEG2]], [[INT18]], [[C]] - ; GFX9: [[FMA11:%[0-9]+]]:_(s32) = G_FMA [[FMA10]], [[INT18]], [[INT18]] - ; GFX9: [[FMUL2:%[0-9]+]]:_(s32) = G_FMUL [[INT16]], [[FMA11]] - ; GFX9: [[FMA12:%[0-9]+]]:_(s32) = G_FMA [[FNEG2]], [[FMUL2]], [[INT16]] - ; GFX9: [[FMA13:%[0-9]+]]:_(s32) = G_FMA [[FMA12]], [[FMA11]], [[FMUL2]] - ; GFX9: [[FMA14:%[0-9]+]]:_(s32) = G_FMA [[FNEG2]], [[FMA13]], [[INT16]] - ; GFX9: [[INT19:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.fmas), [[FMA14]](s32), [[FMA11]](s32), [[FMA13]](s32), [[INT17]](s1) - ; GFX9: [[INT20:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.fixup), [[INT19]](s32), [[UV5]](s32), [[UV2]](s32) - ; GFX9: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[INT6]](s32), [[INT13]](s32), [[INT20]](s32) - ; GFX9: $vgpr0_vgpr1_vgpr2 = COPY [[BUILD_VECTOR]](<3 x s32>) + ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(<3 x s32>) = COPY $vgpr3_vgpr4_vgpr5 + ; GFX9-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<3 x s32>) + ; GFX9-NEXT: [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](<3 x s32>) + ; GFX9-NEXT: [[C:%[0-9]+]]:_(s32) = G_FCONSTANT float 1.000000e+00 + ; GFX9-NEXT: [[INT:%[0-9]+]]:_(s32), [[INT1:%[0-9]+]]:_(s1) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.scale), [[UV]](s32), [[UV3]](s32), 0 + ; GFX9-NEXT: [[INT2:%[0-9]+]]:_(s32), [[INT3:%[0-9]+]]:_(s1) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.scale), [[UV]](s32), [[UV3]](s32), 1 + ; GFX9-NEXT: [[INT4:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.rcp), [[INT]](s32) + ; GFX9-NEXT: [[FNEG:%[0-9]+]]:_(s32) = G_FNEG [[INT]] + ; GFX9-NEXT: [[FMA:%[0-9]+]]:_(s32) = G_FMA [[FNEG]], [[INT4]], [[C]] + ; GFX9-NEXT: [[FMA1:%[0-9]+]]:_(s32) = G_FMA [[FMA]], [[INT4]], [[INT4]] + ; GFX9-NEXT: [[FMUL:%[0-9]+]]:_(s32) = G_FMUL [[INT2]], [[FMA1]] + ; GFX9-NEXT: [[FMA2:%[0-9]+]]:_(s32) = G_FMA [[FNEG]], [[FMUL]], [[INT2]] + ; GFX9-NEXT: [[FMA3:%[0-9]+]]:_(s32) = G_FMA [[FMA2]], [[FMA1]], [[FMUL]] + ; GFX9-NEXT: [[FMA4:%[0-9]+]]:_(s32) = G_FMA [[FNEG]], [[FMA3]], [[INT2]] + ; GFX9-NEXT: [[INT5:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.fmas), [[FMA4]](s32), [[FMA1]](s32), [[FMA3]](s32), [[INT3]](s1) + ; GFX9-NEXT: [[INT6:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.fixup), [[INT5]](s32), [[UV3]](s32), [[UV]](s32) + ; 
GFX9-NEXT: [[INT7:%[0-9]+]]:_(s32), [[INT8:%[0-9]+]]:_(s1) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.scale), [[UV1]](s32), [[UV4]](s32), 0 + ; GFX9-NEXT: [[INT9:%[0-9]+]]:_(s32), [[INT10:%[0-9]+]]:_(s1) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.scale), [[UV1]](s32), [[UV4]](s32), 1 + ; GFX9-NEXT: [[INT11:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.rcp), [[INT7]](s32) + ; GFX9-NEXT: [[FNEG1:%[0-9]+]]:_(s32) = G_FNEG [[INT7]] + ; GFX9-NEXT: [[FMA5:%[0-9]+]]:_(s32) = G_FMA [[FNEG1]], [[INT11]], [[C]] + ; GFX9-NEXT: [[FMA6:%[0-9]+]]:_(s32) = G_FMA [[FMA5]], [[INT11]], [[INT11]] + ; GFX9-NEXT: [[FMUL1:%[0-9]+]]:_(s32) = G_FMUL [[INT9]], [[FMA6]] + ; GFX9-NEXT: [[FMA7:%[0-9]+]]:_(s32) = G_FMA [[FNEG1]], [[FMUL1]], [[INT9]] + ; GFX9-NEXT: [[FMA8:%[0-9]+]]:_(s32) = G_FMA [[FMA7]], [[FMA6]], [[FMUL1]] + ; GFX9-NEXT: [[FMA9:%[0-9]+]]:_(s32) = G_FMA [[FNEG1]], [[FMA8]], [[INT9]] + ; GFX9-NEXT: [[INT12:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.fmas), [[FMA9]](s32), [[FMA6]](s32), [[FMA8]](s32), [[INT10]](s1) + ; GFX9-NEXT: [[INT13:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.fixup), [[INT12]](s32), [[UV4]](s32), [[UV1]](s32) + ; GFX9-NEXT: [[INT14:%[0-9]+]]:_(s32), [[INT15:%[0-9]+]]:_(s1) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.scale), [[UV2]](s32), [[UV5]](s32), 0 + ; GFX9-NEXT: [[INT16:%[0-9]+]]:_(s32), [[INT17:%[0-9]+]]:_(s1) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.scale), [[UV2]](s32), [[UV5]](s32), 1 + ; GFX9-NEXT: [[INT18:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.rcp), [[INT14]](s32) + ; GFX9-NEXT: [[FNEG2:%[0-9]+]]:_(s32) = G_FNEG [[INT14]] + ; GFX9-NEXT: [[FMA10:%[0-9]+]]:_(s32) = G_FMA [[FNEG2]], [[INT18]], [[C]] + ; GFX9-NEXT: [[FMA11:%[0-9]+]]:_(s32) = G_FMA [[FMA10]], [[INT18]], [[INT18]] + ; GFX9-NEXT: [[FMUL2:%[0-9]+]]:_(s32) = G_FMUL [[INT16]], [[FMA11]] + ; GFX9-NEXT: [[FMA12:%[0-9]+]]:_(s32) = G_FMA [[FNEG2]], [[FMUL2]], [[INT16]] + ; GFX9-NEXT: [[FMA13:%[0-9]+]]:_(s32) = G_FMA [[FMA12]], [[FMA11]], [[FMUL2]] + ; GFX9-NEXT: [[FMA14:%[0-9]+]]:_(s32) = G_FMA [[FNEG2]], [[FMA13]], [[INT16]] + ; GFX9-NEXT: [[INT19:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.fmas), [[FMA14]](s32), [[FMA11]](s32), [[FMA13]](s32), [[INT17]](s1) + ; GFX9-NEXT: [[INT20:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.fixup), [[INT19]](s32), [[UV5]](s32), [[UV2]](s32) + ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[INT6]](s32), [[INT13]](s32), [[INT20]](s32) + ; GFX9-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[BUILD_VECTOR]](<3 x s32>) ; GFX9-UNSAFE-LABEL: name: test_fdiv_v3s32 ; GFX9-UNSAFE: [[COPY:%[0-9]+]]:_(<3 x s32>) = COPY $vgpr0_vgpr1_vgpr2 - ; GFX9-UNSAFE: [[COPY1:%[0-9]+]]:_(<3 x s32>) = COPY $vgpr3_vgpr4_vgpr5 - ; GFX9-UNSAFE: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<3 x s32>) - ; GFX9-UNSAFE: [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](<3 x s32>) - ; GFX9-UNSAFE: [[INT:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.rcp), [[UV3]](s32) - ; GFX9-UNSAFE: [[FMUL:%[0-9]+]]:_(s32) = G_FMUL [[UV]], [[INT]] - ; GFX9-UNSAFE: [[INT1:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.rcp), [[UV4]](s32) - ; GFX9-UNSAFE: [[FMUL1:%[0-9]+]]:_(s32) = G_FMUL [[UV1]], [[INT1]] - ; GFX9-UNSAFE: [[INT2:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.rcp), [[UV5]](s32) - ; GFX9-UNSAFE: [[FMUL2:%[0-9]+]]:_(s32) = G_FMUL [[UV2]], [[INT2]] - ; GFX9-UNSAFE: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR 
[[FMUL]](s32), [[FMUL1]](s32), [[FMUL2]](s32) - ; GFX9-UNSAFE: $vgpr0_vgpr1_vgpr2 = COPY [[BUILD_VECTOR]](<3 x s32>) + ; GFX9-UNSAFE-NEXT: [[COPY1:%[0-9]+]]:_(<3 x s32>) = COPY $vgpr3_vgpr4_vgpr5 + ; GFX9-UNSAFE-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<3 x s32>) + ; GFX9-UNSAFE-NEXT: [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](<3 x s32>) + ; GFX9-UNSAFE-NEXT: [[INT:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.rcp), [[UV3]](s32) + ; GFX9-UNSAFE-NEXT: [[FMUL:%[0-9]+]]:_(s32) = G_FMUL [[UV]], [[INT]] + ; GFX9-UNSAFE-NEXT: [[INT1:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.rcp), [[UV4]](s32) + ; GFX9-UNSAFE-NEXT: [[FMUL1:%[0-9]+]]:_(s32) = G_FMUL [[UV1]], [[INT1]] + ; GFX9-UNSAFE-NEXT: [[INT2:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.rcp), [[UV5]](s32) + ; GFX9-UNSAFE-NEXT: [[FMUL2:%[0-9]+]]:_(s32) = G_FMUL [[UV2]], [[INT2]] + ; GFX9-UNSAFE-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[FMUL]](s32), [[FMUL1]](s32), [[FMUL2]](s32) + ; GFX9-UNSAFE-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[BUILD_VECTOR]](<3 x s32>) ; GFX10-LABEL: name: test_fdiv_v3s32 ; GFX10: [[COPY:%[0-9]+]]:_(<3 x s32>) = COPY $vgpr0_vgpr1_vgpr2 - ; GFX10: [[COPY1:%[0-9]+]]:_(<3 x s32>) = COPY $vgpr3_vgpr4_vgpr5 - ; GFX10: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<3 x s32>) - ; GFX10: [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](<3 x s32>) - ; GFX10: [[C:%[0-9]+]]:_(s32) = G_FCONSTANT float 1.000000e+00 - ; GFX10: [[INT:%[0-9]+]]:_(s32), [[INT1:%[0-9]+]]:_(s1) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.scale), [[UV]](s32), [[UV3]](s32), 0 - ; GFX10: [[INT2:%[0-9]+]]:_(s32), [[INT3:%[0-9]+]]:_(s1) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.scale), [[UV]](s32), [[UV3]](s32), 1 - ; GFX10: [[INT4:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.rcp), [[INT]](s32) - ; GFX10: [[FNEG:%[0-9]+]]:_(s32) = G_FNEG [[INT]] - ; GFX10: [[FMA:%[0-9]+]]:_(s32) = G_FMA [[FNEG]], [[INT4]], [[C]] - ; GFX10: [[FMA1:%[0-9]+]]:_(s32) = G_FMA [[FMA]], [[INT4]], [[INT4]] - ; GFX10: [[FMUL:%[0-9]+]]:_(s32) = G_FMUL [[INT2]], [[FMA1]] - ; GFX10: [[FMA2:%[0-9]+]]:_(s32) = G_FMA [[FNEG]], [[FMUL]], [[INT2]] - ; GFX10: [[FMA3:%[0-9]+]]:_(s32) = G_FMA [[FMA2]], [[FMA1]], [[FMUL]] - ; GFX10: [[FMA4:%[0-9]+]]:_(s32) = G_FMA [[FNEG]], [[FMA3]], [[INT2]] - ; GFX10: [[INT5:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.fmas), [[FMA4]](s32), [[FMA1]](s32), [[FMA3]](s32), [[INT3]](s1) - ; GFX10: [[INT6:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.fixup), [[INT5]](s32), [[UV3]](s32), [[UV]](s32) - ; GFX10: [[INT7:%[0-9]+]]:_(s32), [[INT8:%[0-9]+]]:_(s1) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.scale), [[UV1]](s32), [[UV4]](s32), 0 - ; GFX10: [[INT9:%[0-9]+]]:_(s32), [[INT10:%[0-9]+]]:_(s1) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.scale), [[UV1]](s32), [[UV4]](s32), 1 - ; GFX10: [[INT11:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.rcp), [[INT7]](s32) - ; GFX10: [[FNEG1:%[0-9]+]]:_(s32) = G_FNEG [[INT7]] - ; GFX10: [[FMA5:%[0-9]+]]:_(s32) = G_FMA [[FNEG1]], [[INT11]], [[C]] - ; GFX10: [[FMA6:%[0-9]+]]:_(s32) = G_FMA [[FMA5]], [[INT11]], [[INT11]] - ; GFX10: [[FMUL1:%[0-9]+]]:_(s32) = G_FMUL [[INT9]], [[FMA6]] - ; GFX10: [[FMA7:%[0-9]+]]:_(s32) = G_FMA [[FNEG1]], [[FMUL1]], [[INT9]] - ; GFX10: [[FMA8:%[0-9]+]]:_(s32) = G_FMA [[FMA7]], [[FMA6]], [[FMUL1]] - ; GFX10: 
[[FMA9:%[0-9]+]]:_(s32) = G_FMA [[FNEG1]], [[FMA8]], [[INT9]] - ; GFX10: [[INT12:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.fmas), [[FMA9]](s32), [[FMA6]](s32), [[FMA8]](s32), [[INT10]](s1) - ; GFX10: [[INT13:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.fixup), [[INT12]](s32), [[UV4]](s32), [[UV1]](s32) - ; GFX10: [[INT14:%[0-9]+]]:_(s32), [[INT15:%[0-9]+]]:_(s1) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.scale), [[UV2]](s32), [[UV5]](s32), 0 - ; GFX10: [[INT16:%[0-9]+]]:_(s32), [[INT17:%[0-9]+]]:_(s1) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.scale), [[UV2]](s32), [[UV5]](s32), 1 - ; GFX10: [[INT18:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.rcp), [[INT14]](s32) - ; GFX10: [[FNEG2:%[0-9]+]]:_(s32) = G_FNEG [[INT14]] - ; GFX10: [[FMA10:%[0-9]+]]:_(s32) = G_FMA [[FNEG2]], [[INT18]], [[C]] - ; GFX10: [[FMA11:%[0-9]+]]:_(s32) = G_FMA [[FMA10]], [[INT18]], [[INT18]] - ; GFX10: [[FMUL2:%[0-9]+]]:_(s32) = G_FMUL [[INT16]], [[FMA11]] - ; GFX10: [[FMA12:%[0-9]+]]:_(s32) = G_FMA [[FNEG2]], [[FMUL2]], [[INT16]] - ; GFX10: [[FMA13:%[0-9]+]]:_(s32) = G_FMA [[FMA12]], [[FMA11]], [[FMUL2]] - ; GFX10: [[FMA14:%[0-9]+]]:_(s32) = G_FMA [[FNEG2]], [[FMA13]], [[INT16]] - ; GFX10: [[INT19:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.fmas), [[FMA14]](s32), [[FMA11]](s32), [[FMA13]](s32), [[INT17]](s1) - ; GFX10: [[INT20:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.fixup), [[INT19]](s32), [[UV5]](s32), [[UV2]](s32) - ; GFX10: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[INT6]](s32), [[INT13]](s32), [[INT20]](s32) - ; GFX10: $vgpr0_vgpr1_vgpr2 = COPY [[BUILD_VECTOR]](<3 x s32>) + ; GFX10-NEXT: [[COPY1:%[0-9]+]]:_(<3 x s32>) = COPY $vgpr3_vgpr4_vgpr5 + ; GFX10-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<3 x s32>) + ; GFX10-NEXT: [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](<3 x s32>) + ; GFX10-NEXT: [[C:%[0-9]+]]:_(s32) = G_FCONSTANT float 1.000000e+00 + ; GFX10-NEXT: [[INT:%[0-9]+]]:_(s32), [[INT1:%[0-9]+]]:_(s1) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.scale), [[UV]](s32), [[UV3]](s32), 0 + ; GFX10-NEXT: [[INT2:%[0-9]+]]:_(s32), [[INT3:%[0-9]+]]:_(s1) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.scale), [[UV]](s32), [[UV3]](s32), 1 + ; GFX10-NEXT: [[INT4:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.rcp), [[INT]](s32) + ; GFX10-NEXT: [[FNEG:%[0-9]+]]:_(s32) = G_FNEG [[INT]] + ; GFX10-NEXT: [[FMA:%[0-9]+]]:_(s32) = G_FMA [[FNEG]], [[INT4]], [[C]] + ; GFX10-NEXT: [[FMA1:%[0-9]+]]:_(s32) = G_FMA [[FMA]], [[INT4]], [[INT4]] + ; GFX10-NEXT: [[FMUL:%[0-9]+]]:_(s32) = G_FMUL [[INT2]], [[FMA1]] + ; GFX10-NEXT: [[FMA2:%[0-9]+]]:_(s32) = G_FMA [[FNEG]], [[FMUL]], [[INT2]] + ; GFX10-NEXT: [[FMA3:%[0-9]+]]:_(s32) = G_FMA [[FMA2]], [[FMA1]], [[FMUL]] + ; GFX10-NEXT: [[FMA4:%[0-9]+]]:_(s32) = G_FMA [[FNEG]], [[FMA3]], [[INT2]] + ; GFX10-NEXT: [[INT5:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.fmas), [[FMA4]](s32), [[FMA1]](s32), [[FMA3]](s32), [[INT3]](s1) + ; GFX10-NEXT: [[INT6:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.fixup), [[INT5]](s32), [[UV3]](s32), [[UV]](s32) + ; GFX10-NEXT: [[INT7:%[0-9]+]]:_(s32), [[INT8:%[0-9]+]]:_(s1) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.scale), [[UV1]](s32), [[UV4]](s32), 0 + ; GFX10-NEXT: [[INT9:%[0-9]+]]:_(s32), [[INT10:%[0-9]+]]:_(s1) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.scale), [[UV1]](s32), [[UV4]](s32), 1 + ; GFX10-NEXT: [[INT11:%[0-9]+]]:_(s32) = G_INTRINSIC 
intrinsic(@llvm.amdgcn.rcp), [[INT7]](s32) + ; GFX10-NEXT: [[FNEG1:%[0-9]+]]:_(s32) = G_FNEG [[INT7]] + ; GFX10-NEXT: [[FMA5:%[0-9]+]]:_(s32) = G_FMA [[FNEG1]], [[INT11]], [[C]] + ; GFX10-NEXT: [[FMA6:%[0-9]+]]:_(s32) = G_FMA [[FMA5]], [[INT11]], [[INT11]] + ; GFX10-NEXT: [[FMUL1:%[0-9]+]]:_(s32) = G_FMUL [[INT9]], [[FMA6]] + ; GFX10-NEXT: [[FMA7:%[0-9]+]]:_(s32) = G_FMA [[FNEG1]], [[FMUL1]], [[INT9]] + ; GFX10-NEXT: [[FMA8:%[0-9]+]]:_(s32) = G_FMA [[FMA7]], [[FMA6]], [[FMUL1]] + ; GFX10-NEXT: [[FMA9:%[0-9]+]]:_(s32) = G_FMA [[FNEG1]], [[FMA8]], [[INT9]] + ; GFX10-NEXT: [[INT12:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.fmas), [[FMA9]](s32), [[FMA6]](s32), [[FMA8]](s32), [[INT10]](s1) + ; GFX10-NEXT: [[INT13:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.fixup), [[INT12]](s32), [[UV4]](s32), [[UV1]](s32) + ; GFX10-NEXT: [[INT14:%[0-9]+]]:_(s32), [[INT15:%[0-9]+]]:_(s1) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.scale), [[UV2]](s32), [[UV5]](s32), 0 + ; GFX10-NEXT: [[INT16:%[0-9]+]]:_(s32), [[INT17:%[0-9]+]]:_(s1) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.scale), [[UV2]](s32), [[UV5]](s32), 1 + ; GFX10-NEXT: [[INT18:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.rcp), [[INT14]](s32) + ; GFX10-NEXT: [[FNEG2:%[0-9]+]]:_(s32) = G_FNEG [[INT14]] + ; GFX10-NEXT: [[FMA10:%[0-9]+]]:_(s32) = G_FMA [[FNEG2]], [[INT18]], [[C]] + ; GFX10-NEXT: [[FMA11:%[0-9]+]]:_(s32) = G_FMA [[FMA10]], [[INT18]], [[INT18]] + ; GFX10-NEXT: [[FMUL2:%[0-9]+]]:_(s32) = G_FMUL [[INT16]], [[FMA11]] + ; GFX10-NEXT: [[FMA12:%[0-9]+]]:_(s32) = G_FMA [[FNEG2]], [[FMUL2]], [[INT16]] + ; GFX10-NEXT: [[FMA13:%[0-9]+]]:_(s32) = G_FMA [[FMA12]], [[FMA11]], [[FMUL2]] + ; GFX10-NEXT: [[FMA14:%[0-9]+]]:_(s32) = G_FMA [[FNEG2]], [[FMA13]], [[INT16]] + ; GFX10-NEXT: [[INT19:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.fmas), [[FMA14]](s32), [[FMA11]](s32), [[FMA13]](s32), [[INT17]](s1) + ; GFX10-NEXT: [[INT20:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.fixup), [[INT19]](s32), [[UV5]](s32), [[UV2]](s32) + ; GFX10-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[INT6]](s32), [[INT13]](s32), [[INT20]](s32) + ; GFX10-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[BUILD_VECTOR]](<3 x s32>) %0:_(<3 x s32>) = COPY $vgpr0_vgpr1_vgpr2 %1:_(<3 x s32>) = COPY $vgpr3_vgpr4_vgpr5 %2:_(<3 x s32>) = G_FDIV %0, %1 @@ -1035,172 +1035,172 @@ body: | ; SI-LABEL: name: test_fdiv_v2s64 ; SI: [[COPY:%[0-9]+]]:_(<2 x s64>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3 - ; SI: [[COPY1:%[0-9]+]]:_(<2 x s64>) = COPY $vgpr4_vgpr5_vgpr6_vgpr7 - ; SI: [[UV:%[0-9]+]]:_(s64), [[UV1:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[COPY]](<2 x s64>) - ; SI: [[UV2:%[0-9]+]]:_(s64), [[UV3:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[COPY1]](<2 x s64>) - ; SI: [[C:%[0-9]+]]:_(s64) = G_FCONSTANT double 1.000000e+00 - ; SI: [[INT:%[0-9]+]]:_(s64), [[INT1:%[0-9]+]]:_(s1) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.scale), [[UV]](s64), [[UV2]](s64), 0 - ; SI: [[FNEG:%[0-9]+]]:_(s64) = G_FNEG [[INT]] - ; SI: [[INT2:%[0-9]+]]:_(s64) = G_INTRINSIC intrinsic(@llvm.amdgcn.rcp), [[INT]](s64) - ; SI: [[FMA:%[0-9]+]]:_(s64) = G_FMA [[FNEG]], [[INT2]], [[C]] - ; SI: [[FMA1:%[0-9]+]]:_(s64) = G_FMA [[INT2]], [[FMA]], [[INT2]] - ; SI: [[FMA2:%[0-9]+]]:_(s64) = G_FMA [[FNEG]], [[FMA1]], [[C]] - ; SI: [[INT3:%[0-9]+]]:_(s64), [[INT4:%[0-9]+]]:_(s1) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.scale), [[UV]](s64), [[UV2]](s64), 1 - ; SI: [[FMA3:%[0-9]+]]:_(s64) = G_FMA [[FMA1]], [[FMA2]], [[FMA1]] - ; SI: [[FMUL:%[0-9]+]]:_(s64) = G_FMUL [[INT3]], [[FMA3]] - ; SI: 
[[FMA4:%[0-9]+]]:_(s64) = G_FMA [[FNEG]], [[FMUL]], [[INT3]] - ; SI: [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[UV]](s64) - ; SI: [[UV6:%[0-9]+]]:_(s32), [[UV7:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[UV2]](s64) - ; SI: [[UV8:%[0-9]+]]:_(s32), [[UV9:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[INT]](s64) - ; SI: [[UV10:%[0-9]+]]:_(s32), [[UV11:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[INT3]](s64) - ; SI: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[UV5]](s32), [[UV11]] - ; SI: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[UV7]](s32), [[UV9]] - ; SI: [[XOR:%[0-9]+]]:_(s1) = G_XOR [[ICMP]], [[ICMP1]] - ; SI: [[INT5:%[0-9]+]]:_(s64) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.fmas), [[FMA4]](s64), [[FMA3]](s64), [[FMUL]](s64), [[XOR]](s1) - ; SI: [[INT6:%[0-9]+]]:_(s64) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.fixup), [[INT5]](s64), [[UV2]](s64), [[UV]](s64) - ; SI: [[INT7:%[0-9]+]]:_(s64), [[INT8:%[0-9]+]]:_(s1) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.scale), [[UV1]](s64), [[UV3]](s64), 0 - ; SI: [[FNEG1:%[0-9]+]]:_(s64) = G_FNEG [[INT7]] - ; SI: [[INT9:%[0-9]+]]:_(s64) = G_INTRINSIC intrinsic(@llvm.amdgcn.rcp), [[INT7]](s64) - ; SI: [[FMA5:%[0-9]+]]:_(s64) = G_FMA [[FNEG1]], [[INT9]], [[C]] - ; SI: [[FMA6:%[0-9]+]]:_(s64) = G_FMA [[INT9]], [[FMA5]], [[INT9]] - ; SI: [[FMA7:%[0-9]+]]:_(s64) = G_FMA [[FNEG1]], [[FMA6]], [[C]] - ; SI: [[INT10:%[0-9]+]]:_(s64), [[INT11:%[0-9]+]]:_(s1) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.scale), [[UV1]](s64), [[UV3]](s64), 1 - ; SI: [[FMA8:%[0-9]+]]:_(s64) = G_FMA [[FMA6]], [[FMA7]], [[FMA6]] - ; SI: [[FMUL1:%[0-9]+]]:_(s64) = G_FMUL [[INT10]], [[FMA8]] - ; SI: [[FMA9:%[0-9]+]]:_(s64) = G_FMA [[FNEG1]], [[FMUL1]], [[INT10]] - ; SI: [[UV12:%[0-9]+]]:_(s32), [[UV13:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[UV1]](s64) - ; SI: [[UV14:%[0-9]+]]:_(s32), [[UV15:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[UV3]](s64) - ; SI: [[UV16:%[0-9]+]]:_(s32), [[UV17:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[INT7]](s64) - ; SI: [[UV18:%[0-9]+]]:_(s32), [[UV19:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[INT10]](s64) - ; SI: [[ICMP2:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[UV13]](s32), [[UV19]] - ; SI: [[ICMP3:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[UV15]](s32), [[UV17]] - ; SI: [[XOR1:%[0-9]+]]:_(s1) = G_XOR [[ICMP2]], [[ICMP3]] - ; SI: [[INT12:%[0-9]+]]:_(s64) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.fmas), [[FMA9]](s64), [[FMA8]](s64), [[FMUL1]](s64), [[XOR1]](s1) - ; SI: [[INT13:%[0-9]+]]:_(s64) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.fixup), [[INT12]](s64), [[UV3]](s64), [[UV1]](s64) - ; SI: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s64>) = G_BUILD_VECTOR [[INT6]](s64), [[INT13]](s64) - ; SI: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<2 x s64>) + ; SI-NEXT: [[COPY1:%[0-9]+]]:_(<2 x s64>) = COPY $vgpr4_vgpr5_vgpr6_vgpr7 + ; SI-NEXT: [[UV:%[0-9]+]]:_(s64), [[UV1:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[COPY]](<2 x s64>) + ; SI-NEXT: [[UV2:%[0-9]+]]:_(s64), [[UV3:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[COPY1]](<2 x s64>) + ; SI-NEXT: [[C:%[0-9]+]]:_(s64) = G_FCONSTANT double 1.000000e+00 + ; SI-NEXT: [[INT:%[0-9]+]]:_(s64), [[INT1:%[0-9]+]]:_(s1) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.scale), [[UV]](s64), [[UV2]](s64), 0 + ; SI-NEXT: [[FNEG:%[0-9]+]]:_(s64) = G_FNEG [[INT]] + ; SI-NEXT: [[INT2:%[0-9]+]]:_(s64) = G_INTRINSIC intrinsic(@llvm.amdgcn.rcp), [[INT]](s64) + ; SI-NEXT: [[FMA:%[0-9]+]]:_(s64) = G_FMA [[FNEG]], [[INT2]], [[C]] + ; SI-NEXT: [[FMA1:%[0-9]+]]:_(s64) = G_FMA [[INT2]], [[FMA]], [[INT2]] + ; SI-NEXT: [[FMA2:%[0-9]+]]:_(s64) = G_FMA [[FNEG]], [[FMA1]], [[C]] + ; SI-NEXT: 
[[INT3:%[0-9]+]]:_(s64), [[INT4:%[0-9]+]]:_(s1) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.scale), [[UV]](s64), [[UV2]](s64), 1 + ; SI-NEXT: [[FMA3:%[0-9]+]]:_(s64) = G_FMA [[FMA1]], [[FMA2]], [[FMA1]] + ; SI-NEXT: [[FMUL:%[0-9]+]]:_(s64) = G_FMUL [[INT3]], [[FMA3]] + ; SI-NEXT: [[FMA4:%[0-9]+]]:_(s64) = G_FMA [[FNEG]], [[FMUL]], [[INT3]] + ; SI-NEXT: [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[UV]](s64) + ; SI-NEXT: [[UV6:%[0-9]+]]:_(s32), [[UV7:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[UV2]](s64) + ; SI-NEXT: [[UV8:%[0-9]+]]:_(s32), [[UV9:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[INT]](s64) + ; SI-NEXT: [[UV10:%[0-9]+]]:_(s32), [[UV11:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[INT3]](s64) + ; SI-NEXT: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[UV5]](s32), [[UV11]] + ; SI-NEXT: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[UV7]](s32), [[UV9]] + ; SI-NEXT: [[XOR:%[0-9]+]]:_(s1) = G_XOR [[ICMP]], [[ICMP1]] + ; SI-NEXT: [[INT5:%[0-9]+]]:_(s64) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.fmas), [[FMA4]](s64), [[FMA3]](s64), [[FMUL]](s64), [[XOR]](s1) + ; SI-NEXT: [[INT6:%[0-9]+]]:_(s64) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.fixup), [[INT5]](s64), [[UV2]](s64), [[UV]](s64) + ; SI-NEXT: [[INT7:%[0-9]+]]:_(s64), [[INT8:%[0-9]+]]:_(s1) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.scale), [[UV1]](s64), [[UV3]](s64), 0 + ; SI-NEXT: [[FNEG1:%[0-9]+]]:_(s64) = G_FNEG [[INT7]] + ; SI-NEXT: [[INT9:%[0-9]+]]:_(s64) = G_INTRINSIC intrinsic(@llvm.amdgcn.rcp), [[INT7]](s64) + ; SI-NEXT: [[FMA5:%[0-9]+]]:_(s64) = G_FMA [[FNEG1]], [[INT9]], [[C]] + ; SI-NEXT: [[FMA6:%[0-9]+]]:_(s64) = G_FMA [[INT9]], [[FMA5]], [[INT9]] + ; SI-NEXT: [[FMA7:%[0-9]+]]:_(s64) = G_FMA [[FNEG1]], [[FMA6]], [[C]] + ; SI-NEXT: [[INT10:%[0-9]+]]:_(s64), [[INT11:%[0-9]+]]:_(s1) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.scale), [[UV1]](s64), [[UV3]](s64), 1 + ; SI-NEXT: [[FMA8:%[0-9]+]]:_(s64) = G_FMA [[FMA6]], [[FMA7]], [[FMA6]] + ; SI-NEXT: [[FMUL1:%[0-9]+]]:_(s64) = G_FMUL [[INT10]], [[FMA8]] + ; SI-NEXT: [[FMA9:%[0-9]+]]:_(s64) = G_FMA [[FNEG1]], [[FMUL1]], [[INT10]] + ; SI-NEXT: [[UV12:%[0-9]+]]:_(s32), [[UV13:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[UV1]](s64) + ; SI-NEXT: [[UV14:%[0-9]+]]:_(s32), [[UV15:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[UV3]](s64) + ; SI-NEXT: [[UV16:%[0-9]+]]:_(s32), [[UV17:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[INT7]](s64) + ; SI-NEXT: [[UV18:%[0-9]+]]:_(s32), [[UV19:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[INT10]](s64) + ; SI-NEXT: [[ICMP2:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[UV13]](s32), [[UV19]] + ; SI-NEXT: [[ICMP3:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[UV15]](s32), [[UV17]] + ; SI-NEXT: [[XOR1:%[0-9]+]]:_(s1) = G_XOR [[ICMP2]], [[ICMP3]] + ; SI-NEXT: [[INT12:%[0-9]+]]:_(s64) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.fmas), [[FMA9]](s64), [[FMA8]](s64), [[FMUL1]](s64), [[XOR1]](s1) + ; SI-NEXT: [[INT13:%[0-9]+]]:_(s64) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.fixup), [[INT12]](s64), [[UV3]](s64), [[UV1]](s64) + ; SI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s64>) = G_BUILD_VECTOR [[INT6]](s64), [[INT13]](s64) + ; SI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<2 x s64>) ; VI-LABEL: name: test_fdiv_v2s64 ; VI: [[COPY:%[0-9]+]]:_(<2 x s64>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3 - ; VI: [[COPY1:%[0-9]+]]:_(<2 x s64>) = COPY $vgpr4_vgpr5_vgpr6_vgpr7 - ; VI: [[UV:%[0-9]+]]:_(s64), [[UV1:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[COPY]](<2 x s64>) - ; VI: [[UV2:%[0-9]+]]:_(s64), [[UV3:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[COPY1]](<2 x s64>) - ; VI: [[C:%[0-9]+]]:_(s64) = G_FCONSTANT double 1.000000e+00 - ; 
VI: [[INT:%[0-9]+]]:_(s64), [[INT1:%[0-9]+]]:_(s1) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.scale), [[UV]](s64), [[UV2]](s64), 0 - ; VI: [[FNEG:%[0-9]+]]:_(s64) = G_FNEG [[INT]] - ; VI: [[INT2:%[0-9]+]]:_(s64) = G_INTRINSIC intrinsic(@llvm.amdgcn.rcp), [[INT]](s64) - ; VI: [[FMA:%[0-9]+]]:_(s64) = G_FMA [[FNEG]], [[INT2]], [[C]] - ; VI: [[FMA1:%[0-9]+]]:_(s64) = G_FMA [[INT2]], [[FMA]], [[INT2]] - ; VI: [[FMA2:%[0-9]+]]:_(s64) = G_FMA [[FNEG]], [[FMA1]], [[C]] - ; VI: [[INT3:%[0-9]+]]:_(s64), [[INT4:%[0-9]+]]:_(s1) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.scale), [[UV]](s64), [[UV2]](s64), 1 - ; VI: [[FMA3:%[0-9]+]]:_(s64) = G_FMA [[FMA1]], [[FMA2]], [[FMA1]] - ; VI: [[FMUL:%[0-9]+]]:_(s64) = G_FMUL [[INT3]], [[FMA3]] - ; VI: [[FMA4:%[0-9]+]]:_(s64) = G_FMA [[FNEG]], [[FMUL]], [[INT3]] - ; VI: [[INT5:%[0-9]+]]:_(s64) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.fmas), [[FMA4]](s64), [[FMA3]](s64), [[FMUL]](s64), [[INT4]](s1) - ; VI: [[INT6:%[0-9]+]]:_(s64) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.fixup), [[INT5]](s64), [[UV2]](s64), [[UV]](s64) - ; VI: [[INT7:%[0-9]+]]:_(s64), [[INT8:%[0-9]+]]:_(s1) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.scale), [[UV1]](s64), [[UV3]](s64), 0 - ; VI: [[FNEG1:%[0-9]+]]:_(s64) = G_FNEG [[INT7]] - ; VI: [[INT9:%[0-9]+]]:_(s64) = G_INTRINSIC intrinsic(@llvm.amdgcn.rcp), [[INT7]](s64) - ; VI: [[FMA5:%[0-9]+]]:_(s64) = G_FMA [[FNEG1]], [[INT9]], [[C]] - ; VI: [[FMA6:%[0-9]+]]:_(s64) = G_FMA [[INT9]], [[FMA5]], [[INT9]] - ; VI: [[FMA7:%[0-9]+]]:_(s64) = G_FMA [[FNEG1]], [[FMA6]], [[C]] - ; VI: [[INT10:%[0-9]+]]:_(s64), [[INT11:%[0-9]+]]:_(s1) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.scale), [[UV1]](s64), [[UV3]](s64), 1 - ; VI: [[FMA8:%[0-9]+]]:_(s64) = G_FMA [[FMA6]], [[FMA7]], [[FMA6]] - ; VI: [[FMUL1:%[0-9]+]]:_(s64) = G_FMUL [[INT10]], [[FMA8]] - ; VI: [[FMA9:%[0-9]+]]:_(s64) = G_FMA [[FNEG1]], [[FMUL1]], [[INT10]] - ; VI: [[INT12:%[0-9]+]]:_(s64) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.fmas), [[FMA9]](s64), [[FMA8]](s64), [[FMUL1]](s64), [[INT11]](s1) - ; VI: [[INT13:%[0-9]+]]:_(s64) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.fixup), [[INT12]](s64), [[UV3]](s64), [[UV1]](s64) - ; VI: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s64>) = G_BUILD_VECTOR [[INT6]](s64), [[INT13]](s64) - ; VI: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<2 x s64>) + ; VI-NEXT: [[COPY1:%[0-9]+]]:_(<2 x s64>) = COPY $vgpr4_vgpr5_vgpr6_vgpr7 + ; VI-NEXT: [[UV:%[0-9]+]]:_(s64), [[UV1:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[COPY]](<2 x s64>) + ; VI-NEXT: [[UV2:%[0-9]+]]:_(s64), [[UV3:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[COPY1]](<2 x s64>) + ; VI-NEXT: [[C:%[0-9]+]]:_(s64) = G_FCONSTANT double 1.000000e+00 + ; VI-NEXT: [[INT:%[0-9]+]]:_(s64), [[INT1:%[0-9]+]]:_(s1) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.scale), [[UV]](s64), [[UV2]](s64), 0 + ; VI-NEXT: [[FNEG:%[0-9]+]]:_(s64) = G_FNEG [[INT]] + ; VI-NEXT: [[INT2:%[0-9]+]]:_(s64) = G_INTRINSIC intrinsic(@llvm.amdgcn.rcp), [[INT]](s64) + ; VI-NEXT: [[FMA:%[0-9]+]]:_(s64) = G_FMA [[FNEG]], [[INT2]], [[C]] + ; VI-NEXT: [[FMA1:%[0-9]+]]:_(s64) = G_FMA [[INT2]], [[FMA]], [[INT2]] + ; VI-NEXT: [[FMA2:%[0-9]+]]:_(s64) = G_FMA [[FNEG]], [[FMA1]], [[C]] + ; VI-NEXT: [[INT3:%[0-9]+]]:_(s64), [[INT4:%[0-9]+]]:_(s1) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.scale), [[UV]](s64), [[UV2]](s64), 1 + ; VI-NEXT: [[FMA3:%[0-9]+]]:_(s64) = G_FMA [[FMA1]], [[FMA2]], [[FMA1]] + ; VI-NEXT: [[FMUL:%[0-9]+]]:_(s64) = G_FMUL [[INT3]], [[FMA3]] + ; VI-NEXT: [[FMA4:%[0-9]+]]:_(s64) = G_FMA [[FNEG]], [[FMUL]], [[INT3]] + ; VI-NEXT: [[INT5:%[0-9]+]]:_(s64) = G_INTRINSIC 
intrinsic(@llvm.amdgcn.div.fmas), [[FMA4]](s64), [[FMA3]](s64), [[FMUL]](s64), [[INT4]](s1) + ; VI-NEXT: [[INT6:%[0-9]+]]:_(s64) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.fixup), [[INT5]](s64), [[UV2]](s64), [[UV]](s64) + ; VI-NEXT: [[INT7:%[0-9]+]]:_(s64), [[INT8:%[0-9]+]]:_(s1) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.scale), [[UV1]](s64), [[UV3]](s64), 0 + ; VI-NEXT: [[FNEG1:%[0-9]+]]:_(s64) = G_FNEG [[INT7]] + ; VI-NEXT: [[INT9:%[0-9]+]]:_(s64) = G_INTRINSIC intrinsic(@llvm.amdgcn.rcp), [[INT7]](s64) + ; VI-NEXT: [[FMA5:%[0-9]+]]:_(s64) = G_FMA [[FNEG1]], [[INT9]], [[C]] + ; VI-NEXT: [[FMA6:%[0-9]+]]:_(s64) = G_FMA [[INT9]], [[FMA5]], [[INT9]] + ; VI-NEXT: [[FMA7:%[0-9]+]]:_(s64) = G_FMA [[FNEG1]], [[FMA6]], [[C]] + ; VI-NEXT: [[INT10:%[0-9]+]]:_(s64), [[INT11:%[0-9]+]]:_(s1) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.scale), [[UV1]](s64), [[UV3]](s64), 1 + ; VI-NEXT: [[FMA8:%[0-9]+]]:_(s64) = G_FMA [[FMA6]], [[FMA7]], [[FMA6]] + ; VI-NEXT: [[FMUL1:%[0-9]+]]:_(s64) = G_FMUL [[INT10]], [[FMA8]] + ; VI-NEXT: [[FMA9:%[0-9]+]]:_(s64) = G_FMA [[FNEG1]], [[FMUL1]], [[INT10]] + ; VI-NEXT: [[INT12:%[0-9]+]]:_(s64) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.fmas), [[FMA9]](s64), [[FMA8]](s64), [[FMUL1]](s64), [[INT11]](s1) + ; VI-NEXT: [[INT13:%[0-9]+]]:_(s64) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.fixup), [[INT12]](s64), [[UV3]](s64), [[UV1]](s64) + ; VI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s64>) = G_BUILD_VECTOR [[INT6]](s64), [[INT13]](s64) + ; VI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<2 x s64>) ; GFX9-LABEL: name: test_fdiv_v2s64 ; GFX9: [[COPY:%[0-9]+]]:_(<2 x s64>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3 - ; GFX9: [[COPY1:%[0-9]+]]:_(<2 x s64>) = COPY $vgpr4_vgpr5_vgpr6_vgpr7 - ; GFX9: [[UV:%[0-9]+]]:_(s64), [[UV1:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[COPY]](<2 x s64>) - ; GFX9: [[UV2:%[0-9]+]]:_(s64), [[UV3:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[COPY1]](<2 x s64>) - ; GFX9: [[C:%[0-9]+]]:_(s64) = G_FCONSTANT double 1.000000e+00 - ; GFX9: [[INT:%[0-9]+]]:_(s64), [[INT1:%[0-9]+]]:_(s1) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.scale), [[UV]](s64), [[UV2]](s64), 0 - ; GFX9: [[FNEG:%[0-9]+]]:_(s64) = G_FNEG [[INT]] - ; GFX9: [[INT2:%[0-9]+]]:_(s64) = G_INTRINSIC intrinsic(@llvm.amdgcn.rcp), [[INT]](s64) - ; GFX9: [[FMA:%[0-9]+]]:_(s64) = G_FMA [[FNEG]], [[INT2]], [[C]] - ; GFX9: [[FMA1:%[0-9]+]]:_(s64) = G_FMA [[INT2]], [[FMA]], [[INT2]] - ; GFX9: [[FMA2:%[0-9]+]]:_(s64) = G_FMA [[FNEG]], [[FMA1]], [[C]] - ; GFX9: [[INT3:%[0-9]+]]:_(s64), [[INT4:%[0-9]+]]:_(s1) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.scale), [[UV]](s64), [[UV2]](s64), 1 - ; GFX9: [[FMA3:%[0-9]+]]:_(s64) = G_FMA [[FMA1]], [[FMA2]], [[FMA1]] - ; GFX9: [[FMUL:%[0-9]+]]:_(s64) = G_FMUL [[INT3]], [[FMA3]] - ; GFX9: [[FMA4:%[0-9]+]]:_(s64) = G_FMA [[FNEG]], [[FMUL]], [[INT3]] - ; GFX9: [[INT5:%[0-9]+]]:_(s64) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.fmas), [[FMA4]](s64), [[FMA3]](s64), [[FMUL]](s64), [[INT4]](s1) - ; GFX9: [[INT6:%[0-9]+]]:_(s64) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.fixup), [[INT5]](s64), [[UV2]](s64), [[UV]](s64) - ; GFX9: [[INT7:%[0-9]+]]:_(s64), [[INT8:%[0-9]+]]:_(s1) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.scale), [[UV1]](s64), [[UV3]](s64), 0 - ; GFX9: [[FNEG1:%[0-9]+]]:_(s64) = G_FNEG [[INT7]] - ; GFX9: [[INT9:%[0-9]+]]:_(s64) = G_INTRINSIC intrinsic(@llvm.amdgcn.rcp), [[INT7]](s64) - ; GFX9: [[FMA5:%[0-9]+]]:_(s64) = G_FMA [[FNEG1]], [[INT9]], [[C]] - ; GFX9: [[FMA6:%[0-9]+]]:_(s64) = G_FMA [[INT9]], [[FMA5]], [[INT9]] - ; GFX9: [[FMA7:%[0-9]+]]:_(s64) = G_FMA [[FNEG1]], [[FMA6]], [[C]] - ; GFX9: 
[[INT10:%[0-9]+]]:_(s64), [[INT11:%[0-9]+]]:_(s1) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.scale), [[UV1]](s64), [[UV3]](s64), 1 - ; GFX9: [[FMA8:%[0-9]+]]:_(s64) = G_FMA [[FMA6]], [[FMA7]], [[FMA6]] - ; GFX9: [[FMUL1:%[0-9]+]]:_(s64) = G_FMUL [[INT10]], [[FMA8]] - ; GFX9: [[FMA9:%[0-9]+]]:_(s64) = G_FMA [[FNEG1]], [[FMUL1]], [[INT10]] - ; GFX9: [[INT12:%[0-9]+]]:_(s64) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.fmas), [[FMA9]](s64), [[FMA8]](s64), [[FMUL1]](s64), [[INT11]](s1) - ; GFX9: [[INT13:%[0-9]+]]:_(s64) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.fixup), [[INT12]](s64), [[UV3]](s64), [[UV1]](s64) - ; GFX9: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s64>) = G_BUILD_VECTOR [[INT6]](s64), [[INT13]](s64) - ; GFX9: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<2 x s64>) + ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(<2 x s64>) = COPY $vgpr4_vgpr5_vgpr6_vgpr7 + ; GFX9-NEXT: [[UV:%[0-9]+]]:_(s64), [[UV1:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[COPY]](<2 x s64>) + ; GFX9-NEXT: [[UV2:%[0-9]+]]:_(s64), [[UV3:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[COPY1]](<2 x s64>) + ; GFX9-NEXT: [[C:%[0-9]+]]:_(s64) = G_FCONSTANT double 1.000000e+00 + ; GFX9-NEXT: [[INT:%[0-9]+]]:_(s64), [[INT1:%[0-9]+]]:_(s1) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.scale), [[UV]](s64), [[UV2]](s64), 0 + ; GFX9-NEXT: [[FNEG:%[0-9]+]]:_(s64) = G_FNEG [[INT]] + ; GFX9-NEXT: [[INT2:%[0-9]+]]:_(s64) = G_INTRINSIC intrinsic(@llvm.amdgcn.rcp), [[INT]](s64) + ; GFX9-NEXT: [[FMA:%[0-9]+]]:_(s64) = G_FMA [[FNEG]], [[INT2]], [[C]] + ; GFX9-NEXT: [[FMA1:%[0-9]+]]:_(s64) = G_FMA [[INT2]], [[FMA]], [[INT2]] + ; GFX9-NEXT: [[FMA2:%[0-9]+]]:_(s64) = G_FMA [[FNEG]], [[FMA1]], [[C]] + ; GFX9-NEXT: [[INT3:%[0-9]+]]:_(s64), [[INT4:%[0-9]+]]:_(s1) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.scale), [[UV]](s64), [[UV2]](s64), 1 + ; GFX9-NEXT: [[FMA3:%[0-9]+]]:_(s64) = G_FMA [[FMA1]], [[FMA2]], [[FMA1]] + ; GFX9-NEXT: [[FMUL:%[0-9]+]]:_(s64) = G_FMUL [[INT3]], [[FMA3]] + ; GFX9-NEXT: [[FMA4:%[0-9]+]]:_(s64) = G_FMA [[FNEG]], [[FMUL]], [[INT3]] + ; GFX9-NEXT: [[INT5:%[0-9]+]]:_(s64) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.fmas), [[FMA4]](s64), [[FMA3]](s64), [[FMUL]](s64), [[INT4]](s1) + ; GFX9-NEXT: [[INT6:%[0-9]+]]:_(s64) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.fixup), [[INT5]](s64), [[UV2]](s64), [[UV]](s64) + ; GFX9-NEXT: [[INT7:%[0-9]+]]:_(s64), [[INT8:%[0-9]+]]:_(s1) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.scale), [[UV1]](s64), [[UV3]](s64), 0 + ; GFX9-NEXT: [[FNEG1:%[0-9]+]]:_(s64) = G_FNEG [[INT7]] + ; GFX9-NEXT: [[INT9:%[0-9]+]]:_(s64) = G_INTRINSIC intrinsic(@llvm.amdgcn.rcp), [[INT7]](s64) + ; GFX9-NEXT: [[FMA5:%[0-9]+]]:_(s64) = G_FMA [[FNEG1]], [[INT9]], [[C]] + ; GFX9-NEXT: [[FMA6:%[0-9]+]]:_(s64) = G_FMA [[INT9]], [[FMA5]], [[INT9]] + ; GFX9-NEXT: [[FMA7:%[0-9]+]]:_(s64) = G_FMA [[FNEG1]], [[FMA6]], [[C]] + ; GFX9-NEXT: [[INT10:%[0-9]+]]:_(s64), [[INT11:%[0-9]+]]:_(s1) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.scale), [[UV1]](s64), [[UV3]](s64), 1 + ; GFX9-NEXT: [[FMA8:%[0-9]+]]:_(s64) = G_FMA [[FMA6]], [[FMA7]], [[FMA6]] + ; GFX9-NEXT: [[FMUL1:%[0-9]+]]:_(s64) = G_FMUL [[INT10]], [[FMA8]] + ; GFX9-NEXT: [[FMA9:%[0-9]+]]:_(s64) = G_FMA [[FNEG1]], [[FMUL1]], [[INT10]] + ; GFX9-NEXT: [[INT12:%[0-9]+]]:_(s64) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.fmas), [[FMA9]](s64), [[FMA8]](s64), [[FMUL1]](s64), [[INT11]](s1) + ; GFX9-NEXT: [[INT13:%[0-9]+]]:_(s64) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.fixup), [[INT12]](s64), [[UV3]](s64), [[UV1]](s64) + ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s64>) = G_BUILD_VECTOR [[INT6]](s64), [[INT13]](s64) + ; GFX9-NEXT: 
$vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<2 x s64>) ; GFX9-UNSAFE-LABEL: name: test_fdiv_v2s64 ; GFX9-UNSAFE: [[COPY:%[0-9]+]]:_(<2 x s64>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3 - ; GFX9-UNSAFE: [[COPY1:%[0-9]+]]:_(<2 x s64>) = COPY $vgpr4_vgpr5_vgpr6_vgpr7 - ; GFX9-UNSAFE: [[UV:%[0-9]+]]:_(s64), [[UV1:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[COPY]](<2 x s64>) - ; GFX9-UNSAFE: [[UV2:%[0-9]+]]:_(s64), [[UV3:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[COPY1]](<2 x s64>) - ; GFX9-UNSAFE: [[FNEG:%[0-9]+]]:_(s64) = G_FNEG [[UV2]] - ; GFX9-UNSAFE: [[C:%[0-9]+]]:_(s64) = G_FCONSTANT double 1.000000e+00 - ; GFX9-UNSAFE: [[INT:%[0-9]+]]:_(s64) = G_INTRINSIC intrinsic(@llvm.amdgcn.rcp), [[UV2]](s64) - ; GFX9-UNSAFE: [[FMA:%[0-9]+]]:_(s64) = G_FMA [[FNEG]], [[INT]], [[C]] - ; GFX9-UNSAFE: [[FMA1:%[0-9]+]]:_(s64) = G_FMA [[FMA]], [[INT]], [[INT]] - ; GFX9-UNSAFE: [[FMA2:%[0-9]+]]:_(s64) = G_FMA [[FNEG]], [[FMA1]], [[C]] - ; GFX9-UNSAFE: [[FMA3:%[0-9]+]]:_(s64) = G_FMA [[FMA2]], [[FMA1]], [[FMA1]] - ; GFX9-UNSAFE: [[FMUL:%[0-9]+]]:_(s64) = G_FMUL [[UV]], [[FMA3]] - ; GFX9-UNSAFE: [[FMA4:%[0-9]+]]:_(s64) = G_FMA [[FNEG]], [[FMUL]], [[UV]] - ; GFX9-UNSAFE: [[FMA5:%[0-9]+]]:_(s64) = G_FMA [[FMA4]], [[FMA3]], [[FMUL]] - ; GFX9-UNSAFE: [[FNEG1:%[0-9]+]]:_(s64) = G_FNEG [[UV3]] - ; GFX9-UNSAFE: [[INT1:%[0-9]+]]:_(s64) = G_INTRINSIC intrinsic(@llvm.amdgcn.rcp), [[UV3]](s64) - ; GFX9-UNSAFE: [[FMA6:%[0-9]+]]:_(s64) = G_FMA [[FNEG1]], [[INT1]], [[C]] - ; GFX9-UNSAFE: [[FMA7:%[0-9]+]]:_(s64) = G_FMA [[FMA6]], [[INT1]], [[INT1]] - ; GFX9-UNSAFE: [[FMA8:%[0-9]+]]:_(s64) = G_FMA [[FNEG1]], [[FMA7]], [[C]] - ; GFX9-UNSAFE: [[FMA9:%[0-9]+]]:_(s64) = G_FMA [[FMA8]], [[FMA7]], [[FMA7]] - ; GFX9-UNSAFE: [[FMUL1:%[0-9]+]]:_(s64) = G_FMUL [[UV1]], [[FMA9]] - ; GFX9-UNSAFE: [[FMA10:%[0-9]+]]:_(s64) = G_FMA [[FNEG1]], [[FMUL1]], [[UV1]] - ; GFX9-UNSAFE: [[FMA11:%[0-9]+]]:_(s64) = G_FMA [[FMA10]], [[FMA9]], [[FMUL1]] - ; GFX9-UNSAFE: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s64>) = G_BUILD_VECTOR [[FMA5]](s64), [[FMA11]](s64) - ; GFX9-UNSAFE: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<2 x s64>) + ; GFX9-UNSAFE-NEXT: [[COPY1:%[0-9]+]]:_(<2 x s64>) = COPY $vgpr4_vgpr5_vgpr6_vgpr7 + ; GFX9-UNSAFE-NEXT: [[UV:%[0-9]+]]:_(s64), [[UV1:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[COPY]](<2 x s64>) + ; GFX9-UNSAFE-NEXT: [[UV2:%[0-9]+]]:_(s64), [[UV3:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[COPY1]](<2 x s64>) + ; GFX9-UNSAFE-NEXT: [[FNEG:%[0-9]+]]:_(s64) = G_FNEG [[UV2]] + ; GFX9-UNSAFE-NEXT: [[C:%[0-9]+]]:_(s64) = G_FCONSTANT double 1.000000e+00 + ; GFX9-UNSAFE-NEXT: [[INT:%[0-9]+]]:_(s64) = G_INTRINSIC intrinsic(@llvm.amdgcn.rcp), [[UV2]](s64) + ; GFX9-UNSAFE-NEXT: [[FMA:%[0-9]+]]:_(s64) = G_FMA [[FNEG]], [[INT]], [[C]] + ; GFX9-UNSAFE-NEXT: [[FMA1:%[0-9]+]]:_(s64) = G_FMA [[FMA]], [[INT]], [[INT]] + ; GFX9-UNSAFE-NEXT: [[FMA2:%[0-9]+]]:_(s64) = G_FMA [[FNEG]], [[FMA1]], [[C]] + ; GFX9-UNSAFE-NEXT: [[FMA3:%[0-9]+]]:_(s64) = G_FMA [[FMA2]], [[FMA1]], [[FMA1]] + ; GFX9-UNSAFE-NEXT: [[FMUL:%[0-9]+]]:_(s64) = G_FMUL [[UV]], [[FMA3]] + ; GFX9-UNSAFE-NEXT: [[FMA4:%[0-9]+]]:_(s64) = G_FMA [[FNEG]], [[FMUL]], [[UV]] + ; GFX9-UNSAFE-NEXT: [[FMA5:%[0-9]+]]:_(s64) = G_FMA [[FMA4]], [[FMA3]], [[FMUL]] + ; GFX9-UNSAFE-NEXT: [[FNEG1:%[0-9]+]]:_(s64) = G_FNEG [[UV3]] + ; GFX9-UNSAFE-NEXT: [[INT1:%[0-9]+]]:_(s64) = G_INTRINSIC intrinsic(@llvm.amdgcn.rcp), [[UV3]](s64) + ; GFX9-UNSAFE-NEXT: [[FMA6:%[0-9]+]]:_(s64) = G_FMA [[FNEG1]], [[INT1]], [[C]] + ; GFX9-UNSAFE-NEXT: [[FMA7:%[0-9]+]]:_(s64) = G_FMA [[FMA6]], [[INT1]], [[INT1]] + ; GFX9-UNSAFE-NEXT: 
[[FMA8:%[0-9]+]]:_(s64) = G_FMA [[FNEG1]], [[FMA7]], [[C]] + ; GFX9-UNSAFE-NEXT: [[FMA9:%[0-9]+]]:_(s64) = G_FMA [[FMA8]], [[FMA7]], [[FMA7]] + ; GFX9-UNSAFE-NEXT: [[FMUL1:%[0-9]+]]:_(s64) = G_FMUL [[UV1]], [[FMA9]] + ; GFX9-UNSAFE-NEXT: [[FMA10:%[0-9]+]]:_(s64) = G_FMA [[FNEG1]], [[FMUL1]], [[UV1]] + ; GFX9-UNSAFE-NEXT: [[FMA11:%[0-9]+]]:_(s64) = G_FMA [[FMA10]], [[FMA9]], [[FMUL1]] + ; GFX9-UNSAFE-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s64>) = G_BUILD_VECTOR [[FMA5]](s64), [[FMA11]](s64) + ; GFX9-UNSAFE-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<2 x s64>) ; GFX10-LABEL: name: test_fdiv_v2s64 ; GFX10: [[COPY:%[0-9]+]]:_(<2 x s64>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3 - ; GFX10: [[COPY1:%[0-9]+]]:_(<2 x s64>) = COPY $vgpr4_vgpr5_vgpr6_vgpr7 - ; GFX10: [[UV:%[0-9]+]]:_(s64), [[UV1:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[COPY]](<2 x s64>) - ; GFX10: [[UV2:%[0-9]+]]:_(s64), [[UV3:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[COPY1]](<2 x s64>) - ; GFX10: [[C:%[0-9]+]]:_(s64) = G_FCONSTANT double 1.000000e+00 - ; GFX10: [[INT:%[0-9]+]]:_(s64), [[INT1:%[0-9]+]]:_(s1) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.scale), [[UV]](s64), [[UV2]](s64), 0 - ; GFX10: [[FNEG:%[0-9]+]]:_(s64) = G_FNEG [[INT]] - ; GFX10: [[INT2:%[0-9]+]]:_(s64) = G_INTRINSIC intrinsic(@llvm.amdgcn.rcp), [[INT]](s64) - ; GFX10: [[FMA:%[0-9]+]]:_(s64) = G_FMA [[FNEG]], [[INT2]], [[C]] - ; GFX10: [[FMA1:%[0-9]+]]:_(s64) = G_FMA [[INT2]], [[FMA]], [[INT2]] - ; GFX10: [[FMA2:%[0-9]+]]:_(s64) = G_FMA [[FNEG]], [[FMA1]], [[C]] - ; GFX10: [[INT3:%[0-9]+]]:_(s64), [[INT4:%[0-9]+]]:_(s1) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.scale), [[UV]](s64), [[UV2]](s64), 1 - ; GFX10: [[FMA3:%[0-9]+]]:_(s64) = G_FMA [[FMA1]], [[FMA2]], [[FMA1]] - ; GFX10: [[FMUL:%[0-9]+]]:_(s64) = G_FMUL [[INT3]], [[FMA3]] - ; GFX10: [[FMA4:%[0-9]+]]:_(s64) = G_FMA [[FNEG]], [[FMUL]], [[INT3]] - ; GFX10: [[INT5:%[0-9]+]]:_(s64) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.fmas), [[FMA4]](s64), [[FMA3]](s64), [[FMUL]](s64), [[INT4]](s1) - ; GFX10: [[INT6:%[0-9]+]]:_(s64) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.fixup), [[INT5]](s64), [[UV2]](s64), [[UV]](s64) - ; GFX10: [[INT7:%[0-9]+]]:_(s64), [[INT8:%[0-9]+]]:_(s1) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.scale), [[UV1]](s64), [[UV3]](s64), 0 - ; GFX10: [[FNEG1:%[0-9]+]]:_(s64) = G_FNEG [[INT7]] - ; GFX10: [[INT9:%[0-9]+]]:_(s64) = G_INTRINSIC intrinsic(@llvm.amdgcn.rcp), [[INT7]](s64) - ; GFX10: [[FMA5:%[0-9]+]]:_(s64) = G_FMA [[FNEG1]], [[INT9]], [[C]] - ; GFX10: [[FMA6:%[0-9]+]]:_(s64) = G_FMA [[INT9]], [[FMA5]], [[INT9]] - ; GFX10: [[FMA7:%[0-9]+]]:_(s64) = G_FMA [[FNEG1]], [[FMA6]], [[C]] - ; GFX10: [[INT10:%[0-9]+]]:_(s64), [[INT11:%[0-9]+]]:_(s1) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.scale), [[UV1]](s64), [[UV3]](s64), 1 - ; GFX10: [[FMA8:%[0-9]+]]:_(s64) = G_FMA [[FMA6]], [[FMA7]], [[FMA6]] - ; GFX10: [[FMUL1:%[0-9]+]]:_(s64) = G_FMUL [[INT10]], [[FMA8]] - ; GFX10: [[FMA9:%[0-9]+]]:_(s64) = G_FMA [[FNEG1]], [[FMUL1]], [[INT10]] - ; GFX10: [[INT12:%[0-9]+]]:_(s64) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.fmas), [[FMA9]](s64), [[FMA8]](s64), [[FMUL1]](s64), [[INT11]](s1) - ; GFX10: [[INT13:%[0-9]+]]:_(s64) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.fixup), [[INT12]](s64), [[UV3]](s64), [[UV1]](s64) - ; GFX10: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s64>) = G_BUILD_VECTOR [[INT6]](s64), [[INT13]](s64) - ; GFX10: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<2 x s64>) + ; GFX10-NEXT: [[COPY1:%[0-9]+]]:_(<2 x s64>) = COPY $vgpr4_vgpr5_vgpr6_vgpr7 + ; GFX10-NEXT: [[UV:%[0-9]+]]:_(s64), [[UV1:%[0-9]+]]:_(s64) = 
G_UNMERGE_VALUES [[COPY]](<2 x s64>) + ; GFX10-NEXT: [[UV2:%[0-9]+]]:_(s64), [[UV3:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[COPY1]](<2 x s64>) + ; GFX10-NEXT: [[C:%[0-9]+]]:_(s64) = G_FCONSTANT double 1.000000e+00 + ; GFX10-NEXT: [[INT:%[0-9]+]]:_(s64), [[INT1:%[0-9]+]]:_(s1) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.scale), [[UV]](s64), [[UV2]](s64), 0 + ; GFX10-NEXT: [[FNEG:%[0-9]+]]:_(s64) = G_FNEG [[INT]] + ; GFX10-NEXT: [[INT2:%[0-9]+]]:_(s64) = G_INTRINSIC intrinsic(@llvm.amdgcn.rcp), [[INT]](s64) + ; GFX10-NEXT: [[FMA:%[0-9]+]]:_(s64) = G_FMA [[FNEG]], [[INT2]], [[C]] + ; GFX10-NEXT: [[FMA1:%[0-9]+]]:_(s64) = G_FMA [[INT2]], [[FMA]], [[INT2]] + ; GFX10-NEXT: [[FMA2:%[0-9]+]]:_(s64) = G_FMA [[FNEG]], [[FMA1]], [[C]] + ; GFX10-NEXT: [[INT3:%[0-9]+]]:_(s64), [[INT4:%[0-9]+]]:_(s1) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.scale), [[UV]](s64), [[UV2]](s64), 1 + ; GFX10-NEXT: [[FMA3:%[0-9]+]]:_(s64) = G_FMA [[FMA1]], [[FMA2]], [[FMA1]] + ; GFX10-NEXT: [[FMUL:%[0-9]+]]:_(s64) = G_FMUL [[INT3]], [[FMA3]] + ; GFX10-NEXT: [[FMA4:%[0-9]+]]:_(s64) = G_FMA [[FNEG]], [[FMUL]], [[INT3]] + ; GFX10-NEXT: [[INT5:%[0-9]+]]:_(s64) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.fmas), [[FMA4]](s64), [[FMA3]](s64), [[FMUL]](s64), [[INT4]](s1) + ; GFX10-NEXT: [[INT6:%[0-9]+]]:_(s64) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.fixup), [[INT5]](s64), [[UV2]](s64), [[UV]](s64) + ; GFX10-NEXT: [[INT7:%[0-9]+]]:_(s64), [[INT8:%[0-9]+]]:_(s1) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.scale), [[UV1]](s64), [[UV3]](s64), 0 + ; GFX10-NEXT: [[FNEG1:%[0-9]+]]:_(s64) = G_FNEG [[INT7]] + ; GFX10-NEXT: [[INT9:%[0-9]+]]:_(s64) = G_INTRINSIC intrinsic(@llvm.amdgcn.rcp), [[INT7]](s64) + ; GFX10-NEXT: [[FMA5:%[0-9]+]]:_(s64) = G_FMA [[FNEG1]], [[INT9]], [[C]] + ; GFX10-NEXT: [[FMA6:%[0-9]+]]:_(s64) = G_FMA [[INT9]], [[FMA5]], [[INT9]] + ; GFX10-NEXT: [[FMA7:%[0-9]+]]:_(s64) = G_FMA [[FNEG1]], [[FMA6]], [[C]] + ; GFX10-NEXT: [[INT10:%[0-9]+]]:_(s64), [[INT11:%[0-9]+]]:_(s1) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.scale), [[UV1]](s64), [[UV3]](s64), 1 + ; GFX10-NEXT: [[FMA8:%[0-9]+]]:_(s64) = G_FMA [[FMA6]], [[FMA7]], [[FMA6]] + ; GFX10-NEXT: [[FMUL1:%[0-9]+]]:_(s64) = G_FMUL [[INT10]], [[FMA8]] + ; GFX10-NEXT: [[FMA9:%[0-9]+]]:_(s64) = G_FMA [[FNEG1]], [[FMUL1]], [[INT10]] + ; GFX10-NEXT: [[INT12:%[0-9]+]]:_(s64) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.fmas), [[FMA9]](s64), [[FMA8]](s64), [[FMUL1]](s64), [[INT11]](s1) + ; GFX10-NEXT: [[INT13:%[0-9]+]]:_(s64) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.fixup), [[INT12]](s64), [[UV3]](s64), [[UV1]](s64) + ; GFX10-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s64>) = G_BUILD_VECTOR [[INT6]](s64), [[INT13]](s64) + ; GFX10-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<2 x s64>) %0:_(<2 x s64>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3 %1:_(<2 x s64>) = COPY $vgpr4_vgpr5_vgpr6_vgpr7 %2:_(<2 x s64>) = G_FDIV %0, %1 @@ -1215,159 +1215,159 @@ body: | ; SI-LABEL: name: test_fdiv_v2s16 ; SI: [[COPY:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0 - ; SI: [[COPY1:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr1 - ; SI: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY]](<2 x s16>) - ; SI: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST]](s32) - ; SI: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; SI: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) - ; SI: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) - ; SI: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[COPY1]](<2 x s16>) - ; SI: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST1]](s32) - ; SI: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C]](s32) - ; SI: [[TRUNC3:%[0-9]+]]:_(s16) 
= G_TRUNC [[LSHR1]](s32) - ; SI: [[FPEXT:%[0-9]+]]:_(s32) = G_FPEXT [[TRUNC]](s16) - ; SI: [[FPEXT1:%[0-9]+]]:_(s32) = G_FPEXT [[TRUNC2]](s16) - ; SI: [[C1:%[0-9]+]]:_(s32) = G_FCONSTANT float 1.000000e+00 - ; SI: [[INT:%[0-9]+]]:_(s32), [[INT1:%[0-9]+]]:_(s1) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.scale), [[FPEXT]](s32), [[FPEXT1]](s32), 0 - ; SI: [[INT2:%[0-9]+]]:_(s32), [[INT3:%[0-9]+]]:_(s1) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.scale), [[FPEXT]](s32), [[FPEXT1]](s32), 1 - ; SI: [[INT4:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.rcp), [[INT]](s32) - ; SI: [[FNEG:%[0-9]+]]:_(s32) = G_FNEG [[INT]] - ; SI: [[FMA:%[0-9]+]]:_(s32) = G_FMA [[FNEG]], [[INT4]], [[C1]] - ; SI: [[FMA1:%[0-9]+]]:_(s32) = G_FMA [[FMA]], [[INT4]], [[INT4]] - ; SI: [[FMUL:%[0-9]+]]:_(s32) = G_FMUL [[INT2]], [[FMA1]] - ; SI: [[FMA2:%[0-9]+]]:_(s32) = G_FMA [[FNEG]], [[FMUL]], [[INT2]] - ; SI: [[FMA3:%[0-9]+]]:_(s32) = G_FMA [[FMA2]], [[FMA1]], [[FMUL]] - ; SI: [[FMA4:%[0-9]+]]:_(s32) = G_FMA [[FNEG]], [[FMA3]], [[INT2]] - ; SI: [[INT5:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.fmas), [[FMA4]](s32), [[FMA1]](s32), [[FMA3]](s32), [[INT3]](s1) - ; SI: [[INT6:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.fixup), [[INT5]](s32), [[FPEXT1]](s32), [[FPEXT]](s32) - ; SI: [[FPTRUNC:%[0-9]+]]:_(s16) = G_FPTRUNC [[INT6]](s32) - ; SI: [[FPEXT2:%[0-9]+]]:_(s32) = G_FPEXT [[TRUNC1]](s16) - ; SI: [[FPEXT3:%[0-9]+]]:_(s32) = G_FPEXT [[TRUNC3]](s16) - ; SI: [[INT7:%[0-9]+]]:_(s32), [[INT8:%[0-9]+]]:_(s1) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.scale), [[FPEXT2]](s32), [[FPEXT3]](s32), 0 - ; SI: [[INT9:%[0-9]+]]:_(s32), [[INT10:%[0-9]+]]:_(s1) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.scale), [[FPEXT2]](s32), [[FPEXT3]](s32), 1 - ; SI: [[INT11:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.rcp), [[INT7]](s32) - ; SI: [[FNEG1:%[0-9]+]]:_(s32) = G_FNEG [[INT7]] - ; SI: [[FMA5:%[0-9]+]]:_(s32) = G_FMA [[FNEG1]], [[INT11]], [[C1]] - ; SI: [[FMA6:%[0-9]+]]:_(s32) = G_FMA [[FMA5]], [[INT11]], [[INT11]] - ; SI: [[FMUL1:%[0-9]+]]:_(s32) = G_FMUL [[INT9]], [[FMA6]] - ; SI: [[FMA7:%[0-9]+]]:_(s32) = G_FMA [[FNEG1]], [[FMUL1]], [[INT9]] - ; SI: [[FMA8:%[0-9]+]]:_(s32) = G_FMA [[FMA7]], [[FMA6]], [[FMUL1]] - ; SI: [[FMA9:%[0-9]+]]:_(s32) = G_FMA [[FNEG1]], [[FMA8]], [[INT9]] - ; SI: [[INT12:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.fmas), [[FMA9]](s32), [[FMA6]](s32), [[FMA8]](s32), [[INT10]](s1) - ; SI: [[INT13:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.fixup), [[INT12]](s32), [[FPEXT3]](s32), [[FPEXT2]](s32) - ; SI: [[FPTRUNC1:%[0-9]+]]:_(s16) = G_FPTRUNC [[INT13]](s32) - ; SI: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[FPTRUNC]](s16) - ; SI: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[FPTRUNC1]](s16) - ; SI: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C]](s32) - ; SI: [[OR:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL]] - ; SI: [[BITCAST2:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) - ; SI: $vgpr0 = COPY [[BITCAST2]](<2 x s16>) + ; SI-NEXT: [[COPY1:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr1 + ; SI-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY]](<2 x s16>) + ; SI-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST]](s32) + ; SI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; SI-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) + ; SI-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) + ; SI-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[COPY1]](<2 x s16>) + ; SI-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST1]](s32) + ; SI-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = 
G_LSHR [[BITCAST1]], [[C]](s32) + ; SI-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR1]](s32) + ; SI-NEXT: [[FPEXT:%[0-9]+]]:_(s32) = G_FPEXT [[TRUNC]](s16) + ; SI-NEXT: [[FPEXT1:%[0-9]+]]:_(s32) = G_FPEXT [[TRUNC2]](s16) + ; SI-NEXT: [[C1:%[0-9]+]]:_(s32) = G_FCONSTANT float 1.000000e+00 + ; SI-NEXT: [[INT:%[0-9]+]]:_(s32), [[INT1:%[0-9]+]]:_(s1) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.scale), [[FPEXT]](s32), [[FPEXT1]](s32), 0 + ; SI-NEXT: [[INT2:%[0-9]+]]:_(s32), [[INT3:%[0-9]+]]:_(s1) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.scale), [[FPEXT]](s32), [[FPEXT1]](s32), 1 + ; SI-NEXT: [[INT4:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.rcp), [[INT]](s32) + ; SI-NEXT: [[FNEG:%[0-9]+]]:_(s32) = G_FNEG [[INT]] + ; SI-NEXT: [[FMA:%[0-9]+]]:_(s32) = G_FMA [[FNEG]], [[INT4]], [[C1]] + ; SI-NEXT: [[FMA1:%[0-9]+]]:_(s32) = G_FMA [[FMA]], [[INT4]], [[INT4]] + ; SI-NEXT: [[FMUL:%[0-9]+]]:_(s32) = G_FMUL [[INT2]], [[FMA1]] + ; SI-NEXT: [[FMA2:%[0-9]+]]:_(s32) = G_FMA [[FNEG]], [[FMUL]], [[INT2]] + ; SI-NEXT: [[FMA3:%[0-9]+]]:_(s32) = G_FMA [[FMA2]], [[FMA1]], [[FMUL]] + ; SI-NEXT: [[FMA4:%[0-9]+]]:_(s32) = G_FMA [[FNEG]], [[FMA3]], [[INT2]] + ; SI-NEXT: [[INT5:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.fmas), [[FMA4]](s32), [[FMA1]](s32), [[FMA3]](s32), [[INT3]](s1) + ; SI-NEXT: [[INT6:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.fixup), [[INT5]](s32), [[FPEXT1]](s32), [[FPEXT]](s32) + ; SI-NEXT: [[FPTRUNC:%[0-9]+]]:_(s16) = G_FPTRUNC [[INT6]](s32) + ; SI-NEXT: [[FPEXT2:%[0-9]+]]:_(s32) = G_FPEXT [[TRUNC1]](s16) + ; SI-NEXT: [[FPEXT3:%[0-9]+]]:_(s32) = G_FPEXT [[TRUNC3]](s16) + ; SI-NEXT: [[INT7:%[0-9]+]]:_(s32), [[INT8:%[0-9]+]]:_(s1) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.scale), [[FPEXT2]](s32), [[FPEXT3]](s32), 0 + ; SI-NEXT: [[INT9:%[0-9]+]]:_(s32), [[INT10:%[0-9]+]]:_(s1) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.scale), [[FPEXT2]](s32), [[FPEXT3]](s32), 1 + ; SI-NEXT: [[INT11:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.rcp), [[INT7]](s32) + ; SI-NEXT: [[FNEG1:%[0-9]+]]:_(s32) = G_FNEG [[INT7]] + ; SI-NEXT: [[FMA5:%[0-9]+]]:_(s32) = G_FMA [[FNEG1]], [[INT11]], [[C1]] + ; SI-NEXT: [[FMA6:%[0-9]+]]:_(s32) = G_FMA [[FMA5]], [[INT11]], [[INT11]] + ; SI-NEXT: [[FMUL1:%[0-9]+]]:_(s32) = G_FMUL [[INT9]], [[FMA6]] + ; SI-NEXT: [[FMA7:%[0-9]+]]:_(s32) = G_FMA [[FNEG1]], [[FMUL1]], [[INT9]] + ; SI-NEXT: [[FMA8:%[0-9]+]]:_(s32) = G_FMA [[FMA7]], [[FMA6]], [[FMUL1]] + ; SI-NEXT: [[FMA9:%[0-9]+]]:_(s32) = G_FMA [[FNEG1]], [[FMA8]], [[INT9]] + ; SI-NEXT: [[INT12:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.fmas), [[FMA9]](s32), [[FMA6]](s32), [[FMA8]](s32), [[INT10]](s1) + ; SI-NEXT: [[INT13:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.fixup), [[INT12]](s32), [[FPEXT3]](s32), [[FPEXT2]](s32) + ; SI-NEXT: [[FPTRUNC1:%[0-9]+]]:_(s16) = G_FPTRUNC [[INT13]](s32) + ; SI-NEXT: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[FPTRUNC]](s16) + ; SI-NEXT: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[FPTRUNC1]](s16) + ; SI-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C]](s32) + ; SI-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL]] + ; SI-NEXT: [[BITCAST2:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) + ; SI-NEXT: $vgpr0 = COPY [[BITCAST2]](<2 x s16>) ; VI-LABEL: name: test_fdiv_v2s16 ; VI: [[COPY:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0 - ; VI: [[COPY1:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr1 - ; VI: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY]](<2 x s16>) - ; VI: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST]](s32) - ; VI: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 
- ; VI: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) - ; VI: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) - ; VI: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[COPY1]](<2 x s16>) - ; VI: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST1]](s32) - ; VI: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C]](s32) - ; VI: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR1]](s32) - ; VI: [[FPEXT:%[0-9]+]]:_(s32) = G_FPEXT [[TRUNC]](s16) - ; VI: [[FPEXT1:%[0-9]+]]:_(s32) = G_FPEXT [[TRUNC2]](s16) - ; VI: [[INT:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.rcp), [[FPEXT1]](s32) - ; VI: [[FMUL:%[0-9]+]]:_(s32) = G_FMUL [[FPEXT]], [[INT]] - ; VI: [[FPTRUNC:%[0-9]+]]:_(s16) = G_FPTRUNC [[FMUL]](s32) - ; VI: [[INT1:%[0-9]+]]:_(s16) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.fixup), [[FPTRUNC]](s16), [[TRUNC2]](s16), [[TRUNC]](s16) - ; VI: [[FPEXT2:%[0-9]+]]:_(s32) = G_FPEXT [[TRUNC1]](s16) - ; VI: [[FPEXT3:%[0-9]+]]:_(s32) = G_FPEXT [[TRUNC3]](s16) - ; VI: [[INT2:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.rcp), [[FPEXT3]](s32) - ; VI: [[FMUL1:%[0-9]+]]:_(s32) = G_FMUL [[FPEXT2]], [[INT2]] - ; VI: [[FPTRUNC1:%[0-9]+]]:_(s16) = G_FPTRUNC [[FMUL1]](s32) - ; VI: [[INT3:%[0-9]+]]:_(s16) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.fixup), [[FPTRUNC1]](s16), [[TRUNC3]](s16), [[TRUNC1]](s16) - ; VI: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[INT1]](s16) - ; VI: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[INT3]](s16) - ; VI: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C]](s32) - ; VI: [[OR:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL]] - ; VI: [[BITCAST2:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) - ; VI: $vgpr0 = COPY [[BITCAST2]](<2 x s16>) + ; VI-NEXT: [[COPY1:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr1 + ; VI-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY]](<2 x s16>) + ; VI-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST]](s32) + ; VI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; VI-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) + ; VI-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) + ; VI-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[COPY1]](<2 x s16>) + ; VI-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST1]](s32) + ; VI-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C]](s32) + ; VI-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR1]](s32) + ; VI-NEXT: [[FPEXT:%[0-9]+]]:_(s32) = G_FPEXT [[TRUNC]](s16) + ; VI-NEXT: [[FPEXT1:%[0-9]+]]:_(s32) = G_FPEXT [[TRUNC2]](s16) + ; VI-NEXT: [[INT:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.rcp), [[FPEXT1]](s32) + ; VI-NEXT: [[FMUL:%[0-9]+]]:_(s32) = G_FMUL [[FPEXT]], [[INT]] + ; VI-NEXT: [[FPTRUNC:%[0-9]+]]:_(s16) = G_FPTRUNC [[FMUL]](s32) + ; VI-NEXT: [[INT1:%[0-9]+]]:_(s16) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.fixup), [[FPTRUNC]](s16), [[TRUNC2]](s16), [[TRUNC]](s16) + ; VI-NEXT: [[FPEXT2:%[0-9]+]]:_(s32) = G_FPEXT [[TRUNC1]](s16) + ; VI-NEXT: [[FPEXT3:%[0-9]+]]:_(s32) = G_FPEXT [[TRUNC3]](s16) + ; VI-NEXT: [[INT2:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.rcp), [[FPEXT3]](s32) + ; VI-NEXT: [[FMUL1:%[0-9]+]]:_(s32) = G_FMUL [[FPEXT2]], [[INT2]] + ; VI-NEXT: [[FPTRUNC1:%[0-9]+]]:_(s16) = G_FPTRUNC [[FMUL1]](s32) + ; VI-NEXT: [[INT3:%[0-9]+]]:_(s16) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.fixup), [[FPTRUNC1]](s16), [[TRUNC3]](s16), [[TRUNC1]](s16) + ; VI-NEXT: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[INT1]](s16) + ; VI-NEXT: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[INT3]](s16) + ; VI-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C]](s32) + ; VI-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL]] + ; 
VI-NEXT: [[BITCAST2:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) + ; VI-NEXT: $vgpr0 = COPY [[BITCAST2]](<2 x s16>) ; GFX9-LABEL: name: test_fdiv_v2s16 ; GFX9: [[COPY:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0 - ; GFX9: [[COPY1:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr1 - ; GFX9: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY]](<2 x s16>) - ; GFX9: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST]](s32) - ; GFX9: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; GFX9: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) - ; GFX9: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) - ; GFX9: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[COPY1]](<2 x s16>) - ; GFX9: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST1]](s32) - ; GFX9: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C]](s32) - ; GFX9: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR1]](s32) - ; GFX9: [[FPEXT:%[0-9]+]]:_(s32) = G_FPEXT [[TRUNC]](s16) - ; GFX9: [[FPEXT1:%[0-9]+]]:_(s32) = G_FPEXT [[TRUNC2]](s16) - ; GFX9: [[INT:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.rcp), [[FPEXT1]](s32) - ; GFX9: [[FMUL:%[0-9]+]]:_(s32) = G_FMUL [[FPEXT]], [[INT]] - ; GFX9: [[FPTRUNC:%[0-9]+]]:_(s16) = G_FPTRUNC [[FMUL]](s32) - ; GFX9: [[INT1:%[0-9]+]]:_(s16) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.fixup), [[FPTRUNC]](s16), [[TRUNC2]](s16), [[TRUNC]](s16) - ; GFX9: [[FPEXT2:%[0-9]+]]:_(s32) = G_FPEXT [[TRUNC1]](s16) - ; GFX9: [[FPEXT3:%[0-9]+]]:_(s32) = G_FPEXT [[TRUNC3]](s16) - ; GFX9: [[INT2:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.rcp), [[FPEXT3]](s32) - ; GFX9: [[FMUL1:%[0-9]+]]:_(s32) = G_FMUL [[FPEXT2]], [[INT2]] - ; GFX9: [[FPTRUNC1:%[0-9]+]]:_(s16) = G_FPTRUNC [[FMUL1]](s32) - ; GFX9: [[INT3:%[0-9]+]]:_(s16) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.fixup), [[FPTRUNC1]](s16), [[TRUNC3]](s16), [[TRUNC1]](s16) - ; GFX9: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[INT1]](s16) - ; GFX9: [[ANYEXT1:%[0-9]+]]:_(s32) = G_ANYEXT [[INT3]](s16) - ; GFX9: [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[ANYEXT]](s32), [[ANYEXT1]](s32) - ; GFX9: $vgpr0 = COPY [[BUILD_VECTOR_TRUNC]](<2 x s16>) + ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr1 + ; GFX9-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY]](<2 x s16>) + ; GFX9-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST]](s32) + ; GFX9-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; GFX9-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) + ; GFX9-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) + ; GFX9-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[COPY1]](<2 x s16>) + ; GFX9-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST1]](s32) + ; GFX9-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C]](s32) + ; GFX9-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR1]](s32) + ; GFX9-NEXT: [[FPEXT:%[0-9]+]]:_(s32) = G_FPEXT [[TRUNC]](s16) + ; GFX9-NEXT: [[FPEXT1:%[0-9]+]]:_(s32) = G_FPEXT [[TRUNC2]](s16) + ; GFX9-NEXT: [[INT:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.rcp), [[FPEXT1]](s32) + ; GFX9-NEXT: [[FMUL:%[0-9]+]]:_(s32) = G_FMUL [[FPEXT]], [[INT]] + ; GFX9-NEXT: [[FPTRUNC:%[0-9]+]]:_(s16) = G_FPTRUNC [[FMUL]](s32) + ; GFX9-NEXT: [[INT1:%[0-9]+]]:_(s16) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.fixup), [[FPTRUNC]](s16), [[TRUNC2]](s16), [[TRUNC]](s16) + ; GFX9-NEXT: [[FPEXT2:%[0-9]+]]:_(s32) = G_FPEXT [[TRUNC1]](s16) + ; GFX9-NEXT: [[FPEXT3:%[0-9]+]]:_(s32) = G_FPEXT [[TRUNC3]](s16) + ; GFX9-NEXT: [[INT2:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.rcp), [[FPEXT3]](s32) + ; GFX9-NEXT: [[FMUL1:%[0-9]+]]:_(s32) = G_FMUL [[FPEXT2]], 
[[INT2]] + ; GFX9-NEXT: [[FPTRUNC1:%[0-9]+]]:_(s16) = G_FPTRUNC [[FMUL1]](s32) + ; GFX9-NEXT: [[INT3:%[0-9]+]]:_(s16) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.fixup), [[FPTRUNC1]](s16), [[TRUNC3]](s16), [[TRUNC1]](s16) + ; GFX9-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[INT1]](s16) + ; GFX9-NEXT: [[ANYEXT1:%[0-9]+]]:_(s32) = G_ANYEXT [[INT3]](s16) + ; GFX9-NEXT: [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[ANYEXT]](s32), [[ANYEXT1]](s32) + ; GFX9-NEXT: $vgpr0 = COPY [[BUILD_VECTOR_TRUNC]](<2 x s16>) ; GFX9-UNSAFE-LABEL: name: test_fdiv_v2s16 ; GFX9-UNSAFE: [[COPY:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0 - ; GFX9-UNSAFE: [[COPY1:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr1 - ; GFX9-UNSAFE: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY]](<2 x s16>) - ; GFX9-UNSAFE: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST]](s32) - ; GFX9-UNSAFE: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; GFX9-UNSAFE: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) - ; GFX9-UNSAFE: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) - ; GFX9-UNSAFE: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[COPY1]](<2 x s16>) - ; GFX9-UNSAFE: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST1]](s32) - ; GFX9-UNSAFE: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C]](s32) - ; GFX9-UNSAFE: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR1]](s32) - ; GFX9-UNSAFE: [[INT:%[0-9]+]]:_(s16) = G_INTRINSIC intrinsic(@llvm.amdgcn.rcp), [[TRUNC2]](s16) - ; GFX9-UNSAFE: [[FMUL:%[0-9]+]]:_(s16) = G_FMUL [[TRUNC]], [[INT]] - ; GFX9-UNSAFE: [[INT1:%[0-9]+]]:_(s16) = G_INTRINSIC intrinsic(@llvm.amdgcn.rcp), [[TRUNC3]](s16) - ; GFX9-UNSAFE: [[FMUL1:%[0-9]+]]:_(s16) = G_FMUL [[TRUNC1]], [[INT1]] - ; GFX9-UNSAFE: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[FMUL]](s16) - ; GFX9-UNSAFE: [[ANYEXT1:%[0-9]+]]:_(s32) = G_ANYEXT [[FMUL1]](s16) - ; GFX9-UNSAFE: [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[ANYEXT]](s32), [[ANYEXT1]](s32) - ; GFX9-UNSAFE: $vgpr0 = COPY [[BUILD_VECTOR_TRUNC]](<2 x s16>) + ; GFX9-UNSAFE-NEXT: [[COPY1:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr1 + ; GFX9-UNSAFE-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY]](<2 x s16>) + ; GFX9-UNSAFE-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST]](s32) + ; GFX9-UNSAFE-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; GFX9-UNSAFE-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) + ; GFX9-UNSAFE-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) + ; GFX9-UNSAFE-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[COPY1]](<2 x s16>) + ; GFX9-UNSAFE-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST1]](s32) + ; GFX9-UNSAFE-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C]](s32) + ; GFX9-UNSAFE-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR1]](s32) + ; GFX9-UNSAFE-NEXT: [[INT:%[0-9]+]]:_(s16) = G_INTRINSIC intrinsic(@llvm.amdgcn.rcp), [[TRUNC2]](s16) + ; GFX9-UNSAFE-NEXT: [[FMUL:%[0-9]+]]:_(s16) = G_FMUL [[TRUNC]], [[INT]] + ; GFX9-UNSAFE-NEXT: [[INT1:%[0-9]+]]:_(s16) = G_INTRINSIC intrinsic(@llvm.amdgcn.rcp), [[TRUNC3]](s16) + ; GFX9-UNSAFE-NEXT: [[FMUL1:%[0-9]+]]:_(s16) = G_FMUL [[TRUNC1]], [[INT1]] + ; GFX9-UNSAFE-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[FMUL]](s16) + ; GFX9-UNSAFE-NEXT: [[ANYEXT1:%[0-9]+]]:_(s32) = G_ANYEXT [[FMUL1]](s16) + ; GFX9-UNSAFE-NEXT: [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[ANYEXT]](s32), [[ANYEXT1]](s32) + ; GFX9-UNSAFE-NEXT: $vgpr0 = COPY [[BUILD_VECTOR_TRUNC]](<2 x s16>) ; GFX10-LABEL: name: test_fdiv_v2s16 ; GFX10: [[COPY:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0 - ; GFX10: 
[[COPY1:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr1 - ; GFX10: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY]](<2 x s16>) - ; GFX10: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST]](s32) - ; GFX10: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; GFX10: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) - ; GFX10: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) - ; GFX10: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[COPY1]](<2 x s16>) - ; GFX10: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST1]](s32) - ; GFX10: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C]](s32) - ; GFX10: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR1]](s32) - ; GFX10: [[FPEXT:%[0-9]+]]:_(s32) = G_FPEXT [[TRUNC]](s16) - ; GFX10: [[FPEXT1:%[0-9]+]]:_(s32) = G_FPEXT [[TRUNC2]](s16) - ; GFX10: [[INT:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.rcp), [[FPEXT1]](s32) - ; GFX10: [[FMUL:%[0-9]+]]:_(s32) = G_FMUL [[FPEXT]], [[INT]] - ; GFX10: [[FPTRUNC:%[0-9]+]]:_(s16) = G_FPTRUNC [[FMUL]](s32) - ; GFX10: [[INT1:%[0-9]+]]:_(s16) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.fixup), [[FPTRUNC]](s16), [[TRUNC2]](s16), [[TRUNC]](s16) - ; GFX10: [[FPEXT2:%[0-9]+]]:_(s32) = G_FPEXT [[TRUNC1]](s16) - ; GFX10: [[FPEXT3:%[0-9]+]]:_(s32) = G_FPEXT [[TRUNC3]](s16) - ; GFX10: [[INT2:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.rcp), [[FPEXT3]](s32) - ; GFX10: [[FMUL1:%[0-9]+]]:_(s32) = G_FMUL [[FPEXT2]], [[INT2]] - ; GFX10: [[FPTRUNC1:%[0-9]+]]:_(s16) = G_FPTRUNC [[FMUL1]](s32) - ; GFX10: [[INT3:%[0-9]+]]:_(s16) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.fixup), [[FPTRUNC1]](s16), [[TRUNC3]](s16), [[TRUNC1]](s16) - ; GFX10: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[INT1]](s16) - ; GFX10: [[ANYEXT1:%[0-9]+]]:_(s32) = G_ANYEXT [[INT3]](s16) - ; GFX10: [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[ANYEXT]](s32), [[ANYEXT1]](s32) - ; GFX10: $vgpr0 = COPY [[BUILD_VECTOR_TRUNC]](<2 x s16>) + ; GFX10-NEXT: [[COPY1:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr1 + ; GFX10-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY]](<2 x s16>) + ; GFX10-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST]](s32) + ; GFX10-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; GFX10-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) + ; GFX10-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) + ; GFX10-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[COPY1]](<2 x s16>) + ; GFX10-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST1]](s32) + ; GFX10-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C]](s32) + ; GFX10-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR1]](s32) + ; GFX10-NEXT: [[FPEXT:%[0-9]+]]:_(s32) = G_FPEXT [[TRUNC]](s16) + ; GFX10-NEXT: [[FPEXT1:%[0-9]+]]:_(s32) = G_FPEXT [[TRUNC2]](s16) + ; GFX10-NEXT: [[INT:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.rcp), [[FPEXT1]](s32) + ; GFX10-NEXT: [[FMUL:%[0-9]+]]:_(s32) = G_FMUL [[FPEXT]], [[INT]] + ; GFX10-NEXT: [[FPTRUNC:%[0-9]+]]:_(s16) = G_FPTRUNC [[FMUL]](s32) + ; GFX10-NEXT: [[INT1:%[0-9]+]]:_(s16) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.fixup), [[FPTRUNC]](s16), [[TRUNC2]](s16), [[TRUNC]](s16) + ; GFX10-NEXT: [[FPEXT2:%[0-9]+]]:_(s32) = G_FPEXT [[TRUNC1]](s16) + ; GFX10-NEXT: [[FPEXT3:%[0-9]+]]:_(s32) = G_FPEXT [[TRUNC3]](s16) + ; GFX10-NEXT: [[INT2:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.rcp), [[FPEXT3]](s32) + ; GFX10-NEXT: [[FMUL1:%[0-9]+]]:_(s32) = G_FMUL [[FPEXT2]], [[INT2]] + ; GFX10-NEXT: [[FPTRUNC1:%[0-9]+]]:_(s16) = G_FPTRUNC [[FMUL1]](s32) + ; GFX10-NEXT: [[INT3:%[0-9]+]]:_(s16) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.fixup), 
[[FPTRUNC1]](s16), [[TRUNC3]](s16), [[TRUNC1]](s16) + ; GFX10-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[INT1]](s16) + ; GFX10-NEXT: [[ANYEXT1:%[0-9]+]]:_(s32) = G_ANYEXT [[INT3]](s16) + ; GFX10-NEXT: [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[ANYEXT]](s32), [[ANYEXT1]](s32) + ; GFX10-NEXT: $vgpr0 = COPY [[BUILD_VECTOR_TRUNC]](<2 x s16>) %0:_(<2 x s16>) = COPY $vgpr0 %1:_(<2 x s16>) = COPY $vgpr1 %2:_(<2 x s16>) = G_FDIV %0, %1 @@ -1382,225 +1382,225 @@ body: | ; SI-LABEL: name: test_fdiv_v3s16 ; SI: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF - ; SI: [[DEF1:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF - ; SI: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<4 x s16>) - ; SI: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>) - ; SI: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST]](s32) - ; SI: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; SI: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) - ; SI: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) - ; SI: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>) - ; SI: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST1]](s32) - ; SI: [[UV2:%[0-9]+]]:_(<2 x s16>), [[UV3:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF1]](<4 x s16>) - ; SI: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[UV2]](<2 x s16>) - ; SI: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST2]](s32) - ; SI: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C]](s32) - ; SI: [[TRUNC4:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR1]](s32) - ; SI: [[BITCAST3:%[0-9]+]]:_(s32) = G_BITCAST [[UV3]](<2 x s16>) - ; SI: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST3]](s32) - ; SI: [[FPEXT:%[0-9]+]]:_(s32) = G_FPEXT [[TRUNC]](s16) - ; SI: [[FPEXT1:%[0-9]+]]:_(s32) = G_FPEXT [[TRUNC3]](s16) - ; SI: [[C1:%[0-9]+]]:_(s32) = G_FCONSTANT float 1.000000e+00 - ; SI: [[INT:%[0-9]+]]:_(s32), [[INT1:%[0-9]+]]:_(s1) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.scale), [[FPEXT]](s32), [[FPEXT1]](s32), 0 - ; SI: [[INT2:%[0-9]+]]:_(s32), [[INT3:%[0-9]+]]:_(s1) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.scale), [[FPEXT]](s32), [[FPEXT1]](s32), 1 - ; SI: [[INT4:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.rcp), [[INT]](s32) - ; SI: [[FNEG:%[0-9]+]]:_(s32) = G_FNEG [[INT]] - ; SI: [[FMA:%[0-9]+]]:_(s32) = G_FMA [[FNEG]], [[INT4]], [[C1]] - ; SI: [[FMA1:%[0-9]+]]:_(s32) = G_FMA [[FMA]], [[INT4]], [[INT4]] - ; SI: [[FMUL:%[0-9]+]]:_(s32) = G_FMUL [[INT2]], [[FMA1]] - ; SI: [[FMA2:%[0-9]+]]:_(s32) = G_FMA [[FNEG]], [[FMUL]], [[INT2]] - ; SI: [[FMA3:%[0-9]+]]:_(s32) = G_FMA [[FMA2]], [[FMA1]], [[FMUL]] - ; SI: [[FMA4:%[0-9]+]]:_(s32) = G_FMA [[FNEG]], [[FMA3]], [[INT2]] - ; SI: [[INT5:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.fmas), [[FMA4]](s32), [[FMA1]](s32), [[FMA3]](s32), [[INT3]](s1) - ; SI: [[INT6:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.fixup), [[INT5]](s32), [[FPEXT1]](s32), [[FPEXT]](s32) - ; SI: [[FPTRUNC:%[0-9]+]]:_(s16) = G_FPTRUNC [[INT6]](s32) - ; SI: [[FPEXT2:%[0-9]+]]:_(s32) = G_FPEXT [[TRUNC1]](s16) - ; SI: [[FPEXT3:%[0-9]+]]:_(s32) = G_FPEXT [[TRUNC4]](s16) - ; SI: [[INT7:%[0-9]+]]:_(s32), [[INT8:%[0-9]+]]:_(s1) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.scale), [[FPEXT2]](s32), [[FPEXT3]](s32), 0 - ; SI: [[INT9:%[0-9]+]]:_(s32), [[INT10:%[0-9]+]]:_(s1) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.scale), [[FPEXT2]](s32), [[FPEXT3]](s32), 1 - ; SI: [[INT11:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.rcp), [[INT7]](s32) - ; SI: [[FNEG1:%[0-9]+]]:_(s32) = G_FNEG [[INT7]] - ; SI: 
[[FMA5:%[0-9]+]]:_(s32) = G_FMA [[FNEG1]], [[INT11]], [[C1]] - ; SI: [[FMA6:%[0-9]+]]:_(s32) = G_FMA [[FMA5]], [[INT11]], [[INT11]] - ; SI: [[FMUL1:%[0-9]+]]:_(s32) = G_FMUL [[INT9]], [[FMA6]] - ; SI: [[FMA7:%[0-9]+]]:_(s32) = G_FMA [[FNEG1]], [[FMUL1]], [[INT9]] - ; SI: [[FMA8:%[0-9]+]]:_(s32) = G_FMA [[FMA7]], [[FMA6]], [[FMUL1]] - ; SI: [[FMA9:%[0-9]+]]:_(s32) = G_FMA [[FNEG1]], [[FMA8]], [[INT9]] - ; SI: [[INT12:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.fmas), [[FMA9]](s32), [[FMA6]](s32), [[FMA8]](s32), [[INT10]](s1) - ; SI: [[INT13:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.fixup), [[INT12]](s32), [[FPEXT3]](s32), [[FPEXT2]](s32) - ; SI: [[FPTRUNC1:%[0-9]+]]:_(s16) = G_FPTRUNC [[INT13]](s32) - ; SI: [[FPEXT4:%[0-9]+]]:_(s32) = G_FPEXT [[TRUNC2]](s16) - ; SI: [[FPEXT5:%[0-9]+]]:_(s32) = G_FPEXT [[TRUNC5]](s16) - ; SI: [[INT14:%[0-9]+]]:_(s32), [[INT15:%[0-9]+]]:_(s1) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.scale), [[FPEXT4]](s32), [[FPEXT5]](s32), 0 - ; SI: [[INT16:%[0-9]+]]:_(s32), [[INT17:%[0-9]+]]:_(s1) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.scale), [[FPEXT4]](s32), [[FPEXT5]](s32), 1 - ; SI: [[INT18:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.rcp), [[INT14]](s32) - ; SI: [[FNEG2:%[0-9]+]]:_(s32) = G_FNEG [[INT14]] - ; SI: [[FMA10:%[0-9]+]]:_(s32) = G_FMA [[FNEG2]], [[INT18]], [[C1]] - ; SI: [[FMA11:%[0-9]+]]:_(s32) = G_FMA [[FMA10]], [[INT18]], [[INT18]] - ; SI: [[FMUL2:%[0-9]+]]:_(s32) = G_FMUL [[INT16]], [[FMA11]] - ; SI: [[FMA12:%[0-9]+]]:_(s32) = G_FMA [[FNEG2]], [[FMUL2]], [[INT16]] - ; SI: [[FMA13:%[0-9]+]]:_(s32) = G_FMA [[FMA12]], [[FMA11]], [[FMUL2]] - ; SI: [[FMA14:%[0-9]+]]:_(s32) = G_FMA [[FNEG2]], [[FMA13]], [[INT16]] - ; SI: [[INT19:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.fmas), [[FMA14]](s32), [[FMA11]](s32), [[FMA13]](s32), [[INT17]](s1) - ; SI: [[INT20:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.fixup), [[INT19]](s32), [[FPEXT5]](s32), [[FPEXT4]](s32) - ; SI: [[FPTRUNC2:%[0-9]+]]:_(s16) = G_FPTRUNC [[INT20]](s32) - ; SI: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[FPTRUNC]](s16) - ; SI: [[ANYEXT1:%[0-9]+]]:_(s32) = G_ANYEXT [[FPTRUNC1]](s16) - ; SI: [[ANYEXT2:%[0-9]+]]:_(s32) = G_ANYEXT [[FPTRUNC2]](s16) - ; SI: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[ANYEXT]](s32), [[ANYEXT1]](s32), [[ANYEXT2]](s32) - ; SI: S_NOP 0, implicit [[BUILD_VECTOR]](<3 x s32>) + ; SI-NEXT: [[DEF1:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF + ; SI-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<4 x s16>) + ; SI-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>) + ; SI-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST]](s32) + ; SI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; SI-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) + ; SI-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) + ; SI-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>) + ; SI-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST1]](s32) + ; SI-NEXT: [[UV2:%[0-9]+]]:_(<2 x s16>), [[UV3:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF1]](<4 x s16>) + ; SI-NEXT: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[UV2]](<2 x s16>) + ; SI-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST2]](s32) + ; SI-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C]](s32) + ; SI-NEXT: [[TRUNC4:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR1]](s32) + ; SI-NEXT: [[BITCAST3:%[0-9]+]]:_(s32) = G_BITCAST [[UV3]](<2 x s16>) + ; SI-NEXT: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC 
[[BITCAST3]](s32) + ; SI-NEXT: [[FPEXT:%[0-9]+]]:_(s32) = G_FPEXT [[TRUNC]](s16) + ; SI-NEXT: [[FPEXT1:%[0-9]+]]:_(s32) = G_FPEXT [[TRUNC3]](s16) + ; SI-NEXT: [[C1:%[0-9]+]]:_(s32) = G_FCONSTANT float 1.000000e+00 + ; SI-NEXT: [[INT:%[0-9]+]]:_(s32), [[INT1:%[0-9]+]]:_(s1) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.scale), [[FPEXT]](s32), [[FPEXT1]](s32), 0 + ; SI-NEXT: [[INT2:%[0-9]+]]:_(s32), [[INT3:%[0-9]+]]:_(s1) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.scale), [[FPEXT]](s32), [[FPEXT1]](s32), 1 + ; SI-NEXT: [[INT4:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.rcp), [[INT]](s32) + ; SI-NEXT: [[FNEG:%[0-9]+]]:_(s32) = G_FNEG [[INT]] + ; SI-NEXT: [[FMA:%[0-9]+]]:_(s32) = G_FMA [[FNEG]], [[INT4]], [[C1]] + ; SI-NEXT: [[FMA1:%[0-9]+]]:_(s32) = G_FMA [[FMA]], [[INT4]], [[INT4]] + ; SI-NEXT: [[FMUL:%[0-9]+]]:_(s32) = G_FMUL [[INT2]], [[FMA1]] + ; SI-NEXT: [[FMA2:%[0-9]+]]:_(s32) = G_FMA [[FNEG]], [[FMUL]], [[INT2]] + ; SI-NEXT: [[FMA3:%[0-9]+]]:_(s32) = G_FMA [[FMA2]], [[FMA1]], [[FMUL]] + ; SI-NEXT: [[FMA4:%[0-9]+]]:_(s32) = G_FMA [[FNEG]], [[FMA3]], [[INT2]] + ; SI-NEXT: [[INT5:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.fmas), [[FMA4]](s32), [[FMA1]](s32), [[FMA3]](s32), [[INT3]](s1) + ; SI-NEXT: [[INT6:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.fixup), [[INT5]](s32), [[FPEXT1]](s32), [[FPEXT]](s32) + ; SI-NEXT: [[FPTRUNC:%[0-9]+]]:_(s16) = G_FPTRUNC [[INT6]](s32) + ; SI-NEXT: [[FPEXT2:%[0-9]+]]:_(s32) = G_FPEXT [[TRUNC1]](s16) + ; SI-NEXT: [[FPEXT3:%[0-9]+]]:_(s32) = G_FPEXT [[TRUNC4]](s16) + ; SI-NEXT: [[INT7:%[0-9]+]]:_(s32), [[INT8:%[0-9]+]]:_(s1) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.scale), [[FPEXT2]](s32), [[FPEXT3]](s32), 0 + ; SI-NEXT: [[INT9:%[0-9]+]]:_(s32), [[INT10:%[0-9]+]]:_(s1) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.scale), [[FPEXT2]](s32), [[FPEXT3]](s32), 1 + ; SI-NEXT: [[INT11:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.rcp), [[INT7]](s32) + ; SI-NEXT: [[FNEG1:%[0-9]+]]:_(s32) = G_FNEG [[INT7]] + ; SI-NEXT: [[FMA5:%[0-9]+]]:_(s32) = G_FMA [[FNEG1]], [[INT11]], [[C1]] + ; SI-NEXT: [[FMA6:%[0-9]+]]:_(s32) = G_FMA [[FMA5]], [[INT11]], [[INT11]] + ; SI-NEXT: [[FMUL1:%[0-9]+]]:_(s32) = G_FMUL [[INT9]], [[FMA6]] + ; SI-NEXT: [[FMA7:%[0-9]+]]:_(s32) = G_FMA [[FNEG1]], [[FMUL1]], [[INT9]] + ; SI-NEXT: [[FMA8:%[0-9]+]]:_(s32) = G_FMA [[FMA7]], [[FMA6]], [[FMUL1]] + ; SI-NEXT: [[FMA9:%[0-9]+]]:_(s32) = G_FMA [[FNEG1]], [[FMA8]], [[INT9]] + ; SI-NEXT: [[INT12:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.fmas), [[FMA9]](s32), [[FMA6]](s32), [[FMA8]](s32), [[INT10]](s1) + ; SI-NEXT: [[INT13:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.fixup), [[INT12]](s32), [[FPEXT3]](s32), [[FPEXT2]](s32) + ; SI-NEXT: [[FPTRUNC1:%[0-9]+]]:_(s16) = G_FPTRUNC [[INT13]](s32) + ; SI-NEXT: [[FPEXT4:%[0-9]+]]:_(s32) = G_FPEXT [[TRUNC2]](s16) + ; SI-NEXT: [[FPEXT5:%[0-9]+]]:_(s32) = G_FPEXT [[TRUNC5]](s16) + ; SI-NEXT: [[INT14:%[0-9]+]]:_(s32), [[INT15:%[0-9]+]]:_(s1) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.scale), [[FPEXT4]](s32), [[FPEXT5]](s32), 0 + ; SI-NEXT: [[INT16:%[0-9]+]]:_(s32), [[INT17:%[0-9]+]]:_(s1) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.scale), [[FPEXT4]](s32), [[FPEXT5]](s32), 1 + ; SI-NEXT: [[INT18:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.rcp), [[INT14]](s32) + ; SI-NEXT: [[FNEG2:%[0-9]+]]:_(s32) = G_FNEG [[INT14]] + ; SI-NEXT: [[FMA10:%[0-9]+]]:_(s32) = G_FMA [[FNEG2]], [[INT18]], [[C1]] + ; SI-NEXT: [[FMA11:%[0-9]+]]:_(s32) = G_FMA [[FMA10]], [[INT18]], [[INT18]] + ; SI-NEXT: 
[[FMUL2:%[0-9]+]]:_(s32) = G_FMUL [[INT16]], [[FMA11]] + ; SI-NEXT: [[FMA12:%[0-9]+]]:_(s32) = G_FMA [[FNEG2]], [[FMUL2]], [[INT16]] + ; SI-NEXT: [[FMA13:%[0-9]+]]:_(s32) = G_FMA [[FMA12]], [[FMA11]], [[FMUL2]] + ; SI-NEXT: [[FMA14:%[0-9]+]]:_(s32) = G_FMA [[FNEG2]], [[FMA13]], [[INT16]] + ; SI-NEXT: [[INT19:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.fmas), [[FMA14]](s32), [[FMA11]](s32), [[FMA13]](s32), [[INT17]](s1) + ; SI-NEXT: [[INT20:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.fixup), [[INT19]](s32), [[FPEXT5]](s32), [[FPEXT4]](s32) + ; SI-NEXT: [[FPTRUNC2:%[0-9]+]]:_(s16) = G_FPTRUNC [[INT20]](s32) + ; SI-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[FPTRUNC]](s16) + ; SI-NEXT: [[ANYEXT1:%[0-9]+]]:_(s32) = G_ANYEXT [[FPTRUNC1]](s16) + ; SI-NEXT: [[ANYEXT2:%[0-9]+]]:_(s32) = G_ANYEXT [[FPTRUNC2]](s16) + ; SI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[ANYEXT]](s32), [[ANYEXT1]](s32), [[ANYEXT2]](s32) + ; SI-NEXT: S_NOP 0, implicit [[BUILD_VECTOR]](<3 x s32>) ; VI-LABEL: name: test_fdiv_v3s16 ; VI: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF - ; VI: [[DEF1:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF - ; VI: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<4 x s16>) - ; VI: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>) - ; VI: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST]](s32) - ; VI: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; VI: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) - ; VI: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) - ; VI: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>) - ; VI: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST1]](s32) - ; VI: [[UV2:%[0-9]+]]:_(<2 x s16>), [[UV3:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF1]](<4 x s16>) - ; VI: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[UV2]](<2 x s16>) - ; VI: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST2]](s32) - ; VI: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C]](s32) - ; VI: [[TRUNC4:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR1]](s32) - ; VI: [[BITCAST3:%[0-9]+]]:_(s32) = G_BITCAST [[UV3]](<2 x s16>) - ; VI: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST3]](s32) - ; VI: [[FPEXT:%[0-9]+]]:_(s32) = G_FPEXT [[TRUNC]](s16) - ; VI: [[FPEXT1:%[0-9]+]]:_(s32) = G_FPEXT [[TRUNC3]](s16) - ; VI: [[INT:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.rcp), [[FPEXT1]](s32) - ; VI: [[FMUL:%[0-9]+]]:_(s32) = G_FMUL [[FPEXT]], [[INT]] - ; VI: [[FPTRUNC:%[0-9]+]]:_(s16) = G_FPTRUNC [[FMUL]](s32) - ; VI: [[INT1:%[0-9]+]]:_(s16) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.fixup), [[FPTRUNC]](s16), [[TRUNC3]](s16), [[TRUNC]](s16) - ; VI: [[FPEXT2:%[0-9]+]]:_(s32) = G_FPEXT [[TRUNC1]](s16) - ; VI: [[FPEXT3:%[0-9]+]]:_(s32) = G_FPEXT [[TRUNC4]](s16) - ; VI: [[INT2:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.rcp), [[FPEXT3]](s32) - ; VI: [[FMUL1:%[0-9]+]]:_(s32) = G_FMUL [[FPEXT2]], [[INT2]] - ; VI: [[FPTRUNC1:%[0-9]+]]:_(s16) = G_FPTRUNC [[FMUL1]](s32) - ; VI: [[INT3:%[0-9]+]]:_(s16) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.fixup), [[FPTRUNC1]](s16), [[TRUNC4]](s16), [[TRUNC1]](s16) - ; VI: [[FPEXT4:%[0-9]+]]:_(s32) = G_FPEXT [[TRUNC2]](s16) - ; VI: [[FPEXT5:%[0-9]+]]:_(s32) = G_FPEXT [[TRUNC5]](s16) - ; VI: [[INT4:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.rcp), [[FPEXT5]](s32) - ; VI: [[FMUL2:%[0-9]+]]:_(s32) = G_FMUL [[FPEXT4]], [[INT4]] - ; VI: [[FPTRUNC2:%[0-9]+]]:_(s16) = G_FPTRUNC [[FMUL2]](s32) - ; VI: [[INT5:%[0-9]+]]:_(s16) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.fixup), [[FPTRUNC2]](s16), 
[[TRUNC5]](s16), [[TRUNC2]](s16) - ; VI: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[INT1]](s16) - ; VI: [[ANYEXT1:%[0-9]+]]:_(s32) = G_ANYEXT [[INT3]](s16) - ; VI: [[ANYEXT2:%[0-9]+]]:_(s32) = G_ANYEXT [[INT5]](s16) - ; VI: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[ANYEXT]](s32), [[ANYEXT1]](s32), [[ANYEXT2]](s32) - ; VI: S_NOP 0, implicit [[BUILD_VECTOR]](<3 x s32>) + ; VI-NEXT: [[DEF1:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF + ; VI-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<4 x s16>) + ; VI-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>) + ; VI-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST]](s32) + ; VI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; VI-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) + ; VI-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) + ; VI-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>) + ; VI-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST1]](s32) + ; VI-NEXT: [[UV2:%[0-9]+]]:_(<2 x s16>), [[UV3:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF1]](<4 x s16>) + ; VI-NEXT: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[UV2]](<2 x s16>) + ; VI-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST2]](s32) + ; VI-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C]](s32) + ; VI-NEXT: [[TRUNC4:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR1]](s32) + ; VI-NEXT: [[BITCAST3:%[0-9]+]]:_(s32) = G_BITCAST [[UV3]](<2 x s16>) + ; VI-NEXT: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST3]](s32) + ; VI-NEXT: [[FPEXT:%[0-9]+]]:_(s32) = G_FPEXT [[TRUNC]](s16) + ; VI-NEXT: [[FPEXT1:%[0-9]+]]:_(s32) = G_FPEXT [[TRUNC3]](s16) + ; VI-NEXT: [[INT:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.rcp), [[FPEXT1]](s32) + ; VI-NEXT: [[FMUL:%[0-9]+]]:_(s32) = G_FMUL [[FPEXT]], [[INT]] + ; VI-NEXT: [[FPTRUNC:%[0-9]+]]:_(s16) = G_FPTRUNC [[FMUL]](s32) + ; VI-NEXT: [[INT1:%[0-9]+]]:_(s16) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.fixup), [[FPTRUNC]](s16), [[TRUNC3]](s16), [[TRUNC]](s16) + ; VI-NEXT: [[FPEXT2:%[0-9]+]]:_(s32) = G_FPEXT [[TRUNC1]](s16) + ; VI-NEXT: [[FPEXT3:%[0-9]+]]:_(s32) = G_FPEXT [[TRUNC4]](s16) + ; VI-NEXT: [[INT2:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.rcp), [[FPEXT3]](s32) + ; VI-NEXT: [[FMUL1:%[0-9]+]]:_(s32) = G_FMUL [[FPEXT2]], [[INT2]] + ; VI-NEXT: [[FPTRUNC1:%[0-9]+]]:_(s16) = G_FPTRUNC [[FMUL1]](s32) + ; VI-NEXT: [[INT3:%[0-9]+]]:_(s16) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.fixup), [[FPTRUNC1]](s16), [[TRUNC4]](s16), [[TRUNC1]](s16) + ; VI-NEXT: [[FPEXT4:%[0-9]+]]:_(s32) = G_FPEXT [[TRUNC2]](s16) + ; VI-NEXT: [[FPEXT5:%[0-9]+]]:_(s32) = G_FPEXT [[TRUNC5]](s16) + ; VI-NEXT: [[INT4:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.rcp), [[FPEXT5]](s32) + ; VI-NEXT: [[FMUL2:%[0-9]+]]:_(s32) = G_FMUL [[FPEXT4]], [[INT4]] + ; VI-NEXT: [[FPTRUNC2:%[0-9]+]]:_(s16) = G_FPTRUNC [[FMUL2]](s32) + ; VI-NEXT: [[INT5:%[0-9]+]]:_(s16) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.fixup), [[FPTRUNC2]](s16), [[TRUNC5]](s16), [[TRUNC2]](s16) + ; VI-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[INT1]](s16) + ; VI-NEXT: [[ANYEXT1:%[0-9]+]]:_(s32) = G_ANYEXT [[INT3]](s16) + ; VI-NEXT: [[ANYEXT2:%[0-9]+]]:_(s32) = G_ANYEXT [[INT5]](s16) + ; VI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[ANYEXT]](s32), [[ANYEXT1]](s32), [[ANYEXT2]](s32) + ; VI-NEXT: S_NOP 0, implicit [[BUILD_VECTOR]](<3 x s32>) ; GFX9-LABEL: name: test_fdiv_v3s16 ; GFX9: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF - ; GFX9: [[DEF1:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF - ; 
GFX9: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<4 x s16>) - ; GFX9: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>) - ; GFX9: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST]](s32) - ; GFX9: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; GFX9: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) - ; GFX9: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) - ; GFX9: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>) - ; GFX9: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST1]](s32) - ; GFX9: [[UV2:%[0-9]+]]:_(<2 x s16>), [[UV3:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF1]](<4 x s16>) - ; GFX9: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[UV2]](<2 x s16>) - ; GFX9: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST2]](s32) - ; GFX9: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C]](s32) - ; GFX9: [[TRUNC4:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR1]](s32) - ; GFX9: [[BITCAST3:%[0-9]+]]:_(s32) = G_BITCAST [[UV3]](<2 x s16>) - ; GFX9: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST3]](s32) - ; GFX9: [[FPEXT:%[0-9]+]]:_(s32) = G_FPEXT [[TRUNC]](s16) - ; GFX9: [[FPEXT1:%[0-9]+]]:_(s32) = G_FPEXT [[TRUNC3]](s16) - ; GFX9: [[INT:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.rcp), [[FPEXT1]](s32) - ; GFX9: [[FMUL:%[0-9]+]]:_(s32) = G_FMUL [[FPEXT]], [[INT]] - ; GFX9: [[FPTRUNC:%[0-9]+]]:_(s16) = G_FPTRUNC [[FMUL]](s32) - ; GFX9: [[INT1:%[0-9]+]]:_(s16) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.fixup), [[FPTRUNC]](s16), [[TRUNC3]](s16), [[TRUNC]](s16) - ; GFX9: [[FPEXT2:%[0-9]+]]:_(s32) = G_FPEXT [[TRUNC1]](s16) - ; GFX9: [[FPEXT3:%[0-9]+]]:_(s32) = G_FPEXT [[TRUNC4]](s16) - ; GFX9: [[INT2:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.rcp), [[FPEXT3]](s32) - ; GFX9: [[FMUL1:%[0-9]+]]:_(s32) = G_FMUL [[FPEXT2]], [[INT2]] - ; GFX9: [[FPTRUNC1:%[0-9]+]]:_(s16) = G_FPTRUNC [[FMUL1]](s32) - ; GFX9: [[INT3:%[0-9]+]]:_(s16) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.fixup), [[FPTRUNC1]](s16), [[TRUNC4]](s16), [[TRUNC1]](s16) - ; GFX9: [[FPEXT4:%[0-9]+]]:_(s32) = G_FPEXT [[TRUNC2]](s16) - ; GFX9: [[FPEXT5:%[0-9]+]]:_(s32) = G_FPEXT [[TRUNC5]](s16) - ; GFX9: [[INT4:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.rcp), [[FPEXT5]](s32) - ; GFX9: [[FMUL2:%[0-9]+]]:_(s32) = G_FMUL [[FPEXT4]], [[INT4]] - ; GFX9: [[FPTRUNC2:%[0-9]+]]:_(s16) = G_FPTRUNC [[FMUL2]](s32) - ; GFX9: [[INT5:%[0-9]+]]:_(s16) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.fixup), [[FPTRUNC2]](s16), [[TRUNC5]](s16), [[TRUNC2]](s16) - ; GFX9: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[INT1]](s16) - ; GFX9: [[ANYEXT1:%[0-9]+]]:_(s32) = G_ANYEXT [[INT3]](s16) - ; GFX9: [[ANYEXT2:%[0-9]+]]:_(s32) = G_ANYEXT [[INT5]](s16) - ; GFX9: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[ANYEXT]](s32), [[ANYEXT1]](s32), [[ANYEXT2]](s32) - ; GFX9: S_NOP 0, implicit [[BUILD_VECTOR]](<3 x s32>) + ; GFX9-NEXT: [[DEF1:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF + ; GFX9-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<4 x s16>) + ; GFX9-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>) + ; GFX9-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST]](s32) + ; GFX9-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; GFX9-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) + ; GFX9-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) + ; GFX9-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>) + ; GFX9-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST1]](s32) + ; GFX9-NEXT: [[UV2:%[0-9]+]]:_(<2 x s16>), [[UV3:%[0-9]+]]:_(<2 
x s16>) = G_UNMERGE_VALUES [[DEF1]](<4 x s16>) + ; GFX9-NEXT: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[UV2]](<2 x s16>) + ; GFX9-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST2]](s32) + ; GFX9-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C]](s32) + ; GFX9-NEXT: [[TRUNC4:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR1]](s32) + ; GFX9-NEXT: [[BITCAST3:%[0-9]+]]:_(s32) = G_BITCAST [[UV3]](<2 x s16>) + ; GFX9-NEXT: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST3]](s32) + ; GFX9-NEXT: [[FPEXT:%[0-9]+]]:_(s32) = G_FPEXT [[TRUNC]](s16) + ; GFX9-NEXT: [[FPEXT1:%[0-9]+]]:_(s32) = G_FPEXT [[TRUNC3]](s16) + ; GFX9-NEXT: [[INT:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.rcp), [[FPEXT1]](s32) + ; GFX9-NEXT: [[FMUL:%[0-9]+]]:_(s32) = G_FMUL [[FPEXT]], [[INT]] + ; GFX9-NEXT: [[FPTRUNC:%[0-9]+]]:_(s16) = G_FPTRUNC [[FMUL]](s32) + ; GFX9-NEXT: [[INT1:%[0-9]+]]:_(s16) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.fixup), [[FPTRUNC]](s16), [[TRUNC3]](s16), [[TRUNC]](s16) + ; GFX9-NEXT: [[FPEXT2:%[0-9]+]]:_(s32) = G_FPEXT [[TRUNC1]](s16) + ; GFX9-NEXT: [[FPEXT3:%[0-9]+]]:_(s32) = G_FPEXT [[TRUNC4]](s16) + ; GFX9-NEXT: [[INT2:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.rcp), [[FPEXT3]](s32) + ; GFX9-NEXT: [[FMUL1:%[0-9]+]]:_(s32) = G_FMUL [[FPEXT2]], [[INT2]] + ; GFX9-NEXT: [[FPTRUNC1:%[0-9]+]]:_(s16) = G_FPTRUNC [[FMUL1]](s32) + ; GFX9-NEXT: [[INT3:%[0-9]+]]:_(s16) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.fixup), [[FPTRUNC1]](s16), [[TRUNC4]](s16), [[TRUNC1]](s16) + ; GFX9-NEXT: [[FPEXT4:%[0-9]+]]:_(s32) = G_FPEXT [[TRUNC2]](s16) + ; GFX9-NEXT: [[FPEXT5:%[0-9]+]]:_(s32) = G_FPEXT [[TRUNC5]](s16) + ; GFX9-NEXT: [[INT4:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.rcp), [[FPEXT5]](s32) + ; GFX9-NEXT: [[FMUL2:%[0-9]+]]:_(s32) = G_FMUL [[FPEXT4]], [[INT4]] + ; GFX9-NEXT: [[FPTRUNC2:%[0-9]+]]:_(s16) = G_FPTRUNC [[FMUL2]](s32) + ; GFX9-NEXT: [[INT5:%[0-9]+]]:_(s16) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.fixup), [[FPTRUNC2]](s16), [[TRUNC5]](s16), [[TRUNC2]](s16) + ; GFX9-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[INT1]](s16) + ; GFX9-NEXT: [[ANYEXT1:%[0-9]+]]:_(s32) = G_ANYEXT [[INT3]](s16) + ; GFX9-NEXT: [[ANYEXT2:%[0-9]+]]:_(s32) = G_ANYEXT [[INT5]](s16) + ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[ANYEXT]](s32), [[ANYEXT1]](s32), [[ANYEXT2]](s32) + ; GFX9-NEXT: S_NOP 0, implicit [[BUILD_VECTOR]](<3 x s32>) ; GFX9-UNSAFE-LABEL: name: test_fdiv_v3s16 ; GFX9-UNSAFE: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF - ; GFX9-UNSAFE: [[DEF1:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF - ; GFX9-UNSAFE: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<4 x s16>) - ; GFX9-UNSAFE: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>) - ; GFX9-UNSAFE: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST]](s32) - ; GFX9-UNSAFE: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; GFX9-UNSAFE: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) - ; GFX9-UNSAFE: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) - ; GFX9-UNSAFE: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>) - ; GFX9-UNSAFE: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST1]](s32) - ; GFX9-UNSAFE: [[UV2:%[0-9]+]]:_(<2 x s16>), [[UV3:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF1]](<4 x s16>) - ; GFX9-UNSAFE: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[UV2]](<2 x s16>) - ; GFX9-UNSAFE: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST2]](s32) - ; GFX9-UNSAFE: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C]](s32) - ; GFX9-UNSAFE: [[TRUNC4:%[0-9]+]]:_(s16) = G_TRUNC 
[[LSHR1]](s32) - ; GFX9-UNSAFE: [[BITCAST3:%[0-9]+]]:_(s32) = G_BITCAST [[UV3]](<2 x s16>) - ; GFX9-UNSAFE: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST3]](s32) - ; GFX9-UNSAFE: [[INT:%[0-9]+]]:_(s16) = G_INTRINSIC intrinsic(@llvm.amdgcn.rcp), [[TRUNC3]](s16) - ; GFX9-UNSAFE: [[FMUL:%[0-9]+]]:_(s16) = G_FMUL [[TRUNC]], [[INT]] - ; GFX9-UNSAFE: [[INT1:%[0-9]+]]:_(s16) = G_INTRINSIC intrinsic(@llvm.amdgcn.rcp), [[TRUNC4]](s16) - ; GFX9-UNSAFE: [[FMUL1:%[0-9]+]]:_(s16) = G_FMUL [[TRUNC1]], [[INT1]] - ; GFX9-UNSAFE: [[INT2:%[0-9]+]]:_(s16) = G_INTRINSIC intrinsic(@llvm.amdgcn.rcp), [[TRUNC5]](s16) - ; GFX9-UNSAFE: [[FMUL2:%[0-9]+]]:_(s16) = G_FMUL [[TRUNC2]], [[INT2]] - ; GFX9-UNSAFE: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[FMUL]](s16) - ; GFX9-UNSAFE: [[ANYEXT1:%[0-9]+]]:_(s32) = G_ANYEXT [[FMUL1]](s16) - ; GFX9-UNSAFE: [[ANYEXT2:%[0-9]+]]:_(s32) = G_ANYEXT [[FMUL2]](s16) - ; GFX9-UNSAFE: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[ANYEXT]](s32), [[ANYEXT1]](s32), [[ANYEXT2]](s32) - ; GFX9-UNSAFE: S_NOP 0, implicit [[BUILD_VECTOR]](<3 x s32>) + ; GFX9-UNSAFE-NEXT: [[DEF1:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF + ; GFX9-UNSAFE-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<4 x s16>) + ; GFX9-UNSAFE-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>) + ; GFX9-UNSAFE-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST]](s32) + ; GFX9-UNSAFE-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; GFX9-UNSAFE-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) + ; GFX9-UNSAFE-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) + ; GFX9-UNSAFE-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>) + ; GFX9-UNSAFE-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST1]](s32) + ; GFX9-UNSAFE-NEXT: [[UV2:%[0-9]+]]:_(<2 x s16>), [[UV3:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF1]](<4 x s16>) + ; GFX9-UNSAFE-NEXT: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[UV2]](<2 x s16>) + ; GFX9-UNSAFE-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST2]](s32) + ; GFX9-UNSAFE-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C]](s32) + ; GFX9-UNSAFE-NEXT: [[TRUNC4:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR1]](s32) + ; GFX9-UNSAFE-NEXT: [[BITCAST3:%[0-9]+]]:_(s32) = G_BITCAST [[UV3]](<2 x s16>) + ; GFX9-UNSAFE-NEXT: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST3]](s32) + ; GFX9-UNSAFE-NEXT: [[INT:%[0-9]+]]:_(s16) = G_INTRINSIC intrinsic(@llvm.amdgcn.rcp), [[TRUNC3]](s16) + ; GFX9-UNSAFE-NEXT: [[FMUL:%[0-9]+]]:_(s16) = G_FMUL [[TRUNC]], [[INT]] + ; GFX9-UNSAFE-NEXT: [[INT1:%[0-9]+]]:_(s16) = G_INTRINSIC intrinsic(@llvm.amdgcn.rcp), [[TRUNC4]](s16) + ; GFX9-UNSAFE-NEXT: [[FMUL1:%[0-9]+]]:_(s16) = G_FMUL [[TRUNC1]], [[INT1]] + ; GFX9-UNSAFE-NEXT: [[INT2:%[0-9]+]]:_(s16) = G_INTRINSIC intrinsic(@llvm.amdgcn.rcp), [[TRUNC5]](s16) + ; GFX9-UNSAFE-NEXT: [[FMUL2:%[0-9]+]]:_(s16) = G_FMUL [[TRUNC2]], [[INT2]] + ; GFX9-UNSAFE-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[FMUL]](s16) + ; GFX9-UNSAFE-NEXT: [[ANYEXT1:%[0-9]+]]:_(s32) = G_ANYEXT [[FMUL1]](s16) + ; GFX9-UNSAFE-NEXT: [[ANYEXT2:%[0-9]+]]:_(s32) = G_ANYEXT [[FMUL2]](s16) + ; GFX9-UNSAFE-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[ANYEXT]](s32), [[ANYEXT1]](s32), [[ANYEXT2]](s32) + ; GFX9-UNSAFE-NEXT: S_NOP 0, implicit [[BUILD_VECTOR]](<3 x s32>) ; GFX10-LABEL: name: test_fdiv_v3s16 ; GFX10: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF - ; GFX10: [[DEF1:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF - ; GFX10: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = 
G_UNMERGE_VALUES [[DEF]](<4 x s16>) - ; GFX10: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>) - ; GFX10: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST]](s32) - ; GFX10: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; GFX10: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) - ; GFX10: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) - ; GFX10: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>) - ; GFX10: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST1]](s32) - ; GFX10: [[UV2:%[0-9]+]]:_(<2 x s16>), [[UV3:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF1]](<4 x s16>) - ; GFX10: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[UV2]](<2 x s16>) - ; GFX10: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST2]](s32) - ; GFX10: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C]](s32) - ; GFX10: [[TRUNC4:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR1]](s32) - ; GFX10: [[BITCAST3:%[0-9]+]]:_(s32) = G_BITCAST [[UV3]](<2 x s16>) - ; GFX10: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST3]](s32) - ; GFX10: [[FPEXT:%[0-9]+]]:_(s32) = G_FPEXT [[TRUNC]](s16) - ; GFX10: [[FPEXT1:%[0-9]+]]:_(s32) = G_FPEXT [[TRUNC3]](s16) - ; GFX10: [[INT:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.rcp), [[FPEXT1]](s32) - ; GFX10: [[FMUL:%[0-9]+]]:_(s32) = G_FMUL [[FPEXT]], [[INT]] - ; GFX10: [[FPTRUNC:%[0-9]+]]:_(s16) = G_FPTRUNC [[FMUL]](s32) - ; GFX10: [[INT1:%[0-9]+]]:_(s16) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.fixup), [[FPTRUNC]](s16), [[TRUNC3]](s16), [[TRUNC]](s16) - ; GFX10: [[FPEXT2:%[0-9]+]]:_(s32) = G_FPEXT [[TRUNC1]](s16) - ; GFX10: [[FPEXT3:%[0-9]+]]:_(s32) = G_FPEXT [[TRUNC4]](s16) - ; GFX10: [[INT2:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.rcp), [[FPEXT3]](s32) - ; GFX10: [[FMUL1:%[0-9]+]]:_(s32) = G_FMUL [[FPEXT2]], [[INT2]] - ; GFX10: [[FPTRUNC1:%[0-9]+]]:_(s16) = G_FPTRUNC [[FMUL1]](s32) - ; GFX10: [[INT3:%[0-9]+]]:_(s16) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.fixup), [[FPTRUNC1]](s16), [[TRUNC4]](s16), [[TRUNC1]](s16) - ; GFX10: [[FPEXT4:%[0-9]+]]:_(s32) = G_FPEXT [[TRUNC2]](s16) - ; GFX10: [[FPEXT5:%[0-9]+]]:_(s32) = G_FPEXT [[TRUNC5]](s16) - ; GFX10: [[INT4:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.rcp), [[FPEXT5]](s32) - ; GFX10: [[FMUL2:%[0-9]+]]:_(s32) = G_FMUL [[FPEXT4]], [[INT4]] - ; GFX10: [[FPTRUNC2:%[0-9]+]]:_(s16) = G_FPTRUNC [[FMUL2]](s32) - ; GFX10: [[INT5:%[0-9]+]]:_(s16) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.fixup), [[FPTRUNC2]](s16), [[TRUNC5]](s16), [[TRUNC2]](s16) - ; GFX10: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[INT1]](s16) - ; GFX10: [[ANYEXT1:%[0-9]+]]:_(s32) = G_ANYEXT [[INT3]](s16) - ; GFX10: [[ANYEXT2:%[0-9]+]]:_(s32) = G_ANYEXT [[INT5]](s16) - ; GFX10: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[ANYEXT]](s32), [[ANYEXT1]](s32), [[ANYEXT2]](s32) - ; GFX10: S_NOP 0, implicit [[BUILD_VECTOR]](<3 x s32>) + ; GFX10-NEXT: [[DEF1:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF + ; GFX10-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<4 x s16>) + ; GFX10-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>) + ; GFX10-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST]](s32) + ; GFX10-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; GFX10-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) + ; GFX10-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) + ; GFX10-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>) + ; GFX10-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST1]](s32) + ; GFX10-NEXT: [[UV2:%[0-9]+]]:_(<2 x s16>), [[UV3:%[0-9]+]]:_(<2 x s16>) = 
G_UNMERGE_VALUES [[DEF1]](<4 x s16>) + ; GFX10-NEXT: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[UV2]](<2 x s16>) + ; GFX10-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST2]](s32) + ; GFX10-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C]](s32) + ; GFX10-NEXT: [[TRUNC4:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR1]](s32) + ; GFX10-NEXT: [[BITCAST3:%[0-9]+]]:_(s32) = G_BITCAST [[UV3]](<2 x s16>) + ; GFX10-NEXT: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST3]](s32) + ; GFX10-NEXT: [[FPEXT:%[0-9]+]]:_(s32) = G_FPEXT [[TRUNC]](s16) + ; GFX10-NEXT: [[FPEXT1:%[0-9]+]]:_(s32) = G_FPEXT [[TRUNC3]](s16) + ; GFX10-NEXT: [[INT:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.rcp), [[FPEXT1]](s32) + ; GFX10-NEXT: [[FMUL:%[0-9]+]]:_(s32) = G_FMUL [[FPEXT]], [[INT]] + ; GFX10-NEXT: [[FPTRUNC:%[0-9]+]]:_(s16) = G_FPTRUNC [[FMUL]](s32) + ; GFX10-NEXT: [[INT1:%[0-9]+]]:_(s16) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.fixup), [[FPTRUNC]](s16), [[TRUNC3]](s16), [[TRUNC]](s16) + ; GFX10-NEXT: [[FPEXT2:%[0-9]+]]:_(s32) = G_FPEXT [[TRUNC1]](s16) + ; GFX10-NEXT: [[FPEXT3:%[0-9]+]]:_(s32) = G_FPEXT [[TRUNC4]](s16) + ; GFX10-NEXT: [[INT2:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.rcp), [[FPEXT3]](s32) + ; GFX10-NEXT: [[FMUL1:%[0-9]+]]:_(s32) = G_FMUL [[FPEXT2]], [[INT2]] + ; GFX10-NEXT: [[FPTRUNC1:%[0-9]+]]:_(s16) = G_FPTRUNC [[FMUL1]](s32) + ; GFX10-NEXT: [[INT3:%[0-9]+]]:_(s16) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.fixup), [[FPTRUNC1]](s16), [[TRUNC4]](s16), [[TRUNC1]](s16) + ; GFX10-NEXT: [[FPEXT4:%[0-9]+]]:_(s32) = G_FPEXT [[TRUNC2]](s16) + ; GFX10-NEXT: [[FPEXT5:%[0-9]+]]:_(s32) = G_FPEXT [[TRUNC5]](s16) + ; GFX10-NEXT: [[INT4:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.rcp), [[FPEXT5]](s32) + ; GFX10-NEXT: [[FMUL2:%[0-9]+]]:_(s32) = G_FMUL [[FPEXT4]], [[INT4]] + ; GFX10-NEXT: [[FPTRUNC2:%[0-9]+]]:_(s16) = G_FPTRUNC [[FMUL2]](s32) + ; GFX10-NEXT: [[INT5:%[0-9]+]]:_(s16) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.fixup), [[FPTRUNC2]](s16), [[TRUNC5]](s16), [[TRUNC2]](s16) + ; GFX10-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[INT1]](s16) + ; GFX10-NEXT: [[ANYEXT1:%[0-9]+]]:_(s32) = G_ANYEXT [[INT3]](s16) + ; GFX10-NEXT: [[ANYEXT2:%[0-9]+]]:_(s32) = G_ANYEXT [[INT5]](s16) + ; GFX10-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[ANYEXT]](s32), [[ANYEXT1]](s32), [[ANYEXT2]](s32) + ; GFX10-NEXT: S_NOP 0, implicit [[BUILD_VECTOR]](<3 x s32>) %0:_(<3 x s16>) = G_IMPLICIT_DEF %1:_(<3 x s16>) = G_IMPLICIT_DEF %2:_(<3 x s16>) = G_FDIV %0, %1 @@ -1616,303 +1616,303 @@ body: | ; SI-LABEL: name: test_fdiv_v4s16 ; SI: [[COPY:%[0-9]+]]:_(<4 x s16>) = COPY $vgpr0_vgpr1 - ; SI: [[COPY1:%[0-9]+]]:_(<4 x s16>) = COPY $vgpr2_vgpr3 - ; SI: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[COPY]](<4 x s16>) - ; SI: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>) - ; SI: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST]](s32) - ; SI: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; SI: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) - ; SI: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) - ; SI: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>) - ; SI: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST1]](s32) - ; SI: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C]](s32) - ; SI: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR1]](s32) - ; SI: [[UV2:%[0-9]+]]:_(<2 x s16>), [[UV3:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[COPY1]](<4 x s16>) - ; SI: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[UV2]](<2 x s16>) - ; SI: [[TRUNC4:%[0-9]+]]:_(s16) = 
G_TRUNC [[BITCAST2]](s32) - ; SI: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C]](s32) - ; SI: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR2]](s32) - ; SI: [[BITCAST3:%[0-9]+]]:_(s32) = G_BITCAST [[UV3]](<2 x s16>) - ; SI: [[TRUNC6:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST3]](s32) - ; SI: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST3]], [[C]](s32) - ; SI: [[TRUNC7:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR3]](s32) - ; SI: [[FPEXT:%[0-9]+]]:_(s32) = G_FPEXT [[TRUNC]](s16) - ; SI: [[FPEXT1:%[0-9]+]]:_(s32) = G_FPEXT [[TRUNC4]](s16) - ; SI: [[C1:%[0-9]+]]:_(s32) = G_FCONSTANT float 1.000000e+00 - ; SI: [[INT:%[0-9]+]]:_(s32), [[INT1:%[0-9]+]]:_(s1) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.scale), [[FPEXT]](s32), [[FPEXT1]](s32), 0 - ; SI: [[INT2:%[0-9]+]]:_(s32), [[INT3:%[0-9]+]]:_(s1) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.scale), [[FPEXT]](s32), [[FPEXT1]](s32), 1 - ; SI: [[INT4:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.rcp), [[INT]](s32) - ; SI: [[FNEG:%[0-9]+]]:_(s32) = G_FNEG [[INT]] - ; SI: [[FMA:%[0-9]+]]:_(s32) = G_FMA [[FNEG]], [[INT4]], [[C1]] - ; SI: [[FMA1:%[0-9]+]]:_(s32) = G_FMA [[FMA]], [[INT4]], [[INT4]] - ; SI: [[FMUL:%[0-9]+]]:_(s32) = G_FMUL [[INT2]], [[FMA1]] - ; SI: [[FMA2:%[0-9]+]]:_(s32) = G_FMA [[FNEG]], [[FMUL]], [[INT2]] - ; SI: [[FMA3:%[0-9]+]]:_(s32) = G_FMA [[FMA2]], [[FMA1]], [[FMUL]] - ; SI: [[FMA4:%[0-9]+]]:_(s32) = G_FMA [[FNEG]], [[FMA3]], [[INT2]] - ; SI: [[INT5:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.fmas), [[FMA4]](s32), [[FMA1]](s32), [[FMA3]](s32), [[INT3]](s1) - ; SI: [[INT6:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.fixup), [[INT5]](s32), [[FPEXT1]](s32), [[FPEXT]](s32) - ; SI: [[FPTRUNC:%[0-9]+]]:_(s16) = G_FPTRUNC [[INT6]](s32) - ; SI: [[FPEXT2:%[0-9]+]]:_(s32) = G_FPEXT [[TRUNC1]](s16) - ; SI: [[FPEXT3:%[0-9]+]]:_(s32) = G_FPEXT [[TRUNC5]](s16) - ; SI: [[INT7:%[0-9]+]]:_(s32), [[INT8:%[0-9]+]]:_(s1) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.scale), [[FPEXT2]](s32), [[FPEXT3]](s32), 0 - ; SI: [[INT9:%[0-9]+]]:_(s32), [[INT10:%[0-9]+]]:_(s1) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.scale), [[FPEXT2]](s32), [[FPEXT3]](s32), 1 - ; SI: [[INT11:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.rcp), [[INT7]](s32) - ; SI: [[FNEG1:%[0-9]+]]:_(s32) = G_FNEG [[INT7]] - ; SI: [[FMA5:%[0-9]+]]:_(s32) = G_FMA [[FNEG1]], [[INT11]], [[C1]] - ; SI: [[FMA6:%[0-9]+]]:_(s32) = G_FMA [[FMA5]], [[INT11]], [[INT11]] - ; SI: [[FMUL1:%[0-9]+]]:_(s32) = G_FMUL [[INT9]], [[FMA6]] - ; SI: [[FMA7:%[0-9]+]]:_(s32) = G_FMA [[FNEG1]], [[FMUL1]], [[INT9]] - ; SI: [[FMA8:%[0-9]+]]:_(s32) = G_FMA [[FMA7]], [[FMA6]], [[FMUL1]] - ; SI: [[FMA9:%[0-9]+]]:_(s32) = G_FMA [[FNEG1]], [[FMA8]], [[INT9]] - ; SI: [[INT12:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.fmas), [[FMA9]](s32), [[FMA6]](s32), [[FMA8]](s32), [[INT10]](s1) - ; SI: [[INT13:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.fixup), [[INT12]](s32), [[FPEXT3]](s32), [[FPEXT2]](s32) - ; SI: [[FPTRUNC1:%[0-9]+]]:_(s16) = G_FPTRUNC [[INT13]](s32) - ; SI: [[FPEXT4:%[0-9]+]]:_(s32) = G_FPEXT [[TRUNC2]](s16) - ; SI: [[FPEXT5:%[0-9]+]]:_(s32) = G_FPEXT [[TRUNC6]](s16) - ; SI: [[INT14:%[0-9]+]]:_(s32), [[INT15:%[0-9]+]]:_(s1) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.scale), [[FPEXT4]](s32), [[FPEXT5]](s32), 0 - ; SI: [[INT16:%[0-9]+]]:_(s32), [[INT17:%[0-9]+]]:_(s1) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.scale), [[FPEXT4]](s32), [[FPEXT5]](s32), 1 - ; SI: [[INT18:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.rcp), [[INT14]](s32) - ; SI: [[FNEG2:%[0-9]+]]:_(s32) = 
G_FNEG [[INT14]] - ; SI: [[FMA10:%[0-9]+]]:_(s32) = G_FMA [[FNEG2]], [[INT18]], [[C1]] - ; SI: [[FMA11:%[0-9]+]]:_(s32) = G_FMA [[FMA10]], [[INT18]], [[INT18]] - ; SI: [[FMUL2:%[0-9]+]]:_(s32) = G_FMUL [[INT16]], [[FMA11]] - ; SI: [[FMA12:%[0-9]+]]:_(s32) = G_FMA [[FNEG2]], [[FMUL2]], [[INT16]] - ; SI: [[FMA13:%[0-9]+]]:_(s32) = G_FMA [[FMA12]], [[FMA11]], [[FMUL2]] - ; SI: [[FMA14:%[0-9]+]]:_(s32) = G_FMA [[FNEG2]], [[FMA13]], [[INT16]] - ; SI: [[INT19:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.fmas), [[FMA14]](s32), [[FMA11]](s32), [[FMA13]](s32), [[INT17]](s1) - ; SI: [[INT20:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.fixup), [[INT19]](s32), [[FPEXT5]](s32), [[FPEXT4]](s32) - ; SI: [[FPTRUNC2:%[0-9]+]]:_(s16) = G_FPTRUNC [[INT20]](s32) - ; SI: [[FPEXT6:%[0-9]+]]:_(s32) = G_FPEXT [[TRUNC3]](s16) - ; SI: [[FPEXT7:%[0-9]+]]:_(s32) = G_FPEXT [[TRUNC7]](s16) - ; SI: [[INT21:%[0-9]+]]:_(s32), [[INT22:%[0-9]+]]:_(s1) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.scale), [[FPEXT6]](s32), [[FPEXT7]](s32), 0 - ; SI: [[INT23:%[0-9]+]]:_(s32), [[INT24:%[0-9]+]]:_(s1) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.scale), [[FPEXT6]](s32), [[FPEXT7]](s32), 1 - ; SI: [[INT25:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.rcp), [[INT21]](s32) - ; SI: [[FNEG3:%[0-9]+]]:_(s32) = G_FNEG [[INT21]] - ; SI: [[FMA15:%[0-9]+]]:_(s32) = G_FMA [[FNEG3]], [[INT25]], [[C1]] - ; SI: [[FMA16:%[0-9]+]]:_(s32) = G_FMA [[FMA15]], [[INT25]], [[INT25]] - ; SI: [[FMUL3:%[0-9]+]]:_(s32) = G_FMUL [[INT23]], [[FMA16]] - ; SI: [[FMA17:%[0-9]+]]:_(s32) = G_FMA [[FNEG3]], [[FMUL3]], [[INT23]] - ; SI: [[FMA18:%[0-9]+]]:_(s32) = G_FMA [[FMA17]], [[FMA16]], [[FMUL3]] - ; SI: [[FMA19:%[0-9]+]]:_(s32) = G_FMA [[FNEG3]], [[FMA18]], [[INT23]] - ; SI: [[INT26:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.fmas), [[FMA19]](s32), [[FMA16]](s32), [[FMA18]](s32), [[INT24]](s1) - ; SI: [[INT27:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.fixup), [[INT26]](s32), [[FPEXT7]](s32), [[FPEXT6]](s32) - ; SI: [[FPTRUNC3:%[0-9]+]]:_(s16) = G_FPTRUNC [[INT27]](s32) - ; SI: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[FPTRUNC]](s16) - ; SI: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[FPTRUNC1]](s16) - ; SI: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C]](s32) - ; SI: [[OR:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL]] - ; SI: [[BITCAST4:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) - ; SI: [[ZEXT2:%[0-9]+]]:_(s32) = G_ZEXT [[FPTRUNC2]](s16) - ; SI: [[ZEXT3:%[0-9]+]]:_(s32) = G_ZEXT [[FPTRUNC3]](s16) - ; SI: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[ZEXT3]], [[C]](s32) - ; SI: [[OR1:%[0-9]+]]:_(s32) = G_OR [[ZEXT2]], [[SHL1]] - ; SI: [[BITCAST5:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32) - ; SI: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BITCAST4]](<2 x s16>), [[BITCAST5]](<2 x s16>) - ; SI: $vgpr0_vgpr1 = COPY [[CONCAT_VECTORS]](<4 x s16>) + ; SI-NEXT: [[COPY1:%[0-9]+]]:_(<4 x s16>) = COPY $vgpr2_vgpr3 + ; SI-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[COPY]](<4 x s16>) + ; SI-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>) + ; SI-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST]](s32) + ; SI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; SI-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) + ; SI-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) + ; SI-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>) + ; SI-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST1]](s32) + ; SI-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR 
[[BITCAST1]], [[C]](s32) + ; SI-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR1]](s32) + ; SI-NEXT: [[UV2:%[0-9]+]]:_(<2 x s16>), [[UV3:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[COPY1]](<4 x s16>) + ; SI-NEXT: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[UV2]](<2 x s16>) + ; SI-NEXT: [[TRUNC4:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST2]](s32) + ; SI-NEXT: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C]](s32) + ; SI-NEXT: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR2]](s32) + ; SI-NEXT: [[BITCAST3:%[0-9]+]]:_(s32) = G_BITCAST [[UV3]](<2 x s16>) + ; SI-NEXT: [[TRUNC6:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST3]](s32) + ; SI-NEXT: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST3]], [[C]](s32) + ; SI-NEXT: [[TRUNC7:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR3]](s32) + ; SI-NEXT: [[FPEXT:%[0-9]+]]:_(s32) = G_FPEXT [[TRUNC]](s16) + ; SI-NEXT: [[FPEXT1:%[0-9]+]]:_(s32) = G_FPEXT [[TRUNC4]](s16) + ; SI-NEXT: [[C1:%[0-9]+]]:_(s32) = G_FCONSTANT float 1.000000e+00 + ; SI-NEXT: [[INT:%[0-9]+]]:_(s32), [[INT1:%[0-9]+]]:_(s1) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.scale), [[FPEXT]](s32), [[FPEXT1]](s32), 0 + ; SI-NEXT: [[INT2:%[0-9]+]]:_(s32), [[INT3:%[0-9]+]]:_(s1) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.scale), [[FPEXT]](s32), [[FPEXT1]](s32), 1 + ; SI-NEXT: [[INT4:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.rcp), [[INT]](s32) + ; SI-NEXT: [[FNEG:%[0-9]+]]:_(s32) = G_FNEG [[INT]] + ; SI-NEXT: [[FMA:%[0-9]+]]:_(s32) = G_FMA [[FNEG]], [[INT4]], [[C1]] + ; SI-NEXT: [[FMA1:%[0-9]+]]:_(s32) = G_FMA [[FMA]], [[INT4]], [[INT4]] + ; SI-NEXT: [[FMUL:%[0-9]+]]:_(s32) = G_FMUL [[INT2]], [[FMA1]] + ; SI-NEXT: [[FMA2:%[0-9]+]]:_(s32) = G_FMA [[FNEG]], [[FMUL]], [[INT2]] + ; SI-NEXT: [[FMA3:%[0-9]+]]:_(s32) = G_FMA [[FMA2]], [[FMA1]], [[FMUL]] + ; SI-NEXT: [[FMA4:%[0-9]+]]:_(s32) = G_FMA [[FNEG]], [[FMA3]], [[INT2]] + ; SI-NEXT: [[INT5:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.fmas), [[FMA4]](s32), [[FMA1]](s32), [[FMA3]](s32), [[INT3]](s1) + ; SI-NEXT: [[INT6:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.fixup), [[INT5]](s32), [[FPEXT1]](s32), [[FPEXT]](s32) + ; SI-NEXT: [[FPTRUNC:%[0-9]+]]:_(s16) = G_FPTRUNC [[INT6]](s32) + ; SI-NEXT: [[FPEXT2:%[0-9]+]]:_(s32) = G_FPEXT [[TRUNC1]](s16) + ; SI-NEXT: [[FPEXT3:%[0-9]+]]:_(s32) = G_FPEXT [[TRUNC5]](s16) + ; SI-NEXT: [[INT7:%[0-9]+]]:_(s32), [[INT8:%[0-9]+]]:_(s1) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.scale), [[FPEXT2]](s32), [[FPEXT3]](s32), 0 + ; SI-NEXT: [[INT9:%[0-9]+]]:_(s32), [[INT10:%[0-9]+]]:_(s1) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.scale), [[FPEXT2]](s32), [[FPEXT3]](s32), 1 + ; SI-NEXT: [[INT11:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.rcp), [[INT7]](s32) + ; SI-NEXT: [[FNEG1:%[0-9]+]]:_(s32) = G_FNEG [[INT7]] + ; SI-NEXT: [[FMA5:%[0-9]+]]:_(s32) = G_FMA [[FNEG1]], [[INT11]], [[C1]] + ; SI-NEXT: [[FMA6:%[0-9]+]]:_(s32) = G_FMA [[FMA5]], [[INT11]], [[INT11]] + ; SI-NEXT: [[FMUL1:%[0-9]+]]:_(s32) = G_FMUL [[INT9]], [[FMA6]] + ; SI-NEXT: [[FMA7:%[0-9]+]]:_(s32) = G_FMA [[FNEG1]], [[FMUL1]], [[INT9]] + ; SI-NEXT: [[FMA8:%[0-9]+]]:_(s32) = G_FMA [[FMA7]], [[FMA6]], [[FMUL1]] + ; SI-NEXT: [[FMA9:%[0-9]+]]:_(s32) = G_FMA [[FNEG1]], [[FMA8]], [[INT9]] + ; SI-NEXT: [[INT12:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.fmas), [[FMA9]](s32), [[FMA6]](s32), [[FMA8]](s32), [[INT10]](s1) + ; SI-NEXT: [[INT13:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.fixup), [[INT12]](s32), [[FPEXT3]](s32), [[FPEXT2]](s32) + ; SI-NEXT: [[FPTRUNC1:%[0-9]+]]:_(s16) = G_FPTRUNC [[INT13]](s32) + ; SI-NEXT: 
[[FPEXT4:%[0-9]+]]:_(s32) = G_FPEXT [[TRUNC2]](s16) + ; SI-NEXT: [[FPEXT5:%[0-9]+]]:_(s32) = G_FPEXT [[TRUNC6]](s16) + ; SI-NEXT: [[INT14:%[0-9]+]]:_(s32), [[INT15:%[0-9]+]]:_(s1) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.scale), [[FPEXT4]](s32), [[FPEXT5]](s32), 0 + ; SI-NEXT: [[INT16:%[0-9]+]]:_(s32), [[INT17:%[0-9]+]]:_(s1) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.scale), [[FPEXT4]](s32), [[FPEXT5]](s32), 1 + ; SI-NEXT: [[INT18:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.rcp), [[INT14]](s32) + ; SI-NEXT: [[FNEG2:%[0-9]+]]:_(s32) = G_FNEG [[INT14]] + ; SI-NEXT: [[FMA10:%[0-9]+]]:_(s32) = G_FMA [[FNEG2]], [[INT18]], [[C1]] + ; SI-NEXT: [[FMA11:%[0-9]+]]:_(s32) = G_FMA [[FMA10]], [[INT18]], [[INT18]] + ; SI-NEXT: [[FMUL2:%[0-9]+]]:_(s32) = G_FMUL [[INT16]], [[FMA11]] + ; SI-NEXT: [[FMA12:%[0-9]+]]:_(s32) = G_FMA [[FNEG2]], [[FMUL2]], [[INT16]] + ; SI-NEXT: [[FMA13:%[0-9]+]]:_(s32) = G_FMA [[FMA12]], [[FMA11]], [[FMUL2]] + ; SI-NEXT: [[FMA14:%[0-9]+]]:_(s32) = G_FMA [[FNEG2]], [[FMA13]], [[INT16]] + ; SI-NEXT: [[INT19:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.fmas), [[FMA14]](s32), [[FMA11]](s32), [[FMA13]](s32), [[INT17]](s1) + ; SI-NEXT: [[INT20:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.fixup), [[INT19]](s32), [[FPEXT5]](s32), [[FPEXT4]](s32) + ; SI-NEXT: [[FPTRUNC2:%[0-9]+]]:_(s16) = G_FPTRUNC [[INT20]](s32) + ; SI-NEXT: [[FPEXT6:%[0-9]+]]:_(s32) = G_FPEXT [[TRUNC3]](s16) + ; SI-NEXT: [[FPEXT7:%[0-9]+]]:_(s32) = G_FPEXT [[TRUNC7]](s16) + ; SI-NEXT: [[INT21:%[0-9]+]]:_(s32), [[INT22:%[0-9]+]]:_(s1) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.scale), [[FPEXT6]](s32), [[FPEXT7]](s32), 0 + ; SI-NEXT: [[INT23:%[0-9]+]]:_(s32), [[INT24:%[0-9]+]]:_(s1) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.scale), [[FPEXT6]](s32), [[FPEXT7]](s32), 1 + ; SI-NEXT: [[INT25:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.rcp), [[INT21]](s32) + ; SI-NEXT: [[FNEG3:%[0-9]+]]:_(s32) = G_FNEG [[INT21]] + ; SI-NEXT: [[FMA15:%[0-9]+]]:_(s32) = G_FMA [[FNEG3]], [[INT25]], [[C1]] + ; SI-NEXT: [[FMA16:%[0-9]+]]:_(s32) = G_FMA [[FMA15]], [[INT25]], [[INT25]] + ; SI-NEXT: [[FMUL3:%[0-9]+]]:_(s32) = G_FMUL [[INT23]], [[FMA16]] + ; SI-NEXT: [[FMA17:%[0-9]+]]:_(s32) = G_FMA [[FNEG3]], [[FMUL3]], [[INT23]] + ; SI-NEXT: [[FMA18:%[0-9]+]]:_(s32) = G_FMA [[FMA17]], [[FMA16]], [[FMUL3]] + ; SI-NEXT: [[FMA19:%[0-9]+]]:_(s32) = G_FMA [[FNEG3]], [[FMA18]], [[INT23]] + ; SI-NEXT: [[INT26:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.fmas), [[FMA19]](s32), [[FMA16]](s32), [[FMA18]](s32), [[INT24]](s1) + ; SI-NEXT: [[INT27:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.fixup), [[INT26]](s32), [[FPEXT7]](s32), [[FPEXT6]](s32) + ; SI-NEXT: [[FPTRUNC3:%[0-9]+]]:_(s16) = G_FPTRUNC [[INT27]](s32) + ; SI-NEXT: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[FPTRUNC]](s16) + ; SI-NEXT: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[FPTRUNC1]](s16) + ; SI-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C]](s32) + ; SI-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL]] + ; SI-NEXT: [[BITCAST4:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) + ; SI-NEXT: [[ZEXT2:%[0-9]+]]:_(s32) = G_ZEXT [[FPTRUNC2]](s16) + ; SI-NEXT: [[ZEXT3:%[0-9]+]]:_(s32) = G_ZEXT [[FPTRUNC3]](s16) + ; SI-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[ZEXT3]], [[C]](s32) + ; SI-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[ZEXT2]], [[SHL1]] + ; SI-NEXT: [[BITCAST5:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32) + ; SI-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BITCAST4]](<2 x s16>), [[BITCAST5]](<2 x s16>) + ; SI-NEXT: 
$vgpr0_vgpr1 = COPY [[CONCAT_VECTORS]](<4 x s16>) ; VI-LABEL: name: test_fdiv_v4s16 ; VI: [[COPY:%[0-9]+]]:_(<4 x s16>) = COPY $vgpr0_vgpr1 - ; VI: [[COPY1:%[0-9]+]]:_(<4 x s16>) = COPY $vgpr2_vgpr3 - ; VI: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[COPY]](<4 x s16>) - ; VI: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>) - ; VI: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST]](s32) - ; VI: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; VI: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) - ; VI: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) - ; VI: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>) - ; VI: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST1]](s32) - ; VI: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C]](s32) - ; VI: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR1]](s32) - ; VI: [[UV2:%[0-9]+]]:_(<2 x s16>), [[UV3:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[COPY1]](<4 x s16>) - ; VI: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[UV2]](<2 x s16>) - ; VI: [[TRUNC4:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST2]](s32) - ; VI: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C]](s32) - ; VI: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR2]](s32) - ; VI: [[BITCAST3:%[0-9]+]]:_(s32) = G_BITCAST [[UV3]](<2 x s16>) - ; VI: [[TRUNC6:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST3]](s32) - ; VI: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST3]], [[C]](s32) - ; VI: [[TRUNC7:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR3]](s32) - ; VI: [[FPEXT:%[0-9]+]]:_(s32) = G_FPEXT [[TRUNC]](s16) - ; VI: [[FPEXT1:%[0-9]+]]:_(s32) = G_FPEXT [[TRUNC4]](s16) - ; VI: [[INT:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.rcp), [[FPEXT1]](s32) - ; VI: [[FMUL:%[0-9]+]]:_(s32) = G_FMUL [[FPEXT]], [[INT]] - ; VI: [[FPTRUNC:%[0-9]+]]:_(s16) = G_FPTRUNC [[FMUL]](s32) - ; VI: [[INT1:%[0-9]+]]:_(s16) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.fixup), [[FPTRUNC]](s16), [[TRUNC4]](s16), [[TRUNC]](s16) - ; VI: [[FPEXT2:%[0-9]+]]:_(s32) = G_FPEXT [[TRUNC1]](s16) - ; VI: [[FPEXT3:%[0-9]+]]:_(s32) = G_FPEXT [[TRUNC5]](s16) - ; VI: [[INT2:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.rcp), [[FPEXT3]](s32) - ; VI: [[FMUL1:%[0-9]+]]:_(s32) = G_FMUL [[FPEXT2]], [[INT2]] - ; VI: [[FPTRUNC1:%[0-9]+]]:_(s16) = G_FPTRUNC [[FMUL1]](s32) - ; VI: [[INT3:%[0-9]+]]:_(s16) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.fixup), [[FPTRUNC1]](s16), [[TRUNC5]](s16), [[TRUNC1]](s16) - ; VI: [[FPEXT4:%[0-9]+]]:_(s32) = G_FPEXT [[TRUNC2]](s16) - ; VI: [[FPEXT5:%[0-9]+]]:_(s32) = G_FPEXT [[TRUNC6]](s16) - ; VI: [[INT4:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.rcp), [[FPEXT5]](s32) - ; VI: [[FMUL2:%[0-9]+]]:_(s32) = G_FMUL [[FPEXT4]], [[INT4]] - ; VI: [[FPTRUNC2:%[0-9]+]]:_(s16) = G_FPTRUNC [[FMUL2]](s32) - ; VI: [[INT5:%[0-9]+]]:_(s16) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.fixup), [[FPTRUNC2]](s16), [[TRUNC6]](s16), [[TRUNC2]](s16) - ; VI: [[FPEXT6:%[0-9]+]]:_(s32) = G_FPEXT [[TRUNC3]](s16) - ; VI: [[FPEXT7:%[0-9]+]]:_(s32) = G_FPEXT [[TRUNC7]](s16) - ; VI: [[INT6:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.rcp), [[FPEXT7]](s32) - ; VI: [[FMUL3:%[0-9]+]]:_(s32) = G_FMUL [[FPEXT6]], [[INT6]] - ; VI: [[FPTRUNC3:%[0-9]+]]:_(s16) = G_FPTRUNC [[FMUL3]](s32) - ; VI: [[INT7:%[0-9]+]]:_(s16) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.fixup), [[FPTRUNC3]](s16), [[TRUNC7]](s16), [[TRUNC3]](s16) - ; VI: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[INT1]](s16) - ; VI: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[INT3]](s16) - ; VI: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C]](s32) - ; VI: [[OR:%[0-9]+]]:_(s32) = G_OR 
[[ZEXT]], [[SHL]] - ; VI: [[BITCAST4:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) - ; VI: [[ZEXT2:%[0-9]+]]:_(s32) = G_ZEXT [[INT5]](s16) - ; VI: [[ZEXT3:%[0-9]+]]:_(s32) = G_ZEXT [[INT7]](s16) - ; VI: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[ZEXT3]], [[C]](s32) - ; VI: [[OR1:%[0-9]+]]:_(s32) = G_OR [[ZEXT2]], [[SHL1]] - ; VI: [[BITCAST5:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32) - ; VI: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BITCAST4]](<2 x s16>), [[BITCAST5]](<2 x s16>) - ; VI: $vgpr0_vgpr1 = COPY [[CONCAT_VECTORS]](<4 x s16>) + ; VI-NEXT: [[COPY1:%[0-9]+]]:_(<4 x s16>) = COPY $vgpr2_vgpr3 + ; VI-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[COPY]](<4 x s16>) + ; VI-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>) + ; VI-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST]](s32) + ; VI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; VI-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) + ; VI-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) + ; VI-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>) + ; VI-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST1]](s32) + ; VI-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C]](s32) + ; VI-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR1]](s32) + ; VI-NEXT: [[UV2:%[0-9]+]]:_(<2 x s16>), [[UV3:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[COPY1]](<4 x s16>) + ; VI-NEXT: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[UV2]](<2 x s16>) + ; VI-NEXT: [[TRUNC4:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST2]](s32) + ; VI-NEXT: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C]](s32) + ; VI-NEXT: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR2]](s32) + ; VI-NEXT: [[BITCAST3:%[0-9]+]]:_(s32) = G_BITCAST [[UV3]](<2 x s16>) + ; VI-NEXT: [[TRUNC6:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST3]](s32) + ; VI-NEXT: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST3]], [[C]](s32) + ; VI-NEXT: [[TRUNC7:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR3]](s32) + ; VI-NEXT: [[FPEXT:%[0-9]+]]:_(s32) = G_FPEXT [[TRUNC]](s16) + ; VI-NEXT: [[FPEXT1:%[0-9]+]]:_(s32) = G_FPEXT [[TRUNC4]](s16) + ; VI-NEXT: [[INT:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.rcp), [[FPEXT1]](s32) + ; VI-NEXT: [[FMUL:%[0-9]+]]:_(s32) = G_FMUL [[FPEXT]], [[INT]] + ; VI-NEXT: [[FPTRUNC:%[0-9]+]]:_(s16) = G_FPTRUNC [[FMUL]](s32) + ; VI-NEXT: [[INT1:%[0-9]+]]:_(s16) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.fixup), [[FPTRUNC]](s16), [[TRUNC4]](s16), [[TRUNC]](s16) + ; VI-NEXT: [[FPEXT2:%[0-9]+]]:_(s32) = G_FPEXT [[TRUNC1]](s16) + ; VI-NEXT: [[FPEXT3:%[0-9]+]]:_(s32) = G_FPEXT [[TRUNC5]](s16) + ; VI-NEXT: [[INT2:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.rcp), [[FPEXT3]](s32) + ; VI-NEXT: [[FMUL1:%[0-9]+]]:_(s32) = G_FMUL [[FPEXT2]], [[INT2]] + ; VI-NEXT: [[FPTRUNC1:%[0-9]+]]:_(s16) = G_FPTRUNC [[FMUL1]](s32) + ; VI-NEXT: [[INT3:%[0-9]+]]:_(s16) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.fixup), [[FPTRUNC1]](s16), [[TRUNC5]](s16), [[TRUNC1]](s16) + ; VI-NEXT: [[FPEXT4:%[0-9]+]]:_(s32) = G_FPEXT [[TRUNC2]](s16) + ; VI-NEXT: [[FPEXT5:%[0-9]+]]:_(s32) = G_FPEXT [[TRUNC6]](s16) + ; VI-NEXT: [[INT4:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.rcp), [[FPEXT5]](s32) + ; VI-NEXT: [[FMUL2:%[0-9]+]]:_(s32) = G_FMUL [[FPEXT4]], [[INT4]] + ; VI-NEXT: [[FPTRUNC2:%[0-9]+]]:_(s16) = G_FPTRUNC [[FMUL2]](s32) + ; VI-NEXT: [[INT5:%[0-9]+]]:_(s16) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.fixup), [[FPTRUNC2]](s16), [[TRUNC6]](s16), [[TRUNC2]](s16) + ; VI-NEXT: [[FPEXT6:%[0-9]+]]:_(s32) = G_FPEXT [[TRUNC3]](s16) + ; 
VI-NEXT: [[FPEXT7:%[0-9]+]]:_(s32) = G_FPEXT [[TRUNC7]](s16) + ; VI-NEXT: [[INT6:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.rcp), [[FPEXT7]](s32) + ; VI-NEXT: [[FMUL3:%[0-9]+]]:_(s32) = G_FMUL [[FPEXT6]], [[INT6]] + ; VI-NEXT: [[FPTRUNC3:%[0-9]+]]:_(s16) = G_FPTRUNC [[FMUL3]](s32) + ; VI-NEXT: [[INT7:%[0-9]+]]:_(s16) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.fixup), [[FPTRUNC3]](s16), [[TRUNC7]](s16), [[TRUNC3]](s16) + ; VI-NEXT: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[INT1]](s16) + ; VI-NEXT: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[INT3]](s16) + ; VI-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C]](s32) + ; VI-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL]] + ; VI-NEXT: [[BITCAST4:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) + ; VI-NEXT: [[ZEXT2:%[0-9]+]]:_(s32) = G_ZEXT [[INT5]](s16) + ; VI-NEXT: [[ZEXT3:%[0-9]+]]:_(s32) = G_ZEXT [[INT7]](s16) + ; VI-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[ZEXT3]], [[C]](s32) + ; VI-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[ZEXT2]], [[SHL1]] + ; VI-NEXT: [[BITCAST5:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32) + ; VI-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BITCAST4]](<2 x s16>), [[BITCAST5]](<2 x s16>) + ; VI-NEXT: $vgpr0_vgpr1 = COPY [[CONCAT_VECTORS]](<4 x s16>) ; GFX9-LABEL: name: test_fdiv_v4s16 ; GFX9: [[COPY:%[0-9]+]]:_(<4 x s16>) = COPY $vgpr0_vgpr1 - ; GFX9: [[COPY1:%[0-9]+]]:_(<4 x s16>) = COPY $vgpr2_vgpr3 - ; GFX9: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[COPY]](<4 x s16>) - ; GFX9: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>) - ; GFX9: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST]](s32) - ; GFX9: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; GFX9: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) - ; GFX9: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) - ; GFX9: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>) - ; GFX9: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST1]](s32) - ; GFX9: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C]](s32) - ; GFX9: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR1]](s32) - ; GFX9: [[UV2:%[0-9]+]]:_(<2 x s16>), [[UV3:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[COPY1]](<4 x s16>) - ; GFX9: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[UV2]](<2 x s16>) - ; GFX9: [[TRUNC4:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST2]](s32) - ; GFX9: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C]](s32) - ; GFX9: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR2]](s32) - ; GFX9: [[BITCAST3:%[0-9]+]]:_(s32) = G_BITCAST [[UV3]](<2 x s16>) - ; GFX9: [[TRUNC6:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST3]](s32) - ; GFX9: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST3]], [[C]](s32) - ; GFX9: [[TRUNC7:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR3]](s32) - ; GFX9: [[FPEXT:%[0-9]+]]:_(s32) = G_FPEXT [[TRUNC]](s16) - ; GFX9: [[FPEXT1:%[0-9]+]]:_(s32) = G_FPEXT [[TRUNC4]](s16) - ; GFX9: [[INT:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.rcp), [[FPEXT1]](s32) - ; GFX9: [[FMUL:%[0-9]+]]:_(s32) = G_FMUL [[FPEXT]], [[INT]] - ; GFX9: [[FPTRUNC:%[0-9]+]]:_(s16) = G_FPTRUNC [[FMUL]](s32) - ; GFX9: [[INT1:%[0-9]+]]:_(s16) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.fixup), [[FPTRUNC]](s16), [[TRUNC4]](s16), [[TRUNC]](s16) - ; GFX9: [[FPEXT2:%[0-9]+]]:_(s32) = G_FPEXT [[TRUNC1]](s16) - ; GFX9: [[FPEXT3:%[0-9]+]]:_(s32) = G_FPEXT [[TRUNC5]](s16) - ; GFX9: [[INT2:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.rcp), [[FPEXT3]](s32) - ; GFX9: [[FMUL1:%[0-9]+]]:_(s32) = G_FMUL [[FPEXT2]], [[INT2]] - ; GFX9: [[FPTRUNC1:%[0-9]+]]:_(s16) = G_FPTRUNC [[FMUL1]](s32) - ; GFX9: 
[[INT3:%[0-9]+]]:_(s16) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.fixup), [[FPTRUNC1]](s16), [[TRUNC5]](s16), [[TRUNC1]](s16) - ; GFX9: [[FPEXT4:%[0-9]+]]:_(s32) = G_FPEXT [[TRUNC2]](s16) - ; GFX9: [[FPEXT5:%[0-9]+]]:_(s32) = G_FPEXT [[TRUNC6]](s16) - ; GFX9: [[INT4:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.rcp), [[FPEXT5]](s32) - ; GFX9: [[FMUL2:%[0-9]+]]:_(s32) = G_FMUL [[FPEXT4]], [[INT4]] - ; GFX9: [[FPTRUNC2:%[0-9]+]]:_(s16) = G_FPTRUNC [[FMUL2]](s32) - ; GFX9: [[INT5:%[0-9]+]]:_(s16) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.fixup), [[FPTRUNC2]](s16), [[TRUNC6]](s16), [[TRUNC2]](s16) - ; GFX9: [[FPEXT6:%[0-9]+]]:_(s32) = G_FPEXT [[TRUNC3]](s16) - ; GFX9: [[FPEXT7:%[0-9]+]]:_(s32) = G_FPEXT [[TRUNC7]](s16) - ; GFX9: [[INT6:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.rcp), [[FPEXT7]](s32) - ; GFX9: [[FMUL3:%[0-9]+]]:_(s32) = G_FMUL [[FPEXT6]], [[INT6]] - ; GFX9: [[FPTRUNC3:%[0-9]+]]:_(s16) = G_FPTRUNC [[FMUL3]](s32) - ; GFX9: [[INT7:%[0-9]+]]:_(s16) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.fixup), [[FPTRUNC3]](s16), [[TRUNC7]](s16), [[TRUNC3]](s16) - ; GFX9: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[INT1]](s16) - ; GFX9: [[ANYEXT1:%[0-9]+]]:_(s32) = G_ANYEXT [[INT3]](s16) - ; GFX9: [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[ANYEXT]](s32), [[ANYEXT1]](s32) - ; GFX9: [[ANYEXT2:%[0-9]+]]:_(s32) = G_ANYEXT [[INT5]](s16) - ; GFX9: [[ANYEXT3:%[0-9]+]]:_(s32) = G_ANYEXT [[INT7]](s16) - ; GFX9: [[BUILD_VECTOR_TRUNC1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[ANYEXT2]](s32), [[ANYEXT3]](s32) - ; GFX9: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR_TRUNC]](<2 x s16>), [[BUILD_VECTOR_TRUNC1]](<2 x s16>) - ; GFX9: $vgpr0_vgpr1 = COPY [[CONCAT_VECTORS]](<4 x s16>) + ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(<4 x s16>) = COPY $vgpr2_vgpr3 + ; GFX9-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[COPY]](<4 x s16>) + ; GFX9-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>) + ; GFX9-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST]](s32) + ; GFX9-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; GFX9-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) + ; GFX9-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) + ; GFX9-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>) + ; GFX9-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST1]](s32) + ; GFX9-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C]](s32) + ; GFX9-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR1]](s32) + ; GFX9-NEXT: [[UV2:%[0-9]+]]:_(<2 x s16>), [[UV3:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[COPY1]](<4 x s16>) + ; GFX9-NEXT: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[UV2]](<2 x s16>) + ; GFX9-NEXT: [[TRUNC4:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST2]](s32) + ; GFX9-NEXT: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C]](s32) + ; GFX9-NEXT: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR2]](s32) + ; GFX9-NEXT: [[BITCAST3:%[0-9]+]]:_(s32) = G_BITCAST [[UV3]](<2 x s16>) + ; GFX9-NEXT: [[TRUNC6:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST3]](s32) + ; GFX9-NEXT: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST3]], [[C]](s32) + ; GFX9-NEXT: [[TRUNC7:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR3]](s32) + ; GFX9-NEXT: [[FPEXT:%[0-9]+]]:_(s32) = G_FPEXT [[TRUNC]](s16) + ; GFX9-NEXT: [[FPEXT1:%[0-9]+]]:_(s32) = G_FPEXT [[TRUNC4]](s16) + ; GFX9-NEXT: [[INT:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.rcp), [[FPEXT1]](s32) + ; GFX9-NEXT: [[FMUL:%[0-9]+]]:_(s32) = G_FMUL [[FPEXT]], [[INT]] + ; GFX9-NEXT: 
[[FPTRUNC:%[0-9]+]]:_(s16) = G_FPTRUNC [[FMUL]](s32) + ; GFX9-NEXT: [[INT1:%[0-9]+]]:_(s16) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.fixup), [[FPTRUNC]](s16), [[TRUNC4]](s16), [[TRUNC]](s16) + ; GFX9-NEXT: [[FPEXT2:%[0-9]+]]:_(s32) = G_FPEXT [[TRUNC1]](s16) + ; GFX9-NEXT: [[FPEXT3:%[0-9]+]]:_(s32) = G_FPEXT [[TRUNC5]](s16) + ; GFX9-NEXT: [[INT2:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.rcp), [[FPEXT3]](s32) + ; GFX9-NEXT: [[FMUL1:%[0-9]+]]:_(s32) = G_FMUL [[FPEXT2]], [[INT2]] + ; GFX9-NEXT: [[FPTRUNC1:%[0-9]+]]:_(s16) = G_FPTRUNC [[FMUL1]](s32) + ; GFX9-NEXT: [[INT3:%[0-9]+]]:_(s16) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.fixup), [[FPTRUNC1]](s16), [[TRUNC5]](s16), [[TRUNC1]](s16) + ; GFX9-NEXT: [[FPEXT4:%[0-9]+]]:_(s32) = G_FPEXT [[TRUNC2]](s16) + ; GFX9-NEXT: [[FPEXT5:%[0-9]+]]:_(s32) = G_FPEXT [[TRUNC6]](s16) + ; GFX9-NEXT: [[INT4:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.rcp), [[FPEXT5]](s32) + ; GFX9-NEXT: [[FMUL2:%[0-9]+]]:_(s32) = G_FMUL [[FPEXT4]], [[INT4]] + ; GFX9-NEXT: [[FPTRUNC2:%[0-9]+]]:_(s16) = G_FPTRUNC [[FMUL2]](s32) + ; GFX9-NEXT: [[INT5:%[0-9]+]]:_(s16) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.fixup), [[FPTRUNC2]](s16), [[TRUNC6]](s16), [[TRUNC2]](s16) + ; GFX9-NEXT: [[FPEXT6:%[0-9]+]]:_(s32) = G_FPEXT [[TRUNC3]](s16) + ; GFX9-NEXT: [[FPEXT7:%[0-9]+]]:_(s32) = G_FPEXT [[TRUNC7]](s16) + ; GFX9-NEXT: [[INT6:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.rcp), [[FPEXT7]](s32) + ; GFX9-NEXT: [[FMUL3:%[0-9]+]]:_(s32) = G_FMUL [[FPEXT6]], [[INT6]] + ; GFX9-NEXT: [[FPTRUNC3:%[0-9]+]]:_(s16) = G_FPTRUNC [[FMUL3]](s32) + ; GFX9-NEXT: [[INT7:%[0-9]+]]:_(s16) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.fixup), [[FPTRUNC3]](s16), [[TRUNC7]](s16), [[TRUNC3]](s16) + ; GFX9-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[INT1]](s16) + ; GFX9-NEXT: [[ANYEXT1:%[0-9]+]]:_(s32) = G_ANYEXT [[INT3]](s16) + ; GFX9-NEXT: [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[ANYEXT]](s32), [[ANYEXT1]](s32) + ; GFX9-NEXT: [[ANYEXT2:%[0-9]+]]:_(s32) = G_ANYEXT [[INT5]](s16) + ; GFX9-NEXT: [[ANYEXT3:%[0-9]+]]:_(s32) = G_ANYEXT [[INT7]](s16) + ; GFX9-NEXT: [[BUILD_VECTOR_TRUNC1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[ANYEXT2]](s32), [[ANYEXT3]](s32) + ; GFX9-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR_TRUNC]](<2 x s16>), [[BUILD_VECTOR_TRUNC1]](<2 x s16>) + ; GFX9-NEXT: $vgpr0_vgpr1 = COPY [[CONCAT_VECTORS]](<4 x s16>) ; GFX9-UNSAFE-LABEL: name: test_fdiv_v4s16 ; GFX9-UNSAFE: [[COPY:%[0-9]+]]:_(<4 x s16>) = COPY $vgpr0_vgpr1 - ; GFX9-UNSAFE: [[COPY1:%[0-9]+]]:_(<4 x s16>) = COPY $vgpr2_vgpr3 - ; GFX9-UNSAFE: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[COPY]](<4 x s16>) - ; GFX9-UNSAFE: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>) - ; GFX9-UNSAFE: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST]](s32) - ; GFX9-UNSAFE: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; GFX9-UNSAFE: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) - ; GFX9-UNSAFE: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) - ; GFX9-UNSAFE: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>) - ; GFX9-UNSAFE: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST1]](s32) - ; GFX9-UNSAFE: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C]](s32) - ; GFX9-UNSAFE: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR1]](s32) - ; GFX9-UNSAFE: [[UV2:%[0-9]+]]:_(<2 x s16>), [[UV3:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[COPY1]](<4 x s16>) - ; GFX9-UNSAFE: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[UV2]](<2 x s16>) - 
; GFX9-UNSAFE: [[TRUNC4:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST2]](s32) - ; GFX9-UNSAFE: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C]](s32) - ; GFX9-UNSAFE: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR2]](s32) - ; GFX9-UNSAFE: [[BITCAST3:%[0-9]+]]:_(s32) = G_BITCAST [[UV3]](<2 x s16>) - ; GFX9-UNSAFE: [[TRUNC6:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST3]](s32) - ; GFX9-UNSAFE: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST3]], [[C]](s32) - ; GFX9-UNSAFE: [[TRUNC7:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR3]](s32) - ; GFX9-UNSAFE: [[INT:%[0-9]+]]:_(s16) = G_INTRINSIC intrinsic(@llvm.amdgcn.rcp), [[TRUNC4]](s16) - ; GFX9-UNSAFE: [[FMUL:%[0-9]+]]:_(s16) = G_FMUL [[TRUNC]], [[INT]] - ; GFX9-UNSAFE: [[INT1:%[0-9]+]]:_(s16) = G_INTRINSIC intrinsic(@llvm.amdgcn.rcp), [[TRUNC5]](s16) - ; GFX9-UNSAFE: [[FMUL1:%[0-9]+]]:_(s16) = G_FMUL [[TRUNC1]], [[INT1]] - ; GFX9-UNSAFE: [[INT2:%[0-9]+]]:_(s16) = G_INTRINSIC intrinsic(@llvm.amdgcn.rcp), [[TRUNC6]](s16) - ; GFX9-UNSAFE: [[FMUL2:%[0-9]+]]:_(s16) = G_FMUL [[TRUNC2]], [[INT2]] - ; GFX9-UNSAFE: [[INT3:%[0-9]+]]:_(s16) = G_INTRINSIC intrinsic(@llvm.amdgcn.rcp), [[TRUNC7]](s16) - ; GFX9-UNSAFE: [[FMUL3:%[0-9]+]]:_(s16) = G_FMUL [[TRUNC3]], [[INT3]] - ; GFX9-UNSAFE: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[FMUL]](s16) - ; GFX9-UNSAFE: [[ANYEXT1:%[0-9]+]]:_(s32) = G_ANYEXT [[FMUL1]](s16) - ; GFX9-UNSAFE: [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[ANYEXT]](s32), [[ANYEXT1]](s32) - ; GFX9-UNSAFE: [[ANYEXT2:%[0-9]+]]:_(s32) = G_ANYEXT [[FMUL2]](s16) - ; GFX9-UNSAFE: [[ANYEXT3:%[0-9]+]]:_(s32) = G_ANYEXT [[FMUL3]](s16) - ; GFX9-UNSAFE: [[BUILD_VECTOR_TRUNC1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[ANYEXT2]](s32), [[ANYEXT3]](s32) - ; GFX9-UNSAFE: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR_TRUNC]](<2 x s16>), [[BUILD_VECTOR_TRUNC1]](<2 x s16>) - ; GFX9-UNSAFE: $vgpr0_vgpr1 = COPY [[CONCAT_VECTORS]](<4 x s16>) + ; GFX9-UNSAFE-NEXT: [[COPY1:%[0-9]+]]:_(<4 x s16>) = COPY $vgpr2_vgpr3 + ; GFX9-UNSAFE-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[COPY]](<4 x s16>) + ; GFX9-UNSAFE-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>) + ; GFX9-UNSAFE-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST]](s32) + ; GFX9-UNSAFE-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; GFX9-UNSAFE-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) + ; GFX9-UNSAFE-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) + ; GFX9-UNSAFE-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>) + ; GFX9-UNSAFE-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST1]](s32) + ; GFX9-UNSAFE-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C]](s32) + ; GFX9-UNSAFE-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR1]](s32) + ; GFX9-UNSAFE-NEXT: [[UV2:%[0-9]+]]:_(<2 x s16>), [[UV3:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[COPY1]](<4 x s16>) + ; GFX9-UNSAFE-NEXT: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[UV2]](<2 x s16>) + ; GFX9-UNSAFE-NEXT: [[TRUNC4:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST2]](s32) + ; GFX9-UNSAFE-NEXT: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C]](s32) + ; GFX9-UNSAFE-NEXT: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR2]](s32) + ; GFX9-UNSAFE-NEXT: [[BITCAST3:%[0-9]+]]:_(s32) = G_BITCAST [[UV3]](<2 x s16>) + ; GFX9-UNSAFE-NEXT: [[TRUNC6:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST3]](s32) + ; GFX9-UNSAFE-NEXT: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST3]], [[C]](s32) + ; GFX9-UNSAFE-NEXT: [[TRUNC7:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR3]](s32) + ; GFX9-UNSAFE-NEXT: 
[[INT:%[0-9]+]]:_(s16) = G_INTRINSIC intrinsic(@llvm.amdgcn.rcp), [[TRUNC4]](s16) + ; GFX9-UNSAFE-NEXT: [[FMUL:%[0-9]+]]:_(s16) = G_FMUL [[TRUNC]], [[INT]] + ; GFX9-UNSAFE-NEXT: [[INT1:%[0-9]+]]:_(s16) = G_INTRINSIC intrinsic(@llvm.amdgcn.rcp), [[TRUNC5]](s16) + ; GFX9-UNSAFE-NEXT: [[FMUL1:%[0-9]+]]:_(s16) = G_FMUL [[TRUNC1]], [[INT1]] + ; GFX9-UNSAFE-NEXT: [[INT2:%[0-9]+]]:_(s16) = G_INTRINSIC intrinsic(@llvm.amdgcn.rcp), [[TRUNC6]](s16) + ; GFX9-UNSAFE-NEXT: [[FMUL2:%[0-9]+]]:_(s16) = G_FMUL [[TRUNC2]], [[INT2]] + ; GFX9-UNSAFE-NEXT: [[INT3:%[0-9]+]]:_(s16) = G_INTRINSIC intrinsic(@llvm.amdgcn.rcp), [[TRUNC7]](s16) + ; GFX9-UNSAFE-NEXT: [[FMUL3:%[0-9]+]]:_(s16) = G_FMUL [[TRUNC3]], [[INT3]] + ; GFX9-UNSAFE-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[FMUL]](s16) + ; GFX9-UNSAFE-NEXT: [[ANYEXT1:%[0-9]+]]:_(s32) = G_ANYEXT [[FMUL1]](s16) + ; GFX9-UNSAFE-NEXT: [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[ANYEXT]](s32), [[ANYEXT1]](s32) + ; GFX9-UNSAFE-NEXT: [[ANYEXT2:%[0-9]+]]:_(s32) = G_ANYEXT [[FMUL2]](s16) + ; GFX9-UNSAFE-NEXT: [[ANYEXT3:%[0-9]+]]:_(s32) = G_ANYEXT [[FMUL3]](s16) + ; GFX9-UNSAFE-NEXT: [[BUILD_VECTOR_TRUNC1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[ANYEXT2]](s32), [[ANYEXT3]](s32) + ; GFX9-UNSAFE-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR_TRUNC]](<2 x s16>), [[BUILD_VECTOR_TRUNC1]](<2 x s16>) + ; GFX9-UNSAFE-NEXT: $vgpr0_vgpr1 = COPY [[CONCAT_VECTORS]](<4 x s16>) ; GFX10-LABEL: name: test_fdiv_v4s16 ; GFX10: [[COPY:%[0-9]+]]:_(<4 x s16>) = COPY $vgpr0_vgpr1 - ; GFX10: [[COPY1:%[0-9]+]]:_(<4 x s16>) = COPY $vgpr2_vgpr3 - ; GFX10: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[COPY]](<4 x s16>) - ; GFX10: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>) - ; GFX10: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST]](s32) - ; GFX10: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; GFX10: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) - ; GFX10: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) - ; GFX10: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>) - ; GFX10: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST1]](s32) - ; GFX10: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C]](s32) - ; GFX10: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR1]](s32) - ; GFX10: [[UV2:%[0-9]+]]:_(<2 x s16>), [[UV3:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[COPY1]](<4 x s16>) - ; GFX10: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[UV2]](<2 x s16>) - ; GFX10: [[TRUNC4:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST2]](s32) - ; GFX10: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C]](s32) - ; GFX10: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR2]](s32) - ; GFX10: [[BITCAST3:%[0-9]+]]:_(s32) = G_BITCAST [[UV3]](<2 x s16>) - ; GFX10: [[TRUNC6:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST3]](s32) - ; GFX10: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST3]], [[C]](s32) - ; GFX10: [[TRUNC7:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR3]](s32) - ; GFX10: [[FPEXT:%[0-9]+]]:_(s32) = G_FPEXT [[TRUNC]](s16) - ; GFX10: [[FPEXT1:%[0-9]+]]:_(s32) = G_FPEXT [[TRUNC4]](s16) - ; GFX10: [[INT:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.rcp), [[FPEXT1]](s32) - ; GFX10: [[FMUL:%[0-9]+]]:_(s32) = G_FMUL [[FPEXT]], [[INT]] - ; GFX10: [[FPTRUNC:%[0-9]+]]:_(s16) = G_FPTRUNC [[FMUL]](s32) - ; GFX10: [[INT1:%[0-9]+]]:_(s16) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.fixup), [[FPTRUNC]](s16), [[TRUNC4]](s16), [[TRUNC]](s16) - ; GFX10: [[FPEXT2:%[0-9]+]]:_(s32) = G_FPEXT [[TRUNC1]](s16) - ; GFX10: [[FPEXT3:%[0-9]+]]:_(s32) = G_FPEXT 
[[TRUNC5]](s16) - ; GFX10: [[INT2:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.rcp), [[FPEXT3]](s32) - ; GFX10: [[FMUL1:%[0-9]+]]:_(s32) = G_FMUL [[FPEXT2]], [[INT2]] - ; GFX10: [[FPTRUNC1:%[0-9]+]]:_(s16) = G_FPTRUNC [[FMUL1]](s32) - ; GFX10: [[INT3:%[0-9]+]]:_(s16) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.fixup), [[FPTRUNC1]](s16), [[TRUNC5]](s16), [[TRUNC1]](s16) - ; GFX10: [[FPEXT4:%[0-9]+]]:_(s32) = G_FPEXT [[TRUNC2]](s16) - ; GFX10: [[FPEXT5:%[0-9]+]]:_(s32) = G_FPEXT [[TRUNC6]](s16) - ; GFX10: [[INT4:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.rcp), [[FPEXT5]](s32) - ; GFX10: [[FMUL2:%[0-9]+]]:_(s32) = G_FMUL [[FPEXT4]], [[INT4]] - ; GFX10: [[FPTRUNC2:%[0-9]+]]:_(s16) = G_FPTRUNC [[FMUL2]](s32) - ; GFX10: [[INT5:%[0-9]+]]:_(s16) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.fixup), [[FPTRUNC2]](s16), [[TRUNC6]](s16), [[TRUNC2]](s16) - ; GFX10: [[FPEXT6:%[0-9]+]]:_(s32) = G_FPEXT [[TRUNC3]](s16) - ; GFX10: [[FPEXT7:%[0-9]+]]:_(s32) = G_FPEXT [[TRUNC7]](s16) - ; GFX10: [[INT6:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.rcp), [[FPEXT7]](s32) - ; GFX10: [[FMUL3:%[0-9]+]]:_(s32) = G_FMUL [[FPEXT6]], [[INT6]] - ; GFX10: [[FPTRUNC3:%[0-9]+]]:_(s16) = G_FPTRUNC [[FMUL3]](s32) - ; GFX10: [[INT7:%[0-9]+]]:_(s16) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.fixup), [[FPTRUNC3]](s16), [[TRUNC7]](s16), [[TRUNC3]](s16) - ; GFX10: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[INT1]](s16) - ; GFX10: [[ANYEXT1:%[0-9]+]]:_(s32) = G_ANYEXT [[INT3]](s16) - ; GFX10: [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[ANYEXT]](s32), [[ANYEXT1]](s32) - ; GFX10: [[ANYEXT2:%[0-9]+]]:_(s32) = G_ANYEXT [[INT5]](s16) - ; GFX10: [[ANYEXT3:%[0-9]+]]:_(s32) = G_ANYEXT [[INT7]](s16) - ; GFX10: [[BUILD_VECTOR_TRUNC1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[ANYEXT2]](s32), [[ANYEXT3]](s32) - ; GFX10: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR_TRUNC]](<2 x s16>), [[BUILD_VECTOR_TRUNC1]](<2 x s16>) - ; GFX10: $vgpr0_vgpr1 = COPY [[CONCAT_VECTORS]](<4 x s16>) + ; GFX10-NEXT: [[COPY1:%[0-9]+]]:_(<4 x s16>) = COPY $vgpr2_vgpr3 + ; GFX10-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[COPY]](<4 x s16>) + ; GFX10-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>) + ; GFX10-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST]](s32) + ; GFX10-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; GFX10-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) + ; GFX10-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) + ; GFX10-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>) + ; GFX10-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST1]](s32) + ; GFX10-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C]](s32) + ; GFX10-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR1]](s32) + ; GFX10-NEXT: [[UV2:%[0-9]+]]:_(<2 x s16>), [[UV3:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[COPY1]](<4 x s16>) + ; GFX10-NEXT: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[UV2]](<2 x s16>) + ; GFX10-NEXT: [[TRUNC4:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST2]](s32) + ; GFX10-NEXT: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C]](s32) + ; GFX10-NEXT: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR2]](s32) + ; GFX10-NEXT: [[BITCAST3:%[0-9]+]]:_(s32) = G_BITCAST [[UV3]](<2 x s16>) + ; GFX10-NEXT: [[TRUNC6:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST3]](s32) + ; GFX10-NEXT: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST3]], [[C]](s32) + ; GFX10-NEXT: [[TRUNC7:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR3]](s32) + ; GFX10-NEXT: 
[[FPEXT:%[0-9]+]]:_(s32) = G_FPEXT [[TRUNC]](s16) + ; GFX10-NEXT: [[FPEXT1:%[0-9]+]]:_(s32) = G_FPEXT [[TRUNC4]](s16) + ; GFX10-NEXT: [[INT:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.rcp), [[FPEXT1]](s32) + ; GFX10-NEXT: [[FMUL:%[0-9]+]]:_(s32) = G_FMUL [[FPEXT]], [[INT]] + ; GFX10-NEXT: [[FPTRUNC:%[0-9]+]]:_(s16) = G_FPTRUNC [[FMUL]](s32) + ; GFX10-NEXT: [[INT1:%[0-9]+]]:_(s16) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.fixup), [[FPTRUNC]](s16), [[TRUNC4]](s16), [[TRUNC]](s16) + ; GFX10-NEXT: [[FPEXT2:%[0-9]+]]:_(s32) = G_FPEXT [[TRUNC1]](s16) + ; GFX10-NEXT: [[FPEXT3:%[0-9]+]]:_(s32) = G_FPEXT [[TRUNC5]](s16) + ; GFX10-NEXT: [[INT2:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.rcp), [[FPEXT3]](s32) + ; GFX10-NEXT: [[FMUL1:%[0-9]+]]:_(s32) = G_FMUL [[FPEXT2]], [[INT2]] + ; GFX10-NEXT: [[FPTRUNC1:%[0-9]+]]:_(s16) = G_FPTRUNC [[FMUL1]](s32) + ; GFX10-NEXT: [[INT3:%[0-9]+]]:_(s16) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.fixup), [[FPTRUNC1]](s16), [[TRUNC5]](s16), [[TRUNC1]](s16) + ; GFX10-NEXT: [[FPEXT4:%[0-9]+]]:_(s32) = G_FPEXT [[TRUNC2]](s16) + ; GFX10-NEXT: [[FPEXT5:%[0-9]+]]:_(s32) = G_FPEXT [[TRUNC6]](s16) + ; GFX10-NEXT: [[INT4:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.rcp), [[FPEXT5]](s32) + ; GFX10-NEXT: [[FMUL2:%[0-9]+]]:_(s32) = G_FMUL [[FPEXT4]], [[INT4]] + ; GFX10-NEXT: [[FPTRUNC2:%[0-9]+]]:_(s16) = G_FPTRUNC [[FMUL2]](s32) + ; GFX10-NEXT: [[INT5:%[0-9]+]]:_(s16) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.fixup), [[FPTRUNC2]](s16), [[TRUNC6]](s16), [[TRUNC2]](s16) + ; GFX10-NEXT: [[FPEXT6:%[0-9]+]]:_(s32) = G_FPEXT [[TRUNC3]](s16) + ; GFX10-NEXT: [[FPEXT7:%[0-9]+]]:_(s32) = G_FPEXT [[TRUNC7]](s16) + ; GFX10-NEXT: [[INT6:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.rcp), [[FPEXT7]](s32) + ; GFX10-NEXT: [[FMUL3:%[0-9]+]]:_(s32) = G_FMUL [[FPEXT6]], [[INT6]] + ; GFX10-NEXT: [[FPTRUNC3:%[0-9]+]]:_(s16) = G_FPTRUNC [[FMUL3]](s32) + ; GFX10-NEXT: [[INT7:%[0-9]+]]:_(s16) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.fixup), [[FPTRUNC3]](s16), [[TRUNC7]](s16), [[TRUNC3]](s16) + ; GFX10-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[INT1]](s16) + ; GFX10-NEXT: [[ANYEXT1:%[0-9]+]]:_(s32) = G_ANYEXT [[INT3]](s16) + ; GFX10-NEXT: [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[ANYEXT]](s32), [[ANYEXT1]](s32) + ; GFX10-NEXT: [[ANYEXT2:%[0-9]+]]:_(s32) = G_ANYEXT [[INT5]](s16) + ; GFX10-NEXT: [[ANYEXT3:%[0-9]+]]:_(s32) = G_ANYEXT [[INT7]](s16) + ; GFX10-NEXT: [[BUILD_VECTOR_TRUNC1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[ANYEXT2]](s32), [[ANYEXT3]](s32) + ; GFX10-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR_TRUNC]](<2 x s16>), [[BUILD_VECTOR_TRUNC1]](<2 x s16>) + ; GFX10-NEXT: $vgpr0_vgpr1 = COPY [[CONCAT_VECTORS]](<4 x s16>) %0:_(<4 x s16>) = COPY $vgpr0_vgpr1 %1:_(<4 x s16>) = COPY $vgpr2_vgpr3 %2:_(<4 x s16>) = G_FDIV %0, %1 @@ -1927,68 +1927,68 @@ body: | ; SI-LABEL: name: test_fdiv_s16_constant_one_rcp ; SI: [[C:%[0-9]+]]:_(s16) = G_FCONSTANT half 0xH3C00 - ; SI: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; SI: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY]](s32) - ; SI: [[FPEXT:%[0-9]+]]:_(s32) = G_FPEXT [[C]](s16) - ; SI: [[FPEXT1:%[0-9]+]]:_(s32) = G_FPEXT [[TRUNC]](s16) - ; SI: [[C1:%[0-9]+]]:_(s32) = G_FCONSTANT float 1.000000e+00 - ; SI: [[INT:%[0-9]+]]:_(s32), [[INT1:%[0-9]+]]:_(s1) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.scale), [[FPEXT]](s32), [[FPEXT1]](s32), 0 - ; SI: [[INT2:%[0-9]+]]:_(s32), [[INT3:%[0-9]+]]:_(s1) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.scale), [[FPEXT]](s32), 
[[FPEXT1]](s32), 1 - ; SI: [[INT4:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.rcp), [[INT]](s32) - ; SI: [[FNEG:%[0-9]+]]:_(s32) = G_FNEG [[INT]] - ; SI: [[FMA:%[0-9]+]]:_(s32) = G_FMA [[FNEG]], [[INT4]], [[C1]] - ; SI: [[FMA1:%[0-9]+]]:_(s32) = G_FMA [[FMA]], [[INT4]], [[INT4]] - ; SI: [[FMUL:%[0-9]+]]:_(s32) = G_FMUL [[INT2]], [[FMA1]] - ; SI: [[FMA2:%[0-9]+]]:_(s32) = G_FMA [[FNEG]], [[FMUL]], [[INT2]] - ; SI: [[FMA3:%[0-9]+]]:_(s32) = G_FMA [[FMA2]], [[FMA1]], [[FMUL]] - ; SI: [[FMA4:%[0-9]+]]:_(s32) = G_FMA [[FNEG]], [[FMA3]], [[INT2]] - ; SI: [[INT5:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.fmas), [[FMA4]](s32), [[FMA1]](s32), [[FMA3]](s32), [[INT3]](s1) - ; SI: [[INT6:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.fixup), [[INT5]](s32), [[FPEXT1]](s32), [[FPEXT]](s32) - ; SI: [[FPTRUNC:%[0-9]+]]:_(s16) = G_FPTRUNC [[INT6]](s32) - ; SI: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[FPTRUNC]](s16) - ; SI: $vgpr0 = COPY [[ANYEXT]](s32) + ; SI-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 + ; SI-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY]](s32) + ; SI-NEXT: [[FPEXT:%[0-9]+]]:_(s32) = G_FPEXT [[C]](s16) + ; SI-NEXT: [[FPEXT1:%[0-9]+]]:_(s32) = G_FPEXT [[TRUNC]](s16) + ; SI-NEXT: [[C1:%[0-9]+]]:_(s32) = G_FCONSTANT float 1.000000e+00 + ; SI-NEXT: [[INT:%[0-9]+]]:_(s32), [[INT1:%[0-9]+]]:_(s1) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.scale), [[FPEXT]](s32), [[FPEXT1]](s32), 0 + ; SI-NEXT: [[INT2:%[0-9]+]]:_(s32), [[INT3:%[0-9]+]]:_(s1) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.scale), [[FPEXT]](s32), [[FPEXT1]](s32), 1 + ; SI-NEXT: [[INT4:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.rcp), [[INT]](s32) + ; SI-NEXT: [[FNEG:%[0-9]+]]:_(s32) = G_FNEG [[INT]] + ; SI-NEXT: [[FMA:%[0-9]+]]:_(s32) = G_FMA [[FNEG]], [[INT4]], [[C1]] + ; SI-NEXT: [[FMA1:%[0-9]+]]:_(s32) = G_FMA [[FMA]], [[INT4]], [[INT4]] + ; SI-NEXT: [[FMUL:%[0-9]+]]:_(s32) = G_FMUL [[INT2]], [[FMA1]] + ; SI-NEXT: [[FMA2:%[0-9]+]]:_(s32) = G_FMA [[FNEG]], [[FMUL]], [[INT2]] + ; SI-NEXT: [[FMA3:%[0-9]+]]:_(s32) = G_FMA [[FMA2]], [[FMA1]], [[FMUL]] + ; SI-NEXT: [[FMA4:%[0-9]+]]:_(s32) = G_FMA [[FNEG]], [[FMA3]], [[INT2]] + ; SI-NEXT: [[INT5:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.fmas), [[FMA4]](s32), [[FMA1]](s32), [[FMA3]](s32), [[INT3]](s1) + ; SI-NEXT: [[INT6:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.fixup), [[INT5]](s32), [[FPEXT1]](s32), [[FPEXT]](s32) + ; SI-NEXT: [[FPTRUNC:%[0-9]+]]:_(s16) = G_FPTRUNC [[INT6]](s32) + ; SI-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[FPTRUNC]](s16) + ; SI-NEXT: $vgpr0 = COPY [[ANYEXT]](s32) ; VI-LABEL: name: test_fdiv_s16_constant_one_rcp ; VI: [[C:%[0-9]+]]:_(s16) = G_FCONSTANT half 0xH3C00 - ; VI: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; VI: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY]](s32) - ; VI: [[FPEXT:%[0-9]+]]:_(s32) = G_FPEXT [[C]](s16) - ; VI: [[FPEXT1:%[0-9]+]]:_(s32) = G_FPEXT [[TRUNC]](s16) - ; VI: [[INT:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.rcp), [[FPEXT1]](s32) - ; VI: [[FMUL:%[0-9]+]]:_(s32) = G_FMUL [[FPEXT]], [[INT]] - ; VI: [[FPTRUNC:%[0-9]+]]:_(s16) = G_FPTRUNC [[FMUL]](s32) - ; VI: [[INT1:%[0-9]+]]:_(s16) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.fixup), [[FPTRUNC]](s16), [[TRUNC]](s16), [[C]](s16) - ; VI: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[INT1]](s16) - ; VI: $vgpr0 = COPY [[ANYEXT]](s32) + ; VI-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 + ; VI-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY]](s32) + ; VI-NEXT: [[FPEXT:%[0-9]+]]:_(s32) = G_FPEXT [[C]](s16) + ; VI-NEXT: 
[[FPEXT1:%[0-9]+]]:_(s32) = G_FPEXT [[TRUNC]](s16) + ; VI-NEXT: [[INT:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.rcp), [[FPEXT1]](s32) + ; VI-NEXT: [[FMUL:%[0-9]+]]:_(s32) = G_FMUL [[FPEXT]], [[INT]] + ; VI-NEXT: [[FPTRUNC:%[0-9]+]]:_(s16) = G_FPTRUNC [[FMUL]](s32) + ; VI-NEXT: [[INT1:%[0-9]+]]:_(s16) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.fixup), [[FPTRUNC]](s16), [[TRUNC]](s16), [[C]](s16) + ; VI-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[INT1]](s16) + ; VI-NEXT: $vgpr0 = COPY [[ANYEXT]](s32) ; GFX9-LABEL: name: test_fdiv_s16_constant_one_rcp ; GFX9: [[C:%[0-9]+]]:_(s16) = G_FCONSTANT half 0xH3C00 - ; GFX9: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX9: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY]](s32) - ; GFX9: [[FPEXT:%[0-9]+]]:_(s32) = G_FPEXT [[C]](s16) - ; GFX9: [[FPEXT1:%[0-9]+]]:_(s32) = G_FPEXT [[TRUNC]](s16) - ; GFX9: [[INT:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.rcp), [[FPEXT1]](s32) - ; GFX9: [[FMUL:%[0-9]+]]:_(s32) = G_FMUL [[FPEXT]], [[INT]] - ; GFX9: [[FPTRUNC:%[0-9]+]]:_(s16) = G_FPTRUNC [[FMUL]](s32) - ; GFX9: [[INT1:%[0-9]+]]:_(s16) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.fixup), [[FPTRUNC]](s16), [[TRUNC]](s16), [[C]](s16) - ; GFX9: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[INT1]](s16) - ; GFX9: $vgpr0 = COPY [[ANYEXT]](s32) + ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 + ; GFX9-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY]](s32) + ; GFX9-NEXT: [[FPEXT:%[0-9]+]]:_(s32) = G_FPEXT [[C]](s16) + ; GFX9-NEXT: [[FPEXT1:%[0-9]+]]:_(s32) = G_FPEXT [[TRUNC]](s16) + ; GFX9-NEXT: [[INT:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.rcp), [[FPEXT1]](s32) + ; GFX9-NEXT: [[FMUL:%[0-9]+]]:_(s32) = G_FMUL [[FPEXT]], [[INT]] + ; GFX9-NEXT: [[FPTRUNC:%[0-9]+]]:_(s16) = G_FPTRUNC [[FMUL]](s32) + ; GFX9-NEXT: [[INT1:%[0-9]+]]:_(s16) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.fixup), [[FPTRUNC]](s16), [[TRUNC]](s16), [[C]](s16) + ; GFX9-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[INT1]](s16) + ; GFX9-NEXT: $vgpr0 = COPY [[ANYEXT]](s32) ; GFX9-UNSAFE-LABEL: name: test_fdiv_s16_constant_one_rcp ; GFX9-UNSAFE: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX9-UNSAFE: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY]](s32) - ; GFX9-UNSAFE: [[INT:%[0-9]+]]:_(s16) = G_INTRINSIC intrinsic(@llvm.amdgcn.rcp), [[TRUNC]](s16) - ; GFX9-UNSAFE: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[INT]](s16) - ; GFX9-UNSAFE: $vgpr0 = COPY [[ANYEXT]](s32) + ; GFX9-UNSAFE-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY]](s32) + ; GFX9-UNSAFE-NEXT: [[INT:%[0-9]+]]:_(s16) = G_INTRINSIC intrinsic(@llvm.amdgcn.rcp), [[TRUNC]](s16) + ; GFX9-UNSAFE-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[INT]](s16) + ; GFX9-UNSAFE-NEXT: $vgpr0 = COPY [[ANYEXT]](s32) ; GFX10-LABEL: name: test_fdiv_s16_constant_one_rcp ; GFX10: [[C:%[0-9]+]]:_(s16) = G_FCONSTANT half 0xH3C00 - ; GFX10: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX10: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY]](s32) - ; GFX10: [[FPEXT:%[0-9]+]]:_(s32) = G_FPEXT [[C]](s16) - ; GFX10: [[FPEXT1:%[0-9]+]]:_(s32) = G_FPEXT [[TRUNC]](s16) - ; GFX10: [[INT:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.rcp), [[FPEXT1]](s32) - ; GFX10: [[FMUL:%[0-9]+]]:_(s32) = G_FMUL [[FPEXT]], [[INT]] - ; GFX10: [[FPTRUNC:%[0-9]+]]:_(s16) = G_FPTRUNC [[FMUL]](s32) - ; GFX10: [[INT1:%[0-9]+]]:_(s16) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.fixup), [[FPTRUNC]](s16), [[TRUNC]](s16), [[C]](s16) - ; GFX10: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[INT1]](s16) - ; GFX10: $vgpr0 = COPY [[ANYEXT]](s32) + ; GFX10-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 + ; 
GFX10-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY]](s32) + ; GFX10-NEXT: [[FPEXT:%[0-9]+]]:_(s32) = G_FPEXT [[C]](s16) + ; GFX10-NEXT: [[FPEXT1:%[0-9]+]]:_(s32) = G_FPEXT [[TRUNC]](s16) + ; GFX10-NEXT: [[INT:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.rcp), [[FPEXT1]](s32) + ; GFX10-NEXT: [[FMUL:%[0-9]+]]:_(s32) = G_FMUL [[FPEXT]], [[INT]] + ; GFX10-NEXT: [[FPTRUNC:%[0-9]+]]:_(s16) = G_FPTRUNC [[FMUL]](s32) + ; GFX10-NEXT: [[INT1:%[0-9]+]]:_(s16) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.fixup), [[FPTRUNC]](s16), [[TRUNC]](s16), [[C]](s16) + ; GFX10-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[INT1]](s16) + ; GFX10-NEXT: $vgpr0 = COPY [[ANYEXT]](s32) %0:_(s16) = G_FCONSTANT half 1.0 %1:_(s32) = COPY $vgpr0 %2:_(s16) = G_TRUNC %1 @@ -2005,69 +2005,69 @@ body: | ; SI-LABEL: name: test_fdiv_s16_constant_negative_one_rcp ; SI: [[C:%[0-9]+]]:_(s16) = G_FCONSTANT half 0xHBC00 - ; SI: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; SI: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY]](s32) - ; SI: [[FPEXT:%[0-9]+]]:_(s32) = G_FPEXT [[C]](s16) - ; SI: [[FPEXT1:%[0-9]+]]:_(s32) = G_FPEXT [[TRUNC]](s16) - ; SI: [[C1:%[0-9]+]]:_(s32) = G_FCONSTANT float 1.000000e+00 - ; SI: [[INT:%[0-9]+]]:_(s32), [[INT1:%[0-9]+]]:_(s1) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.scale), [[FPEXT]](s32), [[FPEXT1]](s32), 0 - ; SI: [[INT2:%[0-9]+]]:_(s32), [[INT3:%[0-9]+]]:_(s1) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.scale), [[FPEXT]](s32), [[FPEXT1]](s32), 1 - ; SI: [[INT4:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.rcp), [[INT]](s32) - ; SI: [[FNEG:%[0-9]+]]:_(s32) = G_FNEG [[INT]] - ; SI: [[FMA:%[0-9]+]]:_(s32) = G_FMA [[FNEG]], [[INT4]], [[C1]] - ; SI: [[FMA1:%[0-9]+]]:_(s32) = G_FMA [[FMA]], [[INT4]], [[INT4]] - ; SI: [[FMUL:%[0-9]+]]:_(s32) = G_FMUL [[INT2]], [[FMA1]] - ; SI: [[FMA2:%[0-9]+]]:_(s32) = G_FMA [[FNEG]], [[FMUL]], [[INT2]] - ; SI: [[FMA3:%[0-9]+]]:_(s32) = G_FMA [[FMA2]], [[FMA1]], [[FMUL]] - ; SI: [[FMA4:%[0-9]+]]:_(s32) = G_FMA [[FNEG]], [[FMA3]], [[INT2]] - ; SI: [[INT5:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.fmas), [[FMA4]](s32), [[FMA1]](s32), [[FMA3]](s32), [[INT3]](s1) - ; SI: [[INT6:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.fixup), [[INT5]](s32), [[FPEXT1]](s32), [[FPEXT]](s32) - ; SI: [[FPTRUNC:%[0-9]+]]:_(s16) = G_FPTRUNC [[INT6]](s32) - ; SI: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[FPTRUNC]](s16) - ; SI: $vgpr0 = COPY [[ANYEXT]](s32) + ; SI-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 + ; SI-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY]](s32) + ; SI-NEXT: [[FPEXT:%[0-9]+]]:_(s32) = G_FPEXT [[C]](s16) + ; SI-NEXT: [[FPEXT1:%[0-9]+]]:_(s32) = G_FPEXT [[TRUNC]](s16) + ; SI-NEXT: [[C1:%[0-9]+]]:_(s32) = G_FCONSTANT float 1.000000e+00 + ; SI-NEXT: [[INT:%[0-9]+]]:_(s32), [[INT1:%[0-9]+]]:_(s1) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.scale), [[FPEXT]](s32), [[FPEXT1]](s32), 0 + ; SI-NEXT: [[INT2:%[0-9]+]]:_(s32), [[INT3:%[0-9]+]]:_(s1) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.scale), [[FPEXT]](s32), [[FPEXT1]](s32), 1 + ; SI-NEXT: [[INT4:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.rcp), [[INT]](s32) + ; SI-NEXT: [[FNEG:%[0-9]+]]:_(s32) = G_FNEG [[INT]] + ; SI-NEXT: [[FMA:%[0-9]+]]:_(s32) = G_FMA [[FNEG]], [[INT4]], [[C1]] + ; SI-NEXT: [[FMA1:%[0-9]+]]:_(s32) = G_FMA [[FMA]], [[INT4]], [[INT4]] + ; SI-NEXT: [[FMUL:%[0-9]+]]:_(s32) = G_FMUL [[INT2]], [[FMA1]] + ; SI-NEXT: [[FMA2:%[0-9]+]]:_(s32) = G_FMA [[FNEG]], [[FMUL]], [[INT2]] + ; SI-NEXT: [[FMA3:%[0-9]+]]:_(s32) = G_FMA [[FMA2]], [[FMA1]], [[FMUL]] + ; SI-NEXT: [[FMA4:%[0-9]+]]:_(s32) = 
G_FMA [[FNEG]], [[FMA3]], [[INT2]] + ; SI-NEXT: [[INT5:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.fmas), [[FMA4]](s32), [[FMA1]](s32), [[FMA3]](s32), [[INT3]](s1) + ; SI-NEXT: [[INT6:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.fixup), [[INT5]](s32), [[FPEXT1]](s32), [[FPEXT]](s32) + ; SI-NEXT: [[FPTRUNC:%[0-9]+]]:_(s16) = G_FPTRUNC [[INT6]](s32) + ; SI-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[FPTRUNC]](s16) + ; SI-NEXT: $vgpr0 = COPY [[ANYEXT]](s32) ; VI-LABEL: name: test_fdiv_s16_constant_negative_one_rcp ; VI: [[C:%[0-9]+]]:_(s16) = G_FCONSTANT half 0xHBC00 - ; VI: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; VI: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY]](s32) - ; VI: [[FPEXT:%[0-9]+]]:_(s32) = G_FPEXT [[C]](s16) - ; VI: [[FPEXT1:%[0-9]+]]:_(s32) = G_FPEXT [[TRUNC]](s16) - ; VI: [[INT:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.rcp), [[FPEXT1]](s32) - ; VI: [[FMUL:%[0-9]+]]:_(s32) = G_FMUL [[FPEXT]], [[INT]] - ; VI: [[FPTRUNC:%[0-9]+]]:_(s16) = G_FPTRUNC [[FMUL]](s32) - ; VI: [[INT1:%[0-9]+]]:_(s16) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.fixup), [[FPTRUNC]](s16), [[TRUNC]](s16), [[C]](s16) - ; VI: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[INT1]](s16) - ; VI: $vgpr0 = COPY [[ANYEXT]](s32) + ; VI-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 + ; VI-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY]](s32) + ; VI-NEXT: [[FPEXT:%[0-9]+]]:_(s32) = G_FPEXT [[C]](s16) + ; VI-NEXT: [[FPEXT1:%[0-9]+]]:_(s32) = G_FPEXT [[TRUNC]](s16) + ; VI-NEXT: [[INT:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.rcp), [[FPEXT1]](s32) + ; VI-NEXT: [[FMUL:%[0-9]+]]:_(s32) = G_FMUL [[FPEXT]], [[INT]] + ; VI-NEXT: [[FPTRUNC:%[0-9]+]]:_(s16) = G_FPTRUNC [[FMUL]](s32) + ; VI-NEXT: [[INT1:%[0-9]+]]:_(s16) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.fixup), [[FPTRUNC]](s16), [[TRUNC]](s16), [[C]](s16) + ; VI-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[INT1]](s16) + ; VI-NEXT: $vgpr0 = COPY [[ANYEXT]](s32) ; GFX9-LABEL: name: test_fdiv_s16_constant_negative_one_rcp ; GFX9: [[C:%[0-9]+]]:_(s16) = G_FCONSTANT half 0xHBC00 - ; GFX9: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX9: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY]](s32) - ; GFX9: [[FPEXT:%[0-9]+]]:_(s32) = G_FPEXT [[C]](s16) - ; GFX9: [[FPEXT1:%[0-9]+]]:_(s32) = G_FPEXT [[TRUNC]](s16) - ; GFX9: [[INT:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.rcp), [[FPEXT1]](s32) - ; GFX9: [[FMUL:%[0-9]+]]:_(s32) = G_FMUL [[FPEXT]], [[INT]] - ; GFX9: [[FPTRUNC:%[0-9]+]]:_(s16) = G_FPTRUNC [[FMUL]](s32) - ; GFX9: [[INT1:%[0-9]+]]:_(s16) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.fixup), [[FPTRUNC]](s16), [[TRUNC]](s16), [[C]](s16) - ; GFX9: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[INT1]](s16) - ; GFX9: $vgpr0 = COPY [[ANYEXT]](s32) + ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 + ; GFX9-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY]](s32) + ; GFX9-NEXT: [[FPEXT:%[0-9]+]]:_(s32) = G_FPEXT [[C]](s16) + ; GFX9-NEXT: [[FPEXT1:%[0-9]+]]:_(s32) = G_FPEXT [[TRUNC]](s16) + ; GFX9-NEXT: [[INT:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.rcp), [[FPEXT1]](s32) + ; GFX9-NEXT: [[FMUL:%[0-9]+]]:_(s32) = G_FMUL [[FPEXT]], [[INT]] + ; GFX9-NEXT: [[FPTRUNC:%[0-9]+]]:_(s16) = G_FPTRUNC [[FMUL]](s32) + ; GFX9-NEXT: [[INT1:%[0-9]+]]:_(s16) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.fixup), [[FPTRUNC]](s16), [[TRUNC]](s16), [[C]](s16) + ; GFX9-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[INT1]](s16) + ; GFX9-NEXT: $vgpr0 = COPY [[ANYEXT]](s32) ; GFX9-UNSAFE-LABEL: name: test_fdiv_s16_constant_negative_one_rcp ; GFX9-UNSAFE: 
[[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX9-UNSAFE: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY]](s32) - ; GFX9-UNSAFE: [[FNEG:%[0-9]+]]:_(s16) = G_FNEG [[TRUNC]] - ; GFX9-UNSAFE: [[INT:%[0-9]+]]:_(s16) = G_INTRINSIC intrinsic(@llvm.amdgcn.rcp), [[FNEG]](s16) - ; GFX9-UNSAFE: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[INT]](s16) - ; GFX9-UNSAFE: $vgpr0 = COPY [[ANYEXT]](s32) + ; GFX9-UNSAFE-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY]](s32) + ; GFX9-UNSAFE-NEXT: [[FNEG:%[0-9]+]]:_(s16) = G_FNEG [[TRUNC]] + ; GFX9-UNSAFE-NEXT: [[INT:%[0-9]+]]:_(s16) = G_INTRINSIC intrinsic(@llvm.amdgcn.rcp), [[FNEG]](s16) + ; GFX9-UNSAFE-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[INT]](s16) + ; GFX9-UNSAFE-NEXT: $vgpr0 = COPY [[ANYEXT]](s32) ; GFX10-LABEL: name: test_fdiv_s16_constant_negative_one_rcp ; GFX10: [[C:%[0-9]+]]:_(s16) = G_FCONSTANT half 0xHBC00 - ; GFX10: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX10: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY]](s32) - ; GFX10: [[FPEXT:%[0-9]+]]:_(s32) = G_FPEXT [[C]](s16) - ; GFX10: [[FPEXT1:%[0-9]+]]:_(s32) = G_FPEXT [[TRUNC]](s16) - ; GFX10: [[INT:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.rcp), [[FPEXT1]](s32) - ; GFX10: [[FMUL:%[0-9]+]]:_(s32) = G_FMUL [[FPEXT]], [[INT]] - ; GFX10: [[FPTRUNC:%[0-9]+]]:_(s16) = G_FPTRUNC [[FMUL]](s32) - ; GFX10: [[INT1:%[0-9]+]]:_(s16) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.fixup), [[FPTRUNC]](s16), [[TRUNC]](s16), [[C]](s16) - ; GFX10: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[INT1]](s16) - ; GFX10: $vgpr0 = COPY [[ANYEXT]](s32) + ; GFX10-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 + ; GFX10-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY]](s32) + ; GFX10-NEXT: [[FPEXT:%[0-9]+]]:_(s32) = G_FPEXT [[C]](s16) + ; GFX10-NEXT: [[FPEXT1:%[0-9]+]]:_(s32) = G_FPEXT [[TRUNC]](s16) + ; GFX10-NEXT: [[INT:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.rcp), [[FPEXT1]](s32) + ; GFX10-NEXT: [[FMUL:%[0-9]+]]:_(s32) = G_FMUL [[FPEXT]], [[INT]] + ; GFX10-NEXT: [[FPTRUNC:%[0-9]+]]:_(s16) = G_FPTRUNC [[FMUL]](s32) + ; GFX10-NEXT: [[INT1:%[0-9]+]]:_(s16) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.fixup), [[FPTRUNC]](s16), [[TRUNC]](s16), [[C]](s16) + ; GFX10-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[INT1]](s16) + ; GFX10-NEXT: $vgpr0 = COPY [[ANYEXT]](s32) %0:_(s16) = G_FCONSTANT half -1.0 %1:_(s32) = COPY $vgpr0 %2:_(s16) = G_TRUNC %1 @@ -2084,72 +2084,72 @@ body: | ; SI-LABEL: name: test_fdiv_s32_constant_one_rcp ; SI: [[C:%[0-9]+]]:_(s32) = G_FCONSTANT float 1.000000e+00 - ; SI: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; SI: [[INT:%[0-9]+]]:_(s32), [[INT1:%[0-9]+]]:_(s1) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.scale), [[C]](s32), [[COPY]](s32), 0 - ; SI: [[INT2:%[0-9]+]]:_(s32), [[INT3:%[0-9]+]]:_(s1) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.scale), [[C]](s32), [[COPY]](s32), 1 - ; SI: [[INT4:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.rcp), [[INT]](s32) - ; SI: [[FNEG:%[0-9]+]]:_(s32) = G_FNEG [[INT]] - ; SI: [[FMA:%[0-9]+]]:_(s32) = G_FMA [[FNEG]], [[INT4]], [[C]] - ; SI: [[FMA1:%[0-9]+]]:_(s32) = G_FMA [[FMA]], [[INT4]], [[INT4]] - ; SI: [[FMUL:%[0-9]+]]:_(s32) = G_FMUL [[INT2]], [[FMA1]] - ; SI: [[FMA2:%[0-9]+]]:_(s32) = G_FMA [[FNEG]], [[FMUL]], [[INT2]] - ; SI: [[FMA3:%[0-9]+]]:_(s32) = G_FMA [[FMA2]], [[FMA1]], [[FMUL]] - ; SI: [[FMA4:%[0-9]+]]:_(s32) = G_FMA [[FNEG]], [[FMA3]], [[INT2]] - ; SI: [[INT5:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.fmas), [[FMA4]](s32), [[FMA1]](s32), [[FMA3]](s32), [[INT3]](s1) - ; SI: [[INT6:%[0-9]+]]:_(s32) = G_INTRINSIC 
intrinsic(@llvm.amdgcn.div.fixup), [[INT5]](s32), [[COPY]](s32), [[C]](s32) - ; SI: $vgpr0 = COPY [[INT6]](s32) + ; SI-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 + ; SI-NEXT: [[INT:%[0-9]+]]:_(s32), [[INT1:%[0-9]+]]:_(s1) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.scale), [[C]](s32), [[COPY]](s32), 0 + ; SI-NEXT: [[INT2:%[0-9]+]]:_(s32), [[INT3:%[0-9]+]]:_(s1) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.scale), [[C]](s32), [[COPY]](s32), 1 + ; SI-NEXT: [[INT4:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.rcp), [[INT]](s32) + ; SI-NEXT: [[FNEG:%[0-9]+]]:_(s32) = G_FNEG [[INT]] + ; SI-NEXT: [[FMA:%[0-9]+]]:_(s32) = G_FMA [[FNEG]], [[INT4]], [[C]] + ; SI-NEXT: [[FMA1:%[0-9]+]]:_(s32) = G_FMA [[FMA]], [[INT4]], [[INT4]] + ; SI-NEXT: [[FMUL:%[0-9]+]]:_(s32) = G_FMUL [[INT2]], [[FMA1]] + ; SI-NEXT: [[FMA2:%[0-9]+]]:_(s32) = G_FMA [[FNEG]], [[FMUL]], [[INT2]] + ; SI-NEXT: [[FMA3:%[0-9]+]]:_(s32) = G_FMA [[FMA2]], [[FMA1]], [[FMUL]] + ; SI-NEXT: [[FMA4:%[0-9]+]]:_(s32) = G_FMA [[FNEG]], [[FMA3]], [[INT2]] + ; SI-NEXT: [[INT5:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.fmas), [[FMA4]](s32), [[FMA1]](s32), [[FMA3]](s32), [[INT3]](s1) + ; SI-NEXT: [[INT6:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.fixup), [[INT5]](s32), [[COPY]](s32), [[C]](s32) + ; SI-NEXT: $vgpr0 = COPY [[INT6]](s32) ; VI-LABEL: name: test_fdiv_s32_constant_one_rcp ; VI: [[C:%[0-9]+]]:_(s32) = G_FCONSTANT float 1.000000e+00 - ; VI: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; VI: [[INT:%[0-9]+]]:_(s32), [[INT1:%[0-9]+]]:_(s1) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.scale), [[C]](s32), [[COPY]](s32), 0 - ; VI: [[INT2:%[0-9]+]]:_(s32), [[INT3:%[0-9]+]]:_(s1) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.scale), [[C]](s32), [[COPY]](s32), 1 - ; VI: [[INT4:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.rcp), [[INT]](s32) - ; VI: [[FNEG:%[0-9]+]]:_(s32) = G_FNEG [[INT]] - ; VI: [[FMA:%[0-9]+]]:_(s32) = G_FMA [[FNEG]], [[INT4]], [[C]] - ; VI: [[FMA1:%[0-9]+]]:_(s32) = G_FMA [[FMA]], [[INT4]], [[INT4]] - ; VI: [[FMUL:%[0-9]+]]:_(s32) = G_FMUL [[INT2]], [[FMA1]] - ; VI: [[FMA2:%[0-9]+]]:_(s32) = G_FMA [[FNEG]], [[FMUL]], [[INT2]] - ; VI: [[FMA3:%[0-9]+]]:_(s32) = G_FMA [[FMA2]], [[FMA1]], [[FMUL]] - ; VI: [[FMA4:%[0-9]+]]:_(s32) = G_FMA [[FNEG]], [[FMA3]], [[INT2]] - ; VI: [[INT5:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.fmas), [[FMA4]](s32), [[FMA1]](s32), [[FMA3]](s32), [[INT3]](s1) - ; VI: [[INT6:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.fixup), [[INT5]](s32), [[COPY]](s32), [[C]](s32) - ; VI: $vgpr0 = COPY [[INT6]](s32) + ; VI-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 + ; VI-NEXT: [[INT:%[0-9]+]]:_(s32), [[INT1:%[0-9]+]]:_(s1) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.scale), [[C]](s32), [[COPY]](s32), 0 + ; VI-NEXT: [[INT2:%[0-9]+]]:_(s32), [[INT3:%[0-9]+]]:_(s1) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.scale), [[C]](s32), [[COPY]](s32), 1 + ; VI-NEXT: [[INT4:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.rcp), [[INT]](s32) + ; VI-NEXT: [[FNEG:%[0-9]+]]:_(s32) = G_FNEG [[INT]] + ; VI-NEXT: [[FMA:%[0-9]+]]:_(s32) = G_FMA [[FNEG]], [[INT4]], [[C]] + ; VI-NEXT: [[FMA1:%[0-9]+]]:_(s32) = G_FMA [[FMA]], [[INT4]], [[INT4]] + ; VI-NEXT: [[FMUL:%[0-9]+]]:_(s32) = G_FMUL [[INT2]], [[FMA1]] + ; VI-NEXT: [[FMA2:%[0-9]+]]:_(s32) = G_FMA [[FNEG]], [[FMUL]], [[INT2]] + ; VI-NEXT: [[FMA3:%[0-9]+]]:_(s32) = G_FMA [[FMA2]], [[FMA1]], [[FMUL]] + ; VI-NEXT: [[FMA4:%[0-9]+]]:_(s32) = G_FMA [[FNEG]], [[FMA3]], [[INT2]] + ; VI-NEXT: [[INT5:%[0-9]+]]:_(s32) = G_INTRINSIC 
intrinsic(@llvm.amdgcn.div.fmas), [[FMA4]](s32), [[FMA1]](s32), [[FMA3]](s32), [[INT3]](s1) + ; VI-NEXT: [[INT6:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.fixup), [[INT5]](s32), [[COPY]](s32), [[C]](s32) + ; VI-NEXT: $vgpr0 = COPY [[INT6]](s32) ; GFX9-LABEL: name: test_fdiv_s32_constant_one_rcp ; GFX9: [[C:%[0-9]+]]:_(s32) = G_FCONSTANT float 1.000000e+00 - ; GFX9: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX9: [[INT:%[0-9]+]]:_(s32), [[INT1:%[0-9]+]]:_(s1) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.scale), [[C]](s32), [[COPY]](s32), 0 - ; GFX9: [[INT2:%[0-9]+]]:_(s32), [[INT3:%[0-9]+]]:_(s1) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.scale), [[C]](s32), [[COPY]](s32), 1 - ; GFX9: [[INT4:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.rcp), [[INT]](s32) - ; GFX9: [[FNEG:%[0-9]+]]:_(s32) = G_FNEG [[INT]] - ; GFX9: [[FMA:%[0-9]+]]:_(s32) = G_FMA [[FNEG]], [[INT4]], [[C]] - ; GFX9: [[FMA1:%[0-9]+]]:_(s32) = G_FMA [[FMA]], [[INT4]], [[INT4]] - ; GFX9: [[FMUL:%[0-9]+]]:_(s32) = G_FMUL [[INT2]], [[FMA1]] - ; GFX9: [[FMA2:%[0-9]+]]:_(s32) = G_FMA [[FNEG]], [[FMUL]], [[INT2]] - ; GFX9: [[FMA3:%[0-9]+]]:_(s32) = G_FMA [[FMA2]], [[FMA1]], [[FMUL]] - ; GFX9: [[FMA4:%[0-9]+]]:_(s32) = G_FMA [[FNEG]], [[FMA3]], [[INT2]] - ; GFX9: [[INT5:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.fmas), [[FMA4]](s32), [[FMA1]](s32), [[FMA3]](s32), [[INT3]](s1) - ; GFX9: [[INT6:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.fixup), [[INT5]](s32), [[COPY]](s32), [[C]](s32) - ; GFX9: $vgpr0 = COPY [[INT6]](s32) + ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 + ; GFX9-NEXT: [[INT:%[0-9]+]]:_(s32), [[INT1:%[0-9]+]]:_(s1) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.scale), [[C]](s32), [[COPY]](s32), 0 + ; GFX9-NEXT: [[INT2:%[0-9]+]]:_(s32), [[INT3:%[0-9]+]]:_(s1) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.scale), [[C]](s32), [[COPY]](s32), 1 + ; GFX9-NEXT: [[INT4:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.rcp), [[INT]](s32) + ; GFX9-NEXT: [[FNEG:%[0-9]+]]:_(s32) = G_FNEG [[INT]] + ; GFX9-NEXT: [[FMA:%[0-9]+]]:_(s32) = G_FMA [[FNEG]], [[INT4]], [[C]] + ; GFX9-NEXT: [[FMA1:%[0-9]+]]:_(s32) = G_FMA [[FMA]], [[INT4]], [[INT4]] + ; GFX9-NEXT: [[FMUL:%[0-9]+]]:_(s32) = G_FMUL [[INT2]], [[FMA1]] + ; GFX9-NEXT: [[FMA2:%[0-9]+]]:_(s32) = G_FMA [[FNEG]], [[FMUL]], [[INT2]] + ; GFX9-NEXT: [[FMA3:%[0-9]+]]:_(s32) = G_FMA [[FMA2]], [[FMA1]], [[FMUL]] + ; GFX9-NEXT: [[FMA4:%[0-9]+]]:_(s32) = G_FMA [[FNEG]], [[FMA3]], [[INT2]] + ; GFX9-NEXT: [[INT5:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.fmas), [[FMA4]](s32), [[FMA1]](s32), [[FMA3]](s32), [[INT3]](s1) + ; GFX9-NEXT: [[INT6:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.fixup), [[INT5]](s32), [[COPY]](s32), [[C]](s32) + ; GFX9-NEXT: $vgpr0 = COPY [[INT6]](s32) ; GFX9-UNSAFE-LABEL: name: test_fdiv_s32_constant_one_rcp ; GFX9-UNSAFE: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX9-UNSAFE: [[INT:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.rcp), [[COPY]](s32) - ; GFX9-UNSAFE: $vgpr0 = COPY [[INT]](s32) + ; GFX9-UNSAFE-NEXT: [[INT:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.rcp), [[COPY]](s32) + ; GFX9-UNSAFE-NEXT: $vgpr0 = COPY [[INT]](s32) ; GFX10-LABEL: name: test_fdiv_s32_constant_one_rcp ; GFX10: [[C:%[0-9]+]]:_(s32) = G_FCONSTANT float 1.000000e+00 - ; GFX10: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX10: [[INT:%[0-9]+]]:_(s32), [[INT1:%[0-9]+]]:_(s1) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.scale), [[C]](s32), [[COPY]](s32), 0 - ; GFX10: [[INT2:%[0-9]+]]:_(s32), [[INT3:%[0-9]+]]:_(s1) = 
G_INTRINSIC intrinsic(@llvm.amdgcn.div.scale), [[C]](s32), [[COPY]](s32), 1 - ; GFX10: [[INT4:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.rcp), [[INT]](s32) - ; GFX10: [[FNEG:%[0-9]+]]:_(s32) = G_FNEG [[INT]] - ; GFX10: [[FMA:%[0-9]+]]:_(s32) = G_FMA [[FNEG]], [[INT4]], [[C]] - ; GFX10: [[FMA1:%[0-9]+]]:_(s32) = G_FMA [[FMA]], [[INT4]], [[INT4]] - ; GFX10: [[FMUL:%[0-9]+]]:_(s32) = G_FMUL [[INT2]], [[FMA1]] - ; GFX10: [[FMA2:%[0-9]+]]:_(s32) = G_FMA [[FNEG]], [[FMUL]], [[INT2]] - ; GFX10: [[FMA3:%[0-9]+]]:_(s32) = G_FMA [[FMA2]], [[FMA1]], [[FMUL]] - ; GFX10: [[FMA4:%[0-9]+]]:_(s32) = G_FMA [[FNEG]], [[FMA3]], [[INT2]] - ; GFX10: [[INT5:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.fmas), [[FMA4]](s32), [[FMA1]](s32), [[FMA3]](s32), [[INT3]](s1) - ; GFX10: [[INT6:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.fixup), [[INT5]](s32), [[COPY]](s32), [[C]](s32) - ; GFX10: $vgpr0 = COPY [[INT6]](s32) + ; GFX10-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 + ; GFX10-NEXT: [[INT:%[0-9]+]]:_(s32), [[INT1:%[0-9]+]]:_(s1) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.scale), [[C]](s32), [[COPY]](s32), 0 + ; GFX10-NEXT: [[INT2:%[0-9]+]]:_(s32), [[INT3:%[0-9]+]]:_(s1) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.scale), [[C]](s32), [[COPY]](s32), 1 + ; GFX10-NEXT: [[INT4:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.rcp), [[INT]](s32) + ; GFX10-NEXT: [[FNEG:%[0-9]+]]:_(s32) = G_FNEG [[INT]] + ; GFX10-NEXT: [[FMA:%[0-9]+]]:_(s32) = G_FMA [[FNEG]], [[INT4]], [[C]] + ; GFX10-NEXT: [[FMA1:%[0-9]+]]:_(s32) = G_FMA [[FMA]], [[INT4]], [[INT4]] + ; GFX10-NEXT: [[FMUL:%[0-9]+]]:_(s32) = G_FMUL [[INT2]], [[FMA1]] + ; GFX10-NEXT: [[FMA2:%[0-9]+]]:_(s32) = G_FMA [[FNEG]], [[FMUL]], [[INT2]] + ; GFX10-NEXT: [[FMA3:%[0-9]+]]:_(s32) = G_FMA [[FMA2]], [[FMA1]], [[FMUL]] + ; GFX10-NEXT: [[FMA4:%[0-9]+]]:_(s32) = G_FMA [[FNEG]], [[FMA3]], [[INT2]] + ; GFX10-NEXT: [[INT5:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.fmas), [[FMA4]](s32), [[FMA1]](s32), [[FMA3]](s32), [[INT3]](s1) + ; GFX10-NEXT: [[INT6:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.fixup), [[INT5]](s32), [[COPY]](s32), [[C]](s32) + ; GFX10-NEXT: $vgpr0 = COPY [[INT6]](s32) %0:_(s32) = G_FCONSTANT float 1.0 %1:_(s32) = COPY $vgpr0 %2:_(s32) = G_FDIV %0, %1 @@ -2164,77 +2164,77 @@ body: | ; SI-LABEL: name: test_fdiv_s32_constant_negative_one_rcp ; SI: [[C:%[0-9]+]]:_(s32) = G_FCONSTANT float -1.000000e+00 - ; SI: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; SI: [[C1:%[0-9]+]]:_(s32) = G_FCONSTANT float 1.000000e+00 - ; SI: [[INT:%[0-9]+]]:_(s32), [[INT1:%[0-9]+]]:_(s1) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.scale), [[C]](s32), [[COPY]](s32), 0 - ; SI: [[INT2:%[0-9]+]]:_(s32), [[INT3:%[0-9]+]]:_(s1) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.scale), [[C]](s32), [[COPY]](s32), 1 - ; SI: [[INT4:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.rcp), [[INT]](s32) - ; SI: [[FNEG:%[0-9]+]]:_(s32) = G_FNEG [[INT]] - ; SI: [[FMA:%[0-9]+]]:_(s32) = G_FMA [[FNEG]], [[INT4]], [[C1]] - ; SI: [[FMA1:%[0-9]+]]:_(s32) = G_FMA [[FMA]], [[INT4]], [[INT4]] - ; SI: [[FMUL:%[0-9]+]]:_(s32) = G_FMUL [[INT2]], [[FMA1]] - ; SI: [[FMA2:%[0-9]+]]:_(s32) = G_FMA [[FNEG]], [[FMUL]], [[INT2]] - ; SI: [[FMA3:%[0-9]+]]:_(s32) = G_FMA [[FMA2]], [[FMA1]], [[FMUL]] - ; SI: [[FMA4:%[0-9]+]]:_(s32) = G_FMA [[FNEG]], [[FMA3]], [[INT2]] - ; SI: [[INT5:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.fmas), [[FMA4]](s32), [[FMA1]](s32), [[FMA3]](s32), [[INT3]](s1) - ; SI: [[INT6:%[0-9]+]]:_(s32) = G_INTRINSIC 
intrinsic(@llvm.amdgcn.div.fixup), [[INT5]](s32), [[COPY]](s32), [[C]](s32) - ; SI: $vgpr0 = COPY [[INT6]](s32) + ; SI-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 + ; SI-NEXT: [[C1:%[0-9]+]]:_(s32) = G_FCONSTANT float 1.000000e+00 + ; SI-NEXT: [[INT:%[0-9]+]]:_(s32), [[INT1:%[0-9]+]]:_(s1) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.scale), [[C]](s32), [[COPY]](s32), 0 + ; SI-NEXT: [[INT2:%[0-9]+]]:_(s32), [[INT3:%[0-9]+]]:_(s1) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.scale), [[C]](s32), [[COPY]](s32), 1 + ; SI-NEXT: [[INT4:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.rcp), [[INT]](s32) + ; SI-NEXT: [[FNEG:%[0-9]+]]:_(s32) = G_FNEG [[INT]] + ; SI-NEXT: [[FMA:%[0-9]+]]:_(s32) = G_FMA [[FNEG]], [[INT4]], [[C1]] + ; SI-NEXT: [[FMA1:%[0-9]+]]:_(s32) = G_FMA [[FMA]], [[INT4]], [[INT4]] + ; SI-NEXT: [[FMUL:%[0-9]+]]:_(s32) = G_FMUL [[INT2]], [[FMA1]] + ; SI-NEXT: [[FMA2:%[0-9]+]]:_(s32) = G_FMA [[FNEG]], [[FMUL]], [[INT2]] + ; SI-NEXT: [[FMA3:%[0-9]+]]:_(s32) = G_FMA [[FMA2]], [[FMA1]], [[FMUL]] + ; SI-NEXT: [[FMA4:%[0-9]+]]:_(s32) = G_FMA [[FNEG]], [[FMA3]], [[INT2]] + ; SI-NEXT: [[INT5:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.fmas), [[FMA4]](s32), [[FMA1]](s32), [[FMA3]](s32), [[INT3]](s1) + ; SI-NEXT: [[INT6:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.fixup), [[INT5]](s32), [[COPY]](s32), [[C]](s32) + ; SI-NEXT: $vgpr0 = COPY [[INT6]](s32) ; VI-LABEL: name: test_fdiv_s32_constant_negative_one_rcp ; VI: [[C:%[0-9]+]]:_(s32) = G_FCONSTANT float -1.000000e+00 - ; VI: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; VI: [[C1:%[0-9]+]]:_(s32) = G_FCONSTANT float 1.000000e+00 - ; VI: [[INT:%[0-9]+]]:_(s32), [[INT1:%[0-9]+]]:_(s1) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.scale), [[C]](s32), [[COPY]](s32), 0 - ; VI: [[INT2:%[0-9]+]]:_(s32), [[INT3:%[0-9]+]]:_(s1) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.scale), [[C]](s32), [[COPY]](s32), 1 - ; VI: [[INT4:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.rcp), [[INT]](s32) - ; VI: [[FNEG:%[0-9]+]]:_(s32) = G_FNEG [[INT]] - ; VI: [[FMA:%[0-9]+]]:_(s32) = G_FMA [[FNEG]], [[INT4]], [[C1]] - ; VI: [[FMA1:%[0-9]+]]:_(s32) = G_FMA [[FMA]], [[INT4]], [[INT4]] - ; VI: [[FMUL:%[0-9]+]]:_(s32) = G_FMUL [[INT2]], [[FMA1]] - ; VI: [[FMA2:%[0-9]+]]:_(s32) = G_FMA [[FNEG]], [[FMUL]], [[INT2]] - ; VI: [[FMA3:%[0-9]+]]:_(s32) = G_FMA [[FMA2]], [[FMA1]], [[FMUL]] - ; VI: [[FMA4:%[0-9]+]]:_(s32) = G_FMA [[FNEG]], [[FMA3]], [[INT2]] - ; VI: [[INT5:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.fmas), [[FMA4]](s32), [[FMA1]](s32), [[FMA3]](s32), [[INT3]](s1) - ; VI: [[INT6:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.fixup), [[INT5]](s32), [[COPY]](s32), [[C]](s32) - ; VI: $vgpr0 = COPY [[INT6]](s32) + ; VI-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 + ; VI-NEXT: [[C1:%[0-9]+]]:_(s32) = G_FCONSTANT float 1.000000e+00 + ; VI-NEXT: [[INT:%[0-9]+]]:_(s32), [[INT1:%[0-9]+]]:_(s1) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.scale), [[C]](s32), [[COPY]](s32), 0 + ; VI-NEXT: [[INT2:%[0-9]+]]:_(s32), [[INT3:%[0-9]+]]:_(s1) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.scale), [[C]](s32), [[COPY]](s32), 1 + ; VI-NEXT: [[INT4:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.rcp), [[INT]](s32) + ; VI-NEXT: [[FNEG:%[0-9]+]]:_(s32) = G_FNEG [[INT]] + ; VI-NEXT: [[FMA:%[0-9]+]]:_(s32) = G_FMA [[FNEG]], [[INT4]], [[C1]] + ; VI-NEXT: [[FMA1:%[0-9]+]]:_(s32) = G_FMA [[FMA]], [[INT4]], [[INT4]] + ; VI-NEXT: [[FMUL:%[0-9]+]]:_(s32) = G_FMUL [[INT2]], [[FMA1]] + ; VI-NEXT: [[FMA2:%[0-9]+]]:_(s32) = G_FMA [[FNEG]], [[FMUL]], [[INT2]] + 
; VI-NEXT: [[FMA3:%[0-9]+]]:_(s32) = G_FMA [[FMA2]], [[FMA1]], [[FMUL]] + ; VI-NEXT: [[FMA4:%[0-9]+]]:_(s32) = G_FMA [[FNEG]], [[FMA3]], [[INT2]] + ; VI-NEXT: [[INT5:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.fmas), [[FMA4]](s32), [[FMA1]](s32), [[FMA3]](s32), [[INT3]](s1) + ; VI-NEXT: [[INT6:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.fixup), [[INT5]](s32), [[COPY]](s32), [[C]](s32) + ; VI-NEXT: $vgpr0 = COPY [[INT6]](s32) ; GFX9-LABEL: name: test_fdiv_s32_constant_negative_one_rcp ; GFX9: [[C:%[0-9]+]]:_(s32) = G_FCONSTANT float -1.000000e+00 - ; GFX9: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX9: [[C1:%[0-9]+]]:_(s32) = G_FCONSTANT float 1.000000e+00 - ; GFX9: [[INT:%[0-9]+]]:_(s32), [[INT1:%[0-9]+]]:_(s1) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.scale), [[C]](s32), [[COPY]](s32), 0 - ; GFX9: [[INT2:%[0-9]+]]:_(s32), [[INT3:%[0-9]+]]:_(s1) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.scale), [[C]](s32), [[COPY]](s32), 1 - ; GFX9: [[INT4:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.rcp), [[INT]](s32) - ; GFX9: [[FNEG:%[0-9]+]]:_(s32) = G_FNEG [[INT]] - ; GFX9: [[FMA:%[0-9]+]]:_(s32) = G_FMA [[FNEG]], [[INT4]], [[C1]] - ; GFX9: [[FMA1:%[0-9]+]]:_(s32) = G_FMA [[FMA]], [[INT4]], [[INT4]] - ; GFX9: [[FMUL:%[0-9]+]]:_(s32) = G_FMUL [[INT2]], [[FMA1]] - ; GFX9: [[FMA2:%[0-9]+]]:_(s32) = G_FMA [[FNEG]], [[FMUL]], [[INT2]] - ; GFX9: [[FMA3:%[0-9]+]]:_(s32) = G_FMA [[FMA2]], [[FMA1]], [[FMUL]] - ; GFX9: [[FMA4:%[0-9]+]]:_(s32) = G_FMA [[FNEG]], [[FMA3]], [[INT2]] - ; GFX9: [[INT5:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.fmas), [[FMA4]](s32), [[FMA1]](s32), [[FMA3]](s32), [[INT3]](s1) - ; GFX9: [[INT6:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.fixup), [[INT5]](s32), [[COPY]](s32), [[C]](s32) - ; GFX9: $vgpr0 = COPY [[INT6]](s32) + ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 + ; GFX9-NEXT: [[C1:%[0-9]+]]:_(s32) = G_FCONSTANT float 1.000000e+00 + ; GFX9-NEXT: [[INT:%[0-9]+]]:_(s32), [[INT1:%[0-9]+]]:_(s1) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.scale), [[C]](s32), [[COPY]](s32), 0 + ; GFX9-NEXT: [[INT2:%[0-9]+]]:_(s32), [[INT3:%[0-9]+]]:_(s1) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.scale), [[C]](s32), [[COPY]](s32), 1 + ; GFX9-NEXT: [[INT4:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.rcp), [[INT]](s32) + ; GFX9-NEXT: [[FNEG:%[0-9]+]]:_(s32) = G_FNEG [[INT]] + ; GFX9-NEXT: [[FMA:%[0-9]+]]:_(s32) = G_FMA [[FNEG]], [[INT4]], [[C1]] + ; GFX9-NEXT: [[FMA1:%[0-9]+]]:_(s32) = G_FMA [[FMA]], [[INT4]], [[INT4]] + ; GFX9-NEXT: [[FMUL:%[0-9]+]]:_(s32) = G_FMUL [[INT2]], [[FMA1]] + ; GFX9-NEXT: [[FMA2:%[0-9]+]]:_(s32) = G_FMA [[FNEG]], [[FMUL]], [[INT2]] + ; GFX9-NEXT: [[FMA3:%[0-9]+]]:_(s32) = G_FMA [[FMA2]], [[FMA1]], [[FMUL]] + ; GFX9-NEXT: [[FMA4:%[0-9]+]]:_(s32) = G_FMA [[FNEG]], [[FMA3]], [[INT2]] + ; GFX9-NEXT: [[INT5:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.fmas), [[FMA4]](s32), [[FMA1]](s32), [[FMA3]](s32), [[INT3]](s1) + ; GFX9-NEXT: [[INT6:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.fixup), [[INT5]](s32), [[COPY]](s32), [[C]](s32) + ; GFX9-NEXT: $vgpr0 = COPY [[INT6]](s32) ; GFX9-UNSAFE-LABEL: name: test_fdiv_s32_constant_negative_one_rcp ; GFX9-UNSAFE: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX9-UNSAFE: [[FNEG:%[0-9]+]]:_(s32) = G_FNEG [[COPY]] - ; GFX9-UNSAFE: [[INT:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.rcp), [[FNEG]](s32) - ; GFX9-UNSAFE: $vgpr0 = COPY [[INT]](s32) + ; GFX9-UNSAFE-NEXT: [[FNEG:%[0-9]+]]:_(s32) = G_FNEG [[COPY]] + ; GFX9-UNSAFE-NEXT: 
[[INT:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.rcp), [[FNEG]](s32) + ; GFX9-UNSAFE-NEXT: $vgpr0 = COPY [[INT]](s32) ; GFX10-LABEL: name: test_fdiv_s32_constant_negative_one_rcp ; GFX10: [[C:%[0-9]+]]:_(s32) = G_FCONSTANT float -1.000000e+00 - ; GFX10: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX10: [[C1:%[0-9]+]]:_(s32) = G_FCONSTANT float 1.000000e+00 - ; GFX10: [[INT:%[0-9]+]]:_(s32), [[INT1:%[0-9]+]]:_(s1) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.scale), [[C]](s32), [[COPY]](s32), 0 - ; GFX10: [[INT2:%[0-9]+]]:_(s32), [[INT3:%[0-9]+]]:_(s1) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.scale), [[C]](s32), [[COPY]](s32), 1 - ; GFX10: [[INT4:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.rcp), [[INT]](s32) - ; GFX10: [[FNEG:%[0-9]+]]:_(s32) = G_FNEG [[INT]] - ; GFX10: [[FMA:%[0-9]+]]:_(s32) = G_FMA [[FNEG]], [[INT4]], [[C1]] - ; GFX10: [[FMA1:%[0-9]+]]:_(s32) = G_FMA [[FMA]], [[INT4]], [[INT4]] - ; GFX10: [[FMUL:%[0-9]+]]:_(s32) = G_FMUL [[INT2]], [[FMA1]] - ; GFX10: [[FMA2:%[0-9]+]]:_(s32) = G_FMA [[FNEG]], [[FMUL]], [[INT2]] - ; GFX10: [[FMA3:%[0-9]+]]:_(s32) = G_FMA [[FMA2]], [[FMA1]], [[FMUL]] - ; GFX10: [[FMA4:%[0-9]+]]:_(s32) = G_FMA [[FNEG]], [[FMA3]], [[INT2]] - ; GFX10: [[INT5:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.fmas), [[FMA4]](s32), [[FMA1]](s32), [[FMA3]](s32), [[INT3]](s1) - ; GFX10: [[INT6:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.fixup), [[INT5]](s32), [[COPY]](s32), [[C]](s32) - ; GFX10: $vgpr0 = COPY [[INT6]](s32) + ; GFX10-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 + ; GFX10-NEXT: [[C1:%[0-9]+]]:_(s32) = G_FCONSTANT float 1.000000e+00 + ; GFX10-NEXT: [[INT:%[0-9]+]]:_(s32), [[INT1:%[0-9]+]]:_(s1) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.scale), [[C]](s32), [[COPY]](s32), 0 + ; GFX10-NEXT: [[INT2:%[0-9]+]]:_(s32), [[INT3:%[0-9]+]]:_(s1) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.scale), [[C]](s32), [[COPY]](s32), 1 + ; GFX10-NEXT: [[INT4:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.rcp), [[INT]](s32) + ; GFX10-NEXT: [[FNEG:%[0-9]+]]:_(s32) = G_FNEG [[INT]] + ; GFX10-NEXT: [[FMA:%[0-9]+]]:_(s32) = G_FMA [[FNEG]], [[INT4]], [[C1]] + ; GFX10-NEXT: [[FMA1:%[0-9]+]]:_(s32) = G_FMA [[FMA]], [[INT4]], [[INT4]] + ; GFX10-NEXT: [[FMUL:%[0-9]+]]:_(s32) = G_FMUL [[INT2]], [[FMA1]] + ; GFX10-NEXT: [[FMA2:%[0-9]+]]:_(s32) = G_FMA [[FNEG]], [[FMUL]], [[INT2]] + ; GFX10-NEXT: [[FMA3:%[0-9]+]]:_(s32) = G_FMA [[FMA2]], [[FMA1]], [[FMUL]] + ; GFX10-NEXT: [[FMA4:%[0-9]+]]:_(s32) = G_FMA [[FNEG]], [[FMA3]], [[INT2]] + ; GFX10-NEXT: [[INT5:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.fmas), [[FMA4]](s32), [[FMA1]](s32), [[FMA3]](s32), [[INT3]](s1) + ; GFX10-NEXT: [[INT6:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.fixup), [[INT5]](s32), [[COPY]](s32), [[C]](s32) + ; GFX10-NEXT: $vgpr0 = COPY [[INT6]](s32) %0:_(s32) = G_FCONSTANT float -1.0 %1:_(s32) = COPY $vgpr0 %2:_(s32) = G_FDIV %0, %1 @@ -2256,88 +2256,88 @@ body: | ; SI-LABEL: name: test_fdiv_s64_constant_one_rcp ; SI: [[C:%[0-9]+]]:_(s64) = G_FCONSTANT double 1.000000e+00 - ; SI: [[COPY:%[0-9]+]]:_(s64) = COPY $vgpr0_vgpr1 - ; SI: [[INT:%[0-9]+]]:_(s64), [[INT1:%[0-9]+]]:_(s1) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.scale), [[C]](s64), [[COPY]](s64), 0 - ; SI: [[FNEG:%[0-9]+]]:_(s64) = G_FNEG [[INT]] - ; SI: [[INT2:%[0-9]+]]:_(s64) = G_INTRINSIC intrinsic(@llvm.amdgcn.rcp), [[INT]](s64) - ; SI: [[FMA:%[0-9]+]]:_(s64) = G_FMA [[FNEG]], [[INT2]], [[C]] - ; SI: [[FMA1:%[0-9]+]]:_(s64) = G_FMA [[INT2]], [[FMA]], [[INT2]] - ; SI: [[FMA2:%[0-9]+]]:_(s64) = 
G_FMA [[FNEG]], [[FMA1]], [[C]] - ; SI: [[INT3:%[0-9]+]]:_(s64), [[INT4:%[0-9]+]]:_(s1) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.scale), [[C]](s64), [[COPY]](s64), 1 - ; SI: [[FMA3:%[0-9]+]]:_(s64) = G_FMA [[FMA1]], [[FMA2]], [[FMA1]] - ; SI: [[FMUL:%[0-9]+]]:_(s64) = G_FMUL [[INT3]], [[FMA3]] - ; SI: [[FMA4:%[0-9]+]]:_(s64) = G_FMA [[FNEG]], [[FMUL]], [[INT3]] - ; SI: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[C]](s64) - ; SI: [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](s64) - ; SI: [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[INT]](s64) - ; SI: [[UV6:%[0-9]+]]:_(s32), [[UV7:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[INT3]](s64) - ; SI: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[UV1]](s32), [[UV7]] - ; SI: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[UV3]](s32), [[UV5]] - ; SI: [[XOR:%[0-9]+]]:_(s1) = G_XOR [[ICMP]], [[ICMP1]] - ; SI: [[INT5:%[0-9]+]]:_(s64) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.fmas), [[FMA4]](s64), [[FMA3]](s64), [[FMUL]](s64), [[XOR]](s1) - ; SI: [[INT6:%[0-9]+]]:_(s64) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.fixup), [[INT5]](s64), [[COPY]](s64), [[C]](s64) - ; SI: $vgpr0_vgpr1 = COPY [[INT6]](s64) + ; SI-NEXT: [[COPY:%[0-9]+]]:_(s64) = COPY $vgpr0_vgpr1 + ; SI-NEXT: [[INT:%[0-9]+]]:_(s64), [[INT1:%[0-9]+]]:_(s1) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.scale), [[C]](s64), [[COPY]](s64), 0 + ; SI-NEXT: [[FNEG:%[0-9]+]]:_(s64) = G_FNEG [[INT]] + ; SI-NEXT: [[INT2:%[0-9]+]]:_(s64) = G_INTRINSIC intrinsic(@llvm.amdgcn.rcp), [[INT]](s64) + ; SI-NEXT: [[FMA:%[0-9]+]]:_(s64) = G_FMA [[FNEG]], [[INT2]], [[C]] + ; SI-NEXT: [[FMA1:%[0-9]+]]:_(s64) = G_FMA [[INT2]], [[FMA]], [[INT2]] + ; SI-NEXT: [[FMA2:%[0-9]+]]:_(s64) = G_FMA [[FNEG]], [[FMA1]], [[C]] + ; SI-NEXT: [[INT3:%[0-9]+]]:_(s64), [[INT4:%[0-9]+]]:_(s1) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.scale), [[C]](s64), [[COPY]](s64), 1 + ; SI-NEXT: [[FMA3:%[0-9]+]]:_(s64) = G_FMA [[FMA1]], [[FMA2]], [[FMA1]] + ; SI-NEXT: [[FMUL:%[0-9]+]]:_(s64) = G_FMUL [[INT3]], [[FMA3]] + ; SI-NEXT: [[FMA4:%[0-9]+]]:_(s64) = G_FMA [[FNEG]], [[FMUL]], [[INT3]] + ; SI-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[C]](s64) + ; SI-NEXT: [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](s64) + ; SI-NEXT: [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[INT]](s64) + ; SI-NEXT: [[UV6:%[0-9]+]]:_(s32), [[UV7:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[INT3]](s64) + ; SI-NEXT: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[UV1]](s32), [[UV7]] + ; SI-NEXT: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[UV3]](s32), [[UV5]] + ; SI-NEXT: [[XOR:%[0-9]+]]:_(s1) = G_XOR [[ICMP]], [[ICMP1]] + ; SI-NEXT: [[INT5:%[0-9]+]]:_(s64) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.fmas), [[FMA4]](s64), [[FMA3]](s64), [[FMUL]](s64), [[XOR]](s1) + ; SI-NEXT: [[INT6:%[0-9]+]]:_(s64) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.fixup), [[INT5]](s64), [[COPY]](s64), [[C]](s64) + ; SI-NEXT: $vgpr0_vgpr1 = COPY [[INT6]](s64) ; VI-LABEL: name: test_fdiv_s64_constant_one_rcp ; VI: [[C:%[0-9]+]]:_(s64) = G_FCONSTANT double 1.000000e+00 - ; VI: [[COPY:%[0-9]+]]:_(s64) = COPY $vgpr0_vgpr1 - ; VI: [[INT:%[0-9]+]]:_(s64), [[INT1:%[0-9]+]]:_(s1) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.scale), [[C]](s64), [[COPY]](s64), 0 - ; VI: [[FNEG:%[0-9]+]]:_(s64) = G_FNEG [[INT]] - ; VI: [[INT2:%[0-9]+]]:_(s64) = G_INTRINSIC intrinsic(@llvm.amdgcn.rcp), [[INT]](s64) - ; VI: [[FMA:%[0-9]+]]:_(s64) = G_FMA [[FNEG]], [[INT2]], [[C]] - ; VI: 
[[FMA1:%[0-9]+]]:_(s64) = G_FMA [[INT2]], [[FMA]], [[INT2]] - ; VI: [[FMA2:%[0-9]+]]:_(s64) = G_FMA [[FNEG]], [[FMA1]], [[C]] - ; VI: [[INT3:%[0-9]+]]:_(s64), [[INT4:%[0-9]+]]:_(s1) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.scale), [[C]](s64), [[COPY]](s64), 1 - ; VI: [[FMA3:%[0-9]+]]:_(s64) = G_FMA [[FMA1]], [[FMA2]], [[FMA1]] - ; VI: [[FMUL:%[0-9]+]]:_(s64) = G_FMUL [[INT3]], [[FMA3]] - ; VI: [[FMA4:%[0-9]+]]:_(s64) = G_FMA [[FNEG]], [[FMUL]], [[INT3]] - ; VI: [[INT5:%[0-9]+]]:_(s64) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.fmas), [[FMA4]](s64), [[FMA3]](s64), [[FMUL]](s64), [[INT4]](s1) - ; VI: [[INT6:%[0-9]+]]:_(s64) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.fixup), [[INT5]](s64), [[COPY]](s64), [[C]](s64) - ; VI: $vgpr0_vgpr1 = COPY [[INT6]](s64) + ; VI-NEXT: [[COPY:%[0-9]+]]:_(s64) = COPY $vgpr0_vgpr1 + ; VI-NEXT: [[INT:%[0-9]+]]:_(s64), [[INT1:%[0-9]+]]:_(s1) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.scale), [[C]](s64), [[COPY]](s64), 0 + ; VI-NEXT: [[FNEG:%[0-9]+]]:_(s64) = G_FNEG [[INT]] + ; VI-NEXT: [[INT2:%[0-9]+]]:_(s64) = G_INTRINSIC intrinsic(@llvm.amdgcn.rcp), [[INT]](s64) + ; VI-NEXT: [[FMA:%[0-9]+]]:_(s64) = G_FMA [[FNEG]], [[INT2]], [[C]] + ; VI-NEXT: [[FMA1:%[0-9]+]]:_(s64) = G_FMA [[INT2]], [[FMA]], [[INT2]] + ; VI-NEXT: [[FMA2:%[0-9]+]]:_(s64) = G_FMA [[FNEG]], [[FMA1]], [[C]] + ; VI-NEXT: [[INT3:%[0-9]+]]:_(s64), [[INT4:%[0-9]+]]:_(s1) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.scale), [[C]](s64), [[COPY]](s64), 1 + ; VI-NEXT: [[FMA3:%[0-9]+]]:_(s64) = G_FMA [[FMA1]], [[FMA2]], [[FMA1]] + ; VI-NEXT: [[FMUL:%[0-9]+]]:_(s64) = G_FMUL [[INT3]], [[FMA3]] + ; VI-NEXT: [[FMA4:%[0-9]+]]:_(s64) = G_FMA [[FNEG]], [[FMUL]], [[INT3]] + ; VI-NEXT: [[INT5:%[0-9]+]]:_(s64) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.fmas), [[FMA4]](s64), [[FMA3]](s64), [[FMUL]](s64), [[INT4]](s1) + ; VI-NEXT: [[INT6:%[0-9]+]]:_(s64) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.fixup), [[INT5]](s64), [[COPY]](s64), [[C]](s64) + ; VI-NEXT: $vgpr0_vgpr1 = COPY [[INT6]](s64) ; GFX9-LABEL: name: test_fdiv_s64_constant_one_rcp ; GFX9: [[C:%[0-9]+]]:_(s64) = G_FCONSTANT double 1.000000e+00 - ; GFX9: [[COPY:%[0-9]+]]:_(s64) = COPY $vgpr0_vgpr1 - ; GFX9: [[INT:%[0-9]+]]:_(s64), [[INT1:%[0-9]+]]:_(s1) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.scale), [[C]](s64), [[COPY]](s64), 0 - ; GFX9: [[FNEG:%[0-9]+]]:_(s64) = G_FNEG [[INT]] - ; GFX9: [[INT2:%[0-9]+]]:_(s64) = G_INTRINSIC intrinsic(@llvm.amdgcn.rcp), [[INT]](s64) - ; GFX9: [[FMA:%[0-9]+]]:_(s64) = G_FMA [[FNEG]], [[INT2]], [[C]] - ; GFX9: [[FMA1:%[0-9]+]]:_(s64) = G_FMA [[INT2]], [[FMA]], [[INT2]] - ; GFX9: [[FMA2:%[0-9]+]]:_(s64) = G_FMA [[FNEG]], [[FMA1]], [[C]] - ; GFX9: [[INT3:%[0-9]+]]:_(s64), [[INT4:%[0-9]+]]:_(s1) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.scale), [[C]](s64), [[COPY]](s64), 1 - ; GFX9: [[FMA3:%[0-9]+]]:_(s64) = G_FMA [[FMA1]], [[FMA2]], [[FMA1]] - ; GFX9: [[FMUL:%[0-9]+]]:_(s64) = G_FMUL [[INT3]], [[FMA3]] - ; GFX9: [[FMA4:%[0-9]+]]:_(s64) = G_FMA [[FNEG]], [[FMUL]], [[INT3]] - ; GFX9: [[INT5:%[0-9]+]]:_(s64) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.fmas), [[FMA4]](s64), [[FMA3]](s64), [[FMUL]](s64), [[INT4]](s1) - ; GFX9: [[INT6:%[0-9]+]]:_(s64) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.fixup), [[INT5]](s64), [[COPY]](s64), [[C]](s64) - ; GFX9: $vgpr0_vgpr1 = COPY [[INT6]](s64) + ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(s64) = COPY $vgpr0_vgpr1 + ; GFX9-NEXT: [[INT:%[0-9]+]]:_(s64), [[INT1:%[0-9]+]]:_(s1) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.scale), [[C]](s64), [[COPY]](s64), 0 + ; GFX9-NEXT: [[FNEG:%[0-9]+]]:_(s64) = G_FNEG [[INT]] + ; GFX9-NEXT: 
[[INT2:%[0-9]+]]:_(s64) = G_INTRINSIC intrinsic(@llvm.amdgcn.rcp), [[INT]](s64) + ; GFX9-NEXT: [[FMA:%[0-9]+]]:_(s64) = G_FMA [[FNEG]], [[INT2]], [[C]] + ; GFX9-NEXT: [[FMA1:%[0-9]+]]:_(s64) = G_FMA [[INT2]], [[FMA]], [[INT2]] + ; GFX9-NEXT: [[FMA2:%[0-9]+]]:_(s64) = G_FMA [[FNEG]], [[FMA1]], [[C]] + ; GFX9-NEXT: [[INT3:%[0-9]+]]:_(s64), [[INT4:%[0-9]+]]:_(s1) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.scale), [[C]](s64), [[COPY]](s64), 1 + ; GFX9-NEXT: [[FMA3:%[0-9]+]]:_(s64) = G_FMA [[FMA1]], [[FMA2]], [[FMA1]] + ; GFX9-NEXT: [[FMUL:%[0-9]+]]:_(s64) = G_FMUL [[INT3]], [[FMA3]] + ; GFX9-NEXT: [[FMA4:%[0-9]+]]:_(s64) = G_FMA [[FNEG]], [[FMUL]], [[INT3]] + ; GFX9-NEXT: [[INT5:%[0-9]+]]:_(s64) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.fmas), [[FMA4]](s64), [[FMA3]](s64), [[FMUL]](s64), [[INT4]](s1) + ; GFX9-NEXT: [[INT6:%[0-9]+]]:_(s64) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.fixup), [[INT5]](s64), [[COPY]](s64), [[C]](s64) + ; GFX9-NEXT: $vgpr0_vgpr1 = COPY [[INT6]](s64) ; GFX9-UNSAFE-LABEL: name: test_fdiv_s64_constant_one_rcp ; GFX9-UNSAFE: [[C:%[0-9]+]]:_(s64) = G_FCONSTANT double 1.000000e+00 - ; GFX9-UNSAFE: [[COPY:%[0-9]+]]:_(s64) = COPY $vgpr0_vgpr1 - ; GFX9-UNSAFE: [[FNEG:%[0-9]+]]:_(s64) = G_FNEG [[COPY]] - ; GFX9-UNSAFE: [[INT:%[0-9]+]]:_(s64) = G_INTRINSIC intrinsic(@llvm.amdgcn.rcp), [[COPY]](s64) - ; GFX9-UNSAFE: [[FMA:%[0-9]+]]:_(s64) = G_FMA [[FNEG]], [[INT]], [[C]] - ; GFX9-UNSAFE: [[FMA1:%[0-9]+]]:_(s64) = G_FMA [[FMA]], [[INT]], [[INT]] - ; GFX9-UNSAFE: [[FMA2:%[0-9]+]]:_(s64) = G_FMA [[FNEG]], [[FMA1]], [[C]] - ; GFX9-UNSAFE: [[FMA3:%[0-9]+]]:_(s64) = G_FMA [[FMA2]], [[FMA1]], [[FMA1]] - ; GFX9-UNSAFE: [[FMUL:%[0-9]+]]:_(s64) = G_FMUL [[C]], [[FMA3]] - ; GFX9-UNSAFE: [[FMA4:%[0-9]+]]:_(s64) = G_FMA [[FNEG]], [[FMUL]], [[C]] - ; GFX9-UNSAFE: [[FMA5:%[0-9]+]]:_(s64) = G_FMA [[FMA4]], [[FMA3]], [[FMUL]] - ; GFX9-UNSAFE: $vgpr0_vgpr1 = COPY [[FMA5]](s64) + ; GFX9-UNSAFE-NEXT: [[COPY:%[0-9]+]]:_(s64) = COPY $vgpr0_vgpr1 + ; GFX9-UNSAFE-NEXT: [[FNEG:%[0-9]+]]:_(s64) = G_FNEG [[COPY]] + ; GFX9-UNSAFE-NEXT: [[INT:%[0-9]+]]:_(s64) = G_INTRINSIC intrinsic(@llvm.amdgcn.rcp), [[COPY]](s64) + ; GFX9-UNSAFE-NEXT: [[FMA:%[0-9]+]]:_(s64) = G_FMA [[FNEG]], [[INT]], [[C]] + ; GFX9-UNSAFE-NEXT: [[FMA1:%[0-9]+]]:_(s64) = G_FMA [[FMA]], [[INT]], [[INT]] + ; GFX9-UNSAFE-NEXT: [[FMA2:%[0-9]+]]:_(s64) = G_FMA [[FNEG]], [[FMA1]], [[C]] + ; GFX9-UNSAFE-NEXT: [[FMA3:%[0-9]+]]:_(s64) = G_FMA [[FMA2]], [[FMA1]], [[FMA1]] + ; GFX9-UNSAFE-NEXT: [[FMUL:%[0-9]+]]:_(s64) = G_FMUL [[C]], [[FMA3]] + ; GFX9-UNSAFE-NEXT: [[FMA4:%[0-9]+]]:_(s64) = G_FMA [[FNEG]], [[FMUL]], [[C]] + ; GFX9-UNSAFE-NEXT: [[FMA5:%[0-9]+]]:_(s64) = G_FMA [[FMA4]], [[FMA3]], [[FMUL]] + ; GFX9-UNSAFE-NEXT: $vgpr0_vgpr1 = COPY [[FMA5]](s64) ; GFX10-LABEL: name: test_fdiv_s64_constant_one_rcp ; GFX10: [[C:%[0-9]+]]:_(s64) = G_FCONSTANT double 1.000000e+00 - ; GFX10: [[COPY:%[0-9]+]]:_(s64) = COPY $vgpr0_vgpr1 - ; GFX10: [[INT:%[0-9]+]]:_(s64), [[INT1:%[0-9]+]]:_(s1) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.scale), [[C]](s64), [[COPY]](s64), 0 - ; GFX10: [[FNEG:%[0-9]+]]:_(s64) = G_FNEG [[INT]] - ; GFX10: [[INT2:%[0-9]+]]:_(s64) = G_INTRINSIC intrinsic(@llvm.amdgcn.rcp), [[INT]](s64) - ; GFX10: [[FMA:%[0-9]+]]:_(s64) = G_FMA [[FNEG]], [[INT2]], [[C]] - ; GFX10: [[FMA1:%[0-9]+]]:_(s64) = G_FMA [[INT2]], [[FMA]], [[INT2]] - ; GFX10: [[FMA2:%[0-9]+]]:_(s64) = G_FMA [[FNEG]], [[FMA1]], [[C]] - ; GFX10: [[INT3:%[0-9]+]]:_(s64), [[INT4:%[0-9]+]]:_(s1) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.scale), [[C]](s64), [[COPY]](s64), 1 - ; GFX10: 
[[FMA3:%[0-9]+]]:_(s64) = G_FMA [[FMA1]], [[FMA2]], [[FMA1]] - ; GFX10: [[FMUL:%[0-9]+]]:_(s64) = G_FMUL [[INT3]], [[FMA3]] - ; GFX10: [[FMA4:%[0-9]+]]:_(s64) = G_FMA [[FNEG]], [[FMUL]], [[INT3]] - ; GFX10: [[INT5:%[0-9]+]]:_(s64) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.fmas), [[FMA4]](s64), [[FMA3]](s64), [[FMUL]](s64), [[INT4]](s1) - ; GFX10: [[INT6:%[0-9]+]]:_(s64) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.fixup), [[INT5]](s64), [[COPY]](s64), [[C]](s64) - ; GFX10: $vgpr0_vgpr1 = COPY [[INT6]](s64) + ; GFX10-NEXT: [[COPY:%[0-9]+]]:_(s64) = COPY $vgpr0_vgpr1 + ; GFX10-NEXT: [[INT:%[0-9]+]]:_(s64), [[INT1:%[0-9]+]]:_(s1) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.scale), [[C]](s64), [[COPY]](s64), 0 + ; GFX10-NEXT: [[FNEG:%[0-9]+]]:_(s64) = G_FNEG [[INT]] + ; GFX10-NEXT: [[INT2:%[0-9]+]]:_(s64) = G_INTRINSIC intrinsic(@llvm.amdgcn.rcp), [[INT]](s64) + ; GFX10-NEXT: [[FMA:%[0-9]+]]:_(s64) = G_FMA [[FNEG]], [[INT2]], [[C]] + ; GFX10-NEXT: [[FMA1:%[0-9]+]]:_(s64) = G_FMA [[INT2]], [[FMA]], [[INT2]] + ; GFX10-NEXT: [[FMA2:%[0-9]+]]:_(s64) = G_FMA [[FNEG]], [[FMA1]], [[C]] + ; GFX10-NEXT: [[INT3:%[0-9]+]]:_(s64), [[INT4:%[0-9]+]]:_(s1) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.scale), [[C]](s64), [[COPY]](s64), 1 + ; GFX10-NEXT: [[FMA3:%[0-9]+]]:_(s64) = G_FMA [[FMA1]], [[FMA2]], [[FMA1]] + ; GFX10-NEXT: [[FMUL:%[0-9]+]]:_(s64) = G_FMUL [[INT3]], [[FMA3]] + ; GFX10-NEXT: [[FMA4:%[0-9]+]]:_(s64) = G_FMA [[FNEG]], [[FMUL]], [[INT3]] + ; GFX10-NEXT: [[INT5:%[0-9]+]]:_(s64) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.fmas), [[FMA4]](s64), [[FMA3]](s64), [[FMUL]](s64), [[INT4]](s1) + ; GFX10-NEXT: [[INT6:%[0-9]+]]:_(s64) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.fixup), [[INT5]](s64), [[COPY]](s64), [[C]](s64) + ; GFX10-NEXT: $vgpr0_vgpr1 = COPY [[INT6]](s64) %0:_(s64) = G_FCONSTANT double 1.0 %1:_(s64) = COPY $vgpr0_vgpr1 %2:_(s64) = G_FDIV %0, %1 @@ -2359,93 +2359,93 @@ body: | ; SI-LABEL: name: test_fdiv_s64_constant_negative_one_rcp ; SI: [[C:%[0-9]+]]:_(s64) = G_FCONSTANT double -1.000000e+00 - ; SI: [[COPY:%[0-9]+]]:_(s64) = COPY $vgpr0_vgpr1 - ; SI: [[C1:%[0-9]+]]:_(s64) = G_FCONSTANT double 1.000000e+00 - ; SI: [[INT:%[0-9]+]]:_(s64), [[INT1:%[0-9]+]]:_(s1) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.scale), [[C]](s64), [[COPY]](s64), 0 - ; SI: [[FNEG:%[0-9]+]]:_(s64) = G_FNEG [[INT]] - ; SI: [[INT2:%[0-9]+]]:_(s64) = G_INTRINSIC intrinsic(@llvm.amdgcn.rcp), [[INT]](s64) - ; SI: [[FMA:%[0-9]+]]:_(s64) = G_FMA [[FNEG]], [[INT2]], [[C1]] - ; SI: [[FMA1:%[0-9]+]]:_(s64) = G_FMA [[INT2]], [[FMA]], [[INT2]] - ; SI: [[FMA2:%[0-9]+]]:_(s64) = G_FMA [[FNEG]], [[FMA1]], [[C1]] - ; SI: [[INT3:%[0-9]+]]:_(s64), [[INT4:%[0-9]+]]:_(s1) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.scale), [[C]](s64), [[COPY]](s64), 1 - ; SI: [[FMA3:%[0-9]+]]:_(s64) = G_FMA [[FMA1]], [[FMA2]], [[FMA1]] - ; SI: [[FMUL:%[0-9]+]]:_(s64) = G_FMUL [[INT3]], [[FMA3]] - ; SI: [[FMA4:%[0-9]+]]:_(s64) = G_FMA [[FNEG]], [[FMUL]], [[INT3]] - ; SI: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[C]](s64) - ; SI: [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](s64) - ; SI: [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[INT]](s64) - ; SI: [[UV6:%[0-9]+]]:_(s32), [[UV7:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[INT3]](s64) - ; SI: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[UV1]](s32), [[UV7]] - ; SI: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[UV3]](s32), [[UV5]] - ; SI: [[XOR:%[0-9]+]]:_(s1) = G_XOR [[ICMP]], [[ICMP1]] - ; SI: [[INT5:%[0-9]+]]:_(s64) = G_INTRINSIC 
intrinsic(@llvm.amdgcn.div.fmas), [[FMA4]](s64), [[FMA3]](s64), [[FMUL]](s64), [[XOR]](s1) - ; SI: [[INT6:%[0-9]+]]:_(s64) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.fixup), [[INT5]](s64), [[COPY]](s64), [[C]](s64) - ; SI: $vgpr0_vgpr1 = COPY [[INT6]](s64) + ; SI-NEXT: [[COPY:%[0-9]+]]:_(s64) = COPY $vgpr0_vgpr1 + ; SI-NEXT: [[C1:%[0-9]+]]:_(s64) = G_FCONSTANT double 1.000000e+00 + ; SI-NEXT: [[INT:%[0-9]+]]:_(s64), [[INT1:%[0-9]+]]:_(s1) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.scale), [[C]](s64), [[COPY]](s64), 0 + ; SI-NEXT: [[FNEG:%[0-9]+]]:_(s64) = G_FNEG [[INT]] + ; SI-NEXT: [[INT2:%[0-9]+]]:_(s64) = G_INTRINSIC intrinsic(@llvm.amdgcn.rcp), [[INT]](s64) + ; SI-NEXT: [[FMA:%[0-9]+]]:_(s64) = G_FMA [[FNEG]], [[INT2]], [[C1]] + ; SI-NEXT: [[FMA1:%[0-9]+]]:_(s64) = G_FMA [[INT2]], [[FMA]], [[INT2]] + ; SI-NEXT: [[FMA2:%[0-9]+]]:_(s64) = G_FMA [[FNEG]], [[FMA1]], [[C1]] + ; SI-NEXT: [[INT3:%[0-9]+]]:_(s64), [[INT4:%[0-9]+]]:_(s1) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.scale), [[C]](s64), [[COPY]](s64), 1 + ; SI-NEXT: [[FMA3:%[0-9]+]]:_(s64) = G_FMA [[FMA1]], [[FMA2]], [[FMA1]] + ; SI-NEXT: [[FMUL:%[0-9]+]]:_(s64) = G_FMUL [[INT3]], [[FMA3]] + ; SI-NEXT: [[FMA4:%[0-9]+]]:_(s64) = G_FMA [[FNEG]], [[FMUL]], [[INT3]] + ; SI-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[C]](s64) + ; SI-NEXT: [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](s64) + ; SI-NEXT: [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[INT]](s64) + ; SI-NEXT: [[UV6:%[0-9]+]]:_(s32), [[UV7:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[INT3]](s64) + ; SI-NEXT: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[UV1]](s32), [[UV7]] + ; SI-NEXT: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[UV3]](s32), [[UV5]] + ; SI-NEXT: [[XOR:%[0-9]+]]:_(s1) = G_XOR [[ICMP]], [[ICMP1]] + ; SI-NEXT: [[INT5:%[0-9]+]]:_(s64) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.fmas), [[FMA4]](s64), [[FMA3]](s64), [[FMUL]](s64), [[XOR]](s1) + ; SI-NEXT: [[INT6:%[0-9]+]]:_(s64) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.fixup), [[INT5]](s64), [[COPY]](s64), [[C]](s64) + ; SI-NEXT: $vgpr0_vgpr1 = COPY [[INT6]](s64) ; VI-LABEL: name: test_fdiv_s64_constant_negative_one_rcp ; VI: [[C:%[0-9]+]]:_(s64) = G_FCONSTANT double -1.000000e+00 - ; VI: [[COPY:%[0-9]+]]:_(s64) = COPY $vgpr0_vgpr1 - ; VI: [[C1:%[0-9]+]]:_(s64) = G_FCONSTANT double 1.000000e+00 - ; VI: [[INT:%[0-9]+]]:_(s64), [[INT1:%[0-9]+]]:_(s1) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.scale), [[C]](s64), [[COPY]](s64), 0 - ; VI: [[FNEG:%[0-9]+]]:_(s64) = G_FNEG [[INT]] - ; VI: [[INT2:%[0-9]+]]:_(s64) = G_INTRINSIC intrinsic(@llvm.amdgcn.rcp), [[INT]](s64) - ; VI: [[FMA:%[0-9]+]]:_(s64) = G_FMA [[FNEG]], [[INT2]], [[C1]] - ; VI: [[FMA1:%[0-9]+]]:_(s64) = G_FMA [[INT2]], [[FMA]], [[INT2]] - ; VI: [[FMA2:%[0-9]+]]:_(s64) = G_FMA [[FNEG]], [[FMA1]], [[C1]] - ; VI: [[INT3:%[0-9]+]]:_(s64), [[INT4:%[0-9]+]]:_(s1) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.scale), [[C]](s64), [[COPY]](s64), 1 - ; VI: [[FMA3:%[0-9]+]]:_(s64) = G_FMA [[FMA1]], [[FMA2]], [[FMA1]] - ; VI: [[FMUL:%[0-9]+]]:_(s64) = G_FMUL [[INT3]], [[FMA3]] - ; VI: [[FMA4:%[0-9]+]]:_(s64) = G_FMA [[FNEG]], [[FMUL]], [[INT3]] - ; VI: [[INT5:%[0-9]+]]:_(s64) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.fmas), [[FMA4]](s64), [[FMA3]](s64), [[FMUL]](s64), [[INT4]](s1) - ; VI: [[INT6:%[0-9]+]]:_(s64) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.fixup), [[INT5]](s64), [[COPY]](s64), [[C]](s64) - ; VI: $vgpr0_vgpr1 = COPY [[INT6]](s64) + ; VI-NEXT: [[COPY:%[0-9]+]]:_(s64) = COPY $vgpr0_vgpr1 + ; VI-NEXT: 
[[C1:%[0-9]+]]:_(s64) = G_FCONSTANT double 1.000000e+00 + ; VI-NEXT: [[INT:%[0-9]+]]:_(s64), [[INT1:%[0-9]+]]:_(s1) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.scale), [[C]](s64), [[COPY]](s64), 0 + ; VI-NEXT: [[FNEG:%[0-9]+]]:_(s64) = G_FNEG [[INT]] + ; VI-NEXT: [[INT2:%[0-9]+]]:_(s64) = G_INTRINSIC intrinsic(@llvm.amdgcn.rcp), [[INT]](s64) + ; VI-NEXT: [[FMA:%[0-9]+]]:_(s64) = G_FMA [[FNEG]], [[INT2]], [[C1]] + ; VI-NEXT: [[FMA1:%[0-9]+]]:_(s64) = G_FMA [[INT2]], [[FMA]], [[INT2]] + ; VI-NEXT: [[FMA2:%[0-9]+]]:_(s64) = G_FMA [[FNEG]], [[FMA1]], [[C1]] + ; VI-NEXT: [[INT3:%[0-9]+]]:_(s64), [[INT4:%[0-9]+]]:_(s1) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.scale), [[C]](s64), [[COPY]](s64), 1 + ; VI-NEXT: [[FMA3:%[0-9]+]]:_(s64) = G_FMA [[FMA1]], [[FMA2]], [[FMA1]] + ; VI-NEXT: [[FMUL:%[0-9]+]]:_(s64) = G_FMUL [[INT3]], [[FMA3]] + ; VI-NEXT: [[FMA4:%[0-9]+]]:_(s64) = G_FMA [[FNEG]], [[FMUL]], [[INT3]] + ; VI-NEXT: [[INT5:%[0-9]+]]:_(s64) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.fmas), [[FMA4]](s64), [[FMA3]](s64), [[FMUL]](s64), [[INT4]](s1) + ; VI-NEXT: [[INT6:%[0-9]+]]:_(s64) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.fixup), [[INT5]](s64), [[COPY]](s64), [[C]](s64) + ; VI-NEXT: $vgpr0_vgpr1 = COPY [[INT6]](s64) ; GFX9-LABEL: name: test_fdiv_s64_constant_negative_one_rcp ; GFX9: [[C:%[0-9]+]]:_(s64) = G_FCONSTANT double -1.000000e+00 - ; GFX9: [[COPY:%[0-9]+]]:_(s64) = COPY $vgpr0_vgpr1 - ; GFX9: [[C1:%[0-9]+]]:_(s64) = G_FCONSTANT double 1.000000e+00 - ; GFX9: [[INT:%[0-9]+]]:_(s64), [[INT1:%[0-9]+]]:_(s1) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.scale), [[C]](s64), [[COPY]](s64), 0 - ; GFX9: [[FNEG:%[0-9]+]]:_(s64) = G_FNEG [[INT]] - ; GFX9: [[INT2:%[0-9]+]]:_(s64) = G_INTRINSIC intrinsic(@llvm.amdgcn.rcp), [[INT]](s64) - ; GFX9: [[FMA:%[0-9]+]]:_(s64) = G_FMA [[FNEG]], [[INT2]], [[C1]] - ; GFX9: [[FMA1:%[0-9]+]]:_(s64) = G_FMA [[INT2]], [[FMA]], [[INT2]] - ; GFX9: [[FMA2:%[0-9]+]]:_(s64) = G_FMA [[FNEG]], [[FMA1]], [[C1]] - ; GFX9: [[INT3:%[0-9]+]]:_(s64), [[INT4:%[0-9]+]]:_(s1) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.scale), [[C]](s64), [[COPY]](s64), 1 - ; GFX9: [[FMA3:%[0-9]+]]:_(s64) = G_FMA [[FMA1]], [[FMA2]], [[FMA1]] - ; GFX9: [[FMUL:%[0-9]+]]:_(s64) = G_FMUL [[INT3]], [[FMA3]] - ; GFX9: [[FMA4:%[0-9]+]]:_(s64) = G_FMA [[FNEG]], [[FMUL]], [[INT3]] - ; GFX9: [[INT5:%[0-9]+]]:_(s64) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.fmas), [[FMA4]](s64), [[FMA3]](s64), [[FMUL]](s64), [[INT4]](s1) - ; GFX9: [[INT6:%[0-9]+]]:_(s64) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.fixup), [[INT5]](s64), [[COPY]](s64), [[C]](s64) - ; GFX9: $vgpr0_vgpr1 = COPY [[INT6]](s64) + ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(s64) = COPY $vgpr0_vgpr1 + ; GFX9-NEXT: [[C1:%[0-9]+]]:_(s64) = G_FCONSTANT double 1.000000e+00 + ; GFX9-NEXT: [[INT:%[0-9]+]]:_(s64), [[INT1:%[0-9]+]]:_(s1) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.scale), [[C]](s64), [[COPY]](s64), 0 + ; GFX9-NEXT: [[FNEG:%[0-9]+]]:_(s64) = G_FNEG [[INT]] + ; GFX9-NEXT: [[INT2:%[0-9]+]]:_(s64) = G_INTRINSIC intrinsic(@llvm.amdgcn.rcp), [[INT]](s64) + ; GFX9-NEXT: [[FMA:%[0-9]+]]:_(s64) = G_FMA [[FNEG]], [[INT2]], [[C1]] + ; GFX9-NEXT: [[FMA1:%[0-9]+]]:_(s64) = G_FMA [[INT2]], [[FMA]], [[INT2]] + ; GFX9-NEXT: [[FMA2:%[0-9]+]]:_(s64) = G_FMA [[FNEG]], [[FMA1]], [[C1]] + ; GFX9-NEXT: [[INT3:%[0-9]+]]:_(s64), [[INT4:%[0-9]+]]:_(s1) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.scale), [[C]](s64), [[COPY]](s64), 1 + ; GFX9-NEXT: [[FMA3:%[0-9]+]]:_(s64) = G_FMA [[FMA1]], [[FMA2]], [[FMA1]] + ; GFX9-NEXT: [[FMUL:%[0-9]+]]:_(s64) = G_FMUL [[INT3]], [[FMA3]] + ; GFX9-NEXT: 
[[FMA4:%[0-9]+]]:_(s64) = G_FMA [[FNEG]], [[FMUL]], [[INT3]] + ; GFX9-NEXT: [[INT5:%[0-9]+]]:_(s64) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.fmas), [[FMA4]](s64), [[FMA3]](s64), [[FMUL]](s64), [[INT4]](s1) + ; GFX9-NEXT: [[INT6:%[0-9]+]]:_(s64) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.fixup), [[INT5]](s64), [[COPY]](s64), [[C]](s64) + ; GFX9-NEXT: $vgpr0_vgpr1 = COPY [[INT6]](s64) ; GFX9-UNSAFE-LABEL: name: test_fdiv_s64_constant_negative_one_rcp ; GFX9-UNSAFE: [[C:%[0-9]+]]:_(s64) = G_FCONSTANT double -1.000000e+00 - ; GFX9-UNSAFE: [[COPY:%[0-9]+]]:_(s64) = COPY $vgpr0_vgpr1 - ; GFX9-UNSAFE: [[FNEG:%[0-9]+]]:_(s64) = G_FNEG [[COPY]] - ; GFX9-UNSAFE: [[C1:%[0-9]+]]:_(s64) = G_FCONSTANT double 1.000000e+00 - ; GFX9-UNSAFE: [[INT:%[0-9]+]]:_(s64) = G_INTRINSIC intrinsic(@llvm.amdgcn.rcp), [[COPY]](s64) - ; GFX9-UNSAFE: [[FMA:%[0-9]+]]:_(s64) = G_FMA [[FNEG]], [[INT]], [[C1]] - ; GFX9-UNSAFE: [[FMA1:%[0-9]+]]:_(s64) = G_FMA [[FMA]], [[INT]], [[INT]] - ; GFX9-UNSAFE: [[FMA2:%[0-9]+]]:_(s64) = G_FMA [[FNEG]], [[FMA1]], [[C1]] - ; GFX9-UNSAFE: [[FMA3:%[0-9]+]]:_(s64) = G_FMA [[FMA2]], [[FMA1]], [[FMA1]] - ; GFX9-UNSAFE: [[FMUL:%[0-9]+]]:_(s64) = G_FMUL [[C]], [[FMA3]] - ; GFX9-UNSAFE: [[FMA4:%[0-9]+]]:_(s64) = G_FMA [[FNEG]], [[FMUL]], [[C]] - ; GFX9-UNSAFE: [[FMA5:%[0-9]+]]:_(s64) = G_FMA [[FMA4]], [[FMA3]], [[FMUL]] - ; GFX9-UNSAFE: $vgpr0_vgpr1 = COPY [[FMA5]](s64) + ; GFX9-UNSAFE-NEXT: [[COPY:%[0-9]+]]:_(s64) = COPY $vgpr0_vgpr1 + ; GFX9-UNSAFE-NEXT: [[FNEG:%[0-9]+]]:_(s64) = G_FNEG [[COPY]] + ; GFX9-UNSAFE-NEXT: [[C1:%[0-9]+]]:_(s64) = G_FCONSTANT double 1.000000e+00 + ; GFX9-UNSAFE-NEXT: [[INT:%[0-9]+]]:_(s64) = G_INTRINSIC intrinsic(@llvm.amdgcn.rcp), [[COPY]](s64) + ; GFX9-UNSAFE-NEXT: [[FMA:%[0-9]+]]:_(s64) = G_FMA [[FNEG]], [[INT]], [[C1]] + ; GFX9-UNSAFE-NEXT: [[FMA1:%[0-9]+]]:_(s64) = G_FMA [[FMA]], [[INT]], [[INT]] + ; GFX9-UNSAFE-NEXT: [[FMA2:%[0-9]+]]:_(s64) = G_FMA [[FNEG]], [[FMA1]], [[C1]] + ; GFX9-UNSAFE-NEXT: [[FMA3:%[0-9]+]]:_(s64) = G_FMA [[FMA2]], [[FMA1]], [[FMA1]] + ; GFX9-UNSAFE-NEXT: [[FMUL:%[0-9]+]]:_(s64) = G_FMUL [[C]], [[FMA3]] + ; GFX9-UNSAFE-NEXT: [[FMA4:%[0-9]+]]:_(s64) = G_FMA [[FNEG]], [[FMUL]], [[C]] + ; GFX9-UNSAFE-NEXT: [[FMA5:%[0-9]+]]:_(s64) = G_FMA [[FMA4]], [[FMA3]], [[FMUL]] + ; GFX9-UNSAFE-NEXT: $vgpr0_vgpr1 = COPY [[FMA5]](s64) ; GFX10-LABEL: name: test_fdiv_s64_constant_negative_one_rcp ; GFX10: [[C:%[0-9]+]]:_(s64) = G_FCONSTANT double -1.000000e+00 - ; GFX10: [[COPY:%[0-9]+]]:_(s64) = COPY $vgpr0_vgpr1 - ; GFX10: [[C1:%[0-9]+]]:_(s64) = G_FCONSTANT double 1.000000e+00 - ; GFX10: [[INT:%[0-9]+]]:_(s64), [[INT1:%[0-9]+]]:_(s1) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.scale), [[C]](s64), [[COPY]](s64), 0 - ; GFX10: [[FNEG:%[0-9]+]]:_(s64) = G_FNEG [[INT]] - ; GFX10: [[INT2:%[0-9]+]]:_(s64) = G_INTRINSIC intrinsic(@llvm.amdgcn.rcp), [[INT]](s64) - ; GFX10: [[FMA:%[0-9]+]]:_(s64) = G_FMA [[FNEG]], [[INT2]], [[C1]] - ; GFX10: [[FMA1:%[0-9]+]]:_(s64) = G_FMA [[INT2]], [[FMA]], [[INT2]] - ; GFX10: [[FMA2:%[0-9]+]]:_(s64) = G_FMA [[FNEG]], [[FMA1]], [[C1]] - ; GFX10: [[INT3:%[0-9]+]]:_(s64), [[INT4:%[0-9]+]]:_(s1) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.scale), [[C]](s64), [[COPY]](s64), 1 - ; GFX10: [[FMA3:%[0-9]+]]:_(s64) = G_FMA [[FMA1]], [[FMA2]], [[FMA1]] - ; GFX10: [[FMUL:%[0-9]+]]:_(s64) = G_FMUL [[INT3]], [[FMA3]] - ; GFX10: [[FMA4:%[0-9]+]]:_(s64) = G_FMA [[FNEG]], [[FMUL]], [[INT3]] - ; GFX10: [[INT5:%[0-9]+]]:_(s64) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.fmas), [[FMA4]](s64), [[FMA3]](s64), [[FMUL]](s64), [[INT4]](s1) - ; GFX10: 
[[INT6:%[0-9]+]]:_(s64) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.fixup), [[INT5]](s64), [[COPY]](s64), [[C]](s64) - ; GFX10: $vgpr0_vgpr1 = COPY [[INT6]](s64) + ; GFX10-NEXT: [[COPY:%[0-9]+]]:_(s64) = COPY $vgpr0_vgpr1 + ; GFX10-NEXT: [[C1:%[0-9]+]]:_(s64) = G_FCONSTANT double 1.000000e+00 + ; GFX10-NEXT: [[INT:%[0-9]+]]:_(s64), [[INT1:%[0-9]+]]:_(s1) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.scale), [[C]](s64), [[COPY]](s64), 0 + ; GFX10-NEXT: [[FNEG:%[0-9]+]]:_(s64) = G_FNEG [[INT]] + ; GFX10-NEXT: [[INT2:%[0-9]+]]:_(s64) = G_INTRINSIC intrinsic(@llvm.amdgcn.rcp), [[INT]](s64) + ; GFX10-NEXT: [[FMA:%[0-9]+]]:_(s64) = G_FMA [[FNEG]], [[INT2]], [[C1]] + ; GFX10-NEXT: [[FMA1:%[0-9]+]]:_(s64) = G_FMA [[INT2]], [[FMA]], [[INT2]] + ; GFX10-NEXT: [[FMA2:%[0-9]+]]:_(s64) = G_FMA [[FNEG]], [[FMA1]], [[C1]] + ; GFX10-NEXT: [[INT3:%[0-9]+]]:_(s64), [[INT4:%[0-9]+]]:_(s1) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.scale), [[C]](s64), [[COPY]](s64), 1 + ; GFX10-NEXT: [[FMA3:%[0-9]+]]:_(s64) = G_FMA [[FMA1]], [[FMA2]], [[FMA1]] + ; GFX10-NEXT: [[FMUL:%[0-9]+]]:_(s64) = G_FMUL [[INT3]], [[FMA3]] + ; GFX10-NEXT: [[FMA4:%[0-9]+]]:_(s64) = G_FMA [[FNEG]], [[FMUL]], [[INT3]] + ; GFX10-NEXT: [[INT5:%[0-9]+]]:_(s64) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.fmas), [[FMA4]](s64), [[FMA3]](s64), [[FMUL]](s64), [[INT4]](s1) + ; GFX10-NEXT: [[INT6:%[0-9]+]]:_(s64) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.fixup), [[INT5]](s64), [[COPY]](s64), [[C]](s64) + ; GFX10-NEXT: $vgpr0_vgpr1 = COPY [[INT6]](s64) %0:_(s64) = G_FCONSTANT double -1.0 %1:_(s64) = COPY $vgpr0_vgpr1 %2:_(s64) = G_FDIV %0, %1 diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-fexp.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-fexp.mir index c60f86af5e7e..0abe9ef1cb06 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-fexp.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-fexp.mir @@ -12,22 +12,22 @@ body: | ; GFX6-LABEL: name: test_fexp_s32 ; GFX6: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX6: [[C:%[0-9]+]]:_(s32) = G_FCONSTANT float 0x3FF7154760000000 - ; GFX6: [[FMUL:%[0-9]+]]:_(s32) = G_FMUL [[COPY]], [[C]] - ; GFX6: [[FEXP2_:%[0-9]+]]:_(s32) = G_FEXP2 [[FMUL]] - ; GFX6: $vgpr0 = COPY [[FEXP2_]](s32) + ; GFX6-NEXT: [[C:%[0-9]+]]:_(s32) = G_FCONSTANT float 0x3FF7154760000000 + ; GFX6-NEXT: [[FMUL:%[0-9]+]]:_(s32) = G_FMUL [[COPY]], [[C]] + ; GFX6-NEXT: [[FEXP2_:%[0-9]+]]:_(s32) = G_FEXP2 [[FMUL]] + ; GFX6-NEXT: $vgpr0 = COPY [[FEXP2_]](s32) ; GFX8-LABEL: name: test_fexp_s32 ; GFX8: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX8: [[C:%[0-9]+]]:_(s32) = G_FCONSTANT float 0x3FF7154760000000 - ; GFX8: [[FMUL:%[0-9]+]]:_(s32) = G_FMUL [[COPY]], [[C]] - ; GFX8: [[FEXP2_:%[0-9]+]]:_(s32) = G_FEXP2 [[FMUL]] - ; GFX8: $vgpr0 = COPY [[FEXP2_]](s32) + ; GFX8-NEXT: [[C:%[0-9]+]]:_(s32) = G_FCONSTANT float 0x3FF7154760000000 + ; GFX8-NEXT: [[FMUL:%[0-9]+]]:_(s32) = G_FMUL [[COPY]], [[C]] + ; GFX8-NEXT: [[FEXP2_:%[0-9]+]]:_(s32) = G_FEXP2 [[FMUL]] + ; GFX8-NEXT: $vgpr0 = COPY [[FEXP2_]](s32) ; GFX9-LABEL: name: test_fexp_s32 ; GFX9: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX9: [[C:%[0-9]+]]:_(s32) = G_FCONSTANT float 0x3FF7154760000000 - ; GFX9: [[FMUL:%[0-9]+]]:_(s32) = G_FMUL [[COPY]], [[C]] - ; GFX9: [[FEXP2_:%[0-9]+]]:_(s32) = G_FEXP2 [[FMUL]] - ; GFX9: $vgpr0 = COPY [[FEXP2_]](s32) + ; GFX9-NEXT: [[C:%[0-9]+]]:_(s32) = G_FCONSTANT float 0x3FF7154760000000 + ; GFX9-NEXT: [[FMUL:%[0-9]+]]:_(s32) = G_FMUL [[COPY]], [[C]] + ; GFX9-NEXT: [[FEXP2_:%[0-9]+]]:_(s32) = G_FEXP2 [[FMUL]] + ; GFX9-NEXT: $vgpr0 = COPY [[FEXP2_]](s32) 
%0:_(s32) = COPY $vgpr0 %1:_(s32) = G_FEXP %0 $vgpr0 = COPY %1 @@ -41,22 +41,22 @@ body: | ; GFX6-LABEL: name: test_fexp_s32_nnan ; GFX6: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX6: [[C:%[0-9]+]]:_(s32) = G_FCONSTANT float 0x3FF7154760000000 - ; GFX6: [[FMUL:%[0-9]+]]:_(s32) = nnan G_FMUL [[COPY]], [[C]] - ; GFX6: [[FEXP2_:%[0-9]+]]:_(s32) = nnan G_FEXP2 [[FMUL]] - ; GFX6: $vgpr0 = COPY [[FEXP2_]](s32) + ; GFX6-NEXT: [[C:%[0-9]+]]:_(s32) = G_FCONSTANT float 0x3FF7154760000000 + ; GFX6-NEXT: [[FMUL:%[0-9]+]]:_(s32) = nnan G_FMUL [[COPY]], [[C]] + ; GFX6-NEXT: [[FEXP2_:%[0-9]+]]:_(s32) = nnan G_FEXP2 [[FMUL]] + ; GFX6-NEXT: $vgpr0 = COPY [[FEXP2_]](s32) ; GFX8-LABEL: name: test_fexp_s32_nnan ; GFX8: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX8: [[C:%[0-9]+]]:_(s32) = G_FCONSTANT float 0x3FF7154760000000 - ; GFX8: [[FMUL:%[0-9]+]]:_(s32) = nnan G_FMUL [[COPY]], [[C]] - ; GFX8: [[FEXP2_:%[0-9]+]]:_(s32) = nnan G_FEXP2 [[FMUL]] - ; GFX8: $vgpr0 = COPY [[FEXP2_]](s32) + ; GFX8-NEXT: [[C:%[0-9]+]]:_(s32) = G_FCONSTANT float 0x3FF7154760000000 + ; GFX8-NEXT: [[FMUL:%[0-9]+]]:_(s32) = nnan G_FMUL [[COPY]], [[C]] + ; GFX8-NEXT: [[FEXP2_:%[0-9]+]]:_(s32) = nnan G_FEXP2 [[FMUL]] + ; GFX8-NEXT: $vgpr0 = COPY [[FEXP2_]](s32) ; GFX9-LABEL: name: test_fexp_s32_nnan ; GFX9: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX9: [[C:%[0-9]+]]:_(s32) = G_FCONSTANT float 0x3FF7154760000000 - ; GFX9: [[FMUL:%[0-9]+]]:_(s32) = nnan G_FMUL [[COPY]], [[C]] - ; GFX9: [[FEXP2_:%[0-9]+]]:_(s32) = nnan G_FEXP2 [[FMUL]] - ; GFX9: $vgpr0 = COPY [[FEXP2_]](s32) + ; GFX9-NEXT: [[C:%[0-9]+]]:_(s32) = G_FCONSTANT float 0x3FF7154760000000 + ; GFX9-NEXT: [[FMUL:%[0-9]+]]:_(s32) = nnan G_FMUL [[COPY]], [[C]] + ; GFX9-NEXT: [[FEXP2_:%[0-9]+]]:_(s32) = nnan G_FEXP2 [[FMUL]] + ; GFX9-NEXT: $vgpr0 = COPY [[FEXP2_]](s32) %0:_(s32) = COPY $vgpr0 %1:_(s32) = nnan G_FEXP %0 $vgpr0 = COPY %1 @@ -70,34 +70,34 @@ body: | ; GFX6-LABEL: name: test_fexp_v2s32 ; GFX6: [[COPY:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr0_vgpr1 - ; GFX6: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<2 x s32>) - ; GFX6: [[C:%[0-9]+]]:_(s32) = G_FCONSTANT float 0x3FF7154760000000 - ; GFX6: [[FMUL:%[0-9]+]]:_(s32) = G_FMUL [[UV]], [[C]] - ; GFX6: [[FEXP2_:%[0-9]+]]:_(s32) = G_FEXP2 [[FMUL]] - ; GFX6: [[FMUL1:%[0-9]+]]:_(s32) = G_FMUL [[UV1]], [[C]] - ; GFX6: [[FEXP2_1:%[0-9]+]]:_(s32) = G_FEXP2 [[FMUL1]] - ; GFX6: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[FEXP2_]](s32), [[FEXP2_1]](s32) - ; GFX6: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x s32>) + ; GFX6-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<2 x s32>) + ; GFX6-NEXT: [[C:%[0-9]+]]:_(s32) = G_FCONSTANT float 0x3FF7154760000000 + ; GFX6-NEXT: [[FMUL:%[0-9]+]]:_(s32) = G_FMUL [[UV]], [[C]] + ; GFX6-NEXT: [[FEXP2_:%[0-9]+]]:_(s32) = G_FEXP2 [[FMUL]] + ; GFX6-NEXT: [[FMUL1:%[0-9]+]]:_(s32) = G_FMUL [[UV1]], [[C]] + ; GFX6-NEXT: [[FEXP2_1:%[0-9]+]]:_(s32) = G_FEXP2 [[FMUL1]] + ; GFX6-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[FEXP2_]](s32), [[FEXP2_1]](s32) + ; GFX6-NEXT: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x s32>) ; GFX8-LABEL: name: test_fexp_v2s32 ; GFX8: [[COPY:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr0_vgpr1 - ; GFX8: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<2 x s32>) - ; GFX8: [[C:%[0-9]+]]:_(s32) = G_FCONSTANT float 0x3FF7154760000000 - ; GFX8: [[FMUL:%[0-9]+]]:_(s32) = G_FMUL [[UV]], [[C]] - ; GFX8: [[FEXP2_:%[0-9]+]]:_(s32) = G_FEXP2 [[FMUL]] - ; GFX8: [[FMUL1:%[0-9]+]]:_(s32) = G_FMUL 
[[UV1]], [[C]] - ; GFX8: [[FEXP2_1:%[0-9]+]]:_(s32) = G_FEXP2 [[FMUL1]] - ; GFX8: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[FEXP2_]](s32), [[FEXP2_1]](s32) - ; GFX8: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x s32>) + ; GFX8-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<2 x s32>) + ; GFX8-NEXT: [[C:%[0-9]+]]:_(s32) = G_FCONSTANT float 0x3FF7154760000000 + ; GFX8-NEXT: [[FMUL:%[0-9]+]]:_(s32) = G_FMUL [[UV]], [[C]] + ; GFX8-NEXT: [[FEXP2_:%[0-9]+]]:_(s32) = G_FEXP2 [[FMUL]] + ; GFX8-NEXT: [[FMUL1:%[0-9]+]]:_(s32) = G_FMUL [[UV1]], [[C]] + ; GFX8-NEXT: [[FEXP2_1:%[0-9]+]]:_(s32) = G_FEXP2 [[FMUL1]] + ; GFX8-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[FEXP2_]](s32), [[FEXP2_1]](s32) + ; GFX8-NEXT: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x s32>) ; GFX9-LABEL: name: test_fexp_v2s32 ; GFX9: [[COPY:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr0_vgpr1 - ; GFX9: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<2 x s32>) - ; GFX9: [[C:%[0-9]+]]:_(s32) = G_FCONSTANT float 0x3FF7154760000000 - ; GFX9: [[FMUL:%[0-9]+]]:_(s32) = G_FMUL [[UV]], [[C]] - ; GFX9: [[FEXP2_:%[0-9]+]]:_(s32) = G_FEXP2 [[FMUL]] - ; GFX9: [[FMUL1:%[0-9]+]]:_(s32) = G_FMUL [[UV1]], [[C]] - ; GFX9: [[FEXP2_1:%[0-9]+]]:_(s32) = G_FEXP2 [[FMUL1]] - ; GFX9: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[FEXP2_]](s32), [[FEXP2_1]](s32) - ; GFX9: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x s32>) + ; GFX9-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<2 x s32>) + ; GFX9-NEXT: [[C:%[0-9]+]]:_(s32) = G_FCONSTANT float 0x3FF7154760000000 + ; GFX9-NEXT: [[FMUL:%[0-9]+]]:_(s32) = G_FMUL [[UV]], [[C]] + ; GFX9-NEXT: [[FEXP2_:%[0-9]+]]:_(s32) = G_FEXP2 [[FMUL]] + ; GFX9-NEXT: [[FMUL1:%[0-9]+]]:_(s32) = G_FMUL [[UV1]], [[C]] + ; GFX9-NEXT: [[FEXP2_1:%[0-9]+]]:_(s32) = G_FEXP2 [[FMUL1]] + ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[FEXP2_]](s32), [[FEXP2_1]](s32) + ; GFX9-NEXT: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x s32>) %0:_(<2 x s32>) = COPY $vgpr0_vgpr1 %1:_(<2 x s32>) = G_FEXP %0 $vgpr0_vgpr1 = COPY %1 @@ -111,40 +111,40 @@ body: | ; GFX6-LABEL: name: test_fexp_v3s32 ; GFX6: [[COPY:%[0-9]+]]:_(<3 x s32>) = COPY $vgpr0_vgpr1_vgpr2 - ; GFX6: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<3 x s32>) - ; GFX6: [[C:%[0-9]+]]:_(s32) = G_FCONSTANT float 0x3FF7154760000000 - ; GFX6: [[FMUL:%[0-9]+]]:_(s32) = G_FMUL [[UV]], [[C]] - ; GFX6: [[FEXP2_:%[0-9]+]]:_(s32) = G_FEXP2 [[FMUL]] - ; GFX6: [[FMUL1:%[0-9]+]]:_(s32) = G_FMUL [[UV1]], [[C]] - ; GFX6: [[FEXP2_1:%[0-9]+]]:_(s32) = G_FEXP2 [[FMUL1]] - ; GFX6: [[FMUL2:%[0-9]+]]:_(s32) = G_FMUL [[UV2]], [[C]] - ; GFX6: [[FEXP2_2:%[0-9]+]]:_(s32) = G_FEXP2 [[FMUL2]] - ; GFX6: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[FEXP2_]](s32), [[FEXP2_1]](s32), [[FEXP2_2]](s32) - ; GFX6: $vgpr0_vgpr1_vgpr2 = COPY [[BUILD_VECTOR]](<3 x s32>) + ; GFX6-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<3 x s32>) + ; GFX6-NEXT: [[C:%[0-9]+]]:_(s32) = G_FCONSTANT float 0x3FF7154760000000 + ; GFX6-NEXT: [[FMUL:%[0-9]+]]:_(s32) = G_FMUL [[UV]], [[C]] + ; GFX6-NEXT: [[FEXP2_:%[0-9]+]]:_(s32) = G_FEXP2 [[FMUL]] + ; GFX6-NEXT: [[FMUL1:%[0-9]+]]:_(s32) = G_FMUL [[UV1]], [[C]] + ; GFX6-NEXT: [[FEXP2_1:%[0-9]+]]:_(s32) = G_FEXP2 [[FMUL1]] + ; GFX6-NEXT: [[FMUL2:%[0-9]+]]:_(s32) = G_FMUL [[UV2]], [[C]] + ; GFX6-NEXT: [[FEXP2_2:%[0-9]+]]:_(s32) = G_FEXP2 
[[FMUL2]] + ; GFX6-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[FEXP2_]](s32), [[FEXP2_1]](s32), [[FEXP2_2]](s32) + ; GFX6-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[BUILD_VECTOR]](<3 x s32>) ; GFX8-LABEL: name: test_fexp_v3s32 ; GFX8: [[COPY:%[0-9]+]]:_(<3 x s32>) = COPY $vgpr0_vgpr1_vgpr2 - ; GFX8: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<3 x s32>) - ; GFX8: [[C:%[0-9]+]]:_(s32) = G_FCONSTANT float 0x3FF7154760000000 - ; GFX8: [[FMUL:%[0-9]+]]:_(s32) = G_FMUL [[UV]], [[C]] - ; GFX8: [[FEXP2_:%[0-9]+]]:_(s32) = G_FEXP2 [[FMUL]] - ; GFX8: [[FMUL1:%[0-9]+]]:_(s32) = G_FMUL [[UV1]], [[C]] - ; GFX8: [[FEXP2_1:%[0-9]+]]:_(s32) = G_FEXP2 [[FMUL1]] - ; GFX8: [[FMUL2:%[0-9]+]]:_(s32) = G_FMUL [[UV2]], [[C]] - ; GFX8: [[FEXP2_2:%[0-9]+]]:_(s32) = G_FEXP2 [[FMUL2]] - ; GFX8: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[FEXP2_]](s32), [[FEXP2_1]](s32), [[FEXP2_2]](s32) - ; GFX8: $vgpr0_vgpr1_vgpr2 = COPY [[BUILD_VECTOR]](<3 x s32>) + ; GFX8-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<3 x s32>) + ; GFX8-NEXT: [[C:%[0-9]+]]:_(s32) = G_FCONSTANT float 0x3FF7154760000000 + ; GFX8-NEXT: [[FMUL:%[0-9]+]]:_(s32) = G_FMUL [[UV]], [[C]] + ; GFX8-NEXT: [[FEXP2_:%[0-9]+]]:_(s32) = G_FEXP2 [[FMUL]] + ; GFX8-NEXT: [[FMUL1:%[0-9]+]]:_(s32) = G_FMUL [[UV1]], [[C]] + ; GFX8-NEXT: [[FEXP2_1:%[0-9]+]]:_(s32) = G_FEXP2 [[FMUL1]] + ; GFX8-NEXT: [[FMUL2:%[0-9]+]]:_(s32) = G_FMUL [[UV2]], [[C]] + ; GFX8-NEXT: [[FEXP2_2:%[0-9]+]]:_(s32) = G_FEXP2 [[FMUL2]] + ; GFX8-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[FEXP2_]](s32), [[FEXP2_1]](s32), [[FEXP2_2]](s32) + ; GFX8-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[BUILD_VECTOR]](<3 x s32>) ; GFX9-LABEL: name: test_fexp_v3s32 ; GFX9: [[COPY:%[0-9]+]]:_(<3 x s32>) = COPY $vgpr0_vgpr1_vgpr2 - ; GFX9: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<3 x s32>) - ; GFX9: [[C:%[0-9]+]]:_(s32) = G_FCONSTANT float 0x3FF7154760000000 - ; GFX9: [[FMUL:%[0-9]+]]:_(s32) = G_FMUL [[UV]], [[C]] - ; GFX9: [[FEXP2_:%[0-9]+]]:_(s32) = G_FEXP2 [[FMUL]] - ; GFX9: [[FMUL1:%[0-9]+]]:_(s32) = G_FMUL [[UV1]], [[C]] - ; GFX9: [[FEXP2_1:%[0-9]+]]:_(s32) = G_FEXP2 [[FMUL1]] - ; GFX9: [[FMUL2:%[0-9]+]]:_(s32) = G_FMUL [[UV2]], [[C]] - ; GFX9: [[FEXP2_2:%[0-9]+]]:_(s32) = G_FEXP2 [[FMUL2]] - ; GFX9: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[FEXP2_]](s32), [[FEXP2_1]](s32), [[FEXP2_2]](s32) - ; GFX9: $vgpr0_vgpr1_vgpr2 = COPY [[BUILD_VECTOR]](<3 x s32>) + ; GFX9-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<3 x s32>) + ; GFX9-NEXT: [[C:%[0-9]+]]:_(s32) = G_FCONSTANT float 0x3FF7154760000000 + ; GFX9-NEXT: [[FMUL:%[0-9]+]]:_(s32) = G_FMUL [[UV]], [[C]] + ; GFX9-NEXT: [[FEXP2_:%[0-9]+]]:_(s32) = G_FEXP2 [[FMUL]] + ; GFX9-NEXT: [[FMUL1:%[0-9]+]]:_(s32) = G_FMUL [[UV1]], [[C]] + ; GFX9-NEXT: [[FEXP2_1:%[0-9]+]]:_(s32) = G_FEXP2 [[FMUL1]] + ; GFX9-NEXT: [[FMUL2:%[0-9]+]]:_(s32) = G_FMUL [[UV2]], [[C]] + ; GFX9-NEXT: [[FEXP2_2:%[0-9]+]]:_(s32) = G_FEXP2 [[FMUL2]] + ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[FEXP2_]](s32), [[FEXP2_1]](s32), [[FEXP2_2]](s32) + ; GFX9-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[BUILD_VECTOR]](<3 x s32>) %0:_(<3 x s32>) = COPY $vgpr0_vgpr1_vgpr2 %1:_(<3 x s32>) = G_FEXP %0 $vgpr0_vgpr1_vgpr2 = COPY %1 @@ -158,30 +158,30 @@ body: | ; GFX6-LABEL: name: test_fexp_s16 ; GFX6: [[COPY:%[0-9]+]]:_(s32) = 
COPY $vgpr0 - ; GFX6: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY]](s32) - ; GFX6: [[FPEXT:%[0-9]+]]:_(s32) = G_FPEXT [[TRUNC]](s16) - ; GFX6: [[C:%[0-9]+]]:_(s32) = G_FCONSTANT float 0x3FF7154760000000 - ; GFX6: [[FMUL:%[0-9]+]]:_(s32) = G_FMUL [[FPEXT]], [[C]] - ; GFX6: [[FEXP2_:%[0-9]+]]:_(s32) = G_FEXP2 [[FMUL]] - ; GFX6: [[FPTRUNC:%[0-9]+]]:_(s16) = G_FPTRUNC [[FEXP2_]](s32) - ; GFX6: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[FPTRUNC]](s16) - ; GFX6: $vgpr0 = COPY [[ANYEXT]](s32) + ; GFX6-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY]](s32) + ; GFX6-NEXT: [[FPEXT:%[0-9]+]]:_(s32) = G_FPEXT [[TRUNC]](s16) + ; GFX6-NEXT: [[C:%[0-9]+]]:_(s32) = G_FCONSTANT float 0x3FF7154760000000 + ; GFX6-NEXT: [[FMUL:%[0-9]+]]:_(s32) = G_FMUL [[FPEXT]], [[C]] + ; GFX6-NEXT: [[FEXP2_:%[0-9]+]]:_(s32) = G_FEXP2 [[FMUL]] + ; GFX6-NEXT: [[FPTRUNC:%[0-9]+]]:_(s16) = G_FPTRUNC [[FEXP2_]](s32) + ; GFX6-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[FPTRUNC]](s16) + ; GFX6-NEXT: $vgpr0 = COPY [[ANYEXT]](s32) ; GFX8-LABEL: name: test_fexp_s16 ; GFX8: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX8: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY]](s32) - ; GFX8: [[C:%[0-9]+]]:_(s16) = G_FCONSTANT half 0xH3DC5 - ; GFX8: [[FMUL:%[0-9]+]]:_(s16) = G_FMUL [[TRUNC]], [[C]] - ; GFX8: [[FEXP2_:%[0-9]+]]:_(s16) = G_FEXP2 [[FMUL]] - ; GFX8: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[FEXP2_]](s16) - ; GFX8: $vgpr0 = COPY [[ANYEXT]](s32) + ; GFX8-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY]](s32) + ; GFX8-NEXT: [[C:%[0-9]+]]:_(s16) = G_FCONSTANT half 0xH3DC5 + ; GFX8-NEXT: [[FMUL:%[0-9]+]]:_(s16) = G_FMUL [[TRUNC]], [[C]] + ; GFX8-NEXT: [[FEXP2_:%[0-9]+]]:_(s16) = G_FEXP2 [[FMUL]] + ; GFX8-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[FEXP2_]](s16) + ; GFX8-NEXT: $vgpr0 = COPY [[ANYEXT]](s32) ; GFX9-LABEL: name: test_fexp_s16 ; GFX9: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX9: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY]](s32) - ; GFX9: [[C:%[0-9]+]]:_(s16) = G_FCONSTANT half 0xH3DC5 - ; GFX9: [[FMUL:%[0-9]+]]:_(s16) = G_FMUL [[TRUNC]], [[C]] - ; GFX9: [[FEXP2_:%[0-9]+]]:_(s16) = G_FEXP2 [[FMUL]] - ; GFX9: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[FEXP2_]](s16) - ; GFX9: $vgpr0 = COPY [[ANYEXT]](s32) + ; GFX9-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY]](s32) + ; GFX9-NEXT: [[C:%[0-9]+]]:_(s16) = G_FCONSTANT half 0xH3DC5 + ; GFX9-NEXT: [[FMUL:%[0-9]+]]:_(s16) = G_FMUL [[TRUNC]], [[C]] + ; GFX9-NEXT: [[FEXP2_:%[0-9]+]]:_(s16) = G_FEXP2 [[FMUL]] + ; GFX9-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[FEXP2_]](s16) + ; GFX9-NEXT: $vgpr0 = COPY [[ANYEXT]](s32) %0:_(s32) = COPY $vgpr0 %1:_(s16) = G_TRUNC %0 %2:_(s16) = G_FEXP %1 @@ -197,47 +197,47 @@ body: | ; GFX6-LABEL: name: test_fexp_v2s16 ; GFX6: [[COPY:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0 - ; GFX6: [[FPEXT:%[0-9]+]]:_(s32) = G_FPEXT %4(s16) - ; GFX6: [[C:%[0-9]+]]:_(s32) = G_FCONSTANT float 0x3FF7154760000000 - ; GFX6: [[FMUL:%[0-9]+]]:_(s32) = G_FMUL [[FPEXT]], [[C]] - ; GFX6: [[FEXP2_:%[0-9]+]]:_(s32) = G_FEXP2 [[FMUL]] - ; GFX6: [[FPTRUNC:%[0-9]+]]:_(s16) = G_FPTRUNC [[FEXP2_]](s32) - ; GFX6: [[FPEXT1:%[0-9]+]]:_(s32) = G_FPEXT %5(s16) - ; GFX6: [[FMUL1:%[0-9]+]]:_(s32) = G_FMUL [[FPEXT1]], [[C]] - ; GFX6: [[FEXP2_1:%[0-9]+]]:_(s32) = G_FEXP2 [[FMUL1]] - ; GFX6: [[FPTRUNC1:%[0-9]+]]:_(s16) = G_FPTRUNC [[FEXP2_1]](s32) - ; GFX6: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[FPTRUNC]](s16) - ; GFX6: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[FPTRUNC1]](s16) - ; GFX6: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; GFX6: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C1]](s32) - ; GFX6: 
[[OR:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL]] - ; GFX6: [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) - ; GFX6: $vgpr0 = COPY [[BITCAST]](<2 x s16>) + ; GFX6-NEXT: [[FPEXT:%[0-9]+]]:_(s32) = G_FPEXT %4(s16) + ; GFX6-NEXT: [[C:%[0-9]+]]:_(s32) = G_FCONSTANT float 0x3FF7154760000000 + ; GFX6-NEXT: [[FMUL:%[0-9]+]]:_(s32) = G_FMUL [[FPEXT]], [[C]] + ; GFX6-NEXT: [[FEXP2_:%[0-9]+]]:_(s32) = G_FEXP2 [[FMUL]] + ; GFX6-NEXT: [[FPTRUNC:%[0-9]+]]:_(s16) = G_FPTRUNC [[FEXP2_]](s32) + ; GFX6-NEXT: [[FPEXT1:%[0-9]+]]:_(s32) = G_FPEXT %5(s16) + ; GFX6-NEXT: [[FMUL1:%[0-9]+]]:_(s32) = G_FMUL [[FPEXT1]], [[C]] + ; GFX6-NEXT: [[FEXP2_1:%[0-9]+]]:_(s32) = G_FEXP2 [[FMUL1]] + ; GFX6-NEXT: [[FPTRUNC1:%[0-9]+]]:_(s16) = G_FPTRUNC [[FEXP2_1]](s32) + ; GFX6-NEXT: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[FPTRUNC]](s16) + ; GFX6-NEXT: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[FPTRUNC1]](s16) + ; GFX6-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; GFX6-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C1]](s32) + ; GFX6-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL]] + ; GFX6-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) + ; GFX6-NEXT: $vgpr0 = COPY [[BITCAST]](<2 x s16>) ; GFX8-LABEL: name: test_fexp_v2s16 ; GFX8: [[COPY:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0 - ; GFX8: [[C:%[0-9]+]]:_(s16) = G_FCONSTANT half 0xH3DC5 - ; GFX8: [[FMUL:%[0-9]+]]:_(s16) = G_FMUL %4, [[C]] - ; GFX8: [[FEXP2_:%[0-9]+]]:_(s16) = G_FEXP2 [[FMUL]] - ; GFX8: [[FMUL1:%[0-9]+]]:_(s16) = G_FMUL %5, [[C]] - ; GFX8: [[FEXP2_1:%[0-9]+]]:_(s16) = G_FEXP2 [[FMUL1]] - ; GFX8: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[FEXP2_]](s16) - ; GFX8: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[FEXP2_1]](s16) - ; GFX8: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; GFX8: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C1]](s32) - ; GFX8: [[OR:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL]] - ; GFX8: [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) - ; GFX8: $vgpr0 = COPY [[BITCAST]](<2 x s16>) + ; GFX8-NEXT: [[C:%[0-9]+]]:_(s16) = G_FCONSTANT half 0xH3DC5 + ; GFX8-NEXT: [[FMUL:%[0-9]+]]:_(s16) = G_FMUL %4, [[C]] + ; GFX8-NEXT: [[FEXP2_:%[0-9]+]]:_(s16) = G_FEXP2 [[FMUL]] + ; GFX8-NEXT: [[FMUL1:%[0-9]+]]:_(s16) = G_FMUL %5, [[C]] + ; GFX8-NEXT: [[FEXP2_1:%[0-9]+]]:_(s16) = G_FEXP2 [[FMUL1]] + ; GFX8-NEXT: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[FEXP2_]](s16) + ; GFX8-NEXT: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[FEXP2_1]](s16) + ; GFX8-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; GFX8-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C1]](s32) + ; GFX8-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL]] + ; GFX8-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) + ; GFX8-NEXT: $vgpr0 = COPY [[BITCAST]](<2 x s16>) ; GFX9-LABEL: name: test_fexp_v2s16 ; GFX9: [[COPY:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0 - ; GFX9: [[C:%[0-9]+]]:_(s16) = G_FCONSTANT half 0xH3DC5 - ; GFX9: [[FMUL:%[0-9]+]]:_(s16) = G_FMUL %4, [[C]] - ; GFX9: [[FEXP2_:%[0-9]+]]:_(s16) = G_FEXP2 [[FMUL]] - ; GFX9: [[FMUL1:%[0-9]+]]:_(s16) = G_FMUL %5, [[C]] - ; GFX9: [[FEXP2_1:%[0-9]+]]:_(s16) = G_FEXP2 [[FMUL1]] - ; GFX9: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[FEXP2_]](s16) - ; GFX9: [[ANYEXT1:%[0-9]+]]:_(s32) = G_ANYEXT [[FEXP2_1]](s16) - ; GFX9: [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[ANYEXT]](s32), [[ANYEXT1]](s32) - ; GFX9: $vgpr0 = COPY [[BUILD_VECTOR_TRUNC]](<2 x s16>) + ; GFX9-NEXT: [[C:%[0-9]+]]:_(s16) = G_FCONSTANT half 0xH3DC5 + ; GFX9-NEXT: [[FMUL:%[0-9]+]]:_(s16) = G_FMUL %4, [[C]] + ; GFX9-NEXT: [[FEXP2_:%[0-9]+]]:_(s16) = G_FEXP2 [[FMUL]] + 
; GFX9-NEXT: [[FMUL1:%[0-9]+]]:_(s16) = G_FMUL %5, [[C]] + ; GFX9-NEXT: [[FEXP2_1:%[0-9]+]]:_(s16) = G_FEXP2 [[FMUL1]] + ; GFX9-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[FEXP2_]](s16) + ; GFX9-NEXT: [[ANYEXT1:%[0-9]+]]:_(s32) = G_ANYEXT [[FEXP2_1]](s16) + ; GFX9-NEXT: [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[ANYEXT]](s32), [[ANYEXT1]](s32) + ; GFX9-NEXT: $vgpr0 = COPY [[BUILD_VECTOR_TRUNC]](<2 x s16>) %0:_(<2 x s16>) = COPY $vgpr0 %1:_(<2 x s16>) = G_FEXP %1 $vgpr0 = COPY %1 diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-ffloor.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-ffloor.mir index 413df7a3ed3d..8ab87e06338b 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-ffloor.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-ffloor.mir @@ -1,8 +1,8 @@ # NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py -# RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=tahiti -run-pass=legalizer -global-isel-abort=0 %s -o - | FileCheck -check-prefix=SI %s -# RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=fiji -run-pass=legalizer -global-isel-abort=0 %s -o - | FileCheck -check-prefix=VI %s -# RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx900 -run-pass=legalizer -global-isel-abort=0 %s -o - | FileCheck -check-prefix=GFX9 %s -# RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx1010 -run-pass=legalizer -global-isel-abort=0 %s -o - | FileCheck -check-prefix=GFX9 %s +# RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=tahiti -run-pass=legalizer %s -o - | FileCheck -check-prefix=SI %s +# RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=fiji -run-pass=legalizer %s -o - | FileCheck -check-prefix=VI %s +# RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx900 -run-pass=legalizer %s -o - | FileCheck -check-prefix=GFX9 %s +# RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx1010 -run-pass=legalizer %s -o - | FileCheck -check-prefix=GFX9 %s --- name: test_ffloor_s32 @@ -12,16 +12,16 @@ body: | ; SI-LABEL: name: test_ffloor_s32 ; SI: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; SI: [[FFLOOR:%[0-9]+]]:_(s32) = G_FFLOOR [[COPY]] - ; SI: $vgpr0 = COPY [[FFLOOR]](s32) + ; SI-NEXT: [[FFLOOR:%[0-9]+]]:_(s32) = G_FFLOOR [[COPY]] + ; SI-NEXT: $vgpr0 = COPY [[FFLOOR]](s32) ; VI-LABEL: name: test_ffloor_s32 ; VI: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; VI: [[FFLOOR:%[0-9]+]]:_(s32) = G_FFLOOR [[COPY]] - ; VI: $vgpr0 = COPY [[FFLOOR]](s32) + ; VI-NEXT: [[FFLOOR:%[0-9]+]]:_(s32) = G_FFLOOR [[COPY]] + ; VI-NEXT: $vgpr0 = COPY [[FFLOOR]](s32) ; GFX9-LABEL: name: test_ffloor_s32 ; GFX9: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX9: [[FFLOOR:%[0-9]+]]:_(s32) = G_FFLOOR [[COPY]] - ; GFX9: $vgpr0 = COPY [[FFLOOR]](s32) + ; GFX9-NEXT: [[FFLOOR:%[0-9]+]]:_(s32) = G_FFLOOR [[COPY]] + ; GFX9-NEXT: $vgpr0 = COPY [[FFLOOR]](s32) %0:_(s32) = COPY $vgpr0 %1:_(s32) = G_FFLOOR %0 $vgpr0 = COPY %1 @@ -35,22 +35,22 @@ body: | ; SI-LABEL: name: test_ffloor_s64 ; SI: [[COPY:%[0-9]+]]:_(s64) = COPY $vgpr0_vgpr1 - ; SI: [[INT:%[0-9]+]]:_(s64) = G_INTRINSIC intrinsic(@llvm.amdgcn.fract), [[COPY]](s64) - ; SI: [[C:%[0-9]+]]:_(s64) = G_FCONSTANT double 0x3FEFFFFFFFFFFFFF - ; SI: [[FMINNUM_IEEE:%[0-9]+]]:_(s64) = G_FMINNUM_IEEE [[INT]], [[C]] - ; SI: [[FCMP:%[0-9]+]]:_(s1) = G_FCMP floatpred(ord), [[COPY]](s64), [[COPY]] - ; SI: [[SELECT:%[0-9]+]]:_(s64) = G_SELECT [[FCMP]](s1), [[COPY]], [[FMINNUM_IEEE]] - ; SI: [[FNEG:%[0-9]+]]:_(s64) = G_FNEG [[SELECT]] - ; SI: [[FADD:%[0-9]+]]:_(s64) = G_FADD [[COPY]], [[FNEG]] - ; SI: $vgpr0_vgpr1 = COPY [[FADD]](s64) + ; SI-NEXT: [[INT:%[0-9]+]]:_(s64) = G_INTRINSIC 
intrinsic(@llvm.amdgcn.fract), [[COPY]](s64) + ; SI-NEXT: [[C:%[0-9]+]]:_(s64) = G_FCONSTANT double 0x3FEFFFFFFFFFFFFF + ; SI-NEXT: [[FMINNUM_IEEE:%[0-9]+]]:_(s64) = G_FMINNUM_IEEE [[INT]], [[C]] + ; SI-NEXT: [[FCMP:%[0-9]+]]:_(s1) = G_FCMP floatpred(ord), [[COPY]](s64), [[COPY]] + ; SI-NEXT: [[SELECT:%[0-9]+]]:_(s64) = G_SELECT [[FCMP]](s1), [[COPY]], [[FMINNUM_IEEE]] + ; SI-NEXT: [[FNEG:%[0-9]+]]:_(s64) = G_FNEG [[SELECT]] + ; SI-NEXT: [[FADD:%[0-9]+]]:_(s64) = G_FADD [[COPY]], [[FNEG]] + ; SI-NEXT: $vgpr0_vgpr1 = COPY [[FADD]](s64) ; VI-LABEL: name: test_ffloor_s64 ; VI: [[COPY:%[0-9]+]]:_(s64) = COPY $vgpr0_vgpr1 - ; VI: [[FFLOOR:%[0-9]+]]:_(s64) = G_FFLOOR [[COPY]] - ; VI: $vgpr0_vgpr1 = COPY [[FFLOOR]](s64) + ; VI-NEXT: [[FFLOOR:%[0-9]+]]:_(s64) = G_FFLOOR [[COPY]] + ; VI-NEXT: $vgpr0_vgpr1 = COPY [[FFLOOR]](s64) ; GFX9-LABEL: name: test_ffloor_s64 ; GFX9: [[COPY:%[0-9]+]]:_(s64) = COPY $vgpr0_vgpr1 - ; GFX9: [[FFLOOR:%[0-9]+]]:_(s64) = G_FFLOOR [[COPY]] - ; GFX9: $vgpr0_vgpr1 = COPY [[FFLOOR]](s64) + ; GFX9-NEXT: [[FFLOOR:%[0-9]+]]:_(s64) = G_FFLOOR [[COPY]] + ; GFX9-NEXT: $vgpr0_vgpr1 = COPY [[FFLOOR]](s64) %0:_(s64) = COPY $vgpr0_vgpr1 %1:_(s64) = G_FFLOOR %0 $vgpr0_vgpr1 = COPY %1 @@ -65,20 +65,20 @@ body: | ; SI-LABEL: name: test_ffloor_s64_nnan ; SI: [[COPY:%[0-9]+]]:_(s64) = COPY $vgpr0_vgpr1 - ; SI: [[INT:%[0-9]+]]:_(s64) = nnan G_INTRINSIC intrinsic(@llvm.amdgcn.fract), [[COPY]](s64) - ; SI: [[C:%[0-9]+]]:_(s64) = G_FCONSTANT double 0x3FEFFFFFFFFFFFFF - ; SI: [[FMINNUM_IEEE:%[0-9]+]]:_(s64) = nnan G_FMINNUM_IEEE [[INT]], [[C]] - ; SI: [[FNEG:%[0-9]+]]:_(s64) = nnan G_FNEG [[FMINNUM_IEEE]] - ; SI: [[FADD:%[0-9]+]]:_(s64) = nnan G_FADD [[COPY]], [[FNEG]] - ; SI: $vgpr0_vgpr1 = COPY [[FADD]](s64) + ; SI-NEXT: [[INT:%[0-9]+]]:_(s64) = nnan G_INTRINSIC intrinsic(@llvm.amdgcn.fract), [[COPY]](s64) + ; SI-NEXT: [[C:%[0-9]+]]:_(s64) = G_FCONSTANT double 0x3FEFFFFFFFFFFFFF + ; SI-NEXT: [[FMINNUM_IEEE:%[0-9]+]]:_(s64) = nnan G_FMINNUM_IEEE [[INT]], [[C]] + ; SI-NEXT: [[FNEG:%[0-9]+]]:_(s64) = nnan G_FNEG [[FMINNUM_IEEE]] + ; SI-NEXT: [[FADD:%[0-9]+]]:_(s64) = nnan G_FADD [[COPY]], [[FNEG]] + ; SI-NEXT: $vgpr0_vgpr1 = COPY [[FADD]](s64) ; VI-LABEL: name: test_ffloor_s64_nnan ; VI: [[COPY:%[0-9]+]]:_(s64) = COPY $vgpr0_vgpr1 - ; VI: [[FFLOOR:%[0-9]+]]:_(s64) = nnan G_FFLOOR [[COPY]] - ; VI: $vgpr0_vgpr1 = COPY [[FFLOOR]](s64) + ; VI-NEXT: [[FFLOOR:%[0-9]+]]:_(s64) = nnan G_FFLOOR [[COPY]] + ; VI-NEXT: $vgpr0_vgpr1 = COPY [[FFLOOR]](s64) ; GFX9-LABEL: name: test_ffloor_s64_nnan ; GFX9: [[COPY:%[0-9]+]]:_(s64) = COPY $vgpr0_vgpr1 - ; GFX9: [[FFLOOR:%[0-9]+]]:_(s64) = nnan G_FFLOOR [[COPY]] - ; GFX9: $vgpr0_vgpr1 = COPY [[FFLOOR]](s64) + ; GFX9-NEXT: [[FFLOOR:%[0-9]+]]:_(s64) = nnan G_FFLOOR [[COPY]] + ; GFX9-NEXT: $vgpr0_vgpr1 = COPY [[FFLOOR]](s64) %0:_(s64) = COPY $vgpr0_vgpr1 %1:_(s64) = nnan G_FFLOOR %0 $vgpr0_vgpr1 = COPY %1 @@ -93,22 +93,22 @@ body: | ; SI-LABEL: name: test_ffloor_s64_nssaz ; SI: [[COPY:%[0-9]+]]:_(s64) = COPY $vgpr0_vgpr1 - ; SI: [[INT:%[0-9]+]]:_(s64) = nsz G_INTRINSIC intrinsic(@llvm.amdgcn.fract), [[COPY]](s64) - ; SI: [[C:%[0-9]+]]:_(s64) = G_FCONSTANT double 0x3FEFFFFFFFFFFFFF - ; SI: [[FMINNUM_IEEE:%[0-9]+]]:_(s64) = nsz G_FMINNUM_IEEE [[INT]], [[C]] - ; SI: [[FCMP:%[0-9]+]]:_(s1) = nsz G_FCMP floatpred(ord), [[COPY]](s64), [[COPY]] - ; SI: [[SELECT:%[0-9]+]]:_(s64) = nsz G_SELECT [[FCMP]](s1), [[COPY]], [[FMINNUM_IEEE]] - ; SI: [[FNEG:%[0-9]+]]:_(s64) = nsz G_FNEG [[SELECT]] - ; SI: [[FADD:%[0-9]+]]:_(s64) = nsz G_FADD [[COPY]], [[FNEG]] - ; SI: $vgpr0_vgpr1 
= COPY [[FADD]](s64) + ; SI-NEXT: [[INT:%[0-9]+]]:_(s64) = nsz G_INTRINSIC intrinsic(@llvm.amdgcn.fract), [[COPY]](s64) + ; SI-NEXT: [[C:%[0-9]+]]:_(s64) = G_FCONSTANT double 0x3FEFFFFFFFFFFFFF + ; SI-NEXT: [[FMINNUM_IEEE:%[0-9]+]]:_(s64) = nsz G_FMINNUM_IEEE [[INT]], [[C]] + ; SI-NEXT: [[FCMP:%[0-9]+]]:_(s1) = nsz G_FCMP floatpred(ord), [[COPY]](s64), [[COPY]] + ; SI-NEXT: [[SELECT:%[0-9]+]]:_(s64) = nsz G_SELECT [[FCMP]](s1), [[COPY]], [[FMINNUM_IEEE]] + ; SI-NEXT: [[FNEG:%[0-9]+]]:_(s64) = nsz G_FNEG [[SELECT]] + ; SI-NEXT: [[FADD:%[0-9]+]]:_(s64) = nsz G_FADD [[COPY]], [[FNEG]] + ; SI-NEXT: $vgpr0_vgpr1 = COPY [[FADD]](s64) ; VI-LABEL: name: test_ffloor_s64_nssaz ; VI: [[COPY:%[0-9]+]]:_(s64) = COPY $vgpr0_vgpr1 - ; VI: [[FFLOOR:%[0-9]+]]:_(s64) = nsz G_FFLOOR [[COPY]] - ; VI: $vgpr0_vgpr1 = COPY [[FFLOOR]](s64) + ; VI-NEXT: [[FFLOOR:%[0-9]+]]:_(s64) = nsz G_FFLOOR [[COPY]] + ; VI-NEXT: $vgpr0_vgpr1 = COPY [[FFLOOR]](s64) ; GFX9-LABEL: name: test_ffloor_s64_nssaz ; GFX9: [[COPY:%[0-9]+]]:_(s64) = COPY $vgpr0_vgpr1 - ; GFX9: [[FFLOOR:%[0-9]+]]:_(s64) = nsz G_FFLOOR [[COPY]] - ; GFX9: $vgpr0_vgpr1 = COPY [[FFLOOR]](s64) + ; GFX9-NEXT: [[FFLOOR:%[0-9]+]]:_(s64) = nsz G_FFLOOR [[COPY]] + ; GFX9-NEXT: $vgpr0_vgpr1 = COPY [[FFLOOR]](s64) %0:_(s64) = COPY $vgpr0_vgpr1 %1:_(s64) = nsz G_FFLOOR %0 $vgpr0_vgpr1 = COPY %1 @@ -123,24 +123,24 @@ body: | ; SI-LABEL: name: test_ffloor_s16 ; SI: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; SI: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY]](s32) - ; SI: [[FPEXT:%[0-9]+]]:_(s32) = G_FPEXT [[TRUNC]](s16) - ; SI: [[FFLOOR:%[0-9]+]]:_(s32) = G_FFLOOR [[FPEXT]] - ; SI: [[FPTRUNC:%[0-9]+]]:_(s16) = G_FPTRUNC [[FFLOOR]](s32) - ; SI: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[FPTRUNC]](s16) - ; SI: $vgpr0 = COPY [[ANYEXT]](s32) + ; SI-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY]](s32) + ; SI-NEXT: [[FPEXT:%[0-9]+]]:_(s32) = G_FPEXT [[TRUNC]](s16) + ; SI-NEXT: [[FFLOOR:%[0-9]+]]:_(s32) = G_FFLOOR [[FPEXT]] + ; SI-NEXT: [[FPTRUNC:%[0-9]+]]:_(s16) = G_FPTRUNC [[FFLOOR]](s32) + ; SI-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[FPTRUNC]](s16) + ; SI-NEXT: $vgpr0 = COPY [[ANYEXT]](s32) ; VI-LABEL: name: test_ffloor_s16 ; VI: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; VI: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY]](s32) - ; VI: [[FFLOOR:%[0-9]+]]:_(s16) = G_FFLOOR [[TRUNC]] - ; VI: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[FFLOOR]](s16) - ; VI: $vgpr0 = COPY [[ANYEXT]](s32) + ; VI-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY]](s32) + ; VI-NEXT: [[FFLOOR:%[0-9]+]]:_(s16) = G_FFLOOR [[TRUNC]] + ; VI-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[FFLOOR]](s16) + ; VI-NEXT: $vgpr0 = COPY [[ANYEXT]](s32) ; GFX9-LABEL: name: test_ffloor_s16 ; GFX9: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX9: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY]](s32) - ; GFX9: [[FFLOOR:%[0-9]+]]:_(s16) = G_FFLOOR [[TRUNC]] - ; GFX9: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[FFLOOR]](s16) - ; GFX9: $vgpr0 = COPY [[ANYEXT]](s32) + ; GFX9-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY]](s32) + ; GFX9-NEXT: [[FFLOOR:%[0-9]+]]:_(s16) = G_FFLOOR [[TRUNC]] + ; GFX9-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[FFLOOR]](s16) + ; GFX9-NEXT: $vgpr0 = COPY [[ANYEXT]](s32) %0:_(s32) = COPY $vgpr0 %1:_(s16) = G_TRUNC %0 %2:_(s16) = G_FFLOOR %1 @@ -156,25 +156,25 @@ body: | ; SI-LABEL: name: test_ffloor_v2s32 ; SI: [[COPY:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr0_vgpr1 - ; SI: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<2 x s32>) - ; SI: [[FFLOOR:%[0-9]+]]:_(s32) = G_FFLOOR [[UV]] - ; SI: 
[[FFLOOR1:%[0-9]+]]:_(s32) = G_FFLOOR [[UV1]] - ; SI: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[FFLOOR]](s32), [[FFLOOR1]](s32) - ; SI: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x s32>) + ; SI-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<2 x s32>) + ; SI-NEXT: [[FFLOOR:%[0-9]+]]:_(s32) = G_FFLOOR [[UV]] + ; SI-NEXT: [[FFLOOR1:%[0-9]+]]:_(s32) = G_FFLOOR [[UV1]] + ; SI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[FFLOOR]](s32), [[FFLOOR1]](s32) + ; SI-NEXT: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x s32>) ; VI-LABEL: name: test_ffloor_v2s32 ; VI: [[COPY:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr0_vgpr1 - ; VI: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<2 x s32>) - ; VI: [[FFLOOR:%[0-9]+]]:_(s32) = G_FFLOOR [[UV]] - ; VI: [[FFLOOR1:%[0-9]+]]:_(s32) = G_FFLOOR [[UV1]] - ; VI: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[FFLOOR]](s32), [[FFLOOR1]](s32) - ; VI: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x s32>) + ; VI-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<2 x s32>) + ; VI-NEXT: [[FFLOOR:%[0-9]+]]:_(s32) = G_FFLOOR [[UV]] + ; VI-NEXT: [[FFLOOR1:%[0-9]+]]:_(s32) = G_FFLOOR [[UV1]] + ; VI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[FFLOOR]](s32), [[FFLOOR1]](s32) + ; VI-NEXT: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x s32>) ; GFX9-LABEL: name: test_ffloor_v2s32 ; GFX9: [[COPY:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr0_vgpr1 - ; GFX9: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<2 x s32>) - ; GFX9: [[FFLOOR:%[0-9]+]]:_(s32) = G_FFLOOR [[UV]] - ; GFX9: [[FFLOOR1:%[0-9]+]]:_(s32) = G_FFLOOR [[UV1]] - ; GFX9: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[FFLOOR]](s32), [[FFLOOR1]](s32) - ; GFX9: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x s32>) + ; GFX9-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<2 x s32>) + ; GFX9-NEXT: [[FFLOOR:%[0-9]+]]:_(s32) = G_FFLOOR [[UV]] + ; GFX9-NEXT: [[FFLOOR1:%[0-9]+]]:_(s32) = G_FFLOOR [[UV1]] + ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[FFLOOR]](s32), [[FFLOOR1]](s32) + ; GFX9-NEXT: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x s32>) %0:_(<2 x s32>) = COPY $vgpr0_vgpr1 %1:_(<2 x s32>) = G_FFLOOR %0 $vgpr0_vgpr1 = COPY %1 @@ -188,28 +188,28 @@ body: | ; SI-LABEL: name: test_ffloor_v3s32 ; SI: [[COPY:%[0-9]+]]:_(<3 x s32>) = COPY $vgpr0_vgpr1_vgpr2 - ; SI: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<3 x s32>) - ; SI: [[FFLOOR:%[0-9]+]]:_(s32) = G_FFLOOR [[UV]] - ; SI: [[FFLOOR1:%[0-9]+]]:_(s32) = G_FFLOOR [[UV1]] - ; SI: [[FFLOOR2:%[0-9]+]]:_(s32) = G_FFLOOR [[UV2]] - ; SI: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[FFLOOR]](s32), [[FFLOOR1]](s32), [[FFLOOR2]](s32) - ; SI: $vgpr0_vgpr1_vgpr2 = COPY [[BUILD_VECTOR]](<3 x s32>) + ; SI-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<3 x s32>) + ; SI-NEXT: [[FFLOOR:%[0-9]+]]:_(s32) = G_FFLOOR [[UV]] + ; SI-NEXT: [[FFLOOR1:%[0-9]+]]:_(s32) = G_FFLOOR [[UV1]] + ; SI-NEXT: [[FFLOOR2:%[0-9]+]]:_(s32) = G_FFLOOR [[UV2]] + ; SI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[FFLOOR]](s32), [[FFLOOR1]](s32), [[FFLOOR2]](s32) + ; SI-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[BUILD_VECTOR]](<3 x s32>) ; VI-LABEL: name: test_ffloor_v3s32 ; VI: [[COPY:%[0-9]+]]:_(<3 x s32>) = COPY $vgpr0_vgpr1_vgpr2 - ; VI: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), 
[[UV2:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<3 x s32>) - ; VI: [[FFLOOR:%[0-9]+]]:_(s32) = G_FFLOOR [[UV]] - ; VI: [[FFLOOR1:%[0-9]+]]:_(s32) = G_FFLOOR [[UV1]] - ; VI: [[FFLOOR2:%[0-9]+]]:_(s32) = G_FFLOOR [[UV2]] - ; VI: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[FFLOOR]](s32), [[FFLOOR1]](s32), [[FFLOOR2]](s32) - ; VI: $vgpr0_vgpr1_vgpr2 = COPY [[BUILD_VECTOR]](<3 x s32>) + ; VI-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<3 x s32>) + ; VI-NEXT: [[FFLOOR:%[0-9]+]]:_(s32) = G_FFLOOR [[UV]] + ; VI-NEXT: [[FFLOOR1:%[0-9]+]]:_(s32) = G_FFLOOR [[UV1]] + ; VI-NEXT: [[FFLOOR2:%[0-9]+]]:_(s32) = G_FFLOOR [[UV2]] + ; VI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[FFLOOR]](s32), [[FFLOOR1]](s32), [[FFLOOR2]](s32) + ; VI-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[BUILD_VECTOR]](<3 x s32>) ; GFX9-LABEL: name: test_ffloor_v3s32 ; GFX9: [[COPY:%[0-9]+]]:_(<3 x s32>) = COPY $vgpr0_vgpr1_vgpr2 - ; GFX9: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<3 x s32>) - ; GFX9: [[FFLOOR:%[0-9]+]]:_(s32) = G_FFLOOR [[UV]] - ; GFX9: [[FFLOOR1:%[0-9]+]]:_(s32) = G_FFLOOR [[UV1]] - ; GFX9: [[FFLOOR2:%[0-9]+]]:_(s32) = G_FFLOOR [[UV2]] - ; GFX9: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[FFLOOR]](s32), [[FFLOOR1]](s32), [[FFLOOR2]](s32) - ; GFX9: $vgpr0_vgpr1_vgpr2 = COPY [[BUILD_VECTOR]](<3 x s32>) + ; GFX9-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<3 x s32>) + ; GFX9-NEXT: [[FFLOOR:%[0-9]+]]:_(s32) = G_FFLOOR [[UV]] + ; GFX9-NEXT: [[FFLOOR1:%[0-9]+]]:_(s32) = G_FFLOOR [[UV1]] + ; GFX9-NEXT: [[FFLOOR2:%[0-9]+]]:_(s32) = G_FFLOOR [[UV2]] + ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[FFLOOR]](s32), [[FFLOOR1]](s32), [[FFLOOR2]](s32) + ; GFX9-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[BUILD_VECTOR]](<3 x s32>) %0:_(<3 x s32>) = COPY $vgpr0_vgpr1_vgpr2 %1:_(<3 x s32>) = G_FFLOOR %0 $vgpr0_vgpr1_vgpr2 = COPY %1 @@ -223,36 +223,36 @@ body: | ; SI-LABEL: name: test_ffloor_v2s64 ; SI: [[COPY:%[0-9]+]]:_(<2 x s64>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3 - ; SI: [[UV:%[0-9]+]]:_(s64), [[UV1:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[COPY]](<2 x s64>) - ; SI: [[INT:%[0-9]+]]:_(s64) = G_INTRINSIC intrinsic(@llvm.amdgcn.fract), [[UV]](s64) - ; SI: [[C:%[0-9]+]]:_(s64) = G_FCONSTANT double 0x3FEFFFFFFFFFFFFF - ; SI: [[FMINNUM_IEEE:%[0-9]+]]:_(s64) = G_FMINNUM_IEEE [[INT]], [[C]] - ; SI: [[FCMP:%[0-9]+]]:_(s1) = G_FCMP floatpred(ord), [[UV]](s64), [[UV]] - ; SI: [[SELECT:%[0-9]+]]:_(s64) = G_SELECT [[FCMP]](s1), [[UV]], [[FMINNUM_IEEE]] - ; SI: [[FNEG:%[0-9]+]]:_(s64) = G_FNEG [[SELECT]] - ; SI: [[FADD:%[0-9]+]]:_(s64) = G_FADD [[UV]], [[FNEG]] - ; SI: [[INT1:%[0-9]+]]:_(s64) = G_INTRINSIC intrinsic(@llvm.amdgcn.fract), [[UV1]](s64) - ; SI: [[FMINNUM_IEEE1:%[0-9]+]]:_(s64) = G_FMINNUM_IEEE [[INT1]], [[C]] - ; SI: [[FCMP1:%[0-9]+]]:_(s1) = G_FCMP floatpred(ord), [[UV1]](s64), [[UV1]] - ; SI: [[SELECT1:%[0-9]+]]:_(s64) = G_SELECT [[FCMP1]](s1), [[UV1]], [[FMINNUM_IEEE1]] - ; SI: [[FNEG1:%[0-9]+]]:_(s64) = G_FNEG [[SELECT1]] - ; SI: [[FADD1:%[0-9]+]]:_(s64) = G_FADD [[UV1]], [[FNEG1]] - ; SI: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s64>) = G_BUILD_VECTOR [[FADD]](s64), [[FADD1]](s64) - ; SI: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<2 x s64>) + ; SI-NEXT: [[UV:%[0-9]+]]:_(s64), [[UV1:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[COPY]](<2 x s64>) + ; SI-NEXT: [[INT:%[0-9]+]]:_(s64) = G_INTRINSIC 
intrinsic(@llvm.amdgcn.fract), [[UV]](s64) + ; SI-NEXT: [[C:%[0-9]+]]:_(s64) = G_FCONSTANT double 0x3FEFFFFFFFFFFFFF + ; SI-NEXT: [[FMINNUM_IEEE:%[0-9]+]]:_(s64) = G_FMINNUM_IEEE [[INT]], [[C]] + ; SI-NEXT: [[FCMP:%[0-9]+]]:_(s1) = G_FCMP floatpred(ord), [[UV]](s64), [[UV]] + ; SI-NEXT: [[SELECT:%[0-9]+]]:_(s64) = G_SELECT [[FCMP]](s1), [[UV]], [[FMINNUM_IEEE]] + ; SI-NEXT: [[FNEG:%[0-9]+]]:_(s64) = G_FNEG [[SELECT]] + ; SI-NEXT: [[FADD:%[0-9]+]]:_(s64) = G_FADD [[UV]], [[FNEG]] + ; SI-NEXT: [[INT1:%[0-9]+]]:_(s64) = G_INTRINSIC intrinsic(@llvm.amdgcn.fract), [[UV1]](s64) + ; SI-NEXT: [[FMINNUM_IEEE1:%[0-9]+]]:_(s64) = G_FMINNUM_IEEE [[INT1]], [[C]] + ; SI-NEXT: [[FCMP1:%[0-9]+]]:_(s1) = G_FCMP floatpred(ord), [[UV1]](s64), [[UV1]] + ; SI-NEXT: [[SELECT1:%[0-9]+]]:_(s64) = G_SELECT [[FCMP1]](s1), [[UV1]], [[FMINNUM_IEEE1]] + ; SI-NEXT: [[FNEG1:%[0-9]+]]:_(s64) = G_FNEG [[SELECT1]] + ; SI-NEXT: [[FADD1:%[0-9]+]]:_(s64) = G_FADD [[UV1]], [[FNEG1]] + ; SI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s64>) = G_BUILD_VECTOR [[FADD]](s64), [[FADD1]](s64) + ; SI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<2 x s64>) ; VI-LABEL: name: test_ffloor_v2s64 ; VI: [[COPY:%[0-9]+]]:_(<2 x s64>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3 - ; VI: [[UV:%[0-9]+]]:_(s64), [[UV1:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[COPY]](<2 x s64>) - ; VI: [[FFLOOR:%[0-9]+]]:_(s64) = G_FFLOOR [[UV]] - ; VI: [[FFLOOR1:%[0-9]+]]:_(s64) = G_FFLOOR [[UV1]] - ; VI: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s64>) = G_BUILD_VECTOR [[FFLOOR]](s64), [[FFLOOR1]](s64) - ; VI: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<2 x s64>) + ; VI-NEXT: [[UV:%[0-9]+]]:_(s64), [[UV1:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[COPY]](<2 x s64>) + ; VI-NEXT: [[FFLOOR:%[0-9]+]]:_(s64) = G_FFLOOR [[UV]] + ; VI-NEXT: [[FFLOOR1:%[0-9]+]]:_(s64) = G_FFLOOR [[UV1]] + ; VI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s64>) = G_BUILD_VECTOR [[FFLOOR]](s64), [[FFLOOR1]](s64) + ; VI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<2 x s64>) ; GFX9-LABEL: name: test_ffloor_v2s64 ; GFX9: [[COPY:%[0-9]+]]:_(<2 x s64>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3 - ; GFX9: [[UV:%[0-9]+]]:_(s64), [[UV1:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[COPY]](<2 x s64>) - ; GFX9: [[FFLOOR:%[0-9]+]]:_(s64) = G_FFLOOR [[UV]] - ; GFX9: [[FFLOOR1:%[0-9]+]]:_(s64) = G_FFLOOR [[UV1]] - ; GFX9: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s64>) = G_BUILD_VECTOR [[FFLOOR]](s64), [[FFLOOR1]](s64) - ; GFX9: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<2 x s64>) + ; GFX9-NEXT: [[UV:%[0-9]+]]:_(s64), [[UV1:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[COPY]](<2 x s64>) + ; GFX9-NEXT: [[FFLOOR:%[0-9]+]]:_(s64) = G_FFLOOR [[UV]] + ; GFX9-NEXT: [[FFLOOR1:%[0-9]+]]:_(s64) = G_FFLOOR [[UV1]] + ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s64>) = G_BUILD_VECTOR [[FFLOOR]](s64), [[FFLOOR1]](s64) + ; GFX9-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<2 x s64>) %0:_(<2 x s64>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3 %1:_(<2 x s64>) = G_FFLOOR %0 $vgpr0_vgpr1_vgpr2_vgpr3 = COPY %1 @@ -266,51 +266,51 @@ body: | ; SI-LABEL: name: test_ffloor_v2s16 ; SI: [[COPY:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0 - ; SI: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY]](<2 x s16>) - ; SI: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST]](s32) - ; SI: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; SI: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) - ; SI: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) - ; SI: [[FPEXT:%[0-9]+]]:_(s32) = G_FPEXT [[TRUNC]](s16) - ; SI: [[FFLOOR:%[0-9]+]]:_(s32) = G_FFLOOR [[FPEXT]] - ; SI: [[FPTRUNC:%[0-9]+]]:_(s16) = 
G_FPTRUNC [[FFLOOR]](s32) - ; SI: [[FPEXT1:%[0-9]+]]:_(s32) = G_FPEXT [[TRUNC1]](s16) - ; SI: [[FFLOOR1:%[0-9]+]]:_(s32) = G_FFLOOR [[FPEXT1]] - ; SI: [[FPTRUNC1:%[0-9]+]]:_(s16) = G_FPTRUNC [[FFLOOR1]](s32) - ; SI: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[FPTRUNC]](s16) - ; SI: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[FPTRUNC1]](s16) - ; SI: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C]](s32) - ; SI: [[OR:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL]] - ; SI: [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) - ; SI: $vgpr0 = COPY [[BITCAST1]](<2 x s16>) + ; SI-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY]](<2 x s16>) + ; SI-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST]](s32) + ; SI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; SI-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) + ; SI-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) + ; SI-NEXT: [[FPEXT:%[0-9]+]]:_(s32) = G_FPEXT [[TRUNC]](s16) + ; SI-NEXT: [[FFLOOR:%[0-9]+]]:_(s32) = G_FFLOOR [[FPEXT]] + ; SI-NEXT: [[FPTRUNC:%[0-9]+]]:_(s16) = G_FPTRUNC [[FFLOOR]](s32) + ; SI-NEXT: [[FPEXT1:%[0-9]+]]:_(s32) = G_FPEXT [[TRUNC1]](s16) + ; SI-NEXT: [[FFLOOR1:%[0-9]+]]:_(s32) = G_FFLOOR [[FPEXT1]] + ; SI-NEXT: [[FPTRUNC1:%[0-9]+]]:_(s16) = G_FPTRUNC [[FFLOOR1]](s32) + ; SI-NEXT: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[FPTRUNC]](s16) + ; SI-NEXT: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[FPTRUNC1]](s16) + ; SI-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C]](s32) + ; SI-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL]] + ; SI-NEXT: [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) + ; SI-NEXT: $vgpr0 = COPY [[BITCAST1]](<2 x s16>) ; VI-LABEL: name: test_ffloor_v2s16 ; VI: [[COPY:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0 - ; VI: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY]](<2 x s16>) - ; VI: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST]](s32) - ; VI: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; VI: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) - ; VI: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) - ; VI: [[FFLOOR:%[0-9]+]]:_(s16) = G_FFLOOR [[TRUNC]] - ; VI: [[FFLOOR1:%[0-9]+]]:_(s16) = G_FFLOOR [[TRUNC1]] - ; VI: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[FFLOOR]](s16) - ; VI: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[FFLOOR1]](s16) - ; VI: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C]](s32) - ; VI: [[OR:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL]] - ; VI: [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) - ; VI: $vgpr0 = COPY [[BITCAST1]](<2 x s16>) + ; VI-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY]](<2 x s16>) + ; VI-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST]](s32) + ; VI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; VI-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) + ; VI-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) + ; VI-NEXT: [[FFLOOR:%[0-9]+]]:_(s16) = G_FFLOOR [[TRUNC]] + ; VI-NEXT: [[FFLOOR1:%[0-9]+]]:_(s16) = G_FFLOOR [[TRUNC1]] + ; VI-NEXT: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[FFLOOR]](s16) + ; VI-NEXT: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[FFLOOR1]](s16) + ; VI-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C]](s32) + ; VI-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL]] + ; VI-NEXT: [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) + ; VI-NEXT: $vgpr0 = COPY [[BITCAST1]](<2 x s16>) ; GFX9-LABEL: name: test_ffloor_v2s16 ; GFX9: [[COPY:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0 - ; GFX9: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY]](<2 x s16>) - ; GFX9: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST]](s32) - ; GFX9: [[C:%[0-9]+]]:_(s32) 
= G_CONSTANT i32 16 - ; GFX9: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) - ; GFX9: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) - ; GFX9: [[FFLOOR:%[0-9]+]]:_(s16) = G_FFLOOR [[TRUNC]] - ; GFX9: [[FFLOOR1:%[0-9]+]]:_(s16) = G_FFLOOR [[TRUNC1]] - ; GFX9: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[FFLOOR]](s16) - ; GFX9: [[ANYEXT1:%[0-9]+]]:_(s32) = G_ANYEXT [[FFLOOR1]](s16) - ; GFX9: [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[ANYEXT]](s32), [[ANYEXT1]](s32) - ; GFX9: $vgpr0 = COPY [[BUILD_VECTOR_TRUNC]](<2 x s16>) + ; GFX9-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY]](<2 x s16>) + ; GFX9-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST]](s32) + ; GFX9-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; GFX9-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) + ; GFX9-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) + ; GFX9-NEXT: [[FFLOOR:%[0-9]+]]:_(s16) = G_FFLOOR [[TRUNC]] + ; GFX9-NEXT: [[FFLOOR1:%[0-9]+]]:_(s16) = G_FFLOOR [[TRUNC1]] + ; GFX9-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[FFLOOR]](s16) + ; GFX9-NEXT: [[ANYEXT1:%[0-9]+]]:_(s32) = G_ANYEXT [[FFLOOR1]](s16) + ; GFX9-NEXT: [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[ANYEXT]](s32), [[ANYEXT1]](s32) + ; GFX9-NEXT: $vgpr0 = COPY [[BUILD_VECTOR_TRUNC]](<2 x s16>) %0:_(<2 x s16>) = COPY $vgpr0 %1:_(<2 x s16>) = G_FFLOOR %0 $vgpr0 = COPY %1 @@ -323,64 +323,64 @@ body: | ; SI-LABEL: name: test_ffloor_v3s16 ; SI: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF - ; SI: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<4 x s16>) - ; SI: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>) - ; SI: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST]](s32) - ; SI: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; SI: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) - ; SI: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) - ; SI: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>) - ; SI: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST1]](s32) - ; SI: [[FPEXT:%[0-9]+]]:_(s32) = G_FPEXT [[TRUNC]](s16) - ; SI: [[FFLOOR:%[0-9]+]]:_(s32) = G_FFLOOR [[FPEXT]] - ; SI: [[FPTRUNC:%[0-9]+]]:_(s16) = G_FPTRUNC [[FFLOOR]](s32) - ; SI: [[FPEXT1:%[0-9]+]]:_(s32) = G_FPEXT [[TRUNC1]](s16) - ; SI: [[FFLOOR1:%[0-9]+]]:_(s32) = G_FFLOOR [[FPEXT1]] - ; SI: [[FPTRUNC1:%[0-9]+]]:_(s16) = G_FPTRUNC [[FFLOOR1]](s32) - ; SI: [[FPEXT2:%[0-9]+]]:_(s32) = G_FPEXT [[TRUNC2]](s16) - ; SI: [[FFLOOR2:%[0-9]+]]:_(s32) = G_FFLOOR [[FPEXT2]] - ; SI: [[FPTRUNC2:%[0-9]+]]:_(s16) = G_FPTRUNC [[FFLOOR2]](s32) - ; SI: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[FPTRUNC]](s16) - ; SI: [[ANYEXT1:%[0-9]+]]:_(s32) = G_ANYEXT [[FPTRUNC1]](s16) - ; SI: [[ANYEXT2:%[0-9]+]]:_(s32) = G_ANYEXT [[FPTRUNC2]](s16) - ; SI: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[ANYEXT]](s32), [[ANYEXT1]](s32), [[ANYEXT2]](s32) - ; SI: S_NOP 0, implicit [[BUILD_VECTOR]](<3 x s32>) + ; SI-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<4 x s16>) + ; SI-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>) + ; SI-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST]](s32) + ; SI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; SI-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) + ; SI-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) + ; SI-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>) + ; SI-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST1]](s32) + ; SI-NEXT: 
[[FPEXT:%[0-9]+]]:_(s32) = G_FPEXT [[TRUNC]](s16) + ; SI-NEXT: [[FFLOOR:%[0-9]+]]:_(s32) = G_FFLOOR [[FPEXT]] + ; SI-NEXT: [[FPTRUNC:%[0-9]+]]:_(s16) = G_FPTRUNC [[FFLOOR]](s32) + ; SI-NEXT: [[FPEXT1:%[0-9]+]]:_(s32) = G_FPEXT [[TRUNC1]](s16) + ; SI-NEXT: [[FFLOOR1:%[0-9]+]]:_(s32) = G_FFLOOR [[FPEXT1]] + ; SI-NEXT: [[FPTRUNC1:%[0-9]+]]:_(s16) = G_FPTRUNC [[FFLOOR1]](s32) + ; SI-NEXT: [[FPEXT2:%[0-9]+]]:_(s32) = G_FPEXT [[TRUNC2]](s16) + ; SI-NEXT: [[FFLOOR2:%[0-9]+]]:_(s32) = G_FFLOOR [[FPEXT2]] + ; SI-NEXT: [[FPTRUNC2:%[0-9]+]]:_(s16) = G_FPTRUNC [[FFLOOR2]](s32) + ; SI-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[FPTRUNC]](s16) + ; SI-NEXT: [[ANYEXT1:%[0-9]+]]:_(s32) = G_ANYEXT [[FPTRUNC1]](s16) + ; SI-NEXT: [[ANYEXT2:%[0-9]+]]:_(s32) = G_ANYEXT [[FPTRUNC2]](s16) + ; SI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[ANYEXT]](s32), [[ANYEXT1]](s32), [[ANYEXT2]](s32) + ; SI-NEXT: S_NOP 0, implicit [[BUILD_VECTOR]](<3 x s32>) ; VI-LABEL: name: test_ffloor_v3s16 ; VI: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF - ; VI: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<4 x s16>) - ; VI: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>) - ; VI: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST]](s32) - ; VI: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; VI: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) - ; VI: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) - ; VI: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>) - ; VI: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST1]](s32) - ; VI: [[FFLOOR:%[0-9]+]]:_(s16) = G_FFLOOR [[TRUNC]] - ; VI: [[FFLOOR1:%[0-9]+]]:_(s16) = G_FFLOOR [[TRUNC1]] - ; VI: [[FFLOOR2:%[0-9]+]]:_(s16) = G_FFLOOR [[TRUNC2]] - ; VI: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[FFLOOR]](s16) - ; VI: [[ANYEXT1:%[0-9]+]]:_(s32) = G_ANYEXT [[FFLOOR1]](s16) - ; VI: [[ANYEXT2:%[0-9]+]]:_(s32) = G_ANYEXT [[FFLOOR2]](s16) - ; VI: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[ANYEXT]](s32), [[ANYEXT1]](s32), [[ANYEXT2]](s32) - ; VI: S_NOP 0, implicit [[BUILD_VECTOR]](<3 x s32>) + ; VI-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<4 x s16>) + ; VI-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>) + ; VI-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST]](s32) + ; VI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; VI-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) + ; VI-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) + ; VI-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>) + ; VI-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST1]](s32) + ; VI-NEXT: [[FFLOOR:%[0-9]+]]:_(s16) = G_FFLOOR [[TRUNC]] + ; VI-NEXT: [[FFLOOR1:%[0-9]+]]:_(s16) = G_FFLOOR [[TRUNC1]] + ; VI-NEXT: [[FFLOOR2:%[0-9]+]]:_(s16) = G_FFLOOR [[TRUNC2]] + ; VI-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[FFLOOR]](s16) + ; VI-NEXT: [[ANYEXT1:%[0-9]+]]:_(s32) = G_ANYEXT [[FFLOOR1]](s16) + ; VI-NEXT: [[ANYEXT2:%[0-9]+]]:_(s32) = G_ANYEXT [[FFLOOR2]](s16) + ; VI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[ANYEXT]](s32), [[ANYEXT1]](s32), [[ANYEXT2]](s32) + ; VI-NEXT: S_NOP 0, implicit [[BUILD_VECTOR]](<3 x s32>) ; GFX9-LABEL: name: test_ffloor_v3s16 ; GFX9: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF - ; GFX9: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<4 x s16>) - ; GFX9: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>) - ; GFX9: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC 
[[BITCAST]](s32) - ; GFX9: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; GFX9: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) - ; GFX9: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) - ; GFX9: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>) - ; GFX9: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST1]](s32) - ; GFX9: [[FFLOOR:%[0-9]+]]:_(s16) = G_FFLOOR [[TRUNC]] - ; GFX9: [[FFLOOR1:%[0-9]+]]:_(s16) = G_FFLOOR [[TRUNC1]] - ; GFX9: [[FFLOOR2:%[0-9]+]]:_(s16) = G_FFLOOR [[TRUNC2]] - ; GFX9: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[FFLOOR]](s16) - ; GFX9: [[ANYEXT1:%[0-9]+]]:_(s32) = G_ANYEXT [[FFLOOR1]](s16) - ; GFX9: [[ANYEXT2:%[0-9]+]]:_(s32) = G_ANYEXT [[FFLOOR2]](s16) - ; GFX9: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[ANYEXT]](s32), [[ANYEXT1]](s32), [[ANYEXT2]](s32) - ; GFX9: S_NOP 0, implicit [[BUILD_VECTOR]](<3 x s32>) + ; GFX9-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<4 x s16>) + ; GFX9-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>) + ; GFX9-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST]](s32) + ; GFX9-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; GFX9-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) + ; GFX9-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) + ; GFX9-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>) + ; GFX9-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST1]](s32) + ; GFX9-NEXT: [[FFLOOR:%[0-9]+]]:_(s16) = G_FFLOOR [[TRUNC]] + ; GFX9-NEXT: [[FFLOOR1:%[0-9]+]]:_(s16) = G_FFLOOR [[TRUNC1]] + ; GFX9-NEXT: [[FFLOOR2:%[0-9]+]]:_(s16) = G_FFLOOR [[TRUNC2]] + ; GFX9-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[FFLOOR]](s16) + ; GFX9-NEXT: [[ANYEXT1:%[0-9]+]]:_(s32) = G_ANYEXT [[FFLOOR1]](s16) + ; GFX9-NEXT: [[ANYEXT2:%[0-9]+]]:_(s32) = G_ANYEXT [[FFLOOR2]](s16) + ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[ANYEXT]](s32), [[ANYEXT1]](s32), [[ANYEXT2]](s32) + ; GFX9-NEXT: S_NOP 0, implicit [[BUILD_VECTOR]](<3 x s32>) %0:_(<3 x s16>) = G_IMPLICIT_DEF %1:_(<3 x s16>) = G_FFLOOR %0 %2:_(<3 x s32>) = G_ANYEXT %1 @@ -395,92 +395,92 @@ body: | ; SI-LABEL: name: test_ffloor_v4s16 ; SI: [[COPY:%[0-9]+]]:_(<4 x s16>) = COPY $vgpr0_vgpr1 - ; SI: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[COPY]](<4 x s16>) - ; SI: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>) - ; SI: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST]](s32) - ; SI: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; SI: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) - ; SI: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) - ; SI: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>) - ; SI: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST1]](s32) - ; SI: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C]](s32) - ; SI: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR1]](s32) - ; SI: [[FPEXT:%[0-9]+]]:_(s32) = G_FPEXT [[TRUNC]](s16) - ; SI: [[FFLOOR:%[0-9]+]]:_(s32) = G_FFLOOR [[FPEXT]] - ; SI: [[FPTRUNC:%[0-9]+]]:_(s16) = G_FPTRUNC [[FFLOOR]](s32) - ; SI: [[FPEXT1:%[0-9]+]]:_(s32) = G_FPEXT [[TRUNC1]](s16) - ; SI: [[FFLOOR1:%[0-9]+]]:_(s32) = G_FFLOOR [[FPEXT1]] - ; SI: [[FPTRUNC1:%[0-9]+]]:_(s16) = G_FPTRUNC [[FFLOOR1]](s32) - ; SI: [[FPEXT2:%[0-9]+]]:_(s32) = G_FPEXT [[TRUNC2]](s16) - ; SI: [[FFLOOR2:%[0-9]+]]:_(s32) = G_FFLOOR [[FPEXT2]] - ; SI: [[FPTRUNC2:%[0-9]+]]:_(s16) = G_FPTRUNC [[FFLOOR2]](s32) - ; SI: [[FPEXT3:%[0-9]+]]:_(s32) = G_FPEXT [[TRUNC3]](s16) - ; SI: [[FFLOOR3:%[0-9]+]]:_(s32) = G_FFLOOR 
[[FPEXT3]] - ; SI: [[FPTRUNC3:%[0-9]+]]:_(s16) = G_FPTRUNC [[FFLOOR3]](s32) - ; SI: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[FPTRUNC]](s16) - ; SI: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[FPTRUNC1]](s16) - ; SI: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C]](s32) - ; SI: [[OR:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL]] - ; SI: [[BITCAST2:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) - ; SI: [[ZEXT2:%[0-9]+]]:_(s32) = G_ZEXT [[FPTRUNC2]](s16) - ; SI: [[ZEXT3:%[0-9]+]]:_(s32) = G_ZEXT [[FPTRUNC3]](s16) - ; SI: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[ZEXT3]], [[C]](s32) - ; SI: [[OR1:%[0-9]+]]:_(s32) = G_OR [[ZEXT2]], [[SHL1]] - ; SI: [[BITCAST3:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32) - ; SI: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BITCAST2]](<2 x s16>), [[BITCAST3]](<2 x s16>) - ; SI: $vgpr0_vgpr1 = COPY [[CONCAT_VECTORS]](<4 x s16>) + ; SI-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[COPY]](<4 x s16>) + ; SI-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>) + ; SI-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST]](s32) + ; SI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; SI-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) + ; SI-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) + ; SI-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>) + ; SI-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST1]](s32) + ; SI-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C]](s32) + ; SI-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR1]](s32) + ; SI-NEXT: [[FPEXT:%[0-9]+]]:_(s32) = G_FPEXT [[TRUNC]](s16) + ; SI-NEXT: [[FFLOOR:%[0-9]+]]:_(s32) = G_FFLOOR [[FPEXT]] + ; SI-NEXT: [[FPTRUNC:%[0-9]+]]:_(s16) = G_FPTRUNC [[FFLOOR]](s32) + ; SI-NEXT: [[FPEXT1:%[0-9]+]]:_(s32) = G_FPEXT [[TRUNC1]](s16) + ; SI-NEXT: [[FFLOOR1:%[0-9]+]]:_(s32) = G_FFLOOR [[FPEXT1]] + ; SI-NEXT: [[FPTRUNC1:%[0-9]+]]:_(s16) = G_FPTRUNC [[FFLOOR1]](s32) + ; SI-NEXT: [[FPEXT2:%[0-9]+]]:_(s32) = G_FPEXT [[TRUNC2]](s16) + ; SI-NEXT: [[FFLOOR2:%[0-9]+]]:_(s32) = G_FFLOOR [[FPEXT2]] + ; SI-NEXT: [[FPTRUNC2:%[0-9]+]]:_(s16) = G_FPTRUNC [[FFLOOR2]](s32) + ; SI-NEXT: [[FPEXT3:%[0-9]+]]:_(s32) = G_FPEXT [[TRUNC3]](s16) + ; SI-NEXT: [[FFLOOR3:%[0-9]+]]:_(s32) = G_FFLOOR [[FPEXT3]] + ; SI-NEXT: [[FPTRUNC3:%[0-9]+]]:_(s16) = G_FPTRUNC [[FFLOOR3]](s32) + ; SI-NEXT: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[FPTRUNC]](s16) + ; SI-NEXT: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[FPTRUNC1]](s16) + ; SI-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C]](s32) + ; SI-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL]] + ; SI-NEXT: [[BITCAST2:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) + ; SI-NEXT: [[ZEXT2:%[0-9]+]]:_(s32) = G_ZEXT [[FPTRUNC2]](s16) + ; SI-NEXT: [[ZEXT3:%[0-9]+]]:_(s32) = G_ZEXT [[FPTRUNC3]](s16) + ; SI-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[ZEXT3]], [[C]](s32) + ; SI-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[ZEXT2]], [[SHL1]] + ; SI-NEXT: [[BITCAST3:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32) + ; SI-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BITCAST2]](<2 x s16>), [[BITCAST3]](<2 x s16>) + ; SI-NEXT: $vgpr0_vgpr1 = COPY [[CONCAT_VECTORS]](<4 x s16>) ; VI-LABEL: name: test_ffloor_v4s16 ; VI: [[COPY:%[0-9]+]]:_(<4 x s16>) = COPY $vgpr0_vgpr1 - ; VI: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[COPY]](<4 x s16>) - ; VI: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>) - ; VI: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST]](s32) - ; VI: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 
16 - ; VI: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) - ; VI: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) - ; VI: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>) - ; VI: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST1]](s32) - ; VI: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C]](s32) - ; VI: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR1]](s32) - ; VI: [[FFLOOR:%[0-9]+]]:_(s16) = G_FFLOOR [[TRUNC]] - ; VI: [[FFLOOR1:%[0-9]+]]:_(s16) = G_FFLOOR [[TRUNC1]] - ; VI: [[FFLOOR2:%[0-9]+]]:_(s16) = G_FFLOOR [[TRUNC2]] - ; VI: [[FFLOOR3:%[0-9]+]]:_(s16) = G_FFLOOR [[TRUNC3]] - ; VI: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[FFLOOR]](s16) - ; VI: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[FFLOOR1]](s16) - ; VI: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C]](s32) - ; VI: [[OR:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL]] - ; VI: [[BITCAST2:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) - ; VI: [[ZEXT2:%[0-9]+]]:_(s32) = G_ZEXT [[FFLOOR2]](s16) - ; VI: [[ZEXT3:%[0-9]+]]:_(s32) = G_ZEXT [[FFLOOR3]](s16) - ; VI: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[ZEXT3]], [[C]](s32) - ; VI: [[OR1:%[0-9]+]]:_(s32) = G_OR [[ZEXT2]], [[SHL1]] - ; VI: [[BITCAST3:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32) - ; VI: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BITCAST2]](<2 x s16>), [[BITCAST3]](<2 x s16>) - ; VI: $vgpr0_vgpr1 = COPY [[CONCAT_VECTORS]](<4 x s16>) + ; VI-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[COPY]](<4 x s16>) + ; VI-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>) + ; VI-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST]](s32) + ; VI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; VI-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) + ; VI-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) + ; VI-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>) + ; VI-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST1]](s32) + ; VI-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C]](s32) + ; VI-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR1]](s32) + ; VI-NEXT: [[FFLOOR:%[0-9]+]]:_(s16) = G_FFLOOR [[TRUNC]] + ; VI-NEXT: [[FFLOOR1:%[0-9]+]]:_(s16) = G_FFLOOR [[TRUNC1]] + ; VI-NEXT: [[FFLOOR2:%[0-9]+]]:_(s16) = G_FFLOOR [[TRUNC2]] + ; VI-NEXT: [[FFLOOR3:%[0-9]+]]:_(s16) = G_FFLOOR [[TRUNC3]] + ; VI-NEXT: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[FFLOOR]](s16) + ; VI-NEXT: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[FFLOOR1]](s16) + ; VI-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C]](s32) + ; VI-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL]] + ; VI-NEXT: [[BITCAST2:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) + ; VI-NEXT: [[ZEXT2:%[0-9]+]]:_(s32) = G_ZEXT [[FFLOOR2]](s16) + ; VI-NEXT: [[ZEXT3:%[0-9]+]]:_(s32) = G_ZEXT [[FFLOOR3]](s16) + ; VI-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[ZEXT3]], [[C]](s32) + ; VI-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[ZEXT2]], [[SHL1]] + ; VI-NEXT: [[BITCAST3:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32) + ; VI-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BITCAST2]](<2 x s16>), [[BITCAST3]](<2 x s16>) + ; VI-NEXT: $vgpr0_vgpr1 = COPY [[CONCAT_VECTORS]](<4 x s16>) ; GFX9-LABEL: name: test_ffloor_v4s16 ; GFX9: [[COPY:%[0-9]+]]:_(<4 x s16>) = COPY $vgpr0_vgpr1 - ; GFX9: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[COPY]](<4 x s16>) - ; GFX9: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>) - ; GFX9: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST]](s32) - ; GFX9: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 
16 - ; GFX9: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) - ; GFX9: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) - ; GFX9: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>) - ; GFX9: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST1]](s32) - ; GFX9: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C]](s32) - ; GFX9: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR1]](s32) - ; GFX9: [[FFLOOR:%[0-9]+]]:_(s16) = G_FFLOOR [[TRUNC]] - ; GFX9: [[FFLOOR1:%[0-9]+]]:_(s16) = G_FFLOOR [[TRUNC1]] - ; GFX9: [[FFLOOR2:%[0-9]+]]:_(s16) = G_FFLOOR [[TRUNC2]] - ; GFX9: [[FFLOOR3:%[0-9]+]]:_(s16) = G_FFLOOR [[TRUNC3]] - ; GFX9: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[FFLOOR]](s16) - ; GFX9: [[ANYEXT1:%[0-9]+]]:_(s32) = G_ANYEXT [[FFLOOR1]](s16) - ; GFX9: [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[ANYEXT]](s32), [[ANYEXT1]](s32) - ; GFX9: [[ANYEXT2:%[0-9]+]]:_(s32) = G_ANYEXT [[FFLOOR2]](s16) - ; GFX9: [[ANYEXT3:%[0-9]+]]:_(s32) = G_ANYEXT [[FFLOOR3]](s16) - ; GFX9: [[BUILD_VECTOR_TRUNC1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[ANYEXT2]](s32), [[ANYEXT3]](s32) - ; GFX9: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR_TRUNC]](<2 x s16>), [[BUILD_VECTOR_TRUNC1]](<2 x s16>) - ; GFX9: $vgpr0_vgpr1 = COPY [[CONCAT_VECTORS]](<4 x s16>) + ; GFX9-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[COPY]](<4 x s16>) + ; GFX9-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>) + ; GFX9-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST]](s32) + ; GFX9-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; GFX9-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) + ; GFX9-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) + ; GFX9-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>) + ; GFX9-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST1]](s32) + ; GFX9-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C]](s32) + ; GFX9-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR1]](s32) + ; GFX9-NEXT: [[FFLOOR:%[0-9]+]]:_(s16) = G_FFLOOR [[TRUNC]] + ; GFX9-NEXT: [[FFLOOR1:%[0-9]+]]:_(s16) = G_FFLOOR [[TRUNC1]] + ; GFX9-NEXT: [[FFLOOR2:%[0-9]+]]:_(s16) = G_FFLOOR [[TRUNC2]] + ; GFX9-NEXT: [[FFLOOR3:%[0-9]+]]:_(s16) = G_FFLOOR [[TRUNC3]] + ; GFX9-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[FFLOOR]](s16) + ; GFX9-NEXT: [[ANYEXT1:%[0-9]+]]:_(s32) = G_ANYEXT [[FFLOOR1]](s16) + ; GFX9-NEXT: [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[ANYEXT]](s32), [[ANYEXT1]](s32) + ; GFX9-NEXT: [[ANYEXT2:%[0-9]+]]:_(s32) = G_ANYEXT [[FFLOOR2]](s16) + ; GFX9-NEXT: [[ANYEXT3:%[0-9]+]]:_(s32) = G_ANYEXT [[FFLOOR3]](s16) + ; GFX9-NEXT: [[BUILD_VECTOR_TRUNC1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[ANYEXT2]](s32), [[ANYEXT3]](s32) + ; GFX9-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR_TRUNC]](<2 x s16>), [[BUILD_VECTOR_TRUNC1]](<2 x s16>) + ; GFX9-NEXT: $vgpr0_vgpr1 = COPY [[CONCAT_VECTORS]](<4 x s16>) %0:_(<4 x s16>) = COPY $vgpr0_vgpr1 %1:_(<4 x s16>) = G_FFLOOR %0 $vgpr0_vgpr1 = COPY %1 diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-fma.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-fma.mir index e28dc5e9b1e6..bcb249055a24 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-fma.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-fma.mir @@ -1,8 +1,8 @@ # NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py -# RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=tahiti -run-pass=legalizer 
-global-isel-abort=0 %s -o - | FileCheck -check-prefix=SI %s -# RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=fiji -run-pass=legalizer -global-isel-abort=0 %s -o - | FileCheck -check-prefix=VI %s -# RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx900 -run-pass=legalizer -global-isel-abort=0 %s -o - | FileCheck -check-prefix=GFX9 %s -# RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx1010 -run-pass=legalizer -global-isel-abort=0 %s -o - | FileCheck -check-prefix=GFX9 %s +# RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=tahiti -run-pass=legalizer %s -o - | FileCheck -check-prefix=SI %s +# RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=fiji -run-pass=legalizer %s -o - | FileCheck -check-prefix=VI %s +# RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx900 -run-pass=legalizer %s -o - | FileCheck -check-prefix=GFX9 %s +# RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx1010 -run-pass=legalizer %s -o - | FileCheck -check-prefix=GFX9 %s --- name: test_fma_s32 @@ -12,22 +12,22 @@ body: | ; SI-LABEL: name: test_fma_s32 ; SI: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; SI: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; SI: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; SI: [[FMA:%[0-9]+]]:_(s32) = G_FMA [[COPY]], [[COPY1]], [[COPY2]] - ; SI: $vgpr0 = COPY [[FMA]](s32) + ; SI-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; SI-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 + ; SI-NEXT: [[FMA:%[0-9]+]]:_(s32) = G_FMA [[COPY]], [[COPY1]], [[COPY2]] + ; SI-NEXT: $vgpr0 = COPY [[FMA]](s32) ; VI-LABEL: name: test_fma_s32 ; VI: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; VI: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; VI: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; VI: [[FMA:%[0-9]+]]:_(s32) = G_FMA [[COPY]], [[COPY1]], [[COPY2]] - ; VI: $vgpr0 = COPY [[FMA]](s32) + ; VI-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; VI-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 + ; VI-NEXT: [[FMA:%[0-9]+]]:_(s32) = G_FMA [[COPY]], [[COPY1]], [[COPY2]] + ; VI-NEXT: $vgpr0 = COPY [[FMA]](s32) ; GFX9-LABEL: name: test_fma_s32 ; GFX9: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX9: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX9: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; GFX9: [[FMA:%[0-9]+]]:_(s32) = G_FMA [[COPY]], [[COPY1]], [[COPY2]] - ; GFX9: $vgpr0 = COPY [[FMA]](s32) + ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; GFX9-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 + ; GFX9-NEXT: [[FMA:%[0-9]+]]:_(s32) = G_FMA [[COPY]], [[COPY1]], [[COPY2]] + ; GFX9-NEXT: $vgpr0 = COPY [[FMA]](s32) %0:_(s32) = COPY $vgpr0 %1:_(s32) = COPY $vgpr1 %2:_(s32) = COPY $vgpr2 @@ -42,22 +42,22 @@ body: | ; SI-LABEL: name: test_fma_s64 ; SI: [[COPY:%[0-9]+]]:_(s64) = COPY $vgpr0_vgpr1 - ; SI: [[COPY1:%[0-9]+]]:_(s64) = COPY $vgpr2_vgpr3 - ; SI: [[COPY2:%[0-9]+]]:_(s64) = COPY $vgpr4_vgpr5 - ; SI: [[FMA:%[0-9]+]]:_(s64) = G_FMA [[COPY]], [[COPY1]], [[COPY2]] - ; SI: $vgpr0_vgpr1 = COPY [[FMA]](s64) + ; SI-NEXT: [[COPY1:%[0-9]+]]:_(s64) = COPY $vgpr2_vgpr3 + ; SI-NEXT: [[COPY2:%[0-9]+]]:_(s64) = COPY $vgpr4_vgpr5 + ; SI-NEXT: [[FMA:%[0-9]+]]:_(s64) = G_FMA [[COPY]], [[COPY1]], [[COPY2]] + ; SI-NEXT: $vgpr0_vgpr1 = COPY [[FMA]](s64) ; VI-LABEL: name: test_fma_s64 ; VI: [[COPY:%[0-9]+]]:_(s64) = COPY $vgpr0_vgpr1 - ; VI: [[COPY1:%[0-9]+]]:_(s64) = COPY $vgpr2_vgpr3 - ; VI: [[COPY2:%[0-9]+]]:_(s64) = COPY $vgpr4_vgpr5 - ; VI: [[FMA:%[0-9]+]]:_(s64) = G_FMA [[COPY]], [[COPY1]], [[COPY2]] - ; VI: $vgpr0_vgpr1 = COPY [[FMA]](s64) + ; VI-NEXT: [[COPY1:%[0-9]+]]:_(s64) = COPY $vgpr2_vgpr3 + ; VI-NEXT: [[COPY2:%[0-9]+]]:_(s64) = COPY $vgpr4_vgpr5 + ; VI-NEXT: 
[[FMA:%[0-9]+]]:_(s64) = G_FMA [[COPY]], [[COPY1]], [[COPY2]] + ; VI-NEXT: $vgpr0_vgpr1 = COPY [[FMA]](s64) ; GFX9-LABEL: name: test_fma_s64 ; GFX9: [[COPY:%[0-9]+]]:_(s64) = COPY $vgpr0_vgpr1 - ; GFX9: [[COPY1:%[0-9]+]]:_(s64) = COPY $vgpr2_vgpr3 - ; GFX9: [[COPY2:%[0-9]+]]:_(s64) = COPY $vgpr4_vgpr5 - ; GFX9: [[FMA:%[0-9]+]]:_(s64) = G_FMA [[COPY]], [[COPY1]], [[COPY2]] - ; GFX9: $vgpr0_vgpr1 = COPY [[FMA]](s64) + ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(s64) = COPY $vgpr2_vgpr3 + ; GFX9-NEXT: [[COPY2:%[0-9]+]]:_(s64) = COPY $vgpr4_vgpr5 + ; GFX9-NEXT: [[FMA:%[0-9]+]]:_(s64) = G_FMA [[COPY]], [[COPY1]], [[COPY2]] + ; GFX9-NEXT: $vgpr0_vgpr1 = COPY [[FMA]](s64) %0:_(s64) = COPY $vgpr0_vgpr1 %1:_(s64) = COPY $vgpr2_vgpr3 %2:_(s64) = COPY $vgpr4_vgpr5 @@ -73,38 +73,38 @@ body: | ; SI-LABEL: name: test_fma_s16 ; SI: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; SI: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; SI: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; SI: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY]](s32) - ; SI: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY1]](s32) - ; SI: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[COPY2]](s32) - ; SI: [[FPEXT:%[0-9]+]]:_(s32) = G_FPEXT [[TRUNC]](s16) - ; SI: [[FPEXT1:%[0-9]+]]:_(s32) = G_FPEXT [[TRUNC1]](s16) - ; SI: [[FPEXT2:%[0-9]+]]:_(s32) = G_FPEXT [[TRUNC2]](s16) - ; SI: [[FMA:%[0-9]+]]:_(s32) = G_FMA [[FPEXT]], [[FPEXT1]], [[FPEXT2]] - ; SI: [[FPTRUNC:%[0-9]+]]:_(s16) = G_FPTRUNC [[FMA]](s32) - ; SI: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[FPTRUNC]](s16) - ; SI: $vgpr0 = COPY [[ANYEXT]](s32) + ; SI-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; SI-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; SI-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY]](s32) + ; SI-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY1]](s32) + ; SI-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[COPY2]](s32) + ; SI-NEXT: [[FPEXT:%[0-9]+]]:_(s32) = G_FPEXT [[TRUNC]](s16) + ; SI-NEXT: [[FPEXT1:%[0-9]+]]:_(s32) = G_FPEXT [[TRUNC1]](s16) + ; SI-NEXT: [[FPEXT2:%[0-9]+]]:_(s32) = G_FPEXT [[TRUNC2]](s16) + ; SI-NEXT: [[FMA:%[0-9]+]]:_(s32) = G_FMA [[FPEXT]], [[FPEXT1]], [[FPEXT2]] + ; SI-NEXT: [[FPTRUNC:%[0-9]+]]:_(s16) = G_FPTRUNC [[FMA]](s32) + ; SI-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[FPTRUNC]](s16) + ; SI-NEXT: $vgpr0 = COPY [[ANYEXT]](s32) ; VI-LABEL: name: test_fma_s16 ; VI: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; VI: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; VI: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; VI: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY]](s32) - ; VI: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY1]](s32) - ; VI: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[COPY2]](s32) - ; VI: [[FMA:%[0-9]+]]:_(s16) = G_FMA [[TRUNC]], [[TRUNC1]], [[TRUNC2]] - ; VI: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[FMA]](s16) - ; VI: $vgpr0 = COPY [[ANYEXT]](s32) + ; VI-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; VI-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; VI-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY]](s32) + ; VI-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY1]](s32) + ; VI-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[COPY2]](s32) + ; VI-NEXT: [[FMA:%[0-9]+]]:_(s16) = G_FMA [[TRUNC]], [[TRUNC1]], [[TRUNC2]] + ; VI-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[FMA]](s16) + ; VI-NEXT: $vgpr0 = COPY [[ANYEXT]](s32) ; GFX9-LABEL: name: test_fma_s16 ; GFX9: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX9: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX9: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX9: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY]](s32) - ; GFX9: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC 
[[COPY1]](s32) - ; GFX9: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[COPY2]](s32) - ; GFX9: [[FMA:%[0-9]+]]:_(s16) = G_FMA [[TRUNC]], [[TRUNC1]], [[TRUNC2]] - ; GFX9: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[FMA]](s16) - ; GFX9: $vgpr0 = COPY [[ANYEXT]](s32) + ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; GFX9-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; GFX9-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY]](s32) + ; GFX9-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY1]](s32) + ; GFX9-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[COPY2]](s32) + ; GFX9-NEXT: [[FMA:%[0-9]+]]:_(s16) = G_FMA [[TRUNC]], [[TRUNC1]], [[TRUNC2]] + ; GFX9-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[FMA]](s16) + ; GFX9-NEXT: $vgpr0 = COPY [[ANYEXT]](s32) %0:_(s32) = COPY $vgpr0 %1:_(s32) = COPY $vgpr1 %2:_(s32) = COPY $vgpr1 @@ -125,37 +125,37 @@ body: | ; SI-LABEL: name: test_fma_v2s32 ; SI: [[COPY:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr0_vgpr1 - ; SI: [[COPY1:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr2_vgpr3 - ; SI: [[COPY2:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr4_vgpr5 - ; SI: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<2 x s32>) - ; SI: [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](<2 x s32>) - ; SI: [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY2]](<2 x s32>) - ; SI: [[FMA:%[0-9]+]]:_(s32) = G_FMA [[UV]], [[UV2]], [[UV4]] - ; SI: [[FMA1:%[0-9]+]]:_(s32) = G_FMA [[UV1]], [[UV3]], [[UV5]] - ; SI: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[FMA]](s32), [[FMA1]](s32) - ; SI: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x s32>) + ; SI-NEXT: [[COPY1:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr2_vgpr3 + ; SI-NEXT: [[COPY2:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr4_vgpr5 + ; SI-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<2 x s32>) + ; SI-NEXT: [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](<2 x s32>) + ; SI-NEXT: [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY2]](<2 x s32>) + ; SI-NEXT: [[FMA:%[0-9]+]]:_(s32) = G_FMA [[UV]], [[UV2]], [[UV4]] + ; SI-NEXT: [[FMA1:%[0-9]+]]:_(s32) = G_FMA [[UV1]], [[UV3]], [[UV5]] + ; SI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[FMA]](s32), [[FMA1]](s32) + ; SI-NEXT: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x s32>) ; VI-LABEL: name: test_fma_v2s32 ; VI: [[COPY:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr0_vgpr1 - ; VI: [[COPY1:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr2_vgpr3 - ; VI: [[COPY2:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr4_vgpr5 - ; VI: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<2 x s32>) - ; VI: [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](<2 x s32>) - ; VI: [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY2]](<2 x s32>) - ; VI: [[FMA:%[0-9]+]]:_(s32) = G_FMA [[UV]], [[UV2]], [[UV4]] - ; VI: [[FMA1:%[0-9]+]]:_(s32) = G_FMA [[UV1]], [[UV3]], [[UV5]] - ; VI: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[FMA]](s32), [[FMA1]](s32) - ; VI: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x s32>) + ; VI-NEXT: [[COPY1:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr2_vgpr3 + ; VI-NEXT: [[COPY2:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr4_vgpr5 + ; VI-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<2 x s32>) + ; VI-NEXT: [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](<2 x s32>) + ; VI-NEXT: [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY2]](<2 x s32>) + ; VI-NEXT: 
[[FMA:%[0-9]+]]:_(s32) = G_FMA [[UV]], [[UV2]], [[UV4]] + ; VI-NEXT: [[FMA1:%[0-9]+]]:_(s32) = G_FMA [[UV1]], [[UV3]], [[UV5]] + ; VI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[FMA]](s32), [[FMA1]](s32) + ; VI-NEXT: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x s32>) ; GFX9-LABEL: name: test_fma_v2s32 ; GFX9: [[COPY:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr0_vgpr1 - ; GFX9: [[COPY1:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr2_vgpr3 - ; GFX9: [[COPY2:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr4_vgpr5 - ; GFX9: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<2 x s32>) - ; GFX9: [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](<2 x s32>) - ; GFX9: [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY2]](<2 x s32>) - ; GFX9: [[FMA:%[0-9]+]]:_(s32) = G_FMA [[UV]], [[UV2]], [[UV4]] - ; GFX9: [[FMA1:%[0-9]+]]:_(s32) = G_FMA [[UV1]], [[UV3]], [[UV5]] - ; GFX9: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[FMA]](s32), [[FMA1]](s32) - ; GFX9: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x s32>) + ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr2_vgpr3 + ; GFX9-NEXT: [[COPY2:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr4_vgpr5 + ; GFX9-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<2 x s32>) + ; GFX9-NEXT: [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](<2 x s32>) + ; GFX9-NEXT: [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY2]](<2 x s32>) + ; GFX9-NEXT: [[FMA:%[0-9]+]]:_(s32) = G_FMA [[UV]], [[UV2]], [[UV4]] + ; GFX9-NEXT: [[FMA1:%[0-9]+]]:_(s32) = G_FMA [[UV1]], [[UV3]], [[UV5]] + ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[FMA]](s32), [[FMA1]](s32) + ; GFX9-NEXT: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x s32>) %0:_(<2 x s32>) = COPY $vgpr0_vgpr1 %1:_(<2 x s32>) = COPY $vgpr2_vgpr3 %2:_(<2 x s32>) = COPY $vgpr4_vgpr5 @@ -171,40 +171,40 @@ body: | ; SI-LABEL: name: test_fma_v3s32 ; SI: [[COPY:%[0-9]+]]:_(<3 x s32>) = COPY $vgpr0_vgpr1_vgpr2 - ; SI: [[COPY1:%[0-9]+]]:_(<3 x s32>) = COPY $vgpr3_vgpr4_vgpr5 - ; SI: [[COPY2:%[0-9]+]]:_(<3 x s32>) = COPY $vgpr6_vgpr7_vgpr8 - ; SI: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<3 x s32>) - ; SI: [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](<3 x s32>) - ; SI: [[UV6:%[0-9]+]]:_(s32), [[UV7:%[0-9]+]]:_(s32), [[UV8:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY2]](<3 x s32>) - ; SI: [[FMA:%[0-9]+]]:_(s32) = G_FMA [[UV]], [[UV3]], [[UV6]] - ; SI: [[FMA1:%[0-9]+]]:_(s32) = G_FMA [[UV1]], [[UV4]], [[UV7]] - ; SI: [[FMA2:%[0-9]+]]:_(s32) = G_FMA [[UV2]], [[UV5]], [[UV8]] - ; SI: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[FMA]](s32), [[FMA1]](s32), [[FMA2]](s32) - ; SI: $vgpr0_vgpr1_vgpr2 = COPY [[BUILD_VECTOR]](<3 x s32>) + ; SI-NEXT: [[COPY1:%[0-9]+]]:_(<3 x s32>) = COPY $vgpr3_vgpr4_vgpr5 + ; SI-NEXT: [[COPY2:%[0-9]+]]:_(<3 x s32>) = COPY $vgpr6_vgpr7_vgpr8 + ; SI-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<3 x s32>) + ; SI-NEXT: [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](<3 x s32>) + ; SI-NEXT: [[UV6:%[0-9]+]]:_(s32), [[UV7:%[0-9]+]]:_(s32), [[UV8:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY2]](<3 x s32>) + ; SI-NEXT: [[FMA:%[0-9]+]]:_(s32) = G_FMA [[UV]], [[UV3]], [[UV6]] + ; SI-NEXT: [[FMA1:%[0-9]+]]:_(s32) = G_FMA [[UV1]], [[UV4]], [[UV7]] + ; 
SI-NEXT: [[FMA2:%[0-9]+]]:_(s32) = G_FMA [[UV2]], [[UV5]], [[UV8]] + ; SI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[FMA]](s32), [[FMA1]](s32), [[FMA2]](s32) + ; SI-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[BUILD_VECTOR]](<3 x s32>) ; VI-LABEL: name: test_fma_v3s32 ; VI: [[COPY:%[0-9]+]]:_(<3 x s32>) = COPY $vgpr0_vgpr1_vgpr2 - ; VI: [[COPY1:%[0-9]+]]:_(<3 x s32>) = COPY $vgpr3_vgpr4_vgpr5 - ; VI: [[COPY2:%[0-9]+]]:_(<3 x s32>) = COPY $vgpr6_vgpr7_vgpr8 - ; VI: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<3 x s32>) - ; VI: [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](<3 x s32>) - ; VI: [[UV6:%[0-9]+]]:_(s32), [[UV7:%[0-9]+]]:_(s32), [[UV8:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY2]](<3 x s32>) - ; VI: [[FMA:%[0-9]+]]:_(s32) = G_FMA [[UV]], [[UV3]], [[UV6]] - ; VI: [[FMA1:%[0-9]+]]:_(s32) = G_FMA [[UV1]], [[UV4]], [[UV7]] - ; VI: [[FMA2:%[0-9]+]]:_(s32) = G_FMA [[UV2]], [[UV5]], [[UV8]] - ; VI: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[FMA]](s32), [[FMA1]](s32), [[FMA2]](s32) - ; VI: $vgpr0_vgpr1_vgpr2 = COPY [[BUILD_VECTOR]](<3 x s32>) + ; VI-NEXT: [[COPY1:%[0-9]+]]:_(<3 x s32>) = COPY $vgpr3_vgpr4_vgpr5 + ; VI-NEXT: [[COPY2:%[0-9]+]]:_(<3 x s32>) = COPY $vgpr6_vgpr7_vgpr8 + ; VI-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<3 x s32>) + ; VI-NEXT: [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](<3 x s32>) + ; VI-NEXT: [[UV6:%[0-9]+]]:_(s32), [[UV7:%[0-9]+]]:_(s32), [[UV8:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY2]](<3 x s32>) + ; VI-NEXT: [[FMA:%[0-9]+]]:_(s32) = G_FMA [[UV]], [[UV3]], [[UV6]] + ; VI-NEXT: [[FMA1:%[0-9]+]]:_(s32) = G_FMA [[UV1]], [[UV4]], [[UV7]] + ; VI-NEXT: [[FMA2:%[0-9]+]]:_(s32) = G_FMA [[UV2]], [[UV5]], [[UV8]] + ; VI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[FMA]](s32), [[FMA1]](s32), [[FMA2]](s32) + ; VI-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[BUILD_VECTOR]](<3 x s32>) ; GFX9-LABEL: name: test_fma_v3s32 ; GFX9: [[COPY:%[0-9]+]]:_(<3 x s32>) = COPY $vgpr0_vgpr1_vgpr2 - ; GFX9: [[COPY1:%[0-9]+]]:_(<3 x s32>) = COPY $vgpr3_vgpr4_vgpr5 - ; GFX9: [[COPY2:%[0-9]+]]:_(<3 x s32>) = COPY $vgpr6_vgpr7_vgpr8 - ; GFX9: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<3 x s32>) - ; GFX9: [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](<3 x s32>) - ; GFX9: [[UV6:%[0-9]+]]:_(s32), [[UV7:%[0-9]+]]:_(s32), [[UV8:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY2]](<3 x s32>) - ; GFX9: [[FMA:%[0-9]+]]:_(s32) = G_FMA [[UV]], [[UV3]], [[UV6]] - ; GFX9: [[FMA1:%[0-9]+]]:_(s32) = G_FMA [[UV1]], [[UV4]], [[UV7]] - ; GFX9: [[FMA2:%[0-9]+]]:_(s32) = G_FMA [[UV2]], [[UV5]], [[UV8]] - ; GFX9: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[FMA]](s32), [[FMA1]](s32), [[FMA2]](s32) - ; GFX9: $vgpr0_vgpr1_vgpr2 = COPY [[BUILD_VECTOR]](<3 x s32>) + ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(<3 x s32>) = COPY $vgpr3_vgpr4_vgpr5 + ; GFX9-NEXT: [[COPY2:%[0-9]+]]:_(<3 x s32>) = COPY $vgpr6_vgpr7_vgpr8 + ; GFX9-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<3 x s32>) + ; GFX9-NEXT: [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](<3 x s32>) + ; GFX9-NEXT: [[UV6:%[0-9]+]]:_(s32), [[UV7:%[0-9]+]]:_(s32), [[UV8:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES 
[[COPY2]](<3 x s32>) + ; GFX9-NEXT: [[FMA:%[0-9]+]]:_(s32) = G_FMA [[UV]], [[UV3]], [[UV6]] + ; GFX9-NEXT: [[FMA1:%[0-9]+]]:_(s32) = G_FMA [[UV1]], [[UV4]], [[UV7]] + ; GFX9-NEXT: [[FMA2:%[0-9]+]]:_(s32) = G_FMA [[UV2]], [[UV5]], [[UV8]] + ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[FMA]](s32), [[FMA1]](s32), [[FMA2]](s32) + ; GFX9-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[BUILD_VECTOR]](<3 x s32>) %0:_(<3 x s32>) = COPY $vgpr0_vgpr1_vgpr2 %1:_(<3 x s32>) = COPY $vgpr3_vgpr4_vgpr5 %2:_(<3 x s32>) = COPY $vgpr6_vgpr7_vgpr8 @@ -220,43 +220,43 @@ body: | ; SI-LABEL: name: test_fma_v4s32 ; SI: [[COPY:%[0-9]+]]:_(<4 x s32>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3 - ; SI: [[COPY1:%[0-9]+]]:_(<4 x s32>) = COPY $vgpr4_vgpr5_vgpr6_vgpr7 - ; SI: [[COPY2:%[0-9]+]]:_(<4 x s32>) = COPY $vgpr8_vgpr9_vgpr10_vgpr11 - ; SI: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<4 x s32>) - ; SI: [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32), [[UV6:%[0-9]+]]:_(s32), [[UV7:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](<4 x s32>) - ; SI: [[UV8:%[0-9]+]]:_(s32), [[UV9:%[0-9]+]]:_(s32), [[UV10:%[0-9]+]]:_(s32), [[UV11:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY2]](<4 x s32>) - ; SI: [[FMA:%[0-9]+]]:_(s32) = G_FMA [[UV]], [[UV4]], [[UV8]] - ; SI: [[FMA1:%[0-9]+]]:_(s32) = G_FMA [[UV1]], [[UV5]], [[UV9]] - ; SI: [[FMA2:%[0-9]+]]:_(s32) = G_FMA [[UV2]], [[UV6]], [[UV10]] - ; SI: [[FMA3:%[0-9]+]]:_(s32) = G_FMA [[UV3]], [[UV7]], [[UV11]] - ; SI: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[FMA]](s32), [[FMA1]](s32), [[FMA2]](s32), [[FMA3]](s32) - ; SI: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<4 x s32>) + ; SI-NEXT: [[COPY1:%[0-9]+]]:_(<4 x s32>) = COPY $vgpr4_vgpr5_vgpr6_vgpr7 + ; SI-NEXT: [[COPY2:%[0-9]+]]:_(<4 x s32>) = COPY $vgpr8_vgpr9_vgpr10_vgpr11 + ; SI-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<4 x s32>) + ; SI-NEXT: [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32), [[UV6:%[0-9]+]]:_(s32), [[UV7:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](<4 x s32>) + ; SI-NEXT: [[UV8:%[0-9]+]]:_(s32), [[UV9:%[0-9]+]]:_(s32), [[UV10:%[0-9]+]]:_(s32), [[UV11:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY2]](<4 x s32>) + ; SI-NEXT: [[FMA:%[0-9]+]]:_(s32) = G_FMA [[UV]], [[UV4]], [[UV8]] + ; SI-NEXT: [[FMA1:%[0-9]+]]:_(s32) = G_FMA [[UV1]], [[UV5]], [[UV9]] + ; SI-NEXT: [[FMA2:%[0-9]+]]:_(s32) = G_FMA [[UV2]], [[UV6]], [[UV10]] + ; SI-NEXT: [[FMA3:%[0-9]+]]:_(s32) = G_FMA [[UV3]], [[UV7]], [[UV11]] + ; SI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[FMA]](s32), [[FMA1]](s32), [[FMA2]](s32), [[FMA3]](s32) + ; SI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<4 x s32>) ; VI-LABEL: name: test_fma_v4s32 ; VI: [[COPY:%[0-9]+]]:_(<4 x s32>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3 - ; VI: [[COPY1:%[0-9]+]]:_(<4 x s32>) = COPY $vgpr4_vgpr5_vgpr6_vgpr7 - ; VI: [[COPY2:%[0-9]+]]:_(<4 x s32>) = COPY $vgpr8_vgpr9_vgpr10_vgpr11 - ; VI: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<4 x s32>) - ; VI: [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32), [[UV6:%[0-9]+]]:_(s32), [[UV7:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](<4 x s32>) - ; VI: [[UV8:%[0-9]+]]:_(s32), [[UV9:%[0-9]+]]:_(s32), [[UV10:%[0-9]+]]:_(s32), [[UV11:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY2]](<4 x s32>) - ; VI: [[FMA:%[0-9]+]]:_(s32) = G_FMA [[UV]], [[UV4]], [[UV8]] - ; VI: [[FMA1:%[0-9]+]]:_(s32) 
= G_FMA [[UV1]], [[UV5]], [[UV9]] - ; VI: [[FMA2:%[0-9]+]]:_(s32) = G_FMA [[UV2]], [[UV6]], [[UV10]] - ; VI: [[FMA3:%[0-9]+]]:_(s32) = G_FMA [[UV3]], [[UV7]], [[UV11]] - ; VI: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[FMA]](s32), [[FMA1]](s32), [[FMA2]](s32), [[FMA3]](s32) - ; VI: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<4 x s32>) + ; VI-NEXT: [[COPY1:%[0-9]+]]:_(<4 x s32>) = COPY $vgpr4_vgpr5_vgpr6_vgpr7 + ; VI-NEXT: [[COPY2:%[0-9]+]]:_(<4 x s32>) = COPY $vgpr8_vgpr9_vgpr10_vgpr11 + ; VI-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<4 x s32>) + ; VI-NEXT: [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32), [[UV6:%[0-9]+]]:_(s32), [[UV7:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](<4 x s32>) + ; VI-NEXT: [[UV8:%[0-9]+]]:_(s32), [[UV9:%[0-9]+]]:_(s32), [[UV10:%[0-9]+]]:_(s32), [[UV11:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY2]](<4 x s32>) + ; VI-NEXT: [[FMA:%[0-9]+]]:_(s32) = G_FMA [[UV]], [[UV4]], [[UV8]] + ; VI-NEXT: [[FMA1:%[0-9]+]]:_(s32) = G_FMA [[UV1]], [[UV5]], [[UV9]] + ; VI-NEXT: [[FMA2:%[0-9]+]]:_(s32) = G_FMA [[UV2]], [[UV6]], [[UV10]] + ; VI-NEXT: [[FMA3:%[0-9]+]]:_(s32) = G_FMA [[UV3]], [[UV7]], [[UV11]] + ; VI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[FMA]](s32), [[FMA1]](s32), [[FMA2]](s32), [[FMA3]](s32) + ; VI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<4 x s32>) ; GFX9-LABEL: name: test_fma_v4s32 ; GFX9: [[COPY:%[0-9]+]]:_(<4 x s32>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3 - ; GFX9: [[COPY1:%[0-9]+]]:_(<4 x s32>) = COPY $vgpr4_vgpr5_vgpr6_vgpr7 - ; GFX9: [[COPY2:%[0-9]+]]:_(<4 x s32>) = COPY $vgpr8_vgpr9_vgpr10_vgpr11 - ; GFX9: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<4 x s32>) - ; GFX9: [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32), [[UV6:%[0-9]+]]:_(s32), [[UV7:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](<4 x s32>) - ; GFX9: [[UV8:%[0-9]+]]:_(s32), [[UV9:%[0-9]+]]:_(s32), [[UV10:%[0-9]+]]:_(s32), [[UV11:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY2]](<4 x s32>) - ; GFX9: [[FMA:%[0-9]+]]:_(s32) = G_FMA [[UV]], [[UV4]], [[UV8]] - ; GFX9: [[FMA1:%[0-9]+]]:_(s32) = G_FMA [[UV1]], [[UV5]], [[UV9]] - ; GFX9: [[FMA2:%[0-9]+]]:_(s32) = G_FMA [[UV2]], [[UV6]], [[UV10]] - ; GFX9: [[FMA3:%[0-9]+]]:_(s32) = G_FMA [[UV3]], [[UV7]], [[UV11]] - ; GFX9: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[FMA]](s32), [[FMA1]](s32), [[FMA2]](s32), [[FMA3]](s32) - ; GFX9: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<4 x s32>) + ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(<4 x s32>) = COPY $vgpr4_vgpr5_vgpr6_vgpr7 + ; GFX9-NEXT: [[COPY2:%[0-9]+]]:_(<4 x s32>) = COPY $vgpr8_vgpr9_vgpr10_vgpr11 + ; GFX9-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<4 x s32>) + ; GFX9-NEXT: [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32), [[UV6:%[0-9]+]]:_(s32), [[UV7:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](<4 x s32>) + ; GFX9-NEXT: [[UV8:%[0-9]+]]:_(s32), [[UV9:%[0-9]+]]:_(s32), [[UV10:%[0-9]+]]:_(s32), [[UV11:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY2]](<4 x s32>) + ; GFX9-NEXT: [[FMA:%[0-9]+]]:_(s32) = G_FMA [[UV]], [[UV4]], [[UV8]] + ; GFX9-NEXT: [[FMA1:%[0-9]+]]:_(s32) = G_FMA [[UV1]], [[UV5]], [[UV9]] + ; GFX9-NEXT: [[FMA2:%[0-9]+]]:_(s32) = G_FMA [[UV2]], [[UV6]], [[UV10]] + ; GFX9-NEXT: [[FMA3:%[0-9]+]]:_(s32) = G_FMA [[UV3]], [[UV7]], [[UV11]] + ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = 
G_BUILD_VECTOR [[FMA]](s32), [[FMA1]](s32), [[FMA2]](s32), [[FMA3]](s32) + ; GFX9-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<4 x s32>) %0:_(<4 x s32>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3 %1:_(<4 x s32>) = COPY $vgpr4_vgpr5_vgpr6_vgpr7 %2:_(<4 x s32>) = COPY $vgpr8_vgpr9_vgpr10_vgpr11 @@ -272,37 +272,37 @@ body: | ; SI-LABEL: name: test_fma_v2s64 ; SI: [[COPY:%[0-9]+]]:_(<2 x s64>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3 - ; SI: [[COPY1:%[0-9]+]]:_(<2 x s64>) = COPY $vgpr4_vgpr5_vgpr6_vgpr7 - ; SI: [[COPY2:%[0-9]+]]:_(<2 x s64>) = COPY $vgpr8_vgpr9_vgpr10_vgpr11 - ; SI: [[UV:%[0-9]+]]:_(s64), [[UV1:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[COPY]](<2 x s64>) - ; SI: [[UV2:%[0-9]+]]:_(s64), [[UV3:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[COPY1]](<2 x s64>) - ; SI: [[UV4:%[0-9]+]]:_(s64), [[UV5:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[COPY2]](<2 x s64>) - ; SI: [[FMA:%[0-9]+]]:_(s64) = G_FMA [[UV]], [[UV2]], [[UV4]] - ; SI: [[FMA1:%[0-9]+]]:_(s64) = G_FMA [[UV1]], [[UV3]], [[UV5]] - ; SI: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s64>) = G_BUILD_VECTOR [[FMA]](s64), [[FMA1]](s64) - ; SI: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<2 x s64>) + ; SI-NEXT: [[COPY1:%[0-9]+]]:_(<2 x s64>) = COPY $vgpr4_vgpr5_vgpr6_vgpr7 + ; SI-NEXT: [[COPY2:%[0-9]+]]:_(<2 x s64>) = COPY $vgpr8_vgpr9_vgpr10_vgpr11 + ; SI-NEXT: [[UV:%[0-9]+]]:_(s64), [[UV1:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[COPY]](<2 x s64>) + ; SI-NEXT: [[UV2:%[0-9]+]]:_(s64), [[UV3:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[COPY1]](<2 x s64>) + ; SI-NEXT: [[UV4:%[0-9]+]]:_(s64), [[UV5:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[COPY2]](<2 x s64>) + ; SI-NEXT: [[FMA:%[0-9]+]]:_(s64) = G_FMA [[UV]], [[UV2]], [[UV4]] + ; SI-NEXT: [[FMA1:%[0-9]+]]:_(s64) = G_FMA [[UV1]], [[UV3]], [[UV5]] + ; SI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s64>) = G_BUILD_VECTOR [[FMA]](s64), [[FMA1]](s64) + ; SI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<2 x s64>) ; VI-LABEL: name: test_fma_v2s64 ; VI: [[COPY:%[0-9]+]]:_(<2 x s64>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3 - ; VI: [[COPY1:%[0-9]+]]:_(<2 x s64>) = COPY $vgpr4_vgpr5_vgpr6_vgpr7 - ; VI: [[COPY2:%[0-9]+]]:_(<2 x s64>) = COPY $vgpr8_vgpr9_vgpr10_vgpr11 - ; VI: [[UV:%[0-9]+]]:_(s64), [[UV1:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[COPY]](<2 x s64>) - ; VI: [[UV2:%[0-9]+]]:_(s64), [[UV3:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[COPY1]](<2 x s64>) - ; VI: [[UV4:%[0-9]+]]:_(s64), [[UV5:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[COPY2]](<2 x s64>) - ; VI: [[FMA:%[0-9]+]]:_(s64) = G_FMA [[UV]], [[UV2]], [[UV4]] - ; VI: [[FMA1:%[0-9]+]]:_(s64) = G_FMA [[UV1]], [[UV3]], [[UV5]] - ; VI: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s64>) = G_BUILD_VECTOR [[FMA]](s64), [[FMA1]](s64) - ; VI: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<2 x s64>) + ; VI-NEXT: [[COPY1:%[0-9]+]]:_(<2 x s64>) = COPY $vgpr4_vgpr5_vgpr6_vgpr7 + ; VI-NEXT: [[COPY2:%[0-9]+]]:_(<2 x s64>) = COPY $vgpr8_vgpr9_vgpr10_vgpr11 + ; VI-NEXT: [[UV:%[0-9]+]]:_(s64), [[UV1:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[COPY]](<2 x s64>) + ; VI-NEXT: [[UV2:%[0-9]+]]:_(s64), [[UV3:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[COPY1]](<2 x s64>) + ; VI-NEXT: [[UV4:%[0-9]+]]:_(s64), [[UV5:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[COPY2]](<2 x s64>) + ; VI-NEXT: [[FMA:%[0-9]+]]:_(s64) = G_FMA [[UV]], [[UV2]], [[UV4]] + ; VI-NEXT: [[FMA1:%[0-9]+]]:_(s64) = G_FMA [[UV1]], [[UV3]], [[UV5]] + ; VI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s64>) = G_BUILD_VECTOR [[FMA]](s64), [[FMA1]](s64) + ; VI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<2 x s64>) ; GFX9-LABEL: name: test_fma_v2s64 ; GFX9: [[COPY:%[0-9]+]]:_(<2 x s64>) = 
COPY $vgpr0_vgpr1_vgpr2_vgpr3 - ; GFX9: [[COPY1:%[0-9]+]]:_(<2 x s64>) = COPY $vgpr4_vgpr5_vgpr6_vgpr7 - ; GFX9: [[COPY2:%[0-9]+]]:_(<2 x s64>) = COPY $vgpr8_vgpr9_vgpr10_vgpr11 - ; GFX9: [[UV:%[0-9]+]]:_(s64), [[UV1:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[COPY]](<2 x s64>) - ; GFX9: [[UV2:%[0-9]+]]:_(s64), [[UV3:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[COPY1]](<2 x s64>) - ; GFX9: [[UV4:%[0-9]+]]:_(s64), [[UV5:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[COPY2]](<2 x s64>) - ; GFX9: [[FMA:%[0-9]+]]:_(s64) = G_FMA [[UV]], [[UV2]], [[UV4]] - ; GFX9: [[FMA1:%[0-9]+]]:_(s64) = G_FMA [[UV1]], [[UV3]], [[UV5]] - ; GFX9: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s64>) = G_BUILD_VECTOR [[FMA]](s64), [[FMA1]](s64) - ; GFX9: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<2 x s64>) + ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(<2 x s64>) = COPY $vgpr4_vgpr5_vgpr6_vgpr7 + ; GFX9-NEXT: [[COPY2:%[0-9]+]]:_(<2 x s64>) = COPY $vgpr8_vgpr9_vgpr10_vgpr11 + ; GFX9-NEXT: [[UV:%[0-9]+]]:_(s64), [[UV1:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[COPY]](<2 x s64>) + ; GFX9-NEXT: [[UV2:%[0-9]+]]:_(s64), [[UV3:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[COPY1]](<2 x s64>) + ; GFX9-NEXT: [[UV4:%[0-9]+]]:_(s64), [[UV5:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[COPY2]](<2 x s64>) + ; GFX9-NEXT: [[FMA:%[0-9]+]]:_(s64) = G_FMA [[UV]], [[UV2]], [[UV4]] + ; GFX9-NEXT: [[FMA1:%[0-9]+]]:_(s64) = G_FMA [[UV1]], [[UV3]], [[UV5]] + ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s64>) = G_BUILD_VECTOR [[FMA]](s64), [[FMA1]](s64) + ; GFX9-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<2 x s64>) %0:_(<2 x s64>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3 %1:_(<2 x s64>) = COPY $vgpr4_vgpr5_vgpr6_vgpr7 %2:_(<2 x s64>) = COPY $vgpr8_vgpr9_vgpr10_vgpr11 @@ -318,68 +318,68 @@ body: | ; SI-LABEL: name: test_fma_v2s16 ; SI: [[COPY:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0 - ; SI: [[COPY1:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr1 - ; SI: [[COPY2:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr2 - ; SI: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY]](<2 x s16>) - ; SI: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST]](s32) - ; SI: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; SI: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) - ; SI: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) - ; SI: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[COPY1]](<2 x s16>) - ; SI: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST1]](s32) - ; SI: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C]](s32) - ; SI: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR1]](s32) - ; SI: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[COPY2]](<2 x s16>) - ; SI: [[TRUNC4:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST2]](s32) - ; SI: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C]](s32) - ; SI: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR2]](s32) - ; SI: [[FPEXT:%[0-9]+]]:_(s32) = G_FPEXT [[TRUNC]](s16) - ; SI: [[FPEXT1:%[0-9]+]]:_(s32) = G_FPEXT [[TRUNC2]](s16) - ; SI: [[FPEXT2:%[0-9]+]]:_(s32) = G_FPEXT [[TRUNC4]](s16) - ; SI: [[FMA:%[0-9]+]]:_(s32) = G_FMA [[FPEXT]], [[FPEXT1]], [[FPEXT2]] - ; SI: [[FPTRUNC:%[0-9]+]]:_(s16) = G_FPTRUNC [[FMA]](s32) - ; SI: [[FPEXT3:%[0-9]+]]:_(s32) = G_FPEXT [[TRUNC1]](s16) - ; SI: [[FPEXT4:%[0-9]+]]:_(s32) = G_FPEXT [[TRUNC3]](s16) - ; SI: [[FPEXT5:%[0-9]+]]:_(s32) = G_FPEXT [[TRUNC5]](s16) - ; SI: [[FMA1:%[0-9]+]]:_(s32) = G_FMA [[FPEXT3]], [[FPEXT4]], [[FPEXT5]] - ; SI: [[FPTRUNC1:%[0-9]+]]:_(s16) = G_FPTRUNC [[FMA1]](s32) - ; SI: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[FPTRUNC]](s16) - ; SI: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[FPTRUNC1]](s16) - ; SI: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C]](s32) - ; SI: 
[[OR:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL]] - ; SI: [[BITCAST3:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) - ; SI: $vgpr0 = COPY [[BITCAST3]](<2 x s16>) + ; SI-NEXT: [[COPY1:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr1 + ; SI-NEXT: [[COPY2:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr2 + ; SI-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY]](<2 x s16>) + ; SI-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST]](s32) + ; SI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; SI-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) + ; SI-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) + ; SI-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[COPY1]](<2 x s16>) + ; SI-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST1]](s32) + ; SI-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C]](s32) + ; SI-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR1]](s32) + ; SI-NEXT: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[COPY2]](<2 x s16>) + ; SI-NEXT: [[TRUNC4:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST2]](s32) + ; SI-NEXT: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C]](s32) + ; SI-NEXT: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR2]](s32) + ; SI-NEXT: [[FPEXT:%[0-9]+]]:_(s32) = G_FPEXT [[TRUNC]](s16) + ; SI-NEXT: [[FPEXT1:%[0-9]+]]:_(s32) = G_FPEXT [[TRUNC2]](s16) + ; SI-NEXT: [[FPEXT2:%[0-9]+]]:_(s32) = G_FPEXT [[TRUNC4]](s16) + ; SI-NEXT: [[FMA:%[0-9]+]]:_(s32) = G_FMA [[FPEXT]], [[FPEXT1]], [[FPEXT2]] + ; SI-NEXT: [[FPTRUNC:%[0-9]+]]:_(s16) = G_FPTRUNC [[FMA]](s32) + ; SI-NEXT: [[FPEXT3:%[0-9]+]]:_(s32) = G_FPEXT [[TRUNC1]](s16) + ; SI-NEXT: [[FPEXT4:%[0-9]+]]:_(s32) = G_FPEXT [[TRUNC3]](s16) + ; SI-NEXT: [[FPEXT5:%[0-9]+]]:_(s32) = G_FPEXT [[TRUNC5]](s16) + ; SI-NEXT: [[FMA1:%[0-9]+]]:_(s32) = G_FMA [[FPEXT3]], [[FPEXT4]], [[FPEXT5]] + ; SI-NEXT: [[FPTRUNC1:%[0-9]+]]:_(s16) = G_FPTRUNC [[FMA1]](s32) + ; SI-NEXT: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[FPTRUNC]](s16) + ; SI-NEXT: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[FPTRUNC1]](s16) + ; SI-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C]](s32) + ; SI-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL]] + ; SI-NEXT: [[BITCAST3:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) + ; SI-NEXT: $vgpr0 = COPY [[BITCAST3]](<2 x s16>) ; VI-LABEL: name: test_fma_v2s16 ; VI: [[COPY:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0 - ; VI: [[COPY1:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr1 - ; VI: [[COPY2:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr2 - ; VI: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY]](<2 x s16>) - ; VI: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST]](s32) - ; VI: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; VI: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) - ; VI: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) - ; VI: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[COPY1]](<2 x s16>) - ; VI: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST1]](s32) - ; VI: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C]](s32) - ; VI: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR1]](s32) - ; VI: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[COPY2]](<2 x s16>) - ; VI: [[TRUNC4:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST2]](s32) - ; VI: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C]](s32) - ; VI: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR2]](s32) - ; VI: [[FMA:%[0-9]+]]:_(s16) = G_FMA [[TRUNC]], [[TRUNC2]], [[TRUNC4]] - ; VI: [[FMA1:%[0-9]+]]:_(s16) = G_FMA [[TRUNC1]], [[TRUNC3]], [[TRUNC5]] - ; VI: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[FMA]](s16) - ; VI: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[FMA1]](s16) - ; VI: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C]](s32) - ; VI: 
[[OR:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL]] - ; VI: [[BITCAST3:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) - ; VI: $vgpr0 = COPY [[BITCAST3]](<2 x s16>) + ; VI-NEXT: [[COPY1:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr1 + ; VI-NEXT: [[COPY2:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr2 + ; VI-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY]](<2 x s16>) + ; VI-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST]](s32) + ; VI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; VI-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) + ; VI-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) + ; VI-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[COPY1]](<2 x s16>) + ; VI-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST1]](s32) + ; VI-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C]](s32) + ; VI-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR1]](s32) + ; VI-NEXT: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[COPY2]](<2 x s16>) + ; VI-NEXT: [[TRUNC4:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST2]](s32) + ; VI-NEXT: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C]](s32) + ; VI-NEXT: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR2]](s32) + ; VI-NEXT: [[FMA:%[0-9]+]]:_(s16) = G_FMA [[TRUNC]], [[TRUNC2]], [[TRUNC4]] + ; VI-NEXT: [[FMA1:%[0-9]+]]:_(s16) = G_FMA [[TRUNC1]], [[TRUNC3]], [[TRUNC5]] + ; VI-NEXT: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[FMA]](s16) + ; VI-NEXT: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[FMA1]](s16) + ; VI-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C]](s32) + ; VI-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL]] + ; VI-NEXT: [[BITCAST3:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) + ; VI-NEXT: $vgpr0 = COPY [[BITCAST3]](<2 x s16>) ; GFX9-LABEL: name: test_fma_v2s16 ; GFX9: [[COPY:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0 - ; GFX9: [[COPY1:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr1 - ; GFX9: [[COPY2:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr2 - ; GFX9: [[FMA:%[0-9]+]]:_(<2 x s16>) = G_FMA [[COPY]], [[COPY1]], [[COPY2]] - ; GFX9: $vgpr0 = COPY [[FMA]](<2 x s16>) + ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr1 + ; GFX9-NEXT: [[COPY2:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr2 + ; GFX9-NEXT: [[FMA:%[0-9]+]]:_(<2 x s16>) = G_FMA [[COPY]], [[COPY1]], [[COPY2]] + ; GFX9-NEXT: $vgpr0 = COPY [[FMA]](<2 x s16>) %0:_(<2 x s16>) = COPY $vgpr0 %1:_(<2 x s16>) = COPY $vgpr1 %2:_(<2 x s16>) = COPY $vgpr2 @@ -395,159 +395,159 @@ body: | ; SI-LABEL: name: test_fma_v3s16 ; SI: [[COPY:%[0-9]+]]:_(<6 x s16>) = COPY $vgpr0_vgpr1_vgpr2 - ; SI: [[COPY1:%[0-9]+]]:_(<6 x s16>) = COPY $vgpr3_vgpr4_vgpr5 - ; SI: [[COPY2:%[0-9]+]]:_(<6 x s16>) = COPY $vgpr6_vgpr7_vgpr8 - ; SI: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>), [[UV2:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[COPY]](<6 x s16>) - ; SI: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>) - ; SI: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST]](s32) - ; SI: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; SI: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) - ; SI: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) - ; SI: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>) - ; SI: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST1]](s32) - ; SI: [[UV3:%[0-9]+]]:_(<2 x s16>), [[UV4:%[0-9]+]]:_(<2 x s16>), [[UV5:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[COPY1]](<6 x s16>) - ; SI: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[UV3]](<2 x s16>) - ; SI: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST2]](s32) - ; SI: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C]](s32) - ; SI: [[TRUNC4:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR1]](s32) - ; 
SI: [[BITCAST3:%[0-9]+]]:_(s32) = G_BITCAST [[UV4]](<2 x s16>) - ; SI: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST3]](s32) - ; SI: [[UV6:%[0-9]+]]:_(<2 x s16>), [[UV7:%[0-9]+]]:_(<2 x s16>), [[UV8:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[COPY2]](<6 x s16>) - ; SI: [[BITCAST4:%[0-9]+]]:_(s32) = G_BITCAST [[UV6]](<2 x s16>) - ; SI: [[TRUNC6:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST4]](s32) - ; SI: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST4]], [[C]](s32) - ; SI: [[TRUNC7:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR2]](s32) - ; SI: [[BITCAST5:%[0-9]+]]:_(s32) = G_BITCAST [[UV7]](<2 x s16>) - ; SI: [[TRUNC8:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST5]](s32) - ; SI: [[FPEXT:%[0-9]+]]:_(s32) = G_FPEXT [[TRUNC]](s16) - ; SI: [[FPEXT1:%[0-9]+]]:_(s32) = G_FPEXT [[TRUNC3]](s16) - ; SI: [[FPEXT2:%[0-9]+]]:_(s32) = G_FPEXT [[TRUNC6]](s16) - ; SI: [[FMA:%[0-9]+]]:_(s32) = G_FMA [[FPEXT]], [[FPEXT1]], [[FPEXT2]] - ; SI: [[FPTRUNC:%[0-9]+]]:_(s16) = G_FPTRUNC [[FMA]](s32) - ; SI: [[FPEXT3:%[0-9]+]]:_(s32) = G_FPEXT [[TRUNC1]](s16) - ; SI: [[FPEXT4:%[0-9]+]]:_(s32) = G_FPEXT [[TRUNC4]](s16) - ; SI: [[FPEXT5:%[0-9]+]]:_(s32) = G_FPEXT [[TRUNC7]](s16) - ; SI: [[FMA1:%[0-9]+]]:_(s32) = G_FMA [[FPEXT3]], [[FPEXT4]], [[FPEXT5]] - ; SI: [[FPTRUNC1:%[0-9]+]]:_(s16) = G_FPTRUNC [[FMA1]](s32) - ; SI: [[FPEXT6:%[0-9]+]]:_(s32) = G_FPEXT [[TRUNC2]](s16) - ; SI: [[FPEXT7:%[0-9]+]]:_(s32) = G_FPEXT [[TRUNC5]](s16) - ; SI: [[FPEXT8:%[0-9]+]]:_(s32) = G_FPEXT [[TRUNC8]](s16) - ; SI: [[FMA2:%[0-9]+]]:_(s32) = G_FMA [[FPEXT6]], [[FPEXT7]], [[FPEXT8]] - ; SI: [[FPTRUNC2:%[0-9]+]]:_(s16) = G_FPTRUNC [[FMA2]](s32) - ; SI: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF - ; SI: [[UV9:%[0-9]+]]:_(<2 x s16>), [[UV10:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<4 x s16>) - ; SI: [[BITCAST6:%[0-9]+]]:_(s32) = G_BITCAST [[UV9]](<2 x s16>) - ; SI: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST6]], [[C]](s32) - ; SI: [[BITCAST7:%[0-9]+]]:_(s32) = G_BITCAST [[UV10]](<2 x s16>) - ; SI: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[FPTRUNC]](s16) - ; SI: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[FPTRUNC1]](s16) - ; SI: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C]](s32) - ; SI: [[OR:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL]] - ; SI: [[BITCAST8:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) - ; SI: [[ZEXT2:%[0-9]+]]:_(s32) = G_ZEXT [[FPTRUNC2]](s16) - ; SI: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 - ; SI: [[AND:%[0-9]+]]:_(s32) = G_AND [[BITCAST6]], [[C1]] - ; SI: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND]], [[C]](s32) - ; SI: [[OR1:%[0-9]+]]:_(s32) = G_OR [[ZEXT2]], [[SHL1]] - ; SI: [[BITCAST9:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32) - ; SI: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LSHR3]], [[C1]] - ; SI: [[AND2:%[0-9]+]]:_(s32) = G_AND [[BITCAST7]], [[C1]] - ; SI: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND2]], [[C]](s32) - ; SI: [[OR2:%[0-9]+]]:_(s32) = G_OR [[AND1]], [[SHL2]] - ; SI: [[BITCAST10:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR2]](s32) - ; SI: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BITCAST8]](<2 x s16>), [[BITCAST9]](<2 x s16>), [[BITCAST10]](<2 x s16>) - ; SI: $vgpr0_vgpr1_vgpr2 = COPY [[CONCAT_VECTORS]](<6 x s16>) + ; SI-NEXT: [[COPY1:%[0-9]+]]:_(<6 x s16>) = COPY $vgpr3_vgpr4_vgpr5 + ; SI-NEXT: [[COPY2:%[0-9]+]]:_(<6 x s16>) = COPY $vgpr6_vgpr7_vgpr8 + ; SI-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>), [[UV2:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[COPY]](<6 x s16>) + ; SI-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>) + ; SI-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST]](s32) + ; SI-NEXT: 
[[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; SI-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) + ; SI-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) + ; SI-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>) + ; SI-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST1]](s32) + ; SI-NEXT: [[UV3:%[0-9]+]]:_(<2 x s16>), [[UV4:%[0-9]+]]:_(<2 x s16>), [[UV5:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[COPY1]](<6 x s16>) + ; SI-NEXT: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[UV3]](<2 x s16>) + ; SI-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST2]](s32) + ; SI-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C]](s32) + ; SI-NEXT: [[TRUNC4:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR1]](s32) + ; SI-NEXT: [[BITCAST3:%[0-9]+]]:_(s32) = G_BITCAST [[UV4]](<2 x s16>) + ; SI-NEXT: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST3]](s32) + ; SI-NEXT: [[UV6:%[0-9]+]]:_(<2 x s16>), [[UV7:%[0-9]+]]:_(<2 x s16>), [[UV8:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[COPY2]](<6 x s16>) + ; SI-NEXT: [[BITCAST4:%[0-9]+]]:_(s32) = G_BITCAST [[UV6]](<2 x s16>) + ; SI-NEXT: [[TRUNC6:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST4]](s32) + ; SI-NEXT: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST4]], [[C]](s32) + ; SI-NEXT: [[TRUNC7:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR2]](s32) + ; SI-NEXT: [[BITCAST5:%[0-9]+]]:_(s32) = G_BITCAST [[UV7]](<2 x s16>) + ; SI-NEXT: [[TRUNC8:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST5]](s32) + ; SI-NEXT: [[FPEXT:%[0-9]+]]:_(s32) = G_FPEXT [[TRUNC]](s16) + ; SI-NEXT: [[FPEXT1:%[0-9]+]]:_(s32) = G_FPEXT [[TRUNC3]](s16) + ; SI-NEXT: [[FPEXT2:%[0-9]+]]:_(s32) = G_FPEXT [[TRUNC6]](s16) + ; SI-NEXT: [[FMA:%[0-9]+]]:_(s32) = G_FMA [[FPEXT]], [[FPEXT1]], [[FPEXT2]] + ; SI-NEXT: [[FPTRUNC:%[0-9]+]]:_(s16) = G_FPTRUNC [[FMA]](s32) + ; SI-NEXT: [[FPEXT3:%[0-9]+]]:_(s32) = G_FPEXT [[TRUNC1]](s16) + ; SI-NEXT: [[FPEXT4:%[0-9]+]]:_(s32) = G_FPEXT [[TRUNC4]](s16) + ; SI-NEXT: [[FPEXT5:%[0-9]+]]:_(s32) = G_FPEXT [[TRUNC7]](s16) + ; SI-NEXT: [[FMA1:%[0-9]+]]:_(s32) = G_FMA [[FPEXT3]], [[FPEXT4]], [[FPEXT5]] + ; SI-NEXT: [[FPTRUNC1:%[0-9]+]]:_(s16) = G_FPTRUNC [[FMA1]](s32) + ; SI-NEXT: [[FPEXT6:%[0-9]+]]:_(s32) = G_FPEXT [[TRUNC2]](s16) + ; SI-NEXT: [[FPEXT7:%[0-9]+]]:_(s32) = G_FPEXT [[TRUNC5]](s16) + ; SI-NEXT: [[FPEXT8:%[0-9]+]]:_(s32) = G_FPEXT [[TRUNC8]](s16) + ; SI-NEXT: [[FMA2:%[0-9]+]]:_(s32) = G_FMA [[FPEXT6]], [[FPEXT7]], [[FPEXT8]] + ; SI-NEXT: [[FPTRUNC2:%[0-9]+]]:_(s16) = G_FPTRUNC [[FMA2]](s32) + ; SI-NEXT: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF + ; SI-NEXT: [[UV9:%[0-9]+]]:_(<2 x s16>), [[UV10:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<4 x s16>) + ; SI-NEXT: [[BITCAST6:%[0-9]+]]:_(s32) = G_BITCAST [[UV9]](<2 x s16>) + ; SI-NEXT: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST6]], [[C]](s32) + ; SI-NEXT: [[BITCAST7:%[0-9]+]]:_(s32) = G_BITCAST [[UV10]](<2 x s16>) + ; SI-NEXT: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[FPTRUNC]](s16) + ; SI-NEXT: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[FPTRUNC1]](s16) + ; SI-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C]](s32) + ; SI-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL]] + ; SI-NEXT: [[BITCAST8:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) + ; SI-NEXT: [[ZEXT2:%[0-9]+]]:_(s32) = G_ZEXT [[FPTRUNC2]](s16) + ; SI-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 + ; SI-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[BITCAST6]], [[C1]] + ; SI-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND]], [[C]](s32) + ; SI-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[ZEXT2]], [[SHL1]] + ; SI-NEXT: [[BITCAST9:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32) + ; 
SI-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LSHR3]], [[C1]] + ; SI-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[BITCAST7]], [[C1]] + ; SI-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND2]], [[C]](s32) + ; SI-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[AND1]], [[SHL2]] + ; SI-NEXT: [[BITCAST10:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR2]](s32) + ; SI-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BITCAST8]](<2 x s16>), [[BITCAST9]](<2 x s16>), [[BITCAST10]](<2 x s16>) + ; SI-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[CONCAT_VECTORS]](<6 x s16>) ; VI-LABEL: name: test_fma_v3s16 ; VI: [[COPY:%[0-9]+]]:_(<6 x s16>) = COPY $vgpr0_vgpr1_vgpr2 - ; VI: [[COPY1:%[0-9]+]]:_(<6 x s16>) = COPY $vgpr3_vgpr4_vgpr5 - ; VI: [[COPY2:%[0-9]+]]:_(<6 x s16>) = COPY $vgpr6_vgpr7_vgpr8 - ; VI: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>), [[UV2:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[COPY]](<6 x s16>) - ; VI: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>) - ; VI: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST]](s32) - ; VI: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; VI: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) - ; VI: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) - ; VI: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>) - ; VI: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST1]](s32) - ; VI: [[UV3:%[0-9]+]]:_(<2 x s16>), [[UV4:%[0-9]+]]:_(<2 x s16>), [[UV5:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[COPY1]](<6 x s16>) - ; VI: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[UV3]](<2 x s16>) - ; VI: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST2]](s32) - ; VI: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C]](s32) - ; VI: [[TRUNC4:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR1]](s32) - ; VI: [[BITCAST3:%[0-9]+]]:_(s32) = G_BITCAST [[UV4]](<2 x s16>) - ; VI: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST3]](s32) - ; VI: [[UV6:%[0-9]+]]:_(<2 x s16>), [[UV7:%[0-9]+]]:_(<2 x s16>), [[UV8:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[COPY2]](<6 x s16>) - ; VI: [[BITCAST4:%[0-9]+]]:_(s32) = G_BITCAST [[UV6]](<2 x s16>) - ; VI: [[TRUNC6:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST4]](s32) - ; VI: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST4]], [[C]](s32) - ; VI: [[TRUNC7:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR2]](s32) - ; VI: [[BITCAST5:%[0-9]+]]:_(s32) = G_BITCAST [[UV7]](<2 x s16>) - ; VI: [[TRUNC8:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST5]](s32) - ; VI: [[FMA:%[0-9]+]]:_(s16) = G_FMA [[TRUNC]], [[TRUNC3]], [[TRUNC6]] - ; VI: [[FMA1:%[0-9]+]]:_(s16) = G_FMA [[TRUNC1]], [[TRUNC4]], [[TRUNC7]] - ; VI: [[FMA2:%[0-9]+]]:_(s16) = G_FMA [[TRUNC2]], [[TRUNC5]], [[TRUNC8]] - ; VI: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF - ; VI: [[UV9:%[0-9]+]]:_(<2 x s16>), [[UV10:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<4 x s16>) - ; VI: [[BITCAST6:%[0-9]+]]:_(s32) = G_BITCAST [[UV9]](<2 x s16>) - ; VI: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST6]], [[C]](s32) - ; VI: [[BITCAST7:%[0-9]+]]:_(s32) = G_BITCAST [[UV10]](<2 x s16>) - ; VI: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[FMA]](s16) - ; VI: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[FMA1]](s16) - ; VI: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C]](s32) - ; VI: [[OR:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL]] - ; VI: [[BITCAST8:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) - ; VI: [[ZEXT2:%[0-9]+]]:_(s32) = G_ZEXT [[FMA2]](s16) - ; VI: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 - ; VI: [[AND:%[0-9]+]]:_(s32) = G_AND [[BITCAST6]], [[C1]] - ; VI: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND]], [[C]](s32) - ; VI: [[OR1:%[0-9]+]]:_(s32) = G_OR [[ZEXT2]], [[SHL1]] - ; VI: 
[[BITCAST9:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32) - ; VI: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LSHR3]], [[C1]] - ; VI: [[AND2:%[0-9]+]]:_(s32) = G_AND [[BITCAST7]], [[C1]] - ; VI: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND2]], [[C]](s32) - ; VI: [[OR2:%[0-9]+]]:_(s32) = G_OR [[AND1]], [[SHL2]] - ; VI: [[BITCAST10:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR2]](s32) - ; VI: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BITCAST8]](<2 x s16>), [[BITCAST9]](<2 x s16>), [[BITCAST10]](<2 x s16>) - ; VI: $vgpr0_vgpr1_vgpr2 = COPY [[CONCAT_VECTORS]](<6 x s16>) + ; VI-NEXT: [[COPY1:%[0-9]+]]:_(<6 x s16>) = COPY $vgpr3_vgpr4_vgpr5 + ; VI-NEXT: [[COPY2:%[0-9]+]]:_(<6 x s16>) = COPY $vgpr6_vgpr7_vgpr8 + ; VI-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>), [[UV2:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[COPY]](<6 x s16>) + ; VI-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>) + ; VI-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST]](s32) + ; VI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; VI-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) + ; VI-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) + ; VI-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>) + ; VI-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST1]](s32) + ; VI-NEXT: [[UV3:%[0-9]+]]:_(<2 x s16>), [[UV4:%[0-9]+]]:_(<2 x s16>), [[UV5:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[COPY1]](<6 x s16>) + ; VI-NEXT: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[UV3]](<2 x s16>) + ; VI-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST2]](s32) + ; VI-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C]](s32) + ; VI-NEXT: [[TRUNC4:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR1]](s32) + ; VI-NEXT: [[BITCAST3:%[0-9]+]]:_(s32) = G_BITCAST [[UV4]](<2 x s16>) + ; VI-NEXT: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST3]](s32) + ; VI-NEXT: [[UV6:%[0-9]+]]:_(<2 x s16>), [[UV7:%[0-9]+]]:_(<2 x s16>), [[UV8:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[COPY2]](<6 x s16>) + ; VI-NEXT: [[BITCAST4:%[0-9]+]]:_(s32) = G_BITCAST [[UV6]](<2 x s16>) + ; VI-NEXT: [[TRUNC6:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST4]](s32) + ; VI-NEXT: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST4]], [[C]](s32) + ; VI-NEXT: [[TRUNC7:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR2]](s32) + ; VI-NEXT: [[BITCAST5:%[0-9]+]]:_(s32) = G_BITCAST [[UV7]](<2 x s16>) + ; VI-NEXT: [[TRUNC8:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST5]](s32) + ; VI-NEXT: [[FMA:%[0-9]+]]:_(s16) = G_FMA [[TRUNC]], [[TRUNC3]], [[TRUNC6]] + ; VI-NEXT: [[FMA1:%[0-9]+]]:_(s16) = G_FMA [[TRUNC1]], [[TRUNC4]], [[TRUNC7]] + ; VI-NEXT: [[FMA2:%[0-9]+]]:_(s16) = G_FMA [[TRUNC2]], [[TRUNC5]], [[TRUNC8]] + ; VI-NEXT: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF + ; VI-NEXT: [[UV9:%[0-9]+]]:_(<2 x s16>), [[UV10:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<4 x s16>) + ; VI-NEXT: [[BITCAST6:%[0-9]+]]:_(s32) = G_BITCAST [[UV9]](<2 x s16>) + ; VI-NEXT: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST6]], [[C]](s32) + ; VI-NEXT: [[BITCAST7:%[0-9]+]]:_(s32) = G_BITCAST [[UV10]](<2 x s16>) + ; VI-NEXT: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[FMA]](s16) + ; VI-NEXT: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[FMA1]](s16) + ; VI-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C]](s32) + ; VI-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL]] + ; VI-NEXT: [[BITCAST8:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) + ; VI-NEXT: [[ZEXT2:%[0-9]+]]:_(s32) = G_ZEXT [[FMA2]](s16) + ; VI-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 + ; VI-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[BITCAST6]], [[C1]] + ; 
VI-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND]], [[C]](s32) + ; VI-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[ZEXT2]], [[SHL1]] + ; VI-NEXT: [[BITCAST9:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32) + ; VI-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LSHR3]], [[C1]] + ; VI-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[BITCAST7]], [[C1]] + ; VI-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND2]], [[C]](s32) + ; VI-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[AND1]], [[SHL2]] + ; VI-NEXT: [[BITCAST10:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR2]](s32) + ; VI-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BITCAST8]](<2 x s16>), [[BITCAST9]](<2 x s16>), [[BITCAST10]](<2 x s16>) + ; VI-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[CONCAT_VECTORS]](<6 x s16>) ; GFX9-LABEL: name: test_fma_v3s16 ; GFX9: [[COPY:%[0-9]+]]:_(<6 x s16>) = COPY $vgpr0_vgpr1_vgpr2 - ; GFX9: [[COPY1:%[0-9]+]]:_(<6 x s16>) = COPY $vgpr3_vgpr4_vgpr5 - ; GFX9: [[COPY2:%[0-9]+]]:_(<6 x s16>) = COPY $vgpr6_vgpr7_vgpr8 - ; GFX9: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>), [[UV2:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[COPY]](<6 x s16>) - ; GFX9: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>) - ; GFX9: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; GFX9: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) - ; GFX9: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>) - ; GFX9: [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[BITCAST]](s32), [[LSHR]](s32) - ; GFX9: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF - ; GFX9: [[BUILD_VECTOR_TRUNC1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[BITCAST1]](s32), [[DEF]](s32) - ; GFX9: [[UV3:%[0-9]+]]:_(<2 x s16>), [[UV4:%[0-9]+]]:_(<2 x s16>), [[UV5:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[COPY1]](<6 x s16>) - ; GFX9: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[UV3]](<2 x s16>) - ; GFX9: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C]](s32) - ; GFX9: [[BITCAST3:%[0-9]+]]:_(s32) = G_BITCAST [[UV4]](<2 x s16>) - ; GFX9: [[BUILD_VECTOR_TRUNC2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[BITCAST2]](s32), [[LSHR1]](s32) - ; GFX9: [[BUILD_VECTOR_TRUNC3:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[BITCAST3]](s32), [[DEF]](s32) - ; GFX9: [[UV6:%[0-9]+]]:_(<2 x s16>), [[UV7:%[0-9]+]]:_(<2 x s16>), [[UV8:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[COPY2]](<6 x s16>) - ; GFX9: [[BITCAST4:%[0-9]+]]:_(s32) = G_BITCAST [[UV6]](<2 x s16>) - ; GFX9: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST4]], [[C]](s32) - ; GFX9: [[BITCAST5:%[0-9]+]]:_(s32) = G_BITCAST [[UV7]](<2 x s16>) - ; GFX9: [[BUILD_VECTOR_TRUNC4:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[BITCAST4]](s32), [[LSHR2]](s32) - ; GFX9: [[BUILD_VECTOR_TRUNC5:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[BITCAST5]](s32), [[DEF]](s32) - ; GFX9: [[FMA:%[0-9]+]]:_(<2 x s16>) = G_FMA [[BUILD_VECTOR_TRUNC]], [[BUILD_VECTOR_TRUNC2]], [[BUILD_VECTOR_TRUNC4]] - ; GFX9: [[FMA1:%[0-9]+]]:_(<2 x s16>) = G_FMA [[BUILD_VECTOR_TRUNC1]], [[BUILD_VECTOR_TRUNC3]], [[BUILD_VECTOR_TRUNC5]] - ; GFX9: [[DEF1:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF - ; GFX9: [[BITCAST6:%[0-9]+]]:_(s32) = G_BITCAST [[FMA]](<2 x s16>) - ; GFX9: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST6]], [[C]](s32) - ; GFX9: [[BITCAST7:%[0-9]+]]:_(s32) = G_BITCAST [[FMA1]](<2 x s16>) - ; GFX9: [[UV9:%[0-9]+]]:_(<2 x s16>), [[UV10:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF1]](<4 x s16>) - ; GFX9: [[BITCAST8:%[0-9]+]]:_(s32) = G_BITCAST [[UV9]](<2 x s16>) - ; GFX9: [[LSHR4:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST8]], [[C]](s32) - ; GFX9: 
[[BITCAST9:%[0-9]+]]:_(s32) = G_BITCAST [[UV10]](<2 x s16>) - ; GFX9: [[BUILD_VECTOR_TRUNC6:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[BITCAST6]](s32), [[LSHR3]](s32) - ; GFX9: [[BUILD_VECTOR_TRUNC7:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[BITCAST7]](s32), [[BITCAST8]](s32) - ; GFX9: [[BUILD_VECTOR_TRUNC8:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[LSHR4]](s32), [[BITCAST9]](s32) - ; GFX9: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR_TRUNC6]](<2 x s16>), [[BUILD_VECTOR_TRUNC7]](<2 x s16>), [[BUILD_VECTOR_TRUNC8]](<2 x s16>) - ; GFX9: $vgpr0_vgpr1_vgpr2 = COPY [[CONCAT_VECTORS]](<6 x s16>) + ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(<6 x s16>) = COPY $vgpr3_vgpr4_vgpr5 + ; GFX9-NEXT: [[COPY2:%[0-9]+]]:_(<6 x s16>) = COPY $vgpr6_vgpr7_vgpr8 + ; GFX9-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>), [[UV2:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[COPY]](<6 x s16>) + ; GFX9-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>) + ; GFX9-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; GFX9-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) + ; GFX9-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>) + ; GFX9-NEXT: [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[BITCAST]](s32), [[LSHR]](s32) + ; GFX9-NEXT: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF + ; GFX9-NEXT: [[BUILD_VECTOR_TRUNC1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[BITCAST1]](s32), [[DEF]](s32) + ; GFX9-NEXT: [[UV3:%[0-9]+]]:_(<2 x s16>), [[UV4:%[0-9]+]]:_(<2 x s16>), [[UV5:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[COPY1]](<6 x s16>) + ; GFX9-NEXT: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[UV3]](<2 x s16>) + ; GFX9-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C]](s32) + ; GFX9-NEXT: [[BITCAST3:%[0-9]+]]:_(s32) = G_BITCAST [[UV4]](<2 x s16>) + ; GFX9-NEXT: [[BUILD_VECTOR_TRUNC2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[BITCAST2]](s32), [[LSHR1]](s32) + ; GFX9-NEXT: [[BUILD_VECTOR_TRUNC3:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[BITCAST3]](s32), [[DEF]](s32) + ; GFX9-NEXT: [[UV6:%[0-9]+]]:_(<2 x s16>), [[UV7:%[0-9]+]]:_(<2 x s16>), [[UV8:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[COPY2]](<6 x s16>) + ; GFX9-NEXT: [[BITCAST4:%[0-9]+]]:_(s32) = G_BITCAST [[UV6]](<2 x s16>) + ; GFX9-NEXT: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST4]], [[C]](s32) + ; GFX9-NEXT: [[BITCAST5:%[0-9]+]]:_(s32) = G_BITCAST [[UV7]](<2 x s16>) + ; GFX9-NEXT: [[BUILD_VECTOR_TRUNC4:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[BITCAST4]](s32), [[LSHR2]](s32) + ; GFX9-NEXT: [[BUILD_VECTOR_TRUNC5:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[BITCAST5]](s32), [[DEF]](s32) + ; GFX9-NEXT: [[FMA:%[0-9]+]]:_(<2 x s16>) = G_FMA [[BUILD_VECTOR_TRUNC]], [[BUILD_VECTOR_TRUNC2]], [[BUILD_VECTOR_TRUNC4]] + ; GFX9-NEXT: [[FMA1:%[0-9]+]]:_(<2 x s16>) = G_FMA [[BUILD_VECTOR_TRUNC1]], [[BUILD_VECTOR_TRUNC3]], [[BUILD_VECTOR_TRUNC5]] + ; GFX9-NEXT: [[DEF1:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF + ; GFX9-NEXT: [[BITCAST6:%[0-9]+]]:_(s32) = G_BITCAST [[FMA]](<2 x s16>) + ; GFX9-NEXT: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST6]], [[C]](s32) + ; GFX9-NEXT: [[BITCAST7:%[0-9]+]]:_(s32) = G_BITCAST [[FMA1]](<2 x s16>) + ; GFX9-NEXT: [[UV9:%[0-9]+]]:_(<2 x s16>), [[UV10:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF1]](<4 x s16>) + ; GFX9-NEXT: [[BITCAST8:%[0-9]+]]:_(s32) = G_BITCAST [[UV9]](<2 x s16>) + ; GFX9-NEXT: [[LSHR4:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST8]], [[C]](s32) + ; GFX9-NEXT: [[BITCAST9:%[0-9]+]]:_(s32) = G_BITCAST [[UV10]](<2 x s16>) 
+ ; GFX9-NEXT: [[BUILD_VECTOR_TRUNC6:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[BITCAST6]](s32), [[LSHR3]](s32) + ; GFX9-NEXT: [[BUILD_VECTOR_TRUNC7:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[BITCAST7]](s32), [[BITCAST8]](s32) + ; GFX9-NEXT: [[BUILD_VECTOR_TRUNC8:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[LSHR4]](s32), [[BITCAST9]](s32) + ; GFX9-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR_TRUNC6]](<2 x s16>), [[BUILD_VECTOR_TRUNC7]](<2 x s16>), [[BUILD_VECTOR_TRUNC8]](<2 x s16>) + ; GFX9-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[CONCAT_VECTORS]](<6 x s16>) %0:_(<6 x s16>) = COPY $vgpr0_vgpr1_vgpr2 %1:_(<6 x s16>) = COPY $vgpr3_vgpr4_vgpr5 %2:_(<6 x s16>) = COPY $vgpr6_vgpr7_vgpr8 @@ -569,127 +569,127 @@ body: | ; SI-LABEL: name: test_fma_v4s16 ; SI: [[COPY:%[0-9]+]]:_(<4 x s16>) = COPY $vgpr0_vgpr1 - ; SI: [[COPY1:%[0-9]+]]:_(<4 x s16>) = COPY $vgpr2_vgpr3 - ; SI: [[COPY2:%[0-9]+]]:_(<4 x s16>) = COPY $vgpr4_vgpr5 - ; SI: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[COPY]](<4 x s16>) - ; SI: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>) - ; SI: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST]](s32) - ; SI: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; SI: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) - ; SI: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) - ; SI: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>) - ; SI: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST1]](s32) - ; SI: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C]](s32) - ; SI: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR1]](s32) - ; SI: [[UV2:%[0-9]+]]:_(<2 x s16>), [[UV3:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[COPY1]](<4 x s16>) - ; SI: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[UV2]](<2 x s16>) - ; SI: [[TRUNC4:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST2]](s32) - ; SI: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C]](s32) - ; SI: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR2]](s32) - ; SI: [[BITCAST3:%[0-9]+]]:_(s32) = G_BITCAST [[UV3]](<2 x s16>) - ; SI: [[TRUNC6:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST3]](s32) - ; SI: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST3]], [[C]](s32) - ; SI: [[TRUNC7:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR3]](s32) - ; SI: [[UV4:%[0-9]+]]:_(<2 x s16>), [[UV5:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[COPY2]](<4 x s16>) - ; SI: [[BITCAST4:%[0-9]+]]:_(s32) = G_BITCAST [[UV4]](<2 x s16>) - ; SI: [[TRUNC8:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST4]](s32) - ; SI: [[LSHR4:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST4]], [[C]](s32) - ; SI: [[TRUNC9:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR4]](s32) - ; SI: [[BITCAST5:%[0-9]+]]:_(s32) = G_BITCAST [[UV5]](<2 x s16>) - ; SI: [[TRUNC10:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST5]](s32) - ; SI: [[LSHR5:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST5]], [[C]](s32) - ; SI: [[TRUNC11:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR5]](s32) - ; SI: [[FPEXT:%[0-9]+]]:_(s32) = G_FPEXT [[TRUNC]](s16) - ; SI: [[FPEXT1:%[0-9]+]]:_(s32) = G_FPEXT [[TRUNC4]](s16) - ; SI: [[FPEXT2:%[0-9]+]]:_(s32) = G_FPEXT [[TRUNC8]](s16) - ; SI: [[FMA:%[0-9]+]]:_(s32) = G_FMA [[FPEXT]], [[FPEXT1]], [[FPEXT2]] - ; SI: [[FPTRUNC:%[0-9]+]]:_(s16) = G_FPTRUNC [[FMA]](s32) - ; SI: [[FPEXT3:%[0-9]+]]:_(s32) = G_FPEXT [[TRUNC1]](s16) - ; SI: [[FPEXT4:%[0-9]+]]:_(s32) = G_FPEXT [[TRUNC5]](s16) - ; SI: [[FPEXT5:%[0-9]+]]:_(s32) = G_FPEXT [[TRUNC9]](s16) - ; SI: [[FMA1:%[0-9]+]]:_(s32) = G_FMA [[FPEXT3]], [[FPEXT4]], [[FPEXT5]] - ; SI: [[FPTRUNC1:%[0-9]+]]:_(s16) = G_FPTRUNC [[FMA1]](s32) - ; SI: [[FPEXT6:%[0-9]+]]:_(s32) = G_FPEXT [[TRUNC2]](s16) - 
; SI: [[FPEXT7:%[0-9]+]]:_(s32) = G_FPEXT [[TRUNC6]](s16) - ; SI: [[FPEXT8:%[0-9]+]]:_(s32) = G_FPEXT [[TRUNC10]](s16) - ; SI: [[FMA2:%[0-9]+]]:_(s32) = G_FMA [[FPEXT6]], [[FPEXT7]], [[FPEXT8]] - ; SI: [[FPTRUNC2:%[0-9]+]]:_(s16) = G_FPTRUNC [[FMA2]](s32) - ; SI: [[FPEXT9:%[0-9]+]]:_(s32) = G_FPEXT [[TRUNC3]](s16) - ; SI: [[FPEXT10:%[0-9]+]]:_(s32) = G_FPEXT [[TRUNC7]](s16) - ; SI: [[FPEXT11:%[0-9]+]]:_(s32) = G_FPEXT [[TRUNC11]](s16) - ; SI: [[FMA3:%[0-9]+]]:_(s32) = G_FMA [[FPEXT9]], [[FPEXT10]], [[FPEXT11]] - ; SI: [[FPTRUNC3:%[0-9]+]]:_(s16) = G_FPTRUNC [[FMA3]](s32) - ; SI: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[FPTRUNC]](s16) - ; SI: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[FPTRUNC1]](s16) - ; SI: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C]](s32) - ; SI: [[OR:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL]] - ; SI: [[BITCAST6:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) - ; SI: [[ZEXT2:%[0-9]+]]:_(s32) = G_ZEXT [[FPTRUNC2]](s16) - ; SI: [[ZEXT3:%[0-9]+]]:_(s32) = G_ZEXT [[FPTRUNC3]](s16) - ; SI: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[ZEXT3]], [[C]](s32) - ; SI: [[OR1:%[0-9]+]]:_(s32) = G_OR [[ZEXT2]], [[SHL1]] - ; SI: [[BITCAST7:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32) - ; SI: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BITCAST6]](<2 x s16>), [[BITCAST7]](<2 x s16>) - ; SI: $vgpr0_vgpr1 = COPY [[CONCAT_VECTORS]](<4 x s16>) + ; SI-NEXT: [[COPY1:%[0-9]+]]:_(<4 x s16>) = COPY $vgpr2_vgpr3 + ; SI-NEXT: [[COPY2:%[0-9]+]]:_(<4 x s16>) = COPY $vgpr4_vgpr5 + ; SI-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[COPY]](<4 x s16>) + ; SI-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>) + ; SI-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST]](s32) + ; SI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; SI-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) + ; SI-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) + ; SI-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>) + ; SI-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST1]](s32) + ; SI-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C]](s32) + ; SI-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR1]](s32) + ; SI-NEXT: [[UV2:%[0-9]+]]:_(<2 x s16>), [[UV3:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[COPY1]](<4 x s16>) + ; SI-NEXT: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[UV2]](<2 x s16>) + ; SI-NEXT: [[TRUNC4:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST2]](s32) + ; SI-NEXT: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C]](s32) + ; SI-NEXT: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR2]](s32) + ; SI-NEXT: [[BITCAST3:%[0-9]+]]:_(s32) = G_BITCAST [[UV3]](<2 x s16>) + ; SI-NEXT: [[TRUNC6:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST3]](s32) + ; SI-NEXT: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST3]], [[C]](s32) + ; SI-NEXT: [[TRUNC7:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR3]](s32) + ; SI-NEXT: [[UV4:%[0-9]+]]:_(<2 x s16>), [[UV5:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[COPY2]](<4 x s16>) + ; SI-NEXT: [[BITCAST4:%[0-9]+]]:_(s32) = G_BITCAST [[UV4]](<2 x s16>) + ; SI-NEXT: [[TRUNC8:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST4]](s32) + ; SI-NEXT: [[LSHR4:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST4]], [[C]](s32) + ; SI-NEXT: [[TRUNC9:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR4]](s32) + ; SI-NEXT: [[BITCAST5:%[0-9]+]]:_(s32) = G_BITCAST [[UV5]](<2 x s16>) + ; SI-NEXT: [[TRUNC10:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST5]](s32) + ; SI-NEXT: [[LSHR5:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST5]], [[C]](s32) + ; SI-NEXT: [[TRUNC11:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR5]](s32) + ; SI-NEXT: 
[[FPEXT:%[0-9]+]]:_(s32) = G_FPEXT [[TRUNC]](s16) + ; SI-NEXT: [[FPEXT1:%[0-9]+]]:_(s32) = G_FPEXT [[TRUNC4]](s16) + ; SI-NEXT: [[FPEXT2:%[0-9]+]]:_(s32) = G_FPEXT [[TRUNC8]](s16) + ; SI-NEXT: [[FMA:%[0-9]+]]:_(s32) = G_FMA [[FPEXT]], [[FPEXT1]], [[FPEXT2]] + ; SI-NEXT: [[FPTRUNC:%[0-9]+]]:_(s16) = G_FPTRUNC [[FMA]](s32) + ; SI-NEXT: [[FPEXT3:%[0-9]+]]:_(s32) = G_FPEXT [[TRUNC1]](s16) + ; SI-NEXT: [[FPEXT4:%[0-9]+]]:_(s32) = G_FPEXT [[TRUNC5]](s16) + ; SI-NEXT: [[FPEXT5:%[0-9]+]]:_(s32) = G_FPEXT [[TRUNC9]](s16) + ; SI-NEXT: [[FMA1:%[0-9]+]]:_(s32) = G_FMA [[FPEXT3]], [[FPEXT4]], [[FPEXT5]] + ; SI-NEXT: [[FPTRUNC1:%[0-9]+]]:_(s16) = G_FPTRUNC [[FMA1]](s32) + ; SI-NEXT: [[FPEXT6:%[0-9]+]]:_(s32) = G_FPEXT [[TRUNC2]](s16) + ; SI-NEXT: [[FPEXT7:%[0-9]+]]:_(s32) = G_FPEXT [[TRUNC6]](s16) + ; SI-NEXT: [[FPEXT8:%[0-9]+]]:_(s32) = G_FPEXT [[TRUNC10]](s16) + ; SI-NEXT: [[FMA2:%[0-9]+]]:_(s32) = G_FMA [[FPEXT6]], [[FPEXT7]], [[FPEXT8]] + ; SI-NEXT: [[FPTRUNC2:%[0-9]+]]:_(s16) = G_FPTRUNC [[FMA2]](s32) + ; SI-NEXT: [[FPEXT9:%[0-9]+]]:_(s32) = G_FPEXT [[TRUNC3]](s16) + ; SI-NEXT: [[FPEXT10:%[0-9]+]]:_(s32) = G_FPEXT [[TRUNC7]](s16) + ; SI-NEXT: [[FPEXT11:%[0-9]+]]:_(s32) = G_FPEXT [[TRUNC11]](s16) + ; SI-NEXT: [[FMA3:%[0-9]+]]:_(s32) = G_FMA [[FPEXT9]], [[FPEXT10]], [[FPEXT11]] + ; SI-NEXT: [[FPTRUNC3:%[0-9]+]]:_(s16) = G_FPTRUNC [[FMA3]](s32) + ; SI-NEXT: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[FPTRUNC]](s16) + ; SI-NEXT: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[FPTRUNC1]](s16) + ; SI-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C]](s32) + ; SI-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL]] + ; SI-NEXT: [[BITCAST6:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) + ; SI-NEXT: [[ZEXT2:%[0-9]+]]:_(s32) = G_ZEXT [[FPTRUNC2]](s16) + ; SI-NEXT: [[ZEXT3:%[0-9]+]]:_(s32) = G_ZEXT [[FPTRUNC3]](s16) + ; SI-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[ZEXT3]], [[C]](s32) + ; SI-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[ZEXT2]], [[SHL1]] + ; SI-NEXT: [[BITCAST7:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32) + ; SI-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BITCAST6]](<2 x s16>), [[BITCAST7]](<2 x s16>) + ; SI-NEXT: $vgpr0_vgpr1 = COPY [[CONCAT_VECTORS]](<4 x s16>) ; VI-LABEL: name: test_fma_v4s16 ; VI: [[COPY:%[0-9]+]]:_(<4 x s16>) = COPY $vgpr0_vgpr1 - ; VI: [[COPY1:%[0-9]+]]:_(<4 x s16>) = COPY $vgpr2_vgpr3 - ; VI: [[COPY2:%[0-9]+]]:_(<4 x s16>) = COPY $vgpr4_vgpr5 - ; VI: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[COPY]](<4 x s16>) - ; VI: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>) - ; VI: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST]](s32) - ; VI: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; VI: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) - ; VI: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) - ; VI: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>) - ; VI: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST1]](s32) - ; VI: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C]](s32) - ; VI: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR1]](s32) - ; VI: [[UV2:%[0-9]+]]:_(<2 x s16>), [[UV3:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[COPY1]](<4 x s16>) - ; VI: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[UV2]](<2 x s16>) - ; VI: [[TRUNC4:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST2]](s32) - ; VI: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C]](s32) - ; VI: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR2]](s32) - ; VI: [[BITCAST3:%[0-9]+]]:_(s32) = G_BITCAST [[UV3]](<2 x s16>) - ; VI: [[TRUNC6:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST3]](s32) - ; 
VI: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST3]], [[C]](s32) - ; VI: [[TRUNC7:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR3]](s32) - ; VI: [[UV4:%[0-9]+]]:_(<2 x s16>), [[UV5:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[COPY2]](<4 x s16>) - ; VI: [[BITCAST4:%[0-9]+]]:_(s32) = G_BITCAST [[UV4]](<2 x s16>) - ; VI: [[TRUNC8:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST4]](s32) - ; VI: [[LSHR4:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST4]], [[C]](s32) - ; VI: [[TRUNC9:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR4]](s32) - ; VI: [[BITCAST5:%[0-9]+]]:_(s32) = G_BITCAST [[UV5]](<2 x s16>) - ; VI: [[TRUNC10:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST5]](s32) - ; VI: [[LSHR5:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST5]], [[C]](s32) - ; VI: [[TRUNC11:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR5]](s32) - ; VI: [[FMA:%[0-9]+]]:_(s16) = G_FMA [[TRUNC]], [[TRUNC4]], [[TRUNC8]] - ; VI: [[FMA1:%[0-9]+]]:_(s16) = G_FMA [[TRUNC1]], [[TRUNC5]], [[TRUNC9]] - ; VI: [[FMA2:%[0-9]+]]:_(s16) = G_FMA [[TRUNC2]], [[TRUNC6]], [[TRUNC10]] - ; VI: [[FMA3:%[0-9]+]]:_(s16) = G_FMA [[TRUNC3]], [[TRUNC7]], [[TRUNC11]] - ; VI: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[FMA]](s16) - ; VI: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[FMA1]](s16) - ; VI: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C]](s32) - ; VI: [[OR:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL]] - ; VI: [[BITCAST6:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) - ; VI: [[ZEXT2:%[0-9]+]]:_(s32) = G_ZEXT [[FMA2]](s16) - ; VI: [[ZEXT3:%[0-9]+]]:_(s32) = G_ZEXT [[FMA3]](s16) - ; VI: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[ZEXT3]], [[C]](s32) - ; VI: [[OR1:%[0-9]+]]:_(s32) = G_OR [[ZEXT2]], [[SHL1]] - ; VI: [[BITCAST7:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32) - ; VI: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BITCAST6]](<2 x s16>), [[BITCAST7]](<2 x s16>) - ; VI: $vgpr0_vgpr1 = COPY [[CONCAT_VECTORS]](<4 x s16>) + ; VI-NEXT: [[COPY1:%[0-9]+]]:_(<4 x s16>) = COPY $vgpr2_vgpr3 + ; VI-NEXT: [[COPY2:%[0-9]+]]:_(<4 x s16>) = COPY $vgpr4_vgpr5 + ; VI-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[COPY]](<4 x s16>) + ; VI-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>) + ; VI-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST]](s32) + ; VI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; VI-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) + ; VI-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) + ; VI-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>) + ; VI-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST1]](s32) + ; VI-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C]](s32) + ; VI-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR1]](s32) + ; VI-NEXT: [[UV2:%[0-9]+]]:_(<2 x s16>), [[UV3:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[COPY1]](<4 x s16>) + ; VI-NEXT: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[UV2]](<2 x s16>) + ; VI-NEXT: [[TRUNC4:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST2]](s32) + ; VI-NEXT: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C]](s32) + ; VI-NEXT: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR2]](s32) + ; VI-NEXT: [[BITCAST3:%[0-9]+]]:_(s32) = G_BITCAST [[UV3]](<2 x s16>) + ; VI-NEXT: [[TRUNC6:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST3]](s32) + ; VI-NEXT: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST3]], [[C]](s32) + ; VI-NEXT: [[TRUNC7:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR3]](s32) + ; VI-NEXT: [[UV4:%[0-9]+]]:_(<2 x s16>), [[UV5:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[COPY2]](<4 x s16>) + ; VI-NEXT: [[BITCAST4:%[0-9]+]]:_(s32) = G_BITCAST [[UV4]](<2 x s16>) + ; VI-NEXT: [[TRUNC8:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST4]](s32) 
+ ; VI-NEXT: [[LSHR4:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST4]], [[C]](s32) + ; VI-NEXT: [[TRUNC9:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR4]](s32) + ; VI-NEXT: [[BITCAST5:%[0-9]+]]:_(s32) = G_BITCAST [[UV5]](<2 x s16>) + ; VI-NEXT: [[TRUNC10:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST5]](s32) + ; VI-NEXT: [[LSHR5:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST5]], [[C]](s32) + ; VI-NEXT: [[TRUNC11:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR5]](s32) + ; VI-NEXT: [[FMA:%[0-9]+]]:_(s16) = G_FMA [[TRUNC]], [[TRUNC4]], [[TRUNC8]] + ; VI-NEXT: [[FMA1:%[0-9]+]]:_(s16) = G_FMA [[TRUNC1]], [[TRUNC5]], [[TRUNC9]] + ; VI-NEXT: [[FMA2:%[0-9]+]]:_(s16) = G_FMA [[TRUNC2]], [[TRUNC6]], [[TRUNC10]] + ; VI-NEXT: [[FMA3:%[0-9]+]]:_(s16) = G_FMA [[TRUNC3]], [[TRUNC7]], [[TRUNC11]] + ; VI-NEXT: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[FMA]](s16) + ; VI-NEXT: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[FMA1]](s16) + ; VI-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C]](s32) + ; VI-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL]] + ; VI-NEXT: [[BITCAST6:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) + ; VI-NEXT: [[ZEXT2:%[0-9]+]]:_(s32) = G_ZEXT [[FMA2]](s16) + ; VI-NEXT: [[ZEXT3:%[0-9]+]]:_(s32) = G_ZEXT [[FMA3]](s16) + ; VI-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[ZEXT3]], [[C]](s32) + ; VI-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[ZEXT2]], [[SHL1]] + ; VI-NEXT: [[BITCAST7:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32) + ; VI-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BITCAST6]](<2 x s16>), [[BITCAST7]](<2 x s16>) + ; VI-NEXT: $vgpr0_vgpr1 = COPY [[CONCAT_VECTORS]](<4 x s16>) ; GFX9-LABEL: name: test_fma_v4s16 ; GFX9: [[COPY:%[0-9]+]]:_(<4 x s16>) = COPY $vgpr0_vgpr1 - ; GFX9: [[COPY1:%[0-9]+]]:_(<4 x s16>) = COPY $vgpr2_vgpr3 - ; GFX9: [[COPY2:%[0-9]+]]:_(<4 x s16>) = COPY $vgpr4_vgpr5 - ; GFX9: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[COPY]](<4 x s16>) - ; GFX9: [[UV2:%[0-9]+]]:_(<2 x s16>), [[UV3:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[COPY1]](<4 x s16>) - ; GFX9: [[UV4:%[0-9]+]]:_(<2 x s16>), [[UV5:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[COPY2]](<4 x s16>) - ; GFX9: [[FMA:%[0-9]+]]:_(<2 x s16>) = G_FMA [[UV]], [[UV2]], [[UV4]] - ; GFX9: [[FMA1:%[0-9]+]]:_(<2 x s16>) = G_FMA [[UV1]], [[UV3]], [[UV5]] - ; GFX9: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[FMA]](<2 x s16>), [[FMA1]](<2 x s16>) - ; GFX9: $vgpr0_vgpr1 = COPY [[CONCAT_VECTORS]](<4 x s16>) + ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(<4 x s16>) = COPY $vgpr2_vgpr3 + ; GFX9-NEXT: [[COPY2:%[0-9]+]]:_(<4 x s16>) = COPY $vgpr4_vgpr5 + ; GFX9-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[COPY]](<4 x s16>) + ; GFX9-NEXT: [[UV2:%[0-9]+]]:_(<2 x s16>), [[UV3:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[COPY1]](<4 x s16>) + ; GFX9-NEXT: [[UV4:%[0-9]+]]:_(<2 x s16>), [[UV5:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[COPY2]](<4 x s16>) + ; GFX9-NEXT: [[FMA:%[0-9]+]]:_(<2 x s16>) = G_FMA [[UV]], [[UV2]], [[UV4]] + ; GFX9-NEXT: [[FMA1:%[0-9]+]]:_(<2 x s16>) = G_FMA [[UV1]], [[UV3]], [[UV5]] + ; GFX9-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[FMA]](<2 x s16>), [[FMA1]](<2 x s16>) + ; GFX9-NEXT: $vgpr0_vgpr1 = COPY [[CONCAT_VECTORS]](<4 x s16>) %0:_(<4 x s16>) = COPY $vgpr0_vgpr1 %1:_(<4 x s16>) = COPY $vgpr2_vgpr3 %2:_(<4 x s16>) = COPY $vgpr4_vgpr5 diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-fmaxnum.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-fmaxnum.mir index 0aa015803cd9..3edb265c710d 100644 --- 
a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-fmaxnum.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-fmaxnum.mir @@ -15,25 +15,25 @@ body: | ; SI-LABEL: name: test_fmaxnum_s32_ieee_mode_on ; SI: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; SI: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; SI: [[FCANONICALIZE:%[0-9]+]]:_(s32) = G_FCANONICALIZE [[COPY]] - ; SI: [[FCANONICALIZE1:%[0-9]+]]:_(s32) = G_FCANONICALIZE [[COPY1]] - ; SI: [[FMAXNUM_IEEE:%[0-9]+]]:_(s32) = G_FMAXNUM_IEEE [[FCANONICALIZE]], [[FCANONICALIZE1]] - ; SI: $vgpr0 = COPY [[FMAXNUM_IEEE]](s32) + ; SI-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; SI-NEXT: [[FCANONICALIZE:%[0-9]+]]:_(s32) = G_FCANONICALIZE [[COPY]] + ; SI-NEXT: [[FCANONICALIZE1:%[0-9]+]]:_(s32) = G_FCANONICALIZE [[COPY1]] + ; SI-NEXT: [[FMAXNUM_IEEE:%[0-9]+]]:_(s32) = G_FMAXNUM_IEEE [[FCANONICALIZE]], [[FCANONICALIZE1]] + ; SI-NEXT: $vgpr0 = COPY [[FMAXNUM_IEEE]](s32) ; VI-LABEL: name: test_fmaxnum_s32_ieee_mode_on ; VI: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; VI: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; VI: [[FCANONICALIZE:%[0-9]+]]:_(s32) = G_FCANONICALIZE [[COPY]] - ; VI: [[FCANONICALIZE1:%[0-9]+]]:_(s32) = G_FCANONICALIZE [[COPY1]] - ; VI: [[FMAXNUM_IEEE:%[0-9]+]]:_(s32) = G_FMAXNUM_IEEE [[FCANONICALIZE]], [[FCANONICALIZE1]] - ; VI: $vgpr0 = COPY [[FMAXNUM_IEEE]](s32) + ; VI-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; VI-NEXT: [[FCANONICALIZE:%[0-9]+]]:_(s32) = G_FCANONICALIZE [[COPY]] + ; VI-NEXT: [[FCANONICALIZE1:%[0-9]+]]:_(s32) = G_FCANONICALIZE [[COPY1]] + ; VI-NEXT: [[FMAXNUM_IEEE:%[0-9]+]]:_(s32) = G_FMAXNUM_IEEE [[FCANONICALIZE]], [[FCANONICALIZE1]] + ; VI-NEXT: $vgpr0 = COPY [[FMAXNUM_IEEE]](s32) ; GFX9-LABEL: name: test_fmaxnum_s32_ieee_mode_on ; GFX9: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX9: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX9: [[FCANONICALIZE:%[0-9]+]]:_(s32) = G_FCANONICALIZE [[COPY]] - ; GFX9: [[FCANONICALIZE1:%[0-9]+]]:_(s32) = G_FCANONICALIZE [[COPY1]] - ; GFX9: [[FMAXNUM_IEEE:%[0-9]+]]:_(s32) = G_FMAXNUM_IEEE [[FCANONICALIZE]], [[FCANONICALIZE1]] - ; GFX9: $vgpr0 = COPY [[FMAXNUM_IEEE]](s32) + ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; GFX9-NEXT: [[FCANONICALIZE:%[0-9]+]]:_(s32) = G_FCANONICALIZE [[COPY]] + ; GFX9-NEXT: [[FCANONICALIZE1:%[0-9]+]]:_(s32) = G_FCANONICALIZE [[COPY1]] + ; GFX9-NEXT: [[FMAXNUM_IEEE:%[0-9]+]]:_(s32) = G_FMAXNUM_IEEE [[FCANONICALIZE]], [[FCANONICALIZE1]] + ; GFX9-NEXT: $vgpr0 = COPY [[FMAXNUM_IEEE]](s32) %0:_(s32) = COPY $vgpr0 %1:_(s32) = COPY $vgpr1 %2:_(s32) = G_FMAXNUM %0, %1 @@ -51,19 +51,19 @@ body: | ; SI-LABEL: name: test_fmaxnum_s32_ieee_mode_off ; SI: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; SI: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; SI: [[FMAXNUM:%[0-9]+]]:_(s32) = G_FMAXNUM [[COPY]], [[COPY1]] - ; SI: $vgpr0 = COPY [[FMAXNUM]](s32) + ; SI-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; SI-NEXT: [[FMAXNUM:%[0-9]+]]:_(s32) = G_FMAXNUM [[COPY]], [[COPY1]] + ; SI-NEXT: $vgpr0 = COPY [[FMAXNUM]](s32) ; VI-LABEL: name: test_fmaxnum_s32_ieee_mode_off ; VI: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; VI: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; VI: [[FMAXNUM:%[0-9]+]]:_(s32) = G_FMAXNUM [[COPY]], [[COPY1]] - ; VI: $vgpr0 = COPY [[FMAXNUM]](s32) + ; VI-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; VI-NEXT: [[FMAXNUM:%[0-9]+]]:_(s32) = G_FMAXNUM [[COPY]], [[COPY1]] + ; VI-NEXT: $vgpr0 = COPY [[FMAXNUM]](s32) ; GFX9-LABEL: name: test_fmaxnum_s32_ieee_mode_off ; GFX9: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX9: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; 
GFX9: [[FMAXNUM:%[0-9]+]]:_(s32) = G_FMAXNUM [[COPY]], [[COPY1]] - ; GFX9: $vgpr0 = COPY [[FMAXNUM]](s32) + ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; GFX9-NEXT: [[FMAXNUM:%[0-9]+]]:_(s32) = G_FMAXNUM [[COPY]], [[COPY1]] + ; GFX9-NEXT: $vgpr0 = COPY [[FMAXNUM]](s32) %0:_(s32) = COPY $vgpr0 %1:_(s32) = COPY $vgpr1 %2:_(s32) = G_FMAXNUM %0, %1 @@ -78,19 +78,19 @@ body: | ; SI-LABEL: name: test_fmaxnum_s32_nnan ; SI: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; SI: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; SI: [[FMAXNUM_IEEE:%[0-9]+]]:_(s32) = nnan G_FMAXNUM_IEEE [[COPY]], [[COPY1]] - ; SI: $vgpr0 = COPY [[FMAXNUM_IEEE]](s32) + ; SI-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; SI-NEXT: [[FMAXNUM_IEEE:%[0-9]+]]:_(s32) = nnan G_FMAXNUM_IEEE [[COPY]], [[COPY1]] + ; SI-NEXT: $vgpr0 = COPY [[FMAXNUM_IEEE]](s32) ; VI-LABEL: name: test_fmaxnum_s32_nnan ; VI: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; VI: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; VI: [[FMAXNUM_IEEE:%[0-9]+]]:_(s32) = nnan G_FMAXNUM_IEEE [[COPY]], [[COPY1]] - ; VI: $vgpr0 = COPY [[FMAXNUM_IEEE]](s32) + ; VI-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; VI-NEXT: [[FMAXNUM_IEEE:%[0-9]+]]:_(s32) = nnan G_FMAXNUM_IEEE [[COPY]], [[COPY1]] + ; VI-NEXT: $vgpr0 = COPY [[FMAXNUM_IEEE]](s32) ; GFX9-LABEL: name: test_fmaxnum_s32_nnan ; GFX9: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX9: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX9: [[FMAXNUM_IEEE:%[0-9]+]]:_(s32) = nnan G_FMAXNUM_IEEE [[COPY]], [[COPY1]] - ; GFX9: $vgpr0 = COPY [[FMAXNUM_IEEE]](s32) + ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; GFX9-NEXT: [[FMAXNUM_IEEE:%[0-9]+]]:_(s32) = nnan G_FMAXNUM_IEEE [[COPY]], [[COPY1]] + ; GFX9-NEXT: $vgpr0 = COPY [[FMAXNUM_IEEE]](s32) %0:_(s32) = COPY $vgpr0 %1:_(s32) = COPY $vgpr1 %2:_(s32) = nnan G_FMAXNUM %0, %1 @@ -106,22 +106,22 @@ body: | ; SI-LABEL: name: test_fmaxnum_s32_nnan_lhs ; SI: [[COPY:%[0-9]+]]:_(s32) = nnan COPY $vgpr0 - ; SI: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; SI: [[FCANONICALIZE:%[0-9]+]]:_(s32) = G_FCANONICALIZE [[COPY1]] - ; SI: [[FMAXNUM_IEEE:%[0-9]+]]:_(s32) = G_FMAXNUM_IEEE [[COPY]], [[FCANONICALIZE]] - ; SI: $vgpr0 = COPY [[FMAXNUM_IEEE]](s32) + ; SI-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; SI-NEXT: [[FCANONICALIZE:%[0-9]+]]:_(s32) = G_FCANONICALIZE [[COPY1]] + ; SI-NEXT: [[FMAXNUM_IEEE:%[0-9]+]]:_(s32) = G_FMAXNUM_IEEE [[COPY]], [[FCANONICALIZE]] + ; SI-NEXT: $vgpr0 = COPY [[FMAXNUM_IEEE]](s32) ; VI-LABEL: name: test_fmaxnum_s32_nnan_lhs ; VI: [[COPY:%[0-9]+]]:_(s32) = nnan COPY $vgpr0 - ; VI: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; VI: [[FCANONICALIZE:%[0-9]+]]:_(s32) = G_FCANONICALIZE [[COPY1]] - ; VI: [[FMAXNUM_IEEE:%[0-9]+]]:_(s32) = G_FMAXNUM_IEEE [[COPY]], [[FCANONICALIZE]] - ; VI: $vgpr0 = COPY [[FMAXNUM_IEEE]](s32) + ; VI-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; VI-NEXT: [[FCANONICALIZE:%[0-9]+]]:_(s32) = G_FCANONICALIZE [[COPY1]] + ; VI-NEXT: [[FMAXNUM_IEEE:%[0-9]+]]:_(s32) = G_FMAXNUM_IEEE [[COPY]], [[FCANONICALIZE]] + ; VI-NEXT: $vgpr0 = COPY [[FMAXNUM_IEEE]](s32) ; GFX9-LABEL: name: test_fmaxnum_s32_nnan_lhs ; GFX9: [[COPY:%[0-9]+]]:_(s32) = nnan COPY $vgpr0 - ; GFX9: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX9: [[FCANONICALIZE:%[0-9]+]]:_(s32) = G_FCANONICALIZE [[COPY1]] - ; GFX9: [[FMAXNUM_IEEE:%[0-9]+]]:_(s32) = G_FMAXNUM_IEEE [[COPY]], [[FCANONICALIZE]] - ; GFX9: $vgpr0 = COPY [[FMAXNUM_IEEE]](s32) + ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; GFX9-NEXT: [[FCANONICALIZE:%[0-9]+]]:_(s32) = G_FCANONICALIZE [[COPY1]] + ; GFX9-NEXT: 
[[FMAXNUM_IEEE:%[0-9]+]]:_(s32) = G_FMAXNUM_IEEE [[COPY]], [[FCANONICALIZE]] + ; GFX9-NEXT: $vgpr0 = COPY [[FMAXNUM_IEEE]](s32) %0:_(s32) = nnan COPY $vgpr0 %1:_(s32) = COPY $vgpr1 %2:_(s32) = G_FMAXNUM %0, %1 @@ -137,22 +137,22 @@ body: | ; SI-LABEL: name: test_fmaxnum_s32_nnan_rhs ; SI: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; SI: [[COPY1:%[0-9]+]]:_(s32) = nnan COPY $vgpr1 - ; SI: [[FCANONICALIZE:%[0-9]+]]:_(s32) = G_FCANONICALIZE [[COPY]] - ; SI: [[FMAXNUM_IEEE:%[0-9]+]]:_(s32) = G_FMAXNUM_IEEE [[FCANONICALIZE]], [[COPY1]] - ; SI: $vgpr0 = COPY [[FMAXNUM_IEEE]](s32) + ; SI-NEXT: [[COPY1:%[0-9]+]]:_(s32) = nnan COPY $vgpr1 + ; SI-NEXT: [[FCANONICALIZE:%[0-9]+]]:_(s32) = G_FCANONICALIZE [[COPY]] + ; SI-NEXT: [[FMAXNUM_IEEE:%[0-9]+]]:_(s32) = G_FMAXNUM_IEEE [[FCANONICALIZE]], [[COPY1]] + ; SI-NEXT: $vgpr0 = COPY [[FMAXNUM_IEEE]](s32) ; VI-LABEL: name: test_fmaxnum_s32_nnan_rhs ; VI: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; VI: [[COPY1:%[0-9]+]]:_(s32) = nnan COPY $vgpr1 - ; VI: [[FCANONICALIZE:%[0-9]+]]:_(s32) = G_FCANONICALIZE [[COPY]] - ; VI: [[FMAXNUM_IEEE:%[0-9]+]]:_(s32) = G_FMAXNUM_IEEE [[FCANONICALIZE]], [[COPY1]] - ; VI: $vgpr0 = COPY [[FMAXNUM_IEEE]](s32) + ; VI-NEXT: [[COPY1:%[0-9]+]]:_(s32) = nnan COPY $vgpr1 + ; VI-NEXT: [[FCANONICALIZE:%[0-9]+]]:_(s32) = G_FCANONICALIZE [[COPY]] + ; VI-NEXT: [[FMAXNUM_IEEE:%[0-9]+]]:_(s32) = G_FMAXNUM_IEEE [[FCANONICALIZE]], [[COPY1]] + ; VI-NEXT: $vgpr0 = COPY [[FMAXNUM_IEEE]](s32) ; GFX9-LABEL: name: test_fmaxnum_s32_nnan_rhs ; GFX9: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX9: [[COPY1:%[0-9]+]]:_(s32) = nnan COPY $vgpr1 - ; GFX9: [[FCANONICALIZE:%[0-9]+]]:_(s32) = G_FCANONICALIZE [[COPY]] - ; GFX9: [[FMAXNUM_IEEE:%[0-9]+]]:_(s32) = G_FMAXNUM_IEEE [[FCANONICALIZE]], [[COPY1]] - ; GFX9: $vgpr0 = COPY [[FMAXNUM_IEEE]](s32) + ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(s32) = nnan COPY $vgpr1 + ; GFX9-NEXT: [[FCANONICALIZE:%[0-9]+]]:_(s32) = G_FCANONICALIZE [[COPY]] + ; GFX9-NEXT: [[FMAXNUM_IEEE:%[0-9]+]]:_(s32) = G_FMAXNUM_IEEE [[FCANONICALIZE]], [[COPY1]] + ; GFX9-NEXT: $vgpr0 = COPY [[FMAXNUM_IEEE]](s32) %0:_(s32) = COPY $vgpr0 %1:_(s32) = nnan COPY $vgpr1 %2:_(s32) = G_FMAXNUM %0, %1 @@ -167,19 +167,19 @@ body: | ; SI-LABEL: name: test_fmaxnum_s32_nnan_lhs_rhs ; SI: [[COPY:%[0-9]+]]:_(s32) = nnan COPY $vgpr0 - ; SI: [[COPY1:%[0-9]+]]:_(s32) = nnan COPY $vgpr1 - ; SI: [[FMAXNUM_IEEE:%[0-9]+]]:_(s32) = G_FMAXNUM_IEEE [[COPY]], [[COPY1]] - ; SI: $vgpr0 = COPY [[FMAXNUM_IEEE]](s32) + ; SI-NEXT: [[COPY1:%[0-9]+]]:_(s32) = nnan COPY $vgpr1 + ; SI-NEXT: [[FMAXNUM_IEEE:%[0-9]+]]:_(s32) = G_FMAXNUM_IEEE [[COPY]], [[COPY1]] + ; SI-NEXT: $vgpr0 = COPY [[FMAXNUM_IEEE]](s32) ; VI-LABEL: name: test_fmaxnum_s32_nnan_lhs_rhs ; VI: [[COPY:%[0-9]+]]:_(s32) = nnan COPY $vgpr0 - ; VI: [[COPY1:%[0-9]+]]:_(s32) = nnan COPY $vgpr1 - ; VI: [[FMAXNUM_IEEE:%[0-9]+]]:_(s32) = G_FMAXNUM_IEEE [[COPY]], [[COPY1]] - ; VI: $vgpr0 = COPY [[FMAXNUM_IEEE]](s32) + ; VI-NEXT: [[COPY1:%[0-9]+]]:_(s32) = nnan COPY $vgpr1 + ; VI-NEXT: [[FMAXNUM_IEEE:%[0-9]+]]:_(s32) = G_FMAXNUM_IEEE [[COPY]], [[COPY1]] + ; VI-NEXT: $vgpr0 = COPY [[FMAXNUM_IEEE]](s32) ; GFX9-LABEL: name: test_fmaxnum_s32_nnan_lhs_rhs ; GFX9: [[COPY:%[0-9]+]]:_(s32) = nnan COPY $vgpr0 - ; GFX9: [[COPY1:%[0-9]+]]:_(s32) = nnan COPY $vgpr1 - ; GFX9: [[FMAXNUM_IEEE:%[0-9]+]]:_(s32) = G_FMAXNUM_IEEE [[COPY]], [[COPY1]] - ; GFX9: $vgpr0 = COPY [[FMAXNUM_IEEE]](s32) + ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(s32) = nnan COPY $vgpr1 + ; GFX9-NEXT: [[FMAXNUM_IEEE:%[0-9]+]]:_(s32) = G_FMAXNUM_IEEE [[COPY]], [[COPY1]] + ; GFX9-NEXT: 
$vgpr0 = COPY [[FMAXNUM_IEEE]](s32) %0:_(s32) = nnan COPY $vgpr0 %1:_(s32) = nnan COPY $vgpr1 %2:_(s32) = G_FMAXNUM %0, %1 @@ -194,25 +194,25 @@ body: | ; SI-LABEL: name: test_fmaxnum_s64 ; SI: [[COPY:%[0-9]+]]:_(s64) = COPY $vgpr0_vgpr1 - ; SI: [[COPY1:%[0-9]+]]:_(s64) = COPY $vgpr2_vgpr3 - ; SI: [[FCANONICALIZE:%[0-9]+]]:_(s64) = G_FCANONICALIZE [[COPY]] - ; SI: [[FCANONICALIZE1:%[0-9]+]]:_(s64) = G_FCANONICALIZE [[COPY1]] - ; SI: [[FMAXNUM_IEEE:%[0-9]+]]:_(s64) = G_FMAXNUM_IEEE [[FCANONICALIZE]], [[FCANONICALIZE1]] - ; SI: $vgpr0_vgpr1 = COPY [[FMAXNUM_IEEE]](s64) + ; SI-NEXT: [[COPY1:%[0-9]+]]:_(s64) = COPY $vgpr2_vgpr3 + ; SI-NEXT: [[FCANONICALIZE:%[0-9]+]]:_(s64) = G_FCANONICALIZE [[COPY]] + ; SI-NEXT: [[FCANONICALIZE1:%[0-9]+]]:_(s64) = G_FCANONICALIZE [[COPY1]] + ; SI-NEXT: [[FMAXNUM_IEEE:%[0-9]+]]:_(s64) = G_FMAXNUM_IEEE [[FCANONICALIZE]], [[FCANONICALIZE1]] + ; SI-NEXT: $vgpr0_vgpr1 = COPY [[FMAXNUM_IEEE]](s64) ; VI-LABEL: name: test_fmaxnum_s64 ; VI: [[COPY:%[0-9]+]]:_(s64) = COPY $vgpr0_vgpr1 - ; VI: [[COPY1:%[0-9]+]]:_(s64) = COPY $vgpr2_vgpr3 - ; VI: [[FCANONICALIZE:%[0-9]+]]:_(s64) = G_FCANONICALIZE [[COPY]] - ; VI: [[FCANONICALIZE1:%[0-9]+]]:_(s64) = G_FCANONICALIZE [[COPY1]] - ; VI: [[FMAXNUM_IEEE:%[0-9]+]]:_(s64) = G_FMAXNUM_IEEE [[FCANONICALIZE]], [[FCANONICALIZE1]] - ; VI: $vgpr0_vgpr1 = COPY [[FMAXNUM_IEEE]](s64) + ; VI-NEXT: [[COPY1:%[0-9]+]]:_(s64) = COPY $vgpr2_vgpr3 + ; VI-NEXT: [[FCANONICALIZE:%[0-9]+]]:_(s64) = G_FCANONICALIZE [[COPY]] + ; VI-NEXT: [[FCANONICALIZE1:%[0-9]+]]:_(s64) = G_FCANONICALIZE [[COPY1]] + ; VI-NEXT: [[FMAXNUM_IEEE:%[0-9]+]]:_(s64) = G_FMAXNUM_IEEE [[FCANONICALIZE]], [[FCANONICALIZE1]] + ; VI-NEXT: $vgpr0_vgpr1 = COPY [[FMAXNUM_IEEE]](s64) ; GFX9-LABEL: name: test_fmaxnum_s64 ; GFX9: [[COPY:%[0-9]+]]:_(s64) = COPY $vgpr0_vgpr1 - ; GFX9: [[COPY1:%[0-9]+]]:_(s64) = COPY $vgpr2_vgpr3 - ; GFX9: [[FCANONICALIZE:%[0-9]+]]:_(s64) = G_FCANONICALIZE [[COPY]] - ; GFX9: [[FCANONICALIZE1:%[0-9]+]]:_(s64) = G_FCANONICALIZE [[COPY1]] - ; GFX9: [[FMAXNUM_IEEE:%[0-9]+]]:_(s64) = G_FMAXNUM_IEEE [[FCANONICALIZE]], [[FCANONICALIZE1]] - ; GFX9: $vgpr0_vgpr1 = COPY [[FMAXNUM_IEEE]](s64) + ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(s64) = COPY $vgpr2_vgpr3 + ; GFX9-NEXT: [[FCANONICALIZE:%[0-9]+]]:_(s64) = G_FCANONICALIZE [[COPY]] + ; GFX9-NEXT: [[FCANONICALIZE1:%[0-9]+]]:_(s64) = G_FCANONICALIZE [[COPY1]] + ; GFX9-NEXT: [[FMAXNUM_IEEE:%[0-9]+]]:_(s64) = G_FMAXNUM_IEEE [[FCANONICALIZE]], [[FCANONICALIZE1]] + ; GFX9-NEXT: $vgpr0_vgpr1 = COPY [[FMAXNUM_IEEE]](s64) %0:_(s64) = COPY $vgpr0_vgpr1 %1:_(s64) = COPY $vgpr2_vgpr3 %2:_(s64) = G_FMAXNUM %0, %1 @@ -227,35 +227,35 @@ body: | ; SI-LABEL: name: test_fmaxnum_s16 ; SI: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; SI: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; SI: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY]](s32) - ; SI: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY1]](s32) - ; SI: [[FPEXT:%[0-9]+]]:_(s32) = G_FPEXT [[TRUNC]](s16) - ; SI: [[FPEXT1:%[0-9]+]]:_(s32) = G_FPEXT [[TRUNC1]](s16) - ; SI: [[FMAXNUM_IEEE:%[0-9]+]]:_(s32) = G_FMAXNUM_IEEE [[FPEXT]], [[FPEXT1]] - ; SI: [[FPTRUNC:%[0-9]+]]:_(s16) = G_FPTRUNC [[FMAXNUM_IEEE]](s32) - ; SI: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[FPTRUNC]](s16) - ; SI: $vgpr0 = COPY [[ANYEXT]](s32) + ; SI-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; SI-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY]](s32) + ; SI-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY1]](s32) + ; SI-NEXT: [[FPEXT:%[0-9]+]]:_(s32) = G_FPEXT [[TRUNC]](s16) + ; SI-NEXT: [[FPEXT1:%[0-9]+]]:_(s32) = G_FPEXT 
[[TRUNC1]](s16) + ; SI-NEXT: [[FMAXNUM_IEEE:%[0-9]+]]:_(s32) = G_FMAXNUM_IEEE [[FPEXT]], [[FPEXT1]] + ; SI-NEXT: [[FPTRUNC:%[0-9]+]]:_(s16) = G_FPTRUNC [[FMAXNUM_IEEE]](s32) + ; SI-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[FPTRUNC]](s16) + ; SI-NEXT: $vgpr0 = COPY [[ANYEXT]](s32) ; VI-LABEL: name: test_fmaxnum_s16 ; VI: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; VI: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; VI: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY]](s32) - ; VI: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY1]](s32) - ; VI: [[FCANONICALIZE:%[0-9]+]]:_(s16) = G_FCANONICALIZE [[TRUNC]] - ; VI: [[FCANONICALIZE1:%[0-9]+]]:_(s16) = G_FCANONICALIZE [[TRUNC1]] - ; VI: [[FMAXNUM_IEEE:%[0-9]+]]:_(s16) = G_FMAXNUM_IEEE [[FCANONICALIZE]], [[FCANONICALIZE1]] - ; VI: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[FMAXNUM_IEEE]](s16) - ; VI: $vgpr0 = COPY [[ANYEXT]](s32) + ; VI-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; VI-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY]](s32) + ; VI-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY1]](s32) + ; VI-NEXT: [[FCANONICALIZE:%[0-9]+]]:_(s16) = G_FCANONICALIZE [[TRUNC]] + ; VI-NEXT: [[FCANONICALIZE1:%[0-9]+]]:_(s16) = G_FCANONICALIZE [[TRUNC1]] + ; VI-NEXT: [[FMAXNUM_IEEE:%[0-9]+]]:_(s16) = G_FMAXNUM_IEEE [[FCANONICALIZE]], [[FCANONICALIZE1]] + ; VI-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[FMAXNUM_IEEE]](s16) + ; VI-NEXT: $vgpr0 = COPY [[ANYEXT]](s32) ; GFX9-LABEL: name: test_fmaxnum_s16 ; GFX9: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX9: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX9: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY]](s32) - ; GFX9: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY1]](s32) - ; GFX9: [[FCANONICALIZE:%[0-9]+]]:_(s16) = G_FCANONICALIZE [[TRUNC]] - ; GFX9: [[FCANONICALIZE1:%[0-9]+]]:_(s16) = G_FCANONICALIZE [[TRUNC1]] - ; GFX9: [[FMAXNUM_IEEE:%[0-9]+]]:_(s16) = G_FMAXNUM_IEEE [[FCANONICALIZE]], [[FCANONICALIZE1]] - ; GFX9: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[FMAXNUM_IEEE]](s16) - ; GFX9: $vgpr0 = COPY [[ANYEXT]](s32) + ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; GFX9-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY]](s32) + ; GFX9-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY1]](s32) + ; GFX9-NEXT: [[FCANONICALIZE:%[0-9]+]]:_(s16) = G_FCANONICALIZE [[TRUNC]] + ; GFX9-NEXT: [[FCANONICALIZE1:%[0-9]+]]:_(s16) = G_FCANONICALIZE [[TRUNC1]] + ; GFX9-NEXT: [[FMAXNUM_IEEE:%[0-9]+]]:_(s16) = G_FMAXNUM_IEEE [[FCANONICALIZE]], [[FCANONICALIZE1]] + ; GFX9-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[FMAXNUM_IEEE]](s16) + ; GFX9-NEXT: $vgpr0 = COPY [[ANYEXT]](s32) %0:_(s32) = COPY $vgpr0 %1:_(s32) = COPY $vgpr1 %2:_(s16) = G_TRUNC %0 @@ -273,43 +273,43 @@ body: | ; SI-LABEL: name: test_fmaxnum_v2s32 ; SI: [[COPY:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr0_vgpr1 - ; SI: [[COPY1:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr2_vgpr3 - ; SI: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<2 x s32>) - ; SI: [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](<2 x s32>) - ; SI: [[FCANONICALIZE:%[0-9]+]]:_(s32) = G_FCANONICALIZE [[UV]] - ; SI: [[FCANONICALIZE1:%[0-9]+]]:_(s32) = G_FCANONICALIZE [[UV2]] - ; SI: [[FMAXNUM_IEEE:%[0-9]+]]:_(s32) = G_FMAXNUM_IEEE [[FCANONICALIZE]], [[FCANONICALIZE1]] - ; SI: [[FCANONICALIZE2:%[0-9]+]]:_(s32) = G_FCANONICALIZE [[UV1]] - ; SI: [[FCANONICALIZE3:%[0-9]+]]:_(s32) = G_FCANONICALIZE [[UV3]] - ; SI: [[FMAXNUM_IEEE1:%[0-9]+]]:_(s32) = G_FMAXNUM_IEEE [[FCANONICALIZE2]], [[FCANONICALIZE3]] - ; SI: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR 
[[FMAXNUM_IEEE]](s32), [[FMAXNUM_IEEE1]](s32) - ; SI: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x s32>) + ; SI-NEXT: [[COPY1:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr2_vgpr3 + ; SI-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<2 x s32>) + ; SI-NEXT: [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](<2 x s32>) + ; SI-NEXT: [[FCANONICALIZE:%[0-9]+]]:_(s32) = G_FCANONICALIZE [[UV]] + ; SI-NEXT: [[FCANONICALIZE1:%[0-9]+]]:_(s32) = G_FCANONICALIZE [[UV2]] + ; SI-NEXT: [[FMAXNUM_IEEE:%[0-9]+]]:_(s32) = G_FMAXNUM_IEEE [[FCANONICALIZE]], [[FCANONICALIZE1]] + ; SI-NEXT: [[FCANONICALIZE2:%[0-9]+]]:_(s32) = G_FCANONICALIZE [[UV1]] + ; SI-NEXT: [[FCANONICALIZE3:%[0-9]+]]:_(s32) = G_FCANONICALIZE [[UV3]] + ; SI-NEXT: [[FMAXNUM_IEEE1:%[0-9]+]]:_(s32) = G_FMAXNUM_IEEE [[FCANONICALIZE2]], [[FCANONICALIZE3]] + ; SI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[FMAXNUM_IEEE]](s32), [[FMAXNUM_IEEE1]](s32) + ; SI-NEXT: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x s32>) ; VI-LABEL: name: test_fmaxnum_v2s32 ; VI: [[COPY:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr0_vgpr1 - ; VI: [[COPY1:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr2_vgpr3 - ; VI: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<2 x s32>) - ; VI: [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](<2 x s32>) - ; VI: [[FCANONICALIZE:%[0-9]+]]:_(s32) = G_FCANONICALIZE [[UV]] - ; VI: [[FCANONICALIZE1:%[0-9]+]]:_(s32) = G_FCANONICALIZE [[UV2]] - ; VI: [[FMAXNUM_IEEE:%[0-9]+]]:_(s32) = G_FMAXNUM_IEEE [[FCANONICALIZE]], [[FCANONICALIZE1]] - ; VI: [[FCANONICALIZE2:%[0-9]+]]:_(s32) = G_FCANONICALIZE [[UV1]] - ; VI: [[FCANONICALIZE3:%[0-9]+]]:_(s32) = G_FCANONICALIZE [[UV3]] - ; VI: [[FMAXNUM_IEEE1:%[0-9]+]]:_(s32) = G_FMAXNUM_IEEE [[FCANONICALIZE2]], [[FCANONICALIZE3]] - ; VI: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[FMAXNUM_IEEE]](s32), [[FMAXNUM_IEEE1]](s32) - ; VI: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x s32>) + ; VI-NEXT: [[COPY1:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr2_vgpr3 + ; VI-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<2 x s32>) + ; VI-NEXT: [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](<2 x s32>) + ; VI-NEXT: [[FCANONICALIZE:%[0-9]+]]:_(s32) = G_FCANONICALIZE [[UV]] + ; VI-NEXT: [[FCANONICALIZE1:%[0-9]+]]:_(s32) = G_FCANONICALIZE [[UV2]] + ; VI-NEXT: [[FMAXNUM_IEEE:%[0-9]+]]:_(s32) = G_FMAXNUM_IEEE [[FCANONICALIZE]], [[FCANONICALIZE1]] + ; VI-NEXT: [[FCANONICALIZE2:%[0-9]+]]:_(s32) = G_FCANONICALIZE [[UV1]] + ; VI-NEXT: [[FCANONICALIZE3:%[0-9]+]]:_(s32) = G_FCANONICALIZE [[UV3]] + ; VI-NEXT: [[FMAXNUM_IEEE1:%[0-9]+]]:_(s32) = G_FMAXNUM_IEEE [[FCANONICALIZE2]], [[FCANONICALIZE3]] + ; VI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[FMAXNUM_IEEE]](s32), [[FMAXNUM_IEEE1]](s32) + ; VI-NEXT: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x s32>) ; GFX9-LABEL: name: test_fmaxnum_v2s32 ; GFX9: [[COPY:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr0_vgpr1 - ; GFX9: [[COPY1:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr2_vgpr3 - ; GFX9: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<2 x s32>) - ; GFX9: [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](<2 x s32>) - ; GFX9: [[FCANONICALIZE:%[0-9]+]]:_(s32) = G_FCANONICALIZE [[UV]] - ; GFX9: [[FCANONICALIZE1:%[0-9]+]]:_(s32) = G_FCANONICALIZE [[UV2]] - ; GFX9: [[FMAXNUM_IEEE:%[0-9]+]]:_(s32) = G_FMAXNUM_IEEE [[FCANONICALIZE]], [[FCANONICALIZE1]] - ; GFX9: 
[[FCANONICALIZE2:%[0-9]+]]:_(s32) = G_FCANONICALIZE [[UV1]] - ; GFX9: [[FCANONICALIZE3:%[0-9]+]]:_(s32) = G_FCANONICALIZE [[UV3]] - ; GFX9: [[FMAXNUM_IEEE1:%[0-9]+]]:_(s32) = G_FMAXNUM_IEEE [[FCANONICALIZE2]], [[FCANONICALIZE3]] - ; GFX9: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[FMAXNUM_IEEE]](s32), [[FMAXNUM_IEEE1]](s32) - ; GFX9: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x s32>) + ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr2_vgpr3 + ; GFX9-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<2 x s32>) + ; GFX9-NEXT: [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](<2 x s32>) + ; GFX9-NEXT: [[FCANONICALIZE:%[0-9]+]]:_(s32) = G_FCANONICALIZE [[UV]] + ; GFX9-NEXT: [[FCANONICALIZE1:%[0-9]+]]:_(s32) = G_FCANONICALIZE [[UV2]] + ; GFX9-NEXT: [[FMAXNUM_IEEE:%[0-9]+]]:_(s32) = G_FMAXNUM_IEEE [[FCANONICALIZE]], [[FCANONICALIZE1]] + ; GFX9-NEXT: [[FCANONICALIZE2:%[0-9]+]]:_(s32) = G_FCANONICALIZE [[UV1]] + ; GFX9-NEXT: [[FCANONICALIZE3:%[0-9]+]]:_(s32) = G_FCANONICALIZE [[UV3]] + ; GFX9-NEXT: [[FMAXNUM_IEEE1:%[0-9]+]]:_(s32) = G_FMAXNUM_IEEE [[FCANONICALIZE2]], [[FCANONICALIZE3]] + ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[FMAXNUM_IEEE]](s32), [[FMAXNUM_IEEE1]](s32) + ; GFX9-NEXT: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x s32>) %0:_(<2 x s32>) = COPY $vgpr0_vgpr1 %1:_(<2 x s32>) = COPY $vgpr2_vgpr3 %2:_(<2 x s32>) = G_FMAXNUM %0, %1 @@ -324,61 +324,61 @@ body: | ; SI-LABEL: name: test_fmaxnum_v2s16 ; SI: [[COPY:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0 - ; SI: [[COPY1:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr1 - ; SI: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY]](<2 x s16>) - ; SI: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST]](s32) - ; SI: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; SI: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) - ; SI: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) - ; SI: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[COPY1]](<2 x s16>) - ; SI: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST1]](s32) - ; SI: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C]](s32) - ; SI: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR1]](s32) - ; SI: [[FPEXT:%[0-9]+]]:_(s32) = G_FPEXT [[TRUNC]](s16) - ; SI: [[FPEXT1:%[0-9]+]]:_(s32) = G_FPEXT [[TRUNC2]](s16) - ; SI: [[FMAXNUM_IEEE:%[0-9]+]]:_(s32) = G_FMAXNUM_IEEE [[FPEXT]], [[FPEXT1]] - ; SI: [[FPTRUNC:%[0-9]+]]:_(s16) = G_FPTRUNC [[FMAXNUM_IEEE]](s32) - ; SI: [[FPEXT2:%[0-9]+]]:_(s32) = G_FPEXT [[TRUNC1]](s16) - ; SI: [[FPEXT3:%[0-9]+]]:_(s32) = G_FPEXT [[TRUNC3]](s16) - ; SI: [[FMAXNUM_IEEE1:%[0-9]+]]:_(s32) = G_FMAXNUM_IEEE [[FPEXT2]], [[FPEXT3]] - ; SI: [[FPTRUNC1:%[0-9]+]]:_(s16) = G_FPTRUNC [[FMAXNUM_IEEE1]](s32) - ; SI: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[FPTRUNC]](s16) - ; SI: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[FPTRUNC1]](s16) - ; SI: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C]](s32) - ; SI: [[OR:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL]] - ; SI: [[BITCAST2:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) - ; SI: $vgpr0 = COPY [[BITCAST2]](<2 x s16>) + ; SI-NEXT: [[COPY1:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr1 + ; SI-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY]](<2 x s16>) + ; SI-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST]](s32) + ; SI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; SI-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) + ; SI-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) + ; SI-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[COPY1]](<2 x s16>) + ; SI-NEXT: 
[[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST1]](s32) + ; SI-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C]](s32) + ; SI-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR1]](s32) + ; SI-NEXT: [[FPEXT:%[0-9]+]]:_(s32) = G_FPEXT [[TRUNC]](s16) + ; SI-NEXT: [[FPEXT1:%[0-9]+]]:_(s32) = G_FPEXT [[TRUNC2]](s16) + ; SI-NEXT: [[FMAXNUM_IEEE:%[0-9]+]]:_(s32) = G_FMAXNUM_IEEE [[FPEXT]], [[FPEXT1]] + ; SI-NEXT: [[FPTRUNC:%[0-9]+]]:_(s16) = G_FPTRUNC [[FMAXNUM_IEEE]](s32) + ; SI-NEXT: [[FPEXT2:%[0-9]+]]:_(s32) = G_FPEXT [[TRUNC1]](s16) + ; SI-NEXT: [[FPEXT3:%[0-9]+]]:_(s32) = G_FPEXT [[TRUNC3]](s16) + ; SI-NEXT: [[FMAXNUM_IEEE1:%[0-9]+]]:_(s32) = G_FMAXNUM_IEEE [[FPEXT2]], [[FPEXT3]] + ; SI-NEXT: [[FPTRUNC1:%[0-9]+]]:_(s16) = G_FPTRUNC [[FMAXNUM_IEEE1]](s32) + ; SI-NEXT: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[FPTRUNC]](s16) + ; SI-NEXT: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[FPTRUNC1]](s16) + ; SI-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C]](s32) + ; SI-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL]] + ; SI-NEXT: [[BITCAST2:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) + ; SI-NEXT: $vgpr0 = COPY [[BITCAST2]](<2 x s16>) ; VI-LABEL: name: test_fmaxnum_v2s16 ; VI: [[COPY:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0 - ; VI: [[COPY1:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr1 - ; VI: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY]](<2 x s16>) - ; VI: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST]](s32) - ; VI: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; VI: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) - ; VI: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) - ; VI: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[COPY1]](<2 x s16>) - ; VI: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST1]](s32) - ; VI: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C]](s32) - ; VI: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR1]](s32) - ; VI: [[FCANONICALIZE:%[0-9]+]]:_(s16) = G_FCANONICALIZE [[TRUNC]] - ; VI: [[FCANONICALIZE1:%[0-9]+]]:_(s16) = G_FCANONICALIZE [[TRUNC2]] - ; VI: [[FMAXNUM_IEEE:%[0-9]+]]:_(s16) = G_FMAXNUM_IEEE [[FCANONICALIZE]], [[FCANONICALIZE1]] - ; VI: [[FCANONICALIZE2:%[0-9]+]]:_(s16) = G_FCANONICALIZE [[TRUNC1]] - ; VI: [[FCANONICALIZE3:%[0-9]+]]:_(s16) = G_FCANONICALIZE [[TRUNC3]] - ; VI: [[FMAXNUM_IEEE1:%[0-9]+]]:_(s16) = G_FMAXNUM_IEEE [[FCANONICALIZE2]], [[FCANONICALIZE3]] - ; VI: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[FMAXNUM_IEEE]](s16) - ; VI: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[FMAXNUM_IEEE1]](s16) - ; VI: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C]](s32) - ; VI: [[OR:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL]] - ; VI: [[BITCAST2:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) - ; VI: $vgpr0 = COPY [[BITCAST2]](<2 x s16>) + ; VI-NEXT: [[COPY1:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr1 + ; VI-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY]](<2 x s16>) + ; VI-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST]](s32) + ; VI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; VI-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) + ; VI-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) + ; VI-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[COPY1]](<2 x s16>) + ; VI-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST1]](s32) + ; VI-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C]](s32) + ; VI-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR1]](s32) + ; VI-NEXT: [[FCANONICALIZE:%[0-9]+]]:_(s16) = G_FCANONICALIZE [[TRUNC]] + ; VI-NEXT: [[FCANONICALIZE1:%[0-9]+]]:_(s16) = G_FCANONICALIZE [[TRUNC2]] + ; VI-NEXT: [[FMAXNUM_IEEE:%[0-9]+]]:_(s16) = G_FMAXNUM_IEEE 
[[FCANONICALIZE]], [[FCANONICALIZE1]] + ; VI-NEXT: [[FCANONICALIZE2:%[0-9]+]]:_(s16) = G_FCANONICALIZE [[TRUNC1]] + ; VI-NEXT: [[FCANONICALIZE3:%[0-9]+]]:_(s16) = G_FCANONICALIZE [[TRUNC3]] + ; VI-NEXT: [[FMAXNUM_IEEE1:%[0-9]+]]:_(s16) = G_FMAXNUM_IEEE [[FCANONICALIZE2]], [[FCANONICALIZE3]] + ; VI-NEXT: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[FMAXNUM_IEEE]](s16) + ; VI-NEXT: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[FMAXNUM_IEEE1]](s16) + ; VI-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C]](s32) + ; VI-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL]] + ; VI-NEXT: [[BITCAST2:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) + ; VI-NEXT: $vgpr0 = COPY [[BITCAST2]](<2 x s16>) ; GFX9-LABEL: name: test_fmaxnum_v2s16 ; GFX9: [[COPY:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0 - ; GFX9: [[COPY1:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr1 - ; GFX9: [[FCANONICALIZE:%[0-9]+]]:_(<2 x s16>) = G_FCANONICALIZE [[COPY]] - ; GFX9: [[FCANONICALIZE1:%[0-9]+]]:_(<2 x s16>) = G_FCANONICALIZE [[COPY1]] - ; GFX9: [[FMAXNUM_IEEE:%[0-9]+]]:_(<2 x s16>) = G_FMAXNUM_IEEE [[FCANONICALIZE]], [[FCANONICALIZE1]] - ; GFX9: $vgpr0 = COPY [[FMAXNUM_IEEE]](<2 x s16>) + ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr1 + ; GFX9-NEXT: [[FCANONICALIZE:%[0-9]+]]:_(<2 x s16>) = G_FCANONICALIZE [[COPY]] + ; GFX9-NEXT: [[FCANONICALIZE1:%[0-9]+]]:_(<2 x s16>) = G_FCANONICALIZE [[COPY1]] + ; GFX9-NEXT: [[FMAXNUM_IEEE:%[0-9]+]]:_(<2 x s16>) = G_FMAXNUM_IEEE [[FCANONICALIZE]], [[FCANONICALIZE1]] + ; GFX9-NEXT: $vgpr0 = COPY [[FMAXNUM_IEEE]](<2 x s16>) %0:_(<2 x s16>) = COPY $vgpr0 %1:_(<2 x s16>) = COPY $vgpr1 %2:_(<2 x s16>) = G_FMAXNUM %0, %1 @@ -392,137 +392,137 @@ body: | liveins: $vgpr0_vgpr1_vgpr2, $vgpr3_vgpr4_vgpr5 ; SI-LABEL: name: test_fmaxnum_v3s16 ; SI: [[COPY:%[0-9]+]]:_(<6 x s16>) = COPY $vgpr0_vgpr1_vgpr2 - ; SI: [[COPY1:%[0-9]+]]:_(<6 x s16>) = COPY $vgpr3_vgpr4_vgpr5 - ; SI: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>), [[UV2:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[COPY]](<6 x s16>) - ; SI: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>) - ; SI: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST]](s32) - ; SI: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; SI: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) - ; SI: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) - ; SI: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>) - ; SI: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST1]](s32) - ; SI: [[UV3:%[0-9]+]]:_(<2 x s16>), [[UV4:%[0-9]+]]:_(<2 x s16>), [[UV5:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[COPY1]](<6 x s16>) - ; SI: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[UV3]](<2 x s16>) - ; SI: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST2]](s32) - ; SI: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C]](s32) - ; SI: [[TRUNC4:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR1]](s32) - ; SI: [[BITCAST3:%[0-9]+]]:_(s32) = G_BITCAST [[UV4]](<2 x s16>) - ; SI: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST3]](s32) - ; SI: [[FPEXT:%[0-9]+]]:_(s32) = G_FPEXT [[TRUNC]](s16) - ; SI: [[FPEXT1:%[0-9]+]]:_(s32) = G_FPEXT [[TRUNC3]](s16) - ; SI: [[FMAXNUM_IEEE:%[0-9]+]]:_(s32) = G_FMAXNUM_IEEE [[FPEXT]], [[FPEXT1]] - ; SI: [[FPTRUNC:%[0-9]+]]:_(s16) = G_FPTRUNC [[FMAXNUM_IEEE]](s32) - ; SI: [[FPEXT2:%[0-9]+]]:_(s32) = G_FPEXT [[TRUNC1]](s16) - ; SI: [[FPEXT3:%[0-9]+]]:_(s32) = G_FPEXT [[TRUNC4]](s16) - ; SI: [[FMAXNUM_IEEE1:%[0-9]+]]:_(s32) = G_FMAXNUM_IEEE [[FPEXT2]], [[FPEXT3]] - ; SI: [[FPTRUNC1:%[0-9]+]]:_(s16) = G_FPTRUNC [[FMAXNUM_IEEE1]](s32) - ; SI: [[FPEXT4:%[0-9]+]]:_(s32) = G_FPEXT [[TRUNC2]](s16) - ; SI: 
[[FPEXT5:%[0-9]+]]:_(s32) = G_FPEXT [[TRUNC5]](s16) - ; SI: [[FMAXNUM_IEEE2:%[0-9]+]]:_(s32) = G_FMAXNUM_IEEE [[FPEXT4]], [[FPEXT5]] - ; SI: [[FPTRUNC2:%[0-9]+]]:_(s16) = G_FPTRUNC [[FMAXNUM_IEEE2]](s32) - ; SI: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF - ; SI: [[UV6:%[0-9]+]]:_(<2 x s16>), [[UV7:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<4 x s16>) - ; SI: [[BITCAST4:%[0-9]+]]:_(s32) = G_BITCAST [[UV6]](<2 x s16>) - ; SI: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST4]], [[C]](s32) - ; SI: [[BITCAST5:%[0-9]+]]:_(s32) = G_BITCAST [[UV7]](<2 x s16>) - ; SI: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[FPTRUNC]](s16) - ; SI: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[FPTRUNC1]](s16) - ; SI: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C]](s32) - ; SI: [[OR:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL]] - ; SI: [[BITCAST6:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) - ; SI: [[ZEXT2:%[0-9]+]]:_(s32) = G_ZEXT [[FPTRUNC2]](s16) - ; SI: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 - ; SI: [[AND:%[0-9]+]]:_(s32) = G_AND [[BITCAST4]], [[C1]] - ; SI: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND]], [[C]](s32) - ; SI: [[OR1:%[0-9]+]]:_(s32) = G_OR [[ZEXT2]], [[SHL1]] - ; SI: [[BITCAST7:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32) - ; SI: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LSHR2]], [[C1]] - ; SI: [[AND2:%[0-9]+]]:_(s32) = G_AND [[BITCAST5]], [[C1]] - ; SI: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND2]], [[C]](s32) - ; SI: [[OR2:%[0-9]+]]:_(s32) = G_OR [[AND1]], [[SHL2]] - ; SI: [[BITCAST8:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR2]](s32) - ; SI: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BITCAST6]](<2 x s16>), [[BITCAST7]](<2 x s16>), [[BITCAST8]](<2 x s16>) - ; SI: $vgpr0_vgpr1_vgpr2 = COPY [[CONCAT_VECTORS]](<6 x s16>) + ; SI-NEXT: [[COPY1:%[0-9]+]]:_(<6 x s16>) = COPY $vgpr3_vgpr4_vgpr5 + ; SI-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>), [[UV2:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[COPY]](<6 x s16>) + ; SI-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>) + ; SI-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST]](s32) + ; SI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; SI-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) + ; SI-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) + ; SI-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>) + ; SI-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST1]](s32) + ; SI-NEXT: [[UV3:%[0-9]+]]:_(<2 x s16>), [[UV4:%[0-9]+]]:_(<2 x s16>), [[UV5:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[COPY1]](<6 x s16>) + ; SI-NEXT: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[UV3]](<2 x s16>) + ; SI-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST2]](s32) + ; SI-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C]](s32) + ; SI-NEXT: [[TRUNC4:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR1]](s32) + ; SI-NEXT: [[BITCAST3:%[0-9]+]]:_(s32) = G_BITCAST [[UV4]](<2 x s16>) + ; SI-NEXT: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST3]](s32) + ; SI-NEXT: [[FPEXT:%[0-9]+]]:_(s32) = G_FPEXT [[TRUNC]](s16) + ; SI-NEXT: [[FPEXT1:%[0-9]+]]:_(s32) = G_FPEXT [[TRUNC3]](s16) + ; SI-NEXT: [[FMAXNUM_IEEE:%[0-9]+]]:_(s32) = G_FMAXNUM_IEEE [[FPEXT]], [[FPEXT1]] + ; SI-NEXT: [[FPTRUNC:%[0-9]+]]:_(s16) = G_FPTRUNC [[FMAXNUM_IEEE]](s32) + ; SI-NEXT: [[FPEXT2:%[0-9]+]]:_(s32) = G_FPEXT [[TRUNC1]](s16) + ; SI-NEXT: [[FPEXT3:%[0-9]+]]:_(s32) = G_FPEXT [[TRUNC4]](s16) + ; SI-NEXT: [[FMAXNUM_IEEE1:%[0-9]+]]:_(s32) = G_FMAXNUM_IEEE [[FPEXT2]], [[FPEXT3]] + ; SI-NEXT: [[FPTRUNC1:%[0-9]+]]:_(s16) = G_FPTRUNC [[FMAXNUM_IEEE1]](s32) + ; SI-NEXT: 
[[FPEXT4:%[0-9]+]]:_(s32) = G_FPEXT [[TRUNC2]](s16) + ; SI-NEXT: [[FPEXT5:%[0-9]+]]:_(s32) = G_FPEXT [[TRUNC5]](s16) + ; SI-NEXT: [[FMAXNUM_IEEE2:%[0-9]+]]:_(s32) = G_FMAXNUM_IEEE [[FPEXT4]], [[FPEXT5]] + ; SI-NEXT: [[FPTRUNC2:%[0-9]+]]:_(s16) = G_FPTRUNC [[FMAXNUM_IEEE2]](s32) + ; SI-NEXT: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF + ; SI-NEXT: [[UV6:%[0-9]+]]:_(<2 x s16>), [[UV7:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<4 x s16>) + ; SI-NEXT: [[BITCAST4:%[0-9]+]]:_(s32) = G_BITCAST [[UV6]](<2 x s16>) + ; SI-NEXT: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST4]], [[C]](s32) + ; SI-NEXT: [[BITCAST5:%[0-9]+]]:_(s32) = G_BITCAST [[UV7]](<2 x s16>) + ; SI-NEXT: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[FPTRUNC]](s16) + ; SI-NEXT: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[FPTRUNC1]](s16) + ; SI-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C]](s32) + ; SI-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL]] + ; SI-NEXT: [[BITCAST6:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) + ; SI-NEXT: [[ZEXT2:%[0-9]+]]:_(s32) = G_ZEXT [[FPTRUNC2]](s16) + ; SI-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 + ; SI-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[BITCAST4]], [[C1]] + ; SI-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND]], [[C]](s32) + ; SI-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[ZEXT2]], [[SHL1]] + ; SI-NEXT: [[BITCAST7:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32) + ; SI-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LSHR2]], [[C1]] + ; SI-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[BITCAST5]], [[C1]] + ; SI-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND2]], [[C]](s32) + ; SI-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[AND1]], [[SHL2]] + ; SI-NEXT: [[BITCAST8:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR2]](s32) + ; SI-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BITCAST6]](<2 x s16>), [[BITCAST7]](<2 x s16>), [[BITCAST8]](<2 x s16>) + ; SI-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[CONCAT_VECTORS]](<6 x s16>) ; VI-LABEL: name: test_fmaxnum_v3s16 ; VI: [[COPY:%[0-9]+]]:_(<6 x s16>) = COPY $vgpr0_vgpr1_vgpr2 - ; VI: [[COPY1:%[0-9]+]]:_(<6 x s16>) = COPY $vgpr3_vgpr4_vgpr5 - ; VI: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>), [[UV2:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[COPY]](<6 x s16>) - ; VI: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>) - ; VI: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST]](s32) - ; VI: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; VI: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) - ; VI: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) - ; VI: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>) - ; VI: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST1]](s32) - ; VI: [[UV3:%[0-9]+]]:_(<2 x s16>), [[UV4:%[0-9]+]]:_(<2 x s16>), [[UV5:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[COPY1]](<6 x s16>) - ; VI: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[UV3]](<2 x s16>) - ; VI: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST2]](s32) - ; VI: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C]](s32) - ; VI: [[TRUNC4:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR1]](s32) - ; VI: [[BITCAST3:%[0-9]+]]:_(s32) = G_BITCAST [[UV4]](<2 x s16>) - ; VI: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST3]](s32) - ; VI: [[FCANONICALIZE:%[0-9]+]]:_(s16) = G_FCANONICALIZE [[TRUNC]] - ; VI: [[FCANONICALIZE1:%[0-9]+]]:_(s16) = G_FCANONICALIZE [[TRUNC3]] - ; VI: [[FMAXNUM_IEEE:%[0-9]+]]:_(s16) = G_FMAXNUM_IEEE [[FCANONICALIZE]], [[FCANONICALIZE1]] - ; VI: [[FCANONICALIZE2:%[0-9]+]]:_(s16) = G_FCANONICALIZE [[TRUNC1]] - ; VI: [[FCANONICALIZE3:%[0-9]+]]:_(s16) = G_FCANONICALIZE [[TRUNC4]] - ; VI: 
[[FMAXNUM_IEEE1:%[0-9]+]]:_(s16) = G_FMAXNUM_IEEE [[FCANONICALIZE2]], [[FCANONICALIZE3]] - ; VI: [[FCANONICALIZE4:%[0-9]+]]:_(s16) = G_FCANONICALIZE [[TRUNC2]] - ; VI: [[FCANONICALIZE5:%[0-9]+]]:_(s16) = G_FCANONICALIZE [[TRUNC5]] - ; VI: [[FMAXNUM_IEEE2:%[0-9]+]]:_(s16) = G_FMAXNUM_IEEE [[FCANONICALIZE4]], [[FCANONICALIZE5]] - ; VI: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF - ; VI: [[UV6:%[0-9]+]]:_(<2 x s16>), [[UV7:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<4 x s16>) - ; VI: [[BITCAST4:%[0-9]+]]:_(s32) = G_BITCAST [[UV6]](<2 x s16>) - ; VI: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST4]], [[C]](s32) - ; VI: [[BITCAST5:%[0-9]+]]:_(s32) = G_BITCAST [[UV7]](<2 x s16>) - ; VI: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[FMAXNUM_IEEE]](s16) - ; VI: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[FMAXNUM_IEEE1]](s16) - ; VI: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C]](s32) - ; VI: [[OR:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL]] - ; VI: [[BITCAST6:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) - ; VI: [[ZEXT2:%[0-9]+]]:_(s32) = G_ZEXT [[FMAXNUM_IEEE2]](s16) - ; VI: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 - ; VI: [[AND:%[0-9]+]]:_(s32) = G_AND [[BITCAST4]], [[C1]] - ; VI: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND]], [[C]](s32) - ; VI: [[OR1:%[0-9]+]]:_(s32) = G_OR [[ZEXT2]], [[SHL1]] - ; VI: [[BITCAST7:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32) - ; VI: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LSHR2]], [[C1]] - ; VI: [[AND2:%[0-9]+]]:_(s32) = G_AND [[BITCAST5]], [[C1]] - ; VI: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND2]], [[C]](s32) - ; VI: [[OR2:%[0-9]+]]:_(s32) = G_OR [[AND1]], [[SHL2]] - ; VI: [[BITCAST8:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR2]](s32) - ; VI: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BITCAST6]](<2 x s16>), [[BITCAST7]](<2 x s16>), [[BITCAST8]](<2 x s16>) - ; VI: $vgpr0_vgpr1_vgpr2 = COPY [[CONCAT_VECTORS]](<6 x s16>) + ; VI-NEXT: [[COPY1:%[0-9]+]]:_(<6 x s16>) = COPY $vgpr3_vgpr4_vgpr5 + ; VI-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>), [[UV2:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[COPY]](<6 x s16>) + ; VI-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>) + ; VI-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST]](s32) + ; VI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; VI-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) + ; VI-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) + ; VI-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>) + ; VI-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST1]](s32) + ; VI-NEXT: [[UV3:%[0-9]+]]:_(<2 x s16>), [[UV4:%[0-9]+]]:_(<2 x s16>), [[UV5:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[COPY1]](<6 x s16>) + ; VI-NEXT: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[UV3]](<2 x s16>) + ; VI-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST2]](s32) + ; VI-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C]](s32) + ; VI-NEXT: [[TRUNC4:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR1]](s32) + ; VI-NEXT: [[BITCAST3:%[0-9]+]]:_(s32) = G_BITCAST [[UV4]](<2 x s16>) + ; VI-NEXT: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST3]](s32) + ; VI-NEXT: [[FCANONICALIZE:%[0-9]+]]:_(s16) = G_FCANONICALIZE [[TRUNC]] + ; VI-NEXT: [[FCANONICALIZE1:%[0-9]+]]:_(s16) = G_FCANONICALIZE [[TRUNC3]] + ; VI-NEXT: [[FMAXNUM_IEEE:%[0-9]+]]:_(s16) = G_FMAXNUM_IEEE [[FCANONICALIZE]], [[FCANONICALIZE1]] + ; VI-NEXT: [[FCANONICALIZE2:%[0-9]+]]:_(s16) = G_FCANONICALIZE [[TRUNC1]] + ; VI-NEXT: [[FCANONICALIZE3:%[0-9]+]]:_(s16) = G_FCANONICALIZE [[TRUNC4]] + ; VI-NEXT: [[FMAXNUM_IEEE1:%[0-9]+]]:_(s16) = 
G_FMAXNUM_IEEE [[FCANONICALIZE2]], [[FCANONICALIZE3]] + ; VI-NEXT: [[FCANONICALIZE4:%[0-9]+]]:_(s16) = G_FCANONICALIZE [[TRUNC2]] + ; VI-NEXT: [[FCANONICALIZE5:%[0-9]+]]:_(s16) = G_FCANONICALIZE [[TRUNC5]] + ; VI-NEXT: [[FMAXNUM_IEEE2:%[0-9]+]]:_(s16) = G_FMAXNUM_IEEE [[FCANONICALIZE4]], [[FCANONICALIZE5]] + ; VI-NEXT: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF + ; VI-NEXT: [[UV6:%[0-9]+]]:_(<2 x s16>), [[UV7:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<4 x s16>) + ; VI-NEXT: [[BITCAST4:%[0-9]+]]:_(s32) = G_BITCAST [[UV6]](<2 x s16>) + ; VI-NEXT: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST4]], [[C]](s32) + ; VI-NEXT: [[BITCAST5:%[0-9]+]]:_(s32) = G_BITCAST [[UV7]](<2 x s16>) + ; VI-NEXT: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[FMAXNUM_IEEE]](s16) + ; VI-NEXT: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[FMAXNUM_IEEE1]](s16) + ; VI-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C]](s32) + ; VI-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL]] + ; VI-NEXT: [[BITCAST6:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) + ; VI-NEXT: [[ZEXT2:%[0-9]+]]:_(s32) = G_ZEXT [[FMAXNUM_IEEE2]](s16) + ; VI-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 + ; VI-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[BITCAST4]], [[C1]] + ; VI-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND]], [[C]](s32) + ; VI-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[ZEXT2]], [[SHL1]] + ; VI-NEXT: [[BITCAST7:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32) + ; VI-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LSHR2]], [[C1]] + ; VI-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[BITCAST5]], [[C1]] + ; VI-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND2]], [[C]](s32) + ; VI-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[AND1]], [[SHL2]] + ; VI-NEXT: [[BITCAST8:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR2]](s32) + ; VI-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BITCAST6]](<2 x s16>), [[BITCAST7]](<2 x s16>), [[BITCAST8]](<2 x s16>) + ; VI-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[CONCAT_VECTORS]](<6 x s16>) ; GFX9-LABEL: name: test_fmaxnum_v3s16 ; GFX9: [[COPY:%[0-9]+]]:_(<6 x s16>) = COPY $vgpr0_vgpr1_vgpr2 - ; GFX9: [[COPY1:%[0-9]+]]:_(<6 x s16>) = COPY $vgpr3_vgpr4_vgpr5 - ; GFX9: [[UV:%[0-9]+]]:_(<3 x s16>), [[UV1:%[0-9]+]]:_(<3 x s16>) = G_UNMERGE_VALUES [[COPY]](<6 x s16>) - ; GFX9: [[UV2:%[0-9]+]]:_(<3 x s16>), [[UV3:%[0-9]+]]:_(<3 x s16>) = G_UNMERGE_VALUES [[COPY1]](<6 x s16>) - ; GFX9: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF - ; GFX9: [[INSERT:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF]], [[UV]](<3 x s16>), 0 - ; GFX9: [[INSERT1:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF]], [[UV2]](<3 x s16>), 0 - ; GFX9: [[UV4:%[0-9]+]]:_(<2 x s16>), [[UV5:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[INSERT]](<4 x s16>) - ; GFX9: [[UV6:%[0-9]+]]:_(<2 x s16>), [[UV7:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[INSERT1]](<4 x s16>) - ; GFX9: [[FCANONICALIZE:%[0-9]+]]:_(<2 x s16>) = G_FCANONICALIZE [[UV4]] - ; GFX9: [[FCANONICALIZE1:%[0-9]+]]:_(<2 x s16>) = G_FCANONICALIZE [[UV6]] - ; GFX9: [[FMAXNUM_IEEE:%[0-9]+]]:_(<2 x s16>) = G_FMAXNUM_IEEE [[FCANONICALIZE]], [[FCANONICALIZE1]] - ; GFX9: [[FCANONICALIZE2:%[0-9]+]]:_(<2 x s16>) = G_FCANONICALIZE [[UV5]] - ; GFX9: [[FCANONICALIZE3:%[0-9]+]]:_(<2 x s16>) = G_FCANONICALIZE [[UV7]] - ; GFX9: [[FMAXNUM_IEEE1:%[0-9]+]]:_(<2 x s16>) = G_FMAXNUM_IEEE [[FCANONICALIZE2]], [[FCANONICALIZE3]] - ; GFX9: [[DEF1:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF - ; GFX9: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[FMAXNUM_IEEE]](<2 x s16>) - ; GFX9: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; GFX9: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], 
[[C]](s32) - ; GFX9: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[FMAXNUM_IEEE1]](<2 x s16>) - ; GFX9: [[UV8:%[0-9]+]]:_(<2 x s16>), [[UV9:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF1]](<4 x s16>) - ; GFX9: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[UV8]](<2 x s16>) - ; GFX9: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C]](s32) - ; GFX9: [[BITCAST3:%[0-9]+]]:_(s32) = G_BITCAST [[UV9]](<2 x s16>) - ; GFX9: [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[BITCAST]](s32), [[LSHR]](s32) - ; GFX9: [[BUILD_VECTOR_TRUNC1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[BITCAST1]](s32), [[BITCAST2]](s32) - ; GFX9: [[BUILD_VECTOR_TRUNC2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[LSHR1]](s32), [[BITCAST3]](s32) - ; GFX9: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR_TRUNC]](<2 x s16>), [[BUILD_VECTOR_TRUNC1]](<2 x s16>), [[BUILD_VECTOR_TRUNC2]](<2 x s16>) - ; GFX9: $vgpr0_vgpr1_vgpr2 = COPY [[CONCAT_VECTORS]](<6 x s16>) + ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(<6 x s16>) = COPY $vgpr3_vgpr4_vgpr5 + ; GFX9-NEXT: [[UV:%[0-9]+]]:_(<3 x s16>), [[UV1:%[0-9]+]]:_(<3 x s16>) = G_UNMERGE_VALUES [[COPY]](<6 x s16>) + ; GFX9-NEXT: [[UV2:%[0-9]+]]:_(<3 x s16>), [[UV3:%[0-9]+]]:_(<3 x s16>) = G_UNMERGE_VALUES [[COPY1]](<6 x s16>) + ; GFX9-NEXT: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF + ; GFX9-NEXT: [[INSERT:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF]], [[UV]](<3 x s16>), 0 + ; GFX9-NEXT: [[INSERT1:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF]], [[UV2]](<3 x s16>), 0 + ; GFX9-NEXT: [[UV4:%[0-9]+]]:_(<2 x s16>), [[UV5:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[INSERT]](<4 x s16>) + ; GFX9-NEXT: [[UV6:%[0-9]+]]:_(<2 x s16>), [[UV7:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[INSERT1]](<4 x s16>) + ; GFX9-NEXT: [[FCANONICALIZE:%[0-9]+]]:_(<2 x s16>) = G_FCANONICALIZE [[UV4]] + ; GFX9-NEXT: [[FCANONICALIZE1:%[0-9]+]]:_(<2 x s16>) = G_FCANONICALIZE [[UV6]] + ; GFX9-NEXT: [[FMAXNUM_IEEE:%[0-9]+]]:_(<2 x s16>) = G_FMAXNUM_IEEE [[FCANONICALIZE]], [[FCANONICALIZE1]] + ; GFX9-NEXT: [[FCANONICALIZE2:%[0-9]+]]:_(<2 x s16>) = G_FCANONICALIZE [[UV5]] + ; GFX9-NEXT: [[FCANONICALIZE3:%[0-9]+]]:_(<2 x s16>) = G_FCANONICALIZE [[UV7]] + ; GFX9-NEXT: [[FMAXNUM_IEEE1:%[0-9]+]]:_(<2 x s16>) = G_FMAXNUM_IEEE [[FCANONICALIZE2]], [[FCANONICALIZE3]] + ; GFX9-NEXT: [[DEF1:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF + ; GFX9-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[FMAXNUM_IEEE]](<2 x s16>) + ; GFX9-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; GFX9-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) + ; GFX9-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[FMAXNUM_IEEE1]](<2 x s16>) + ; GFX9-NEXT: [[UV8:%[0-9]+]]:_(<2 x s16>), [[UV9:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF1]](<4 x s16>) + ; GFX9-NEXT: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[UV8]](<2 x s16>) + ; GFX9-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C]](s32) + ; GFX9-NEXT: [[BITCAST3:%[0-9]+]]:_(s32) = G_BITCAST [[UV9]](<2 x s16>) + ; GFX9-NEXT: [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[BITCAST]](s32), [[LSHR]](s32) + ; GFX9-NEXT: [[BUILD_VECTOR_TRUNC1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[BITCAST1]](s32), [[BITCAST2]](s32) + ; GFX9-NEXT: [[BUILD_VECTOR_TRUNC2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[LSHR1]](s32), [[BITCAST3]](s32) + ; GFX9-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR_TRUNC]](<2 x s16>), [[BUILD_VECTOR_TRUNC1]](<2 x s16>), [[BUILD_VECTOR_TRUNC2]](<2 x s16>) + ; GFX9-NEXT: $vgpr0_vgpr1_vgpr2 = COPY 
[[CONCAT_VECTORS]](<6 x s16>) %0:_(<6 x s16>) = COPY $vgpr0_vgpr1_vgpr2 %1:_(<6 x s16>) = COPY $vgpr3_vgpr4_vgpr5 %2:_(<3 x s16>), %3:_(<3 x s16>) = G_UNMERGE_VALUES %0 @@ -542,113 +542,113 @@ body: | ; SI-LABEL: name: test_fmaxnum_v4s16 ; SI: [[COPY:%[0-9]+]]:_(<4 x s16>) = COPY $vgpr0_vgpr1 - ; SI: [[COPY1:%[0-9]+]]:_(<4 x s16>) = COPY $vgpr2_vgpr3 - ; SI: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[COPY]](<4 x s16>) - ; SI: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>) - ; SI: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST]](s32) - ; SI: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; SI: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) - ; SI: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) - ; SI: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>) - ; SI: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST1]](s32) - ; SI: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C]](s32) - ; SI: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR1]](s32) - ; SI: [[UV2:%[0-9]+]]:_(<2 x s16>), [[UV3:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[COPY1]](<4 x s16>) - ; SI: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[UV2]](<2 x s16>) - ; SI: [[TRUNC4:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST2]](s32) - ; SI: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C]](s32) - ; SI: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR2]](s32) - ; SI: [[BITCAST3:%[0-9]+]]:_(s32) = G_BITCAST [[UV3]](<2 x s16>) - ; SI: [[TRUNC6:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST3]](s32) - ; SI: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST3]], [[C]](s32) - ; SI: [[TRUNC7:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR3]](s32) - ; SI: [[FPEXT:%[0-9]+]]:_(s32) = G_FPEXT [[TRUNC]](s16) - ; SI: [[FPEXT1:%[0-9]+]]:_(s32) = G_FPEXT [[TRUNC4]](s16) - ; SI: [[FMAXNUM_IEEE:%[0-9]+]]:_(s32) = G_FMAXNUM_IEEE [[FPEXT]], [[FPEXT1]] - ; SI: [[FPTRUNC:%[0-9]+]]:_(s16) = G_FPTRUNC [[FMAXNUM_IEEE]](s32) - ; SI: [[FPEXT2:%[0-9]+]]:_(s32) = G_FPEXT [[TRUNC1]](s16) - ; SI: [[FPEXT3:%[0-9]+]]:_(s32) = G_FPEXT [[TRUNC5]](s16) - ; SI: [[FMAXNUM_IEEE1:%[0-9]+]]:_(s32) = G_FMAXNUM_IEEE [[FPEXT2]], [[FPEXT3]] - ; SI: [[FPTRUNC1:%[0-9]+]]:_(s16) = G_FPTRUNC [[FMAXNUM_IEEE1]](s32) - ; SI: [[FPEXT4:%[0-9]+]]:_(s32) = G_FPEXT [[TRUNC2]](s16) - ; SI: [[FPEXT5:%[0-9]+]]:_(s32) = G_FPEXT [[TRUNC6]](s16) - ; SI: [[FMAXNUM_IEEE2:%[0-9]+]]:_(s32) = G_FMAXNUM_IEEE [[FPEXT4]], [[FPEXT5]] - ; SI: [[FPTRUNC2:%[0-9]+]]:_(s16) = G_FPTRUNC [[FMAXNUM_IEEE2]](s32) - ; SI: [[FPEXT6:%[0-9]+]]:_(s32) = G_FPEXT [[TRUNC3]](s16) - ; SI: [[FPEXT7:%[0-9]+]]:_(s32) = G_FPEXT [[TRUNC7]](s16) - ; SI: [[FMAXNUM_IEEE3:%[0-9]+]]:_(s32) = G_FMAXNUM_IEEE [[FPEXT6]], [[FPEXT7]] - ; SI: [[FPTRUNC3:%[0-9]+]]:_(s16) = G_FPTRUNC [[FMAXNUM_IEEE3]](s32) - ; SI: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[FPTRUNC]](s16) - ; SI: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[FPTRUNC1]](s16) - ; SI: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C]](s32) - ; SI: [[OR:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL]] - ; SI: [[BITCAST4:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) - ; SI: [[ZEXT2:%[0-9]+]]:_(s32) = G_ZEXT [[FPTRUNC2]](s16) - ; SI: [[ZEXT3:%[0-9]+]]:_(s32) = G_ZEXT [[FPTRUNC3]](s16) - ; SI: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[ZEXT3]], [[C]](s32) - ; SI: [[OR1:%[0-9]+]]:_(s32) = G_OR [[ZEXT2]], [[SHL1]] - ; SI: [[BITCAST5:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32) - ; SI: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BITCAST4]](<2 x s16>), [[BITCAST5]](<2 x s16>) - ; SI: $vgpr0_vgpr1 = COPY [[CONCAT_VECTORS]](<4 x s16>) + ; SI-NEXT: [[COPY1:%[0-9]+]]:_(<4 x s16>) = COPY $vgpr2_vgpr3 
+ ; SI-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[COPY]](<4 x s16>) + ; SI-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>) + ; SI-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST]](s32) + ; SI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; SI-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) + ; SI-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) + ; SI-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>) + ; SI-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST1]](s32) + ; SI-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C]](s32) + ; SI-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR1]](s32) + ; SI-NEXT: [[UV2:%[0-9]+]]:_(<2 x s16>), [[UV3:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[COPY1]](<4 x s16>) + ; SI-NEXT: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[UV2]](<2 x s16>) + ; SI-NEXT: [[TRUNC4:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST2]](s32) + ; SI-NEXT: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C]](s32) + ; SI-NEXT: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR2]](s32) + ; SI-NEXT: [[BITCAST3:%[0-9]+]]:_(s32) = G_BITCAST [[UV3]](<2 x s16>) + ; SI-NEXT: [[TRUNC6:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST3]](s32) + ; SI-NEXT: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST3]], [[C]](s32) + ; SI-NEXT: [[TRUNC7:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR3]](s32) + ; SI-NEXT: [[FPEXT:%[0-9]+]]:_(s32) = G_FPEXT [[TRUNC]](s16) + ; SI-NEXT: [[FPEXT1:%[0-9]+]]:_(s32) = G_FPEXT [[TRUNC4]](s16) + ; SI-NEXT: [[FMAXNUM_IEEE:%[0-9]+]]:_(s32) = G_FMAXNUM_IEEE [[FPEXT]], [[FPEXT1]] + ; SI-NEXT: [[FPTRUNC:%[0-9]+]]:_(s16) = G_FPTRUNC [[FMAXNUM_IEEE]](s32) + ; SI-NEXT: [[FPEXT2:%[0-9]+]]:_(s32) = G_FPEXT [[TRUNC1]](s16) + ; SI-NEXT: [[FPEXT3:%[0-9]+]]:_(s32) = G_FPEXT [[TRUNC5]](s16) + ; SI-NEXT: [[FMAXNUM_IEEE1:%[0-9]+]]:_(s32) = G_FMAXNUM_IEEE [[FPEXT2]], [[FPEXT3]] + ; SI-NEXT: [[FPTRUNC1:%[0-9]+]]:_(s16) = G_FPTRUNC [[FMAXNUM_IEEE1]](s32) + ; SI-NEXT: [[FPEXT4:%[0-9]+]]:_(s32) = G_FPEXT [[TRUNC2]](s16) + ; SI-NEXT: [[FPEXT5:%[0-9]+]]:_(s32) = G_FPEXT [[TRUNC6]](s16) + ; SI-NEXT: [[FMAXNUM_IEEE2:%[0-9]+]]:_(s32) = G_FMAXNUM_IEEE [[FPEXT4]], [[FPEXT5]] + ; SI-NEXT: [[FPTRUNC2:%[0-9]+]]:_(s16) = G_FPTRUNC [[FMAXNUM_IEEE2]](s32) + ; SI-NEXT: [[FPEXT6:%[0-9]+]]:_(s32) = G_FPEXT [[TRUNC3]](s16) + ; SI-NEXT: [[FPEXT7:%[0-9]+]]:_(s32) = G_FPEXT [[TRUNC7]](s16) + ; SI-NEXT: [[FMAXNUM_IEEE3:%[0-9]+]]:_(s32) = G_FMAXNUM_IEEE [[FPEXT6]], [[FPEXT7]] + ; SI-NEXT: [[FPTRUNC3:%[0-9]+]]:_(s16) = G_FPTRUNC [[FMAXNUM_IEEE3]](s32) + ; SI-NEXT: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[FPTRUNC]](s16) + ; SI-NEXT: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[FPTRUNC1]](s16) + ; SI-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C]](s32) + ; SI-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL]] + ; SI-NEXT: [[BITCAST4:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) + ; SI-NEXT: [[ZEXT2:%[0-9]+]]:_(s32) = G_ZEXT [[FPTRUNC2]](s16) + ; SI-NEXT: [[ZEXT3:%[0-9]+]]:_(s32) = G_ZEXT [[FPTRUNC3]](s16) + ; SI-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[ZEXT3]], [[C]](s32) + ; SI-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[ZEXT2]], [[SHL1]] + ; SI-NEXT: [[BITCAST5:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32) + ; SI-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BITCAST4]](<2 x s16>), [[BITCAST5]](<2 x s16>) + ; SI-NEXT: $vgpr0_vgpr1 = COPY [[CONCAT_VECTORS]](<4 x s16>) ; VI-LABEL: name: test_fmaxnum_v4s16 ; VI: [[COPY:%[0-9]+]]:_(<4 x s16>) = COPY $vgpr0_vgpr1 - ; VI: [[COPY1:%[0-9]+]]:_(<4 x s16>) = COPY $vgpr2_vgpr3 - ; VI: [[UV:%[0-9]+]]:_(<2 x 
s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[COPY]](<4 x s16>) - ; VI: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>) - ; VI: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST]](s32) - ; VI: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; VI: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) - ; VI: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) - ; VI: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>) - ; VI: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST1]](s32) - ; VI: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C]](s32) - ; VI: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR1]](s32) - ; VI: [[UV2:%[0-9]+]]:_(<2 x s16>), [[UV3:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[COPY1]](<4 x s16>) - ; VI: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[UV2]](<2 x s16>) - ; VI: [[TRUNC4:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST2]](s32) - ; VI: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C]](s32) - ; VI: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR2]](s32) - ; VI: [[BITCAST3:%[0-9]+]]:_(s32) = G_BITCAST [[UV3]](<2 x s16>) - ; VI: [[TRUNC6:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST3]](s32) - ; VI: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST3]], [[C]](s32) - ; VI: [[TRUNC7:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR3]](s32) - ; VI: [[FCANONICALIZE:%[0-9]+]]:_(s16) = G_FCANONICALIZE [[TRUNC]] - ; VI: [[FCANONICALIZE1:%[0-9]+]]:_(s16) = G_FCANONICALIZE [[TRUNC4]] - ; VI: [[FMAXNUM_IEEE:%[0-9]+]]:_(s16) = G_FMAXNUM_IEEE [[FCANONICALIZE]], [[FCANONICALIZE1]] - ; VI: [[FCANONICALIZE2:%[0-9]+]]:_(s16) = G_FCANONICALIZE [[TRUNC1]] - ; VI: [[FCANONICALIZE3:%[0-9]+]]:_(s16) = G_FCANONICALIZE [[TRUNC5]] - ; VI: [[FMAXNUM_IEEE1:%[0-9]+]]:_(s16) = G_FMAXNUM_IEEE [[FCANONICALIZE2]], [[FCANONICALIZE3]] - ; VI: [[FCANONICALIZE4:%[0-9]+]]:_(s16) = G_FCANONICALIZE [[TRUNC2]] - ; VI: [[FCANONICALIZE5:%[0-9]+]]:_(s16) = G_FCANONICALIZE [[TRUNC6]] - ; VI: [[FMAXNUM_IEEE2:%[0-9]+]]:_(s16) = G_FMAXNUM_IEEE [[FCANONICALIZE4]], [[FCANONICALIZE5]] - ; VI: [[FCANONICALIZE6:%[0-9]+]]:_(s16) = G_FCANONICALIZE [[TRUNC3]] - ; VI: [[FCANONICALIZE7:%[0-9]+]]:_(s16) = G_FCANONICALIZE [[TRUNC7]] - ; VI: [[FMAXNUM_IEEE3:%[0-9]+]]:_(s16) = G_FMAXNUM_IEEE [[FCANONICALIZE6]], [[FCANONICALIZE7]] - ; VI: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[FMAXNUM_IEEE]](s16) - ; VI: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[FMAXNUM_IEEE1]](s16) - ; VI: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C]](s32) - ; VI: [[OR:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL]] - ; VI: [[BITCAST4:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) - ; VI: [[ZEXT2:%[0-9]+]]:_(s32) = G_ZEXT [[FMAXNUM_IEEE2]](s16) - ; VI: [[ZEXT3:%[0-9]+]]:_(s32) = G_ZEXT [[FMAXNUM_IEEE3]](s16) - ; VI: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[ZEXT3]], [[C]](s32) - ; VI: [[OR1:%[0-9]+]]:_(s32) = G_OR [[ZEXT2]], [[SHL1]] - ; VI: [[BITCAST5:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32) - ; VI: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BITCAST4]](<2 x s16>), [[BITCAST5]](<2 x s16>) - ; VI: $vgpr0_vgpr1 = COPY [[CONCAT_VECTORS]](<4 x s16>) + ; VI-NEXT: [[COPY1:%[0-9]+]]:_(<4 x s16>) = COPY $vgpr2_vgpr3 + ; VI-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[COPY]](<4 x s16>) + ; VI-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>) + ; VI-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST]](s32) + ; VI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; VI-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) + ; VI-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) + ; VI-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST 
[[UV1]](<2 x s16>) + ; VI-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST1]](s32) + ; VI-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C]](s32) + ; VI-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR1]](s32) + ; VI-NEXT: [[UV2:%[0-9]+]]:_(<2 x s16>), [[UV3:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[COPY1]](<4 x s16>) + ; VI-NEXT: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[UV2]](<2 x s16>) + ; VI-NEXT: [[TRUNC4:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST2]](s32) + ; VI-NEXT: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C]](s32) + ; VI-NEXT: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR2]](s32) + ; VI-NEXT: [[BITCAST3:%[0-9]+]]:_(s32) = G_BITCAST [[UV3]](<2 x s16>) + ; VI-NEXT: [[TRUNC6:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST3]](s32) + ; VI-NEXT: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST3]], [[C]](s32) + ; VI-NEXT: [[TRUNC7:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR3]](s32) + ; VI-NEXT: [[FCANONICALIZE:%[0-9]+]]:_(s16) = G_FCANONICALIZE [[TRUNC]] + ; VI-NEXT: [[FCANONICALIZE1:%[0-9]+]]:_(s16) = G_FCANONICALIZE [[TRUNC4]] + ; VI-NEXT: [[FMAXNUM_IEEE:%[0-9]+]]:_(s16) = G_FMAXNUM_IEEE [[FCANONICALIZE]], [[FCANONICALIZE1]] + ; VI-NEXT: [[FCANONICALIZE2:%[0-9]+]]:_(s16) = G_FCANONICALIZE [[TRUNC1]] + ; VI-NEXT: [[FCANONICALIZE3:%[0-9]+]]:_(s16) = G_FCANONICALIZE [[TRUNC5]] + ; VI-NEXT: [[FMAXNUM_IEEE1:%[0-9]+]]:_(s16) = G_FMAXNUM_IEEE [[FCANONICALIZE2]], [[FCANONICALIZE3]] + ; VI-NEXT: [[FCANONICALIZE4:%[0-9]+]]:_(s16) = G_FCANONICALIZE [[TRUNC2]] + ; VI-NEXT: [[FCANONICALIZE5:%[0-9]+]]:_(s16) = G_FCANONICALIZE [[TRUNC6]] + ; VI-NEXT: [[FMAXNUM_IEEE2:%[0-9]+]]:_(s16) = G_FMAXNUM_IEEE [[FCANONICALIZE4]], [[FCANONICALIZE5]] + ; VI-NEXT: [[FCANONICALIZE6:%[0-9]+]]:_(s16) = G_FCANONICALIZE [[TRUNC3]] + ; VI-NEXT: [[FCANONICALIZE7:%[0-9]+]]:_(s16) = G_FCANONICALIZE [[TRUNC7]] + ; VI-NEXT: [[FMAXNUM_IEEE3:%[0-9]+]]:_(s16) = G_FMAXNUM_IEEE [[FCANONICALIZE6]], [[FCANONICALIZE7]] + ; VI-NEXT: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[FMAXNUM_IEEE]](s16) + ; VI-NEXT: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[FMAXNUM_IEEE1]](s16) + ; VI-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C]](s32) + ; VI-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL]] + ; VI-NEXT: [[BITCAST4:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) + ; VI-NEXT: [[ZEXT2:%[0-9]+]]:_(s32) = G_ZEXT [[FMAXNUM_IEEE2]](s16) + ; VI-NEXT: [[ZEXT3:%[0-9]+]]:_(s32) = G_ZEXT [[FMAXNUM_IEEE3]](s16) + ; VI-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[ZEXT3]], [[C]](s32) + ; VI-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[ZEXT2]], [[SHL1]] + ; VI-NEXT: [[BITCAST5:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32) + ; VI-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BITCAST4]](<2 x s16>), [[BITCAST5]](<2 x s16>) + ; VI-NEXT: $vgpr0_vgpr1 = COPY [[CONCAT_VECTORS]](<4 x s16>) ; GFX9-LABEL: name: test_fmaxnum_v4s16 ; GFX9: [[COPY:%[0-9]+]]:_(<4 x s16>) = COPY $vgpr0_vgpr1 - ; GFX9: [[COPY1:%[0-9]+]]:_(<4 x s16>) = COPY $vgpr2_vgpr3 - ; GFX9: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[COPY]](<4 x s16>) - ; GFX9: [[UV2:%[0-9]+]]:_(<2 x s16>), [[UV3:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[COPY1]](<4 x s16>) - ; GFX9: [[FCANONICALIZE:%[0-9]+]]:_(<2 x s16>) = G_FCANONICALIZE [[UV]] - ; GFX9: [[FCANONICALIZE1:%[0-9]+]]:_(<2 x s16>) = G_FCANONICALIZE [[UV2]] - ; GFX9: [[FMAXNUM_IEEE:%[0-9]+]]:_(<2 x s16>) = G_FMAXNUM_IEEE [[FCANONICALIZE]], [[FCANONICALIZE1]] - ; GFX9: [[FCANONICALIZE2:%[0-9]+]]:_(<2 x s16>) = G_FCANONICALIZE [[UV1]] - ; GFX9: [[FCANONICALIZE3:%[0-9]+]]:_(<2 x s16>) = G_FCANONICALIZE [[UV3]] - ; GFX9: 
[[FMAXNUM_IEEE1:%[0-9]+]]:_(<2 x s16>) = G_FMAXNUM_IEEE [[FCANONICALIZE2]], [[FCANONICALIZE3]] - ; GFX9: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[FMAXNUM_IEEE]](<2 x s16>), [[FMAXNUM_IEEE1]](<2 x s16>) - ; GFX9: $vgpr0_vgpr1 = COPY [[CONCAT_VECTORS]](<4 x s16>) + ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(<4 x s16>) = COPY $vgpr2_vgpr3 + ; GFX9-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[COPY]](<4 x s16>) + ; GFX9-NEXT: [[UV2:%[0-9]+]]:_(<2 x s16>), [[UV3:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[COPY1]](<4 x s16>) + ; GFX9-NEXT: [[FCANONICALIZE:%[0-9]+]]:_(<2 x s16>) = G_FCANONICALIZE [[UV]] + ; GFX9-NEXT: [[FCANONICALIZE1:%[0-9]+]]:_(<2 x s16>) = G_FCANONICALIZE [[UV2]] + ; GFX9-NEXT: [[FMAXNUM_IEEE:%[0-9]+]]:_(<2 x s16>) = G_FMAXNUM_IEEE [[FCANONICALIZE]], [[FCANONICALIZE1]] + ; GFX9-NEXT: [[FCANONICALIZE2:%[0-9]+]]:_(<2 x s16>) = G_FCANONICALIZE [[UV1]] + ; GFX9-NEXT: [[FCANONICALIZE3:%[0-9]+]]:_(<2 x s16>) = G_FCANONICALIZE [[UV3]] + ; GFX9-NEXT: [[FMAXNUM_IEEE1:%[0-9]+]]:_(<2 x s16>) = G_FMAXNUM_IEEE [[FCANONICALIZE2]], [[FCANONICALIZE3]] + ; GFX9-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[FMAXNUM_IEEE]](<2 x s16>), [[FMAXNUM_IEEE1]](<2 x s16>) + ; GFX9-NEXT: $vgpr0_vgpr1 = COPY [[CONCAT_VECTORS]](<4 x s16>) %0:_(<4 x s16>) = COPY $vgpr0_vgpr1 %1:_(<4 x s16>) = COPY $vgpr2_vgpr3 %2:_(<4 x s16>) = G_FMAXNUM %0, %1 @@ -666,37 +666,37 @@ body: | ; SI-LABEL: name: test_fmaxnum_with_fmaxnum_argument_s32_ieee_mode_on ; SI: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; SI: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; SI: [[FCANONICALIZE:%[0-9]+]]:_(s32) = G_FCANONICALIZE [[COPY]] - ; SI: [[FCANONICALIZE1:%[0-9]+]]:_(s32) = G_FCANONICALIZE [[COPY1]] - ; SI: [[FMAXNUM_IEEE:%[0-9]+]]:_(s32) = G_FMAXNUM_IEEE [[FCANONICALIZE]], [[FCANONICALIZE1]] - ; SI: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; SI: [[FCANONICALIZE2:%[0-9]+]]:_(s32) = G_FCANONICALIZE [[FMAXNUM_IEEE]] - ; SI: [[FCANONICALIZE3:%[0-9]+]]:_(s32) = G_FCANONICALIZE [[COPY2]] - ; SI: [[FMAXNUM_IEEE1:%[0-9]+]]:_(s32) = G_FMAXNUM_IEEE [[FCANONICALIZE2]], [[FCANONICALIZE3]] - ; SI: $vgpr0 = COPY [[FMAXNUM_IEEE1]](s32) + ; SI-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; SI-NEXT: [[FCANONICALIZE:%[0-9]+]]:_(s32) = G_FCANONICALIZE [[COPY]] + ; SI-NEXT: [[FCANONICALIZE1:%[0-9]+]]:_(s32) = G_FCANONICALIZE [[COPY1]] + ; SI-NEXT: [[FMAXNUM_IEEE:%[0-9]+]]:_(s32) = G_FMAXNUM_IEEE [[FCANONICALIZE]], [[FCANONICALIZE1]] + ; SI-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 + ; SI-NEXT: [[FCANONICALIZE2:%[0-9]+]]:_(s32) = G_FCANONICALIZE [[FMAXNUM_IEEE]] + ; SI-NEXT: [[FCANONICALIZE3:%[0-9]+]]:_(s32) = G_FCANONICALIZE [[COPY2]] + ; SI-NEXT: [[FMAXNUM_IEEE1:%[0-9]+]]:_(s32) = G_FMAXNUM_IEEE [[FCANONICALIZE2]], [[FCANONICALIZE3]] + ; SI-NEXT: $vgpr0 = COPY [[FMAXNUM_IEEE1]](s32) ; VI-LABEL: name: test_fmaxnum_with_fmaxnum_argument_s32_ieee_mode_on ; VI: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; VI: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; VI: [[FCANONICALIZE:%[0-9]+]]:_(s32) = G_FCANONICALIZE [[COPY]] - ; VI: [[FCANONICALIZE1:%[0-9]+]]:_(s32) = G_FCANONICALIZE [[COPY1]] - ; VI: [[FMAXNUM_IEEE:%[0-9]+]]:_(s32) = G_FMAXNUM_IEEE [[FCANONICALIZE]], [[FCANONICALIZE1]] - ; VI: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; VI: [[FCANONICALIZE2:%[0-9]+]]:_(s32) = G_FCANONICALIZE [[FMAXNUM_IEEE]] - ; VI: [[FCANONICALIZE3:%[0-9]+]]:_(s32) = G_FCANONICALIZE [[COPY2]] - ; VI: [[FMAXNUM_IEEE1:%[0-9]+]]:_(s32) = G_FMAXNUM_IEEE [[FCANONICALIZE2]], [[FCANONICALIZE3]] - ; VI: $vgpr0 = COPY 
[[FMAXNUM_IEEE1]](s32) + ; VI-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; VI-NEXT: [[FCANONICALIZE:%[0-9]+]]:_(s32) = G_FCANONICALIZE [[COPY]] + ; VI-NEXT: [[FCANONICALIZE1:%[0-9]+]]:_(s32) = G_FCANONICALIZE [[COPY1]] + ; VI-NEXT: [[FMAXNUM_IEEE:%[0-9]+]]:_(s32) = G_FMAXNUM_IEEE [[FCANONICALIZE]], [[FCANONICALIZE1]] + ; VI-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 + ; VI-NEXT: [[FCANONICALIZE2:%[0-9]+]]:_(s32) = G_FCANONICALIZE [[FMAXNUM_IEEE]] + ; VI-NEXT: [[FCANONICALIZE3:%[0-9]+]]:_(s32) = G_FCANONICALIZE [[COPY2]] + ; VI-NEXT: [[FMAXNUM_IEEE1:%[0-9]+]]:_(s32) = G_FMAXNUM_IEEE [[FCANONICALIZE2]], [[FCANONICALIZE3]] + ; VI-NEXT: $vgpr0 = COPY [[FMAXNUM_IEEE1]](s32) ; GFX9-LABEL: name: test_fmaxnum_with_fmaxnum_argument_s32_ieee_mode_on ; GFX9: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX9: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX9: [[FCANONICALIZE:%[0-9]+]]:_(s32) = G_FCANONICALIZE [[COPY]] - ; GFX9: [[FCANONICALIZE1:%[0-9]+]]:_(s32) = G_FCANONICALIZE [[COPY1]] - ; GFX9: [[FMAXNUM_IEEE:%[0-9]+]]:_(s32) = G_FMAXNUM_IEEE [[FCANONICALIZE]], [[FCANONICALIZE1]] - ; GFX9: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; GFX9: [[FCANONICALIZE2:%[0-9]+]]:_(s32) = G_FCANONICALIZE [[FMAXNUM_IEEE]] - ; GFX9: [[FCANONICALIZE3:%[0-9]+]]:_(s32) = G_FCANONICALIZE [[COPY2]] - ; GFX9: [[FMAXNUM_IEEE1:%[0-9]+]]:_(s32) = G_FMAXNUM_IEEE [[FCANONICALIZE2]], [[FCANONICALIZE3]] - ; GFX9: $vgpr0 = COPY [[FMAXNUM_IEEE1]](s32) + ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; GFX9-NEXT: [[FCANONICALIZE:%[0-9]+]]:_(s32) = G_FCANONICALIZE [[COPY]] + ; GFX9-NEXT: [[FCANONICALIZE1:%[0-9]+]]:_(s32) = G_FCANONICALIZE [[COPY1]] + ; GFX9-NEXT: [[FMAXNUM_IEEE:%[0-9]+]]:_(s32) = G_FMAXNUM_IEEE [[FCANONICALIZE]], [[FCANONICALIZE1]] + ; GFX9-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 + ; GFX9-NEXT: [[FCANONICALIZE2:%[0-9]+]]:_(s32) = G_FCANONICALIZE [[FMAXNUM_IEEE]] + ; GFX9-NEXT: [[FCANONICALIZE3:%[0-9]+]]:_(s32) = G_FCANONICALIZE [[COPY2]] + ; GFX9-NEXT: [[FMAXNUM_IEEE1:%[0-9]+]]:_(s32) = G_FMAXNUM_IEEE [[FCANONICALIZE2]], [[FCANONICALIZE3]] + ; GFX9-NEXT: $vgpr0 = COPY [[FMAXNUM_IEEE1]](s32) %0:_(s32) = COPY $vgpr0 %1:_(s32) = COPY $vgpr1 %2:_(s32) = G_FMAXNUM %0, %1 @@ -716,31 +716,31 @@ body: | ; SI-LABEL: name: test_fmaxnum_with_nonNaN_fmaxnum_argument_s32_ieee_mode_on ; SI: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; SI: [[C:%[0-9]+]]:_(s32) = G_FCONSTANT float 0.000000e+00 - ; SI: [[FCANONICALIZE:%[0-9]+]]:_(s32) = G_FCANONICALIZE [[COPY]] - ; SI: [[FMAXNUM_IEEE:%[0-9]+]]:_(s32) = G_FMAXNUM_IEEE [[FCANONICALIZE]], [[C]] - ; SI: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; SI: [[FCANONICALIZE1:%[0-9]+]]:_(s32) = G_FCANONICALIZE [[COPY1]] - ; SI: [[FMAXNUM_IEEE1:%[0-9]+]]:_(s32) = G_FMAXNUM_IEEE [[FMAXNUM_IEEE]], [[FCANONICALIZE1]] - ; SI: $vgpr0 = COPY [[FMAXNUM_IEEE1]](s32) + ; SI-NEXT: [[C:%[0-9]+]]:_(s32) = G_FCONSTANT float 0.000000e+00 + ; SI-NEXT: [[FCANONICALIZE:%[0-9]+]]:_(s32) = G_FCANONICALIZE [[COPY]] + ; SI-NEXT: [[FMAXNUM_IEEE:%[0-9]+]]:_(s32) = G_FMAXNUM_IEEE [[FCANONICALIZE]], [[C]] + ; SI-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; SI-NEXT: [[FCANONICALIZE1:%[0-9]+]]:_(s32) = G_FCANONICALIZE [[COPY1]] + ; SI-NEXT: [[FMAXNUM_IEEE1:%[0-9]+]]:_(s32) = G_FMAXNUM_IEEE [[FMAXNUM_IEEE]], [[FCANONICALIZE1]] + ; SI-NEXT: $vgpr0 = COPY [[FMAXNUM_IEEE1]](s32) ; VI-LABEL: name: test_fmaxnum_with_nonNaN_fmaxnum_argument_s32_ieee_mode_on ; VI: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; VI: [[C:%[0-9]+]]:_(s32) = G_FCONSTANT float 0.000000e+00 - ; VI: [[FCANONICALIZE:%[0-9]+]]:_(s32) = 
G_FCANONICALIZE [[COPY]] - ; VI: [[FMAXNUM_IEEE:%[0-9]+]]:_(s32) = G_FMAXNUM_IEEE [[FCANONICALIZE]], [[C]] - ; VI: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; VI: [[FCANONICALIZE1:%[0-9]+]]:_(s32) = G_FCANONICALIZE [[COPY1]] - ; VI: [[FMAXNUM_IEEE1:%[0-9]+]]:_(s32) = G_FMAXNUM_IEEE [[FMAXNUM_IEEE]], [[FCANONICALIZE1]] - ; VI: $vgpr0 = COPY [[FMAXNUM_IEEE1]](s32) + ; VI-NEXT: [[C:%[0-9]+]]:_(s32) = G_FCONSTANT float 0.000000e+00 + ; VI-NEXT: [[FCANONICALIZE:%[0-9]+]]:_(s32) = G_FCANONICALIZE [[COPY]] + ; VI-NEXT: [[FMAXNUM_IEEE:%[0-9]+]]:_(s32) = G_FMAXNUM_IEEE [[FCANONICALIZE]], [[C]] + ; VI-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; VI-NEXT: [[FCANONICALIZE1:%[0-9]+]]:_(s32) = G_FCANONICALIZE [[COPY1]] + ; VI-NEXT: [[FMAXNUM_IEEE1:%[0-9]+]]:_(s32) = G_FMAXNUM_IEEE [[FMAXNUM_IEEE]], [[FCANONICALIZE1]] + ; VI-NEXT: $vgpr0 = COPY [[FMAXNUM_IEEE1]](s32) ; GFX9-LABEL: name: test_fmaxnum_with_nonNaN_fmaxnum_argument_s32_ieee_mode_on ; GFX9: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX9: [[C:%[0-9]+]]:_(s32) = G_FCONSTANT float 0.000000e+00 - ; GFX9: [[FCANONICALIZE:%[0-9]+]]:_(s32) = G_FCANONICALIZE [[COPY]] - ; GFX9: [[FMAXNUM_IEEE:%[0-9]+]]:_(s32) = G_FMAXNUM_IEEE [[FCANONICALIZE]], [[C]] - ; GFX9: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX9: [[FCANONICALIZE1:%[0-9]+]]:_(s32) = G_FCANONICALIZE [[COPY1]] - ; GFX9: [[FMAXNUM_IEEE1:%[0-9]+]]:_(s32) = G_FMAXNUM_IEEE [[FMAXNUM_IEEE]], [[FCANONICALIZE1]] - ; GFX9: $vgpr0 = COPY [[FMAXNUM_IEEE1]](s32) + ; GFX9-NEXT: [[C:%[0-9]+]]:_(s32) = G_FCONSTANT float 0.000000e+00 + ; GFX9-NEXT: [[FCANONICALIZE:%[0-9]+]]:_(s32) = G_FCANONICALIZE [[COPY]] + ; GFX9-NEXT: [[FMAXNUM_IEEE:%[0-9]+]]:_(s32) = G_FMAXNUM_IEEE [[FCANONICALIZE]], [[C]] + ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; GFX9-NEXT: [[FCANONICALIZE1:%[0-9]+]]:_(s32) = G_FCANONICALIZE [[COPY1]] + ; GFX9-NEXT: [[FMAXNUM_IEEE1:%[0-9]+]]:_(s32) = G_FMAXNUM_IEEE [[FMAXNUM_IEEE]], [[FCANONICALIZE1]] + ; GFX9-NEXT: $vgpr0 = COPY [[FMAXNUM_IEEE1]](s32) %0:_(s32) = COPY $vgpr0 %1:_(s32) = G_FCONSTANT float 0.000000e+00 %2:_(s32) = G_FMAXNUM %0, %1 @@ -760,37 +760,37 @@ body: | ; SI-LABEL: name: test_fmaxnum_with_fminnum_argument_s32_ieee_mode_on ; SI: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; SI: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; SI: [[FCANONICALIZE:%[0-9]+]]:_(s32) = G_FCANONICALIZE [[COPY]] - ; SI: [[FCANONICALIZE1:%[0-9]+]]:_(s32) = G_FCANONICALIZE [[COPY1]] - ; SI: [[FMINNUM_IEEE:%[0-9]+]]:_(s32) = G_FMINNUM_IEEE [[FCANONICALIZE]], [[FCANONICALIZE1]] - ; SI: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; SI: [[FCANONICALIZE2:%[0-9]+]]:_(s32) = G_FCANONICALIZE [[FMINNUM_IEEE]] - ; SI: [[FCANONICALIZE3:%[0-9]+]]:_(s32) = G_FCANONICALIZE [[COPY2]] - ; SI: [[FMAXNUM_IEEE:%[0-9]+]]:_(s32) = G_FMAXNUM_IEEE [[FCANONICALIZE2]], [[FCANONICALIZE3]] - ; SI: $vgpr0 = COPY [[FMAXNUM_IEEE]](s32) + ; SI-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; SI-NEXT: [[FCANONICALIZE:%[0-9]+]]:_(s32) = G_FCANONICALIZE [[COPY]] + ; SI-NEXT: [[FCANONICALIZE1:%[0-9]+]]:_(s32) = G_FCANONICALIZE [[COPY1]] + ; SI-NEXT: [[FMINNUM_IEEE:%[0-9]+]]:_(s32) = G_FMINNUM_IEEE [[FCANONICALIZE]], [[FCANONICALIZE1]] + ; SI-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 + ; SI-NEXT: [[FCANONICALIZE2:%[0-9]+]]:_(s32) = G_FCANONICALIZE [[FMINNUM_IEEE]] + ; SI-NEXT: [[FCANONICALIZE3:%[0-9]+]]:_(s32) = G_FCANONICALIZE [[COPY2]] + ; SI-NEXT: [[FMAXNUM_IEEE:%[0-9]+]]:_(s32) = G_FMAXNUM_IEEE [[FCANONICALIZE2]], [[FCANONICALIZE3]] + ; SI-NEXT: $vgpr0 = COPY [[FMAXNUM_IEEE]](s32) ; VI-LABEL: name: 
test_fmaxnum_with_fminnum_argument_s32_ieee_mode_on ; VI: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; VI: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; VI: [[FCANONICALIZE:%[0-9]+]]:_(s32) = G_FCANONICALIZE [[COPY]] - ; VI: [[FCANONICALIZE1:%[0-9]+]]:_(s32) = G_FCANONICALIZE [[COPY1]] - ; VI: [[FMINNUM_IEEE:%[0-9]+]]:_(s32) = G_FMINNUM_IEEE [[FCANONICALIZE]], [[FCANONICALIZE1]] - ; VI: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; VI: [[FCANONICALIZE2:%[0-9]+]]:_(s32) = G_FCANONICALIZE [[FMINNUM_IEEE]] - ; VI: [[FCANONICALIZE3:%[0-9]+]]:_(s32) = G_FCANONICALIZE [[COPY2]] - ; VI: [[FMAXNUM_IEEE:%[0-9]+]]:_(s32) = G_FMAXNUM_IEEE [[FCANONICALIZE2]], [[FCANONICALIZE3]] - ; VI: $vgpr0 = COPY [[FMAXNUM_IEEE]](s32) + ; VI-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; VI-NEXT: [[FCANONICALIZE:%[0-9]+]]:_(s32) = G_FCANONICALIZE [[COPY]] + ; VI-NEXT: [[FCANONICALIZE1:%[0-9]+]]:_(s32) = G_FCANONICALIZE [[COPY1]] + ; VI-NEXT: [[FMINNUM_IEEE:%[0-9]+]]:_(s32) = G_FMINNUM_IEEE [[FCANONICALIZE]], [[FCANONICALIZE1]] + ; VI-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 + ; VI-NEXT: [[FCANONICALIZE2:%[0-9]+]]:_(s32) = G_FCANONICALIZE [[FMINNUM_IEEE]] + ; VI-NEXT: [[FCANONICALIZE3:%[0-9]+]]:_(s32) = G_FCANONICALIZE [[COPY2]] + ; VI-NEXT: [[FMAXNUM_IEEE:%[0-9]+]]:_(s32) = G_FMAXNUM_IEEE [[FCANONICALIZE2]], [[FCANONICALIZE3]] + ; VI-NEXT: $vgpr0 = COPY [[FMAXNUM_IEEE]](s32) ; GFX9-LABEL: name: test_fmaxnum_with_fminnum_argument_s32_ieee_mode_on ; GFX9: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX9: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX9: [[FCANONICALIZE:%[0-9]+]]:_(s32) = G_FCANONICALIZE [[COPY]] - ; GFX9: [[FCANONICALIZE1:%[0-9]+]]:_(s32) = G_FCANONICALIZE [[COPY1]] - ; GFX9: [[FMINNUM_IEEE:%[0-9]+]]:_(s32) = G_FMINNUM_IEEE [[FCANONICALIZE]], [[FCANONICALIZE1]] - ; GFX9: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; GFX9: [[FCANONICALIZE2:%[0-9]+]]:_(s32) = G_FCANONICALIZE [[FMINNUM_IEEE]] - ; GFX9: [[FCANONICALIZE3:%[0-9]+]]:_(s32) = G_FCANONICALIZE [[COPY2]] - ; GFX9: [[FMAXNUM_IEEE:%[0-9]+]]:_(s32) = G_FMAXNUM_IEEE [[FCANONICALIZE2]], [[FCANONICALIZE3]] - ; GFX9: $vgpr0 = COPY [[FMAXNUM_IEEE]](s32) + ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; GFX9-NEXT: [[FCANONICALIZE:%[0-9]+]]:_(s32) = G_FCANONICALIZE [[COPY]] + ; GFX9-NEXT: [[FCANONICALIZE1:%[0-9]+]]:_(s32) = G_FCANONICALIZE [[COPY1]] + ; GFX9-NEXT: [[FMINNUM_IEEE:%[0-9]+]]:_(s32) = G_FMINNUM_IEEE [[FCANONICALIZE]], [[FCANONICALIZE1]] + ; GFX9-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 + ; GFX9-NEXT: [[FCANONICALIZE2:%[0-9]+]]:_(s32) = G_FCANONICALIZE [[FMINNUM_IEEE]] + ; GFX9-NEXT: [[FCANONICALIZE3:%[0-9]+]]:_(s32) = G_FCANONICALIZE [[COPY2]] + ; GFX9-NEXT: [[FMAXNUM_IEEE:%[0-9]+]]:_(s32) = G_FMAXNUM_IEEE [[FCANONICALIZE2]], [[FCANONICALIZE3]] + ; GFX9-NEXT: $vgpr0 = COPY [[FMAXNUM_IEEE]](s32) %0:_(s32) = COPY $vgpr0 %1:_(s32) = COPY $vgpr1 %2:_(s32) = G_FMINNUM %0, %1 @@ -810,31 +810,31 @@ body: | ; SI-LABEL: name: test_fmaxnum_with_nonNaN_fminnum_argument_s32_ieee_mode_on ; SI: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; SI: [[C:%[0-9]+]]:_(s32) = G_FCONSTANT float 0.000000e+00 - ; SI: [[FCANONICALIZE:%[0-9]+]]:_(s32) = G_FCANONICALIZE [[COPY]] - ; SI: [[FMINNUM_IEEE:%[0-9]+]]:_(s32) = G_FMINNUM_IEEE [[FCANONICALIZE]], [[C]] - ; SI: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; SI: [[FCANONICALIZE1:%[0-9]+]]:_(s32) = G_FCANONICALIZE [[COPY1]] - ; SI: [[FMAXNUM_IEEE:%[0-9]+]]:_(s32) = G_FMAXNUM_IEEE [[FMINNUM_IEEE]], [[FCANONICALIZE1]] - ; SI: $vgpr0 = COPY [[FMAXNUM_IEEE]](s32) + ; SI-NEXT: [[C:%[0-9]+]]:_(s32) = G_FCONSTANT float 
0.000000e+00 + ; SI-NEXT: [[FCANONICALIZE:%[0-9]+]]:_(s32) = G_FCANONICALIZE [[COPY]] + ; SI-NEXT: [[FMINNUM_IEEE:%[0-9]+]]:_(s32) = G_FMINNUM_IEEE [[FCANONICALIZE]], [[C]] + ; SI-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; SI-NEXT: [[FCANONICALIZE1:%[0-9]+]]:_(s32) = G_FCANONICALIZE [[COPY1]] + ; SI-NEXT: [[FMAXNUM_IEEE:%[0-9]+]]:_(s32) = G_FMAXNUM_IEEE [[FMINNUM_IEEE]], [[FCANONICALIZE1]] + ; SI-NEXT: $vgpr0 = COPY [[FMAXNUM_IEEE]](s32) ; VI-LABEL: name: test_fmaxnum_with_nonNaN_fminnum_argument_s32_ieee_mode_on ; VI: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; VI: [[C:%[0-9]+]]:_(s32) = G_FCONSTANT float 0.000000e+00 - ; VI: [[FCANONICALIZE:%[0-9]+]]:_(s32) = G_FCANONICALIZE [[COPY]] - ; VI: [[FMINNUM_IEEE:%[0-9]+]]:_(s32) = G_FMINNUM_IEEE [[FCANONICALIZE]], [[C]] - ; VI: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; VI: [[FCANONICALIZE1:%[0-9]+]]:_(s32) = G_FCANONICALIZE [[COPY1]] - ; VI: [[FMAXNUM_IEEE:%[0-9]+]]:_(s32) = G_FMAXNUM_IEEE [[FMINNUM_IEEE]], [[FCANONICALIZE1]] - ; VI: $vgpr0 = COPY [[FMAXNUM_IEEE]](s32) + ; VI-NEXT: [[C:%[0-9]+]]:_(s32) = G_FCONSTANT float 0.000000e+00 + ; VI-NEXT: [[FCANONICALIZE:%[0-9]+]]:_(s32) = G_FCANONICALIZE [[COPY]] + ; VI-NEXT: [[FMINNUM_IEEE:%[0-9]+]]:_(s32) = G_FMINNUM_IEEE [[FCANONICALIZE]], [[C]] + ; VI-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; VI-NEXT: [[FCANONICALIZE1:%[0-9]+]]:_(s32) = G_FCANONICALIZE [[COPY1]] + ; VI-NEXT: [[FMAXNUM_IEEE:%[0-9]+]]:_(s32) = G_FMAXNUM_IEEE [[FMINNUM_IEEE]], [[FCANONICALIZE1]] + ; VI-NEXT: $vgpr0 = COPY [[FMAXNUM_IEEE]](s32) ; GFX9-LABEL: name: test_fmaxnum_with_nonNaN_fminnum_argument_s32_ieee_mode_on ; GFX9: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX9: [[C:%[0-9]+]]:_(s32) = G_FCONSTANT float 0.000000e+00 - ; GFX9: [[FCANONICALIZE:%[0-9]+]]:_(s32) = G_FCANONICALIZE [[COPY]] - ; GFX9: [[FMINNUM_IEEE:%[0-9]+]]:_(s32) = G_FMINNUM_IEEE [[FCANONICALIZE]], [[C]] - ; GFX9: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX9: [[FCANONICALIZE1:%[0-9]+]]:_(s32) = G_FCANONICALIZE [[COPY1]] - ; GFX9: [[FMAXNUM_IEEE:%[0-9]+]]:_(s32) = G_FMAXNUM_IEEE [[FMINNUM_IEEE]], [[FCANONICALIZE1]] - ; GFX9: $vgpr0 = COPY [[FMAXNUM_IEEE]](s32) + ; GFX9-NEXT: [[C:%[0-9]+]]:_(s32) = G_FCONSTANT float 0.000000e+00 + ; GFX9-NEXT: [[FCANONICALIZE:%[0-9]+]]:_(s32) = G_FCANONICALIZE [[COPY]] + ; GFX9-NEXT: [[FMINNUM_IEEE:%[0-9]+]]:_(s32) = G_FMINNUM_IEEE [[FCANONICALIZE]], [[C]] + ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; GFX9-NEXT: [[FCANONICALIZE1:%[0-9]+]]:_(s32) = G_FCANONICALIZE [[COPY1]] + ; GFX9-NEXT: [[FMAXNUM_IEEE:%[0-9]+]]:_(s32) = G_FMAXNUM_IEEE [[FMINNUM_IEEE]], [[FCANONICALIZE1]] + ; GFX9-NEXT: $vgpr0 = COPY [[FMAXNUM_IEEE]](s32) %0:_(s32) = COPY $vgpr0 %1:_(s32) = G_FCONSTANT float 0.000000e+00 %2:_(s32) = G_FMINNUM %0, %1 @@ -854,22 +854,22 @@ body: | ; SI-LABEL: name: test_fmaxnum_with_constant_argument_s32_ieee_mode_on ; SI: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; SI: [[C:%[0-9]+]]:_(s32) = G_FCONSTANT float 0.000000e+00 - ; SI: [[FCANONICALIZE:%[0-9]+]]:_(s32) = G_FCANONICALIZE [[COPY]] - ; SI: [[FMAXNUM_IEEE:%[0-9]+]]:_(s32) = G_FMAXNUM_IEEE [[FCANONICALIZE]], [[C]] - ; SI: $vgpr0 = COPY [[FMAXNUM_IEEE]](s32) + ; SI-NEXT: [[C:%[0-9]+]]:_(s32) = G_FCONSTANT float 0.000000e+00 + ; SI-NEXT: [[FCANONICALIZE:%[0-9]+]]:_(s32) = G_FCANONICALIZE [[COPY]] + ; SI-NEXT: [[FMAXNUM_IEEE:%[0-9]+]]:_(s32) = G_FMAXNUM_IEEE [[FCANONICALIZE]], [[C]] + ; SI-NEXT: $vgpr0 = COPY [[FMAXNUM_IEEE]](s32) ; VI-LABEL: name: test_fmaxnum_with_constant_argument_s32_ieee_mode_on ; VI: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; 
VI: [[C:%[0-9]+]]:_(s32) = G_FCONSTANT float 0.000000e+00 - ; VI: [[FCANONICALIZE:%[0-9]+]]:_(s32) = G_FCANONICALIZE [[COPY]] - ; VI: [[FMAXNUM_IEEE:%[0-9]+]]:_(s32) = G_FMAXNUM_IEEE [[FCANONICALIZE]], [[C]] - ; VI: $vgpr0 = COPY [[FMAXNUM_IEEE]](s32) + ; VI-NEXT: [[C:%[0-9]+]]:_(s32) = G_FCONSTANT float 0.000000e+00 + ; VI-NEXT: [[FCANONICALIZE:%[0-9]+]]:_(s32) = G_FCANONICALIZE [[COPY]] + ; VI-NEXT: [[FMAXNUM_IEEE:%[0-9]+]]:_(s32) = G_FMAXNUM_IEEE [[FCANONICALIZE]], [[C]] + ; VI-NEXT: $vgpr0 = COPY [[FMAXNUM_IEEE]](s32) ; GFX9-LABEL: name: test_fmaxnum_with_constant_argument_s32_ieee_mode_on ; GFX9: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX9: [[C:%[0-9]+]]:_(s32) = G_FCONSTANT float 0.000000e+00 - ; GFX9: [[FCANONICALIZE:%[0-9]+]]:_(s32) = G_FCANONICALIZE [[COPY]] - ; GFX9: [[FMAXNUM_IEEE:%[0-9]+]]:_(s32) = G_FMAXNUM_IEEE [[FCANONICALIZE]], [[C]] - ; GFX9: $vgpr0 = COPY [[FMAXNUM_IEEE]](s32) + ; GFX9-NEXT: [[C:%[0-9]+]]:_(s32) = G_FCONSTANT float 0.000000e+00 + ; GFX9-NEXT: [[FCANONICALIZE:%[0-9]+]]:_(s32) = G_FCANONICALIZE [[COPY]] + ; GFX9-NEXT: [[FMAXNUM_IEEE:%[0-9]+]]:_(s32) = G_FMAXNUM_IEEE [[FCANONICALIZE]], [[C]] + ; GFX9-NEXT: $vgpr0 = COPY [[FMAXNUM_IEEE]](s32) %0:_(s32) = COPY $vgpr0 %1:_(s32) = G_FCONSTANT float 0.000000e+00 %2:_(s32) = G_FMAXNUM %0, %1 @@ -887,54 +887,54 @@ body: | ; SI-LABEL: name: test_fmaxnum_with_constant_vector_argument_v2s16_ieee_mode_on ; SI: [[COPY:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0 - ; SI: [[C:%[0-9]+]]:_(s16) = G_FCONSTANT half 0xH0000 - ; SI: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY]](<2 x s16>) - ; SI: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST]](s32) - ; SI: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; SI: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C1]](s32) - ; SI: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) - ; SI: [[FPEXT:%[0-9]+]]:_(s32) = G_FPEXT [[TRUNC]](s16) - ; SI: [[FPEXT1:%[0-9]+]]:_(s32) = G_FPEXT [[C]](s16) - ; SI: [[FMAXNUM_IEEE:%[0-9]+]]:_(s32) = G_FMAXNUM_IEEE [[FPEXT]], [[FPEXT1]] - ; SI: [[FPTRUNC:%[0-9]+]]:_(s16) = G_FPTRUNC [[FMAXNUM_IEEE]](s32) - ; SI: [[FPEXT2:%[0-9]+]]:_(s32) = G_FPEXT [[TRUNC1]](s16) - ; SI: [[FPEXT3:%[0-9]+]]:_(s32) = G_FPEXT [[C]](s16) - ; SI: [[FMAXNUM_IEEE1:%[0-9]+]]:_(s32) = G_FMAXNUM_IEEE [[FPEXT2]], [[FPEXT3]] - ; SI: [[FPTRUNC1:%[0-9]+]]:_(s16) = G_FPTRUNC [[FMAXNUM_IEEE1]](s32) - ; SI: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[FPTRUNC]](s16) - ; SI: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[FPTRUNC1]](s16) - ; SI: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C1]](s32) - ; SI: [[OR:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL]] - ; SI: [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) - ; SI: $vgpr0 = COPY [[BITCAST1]](<2 x s16>) + ; SI-NEXT: [[C:%[0-9]+]]:_(s16) = G_FCONSTANT half 0xH0000 + ; SI-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY]](<2 x s16>) + ; SI-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST]](s32) + ; SI-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; SI-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C1]](s32) + ; SI-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) + ; SI-NEXT: [[FPEXT:%[0-9]+]]:_(s32) = G_FPEXT [[TRUNC]](s16) + ; SI-NEXT: [[FPEXT1:%[0-9]+]]:_(s32) = G_FPEXT [[C]](s16) + ; SI-NEXT: [[FMAXNUM_IEEE:%[0-9]+]]:_(s32) = G_FMAXNUM_IEEE [[FPEXT]], [[FPEXT1]] + ; SI-NEXT: [[FPTRUNC:%[0-9]+]]:_(s16) = G_FPTRUNC [[FMAXNUM_IEEE]](s32) + ; SI-NEXT: [[FPEXT2:%[0-9]+]]:_(s32) = G_FPEXT [[TRUNC1]](s16) + ; SI-NEXT: [[FPEXT3:%[0-9]+]]:_(s32) = G_FPEXT [[C]](s16) + ; SI-NEXT: [[FMAXNUM_IEEE1:%[0-9]+]]:_(s32) = G_FMAXNUM_IEEE [[FPEXT2]], 
[[FPEXT3]] + ; SI-NEXT: [[FPTRUNC1:%[0-9]+]]:_(s16) = G_FPTRUNC [[FMAXNUM_IEEE1]](s32) + ; SI-NEXT: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[FPTRUNC]](s16) + ; SI-NEXT: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[FPTRUNC1]](s16) + ; SI-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C1]](s32) + ; SI-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL]] + ; SI-NEXT: [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) + ; SI-NEXT: $vgpr0 = COPY [[BITCAST1]](<2 x s16>) ; VI-LABEL: name: test_fmaxnum_with_constant_vector_argument_v2s16_ieee_mode_on ; VI: [[COPY:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0 - ; VI: [[C:%[0-9]+]]:_(s16) = G_FCONSTANT half 0xH0000 - ; VI: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY]](<2 x s16>) - ; VI: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST]](s32) - ; VI: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; VI: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C1]](s32) - ; VI: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) - ; VI: [[FCANONICALIZE:%[0-9]+]]:_(s16) = G_FCANONICALIZE [[TRUNC]] - ; VI: [[FCANONICALIZE1:%[0-9]+]]:_(s16) = G_FCANONICALIZE [[C]] - ; VI: [[FMAXNUM_IEEE:%[0-9]+]]:_(s16) = G_FMAXNUM_IEEE [[FCANONICALIZE]], [[FCANONICALIZE1]] - ; VI: [[FCANONICALIZE2:%[0-9]+]]:_(s16) = G_FCANONICALIZE [[TRUNC1]] - ; VI: [[FCANONICALIZE3:%[0-9]+]]:_(s16) = G_FCANONICALIZE [[C]] - ; VI: [[FMAXNUM_IEEE1:%[0-9]+]]:_(s16) = G_FMAXNUM_IEEE [[FCANONICALIZE2]], [[FCANONICALIZE3]] - ; VI: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[FMAXNUM_IEEE]](s16) - ; VI: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[FMAXNUM_IEEE1]](s16) - ; VI: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C1]](s32) - ; VI: [[OR:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL]] - ; VI: [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) - ; VI: $vgpr0 = COPY [[BITCAST1]](<2 x s16>) + ; VI-NEXT: [[C:%[0-9]+]]:_(s16) = G_FCONSTANT half 0xH0000 + ; VI-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY]](<2 x s16>) + ; VI-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST]](s32) + ; VI-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; VI-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C1]](s32) + ; VI-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) + ; VI-NEXT: [[FCANONICALIZE:%[0-9]+]]:_(s16) = G_FCANONICALIZE [[TRUNC]] + ; VI-NEXT: [[FCANONICALIZE1:%[0-9]+]]:_(s16) = G_FCANONICALIZE [[C]] + ; VI-NEXT: [[FMAXNUM_IEEE:%[0-9]+]]:_(s16) = G_FMAXNUM_IEEE [[FCANONICALIZE]], [[FCANONICALIZE1]] + ; VI-NEXT: [[FCANONICALIZE2:%[0-9]+]]:_(s16) = G_FCANONICALIZE [[TRUNC1]] + ; VI-NEXT: [[FCANONICALIZE3:%[0-9]+]]:_(s16) = G_FCANONICALIZE [[C]] + ; VI-NEXT: [[FMAXNUM_IEEE1:%[0-9]+]]:_(s16) = G_FMAXNUM_IEEE [[FCANONICALIZE2]], [[FCANONICALIZE3]] + ; VI-NEXT: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[FMAXNUM_IEEE]](s16) + ; VI-NEXT: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[FMAXNUM_IEEE1]](s16) + ; VI-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C1]](s32) + ; VI-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL]] + ; VI-NEXT: [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) + ; VI-NEXT: $vgpr0 = COPY [[BITCAST1]](<2 x s16>) ; GFX9-LABEL: name: test_fmaxnum_with_constant_vector_argument_v2s16_ieee_mode_on ; GFX9: [[COPY:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0 - ; GFX9: [[C:%[0-9]+]]:_(s16) = G_FCONSTANT half 0xH0000 - ; GFX9: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[C]](s16) - ; GFX9: [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[ANYEXT]](s32), [[ANYEXT]](s32) - ; GFX9: [[FCANONICALIZE:%[0-9]+]]:_(<2 x s16>) = G_FCANONICALIZE [[COPY]] - ; GFX9: [[FMAXNUM_IEEE:%[0-9]+]]:_(<2 x s16>) = G_FMAXNUM_IEEE 
[[FCANONICALIZE]], [[BUILD_VECTOR_TRUNC]] - ; GFX9: $vgpr0 = COPY [[FMAXNUM_IEEE]](<2 x s16>) + ; GFX9-NEXT: [[C:%[0-9]+]]:_(s16) = G_FCONSTANT half 0xH0000 + ; GFX9-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[C]](s16) + ; GFX9-NEXT: [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[ANYEXT]](s32), [[ANYEXT]](s32) + ; GFX9-NEXT: [[FCANONICALIZE:%[0-9]+]]:_(<2 x s16>) = G_FCANONICALIZE [[COPY]] + ; GFX9-NEXT: [[FMAXNUM_IEEE:%[0-9]+]]:_(<2 x s16>) = G_FMAXNUM_IEEE [[FCANONICALIZE]], [[BUILD_VECTOR_TRUNC]] + ; GFX9-NEXT: $vgpr0 = COPY [[FMAXNUM_IEEE]](<2 x s16>) %0:_(<2 x s16>) = COPY $vgpr0 %1:_(s16) = G_FCONSTANT half 0xH0000 %2:_(<2 x s16>) = G_BUILD_VECTOR %1(s16), %1(s16) diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-fminnum.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-fminnum.mir index 9b6b39b01c38..097b096f82b7 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-fminnum.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-fminnum.mir @@ -15,25 +15,25 @@ body: | ; SI-LABEL: name: test_fminnum_s32_ieee_mode_on ; SI: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; SI: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; SI: [[FCANONICALIZE:%[0-9]+]]:_(s32) = G_FCANONICALIZE [[COPY]] - ; SI: [[FCANONICALIZE1:%[0-9]+]]:_(s32) = G_FCANONICALIZE [[COPY1]] - ; SI: [[FMINNUM_IEEE:%[0-9]+]]:_(s32) = G_FMINNUM_IEEE [[FCANONICALIZE]], [[FCANONICALIZE1]] - ; SI: $vgpr0 = COPY [[FMINNUM_IEEE]](s32) + ; SI-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; SI-NEXT: [[FCANONICALIZE:%[0-9]+]]:_(s32) = G_FCANONICALIZE [[COPY]] + ; SI-NEXT: [[FCANONICALIZE1:%[0-9]+]]:_(s32) = G_FCANONICALIZE [[COPY1]] + ; SI-NEXT: [[FMINNUM_IEEE:%[0-9]+]]:_(s32) = G_FMINNUM_IEEE [[FCANONICALIZE]], [[FCANONICALIZE1]] + ; SI-NEXT: $vgpr0 = COPY [[FMINNUM_IEEE]](s32) ; VI-LABEL: name: test_fminnum_s32_ieee_mode_on ; VI: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; VI: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; VI: [[FCANONICALIZE:%[0-9]+]]:_(s32) = G_FCANONICALIZE [[COPY]] - ; VI: [[FCANONICALIZE1:%[0-9]+]]:_(s32) = G_FCANONICALIZE [[COPY1]] - ; VI: [[FMINNUM_IEEE:%[0-9]+]]:_(s32) = G_FMINNUM_IEEE [[FCANONICALIZE]], [[FCANONICALIZE1]] - ; VI: $vgpr0 = COPY [[FMINNUM_IEEE]](s32) + ; VI-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; VI-NEXT: [[FCANONICALIZE:%[0-9]+]]:_(s32) = G_FCANONICALIZE [[COPY]] + ; VI-NEXT: [[FCANONICALIZE1:%[0-9]+]]:_(s32) = G_FCANONICALIZE [[COPY1]] + ; VI-NEXT: [[FMINNUM_IEEE:%[0-9]+]]:_(s32) = G_FMINNUM_IEEE [[FCANONICALIZE]], [[FCANONICALIZE1]] + ; VI-NEXT: $vgpr0 = COPY [[FMINNUM_IEEE]](s32) ; GFX9-LABEL: name: test_fminnum_s32_ieee_mode_on ; GFX9: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX9: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX9: [[FCANONICALIZE:%[0-9]+]]:_(s32) = G_FCANONICALIZE [[COPY]] - ; GFX9: [[FCANONICALIZE1:%[0-9]+]]:_(s32) = G_FCANONICALIZE [[COPY1]] - ; GFX9: [[FMINNUM_IEEE:%[0-9]+]]:_(s32) = G_FMINNUM_IEEE [[FCANONICALIZE]], [[FCANONICALIZE1]] - ; GFX9: $vgpr0 = COPY [[FMINNUM_IEEE]](s32) + ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; GFX9-NEXT: [[FCANONICALIZE:%[0-9]+]]:_(s32) = G_FCANONICALIZE [[COPY]] + ; GFX9-NEXT: [[FCANONICALIZE1:%[0-9]+]]:_(s32) = G_FCANONICALIZE [[COPY1]] + ; GFX9-NEXT: [[FMINNUM_IEEE:%[0-9]+]]:_(s32) = G_FMINNUM_IEEE [[FCANONICALIZE]], [[FCANONICALIZE1]] + ; GFX9-NEXT: $vgpr0 = COPY [[FMINNUM_IEEE]](s32) %0:_(s32) = COPY $vgpr0 %1:_(s32) = COPY $vgpr1 %2:_(s32) = G_FMINNUM %0, %1 @@ -51,19 +51,19 @@ body: | ; SI-LABEL: name: test_fminnum_s32_ieee_mode_off ; SI: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; SI: 
[[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; SI: [[FMINNUM:%[0-9]+]]:_(s32) = G_FMINNUM [[COPY]], [[COPY1]] - ; SI: $vgpr0 = COPY [[FMINNUM]](s32) + ; SI-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; SI-NEXT: [[FMINNUM:%[0-9]+]]:_(s32) = G_FMINNUM [[COPY]], [[COPY1]] + ; SI-NEXT: $vgpr0 = COPY [[FMINNUM]](s32) ; VI-LABEL: name: test_fminnum_s32_ieee_mode_off ; VI: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; VI: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; VI: [[FMINNUM:%[0-9]+]]:_(s32) = G_FMINNUM [[COPY]], [[COPY1]] - ; VI: $vgpr0 = COPY [[FMINNUM]](s32) + ; VI-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; VI-NEXT: [[FMINNUM:%[0-9]+]]:_(s32) = G_FMINNUM [[COPY]], [[COPY1]] + ; VI-NEXT: $vgpr0 = COPY [[FMINNUM]](s32) ; GFX9-LABEL: name: test_fminnum_s32_ieee_mode_off ; GFX9: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX9: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX9: [[FMINNUM:%[0-9]+]]:_(s32) = G_FMINNUM [[COPY]], [[COPY1]] - ; GFX9: $vgpr0 = COPY [[FMINNUM]](s32) + ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; GFX9-NEXT: [[FMINNUM:%[0-9]+]]:_(s32) = G_FMINNUM [[COPY]], [[COPY1]] + ; GFX9-NEXT: $vgpr0 = COPY [[FMINNUM]](s32) %0:_(s32) = COPY $vgpr0 %1:_(s32) = COPY $vgpr1 %2:_(s32) = G_FMINNUM %0, %1 @@ -78,19 +78,19 @@ body: | ; SI-LABEL: name: test_fminnum_s32_nnan ; SI: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; SI: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; SI: [[FMINNUM_IEEE:%[0-9]+]]:_(s32) = nnan G_FMINNUM_IEEE [[COPY]], [[COPY1]] - ; SI: $vgpr0 = COPY [[FMINNUM_IEEE]](s32) + ; SI-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; SI-NEXT: [[FMINNUM_IEEE:%[0-9]+]]:_(s32) = nnan G_FMINNUM_IEEE [[COPY]], [[COPY1]] + ; SI-NEXT: $vgpr0 = COPY [[FMINNUM_IEEE]](s32) ; VI-LABEL: name: test_fminnum_s32_nnan ; VI: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; VI: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; VI: [[FMINNUM_IEEE:%[0-9]+]]:_(s32) = nnan G_FMINNUM_IEEE [[COPY]], [[COPY1]] - ; VI: $vgpr0 = COPY [[FMINNUM_IEEE]](s32) + ; VI-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; VI-NEXT: [[FMINNUM_IEEE:%[0-9]+]]:_(s32) = nnan G_FMINNUM_IEEE [[COPY]], [[COPY1]] + ; VI-NEXT: $vgpr0 = COPY [[FMINNUM_IEEE]](s32) ; GFX9-LABEL: name: test_fminnum_s32_nnan ; GFX9: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX9: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX9: [[FMINNUM_IEEE:%[0-9]+]]:_(s32) = nnan G_FMINNUM_IEEE [[COPY]], [[COPY1]] - ; GFX9: $vgpr0 = COPY [[FMINNUM_IEEE]](s32) + ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; GFX9-NEXT: [[FMINNUM_IEEE:%[0-9]+]]:_(s32) = nnan G_FMINNUM_IEEE [[COPY]], [[COPY1]] + ; GFX9-NEXT: $vgpr0 = COPY [[FMINNUM_IEEE]](s32) %0:_(s32) = COPY $vgpr0 %1:_(s32) = COPY $vgpr1 %2:_(s32) = nnan G_FMINNUM %0, %1 @@ -106,22 +106,22 @@ body: | ; SI-LABEL: name: test_fminnum_s32_nnan_lhs ; SI: [[COPY:%[0-9]+]]:_(s32) = nnan COPY $vgpr0 - ; SI: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; SI: [[FCANONICALIZE:%[0-9]+]]:_(s32) = G_FCANONICALIZE [[COPY1]] - ; SI: [[FMINNUM_IEEE:%[0-9]+]]:_(s32) = G_FMINNUM_IEEE [[COPY]], [[FCANONICALIZE]] - ; SI: $vgpr0 = COPY [[FMINNUM_IEEE]](s32) + ; SI-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; SI-NEXT: [[FCANONICALIZE:%[0-9]+]]:_(s32) = G_FCANONICALIZE [[COPY1]] + ; SI-NEXT: [[FMINNUM_IEEE:%[0-9]+]]:_(s32) = G_FMINNUM_IEEE [[COPY]], [[FCANONICALIZE]] + ; SI-NEXT: $vgpr0 = COPY [[FMINNUM_IEEE]](s32) ; VI-LABEL: name: test_fminnum_s32_nnan_lhs ; VI: [[COPY:%[0-9]+]]:_(s32) = nnan COPY $vgpr0 - ; VI: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; VI: [[FCANONICALIZE:%[0-9]+]]:_(s32) = G_FCANONICALIZE [[COPY1]] - ; VI: 
[[FMINNUM_IEEE:%[0-9]+]]:_(s32) = G_FMINNUM_IEEE [[COPY]], [[FCANONICALIZE]] - ; VI: $vgpr0 = COPY [[FMINNUM_IEEE]](s32) + ; VI-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; VI-NEXT: [[FCANONICALIZE:%[0-9]+]]:_(s32) = G_FCANONICALIZE [[COPY1]] + ; VI-NEXT: [[FMINNUM_IEEE:%[0-9]+]]:_(s32) = G_FMINNUM_IEEE [[COPY]], [[FCANONICALIZE]] + ; VI-NEXT: $vgpr0 = COPY [[FMINNUM_IEEE]](s32) ; GFX9-LABEL: name: test_fminnum_s32_nnan_lhs ; GFX9: [[COPY:%[0-9]+]]:_(s32) = nnan COPY $vgpr0 - ; GFX9: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX9: [[FCANONICALIZE:%[0-9]+]]:_(s32) = G_FCANONICALIZE [[COPY1]] - ; GFX9: [[FMINNUM_IEEE:%[0-9]+]]:_(s32) = G_FMINNUM_IEEE [[COPY]], [[FCANONICALIZE]] - ; GFX9: $vgpr0 = COPY [[FMINNUM_IEEE]](s32) + ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; GFX9-NEXT: [[FCANONICALIZE:%[0-9]+]]:_(s32) = G_FCANONICALIZE [[COPY1]] + ; GFX9-NEXT: [[FMINNUM_IEEE:%[0-9]+]]:_(s32) = G_FMINNUM_IEEE [[COPY]], [[FCANONICALIZE]] + ; GFX9-NEXT: $vgpr0 = COPY [[FMINNUM_IEEE]](s32) %0:_(s32) = nnan COPY $vgpr0 %1:_(s32) = COPY $vgpr1 %2:_(s32) = G_FMINNUM %0, %1 @@ -137,22 +137,22 @@ body: | ; SI-LABEL: name: test_fminnum_s32_nnan_rhs ; SI: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; SI: [[COPY1:%[0-9]+]]:_(s32) = nnan COPY $vgpr1 - ; SI: [[FCANONICALIZE:%[0-9]+]]:_(s32) = G_FCANONICALIZE [[COPY]] - ; SI: [[FMINNUM_IEEE:%[0-9]+]]:_(s32) = G_FMINNUM_IEEE [[FCANONICALIZE]], [[COPY1]] - ; SI: $vgpr0 = COPY [[FMINNUM_IEEE]](s32) + ; SI-NEXT: [[COPY1:%[0-9]+]]:_(s32) = nnan COPY $vgpr1 + ; SI-NEXT: [[FCANONICALIZE:%[0-9]+]]:_(s32) = G_FCANONICALIZE [[COPY]] + ; SI-NEXT: [[FMINNUM_IEEE:%[0-9]+]]:_(s32) = G_FMINNUM_IEEE [[FCANONICALIZE]], [[COPY1]] + ; SI-NEXT: $vgpr0 = COPY [[FMINNUM_IEEE]](s32) ; VI-LABEL: name: test_fminnum_s32_nnan_rhs ; VI: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; VI: [[COPY1:%[0-9]+]]:_(s32) = nnan COPY $vgpr1 - ; VI: [[FCANONICALIZE:%[0-9]+]]:_(s32) = G_FCANONICALIZE [[COPY]] - ; VI: [[FMINNUM_IEEE:%[0-9]+]]:_(s32) = G_FMINNUM_IEEE [[FCANONICALIZE]], [[COPY1]] - ; VI: $vgpr0 = COPY [[FMINNUM_IEEE]](s32) + ; VI-NEXT: [[COPY1:%[0-9]+]]:_(s32) = nnan COPY $vgpr1 + ; VI-NEXT: [[FCANONICALIZE:%[0-9]+]]:_(s32) = G_FCANONICALIZE [[COPY]] + ; VI-NEXT: [[FMINNUM_IEEE:%[0-9]+]]:_(s32) = G_FMINNUM_IEEE [[FCANONICALIZE]], [[COPY1]] + ; VI-NEXT: $vgpr0 = COPY [[FMINNUM_IEEE]](s32) ; GFX9-LABEL: name: test_fminnum_s32_nnan_rhs ; GFX9: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX9: [[COPY1:%[0-9]+]]:_(s32) = nnan COPY $vgpr1 - ; GFX9: [[FCANONICALIZE:%[0-9]+]]:_(s32) = G_FCANONICALIZE [[COPY]] - ; GFX9: [[FMINNUM_IEEE:%[0-9]+]]:_(s32) = G_FMINNUM_IEEE [[FCANONICALIZE]], [[COPY1]] - ; GFX9: $vgpr0 = COPY [[FMINNUM_IEEE]](s32) + ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(s32) = nnan COPY $vgpr1 + ; GFX9-NEXT: [[FCANONICALIZE:%[0-9]+]]:_(s32) = G_FCANONICALIZE [[COPY]] + ; GFX9-NEXT: [[FMINNUM_IEEE:%[0-9]+]]:_(s32) = G_FMINNUM_IEEE [[FCANONICALIZE]], [[COPY1]] + ; GFX9-NEXT: $vgpr0 = COPY [[FMINNUM_IEEE]](s32) %0:_(s32) = COPY $vgpr0 %1:_(s32) = nnan COPY $vgpr1 %2:_(s32) = G_FMINNUM %0, %1 @@ -167,19 +167,19 @@ body: | ; SI-LABEL: name: test_fminnum_s32_nnan_lhs_rhs ; SI: [[COPY:%[0-9]+]]:_(s32) = nnan COPY $vgpr0 - ; SI: [[COPY1:%[0-9]+]]:_(s32) = nnan COPY $vgpr1 - ; SI: [[FMINNUM_IEEE:%[0-9]+]]:_(s32) = G_FMINNUM_IEEE [[COPY]], [[COPY1]] - ; SI: $vgpr0 = COPY [[FMINNUM_IEEE]](s32) + ; SI-NEXT: [[COPY1:%[0-9]+]]:_(s32) = nnan COPY $vgpr1 + ; SI-NEXT: [[FMINNUM_IEEE:%[0-9]+]]:_(s32) = G_FMINNUM_IEEE [[COPY]], [[COPY1]] + ; SI-NEXT: $vgpr0 = COPY [[FMINNUM_IEEE]](s32) ; VI-LABEL: name: 
test_fminnum_s32_nnan_lhs_rhs ; VI: [[COPY:%[0-9]+]]:_(s32) = nnan COPY $vgpr0 - ; VI: [[COPY1:%[0-9]+]]:_(s32) = nnan COPY $vgpr1 - ; VI: [[FMINNUM_IEEE:%[0-9]+]]:_(s32) = G_FMINNUM_IEEE [[COPY]], [[COPY1]] - ; VI: $vgpr0 = COPY [[FMINNUM_IEEE]](s32) + ; VI-NEXT: [[COPY1:%[0-9]+]]:_(s32) = nnan COPY $vgpr1 + ; VI-NEXT: [[FMINNUM_IEEE:%[0-9]+]]:_(s32) = G_FMINNUM_IEEE [[COPY]], [[COPY1]] + ; VI-NEXT: $vgpr0 = COPY [[FMINNUM_IEEE]](s32) ; GFX9-LABEL: name: test_fminnum_s32_nnan_lhs_rhs ; GFX9: [[COPY:%[0-9]+]]:_(s32) = nnan COPY $vgpr0 - ; GFX9: [[COPY1:%[0-9]+]]:_(s32) = nnan COPY $vgpr1 - ; GFX9: [[FMINNUM_IEEE:%[0-9]+]]:_(s32) = G_FMINNUM_IEEE [[COPY]], [[COPY1]] - ; GFX9: $vgpr0 = COPY [[FMINNUM_IEEE]](s32) + ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(s32) = nnan COPY $vgpr1 + ; GFX9-NEXT: [[FMINNUM_IEEE:%[0-9]+]]:_(s32) = G_FMINNUM_IEEE [[COPY]], [[COPY1]] + ; GFX9-NEXT: $vgpr0 = COPY [[FMINNUM_IEEE]](s32) %0:_(s32) = nnan COPY $vgpr0 %1:_(s32) = nnan COPY $vgpr1 %2:_(s32) = G_FMINNUM %0, %1 @@ -194,25 +194,25 @@ body: | ; SI-LABEL: name: test_fminnum_s64 ; SI: [[COPY:%[0-9]+]]:_(s64) = COPY $vgpr0_vgpr1 - ; SI: [[COPY1:%[0-9]+]]:_(s64) = COPY $vgpr2_vgpr3 - ; SI: [[FCANONICALIZE:%[0-9]+]]:_(s64) = G_FCANONICALIZE [[COPY]] - ; SI: [[FCANONICALIZE1:%[0-9]+]]:_(s64) = G_FCANONICALIZE [[COPY1]] - ; SI: [[FMINNUM_IEEE:%[0-9]+]]:_(s64) = G_FMINNUM_IEEE [[FCANONICALIZE]], [[FCANONICALIZE1]] - ; SI: $vgpr0_vgpr1 = COPY [[FMINNUM_IEEE]](s64) + ; SI-NEXT: [[COPY1:%[0-9]+]]:_(s64) = COPY $vgpr2_vgpr3 + ; SI-NEXT: [[FCANONICALIZE:%[0-9]+]]:_(s64) = G_FCANONICALIZE [[COPY]] + ; SI-NEXT: [[FCANONICALIZE1:%[0-9]+]]:_(s64) = G_FCANONICALIZE [[COPY1]] + ; SI-NEXT: [[FMINNUM_IEEE:%[0-9]+]]:_(s64) = G_FMINNUM_IEEE [[FCANONICALIZE]], [[FCANONICALIZE1]] + ; SI-NEXT: $vgpr0_vgpr1 = COPY [[FMINNUM_IEEE]](s64) ; VI-LABEL: name: test_fminnum_s64 ; VI: [[COPY:%[0-9]+]]:_(s64) = COPY $vgpr0_vgpr1 - ; VI: [[COPY1:%[0-9]+]]:_(s64) = COPY $vgpr2_vgpr3 - ; VI: [[FCANONICALIZE:%[0-9]+]]:_(s64) = G_FCANONICALIZE [[COPY]] - ; VI: [[FCANONICALIZE1:%[0-9]+]]:_(s64) = G_FCANONICALIZE [[COPY1]] - ; VI: [[FMINNUM_IEEE:%[0-9]+]]:_(s64) = G_FMINNUM_IEEE [[FCANONICALIZE]], [[FCANONICALIZE1]] - ; VI: $vgpr0_vgpr1 = COPY [[FMINNUM_IEEE]](s64) + ; VI-NEXT: [[COPY1:%[0-9]+]]:_(s64) = COPY $vgpr2_vgpr3 + ; VI-NEXT: [[FCANONICALIZE:%[0-9]+]]:_(s64) = G_FCANONICALIZE [[COPY]] + ; VI-NEXT: [[FCANONICALIZE1:%[0-9]+]]:_(s64) = G_FCANONICALIZE [[COPY1]] + ; VI-NEXT: [[FMINNUM_IEEE:%[0-9]+]]:_(s64) = G_FMINNUM_IEEE [[FCANONICALIZE]], [[FCANONICALIZE1]] + ; VI-NEXT: $vgpr0_vgpr1 = COPY [[FMINNUM_IEEE]](s64) ; GFX9-LABEL: name: test_fminnum_s64 ; GFX9: [[COPY:%[0-9]+]]:_(s64) = COPY $vgpr0_vgpr1 - ; GFX9: [[COPY1:%[0-9]+]]:_(s64) = COPY $vgpr2_vgpr3 - ; GFX9: [[FCANONICALIZE:%[0-9]+]]:_(s64) = G_FCANONICALIZE [[COPY]] - ; GFX9: [[FCANONICALIZE1:%[0-9]+]]:_(s64) = G_FCANONICALIZE [[COPY1]] - ; GFX9: [[FMINNUM_IEEE:%[0-9]+]]:_(s64) = G_FMINNUM_IEEE [[FCANONICALIZE]], [[FCANONICALIZE1]] - ; GFX9: $vgpr0_vgpr1 = COPY [[FMINNUM_IEEE]](s64) + ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(s64) = COPY $vgpr2_vgpr3 + ; GFX9-NEXT: [[FCANONICALIZE:%[0-9]+]]:_(s64) = G_FCANONICALIZE [[COPY]] + ; GFX9-NEXT: [[FCANONICALIZE1:%[0-9]+]]:_(s64) = G_FCANONICALIZE [[COPY1]] + ; GFX9-NEXT: [[FMINNUM_IEEE:%[0-9]+]]:_(s64) = G_FMINNUM_IEEE [[FCANONICALIZE]], [[FCANONICALIZE1]] + ; GFX9-NEXT: $vgpr0_vgpr1 = COPY [[FMINNUM_IEEE]](s64) %0:_(s64) = COPY $vgpr0_vgpr1 %1:_(s64) = COPY $vgpr2_vgpr3 %2:_(s64) = G_FMINNUM %0, %1 @@ -227,35 +227,35 @@ body: | ; SI-LABEL: name: test_fminnum_s16 
; SI: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; SI: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; SI: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY]](s32) - ; SI: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY1]](s32) - ; SI: [[FPEXT:%[0-9]+]]:_(s32) = G_FPEXT [[TRUNC]](s16) - ; SI: [[FPEXT1:%[0-9]+]]:_(s32) = G_FPEXT [[TRUNC1]](s16) - ; SI: [[FMINNUM_IEEE:%[0-9]+]]:_(s32) = G_FMINNUM_IEEE [[FPEXT]], [[FPEXT1]] - ; SI: [[FPTRUNC:%[0-9]+]]:_(s16) = G_FPTRUNC [[FMINNUM_IEEE]](s32) - ; SI: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[FPTRUNC]](s16) - ; SI: $vgpr0 = COPY [[ANYEXT]](s32) + ; SI-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; SI-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY]](s32) + ; SI-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY1]](s32) + ; SI-NEXT: [[FPEXT:%[0-9]+]]:_(s32) = G_FPEXT [[TRUNC]](s16) + ; SI-NEXT: [[FPEXT1:%[0-9]+]]:_(s32) = G_FPEXT [[TRUNC1]](s16) + ; SI-NEXT: [[FMINNUM_IEEE:%[0-9]+]]:_(s32) = G_FMINNUM_IEEE [[FPEXT]], [[FPEXT1]] + ; SI-NEXT: [[FPTRUNC:%[0-9]+]]:_(s16) = G_FPTRUNC [[FMINNUM_IEEE]](s32) + ; SI-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[FPTRUNC]](s16) + ; SI-NEXT: $vgpr0 = COPY [[ANYEXT]](s32) ; VI-LABEL: name: test_fminnum_s16 ; VI: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; VI: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; VI: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY]](s32) - ; VI: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY1]](s32) - ; VI: [[FCANONICALIZE:%[0-9]+]]:_(s16) = G_FCANONICALIZE [[TRUNC]] - ; VI: [[FCANONICALIZE1:%[0-9]+]]:_(s16) = G_FCANONICALIZE [[TRUNC1]] - ; VI: [[FMINNUM_IEEE:%[0-9]+]]:_(s16) = G_FMINNUM_IEEE [[FCANONICALIZE]], [[FCANONICALIZE1]] - ; VI: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[FMINNUM_IEEE]](s16) - ; VI: $vgpr0 = COPY [[ANYEXT]](s32) + ; VI-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; VI-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY]](s32) + ; VI-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY1]](s32) + ; VI-NEXT: [[FCANONICALIZE:%[0-9]+]]:_(s16) = G_FCANONICALIZE [[TRUNC]] + ; VI-NEXT: [[FCANONICALIZE1:%[0-9]+]]:_(s16) = G_FCANONICALIZE [[TRUNC1]] + ; VI-NEXT: [[FMINNUM_IEEE:%[0-9]+]]:_(s16) = G_FMINNUM_IEEE [[FCANONICALIZE]], [[FCANONICALIZE1]] + ; VI-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[FMINNUM_IEEE]](s16) + ; VI-NEXT: $vgpr0 = COPY [[ANYEXT]](s32) ; GFX9-LABEL: name: test_fminnum_s16 ; GFX9: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX9: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX9: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY]](s32) - ; GFX9: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY1]](s32) - ; GFX9: [[FCANONICALIZE:%[0-9]+]]:_(s16) = G_FCANONICALIZE [[TRUNC]] - ; GFX9: [[FCANONICALIZE1:%[0-9]+]]:_(s16) = G_FCANONICALIZE [[TRUNC1]] - ; GFX9: [[FMINNUM_IEEE:%[0-9]+]]:_(s16) = G_FMINNUM_IEEE [[FCANONICALIZE]], [[FCANONICALIZE1]] - ; GFX9: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[FMINNUM_IEEE]](s16) - ; GFX9: $vgpr0 = COPY [[ANYEXT]](s32) + ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; GFX9-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY]](s32) + ; GFX9-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY1]](s32) + ; GFX9-NEXT: [[FCANONICALIZE:%[0-9]+]]:_(s16) = G_FCANONICALIZE [[TRUNC]] + ; GFX9-NEXT: [[FCANONICALIZE1:%[0-9]+]]:_(s16) = G_FCANONICALIZE [[TRUNC1]] + ; GFX9-NEXT: [[FMINNUM_IEEE:%[0-9]+]]:_(s16) = G_FMINNUM_IEEE [[FCANONICALIZE]], [[FCANONICALIZE1]] + ; GFX9-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[FMINNUM_IEEE]](s16) + ; GFX9-NEXT: $vgpr0 = COPY [[ANYEXT]](s32) %0:_(s32) = COPY $vgpr0 %1:_(s32) = COPY $vgpr1 %2:_(s16) = G_TRUNC %0 @@ -273,43 +273,43 @@ body: | ; SI-LABEL: name: 
test_fminnum_v2s32 ; SI: [[COPY:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr0_vgpr1 - ; SI: [[COPY1:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr2_vgpr3 - ; SI: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<2 x s32>) - ; SI: [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](<2 x s32>) - ; SI: [[FCANONICALIZE:%[0-9]+]]:_(s32) = G_FCANONICALIZE [[UV]] - ; SI: [[FCANONICALIZE1:%[0-9]+]]:_(s32) = G_FCANONICALIZE [[UV2]] - ; SI: [[FMINNUM_IEEE:%[0-9]+]]:_(s32) = G_FMINNUM_IEEE [[FCANONICALIZE]], [[FCANONICALIZE1]] - ; SI: [[FCANONICALIZE2:%[0-9]+]]:_(s32) = G_FCANONICALIZE [[UV1]] - ; SI: [[FCANONICALIZE3:%[0-9]+]]:_(s32) = G_FCANONICALIZE [[UV3]] - ; SI: [[FMINNUM_IEEE1:%[0-9]+]]:_(s32) = G_FMINNUM_IEEE [[FCANONICALIZE2]], [[FCANONICALIZE3]] - ; SI: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[FMINNUM_IEEE]](s32), [[FMINNUM_IEEE1]](s32) - ; SI: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x s32>) + ; SI-NEXT: [[COPY1:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr2_vgpr3 + ; SI-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<2 x s32>) + ; SI-NEXT: [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](<2 x s32>) + ; SI-NEXT: [[FCANONICALIZE:%[0-9]+]]:_(s32) = G_FCANONICALIZE [[UV]] + ; SI-NEXT: [[FCANONICALIZE1:%[0-9]+]]:_(s32) = G_FCANONICALIZE [[UV2]] + ; SI-NEXT: [[FMINNUM_IEEE:%[0-9]+]]:_(s32) = G_FMINNUM_IEEE [[FCANONICALIZE]], [[FCANONICALIZE1]] + ; SI-NEXT: [[FCANONICALIZE2:%[0-9]+]]:_(s32) = G_FCANONICALIZE [[UV1]] + ; SI-NEXT: [[FCANONICALIZE3:%[0-9]+]]:_(s32) = G_FCANONICALIZE [[UV3]] + ; SI-NEXT: [[FMINNUM_IEEE1:%[0-9]+]]:_(s32) = G_FMINNUM_IEEE [[FCANONICALIZE2]], [[FCANONICALIZE3]] + ; SI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[FMINNUM_IEEE]](s32), [[FMINNUM_IEEE1]](s32) + ; SI-NEXT: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x s32>) ; VI-LABEL: name: test_fminnum_v2s32 ; VI: [[COPY:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr0_vgpr1 - ; VI: [[COPY1:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr2_vgpr3 - ; VI: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<2 x s32>) - ; VI: [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](<2 x s32>) - ; VI: [[FCANONICALIZE:%[0-9]+]]:_(s32) = G_FCANONICALIZE [[UV]] - ; VI: [[FCANONICALIZE1:%[0-9]+]]:_(s32) = G_FCANONICALIZE [[UV2]] - ; VI: [[FMINNUM_IEEE:%[0-9]+]]:_(s32) = G_FMINNUM_IEEE [[FCANONICALIZE]], [[FCANONICALIZE1]] - ; VI: [[FCANONICALIZE2:%[0-9]+]]:_(s32) = G_FCANONICALIZE [[UV1]] - ; VI: [[FCANONICALIZE3:%[0-9]+]]:_(s32) = G_FCANONICALIZE [[UV3]] - ; VI: [[FMINNUM_IEEE1:%[0-9]+]]:_(s32) = G_FMINNUM_IEEE [[FCANONICALIZE2]], [[FCANONICALIZE3]] - ; VI: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[FMINNUM_IEEE]](s32), [[FMINNUM_IEEE1]](s32) - ; VI: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x s32>) + ; VI-NEXT: [[COPY1:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr2_vgpr3 + ; VI-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<2 x s32>) + ; VI-NEXT: [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](<2 x s32>) + ; VI-NEXT: [[FCANONICALIZE:%[0-9]+]]:_(s32) = G_FCANONICALIZE [[UV]] + ; VI-NEXT: [[FCANONICALIZE1:%[0-9]+]]:_(s32) = G_FCANONICALIZE [[UV2]] + ; VI-NEXT: [[FMINNUM_IEEE:%[0-9]+]]:_(s32) = G_FMINNUM_IEEE [[FCANONICALIZE]], [[FCANONICALIZE1]] + ; VI-NEXT: [[FCANONICALIZE2:%[0-9]+]]:_(s32) = G_FCANONICALIZE [[UV1]] + ; VI-NEXT: [[FCANONICALIZE3:%[0-9]+]]:_(s32) = G_FCANONICALIZE [[UV3]] + ; VI-NEXT: [[FMINNUM_IEEE1:%[0-9]+]]:_(s32) = 
G_FMINNUM_IEEE [[FCANONICALIZE2]], [[FCANONICALIZE3]] + ; VI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[FMINNUM_IEEE]](s32), [[FMINNUM_IEEE1]](s32) + ; VI-NEXT: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x s32>) ; GFX9-LABEL: name: test_fminnum_v2s32 ; GFX9: [[COPY:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr0_vgpr1 - ; GFX9: [[COPY1:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr2_vgpr3 - ; GFX9: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<2 x s32>) - ; GFX9: [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](<2 x s32>) - ; GFX9: [[FCANONICALIZE:%[0-9]+]]:_(s32) = G_FCANONICALIZE [[UV]] - ; GFX9: [[FCANONICALIZE1:%[0-9]+]]:_(s32) = G_FCANONICALIZE [[UV2]] - ; GFX9: [[FMINNUM_IEEE:%[0-9]+]]:_(s32) = G_FMINNUM_IEEE [[FCANONICALIZE]], [[FCANONICALIZE1]] - ; GFX9: [[FCANONICALIZE2:%[0-9]+]]:_(s32) = G_FCANONICALIZE [[UV1]] - ; GFX9: [[FCANONICALIZE3:%[0-9]+]]:_(s32) = G_FCANONICALIZE [[UV3]] - ; GFX9: [[FMINNUM_IEEE1:%[0-9]+]]:_(s32) = G_FMINNUM_IEEE [[FCANONICALIZE2]], [[FCANONICALIZE3]] - ; GFX9: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[FMINNUM_IEEE]](s32), [[FMINNUM_IEEE1]](s32) - ; GFX9: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x s32>) + ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr2_vgpr3 + ; GFX9-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<2 x s32>) + ; GFX9-NEXT: [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](<2 x s32>) + ; GFX9-NEXT: [[FCANONICALIZE:%[0-9]+]]:_(s32) = G_FCANONICALIZE [[UV]] + ; GFX9-NEXT: [[FCANONICALIZE1:%[0-9]+]]:_(s32) = G_FCANONICALIZE [[UV2]] + ; GFX9-NEXT: [[FMINNUM_IEEE:%[0-9]+]]:_(s32) = G_FMINNUM_IEEE [[FCANONICALIZE]], [[FCANONICALIZE1]] + ; GFX9-NEXT: [[FCANONICALIZE2:%[0-9]+]]:_(s32) = G_FCANONICALIZE [[UV1]] + ; GFX9-NEXT: [[FCANONICALIZE3:%[0-9]+]]:_(s32) = G_FCANONICALIZE [[UV3]] + ; GFX9-NEXT: [[FMINNUM_IEEE1:%[0-9]+]]:_(s32) = G_FMINNUM_IEEE [[FCANONICALIZE2]], [[FCANONICALIZE3]] + ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[FMINNUM_IEEE]](s32), [[FMINNUM_IEEE1]](s32) + ; GFX9-NEXT: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x s32>) %0:_(<2 x s32>) = COPY $vgpr0_vgpr1 %1:_(<2 x s32>) = COPY $vgpr2_vgpr3 %2:_(<2 x s32>) = G_FMINNUM %0, %1 @@ -324,61 +324,61 @@ body: | ; SI-LABEL: name: test_fminnum_v2s16 ; SI: [[COPY:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0 - ; SI: [[COPY1:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr1 - ; SI: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY]](<2 x s16>) - ; SI: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST]](s32) - ; SI: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; SI: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) - ; SI: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) - ; SI: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[COPY1]](<2 x s16>) - ; SI: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST1]](s32) - ; SI: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C]](s32) - ; SI: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR1]](s32) - ; SI: [[FPEXT:%[0-9]+]]:_(s32) = G_FPEXT [[TRUNC]](s16) - ; SI: [[FPEXT1:%[0-9]+]]:_(s32) = G_FPEXT [[TRUNC2]](s16) - ; SI: [[FMINNUM_IEEE:%[0-9]+]]:_(s32) = G_FMINNUM_IEEE [[FPEXT]], [[FPEXT1]] - ; SI: [[FPTRUNC:%[0-9]+]]:_(s16) = G_FPTRUNC [[FMINNUM_IEEE]](s32) - ; SI: [[FPEXT2:%[0-9]+]]:_(s32) = G_FPEXT [[TRUNC1]](s16) - ; SI: [[FPEXT3:%[0-9]+]]:_(s32) = G_FPEXT [[TRUNC3]](s16) - ; SI: [[FMINNUM_IEEE1:%[0-9]+]]:_(s32) = G_FMINNUM_IEEE [[FPEXT2]], [[FPEXT3]] - ; SI: [[FPTRUNC1:%[0-9]+]]:_(s16) = G_FPTRUNC [[FMINNUM_IEEE1]](s32) - ; SI: 
[[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[FPTRUNC]](s16) - ; SI: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[FPTRUNC1]](s16) - ; SI: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C]](s32) - ; SI: [[OR:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL]] - ; SI: [[BITCAST2:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) - ; SI: $vgpr0 = COPY [[BITCAST2]](<2 x s16>) + ; SI-NEXT: [[COPY1:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr1 + ; SI-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY]](<2 x s16>) + ; SI-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST]](s32) + ; SI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; SI-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) + ; SI-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) + ; SI-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[COPY1]](<2 x s16>) + ; SI-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST1]](s32) + ; SI-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C]](s32) + ; SI-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR1]](s32) + ; SI-NEXT: [[FPEXT:%[0-9]+]]:_(s32) = G_FPEXT [[TRUNC]](s16) + ; SI-NEXT: [[FPEXT1:%[0-9]+]]:_(s32) = G_FPEXT [[TRUNC2]](s16) + ; SI-NEXT: [[FMINNUM_IEEE:%[0-9]+]]:_(s32) = G_FMINNUM_IEEE [[FPEXT]], [[FPEXT1]] + ; SI-NEXT: [[FPTRUNC:%[0-9]+]]:_(s16) = G_FPTRUNC [[FMINNUM_IEEE]](s32) + ; SI-NEXT: [[FPEXT2:%[0-9]+]]:_(s32) = G_FPEXT [[TRUNC1]](s16) + ; SI-NEXT: [[FPEXT3:%[0-9]+]]:_(s32) = G_FPEXT [[TRUNC3]](s16) + ; SI-NEXT: [[FMINNUM_IEEE1:%[0-9]+]]:_(s32) = G_FMINNUM_IEEE [[FPEXT2]], [[FPEXT3]] + ; SI-NEXT: [[FPTRUNC1:%[0-9]+]]:_(s16) = G_FPTRUNC [[FMINNUM_IEEE1]](s32) + ; SI-NEXT: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[FPTRUNC]](s16) + ; SI-NEXT: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[FPTRUNC1]](s16) + ; SI-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C]](s32) + ; SI-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL]] + ; SI-NEXT: [[BITCAST2:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) + ; SI-NEXT: $vgpr0 = COPY [[BITCAST2]](<2 x s16>) ; VI-LABEL: name: test_fminnum_v2s16 ; VI: [[COPY:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0 - ; VI: [[COPY1:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr1 - ; VI: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY]](<2 x s16>) - ; VI: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST]](s32) - ; VI: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; VI: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) - ; VI: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) - ; VI: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[COPY1]](<2 x s16>) - ; VI: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST1]](s32) - ; VI: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C]](s32) - ; VI: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR1]](s32) - ; VI: [[FCANONICALIZE:%[0-9]+]]:_(s16) = G_FCANONICALIZE [[TRUNC]] - ; VI: [[FCANONICALIZE1:%[0-9]+]]:_(s16) = G_FCANONICALIZE [[TRUNC2]] - ; VI: [[FMINNUM_IEEE:%[0-9]+]]:_(s16) = G_FMINNUM_IEEE [[FCANONICALIZE]], [[FCANONICALIZE1]] - ; VI: [[FCANONICALIZE2:%[0-9]+]]:_(s16) = G_FCANONICALIZE [[TRUNC1]] - ; VI: [[FCANONICALIZE3:%[0-9]+]]:_(s16) = G_FCANONICALIZE [[TRUNC3]] - ; VI: [[FMINNUM_IEEE1:%[0-9]+]]:_(s16) = G_FMINNUM_IEEE [[FCANONICALIZE2]], [[FCANONICALIZE3]] - ; VI: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[FMINNUM_IEEE]](s16) - ; VI: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[FMINNUM_IEEE1]](s16) - ; VI: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C]](s32) - ; VI: [[OR:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL]] - ; VI: [[BITCAST2:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) - ; VI: $vgpr0 = COPY [[BITCAST2]](<2 x s16>) + ; VI-NEXT: [[COPY1:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr1 + ; VI-NEXT: 
[[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY]](<2 x s16>) + ; VI-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST]](s32) + ; VI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; VI-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) + ; VI-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) + ; VI-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[COPY1]](<2 x s16>) + ; VI-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST1]](s32) + ; VI-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C]](s32) + ; VI-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR1]](s32) + ; VI-NEXT: [[FCANONICALIZE:%[0-9]+]]:_(s16) = G_FCANONICALIZE [[TRUNC]] + ; VI-NEXT: [[FCANONICALIZE1:%[0-9]+]]:_(s16) = G_FCANONICALIZE [[TRUNC2]] + ; VI-NEXT: [[FMINNUM_IEEE:%[0-9]+]]:_(s16) = G_FMINNUM_IEEE [[FCANONICALIZE]], [[FCANONICALIZE1]] + ; VI-NEXT: [[FCANONICALIZE2:%[0-9]+]]:_(s16) = G_FCANONICALIZE [[TRUNC1]] + ; VI-NEXT: [[FCANONICALIZE3:%[0-9]+]]:_(s16) = G_FCANONICALIZE [[TRUNC3]] + ; VI-NEXT: [[FMINNUM_IEEE1:%[0-9]+]]:_(s16) = G_FMINNUM_IEEE [[FCANONICALIZE2]], [[FCANONICALIZE3]] + ; VI-NEXT: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[FMINNUM_IEEE]](s16) + ; VI-NEXT: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[FMINNUM_IEEE1]](s16) + ; VI-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C]](s32) + ; VI-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL]] + ; VI-NEXT: [[BITCAST2:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) + ; VI-NEXT: $vgpr0 = COPY [[BITCAST2]](<2 x s16>) ; GFX9-LABEL: name: test_fminnum_v2s16 ; GFX9: [[COPY:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0 - ; GFX9: [[COPY1:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr1 - ; GFX9: [[FCANONICALIZE:%[0-9]+]]:_(<2 x s16>) = G_FCANONICALIZE [[COPY]] - ; GFX9: [[FCANONICALIZE1:%[0-9]+]]:_(<2 x s16>) = G_FCANONICALIZE [[COPY1]] - ; GFX9: [[FMINNUM_IEEE:%[0-9]+]]:_(<2 x s16>) = G_FMINNUM_IEEE [[FCANONICALIZE]], [[FCANONICALIZE1]] - ; GFX9: $vgpr0 = COPY [[FMINNUM_IEEE]](<2 x s16>) + ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr1 + ; GFX9-NEXT: [[FCANONICALIZE:%[0-9]+]]:_(<2 x s16>) = G_FCANONICALIZE [[COPY]] + ; GFX9-NEXT: [[FCANONICALIZE1:%[0-9]+]]:_(<2 x s16>) = G_FCANONICALIZE [[COPY1]] + ; GFX9-NEXT: [[FMINNUM_IEEE:%[0-9]+]]:_(<2 x s16>) = G_FMINNUM_IEEE [[FCANONICALIZE]], [[FCANONICALIZE1]] + ; GFX9-NEXT: $vgpr0 = COPY [[FMINNUM_IEEE]](<2 x s16>) %0:_(<2 x s16>) = COPY $vgpr0 %1:_(<2 x s16>) = COPY $vgpr1 %2:_(<2 x s16>) = G_FMINNUM %0, %1 @@ -392,137 +392,137 @@ body: | liveins: $vgpr0_vgpr1_vgpr2, $vgpr3_vgpr4_vgpr5 ; SI-LABEL: name: test_fminnum_v3s16 ; SI: [[COPY:%[0-9]+]]:_(<6 x s16>) = COPY $vgpr0_vgpr1_vgpr2 - ; SI: [[COPY1:%[0-9]+]]:_(<6 x s16>) = COPY $vgpr3_vgpr4_vgpr5 - ; SI: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>), [[UV2:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[COPY]](<6 x s16>) - ; SI: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>) - ; SI: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST]](s32) - ; SI: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; SI: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) - ; SI: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) - ; SI: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>) - ; SI: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST1]](s32) - ; SI: [[UV3:%[0-9]+]]:_(<2 x s16>), [[UV4:%[0-9]+]]:_(<2 x s16>), [[UV5:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[COPY1]](<6 x s16>) - ; SI: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[UV3]](<2 x s16>) - ; SI: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST2]](s32) - ; SI: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C]](s32) 
- ; SI: [[TRUNC4:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR1]](s32) - ; SI: [[BITCAST3:%[0-9]+]]:_(s32) = G_BITCAST [[UV4]](<2 x s16>) - ; SI: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST3]](s32) - ; SI: [[FPEXT:%[0-9]+]]:_(s32) = G_FPEXT [[TRUNC]](s16) - ; SI: [[FPEXT1:%[0-9]+]]:_(s32) = G_FPEXT [[TRUNC3]](s16) - ; SI: [[FMINNUM_IEEE:%[0-9]+]]:_(s32) = G_FMINNUM_IEEE [[FPEXT]], [[FPEXT1]] - ; SI: [[FPTRUNC:%[0-9]+]]:_(s16) = G_FPTRUNC [[FMINNUM_IEEE]](s32) - ; SI: [[FPEXT2:%[0-9]+]]:_(s32) = G_FPEXT [[TRUNC1]](s16) - ; SI: [[FPEXT3:%[0-9]+]]:_(s32) = G_FPEXT [[TRUNC4]](s16) - ; SI: [[FMINNUM_IEEE1:%[0-9]+]]:_(s32) = G_FMINNUM_IEEE [[FPEXT2]], [[FPEXT3]] - ; SI: [[FPTRUNC1:%[0-9]+]]:_(s16) = G_FPTRUNC [[FMINNUM_IEEE1]](s32) - ; SI: [[FPEXT4:%[0-9]+]]:_(s32) = G_FPEXT [[TRUNC2]](s16) - ; SI: [[FPEXT5:%[0-9]+]]:_(s32) = G_FPEXT [[TRUNC5]](s16) - ; SI: [[FMINNUM_IEEE2:%[0-9]+]]:_(s32) = G_FMINNUM_IEEE [[FPEXT4]], [[FPEXT5]] - ; SI: [[FPTRUNC2:%[0-9]+]]:_(s16) = G_FPTRUNC [[FMINNUM_IEEE2]](s32) - ; SI: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF - ; SI: [[UV6:%[0-9]+]]:_(<2 x s16>), [[UV7:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<4 x s16>) - ; SI: [[BITCAST4:%[0-9]+]]:_(s32) = G_BITCAST [[UV6]](<2 x s16>) - ; SI: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST4]], [[C]](s32) - ; SI: [[BITCAST5:%[0-9]+]]:_(s32) = G_BITCAST [[UV7]](<2 x s16>) - ; SI: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[FPTRUNC]](s16) - ; SI: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[FPTRUNC1]](s16) - ; SI: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C]](s32) - ; SI: [[OR:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL]] - ; SI: [[BITCAST6:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) - ; SI: [[ZEXT2:%[0-9]+]]:_(s32) = G_ZEXT [[FPTRUNC2]](s16) - ; SI: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 - ; SI: [[AND:%[0-9]+]]:_(s32) = G_AND [[BITCAST4]], [[C1]] - ; SI: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND]], [[C]](s32) - ; SI: [[OR1:%[0-9]+]]:_(s32) = G_OR [[ZEXT2]], [[SHL1]] - ; SI: [[BITCAST7:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32) - ; SI: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LSHR2]], [[C1]] - ; SI: [[AND2:%[0-9]+]]:_(s32) = G_AND [[BITCAST5]], [[C1]] - ; SI: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND2]], [[C]](s32) - ; SI: [[OR2:%[0-9]+]]:_(s32) = G_OR [[AND1]], [[SHL2]] - ; SI: [[BITCAST8:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR2]](s32) - ; SI: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BITCAST6]](<2 x s16>), [[BITCAST7]](<2 x s16>), [[BITCAST8]](<2 x s16>) - ; SI: $vgpr0_vgpr1_vgpr2 = COPY [[CONCAT_VECTORS]](<6 x s16>) + ; SI-NEXT: [[COPY1:%[0-9]+]]:_(<6 x s16>) = COPY $vgpr3_vgpr4_vgpr5 + ; SI-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>), [[UV2:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[COPY]](<6 x s16>) + ; SI-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>) + ; SI-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST]](s32) + ; SI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; SI-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) + ; SI-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) + ; SI-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>) + ; SI-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST1]](s32) + ; SI-NEXT: [[UV3:%[0-9]+]]:_(<2 x s16>), [[UV4:%[0-9]+]]:_(<2 x s16>), [[UV5:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[COPY1]](<6 x s16>) + ; SI-NEXT: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[UV3]](<2 x s16>) + ; SI-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST2]](s32) + ; SI-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C]](s32) + ; 
SI-NEXT: [[TRUNC4:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR1]](s32) + ; SI-NEXT: [[BITCAST3:%[0-9]+]]:_(s32) = G_BITCAST [[UV4]](<2 x s16>) + ; SI-NEXT: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST3]](s32) + ; SI-NEXT: [[FPEXT:%[0-9]+]]:_(s32) = G_FPEXT [[TRUNC]](s16) + ; SI-NEXT: [[FPEXT1:%[0-9]+]]:_(s32) = G_FPEXT [[TRUNC3]](s16) + ; SI-NEXT: [[FMINNUM_IEEE:%[0-9]+]]:_(s32) = G_FMINNUM_IEEE [[FPEXT]], [[FPEXT1]] + ; SI-NEXT: [[FPTRUNC:%[0-9]+]]:_(s16) = G_FPTRUNC [[FMINNUM_IEEE]](s32) + ; SI-NEXT: [[FPEXT2:%[0-9]+]]:_(s32) = G_FPEXT [[TRUNC1]](s16) + ; SI-NEXT: [[FPEXT3:%[0-9]+]]:_(s32) = G_FPEXT [[TRUNC4]](s16) + ; SI-NEXT: [[FMINNUM_IEEE1:%[0-9]+]]:_(s32) = G_FMINNUM_IEEE [[FPEXT2]], [[FPEXT3]] + ; SI-NEXT: [[FPTRUNC1:%[0-9]+]]:_(s16) = G_FPTRUNC [[FMINNUM_IEEE1]](s32) + ; SI-NEXT: [[FPEXT4:%[0-9]+]]:_(s32) = G_FPEXT [[TRUNC2]](s16) + ; SI-NEXT: [[FPEXT5:%[0-9]+]]:_(s32) = G_FPEXT [[TRUNC5]](s16) + ; SI-NEXT: [[FMINNUM_IEEE2:%[0-9]+]]:_(s32) = G_FMINNUM_IEEE [[FPEXT4]], [[FPEXT5]] + ; SI-NEXT: [[FPTRUNC2:%[0-9]+]]:_(s16) = G_FPTRUNC [[FMINNUM_IEEE2]](s32) + ; SI-NEXT: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF + ; SI-NEXT: [[UV6:%[0-9]+]]:_(<2 x s16>), [[UV7:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<4 x s16>) + ; SI-NEXT: [[BITCAST4:%[0-9]+]]:_(s32) = G_BITCAST [[UV6]](<2 x s16>) + ; SI-NEXT: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST4]], [[C]](s32) + ; SI-NEXT: [[BITCAST5:%[0-9]+]]:_(s32) = G_BITCAST [[UV7]](<2 x s16>) + ; SI-NEXT: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[FPTRUNC]](s16) + ; SI-NEXT: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[FPTRUNC1]](s16) + ; SI-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C]](s32) + ; SI-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL]] + ; SI-NEXT: [[BITCAST6:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) + ; SI-NEXT: [[ZEXT2:%[0-9]+]]:_(s32) = G_ZEXT [[FPTRUNC2]](s16) + ; SI-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 + ; SI-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[BITCAST4]], [[C1]] + ; SI-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND]], [[C]](s32) + ; SI-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[ZEXT2]], [[SHL1]] + ; SI-NEXT: [[BITCAST7:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32) + ; SI-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LSHR2]], [[C1]] + ; SI-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[BITCAST5]], [[C1]] + ; SI-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND2]], [[C]](s32) + ; SI-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[AND1]], [[SHL2]] + ; SI-NEXT: [[BITCAST8:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR2]](s32) + ; SI-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BITCAST6]](<2 x s16>), [[BITCAST7]](<2 x s16>), [[BITCAST8]](<2 x s16>) + ; SI-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[CONCAT_VECTORS]](<6 x s16>) ; VI-LABEL: name: test_fminnum_v3s16 ; VI: [[COPY:%[0-9]+]]:_(<6 x s16>) = COPY $vgpr0_vgpr1_vgpr2 - ; VI: [[COPY1:%[0-9]+]]:_(<6 x s16>) = COPY $vgpr3_vgpr4_vgpr5 - ; VI: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>), [[UV2:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[COPY]](<6 x s16>) - ; VI: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>) - ; VI: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST]](s32) - ; VI: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; VI: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) - ; VI: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) - ; VI: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>) - ; VI: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST1]](s32) - ; VI: [[UV3:%[0-9]+]]:_(<2 x s16>), [[UV4:%[0-9]+]]:_(<2 x s16>), [[UV5:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES 
[[COPY1]](<6 x s16>) - ; VI: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[UV3]](<2 x s16>) - ; VI: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST2]](s32) - ; VI: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C]](s32) - ; VI: [[TRUNC4:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR1]](s32) - ; VI: [[BITCAST3:%[0-9]+]]:_(s32) = G_BITCAST [[UV4]](<2 x s16>) - ; VI: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST3]](s32) - ; VI: [[FCANONICALIZE:%[0-9]+]]:_(s16) = G_FCANONICALIZE [[TRUNC]] - ; VI: [[FCANONICALIZE1:%[0-9]+]]:_(s16) = G_FCANONICALIZE [[TRUNC3]] - ; VI: [[FMINNUM_IEEE:%[0-9]+]]:_(s16) = G_FMINNUM_IEEE [[FCANONICALIZE]], [[FCANONICALIZE1]] - ; VI: [[FCANONICALIZE2:%[0-9]+]]:_(s16) = G_FCANONICALIZE [[TRUNC1]] - ; VI: [[FCANONICALIZE3:%[0-9]+]]:_(s16) = G_FCANONICALIZE [[TRUNC4]] - ; VI: [[FMINNUM_IEEE1:%[0-9]+]]:_(s16) = G_FMINNUM_IEEE [[FCANONICALIZE2]], [[FCANONICALIZE3]] - ; VI: [[FCANONICALIZE4:%[0-9]+]]:_(s16) = G_FCANONICALIZE [[TRUNC2]] - ; VI: [[FCANONICALIZE5:%[0-9]+]]:_(s16) = G_FCANONICALIZE [[TRUNC5]] - ; VI: [[FMINNUM_IEEE2:%[0-9]+]]:_(s16) = G_FMINNUM_IEEE [[FCANONICALIZE4]], [[FCANONICALIZE5]] - ; VI: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF - ; VI: [[UV6:%[0-9]+]]:_(<2 x s16>), [[UV7:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<4 x s16>) - ; VI: [[BITCAST4:%[0-9]+]]:_(s32) = G_BITCAST [[UV6]](<2 x s16>) - ; VI: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST4]], [[C]](s32) - ; VI: [[BITCAST5:%[0-9]+]]:_(s32) = G_BITCAST [[UV7]](<2 x s16>) - ; VI: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[FMINNUM_IEEE]](s16) - ; VI: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[FMINNUM_IEEE1]](s16) - ; VI: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C]](s32) - ; VI: [[OR:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL]] - ; VI: [[BITCAST6:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) - ; VI: [[ZEXT2:%[0-9]+]]:_(s32) = G_ZEXT [[FMINNUM_IEEE2]](s16) - ; VI: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 - ; VI: [[AND:%[0-9]+]]:_(s32) = G_AND [[BITCAST4]], [[C1]] - ; VI: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND]], [[C]](s32) - ; VI: [[OR1:%[0-9]+]]:_(s32) = G_OR [[ZEXT2]], [[SHL1]] - ; VI: [[BITCAST7:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32) - ; VI: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LSHR2]], [[C1]] - ; VI: [[AND2:%[0-9]+]]:_(s32) = G_AND [[BITCAST5]], [[C1]] - ; VI: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND2]], [[C]](s32) - ; VI: [[OR2:%[0-9]+]]:_(s32) = G_OR [[AND1]], [[SHL2]] - ; VI: [[BITCAST8:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR2]](s32) - ; VI: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BITCAST6]](<2 x s16>), [[BITCAST7]](<2 x s16>), [[BITCAST8]](<2 x s16>) - ; VI: $vgpr0_vgpr1_vgpr2 = COPY [[CONCAT_VECTORS]](<6 x s16>) + ; VI-NEXT: [[COPY1:%[0-9]+]]:_(<6 x s16>) = COPY $vgpr3_vgpr4_vgpr5 + ; VI-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>), [[UV2:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[COPY]](<6 x s16>) + ; VI-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>) + ; VI-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST]](s32) + ; VI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; VI-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) + ; VI-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) + ; VI-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>) + ; VI-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST1]](s32) + ; VI-NEXT: [[UV3:%[0-9]+]]:_(<2 x s16>), [[UV4:%[0-9]+]]:_(<2 x s16>), [[UV5:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[COPY1]](<6 x s16>) + ; VI-NEXT: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[UV3]](<2 x s16>) + ; 
VI-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST2]](s32) + ; VI-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C]](s32) + ; VI-NEXT: [[TRUNC4:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR1]](s32) + ; VI-NEXT: [[BITCAST3:%[0-9]+]]:_(s32) = G_BITCAST [[UV4]](<2 x s16>) + ; VI-NEXT: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST3]](s32) + ; VI-NEXT: [[FCANONICALIZE:%[0-9]+]]:_(s16) = G_FCANONICALIZE [[TRUNC]] + ; VI-NEXT: [[FCANONICALIZE1:%[0-9]+]]:_(s16) = G_FCANONICALIZE [[TRUNC3]] + ; VI-NEXT: [[FMINNUM_IEEE:%[0-9]+]]:_(s16) = G_FMINNUM_IEEE [[FCANONICALIZE]], [[FCANONICALIZE1]] + ; VI-NEXT: [[FCANONICALIZE2:%[0-9]+]]:_(s16) = G_FCANONICALIZE [[TRUNC1]] + ; VI-NEXT: [[FCANONICALIZE3:%[0-9]+]]:_(s16) = G_FCANONICALIZE [[TRUNC4]] + ; VI-NEXT: [[FMINNUM_IEEE1:%[0-9]+]]:_(s16) = G_FMINNUM_IEEE [[FCANONICALIZE2]], [[FCANONICALIZE3]] + ; VI-NEXT: [[FCANONICALIZE4:%[0-9]+]]:_(s16) = G_FCANONICALIZE [[TRUNC2]] + ; VI-NEXT: [[FCANONICALIZE5:%[0-9]+]]:_(s16) = G_FCANONICALIZE [[TRUNC5]] + ; VI-NEXT: [[FMINNUM_IEEE2:%[0-9]+]]:_(s16) = G_FMINNUM_IEEE [[FCANONICALIZE4]], [[FCANONICALIZE5]] + ; VI-NEXT: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF + ; VI-NEXT: [[UV6:%[0-9]+]]:_(<2 x s16>), [[UV7:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<4 x s16>) + ; VI-NEXT: [[BITCAST4:%[0-9]+]]:_(s32) = G_BITCAST [[UV6]](<2 x s16>) + ; VI-NEXT: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST4]], [[C]](s32) + ; VI-NEXT: [[BITCAST5:%[0-9]+]]:_(s32) = G_BITCAST [[UV7]](<2 x s16>) + ; VI-NEXT: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[FMINNUM_IEEE]](s16) + ; VI-NEXT: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[FMINNUM_IEEE1]](s16) + ; VI-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C]](s32) + ; VI-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL]] + ; VI-NEXT: [[BITCAST6:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) + ; VI-NEXT: [[ZEXT2:%[0-9]+]]:_(s32) = G_ZEXT [[FMINNUM_IEEE2]](s16) + ; VI-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 + ; VI-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[BITCAST4]], [[C1]] + ; VI-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND]], [[C]](s32) + ; VI-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[ZEXT2]], [[SHL1]] + ; VI-NEXT: [[BITCAST7:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32) + ; VI-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LSHR2]], [[C1]] + ; VI-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[BITCAST5]], [[C1]] + ; VI-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND2]], [[C]](s32) + ; VI-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[AND1]], [[SHL2]] + ; VI-NEXT: [[BITCAST8:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR2]](s32) + ; VI-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BITCAST6]](<2 x s16>), [[BITCAST7]](<2 x s16>), [[BITCAST8]](<2 x s16>) + ; VI-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[CONCAT_VECTORS]](<6 x s16>) ; GFX9-LABEL: name: test_fminnum_v3s16 ; GFX9: [[COPY:%[0-9]+]]:_(<6 x s16>) = COPY $vgpr0_vgpr1_vgpr2 - ; GFX9: [[COPY1:%[0-9]+]]:_(<6 x s16>) = COPY $vgpr3_vgpr4_vgpr5 - ; GFX9: [[UV:%[0-9]+]]:_(<3 x s16>), [[UV1:%[0-9]+]]:_(<3 x s16>) = G_UNMERGE_VALUES [[COPY]](<6 x s16>) - ; GFX9: [[UV2:%[0-9]+]]:_(<3 x s16>), [[UV3:%[0-9]+]]:_(<3 x s16>) = G_UNMERGE_VALUES [[COPY1]](<6 x s16>) - ; GFX9: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF - ; GFX9: [[INSERT:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF]], [[UV]](<3 x s16>), 0 - ; GFX9: [[INSERT1:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF]], [[UV2]](<3 x s16>), 0 - ; GFX9: [[UV4:%[0-9]+]]:_(<2 x s16>), [[UV5:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[INSERT]](<4 x s16>) - ; GFX9: [[UV6:%[0-9]+]]:_(<2 x s16>), [[UV7:%[0-9]+]]:_(<2 x s16>) = 
G_UNMERGE_VALUES [[INSERT1]](<4 x s16>) - ; GFX9: [[FCANONICALIZE:%[0-9]+]]:_(<2 x s16>) = G_FCANONICALIZE [[UV4]] - ; GFX9: [[FCANONICALIZE1:%[0-9]+]]:_(<2 x s16>) = G_FCANONICALIZE [[UV6]] - ; GFX9: [[FMINNUM_IEEE:%[0-9]+]]:_(<2 x s16>) = G_FMINNUM_IEEE [[FCANONICALIZE]], [[FCANONICALIZE1]] - ; GFX9: [[FCANONICALIZE2:%[0-9]+]]:_(<2 x s16>) = G_FCANONICALIZE [[UV5]] - ; GFX9: [[FCANONICALIZE3:%[0-9]+]]:_(<2 x s16>) = G_FCANONICALIZE [[UV7]] - ; GFX9: [[FMINNUM_IEEE1:%[0-9]+]]:_(<2 x s16>) = G_FMINNUM_IEEE [[FCANONICALIZE2]], [[FCANONICALIZE3]] - ; GFX9: [[DEF1:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF - ; GFX9: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[FMINNUM_IEEE]](<2 x s16>) - ; GFX9: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; GFX9: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) - ; GFX9: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[FMINNUM_IEEE1]](<2 x s16>) - ; GFX9: [[UV8:%[0-9]+]]:_(<2 x s16>), [[UV9:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF1]](<4 x s16>) - ; GFX9: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[UV8]](<2 x s16>) - ; GFX9: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C]](s32) - ; GFX9: [[BITCAST3:%[0-9]+]]:_(s32) = G_BITCAST [[UV9]](<2 x s16>) - ; GFX9: [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[BITCAST]](s32), [[LSHR]](s32) - ; GFX9: [[BUILD_VECTOR_TRUNC1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[BITCAST1]](s32), [[BITCAST2]](s32) - ; GFX9: [[BUILD_VECTOR_TRUNC2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[LSHR1]](s32), [[BITCAST3]](s32) - ; GFX9: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR_TRUNC]](<2 x s16>), [[BUILD_VECTOR_TRUNC1]](<2 x s16>), [[BUILD_VECTOR_TRUNC2]](<2 x s16>) - ; GFX9: $vgpr0_vgpr1_vgpr2 = COPY [[CONCAT_VECTORS]](<6 x s16>) + ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(<6 x s16>) = COPY $vgpr3_vgpr4_vgpr5 + ; GFX9-NEXT: [[UV:%[0-9]+]]:_(<3 x s16>), [[UV1:%[0-9]+]]:_(<3 x s16>) = G_UNMERGE_VALUES [[COPY]](<6 x s16>) + ; GFX9-NEXT: [[UV2:%[0-9]+]]:_(<3 x s16>), [[UV3:%[0-9]+]]:_(<3 x s16>) = G_UNMERGE_VALUES [[COPY1]](<6 x s16>) + ; GFX9-NEXT: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF + ; GFX9-NEXT: [[INSERT:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF]], [[UV]](<3 x s16>), 0 + ; GFX9-NEXT: [[INSERT1:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF]], [[UV2]](<3 x s16>), 0 + ; GFX9-NEXT: [[UV4:%[0-9]+]]:_(<2 x s16>), [[UV5:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[INSERT]](<4 x s16>) + ; GFX9-NEXT: [[UV6:%[0-9]+]]:_(<2 x s16>), [[UV7:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[INSERT1]](<4 x s16>) + ; GFX9-NEXT: [[FCANONICALIZE:%[0-9]+]]:_(<2 x s16>) = G_FCANONICALIZE [[UV4]] + ; GFX9-NEXT: [[FCANONICALIZE1:%[0-9]+]]:_(<2 x s16>) = G_FCANONICALIZE [[UV6]] + ; GFX9-NEXT: [[FMINNUM_IEEE:%[0-9]+]]:_(<2 x s16>) = G_FMINNUM_IEEE [[FCANONICALIZE]], [[FCANONICALIZE1]] + ; GFX9-NEXT: [[FCANONICALIZE2:%[0-9]+]]:_(<2 x s16>) = G_FCANONICALIZE [[UV5]] + ; GFX9-NEXT: [[FCANONICALIZE3:%[0-9]+]]:_(<2 x s16>) = G_FCANONICALIZE [[UV7]] + ; GFX9-NEXT: [[FMINNUM_IEEE1:%[0-9]+]]:_(<2 x s16>) = G_FMINNUM_IEEE [[FCANONICALIZE2]], [[FCANONICALIZE3]] + ; GFX9-NEXT: [[DEF1:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF + ; GFX9-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[FMINNUM_IEEE]](<2 x s16>) + ; GFX9-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; GFX9-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) + ; GFX9-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[FMINNUM_IEEE1]](<2 x s16>) + ; GFX9-NEXT: [[UV8:%[0-9]+]]:_(<2 x s16>), [[UV9:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF1]](<4 x s16>) + 
; GFX9-NEXT: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[UV8]](<2 x s16>) + ; GFX9-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C]](s32) + ; GFX9-NEXT: [[BITCAST3:%[0-9]+]]:_(s32) = G_BITCAST [[UV9]](<2 x s16>) + ; GFX9-NEXT: [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[BITCAST]](s32), [[LSHR]](s32) + ; GFX9-NEXT: [[BUILD_VECTOR_TRUNC1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[BITCAST1]](s32), [[BITCAST2]](s32) + ; GFX9-NEXT: [[BUILD_VECTOR_TRUNC2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[LSHR1]](s32), [[BITCAST3]](s32) + ; GFX9-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR_TRUNC]](<2 x s16>), [[BUILD_VECTOR_TRUNC1]](<2 x s16>), [[BUILD_VECTOR_TRUNC2]](<2 x s16>) + ; GFX9-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[CONCAT_VECTORS]](<6 x s16>) %0:_(<6 x s16>) = COPY $vgpr0_vgpr1_vgpr2 %1:_(<6 x s16>) = COPY $vgpr3_vgpr4_vgpr5 %2:_(<3 x s16>), %3:_(<3 x s16>) = G_UNMERGE_VALUES %0 @@ -542,113 +542,113 @@ body: | ; SI-LABEL: name: test_fminnum_v4s16 ; SI: [[COPY:%[0-9]+]]:_(<4 x s16>) = COPY $vgpr0_vgpr1 - ; SI: [[COPY1:%[0-9]+]]:_(<4 x s16>) = COPY $vgpr2_vgpr3 - ; SI: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[COPY]](<4 x s16>) - ; SI: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>) - ; SI: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST]](s32) - ; SI: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; SI: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) - ; SI: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) - ; SI: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>) - ; SI: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST1]](s32) - ; SI: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C]](s32) - ; SI: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR1]](s32) - ; SI: [[UV2:%[0-9]+]]:_(<2 x s16>), [[UV3:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[COPY1]](<4 x s16>) - ; SI: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[UV2]](<2 x s16>) - ; SI: [[TRUNC4:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST2]](s32) - ; SI: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C]](s32) - ; SI: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR2]](s32) - ; SI: [[BITCAST3:%[0-9]+]]:_(s32) = G_BITCAST [[UV3]](<2 x s16>) - ; SI: [[TRUNC6:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST3]](s32) - ; SI: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST3]], [[C]](s32) - ; SI: [[TRUNC7:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR3]](s32) - ; SI: [[FPEXT:%[0-9]+]]:_(s32) = G_FPEXT [[TRUNC]](s16) - ; SI: [[FPEXT1:%[0-9]+]]:_(s32) = G_FPEXT [[TRUNC4]](s16) - ; SI: [[FMINNUM_IEEE:%[0-9]+]]:_(s32) = G_FMINNUM_IEEE [[FPEXT]], [[FPEXT1]] - ; SI: [[FPTRUNC:%[0-9]+]]:_(s16) = G_FPTRUNC [[FMINNUM_IEEE]](s32) - ; SI: [[FPEXT2:%[0-9]+]]:_(s32) = G_FPEXT [[TRUNC1]](s16) - ; SI: [[FPEXT3:%[0-9]+]]:_(s32) = G_FPEXT [[TRUNC5]](s16) - ; SI: [[FMINNUM_IEEE1:%[0-9]+]]:_(s32) = G_FMINNUM_IEEE [[FPEXT2]], [[FPEXT3]] - ; SI: [[FPTRUNC1:%[0-9]+]]:_(s16) = G_FPTRUNC [[FMINNUM_IEEE1]](s32) - ; SI: [[FPEXT4:%[0-9]+]]:_(s32) = G_FPEXT [[TRUNC2]](s16) - ; SI: [[FPEXT5:%[0-9]+]]:_(s32) = G_FPEXT [[TRUNC6]](s16) - ; SI: [[FMINNUM_IEEE2:%[0-9]+]]:_(s32) = G_FMINNUM_IEEE [[FPEXT4]], [[FPEXT5]] - ; SI: [[FPTRUNC2:%[0-9]+]]:_(s16) = G_FPTRUNC [[FMINNUM_IEEE2]](s32) - ; SI: [[FPEXT6:%[0-9]+]]:_(s32) = G_FPEXT [[TRUNC3]](s16) - ; SI: [[FPEXT7:%[0-9]+]]:_(s32) = G_FPEXT [[TRUNC7]](s16) - ; SI: [[FMINNUM_IEEE3:%[0-9]+]]:_(s32) = G_FMINNUM_IEEE [[FPEXT6]], [[FPEXT7]] - ; SI: [[FPTRUNC3:%[0-9]+]]:_(s16) = G_FPTRUNC [[FMINNUM_IEEE3]](s32) - ; SI: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT 
[[FPTRUNC]](s16) - ; SI: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[FPTRUNC1]](s16) - ; SI: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C]](s32) - ; SI: [[OR:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL]] - ; SI: [[BITCAST4:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) - ; SI: [[ZEXT2:%[0-9]+]]:_(s32) = G_ZEXT [[FPTRUNC2]](s16) - ; SI: [[ZEXT3:%[0-9]+]]:_(s32) = G_ZEXT [[FPTRUNC3]](s16) - ; SI: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[ZEXT3]], [[C]](s32) - ; SI: [[OR1:%[0-9]+]]:_(s32) = G_OR [[ZEXT2]], [[SHL1]] - ; SI: [[BITCAST5:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32) - ; SI: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BITCAST4]](<2 x s16>), [[BITCAST5]](<2 x s16>) - ; SI: $vgpr0_vgpr1 = COPY [[CONCAT_VECTORS]](<4 x s16>) + ; SI-NEXT: [[COPY1:%[0-9]+]]:_(<4 x s16>) = COPY $vgpr2_vgpr3 + ; SI-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[COPY]](<4 x s16>) + ; SI-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>) + ; SI-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST]](s32) + ; SI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; SI-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) + ; SI-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) + ; SI-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>) + ; SI-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST1]](s32) + ; SI-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C]](s32) + ; SI-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR1]](s32) + ; SI-NEXT: [[UV2:%[0-9]+]]:_(<2 x s16>), [[UV3:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[COPY1]](<4 x s16>) + ; SI-NEXT: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[UV2]](<2 x s16>) + ; SI-NEXT: [[TRUNC4:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST2]](s32) + ; SI-NEXT: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C]](s32) + ; SI-NEXT: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR2]](s32) + ; SI-NEXT: [[BITCAST3:%[0-9]+]]:_(s32) = G_BITCAST [[UV3]](<2 x s16>) + ; SI-NEXT: [[TRUNC6:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST3]](s32) + ; SI-NEXT: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST3]], [[C]](s32) + ; SI-NEXT: [[TRUNC7:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR3]](s32) + ; SI-NEXT: [[FPEXT:%[0-9]+]]:_(s32) = G_FPEXT [[TRUNC]](s16) + ; SI-NEXT: [[FPEXT1:%[0-9]+]]:_(s32) = G_FPEXT [[TRUNC4]](s16) + ; SI-NEXT: [[FMINNUM_IEEE:%[0-9]+]]:_(s32) = G_FMINNUM_IEEE [[FPEXT]], [[FPEXT1]] + ; SI-NEXT: [[FPTRUNC:%[0-9]+]]:_(s16) = G_FPTRUNC [[FMINNUM_IEEE]](s32) + ; SI-NEXT: [[FPEXT2:%[0-9]+]]:_(s32) = G_FPEXT [[TRUNC1]](s16) + ; SI-NEXT: [[FPEXT3:%[0-9]+]]:_(s32) = G_FPEXT [[TRUNC5]](s16) + ; SI-NEXT: [[FMINNUM_IEEE1:%[0-9]+]]:_(s32) = G_FMINNUM_IEEE [[FPEXT2]], [[FPEXT3]] + ; SI-NEXT: [[FPTRUNC1:%[0-9]+]]:_(s16) = G_FPTRUNC [[FMINNUM_IEEE1]](s32) + ; SI-NEXT: [[FPEXT4:%[0-9]+]]:_(s32) = G_FPEXT [[TRUNC2]](s16) + ; SI-NEXT: [[FPEXT5:%[0-9]+]]:_(s32) = G_FPEXT [[TRUNC6]](s16) + ; SI-NEXT: [[FMINNUM_IEEE2:%[0-9]+]]:_(s32) = G_FMINNUM_IEEE [[FPEXT4]], [[FPEXT5]] + ; SI-NEXT: [[FPTRUNC2:%[0-9]+]]:_(s16) = G_FPTRUNC [[FMINNUM_IEEE2]](s32) + ; SI-NEXT: [[FPEXT6:%[0-9]+]]:_(s32) = G_FPEXT [[TRUNC3]](s16) + ; SI-NEXT: [[FPEXT7:%[0-9]+]]:_(s32) = G_FPEXT [[TRUNC7]](s16) + ; SI-NEXT: [[FMINNUM_IEEE3:%[0-9]+]]:_(s32) = G_FMINNUM_IEEE [[FPEXT6]], [[FPEXT7]] + ; SI-NEXT: [[FPTRUNC3:%[0-9]+]]:_(s16) = G_FPTRUNC [[FMINNUM_IEEE3]](s32) + ; SI-NEXT: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[FPTRUNC]](s16) + ; SI-NEXT: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[FPTRUNC1]](s16) + ; SI-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C]](s32) + ; SI-NEXT: 
[[OR:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL]] + ; SI-NEXT: [[BITCAST4:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) + ; SI-NEXT: [[ZEXT2:%[0-9]+]]:_(s32) = G_ZEXT [[FPTRUNC2]](s16) + ; SI-NEXT: [[ZEXT3:%[0-9]+]]:_(s32) = G_ZEXT [[FPTRUNC3]](s16) + ; SI-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[ZEXT3]], [[C]](s32) + ; SI-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[ZEXT2]], [[SHL1]] + ; SI-NEXT: [[BITCAST5:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32) + ; SI-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BITCAST4]](<2 x s16>), [[BITCAST5]](<2 x s16>) + ; SI-NEXT: $vgpr0_vgpr1 = COPY [[CONCAT_VECTORS]](<4 x s16>) ; VI-LABEL: name: test_fminnum_v4s16 ; VI: [[COPY:%[0-9]+]]:_(<4 x s16>) = COPY $vgpr0_vgpr1 - ; VI: [[COPY1:%[0-9]+]]:_(<4 x s16>) = COPY $vgpr2_vgpr3 - ; VI: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[COPY]](<4 x s16>) - ; VI: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>) - ; VI: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST]](s32) - ; VI: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; VI: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) - ; VI: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) - ; VI: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>) - ; VI: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST1]](s32) - ; VI: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C]](s32) - ; VI: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR1]](s32) - ; VI: [[UV2:%[0-9]+]]:_(<2 x s16>), [[UV3:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[COPY1]](<4 x s16>) - ; VI: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[UV2]](<2 x s16>) - ; VI: [[TRUNC4:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST2]](s32) - ; VI: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C]](s32) - ; VI: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR2]](s32) - ; VI: [[BITCAST3:%[0-9]+]]:_(s32) = G_BITCAST [[UV3]](<2 x s16>) - ; VI: [[TRUNC6:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST3]](s32) - ; VI: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST3]], [[C]](s32) - ; VI: [[TRUNC7:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR3]](s32) - ; VI: [[FCANONICALIZE:%[0-9]+]]:_(s16) = G_FCANONICALIZE [[TRUNC]] - ; VI: [[FCANONICALIZE1:%[0-9]+]]:_(s16) = G_FCANONICALIZE [[TRUNC4]] - ; VI: [[FMINNUM_IEEE:%[0-9]+]]:_(s16) = G_FMINNUM_IEEE [[FCANONICALIZE]], [[FCANONICALIZE1]] - ; VI: [[FCANONICALIZE2:%[0-9]+]]:_(s16) = G_FCANONICALIZE [[TRUNC1]] - ; VI: [[FCANONICALIZE3:%[0-9]+]]:_(s16) = G_FCANONICALIZE [[TRUNC5]] - ; VI: [[FMINNUM_IEEE1:%[0-9]+]]:_(s16) = G_FMINNUM_IEEE [[FCANONICALIZE2]], [[FCANONICALIZE3]] - ; VI: [[FCANONICALIZE4:%[0-9]+]]:_(s16) = G_FCANONICALIZE [[TRUNC2]] - ; VI: [[FCANONICALIZE5:%[0-9]+]]:_(s16) = G_FCANONICALIZE [[TRUNC6]] - ; VI: [[FMINNUM_IEEE2:%[0-9]+]]:_(s16) = G_FMINNUM_IEEE [[FCANONICALIZE4]], [[FCANONICALIZE5]] - ; VI: [[FCANONICALIZE6:%[0-9]+]]:_(s16) = G_FCANONICALIZE [[TRUNC3]] - ; VI: [[FCANONICALIZE7:%[0-9]+]]:_(s16) = G_FCANONICALIZE [[TRUNC7]] - ; VI: [[FMINNUM_IEEE3:%[0-9]+]]:_(s16) = G_FMINNUM_IEEE [[FCANONICALIZE6]], [[FCANONICALIZE7]] - ; VI: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[FMINNUM_IEEE]](s16) - ; VI: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[FMINNUM_IEEE1]](s16) - ; VI: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C]](s32) - ; VI: [[OR:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL]] - ; VI: [[BITCAST4:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) - ; VI: [[ZEXT2:%[0-9]+]]:_(s32) = G_ZEXT [[FMINNUM_IEEE2]](s16) - ; VI: [[ZEXT3:%[0-9]+]]:_(s32) = G_ZEXT [[FMINNUM_IEEE3]](s16) - ; VI: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[ZEXT3]], [[C]](s32) - ; VI: [[OR1:%[0-9]+]]:_(s32) = G_OR 
[[ZEXT2]], [[SHL1]] - ; VI: [[BITCAST5:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32) - ; VI: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BITCAST4]](<2 x s16>), [[BITCAST5]](<2 x s16>) - ; VI: $vgpr0_vgpr1 = COPY [[CONCAT_VECTORS]](<4 x s16>) + ; VI-NEXT: [[COPY1:%[0-9]+]]:_(<4 x s16>) = COPY $vgpr2_vgpr3 + ; VI-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[COPY]](<4 x s16>) + ; VI-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>) + ; VI-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST]](s32) + ; VI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; VI-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) + ; VI-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) + ; VI-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>) + ; VI-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST1]](s32) + ; VI-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C]](s32) + ; VI-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR1]](s32) + ; VI-NEXT: [[UV2:%[0-9]+]]:_(<2 x s16>), [[UV3:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[COPY1]](<4 x s16>) + ; VI-NEXT: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[UV2]](<2 x s16>) + ; VI-NEXT: [[TRUNC4:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST2]](s32) + ; VI-NEXT: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C]](s32) + ; VI-NEXT: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR2]](s32) + ; VI-NEXT: [[BITCAST3:%[0-9]+]]:_(s32) = G_BITCAST [[UV3]](<2 x s16>) + ; VI-NEXT: [[TRUNC6:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST3]](s32) + ; VI-NEXT: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST3]], [[C]](s32) + ; VI-NEXT: [[TRUNC7:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR3]](s32) + ; VI-NEXT: [[FCANONICALIZE:%[0-9]+]]:_(s16) = G_FCANONICALIZE [[TRUNC]] + ; VI-NEXT: [[FCANONICALIZE1:%[0-9]+]]:_(s16) = G_FCANONICALIZE [[TRUNC4]] + ; VI-NEXT: [[FMINNUM_IEEE:%[0-9]+]]:_(s16) = G_FMINNUM_IEEE [[FCANONICALIZE]], [[FCANONICALIZE1]] + ; VI-NEXT: [[FCANONICALIZE2:%[0-9]+]]:_(s16) = G_FCANONICALIZE [[TRUNC1]] + ; VI-NEXT: [[FCANONICALIZE3:%[0-9]+]]:_(s16) = G_FCANONICALIZE [[TRUNC5]] + ; VI-NEXT: [[FMINNUM_IEEE1:%[0-9]+]]:_(s16) = G_FMINNUM_IEEE [[FCANONICALIZE2]], [[FCANONICALIZE3]] + ; VI-NEXT: [[FCANONICALIZE4:%[0-9]+]]:_(s16) = G_FCANONICALIZE [[TRUNC2]] + ; VI-NEXT: [[FCANONICALIZE5:%[0-9]+]]:_(s16) = G_FCANONICALIZE [[TRUNC6]] + ; VI-NEXT: [[FMINNUM_IEEE2:%[0-9]+]]:_(s16) = G_FMINNUM_IEEE [[FCANONICALIZE4]], [[FCANONICALIZE5]] + ; VI-NEXT: [[FCANONICALIZE6:%[0-9]+]]:_(s16) = G_FCANONICALIZE [[TRUNC3]] + ; VI-NEXT: [[FCANONICALIZE7:%[0-9]+]]:_(s16) = G_FCANONICALIZE [[TRUNC7]] + ; VI-NEXT: [[FMINNUM_IEEE3:%[0-9]+]]:_(s16) = G_FMINNUM_IEEE [[FCANONICALIZE6]], [[FCANONICALIZE7]] + ; VI-NEXT: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[FMINNUM_IEEE]](s16) + ; VI-NEXT: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[FMINNUM_IEEE1]](s16) + ; VI-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C]](s32) + ; VI-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL]] + ; VI-NEXT: [[BITCAST4:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) + ; VI-NEXT: [[ZEXT2:%[0-9]+]]:_(s32) = G_ZEXT [[FMINNUM_IEEE2]](s16) + ; VI-NEXT: [[ZEXT3:%[0-9]+]]:_(s32) = G_ZEXT [[FMINNUM_IEEE3]](s16) + ; VI-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[ZEXT3]], [[C]](s32) + ; VI-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[ZEXT2]], [[SHL1]] + ; VI-NEXT: [[BITCAST5:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32) + ; VI-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BITCAST4]](<2 x s16>), [[BITCAST5]](<2 x s16>) + ; VI-NEXT: $vgpr0_vgpr1 = COPY 
[[CONCAT_VECTORS]](<4 x s16>) ; GFX9-LABEL: name: test_fminnum_v4s16 ; GFX9: [[COPY:%[0-9]+]]:_(<4 x s16>) = COPY $vgpr0_vgpr1 - ; GFX9: [[COPY1:%[0-9]+]]:_(<4 x s16>) = COPY $vgpr2_vgpr3 - ; GFX9: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[COPY]](<4 x s16>) - ; GFX9: [[UV2:%[0-9]+]]:_(<2 x s16>), [[UV3:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[COPY1]](<4 x s16>) - ; GFX9: [[FCANONICALIZE:%[0-9]+]]:_(<2 x s16>) = G_FCANONICALIZE [[UV]] - ; GFX9: [[FCANONICALIZE1:%[0-9]+]]:_(<2 x s16>) = G_FCANONICALIZE [[UV2]] - ; GFX9: [[FMINNUM_IEEE:%[0-9]+]]:_(<2 x s16>) = G_FMINNUM_IEEE [[FCANONICALIZE]], [[FCANONICALIZE1]] - ; GFX9: [[FCANONICALIZE2:%[0-9]+]]:_(<2 x s16>) = G_FCANONICALIZE [[UV1]] - ; GFX9: [[FCANONICALIZE3:%[0-9]+]]:_(<2 x s16>) = G_FCANONICALIZE [[UV3]] - ; GFX9: [[FMINNUM_IEEE1:%[0-9]+]]:_(<2 x s16>) = G_FMINNUM_IEEE [[FCANONICALIZE2]], [[FCANONICALIZE3]] - ; GFX9: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[FMINNUM_IEEE]](<2 x s16>), [[FMINNUM_IEEE1]](<2 x s16>) - ; GFX9: $vgpr0_vgpr1 = COPY [[CONCAT_VECTORS]](<4 x s16>) + ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(<4 x s16>) = COPY $vgpr2_vgpr3 + ; GFX9-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[COPY]](<4 x s16>) + ; GFX9-NEXT: [[UV2:%[0-9]+]]:_(<2 x s16>), [[UV3:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[COPY1]](<4 x s16>) + ; GFX9-NEXT: [[FCANONICALIZE:%[0-9]+]]:_(<2 x s16>) = G_FCANONICALIZE [[UV]] + ; GFX9-NEXT: [[FCANONICALIZE1:%[0-9]+]]:_(<2 x s16>) = G_FCANONICALIZE [[UV2]] + ; GFX9-NEXT: [[FMINNUM_IEEE:%[0-9]+]]:_(<2 x s16>) = G_FMINNUM_IEEE [[FCANONICALIZE]], [[FCANONICALIZE1]] + ; GFX9-NEXT: [[FCANONICALIZE2:%[0-9]+]]:_(<2 x s16>) = G_FCANONICALIZE [[UV1]] + ; GFX9-NEXT: [[FCANONICALIZE3:%[0-9]+]]:_(<2 x s16>) = G_FCANONICALIZE [[UV3]] + ; GFX9-NEXT: [[FMINNUM_IEEE1:%[0-9]+]]:_(<2 x s16>) = G_FMINNUM_IEEE [[FCANONICALIZE2]], [[FCANONICALIZE3]] + ; GFX9-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[FMINNUM_IEEE]](<2 x s16>), [[FMINNUM_IEEE1]](<2 x s16>) + ; GFX9-NEXT: $vgpr0_vgpr1 = COPY [[CONCAT_VECTORS]](<4 x s16>) %0:_(<4 x s16>) = COPY $vgpr0_vgpr1 %1:_(<4 x s16>) = COPY $vgpr2_vgpr3 %2:_(<4 x s16>) = G_FMINNUM %0, %1 @@ -666,37 +666,37 @@ body: | ; SI-LABEL: name: test_fminnum_with_fminnum_argument_s32_ieee_mode_on ; SI: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; SI: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; SI: [[FCANONICALIZE:%[0-9]+]]:_(s32) = G_FCANONICALIZE [[COPY]] - ; SI: [[FCANONICALIZE1:%[0-9]+]]:_(s32) = G_FCANONICALIZE [[COPY1]] - ; SI: [[FMINNUM_IEEE:%[0-9]+]]:_(s32) = G_FMINNUM_IEEE [[FCANONICALIZE]], [[FCANONICALIZE1]] - ; SI: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; SI: [[FCANONICALIZE2:%[0-9]+]]:_(s32) = G_FCANONICALIZE [[FMINNUM_IEEE]] - ; SI: [[FCANONICALIZE3:%[0-9]+]]:_(s32) = G_FCANONICALIZE [[COPY2]] - ; SI: [[FMINNUM_IEEE1:%[0-9]+]]:_(s32) = G_FMINNUM_IEEE [[FCANONICALIZE2]], [[FCANONICALIZE3]] - ; SI: $vgpr0 = COPY [[FMINNUM_IEEE1]](s32) + ; SI-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; SI-NEXT: [[FCANONICALIZE:%[0-9]+]]:_(s32) = G_FCANONICALIZE [[COPY]] + ; SI-NEXT: [[FCANONICALIZE1:%[0-9]+]]:_(s32) = G_FCANONICALIZE [[COPY1]] + ; SI-NEXT: [[FMINNUM_IEEE:%[0-9]+]]:_(s32) = G_FMINNUM_IEEE [[FCANONICALIZE]], [[FCANONICALIZE1]] + ; SI-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 + ; SI-NEXT: [[FCANONICALIZE2:%[0-9]+]]:_(s32) = G_FCANONICALIZE [[FMINNUM_IEEE]] + ; SI-NEXT: [[FCANONICALIZE3:%[0-9]+]]:_(s32) = G_FCANONICALIZE [[COPY2]] + ; SI-NEXT: [[FMINNUM_IEEE1:%[0-9]+]]:_(s32) = 
G_FMINNUM_IEEE [[FCANONICALIZE2]], [[FCANONICALIZE3]] + ; SI-NEXT: $vgpr0 = COPY [[FMINNUM_IEEE1]](s32) ; VI-LABEL: name: test_fminnum_with_fminnum_argument_s32_ieee_mode_on ; VI: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; VI: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; VI: [[FCANONICALIZE:%[0-9]+]]:_(s32) = G_FCANONICALIZE [[COPY]] - ; VI: [[FCANONICALIZE1:%[0-9]+]]:_(s32) = G_FCANONICALIZE [[COPY1]] - ; VI: [[FMINNUM_IEEE:%[0-9]+]]:_(s32) = G_FMINNUM_IEEE [[FCANONICALIZE]], [[FCANONICALIZE1]] - ; VI: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; VI: [[FCANONICALIZE2:%[0-9]+]]:_(s32) = G_FCANONICALIZE [[FMINNUM_IEEE]] - ; VI: [[FCANONICALIZE3:%[0-9]+]]:_(s32) = G_FCANONICALIZE [[COPY2]] - ; VI: [[FMINNUM_IEEE1:%[0-9]+]]:_(s32) = G_FMINNUM_IEEE [[FCANONICALIZE2]], [[FCANONICALIZE3]] - ; VI: $vgpr0 = COPY [[FMINNUM_IEEE1]](s32) + ; VI-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; VI-NEXT: [[FCANONICALIZE:%[0-9]+]]:_(s32) = G_FCANONICALIZE [[COPY]] + ; VI-NEXT: [[FCANONICALIZE1:%[0-9]+]]:_(s32) = G_FCANONICALIZE [[COPY1]] + ; VI-NEXT: [[FMINNUM_IEEE:%[0-9]+]]:_(s32) = G_FMINNUM_IEEE [[FCANONICALIZE]], [[FCANONICALIZE1]] + ; VI-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 + ; VI-NEXT: [[FCANONICALIZE2:%[0-9]+]]:_(s32) = G_FCANONICALIZE [[FMINNUM_IEEE]] + ; VI-NEXT: [[FCANONICALIZE3:%[0-9]+]]:_(s32) = G_FCANONICALIZE [[COPY2]] + ; VI-NEXT: [[FMINNUM_IEEE1:%[0-9]+]]:_(s32) = G_FMINNUM_IEEE [[FCANONICALIZE2]], [[FCANONICALIZE3]] + ; VI-NEXT: $vgpr0 = COPY [[FMINNUM_IEEE1]](s32) ; GFX9-LABEL: name: test_fminnum_with_fminnum_argument_s32_ieee_mode_on ; GFX9: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX9: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX9: [[FCANONICALIZE:%[0-9]+]]:_(s32) = G_FCANONICALIZE [[COPY]] - ; GFX9: [[FCANONICALIZE1:%[0-9]+]]:_(s32) = G_FCANONICALIZE [[COPY1]] - ; GFX9: [[FMINNUM_IEEE:%[0-9]+]]:_(s32) = G_FMINNUM_IEEE [[FCANONICALIZE]], [[FCANONICALIZE1]] - ; GFX9: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; GFX9: [[FCANONICALIZE2:%[0-9]+]]:_(s32) = G_FCANONICALIZE [[FMINNUM_IEEE]] - ; GFX9: [[FCANONICALIZE3:%[0-9]+]]:_(s32) = G_FCANONICALIZE [[COPY2]] - ; GFX9: [[FMINNUM_IEEE1:%[0-9]+]]:_(s32) = G_FMINNUM_IEEE [[FCANONICALIZE2]], [[FCANONICALIZE3]] - ; GFX9: $vgpr0 = COPY [[FMINNUM_IEEE1]](s32) + ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; GFX9-NEXT: [[FCANONICALIZE:%[0-9]+]]:_(s32) = G_FCANONICALIZE [[COPY]] + ; GFX9-NEXT: [[FCANONICALIZE1:%[0-9]+]]:_(s32) = G_FCANONICALIZE [[COPY1]] + ; GFX9-NEXT: [[FMINNUM_IEEE:%[0-9]+]]:_(s32) = G_FMINNUM_IEEE [[FCANONICALIZE]], [[FCANONICALIZE1]] + ; GFX9-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 + ; GFX9-NEXT: [[FCANONICALIZE2:%[0-9]+]]:_(s32) = G_FCANONICALIZE [[FMINNUM_IEEE]] + ; GFX9-NEXT: [[FCANONICALIZE3:%[0-9]+]]:_(s32) = G_FCANONICALIZE [[COPY2]] + ; GFX9-NEXT: [[FMINNUM_IEEE1:%[0-9]+]]:_(s32) = G_FMINNUM_IEEE [[FCANONICALIZE2]], [[FCANONICALIZE3]] + ; GFX9-NEXT: $vgpr0 = COPY [[FMINNUM_IEEE1]](s32) %0:_(s32) = COPY $vgpr0 %1:_(s32) = COPY $vgpr1 %2:_(s32) = G_FMINNUM %0, %1 @@ -716,31 +716,31 @@ body: | ; SI-LABEL: name: test_fminnum_with_nonNaN_fminnum_argument_s32_ieee_mode_on ; SI: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; SI: [[C:%[0-9]+]]:_(s32) = G_FCONSTANT float 0.000000e+00 - ; SI: [[FCANONICALIZE:%[0-9]+]]:_(s32) = G_FCANONICALIZE [[COPY]] - ; SI: [[FMINNUM_IEEE:%[0-9]+]]:_(s32) = G_FMINNUM_IEEE [[FCANONICALIZE]], [[C]] - ; SI: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; SI: [[FCANONICALIZE1:%[0-9]+]]:_(s32) = G_FCANONICALIZE [[COPY1]] - ; SI: [[FMINNUM_IEEE1:%[0-9]+]]:_(s32) = G_FMINNUM_IEEE 
[[FMINNUM_IEEE]], [[FCANONICALIZE1]] - ; SI: $vgpr0 = COPY [[FMINNUM_IEEE1]](s32) + ; SI-NEXT: [[C:%[0-9]+]]:_(s32) = G_FCONSTANT float 0.000000e+00 + ; SI-NEXT: [[FCANONICALIZE:%[0-9]+]]:_(s32) = G_FCANONICALIZE [[COPY]] + ; SI-NEXT: [[FMINNUM_IEEE:%[0-9]+]]:_(s32) = G_FMINNUM_IEEE [[FCANONICALIZE]], [[C]] + ; SI-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; SI-NEXT: [[FCANONICALIZE1:%[0-9]+]]:_(s32) = G_FCANONICALIZE [[COPY1]] + ; SI-NEXT: [[FMINNUM_IEEE1:%[0-9]+]]:_(s32) = G_FMINNUM_IEEE [[FMINNUM_IEEE]], [[FCANONICALIZE1]] + ; SI-NEXT: $vgpr0 = COPY [[FMINNUM_IEEE1]](s32) ; VI-LABEL: name: test_fminnum_with_nonNaN_fminnum_argument_s32_ieee_mode_on ; VI: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; VI: [[C:%[0-9]+]]:_(s32) = G_FCONSTANT float 0.000000e+00 - ; VI: [[FCANONICALIZE:%[0-9]+]]:_(s32) = G_FCANONICALIZE [[COPY]] - ; VI: [[FMINNUM_IEEE:%[0-9]+]]:_(s32) = G_FMINNUM_IEEE [[FCANONICALIZE]], [[C]] - ; VI: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; VI: [[FCANONICALIZE1:%[0-9]+]]:_(s32) = G_FCANONICALIZE [[COPY1]] - ; VI: [[FMINNUM_IEEE1:%[0-9]+]]:_(s32) = G_FMINNUM_IEEE [[FMINNUM_IEEE]], [[FCANONICALIZE1]] - ; VI: $vgpr0 = COPY [[FMINNUM_IEEE1]](s32) + ; VI-NEXT: [[C:%[0-9]+]]:_(s32) = G_FCONSTANT float 0.000000e+00 + ; VI-NEXT: [[FCANONICALIZE:%[0-9]+]]:_(s32) = G_FCANONICALIZE [[COPY]] + ; VI-NEXT: [[FMINNUM_IEEE:%[0-9]+]]:_(s32) = G_FMINNUM_IEEE [[FCANONICALIZE]], [[C]] + ; VI-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; VI-NEXT: [[FCANONICALIZE1:%[0-9]+]]:_(s32) = G_FCANONICALIZE [[COPY1]] + ; VI-NEXT: [[FMINNUM_IEEE1:%[0-9]+]]:_(s32) = G_FMINNUM_IEEE [[FMINNUM_IEEE]], [[FCANONICALIZE1]] + ; VI-NEXT: $vgpr0 = COPY [[FMINNUM_IEEE1]](s32) ; GFX9-LABEL: name: test_fminnum_with_nonNaN_fminnum_argument_s32_ieee_mode_on ; GFX9: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX9: [[C:%[0-9]+]]:_(s32) = G_FCONSTANT float 0.000000e+00 - ; GFX9: [[FCANONICALIZE:%[0-9]+]]:_(s32) = G_FCANONICALIZE [[COPY]] - ; GFX9: [[FMINNUM_IEEE:%[0-9]+]]:_(s32) = G_FMINNUM_IEEE [[FCANONICALIZE]], [[C]] - ; GFX9: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX9: [[FCANONICALIZE1:%[0-9]+]]:_(s32) = G_FCANONICALIZE [[COPY1]] - ; GFX9: [[FMINNUM_IEEE1:%[0-9]+]]:_(s32) = G_FMINNUM_IEEE [[FMINNUM_IEEE]], [[FCANONICALIZE1]] - ; GFX9: $vgpr0 = COPY [[FMINNUM_IEEE1]](s32) + ; GFX9-NEXT: [[C:%[0-9]+]]:_(s32) = G_FCONSTANT float 0.000000e+00 + ; GFX9-NEXT: [[FCANONICALIZE:%[0-9]+]]:_(s32) = G_FCANONICALIZE [[COPY]] + ; GFX9-NEXT: [[FMINNUM_IEEE:%[0-9]+]]:_(s32) = G_FMINNUM_IEEE [[FCANONICALIZE]], [[C]] + ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; GFX9-NEXT: [[FCANONICALIZE1:%[0-9]+]]:_(s32) = G_FCANONICALIZE [[COPY1]] + ; GFX9-NEXT: [[FMINNUM_IEEE1:%[0-9]+]]:_(s32) = G_FMINNUM_IEEE [[FMINNUM_IEEE]], [[FCANONICALIZE1]] + ; GFX9-NEXT: $vgpr0 = COPY [[FMINNUM_IEEE1]](s32) %0:_(s32) = COPY $vgpr0 %1:_(s32) = G_FCONSTANT float 0.000000e+00 %2:_(s32) = G_FMINNUM %0, %1 @@ -760,37 +760,37 @@ body: | ; SI-LABEL: name: test_fminnum_with_fmaxnum_argument_s32_ieee_mode_on ; SI: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; SI: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; SI: [[FCANONICALIZE:%[0-9]+]]:_(s32) = G_FCANONICALIZE [[COPY]] - ; SI: [[FCANONICALIZE1:%[0-9]+]]:_(s32) = G_FCANONICALIZE [[COPY1]] - ; SI: [[FMAXNUM_IEEE:%[0-9]+]]:_(s32) = G_FMAXNUM_IEEE [[FCANONICALIZE]], [[FCANONICALIZE1]] - ; SI: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; SI: [[FCANONICALIZE2:%[0-9]+]]:_(s32) = G_FCANONICALIZE [[FMAXNUM_IEEE]] - ; SI: [[FCANONICALIZE3:%[0-9]+]]:_(s32) = G_FCANONICALIZE [[COPY2]] - ; SI: 
[[FMINNUM_IEEE:%[0-9]+]]:_(s32) = G_FMINNUM_IEEE [[FCANONICALIZE2]], [[FCANONICALIZE3]] - ; SI: $vgpr0 = COPY [[FMINNUM_IEEE]](s32) + ; SI-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; SI-NEXT: [[FCANONICALIZE:%[0-9]+]]:_(s32) = G_FCANONICALIZE [[COPY]] + ; SI-NEXT: [[FCANONICALIZE1:%[0-9]+]]:_(s32) = G_FCANONICALIZE [[COPY1]] + ; SI-NEXT: [[FMAXNUM_IEEE:%[0-9]+]]:_(s32) = G_FMAXNUM_IEEE [[FCANONICALIZE]], [[FCANONICALIZE1]] + ; SI-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 + ; SI-NEXT: [[FCANONICALIZE2:%[0-9]+]]:_(s32) = G_FCANONICALIZE [[FMAXNUM_IEEE]] + ; SI-NEXT: [[FCANONICALIZE3:%[0-9]+]]:_(s32) = G_FCANONICALIZE [[COPY2]] + ; SI-NEXT: [[FMINNUM_IEEE:%[0-9]+]]:_(s32) = G_FMINNUM_IEEE [[FCANONICALIZE2]], [[FCANONICALIZE3]] + ; SI-NEXT: $vgpr0 = COPY [[FMINNUM_IEEE]](s32) ; VI-LABEL: name: test_fminnum_with_fmaxnum_argument_s32_ieee_mode_on ; VI: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; VI: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; VI: [[FCANONICALIZE:%[0-9]+]]:_(s32) = G_FCANONICALIZE [[COPY]] - ; VI: [[FCANONICALIZE1:%[0-9]+]]:_(s32) = G_FCANONICALIZE [[COPY1]] - ; VI: [[FMAXNUM_IEEE:%[0-9]+]]:_(s32) = G_FMAXNUM_IEEE [[FCANONICALIZE]], [[FCANONICALIZE1]] - ; VI: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; VI: [[FCANONICALIZE2:%[0-9]+]]:_(s32) = G_FCANONICALIZE [[FMAXNUM_IEEE]] - ; VI: [[FCANONICALIZE3:%[0-9]+]]:_(s32) = G_FCANONICALIZE [[COPY2]] - ; VI: [[FMINNUM_IEEE:%[0-9]+]]:_(s32) = G_FMINNUM_IEEE [[FCANONICALIZE2]], [[FCANONICALIZE3]] - ; VI: $vgpr0 = COPY [[FMINNUM_IEEE]](s32) + ; VI-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; VI-NEXT: [[FCANONICALIZE:%[0-9]+]]:_(s32) = G_FCANONICALIZE [[COPY]] + ; VI-NEXT: [[FCANONICALIZE1:%[0-9]+]]:_(s32) = G_FCANONICALIZE [[COPY1]] + ; VI-NEXT: [[FMAXNUM_IEEE:%[0-9]+]]:_(s32) = G_FMAXNUM_IEEE [[FCANONICALIZE]], [[FCANONICALIZE1]] + ; VI-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 + ; VI-NEXT: [[FCANONICALIZE2:%[0-9]+]]:_(s32) = G_FCANONICALIZE [[FMAXNUM_IEEE]] + ; VI-NEXT: [[FCANONICALIZE3:%[0-9]+]]:_(s32) = G_FCANONICALIZE [[COPY2]] + ; VI-NEXT: [[FMINNUM_IEEE:%[0-9]+]]:_(s32) = G_FMINNUM_IEEE [[FCANONICALIZE2]], [[FCANONICALIZE3]] + ; VI-NEXT: $vgpr0 = COPY [[FMINNUM_IEEE]](s32) ; GFX9-LABEL: name: test_fminnum_with_fmaxnum_argument_s32_ieee_mode_on ; GFX9: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX9: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX9: [[FCANONICALIZE:%[0-9]+]]:_(s32) = G_FCANONICALIZE [[COPY]] - ; GFX9: [[FCANONICALIZE1:%[0-9]+]]:_(s32) = G_FCANONICALIZE [[COPY1]] - ; GFX9: [[FMAXNUM_IEEE:%[0-9]+]]:_(s32) = G_FMAXNUM_IEEE [[FCANONICALIZE]], [[FCANONICALIZE1]] - ; GFX9: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; GFX9: [[FCANONICALIZE2:%[0-9]+]]:_(s32) = G_FCANONICALIZE [[FMAXNUM_IEEE]] - ; GFX9: [[FCANONICALIZE3:%[0-9]+]]:_(s32) = G_FCANONICALIZE [[COPY2]] - ; GFX9: [[FMINNUM_IEEE:%[0-9]+]]:_(s32) = G_FMINNUM_IEEE [[FCANONICALIZE2]], [[FCANONICALIZE3]] - ; GFX9: $vgpr0 = COPY [[FMINNUM_IEEE]](s32) + ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; GFX9-NEXT: [[FCANONICALIZE:%[0-9]+]]:_(s32) = G_FCANONICALIZE [[COPY]] + ; GFX9-NEXT: [[FCANONICALIZE1:%[0-9]+]]:_(s32) = G_FCANONICALIZE [[COPY1]] + ; GFX9-NEXT: [[FMAXNUM_IEEE:%[0-9]+]]:_(s32) = G_FMAXNUM_IEEE [[FCANONICALIZE]], [[FCANONICALIZE1]] + ; GFX9-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 + ; GFX9-NEXT: [[FCANONICALIZE2:%[0-9]+]]:_(s32) = G_FCANONICALIZE [[FMAXNUM_IEEE]] + ; GFX9-NEXT: [[FCANONICALIZE3:%[0-9]+]]:_(s32) = G_FCANONICALIZE [[COPY2]] + ; GFX9-NEXT: [[FMINNUM_IEEE:%[0-9]+]]:_(s32) = G_FMINNUM_IEEE [[FCANONICALIZE2]], [[FCANONICALIZE3]] 
+ ; GFX9-NEXT: $vgpr0 = COPY [[FMINNUM_IEEE]](s32) %0:_(s32) = COPY $vgpr0 %1:_(s32) = COPY $vgpr1 %2:_(s32) = G_FMAXNUM %0, %1 @@ -810,31 +810,31 @@ body: | ; SI-LABEL: name: test_fminnum_with_nonNaN_fmaxnum_argument_s32_ieee_mode_on ; SI: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; SI: [[C:%[0-9]+]]:_(s32) = G_FCONSTANT float 0.000000e+00 - ; SI: [[FCANONICALIZE:%[0-9]+]]:_(s32) = G_FCANONICALIZE [[COPY]] - ; SI: [[FMAXNUM_IEEE:%[0-9]+]]:_(s32) = G_FMAXNUM_IEEE [[FCANONICALIZE]], [[C]] - ; SI: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; SI: [[FCANONICALIZE1:%[0-9]+]]:_(s32) = G_FCANONICALIZE [[COPY1]] - ; SI: [[FMINNUM_IEEE:%[0-9]+]]:_(s32) = G_FMINNUM_IEEE [[FMAXNUM_IEEE]], [[FCANONICALIZE1]] - ; SI: $vgpr0 = COPY [[FMINNUM_IEEE]](s32) + ; SI-NEXT: [[C:%[0-9]+]]:_(s32) = G_FCONSTANT float 0.000000e+00 + ; SI-NEXT: [[FCANONICALIZE:%[0-9]+]]:_(s32) = G_FCANONICALIZE [[COPY]] + ; SI-NEXT: [[FMAXNUM_IEEE:%[0-9]+]]:_(s32) = G_FMAXNUM_IEEE [[FCANONICALIZE]], [[C]] + ; SI-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; SI-NEXT: [[FCANONICALIZE1:%[0-9]+]]:_(s32) = G_FCANONICALIZE [[COPY1]] + ; SI-NEXT: [[FMINNUM_IEEE:%[0-9]+]]:_(s32) = G_FMINNUM_IEEE [[FMAXNUM_IEEE]], [[FCANONICALIZE1]] + ; SI-NEXT: $vgpr0 = COPY [[FMINNUM_IEEE]](s32) ; VI-LABEL: name: test_fminnum_with_nonNaN_fmaxnum_argument_s32_ieee_mode_on ; VI: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; VI: [[C:%[0-9]+]]:_(s32) = G_FCONSTANT float 0.000000e+00 - ; VI: [[FCANONICALIZE:%[0-9]+]]:_(s32) = G_FCANONICALIZE [[COPY]] - ; VI: [[FMAXNUM_IEEE:%[0-9]+]]:_(s32) = G_FMAXNUM_IEEE [[FCANONICALIZE]], [[C]] - ; VI: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; VI: [[FCANONICALIZE1:%[0-9]+]]:_(s32) = G_FCANONICALIZE [[COPY1]] - ; VI: [[FMINNUM_IEEE:%[0-9]+]]:_(s32) = G_FMINNUM_IEEE [[FMAXNUM_IEEE]], [[FCANONICALIZE1]] - ; VI: $vgpr0 = COPY [[FMINNUM_IEEE]](s32) + ; VI-NEXT: [[C:%[0-9]+]]:_(s32) = G_FCONSTANT float 0.000000e+00 + ; VI-NEXT: [[FCANONICALIZE:%[0-9]+]]:_(s32) = G_FCANONICALIZE [[COPY]] + ; VI-NEXT: [[FMAXNUM_IEEE:%[0-9]+]]:_(s32) = G_FMAXNUM_IEEE [[FCANONICALIZE]], [[C]] + ; VI-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; VI-NEXT: [[FCANONICALIZE1:%[0-9]+]]:_(s32) = G_FCANONICALIZE [[COPY1]] + ; VI-NEXT: [[FMINNUM_IEEE:%[0-9]+]]:_(s32) = G_FMINNUM_IEEE [[FMAXNUM_IEEE]], [[FCANONICALIZE1]] + ; VI-NEXT: $vgpr0 = COPY [[FMINNUM_IEEE]](s32) ; GFX9-LABEL: name: test_fminnum_with_nonNaN_fmaxnum_argument_s32_ieee_mode_on ; GFX9: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX9: [[C:%[0-9]+]]:_(s32) = G_FCONSTANT float 0.000000e+00 - ; GFX9: [[FCANONICALIZE:%[0-9]+]]:_(s32) = G_FCANONICALIZE [[COPY]] - ; GFX9: [[FMAXNUM_IEEE:%[0-9]+]]:_(s32) = G_FMAXNUM_IEEE [[FCANONICALIZE]], [[C]] - ; GFX9: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX9: [[FCANONICALIZE1:%[0-9]+]]:_(s32) = G_FCANONICALIZE [[COPY1]] - ; GFX9: [[FMINNUM_IEEE:%[0-9]+]]:_(s32) = G_FMINNUM_IEEE [[FMAXNUM_IEEE]], [[FCANONICALIZE1]] - ; GFX9: $vgpr0 = COPY [[FMINNUM_IEEE]](s32) + ; GFX9-NEXT: [[C:%[0-9]+]]:_(s32) = G_FCONSTANT float 0.000000e+00 + ; GFX9-NEXT: [[FCANONICALIZE:%[0-9]+]]:_(s32) = G_FCANONICALIZE [[COPY]] + ; GFX9-NEXT: [[FMAXNUM_IEEE:%[0-9]+]]:_(s32) = G_FMAXNUM_IEEE [[FCANONICALIZE]], [[C]] + ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; GFX9-NEXT: [[FCANONICALIZE1:%[0-9]+]]:_(s32) = G_FCANONICALIZE [[COPY1]] + ; GFX9-NEXT: [[FMINNUM_IEEE:%[0-9]+]]:_(s32) = G_FMINNUM_IEEE [[FMAXNUM_IEEE]], [[FCANONICALIZE1]] + ; GFX9-NEXT: $vgpr0 = COPY [[FMINNUM_IEEE]](s32) %0:_(s32) = COPY $vgpr0 %1:_(s32) = G_FCONSTANT float 0.000000e+00 %2:_(s32) = G_FMAXNUM %0, 
%1 @@ -854,22 +854,22 @@ body: | ; SI-LABEL: name: test_fminnum_with_constant_argument_s32_ieee_mode_on ; SI: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; SI: [[C:%[0-9]+]]:_(s32) = G_FCONSTANT float 0.000000e+00 - ; SI: [[FCANONICALIZE:%[0-9]+]]:_(s32) = G_FCANONICALIZE [[COPY]] - ; SI: [[FMINNUM_IEEE:%[0-9]+]]:_(s32) = G_FMINNUM_IEEE [[FCANONICALIZE]], [[C]] - ; SI: $vgpr0 = COPY [[FMINNUM_IEEE]](s32) + ; SI-NEXT: [[C:%[0-9]+]]:_(s32) = G_FCONSTANT float 0.000000e+00 + ; SI-NEXT: [[FCANONICALIZE:%[0-9]+]]:_(s32) = G_FCANONICALIZE [[COPY]] + ; SI-NEXT: [[FMINNUM_IEEE:%[0-9]+]]:_(s32) = G_FMINNUM_IEEE [[FCANONICALIZE]], [[C]] + ; SI-NEXT: $vgpr0 = COPY [[FMINNUM_IEEE]](s32) ; VI-LABEL: name: test_fminnum_with_constant_argument_s32_ieee_mode_on ; VI: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; VI: [[C:%[0-9]+]]:_(s32) = G_FCONSTANT float 0.000000e+00 - ; VI: [[FCANONICALIZE:%[0-9]+]]:_(s32) = G_FCANONICALIZE [[COPY]] - ; VI: [[FMINNUM_IEEE:%[0-9]+]]:_(s32) = G_FMINNUM_IEEE [[FCANONICALIZE]], [[C]] - ; VI: $vgpr0 = COPY [[FMINNUM_IEEE]](s32) + ; VI-NEXT: [[C:%[0-9]+]]:_(s32) = G_FCONSTANT float 0.000000e+00 + ; VI-NEXT: [[FCANONICALIZE:%[0-9]+]]:_(s32) = G_FCANONICALIZE [[COPY]] + ; VI-NEXT: [[FMINNUM_IEEE:%[0-9]+]]:_(s32) = G_FMINNUM_IEEE [[FCANONICALIZE]], [[C]] + ; VI-NEXT: $vgpr0 = COPY [[FMINNUM_IEEE]](s32) ; GFX9-LABEL: name: test_fminnum_with_constant_argument_s32_ieee_mode_on ; GFX9: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX9: [[C:%[0-9]+]]:_(s32) = G_FCONSTANT float 0.000000e+00 - ; GFX9: [[FCANONICALIZE:%[0-9]+]]:_(s32) = G_FCANONICALIZE [[COPY]] - ; GFX9: [[FMINNUM_IEEE:%[0-9]+]]:_(s32) = G_FMINNUM_IEEE [[FCANONICALIZE]], [[C]] - ; GFX9: $vgpr0 = COPY [[FMINNUM_IEEE]](s32) + ; GFX9-NEXT: [[C:%[0-9]+]]:_(s32) = G_FCONSTANT float 0.000000e+00 + ; GFX9-NEXT: [[FCANONICALIZE:%[0-9]+]]:_(s32) = G_FCANONICALIZE [[COPY]] + ; GFX9-NEXT: [[FMINNUM_IEEE:%[0-9]+]]:_(s32) = G_FMINNUM_IEEE [[FCANONICALIZE]], [[C]] + ; GFX9-NEXT: $vgpr0 = COPY [[FMINNUM_IEEE]](s32) %0:_(s32) = COPY $vgpr0 %1:_(s32) = G_FCONSTANT float 0.000000e+00 %2:_(s32) = G_FMINNUM %0, %1 @@ -887,54 +887,54 @@ body: | ; SI-LABEL: name: test_fminnum_with_constant_vector_argument_v2s16_ieee_mode_on ; SI: [[COPY:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0 - ; SI: [[C:%[0-9]+]]:_(s16) = G_FCONSTANT half 0xH0000 - ; SI: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY]](<2 x s16>) - ; SI: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST]](s32) - ; SI: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; SI: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C1]](s32) - ; SI: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) - ; SI: [[FPEXT:%[0-9]+]]:_(s32) = G_FPEXT [[TRUNC]](s16) - ; SI: [[FPEXT1:%[0-9]+]]:_(s32) = G_FPEXT [[C]](s16) - ; SI: [[FMINNUM_IEEE:%[0-9]+]]:_(s32) = G_FMINNUM_IEEE [[FPEXT]], [[FPEXT1]] - ; SI: [[FPTRUNC:%[0-9]+]]:_(s16) = G_FPTRUNC [[FMINNUM_IEEE]](s32) - ; SI: [[FPEXT2:%[0-9]+]]:_(s32) = G_FPEXT [[TRUNC1]](s16) - ; SI: [[FPEXT3:%[0-9]+]]:_(s32) = G_FPEXT [[C]](s16) - ; SI: [[FMINNUM_IEEE1:%[0-9]+]]:_(s32) = G_FMINNUM_IEEE [[FPEXT2]], [[FPEXT3]] - ; SI: [[FPTRUNC1:%[0-9]+]]:_(s16) = G_FPTRUNC [[FMINNUM_IEEE1]](s32) - ; SI: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[FPTRUNC]](s16) - ; SI: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[FPTRUNC1]](s16) - ; SI: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C1]](s32) - ; SI: [[OR:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL]] - ; SI: [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) - ; SI: $vgpr0 = COPY [[BITCAST1]](<2 x s16>) + ; SI-NEXT: [[C:%[0-9]+]]:_(s16) = G_FCONSTANT half 0xH0000 + ; 
SI-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY]](<2 x s16>) + ; SI-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST]](s32) + ; SI-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; SI-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C1]](s32) + ; SI-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) + ; SI-NEXT: [[FPEXT:%[0-9]+]]:_(s32) = G_FPEXT [[TRUNC]](s16) + ; SI-NEXT: [[FPEXT1:%[0-9]+]]:_(s32) = G_FPEXT [[C]](s16) + ; SI-NEXT: [[FMINNUM_IEEE:%[0-9]+]]:_(s32) = G_FMINNUM_IEEE [[FPEXT]], [[FPEXT1]] + ; SI-NEXT: [[FPTRUNC:%[0-9]+]]:_(s16) = G_FPTRUNC [[FMINNUM_IEEE]](s32) + ; SI-NEXT: [[FPEXT2:%[0-9]+]]:_(s32) = G_FPEXT [[TRUNC1]](s16) + ; SI-NEXT: [[FPEXT3:%[0-9]+]]:_(s32) = G_FPEXT [[C]](s16) + ; SI-NEXT: [[FMINNUM_IEEE1:%[0-9]+]]:_(s32) = G_FMINNUM_IEEE [[FPEXT2]], [[FPEXT3]] + ; SI-NEXT: [[FPTRUNC1:%[0-9]+]]:_(s16) = G_FPTRUNC [[FMINNUM_IEEE1]](s32) + ; SI-NEXT: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[FPTRUNC]](s16) + ; SI-NEXT: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[FPTRUNC1]](s16) + ; SI-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C1]](s32) + ; SI-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL]] + ; SI-NEXT: [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) + ; SI-NEXT: $vgpr0 = COPY [[BITCAST1]](<2 x s16>) ; VI-LABEL: name: test_fminnum_with_constant_vector_argument_v2s16_ieee_mode_on ; VI: [[COPY:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0 - ; VI: [[C:%[0-9]+]]:_(s16) = G_FCONSTANT half 0xH0000 - ; VI: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY]](<2 x s16>) - ; VI: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST]](s32) - ; VI: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; VI: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C1]](s32) - ; VI: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) - ; VI: [[FCANONICALIZE:%[0-9]+]]:_(s16) = G_FCANONICALIZE [[TRUNC]] - ; VI: [[FCANONICALIZE1:%[0-9]+]]:_(s16) = G_FCANONICALIZE [[C]] - ; VI: [[FMINNUM_IEEE:%[0-9]+]]:_(s16) = G_FMINNUM_IEEE [[FCANONICALIZE]], [[FCANONICALIZE1]] - ; VI: [[FCANONICALIZE2:%[0-9]+]]:_(s16) = G_FCANONICALIZE [[TRUNC1]] - ; VI: [[FCANONICALIZE3:%[0-9]+]]:_(s16) = G_FCANONICALIZE [[C]] - ; VI: [[FMINNUM_IEEE1:%[0-9]+]]:_(s16) = G_FMINNUM_IEEE [[FCANONICALIZE2]], [[FCANONICALIZE3]] - ; VI: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[FMINNUM_IEEE]](s16) - ; VI: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[FMINNUM_IEEE1]](s16) - ; VI: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C1]](s32) - ; VI: [[OR:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL]] - ; VI: [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) - ; VI: $vgpr0 = COPY [[BITCAST1]](<2 x s16>) + ; VI-NEXT: [[C:%[0-9]+]]:_(s16) = G_FCONSTANT half 0xH0000 + ; VI-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY]](<2 x s16>) + ; VI-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST]](s32) + ; VI-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; VI-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C1]](s32) + ; VI-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) + ; VI-NEXT: [[FCANONICALIZE:%[0-9]+]]:_(s16) = G_FCANONICALIZE [[TRUNC]] + ; VI-NEXT: [[FCANONICALIZE1:%[0-9]+]]:_(s16) = G_FCANONICALIZE [[C]] + ; VI-NEXT: [[FMINNUM_IEEE:%[0-9]+]]:_(s16) = G_FMINNUM_IEEE [[FCANONICALIZE]], [[FCANONICALIZE1]] + ; VI-NEXT: [[FCANONICALIZE2:%[0-9]+]]:_(s16) = G_FCANONICALIZE [[TRUNC1]] + ; VI-NEXT: [[FCANONICALIZE3:%[0-9]+]]:_(s16) = G_FCANONICALIZE [[C]] + ; VI-NEXT: [[FMINNUM_IEEE1:%[0-9]+]]:_(s16) = G_FMINNUM_IEEE [[FCANONICALIZE2]], [[FCANONICALIZE3]] + ; VI-NEXT: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[FMINNUM_IEEE]](s16) + ; VI-NEXT: 
[[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[FMINNUM_IEEE1]](s16) + ; VI-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C1]](s32) + ; VI-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL]] + ; VI-NEXT: [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) + ; VI-NEXT: $vgpr0 = COPY [[BITCAST1]](<2 x s16>) ; GFX9-LABEL: name: test_fminnum_with_constant_vector_argument_v2s16_ieee_mode_on ; GFX9: [[COPY:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0 - ; GFX9: [[C:%[0-9]+]]:_(s16) = G_FCONSTANT half 0xH0000 - ; GFX9: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[C]](s16) - ; GFX9: [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[ANYEXT]](s32), [[ANYEXT]](s32) - ; GFX9: [[FCANONICALIZE:%[0-9]+]]:_(<2 x s16>) = G_FCANONICALIZE [[COPY]] - ; GFX9: [[FMINNUM_IEEE:%[0-9]+]]:_(<2 x s16>) = G_FMINNUM_IEEE [[FCANONICALIZE]], [[BUILD_VECTOR_TRUNC]] - ; GFX9: $vgpr0 = COPY [[FMINNUM_IEEE]](<2 x s16>) + ; GFX9-NEXT: [[C:%[0-9]+]]:_(s16) = G_FCONSTANT half 0xH0000 + ; GFX9-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[C]](s16) + ; GFX9-NEXT: [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[ANYEXT]](s32), [[ANYEXT]](s32) + ; GFX9-NEXT: [[FCANONICALIZE:%[0-9]+]]:_(<2 x s16>) = G_FCANONICALIZE [[COPY]] + ; GFX9-NEXT: [[FMINNUM_IEEE:%[0-9]+]]:_(<2 x s16>) = G_FMINNUM_IEEE [[FCANONICALIZE]], [[BUILD_VECTOR_TRUNC]] + ; GFX9-NEXT: $vgpr0 = COPY [[FMINNUM_IEEE]](<2 x s16>) %0:_(<2 x s16>) = COPY $vgpr0 %1:_(s16) = G_FCONSTANT half 0xH0000 %2:_(<2 x s16>) = G_BUILD_VECTOR %1(s16), %1(s16) diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-fneg.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-fneg.mir index cdf9fab798c8..e8662438fb50 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-fneg.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-fneg.mir @@ -1,8 +1,8 @@ # NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py -# RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=tahiti -run-pass=legalizer -global-isel-abort=0 %s -o - | FileCheck -check-prefix=SI %s -# RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=fiji -run-pass=legalizer -global-isel-abort=0 %s -o - | FileCheck -check-prefix=VI %s -# RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx900 -run-pass=legalizer -global-isel-abort=0 %s -o - | FileCheck -check-prefix=GFX9 %s -# RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx1010 -run-pass=legalizer -global-isel-abort=0 %s -o - | FileCheck -check-prefix=GFX9 %s +# RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=tahiti -run-pass=legalizer %s -o - | FileCheck -check-prefix=SI %s +# RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=fiji -run-pass=legalizer %s -o - | FileCheck -check-prefix=VI %s +# RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx900 -run-pass=legalizer %s -o - | FileCheck -check-prefix=GFX9 %s +# RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx1010 -run-pass=legalizer %s -o - | FileCheck -check-prefix=GFX9 %s --- name: test_fneg_s32 diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-fpext.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-fpext.mir index 2143bdcc85a3..bcd4627aadd7 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-fpext.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-fpext.mir @@ -1,5 +1,5 @@ # NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py -# RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=fiji -run-pass=legalizer -global-isel-abort=0 %s -o - | FileCheck %s +# RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=fiji -run-pass=legalizer %s -o - | FileCheck %s --- name: test_fpext_f16_to_f32 diff --git 
a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-fshl.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-fshl.mir index d4ec463bd34f..d3d2e0294f7c 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-fshl.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-fshl.mir @@ -1,7 +1,7 @@ # NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py -# RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=tahiti -O0 -run-pass=legalizer -global-isel-abort=0 %s -o - | FileCheck -check-prefix=SI %s -# RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=fiji -O0 -run-pass=legalizer -global-isel-abort=0 %s -o - | FileCheck -check-prefix=VI %s -# RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx900 -O0 -run-pass=legalizer -global-isel-abort=0 %s -o - | FileCheck -check-prefix=GFX9 %s +# RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=tahiti -O0 -run-pass=legalizer %s -o - | FileCheck -check-prefix=SI %s +# RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=fiji -O0 -run-pass=legalizer %s -o - | FileCheck -check-prefix=VI %s +# RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx900 -O0 -run-pass=legalizer %s -o - | FileCheck -check-prefix=GFX9 %s --- name: test_fshl_s32_s32 @@ -11,37 +11,37 @@ body: | ; SI-LABEL: name: test_fshl_s32_s32 ; SI: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; SI: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; SI: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; SI: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 - ; SI: [[FSHR:%[0-9]+]]:_(s32) = G_FSHR [[COPY]], [[COPY1]], [[C]](s32) - ; SI: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[COPY]], [[C]](s32) - ; SI: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 -1 - ; SI: [[XOR:%[0-9]+]]:_(s32) = G_XOR [[COPY2]], [[C1]] - ; SI: [[FSHR1:%[0-9]+]]:_(s32) = G_FSHR [[LSHR]], [[FSHR]], [[XOR]](s32) - ; SI: $vgpr0 = COPY [[FSHR1]](s32) + ; SI-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; SI-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 + ; SI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 + ; SI-NEXT: [[FSHR:%[0-9]+]]:_(s32) = G_FSHR [[COPY]], [[COPY1]], [[C]](s32) + ; SI-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[COPY]], [[C]](s32) + ; SI-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 -1 + ; SI-NEXT: [[XOR:%[0-9]+]]:_(s32) = G_XOR [[COPY2]], [[C1]] + ; SI-NEXT: [[FSHR1:%[0-9]+]]:_(s32) = G_FSHR [[LSHR]], [[FSHR]], [[XOR]](s32) + ; SI-NEXT: $vgpr0 = COPY [[FSHR1]](s32) ; VI-LABEL: name: test_fshl_s32_s32 ; VI: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; VI: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; VI: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; VI: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 - ; VI: [[FSHR:%[0-9]+]]:_(s32) = G_FSHR [[COPY]], [[COPY1]], [[C]](s32) - ; VI: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[COPY]], [[C]](s32) - ; VI: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 -1 - ; VI: [[XOR:%[0-9]+]]:_(s32) = G_XOR [[COPY2]], [[C1]] - ; VI: [[FSHR1:%[0-9]+]]:_(s32) = G_FSHR [[LSHR]], [[FSHR]], [[XOR]](s32) - ; VI: $vgpr0 = COPY [[FSHR1]](s32) + ; VI-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; VI-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 + ; VI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 + ; VI-NEXT: [[FSHR:%[0-9]+]]:_(s32) = G_FSHR [[COPY]], [[COPY1]], [[C]](s32) + ; VI-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[COPY]], [[C]](s32) + ; VI-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 -1 + ; VI-NEXT: [[XOR:%[0-9]+]]:_(s32) = G_XOR [[COPY2]], [[C1]] + ; VI-NEXT: [[FSHR1:%[0-9]+]]:_(s32) = G_FSHR [[LSHR]], [[FSHR]], [[XOR]](s32) + ; VI-NEXT: $vgpr0 = COPY [[FSHR1]](s32) ; GFX9-LABEL: name: test_fshl_s32_s32 ; GFX9: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX9: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 
- ; GFX9: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; GFX9: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 - ; GFX9: [[FSHR:%[0-9]+]]:_(s32) = G_FSHR [[COPY]], [[COPY1]], [[C]](s32) - ; GFX9: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[COPY]], [[C]](s32) - ; GFX9: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 -1 - ; GFX9: [[XOR:%[0-9]+]]:_(s32) = G_XOR [[COPY2]], [[C1]] - ; GFX9: [[FSHR1:%[0-9]+]]:_(s32) = G_FSHR [[LSHR]], [[FSHR]], [[XOR]](s32) - ; GFX9: $vgpr0 = COPY [[FSHR1]](s32) + ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; GFX9-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 + ; GFX9-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 + ; GFX9-NEXT: [[FSHR:%[0-9]+]]:_(s32) = G_FSHR [[COPY]], [[COPY1]], [[C]](s32) + ; GFX9-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[COPY]], [[C]](s32) + ; GFX9-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 -1 + ; GFX9-NEXT: [[XOR:%[0-9]+]]:_(s32) = G_XOR [[COPY2]], [[C1]] + ; GFX9-NEXT: [[FSHR1:%[0-9]+]]:_(s32) = G_FSHR [[LSHR]], [[FSHR]], [[XOR]](s32) + ; GFX9-NEXT: $vgpr0 = COPY [[FSHR1]](s32) %0:_(s32) = COPY $vgpr0 %1:_(s32) = COPY $vgpr1 %2:_(s32) = COPY $vgpr2 @@ -57,61 +57,61 @@ body: | ; SI-LABEL: name: test_fshl_v2s32_v2s32 ; SI: [[COPY:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr0_vgpr1 - ; SI: [[COPY1:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr2_vgpr3 - ; SI: [[COPY2:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr4_vgpr5 - ; SI: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<2 x s32>) - ; SI: [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](<2 x s32>) - ; SI: [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY2]](<2 x s32>) - ; SI: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 - ; SI: [[FSHR:%[0-9]+]]:_(s32) = G_FSHR [[UV]], [[UV2]], [[C]](s32) - ; SI: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[UV]], [[C]](s32) - ; SI: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 -1 - ; SI: [[XOR:%[0-9]+]]:_(s32) = G_XOR [[UV4]], [[C1]] - ; SI: [[FSHR1:%[0-9]+]]:_(s32) = G_FSHR [[LSHR]], [[FSHR]], [[XOR]](s32) - ; SI: [[FSHR2:%[0-9]+]]:_(s32) = G_FSHR [[UV1]], [[UV3]], [[C]](s32) - ; SI: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[UV1]], [[C]](s32) - ; SI: [[XOR1:%[0-9]+]]:_(s32) = G_XOR [[UV5]], [[C1]] - ; SI: [[FSHR3:%[0-9]+]]:_(s32) = G_FSHR [[LSHR1]], [[FSHR2]], [[XOR1]](s32) - ; SI: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[FSHR1]](s32), [[FSHR3]](s32) - ; SI: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x s32>) + ; SI-NEXT: [[COPY1:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr2_vgpr3 + ; SI-NEXT: [[COPY2:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr4_vgpr5 + ; SI-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<2 x s32>) + ; SI-NEXT: [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](<2 x s32>) + ; SI-NEXT: [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY2]](<2 x s32>) + ; SI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 + ; SI-NEXT: [[FSHR:%[0-9]+]]:_(s32) = G_FSHR [[UV]], [[UV2]], [[C]](s32) + ; SI-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[UV]], [[C]](s32) + ; SI-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 -1 + ; SI-NEXT: [[XOR:%[0-9]+]]:_(s32) = G_XOR [[UV4]], [[C1]] + ; SI-NEXT: [[FSHR1:%[0-9]+]]:_(s32) = G_FSHR [[LSHR]], [[FSHR]], [[XOR]](s32) + ; SI-NEXT: [[FSHR2:%[0-9]+]]:_(s32) = G_FSHR [[UV1]], [[UV3]], [[C]](s32) + ; SI-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[UV1]], [[C]](s32) + ; SI-NEXT: [[XOR1:%[0-9]+]]:_(s32) = G_XOR [[UV5]], [[C1]] + ; SI-NEXT: [[FSHR3:%[0-9]+]]:_(s32) = G_FSHR [[LSHR1]], [[FSHR2]], [[XOR1]](s32) + ; SI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = 
G_BUILD_VECTOR [[FSHR1]](s32), [[FSHR3]](s32) + ; SI-NEXT: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x s32>) ; VI-LABEL: name: test_fshl_v2s32_v2s32 ; VI: [[COPY:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr0_vgpr1 - ; VI: [[COPY1:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr2_vgpr3 - ; VI: [[COPY2:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr4_vgpr5 - ; VI: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<2 x s32>) - ; VI: [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](<2 x s32>) - ; VI: [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY2]](<2 x s32>) - ; VI: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 - ; VI: [[FSHR:%[0-9]+]]:_(s32) = G_FSHR [[UV]], [[UV2]], [[C]](s32) - ; VI: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[UV]], [[C]](s32) - ; VI: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 -1 - ; VI: [[XOR:%[0-9]+]]:_(s32) = G_XOR [[UV4]], [[C1]] - ; VI: [[FSHR1:%[0-9]+]]:_(s32) = G_FSHR [[LSHR]], [[FSHR]], [[XOR]](s32) - ; VI: [[FSHR2:%[0-9]+]]:_(s32) = G_FSHR [[UV1]], [[UV3]], [[C]](s32) - ; VI: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[UV1]], [[C]](s32) - ; VI: [[XOR1:%[0-9]+]]:_(s32) = G_XOR [[UV5]], [[C1]] - ; VI: [[FSHR3:%[0-9]+]]:_(s32) = G_FSHR [[LSHR1]], [[FSHR2]], [[XOR1]](s32) - ; VI: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[FSHR1]](s32), [[FSHR3]](s32) - ; VI: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x s32>) + ; VI-NEXT: [[COPY1:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr2_vgpr3 + ; VI-NEXT: [[COPY2:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr4_vgpr5 + ; VI-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<2 x s32>) + ; VI-NEXT: [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](<2 x s32>) + ; VI-NEXT: [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY2]](<2 x s32>) + ; VI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 + ; VI-NEXT: [[FSHR:%[0-9]+]]:_(s32) = G_FSHR [[UV]], [[UV2]], [[C]](s32) + ; VI-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[UV]], [[C]](s32) + ; VI-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 -1 + ; VI-NEXT: [[XOR:%[0-9]+]]:_(s32) = G_XOR [[UV4]], [[C1]] + ; VI-NEXT: [[FSHR1:%[0-9]+]]:_(s32) = G_FSHR [[LSHR]], [[FSHR]], [[XOR]](s32) + ; VI-NEXT: [[FSHR2:%[0-9]+]]:_(s32) = G_FSHR [[UV1]], [[UV3]], [[C]](s32) + ; VI-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[UV1]], [[C]](s32) + ; VI-NEXT: [[XOR1:%[0-9]+]]:_(s32) = G_XOR [[UV5]], [[C1]] + ; VI-NEXT: [[FSHR3:%[0-9]+]]:_(s32) = G_FSHR [[LSHR1]], [[FSHR2]], [[XOR1]](s32) + ; VI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[FSHR1]](s32), [[FSHR3]](s32) + ; VI-NEXT: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x s32>) ; GFX9-LABEL: name: test_fshl_v2s32_v2s32 ; GFX9: [[COPY:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr0_vgpr1 - ; GFX9: [[COPY1:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr2_vgpr3 - ; GFX9: [[COPY2:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr4_vgpr5 - ; GFX9: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<2 x s32>) - ; GFX9: [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](<2 x s32>) - ; GFX9: [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY2]](<2 x s32>) - ; GFX9: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 - ; GFX9: [[FSHR:%[0-9]+]]:_(s32) = G_FSHR [[UV]], [[UV2]], [[C]](s32) - ; GFX9: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[UV]], [[C]](s32) - ; GFX9: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 -1 - ; GFX9: [[XOR:%[0-9]+]]:_(s32) = G_XOR [[UV4]], [[C1]] - ; GFX9: [[FSHR1:%[0-9]+]]:_(s32) = G_FSHR [[LSHR]], [[FSHR]], [[XOR]](s32) - ; GFX9: [[FSHR2:%[0-9]+]]:_(s32) = 
G_FSHR [[UV1]], [[UV3]], [[C]](s32) - ; GFX9: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[UV1]], [[C]](s32) - ; GFX9: [[XOR1:%[0-9]+]]:_(s32) = G_XOR [[UV5]], [[C1]] - ; GFX9: [[FSHR3:%[0-9]+]]:_(s32) = G_FSHR [[LSHR1]], [[FSHR2]], [[XOR1]](s32) - ; GFX9: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[FSHR1]](s32), [[FSHR3]](s32) - ; GFX9: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x s32>) + ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr2_vgpr3 + ; GFX9-NEXT: [[COPY2:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr4_vgpr5 + ; GFX9-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<2 x s32>) + ; GFX9-NEXT: [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](<2 x s32>) + ; GFX9-NEXT: [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY2]](<2 x s32>) + ; GFX9-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 + ; GFX9-NEXT: [[FSHR:%[0-9]+]]:_(s32) = G_FSHR [[UV]], [[UV2]], [[C]](s32) + ; GFX9-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[UV]], [[C]](s32) + ; GFX9-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 -1 + ; GFX9-NEXT: [[XOR:%[0-9]+]]:_(s32) = G_XOR [[UV4]], [[C1]] + ; GFX9-NEXT: [[FSHR1:%[0-9]+]]:_(s32) = G_FSHR [[LSHR]], [[FSHR]], [[XOR]](s32) + ; GFX9-NEXT: [[FSHR2:%[0-9]+]]:_(s32) = G_FSHR [[UV1]], [[UV3]], [[C]](s32) + ; GFX9-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[UV1]], [[C]](s32) + ; GFX9-NEXT: [[XOR1:%[0-9]+]]:_(s32) = G_XOR [[UV5]], [[C1]] + ; GFX9-NEXT: [[FSHR3:%[0-9]+]]:_(s32) = G_FSHR [[LSHR1]], [[FSHR2]], [[XOR1]](s32) + ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[FSHR1]](s32), [[FSHR3]](s32) + ; GFX9-NEXT: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x s32>) %0:_(<2 x s32>) = COPY $vgpr0_vgpr1 %1:_(<2 x s32>) = COPY $vgpr2_vgpr3 %2:_(<2 x s32>) = COPY $vgpr4_vgpr5 @@ -127,66 +127,66 @@ body: | ; SI-LABEL: name: test_fshl_s16_s16 ; SI: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; SI: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; SI: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; SI: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY2]](s32) - ; SI: [[C:%[0-9]+]]:_(s16) = G_CONSTANT i16 15 - ; SI: [[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC]], [[C]] - ; SI: [[C1:%[0-9]+]]:_(s16) = G_CONSTANT i16 -1 - ; SI: [[XOR:%[0-9]+]]:_(s16) = G_XOR [[TRUNC]], [[C1]] - ; SI: [[AND1:%[0-9]+]]:_(s16) = G_AND [[XOR]], [[C]] - ; SI: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[AND]](s16) - ; SI: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY]], [[ZEXT]](s32) - ; SI: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[SHL]](s32) - ; SI: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 - ; SI: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 - ; SI: [[AND2:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C3]] - ; SI: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[AND2]], [[C2]](s32) - ; SI: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[AND1]](s16) - ; SI: [[AND3:%[0-9]+]]:_(s32) = G_AND [[LSHR]], [[C3]] - ; SI: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[AND3]], [[ZEXT1]](s32) - ; SI: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR1]](s32) - ; SI: [[OR:%[0-9]+]]:_(s16) = G_OR [[TRUNC1]], [[TRUNC2]] - ; SI: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[OR]](s16) - ; SI: $vgpr0 = COPY [[ANYEXT]](s32) + ; SI-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; SI-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 + ; SI-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY2]](s32) + ; SI-NEXT: [[C:%[0-9]+]]:_(s16) = G_CONSTANT i16 15 + ; SI-NEXT: [[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC]], [[C]] + ; SI-NEXT: [[C1:%[0-9]+]]:_(s16) = G_CONSTANT i16 -1 + ; SI-NEXT: [[XOR:%[0-9]+]]:_(s16) = G_XOR [[TRUNC]], [[C1]] + ; SI-NEXT: [[AND1:%[0-9]+]]:_(s16) = 
G_AND [[XOR]], [[C]] + ; SI-NEXT: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[AND]](s16) + ; SI-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY]], [[ZEXT]](s32) + ; SI-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[SHL]](s32) + ; SI-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 + ; SI-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 + ; SI-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C3]] + ; SI-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[AND2]], [[C2]](s32) + ; SI-NEXT: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[AND1]](s16) + ; SI-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[LSHR]], [[C3]] + ; SI-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[AND3]], [[ZEXT1]](s32) + ; SI-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR1]](s32) + ; SI-NEXT: [[OR:%[0-9]+]]:_(s16) = G_OR [[TRUNC1]], [[TRUNC2]] + ; SI-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[OR]](s16) + ; SI-NEXT: $vgpr0 = COPY [[ANYEXT]](s32) ; VI-LABEL: name: test_fshl_s16_s16 ; VI: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; VI: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; VI: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; VI: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY]](s32) - ; VI: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY1]](s32) - ; VI: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[COPY2]](s32) - ; VI: [[C:%[0-9]+]]:_(s16) = G_CONSTANT i16 15 - ; VI: [[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC2]], [[C]] - ; VI: [[C1:%[0-9]+]]:_(s16) = G_CONSTANT i16 -1 - ; VI: [[XOR:%[0-9]+]]:_(s16) = G_XOR [[TRUNC2]], [[C1]] - ; VI: [[AND1:%[0-9]+]]:_(s16) = G_AND [[XOR]], [[C]] - ; VI: [[C2:%[0-9]+]]:_(s16) = G_CONSTANT i16 1 - ; VI: [[SHL:%[0-9]+]]:_(s16) = G_SHL [[TRUNC]], [[AND]](s16) - ; VI: [[LSHR:%[0-9]+]]:_(s16) = G_LSHR [[TRUNC1]], [[C2]](s16) - ; VI: [[LSHR1:%[0-9]+]]:_(s16) = G_LSHR [[LSHR]], [[AND1]](s16) - ; VI: [[OR:%[0-9]+]]:_(s16) = G_OR [[SHL]], [[LSHR1]] - ; VI: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[OR]](s16) - ; VI: $vgpr0 = COPY [[ANYEXT]](s32) + ; VI-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; VI-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 + ; VI-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY]](s32) + ; VI-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY1]](s32) + ; VI-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[COPY2]](s32) + ; VI-NEXT: [[C:%[0-9]+]]:_(s16) = G_CONSTANT i16 15 + ; VI-NEXT: [[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC2]], [[C]] + ; VI-NEXT: [[C1:%[0-9]+]]:_(s16) = G_CONSTANT i16 -1 + ; VI-NEXT: [[XOR:%[0-9]+]]:_(s16) = G_XOR [[TRUNC2]], [[C1]] + ; VI-NEXT: [[AND1:%[0-9]+]]:_(s16) = G_AND [[XOR]], [[C]] + ; VI-NEXT: [[C2:%[0-9]+]]:_(s16) = G_CONSTANT i16 1 + ; VI-NEXT: [[SHL:%[0-9]+]]:_(s16) = G_SHL [[TRUNC]], [[AND]](s16) + ; VI-NEXT: [[LSHR:%[0-9]+]]:_(s16) = G_LSHR [[TRUNC1]], [[C2]](s16) + ; VI-NEXT: [[LSHR1:%[0-9]+]]:_(s16) = G_LSHR [[LSHR]], [[AND1]](s16) + ; VI-NEXT: [[OR:%[0-9]+]]:_(s16) = G_OR [[SHL]], [[LSHR1]] + ; VI-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[OR]](s16) + ; VI-NEXT: $vgpr0 = COPY [[ANYEXT]](s32) ; GFX9-LABEL: name: test_fshl_s16_s16 ; GFX9: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX9: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX9: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; GFX9: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY]](s32) - ; GFX9: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY1]](s32) - ; GFX9: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[COPY2]](s32) - ; GFX9: [[C:%[0-9]+]]:_(s16) = G_CONSTANT i16 15 - ; GFX9: [[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC2]], [[C]] - ; GFX9: [[C1:%[0-9]+]]:_(s16) = G_CONSTANT i16 -1 - ; GFX9: [[XOR:%[0-9]+]]:_(s16) = G_XOR [[TRUNC2]], [[C1]] - ; GFX9: [[AND1:%[0-9]+]]:_(s16) = G_AND [[XOR]], [[C]] - 
; GFX9: [[C2:%[0-9]+]]:_(s16) = G_CONSTANT i16 1 - ; GFX9: [[SHL:%[0-9]+]]:_(s16) = G_SHL [[TRUNC]], [[AND]](s16) - ; GFX9: [[LSHR:%[0-9]+]]:_(s16) = G_LSHR [[TRUNC1]], [[C2]](s16) - ; GFX9: [[LSHR1:%[0-9]+]]:_(s16) = G_LSHR [[LSHR]], [[AND1]](s16) - ; GFX9: [[OR:%[0-9]+]]:_(s16) = G_OR [[SHL]], [[LSHR1]] - ; GFX9: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[OR]](s16) - ; GFX9: $vgpr0 = COPY [[ANYEXT]](s32) + ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; GFX9-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 + ; GFX9-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY]](s32) + ; GFX9-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY1]](s32) + ; GFX9-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[COPY2]](s32) + ; GFX9-NEXT: [[C:%[0-9]+]]:_(s16) = G_CONSTANT i16 15 + ; GFX9-NEXT: [[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC2]], [[C]] + ; GFX9-NEXT: [[C1:%[0-9]+]]:_(s16) = G_CONSTANT i16 -1 + ; GFX9-NEXT: [[XOR:%[0-9]+]]:_(s16) = G_XOR [[TRUNC2]], [[C1]] + ; GFX9-NEXT: [[AND1:%[0-9]+]]:_(s16) = G_AND [[XOR]], [[C]] + ; GFX9-NEXT: [[C2:%[0-9]+]]:_(s16) = G_CONSTANT i16 1 + ; GFX9-NEXT: [[SHL:%[0-9]+]]:_(s16) = G_SHL [[TRUNC]], [[AND]](s16) + ; GFX9-NEXT: [[LSHR:%[0-9]+]]:_(s16) = G_LSHR [[TRUNC1]], [[C2]](s16) + ; GFX9-NEXT: [[LSHR1:%[0-9]+]]:_(s16) = G_LSHR [[LSHR]], [[AND1]](s16) + ; GFX9-NEXT: [[OR:%[0-9]+]]:_(s16) = G_OR [[SHL]], [[LSHR1]] + ; GFX9-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[OR]](s16) + ; GFX9-NEXT: $vgpr0 = COPY [[ANYEXT]](s32) %0:_(s32) = COPY $vgpr0 %1:_(s32) = COPY $vgpr1 %2:_(s32) = COPY $vgpr2 @@ -206,115 +206,115 @@ body: | ; SI-LABEL: name: test_fshl_v2s16_v2s16 ; SI: [[COPY:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0 - ; SI: [[COPY1:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr1 - ; SI: [[COPY2:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr2 - ; SI: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY]](<2 x s16>) - ; SI: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; SI: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) - ; SI: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[COPY1]](<2 x s16>) - ; SI: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C]](s32) - ; SI: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[COPY2]](<2 x s16>) - ; SI: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST2]](s32) - ; SI: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C]](s32) - ; SI: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR2]](s32) - ; SI: [[C1:%[0-9]+]]:_(s16) = G_CONSTANT i16 15 - ; SI: [[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC]], [[C1]] - ; SI: [[C2:%[0-9]+]]:_(s16) = G_CONSTANT i16 -1 - ; SI: [[XOR:%[0-9]+]]:_(s16) = G_XOR [[TRUNC]], [[C2]] - ; SI: [[AND1:%[0-9]+]]:_(s16) = G_AND [[XOR]], [[C1]] - ; SI: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[AND]](s16) - ; SI: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[BITCAST]], [[ZEXT]](s32) - ; SI: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[SHL]](s32) - ; SI: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 - ; SI: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 - ; SI: [[AND2:%[0-9]+]]:_(s32) = G_AND [[BITCAST1]], [[C4]] - ; SI: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[AND2]], [[C3]](s32) - ; SI: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[AND1]](s16) - ; SI: [[AND3:%[0-9]+]]:_(s32) = G_AND [[LSHR3]], [[C4]] - ; SI: [[LSHR4:%[0-9]+]]:_(s32) = G_LSHR [[AND3]], [[ZEXT1]](s32) - ; SI: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR4]](s32) - ; SI: [[OR:%[0-9]+]]:_(s16) = G_OR [[TRUNC2]], [[TRUNC3]] - ; SI: [[AND4:%[0-9]+]]:_(s16) = G_AND [[TRUNC1]], [[C1]] - ; SI: [[XOR1:%[0-9]+]]:_(s16) = G_XOR [[TRUNC1]], [[C2]] - ; SI: [[AND5:%[0-9]+]]:_(s16) = G_AND [[XOR1]], [[C1]] - ; SI: [[ZEXT2:%[0-9]+]]:_(s32) = G_ZEXT [[AND4]](s16) - ; SI: 
[[SHL1:%[0-9]+]]:_(s32) = G_SHL [[LSHR]], [[ZEXT2]](s32) - ; SI: [[TRUNC4:%[0-9]+]]:_(s16) = G_TRUNC [[SHL1]](s32) - ; SI: [[COPY3:%[0-9]+]]:_(s32) = COPY [[C3]](s32) - ; SI: [[AND6:%[0-9]+]]:_(s32) = G_AND [[LSHR1]], [[C4]] - ; SI: [[LSHR5:%[0-9]+]]:_(s32) = G_LSHR [[AND6]], [[COPY3]](s32) - ; SI: [[ZEXT3:%[0-9]+]]:_(s32) = G_ZEXT [[AND5]](s16) - ; SI: [[AND7:%[0-9]+]]:_(s32) = G_AND [[LSHR5]], [[C4]] - ; SI: [[LSHR6:%[0-9]+]]:_(s32) = G_LSHR [[AND7]], [[ZEXT3]](s32) - ; SI: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR6]](s32) - ; SI: [[OR1:%[0-9]+]]:_(s16) = G_OR [[TRUNC4]], [[TRUNC5]] - ; SI: [[ZEXT4:%[0-9]+]]:_(s32) = G_ZEXT [[OR]](s16) - ; SI: [[ZEXT5:%[0-9]+]]:_(s32) = G_ZEXT [[OR1]](s16) - ; SI: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[ZEXT5]], [[C]](s32) - ; SI: [[OR2:%[0-9]+]]:_(s32) = G_OR [[ZEXT4]], [[SHL2]] - ; SI: [[BITCAST3:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR2]](s32) - ; SI: $vgpr0 = COPY [[BITCAST3]](<2 x s16>) + ; SI-NEXT: [[COPY1:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr1 + ; SI-NEXT: [[COPY2:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr2 + ; SI-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY]](<2 x s16>) + ; SI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; SI-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) + ; SI-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[COPY1]](<2 x s16>) + ; SI-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C]](s32) + ; SI-NEXT: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[COPY2]](<2 x s16>) + ; SI-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST2]](s32) + ; SI-NEXT: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C]](s32) + ; SI-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR2]](s32) + ; SI-NEXT: [[C1:%[0-9]+]]:_(s16) = G_CONSTANT i16 15 + ; SI-NEXT: [[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC]], [[C1]] + ; SI-NEXT: [[C2:%[0-9]+]]:_(s16) = G_CONSTANT i16 -1 + ; SI-NEXT: [[XOR:%[0-9]+]]:_(s16) = G_XOR [[TRUNC]], [[C2]] + ; SI-NEXT: [[AND1:%[0-9]+]]:_(s16) = G_AND [[XOR]], [[C1]] + ; SI-NEXT: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[AND]](s16) + ; SI-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[BITCAST]], [[ZEXT]](s32) + ; SI-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[SHL]](s32) + ; SI-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 + ; SI-NEXT: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 + ; SI-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[BITCAST1]], [[C4]] + ; SI-NEXT: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[AND2]], [[C3]](s32) + ; SI-NEXT: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[AND1]](s16) + ; SI-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[LSHR3]], [[C4]] + ; SI-NEXT: [[LSHR4:%[0-9]+]]:_(s32) = G_LSHR [[AND3]], [[ZEXT1]](s32) + ; SI-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR4]](s32) + ; SI-NEXT: [[OR:%[0-9]+]]:_(s16) = G_OR [[TRUNC2]], [[TRUNC3]] + ; SI-NEXT: [[AND4:%[0-9]+]]:_(s16) = G_AND [[TRUNC1]], [[C1]] + ; SI-NEXT: [[XOR1:%[0-9]+]]:_(s16) = G_XOR [[TRUNC1]], [[C2]] + ; SI-NEXT: [[AND5:%[0-9]+]]:_(s16) = G_AND [[XOR1]], [[C1]] + ; SI-NEXT: [[ZEXT2:%[0-9]+]]:_(s32) = G_ZEXT [[AND4]](s16) + ; SI-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[LSHR]], [[ZEXT2]](s32) + ; SI-NEXT: [[TRUNC4:%[0-9]+]]:_(s16) = G_TRUNC [[SHL1]](s32) + ; SI-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY [[C3]](s32) + ; SI-NEXT: [[AND6:%[0-9]+]]:_(s32) = G_AND [[LSHR1]], [[C4]] + ; SI-NEXT: [[LSHR5:%[0-9]+]]:_(s32) = G_LSHR [[AND6]], [[COPY3]](s32) + ; SI-NEXT: [[ZEXT3:%[0-9]+]]:_(s32) = G_ZEXT [[AND5]](s16) + ; SI-NEXT: [[AND7:%[0-9]+]]:_(s32) = G_AND [[LSHR5]], [[C4]] + ; SI-NEXT: [[LSHR6:%[0-9]+]]:_(s32) = G_LSHR [[AND7]], [[ZEXT3]](s32) + ; SI-NEXT: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC 
[[LSHR6]](s32) + ; SI-NEXT: [[OR1:%[0-9]+]]:_(s16) = G_OR [[TRUNC4]], [[TRUNC5]] + ; SI-NEXT: [[ZEXT4:%[0-9]+]]:_(s32) = G_ZEXT [[OR]](s16) + ; SI-NEXT: [[ZEXT5:%[0-9]+]]:_(s32) = G_ZEXT [[OR1]](s16) + ; SI-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[ZEXT5]], [[C]](s32) + ; SI-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[ZEXT4]], [[SHL2]] + ; SI-NEXT: [[BITCAST3:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR2]](s32) + ; SI-NEXT: $vgpr0 = COPY [[BITCAST3]](<2 x s16>) ; VI-LABEL: name: test_fshl_v2s16_v2s16 ; VI: [[COPY:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0 - ; VI: [[COPY1:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr1 - ; VI: [[COPY2:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr2 - ; VI: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY]](<2 x s16>) - ; VI: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST]](s32) - ; VI: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; VI: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) - ; VI: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) - ; VI: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[COPY1]](<2 x s16>) - ; VI: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST1]](s32) - ; VI: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C]](s32) - ; VI: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR1]](s32) - ; VI: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[COPY2]](<2 x s16>) - ; VI: [[TRUNC4:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST2]](s32) - ; VI: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C]](s32) - ; VI: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR2]](s32) - ; VI: [[C1:%[0-9]+]]:_(s16) = G_CONSTANT i16 15 - ; VI: [[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC4]], [[C1]] - ; VI: [[C2:%[0-9]+]]:_(s16) = G_CONSTANT i16 -1 - ; VI: [[XOR:%[0-9]+]]:_(s16) = G_XOR [[TRUNC4]], [[C2]] - ; VI: [[AND1:%[0-9]+]]:_(s16) = G_AND [[XOR]], [[C1]] - ; VI: [[C3:%[0-9]+]]:_(s16) = G_CONSTANT i16 1 - ; VI: [[SHL:%[0-9]+]]:_(s16) = G_SHL [[TRUNC]], [[AND]](s16) - ; VI: [[LSHR3:%[0-9]+]]:_(s16) = G_LSHR [[TRUNC2]], [[C3]](s16) - ; VI: [[LSHR4:%[0-9]+]]:_(s16) = G_LSHR [[LSHR3]], [[AND1]](s16) - ; VI: [[OR:%[0-9]+]]:_(s16) = G_OR [[SHL]], [[LSHR4]] - ; VI: [[AND2:%[0-9]+]]:_(s16) = G_AND [[TRUNC5]], [[C1]] - ; VI: [[XOR1:%[0-9]+]]:_(s16) = G_XOR [[TRUNC5]], [[C2]] - ; VI: [[AND3:%[0-9]+]]:_(s16) = G_AND [[XOR1]], [[C1]] - ; VI: [[SHL1:%[0-9]+]]:_(s16) = G_SHL [[TRUNC1]], [[AND2]](s16) - ; VI: [[LSHR5:%[0-9]+]]:_(s16) = G_LSHR [[TRUNC3]], [[C3]](s16) - ; VI: [[LSHR6:%[0-9]+]]:_(s16) = G_LSHR [[LSHR5]], [[AND3]](s16) - ; VI: [[OR1:%[0-9]+]]:_(s16) = G_OR [[SHL1]], [[LSHR6]] - ; VI: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[OR]](s16) - ; VI: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[OR1]](s16) - ; VI: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C]](s32) - ; VI: [[OR2:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL2]] - ; VI: [[BITCAST3:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR2]](s32) - ; VI: $vgpr0 = COPY [[BITCAST3]](<2 x s16>) + ; VI-NEXT: [[COPY1:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr1 + ; VI-NEXT: [[COPY2:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr2 + ; VI-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY]](<2 x s16>) + ; VI-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST]](s32) + ; VI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; VI-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) + ; VI-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) + ; VI-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[COPY1]](<2 x s16>) + ; VI-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST1]](s32) + ; VI-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C]](s32) + ; VI-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR1]](s32) + ; VI-NEXT: 
[[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[COPY2]](<2 x s16>) + ; VI-NEXT: [[TRUNC4:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST2]](s32) + ; VI-NEXT: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C]](s32) + ; VI-NEXT: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR2]](s32) + ; VI-NEXT: [[C1:%[0-9]+]]:_(s16) = G_CONSTANT i16 15 + ; VI-NEXT: [[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC4]], [[C1]] + ; VI-NEXT: [[C2:%[0-9]+]]:_(s16) = G_CONSTANT i16 -1 + ; VI-NEXT: [[XOR:%[0-9]+]]:_(s16) = G_XOR [[TRUNC4]], [[C2]] + ; VI-NEXT: [[AND1:%[0-9]+]]:_(s16) = G_AND [[XOR]], [[C1]] + ; VI-NEXT: [[C3:%[0-9]+]]:_(s16) = G_CONSTANT i16 1 + ; VI-NEXT: [[SHL:%[0-9]+]]:_(s16) = G_SHL [[TRUNC]], [[AND]](s16) + ; VI-NEXT: [[LSHR3:%[0-9]+]]:_(s16) = G_LSHR [[TRUNC2]], [[C3]](s16) + ; VI-NEXT: [[LSHR4:%[0-9]+]]:_(s16) = G_LSHR [[LSHR3]], [[AND1]](s16) + ; VI-NEXT: [[OR:%[0-9]+]]:_(s16) = G_OR [[SHL]], [[LSHR4]] + ; VI-NEXT: [[AND2:%[0-9]+]]:_(s16) = G_AND [[TRUNC5]], [[C1]] + ; VI-NEXT: [[XOR1:%[0-9]+]]:_(s16) = G_XOR [[TRUNC5]], [[C2]] + ; VI-NEXT: [[AND3:%[0-9]+]]:_(s16) = G_AND [[XOR1]], [[C1]] + ; VI-NEXT: [[SHL1:%[0-9]+]]:_(s16) = G_SHL [[TRUNC1]], [[AND2]](s16) + ; VI-NEXT: [[LSHR5:%[0-9]+]]:_(s16) = G_LSHR [[TRUNC3]], [[C3]](s16) + ; VI-NEXT: [[LSHR6:%[0-9]+]]:_(s16) = G_LSHR [[LSHR5]], [[AND3]](s16) + ; VI-NEXT: [[OR1:%[0-9]+]]:_(s16) = G_OR [[SHL1]], [[LSHR6]] + ; VI-NEXT: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[OR]](s16) + ; VI-NEXT: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[OR1]](s16) + ; VI-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C]](s32) + ; VI-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL2]] + ; VI-NEXT: [[BITCAST3:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR2]](s32) + ; VI-NEXT: $vgpr0 = COPY [[BITCAST3]](<2 x s16>) ; GFX9-LABEL: name: test_fshl_v2s16_v2s16 ; GFX9: [[COPY:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0 - ; GFX9: [[COPY1:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr1 - ; GFX9: [[COPY2:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr2 - ; GFX9: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 15 - ; GFX9: [[COPY3:%[0-9]+]]:_(s32) = COPY [[C]](s32) - ; GFX9: [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY3]](s32), [[C]](s32) - ; GFX9: [[AND:%[0-9]+]]:_(<2 x s16>) = G_AND [[COPY2]], [[BUILD_VECTOR_TRUNC]] - ; GFX9: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 -1 - ; GFX9: [[COPY4:%[0-9]+]]:_(s32) = COPY [[C1]](s32) - ; GFX9: [[BUILD_VECTOR_TRUNC1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY4]](s32), [[C1]](s32) - ; GFX9: [[XOR:%[0-9]+]]:_(<2 x s16>) = G_XOR [[COPY2]], [[BUILD_VECTOR_TRUNC1]] - ; GFX9: [[AND1:%[0-9]+]]:_(<2 x s16>) = G_AND [[XOR]], [[BUILD_VECTOR_TRUNC]] - ; GFX9: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 - ; GFX9: [[COPY5:%[0-9]+]]:_(s32) = COPY [[C2]](s32) - ; GFX9: [[BUILD_VECTOR_TRUNC2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY5]](s32), [[C2]](s32) - ; GFX9: [[SHL:%[0-9]+]]:_(<2 x s16>) = G_SHL [[COPY]], [[AND]](<2 x s16>) - ; GFX9: [[LSHR:%[0-9]+]]:_(<2 x s16>) = G_LSHR [[COPY1]], [[BUILD_VECTOR_TRUNC2]](<2 x s16>) - ; GFX9: [[LSHR1:%[0-9]+]]:_(<2 x s16>) = G_LSHR [[LSHR]], [[AND1]](<2 x s16>) - ; GFX9: [[OR:%[0-9]+]]:_(<2 x s16>) = G_OR [[SHL]], [[LSHR1]] - ; GFX9: $vgpr0 = COPY [[OR]](<2 x s16>) + ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr1 + ; GFX9-NEXT: [[COPY2:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr2 + ; GFX9-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 15 + ; GFX9-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY [[C]](s32) + ; GFX9-NEXT: [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY3]](s32), [[C]](s32) + ; GFX9-NEXT: [[AND:%[0-9]+]]:_(<2 x s16>) = G_AND 
[[COPY2]], [[BUILD_VECTOR_TRUNC]] + ; GFX9-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 -1 + ; GFX9-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY [[C1]](s32) + ; GFX9-NEXT: [[BUILD_VECTOR_TRUNC1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY4]](s32), [[C1]](s32) + ; GFX9-NEXT: [[XOR:%[0-9]+]]:_(<2 x s16>) = G_XOR [[COPY2]], [[BUILD_VECTOR_TRUNC1]] + ; GFX9-NEXT: [[AND1:%[0-9]+]]:_(<2 x s16>) = G_AND [[XOR]], [[BUILD_VECTOR_TRUNC]] + ; GFX9-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 + ; GFX9-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY [[C2]](s32) + ; GFX9-NEXT: [[BUILD_VECTOR_TRUNC2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY5]](s32), [[C2]](s32) + ; GFX9-NEXT: [[SHL:%[0-9]+]]:_(<2 x s16>) = G_SHL [[COPY]], [[AND]](<2 x s16>) + ; GFX9-NEXT: [[LSHR:%[0-9]+]]:_(<2 x s16>) = G_LSHR [[COPY1]], [[BUILD_VECTOR_TRUNC2]](<2 x s16>) + ; GFX9-NEXT: [[LSHR1:%[0-9]+]]:_(<2 x s16>) = G_LSHR [[LSHR]], [[AND1]](<2 x s16>) + ; GFX9-NEXT: [[OR:%[0-9]+]]:_(<2 x s16>) = G_OR [[SHL]], [[LSHR1]] + ; GFX9-NEXT: $vgpr0 = COPY [[OR]](<2 x s16>) %0:_(<2 x s16>) = COPY $vgpr0 %1:_(<2 x s16>) = COPY $vgpr1 %2:_(<2 x s16>) = COPY $vgpr2 @@ -330,55 +330,55 @@ body: | ; SI-LABEL: name: test_fshl_s64_s64 ; SI: [[COPY:%[0-9]+]]:_(s64) = COPY $vgpr0_vgpr1 - ; SI: [[COPY1:%[0-9]+]]:_(s64) = COPY $vgpr2_vgpr3 - ; SI: [[COPY2:%[0-9]+]]:_(s64) = COPY $vgpr4_vgpr5 - ; SI: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 63 - ; SI: [[AND:%[0-9]+]]:_(s64) = G_AND [[COPY2]], [[C]] - ; SI: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 -1 - ; SI: [[XOR:%[0-9]+]]:_(s64) = G_XOR [[COPY2]], [[C1]] - ; SI: [[AND1:%[0-9]+]]:_(s64) = G_AND [[XOR]], [[C]] - ; SI: [[TRUNC:%[0-9]+]]:_(s32) = G_TRUNC [[AND]](s64) - ; SI: [[SHL:%[0-9]+]]:_(s64) = G_SHL [[COPY]], [[TRUNC]](s32) - ; SI: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 - ; SI: [[LSHR:%[0-9]+]]:_(s64) = G_LSHR [[COPY1]], [[C2]](s32) - ; SI: [[TRUNC1:%[0-9]+]]:_(s32) = G_TRUNC [[AND1]](s64) - ; SI: [[LSHR1:%[0-9]+]]:_(s64) = G_LSHR [[LSHR]], [[TRUNC1]](s32) - ; SI: [[OR:%[0-9]+]]:_(s64) = G_OR [[SHL]], [[LSHR1]] - ; SI: $vgpr0_vgpr1 = COPY [[OR]](s64) + ; SI-NEXT: [[COPY1:%[0-9]+]]:_(s64) = COPY $vgpr2_vgpr3 + ; SI-NEXT: [[COPY2:%[0-9]+]]:_(s64) = COPY $vgpr4_vgpr5 + ; SI-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 63 + ; SI-NEXT: [[AND:%[0-9]+]]:_(s64) = G_AND [[COPY2]], [[C]] + ; SI-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 -1 + ; SI-NEXT: [[XOR:%[0-9]+]]:_(s64) = G_XOR [[COPY2]], [[C1]] + ; SI-NEXT: [[AND1:%[0-9]+]]:_(s64) = G_AND [[XOR]], [[C]] + ; SI-NEXT: [[TRUNC:%[0-9]+]]:_(s32) = G_TRUNC [[AND]](s64) + ; SI-NEXT: [[SHL:%[0-9]+]]:_(s64) = G_SHL [[COPY]], [[TRUNC]](s32) + ; SI-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 + ; SI-NEXT: [[LSHR:%[0-9]+]]:_(s64) = G_LSHR [[COPY1]], [[C2]](s32) + ; SI-NEXT: [[TRUNC1:%[0-9]+]]:_(s32) = G_TRUNC [[AND1]](s64) + ; SI-NEXT: [[LSHR1:%[0-9]+]]:_(s64) = G_LSHR [[LSHR]], [[TRUNC1]](s32) + ; SI-NEXT: [[OR:%[0-9]+]]:_(s64) = G_OR [[SHL]], [[LSHR1]] + ; SI-NEXT: $vgpr0_vgpr1 = COPY [[OR]](s64) ; VI-LABEL: name: test_fshl_s64_s64 ; VI: [[COPY:%[0-9]+]]:_(s64) = COPY $vgpr0_vgpr1 - ; VI: [[COPY1:%[0-9]+]]:_(s64) = COPY $vgpr2_vgpr3 - ; VI: [[COPY2:%[0-9]+]]:_(s64) = COPY $vgpr4_vgpr5 - ; VI: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 63 - ; VI: [[AND:%[0-9]+]]:_(s64) = G_AND [[COPY2]], [[C]] - ; VI: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 -1 - ; VI: [[XOR:%[0-9]+]]:_(s64) = G_XOR [[COPY2]], [[C1]] - ; VI: [[AND1:%[0-9]+]]:_(s64) = G_AND [[XOR]], [[C]] - ; VI: [[TRUNC:%[0-9]+]]:_(s32) = G_TRUNC [[AND]](s64) - ; VI: [[SHL:%[0-9]+]]:_(s64) = G_SHL [[COPY]], 
[[TRUNC]](s32) - ; VI: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 - ; VI: [[LSHR:%[0-9]+]]:_(s64) = G_LSHR [[COPY1]], [[C2]](s32) - ; VI: [[TRUNC1:%[0-9]+]]:_(s32) = G_TRUNC [[AND1]](s64) - ; VI: [[LSHR1:%[0-9]+]]:_(s64) = G_LSHR [[LSHR]], [[TRUNC1]](s32) - ; VI: [[OR:%[0-9]+]]:_(s64) = G_OR [[SHL]], [[LSHR1]] - ; VI: $vgpr0_vgpr1 = COPY [[OR]](s64) + ; VI-NEXT: [[COPY1:%[0-9]+]]:_(s64) = COPY $vgpr2_vgpr3 + ; VI-NEXT: [[COPY2:%[0-9]+]]:_(s64) = COPY $vgpr4_vgpr5 + ; VI-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 63 + ; VI-NEXT: [[AND:%[0-9]+]]:_(s64) = G_AND [[COPY2]], [[C]] + ; VI-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 -1 + ; VI-NEXT: [[XOR:%[0-9]+]]:_(s64) = G_XOR [[COPY2]], [[C1]] + ; VI-NEXT: [[AND1:%[0-9]+]]:_(s64) = G_AND [[XOR]], [[C]] + ; VI-NEXT: [[TRUNC:%[0-9]+]]:_(s32) = G_TRUNC [[AND]](s64) + ; VI-NEXT: [[SHL:%[0-9]+]]:_(s64) = G_SHL [[COPY]], [[TRUNC]](s32) + ; VI-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 + ; VI-NEXT: [[LSHR:%[0-9]+]]:_(s64) = G_LSHR [[COPY1]], [[C2]](s32) + ; VI-NEXT: [[TRUNC1:%[0-9]+]]:_(s32) = G_TRUNC [[AND1]](s64) + ; VI-NEXT: [[LSHR1:%[0-9]+]]:_(s64) = G_LSHR [[LSHR]], [[TRUNC1]](s32) + ; VI-NEXT: [[OR:%[0-9]+]]:_(s64) = G_OR [[SHL]], [[LSHR1]] + ; VI-NEXT: $vgpr0_vgpr1 = COPY [[OR]](s64) ; GFX9-LABEL: name: test_fshl_s64_s64 ; GFX9: [[COPY:%[0-9]+]]:_(s64) = COPY $vgpr0_vgpr1 - ; GFX9: [[COPY1:%[0-9]+]]:_(s64) = COPY $vgpr2_vgpr3 - ; GFX9: [[COPY2:%[0-9]+]]:_(s64) = COPY $vgpr4_vgpr5 - ; GFX9: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 63 - ; GFX9: [[AND:%[0-9]+]]:_(s64) = G_AND [[COPY2]], [[C]] - ; GFX9: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 -1 - ; GFX9: [[XOR:%[0-9]+]]:_(s64) = G_XOR [[COPY2]], [[C1]] - ; GFX9: [[AND1:%[0-9]+]]:_(s64) = G_AND [[XOR]], [[C]] - ; GFX9: [[TRUNC:%[0-9]+]]:_(s32) = G_TRUNC [[AND]](s64) - ; GFX9: [[SHL:%[0-9]+]]:_(s64) = G_SHL [[COPY]], [[TRUNC]](s32) - ; GFX9: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 - ; GFX9: [[LSHR:%[0-9]+]]:_(s64) = G_LSHR [[COPY1]], [[C2]](s32) - ; GFX9: [[TRUNC1:%[0-9]+]]:_(s32) = G_TRUNC [[AND1]](s64) - ; GFX9: [[LSHR1:%[0-9]+]]:_(s64) = G_LSHR [[LSHR]], [[TRUNC1]](s32) - ; GFX9: [[OR:%[0-9]+]]:_(s64) = G_OR [[SHL]], [[LSHR1]] - ; GFX9: $vgpr0_vgpr1 = COPY [[OR]](s64) + ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(s64) = COPY $vgpr2_vgpr3 + ; GFX9-NEXT: [[COPY2:%[0-9]+]]:_(s64) = COPY $vgpr4_vgpr5 + ; GFX9-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 63 + ; GFX9-NEXT: [[AND:%[0-9]+]]:_(s64) = G_AND [[COPY2]], [[C]] + ; GFX9-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 -1 + ; GFX9-NEXT: [[XOR:%[0-9]+]]:_(s64) = G_XOR [[COPY2]], [[C1]] + ; GFX9-NEXT: [[AND1:%[0-9]+]]:_(s64) = G_AND [[XOR]], [[C]] + ; GFX9-NEXT: [[TRUNC:%[0-9]+]]:_(s32) = G_TRUNC [[AND]](s64) + ; GFX9-NEXT: [[SHL:%[0-9]+]]:_(s64) = G_SHL [[COPY]], [[TRUNC]](s32) + ; GFX9-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 + ; GFX9-NEXT: [[LSHR:%[0-9]+]]:_(s64) = G_LSHR [[COPY1]], [[C2]](s32) + ; GFX9-NEXT: [[TRUNC1:%[0-9]+]]:_(s32) = G_TRUNC [[AND1]](s64) + ; GFX9-NEXT: [[LSHR1:%[0-9]+]]:_(s64) = G_LSHR [[LSHR]], [[TRUNC1]](s32) + ; GFX9-NEXT: [[OR:%[0-9]+]]:_(s64) = G_OR [[SHL]], [[LSHR1]] + ; GFX9-NEXT: $vgpr0_vgpr1 = COPY [[OR]](s64) %0:_(s64) = COPY $vgpr0_vgpr1 %1:_(s64) = COPY $vgpr2_vgpr3 %2:_(s64) = COPY $vgpr4_vgpr5 @@ -394,83 +394,83 @@ body: | ; SI-LABEL: name: test_fshl_s8_s8 ; SI: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; SI: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; SI: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; SI: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 7 - ; SI: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY2]], [[C]] - ; SI: 
[[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 -1 - ; SI: [[XOR:%[0-9]+]]:_(s32) = G_XOR [[COPY2]], [[C1]] - ; SI: [[AND1:%[0-9]+]]:_(s32) = G_AND [[XOR]], [[C]] - ; SI: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 - ; SI: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 255 - ; SI: [[AND2:%[0-9]+]]:_(s32) = G_AND [[AND]], [[C3]] - ; SI: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY]], [[AND2]](s32) - ; SI: [[COPY3:%[0-9]+]]:_(s32) = COPY [[C2]](s32) - ; SI: [[AND3:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C3]] - ; SI: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[AND3]], [[COPY3]](s32) - ; SI: [[AND4:%[0-9]+]]:_(s32) = G_AND [[AND1]], [[C3]] - ; SI: [[AND5:%[0-9]+]]:_(s32) = G_AND [[LSHR]], [[C3]] - ; SI: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[AND5]], [[AND4]](s32) - ; SI: [[OR:%[0-9]+]]:_(s32) = G_OR [[SHL]], [[LSHR1]] - ; SI: $vgpr0 = COPY [[OR]](s32) + ; SI-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; SI-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 + ; SI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 7 + ; SI-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY2]], [[C]] + ; SI-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 -1 + ; SI-NEXT: [[XOR:%[0-9]+]]:_(s32) = G_XOR [[COPY2]], [[C1]] + ; SI-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[XOR]], [[C]] + ; SI-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 + ; SI-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 255 + ; SI-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[AND]], [[C3]] + ; SI-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY]], [[AND2]](s32) + ; SI-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY [[C2]](s32) + ; SI-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C3]] + ; SI-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[AND3]], [[COPY3]](s32) + ; SI-NEXT: [[AND4:%[0-9]+]]:_(s32) = G_AND [[AND1]], [[C3]] + ; SI-NEXT: [[AND5:%[0-9]+]]:_(s32) = G_AND [[LSHR]], [[C3]] + ; SI-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[AND5]], [[AND4]](s32) + ; SI-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[SHL]], [[LSHR1]] + ; SI-NEXT: $vgpr0 = COPY [[OR]](s32) ; VI-LABEL: name: test_fshl_s8_s8 ; VI: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; VI: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; VI: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; VI: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 7 - ; VI: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY2]], [[C]] - ; VI: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 -1 - ; VI: [[XOR:%[0-9]+]]:_(s32) = G_XOR [[COPY2]], [[C1]] - ; VI: [[AND1:%[0-9]+]]:_(s32) = G_AND [[XOR]], [[C]] - ; VI: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 - ; VI: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 255 - ; VI: [[AND2:%[0-9]+]]:_(s32) = G_AND [[AND]], [[C3]] - ; VI: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY]](s32) - ; VI: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[AND2]](s32) - ; VI: [[SHL:%[0-9]+]]:_(s16) = G_SHL [[TRUNC]], [[TRUNC1]](s16) - ; VI: [[COPY3:%[0-9]+]]:_(s32) = COPY [[C2]](s32) - ; VI: [[C4:%[0-9]+]]:_(s16) = G_CONSTANT i16 255 - ; VI: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[COPY1]](s32) - ; VI: [[AND3:%[0-9]+]]:_(s16) = G_AND [[TRUNC2]], [[C4]] - ; VI: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[COPY3]](s32) - ; VI: [[LSHR:%[0-9]+]]:_(s16) = G_LSHR [[AND3]], [[TRUNC3]](s16) - ; VI: [[AND4:%[0-9]+]]:_(s32) = G_AND [[AND1]], [[C3]] - ; VI: [[AND5:%[0-9]+]]:_(s16) = G_AND [[LSHR]], [[C4]] - ; VI: [[TRUNC4:%[0-9]+]]:_(s16) = G_TRUNC [[AND4]](s32) - ; VI: [[LSHR1:%[0-9]+]]:_(s16) = G_LSHR [[AND5]], [[TRUNC4]](s16) - ; VI: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[SHL]](s16) - ; VI: [[ANYEXT1:%[0-9]+]]:_(s32) = G_ANYEXT [[LSHR1]](s16) - ; VI: [[OR:%[0-9]+]]:_(s32) = G_OR [[ANYEXT]], [[ANYEXT1]] - ; VI: $vgpr0 = COPY [[OR]](s32) + ; VI-NEXT: [[COPY1:%[0-9]+]]:_(s32) = 
COPY $vgpr1 + ; VI-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 + ; VI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 7 + ; VI-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY2]], [[C]] + ; VI-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 -1 + ; VI-NEXT: [[XOR:%[0-9]+]]:_(s32) = G_XOR [[COPY2]], [[C1]] + ; VI-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[XOR]], [[C]] + ; VI-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 + ; VI-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 255 + ; VI-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[AND]], [[C3]] + ; VI-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY]](s32) + ; VI-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[AND2]](s32) + ; VI-NEXT: [[SHL:%[0-9]+]]:_(s16) = G_SHL [[TRUNC]], [[TRUNC1]](s16) + ; VI-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY [[C2]](s32) + ; VI-NEXT: [[C4:%[0-9]+]]:_(s16) = G_CONSTANT i16 255 + ; VI-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[COPY1]](s32) + ; VI-NEXT: [[AND3:%[0-9]+]]:_(s16) = G_AND [[TRUNC2]], [[C4]] + ; VI-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[COPY3]](s32) + ; VI-NEXT: [[LSHR:%[0-9]+]]:_(s16) = G_LSHR [[AND3]], [[TRUNC3]](s16) + ; VI-NEXT: [[AND4:%[0-9]+]]:_(s32) = G_AND [[AND1]], [[C3]] + ; VI-NEXT: [[AND5:%[0-9]+]]:_(s16) = G_AND [[LSHR]], [[C4]] + ; VI-NEXT: [[TRUNC4:%[0-9]+]]:_(s16) = G_TRUNC [[AND4]](s32) + ; VI-NEXT: [[LSHR1:%[0-9]+]]:_(s16) = G_LSHR [[AND5]], [[TRUNC4]](s16) + ; VI-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[SHL]](s16) + ; VI-NEXT: [[ANYEXT1:%[0-9]+]]:_(s32) = G_ANYEXT [[LSHR1]](s16) + ; VI-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[ANYEXT]], [[ANYEXT1]] + ; VI-NEXT: $vgpr0 = COPY [[OR]](s32) ; GFX9-LABEL: name: test_fshl_s8_s8 ; GFX9: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX9: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX9: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; GFX9: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 7 - ; GFX9: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY2]], [[C]] - ; GFX9: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 -1 - ; GFX9: [[XOR:%[0-9]+]]:_(s32) = G_XOR [[COPY2]], [[C1]] - ; GFX9: [[AND1:%[0-9]+]]:_(s32) = G_AND [[XOR]], [[C]] - ; GFX9: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 - ; GFX9: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 255 - ; GFX9: [[AND2:%[0-9]+]]:_(s32) = G_AND [[AND]], [[C3]] - ; GFX9: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY]](s32) - ; GFX9: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[AND2]](s32) - ; GFX9: [[SHL:%[0-9]+]]:_(s16) = G_SHL [[TRUNC]], [[TRUNC1]](s16) - ; GFX9: [[COPY3:%[0-9]+]]:_(s32) = COPY [[C2]](s32) - ; GFX9: [[C4:%[0-9]+]]:_(s16) = G_CONSTANT i16 255 - ; GFX9: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[COPY1]](s32) - ; GFX9: [[AND3:%[0-9]+]]:_(s16) = G_AND [[TRUNC2]], [[C4]] - ; GFX9: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[COPY3]](s32) - ; GFX9: [[LSHR:%[0-9]+]]:_(s16) = G_LSHR [[AND3]], [[TRUNC3]](s16) - ; GFX9: [[AND4:%[0-9]+]]:_(s32) = G_AND [[AND1]], [[C3]] - ; GFX9: [[AND5:%[0-9]+]]:_(s16) = G_AND [[LSHR]], [[C4]] - ; GFX9: [[TRUNC4:%[0-9]+]]:_(s16) = G_TRUNC [[AND4]](s32) - ; GFX9: [[LSHR1:%[0-9]+]]:_(s16) = G_LSHR [[AND5]], [[TRUNC4]](s16) - ; GFX9: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[SHL]](s16) - ; GFX9: [[ANYEXT1:%[0-9]+]]:_(s32) = G_ANYEXT [[LSHR1]](s16) - ; GFX9: [[OR:%[0-9]+]]:_(s32) = G_OR [[ANYEXT]], [[ANYEXT1]] - ; GFX9: $vgpr0 = COPY [[OR]](s32) + ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; GFX9-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 + ; GFX9-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 7 + ; GFX9-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY2]], [[C]] + ; GFX9-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 -1 + ; GFX9-NEXT: [[XOR:%[0-9]+]]:_(s32) = 
G_XOR [[COPY2]], [[C1]] + ; GFX9-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[XOR]], [[C]] + ; GFX9-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 + ; GFX9-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 255 + ; GFX9-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[AND]], [[C3]] + ; GFX9-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY]](s32) + ; GFX9-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[AND2]](s32) + ; GFX9-NEXT: [[SHL:%[0-9]+]]:_(s16) = G_SHL [[TRUNC]], [[TRUNC1]](s16) + ; GFX9-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY [[C2]](s32) + ; GFX9-NEXT: [[C4:%[0-9]+]]:_(s16) = G_CONSTANT i16 255 + ; GFX9-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[COPY1]](s32) + ; GFX9-NEXT: [[AND3:%[0-9]+]]:_(s16) = G_AND [[TRUNC2]], [[C4]] + ; GFX9-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[COPY3]](s32) + ; GFX9-NEXT: [[LSHR:%[0-9]+]]:_(s16) = G_LSHR [[AND3]], [[TRUNC3]](s16) + ; GFX9-NEXT: [[AND4:%[0-9]+]]:_(s32) = G_AND [[AND1]], [[C3]] + ; GFX9-NEXT: [[AND5:%[0-9]+]]:_(s16) = G_AND [[LSHR]], [[C4]] + ; GFX9-NEXT: [[TRUNC4:%[0-9]+]]:_(s16) = G_TRUNC [[AND4]](s32) + ; GFX9-NEXT: [[LSHR1:%[0-9]+]]:_(s16) = G_LSHR [[AND5]], [[TRUNC4]](s16) + ; GFX9-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[SHL]](s16) + ; GFX9-NEXT: [[ANYEXT1:%[0-9]+]]:_(s32) = G_ANYEXT [[LSHR1]](s16) + ; GFX9-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[ANYEXT]], [[ANYEXT1]] + ; GFX9-NEXT: $vgpr0 = COPY [[OR]](s32) %0:_(s32) = COPY $vgpr0 %1:_(s32) = COPY $vgpr1 %2:_(s32) = COPY $vgpr2 @@ -490,124 +490,124 @@ body: | ; SI-LABEL: name: test_fshl_s24_s24 ; SI: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; SI: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; SI: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; SI: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 23 - ; SI: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 24 - ; SI: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 16777215 - ; SI: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY2]], [[C2]] - ; SI: [[COPY3:%[0-9]+]]:_(s32) = COPY [[C1]](s32) - ; SI: [[UITOFP:%[0-9]+]]:_(s32) = G_UITOFP [[COPY3]](s32) - ; SI: [[AMDGPU_RCP_IFLAG:%[0-9]+]]:_(s32) = G_AMDGPU_RCP_IFLAG [[UITOFP]](s32) - ; SI: [[C3:%[0-9]+]]:_(s32) = G_FCONSTANT float 0x41EFFFFFC0000000 - ; SI: [[FMUL:%[0-9]+]]:_(s32) = G_FMUL [[AMDGPU_RCP_IFLAG]], [[C3]] - ; SI: [[FPTOUI:%[0-9]+]]:_(s32) = G_FPTOUI [[FMUL]](s32) - ; SI: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 - ; SI: [[SUB:%[0-9]+]]:_(s32) = G_SUB [[C4]], [[COPY3]] - ; SI: [[MUL:%[0-9]+]]:_(s32) = G_MUL [[SUB]], [[FPTOUI]] - ; SI: [[UMULH:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI]], [[MUL]] - ; SI: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[FPTOUI]], [[UMULH]] - ; SI: [[UMULH1:%[0-9]+]]:_(s32) = G_UMULH [[AND]], [[ADD]] - ; SI: [[MUL1:%[0-9]+]]:_(s32) = G_MUL [[UMULH1]], [[COPY3]] - ; SI: [[SUB1:%[0-9]+]]:_(s32) = G_SUB [[AND]], [[MUL1]] - ; SI: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 - ; SI: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[SUB1]](s32), [[COPY3]] - ; SI: [[SUB2:%[0-9]+]]:_(s32) = G_SUB [[SUB1]], [[COPY3]] - ; SI: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[ICMP]](s1), [[SUB2]], [[SUB1]] - ; SI: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[SELECT]](s32), [[COPY3]] - ; SI: [[SUB3:%[0-9]+]]:_(s32) = G_SUB [[SELECT]], [[COPY3]] - ; SI: [[SELECT1:%[0-9]+]]:_(s32) = G_SELECT [[ICMP1]](s1), [[SUB3]], [[SELECT]] - ; SI: [[SUB4:%[0-9]+]]:_(s32) = G_SUB [[C]], [[SELECT1]] - ; SI: [[AND1:%[0-9]+]]:_(s32) = G_AND [[SELECT1]], [[C2]] - ; SI: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY]], [[AND1]](s32) - ; SI: [[COPY4:%[0-9]+]]:_(s32) = COPY [[C5]](s32) - ; SI: [[AND2:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C2]] - ; SI: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[AND2]], 
[[COPY4]](s32) - ; SI: [[AND3:%[0-9]+]]:_(s32) = G_AND [[SUB4]], [[C2]] - ; SI: [[AND4:%[0-9]+]]:_(s32) = G_AND [[LSHR]], [[C2]] - ; SI: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[AND4]], [[AND3]](s32) - ; SI: [[OR:%[0-9]+]]:_(s32) = G_OR [[SHL]], [[LSHR1]] - ; SI: $vgpr0 = COPY [[OR]](s32) + ; SI-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; SI-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 + ; SI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 23 + ; SI-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 24 + ; SI-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 16777215 + ; SI-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY2]], [[C2]] + ; SI-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY [[C1]](s32) + ; SI-NEXT: [[UITOFP:%[0-9]+]]:_(s32) = G_UITOFP [[COPY3]](s32) + ; SI-NEXT: [[AMDGPU_RCP_IFLAG:%[0-9]+]]:_(s32) = G_AMDGPU_RCP_IFLAG [[UITOFP]](s32) + ; SI-NEXT: [[C3:%[0-9]+]]:_(s32) = G_FCONSTANT float 0x41EFFFFFC0000000 + ; SI-NEXT: [[FMUL:%[0-9]+]]:_(s32) = G_FMUL [[AMDGPU_RCP_IFLAG]], [[C3]] + ; SI-NEXT: [[FPTOUI:%[0-9]+]]:_(s32) = G_FPTOUI [[FMUL]](s32) + ; SI-NEXT: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 + ; SI-NEXT: [[SUB:%[0-9]+]]:_(s32) = G_SUB [[C4]], [[COPY3]] + ; SI-NEXT: [[MUL:%[0-9]+]]:_(s32) = G_MUL [[SUB]], [[FPTOUI]] + ; SI-NEXT: [[UMULH:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI]], [[MUL]] + ; SI-NEXT: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[FPTOUI]], [[UMULH]] + ; SI-NEXT: [[UMULH1:%[0-9]+]]:_(s32) = G_UMULH [[AND]], [[ADD]] + ; SI-NEXT: [[MUL1:%[0-9]+]]:_(s32) = G_MUL [[UMULH1]], [[COPY3]] + ; SI-NEXT: [[SUB1:%[0-9]+]]:_(s32) = G_SUB [[AND]], [[MUL1]] + ; SI-NEXT: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 + ; SI-NEXT: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[SUB1]](s32), [[COPY3]] + ; SI-NEXT: [[SUB2:%[0-9]+]]:_(s32) = G_SUB [[SUB1]], [[COPY3]] + ; SI-NEXT: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[ICMP]](s1), [[SUB2]], [[SUB1]] + ; SI-NEXT: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[SELECT]](s32), [[COPY3]] + ; SI-NEXT: [[SUB3:%[0-9]+]]:_(s32) = G_SUB [[SELECT]], [[COPY3]] + ; SI-NEXT: [[SELECT1:%[0-9]+]]:_(s32) = G_SELECT [[ICMP1]](s1), [[SUB3]], [[SELECT]] + ; SI-NEXT: [[SUB4:%[0-9]+]]:_(s32) = G_SUB [[C]], [[SELECT1]] + ; SI-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[SELECT1]], [[C2]] + ; SI-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY]], [[AND1]](s32) + ; SI-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY [[C5]](s32) + ; SI-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C2]] + ; SI-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[AND2]], [[COPY4]](s32) + ; SI-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[SUB4]], [[C2]] + ; SI-NEXT: [[AND4:%[0-9]+]]:_(s32) = G_AND [[LSHR]], [[C2]] + ; SI-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[AND4]], [[AND3]](s32) + ; SI-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[SHL]], [[LSHR1]] + ; SI-NEXT: $vgpr0 = COPY [[OR]](s32) ; VI-LABEL: name: test_fshl_s24_s24 ; VI: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; VI: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; VI: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; VI: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 23 - ; VI: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 24 - ; VI: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 16777215 - ; VI: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY2]], [[C2]] - ; VI: [[COPY3:%[0-9]+]]:_(s32) = COPY [[C1]](s32) - ; VI: [[UITOFP:%[0-9]+]]:_(s32) = G_UITOFP [[COPY3]](s32) - ; VI: [[AMDGPU_RCP_IFLAG:%[0-9]+]]:_(s32) = G_AMDGPU_RCP_IFLAG [[UITOFP]](s32) - ; VI: [[C3:%[0-9]+]]:_(s32) = G_FCONSTANT float 0x41EFFFFFC0000000 - ; VI: [[FMUL:%[0-9]+]]:_(s32) = G_FMUL [[AMDGPU_RCP_IFLAG]], [[C3]] - ; VI: [[FPTOUI:%[0-9]+]]:_(s32) = G_FPTOUI [[FMUL]](s32) - ; VI: 
[[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 - ; VI: [[SUB:%[0-9]+]]:_(s32) = G_SUB [[C4]], [[COPY3]] - ; VI: [[MUL:%[0-9]+]]:_(s32) = G_MUL [[SUB]], [[FPTOUI]] - ; VI: [[UMULH:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI]], [[MUL]] - ; VI: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[FPTOUI]], [[UMULH]] - ; VI: [[UMULH1:%[0-9]+]]:_(s32) = G_UMULH [[AND]], [[ADD]] - ; VI: [[MUL1:%[0-9]+]]:_(s32) = G_MUL [[UMULH1]], [[COPY3]] - ; VI: [[SUB1:%[0-9]+]]:_(s32) = G_SUB [[AND]], [[MUL1]] - ; VI: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 - ; VI: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[SUB1]](s32), [[COPY3]] - ; VI: [[SUB2:%[0-9]+]]:_(s32) = G_SUB [[SUB1]], [[COPY3]] - ; VI: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[ICMP]](s1), [[SUB2]], [[SUB1]] - ; VI: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[SELECT]](s32), [[COPY3]] - ; VI: [[SUB3:%[0-9]+]]:_(s32) = G_SUB [[SELECT]], [[COPY3]] - ; VI: [[SELECT1:%[0-9]+]]:_(s32) = G_SELECT [[ICMP1]](s1), [[SUB3]], [[SELECT]] - ; VI: [[SUB4:%[0-9]+]]:_(s32) = G_SUB [[C]], [[SELECT1]] - ; VI: [[AND1:%[0-9]+]]:_(s32) = G_AND [[SELECT1]], [[C2]] - ; VI: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY]], [[AND1]](s32) - ; VI: [[COPY4:%[0-9]+]]:_(s32) = COPY [[C5]](s32) - ; VI: [[AND2:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C2]] - ; VI: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[AND2]], [[COPY4]](s32) - ; VI: [[AND3:%[0-9]+]]:_(s32) = G_AND [[SUB4]], [[C2]] - ; VI: [[AND4:%[0-9]+]]:_(s32) = G_AND [[LSHR]], [[C2]] - ; VI: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[AND4]], [[AND3]](s32) - ; VI: [[OR:%[0-9]+]]:_(s32) = G_OR [[SHL]], [[LSHR1]] - ; VI: $vgpr0 = COPY [[OR]](s32) + ; VI-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; VI-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 + ; VI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 23 + ; VI-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 24 + ; VI-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 16777215 + ; VI-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY2]], [[C2]] + ; VI-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY [[C1]](s32) + ; VI-NEXT: [[UITOFP:%[0-9]+]]:_(s32) = G_UITOFP [[COPY3]](s32) + ; VI-NEXT: [[AMDGPU_RCP_IFLAG:%[0-9]+]]:_(s32) = G_AMDGPU_RCP_IFLAG [[UITOFP]](s32) + ; VI-NEXT: [[C3:%[0-9]+]]:_(s32) = G_FCONSTANT float 0x41EFFFFFC0000000 + ; VI-NEXT: [[FMUL:%[0-9]+]]:_(s32) = G_FMUL [[AMDGPU_RCP_IFLAG]], [[C3]] + ; VI-NEXT: [[FPTOUI:%[0-9]+]]:_(s32) = G_FPTOUI [[FMUL]](s32) + ; VI-NEXT: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 + ; VI-NEXT: [[SUB:%[0-9]+]]:_(s32) = G_SUB [[C4]], [[COPY3]] + ; VI-NEXT: [[MUL:%[0-9]+]]:_(s32) = G_MUL [[SUB]], [[FPTOUI]] + ; VI-NEXT: [[UMULH:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI]], [[MUL]] + ; VI-NEXT: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[FPTOUI]], [[UMULH]] + ; VI-NEXT: [[UMULH1:%[0-9]+]]:_(s32) = G_UMULH [[AND]], [[ADD]] + ; VI-NEXT: [[MUL1:%[0-9]+]]:_(s32) = G_MUL [[UMULH1]], [[COPY3]] + ; VI-NEXT: [[SUB1:%[0-9]+]]:_(s32) = G_SUB [[AND]], [[MUL1]] + ; VI-NEXT: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 + ; VI-NEXT: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[SUB1]](s32), [[COPY3]] + ; VI-NEXT: [[SUB2:%[0-9]+]]:_(s32) = G_SUB [[SUB1]], [[COPY3]] + ; VI-NEXT: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[ICMP]](s1), [[SUB2]], [[SUB1]] + ; VI-NEXT: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[SELECT]](s32), [[COPY3]] + ; VI-NEXT: [[SUB3:%[0-9]+]]:_(s32) = G_SUB [[SELECT]], [[COPY3]] + ; VI-NEXT: [[SELECT1:%[0-9]+]]:_(s32) = G_SELECT [[ICMP1]](s1), [[SUB3]], [[SELECT]] + ; VI-NEXT: [[SUB4:%[0-9]+]]:_(s32) = G_SUB [[C]], [[SELECT1]] + ; VI-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[SELECT1]], [[C2]] + ; VI-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL 
[[COPY]], [[AND1]](s32) + ; VI-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY [[C5]](s32) + ; VI-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C2]] + ; VI-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[AND2]], [[COPY4]](s32) + ; VI-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[SUB4]], [[C2]] + ; VI-NEXT: [[AND4:%[0-9]+]]:_(s32) = G_AND [[LSHR]], [[C2]] + ; VI-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[AND4]], [[AND3]](s32) + ; VI-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[SHL]], [[LSHR1]] + ; VI-NEXT: $vgpr0 = COPY [[OR]](s32) ; GFX9-LABEL: name: test_fshl_s24_s24 ; GFX9: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX9: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX9: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; GFX9: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 23 - ; GFX9: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 24 - ; GFX9: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 16777215 - ; GFX9: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY2]], [[C2]] - ; GFX9: [[COPY3:%[0-9]+]]:_(s32) = COPY [[C1]](s32) - ; GFX9: [[UITOFP:%[0-9]+]]:_(s32) = G_UITOFP [[COPY3]](s32) - ; GFX9: [[AMDGPU_RCP_IFLAG:%[0-9]+]]:_(s32) = G_AMDGPU_RCP_IFLAG [[UITOFP]](s32) - ; GFX9: [[C3:%[0-9]+]]:_(s32) = G_FCONSTANT float 0x41EFFFFFC0000000 - ; GFX9: [[FMUL:%[0-9]+]]:_(s32) = G_FMUL [[AMDGPU_RCP_IFLAG]], [[C3]] - ; GFX9: [[FPTOUI:%[0-9]+]]:_(s32) = G_FPTOUI [[FMUL]](s32) - ; GFX9: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 - ; GFX9: [[SUB:%[0-9]+]]:_(s32) = G_SUB [[C4]], [[COPY3]] - ; GFX9: [[MUL:%[0-9]+]]:_(s32) = G_MUL [[SUB]], [[FPTOUI]] - ; GFX9: [[UMULH:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI]], [[MUL]] - ; GFX9: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[FPTOUI]], [[UMULH]] - ; GFX9: [[UMULH1:%[0-9]+]]:_(s32) = G_UMULH [[AND]], [[ADD]] - ; GFX9: [[MUL1:%[0-9]+]]:_(s32) = G_MUL [[UMULH1]], [[COPY3]] - ; GFX9: [[SUB1:%[0-9]+]]:_(s32) = G_SUB [[AND]], [[MUL1]] - ; GFX9: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 - ; GFX9: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[SUB1]](s32), [[COPY3]] - ; GFX9: [[SUB2:%[0-9]+]]:_(s32) = G_SUB [[SUB1]], [[COPY3]] - ; GFX9: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[ICMP]](s1), [[SUB2]], [[SUB1]] - ; GFX9: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[SELECT]](s32), [[COPY3]] - ; GFX9: [[SUB3:%[0-9]+]]:_(s32) = G_SUB [[SELECT]], [[COPY3]] - ; GFX9: [[SELECT1:%[0-9]+]]:_(s32) = G_SELECT [[ICMP1]](s1), [[SUB3]], [[SELECT]] - ; GFX9: [[SUB4:%[0-9]+]]:_(s32) = G_SUB [[C]], [[SELECT1]] - ; GFX9: [[AND1:%[0-9]+]]:_(s32) = G_AND [[SELECT1]], [[C2]] - ; GFX9: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY]], [[AND1]](s32) - ; GFX9: [[COPY4:%[0-9]+]]:_(s32) = COPY [[C5]](s32) - ; GFX9: [[AND2:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C2]] - ; GFX9: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[AND2]], [[COPY4]](s32) - ; GFX9: [[AND3:%[0-9]+]]:_(s32) = G_AND [[SUB4]], [[C2]] - ; GFX9: [[AND4:%[0-9]+]]:_(s32) = G_AND [[LSHR]], [[C2]] - ; GFX9: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[AND4]], [[AND3]](s32) - ; GFX9: [[OR:%[0-9]+]]:_(s32) = G_OR [[SHL]], [[LSHR1]] - ; GFX9: $vgpr0 = COPY [[OR]](s32) + ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; GFX9-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 + ; GFX9-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 23 + ; GFX9-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 24 + ; GFX9-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 16777215 + ; GFX9-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY2]], [[C2]] + ; GFX9-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY [[C1]](s32) + ; GFX9-NEXT: [[UITOFP:%[0-9]+]]:_(s32) = G_UITOFP [[COPY3]](s32) + ; GFX9-NEXT: [[AMDGPU_RCP_IFLAG:%[0-9]+]]:_(s32) = G_AMDGPU_RCP_IFLAG [[UITOFP]](s32) + ; GFX9-NEXT: 
[[C3:%[0-9]+]]:_(s32) = G_FCONSTANT float 0x41EFFFFFC0000000 + ; GFX9-NEXT: [[FMUL:%[0-9]+]]:_(s32) = G_FMUL [[AMDGPU_RCP_IFLAG]], [[C3]] + ; GFX9-NEXT: [[FPTOUI:%[0-9]+]]:_(s32) = G_FPTOUI [[FMUL]](s32) + ; GFX9-NEXT: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 + ; GFX9-NEXT: [[SUB:%[0-9]+]]:_(s32) = G_SUB [[C4]], [[COPY3]] + ; GFX9-NEXT: [[MUL:%[0-9]+]]:_(s32) = G_MUL [[SUB]], [[FPTOUI]] + ; GFX9-NEXT: [[UMULH:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI]], [[MUL]] + ; GFX9-NEXT: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[FPTOUI]], [[UMULH]] + ; GFX9-NEXT: [[UMULH1:%[0-9]+]]:_(s32) = G_UMULH [[AND]], [[ADD]] + ; GFX9-NEXT: [[MUL1:%[0-9]+]]:_(s32) = G_MUL [[UMULH1]], [[COPY3]] + ; GFX9-NEXT: [[SUB1:%[0-9]+]]:_(s32) = G_SUB [[AND]], [[MUL1]] + ; GFX9-NEXT: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 + ; GFX9-NEXT: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[SUB1]](s32), [[COPY3]] + ; GFX9-NEXT: [[SUB2:%[0-9]+]]:_(s32) = G_SUB [[SUB1]], [[COPY3]] + ; GFX9-NEXT: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[ICMP]](s1), [[SUB2]], [[SUB1]] + ; GFX9-NEXT: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[SELECT]](s32), [[COPY3]] + ; GFX9-NEXT: [[SUB3:%[0-9]+]]:_(s32) = G_SUB [[SELECT]], [[COPY3]] + ; GFX9-NEXT: [[SELECT1:%[0-9]+]]:_(s32) = G_SELECT [[ICMP1]](s1), [[SUB3]], [[SELECT]] + ; GFX9-NEXT: [[SUB4:%[0-9]+]]:_(s32) = G_SUB [[C]], [[SELECT1]] + ; GFX9-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[SELECT1]], [[C2]] + ; GFX9-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY]], [[AND1]](s32) + ; GFX9-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY [[C5]](s32) + ; GFX9-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C2]] + ; GFX9-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[AND2]], [[COPY4]](s32) + ; GFX9-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[SUB4]], [[C2]] + ; GFX9-NEXT: [[AND4:%[0-9]+]]:_(s32) = G_AND [[LSHR]], [[C2]] + ; GFX9-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[AND4]], [[AND3]](s32) + ; GFX9-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[SHL]], [[LSHR1]] + ; GFX9-NEXT: $vgpr0 = COPY [[OR]](s32) %0:_(s32) = COPY $vgpr0 %1:_(s32) = COPY $vgpr1 %2:_(s32) = COPY $vgpr2 @@ -627,235 +627,235 @@ body: | ; SI-LABEL: name: test_fshl_v3s16_v3s16 ; SI: [[COPY:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0 - ; SI: [[COPY1:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr1 - ; SI: [[COPY2:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr2 - ; SI: [[COPY3:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr3 - ; SI: [[COPY4:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr4 - ; SI: [[COPY5:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr5 - ; SI: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY]](<2 x s16>) - ; SI: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; SI: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) - ; SI: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[COPY1]](<2 x s16>) - ; SI: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[COPY2]](<2 x s16>) - ; SI: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C]](s32) - ; SI: [[BITCAST3:%[0-9]+]]:_(s32) = G_BITCAST [[COPY3]](<2 x s16>) - ; SI: [[BITCAST4:%[0-9]+]]:_(s32) = G_BITCAST [[COPY4]](<2 x s16>) - ; SI: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST4]](s32) - ; SI: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST4]], [[C]](s32) - ; SI: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR2]](s32) - ; SI: [[BITCAST5:%[0-9]+]]:_(s32) = G_BITCAST [[COPY5]](<2 x s16>) - ; SI: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST5]](s32) - ; SI: [[C1:%[0-9]+]]:_(s16) = G_CONSTANT i16 15 - ; SI: [[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC]], [[C1]] - ; SI: [[C2:%[0-9]+]]:_(s16) = G_CONSTANT i16 -1 - ; SI: [[XOR:%[0-9]+]]:_(s16) = G_XOR [[TRUNC]], [[C2]] - ; SI: [[AND1:%[0-9]+]]:_(s16) = G_AND [[XOR]], [[C1]] - ; SI: 
[[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[AND]](s16) - ; SI: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[BITCAST]], [[ZEXT]](s32) - ; SI: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[SHL]](s32) - ; SI: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 - ; SI: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 - ; SI: [[AND2:%[0-9]+]]:_(s32) = G_AND [[BITCAST2]], [[C4]] - ; SI: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[AND2]], [[C3]](s32) - ; SI: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[AND1]](s16) - ; SI: [[AND3:%[0-9]+]]:_(s32) = G_AND [[LSHR3]], [[C4]] - ; SI: [[LSHR4:%[0-9]+]]:_(s32) = G_LSHR [[AND3]], [[ZEXT1]](s32) - ; SI: [[TRUNC4:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR4]](s32) - ; SI: [[OR:%[0-9]+]]:_(s16) = G_OR [[TRUNC3]], [[TRUNC4]] - ; SI: [[AND4:%[0-9]+]]:_(s16) = G_AND [[TRUNC1]], [[C1]] - ; SI: [[XOR1:%[0-9]+]]:_(s16) = G_XOR [[TRUNC1]], [[C2]] - ; SI: [[AND5:%[0-9]+]]:_(s16) = G_AND [[XOR1]], [[C1]] - ; SI: [[ZEXT2:%[0-9]+]]:_(s32) = G_ZEXT [[AND4]](s16) - ; SI: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[LSHR]], [[ZEXT2]](s32) - ; SI: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[SHL1]](s32) - ; SI: [[COPY6:%[0-9]+]]:_(s32) = COPY [[C3]](s32) - ; SI: [[AND6:%[0-9]+]]:_(s32) = G_AND [[LSHR1]], [[C4]] - ; SI: [[LSHR5:%[0-9]+]]:_(s32) = G_LSHR [[AND6]], [[COPY6]](s32) - ; SI: [[ZEXT3:%[0-9]+]]:_(s32) = G_ZEXT [[AND5]](s16) - ; SI: [[AND7:%[0-9]+]]:_(s32) = G_AND [[LSHR5]], [[C4]] - ; SI: [[LSHR6:%[0-9]+]]:_(s32) = G_LSHR [[AND7]], [[ZEXT3]](s32) - ; SI: [[TRUNC6:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR6]](s32) - ; SI: [[OR1:%[0-9]+]]:_(s16) = G_OR [[TRUNC5]], [[TRUNC6]] - ; SI: [[AND8:%[0-9]+]]:_(s16) = G_AND [[TRUNC2]], [[C1]] - ; SI: [[XOR2:%[0-9]+]]:_(s16) = G_XOR [[TRUNC2]], [[C2]] - ; SI: [[AND9:%[0-9]+]]:_(s16) = G_AND [[XOR2]], [[C1]] - ; SI: [[ZEXT4:%[0-9]+]]:_(s32) = G_ZEXT [[AND8]](s16) - ; SI: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[BITCAST1]], [[ZEXT4]](s32) - ; SI: [[TRUNC7:%[0-9]+]]:_(s16) = G_TRUNC [[SHL2]](s32) - ; SI: [[COPY7:%[0-9]+]]:_(s32) = COPY [[C3]](s32) - ; SI: [[AND10:%[0-9]+]]:_(s32) = G_AND [[BITCAST3]], [[C4]] - ; SI: [[LSHR7:%[0-9]+]]:_(s32) = G_LSHR [[AND10]], [[COPY7]](s32) - ; SI: [[ZEXT5:%[0-9]+]]:_(s32) = G_ZEXT [[AND9]](s16) - ; SI: [[AND11:%[0-9]+]]:_(s32) = G_AND [[LSHR7]], [[C4]] - ; SI: [[LSHR8:%[0-9]+]]:_(s32) = G_LSHR [[AND11]], [[ZEXT5]](s32) - ; SI: [[TRUNC8:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR8]](s32) - ; SI: [[OR2:%[0-9]+]]:_(s16) = G_OR [[TRUNC7]], [[TRUNC8]] - ; SI: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF - ; SI: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<4 x s16>) - ; SI: [[BITCAST6:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>) - ; SI: [[LSHR9:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST6]], [[C]](s32) - ; SI: [[BITCAST7:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>) - ; SI: [[ZEXT6:%[0-9]+]]:_(s32) = G_ZEXT [[OR]](s16) - ; SI: [[ZEXT7:%[0-9]+]]:_(s32) = G_ZEXT [[OR1]](s16) - ; SI: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[ZEXT7]], [[C]](s32) - ; SI: [[OR3:%[0-9]+]]:_(s32) = G_OR [[ZEXT6]], [[SHL3]] - ; SI: [[BITCAST8:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR3]](s32) - ; SI: [[ZEXT8:%[0-9]+]]:_(s32) = G_ZEXT [[OR2]](s16) - ; SI: [[AND12:%[0-9]+]]:_(s32) = G_AND [[BITCAST6]], [[C4]] - ; SI: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[AND12]], [[C]](s32) - ; SI: [[OR4:%[0-9]+]]:_(s32) = G_OR [[ZEXT8]], [[SHL4]] - ; SI: [[BITCAST9:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR4]](s32) - ; SI: [[AND13:%[0-9]+]]:_(s32) = G_AND [[LSHR9]], [[C4]] - ; SI: [[AND14:%[0-9]+]]:_(s32) = G_AND [[BITCAST7]], [[C4]] - ; SI: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[AND14]], [[C]](s32) - ; SI: [[OR5:%[0-9]+]]:_(s32) = G_OR 
[[AND13]], [[SHL5]] - ; SI: [[BITCAST10:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR5]](s32) - ; SI: $vgpr0 = COPY [[BITCAST8]](<2 x s16>) - ; SI: $vgpr1 = COPY [[BITCAST9]](<2 x s16>) - ; SI: $vgpr2 = COPY [[BITCAST10]](<2 x s16>) + ; SI-NEXT: [[COPY1:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr1 + ; SI-NEXT: [[COPY2:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr2 + ; SI-NEXT: [[COPY3:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr3 + ; SI-NEXT: [[COPY4:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr4 + ; SI-NEXT: [[COPY5:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr5 + ; SI-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY]](<2 x s16>) + ; SI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; SI-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) + ; SI-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[COPY1]](<2 x s16>) + ; SI-NEXT: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[COPY2]](<2 x s16>) + ; SI-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C]](s32) + ; SI-NEXT: [[BITCAST3:%[0-9]+]]:_(s32) = G_BITCAST [[COPY3]](<2 x s16>) + ; SI-NEXT: [[BITCAST4:%[0-9]+]]:_(s32) = G_BITCAST [[COPY4]](<2 x s16>) + ; SI-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST4]](s32) + ; SI-NEXT: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST4]], [[C]](s32) + ; SI-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR2]](s32) + ; SI-NEXT: [[BITCAST5:%[0-9]+]]:_(s32) = G_BITCAST [[COPY5]](<2 x s16>) + ; SI-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST5]](s32) + ; SI-NEXT: [[C1:%[0-9]+]]:_(s16) = G_CONSTANT i16 15 + ; SI-NEXT: [[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC]], [[C1]] + ; SI-NEXT: [[C2:%[0-9]+]]:_(s16) = G_CONSTANT i16 -1 + ; SI-NEXT: [[XOR:%[0-9]+]]:_(s16) = G_XOR [[TRUNC]], [[C2]] + ; SI-NEXT: [[AND1:%[0-9]+]]:_(s16) = G_AND [[XOR]], [[C1]] + ; SI-NEXT: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[AND]](s16) + ; SI-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[BITCAST]], [[ZEXT]](s32) + ; SI-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[SHL]](s32) + ; SI-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 + ; SI-NEXT: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 + ; SI-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[BITCAST2]], [[C4]] + ; SI-NEXT: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[AND2]], [[C3]](s32) + ; SI-NEXT: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[AND1]](s16) + ; SI-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[LSHR3]], [[C4]] + ; SI-NEXT: [[LSHR4:%[0-9]+]]:_(s32) = G_LSHR [[AND3]], [[ZEXT1]](s32) + ; SI-NEXT: [[TRUNC4:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR4]](s32) + ; SI-NEXT: [[OR:%[0-9]+]]:_(s16) = G_OR [[TRUNC3]], [[TRUNC4]] + ; SI-NEXT: [[AND4:%[0-9]+]]:_(s16) = G_AND [[TRUNC1]], [[C1]] + ; SI-NEXT: [[XOR1:%[0-9]+]]:_(s16) = G_XOR [[TRUNC1]], [[C2]] + ; SI-NEXT: [[AND5:%[0-9]+]]:_(s16) = G_AND [[XOR1]], [[C1]] + ; SI-NEXT: [[ZEXT2:%[0-9]+]]:_(s32) = G_ZEXT [[AND4]](s16) + ; SI-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[LSHR]], [[ZEXT2]](s32) + ; SI-NEXT: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[SHL1]](s32) + ; SI-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY [[C3]](s32) + ; SI-NEXT: [[AND6:%[0-9]+]]:_(s32) = G_AND [[LSHR1]], [[C4]] + ; SI-NEXT: [[LSHR5:%[0-9]+]]:_(s32) = G_LSHR [[AND6]], [[COPY6]](s32) + ; SI-NEXT: [[ZEXT3:%[0-9]+]]:_(s32) = G_ZEXT [[AND5]](s16) + ; SI-NEXT: [[AND7:%[0-9]+]]:_(s32) = G_AND [[LSHR5]], [[C4]] + ; SI-NEXT: [[LSHR6:%[0-9]+]]:_(s32) = G_LSHR [[AND7]], [[ZEXT3]](s32) + ; SI-NEXT: [[TRUNC6:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR6]](s32) + ; SI-NEXT: [[OR1:%[0-9]+]]:_(s16) = G_OR [[TRUNC5]], [[TRUNC6]] + ; SI-NEXT: [[AND8:%[0-9]+]]:_(s16) = G_AND [[TRUNC2]], [[C1]] + ; SI-NEXT: [[XOR2:%[0-9]+]]:_(s16) = G_XOR [[TRUNC2]], [[C2]] + ; SI-NEXT: [[AND9:%[0-9]+]]:_(s16) = 
G_AND [[XOR2]], [[C1]] + ; SI-NEXT: [[ZEXT4:%[0-9]+]]:_(s32) = G_ZEXT [[AND8]](s16) + ; SI-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[BITCAST1]], [[ZEXT4]](s32) + ; SI-NEXT: [[TRUNC7:%[0-9]+]]:_(s16) = G_TRUNC [[SHL2]](s32) + ; SI-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY [[C3]](s32) + ; SI-NEXT: [[AND10:%[0-9]+]]:_(s32) = G_AND [[BITCAST3]], [[C4]] + ; SI-NEXT: [[LSHR7:%[0-9]+]]:_(s32) = G_LSHR [[AND10]], [[COPY7]](s32) + ; SI-NEXT: [[ZEXT5:%[0-9]+]]:_(s32) = G_ZEXT [[AND9]](s16) + ; SI-NEXT: [[AND11:%[0-9]+]]:_(s32) = G_AND [[LSHR7]], [[C4]] + ; SI-NEXT: [[LSHR8:%[0-9]+]]:_(s32) = G_LSHR [[AND11]], [[ZEXT5]](s32) + ; SI-NEXT: [[TRUNC8:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR8]](s32) + ; SI-NEXT: [[OR2:%[0-9]+]]:_(s16) = G_OR [[TRUNC7]], [[TRUNC8]] + ; SI-NEXT: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF + ; SI-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<4 x s16>) + ; SI-NEXT: [[BITCAST6:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>) + ; SI-NEXT: [[LSHR9:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST6]], [[C]](s32) + ; SI-NEXT: [[BITCAST7:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>) + ; SI-NEXT: [[ZEXT6:%[0-9]+]]:_(s32) = G_ZEXT [[OR]](s16) + ; SI-NEXT: [[ZEXT7:%[0-9]+]]:_(s32) = G_ZEXT [[OR1]](s16) + ; SI-NEXT: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[ZEXT7]], [[C]](s32) + ; SI-NEXT: [[OR3:%[0-9]+]]:_(s32) = G_OR [[ZEXT6]], [[SHL3]] + ; SI-NEXT: [[BITCAST8:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR3]](s32) + ; SI-NEXT: [[ZEXT8:%[0-9]+]]:_(s32) = G_ZEXT [[OR2]](s16) + ; SI-NEXT: [[AND12:%[0-9]+]]:_(s32) = G_AND [[BITCAST6]], [[C4]] + ; SI-NEXT: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[AND12]], [[C]](s32) + ; SI-NEXT: [[OR4:%[0-9]+]]:_(s32) = G_OR [[ZEXT8]], [[SHL4]] + ; SI-NEXT: [[BITCAST9:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR4]](s32) + ; SI-NEXT: [[AND13:%[0-9]+]]:_(s32) = G_AND [[LSHR9]], [[C4]] + ; SI-NEXT: [[AND14:%[0-9]+]]:_(s32) = G_AND [[BITCAST7]], [[C4]] + ; SI-NEXT: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[AND14]], [[C]](s32) + ; SI-NEXT: [[OR5:%[0-9]+]]:_(s32) = G_OR [[AND13]], [[SHL5]] + ; SI-NEXT: [[BITCAST10:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR5]](s32) + ; SI-NEXT: $vgpr0 = COPY [[BITCAST8]](<2 x s16>) + ; SI-NEXT: $vgpr1 = COPY [[BITCAST9]](<2 x s16>) + ; SI-NEXT: $vgpr2 = COPY [[BITCAST10]](<2 x s16>) ; VI-LABEL: name: test_fshl_v3s16_v3s16 ; VI: [[COPY:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0 - ; VI: [[COPY1:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr1 - ; VI: [[COPY2:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr2 - ; VI: [[COPY3:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr3 - ; VI: [[COPY4:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr4 - ; VI: [[COPY5:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr5 - ; VI: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY]](<2 x s16>) - ; VI: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST]](s32) - ; VI: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; VI: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) - ; VI: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) - ; VI: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[COPY1]](<2 x s16>) - ; VI: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST1]](s32) - ; VI: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[COPY2]](<2 x s16>) - ; VI: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST2]](s32) - ; VI: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C]](s32) - ; VI: [[TRUNC4:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR1]](s32) - ; VI: [[BITCAST3:%[0-9]+]]:_(s32) = G_BITCAST [[COPY3]](<2 x s16>) - ; VI: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST3]](s32) - ; VI: [[BITCAST4:%[0-9]+]]:_(s32) = G_BITCAST [[COPY4]](<2 x s16>) - ; VI: [[TRUNC6:%[0-9]+]]:_(s16) = G_TRUNC 
[[BITCAST4]](s32) - ; VI: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST4]], [[C]](s32) - ; VI: [[TRUNC7:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR2]](s32) - ; VI: [[BITCAST5:%[0-9]+]]:_(s32) = G_BITCAST [[COPY5]](<2 x s16>) - ; VI: [[TRUNC8:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST5]](s32) - ; VI: [[C1:%[0-9]+]]:_(s16) = G_CONSTANT i16 15 - ; VI: [[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC6]], [[C1]] - ; VI: [[C2:%[0-9]+]]:_(s16) = G_CONSTANT i16 -1 - ; VI: [[XOR:%[0-9]+]]:_(s16) = G_XOR [[TRUNC6]], [[C2]] - ; VI: [[AND1:%[0-9]+]]:_(s16) = G_AND [[XOR]], [[C1]] - ; VI: [[C3:%[0-9]+]]:_(s16) = G_CONSTANT i16 1 - ; VI: [[SHL:%[0-9]+]]:_(s16) = G_SHL [[TRUNC]], [[AND]](s16) - ; VI: [[LSHR3:%[0-9]+]]:_(s16) = G_LSHR [[TRUNC3]], [[C3]](s16) - ; VI: [[LSHR4:%[0-9]+]]:_(s16) = G_LSHR [[LSHR3]], [[AND1]](s16) - ; VI: [[OR:%[0-9]+]]:_(s16) = G_OR [[SHL]], [[LSHR4]] - ; VI: [[AND2:%[0-9]+]]:_(s16) = G_AND [[TRUNC7]], [[C1]] - ; VI: [[XOR1:%[0-9]+]]:_(s16) = G_XOR [[TRUNC7]], [[C2]] - ; VI: [[AND3:%[0-9]+]]:_(s16) = G_AND [[XOR1]], [[C1]] - ; VI: [[SHL1:%[0-9]+]]:_(s16) = G_SHL [[TRUNC1]], [[AND2]](s16) - ; VI: [[LSHR5:%[0-9]+]]:_(s16) = G_LSHR [[TRUNC4]], [[C3]](s16) - ; VI: [[LSHR6:%[0-9]+]]:_(s16) = G_LSHR [[LSHR5]], [[AND3]](s16) - ; VI: [[OR1:%[0-9]+]]:_(s16) = G_OR [[SHL1]], [[LSHR6]] - ; VI: [[AND4:%[0-9]+]]:_(s16) = G_AND [[TRUNC8]], [[C1]] - ; VI: [[XOR2:%[0-9]+]]:_(s16) = G_XOR [[TRUNC8]], [[C2]] - ; VI: [[AND5:%[0-9]+]]:_(s16) = G_AND [[XOR2]], [[C1]] - ; VI: [[SHL2:%[0-9]+]]:_(s16) = G_SHL [[TRUNC2]], [[AND4]](s16) - ; VI: [[LSHR7:%[0-9]+]]:_(s16) = G_LSHR [[TRUNC5]], [[C3]](s16) - ; VI: [[LSHR8:%[0-9]+]]:_(s16) = G_LSHR [[LSHR7]], [[AND5]](s16) - ; VI: [[OR2:%[0-9]+]]:_(s16) = G_OR [[SHL2]], [[LSHR8]] - ; VI: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF - ; VI: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<4 x s16>) - ; VI: [[BITCAST6:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>) - ; VI: [[LSHR9:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST6]], [[C]](s32) - ; VI: [[BITCAST7:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>) - ; VI: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[OR]](s16) - ; VI: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[OR1]](s16) - ; VI: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C]](s32) - ; VI: [[OR3:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL3]] - ; VI: [[BITCAST8:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR3]](s32) - ; VI: [[ZEXT2:%[0-9]+]]:_(s32) = G_ZEXT [[OR2]](s16) - ; VI: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 - ; VI: [[AND6:%[0-9]+]]:_(s32) = G_AND [[BITCAST6]], [[C4]] - ; VI: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[AND6]], [[C]](s32) - ; VI: [[OR4:%[0-9]+]]:_(s32) = G_OR [[ZEXT2]], [[SHL4]] - ; VI: [[BITCAST9:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR4]](s32) - ; VI: [[AND7:%[0-9]+]]:_(s32) = G_AND [[LSHR9]], [[C4]] - ; VI: [[AND8:%[0-9]+]]:_(s32) = G_AND [[BITCAST7]], [[C4]] - ; VI: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[AND8]], [[C]](s32) - ; VI: [[OR5:%[0-9]+]]:_(s32) = G_OR [[AND7]], [[SHL5]] - ; VI: [[BITCAST10:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR5]](s32) - ; VI: $vgpr0 = COPY [[BITCAST8]](<2 x s16>) - ; VI: $vgpr1 = COPY [[BITCAST9]](<2 x s16>) - ; VI: $vgpr2 = COPY [[BITCAST10]](<2 x s16>) + ; VI-NEXT: [[COPY1:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr1 + ; VI-NEXT: [[COPY2:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr2 + ; VI-NEXT: [[COPY3:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr3 + ; VI-NEXT: [[COPY4:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr4 + ; VI-NEXT: [[COPY5:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr5 + ; VI-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY]](<2 x s16>) + ; VI-NEXT: 
[[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST]](s32) + ; VI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; VI-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) + ; VI-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) + ; VI-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[COPY1]](<2 x s16>) + ; VI-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST1]](s32) + ; VI-NEXT: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[COPY2]](<2 x s16>) + ; VI-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST2]](s32) + ; VI-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C]](s32) + ; VI-NEXT: [[TRUNC4:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR1]](s32) + ; VI-NEXT: [[BITCAST3:%[0-9]+]]:_(s32) = G_BITCAST [[COPY3]](<2 x s16>) + ; VI-NEXT: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST3]](s32) + ; VI-NEXT: [[BITCAST4:%[0-9]+]]:_(s32) = G_BITCAST [[COPY4]](<2 x s16>) + ; VI-NEXT: [[TRUNC6:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST4]](s32) + ; VI-NEXT: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST4]], [[C]](s32) + ; VI-NEXT: [[TRUNC7:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR2]](s32) + ; VI-NEXT: [[BITCAST5:%[0-9]+]]:_(s32) = G_BITCAST [[COPY5]](<2 x s16>) + ; VI-NEXT: [[TRUNC8:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST5]](s32) + ; VI-NEXT: [[C1:%[0-9]+]]:_(s16) = G_CONSTANT i16 15 + ; VI-NEXT: [[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC6]], [[C1]] + ; VI-NEXT: [[C2:%[0-9]+]]:_(s16) = G_CONSTANT i16 -1 + ; VI-NEXT: [[XOR:%[0-9]+]]:_(s16) = G_XOR [[TRUNC6]], [[C2]] + ; VI-NEXT: [[AND1:%[0-9]+]]:_(s16) = G_AND [[XOR]], [[C1]] + ; VI-NEXT: [[C3:%[0-9]+]]:_(s16) = G_CONSTANT i16 1 + ; VI-NEXT: [[SHL:%[0-9]+]]:_(s16) = G_SHL [[TRUNC]], [[AND]](s16) + ; VI-NEXT: [[LSHR3:%[0-9]+]]:_(s16) = G_LSHR [[TRUNC3]], [[C3]](s16) + ; VI-NEXT: [[LSHR4:%[0-9]+]]:_(s16) = G_LSHR [[LSHR3]], [[AND1]](s16) + ; VI-NEXT: [[OR:%[0-9]+]]:_(s16) = G_OR [[SHL]], [[LSHR4]] + ; VI-NEXT: [[AND2:%[0-9]+]]:_(s16) = G_AND [[TRUNC7]], [[C1]] + ; VI-NEXT: [[XOR1:%[0-9]+]]:_(s16) = G_XOR [[TRUNC7]], [[C2]] + ; VI-NEXT: [[AND3:%[0-9]+]]:_(s16) = G_AND [[XOR1]], [[C1]] + ; VI-NEXT: [[SHL1:%[0-9]+]]:_(s16) = G_SHL [[TRUNC1]], [[AND2]](s16) + ; VI-NEXT: [[LSHR5:%[0-9]+]]:_(s16) = G_LSHR [[TRUNC4]], [[C3]](s16) + ; VI-NEXT: [[LSHR6:%[0-9]+]]:_(s16) = G_LSHR [[LSHR5]], [[AND3]](s16) + ; VI-NEXT: [[OR1:%[0-9]+]]:_(s16) = G_OR [[SHL1]], [[LSHR6]] + ; VI-NEXT: [[AND4:%[0-9]+]]:_(s16) = G_AND [[TRUNC8]], [[C1]] + ; VI-NEXT: [[XOR2:%[0-9]+]]:_(s16) = G_XOR [[TRUNC8]], [[C2]] + ; VI-NEXT: [[AND5:%[0-9]+]]:_(s16) = G_AND [[XOR2]], [[C1]] + ; VI-NEXT: [[SHL2:%[0-9]+]]:_(s16) = G_SHL [[TRUNC2]], [[AND4]](s16) + ; VI-NEXT: [[LSHR7:%[0-9]+]]:_(s16) = G_LSHR [[TRUNC5]], [[C3]](s16) + ; VI-NEXT: [[LSHR8:%[0-9]+]]:_(s16) = G_LSHR [[LSHR7]], [[AND5]](s16) + ; VI-NEXT: [[OR2:%[0-9]+]]:_(s16) = G_OR [[SHL2]], [[LSHR8]] + ; VI-NEXT: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF + ; VI-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<4 x s16>) + ; VI-NEXT: [[BITCAST6:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>) + ; VI-NEXT: [[LSHR9:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST6]], [[C]](s32) + ; VI-NEXT: [[BITCAST7:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>) + ; VI-NEXT: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[OR]](s16) + ; VI-NEXT: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[OR1]](s16) + ; VI-NEXT: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C]](s32) + ; VI-NEXT: [[OR3:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL3]] + ; VI-NEXT: [[BITCAST8:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR3]](s32) + ; VI-NEXT: [[ZEXT2:%[0-9]+]]:_(s32) = G_ZEXT [[OR2]](s16) + ; VI-NEXT: 
[[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 + ; VI-NEXT: [[AND6:%[0-9]+]]:_(s32) = G_AND [[BITCAST6]], [[C4]] + ; VI-NEXT: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[AND6]], [[C]](s32) + ; VI-NEXT: [[OR4:%[0-9]+]]:_(s32) = G_OR [[ZEXT2]], [[SHL4]] + ; VI-NEXT: [[BITCAST9:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR4]](s32) + ; VI-NEXT: [[AND7:%[0-9]+]]:_(s32) = G_AND [[LSHR9]], [[C4]] + ; VI-NEXT: [[AND8:%[0-9]+]]:_(s32) = G_AND [[BITCAST7]], [[C4]] + ; VI-NEXT: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[AND8]], [[C]](s32) + ; VI-NEXT: [[OR5:%[0-9]+]]:_(s32) = G_OR [[AND7]], [[SHL5]] + ; VI-NEXT: [[BITCAST10:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR5]](s32) + ; VI-NEXT: $vgpr0 = COPY [[BITCAST8]](<2 x s16>) + ; VI-NEXT: $vgpr1 = COPY [[BITCAST9]](<2 x s16>) + ; VI-NEXT: $vgpr2 = COPY [[BITCAST10]](<2 x s16>) ; GFX9-LABEL: name: test_fshl_v3s16_v3s16 ; GFX9: [[COPY:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0 - ; GFX9: [[COPY1:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr1 - ; GFX9: [[COPY2:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr2 - ; GFX9: [[COPY3:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr3 - ; GFX9: [[COPY4:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr4 - ; GFX9: [[COPY5:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr5 - ; GFX9: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY]](<2 x s16>) - ; GFX9: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; GFX9: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) - ; GFX9: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[COPY1]](<2 x s16>) - ; GFX9: [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[BITCAST]](s32), [[LSHR]](s32) - ; GFX9: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF - ; GFX9: [[COPY6:%[0-9]+]]:_(s32) = COPY [[DEF]](s32) - ; GFX9: [[BUILD_VECTOR_TRUNC1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[BITCAST1]](s32), [[COPY6]](s32) - ; GFX9: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[COPY2]](<2 x s16>) - ; GFX9: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C]](s32) - ; GFX9: [[BITCAST3:%[0-9]+]]:_(s32) = G_BITCAST [[COPY3]](<2 x s16>) - ; GFX9: [[BUILD_VECTOR_TRUNC2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[BITCAST2]](s32), [[LSHR1]](s32) - ; GFX9: [[COPY7:%[0-9]+]]:_(s32) = COPY [[DEF]](s32) - ; GFX9: [[BUILD_VECTOR_TRUNC3:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[BITCAST3]](s32), [[COPY7]](s32) - ; GFX9: [[BITCAST4:%[0-9]+]]:_(s32) = G_BITCAST [[COPY4]](<2 x s16>) - ; GFX9: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST4]], [[C]](s32) - ; GFX9: [[BITCAST5:%[0-9]+]]:_(s32) = G_BITCAST [[COPY5]](<2 x s16>) - ; GFX9: [[BUILD_VECTOR_TRUNC4:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[BITCAST4]](s32), [[LSHR2]](s32) - ; GFX9: [[BUILD_VECTOR_TRUNC5:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[BITCAST5]](s32), [[DEF]](s32) - ; GFX9: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 15 - ; GFX9: [[COPY8:%[0-9]+]]:_(s32) = COPY [[C1]](s32) - ; GFX9: [[BUILD_VECTOR_TRUNC6:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY8]](s32), [[C1]](s32) - ; GFX9: [[AND:%[0-9]+]]:_(<2 x s16>) = G_AND [[BUILD_VECTOR_TRUNC4]], [[BUILD_VECTOR_TRUNC6]] - ; GFX9: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 -1 - ; GFX9: [[COPY9:%[0-9]+]]:_(s32) = COPY [[C2]](s32) - ; GFX9: [[BUILD_VECTOR_TRUNC7:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY9]](s32), [[C2]](s32) - ; GFX9: [[XOR:%[0-9]+]]:_(<2 x s16>) = G_XOR [[BUILD_VECTOR_TRUNC4]], [[BUILD_VECTOR_TRUNC7]] - ; GFX9: [[AND1:%[0-9]+]]:_(<2 x s16>) = G_AND [[XOR]], [[BUILD_VECTOR_TRUNC6]] - ; GFX9: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 - ; GFX9: [[COPY10:%[0-9]+]]:_(s32) = COPY [[C3]](s32) - ; GFX9: [[BUILD_VECTOR_TRUNC8:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC 
[[COPY10]](s32), [[C3]](s32) - ; GFX9: [[SHL:%[0-9]+]]:_(<2 x s16>) = G_SHL [[BUILD_VECTOR_TRUNC]], [[AND]](<2 x s16>) - ; GFX9: [[LSHR3:%[0-9]+]]:_(<2 x s16>) = G_LSHR [[BUILD_VECTOR_TRUNC2]], [[BUILD_VECTOR_TRUNC8]](<2 x s16>) - ; GFX9: [[LSHR4:%[0-9]+]]:_(<2 x s16>) = G_LSHR [[LSHR3]], [[AND1]](<2 x s16>) - ; GFX9: [[OR:%[0-9]+]]:_(<2 x s16>) = G_OR [[SHL]], [[LSHR4]] - ; GFX9: [[COPY11:%[0-9]+]]:_(s32) = COPY [[C1]](s32) - ; GFX9: [[COPY12:%[0-9]+]]:_(s32) = COPY [[C1]](s32) - ; GFX9: [[BUILD_VECTOR_TRUNC9:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY11]](s32), [[COPY12]](s32) - ; GFX9: [[AND2:%[0-9]+]]:_(<2 x s16>) = G_AND [[BUILD_VECTOR_TRUNC5]], [[BUILD_VECTOR_TRUNC9]] - ; GFX9: [[COPY13:%[0-9]+]]:_(s32) = COPY [[C2]](s32) - ; GFX9: [[COPY14:%[0-9]+]]:_(s32) = COPY [[C2]](s32) - ; GFX9: [[BUILD_VECTOR_TRUNC10:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY13]](s32), [[COPY14]](s32) - ; GFX9: [[XOR1:%[0-9]+]]:_(<2 x s16>) = G_XOR [[BUILD_VECTOR_TRUNC5]], [[BUILD_VECTOR_TRUNC10]] - ; GFX9: [[AND3:%[0-9]+]]:_(<2 x s16>) = G_AND [[XOR1]], [[BUILD_VECTOR_TRUNC9]] - ; GFX9: [[COPY15:%[0-9]+]]:_(s32) = COPY [[C3]](s32) - ; GFX9: [[COPY16:%[0-9]+]]:_(s32) = COPY [[C3]](s32) - ; GFX9: [[BUILD_VECTOR_TRUNC11:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY15]](s32), [[COPY16]](s32) - ; GFX9: [[SHL1:%[0-9]+]]:_(<2 x s16>) = G_SHL [[BUILD_VECTOR_TRUNC1]], [[AND2]](<2 x s16>) - ; GFX9: [[LSHR5:%[0-9]+]]:_(<2 x s16>) = G_LSHR [[BUILD_VECTOR_TRUNC3]], [[BUILD_VECTOR_TRUNC11]](<2 x s16>) - ; GFX9: [[LSHR6:%[0-9]+]]:_(<2 x s16>) = G_LSHR [[LSHR5]], [[AND3]](<2 x s16>) - ; GFX9: [[OR1:%[0-9]+]]:_(<2 x s16>) = G_OR [[SHL1]], [[LSHR6]] - ; GFX9: [[DEF1:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF - ; GFX9: [[BITCAST6:%[0-9]+]]:_(s32) = G_BITCAST [[OR1]](<2 x s16>) - ; GFX9: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF1]](<4 x s16>) - ; GFX9: [[BITCAST7:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>) - ; GFX9: [[LSHR7:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST7]], [[C]](s32) - ; GFX9: [[BITCAST8:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>) - ; GFX9: [[BUILD_VECTOR_TRUNC12:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[BITCAST6]](s32), [[BITCAST7]](s32) - ; GFX9: [[BUILD_VECTOR_TRUNC13:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[LSHR7]](s32), [[BITCAST8]](s32) - ; GFX9: $vgpr0 = COPY [[OR]](<2 x s16>) - ; GFX9: $vgpr1 = COPY [[BUILD_VECTOR_TRUNC12]](<2 x s16>) - ; GFX9: $vgpr2 = COPY [[BUILD_VECTOR_TRUNC13]](<2 x s16>) + ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr1 + ; GFX9-NEXT: [[COPY2:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr2 + ; GFX9-NEXT: [[COPY3:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr3 + ; GFX9-NEXT: [[COPY4:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr4 + ; GFX9-NEXT: [[COPY5:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr5 + ; GFX9-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY]](<2 x s16>) + ; GFX9-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; GFX9-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) + ; GFX9-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[COPY1]](<2 x s16>) + ; GFX9-NEXT: [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[BITCAST]](s32), [[LSHR]](s32) + ; GFX9-NEXT: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF + ; GFX9-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY [[DEF]](s32) + ; GFX9-NEXT: [[BUILD_VECTOR_TRUNC1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[BITCAST1]](s32), [[COPY6]](s32) + ; GFX9-NEXT: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[COPY2]](<2 x s16>) + ; GFX9-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR 
[[BITCAST2]], [[C]](s32) + ; GFX9-NEXT: [[BITCAST3:%[0-9]+]]:_(s32) = G_BITCAST [[COPY3]](<2 x s16>) + ; GFX9-NEXT: [[BUILD_VECTOR_TRUNC2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[BITCAST2]](s32), [[LSHR1]](s32) + ; GFX9-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY [[DEF]](s32) + ; GFX9-NEXT: [[BUILD_VECTOR_TRUNC3:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[BITCAST3]](s32), [[COPY7]](s32) + ; GFX9-NEXT: [[BITCAST4:%[0-9]+]]:_(s32) = G_BITCAST [[COPY4]](<2 x s16>) + ; GFX9-NEXT: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST4]], [[C]](s32) + ; GFX9-NEXT: [[BITCAST5:%[0-9]+]]:_(s32) = G_BITCAST [[COPY5]](<2 x s16>) + ; GFX9-NEXT: [[BUILD_VECTOR_TRUNC4:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[BITCAST4]](s32), [[LSHR2]](s32) + ; GFX9-NEXT: [[BUILD_VECTOR_TRUNC5:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[BITCAST5]](s32), [[DEF]](s32) + ; GFX9-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 15 + ; GFX9-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY [[C1]](s32) + ; GFX9-NEXT: [[BUILD_VECTOR_TRUNC6:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY8]](s32), [[C1]](s32) + ; GFX9-NEXT: [[AND:%[0-9]+]]:_(<2 x s16>) = G_AND [[BUILD_VECTOR_TRUNC4]], [[BUILD_VECTOR_TRUNC6]] + ; GFX9-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 -1 + ; GFX9-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY [[C2]](s32) + ; GFX9-NEXT: [[BUILD_VECTOR_TRUNC7:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY9]](s32), [[C2]](s32) + ; GFX9-NEXT: [[XOR:%[0-9]+]]:_(<2 x s16>) = G_XOR [[BUILD_VECTOR_TRUNC4]], [[BUILD_VECTOR_TRUNC7]] + ; GFX9-NEXT: [[AND1:%[0-9]+]]:_(<2 x s16>) = G_AND [[XOR]], [[BUILD_VECTOR_TRUNC6]] + ; GFX9-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 + ; GFX9-NEXT: [[COPY10:%[0-9]+]]:_(s32) = COPY [[C3]](s32) + ; GFX9-NEXT: [[BUILD_VECTOR_TRUNC8:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY10]](s32), [[C3]](s32) + ; GFX9-NEXT: [[SHL:%[0-9]+]]:_(<2 x s16>) = G_SHL [[BUILD_VECTOR_TRUNC]], [[AND]](<2 x s16>) + ; GFX9-NEXT: [[LSHR3:%[0-9]+]]:_(<2 x s16>) = G_LSHR [[BUILD_VECTOR_TRUNC2]], [[BUILD_VECTOR_TRUNC8]](<2 x s16>) + ; GFX9-NEXT: [[LSHR4:%[0-9]+]]:_(<2 x s16>) = G_LSHR [[LSHR3]], [[AND1]](<2 x s16>) + ; GFX9-NEXT: [[OR:%[0-9]+]]:_(<2 x s16>) = G_OR [[SHL]], [[LSHR4]] + ; GFX9-NEXT: [[COPY11:%[0-9]+]]:_(s32) = COPY [[C1]](s32) + ; GFX9-NEXT: [[COPY12:%[0-9]+]]:_(s32) = COPY [[C1]](s32) + ; GFX9-NEXT: [[BUILD_VECTOR_TRUNC9:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY11]](s32), [[COPY12]](s32) + ; GFX9-NEXT: [[AND2:%[0-9]+]]:_(<2 x s16>) = G_AND [[BUILD_VECTOR_TRUNC5]], [[BUILD_VECTOR_TRUNC9]] + ; GFX9-NEXT: [[COPY13:%[0-9]+]]:_(s32) = COPY [[C2]](s32) + ; GFX9-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY [[C2]](s32) + ; GFX9-NEXT: [[BUILD_VECTOR_TRUNC10:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY13]](s32), [[COPY14]](s32) + ; GFX9-NEXT: [[XOR1:%[0-9]+]]:_(<2 x s16>) = G_XOR [[BUILD_VECTOR_TRUNC5]], [[BUILD_VECTOR_TRUNC10]] + ; GFX9-NEXT: [[AND3:%[0-9]+]]:_(<2 x s16>) = G_AND [[XOR1]], [[BUILD_VECTOR_TRUNC9]] + ; GFX9-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY [[C3]](s32) + ; GFX9-NEXT: [[COPY16:%[0-9]+]]:_(s32) = COPY [[C3]](s32) + ; GFX9-NEXT: [[BUILD_VECTOR_TRUNC11:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY15]](s32), [[COPY16]](s32) + ; GFX9-NEXT: [[SHL1:%[0-9]+]]:_(<2 x s16>) = G_SHL [[BUILD_VECTOR_TRUNC1]], [[AND2]](<2 x s16>) + ; GFX9-NEXT: [[LSHR5:%[0-9]+]]:_(<2 x s16>) = G_LSHR [[BUILD_VECTOR_TRUNC3]], [[BUILD_VECTOR_TRUNC11]](<2 x s16>) + ; GFX9-NEXT: [[LSHR6:%[0-9]+]]:_(<2 x s16>) = G_LSHR [[LSHR5]], [[AND3]](<2 x s16>) + ; GFX9-NEXT: [[OR1:%[0-9]+]]:_(<2 x s16>) = G_OR [[SHL1]], 
[[LSHR6]] + ; GFX9-NEXT: [[DEF1:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF + ; GFX9-NEXT: [[BITCAST6:%[0-9]+]]:_(s32) = G_BITCAST [[OR1]](<2 x s16>) + ; GFX9-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF1]](<4 x s16>) + ; GFX9-NEXT: [[BITCAST7:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>) + ; GFX9-NEXT: [[LSHR7:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST7]], [[C]](s32) + ; GFX9-NEXT: [[BITCAST8:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>) + ; GFX9-NEXT: [[BUILD_VECTOR_TRUNC12:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[BITCAST6]](s32), [[BITCAST7]](s32) + ; GFX9-NEXT: [[BUILD_VECTOR_TRUNC13:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[LSHR7]](s32), [[BITCAST8]](s32) + ; GFX9-NEXT: $vgpr0 = COPY [[OR]](<2 x s16>) + ; GFX9-NEXT: $vgpr1 = COPY [[BUILD_VECTOR_TRUNC12]](<2 x s16>) + ; GFX9-NEXT: $vgpr2 = COPY [[BUILD_VECTOR_TRUNC13]](<2 x s16>) %0:_(<2 x s16>) = COPY $vgpr0 %1:_(<2 x s16>) = COPY $vgpr1 %2:_(<2 x s16>) = COPY $vgpr2 @@ -886,215 +886,215 @@ body: | ; SI-LABEL: name: test_fshl_v4s16_v4s16 ; SI: [[COPY:%[0-9]+]]:_(<4 x s16>) = COPY $vgpr0_vgpr1 - ; SI: [[COPY1:%[0-9]+]]:_(<4 x s16>) = COPY $vgpr2_vgpr3 - ; SI: [[COPY2:%[0-9]+]]:_(<4 x s16>) = COPY $vgpr4_vgpr5 - ; SI: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[COPY]](<4 x s16>) - ; SI: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>) - ; SI: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; SI: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) - ; SI: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>) - ; SI: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C]](s32) - ; SI: [[UV2:%[0-9]+]]:_(<2 x s16>), [[UV3:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[COPY1]](<4 x s16>) - ; SI: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[UV2]](<2 x s16>) - ; SI: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C]](s32) - ; SI: [[BITCAST3:%[0-9]+]]:_(s32) = G_BITCAST [[UV3]](<2 x s16>) - ; SI: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST3]], [[C]](s32) - ; SI: [[UV4:%[0-9]+]]:_(<2 x s16>), [[UV5:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[COPY2]](<4 x s16>) - ; SI: [[BITCAST4:%[0-9]+]]:_(s32) = G_BITCAST [[UV4]](<2 x s16>) - ; SI: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST4]](s32) - ; SI: [[LSHR4:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST4]], [[C]](s32) - ; SI: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR4]](s32) - ; SI: [[BITCAST5:%[0-9]+]]:_(s32) = G_BITCAST [[UV5]](<2 x s16>) - ; SI: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST5]](s32) - ; SI: [[LSHR5:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST5]], [[C]](s32) - ; SI: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR5]](s32) - ; SI: [[C1:%[0-9]+]]:_(s16) = G_CONSTANT i16 15 - ; SI: [[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC]], [[C1]] - ; SI: [[C2:%[0-9]+]]:_(s16) = G_CONSTANT i16 -1 - ; SI: [[XOR:%[0-9]+]]:_(s16) = G_XOR [[TRUNC]], [[C2]] - ; SI: [[AND1:%[0-9]+]]:_(s16) = G_AND [[XOR]], [[C1]] - ; SI: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[AND]](s16) - ; SI: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[BITCAST]], [[ZEXT]](s32) - ; SI: [[TRUNC4:%[0-9]+]]:_(s16) = G_TRUNC [[SHL]](s32) - ; SI: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 - ; SI: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 - ; SI: [[AND2:%[0-9]+]]:_(s32) = G_AND [[BITCAST2]], [[C4]] - ; SI: [[LSHR6:%[0-9]+]]:_(s32) = G_LSHR [[AND2]], [[C3]](s32) - ; SI: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[AND1]](s16) - ; SI: [[AND3:%[0-9]+]]:_(s32) = G_AND [[LSHR6]], [[C4]] - ; SI: [[LSHR7:%[0-9]+]]:_(s32) = G_LSHR [[AND3]], [[ZEXT1]](s32) - ; SI: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR7]](s32) 
- ; SI: [[OR:%[0-9]+]]:_(s16) = G_OR [[TRUNC4]], [[TRUNC5]] - ; SI: [[AND4:%[0-9]+]]:_(s16) = G_AND [[TRUNC1]], [[C1]] - ; SI: [[XOR1:%[0-9]+]]:_(s16) = G_XOR [[TRUNC1]], [[C2]] - ; SI: [[AND5:%[0-9]+]]:_(s16) = G_AND [[XOR1]], [[C1]] - ; SI: [[ZEXT2:%[0-9]+]]:_(s32) = G_ZEXT [[AND4]](s16) - ; SI: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[LSHR]], [[ZEXT2]](s32) - ; SI: [[TRUNC6:%[0-9]+]]:_(s16) = G_TRUNC [[SHL1]](s32) - ; SI: [[COPY3:%[0-9]+]]:_(s32) = COPY [[C3]](s32) - ; SI: [[AND6:%[0-9]+]]:_(s32) = G_AND [[LSHR2]], [[C4]] - ; SI: [[LSHR8:%[0-9]+]]:_(s32) = G_LSHR [[AND6]], [[COPY3]](s32) - ; SI: [[ZEXT3:%[0-9]+]]:_(s32) = G_ZEXT [[AND5]](s16) - ; SI: [[AND7:%[0-9]+]]:_(s32) = G_AND [[LSHR8]], [[C4]] - ; SI: [[LSHR9:%[0-9]+]]:_(s32) = G_LSHR [[AND7]], [[ZEXT3]](s32) - ; SI: [[TRUNC7:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR9]](s32) - ; SI: [[OR1:%[0-9]+]]:_(s16) = G_OR [[TRUNC6]], [[TRUNC7]] - ; SI: [[AND8:%[0-9]+]]:_(s16) = G_AND [[TRUNC2]], [[C1]] - ; SI: [[XOR2:%[0-9]+]]:_(s16) = G_XOR [[TRUNC2]], [[C2]] - ; SI: [[AND9:%[0-9]+]]:_(s16) = G_AND [[XOR2]], [[C1]] - ; SI: [[ZEXT4:%[0-9]+]]:_(s32) = G_ZEXT [[AND8]](s16) - ; SI: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[BITCAST1]], [[ZEXT4]](s32) - ; SI: [[TRUNC8:%[0-9]+]]:_(s16) = G_TRUNC [[SHL2]](s32) - ; SI: [[COPY4:%[0-9]+]]:_(s32) = COPY [[C3]](s32) - ; SI: [[AND10:%[0-9]+]]:_(s32) = G_AND [[BITCAST3]], [[C4]] - ; SI: [[LSHR10:%[0-9]+]]:_(s32) = G_LSHR [[AND10]], [[COPY4]](s32) - ; SI: [[ZEXT5:%[0-9]+]]:_(s32) = G_ZEXT [[AND9]](s16) - ; SI: [[AND11:%[0-9]+]]:_(s32) = G_AND [[LSHR10]], [[C4]] - ; SI: [[LSHR11:%[0-9]+]]:_(s32) = G_LSHR [[AND11]], [[ZEXT5]](s32) - ; SI: [[TRUNC9:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR11]](s32) - ; SI: [[OR2:%[0-9]+]]:_(s16) = G_OR [[TRUNC8]], [[TRUNC9]] - ; SI: [[AND12:%[0-9]+]]:_(s16) = G_AND [[TRUNC3]], [[C1]] - ; SI: [[XOR3:%[0-9]+]]:_(s16) = G_XOR [[TRUNC3]], [[C2]] - ; SI: [[AND13:%[0-9]+]]:_(s16) = G_AND [[XOR3]], [[C1]] - ; SI: [[ZEXT6:%[0-9]+]]:_(s32) = G_ZEXT [[AND12]](s16) - ; SI: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[LSHR1]], [[ZEXT6]](s32) - ; SI: [[TRUNC10:%[0-9]+]]:_(s16) = G_TRUNC [[SHL3]](s32) - ; SI: [[COPY5:%[0-9]+]]:_(s32) = COPY [[C3]](s32) - ; SI: [[AND14:%[0-9]+]]:_(s32) = G_AND [[LSHR3]], [[C4]] - ; SI: [[LSHR12:%[0-9]+]]:_(s32) = G_LSHR [[AND14]], [[COPY5]](s32) - ; SI: [[ZEXT7:%[0-9]+]]:_(s32) = G_ZEXT [[AND13]](s16) - ; SI: [[AND15:%[0-9]+]]:_(s32) = G_AND [[LSHR12]], [[C4]] - ; SI: [[LSHR13:%[0-9]+]]:_(s32) = G_LSHR [[AND15]], [[ZEXT7]](s32) - ; SI: [[TRUNC11:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR13]](s32) - ; SI: [[OR3:%[0-9]+]]:_(s16) = G_OR [[TRUNC10]], [[TRUNC11]] - ; SI: [[ZEXT8:%[0-9]+]]:_(s32) = G_ZEXT [[OR]](s16) - ; SI: [[ZEXT9:%[0-9]+]]:_(s32) = G_ZEXT [[OR1]](s16) - ; SI: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[ZEXT9]], [[C]](s32) - ; SI: [[OR4:%[0-9]+]]:_(s32) = G_OR [[ZEXT8]], [[SHL4]] - ; SI: [[BITCAST6:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR4]](s32) - ; SI: [[ZEXT10:%[0-9]+]]:_(s32) = G_ZEXT [[OR2]](s16) - ; SI: [[ZEXT11:%[0-9]+]]:_(s32) = G_ZEXT [[OR3]](s16) - ; SI: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[ZEXT11]], [[C]](s32) - ; SI: [[OR5:%[0-9]+]]:_(s32) = G_OR [[ZEXT10]], [[SHL5]] - ; SI: [[BITCAST7:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR5]](s32) - ; SI: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BITCAST6]](<2 x s16>), [[BITCAST7]](<2 x s16>) - ; SI: $vgpr0_vgpr1 = COPY [[CONCAT_VECTORS]](<4 x s16>) + ; SI-NEXT: [[COPY1:%[0-9]+]]:_(<4 x s16>) = COPY $vgpr2_vgpr3 + ; SI-NEXT: [[COPY2:%[0-9]+]]:_(<4 x s16>) = COPY $vgpr4_vgpr5 + ; SI-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x 
s16>) = G_UNMERGE_VALUES [[COPY]](<4 x s16>) + ; SI-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>) + ; SI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; SI-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) + ; SI-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>) + ; SI-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C]](s32) + ; SI-NEXT: [[UV2:%[0-9]+]]:_(<2 x s16>), [[UV3:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[COPY1]](<4 x s16>) + ; SI-NEXT: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[UV2]](<2 x s16>) + ; SI-NEXT: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C]](s32) + ; SI-NEXT: [[BITCAST3:%[0-9]+]]:_(s32) = G_BITCAST [[UV3]](<2 x s16>) + ; SI-NEXT: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST3]], [[C]](s32) + ; SI-NEXT: [[UV4:%[0-9]+]]:_(<2 x s16>), [[UV5:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[COPY2]](<4 x s16>) + ; SI-NEXT: [[BITCAST4:%[0-9]+]]:_(s32) = G_BITCAST [[UV4]](<2 x s16>) + ; SI-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST4]](s32) + ; SI-NEXT: [[LSHR4:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST4]], [[C]](s32) + ; SI-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR4]](s32) + ; SI-NEXT: [[BITCAST5:%[0-9]+]]:_(s32) = G_BITCAST [[UV5]](<2 x s16>) + ; SI-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST5]](s32) + ; SI-NEXT: [[LSHR5:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST5]], [[C]](s32) + ; SI-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR5]](s32) + ; SI-NEXT: [[C1:%[0-9]+]]:_(s16) = G_CONSTANT i16 15 + ; SI-NEXT: [[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC]], [[C1]] + ; SI-NEXT: [[C2:%[0-9]+]]:_(s16) = G_CONSTANT i16 -1 + ; SI-NEXT: [[XOR:%[0-9]+]]:_(s16) = G_XOR [[TRUNC]], [[C2]] + ; SI-NEXT: [[AND1:%[0-9]+]]:_(s16) = G_AND [[XOR]], [[C1]] + ; SI-NEXT: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[AND]](s16) + ; SI-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[BITCAST]], [[ZEXT]](s32) + ; SI-NEXT: [[TRUNC4:%[0-9]+]]:_(s16) = G_TRUNC [[SHL]](s32) + ; SI-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 + ; SI-NEXT: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 + ; SI-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[BITCAST2]], [[C4]] + ; SI-NEXT: [[LSHR6:%[0-9]+]]:_(s32) = G_LSHR [[AND2]], [[C3]](s32) + ; SI-NEXT: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[AND1]](s16) + ; SI-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[LSHR6]], [[C4]] + ; SI-NEXT: [[LSHR7:%[0-9]+]]:_(s32) = G_LSHR [[AND3]], [[ZEXT1]](s32) + ; SI-NEXT: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR7]](s32) + ; SI-NEXT: [[OR:%[0-9]+]]:_(s16) = G_OR [[TRUNC4]], [[TRUNC5]] + ; SI-NEXT: [[AND4:%[0-9]+]]:_(s16) = G_AND [[TRUNC1]], [[C1]] + ; SI-NEXT: [[XOR1:%[0-9]+]]:_(s16) = G_XOR [[TRUNC1]], [[C2]] + ; SI-NEXT: [[AND5:%[0-9]+]]:_(s16) = G_AND [[XOR1]], [[C1]] + ; SI-NEXT: [[ZEXT2:%[0-9]+]]:_(s32) = G_ZEXT [[AND4]](s16) + ; SI-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[LSHR]], [[ZEXT2]](s32) + ; SI-NEXT: [[TRUNC6:%[0-9]+]]:_(s16) = G_TRUNC [[SHL1]](s32) + ; SI-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY [[C3]](s32) + ; SI-NEXT: [[AND6:%[0-9]+]]:_(s32) = G_AND [[LSHR2]], [[C4]] + ; SI-NEXT: [[LSHR8:%[0-9]+]]:_(s32) = G_LSHR [[AND6]], [[COPY3]](s32) + ; SI-NEXT: [[ZEXT3:%[0-9]+]]:_(s32) = G_ZEXT [[AND5]](s16) + ; SI-NEXT: [[AND7:%[0-9]+]]:_(s32) = G_AND [[LSHR8]], [[C4]] + ; SI-NEXT: [[LSHR9:%[0-9]+]]:_(s32) = G_LSHR [[AND7]], [[ZEXT3]](s32) + ; SI-NEXT: [[TRUNC7:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR9]](s32) + ; SI-NEXT: [[OR1:%[0-9]+]]:_(s16) = G_OR [[TRUNC6]], [[TRUNC7]] + ; SI-NEXT: [[AND8:%[0-9]+]]:_(s16) = G_AND [[TRUNC2]], [[C1]] + ; SI-NEXT: [[XOR2:%[0-9]+]]:_(s16) = G_XOR [[TRUNC2]], [[C2]] + ; SI-NEXT: 
[[AND9:%[0-9]+]]:_(s16) = G_AND [[XOR2]], [[C1]] + ; SI-NEXT: [[ZEXT4:%[0-9]+]]:_(s32) = G_ZEXT [[AND8]](s16) + ; SI-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[BITCAST1]], [[ZEXT4]](s32) + ; SI-NEXT: [[TRUNC8:%[0-9]+]]:_(s16) = G_TRUNC [[SHL2]](s32) + ; SI-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY [[C3]](s32) + ; SI-NEXT: [[AND10:%[0-9]+]]:_(s32) = G_AND [[BITCAST3]], [[C4]] + ; SI-NEXT: [[LSHR10:%[0-9]+]]:_(s32) = G_LSHR [[AND10]], [[COPY4]](s32) + ; SI-NEXT: [[ZEXT5:%[0-9]+]]:_(s32) = G_ZEXT [[AND9]](s16) + ; SI-NEXT: [[AND11:%[0-9]+]]:_(s32) = G_AND [[LSHR10]], [[C4]] + ; SI-NEXT: [[LSHR11:%[0-9]+]]:_(s32) = G_LSHR [[AND11]], [[ZEXT5]](s32) + ; SI-NEXT: [[TRUNC9:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR11]](s32) + ; SI-NEXT: [[OR2:%[0-9]+]]:_(s16) = G_OR [[TRUNC8]], [[TRUNC9]] + ; SI-NEXT: [[AND12:%[0-9]+]]:_(s16) = G_AND [[TRUNC3]], [[C1]] + ; SI-NEXT: [[XOR3:%[0-9]+]]:_(s16) = G_XOR [[TRUNC3]], [[C2]] + ; SI-NEXT: [[AND13:%[0-9]+]]:_(s16) = G_AND [[XOR3]], [[C1]] + ; SI-NEXT: [[ZEXT6:%[0-9]+]]:_(s32) = G_ZEXT [[AND12]](s16) + ; SI-NEXT: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[LSHR1]], [[ZEXT6]](s32) + ; SI-NEXT: [[TRUNC10:%[0-9]+]]:_(s16) = G_TRUNC [[SHL3]](s32) + ; SI-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY [[C3]](s32) + ; SI-NEXT: [[AND14:%[0-9]+]]:_(s32) = G_AND [[LSHR3]], [[C4]] + ; SI-NEXT: [[LSHR12:%[0-9]+]]:_(s32) = G_LSHR [[AND14]], [[COPY5]](s32) + ; SI-NEXT: [[ZEXT7:%[0-9]+]]:_(s32) = G_ZEXT [[AND13]](s16) + ; SI-NEXT: [[AND15:%[0-9]+]]:_(s32) = G_AND [[LSHR12]], [[C4]] + ; SI-NEXT: [[LSHR13:%[0-9]+]]:_(s32) = G_LSHR [[AND15]], [[ZEXT7]](s32) + ; SI-NEXT: [[TRUNC11:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR13]](s32) + ; SI-NEXT: [[OR3:%[0-9]+]]:_(s16) = G_OR [[TRUNC10]], [[TRUNC11]] + ; SI-NEXT: [[ZEXT8:%[0-9]+]]:_(s32) = G_ZEXT [[OR]](s16) + ; SI-NEXT: [[ZEXT9:%[0-9]+]]:_(s32) = G_ZEXT [[OR1]](s16) + ; SI-NEXT: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[ZEXT9]], [[C]](s32) + ; SI-NEXT: [[OR4:%[0-9]+]]:_(s32) = G_OR [[ZEXT8]], [[SHL4]] + ; SI-NEXT: [[BITCAST6:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR4]](s32) + ; SI-NEXT: [[ZEXT10:%[0-9]+]]:_(s32) = G_ZEXT [[OR2]](s16) + ; SI-NEXT: [[ZEXT11:%[0-9]+]]:_(s32) = G_ZEXT [[OR3]](s16) + ; SI-NEXT: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[ZEXT11]], [[C]](s32) + ; SI-NEXT: [[OR5:%[0-9]+]]:_(s32) = G_OR [[ZEXT10]], [[SHL5]] + ; SI-NEXT: [[BITCAST7:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR5]](s32) + ; SI-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BITCAST6]](<2 x s16>), [[BITCAST7]](<2 x s16>) + ; SI-NEXT: $vgpr0_vgpr1 = COPY [[CONCAT_VECTORS]](<4 x s16>) ; VI-LABEL: name: test_fshl_v4s16_v4s16 ; VI: [[COPY:%[0-9]+]]:_(<4 x s16>) = COPY $vgpr0_vgpr1 - ; VI: [[COPY1:%[0-9]+]]:_(<4 x s16>) = COPY $vgpr2_vgpr3 - ; VI: [[COPY2:%[0-9]+]]:_(<4 x s16>) = COPY $vgpr4_vgpr5 - ; VI: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[COPY]](<4 x s16>) - ; VI: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>) - ; VI: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST]](s32) - ; VI: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; VI: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) - ; VI: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) - ; VI: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>) - ; VI: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST1]](s32) - ; VI: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C]](s32) - ; VI: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR1]](s32) - ; VI: [[UV2:%[0-9]+]]:_(<2 x s16>), [[UV3:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[COPY1]](<4 x s16>) - ; VI: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST 
[[UV2]](<2 x s16>) - ; VI: [[TRUNC4:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST2]](s32) - ; VI: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C]](s32) - ; VI: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR2]](s32) - ; VI: [[BITCAST3:%[0-9]+]]:_(s32) = G_BITCAST [[UV3]](<2 x s16>) - ; VI: [[TRUNC6:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST3]](s32) - ; VI: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST3]], [[C]](s32) - ; VI: [[TRUNC7:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR3]](s32) - ; VI: [[UV4:%[0-9]+]]:_(<2 x s16>), [[UV5:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[COPY2]](<4 x s16>) - ; VI: [[BITCAST4:%[0-9]+]]:_(s32) = G_BITCAST [[UV4]](<2 x s16>) - ; VI: [[TRUNC8:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST4]](s32) - ; VI: [[LSHR4:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST4]], [[C]](s32) - ; VI: [[TRUNC9:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR4]](s32) - ; VI: [[BITCAST5:%[0-9]+]]:_(s32) = G_BITCAST [[UV5]](<2 x s16>) - ; VI: [[TRUNC10:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST5]](s32) - ; VI: [[LSHR5:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST5]], [[C]](s32) - ; VI: [[TRUNC11:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR5]](s32) - ; VI: [[C1:%[0-9]+]]:_(s16) = G_CONSTANT i16 15 - ; VI: [[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC8]], [[C1]] - ; VI: [[C2:%[0-9]+]]:_(s16) = G_CONSTANT i16 -1 - ; VI: [[XOR:%[0-9]+]]:_(s16) = G_XOR [[TRUNC8]], [[C2]] - ; VI: [[AND1:%[0-9]+]]:_(s16) = G_AND [[XOR]], [[C1]] - ; VI: [[C3:%[0-9]+]]:_(s16) = G_CONSTANT i16 1 - ; VI: [[SHL:%[0-9]+]]:_(s16) = G_SHL [[TRUNC]], [[AND]](s16) - ; VI: [[LSHR6:%[0-9]+]]:_(s16) = G_LSHR [[TRUNC4]], [[C3]](s16) - ; VI: [[LSHR7:%[0-9]+]]:_(s16) = G_LSHR [[LSHR6]], [[AND1]](s16) - ; VI: [[OR:%[0-9]+]]:_(s16) = G_OR [[SHL]], [[LSHR7]] - ; VI: [[AND2:%[0-9]+]]:_(s16) = G_AND [[TRUNC9]], [[C1]] - ; VI: [[XOR1:%[0-9]+]]:_(s16) = G_XOR [[TRUNC9]], [[C2]] - ; VI: [[AND3:%[0-9]+]]:_(s16) = G_AND [[XOR1]], [[C1]] - ; VI: [[SHL1:%[0-9]+]]:_(s16) = G_SHL [[TRUNC1]], [[AND2]](s16) - ; VI: [[LSHR8:%[0-9]+]]:_(s16) = G_LSHR [[TRUNC5]], [[C3]](s16) - ; VI: [[LSHR9:%[0-9]+]]:_(s16) = G_LSHR [[LSHR8]], [[AND3]](s16) - ; VI: [[OR1:%[0-9]+]]:_(s16) = G_OR [[SHL1]], [[LSHR9]] - ; VI: [[AND4:%[0-9]+]]:_(s16) = G_AND [[TRUNC10]], [[C1]] - ; VI: [[XOR2:%[0-9]+]]:_(s16) = G_XOR [[TRUNC10]], [[C2]] - ; VI: [[AND5:%[0-9]+]]:_(s16) = G_AND [[XOR2]], [[C1]] - ; VI: [[SHL2:%[0-9]+]]:_(s16) = G_SHL [[TRUNC2]], [[AND4]](s16) - ; VI: [[LSHR10:%[0-9]+]]:_(s16) = G_LSHR [[TRUNC6]], [[C3]](s16) - ; VI: [[LSHR11:%[0-9]+]]:_(s16) = G_LSHR [[LSHR10]], [[AND5]](s16) - ; VI: [[OR2:%[0-9]+]]:_(s16) = G_OR [[SHL2]], [[LSHR11]] - ; VI: [[AND6:%[0-9]+]]:_(s16) = G_AND [[TRUNC11]], [[C1]] - ; VI: [[XOR3:%[0-9]+]]:_(s16) = G_XOR [[TRUNC11]], [[C2]] - ; VI: [[AND7:%[0-9]+]]:_(s16) = G_AND [[XOR3]], [[C1]] - ; VI: [[SHL3:%[0-9]+]]:_(s16) = G_SHL [[TRUNC3]], [[AND6]](s16) - ; VI: [[LSHR12:%[0-9]+]]:_(s16) = G_LSHR [[TRUNC7]], [[C3]](s16) - ; VI: [[LSHR13:%[0-9]+]]:_(s16) = G_LSHR [[LSHR12]], [[AND7]](s16) - ; VI: [[OR3:%[0-9]+]]:_(s16) = G_OR [[SHL3]], [[LSHR13]] - ; VI: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[OR]](s16) - ; VI: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[OR1]](s16) - ; VI: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C]](s32) - ; VI: [[OR4:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL4]] - ; VI: [[BITCAST6:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR4]](s32) - ; VI: [[ZEXT2:%[0-9]+]]:_(s32) = G_ZEXT [[OR2]](s16) - ; VI: [[ZEXT3:%[0-9]+]]:_(s32) = G_ZEXT [[OR3]](s16) - ; VI: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[ZEXT3]], [[C]](s32) - ; VI: [[OR5:%[0-9]+]]:_(s32) = G_OR [[ZEXT2]], [[SHL5]] - ; VI: [[BITCAST7:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR5]](s32) - 
; VI: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BITCAST6]](<2 x s16>), [[BITCAST7]](<2 x s16>) - ; VI: $vgpr0_vgpr1 = COPY [[CONCAT_VECTORS]](<4 x s16>) + ; VI-NEXT: [[COPY1:%[0-9]+]]:_(<4 x s16>) = COPY $vgpr2_vgpr3 + ; VI-NEXT: [[COPY2:%[0-9]+]]:_(<4 x s16>) = COPY $vgpr4_vgpr5 + ; VI-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[COPY]](<4 x s16>) + ; VI-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>) + ; VI-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST]](s32) + ; VI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; VI-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) + ; VI-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) + ; VI-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>) + ; VI-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST1]](s32) + ; VI-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C]](s32) + ; VI-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR1]](s32) + ; VI-NEXT: [[UV2:%[0-9]+]]:_(<2 x s16>), [[UV3:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[COPY1]](<4 x s16>) + ; VI-NEXT: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[UV2]](<2 x s16>) + ; VI-NEXT: [[TRUNC4:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST2]](s32) + ; VI-NEXT: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C]](s32) + ; VI-NEXT: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR2]](s32) + ; VI-NEXT: [[BITCAST3:%[0-9]+]]:_(s32) = G_BITCAST [[UV3]](<2 x s16>) + ; VI-NEXT: [[TRUNC6:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST3]](s32) + ; VI-NEXT: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST3]], [[C]](s32) + ; VI-NEXT: [[TRUNC7:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR3]](s32) + ; VI-NEXT: [[UV4:%[0-9]+]]:_(<2 x s16>), [[UV5:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[COPY2]](<4 x s16>) + ; VI-NEXT: [[BITCAST4:%[0-9]+]]:_(s32) = G_BITCAST [[UV4]](<2 x s16>) + ; VI-NEXT: [[TRUNC8:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST4]](s32) + ; VI-NEXT: [[LSHR4:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST4]], [[C]](s32) + ; VI-NEXT: [[TRUNC9:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR4]](s32) + ; VI-NEXT: [[BITCAST5:%[0-9]+]]:_(s32) = G_BITCAST [[UV5]](<2 x s16>) + ; VI-NEXT: [[TRUNC10:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST5]](s32) + ; VI-NEXT: [[LSHR5:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST5]], [[C]](s32) + ; VI-NEXT: [[TRUNC11:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR5]](s32) + ; VI-NEXT: [[C1:%[0-9]+]]:_(s16) = G_CONSTANT i16 15 + ; VI-NEXT: [[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC8]], [[C1]] + ; VI-NEXT: [[C2:%[0-9]+]]:_(s16) = G_CONSTANT i16 -1 + ; VI-NEXT: [[XOR:%[0-9]+]]:_(s16) = G_XOR [[TRUNC8]], [[C2]] + ; VI-NEXT: [[AND1:%[0-9]+]]:_(s16) = G_AND [[XOR]], [[C1]] + ; VI-NEXT: [[C3:%[0-9]+]]:_(s16) = G_CONSTANT i16 1 + ; VI-NEXT: [[SHL:%[0-9]+]]:_(s16) = G_SHL [[TRUNC]], [[AND]](s16) + ; VI-NEXT: [[LSHR6:%[0-9]+]]:_(s16) = G_LSHR [[TRUNC4]], [[C3]](s16) + ; VI-NEXT: [[LSHR7:%[0-9]+]]:_(s16) = G_LSHR [[LSHR6]], [[AND1]](s16) + ; VI-NEXT: [[OR:%[0-9]+]]:_(s16) = G_OR [[SHL]], [[LSHR7]] + ; VI-NEXT: [[AND2:%[0-9]+]]:_(s16) = G_AND [[TRUNC9]], [[C1]] + ; VI-NEXT: [[XOR1:%[0-9]+]]:_(s16) = G_XOR [[TRUNC9]], [[C2]] + ; VI-NEXT: [[AND3:%[0-9]+]]:_(s16) = G_AND [[XOR1]], [[C1]] + ; VI-NEXT: [[SHL1:%[0-9]+]]:_(s16) = G_SHL [[TRUNC1]], [[AND2]](s16) + ; VI-NEXT: [[LSHR8:%[0-9]+]]:_(s16) = G_LSHR [[TRUNC5]], [[C3]](s16) + ; VI-NEXT: [[LSHR9:%[0-9]+]]:_(s16) = G_LSHR [[LSHR8]], [[AND3]](s16) + ; VI-NEXT: [[OR1:%[0-9]+]]:_(s16) = G_OR [[SHL1]], [[LSHR9]] + ; VI-NEXT: [[AND4:%[0-9]+]]:_(s16) = G_AND [[TRUNC10]], [[C1]] + ; VI-NEXT: [[XOR2:%[0-9]+]]:_(s16) = G_XOR [[TRUNC10]], [[C2]] + 
; VI-NEXT: [[AND5:%[0-9]+]]:_(s16) = G_AND [[XOR2]], [[C1]] + ; VI-NEXT: [[SHL2:%[0-9]+]]:_(s16) = G_SHL [[TRUNC2]], [[AND4]](s16) + ; VI-NEXT: [[LSHR10:%[0-9]+]]:_(s16) = G_LSHR [[TRUNC6]], [[C3]](s16) + ; VI-NEXT: [[LSHR11:%[0-9]+]]:_(s16) = G_LSHR [[LSHR10]], [[AND5]](s16) + ; VI-NEXT: [[OR2:%[0-9]+]]:_(s16) = G_OR [[SHL2]], [[LSHR11]] + ; VI-NEXT: [[AND6:%[0-9]+]]:_(s16) = G_AND [[TRUNC11]], [[C1]] + ; VI-NEXT: [[XOR3:%[0-9]+]]:_(s16) = G_XOR [[TRUNC11]], [[C2]] + ; VI-NEXT: [[AND7:%[0-9]+]]:_(s16) = G_AND [[XOR3]], [[C1]] + ; VI-NEXT: [[SHL3:%[0-9]+]]:_(s16) = G_SHL [[TRUNC3]], [[AND6]](s16) + ; VI-NEXT: [[LSHR12:%[0-9]+]]:_(s16) = G_LSHR [[TRUNC7]], [[C3]](s16) + ; VI-NEXT: [[LSHR13:%[0-9]+]]:_(s16) = G_LSHR [[LSHR12]], [[AND7]](s16) + ; VI-NEXT: [[OR3:%[0-9]+]]:_(s16) = G_OR [[SHL3]], [[LSHR13]] + ; VI-NEXT: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[OR]](s16) + ; VI-NEXT: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[OR1]](s16) + ; VI-NEXT: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C]](s32) + ; VI-NEXT: [[OR4:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL4]] + ; VI-NEXT: [[BITCAST6:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR4]](s32) + ; VI-NEXT: [[ZEXT2:%[0-9]+]]:_(s32) = G_ZEXT [[OR2]](s16) + ; VI-NEXT: [[ZEXT3:%[0-9]+]]:_(s32) = G_ZEXT [[OR3]](s16) + ; VI-NEXT: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[ZEXT3]], [[C]](s32) + ; VI-NEXT: [[OR5:%[0-9]+]]:_(s32) = G_OR [[ZEXT2]], [[SHL5]] + ; VI-NEXT: [[BITCAST7:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR5]](s32) + ; VI-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BITCAST6]](<2 x s16>), [[BITCAST7]](<2 x s16>) + ; VI-NEXT: $vgpr0_vgpr1 = COPY [[CONCAT_VECTORS]](<4 x s16>) ; GFX9-LABEL: name: test_fshl_v4s16_v4s16 ; GFX9: [[COPY:%[0-9]+]]:_(<4 x s16>) = COPY $vgpr0_vgpr1 - ; GFX9: [[COPY1:%[0-9]+]]:_(<4 x s16>) = COPY $vgpr2_vgpr3 - ; GFX9: [[COPY2:%[0-9]+]]:_(<4 x s16>) = COPY $vgpr4_vgpr5 - ; GFX9: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[COPY]](<4 x s16>) - ; GFX9: [[UV2:%[0-9]+]]:_(<2 x s16>), [[UV3:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[COPY1]](<4 x s16>) - ; GFX9: [[UV4:%[0-9]+]]:_(<2 x s16>), [[UV5:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[COPY2]](<4 x s16>) - ; GFX9: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 15 - ; GFX9: [[COPY3:%[0-9]+]]:_(s32) = COPY [[C]](s32) - ; GFX9: [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY3]](s32), [[C]](s32) - ; GFX9: [[AND:%[0-9]+]]:_(<2 x s16>) = G_AND [[UV4]], [[BUILD_VECTOR_TRUNC]] - ; GFX9: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 -1 - ; GFX9: [[COPY4:%[0-9]+]]:_(s32) = COPY [[C1]](s32) - ; GFX9: [[BUILD_VECTOR_TRUNC1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY4]](s32), [[C1]](s32) - ; GFX9: [[XOR:%[0-9]+]]:_(<2 x s16>) = G_XOR [[UV4]], [[BUILD_VECTOR_TRUNC1]] - ; GFX9: [[AND1:%[0-9]+]]:_(<2 x s16>) = G_AND [[XOR]], [[BUILD_VECTOR_TRUNC]] - ; GFX9: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 - ; GFX9: [[COPY5:%[0-9]+]]:_(s32) = COPY [[C2]](s32) - ; GFX9: [[BUILD_VECTOR_TRUNC2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY5]](s32), [[C2]](s32) - ; GFX9: [[SHL:%[0-9]+]]:_(<2 x s16>) = G_SHL [[UV]], [[AND]](<2 x s16>) - ; GFX9: [[LSHR:%[0-9]+]]:_(<2 x s16>) = G_LSHR [[UV2]], [[BUILD_VECTOR_TRUNC2]](<2 x s16>) - ; GFX9: [[LSHR1:%[0-9]+]]:_(<2 x s16>) = G_LSHR [[LSHR]], [[AND1]](<2 x s16>) - ; GFX9: [[OR:%[0-9]+]]:_(<2 x s16>) = G_OR [[SHL]], [[LSHR1]] - ; GFX9: [[COPY6:%[0-9]+]]:_(s32) = COPY [[C]](s32) - ; GFX9: [[COPY7:%[0-9]+]]:_(s32) = COPY [[C]](s32) - ; GFX9: [[BUILD_VECTOR_TRUNC3:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC 
[[COPY6]](s32), [[COPY7]](s32) - ; GFX9: [[AND2:%[0-9]+]]:_(<2 x s16>) = G_AND [[UV5]], [[BUILD_VECTOR_TRUNC3]] - ; GFX9: [[COPY8:%[0-9]+]]:_(s32) = COPY [[C1]](s32) - ; GFX9: [[COPY9:%[0-9]+]]:_(s32) = COPY [[C1]](s32) - ; GFX9: [[BUILD_VECTOR_TRUNC4:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY8]](s32), [[COPY9]](s32) - ; GFX9: [[XOR1:%[0-9]+]]:_(<2 x s16>) = G_XOR [[UV5]], [[BUILD_VECTOR_TRUNC4]] - ; GFX9: [[AND3:%[0-9]+]]:_(<2 x s16>) = G_AND [[XOR1]], [[BUILD_VECTOR_TRUNC3]] - ; GFX9: [[COPY10:%[0-9]+]]:_(s32) = COPY [[C2]](s32) - ; GFX9: [[COPY11:%[0-9]+]]:_(s32) = COPY [[C2]](s32) - ; GFX9: [[BUILD_VECTOR_TRUNC5:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY10]](s32), [[COPY11]](s32) - ; GFX9: [[SHL1:%[0-9]+]]:_(<2 x s16>) = G_SHL [[UV1]], [[AND2]](<2 x s16>) - ; GFX9: [[LSHR2:%[0-9]+]]:_(<2 x s16>) = G_LSHR [[UV3]], [[BUILD_VECTOR_TRUNC5]](<2 x s16>) - ; GFX9: [[LSHR3:%[0-9]+]]:_(<2 x s16>) = G_LSHR [[LSHR2]], [[AND3]](<2 x s16>) - ; GFX9: [[OR1:%[0-9]+]]:_(<2 x s16>) = G_OR [[SHL1]], [[LSHR3]] - ; GFX9: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[OR]](<2 x s16>), [[OR1]](<2 x s16>) - ; GFX9: $vgpr0_vgpr1 = COPY [[CONCAT_VECTORS]](<4 x s16>) + ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(<4 x s16>) = COPY $vgpr2_vgpr3 + ; GFX9-NEXT: [[COPY2:%[0-9]+]]:_(<4 x s16>) = COPY $vgpr4_vgpr5 + ; GFX9-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[COPY]](<4 x s16>) + ; GFX9-NEXT: [[UV2:%[0-9]+]]:_(<2 x s16>), [[UV3:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[COPY1]](<4 x s16>) + ; GFX9-NEXT: [[UV4:%[0-9]+]]:_(<2 x s16>), [[UV5:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[COPY2]](<4 x s16>) + ; GFX9-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 15 + ; GFX9-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY [[C]](s32) + ; GFX9-NEXT: [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY3]](s32), [[C]](s32) + ; GFX9-NEXT: [[AND:%[0-9]+]]:_(<2 x s16>) = G_AND [[UV4]], [[BUILD_VECTOR_TRUNC]] + ; GFX9-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 -1 + ; GFX9-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY [[C1]](s32) + ; GFX9-NEXT: [[BUILD_VECTOR_TRUNC1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY4]](s32), [[C1]](s32) + ; GFX9-NEXT: [[XOR:%[0-9]+]]:_(<2 x s16>) = G_XOR [[UV4]], [[BUILD_VECTOR_TRUNC1]] + ; GFX9-NEXT: [[AND1:%[0-9]+]]:_(<2 x s16>) = G_AND [[XOR]], [[BUILD_VECTOR_TRUNC]] + ; GFX9-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 + ; GFX9-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY [[C2]](s32) + ; GFX9-NEXT: [[BUILD_VECTOR_TRUNC2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY5]](s32), [[C2]](s32) + ; GFX9-NEXT: [[SHL:%[0-9]+]]:_(<2 x s16>) = G_SHL [[UV]], [[AND]](<2 x s16>) + ; GFX9-NEXT: [[LSHR:%[0-9]+]]:_(<2 x s16>) = G_LSHR [[UV2]], [[BUILD_VECTOR_TRUNC2]](<2 x s16>) + ; GFX9-NEXT: [[LSHR1:%[0-9]+]]:_(<2 x s16>) = G_LSHR [[LSHR]], [[AND1]](<2 x s16>) + ; GFX9-NEXT: [[OR:%[0-9]+]]:_(<2 x s16>) = G_OR [[SHL]], [[LSHR1]] + ; GFX9-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY [[C]](s32) + ; GFX9-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY [[C]](s32) + ; GFX9-NEXT: [[BUILD_VECTOR_TRUNC3:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY6]](s32), [[COPY7]](s32) + ; GFX9-NEXT: [[AND2:%[0-9]+]]:_(<2 x s16>) = G_AND [[UV5]], [[BUILD_VECTOR_TRUNC3]] + ; GFX9-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY [[C1]](s32) + ; GFX9-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY [[C1]](s32) + ; GFX9-NEXT: [[BUILD_VECTOR_TRUNC4:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY8]](s32), [[COPY9]](s32) + ; GFX9-NEXT: [[XOR1:%[0-9]+]]:_(<2 x s16>) = G_XOR [[UV5]], 
[[BUILD_VECTOR_TRUNC4]] + ; GFX9-NEXT: [[AND3:%[0-9]+]]:_(<2 x s16>) = G_AND [[XOR1]], [[BUILD_VECTOR_TRUNC3]] + ; GFX9-NEXT: [[COPY10:%[0-9]+]]:_(s32) = COPY [[C2]](s32) + ; GFX9-NEXT: [[COPY11:%[0-9]+]]:_(s32) = COPY [[C2]](s32) + ; GFX9-NEXT: [[BUILD_VECTOR_TRUNC5:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY10]](s32), [[COPY11]](s32) + ; GFX9-NEXT: [[SHL1:%[0-9]+]]:_(<2 x s16>) = G_SHL [[UV1]], [[AND2]](<2 x s16>) + ; GFX9-NEXT: [[LSHR2:%[0-9]+]]:_(<2 x s16>) = G_LSHR [[UV3]], [[BUILD_VECTOR_TRUNC5]](<2 x s16>) + ; GFX9-NEXT: [[LSHR3:%[0-9]+]]:_(<2 x s16>) = G_LSHR [[LSHR2]], [[AND3]](<2 x s16>) + ; GFX9-NEXT: [[OR1:%[0-9]+]]:_(<2 x s16>) = G_OR [[SHL1]], [[LSHR3]] + ; GFX9-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[OR]](<2 x s16>), [[OR1]](<2 x s16>) + ; GFX9-NEXT: $vgpr0_vgpr1 = COPY [[CONCAT_VECTORS]](<4 x s16>) %0:_(<4 x s16>) = COPY $vgpr0_vgpr1 %1:_(<4 x s16>) = COPY $vgpr2_vgpr3 %2:_(<4 x s16>) = COPY $vgpr4_vgpr5 diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-fshr.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-fshr.mir index b2a3886c7dd8..e791b23c839f 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-fshr.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-fshr.mir @@ -1,8 +1,8 @@ # NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py -# RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=tahiti -O0 -run-pass=legalizer -global-isel-abort=0 %s -o - | FileCheck -check-prefix=SI %s -# RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=fiji -O0 -run-pass=legalizer -global-isel-abort=0 %s -o - | FileCheck -check-prefix=VI %s -# RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx900 -O0 -run-pass=legalizer -global-isel-abort=0 %s -o - | FileCheck -check-prefix=GFX9 %s -# RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx1010 -O0 -run-pass=legalizer -global-isel-abort=0 %s -o - | FileCheck -check-prefix=GFX9 %s +# RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=tahiti -O0 -run-pass=legalizer %s -o - | FileCheck -check-prefix=SI %s +# RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=fiji -O0 -run-pass=legalizer %s -o - | FileCheck -check-prefix=VI %s +# RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx900 -O0 -run-pass=legalizer %s -o - | FileCheck -check-prefix=GFX9 %s +# RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx1010 -O0 -run-pass=legalizer %s -o - | FileCheck -check-prefix=GFX9 %s --- name: test_fshr_s32_s32 @@ -12,22 +12,22 @@ body: | ; SI-LABEL: name: test_fshr_s32_s32 ; SI: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; SI: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; SI: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; SI: [[FSHR:%[0-9]+]]:_(s32) = G_FSHR [[COPY]], [[COPY1]], [[COPY2]](s32) - ; SI: $vgpr0 = COPY [[FSHR]](s32) + ; SI-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; SI-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 + ; SI-NEXT: [[FSHR:%[0-9]+]]:_(s32) = G_FSHR [[COPY]], [[COPY1]], [[COPY2]](s32) + ; SI-NEXT: $vgpr0 = COPY [[FSHR]](s32) ; VI-LABEL: name: test_fshr_s32_s32 ; VI: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; VI: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; VI: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; VI: [[FSHR:%[0-9]+]]:_(s32) = G_FSHR [[COPY]], [[COPY1]], [[COPY2]](s32) - ; VI: $vgpr0 = COPY [[FSHR]](s32) + ; VI-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; VI-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 + ; VI-NEXT: [[FSHR:%[0-9]+]]:_(s32) = G_FSHR [[COPY]], [[COPY1]], [[COPY2]](s32) + ; VI-NEXT: $vgpr0 = COPY [[FSHR]](s32) ; GFX9-LABEL: name: test_fshr_s32_s32 ; GFX9: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX9: 
[[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX9: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; GFX9: [[FSHR:%[0-9]+]]:_(s32) = G_FSHR [[COPY]], [[COPY1]], [[COPY2]](s32) - ; GFX9: $vgpr0 = COPY [[FSHR]](s32) + ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; GFX9-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 + ; GFX9-NEXT: [[FSHR:%[0-9]+]]:_(s32) = G_FSHR [[COPY]], [[COPY1]], [[COPY2]](s32) + ; GFX9-NEXT: $vgpr0 = COPY [[FSHR]](s32) %0:_(s32) = COPY $vgpr0 %1:_(s32) = COPY $vgpr1 %2:_(s32) = COPY $vgpr2 @@ -43,37 +43,37 @@ body: | ; SI-LABEL: name: test_fshr_v2s32_v2s32 ; SI: [[COPY:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr0_vgpr1 - ; SI: [[COPY1:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr2_vgpr3 - ; SI: [[COPY2:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr4_vgpr5 - ; SI: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<2 x s32>) - ; SI: [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](<2 x s32>) - ; SI: [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY2]](<2 x s32>) - ; SI: [[FSHR:%[0-9]+]]:_(s32) = G_FSHR [[UV]], [[UV2]], [[UV4]](s32) - ; SI: [[FSHR1:%[0-9]+]]:_(s32) = G_FSHR [[UV1]], [[UV3]], [[UV5]](s32) - ; SI: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[FSHR]](s32), [[FSHR1]](s32) - ; SI: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x s32>) + ; SI-NEXT: [[COPY1:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr2_vgpr3 + ; SI-NEXT: [[COPY2:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr4_vgpr5 + ; SI-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<2 x s32>) + ; SI-NEXT: [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](<2 x s32>) + ; SI-NEXT: [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY2]](<2 x s32>) + ; SI-NEXT: [[FSHR:%[0-9]+]]:_(s32) = G_FSHR [[UV]], [[UV2]], [[UV4]](s32) + ; SI-NEXT: [[FSHR1:%[0-9]+]]:_(s32) = G_FSHR [[UV1]], [[UV3]], [[UV5]](s32) + ; SI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[FSHR]](s32), [[FSHR1]](s32) + ; SI-NEXT: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x s32>) ; VI-LABEL: name: test_fshr_v2s32_v2s32 ; VI: [[COPY:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr0_vgpr1 - ; VI: [[COPY1:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr2_vgpr3 - ; VI: [[COPY2:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr4_vgpr5 - ; VI: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<2 x s32>) - ; VI: [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](<2 x s32>) - ; VI: [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY2]](<2 x s32>) - ; VI: [[FSHR:%[0-9]+]]:_(s32) = G_FSHR [[UV]], [[UV2]], [[UV4]](s32) - ; VI: [[FSHR1:%[0-9]+]]:_(s32) = G_FSHR [[UV1]], [[UV3]], [[UV5]](s32) - ; VI: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[FSHR]](s32), [[FSHR1]](s32) - ; VI: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x s32>) + ; VI-NEXT: [[COPY1:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr2_vgpr3 + ; VI-NEXT: [[COPY2:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr4_vgpr5 + ; VI-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<2 x s32>) + ; VI-NEXT: [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](<2 x s32>) + ; VI-NEXT: [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY2]](<2 x s32>) + ; VI-NEXT: [[FSHR:%[0-9]+]]:_(s32) = G_FSHR [[UV]], [[UV2]], [[UV4]](s32) + ; VI-NEXT: [[FSHR1:%[0-9]+]]:_(s32) = G_FSHR [[UV1]], [[UV3]], [[UV5]](s32) + ; VI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[FSHR]](s32), [[FSHR1]](s32) + ; VI-NEXT: 
$vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x s32>) ; GFX9-LABEL: name: test_fshr_v2s32_v2s32 ; GFX9: [[COPY:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr0_vgpr1 - ; GFX9: [[COPY1:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr2_vgpr3 - ; GFX9: [[COPY2:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr4_vgpr5 - ; GFX9: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<2 x s32>) - ; GFX9: [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](<2 x s32>) - ; GFX9: [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY2]](<2 x s32>) - ; GFX9: [[FSHR:%[0-9]+]]:_(s32) = G_FSHR [[UV]], [[UV2]], [[UV4]](s32) - ; GFX9: [[FSHR1:%[0-9]+]]:_(s32) = G_FSHR [[UV1]], [[UV3]], [[UV5]](s32) - ; GFX9: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[FSHR]](s32), [[FSHR1]](s32) - ; GFX9: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x s32>) + ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr2_vgpr3 + ; GFX9-NEXT: [[COPY2:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr4_vgpr5 + ; GFX9-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<2 x s32>) + ; GFX9-NEXT: [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](<2 x s32>) + ; GFX9-NEXT: [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY2]](<2 x s32>) + ; GFX9-NEXT: [[FSHR:%[0-9]+]]:_(s32) = G_FSHR [[UV]], [[UV2]], [[UV4]](s32) + ; GFX9-NEXT: [[FSHR1:%[0-9]+]]:_(s32) = G_FSHR [[UV1]], [[UV3]], [[UV5]](s32) + ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[FSHR]](s32), [[FSHR1]](s32) + ; GFX9-NEXT: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x s32>) %0:_(<2 x s32>) = COPY $vgpr0_vgpr1 %1:_(<2 x s32>) = COPY $vgpr2_vgpr3 %2:_(<2 x s32>) = COPY $vgpr4_vgpr5 @@ -89,65 +89,65 @@ body: | ; SI-LABEL: name: test_fshr_s16_s16 ; SI: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; SI: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; SI: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; SI: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY2]](s32) - ; SI: [[C:%[0-9]+]]:_(s16) = G_CONSTANT i16 15 - ; SI: [[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC]], [[C]] - ; SI: [[C1:%[0-9]+]]:_(s16) = G_CONSTANT i16 -1 - ; SI: [[XOR:%[0-9]+]]:_(s16) = G_XOR [[TRUNC]], [[C1]] - ; SI: [[AND1:%[0-9]+]]:_(s16) = G_AND [[XOR]], [[C]] - ; SI: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 - ; SI: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY]], [[C2]](s32) - ; SI: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[AND1]](s16) - ; SI: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[SHL]], [[ZEXT]](s32) - ; SI: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[SHL1]](s32) - ; SI: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[AND]](s16) - ; SI: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 - ; SI: [[AND2:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C3]] - ; SI: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[AND2]], [[ZEXT1]](s32) - ; SI: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) - ; SI: [[OR:%[0-9]+]]:_(s16) = G_OR [[TRUNC1]], [[TRUNC2]] - ; SI: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[OR]](s16) - ; SI: $vgpr0 = COPY [[ANYEXT]](s32) + ; SI-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; SI-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 + ; SI-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY2]](s32) + ; SI-NEXT: [[C:%[0-9]+]]:_(s16) = G_CONSTANT i16 15 + ; SI-NEXT: [[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC]], [[C]] + ; SI-NEXT: [[C1:%[0-9]+]]:_(s16) = G_CONSTANT i16 -1 + ; SI-NEXT: [[XOR:%[0-9]+]]:_(s16) = G_XOR [[TRUNC]], [[C1]] + ; SI-NEXT: [[AND1:%[0-9]+]]:_(s16) = G_AND [[XOR]], [[C]] + ; SI-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 + ; SI-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY]], 
[[C2]](s32) + ; SI-NEXT: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[AND1]](s16) + ; SI-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[SHL]], [[ZEXT]](s32) + ; SI-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[SHL1]](s32) + ; SI-NEXT: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[AND]](s16) + ; SI-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 + ; SI-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C3]] + ; SI-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[AND2]], [[ZEXT1]](s32) + ; SI-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) + ; SI-NEXT: [[OR:%[0-9]+]]:_(s16) = G_OR [[TRUNC1]], [[TRUNC2]] + ; SI-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[OR]](s16) + ; SI-NEXT: $vgpr0 = COPY [[ANYEXT]](s32) ; VI-LABEL: name: test_fshr_s16_s16 ; VI: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; VI: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; VI: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; VI: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY]](s32) - ; VI: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY1]](s32) - ; VI: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[COPY2]](s32) - ; VI: [[C:%[0-9]+]]:_(s16) = G_CONSTANT i16 15 - ; VI: [[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC2]], [[C]] - ; VI: [[C1:%[0-9]+]]:_(s16) = G_CONSTANT i16 -1 - ; VI: [[XOR:%[0-9]+]]:_(s16) = G_XOR [[TRUNC2]], [[C1]] - ; VI: [[AND1:%[0-9]+]]:_(s16) = G_AND [[XOR]], [[C]] - ; VI: [[C2:%[0-9]+]]:_(s16) = G_CONSTANT i16 1 - ; VI: [[SHL:%[0-9]+]]:_(s16) = G_SHL [[TRUNC]], [[C2]](s16) - ; VI: [[SHL1:%[0-9]+]]:_(s16) = G_SHL [[SHL]], [[AND1]](s16) - ; VI: [[LSHR:%[0-9]+]]:_(s16) = G_LSHR [[TRUNC1]], [[AND]](s16) - ; VI: [[OR:%[0-9]+]]:_(s16) = G_OR [[SHL1]], [[LSHR]] - ; VI: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[OR]](s16) - ; VI: $vgpr0 = COPY [[ANYEXT]](s32) + ; VI-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; VI-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 + ; VI-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY]](s32) + ; VI-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY1]](s32) + ; VI-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[COPY2]](s32) + ; VI-NEXT: [[C:%[0-9]+]]:_(s16) = G_CONSTANT i16 15 + ; VI-NEXT: [[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC2]], [[C]] + ; VI-NEXT: [[C1:%[0-9]+]]:_(s16) = G_CONSTANT i16 -1 + ; VI-NEXT: [[XOR:%[0-9]+]]:_(s16) = G_XOR [[TRUNC2]], [[C1]] + ; VI-NEXT: [[AND1:%[0-9]+]]:_(s16) = G_AND [[XOR]], [[C]] + ; VI-NEXT: [[C2:%[0-9]+]]:_(s16) = G_CONSTANT i16 1 + ; VI-NEXT: [[SHL:%[0-9]+]]:_(s16) = G_SHL [[TRUNC]], [[C2]](s16) + ; VI-NEXT: [[SHL1:%[0-9]+]]:_(s16) = G_SHL [[SHL]], [[AND1]](s16) + ; VI-NEXT: [[LSHR:%[0-9]+]]:_(s16) = G_LSHR [[TRUNC1]], [[AND]](s16) + ; VI-NEXT: [[OR:%[0-9]+]]:_(s16) = G_OR [[SHL1]], [[LSHR]] + ; VI-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[OR]](s16) + ; VI-NEXT: $vgpr0 = COPY [[ANYEXT]](s32) ; GFX9-LABEL: name: test_fshr_s16_s16 ; GFX9: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX9: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX9: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; GFX9: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY]](s32) - ; GFX9: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY1]](s32) - ; GFX9: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[COPY2]](s32) - ; GFX9: [[C:%[0-9]+]]:_(s16) = G_CONSTANT i16 15 - ; GFX9: [[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC2]], [[C]] - ; GFX9: [[C1:%[0-9]+]]:_(s16) = G_CONSTANT i16 -1 - ; GFX9: [[XOR:%[0-9]+]]:_(s16) = G_XOR [[TRUNC2]], [[C1]] - ; GFX9: [[AND1:%[0-9]+]]:_(s16) = G_AND [[XOR]], [[C]] - ; GFX9: [[C2:%[0-9]+]]:_(s16) = G_CONSTANT i16 1 - ; GFX9: [[SHL:%[0-9]+]]:_(s16) = G_SHL [[TRUNC]], [[C2]](s16) - ; GFX9: [[SHL1:%[0-9]+]]:_(s16) = G_SHL [[SHL]], [[AND1]](s16) - ; GFX9: 
[[LSHR:%[0-9]+]]:_(s16) = G_LSHR [[TRUNC1]], [[AND]](s16) - ; GFX9: [[OR:%[0-9]+]]:_(s16) = G_OR [[SHL1]], [[LSHR]] - ; GFX9: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[OR]](s16) - ; GFX9: $vgpr0 = COPY [[ANYEXT]](s32) + ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; GFX9-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 + ; GFX9-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY]](s32) + ; GFX9-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY1]](s32) + ; GFX9-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[COPY2]](s32) + ; GFX9-NEXT: [[C:%[0-9]+]]:_(s16) = G_CONSTANT i16 15 + ; GFX9-NEXT: [[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC2]], [[C]] + ; GFX9-NEXT: [[C1:%[0-9]+]]:_(s16) = G_CONSTANT i16 -1 + ; GFX9-NEXT: [[XOR:%[0-9]+]]:_(s16) = G_XOR [[TRUNC2]], [[C1]] + ; GFX9-NEXT: [[AND1:%[0-9]+]]:_(s16) = G_AND [[XOR]], [[C]] + ; GFX9-NEXT: [[C2:%[0-9]+]]:_(s16) = G_CONSTANT i16 1 + ; GFX9-NEXT: [[SHL:%[0-9]+]]:_(s16) = G_SHL [[TRUNC]], [[C2]](s16) + ; GFX9-NEXT: [[SHL1:%[0-9]+]]:_(s16) = G_SHL [[SHL]], [[AND1]](s16) + ; GFX9-NEXT: [[LSHR:%[0-9]+]]:_(s16) = G_LSHR [[TRUNC1]], [[AND]](s16) + ; GFX9-NEXT: [[OR:%[0-9]+]]:_(s16) = G_OR [[SHL1]], [[LSHR]] + ; GFX9-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[OR]](s16) + ; GFX9-NEXT: $vgpr0 = COPY [[ANYEXT]](s32) %0:_(s32) = COPY $vgpr0 %1:_(s32) = COPY $vgpr1 %2:_(s32) = COPY $vgpr2 @@ -167,184 +167,184 @@ body: | ; SI-LABEL: name: test_fshr_v2s16_v2s16 ; SI: [[COPY:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0 - ; SI: [[COPY1:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr1 - ; SI: [[COPY2:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr2 - ; SI: [[C:%[0-9]+]]:_(s16) = G_CONSTANT i16 1 - ; SI: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY]](<2 x s16>) - ; SI: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; SI: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C1]](s32) - ; SI: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[COPY1]](<2 x s16>) - ; SI: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C1]](s32) - ; SI: [[C2:%[0-9]+]]:_(s16) = G_CONSTANT i16 15 - ; SI: [[AND:%[0-9]+]]:_(s16) = G_AND [[C]], [[C2]] - ; SI: [[C3:%[0-9]+]]:_(s16) = G_CONSTANT i16 -1 - ; SI: [[XOR:%[0-9]+]]:_(s16) = G_XOR [[C]], [[C3]] - ; SI: [[AND1:%[0-9]+]]:_(s16) = G_AND [[XOR]], [[C2]] - ; SI: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[AND]](s16) - ; SI: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[BITCAST]], [[ZEXT]](s32) - ; SI: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[SHL]](s32) - ; SI: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 - ; SI: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 - ; SI: [[AND2:%[0-9]+]]:_(s32) = G_AND [[BITCAST1]], [[C5]] - ; SI: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[AND2]], [[C4]](s32) - ; SI: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[AND1]](s16) - ; SI: [[AND3:%[0-9]+]]:_(s32) = G_AND [[LSHR2]], [[C5]] - ; SI: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[AND3]], [[ZEXT1]](s32) - ; SI: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR3]](s32) - ; SI: [[OR:%[0-9]+]]:_(s16) = G_OR [[TRUNC]], [[TRUNC1]] - ; SI: [[AND4:%[0-9]+]]:_(s16) = G_AND [[C]], [[C2]] - ; SI: [[XOR1:%[0-9]+]]:_(s16) = G_XOR [[C]], [[C3]] - ; SI: [[AND5:%[0-9]+]]:_(s16) = G_AND [[XOR1]], [[C2]] - ; SI: [[ZEXT2:%[0-9]+]]:_(s32) = G_ZEXT [[AND4]](s16) - ; SI: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[LSHR]], [[ZEXT2]](s32) - ; SI: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[SHL1]](s32) - ; SI: [[COPY3:%[0-9]+]]:_(s32) = COPY [[C4]](s32) - ; SI: [[AND6:%[0-9]+]]:_(s32) = G_AND [[LSHR1]], [[C5]] - ; SI: [[LSHR4:%[0-9]+]]:_(s32) = G_LSHR [[AND6]], [[COPY3]](s32) - ; SI: [[ZEXT3:%[0-9]+]]:_(s32) = G_ZEXT [[AND5]](s16) - ; SI: [[AND7:%[0-9]+]]:_(s32) = G_AND [[LSHR4]], [[C5]] - ; SI: 
[[LSHR5:%[0-9]+]]:_(s32) = G_LSHR [[AND7]], [[ZEXT3]](s32) - ; SI: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR5]](s32) - ; SI: [[OR1:%[0-9]+]]:_(s16) = G_OR [[TRUNC2]], [[TRUNC3]] - ; SI: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[COPY1]](<2 x s16>) - ; SI: [[LSHR6:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C1]](s32) - ; SI: [[COPY4:%[0-9]+]]:_(s32) = COPY [[C4]](s32) - ; SI: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[BITCAST2]], [[COPY4]](s32) - ; SI: [[COPY5:%[0-9]+]]:_(s32) = COPY [[C4]](s32) - ; SI: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[LSHR6]], [[COPY5]](s32) - ; SI: [[COPY6:%[0-9]+]]:_(s32) = COPY [[C5]](s32) - ; SI: [[COPY7:%[0-9]+]]:_(s32) = COPY [[C5]](s32) - ; SI: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[COPY7]], [[C1]](s32) - ; SI: [[OR2:%[0-9]+]]:_(s32) = G_OR [[COPY6]], [[SHL4]] - ; SI: [[BITCAST3:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR2]](s32) - ; SI: [[XOR2:%[0-9]+]]:_(<2 x s16>) = G_XOR [[COPY2]], [[BITCAST3]] - ; SI: [[BITCAST4:%[0-9]+]]:_(s32) = G_BITCAST [[XOR2]](<2 x s16>) - ; SI: [[TRUNC4:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST4]](s32) - ; SI: [[LSHR7:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST4]], [[C1]](s32) - ; SI: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR7]](s32) - ; SI: [[AND8:%[0-9]+]]:_(s16) = G_AND [[TRUNC4]], [[C2]] - ; SI: [[XOR3:%[0-9]+]]:_(s16) = G_XOR [[TRUNC4]], [[C3]] - ; SI: [[AND9:%[0-9]+]]:_(s16) = G_AND [[XOR3]], [[C2]] - ; SI: [[ZEXT4:%[0-9]+]]:_(s32) = G_ZEXT [[AND8]](s16) - ; SI: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[OR]](s16) - ; SI: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[ANYEXT]], [[ZEXT4]](s32) - ; SI: [[TRUNC6:%[0-9]+]]:_(s16) = G_TRUNC [[SHL5]](s32) - ; SI: [[COPY8:%[0-9]+]]:_(s32) = COPY [[C4]](s32) - ; SI: [[AND10:%[0-9]+]]:_(s32) = G_AND [[SHL2]], [[C5]] - ; SI: [[LSHR8:%[0-9]+]]:_(s32) = G_LSHR [[AND10]], [[COPY8]](s32) - ; SI: [[ZEXT5:%[0-9]+]]:_(s32) = G_ZEXT [[AND9]](s16) - ; SI: [[AND11:%[0-9]+]]:_(s32) = G_AND [[LSHR8]], [[C5]] - ; SI: [[LSHR9:%[0-9]+]]:_(s32) = G_LSHR [[AND11]], [[ZEXT5]](s32) - ; SI: [[TRUNC7:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR9]](s32) - ; SI: [[OR3:%[0-9]+]]:_(s16) = G_OR [[TRUNC6]], [[TRUNC7]] - ; SI: [[AND12:%[0-9]+]]:_(s16) = G_AND [[TRUNC5]], [[C2]] - ; SI: [[XOR4:%[0-9]+]]:_(s16) = G_XOR [[TRUNC5]], [[C3]] - ; SI: [[AND13:%[0-9]+]]:_(s16) = G_AND [[XOR4]], [[C2]] - ; SI: [[ZEXT6:%[0-9]+]]:_(s32) = G_ZEXT [[AND12]](s16) - ; SI: [[ANYEXT1:%[0-9]+]]:_(s32) = G_ANYEXT [[OR1]](s16) - ; SI: [[SHL6:%[0-9]+]]:_(s32) = G_SHL [[ANYEXT1]], [[ZEXT6]](s32) - ; SI: [[TRUNC8:%[0-9]+]]:_(s16) = G_TRUNC [[SHL6]](s32) - ; SI: [[COPY9:%[0-9]+]]:_(s32) = COPY [[C4]](s32) - ; SI: [[AND14:%[0-9]+]]:_(s32) = G_AND [[SHL3]], [[C5]] - ; SI: [[LSHR10:%[0-9]+]]:_(s32) = G_LSHR [[AND14]], [[COPY9]](s32) - ; SI: [[ZEXT7:%[0-9]+]]:_(s32) = G_ZEXT [[AND13]](s16) - ; SI: [[AND15:%[0-9]+]]:_(s32) = G_AND [[LSHR10]], [[C5]] - ; SI: [[LSHR11:%[0-9]+]]:_(s32) = G_LSHR [[AND15]], [[ZEXT7]](s32) - ; SI: [[TRUNC9:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR11]](s32) - ; SI: [[OR4:%[0-9]+]]:_(s16) = G_OR [[TRUNC8]], [[TRUNC9]] - ; SI: [[ZEXT8:%[0-9]+]]:_(s32) = G_ZEXT [[OR3]](s16) - ; SI: [[ZEXT9:%[0-9]+]]:_(s32) = G_ZEXT [[OR4]](s16) - ; SI: [[SHL7:%[0-9]+]]:_(s32) = G_SHL [[ZEXT9]], [[C1]](s32) - ; SI: [[OR5:%[0-9]+]]:_(s32) = G_OR [[ZEXT8]], [[SHL7]] - ; SI: [[BITCAST5:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR5]](s32) - ; SI: $vgpr0 = COPY [[BITCAST5]](<2 x s16>) + ; SI-NEXT: [[COPY1:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr1 + ; SI-NEXT: [[COPY2:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr2 + ; SI-NEXT: [[C:%[0-9]+]]:_(s16) = G_CONSTANT i16 1 + ; SI-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY]](<2 x 
s16>) + ; SI-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; SI-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C1]](s32) + ; SI-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[COPY1]](<2 x s16>) + ; SI-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C1]](s32) + ; SI-NEXT: [[C2:%[0-9]+]]:_(s16) = G_CONSTANT i16 15 + ; SI-NEXT: [[AND:%[0-9]+]]:_(s16) = G_AND [[C]], [[C2]] + ; SI-NEXT: [[C3:%[0-9]+]]:_(s16) = G_CONSTANT i16 -1 + ; SI-NEXT: [[XOR:%[0-9]+]]:_(s16) = G_XOR [[C]], [[C3]] + ; SI-NEXT: [[AND1:%[0-9]+]]:_(s16) = G_AND [[XOR]], [[C2]] + ; SI-NEXT: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[AND]](s16) + ; SI-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[BITCAST]], [[ZEXT]](s32) + ; SI-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[SHL]](s32) + ; SI-NEXT: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 + ; SI-NEXT: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 + ; SI-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[BITCAST1]], [[C5]] + ; SI-NEXT: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[AND2]], [[C4]](s32) + ; SI-NEXT: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[AND1]](s16) + ; SI-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[LSHR2]], [[C5]] + ; SI-NEXT: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[AND3]], [[ZEXT1]](s32) + ; SI-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR3]](s32) + ; SI-NEXT: [[OR:%[0-9]+]]:_(s16) = G_OR [[TRUNC]], [[TRUNC1]] + ; SI-NEXT: [[AND4:%[0-9]+]]:_(s16) = G_AND [[C]], [[C2]] + ; SI-NEXT: [[XOR1:%[0-9]+]]:_(s16) = G_XOR [[C]], [[C3]] + ; SI-NEXT: [[AND5:%[0-9]+]]:_(s16) = G_AND [[XOR1]], [[C2]] + ; SI-NEXT: [[ZEXT2:%[0-9]+]]:_(s32) = G_ZEXT [[AND4]](s16) + ; SI-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[LSHR]], [[ZEXT2]](s32) + ; SI-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[SHL1]](s32) + ; SI-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY [[C4]](s32) + ; SI-NEXT: [[AND6:%[0-9]+]]:_(s32) = G_AND [[LSHR1]], [[C5]] + ; SI-NEXT: [[LSHR4:%[0-9]+]]:_(s32) = G_LSHR [[AND6]], [[COPY3]](s32) + ; SI-NEXT: [[ZEXT3:%[0-9]+]]:_(s32) = G_ZEXT [[AND5]](s16) + ; SI-NEXT: [[AND7:%[0-9]+]]:_(s32) = G_AND [[LSHR4]], [[C5]] + ; SI-NEXT: [[LSHR5:%[0-9]+]]:_(s32) = G_LSHR [[AND7]], [[ZEXT3]](s32) + ; SI-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR5]](s32) + ; SI-NEXT: [[OR1:%[0-9]+]]:_(s16) = G_OR [[TRUNC2]], [[TRUNC3]] + ; SI-NEXT: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[COPY1]](<2 x s16>) + ; SI-NEXT: [[LSHR6:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C1]](s32) + ; SI-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY [[C4]](s32) + ; SI-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[BITCAST2]], [[COPY4]](s32) + ; SI-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY [[C4]](s32) + ; SI-NEXT: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[LSHR6]], [[COPY5]](s32) + ; SI-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY [[C5]](s32) + ; SI-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY [[C5]](s32) + ; SI-NEXT: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[COPY7]], [[C1]](s32) + ; SI-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[COPY6]], [[SHL4]] + ; SI-NEXT: [[BITCAST3:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR2]](s32) + ; SI-NEXT: [[XOR2:%[0-9]+]]:_(<2 x s16>) = G_XOR [[COPY2]], [[BITCAST3]] + ; SI-NEXT: [[BITCAST4:%[0-9]+]]:_(s32) = G_BITCAST [[XOR2]](<2 x s16>) + ; SI-NEXT: [[TRUNC4:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST4]](s32) + ; SI-NEXT: [[LSHR7:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST4]], [[C1]](s32) + ; SI-NEXT: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR7]](s32) + ; SI-NEXT: [[AND8:%[0-9]+]]:_(s16) = G_AND [[TRUNC4]], [[C2]] + ; SI-NEXT: [[XOR3:%[0-9]+]]:_(s16) = G_XOR [[TRUNC4]], [[C3]] + ; SI-NEXT: [[AND9:%[0-9]+]]:_(s16) = G_AND [[XOR3]], [[C2]] + ; SI-NEXT: [[ZEXT4:%[0-9]+]]:_(s32) = G_ZEXT [[AND8]](s16) + ; 
SI-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[OR]](s16) + ; SI-NEXT: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[ANYEXT]], [[ZEXT4]](s32) + ; SI-NEXT: [[TRUNC6:%[0-9]+]]:_(s16) = G_TRUNC [[SHL5]](s32) + ; SI-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY [[C4]](s32) + ; SI-NEXT: [[AND10:%[0-9]+]]:_(s32) = G_AND [[SHL2]], [[C5]] + ; SI-NEXT: [[LSHR8:%[0-9]+]]:_(s32) = G_LSHR [[AND10]], [[COPY8]](s32) + ; SI-NEXT: [[ZEXT5:%[0-9]+]]:_(s32) = G_ZEXT [[AND9]](s16) + ; SI-NEXT: [[AND11:%[0-9]+]]:_(s32) = G_AND [[LSHR8]], [[C5]] + ; SI-NEXT: [[LSHR9:%[0-9]+]]:_(s32) = G_LSHR [[AND11]], [[ZEXT5]](s32) + ; SI-NEXT: [[TRUNC7:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR9]](s32) + ; SI-NEXT: [[OR3:%[0-9]+]]:_(s16) = G_OR [[TRUNC6]], [[TRUNC7]] + ; SI-NEXT: [[AND12:%[0-9]+]]:_(s16) = G_AND [[TRUNC5]], [[C2]] + ; SI-NEXT: [[XOR4:%[0-9]+]]:_(s16) = G_XOR [[TRUNC5]], [[C3]] + ; SI-NEXT: [[AND13:%[0-9]+]]:_(s16) = G_AND [[XOR4]], [[C2]] + ; SI-NEXT: [[ZEXT6:%[0-9]+]]:_(s32) = G_ZEXT [[AND12]](s16) + ; SI-NEXT: [[ANYEXT1:%[0-9]+]]:_(s32) = G_ANYEXT [[OR1]](s16) + ; SI-NEXT: [[SHL6:%[0-9]+]]:_(s32) = G_SHL [[ANYEXT1]], [[ZEXT6]](s32) + ; SI-NEXT: [[TRUNC8:%[0-9]+]]:_(s16) = G_TRUNC [[SHL6]](s32) + ; SI-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY [[C4]](s32) + ; SI-NEXT: [[AND14:%[0-9]+]]:_(s32) = G_AND [[SHL3]], [[C5]] + ; SI-NEXT: [[LSHR10:%[0-9]+]]:_(s32) = G_LSHR [[AND14]], [[COPY9]](s32) + ; SI-NEXT: [[ZEXT7:%[0-9]+]]:_(s32) = G_ZEXT [[AND13]](s16) + ; SI-NEXT: [[AND15:%[0-9]+]]:_(s32) = G_AND [[LSHR10]], [[C5]] + ; SI-NEXT: [[LSHR11:%[0-9]+]]:_(s32) = G_LSHR [[AND15]], [[ZEXT7]](s32) + ; SI-NEXT: [[TRUNC9:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR11]](s32) + ; SI-NEXT: [[OR4:%[0-9]+]]:_(s16) = G_OR [[TRUNC8]], [[TRUNC9]] + ; SI-NEXT: [[ZEXT8:%[0-9]+]]:_(s32) = G_ZEXT [[OR3]](s16) + ; SI-NEXT: [[ZEXT9:%[0-9]+]]:_(s32) = G_ZEXT [[OR4]](s16) + ; SI-NEXT: [[SHL7:%[0-9]+]]:_(s32) = G_SHL [[ZEXT9]], [[C1]](s32) + ; SI-NEXT: [[OR5:%[0-9]+]]:_(s32) = G_OR [[ZEXT8]], [[SHL7]] + ; SI-NEXT: [[BITCAST5:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR5]](s32) + ; SI-NEXT: $vgpr0 = COPY [[BITCAST5]](<2 x s16>) ; VI-LABEL: name: test_fshr_v2s16_v2s16 ; VI: [[COPY:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0 - ; VI: [[COPY1:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr1 - ; VI: [[COPY2:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr2 - ; VI: [[C:%[0-9]+]]:_(s16) = G_CONSTANT i16 1 - ; VI: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY]](<2 x s16>) - ; VI: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST]](s32) - ; VI: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; VI: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C1]](s32) - ; VI: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) - ; VI: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[COPY1]](<2 x s16>) - ; VI: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST1]](s32) - ; VI: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C1]](s32) - ; VI: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR1]](s32) - ; VI: [[C2:%[0-9]+]]:_(s16) = G_CONSTANT i16 15 - ; VI: [[AND:%[0-9]+]]:_(s16) = G_AND [[C]], [[C2]] - ; VI: [[C3:%[0-9]+]]:_(s16) = G_CONSTANT i16 -1 - ; VI: [[XOR:%[0-9]+]]:_(s16) = G_XOR [[C]], [[C3]] - ; VI: [[AND1:%[0-9]+]]:_(s16) = G_AND [[XOR]], [[C2]] - ; VI: [[SHL:%[0-9]+]]:_(s16) = G_SHL [[TRUNC]], [[AND]](s16) - ; VI: [[LSHR2:%[0-9]+]]:_(s16) = G_LSHR [[TRUNC2]], [[C]](s16) - ; VI: [[LSHR3:%[0-9]+]]:_(s16) = G_LSHR [[LSHR2]], [[AND1]](s16) - ; VI: [[OR:%[0-9]+]]:_(s16) = G_OR [[SHL]], [[LSHR3]] - ; VI: [[AND2:%[0-9]+]]:_(s16) = G_AND [[C]], [[C2]] - ; VI: [[XOR1:%[0-9]+]]:_(s16) = G_XOR [[C]], [[C3]] - ; VI: [[AND3:%[0-9]+]]:_(s16) = G_AND [[XOR1]], [[C2]] - ; 
VI: [[SHL1:%[0-9]+]]:_(s16) = G_SHL [[TRUNC1]], [[AND2]](s16) - ; VI: [[LSHR4:%[0-9]+]]:_(s16) = G_LSHR [[TRUNC3]], [[C]](s16) - ; VI: [[LSHR5:%[0-9]+]]:_(s16) = G_LSHR [[LSHR4]], [[AND3]](s16) - ; VI: [[OR1:%[0-9]+]]:_(s16) = G_OR [[SHL1]], [[LSHR5]] - ; VI: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[COPY1]](<2 x s16>) - ; VI: [[TRUNC4:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST2]](s32) - ; VI: [[LSHR6:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C1]](s32) - ; VI: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR6]](s32) - ; VI: [[SHL2:%[0-9]+]]:_(s16) = G_SHL [[TRUNC4]], [[C]](s16) - ; VI: [[SHL3:%[0-9]+]]:_(s16) = G_SHL [[TRUNC5]], [[C]](s16) - ; VI: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 - ; VI: [[COPY3:%[0-9]+]]:_(s32) = COPY [[C4]](s32) - ; VI: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[C4]], [[C1]](s32) - ; VI: [[OR2:%[0-9]+]]:_(s32) = G_OR [[COPY3]], [[SHL4]] - ; VI: [[BITCAST3:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR2]](s32) - ; VI: [[XOR2:%[0-9]+]]:_(<2 x s16>) = G_XOR [[COPY2]], [[BITCAST3]] - ; VI: [[BITCAST4:%[0-9]+]]:_(s32) = G_BITCAST [[XOR2]](<2 x s16>) - ; VI: [[TRUNC6:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST4]](s32) - ; VI: [[LSHR7:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST4]], [[C1]](s32) - ; VI: [[TRUNC7:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR7]](s32) - ; VI: [[AND4:%[0-9]+]]:_(s16) = G_AND [[TRUNC6]], [[C2]] - ; VI: [[XOR3:%[0-9]+]]:_(s16) = G_XOR [[TRUNC6]], [[C3]] - ; VI: [[AND5:%[0-9]+]]:_(s16) = G_AND [[XOR3]], [[C2]] - ; VI: [[SHL5:%[0-9]+]]:_(s16) = G_SHL [[OR]], [[AND4]](s16) - ; VI: [[LSHR8:%[0-9]+]]:_(s16) = G_LSHR [[SHL2]], [[C]](s16) - ; VI: [[LSHR9:%[0-9]+]]:_(s16) = G_LSHR [[LSHR8]], [[AND5]](s16) - ; VI: [[OR3:%[0-9]+]]:_(s16) = G_OR [[SHL5]], [[LSHR9]] - ; VI: [[AND6:%[0-9]+]]:_(s16) = G_AND [[TRUNC7]], [[C2]] - ; VI: [[XOR4:%[0-9]+]]:_(s16) = G_XOR [[TRUNC7]], [[C3]] - ; VI: [[AND7:%[0-9]+]]:_(s16) = G_AND [[XOR4]], [[C2]] - ; VI: [[SHL6:%[0-9]+]]:_(s16) = G_SHL [[OR1]], [[AND6]](s16) - ; VI: [[LSHR10:%[0-9]+]]:_(s16) = G_LSHR [[SHL3]], [[C]](s16) - ; VI: [[LSHR11:%[0-9]+]]:_(s16) = G_LSHR [[LSHR10]], [[AND7]](s16) - ; VI: [[OR4:%[0-9]+]]:_(s16) = G_OR [[SHL6]], [[LSHR11]] - ; VI: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[OR3]](s16) - ; VI: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[OR4]](s16) - ; VI: [[SHL7:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C1]](s32) - ; VI: [[OR5:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL7]] - ; VI: [[BITCAST5:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR5]](s32) - ; VI: $vgpr0 = COPY [[BITCAST5]](<2 x s16>) + ; VI-NEXT: [[COPY1:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr1 + ; VI-NEXT: [[COPY2:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr2 + ; VI-NEXT: [[C:%[0-9]+]]:_(s16) = G_CONSTANT i16 1 + ; VI-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY]](<2 x s16>) + ; VI-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST]](s32) + ; VI-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; VI-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C1]](s32) + ; VI-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) + ; VI-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[COPY1]](<2 x s16>) + ; VI-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST1]](s32) + ; VI-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C1]](s32) + ; VI-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR1]](s32) + ; VI-NEXT: [[C2:%[0-9]+]]:_(s16) = G_CONSTANT i16 15 + ; VI-NEXT: [[AND:%[0-9]+]]:_(s16) = G_AND [[C]], [[C2]] + ; VI-NEXT: [[C3:%[0-9]+]]:_(s16) = G_CONSTANT i16 -1 + ; VI-NEXT: [[XOR:%[0-9]+]]:_(s16) = G_XOR [[C]], [[C3]] + ; VI-NEXT: [[AND1:%[0-9]+]]:_(s16) = G_AND [[XOR]], [[C2]] + ; VI-NEXT: [[SHL:%[0-9]+]]:_(s16) = G_SHL 
[[TRUNC]], [[AND]](s16) + ; VI-NEXT: [[LSHR2:%[0-9]+]]:_(s16) = G_LSHR [[TRUNC2]], [[C]](s16) + ; VI-NEXT: [[LSHR3:%[0-9]+]]:_(s16) = G_LSHR [[LSHR2]], [[AND1]](s16) + ; VI-NEXT: [[OR:%[0-9]+]]:_(s16) = G_OR [[SHL]], [[LSHR3]] + ; VI-NEXT: [[AND2:%[0-9]+]]:_(s16) = G_AND [[C]], [[C2]] + ; VI-NEXT: [[XOR1:%[0-9]+]]:_(s16) = G_XOR [[C]], [[C3]] + ; VI-NEXT: [[AND3:%[0-9]+]]:_(s16) = G_AND [[XOR1]], [[C2]] + ; VI-NEXT: [[SHL1:%[0-9]+]]:_(s16) = G_SHL [[TRUNC1]], [[AND2]](s16) + ; VI-NEXT: [[LSHR4:%[0-9]+]]:_(s16) = G_LSHR [[TRUNC3]], [[C]](s16) + ; VI-NEXT: [[LSHR5:%[0-9]+]]:_(s16) = G_LSHR [[LSHR4]], [[AND3]](s16) + ; VI-NEXT: [[OR1:%[0-9]+]]:_(s16) = G_OR [[SHL1]], [[LSHR5]] + ; VI-NEXT: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[COPY1]](<2 x s16>) + ; VI-NEXT: [[TRUNC4:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST2]](s32) + ; VI-NEXT: [[LSHR6:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C1]](s32) + ; VI-NEXT: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR6]](s32) + ; VI-NEXT: [[SHL2:%[0-9]+]]:_(s16) = G_SHL [[TRUNC4]], [[C]](s16) + ; VI-NEXT: [[SHL3:%[0-9]+]]:_(s16) = G_SHL [[TRUNC5]], [[C]](s16) + ; VI-NEXT: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 + ; VI-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY [[C4]](s32) + ; VI-NEXT: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[C4]], [[C1]](s32) + ; VI-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[COPY3]], [[SHL4]] + ; VI-NEXT: [[BITCAST3:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR2]](s32) + ; VI-NEXT: [[XOR2:%[0-9]+]]:_(<2 x s16>) = G_XOR [[COPY2]], [[BITCAST3]] + ; VI-NEXT: [[BITCAST4:%[0-9]+]]:_(s32) = G_BITCAST [[XOR2]](<2 x s16>) + ; VI-NEXT: [[TRUNC6:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST4]](s32) + ; VI-NEXT: [[LSHR7:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST4]], [[C1]](s32) + ; VI-NEXT: [[TRUNC7:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR7]](s32) + ; VI-NEXT: [[AND4:%[0-9]+]]:_(s16) = G_AND [[TRUNC6]], [[C2]] + ; VI-NEXT: [[XOR3:%[0-9]+]]:_(s16) = G_XOR [[TRUNC6]], [[C3]] + ; VI-NEXT: [[AND5:%[0-9]+]]:_(s16) = G_AND [[XOR3]], [[C2]] + ; VI-NEXT: [[SHL5:%[0-9]+]]:_(s16) = G_SHL [[OR]], [[AND4]](s16) + ; VI-NEXT: [[LSHR8:%[0-9]+]]:_(s16) = G_LSHR [[SHL2]], [[C]](s16) + ; VI-NEXT: [[LSHR9:%[0-9]+]]:_(s16) = G_LSHR [[LSHR8]], [[AND5]](s16) + ; VI-NEXT: [[OR3:%[0-9]+]]:_(s16) = G_OR [[SHL5]], [[LSHR9]] + ; VI-NEXT: [[AND6:%[0-9]+]]:_(s16) = G_AND [[TRUNC7]], [[C2]] + ; VI-NEXT: [[XOR4:%[0-9]+]]:_(s16) = G_XOR [[TRUNC7]], [[C3]] + ; VI-NEXT: [[AND7:%[0-9]+]]:_(s16) = G_AND [[XOR4]], [[C2]] + ; VI-NEXT: [[SHL6:%[0-9]+]]:_(s16) = G_SHL [[OR1]], [[AND6]](s16) + ; VI-NEXT: [[LSHR10:%[0-9]+]]:_(s16) = G_LSHR [[SHL3]], [[C]](s16) + ; VI-NEXT: [[LSHR11:%[0-9]+]]:_(s16) = G_LSHR [[LSHR10]], [[AND7]](s16) + ; VI-NEXT: [[OR4:%[0-9]+]]:_(s16) = G_OR [[SHL6]], [[LSHR11]] + ; VI-NEXT: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[OR3]](s16) + ; VI-NEXT: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[OR4]](s16) + ; VI-NEXT: [[SHL7:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C1]](s32) + ; VI-NEXT: [[OR5:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL7]] + ; VI-NEXT: [[BITCAST5:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR5]](s32) + ; VI-NEXT: $vgpr0 = COPY [[BITCAST5]](<2 x s16>) ; GFX9-LABEL: name: test_fshr_v2s16_v2s16 ; GFX9: [[COPY:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0 - ; GFX9: [[COPY1:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr1 - ; GFX9: [[COPY2:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr2 - ; GFX9: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 15 - ; GFX9: [[COPY3:%[0-9]+]]:_(s32) = COPY [[C]](s32) - ; GFX9: [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY3]](s32), [[C]](s32) - ; GFX9: [[AND:%[0-9]+]]:_(<2 x s16>) = G_AND [[COPY2]], [[BUILD_VECTOR_TRUNC]] 
- ; GFX9: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 -1 - ; GFX9: [[COPY4:%[0-9]+]]:_(s32) = COPY [[C1]](s32) - ; GFX9: [[BUILD_VECTOR_TRUNC1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY4]](s32), [[C1]](s32) - ; GFX9: [[XOR:%[0-9]+]]:_(<2 x s16>) = G_XOR [[COPY2]], [[BUILD_VECTOR_TRUNC1]] - ; GFX9: [[AND1:%[0-9]+]]:_(<2 x s16>) = G_AND [[XOR]], [[BUILD_VECTOR_TRUNC]] - ; GFX9: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 - ; GFX9: [[COPY5:%[0-9]+]]:_(s32) = COPY [[C2]](s32) - ; GFX9: [[BUILD_VECTOR_TRUNC2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY5]](s32), [[C2]](s32) - ; GFX9: [[SHL:%[0-9]+]]:_(<2 x s16>) = G_SHL [[COPY]], [[BUILD_VECTOR_TRUNC2]](<2 x s16>) - ; GFX9: [[SHL1:%[0-9]+]]:_(<2 x s16>) = G_SHL [[SHL]], [[AND1]](<2 x s16>) - ; GFX9: [[LSHR:%[0-9]+]]:_(<2 x s16>) = G_LSHR [[COPY1]], [[AND]](<2 x s16>) - ; GFX9: [[OR:%[0-9]+]]:_(<2 x s16>) = G_OR [[SHL1]], [[LSHR]] - ; GFX9: $vgpr0 = COPY [[OR]](<2 x s16>) + ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr1 + ; GFX9-NEXT: [[COPY2:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr2 + ; GFX9-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 15 + ; GFX9-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY [[C]](s32) + ; GFX9-NEXT: [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY3]](s32), [[C]](s32) + ; GFX9-NEXT: [[AND:%[0-9]+]]:_(<2 x s16>) = G_AND [[COPY2]], [[BUILD_VECTOR_TRUNC]] + ; GFX9-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 -1 + ; GFX9-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY [[C1]](s32) + ; GFX9-NEXT: [[BUILD_VECTOR_TRUNC1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY4]](s32), [[C1]](s32) + ; GFX9-NEXT: [[XOR:%[0-9]+]]:_(<2 x s16>) = G_XOR [[COPY2]], [[BUILD_VECTOR_TRUNC1]] + ; GFX9-NEXT: [[AND1:%[0-9]+]]:_(<2 x s16>) = G_AND [[XOR]], [[BUILD_VECTOR_TRUNC]] + ; GFX9-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 + ; GFX9-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY [[C2]](s32) + ; GFX9-NEXT: [[BUILD_VECTOR_TRUNC2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY5]](s32), [[C2]](s32) + ; GFX9-NEXT: [[SHL:%[0-9]+]]:_(<2 x s16>) = G_SHL [[COPY]], [[BUILD_VECTOR_TRUNC2]](<2 x s16>) + ; GFX9-NEXT: [[SHL1:%[0-9]+]]:_(<2 x s16>) = G_SHL [[SHL]], [[AND1]](<2 x s16>) + ; GFX9-NEXT: [[LSHR:%[0-9]+]]:_(<2 x s16>) = G_LSHR [[COPY1]], [[AND]](<2 x s16>) + ; GFX9-NEXT: [[OR:%[0-9]+]]:_(<2 x s16>) = G_OR [[SHL1]], [[LSHR]] + ; GFX9-NEXT: $vgpr0 = COPY [[OR]](<2 x s16>) %0:_(<2 x s16>) = COPY $vgpr0 %1:_(<2 x s16>) = COPY $vgpr1 %2:_(<2 x s16>) = COPY $vgpr2 @@ -360,55 +360,55 @@ body: | ; SI-LABEL: name: test_fshr_s64_s64 ; SI: [[COPY:%[0-9]+]]:_(s64) = COPY $vgpr0_vgpr1 - ; SI: [[COPY1:%[0-9]+]]:_(s64) = COPY $vgpr2_vgpr3 - ; SI: [[COPY2:%[0-9]+]]:_(s64) = COPY $vgpr4_vgpr5 - ; SI: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 63 - ; SI: [[AND:%[0-9]+]]:_(s64) = G_AND [[COPY2]], [[C]] - ; SI: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 -1 - ; SI: [[XOR:%[0-9]+]]:_(s64) = G_XOR [[COPY2]], [[C1]] - ; SI: [[AND1:%[0-9]+]]:_(s64) = G_AND [[XOR]], [[C]] - ; SI: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 - ; SI: [[SHL:%[0-9]+]]:_(s64) = G_SHL [[COPY]], [[C2]](s32) - ; SI: [[TRUNC:%[0-9]+]]:_(s32) = G_TRUNC [[AND1]](s64) - ; SI: [[SHL1:%[0-9]+]]:_(s64) = G_SHL [[SHL]], [[TRUNC]](s32) - ; SI: [[TRUNC1:%[0-9]+]]:_(s32) = G_TRUNC [[AND]](s64) - ; SI: [[LSHR:%[0-9]+]]:_(s64) = G_LSHR [[COPY1]], [[TRUNC1]](s32) - ; SI: [[OR:%[0-9]+]]:_(s64) = G_OR [[SHL1]], [[LSHR]] - ; SI: $vgpr0_vgpr1 = COPY [[OR]](s64) + ; SI-NEXT: [[COPY1:%[0-9]+]]:_(s64) = COPY $vgpr2_vgpr3 + ; SI-NEXT: [[COPY2:%[0-9]+]]:_(s64) = COPY $vgpr4_vgpr5 + ; SI-NEXT: [[C:%[0-9]+]]:_(s64) = 
G_CONSTANT i64 63 + ; SI-NEXT: [[AND:%[0-9]+]]:_(s64) = G_AND [[COPY2]], [[C]] + ; SI-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 -1 + ; SI-NEXT: [[XOR:%[0-9]+]]:_(s64) = G_XOR [[COPY2]], [[C1]] + ; SI-NEXT: [[AND1:%[0-9]+]]:_(s64) = G_AND [[XOR]], [[C]] + ; SI-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 + ; SI-NEXT: [[SHL:%[0-9]+]]:_(s64) = G_SHL [[COPY]], [[C2]](s32) + ; SI-NEXT: [[TRUNC:%[0-9]+]]:_(s32) = G_TRUNC [[AND1]](s64) + ; SI-NEXT: [[SHL1:%[0-9]+]]:_(s64) = G_SHL [[SHL]], [[TRUNC]](s32) + ; SI-NEXT: [[TRUNC1:%[0-9]+]]:_(s32) = G_TRUNC [[AND]](s64) + ; SI-NEXT: [[LSHR:%[0-9]+]]:_(s64) = G_LSHR [[COPY1]], [[TRUNC1]](s32) + ; SI-NEXT: [[OR:%[0-9]+]]:_(s64) = G_OR [[SHL1]], [[LSHR]] + ; SI-NEXT: $vgpr0_vgpr1 = COPY [[OR]](s64) ; VI-LABEL: name: test_fshr_s64_s64 ; VI: [[COPY:%[0-9]+]]:_(s64) = COPY $vgpr0_vgpr1 - ; VI: [[COPY1:%[0-9]+]]:_(s64) = COPY $vgpr2_vgpr3 - ; VI: [[COPY2:%[0-9]+]]:_(s64) = COPY $vgpr4_vgpr5 - ; VI: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 63 - ; VI: [[AND:%[0-9]+]]:_(s64) = G_AND [[COPY2]], [[C]] - ; VI: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 -1 - ; VI: [[XOR:%[0-9]+]]:_(s64) = G_XOR [[COPY2]], [[C1]] - ; VI: [[AND1:%[0-9]+]]:_(s64) = G_AND [[XOR]], [[C]] - ; VI: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 - ; VI: [[SHL:%[0-9]+]]:_(s64) = G_SHL [[COPY]], [[C2]](s32) - ; VI: [[TRUNC:%[0-9]+]]:_(s32) = G_TRUNC [[AND1]](s64) - ; VI: [[SHL1:%[0-9]+]]:_(s64) = G_SHL [[SHL]], [[TRUNC]](s32) - ; VI: [[TRUNC1:%[0-9]+]]:_(s32) = G_TRUNC [[AND]](s64) - ; VI: [[LSHR:%[0-9]+]]:_(s64) = G_LSHR [[COPY1]], [[TRUNC1]](s32) - ; VI: [[OR:%[0-9]+]]:_(s64) = G_OR [[SHL1]], [[LSHR]] - ; VI: $vgpr0_vgpr1 = COPY [[OR]](s64) + ; VI-NEXT: [[COPY1:%[0-9]+]]:_(s64) = COPY $vgpr2_vgpr3 + ; VI-NEXT: [[COPY2:%[0-9]+]]:_(s64) = COPY $vgpr4_vgpr5 + ; VI-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 63 + ; VI-NEXT: [[AND:%[0-9]+]]:_(s64) = G_AND [[COPY2]], [[C]] + ; VI-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 -1 + ; VI-NEXT: [[XOR:%[0-9]+]]:_(s64) = G_XOR [[COPY2]], [[C1]] + ; VI-NEXT: [[AND1:%[0-9]+]]:_(s64) = G_AND [[XOR]], [[C]] + ; VI-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 + ; VI-NEXT: [[SHL:%[0-9]+]]:_(s64) = G_SHL [[COPY]], [[C2]](s32) + ; VI-NEXT: [[TRUNC:%[0-9]+]]:_(s32) = G_TRUNC [[AND1]](s64) + ; VI-NEXT: [[SHL1:%[0-9]+]]:_(s64) = G_SHL [[SHL]], [[TRUNC]](s32) + ; VI-NEXT: [[TRUNC1:%[0-9]+]]:_(s32) = G_TRUNC [[AND]](s64) + ; VI-NEXT: [[LSHR:%[0-9]+]]:_(s64) = G_LSHR [[COPY1]], [[TRUNC1]](s32) + ; VI-NEXT: [[OR:%[0-9]+]]:_(s64) = G_OR [[SHL1]], [[LSHR]] + ; VI-NEXT: $vgpr0_vgpr1 = COPY [[OR]](s64) ; GFX9-LABEL: name: test_fshr_s64_s64 ; GFX9: [[COPY:%[0-9]+]]:_(s64) = COPY $vgpr0_vgpr1 - ; GFX9: [[COPY1:%[0-9]+]]:_(s64) = COPY $vgpr2_vgpr3 - ; GFX9: [[COPY2:%[0-9]+]]:_(s64) = COPY $vgpr4_vgpr5 - ; GFX9: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 63 - ; GFX9: [[AND:%[0-9]+]]:_(s64) = G_AND [[COPY2]], [[C]] - ; GFX9: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 -1 - ; GFX9: [[XOR:%[0-9]+]]:_(s64) = G_XOR [[COPY2]], [[C1]] - ; GFX9: [[AND1:%[0-9]+]]:_(s64) = G_AND [[XOR]], [[C]] - ; GFX9: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 - ; GFX9: [[SHL:%[0-9]+]]:_(s64) = G_SHL [[COPY]], [[C2]](s32) - ; GFX9: [[TRUNC:%[0-9]+]]:_(s32) = G_TRUNC [[AND1]](s64) - ; GFX9: [[SHL1:%[0-9]+]]:_(s64) = G_SHL [[SHL]], [[TRUNC]](s32) - ; GFX9: [[TRUNC1:%[0-9]+]]:_(s32) = G_TRUNC [[AND]](s64) - ; GFX9: [[LSHR:%[0-9]+]]:_(s64) = G_LSHR [[COPY1]], [[TRUNC1]](s32) - ; GFX9: [[OR:%[0-9]+]]:_(s64) = G_OR [[SHL1]], [[LSHR]] - ; GFX9: $vgpr0_vgpr1 = COPY [[OR]](s64) + ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(s64) = COPY 
$vgpr2_vgpr3 + ; GFX9-NEXT: [[COPY2:%[0-9]+]]:_(s64) = COPY $vgpr4_vgpr5 + ; GFX9-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 63 + ; GFX9-NEXT: [[AND:%[0-9]+]]:_(s64) = G_AND [[COPY2]], [[C]] + ; GFX9-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 -1 + ; GFX9-NEXT: [[XOR:%[0-9]+]]:_(s64) = G_XOR [[COPY2]], [[C1]] + ; GFX9-NEXT: [[AND1:%[0-9]+]]:_(s64) = G_AND [[XOR]], [[C]] + ; GFX9-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 + ; GFX9-NEXT: [[SHL:%[0-9]+]]:_(s64) = G_SHL [[COPY]], [[C2]](s32) + ; GFX9-NEXT: [[TRUNC:%[0-9]+]]:_(s32) = G_TRUNC [[AND1]](s64) + ; GFX9-NEXT: [[SHL1:%[0-9]+]]:_(s64) = G_SHL [[SHL]], [[TRUNC]](s32) + ; GFX9-NEXT: [[TRUNC1:%[0-9]+]]:_(s32) = G_TRUNC [[AND]](s64) + ; GFX9-NEXT: [[LSHR:%[0-9]+]]:_(s64) = G_LSHR [[COPY1]], [[TRUNC1]](s32) + ; GFX9-NEXT: [[OR:%[0-9]+]]:_(s64) = G_OR [[SHL1]], [[LSHR]] + ; GFX9-NEXT: $vgpr0_vgpr1 = COPY [[OR]](s64) %0:_(s64) = COPY $vgpr0_vgpr1 %1:_(s64) = COPY $vgpr2_vgpr3 %2:_(s64) = COPY $vgpr4_vgpr5 @@ -424,80 +424,80 @@ body: | ; SI-LABEL: name: test_fshr_s8_s8 ; SI: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; SI: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; SI: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; SI: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 7 - ; SI: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY2]], [[C]] - ; SI: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 -1 - ; SI: [[XOR:%[0-9]+]]:_(s32) = G_XOR [[COPY2]], [[C1]] - ; SI: [[AND1:%[0-9]+]]:_(s32) = G_AND [[XOR]], [[C]] - ; SI: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 - ; SI: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 255 - ; SI: [[COPY3:%[0-9]+]]:_(s32) = COPY [[C2]](s32) - ; SI: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY]], [[COPY3]](s32) - ; SI: [[AND2:%[0-9]+]]:_(s32) = G_AND [[AND1]], [[C3]] - ; SI: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[SHL]], [[AND2]](s32) - ; SI: [[AND3:%[0-9]+]]:_(s32) = G_AND [[AND]], [[C3]] - ; SI: [[AND4:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C3]] - ; SI: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[AND4]], [[AND3]](s32) - ; SI: [[OR:%[0-9]+]]:_(s32) = G_OR [[SHL1]], [[LSHR]] - ; SI: $vgpr0 = COPY [[OR]](s32) + ; SI-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; SI-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 + ; SI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 7 + ; SI-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY2]], [[C]] + ; SI-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 -1 + ; SI-NEXT: [[XOR:%[0-9]+]]:_(s32) = G_XOR [[COPY2]], [[C1]] + ; SI-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[XOR]], [[C]] + ; SI-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 + ; SI-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 255 + ; SI-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY [[C2]](s32) + ; SI-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY]], [[COPY3]](s32) + ; SI-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[AND1]], [[C3]] + ; SI-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[SHL]], [[AND2]](s32) + ; SI-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[AND]], [[C3]] + ; SI-NEXT: [[AND4:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C3]] + ; SI-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[AND4]], [[AND3]](s32) + ; SI-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[SHL1]], [[LSHR]] + ; SI-NEXT: $vgpr0 = COPY [[OR]](s32) ; VI-LABEL: name: test_fshr_s8_s8 ; VI: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; VI: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; VI: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; VI: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 7 - ; VI: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY2]], [[C]] - ; VI: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 -1 - ; VI: [[XOR:%[0-9]+]]:_(s32) = G_XOR [[COPY2]], [[C1]] - ; VI: [[AND1:%[0-9]+]]:_(s32) = G_AND [[XOR]], [[C]] - ; VI: 
[[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 - ; VI: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 255 - ; VI: [[COPY3:%[0-9]+]]:_(s32) = COPY [[C2]](s32) - ; VI: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY]](s32) - ; VI: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY3]](s32) - ; VI: [[SHL:%[0-9]+]]:_(s16) = G_SHL [[TRUNC]], [[TRUNC1]](s16) - ; VI: [[AND2:%[0-9]+]]:_(s32) = G_AND [[AND1]], [[C3]] - ; VI: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[AND2]](s32) - ; VI: [[SHL1:%[0-9]+]]:_(s16) = G_SHL [[SHL]], [[TRUNC2]](s16) - ; VI: [[AND3:%[0-9]+]]:_(s32) = G_AND [[AND]], [[C3]] - ; VI: [[C4:%[0-9]+]]:_(s16) = G_CONSTANT i16 255 - ; VI: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[COPY1]](s32) - ; VI: [[AND4:%[0-9]+]]:_(s16) = G_AND [[TRUNC3]], [[C4]] - ; VI: [[TRUNC4:%[0-9]+]]:_(s16) = G_TRUNC [[AND3]](s32) - ; VI: [[LSHR:%[0-9]+]]:_(s16) = G_LSHR [[AND4]], [[TRUNC4]](s16) - ; VI: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[SHL1]](s16) - ; VI: [[ANYEXT1:%[0-9]+]]:_(s32) = G_ANYEXT [[LSHR]](s16) - ; VI: [[OR:%[0-9]+]]:_(s32) = G_OR [[ANYEXT]], [[ANYEXT1]] - ; VI: $vgpr0 = COPY [[OR]](s32) + ; VI-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; VI-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 + ; VI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 7 + ; VI-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY2]], [[C]] + ; VI-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 -1 + ; VI-NEXT: [[XOR:%[0-9]+]]:_(s32) = G_XOR [[COPY2]], [[C1]] + ; VI-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[XOR]], [[C]] + ; VI-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 + ; VI-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 255 + ; VI-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY [[C2]](s32) + ; VI-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY]](s32) + ; VI-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY3]](s32) + ; VI-NEXT: [[SHL:%[0-9]+]]:_(s16) = G_SHL [[TRUNC]], [[TRUNC1]](s16) + ; VI-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[AND1]], [[C3]] + ; VI-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[AND2]](s32) + ; VI-NEXT: [[SHL1:%[0-9]+]]:_(s16) = G_SHL [[SHL]], [[TRUNC2]](s16) + ; VI-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[AND]], [[C3]] + ; VI-NEXT: [[C4:%[0-9]+]]:_(s16) = G_CONSTANT i16 255 + ; VI-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[COPY1]](s32) + ; VI-NEXT: [[AND4:%[0-9]+]]:_(s16) = G_AND [[TRUNC3]], [[C4]] + ; VI-NEXT: [[TRUNC4:%[0-9]+]]:_(s16) = G_TRUNC [[AND3]](s32) + ; VI-NEXT: [[LSHR:%[0-9]+]]:_(s16) = G_LSHR [[AND4]], [[TRUNC4]](s16) + ; VI-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[SHL1]](s16) + ; VI-NEXT: [[ANYEXT1:%[0-9]+]]:_(s32) = G_ANYEXT [[LSHR]](s16) + ; VI-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[ANYEXT]], [[ANYEXT1]] + ; VI-NEXT: $vgpr0 = COPY [[OR]](s32) ; GFX9-LABEL: name: test_fshr_s8_s8 ; GFX9: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX9: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX9: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; GFX9: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 7 - ; GFX9: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY2]], [[C]] - ; GFX9: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 -1 - ; GFX9: [[XOR:%[0-9]+]]:_(s32) = G_XOR [[COPY2]], [[C1]] - ; GFX9: [[AND1:%[0-9]+]]:_(s32) = G_AND [[XOR]], [[C]] - ; GFX9: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 - ; GFX9: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 255 - ; GFX9: [[COPY3:%[0-9]+]]:_(s32) = COPY [[C2]](s32) - ; GFX9: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY]](s32) - ; GFX9: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY3]](s32) - ; GFX9: [[SHL:%[0-9]+]]:_(s16) = G_SHL [[TRUNC]], [[TRUNC1]](s16) - ; GFX9: [[AND2:%[0-9]+]]:_(s32) = G_AND [[AND1]], [[C3]] - ; GFX9: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC 
[[AND2]](s32) - ; GFX9: [[SHL1:%[0-9]+]]:_(s16) = G_SHL [[SHL]], [[TRUNC2]](s16) - ; GFX9: [[AND3:%[0-9]+]]:_(s32) = G_AND [[AND]], [[C3]] - ; GFX9: [[C4:%[0-9]+]]:_(s16) = G_CONSTANT i16 255 - ; GFX9: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[COPY1]](s32) - ; GFX9: [[AND4:%[0-9]+]]:_(s16) = G_AND [[TRUNC3]], [[C4]] - ; GFX9: [[TRUNC4:%[0-9]+]]:_(s16) = G_TRUNC [[AND3]](s32) - ; GFX9: [[LSHR:%[0-9]+]]:_(s16) = G_LSHR [[AND4]], [[TRUNC4]](s16) - ; GFX9: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[SHL1]](s16) - ; GFX9: [[ANYEXT1:%[0-9]+]]:_(s32) = G_ANYEXT [[LSHR]](s16) - ; GFX9: [[OR:%[0-9]+]]:_(s32) = G_OR [[ANYEXT]], [[ANYEXT1]] - ; GFX9: $vgpr0 = COPY [[OR]](s32) + ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; GFX9-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 + ; GFX9-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 7 + ; GFX9-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY2]], [[C]] + ; GFX9-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 -1 + ; GFX9-NEXT: [[XOR:%[0-9]+]]:_(s32) = G_XOR [[COPY2]], [[C1]] + ; GFX9-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[XOR]], [[C]] + ; GFX9-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 + ; GFX9-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 255 + ; GFX9-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY [[C2]](s32) + ; GFX9-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY]](s32) + ; GFX9-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY3]](s32) + ; GFX9-NEXT: [[SHL:%[0-9]+]]:_(s16) = G_SHL [[TRUNC]], [[TRUNC1]](s16) + ; GFX9-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[AND1]], [[C3]] + ; GFX9-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[AND2]](s32) + ; GFX9-NEXT: [[SHL1:%[0-9]+]]:_(s16) = G_SHL [[SHL]], [[TRUNC2]](s16) + ; GFX9-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[AND]], [[C3]] + ; GFX9-NEXT: [[C4:%[0-9]+]]:_(s16) = G_CONSTANT i16 255 + ; GFX9-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[COPY1]](s32) + ; GFX9-NEXT: [[AND4:%[0-9]+]]:_(s16) = G_AND [[TRUNC3]], [[C4]] + ; GFX9-NEXT: [[TRUNC4:%[0-9]+]]:_(s16) = G_TRUNC [[AND3]](s32) + ; GFX9-NEXT: [[LSHR:%[0-9]+]]:_(s16) = G_LSHR [[AND4]], [[TRUNC4]](s16) + ; GFX9-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[SHL1]](s16) + ; GFX9-NEXT: [[ANYEXT1:%[0-9]+]]:_(s32) = G_ANYEXT [[LSHR]](s16) + ; GFX9-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[ANYEXT]], [[ANYEXT1]] + ; GFX9-NEXT: $vgpr0 = COPY [[OR]](s32) %0:_(s32) = COPY $vgpr0 %1:_(s32) = COPY $vgpr1 %2:_(s32) = COPY $vgpr2 @@ -517,121 +517,121 @@ body: | ; SI-LABEL: name: test_fshr_s24_s24 ; SI: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; SI: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; SI: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; SI: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 23 - ; SI: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 24 - ; SI: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 16777215 - ; SI: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY2]], [[C2]] - ; SI: [[COPY3:%[0-9]+]]:_(s32) = COPY [[C1]](s32) - ; SI: [[UITOFP:%[0-9]+]]:_(s32) = G_UITOFP [[COPY3]](s32) - ; SI: [[AMDGPU_RCP_IFLAG:%[0-9]+]]:_(s32) = G_AMDGPU_RCP_IFLAG [[UITOFP]](s32) - ; SI: [[C3:%[0-9]+]]:_(s32) = G_FCONSTANT float 0x41EFFFFFC0000000 - ; SI: [[FMUL:%[0-9]+]]:_(s32) = G_FMUL [[AMDGPU_RCP_IFLAG]], [[C3]] - ; SI: [[FPTOUI:%[0-9]+]]:_(s32) = G_FPTOUI [[FMUL]](s32) - ; SI: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 - ; SI: [[SUB:%[0-9]+]]:_(s32) = G_SUB [[C4]], [[COPY3]] - ; SI: [[MUL:%[0-9]+]]:_(s32) = G_MUL [[SUB]], [[FPTOUI]] - ; SI: [[UMULH:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI]], [[MUL]] - ; SI: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[FPTOUI]], [[UMULH]] - ; SI: [[UMULH1:%[0-9]+]]:_(s32) = G_UMULH [[AND]], [[ADD]] - ; SI: [[MUL1:%[0-9]+]]:_(s32) = 
G_MUL [[UMULH1]], [[COPY3]] - ; SI: [[SUB1:%[0-9]+]]:_(s32) = G_SUB [[AND]], [[MUL1]] - ; SI: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 - ; SI: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[SUB1]](s32), [[COPY3]] - ; SI: [[SUB2:%[0-9]+]]:_(s32) = G_SUB [[SUB1]], [[COPY3]] - ; SI: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[ICMP]](s1), [[SUB2]], [[SUB1]] - ; SI: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[SELECT]](s32), [[COPY3]] - ; SI: [[SUB3:%[0-9]+]]:_(s32) = G_SUB [[SELECT]], [[COPY3]] - ; SI: [[SELECT1:%[0-9]+]]:_(s32) = G_SELECT [[ICMP1]](s1), [[SUB3]], [[SELECT]] - ; SI: [[SUB4:%[0-9]+]]:_(s32) = G_SUB [[C]], [[SELECT1]] - ; SI: [[COPY4:%[0-9]+]]:_(s32) = COPY [[C5]](s32) - ; SI: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY]], [[COPY4]](s32) - ; SI: [[AND1:%[0-9]+]]:_(s32) = G_AND [[SUB4]], [[C2]] - ; SI: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[SHL]], [[AND1]](s32) - ; SI: [[AND2:%[0-9]+]]:_(s32) = G_AND [[SELECT1]], [[C2]] - ; SI: [[AND3:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C2]] - ; SI: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[AND3]], [[AND2]](s32) - ; SI: [[OR:%[0-9]+]]:_(s32) = G_OR [[SHL1]], [[LSHR]] - ; SI: $vgpr0 = COPY [[OR]](s32) + ; SI-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; SI-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 + ; SI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 23 + ; SI-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 24 + ; SI-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 16777215 + ; SI-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY2]], [[C2]] + ; SI-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY [[C1]](s32) + ; SI-NEXT: [[UITOFP:%[0-9]+]]:_(s32) = G_UITOFP [[COPY3]](s32) + ; SI-NEXT: [[AMDGPU_RCP_IFLAG:%[0-9]+]]:_(s32) = G_AMDGPU_RCP_IFLAG [[UITOFP]](s32) + ; SI-NEXT: [[C3:%[0-9]+]]:_(s32) = G_FCONSTANT float 0x41EFFFFFC0000000 + ; SI-NEXT: [[FMUL:%[0-9]+]]:_(s32) = G_FMUL [[AMDGPU_RCP_IFLAG]], [[C3]] + ; SI-NEXT: [[FPTOUI:%[0-9]+]]:_(s32) = G_FPTOUI [[FMUL]](s32) + ; SI-NEXT: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 + ; SI-NEXT: [[SUB:%[0-9]+]]:_(s32) = G_SUB [[C4]], [[COPY3]] + ; SI-NEXT: [[MUL:%[0-9]+]]:_(s32) = G_MUL [[SUB]], [[FPTOUI]] + ; SI-NEXT: [[UMULH:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI]], [[MUL]] + ; SI-NEXT: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[FPTOUI]], [[UMULH]] + ; SI-NEXT: [[UMULH1:%[0-9]+]]:_(s32) = G_UMULH [[AND]], [[ADD]] + ; SI-NEXT: [[MUL1:%[0-9]+]]:_(s32) = G_MUL [[UMULH1]], [[COPY3]] + ; SI-NEXT: [[SUB1:%[0-9]+]]:_(s32) = G_SUB [[AND]], [[MUL1]] + ; SI-NEXT: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 + ; SI-NEXT: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[SUB1]](s32), [[COPY3]] + ; SI-NEXT: [[SUB2:%[0-9]+]]:_(s32) = G_SUB [[SUB1]], [[COPY3]] + ; SI-NEXT: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[ICMP]](s1), [[SUB2]], [[SUB1]] + ; SI-NEXT: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[SELECT]](s32), [[COPY3]] + ; SI-NEXT: [[SUB3:%[0-9]+]]:_(s32) = G_SUB [[SELECT]], [[COPY3]] + ; SI-NEXT: [[SELECT1:%[0-9]+]]:_(s32) = G_SELECT [[ICMP1]](s1), [[SUB3]], [[SELECT]] + ; SI-NEXT: [[SUB4:%[0-9]+]]:_(s32) = G_SUB [[C]], [[SELECT1]] + ; SI-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY [[C5]](s32) + ; SI-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY]], [[COPY4]](s32) + ; SI-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[SUB4]], [[C2]] + ; SI-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[SHL]], [[AND1]](s32) + ; SI-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[SELECT1]], [[C2]] + ; SI-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C2]] + ; SI-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[AND3]], [[AND2]](s32) + ; SI-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[SHL1]], [[LSHR]] + ; SI-NEXT: $vgpr0 = COPY 
[[OR]](s32) ; VI-LABEL: name: test_fshr_s24_s24 ; VI: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; VI: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; VI: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; VI: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 23 - ; VI: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 24 - ; VI: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 16777215 - ; VI: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY2]], [[C2]] - ; VI: [[COPY3:%[0-9]+]]:_(s32) = COPY [[C1]](s32) - ; VI: [[UITOFP:%[0-9]+]]:_(s32) = G_UITOFP [[COPY3]](s32) - ; VI: [[AMDGPU_RCP_IFLAG:%[0-9]+]]:_(s32) = G_AMDGPU_RCP_IFLAG [[UITOFP]](s32) - ; VI: [[C3:%[0-9]+]]:_(s32) = G_FCONSTANT float 0x41EFFFFFC0000000 - ; VI: [[FMUL:%[0-9]+]]:_(s32) = G_FMUL [[AMDGPU_RCP_IFLAG]], [[C3]] - ; VI: [[FPTOUI:%[0-9]+]]:_(s32) = G_FPTOUI [[FMUL]](s32) - ; VI: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 - ; VI: [[SUB:%[0-9]+]]:_(s32) = G_SUB [[C4]], [[COPY3]] - ; VI: [[MUL:%[0-9]+]]:_(s32) = G_MUL [[SUB]], [[FPTOUI]] - ; VI: [[UMULH:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI]], [[MUL]] - ; VI: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[FPTOUI]], [[UMULH]] - ; VI: [[UMULH1:%[0-9]+]]:_(s32) = G_UMULH [[AND]], [[ADD]] - ; VI: [[MUL1:%[0-9]+]]:_(s32) = G_MUL [[UMULH1]], [[COPY3]] - ; VI: [[SUB1:%[0-9]+]]:_(s32) = G_SUB [[AND]], [[MUL1]] - ; VI: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 - ; VI: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[SUB1]](s32), [[COPY3]] - ; VI: [[SUB2:%[0-9]+]]:_(s32) = G_SUB [[SUB1]], [[COPY3]] - ; VI: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[ICMP]](s1), [[SUB2]], [[SUB1]] - ; VI: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[SELECT]](s32), [[COPY3]] - ; VI: [[SUB3:%[0-9]+]]:_(s32) = G_SUB [[SELECT]], [[COPY3]] - ; VI: [[SELECT1:%[0-9]+]]:_(s32) = G_SELECT [[ICMP1]](s1), [[SUB3]], [[SELECT]] - ; VI: [[SUB4:%[0-9]+]]:_(s32) = G_SUB [[C]], [[SELECT1]] - ; VI: [[COPY4:%[0-9]+]]:_(s32) = COPY [[C5]](s32) - ; VI: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY]], [[COPY4]](s32) - ; VI: [[AND1:%[0-9]+]]:_(s32) = G_AND [[SUB4]], [[C2]] - ; VI: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[SHL]], [[AND1]](s32) - ; VI: [[AND2:%[0-9]+]]:_(s32) = G_AND [[SELECT1]], [[C2]] - ; VI: [[AND3:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C2]] - ; VI: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[AND3]], [[AND2]](s32) - ; VI: [[OR:%[0-9]+]]:_(s32) = G_OR [[SHL1]], [[LSHR]] - ; VI: $vgpr0 = COPY [[OR]](s32) + ; VI-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; VI-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 + ; VI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 23 + ; VI-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 24 + ; VI-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 16777215 + ; VI-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY2]], [[C2]] + ; VI-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY [[C1]](s32) + ; VI-NEXT: [[UITOFP:%[0-9]+]]:_(s32) = G_UITOFP [[COPY3]](s32) + ; VI-NEXT: [[AMDGPU_RCP_IFLAG:%[0-9]+]]:_(s32) = G_AMDGPU_RCP_IFLAG [[UITOFP]](s32) + ; VI-NEXT: [[C3:%[0-9]+]]:_(s32) = G_FCONSTANT float 0x41EFFFFFC0000000 + ; VI-NEXT: [[FMUL:%[0-9]+]]:_(s32) = G_FMUL [[AMDGPU_RCP_IFLAG]], [[C3]] + ; VI-NEXT: [[FPTOUI:%[0-9]+]]:_(s32) = G_FPTOUI [[FMUL]](s32) + ; VI-NEXT: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 + ; VI-NEXT: [[SUB:%[0-9]+]]:_(s32) = G_SUB [[C4]], [[COPY3]] + ; VI-NEXT: [[MUL:%[0-9]+]]:_(s32) = G_MUL [[SUB]], [[FPTOUI]] + ; VI-NEXT: [[UMULH:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI]], [[MUL]] + ; VI-NEXT: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[FPTOUI]], [[UMULH]] + ; VI-NEXT: [[UMULH1:%[0-9]+]]:_(s32) = G_UMULH [[AND]], [[ADD]] + ; VI-NEXT: [[MUL1:%[0-9]+]]:_(s32) = G_MUL [[UMULH1]], [[COPY3]] + ; VI-NEXT: 
[[SUB1:%[0-9]+]]:_(s32) = G_SUB [[AND]], [[MUL1]] + ; VI-NEXT: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 + ; VI-NEXT: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[SUB1]](s32), [[COPY3]] + ; VI-NEXT: [[SUB2:%[0-9]+]]:_(s32) = G_SUB [[SUB1]], [[COPY3]] + ; VI-NEXT: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[ICMP]](s1), [[SUB2]], [[SUB1]] + ; VI-NEXT: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[SELECT]](s32), [[COPY3]] + ; VI-NEXT: [[SUB3:%[0-9]+]]:_(s32) = G_SUB [[SELECT]], [[COPY3]] + ; VI-NEXT: [[SELECT1:%[0-9]+]]:_(s32) = G_SELECT [[ICMP1]](s1), [[SUB3]], [[SELECT]] + ; VI-NEXT: [[SUB4:%[0-9]+]]:_(s32) = G_SUB [[C]], [[SELECT1]] + ; VI-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY [[C5]](s32) + ; VI-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY]], [[COPY4]](s32) + ; VI-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[SUB4]], [[C2]] + ; VI-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[SHL]], [[AND1]](s32) + ; VI-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[SELECT1]], [[C2]] + ; VI-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C2]] + ; VI-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[AND3]], [[AND2]](s32) + ; VI-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[SHL1]], [[LSHR]] + ; VI-NEXT: $vgpr0 = COPY [[OR]](s32) ; GFX9-LABEL: name: test_fshr_s24_s24 ; GFX9: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX9: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX9: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; GFX9: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 23 - ; GFX9: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 24 - ; GFX9: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 16777215 - ; GFX9: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY2]], [[C2]] - ; GFX9: [[COPY3:%[0-9]+]]:_(s32) = COPY [[C1]](s32) - ; GFX9: [[UITOFP:%[0-9]+]]:_(s32) = G_UITOFP [[COPY3]](s32) - ; GFX9: [[AMDGPU_RCP_IFLAG:%[0-9]+]]:_(s32) = G_AMDGPU_RCP_IFLAG [[UITOFP]](s32) - ; GFX9: [[C3:%[0-9]+]]:_(s32) = G_FCONSTANT float 0x41EFFFFFC0000000 - ; GFX9: [[FMUL:%[0-9]+]]:_(s32) = G_FMUL [[AMDGPU_RCP_IFLAG]], [[C3]] - ; GFX9: [[FPTOUI:%[0-9]+]]:_(s32) = G_FPTOUI [[FMUL]](s32) - ; GFX9: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 - ; GFX9: [[SUB:%[0-9]+]]:_(s32) = G_SUB [[C4]], [[COPY3]] - ; GFX9: [[MUL:%[0-9]+]]:_(s32) = G_MUL [[SUB]], [[FPTOUI]] - ; GFX9: [[UMULH:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI]], [[MUL]] - ; GFX9: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[FPTOUI]], [[UMULH]] - ; GFX9: [[UMULH1:%[0-9]+]]:_(s32) = G_UMULH [[AND]], [[ADD]] - ; GFX9: [[MUL1:%[0-9]+]]:_(s32) = G_MUL [[UMULH1]], [[COPY3]] - ; GFX9: [[SUB1:%[0-9]+]]:_(s32) = G_SUB [[AND]], [[MUL1]] - ; GFX9: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 - ; GFX9: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[SUB1]](s32), [[COPY3]] - ; GFX9: [[SUB2:%[0-9]+]]:_(s32) = G_SUB [[SUB1]], [[COPY3]] - ; GFX9: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[ICMP]](s1), [[SUB2]], [[SUB1]] - ; GFX9: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[SELECT]](s32), [[COPY3]] - ; GFX9: [[SUB3:%[0-9]+]]:_(s32) = G_SUB [[SELECT]], [[COPY3]] - ; GFX9: [[SELECT1:%[0-9]+]]:_(s32) = G_SELECT [[ICMP1]](s1), [[SUB3]], [[SELECT]] - ; GFX9: [[SUB4:%[0-9]+]]:_(s32) = G_SUB [[C]], [[SELECT1]] - ; GFX9: [[COPY4:%[0-9]+]]:_(s32) = COPY [[C5]](s32) - ; GFX9: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY]], [[COPY4]](s32) - ; GFX9: [[AND1:%[0-9]+]]:_(s32) = G_AND [[SUB4]], [[C2]] - ; GFX9: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[SHL]], [[AND1]](s32) - ; GFX9: [[AND2:%[0-9]+]]:_(s32) = G_AND [[SELECT1]], [[C2]] - ; GFX9: [[AND3:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C2]] - ; GFX9: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[AND3]], [[AND2]](s32) - ; GFX9: [[OR:%[0-9]+]]:_(s32) = G_OR [[SHL1]], [[LSHR]] - ; GFX9: 
$vgpr0 = COPY [[OR]](s32) + ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; GFX9-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 + ; GFX9-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 23 + ; GFX9-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 24 + ; GFX9-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 16777215 + ; GFX9-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY2]], [[C2]] + ; GFX9-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY [[C1]](s32) + ; GFX9-NEXT: [[UITOFP:%[0-9]+]]:_(s32) = G_UITOFP [[COPY3]](s32) + ; GFX9-NEXT: [[AMDGPU_RCP_IFLAG:%[0-9]+]]:_(s32) = G_AMDGPU_RCP_IFLAG [[UITOFP]](s32) + ; GFX9-NEXT: [[C3:%[0-9]+]]:_(s32) = G_FCONSTANT float 0x41EFFFFFC0000000 + ; GFX9-NEXT: [[FMUL:%[0-9]+]]:_(s32) = G_FMUL [[AMDGPU_RCP_IFLAG]], [[C3]] + ; GFX9-NEXT: [[FPTOUI:%[0-9]+]]:_(s32) = G_FPTOUI [[FMUL]](s32) + ; GFX9-NEXT: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 + ; GFX9-NEXT: [[SUB:%[0-9]+]]:_(s32) = G_SUB [[C4]], [[COPY3]] + ; GFX9-NEXT: [[MUL:%[0-9]+]]:_(s32) = G_MUL [[SUB]], [[FPTOUI]] + ; GFX9-NEXT: [[UMULH:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI]], [[MUL]] + ; GFX9-NEXT: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[FPTOUI]], [[UMULH]] + ; GFX9-NEXT: [[UMULH1:%[0-9]+]]:_(s32) = G_UMULH [[AND]], [[ADD]] + ; GFX9-NEXT: [[MUL1:%[0-9]+]]:_(s32) = G_MUL [[UMULH1]], [[COPY3]] + ; GFX9-NEXT: [[SUB1:%[0-9]+]]:_(s32) = G_SUB [[AND]], [[MUL1]] + ; GFX9-NEXT: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 + ; GFX9-NEXT: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[SUB1]](s32), [[COPY3]] + ; GFX9-NEXT: [[SUB2:%[0-9]+]]:_(s32) = G_SUB [[SUB1]], [[COPY3]] + ; GFX9-NEXT: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[ICMP]](s1), [[SUB2]], [[SUB1]] + ; GFX9-NEXT: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[SELECT]](s32), [[COPY3]] + ; GFX9-NEXT: [[SUB3:%[0-9]+]]:_(s32) = G_SUB [[SELECT]], [[COPY3]] + ; GFX9-NEXT: [[SELECT1:%[0-9]+]]:_(s32) = G_SELECT [[ICMP1]](s1), [[SUB3]], [[SELECT]] + ; GFX9-NEXT: [[SUB4:%[0-9]+]]:_(s32) = G_SUB [[C]], [[SELECT1]] + ; GFX9-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY [[C5]](s32) + ; GFX9-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY]], [[COPY4]](s32) + ; GFX9-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[SUB4]], [[C2]] + ; GFX9-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[SHL]], [[AND1]](s32) + ; GFX9-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[SELECT1]], [[C2]] + ; GFX9-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C2]] + ; GFX9-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[AND3]], [[AND2]](s32) + ; GFX9-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[SHL1]], [[LSHR]] + ; GFX9-NEXT: $vgpr0 = COPY [[OR]](s32) %0:_(s32) = COPY $vgpr0 %1:_(s32) = COPY $vgpr1 %2:_(s32) = COPY $vgpr2 @@ -651,425 +651,425 @@ body: | ; SI-LABEL: name: test_fshr_v3s16_v3s16 ; SI: [[COPY:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0 - ; SI: [[COPY1:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr1 - ; SI: [[COPY2:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr2 - ; SI: [[COPY3:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr3 - ; SI: [[COPY4:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr4 - ; SI: [[COPY5:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr5 - ; SI: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY]](<2 x s16>) - ; SI: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; SI: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) - ; SI: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[COPY1]](<2 x s16>) - ; SI: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[COPY2]](<2 x s16>) - ; SI: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C]](s32) - ; SI: [[BITCAST3:%[0-9]+]]:_(s32) = G_BITCAST [[COPY3]](<2 x s16>) - ; SI: [[BITCAST4:%[0-9]+]]:_(s32) = G_BITCAST [[COPY4]](<2 x s16>) - ; SI: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST4]], 
[[C]](s32) - ; SI: [[BITCAST5:%[0-9]+]]:_(s32) = G_BITCAST [[COPY5]](<2 x s16>) - ; SI: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 - ; SI: [[AND:%[0-9]+]]:_(s32) = G_AND [[BITCAST4]], [[C1]] - ; SI: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LSHR2]], [[C1]] - ; SI: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C]](s32) - ; SI: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]] - ; SI: [[BITCAST6:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) - ; SI: [[AND2:%[0-9]+]]:_(s32) = G_AND [[BITCAST5]], [[C1]] - ; SI: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 - ; SI: [[COPY6:%[0-9]+]]:_(s32) = COPY [[C2]](s32) - ; SI: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[COPY6]], [[C]](s32) - ; SI: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL1]] - ; SI: [[BITCAST7:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32) - ; SI: [[C3:%[0-9]+]]:_(s16) = G_CONSTANT i16 1 - ; SI: [[C4:%[0-9]+]]:_(s16) = G_CONSTANT i16 15 - ; SI: [[AND3:%[0-9]+]]:_(s16) = G_AND [[C3]], [[C4]] - ; SI: [[C5:%[0-9]+]]:_(s16) = G_CONSTANT i16 -1 - ; SI: [[XOR:%[0-9]+]]:_(s16) = G_XOR [[C3]], [[C5]] - ; SI: [[AND4:%[0-9]+]]:_(s16) = G_AND [[XOR]], [[C4]] - ; SI: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[AND3]](s16) - ; SI: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[BITCAST]], [[ZEXT]](s32) - ; SI: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[SHL2]](s32) - ; SI: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 - ; SI: [[AND5:%[0-9]+]]:_(s32) = G_AND [[BITCAST2]], [[C1]] - ; SI: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[AND5]], [[C6]](s32) - ; SI: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[AND4]](s16) - ; SI: [[AND6:%[0-9]+]]:_(s32) = G_AND [[LSHR3]], [[C1]] - ; SI: [[LSHR4:%[0-9]+]]:_(s32) = G_LSHR [[AND6]], [[ZEXT1]](s32) - ; SI: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR4]](s32) - ; SI: [[OR2:%[0-9]+]]:_(s16) = G_OR [[TRUNC]], [[TRUNC1]] - ; SI: [[AND7:%[0-9]+]]:_(s16) = G_AND [[C3]], [[C4]] - ; SI: [[XOR1:%[0-9]+]]:_(s16) = G_XOR [[C3]], [[C5]] - ; SI: [[AND8:%[0-9]+]]:_(s16) = G_AND [[XOR1]], [[C4]] - ; SI: [[ZEXT2:%[0-9]+]]:_(s32) = G_ZEXT [[AND7]](s16) - ; SI: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[LSHR]], [[ZEXT2]](s32) - ; SI: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[SHL3]](s32) - ; SI: [[COPY7:%[0-9]+]]:_(s32) = COPY [[C6]](s32) - ; SI: [[AND9:%[0-9]+]]:_(s32) = G_AND [[LSHR1]], [[C1]] - ; SI: [[LSHR5:%[0-9]+]]:_(s32) = G_LSHR [[AND9]], [[COPY7]](s32) - ; SI: [[ZEXT3:%[0-9]+]]:_(s32) = G_ZEXT [[AND8]](s16) - ; SI: [[AND10:%[0-9]+]]:_(s32) = G_AND [[LSHR5]], [[C1]] - ; SI: [[LSHR6:%[0-9]+]]:_(s32) = G_LSHR [[AND10]], [[ZEXT3]](s32) - ; SI: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR6]](s32) - ; SI: [[OR3:%[0-9]+]]:_(s16) = G_OR [[TRUNC2]], [[TRUNC3]] - ; SI: [[COPY8:%[0-9]+]]:_(s32) = COPY [[C6]](s32) - ; SI: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[BITCAST2]], [[COPY8]](s32) - ; SI: [[COPY9:%[0-9]+]]:_(s32) = COPY [[C6]](s32) - ; SI: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[LSHR1]], [[COPY9]](s32) - ; SI: [[COPY10:%[0-9]+]]:_(s32) = COPY [[C1]](s32) - ; SI: [[COPY11:%[0-9]+]]:_(s32) = COPY [[C1]](s32) - ; SI: [[SHL6:%[0-9]+]]:_(s32) = G_SHL [[COPY11]], [[C]](s32) - ; SI: [[OR4:%[0-9]+]]:_(s32) = G_OR [[COPY10]], [[SHL6]] - ; SI: [[BITCAST8:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR4]](s32) - ; SI: [[XOR2:%[0-9]+]]:_(<2 x s16>) = G_XOR [[BITCAST6]], [[BITCAST8]] - ; SI: [[BITCAST9:%[0-9]+]]:_(s32) = G_BITCAST [[XOR2]](<2 x s16>) - ; SI: [[TRUNC4:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST9]](s32) - ; SI: [[LSHR7:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST9]], [[C]](s32) - ; SI: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR7]](s32) - ; SI: [[AND11:%[0-9]+]]:_(s16) = G_AND [[TRUNC4]], [[C4]] - ; SI: [[XOR3:%[0-9]+]]:_(s16) = G_XOR [[TRUNC4]], [[C5]] 
- ; SI: [[AND12:%[0-9]+]]:_(s16) = G_AND [[XOR3]], [[C4]] - ; SI: [[ZEXT4:%[0-9]+]]:_(s32) = G_ZEXT [[AND11]](s16) - ; SI: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[OR2]](s16) - ; SI: [[SHL7:%[0-9]+]]:_(s32) = G_SHL [[ANYEXT]], [[ZEXT4]](s32) - ; SI: [[TRUNC6:%[0-9]+]]:_(s16) = G_TRUNC [[SHL7]](s32) - ; SI: [[COPY12:%[0-9]+]]:_(s32) = COPY [[C6]](s32) - ; SI: [[AND13:%[0-9]+]]:_(s32) = G_AND [[SHL4]], [[C1]] - ; SI: [[LSHR8:%[0-9]+]]:_(s32) = G_LSHR [[AND13]], [[COPY12]](s32) - ; SI: [[ZEXT5:%[0-9]+]]:_(s32) = G_ZEXT [[AND12]](s16) - ; SI: [[AND14:%[0-9]+]]:_(s32) = G_AND [[LSHR8]], [[C1]] - ; SI: [[LSHR9:%[0-9]+]]:_(s32) = G_LSHR [[AND14]], [[ZEXT5]](s32) - ; SI: [[TRUNC7:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR9]](s32) - ; SI: [[OR5:%[0-9]+]]:_(s16) = G_OR [[TRUNC6]], [[TRUNC7]] - ; SI: [[AND15:%[0-9]+]]:_(s16) = G_AND [[TRUNC5]], [[C4]] - ; SI: [[XOR4:%[0-9]+]]:_(s16) = G_XOR [[TRUNC5]], [[C5]] - ; SI: [[AND16:%[0-9]+]]:_(s16) = G_AND [[XOR4]], [[C4]] - ; SI: [[ZEXT6:%[0-9]+]]:_(s32) = G_ZEXT [[AND15]](s16) - ; SI: [[ANYEXT1:%[0-9]+]]:_(s32) = G_ANYEXT [[OR3]](s16) - ; SI: [[SHL8:%[0-9]+]]:_(s32) = G_SHL [[ANYEXT1]], [[ZEXT6]](s32) - ; SI: [[TRUNC8:%[0-9]+]]:_(s16) = G_TRUNC [[SHL8]](s32) - ; SI: [[COPY13:%[0-9]+]]:_(s32) = COPY [[C6]](s32) - ; SI: [[AND17:%[0-9]+]]:_(s32) = G_AND [[SHL5]], [[C1]] - ; SI: [[LSHR10:%[0-9]+]]:_(s32) = G_LSHR [[AND17]], [[COPY13]](s32) - ; SI: [[ZEXT7:%[0-9]+]]:_(s32) = G_ZEXT [[AND16]](s16) - ; SI: [[AND18:%[0-9]+]]:_(s32) = G_AND [[LSHR10]], [[C1]] - ; SI: [[LSHR11:%[0-9]+]]:_(s32) = G_LSHR [[AND18]], [[ZEXT7]](s32) - ; SI: [[TRUNC9:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR11]](s32) - ; SI: [[OR6:%[0-9]+]]:_(s16) = G_OR [[TRUNC8]], [[TRUNC9]] - ; SI: [[ZEXT8:%[0-9]+]]:_(s32) = G_ZEXT [[OR5]](s16) - ; SI: [[ZEXT9:%[0-9]+]]:_(s32) = G_ZEXT [[OR6]](s16) - ; SI: [[SHL9:%[0-9]+]]:_(s32) = G_SHL [[ZEXT9]], [[C]](s32) - ; SI: [[OR7:%[0-9]+]]:_(s32) = G_OR [[ZEXT8]], [[SHL9]] - ; SI: [[BITCAST10:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR7]](s32) - ; SI: [[AND19:%[0-9]+]]:_(s16) = G_AND [[C3]], [[C4]] - ; SI: [[XOR5:%[0-9]+]]:_(s16) = G_XOR [[C3]], [[C5]] - ; SI: [[AND20:%[0-9]+]]:_(s16) = G_AND [[XOR5]], [[C4]] - ; SI: [[ZEXT10:%[0-9]+]]:_(s32) = G_ZEXT [[AND19]](s16) - ; SI: [[SHL10:%[0-9]+]]:_(s32) = G_SHL [[BITCAST1]], [[ZEXT10]](s32) - ; SI: [[TRUNC10:%[0-9]+]]:_(s16) = G_TRUNC [[SHL10]](s32) - ; SI: [[COPY14:%[0-9]+]]:_(s32) = COPY [[C6]](s32) - ; SI: [[AND21:%[0-9]+]]:_(s32) = G_AND [[BITCAST3]], [[C1]] - ; SI: [[LSHR12:%[0-9]+]]:_(s32) = G_LSHR [[AND21]], [[COPY14]](s32) - ; SI: [[ZEXT11:%[0-9]+]]:_(s32) = G_ZEXT [[AND20]](s16) - ; SI: [[AND22:%[0-9]+]]:_(s32) = G_AND [[LSHR12]], [[C1]] - ; SI: [[LSHR13:%[0-9]+]]:_(s32) = G_LSHR [[AND22]], [[ZEXT11]](s32) - ; SI: [[TRUNC11:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR13]](s32) - ; SI: [[OR8:%[0-9]+]]:_(s16) = G_OR [[TRUNC10]], [[TRUNC11]] - ; SI: [[AND23:%[0-9]+]]:_(s16) = G_AND [[C3]], [[C4]] - ; SI: [[XOR6:%[0-9]+]]:_(s16) = G_XOR [[C3]], [[C5]] - ; SI: [[AND24:%[0-9]+]]:_(s16) = G_AND [[XOR6]], [[C4]] - ; SI: [[ZEXT12:%[0-9]+]]:_(s32) = G_ZEXT [[AND23]](s16) - ; SI: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF - ; SI: [[SHL11:%[0-9]+]]:_(s32) = G_SHL [[DEF]], [[ZEXT12]](s32) - ; SI: [[TRUNC12:%[0-9]+]]:_(s16) = G_TRUNC [[SHL11]](s32) - ; SI: [[COPY15:%[0-9]+]]:_(s32) = COPY [[C6]](s32) - ; SI: [[LSHR14:%[0-9]+]]:_(s32) = G_LSHR [[C2]], [[COPY15]](s32) - ; SI: [[ZEXT13:%[0-9]+]]:_(s32) = G_ZEXT [[AND24]](s16) - ; SI: [[AND25:%[0-9]+]]:_(s32) = G_AND [[LSHR14]], [[C1]] - ; SI: [[LSHR15:%[0-9]+]]:_(s32) = G_LSHR [[AND25]], [[ZEXT13]](s32) - ; 
SI: [[TRUNC13:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR15]](s32) - ; SI: [[OR9:%[0-9]+]]:_(s16) = G_OR [[TRUNC12]], [[TRUNC13]] - ; SI: [[COPY16:%[0-9]+]]:_(s32) = COPY [[C6]](s32) - ; SI: [[SHL12:%[0-9]+]]:_(s32) = G_SHL [[BITCAST3]], [[COPY16]](s32) - ; SI: [[COPY17:%[0-9]+]]:_(s32) = COPY [[C6]](s32) - ; SI: [[COPY18:%[0-9]+]]:_(s32) = COPY [[DEF]](s32) - ; SI: [[SHL13:%[0-9]+]]:_(s32) = G_SHL [[COPY18]], [[COPY17]](s32) - ; SI: [[COPY19:%[0-9]+]]:_(s32) = COPY [[C1]](s32) - ; SI: [[COPY20:%[0-9]+]]:_(s32) = COPY [[C1]](s32) - ; SI: [[SHL14:%[0-9]+]]:_(s32) = G_SHL [[COPY20]], [[C]](s32) - ; SI: [[OR10:%[0-9]+]]:_(s32) = G_OR [[COPY19]], [[SHL14]] - ; SI: [[BITCAST11:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR10]](s32) - ; SI: [[XOR7:%[0-9]+]]:_(<2 x s16>) = G_XOR [[BITCAST7]], [[BITCAST11]] - ; SI: [[BITCAST12:%[0-9]+]]:_(s32) = G_BITCAST [[XOR7]](<2 x s16>) - ; SI: [[TRUNC14:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST12]](s32) - ; SI: [[LSHR16:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST12]], [[C]](s32) - ; SI: [[TRUNC15:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR16]](s32) - ; SI: [[AND26:%[0-9]+]]:_(s16) = G_AND [[TRUNC14]], [[C4]] - ; SI: [[XOR8:%[0-9]+]]:_(s16) = G_XOR [[TRUNC14]], [[C5]] - ; SI: [[AND27:%[0-9]+]]:_(s16) = G_AND [[XOR8]], [[C4]] - ; SI: [[ZEXT14:%[0-9]+]]:_(s32) = G_ZEXT [[AND26]](s16) - ; SI: [[ANYEXT2:%[0-9]+]]:_(s32) = G_ANYEXT [[OR8]](s16) - ; SI: [[SHL15:%[0-9]+]]:_(s32) = G_SHL [[ANYEXT2]], [[ZEXT14]](s32) - ; SI: [[TRUNC16:%[0-9]+]]:_(s16) = G_TRUNC [[SHL15]](s32) - ; SI: [[COPY21:%[0-9]+]]:_(s32) = COPY [[C6]](s32) - ; SI: [[AND28:%[0-9]+]]:_(s32) = G_AND [[SHL12]], [[C1]] - ; SI: [[LSHR17:%[0-9]+]]:_(s32) = G_LSHR [[AND28]], [[COPY21]](s32) - ; SI: [[ZEXT15:%[0-9]+]]:_(s32) = G_ZEXT [[AND27]](s16) - ; SI: [[AND29:%[0-9]+]]:_(s32) = G_AND [[LSHR17]], [[C1]] - ; SI: [[LSHR18:%[0-9]+]]:_(s32) = G_LSHR [[AND29]], [[ZEXT15]](s32) - ; SI: [[TRUNC17:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR18]](s32) - ; SI: [[OR11:%[0-9]+]]:_(s16) = G_OR [[TRUNC16]], [[TRUNC17]] - ; SI: [[AND30:%[0-9]+]]:_(s16) = G_AND [[TRUNC15]], [[C4]] - ; SI: [[XOR9:%[0-9]+]]:_(s16) = G_XOR [[TRUNC15]], [[C5]] - ; SI: [[AND31:%[0-9]+]]:_(s16) = G_AND [[XOR9]], [[C4]] - ; SI: [[ZEXT16:%[0-9]+]]:_(s32) = G_ZEXT [[AND30]](s16) - ; SI: [[ANYEXT3:%[0-9]+]]:_(s32) = G_ANYEXT [[OR9]](s16) - ; SI: [[SHL16:%[0-9]+]]:_(s32) = G_SHL [[ANYEXT3]], [[ZEXT16]](s32) - ; SI: [[TRUNC18:%[0-9]+]]:_(s16) = G_TRUNC [[SHL16]](s32) - ; SI: [[COPY22:%[0-9]+]]:_(s32) = COPY [[C6]](s32) - ; SI: [[AND32:%[0-9]+]]:_(s32) = G_AND [[SHL13]], [[C1]] - ; SI: [[LSHR19:%[0-9]+]]:_(s32) = G_LSHR [[AND32]], [[COPY22]](s32) - ; SI: [[ZEXT17:%[0-9]+]]:_(s32) = G_ZEXT [[AND31]](s16) - ; SI: [[AND33:%[0-9]+]]:_(s32) = G_AND [[LSHR19]], [[C1]] - ; SI: [[LSHR20:%[0-9]+]]:_(s32) = G_LSHR [[AND33]], [[ZEXT17]](s32) - ; SI: [[TRUNC19:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR20]](s32) - ; SI: [[OR12:%[0-9]+]]:_(s16) = G_OR [[TRUNC18]], [[TRUNC19]] - ; SI: [[ZEXT18:%[0-9]+]]:_(s32) = G_ZEXT [[OR11]](s16) - ; SI: [[ZEXT19:%[0-9]+]]:_(s32) = G_ZEXT [[OR12]](s16) - ; SI: [[SHL17:%[0-9]+]]:_(s32) = G_SHL [[ZEXT19]], [[C]](s32) - ; SI: [[OR13:%[0-9]+]]:_(s32) = G_OR [[ZEXT18]], [[SHL17]] - ; SI: [[BITCAST13:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR13]](s32) - ; SI: [[DEF1:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF - ; SI: [[BITCAST14:%[0-9]+]]:_(s32) = G_BITCAST [[BITCAST13]](<2 x s16>) - ; SI: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF1]](<4 x s16>) - ; SI: [[BITCAST15:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>) - ; SI: [[LSHR21:%[0-9]+]]:_(s32) = G_LSHR 
[[BITCAST15]], [[C]](s32) - ; SI: [[BITCAST16:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>) - ; SI: [[AND34:%[0-9]+]]:_(s32) = G_AND [[BITCAST14]], [[C1]] - ; SI: [[AND35:%[0-9]+]]:_(s32) = G_AND [[BITCAST15]], [[C1]] - ; SI: [[SHL18:%[0-9]+]]:_(s32) = G_SHL [[AND35]], [[C]](s32) - ; SI: [[OR14:%[0-9]+]]:_(s32) = G_OR [[AND34]], [[SHL18]] - ; SI: [[BITCAST17:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR14]](s32) - ; SI: [[AND36:%[0-9]+]]:_(s32) = G_AND [[LSHR21]], [[C1]] - ; SI: [[AND37:%[0-9]+]]:_(s32) = G_AND [[BITCAST16]], [[C1]] - ; SI: [[SHL19:%[0-9]+]]:_(s32) = G_SHL [[AND37]], [[C]](s32) - ; SI: [[OR15:%[0-9]+]]:_(s32) = G_OR [[AND36]], [[SHL19]] - ; SI: [[BITCAST18:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR15]](s32) - ; SI: $vgpr0 = COPY [[BITCAST10]](<2 x s16>) - ; SI: $vgpr1 = COPY [[BITCAST17]](<2 x s16>) - ; SI: $vgpr2 = COPY [[BITCAST18]](<2 x s16>) + ; SI-NEXT: [[COPY1:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr1 + ; SI-NEXT: [[COPY2:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr2 + ; SI-NEXT: [[COPY3:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr3 + ; SI-NEXT: [[COPY4:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr4 + ; SI-NEXT: [[COPY5:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr5 + ; SI-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY]](<2 x s16>) + ; SI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; SI-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) + ; SI-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[COPY1]](<2 x s16>) + ; SI-NEXT: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[COPY2]](<2 x s16>) + ; SI-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C]](s32) + ; SI-NEXT: [[BITCAST3:%[0-9]+]]:_(s32) = G_BITCAST [[COPY3]](<2 x s16>) + ; SI-NEXT: [[BITCAST4:%[0-9]+]]:_(s32) = G_BITCAST [[COPY4]](<2 x s16>) + ; SI-NEXT: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST4]], [[C]](s32) + ; SI-NEXT: [[BITCAST5:%[0-9]+]]:_(s32) = G_BITCAST [[COPY5]](<2 x s16>) + ; SI-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 + ; SI-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[BITCAST4]], [[C1]] + ; SI-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LSHR2]], [[C1]] + ; SI-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C]](s32) + ; SI-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]] + ; SI-NEXT: [[BITCAST6:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) + ; SI-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[BITCAST5]], [[C1]] + ; SI-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 + ; SI-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY [[C2]](s32) + ; SI-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[COPY6]], [[C]](s32) + ; SI-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL1]] + ; SI-NEXT: [[BITCAST7:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32) + ; SI-NEXT: [[C3:%[0-9]+]]:_(s16) = G_CONSTANT i16 1 + ; SI-NEXT: [[C4:%[0-9]+]]:_(s16) = G_CONSTANT i16 15 + ; SI-NEXT: [[AND3:%[0-9]+]]:_(s16) = G_AND [[C3]], [[C4]] + ; SI-NEXT: [[C5:%[0-9]+]]:_(s16) = G_CONSTANT i16 -1 + ; SI-NEXT: [[XOR:%[0-9]+]]:_(s16) = G_XOR [[C3]], [[C5]] + ; SI-NEXT: [[AND4:%[0-9]+]]:_(s16) = G_AND [[XOR]], [[C4]] + ; SI-NEXT: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[AND3]](s16) + ; SI-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[BITCAST]], [[ZEXT]](s32) + ; SI-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[SHL2]](s32) + ; SI-NEXT: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 + ; SI-NEXT: [[AND5:%[0-9]+]]:_(s32) = G_AND [[BITCAST2]], [[C1]] + ; SI-NEXT: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[AND5]], [[C6]](s32) + ; SI-NEXT: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[AND4]](s16) + ; SI-NEXT: [[AND6:%[0-9]+]]:_(s32) = G_AND [[LSHR3]], [[C1]] + ; SI-NEXT: [[LSHR4:%[0-9]+]]:_(s32) = G_LSHR [[AND6]], [[ZEXT1]](s32) + ; 
SI-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR4]](s32) + ; SI-NEXT: [[OR2:%[0-9]+]]:_(s16) = G_OR [[TRUNC]], [[TRUNC1]] + ; SI-NEXT: [[AND7:%[0-9]+]]:_(s16) = G_AND [[C3]], [[C4]] + ; SI-NEXT: [[XOR1:%[0-9]+]]:_(s16) = G_XOR [[C3]], [[C5]] + ; SI-NEXT: [[AND8:%[0-9]+]]:_(s16) = G_AND [[XOR1]], [[C4]] + ; SI-NEXT: [[ZEXT2:%[0-9]+]]:_(s32) = G_ZEXT [[AND7]](s16) + ; SI-NEXT: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[LSHR]], [[ZEXT2]](s32) + ; SI-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[SHL3]](s32) + ; SI-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY [[C6]](s32) + ; SI-NEXT: [[AND9:%[0-9]+]]:_(s32) = G_AND [[LSHR1]], [[C1]] + ; SI-NEXT: [[LSHR5:%[0-9]+]]:_(s32) = G_LSHR [[AND9]], [[COPY7]](s32) + ; SI-NEXT: [[ZEXT3:%[0-9]+]]:_(s32) = G_ZEXT [[AND8]](s16) + ; SI-NEXT: [[AND10:%[0-9]+]]:_(s32) = G_AND [[LSHR5]], [[C1]] + ; SI-NEXT: [[LSHR6:%[0-9]+]]:_(s32) = G_LSHR [[AND10]], [[ZEXT3]](s32) + ; SI-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR6]](s32) + ; SI-NEXT: [[OR3:%[0-9]+]]:_(s16) = G_OR [[TRUNC2]], [[TRUNC3]] + ; SI-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY [[C6]](s32) + ; SI-NEXT: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[BITCAST2]], [[COPY8]](s32) + ; SI-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY [[C6]](s32) + ; SI-NEXT: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[LSHR1]], [[COPY9]](s32) + ; SI-NEXT: [[COPY10:%[0-9]+]]:_(s32) = COPY [[C1]](s32) + ; SI-NEXT: [[COPY11:%[0-9]+]]:_(s32) = COPY [[C1]](s32) + ; SI-NEXT: [[SHL6:%[0-9]+]]:_(s32) = G_SHL [[COPY11]], [[C]](s32) + ; SI-NEXT: [[OR4:%[0-9]+]]:_(s32) = G_OR [[COPY10]], [[SHL6]] + ; SI-NEXT: [[BITCAST8:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR4]](s32) + ; SI-NEXT: [[XOR2:%[0-9]+]]:_(<2 x s16>) = G_XOR [[BITCAST6]], [[BITCAST8]] + ; SI-NEXT: [[BITCAST9:%[0-9]+]]:_(s32) = G_BITCAST [[XOR2]](<2 x s16>) + ; SI-NEXT: [[TRUNC4:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST9]](s32) + ; SI-NEXT: [[LSHR7:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST9]], [[C]](s32) + ; SI-NEXT: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR7]](s32) + ; SI-NEXT: [[AND11:%[0-9]+]]:_(s16) = G_AND [[TRUNC4]], [[C4]] + ; SI-NEXT: [[XOR3:%[0-9]+]]:_(s16) = G_XOR [[TRUNC4]], [[C5]] + ; SI-NEXT: [[AND12:%[0-9]+]]:_(s16) = G_AND [[XOR3]], [[C4]] + ; SI-NEXT: [[ZEXT4:%[0-9]+]]:_(s32) = G_ZEXT [[AND11]](s16) + ; SI-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[OR2]](s16) + ; SI-NEXT: [[SHL7:%[0-9]+]]:_(s32) = G_SHL [[ANYEXT]], [[ZEXT4]](s32) + ; SI-NEXT: [[TRUNC6:%[0-9]+]]:_(s16) = G_TRUNC [[SHL7]](s32) + ; SI-NEXT: [[COPY12:%[0-9]+]]:_(s32) = COPY [[C6]](s32) + ; SI-NEXT: [[AND13:%[0-9]+]]:_(s32) = G_AND [[SHL4]], [[C1]] + ; SI-NEXT: [[LSHR8:%[0-9]+]]:_(s32) = G_LSHR [[AND13]], [[COPY12]](s32) + ; SI-NEXT: [[ZEXT5:%[0-9]+]]:_(s32) = G_ZEXT [[AND12]](s16) + ; SI-NEXT: [[AND14:%[0-9]+]]:_(s32) = G_AND [[LSHR8]], [[C1]] + ; SI-NEXT: [[LSHR9:%[0-9]+]]:_(s32) = G_LSHR [[AND14]], [[ZEXT5]](s32) + ; SI-NEXT: [[TRUNC7:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR9]](s32) + ; SI-NEXT: [[OR5:%[0-9]+]]:_(s16) = G_OR [[TRUNC6]], [[TRUNC7]] + ; SI-NEXT: [[AND15:%[0-9]+]]:_(s16) = G_AND [[TRUNC5]], [[C4]] + ; SI-NEXT: [[XOR4:%[0-9]+]]:_(s16) = G_XOR [[TRUNC5]], [[C5]] + ; SI-NEXT: [[AND16:%[0-9]+]]:_(s16) = G_AND [[XOR4]], [[C4]] + ; SI-NEXT: [[ZEXT6:%[0-9]+]]:_(s32) = G_ZEXT [[AND15]](s16) + ; SI-NEXT: [[ANYEXT1:%[0-9]+]]:_(s32) = G_ANYEXT [[OR3]](s16) + ; SI-NEXT: [[SHL8:%[0-9]+]]:_(s32) = G_SHL [[ANYEXT1]], [[ZEXT6]](s32) + ; SI-NEXT: [[TRUNC8:%[0-9]+]]:_(s16) = G_TRUNC [[SHL8]](s32) + ; SI-NEXT: [[COPY13:%[0-9]+]]:_(s32) = COPY [[C6]](s32) + ; SI-NEXT: [[AND17:%[0-9]+]]:_(s32) = G_AND [[SHL5]], [[C1]] + ; SI-NEXT: [[LSHR10:%[0-9]+]]:_(s32) = G_LSHR 
[[AND17]], [[COPY13]](s32) + ; SI-NEXT: [[ZEXT7:%[0-9]+]]:_(s32) = G_ZEXT [[AND16]](s16) + ; SI-NEXT: [[AND18:%[0-9]+]]:_(s32) = G_AND [[LSHR10]], [[C1]] + ; SI-NEXT: [[LSHR11:%[0-9]+]]:_(s32) = G_LSHR [[AND18]], [[ZEXT7]](s32) + ; SI-NEXT: [[TRUNC9:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR11]](s32) + ; SI-NEXT: [[OR6:%[0-9]+]]:_(s16) = G_OR [[TRUNC8]], [[TRUNC9]] + ; SI-NEXT: [[ZEXT8:%[0-9]+]]:_(s32) = G_ZEXT [[OR5]](s16) + ; SI-NEXT: [[ZEXT9:%[0-9]+]]:_(s32) = G_ZEXT [[OR6]](s16) + ; SI-NEXT: [[SHL9:%[0-9]+]]:_(s32) = G_SHL [[ZEXT9]], [[C]](s32) + ; SI-NEXT: [[OR7:%[0-9]+]]:_(s32) = G_OR [[ZEXT8]], [[SHL9]] + ; SI-NEXT: [[BITCAST10:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR7]](s32) + ; SI-NEXT: [[AND19:%[0-9]+]]:_(s16) = G_AND [[C3]], [[C4]] + ; SI-NEXT: [[XOR5:%[0-9]+]]:_(s16) = G_XOR [[C3]], [[C5]] + ; SI-NEXT: [[AND20:%[0-9]+]]:_(s16) = G_AND [[XOR5]], [[C4]] + ; SI-NEXT: [[ZEXT10:%[0-9]+]]:_(s32) = G_ZEXT [[AND19]](s16) + ; SI-NEXT: [[SHL10:%[0-9]+]]:_(s32) = G_SHL [[BITCAST1]], [[ZEXT10]](s32) + ; SI-NEXT: [[TRUNC10:%[0-9]+]]:_(s16) = G_TRUNC [[SHL10]](s32) + ; SI-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY [[C6]](s32) + ; SI-NEXT: [[AND21:%[0-9]+]]:_(s32) = G_AND [[BITCAST3]], [[C1]] + ; SI-NEXT: [[LSHR12:%[0-9]+]]:_(s32) = G_LSHR [[AND21]], [[COPY14]](s32) + ; SI-NEXT: [[ZEXT11:%[0-9]+]]:_(s32) = G_ZEXT [[AND20]](s16) + ; SI-NEXT: [[AND22:%[0-9]+]]:_(s32) = G_AND [[LSHR12]], [[C1]] + ; SI-NEXT: [[LSHR13:%[0-9]+]]:_(s32) = G_LSHR [[AND22]], [[ZEXT11]](s32) + ; SI-NEXT: [[TRUNC11:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR13]](s32) + ; SI-NEXT: [[OR8:%[0-9]+]]:_(s16) = G_OR [[TRUNC10]], [[TRUNC11]] + ; SI-NEXT: [[AND23:%[0-9]+]]:_(s16) = G_AND [[C3]], [[C4]] + ; SI-NEXT: [[XOR6:%[0-9]+]]:_(s16) = G_XOR [[C3]], [[C5]] + ; SI-NEXT: [[AND24:%[0-9]+]]:_(s16) = G_AND [[XOR6]], [[C4]] + ; SI-NEXT: [[ZEXT12:%[0-9]+]]:_(s32) = G_ZEXT [[AND23]](s16) + ; SI-NEXT: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF + ; SI-NEXT: [[SHL11:%[0-9]+]]:_(s32) = G_SHL [[DEF]], [[ZEXT12]](s32) + ; SI-NEXT: [[TRUNC12:%[0-9]+]]:_(s16) = G_TRUNC [[SHL11]](s32) + ; SI-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY [[C6]](s32) + ; SI-NEXT: [[LSHR14:%[0-9]+]]:_(s32) = G_LSHR [[C2]], [[COPY15]](s32) + ; SI-NEXT: [[ZEXT13:%[0-9]+]]:_(s32) = G_ZEXT [[AND24]](s16) + ; SI-NEXT: [[AND25:%[0-9]+]]:_(s32) = G_AND [[LSHR14]], [[C1]] + ; SI-NEXT: [[LSHR15:%[0-9]+]]:_(s32) = G_LSHR [[AND25]], [[ZEXT13]](s32) + ; SI-NEXT: [[TRUNC13:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR15]](s32) + ; SI-NEXT: [[OR9:%[0-9]+]]:_(s16) = G_OR [[TRUNC12]], [[TRUNC13]] + ; SI-NEXT: [[COPY16:%[0-9]+]]:_(s32) = COPY [[C6]](s32) + ; SI-NEXT: [[SHL12:%[0-9]+]]:_(s32) = G_SHL [[BITCAST3]], [[COPY16]](s32) + ; SI-NEXT: [[COPY17:%[0-9]+]]:_(s32) = COPY [[C6]](s32) + ; SI-NEXT: [[COPY18:%[0-9]+]]:_(s32) = COPY [[DEF]](s32) + ; SI-NEXT: [[SHL13:%[0-9]+]]:_(s32) = G_SHL [[COPY18]], [[COPY17]](s32) + ; SI-NEXT: [[COPY19:%[0-9]+]]:_(s32) = COPY [[C1]](s32) + ; SI-NEXT: [[COPY20:%[0-9]+]]:_(s32) = COPY [[C1]](s32) + ; SI-NEXT: [[SHL14:%[0-9]+]]:_(s32) = G_SHL [[COPY20]], [[C]](s32) + ; SI-NEXT: [[OR10:%[0-9]+]]:_(s32) = G_OR [[COPY19]], [[SHL14]] + ; SI-NEXT: [[BITCAST11:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR10]](s32) + ; SI-NEXT: [[XOR7:%[0-9]+]]:_(<2 x s16>) = G_XOR [[BITCAST7]], [[BITCAST11]] + ; SI-NEXT: [[BITCAST12:%[0-9]+]]:_(s32) = G_BITCAST [[XOR7]](<2 x s16>) + ; SI-NEXT: [[TRUNC14:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST12]](s32) + ; SI-NEXT: [[LSHR16:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST12]], [[C]](s32) + ; SI-NEXT: [[TRUNC15:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR16]](s32) + ; SI-NEXT: 
[[AND26:%[0-9]+]]:_(s16) = G_AND [[TRUNC14]], [[C4]] + ; SI-NEXT: [[XOR8:%[0-9]+]]:_(s16) = G_XOR [[TRUNC14]], [[C5]] + ; SI-NEXT: [[AND27:%[0-9]+]]:_(s16) = G_AND [[XOR8]], [[C4]] + ; SI-NEXT: [[ZEXT14:%[0-9]+]]:_(s32) = G_ZEXT [[AND26]](s16) + ; SI-NEXT: [[ANYEXT2:%[0-9]+]]:_(s32) = G_ANYEXT [[OR8]](s16) + ; SI-NEXT: [[SHL15:%[0-9]+]]:_(s32) = G_SHL [[ANYEXT2]], [[ZEXT14]](s32) + ; SI-NEXT: [[TRUNC16:%[0-9]+]]:_(s16) = G_TRUNC [[SHL15]](s32) + ; SI-NEXT: [[COPY21:%[0-9]+]]:_(s32) = COPY [[C6]](s32) + ; SI-NEXT: [[AND28:%[0-9]+]]:_(s32) = G_AND [[SHL12]], [[C1]] + ; SI-NEXT: [[LSHR17:%[0-9]+]]:_(s32) = G_LSHR [[AND28]], [[COPY21]](s32) + ; SI-NEXT: [[ZEXT15:%[0-9]+]]:_(s32) = G_ZEXT [[AND27]](s16) + ; SI-NEXT: [[AND29:%[0-9]+]]:_(s32) = G_AND [[LSHR17]], [[C1]] + ; SI-NEXT: [[LSHR18:%[0-9]+]]:_(s32) = G_LSHR [[AND29]], [[ZEXT15]](s32) + ; SI-NEXT: [[TRUNC17:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR18]](s32) + ; SI-NEXT: [[OR11:%[0-9]+]]:_(s16) = G_OR [[TRUNC16]], [[TRUNC17]] + ; SI-NEXT: [[AND30:%[0-9]+]]:_(s16) = G_AND [[TRUNC15]], [[C4]] + ; SI-NEXT: [[XOR9:%[0-9]+]]:_(s16) = G_XOR [[TRUNC15]], [[C5]] + ; SI-NEXT: [[AND31:%[0-9]+]]:_(s16) = G_AND [[XOR9]], [[C4]] + ; SI-NEXT: [[ZEXT16:%[0-9]+]]:_(s32) = G_ZEXT [[AND30]](s16) + ; SI-NEXT: [[ANYEXT3:%[0-9]+]]:_(s32) = G_ANYEXT [[OR9]](s16) + ; SI-NEXT: [[SHL16:%[0-9]+]]:_(s32) = G_SHL [[ANYEXT3]], [[ZEXT16]](s32) + ; SI-NEXT: [[TRUNC18:%[0-9]+]]:_(s16) = G_TRUNC [[SHL16]](s32) + ; SI-NEXT: [[COPY22:%[0-9]+]]:_(s32) = COPY [[C6]](s32) + ; SI-NEXT: [[AND32:%[0-9]+]]:_(s32) = G_AND [[SHL13]], [[C1]] + ; SI-NEXT: [[LSHR19:%[0-9]+]]:_(s32) = G_LSHR [[AND32]], [[COPY22]](s32) + ; SI-NEXT: [[ZEXT17:%[0-9]+]]:_(s32) = G_ZEXT [[AND31]](s16) + ; SI-NEXT: [[AND33:%[0-9]+]]:_(s32) = G_AND [[LSHR19]], [[C1]] + ; SI-NEXT: [[LSHR20:%[0-9]+]]:_(s32) = G_LSHR [[AND33]], [[ZEXT17]](s32) + ; SI-NEXT: [[TRUNC19:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR20]](s32) + ; SI-NEXT: [[OR12:%[0-9]+]]:_(s16) = G_OR [[TRUNC18]], [[TRUNC19]] + ; SI-NEXT: [[ZEXT18:%[0-9]+]]:_(s32) = G_ZEXT [[OR11]](s16) + ; SI-NEXT: [[ZEXT19:%[0-9]+]]:_(s32) = G_ZEXT [[OR12]](s16) + ; SI-NEXT: [[SHL17:%[0-9]+]]:_(s32) = G_SHL [[ZEXT19]], [[C]](s32) + ; SI-NEXT: [[OR13:%[0-9]+]]:_(s32) = G_OR [[ZEXT18]], [[SHL17]] + ; SI-NEXT: [[BITCAST13:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR13]](s32) + ; SI-NEXT: [[DEF1:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF + ; SI-NEXT: [[BITCAST14:%[0-9]+]]:_(s32) = G_BITCAST [[BITCAST13]](<2 x s16>) + ; SI-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF1]](<4 x s16>) + ; SI-NEXT: [[BITCAST15:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>) + ; SI-NEXT: [[LSHR21:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST15]], [[C]](s32) + ; SI-NEXT: [[BITCAST16:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>) + ; SI-NEXT: [[AND34:%[0-9]+]]:_(s32) = G_AND [[BITCAST14]], [[C1]] + ; SI-NEXT: [[AND35:%[0-9]+]]:_(s32) = G_AND [[BITCAST15]], [[C1]] + ; SI-NEXT: [[SHL18:%[0-9]+]]:_(s32) = G_SHL [[AND35]], [[C]](s32) + ; SI-NEXT: [[OR14:%[0-9]+]]:_(s32) = G_OR [[AND34]], [[SHL18]] + ; SI-NEXT: [[BITCAST17:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR14]](s32) + ; SI-NEXT: [[AND36:%[0-9]+]]:_(s32) = G_AND [[LSHR21]], [[C1]] + ; SI-NEXT: [[AND37:%[0-9]+]]:_(s32) = G_AND [[BITCAST16]], [[C1]] + ; SI-NEXT: [[SHL19:%[0-9]+]]:_(s32) = G_SHL [[AND37]], [[C]](s32) + ; SI-NEXT: [[OR15:%[0-9]+]]:_(s32) = G_OR [[AND36]], [[SHL19]] + ; SI-NEXT: [[BITCAST18:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR15]](s32) + ; SI-NEXT: $vgpr0 = COPY [[BITCAST10]](<2 x s16>) + ; SI-NEXT: $vgpr1 = COPY 
[[BITCAST17]](<2 x s16>) + ; SI-NEXT: $vgpr2 = COPY [[BITCAST18]](<2 x s16>) ; VI-LABEL: name: test_fshr_v3s16_v3s16 ; VI: [[COPY:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0 - ; VI: [[COPY1:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr1 - ; VI: [[COPY2:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr2 - ; VI: [[COPY3:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr3 - ; VI: [[COPY4:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr4 - ; VI: [[COPY5:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr5 - ; VI: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY]](<2 x s16>) - ; VI: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST]](s32) - ; VI: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; VI: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) - ; VI: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) - ; VI: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[COPY1]](<2 x s16>) - ; VI: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST1]](s32) - ; VI: [[DEF:%[0-9]+]]:_(s16) = G_IMPLICIT_DEF - ; VI: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[COPY2]](<2 x s16>) - ; VI: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST2]](s32) - ; VI: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C]](s32) - ; VI: [[TRUNC4:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR1]](s32) - ; VI: [[BITCAST3:%[0-9]+]]:_(s32) = G_BITCAST [[COPY3]](<2 x s16>) - ; VI: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST3]](s32) - ; VI: [[BITCAST4:%[0-9]+]]:_(s32) = G_BITCAST [[COPY4]](<2 x s16>) - ; VI: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST4]], [[C]](s32) - ; VI: [[BITCAST5:%[0-9]+]]:_(s32) = G_BITCAST [[COPY5]](<2 x s16>) - ; VI: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 - ; VI: [[AND:%[0-9]+]]:_(s32) = G_AND [[BITCAST4]], [[C1]] - ; VI: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LSHR2]], [[C1]] - ; VI: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C]](s32) - ; VI: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]] - ; VI: [[BITCAST6:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) - ; VI: [[AND2:%[0-9]+]]:_(s32) = G_AND [[BITCAST5]], [[C1]] - ; VI: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 - ; VI: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[C2]], [[C]](s32) - ; VI: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL1]] - ; VI: [[BITCAST7:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32) - ; VI: [[C3:%[0-9]+]]:_(s16) = G_CONSTANT i16 1 - ; VI: [[C4:%[0-9]+]]:_(s16) = G_CONSTANT i16 15 - ; VI: [[AND3:%[0-9]+]]:_(s16) = G_AND [[C3]], [[C4]] - ; VI: [[C5:%[0-9]+]]:_(s16) = G_CONSTANT i16 -1 - ; VI: [[XOR:%[0-9]+]]:_(s16) = G_XOR [[C3]], [[C5]] - ; VI: [[AND4:%[0-9]+]]:_(s16) = G_AND [[XOR]], [[C4]] - ; VI: [[SHL2:%[0-9]+]]:_(s16) = G_SHL [[TRUNC]], [[AND3]](s16) - ; VI: [[LSHR3:%[0-9]+]]:_(s16) = G_LSHR [[TRUNC3]], [[C3]](s16) - ; VI: [[LSHR4:%[0-9]+]]:_(s16) = G_LSHR [[LSHR3]], [[AND4]](s16) - ; VI: [[OR2:%[0-9]+]]:_(s16) = G_OR [[SHL2]], [[LSHR4]] - ; VI: [[AND5:%[0-9]+]]:_(s16) = G_AND [[C3]], [[C4]] - ; VI: [[XOR1:%[0-9]+]]:_(s16) = G_XOR [[C3]], [[C5]] - ; VI: [[AND6:%[0-9]+]]:_(s16) = G_AND [[XOR1]], [[C4]] - ; VI: [[SHL3:%[0-9]+]]:_(s16) = G_SHL [[TRUNC1]], [[AND5]](s16) - ; VI: [[LSHR5:%[0-9]+]]:_(s16) = G_LSHR [[TRUNC4]], [[C3]](s16) - ; VI: [[LSHR6:%[0-9]+]]:_(s16) = G_LSHR [[LSHR5]], [[AND6]](s16) - ; VI: [[OR3:%[0-9]+]]:_(s16) = G_OR [[SHL3]], [[LSHR6]] - ; VI: [[SHL4:%[0-9]+]]:_(s16) = G_SHL [[TRUNC3]], [[C3]](s16) - ; VI: [[SHL5:%[0-9]+]]:_(s16) = G_SHL [[TRUNC4]], [[C3]](s16) - ; VI: [[COPY6:%[0-9]+]]:_(s32) = COPY [[C1]](s32) - ; VI: [[SHL6:%[0-9]+]]:_(s32) = G_SHL [[C1]], [[C]](s32) - ; VI: [[OR4:%[0-9]+]]:_(s32) = G_OR [[COPY6]], [[SHL6]] - ; VI: [[BITCAST8:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR4]](s32) - ; VI: [[XOR2:%[0-9]+]]:_(<2 x s16>) = G_XOR 
[[BITCAST6]], [[BITCAST8]] - ; VI: [[BITCAST9:%[0-9]+]]:_(s32) = G_BITCAST [[XOR2]](<2 x s16>) - ; VI: [[TRUNC6:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST9]](s32) - ; VI: [[LSHR7:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST9]], [[C]](s32) - ; VI: [[TRUNC7:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR7]](s32) - ; VI: [[AND7:%[0-9]+]]:_(s16) = G_AND [[TRUNC6]], [[C4]] - ; VI: [[XOR3:%[0-9]+]]:_(s16) = G_XOR [[TRUNC6]], [[C5]] - ; VI: [[AND8:%[0-9]+]]:_(s16) = G_AND [[XOR3]], [[C4]] - ; VI: [[SHL7:%[0-9]+]]:_(s16) = G_SHL [[OR2]], [[AND7]](s16) - ; VI: [[LSHR8:%[0-9]+]]:_(s16) = G_LSHR [[SHL4]], [[C3]](s16) - ; VI: [[LSHR9:%[0-9]+]]:_(s16) = G_LSHR [[LSHR8]], [[AND8]](s16) - ; VI: [[OR5:%[0-9]+]]:_(s16) = G_OR [[SHL7]], [[LSHR9]] - ; VI: [[AND9:%[0-9]+]]:_(s16) = G_AND [[TRUNC7]], [[C4]] - ; VI: [[XOR4:%[0-9]+]]:_(s16) = G_XOR [[TRUNC7]], [[C5]] - ; VI: [[AND10:%[0-9]+]]:_(s16) = G_AND [[XOR4]], [[C4]] - ; VI: [[SHL8:%[0-9]+]]:_(s16) = G_SHL [[OR3]], [[AND9]](s16) - ; VI: [[LSHR10:%[0-9]+]]:_(s16) = G_LSHR [[SHL5]], [[C3]](s16) - ; VI: [[LSHR11:%[0-9]+]]:_(s16) = G_LSHR [[LSHR10]], [[AND10]](s16) - ; VI: [[OR6:%[0-9]+]]:_(s16) = G_OR [[SHL8]], [[LSHR11]] - ; VI: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[OR5]](s16) - ; VI: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[OR6]](s16) - ; VI: [[SHL9:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C]](s32) - ; VI: [[OR7:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL9]] - ; VI: [[BITCAST10:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR7]](s32) - ; VI: [[AND11:%[0-9]+]]:_(s16) = G_AND [[C3]], [[C4]] - ; VI: [[XOR5:%[0-9]+]]:_(s16) = G_XOR [[C3]], [[C5]] - ; VI: [[AND12:%[0-9]+]]:_(s16) = G_AND [[XOR5]], [[C4]] - ; VI: [[SHL10:%[0-9]+]]:_(s16) = G_SHL [[TRUNC2]], [[AND11]](s16) - ; VI: [[LSHR12:%[0-9]+]]:_(s16) = G_LSHR [[TRUNC5]], [[C3]](s16) - ; VI: [[LSHR13:%[0-9]+]]:_(s16) = G_LSHR [[LSHR12]], [[AND12]](s16) - ; VI: [[OR8:%[0-9]+]]:_(s16) = G_OR [[SHL10]], [[LSHR13]] - ; VI: [[AND13:%[0-9]+]]:_(s16) = G_AND [[C3]], [[C4]] - ; VI: [[XOR6:%[0-9]+]]:_(s16) = G_XOR [[C3]], [[C5]] - ; VI: [[AND14:%[0-9]+]]:_(s16) = G_AND [[XOR6]], [[C4]] - ; VI: [[SHL11:%[0-9]+]]:_(s16) = G_SHL [[DEF]], [[AND13]](s16) - ; VI: [[LSHR14:%[0-9]+]]:_(s16) = G_LSHR [[DEF]], [[C3]](s16) - ; VI: [[LSHR15:%[0-9]+]]:_(s16) = G_LSHR [[LSHR14]], [[AND14]](s16) - ; VI: [[OR9:%[0-9]+]]:_(s16) = G_OR [[SHL11]], [[LSHR15]] - ; VI: [[SHL12:%[0-9]+]]:_(s16) = G_SHL [[TRUNC5]], [[C3]](s16) - ; VI: [[SHL13:%[0-9]+]]:_(s16) = G_SHL [[DEF]], [[C3]](s16) - ; VI: [[COPY7:%[0-9]+]]:_(s32) = COPY [[C1]](s32) - ; VI: [[COPY8:%[0-9]+]]:_(s32) = COPY [[C1]](s32) - ; VI: [[SHL14:%[0-9]+]]:_(s32) = G_SHL [[COPY8]], [[C]](s32) - ; VI: [[OR10:%[0-9]+]]:_(s32) = G_OR [[COPY7]], [[SHL14]] - ; VI: [[BITCAST11:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR10]](s32) - ; VI: [[XOR7:%[0-9]+]]:_(<2 x s16>) = G_XOR [[BITCAST7]], [[BITCAST11]] - ; VI: [[BITCAST12:%[0-9]+]]:_(s32) = G_BITCAST [[XOR7]](<2 x s16>) - ; VI: [[TRUNC8:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST12]](s32) - ; VI: [[LSHR16:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST12]], [[C]](s32) - ; VI: [[TRUNC9:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR16]](s32) - ; VI: [[AND15:%[0-9]+]]:_(s16) = G_AND [[TRUNC8]], [[C4]] - ; VI: [[XOR8:%[0-9]+]]:_(s16) = G_XOR [[TRUNC8]], [[C5]] - ; VI: [[AND16:%[0-9]+]]:_(s16) = G_AND [[XOR8]], [[C4]] - ; VI: [[SHL15:%[0-9]+]]:_(s16) = G_SHL [[OR8]], [[AND15]](s16) - ; VI: [[LSHR17:%[0-9]+]]:_(s16) = G_LSHR [[SHL12]], [[C3]](s16) - ; VI: [[LSHR18:%[0-9]+]]:_(s16) = G_LSHR [[LSHR17]], [[AND16]](s16) - ; VI: [[OR11:%[0-9]+]]:_(s16) = G_OR [[SHL15]], [[LSHR18]] - ; VI: [[AND17:%[0-9]+]]:_(s16) = G_AND [[TRUNC9]], 
[[C4]] - ; VI: [[XOR9:%[0-9]+]]:_(s16) = G_XOR [[TRUNC9]], [[C5]] - ; VI: [[AND18:%[0-9]+]]:_(s16) = G_AND [[XOR9]], [[C4]] - ; VI: [[SHL16:%[0-9]+]]:_(s16) = G_SHL [[OR9]], [[AND17]](s16) - ; VI: [[LSHR19:%[0-9]+]]:_(s16) = G_LSHR [[SHL13]], [[C3]](s16) - ; VI: [[LSHR20:%[0-9]+]]:_(s16) = G_LSHR [[LSHR19]], [[AND18]](s16) - ; VI: [[OR12:%[0-9]+]]:_(s16) = G_OR [[SHL16]], [[LSHR20]] - ; VI: [[ZEXT2:%[0-9]+]]:_(s32) = G_ZEXT [[OR11]](s16) - ; VI: [[ZEXT3:%[0-9]+]]:_(s32) = G_ZEXT [[OR12]](s16) - ; VI: [[SHL17:%[0-9]+]]:_(s32) = G_SHL [[ZEXT3]], [[C]](s32) - ; VI: [[OR13:%[0-9]+]]:_(s32) = G_OR [[ZEXT2]], [[SHL17]] - ; VI: [[BITCAST13:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR13]](s32) - ; VI: [[DEF1:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF - ; VI: [[BITCAST14:%[0-9]+]]:_(s32) = G_BITCAST [[BITCAST13]](<2 x s16>) - ; VI: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF1]](<4 x s16>) - ; VI: [[BITCAST15:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>) - ; VI: [[LSHR21:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST15]], [[C]](s32) - ; VI: [[BITCAST16:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>) - ; VI: [[AND19:%[0-9]+]]:_(s32) = G_AND [[BITCAST14]], [[C1]] - ; VI: [[AND20:%[0-9]+]]:_(s32) = G_AND [[BITCAST15]], [[C1]] - ; VI: [[SHL18:%[0-9]+]]:_(s32) = G_SHL [[AND20]], [[C]](s32) - ; VI: [[OR14:%[0-9]+]]:_(s32) = G_OR [[AND19]], [[SHL18]] - ; VI: [[BITCAST17:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR14]](s32) - ; VI: [[AND21:%[0-9]+]]:_(s32) = G_AND [[LSHR21]], [[C1]] - ; VI: [[AND22:%[0-9]+]]:_(s32) = G_AND [[BITCAST16]], [[C1]] - ; VI: [[SHL19:%[0-9]+]]:_(s32) = G_SHL [[AND22]], [[C]](s32) - ; VI: [[OR15:%[0-9]+]]:_(s32) = G_OR [[AND21]], [[SHL19]] - ; VI: [[BITCAST18:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR15]](s32) - ; VI: $vgpr0 = COPY [[BITCAST10]](<2 x s16>) - ; VI: $vgpr1 = COPY [[BITCAST17]](<2 x s16>) - ; VI: $vgpr2 = COPY [[BITCAST18]](<2 x s16>) + ; VI-NEXT: [[COPY1:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr1 + ; VI-NEXT: [[COPY2:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr2 + ; VI-NEXT: [[COPY3:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr3 + ; VI-NEXT: [[COPY4:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr4 + ; VI-NEXT: [[COPY5:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr5 + ; VI-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY]](<2 x s16>) + ; VI-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST]](s32) + ; VI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; VI-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) + ; VI-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) + ; VI-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[COPY1]](<2 x s16>) + ; VI-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST1]](s32) + ; VI-NEXT: [[DEF:%[0-9]+]]:_(s16) = G_IMPLICIT_DEF + ; VI-NEXT: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[COPY2]](<2 x s16>) + ; VI-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST2]](s32) + ; VI-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C]](s32) + ; VI-NEXT: [[TRUNC4:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR1]](s32) + ; VI-NEXT: [[BITCAST3:%[0-9]+]]:_(s32) = G_BITCAST [[COPY3]](<2 x s16>) + ; VI-NEXT: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST3]](s32) + ; VI-NEXT: [[BITCAST4:%[0-9]+]]:_(s32) = G_BITCAST [[COPY4]](<2 x s16>) + ; VI-NEXT: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST4]], [[C]](s32) + ; VI-NEXT: [[BITCAST5:%[0-9]+]]:_(s32) = G_BITCAST [[COPY5]](<2 x s16>) + ; VI-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 + ; VI-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[BITCAST4]], [[C1]] + ; VI-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LSHR2]], [[C1]] + ; VI-NEXT: 
[[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C]](s32) + ; VI-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]] + ; VI-NEXT: [[BITCAST6:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) + ; VI-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[BITCAST5]], [[C1]] + ; VI-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 + ; VI-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[C2]], [[C]](s32) + ; VI-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL1]] + ; VI-NEXT: [[BITCAST7:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32) + ; VI-NEXT: [[C3:%[0-9]+]]:_(s16) = G_CONSTANT i16 1 + ; VI-NEXT: [[C4:%[0-9]+]]:_(s16) = G_CONSTANT i16 15 + ; VI-NEXT: [[AND3:%[0-9]+]]:_(s16) = G_AND [[C3]], [[C4]] + ; VI-NEXT: [[C5:%[0-9]+]]:_(s16) = G_CONSTANT i16 -1 + ; VI-NEXT: [[XOR:%[0-9]+]]:_(s16) = G_XOR [[C3]], [[C5]] + ; VI-NEXT: [[AND4:%[0-9]+]]:_(s16) = G_AND [[XOR]], [[C4]] + ; VI-NEXT: [[SHL2:%[0-9]+]]:_(s16) = G_SHL [[TRUNC]], [[AND3]](s16) + ; VI-NEXT: [[LSHR3:%[0-9]+]]:_(s16) = G_LSHR [[TRUNC3]], [[C3]](s16) + ; VI-NEXT: [[LSHR4:%[0-9]+]]:_(s16) = G_LSHR [[LSHR3]], [[AND4]](s16) + ; VI-NEXT: [[OR2:%[0-9]+]]:_(s16) = G_OR [[SHL2]], [[LSHR4]] + ; VI-NEXT: [[AND5:%[0-9]+]]:_(s16) = G_AND [[C3]], [[C4]] + ; VI-NEXT: [[XOR1:%[0-9]+]]:_(s16) = G_XOR [[C3]], [[C5]] + ; VI-NEXT: [[AND6:%[0-9]+]]:_(s16) = G_AND [[XOR1]], [[C4]] + ; VI-NEXT: [[SHL3:%[0-9]+]]:_(s16) = G_SHL [[TRUNC1]], [[AND5]](s16) + ; VI-NEXT: [[LSHR5:%[0-9]+]]:_(s16) = G_LSHR [[TRUNC4]], [[C3]](s16) + ; VI-NEXT: [[LSHR6:%[0-9]+]]:_(s16) = G_LSHR [[LSHR5]], [[AND6]](s16) + ; VI-NEXT: [[OR3:%[0-9]+]]:_(s16) = G_OR [[SHL3]], [[LSHR6]] + ; VI-NEXT: [[SHL4:%[0-9]+]]:_(s16) = G_SHL [[TRUNC3]], [[C3]](s16) + ; VI-NEXT: [[SHL5:%[0-9]+]]:_(s16) = G_SHL [[TRUNC4]], [[C3]](s16) + ; VI-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY [[C1]](s32) + ; VI-NEXT: [[SHL6:%[0-9]+]]:_(s32) = G_SHL [[C1]], [[C]](s32) + ; VI-NEXT: [[OR4:%[0-9]+]]:_(s32) = G_OR [[COPY6]], [[SHL6]] + ; VI-NEXT: [[BITCAST8:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR4]](s32) + ; VI-NEXT: [[XOR2:%[0-9]+]]:_(<2 x s16>) = G_XOR [[BITCAST6]], [[BITCAST8]] + ; VI-NEXT: [[BITCAST9:%[0-9]+]]:_(s32) = G_BITCAST [[XOR2]](<2 x s16>) + ; VI-NEXT: [[TRUNC6:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST9]](s32) + ; VI-NEXT: [[LSHR7:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST9]], [[C]](s32) + ; VI-NEXT: [[TRUNC7:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR7]](s32) + ; VI-NEXT: [[AND7:%[0-9]+]]:_(s16) = G_AND [[TRUNC6]], [[C4]] + ; VI-NEXT: [[XOR3:%[0-9]+]]:_(s16) = G_XOR [[TRUNC6]], [[C5]] + ; VI-NEXT: [[AND8:%[0-9]+]]:_(s16) = G_AND [[XOR3]], [[C4]] + ; VI-NEXT: [[SHL7:%[0-9]+]]:_(s16) = G_SHL [[OR2]], [[AND7]](s16) + ; VI-NEXT: [[LSHR8:%[0-9]+]]:_(s16) = G_LSHR [[SHL4]], [[C3]](s16) + ; VI-NEXT: [[LSHR9:%[0-9]+]]:_(s16) = G_LSHR [[LSHR8]], [[AND8]](s16) + ; VI-NEXT: [[OR5:%[0-9]+]]:_(s16) = G_OR [[SHL7]], [[LSHR9]] + ; VI-NEXT: [[AND9:%[0-9]+]]:_(s16) = G_AND [[TRUNC7]], [[C4]] + ; VI-NEXT: [[XOR4:%[0-9]+]]:_(s16) = G_XOR [[TRUNC7]], [[C5]] + ; VI-NEXT: [[AND10:%[0-9]+]]:_(s16) = G_AND [[XOR4]], [[C4]] + ; VI-NEXT: [[SHL8:%[0-9]+]]:_(s16) = G_SHL [[OR3]], [[AND9]](s16) + ; VI-NEXT: [[LSHR10:%[0-9]+]]:_(s16) = G_LSHR [[SHL5]], [[C3]](s16) + ; VI-NEXT: [[LSHR11:%[0-9]+]]:_(s16) = G_LSHR [[LSHR10]], [[AND10]](s16) + ; VI-NEXT: [[OR6:%[0-9]+]]:_(s16) = G_OR [[SHL8]], [[LSHR11]] + ; VI-NEXT: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[OR5]](s16) + ; VI-NEXT: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[OR6]](s16) + ; VI-NEXT: [[SHL9:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C]](s32) + ; VI-NEXT: [[OR7:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL9]] + ; VI-NEXT: [[BITCAST10:%[0-9]+]]:_(<2 x s16>) = 
G_BITCAST [[OR7]](s32) + ; VI-NEXT: [[AND11:%[0-9]+]]:_(s16) = G_AND [[C3]], [[C4]] + ; VI-NEXT: [[XOR5:%[0-9]+]]:_(s16) = G_XOR [[C3]], [[C5]] + ; VI-NEXT: [[AND12:%[0-9]+]]:_(s16) = G_AND [[XOR5]], [[C4]] + ; VI-NEXT: [[SHL10:%[0-9]+]]:_(s16) = G_SHL [[TRUNC2]], [[AND11]](s16) + ; VI-NEXT: [[LSHR12:%[0-9]+]]:_(s16) = G_LSHR [[TRUNC5]], [[C3]](s16) + ; VI-NEXT: [[LSHR13:%[0-9]+]]:_(s16) = G_LSHR [[LSHR12]], [[AND12]](s16) + ; VI-NEXT: [[OR8:%[0-9]+]]:_(s16) = G_OR [[SHL10]], [[LSHR13]] + ; VI-NEXT: [[AND13:%[0-9]+]]:_(s16) = G_AND [[C3]], [[C4]] + ; VI-NEXT: [[XOR6:%[0-9]+]]:_(s16) = G_XOR [[C3]], [[C5]] + ; VI-NEXT: [[AND14:%[0-9]+]]:_(s16) = G_AND [[XOR6]], [[C4]] + ; VI-NEXT: [[SHL11:%[0-9]+]]:_(s16) = G_SHL [[DEF]], [[AND13]](s16) + ; VI-NEXT: [[LSHR14:%[0-9]+]]:_(s16) = G_LSHR [[DEF]], [[C3]](s16) + ; VI-NEXT: [[LSHR15:%[0-9]+]]:_(s16) = G_LSHR [[LSHR14]], [[AND14]](s16) + ; VI-NEXT: [[OR9:%[0-9]+]]:_(s16) = G_OR [[SHL11]], [[LSHR15]] + ; VI-NEXT: [[SHL12:%[0-9]+]]:_(s16) = G_SHL [[TRUNC5]], [[C3]](s16) + ; VI-NEXT: [[SHL13:%[0-9]+]]:_(s16) = G_SHL [[DEF]], [[C3]](s16) + ; VI-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY [[C1]](s32) + ; VI-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY [[C1]](s32) + ; VI-NEXT: [[SHL14:%[0-9]+]]:_(s32) = G_SHL [[COPY8]], [[C]](s32) + ; VI-NEXT: [[OR10:%[0-9]+]]:_(s32) = G_OR [[COPY7]], [[SHL14]] + ; VI-NEXT: [[BITCAST11:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR10]](s32) + ; VI-NEXT: [[XOR7:%[0-9]+]]:_(<2 x s16>) = G_XOR [[BITCAST7]], [[BITCAST11]] + ; VI-NEXT: [[BITCAST12:%[0-9]+]]:_(s32) = G_BITCAST [[XOR7]](<2 x s16>) + ; VI-NEXT: [[TRUNC8:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST12]](s32) + ; VI-NEXT: [[LSHR16:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST12]], [[C]](s32) + ; VI-NEXT: [[TRUNC9:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR16]](s32) + ; VI-NEXT: [[AND15:%[0-9]+]]:_(s16) = G_AND [[TRUNC8]], [[C4]] + ; VI-NEXT: [[XOR8:%[0-9]+]]:_(s16) = G_XOR [[TRUNC8]], [[C5]] + ; VI-NEXT: [[AND16:%[0-9]+]]:_(s16) = G_AND [[XOR8]], [[C4]] + ; VI-NEXT: [[SHL15:%[0-9]+]]:_(s16) = G_SHL [[OR8]], [[AND15]](s16) + ; VI-NEXT: [[LSHR17:%[0-9]+]]:_(s16) = G_LSHR [[SHL12]], [[C3]](s16) + ; VI-NEXT: [[LSHR18:%[0-9]+]]:_(s16) = G_LSHR [[LSHR17]], [[AND16]](s16) + ; VI-NEXT: [[OR11:%[0-9]+]]:_(s16) = G_OR [[SHL15]], [[LSHR18]] + ; VI-NEXT: [[AND17:%[0-9]+]]:_(s16) = G_AND [[TRUNC9]], [[C4]] + ; VI-NEXT: [[XOR9:%[0-9]+]]:_(s16) = G_XOR [[TRUNC9]], [[C5]] + ; VI-NEXT: [[AND18:%[0-9]+]]:_(s16) = G_AND [[XOR9]], [[C4]] + ; VI-NEXT: [[SHL16:%[0-9]+]]:_(s16) = G_SHL [[OR9]], [[AND17]](s16) + ; VI-NEXT: [[LSHR19:%[0-9]+]]:_(s16) = G_LSHR [[SHL13]], [[C3]](s16) + ; VI-NEXT: [[LSHR20:%[0-9]+]]:_(s16) = G_LSHR [[LSHR19]], [[AND18]](s16) + ; VI-NEXT: [[OR12:%[0-9]+]]:_(s16) = G_OR [[SHL16]], [[LSHR20]] + ; VI-NEXT: [[ZEXT2:%[0-9]+]]:_(s32) = G_ZEXT [[OR11]](s16) + ; VI-NEXT: [[ZEXT3:%[0-9]+]]:_(s32) = G_ZEXT [[OR12]](s16) + ; VI-NEXT: [[SHL17:%[0-9]+]]:_(s32) = G_SHL [[ZEXT3]], [[C]](s32) + ; VI-NEXT: [[OR13:%[0-9]+]]:_(s32) = G_OR [[ZEXT2]], [[SHL17]] + ; VI-NEXT: [[BITCAST13:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR13]](s32) + ; VI-NEXT: [[DEF1:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF + ; VI-NEXT: [[BITCAST14:%[0-9]+]]:_(s32) = G_BITCAST [[BITCAST13]](<2 x s16>) + ; VI-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF1]](<4 x s16>) + ; VI-NEXT: [[BITCAST15:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>) + ; VI-NEXT: [[LSHR21:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST15]], [[C]](s32) + ; VI-NEXT: [[BITCAST16:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>) + ; VI-NEXT: 
[[AND19:%[0-9]+]]:_(s32) = G_AND [[BITCAST14]], [[C1]] + ; VI-NEXT: [[AND20:%[0-9]+]]:_(s32) = G_AND [[BITCAST15]], [[C1]] + ; VI-NEXT: [[SHL18:%[0-9]+]]:_(s32) = G_SHL [[AND20]], [[C]](s32) + ; VI-NEXT: [[OR14:%[0-9]+]]:_(s32) = G_OR [[AND19]], [[SHL18]] + ; VI-NEXT: [[BITCAST17:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR14]](s32) + ; VI-NEXT: [[AND21:%[0-9]+]]:_(s32) = G_AND [[LSHR21]], [[C1]] + ; VI-NEXT: [[AND22:%[0-9]+]]:_(s32) = G_AND [[BITCAST16]], [[C1]] + ; VI-NEXT: [[SHL19:%[0-9]+]]:_(s32) = G_SHL [[AND22]], [[C]](s32) + ; VI-NEXT: [[OR15:%[0-9]+]]:_(s32) = G_OR [[AND21]], [[SHL19]] + ; VI-NEXT: [[BITCAST18:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR15]](s32) + ; VI-NEXT: $vgpr0 = COPY [[BITCAST10]](<2 x s16>) + ; VI-NEXT: $vgpr1 = COPY [[BITCAST17]](<2 x s16>) + ; VI-NEXT: $vgpr2 = COPY [[BITCAST18]](<2 x s16>) ; GFX9-LABEL: name: test_fshr_v3s16_v3s16 ; GFX9: [[COPY:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0 - ; GFX9: [[COPY1:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr1 - ; GFX9: [[COPY2:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr2 - ; GFX9: [[COPY3:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr3 - ; GFX9: [[COPY4:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr4 - ; GFX9: [[COPY5:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr5 - ; GFX9: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY]](<2 x s16>) - ; GFX9: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; GFX9: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) - ; GFX9: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[COPY1]](<2 x s16>) - ; GFX9: [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[BITCAST]](s32), [[LSHR]](s32) - ; GFX9: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF - ; GFX9: [[COPY6:%[0-9]+]]:_(s32) = COPY [[DEF]](s32) - ; GFX9: [[BUILD_VECTOR_TRUNC1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[BITCAST1]](s32), [[COPY6]](s32) - ; GFX9: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[COPY2]](<2 x s16>) - ; GFX9: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C]](s32) - ; GFX9: [[BITCAST3:%[0-9]+]]:_(s32) = G_BITCAST [[COPY3]](<2 x s16>) - ; GFX9: [[BUILD_VECTOR_TRUNC2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[BITCAST2]](s32), [[LSHR1]](s32) - ; GFX9: [[COPY7:%[0-9]+]]:_(s32) = COPY [[DEF]](s32) - ; GFX9: [[BUILD_VECTOR_TRUNC3:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[BITCAST3]](s32), [[COPY7]](s32) - ; GFX9: [[BITCAST4:%[0-9]+]]:_(s32) = G_BITCAST [[COPY4]](<2 x s16>) - ; GFX9: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST4]], [[C]](s32) - ; GFX9: [[BITCAST5:%[0-9]+]]:_(s32) = G_BITCAST [[COPY5]](<2 x s16>) - ; GFX9: [[BUILD_VECTOR_TRUNC4:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[BITCAST4]](s32), [[LSHR2]](s32) - ; GFX9: [[BUILD_VECTOR_TRUNC5:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[BITCAST5]](s32), [[DEF]](s32) - ; GFX9: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 15 - ; GFX9: [[COPY8:%[0-9]+]]:_(s32) = COPY [[C1]](s32) - ; GFX9: [[BUILD_VECTOR_TRUNC6:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY8]](s32), [[C1]](s32) - ; GFX9: [[AND:%[0-9]+]]:_(<2 x s16>) = G_AND [[BUILD_VECTOR_TRUNC4]], [[BUILD_VECTOR_TRUNC6]] - ; GFX9: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 -1 - ; GFX9: [[COPY9:%[0-9]+]]:_(s32) = COPY [[C2]](s32) - ; GFX9: [[BUILD_VECTOR_TRUNC7:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY9]](s32), [[C2]](s32) - ; GFX9: [[XOR:%[0-9]+]]:_(<2 x s16>) = G_XOR [[BUILD_VECTOR_TRUNC4]], [[BUILD_VECTOR_TRUNC7]] - ; GFX9: [[AND1:%[0-9]+]]:_(<2 x s16>) = G_AND [[XOR]], [[BUILD_VECTOR_TRUNC6]] - ; GFX9: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 - ; GFX9: [[COPY10:%[0-9]+]]:_(s32) = COPY [[C3]](s32) - ; GFX9: [[BUILD_VECTOR_TRUNC8:%[0-9]+]]:_(<2 x 
s16>) = G_BUILD_VECTOR_TRUNC [[COPY10]](s32), [[C3]](s32) - ; GFX9: [[SHL:%[0-9]+]]:_(<2 x s16>) = G_SHL [[BUILD_VECTOR_TRUNC]], [[BUILD_VECTOR_TRUNC8]](<2 x s16>) - ; GFX9: [[SHL1:%[0-9]+]]:_(<2 x s16>) = G_SHL [[SHL]], [[AND1]](<2 x s16>) - ; GFX9: [[LSHR3:%[0-9]+]]:_(<2 x s16>) = G_LSHR [[BUILD_VECTOR_TRUNC2]], [[AND]](<2 x s16>) - ; GFX9: [[OR:%[0-9]+]]:_(<2 x s16>) = G_OR [[SHL1]], [[LSHR3]] - ; GFX9: [[COPY11:%[0-9]+]]:_(s32) = COPY [[C1]](s32) - ; GFX9: [[COPY12:%[0-9]+]]:_(s32) = COPY [[C1]](s32) - ; GFX9: [[BUILD_VECTOR_TRUNC9:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY11]](s32), [[COPY12]](s32) - ; GFX9: [[AND2:%[0-9]+]]:_(<2 x s16>) = G_AND [[BUILD_VECTOR_TRUNC5]], [[BUILD_VECTOR_TRUNC9]] - ; GFX9: [[COPY13:%[0-9]+]]:_(s32) = COPY [[C2]](s32) - ; GFX9: [[COPY14:%[0-9]+]]:_(s32) = COPY [[C2]](s32) - ; GFX9: [[BUILD_VECTOR_TRUNC10:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY13]](s32), [[COPY14]](s32) - ; GFX9: [[XOR1:%[0-9]+]]:_(<2 x s16>) = G_XOR [[BUILD_VECTOR_TRUNC5]], [[BUILD_VECTOR_TRUNC10]] - ; GFX9: [[AND3:%[0-9]+]]:_(<2 x s16>) = G_AND [[XOR1]], [[BUILD_VECTOR_TRUNC9]] - ; GFX9: [[COPY15:%[0-9]+]]:_(s32) = COPY [[C3]](s32) - ; GFX9: [[COPY16:%[0-9]+]]:_(s32) = COPY [[C3]](s32) - ; GFX9: [[BUILD_VECTOR_TRUNC11:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY15]](s32), [[COPY16]](s32) - ; GFX9: [[SHL2:%[0-9]+]]:_(<2 x s16>) = G_SHL [[BUILD_VECTOR_TRUNC1]], [[BUILD_VECTOR_TRUNC11]](<2 x s16>) - ; GFX9: [[SHL3:%[0-9]+]]:_(<2 x s16>) = G_SHL [[SHL2]], [[AND3]](<2 x s16>) - ; GFX9: [[LSHR4:%[0-9]+]]:_(<2 x s16>) = G_LSHR [[BUILD_VECTOR_TRUNC3]], [[AND2]](<2 x s16>) - ; GFX9: [[OR1:%[0-9]+]]:_(<2 x s16>) = G_OR [[SHL3]], [[LSHR4]] - ; GFX9: [[DEF1:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF - ; GFX9: [[BITCAST6:%[0-9]+]]:_(s32) = G_BITCAST [[OR1]](<2 x s16>) - ; GFX9: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF1]](<4 x s16>) - ; GFX9: [[BITCAST7:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>) - ; GFX9: [[LSHR5:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST7]], [[C]](s32) - ; GFX9: [[BITCAST8:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>) - ; GFX9: [[BUILD_VECTOR_TRUNC12:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[BITCAST6]](s32), [[BITCAST7]](s32) - ; GFX9: [[BUILD_VECTOR_TRUNC13:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[LSHR5]](s32), [[BITCAST8]](s32) - ; GFX9: $vgpr0 = COPY [[OR]](<2 x s16>) - ; GFX9: $vgpr1 = COPY [[BUILD_VECTOR_TRUNC12]](<2 x s16>) - ; GFX9: $vgpr2 = COPY [[BUILD_VECTOR_TRUNC13]](<2 x s16>) + ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr1 + ; GFX9-NEXT: [[COPY2:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr2 + ; GFX9-NEXT: [[COPY3:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr3 + ; GFX9-NEXT: [[COPY4:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr4 + ; GFX9-NEXT: [[COPY5:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr5 + ; GFX9-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY]](<2 x s16>) + ; GFX9-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; GFX9-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) + ; GFX9-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[COPY1]](<2 x s16>) + ; GFX9-NEXT: [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[BITCAST]](s32), [[LSHR]](s32) + ; GFX9-NEXT: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF + ; GFX9-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY [[DEF]](s32) + ; GFX9-NEXT: [[BUILD_VECTOR_TRUNC1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[BITCAST1]](s32), [[COPY6]](s32) + ; GFX9-NEXT: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[COPY2]](<2 x s16>) + ; GFX9-NEXT: 
[[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C]](s32) + ; GFX9-NEXT: [[BITCAST3:%[0-9]+]]:_(s32) = G_BITCAST [[COPY3]](<2 x s16>) + ; GFX9-NEXT: [[BUILD_VECTOR_TRUNC2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[BITCAST2]](s32), [[LSHR1]](s32) + ; GFX9-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY [[DEF]](s32) + ; GFX9-NEXT: [[BUILD_VECTOR_TRUNC3:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[BITCAST3]](s32), [[COPY7]](s32) + ; GFX9-NEXT: [[BITCAST4:%[0-9]+]]:_(s32) = G_BITCAST [[COPY4]](<2 x s16>) + ; GFX9-NEXT: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST4]], [[C]](s32) + ; GFX9-NEXT: [[BITCAST5:%[0-9]+]]:_(s32) = G_BITCAST [[COPY5]](<2 x s16>) + ; GFX9-NEXT: [[BUILD_VECTOR_TRUNC4:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[BITCAST4]](s32), [[LSHR2]](s32) + ; GFX9-NEXT: [[BUILD_VECTOR_TRUNC5:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[BITCAST5]](s32), [[DEF]](s32) + ; GFX9-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 15 + ; GFX9-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY [[C1]](s32) + ; GFX9-NEXT: [[BUILD_VECTOR_TRUNC6:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY8]](s32), [[C1]](s32) + ; GFX9-NEXT: [[AND:%[0-9]+]]:_(<2 x s16>) = G_AND [[BUILD_VECTOR_TRUNC4]], [[BUILD_VECTOR_TRUNC6]] + ; GFX9-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 -1 + ; GFX9-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY [[C2]](s32) + ; GFX9-NEXT: [[BUILD_VECTOR_TRUNC7:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY9]](s32), [[C2]](s32) + ; GFX9-NEXT: [[XOR:%[0-9]+]]:_(<2 x s16>) = G_XOR [[BUILD_VECTOR_TRUNC4]], [[BUILD_VECTOR_TRUNC7]] + ; GFX9-NEXT: [[AND1:%[0-9]+]]:_(<2 x s16>) = G_AND [[XOR]], [[BUILD_VECTOR_TRUNC6]] + ; GFX9-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 + ; GFX9-NEXT: [[COPY10:%[0-9]+]]:_(s32) = COPY [[C3]](s32) + ; GFX9-NEXT: [[BUILD_VECTOR_TRUNC8:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY10]](s32), [[C3]](s32) + ; GFX9-NEXT: [[SHL:%[0-9]+]]:_(<2 x s16>) = G_SHL [[BUILD_VECTOR_TRUNC]], [[BUILD_VECTOR_TRUNC8]](<2 x s16>) + ; GFX9-NEXT: [[SHL1:%[0-9]+]]:_(<2 x s16>) = G_SHL [[SHL]], [[AND1]](<2 x s16>) + ; GFX9-NEXT: [[LSHR3:%[0-9]+]]:_(<2 x s16>) = G_LSHR [[BUILD_VECTOR_TRUNC2]], [[AND]](<2 x s16>) + ; GFX9-NEXT: [[OR:%[0-9]+]]:_(<2 x s16>) = G_OR [[SHL1]], [[LSHR3]] + ; GFX9-NEXT: [[COPY11:%[0-9]+]]:_(s32) = COPY [[C1]](s32) + ; GFX9-NEXT: [[COPY12:%[0-9]+]]:_(s32) = COPY [[C1]](s32) + ; GFX9-NEXT: [[BUILD_VECTOR_TRUNC9:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY11]](s32), [[COPY12]](s32) + ; GFX9-NEXT: [[AND2:%[0-9]+]]:_(<2 x s16>) = G_AND [[BUILD_VECTOR_TRUNC5]], [[BUILD_VECTOR_TRUNC9]] + ; GFX9-NEXT: [[COPY13:%[0-9]+]]:_(s32) = COPY [[C2]](s32) + ; GFX9-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY [[C2]](s32) + ; GFX9-NEXT: [[BUILD_VECTOR_TRUNC10:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY13]](s32), [[COPY14]](s32) + ; GFX9-NEXT: [[XOR1:%[0-9]+]]:_(<2 x s16>) = G_XOR [[BUILD_VECTOR_TRUNC5]], [[BUILD_VECTOR_TRUNC10]] + ; GFX9-NEXT: [[AND3:%[0-9]+]]:_(<2 x s16>) = G_AND [[XOR1]], [[BUILD_VECTOR_TRUNC9]] + ; GFX9-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY [[C3]](s32) + ; GFX9-NEXT: [[COPY16:%[0-9]+]]:_(s32) = COPY [[C3]](s32) + ; GFX9-NEXT: [[BUILD_VECTOR_TRUNC11:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY15]](s32), [[COPY16]](s32) + ; GFX9-NEXT: [[SHL2:%[0-9]+]]:_(<2 x s16>) = G_SHL [[BUILD_VECTOR_TRUNC1]], [[BUILD_VECTOR_TRUNC11]](<2 x s16>) + ; GFX9-NEXT: [[SHL3:%[0-9]+]]:_(<2 x s16>) = G_SHL [[SHL2]], [[AND3]](<2 x s16>) + ; GFX9-NEXT: [[LSHR4:%[0-9]+]]:_(<2 x s16>) = G_LSHR [[BUILD_VECTOR_TRUNC3]], [[AND2]](<2 x s16>) + ; GFX9-NEXT: [[OR1:%[0-9]+]]:_(<2 
x s16>) = G_OR [[SHL3]], [[LSHR4]] + ; GFX9-NEXT: [[DEF1:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF + ; GFX9-NEXT: [[BITCAST6:%[0-9]+]]:_(s32) = G_BITCAST [[OR1]](<2 x s16>) + ; GFX9-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF1]](<4 x s16>) + ; GFX9-NEXT: [[BITCAST7:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>) + ; GFX9-NEXT: [[LSHR5:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST7]], [[C]](s32) + ; GFX9-NEXT: [[BITCAST8:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>) + ; GFX9-NEXT: [[BUILD_VECTOR_TRUNC12:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[BITCAST6]](s32), [[BITCAST7]](s32) + ; GFX9-NEXT: [[BUILD_VECTOR_TRUNC13:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[LSHR5]](s32), [[BITCAST8]](s32) + ; GFX9-NEXT: $vgpr0 = COPY [[OR]](<2 x s16>) + ; GFX9-NEXT: $vgpr1 = COPY [[BUILD_VECTOR_TRUNC12]](<2 x s16>) + ; GFX9-NEXT: $vgpr2 = COPY [[BUILD_VECTOR_TRUNC13]](<2 x s16>) %0:_(<2 x s16>) = COPY $vgpr0 %1:_(<2 x s16>) = COPY $vgpr1 %2:_(<2 x s16>) = COPY $vgpr2 @@ -1100,352 +1100,352 @@ body: | ; SI-LABEL: name: test_fshr_v4s16_v4s16 ; SI: [[COPY:%[0-9]+]]:_(<4 x s16>) = COPY $vgpr0_vgpr1 - ; SI: [[COPY1:%[0-9]+]]:_(<4 x s16>) = COPY $vgpr2_vgpr3 - ; SI: [[COPY2:%[0-9]+]]:_(<4 x s16>) = COPY $vgpr4_vgpr5 - ; SI: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[COPY]](<4 x s16>) - ; SI: [[UV2:%[0-9]+]]:_(<2 x s16>), [[UV3:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[COPY1]](<4 x s16>) - ; SI: [[UV4:%[0-9]+]]:_(<2 x s16>), [[UV5:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[COPY2]](<4 x s16>) - ; SI: [[C:%[0-9]+]]:_(s16) = G_CONSTANT i16 1 - ; SI: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>) - ; SI: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; SI: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C1]](s32) - ; SI: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV2]](<2 x s16>) - ; SI: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C1]](s32) - ; SI: [[C2:%[0-9]+]]:_(s16) = G_CONSTANT i16 15 - ; SI: [[AND:%[0-9]+]]:_(s16) = G_AND [[C]], [[C2]] - ; SI: [[C3:%[0-9]+]]:_(s16) = G_CONSTANT i16 -1 - ; SI: [[XOR:%[0-9]+]]:_(s16) = G_XOR [[C]], [[C3]] - ; SI: [[AND1:%[0-9]+]]:_(s16) = G_AND [[XOR]], [[C2]] - ; SI: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[AND]](s16) - ; SI: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[BITCAST]], [[ZEXT]](s32) - ; SI: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[SHL]](s32) - ; SI: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 - ; SI: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 - ; SI: [[AND2:%[0-9]+]]:_(s32) = G_AND [[BITCAST1]], [[C5]] - ; SI: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[AND2]], [[C4]](s32) - ; SI: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[AND1]](s16) - ; SI: [[AND3:%[0-9]+]]:_(s32) = G_AND [[LSHR2]], [[C5]] - ; SI: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[AND3]], [[ZEXT1]](s32) - ; SI: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR3]](s32) - ; SI: [[OR:%[0-9]+]]:_(s16) = G_OR [[TRUNC]], [[TRUNC1]] - ; SI: [[AND4:%[0-9]+]]:_(s16) = G_AND [[C]], [[C2]] - ; SI: [[XOR1:%[0-9]+]]:_(s16) = G_XOR [[C]], [[C3]] - ; SI: [[AND5:%[0-9]+]]:_(s16) = G_AND [[XOR1]], [[C2]] - ; SI: [[ZEXT2:%[0-9]+]]:_(s32) = G_ZEXT [[AND4]](s16) - ; SI: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[LSHR]], [[ZEXT2]](s32) - ; SI: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[SHL1]](s32) - ; SI: [[COPY3:%[0-9]+]]:_(s32) = COPY [[C4]](s32) - ; SI: [[AND6:%[0-9]+]]:_(s32) = G_AND [[LSHR1]], [[C5]] - ; SI: [[LSHR4:%[0-9]+]]:_(s32) = G_LSHR [[AND6]], [[COPY3]](s32) - ; SI: [[ZEXT3:%[0-9]+]]:_(s32) = G_ZEXT [[AND5]](s16) - ; SI: [[AND7:%[0-9]+]]:_(s32) = G_AND [[LSHR4]], [[C5]] - ; SI: 
[[LSHR5:%[0-9]+]]:_(s32) = G_LSHR [[AND7]], [[ZEXT3]](s32) - ; SI: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR5]](s32) - ; SI: [[OR1:%[0-9]+]]:_(s16) = G_OR [[TRUNC2]], [[TRUNC3]] - ; SI: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[UV2]](<2 x s16>) - ; SI: [[LSHR6:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C1]](s32) - ; SI: [[COPY4:%[0-9]+]]:_(s32) = COPY [[C4]](s32) - ; SI: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[BITCAST2]], [[COPY4]](s32) - ; SI: [[COPY5:%[0-9]+]]:_(s32) = COPY [[C4]](s32) - ; SI: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[LSHR6]], [[COPY5]](s32) - ; SI: [[COPY6:%[0-9]+]]:_(s32) = COPY [[C5]](s32) - ; SI: [[COPY7:%[0-9]+]]:_(s32) = COPY [[C5]](s32) - ; SI: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[COPY7]], [[C1]](s32) - ; SI: [[OR2:%[0-9]+]]:_(s32) = G_OR [[COPY6]], [[SHL4]] - ; SI: [[BITCAST3:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR2]](s32) - ; SI: [[XOR2:%[0-9]+]]:_(<2 x s16>) = G_XOR [[UV4]], [[BITCAST3]] - ; SI: [[BITCAST4:%[0-9]+]]:_(s32) = G_BITCAST [[XOR2]](<2 x s16>) - ; SI: [[TRUNC4:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST4]](s32) - ; SI: [[LSHR7:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST4]], [[C1]](s32) - ; SI: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR7]](s32) - ; SI: [[AND8:%[0-9]+]]:_(s16) = G_AND [[TRUNC4]], [[C2]] - ; SI: [[XOR3:%[0-9]+]]:_(s16) = G_XOR [[TRUNC4]], [[C3]] - ; SI: [[AND9:%[0-9]+]]:_(s16) = G_AND [[XOR3]], [[C2]] - ; SI: [[ZEXT4:%[0-9]+]]:_(s32) = G_ZEXT [[AND8]](s16) - ; SI: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[OR]](s16) - ; SI: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[ANYEXT]], [[ZEXT4]](s32) - ; SI: [[TRUNC6:%[0-9]+]]:_(s16) = G_TRUNC [[SHL5]](s32) - ; SI: [[COPY8:%[0-9]+]]:_(s32) = COPY [[C4]](s32) - ; SI: [[AND10:%[0-9]+]]:_(s32) = G_AND [[SHL2]], [[C5]] - ; SI: [[LSHR8:%[0-9]+]]:_(s32) = G_LSHR [[AND10]], [[COPY8]](s32) - ; SI: [[ZEXT5:%[0-9]+]]:_(s32) = G_ZEXT [[AND9]](s16) - ; SI: [[AND11:%[0-9]+]]:_(s32) = G_AND [[LSHR8]], [[C5]] - ; SI: [[LSHR9:%[0-9]+]]:_(s32) = G_LSHR [[AND11]], [[ZEXT5]](s32) - ; SI: [[TRUNC7:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR9]](s32) - ; SI: [[OR3:%[0-9]+]]:_(s16) = G_OR [[TRUNC6]], [[TRUNC7]] - ; SI: [[AND12:%[0-9]+]]:_(s16) = G_AND [[TRUNC5]], [[C2]] - ; SI: [[XOR4:%[0-9]+]]:_(s16) = G_XOR [[TRUNC5]], [[C3]] - ; SI: [[AND13:%[0-9]+]]:_(s16) = G_AND [[XOR4]], [[C2]] - ; SI: [[ZEXT6:%[0-9]+]]:_(s32) = G_ZEXT [[AND12]](s16) - ; SI: [[ANYEXT1:%[0-9]+]]:_(s32) = G_ANYEXT [[OR1]](s16) - ; SI: [[SHL6:%[0-9]+]]:_(s32) = G_SHL [[ANYEXT1]], [[ZEXT6]](s32) - ; SI: [[TRUNC8:%[0-9]+]]:_(s16) = G_TRUNC [[SHL6]](s32) - ; SI: [[COPY9:%[0-9]+]]:_(s32) = COPY [[C4]](s32) - ; SI: [[AND14:%[0-9]+]]:_(s32) = G_AND [[SHL3]], [[C5]] - ; SI: [[LSHR10:%[0-9]+]]:_(s32) = G_LSHR [[AND14]], [[COPY9]](s32) - ; SI: [[ZEXT7:%[0-9]+]]:_(s32) = G_ZEXT [[AND13]](s16) - ; SI: [[AND15:%[0-9]+]]:_(s32) = G_AND [[LSHR10]], [[C5]] - ; SI: [[LSHR11:%[0-9]+]]:_(s32) = G_LSHR [[AND15]], [[ZEXT7]](s32) - ; SI: [[TRUNC9:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR11]](s32) - ; SI: [[OR4:%[0-9]+]]:_(s16) = G_OR [[TRUNC8]], [[TRUNC9]] - ; SI: [[ZEXT8:%[0-9]+]]:_(s32) = G_ZEXT [[OR3]](s16) - ; SI: [[ZEXT9:%[0-9]+]]:_(s32) = G_ZEXT [[OR4]](s16) - ; SI: [[SHL7:%[0-9]+]]:_(s32) = G_SHL [[ZEXT9]], [[C1]](s32) - ; SI: [[OR5:%[0-9]+]]:_(s32) = G_OR [[ZEXT8]], [[SHL7]] - ; SI: [[BITCAST5:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR5]](s32) - ; SI: [[BITCAST6:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>) - ; SI: [[LSHR12:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST6]], [[C1]](s32) - ; SI: [[BITCAST7:%[0-9]+]]:_(s32) = G_BITCAST [[UV3]](<2 x s16>) - ; SI: [[LSHR13:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST7]], [[C1]](s32) - ; SI: 
[[AND16:%[0-9]+]]:_(s16) = G_AND [[C]], [[C2]] - ; SI: [[XOR5:%[0-9]+]]:_(s16) = G_XOR [[C]], [[C3]] - ; SI: [[AND17:%[0-9]+]]:_(s16) = G_AND [[XOR5]], [[C2]] - ; SI: [[ZEXT10:%[0-9]+]]:_(s32) = G_ZEXT [[AND16]](s16) - ; SI: [[SHL8:%[0-9]+]]:_(s32) = G_SHL [[BITCAST6]], [[ZEXT10]](s32) - ; SI: [[TRUNC10:%[0-9]+]]:_(s16) = G_TRUNC [[SHL8]](s32) - ; SI: [[COPY10:%[0-9]+]]:_(s32) = COPY [[C4]](s32) - ; SI: [[AND18:%[0-9]+]]:_(s32) = G_AND [[BITCAST7]], [[C5]] - ; SI: [[LSHR14:%[0-9]+]]:_(s32) = G_LSHR [[AND18]], [[COPY10]](s32) - ; SI: [[ZEXT11:%[0-9]+]]:_(s32) = G_ZEXT [[AND17]](s16) - ; SI: [[AND19:%[0-9]+]]:_(s32) = G_AND [[LSHR14]], [[C5]] - ; SI: [[LSHR15:%[0-9]+]]:_(s32) = G_LSHR [[AND19]], [[ZEXT11]](s32) - ; SI: [[TRUNC11:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR15]](s32) - ; SI: [[OR6:%[0-9]+]]:_(s16) = G_OR [[TRUNC10]], [[TRUNC11]] - ; SI: [[AND20:%[0-9]+]]:_(s16) = G_AND [[C]], [[C2]] - ; SI: [[XOR6:%[0-9]+]]:_(s16) = G_XOR [[C]], [[C3]] - ; SI: [[AND21:%[0-9]+]]:_(s16) = G_AND [[XOR6]], [[C2]] - ; SI: [[ZEXT12:%[0-9]+]]:_(s32) = G_ZEXT [[AND20]](s16) - ; SI: [[SHL9:%[0-9]+]]:_(s32) = G_SHL [[LSHR12]], [[ZEXT12]](s32) - ; SI: [[TRUNC12:%[0-9]+]]:_(s16) = G_TRUNC [[SHL9]](s32) - ; SI: [[COPY11:%[0-9]+]]:_(s32) = COPY [[C4]](s32) - ; SI: [[AND22:%[0-9]+]]:_(s32) = G_AND [[LSHR13]], [[C5]] - ; SI: [[LSHR16:%[0-9]+]]:_(s32) = G_LSHR [[AND22]], [[COPY11]](s32) - ; SI: [[ZEXT13:%[0-9]+]]:_(s32) = G_ZEXT [[AND21]](s16) - ; SI: [[AND23:%[0-9]+]]:_(s32) = G_AND [[LSHR16]], [[C5]] - ; SI: [[LSHR17:%[0-9]+]]:_(s32) = G_LSHR [[AND23]], [[ZEXT13]](s32) - ; SI: [[TRUNC13:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR17]](s32) - ; SI: [[OR7:%[0-9]+]]:_(s16) = G_OR [[TRUNC12]], [[TRUNC13]] - ; SI: [[BITCAST8:%[0-9]+]]:_(s32) = G_BITCAST [[UV3]](<2 x s16>) - ; SI: [[LSHR18:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST8]], [[C1]](s32) - ; SI: [[COPY12:%[0-9]+]]:_(s32) = COPY [[C4]](s32) - ; SI: [[SHL10:%[0-9]+]]:_(s32) = G_SHL [[BITCAST8]], [[COPY12]](s32) - ; SI: [[COPY13:%[0-9]+]]:_(s32) = COPY [[C4]](s32) - ; SI: [[SHL11:%[0-9]+]]:_(s32) = G_SHL [[LSHR18]], [[COPY13]](s32) - ; SI: [[COPY14:%[0-9]+]]:_(s32) = COPY [[C5]](s32) - ; SI: [[COPY15:%[0-9]+]]:_(s32) = COPY [[C5]](s32) - ; SI: [[SHL12:%[0-9]+]]:_(s32) = G_SHL [[COPY15]], [[C1]](s32) - ; SI: [[OR8:%[0-9]+]]:_(s32) = G_OR [[COPY14]], [[SHL12]] - ; SI: [[BITCAST9:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR8]](s32) - ; SI: [[XOR7:%[0-9]+]]:_(<2 x s16>) = G_XOR [[UV5]], [[BITCAST9]] - ; SI: [[BITCAST10:%[0-9]+]]:_(s32) = G_BITCAST [[XOR7]](<2 x s16>) - ; SI: [[TRUNC14:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST10]](s32) - ; SI: [[LSHR19:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST10]], [[C1]](s32) - ; SI: [[TRUNC15:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR19]](s32) - ; SI: [[AND24:%[0-9]+]]:_(s16) = G_AND [[TRUNC14]], [[C2]] - ; SI: [[XOR8:%[0-9]+]]:_(s16) = G_XOR [[TRUNC14]], [[C3]] - ; SI: [[AND25:%[0-9]+]]:_(s16) = G_AND [[XOR8]], [[C2]] - ; SI: [[ZEXT14:%[0-9]+]]:_(s32) = G_ZEXT [[AND24]](s16) - ; SI: [[ANYEXT2:%[0-9]+]]:_(s32) = G_ANYEXT [[OR6]](s16) - ; SI: [[SHL13:%[0-9]+]]:_(s32) = G_SHL [[ANYEXT2]], [[ZEXT14]](s32) - ; SI: [[TRUNC16:%[0-9]+]]:_(s16) = G_TRUNC [[SHL13]](s32) - ; SI: [[COPY16:%[0-9]+]]:_(s32) = COPY [[C4]](s32) - ; SI: [[AND26:%[0-9]+]]:_(s32) = G_AND [[SHL10]], [[C5]] - ; SI: [[LSHR20:%[0-9]+]]:_(s32) = G_LSHR [[AND26]], [[COPY16]](s32) - ; SI: [[ZEXT15:%[0-9]+]]:_(s32) = G_ZEXT [[AND25]](s16) - ; SI: [[AND27:%[0-9]+]]:_(s32) = G_AND [[LSHR20]], [[C5]] - ; SI: [[LSHR21:%[0-9]+]]:_(s32) = G_LSHR [[AND27]], [[ZEXT15]](s32) - ; SI: [[TRUNC17:%[0-9]+]]:_(s16) = G_TRUNC 
[[LSHR21]](s32) - ; SI: [[OR9:%[0-9]+]]:_(s16) = G_OR [[TRUNC16]], [[TRUNC17]] - ; SI: [[AND28:%[0-9]+]]:_(s16) = G_AND [[TRUNC15]], [[C2]] - ; SI: [[XOR9:%[0-9]+]]:_(s16) = G_XOR [[TRUNC15]], [[C3]] - ; SI: [[AND29:%[0-9]+]]:_(s16) = G_AND [[XOR9]], [[C2]] - ; SI: [[ZEXT16:%[0-9]+]]:_(s32) = G_ZEXT [[AND28]](s16) - ; SI: [[ANYEXT3:%[0-9]+]]:_(s32) = G_ANYEXT [[OR7]](s16) - ; SI: [[SHL14:%[0-9]+]]:_(s32) = G_SHL [[ANYEXT3]], [[ZEXT16]](s32) - ; SI: [[TRUNC18:%[0-9]+]]:_(s16) = G_TRUNC [[SHL14]](s32) - ; SI: [[COPY17:%[0-9]+]]:_(s32) = COPY [[C4]](s32) - ; SI: [[AND30:%[0-9]+]]:_(s32) = G_AND [[SHL11]], [[C5]] - ; SI: [[LSHR22:%[0-9]+]]:_(s32) = G_LSHR [[AND30]], [[COPY17]](s32) - ; SI: [[ZEXT17:%[0-9]+]]:_(s32) = G_ZEXT [[AND29]](s16) - ; SI: [[AND31:%[0-9]+]]:_(s32) = G_AND [[LSHR22]], [[C5]] - ; SI: [[LSHR23:%[0-9]+]]:_(s32) = G_LSHR [[AND31]], [[ZEXT17]](s32) - ; SI: [[TRUNC19:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR23]](s32) - ; SI: [[OR10:%[0-9]+]]:_(s16) = G_OR [[TRUNC18]], [[TRUNC19]] - ; SI: [[ZEXT18:%[0-9]+]]:_(s32) = G_ZEXT [[OR9]](s16) - ; SI: [[ZEXT19:%[0-9]+]]:_(s32) = G_ZEXT [[OR10]](s16) - ; SI: [[SHL15:%[0-9]+]]:_(s32) = G_SHL [[ZEXT19]], [[C1]](s32) - ; SI: [[OR11:%[0-9]+]]:_(s32) = G_OR [[ZEXT18]], [[SHL15]] - ; SI: [[BITCAST11:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR11]](s32) - ; SI: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BITCAST5]](<2 x s16>), [[BITCAST11]](<2 x s16>) - ; SI: $vgpr0_vgpr1 = COPY [[CONCAT_VECTORS]](<4 x s16>) + ; SI-NEXT: [[COPY1:%[0-9]+]]:_(<4 x s16>) = COPY $vgpr2_vgpr3 + ; SI-NEXT: [[COPY2:%[0-9]+]]:_(<4 x s16>) = COPY $vgpr4_vgpr5 + ; SI-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[COPY]](<4 x s16>) + ; SI-NEXT: [[UV2:%[0-9]+]]:_(<2 x s16>), [[UV3:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[COPY1]](<4 x s16>) + ; SI-NEXT: [[UV4:%[0-9]+]]:_(<2 x s16>), [[UV5:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[COPY2]](<4 x s16>) + ; SI-NEXT: [[C:%[0-9]+]]:_(s16) = G_CONSTANT i16 1 + ; SI-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>) + ; SI-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; SI-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C1]](s32) + ; SI-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV2]](<2 x s16>) + ; SI-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C1]](s32) + ; SI-NEXT: [[C2:%[0-9]+]]:_(s16) = G_CONSTANT i16 15 + ; SI-NEXT: [[AND:%[0-9]+]]:_(s16) = G_AND [[C]], [[C2]] + ; SI-NEXT: [[C3:%[0-9]+]]:_(s16) = G_CONSTANT i16 -1 + ; SI-NEXT: [[XOR:%[0-9]+]]:_(s16) = G_XOR [[C]], [[C3]] + ; SI-NEXT: [[AND1:%[0-9]+]]:_(s16) = G_AND [[XOR]], [[C2]] + ; SI-NEXT: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[AND]](s16) + ; SI-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[BITCAST]], [[ZEXT]](s32) + ; SI-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[SHL]](s32) + ; SI-NEXT: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 + ; SI-NEXT: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 + ; SI-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[BITCAST1]], [[C5]] + ; SI-NEXT: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[AND2]], [[C4]](s32) + ; SI-NEXT: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[AND1]](s16) + ; SI-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[LSHR2]], [[C5]] + ; SI-NEXT: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[AND3]], [[ZEXT1]](s32) + ; SI-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR3]](s32) + ; SI-NEXT: [[OR:%[0-9]+]]:_(s16) = G_OR [[TRUNC]], [[TRUNC1]] + ; SI-NEXT: [[AND4:%[0-9]+]]:_(s16) = G_AND [[C]], [[C2]] + ; SI-NEXT: [[XOR1:%[0-9]+]]:_(s16) = G_XOR [[C]], [[C3]] + ; SI-NEXT: [[AND5:%[0-9]+]]:_(s16) = G_AND 
[[XOR1]], [[C2]] + ; SI-NEXT: [[ZEXT2:%[0-9]+]]:_(s32) = G_ZEXT [[AND4]](s16) + ; SI-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[LSHR]], [[ZEXT2]](s32) + ; SI-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[SHL1]](s32) + ; SI-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY [[C4]](s32) + ; SI-NEXT: [[AND6:%[0-9]+]]:_(s32) = G_AND [[LSHR1]], [[C5]] + ; SI-NEXT: [[LSHR4:%[0-9]+]]:_(s32) = G_LSHR [[AND6]], [[COPY3]](s32) + ; SI-NEXT: [[ZEXT3:%[0-9]+]]:_(s32) = G_ZEXT [[AND5]](s16) + ; SI-NEXT: [[AND7:%[0-9]+]]:_(s32) = G_AND [[LSHR4]], [[C5]] + ; SI-NEXT: [[LSHR5:%[0-9]+]]:_(s32) = G_LSHR [[AND7]], [[ZEXT3]](s32) + ; SI-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR5]](s32) + ; SI-NEXT: [[OR1:%[0-9]+]]:_(s16) = G_OR [[TRUNC2]], [[TRUNC3]] + ; SI-NEXT: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[UV2]](<2 x s16>) + ; SI-NEXT: [[LSHR6:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C1]](s32) + ; SI-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY [[C4]](s32) + ; SI-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[BITCAST2]], [[COPY4]](s32) + ; SI-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY [[C4]](s32) + ; SI-NEXT: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[LSHR6]], [[COPY5]](s32) + ; SI-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY [[C5]](s32) + ; SI-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY [[C5]](s32) + ; SI-NEXT: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[COPY7]], [[C1]](s32) + ; SI-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[COPY6]], [[SHL4]] + ; SI-NEXT: [[BITCAST3:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR2]](s32) + ; SI-NEXT: [[XOR2:%[0-9]+]]:_(<2 x s16>) = G_XOR [[UV4]], [[BITCAST3]] + ; SI-NEXT: [[BITCAST4:%[0-9]+]]:_(s32) = G_BITCAST [[XOR2]](<2 x s16>) + ; SI-NEXT: [[TRUNC4:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST4]](s32) + ; SI-NEXT: [[LSHR7:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST4]], [[C1]](s32) + ; SI-NEXT: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR7]](s32) + ; SI-NEXT: [[AND8:%[0-9]+]]:_(s16) = G_AND [[TRUNC4]], [[C2]] + ; SI-NEXT: [[XOR3:%[0-9]+]]:_(s16) = G_XOR [[TRUNC4]], [[C3]] + ; SI-NEXT: [[AND9:%[0-9]+]]:_(s16) = G_AND [[XOR3]], [[C2]] + ; SI-NEXT: [[ZEXT4:%[0-9]+]]:_(s32) = G_ZEXT [[AND8]](s16) + ; SI-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[OR]](s16) + ; SI-NEXT: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[ANYEXT]], [[ZEXT4]](s32) + ; SI-NEXT: [[TRUNC6:%[0-9]+]]:_(s16) = G_TRUNC [[SHL5]](s32) + ; SI-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY [[C4]](s32) + ; SI-NEXT: [[AND10:%[0-9]+]]:_(s32) = G_AND [[SHL2]], [[C5]] + ; SI-NEXT: [[LSHR8:%[0-9]+]]:_(s32) = G_LSHR [[AND10]], [[COPY8]](s32) + ; SI-NEXT: [[ZEXT5:%[0-9]+]]:_(s32) = G_ZEXT [[AND9]](s16) + ; SI-NEXT: [[AND11:%[0-9]+]]:_(s32) = G_AND [[LSHR8]], [[C5]] + ; SI-NEXT: [[LSHR9:%[0-9]+]]:_(s32) = G_LSHR [[AND11]], [[ZEXT5]](s32) + ; SI-NEXT: [[TRUNC7:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR9]](s32) + ; SI-NEXT: [[OR3:%[0-9]+]]:_(s16) = G_OR [[TRUNC6]], [[TRUNC7]] + ; SI-NEXT: [[AND12:%[0-9]+]]:_(s16) = G_AND [[TRUNC5]], [[C2]] + ; SI-NEXT: [[XOR4:%[0-9]+]]:_(s16) = G_XOR [[TRUNC5]], [[C3]] + ; SI-NEXT: [[AND13:%[0-9]+]]:_(s16) = G_AND [[XOR4]], [[C2]] + ; SI-NEXT: [[ZEXT6:%[0-9]+]]:_(s32) = G_ZEXT [[AND12]](s16) + ; SI-NEXT: [[ANYEXT1:%[0-9]+]]:_(s32) = G_ANYEXT [[OR1]](s16) + ; SI-NEXT: [[SHL6:%[0-9]+]]:_(s32) = G_SHL [[ANYEXT1]], [[ZEXT6]](s32) + ; SI-NEXT: [[TRUNC8:%[0-9]+]]:_(s16) = G_TRUNC [[SHL6]](s32) + ; SI-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY [[C4]](s32) + ; SI-NEXT: [[AND14:%[0-9]+]]:_(s32) = G_AND [[SHL3]], [[C5]] + ; SI-NEXT: [[LSHR10:%[0-9]+]]:_(s32) = G_LSHR [[AND14]], [[COPY9]](s32) + ; SI-NEXT: [[ZEXT7:%[0-9]+]]:_(s32) = G_ZEXT [[AND13]](s16) + ; SI-NEXT: [[AND15:%[0-9]+]]:_(s32) = G_AND [[LSHR10]], [[C5]] + ; 
SI-NEXT: [[LSHR11:%[0-9]+]]:_(s32) = G_LSHR [[AND15]], [[ZEXT7]](s32) + ; SI-NEXT: [[TRUNC9:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR11]](s32) + ; SI-NEXT: [[OR4:%[0-9]+]]:_(s16) = G_OR [[TRUNC8]], [[TRUNC9]] + ; SI-NEXT: [[ZEXT8:%[0-9]+]]:_(s32) = G_ZEXT [[OR3]](s16) + ; SI-NEXT: [[ZEXT9:%[0-9]+]]:_(s32) = G_ZEXT [[OR4]](s16) + ; SI-NEXT: [[SHL7:%[0-9]+]]:_(s32) = G_SHL [[ZEXT9]], [[C1]](s32) + ; SI-NEXT: [[OR5:%[0-9]+]]:_(s32) = G_OR [[ZEXT8]], [[SHL7]] + ; SI-NEXT: [[BITCAST5:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR5]](s32) + ; SI-NEXT: [[BITCAST6:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>) + ; SI-NEXT: [[LSHR12:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST6]], [[C1]](s32) + ; SI-NEXT: [[BITCAST7:%[0-9]+]]:_(s32) = G_BITCAST [[UV3]](<2 x s16>) + ; SI-NEXT: [[LSHR13:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST7]], [[C1]](s32) + ; SI-NEXT: [[AND16:%[0-9]+]]:_(s16) = G_AND [[C]], [[C2]] + ; SI-NEXT: [[XOR5:%[0-9]+]]:_(s16) = G_XOR [[C]], [[C3]] + ; SI-NEXT: [[AND17:%[0-9]+]]:_(s16) = G_AND [[XOR5]], [[C2]] + ; SI-NEXT: [[ZEXT10:%[0-9]+]]:_(s32) = G_ZEXT [[AND16]](s16) + ; SI-NEXT: [[SHL8:%[0-9]+]]:_(s32) = G_SHL [[BITCAST6]], [[ZEXT10]](s32) + ; SI-NEXT: [[TRUNC10:%[0-9]+]]:_(s16) = G_TRUNC [[SHL8]](s32) + ; SI-NEXT: [[COPY10:%[0-9]+]]:_(s32) = COPY [[C4]](s32) + ; SI-NEXT: [[AND18:%[0-9]+]]:_(s32) = G_AND [[BITCAST7]], [[C5]] + ; SI-NEXT: [[LSHR14:%[0-9]+]]:_(s32) = G_LSHR [[AND18]], [[COPY10]](s32) + ; SI-NEXT: [[ZEXT11:%[0-9]+]]:_(s32) = G_ZEXT [[AND17]](s16) + ; SI-NEXT: [[AND19:%[0-9]+]]:_(s32) = G_AND [[LSHR14]], [[C5]] + ; SI-NEXT: [[LSHR15:%[0-9]+]]:_(s32) = G_LSHR [[AND19]], [[ZEXT11]](s32) + ; SI-NEXT: [[TRUNC11:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR15]](s32) + ; SI-NEXT: [[OR6:%[0-9]+]]:_(s16) = G_OR [[TRUNC10]], [[TRUNC11]] + ; SI-NEXT: [[AND20:%[0-9]+]]:_(s16) = G_AND [[C]], [[C2]] + ; SI-NEXT: [[XOR6:%[0-9]+]]:_(s16) = G_XOR [[C]], [[C3]] + ; SI-NEXT: [[AND21:%[0-9]+]]:_(s16) = G_AND [[XOR6]], [[C2]] + ; SI-NEXT: [[ZEXT12:%[0-9]+]]:_(s32) = G_ZEXT [[AND20]](s16) + ; SI-NEXT: [[SHL9:%[0-9]+]]:_(s32) = G_SHL [[LSHR12]], [[ZEXT12]](s32) + ; SI-NEXT: [[TRUNC12:%[0-9]+]]:_(s16) = G_TRUNC [[SHL9]](s32) + ; SI-NEXT: [[COPY11:%[0-9]+]]:_(s32) = COPY [[C4]](s32) + ; SI-NEXT: [[AND22:%[0-9]+]]:_(s32) = G_AND [[LSHR13]], [[C5]] + ; SI-NEXT: [[LSHR16:%[0-9]+]]:_(s32) = G_LSHR [[AND22]], [[COPY11]](s32) + ; SI-NEXT: [[ZEXT13:%[0-9]+]]:_(s32) = G_ZEXT [[AND21]](s16) + ; SI-NEXT: [[AND23:%[0-9]+]]:_(s32) = G_AND [[LSHR16]], [[C5]] + ; SI-NEXT: [[LSHR17:%[0-9]+]]:_(s32) = G_LSHR [[AND23]], [[ZEXT13]](s32) + ; SI-NEXT: [[TRUNC13:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR17]](s32) + ; SI-NEXT: [[OR7:%[0-9]+]]:_(s16) = G_OR [[TRUNC12]], [[TRUNC13]] + ; SI-NEXT: [[BITCAST8:%[0-9]+]]:_(s32) = G_BITCAST [[UV3]](<2 x s16>) + ; SI-NEXT: [[LSHR18:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST8]], [[C1]](s32) + ; SI-NEXT: [[COPY12:%[0-9]+]]:_(s32) = COPY [[C4]](s32) + ; SI-NEXT: [[SHL10:%[0-9]+]]:_(s32) = G_SHL [[BITCAST8]], [[COPY12]](s32) + ; SI-NEXT: [[COPY13:%[0-9]+]]:_(s32) = COPY [[C4]](s32) + ; SI-NEXT: [[SHL11:%[0-9]+]]:_(s32) = G_SHL [[LSHR18]], [[COPY13]](s32) + ; SI-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY [[C5]](s32) + ; SI-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY [[C5]](s32) + ; SI-NEXT: [[SHL12:%[0-9]+]]:_(s32) = G_SHL [[COPY15]], [[C1]](s32) + ; SI-NEXT: [[OR8:%[0-9]+]]:_(s32) = G_OR [[COPY14]], [[SHL12]] + ; SI-NEXT: [[BITCAST9:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR8]](s32) + ; SI-NEXT: [[XOR7:%[0-9]+]]:_(<2 x s16>) = G_XOR [[UV5]], [[BITCAST9]] + ; SI-NEXT: [[BITCAST10:%[0-9]+]]:_(s32) = G_BITCAST [[XOR7]](<2 x s16>) + ; SI-NEXT: 
[[TRUNC14:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST10]](s32) + ; SI-NEXT: [[LSHR19:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST10]], [[C1]](s32) + ; SI-NEXT: [[TRUNC15:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR19]](s32) + ; SI-NEXT: [[AND24:%[0-9]+]]:_(s16) = G_AND [[TRUNC14]], [[C2]] + ; SI-NEXT: [[XOR8:%[0-9]+]]:_(s16) = G_XOR [[TRUNC14]], [[C3]] + ; SI-NEXT: [[AND25:%[0-9]+]]:_(s16) = G_AND [[XOR8]], [[C2]] + ; SI-NEXT: [[ZEXT14:%[0-9]+]]:_(s32) = G_ZEXT [[AND24]](s16) + ; SI-NEXT: [[ANYEXT2:%[0-9]+]]:_(s32) = G_ANYEXT [[OR6]](s16) + ; SI-NEXT: [[SHL13:%[0-9]+]]:_(s32) = G_SHL [[ANYEXT2]], [[ZEXT14]](s32) + ; SI-NEXT: [[TRUNC16:%[0-9]+]]:_(s16) = G_TRUNC [[SHL13]](s32) + ; SI-NEXT: [[COPY16:%[0-9]+]]:_(s32) = COPY [[C4]](s32) + ; SI-NEXT: [[AND26:%[0-9]+]]:_(s32) = G_AND [[SHL10]], [[C5]] + ; SI-NEXT: [[LSHR20:%[0-9]+]]:_(s32) = G_LSHR [[AND26]], [[COPY16]](s32) + ; SI-NEXT: [[ZEXT15:%[0-9]+]]:_(s32) = G_ZEXT [[AND25]](s16) + ; SI-NEXT: [[AND27:%[0-9]+]]:_(s32) = G_AND [[LSHR20]], [[C5]] + ; SI-NEXT: [[LSHR21:%[0-9]+]]:_(s32) = G_LSHR [[AND27]], [[ZEXT15]](s32) + ; SI-NEXT: [[TRUNC17:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR21]](s32) + ; SI-NEXT: [[OR9:%[0-9]+]]:_(s16) = G_OR [[TRUNC16]], [[TRUNC17]] + ; SI-NEXT: [[AND28:%[0-9]+]]:_(s16) = G_AND [[TRUNC15]], [[C2]] + ; SI-NEXT: [[XOR9:%[0-9]+]]:_(s16) = G_XOR [[TRUNC15]], [[C3]] + ; SI-NEXT: [[AND29:%[0-9]+]]:_(s16) = G_AND [[XOR9]], [[C2]] + ; SI-NEXT: [[ZEXT16:%[0-9]+]]:_(s32) = G_ZEXT [[AND28]](s16) + ; SI-NEXT: [[ANYEXT3:%[0-9]+]]:_(s32) = G_ANYEXT [[OR7]](s16) + ; SI-NEXT: [[SHL14:%[0-9]+]]:_(s32) = G_SHL [[ANYEXT3]], [[ZEXT16]](s32) + ; SI-NEXT: [[TRUNC18:%[0-9]+]]:_(s16) = G_TRUNC [[SHL14]](s32) + ; SI-NEXT: [[COPY17:%[0-9]+]]:_(s32) = COPY [[C4]](s32) + ; SI-NEXT: [[AND30:%[0-9]+]]:_(s32) = G_AND [[SHL11]], [[C5]] + ; SI-NEXT: [[LSHR22:%[0-9]+]]:_(s32) = G_LSHR [[AND30]], [[COPY17]](s32) + ; SI-NEXT: [[ZEXT17:%[0-9]+]]:_(s32) = G_ZEXT [[AND29]](s16) + ; SI-NEXT: [[AND31:%[0-9]+]]:_(s32) = G_AND [[LSHR22]], [[C5]] + ; SI-NEXT: [[LSHR23:%[0-9]+]]:_(s32) = G_LSHR [[AND31]], [[ZEXT17]](s32) + ; SI-NEXT: [[TRUNC19:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR23]](s32) + ; SI-NEXT: [[OR10:%[0-9]+]]:_(s16) = G_OR [[TRUNC18]], [[TRUNC19]] + ; SI-NEXT: [[ZEXT18:%[0-9]+]]:_(s32) = G_ZEXT [[OR9]](s16) + ; SI-NEXT: [[ZEXT19:%[0-9]+]]:_(s32) = G_ZEXT [[OR10]](s16) + ; SI-NEXT: [[SHL15:%[0-9]+]]:_(s32) = G_SHL [[ZEXT19]], [[C1]](s32) + ; SI-NEXT: [[OR11:%[0-9]+]]:_(s32) = G_OR [[ZEXT18]], [[SHL15]] + ; SI-NEXT: [[BITCAST11:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR11]](s32) + ; SI-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BITCAST5]](<2 x s16>), [[BITCAST11]](<2 x s16>) + ; SI-NEXT: $vgpr0_vgpr1 = COPY [[CONCAT_VECTORS]](<4 x s16>) ; VI-LABEL: name: test_fshr_v4s16_v4s16 ; VI: [[COPY:%[0-9]+]]:_(<4 x s16>) = COPY $vgpr0_vgpr1 - ; VI: [[COPY1:%[0-9]+]]:_(<4 x s16>) = COPY $vgpr2_vgpr3 - ; VI: [[COPY2:%[0-9]+]]:_(<4 x s16>) = COPY $vgpr4_vgpr5 - ; VI: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[COPY]](<4 x s16>) - ; VI: [[UV2:%[0-9]+]]:_(<2 x s16>), [[UV3:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[COPY1]](<4 x s16>) - ; VI: [[UV4:%[0-9]+]]:_(<2 x s16>), [[UV5:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[COPY2]](<4 x s16>) - ; VI: [[C:%[0-9]+]]:_(s16) = G_CONSTANT i16 1 - ; VI: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>) - ; VI: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST]](s32) - ; VI: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; VI: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C1]](s32) - ; VI: 
[[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) - ; VI: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV2]](<2 x s16>) - ; VI: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST1]](s32) - ; VI: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C1]](s32) - ; VI: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR1]](s32) - ; VI: [[C2:%[0-9]+]]:_(s16) = G_CONSTANT i16 15 - ; VI: [[AND:%[0-9]+]]:_(s16) = G_AND [[C]], [[C2]] - ; VI: [[C3:%[0-9]+]]:_(s16) = G_CONSTANT i16 -1 - ; VI: [[XOR:%[0-9]+]]:_(s16) = G_XOR [[C]], [[C3]] - ; VI: [[AND1:%[0-9]+]]:_(s16) = G_AND [[XOR]], [[C2]] - ; VI: [[SHL:%[0-9]+]]:_(s16) = G_SHL [[TRUNC]], [[AND]](s16) - ; VI: [[LSHR2:%[0-9]+]]:_(s16) = G_LSHR [[TRUNC2]], [[C]](s16) - ; VI: [[LSHR3:%[0-9]+]]:_(s16) = G_LSHR [[LSHR2]], [[AND1]](s16) - ; VI: [[OR:%[0-9]+]]:_(s16) = G_OR [[SHL]], [[LSHR3]] - ; VI: [[AND2:%[0-9]+]]:_(s16) = G_AND [[C]], [[C2]] - ; VI: [[XOR1:%[0-9]+]]:_(s16) = G_XOR [[C]], [[C3]] - ; VI: [[AND3:%[0-9]+]]:_(s16) = G_AND [[XOR1]], [[C2]] - ; VI: [[SHL1:%[0-9]+]]:_(s16) = G_SHL [[TRUNC1]], [[AND2]](s16) - ; VI: [[LSHR4:%[0-9]+]]:_(s16) = G_LSHR [[TRUNC3]], [[C]](s16) - ; VI: [[LSHR5:%[0-9]+]]:_(s16) = G_LSHR [[LSHR4]], [[AND3]](s16) - ; VI: [[OR1:%[0-9]+]]:_(s16) = G_OR [[SHL1]], [[LSHR5]] - ; VI: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[UV2]](<2 x s16>) - ; VI: [[TRUNC4:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST2]](s32) - ; VI: [[LSHR6:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C1]](s32) - ; VI: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR6]](s32) - ; VI: [[SHL2:%[0-9]+]]:_(s16) = G_SHL [[TRUNC4]], [[C]](s16) - ; VI: [[SHL3:%[0-9]+]]:_(s16) = G_SHL [[TRUNC5]], [[C]](s16) - ; VI: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 - ; VI: [[COPY3:%[0-9]+]]:_(s32) = COPY [[C4]](s32) - ; VI: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[C4]], [[C1]](s32) - ; VI: [[OR2:%[0-9]+]]:_(s32) = G_OR [[COPY3]], [[SHL4]] - ; VI: [[BITCAST3:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR2]](s32) - ; VI: [[XOR2:%[0-9]+]]:_(<2 x s16>) = G_XOR [[UV4]], [[BITCAST3]] - ; VI: [[BITCAST4:%[0-9]+]]:_(s32) = G_BITCAST [[XOR2]](<2 x s16>) - ; VI: [[TRUNC6:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST4]](s32) - ; VI: [[LSHR7:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST4]], [[C1]](s32) - ; VI: [[TRUNC7:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR7]](s32) - ; VI: [[AND4:%[0-9]+]]:_(s16) = G_AND [[TRUNC6]], [[C2]] - ; VI: [[XOR3:%[0-9]+]]:_(s16) = G_XOR [[TRUNC6]], [[C3]] - ; VI: [[AND5:%[0-9]+]]:_(s16) = G_AND [[XOR3]], [[C2]] - ; VI: [[SHL5:%[0-9]+]]:_(s16) = G_SHL [[OR]], [[AND4]](s16) - ; VI: [[LSHR8:%[0-9]+]]:_(s16) = G_LSHR [[SHL2]], [[C]](s16) - ; VI: [[LSHR9:%[0-9]+]]:_(s16) = G_LSHR [[LSHR8]], [[AND5]](s16) - ; VI: [[OR3:%[0-9]+]]:_(s16) = G_OR [[SHL5]], [[LSHR9]] - ; VI: [[AND6:%[0-9]+]]:_(s16) = G_AND [[TRUNC7]], [[C2]] - ; VI: [[XOR4:%[0-9]+]]:_(s16) = G_XOR [[TRUNC7]], [[C3]] - ; VI: [[AND7:%[0-9]+]]:_(s16) = G_AND [[XOR4]], [[C2]] - ; VI: [[SHL6:%[0-9]+]]:_(s16) = G_SHL [[OR1]], [[AND6]](s16) - ; VI: [[LSHR10:%[0-9]+]]:_(s16) = G_LSHR [[SHL3]], [[C]](s16) - ; VI: [[LSHR11:%[0-9]+]]:_(s16) = G_LSHR [[LSHR10]], [[AND7]](s16) - ; VI: [[OR4:%[0-9]+]]:_(s16) = G_OR [[SHL6]], [[LSHR11]] - ; VI: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[OR3]](s16) - ; VI: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[OR4]](s16) - ; VI: [[SHL7:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C1]](s32) - ; VI: [[OR5:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL7]] - ; VI: [[BITCAST5:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR5]](s32) - ; VI: [[BITCAST6:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>) - ; VI: [[TRUNC8:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST6]](s32) - ; VI: [[LSHR12:%[0-9]+]]:_(s32) = 
G_LSHR [[BITCAST6]], [[C1]](s32) - ; VI: [[TRUNC9:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR12]](s32) - ; VI: [[BITCAST7:%[0-9]+]]:_(s32) = G_BITCAST [[UV3]](<2 x s16>) - ; VI: [[TRUNC10:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST7]](s32) - ; VI: [[LSHR13:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST7]], [[C1]](s32) - ; VI: [[TRUNC11:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR13]](s32) - ; VI: [[AND8:%[0-9]+]]:_(s16) = G_AND [[C]], [[C2]] - ; VI: [[XOR5:%[0-9]+]]:_(s16) = G_XOR [[C]], [[C3]] - ; VI: [[AND9:%[0-9]+]]:_(s16) = G_AND [[XOR5]], [[C2]] - ; VI: [[SHL8:%[0-9]+]]:_(s16) = G_SHL [[TRUNC8]], [[AND8]](s16) - ; VI: [[LSHR14:%[0-9]+]]:_(s16) = G_LSHR [[TRUNC10]], [[C]](s16) - ; VI: [[LSHR15:%[0-9]+]]:_(s16) = G_LSHR [[LSHR14]], [[AND9]](s16) - ; VI: [[OR6:%[0-9]+]]:_(s16) = G_OR [[SHL8]], [[LSHR15]] - ; VI: [[AND10:%[0-9]+]]:_(s16) = G_AND [[C]], [[C2]] - ; VI: [[XOR6:%[0-9]+]]:_(s16) = G_XOR [[C]], [[C3]] - ; VI: [[AND11:%[0-9]+]]:_(s16) = G_AND [[XOR6]], [[C2]] - ; VI: [[SHL9:%[0-9]+]]:_(s16) = G_SHL [[TRUNC9]], [[AND10]](s16) - ; VI: [[LSHR16:%[0-9]+]]:_(s16) = G_LSHR [[TRUNC11]], [[C]](s16) - ; VI: [[LSHR17:%[0-9]+]]:_(s16) = G_LSHR [[LSHR16]], [[AND11]](s16) - ; VI: [[OR7:%[0-9]+]]:_(s16) = G_OR [[SHL9]], [[LSHR17]] - ; VI: [[BITCAST8:%[0-9]+]]:_(s32) = G_BITCAST [[UV3]](<2 x s16>) - ; VI: [[TRUNC12:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST8]](s32) - ; VI: [[LSHR18:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST8]], [[C1]](s32) - ; VI: [[TRUNC13:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR18]](s32) - ; VI: [[SHL10:%[0-9]+]]:_(s16) = G_SHL [[TRUNC12]], [[C]](s16) - ; VI: [[SHL11:%[0-9]+]]:_(s16) = G_SHL [[TRUNC13]], [[C]](s16) - ; VI: [[COPY4:%[0-9]+]]:_(s32) = COPY [[C4]](s32) - ; VI: [[COPY5:%[0-9]+]]:_(s32) = COPY [[C4]](s32) - ; VI: [[SHL12:%[0-9]+]]:_(s32) = G_SHL [[COPY5]], [[C1]](s32) - ; VI: [[OR8:%[0-9]+]]:_(s32) = G_OR [[COPY4]], [[SHL12]] - ; VI: [[BITCAST9:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR8]](s32) - ; VI: [[XOR7:%[0-9]+]]:_(<2 x s16>) = G_XOR [[UV5]], [[BITCAST9]] - ; VI: [[BITCAST10:%[0-9]+]]:_(s32) = G_BITCAST [[XOR7]](<2 x s16>) - ; VI: [[TRUNC14:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST10]](s32) - ; VI: [[LSHR19:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST10]], [[C1]](s32) - ; VI: [[TRUNC15:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR19]](s32) - ; VI: [[AND12:%[0-9]+]]:_(s16) = G_AND [[TRUNC14]], [[C2]] - ; VI: [[XOR8:%[0-9]+]]:_(s16) = G_XOR [[TRUNC14]], [[C3]] - ; VI: [[AND13:%[0-9]+]]:_(s16) = G_AND [[XOR8]], [[C2]] - ; VI: [[SHL13:%[0-9]+]]:_(s16) = G_SHL [[OR6]], [[AND12]](s16) - ; VI: [[LSHR20:%[0-9]+]]:_(s16) = G_LSHR [[SHL10]], [[C]](s16) - ; VI: [[LSHR21:%[0-9]+]]:_(s16) = G_LSHR [[LSHR20]], [[AND13]](s16) - ; VI: [[OR9:%[0-9]+]]:_(s16) = G_OR [[SHL13]], [[LSHR21]] - ; VI: [[AND14:%[0-9]+]]:_(s16) = G_AND [[TRUNC15]], [[C2]] - ; VI: [[XOR9:%[0-9]+]]:_(s16) = G_XOR [[TRUNC15]], [[C3]] - ; VI: [[AND15:%[0-9]+]]:_(s16) = G_AND [[XOR9]], [[C2]] - ; VI: [[SHL14:%[0-9]+]]:_(s16) = G_SHL [[OR7]], [[AND14]](s16) - ; VI: [[LSHR22:%[0-9]+]]:_(s16) = G_LSHR [[SHL11]], [[C]](s16) - ; VI: [[LSHR23:%[0-9]+]]:_(s16) = G_LSHR [[LSHR22]], [[AND15]](s16) - ; VI: [[OR10:%[0-9]+]]:_(s16) = G_OR [[SHL14]], [[LSHR23]] - ; VI: [[ZEXT2:%[0-9]+]]:_(s32) = G_ZEXT [[OR9]](s16) - ; VI: [[ZEXT3:%[0-9]+]]:_(s32) = G_ZEXT [[OR10]](s16) - ; VI: [[SHL15:%[0-9]+]]:_(s32) = G_SHL [[ZEXT3]], [[C1]](s32) - ; VI: [[OR11:%[0-9]+]]:_(s32) = G_OR [[ZEXT2]], [[SHL15]] - ; VI: [[BITCAST11:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR11]](s32) - ; VI: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BITCAST5]](<2 x s16>), [[BITCAST11]](<2 x s16>) - ; VI: $vgpr0_vgpr1 = COPY 
[[CONCAT_VECTORS]](<4 x s16>) + ; VI-NEXT: [[COPY1:%[0-9]+]]:_(<4 x s16>) = COPY $vgpr2_vgpr3 + ; VI-NEXT: [[COPY2:%[0-9]+]]:_(<4 x s16>) = COPY $vgpr4_vgpr5 + ; VI-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[COPY]](<4 x s16>) + ; VI-NEXT: [[UV2:%[0-9]+]]:_(<2 x s16>), [[UV3:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[COPY1]](<4 x s16>) + ; VI-NEXT: [[UV4:%[0-9]+]]:_(<2 x s16>), [[UV5:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[COPY2]](<4 x s16>) + ; VI-NEXT: [[C:%[0-9]+]]:_(s16) = G_CONSTANT i16 1 + ; VI-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>) + ; VI-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST]](s32) + ; VI-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; VI-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C1]](s32) + ; VI-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) + ; VI-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV2]](<2 x s16>) + ; VI-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST1]](s32) + ; VI-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C1]](s32) + ; VI-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR1]](s32) + ; VI-NEXT: [[C2:%[0-9]+]]:_(s16) = G_CONSTANT i16 15 + ; VI-NEXT: [[AND:%[0-9]+]]:_(s16) = G_AND [[C]], [[C2]] + ; VI-NEXT: [[C3:%[0-9]+]]:_(s16) = G_CONSTANT i16 -1 + ; VI-NEXT: [[XOR:%[0-9]+]]:_(s16) = G_XOR [[C]], [[C3]] + ; VI-NEXT: [[AND1:%[0-9]+]]:_(s16) = G_AND [[XOR]], [[C2]] + ; VI-NEXT: [[SHL:%[0-9]+]]:_(s16) = G_SHL [[TRUNC]], [[AND]](s16) + ; VI-NEXT: [[LSHR2:%[0-9]+]]:_(s16) = G_LSHR [[TRUNC2]], [[C]](s16) + ; VI-NEXT: [[LSHR3:%[0-9]+]]:_(s16) = G_LSHR [[LSHR2]], [[AND1]](s16) + ; VI-NEXT: [[OR:%[0-9]+]]:_(s16) = G_OR [[SHL]], [[LSHR3]] + ; VI-NEXT: [[AND2:%[0-9]+]]:_(s16) = G_AND [[C]], [[C2]] + ; VI-NEXT: [[XOR1:%[0-9]+]]:_(s16) = G_XOR [[C]], [[C3]] + ; VI-NEXT: [[AND3:%[0-9]+]]:_(s16) = G_AND [[XOR1]], [[C2]] + ; VI-NEXT: [[SHL1:%[0-9]+]]:_(s16) = G_SHL [[TRUNC1]], [[AND2]](s16) + ; VI-NEXT: [[LSHR4:%[0-9]+]]:_(s16) = G_LSHR [[TRUNC3]], [[C]](s16) + ; VI-NEXT: [[LSHR5:%[0-9]+]]:_(s16) = G_LSHR [[LSHR4]], [[AND3]](s16) + ; VI-NEXT: [[OR1:%[0-9]+]]:_(s16) = G_OR [[SHL1]], [[LSHR5]] + ; VI-NEXT: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[UV2]](<2 x s16>) + ; VI-NEXT: [[TRUNC4:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST2]](s32) + ; VI-NEXT: [[LSHR6:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C1]](s32) + ; VI-NEXT: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR6]](s32) + ; VI-NEXT: [[SHL2:%[0-9]+]]:_(s16) = G_SHL [[TRUNC4]], [[C]](s16) + ; VI-NEXT: [[SHL3:%[0-9]+]]:_(s16) = G_SHL [[TRUNC5]], [[C]](s16) + ; VI-NEXT: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 + ; VI-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY [[C4]](s32) + ; VI-NEXT: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[C4]], [[C1]](s32) + ; VI-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[COPY3]], [[SHL4]] + ; VI-NEXT: [[BITCAST3:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR2]](s32) + ; VI-NEXT: [[XOR2:%[0-9]+]]:_(<2 x s16>) = G_XOR [[UV4]], [[BITCAST3]] + ; VI-NEXT: [[BITCAST4:%[0-9]+]]:_(s32) = G_BITCAST [[XOR2]](<2 x s16>) + ; VI-NEXT: [[TRUNC6:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST4]](s32) + ; VI-NEXT: [[LSHR7:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST4]], [[C1]](s32) + ; VI-NEXT: [[TRUNC7:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR7]](s32) + ; VI-NEXT: [[AND4:%[0-9]+]]:_(s16) = G_AND [[TRUNC6]], [[C2]] + ; VI-NEXT: [[XOR3:%[0-9]+]]:_(s16) = G_XOR [[TRUNC6]], [[C3]] + ; VI-NEXT: [[AND5:%[0-9]+]]:_(s16) = G_AND [[XOR3]], [[C2]] + ; VI-NEXT: [[SHL5:%[0-9]+]]:_(s16) = G_SHL [[OR]], [[AND4]](s16) + ; VI-NEXT: [[LSHR8:%[0-9]+]]:_(s16) = G_LSHR [[SHL2]], 
[[C]](s16) + ; VI-NEXT: [[LSHR9:%[0-9]+]]:_(s16) = G_LSHR [[LSHR8]], [[AND5]](s16) + ; VI-NEXT: [[OR3:%[0-9]+]]:_(s16) = G_OR [[SHL5]], [[LSHR9]] + ; VI-NEXT: [[AND6:%[0-9]+]]:_(s16) = G_AND [[TRUNC7]], [[C2]] + ; VI-NEXT: [[XOR4:%[0-9]+]]:_(s16) = G_XOR [[TRUNC7]], [[C3]] + ; VI-NEXT: [[AND7:%[0-9]+]]:_(s16) = G_AND [[XOR4]], [[C2]] + ; VI-NEXT: [[SHL6:%[0-9]+]]:_(s16) = G_SHL [[OR1]], [[AND6]](s16) + ; VI-NEXT: [[LSHR10:%[0-9]+]]:_(s16) = G_LSHR [[SHL3]], [[C]](s16) + ; VI-NEXT: [[LSHR11:%[0-9]+]]:_(s16) = G_LSHR [[LSHR10]], [[AND7]](s16) + ; VI-NEXT: [[OR4:%[0-9]+]]:_(s16) = G_OR [[SHL6]], [[LSHR11]] + ; VI-NEXT: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[OR3]](s16) + ; VI-NEXT: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[OR4]](s16) + ; VI-NEXT: [[SHL7:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C1]](s32) + ; VI-NEXT: [[OR5:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL7]] + ; VI-NEXT: [[BITCAST5:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR5]](s32) + ; VI-NEXT: [[BITCAST6:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>) + ; VI-NEXT: [[TRUNC8:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST6]](s32) + ; VI-NEXT: [[LSHR12:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST6]], [[C1]](s32) + ; VI-NEXT: [[TRUNC9:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR12]](s32) + ; VI-NEXT: [[BITCAST7:%[0-9]+]]:_(s32) = G_BITCAST [[UV3]](<2 x s16>) + ; VI-NEXT: [[TRUNC10:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST7]](s32) + ; VI-NEXT: [[LSHR13:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST7]], [[C1]](s32) + ; VI-NEXT: [[TRUNC11:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR13]](s32) + ; VI-NEXT: [[AND8:%[0-9]+]]:_(s16) = G_AND [[C]], [[C2]] + ; VI-NEXT: [[XOR5:%[0-9]+]]:_(s16) = G_XOR [[C]], [[C3]] + ; VI-NEXT: [[AND9:%[0-9]+]]:_(s16) = G_AND [[XOR5]], [[C2]] + ; VI-NEXT: [[SHL8:%[0-9]+]]:_(s16) = G_SHL [[TRUNC8]], [[AND8]](s16) + ; VI-NEXT: [[LSHR14:%[0-9]+]]:_(s16) = G_LSHR [[TRUNC10]], [[C]](s16) + ; VI-NEXT: [[LSHR15:%[0-9]+]]:_(s16) = G_LSHR [[LSHR14]], [[AND9]](s16) + ; VI-NEXT: [[OR6:%[0-9]+]]:_(s16) = G_OR [[SHL8]], [[LSHR15]] + ; VI-NEXT: [[AND10:%[0-9]+]]:_(s16) = G_AND [[C]], [[C2]] + ; VI-NEXT: [[XOR6:%[0-9]+]]:_(s16) = G_XOR [[C]], [[C3]] + ; VI-NEXT: [[AND11:%[0-9]+]]:_(s16) = G_AND [[XOR6]], [[C2]] + ; VI-NEXT: [[SHL9:%[0-9]+]]:_(s16) = G_SHL [[TRUNC9]], [[AND10]](s16) + ; VI-NEXT: [[LSHR16:%[0-9]+]]:_(s16) = G_LSHR [[TRUNC11]], [[C]](s16) + ; VI-NEXT: [[LSHR17:%[0-9]+]]:_(s16) = G_LSHR [[LSHR16]], [[AND11]](s16) + ; VI-NEXT: [[OR7:%[0-9]+]]:_(s16) = G_OR [[SHL9]], [[LSHR17]] + ; VI-NEXT: [[BITCAST8:%[0-9]+]]:_(s32) = G_BITCAST [[UV3]](<2 x s16>) + ; VI-NEXT: [[TRUNC12:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST8]](s32) + ; VI-NEXT: [[LSHR18:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST8]], [[C1]](s32) + ; VI-NEXT: [[TRUNC13:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR18]](s32) + ; VI-NEXT: [[SHL10:%[0-9]+]]:_(s16) = G_SHL [[TRUNC12]], [[C]](s16) + ; VI-NEXT: [[SHL11:%[0-9]+]]:_(s16) = G_SHL [[TRUNC13]], [[C]](s16) + ; VI-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY [[C4]](s32) + ; VI-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY [[C4]](s32) + ; VI-NEXT: [[SHL12:%[0-9]+]]:_(s32) = G_SHL [[COPY5]], [[C1]](s32) + ; VI-NEXT: [[OR8:%[0-9]+]]:_(s32) = G_OR [[COPY4]], [[SHL12]] + ; VI-NEXT: [[BITCAST9:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR8]](s32) + ; VI-NEXT: [[XOR7:%[0-9]+]]:_(<2 x s16>) = G_XOR [[UV5]], [[BITCAST9]] + ; VI-NEXT: [[BITCAST10:%[0-9]+]]:_(s32) = G_BITCAST [[XOR7]](<2 x s16>) + ; VI-NEXT: [[TRUNC14:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST10]](s32) + ; VI-NEXT: [[LSHR19:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST10]], [[C1]](s32) + ; VI-NEXT: [[TRUNC15:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR19]](s32) + ; VI-NEXT: [[AND12:%[0-9]+]]:_(s16) = 
G_AND [[TRUNC14]], [[C2]] + ; VI-NEXT: [[XOR8:%[0-9]+]]:_(s16) = G_XOR [[TRUNC14]], [[C3]] + ; VI-NEXT: [[AND13:%[0-9]+]]:_(s16) = G_AND [[XOR8]], [[C2]] + ; VI-NEXT: [[SHL13:%[0-9]+]]:_(s16) = G_SHL [[OR6]], [[AND12]](s16) + ; VI-NEXT: [[LSHR20:%[0-9]+]]:_(s16) = G_LSHR [[SHL10]], [[C]](s16) + ; VI-NEXT: [[LSHR21:%[0-9]+]]:_(s16) = G_LSHR [[LSHR20]], [[AND13]](s16) + ; VI-NEXT: [[OR9:%[0-9]+]]:_(s16) = G_OR [[SHL13]], [[LSHR21]] + ; VI-NEXT: [[AND14:%[0-9]+]]:_(s16) = G_AND [[TRUNC15]], [[C2]] + ; VI-NEXT: [[XOR9:%[0-9]+]]:_(s16) = G_XOR [[TRUNC15]], [[C3]] + ; VI-NEXT: [[AND15:%[0-9]+]]:_(s16) = G_AND [[XOR9]], [[C2]] + ; VI-NEXT: [[SHL14:%[0-9]+]]:_(s16) = G_SHL [[OR7]], [[AND14]](s16) + ; VI-NEXT: [[LSHR22:%[0-9]+]]:_(s16) = G_LSHR [[SHL11]], [[C]](s16) + ; VI-NEXT: [[LSHR23:%[0-9]+]]:_(s16) = G_LSHR [[LSHR22]], [[AND15]](s16) + ; VI-NEXT: [[OR10:%[0-9]+]]:_(s16) = G_OR [[SHL14]], [[LSHR23]] + ; VI-NEXT: [[ZEXT2:%[0-9]+]]:_(s32) = G_ZEXT [[OR9]](s16) + ; VI-NEXT: [[ZEXT3:%[0-9]+]]:_(s32) = G_ZEXT [[OR10]](s16) + ; VI-NEXT: [[SHL15:%[0-9]+]]:_(s32) = G_SHL [[ZEXT3]], [[C1]](s32) + ; VI-NEXT: [[OR11:%[0-9]+]]:_(s32) = G_OR [[ZEXT2]], [[SHL15]] + ; VI-NEXT: [[BITCAST11:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR11]](s32) + ; VI-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BITCAST5]](<2 x s16>), [[BITCAST11]](<2 x s16>) + ; VI-NEXT: $vgpr0_vgpr1 = COPY [[CONCAT_VECTORS]](<4 x s16>) ; GFX9-LABEL: name: test_fshr_v4s16_v4s16 ; GFX9: [[COPY:%[0-9]+]]:_(<4 x s16>) = COPY $vgpr0_vgpr1 - ; GFX9: [[COPY1:%[0-9]+]]:_(<4 x s16>) = COPY $vgpr2_vgpr3 - ; GFX9: [[COPY2:%[0-9]+]]:_(<4 x s16>) = COPY $vgpr4_vgpr5 - ; GFX9: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[COPY]](<4 x s16>) - ; GFX9: [[UV2:%[0-9]+]]:_(<2 x s16>), [[UV3:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[COPY1]](<4 x s16>) - ; GFX9: [[UV4:%[0-9]+]]:_(<2 x s16>), [[UV5:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[COPY2]](<4 x s16>) - ; GFX9: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 15 - ; GFX9: [[COPY3:%[0-9]+]]:_(s32) = COPY [[C]](s32) - ; GFX9: [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY3]](s32), [[C]](s32) - ; GFX9: [[AND:%[0-9]+]]:_(<2 x s16>) = G_AND [[UV4]], [[BUILD_VECTOR_TRUNC]] - ; GFX9: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 -1 - ; GFX9: [[COPY4:%[0-9]+]]:_(s32) = COPY [[C1]](s32) - ; GFX9: [[BUILD_VECTOR_TRUNC1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY4]](s32), [[C1]](s32) - ; GFX9: [[XOR:%[0-9]+]]:_(<2 x s16>) = G_XOR [[UV4]], [[BUILD_VECTOR_TRUNC1]] - ; GFX9: [[AND1:%[0-9]+]]:_(<2 x s16>) = G_AND [[XOR]], [[BUILD_VECTOR_TRUNC]] - ; GFX9: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 - ; GFX9: [[COPY5:%[0-9]+]]:_(s32) = COPY [[C2]](s32) - ; GFX9: [[BUILD_VECTOR_TRUNC2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY5]](s32), [[C2]](s32) - ; GFX9: [[SHL:%[0-9]+]]:_(<2 x s16>) = G_SHL [[UV]], [[BUILD_VECTOR_TRUNC2]](<2 x s16>) - ; GFX9: [[SHL1:%[0-9]+]]:_(<2 x s16>) = G_SHL [[SHL]], [[AND1]](<2 x s16>) - ; GFX9: [[LSHR:%[0-9]+]]:_(<2 x s16>) = G_LSHR [[UV2]], [[AND]](<2 x s16>) - ; GFX9: [[OR:%[0-9]+]]:_(<2 x s16>) = G_OR [[SHL1]], [[LSHR]] - ; GFX9: [[COPY6:%[0-9]+]]:_(s32) = COPY [[C]](s32) - ; GFX9: [[COPY7:%[0-9]+]]:_(s32) = COPY [[C]](s32) - ; GFX9: [[BUILD_VECTOR_TRUNC3:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY6]](s32), [[COPY7]](s32) - ; GFX9: [[AND2:%[0-9]+]]:_(<2 x s16>) = G_AND [[UV5]], [[BUILD_VECTOR_TRUNC3]] - ; GFX9: [[COPY8:%[0-9]+]]:_(s32) = COPY [[C1]](s32) - ; GFX9: [[COPY9:%[0-9]+]]:_(s32) = COPY [[C1]](s32) - 
; GFX9: [[BUILD_VECTOR_TRUNC4:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY8]](s32), [[COPY9]](s32) - ; GFX9: [[XOR1:%[0-9]+]]:_(<2 x s16>) = G_XOR [[UV5]], [[BUILD_VECTOR_TRUNC4]] - ; GFX9: [[AND3:%[0-9]+]]:_(<2 x s16>) = G_AND [[XOR1]], [[BUILD_VECTOR_TRUNC3]] - ; GFX9: [[COPY10:%[0-9]+]]:_(s32) = COPY [[C2]](s32) - ; GFX9: [[COPY11:%[0-9]+]]:_(s32) = COPY [[C2]](s32) - ; GFX9: [[BUILD_VECTOR_TRUNC5:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY10]](s32), [[COPY11]](s32) - ; GFX9: [[SHL2:%[0-9]+]]:_(<2 x s16>) = G_SHL [[UV1]], [[BUILD_VECTOR_TRUNC5]](<2 x s16>) - ; GFX9: [[SHL3:%[0-9]+]]:_(<2 x s16>) = G_SHL [[SHL2]], [[AND3]](<2 x s16>) - ; GFX9: [[LSHR1:%[0-9]+]]:_(<2 x s16>) = G_LSHR [[UV3]], [[AND2]](<2 x s16>) - ; GFX9: [[OR1:%[0-9]+]]:_(<2 x s16>) = G_OR [[SHL3]], [[LSHR1]] - ; GFX9: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[OR]](<2 x s16>), [[OR1]](<2 x s16>) - ; GFX9: $vgpr0_vgpr1 = COPY [[CONCAT_VECTORS]](<4 x s16>) + ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(<4 x s16>) = COPY $vgpr2_vgpr3 + ; GFX9-NEXT: [[COPY2:%[0-9]+]]:_(<4 x s16>) = COPY $vgpr4_vgpr5 + ; GFX9-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[COPY]](<4 x s16>) + ; GFX9-NEXT: [[UV2:%[0-9]+]]:_(<2 x s16>), [[UV3:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[COPY1]](<4 x s16>) + ; GFX9-NEXT: [[UV4:%[0-9]+]]:_(<2 x s16>), [[UV5:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[COPY2]](<4 x s16>) + ; GFX9-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 15 + ; GFX9-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY [[C]](s32) + ; GFX9-NEXT: [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY3]](s32), [[C]](s32) + ; GFX9-NEXT: [[AND:%[0-9]+]]:_(<2 x s16>) = G_AND [[UV4]], [[BUILD_VECTOR_TRUNC]] + ; GFX9-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 -1 + ; GFX9-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY [[C1]](s32) + ; GFX9-NEXT: [[BUILD_VECTOR_TRUNC1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY4]](s32), [[C1]](s32) + ; GFX9-NEXT: [[XOR:%[0-9]+]]:_(<2 x s16>) = G_XOR [[UV4]], [[BUILD_VECTOR_TRUNC1]] + ; GFX9-NEXT: [[AND1:%[0-9]+]]:_(<2 x s16>) = G_AND [[XOR]], [[BUILD_VECTOR_TRUNC]] + ; GFX9-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 + ; GFX9-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY [[C2]](s32) + ; GFX9-NEXT: [[BUILD_VECTOR_TRUNC2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY5]](s32), [[C2]](s32) + ; GFX9-NEXT: [[SHL:%[0-9]+]]:_(<2 x s16>) = G_SHL [[UV]], [[BUILD_VECTOR_TRUNC2]](<2 x s16>) + ; GFX9-NEXT: [[SHL1:%[0-9]+]]:_(<2 x s16>) = G_SHL [[SHL]], [[AND1]](<2 x s16>) + ; GFX9-NEXT: [[LSHR:%[0-9]+]]:_(<2 x s16>) = G_LSHR [[UV2]], [[AND]](<2 x s16>) + ; GFX9-NEXT: [[OR:%[0-9]+]]:_(<2 x s16>) = G_OR [[SHL1]], [[LSHR]] + ; GFX9-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY [[C]](s32) + ; GFX9-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY [[C]](s32) + ; GFX9-NEXT: [[BUILD_VECTOR_TRUNC3:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY6]](s32), [[COPY7]](s32) + ; GFX9-NEXT: [[AND2:%[0-9]+]]:_(<2 x s16>) = G_AND [[UV5]], [[BUILD_VECTOR_TRUNC3]] + ; GFX9-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY [[C1]](s32) + ; GFX9-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY [[C1]](s32) + ; GFX9-NEXT: [[BUILD_VECTOR_TRUNC4:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY8]](s32), [[COPY9]](s32) + ; GFX9-NEXT: [[XOR1:%[0-9]+]]:_(<2 x s16>) = G_XOR [[UV5]], [[BUILD_VECTOR_TRUNC4]] + ; GFX9-NEXT: [[AND3:%[0-9]+]]:_(<2 x s16>) = G_AND [[XOR1]], [[BUILD_VECTOR_TRUNC3]] + ; GFX9-NEXT: [[COPY10:%[0-9]+]]:_(s32) = COPY [[C2]](s32) + ; GFX9-NEXT: [[COPY11:%[0-9]+]]:_(s32) = COPY [[C2]](s32) + ; 
GFX9-NEXT: [[BUILD_VECTOR_TRUNC5:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY10]](s32), [[COPY11]](s32) + ; GFX9-NEXT: [[SHL2:%[0-9]+]]:_(<2 x s16>) = G_SHL [[UV1]], [[BUILD_VECTOR_TRUNC5]](<2 x s16>) + ; GFX9-NEXT: [[SHL3:%[0-9]+]]:_(<2 x s16>) = G_SHL [[SHL2]], [[AND3]](<2 x s16>) + ; GFX9-NEXT: [[LSHR1:%[0-9]+]]:_(<2 x s16>) = G_LSHR [[UV3]], [[AND2]](<2 x s16>) + ; GFX9-NEXT: [[OR1:%[0-9]+]]:_(<2 x s16>) = G_OR [[SHL3]], [[LSHR1]] + ; GFX9-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[OR]](<2 x s16>), [[OR1]](<2 x s16>) + ; GFX9-NEXT: $vgpr0_vgpr1 = COPY [[CONCAT_VECTORS]](<4 x s16>) %0:_(<4 x s16>) = COPY $vgpr0_vgpr1 %1:_(<4 x s16>) = COPY $vgpr2_vgpr3 %2:_(<4 x s16>) = COPY $vgpr4_vgpr5 diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-fsin.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-fsin.mir index 717ad3bfd40a..0228f4d3e339 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-fsin.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-fsin.mir @@ -1,8 +1,8 @@ # NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py -# RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=tahiti -run-pass=legalizer -global-isel-abort=0 -o - %s | FileCheck -check-prefix=SI %s -# RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=fiji -run-pass=legalizer -global-isel-abort=0 -o - %s | FileCheck -check-prefix=VI %s -# RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx900 -run-pass=legalizer -global-isel-abort=0 -o - %s | FileCheck -check-prefix=GFX9 %s -# RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx1010 -run-pass=legalizer -global-isel-abort=0 -o - %s | FileCheck -check-prefix=GFX9 %s +# RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=tahiti -run-pass=legalizer -o - %s | FileCheck -check-prefix=SI %s +# RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=fiji -run-pass=legalizer -o - %s | FileCheck -check-prefix=VI %s +# RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx900 -run-pass=legalizer -o - %s | FileCheck -check-prefix=GFX9 %s +# RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx1010 -run-pass=legalizer -o - %s | FileCheck -check-prefix=GFX9 %s --- name: test_fsin_s32 diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-fsqrt.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-fsqrt.mir index f3f4695b8863..df0471000aa3 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-fsqrt.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-fsqrt.mir @@ -1,8 +1,8 @@ # NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py -# RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=tahiti -run-pass=legalizer -global-isel-abort=0 %s -o - | FileCheck -check-prefix=SI %s -# RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=fiji -run-pass=legalizer -global-isel-abort=0 %s -o - | FileCheck -check-prefix=VI %s -# RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx900 -run-pass=legalizer -global-isel-abort=0 %s -o - | FileCheck -check-prefix=GFX9 %s -# RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx1010 -run-pass=legalizer -global-isel-abort=0 %s -o - | FileCheck -check-prefix=GFX9 %s +# RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=tahiti -run-pass=legalizer %s -o - | FileCheck -check-prefix=SI %s +# RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=fiji -run-pass=legalizer %s -o - | FileCheck -check-prefix=VI %s +# RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx900 -run-pass=legalizer %s -o - | FileCheck -check-prefix=GFX9 %s +# RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx1010 -run-pass=legalizer %s -o - | FileCheck -check-prefix=GFX9 %s --- name: test_fsqrt_s32 @@ -12,16 +12,16 @@ body: | 
; SI-LABEL: name: test_fsqrt_s32 ; SI: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; SI: [[FSQRT:%[0-9]+]]:_(s32) = G_FSQRT [[COPY]] - ; SI: $vgpr0 = COPY [[FSQRT]](s32) + ; SI-NEXT: [[FSQRT:%[0-9]+]]:_(s32) = G_FSQRT [[COPY]] + ; SI-NEXT: $vgpr0 = COPY [[FSQRT]](s32) ; VI-LABEL: name: test_fsqrt_s32 ; VI: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; VI: [[FSQRT:%[0-9]+]]:_(s32) = G_FSQRT [[COPY]] - ; VI: $vgpr0 = COPY [[FSQRT]](s32) + ; VI-NEXT: [[FSQRT:%[0-9]+]]:_(s32) = G_FSQRT [[COPY]] + ; VI-NEXT: $vgpr0 = COPY [[FSQRT]](s32) ; GFX9-LABEL: name: test_fsqrt_s32 ; GFX9: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX9: [[FSQRT:%[0-9]+]]:_(s32) = G_FSQRT [[COPY]] - ; GFX9: $vgpr0 = COPY [[FSQRT]](s32) + ; GFX9-NEXT: [[FSQRT:%[0-9]+]]:_(s32) = G_FSQRT [[COPY]] + ; GFX9-NEXT: $vgpr0 = COPY [[FSQRT]](s32) %0:_(s32) = COPY $vgpr0 %1:_(s32) = G_FSQRT %0 $vgpr0 = COPY %1 @@ -35,16 +35,16 @@ body: | ; SI-LABEL: name: test_fsqrt_s64 ; SI: [[COPY:%[0-9]+]]:_(s64) = COPY $vgpr0_vgpr1 - ; SI: [[FSQRT:%[0-9]+]]:_(s64) = G_FSQRT [[COPY]] - ; SI: $vgpr0_vgpr1 = COPY [[FSQRT]](s64) + ; SI-NEXT: [[FSQRT:%[0-9]+]]:_(s64) = G_FSQRT [[COPY]] + ; SI-NEXT: $vgpr0_vgpr1 = COPY [[FSQRT]](s64) ; VI-LABEL: name: test_fsqrt_s64 ; VI: [[COPY:%[0-9]+]]:_(s64) = COPY $vgpr0_vgpr1 - ; VI: [[FSQRT:%[0-9]+]]:_(s64) = G_FSQRT [[COPY]] - ; VI: $vgpr0_vgpr1 = COPY [[FSQRT]](s64) + ; VI-NEXT: [[FSQRT:%[0-9]+]]:_(s64) = G_FSQRT [[COPY]] + ; VI-NEXT: $vgpr0_vgpr1 = COPY [[FSQRT]](s64) ; GFX9-LABEL: name: test_fsqrt_s64 ; GFX9: [[COPY:%[0-9]+]]:_(s64) = COPY $vgpr0_vgpr1 - ; GFX9: [[FSQRT:%[0-9]+]]:_(s64) = G_FSQRT [[COPY]] - ; GFX9: $vgpr0_vgpr1 = COPY [[FSQRT]](s64) + ; GFX9-NEXT: [[FSQRT:%[0-9]+]]:_(s64) = G_FSQRT [[COPY]] + ; GFX9-NEXT: $vgpr0_vgpr1 = COPY [[FSQRT]](s64) %0:_(s64) = COPY $vgpr0_vgpr1 %1:_(s64) = G_FSQRT %0 $vgpr0_vgpr1 = COPY %1 @@ -58,24 +58,24 @@ body: | ; SI-LABEL: name: test_fsqrt_s16 ; SI: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; SI: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY]](s32) - ; SI: [[FPEXT:%[0-9]+]]:_(s32) = G_FPEXT [[TRUNC]](s16) - ; SI: [[FSQRT:%[0-9]+]]:_(s32) = G_FSQRT [[FPEXT]] - ; SI: [[FPTRUNC:%[0-9]+]]:_(s16) = G_FPTRUNC [[FSQRT]](s32) - ; SI: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[FPTRUNC]](s16) - ; SI: $vgpr0 = COPY [[ANYEXT]](s32) + ; SI-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY]](s32) + ; SI-NEXT: [[FPEXT:%[0-9]+]]:_(s32) = G_FPEXT [[TRUNC]](s16) + ; SI-NEXT: [[FSQRT:%[0-9]+]]:_(s32) = G_FSQRT [[FPEXT]] + ; SI-NEXT: [[FPTRUNC:%[0-9]+]]:_(s16) = G_FPTRUNC [[FSQRT]](s32) + ; SI-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[FPTRUNC]](s16) + ; SI-NEXT: $vgpr0 = COPY [[ANYEXT]](s32) ; VI-LABEL: name: test_fsqrt_s16 ; VI: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; VI: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY]](s32) - ; VI: [[FSQRT:%[0-9]+]]:_(s16) = G_FSQRT [[TRUNC]] - ; VI: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[FSQRT]](s16) - ; VI: $vgpr0 = COPY [[ANYEXT]](s32) + ; VI-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY]](s32) + ; VI-NEXT: [[FSQRT:%[0-9]+]]:_(s16) = G_FSQRT [[TRUNC]] + ; VI-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[FSQRT]](s16) + ; VI-NEXT: $vgpr0 = COPY [[ANYEXT]](s32) ; GFX9-LABEL: name: test_fsqrt_s16 ; GFX9: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX9: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY]](s32) - ; GFX9: [[FSQRT:%[0-9]+]]:_(s16) = G_FSQRT [[TRUNC]] - ; GFX9: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[FSQRT]](s16) - ; GFX9: $vgpr0 = COPY [[ANYEXT]](s32) + ; GFX9-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY]](s32) + ; GFX9-NEXT: [[FSQRT:%[0-9]+]]:_(s16) = G_FSQRT 
[[TRUNC]] + ; GFX9-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[FSQRT]](s16) + ; GFX9-NEXT: $vgpr0 = COPY [[ANYEXT]](s32) %0:_(s32) = COPY $vgpr0 %1:_(s16) = G_TRUNC %0 %2:_(s16) = G_FSQRT %1 @@ -91,25 +91,25 @@ body: | ; SI-LABEL: name: test_fsqrt_v2s32 ; SI: [[COPY:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr0_vgpr1 - ; SI: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<2 x s32>) - ; SI: [[FSQRT:%[0-9]+]]:_(s32) = G_FSQRT [[UV]] - ; SI: [[FSQRT1:%[0-9]+]]:_(s32) = G_FSQRT [[UV1]] - ; SI: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[FSQRT]](s32), [[FSQRT1]](s32) - ; SI: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x s32>) + ; SI-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<2 x s32>) + ; SI-NEXT: [[FSQRT:%[0-9]+]]:_(s32) = G_FSQRT [[UV]] + ; SI-NEXT: [[FSQRT1:%[0-9]+]]:_(s32) = G_FSQRT [[UV1]] + ; SI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[FSQRT]](s32), [[FSQRT1]](s32) + ; SI-NEXT: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x s32>) ; VI-LABEL: name: test_fsqrt_v2s32 ; VI: [[COPY:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr0_vgpr1 - ; VI: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<2 x s32>) - ; VI: [[FSQRT:%[0-9]+]]:_(s32) = G_FSQRT [[UV]] - ; VI: [[FSQRT1:%[0-9]+]]:_(s32) = G_FSQRT [[UV1]] - ; VI: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[FSQRT]](s32), [[FSQRT1]](s32) - ; VI: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x s32>) + ; VI-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<2 x s32>) + ; VI-NEXT: [[FSQRT:%[0-9]+]]:_(s32) = G_FSQRT [[UV]] + ; VI-NEXT: [[FSQRT1:%[0-9]+]]:_(s32) = G_FSQRT [[UV1]] + ; VI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[FSQRT]](s32), [[FSQRT1]](s32) + ; VI-NEXT: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x s32>) ; GFX9-LABEL: name: test_fsqrt_v2s32 ; GFX9: [[COPY:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr0_vgpr1 - ; GFX9: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<2 x s32>) - ; GFX9: [[FSQRT:%[0-9]+]]:_(s32) = G_FSQRT [[UV]] - ; GFX9: [[FSQRT1:%[0-9]+]]:_(s32) = G_FSQRT [[UV1]] - ; GFX9: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[FSQRT]](s32), [[FSQRT1]](s32) - ; GFX9: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x s32>) + ; GFX9-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<2 x s32>) + ; GFX9-NEXT: [[FSQRT:%[0-9]+]]:_(s32) = G_FSQRT [[UV]] + ; GFX9-NEXT: [[FSQRT1:%[0-9]+]]:_(s32) = G_FSQRT [[UV1]] + ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[FSQRT]](s32), [[FSQRT1]](s32) + ; GFX9-NEXT: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x s32>) %0:_(<2 x s32>) = COPY $vgpr0_vgpr1 %1:_(<2 x s32>) = G_FSQRT %0 $vgpr0_vgpr1 = COPY %1 @@ -123,28 +123,28 @@ body: | ; SI-LABEL: name: test_fsqrt_v3s32 ; SI: [[COPY:%[0-9]+]]:_(<3 x s32>) = COPY $vgpr0_vgpr1_vgpr2 - ; SI: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<3 x s32>) - ; SI: [[FSQRT:%[0-9]+]]:_(s32) = G_FSQRT [[UV]] - ; SI: [[FSQRT1:%[0-9]+]]:_(s32) = G_FSQRT [[UV1]] - ; SI: [[FSQRT2:%[0-9]+]]:_(s32) = G_FSQRT [[UV2]] - ; SI: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[FSQRT]](s32), [[FSQRT1]](s32), [[FSQRT2]](s32) - ; SI: $vgpr0_vgpr1_vgpr2 = COPY [[BUILD_VECTOR]](<3 x s32>) + ; SI-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<3 x s32>) + ; SI-NEXT: [[FSQRT:%[0-9]+]]:_(s32) = G_FSQRT [[UV]] + ; SI-NEXT: [[FSQRT1:%[0-9]+]]:_(s32) = G_FSQRT 
[[UV1]] + ; SI-NEXT: [[FSQRT2:%[0-9]+]]:_(s32) = G_FSQRT [[UV2]] + ; SI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[FSQRT]](s32), [[FSQRT1]](s32), [[FSQRT2]](s32) + ; SI-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[BUILD_VECTOR]](<3 x s32>) ; VI-LABEL: name: test_fsqrt_v3s32 ; VI: [[COPY:%[0-9]+]]:_(<3 x s32>) = COPY $vgpr0_vgpr1_vgpr2 - ; VI: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<3 x s32>) - ; VI: [[FSQRT:%[0-9]+]]:_(s32) = G_FSQRT [[UV]] - ; VI: [[FSQRT1:%[0-9]+]]:_(s32) = G_FSQRT [[UV1]] - ; VI: [[FSQRT2:%[0-9]+]]:_(s32) = G_FSQRT [[UV2]] - ; VI: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[FSQRT]](s32), [[FSQRT1]](s32), [[FSQRT2]](s32) - ; VI: $vgpr0_vgpr1_vgpr2 = COPY [[BUILD_VECTOR]](<3 x s32>) + ; VI-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<3 x s32>) + ; VI-NEXT: [[FSQRT:%[0-9]+]]:_(s32) = G_FSQRT [[UV]] + ; VI-NEXT: [[FSQRT1:%[0-9]+]]:_(s32) = G_FSQRT [[UV1]] + ; VI-NEXT: [[FSQRT2:%[0-9]+]]:_(s32) = G_FSQRT [[UV2]] + ; VI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[FSQRT]](s32), [[FSQRT1]](s32), [[FSQRT2]](s32) + ; VI-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[BUILD_VECTOR]](<3 x s32>) ; GFX9-LABEL: name: test_fsqrt_v3s32 ; GFX9: [[COPY:%[0-9]+]]:_(<3 x s32>) = COPY $vgpr0_vgpr1_vgpr2 - ; GFX9: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<3 x s32>) - ; GFX9: [[FSQRT:%[0-9]+]]:_(s32) = G_FSQRT [[UV]] - ; GFX9: [[FSQRT1:%[0-9]+]]:_(s32) = G_FSQRT [[UV1]] - ; GFX9: [[FSQRT2:%[0-9]+]]:_(s32) = G_FSQRT [[UV2]] - ; GFX9: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[FSQRT]](s32), [[FSQRT1]](s32), [[FSQRT2]](s32) - ; GFX9: $vgpr0_vgpr1_vgpr2 = COPY [[BUILD_VECTOR]](<3 x s32>) + ; GFX9-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<3 x s32>) + ; GFX9-NEXT: [[FSQRT:%[0-9]+]]:_(s32) = G_FSQRT [[UV]] + ; GFX9-NEXT: [[FSQRT1:%[0-9]+]]:_(s32) = G_FSQRT [[UV1]] + ; GFX9-NEXT: [[FSQRT2:%[0-9]+]]:_(s32) = G_FSQRT [[UV2]] + ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[FSQRT]](s32), [[FSQRT1]](s32), [[FSQRT2]](s32) + ; GFX9-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[BUILD_VECTOR]](<3 x s32>) %0:_(<3 x s32>) = COPY $vgpr0_vgpr1_vgpr2 %1:_(<3 x s32>) = G_FSQRT %0 $vgpr0_vgpr1_vgpr2 = COPY %1 @@ -158,25 +158,25 @@ body: | ; SI-LABEL: name: test_fsqrt_v2s64 ; SI: [[COPY:%[0-9]+]]:_(<2 x s64>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3 - ; SI: [[UV:%[0-9]+]]:_(s64), [[UV1:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[COPY]](<2 x s64>) - ; SI: [[FSQRT:%[0-9]+]]:_(s64) = G_FSQRT [[UV]] - ; SI: [[FSQRT1:%[0-9]+]]:_(s64) = G_FSQRT [[UV1]] - ; SI: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s64>) = G_BUILD_VECTOR [[FSQRT]](s64), [[FSQRT1]](s64) - ; SI: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<2 x s64>) + ; SI-NEXT: [[UV:%[0-9]+]]:_(s64), [[UV1:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[COPY]](<2 x s64>) + ; SI-NEXT: [[FSQRT:%[0-9]+]]:_(s64) = G_FSQRT [[UV]] + ; SI-NEXT: [[FSQRT1:%[0-9]+]]:_(s64) = G_FSQRT [[UV1]] + ; SI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s64>) = G_BUILD_VECTOR [[FSQRT]](s64), [[FSQRT1]](s64) + ; SI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<2 x s64>) ; VI-LABEL: name: test_fsqrt_v2s64 ; VI: [[COPY:%[0-9]+]]:_(<2 x s64>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3 - ; VI: [[UV:%[0-9]+]]:_(s64), [[UV1:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[COPY]](<2 x s64>) - ; VI: [[FSQRT:%[0-9]+]]:_(s64) = G_FSQRT [[UV]] - ; VI: 
[[FSQRT1:%[0-9]+]]:_(s64) = G_FSQRT [[UV1]] - ; VI: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s64>) = G_BUILD_VECTOR [[FSQRT]](s64), [[FSQRT1]](s64) - ; VI: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<2 x s64>) + ; VI-NEXT: [[UV:%[0-9]+]]:_(s64), [[UV1:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[COPY]](<2 x s64>) + ; VI-NEXT: [[FSQRT:%[0-9]+]]:_(s64) = G_FSQRT [[UV]] + ; VI-NEXT: [[FSQRT1:%[0-9]+]]:_(s64) = G_FSQRT [[UV1]] + ; VI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s64>) = G_BUILD_VECTOR [[FSQRT]](s64), [[FSQRT1]](s64) + ; VI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<2 x s64>) ; GFX9-LABEL: name: test_fsqrt_v2s64 ; GFX9: [[COPY:%[0-9]+]]:_(<2 x s64>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3 - ; GFX9: [[UV:%[0-9]+]]:_(s64), [[UV1:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[COPY]](<2 x s64>) - ; GFX9: [[FSQRT:%[0-9]+]]:_(s64) = G_FSQRT [[UV]] - ; GFX9: [[FSQRT1:%[0-9]+]]:_(s64) = G_FSQRT [[UV1]] - ; GFX9: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s64>) = G_BUILD_VECTOR [[FSQRT]](s64), [[FSQRT1]](s64) - ; GFX9: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<2 x s64>) + ; GFX9-NEXT: [[UV:%[0-9]+]]:_(s64), [[UV1:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[COPY]](<2 x s64>) + ; GFX9-NEXT: [[FSQRT:%[0-9]+]]:_(s64) = G_FSQRT [[UV]] + ; GFX9-NEXT: [[FSQRT1:%[0-9]+]]:_(s64) = G_FSQRT [[UV1]] + ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s64>) = G_BUILD_VECTOR [[FSQRT]](s64), [[FSQRT1]](s64) + ; GFX9-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<2 x s64>) %0:_(<2 x s64>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3 %1:_(<2 x s64>) = G_FSQRT %0 $vgpr0_vgpr1_vgpr2_vgpr3 = COPY %1 @@ -190,51 +190,51 @@ body: | ; SI-LABEL: name: test_fsqrt_v2s16 ; SI: [[COPY:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0 - ; SI: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY]](<2 x s16>) - ; SI: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST]](s32) - ; SI: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; SI: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) - ; SI: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) - ; SI: [[FPEXT:%[0-9]+]]:_(s32) = G_FPEXT [[TRUNC]](s16) - ; SI: [[FSQRT:%[0-9]+]]:_(s32) = G_FSQRT [[FPEXT]] - ; SI: [[FPTRUNC:%[0-9]+]]:_(s16) = G_FPTRUNC [[FSQRT]](s32) - ; SI: [[FPEXT1:%[0-9]+]]:_(s32) = G_FPEXT [[TRUNC1]](s16) - ; SI: [[FSQRT1:%[0-9]+]]:_(s32) = G_FSQRT [[FPEXT1]] - ; SI: [[FPTRUNC1:%[0-9]+]]:_(s16) = G_FPTRUNC [[FSQRT1]](s32) - ; SI: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[FPTRUNC]](s16) - ; SI: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[FPTRUNC1]](s16) - ; SI: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C]](s32) - ; SI: [[OR:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL]] - ; SI: [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) - ; SI: $vgpr0 = COPY [[BITCAST1]](<2 x s16>) + ; SI-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY]](<2 x s16>) + ; SI-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST]](s32) + ; SI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; SI-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) + ; SI-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) + ; SI-NEXT: [[FPEXT:%[0-9]+]]:_(s32) = G_FPEXT [[TRUNC]](s16) + ; SI-NEXT: [[FSQRT:%[0-9]+]]:_(s32) = G_FSQRT [[FPEXT]] + ; SI-NEXT: [[FPTRUNC:%[0-9]+]]:_(s16) = G_FPTRUNC [[FSQRT]](s32) + ; SI-NEXT: [[FPEXT1:%[0-9]+]]:_(s32) = G_FPEXT [[TRUNC1]](s16) + ; SI-NEXT: [[FSQRT1:%[0-9]+]]:_(s32) = G_FSQRT [[FPEXT1]] + ; SI-NEXT: [[FPTRUNC1:%[0-9]+]]:_(s16) = G_FPTRUNC [[FSQRT1]](s32) + ; SI-NEXT: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[FPTRUNC]](s16) + ; SI-NEXT: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[FPTRUNC1]](s16) + ; SI-NEXT: [[SHL:%[0-9]+]]:_(s32) = 
G_SHL [[ZEXT1]], [[C]](s32) + ; SI-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL]] + ; SI-NEXT: [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) + ; SI-NEXT: $vgpr0 = COPY [[BITCAST1]](<2 x s16>) ; VI-LABEL: name: test_fsqrt_v2s16 ; VI: [[COPY:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0 - ; VI: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY]](<2 x s16>) - ; VI: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST]](s32) - ; VI: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; VI: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) - ; VI: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) - ; VI: [[FSQRT:%[0-9]+]]:_(s16) = G_FSQRT [[TRUNC]] - ; VI: [[FSQRT1:%[0-9]+]]:_(s16) = G_FSQRT [[TRUNC1]] - ; VI: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[FSQRT]](s16) - ; VI: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[FSQRT1]](s16) - ; VI: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C]](s32) - ; VI: [[OR:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL]] - ; VI: [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) - ; VI: $vgpr0 = COPY [[BITCAST1]](<2 x s16>) + ; VI-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY]](<2 x s16>) + ; VI-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST]](s32) + ; VI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; VI-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) + ; VI-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) + ; VI-NEXT: [[FSQRT:%[0-9]+]]:_(s16) = G_FSQRT [[TRUNC]] + ; VI-NEXT: [[FSQRT1:%[0-9]+]]:_(s16) = G_FSQRT [[TRUNC1]] + ; VI-NEXT: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[FSQRT]](s16) + ; VI-NEXT: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[FSQRT1]](s16) + ; VI-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C]](s32) + ; VI-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL]] + ; VI-NEXT: [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) + ; VI-NEXT: $vgpr0 = COPY [[BITCAST1]](<2 x s16>) ; GFX9-LABEL: name: test_fsqrt_v2s16 ; GFX9: [[COPY:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0 - ; GFX9: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY]](<2 x s16>) - ; GFX9: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST]](s32) - ; GFX9: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; GFX9: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) - ; GFX9: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) - ; GFX9: [[FSQRT:%[0-9]+]]:_(s16) = G_FSQRT [[TRUNC]] - ; GFX9: [[FSQRT1:%[0-9]+]]:_(s16) = G_FSQRT [[TRUNC1]] - ; GFX9: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[FSQRT]](s16) - ; GFX9: [[ANYEXT1:%[0-9]+]]:_(s32) = G_ANYEXT [[FSQRT1]](s16) - ; GFX9: [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[ANYEXT]](s32), [[ANYEXT1]](s32) - ; GFX9: $vgpr0 = COPY [[BUILD_VECTOR_TRUNC]](<2 x s16>) + ; GFX9-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY]](<2 x s16>) + ; GFX9-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST]](s32) + ; GFX9-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; GFX9-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) + ; GFX9-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) + ; GFX9-NEXT: [[FSQRT:%[0-9]+]]:_(s16) = G_FSQRT [[TRUNC]] + ; GFX9-NEXT: [[FSQRT1:%[0-9]+]]:_(s16) = G_FSQRT [[TRUNC1]] + ; GFX9-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[FSQRT]](s16) + ; GFX9-NEXT: [[ANYEXT1:%[0-9]+]]:_(s32) = G_ANYEXT [[FSQRT1]](s16) + ; GFX9-NEXT: [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[ANYEXT]](s32), [[ANYEXT1]](s32) + ; GFX9-NEXT: $vgpr0 = COPY [[BUILD_VECTOR_TRUNC]](<2 x s16>) %0:_(<2 x s16>) = COPY $vgpr0 %1:_(<2 x s16>) = G_FSQRT %0 $vgpr0 = COPY %1 @@ -247,64 +247,64 @@ body: | ; SI-LABEL: 
name: test_fsqrt_v3s16 ; SI: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF - ; SI: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<4 x s16>) - ; SI: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>) - ; SI: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST]](s32) - ; SI: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; SI: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) - ; SI: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) - ; SI: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>) - ; SI: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST1]](s32) - ; SI: [[FPEXT:%[0-9]+]]:_(s32) = G_FPEXT [[TRUNC]](s16) - ; SI: [[FSQRT:%[0-9]+]]:_(s32) = G_FSQRT [[FPEXT]] - ; SI: [[FPTRUNC:%[0-9]+]]:_(s16) = G_FPTRUNC [[FSQRT]](s32) - ; SI: [[FPEXT1:%[0-9]+]]:_(s32) = G_FPEXT [[TRUNC1]](s16) - ; SI: [[FSQRT1:%[0-9]+]]:_(s32) = G_FSQRT [[FPEXT1]] - ; SI: [[FPTRUNC1:%[0-9]+]]:_(s16) = G_FPTRUNC [[FSQRT1]](s32) - ; SI: [[FPEXT2:%[0-9]+]]:_(s32) = G_FPEXT [[TRUNC2]](s16) - ; SI: [[FSQRT2:%[0-9]+]]:_(s32) = G_FSQRT [[FPEXT2]] - ; SI: [[FPTRUNC2:%[0-9]+]]:_(s16) = G_FPTRUNC [[FSQRT2]](s32) - ; SI: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[FPTRUNC]](s16) - ; SI: [[ANYEXT1:%[0-9]+]]:_(s32) = G_ANYEXT [[FPTRUNC1]](s16) - ; SI: [[ANYEXT2:%[0-9]+]]:_(s32) = G_ANYEXT [[FPTRUNC2]](s16) - ; SI: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[ANYEXT]](s32), [[ANYEXT1]](s32), [[ANYEXT2]](s32) - ; SI: S_NOP 0, implicit [[BUILD_VECTOR]](<3 x s32>) + ; SI-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<4 x s16>) + ; SI-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>) + ; SI-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST]](s32) + ; SI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; SI-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) + ; SI-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) + ; SI-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>) + ; SI-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST1]](s32) + ; SI-NEXT: [[FPEXT:%[0-9]+]]:_(s32) = G_FPEXT [[TRUNC]](s16) + ; SI-NEXT: [[FSQRT:%[0-9]+]]:_(s32) = G_FSQRT [[FPEXT]] + ; SI-NEXT: [[FPTRUNC:%[0-9]+]]:_(s16) = G_FPTRUNC [[FSQRT]](s32) + ; SI-NEXT: [[FPEXT1:%[0-9]+]]:_(s32) = G_FPEXT [[TRUNC1]](s16) + ; SI-NEXT: [[FSQRT1:%[0-9]+]]:_(s32) = G_FSQRT [[FPEXT1]] + ; SI-NEXT: [[FPTRUNC1:%[0-9]+]]:_(s16) = G_FPTRUNC [[FSQRT1]](s32) + ; SI-NEXT: [[FPEXT2:%[0-9]+]]:_(s32) = G_FPEXT [[TRUNC2]](s16) + ; SI-NEXT: [[FSQRT2:%[0-9]+]]:_(s32) = G_FSQRT [[FPEXT2]] + ; SI-NEXT: [[FPTRUNC2:%[0-9]+]]:_(s16) = G_FPTRUNC [[FSQRT2]](s32) + ; SI-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[FPTRUNC]](s16) + ; SI-NEXT: [[ANYEXT1:%[0-9]+]]:_(s32) = G_ANYEXT [[FPTRUNC1]](s16) + ; SI-NEXT: [[ANYEXT2:%[0-9]+]]:_(s32) = G_ANYEXT [[FPTRUNC2]](s16) + ; SI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[ANYEXT]](s32), [[ANYEXT1]](s32), [[ANYEXT2]](s32) + ; SI-NEXT: S_NOP 0, implicit [[BUILD_VECTOR]](<3 x s32>) ; VI-LABEL: name: test_fsqrt_v3s16 ; VI: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF - ; VI: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<4 x s16>) - ; VI: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>) - ; VI: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST]](s32) - ; VI: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; VI: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) - ; VI: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) - ; VI: [[BITCAST1:%[0-9]+]]:_(s32) = 
G_BITCAST [[UV1]](<2 x s16>) - ; VI: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST1]](s32) - ; VI: [[FSQRT:%[0-9]+]]:_(s16) = G_FSQRT [[TRUNC]] - ; VI: [[FSQRT1:%[0-9]+]]:_(s16) = G_FSQRT [[TRUNC1]] - ; VI: [[FSQRT2:%[0-9]+]]:_(s16) = G_FSQRT [[TRUNC2]] - ; VI: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[FSQRT]](s16) - ; VI: [[ANYEXT1:%[0-9]+]]:_(s32) = G_ANYEXT [[FSQRT1]](s16) - ; VI: [[ANYEXT2:%[0-9]+]]:_(s32) = G_ANYEXT [[FSQRT2]](s16) - ; VI: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[ANYEXT]](s32), [[ANYEXT1]](s32), [[ANYEXT2]](s32) - ; VI: S_NOP 0, implicit [[BUILD_VECTOR]](<3 x s32>) + ; VI-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<4 x s16>) + ; VI-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>) + ; VI-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST]](s32) + ; VI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; VI-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) + ; VI-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) + ; VI-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>) + ; VI-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST1]](s32) + ; VI-NEXT: [[FSQRT:%[0-9]+]]:_(s16) = G_FSQRT [[TRUNC]] + ; VI-NEXT: [[FSQRT1:%[0-9]+]]:_(s16) = G_FSQRT [[TRUNC1]] + ; VI-NEXT: [[FSQRT2:%[0-9]+]]:_(s16) = G_FSQRT [[TRUNC2]] + ; VI-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[FSQRT]](s16) + ; VI-NEXT: [[ANYEXT1:%[0-9]+]]:_(s32) = G_ANYEXT [[FSQRT1]](s16) + ; VI-NEXT: [[ANYEXT2:%[0-9]+]]:_(s32) = G_ANYEXT [[FSQRT2]](s16) + ; VI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[ANYEXT]](s32), [[ANYEXT1]](s32), [[ANYEXT2]](s32) + ; VI-NEXT: S_NOP 0, implicit [[BUILD_VECTOR]](<3 x s32>) ; GFX9-LABEL: name: test_fsqrt_v3s16 ; GFX9: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF - ; GFX9: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<4 x s16>) - ; GFX9: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>) - ; GFX9: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST]](s32) - ; GFX9: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; GFX9: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) - ; GFX9: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) - ; GFX9: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>) - ; GFX9: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST1]](s32) - ; GFX9: [[FSQRT:%[0-9]+]]:_(s16) = G_FSQRT [[TRUNC]] - ; GFX9: [[FSQRT1:%[0-9]+]]:_(s16) = G_FSQRT [[TRUNC1]] - ; GFX9: [[FSQRT2:%[0-9]+]]:_(s16) = G_FSQRT [[TRUNC2]] - ; GFX9: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[FSQRT]](s16) - ; GFX9: [[ANYEXT1:%[0-9]+]]:_(s32) = G_ANYEXT [[FSQRT1]](s16) - ; GFX9: [[ANYEXT2:%[0-9]+]]:_(s32) = G_ANYEXT [[FSQRT2]](s16) - ; GFX9: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[ANYEXT]](s32), [[ANYEXT1]](s32), [[ANYEXT2]](s32) - ; GFX9: S_NOP 0, implicit [[BUILD_VECTOR]](<3 x s32>) + ; GFX9-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<4 x s16>) + ; GFX9-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>) + ; GFX9-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST]](s32) + ; GFX9-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; GFX9-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) + ; GFX9-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) + ; GFX9-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>) + ; GFX9-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST1]](s32) + ; GFX9-NEXT: [[FSQRT:%[0-9]+]]:_(s16) = G_FSQRT [[TRUNC]] + ; 
GFX9-NEXT: [[FSQRT1:%[0-9]+]]:_(s16) = G_FSQRT [[TRUNC1]] + ; GFX9-NEXT: [[FSQRT2:%[0-9]+]]:_(s16) = G_FSQRT [[TRUNC2]] + ; GFX9-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[FSQRT]](s16) + ; GFX9-NEXT: [[ANYEXT1:%[0-9]+]]:_(s32) = G_ANYEXT [[FSQRT1]](s16) + ; GFX9-NEXT: [[ANYEXT2:%[0-9]+]]:_(s32) = G_ANYEXT [[FSQRT2]](s16) + ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[ANYEXT]](s32), [[ANYEXT1]](s32), [[ANYEXT2]](s32) + ; GFX9-NEXT: S_NOP 0, implicit [[BUILD_VECTOR]](<3 x s32>) %0:_(<3 x s16>) = G_IMPLICIT_DEF %1:_(<3 x s16>) = G_FSQRT %0 %2:_(<3 x s32>) = G_ANYEXT %1 @@ -319,92 +319,92 @@ body: | ; SI-LABEL: name: test_fsqrt_v4s16 ; SI: [[COPY:%[0-9]+]]:_(<4 x s16>) = COPY $vgpr0_vgpr1 - ; SI: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[COPY]](<4 x s16>) - ; SI: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>) - ; SI: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST]](s32) - ; SI: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; SI: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) - ; SI: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) - ; SI: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>) - ; SI: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST1]](s32) - ; SI: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C]](s32) - ; SI: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR1]](s32) - ; SI: [[FPEXT:%[0-9]+]]:_(s32) = G_FPEXT [[TRUNC]](s16) - ; SI: [[FSQRT:%[0-9]+]]:_(s32) = G_FSQRT [[FPEXT]] - ; SI: [[FPTRUNC:%[0-9]+]]:_(s16) = G_FPTRUNC [[FSQRT]](s32) - ; SI: [[FPEXT1:%[0-9]+]]:_(s32) = G_FPEXT [[TRUNC1]](s16) - ; SI: [[FSQRT1:%[0-9]+]]:_(s32) = G_FSQRT [[FPEXT1]] - ; SI: [[FPTRUNC1:%[0-9]+]]:_(s16) = G_FPTRUNC [[FSQRT1]](s32) - ; SI: [[FPEXT2:%[0-9]+]]:_(s32) = G_FPEXT [[TRUNC2]](s16) - ; SI: [[FSQRT2:%[0-9]+]]:_(s32) = G_FSQRT [[FPEXT2]] - ; SI: [[FPTRUNC2:%[0-9]+]]:_(s16) = G_FPTRUNC [[FSQRT2]](s32) - ; SI: [[FPEXT3:%[0-9]+]]:_(s32) = G_FPEXT [[TRUNC3]](s16) - ; SI: [[FSQRT3:%[0-9]+]]:_(s32) = G_FSQRT [[FPEXT3]] - ; SI: [[FPTRUNC3:%[0-9]+]]:_(s16) = G_FPTRUNC [[FSQRT3]](s32) - ; SI: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[FPTRUNC]](s16) - ; SI: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[FPTRUNC1]](s16) - ; SI: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C]](s32) - ; SI: [[OR:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL]] - ; SI: [[BITCAST2:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) - ; SI: [[ZEXT2:%[0-9]+]]:_(s32) = G_ZEXT [[FPTRUNC2]](s16) - ; SI: [[ZEXT3:%[0-9]+]]:_(s32) = G_ZEXT [[FPTRUNC3]](s16) - ; SI: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[ZEXT3]], [[C]](s32) - ; SI: [[OR1:%[0-9]+]]:_(s32) = G_OR [[ZEXT2]], [[SHL1]] - ; SI: [[BITCAST3:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32) - ; SI: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BITCAST2]](<2 x s16>), [[BITCAST3]](<2 x s16>) - ; SI: $vgpr0_vgpr1 = COPY [[CONCAT_VECTORS]](<4 x s16>) + ; SI-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[COPY]](<4 x s16>) + ; SI-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>) + ; SI-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST]](s32) + ; SI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; SI-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) + ; SI-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) + ; SI-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>) + ; SI-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST1]](s32) + ; SI-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C]](s32) + ; SI-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = 
G_TRUNC [[LSHR1]](s32) + ; SI-NEXT: [[FPEXT:%[0-9]+]]:_(s32) = G_FPEXT [[TRUNC]](s16) + ; SI-NEXT: [[FSQRT:%[0-9]+]]:_(s32) = G_FSQRT [[FPEXT]] + ; SI-NEXT: [[FPTRUNC:%[0-9]+]]:_(s16) = G_FPTRUNC [[FSQRT]](s32) + ; SI-NEXT: [[FPEXT1:%[0-9]+]]:_(s32) = G_FPEXT [[TRUNC1]](s16) + ; SI-NEXT: [[FSQRT1:%[0-9]+]]:_(s32) = G_FSQRT [[FPEXT1]] + ; SI-NEXT: [[FPTRUNC1:%[0-9]+]]:_(s16) = G_FPTRUNC [[FSQRT1]](s32) + ; SI-NEXT: [[FPEXT2:%[0-9]+]]:_(s32) = G_FPEXT [[TRUNC2]](s16) + ; SI-NEXT: [[FSQRT2:%[0-9]+]]:_(s32) = G_FSQRT [[FPEXT2]] + ; SI-NEXT: [[FPTRUNC2:%[0-9]+]]:_(s16) = G_FPTRUNC [[FSQRT2]](s32) + ; SI-NEXT: [[FPEXT3:%[0-9]+]]:_(s32) = G_FPEXT [[TRUNC3]](s16) + ; SI-NEXT: [[FSQRT3:%[0-9]+]]:_(s32) = G_FSQRT [[FPEXT3]] + ; SI-NEXT: [[FPTRUNC3:%[0-9]+]]:_(s16) = G_FPTRUNC [[FSQRT3]](s32) + ; SI-NEXT: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[FPTRUNC]](s16) + ; SI-NEXT: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[FPTRUNC1]](s16) + ; SI-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C]](s32) + ; SI-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL]] + ; SI-NEXT: [[BITCAST2:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) + ; SI-NEXT: [[ZEXT2:%[0-9]+]]:_(s32) = G_ZEXT [[FPTRUNC2]](s16) + ; SI-NEXT: [[ZEXT3:%[0-9]+]]:_(s32) = G_ZEXT [[FPTRUNC3]](s16) + ; SI-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[ZEXT3]], [[C]](s32) + ; SI-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[ZEXT2]], [[SHL1]] + ; SI-NEXT: [[BITCAST3:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32) + ; SI-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BITCAST2]](<2 x s16>), [[BITCAST3]](<2 x s16>) + ; SI-NEXT: $vgpr0_vgpr1 = COPY [[CONCAT_VECTORS]](<4 x s16>) ; VI-LABEL: name: test_fsqrt_v4s16 ; VI: [[COPY:%[0-9]+]]:_(<4 x s16>) = COPY $vgpr0_vgpr1 - ; VI: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[COPY]](<4 x s16>) - ; VI: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>) - ; VI: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST]](s32) - ; VI: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; VI: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) - ; VI: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) - ; VI: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>) - ; VI: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST1]](s32) - ; VI: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C]](s32) - ; VI: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR1]](s32) - ; VI: [[FSQRT:%[0-9]+]]:_(s16) = G_FSQRT [[TRUNC]] - ; VI: [[FSQRT1:%[0-9]+]]:_(s16) = G_FSQRT [[TRUNC1]] - ; VI: [[FSQRT2:%[0-9]+]]:_(s16) = G_FSQRT [[TRUNC2]] - ; VI: [[FSQRT3:%[0-9]+]]:_(s16) = G_FSQRT [[TRUNC3]] - ; VI: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[FSQRT]](s16) - ; VI: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[FSQRT1]](s16) - ; VI: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C]](s32) - ; VI: [[OR:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL]] - ; VI: [[BITCAST2:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) - ; VI: [[ZEXT2:%[0-9]+]]:_(s32) = G_ZEXT [[FSQRT2]](s16) - ; VI: [[ZEXT3:%[0-9]+]]:_(s32) = G_ZEXT [[FSQRT3]](s16) - ; VI: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[ZEXT3]], [[C]](s32) - ; VI: [[OR1:%[0-9]+]]:_(s32) = G_OR [[ZEXT2]], [[SHL1]] - ; VI: [[BITCAST3:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32) - ; VI: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BITCAST2]](<2 x s16>), [[BITCAST3]](<2 x s16>) - ; VI: $vgpr0_vgpr1 = COPY [[CONCAT_VECTORS]](<4 x s16>) + ; VI-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[COPY]](<4 x s16>) + ; VI-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>) + 
; VI-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST]](s32) + ; VI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; VI-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) + ; VI-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) + ; VI-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>) + ; VI-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST1]](s32) + ; VI-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C]](s32) + ; VI-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR1]](s32) + ; VI-NEXT: [[FSQRT:%[0-9]+]]:_(s16) = G_FSQRT [[TRUNC]] + ; VI-NEXT: [[FSQRT1:%[0-9]+]]:_(s16) = G_FSQRT [[TRUNC1]] + ; VI-NEXT: [[FSQRT2:%[0-9]+]]:_(s16) = G_FSQRT [[TRUNC2]] + ; VI-NEXT: [[FSQRT3:%[0-9]+]]:_(s16) = G_FSQRT [[TRUNC3]] + ; VI-NEXT: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[FSQRT]](s16) + ; VI-NEXT: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[FSQRT1]](s16) + ; VI-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C]](s32) + ; VI-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL]] + ; VI-NEXT: [[BITCAST2:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) + ; VI-NEXT: [[ZEXT2:%[0-9]+]]:_(s32) = G_ZEXT [[FSQRT2]](s16) + ; VI-NEXT: [[ZEXT3:%[0-9]+]]:_(s32) = G_ZEXT [[FSQRT3]](s16) + ; VI-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[ZEXT3]], [[C]](s32) + ; VI-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[ZEXT2]], [[SHL1]] + ; VI-NEXT: [[BITCAST3:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32) + ; VI-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BITCAST2]](<2 x s16>), [[BITCAST3]](<2 x s16>) + ; VI-NEXT: $vgpr0_vgpr1 = COPY [[CONCAT_VECTORS]](<4 x s16>) ; GFX9-LABEL: name: test_fsqrt_v4s16 ; GFX9: [[COPY:%[0-9]+]]:_(<4 x s16>) = COPY $vgpr0_vgpr1 - ; GFX9: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[COPY]](<4 x s16>) - ; GFX9: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>) - ; GFX9: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST]](s32) - ; GFX9: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; GFX9: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) - ; GFX9: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) - ; GFX9: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>) - ; GFX9: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST1]](s32) - ; GFX9: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C]](s32) - ; GFX9: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR1]](s32) - ; GFX9: [[FSQRT:%[0-9]+]]:_(s16) = G_FSQRT [[TRUNC]] - ; GFX9: [[FSQRT1:%[0-9]+]]:_(s16) = G_FSQRT [[TRUNC1]] - ; GFX9: [[FSQRT2:%[0-9]+]]:_(s16) = G_FSQRT [[TRUNC2]] - ; GFX9: [[FSQRT3:%[0-9]+]]:_(s16) = G_FSQRT [[TRUNC3]] - ; GFX9: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[FSQRT]](s16) - ; GFX9: [[ANYEXT1:%[0-9]+]]:_(s32) = G_ANYEXT [[FSQRT1]](s16) - ; GFX9: [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[ANYEXT]](s32), [[ANYEXT1]](s32) - ; GFX9: [[ANYEXT2:%[0-9]+]]:_(s32) = G_ANYEXT [[FSQRT2]](s16) - ; GFX9: [[ANYEXT3:%[0-9]+]]:_(s32) = G_ANYEXT [[FSQRT3]](s16) - ; GFX9: [[BUILD_VECTOR_TRUNC1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[ANYEXT2]](s32), [[ANYEXT3]](s32) - ; GFX9: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR_TRUNC]](<2 x s16>), [[BUILD_VECTOR_TRUNC1]](<2 x s16>) - ; GFX9: $vgpr0_vgpr1 = COPY [[CONCAT_VECTORS]](<4 x s16>) + ; GFX9-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[COPY]](<4 x s16>) + ; GFX9-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>) + ; GFX9-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST]](s32) + ; GFX9-NEXT: 
[[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; GFX9-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) + ; GFX9-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) + ; GFX9-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>) + ; GFX9-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST1]](s32) + ; GFX9-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C]](s32) + ; GFX9-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR1]](s32) + ; GFX9-NEXT: [[FSQRT:%[0-9]+]]:_(s16) = G_FSQRT [[TRUNC]] + ; GFX9-NEXT: [[FSQRT1:%[0-9]+]]:_(s16) = G_FSQRT [[TRUNC1]] + ; GFX9-NEXT: [[FSQRT2:%[0-9]+]]:_(s16) = G_FSQRT [[TRUNC2]] + ; GFX9-NEXT: [[FSQRT3:%[0-9]+]]:_(s16) = G_FSQRT [[TRUNC3]] + ; GFX9-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[FSQRT]](s16) + ; GFX9-NEXT: [[ANYEXT1:%[0-9]+]]:_(s32) = G_ANYEXT [[FSQRT1]](s16) + ; GFX9-NEXT: [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[ANYEXT]](s32), [[ANYEXT1]](s32) + ; GFX9-NEXT: [[ANYEXT2:%[0-9]+]]:_(s32) = G_ANYEXT [[FSQRT2]](s16) + ; GFX9-NEXT: [[ANYEXT3:%[0-9]+]]:_(s32) = G_ANYEXT [[FSQRT3]](s16) + ; GFX9-NEXT: [[BUILD_VECTOR_TRUNC1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[ANYEXT2]](s32), [[ANYEXT3]](s32) + ; GFX9-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR_TRUNC]](<2 x s16>), [[BUILD_VECTOR_TRUNC1]](<2 x s16>) + ; GFX9-NEXT: $vgpr0_vgpr1 = COPY [[CONCAT_VECTORS]](<4 x s16>) %0:_(<4 x s16>) = COPY $vgpr0_vgpr1 %1:_(<4 x s16>) = G_FSQRT %0 $vgpr0_vgpr1 = COPY %1 diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-fsub.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-fsub.mir index 6dbeab45e1cd..46980b399c4d 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-fsub.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-fsub.mir @@ -1,8 +1,8 @@ # NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py -# RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=tahiti -run-pass=legalizer -global-isel-abort=0 %s -o - | FileCheck -check-prefix=SI %s -# RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=fiji -run-pass=legalizer -global-isel-abort=0 %s -o - | FileCheck -check-prefix=VI %s -# RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx900 -run-pass=legalizer -global-isel-abort=0 %s -o - | FileCheck -check-prefix=GFX9 %s -# RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx1010 -run-pass=legalizer -global-isel-abort=0 %s -o - | FileCheck -check-prefix=GFX9 %s +# RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=tahiti -run-pass=legalizer %s -o - | FileCheck -check-prefix=SI %s +# RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=fiji -run-pass=legalizer %s -o - | FileCheck -check-prefix=VI %s +# RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx900 -run-pass=legalizer %s -o - | FileCheck -check-prefix=GFX9 %s +# RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx1010 -run-pass=legalizer %s -o - | FileCheck -check-prefix=GFX9 %s --- name: test_fsub_s32 diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-implicit-def.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-implicit-def.mir index be6d4baccd1f..00eed0e2a64c 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-implicit-def.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-implicit-def.mir @@ -9,7 +9,7 @@ body: | ; CHECK-LABEL: name: test_implicit_def_s1 ; CHECK: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF - ; CHECK: $vgpr0 = COPY [[DEF]](s32) + ; CHECK-NEXT: $vgpr0 = COPY [[DEF]](s32) %0:_(s1) = G_IMPLICIT_DEF %1:_(s32) = G_ANYEXT %0 $vgpr0 = COPY %1 @@ -22,7 +22,7 @@ body: | ; CHECK-LABEL: name: test_implicit_def_s7 ; 
CHECK: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF - ; CHECK: $vgpr0 = COPY [[DEF]](s32) + ; CHECK-NEXT: $vgpr0 = COPY [[DEF]](s32) %0:_(s7) = G_IMPLICIT_DEF %1:_(s32) = G_ANYEXT %0 $vgpr0 = COPY %1 @@ -35,7 +35,7 @@ body: | ; CHECK-LABEL: name: test_implicit_def_s8 ; CHECK: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF - ; CHECK: $vgpr0 = COPY [[DEF]](s32) + ; CHECK-NEXT: $vgpr0 = COPY [[DEF]](s32) %0:_(s8) = G_IMPLICIT_DEF %1:_(s32) = G_ANYEXT %0 $vgpr0 = COPY %1 @@ -48,7 +48,7 @@ body: | ; CHECK-LABEL: name: test_implicit_def_s16 ; CHECK: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF - ; CHECK: $vgpr0 = COPY [[DEF]](s32) + ; CHECK-NEXT: $vgpr0 = COPY [[DEF]](s32) %0:_(s16) = G_IMPLICIT_DEF %1:_(s32) = G_ANYEXT %0 $vgpr0 = COPY %1 @@ -61,7 +61,7 @@ body: | ; CHECK-LABEL: name: test_implicit_def_s32 ; CHECK: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF - ; CHECK: $vgpr0 = COPY [[DEF]](s32) + ; CHECK-NEXT: $vgpr0 = COPY [[DEF]](s32) %0:_(s32) = G_IMPLICIT_DEF $vgpr0 = COPY %0 ... @@ -73,7 +73,7 @@ body: | ; CHECK-LABEL: name: test_implicit_def_48 ; CHECK: [[DEF:%[0-9]+]]:_(s64) = G_IMPLICIT_DEF - ; CHECK: $vgpr0_vgpr1 = COPY [[DEF]](s64) + ; CHECK-NEXT: $vgpr0_vgpr1 = COPY [[DEF]](s64) %0:_(s48) = G_IMPLICIT_DEF %1:_(s64) = G_ANYEXT %0 $vgpr0_vgpr1 = COPY %1 @@ -86,7 +86,7 @@ body: | ; CHECK-LABEL: name: test_implicit_def_s64 ; CHECK: [[DEF:%[0-9]+]]:_(s64) = G_IMPLICIT_DEF - ; CHECK: $vgpr0_vgpr1 = COPY [[DEF]](s64) + ; CHECK-NEXT: $vgpr0_vgpr1 = COPY [[DEF]](s64) %0:_(s64) = G_IMPLICIT_DEF $vgpr0_vgpr1 = COPY %0 ... @@ -98,8 +98,8 @@ body: | ; CHECK-LABEL: name: test_implicit_def_s65 ; CHECK: [[DEF:%[0-9]+]]:_(s128) = G_IMPLICIT_DEF - ; CHECK: [[TRUNC:%[0-9]+]]:_(s96) = G_TRUNC [[DEF]](s128) - ; CHECK: $vgpr0_vgpr1_vgpr2 = COPY [[TRUNC]](s96) + ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(s96) = G_TRUNC [[DEF]](s128) + ; CHECK-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[TRUNC]](s96) %0:_(s65) = G_IMPLICIT_DEF %1:_(s96) = G_ANYEXT %0 $vgpr0_vgpr1_vgpr2 = COPY %1 @@ -112,7 +112,7 @@ body: | ; CHECK-LABEL: name: test_implicit_def_s128 ; CHECK: [[DEF:%[0-9]+]]:_(s128) = G_IMPLICIT_DEF - ; CHECK: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[DEF]](s128) + ; CHECK-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[DEF]](s128) %0:_(s128) = G_IMPLICIT_DEF $vgpr0_vgpr1_vgpr2_vgpr3 = COPY %0 ... @@ -124,7 +124,7 @@ body: | ; CHECK-LABEL: name: test_implicit_def_256 ; CHECK: [[DEF:%[0-9]+]]:_(s256) = G_IMPLICIT_DEF - ; CHECK: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[DEF]](s256) + ; CHECK-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[DEF]](s256) %0:_(s256) = G_IMPLICIT_DEF $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY %0 ... 
@@ -136,8 +136,8 @@ body: | ; CHECK-LABEL: name: test_implicit_def_s448 ; CHECK: [[DEF:%[0-9]+]]:_(s448) = G_IMPLICIT_DEF - ; CHECK: [[EXTRACT:%[0-9]+]]:_(s32) = G_EXTRACT [[DEF]](s448), 0 - ; CHECK: $vgpr0 = COPY [[EXTRACT]](s32) + ; CHECK-NEXT: [[EXTRACT:%[0-9]+]]:_(s32) = G_EXTRACT [[DEF]](s448), 0 + ; CHECK-NEXT: $vgpr0 = COPY [[EXTRACT]](s32) %0:_(s448) = G_IMPLICIT_DEF %1:_(s32) = G_EXTRACT %0, 0 $vgpr0 = COPY %1 @@ -150,8 +150,8 @@ body: | ; CHECK-LABEL: name: test_implicit_def_s512 ; CHECK: [[DEF:%[0-9]+]]:_(s512) = G_IMPLICIT_DEF - ; CHECK: [[EXTRACT:%[0-9]+]]:_(s32) = G_EXTRACT [[DEF]](s512), 0 - ; CHECK: $vgpr0 = COPY [[EXTRACT]](s32) + ; CHECK-NEXT: [[EXTRACT:%[0-9]+]]:_(s32) = G_EXTRACT [[DEF]](s512), 0 + ; CHECK-NEXT: $vgpr0 = COPY [[EXTRACT]](s32) %0:_(s512) = G_IMPLICIT_DEF %1:_(s32) = G_EXTRACT %0, 0 $vgpr0 = COPY %1 @@ -164,8 +164,8 @@ body: | ; CHECK-LABEL: name: test_implicit_def_s1024 ; CHECK: [[DEF:%[0-9]+]]:_(s1024) = G_IMPLICIT_DEF - ; CHECK: [[EXTRACT:%[0-9]+]]:_(s32) = G_EXTRACT [[DEF]](s1024), 0 - ; CHECK: $vgpr0 = COPY [[EXTRACT]](s32) + ; CHECK-NEXT: [[EXTRACT:%[0-9]+]]:_(s32) = G_EXTRACT [[DEF]](s1024), 0 + ; CHECK-NEXT: $vgpr0 = COPY [[EXTRACT]](s32) %0:_(s1024) = G_IMPLICIT_DEF %1:_(s32) = G_EXTRACT %0, 0 $vgpr0 = COPY %1 @@ -178,8 +178,8 @@ body: | ; CHECK-LABEL: name: test_implicit_def_s1056 ; CHECK: [[DEF:%[0-9]+]]:_(s1024) = G_IMPLICIT_DEF - ; CHECK: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32), [[UV6:%[0-9]+]]:_(s32), [[UV7:%[0-9]+]]:_(s32), [[UV8:%[0-9]+]]:_(s32), [[UV9:%[0-9]+]]:_(s32), [[UV10:%[0-9]+]]:_(s32), [[UV11:%[0-9]+]]:_(s32), [[UV12:%[0-9]+]]:_(s32), [[UV13:%[0-9]+]]:_(s32), [[UV14:%[0-9]+]]:_(s32), [[UV15:%[0-9]+]]:_(s32), [[UV16:%[0-9]+]]:_(s32), [[UV17:%[0-9]+]]:_(s32), [[UV18:%[0-9]+]]:_(s32), [[UV19:%[0-9]+]]:_(s32), [[UV20:%[0-9]+]]:_(s32), [[UV21:%[0-9]+]]:_(s32), [[UV22:%[0-9]+]]:_(s32), [[UV23:%[0-9]+]]:_(s32), [[UV24:%[0-9]+]]:_(s32), [[UV25:%[0-9]+]]:_(s32), [[UV26:%[0-9]+]]:_(s32), [[UV27:%[0-9]+]]:_(s32), [[UV28:%[0-9]+]]:_(s32), [[UV29:%[0-9]+]]:_(s32), [[UV30:%[0-9]+]]:_(s32), [[UV31:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[DEF]](s1024) - ; CHECK: $vgpr0 = COPY [[UV]](s32) + ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32), [[UV6:%[0-9]+]]:_(s32), [[UV7:%[0-9]+]]:_(s32), [[UV8:%[0-9]+]]:_(s32), [[UV9:%[0-9]+]]:_(s32), [[UV10:%[0-9]+]]:_(s32), [[UV11:%[0-9]+]]:_(s32), [[UV12:%[0-9]+]]:_(s32), [[UV13:%[0-9]+]]:_(s32), [[UV14:%[0-9]+]]:_(s32), [[UV15:%[0-9]+]]:_(s32), [[UV16:%[0-9]+]]:_(s32), [[UV17:%[0-9]+]]:_(s32), [[UV18:%[0-9]+]]:_(s32), [[UV19:%[0-9]+]]:_(s32), [[UV20:%[0-9]+]]:_(s32), [[UV21:%[0-9]+]]:_(s32), [[UV22:%[0-9]+]]:_(s32), [[UV23:%[0-9]+]]:_(s32), [[UV24:%[0-9]+]]:_(s32), [[UV25:%[0-9]+]]:_(s32), [[UV26:%[0-9]+]]:_(s32), [[UV27:%[0-9]+]]:_(s32), [[UV28:%[0-9]+]]:_(s32), [[UV29:%[0-9]+]]:_(s32), [[UV30:%[0-9]+]]:_(s32), [[UV31:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[DEF]](s1024) + ; CHECK-NEXT: $vgpr0 = COPY [[UV]](s32) %0:_(s1056) = G_IMPLICIT_DEF %1:_(s32) = G_TRUNC %0 $vgpr0 = COPY %1 @@ -192,8 +192,8 @@ body: | ; CHECK-LABEL: name: test_implicit_def_s2048 ; CHECK: [[DEF:%[0-9]+]]:_(s1024) = G_IMPLICIT_DEF - ; CHECK: [[TRUNC:%[0-9]+]]:_(s32) = G_TRUNC [[DEF]](s1024) - ; CHECK: $vgpr0 = COPY [[TRUNC]](s32) + ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(s32) = G_TRUNC [[DEF]](s1024) + ; CHECK-NEXT: $vgpr0 = COPY [[TRUNC]](s32) 
%0:_(s2048) = G_IMPLICIT_DEF %1:_(s32) = G_TRUNC %0 $vgpr0 = COPY %1 @@ -206,7 +206,7 @@ body: | ; CHECK-LABEL: name: test_implicit_def_v2s32 ; CHECK: [[DEF:%[0-9]+]]:_(<2 x s32>) = G_IMPLICIT_DEF - ; CHECK: $vgpr0_vgpr1 = COPY [[DEF]](<2 x s32>) + ; CHECK-NEXT: $vgpr0_vgpr1 = COPY [[DEF]](<2 x s32>) %0:_(<2 x s32>) = G_IMPLICIT_DEF $vgpr0_vgpr1 = COPY %0 ... @@ -218,7 +218,7 @@ body: | ; CHECK-LABEL: name: test_implicit_def_v3s32 ; CHECK: [[DEF:%[0-9]+]]:_(<3 x s32>) = G_IMPLICIT_DEF - ; CHECK: $vgpr0_vgpr1_vgpr2 = COPY [[DEF]](<3 x s32>) + ; CHECK-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[DEF]](<3 x s32>) %0:_(<3 x s32>) = G_IMPLICIT_DEF $vgpr0_vgpr1_vgpr2 = COPY %0 ... @@ -230,7 +230,7 @@ body: | ; CHECK-LABEL: name: test_implicit_def_v4s32 ; CHECK: [[DEF:%[0-9]+]]:_(<4 x s32>) = G_IMPLICIT_DEF - ; CHECK: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[DEF]](<4 x s32>) + ; CHECK-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[DEF]](<4 x s32>) %0:_(<4 x s32>) = G_IMPLICIT_DEF $vgpr0_vgpr1_vgpr2_vgpr3 = COPY %0 ... @@ -242,7 +242,7 @@ body: | ; CHECK-LABEL: name: test_implicit_def_v5s32 ; CHECK: [[DEF:%[0-9]+]]:_(<5 x s32>) = G_IMPLICIT_DEF - ; CHECK: S_NOP 0, implicit [[DEF]](<5 x s32>) + ; CHECK-NEXT: S_NOP 0, implicit [[DEF]](<5 x s32>) %0:_(<5 x s32>) = G_IMPLICIT_DEF S_NOP 0, implicit %0 ... @@ -254,7 +254,7 @@ body: | ; CHECK-LABEL: name: test_implicit_def_v6s32 ; CHECK: [[DEF:%[0-9]+]]:_(<6 x s32>) = G_IMPLICIT_DEF - ; CHECK: S_NOP 0, implicit [[DEF]](<6 x s32>) + ; CHECK-NEXT: S_NOP 0, implicit [[DEF]](<6 x s32>) %0:_(<6 x s32>) = G_IMPLICIT_DEF S_NOP 0, implicit %0 ... @@ -266,7 +266,7 @@ body: | ; CHECK-LABEL: name: test_implicit_def_v7s32 ; CHECK: [[DEF:%[0-9]+]]:_(<7 x s32>) = G_IMPLICIT_DEF - ; CHECK: S_NOP 0, implicit [[DEF]](<7 x s32>) + ; CHECK-NEXT: S_NOP 0, implicit [[DEF]](<7 x s32>) %0:_(<7 x s32>) = G_IMPLICIT_DEF S_NOP 0, implicit %0 ... @@ -278,7 +278,7 @@ body: | ; CHECK-LABEL: name: test_implicit_def_v8s32 ; CHECK: [[DEF:%[0-9]+]]:_(<8 x s32>) = G_IMPLICIT_DEF - ; CHECK: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[DEF]](<8 x s32>) + ; CHECK-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[DEF]](<8 x s32>) %0:_(<8 x s32>) = G_IMPLICIT_DEF $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY %0 ... @@ -290,7 +290,7 @@ body: | ; CHECK-LABEL: name: test_implicit_def_v16s32 ; CHECK: [[DEF:%[0-9]+]]:_(<16 x s32>) = G_IMPLICIT_DEF - ; CHECK: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 = COPY [[DEF]](<16 x s32>) + ; CHECK-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 = COPY [[DEF]](<16 x s32>) %0:_(<16 x s32>) = G_IMPLICIT_DEF $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 = COPY %0 ... @@ -302,7 +302,7 @@ body: | ; CHECK-LABEL: name: test_implicit_def_v17s32 ; CHECK: [[DEF:%[0-9]+]]:_(<17 x s32>) = G_IMPLICIT_DEF - ; CHECK: S_NOP 0, implicit [[DEF]](<17 x s32>) + ; CHECK-NEXT: S_NOP 0, implicit [[DEF]](<17 x s32>) %0:_(<17 x s32>) = G_IMPLICIT_DEF S_NOP 0, implicit %0 ... @@ -314,7 +314,7 @@ body: | ; CHECK-LABEL: name: test_implicit_def_v32s32 ; CHECK: [[DEF:%[0-9]+]]:_(<32 x s32>) = G_IMPLICIT_DEF - ; CHECK: S_NOP 0, implicit [[DEF]](<32 x s32>) + ; CHECK-NEXT: S_NOP 0, implicit [[DEF]](<32 x s32>) %0:_(<32 x s32>) = G_IMPLICIT_DEF S_NOP 0, implicit %0 ... 
@@ -327,11 +327,11 @@ body: | ; CHECK-LABEL: name: test_implicit_def_v33s32 ; CHECK: [[DEF:%[0-9]+]]:_(<16 x s32>) = G_IMPLICIT_DEF - ; CHECK: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32), [[UV6:%[0-9]+]]:_(s32), [[UV7:%[0-9]+]]:_(s32), [[UV8:%[0-9]+]]:_(s32), [[UV9:%[0-9]+]]:_(s32), [[UV10:%[0-9]+]]:_(s32), [[UV11:%[0-9]+]]:_(s32), [[UV12:%[0-9]+]]:_(s32), [[UV13:%[0-9]+]]:_(s32), [[UV14:%[0-9]+]]:_(s32), [[UV15:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[DEF]](<16 x s32>) - ; CHECK: [[UV16:%[0-9]+]]:_(s32), [[UV17:%[0-9]+]]:_(s32), [[UV18:%[0-9]+]]:_(s32), [[UV19:%[0-9]+]]:_(s32), [[UV20:%[0-9]+]]:_(s32), [[UV21:%[0-9]+]]:_(s32), [[UV22:%[0-9]+]]:_(s32), [[UV23:%[0-9]+]]:_(s32), [[UV24:%[0-9]+]]:_(s32), [[UV25:%[0-9]+]]:_(s32), [[UV26:%[0-9]+]]:_(s32), [[UV27:%[0-9]+]]:_(s32), [[UV28:%[0-9]+]]:_(s32), [[UV29:%[0-9]+]]:_(s32), [[UV30:%[0-9]+]]:_(s32), [[UV31:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[DEF]](<16 x s32>) - ; CHECK: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; CHECK: G_STORE [[UV]](s32), [[COPY]](p1) :: (volatile store (s32), addrspace 1) - ; CHECK: G_STORE [[UV16]](s32), [[COPY]](p1) :: (volatile store (s32), addrspace 1) + ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32), [[UV6:%[0-9]+]]:_(s32), [[UV7:%[0-9]+]]:_(s32), [[UV8:%[0-9]+]]:_(s32), [[UV9:%[0-9]+]]:_(s32), [[UV10:%[0-9]+]]:_(s32), [[UV11:%[0-9]+]]:_(s32), [[UV12:%[0-9]+]]:_(s32), [[UV13:%[0-9]+]]:_(s32), [[UV14:%[0-9]+]]:_(s32), [[UV15:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[DEF]](<16 x s32>) + ; CHECK-NEXT: [[UV16:%[0-9]+]]:_(s32), [[UV17:%[0-9]+]]:_(s32), [[UV18:%[0-9]+]]:_(s32), [[UV19:%[0-9]+]]:_(s32), [[UV20:%[0-9]+]]:_(s32), [[UV21:%[0-9]+]]:_(s32), [[UV22:%[0-9]+]]:_(s32), [[UV23:%[0-9]+]]:_(s32), [[UV24:%[0-9]+]]:_(s32), [[UV25:%[0-9]+]]:_(s32), [[UV26:%[0-9]+]]:_(s32), [[UV27:%[0-9]+]]:_(s32), [[UV28:%[0-9]+]]:_(s32), [[UV29:%[0-9]+]]:_(s32), [[UV30:%[0-9]+]]:_(s32), [[UV31:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[DEF]](<16 x s32>) + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 + ; CHECK-NEXT: G_STORE [[UV]](s32), [[COPY]](p1) :: (volatile store (s32), addrspace 1) + ; CHECK-NEXT: G_STORE [[UV16]](s32), [[COPY]](p1) :: (volatile store (s32), addrspace 1) %0:_(<33 x s32>) = G_IMPLICIT_DEF %1:_(s32), %2:_(s32), %3:_(s32), %4:_(s32), %5:_(s32), %6:_(s32), %7:_(s32), %8:_(s32), %9:_(s32), %10:_(s32), %11:_(s32), %12:_(s32), %13:_(s32), %14:_(s32), %15:_(s32), %16:_(s32), %17:_(s32), %18:_(s32), %19:_(s32), %20:_(s32), %21:_(s32), %22:_(s32), %23:_(s32), %24:_(s32), %25:_(s32), %26:_(s32), %27:_(s32), %28:_(s32), %29:_(s32), %30:_(s32), %31:_(s32), %32:_(s32), %33:_(s32) = G_UNMERGE_VALUES %0 %34:_(p1) = COPY $vgpr0_vgpr1 @@ -347,9 +347,9 @@ body: | ; CHECK-LABEL: name: test_implicit_def_v64s32 ; CHECK: [[DEF:%[0-9]+]]:_(<16 x s32>) = G_IMPLICIT_DEF - ; CHECK: [[CONCAT_VECTORS:%[0-9]+]]:_(<64 x s32>) = G_CONCAT_VECTORS [[DEF]](<16 x s32>), [[DEF]](<16 x s32>), [[DEF]](<16 x s32>), [[DEF]](<16 x s32>) - ; CHECK: [[CONCAT_VECTORS1:%[0-9]+]]:_(<32 x s32>) = G_CONCAT_VECTORS [[DEF]](<16 x s32>), [[DEF]](<16 x s32>) - ; CHECK: S_NOP 0, implicit [[CONCAT_VECTORS]](<64 x s32>), implicit [[CONCAT_VECTORS1]](<32 x s32>) + ; CHECK-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<64 x s32>) = G_CONCAT_VECTORS [[DEF]](<16 x s32>), [[DEF]](<16 x s32>), [[DEF]](<16 x s32>), [[DEF]](<16 x s32>) + ; CHECK-NEXT: 
[[CONCAT_VECTORS1:%[0-9]+]]:_(<32 x s32>) = G_CONCAT_VECTORS [[DEF]](<16 x s32>), [[DEF]](<16 x s32>) + ; CHECK-NEXT: S_NOP 0, implicit [[CONCAT_VECTORS]](<64 x s32>), implicit [[CONCAT_VECTORS1]](<32 x s32>) %0:_(<64 x s32>) = G_IMPLICIT_DEF %1:_(<32 x s32>), %2:_(<32 x s32>) = G_UNMERGE_VALUES %0 S_NOP 0, implicit %0, implicit %1 @@ -362,7 +362,7 @@ body: | ; CHECK-LABEL: name: test_implicit_def_v2s1 ; CHECK: [[DEF:%[0-9]+]]:_(<2 x s32>) = G_IMPLICIT_DEF - ; CHECK: $vgpr0_vgpr1 = COPY [[DEF]](<2 x s32>) + ; CHECK-NEXT: $vgpr0_vgpr1 = COPY [[DEF]](<2 x s32>) %0:_(<2 x s1>) = G_IMPLICIT_DEF %1:_(<2 x s32>) = G_ANYEXT %0 $vgpr0_vgpr1 = COPY %1 @@ -375,7 +375,7 @@ body: | ; CHECK-LABEL: name: test_implicit_def_v3s1 ; CHECK: [[DEF:%[0-9]+]]:_(<3 x s32>) = G_IMPLICIT_DEF - ; CHECK: $vgpr0_vgpr1_vgpr2 = COPY [[DEF]](<3 x s32>) + ; CHECK-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[DEF]](<3 x s32>) %0:_(<3 x s1>) = G_IMPLICIT_DEF %1:_(<3 x s32>) = G_ANYEXT %0 $vgpr0_vgpr1_vgpr2 = COPY %1 @@ -388,7 +388,7 @@ body: | ; CHECK-LABEL: name: test_implicit_def_v2s8 ; CHECK: [[DEF:%[0-9]+]]:_(<2 x s32>) = G_IMPLICIT_DEF - ; CHECK: $vgpr0_vgpr1 = COPY [[DEF]](<2 x s32>) + ; CHECK-NEXT: $vgpr0_vgpr1 = COPY [[DEF]](<2 x s32>) %0:_(<2 x s8>) = G_IMPLICIT_DEF %1:_(<2 x s32>) = G_ANYEXT %0 $vgpr0_vgpr1 = COPY %1 @@ -401,9 +401,9 @@ body: | ; CHECK-LABEL: name: test_implicit_def_v3s8 ; CHECK: [[DEF:%[0-9]+]]:_(<4 x s32>) = G_IMPLICIT_DEF - ; CHECK: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[DEF]](<4 x s32>) - ; CHECK: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[UV]](s32), [[UV1]](s32), [[UV2]](s32) - ; CHECK: $vgpr0_vgpr1_vgpr2 = COPY [[BUILD_VECTOR]](<3 x s32>) + ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[DEF]](<4 x s32>) + ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[UV]](s32), [[UV1]](s32), [[UV2]](s32) + ; CHECK-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[BUILD_VECTOR]](<3 x s32>) %0:_(<3 x s8>) = G_IMPLICIT_DEF %1:_(<3 x s32>) = G_ANYEXT %0 $vgpr0_vgpr1_vgpr2 = COPY %1 @@ -416,7 +416,7 @@ body: | ; CHECK-LABEL: name: test_implicit_def_v2s16 ; CHECK: [[DEF:%[0-9]+]]:_(<2 x s16>) = G_IMPLICIT_DEF - ; CHECK: $vgpr0 = COPY [[DEF]](<2 x s16>) + ; CHECK-NEXT: $vgpr0 = COPY [[DEF]](<2 x s16>) %0:_(<2 x s16>) = G_IMPLICIT_DEF $vgpr0 = COPY %0 ... 
@@ -428,11 +428,11 @@ body: | ; CHECK-LABEL: name: test_implicit_def_v3s16 ; CHECK: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF - ; CHECK: [[DEF1:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF - ; CHECK: [[CONCAT_VECTORS:%[0-9]+]]:_(<12 x s16>) = G_CONCAT_VECTORS [[DEF]](<4 x s16>), [[DEF1]](<4 x s16>), [[DEF1]](<4 x s16>) - ; CHECK: [[UV:%[0-9]+]]:_(<3 x s16>), [[UV1:%[0-9]+]]:_(<3 x s16>), [[UV2:%[0-9]+]]:_(<3 x s16>), [[UV3:%[0-9]+]]:_(<3 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<12 x s16>) - ; CHECK: [[INSERT:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF1]], [[UV]](<3 x s16>), 0 - ; CHECK: $vgpr0_vgpr1 = COPY [[INSERT]](<4 x s16>) + ; CHECK-NEXT: [[DEF1:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF + ; CHECK-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<12 x s16>) = G_CONCAT_VECTORS [[DEF]](<4 x s16>), [[DEF1]](<4 x s16>), [[DEF1]](<4 x s16>) + ; CHECK-NEXT: [[UV:%[0-9]+]]:_(<3 x s16>), [[UV1:%[0-9]+]]:_(<3 x s16>), [[UV2:%[0-9]+]]:_(<3 x s16>), [[UV3:%[0-9]+]]:_(<3 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<12 x s16>) + ; CHECK-NEXT: [[INSERT:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF1]], [[UV]](<3 x s16>), 0 + ; CHECK-NEXT: $vgpr0_vgpr1 = COPY [[INSERT]](<4 x s16>) %0:_(<3 x s16>) = G_IMPLICIT_DEF %1:_(<4 x s16>) = G_IMPLICIT_DEF %2:_(<4 x s16>) = G_INSERT %1, %0, 0 @@ -446,7 +446,7 @@ body: | ; CHECK-LABEL: name: test_implicit_def_v4s16 ; CHECK: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF - ; CHECK: $vgpr0_vgpr1 = COPY [[DEF]](<4 x s16>) + ; CHECK-NEXT: $vgpr0_vgpr1 = COPY [[DEF]](<4 x s16>) %0:_(<4 x s16>) = G_IMPLICIT_DEF $vgpr0_vgpr1 = COPY %0 ... @@ -458,12 +458,12 @@ body: | ; CHECK-LABEL: name: test_implicit_def_v5s16 ; CHECK: [[DEF:%[0-9]+]]:_(<6 x s16>) = G_IMPLICIT_DEF - ; CHECK: [[DEF1:%[0-9]+]]:_(<6 x s16>) = G_IMPLICIT_DEF - ; CHECK: [[CONCAT_VECTORS:%[0-9]+]]:_(<30 x s16>) = G_CONCAT_VECTORS [[DEF]](<6 x s16>), [[DEF1]](<6 x s16>), [[DEF1]](<6 x s16>), [[DEF1]](<6 x s16>), [[DEF1]](<6 x s16>) - ; CHECK: [[UV:%[0-9]+]]:_(<5 x s16>), [[UV1:%[0-9]+]]:_(<5 x s16>), [[UV2:%[0-9]+]]:_(<5 x s16>), [[UV3:%[0-9]+]]:_(<5 x s16>), [[UV4:%[0-9]+]]:_(<5 x s16>), [[UV5:%[0-9]+]]:_(<5 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<30 x s16>) - ; CHECK: [[DEF2:%[0-9]+]]:_(<8 x s16>) = G_IMPLICIT_DEF - ; CHECK: [[INSERT:%[0-9]+]]:_(<8 x s16>) = G_INSERT [[DEF2]], [[UV]](<5 x s16>), 0 - ; CHECK: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[INSERT]](<8 x s16>) + ; CHECK-NEXT: [[DEF1:%[0-9]+]]:_(<6 x s16>) = G_IMPLICIT_DEF + ; CHECK-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<30 x s16>) = G_CONCAT_VECTORS [[DEF]](<6 x s16>), [[DEF1]](<6 x s16>), [[DEF1]](<6 x s16>), [[DEF1]](<6 x s16>), [[DEF1]](<6 x s16>) + ; CHECK-NEXT: [[UV:%[0-9]+]]:_(<5 x s16>), [[UV1:%[0-9]+]]:_(<5 x s16>), [[UV2:%[0-9]+]]:_(<5 x s16>), [[UV3:%[0-9]+]]:_(<5 x s16>), [[UV4:%[0-9]+]]:_(<5 x s16>), [[UV5:%[0-9]+]]:_(<5 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<30 x s16>) + ; CHECK-NEXT: [[DEF2:%[0-9]+]]:_(<8 x s16>) = G_IMPLICIT_DEF + ; CHECK-NEXT: [[INSERT:%[0-9]+]]:_(<8 x s16>) = G_INSERT [[DEF2]], [[UV]](<5 x s16>), 0 + ; CHECK-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[INSERT]](<8 x s16>) %0:_(<5 x s16>) = G_IMPLICIT_DEF %1:_(<8 x s16>) = G_IMPLICIT_DEF %2:_(<8 x s16>) = G_INSERT %1, %0, 0 @@ -477,9 +477,9 @@ body: | ; CHECK-LABEL: name: test_implicit_def_v6s16 ; CHECK: [[DEF:%[0-9]+]]:_(<6 x s16>) = G_IMPLICIT_DEF - ; CHECK: [[DEF1:%[0-9]+]]:_(<8 x s16>) = G_IMPLICIT_DEF - ; CHECK: [[INSERT:%[0-9]+]]:_(<8 x s16>) = G_INSERT [[DEF1]], [[DEF]](<6 x s16>), 0 - ; CHECK: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[INSERT]](<8 x s16>) + ; CHECK-NEXT: 
[[DEF1:%[0-9]+]]:_(<8 x s16>) = G_IMPLICIT_DEF + ; CHECK-NEXT: [[INSERT:%[0-9]+]]:_(<8 x s16>) = G_INSERT [[DEF1]], [[DEF]](<6 x s16>), 0 + ; CHECK-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[INSERT]](<8 x s16>) %0:_(<6 x s16>) = G_IMPLICIT_DEF %1:_(<8 x s16>) = G_IMPLICIT_DEF %2:_(<8 x s16>) = G_INSERT %1, %0, 0 @@ -493,7 +493,7 @@ body: | ; CHECK-LABEL: name: test_implicit_def_v8s16 ; CHECK: [[DEF:%[0-9]+]]:_(<8 x s16>) = G_IMPLICIT_DEF - ; CHECK: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[DEF]](<8 x s16>) + ; CHECK-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[DEF]](<8 x s16>) %0:_(<8 x s16>) = G_IMPLICIT_DEF $vgpr0_vgpr1_vgpr2_vgpr3 = COPY %0 ... @@ -505,7 +505,7 @@ body: | ; CHECK-LABEL: name: test_implicit_def_v2s64 ; CHECK: [[DEF:%[0-9]+]]:_(<2 x s64>) = G_IMPLICIT_DEF - ; CHECK: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[DEF]](<2 x s64>) + ; CHECK-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[DEF]](<2 x s64>) %0:_(<2 x s64>) = G_IMPLICIT_DEF $vgpr0_vgpr1_vgpr2_vgpr3 = COPY %0 ... @@ -517,8 +517,8 @@ body: | ; CHECK-LABEL: name: test_implicit_def_v4s8 ; CHECK: [[DEF:%[0-9]+]]:_(<4 x s32>) = G_IMPLICIT_DEF - ; CHECK: [[TRUNC:%[0-9]+]]:_(<4 x s8>) = G_TRUNC [[DEF]](<4 x s32>) - ; CHECK: $vgpr0 = COPY [[TRUNC]](<4 x s8>) + ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(<4 x s8>) = G_TRUNC [[DEF]](<4 x s32>) + ; CHECK-NEXT: $vgpr0 = COPY [[TRUNC]](<4 x s8>) %0:_(<4 x s8>) = G_IMPLICIT_DEF $vgpr0 = COPY %0 ... @@ -530,7 +530,7 @@ body: | ; CHECK-LABEL: name: test_implicit_def_p0 ; CHECK: [[DEF:%[0-9]+]]:_(p0) = G_IMPLICIT_DEF - ; CHECK: $vgpr0_vgpr1 = COPY [[DEF]](p0) + ; CHECK-NEXT: $vgpr0_vgpr1 = COPY [[DEF]](p0) %0:_(p0) = G_IMPLICIT_DEF $vgpr0_vgpr1 = COPY %0 ... @@ -542,7 +542,7 @@ body: | ; CHECK-LABEL: name: test_implicit_def_p1 ; CHECK: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF - ; CHECK: $vgpr0_vgpr1 = COPY [[DEF]](p1) + ; CHECK-NEXT: $vgpr0_vgpr1 = COPY [[DEF]](p1) %0:_(p1) = G_IMPLICIT_DEF $vgpr0_vgpr1 = COPY %0 ... @@ -554,7 +554,7 @@ body: | ; CHECK-LABEL: name: test_implicit_def_p2 ; CHECK: [[DEF:%[0-9]+]]:_(p2) = G_IMPLICIT_DEF - ; CHECK: $vgpr0 = COPY [[DEF]](p2) + ; CHECK-NEXT: $vgpr0 = COPY [[DEF]](p2) %0:_(p2) = G_IMPLICIT_DEF $vgpr0 = COPY %0 ... @@ -566,7 +566,7 @@ body: | ; CHECK-LABEL: name: test_implicit_def_p3 ; CHECK: [[DEF:%[0-9]+]]:_(p3) = G_IMPLICIT_DEF - ; CHECK: $vgpr0 = COPY [[DEF]](p3) + ; CHECK-NEXT: $vgpr0 = COPY [[DEF]](p3) %0:_(p3) = G_IMPLICIT_DEF $vgpr0 = COPY %0 ... @@ -578,7 +578,7 @@ body: | ; CHECK-LABEL: name: test_implicit_def_p4 ; CHECK: [[DEF:%[0-9]+]]:_(p4) = G_IMPLICIT_DEF - ; CHECK: $vgpr0_vgpr1 = COPY [[DEF]](p4) + ; CHECK-NEXT: $vgpr0_vgpr1 = COPY [[DEF]](p4) %0:_(p4) = G_IMPLICIT_DEF $vgpr0_vgpr1 = COPY %0 ... @@ -590,7 +590,7 @@ body: | ; CHECK-LABEL: name: test_implicit_def_p5 ; CHECK: [[DEF:%[0-9]+]]:_(p5) = G_IMPLICIT_DEF - ; CHECK: $vgpr0 = COPY [[DEF]](p5) + ; CHECK-NEXT: $vgpr0 = COPY [[DEF]](p5) %0:_(p5) = G_IMPLICIT_DEF $vgpr0 = COPY %0 ... 
@@ -602,7 +602,7 @@ body: | ; CHECK-LABEL: name: test_implicit_def_p999 ; CHECK: [[DEF:%[0-9]+]]:_(p999) = G_IMPLICIT_DEF - ; CHECK: $vgpr0_vgpr1 = COPY [[DEF]](p999) + ; CHECK-NEXT: $vgpr0_vgpr1 = COPY [[DEF]](p999) %0:_(p999) = G_IMPLICIT_DEF $vgpr0_vgpr1 = COPY %0 @@ -615,8 +615,8 @@ body: | ; CHECK-LABEL: name: test_implicit_def_v2s1024 ; CHECK: [[DEF:%[0-9]+]]:_(<2 x s1024>) = G_IMPLICIT_DEF - ; CHECK: [[UV:%[0-9]+]]:_(s1024), [[UV1:%[0-9]+]]:_(s1024) = G_UNMERGE_VALUES [[DEF]](<2 x s1024>) - ; CHECK: S_ENDPGM 0, implicit [[UV]](s1024), implicit [[UV1]](s1024) + ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s1024), [[UV1:%[0-9]+]]:_(s1024) = G_UNMERGE_VALUES [[DEF]](<2 x s1024>) + ; CHECK-NEXT: S_ENDPGM 0, implicit [[UV]](s1024), implicit [[UV1]](s1024) %0:_(<2 x s1024>) = G_IMPLICIT_DEF %1:_(s1024), %2:_(s1024) = G_UNMERGE_VALUES %0 S_ENDPGM 0, implicit %1, implicit %2 @@ -630,8 +630,8 @@ body: | ; CHECK-LABEL: name: test_implicit_def_v3s1024 ; CHECK: [[DEF:%[0-9]+]]:_(<3 x s1024>) = G_IMPLICIT_DEF - ; CHECK: [[UV:%[0-9]+]]:_(s1024), [[UV1:%[0-9]+]]:_(s1024), [[UV2:%[0-9]+]]:_(s1024) = G_UNMERGE_VALUES [[DEF]](<3 x s1024>) - ; CHECK: S_ENDPGM 0, implicit [[UV]](s1024), implicit [[UV1]](s1024), implicit [[UV2]](s1024) + ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s1024), [[UV1:%[0-9]+]]:_(s1024), [[UV2:%[0-9]+]]:_(s1024) = G_UNMERGE_VALUES [[DEF]](<3 x s1024>) + ; CHECK-NEXT: S_ENDPGM 0, implicit [[UV]](s1024), implicit [[UV1]](s1024), implicit [[UV2]](s1024) %0:_(<3 x s1024>) = G_IMPLICIT_DEF %1:_(s1024), %2:_(s1024), %3:_(s1024) = G_UNMERGE_VALUES %0 S_ENDPGM 0, implicit %1, implicit %2, implicit %3 diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-intrinsic-round.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-intrinsic-round.mir index 8a87eb1d0f68..1e6412fd8aa6 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-intrinsic-round.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-intrinsic-round.mir @@ -1,8 +1,8 @@ # NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py -# RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=tahiti -run-pass=legalizer -global-isel-abort=0 %s -o - | FileCheck -check-prefix=GFX6 %s -# RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=fiji -run-pass=legalizer -global-isel-abort=0 %s -o - | FileCheck -check-prefix=GFX8 %s -# RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx900 -run-pass=legalizer -global-isel-abort=0 %s -o - | FileCheck -check-prefix=GFX9 %s -# RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx1010 -run-pass=legalizer -global-isel-abort=0 %s -o - | FileCheck -check-prefix=GFX9 %s +# RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=tahiti -run-pass=legalizer %s -o - | FileCheck -check-prefix=GFX6 %s +# RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=fiji -run-pass=legalizer %s -o - | FileCheck -check-prefix=GFX8 %s +# RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx900 -run-pass=legalizer %s -o - | FileCheck -check-prefix=GFX9 %s +# RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx1010 -run-pass=legalizer %s -o - | FileCheck -check-prefix=GFX9 %s --- name: test_intrinsic_round_s32 @@ -12,49 +12,49 @@ body: | ; GFX6-LABEL: name: test_intrinsic_round_s32 ; GFX6: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX6: [[INTRINSIC_TRUNC:%[0-9]+]]:_(s32) = G_INTRINSIC_TRUNC [[COPY]] - ; GFX6: [[FSUB:%[0-9]+]]:_(s32) = G_FSUB [[COPY]], [[INTRINSIC_TRUNC]] - ; GFX6: [[FABS:%[0-9]+]]:_(s32) = G_FABS [[FSUB]] - ; GFX6: [[C:%[0-9]+]]:_(s32) = G_FCONSTANT float 0.000000e+00 - ; GFX6: [[C1:%[0-9]+]]:_(s32) = G_FCONSTANT float 5.000000e-01 - ; GFX6: [[C2:%[0-9]+]]:_(s32) = 
G_CONSTANT i32 -2147483648 - ; GFX6: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 1065353216 - ; GFX6: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY]], [[C2]] - ; GFX6: [[OR:%[0-9]+]]:_(s32) = G_OR [[C3]], [[AND]] - ; GFX6: [[FCMP:%[0-9]+]]:_(s1) = G_FCMP floatpred(oge), [[FABS]](s32), [[C1]] - ; GFX6: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[FCMP]](s1), [[OR]], [[C]] - ; GFX6: [[FADD:%[0-9]+]]:_(s32) = G_FADD [[INTRINSIC_TRUNC]], [[SELECT]] - ; GFX6: $vgpr0 = COPY [[FADD]](s32) + ; GFX6-NEXT: [[INTRINSIC_TRUNC:%[0-9]+]]:_(s32) = G_INTRINSIC_TRUNC [[COPY]] + ; GFX6-NEXT: [[FSUB:%[0-9]+]]:_(s32) = G_FSUB [[COPY]], [[INTRINSIC_TRUNC]] + ; GFX6-NEXT: [[FABS:%[0-9]+]]:_(s32) = G_FABS [[FSUB]] + ; GFX6-NEXT: [[C:%[0-9]+]]:_(s32) = G_FCONSTANT float 0.000000e+00 + ; GFX6-NEXT: [[C1:%[0-9]+]]:_(s32) = G_FCONSTANT float 5.000000e-01 + ; GFX6-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 -2147483648 + ; GFX6-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 1065353216 + ; GFX6-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY]], [[C2]] + ; GFX6-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[C3]], [[AND]] + ; GFX6-NEXT: [[FCMP:%[0-9]+]]:_(s1) = G_FCMP floatpred(oge), [[FABS]](s32), [[C1]] + ; GFX6-NEXT: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[FCMP]](s1), [[OR]], [[C]] + ; GFX6-NEXT: [[FADD:%[0-9]+]]:_(s32) = G_FADD [[INTRINSIC_TRUNC]], [[SELECT]] + ; GFX6-NEXT: $vgpr0 = COPY [[FADD]](s32) ; GFX8-LABEL: name: test_intrinsic_round_s32 ; GFX8: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX8: [[INTRINSIC_TRUNC:%[0-9]+]]:_(s32) = G_INTRINSIC_TRUNC [[COPY]] - ; GFX8: [[FSUB:%[0-9]+]]:_(s32) = G_FSUB [[COPY]], [[INTRINSIC_TRUNC]] - ; GFX8: [[FABS:%[0-9]+]]:_(s32) = G_FABS [[FSUB]] - ; GFX8: [[C:%[0-9]+]]:_(s32) = G_FCONSTANT float 0.000000e+00 - ; GFX8: [[C1:%[0-9]+]]:_(s32) = G_FCONSTANT float 5.000000e-01 - ; GFX8: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 -2147483648 - ; GFX8: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 1065353216 - ; GFX8: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY]], [[C2]] - ; GFX8: [[OR:%[0-9]+]]:_(s32) = G_OR [[C3]], [[AND]] - ; GFX8: [[FCMP:%[0-9]+]]:_(s1) = G_FCMP floatpred(oge), [[FABS]](s32), [[C1]] - ; GFX8: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[FCMP]](s1), [[OR]], [[C]] - ; GFX8: [[FADD:%[0-9]+]]:_(s32) = G_FADD [[INTRINSIC_TRUNC]], [[SELECT]] - ; GFX8: $vgpr0 = COPY [[FADD]](s32) + ; GFX8-NEXT: [[INTRINSIC_TRUNC:%[0-9]+]]:_(s32) = G_INTRINSIC_TRUNC [[COPY]] + ; GFX8-NEXT: [[FSUB:%[0-9]+]]:_(s32) = G_FSUB [[COPY]], [[INTRINSIC_TRUNC]] + ; GFX8-NEXT: [[FABS:%[0-9]+]]:_(s32) = G_FABS [[FSUB]] + ; GFX8-NEXT: [[C:%[0-9]+]]:_(s32) = G_FCONSTANT float 0.000000e+00 + ; GFX8-NEXT: [[C1:%[0-9]+]]:_(s32) = G_FCONSTANT float 5.000000e-01 + ; GFX8-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 -2147483648 + ; GFX8-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 1065353216 + ; GFX8-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY]], [[C2]] + ; GFX8-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[C3]], [[AND]] + ; GFX8-NEXT: [[FCMP:%[0-9]+]]:_(s1) = G_FCMP floatpred(oge), [[FABS]](s32), [[C1]] + ; GFX8-NEXT: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[FCMP]](s1), [[OR]], [[C]] + ; GFX8-NEXT: [[FADD:%[0-9]+]]:_(s32) = G_FADD [[INTRINSIC_TRUNC]], [[SELECT]] + ; GFX8-NEXT: $vgpr0 = COPY [[FADD]](s32) ; GFX9-LABEL: name: test_intrinsic_round_s32 ; GFX9: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX9: [[INTRINSIC_TRUNC:%[0-9]+]]:_(s32) = G_INTRINSIC_TRUNC [[COPY]] - ; GFX9: [[FSUB:%[0-9]+]]:_(s32) = G_FSUB [[COPY]], [[INTRINSIC_TRUNC]] - ; GFX9: [[FABS:%[0-9]+]]:_(s32) = G_FABS [[FSUB]] - ; GFX9: [[C:%[0-9]+]]:_(s32) = G_FCONSTANT float 0.000000e+00 - ; GFX9: 
[[C1:%[0-9]+]]:_(s32) = G_FCONSTANT float 5.000000e-01 - ; GFX9: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 -2147483648 - ; GFX9: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 1065353216 - ; GFX9: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY]], [[C2]] - ; GFX9: [[OR:%[0-9]+]]:_(s32) = G_OR [[C3]], [[AND]] - ; GFX9: [[FCMP:%[0-9]+]]:_(s1) = G_FCMP floatpred(oge), [[FABS]](s32), [[C1]] - ; GFX9: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[FCMP]](s1), [[OR]], [[C]] - ; GFX9: [[FADD:%[0-9]+]]:_(s32) = G_FADD [[INTRINSIC_TRUNC]], [[SELECT]] - ; GFX9: $vgpr0 = COPY [[FADD]](s32) + ; GFX9-NEXT: [[INTRINSIC_TRUNC:%[0-9]+]]:_(s32) = G_INTRINSIC_TRUNC [[COPY]] + ; GFX9-NEXT: [[FSUB:%[0-9]+]]:_(s32) = G_FSUB [[COPY]], [[INTRINSIC_TRUNC]] + ; GFX9-NEXT: [[FABS:%[0-9]+]]:_(s32) = G_FABS [[FSUB]] + ; GFX9-NEXT: [[C:%[0-9]+]]:_(s32) = G_FCONSTANT float 0.000000e+00 + ; GFX9-NEXT: [[C1:%[0-9]+]]:_(s32) = G_FCONSTANT float 5.000000e-01 + ; GFX9-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 -2147483648 + ; GFX9-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 1065353216 + ; GFX9-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY]], [[C2]] + ; GFX9-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[C3]], [[AND]] + ; GFX9-NEXT: [[FCMP:%[0-9]+]]:_(s1) = G_FCMP floatpred(oge), [[FABS]](s32), [[C1]] + ; GFX9-NEXT: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[FCMP]](s1), [[OR]], [[C]] + ; GFX9-NEXT: [[FADD:%[0-9]+]]:_(s32) = G_FADD [[INTRINSIC_TRUNC]], [[SELECT]] + ; GFX9-NEXT: $vgpr0 = COPY [[FADD]](s32) %0:_(s32) = COPY $vgpr0 %1:_(s32) = G_INTRINSIC_ROUND %0 $vgpr0 = COPY %1 @@ -68,49 +68,49 @@ body: | ; GFX6-LABEL: name: test_intrinsic_round_s32_flags ; GFX6: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX6: [[INTRINSIC_TRUNC:%[0-9]+]]:_(s32) = nsz G_INTRINSIC_TRUNC [[COPY]] - ; GFX6: [[FSUB:%[0-9]+]]:_(s32) = nsz G_FSUB [[COPY]], [[INTRINSIC_TRUNC]] - ; GFX6: [[FABS:%[0-9]+]]:_(s32) = nsz G_FABS [[FSUB]] - ; GFX6: [[C:%[0-9]+]]:_(s32) = G_FCONSTANT float 0.000000e+00 - ; GFX6: [[C1:%[0-9]+]]:_(s32) = G_FCONSTANT float 5.000000e-01 - ; GFX6: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 -2147483648 - ; GFX6: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 1065353216 - ; GFX6: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY]], [[C2]] - ; GFX6: [[OR:%[0-9]+]]:_(s32) = G_OR [[C3]], [[AND]] - ; GFX6: [[FCMP:%[0-9]+]]:_(s1) = nsz G_FCMP floatpred(oge), [[FABS]](s32), [[C1]] - ; GFX6: [[SELECT:%[0-9]+]]:_(s32) = nsz G_SELECT [[FCMP]](s1), [[OR]], [[C]] - ; GFX6: [[FADD:%[0-9]+]]:_(s32) = nsz G_FADD [[INTRINSIC_TRUNC]], [[SELECT]] - ; GFX6: $vgpr0 = COPY [[FADD]](s32) + ; GFX6-NEXT: [[INTRINSIC_TRUNC:%[0-9]+]]:_(s32) = nsz G_INTRINSIC_TRUNC [[COPY]] + ; GFX6-NEXT: [[FSUB:%[0-9]+]]:_(s32) = nsz G_FSUB [[COPY]], [[INTRINSIC_TRUNC]] + ; GFX6-NEXT: [[FABS:%[0-9]+]]:_(s32) = nsz G_FABS [[FSUB]] + ; GFX6-NEXT: [[C:%[0-9]+]]:_(s32) = G_FCONSTANT float 0.000000e+00 + ; GFX6-NEXT: [[C1:%[0-9]+]]:_(s32) = G_FCONSTANT float 5.000000e-01 + ; GFX6-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 -2147483648 + ; GFX6-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 1065353216 + ; GFX6-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY]], [[C2]] + ; GFX6-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[C3]], [[AND]] + ; GFX6-NEXT: [[FCMP:%[0-9]+]]:_(s1) = nsz G_FCMP floatpred(oge), [[FABS]](s32), [[C1]] + ; GFX6-NEXT: [[SELECT:%[0-9]+]]:_(s32) = nsz G_SELECT [[FCMP]](s1), [[OR]], [[C]] + ; GFX6-NEXT: [[FADD:%[0-9]+]]:_(s32) = nsz G_FADD [[INTRINSIC_TRUNC]], [[SELECT]] + ; GFX6-NEXT: $vgpr0 = COPY [[FADD]](s32) ; GFX8-LABEL: name: test_intrinsic_round_s32_flags ; GFX8: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX8: 
[[INTRINSIC_TRUNC:%[0-9]+]]:_(s32) = nsz G_INTRINSIC_TRUNC [[COPY]] - ; GFX8: [[FSUB:%[0-9]+]]:_(s32) = nsz G_FSUB [[COPY]], [[INTRINSIC_TRUNC]] - ; GFX8: [[FABS:%[0-9]+]]:_(s32) = nsz G_FABS [[FSUB]] - ; GFX8: [[C:%[0-9]+]]:_(s32) = G_FCONSTANT float 0.000000e+00 - ; GFX8: [[C1:%[0-9]+]]:_(s32) = G_FCONSTANT float 5.000000e-01 - ; GFX8: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 -2147483648 - ; GFX8: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 1065353216 - ; GFX8: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY]], [[C2]] - ; GFX8: [[OR:%[0-9]+]]:_(s32) = G_OR [[C3]], [[AND]] - ; GFX8: [[FCMP:%[0-9]+]]:_(s1) = nsz G_FCMP floatpred(oge), [[FABS]](s32), [[C1]] - ; GFX8: [[SELECT:%[0-9]+]]:_(s32) = nsz G_SELECT [[FCMP]](s1), [[OR]], [[C]] - ; GFX8: [[FADD:%[0-9]+]]:_(s32) = nsz G_FADD [[INTRINSIC_TRUNC]], [[SELECT]] - ; GFX8: $vgpr0 = COPY [[FADD]](s32) + ; GFX8-NEXT: [[INTRINSIC_TRUNC:%[0-9]+]]:_(s32) = nsz G_INTRINSIC_TRUNC [[COPY]] + ; GFX8-NEXT: [[FSUB:%[0-9]+]]:_(s32) = nsz G_FSUB [[COPY]], [[INTRINSIC_TRUNC]] + ; GFX8-NEXT: [[FABS:%[0-9]+]]:_(s32) = nsz G_FABS [[FSUB]] + ; GFX8-NEXT: [[C:%[0-9]+]]:_(s32) = G_FCONSTANT float 0.000000e+00 + ; GFX8-NEXT: [[C1:%[0-9]+]]:_(s32) = G_FCONSTANT float 5.000000e-01 + ; GFX8-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 -2147483648 + ; GFX8-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 1065353216 + ; GFX8-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY]], [[C2]] + ; GFX8-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[C3]], [[AND]] + ; GFX8-NEXT: [[FCMP:%[0-9]+]]:_(s1) = nsz G_FCMP floatpred(oge), [[FABS]](s32), [[C1]] + ; GFX8-NEXT: [[SELECT:%[0-9]+]]:_(s32) = nsz G_SELECT [[FCMP]](s1), [[OR]], [[C]] + ; GFX8-NEXT: [[FADD:%[0-9]+]]:_(s32) = nsz G_FADD [[INTRINSIC_TRUNC]], [[SELECT]] + ; GFX8-NEXT: $vgpr0 = COPY [[FADD]](s32) ; GFX9-LABEL: name: test_intrinsic_round_s32_flags ; GFX9: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX9: [[INTRINSIC_TRUNC:%[0-9]+]]:_(s32) = nsz G_INTRINSIC_TRUNC [[COPY]] - ; GFX9: [[FSUB:%[0-9]+]]:_(s32) = nsz G_FSUB [[COPY]], [[INTRINSIC_TRUNC]] - ; GFX9: [[FABS:%[0-9]+]]:_(s32) = nsz G_FABS [[FSUB]] - ; GFX9: [[C:%[0-9]+]]:_(s32) = G_FCONSTANT float 0.000000e+00 - ; GFX9: [[C1:%[0-9]+]]:_(s32) = G_FCONSTANT float 5.000000e-01 - ; GFX9: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 -2147483648 - ; GFX9: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 1065353216 - ; GFX9: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY]], [[C2]] - ; GFX9: [[OR:%[0-9]+]]:_(s32) = G_OR [[C3]], [[AND]] - ; GFX9: [[FCMP:%[0-9]+]]:_(s1) = nsz G_FCMP floatpred(oge), [[FABS]](s32), [[C1]] - ; GFX9: [[SELECT:%[0-9]+]]:_(s32) = nsz G_SELECT [[FCMP]](s1), [[OR]], [[C]] - ; GFX9: [[FADD:%[0-9]+]]:_(s32) = nsz G_FADD [[INTRINSIC_TRUNC]], [[SELECT]] - ; GFX9: $vgpr0 = COPY [[FADD]](s32) + ; GFX9-NEXT: [[INTRINSIC_TRUNC:%[0-9]+]]:_(s32) = nsz G_INTRINSIC_TRUNC [[COPY]] + ; GFX9-NEXT: [[FSUB:%[0-9]+]]:_(s32) = nsz G_FSUB [[COPY]], [[INTRINSIC_TRUNC]] + ; GFX9-NEXT: [[FABS:%[0-9]+]]:_(s32) = nsz G_FABS [[FSUB]] + ; GFX9-NEXT: [[C:%[0-9]+]]:_(s32) = G_FCONSTANT float 0.000000e+00 + ; GFX9-NEXT: [[C1:%[0-9]+]]:_(s32) = G_FCONSTANT float 5.000000e-01 + ; GFX9-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 -2147483648 + ; GFX9-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 1065353216 + ; GFX9-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY]], [[C2]] + ; GFX9-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[C3]], [[AND]] + ; GFX9-NEXT: [[FCMP:%[0-9]+]]:_(s1) = nsz G_FCMP floatpred(oge), [[FABS]](s32), [[C1]] + ; GFX9-NEXT: [[SELECT:%[0-9]+]]:_(s32) = nsz G_SELECT [[FCMP]](s1), [[OR]], [[C]] + ; GFX9-NEXT: [[FADD:%[0-9]+]]:_(s32) = nsz 
G_FADD [[INTRINSIC_TRUNC]], [[SELECT]] + ; GFX9-NEXT: $vgpr0 = COPY [[FADD]](s32) %0:_(s32) = COPY $vgpr0 %1:_(s32) = nsz G_INTRINSIC_ROUND %0 $vgpr0 = COPY %1 @@ -124,71 +124,71 @@ body: | ; GFX6-LABEL: name: test_intrinsic_round_s64 ; GFX6: [[COPY:%[0-9]+]]:_(s64) = COPY $vgpr0_vgpr1 - ; GFX6: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](s64) - ; GFX6: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 20 - ; GFX6: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 11 - ; GFX6: [[INT:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.ubfe), [[UV1]](s32), [[C]](s32), [[C1]](s32) - ; GFX6: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 1023 - ; GFX6: [[SUB:%[0-9]+]]:_(s32) = G_SUB [[INT]], [[C2]] - ; GFX6: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 -2147483648 - ; GFX6: [[AND:%[0-9]+]]:_(s32) = G_AND [[UV1]], [[C3]] - ; GFX6: [[C4:%[0-9]+]]:_(s64) = G_CONSTANT i64 4503599627370495 - ; GFX6: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 - ; GFX6: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[C5]](s32), [[AND]](s32) - ; GFX6: [[ASHR:%[0-9]+]]:_(s64) = G_ASHR [[C4]], [[SUB]](s32) - ; GFX6: [[C6:%[0-9]+]]:_(s64) = G_CONSTANT i64 -1 - ; GFX6: [[XOR:%[0-9]+]]:_(s64) = G_XOR [[ASHR]], [[C6]] - ; GFX6: [[AND1:%[0-9]+]]:_(s64) = G_AND [[COPY]], [[XOR]] - ; GFX6: [[C7:%[0-9]+]]:_(s32) = G_CONSTANT i32 51 - ; GFX6: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(slt), [[SUB]](s32), [[C5]] - ; GFX6: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(sgt), [[SUB]](s32), [[C7]] - ; GFX6: [[SELECT:%[0-9]+]]:_(s64) = G_SELECT [[ICMP]](s1), [[MV]], [[AND1]] - ; GFX6: [[SELECT1:%[0-9]+]]:_(s64) = G_SELECT [[ICMP1]](s1), [[COPY]], [[SELECT]] - ; GFX6: [[FNEG:%[0-9]+]]:_(s64) = G_FNEG [[SELECT1]] - ; GFX6: [[FADD:%[0-9]+]]:_(s64) = G_FADD [[COPY]], [[FNEG]] - ; GFX6: [[FABS:%[0-9]+]]:_(s64) = G_FABS [[FADD]] - ; GFX6: [[C8:%[0-9]+]]:_(s64) = G_FCONSTANT double 0.000000e+00 - ; GFX6: [[C9:%[0-9]+]]:_(s64) = G_FCONSTANT double 5.000000e-01 - ; GFX6: [[C10:%[0-9]+]]:_(s64) = G_CONSTANT i64 -9223372036854775808 - ; GFX6: [[C11:%[0-9]+]]:_(s64) = G_CONSTANT i64 4607182418800017408 - ; GFX6: [[AND2:%[0-9]+]]:_(s64) = G_AND [[COPY]], [[C10]] - ; GFX6: [[OR:%[0-9]+]]:_(s64) = G_OR [[C11]], [[AND2]] - ; GFX6: [[FCMP:%[0-9]+]]:_(s1) = G_FCMP floatpred(oge), [[FABS]](s64), [[C9]] - ; GFX6: [[SELECT2:%[0-9]+]]:_(s64) = G_SELECT [[FCMP]](s1), [[OR]], [[C8]] - ; GFX6: [[FADD1:%[0-9]+]]:_(s64) = G_FADD [[SELECT1]], [[SELECT2]] - ; GFX6: $vgpr0_vgpr1 = COPY [[FADD1]](s64) + ; GFX6-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](s64) + ; GFX6-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 20 + ; GFX6-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 11 + ; GFX6-NEXT: [[INT:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.ubfe), [[UV1]](s32), [[C]](s32), [[C1]](s32) + ; GFX6-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 1023 + ; GFX6-NEXT: [[SUB:%[0-9]+]]:_(s32) = G_SUB [[INT]], [[C2]] + ; GFX6-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 -2147483648 + ; GFX6-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[UV1]], [[C3]] + ; GFX6-NEXT: [[C4:%[0-9]+]]:_(s64) = G_CONSTANT i64 4503599627370495 + ; GFX6-NEXT: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 + ; GFX6-NEXT: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[C5]](s32), [[AND]](s32) + ; GFX6-NEXT: [[ASHR:%[0-9]+]]:_(s64) = G_ASHR [[C4]], [[SUB]](s32) + ; GFX6-NEXT: [[C6:%[0-9]+]]:_(s64) = G_CONSTANT i64 -1 + ; GFX6-NEXT: [[XOR:%[0-9]+]]:_(s64) = G_XOR [[ASHR]], [[C6]] + ; GFX6-NEXT: [[AND1:%[0-9]+]]:_(s64) = G_AND [[COPY]], [[XOR]] + ; GFX6-NEXT: [[C7:%[0-9]+]]:_(s32) = G_CONSTANT i32 51 + ; 
GFX6-NEXT: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(slt), [[SUB]](s32), [[C5]] + ; GFX6-NEXT: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(sgt), [[SUB]](s32), [[C7]] + ; GFX6-NEXT: [[SELECT:%[0-9]+]]:_(s64) = G_SELECT [[ICMP]](s1), [[MV]], [[AND1]] + ; GFX6-NEXT: [[SELECT1:%[0-9]+]]:_(s64) = G_SELECT [[ICMP1]](s1), [[COPY]], [[SELECT]] + ; GFX6-NEXT: [[FNEG:%[0-9]+]]:_(s64) = G_FNEG [[SELECT1]] + ; GFX6-NEXT: [[FADD:%[0-9]+]]:_(s64) = G_FADD [[COPY]], [[FNEG]] + ; GFX6-NEXT: [[FABS:%[0-9]+]]:_(s64) = G_FABS [[FADD]] + ; GFX6-NEXT: [[C8:%[0-9]+]]:_(s64) = G_FCONSTANT double 0.000000e+00 + ; GFX6-NEXT: [[C9:%[0-9]+]]:_(s64) = G_FCONSTANT double 5.000000e-01 + ; GFX6-NEXT: [[C10:%[0-9]+]]:_(s64) = G_CONSTANT i64 -9223372036854775808 + ; GFX6-NEXT: [[C11:%[0-9]+]]:_(s64) = G_CONSTANT i64 4607182418800017408 + ; GFX6-NEXT: [[AND2:%[0-9]+]]:_(s64) = G_AND [[COPY]], [[C10]] + ; GFX6-NEXT: [[OR:%[0-9]+]]:_(s64) = G_OR [[C11]], [[AND2]] + ; GFX6-NEXT: [[FCMP:%[0-9]+]]:_(s1) = G_FCMP floatpred(oge), [[FABS]](s64), [[C9]] + ; GFX6-NEXT: [[SELECT2:%[0-9]+]]:_(s64) = G_SELECT [[FCMP]](s1), [[OR]], [[C8]] + ; GFX6-NEXT: [[FADD1:%[0-9]+]]:_(s64) = G_FADD [[SELECT1]], [[SELECT2]] + ; GFX6-NEXT: $vgpr0_vgpr1 = COPY [[FADD1]](s64) ; GFX8-LABEL: name: test_intrinsic_round_s64 ; GFX8: [[COPY:%[0-9]+]]:_(s64) = COPY $vgpr0_vgpr1 - ; GFX8: [[INTRINSIC_TRUNC:%[0-9]+]]:_(s64) = G_INTRINSIC_TRUNC [[COPY]] - ; GFX8: [[FNEG:%[0-9]+]]:_(s64) = G_FNEG [[INTRINSIC_TRUNC]] - ; GFX8: [[FADD:%[0-9]+]]:_(s64) = G_FADD [[COPY]], [[FNEG]] - ; GFX8: [[FABS:%[0-9]+]]:_(s64) = G_FABS [[FADD]] - ; GFX8: [[C:%[0-9]+]]:_(s64) = G_FCONSTANT double 0.000000e+00 - ; GFX8: [[C1:%[0-9]+]]:_(s64) = G_FCONSTANT double 5.000000e-01 - ; GFX8: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 -9223372036854775808 - ; GFX8: [[C3:%[0-9]+]]:_(s64) = G_CONSTANT i64 4607182418800017408 - ; GFX8: [[AND:%[0-9]+]]:_(s64) = G_AND [[COPY]], [[C2]] - ; GFX8: [[OR:%[0-9]+]]:_(s64) = G_OR [[C3]], [[AND]] - ; GFX8: [[FCMP:%[0-9]+]]:_(s1) = G_FCMP floatpred(oge), [[FABS]](s64), [[C1]] - ; GFX8: [[SELECT:%[0-9]+]]:_(s64) = G_SELECT [[FCMP]](s1), [[OR]], [[C]] - ; GFX8: [[FADD1:%[0-9]+]]:_(s64) = G_FADD [[INTRINSIC_TRUNC]], [[SELECT]] - ; GFX8: $vgpr0_vgpr1 = COPY [[FADD1]](s64) + ; GFX8-NEXT: [[INTRINSIC_TRUNC:%[0-9]+]]:_(s64) = G_INTRINSIC_TRUNC [[COPY]] + ; GFX8-NEXT: [[FNEG:%[0-9]+]]:_(s64) = G_FNEG [[INTRINSIC_TRUNC]] + ; GFX8-NEXT: [[FADD:%[0-9]+]]:_(s64) = G_FADD [[COPY]], [[FNEG]] + ; GFX8-NEXT: [[FABS:%[0-9]+]]:_(s64) = G_FABS [[FADD]] + ; GFX8-NEXT: [[C:%[0-9]+]]:_(s64) = G_FCONSTANT double 0.000000e+00 + ; GFX8-NEXT: [[C1:%[0-9]+]]:_(s64) = G_FCONSTANT double 5.000000e-01 + ; GFX8-NEXT: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 -9223372036854775808 + ; GFX8-NEXT: [[C3:%[0-9]+]]:_(s64) = G_CONSTANT i64 4607182418800017408 + ; GFX8-NEXT: [[AND:%[0-9]+]]:_(s64) = G_AND [[COPY]], [[C2]] + ; GFX8-NEXT: [[OR:%[0-9]+]]:_(s64) = G_OR [[C3]], [[AND]] + ; GFX8-NEXT: [[FCMP:%[0-9]+]]:_(s1) = G_FCMP floatpred(oge), [[FABS]](s64), [[C1]] + ; GFX8-NEXT: [[SELECT:%[0-9]+]]:_(s64) = G_SELECT [[FCMP]](s1), [[OR]], [[C]] + ; GFX8-NEXT: [[FADD1:%[0-9]+]]:_(s64) = G_FADD [[INTRINSIC_TRUNC]], [[SELECT]] + ; GFX8-NEXT: $vgpr0_vgpr1 = COPY [[FADD1]](s64) ; GFX9-LABEL: name: test_intrinsic_round_s64 ; GFX9: [[COPY:%[0-9]+]]:_(s64) = COPY $vgpr0_vgpr1 - ; GFX9: [[INTRINSIC_TRUNC:%[0-9]+]]:_(s64) = G_INTRINSIC_TRUNC [[COPY]] - ; GFX9: [[FNEG:%[0-9]+]]:_(s64) = G_FNEG [[INTRINSIC_TRUNC]] - ; GFX9: [[FADD:%[0-9]+]]:_(s64) = G_FADD [[COPY]], [[FNEG]] - ; GFX9: [[FABS:%[0-9]+]]:_(s64) = G_FABS 
[[FADD]] - ; GFX9: [[C:%[0-9]+]]:_(s64) = G_FCONSTANT double 0.000000e+00 - ; GFX9: [[C1:%[0-9]+]]:_(s64) = G_FCONSTANT double 5.000000e-01 - ; GFX9: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 -9223372036854775808 - ; GFX9: [[C3:%[0-9]+]]:_(s64) = G_CONSTANT i64 4607182418800017408 - ; GFX9: [[AND:%[0-9]+]]:_(s64) = G_AND [[COPY]], [[C2]] - ; GFX9: [[OR:%[0-9]+]]:_(s64) = G_OR [[C3]], [[AND]] - ; GFX9: [[FCMP:%[0-9]+]]:_(s1) = G_FCMP floatpred(oge), [[FABS]](s64), [[C1]] - ; GFX9: [[SELECT:%[0-9]+]]:_(s64) = G_SELECT [[FCMP]](s1), [[OR]], [[C]] - ; GFX9: [[FADD1:%[0-9]+]]:_(s64) = G_FADD [[INTRINSIC_TRUNC]], [[SELECT]] - ; GFX9: $vgpr0_vgpr1 = COPY [[FADD1]](s64) + ; GFX9-NEXT: [[INTRINSIC_TRUNC:%[0-9]+]]:_(s64) = G_INTRINSIC_TRUNC [[COPY]] + ; GFX9-NEXT: [[FNEG:%[0-9]+]]:_(s64) = G_FNEG [[INTRINSIC_TRUNC]] + ; GFX9-NEXT: [[FADD:%[0-9]+]]:_(s64) = G_FADD [[COPY]], [[FNEG]] + ; GFX9-NEXT: [[FABS:%[0-9]+]]:_(s64) = G_FABS [[FADD]] + ; GFX9-NEXT: [[C:%[0-9]+]]:_(s64) = G_FCONSTANT double 0.000000e+00 + ; GFX9-NEXT: [[C1:%[0-9]+]]:_(s64) = G_FCONSTANT double 5.000000e-01 + ; GFX9-NEXT: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 -9223372036854775808 + ; GFX9-NEXT: [[C3:%[0-9]+]]:_(s64) = G_CONSTANT i64 4607182418800017408 + ; GFX9-NEXT: [[AND:%[0-9]+]]:_(s64) = G_AND [[COPY]], [[C2]] + ; GFX9-NEXT: [[OR:%[0-9]+]]:_(s64) = G_OR [[C3]], [[AND]] + ; GFX9-NEXT: [[FCMP:%[0-9]+]]:_(s1) = G_FCMP floatpred(oge), [[FABS]](s64), [[C1]] + ; GFX9-NEXT: [[SELECT:%[0-9]+]]:_(s64) = G_SELECT [[FCMP]](s1), [[OR]], [[C]] + ; GFX9-NEXT: [[FADD1:%[0-9]+]]:_(s64) = G_FADD [[INTRINSIC_TRUNC]], [[SELECT]] + ; GFX9-NEXT: $vgpr0_vgpr1 = COPY [[FADD1]](s64) %0:_(s64) = COPY $vgpr0_vgpr1 %1:_(s64) = G_INTRINSIC_ROUND %0 $vgpr0_vgpr1 = COPY %1 @@ -202,79 +202,79 @@ body: | ; GFX6-LABEL: name: test_intrinsic_round_v2s32 ; GFX6: [[COPY:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr0_vgpr1 - ; GFX6: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<2 x s32>) - ; GFX6: [[INTRINSIC_TRUNC:%[0-9]+]]:_(s32) = G_INTRINSIC_TRUNC [[UV]] - ; GFX6: [[FSUB:%[0-9]+]]:_(s32) = G_FSUB [[UV]], [[INTRINSIC_TRUNC]] - ; GFX6: [[FABS:%[0-9]+]]:_(s32) = G_FABS [[FSUB]] - ; GFX6: [[C:%[0-9]+]]:_(s32) = G_FCONSTANT float 0.000000e+00 - ; GFX6: [[C1:%[0-9]+]]:_(s32) = G_FCONSTANT float 5.000000e-01 - ; GFX6: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 -2147483648 - ; GFX6: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 1065353216 - ; GFX6: [[AND:%[0-9]+]]:_(s32) = G_AND [[UV]], [[C2]] - ; GFX6: [[OR:%[0-9]+]]:_(s32) = G_OR [[C3]], [[AND]] - ; GFX6: [[FCMP:%[0-9]+]]:_(s1) = G_FCMP floatpred(oge), [[FABS]](s32), [[C1]] - ; GFX6: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[FCMP]](s1), [[OR]], [[C]] - ; GFX6: [[FADD:%[0-9]+]]:_(s32) = G_FADD [[INTRINSIC_TRUNC]], [[SELECT]] - ; GFX6: [[INTRINSIC_TRUNC1:%[0-9]+]]:_(s32) = G_INTRINSIC_TRUNC [[UV1]] - ; GFX6: [[FSUB1:%[0-9]+]]:_(s32) = G_FSUB [[UV1]], [[INTRINSIC_TRUNC1]] - ; GFX6: [[FABS1:%[0-9]+]]:_(s32) = G_FABS [[FSUB1]] - ; GFX6: [[AND1:%[0-9]+]]:_(s32) = G_AND [[UV1]], [[C2]] - ; GFX6: [[OR1:%[0-9]+]]:_(s32) = G_OR [[C3]], [[AND1]] - ; GFX6: [[FCMP1:%[0-9]+]]:_(s1) = G_FCMP floatpred(oge), [[FABS1]](s32), [[C1]] - ; GFX6: [[SELECT1:%[0-9]+]]:_(s32) = G_SELECT [[FCMP1]](s1), [[OR1]], [[C]] - ; GFX6: [[FADD1:%[0-9]+]]:_(s32) = G_FADD [[INTRINSIC_TRUNC1]], [[SELECT1]] - ; GFX6: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[FADD]](s32), [[FADD1]](s32) - ; GFX6: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x s32>) + ; GFX6-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<2 x 
s32>) + ; GFX6-NEXT: [[INTRINSIC_TRUNC:%[0-9]+]]:_(s32) = G_INTRINSIC_TRUNC [[UV]] + ; GFX6-NEXT: [[FSUB:%[0-9]+]]:_(s32) = G_FSUB [[UV]], [[INTRINSIC_TRUNC]] + ; GFX6-NEXT: [[FABS:%[0-9]+]]:_(s32) = G_FABS [[FSUB]] + ; GFX6-NEXT: [[C:%[0-9]+]]:_(s32) = G_FCONSTANT float 0.000000e+00 + ; GFX6-NEXT: [[C1:%[0-9]+]]:_(s32) = G_FCONSTANT float 5.000000e-01 + ; GFX6-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 -2147483648 + ; GFX6-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 1065353216 + ; GFX6-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[UV]], [[C2]] + ; GFX6-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[C3]], [[AND]] + ; GFX6-NEXT: [[FCMP:%[0-9]+]]:_(s1) = G_FCMP floatpred(oge), [[FABS]](s32), [[C1]] + ; GFX6-NEXT: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[FCMP]](s1), [[OR]], [[C]] + ; GFX6-NEXT: [[FADD:%[0-9]+]]:_(s32) = G_FADD [[INTRINSIC_TRUNC]], [[SELECT]] + ; GFX6-NEXT: [[INTRINSIC_TRUNC1:%[0-9]+]]:_(s32) = G_INTRINSIC_TRUNC [[UV1]] + ; GFX6-NEXT: [[FSUB1:%[0-9]+]]:_(s32) = G_FSUB [[UV1]], [[INTRINSIC_TRUNC1]] + ; GFX6-NEXT: [[FABS1:%[0-9]+]]:_(s32) = G_FABS [[FSUB1]] + ; GFX6-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[UV1]], [[C2]] + ; GFX6-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[C3]], [[AND1]] + ; GFX6-NEXT: [[FCMP1:%[0-9]+]]:_(s1) = G_FCMP floatpred(oge), [[FABS1]](s32), [[C1]] + ; GFX6-NEXT: [[SELECT1:%[0-9]+]]:_(s32) = G_SELECT [[FCMP1]](s1), [[OR1]], [[C]] + ; GFX6-NEXT: [[FADD1:%[0-9]+]]:_(s32) = G_FADD [[INTRINSIC_TRUNC1]], [[SELECT1]] + ; GFX6-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[FADD]](s32), [[FADD1]](s32) + ; GFX6-NEXT: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x s32>) ; GFX8-LABEL: name: test_intrinsic_round_v2s32 ; GFX8: [[COPY:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr0_vgpr1 - ; GFX8: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<2 x s32>) - ; GFX8: [[INTRINSIC_TRUNC:%[0-9]+]]:_(s32) = G_INTRINSIC_TRUNC [[UV]] - ; GFX8: [[FSUB:%[0-9]+]]:_(s32) = G_FSUB [[UV]], [[INTRINSIC_TRUNC]] - ; GFX8: [[FABS:%[0-9]+]]:_(s32) = G_FABS [[FSUB]] - ; GFX8: [[C:%[0-9]+]]:_(s32) = G_FCONSTANT float 0.000000e+00 - ; GFX8: [[C1:%[0-9]+]]:_(s32) = G_FCONSTANT float 5.000000e-01 - ; GFX8: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 -2147483648 - ; GFX8: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 1065353216 - ; GFX8: [[AND:%[0-9]+]]:_(s32) = G_AND [[UV]], [[C2]] - ; GFX8: [[OR:%[0-9]+]]:_(s32) = G_OR [[C3]], [[AND]] - ; GFX8: [[FCMP:%[0-9]+]]:_(s1) = G_FCMP floatpred(oge), [[FABS]](s32), [[C1]] - ; GFX8: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[FCMP]](s1), [[OR]], [[C]] - ; GFX8: [[FADD:%[0-9]+]]:_(s32) = G_FADD [[INTRINSIC_TRUNC]], [[SELECT]] - ; GFX8: [[INTRINSIC_TRUNC1:%[0-9]+]]:_(s32) = G_INTRINSIC_TRUNC [[UV1]] - ; GFX8: [[FSUB1:%[0-9]+]]:_(s32) = G_FSUB [[UV1]], [[INTRINSIC_TRUNC1]] - ; GFX8: [[FABS1:%[0-9]+]]:_(s32) = G_FABS [[FSUB1]] - ; GFX8: [[AND1:%[0-9]+]]:_(s32) = G_AND [[UV1]], [[C2]] - ; GFX8: [[OR1:%[0-9]+]]:_(s32) = G_OR [[C3]], [[AND1]] - ; GFX8: [[FCMP1:%[0-9]+]]:_(s1) = G_FCMP floatpred(oge), [[FABS1]](s32), [[C1]] - ; GFX8: [[SELECT1:%[0-9]+]]:_(s32) = G_SELECT [[FCMP1]](s1), [[OR1]], [[C]] - ; GFX8: [[FADD1:%[0-9]+]]:_(s32) = G_FADD [[INTRINSIC_TRUNC1]], [[SELECT1]] - ; GFX8: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[FADD]](s32), [[FADD1]](s32) - ; GFX8: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x s32>) + ; GFX8-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<2 x s32>) + ; GFX8-NEXT: [[INTRINSIC_TRUNC:%[0-9]+]]:_(s32) = G_INTRINSIC_TRUNC [[UV]] + ; GFX8-NEXT: [[FSUB:%[0-9]+]]:_(s32) = G_FSUB [[UV]], 
[[INTRINSIC_TRUNC]] + ; GFX8-NEXT: [[FABS:%[0-9]+]]:_(s32) = G_FABS [[FSUB]] + ; GFX8-NEXT: [[C:%[0-9]+]]:_(s32) = G_FCONSTANT float 0.000000e+00 + ; GFX8-NEXT: [[C1:%[0-9]+]]:_(s32) = G_FCONSTANT float 5.000000e-01 + ; GFX8-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 -2147483648 + ; GFX8-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 1065353216 + ; GFX8-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[UV]], [[C2]] + ; GFX8-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[C3]], [[AND]] + ; GFX8-NEXT: [[FCMP:%[0-9]+]]:_(s1) = G_FCMP floatpred(oge), [[FABS]](s32), [[C1]] + ; GFX8-NEXT: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[FCMP]](s1), [[OR]], [[C]] + ; GFX8-NEXT: [[FADD:%[0-9]+]]:_(s32) = G_FADD [[INTRINSIC_TRUNC]], [[SELECT]] + ; GFX8-NEXT: [[INTRINSIC_TRUNC1:%[0-9]+]]:_(s32) = G_INTRINSIC_TRUNC [[UV1]] + ; GFX8-NEXT: [[FSUB1:%[0-9]+]]:_(s32) = G_FSUB [[UV1]], [[INTRINSIC_TRUNC1]] + ; GFX8-NEXT: [[FABS1:%[0-9]+]]:_(s32) = G_FABS [[FSUB1]] + ; GFX8-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[UV1]], [[C2]] + ; GFX8-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[C3]], [[AND1]] + ; GFX8-NEXT: [[FCMP1:%[0-9]+]]:_(s1) = G_FCMP floatpred(oge), [[FABS1]](s32), [[C1]] + ; GFX8-NEXT: [[SELECT1:%[0-9]+]]:_(s32) = G_SELECT [[FCMP1]](s1), [[OR1]], [[C]] + ; GFX8-NEXT: [[FADD1:%[0-9]+]]:_(s32) = G_FADD [[INTRINSIC_TRUNC1]], [[SELECT1]] + ; GFX8-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[FADD]](s32), [[FADD1]](s32) + ; GFX8-NEXT: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x s32>) ; GFX9-LABEL: name: test_intrinsic_round_v2s32 ; GFX9: [[COPY:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr0_vgpr1 - ; GFX9: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<2 x s32>) - ; GFX9: [[INTRINSIC_TRUNC:%[0-9]+]]:_(s32) = G_INTRINSIC_TRUNC [[UV]] - ; GFX9: [[FSUB:%[0-9]+]]:_(s32) = G_FSUB [[UV]], [[INTRINSIC_TRUNC]] - ; GFX9: [[FABS:%[0-9]+]]:_(s32) = G_FABS [[FSUB]] - ; GFX9: [[C:%[0-9]+]]:_(s32) = G_FCONSTANT float 0.000000e+00 - ; GFX9: [[C1:%[0-9]+]]:_(s32) = G_FCONSTANT float 5.000000e-01 - ; GFX9: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 -2147483648 - ; GFX9: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 1065353216 - ; GFX9: [[AND:%[0-9]+]]:_(s32) = G_AND [[UV]], [[C2]] - ; GFX9: [[OR:%[0-9]+]]:_(s32) = G_OR [[C3]], [[AND]] - ; GFX9: [[FCMP:%[0-9]+]]:_(s1) = G_FCMP floatpred(oge), [[FABS]](s32), [[C1]] - ; GFX9: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[FCMP]](s1), [[OR]], [[C]] - ; GFX9: [[FADD:%[0-9]+]]:_(s32) = G_FADD [[INTRINSIC_TRUNC]], [[SELECT]] - ; GFX9: [[INTRINSIC_TRUNC1:%[0-9]+]]:_(s32) = G_INTRINSIC_TRUNC [[UV1]] - ; GFX9: [[FSUB1:%[0-9]+]]:_(s32) = G_FSUB [[UV1]], [[INTRINSIC_TRUNC1]] - ; GFX9: [[FABS1:%[0-9]+]]:_(s32) = G_FABS [[FSUB1]] - ; GFX9: [[AND1:%[0-9]+]]:_(s32) = G_AND [[UV1]], [[C2]] - ; GFX9: [[OR1:%[0-9]+]]:_(s32) = G_OR [[C3]], [[AND1]] - ; GFX9: [[FCMP1:%[0-9]+]]:_(s1) = G_FCMP floatpred(oge), [[FABS1]](s32), [[C1]] - ; GFX9: [[SELECT1:%[0-9]+]]:_(s32) = G_SELECT [[FCMP1]](s1), [[OR1]], [[C]] - ; GFX9: [[FADD1:%[0-9]+]]:_(s32) = G_FADD [[INTRINSIC_TRUNC1]], [[SELECT1]] - ; GFX9: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[FADD]](s32), [[FADD1]](s32) - ; GFX9: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x s32>) + ; GFX9-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<2 x s32>) + ; GFX9-NEXT: [[INTRINSIC_TRUNC:%[0-9]+]]:_(s32) = G_INTRINSIC_TRUNC [[UV]] + ; GFX9-NEXT: [[FSUB:%[0-9]+]]:_(s32) = G_FSUB [[UV]], [[INTRINSIC_TRUNC]] + ; GFX9-NEXT: [[FABS:%[0-9]+]]:_(s32) = G_FABS [[FSUB]] + ; GFX9-NEXT: [[C:%[0-9]+]]:_(s32) = G_FCONSTANT float 0.000000e+00 + ; 
GFX9-NEXT: [[C1:%[0-9]+]]:_(s32) = G_FCONSTANT float 5.000000e-01 + ; GFX9-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 -2147483648 + ; GFX9-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 1065353216 + ; GFX9-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[UV]], [[C2]] + ; GFX9-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[C3]], [[AND]] + ; GFX9-NEXT: [[FCMP:%[0-9]+]]:_(s1) = G_FCMP floatpred(oge), [[FABS]](s32), [[C1]] + ; GFX9-NEXT: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[FCMP]](s1), [[OR]], [[C]] + ; GFX9-NEXT: [[FADD:%[0-9]+]]:_(s32) = G_FADD [[INTRINSIC_TRUNC]], [[SELECT]] + ; GFX9-NEXT: [[INTRINSIC_TRUNC1:%[0-9]+]]:_(s32) = G_INTRINSIC_TRUNC [[UV1]] + ; GFX9-NEXT: [[FSUB1:%[0-9]+]]:_(s32) = G_FSUB [[UV1]], [[INTRINSIC_TRUNC1]] + ; GFX9-NEXT: [[FABS1:%[0-9]+]]:_(s32) = G_FABS [[FSUB1]] + ; GFX9-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[UV1]], [[C2]] + ; GFX9-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[C3]], [[AND1]] + ; GFX9-NEXT: [[FCMP1:%[0-9]+]]:_(s1) = G_FCMP floatpred(oge), [[FABS1]](s32), [[C1]] + ; GFX9-NEXT: [[SELECT1:%[0-9]+]]:_(s32) = G_SELECT [[FCMP1]](s1), [[OR1]], [[C]] + ; GFX9-NEXT: [[FADD1:%[0-9]+]]:_(s32) = G_FADD [[INTRINSIC_TRUNC1]], [[SELECT1]] + ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[FADD]](s32), [[FADD1]](s32) + ; GFX9-NEXT: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x s32>) %0:_(<2 x s32>) = COPY $vgpr0_vgpr1 %1:_(<2 x s32>) = G_INTRINSIC_ROUND %0 $vgpr0_vgpr1 = COPY %1 @@ -288,115 +288,115 @@ body: | ; GFX6-LABEL: name: test_intrinsic_round_v2s64 ; GFX6: [[COPY:%[0-9]+]]:_(<2 x s64>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3 - ; GFX6: [[UV:%[0-9]+]]:_(s64), [[UV1:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[COPY]](<2 x s64>) - ; GFX6: [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[UV]](s64) - ; GFX6: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 20 - ; GFX6: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 11 - ; GFX6: [[INT:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.ubfe), [[UV3]](s32), [[C]](s32), [[C1]](s32) - ; GFX6: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 1023 - ; GFX6: [[SUB:%[0-9]+]]:_(s32) = G_SUB [[INT]], [[C2]] - ; GFX6: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 -2147483648 - ; GFX6: [[AND:%[0-9]+]]:_(s32) = G_AND [[UV3]], [[C3]] - ; GFX6: [[C4:%[0-9]+]]:_(s64) = G_CONSTANT i64 4503599627370495 - ; GFX6: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 - ; GFX6: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[C5]](s32), [[AND]](s32) - ; GFX6: [[ASHR:%[0-9]+]]:_(s64) = G_ASHR [[C4]], [[SUB]](s32) - ; GFX6: [[C6:%[0-9]+]]:_(s64) = G_CONSTANT i64 -1 - ; GFX6: [[XOR:%[0-9]+]]:_(s64) = G_XOR [[ASHR]], [[C6]] - ; GFX6: [[AND1:%[0-9]+]]:_(s64) = G_AND [[UV]], [[XOR]] - ; GFX6: [[C7:%[0-9]+]]:_(s32) = G_CONSTANT i32 51 - ; GFX6: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(slt), [[SUB]](s32), [[C5]] - ; GFX6: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(sgt), [[SUB]](s32), [[C7]] - ; GFX6: [[SELECT:%[0-9]+]]:_(s64) = G_SELECT [[ICMP]](s1), [[MV]], [[AND1]] - ; GFX6: [[SELECT1:%[0-9]+]]:_(s64) = G_SELECT [[ICMP1]](s1), [[UV]], [[SELECT]] - ; GFX6: [[FNEG:%[0-9]+]]:_(s64) = G_FNEG [[SELECT1]] - ; GFX6: [[FADD:%[0-9]+]]:_(s64) = G_FADD [[UV]], [[FNEG]] - ; GFX6: [[FABS:%[0-9]+]]:_(s64) = G_FABS [[FADD]] - ; GFX6: [[C8:%[0-9]+]]:_(s64) = G_FCONSTANT double 0.000000e+00 - ; GFX6: [[C9:%[0-9]+]]:_(s64) = G_FCONSTANT double 5.000000e-01 - ; GFX6: [[C10:%[0-9]+]]:_(s64) = G_CONSTANT i64 -9223372036854775808 - ; GFX6: [[C11:%[0-9]+]]:_(s64) = G_CONSTANT i64 4607182418800017408 - ; GFX6: [[AND2:%[0-9]+]]:_(s64) = G_AND [[UV]], [[C10]] - ; GFX6: [[OR:%[0-9]+]]:_(s64) = G_OR [[C11]], [[AND2]] - ; GFX6: 
[[FCMP:%[0-9]+]]:_(s1) = G_FCMP floatpred(oge), [[FABS]](s64), [[C9]] - ; GFX6: [[SELECT2:%[0-9]+]]:_(s64) = G_SELECT [[FCMP]](s1), [[OR]], [[C8]] - ; GFX6: [[FADD1:%[0-9]+]]:_(s64) = G_FADD [[SELECT1]], [[SELECT2]] - ; GFX6: [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[UV1]](s64) - ; GFX6: [[INT1:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.ubfe), [[UV5]](s32), [[C]](s32), [[C1]](s32) - ; GFX6: [[SUB1:%[0-9]+]]:_(s32) = G_SUB [[INT1]], [[C2]] - ; GFX6: [[AND3:%[0-9]+]]:_(s32) = G_AND [[UV5]], [[C3]] - ; GFX6: [[MV1:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[C5]](s32), [[AND3]](s32) - ; GFX6: [[ASHR1:%[0-9]+]]:_(s64) = G_ASHR [[C4]], [[SUB1]](s32) - ; GFX6: [[XOR1:%[0-9]+]]:_(s64) = G_XOR [[ASHR1]], [[C6]] - ; GFX6: [[AND4:%[0-9]+]]:_(s64) = G_AND [[UV1]], [[XOR1]] - ; GFX6: [[ICMP2:%[0-9]+]]:_(s1) = G_ICMP intpred(slt), [[SUB1]](s32), [[C5]] - ; GFX6: [[ICMP3:%[0-9]+]]:_(s1) = G_ICMP intpred(sgt), [[SUB1]](s32), [[C7]] - ; GFX6: [[SELECT3:%[0-9]+]]:_(s64) = G_SELECT [[ICMP2]](s1), [[MV1]], [[AND4]] - ; GFX6: [[SELECT4:%[0-9]+]]:_(s64) = G_SELECT [[ICMP3]](s1), [[UV1]], [[SELECT3]] - ; GFX6: [[FNEG1:%[0-9]+]]:_(s64) = G_FNEG [[SELECT4]] - ; GFX6: [[FADD2:%[0-9]+]]:_(s64) = G_FADD [[UV1]], [[FNEG1]] - ; GFX6: [[FABS1:%[0-9]+]]:_(s64) = G_FABS [[FADD2]] - ; GFX6: [[AND5:%[0-9]+]]:_(s64) = G_AND [[UV1]], [[C10]] - ; GFX6: [[OR1:%[0-9]+]]:_(s64) = G_OR [[C11]], [[AND5]] - ; GFX6: [[FCMP1:%[0-9]+]]:_(s1) = G_FCMP floatpred(oge), [[FABS1]](s64), [[C9]] - ; GFX6: [[SELECT5:%[0-9]+]]:_(s64) = G_SELECT [[FCMP1]](s1), [[OR1]], [[C8]] - ; GFX6: [[FADD3:%[0-9]+]]:_(s64) = G_FADD [[SELECT4]], [[SELECT5]] - ; GFX6: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s64>) = G_BUILD_VECTOR [[FADD1]](s64), [[FADD3]](s64) - ; GFX6: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<2 x s64>) + ; GFX6-NEXT: [[UV:%[0-9]+]]:_(s64), [[UV1:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[COPY]](<2 x s64>) + ; GFX6-NEXT: [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[UV]](s64) + ; GFX6-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 20 + ; GFX6-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 11 + ; GFX6-NEXT: [[INT:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.ubfe), [[UV3]](s32), [[C]](s32), [[C1]](s32) + ; GFX6-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 1023 + ; GFX6-NEXT: [[SUB:%[0-9]+]]:_(s32) = G_SUB [[INT]], [[C2]] + ; GFX6-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 -2147483648 + ; GFX6-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[UV3]], [[C3]] + ; GFX6-NEXT: [[C4:%[0-9]+]]:_(s64) = G_CONSTANT i64 4503599627370495 + ; GFX6-NEXT: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 + ; GFX6-NEXT: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[C5]](s32), [[AND]](s32) + ; GFX6-NEXT: [[ASHR:%[0-9]+]]:_(s64) = G_ASHR [[C4]], [[SUB]](s32) + ; GFX6-NEXT: [[C6:%[0-9]+]]:_(s64) = G_CONSTANT i64 -1 + ; GFX6-NEXT: [[XOR:%[0-9]+]]:_(s64) = G_XOR [[ASHR]], [[C6]] + ; GFX6-NEXT: [[AND1:%[0-9]+]]:_(s64) = G_AND [[UV]], [[XOR]] + ; GFX6-NEXT: [[C7:%[0-9]+]]:_(s32) = G_CONSTANT i32 51 + ; GFX6-NEXT: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(slt), [[SUB]](s32), [[C5]] + ; GFX6-NEXT: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(sgt), [[SUB]](s32), [[C7]] + ; GFX6-NEXT: [[SELECT:%[0-9]+]]:_(s64) = G_SELECT [[ICMP]](s1), [[MV]], [[AND1]] + ; GFX6-NEXT: [[SELECT1:%[0-9]+]]:_(s64) = G_SELECT [[ICMP1]](s1), [[UV]], [[SELECT]] + ; GFX6-NEXT: [[FNEG:%[0-9]+]]:_(s64) = G_FNEG [[SELECT1]] + ; GFX6-NEXT: [[FADD:%[0-9]+]]:_(s64) = G_FADD [[UV]], [[FNEG]] + ; GFX6-NEXT: [[FABS:%[0-9]+]]:_(s64) = G_FABS [[FADD]] + ; GFX6-NEXT: [[C8:%[0-9]+]]:_(s64) 
= G_FCONSTANT double 0.000000e+00 + ; GFX6-NEXT: [[C9:%[0-9]+]]:_(s64) = G_FCONSTANT double 5.000000e-01 + ; GFX6-NEXT: [[C10:%[0-9]+]]:_(s64) = G_CONSTANT i64 -9223372036854775808 + ; GFX6-NEXT: [[C11:%[0-9]+]]:_(s64) = G_CONSTANT i64 4607182418800017408 + ; GFX6-NEXT: [[AND2:%[0-9]+]]:_(s64) = G_AND [[UV]], [[C10]] + ; GFX6-NEXT: [[OR:%[0-9]+]]:_(s64) = G_OR [[C11]], [[AND2]] + ; GFX6-NEXT: [[FCMP:%[0-9]+]]:_(s1) = G_FCMP floatpred(oge), [[FABS]](s64), [[C9]] + ; GFX6-NEXT: [[SELECT2:%[0-9]+]]:_(s64) = G_SELECT [[FCMP]](s1), [[OR]], [[C8]] + ; GFX6-NEXT: [[FADD1:%[0-9]+]]:_(s64) = G_FADD [[SELECT1]], [[SELECT2]] + ; GFX6-NEXT: [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[UV1]](s64) + ; GFX6-NEXT: [[INT1:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.ubfe), [[UV5]](s32), [[C]](s32), [[C1]](s32) + ; GFX6-NEXT: [[SUB1:%[0-9]+]]:_(s32) = G_SUB [[INT1]], [[C2]] + ; GFX6-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[UV5]], [[C3]] + ; GFX6-NEXT: [[MV1:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[C5]](s32), [[AND3]](s32) + ; GFX6-NEXT: [[ASHR1:%[0-9]+]]:_(s64) = G_ASHR [[C4]], [[SUB1]](s32) + ; GFX6-NEXT: [[XOR1:%[0-9]+]]:_(s64) = G_XOR [[ASHR1]], [[C6]] + ; GFX6-NEXT: [[AND4:%[0-9]+]]:_(s64) = G_AND [[UV1]], [[XOR1]] + ; GFX6-NEXT: [[ICMP2:%[0-9]+]]:_(s1) = G_ICMP intpred(slt), [[SUB1]](s32), [[C5]] + ; GFX6-NEXT: [[ICMP3:%[0-9]+]]:_(s1) = G_ICMP intpred(sgt), [[SUB1]](s32), [[C7]] + ; GFX6-NEXT: [[SELECT3:%[0-9]+]]:_(s64) = G_SELECT [[ICMP2]](s1), [[MV1]], [[AND4]] + ; GFX6-NEXT: [[SELECT4:%[0-9]+]]:_(s64) = G_SELECT [[ICMP3]](s1), [[UV1]], [[SELECT3]] + ; GFX6-NEXT: [[FNEG1:%[0-9]+]]:_(s64) = G_FNEG [[SELECT4]] + ; GFX6-NEXT: [[FADD2:%[0-9]+]]:_(s64) = G_FADD [[UV1]], [[FNEG1]] + ; GFX6-NEXT: [[FABS1:%[0-9]+]]:_(s64) = G_FABS [[FADD2]] + ; GFX6-NEXT: [[AND5:%[0-9]+]]:_(s64) = G_AND [[UV1]], [[C10]] + ; GFX6-NEXT: [[OR1:%[0-9]+]]:_(s64) = G_OR [[C11]], [[AND5]] + ; GFX6-NEXT: [[FCMP1:%[0-9]+]]:_(s1) = G_FCMP floatpred(oge), [[FABS1]](s64), [[C9]] + ; GFX6-NEXT: [[SELECT5:%[0-9]+]]:_(s64) = G_SELECT [[FCMP1]](s1), [[OR1]], [[C8]] + ; GFX6-NEXT: [[FADD3:%[0-9]+]]:_(s64) = G_FADD [[SELECT4]], [[SELECT5]] + ; GFX6-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s64>) = G_BUILD_VECTOR [[FADD1]](s64), [[FADD3]](s64) + ; GFX6-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<2 x s64>) ; GFX8-LABEL: name: test_intrinsic_round_v2s64 ; GFX8: [[COPY:%[0-9]+]]:_(<2 x s64>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3 - ; GFX8: [[UV:%[0-9]+]]:_(s64), [[UV1:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[COPY]](<2 x s64>) - ; GFX8: [[INTRINSIC_TRUNC:%[0-9]+]]:_(s64) = G_INTRINSIC_TRUNC [[UV]] - ; GFX8: [[FNEG:%[0-9]+]]:_(s64) = G_FNEG [[INTRINSIC_TRUNC]] - ; GFX8: [[FADD:%[0-9]+]]:_(s64) = G_FADD [[UV]], [[FNEG]] - ; GFX8: [[FABS:%[0-9]+]]:_(s64) = G_FABS [[FADD]] - ; GFX8: [[C:%[0-9]+]]:_(s64) = G_FCONSTANT double 0.000000e+00 - ; GFX8: [[C1:%[0-9]+]]:_(s64) = G_FCONSTANT double 5.000000e-01 - ; GFX8: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 -9223372036854775808 - ; GFX8: [[C3:%[0-9]+]]:_(s64) = G_CONSTANT i64 4607182418800017408 - ; GFX8: [[AND:%[0-9]+]]:_(s64) = G_AND [[UV]], [[C2]] - ; GFX8: [[OR:%[0-9]+]]:_(s64) = G_OR [[C3]], [[AND]] - ; GFX8: [[FCMP:%[0-9]+]]:_(s1) = G_FCMP floatpred(oge), [[FABS]](s64), [[C1]] - ; GFX8: [[SELECT:%[0-9]+]]:_(s64) = G_SELECT [[FCMP]](s1), [[OR]], [[C]] - ; GFX8: [[FADD1:%[0-9]+]]:_(s64) = G_FADD [[INTRINSIC_TRUNC]], [[SELECT]] - ; GFX8: [[INTRINSIC_TRUNC1:%[0-9]+]]:_(s64) = G_INTRINSIC_TRUNC [[UV1]] - ; GFX8: [[FNEG1:%[0-9]+]]:_(s64) = G_FNEG [[INTRINSIC_TRUNC1]] - ; GFX8: 
[[FADD2:%[0-9]+]]:_(s64) = G_FADD [[UV1]], [[FNEG1]] - ; GFX8: [[FABS1:%[0-9]+]]:_(s64) = G_FABS [[FADD2]] - ; GFX8: [[AND1:%[0-9]+]]:_(s64) = G_AND [[UV1]], [[C2]] - ; GFX8: [[OR1:%[0-9]+]]:_(s64) = G_OR [[C3]], [[AND1]] - ; GFX8: [[FCMP1:%[0-9]+]]:_(s1) = G_FCMP floatpred(oge), [[FABS1]](s64), [[C1]] - ; GFX8: [[SELECT1:%[0-9]+]]:_(s64) = G_SELECT [[FCMP1]](s1), [[OR1]], [[C]] - ; GFX8: [[FADD3:%[0-9]+]]:_(s64) = G_FADD [[INTRINSIC_TRUNC1]], [[SELECT1]] - ; GFX8: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s64>) = G_BUILD_VECTOR [[FADD1]](s64), [[FADD3]](s64) - ; GFX8: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<2 x s64>) + ; GFX8-NEXT: [[UV:%[0-9]+]]:_(s64), [[UV1:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[COPY]](<2 x s64>) + ; GFX8-NEXT: [[INTRINSIC_TRUNC:%[0-9]+]]:_(s64) = G_INTRINSIC_TRUNC [[UV]] + ; GFX8-NEXT: [[FNEG:%[0-9]+]]:_(s64) = G_FNEG [[INTRINSIC_TRUNC]] + ; GFX8-NEXT: [[FADD:%[0-9]+]]:_(s64) = G_FADD [[UV]], [[FNEG]] + ; GFX8-NEXT: [[FABS:%[0-9]+]]:_(s64) = G_FABS [[FADD]] + ; GFX8-NEXT: [[C:%[0-9]+]]:_(s64) = G_FCONSTANT double 0.000000e+00 + ; GFX8-NEXT: [[C1:%[0-9]+]]:_(s64) = G_FCONSTANT double 5.000000e-01 + ; GFX8-NEXT: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 -9223372036854775808 + ; GFX8-NEXT: [[C3:%[0-9]+]]:_(s64) = G_CONSTANT i64 4607182418800017408 + ; GFX8-NEXT: [[AND:%[0-9]+]]:_(s64) = G_AND [[UV]], [[C2]] + ; GFX8-NEXT: [[OR:%[0-9]+]]:_(s64) = G_OR [[C3]], [[AND]] + ; GFX8-NEXT: [[FCMP:%[0-9]+]]:_(s1) = G_FCMP floatpred(oge), [[FABS]](s64), [[C1]] + ; GFX8-NEXT: [[SELECT:%[0-9]+]]:_(s64) = G_SELECT [[FCMP]](s1), [[OR]], [[C]] + ; GFX8-NEXT: [[FADD1:%[0-9]+]]:_(s64) = G_FADD [[INTRINSIC_TRUNC]], [[SELECT]] + ; GFX8-NEXT: [[INTRINSIC_TRUNC1:%[0-9]+]]:_(s64) = G_INTRINSIC_TRUNC [[UV1]] + ; GFX8-NEXT: [[FNEG1:%[0-9]+]]:_(s64) = G_FNEG [[INTRINSIC_TRUNC1]] + ; GFX8-NEXT: [[FADD2:%[0-9]+]]:_(s64) = G_FADD [[UV1]], [[FNEG1]] + ; GFX8-NEXT: [[FABS1:%[0-9]+]]:_(s64) = G_FABS [[FADD2]] + ; GFX8-NEXT: [[AND1:%[0-9]+]]:_(s64) = G_AND [[UV1]], [[C2]] + ; GFX8-NEXT: [[OR1:%[0-9]+]]:_(s64) = G_OR [[C3]], [[AND1]] + ; GFX8-NEXT: [[FCMP1:%[0-9]+]]:_(s1) = G_FCMP floatpred(oge), [[FABS1]](s64), [[C1]] + ; GFX8-NEXT: [[SELECT1:%[0-9]+]]:_(s64) = G_SELECT [[FCMP1]](s1), [[OR1]], [[C]] + ; GFX8-NEXT: [[FADD3:%[0-9]+]]:_(s64) = G_FADD [[INTRINSIC_TRUNC1]], [[SELECT1]] + ; GFX8-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s64>) = G_BUILD_VECTOR [[FADD1]](s64), [[FADD3]](s64) + ; GFX8-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<2 x s64>) ; GFX9-LABEL: name: test_intrinsic_round_v2s64 ; GFX9: [[COPY:%[0-9]+]]:_(<2 x s64>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3 - ; GFX9: [[UV:%[0-9]+]]:_(s64), [[UV1:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[COPY]](<2 x s64>) - ; GFX9: [[INTRINSIC_TRUNC:%[0-9]+]]:_(s64) = G_INTRINSIC_TRUNC [[UV]] - ; GFX9: [[FNEG:%[0-9]+]]:_(s64) = G_FNEG [[INTRINSIC_TRUNC]] - ; GFX9: [[FADD:%[0-9]+]]:_(s64) = G_FADD [[UV]], [[FNEG]] - ; GFX9: [[FABS:%[0-9]+]]:_(s64) = G_FABS [[FADD]] - ; GFX9: [[C:%[0-9]+]]:_(s64) = G_FCONSTANT double 0.000000e+00 - ; GFX9: [[C1:%[0-9]+]]:_(s64) = G_FCONSTANT double 5.000000e-01 - ; GFX9: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 -9223372036854775808 - ; GFX9: [[C3:%[0-9]+]]:_(s64) = G_CONSTANT i64 4607182418800017408 - ; GFX9: [[AND:%[0-9]+]]:_(s64) = G_AND [[UV]], [[C2]] - ; GFX9: [[OR:%[0-9]+]]:_(s64) = G_OR [[C3]], [[AND]] - ; GFX9: [[FCMP:%[0-9]+]]:_(s1) = G_FCMP floatpred(oge), [[FABS]](s64), [[C1]] - ; GFX9: [[SELECT:%[0-9]+]]:_(s64) = G_SELECT [[FCMP]](s1), [[OR]], [[C]] - ; GFX9: [[FADD1:%[0-9]+]]:_(s64) = G_FADD [[INTRINSIC_TRUNC]], [[SELECT]] - ; 
GFX9: [[INTRINSIC_TRUNC1:%[0-9]+]]:_(s64) = G_INTRINSIC_TRUNC [[UV1]] - ; GFX9: [[FNEG1:%[0-9]+]]:_(s64) = G_FNEG [[INTRINSIC_TRUNC1]] - ; GFX9: [[FADD2:%[0-9]+]]:_(s64) = G_FADD [[UV1]], [[FNEG1]] - ; GFX9: [[FABS1:%[0-9]+]]:_(s64) = G_FABS [[FADD2]] - ; GFX9: [[AND1:%[0-9]+]]:_(s64) = G_AND [[UV1]], [[C2]] - ; GFX9: [[OR1:%[0-9]+]]:_(s64) = G_OR [[C3]], [[AND1]] - ; GFX9: [[FCMP1:%[0-9]+]]:_(s1) = G_FCMP floatpred(oge), [[FABS1]](s64), [[C1]] - ; GFX9: [[SELECT1:%[0-9]+]]:_(s64) = G_SELECT [[FCMP1]](s1), [[OR1]], [[C]] - ; GFX9: [[FADD3:%[0-9]+]]:_(s64) = G_FADD [[INTRINSIC_TRUNC1]], [[SELECT1]] - ; GFX9: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s64>) = G_BUILD_VECTOR [[FADD1]](s64), [[FADD3]](s64) - ; GFX9: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<2 x s64>) + ; GFX9-NEXT: [[UV:%[0-9]+]]:_(s64), [[UV1:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[COPY]](<2 x s64>) + ; GFX9-NEXT: [[INTRINSIC_TRUNC:%[0-9]+]]:_(s64) = G_INTRINSIC_TRUNC [[UV]] + ; GFX9-NEXT: [[FNEG:%[0-9]+]]:_(s64) = G_FNEG [[INTRINSIC_TRUNC]] + ; GFX9-NEXT: [[FADD:%[0-9]+]]:_(s64) = G_FADD [[UV]], [[FNEG]] + ; GFX9-NEXT: [[FABS:%[0-9]+]]:_(s64) = G_FABS [[FADD]] + ; GFX9-NEXT: [[C:%[0-9]+]]:_(s64) = G_FCONSTANT double 0.000000e+00 + ; GFX9-NEXT: [[C1:%[0-9]+]]:_(s64) = G_FCONSTANT double 5.000000e-01 + ; GFX9-NEXT: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 -9223372036854775808 + ; GFX9-NEXT: [[C3:%[0-9]+]]:_(s64) = G_CONSTANT i64 4607182418800017408 + ; GFX9-NEXT: [[AND:%[0-9]+]]:_(s64) = G_AND [[UV]], [[C2]] + ; GFX9-NEXT: [[OR:%[0-9]+]]:_(s64) = G_OR [[C3]], [[AND]] + ; GFX9-NEXT: [[FCMP:%[0-9]+]]:_(s1) = G_FCMP floatpred(oge), [[FABS]](s64), [[C1]] + ; GFX9-NEXT: [[SELECT:%[0-9]+]]:_(s64) = G_SELECT [[FCMP]](s1), [[OR]], [[C]] + ; GFX9-NEXT: [[FADD1:%[0-9]+]]:_(s64) = G_FADD [[INTRINSIC_TRUNC]], [[SELECT]] + ; GFX9-NEXT: [[INTRINSIC_TRUNC1:%[0-9]+]]:_(s64) = G_INTRINSIC_TRUNC [[UV1]] + ; GFX9-NEXT: [[FNEG1:%[0-9]+]]:_(s64) = G_FNEG [[INTRINSIC_TRUNC1]] + ; GFX9-NEXT: [[FADD2:%[0-9]+]]:_(s64) = G_FADD [[UV1]], [[FNEG1]] + ; GFX9-NEXT: [[FABS1:%[0-9]+]]:_(s64) = G_FABS [[FADD2]] + ; GFX9-NEXT: [[AND1:%[0-9]+]]:_(s64) = G_AND [[UV1]], [[C2]] + ; GFX9-NEXT: [[OR1:%[0-9]+]]:_(s64) = G_OR [[C3]], [[AND1]] + ; GFX9-NEXT: [[FCMP1:%[0-9]+]]:_(s1) = G_FCMP floatpred(oge), [[FABS1]](s64), [[C1]] + ; GFX9-NEXT: [[SELECT1:%[0-9]+]]:_(s64) = G_SELECT [[FCMP1]](s1), [[OR1]], [[C]] + ; GFX9-NEXT: [[FADD3:%[0-9]+]]:_(s64) = G_FADD [[INTRINSIC_TRUNC1]], [[SELECT1]] + ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s64>) = G_BUILD_VECTOR [[FADD1]](s64), [[FADD3]](s64) + ; GFX9-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<2 x s64>) %0:_(<2 x s64>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3 %1:_(<2 x s64>) = G_INTRINSIC_ROUND %0 $vgpr0_vgpr1_vgpr2_vgpr3 = COPY %1 @@ -410,68 +410,68 @@ body: | ; GFX6-LABEL: name: test_intrinsic_round_s16 ; GFX6: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX6: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY]](s32) - ; GFX6: [[FPEXT:%[0-9]+]]:_(s32) = G_FPEXT [[TRUNC]](s16) - ; GFX6: [[INTRINSIC_TRUNC:%[0-9]+]]:_(s32) = G_INTRINSIC_TRUNC [[FPEXT]] - ; GFX6: [[FPTRUNC:%[0-9]+]]:_(s16) = G_FPTRUNC [[INTRINSIC_TRUNC]](s32) - ; GFX6: [[FNEG:%[0-9]+]]:_(s16) = G_FNEG [[FPTRUNC]] - ; GFX6: [[FPEXT1:%[0-9]+]]:_(s32) = G_FPEXT [[TRUNC]](s16) - ; GFX6: [[FPEXT2:%[0-9]+]]:_(s32) = G_FPEXT [[FNEG]](s16) - ; GFX6: [[FADD:%[0-9]+]]:_(s32) = G_FADD [[FPEXT1]], [[FPEXT2]] - ; GFX6: [[FPTRUNC1:%[0-9]+]]:_(s16) = G_FPTRUNC [[FADD]](s32) - ; GFX6: [[FABS:%[0-9]+]]:_(s16) = G_FABS [[FPTRUNC1]] - ; GFX6: [[C:%[0-9]+]]:_(s16) = G_FCONSTANT half 
0xH0000 - ; GFX6: [[C1:%[0-9]+]]:_(s16) = G_FCONSTANT half 0xH3800 - ; GFX6: [[C2:%[0-9]+]]:_(s16) = G_CONSTANT i16 -32768 - ; GFX6: [[C3:%[0-9]+]]:_(s16) = G_CONSTANT i16 15360 - ; GFX6: [[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC]], [[C2]] - ; GFX6: [[OR:%[0-9]+]]:_(s16) = G_OR [[C3]], [[AND]] - ; GFX6: [[FPEXT3:%[0-9]+]]:_(s32) = G_FPEXT [[FABS]](s16) - ; GFX6: [[FPEXT4:%[0-9]+]]:_(s32) = G_FPEXT [[C1]](s16) - ; GFX6: [[FCMP:%[0-9]+]]:_(s1) = G_FCMP floatpred(oge), [[FPEXT3]](s32), [[FPEXT4]] - ; GFX6: [[SELECT:%[0-9]+]]:_(s16) = G_SELECT [[FCMP]](s1), [[OR]], [[C]] - ; GFX6: [[FPEXT5:%[0-9]+]]:_(s32) = G_FPEXT [[FPTRUNC]](s16) - ; GFX6: [[FPEXT6:%[0-9]+]]:_(s32) = G_FPEXT [[SELECT]](s16) - ; GFX6: [[FADD1:%[0-9]+]]:_(s32) = G_FADD [[FPEXT5]], [[FPEXT6]] - ; GFX6: [[FPTRUNC2:%[0-9]+]]:_(s16) = G_FPTRUNC [[FADD1]](s32) - ; GFX6: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[FPTRUNC2]](s16) - ; GFX6: $vgpr0 = COPY [[ANYEXT]](s32) + ; GFX6-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY]](s32) + ; GFX6-NEXT: [[FPEXT:%[0-9]+]]:_(s32) = G_FPEXT [[TRUNC]](s16) + ; GFX6-NEXT: [[INTRINSIC_TRUNC:%[0-9]+]]:_(s32) = G_INTRINSIC_TRUNC [[FPEXT]] + ; GFX6-NEXT: [[FPTRUNC:%[0-9]+]]:_(s16) = G_FPTRUNC [[INTRINSIC_TRUNC]](s32) + ; GFX6-NEXT: [[FNEG:%[0-9]+]]:_(s16) = G_FNEG [[FPTRUNC]] + ; GFX6-NEXT: [[FPEXT1:%[0-9]+]]:_(s32) = G_FPEXT [[TRUNC]](s16) + ; GFX6-NEXT: [[FPEXT2:%[0-9]+]]:_(s32) = G_FPEXT [[FNEG]](s16) + ; GFX6-NEXT: [[FADD:%[0-9]+]]:_(s32) = G_FADD [[FPEXT1]], [[FPEXT2]] + ; GFX6-NEXT: [[FPTRUNC1:%[0-9]+]]:_(s16) = G_FPTRUNC [[FADD]](s32) + ; GFX6-NEXT: [[FABS:%[0-9]+]]:_(s16) = G_FABS [[FPTRUNC1]] + ; GFX6-NEXT: [[C:%[0-9]+]]:_(s16) = G_FCONSTANT half 0xH0000 + ; GFX6-NEXT: [[C1:%[0-9]+]]:_(s16) = G_FCONSTANT half 0xH3800 + ; GFX6-NEXT: [[C2:%[0-9]+]]:_(s16) = G_CONSTANT i16 -32768 + ; GFX6-NEXT: [[C3:%[0-9]+]]:_(s16) = G_CONSTANT i16 15360 + ; GFX6-NEXT: [[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC]], [[C2]] + ; GFX6-NEXT: [[OR:%[0-9]+]]:_(s16) = G_OR [[C3]], [[AND]] + ; GFX6-NEXT: [[FPEXT3:%[0-9]+]]:_(s32) = G_FPEXT [[FABS]](s16) + ; GFX6-NEXT: [[FPEXT4:%[0-9]+]]:_(s32) = G_FPEXT [[C1]](s16) + ; GFX6-NEXT: [[FCMP:%[0-9]+]]:_(s1) = G_FCMP floatpred(oge), [[FPEXT3]](s32), [[FPEXT4]] + ; GFX6-NEXT: [[SELECT:%[0-9]+]]:_(s16) = G_SELECT [[FCMP]](s1), [[OR]], [[C]] + ; GFX6-NEXT: [[FPEXT5:%[0-9]+]]:_(s32) = G_FPEXT [[FPTRUNC]](s16) + ; GFX6-NEXT: [[FPEXT6:%[0-9]+]]:_(s32) = G_FPEXT [[SELECT]](s16) + ; GFX6-NEXT: [[FADD1:%[0-9]+]]:_(s32) = G_FADD [[FPEXT5]], [[FPEXT6]] + ; GFX6-NEXT: [[FPTRUNC2:%[0-9]+]]:_(s16) = G_FPTRUNC [[FADD1]](s32) + ; GFX6-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[FPTRUNC2]](s16) + ; GFX6-NEXT: $vgpr0 = COPY [[ANYEXT]](s32) ; GFX8-LABEL: name: test_intrinsic_round_s16 ; GFX8: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX8: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY]](s32) - ; GFX8: [[INTRINSIC_TRUNC:%[0-9]+]]:_(s16) = G_INTRINSIC_TRUNC [[TRUNC]] - ; GFX8: [[FNEG:%[0-9]+]]:_(s16) = G_FNEG [[INTRINSIC_TRUNC]] - ; GFX8: [[FADD:%[0-9]+]]:_(s16) = G_FADD [[TRUNC]], [[FNEG]] - ; GFX8: [[FABS:%[0-9]+]]:_(s16) = G_FABS [[FADD]] - ; GFX8: [[C:%[0-9]+]]:_(s16) = G_FCONSTANT half 0xH0000 - ; GFX8: [[C1:%[0-9]+]]:_(s16) = G_FCONSTANT half 0xH3800 - ; GFX8: [[C2:%[0-9]+]]:_(s16) = G_CONSTANT i16 -32768 - ; GFX8: [[C3:%[0-9]+]]:_(s16) = G_CONSTANT i16 15360 - ; GFX8: [[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC]], [[C2]] - ; GFX8: [[OR:%[0-9]+]]:_(s16) = G_OR [[C3]], [[AND]] - ; GFX8: [[FCMP:%[0-9]+]]:_(s1) = G_FCMP floatpred(oge), [[FABS]](s16), [[C1]] - ; GFX8: [[SELECT:%[0-9]+]]:_(s16) = G_SELECT 
[[FCMP]](s1), [[OR]], [[C]] - ; GFX8: [[FADD1:%[0-9]+]]:_(s16) = G_FADD [[INTRINSIC_TRUNC]], [[SELECT]] - ; GFX8: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[FADD1]](s16) - ; GFX8: $vgpr0 = COPY [[ANYEXT]](s32) + ; GFX8-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY]](s32) + ; GFX8-NEXT: [[INTRINSIC_TRUNC:%[0-9]+]]:_(s16) = G_INTRINSIC_TRUNC [[TRUNC]] + ; GFX8-NEXT: [[FNEG:%[0-9]+]]:_(s16) = G_FNEG [[INTRINSIC_TRUNC]] + ; GFX8-NEXT: [[FADD:%[0-9]+]]:_(s16) = G_FADD [[TRUNC]], [[FNEG]] + ; GFX8-NEXT: [[FABS:%[0-9]+]]:_(s16) = G_FABS [[FADD]] + ; GFX8-NEXT: [[C:%[0-9]+]]:_(s16) = G_FCONSTANT half 0xH0000 + ; GFX8-NEXT: [[C1:%[0-9]+]]:_(s16) = G_FCONSTANT half 0xH3800 + ; GFX8-NEXT: [[C2:%[0-9]+]]:_(s16) = G_CONSTANT i16 -32768 + ; GFX8-NEXT: [[C3:%[0-9]+]]:_(s16) = G_CONSTANT i16 15360 + ; GFX8-NEXT: [[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC]], [[C2]] + ; GFX8-NEXT: [[OR:%[0-9]+]]:_(s16) = G_OR [[C3]], [[AND]] + ; GFX8-NEXT: [[FCMP:%[0-9]+]]:_(s1) = G_FCMP floatpred(oge), [[FABS]](s16), [[C1]] + ; GFX8-NEXT: [[SELECT:%[0-9]+]]:_(s16) = G_SELECT [[FCMP]](s1), [[OR]], [[C]] + ; GFX8-NEXT: [[FADD1:%[0-9]+]]:_(s16) = G_FADD [[INTRINSIC_TRUNC]], [[SELECT]] + ; GFX8-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[FADD1]](s16) + ; GFX8-NEXT: $vgpr0 = COPY [[ANYEXT]](s32) ; GFX9-LABEL: name: test_intrinsic_round_s16 ; GFX9: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX9: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY]](s32) - ; GFX9: [[INTRINSIC_TRUNC:%[0-9]+]]:_(s16) = G_INTRINSIC_TRUNC [[TRUNC]] - ; GFX9: [[FNEG:%[0-9]+]]:_(s16) = G_FNEG [[INTRINSIC_TRUNC]] - ; GFX9: [[FADD:%[0-9]+]]:_(s16) = G_FADD [[TRUNC]], [[FNEG]] - ; GFX9: [[FABS:%[0-9]+]]:_(s16) = G_FABS [[FADD]] - ; GFX9: [[C:%[0-9]+]]:_(s16) = G_FCONSTANT half 0xH0000 - ; GFX9: [[C1:%[0-9]+]]:_(s16) = G_FCONSTANT half 0xH3800 - ; GFX9: [[C2:%[0-9]+]]:_(s16) = G_CONSTANT i16 -32768 - ; GFX9: [[C3:%[0-9]+]]:_(s16) = G_CONSTANT i16 15360 - ; GFX9: [[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC]], [[C2]] - ; GFX9: [[OR:%[0-9]+]]:_(s16) = G_OR [[C3]], [[AND]] - ; GFX9: [[FCMP:%[0-9]+]]:_(s1) = G_FCMP floatpred(oge), [[FABS]](s16), [[C1]] - ; GFX9: [[SELECT:%[0-9]+]]:_(s16) = G_SELECT [[FCMP]](s1), [[OR]], [[C]] - ; GFX9: [[FADD1:%[0-9]+]]:_(s16) = G_FADD [[INTRINSIC_TRUNC]], [[SELECT]] - ; GFX9: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[FADD1]](s16) - ; GFX9: $vgpr0 = COPY [[ANYEXT]](s32) + ; GFX9-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY]](s32) + ; GFX9-NEXT: [[INTRINSIC_TRUNC:%[0-9]+]]:_(s16) = G_INTRINSIC_TRUNC [[TRUNC]] + ; GFX9-NEXT: [[FNEG:%[0-9]+]]:_(s16) = G_FNEG [[INTRINSIC_TRUNC]] + ; GFX9-NEXT: [[FADD:%[0-9]+]]:_(s16) = G_FADD [[TRUNC]], [[FNEG]] + ; GFX9-NEXT: [[FABS:%[0-9]+]]:_(s16) = G_FABS [[FADD]] + ; GFX9-NEXT: [[C:%[0-9]+]]:_(s16) = G_FCONSTANT half 0xH0000 + ; GFX9-NEXT: [[C1:%[0-9]+]]:_(s16) = G_FCONSTANT half 0xH3800 + ; GFX9-NEXT: [[C2:%[0-9]+]]:_(s16) = G_CONSTANT i16 -32768 + ; GFX9-NEXT: [[C3:%[0-9]+]]:_(s16) = G_CONSTANT i16 15360 + ; GFX9-NEXT: [[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC]], [[C2]] + ; GFX9-NEXT: [[OR:%[0-9]+]]:_(s16) = G_OR [[C3]], [[AND]] + ; GFX9-NEXT: [[FCMP:%[0-9]+]]:_(s1) = G_FCMP floatpred(oge), [[FABS]](s16), [[C1]] + ; GFX9-NEXT: [[SELECT:%[0-9]+]]:_(s16) = G_SELECT [[FCMP]](s1), [[OR]], [[C]] + ; GFX9-NEXT: [[FADD1:%[0-9]+]]:_(s16) = G_FADD [[INTRINSIC_TRUNC]], [[SELECT]] + ; GFX9-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[FADD1]](s16) + ; GFX9-NEXT: $vgpr0 = COPY [[ANYEXT]](s32) %0:_(s32) = COPY $vgpr0 %1:_(s16) = G_TRUNC %0 %2:_(s16) = G_INTRINSIC_ROUND %1 @@ -487,127 +487,127 @@ body: | ; GFX6-LABEL: name: 
test_intrinsic_round_v2s16 ; GFX6: [[COPY:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0 - ; GFX6: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY]](<2 x s16>) - ; GFX6: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST]](s32) - ; GFX6: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; GFX6: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) - ; GFX6: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) - ; GFX6: [[FPEXT:%[0-9]+]]:_(s32) = G_FPEXT [[TRUNC]](s16) - ; GFX6: [[INTRINSIC_TRUNC:%[0-9]+]]:_(s32) = G_INTRINSIC_TRUNC [[FPEXT]] - ; GFX6: [[FPTRUNC:%[0-9]+]]:_(s16) = G_FPTRUNC [[INTRINSIC_TRUNC]](s32) - ; GFX6: [[FNEG:%[0-9]+]]:_(s16) = G_FNEG [[FPTRUNC]] - ; GFX6: [[FPEXT1:%[0-9]+]]:_(s32) = G_FPEXT [[TRUNC]](s16) - ; GFX6: [[FPEXT2:%[0-9]+]]:_(s32) = G_FPEXT [[FNEG]](s16) - ; GFX6: [[FADD:%[0-9]+]]:_(s32) = G_FADD [[FPEXT1]], [[FPEXT2]] - ; GFX6: [[FPTRUNC1:%[0-9]+]]:_(s16) = G_FPTRUNC [[FADD]](s32) - ; GFX6: [[FABS:%[0-9]+]]:_(s16) = G_FABS [[FPTRUNC1]] - ; GFX6: [[C1:%[0-9]+]]:_(s16) = G_FCONSTANT half 0xH0000 - ; GFX6: [[C2:%[0-9]+]]:_(s16) = G_FCONSTANT half 0xH3800 - ; GFX6: [[C3:%[0-9]+]]:_(s16) = G_CONSTANT i16 -32768 - ; GFX6: [[C4:%[0-9]+]]:_(s16) = G_CONSTANT i16 15360 - ; GFX6: [[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC]], [[C3]] - ; GFX6: [[OR:%[0-9]+]]:_(s16) = G_OR [[C4]], [[AND]] - ; GFX6: [[FPEXT3:%[0-9]+]]:_(s32) = G_FPEXT [[FABS]](s16) - ; GFX6: [[FPEXT4:%[0-9]+]]:_(s32) = G_FPEXT [[C2]](s16) - ; GFX6: [[FCMP:%[0-9]+]]:_(s1) = G_FCMP floatpred(oge), [[FPEXT3]](s32), [[FPEXT4]] - ; GFX6: [[SELECT:%[0-9]+]]:_(s16) = G_SELECT [[FCMP]](s1), [[OR]], [[C1]] - ; GFX6: [[FPEXT5:%[0-9]+]]:_(s32) = G_FPEXT [[FPTRUNC]](s16) - ; GFX6: [[FPEXT6:%[0-9]+]]:_(s32) = G_FPEXT [[SELECT]](s16) - ; GFX6: [[FADD1:%[0-9]+]]:_(s32) = G_FADD [[FPEXT5]], [[FPEXT6]] - ; GFX6: [[FPTRUNC2:%[0-9]+]]:_(s16) = G_FPTRUNC [[FADD1]](s32) - ; GFX6: [[FPEXT7:%[0-9]+]]:_(s32) = G_FPEXT [[TRUNC1]](s16) - ; GFX6: [[INTRINSIC_TRUNC1:%[0-9]+]]:_(s32) = G_INTRINSIC_TRUNC [[FPEXT7]] - ; GFX6: [[FPTRUNC3:%[0-9]+]]:_(s16) = G_FPTRUNC [[INTRINSIC_TRUNC1]](s32) - ; GFX6: [[FNEG1:%[0-9]+]]:_(s16) = G_FNEG [[FPTRUNC3]] - ; GFX6: [[FPEXT8:%[0-9]+]]:_(s32) = G_FPEXT [[TRUNC1]](s16) - ; GFX6: [[FPEXT9:%[0-9]+]]:_(s32) = G_FPEXT [[FNEG1]](s16) - ; GFX6: [[FADD2:%[0-9]+]]:_(s32) = G_FADD [[FPEXT8]], [[FPEXT9]] - ; GFX6: [[FPTRUNC4:%[0-9]+]]:_(s16) = G_FPTRUNC [[FADD2]](s32) - ; GFX6: [[FABS1:%[0-9]+]]:_(s16) = G_FABS [[FPTRUNC4]] - ; GFX6: [[AND1:%[0-9]+]]:_(s16) = G_AND [[TRUNC1]], [[C3]] - ; GFX6: [[OR1:%[0-9]+]]:_(s16) = G_OR [[C4]], [[AND1]] - ; GFX6: [[FPEXT10:%[0-9]+]]:_(s32) = G_FPEXT [[FABS1]](s16) - ; GFX6: [[FPEXT11:%[0-9]+]]:_(s32) = G_FPEXT [[C2]](s16) - ; GFX6: [[FCMP1:%[0-9]+]]:_(s1) = G_FCMP floatpred(oge), [[FPEXT10]](s32), [[FPEXT11]] - ; GFX6: [[SELECT1:%[0-9]+]]:_(s16) = G_SELECT [[FCMP1]](s1), [[OR1]], [[C1]] - ; GFX6: [[FPEXT12:%[0-9]+]]:_(s32) = G_FPEXT [[FPTRUNC3]](s16) - ; GFX6: [[FPEXT13:%[0-9]+]]:_(s32) = G_FPEXT [[SELECT1]](s16) - ; GFX6: [[FADD3:%[0-9]+]]:_(s32) = G_FADD [[FPEXT12]], [[FPEXT13]] - ; GFX6: [[FPTRUNC5:%[0-9]+]]:_(s16) = G_FPTRUNC [[FADD3]](s32) - ; GFX6: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[FPTRUNC2]](s16) - ; GFX6: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[FPTRUNC5]](s16) - ; GFX6: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C]](s32) - ; GFX6: [[OR2:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL]] - ; GFX6: [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR2]](s32) - ; GFX6: $vgpr0 = COPY [[BITCAST1]](<2 x s16>) + ; GFX6-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY]](<2 x s16>) + ; GFX6-NEXT: 
[[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST]](s32) + ; GFX6-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; GFX6-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) + ; GFX6-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) + ; GFX6-NEXT: [[FPEXT:%[0-9]+]]:_(s32) = G_FPEXT [[TRUNC]](s16) + ; GFX6-NEXT: [[INTRINSIC_TRUNC:%[0-9]+]]:_(s32) = G_INTRINSIC_TRUNC [[FPEXT]] + ; GFX6-NEXT: [[FPTRUNC:%[0-9]+]]:_(s16) = G_FPTRUNC [[INTRINSIC_TRUNC]](s32) + ; GFX6-NEXT: [[FNEG:%[0-9]+]]:_(s16) = G_FNEG [[FPTRUNC]] + ; GFX6-NEXT: [[FPEXT1:%[0-9]+]]:_(s32) = G_FPEXT [[TRUNC]](s16) + ; GFX6-NEXT: [[FPEXT2:%[0-9]+]]:_(s32) = G_FPEXT [[FNEG]](s16) + ; GFX6-NEXT: [[FADD:%[0-9]+]]:_(s32) = G_FADD [[FPEXT1]], [[FPEXT2]] + ; GFX6-NEXT: [[FPTRUNC1:%[0-9]+]]:_(s16) = G_FPTRUNC [[FADD]](s32) + ; GFX6-NEXT: [[FABS:%[0-9]+]]:_(s16) = G_FABS [[FPTRUNC1]] + ; GFX6-NEXT: [[C1:%[0-9]+]]:_(s16) = G_FCONSTANT half 0xH0000 + ; GFX6-NEXT: [[C2:%[0-9]+]]:_(s16) = G_FCONSTANT half 0xH3800 + ; GFX6-NEXT: [[C3:%[0-9]+]]:_(s16) = G_CONSTANT i16 -32768 + ; GFX6-NEXT: [[C4:%[0-9]+]]:_(s16) = G_CONSTANT i16 15360 + ; GFX6-NEXT: [[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC]], [[C3]] + ; GFX6-NEXT: [[OR:%[0-9]+]]:_(s16) = G_OR [[C4]], [[AND]] + ; GFX6-NEXT: [[FPEXT3:%[0-9]+]]:_(s32) = G_FPEXT [[FABS]](s16) + ; GFX6-NEXT: [[FPEXT4:%[0-9]+]]:_(s32) = G_FPEXT [[C2]](s16) + ; GFX6-NEXT: [[FCMP:%[0-9]+]]:_(s1) = G_FCMP floatpred(oge), [[FPEXT3]](s32), [[FPEXT4]] + ; GFX6-NEXT: [[SELECT:%[0-9]+]]:_(s16) = G_SELECT [[FCMP]](s1), [[OR]], [[C1]] + ; GFX6-NEXT: [[FPEXT5:%[0-9]+]]:_(s32) = G_FPEXT [[FPTRUNC]](s16) + ; GFX6-NEXT: [[FPEXT6:%[0-9]+]]:_(s32) = G_FPEXT [[SELECT]](s16) + ; GFX6-NEXT: [[FADD1:%[0-9]+]]:_(s32) = G_FADD [[FPEXT5]], [[FPEXT6]] + ; GFX6-NEXT: [[FPTRUNC2:%[0-9]+]]:_(s16) = G_FPTRUNC [[FADD1]](s32) + ; GFX6-NEXT: [[FPEXT7:%[0-9]+]]:_(s32) = G_FPEXT [[TRUNC1]](s16) + ; GFX6-NEXT: [[INTRINSIC_TRUNC1:%[0-9]+]]:_(s32) = G_INTRINSIC_TRUNC [[FPEXT7]] + ; GFX6-NEXT: [[FPTRUNC3:%[0-9]+]]:_(s16) = G_FPTRUNC [[INTRINSIC_TRUNC1]](s32) + ; GFX6-NEXT: [[FNEG1:%[0-9]+]]:_(s16) = G_FNEG [[FPTRUNC3]] + ; GFX6-NEXT: [[FPEXT8:%[0-9]+]]:_(s32) = G_FPEXT [[TRUNC1]](s16) + ; GFX6-NEXT: [[FPEXT9:%[0-9]+]]:_(s32) = G_FPEXT [[FNEG1]](s16) + ; GFX6-NEXT: [[FADD2:%[0-9]+]]:_(s32) = G_FADD [[FPEXT8]], [[FPEXT9]] + ; GFX6-NEXT: [[FPTRUNC4:%[0-9]+]]:_(s16) = G_FPTRUNC [[FADD2]](s32) + ; GFX6-NEXT: [[FABS1:%[0-9]+]]:_(s16) = G_FABS [[FPTRUNC4]] + ; GFX6-NEXT: [[AND1:%[0-9]+]]:_(s16) = G_AND [[TRUNC1]], [[C3]] + ; GFX6-NEXT: [[OR1:%[0-9]+]]:_(s16) = G_OR [[C4]], [[AND1]] + ; GFX6-NEXT: [[FPEXT10:%[0-9]+]]:_(s32) = G_FPEXT [[FABS1]](s16) + ; GFX6-NEXT: [[FPEXT11:%[0-9]+]]:_(s32) = G_FPEXT [[C2]](s16) + ; GFX6-NEXT: [[FCMP1:%[0-9]+]]:_(s1) = G_FCMP floatpred(oge), [[FPEXT10]](s32), [[FPEXT11]] + ; GFX6-NEXT: [[SELECT1:%[0-9]+]]:_(s16) = G_SELECT [[FCMP1]](s1), [[OR1]], [[C1]] + ; GFX6-NEXT: [[FPEXT12:%[0-9]+]]:_(s32) = G_FPEXT [[FPTRUNC3]](s16) + ; GFX6-NEXT: [[FPEXT13:%[0-9]+]]:_(s32) = G_FPEXT [[SELECT1]](s16) + ; GFX6-NEXT: [[FADD3:%[0-9]+]]:_(s32) = G_FADD [[FPEXT12]], [[FPEXT13]] + ; GFX6-NEXT: [[FPTRUNC5:%[0-9]+]]:_(s16) = G_FPTRUNC [[FADD3]](s32) + ; GFX6-NEXT: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[FPTRUNC2]](s16) + ; GFX6-NEXT: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[FPTRUNC5]](s16) + ; GFX6-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C]](s32) + ; GFX6-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL]] + ; GFX6-NEXT: [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR2]](s32) + ; GFX6-NEXT: $vgpr0 = COPY [[BITCAST1]](<2 x s16>) ; 
GFX8-LABEL: name: test_intrinsic_round_v2s16 ; GFX8: [[COPY:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0 - ; GFX8: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY]](<2 x s16>) - ; GFX8: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST]](s32) - ; GFX8: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; GFX8: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) - ; GFX8: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) - ; GFX8: [[INTRINSIC_TRUNC:%[0-9]+]]:_(s16) = G_INTRINSIC_TRUNC [[TRUNC]] - ; GFX8: [[FNEG:%[0-9]+]]:_(s16) = G_FNEG [[INTRINSIC_TRUNC]] - ; GFX8: [[FADD:%[0-9]+]]:_(s16) = G_FADD [[TRUNC]], [[FNEG]] - ; GFX8: [[FABS:%[0-9]+]]:_(s16) = G_FABS [[FADD]] - ; GFX8: [[C1:%[0-9]+]]:_(s16) = G_FCONSTANT half 0xH0000 - ; GFX8: [[C2:%[0-9]+]]:_(s16) = G_FCONSTANT half 0xH3800 - ; GFX8: [[C3:%[0-9]+]]:_(s16) = G_CONSTANT i16 -32768 - ; GFX8: [[C4:%[0-9]+]]:_(s16) = G_CONSTANT i16 15360 - ; GFX8: [[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC]], [[C3]] - ; GFX8: [[OR:%[0-9]+]]:_(s16) = G_OR [[C4]], [[AND]] - ; GFX8: [[FCMP:%[0-9]+]]:_(s1) = G_FCMP floatpred(oge), [[FABS]](s16), [[C2]] - ; GFX8: [[SELECT:%[0-9]+]]:_(s16) = G_SELECT [[FCMP]](s1), [[OR]], [[C1]] - ; GFX8: [[FADD1:%[0-9]+]]:_(s16) = G_FADD [[INTRINSIC_TRUNC]], [[SELECT]] - ; GFX8: [[INTRINSIC_TRUNC1:%[0-9]+]]:_(s16) = G_INTRINSIC_TRUNC [[TRUNC1]] - ; GFX8: [[FNEG1:%[0-9]+]]:_(s16) = G_FNEG [[INTRINSIC_TRUNC1]] - ; GFX8: [[FADD2:%[0-9]+]]:_(s16) = G_FADD [[TRUNC1]], [[FNEG1]] - ; GFX8: [[FABS1:%[0-9]+]]:_(s16) = G_FABS [[FADD2]] - ; GFX8: [[AND1:%[0-9]+]]:_(s16) = G_AND [[TRUNC1]], [[C3]] - ; GFX8: [[OR1:%[0-9]+]]:_(s16) = G_OR [[C4]], [[AND1]] - ; GFX8: [[FCMP1:%[0-9]+]]:_(s1) = G_FCMP floatpred(oge), [[FABS1]](s16), [[C2]] - ; GFX8: [[SELECT1:%[0-9]+]]:_(s16) = G_SELECT [[FCMP1]](s1), [[OR1]], [[C1]] - ; GFX8: [[FADD3:%[0-9]+]]:_(s16) = G_FADD [[INTRINSIC_TRUNC1]], [[SELECT1]] - ; GFX8: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[FADD1]](s16) - ; GFX8: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[FADD3]](s16) - ; GFX8: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C]](s32) - ; GFX8: [[OR2:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL]] - ; GFX8: [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR2]](s32) - ; GFX8: $vgpr0 = COPY [[BITCAST1]](<2 x s16>) + ; GFX8-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY]](<2 x s16>) + ; GFX8-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST]](s32) + ; GFX8-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; GFX8-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) + ; GFX8-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) + ; GFX8-NEXT: [[INTRINSIC_TRUNC:%[0-9]+]]:_(s16) = G_INTRINSIC_TRUNC [[TRUNC]] + ; GFX8-NEXT: [[FNEG:%[0-9]+]]:_(s16) = G_FNEG [[INTRINSIC_TRUNC]] + ; GFX8-NEXT: [[FADD:%[0-9]+]]:_(s16) = G_FADD [[TRUNC]], [[FNEG]] + ; GFX8-NEXT: [[FABS:%[0-9]+]]:_(s16) = G_FABS [[FADD]] + ; GFX8-NEXT: [[C1:%[0-9]+]]:_(s16) = G_FCONSTANT half 0xH0000 + ; GFX8-NEXT: [[C2:%[0-9]+]]:_(s16) = G_FCONSTANT half 0xH3800 + ; GFX8-NEXT: [[C3:%[0-9]+]]:_(s16) = G_CONSTANT i16 -32768 + ; GFX8-NEXT: [[C4:%[0-9]+]]:_(s16) = G_CONSTANT i16 15360 + ; GFX8-NEXT: [[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC]], [[C3]] + ; GFX8-NEXT: [[OR:%[0-9]+]]:_(s16) = G_OR [[C4]], [[AND]] + ; GFX8-NEXT: [[FCMP:%[0-9]+]]:_(s1) = G_FCMP floatpred(oge), [[FABS]](s16), [[C2]] + ; GFX8-NEXT: [[SELECT:%[0-9]+]]:_(s16) = G_SELECT [[FCMP]](s1), [[OR]], [[C1]] + ; GFX8-NEXT: [[FADD1:%[0-9]+]]:_(s16) = G_FADD [[INTRINSIC_TRUNC]], [[SELECT]] + ; GFX8-NEXT: [[INTRINSIC_TRUNC1:%[0-9]+]]:_(s16) = G_INTRINSIC_TRUNC [[TRUNC1]] + ; GFX8-NEXT: 
[[FNEG1:%[0-9]+]]:_(s16) = G_FNEG [[INTRINSIC_TRUNC1]] + ; GFX8-NEXT: [[FADD2:%[0-9]+]]:_(s16) = G_FADD [[TRUNC1]], [[FNEG1]] + ; GFX8-NEXT: [[FABS1:%[0-9]+]]:_(s16) = G_FABS [[FADD2]] + ; GFX8-NEXT: [[AND1:%[0-9]+]]:_(s16) = G_AND [[TRUNC1]], [[C3]] + ; GFX8-NEXT: [[OR1:%[0-9]+]]:_(s16) = G_OR [[C4]], [[AND1]] + ; GFX8-NEXT: [[FCMP1:%[0-9]+]]:_(s1) = G_FCMP floatpred(oge), [[FABS1]](s16), [[C2]] + ; GFX8-NEXT: [[SELECT1:%[0-9]+]]:_(s16) = G_SELECT [[FCMP1]](s1), [[OR1]], [[C1]] + ; GFX8-NEXT: [[FADD3:%[0-9]+]]:_(s16) = G_FADD [[INTRINSIC_TRUNC1]], [[SELECT1]] + ; GFX8-NEXT: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[FADD1]](s16) + ; GFX8-NEXT: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[FADD3]](s16) + ; GFX8-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C]](s32) + ; GFX8-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL]] + ; GFX8-NEXT: [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR2]](s32) + ; GFX8-NEXT: $vgpr0 = COPY [[BITCAST1]](<2 x s16>) ; GFX9-LABEL: name: test_intrinsic_round_v2s16 ; GFX9: [[COPY:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0 - ; GFX9: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY]](<2 x s16>) - ; GFX9: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST]](s32) - ; GFX9: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; GFX9: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) - ; GFX9: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) - ; GFX9: [[INTRINSIC_TRUNC:%[0-9]+]]:_(s16) = G_INTRINSIC_TRUNC [[TRUNC]] - ; GFX9: [[FNEG:%[0-9]+]]:_(s16) = G_FNEG [[INTRINSIC_TRUNC]] - ; GFX9: [[FADD:%[0-9]+]]:_(s16) = G_FADD [[TRUNC]], [[FNEG]] - ; GFX9: [[FABS:%[0-9]+]]:_(s16) = G_FABS [[FADD]] - ; GFX9: [[C1:%[0-9]+]]:_(s16) = G_FCONSTANT half 0xH0000 - ; GFX9: [[C2:%[0-9]+]]:_(s16) = G_FCONSTANT half 0xH3800 - ; GFX9: [[C3:%[0-9]+]]:_(s16) = G_CONSTANT i16 -32768 - ; GFX9: [[C4:%[0-9]+]]:_(s16) = G_CONSTANT i16 15360 - ; GFX9: [[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC]], [[C3]] - ; GFX9: [[OR:%[0-9]+]]:_(s16) = G_OR [[C4]], [[AND]] - ; GFX9: [[FCMP:%[0-9]+]]:_(s1) = G_FCMP floatpred(oge), [[FABS]](s16), [[C2]] - ; GFX9: [[SELECT:%[0-9]+]]:_(s16) = G_SELECT [[FCMP]](s1), [[OR]], [[C1]] - ; GFX9: [[FADD1:%[0-9]+]]:_(s16) = G_FADD [[INTRINSIC_TRUNC]], [[SELECT]] - ; GFX9: [[INTRINSIC_TRUNC1:%[0-9]+]]:_(s16) = G_INTRINSIC_TRUNC [[TRUNC1]] - ; GFX9: [[FNEG1:%[0-9]+]]:_(s16) = G_FNEG [[INTRINSIC_TRUNC1]] - ; GFX9: [[FADD2:%[0-9]+]]:_(s16) = G_FADD [[TRUNC1]], [[FNEG1]] - ; GFX9: [[FABS1:%[0-9]+]]:_(s16) = G_FABS [[FADD2]] - ; GFX9: [[AND1:%[0-9]+]]:_(s16) = G_AND [[TRUNC1]], [[C3]] - ; GFX9: [[OR1:%[0-9]+]]:_(s16) = G_OR [[C4]], [[AND1]] - ; GFX9: [[FCMP1:%[0-9]+]]:_(s1) = G_FCMP floatpred(oge), [[FABS1]](s16), [[C2]] - ; GFX9: [[SELECT1:%[0-9]+]]:_(s16) = G_SELECT [[FCMP1]](s1), [[OR1]], [[C1]] - ; GFX9: [[FADD3:%[0-9]+]]:_(s16) = G_FADD [[INTRINSIC_TRUNC1]], [[SELECT1]] - ; GFX9: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[FADD1]](s16) - ; GFX9: [[ANYEXT1:%[0-9]+]]:_(s32) = G_ANYEXT [[FADD3]](s16) - ; GFX9: [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[ANYEXT]](s32), [[ANYEXT1]](s32) - ; GFX9: $vgpr0 = COPY [[BUILD_VECTOR_TRUNC]](<2 x s16>) + ; GFX9-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY]](<2 x s16>) + ; GFX9-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST]](s32) + ; GFX9-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; GFX9-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) + ; GFX9-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) + ; GFX9-NEXT: [[INTRINSIC_TRUNC:%[0-9]+]]:_(s16) = G_INTRINSIC_TRUNC [[TRUNC]] + ; GFX9-NEXT: [[FNEG:%[0-9]+]]:_(s16) 
= G_FNEG [[INTRINSIC_TRUNC]] + ; GFX9-NEXT: [[FADD:%[0-9]+]]:_(s16) = G_FADD [[TRUNC]], [[FNEG]] + ; GFX9-NEXT: [[FABS:%[0-9]+]]:_(s16) = G_FABS [[FADD]] + ; GFX9-NEXT: [[C1:%[0-9]+]]:_(s16) = G_FCONSTANT half 0xH0000 + ; GFX9-NEXT: [[C2:%[0-9]+]]:_(s16) = G_FCONSTANT half 0xH3800 + ; GFX9-NEXT: [[C3:%[0-9]+]]:_(s16) = G_CONSTANT i16 -32768 + ; GFX9-NEXT: [[C4:%[0-9]+]]:_(s16) = G_CONSTANT i16 15360 + ; GFX9-NEXT: [[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC]], [[C3]] + ; GFX9-NEXT: [[OR:%[0-9]+]]:_(s16) = G_OR [[C4]], [[AND]] + ; GFX9-NEXT: [[FCMP:%[0-9]+]]:_(s1) = G_FCMP floatpred(oge), [[FABS]](s16), [[C2]] + ; GFX9-NEXT: [[SELECT:%[0-9]+]]:_(s16) = G_SELECT [[FCMP]](s1), [[OR]], [[C1]] + ; GFX9-NEXT: [[FADD1:%[0-9]+]]:_(s16) = G_FADD [[INTRINSIC_TRUNC]], [[SELECT]] + ; GFX9-NEXT: [[INTRINSIC_TRUNC1:%[0-9]+]]:_(s16) = G_INTRINSIC_TRUNC [[TRUNC1]] + ; GFX9-NEXT: [[FNEG1:%[0-9]+]]:_(s16) = G_FNEG [[INTRINSIC_TRUNC1]] + ; GFX9-NEXT: [[FADD2:%[0-9]+]]:_(s16) = G_FADD [[TRUNC1]], [[FNEG1]] + ; GFX9-NEXT: [[FABS1:%[0-9]+]]:_(s16) = G_FABS [[FADD2]] + ; GFX9-NEXT: [[AND1:%[0-9]+]]:_(s16) = G_AND [[TRUNC1]], [[C3]] + ; GFX9-NEXT: [[OR1:%[0-9]+]]:_(s16) = G_OR [[C4]], [[AND1]] + ; GFX9-NEXT: [[FCMP1:%[0-9]+]]:_(s1) = G_FCMP floatpred(oge), [[FABS1]](s16), [[C2]] + ; GFX9-NEXT: [[SELECT1:%[0-9]+]]:_(s16) = G_SELECT [[FCMP1]](s1), [[OR1]], [[C1]] + ; GFX9-NEXT: [[FADD3:%[0-9]+]]:_(s16) = G_FADD [[INTRINSIC_TRUNC1]], [[SELECT1]] + ; GFX9-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[FADD1]](s16) + ; GFX9-NEXT: [[ANYEXT1:%[0-9]+]]:_(s32) = G_ANYEXT [[FADD3]](s16) + ; GFX9-NEXT: [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[ANYEXT]](s32), [[ANYEXT1]](s32) + ; GFX9-NEXT: $vgpr0 = COPY [[BUILD_VECTOR_TRUNC]](<2 x s16>) %0:_(<2 x s16>) = COPY $vgpr0 %1:_(<2 x s16>) = G_INTRINSIC_ROUND %0 $vgpr0 = COPY %1 @@ -620,216 +620,216 @@ body: | liveins: $vgpr0_vgpr1_vgpr2 ; GFX6-LABEL: name: test_intrinsic_round_v3s16 ; GFX6: [[COPY:%[0-9]+]]:_(<6 x s16>) = COPY $vgpr0_vgpr1_vgpr2 - ; GFX6: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>), [[UV2:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[COPY]](<6 x s16>) - ; GFX6: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>) - ; GFX6: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST]](s32) - ; GFX6: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; GFX6: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) - ; GFX6: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) - ; GFX6: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>) - ; GFX6: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST1]](s32) - ; GFX6: [[FPEXT:%[0-9]+]]:_(s32) = G_FPEXT [[TRUNC]](s16) - ; GFX6: [[INTRINSIC_TRUNC:%[0-9]+]]:_(s32) = G_INTRINSIC_TRUNC [[FPEXT]] - ; GFX6: [[FPTRUNC:%[0-9]+]]:_(s16) = G_FPTRUNC [[INTRINSIC_TRUNC]](s32) - ; GFX6: [[FNEG:%[0-9]+]]:_(s16) = G_FNEG [[FPTRUNC]] - ; GFX6: [[FPEXT1:%[0-9]+]]:_(s32) = G_FPEXT [[TRUNC]](s16) - ; GFX6: [[FPEXT2:%[0-9]+]]:_(s32) = G_FPEXT [[FNEG]](s16) - ; GFX6: [[FADD:%[0-9]+]]:_(s32) = G_FADD [[FPEXT1]], [[FPEXT2]] - ; GFX6: [[FPTRUNC1:%[0-9]+]]:_(s16) = G_FPTRUNC [[FADD]](s32) - ; GFX6: [[FABS:%[0-9]+]]:_(s16) = G_FABS [[FPTRUNC1]] - ; GFX6: [[C1:%[0-9]+]]:_(s16) = G_FCONSTANT half 0xH0000 - ; GFX6: [[C2:%[0-9]+]]:_(s16) = G_FCONSTANT half 0xH3800 - ; GFX6: [[C3:%[0-9]+]]:_(s16) = G_CONSTANT i16 -32768 - ; GFX6: [[C4:%[0-9]+]]:_(s16) = G_CONSTANT i16 15360 - ; GFX6: [[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC]], [[C3]] - ; GFX6: [[OR:%[0-9]+]]:_(s16) = G_OR [[C4]], [[AND]] - ; GFX6: [[FPEXT3:%[0-9]+]]:_(s32) = G_FPEXT 
[[FABS]](s16) - ; GFX6: [[FPEXT4:%[0-9]+]]:_(s32) = G_FPEXT [[C2]](s16) - ; GFX6: [[FCMP:%[0-9]+]]:_(s1) = G_FCMP floatpred(oge), [[FPEXT3]](s32), [[FPEXT4]] - ; GFX6: [[SELECT:%[0-9]+]]:_(s16) = G_SELECT [[FCMP]](s1), [[OR]], [[C1]] - ; GFX6: [[FPEXT5:%[0-9]+]]:_(s32) = G_FPEXT [[FPTRUNC]](s16) - ; GFX6: [[FPEXT6:%[0-9]+]]:_(s32) = G_FPEXT [[SELECT]](s16) - ; GFX6: [[FADD1:%[0-9]+]]:_(s32) = G_FADD [[FPEXT5]], [[FPEXT6]] - ; GFX6: [[FPTRUNC2:%[0-9]+]]:_(s16) = G_FPTRUNC [[FADD1]](s32) - ; GFX6: [[FPEXT7:%[0-9]+]]:_(s32) = G_FPEXT [[TRUNC1]](s16) - ; GFX6: [[INTRINSIC_TRUNC1:%[0-9]+]]:_(s32) = G_INTRINSIC_TRUNC [[FPEXT7]] - ; GFX6: [[FPTRUNC3:%[0-9]+]]:_(s16) = G_FPTRUNC [[INTRINSIC_TRUNC1]](s32) - ; GFX6: [[FNEG1:%[0-9]+]]:_(s16) = G_FNEG [[FPTRUNC3]] - ; GFX6: [[FPEXT8:%[0-9]+]]:_(s32) = G_FPEXT [[TRUNC1]](s16) - ; GFX6: [[FPEXT9:%[0-9]+]]:_(s32) = G_FPEXT [[FNEG1]](s16) - ; GFX6: [[FADD2:%[0-9]+]]:_(s32) = G_FADD [[FPEXT8]], [[FPEXT9]] - ; GFX6: [[FPTRUNC4:%[0-9]+]]:_(s16) = G_FPTRUNC [[FADD2]](s32) - ; GFX6: [[FABS1:%[0-9]+]]:_(s16) = G_FABS [[FPTRUNC4]] - ; GFX6: [[AND1:%[0-9]+]]:_(s16) = G_AND [[TRUNC1]], [[C3]] - ; GFX6: [[OR1:%[0-9]+]]:_(s16) = G_OR [[C4]], [[AND1]] - ; GFX6: [[FPEXT10:%[0-9]+]]:_(s32) = G_FPEXT [[FABS1]](s16) - ; GFX6: [[FPEXT11:%[0-9]+]]:_(s32) = G_FPEXT [[C2]](s16) - ; GFX6: [[FCMP1:%[0-9]+]]:_(s1) = G_FCMP floatpred(oge), [[FPEXT10]](s32), [[FPEXT11]] - ; GFX6: [[SELECT1:%[0-9]+]]:_(s16) = G_SELECT [[FCMP1]](s1), [[OR1]], [[C1]] - ; GFX6: [[FPEXT12:%[0-9]+]]:_(s32) = G_FPEXT [[FPTRUNC3]](s16) - ; GFX6: [[FPEXT13:%[0-9]+]]:_(s32) = G_FPEXT [[SELECT1]](s16) - ; GFX6: [[FADD3:%[0-9]+]]:_(s32) = G_FADD [[FPEXT12]], [[FPEXT13]] - ; GFX6: [[FPTRUNC5:%[0-9]+]]:_(s16) = G_FPTRUNC [[FADD3]](s32) - ; GFX6: [[FPEXT14:%[0-9]+]]:_(s32) = G_FPEXT [[TRUNC2]](s16) - ; GFX6: [[INTRINSIC_TRUNC2:%[0-9]+]]:_(s32) = G_INTRINSIC_TRUNC [[FPEXT14]] - ; GFX6: [[FPTRUNC6:%[0-9]+]]:_(s16) = G_FPTRUNC [[INTRINSIC_TRUNC2]](s32) - ; GFX6: [[FNEG2:%[0-9]+]]:_(s16) = G_FNEG [[FPTRUNC6]] - ; GFX6: [[FPEXT15:%[0-9]+]]:_(s32) = G_FPEXT [[TRUNC2]](s16) - ; GFX6: [[FPEXT16:%[0-9]+]]:_(s32) = G_FPEXT [[FNEG2]](s16) - ; GFX6: [[FADD4:%[0-9]+]]:_(s32) = G_FADD [[FPEXT15]], [[FPEXT16]] - ; GFX6: [[FPTRUNC7:%[0-9]+]]:_(s16) = G_FPTRUNC [[FADD4]](s32) - ; GFX6: [[FABS2:%[0-9]+]]:_(s16) = G_FABS [[FPTRUNC7]] - ; GFX6: [[AND2:%[0-9]+]]:_(s16) = G_AND [[TRUNC2]], [[C3]] - ; GFX6: [[OR2:%[0-9]+]]:_(s16) = G_OR [[C4]], [[AND2]] - ; GFX6: [[FPEXT17:%[0-9]+]]:_(s32) = G_FPEXT [[FABS2]](s16) - ; GFX6: [[FPEXT18:%[0-9]+]]:_(s32) = G_FPEXT [[C2]](s16) - ; GFX6: [[FCMP2:%[0-9]+]]:_(s1) = G_FCMP floatpred(oge), [[FPEXT17]](s32), [[FPEXT18]] - ; GFX6: [[SELECT2:%[0-9]+]]:_(s16) = G_SELECT [[FCMP2]](s1), [[OR2]], [[C1]] - ; GFX6: [[FPEXT19:%[0-9]+]]:_(s32) = G_FPEXT [[FPTRUNC6]](s16) - ; GFX6: [[FPEXT20:%[0-9]+]]:_(s32) = G_FPEXT [[SELECT2]](s16) - ; GFX6: [[FADD5:%[0-9]+]]:_(s32) = G_FADD [[FPEXT19]], [[FPEXT20]] - ; GFX6: [[FPTRUNC8:%[0-9]+]]:_(s16) = G_FPTRUNC [[FADD5]](s32) - ; GFX6: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF - ; GFX6: [[UV3:%[0-9]+]]:_(<2 x s16>), [[UV4:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<4 x s16>) - ; GFX6: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[UV3]](<2 x s16>) - ; GFX6: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C]](s32) - ; GFX6: [[BITCAST3:%[0-9]+]]:_(s32) = G_BITCAST [[UV4]](<2 x s16>) - ; GFX6: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[FPTRUNC2]](s16) - ; GFX6: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[FPTRUNC5]](s16) - ; GFX6: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], 
[[C]](s32) - ; GFX6: [[OR3:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL]] - ; GFX6: [[BITCAST4:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR3]](s32) - ; GFX6: [[ZEXT2:%[0-9]+]]:_(s32) = G_ZEXT [[FPTRUNC8]](s16) - ; GFX6: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 - ; GFX6: [[AND3:%[0-9]+]]:_(s32) = G_AND [[BITCAST2]], [[C5]] - ; GFX6: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C]](s32) - ; GFX6: [[OR4:%[0-9]+]]:_(s32) = G_OR [[ZEXT2]], [[SHL1]] - ; GFX6: [[BITCAST5:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR4]](s32) - ; GFX6: [[AND4:%[0-9]+]]:_(s32) = G_AND [[LSHR1]], [[C5]] - ; GFX6: [[AND5:%[0-9]+]]:_(s32) = G_AND [[BITCAST3]], [[C5]] - ; GFX6: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[C]](s32) - ; GFX6: [[OR5:%[0-9]+]]:_(s32) = G_OR [[AND4]], [[SHL2]] - ; GFX6: [[BITCAST6:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR5]](s32) - ; GFX6: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BITCAST4]](<2 x s16>), [[BITCAST5]](<2 x s16>), [[BITCAST6]](<2 x s16>) - ; GFX6: $vgpr0_vgpr1_vgpr2 = COPY [[CONCAT_VECTORS]](<6 x s16>) + ; GFX6-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>), [[UV2:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[COPY]](<6 x s16>) + ; GFX6-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>) + ; GFX6-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST]](s32) + ; GFX6-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; GFX6-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) + ; GFX6-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) + ; GFX6-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>) + ; GFX6-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST1]](s32) + ; GFX6-NEXT: [[FPEXT:%[0-9]+]]:_(s32) = G_FPEXT [[TRUNC]](s16) + ; GFX6-NEXT: [[INTRINSIC_TRUNC:%[0-9]+]]:_(s32) = G_INTRINSIC_TRUNC [[FPEXT]] + ; GFX6-NEXT: [[FPTRUNC:%[0-9]+]]:_(s16) = G_FPTRUNC [[INTRINSIC_TRUNC]](s32) + ; GFX6-NEXT: [[FNEG:%[0-9]+]]:_(s16) = G_FNEG [[FPTRUNC]] + ; GFX6-NEXT: [[FPEXT1:%[0-9]+]]:_(s32) = G_FPEXT [[TRUNC]](s16) + ; GFX6-NEXT: [[FPEXT2:%[0-9]+]]:_(s32) = G_FPEXT [[FNEG]](s16) + ; GFX6-NEXT: [[FADD:%[0-9]+]]:_(s32) = G_FADD [[FPEXT1]], [[FPEXT2]] + ; GFX6-NEXT: [[FPTRUNC1:%[0-9]+]]:_(s16) = G_FPTRUNC [[FADD]](s32) + ; GFX6-NEXT: [[FABS:%[0-9]+]]:_(s16) = G_FABS [[FPTRUNC1]] + ; GFX6-NEXT: [[C1:%[0-9]+]]:_(s16) = G_FCONSTANT half 0xH0000 + ; GFX6-NEXT: [[C2:%[0-9]+]]:_(s16) = G_FCONSTANT half 0xH3800 + ; GFX6-NEXT: [[C3:%[0-9]+]]:_(s16) = G_CONSTANT i16 -32768 + ; GFX6-NEXT: [[C4:%[0-9]+]]:_(s16) = G_CONSTANT i16 15360 + ; GFX6-NEXT: [[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC]], [[C3]] + ; GFX6-NEXT: [[OR:%[0-9]+]]:_(s16) = G_OR [[C4]], [[AND]] + ; GFX6-NEXT: [[FPEXT3:%[0-9]+]]:_(s32) = G_FPEXT [[FABS]](s16) + ; GFX6-NEXT: [[FPEXT4:%[0-9]+]]:_(s32) = G_FPEXT [[C2]](s16) + ; GFX6-NEXT: [[FCMP:%[0-9]+]]:_(s1) = G_FCMP floatpred(oge), [[FPEXT3]](s32), [[FPEXT4]] + ; GFX6-NEXT: [[SELECT:%[0-9]+]]:_(s16) = G_SELECT [[FCMP]](s1), [[OR]], [[C1]] + ; GFX6-NEXT: [[FPEXT5:%[0-9]+]]:_(s32) = G_FPEXT [[FPTRUNC]](s16) + ; GFX6-NEXT: [[FPEXT6:%[0-9]+]]:_(s32) = G_FPEXT [[SELECT]](s16) + ; GFX6-NEXT: [[FADD1:%[0-9]+]]:_(s32) = G_FADD [[FPEXT5]], [[FPEXT6]] + ; GFX6-NEXT: [[FPTRUNC2:%[0-9]+]]:_(s16) = G_FPTRUNC [[FADD1]](s32) + ; GFX6-NEXT: [[FPEXT7:%[0-9]+]]:_(s32) = G_FPEXT [[TRUNC1]](s16) + ; GFX6-NEXT: [[INTRINSIC_TRUNC1:%[0-9]+]]:_(s32) = G_INTRINSIC_TRUNC [[FPEXT7]] + ; GFX6-NEXT: [[FPTRUNC3:%[0-9]+]]:_(s16) = G_FPTRUNC [[INTRINSIC_TRUNC1]](s32) + ; GFX6-NEXT: [[FNEG1:%[0-9]+]]:_(s16) = G_FNEG [[FPTRUNC3]] + ; GFX6-NEXT: [[FPEXT8:%[0-9]+]]:_(s32) = 
G_FPEXT [[TRUNC1]](s16) + ; GFX6-NEXT: [[FPEXT9:%[0-9]+]]:_(s32) = G_FPEXT [[FNEG1]](s16) + ; GFX6-NEXT: [[FADD2:%[0-9]+]]:_(s32) = G_FADD [[FPEXT8]], [[FPEXT9]] + ; GFX6-NEXT: [[FPTRUNC4:%[0-9]+]]:_(s16) = G_FPTRUNC [[FADD2]](s32) + ; GFX6-NEXT: [[FABS1:%[0-9]+]]:_(s16) = G_FABS [[FPTRUNC4]] + ; GFX6-NEXT: [[AND1:%[0-9]+]]:_(s16) = G_AND [[TRUNC1]], [[C3]] + ; GFX6-NEXT: [[OR1:%[0-9]+]]:_(s16) = G_OR [[C4]], [[AND1]] + ; GFX6-NEXT: [[FPEXT10:%[0-9]+]]:_(s32) = G_FPEXT [[FABS1]](s16) + ; GFX6-NEXT: [[FPEXT11:%[0-9]+]]:_(s32) = G_FPEXT [[C2]](s16) + ; GFX6-NEXT: [[FCMP1:%[0-9]+]]:_(s1) = G_FCMP floatpred(oge), [[FPEXT10]](s32), [[FPEXT11]] + ; GFX6-NEXT: [[SELECT1:%[0-9]+]]:_(s16) = G_SELECT [[FCMP1]](s1), [[OR1]], [[C1]] + ; GFX6-NEXT: [[FPEXT12:%[0-9]+]]:_(s32) = G_FPEXT [[FPTRUNC3]](s16) + ; GFX6-NEXT: [[FPEXT13:%[0-9]+]]:_(s32) = G_FPEXT [[SELECT1]](s16) + ; GFX6-NEXT: [[FADD3:%[0-9]+]]:_(s32) = G_FADD [[FPEXT12]], [[FPEXT13]] + ; GFX6-NEXT: [[FPTRUNC5:%[0-9]+]]:_(s16) = G_FPTRUNC [[FADD3]](s32) + ; GFX6-NEXT: [[FPEXT14:%[0-9]+]]:_(s32) = G_FPEXT [[TRUNC2]](s16) + ; GFX6-NEXT: [[INTRINSIC_TRUNC2:%[0-9]+]]:_(s32) = G_INTRINSIC_TRUNC [[FPEXT14]] + ; GFX6-NEXT: [[FPTRUNC6:%[0-9]+]]:_(s16) = G_FPTRUNC [[INTRINSIC_TRUNC2]](s32) + ; GFX6-NEXT: [[FNEG2:%[0-9]+]]:_(s16) = G_FNEG [[FPTRUNC6]] + ; GFX6-NEXT: [[FPEXT15:%[0-9]+]]:_(s32) = G_FPEXT [[TRUNC2]](s16) + ; GFX6-NEXT: [[FPEXT16:%[0-9]+]]:_(s32) = G_FPEXT [[FNEG2]](s16) + ; GFX6-NEXT: [[FADD4:%[0-9]+]]:_(s32) = G_FADD [[FPEXT15]], [[FPEXT16]] + ; GFX6-NEXT: [[FPTRUNC7:%[0-9]+]]:_(s16) = G_FPTRUNC [[FADD4]](s32) + ; GFX6-NEXT: [[FABS2:%[0-9]+]]:_(s16) = G_FABS [[FPTRUNC7]] + ; GFX6-NEXT: [[AND2:%[0-9]+]]:_(s16) = G_AND [[TRUNC2]], [[C3]] + ; GFX6-NEXT: [[OR2:%[0-9]+]]:_(s16) = G_OR [[C4]], [[AND2]] + ; GFX6-NEXT: [[FPEXT17:%[0-9]+]]:_(s32) = G_FPEXT [[FABS2]](s16) + ; GFX6-NEXT: [[FPEXT18:%[0-9]+]]:_(s32) = G_FPEXT [[C2]](s16) + ; GFX6-NEXT: [[FCMP2:%[0-9]+]]:_(s1) = G_FCMP floatpred(oge), [[FPEXT17]](s32), [[FPEXT18]] + ; GFX6-NEXT: [[SELECT2:%[0-9]+]]:_(s16) = G_SELECT [[FCMP2]](s1), [[OR2]], [[C1]] + ; GFX6-NEXT: [[FPEXT19:%[0-9]+]]:_(s32) = G_FPEXT [[FPTRUNC6]](s16) + ; GFX6-NEXT: [[FPEXT20:%[0-9]+]]:_(s32) = G_FPEXT [[SELECT2]](s16) + ; GFX6-NEXT: [[FADD5:%[0-9]+]]:_(s32) = G_FADD [[FPEXT19]], [[FPEXT20]] + ; GFX6-NEXT: [[FPTRUNC8:%[0-9]+]]:_(s16) = G_FPTRUNC [[FADD5]](s32) + ; GFX6-NEXT: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF + ; GFX6-NEXT: [[UV3:%[0-9]+]]:_(<2 x s16>), [[UV4:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<4 x s16>) + ; GFX6-NEXT: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[UV3]](<2 x s16>) + ; GFX6-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C]](s32) + ; GFX6-NEXT: [[BITCAST3:%[0-9]+]]:_(s32) = G_BITCAST [[UV4]](<2 x s16>) + ; GFX6-NEXT: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[FPTRUNC2]](s16) + ; GFX6-NEXT: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[FPTRUNC5]](s16) + ; GFX6-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C]](s32) + ; GFX6-NEXT: [[OR3:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL]] + ; GFX6-NEXT: [[BITCAST4:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR3]](s32) + ; GFX6-NEXT: [[ZEXT2:%[0-9]+]]:_(s32) = G_ZEXT [[FPTRUNC8]](s16) + ; GFX6-NEXT: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 + ; GFX6-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[BITCAST2]], [[C5]] + ; GFX6-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C]](s32) + ; GFX6-NEXT: [[OR4:%[0-9]+]]:_(s32) = G_OR [[ZEXT2]], [[SHL1]] + ; GFX6-NEXT: [[BITCAST5:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR4]](s32) + ; GFX6-NEXT: [[AND4:%[0-9]+]]:_(s32) = G_AND 
[[LSHR1]], [[C5]] + ; GFX6-NEXT: [[AND5:%[0-9]+]]:_(s32) = G_AND [[BITCAST3]], [[C5]] + ; GFX6-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[C]](s32) + ; GFX6-NEXT: [[OR5:%[0-9]+]]:_(s32) = G_OR [[AND4]], [[SHL2]] + ; GFX6-NEXT: [[BITCAST6:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR5]](s32) + ; GFX6-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BITCAST4]](<2 x s16>), [[BITCAST5]](<2 x s16>), [[BITCAST6]](<2 x s16>) + ; GFX6-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[CONCAT_VECTORS]](<6 x s16>) ; GFX8-LABEL: name: test_intrinsic_round_v3s16 ; GFX8: [[COPY:%[0-9]+]]:_(<6 x s16>) = COPY $vgpr0_vgpr1_vgpr2 - ; GFX8: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>), [[UV2:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[COPY]](<6 x s16>) - ; GFX8: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>) - ; GFX8: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST]](s32) - ; GFX8: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; GFX8: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) - ; GFX8: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) - ; GFX8: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>) - ; GFX8: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST1]](s32) - ; GFX8: [[INTRINSIC_TRUNC:%[0-9]+]]:_(s16) = G_INTRINSIC_TRUNC [[TRUNC]] - ; GFX8: [[FNEG:%[0-9]+]]:_(s16) = G_FNEG [[INTRINSIC_TRUNC]] - ; GFX8: [[FADD:%[0-9]+]]:_(s16) = G_FADD [[TRUNC]], [[FNEG]] - ; GFX8: [[FABS:%[0-9]+]]:_(s16) = G_FABS [[FADD]] - ; GFX8: [[C1:%[0-9]+]]:_(s16) = G_FCONSTANT half 0xH0000 - ; GFX8: [[C2:%[0-9]+]]:_(s16) = G_FCONSTANT half 0xH3800 - ; GFX8: [[C3:%[0-9]+]]:_(s16) = G_CONSTANT i16 -32768 - ; GFX8: [[C4:%[0-9]+]]:_(s16) = G_CONSTANT i16 15360 - ; GFX8: [[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC]], [[C3]] - ; GFX8: [[OR:%[0-9]+]]:_(s16) = G_OR [[C4]], [[AND]] - ; GFX8: [[FCMP:%[0-9]+]]:_(s1) = G_FCMP floatpred(oge), [[FABS]](s16), [[C2]] - ; GFX8: [[SELECT:%[0-9]+]]:_(s16) = G_SELECT [[FCMP]](s1), [[OR]], [[C1]] - ; GFX8: [[FADD1:%[0-9]+]]:_(s16) = G_FADD [[INTRINSIC_TRUNC]], [[SELECT]] - ; GFX8: [[INTRINSIC_TRUNC1:%[0-9]+]]:_(s16) = G_INTRINSIC_TRUNC [[TRUNC1]] - ; GFX8: [[FNEG1:%[0-9]+]]:_(s16) = G_FNEG [[INTRINSIC_TRUNC1]] - ; GFX8: [[FADD2:%[0-9]+]]:_(s16) = G_FADD [[TRUNC1]], [[FNEG1]] - ; GFX8: [[FABS1:%[0-9]+]]:_(s16) = G_FABS [[FADD2]] - ; GFX8: [[AND1:%[0-9]+]]:_(s16) = G_AND [[TRUNC1]], [[C3]] - ; GFX8: [[OR1:%[0-9]+]]:_(s16) = G_OR [[C4]], [[AND1]] - ; GFX8: [[FCMP1:%[0-9]+]]:_(s1) = G_FCMP floatpred(oge), [[FABS1]](s16), [[C2]] - ; GFX8: [[SELECT1:%[0-9]+]]:_(s16) = G_SELECT [[FCMP1]](s1), [[OR1]], [[C1]] - ; GFX8: [[FADD3:%[0-9]+]]:_(s16) = G_FADD [[INTRINSIC_TRUNC1]], [[SELECT1]] - ; GFX8: [[INTRINSIC_TRUNC2:%[0-9]+]]:_(s16) = G_INTRINSIC_TRUNC [[TRUNC2]] - ; GFX8: [[FNEG2:%[0-9]+]]:_(s16) = G_FNEG [[INTRINSIC_TRUNC2]] - ; GFX8: [[FADD4:%[0-9]+]]:_(s16) = G_FADD [[TRUNC2]], [[FNEG2]] - ; GFX8: [[FABS2:%[0-9]+]]:_(s16) = G_FABS [[FADD4]] - ; GFX8: [[AND2:%[0-9]+]]:_(s16) = G_AND [[TRUNC2]], [[C3]] - ; GFX8: [[OR2:%[0-9]+]]:_(s16) = G_OR [[C4]], [[AND2]] - ; GFX8: [[FCMP2:%[0-9]+]]:_(s1) = G_FCMP floatpred(oge), [[FABS2]](s16), [[C2]] - ; GFX8: [[SELECT2:%[0-9]+]]:_(s16) = G_SELECT [[FCMP2]](s1), [[OR2]], [[C1]] - ; GFX8: [[FADD5:%[0-9]+]]:_(s16) = G_FADD [[INTRINSIC_TRUNC2]], [[SELECT2]] - ; GFX8: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF - ; GFX8: [[UV3:%[0-9]+]]:_(<2 x s16>), [[UV4:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<4 x s16>) - ; GFX8: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[UV3]](<2 x s16>) - ; GFX8: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], 
[[C]](s32) - ; GFX8: [[BITCAST3:%[0-9]+]]:_(s32) = G_BITCAST [[UV4]](<2 x s16>) - ; GFX8: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[FADD1]](s16) - ; GFX8: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[FADD3]](s16) - ; GFX8: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C]](s32) - ; GFX8: [[OR3:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL]] - ; GFX8: [[BITCAST4:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR3]](s32) - ; GFX8: [[ZEXT2:%[0-9]+]]:_(s32) = G_ZEXT [[FADD5]](s16) - ; GFX8: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 - ; GFX8: [[AND3:%[0-9]+]]:_(s32) = G_AND [[BITCAST2]], [[C5]] - ; GFX8: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C]](s32) - ; GFX8: [[OR4:%[0-9]+]]:_(s32) = G_OR [[ZEXT2]], [[SHL1]] - ; GFX8: [[BITCAST5:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR4]](s32) - ; GFX8: [[AND4:%[0-9]+]]:_(s32) = G_AND [[LSHR1]], [[C5]] - ; GFX8: [[AND5:%[0-9]+]]:_(s32) = G_AND [[BITCAST3]], [[C5]] - ; GFX8: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[C]](s32) - ; GFX8: [[OR5:%[0-9]+]]:_(s32) = G_OR [[AND4]], [[SHL2]] - ; GFX8: [[BITCAST6:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR5]](s32) - ; GFX8: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BITCAST4]](<2 x s16>), [[BITCAST5]](<2 x s16>), [[BITCAST6]](<2 x s16>) - ; GFX8: $vgpr0_vgpr1_vgpr2 = COPY [[CONCAT_VECTORS]](<6 x s16>) + ; GFX8-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>), [[UV2:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[COPY]](<6 x s16>) + ; GFX8-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>) + ; GFX8-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST]](s32) + ; GFX8-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; GFX8-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) + ; GFX8-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) + ; GFX8-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>) + ; GFX8-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST1]](s32) + ; GFX8-NEXT: [[INTRINSIC_TRUNC:%[0-9]+]]:_(s16) = G_INTRINSIC_TRUNC [[TRUNC]] + ; GFX8-NEXT: [[FNEG:%[0-9]+]]:_(s16) = G_FNEG [[INTRINSIC_TRUNC]] + ; GFX8-NEXT: [[FADD:%[0-9]+]]:_(s16) = G_FADD [[TRUNC]], [[FNEG]] + ; GFX8-NEXT: [[FABS:%[0-9]+]]:_(s16) = G_FABS [[FADD]] + ; GFX8-NEXT: [[C1:%[0-9]+]]:_(s16) = G_FCONSTANT half 0xH0000 + ; GFX8-NEXT: [[C2:%[0-9]+]]:_(s16) = G_FCONSTANT half 0xH3800 + ; GFX8-NEXT: [[C3:%[0-9]+]]:_(s16) = G_CONSTANT i16 -32768 + ; GFX8-NEXT: [[C4:%[0-9]+]]:_(s16) = G_CONSTANT i16 15360 + ; GFX8-NEXT: [[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC]], [[C3]] + ; GFX8-NEXT: [[OR:%[0-9]+]]:_(s16) = G_OR [[C4]], [[AND]] + ; GFX8-NEXT: [[FCMP:%[0-9]+]]:_(s1) = G_FCMP floatpred(oge), [[FABS]](s16), [[C2]] + ; GFX8-NEXT: [[SELECT:%[0-9]+]]:_(s16) = G_SELECT [[FCMP]](s1), [[OR]], [[C1]] + ; GFX8-NEXT: [[FADD1:%[0-9]+]]:_(s16) = G_FADD [[INTRINSIC_TRUNC]], [[SELECT]] + ; GFX8-NEXT: [[INTRINSIC_TRUNC1:%[0-9]+]]:_(s16) = G_INTRINSIC_TRUNC [[TRUNC1]] + ; GFX8-NEXT: [[FNEG1:%[0-9]+]]:_(s16) = G_FNEG [[INTRINSIC_TRUNC1]] + ; GFX8-NEXT: [[FADD2:%[0-9]+]]:_(s16) = G_FADD [[TRUNC1]], [[FNEG1]] + ; GFX8-NEXT: [[FABS1:%[0-9]+]]:_(s16) = G_FABS [[FADD2]] + ; GFX8-NEXT: [[AND1:%[0-9]+]]:_(s16) = G_AND [[TRUNC1]], [[C3]] + ; GFX8-NEXT: [[OR1:%[0-9]+]]:_(s16) = G_OR [[C4]], [[AND1]] + ; GFX8-NEXT: [[FCMP1:%[0-9]+]]:_(s1) = G_FCMP floatpred(oge), [[FABS1]](s16), [[C2]] + ; GFX8-NEXT: [[SELECT1:%[0-9]+]]:_(s16) = G_SELECT [[FCMP1]](s1), [[OR1]], [[C1]] + ; GFX8-NEXT: [[FADD3:%[0-9]+]]:_(s16) = G_FADD [[INTRINSIC_TRUNC1]], [[SELECT1]] + ; GFX8-NEXT: [[INTRINSIC_TRUNC2:%[0-9]+]]:_(s16) = G_INTRINSIC_TRUNC [[TRUNC2]] + ; GFX8-NEXT: 
[[FNEG2:%[0-9]+]]:_(s16) = G_FNEG [[INTRINSIC_TRUNC2]] + ; GFX8-NEXT: [[FADD4:%[0-9]+]]:_(s16) = G_FADD [[TRUNC2]], [[FNEG2]] + ; GFX8-NEXT: [[FABS2:%[0-9]+]]:_(s16) = G_FABS [[FADD4]] + ; GFX8-NEXT: [[AND2:%[0-9]+]]:_(s16) = G_AND [[TRUNC2]], [[C3]] + ; GFX8-NEXT: [[OR2:%[0-9]+]]:_(s16) = G_OR [[C4]], [[AND2]] + ; GFX8-NEXT: [[FCMP2:%[0-9]+]]:_(s1) = G_FCMP floatpred(oge), [[FABS2]](s16), [[C2]] + ; GFX8-NEXT: [[SELECT2:%[0-9]+]]:_(s16) = G_SELECT [[FCMP2]](s1), [[OR2]], [[C1]] + ; GFX8-NEXT: [[FADD5:%[0-9]+]]:_(s16) = G_FADD [[INTRINSIC_TRUNC2]], [[SELECT2]] + ; GFX8-NEXT: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF + ; GFX8-NEXT: [[UV3:%[0-9]+]]:_(<2 x s16>), [[UV4:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<4 x s16>) + ; GFX8-NEXT: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[UV3]](<2 x s16>) + ; GFX8-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C]](s32) + ; GFX8-NEXT: [[BITCAST3:%[0-9]+]]:_(s32) = G_BITCAST [[UV4]](<2 x s16>) + ; GFX8-NEXT: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[FADD1]](s16) + ; GFX8-NEXT: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[FADD3]](s16) + ; GFX8-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C]](s32) + ; GFX8-NEXT: [[OR3:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL]] + ; GFX8-NEXT: [[BITCAST4:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR3]](s32) + ; GFX8-NEXT: [[ZEXT2:%[0-9]+]]:_(s32) = G_ZEXT [[FADD5]](s16) + ; GFX8-NEXT: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 + ; GFX8-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[BITCAST2]], [[C5]] + ; GFX8-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C]](s32) + ; GFX8-NEXT: [[OR4:%[0-9]+]]:_(s32) = G_OR [[ZEXT2]], [[SHL1]] + ; GFX8-NEXT: [[BITCAST5:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR4]](s32) + ; GFX8-NEXT: [[AND4:%[0-9]+]]:_(s32) = G_AND [[LSHR1]], [[C5]] + ; GFX8-NEXT: [[AND5:%[0-9]+]]:_(s32) = G_AND [[BITCAST3]], [[C5]] + ; GFX8-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[C]](s32) + ; GFX8-NEXT: [[OR5:%[0-9]+]]:_(s32) = G_OR [[AND4]], [[SHL2]] + ; GFX8-NEXT: [[BITCAST6:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR5]](s32) + ; GFX8-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BITCAST4]](<2 x s16>), [[BITCAST5]](<2 x s16>), [[BITCAST6]](<2 x s16>) + ; GFX8-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[CONCAT_VECTORS]](<6 x s16>) ; GFX9-LABEL: name: test_intrinsic_round_v3s16 ; GFX9: [[COPY:%[0-9]+]]:_(<6 x s16>) = COPY $vgpr0_vgpr1_vgpr2 - ; GFX9: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>), [[UV2:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[COPY]](<6 x s16>) - ; GFX9: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>) - ; GFX9: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST]](s32) - ; GFX9: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; GFX9: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) - ; GFX9: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) - ; GFX9: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>) - ; GFX9: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST1]](s32) - ; GFX9: [[INTRINSIC_TRUNC:%[0-9]+]]:_(s16) = G_INTRINSIC_TRUNC [[TRUNC]] - ; GFX9: [[FNEG:%[0-9]+]]:_(s16) = G_FNEG [[INTRINSIC_TRUNC]] - ; GFX9: [[FADD:%[0-9]+]]:_(s16) = G_FADD [[TRUNC]], [[FNEG]] - ; GFX9: [[FABS:%[0-9]+]]:_(s16) = G_FABS [[FADD]] - ; GFX9: [[C1:%[0-9]+]]:_(s16) = G_FCONSTANT half 0xH0000 - ; GFX9: [[C2:%[0-9]+]]:_(s16) = G_FCONSTANT half 0xH3800 - ; GFX9: [[C3:%[0-9]+]]:_(s16) = G_CONSTANT i16 -32768 - ; GFX9: [[C4:%[0-9]+]]:_(s16) = G_CONSTANT i16 15360 - ; GFX9: [[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC]], [[C3]] - ; GFX9: [[OR:%[0-9]+]]:_(s16) = G_OR [[C4]], [[AND]] - ; GFX9: 
[[FCMP:%[0-9]+]]:_(s1) = G_FCMP floatpred(oge), [[FABS]](s16), [[C2]] - ; GFX9: [[SELECT:%[0-9]+]]:_(s16) = G_SELECT [[FCMP]](s1), [[OR]], [[C1]] - ; GFX9: [[FADD1:%[0-9]+]]:_(s16) = G_FADD [[INTRINSIC_TRUNC]], [[SELECT]] - ; GFX9: [[INTRINSIC_TRUNC1:%[0-9]+]]:_(s16) = G_INTRINSIC_TRUNC [[TRUNC1]] - ; GFX9: [[FNEG1:%[0-9]+]]:_(s16) = G_FNEG [[INTRINSIC_TRUNC1]] - ; GFX9: [[FADD2:%[0-9]+]]:_(s16) = G_FADD [[TRUNC1]], [[FNEG1]] - ; GFX9: [[FABS1:%[0-9]+]]:_(s16) = G_FABS [[FADD2]] - ; GFX9: [[AND1:%[0-9]+]]:_(s16) = G_AND [[TRUNC1]], [[C3]] - ; GFX9: [[OR1:%[0-9]+]]:_(s16) = G_OR [[C4]], [[AND1]] - ; GFX9: [[FCMP1:%[0-9]+]]:_(s1) = G_FCMP floatpred(oge), [[FABS1]](s16), [[C2]] - ; GFX9: [[SELECT1:%[0-9]+]]:_(s16) = G_SELECT [[FCMP1]](s1), [[OR1]], [[C1]] - ; GFX9: [[FADD3:%[0-9]+]]:_(s16) = G_FADD [[INTRINSIC_TRUNC1]], [[SELECT1]] - ; GFX9: [[INTRINSIC_TRUNC2:%[0-9]+]]:_(s16) = G_INTRINSIC_TRUNC [[TRUNC2]] - ; GFX9: [[FNEG2:%[0-9]+]]:_(s16) = G_FNEG [[INTRINSIC_TRUNC2]] - ; GFX9: [[FADD4:%[0-9]+]]:_(s16) = G_FADD [[TRUNC2]], [[FNEG2]] - ; GFX9: [[FABS2:%[0-9]+]]:_(s16) = G_FABS [[FADD4]] - ; GFX9: [[AND2:%[0-9]+]]:_(s16) = G_AND [[TRUNC2]], [[C3]] - ; GFX9: [[OR2:%[0-9]+]]:_(s16) = G_OR [[C4]], [[AND2]] - ; GFX9: [[FCMP2:%[0-9]+]]:_(s1) = G_FCMP floatpred(oge), [[FABS2]](s16), [[C2]] - ; GFX9: [[SELECT2:%[0-9]+]]:_(s16) = G_SELECT [[FCMP2]](s1), [[OR2]], [[C1]] - ; GFX9: [[FADD5:%[0-9]+]]:_(s16) = G_FADD [[INTRINSIC_TRUNC2]], [[SELECT2]] - ; GFX9: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF - ; GFX9: [[UV3:%[0-9]+]]:_(<2 x s16>), [[UV4:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<4 x s16>) - ; GFX9: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[UV3]](<2 x s16>) - ; GFX9: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C]](s32) - ; GFX9: [[BITCAST3:%[0-9]+]]:_(s32) = G_BITCAST [[UV4]](<2 x s16>) - ; GFX9: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[FADD1]](s16) - ; GFX9: [[ANYEXT1:%[0-9]+]]:_(s32) = G_ANYEXT [[FADD3]](s16) - ; GFX9: [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[ANYEXT]](s32), [[ANYEXT1]](s32) - ; GFX9: [[ANYEXT2:%[0-9]+]]:_(s32) = G_ANYEXT [[FADD5]](s16) - ; GFX9: [[BUILD_VECTOR_TRUNC1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[ANYEXT2]](s32), [[BITCAST2]](s32) - ; GFX9: [[BUILD_VECTOR_TRUNC2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[LSHR1]](s32), [[BITCAST3]](s32) - ; GFX9: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR_TRUNC]](<2 x s16>), [[BUILD_VECTOR_TRUNC1]](<2 x s16>), [[BUILD_VECTOR_TRUNC2]](<2 x s16>) - ; GFX9: $vgpr0_vgpr1_vgpr2 = COPY [[CONCAT_VECTORS]](<6 x s16>) + ; GFX9-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>), [[UV2:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[COPY]](<6 x s16>) + ; GFX9-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>) + ; GFX9-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST]](s32) + ; GFX9-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; GFX9-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) + ; GFX9-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) + ; GFX9-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>) + ; GFX9-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST1]](s32) + ; GFX9-NEXT: [[INTRINSIC_TRUNC:%[0-9]+]]:_(s16) = G_INTRINSIC_TRUNC [[TRUNC]] + ; GFX9-NEXT: [[FNEG:%[0-9]+]]:_(s16) = G_FNEG [[INTRINSIC_TRUNC]] + ; GFX9-NEXT: [[FADD:%[0-9]+]]:_(s16) = G_FADD [[TRUNC]], [[FNEG]] + ; GFX9-NEXT: [[FABS:%[0-9]+]]:_(s16) = G_FABS [[FADD]] + ; GFX9-NEXT: [[C1:%[0-9]+]]:_(s16) = G_FCONSTANT half 0xH0000 + ; 
GFX9-NEXT: [[C2:%[0-9]+]]:_(s16) = G_FCONSTANT half 0xH3800 + ; GFX9-NEXT: [[C3:%[0-9]+]]:_(s16) = G_CONSTANT i16 -32768 + ; GFX9-NEXT: [[C4:%[0-9]+]]:_(s16) = G_CONSTANT i16 15360 + ; GFX9-NEXT: [[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC]], [[C3]] + ; GFX9-NEXT: [[OR:%[0-9]+]]:_(s16) = G_OR [[C4]], [[AND]] + ; GFX9-NEXT: [[FCMP:%[0-9]+]]:_(s1) = G_FCMP floatpred(oge), [[FABS]](s16), [[C2]] + ; GFX9-NEXT: [[SELECT:%[0-9]+]]:_(s16) = G_SELECT [[FCMP]](s1), [[OR]], [[C1]] + ; GFX9-NEXT: [[FADD1:%[0-9]+]]:_(s16) = G_FADD [[INTRINSIC_TRUNC]], [[SELECT]] + ; GFX9-NEXT: [[INTRINSIC_TRUNC1:%[0-9]+]]:_(s16) = G_INTRINSIC_TRUNC [[TRUNC1]] + ; GFX9-NEXT: [[FNEG1:%[0-9]+]]:_(s16) = G_FNEG [[INTRINSIC_TRUNC1]] + ; GFX9-NEXT: [[FADD2:%[0-9]+]]:_(s16) = G_FADD [[TRUNC1]], [[FNEG1]] + ; GFX9-NEXT: [[FABS1:%[0-9]+]]:_(s16) = G_FABS [[FADD2]] + ; GFX9-NEXT: [[AND1:%[0-9]+]]:_(s16) = G_AND [[TRUNC1]], [[C3]] + ; GFX9-NEXT: [[OR1:%[0-9]+]]:_(s16) = G_OR [[C4]], [[AND1]] + ; GFX9-NEXT: [[FCMP1:%[0-9]+]]:_(s1) = G_FCMP floatpred(oge), [[FABS1]](s16), [[C2]] + ; GFX9-NEXT: [[SELECT1:%[0-9]+]]:_(s16) = G_SELECT [[FCMP1]](s1), [[OR1]], [[C1]] + ; GFX9-NEXT: [[FADD3:%[0-9]+]]:_(s16) = G_FADD [[INTRINSIC_TRUNC1]], [[SELECT1]] + ; GFX9-NEXT: [[INTRINSIC_TRUNC2:%[0-9]+]]:_(s16) = G_INTRINSIC_TRUNC [[TRUNC2]] + ; GFX9-NEXT: [[FNEG2:%[0-9]+]]:_(s16) = G_FNEG [[INTRINSIC_TRUNC2]] + ; GFX9-NEXT: [[FADD4:%[0-9]+]]:_(s16) = G_FADD [[TRUNC2]], [[FNEG2]] + ; GFX9-NEXT: [[FABS2:%[0-9]+]]:_(s16) = G_FABS [[FADD4]] + ; GFX9-NEXT: [[AND2:%[0-9]+]]:_(s16) = G_AND [[TRUNC2]], [[C3]] + ; GFX9-NEXT: [[OR2:%[0-9]+]]:_(s16) = G_OR [[C4]], [[AND2]] + ; GFX9-NEXT: [[FCMP2:%[0-9]+]]:_(s1) = G_FCMP floatpred(oge), [[FABS2]](s16), [[C2]] + ; GFX9-NEXT: [[SELECT2:%[0-9]+]]:_(s16) = G_SELECT [[FCMP2]](s1), [[OR2]], [[C1]] + ; GFX9-NEXT: [[FADD5:%[0-9]+]]:_(s16) = G_FADD [[INTRINSIC_TRUNC2]], [[SELECT2]] + ; GFX9-NEXT: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF + ; GFX9-NEXT: [[UV3:%[0-9]+]]:_(<2 x s16>), [[UV4:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<4 x s16>) + ; GFX9-NEXT: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[UV3]](<2 x s16>) + ; GFX9-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C]](s32) + ; GFX9-NEXT: [[BITCAST3:%[0-9]+]]:_(s32) = G_BITCAST [[UV4]](<2 x s16>) + ; GFX9-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[FADD1]](s16) + ; GFX9-NEXT: [[ANYEXT1:%[0-9]+]]:_(s32) = G_ANYEXT [[FADD3]](s16) + ; GFX9-NEXT: [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[ANYEXT]](s32), [[ANYEXT1]](s32) + ; GFX9-NEXT: [[ANYEXT2:%[0-9]+]]:_(s32) = G_ANYEXT [[FADD5]](s16) + ; GFX9-NEXT: [[BUILD_VECTOR_TRUNC1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[ANYEXT2]](s32), [[BITCAST2]](s32) + ; GFX9-NEXT: [[BUILD_VECTOR_TRUNC2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[LSHR1]](s32), [[BITCAST3]](s32) + ; GFX9-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR_TRUNC]](<2 x s16>), [[BUILD_VECTOR_TRUNC1]](<2 x s16>), [[BUILD_VECTOR_TRUNC2]](<2 x s16>) + ; GFX9-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[CONCAT_VECTORS]](<6 x s16>) %0:_(<6 x s16>) = COPY $vgpr0_vgpr1_vgpr2 %1:_(<3 x s16>), %2:_(<3 x s16>) = G_UNMERGE_VALUES %0 %3:_(<3 x s16>) = G_INTRINSIC_ROUND %1 @@ -846,232 +846,232 @@ body: | ; GFX6-LABEL: name: test_intrinsic_round_v4s16 ; GFX6: [[COPY:%[0-9]+]]:_(<4 x s16>) = COPY $vgpr0_vgpr1 - ; GFX6: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[COPY]](<4 x s16>) - ; GFX6: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>) - ; GFX6: 
[[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST]](s32) - ; GFX6: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; GFX6: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) - ; GFX6: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) - ; GFX6: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>) - ; GFX6: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST1]](s32) - ; GFX6: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C]](s32) - ; GFX6: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR1]](s32) - ; GFX6: [[FPEXT:%[0-9]+]]:_(s32) = G_FPEXT [[TRUNC]](s16) - ; GFX6: [[INTRINSIC_TRUNC:%[0-9]+]]:_(s32) = G_INTRINSIC_TRUNC [[FPEXT]] - ; GFX6: [[FPTRUNC:%[0-9]+]]:_(s16) = G_FPTRUNC [[INTRINSIC_TRUNC]](s32) - ; GFX6: [[FNEG:%[0-9]+]]:_(s16) = G_FNEG [[FPTRUNC]] - ; GFX6: [[FPEXT1:%[0-9]+]]:_(s32) = G_FPEXT [[TRUNC]](s16) - ; GFX6: [[FPEXT2:%[0-9]+]]:_(s32) = G_FPEXT [[FNEG]](s16) - ; GFX6: [[FADD:%[0-9]+]]:_(s32) = G_FADD [[FPEXT1]], [[FPEXT2]] - ; GFX6: [[FPTRUNC1:%[0-9]+]]:_(s16) = G_FPTRUNC [[FADD]](s32) - ; GFX6: [[FABS:%[0-9]+]]:_(s16) = G_FABS [[FPTRUNC1]] - ; GFX6: [[C1:%[0-9]+]]:_(s16) = G_FCONSTANT half 0xH0000 - ; GFX6: [[C2:%[0-9]+]]:_(s16) = G_FCONSTANT half 0xH3800 - ; GFX6: [[C3:%[0-9]+]]:_(s16) = G_CONSTANT i16 -32768 - ; GFX6: [[C4:%[0-9]+]]:_(s16) = G_CONSTANT i16 15360 - ; GFX6: [[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC]], [[C3]] - ; GFX6: [[OR:%[0-9]+]]:_(s16) = G_OR [[C4]], [[AND]] - ; GFX6: [[FPEXT3:%[0-9]+]]:_(s32) = G_FPEXT [[FABS]](s16) - ; GFX6: [[FPEXT4:%[0-9]+]]:_(s32) = G_FPEXT [[C2]](s16) - ; GFX6: [[FCMP:%[0-9]+]]:_(s1) = G_FCMP floatpred(oge), [[FPEXT3]](s32), [[FPEXT4]] - ; GFX6: [[SELECT:%[0-9]+]]:_(s16) = G_SELECT [[FCMP]](s1), [[OR]], [[C1]] - ; GFX6: [[FPEXT5:%[0-9]+]]:_(s32) = G_FPEXT [[FPTRUNC]](s16) - ; GFX6: [[FPEXT6:%[0-9]+]]:_(s32) = G_FPEXT [[SELECT]](s16) - ; GFX6: [[FADD1:%[0-9]+]]:_(s32) = G_FADD [[FPEXT5]], [[FPEXT6]] - ; GFX6: [[FPTRUNC2:%[0-9]+]]:_(s16) = G_FPTRUNC [[FADD1]](s32) - ; GFX6: [[FPEXT7:%[0-9]+]]:_(s32) = G_FPEXT [[TRUNC1]](s16) - ; GFX6: [[INTRINSIC_TRUNC1:%[0-9]+]]:_(s32) = G_INTRINSIC_TRUNC [[FPEXT7]] - ; GFX6: [[FPTRUNC3:%[0-9]+]]:_(s16) = G_FPTRUNC [[INTRINSIC_TRUNC1]](s32) - ; GFX6: [[FNEG1:%[0-9]+]]:_(s16) = G_FNEG [[FPTRUNC3]] - ; GFX6: [[FPEXT8:%[0-9]+]]:_(s32) = G_FPEXT [[TRUNC1]](s16) - ; GFX6: [[FPEXT9:%[0-9]+]]:_(s32) = G_FPEXT [[FNEG1]](s16) - ; GFX6: [[FADD2:%[0-9]+]]:_(s32) = G_FADD [[FPEXT8]], [[FPEXT9]] - ; GFX6: [[FPTRUNC4:%[0-9]+]]:_(s16) = G_FPTRUNC [[FADD2]](s32) - ; GFX6: [[FABS1:%[0-9]+]]:_(s16) = G_FABS [[FPTRUNC4]] - ; GFX6: [[AND1:%[0-9]+]]:_(s16) = G_AND [[TRUNC1]], [[C3]] - ; GFX6: [[OR1:%[0-9]+]]:_(s16) = G_OR [[C4]], [[AND1]] - ; GFX6: [[FPEXT10:%[0-9]+]]:_(s32) = G_FPEXT [[FABS1]](s16) - ; GFX6: [[FPEXT11:%[0-9]+]]:_(s32) = G_FPEXT [[C2]](s16) - ; GFX6: [[FCMP1:%[0-9]+]]:_(s1) = G_FCMP floatpred(oge), [[FPEXT10]](s32), [[FPEXT11]] - ; GFX6: [[SELECT1:%[0-9]+]]:_(s16) = G_SELECT [[FCMP1]](s1), [[OR1]], [[C1]] - ; GFX6: [[FPEXT12:%[0-9]+]]:_(s32) = G_FPEXT [[FPTRUNC3]](s16) - ; GFX6: [[FPEXT13:%[0-9]+]]:_(s32) = G_FPEXT [[SELECT1]](s16) - ; GFX6: [[FADD3:%[0-9]+]]:_(s32) = G_FADD [[FPEXT12]], [[FPEXT13]] - ; GFX6: [[FPTRUNC5:%[0-9]+]]:_(s16) = G_FPTRUNC [[FADD3]](s32) - ; GFX6: [[FPEXT14:%[0-9]+]]:_(s32) = G_FPEXT [[TRUNC2]](s16) - ; GFX6: [[INTRINSIC_TRUNC2:%[0-9]+]]:_(s32) = G_INTRINSIC_TRUNC [[FPEXT14]] - ; GFX6: [[FPTRUNC6:%[0-9]+]]:_(s16) = G_FPTRUNC [[INTRINSIC_TRUNC2]](s32) - ; GFX6: [[FNEG2:%[0-9]+]]:_(s16) = G_FNEG [[FPTRUNC6]] - ; GFX6: [[FPEXT15:%[0-9]+]]:_(s32) = G_FPEXT [[TRUNC2]](s16) - ; GFX6: 
[[FPEXT16:%[0-9]+]]:_(s32) = G_FPEXT [[FNEG2]](s16) - ; GFX6: [[FADD4:%[0-9]+]]:_(s32) = G_FADD [[FPEXT15]], [[FPEXT16]] - ; GFX6: [[FPTRUNC7:%[0-9]+]]:_(s16) = G_FPTRUNC [[FADD4]](s32) - ; GFX6: [[FABS2:%[0-9]+]]:_(s16) = G_FABS [[FPTRUNC7]] - ; GFX6: [[AND2:%[0-9]+]]:_(s16) = G_AND [[TRUNC2]], [[C3]] - ; GFX6: [[OR2:%[0-9]+]]:_(s16) = G_OR [[C4]], [[AND2]] - ; GFX6: [[FPEXT17:%[0-9]+]]:_(s32) = G_FPEXT [[FABS2]](s16) - ; GFX6: [[FPEXT18:%[0-9]+]]:_(s32) = G_FPEXT [[C2]](s16) - ; GFX6: [[FCMP2:%[0-9]+]]:_(s1) = G_FCMP floatpred(oge), [[FPEXT17]](s32), [[FPEXT18]] - ; GFX6: [[SELECT2:%[0-9]+]]:_(s16) = G_SELECT [[FCMP2]](s1), [[OR2]], [[C1]] - ; GFX6: [[FPEXT19:%[0-9]+]]:_(s32) = G_FPEXT [[FPTRUNC6]](s16) - ; GFX6: [[FPEXT20:%[0-9]+]]:_(s32) = G_FPEXT [[SELECT2]](s16) - ; GFX6: [[FADD5:%[0-9]+]]:_(s32) = G_FADD [[FPEXT19]], [[FPEXT20]] - ; GFX6: [[FPTRUNC8:%[0-9]+]]:_(s16) = G_FPTRUNC [[FADD5]](s32) - ; GFX6: [[FPEXT21:%[0-9]+]]:_(s32) = G_FPEXT [[TRUNC3]](s16) - ; GFX6: [[INTRINSIC_TRUNC3:%[0-9]+]]:_(s32) = G_INTRINSIC_TRUNC [[FPEXT21]] - ; GFX6: [[FPTRUNC9:%[0-9]+]]:_(s16) = G_FPTRUNC [[INTRINSIC_TRUNC3]](s32) - ; GFX6: [[FNEG3:%[0-9]+]]:_(s16) = G_FNEG [[FPTRUNC9]] - ; GFX6: [[FPEXT22:%[0-9]+]]:_(s32) = G_FPEXT [[TRUNC3]](s16) - ; GFX6: [[FPEXT23:%[0-9]+]]:_(s32) = G_FPEXT [[FNEG3]](s16) - ; GFX6: [[FADD6:%[0-9]+]]:_(s32) = G_FADD [[FPEXT22]], [[FPEXT23]] - ; GFX6: [[FPTRUNC10:%[0-9]+]]:_(s16) = G_FPTRUNC [[FADD6]](s32) - ; GFX6: [[FABS3:%[0-9]+]]:_(s16) = G_FABS [[FPTRUNC10]] - ; GFX6: [[AND3:%[0-9]+]]:_(s16) = G_AND [[TRUNC3]], [[C3]] - ; GFX6: [[OR3:%[0-9]+]]:_(s16) = G_OR [[C4]], [[AND3]] - ; GFX6: [[FPEXT24:%[0-9]+]]:_(s32) = G_FPEXT [[FABS3]](s16) - ; GFX6: [[FPEXT25:%[0-9]+]]:_(s32) = G_FPEXT [[C2]](s16) - ; GFX6: [[FCMP3:%[0-9]+]]:_(s1) = G_FCMP floatpred(oge), [[FPEXT24]](s32), [[FPEXT25]] - ; GFX6: [[SELECT3:%[0-9]+]]:_(s16) = G_SELECT [[FCMP3]](s1), [[OR3]], [[C1]] - ; GFX6: [[FPEXT26:%[0-9]+]]:_(s32) = G_FPEXT [[FPTRUNC9]](s16) - ; GFX6: [[FPEXT27:%[0-9]+]]:_(s32) = G_FPEXT [[SELECT3]](s16) - ; GFX6: [[FADD7:%[0-9]+]]:_(s32) = G_FADD [[FPEXT26]], [[FPEXT27]] - ; GFX6: [[FPTRUNC11:%[0-9]+]]:_(s16) = G_FPTRUNC [[FADD7]](s32) - ; GFX6: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[FPTRUNC2]](s16) - ; GFX6: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[FPTRUNC5]](s16) - ; GFX6: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C]](s32) - ; GFX6: [[OR4:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL]] - ; GFX6: [[BITCAST2:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR4]](s32) - ; GFX6: [[ZEXT2:%[0-9]+]]:_(s32) = G_ZEXT [[FPTRUNC8]](s16) - ; GFX6: [[ZEXT3:%[0-9]+]]:_(s32) = G_ZEXT [[FPTRUNC11]](s16) - ; GFX6: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[ZEXT3]], [[C]](s32) - ; GFX6: [[OR5:%[0-9]+]]:_(s32) = G_OR [[ZEXT2]], [[SHL1]] - ; GFX6: [[BITCAST3:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR5]](s32) - ; GFX6: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BITCAST2]](<2 x s16>), [[BITCAST3]](<2 x s16>) - ; GFX6: $vgpr0_vgpr1 = COPY [[CONCAT_VECTORS]](<4 x s16>) + ; GFX6-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[COPY]](<4 x s16>) + ; GFX6-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>) + ; GFX6-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST]](s32) + ; GFX6-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; GFX6-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) + ; GFX6-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) + ; GFX6-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>) + ; GFX6-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC 
[[BITCAST1]](s32) + ; GFX6-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C]](s32) + ; GFX6-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR1]](s32) + ; GFX6-NEXT: [[FPEXT:%[0-9]+]]:_(s32) = G_FPEXT [[TRUNC]](s16) + ; GFX6-NEXT: [[INTRINSIC_TRUNC:%[0-9]+]]:_(s32) = G_INTRINSIC_TRUNC [[FPEXT]] + ; GFX6-NEXT: [[FPTRUNC:%[0-9]+]]:_(s16) = G_FPTRUNC [[INTRINSIC_TRUNC]](s32) + ; GFX6-NEXT: [[FNEG:%[0-9]+]]:_(s16) = G_FNEG [[FPTRUNC]] + ; GFX6-NEXT: [[FPEXT1:%[0-9]+]]:_(s32) = G_FPEXT [[TRUNC]](s16) + ; GFX6-NEXT: [[FPEXT2:%[0-9]+]]:_(s32) = G_FPEXT [[FNEG]](s16) + ; GFX6-NEXT: [[FADD:%[0-9]+]]:_(s32) = G_FADD [[FPEXT1]], [[FPEXT2]] + ; GFX6-NEXT: [[FPTRUNC1:%[0-9]+]]:_(s16) = G_FPTRUNC [[FADD]](s32) + ; GFX6-NEXT: [[FABS:%[0-9]+]]:_(s16) = G_FABS [[FPTRUNC1]] + ; GFX6-NEXT: [[C1:%[0-9]+]]:_(s16) = G_FCONSTANT half 0xH0000 + ; GFX6-NEXT: [[C2:%[0-9]+]]:_(s16) = G_FCONSTANT half 0xH3800 + ; GFX6-NEXT: [[C3:%[0-9]+]]:_(s16) = G_CONSTANT i16 -32768 + ; GFX6-NEXT: [[C4:%[0-9]+]]:_(s16) = G_CONSTANT i16 15360 + ; GFX6-NEXT: [[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC]], [[C3]] + ; GFX6-NEXT: [[OR:%[0-9]+]]:_(s16) = G_OR [[C4]], [[AND]] + ; GFX6-NEXT: [[FPEXT3:%[0-9]+]]:_(s32) = G_FPEXT [[FABS]](s16) + ; GFX6-NEXT: [[FPEXT4:%[0-9]+]]:_(s32) = G_FPEXT [[C2]](s16) + ; GFX6-NEXT: [[FCMP:%[0-9]+]]:_(s1) = G_FCMP floatpred(oge), [[FPEXT3]](s32), [[FPEXT4]] + ; GFX6-NEXT: [[SELECT:%[0-9]+]]:_(s16) = G_SELECT [[FCMP]](s1), [[OR]], [[C1]] + ; GFX6-NEXT: [[FPEXT5:%[0-9]+]]:_(s32) = G_FPEXT [[FPTRUNC]](s16) + ; GFX6-NEXT: [[FPEXT6:%[0-9]+]]:_(s32) = G_FPEXT [[SELECT]](s16) + ; GFX6-NEXT: [[FADD1:%[0-9]+]]:_(s32) = G_FADD [[FPEXT5]], [[FPEXT6]] + ; GFX6-NEXT: [[FPTRUNC2:%[0-9]+]]:_(s16) = G_FPTRUNC [[FADD1]](s32) + ; GFX6-NEXT: [[FPEXT7:%[0-9]+]]:_(s32) = G_FPEXT [[TRUNC1]](s16) + ; GFX6-NEXT: [[INTRINSIC_TRUNC1:%[0-9]+]]:_(s32) = G_INTRINSIC_TRUNC [[FPEXT7]] + ; GFX6-NEXT: [[FPTRUNC3:%[0-9]+]]:_(s16) = G_FPTRUNC [[INTRINSIC_TRUNC1]](s32) + ; GFX6-NEXT: [[FNEG1:%[0-9]+]]:_(s16) = G_FNEG [[FPTRUNC3]] + ; GFX6-NEXT: [[FPEXT8:%[0-9]+]]:_(s32) = G_FPEXT [[TRUNC1]](s16) + ; GFX6-NEXT: [[FPEXT9:%[0-9]+]]:_(s32) = G_FPEXT [[FNEG1]](s16) + ; GFX6-NEXT: [[FADD2:%[0-9]+]]:_(s32) = G_FADD [[FPEXT8]], [[FPEXT9]] + ; GFX6-NEXT: [[FPTRUNC4:%[0-9]+]]:_(s16) = G_FPTRUNC [[FADD2]](s32) + ; GFX6-NEXT: [[FABS1:%[0-9]+]]:_(s16) = G_FABS [[FPTRUNC4]] + ; GFX6-NEXT: [[AND1:%[0-9]+]]:_(s16) = G_AND [[TRUNC1]], [[C3]] + ; GFX6-NEXT: [[OR1:%[0-9]+]]:_(s16) = G_OR [[C4]], [[AND1]] + ; GFX6-NEXT: [[FPEXT10:%[0-9]+]]:_(s32) = G_FPEXT [[FABS1]](s16) + ; GFX6-NEXT: [[FPEXT11:%[0-9]+]]:_(s32) = G_FPEXT [[C2]](s16) + ; GFX6-NEXT: [[FCMP1:%[0-9]+]]:_(s1) = G_FCMP floatpred(oge), [[FPEXT10]](s32), [[FPEXT11]] + ; GFX6-NEXT: [[SELECT1:%[0-9]+]]:_(s16) = G_SELECT [[FCMP1]](s1), [[OR1]], [[C1]] + ; GFX6-NEXT: [[FPEXT12:%[0-9]+]]:_(s32) = G_FPEXT [[FPTRUNC3]](s16) + ; GFX6-NEXT: [[FPEXT13:%[0-9]+]]:_(s32) = G_FPEXT [[SELECT1]](s16) + ; GFX6-NEXT: [[FADD3:%[0-9]+]]:_(s32) = G_FADD [[FPEXT12]], [[FPEXT13]] + ; GFX6-NEXT: [[FPTRUNC5:%[0-9]+]]:_(s16) = G_FPTRUNC [[FADD3]](s32) + ; GFX6-NEXT: [[FPEXT14:%[0-9]+]]:_(s32) = G_FPEXT [[TRUNC2]](s16) + ; GFX6-NEXT: [[INTRINSIC_TRUNC2:%[0-9]+]]:_(s32) = G_INTRINSIC_TRUNC [[FPEXT14]] + ; GFX6-NEXT: [[FPTRUNC6:%[0-9]+]]:_(s16) = G_FPTRUNC [[INTRINSIC_TRUNC2]](s32) + ; GFX6-NEXT: [[FNEG2:%[0-9]+]]:_(s16) = G_FNEG [[FPTRUNC6]] + ; GFX6-NEXT: [[FPEXT15:%[0-9]+]]:_(s32) = G_FPEXT [[TRUNC2]](s16) + ; GFX6-NEXT: [[FPEXT16:%[0-9]+]]:_(s32) = G_FPEXT [[FNEG2]](s16) + ; GFX6-NEXT: [[FADD4:%[0-9]+]]:_(s32) = G_FADD 
[[FPEXT15]], [[FPEXT16]] + ; GFX6-NEXT: [[FPTRUNC7:%[0-9]+]]:_(s16) = G_FPTRUNC [[FADD4]](s32) + ; GFX6-NEXT: [[FABS2:%[0-9]+]]:_(s16) = G_FABS [[FPTRUNC7]] + ; GFX6-NEXT: [[AND2:%[0-9]+]]:_(s16) = G_AND [[TRUNC2]], [[C3]] + ; GFX6-NEXT: [[OR2:%[0-9]+]]:_(s16) = G_OR [[C4]], [[AND2]] + ; GFX6-NEXT: [[FPEXT17:%[0-9]+]]:_(s32) = G_FPEXT [[FABS2]](s16) + ; GFX6-NEXT: [[FPEXT18:%[0-9]+]]:_(s32) = G_FPEXT [[C2]](s16) + ; GFX6-NEXT: [[FCMP2:%[0-9]+]]:_(s1) = G_FCMP floatpred(oge), [[FPEXT17]](s32), [[FPEXT18]] + ; GFX6-NEXT: [[SELECT2:%[0-9]+]]:_(s16) = G_SELECT [[FCMP2]](s1), [[OR2]], [[C1]] + ; GFX6-NEXT: [[FPEXT19:%[0-9]+]]:_(s32) = G_FPEXT [[FPTRUNC6]](s16) + ; GFX6-NEXT: [[FPEXT20:%[0-9]+]]:_(s32) = G_FPEXT [[SELECT2]](s16) + ; GFX6-NEXT: [[FADD5:%[0-9]+]]:_(s32) = G_FADD [[FPEXT19]], [[FPEXT20]] + ; GFX6-NEXT: [[FPTRUNC8:%[0-9]+]]:_(s16) = G_FPTRUNC [[FADD5]](s32) + ; GFX6-NEXT: [[FPEXT21:%[0-9]+]]:_(s32) = G_FPEXT [[TRUNC3]](s16) + ; GFX6-NEXT: [[INTRINSIC_TRUNC3:%[0-9]+]]:_(s32) = G_INTRINSIC_TRUNC [[FPEXT21]] + ; GFX6-NEXT: [[FPTRUNC9:%[0-9]+]]:_(s16) = G_FPTRUNC [[INTRINSIC_TRUNC3]](s32) + ; GFX6-NEXT: [[FNEG3:%[0-9]+]]:_(s16) = G_FNEG [[FPTRUNC9]] + ; GFX6-NEXT: [[FPEXT22:%[0-9]+]]:_(s32) = G_FPEXT [[TRUNC3]](s16) + ; GFX6-NEXT: [[FPEXT23:%[0-9]+]]:_(s32) = G_FPEXT [[FNEG3]](s16) + ; GFX6-NEXT: [[FADD6:%[0-9]+]]:_(s32) = G_FADD [[FPEXT22]], [[FPEXT23]] + ; GFX6-NEXT: [[FPTRUNC10:%[0-9]+]]:_(s16) = G_FPTRUNC [[FADD6]](s32) + ; GFX6-NEXT: [[FABS3:%[0-9]+]]:_(s16) = G_FABS [[FPTRUNC10]] + ; GFX6-NEXT: [[AND3:%[0-9]+]]:_(s16) = G_AND [[TRUNC3]], [[C3]] + ; GFX6-NEXT: [[OR3:%[0-9]+]]:_(s16) = G_OR [[C4]], [[AND3]] + ; GFX6-NEXT: [[FPEXT24:%[0-9]+]]:_(s32) = G_FPEXT [[FABS3]](s16) + ; GFX6-NEXT: [[FPEXT25:%[0-9]+]]:_(s32) = G_FPEXT [[C2]](s16) + ; GFX6-NEXT: [[FCMP3:%[0-9]+]]:_(s1) = G_FCMP floatpred(oge), [[FPEXT24]](s32), [[FPEXT25]] + ; GFX6-NEXT: [[SELECT3:%[0-9]+]]:_(s16) = G_SELECT [[FCMP3]](s1), [[OR3]], [[C1]] + ; GFX6-NEXT: [[FPEXT26:%[0-9]+]]:_(s32) = G_FPEXT [[FPTRUNC9]](s16) + ; GFX6-NEXT: [[FPEXT27:%[0-9]+]]:_(s32) = G_FPEXT [[SELECT3]](s16) + ; GFX6-NEXT: [[FADD7:%[0-9]+]]:_(s32) = G_FADD [[FPEXT26]], [[FPEXT27]] + ; GFX6-NEXT: [[FPTRUNC11:%[0-9]+]]:_(s16) = G_FPTRUNC [[FADD7]](s32) + ; GFX6-NEXT: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[FPTRUNC2]](s16) + ; GFX6-NEXT: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[FPTRUNC5]](s16) + ; GFX6-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C]](s32) + ; GFX6-NEXT: [[OR4:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL]] + ; GFX6-NEXT: [[BITCAST2:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR4]](s32) + ; GFX6-NEXT: [[ZEXT2:%[0-9]+]]:_(s32) = G_ZEXT [[FPTRUNC8]](s16) + ; GFX6-NEXT: [[ZEXT3:%[0-9]+]]:_(s32) = G_ZEXT [[FPTRUNC11]](s16) + ; GFX6-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[ZEXT3]], [[C]](s32) + ; GFX6-NEXT: [[OR5:%[0-9]+]]:_(s32) = G_OR [[ZEXT2]], [[SHL1]] + ; GFX6-NEXT: [[BITCAST3:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR5]](s32) + ; GFX6-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BITCAST2]](<2 x s16>), [[BITCAST3]](<2 x s16>) + ; GFX6-NEXT: $vgpr0_vgpr1 = COPY [[CONCAT_VECTORS]](<4 x s16>) ; GFX8-LABEL: name: test_intrinsic_round_v4s16 ; GFX8: [[COPY:%[0-9]+]]:_(<4 x s16>) = COPY $vgpr0_vgpr1 - ; GFX8: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[COPY]](<4 x s16>) - ; GFX8: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>) - ; GFX8: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST]](s32) - ; GFX8: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; GFX8: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], 
[[C]](s32) - ; GFX8: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) - ; GFX8: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>) - ; GFX8: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST1]](s32) - ; GFX8: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C]](s32) - ; GFX8: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR1]](s32) - ; GFX8: [[INTRINSIC_TRUNC:%[0-9]+]]:_(s16) = G_INTRINSIC_TRUNC [[TRUNC]] - ; GFX8: [[FNEG:%[0-9]+]]:_(s16) = G_FNEG [[INTRINSIC_TRUNC]] - ; GFX8: [[FADD:%[0-9]+]]:_(s16) = G_FADD [[TRUNC]], [[FNEG]] - ; GFX8: [[FABS:%[0-9]+]]:_(s16) = G_FABS [[FADD]] - ; GFX8: [[C1:%[0-9]+]]:_(s16) = G_FCONSTANT half 0xH0000 - ; GFX8: [[C2:%[0-9]+]]:_(s16) = G_FCONSTANT half 0xH3800 - ; GFX8: [[C3:%[0-9]+]]:_(s16) = G_CONSTANT i16 -32768 - ; GFX8: [[C4:%[0-9]+]]:_(s16) = G_CONSTANT i16 15360 - ; GFX8: [[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC]], [[C3]] - ; GFX8: [[OR:%[0-9]+]]:_(s16) = G_OR [[C4]], [[AND]] - ; GFX8: [[FCMP:%[0-9]+]]:_(s1) = G_FCMP floatpred(oge), [[FABS]](s16), [[C2]] - ; GFX8: [[SELECT:%[0-9]+]]:_(s16) = G_SELECT [[FCMP]](s1), [[OR]], [[C1]] - ; GFX8: [[FADD1:%[0-9]+]]:_(s16) = G_FADD [[INTRINSIC_TRUNC]], [[SELECT]] - ; GFX8: [[INTRINSIC_TRUNC1:%[0-9]+]]:_(s16) = G_INTRINSIC_TRUNC [[TRUNC1]] - ; GFX8: [[FNEG1:%[0-9]+]]:_(s16) = G_FNEG [[INTRINSIC_TRUNC1]] - ; GFX8: [[FADD2:%[0-9]+]]:_(s16) = G_FADD [[TRUNC1]], [[FNEG1]] - ; GFX8: [[FABS1:%[0-9]+]]:_(s16) = G_FABS [[FADD2]] - ; GFX8: [[AND1:%[0-9]+]]:_(s16) = G_AND [[TRUNC1]], [[C3]] - ; GFX8: [[OR1:%[0-9]+]]:_(s16) = G_OR [[C4]], [[AND1]] - ; GFX8: [[FCMP1:%[0-9]+]]:_(s1) = G_FCMP floatpred(oge), [[FABS1]](s16), [[C2]] - ; GFX8: [[SELECT1:%[0-9]+]]:_(s16) = G_SELECT [[FCMP1]](s1), [[OR1]], [[C1]] - ; GFX8: [[FADD3:%[0-9]+]]:_(s16) = G_FADD [[INTRINSIC_TRUNC1]], [[SELECT1]] - ; GFX8: [[INTRINSIC_TRUNC2:%[0-9]+]]:_(s16) = G_INTRINSIC_TRUNC [[TRUNC2]] - ; GFX8: [[FNEG2:%[0-9]+]]:_(s16) = G_FNEG [[INTRINSIC_TRUNC2]] - ; GFX8: [[FADD4:%[0-9]+]]:_(s16) = G_FADD [[TRUNC2]], [[FNEG2]] - ; GFX8: [[FABS2:%[0-9]+]]:_(s16) = G_FABS [[FADD4]] - ; GFX8: [[AND2:%[0-9]+]]:_(s16) = G_AND [[TRUNC2]], [[C3]] - ; GFX8: [[OR2:%[0-9]+]]:_(s16) = G_OR [[C4]], [[AND2]] - ; GFX8: [[FCMP2:%[0-9]+]]:_(s1) = G_FCMP floatpred(oge), [[FABS2]](s16), [[C2]] - ; GFX8: [[SELECT2:%[0-9]+]]:_(s16) = G_SELECT [[FCMP2]](s1), [[OR2]], [[C1]] - ; GFX8: [[FADD5:%[0-9]+]]:_(s16) = G_FADD [[INTRINSIC_TRUNC2]], [[SELECT2]] - ; GFX8: [[INTRINSIC_TRUNC3:%[0-9]+]]:_(s16) = G_INTRINSIC_TRUNC [[TRUNC3]] - ; GFX8: [[FNEG3:%[0-9]+]]:_(s16) = G_FNEG [[INTRINSIC_TRUNC3]] - ; GFX8: [[FADD6:%[0-9]+]]:_(s16) = G_FADD [[TRUNC3]], [[FNEG3]] - ; GFX8: [[FABS3:%[0-9]+]]:_(s16) = G_FABS [[FADD6]] - ; GFX8: [[AND3:%[0-9]+]]:_(s16) = G_AND [[TRUNC3]], [[C3]] - ; GFX8: [[OR3:%[0-9]+]]:_(s16) = G_OR [[C4]], [[AND3]] - ; GFX8: [[FCMP3:%[0-9]+]]:_(s1) = G_FCMP floatpred(oge), [[FABS3]](s16), [[C2]] - ; GFX8: [[SELECT3:%[0-9]+]]:_(s16) = G_SELECT [[FCMP3]](s1), [[OR3]], [[C1]] - ; GFX8: [[FADD7:%[0-9]+]]:_(s16) = G_FADD [[INTRINSIC_TRUNC3]], [[SELECT3]] - ; GFX8: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[FADD1]](s16) - ; GFX8: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[FADD3]](s16) - ; GFX8: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C]](s32) - ; GFX8: [[OR4:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL]] - ; GFX8: [[BITCAST2:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR4]](s32) - ; GFX8: [[ZEXT2:%[0-9]+]]:_(s32) = G_ZEXT [[FADD5]](s16) - ; GFX8: [[ZEXT3:%[0-9]+]]:_(s32) = G_ZEXT [[FADD7]](s16) - ; GFX8: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[ZEXT3]], [[C]](s32) - ; GFX8: [[OR5:%[0-9]+]]:_(s32) = G_OR 
[[ZEXT2]], [[SHL1]] - ; GFX8: [[BITCAST3:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR5]](s32) - ; GFX8: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BITCAST2]](<2 x s16>), [[BITCAST3]](<2 x s16>) - ; GFX8: $vgpr0_vgpr1 = COPY [[CONCAT_VECTORS]](<4 x s16>) + ; GFX8-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[COPY]](<4 x s16>) + ; GFX8-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>) + ; GFX8-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST]](s32) + ; GFX8-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; GFX8-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) + ; GFX8-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) + ; GFX8-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>) + ; GFX8-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST1]](s32) + ; GFX8-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C]](s32) + ; GFX8-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR1]](s32) + ; GFX8-NEXT: [[INTRINSIC_TRUNC:%[0-9]+]]:_(s16) = G_INTRINSIC_TRUNC [[TRUNC]] + ; GFX8-NEXT: [[FNEG:%[0-9]+]]:_(s16) = G_FNEG [[INTRINSIC_TRUNC]] + ; GFX8-NEXT: [[FADD:%[0-9]+]]:_(s16) = G_FADD [[TRUNC]], [[FNEG]] + ; GFX8-NEXT: [[FABS:%[0-9]+]]:_(s16) = G_FABS [[FADD]] + ; GFX8-NEXT: [[C1:%[0-9]+]]:_(s16) = G_FCONSTANT half 0xH0000 + ; GFX8-NEXT: [[C2:%[0-9]+]]:_(s16) = G_FCONSTANT half 0xH3800 + ; GFX8-NEXT: [[C3:%[0-9]+]]:_(s16) = G_CONSTANT i16 -32768 + ; GFX8-NEXT: [[C4:%[0-9]+]]:_(s16) = G_CONSTANT i16 15360 + ; GFX8-NEXT: [[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC]], [[C3]] + ; GFX8-NEXT: [[OR:%[0-9]+]]:_(s16) = G_OR [[C4]], [[AND]] + ; GFX8-NEXT: [[FCMP:%[0-9]+]]:_(s1) = G_FCMP floatpred(oge), [[FABS]](s16), [[C2]] + ; GFX8-NEXT: [[SELECT:%[0-9]+]]:_(s16) = G_SELECT [[FCMP]](s1), [[OR]], [[C1]] + ; GFX8-NEXT: [[FADD1:%[0-9]+]]:_(s16) = G_FADD [[INTRINSIC_TRUNC]], [[SELECT]] + ; GFX8-NEXT: [[INTRINSIC_TRUNC1:%[0-9]+]]:_(s16) = G_INTRINSIC_TRUNC [[TRUNC1]] + ; GFX8-NEXT: [[FNEG1:%[0-9]+]]:_(s16) = G_FNEG [[INTRINSIC_TRUNC1]] + ; GFX8-NEXT: [[FADD2:%[0-9]+]]:_(s16) = G_FADD [[TRUNC1]], [[FNEG1]] + ; GFX8-NEXT: [[FABS1:%[0-9]+]]:_(s16) = G_FABS [[FADD2]] + ; GFX8-NEXT: [[AND1:%[0-9]+]]:_(s16) = G_AND [[TRUNC1]], [[C3]] + ; GFX8-NEXT: [[OR1:%[0-9]+]]:_(s16) = G_OR [[C4]], [[AND1]] + ; GFX8-NEXT: [[FCMP1:%[0-9]+]]:_(s1) = G_FCMP floatpred(oge), [[FABS1]](s16), [[C2]] + ; GFX8-NEXT: [[SELECT1:%[0-9]+]]:_(s16) = G_SELECT [[FCMP1]](s1), [[OR1]], [[C1]] + ; GFX8-NEXT: [[FADD3:%[0-9]+]]:_(s16) = G_FADD [[INTRINSIC_TRUNC1]], [[SELECT1]] + ; GFX8-NEXT: [[INTRINSIC_TRUNC2:%[0-9]+]]:_(s16) = G_INTRINSIC_TRUNC [[TRUNC2]] + ; GFX8-NEXT: [[FNEG2:%[0-9]+]]:_(s16) = G_FNEG [[INTRINSIC_TRUNC2]] + ; GFX8-NEXT: [[FADD4:%[0-9]+]]:_(s16) = G_FADD [[TRUNC2]], [[FNEG2]] + ; GFX8-NEXT: [[FABS2:%[0-9]+]]:_(s16) = G_FABS [[FADD4]] + ; GFX8-NEXT: [[AND2:%[0-9]+]]:_(s16) = G_AND [[TRUNC2]], [[C3]] + ; GFX8-NEXT: [[OR2:%[0-9]+]]:_(s16) = G_OR [[C4]], [[AND2]] + ; GFX8-NEXT: [[FCMP2:%[0-9]+]]:_(s1) = G_FCMP floatpred(oge), [[FABS2]](s16), [[C2]] + ; GFX8-NEXT: [[SELECT2:%[0-9]+]]:_(s16) = G_SELECT [[FCMP2]](s1), [[OR2]], [[C1]] + ; GFX8-NEXT: [[FADD5:%[0-9]+]]:_(s16) = G_FADD [[INTRINSIC_TRUNC2]], [[SELECT2]] + ; GFX8-NEXT: [[INTRINSIC_TRUNC3:%[0-9]+]]:_(s16) = G_INTRINSIC_TRUNC [[TRUNC3]] + ; GFX8-NEXT: [[FNEG3:%[0-9]+]]:_(s16) = G_FNEG [[INTRINSIC_TRUNC3]] + ; GFX8-NEXT: [[FADD6:%[0-9]+]]:_(s16) = G_FADD [[TRUNC3]], [[FNEG3]] + ; GFX8-NEXT: [[FABS3:%[0-9]+]]:_(s16) = G_FABS [[FADD6]] + ; GFX8-NEXT: [[AND3:%[0-9]+]]:_(s16) = G_AND 
[[TRUNC3]], [[C3]] + ; GFX8-NEXT: [[OR3:%[0-9]+]]:_(s16) = G_OR [[C4]], [[AND3]] + ; GFX8-NEXT: [[FCMP3:%[0-9]+]]:_(s1) = G_FCMP floatpred(oge), [[FABS3]](s16), [[C2]] + ; GFX8-NEXT: [[SELECT3:%[0-9]+]]:_(s16) = G_SELECT [[FCMP3]](s1), [[OR3]], [[C1]] + ; GFX8-NEXT: [[FADD7:%[0-9]+]]:_(s16) = G_FADD [[INTRINSIC_TRUNC3]], [[SELECT3]] + ; GFX8-NEXT: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[FADD1]](s16) + ; GFX8-NEXT: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[FADD3]](s16) + ; GFX8-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C]](s32) + ; GFX8-NEXT: [[OR4:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL]] + ; GFX8-NEXT: [[BITCAST2:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR4]](s32) + ; GFX8-NEXT: [[ZEXT2:%[0-9]+]]:_(s32) = G_ZEXT [[FADD5]](s16) + ; GFX8-NEXT: [[ZEXT3:%[0-9]+]]:_(s32) = G_ZEXT [[FADD7]](s16) + ; GFX8-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[ZEXT3]], [[C]](s32) + ; GFX8-NEXT: [[OR5:%[0-9]+]]:_(s32) = G_OR [[ZEXT2]], [[SHL1]] + ; GFX8-NEXT: [[BITCAST3:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR5]](s32) + ; GFX8-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BITCAST2]](<2 x s16>), [[BITCAST3]](<2 x s16>) + ; GFX8-NEXT: $vgpr0_vgpr1 = COPY [[CONCAT_VECTORS]](<4 x s16>) ; GFX9-LABEL: name: test_intrinsic_round_v4s16 ; GFX9: [[COPY:%[0-9]+]]:_(<4 x s16>) = COPY $vgpr0_vgpr1 - ; GFX9: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[COPY]](<4 x s16>) - ; GFX9: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>) - ; GFX9: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST]](s32) - ; GFX9: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; GFX9: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) - ; GFX9: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) - ; GFX9: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>) - ; GFX9: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST1]](s32) - ; GFX9: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C]](s32) - ; GFX9: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR1]](s32) - ; GFX9: [[INTRINSIC_TRUNC:%[0-9]+]]:_(s16) = G_INTRINSIC_TRUNC [[TRUNC]] - ; GFX9: [[FNEG:%[0-9]+]]:_(s16) = G_FNEG [[INTRINSIC_TRUNC]] - ; GFX9: [[FADD:%[0-9]+]]:_(s16) = G_FADD [[TRUNC]], [[FNEG]] - ; GFX9: [[FABS:%[0-9]+]]:_(s16) = G_FABS [[FADD]] - ; GFX9: [[C1:%[0-9]+]]:_(s16) = G_FCONSTANT half 0xH0000 - ; GFX9: [[C2:%[0-9]+]]:_(s16) = G_FCONSTANT half 0xH3800 - ; GFX9: [[C3:%[0-9]+]]:_(s16) = G_CONSTANT i16 -32768 - ; GFX9: [[C4:%[0-9]+]]:_(s16) = G_CONSTANT i16 15360 - ; GFX9: [[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC]], [[C3]] - ; GFX9: [[OR:%[0-9]+]]:_(s16) = G_OR [[C4]], [[AND]] - ; GFX9: [[FCMP:%[0-9]+]]:_(s1) = G_FCMP floatpred(oge), [[FABS]](s16), [[C2]] - ; GFX9: [[SELECT:%[0-9]+]]:_(s16) = G_SELECT [[FCMP]](s1), [[OR]], [[C1]] - ; GFX9: [[FADD1:%[0-9]+]]:_(s16) = G_FADD [[INTRINSIC_TRUNC]], [[SELECT]] - ; GFX9: [[INTRINSIC_TRUNC1:%[0-9]+]]:_(s16) = G_INTRINSIC_TRUNC [[TRUNC1]] - ; GFX9: [[FNEG1:%[0-9]+]]:_(s16) = G_FNEG [[INTRINSIC_TRUNC1]] - ; GFX9: [[FADD2:%[0-9]+]]:_(s16) = G_FADD [[TRUNC1]], [[FNEG1]] - ; GFX9: [[FABS1:%[0-9]+]]:_(s16) = G_FABS [[FADD2]] - ; GFX9: [[AND1:%[0-9]+]]:_(s16) = G_AND [[TRUNC1]], [[C3]] - ; GFX9: [[OR1:%[0-9]+]]:_(s16) = G_OR [[C4]], [[AND1]] - ; GFX9: [[FCMP1:%[0-9]+]]:_(s1) = G_FCMP floatpred(oge), [[FABS1]](s16), [[C2]] - ; GFX9: [[SELECT1:%[0-9]+]]:_(s16) = G_SELECT [[FCMP1]](s1), [[OR1]], [[C1]] - ; GFX9: [[FADD3:%[0-9]+]]:_(s16) = G_FADD [[INTRINSIC_TRUNC1]], [[SELECT1]] - ; GFX9: [[INTRINSIC_TRUNC2:%[0-9]+]]:_(s16) = G_INTRINSIC_TRUNC [[TRUNC2]] - ; GFX9: [[FNEG2:%[0-9]+]]:_(s16) = G_FNEG 
[[INTRINSIC_TRUNC2]] - ; GFX9: [[FADD4:%[0-9]+]]:_(s16) = G_FADD [[TRUNC2]], [[FNEG2]] - ; GFX9: [[FABS2:%[0-9]+]]:_(s16) = G_FABS [[FADD4]] - ; GFX9: [[AND2:%[0-9]+]]:_(s16) = G_AND [[TRUNC2]], [[C3]] - ; GFX9: [[OR2:%[0-9]+]]:_(s16) = G_OR [[C4]], [[AND2]] - ; GFX9: [[FCMP2:%[0-9]+]]:_(s1) = G_FCMP floatpred(oge), [[FABS2]](s16), [[C2]] - ; GFX9: [[SELECT2:%[0-9]+]]:_(s16) = G_SELECT [[FCMP2]](s1), [[OR2]], [[C1]] - ; GFX9: [[FADD5:%[0-9]+]]:_(s16) = G_FADD [[INTRINSIC_TRUNC2]], [[SELECT2]] - ; GFX9: [[INTRINSIC_TRUNC3:%[0-9]+]]:_(s16) = G_INTRINSIC_TRUNC [[TRUNC3]] - ; GFX9: [[FNEG3:%[0-9]+]]:_(s16) = G_FNEG [[INTRINSIC_TRUNC3]] - ; GFX9: [[FADD6:%[0-9]+]]:_(s16) = G_FADD [[TRUNC3]], [[FNEG3]] - ; GFX9: [[FABS3:%[0-9]+]]:_(s16) = G_FABS [[FADD6]] - ; GFX9: [[AND3:%[0-9]+]]:_(s16) = G_AND [[TRUNC3]], [[C3]] - ; GFX9: [[OR3:%[0-9]+]]:_(s16) = G_OR [[C4]], [[AND3]] - ; GFX9: [[FCMP3:%[0-9]+]]:_(s1) = G_FCMP floatpred(oge), [[FABS3]](s16), [[C2]] - ; GFX9: [[SELECT3:%[0-9]+]]:_(s16) = G_SELECT [[FCMP3]](s1), [[OR3]], [[C1]] - ; GFX9: [[FADD7:%[0-9]+]]:_(s16) = G_FADD [[INTRINSIC_TRUNC3]], [[SELECT3]] - ; GFX9: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[FADD1]](s16) - ; GFX9: [[ANYEXT1:%[0-9]+]]:_(s32) = G_ANYEXT [[FADD3]](s16) - ; GFX9: [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[ANYEXT]](s32), [[ANYEXT1]](s32) - ; GFX9: [[ANYEXT2:%[0-9]+]]:_(s32) = G_ANYEXT [[FADD5]](s16) - ; GFX9: [[ANYEXT3:%[0-9]+]]:_(s32) = G_ANYEXT [[FADD7]](s16) - ; GFX9: [[BUILD_VECTOR_TRUNC1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[ANYEXT2]](s32), [[ANYEXT3]](s32) - ; GFX9: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR_TRUNC]](<2 x s16>), [[BUILD_VECTOR_TRUNC1]](<2 x s16>) - ; GFX9: $vgpr0_vgpr1 = COPY [[CONCAT_VECTORS]](<4 x s16>) + ; GFX9-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[COPY]](<4 x s16>) + ; GFX9-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>) + ; GFX9-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST]](s32) + ; GFX9-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; GFX9-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) + ; GFX9-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) + ; GFX9-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>) + ; GFX9-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST1]](s32) + ; GFX9-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C]](s32) + ; GFX9-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR1]](s32) + ; GFX9-NEXT: [[INTRINSIC_TRUNC:%[0-9]+]]:_(s16) = G_INTRINSIC_TRUNC [[TRUNC]] + ; GFX9-NEXT: [[FNEG:%[0-9]+]]:_(s16) = G_FNEG [[INTRINSIC_TRUNC]] + ; GFX9-NEXT: [[FADD:%[0-9]+]]:_(s16) = G_FADD [[TRUNC]], [[FNEG]] + ; GFX9-NEXT: [[FABS:%[0-9]+]]:_(s16) = G_FABS [[FADD]] + ; GFX9-NEXT: [[C1:%[0-9]+]]:_(s16) = G_FCONSTANT half 0xH0000 + ; GFX9-NEXT: [[C2:%[0-9]+]]:_(s16) = G_FCONSTANT half 0xH3800 + ; GFX9-NEXT: [[C3:%[0-9]+]]:_(s16) = G_CONSTANT i16 -32768 + ; GFX9-NEXT: [[C4:%[0-9]+]]:_(s16) = G_CONSTANT i16 15360 + ; GFX9-NEXT: [[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC]], [[C3]] + ; GFX9-NEXT: [[OR:%[0-9]+]]:_(s16) = G_OR [[C4]], [[AND]] + ; GFX9-NEXT: [[FCMP:%[0-9]+]]:_(s1) = G_FCMP floatpred(oge), [[FABS]](s16), [[C2]] + ; GFX9-NEXT: [[SELECT:%[0-9]+]]:_(s16) = G_SELECT [[FCMP]](s1), [[OR]], [[C1]] + ; GFX9-NEXT: [[FADD1:%[0-9]+]]:_(s16) = G_FADD [[INTRINSIC_TRUNC]], [[SELECT]] + ; GFX9-NEXT: [[INTRINSIC_TRUNC1:%[0-9]+]]:_(s16) = G_INTRINSIC_TRUNC [[TRUNC1]] + ; GFX9-NEXT: [[FNEG1:%[0-9]+]]:_(s16) = G_FNEG 
[[INTRINSIC_TRUNC1]] + ; GFX9-NEXT: [[FADD2:%[0-9]+]]:_(s16) = G_FADD [[TRUNC1]], [[FNEG1]] + ; GFX9-NEXT: [[FABS1:%[0-9]+]]:_(s16) = G_FABS [[FADD2]] + ; GFX9-NEXT: [[AND1:%[0-9]+]]:_(s16) = G_AND [[TRUNC1]], [[C3]] + ; GFX9-NEXT: [[OR1:%[0-9]+]]:_(s16) = G_OR [[C4]], [[AND1]] + ; GFX9-NEXT: [[FCMP1:%[0-9]+]]:_(s1) = G_FCMP floatpred(oge), [[FABS1]](s16), [[C2]] + ; GFX9-NEXT: [[SELECT1:%[0-9]+]]:_(s16) = G_SELECT [[FCMP1]](s1), [[OR1]], [[C1]] + ; GFX9-NEXT: [[FADD3:%[0-9]+]]:_(s16) = G_FADD [[INTRINSIC_TRUNC1]], [[SELECT1]] + ; GFX9-NEXT: [[INTRINSIC_TRUNC2:%[0-9]+]]:_(s16) = G_INTRINSIC_TRUNC [[TRUNC2]] + ; GFX9-NEXT: [[FNEG2:%[0-9]+]]:_(s16) = G_FNEG [[INTRINSIC_TRUNC2]] + ; GFX9-NEXT: [[FADD4:%[0-9]+]]:_(s16) = G_FADD [[TRUNC2]], [[FNEG2]] + ; GFX9-NEXT: [[FABS2:%[0-9]+]]:_(s16) = G_FABS [[FADD4]] + ; GFX9-NEXT: [[AND2:%[0-9]+]]:_(s16) = G_AND [[TRUNC2]], [[C3]] + ; GFX9-NEXT: [[OR2:%[0-9]+]]:_(s16) = G_OR [[C4]], [[AND2]] + ; GFX9-NEXT: [[FCMP2:%[0-9]+]]:_(s1) = G_FCMP floatpred(oge), [[FABS2]](s16), [[C2]] + ; GFX9-NEXT: [[SELECT2:%[0-9]+]]:_(s16) = G_SELECT [[FCMP2]](s1), [[OR2]], [[C1]] + ; GFX9-NEXT: [[FADD5:%[0-9]+]]:_(s16) = G_FADD [[INTRINSIC_TRUNC2]], [[SELECT2]] + ; GFX9-NEXT: [[INTRINSIC_TRUNC3:%[0-9]+]]:_(s16) = G_INTRINSIC_TRUNC [[TRUNC3]] + ; GFX9-NEXT: [[FNEG3:%[0-9]+]]:_(s16) = G_FNEG [[INTRINSIC_TRUNC3]] + ; GFX9-NEXT: [[FADD6:%[0-9]+]]:_(s16) = G_FADD [[TRUNC3]], [[FNEG3]] + ; GFX9-NEXT: [[FABS3:%[0-9]+]]:_(s16) = G_FABS [[FADD6]] + ; GFX9-NEXT: [[AND3:%[0-9]+]]:_(s16) = G_AND [[TRUNC3]], [[C3]] + ; GFX9-NEXT: [[OR3:%[0-9]+]]:_(s16) = G_OR [[C4]], [[AND3]] + ; GFX9-NEXT: [[FCMP3:%[0-9]+]]:_(s1) = G_FCMP floatpred(oge), [[FABS3]](s16), [[C2]] + ; GFX9-NEXT: [[SELECT3:%[0-9]+]]:_(s16) = G_SELECT [[FCMP3]](s1), [[OR3]], [[C1]] + ; GFX9-NEXT: [[FADD7:%[0-9]+]]:_(s16) = G_FADD [[INTRINSIC_TRUNC3]], [[SELECT3]] + ; GFX9-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[FADD1]](s16) + ; GFX9-NEXT: [[ANYEXT1:%[0-9]+]]:_(s32) = G_ANYEXT [[FADD3]](s16) + ; GFX9-NEXT: [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[ANYEXT]](s32), [[ANYEXT1]](s32) + ; GFX9-NEXT: [[ANYEXT2:%[0-9]+]]:_(s32) = G_ANYEXT [[FADD5]](s16) + ; GFX9-NEXT: [[ANYEXT3:%[0-9]+]]:_(s32) = G_ANYEXT [[FADD7]](s16) + ; GFX9-NEXT: [[BUILD_VECTOR_TRUNC1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[ANYEXT2]](s32), [[ANYEXT3]](s32) + ; GFX9-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR_TRUNC]](<2 x s16>), [[BUILD_VECTOR_TRUNC1]](<2 x s16>) + ; GFX9-NEXT: $vgpr0_vgpr1 = COPY [[CONCAT_VECTORS]](<4 x s16>) %0:_(<4 x s16>) = COPY $vgpr0_vgpr1 %1:_(<4 x s16>) = G_INTRINSIC_ROUND %0 $vgpr0_vgpr1 = COPY %1 diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-inttoptr.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-inttoptr.mir index 5d037703572a..eb5a4f959602 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-inttoptr.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-inttoptr.mir @@ -1,5 +1,5 @@ # NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py -# RUN: llc -march=amdgcn -O0 -run-pass=legalizer -global-isel-abort=0 -o - %s | FileCheck %s +# RUN: llc -march=amdgcn -O0 -run-pass=legalizer -o - %s | FileCheck %s --- name: test_inttoptr_s64_to_p0 diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-llvm.amdgcn.image.load.2d.d16.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-llvm.amdgcn.image.load.2d.d16.ll index c0bfde9e89a5..e814e66d5d82 100644 --- 
a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-llvm.amdgcn.image.load.2d.d16.ll +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-llvm.amdgcn.image.load.2d.d16.ll @@ -1,46 +1,48 @@ ; NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py -; RUN: llc -global-isel -mtriple=amdgcn-mesa-mesa3d -mcpu=tonga -stop-after=legalizer -global-isel-abort=0 -o - %s | FileCheck -check-prefix=UNPACKED %s -; RUN: llc -global-isel -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx810 -stop-after=legalizer -global-isel-abort=0 -o - %s | FileCheck -check-prefix=PACKED %s +; RUN: llc -global-isel -mtriple=amdgcn-mesa-mesa3d -mcpu=tonga -stop-after=legalizer -o - %s | FileCheck -check-prefix=UNPACKED %s +; RUN: llc -global-isel -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx810 -stop-after=legalizer -o - %s | FileCheck -check-prefix=PACKED %s define amdgpu_ps half @image_load_f16(<8 x i32> inreg %rsrc, i32 %s, i32 %t) { ; UNPACKED-LABEL: name: image_load_f16 ; UNPACKED: bb.1 (%ir-block.0): - ; UNPACKED: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1 - ; UNPACKED: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 - ; UNPACKED: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3 - ; UNPACKED: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4 - ; UNPACKED: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5 - ; UNPACKED: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6 - ; UNPACKED: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7 - ; UNPACKED: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8 - ; UNPACKED: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 - ; UNPACKED: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) - ; UNPACKED: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; UNPACKED: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; UNPACKED: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32) - ; UNPACKED: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(s16) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.2d), 1, [[BUILD_VECTOR1]](<2 x s32>), $noreg, [[BUILD_VECTOR]](<8 x s32>), 0, 0, 0 :: (dereferenceable load (s16) from custom "ImageResource") - ; UNPACKED: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[AMDGPU_INTRIN_IMAGE_LOAD]](s16) - ; UNPACKED: $vgpr0 = COPY [[ANYEXT]](s32) - ; UNPACKED: SI_RETURN_TO_EPILOG implicit $vgpr0 + ; UNPACKED-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1 + ; UNPACKED-NEXT: {{ $}} + ; UNPACKED-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 + ; UNPACKED-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3 + ; UNPACKED-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4 + ; UNPACKED-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5 + ; UNPACKED-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6 + ; UNPACKED-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7 + ; UNPACKED-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8 + ; UNPACKED-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 + ; UNPACKED-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) + ; UNPACKED-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0 + ; UNPACKED-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; UNPACKED-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32) + ; UNPACKED-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(s16) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.2d), 1, [[BUILD_VECTOR1]](<2 x s32>), $noreg, 
[[BUILD_VECTOR]](<8 x s32>), 0, 0, 0 :: (dereferenceable load (s16) from custom "ImageResource") + ; UNPACKED-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[AMDGPU_INTRIN_IMAGE_LOAD]](s16) + ; UNPACKED-NEXT: $vgpr0 = COPY [[ANYEXT]](s32) + ; UNPACKED-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0 ; PACKED-LABEL: name: image_load_f16 ; PACKED: bb.1 (%ir-block.0): - ; PACKED: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1 - ; PACKED: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 - ; PACKED: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3 - ; PACKED: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4 - ; PACKED: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5 - ; PACKED: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6 - ; PACKED: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7 - ; PACKED: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8 - ; PACKED: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 - ; PACKED: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) - ; PACKED: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; PACKED: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; PACKED: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32) - ; PACKED: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(s16) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.2d), 1, [[BUILD_VECTOR1]](<2 x s32>), $noreg, [[BUILD_VECTOR]](<8 x s32>), 0, 0, 0 :: (dereferenceable load (s16) from custom "ImageResource") - ; PACKED: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[AMDGPU_INTRIN_IMAGE_LOAD]](s16) - ; PACKED: $vgpr0 = COPY [[ANYEXT]](s32) - ; PACKED: SI_RETURN_TO_EPILOG implicit $vgpr0 + ; PACKED-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1 + ; PACKED-NEXT: {{ $}} + ; PACKED-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 + ; PACKED-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3 + ; PACKED-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4 + ; PACKED-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5 + ; PACKED-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6 + ; PACKED-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7 + ; PACKED-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8 + ; PACKED-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 + ; PACKED-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) + ; PACKED-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0 + ; PACKED-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; PACKED-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32) + ; PACKED-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(s16) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.2d), 1, [[BUILD_VECTOR1]](<2 x s32>), $noreg, [[BUILD_VECTOR]](<8 x s32>), 0, 0, 0 :: (dereferenceable load (s16) from custom "ImageResource") + ; PACKED-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[AMDGPU_INTRIN_IMAGE_LOAD]](s16) + ; PACKED-NEXT: $vgpr0 = COPY [[ANYEXT]](s32) + ; PACKED-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0 %tex = call half @llvm.amdgcn.image.load.2d.f16.i32(i32 1, i32 %s, i32 %t, <8 x i32> %rsrc, i32 0, i32 0) ret half %tex } @@ -48,48 +50,50 @@ define amdgpu_ps half @image_load_f16(<8 x i32> inreg %rsrc, i32 %s, i32 %t) { define amdgpu_ps <2 x half> @image_load_v2f16(<8 x i32> inreg %rsrc, i32 %s, i32 %t) { ; UNPACKED-LABEL: name: image_load_v2f16 ; UNPACKED: bb.1 (%ir-block.0): - ; UNPACKED: liveins: $sgpr2, 
$sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1 - ; UNPACKED: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 - ; UNPACKED: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3 - ; UNPACKED: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4 - ; UNPACKED: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5 - ; UNPACKED: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6 - ; UNPACKED: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7 - ; UNPACKED: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8 - ; UNPACKED: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 - ; UNPACKED: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) - ; UNPACKED: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; UNPACKED: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; UNPACKED: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32) - ; UNPACKED: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<2 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.2d), 3, [[BUILD_VECTOR1]](<2 x s32>), $noreg, [[BUILD_VECTOR]](<8 x s32>), 0, 0, 0 :: (dereferenceable load (<2 x s16>) from custom "ImageResource") - ; UNPACKED: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<2 x s32>) - ; UNPACKED: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 - ; UNPACKED: [[AND:%[0-9]+]]:_(s32) = G_AND [[UV]], [[C]] - ; UNPACKED: [[AND1:%[0-9]+]]:_(s32) = G_AND [[UV1]], [[C]] - ; UNPACKED: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; UNPACKED: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C1]](s32) - ; UNPACKED: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]] - ; UNPACKED: [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) - ; UNPACKED: $vgpr0 = COPY [[BITCAST]](<2 x s16>) - ; UNPACKED: SI_RETURN_TO_EPILOG implicit $vgpr0 + ; UNPACKED-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1 + ; UNPACKED-NEXT: {{ $}} + ; UNPACKED-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 + ; UNPACKED-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3 + ; UNPACKED-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4 + ; UNPACKED-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5 + ; UNPACKED-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6 + ; UNPACKED-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7 + ; UNPACKED-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8 + ; UNPACKED-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 + ; UNPACKED-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) + ; UNPACKED-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0 + ; UNPACKED-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; UNPACKED-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32) + ; UNPACKED-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<2 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.2d), 3, [[BUILD_VECTOR1]](<2 x s32>), $noreg, [[BUILD_VECTOR]](<8 x s32>), 0, 0, 0 :: (dereferenceable load (<2 x s16>) from custom "ImageResource") + ; UNPACKED-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<2 x s32>) + ; UNPACKED-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 + ; UNPACKED-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[UV]], [[C]] + ; UNPACKED-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[UV1]], [[C]] + ; UNPACKED-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; UNPACKED-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL 
[[AND1]], [[C1]](s32) + ; UNPACKED-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]] + ; UNPACKED-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) + ; UNPACKED-NEXT: $vgpr0 = COPY [[BITCAST]](<2 x s16>) + ; UNPACKED-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0 ; PACKED-LABEL: name: image_load_v2f16 ; PACKED: bb.1 (%ir-block.0): - ; PACKED: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1 - ; PACKED: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 - ; PACKED: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3 - ; PACKED: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4 - ; PACKED: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5 - ; PACKED: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6 - ; PACKED: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7 - ; PACKED: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8 - ; PACKED: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 - ; PACKED: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) - ; PACKED: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; PACKED: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; PACKED: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32) - ; PACKED: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<2 x s16>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.2d), 3, [[BUILD_VECTOR1]](<2 x s32>), $noreg, [[BUILD_VECTOR]](<8 x s32>), 0, 0, 0 :: (dereferenceable load (<2 x s16>) from custom "ImageResource") - ; PACKED: $vgpr0 = COPY [[AMDGPU_INTRIN_IMAGE_LOAD]](<2 x s16>) - ; PACKED: SI_RETURN_TO_EPILOG implicit $vgpr0 + ; PACKED-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1 + ; PACKED-NEXT: {{ $}} + ; PACKED-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 + ; PACKED-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3 + ; PACKED-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4 + ; PACKED-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5 + ; PACKED-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6 + ; PACKED-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7 + ; PACKED-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8 + ; PACKED-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 + ; PACKED-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) + ; PACKED-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0 + ; PACKED-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; PACKED-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32) + ; PACKED-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<2 x s16>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.2d), 3, [[BUILD_VECTOR1]](<2 x s32>), $noreg, [[BUILD_VECTOR]](<8 x s32>), 0, 0, 0 :: (dereferenceable load (<2 x s16>) from custom "ImageResource") + ; PACKED-NEXT: $vgpr0 = COPY [[AMDGPU_INTRIN_IMAGE_LOAD]](<2 x s16>) + ; PACKED-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0 %tex = call <2 x half> @llvm.amdgcn.image.load.2d.v2f16.i32(i32 3, i32 %s, i32 %t, <8 x i32> %rsrc, i32 0, i32 0) ret <2 x half> %tex } @@ -97,70 +101,72 @@ define amdgpu_ps <2 x half> @image_load_v2f16(<8 x i32> inreg %rsrc, i32 %s, i32 define amdgpu_ps <3 x half> @image_load_v3f16(<8 x i32> inreg %rsrc, i32 %s, i32 %t) { ; UNPACKED-LABEL: name: image_load_v3f16 ; UNPACKED: bb.1 (%ir-block.0): - ; UNPACKED: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1 - ; UNPACKED: 
[[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 - ; UNPACKED: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3 - ; UNPACKED: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4 - ; UNPACKED: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5 - ; UNPACKED: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6 - ; UNPACKED: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7 - ; UNPACKED: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8 - ; UNPACKED: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 - ; UNPACKED: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) - ; UNPACKED: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; UNPACKED: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; UNPACKED: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32) - ; UNPACKED: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<3 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.2d), 7, [[BUILD_VECTOR1]](<2 x s32>), $noreg, [[BUILD_VECTOR]](<8 x s32>), 0, 0, 0 :: (dereferenceable load (<3 x s16>) from custom "ImageResource", align 8) - ; UNPACKED: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<3 x s32>) - ; UNPACKED: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF - ; UNPACKED: [[UV3:%[0-9]+]]:_(<2 x s16>), [[UV4:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<4 x s16>) - ; UNPACKED: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV3]](<2 x s16>) - ; UNPACKED: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; UNPACKED: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 - ; UNPACKED: [[AND:%[0-9]+]]:_(s32) = G_AND [[UV]], [[C1]] - ; UNPACKED: [[AND1:%[0-9]+]]:_(s32) = G_AND [[UV1]], [[C1]] - ; UNPACKED: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C]](s32) - ; UNPACKED: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]] - ; UNPACKED: [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) - ; UNPACKED: [[AND2:%[0-9]+]]:_(s32) = G_AND [[UV2]], [[C1]] - ; UNPACKED: [[AND3:%[0-9]+]]:_(s32) = G_AND [[BITCAST]], [[C1]] - ; UNPACKED: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C]](s32) - ; UNPACKED: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL1]] - ; UNPACKED: [[BITCAST2:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32) - ; UNPACKED: $vgpr0 = COPY [[BITCAST1]](<2 x s16>) - ; UNPACKED: $vgpr1 = COPY [[BITCAST2]](<2 x s16>) - ; UNPACKED: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1 + ; UNPACKED-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1 + ; UNPACKED-NEXT: {{ $}} + ; UNPACKED-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 + ; UNPACKED-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3 + ; UNPACKED-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4 + ; UNPACKED-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5 + ; UNPACKED-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6 + ; UNPACKED-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7 + ; UNPACKED-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8 + ; UNPACKED-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 + ; UNPACKED-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) + ; UNPACKED-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0 + ; UNPACKED-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; UNPACKED-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32) + ; UNPACKED-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<3 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD 
intrinsic(@llvm.amdgcn.image.load.2d), 7, [[BUILD_VECTOR1]](<2 x s32>), $noreg, [[BUILD_VECTOR]](<8 x s32>), 0, 0, 0 :: (dereferenceable load (<3 x s16>) from custom "ImageResource", align 8) + ; UNPACKED-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<3 x s32>) + ; UNPACKED-NEXT: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF + ; UNPACKED-NEXT: [[UV3:%[0-9]+]]:_(<2 x s16>), [[UV4:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<4 x s16>) + ; UNPACKED-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV3]](<2 x s16>) + ; UNPACKED-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; UNPACKED-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 + ; UNPACKED-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[UV]], [[C1]] + ; UNPACKED-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[UV1]], [[C1]] + ; UNPACKED-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C]](s32) + ; UNPACKED-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]] + ; UNPACKED-NEXT: [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) + ; UNPACKED-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[UV2]], [[C1]] + ; UNPACKED-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[BITCAST]], [[C1]] + ; UNPACKED-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C]](s32) + ; UNPACKED-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL1]] + ; UNPACKED-NEXT: [[BITCAST2:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32) + ; UNPACKED-NEXT: $vgpr0 = COPY [[BITCAST1]](<2 x s16>) + ; UNPACKED-NEXT: $vgpr1 = COPY [[BITCAST2]](<2 x s16>) + ; UNPACKED-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1 ; PACKED-LABEL: name: image_load_v3f16 ; PACKED: bb.1 (%ir-block.0): - ; PACKED: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1 - ; PACKED: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 - ; PACKED: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3 - ; PACKED: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4 - ; PACKED: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5 - ; PACKED: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6 - ; PACKED: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7 - ; PACKED: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8 - ; PACKED: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 - ; PACKED: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) - ; PACKED: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; PACKED: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; PACKED: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32) - ; PACKED: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s16>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.2d), 7, [[BUILD_VECTOR1]](<2 x s32>), $noreg, [[BUILD_VECTOR]](<8 x s32>), 0, 0, 0 :: (dereferenceable load (<3 x s16>) from custom "ImageResource", align 8) - ; PACKED: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x s16>) - ; PACKED: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF - ; PACKED: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>) - ; PACKED: [[UV2:%[0-9]+]]:_(<2 x s16>), [[UV3:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<4 x s16>) - ; PACKED: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV2]](<2 x s16>) - ; PACKED: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; PACKED: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 - ; PACKED: [[AND:%[0-9]+]]:_(s32) = G_AND [[BITCAST]], [[C1]] - ; PACKED: [[AND1:%[0-9]+]]:_(s32) = G_AND [[BITCAST1]], [[C1]] - ; PACKED: 
[[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C]](s32) - ; PACKED: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]] - ; PACKED: [[BITCAST2:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) - ; PACKED: $vgpr0 = COPY [[UV]](<2 x s16>) - ; PACKED: $vgpr1 = COPY [[BITCAST2]](<2 x s16>) - ; PACKED: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1 + ; PACKED-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1 + ; PACKED-NEXT: {{ $}} + ; PACKED-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 + ; PACKED-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3 + ; PACKED-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4 + ; PACKED-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5 + ; PACKED-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6 + ; PACKED-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7 + ; PACKED-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8 + ; PACKED-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 + ; PACKED-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) + ; PACKED-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0 + ; PACKED-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; PACKED-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32) + ; PACKED-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s16>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.2d), 7, [[BUILD_VECTOR1]](<2 x s32>), $noreg, [[BUILD_VECTOR]](<8 x s32>), 0, 0, 0 :: (dereferenceable load (<3 x s16>) from custom "ImageResource", align 8) + ; PACKED-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x s16>) + ; PACKED-NEXT: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF + ; PACKED-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>) + ; PACKED-NEXT: [[UV2:%[0-9]+]]:_(<2 x s16>), [[UV3:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<4 x s16>) + ; PACKED-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV2]](<2 x s16>) + ; PACKED-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; PACKED-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 + ; PACKED-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[BITCAST]], [[C1]] + ; PACKED-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[BITCAST1]], [[C1]] + ; PACKED-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C]](s32) + ; PACKED-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]] + ; PACKED-NEXT: [[BITCAST2:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) + ; PACKED-NEXT: $vgpr0 = COPY [[UV]](<2 x s16>) + ; PACKED-NEXT: $vgpr1 = COPY [[BITCAST2]](<2 x s16>) + ; PACKED-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1 %tex = call <3 x half> @llvm.amdgcn.image.load.2d.v3f16.i32(i32 7, i32 %s, i32 %t, <8 x i32> %rsrc, i32 0, i32 0) ret <3 x half> %tex } @@ -168,56 +174,58 @@ define amdgpu_ps <3 x half> @image_load_v3f16(<8 x i32> inreg %rsrc, i32 %s, i32 define amdgpu_ps <4 x half> @image_load_v4f16(<8 x i32> inreg %rsrc, i32 %s, i32 %t) { ; UNPACKED-LABEL: name: image_load_v4f16 ; UNPACKED: bb.1 (%ir-block.0): - ; UNPACKED: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1 - ; UNPACKED: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 - ; UNPACKED: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3 - ; UNPACKED: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4 - ; UNPACKED: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5 - ; UNPACKED: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6 - ; UNPACKED: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7 - ; UNPACKED: 
[[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8 - ; UNPACKED: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 - ; UNPACKED: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) - ; UNPACKED: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; UNPACKED: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; UNPACKED: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32) - ; UNPACKED: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.2d), 15, [[BUILD_VECTOR1]](<2 x s32>), $noreg, [[BUILD_VECTOR]](<8 x s32>), 0, 0, 0 :: (dereferenceable load (<4 x s16>) from custom "ImageResource") - ; UNPACKED: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x s32>) - ; UNPACKED: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 - ; UNPACKED: [[AND:%[0-9]+]]:_(s32) = G_AND [[UV]], [[C]] - ; UNPACKED: [[AND1:%[0-9]+]]:_(s32) = G_AND [[UV1]], [[C]] - ; UNPACKED: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; UNPACKED: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C1]](s32) - ; UNPACKED: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]] - ; UNPACKED: [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) - ; UNPACKED: [[AND2:%[0-9]+]]:_(s32) = G_AND [[UV2]], [[C]] - ; UNPACKED: [[AND3:%[0-9]+]]:_(s32) = G_AND [[UV3]], [[C]] - ; UNPACKED: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C1]](s32) - ; UNPACKED: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL1]] - ; UNPACKED: [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32) - ; UNPACKED: $vgpr0 = COPY [[BITCAST]](<2 x s16>) - ; UNPACKED: $vgpr1 = COPY [[BITCAST1]](<2 x s16>) - ; UNPACKED: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1 + ; UNPACKED-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1 + ; UNPACKED-NEXT: {{ $}} + ; UNPACKED-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 + ; UNPACKED-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3 + ; UNPACKED-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4 + ; UNPACKED-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5 + ; UNPACKED-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6 + ; UNPACKED-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7 + ; UNPACKED-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8 + ; UNPACKED-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 + ; UNPACKED-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) + ; UNPACKED-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0 + ; UNPACKED-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; UNPACKED-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32) + ; UNPACKED-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.2d), 15, [[BUILD_VECTOR1]](<2 x s32>), $noreg, [[BUILD_VECTOR]](<8 x s32>), 0, 0, 0 :: (dereferenceable load (<4 x s16>) from custom "ImageResource") + ; UNPACKED-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x s32>) + ; UNPACKED-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 + ; UNPACKED-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[UV]], [[C]] + ; UNPACKED-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[UV1]], [[C]] + ; UNPACKED-NEXT: 
[[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; UNPACKED-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C1]](s32) + ; UNPACKED-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]] + ; UNPACKED-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) + ; UNPACKED-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[UV2]], [[C]] + ; UNPACKED-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[UV3]], [[C]] + ; UNPACKED-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C1]](s32) + ; UNPACKED-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL1]] + ; UNPACKED-NEXT: [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32) + ; UNPACKED-NEXT: $vgpr0 = COPY [[BITCAST]](<2 x s16>) + ; UNPACKED-NEXT: $vgpr1 = COPY [[BITCAST1]](<2 x s16>) + ; UNPACKED-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1 ; PACKED-LABEL: name: image_load_v4f16 ; PACKED: bb.1 (%ir-block.0): - ; PACKED: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1 - ; PACKED: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 - ; PACKED: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3 - ; PACKED: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4 - ; PACKED: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5 - ; PACKED: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6 - ; PACKED: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7 - ; PACKED: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8 - ; PACKED: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 - ; PACKED: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) - ; PACKED: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; PACKED: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; PACKED: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32) - ; PACKED: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s16>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.2d), 15, [[BUILD_VECTOR1]](<2 x s32>), $noreg, [[BUILD_VECTOR]](<8 x s32>), 0, 0, 0 :: (dereferenceable load (<4 x s16>) from custom "ImageResource") - ; PACKED: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x s16>) - ; PACKED: $vgpr0 = COPY [[UV]](<2 x s16>) - ; PACKED: $vgpr1 = COPY [[UV1]](<2 x s16>) - ; PACKED: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1 + ; PACKED-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1 + ; PACKED-NEXT: {{ $}} + ; PACKED-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 + ; PACKED-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3 + ; PACKED-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4 + ; PACKED-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5 + ; PACKED-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6 + ; PACKED-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7 + ; PACKED-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8 + ; PACKED-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 + ; PACKED-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) + ; PACKED-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0 + ; PACKED-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; PACKED-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32) + ; PACKED-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s16>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.2d), 15, [[BUILD_VECTOR1]](<2 x s32>), $noreg, [[BUILD_VECTOR]](<8 x s32>), 0, 0, 0 :: (dereferenceable load (<4 x 
s16>) from custom "ImageResource") + ; PACKED-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x s16>) + ; PACKED-NEXT: $vgpr0 = COPY [[UV]](<2 x s16>) + ; PACKED-NEXT: $vgpr1 = COPY [[UV1]](<2 x s16>) + ; PACKED-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1 %tex = call <4 x half> @llvm.amdgcn.image.load.2d.v4f16.i32(i32 15, i32 %s, i32 %t, <8 x i32> %rsrc, i32 0, i32 0) ret <4 x half> %tex } @@ -225,46 +233,48 @@ define amdgpu_ps <4 x half> @image_load_v4f16(<8 x i32> inreg %rsrc, i32 %s, i32 define amdgpu_ps half @image_load_tfe_f16(<8 x i32> inreg %rsrc, i32 %s, i32 %t) { ; UNPACKED-LABEL: name: image_load_tfe_f16 ; UNPACKED: bb.1 (%ir-block.0): - ; UNPACKED: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1 - ; UNPACKED: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 - ; UNPACKED: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3 - ; UNPACKED: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4 - ; UNPACKED: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5 - ; UNPACKED: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6 - ; UNPACKED: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7 - ; UNPACKED: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8 - ; UNPACKED: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 - ; UNPACKED: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) - ; UNPACKED: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; UNPACKED: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; UNPACKED: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF - ; UNPACKED: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32) - ; UNPACKED: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<2 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.2d), 1, [[BUILD_VECTOR1]](<2 x s32>), $noreg, [[BUILD_VECTOR]](<8 x s32>), 1, 0, 0 :: (dereferenceable load (s16) from custom "ImageResource") - ; UNPACKED: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<2 x s32>) - ; UNPACKED: G_STORE [[UV1]](s32), [[DEF]](p1) :: (store (s32) into `i32 addrspace(1)* undef`, addrspace 1) - ; UNPACKED: $vgpr0 = COPY [[UV]](s32) - ; UNPACKED: SI_RETURN_TO_EPILOG implicit $vgpr0 + ; UNPACKED-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1 + ; UNPACKED-NEXT: {{ $}} + ; UNPACKED-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 + ; UNPACKED-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3 + ; UNPACKED-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4 + ; UNPACKED-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5 + ; UNPACKED-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6 + ; UNPACKED-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7 + ; UNPACKED-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8 + ; UNPACKED-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 + ; UNPACKED-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) + ; UNPACKED-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0 + ; UNPACKED-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; UNPACKED-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF + ; UNPACKED-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32) + ; UNPACKED-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<2 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.2d), 1, [[BUILD_VECTOR1]](<2 x s32>), 
$noreg, [[BUILD_VECTOR]](<8 x s32>), 1, 0, 0 :: (dereferenceable load (s16) from custom "ImageResource") + ; UNPACKED-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<2 x s32>) + ; UNPACKED-NEXT: G_STORE [[UV1]](s32), [[DEF]](p1) :: (store (s32) into `i32 addrspace(1)* undef`, addrspace 1) + ; UNPACKED-NEXT: $vgpr0 = COPY [[UV]](s32) + ; UNPACKED-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0 ; PACKED-LABEL: name: image_load_tfe_f16 ; PACKED: bb.1 (%ir-block.0): - ; PACKED: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1 - ; PACKED: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 - ; PACKED: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3 - ; PACKED: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4 - ; PACKED: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5 - ; PACKED: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6 - ; PACKED: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7 - ; PACKED: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8 - ; PACKED: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 - ; PACKED: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) - ; PACKED: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; PACKED: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; PACKED: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF - ; PACKED: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32) - ; PACKED: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<2 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.2d), 1, [[BUILD_VECTOR1]](<2 x s32>), $noreg, [[BUILD_VECTOR]](<8 x s32>), 1, 0, 0 :: (dereferenceable load (s16) from custom "ImageResource") - ; PACKED: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<2 x s32>) - ; PACKED: G_STORE [[UV1]](s32), [[DEF]](p1) :: (store (s32) into `i32 addrspace(1)* undef`, addrspace 1) - ; PACKED: $vgpr0 = COPY [[UV]](s32) - ; PACKED: SI_RETURN_TO_EPILOG implicit $vgpr0 + ; PACKED-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1 + ; PACKED-NEXT: {{ $}} + ; PACKED-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 + ; PACKED-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3 + ; PACKED-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4 + ; PACKED-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5 + ; PACKED-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6 + ; PACKED-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7 + ; PACKED-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8 + ; PACKED-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 + ; PACKED-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) + ; PACKED-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0 + ; PACKED-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; PACKED-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF + ; PACKED-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32) + ; PACKED-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<2 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.2d), 1, [[BUILD_VECTOR1]](<2 x s32>), $noreg, [[BUILD_VECTOR]](<8 x s32>), 1, 0, 0 :: (dereferenceable load (s16) from custom "ImageResource") + ; PACKED-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<2 x s32>) + ; PACKED-NEXT: G_STORE [[UV1]](s32), [[DEF]](p1) :: (store (s32) 
into `i32 addrspace(1)* undef`, addrspace 1) + ; PACKED-NEXT: $vgpr0 = COPY [[UV]](s32) + ; PACKED-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0 %res = call { half, i32 } @llvm.amdgcn.image.load.2d.sl_f16i32s.i32(i32 1, i32 %s, i32 %t, <8 x i32> %rsrc, i32 1, i32 0) %tex = extractvalue { half, i32 } %res, 0 %tfe = extractvalue { half, i32 } %res, 1 @@ -275,54 +285,56 @@ define amdgpu_ps half @image_load_tfe_f16(<8 x i32> inreg %rsrc, i32 %s, i32 %t) define amdgpu_ps <2 x half> @image_load_tfe_v2f16(<8 x i32> inreg %rsrc, i32 %s, i32 %t) { ; UNPACKED-LABEL: name: image_load_tfe_v2f16 ; UNPACKED: bb.1 (%ir-block.0): - ; UNPACKED: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1 - ; UNPACKED: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 - ; UNPACKED: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3 - ; UNPACKED: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4 - ; UNPACKED: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5 - ; UNPACKED: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6 - ; UNPACKED: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7 - ; UNPACKED: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8 - ; UNPACKED: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 - ; UNPACKED: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) - ; UNPACKED: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; UNPACKED: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; UNPACKED: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF - ; UNPACKED: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32) - ; UNPACKED: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<3 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.2d), 3, [[BUILD_VECTOR1]](<2 x s32>), $noreg, [[BUILD_VECTOR]](<8 x s32>), 1, 0, 0 :: (dereferenceable load (<2 x s16>) from custom "ImageResource") - ; UNPACKED: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<3 x s32>) - ; UNPACKED: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 - ; UNPACKED: [[AND:%[0-9]+]]:_(s32) = G_AND [[UV]], [[C]] - ; UNPACKED: [[AND1:%[0-9]+]]:_(s32) = G_AND [[UV1]], [[C]] - ; UNPACKED: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; UNPACKED: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C1]](s32) - ; UNPACKED: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]] - ; UNPACKED: [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) - ; UNPACKED: G_STORE [[UV2]](s32), [[DEF]](p1) :: (store (s32) into `i32 addrspace(1)* undef`, addrspace 1) - ; UNPACKED: $vgpr0 = COPY [[BITCAST]](<2 x s16>) - ; UNPACKED: SI_RETURN_TO_EPILOG implicit $vgpr0 + ; UNPACKED-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1 + ; UNPACKED-NEXT: {{ $}} + ; UNPACKED-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 + ; UNPACKED-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3 + ; UNPACKED-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4 + ; UNPACKED-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5 + ; UNPACKED-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6 + ; UNPACKED-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7 + ; UNPACKED-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8 + ; UNPACKED-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 + ; UNPACKED-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) + ; UNPACKED-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0 + ; UNPACKED-NEXT: 
[[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; UNPACKED-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF + ; UNPACKED-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32) + ; UNPACKED-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<3 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.2d), 3, [[BUILD_VECTOR1]](<2 x s32>), $noreg, [[BUILD_VECTOR]](<8 x s32>), 1, 0, 0 :: (dereferenceable load (<2 x s16>) from custom "ImageResource") + ; UNPACKED-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<3 x s32>) + ; UNPACKED-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 + ; UNPACKED-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[UV]], [[C]] + ; UNPACKED-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[UV1]], [[C]] + ; UNPACKED-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; UNPACKED-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C1]](s32) + ; UNPACKED-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]] + ; UNPACKED-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) + ; UNPACKED-NEXT: G_STORE [[UV2]](s32), [[DEF]](p1) :: (store (s32) into `i32 addrspace(1)* undef`, addrspace 1) + ; UNPACKED-NEXT: $vgpr0 = COPY [[BITCAST]](<2 x s16>) + ; UNPACKED-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0 ; PACKED-LABEL: name: image_load_tfe_v2f16 ; PACKED: bb.1 (%ir-block.0): - ; PACKED: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1 - ; PACKED: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 - ; PACKED: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3 - ; PACKED: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4 - ; PACKED: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5 - ; PACKED: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6 - ; PACKED: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7 - ; PACKED: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8 - ; PACKED: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 - ; PACKED: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) - ; PACKED: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; PACKED: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; PACKED: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF - ; PACKED: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32) - ; PACKED: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<2 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.2d), 3, [[BUILD_VECTOR1]](<2 x s32>), $noreg, [[BUILD_VECTOR]](<8 x s32>), 1, 0, 0 :: (dereferenceable load (<2 x s16>) from custom "ImageResource") - ; PACKED: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<2 x s32>) - ; PACKED: [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[UV]](s32) - ; PACKED: G_STORE [[UV1]](s32), [[DEF]](p1) :: (store (s32) into `i32 addrspace(1)* undef`, addrspace 1) - ; PACKED: $vgpr0 = COPY [[BITCAST]](<2 x s16>) - ; PACKED: SI_RETURN_TO_EPILOG implicit $vgpr0 + ; PACKED-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1 + ; PACKED-NEXT: {{ $}} + ; PACKED-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 + ; PACKED-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3 + ; PACKED-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4 + ; PACKED-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5 + ; PACKED-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6 + ; PACKED-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7 + ; PACKED-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8 + ; PACKED-NEXT: 
[[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 + ; PACKED-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) + ; PACKED-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0 + ; PACKED-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; PACKED-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF + ; PACKED-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32) + ; PACKED-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<2 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.2d), 3, [[BUILD_VECTOR1]](<2 x s32>), $noreg, [[BUILD_VECTOR]](<8 x s32>), 1, 0, 0 :: (dereferenceable load (<2 x s16>) from custom "ImageResource") + ; PACKED-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<2 x s32>) + ; PACKED-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[UV]](s32) + ; PACKED-NEXT: G_STORE [[UV1]](s32), [[DEF]](p1) :: (store (s32) into `i32 addrspace(1)* undef`, addrspace 1) + ; PACKED-NEXT: $vgpr0 = COPY [[BITCAST]](<2 x s16>) + ; PACKED-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0 %res = call { <2 x half>, i32 } @llvm.amdgcn.image.load.2d.sl_v2f16i32s.i32(i32 3, i32 %s, i32 %t, <8 x i32> %rsrc, i32 1, i32 0) %tex = extractvalue { <2 x half>, i32 } %res, 0 %tfe = extractvalue { <2 x half>, i32 } %res, 1 @@ -333,76 +345,78 @@ define amdgpu_ps <2 x half> @image_load_tfe_v2f16(<8 x i32> inreg %rsrc, i32 %s, define amdgpu_ps <3 x half> @image_load_tfe_v3f16(<8 x i32> inreg %rsrc, i32 %s, i32 %t) { ; UNPACKED-LABEL: name: image_load_tfe_v3f16 ; UNPACKED: bb.1 (%ir-block.0): - ; UNPACKED: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1 - ; UNPACKED: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 - ; UNPACKED: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3 - ; UNPACKED: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4 - ; UNPACKED: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5 - ; UNPACKED: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6 - ; UNPACKED: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7 - ; UNPACKED: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8 - ; UNPACKED: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 - ; UNPACKED: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) - ; UNPACKED: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; UNPACKED: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; UNPACKED: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF - ; UNPACKED: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32) - ; UNPACKED: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.2d), 7, [[BUILD_VECTOR1]](<2 x s32>), $noreg, [[BUILD_VECTOR]](<8 x s32>), 1, 0, 0 :: (dereferenceable load (<3 x s16>) from custom "ImageResource", align 8) - ; UNPACKED: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x s32>) - ; UNPACKED: G_STORE [[UV3]](s32), [[DEF]](p1) :: (store (s32) into `i32 addrspace(1)* undef`, addrspace 1) - ; UNPACKED: [[DEF1:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF - ; UNPACKED: [[UV4:%[0-9]+]]:_(<2 x s16>), [[UV5:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF1]](<4 x s16>) - ; UNPACKED: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV4]](<2 x s16>) - ; UNPACKED: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; UNPACKED: 
[[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 - ; UNPACKED: [[AND:%[0-9]+]]:_(s32) = G_AND [[UV]], [[C1]] - ; UNPACKED: [[AND1:%[0-9]+]]:_(s32) = G_AND [[UV1]], [[C1]] - ; UNPACKED: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C]](s32) - ; UNPACKED: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]] - ; UNPACKED: [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) - ; UNPACKED: [[AND2:%[0-9]+]]:_(s32) = G_AND [[UV2]], [[C1]] - ; UNPACKED: [[AND3:%[0-9]+]]:_(s32) = G_AND [[BITCAST]], [[C1]] - ; UNPACKED: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C]](s32) - ; UNPACKED: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL1]] - ; UNPACKED: [[BITCAST2:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32) - ; UNPACKED: $vgpr0 = COPY [[BITCAST1]](<2 x s16>) - ; UNPACKED: $vgpr1 = COPY [[BITCAST2]](<2 x s16>) - ; UNPACKED: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1 + ; UNPACKED-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1 + ; UNPACKED-NEXT: {{ $}} + ; UNPACKED-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 + ; UNPACKED-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3 + ; UNPACKED-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4 + ; UNPACKED-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5 + ; UNPACKED-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6 + ; UNPACKED-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7 + ; UNPACKED-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8 + ; UNPACKED-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 + ; UNPACKED-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) + ; UNPACKED-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0 + ; UNPACKED-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; UNPACKED-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF + ; UNPACKED-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32) + ; UNPACKED-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.2d), 7, [[BUILD_VECTOR1]](<2 x s32>), $noreg, [[BUILD_VECTOR]](<8 x s32>), 1, 0, 0 :: (dereferenceable load (<3 x s16>) from custom "ImageResource", align 8) + ; UNPACKED-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x s32>) + ; UNPACKED-NEXT: G_STORE [[UV3]](s32), [[DEF]](p1) :: (store (s32) into `i32 addrspace(1)* undef`, addrspace 1) + ; UNPACKED-NEXT: [[DEF1:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF + ; UNPACKED-NEXT: [[UV4:%[0-9]+]]:_(<2 x s16>), [[UV5:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF1]](<4 x s16>) + ; UNPACKED-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV4]](<2 x s16>) + ; UNPACKED-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; UNPACKED-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 + ; UNPACKED-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[UV]], [[C1]] + ; UNPACKED-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[UV1]], [[C1]] + ; UNPACKED-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C]](s32) + ; UNPACKED-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]] + ; UNPACKED-NEXT: [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) + ; UNPACKED-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[UV2]], [[C1]] + ; UNPACKED-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[BITCAST]], [[C1]] + ; UNPACKED-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C]](s32) + ; UNPACKED-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL1]] + ; UNPACKED-NEXT: 
[[BITCAST2:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32) + ; UNPACKED-NEXT: $vgpr0 = COPY [[BITCAST1]](<2 x s16>) + ; UNPACKED-NEXT: $vgpr1 = COPY [[BITCAST2]](<2 x s16>) + ; UNPACKED-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1 ; PACKED-LABEL: name: image_load_tfe_v3f16 ; PACKED: bb.1 (%ir-block.0): - ; PACKED: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1 - ; PACKED: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 - ; PACKED: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3 - ; PACKED: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4 - ; PACKED: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5 - ; PACKED: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6 - ; PACKED: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7 - ; PACKED: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8 - ; PACKED: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 - ; PACKED: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) - ; PACKED: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; PACKED: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; PACKED: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF - ; PACKED: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32) - ; PACKED: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<3 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.2d), 7, [[BUILD_VECTOR1]](<2 x s32>), $noreg, [[BUILD_VECTOR]](<8 x s32>), 1, 0, 0 :: (dereferenceable load (<3 x s16>) from custom "ImageResource", align 8) - ; PACKED: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<3 x s32>) - ; PACKED: [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[UV]](s32) - ; PACKED: [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[UV1]](s32) - ; PACKED: G_STORE [[UV2]](s32), [[DEF]](p1) :: (store (s32) into `i32 addrspace(1)* undef`, addrspace 1) - ; PACKED: [[DEF1:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF - ; PACKED: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[BITCAST1]](<2 x s16>) - ; PACKED: [[UV3:%[0-9]+]]:_(<2 x s16>), [[UV4:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF1]](<4 x s16>) - ; PACKED: [[BITCAST3:%[0-9]+]]:_(s32) = G_BITCAST [[UV3]](<2 x s16>) - ; PACKED: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; PACKED: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 - ; PACKED: [[AND:%[0-9]+]]:_(s32) = G_AND [[BITCAST2]], [[C1]] - ; PACKED: [[AND1:%[0-9]+]]:_(s32) = G_AND [[BITCAST3]], [[C1]] - ; PACKED: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C]](s32) - ; PACKED: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]] - ; PACKED: [[BITCAST4:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) - ; PACKED: $vgpr0 = COPY [[BITCAST]](<2 x s16>) - ; PACKED: $vgpr1 = COPY [[BITCAST4]](<2 x s16>) - ; PACKED: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1 + ; PACKED-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1 + ; PACKED-NEXT: {{ $}} + ; PACKED-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 + ; PACKED-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3 + ; PACKED-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4 + ; PACKED-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5 + ; PACKED-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6 + ; PACKED-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7 + ; PACKED-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8 + ; PACKED-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 + ; PACKED-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), 
[[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) + ; PACKED-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0 + ; PACKED-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; PACKED-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF + ; PACKED-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32) + ; PACKED-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<3 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.2d), 7, [[BUILD_VECTOR1]](<2 x s32>), $noreg, [[BUILD_VECTOR]](<8 x s32>), 1, 0, 0 :: (dereferenceable load (<3 x s16>) from custom "ImageResource", align 8) + ; PACKED-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<3 x s32>) + ; PACKED-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[UV]](s32) + ; PACKED-NEXT: [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[UV1]](s32) + ; PACKED-NEXT: G_STORE [[UV2]](s32), [[DEF]](p1) :: (store (s32) into `i32 addrspace(1)* undef`, addrspace 1) + ; PACKED-NEXT: [[DEF1:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF + ; PACKED-NEXT: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[BITCAST1]](<2 x s16>) + ; PACKED-NEXT: [[UV3:%[0-9]+]]:_(<2 x s16>), [[UV4:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF1]](<4 x s16>) + ; PACKED-NEXT: [[BITCAST3:%[0-9]+]]:_(s32) = G_BITCAST [[UV3]](<2 x s16>) + ; PACKED-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; PACKED-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 + ; PACKED-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[BITCAST2]], [[C1]] + ; PACKED-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[BITCAST3]], [[C1]] + ; PACKED-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C]](s32) + ; PACKED-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]] + ; PACKED-NEXT: [[BITCAST4:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) + ; PACKED-NEXT: $vgpr0 = COPY [[BITCAST]](<2 x s16>) + ; PACKED-NEXT: $vgpr1 = COPY [[BITCAST4]](<2 x s16>) + ; PACKED-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1 %res = call { <3 x half>, i32 } @llvm.amdgcn.image.load.2d.sl_v3f16i32s.i32(i32 7, i32 %s, i32 %t, <8 x i32> %rsrc, i32 1, i32 0) %tex = extractvalue { <3 x half>, i32 } %res, 0 %tfe = extractvalue { <3 x half>, i32 } %res, 1 @@ -413,62 +427,64 @@ define amdgpu_ps <3 x half> @image_load_tfe_v3f16(<8 x i32> inreg %rsrc, i32 %s, define amdgpu_ps <4 x half> @image_load_tfe_v4f16(<8 x i32> inreg %rsrc, i32 %s, i32 %t) { ; UNPACKED-LABEL: name: image_load_tfe_v4f16 ; UNPACKED: bb.1 (%ir-block.0): - ; UNPACKED: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1 - ; UNPACKED: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 - ; UNPACKED: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3 - ; UNPACKED: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4 - ; UNPACKED: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5 - ; UNPACKED: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6 - ; UNPACKED: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7 - ; UNPACKED: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8 - ; UNPACKED: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 - ; UNPACKED: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) - ; UNPACKED: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; UNPACKED: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; UNPACKED: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF - ; UNPACKED: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32) - ; UNPACKED: 
[[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<5 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.2d), 15, [[BUILD_VECTOR1]](<2 x s32>), $noreg, [[BUILD_VECTOR]](<8 x s32>), 1, 0, 0 :: (dereferenceable load (<4 x s16>) from custom "ImageResource") - ; UNPACKED: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<5 x s32>) - ; UNPACKED: G_STORE [[UV4]](s32), [[DEF]](p1) :: (store (s32) into `i32 addrspace(1)* undef`, addrspace 1) - ; UNPACKED: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 - ; UNPACKED: [[AND:%[0-9]+]]:_(s32) = G_AND [[UV]], [[C]] - ; UNPACKED: [[AND1:%[0-9]+]]:_(s32) = G_AND [[UV1]], [[C]] - ; UNPACKED: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; UNPACKED: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C1]](s32) - ; UNPACKED: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]] - ; UNPACKED: [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) - ; UNPACKED: [[AND2:%[0-9]+]]:_(s32) = G_AND [[UV2]], [[C]] - ; UNPACKED: [[AND3:%[0-9]+]]:_(s32) = G_AND [[UV3]], [[C]] - ; UNPACKED: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C1]](s32) - ; UNPACKED: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL1]] - ; UNPACKED: [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32) - ; UNPACKED: $vgpr0 = COPY [[BITCAST]](<2 x s16>) - ; UNPACKED: $vgpr1 = COPY [[BITCAST1]](<2 x s16>) - ; UNPACKED: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1 + ; UNPACKED-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1 + ; UNPACKED-NEXT: {{ $}} + ; UNPACKED-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 + ; UNPACKED-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3 + ; UNPACKED-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4 + ; UNPACKED-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5 + ; UNPACKED-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6 + ; UNPACKED-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7 + ; UNPACKED-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8 + ; UNPACKED-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 + ; UNPACKED-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) + ; UNPACKED-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0 + ; UNPACKED-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; UNPACKED-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF + ; UNPACKED-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32) + ; UNPACKED-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<5 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.2d), 15, [[BUILD_VECTOR1]](<2 x s32>), $noreg, [[BUILD_VECTOR]](<8 x s32>), 1, 0, 0 :: (dereferenceable load (<4 x s16>) from custom "ImageResource") + ; UNPACKED-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<5 x s32>) + ; UNPACKED-NEXT: G_STORE [[UV4]](s32), [[DEF]](p1) :: (store (s32) into `i32 addrspace(1)* undef`, addrspace 1) + ; UNPACKED-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 + ; UNPACKED-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[UV]], [[C]] + ; UNPACKED-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[UV1]], [[C]] + ; UNPACKED-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; UNPACKED-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C1]](s32) + ; UNPACKED-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]] + ; 
UNPACKED-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) + ; UNPACKED-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[UV2]], [[C]] + ; UNPACKED-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[UV3]], [[C]] + ; UNPACKED-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C1]](s32) + ; UNPACKED-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL1]] + ; UNPACKED-NEXT: [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32) + ; UNPACKED-NEXT: $vgpr0 = COPY [[BITCAST]](<2 x s16>) + ; UNPACKED-NEXT: $vgpr1 = COPY [[BITCAST1]](<2 x s16>) + ; UNPACKED-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1 ; PACKED-LABEL: name: image_load_tfe_v4f16 ; PACKED: bb.1 (%ir-block.0): - ; PACKED: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1 - ; PACKED: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 - ; PACKED: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3 - ; PACKED: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4 - ; PACKED: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5 - ; PACKED: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6 - ; PACKED: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7 - ; PACKED: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8 - ; PACKED: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 - ; PACKED: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) - ; PACKED: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; PACKED: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; PACKED: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF - ; PACKED: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32) - ; PACKED: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<3 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.2d), 15, [[BUILD_VECTOR1]](<2 x s32>), $noreg, [[BUILD_VECTOR]](<8 x s32>), 1, 0, 0 :: (dereferenceable load (<4 x s16>) from custom "ImageResource") - ; PACKED: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<3 x s32>) - ; PACKED: [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[UV]](s32) - ; PACKED: [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[UV1]](s32) - ; PACKED: G_STORE [[UV2]](s32), [[DEF]](p1) :: (store (s32) into `i32 addrspace(1)* undef`, addrspace 1) - ; PACKED: $vgpr0 = COPY [[BITCAST]](<2 x s16>) - ; PACKED: $vgpr1 = COPY [[BITCAST1]](<2 x s16>) - ; PACKED: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1 + ; PACKED-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1 + ; PACKED-NEXT: {{ $}} + ; PACKED-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 + ; PACKED-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3 + ; PACKED-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4 + ; PACKED-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5 + ; PACKED-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6 + ; PACKED-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7 + ; PACKED-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8 + ; PACKED-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 + ; PACKED-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) + ; PACKED-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0 + ; PACKED-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; PACKED-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF + ; PACKED-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32) + ; PACKED-NEXT: 
[[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<3 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.2d), 15, [[BUILD_VECTOR1]](<2 x s32>), $noreg, [[BUILD_VECTOR]](<8 x s32>), 1, 0, 0 :: (dereferenceable load (<4 x s16>) from custom "ImageResource") + ; PACKED-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<3 x s32>) + ; PACKED-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[UV]](s32) + ; PACKED-NEXT: [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[UV1]](s32) + ; PACKED-NEXT: G_STORE [[UV2]](s32), [[DEF]](p1) :: (store (s32) into `i32 addrspace(1)* undef`, addrspace 1) + ; PACKED-NEXT: $vgpr0 = COPY [[BITCAST]](<2 x s16>) + ; PACKED-NEXT: $vgpr1 = COPY [[BITCAST1]](<2 x s16>) + ; PACKED-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1 %res = call { <4 x half>, i32 } @llvm.amdgcn.image.load.2d.sl_v4f16i32s.i32(i32 15, i32 %s, i32 %t, <8 x i32> %rsrc, i32 1, i32 0) %tex = extractvalue { <4 x half>, i32 } %res, 0 %tfe = extractvalue { <4 x half>, i32 } %res, 1 @@ -479,20 +495,22 @@ define amdgpu_ps <4 x half> @image_load_tfe_v4f16(<8 x i32> inreg %rsrc, i32 %s, define amdgpu_ps half @image_load_f16_dmask_0000(<8 x i32> inreg %rsrc, i32 %s, i32 %t) { ; UNPACKED-LABEL: name: image_load_f16_dmask_0000 ; UNPACKED: bb.1 (%ir-block.0): - ; UNPACKED: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1 - ; UNPACKED: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; UNPACKED: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; UNPACKED: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF - ; UNPACKED: $vgpr0 = COPY [[DEF]](s32) - ; UNPACKED: SI_RETURN_TO_EPILOG implicit $vgpr0 + ; UNPACKED-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1 + ; UNPACKED-NEXT: {{ $}} + ; UNPACKED-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 + ; UNPACKED-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; UNPACKED-NEXT: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF + ; UNPACKED-NEXT: $vgpr0 = COPY [[DEF]](s32) + ; UNPACKED-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0 ; PACKED-LABEL: name: image_load_f16_dmask_0000 ; PACKED: bb.1 (%ir-block.0): - ; PACKED: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1 - ; PACKED: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; PACKED: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; PACKED: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF - ; PACKED: $vgpr0 = COPY [[DEF]](s32) - ; PACKED: SI_RETURN_TO_EPILOG implicit $vgpr0 + ; PACKED-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1 + ; PACKED-NEXT: {{ $}} + ; PACKED-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 + ; PACKED-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; PACKED-NEXT: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF + ; PACKED-NEXT: $vgpr0 = COPY [[DEF]](s32) + ; PACKED-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0 %tex = call half @llvm.amdgcn.image.load.2d.f16.i32(i32 0, i32 %s, i32 %t, <8 x i32> %rsrc, i32 0, i32 0) ret half %tex } @@ -500,47 +518,49 @@ define amdgpu_ps half @image_load_f16_dmask_0000(<8 x i32> inreg %rsrc, i32 %s, define amdgpu_ps <2 x half> @image_load_v2f16_dmask_1000(<8 x i32> inreg %rsrc, i32 %s, i32 %t) { ; UNPACKED-LABEL: name: image_load_v2f16_dmask_1000 ; UNPACKED: bb.1 (%ir-block.0): - ; UNPACKED: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1 - ; UNPACKED: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 - ; UNPACKED: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3 - ; UNPACKED: 
[[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4 - ; UNPACKED: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5 - ; UNPACKED: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6 - ; UNPACKED: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7 - ; UNPACKED: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8 - ; UNPACKED: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 - ; UNPACKED: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) - ; UNPACKED: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; UNPACKED: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; UNPACKED: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32) - ; UNPACKED: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(s32) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.2d), 1, [[BUILD_VECTOR1]](<2 x s32>), $noreg, [[BUILD_VECTOR]](<8 x s32>), 0, 0, 0 :: (dereferenceable load (s16) from custom "ImageResource") - ; UNPACKED: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 - ; UNPACKED: [[AND:%[0-9]+]]:_(s32) = G_AND [[AMDGPU_INTRIN_IMAGE_LOAD]], [[C]] - ; UNPACKED: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 - ; UNPACKED: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; UNPACKED: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[C1]], [[C2]](s32) - ; UNPACKED: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]] - ; UNPACKED: [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) - ; UNPACKED: $vgpr0 = COPY [[BITCAST]](<2 x s16>) - ; UNPACKED: SI_RETURN_TO_EPILOG implicit $vgpr0 + ; UNPACKED-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1 + ; UNPACKED-NEXT: {{ $}} + ; UNPACKED-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 + ; UNPACKED-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3 + ; UNPACKED-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4 + ; UNPACKED-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5 + ; UNPACKED-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6 + ; UNPACKED-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7 + ; UNPACKED-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8 + ; UNPACKED-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 + ; UNPACKED-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) + ; UNPACKED-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0 + ; UNPACKED-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; UNPACKED-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32) + ; UNPACKED-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(s32) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.2d), 1, [[BUILD_VECTOR1]](<2 x s32>), $noreg, [[BUILD_VECTOR]](<8 x s32>), 0, 0, 0 :: (dereferenceable load (s16) from custom "ImageResource") + ; UNPACKED-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 + ; UNPACKED-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[AMDGPU_INTRIN_IMAGE_LOAD]], [[C]] + ; UNPACKED-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 + ; UNPACKED-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; UNPACKED-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[C1]], [[C2]](s32) + ; UNPACKED-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]] + ; UNPACKED-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) + ; UNPACKED-NEXT: $vgpr0 = COPY [[BITCAST]](<2 x s16>) + ; UNPACKED-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0 ; PACKED-LABEL: name: image_load_v2f16_dmask_1000 ; PACKED: bb.1 (%ir-block.0): - ; PACKED: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, 
$sgpr9, $vgpr0, $vgpr1 - ; PACKED: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 - ; PACKED: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3 - ; PACKED: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4 - ; PACKED: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5 - ; PACKED: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6 - ; PACKED: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7 - ; PACKED: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8 - ; PACKED: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 - ; PACKED: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) - ; PACKED: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; PACKED: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; PACKED: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32) - ; PACKED: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<2 x s16>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.2d), 1, [[BUILD_VECTOR1]](<2 x s32>), $noreg, [[BUILD_VECTOR]](<8 x s32>), 0, 0, 0 :: (dereferenceable load (s16) from custom "ImageResource") - ; PACKED: $vgpr0 = COPY [[AMDGPU_INTRIN_IMAGE_LOAD]](<2 x s16>) - ; PACKED: SI_RETURN_TO_EPILOG implicit $vgpr0 + ; PACKED-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1 + ; PACKED-NEXT: {{ $}} + ; PACKED-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 + ; PACKED-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3 + ; PACKED-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4 + ; PACKED-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5 + ; PACKED-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6 + ; PACKED-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7 + ; PACKED-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8 + ; PACKED-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 + ; PACKED-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) + ; PACKED-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0 + ; PACKED-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; PACKED-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32) + ; PACKED-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<2 x s16>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.2d), 1, [[BUILD_VECTOR1]](<2 x s32>), $noreg, [[BUILD_VECTOR]](<8 x s32>), 0, 0, 0 :: (dereferenceable load (s16) from custom "ImageResource") + ; PACKED-NEXT: $vgpr0 = COPY [[AMDGPU_INTRIN_IMAGE_LOAD]](<2 x s16>) + ; PACKED-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0 %tex = call <2 x half> @llvm.amdgcn.image.load.2d.v2f16.i32(i32 1, i32 %s, i32 %t, <8 x i32> %rsrc, i32 0, i32 0) ret <2 x half> %tex } @@ -548,20 +568,22 @@ define amdgpu_ps <2 x half> @image_load_v2f16_dmask_1000(<8 x i32> inreg %rsrc, define amdgpu_ps <2 x half> @image_load_v2f16_dmask_0000(<8 x i32> inreg %rsrc, i32 %s, i32 %t) { ; UNPACKED-LABEL: name: image_load_v2f16_dmask_0000 ; UNPACKED: bb.1 (%ir-block.0): - ; UNPACKED: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1 - ; UNPACKED: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; UNPACKED: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; UNPACKED: [[DEF:%[0-9]+]]:_(<2 x s16>) = G_IMPLICIT_DEF - ; UNPACKED: $vgpr0 = COPY [[DEF]](<2 x s16>) - ; UNPACKED: SI_RETURN_TO_EPILOG implicit $vgpr0 + ; UNPACKED-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1 + ; UNPACKED-NEXT: {{ $}} + ; UNPACKED-NEXT: [[COPY:%[0-9]+]]:_(s32) = 
COPY $vgpr0 + ; UNPACKED-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; UNPACKED-NEXT: [[DEF:%[0-9]+]]:_(<2 x s16>) = G_IMPLICIT_DEF + ; UNPACKED-NEXT: $vgpr0 = COPY [[DEF]](<2 x s16>) + ; UNPACKED-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0 ; PACKED-LABEL: name: image_load_v2f16_dmask_0000 ; PACKED: bb.1 (%ir-block.0): - ; PACKED: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1 - ; PACKED: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; PACKED: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; PACKED: [[DEF:%[0-9]+]]:_(<2 x s16>) = G_IMPLICIT_DEF - ; PACKED: $vgpr0 = COPY [[DEF]](<2 x s16>) - ; PACKED: SI_RETURN_TO_EPILOG implicit $vgpr0 + ; PACKED-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1 + ; PACKED-NEXT: {{ $}} + ; PACKED-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 + ; PACKED-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; PACKED-NEXT: [[DEF:%[0-9]+]]:_(<2 x s16>) = G_IMPLICIT_DEF + ; PACKED-NEXT: $vgpr0 = COPY [[DEF]](<2 x s16>) + ; PACKED-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0 %tex = call <2 x half> @llvm.amdgcn.image.load.2d.v2f16.i32(i32 0, i32 %s, i32 %t, <8 x i32> %rsrc, i32 0, i32 0) ret <2 x half> %tex } @@ -569,70 +591,72 @@ define amdgpu_ps <2 x half> @image_load_v2f16_dmask_0000(<8 x i32> inreg %rsrc, define amdgpu_ps <3 x half> @image_load_v3f16_dmask_1100(<8 x i32> inreg %rsrc, i32 %s, i32 %t) { ; UNPACKED-LABEL: name: image_load_v3f16_dmask_1100 ; UNPACKED: bb.1 (%ir-block.0): - ; UNPACKED: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1 - ; UNPACKED: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 - ; UNPACKED: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3 - ; UNPACKED: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4 - ; UNPACKED: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5 - ; UNPACKED: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6 - ; UNPACKED: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7 - ; UNPACKED: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8 - ; UNPACKED: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 - ; UNPACKED: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) - ; UNPACKED: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; UNPACKED: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; UNPACKED: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32) - ; UNPACKED: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<2 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.2d), 3, [[BUILD_VECTOR1]](<2 x s32>), $noreg, [[BUILD_VECTOR]](<8 x s32>), 0, 0, 0 :: (dereferenceable load (<2 x s16>) from custom "ImageResource") - ; UNPACKED: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<2 x s32>) - ; UNPACKED: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF - ; UNPACKED: [[UV2:%[0-9]+]]:_(<2 x s16>), [[UV3:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<4 x s16>) - ; UNPACKED: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV2]](<2 x s16>) - ; UNPACKED: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; UNPACKED: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 - ; UNPACKED: [[AND:%[0-9]+]]:_(s32) = G_AND [[UV]], [[C1]] - ; UNPACKED: [[AND1:%[0-9]+]]:_(s32) = G_AND [[UV1]], [[C1]] - ; UNPACKED: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C]](s32) - ; UNPACKED: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]] - ; UNPACKED: [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) - ; UNPACKED: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 
- ; UNPACKED: [[AND2:%[0-9]+]]:_(s32) = G_AND [[BITCAST]], [[C1]] - ; UNPACKED: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND2]], [[C]](s32) - ; UNPACKED: [[OR1:%[0-9]+]]:_(s32) = G_OR [[C2]], [[SHL1]] - ; UNPACKED: [[BITCAST2:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32) - ; UNPACKED: $vgpr0 = COPY [[BITCAST1]](<2 x s16>) - ; UNPACKED: $vgpr1 = COPY [[BITCAST2]](<2 x s16>) - ; UNPACKED: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1 + ; UNPACKED-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1 + ; UNPACKED-NEXT: {{ $}} + ; UNPACKED-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 + ; UNPACKED-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3 + ; UNPACKED-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4 + ; UNPACKED-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5 + ; UNPACKED-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6 + ; UNPACKED-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7 + ; UNPACKED-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8 + ; UNPACKED-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 + ; UNPACKED-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) + ; UNPACKED-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0 + ; UNPACKED-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; UNPACKED-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32) + ; UNPACKED-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<2 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.2d), 3, [[BUILD_VECTOR1]](<2 x s32>), $noreg, [[BUILD_VECTOR]](<8 x s32>), 0, 0, 0 :: (dereferenceable load (<2 x s16>) from custom "ImageResource") + ; UNPACKED-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<2 x s32>) + ; UNPACKED-NEXT: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF + ; UNPACKED-NEXT: [[UV2:%[0-9]+]]:_(<2 x s16>), [[UV3:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<4 x s16>) + ; UNPACKED-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV2]](<2 x s16>) + ; UNPACKED-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; UNPACKED-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 + ; UNPACKED-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[UV]], [[C1]] + ; UNPACKED-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[UV1]], [[C1]] + ; UNPACKED-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C]](s32) + ; UNPACKED-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]] + ; UNPACKED-NEXT: [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) + ; UNPACKED-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 + ; UNPACKED-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[BITCAST]], [[C1]] + ; UNPACKED-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND2]], [[C]](s32) + ; UNPACKED-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[C2]], [[SHL1]] + ; UNPACKED-NEXT: [[BITCAST2:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32) + ; UNPACKED-NEXT: $vgpr0 = COPY [[BITCAST1]](<2 x s16>) + ; UNPACKED-NEXT: $vgpr1 = COPY [[BITCAST2]](<2 x s16>) + ; UNPACKED-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1 ; PACKED-LABEL: name: image_load_v3f16_dmask_1100 ; PACKED: bb.1 (%ir-block.0): - ; PACKED: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1 - ; PACKED: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 - ; PACKED: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3 - ; PACKED: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4 - ; PACKED: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5 - ; PACKED: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6 
- ; PACKED: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7 - ; PACKED: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8 - ; PACKED: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 - ; PACKED: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) - ; PACKED: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; PACKED: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; PACKED: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32) - ; PACKED: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<2 x s16>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.2d), 3, [[BUILD_VECTOR1]](<2 x s32>), $noreg, [[BUILD_VECTOR]](<8 x s32>), 0, 0, 0 :: (dereferenceable load (<2 x s16>) from custom "ImageResource") - ; PACKED: [[DEF:%[0-9]+]]:_(<2 x s16>) = G_IMPLICIT_DEF - ; PACKED: [[DEF1:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF - ; PACKED: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[DEF]](<2 x s16>) - ; PACKED: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF1]](<4 x s16>) - ; PACKED: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>) - ; PACKED: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; PACKED: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 - ; PACKED: [[AND:%[0-9]+]]:_(s32) = G_AND [[BITCAST]], [[C1]] - ; PACKED: [[AND1:%[0-9]+]]:_(s32) = G_AND [[BITCAST1]], [[C1]] - ; PACKED: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C]](s32) - ; PACKED: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]] - ; PACKED: [[BITCAST2:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) - ; PACKED: $vgpr0 = COPY [[AMDGPU_INTRIN_IMAGE_LOAD]](<2 x s16>) - ; PACKED: $vgpr1 = COPY [[BITCAST2]](<2 x s16>) - ; PACKED: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1 + ; PACKED-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1 + ; PACKED-NEXT: {{ $}} + ; PACKED-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 + ; PACKED-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3 + ; PACKED-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4 + ; PACKED-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5 + ; PACKED-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6 + ; PACKED-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7 + ; PACKED-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8 + ; PACKED-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 + ; PACKED-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) + ; PACKED-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0 + ; PACKED-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; PACKED-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32) + ; PACKED-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<2 x s16>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.2d), 3, [[BUILD_VECTOR1]](<2 x s32>), $noreg, [[BUILD_VECTOR]](<8 x s32>), 0, 0, 0 :: (dereferenceable load (<2 x s16>) from custom "ImageResource") + ; PACKED-NEXT: [[DEF:%[0-9]+]]:_(<2 x s16>) = G_IMPLICIT_DEF + ; PACKED-NEXT: [[DEF1:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF + ; PACKED-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[DEF]](<2 x s16>) + ; PACKED-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF1]](<4 x s16>) + ; PACKED-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>) + ; PACKED-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; PACKED-NEXT: [[C1:%[0-9]+]]:_(s32) 
= G_CONSTANT i32 65535 + ; PACKED-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[BITCAST]], [[C1]] + ; PACKED-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[BITCAST1]], [[C1]] + ; PACKED-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C]](s32) + ; PACKED-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]] + ; PACKED-NEXT: [[BITCAST2:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) + ; PACKED-NEXT: $vgpr0 = COPY [[AMDGPU_INTRIN_IMAGE_LOAD]](<2 x s16>) + ; PACKED-NEXT: $vgpr1 = COPY [[BITCAST2]](<2 x s16>) + ; PACKED-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1 %tex = call <3 x half> @llvm.amdgcn.image.load.2d.v3f16.i32(i32 3, i32 %s, i32 %t, <8 x i32> %rsrc, i32 0, i32 0) ret <3 x half> %tex } @@ -640,68 +664,70 @@ define amdgpu_ps <3 x half> @image_load_v3f16_dmask_1100(<8 x i32> inreg %rsrc, define amdgpu_ps <3 x half> @image_load_v3f16_dmask_1000(<8 x i32> inreg %rsrc, i32 %s, i32 %t) { ; UNPACKED-LABEL: name: image_load_v3f16_dmask_1000 ; UNPACKED: bb.1 (%ir-block.0): - ; UNPACKED: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1 - ; UNPACKED: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 - ; UNPACKED: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3 - ; UNPACKED: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4 - ; UNPACKED: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5 - ; UNPACKED: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6 - ; UNPACKED: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7 - ; UNPACKED: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8 - ; UNPACKED: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 - ; UNPACKED: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) - ; UNPACKED: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; UNPACKED: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; UNPACKED: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32) - ; UNPACKED: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(s32) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.2d), 1, [[BUILD_VECTOR1]](<2 x s32>), $noreg, [[BUILD_VECTOR]](<8 x s32>), 0, 0, 0 :: (dereferenceable load (s16) from custom "ImageResource") - ; UNPACKED: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF - ; UNPACKED: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<4 x s16>) - ; UNPACKED: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>) - ; UNPACKED: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; UNPACKED: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 - ; UNPACKED: [[AND:%[0-9]+]]:_(s32) = G_AND [[AMDGPU_INTRIN_IMAGE_LOAD]], [[C1]] - ; UNPACKED: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 - ; UNPACKED: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[C2]], [[C]](s32) - ; UNPACKED: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]] - ; UNPACKED: [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) - ; UNPACKED: [[AND1:%[0-9]+]]:_(s32) = G_AND [[BITCAST]], [[C1]] - ; UNPACKED: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C]](s32) - ; UNPACKED: [[OR1:%[0-9]+]]:_(s32) = G_OR [[C2]], [[SHL1]] - ; UNPACKED: [[BITCAST2:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32) - ; UNPACKED: $vgpr0 = COPY [[BITCAST1]](<2 x s16>) - ; UNPACKED: $vgpr1 = COPY [[BITCAST2]](<2 x s16>) - ; UNPACKED: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1 + ; UNPACKED-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1 + ; UNPACKED-NEXT: {{ $}} + ; UNPACKED-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 + ; UNPACKED-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3 + ; 
UNPACKED-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4 + ; UNPACKED-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5 + ; UNPACKED-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6 + ; UNPACKED-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7 + ; UNPACKED-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8 + ; UNPACKED-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 + ; UNPACKED-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) + ; UNPACKED-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0 + ; UNPACKED-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; UNPACKED-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32) + ; UNPACKED-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(s32) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.2d), 1, [[BUILD_VECTOR1]](<2 x s32>), $noreg, [[BUILD_VECTOR]](<8 x s32>), 0, 0, 0 :: (dereferenceable load (s16) from custom "ImageResource") + ; UNPACKED-NEXT: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF + ; UNPACKED-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<4 x s16>) + ; UNPACKED-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>) + ; UNPACKED-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; UNPACKED-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 + ; UNPACKED-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[AMDGPU_INTRIN_IMAGE_LOAD]], [[C1]] + ; UNPACKED-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 + ; UNPACKED-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[C2]], [[C]](s32) + ; UNPACKED-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]] + ; UNPACKED-NEXT: [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) + ; UNPACKED-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[BITCAST]], [[C1]] + ; UNPACKED-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C]](s32) + ; UNPACKED-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[C2]], [[SHL1]] + ; UNPACKED-NEXT: [[BITCAST2:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32) + ; UNPACKED-NEXT: $vgpr0 = COPY [[BITCAST1]](<2 x s16>) + ; UNPACKED-NEXT: $vgpr1 = COPY [[BITCAST2]](<2 x s16>) + ; UNPACKED-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1 ; PACKED-LABEL: name: image_load_v3f16_dmask_1000 ; PACKED: bb.1 (%ir-block.0): - ; PACKED: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1 - ; PACKED: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 - ; PACKED: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3 - ; PACKED: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4 - ; PACKED: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5 - ; PACKED: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6 - ; PACKED: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7 - ; PACKED: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8 - ; PACKED: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 - ; PACKED: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) - ; PACKED: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; PACKED: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; PACKED: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32) - ; PACKED: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<2 x s16>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.2d), 1, [[BUILD_VECTOR1]](<2 x s32>), $noreg, [[BUILD_VECTOR]](<8 x s32>), 0, 0, 0 :: (dereferenceable load (s16) from custom "ImageResource") - ; PACKED: [[DEF:%[0-9]+]]:_(<2 x s16>) = G_IMPLICIT_DEF - 
; PACKED: [[DEF1:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF - ; PACKED: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[DEF]](<2 x s16>) - ; PACKED: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF1]](<4 x s16>) - ; PACKED: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>) - ; PACKED: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; PACKED: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 - ; PACKED: [[AND:%[0-9]+]]:_(s32) = G_AND [[BITCAST]], [[C1]] - ; PACKED: [[AND1:%[0-9]+]]:_(s32) = G_AND [[BITCAST1]], [[C1]] - ; PACKED: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C]](s32) - ; PACKED: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]] - ; PACKED: [[BITCAST2:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) - ; PACKED: $vgpr0 = COPY [[AMDGPU_INTRIN_IMAGE_LOAD]](<2 x s16>) - ; PACKED: $vgpr1 = COPY [[BITCAST2]](<2 x s16>) - ; PACKED: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1 + ; PACKED-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1 + ; PACKED-NEXT: {{ $}} + ; PACKED-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 + ; PACKED-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3 + ; PACKED-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4 + ; PACKED-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5 + ; PACKED-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6 + ; PACKED-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7 + ; PACKED-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8 + ; PACKED-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 + ; PACKED-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) + ; PACKED-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0 + ; PACKED-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; PACKED-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32) + ; PACKED-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<2 x s16>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.2d), 1, [[BUILD_VECTOR1]](<2 x s32>), $noreg, [[BUILD_VECTOR]](<8 x s32>), 0, 0, 0 :: (dereferenceable load (s16) from custom "ImageResource") + ; PACKED-NEXT: [[DEF:%[0-9]+]]:_(<2 x s16>) = G_IMPLICIT_DEF + ; PACKED-NEXT: [[DEF1:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF + ; PACKED-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[DEF]](<2 x s16>) + ; PACKED-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF1]](<4 x s16>) + ; PACKED-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>) + ; PACKED-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; PACKED-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 + ; PACKED-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[BITCAST]], [[C1]] + ; PACKED-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[BITCAST1]], [[C1]] + ; PACKED-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C]](s32) + ; PACKED-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]] + ; PACKED-NEXT: [[BITCAST2:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) + ; PACKED-NEXT: $vgpr0 = COPY [[AMDGPU_INTRIN_IMAGE_LOAD]](<2 x s16>) + ; PACKED-NEXT: $vgpr1 = COPY [[BITCAST2]](<2 x s16>) + ; PACKED-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1 %tex = call <3 x half> @llvm.amdgcn.image.load.2d.v3f16.i32(i32 1, i32 %s, i32 %t, <8 x i32> %rsrc, i32 0, i32 0) ret <3 x half> %tex } @@ -709,60 +735,62 @@ define amdgpu_ps <3 x half> @image_load_v3f16_dmask_1000(<8 x i32> inreg %rsrc, define amdgpu_ps <3 x half> @image_load_v3f16_dmask_0000(<8 x i32> inreg %rsrc, i32 
%s, i32 %t) { ; UNPACKED-LABEL: name: image_load_v3f16_dmask_0000 ; UNPACKED: bb.1 (%ir-block.0): - ; UNPACKED: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1 - ; UNPACKED: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; UNPACKED: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; UNPACKED: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF - ; UNPACKED: [[DEF1:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF - ; UNPACKED: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<4 x s16>) - ; UNPACKED: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>) - ; UNPACKED: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; UNPACKED: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) - ; UNPACKED: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>) - ; UNPACKED: [[UV2:%[0-9]+]]:_(<2 x s16>), [[UV3:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF1]](<4 x s16>) - ; UNPACKED: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[UV2]](<2 x s16>) - ; UNPACKED: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 - ; UNPACKED: [[AND:%[0-9]+]]:_(s32) = G_AND [[BITCAST]], [[C1]] - ; UNPACKED: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LSHR]], [[C1]] - ; UNPACKED: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C]](s32) - ; UNPACKED: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]] - ; UNPACKED: [[BITCAST3:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) - ; UNPACKED: [[AND2:%[0-9]+]]:_(s32) = G_AND [[BITCAST1]], [[C1]] - ; UNPACKED: [[AND3:%[0-9]+]]:_(s32) = G_AND [[BITCAST2]], [[C1]] - ; UNPACKED: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C]](s32) - ; UNPACKED: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL1]] - ; UNPACKED: [[BITCAST4:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32) - ; UNPACKED: $vgpr0 = COPY [[BITCAST3]](<2 x s16>) - ; UNPACKED: $vgpr1 = COPY [[BITCAST4]](<2 x s16>) - ; UNPACKED: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1 + ; UNPACKED-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1 + ; UNPACKED-NEXT: {{ $}} + ; UNPACKED-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 + ; UNPACKED-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; UNPACKED-NEXT: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF + ; UNPACKED-NEXT: [[DEF1:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF + ; UNPACKED-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<4 x s16>) + ; UNPACKED-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>) + ; UNPACKED-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; UNPACKED-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) + ; UNPACKED-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>) + ; UNPACKED-NEXT: [[UV2:%[0-9]+]]:_(<2 x s16>), [[UV3:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF1]](<4 x s16>) + ; UNPACKED-NEXT: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[UV2]](<2 x s16>) + ; UNPACKED-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 + ; UNPACKED-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[BITCAST]], [[C1]] + ; UNPACKED-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LSHR]], [[C1]] + ; UNPACKED-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C]](s32) + ; UNPACKED-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]] + ; UNPACKED-NEXT: [[BITCAST3:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) + ; UNPACKED-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[BITCAST1]], [[C1]] + ; UNPACKED-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[BITCAST2]], [[C1]] + ; UNPACKED-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C]](s32) + ; UNPACKED-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND2]], 
[[SHL1]] + ; UNPACKED-NEXT: [[BITCAST4:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32) + ; UNPACKED-NEXT: $vgpr0 = COPY [[BITCAST3]](<2 x s16>) + ; UNPACKED-NEXT: $vgpr1 = COPY [[BITCAST4]](<2 x s16>) + ; UNPACKED-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1 ; PACKED-LABEL: name: image_load_v3f16_dmask_0000 ; PACKED: bb.1 (%ir-block.0): - ; PACKED: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1 - ; PACKED: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; PACKED: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; PACKED: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF - ; PACKED: [[DEF1:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF - ; PACKED: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<4 x s16>) - ; PACKED: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>) - ; PACKED: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; PACKED: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) - ; PACKED: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>) - ; PACKED: [[UV2:%[0-9]+]]:_(<2 x s16>), [[UV3:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF1]](<4 x s16>) - ; PACKED: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[UV2]](<2 x s16>) - ; PACKED: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 - ; PACKED: [[AND:%[0-9]+]]:_(s32) = G_AND [[BITCAST]], [[C1]] - ; PACKED: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LSHR]], [[C1]] - ; PACKED: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C]](s32) - ; PACKED: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]] - ; PACKED: [[BITCAST3:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) - ; PACKED: [[AND2:%[0-9]+]]:_(s32) = G_AND [[BITCAST1]], [[C1]] - ; PACKED: [[AND3:%[0-9]+]]:_(s32) = G_AND [[BITCAST2]], [[C1]] - ; PACKED: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C]](s32) - ; PACKED: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL1]] - ; PACKED: [[BITCAST4:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32) - ; PACKED: $vgpr0 = COPY [[BITCAST3]](<2 x s16>) - ; PACKED: $vgpr1 = COPY [[BITCAST4]](<2 x s16>) - ; PACKED: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1 + ; PACKED-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1 + ; PACKED-NEXT: {{ $}} + ; PACKED-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 + ; PACKED-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; PACKED-NEXT: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF + ; PACKED-NEXT: [[DEF1:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF + ; PACKED-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<4 x s16>) + ; PACKED-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>) + ; PACKED-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; PACKED-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) + ; PACKED-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>) + ; PACKED-NEXT: [[UV2:%[0-9]+]]:_(<2 x s16>), [[UV3:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF1]](<4 x s16>) + ; PACKED-NEXT: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[UV2]](<2 x s16>) + ; PACKED-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 + ; PACKED-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[BITCAST]], [[C1]] + ; PACKED-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LSHR]], [[C1]] + ; PACKED-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C]](s32) + ; PACKED-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]] + ; PACKED-NEXT: [[BITCAST3:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) + ; PACKED-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[BITCAST1]], [[C1]] + ; PACKED-NEXT: [[AND3:%[0-9]+]]:_(s32) 
= G_AND [[BITCAST2]], [[C1]] + ; PACKED-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C]](s32) + ; PACKED-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL1]] + ; PACKED-NEXT: [[BITCAST4:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32) + ; PACKED-NEXT: $vgpr0 = COPY [[BITCAST3]](<2 x s16>) + ; PACKED-NEXT: $vgpr1 = COPY [[BITCAST4]](<2 x s16>) + ; PACKED-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1 %tex = call <3 x half> @llvm.amdgcn.image.load.2d.v3f16.i32(i32 0, i32 %s, i32 %t, <8 x i32> %rsrc, i32 0, i32 0) ret <3 x half> %tex } @@ -770,56 +798,58 @@ define amdgpu_ps <3 x half> @image_load_v3f16_dmask_0000(<8 x i32> inreg %rsrc, define amdgpu_ps <4 x half> @image_load_v4f16_dmask_1110(<8 x i32> inreg %rsrc, i32 %s, i32 %t) { ; UNPACKED-LABEL: name: image_load_v4f16_dmask_1110 ; UNPACKED: bb.1 (%ir-block.0): - ; UNPACKED: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1 - ; UNPACKED: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 - ; UNPACKED: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3 - ; UNPACKED: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4 - ; UNPACKED: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5 - ; UNPACKED: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6 - ; UNPACKED: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7 - ; UNPACKED: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8 - ; UNPACKED: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 - ; UNPACKED: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) - ; UNPACKED: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; UNPACKED: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; UNPACKED: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32) - ; UNPACKED: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<3 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.2d), 7, [[BUILD_VECTOR1]](<2 x s32>), $noreg, [[BUILD_VECTOR]](<8 x s32>), 0, 0, 0 :: (dereferenceable load (<3 x s16>) from custom "ImageResource", align 8) - ; UNPACKED: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<3 x s32>) - ; UNPACKED: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 - ; UNPACKED: [[AND:%[0-9]+]]:_(s32) = G_AND [[UV]], [[C]] - ; UNPACKED: [[AND1:%[0-9]+]]:_(s32) = G_AND [[UV1]], [[C]] - ; UNPACKED: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; UNPACKED: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C1]](s32) - ; UNPACKED: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]] - ; UNPACKED: [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) - ; UNPACKED: [[AND2:%[0-9]+]]:_(s32) = G_AND [[UV2]], [[C]] - ; UNPACKED: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 - ; UNPACKED: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[C2]], [[C1]](s32) - ; UNPACKED: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL1]] - ; UNPACKED: [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32) - ; UNPACKED: $vgpr0 = COPY [[BITCAST]](<2 x s16>) - ; UNPACKED: $vgpr1 = COPY [[BITCAST1]](<2 x s16>) - ; UNPACKED: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1 + ; UNPACKED-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1 + ; UNPACKED-NEXT: {{ $}} + ; UNPACKED-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 + ; UNPACKED-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3 + ; UNPACKED-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4 + ; UNPACKED-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5 + ; UNPACKED-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6 + ; 
UNPACKED-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7 + ; UNPACKED-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8 + ; UNPACKED-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 + ; UNPACKED-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) + ; UNPACKED-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0 + ; UNPACKED-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; UNPACKED-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32) + ; UNPACKED-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<3 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.2d), 7, [[BUILD_VECTOR1]](<2 x s32>), $noreg, [[BUILD_VECTOR]](<8 x s32>), 0, 0, 0 :: (dereferenceable load (<3 x s16>) from custom "ImageResource", align 8) + ; UNPACKED-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<3 x s32>) + ; UNPACKED-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 + ; UNPACKED-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[UV]], [[C]] + ; UNPACKED-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[UV1]], [[C]] + ; UNPACKED-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; UNPACKED-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C1]](s32) + ; UNPACKED-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]] + ; UNPACKED-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) + ; UNPACKED-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[UV2]], [[C]] + ; UNPACKED-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 + ; UNPACKED-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[C2]], [[C1]](s32) + ; UNPACKED-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL1]] + ; UNPACKED-NEXT: [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32) + ; UNPACKED-NEXT: $vgpr0 = COPY [[BITCAST]](<2 x s16>) + ; UNPACKED-NEXT: $vgpr1 = COPY [[BITCAST1]](<2 x s16>) + ; UNPACKED-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1 ; PACKED-LABEL: name: image_load_v4f16_dmask_1110 ; PACKED: bb.1 (%ir-block.0): - ; PACKED: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1 - ; PACKED: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 - ; PACKED: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3 - ; PACKED: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4 - ; PACKED: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5 - ; PACKED: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6 - ; PACKED: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7 - ; PACKED: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8 - ; PACKED: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 - ; PACKED: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) - ; PACKED: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; PACKED: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; PACKED: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32) - ; PACKED: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s16>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.2d), 7, [[BUILD_VECTOR1]](<2 x s32>), $noreg, [[BUILD_VECTOR]](<8 x s32>), 0, 0, 0 :: (dereferenceable load (<3 x s16>) from custom "ImageResource", align 8) - ; PACKED: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x s16>) - ; PACKED: $vgpr0 = COPY [[UV]](<2 x s16>) - ; PACKED: $vgpr1 = COPY [[UV1]](<2 x s16>) - ; PACKED: SI_RETURN_TO_EPILOG implicit 
$vgpr0, implicit $vgpr1 + ; PACKED-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1 + ; PACKED-NEXT: {{ $}} + ; PACKED-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 + ; PACKED-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3 + ; PACKED-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4 + ; PACKED-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5 + ; PACKED-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6 + ; PACKED-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7 + ; PACKED-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8 + ; PACKED-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 + ; PACKED-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) + ; PACKED-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0 + ; PACKED-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; PACKED-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32) + ; PACKED-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s16>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.2d), 7, [[BUILD_VECTOR1]](<2 x s32>), $noreg, [[BUILD_VECTOR]](<8 x s32>), 0, 0, 0 :: (dereferenceable load (<3 x s16>) from custom "ImageResource", align 8) + ; PACKED-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x s16>) + ; PACKED-NEXT: $vgpr0 = COPY [[UV]](<2 x s16>) + ; PACKED-NEXT: $vgpr1 = COPY [[UV1]](<2 x s16>) + ; PACKED-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1 %tex = call <4 x half> @llvm.amdgcn.image.load.2d.v4f16.i32(i32 7, i32 %s, i32 %t, <8 x i32> %rsrc, i32 0, i32 0) ret <4 x half> %tex } @@ -827,55 +857,57 @@ define amdgpu_ps <4 x half> @image_load_v4f16_dmask_1110(<8 x i32> inreg %rsrc, define amdgpu_ps <4 x half> @image_load_v4f16_dmask_1100(<8 x i32> inreg %rsrc, i32 %s, i32 %t) { ; UNPACKED-LABEL: name: image_load_v4f16_dmask_1100 ; UNPACKED: bb.1 (%ir-block.0): - ; UNPACKED: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1 - ; UNPACKED: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 - ; UNPACKED: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3 - ; UNPACKED: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4 - ; UNPACKED: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5 - ; UNPACKED: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6 - ; UNPACKED: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7 - ; UNPACKED: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8 - ; UNPACKED: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 - ; UNPACKED: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) - ; UNPACKED: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; UNPACKED: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; UNPACKED: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32) - ; UNPACKED: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<2 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.2d), 3, [[BUILD_VECTOR1]](<2 x s32>), $noreg, [[BUILD_VECTOR]](<8 x s32>), 0, 0, 0 :: (dereferenceable load (<2 x s16>) from custom "ImageResource") - ; UNPACKED: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<2 x s32>) - ; UNPACKED: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 - ; UNPACKED: [[AND:%[0-9]+]]:_(s32) = G_AND [[UV]], [[C]] - ; UNPACKED: [[AND1:%[0-9]+]]:_(s32) = G_AND [[UV1]], [[C]] - ; UNPACKED: 
[[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; UNPACKED: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C1]](s32) - ; UNPACKED: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]] - ; UNPACKED: [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) - ; UNPACKED: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 - ; UNPACKED: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[C2]], [[C1]](s32) - ; UNPACKED: [[OR1:%[0-9]+]]:_(s32) = G_OR [[C2]], [[SHL1]] - ; UNPACKED: [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32) - ; UNPACKED: $vgpr0 = COPY [[BITCAST]](<2 x s16>) - ; UNPACKED: $vgpr1 = COPY [[BITCAST1]](<2 x s16>) - ; UNPACKED: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1 + ; UNPACKED-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1 + ; UNPACKED-NEXT: {{ $}} + ; UNPACKED-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 + ; UNPACKED-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3 + ; UNPACKED-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4 + ; UNPACKED-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5 + ; UNPACKED-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6 + ; UNPACKED-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7 + ; UNPACKED-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8 + ; UNPACKED-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 + ; UNPACKED-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) + ; UNPACKED-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0 + ; UNPACKED-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; UNPACKED-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32) + ; UNPACKED-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<2 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.2d), 3, [[BUILD_VECTOR1]](<2 x s32>), $noreg, [[BUILD_VECTOR]](<8 x s32>), 0, 0, 0 :: (dereferenceable load (<2 x s16>) from custom "ImageResource") + ; UNPACKED-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<2 x s32>) + ; UNPACKED-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 + ; UNPACKED-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[UV]], [[C]] + ; UNPACKED-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[UV1]], [[C]] + ; UNPACKED-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; UNPACKED-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C1]](s32) + ; UNPACKED-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]] + ; UNPACKED-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) + ; UNPACKED-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 + ; UNPACKED-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[C2]], [[C1]](s32) + ; UNPACKED-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[C2]], [[SHL1]] + ; UNPACKED-NEXT: [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32) + ; UNPACKED-NEXT: $vgpr0 = COPY [[BITCAST]](<2 x s16>) + ; UNPACKED-NEXT: $vgpr1 = COPY [[BITCAST1]](<2 x s16>) + ; UNPACKED-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1 ; PACKED-LABEL: name: image_load_v4f16_dmask_1100 ; PACKED: bb.1 (%ir-block.0): - ; PACKED: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1 - ; PACKED: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 - ; PACKED: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3 - ; PACKED: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4 - ; PACKED: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5 - ; PACKED: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6 - ; PACKED: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7 - ; PACKED: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8 - ; 
PACKED: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 - ; PACKED: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) - ; PACKED: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; PACKED: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; PACKED: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32) - ; PACKED: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<2 x s16>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.2d), 3, [[BUILD_VECTOR1]](<2 x s32>), $noreg, [[BUILD_VECTOR]](<8 x s32>), 0, 0, 0 :: (dereferenceable load (<2 x s16>) from custom "ImageResource") - ; PACKED: [[DEF:%[0-9]+]]:_(<2 x s16>) = G_IMPLICIT_DEF - ; PACKED: $vgpr0 = COPY [[AMDGPU_INTRIN_IMAGE_LOAD]](<2 x s16>) - ; PACKED: $vgpr1 = COPY [[DEF]](<2 x s16>) - ; PACKED: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1 + ; PACKED-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1 + ; PACKED-NEXT: {{ $}} + ; PACKED-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 + ; PACKED-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3 + ; PACKED-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4 + ; PACKED-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5 + ; PACKED-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6 + ; PACKED-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7 + ; PACKED-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8 + ; PACKED-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 + ; PACKED-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) + ; PACKED-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0 + ; PACKED-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; PACKED-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32) + ; PACKED-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<2 x s16>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.2d), 3, [[BUILD_VECTOR1]](<2 x s32>), $noreg, [[BUILD_VECTOR]](<8 x s32>), 0, 0, 0 :: (dereferenceable load (<2 x s16>) from custom "ImageResource") + ; PACKED-NEXT: [[DEF:%[0-9]+]]:_(<2 x s16>) = G_IMPLICIT_DEF + ; PACKED-NEXT: $vgpr0 = COPY [[AMDGPU_INTRIN_IMAGE_LOAD]](<2 x s16>) + ; PACKED-NEXT: $vgpr1 = COPY [[DEF]](<2 x s16>) + ; PACKED-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1 %tex = call <4 x half> @llvm.amdgcn.image.load.2d.v4f16.i32(i32 3, i32 %s, i32 %t, <8 x i32> %rsrc, i32 0, i32 0) ret <4 x half> %tex } @@ -883,52 +915,54 @@ define amdgpu_ps <4 x half> @image_load_v4f16_dmask_1100(<8 x i32> inreg %rsrc, define amdgpu_ps <4 x half> @image_load_v4f16_dmask_1000(<8 x i32> inreg %rsrc, i32 %s, i32 %t) { ; UNPACKED-LABEL: name: image_load_v4f16_dmask_1000 ; UNPACKED: bb.1 (%ir-block.0): - ; UNPACKED: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1 - ; UNPACKED: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 - ; UNPACKED: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3 - ; UNPACKED: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4 - ; UNPACKED: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5 - ; UNPACKED: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6 - ; UNPACKED: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7 - ; UNPACKED: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8 - ; UNPACKED: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 - ; UNPACKED: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), 
[[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) - ; UNPACKED: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; UNPACKED: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; UNPACKED: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32) - ; UNPACKED: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(s32) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.2d), 1, [[BUILD_VECTOR1]](<2 x s32>), $noreg, [[BUILD_VECTOR]](<8 x s32>), 0, 0, 0 :: (dereferenceable load (s16) from custom "ImageResource") - ; UNPACKED: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 - ; UNPACKED: [[AND:%[0-9]+]]:_(s32) = G_AND [[AMDGPU_INTRIN_IMAGE_LOAD]], [[C]] - ; UNPACKED: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 - ; UNPACKED: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; UNPACKED: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[C1]], [[C2]](s32) - ; UNPACKED: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]] - ; UNPACKED: [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) - ; UNPACKED: [[OR1:%[0-9]+]]:_(s32) = G_OR [[C1]], [[SHL]] - ; UNPACKED: [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32) - ; UNPACKED: $vgpr0 = COPY [[BITCAST]](<2 x s16>) - ; UNPACKED: $vgpr1 = COPY [[BITCAST1]](<2 x s16>) - ; UNPACKED: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1 + ; UNPACKED-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1 + ; UNPACKED-NEXT: {{ $}} + ; UNPACKED-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 + ; UNPACKED-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3 + ; UNPACKED-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4 + ; UNPACKED-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5 + ; UNPACKED-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6 + ; UNPACKED-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7 + ; UNPACKED-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8 + ; UNPACKED-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 + ; UNPACKED-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) + ; UNPACKED-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0 + ; UNPACKED-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; UNPACKED-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32) + ; UNPACKED-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(s32) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.2d), 1, [[BUILD_VECTOR1]](<2 x s32>), $noreg, [[BUILD_VECTOR]](<8 x s32>), 0, 0, 0 :: (dereferenceable load (s16) from custom "ImageResource") + ; UNPACKED-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 + ; UNPACKED-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[AMDGPU_INTRIN_IMAGE_LOAD]], [[C]] + ; UNPACKED-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 + ; UNPACKED-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; UNPACKED-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[C1]], [[C2]](s32) + ; UNPACKED-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]] + ; UNPACKED-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) + ; UNPACKED-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[C1]], [[SHL]] + ; UNPACKED-NEXT: [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32) + ; UNPACKED-NEXT: $vgpr0 = COPY [[BITCAST]](<2 x s16>) + ; UNPACKED-NEXT: $vgpr1 = COPY [[BITCAST1]](<2 x s16>) + ; UNPACKED-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1 ; PACKED-LABEL: name: image_load_v4f16_dmask_1000 ; PACKED: bb.1 (%ir-block.0): - ; PACKED: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, 
$vgpr1 - ; PACKED: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 - ; PACKED: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3 - ; PACKED: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4 - ; PACKED: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5 - ; PACKED: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6 - ; PACKED: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7 - ; PACKED: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8 - ; PACKED: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 - ; PACKED: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) - ; PACKED: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; PACKED: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; PACKED: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32) - ; PACKED: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<2 x s16>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.2d), 1, [[BUILD_VECTOR1]](<2 x s32>), $noreg, [[BUILD_VECTOR]](<8 x s32>), 0, 0, 0 :: (dereferenceable load (s16) from custom "ImageResource") - ; PACKED: [[DEF:%[0-9]+]]:_(<2 x s16>) = G_IMPLICIT_DEF - ; PACKED: $vgpr0 = COPY [[AMDGPU_INTRIN_IMAGE_LOAD]](<2 x s16>) - ; PACKED: $vgpr1 = COPY [[DEF]](<2 x s16>) - ; PACKED: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1 + ; PACKED-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1 + ; PACKED-NEXT: {{ $}} + ; PACKED-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 + ; PACKED-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3 + ; PACKED-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4 + ; PACKED-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5 + ; PACKED-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6 + ; PACKED-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7 + ; PACKED-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8 + ; PACKED-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 + ; PACKED-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) + ; PACKED-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0 + ; PACKED-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; PACKED-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32) + ; PACKED-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<2 x s16>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.2d), 1, [[BUILD_VECTOR1]](<2 x s32>), $noreg, [[BUILD_VECTOR]](<8 x s32>), 0, 0, 0 :: (dereferenceable load (s16) from custom "ImageResource") + ; PACKED-NEXT: [[DEF:%[0-9]+]]:_(<2 x s16>) = G_IMPLICIT_DEF + ; PACKED-NEXT: $vgpr0 = COPY [[AMDGPU_INTRIN_IMAGE_LOAD]](<2 x s16>) + ; PACKED-NEXT: $vgpr1 = COPY [[DEF]](<2 x s16>) + ; PACKED-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1 %tex = call <4 x half> @llvm.amdgcn.image.load.2d.v4f16.i32(i32 1, i32 %s, i32 %t, <8 x i32> %rsrc, i32 0, i32 0) ret <4 x half> %tex } @@ -936,24 +970,26 @@ define amdgpu_ps <4 x half> @image_load_v4f16_dmask_1000(<8 x i32> inreg %rsrc, define amdgpu_ps <4 x half> @image_load_v4f16_dmask_0000(<8 x i32> inreg %rsrc, i32 %s, i32 %t) { ; UNPACKED-LABEL: name: image_load_v4f16_dmask_0000 ; UNPACKED: bb.1 (%ir-block.0): - ; UNPACKED: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1 - ; UNPACKED: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; UNPACKED: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; UNPACKED: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF - ; UNPACKED: [[UV:%[0-9]+]]:_(<2 x 
s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<4 x s16>) - ; UNPACKED: $vgpr0 = COPY [[UV]](<2 x s16>) - ; UNPACKED: $vgpr1 = COPY [[UV1]](<2 x s16>) - ; UNPACKED: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1 + ; UNPACKED-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1 + ; UNPACKED-NEXT: {{ $}} + ; UNPACKED-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 + ; UNPACKED-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; UNPACKED-NEXT: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF + ; UNPACKED-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<4 x s16>) + ; UNPACKED-NEXT: $vgpr0 = COPY [[UV]](<2 x s16>) + ; UNPACKED-NEXT: $vgpr1 = COPY [[UV1]](<2 x s16>) + ; UNPACKED-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1 ; PACKED-LABEL: name: image_load_v4f16_dmask_0000 ; PACKED: bb.1 (%ir-block.0): - ; PACKED: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1 - ; PACKED: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; PACKED: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; PACKED: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF - ; PACKED: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<4 x s16>) - ; PACKED: $vgpr0 = COPY [[UV]](<2 x s16>) - ; PACKED: $vgpr1 = COPY [[UV1]](<2 x s16>) - ; PACKED: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1 + ; PACKED-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1 + ; PACKED-NEXT: {{ $}} + ; PACKED-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 + ; PACKED-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; PACKED-NEXT: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF + ; PACKED-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<4 x s16>) + ; PACKED-NEXT: $vgpr0 = COPY [[UV]](<2 x s16>) + ; PACKED-NEXT: $vgpr1 = COPY [[UV1]](<2 x s16>) + ; PACKED-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1 %tex = call <4 x half> @llvm.amdgcn.image.load.2d.v4f16.i32(i32 0, i32 %s, i32 %t, <8 x i32> %rsrc, i32 0, i32 0) ret <4 x half> %tex } @@ -961,46 +997,48 @@ define amdgpu_ps <4 x half> @image_load_v4f16_dmask_0000(<8 x i32> inreg %rsrc, define amdgpu_ps half @image_load_tfe_f16_dmask_0000(<8 x i32> inreg %rsrc, i32 %s, i32 %t) { ; UNPACKED-LABEL: name: image_load_tfe_f16_dmask_0000 ; UNPACKED: bb.1 (%ir-block.0): - ; UNPACKED: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1 - ; UNPACKED: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 - ; UNPACKED: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3 - ; UNPACKED: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4 - ; UNPACKED: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5 - ; UNPACKED: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6 - ; UNPACKED: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7 - ; UNPACKED: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8 - ; UNPACKED: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 - ; UNPACKED: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) - ; UNPACKED: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; UNPACKED: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; UNPACKED: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF - ; UNPACKED: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32) - ; UNPACKED: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<2 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.2d), 
1, [[BUILD_VECTOR1]](<2 x s32>), $noreg, [[BUILD_VECTOR]](<8 x s32>), 1, 0, 0 :: (dereferenceable load (s16) from custom "ImageResource") - ; UNPACKED: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<2 x s32>) - ; UNPACKED: G_STORE [[UV1]](s32), [[DEF]](p1) :: (store (s32) into `i32 addrspace(1)* undef`, addrspace 1) - ; UNPACKED: $vgpr0 = COPY [[UV]](s32) - ; UNPACKED: SI_RETURN_TO_EPILOG implicit $vgpr0 + ; UNPACKED-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1 + ; UNPACKED-NEXT: {{ $}} + ; UNPACKED-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 + ; UNPACKED-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3 + ; UNPACKED-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4 + ; UNPACKED-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5 + ; UNPACKED-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6 + ; UNPACKED-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7 + ; UNPACKED-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8 + ; UNPACKED-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 + ; UNPACKED-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) + ; UNPACKED-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0 + ; UNPACKED-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; UNPACKED-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF + ; UNPACKED-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32) + ; UNPACKED-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<2 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.2d), 1, [[BUILD_VECTOR1]](<2 x s32>), $noreg, [[BUILD_VECTOR]](<8 x s32>), 1, 0, 0 :: (dereferenceable load (s16) from custom "ImageResource") + ; UNPACKED-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<2 x s32>) + ; UNPACKED-NEXT: G_STORE [[UV1]](s32), [[DEF]](p1) :: (store (s32) into `i32 addrspace(1)* undef`, addrspace 1) + ; UNPACKED-NEXT: $vgpr0 = COPY [[UV]](s32) + ; UNPACKED-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0 ; PACKED-LABEL: name: image_load_tfe_f16_dmask_0000 ; PACKED: bb.1 (%ir-block.0): - ; PACKED: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1 - ; PACKED: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 - ; PACKED: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3 - ; PACKED: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4 - ; PACKED: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5 - ; PACKED: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6 - ; PACKED: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7 - ; PACKED: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8 - ; PACKED: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 - ; PACKED: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) - ; PACKED: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; PACKED: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; PACKED: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF - ; PACKED: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32) - ; PACKED: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<2 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.2d), 1, [[BUILD_VECTOR1]](<2 x s32>), $noreg, [[BUILD_VECTOR]](<8 x s32>), 1, 0, 0 :: (dereferenceable load (s16) from custom "ImageResource") - ; PACKED: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES 
[[AMDGPU_INTRIN_IMAGE_LOAD]](<2 x s32>) - ; PACKED: G_STORE [[UV1]](s32), [[DEF]](p1) :: (store (s32) into `i32 addrspace(1)* undef`, addrspace 1) - ; PACKED: $vgpr0 = COPY [[UV]](s32) - ; PACKED: SI_RETURN_TO_EPILOG implicit $vgpr0 + ; PACKED-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1 + ; PACKED-NEXT: {{ $}} + ; PACKED-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 + ; PACKED-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3 + ; PACKED-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4 + ; PACKED-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5 + ; PACKED-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6 + ; PACKED-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7 + ; PACKED-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8 + ; PACKED-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 + ; PACKED-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) + ; PACKED-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0 + ; PACKED-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; PACKED-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF + ; PACKED-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32) + ; PACKED-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<2 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.2d), 1, [[BUILD_VECTOR1]](<2 x s32>), $noreg, [[BUILD_VECTOR]](<8 x s32>), 1, 0, 0 :: (dereferenceable load (s16) from custom "ImageResource") + ; PACKED-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<2 x s32>) + ; PACKED-NEXT: G_STORE [[UV1]](s32), [[DEF]](p1) :: (store (s32) into `i32 addrspace(1)* undef`, addrspace 1) + ; PACKED-NEXT: $vgpr0 = COPY [[UV]](s32) + ; PACKED-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0 %res = call { half, i32 } @llvm.amdgcn.image.load.2d.sl_f16i32s.i32(i32 0, i32 %s, i32 %t, <8 x i32> %rsrc, i32 1, i32 0) %tex = extractvalue { half, i32 } %res, 0 %tfe = extractvalue { half, i32 } %res, 1 @@ -1011,54 +1049,56 @@ define amdgpu_ps half @image_load_tfe_f16_dmask_0000(<8 x i32> inreg %rsrc, i32 define amdgpu_ps <2 x half> @image_load_tfe_v2f16_dmask_1000(<8 x i32> inreg %rsrc, i32 %s, i32 %t) { ; UNPACKED-LABEL: name: image_load_tfe_v2f16_dmask_1000 ; UNPACKED: bb.1 (%ir-block.0): - ; UNPACKED: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1 - ; UNPACKED: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 - ; UNPACKED: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3 - ; UNPACKED: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4 - ; UNPACKED: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5 - ; UNPACKED: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6 - ; UNPACKED: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7 - ; UNPACKED: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8 - ; UNPACKED: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 - ; UNPACKED: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) - ; UNPACKED: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; UNPACKED: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; UNPACKED: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF - ; UNPACKED: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32) - ; UNPACKED: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<2 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.2d), 1, [[BUILD_VECTOR1]](<2 x s32>), $noreg, [[BUILD_VECTOR]](<8 
x s32>), 1, 0, 0 :: (dereferenceable load (s16) from custom "ImageResource") - ; UNPACKED: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<2 x s32>) - ; UNPACKED: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 - ; UNPACKED: [[AND:%[0-9]+]]:_(s32) = G_AND [[UV]], [[C]] - ; UNPACKED: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 - ; UNPACKED: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; UNPACKED: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[C1]], [[C2]](s32) - ; UNPACKED: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]] - ; UNPACKED: [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) - ; UNPACKED: G_STORE [[UV1]](s32), [[DEF]](p1) :: (store (s32) into `i32 addrspace(1)* undef`, addrspace 1) - ; UNPACKED: $vgpr0 = COPY [[BITCAST]](<2 x s16>) - ; UNPACKED: SI_RETURN_TO_EPILOG implicit $vgpr0 + ; UNPACKED-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1 + ; UNPACKED-NEXT: {{ $}} + ; UNPACKED-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 + ; UNPACKED-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3 + ; UNPACKED-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4 + ; UNPACKED-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5 + ; UNPACKED-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6 + ; UNPACKED-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7 + ; UNPACKED-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8 + ; UNPACKED-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 + ; UNPACKED-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) + ; UNPACKED-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0 + ; UNPACKED-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; UNPACKED-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF + ; UNPACKED-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32) + ; UNPACKED-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<2 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.2d), 1, [[BUILD_VECTOR1]](<2 x s32>), $noreg, [[BUILD_VECTOR]](<8 x s32>), 1, 0, 0 :: (dereferenceable load (s16) from custom "ImageResource") + ; UNPACKED-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<2 x s32>) + ; UNPACKED-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 + ; UNPACKED-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[UV]], [[C]] + ; UNPACKED-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 + ; UNPACKED-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; UNPACKED-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[C1]], [[C2]](s32) + ; UNPACKED-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]] + ; UNPACKED-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) + ; UNPACKED-NEXT: G_STORE [[UV1]](s32), [[DEF]](p1) :: (store (s32) into `i32 addrspace(1)* undef`, addrspace 1) + ; UNPACKED-NEXT: $vgpr0 = COPY [[BITCAST]](<2 x s16>) + ; UNPACKED-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0 ; PACKED-LABEL: name: image_load_tfe_v2f16_dmask_1000 ; PACKED: bb.1 (%ir-block.0): - ; PACKED: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1 - ; PACKED: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 - ; PACKED: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3 - ; PACKED: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4 - ; PACKED: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5 - ; PACKED: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6 - ; PACKED: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7 - ; PACKED: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8 - ; PACKED: 
[[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 - ; PACKED: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) - ; PACKED: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; PACKED: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; PACKED: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF - ; PACKED: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32) - ; PACKED: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<2 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.2d), 1, [[BUILD_VECTOR1]](<2 x s32>), $noreg, [[BUILD_VECTOR]](<8 x s32>), 1, 0, 0 :: (dereferenceable load (s16) from custom "ImageResource") - ; PACKED: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<2 x s32>) - ; PACKED: [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[UV]](s32) - ; PACKED: G_STORE [[UV1]](s32), [[DEF]](p1) :: (store (s32) into `i32 addrspace(1)* undef`, addrspace 1) - ; PACKED: $vgpr0 = COPY [[BITCAST]](<2 x s16>) - ; PACKED: SI_RETURN_TO_EPILOG implicit $vgpr0 + ; PACKED-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1 + ; PACKED-NEXT: {{ $}} + ; PACKED-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 + ; PACKED-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3 + ; PACKED-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4 + ; PACKED-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5 + ; PACKED-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6 + ; PACKED-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7 + ; PACKED-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8 + ; PACKED-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 + ; PACKED-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) + ; PACKED-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0 + ; PACKED-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; PACKED-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF + ; PACKED-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32) + ; PACKED-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<2 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.2d), 1, [[BUILD_VECTOR1]](<2 x s32>), $noreg, [[BUILD_VECTOR]](<8 x s32>), 1, 0, 0 :: (dereferenceable load (s16) from custom "ImageResource") + ; PACKED-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<2 x s32>) + ; PACKED-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[UV]](s32) + ; PACKED-NEXT: G_STORE [[UV1]](s32), [[DEF]](p1) :: (store (s32) into `i32 addrspace(1)* undef`, addrspace 1) + ; PACKED-NEXT: $vgpr0 = COPY [[BITCAST]](<2 x s16>) + ; PACKED-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0 %res = call { <2 x half>, i32 } @llvm.amdgcn.image.load.2d.sl_v2f16i32s.i32(i32 1, i32 %s, i32 %t, <8 x i32> %rsrc, i32 1, i32 0) %tex = extractvalue { <2 x half>, i32 } %res, 0 %tfe = extractvalue { <2 x half>, i32 } %res, 1 @@ -1069,54 +1109,56 @@ define amdgpu_ps <2 x half> @image_load_tfe_v2f16_dmask_1000(<8 x i32> inreg %rs define amdgpu_ps <2 x half> @image_load_tfe_v2f16_dmask_0000(<8 x i32> inreg %rsrc, i32 %s, i32 %t) { ; UNPACKED-LABEL: name: image_load_tfe_v2f16_dmask_0000 ; UNPACKED: bb.1 (%ir-block.0): - ; UNPACKED: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1 - ; UNPACKED: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 - ; 
UNPACKED: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3 - ; UNPACKED: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4 - ; UNPACKED: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5 - ; UNPACKED: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6 - ; UNPACKED: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7 - ; UNPACKED: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8 - ; UNPACKED: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 - ; UNPACKED: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) - ; UNPACKED: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; UNPACKED: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; UNPACKED: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF - ; UNPACKED: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32) - ; UNPACKED: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<2 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.2d), 1, [[BUILD_VECTOR1]](<2 x s32>), $noreg, [[BUILD_VECTOR]](<8 x s32>), 1, 0, 0 :: (dereferenceable load (s16) from custom "ImageResource") - ; UNPACKED: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<2 x s32>) - ; UNPACKED: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 - ; UNPACKED: [[AND:%[0-9]+]]:_(s32) = G_AND [[UV]], [[C]] - ; UNPACKED: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 - ; UNPACKED: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; UNPACKED: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[C1]], [[C2]](s32) - ; UNPACKED: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]] - ; UNPACKED: [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) - ; UNPACKED: G_STORE [[UV1]](s32), [[DEF]](p1) :: (store (s32) into `i32 addrspace(1)* undef`, addrspace 1) - ; UNPACKED: $vgpr0 = COPY [[BITCAST]](<2 x s16>) - ; UNPACKED: SI_RETURN_TO_EPILOG implicit $vgpr0 + ; UNPACKED-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1 + ; UNPACKED-NEXT: {{ $}} + ; UNPACKED-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 + ; UNPACKED-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3 + ; UNPACKED-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4 + ; UNPACKED-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5 + ; UNPACKED-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6 + ; UNPACKED-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7 + ; UNPACKED-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8 + ; UNPACKED-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 + ; UNPACKED-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) + ; UNPACKED-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0 + ; UNPACKED-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; UNPACKED-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF + ; UNPACKED-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32) + ; UNPACKED-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<2 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.2d), 1, [[BUILD_VECTOR1]](<2 x s32>), $noreg, [[BUILD_VECTOR]](<8 x s32>), 1, 0, 0 :: (dereferenceable load (s16) from custom "ImageResource") + ; UNPACKED-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<2 x s32>) + ; UNPACKED-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 + ; UNPACKED-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[UV]], [[C]] + ; UNPACKED-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 + ; UNPACKED-NEXT: [[C2:%[0-9]+]]:_(s32) = 
G_CONSTANT i32 16 + ; UNPACKED-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[C1]], [[C2]](s32) + ; UNPACKED-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]] + ; UNPACKED-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) + ; UNPACKED-NEXT: G_STORE [[UV1]](s32), [[DEF]](p1) :: (store (s32) into `i32 addrspace(1)* undef`, addrspace 1) + ; UNPACKED-NEXT: $vgpr0 = COPY [[BITCAST]](<2 x s16>) + ; UNPACKED-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0 ; PACKED-LABEL: name: image_load_tfe_v2f16_dmask_0000 ; PACKED: bb.1 (%ir-block.0): - ; PACKED: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1 - ; PACKED: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 - ; PACKED: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3 - ; PACKED: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4 - ; PACKED: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5 - ; PACKED: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6 - ; PACKED: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7 - ; PACKED: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8 - ; PACKED: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 - ; PACKED: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) - ; PACKED: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; PACKED: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; PACKED: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF - ; PACKED: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32) - ; PACKED: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<2 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.2d), 1, [[BUILD_VECTOR1]](<2 x s32>), $noreg, [[BUILD_VECTOR]](<8 x s32>), 1, 0, 0 :: (dereferenceable load (s16) from custom "ImageResource") - ; PACKED: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<2 x s32>) - ; PACKED: [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[UV]](s32) - ; PACKED: G_STORE [[UV1]](s32), [[DEF]](p1) :: (store (s32) into `i32 addrspace(1)* undef`, addrspace 1) - ; PACKED: $vgpr0 = COPY [[BITCAST]](<2 x s16>) - ; PACKED: SI_RETURN_TO_EPILOG implicit $vgpr0 + ; PACKED-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1 + ; PACKED-NEXT: {{ $}} + ; PACKED-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 + ; PACKED-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3 + ; PACKED-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4 + ; PACKED-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5 + ; PACKED-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6 + ; PACKED-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7 + ; PACKED-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8 + ; PACKED-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 + ; PACKED-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) + ; PACKED-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0 + ; PACKED-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; PACKED-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF + ; PACKED-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32) + ; PACKED-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<2 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.2d), 1, [[BUILD_VECTOR1]](<2 x s32>), $noreg, [[BUILD_VECTOR]](<8 x s32>), 1, 0, 0 :: (dereferenceable load (s16) from custom "ImageResource") + ; PACKED-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES 
[[AMDGPU_INTRIN_IMAGE_LOAD]](<2 x s32>) + ; PACKED-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[UV]](s32) + ; PACKED-NEXT: G_STORE [[UV1]](s32), [[DEF]](p1) :: (store (s32) into `i32 addrspace(1)* undef`, addrspace 1) + ; PACKED-NEXT: $vgpr0 = COPY [[BITCAST]](<2 x s16>) + ; PACKED-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0 %res = call { <2 x half>, i32 } @llvm.amdgcn.image.load.2d.sl_v2f16i32s.i32(i32 0, i32 %s, i32 %t, <8 x i32> %rsrc, i32 1, i32 0) %tex = extractvalue { <2 x half>, i32 } %res, 0 %tfe = extractvalue { <2 x half>, i32 } %res, 1 @@ -1127,76 +1169,78 @@ define amdgpu_ps <2 x half> @image_load_tfe_v2f16_dmask_0000(<8 x i32> inreg %rs define amdgpu_ps <3 x half> @image_load_tfe_v3f16_dmask_1100(<8 x i32> inreg %rsrc, i32 %s, i32 %t) { ; UNPACKED-LABEL: name: image_load_tfe_v3f16_dmask_1100 ; UNPACKED: bb.1 (%ir-block.0): - ; UNPACKED: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1 - ; UNPACKED: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 - ; UNPACKED: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3 - ; UNPACKED: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4 - ; UNPACKED: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5 - ; UNPACKED: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6 - ; UNPACKED: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7 - ; UNPACKED: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8 - ; UNPACKED: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 - ; UNPACKED: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) - ; UNPACKED: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; UNPACKED: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; UNPACKED: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF - ; UNPACKED: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32) - ; UNPACKED: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<3 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.2d), 3, [[BUILD_VECTOR1]](<2 x s32>), $noreg, [[BUILD_VECTOR]](<8 x s32>), 1, 0, 0 :: (dereferenceable load (<2 x s16>) from custom "ImageResource") - ; UNPACKED: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<3 x s32>) - ; UNPACKED: G_STORE [[UV2]](s32), [[DEF]](p1) :: (store (s32) into `i32 addrspace(1)* undef`, addrspace 1) - ; UNPACKED: [[DEF1:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF - ; UNPACKED: [[UV3:%[0-9]+]]:_(<2 x s16>), [[UV4:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF1]](<4 x s16>) - ; UNPACKED: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV3]](<2 x s16>) - ; UNPACKED: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; UNPACKED: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 - ; UNPACKED: [[AND:%[0-9]+]]:_(s32) = G_AND [[UV]], [[C1]] - ; UNPACKED: [[AND1:%[0-9]+]]:_(s32) = G_AND [[UV1]], [[C1]] - ; UNPACKED: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C]](s32) - ; UNPACKED: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]] - ; UNPACKED: [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) - ; UNPACKED: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 - ; UNPACKED: [[AND2:%[0-9]+]]:_(s32) = G_AND [[BITCAST]], [[C1]] - ; UNPACKED: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND2]], [[C]](s32) - ; UNPACKED: [[OR1:%[0-9]+]]:_(s32) = G_OR [[C2]], [[SHL1]] - ; UNPACKED: [[BITCAST2:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32) - ; UNPACKED: $vgpr0 = COPY [[BITCAST1]](<2 x s16>) - ; UNPACKED: $vgpr1 = COPY [[BITCAST2]](<2 x s16>) - ; UNPACKED: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1 + ; UNPACKED-NEXT: 
liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1 + ; UNPACKED-NEXT: {{ $}} + ; UNPACKED-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 + ; UNPACKED-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3 + ; UNPACKED-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4 + ; UNPACKED-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5 + ; UNPACKED-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6 + ; UNPACKED-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7 + ; UNPACKED-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8 + ; UNPACKED-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 + ; UNPACKED-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) + ; UNPACKED-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0 + ; UNPACKED-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; UNPACKED-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF + ; UNPACKED-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32) + ; UNPACKED-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<3 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.2d), 3, [[BUILD_VECTOR1]](<2 x s32>), $noreg, [[BUILD_VECTOR]](<8 x s32>), 1, 0, 0 :: (dereferenceable load (<2 x s16>) from custom "ImageResource") + ; UNPACKED-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<3 x s32>) + ; UNPACKED-NEXT: G_STORE [[UV2]](s32), [[DEF]](p1) :: (store (s32) into `i32 addrspace(1)* undef`, addrspace 1) + ; UNPACKED-NEXT: [[DEF1:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF + ; UNPACKED-NEXT: [[UV3:%[0-9]+]]:_(<2 x s16>), [[UV4:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF1]](<4 x s16>) + ; UNPACKED-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV3]](<2 x s16>) + ; UNPACKED-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; UNPACKED-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 + ; UNPACKED-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[UV]], [[C1]] + ; UNPACKED-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[UV1]], [[C1]] + ; UNPACKED-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C]](s32) + ; UNPACKED-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]] + ; UNPACKED-NEXT: [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) + ; UNPACKED-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 + ; UNPACKED-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[BITCAST]], [[C1]] + ; UNPACKED-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND2]], [[C]](s32) + ; UNPACKED-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[C2]], [[SHL1]] + ; UNPACKED-NEXT: [[BITCAST2:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32) + ; UNPACKED-NEXT: $vgpr0 = COPY [[BITCAST1]](<2 x s16>) + ; UNPACKED-NEXT: $vgpr1 = COPY [[BITCAST2]](<2 x s16>) + ; UNPACKED-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1 ; PACKED-LABEL: name: image_load_tfe_v3f16_dmask_1100 ; PACKED: bb.1 (%ir-block.0): - ; PACKED: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1 - ; PACKED: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 - ; PACKED: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3 - ; PACKED: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4 - ; PACKED: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5 - ; PACKED: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6 - ; PACKED: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7 - ; PACKED: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8 - ; PACKED: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 - ; PACKED: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), 
[[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) - ; PACKED: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; PACKED: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; PACKED: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF - ; PACKED: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32) - ; PACKED: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<2 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.2d), 3, [[BUILD_VECTOR1]](<2 x s32>), $noreg, [[BUILD_VECTOR]](<8 x s32>), 1, 0, 0 :: (dereferenceable load (<2 x s16>) from custom "ImageResource") - ; PACKED: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<2 x s32>) - ; PACKED: [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[UV]](s32) - ; PACKED: [[DEF1:%[0-9]+]]:_(<2 x s16>) = G_IMPLICIT_DEF - ; PACKED: G_STORE [[UV1]](s32), [[DEF]](p1) :: (store (s32) into `i32 addrspace(1)* undef`, addrspace 1) - ; PACKED: [[DEF2:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF - ; PACKED: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[DEF1]](<2 x s16>) - ; PACKED: [[UV2:%[0-9]+]]:_(<2 x s16>), [[UV3:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF2]](<4 x s16>) - ; PACKED: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[UV2]](<2 x s16>) - ; PACKED: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; PACKED: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 - ; PACKED: [[AND:%[0-9]+]]:_(s32) = G_AND [[BITCAST1]], [[C1]] - ; PACKED: [[AND1:%[0-9]+]]:_(s32) = G_AND [[BITCAST2]], [[C1]] - ; PACKED: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C]](s32) - ; PACKED: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]] - ; PACKED: [[BITCAST3:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) - ; PACKED: $vgpr0 = COPY [[BITCAST]](<2 x s16>) - ; PACKED: $vgpr1 = COPY [[BITCAST3]](<2 x s16>) - ; PACKED: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1 + ; PACKED-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1 + ; PACKED-NEXT: {{ $}} + ; PACKED-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 + ; PACKED-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3 + ; PACKED-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4 + ; PACKED-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5 + ; PACKED-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6 + ; PACKED-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7 + ; PACKED-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8 + ; PACKED-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 + ; PACKED-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) + ; PACKED-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0 + ; PACKED-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; PACKED-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF + ; PACKED-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32) + ; PACKED-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<2 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.2d), 3, [[BUILD_VECTOR1]](<2 x s32>), $noreg, [[BUILD_VECTOR]](<8 x s32>), 1, 0, 0 :: (dereferenceable load (<2 x s16>) from custom "ImageResource") + ; PACKED-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<2 x s32>) + ; PACKED-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[UV]](s32) + ; PACKED-NEXT: [[DEF1:%[0-9]+]]:_(<2 x s16>) = G_IMPLICIT_DEF + ; PACKED-NEXT: G_STORE [[UV1]](s32), [[DEF]](p1) :: (store (s32) into `i32 
addrspace(1)* undef`, addrspace 1) + ; PACKED-NEXT: [[DEF2:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF + ; PACKED-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[DEF1]](<2 x s16>) + ; PACKED-NEXT: [[UV2:%[0-9]+]]:_(<2 x s16>), [[UV3:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF2]](<4 x s16>) + ; PACKED-NEXT: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[UV2]](<2 x s16>) + ; PACKED-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; PACKED-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 + ; PACKED-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[BITCAST1]], [[C1]] + ; PACKED-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[BITCAST2]], [[C1]] + ; PACKED-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C]](s32) + ; PACKED-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]] + ; PACKED-NEXT: [[BITCAST3:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) + ; PACKED-NEXT: $vgpr0 = COPY [[BITCAST]](<2 x s16>) + ; PACKED-NEXT: $vgpr1 = COPY [[BITCAST3]](<2 x s16>) + ; PACKED-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1 %res = call { <3 x half>, i32 } @llvm.amdgcn.image.load.2d.sl_v3f16i32s.i32(i32 3, i32 %s, i32 %t, <8 x i32> %rsrc, i32 1, i32 0) %tex = extractvalue { <3 x half>, i32 } %res, 0 %tfe = extractvalue { <3 x half>, i32 } %res, 1 @@ -1207,75 +1251,77 @@ define amdgpu_ps <3 x half> @image_load_tfe_v3f16_dmask_1100(<8 x i32> inreg %rs define amdgpu_ps <3 x half> @image_load_tfe_v3f16_dmask_1000(<8 x i32> inreg %rsrc, i32 %s, i32 %t) { ; UNPACKED-LABEL: name: image_load_tfe_v3f16_dmask_1000 ; UNPACKED: bb.1 (%ir-block.0): - ; UNPACKED: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1 - ; UNPACKED: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 - ; UNPACKED: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3 - ; UNPACKED: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4 - ; UNPACKED: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5 - ; UNPACKED: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6 - ; UNPACKED: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7 - ; UNPACKED: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8 - ; UNPACKED: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 - ; UNPACKED: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) - ; UNPACKED: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; UNPACKED: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; UNPACKED: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF - ; UNPACKED: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32) - ; UNPACKED: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<2 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.2d), 1, [[BUILD_VECTOR1]](<2 x s32>), $noreg, [[BUILD_VECTOR]](<8 x s32>), 1, 0, 0 :: (dereferenceable load (s16) from custom "ImageResource") - ; UNPACKED: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<2 x s32>) - ; UNPACKED: G_STORE [[UV1]](s32), [[DEF]](p1) :: (store (s32) into `i32 addrspace(1)* undef`, addrspace 1) - ; UNPACKED: [[DEF1:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF - ; UNPACKED: [[UV2:%[0-9]+]]:_(<2 x s16>), [[UV3:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF1]](<4 x s16>) - ; UNPACKED: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV2]](<2 x s16>) - ; UNPACKED: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; UNPACKED: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 - ; UNPACKED: [[AND:%[0-9]+]]:_(s32) = G_AND [[UV]], [[C1]] - ; UNPACKED: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 - ; UNPACKED: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[C2]], [[C]](s32) - ; 
UNPACKED: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]] - ; UNPACKED: [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) - ; UNPACKED: [[AND1:%[0-9]+]]:_(s32) = G_AND [[BITCAST]], [[C1]] - ; UNPACKED: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C]](s32) - ; UNPACKED: [[OR1:%[0-9]+]]:_(s32) = G_OR [[C2]], [[SHL1]] - ; UNPACKED: [[BITCAST2:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32) - ; UNPACKED: $vgpr0 = COPY [[BITCAST1]](<2 x s16>) - ; UNPACKED: $vgpr1 = COPY [[BITCAST2]](<2 x s16>) - ; UNPACKED: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1 + ; UNPACKED-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1 + ; UNPACKED-NEXT: {{ $}} + ; UNPACKED-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 + ; UNPACKED-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3 + ; UNPACKED-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4 + ; UNPACKED-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5 + ; UNPACKED-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6 + ; UNPACKED-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7 + ; UNPACKED-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8 + ; UNPACKED-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 + ; UNPACKED-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) + ; UNPACKED-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0 + ; UNPACKED-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; UNPACKED-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF + ; UNPACKED-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32) + ; UNPACKED-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<2 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.2d), 1, [[BUILD_VECTOR1]](<2 x s32>), $noreg, [[BUILD_VECTOR]](<8 x s32>), 1, 0, 0 :: (dereferenceable load (s16) from custom "ImageResource") + ; UNPACKED-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<2 x s32>) + ; UNPACKED-NEXT: G_STORE [[UV1]](s32), [[DEF]](p1) :: (store (s32) into `i32 addrspace(1)* undef`, addrspace 1) + ; UNPACKED-NEXT: [[DEF1:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF + ; UNPACKED-NEXT: [[UV2:%[0-9]+]]:_(<2 x s16>), [[UV3:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF1]](<4 x s16>) + ; UNPACKED-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV2]](<2 x s16>) + ; UNPACKED-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; UNPACKED-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 + ; UNPACKED-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[UV]], [[C1]] + ; UNPACKED-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 + ; UNPACKED-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[C2]], [[C]](s32) + ; UNPACKED-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]] + ; UNPACKED-NEXT: [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) + ; UNPACKED-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[BITCAST]], [[C1]] + ; UNPACKED-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C]](s32) + ; UNPACKED-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[C2]], [[SHL1]] + ; UNPACKED-NEXT: [[BITCAST2:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32) + ; UNPACKED-NEXT: $vgpr0 = COPY [[BITCAST1]](<2 x s16>) + ; UNPACKED-NEXT: $vgpr1 = COPY [[BITCAST2]](<2 x s16>) + ; UNPACKED-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1 ; PACKED-LABEL: name: image_load_tfe_v3f16_dmask_1000 ; PACKED: bb.1 (%ir-block.0): - ; PACKED: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1 - ; PACKED: 
[[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 - ; PACKED: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3 - ; PACKED: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4 - ; PACKED: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5 - ; PACKED: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6 - ; PACKED: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7 - ; PACKED: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8 - ; PACKED: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 - ; PACKED: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) - ; PACKED: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; PACKED: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; PACKED: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF - ; PACKED: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32) - ; PACKED: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<2 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.2d), 1, [[BUILD_VECTOR1]](<2 x s32>), $noreg, [[BUILD_VECTOR]](<8 x s32>), 1, 0, 0 :: (dereferenceable load (s16) from custom "ImageResource") - ; PACKED: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<2 x s32>) - ; PACKED: [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[UV]](s32) - ; PACKED: [[DEF1:%[0-9]+]]:_(<2 x s16>) = G_IMPLICIT_DEF - ; PACKED: G_STORE [[UV1]](s32), [[DEF]](p1) :: (store (s32) into `i32 addrspace(1)* undef`, addrspace 1) - ; PACKED: [[DEF2:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF - ; PACKED: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[DEF1]](<2 x s16>) - ; PACKED: [[UV2:%[0-9]+]]:_(<2 x s16>), [[UV3:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF2]](<4 x s16>) - ; PACKED: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[UV2]](<2 x s16>) - ; PACKED: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; PACKED: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 - ; PACKED: [[AND:%[0-9]+]]:_(s32) = G_AND [[BITCAST1]], [[C1]] - ; PACKED: [[AND1:%[0-9]+]]:_(s32) = G_AND [[BITCAST2]], [[C1]] - ; PACKED: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C]](s32) - ; PACKED: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]] - ; PACKED: [[BITCAST3:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) - ; PACKED: $vgpr0 = COPY [[BITCAST]](<2 x s16>) - ; PACKED: $vgpr1 = COPY [[BITCAST3]](<2 x s16>) - ; PACKED: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1 + ; PACKED-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1 + ; PACKED-NEXT: {{ $}} + ; PACKED-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 + ; PACKED-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3 + ; PACKED-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4 + ; PACKED-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5 + ; PACKED-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6 + ; PACKED-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7 + ; PACKED-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8 + ; PACKED-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 + ; PACKED-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) + ; PACKED-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0 + ; PACKED-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; PACKED-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF + ; PACKED-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32) + ; PACKED-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<2 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.2d), 1, 
[[BUILD_VECTOR1]](<2 x s32>), $noreg, [[BUILD_VECTOR]](<8 x s32>), 1, 0, 0 :: (dereferenceable load (s16) from custom "ImageResource") + ; PACKED-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<2 x s32>) + ; PACKED-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[UV]](s32) + ; PACKED-NEXT: [[DEF1:%[0-9]+]]:_(<2 x s16>) = G_IMPLICIT_DEF + ; PACKED-NEXT: G_STORE [[UV1]](s32), [[DEF]](p1) :: (store (s32) into `i32 addrspace(1)* undef`, addrspace 1) + ; PACKED-NEXT: [[DEF2:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF + ; PACKED-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[DEF1]](<2 x s16>) + ; PACKED-NEXT: [[UV2:%[0-9]+]]:_(<2 x s16>), [[UV3:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF2]](<4 x s16>) + ; PACKED-NEXT: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[UV2]](<2 x s16>) + ; PACKED-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; PACKED-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 + ; PACKED-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[BITCAST1]], [[C1]] + ; PACKED-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[BITCAST2]], [[C1]] + ; PACKED-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C]](s32) + ; PACKED-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]] + ; PACKED-NEXT: [[BITCAST3:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) + ; PACKED-NEXT: $vgpr0 = COPY [[BITCAST]](<2 x s16>) + ; PACKED-NEXT: $vgpr1 = COPY [[BITCAST3]](<2 x s16>) + ; PACKED-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1 %res = call { <3 x half>, i32 } @llvm.amdgcn.image.load.2d.sl_v3f16i32s.i32(i32 1, i32 %s, i32 %t, <8 x i32> %rsrc, i32 1, i32 0) %tex = extractvalue { <3 x half>, i32 } %res, 0 %tfe = extractvalue { <3 x half>, i32 } %res, 1 @@ -1286,75 +1332,77 @@ define amdgpu_ps <3 x half> @image_load_tfe_v3f16_dmask_1000(<8 x i32> inreg %rs define amdgpu_ps <3 x half> @image_load_tfe_v3f16_dmask_0000(<8 x i32> inreg %rsrc, i32 %s, i32 %t) { ; UNPACKED-LABEL: name: image_load_tfe_v3f16_dmask_0000 ; UNPACKED: bb.1 (%ir-block.0): - ; UNPACKED: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1 - ; UNPACKED: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 - ; UNPACKED: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3 - ; UNPACKED: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4 - ; UNPACKED: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5 - ; UNPACKED: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6 - ; UNPACKED: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7 - ; UNPACKED: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8 - ; UNPACKED: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 - ; UNPACKED: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) - ; UNPACKED: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; UNPACKED: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; UNPACKED: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF - ; UNPACKED: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32) - ; UNPACKED: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<2 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.2d), 1, [[BUILD_VECTOR1]](<2 x s32>), $noreg, [[BUILD_VECTOR]](<8 x s32>), 1, 0, 0 :: (dereferenceable load (s16) from custom "ImageResource") - ; UNPACKED: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<2 x s32>) - ; UNPACKED: G_STORE [[UV1]](s32), [[DEF]](p1) :: (store (s32) into `i32 addrspace(1)* undef`, addrspace 1) - ; UNPACKED: [[DEF1:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF - ; 
UNPACKED: [[UV2:%[0-9]+]]:_(<2 x s16>), [[UV3:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF1]](<4 x s16>) - ; UNPACKED: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV2]](<2 x s16>) - ; UNPACKED: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; UNPACKED: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 - ; UNPACKED: [[AND:%[0-9]+]]:_(s32) = G_AND [[UV]], [[C1]] - ; UNPACKED: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 - ; UNPACKED: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[C2]], [[C]](s32) - ; UNPACKED: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]] - ; UNPACKED: [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) - ; UNPACKED: [[AND1:%[0-9]+]]:_(s32) = G_AND [[BITCAST]], [[C1]] - ; UNPACKED: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C]](s32) - ; UNPACKED: [[OR1:%[0-9]+]]:_(s32) = G_OR [[C2]], [[SHL1]] - ; UNPACKED: [[BITCAST2:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32) - ; UNPACKED: $vgpr0 = COPY [[BITCAST1]](<2 x s16>) - ; UNPACKED: $vgpr1 = COPY [[BITCAST2]](<2 x s16>) - ; UNPACKED: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1 + ; UNPACKED-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1 + ; UNPACKED-NEXT: {{ $}} + ; UNPACKED-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 + ; UNPACKED-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3 + ; UNPACKED-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4 + ; UNPACKED-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5 + ; UNPACKED-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6 + ; UNPACKED-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7 + ; UNPACKED-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8 + ; UNPACKED-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 + ; UNPACKED-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) + ; UNPACKED-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0 + ; UNPACKED-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; UNPACKED-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF + ; UNPACKED-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32) + ; UNPACKED-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<2 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.2d), 1, [[BUILD_VECTOR1]](<2 x s32>), $noreg, [[BUILD_VECTOR]](<8 x s32>), 1, 0, 0 :: (dereferenceable load (s16) from custom "ImageResource") + ; UNPACKED-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<2 x s32>) + ; UNPACKED-NEXT: G_STORE [[UV1]](s32), [[DEF]](p1) :: (store (s32) into `i32 addrspace(1)* undef`, addrspace 1) + ; UNPACKED-NEXT: [[DEF1:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF + ; UNPACKED-NEXT: [[UV2:%[0-9]+]]:_(<2 x s16>), [[UV3:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF1]](<4 x s16>) + ; UNPACKED-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV2]](<2 x s16>) + ; UNPACKED-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; UNPACKED-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 + ; UNPACKED-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[UV]], [[C1]] + ; UNPACKED-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 + ; UNPACKED-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[C2]], [[C]](s32) + ; UNPACKED-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]] + ; UNPACKED-NEXT: [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) + ; UNPACKED-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[BITCAST]], [[C1]] + ; UNPACKED-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C]](s32) + ; UNPACKED-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[C2]], 
[[SHL1]] + ; UNPACKED-NEXT: [[BITCAST2:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32) + ; UNPACKED-NEXT: $vgpr0 = COPY [[BITCAST1]](<2 x s16>) + ; UNPACKED-NEXT: $vgpr1 = COPY [[BITCAST2]](<2 x s16>) + ; UNPACKED-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1 ; PACKED-LABEL: name: image_load_tfe_v3f16_dmask_0000 ; PACKED: bb.1 (%ir-block.0): - ; PACKED: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1 - ; PACKED: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 - ; PACKED: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3 - ; PACKED: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4 - ; PACKED: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5 - ; PACKED: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6 - ; PACKED: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7 - ; PACKED: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8 - ; PACKED: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 - ; PACKED: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) - ; PACKED: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; PACKED: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; PACKED: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF - ; PACKED: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32) - ; PACKED: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<2 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.2d), 1, [[BUILD_VECTOR1]](<2 x s32>), $noreg, [[BUILD_VECTOR]](<8 x s32>), 1, 0, 0 :: (dereferenceable load (s16) from custom "ImageResource") - ; PACKED: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<2 x s32>) - ; PACKED: [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[UV]](s32) - ; PACKED: [[DEF1:%[0-9]+]]:_(<2 x s16>) = G_IMPLICIT_DEF - ; PACKED: G_STORE [[UV1]](s32), [[DEF]](p1) :: (store (s32) into `i32 addrspace(1)* undef`, addrspace 1) - ; PACKED: [[DEF2:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF - ; PACKED: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[DEF1]](<2 x s16>) - ; PACKED: [[UV2:%[0-9]+]]:_(<2 x s16>), [[UV3:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF2]](<4 x s16>) - ; PACKED: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[UV2]](<2 x s16>) - ; PACKED: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; PACKED: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 - ; PACKED: [[AND:%[0-9]+]]:_(s32) = G_AND [[BITCAST1]], [[C1]] - ; PACKED: [[AND1:%[0-9]+]]:_(s32) = G_AND [[BITCAST2]], [[C1]] - ; PACKED: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C]](s32) - ; PACKED: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]] - ; PACKED: [[BITCAST3:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) - ; PACKED: $vgpr0 = COPY [[BITCAST]](<2 x s16>) - ; PACKED: $vgpr1 = COPY [[BITCAST3]](<2 x s16>) - ; PACKED: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1 + ; PACKED-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1 + ; PACKED-NEXT: {{ $}} + ; PACKED-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 + ; PACKED-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3 + ; PACKED-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4 + ; PACKED-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5 + ; PACKED-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6 + ; PACKED-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7 + ; PACKED-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8 + ; PACKED-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 + ; PACKED-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), 
[[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) + ; PACKED-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0 + ; PACKED-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; PACKED-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF + ; PACKED-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32) + ; PACKED-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<2 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.2d), 1, [[BUILD_VECTOR1]](<2 x s32>), $noreg, [[BUILD_VECTOR]](<8 x s32>), 1, 0, 0 :: (dereferenceable load (s16) from custom "ImageResource") + ; PACKED-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<2 x s32>) + ; PACKED-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[UV]](s32) + ; PACKED-NEXT: [[DEF1:%[0-9]+]]:_(<2 x s16>) = G_IMPLICIT_DEF + ; PACKED-NEXT: G_STORE [[UV1]](s32), [[DEF]](p1) :: (store (s32) into `i32 addrspace(1)* undef`, addrspace 1) + ; PACKED-NEXT: [[DEF2:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF + ; PACKED-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[DEF1]](<2 x s16>) + ; PACKED-NEXT: [[UV2:%[0-9]+]]:_(<2 x s16>), [[UV3:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF2]](<4 x s16>) + ; PACKED-NEXT: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[UV2]](<2 x s16>) + ; PACKED-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; PACKED-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 + ; PACKED-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[BITCAST1]], [[C1]] + ; PACKED-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[BITCAST2]], [[C1]] + ; PACKED-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C]](s32) + ; PACKED-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]] + ; PACKED-NEXT: [[BITCAST3:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) + ; PACKED-NEXT: $vgpr0 = COPY [[BITCAST]](<2 x s16>) + ; PACKED-NEXT: $vgpr1 = COPY [[BITCAST3]](<2 x s16>) + ; PACKED-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1 %res = call { <3 x half>, i32 } @llvm.amdgcn.image.load.2d.sl_v3f16i32s.i32(i32 0, i32 %s, i32 %t, <8 x i32> %rsrc, i32 1, i32 0) %tex = extractvalue { <3 x half>, i32 } %res, 0 %tfe = extractvalue { <3 x half>, i32 } %res, 1 @@ -1365,62 +1413,64 @@ define amdgpu_ps <3 x half> @image_load_tfe_v3f16_dmask_0000(<8 x i32> inreg %rs define amdgpu_ps <4 x half> @image_load_tfe_v4f16_dmask_1110(<8 x i32> inreg %rsrc, i32 %s, i32 %t) { ; UNPACKED-LABEL: name: image_load_tfe_v4f16_dmask_1110 ; UNPACKED: bb.1 (%ir-block.0): - ; UNPACKED: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1 - ; UNPACKED: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 - ; UNPACKED: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3 - ; UNPACKED: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4 - ; UNPACKED: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5 - ; UNPACKED: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6 - ; UNPACKED: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7 - ; UNPACKED: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8 - ; UNPACKED: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 - ; UNPACKED: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) - ; UNPACKED: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; UNPACKED: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; UNPACKED: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF - ; UNPACKED: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32) - ; UNPACKED: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD 
intrinsic(@llvm.amdgcn.image.load.2d), 7, [[BUILD_VECTOR1]](<2 x s32>), $noreg, [[BUILD_VECTOR]](<8 x s32>), 1, 0, 0 :: (dereferenceable load (<3 x s16>) from custom "ImageResource", align 8) - ; UNPACKED: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x s32>) - ; UNPACKED: G_STORE [[UV3]](s32), [[DEF]](p1) :: (store (s32) into `i32 addrspace(1)* undef`, addrspace 1) - ; UNPACKED: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 - ; UNPACKED: [[AND:%[0-9]+]]:_(s32) = G_AND [[UV]], [[C]] - ; UNPACKED: [[AND1:%[0-9]+]]:_(s32) = G_AND [[UV1]], [[C]] - ; UNPACKED: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; UNPACKED: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C1]](s32) - ; UNPACKED: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]] - ; UNPACKED: [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) - ; UNPACKED: [[AND2:%[0-9]+]]:_(s32) = G_AND [[UV2]], [[C]] - ; UNPACKED: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 - ; UNPACKED: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[C2]], [[C1]](s32) - ; UNPACKED: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL1]] - ; UNPACKED: [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32) - ; UNPACKED: $vgpr0 = COPY [[BITCAST]](<2 x s16>) - ; UNPACKED: $vgpr1 = COPY [[BITCAST1]](<2 x s16>) - ; UNPACKED: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1 + ; UNPACKED-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1 + ; UNPACKED-NEXT: {{ $}} + ; UNPACKED-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 + ; UNPACKED-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3 + ; UNPACKED-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4 + ; UNPACKED-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5 + ; UNPACKED-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6 + ; UNPACKED-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7 + ; UNPACKED-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8 + ; UNPACKED-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 + ; UNPACKED-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) + ; UNPACKED-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0 + ; UNPACKED-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; UNPACKED-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF + ; UNPACKED-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32) + ; UNPACKED-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.2d), 7, [[BUILD_VECTOR1]](<2 x s32>), $noreg, [[BUILD_VECTOR]](<8 x s32>), 1, 0, 0 :: (dereferenceable load (<3 x s16>) from custom "ImageResource", align 8) + ; UNPACKED-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x s32>) + ; UNPACKED-NEXT: G_STORE [[UV3]](s32), [[DEF]](p1) :: (store (s32) into `i32 addrspace(1)* undef`, addrspace 1) + ; UNPACKED-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 + ; UNPACKED-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[UV]], [[C]] + ; UNPACKED-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[UV1]], [[C]] + ; UNPACKED-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; UNPACKED-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C1]](s32) + ; UNPACKED-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]] + ; UNPACKED-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) + ; UNPACKED-NEXT: [[AND2:%[0-9]+]]:_(s32) = 
G_AND [[UV2]], [[C]] + ; UNPACKED-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 + ; UNPACKED-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[C2]], [[C1]](s32) + ; UNPACKED-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL1]] + ; UNPACKED-NEXT: [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32) + ; UNPACKED-NEXT: $vgpr0 = COPY [[BITCAST]](<2 x s16>) + ; UNPACKED-NEXT: $vgpr1 = COPY [[BITCAST1]](<2 x s16>) + ; UNPACKED-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1 ; PACKED-LABEL: name: image_load_tfe_v4f16_dmask_1110 ; PACKED: bb.1 (%ir-block.0): - ; PACKED: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1 - ; PACKED: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 - ; PACKED: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3 - ; PACKED: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4 - ; PACKED: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5 - ; PACKED: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6 - ; PACKED: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7 - ; PACKED: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8 - ; PACKED: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 - ; PACKED: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) - ; PACKED: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; PACKED: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; PACKED: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF - ; PACKED: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32) - ; PACKED: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<3 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.2d), 7, [[BUILD_VECTOR1]](<2 x s32>), $noreg, [[BUILD_VECTOR]](<8 x s32>), 1, 0, 0 :: (dereferenceable load (<3 x s16>) from custom "ImageResource", align 8) - ; PACKED: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<3 x s32>) - ; PACKED: [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[UV]](s32) - ; PACKED: [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[UV1]](s32) - ; PACKED: G_STORE [[UV2]](s32), [[DEF]](p1) :: (store (s32) into `i32 addrspace(1)* undef`, addrspace 1) - ; PACKED: $vgpr0 = COPY [[BITCAST]](<2 x s16>) - ; PACKED: $vgpr1 = COPY [[BITCAST1]](<2 x s16>) - ; PACKED: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1 + ; PACKED-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1 + ; PACKED-NEXT: {{ $}} + ; PACKED-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 + ; PACKED-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3 + ; PACKED-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4 + ; PACKED-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5 + ; PACKED-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6 + ; PACKED-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7 + ; PACKED-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8 + ; PACKED-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 + ; PACKED-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) + ; PACKED-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0 + ; PACKED-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; PACKED-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF + ; PACKED-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32) + ; PACKED-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<3 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.2d), 7, 
[[BUILD_VECTOR1]](<2 x s32>), $noreg, [[BUILD_VECTOR]](<8 x s32>), 1, 0, 0 :: (dereferenceable load (<3 x s16>) from custom "ImageResource", align 8) + ; PACKED-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<3 x s32>) + ; PACKED-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[UV]](s32) + ; PACKED-NEXT: [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[UV1]](s32) + ; PACKED-NEXT: G_STORE [[UV2]](s32), [[DEF]](p1) :: (store (s32) into `i32 addrspace(1)* undef`, addrspace 1) + ; PACKED-NEXT: $vgpr0 = COPY [[BITCAST]](<2 x s16>) + ; PACKED-NEXT: $vgpr1 = COPY [[BITCAST1]](<2 x s16>) + ; PACKED-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1 %res = call { <4 x half>, i32 } @llvm.amdgcn.image.load.2d.sl_v4f16i32s.i32(i32 7, i32 %s, i32 %t, <8 x i32> %rsrc, i32 1, i32 0) %tex = extractvalue { <4 x half>, i32 } %res, 0 %tfe = extractvalue { <4 x half>, i32 } %res, 1 @@ -1431,61 +1481,63 @@ define amdgpu_ps <4 x half> @image_load_tfe_v4f16_dmask_1110(<8 x i32> inreg %rs define amdgpu_ps <4 x half> @image_load_tfe_v4f16_dmask_1100(<8 x i32> inreg %rsrc, i32 %s, i32 %t) { ; UNPACKED-LABEL: name: image_load_tfe_v4f16_dmask_1100 ; UNPACKED: bb.1 (%ir-block.0): - ; UNPACKED: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1 - ; UNPACKED: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 - ; UNPACKED: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3 - ; UNPACKED: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4 - ; UNPACKED: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5 - ; UNPACKED: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6 - ; UNPACKED: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7 - ; UNPACKED: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8 - ; UNPACKED: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 - ; UNPACKED: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) - ; UNPACKED: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; UNPACKED: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; UNPACKED: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF - ; UNPACKED: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32) - ; UNPACKED: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<3 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.2d), 3, [[BUILD_VECTOR1]](<2 x s32>), $noreg, [[BUILD_VECTOR]](<8 x s32>), 1, 0, 0 :: (dereferenceable load (<2 x s16>) from custom "ImageResource") - ; UNPACKED: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<3 x s32>) - ; UNPACKED: G_STORE [[UV2]](s32), [[DEF]](p1) :: (store (s32) into `i32 addrspace(1)* undef`, addrspace 1) - ; UNPACKED: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 - ; UNPACKED: [[AND:%[0-9]+]]:_(s32) = G_AND [[UV]], [[C]] - ; UNPACKED: [[AND1:%[0-9]+]]:_(s32) = G_AND [[UV1]], [[C]] - ; UNPACKED: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; UNPACKED: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C1]](s32) - ; UNPACKED: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]] - ; UNPACKED: [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) - ; UNPACKED: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 - ; UNPACKED: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[C2]], [[C1]](s32) - ; UNPACKED: [[OR1:%[0-9]+]]:_(s32) = G_OR [[C2]], [[SHL1]] - ; UNPACKED: [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32) - ; UNPACKED: $vgpr0 = COPY [[BITCAST]](<2 x s16>) - ; UNPACKED: $vgpr1 = COPY [[BITCAST1]](<2 x 
s16>) - ; UNPACKED: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1 + ; UNPACKED-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1 + ; UNPACKED-NEXT: {{ $}} + ; UNPACKED-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 + ; UNPACKED-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3 + ; UNPACKED-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4 + ; UNPACKED-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5 + ; UNPACKED-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6 + ; UNPACKED-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7 + ; UNPACKED-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8 + ; UNPACKED-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 + ; UNPACKED-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) + ; UNPACKED-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0 + ; UNPACKED-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; UNPACKED-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF + ; UNPACKED-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32) + ; UNPACKED-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<3 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.2d), 3, [[BUILD_VECTOR1]](<2 x s32>), $noreg, [[BUILD_VECTOR]](<8 x s32>), 1, 0, 0 :: (dereferenceable load (<2 x s16>) from custom "ImageResource") + ; UNPACKED-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<3 x s32>) + ; UNPACKED-NEXT: G_STORE [[UV2]](s32), [[DEF]](p1) :: (store (s32) into `i32 addrspace(1)* undef`, addrspace 1) + ; UNPACKED-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 + ; UNPACKED-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[UV]], [[C]] + ; UNPACKED-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[UV1]], [[C]] + ; UNPACKED-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; UNPACKED-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C1]](s32) + ; UNPACKED-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]] + ; UNPACKED-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) + ; UNPACKED-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 + ; UNPACKED-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[C2]], [[C1]](s32) + ; UNPACKED-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[C2]], [[SHL1]] + ; UNPACKED-NEXT: [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32) + ; UNPACKED-NEXT: $vgpr0 = COPY [[BITCAST]](<2 x s16>) + ; UNPACKED-NEXT: $vgpr1 = COPY [[BITCAST1]](<2 x s16>) + ; UNPACKED-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1 ; PACKED-LABEL: name: image_load_tfe_v4f16_dmask_1100 ; PACKED: bb.1 (%ir-block.0): - ; PACKED: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1 - ; PACKED: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 - ; PACKED: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3 - ; PACKED: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4 - ; PACKED: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5 - ; PACKED: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6 - ; PACKED: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7 - ; PACKED: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8 - ; PACKED: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 - ; PACKED: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) - ; PACKED: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; PACKED: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; PACKED: [[DEF:%[0-9]+]]:_(p1) = 
G_IMPLICIT_DEF - ; PACKED: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32) - ; PACKED: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<2 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.2d), 3, [[BUILD_VECTOR1]](<2 x s32>), $noreg, [[BUILD_VECTOR]](<8 x s32>), 1, 0, 0 :: (dereferenceable load (<2 x s16>) from custom "ImageResource") - ; PACKED: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<2 x s32>) - ; PACKED: [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[UV]](s32) - ; PACKED: [[DEF1:%[0-9]+]]:_(<2 x s16>) = G_IMPLICIT_DEF - ; PACKED: G_STORE [[UV1]](s32), [[DEF]](p1) :: (store (s32) into `i32 addrspace(1)* undef`, addrspace 1) - ; PACKED: $vgpr0 = COPY [[BITCAST]](<2 x s16>) - ; PACKED: $vgpr1 = COPY [[DEF1]](<2 x s16>) - ; PACKED: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1 + ; PACKED-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1 + ; PACKED-NEXT: {{ $}} + ; PACKED-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 + ; PACKED-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3 + ; PACKED-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4 + ; PACKED-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5 + ; PACKED-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6 + ; PACKED-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7 + ; PACKED-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8 + ; PACKED-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 + ; PACKED-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) + ; PACKED-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0 + ; PACKED-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; PACKED-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF + ; PACKED-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32) + ; PACKED-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<2 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.2d), 3, [[BUILD_VECTOR1]](<2 x s32>), $noreg, [[BUILD_VECTOR]](<8 x s32>), 1, 0, 0 :: (dereferenceable load (<2 x s16>) from custom "ImageResource") + ; PACKED-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<2 x s32>) + ; PACKED-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[UV]](s32) + ; PACKED-NEXT: [[DEF1:%[0-9]+]]:_(<2 x s16>) = G_IMPLICIT_DEF + ; PACKED-NEXT: G_STORE [[UV1]](s32), [[DEF]](p1) :: (store (s32) into `i32 addrspace(1)* undef`, addrspace 1) + ; PACKED-NEXT: $vgpr0 = COPY [[BITCAST]](<2 x s16>) + ; PACKED-NEXT: $vgpr1 = COPY [[DEF1]](<2 x s16>) + ; PACKED-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1 %res = call { <4 x half>, i32 } @llvm.amdgcn.image.load.2d.sl_v4f16i32s.i32(i32 3, i32 %s, i32 %t, <8 x i32> %rsrc, i32 1, i32 0) %tex = extractvalue { <4 x half>, i32 } %res, 0 %tfe = extractvalue { <4 x half>, i32 } %res, 1 @@ -1496,59 +1548,61 @@ define amdgpu_ps <4 x half> @image_load_tfe_v4f16_dmask_1100(<8 x i32> inreg %rs define amdgpu_ps <4 x half> @image_load_tfe_v4f16_dmask_1000(<8 x i32> inreg %rsrc, i32 %s, i32 %t) { ; UNPACKED-LABEL: name: image_load_tfe_v4f16_dmask_1000 ; UNPACKED: bb.1 (%ir-block.0): - ; UNPACKED: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1 - ; UNPACKED: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 - ; UNPACKED: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3 - ; UNPACKED: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4 - 
; UNPACKED: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5 - ; UNPACKED: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6 - ; UNPACKED: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7 - ; UNPACKED: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8 - ; UNPACKED: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 - ; UNPACKED: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) - ; UNPACKED: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; UNPACKED: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; UNPACKED: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF - ; UNPACKED: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32) - ; UNPACKED: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<2 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.2d), 1, [[BUILD_VECTOR1]](<2 x s32>), $noreg, [[BUILD_VECTOR]](<8 x s32>), 1, 0, 0 :: (dereferenceable load (s16) from custom "ImageResource") - ; UNPACKED: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<2 x s32>) - ; UNPACKED: G_STORE [[UV1]](s32), [[DEF]](p1) :: (store (s32) into `i32 addrspace(1)* undef`, addrspace 1) - ; UNPACKED: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 - ; UNPACKED: [[AND:%[0-9]+]]:_(s32) = G_AND [[UV]], [[C]] - ; UNPACKED: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 - ; UNPACKED: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; UNPACKED: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[C1]], [[C2]](s32) - ; UNPACKED: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]] - ; UNPACKED: [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) - ; UNPACKED: [[OR1:%[0-9]+]]:_(s32) = G_OR [[C1]], [[SHL]] - ; UNPACKED: [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32) - ; UNPACKED: $vgpr0 = COPY [[BITCAST]](<2 x s16>) - ; UNPACKED: $vgpr1 = COPY [[BITCAST1]](<2 x s16>) - ; UNPACKED: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1 + ; UNPACKED-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1 + ; UNPACKED-NEXT: {{ $}} + ; UNPACKED-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 + ; UNPACKED-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3 + ; UNPACKED-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4 + ; UNPACKED-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5 + ; UNPACKED-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6 + ; UNPACKED-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7 + ; UNPACKED-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8 + ; UNPACKED-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 + ; UNPACKED-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) + ; UNPACKED-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0 + ; UNPACKED-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; UNPACKED-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF + ; UNPACKED-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32) + ; UNPACKED-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<2 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.2d), 1, [[BUILD_VECTOR1]](<2 x s32>), $noreg, [[BUILD_VECTOR]](<8 x s32>), 1, 0, 0 :: (dereferenceable load (s16) from custom "ImageResource") + ; UNPACKED-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<2 x s32>) + ; UNPACKED-NEXT: G_STORE [[UV1]](s32), [[DEF]](p1) :: (store (s32) into `i32 addrspace(1)* undef`, addrspace 1) + ; UNPACKED-NEXT: 
[[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 + ; UNPACKED-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[UV]], [[C]] + ; UNPACKED-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 + ; UNPACKED-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; UNPACKED-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[C1]], [[C2]](s32) + ; UNPACKED-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]] + ; UNPACKED-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) + ; UNPACKED-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[C1]], [[SHL]] + ; UNPACKED-NEXT: [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32) + ; UNPACKED-NEXT: $vgpr0 = COPY [[BITCAST]](<2 x s16>) + ; UNPACKED-NEXT: $vgpr1 = COPY [[BITCAST1]](<2 x s16>) + ; UNPACKED-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1 ; PACKED-LABEL: name: image_load_tfe_v4f16_dmask_1000 ; PACKED: bb.1 (%ir-block.0): - ; PACKED: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1 - ; PACKED: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 - ; PACKED: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3 - ; PACKED: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4 - ; PACKED: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5 - ; PACKED: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6 - ; PACKED: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7 - ; PACKED: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8 - ; PACKED: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 - ; PACKED: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) - ; PACKED: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; PACKED: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; PACKED: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF - ; PACKED: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32) - ; PACKED: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<2 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.2d), 1, [[BUILD_VECTOR1]](<2 x s32>), $noreg, [[BUILD_VECTOR]](<8 x s32>), 1, 0, 0 :: (dereferenceable load (s16) from custom "ImageResource") - ; PACKED: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<2 x s32>) - ; PACKED: [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[UV]](s32) - ; PACKED: [[DEF1:%[0-9]+]]:_(<2 x s16>) = G_IMPLICIT_DEF - ; PACKED: G_STORE [[UV1]](s32), [[DEF]](p1) :: (store (s32) into `i32 addrspace(1)* undef`, addrspace 1) - ; PACKED: $vgpr0 = COPY [[BITCAST]](<2 x s16>) - ; PACKED: $vgpr1 = COPY [[DEF1]](<2 x s16>) - ; PACKED: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1 + ; PACKED-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1 + ; PACKED-NEXT: {{ $}} + ; PACKED-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 + ; PACKED-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3 + ; PACKED-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4 + ; PACKED-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5 + ; PACKED-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6 + ; PACKED-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7 + ; PACKED-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8 + ; PACKED-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 + ; PACKED-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) + ; PACKED-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0 + ; PACKED-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; PACKED-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF + ; PACKED-NEXT: 
[[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32) + ; PACKED-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<2 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.2d), 1, [[BUILD_VECTOR1]](<2 x s32>), $noreg, [[BUILD_VECTOR]](<8 x s32>), 1, 0, 0 :: (dereferenceable load (s16) from custom "ImageResource") + ; PACKED-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<2 x s32>) + ; PACKED-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[UV]](s32) + ; PACKED-NEXT: [[DEF1:%[0-9]+]]:_(<2 x s16>) = G_IMPLICIT_DEF + ; PACKED-NEXT: G_STORE [[UV1]](s32), [[DEF]](p1) :: (store (s32) into `i32 addrspace(1)* undef`, addrspace 1) + ; PACKED-NEXT: $vgpr0 = COPY [[BITCAST]](<2 x s16>) + ; PACKED-NEXT: $vgpr1 = COPY [[DEF1]](<2 x s16>) + ; PACKED-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1 %res = call { <4 x half>, i32 } @llvm.amdgcn.image.load.2d.sl_v4f16i32s.i32(i32 1, i32 %s, i32 %t, <8 x i32> %rsrc, i32 1, i32 0) %tex = extractvalue { <4 x half>, i32 } %res, 0 %tfe = extractvalue { <4 x half>, i32 } %res, 1 @@ -1559,59 +1613,61 @@ define amdgpu_ps <4 x half> @image_load_tfe_v4f16_dmask_1000(<8 x i32> inreg %rs define amdgpu_ps <4 x half> @image_load_tfe_v4f16_dmask_0000(<8 x i32> inreg %rsrc, i32 %s, i32 %t) { ; UNPACKED-LABEL: name: image_load_tfe_v4f16_dmask_0000 ; UNPACKED: bb.1 (%ir-block.0): - ; UNPACKED: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1 - ; UNPACKED: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 - ; UNPACKED: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3 - ; UNPACKED: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4 - ; UNPACKED: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5 - ; UNPACKED: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6 - ; UNPACKED: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7 - ; UNPACKED: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8 - ; UNPACKED: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 - ; UNPACKED: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) - ; UNPACKED: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; UNPACKED: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; UNPACKED: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF - ; UNPACKED: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32) - ; UNPACKED: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<2 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.2d), 1, [[BUILD_VECTOR1]](<2 x s32>), $noreg, [[BUILD_VECTOR]](<8 x s32>), 1, 0, 0 :: (dereferenceable load (s16) from custom "ImageResource") - ; UNPACKED: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<2 x s32>) - ; UNPACKED: G_STORE [[UV1]](s32), [[DEF]](p1) :: (store (s32) into `i32 addrspace(1)* undef`, addrspace 1) - ; UNPACKED: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 - ; UNPACKED: [[AND:%[0-9]+]]:_(s32) = G_AND [[UV]], [[C]] - ; UNPACKED: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 - ; UNPACKED: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; UNPACKED: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[C1]], [[C2]](s32) - ; UNPACKED: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]] - ; UNPACKED: [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) - ; UNPACKED: [[OR1:%[0-9]+]]:_(s32) = G_OR [[C1]], [[SHL]] - ; UNPACKED: [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32) - ; UNPACKED: $vgpr0 = COPY [[BITCAST]](<2 x s16>) - ; UNPACKED: $vgpr1 = COPY 
[[BITCAST1]](<2 x s16>) - ; UNPACKED: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1 + ; UNPACKED-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1 + ; UNPACKED-NEXT: {{ $}} + ; UNPACKED-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 + ; UNPACKED-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3 + ; UNPACKED-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4 + ; UNPACKED-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5 + ; UNPACKED-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6 + ; UNPACKED-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7 + ; UNPACKED-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8 + ; UNPACKED-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 + ; UNPACKED-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) + ; UNPACKED-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0 + ; UNPACKED-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; UNPACKED-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF + ; UNPACKED-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32) + ; UNPACKED-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<2 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.2d), 1, [[BUILD_VECTOR1]](<2 x s32>), $noreg, [[BUILD_VECTOR]](<8 x s32>), 1, 0, 0 :: (dereferenceable load (s16) from custom "ImageResource") + ; UNPACKED-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<2 x s32>) + ; UNPACKED-NEXT: G_STORE [[UV1]](s32), [[DEF]](p1) :: (store (s32) into `i32 addrspace(1)* undef`, addrspace 1) + ; UNPACKED-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 + ; UNPACKED-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[UV]], [[C]] + ; UNPACKED-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 + ; UNPACKED-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; UNPACKED-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[C1]], [[C2]](s32) + ; UNPACKED-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]] + ; UNPACKED-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) + ; UNPACKED-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[C1]], [[SHL]] + ; UNPACKED-NEXT: [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32) + ; UNPACKED-NEXT: $vgpr0 = COPY [[BITCAST]](<2 x s16>) + ; UNPACKED-NEXT: $vgpr1 = COPY [[BITCAST1]](<2 x s16>) + ; UNPACKED-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1 ; PACKED-LABEL: name: image_load_tfe_v4f16_dmask_0000 ; PACKED: bb.1 (%ir-block.0): - ; PACKED: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1 - ; PACKED: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 - ; PACKED: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3 - ; PACKED: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4 - ; PACKED: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5 - ; PACKED: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6 - ; PACKED: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7 - ; PACKED: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8 - ; PACKED: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 - ; PACKED: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) - ; PACKED: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; PACKED: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; PACKED: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF - ; PACKED: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32) - ; PACKED: 
[[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<2 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.2d), 1, [[BUILD_VECTOR1]](<2 x s32>), $noreg, [[BUILD_VECTOR]](<8 x s32>), 1, 0, 0 :: (dereferenceable load (s16) from custom "ImageResource") - ; PACKED: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<2 x s32>) - ; PACKED: [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[UV]](s32) - ; PACKED: [[DEF1:%[0-9]+]]:_(<2 x s16>) = G_IMPLICIT_DEF - ; PACKED: G_STORE [[UV1]](s32), [[DEF]](p1) :: (store (s32) into `i32 addrspace(1)* undef`, addrspace 1) - ; PACKED: $vgpr0 = COPY [[BITCAST]](<2 x s16>) - ; PACKED: $vgpr1 = COPY [[DEF1]](<2 x s16>) - ; PACKED: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1 + ; PACKED-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1 + ; PACKED-NEXT: {{ $}} + ; PACKED-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 + ; PACKED-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3 + ; PACKED-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4 + ; PACKED-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5 + ; PACKED-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6 + ; PACKED-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7 + ; PACKED-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8 + ; PACKED-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 + ; PACKED-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) + ; PACKED-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0 + ; PACKED-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; PACKED-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF + ; PACKED-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32) + ; PACKED-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<2 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.2d), 1, [[BUILD_VECTOR1]](<2 x s32>), $noreg, [[BUILD_VECTOR]](<8 x s32>), 1, 0, 0 :: (dereferenceable load (s16) from custom "ImageResource") + ; PACKED-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<2 x s32>) + ; PACKED-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[UV]](s32) + ; PACKED-NEXT: [[DEF1:%[0-9]+]]:_(<2 x s16>) = G_IMPLICIT_DEF + ; PACKED-NEXT: G_STORE [[UV1]](s32), [[DEF]](p1) :: (store (s32) into `i32 addrspace(1)* undef`, addrspace 1) + ; PACKED-NEXT: $vgpr0 = COPY [[BITCAST]](<2 x s16>) + ; PACKED-NEXT: $vgpr1 = COPY [[DEF1]](<2 x s16>) + ; PACKED-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1 %res = call { <4 x half>, i32 } @llvm.amdgcn.image.load.2d.sl_v4f16i32s.i32(i32 0, i32 %s, i32 %t, <8 x i32> %rsrc, i32 1, i32 0) %tex = extractvalue { <4 x half>, i32 } %res, 0 %tfe = extractvalue { <4 x half>, i32 } %res, 1 diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-llvm.amdgcn.image.load.2d.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-llvm.amdgcn.image.load.2d.ll index 95bc6b2d00be..e605938b4b7b 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-llvm.amdgcn.image.load.2d.ll +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-llvm.amdgcn.image.load.2d.ll @@ -1,5 +1,5 @@ ; NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py -; RUN: llc -global-isel -mtriple=amdgcn-mesa-mesa3d -mcpu=tonga -stop-after=legalizer -global-isel-abort=0 -o - %s | FileCheck -check-prefix=GCN %s +; RUN: llc -global-isel -mtriple=amdgcn-mesa-mesa3d -mcpu=tonga -stop-after=legalizer 
-o - %s | FileCheck -check-prefix=GCN %s define amdgpu_ps float @image_load_f32(<8 x i32> inreg %rsrc, i32 %s, i32 %t) { ; GCN-LABEL: name: image_load_f32 diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-llvm.amdgcn.image.store.2d.d16.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-llvm.amdgcn.image.store.2d.d16.ll index ce9ef775779b..1d84858080da 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-llvm.amdgcn.image.store.2d.d16.ll +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-llvm.amdgcn.image.store.2d.d16.ll @@ -7,80 +7,84 @@ define amdgpu_ps void @image_store_f16(<8 x i32> inreg %rsrc, i32 %s, i32 %t, half %data) { ; UNPACKED-LABEL: name: image_store_f16 ; UNPACKED: bb.1 (%ir-block.0): - ; UNPACKED: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1, $vgpr2 - ; UNPACKED: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 - ; UNPACKED: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3 - ; UNPACKED: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4 - ; UNPACKED: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5 - ; UNPACKED: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6 - ; UNPACKED: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7 - ; UNPACKED: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8 - ; UNPACKED: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 - ; UNPACKED: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) - ; UNPACKED: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; UNPACKED: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; UNPACKED: [[COPY10:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; UNPACKED: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY10]](s32) - ; UNPACKED: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32) - ; UNPACKED: G_AMDGPU_INTRIN_IMAGE_STORE intrinsic(@llvm.amdgcn.image.store.2d), [[TRUNC]](s16), 1, [[BUILD_VECTOR1]](<2 x s32>), $noreg, [[BUILD_VECTOR]](<8 x s32>), 0, 0, 0 :: (dereferenceable store (s16) into custom "ImageResource") - ; UNPACKED: S_ENDPGM 0 + ; UNPACKED-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1, $vgpr2 + ; UNPACKED-NEXT: {{ $}} + ; UNPACKED-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 + ; UNPACKED-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3 + ; UNPACKED-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4 + ; UNPACKED-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5 + ; UNPACKED-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6 + ; UNPACKED-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7 + ; UNPACKED-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8 + ; UNPACKED-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 + ; UNPACKED-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) + ; UNPACKED-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0 + ; UNPACKED-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; UNPACKED-NEXT: [[COPY10:%[0-9]+]]:_(s32) = COPY $vgpr2 + ; UNPACKED-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY10]](s32) + ; UNPACKED-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32) + ; UNPACKED-NEXT: G_AMDGPU_INTRIN_IMAGE_STORE intrinsic(@llvm.amdgcn.image.store.2d), [[TRUNC]](s16), 1, [[BUILD_VECTOR1]](<2 x s32>), $noreg, [[BUILD_VECTOR]](<8 x s32>), 0, 0, 0 :: (dereferenceable store (s16) into custom "ImageResource") + ; UNPACKED-NEXT: S_ENDPGM 0 ; GFX81-LABEL: name: image_store_f16 ; GFX81: bb.1 (%ir-block.0): - ; GFX81: liveins: 
$sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1, $vgpr2 - ; GFX81: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 - ; GFX81: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3 - ; GFX81: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4 - ; GFX81: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5 - ; GFX81: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6 - ; GFX81: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7 - ; GFX81: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8 - ; GFX81: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 - ; GFX81: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) - ; GFX81: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX81: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX81: [[COPY10:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; GFX81: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY10]](s32) - ; GFX81: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32) - ; GFX81: G_AMDGPU_INTRIN_IMAGE_STORE intrinsic(@llvm.amdgcn.image.store.2d), [[TRUNC]](s16), 1, [[BUILD_VECTOR1]](<2 x s32>), $noreg, [[BUILD_VECTOR]](<8 x s32>), 0, 0, 0 :: (dereferenceable store (s16) into custom "ImageResource") - ; GFX81: S_ENDPGM 0 + ; GFX81-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1, $vgpr2 + ; GFX81-NEXT: {{ $}} + ; GFX81-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 + ; GFX81-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3 + ; GFX81-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4 + ; GFX81-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5 + ; GFX81-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6 + ; GFX81-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7 + ; GFX81-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8 + ; GFX81-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 + ; GFX81-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) + ; GFX81-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0 + ; GFX81-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; GFX81-NEXT: [[COPY10:%[0-9]+]]:_(s32) = COPY $vgpr2 + ; GFX81-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY10]](s32) + ; GFX81-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32) + ; GFX81-NEXT: G_AMDGPU_INTRIN_IMAGE_STORE intrinsic(@llvm.amdgcn.image.store.2d), [[TRUNC]](s16), 1, [[BUILD_VECTOR1]](<2 x s32>), $noreg, [[BUILD_VECTOR]](<8 x s32>), 0, 0, 0 :: (dereferenceable store (s16) into custom "ImageResource") + ; GFX81-NEXT: S_ENDPGM 0 ; GFX9-LABEL: name: image_store_f16 ; GFX9: bb.1 (%ir-block.0): - ; GFX9: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1, $vgpr2 - ; GFX9: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 - ; GFX9: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3 - ; GFX9: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4 - ; GFX9: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5 - ; GFX9: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6 - ; GFX9: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7 - ; GFX9: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8 - ; GFX9: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 - ; GFX9: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) - ; GFX9: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX9: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX9: [[COPY10:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; GFX9: [[TRUNC:%[0-9]+]]:_(s16) = 
G_TRUNC [[COPY10]](s32) - ; GFX9: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32) - ; GFX9: G_AMDGPU_INTRIN_IMAGE_STORE intrinsic(@llvm.amdgcn.image.store.2d), [[TRUNC]](s16), 1, [[BUILD_VECTOR1]](<2 x s32>), $noreg, [[BUILD_VECTOR]](<8 x s32>), 0, 0, 0 :: (dereferenceable store (s16) into custom "ImageResource") - ; GFX9: S_ENDPGM 0 + ; GFX9-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1, $vgpr2 + ; GFX9-NEXT: {{ $}} + ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 + ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3 + ; GFX9-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4 + ; GFX9-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5 + ; GFX9-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6 + ; GFX9-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7 + ; GFX9-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8 + ; GFX9-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 + ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) + ; GFX9-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0 + ; GFX9-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; GFX9-NEXT: [[COPY10:%[0-9]+]]:_(s32) = COPY $vgpr2 + ; GFX9-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY10]](s32) + ; GFX9-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32) + ; GFX9-NEXT: G_AMDGPU_INTRIN_IMAGE_STORE intrinsic(@llvm.amdgcn.image.store.2d), [[TRUNC]](s16), 1, [[BUILD_VECTOR1]](<2 x s32>), $noreg, [[BUILD_VECTOR]](<8 x s32>), 0, 0, 0 :: (dereferenceable store (s16) into custom "ImageResource") + ; GFX9-NEXT: S_ENDPGM 0 ; GFX10-LABEL: name: image_store_f16 ; GFX10: bb.1 (%ir-block.0): - ; GFX10: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1, $vgpr2 - ; GFX10: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 - ; GFX10: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3 - ; GFX10: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4 - ; GFX10: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5 - ; GFX10: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6 - ; GFX10: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7 - ; GFX10: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8 - ; GFX10: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 - ; GFX10: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) - ; GFX10: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX10: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX10: [[COPY10:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; GFX10: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY10]](s32) - ; GFX10: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32) - ; GFX10: G_AMDGPU_INTRIN_IMAGE_STORE intrinsic(@llvm.amdgcn.image.store.2d), [[TRUNC]](s16), 1, [[BUILD_VECTOR1]](<2 x s32>), $noreg, [[BUILD_VECTOR]](<8 x s32>), 0, 0, 0 :: (dereferenceable store (s16) into custom "ImageResource") - ; GFX10: S_ENDPGM 0 + ; GFX10-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1, $vgpr2 + ; GFX10-NEXT: {{ $}} + ; GFX10-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 + ; GFX10-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3 + ; GFX10-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4 + ; GFX10-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5 + ; GFX10-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6 + ; GFX10-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7 + ; GFX10-NEXT: 
[[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8 + ; GFX10-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 + ; GFX10-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) + ; GFX10-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0 + ; GFX10-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; GFX10-NEXT: [[COPY10:%[0-9]+]]:_(s32) = COPY $vgpr2 + ; GFX10-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY10]](s32) + ; GFX10-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32) + ; GFX10-NEXT: G_AMDGPU_INTRIN_IMAGE_STORE intrinsic(@llvm.amdgcn.image.store.2d), [[TRUNC]](s16), 1, [[BUILD_VECTOR1]](<2 x s32>), $noreg, [[BUILD_VECTOR]](<8 x s32>), 0, 0, 0 :: (dereferenceable store (s16) into custom "ImageResource") + ; GFX10-NEXT: S_ENDPGM 0 call void @llvm.amdgcn.image.store.2d.f16.i32(half %data, i32 1, i32 %s, i32 %t, <8 x i32> %rsrc, i32 0, i32 0) ret void } @@ -88,83 +92,87 @@ define amdgpu_ps void @image_store_f16(<8 x i32> inreg %rsrc, i32 %s, i32 %t, ha define amdgpu_ps void @image_store_v2f16(<8 x i32> inreg %rsrc, i32 %s, i32 %t, <2 x half> %in) { ; UNPACKED-LABEL: name: image_store_v2f16 ; UNPACKED: bb.1 (%ir-block.0): - ; UNPACKED: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1, $vgpr2 - ; UNPACKED: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 - ; UNPACKED: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3 - ; UNPACKED: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4 - ; UNPACKED: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5 - ; UNPACKED: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6 - ; UNPACKED: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7 - ; UNPACKED: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8 - ; UNPACKED: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 - ; UNPACKED: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) - ; UNPACKED: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; UNPACKED: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; UNPACKED: [[COPY10:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr2 - ; UNPACKED: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32) - ; UNPACKED: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY10]](<2 x s16>) - ; UNPACKED: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; UNPACKED: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) - ; UNPACKED: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[BITCAST]](s32), [[LSHR]](s32) - ; UNPACKED: G_AMDGPU_INTRIN_IMAGE_STORE intrinsic(@llvm.amdgcn.image.store.2d), [[BUILD_VECTOR2]](<2 x s32>), 3, [[BUILD_VECTOR1]](<2 x s32>), $noreg, [[BUILD_VECTOR]](<8 x s32>), 0, 0, 0 :: (dereferenceable store (<2 x s16>) into custom "ImageResource") - ; UNPACKED: S_ENDPGM 0 + ; UNPACKED-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1, $vgpr2 + ; UNPACKED-NEXT: {{ $}} + ; UNPACKED-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 + ; UNPACKED-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3 + ; UNPACKED-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4 + ; UNPACKED-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5 + ; UNPACKED-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6 + ; UNPACKED-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7 + ; UNPACKED-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8 + ; UNPACKED-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 + ; UNPACKED-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR 
[[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) + ; UNPACKED-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0 + ; UNPACKED-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; UNPACKED-NEXT: [[COPY10:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr2 + ; UNPACKED-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32) + ; UNPACKED-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY10]](<2 x s16>) + ; UNPACKED-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; UNPACKED-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) + ; UNPACKED-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[BITCAST]](s32), [[LSHR]](s32) + ; UNPACKED-NEXT: G_AMDGPU_INTRIN_IMAGE_STORE intrinsic(@llvm.amdgcn.image.store.2d), [[BUILD_VECTOR2]](<2 x s32>), 3, [[BUILD_VECTOR1]](<2 x s32>), $noreg, [[BUILD_VECTOR]](<8 x s32>), 0, 0, 0 :: (dereferenceable store (<2 x s16>) into custom "ImageResource") + ; UNPACKED-NEXT: S_ENDPGM 0 ; GFX81-LABEL: name: image_store_v2f16 ; GFX81: bb.1 (%ir-block.0): - ; GFX81: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1, $vgpr2 - ; GFX81: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 - ; GFX81: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3 - ; GFX81: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4 - ; GFX81: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5 - ; GFX81: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6 - ; GFX81: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7 - ; GFX81: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8 - ; GFX81: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 - ; GFX81: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) - ; GFX81: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX81: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX81: [[COPY10:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr2 - ; GFX81: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32) - ; GFX81: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY10]](<2 x s16>) - ; GFX81: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF - ; GFX81: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[BITCAST]](s32), [[DEF]](s32) - ; GFX81: G_AMDGPU_INTRIN_IMAGE_STORE intrinsic(@llvm.amdgcn.image.store.2d), [[BUILD_VECTOR2]](<2 x s32>), 3, [[BUILD_VECTOR1]](<2 x s32>), $noreg, [[BUILD_VECTOR]](<8 x s32>), 0, 0, 0 :: (dereferenceable store (<2 x s16>) into custom "ImageResource") - ; GFX81: S_ENDPGM 0 + ; GFX81-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1, $vgpr2 + ; GFX81-NEXT: {{ $}} + ; GFX81-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 + ; GFX81-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3 + ; GFX81-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4 + ; GFX81-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5 + ; GFX81-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6 + ; GFX81-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7 + ; GFX81-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8 + ; GFX81-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 + ; GFX81-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) + ; GFX81-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0 + ; GFX81-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; GFX81-NEXT: [[COPY10:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr2 + ; GFX81-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = 
G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32) + ; GFX81-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY10]](<2 x s16>) + ; GFX81-NEXT: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF + ; GFX81-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[BITCAST]](s32), [[DEF]](s32) + ; GFX81-NEXT: G_AMDGPU_INTRIN_IMAGE_STORE intrinsic(@llvm.amdgcn.image.store.2d), [[BUILD_VECTOR2]](<2 x s32>), 3, [[BUILD_VECTOR1]](<2 x s32>), $noreg, [[BUILD_VECTOR]](<8 x s32>), 0, 0, 0 :: (dereferenceable store (<2 x s16>) into custom "ImageResource") + ; GFX81-NEXT: S_ENDPGM 0 ; GFX9-LABEL: name: image_store_v2f16 ; GFX9: bb.1 (%ir-block.0): - ; GFX9: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1, $vgpr2 - ; GFX9: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 - ; GFX9: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3 - ; GFX9: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4 - ; GFX9: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5 - ; GFX9: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6 - ; GFX9: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7 - ; GFX9: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8 - ; GFX9: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 - ; GFX9: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) - ; GFX9: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX9: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX9: [[COPY10:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr2 - ; GFX9: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32) - ; GFX9: G_AMDGPU_INTRIN_IMAGE_STORE intrinsic(@llvm.amdgcn.image.store.2d), [[COPY10]](<2 x s16>), 3, [[BUILD_VECTOR1]](<2 x s32>), $noreg, [[BUILD_VECTOR]](<8 x s32>), 0, 0, 0 :: (dereferenceable store (<2 x s16>) into custom "ImageResource") - ; GFX9: S_ENDPGM 0 + ; GFX9-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1, $vgpr2 + ; GFX9-NEXT: {{ $}} + ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 + ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3 + ; GFX9-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4 + ; GFX9-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5 + ; GFX9-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6 + ; GFX9-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7 + ; GFX9-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8 + ; GFX9-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 + ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) + ; GFX9-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0 + ; GFX9-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; GFX9-NEXT: [[COPY10:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr2 + ; GFX9-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32) + ; GFX9-NEXT: G_AMDGPU_INTRIN_IMAGE_STORE intrinsic(@llvm.amdgcn.image.store.2d), [[COPY10]](<2 x s16>), 3, [[BUILD_VECTOR1]](<2 x s32>), $noreg, [[BUILD_VECTOR]](<8 x s32>), 0, 0, 0 :: (dereferenceable store (<2 x s16>) into custom "ImageResource") + ; GFX9-NEXT: S_ENDPGM 0 ; GFX10-LABEL: name: image_store_v2f16 ; GFX10: bb.1 (%ir-block.0): - ; GFX10: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1, $vgpr2 - ; GFX10: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 - ; GFX10: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3 - ; GFX10: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4 - ; GFX10: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5 - ; GFX10: 
[[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6 - ; GFX10: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7 - ; GFX10: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8 - ; GFX10: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 - ; GFX10: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) - ; GFX10: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX10: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX10: [[COPY10:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr2 - ; GFX10: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32) - ; GFX10: G_AMDGPU_INTRIN_IMAGE_STORE intrinsic(@llvm.amdgcn.image.store.2d), [[COPY10]](<2 x s16>), 3, [[BUILD_VECTOR1]](<2 x s32>), $noreg, [[BUILD_VECTOR]](<8 x s32>), 0, 0, 0 :: (dereferenceable store (<2 x s16>) into custom "ImageResource") - ; GFX10: S_ENDPGM 0 + ; GFX10-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1, $vgpr2 + ; GFX10-NEXT: {{ $}} + ; GFX10-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 + ; GFX10-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3 + ; GFX10-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4 + ; GFX10-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5 + ; GFX10-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6 + ; GFX10-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7 + ; GFX10-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8 + ; GFX10-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 + ; GFX10-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) + ; GFX10-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0 + ; GFX10-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; GFX10-NEXT: [[COPY10:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr2 + ; GFX10-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32) + ; GFX10-NEXT: G_AMDGPU_INTRIN_IMAGE_STORE intrinsic(@llvm.amdgcn.image.store.2d), [[COPY10]](<2 x s16>), 3, [[BUILD_VECTOR1]](<2 x s32>), $noreg, [[BUILD_VECTOR]](<8 x s32>), 0, 0, 0 :: (dereferenceable store (<2 x s16>) into custom "ImageResource") + ; GFX10-NEXT: S_ENDPGM 0 call void @llvm.amdgcn.image.store.2d.v2f16.i32(<2 x half> %in, i32 3, i32 %s, i32 %t, <8 x i32> %rsrc, i32 0, i32 0) ret void } @@ -172,130 +180,134 @@ define amdgpu_ps void @image_store_v2f16(<8 x i32> inreg %rsrc, i32 %s, i32 %t, define amdgpu_ps void @image_store_v3f16(<8 x i32> inreg %rsrc, i32 %s, i32 %t, <3 x half> %in) { ; UNPACKED-LABEL: name: image_store_v3f16 ; UNPACKED: bb.1 (%ir-block.0): - ; UNPACKED: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1, $vgpr2, $vgpr3 - ; UNPACKED: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 - ; UNPACKED: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3 - ; UNPACKED: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4 - ; UNPACKED: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5 - ; UNPACKED: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6 - ; UNPACKED: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7 - ; UNPACKED: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8 - ; UNPACKED: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 - ; UNPACKED: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) - ; UNPACKED: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; UNPACKED: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; UNPACKED: [[COPY10:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr2 - ; UNPACKED: 
[[COPY11:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr3 - ; UNPACKED: [[DEF:%[0-9]+]]:_(<2 x s16>) = G_IMPLICIT_DEF - ; UNPACKED: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[COPY10]](<2 x s16>), [[COPY11]](<2 x s16>), [[DEF]](<2 x s16>) - ; UNPACKED: [[BITCAST:%[0-9]+]]:_(s96) = G_BITCAST [[CONCAT_VECTORS]](<6 x s16>) - ; UNPACKED: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[BITCAST]](s96) - ; UNPACKED: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; UNPACKED: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[UV]], [[C]](s32) - ; UNPACKED: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32) - ; UNPACKED: [[BUILD_VECTOR2:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[UV]](s32), [[LSHR]](s32), [[UV1]](s32) - ; UNPACKED: G_AMDGPU_INTRIN_IMAGE_STORE intrinsic(@llvm.amdgcn.image.store.2d), [[BUILD_VECTOR2]](<3 x s32>), 7, [[BUILD_VECTOR1]](<2 x s32>), $noreg, [[BUILD_VECTOR]](<8 x s32>), 0, 0, 0 :: (dereferenceable store (<3 x s16>) into custom "ImageResource", align 8) - ; UNPACKED: S_ENDPGM 0 + ; UNPACKED-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1, $vgpr2, $vgpr3 + ; UNPACKED-NEXT: {{ $}} + ; UNPACKED-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 + ; UNPACKED-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3 + ; UNPACKED-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4 + ; UNPACKED-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5 + ; UNPACKED-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6 + ; UNPACKED-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7 + ; UNPACKED-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8 + ; UNPACKED-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 + ; UNPACKED-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) + ; UNPACKED-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0 + ; UNPACKED-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; UNPACKED-NEXT: [[COPY10:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr2 + ; UNPACKED-NEXT: [[COPY11:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr3 + ; UNPACKED-NEXT: [[DEF:%[0-9]+]]:_(<2 x s16>) = G_IMPLICIT_DEF + ; UNPACKED-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[COPY10]](<2 x s16>), [[COPY11]](<2 x s16>), [[DEF]](<2 x s16>) + ; UNPACKED-NEXT: [[BITCAST:%[0-9]+]]:_(s96) = G_BITCAST [[CONCAT_VECTORS]](<6 x s16>) + ; UNPACKED-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[BITCAST]](s96) + ; UNPACKED-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; UNPACKED-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[UV]], [[C]](s32) + ; UNPACKED-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32) + ; UNPACKED-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[UV]](s32), [[LSHR]](s32), [[UV1]](s32) + ; UNPACKED-NEXT: G_AMDGPU_INTRIN_IMAGE_STORE intrinsic(@llvm.amdgcn.image.store.2d), [[BUILD_VECTOR2]](<3 x s32>), 7, [[BUILD_VECTOR1]](<2 x s32>), $noreg, [[BUILD_VECTOR]](<8 x s32>), 0, 0, 0 :: (dereferenceable store (<3 x s16>) into custom "ImageResource", align 8) + ; UNPACKED-NEXT: S_ENDPGM 0 ; GFX81-LABEL: name: image_store_v3f16 ; GFX81: bb.1 (%ir-block.0): - ; GFX81: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1, $vgpr2, $vgpr3 - ; GFX81: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 - ; GFX81: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3 - ; GFX81: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4 - ; GFX81: 
[[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5 - ; GFX81: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6 - ; GFX81: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7 - ; GFX81: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8 - ; GFX81: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 - ; GFX81: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) - ; GFX81: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX81: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX81: [[COPY10:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr2 - ; GFX81: [[COPY11:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr3 - ; GFX81: [[DEF:%[0-9]+]]:_(<2 x s16>) = G_IMPLICIT_DEF - ; GFX81: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[COPY10]](<2 x s16>), [[COPY11]](<2 x s16>), [[DEF]](<2 x s16>) - ; GFX81: [[BITCAST:%[0-9]+]]:_(s96) = G_BITCAST [[CONCAT_VECTORS]](<6 x s16>) - ; GFX81: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[BITCAST]](s96) - ; GFX81: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; GFX81: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[UV]], [[C]](s32) - ; GFX81: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32) - ; GFX81: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 - ; GFX81: [[AND:%[0-9]+]]:_(s32) = G_AND [[UV]], [[C1]] - ; GFX81: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LSHR]], [[C1]] - ; GFX81: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C]](s32) - ; GFX81: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]] - ; GFX81: [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) - ; GFX81: [[AND2:%[0-9]+]]:_(s32) = G_AND [[UV1]], [[C1]] - ; GFX81: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 - ; GFX81: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[C2]], [[C]](s32) - ; GFX81: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL1]] - ; GFX81: [[BITCAST2:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32) - ; GFX81: [[OR2:%[0-9]+]]:_(s32) = G_OR [[C2]], [[SHL1]] - ; GFX81: [[BITCAST3:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR2]](s32) - ; GFX81: [[CONCAT_VECTORS1:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BITCAST1]](<2 x s16>), [[BITCAST2]](<2 x s16>), [[BITCAST3]](<2 x s16>) - ; GFX81: [[BITCAST4:%[0-9]+]]:_(<3 x s32>) = G_BITCAST [[CONCAT_VECTORS1]](<6 x s16>) - ; GFX81: G_AMDGPU_INTRIN_IMAGE_STORE intrinsic(@llvm.amdgcn.image.store.2d), [[BITCAST4]](<3 x s32>), 7, [[BUILD_VECTOR1]](<2 x s32>), $noreg, [[BUILD_VECTOR]](<8 x s32>), 0, 0, 0 :: (dereferenceable store (<3 x s16>) into custom "ImageResource", align 8) - ; GFX81: S_ENDPGM 0 + ; GFX81-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1, $vgpr2, $vgpr3 + ; GFX81-NEXT: {{ $}} + ; GFX81-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 + ; GFX81-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3 + ; GFX81-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4 + ; GFX81-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5 + ; GFX81-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6 + ; GFX81-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7 + ; GFX81-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8 + ; GFX81-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 + ; GFX81-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) + ; GFX81-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0 + ; GFX81-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; GFX81-NEXT: [[COPY10:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr2 + ; GFX81-NEXT: [[COPY11:%[0-9]+]]:_(<2 x s16>) = 
COPY $vgpr3 + ; GFX81-NEXT: [[DEF:%[0-9]+]]:_(<2 x s16>) = G_IMPLICIT_DEF + ; GFX81-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[COPY10]](<2 x s16>), [[COPY11]](<2 x s16>), [[DEF]](<2 x s16>) + ; GFX81-NEXT: [[BITCAST:%[0-9]+]]:_(s96) = G_BITCAST [[CONCAT_VECTORS]](<6 x s16>) + ; GFX81-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[BITCAST]](s96) + ; GFX81-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; GFX81-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[UV]], [[C]](s32) + ; GFX81-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32) + ; GFX81-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 + ; GFX81-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[UV]], [[C1]] + ; GFX81-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LSHR]], [[C1]] + ; GFX81-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C]](s32) + ; GFX81-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]] + ; GFX81-NEXT: [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) + ; GFX81-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[UV1]], [[C1]] + ; GFX81-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 + ; GFX81-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[C2]], [[C]](s32) + ; GFX81-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL1]] + ; GFX81-NEXT: [[BITCAST2:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32) + ; GFX81-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[C2]], [[SHL1]] + ; GFX81-NEXT: [[BITCAST3:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR2]](s32) + ; GFX81-NEXT: [[CONCAT_VECTORS1:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BITCAST1]](<2 x s16>), [[BITCAST2]](<2 x s16>), [[BITCAST3]](<2 x s16>) + ; GFX81-NEXT: [[BITCAST4:%[0-9]+]]:_(<3 x s32>) = G_BITCAST [[CONCAT_VECTORS1]](<6 x s16>) + ; GFX81-NEXT: G_AMDGPU_INTRIN_IMAGE_STORE intrinsic(@llvm.amdgcn.image.store.2d), [[BITCAST4]](<3 x s32>), 7, [[BUILD_VECTOR1]](<2 x s32>), $noreg, [[BUILD_VECTOR]](<8 x s32>), 0, 0, 0 :: (dereferenceable store (<3 x s16>) into custom "ImageResource", align 8) + ; GFX81-NEXT: S_ENDPGM 0 ; GFX9-LABEL: name: image_store_v3f16 ; GFX9: bb.1 (%ir-block.0): - ; GFX9: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1, $vgpr2, $vgpr3 - ; GFX9: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 - ; GFX9: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3 - ; GFX9: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4 - ; GFX9: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5 - ; GFX9: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6 - ; GFX9: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7 - ; GFX9: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8 - ; GFX9: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 - ; GFX9: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) - ; GFX9: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX9: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX9: [[COPY10:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr2 - ; GFX9: [[COPY11:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr3 - ; GFX9: [[DEF:%[0-9]+]]:_(<2 x s16>) = G_IMPLICIT_DEF - ; GFX9: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[COPY10]](<2 x s16>), [[COPY11]](<2 x s16>), [[DEF]](<2 x s16>) - ; GFX9: [[BITCAST:%[0-9]+]]:_(s96) = G_BITCAST [[CONCAT_VECTORS]](<6 x s16>) - ; GFX9: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[BITCAST]](s96) - ; GFX9: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; GFX9: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[UV]], [[C]](s32) - ; GFX9: 
[[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[UV]](s32), [[LSHR]](s32) - ; GFX9: [[DEF1:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF - ; GFX9: [[BUILD_VECTOR_TRUNC1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[UV1]](s32), [[DEF1]](s32) - ; GFX9: [[CONCAT_VECTORS1:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR_TRUNC]](<2 x s16>), [[BUILD_VECTOR_TRUNC1]](<2 x s16>), [[DEF]](<2 x s16>) - ; GFX9: [[UV3:%[0-9]+]]:_(<3 x s16>), [[UV4:%[0-9]+]]:_(<3 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS1]](<6 x s16>) - ; GFX9: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32) - ; GFX9: G_AMDGPU_INTRIN_IMAGE_STORE intrinsic(@llvm.amdgcn.image.store.2d), [[UV3]](<3 x s16>), 7, [[BUILD_VECTOR1]](<2 x s32>), $noreg, [[BUILD_VECTOR]](<8 x s32>), 0, 0, 0 :: (dereferenceable store (<3 x s16>) into custom "ImageResource", align 8) - ; GFX9: S_ENDPGM 0 + ; GFX9-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1, $vgpr2, $vgpr3 + ; GFX9-NEXT: {{ $}} + ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 + ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3 + ; GFX9-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4 + ; GFX9-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5 + ; GFX9-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6 + ; GFX9-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7 + ; GFX9-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8 + ; GFX9-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 + ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) + ; GFX9-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0 + ; GFX9-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; GFX9-NEXT: [[COPY10:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr2 + ; GFX9-NEXT: [[COPY11:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr3 + ; GFX9-NEXT: [[DEF:%[0-9]+]]:_(<2 x s16>) = G_IMPLICIT_DEF + ; GFX9-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[COPY10]](<2 x s16>), [[COPY11]](<2 x s16>), [[DEF]](<2 x s16>) + ; GFX9-NEXT: [[BITCAST:%[0-9]+]]:_(s96) = G_BITCAST [[CONCAT_VECTORS]](<6 x s16>) + ; GFX9-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[BITCAST]](s96) + ; GFX9-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; GFX9-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[UV]], [[C]](s32) + ; GFX9-NEXT: [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[UV]](s32), [[LSHR]](s32) + ; GFX9-NEXT: [[DEF1:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF + ; GFX9-NEXT: [[BUILD_VECTOR_TRUNC1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[UV1]](s32), [[DEF1]](s32) + ; GFX9-NEXT: [[CONCAT_VECTORS1:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR_TRUNC]](<2 x s16>), [[BUILD_VECTOR_TRUNC1]](<2 x s16>), [[DEF]](<2 x s16>) + ; GFX9-NEXT: [[UV3:%[0-9]+]]:_(<3 x s16>), [[UV4:%[0-9]+]]:_(<3 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS1]](<6 x s16>) + ; GFX9-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32) + ; GFX9-NEXT: G_AMDGPU_INTRIN_IMAGE_STORE intrinsic(@llvm.amdgcn.image.store.2d), [[UV3]](<3 x s16>), 7, [[BUILD_VECTOR1]](<2 x s32>), $noreg, [[BUILD_VECTOR]](<8 x s32>), 0, 0, 0 :: (dereferenceable store (<3 x s16>) into custom "ImageResource", align 8) + ; GFX9-NEXT: S_ENDPGM 0 ; GFX10-LABEL: name: image_store_v3f16 ; GFX10: bb.1 (%ir-block.0): - ; GFX10: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1, $vgpr2, 
$vgpr3 - ; GFX10: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 - ; GFX10: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3 - ; GFX10: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4 - ; GFX10: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5 - ; GFX10: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6 - ; GFX10: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7 - ; GFX10: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8 - ; GFX10: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 - ; GFX10: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) - ; GFX10: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX10: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX10: [[COPY10:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr2 - ; GFX10: [[COPY11:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr3 - ; GFX10: [[DEF:%[0-9]+]]:_(<2 x s16>) = G_IMPLICIT_DEF - ; GFX10: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[COPY10]](<2 x s16>), [[COPY11]](<2 x s16>), [[DEF]](<2 x s16>) - ; GFX10: [[BITCAST:%[0-9]+]]:_(s96) = G_BITCAST [[CONCAT_VECTORS]](<6 x s16>) - ; GFX10: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[BITCAST]](s96) - ; GFX10: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; GFX10: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[UV]], [[C]](s32) - ; GFX10: [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[UV]](s32), [[LSHR]](s32) - ; GFX10: [[DEF1:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF - ; GFX10: [[BUILD_VECTOR_TRUNC1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[UV1]](s32), [[DEF1]](s32) - ; GFX10: [[CONCAT_VECTORS1:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR_TRUNC]](<2 x s16>), [[BUILD_VECTOR_TRUNC1]](<2 x s16>), [[DEF]](<2 x s16>) - ; GFX10: [[UV3:%[0-9]+]]:_(<3 x s16>), [[UV4:%[0-9]+]]:_(<3 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS1]](<6 x s16>) - ; GFX10: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32) - ; GFX10: G_AMDGPU_INTRIN_IMAGE_STORE intrinsic(@llvm.amdgcn.image.store.2d), [[UV3]](<3 x s16>), 7, [[BUILD_VECTOR1]](<2 x s32>), $noreg, [[BUILD_VECTOR]](<8 x s32>), 0, 0, 0 :: (dereferenceable store (<3 x s16>) into custom "ImageResource", align 8) - ; GFX10: S_ENDPGM 0 + ; GFX10-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1, $vgpr2, $vgpr3 + ; GFX10-NEXT: {{ $}} + ; GFX10-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 + ; GFX10-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3 + ; GFX10-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4 + ; GFX10-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5 + ; GFX10-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6 + ; GFX10-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7 + ; GFX10-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8 + ; GFX10-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 + ; GFX10-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) + ; GFX10-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0 + ; GFX10-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; GFX10-NEXT: [[COPY10:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr2 + ; GFX10-NEXT: [[COPY11:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr3 + ; GFX10-NEXT: [[DEF:%[0-9]+]]:_(<2 x s16>) = G_IMPLICIT_DEF + ; GFX10-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[COPY10]](<2 x s16>), [[COPY11]](<2 x s16>), [[DEF]](<2 x s16>) + ; GFX10-NEXT: [[BITCAST:%[0-9]+]]:_(s96) = G_BITCAST [[CONCAT_VECTORS]](<6 x s16>) + ; GFX10-NEXT: 
[[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[BITCAST]](s96) + ; GFX10-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; GFX10-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[UV]], [[C]](s32) + ; GFX10-NEXT: [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[UV]](s32), [[LSHR]](s32) + ; GFX10-NEXT: [[DEF1:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF + ; GFX10-NEXT: [[BUILD_VECTOR_TRUNC1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[UV1]](s32), [[DEF1]](s32) + ; GFX10-NEXT: [[CONCAT_VECTORS1:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR_TRUNC]](<2 x s16>), [[BUILD_VECTOR_TRUNC1]](<2 x s16>), [[DEF]](<2 x s16>) + ; GFX10-NEXT: [[UV3:%[0-9]+]]:_(<3 x s16>), [[UV4:%[0-9]+]]:_(<3 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS1]](<6 x s16>) + ; GFX10-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32) + ; GFX10-NEXT: G_AMDGPU_INTRIN_IMAGE_STORE intrinsic(@llvm.amdgcn.image.store.2d), [[UV3]](<3 x s16>), 7, [[BUILD_VECTOR1]](<2 x s32>), $noreg, [[BUILD_VECTOR]](<8 x s32>), 0, 0, 0 :: (dereferenceable store (<3 x s16>) into custom "ImageResource", align 8) + ; GFX10-NEXT: S_ENDPGM 0 call void @llvm.amdgcn.image.store.2d.v3f16.i32(<3 x half> %in, i32 7, i32 %s, i32 %t, <8 x i32> %rsrc, i32 0, i32 0) ret void } @@ -303,93 +315,97 @@ define amdgpu_ps void @image_store_v3f16(<8 x i32> inreg %rsrc, i32 %s, i32 %t, define amdgpu_ps void @image_store_v4f16(<8 x i32> inreg %rsrc, i32 %s, i32 %t, <4 x half> %in) { ; UNPACKED-LABEL: name: image_store_v4f16 ; UNPACKED: bb.1 (%ir-block.0): - ; UNPACKED: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1, $vgpr2, $vgpr3 - ; UNPACKED: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 - ; UNPACKED: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3 - ; UNPACKED: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4 - ; UNPACKED: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5 - ; UNPACKED: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6 - ; UNPACKED: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7 - ; UNPACKED: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8 - ; UNPACKED: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 - ; UNPACKED: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) - ; UNPACKED: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; UNPACKED: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; UNPACKED: [[COPY10:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr2 - ; UNPACKED: [[COPY11:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr3 - ; UNPACKED: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32) - ; UNPACKED: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY10]](<2 x s16>) - ; UNPACKED: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; UNPACKED: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) - ; UNPACKED: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[COPY11]](<2 x s16>) - ; UNPACKED: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C]](s32) - ; UNPACKED: [[BUILD_VECTOR2:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[BITCAST]](s32), [[LSHR]](s32), [[BITCAST1]](s32), [[LSHR1]](s32) - ; UNPACKED: G_AMDGPU_INTRIN_IMAGE_STORE intrinsic(@llvm.amdgcn.image.store.2d), [[BUILD_VECTOR2]](<4 x s32>), 15, [[BUILD_VECTOR1]](<2 x s32>), $noreg, [[BUILD_VECTOR]](<8 x s32>), 0, 0, 0 :: (dereferenceable store (<4 x s16>) into custom "ImageResource") - ; UNPACKED: S_ENDPGM 0 + ; UNPACKED-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1, $vgpr2, $vgpr3 + ; 
UNPACKED-NEXT: {{ $}} + ; UNPACKED-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 + ; UNPACKED-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3 + ; UNPACKED-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4 + ; UNPACKED-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5 + ; UNPACKED-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6 + ; UNPACKED-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7 + ; UNPACKED-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8 + ; UNPACKED-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 + ; UNPACKED-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) + ; UNPACKED-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0 + ; UNPACKED-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; UNPACKED-NEXT: [[COPY10:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr2 + ; UNPACKED-NEXT: [[COPY11:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr3 + ; UNPACKED-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32) + ; UNPACKED-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY10]](<2 x s16>) + ; UNPACKED-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; UNPACKED-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) + ; UNPACKED-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[COPY11]](<2 x s16>) + ; UNPACKED-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C]](s32) + ; UNPACKED-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[BITCAST]](s32), [[LSHR]](s32), [[BITCAST1]](s32), [[LSHR1]](s32) + ; UNPACKED-NEXT: G_AMDGPU_INTRIN_IMAGE_STORE intrinsic(@llvm.amdgcn.image.store.2d), [[BUILD_VECTOR2]](<4 x s32>), 15, [[BUILD_VECTOR1]](<2 x s32>), $noreg, [[BUILD_VECTOR]](<8 x s32>), 0, 0, 0 :: (dereferenceable store (<4 x s16>) into custom "ImageResource") + ; UNPACKED-NEXT: S_ENDPGM 0 ; GFX81-LABEL: name: image_store_v4f16 ; GFX81: bb.1 (%ir-block.0): - ; GFX81: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1, $vgpr2, $vgpr3 - ; GFX81: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 - ; GFX81: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3 - ; GFX81: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4 - ; GFX81: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5 - ; GFX81: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6 - ; GFX81: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7 - ; GFX81: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8 - ; GFX81: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 - ; GFX81: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) - ; GFX81: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX81: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX81: [[COPY10:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr2 - ; GFX81: [[COPY11:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr3 - ; GFX81: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[COPY10]](<2 x s16>), [[COPY11]](<2 x s16>) - ; GFX81: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32) - ; GFX81: [[BITCAST:%[0-9]+]]:_(<2 x s32>) = G_BITCAST [[CONCAT_VECTORS]](<4 x s16>) - ; GFX81: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[BITCAST]](<2 x s32>) - ; GFX81: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF - ; GFX81: [[BUILD_VECTOR2:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[UV]](s32), [[UV1]](s32), [[DEF]](s32), [[DEF]](s32) - ; GFX81: G_AMDGPU_INTRIN_IMAGE_STORE intrinsic(@llvm.amdgcn.image.store.2d), [[BUILD_VECTOR2]](<4 x s32>), 15, [[BUILD_VECTOR1]](<2 x s32>), 
$noreg, [[BUILD_VECTOR]](<8 x s32>), 0, 0, 0 :: (dereferenceable store (<4 x s16>) into custom "ImageResource") - ; GFX81: S_ENDPGM 0 + ; GFX81-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1, $vgpr2, $vgpr3 + ; GFX81-NEXT: {{ $}} + ; GFX81-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 + ; GFX81-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3 + ; GFX81-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4 + ; GFX81-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5 + ; GFX81-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6 + ; GFX81-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7 + ; GFX81-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8 + ; GFX81-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 + ; GFX81-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) + ; GFX81-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0 + ; GFX81-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; GFX81-NEXT: [[COPY10:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr2 + ; GFX81-NEXT: [[COPY11:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr3 + ; GFX81-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[COPY10]](<2 x s16>), [[COPY11]](<2 x s16>) + ; GFX81-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32) + ; GFX81-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x s32>) = G_BITCAST [[CONCAT_VECTORS]](<4 x s16>) + ; GFX81-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[BITCAST]](<2 x s32>) + ; GFX81-NEXT: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF + ; GFX81-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[UV]](s32), [[UV1]](s32), [[DEF]](s32), [[DEF]](s32) + ; GFX81-NEXT: G_AMDGPU_INTRIN_IMAGE_STORE intrinsic(@llvm.amdgcn.image.store.2d), [[BUILD_VECTOR2]](<4 x s32>), 15, [[BUILD_VECTOR1]](<2 x s32>), $noreg, [[BUILD_VECTOR]](<8 x s32>), 0, 0, 0 :: (dereferenceable store (<4 x s16>) into custom "ImageResource") + ; GFX81-NEXT: S_ENDPGM 0 ; GFX9-LABEL: name: image_store_v4f16 ; GFX9: bb.1 (%ir-block.0): - ; GFX9: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1, $vgpr2, $vgpr3 - ; GFX9: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 - ; GFX9: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3 - ; GFX9: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4 - ; GFX9: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5 - ; GFX9: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6 - ; GFX9: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7 - ; GFX9: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8 - ; GFX9: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 - ; GFX9: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) - ; GFX9: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX9: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX9: [[COPY10:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr2 - ; GFX9: [[COPY11:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr3 - ; GFX9: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[COPY10]](<2 x s16>), [[COPY11]](<2 x s16>) - ; GFX9: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32) - ; GFX9: G_AMDGPU_INTRIN_IMAGE_STORE intrinsic(@llvm.amdgcn.image.store.2d), [[CONCAT_VECTORS]](<4 x s16>), 15, [[BUILD_VECTOR1]](<2 x s32>), $noreg, [[BUILD_VECTOR]](<8 x s32>), 0, 0, 0 :: (dereferenceable store (<4 x s16>) into custom "ImageResource") - ; GFX9: S_ENDPGM 0 + ; GFX9-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, 
$sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1, $vgpr2, $vgpr3 + ; GFX9-NEXT: {{ $}} + ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 + ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3 + ; GFX9-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4 + ; GFX9-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5 + ; GFX9-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6 + ; GFX9-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7 + ; GFX9-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8 + ; GFX9-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 + ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) + ; GFX9-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0 + ; GFX9-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; GFX9-NEXT: [[COPY10:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr2 + ; GFX9-NEXT: [[COPY11:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr3 + ; GFX9-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[COPY10]](<2 x s16>), [[COPY11]](<2 x s16>) + ; GFX9-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32) + ; GFX9-NEXT: G_AMDGPU_INTRIN_IMAGE_STORE intrinsic(@llvm.amdgcn.image.store.2d), [[CONCAT_VECTORS]](<4 x s16>), 15, [[BUILD_VECTOR1]](<2 x s32>), $noreg, [[BUILD_VECTOR]](<8 x s32>), 0, 0, 0 :: (dereferenceable store (<4 x s16>) into custom "ImageResource") + ; GFX9-NEXT: S_ENDPGM 0 ; GFX10-LABEL: name: image_store_v4f16 ; GFX10: bb.1 (%ir-block.0): - ; GFX10: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1, $vgpr2, $vgpr3 - ; GFX10: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 - ; GFX10: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3 - ; GFX10: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4 - ; GFX10: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5 - ; GFX10: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6 - ; GFX10: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7 - ; GFX10: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8 - ; GFX10: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 - ; GFX10: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) - ; GFX10: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX10: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX10: [[COPY10:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr2 - ; GFX10: [[COPY11:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr3 - ; GFX10: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[COPY10]](<2 x s16>), [[COPY11]](<2 x s16>) - ; GFX10: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32) - ; GFX10: G_AMDGPU_INTRIN_IMAGE_STORE intrinsic(@llvm.amdgcn.image.store.2d), [[CONCAT_VECTORS]](<4 x s16>), 15, [[BUILD_VECTOR1]](<2 x s32>), $noreg, [[BUILD_VECTOR]](<8 x s32>), 0, 0, 0 :: (dereferenceable store (<4 x s16>) into custom "ImageResource") - ; GFX10: S_ENDPGM 0 + ; GFX10-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1, $vgpr2, $vgpr3 + ; GFX10-NEXT: {{ $}} + ; GFX10-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 + ; GFX10-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3 + ; GFX10-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4 + ; GFX10-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5 + ; GFX10-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6 + ; GFX10-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7 + ; GFX10-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8 + ; GFX10-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9 + ; GFX10-NEXT: 
[[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) + ; GFX10-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0 + ; GFX10-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; GFX10-NEXT: [[COPY10:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr2 + ; GFX10-NEXT: [[COPY11:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr3 + ; GFX10-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[COPY10]](<2 x s16>), [[COPY11]](<2 x s16>) + ; GFX10-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32) + ; GFX10-NEXT: G_AMDGPU_INTRIN_IMAGE_STORE intrinsic(@llvm.amdgcn.image.store.2d), [[CONCAT_VECTORS]](<4 x s16>), 15, [[BUILD_VECTOR1]](<2 x s32>), $noreg, [[BUILD_VECTOR]](<8 x s32>), 0, 0, 0 :: (dereferenceable store (<4 x s16>) into custom "ImageResource") + ; GFX10-NEXT: S_ENDPGM 0 call void @llvm.amdgcn.image.store.2d.v4f16.i32(<4 x half> %in, i32 15, i32 %s, i32 %t, <8 x i32> %rsrc, i32 0, i32 0) ret void } diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-llvm.amdgcn.s.buffer.load.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-llvm.amdgcn.s.buffer.load.mir index 307199ba0616..97d0d92cbe3a 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-llvm.amdgcn.s.buffer.load.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-llvm.amdgcn.s.buffer.load.mir @@ -10,9 +10,9 @@ body: | ; GCN-LABEL: name: s_buffer_load_s32 ; GCN: [[COPY:%[0-9]+]]:_(<4 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3 - ; GCN: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 - ; GCN: [[AMDGPU_S_BUFFER_LOAD:%[0-9]+]]:_(s32) = G_AMDGPU_S_BUFFER_LOAD [[COPY]](<4 x s32>), [[C]](s32), 0 :: (dereferenceable invariant load (s32)) - ; GCN: S_ENDPGM 0, implicit [[AMDGPU_S_BUFFER_LOAD]](s32) + ; GCN-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 + ; GCN-NEXT: [[AMDGPU_S_BUFFER_LOAD:%[0-9]+]]:_(s32) = G_AMDGPU_S_BUFFER_LOAD [[COPY]](<4 x s32>), [[C]](s32), 0 :: (dereferenceable invariant load (s32)) + ; GCN-NEXT: S_ENDPGM 0, implicit [[AMDGPU_S_BUFFER_LOAD]](s32) %0:_(<4 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3 %1:_(s32) = G_CONSTANT i32 0 %2:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.s.buffer.load), %0, %1, 0 @@ -28,12 +28,12 @@ body: | ; GCN-LABEL: name: s_buffer_load_v3s32 ; GCN: [[COPY:%[0-9]+]]:_(<4 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3 - ; GCN: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 - ; GCN: [[AMDGPU_S_BUFFER_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_S_BUFFER_LOAD [[COPY]](<4 x s32>), [[C]](s32), 0 :: (dereferenceable invariant load (s96), align 4) - ; GCN: [[DEF:%[0-9]+]]:_(<4 x s32>) = G_IMPLICIT_DEF - ; GCN: [[CONCAT_VECTORS:%[0-9]+]]:_(<12 x s32>) = G_CONCAT_VECTORS [[AMDGPU_S_BUFFER_LOAD]](<4 x s32>), [[DEF]](<4 x s32>), [[DEF]](<4 x s32>) - ; GCN: [[UV:%[0-9]+]]:_(<3 x s32>), [[UV1:%[0-9]+]]:_(<3 x s32>), [[UV2:%[0-9]+]]:_(<3 x s32>), [[UV3:%[0-9]+]]:_(<3 x s32>) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<12 x s32>) - ; GCN: S_ENDPGM 0, implicit [[UV]](<3 x s32>) + ; GCN-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 + ; GCN-NEXT: [[AMDGPU_S_BUFFER_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_S_BUFFER_LOAD [[COPY]](<4 x s32>), [[C]](s32), 0 :: (dereferenceable invariant load (s96), align 4) + ; GCN-NEXT: [[DEF:%[0-9]+]]:_(<4 x s32>) = G_IMPLICIT_DEF + ; GCN-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<12 x s32>) = G_CONCAT_VECTORS [[AMDGPU_S_BUFFER_LOAD]](<4 x s32>), [[DEF]](<4 x s32>), [[DEF]](<4 x s32>) + ; GCN-NEXT: [[UV:%[0-9]+]]:_(<3 x s32>), [[UV1:%[0-9]+]]:_(<3 x s32>), [[UV2:%[0-9]+]]:_(<3 x s32>), 
[[UV3:%[0-9]+]]:_(<3 x s32>) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<12 x s32>) + ; GCN-NEXT: S_ENDPGM 0, implicit [[UV]](<3 x s32>) %0:_(<4 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3 %1:_(s32) = G_CONSTANT i32 0 %2:_(<3 x s32>) = G_INTRINSIC intrinsic(@llvm.amdgcn.s.buffer.load), %0, %1, 0 @@ -49,13 +49,13 @@ body: | ; GCN-LABEL: name: s_buffer_load_v3p3 ; GCN: [[COPY:%[0-9]+]]:_(<4 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3 - ; GCN: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 - ; GCN: [[AMDGPU_S_BUFFER_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_S_BUFFER_LOAD [[COPY]](<4 x s32>), [[C]](s32), 0 :: (dereferenceable invariant load (s96), align 4) - ; GCN: [[DEF:%[0-9]+]]:_(<4 x s32>) = G_IMPLICIT_DEF - ; GCN: [[CONCAT_VECTORS:%[0-9]+]]:_(<12 x s32>) = G_CONCAT_VECTORS [[AMDGPU_S_BUFFER_LOAD]](<4 x s32>), [[DEF]](<4 x s32>), [[DEF]](<4 x s32>) - ; GCN: [[UV:%[0-9]+]]:_(<3 x s32>), [[UV1:%[0-9]+]]:_(<3 x s32>), [[UV2:%[0-9]+]]:_(<3 x s32>), [[UV3:%[0-9]+]]:_(<3 x s32>) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<12 x s32>) - ; GCN: [[BITCAST:%[0-9]+]]:_(<3 x p3>) = G_BITCAST [[UV]](<3 x s32>) - ; GCN: S_ENDPGM 0, implicit [[BITCAST]](<3 x p3>) + ; GCN-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 + ; GCN-NEXT: [[AMDGPU_S_BUFFER_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_S_BUFFER_LOAD [[COPY]](<4 x s32>), [[C]](s32), 0 :: (dereferenceable invariant load (s96), align 4) + ; GCN-NEXT: [[DEF:%[0-9]+]]:_(<4 x s32>) = G_IMPLICIT_DEF + ; GCN-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<12 x s32>) = G_CONCAT_VECTORS [[AMDGPU_S_BUFFER_LOAD]](<4 x s32>), [[DEF]](<4 x s32>), [[DEF]](<4 x s32>) + ; GCN-NEXT: [[UV:%[0-9]+]]:_(<3 x s32>), [[UV1:%[0-9]+]]:_(<3 x s32>), [[UV2:%[0-9]+]]:_(<3 x s32>), [[UV3:%[0-9]+]]:_(<3 x s32>) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<12 x s32>) + ; GCN-NEXT: [[BITCAST:%[0-9]+]]:_(<3 x p3>) = G_BITCAST [[UV]](<3 x s32>) + ; GCN-NEXT: S_ENDPGM 0, implicit [[BITCAST]](<3 x p3>) %0:_(<4 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3 %1:_(s32) = G_CONSTANT i32 0 %2:_(<3 x p3>) = G_INTRINSIC intrinsic(@llvm.amdgcn.s.buffer.load), %0, %1, 0 @@ -71,13 +71,13 @@ body: | ; GCN-LABEL: name: s_buffer_load_v6s16 ; GCN: [[COPY:%[0-9]+]]:_(<4 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3 - ; GCN: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 - ; GCN: [[AMDGPU_S_BUFFER_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_S_BUFFER_LOAD [[COPY]](<4 x s32>), [[C]](s32), 0 :: (dereferenceable invariant load (s96), align 4) - ; GCN: [[DEF:%[0-9]+]]:_(<4 x s32>) = G_IMPLICIT_DEF - ; GCN: [[CONCAT_VECTORS:%[0-9]+]]:_(<12 x s32>) = G_CONCAT_VECTORS [[AMDGPU_S_BUFFER_LOAD]](<4 x s32>), [[DEF]](<4 x s32>), [[DEF]](<4 x s32>) - ; GCN: [[UV:%[0-9]+]]:_(<3 x s32>), [[UV1:%[0-9]+]]:_(<3 x s32>), [[UV2:%[0-9]+]]:_(<3 x s32>), [[UV3:%[0-9]+]]:_(<3 x s32>) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<12 x s32>) - ; GCN: [[BITCAST:%[0-9]+]]:_(<6 x s16>) = G_BITCAST [[UV]](<3 x s32>) - ; GCN: S_ENDPGM 0, implicit [[BITCAST]](<6 x s16>) + ; GCN-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 + ; GCN-NEXT: [[AMDGPU_S_BUFFER_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_S_BUFFER_LOAD [[COPY]](<4 x s32>), [[C]](s32), 0 :: (dereferenceable invariant load (s96), align 4) + ; GCN-NEXT: [[DEF:%[0-9]+]]:_(<4 x s32>) = G_IMPLICIT_DEF + ; GCN-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<12 x s32>) = G_CONCAT_VECTORS [[AMDGPU_S_BUFFER_LOAD]](<4 x s32>), [[DEF]](<4 x s32>), [[DEF]](<4 x s32>) + ; GCN-NEXT: [[UV:%[0-9]+]]:_(<3 x s32>), [[UV1:%[0-9]+]]:_(<3 x s32>), [[UV2:%[0-9]+]]:_(<3 x s32>), [[UV3:%[0-9]+]]:_(<3 x s32>) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<12 x s32>) + ; GCN-NEXT: [[BITCAST:%[0-9]+]]:_(<6 x 
s16>) = G_BITCAST [[UV]](<3 x s32>) + ; GCN-NEXT: S_ENDPGM 0, implicit [[BITCAST]](<6 x s16>) %0:_(<4 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3 %1:_(s32) = G_CONSTANT i32 0 %2:_(<6 x s16>) = G_INTRINSIC intrinsic(@llvm.amdgcn.s.buffer.load), %0, %1, 0 @@ -93,12 +93,12 @@ body: | ; GCN-LABEL: name: s_buffer_load_v6s32 ; GCN: [[COPY:%[0-9]+]]:_(<4 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3 - ; GCN: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 - ; GCN: [[AMDGPU_S_BUFFER_LOAD:%[0-9]+]]:_(<8 x s32>) = G_AMDGPU_S_BUFFER_LOAD [[COPY]](<4 x s32>), [[C]](s32), 0 :: (dereferenceable invariant load (s192), align 4) - ; GCN: [[DEF:%[0-9]+]]:_(<8 x s32>) = G_IMPLICIT_DEF - ; GCN: [[CONCAT_VECTORS:%[0-9]+]]:_(<24 x s32>) = G_CONCAT_VECTORS [[AMDGPU_S_BUFFER_LOAD]](<8 x s32>), [[DEF]](<8 x s32>), [[DEF]](<8 x s32>) - ; GCN: [[UV:%[0-9]+]]:_(<6 x s32>), [[UV1:%[0-9]+]]:_(<6 x s32>), [[UV2:%[0-9]+]]:_(<6 x s32>), [[UV3:%[0-9]+]]:_(<6 x s32>) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<24 x s32>) - ; GCN: S_ENDPGM 0, implicit [[UV]](<6 x s32>) + ; GCN-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 + ; GCN-NEXT: [[AMDGPU_S_BUFFER_LOAD:%[0-9]+]]:_(<8 x s32>) = G_AMDGPU_S_BUFFER_LOAD [[COPY]](<4 x s32>), [[C]](s32), 0 :: (dereferenceable invariant load (s192), align 4) + ; GCN-NEXT: [[DEF:%[0-9]+]]:_(<8 x s32>) = G_IMPLICIT_DEF + ; GCN-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<24 x s32>) = G_CONCAT_VECTORS [[AMDGPU_S_BUFFER_LOAD]](<8 x s32>), [[DEF]](<8 x s32>), [[DEF]](<8 x s32>) + ; GCN-NEXT: [[UV:%[0-9]+]]:_(<6 x s32>), [[UV1:%[0-9]+]]:_(<6 x s32>), [[UV2:%[0-9]+]]:_(<6 x s32>), [[UV3:%[0-9]+]]:_(<6 x s32>) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<24 x s32>) + ; GCN-NEXT: S_ENDPGM 0, implicit [[UV]](<6 x s32>) %0:_(<4 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3 %1:_(s32) = G_CONSTANT i32 0 %2:_(<6 x s32>) = G_INTRINSIC intrinsic(@llvm.amdgcn.s.buffer.load), %0, %1, 0 @@ -114,12 +114,12 @@ body: | ; GCN-LABEL: name: s_buffer_load_v3s64 ; GCN: [[COPY:%[0-9]+]]:_(<4 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3 - ; GCN: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 - ; GCN: [[AMDGPU_S_BUFFER_LOAD:%[0-9]+]]:_(<4 x s64>) = G_AMDGPU_S_BUFFER_LOAD [[COPY]](<4 x s32>), [[C]](s32), 0 :: (dereferenceable invariant load (s192), align 4) - ; GCN: [[DEF:%[0-9]+]]:_(<4 x s64>) = G_IMPLICIT_DEF - ; GCN: [[CONCAT_VECTORS:%[0-9]+]]:_(<12 x s64>) = G_CONCAT_VECTORS [[AMDGPU_S_BUFFER_LOAD]](<4 x s64>), [[DEF]](<4 x s64>), [[DEF]](<4 x s64>) - ; GCN: [[UV:%[0-9]+]]:_(<3 x s64>), [[UV1:%[0-9]+]]:_(<3 x s64>), [[UV2:%[0-9]+]]:_(<3 x s64>), [[UV3:%[0-9]+]]:_(<3 x s64>) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<12 x s64>) - ; GCN: S_ENDPGM 0, implicit [[UV]](<3 x s64>) + ; GCN-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 + ; GCN-NEXT: [[AMDGPU_S_BUFFER_LOAD:%[0-9]+]]:_(<4 x s64>) = G_AMDGPU_S_BUFFER_LOAD [[COPY]](<4 x s32>), [[C]](s32), 0 :: (dereferenceable invariant load (s192), align 4) + ; GCN-NEXT: [[DEF:%[0-9]+]]:_(<4 x s64>) = G_IMPLICIT_DEF + ; GCN-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<12 x s64>) = G_CONCAT_VECTORS [[AMDGPU_S_BUFFER_LOAD]](<4 x s64>), [[DEF]](<4 x s64>), [[DEF]](<4 x s64>) + ; GCN-NEXT: [[UV:%[0-9]+]]:_(<3 x s64>), [[UV1:%[0-9]+]]:_(<3 x s64>), [[UV2:%[0-9]+]]:_(<3 x s64>), [[UV3:%[0-9]+]]:_(<3 x s64>) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<12 x s64>) + ; GCN-NEXT: S_ENDPGM 0, implicit [[UV]](<3 x s64>) %0:_(<4 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3 %1:_(s32) = G_CONSTANT i32 0 %2:_(<3 x s64>) = G_INTRINSIC intrinsic(@llvm.amdgcn.s.buffer.load), %0, %1, 0 @@ -135,57 +135,57 @@ body: | ; GCN-LABEL: name: s_buffer_load_v12s8 ; GCN: 
[[COPY:%[0-9]+]]:_(<4 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3 - ; GCN: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 - ; GCN: [[AMDGPU_S_BUFFER_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_S_BUFFER_LOAD [[COPY]](<4 x s32>), [[C]](s32), 0 :: (dereferenceable invariant load (s96), align 4) - ; GCN: [[DEF:%[0-9]+]]:_(<4 x s32>) = G_IMPLICIT_DEF - ; GCN: [[CONCAT_VECTORS:%[0-9]+]]:_(<12 x s32>) = G_CONCAT_VECTORS [[AMDGPU_S_BUFFER_LOAD]](<4 x s32>), [[DEF]](<4 x s32>), [[DEF]](<4 x s32>) - ; GCN: [[UV:%[0-9]+]]:_(<3 x s32>), [[UV1:%[0-9]+]]:_(<3 x s32>), [[UV2:%[0-9]+]]:_(<3 x s32>), [[UV3:%[0-9]+]]:_(<3 x s32>) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<12 x s32>) - ; GCN: [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32), [[UV6:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[UV]](<3 x s32>) - ; GCN: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 - ; GCN: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[UV4]], [[C1]](s32) - ; GCN: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; GCN: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[UV4]], [[C2]](s32) - ; GCN: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 24 - ; GCN: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[UV4]], [[C3]](s32) - ; GCN: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[UV5]], [[C1]](s32) - ; GCN: [[LSHR4:%[0-9]+]]:_(s32) = G_LSHR [[UV5]], [[C2]](s32) - ; GCN: [[LSHR5:%[0-9]+]]:_(s32) = G_LSHR [[UV5]], [[C3]](s32) - ; GCN: [[LSHR6:%[0-9]+]]:_(s32) = G_LSHR [[UV6]], [[C1]](s32) - ; GCN: [[LSHR7:%[0-9]+]]:_(s32) = G_LSHR [[UV6]], [[C2]](s32) - ; GCN: [[LSHR8:%[0-9]+]]:_(s32) = G_LSHR [[UV6]], [[C3]](s32) - ; GCN: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 - ; GCN: [[AND:%[0-9]+]]:_(s32) = G_AND [[UV4]], [[C4]] - ; GCN: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LSHR]], [[C4]] - ; GCN: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C2]](s32) - ; GCN: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]] - ; GCN: [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) - ; GCN: [[AND2:%[0-9]+]]:_(s32) = G_AND [[LSHR1]], [[C4]] - ; GCN: [[AND3:%[0-9]+]]:_(s32) = G_AND [[LSHR2]], [[C4]] - ; GCN: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C2]](s32) - ; GCN: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL1]] - ; GCN: [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32) - ; GCN: [[AND4:%[0-9]+]]:_(s32) = G_AND [[UV5]], [[C4]] - ; GCN: [[AND5:%[0-9]+]]:_(s32) = G_AND [[LSHR3]], [[C4]] - ; GCN: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[C2]](s32) - ; GCN: [[OR2:%[0-9]+]]:_(s32) = G_OR [[AND4]], [[SHL2]] - ; GCN: [[BITCAST2:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR2]](s32) - ; GCN: [[AND6:%[0-9]+]]:_(s32) = G_AND [[LSHR4]], [[C4]] - ; GCN: [[AND7:%[0-9]+]]:_(s32) = G_AND [[LSHR5]], [[C4]] - ; GCN: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[AND7]], [[C2]](s32) - ; GCN: [[OR3:%[0-9]+]]:_(s32) = G_OR [[AND6]], [[SHL3]] - ; GCN: [[BITCAST3:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR3]](s32) - ; GCN: [[AND8:%[0-9]+]]:_(s32) = G_AND [[UV6]], [[C4]] - ; GCN: [[AND9:%[0-9]+]]:_(s32) = G_AND [[LSHR6]], [[C4]] - ; GCN: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[AND9]], [[C2]](s32) - ; GCN: [[OR4:%[0-9]+]]:_(s32) = G_OR [[AND8]], [[SHL4]] - ; GCN: [[BITCAST4:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR4]](s32) - ; GCN: [[AND10:%[0-9]+]]:_(s32) = G_AND [[LSHR7]], [[C4]] - ; GCN: [[AND11:%[0-9]+]]:_(s32) = G_AND [[LSHR8]], [[C4]] - ; GCN: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[AND11]], [[C2]](s32) - ; GCN: [[OR5:%[0-9]+]]:_(s32) = G_OR [[AND10]], [[SHL5]] - ; GCN: [[BITCAST5:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR5]](s32) - ; GCN: [[CONCAT_VECTORS1:%[0-9]+]]:_(<12 x s16>) = G_CONCAT_VECTORS [[BITCAST]](<2 x s16>), [[BITCAST1]](<2 x s16>), [[BITCAST2]](<2 x s16>), [[BITCAST3]](<2 x s16>), 
[[BITCAST4]](<2 x s16>), [[BITCAST5]](<2 x s16>) - ; GCN: S_ENDPGM 0, implicit [[CONCAT_VECTORS1]](<12 x s16>) + ; GCN-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 + ; GCN-NEXT: [[AMDGPU_S_BUFFER_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_S_BUFFER_LOAD [[COPY]](<4 x s32>), [[C]](s32), 0 :: (dereferenceable invariant load (s96), align 4) + ; GCN-NEXT: [[DEF:%[0-9]+]]:_(<4 x s32>) = G_IMPLICIT_DEF + ; GCN-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<12 x s32>) = G_CONCAT_VECTORS [[AMDGPU_S_BUFFER_LOAD]](<4 x s32>), [[DEF]](<4 x s32>), [[DEF]](<4 x s32>) + ; GCN-NEXT: [[UV:%[0-9]+]]:_(<3 x s32>), [[UV1:%[0-9]+]]:_(<3 x s32>), [[UV2:%[0-9]+]]:_(<3 x s32>), [[UV3:%[0-9]+]]:_(<3 x s32>) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<12 x s32>) + ; GCN-NEXT: [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32), [[UV6:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[UV]](<3 x s32>) + ; GCN-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 + ; GCN-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[UV4]], [[C1]](s32) + ; GCN-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; GCN-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[UV4]], [[C2]](s32) + ; GCN-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 24 + ; GCN-NEXT: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[UV4]], [[C3]](s32) + ; GCN-NEXT: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[UV5]], [[C1]](s32) + ; GCN-NEXT: [[LSHR4:%[0-9]+]]:_(s32) = G_LSHR [[UV5]], [[C2]](s32) + ; GCN-NEXT: [[LSHR5:%[0-9]+]]:_(s32) = G_LSHR [[UV5]], [[C3]](s32) + ; GCN-NEXT: [[LSHR6:%[0-9]+]]:_(s32) = G_LSHR [[UV6]], [[C1]](s32) + ; GCN-NEXT: [[LSHR7:%[0-9]+]]:_(s32) = G_LSHR [[UV6]], [[C2]](s32) + ; GCN-NEXT: [[LSHR8:%[0-9]+]]:_(s32) = G_LSHR [[UV6]], [[C3]](s32) + ; GCN-NEXT: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 + ; GCN-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[UV4]], [[C4]] + ; GCN-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LSHR]], [[C4]] + ; GCN-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C2]](s32) + ; GCN-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]] + ; GCN-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) + ; GCN-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[LSHR1]], [[C4]] + ; GCN-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[LSHR2]], [[C4]] + ; GCN-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C2]](s32) + ; GCN-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL1]] + ; GCN-NEXT: [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32) + ; GCN-NEXT: [[AND4:%[0-9]+]]:_(s32) = G_AND [[UV5]], [[C4]] + ; GCN-NEXT: [[AND5:%[0-9]+]]:_(s32) = G_AND [[LSHR3]], [[C4]] + ; GCN-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[C2]](s32) + ; GCN-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[AND4]], [[SHL2]] + ; GCN-NEXT: [[BITCAST2:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR2]](s32) + ; GCN-NEXT: [[AND6:%[0-9]+]]:_(s32) = G_AND [[LSHR4]], [[C4]] + ; GCN-NEXT: [[AND7:%[0-9]+]]:_(s32) = G_AND [[LSHR5]], [[C4]] + ; GCN-NEXT: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[AND7]], [[C2]](s32) + ; GCN-NEXT: [[OR3:%[0-9]+]]:_(s32) = G_OR [[AND6]], [[SHL3]] + ; GCN-NEXT: [[BITCAST3:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR3]](s32) + ; GCN-NEXT: [[AND8:%[0-9]+]]:_(s32) = G_AND [[UV6]], [[C4]] + ; GCN-NEXT: [[AND9:%[0-9]+]]:_(s32) = G_AND [[LSHR6]], [[C4]] + ; GCN-NEXT: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[AND9]], [[C2]](s32) + ; GCN-NEXT: [[OR4:%[0-9]+]]:_(s32) = G_OR [[AND8]], [[SHL4]] + ; GCN-NEXT: [[BITCAST4:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR4]](s32) + ; GCN-NEXT: [[AND10:%[0-9]+]]:_(s32) = G_AND [[LSHR7]], [[C4]] + ; GCN-NEXT: [[AND11:%[0-9]+]]:_(s32) = G_AND [[LSHR8]], [[C4]] + ; GCN-NEXT: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[AND11]], [[C2]](s32) + ; GCN-NEXT: 
[[OR5:%[0-9]+]]:_(s32) = G_OR [[AND10]], [[SHL5]] + ; GCN-NEXT: [[BITCAST5:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR5]](s32) + ; GCN-NEXT: [[CONCAT_VECTORS1:%[0-9]+]]:_(<12 x s16>) = G_CONCAT_VECTORS [[BITCAST]](<2 x s16>), [[BITCAST1]](<2 x s16>), [[BITCAST2]](<2 x s16>), [[BITCAST3]](<2 x s16>), [[BITCAST4]](<2 x s16>), [[BITCAST5]](<2 x s16>) + ; GCN-NEXT: S_ENDPGM 0, implicit [[CONCAT_VECTORS1]](<12 x s16>) %0:_(<4 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3 %1:_(s32) = G_CONSTANT i32 0 %2:_(<12 x s8>) = G_INTRINSIC intrinsic(@llvm.amdgcn.s.buffer.load), %0, %1, 0 @@ -202,12 +202,12 @@ body: | ; GCN-LABEL: name: s_buffer_load_s96 ; GCN: [[COPY:%[0-9]+]]:_(<4 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3 - ; GCN: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 - ; GCN: [[AMDGPU_S_BUFFER_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_S_BUFFER_LOAD [[COPY]](<4 x s32>), [[C]](s32), 0 :: (dereferenceable invariant load (s96), align 4) - ; GCN: [[DEF:%[0-9]+]]:_(<4 x s32>) = G_IMPLICIT_DEF - ; GCN: [[CONCAT_VECTORS:%[0-9]+]]:_(<12 x s32>) = G_CONCAT_VECTORS [[AMDGPU_S_BUFFER_LOAD]](<4 x s32>), [[DEF]](<4 x s32>), [[DEF]](<4 x s32>) - ; GCN: [[UV:%[0-9]+]]:_(<3 x s32>), [[UV1:%[0-9]+]]:_(<3 x s32>), [[UV2:%[0-9]+]]:_(<3 x s32>), [[UV3:%[0-9]+]]:_(<3 x s32>) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<12 x s32>) - ; GCN: S_ENDPGM 0, implicit [[UV]](<3 x s32>) + ; GCN-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 + ; GCN-NEXT: [[AMDGPU_S_BUFFER_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_S_BUFFER_LOAD [[COPY]](<4 x s32>), [[C]](s32), 0 :: (dereferenceable invariant load (s96), align 4) + ; GCN-NEXT: [[DEF:%[0-9]+]]:_(<4 x s32>) = G_IMPLICIT_DEF + ; GCN-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<12 x s32>) = G_CONCAT_VECTORS [[AMDGPU_S_BUFFER_LOAD]](<4 x s32>), [[DEF]](<4 x s32>), [[DEF]](<4 x s32>) + ; GCN-NEXT: [[UV:%[0-9]+]]:_(<3 x s32>), [[UV1:%[0-9]+]]:_(<3 x s32>), [[UV2:%[0-9]+]]:_(<3 x s32>), [[UV3:%[0-9]+]]:_(<3 x s32>) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<12 x s32>) + ; GCN-NEXT: S_ENDPGM 0, implicit [[UV]](<3 x s32>) %0:_(<4 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3 %1:_(s32) = G_CONSTANT i32 0 %2:_(<3 x s32>) = G_INTRINSIC intrinsic(@llvm.amdgcn.s.buffer.load), %0, %1, 0 diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-load-constant-32bit.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-load-constant-32bit.mir index ec638c5a3907..2bed0e0c85b6 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-load-constant-32bit.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-load-constant-32bit.mir @@ -1,5 +1,5 @@ # NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py -# RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=bonaire -O0 -run-pass=legalizer -global-isel-abort=0 %s -o - | FileCheck -check-prefix=CI %s +# RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=bonaire -O0 -run-pass=legalizer %s -o - | FileCheck -check-prefix=CI %s --- name: test_load_constant32bit_s32_align1 diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-load-flat.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-load-flat.mir index cd4dedba0910..e08cfd5ffa09 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-load-flat.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-load-flat.mir @@ -1,8 +1,8 @@ # NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py -# RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=bonaire -O0 -run-pass=legalizer -global-isel-abort=0 %s -o - | FileCheck -check-prefix=CI %s -# RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=fiji -O0 -run-pass=legalizer -global-isel-abort=0 %s -o - | 
FileCheck -check-prefix=VI %s -# RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx900 -O0 -run-pass=legalizer -global-isel-abort=0 %s -o - | FileCheck -check-prefix=GFX9 %s -# RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx1010 -O0 -run-pass=legalizer -global-isel-abort=0 %s -o - | FileCheck -check-prefix=GFX9 %s +# RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=bonaire -O0 -run-pass=legalizer %s -o - | FileCheck -check-prefix=CI %s +# RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=fiji -O0 -run-pass=legalizer %s -o - | FileCheck -check-prefix=VI %s +# RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx900 -O0 -run-pass=legalizer %s -o - | FileCheck -check-prefix=GFX9 %s +# RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx1010 -O0 -run-pass=legalizer %s -o - | FileCheck -check-prefix=GFX9 %s --- name: test_load_flat_s1_align1 diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-or.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-or.mir index 736f606fd1bd..b376bff0ca95 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-or.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-or.mir @@ -9,9 +9,9 @@ body: | ; CHECK-LABEL: name: test_or_s32 ; CHECK: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; CHECK: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; CHECK: [[OR:%[0-9]+]]:_(s32) = G_OR [[COPY]], [[COPY1]] - ; CHECK: $vgpr0 = COPY [[OR]](s32) + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; CHECK-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[COPY]], [[COPY1]] + ; CHECK-NEXT: $vgpr0 = COPY [[OR]](s32) %0:_(s32) = COPY $vgpr0 %1:_(s32) = COPY $vgpr1 %2:_(s32) = G_OR %0, %1 @@ -26,9 +26,9 @@ body: | ; CHECK-LABEL: name: test_or_s1 ; CHECK: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; CHECK: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; CHECK: [[OR:%[0-9]+]]:_(s32) = G_OR [[COPY]], [[COPY1]] - ; CHECK: S_NOP 0, implicit [[OR]](s32) + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; CHECK-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[COPY]], [[COPY1]] + ; CHECK-NEXT: S_NOP 0, implicit [[OR]](s32) %0:_(s32) = COPY $vgpr0 %1:_(s32) = COPY $vgpr1 %2:_(s32) = G_CONSTANT i32 0 @@ -46,22 +46,22 @@ body: | ; CHECK-LABEL: name: test_or_v2s1 ; CHECK: [[COPY:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr0_vgpr1 - ; CHECK: [[COPY1:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr2_vgpr3 - ; CHECK: [[COPY2:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr4_vgpr5 - ; CHECK: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<2 x s32>) - ; CHECK: [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](<2 x s32>) - ; CHECK: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[UV]](s32), [[UV2]] - ; CHECK: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[UV1]](s32), [[UV3]] - ; CHECK: [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<2 x s32>) - ; CHECK: [[UV6:%[0-9]+]]:_(s32), [[UV7:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY2]](<2 x s32>) - ; CHECK: [[ICMP2:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[UV4]](s32), [[UV6]] - ; CHECK: [[ICMP3:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[UV5]](s32), [[UV7]] - ; CHECK: [[OR:%[0-9]+]]:_(s1) = G_OR [[ICMP]], [[ICMP2]] - ; CHECK: [[OR1:%[0-9]+]]:_(s1) = G_OR [[ICMP1]], [[ICMP3]] - ; CHECK: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[OR]](s1) - ; CHECK: [[ANYEXT1:%[0-9]+]]:_(s32) = G_ANYEXT [[OR1]](s1) - ; CHECK: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[ANYEXT]](s32), [[ANYEXT1]](s32) - ; CHECK: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x s32>) + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr2_vgpr3 + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(<2 x s32>) = COPY 
$vgpr4_vgpr5 + ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<2 x s32>) + ; CHECK-NEXT: [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](<2 x s32>) + ; CHECK-NEXT: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[UV]](s32), [[UV2]] + ; CHECK-NEXT: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[UV1]](s32), [[UV3]] + ; CHECK-NEXT: [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<2 x s32>) + ; CHECK-NEXT: [[UV6:%[0-9]+]]:_(s32), [[UV7:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY2]](<2 x s32>) + ; CHECK-NEXT: [[ICMP2:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[UV4]](s32), [[UV6]] + ; CHECK-NEXT: [[ICMP3:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[UV5]](s32), [[UV7]] + ; CHECK-NEXT: [[OR:%[0-9]+]]:_(s1) = G_OR [[ICMP]], [[ICMP2]] + ; CHECK-NEXT: [[OR1:%[0-9]+]]:_(s1) = G_OR [[ICMP1]], [[ICMP3]] + ; CHECK-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[OR]](s1) + ; CHECK-NEXT: [[ANYEXT1:%[0-9]+]]:_(s32) = G_ANYEXT [[OR1]](s1) + ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[ANYEXT]](s32), [[ANYEXT1]](s32) + ; CHECK-NEXT: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x s32>) %0:_(<2 x s32>) = COPY $vgpr0_vgpr1 %1:_(<2 x s32>) = COPY $vgpr2_vgpr3 %2:_(<2 x s32>) = COPY $vgpr4_vgpr5 @@ -80,26 +80,26 @@ body: | ; CHECK-LABEL: name: test_or_v3s1 ; CHECK: [[COPY:%[0-9]+]]:_(<3 x s32>) = COPY $vgpr0_vgpr1_vgpr2 - ; CHECK: [[COPY1:%[0-9]+]]:_(<3 x s32>) = COPY $vgpr3_vgpr4_vgpr5 - ; CHECK: [[COPY2:%[0-9]+]]:_(<3 x s32>) = COPY $vgpr6_vgpr7_vgpr8 - ; CHECK: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<3 x s32>) - ; CHECK: [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](<3 x s32>) - ; CHECK: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[UV]](s32), [[UV3]] - ; CHECK: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[UV1]](s32), [[UV4]] - ; CHECK: [[ICMP2:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[UV2]](s32), [[UV5]] - ; CHECK: [[UV6:%[0-9]+]]:_(s32), [[UV7:%[0-9]+]]:_(s32), [[UV8:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<3 x s32>) - ; CHECK: [[UV9:%[0-9]+]]:_(s32), [[UV10:%[0-9]+]]:_(s32), [[UV11:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY2]](<3 x s32>) - ; CHECK: [[ICMP3:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[UV6]](s32), [[UV9]] - ; CHECK: [[ICMP4:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[UV7]](s32), [[UV10]] - ; CHECK: [[ICMP5:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[UV8]](s32), [[UV11]] - ; CHECK: [[OR:%[0-9]+]]:_(s1) = G_OR [[ICMP]], [[ICMP3]] - ; CHECK: [[OR1:%[0-9]+]]:_(s1) = G_OR [[ICMP1]], [[ICMP4]] - ; CHECK: [[OR2:%[0-9]+]]:_(s1) = G_OR [[ICMP2]], [[ICMP5]] - ; CHECK: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[OR]](s1) - ; CHECK: [[ANYEXT1:%[0-9]+]]:_(s32) = G_ANYEXT [[OR1]](s1) - ; CHECK: [[ANYEXT2:%[0-9]+]]:_(s32) = G_ANYEXT [[OR2]](s1) - ; CHECK: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[ANYEXT]](s32), [[ANYEXT1]](s32), [[ANYEXT2]](s32) - ; CHECK: $vgpr0_vgpr1_vgpr2 = COPY [[BUILD_VECTOR]](<3 x s32>) + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(<3 x s32>) = COPY $vgpr3_vgpr4_vgpr5 + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(<3 x s32>) = COPY $vgpr6_vgpr7_vgpr8 + ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<3 x s32>) + ; CHECK-NEXT: [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](<3 x s32>) + ; CHECK-NEXT: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[UV]](s32), [[UV3]] + ; 
CHECK-NEXT: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[UV1]](s32), [[UV4]] + ; CHECK-NEXT: [[ICMP2:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[UV2]](s32), [[UV5]] + ; CHECK-NEXT: [[UV6:%[0-9]+]]:_(s32), [[UV7:%[0-9]+]]:_(s32), [[UV8:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<3 x s32>) + ; CHECK-NEXT: [[UV9:%[0-9]+]]:_(s32), [[UV10:%[0-9]+]]:_(s32), [[UV11:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY2]](<3 x s32>) + ; CHECK-NEXT: [[ICMP3:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[UV6]](s32), [[UV9]] + ; CHECK-NEXT: [[ICMP4:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[UV7]](s32), [[UV10]] + ; CHECK-NEXT: [[ICMP5:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[UV8]](s32), [[UV11]] + ; CHECK-NEXT: [[OR:%[0-9]+]]:_(s1) = G_OR [[ICMP]], [[ICMP3]] + ; CHECK-NEXT: [[OR1:%[0-9]+]]:_(s1) = G_OR [[ICMP1]], [[ICMP4]] + ; CHECK-NEXT: [[OR2:%[0-9]+]]:_(s1) = G_OR [[ICMP2]], [[ICMP5]] + ; CHECK-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[OR]](s1) + ; CHECK-NEXT: [[ANYEXT1:%[0-9]+]]:_(s32) = G_ANYEXT [[OR1]](s1) + ; CHECK-NEXT: [[ANYEXT2:%[0-9]+]]:_(s32) = G_ANYEXT [[OR2]](s1) + ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[ANYEXT]](s32), [[ANYEXT1]](s32), [[ANYEXT2]](s32) + ; CHECK-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[BUILD_VECTOR]](<3 x s32>) %0:_(<3 x s32>) = COPY $vgpr0_vgpr1_vgpr2 %1:_(<3 x s32>) = COPY $vgpr3_vgpr4_vgpr5 %2:_(<3 x s32>) = COPY $vgpr6_vgpr7_vgpr8 @@ -118,9 +118,9 @@ body: | ; CHECK-LABEL: name: test_or_s64 ; CHECK: [[COPY:%[0-9]+]]:_(s64) = COPY $vgpr0_vgpr1 - ; CHECK: [[COPY1:%[0-9]+]]:_(s64) = COPY $vgpr2_vgpr3 - ; CHECK: [[OR:%[0-9]+]]:_(s64) = G_OR [[COPY]], [[COPY1]] - ; CHECK: $vgpr0_vgpr1 = COPY [[OR]](s64) + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s64) = COPY $vgpr2_vgpr3 + ; CHECK-NEXT: [[OR:%[0-9]+]]:_(s64) = G_OR [[COPY]], [[COPY1]] + ; CHECK-NEXT: $vgpr0_vgpr1 = COPY [[OR]](s64) %0:_(s64) = COPY $vgpr0_vgpr1 %1:_(s64) = COPY $vgpr2_vgpr3 %2:_(s64) = G_OR %0, %1 @@ -135,16 +135,16 @@ body: | ; CHECK-LABEL: name: test_or_s96 ; CHECK: [[COPY:%[0-9]+]]:_(s96) = COPY $vgpr0_vgpr1_vgpr2 - ; CHECK: [[COPY1:%[0-9]+]]:_(s96) = COPY $vgpr3_vgpr4_vgpr5 - ; CHECK: [[EXTRACT:%[0-9]+]]:_(s64) = G_EXTRACT [[COPY]](s96), 0 - ; CHECK: [[EXTRACT1:%[0-9]+]]:_(s32) = G_EXTRACT [[COPY]](s96), 64 - ; CHECK: [[EXTRACT2:%[0-9]+]]:_(s64) = G_EXTRACT [[COPY1]](s96), 0 - ; CHECK: [[EXTRACT3:%[0-9]+]]:_(s32) = G_EXTRACT [[COPY1]](s96), 64 - ; CHECK: [[OR:%[0-9]+]]:_(s64) = G_OR [[EXTRACT]], [[EXTRACT2]] - ; CHECK: [[OR1:%[0-9]+]]:_(s32) = G_OR [[EXTRACT1]], [[EXTRACT3]] - ; CHECK: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[OR]](s64) - ; CHECK: [[MV:%[0-9]+]]:_(s96) = G_MERGE_VALUES [[UV]](s32), [[UV1]](s32), [[OR1]](s32) - ; CHECK: $vgpr0_vgpr1_vgpr2 = COPY [[MV]](s96) + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s96) = COPY $vgpr3_vgpr4_vgpr5 + ; CHECK-NEXT: [[EXTRACT:%[0-9]+]]:_(s64) = G_EXTRACT [[COPY]](s96), 0 + ; CHECK-NEXT: [[EXTRACT1:%[0-9]+]]:_(s32) = G_EXTRACT [[COPY]](s96), 64 + ; CHECK-NEXT: [[EXTRACT2:%[0-9]+]]:_(s64) = G_EXTRACT [[COPY1]](s96), 0 + ; CHECK-NEXT: [[EXTRACT3:%[0-9]+]]:_(s32) = G_EXTRACT [[COPY1]](s96), 64 + ; CHECK-NEXT: [[OR:%[0-9]+]]:_(s64) = G_OR [[EXTRACT]], [[EXTRACT2]] + ; CHECK-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[EXTRACT1]], [[EXTRACT3]] + ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[OR]](s64) + ; CHECK-NEXT: [[MV:%[0-9]+]]:_(s96) = G_MERGE_VALUES [[UV]](s32), [[UV1]](s32), [[OR1]](s32) + ; CHECK-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[MV]](s96) %0:_(s96) = COPY $vgpr0_vgpr1_vgpr2 %1:_(s96) = COPY $vgpr3_vgpr4_vgpr5 
%2:_(s96) = G_OR %0, %1 @@ -159,13 +159,13 @@ body: | ; CHECK-LABEL: name: test_or_128 ; CHECK: [[COPY:%[0-9]+]]:_(s128) = COPY $vgpr0_vgpr1_vgpr2_vgpr3 - ; CHECK: [[COPY1:%[0-9]+]]:_(s128) = COPY $vgpr4_vgpr5_vgpr6_vgpr7 - ; CHECK: [[UV:%[0-9]+]]:_(s64), [[UV1:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[COPY]](s128) - ; CHECK: [[UV2:%[0-9]+]]:_(s64), [[UV3:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[COPY1]](s128) - ; CHECK: [[OR:%[0-9]+]]:_(s64) = G_OR [[UV]], [[UV2]] - ; CHECK: [[OR1:%[0-9]+]]:_(s64) = G_OR [[UV1]], [[UV3]] - ; CHECK: [[MV:%[0-9]+]]:_(s128) = G_MERGE_VALUES [[OR]](s64), [[OR1]](s64) - ; CHECK: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[MV]](s128) + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s128) = COPY $vgpr4_vgpr5_vgpr6_vgpr7 + ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s64), [[UV1:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[COPY]](s128) + ; CHECK-NEXT: [[UV2:%[0-9]+]]:_(s64), [[UV3:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[COPY1]](s128) + ; CHECK-NEXT: [[OR:%[0-9]+]]:_(s64) = G_OR [[UV]], [[UV2]] + ; CHECK-NEXT: [[OR1:%[0-9]+]]:_(s64) = G_OR [[UV1]], [[UV3]] + ; CHECK-NEXT: [[MV:%[0-9]+]]:_(s128) = G_MERGE_VALUES [[OR]](s64), [[OR1]](s64) + ; CHECK-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[MV]](s128) %0:_(s128) = COPY $vgpr0_vgpr1_vgpr2_vgpr3 %1:_(s128) = COPY $vgpr4_vgpr5_vgpr6_vgpr7 %2:_(s128) = G_OR %0, %1 @@ -180,9 +180,9 @@ body: | ; CHECK-LABEL: name: test_or_s7 ; CHECK: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; CHECK: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; CHECK: [[OR:%[0-9]+]]:_(s32) = G_OR [[COPY]], [[COPY1]] - ; CHECK: $vgpr0 = COPY [[OR]](s32) + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; CHECK-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[COPY]], [[COPY1]] + ; CHECK-NEXT: $vgpr0 = COPY [[OR]](s32) %0:_(s32) = COPY $vgpr0 %1:_(s32) = COPY $vgpr1 %2:_(s7) = G_TRUNC %0 @@ -200,9 +200,9 @@ body: | ; CHECK-LABEL: name: test_or_s8 ; CHECK: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; CHECK: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; CHECK: [[OR:%[0-9]+]]:_(s32) = G_OR [[COPY]], [[COPY1]] - ; CHECK: $vgpr0 = COPY [[OR]](s32) + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; CHECK-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[COPY]], [[COPY1]] + ; CHECK-NEXT: $vgpr0 = COPY [[OR]](s32) %0:_(s32) = COPY $vgpr0 %1:_(s32) = COPY $vgpr1 %2:_(s8) = G_TRUNC %0 @@ -220,12 +220,12 @@ body: | ; CHECK-LABEL: name: test_or_s16 ; CHECK: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; CHECK: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; CHECK: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY]](s32) - ; CHECK: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY1]](s32) - ; CHECK: [[OR:%[0-9]+]]:_(s16) = G_OR [[TRUNC]], [[TRUNC1]] - ; CHECK: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[OR]](s16) - ; CHECK: $vgpr0 = COPY [[ANYEXT]](s32) + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY]](s32) + ; CHECK-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY1]](s32) + ; CHECK-NEXT: [[OR:%[0-9]+]]:_(s16) = G_OR [[TRUNC]], [[TRUNC1]] + ; CHECK-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[OR]](s16) + ; CHECK-NEXT: $vgpr0 = COPY [[ANYEXT]](s32) %0:_(s32) = COPY $vgpr0 %1:_(s32) = COPY $vgpr1 %2:_(s16) = G_TRUNC %0 @@ -243,12 +243,12 @@ body: | ; CHECK-LABEL: name: test_or_s24 ; CHECK: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; CHECK: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; CHECK: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY]](s32) - ; CHECK: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY1]](s32) - ; CHECK: [[OR:%[0-9]+]]:_(s16) = G_OR [[TRUNC]], [[TRUNC1]] - ; CHECK: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT 
[[OR]](s16) - ; CHECK: $vgpr0 = COPY [[ANYEXT]](s32) + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY]](s32) + ; CHECK-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY1]](s32) + ; CHECK-NEXT: [[OR:%[0-9]+]]:_(s16) = G_OR [[TRUNC]], [[TRUNC1]] + ; CHECK-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[OR]](s16) + ; CHECK-NEXT: $vgpr0 = COPY [[ANYEXT]](s32) %0:_(s32) = COPY $vgpr0 %1:_(s32) = COPY $vgpr1 %2:_(s16) = G_TRUNC %0 @@ -266,9 +266,9 @@ body: | ; CHECK-LABEL: name: test_or_s48 ; CHECK: [[COPY:%[0-9]+]]:_(s64) = COPY $vgpr0_vgpr1 - ; CHECK: [[COPY1:%[0-9]+]]:_(s64) = COPY $vgpr2_vgpr3 - ; CHECK: [[OR:%[0-9]+]]:_(s64) = G_OR [[COPY]], [[COPY1]] - ; CHECK: $vgpr0_vgpr1 = COPY [[OR]](s64) + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s64) = COPY $vgpr2_vgpr3 + ; CHECK-NEXT: [[OR:%[0-9]+]]:_(s64) = G_OR [[COPY]], [[COPY1]] + ; CHECK-NEXT: $vgpr0_vgpr1 = COPY [[OR]](s64) %0:_(s64) = COPY $vgpr0_vgpr1 %1:_(s64) = COPY $vgpr2_vgpr3 %2:_(s48) = G_TRUNC %0 @@ -286,9 +286,9 @@ body: | ; CHECK-LABEL: name: test_or_v2s32 ; CHECK: [[COPY:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr0_vgpr1 - ; CHECK: [[COPY1:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr2_vgpr3 - ; CHECK: [[OR:%[0-9]+]]:_(<2 x s32>) = G_OR [[COPY]], [[COPY1]] - ; CHECK: $vgpr0_vgpr1 = COPY [[OR]](<2 x s32>) + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr2_vgpr3 + ; CHECK-NEXT: [[OR:%[0-9]+]]:_(<2 x s32>) = G_OR [[COPY]], [[COPY1]] + ; CHECK-NEXT: $vgpr0_vgpr1 = COPY [[OR]](<2 x s32>) %0:_(<2 x s32>) = COPY $vgpr0_vgpr1 %1:_(<2 x s32>) = COPY $vgpr2_vgpr3 %2:_(<2 x s32>) = G_OR %0, %1 @@ -303,20 +303,20 @@ body: | ; CHECK-LABEL: name: test_or_v3s32 ; CHECK: [[COPY:%[0-9]+]]:_(<3 x s32>) = COPY $vgpr0_vgpr1_vgpr2 - ; CHECK: [[COPY1:%[0-9]+]]:_(<3 x s32>) = COPY $vgpr3_vgpr4_vgpr5 - ; CHECK: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<3 x s32>) - ; CHECK: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF - ; CHECK: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[UV]](s32), [[UV1]](s32) - ; CHECK: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[UV2]](s32), [[DEF]](s32) - ; CHECK: [[DEF1:%[0-9]+]]:_(<2 x s32>) = G_IMPLICIT_DEF - ; CHECK: [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](<3 x s32>) - ; CHECK: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[UV3]](s32), [[UV4]](s32) - ; CHECK: [[BUILD_VECTOR3:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[UV5]](s32), [[DEF]](s32) - ; CHECK: [[OR:%[0-9]+]]:_(<2 x s32>) = G_OR [[BUILD_VECTOR]], [[BUILD_VECTOR2]] - ; CHECK: [[OR1:%[0-9]+]]:_(<2 x s32>) = G_OR [[BUILD_VECTOR1]], [[BUILD_VECTOR3]] - ; CHECK: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s32>) = G_CONCAT_VECTORS [[OR]](<2 x s32>), [[OR1]](<2 x s32>), [[DEF1]](<2 x s32>) - ; CHECK: [[UV6:%[0-9]+]]:_(<3 x s32>), [[UV7:%[0-9]+]]:_(<3 x s32>) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<6 x s32>) - ; CHECK: $vgpr0_vgpr1_vgpr2 = COPY [[UV6]](<3 x s32>) + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(<3 x s32>) = COPY $vgpr3_vgpr4_vgpr5 + ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<3 x s32>) + ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF + ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[UV]](s32), [[UV1]](s32) + ; CHECK-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[UV2]](s32), [[DEF]](s32) + ; CHECK-NEXT: [[DEF1:%[0-9]+]]:_(<2 x s32>) = G_IMPLICIT_DEF + ; CHECK-NEXT: 
[[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](<3 x s32>) + ; CHECK-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[UV3]](s32), [[UV4]](s32) + ; CHECK-NEXT: [[BUILD_VECTOR3:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[UV5]](s32), [[DEF]](s32) + ; CHECK-NEXT: [[OR:%[0-9]+]]:_(<2 x s32>) = G_OR [[BUILD_VECTOR]], [[BUILD_VECTOR2]] + ; CHECK-NEXT: [[OR1:%[0-9]+]]:_(<2 x s32>) = G_OR [[BUILD_VECTOR1]], [[BUILD_VECTOR3]] + ; CHECK-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s32>) = G_CONCAT_VECTORS [[OR]](<2 x s32>), [[OR1]](<2 x s32>), [[DEF1]](<2 x s32>) + ; CHECK-NEXT: [[UV6:%[0-9]+]]:_(<3 x s32>), [[UV7:%[0-9]+]]:_(<3 x s32>) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<6 x s32>) + ; CHECK-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[UV6]](<3 x s32>) %0:_(<3 x s32>) = COPY $vgpr0_vgpr1_vgpr2 %1:_(<3 x s32>) = COPY $vgpr3_vgpr4_vgpr5 %2:_(<3 x s32>) = G_OR %0, %1 @@ -331,13 +331,13 @@ body: | ; CHECK-LABEL: name: test_or_v4s32 ; CHECK: [[COPY:%[0-9]+]]:_(<4 x s32>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3 - ; CHECK: [[COPY1:%[0-9]+]]:_(<4 x s32>) = COPY $vgpr4_vgpr5_vgpr6_vgpr7 - ; CHECK: [[UV:%[0-9]+]]:_(<2 x s32>), [[UV1:%[0-9]+]]:_(<2 x s32>) = G_UNMERGE_VALUES [[COPY]](<4 x s32>) - ; CHECK: [[UV2:%[0-9]+]]:_(<2 x s32>), [[UV3:%[0-9]+]]:_(<2 x s32>) = G_UNMERGE_VALUES [[COPY1]](<4 x s32>) - ; CHECK: [[OR:%[0-9]+]]:_(<2 x s32>) = G_OR [[UV]], [[UV2]] - ; CHECK: [[OR1:%[0-9]+]]:_(<2 x s32>) = G_OR [[UV1]], [[UV3]] - ; CHECK: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s32>) = G_CONCAT_VECTORS [[OR]](<2 x s32>), [[OR1]](<2 x s32>) - ; CHECK: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[CONCAT_VECTORS]](<4 x s32>) + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(<4 x s32>) = COPY $vgpr4_vgpr5_vgpr6_vgpr7 + ; CHECK-NEXT: [[UV:%[0-9]+]]:_(<2 x s32>), [[UV1:%[0-9]+]]:_(<2 x s32>) = G_UNMERGE_VALUES [[COPY]](<4 x s32>) + ; CHECK-NEXT: [[UV2:%[0-9]+]]:_(<2 x s32>), [[UV3:%[0-9]+]]:_(<2 x s32>) = G_UNMERGE_VALUES [[COPY1]](<4 x s32>) + ; CHECK-NEXT: [[OR:%[0-9]+]]:_(<2 x s32>) = G_OR [[UV]], [[UV2]] + ; CHECK-NEXT: [[OR1:%[0-9]+]]:_(<2 x s32>) = G_OR [[UV1]], [[UV3]] + ; CHECK-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s32>) = G_CONCAT_VECTORS [[OR]](<2 x s32>), [[OR1]](<2 x s32>) + ; CHECK-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[CONCAT_VECTORS]](<4 x s32>) %0:_(<4 x s32>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3 %1:_(<4 x s32>) = COPY $vgpr4_vgpr5_vgpr6_vgpr7 %2:_(<4 x s32>) = G_OR %0, %1 @@ -351,25 +351,25 @@ body: | ; CHECK-LABEL: name: test_or_v5s32 ; CHECK: [[DEF:%[0-9]+]]:_(<5 x s32>) = G_IMPLICIT_DEF - ; CHECK: [[DEF1:%[0-9]+]]:_(<5 x s32>) = G_IMPLICIT_DEF - ; CHECK: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[DEF]](<5 x s32>) - ; CHECK: [[DEF2:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF - ; CHECK: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[UV]](s32), [[UV1]](s32) - ; CHECK: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[UV2]](s32), [[UV3]](s32) - ; CHECK: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[UV4]](s32), [[DEF2]](s32) - ; CHECK: [[DEF3:%[0-9]+]]:_(<2 x s32>) = G_IMPLICIT_DEF - ; CHECK: [[UV5:%[0-9]+]]:_(s32), [[UV6:%[0-9]+]]:_(s32), [[UV7:%[0-9]+]]:_(s32), [[UV8:%[0-9]+]]:_(s32), [[UV9:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[DEF1]](<5 x s32>) - ; CHECK: [[BUILD_VECTOR3:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[UV5]](s32), [[UV6]](s32) - ; CHECK: [[BUILD_VECTOR4:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[UV7]](s32), [[UV8]](s32) - ; CHECK: [[BUILD_VECTOR5:%[0-9]+]]:_(<2 x 
s32>) = G_BUILD_VECTOR [[UV9]](s32), [[DEF2]](s32) - ; CHECK: [[OR:%[0-9]+]]:_(<2 x s32>) = G_OR [[BUILD_VECTOR]], [[BUILD_VECTOR3]] - ; CHECK: [[OR1:%[0-9]+]]:_(<2 x s32>) = G_OR [[BUILD_VECTOR1]], [[BUILD_VECTOR4]] - ; CHECK: [[OR2:%[0-9]+]]:_(<2 x s32>) = G_OR [[BUILD_VECTOR2]], [[BUILD_VECTOR5]] - ; CHECK: [[CONCAT_VECTORS:%[0-9]+]]:_(<10 x s32>) = G_CONCAT_VECTORS [[OR]](<2 x s32>), [[OR1]](<2 x s32>), [[OR2]](<2 x s32>), [[DEF3]](<2 x s32>), [[DEF3]](<2 x s32>) - ; CHECK: [[UV10:%[0-9]+]]:_(<5 x s32>), [[UV11:%[0-9]+]]:_(<5 x s32>) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<10 x s32>) - ; CHECK: [[DEF4:%[0-9]+]]:_(<8 x s32>) = G_IMPLICIT_DEF - ; CHECK: [[INSERT:%[0-9]+]]:_(<8 x s32>) = G_INSERT [[DEF4]], [[UV10]](<5 x s32>), 0 - ; CHECK: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[INSERT]](<8 x s32>) + ; CHECK-NEXT: [[DEF1:%[0-9]+]]:_(<5 x s32>) = G_IMPLICIT_DEF + ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[DEF]](<5 x s32>) + ; CHECK-NEXT: [[DEF2:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF + ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[UV]](s32), [[UV1]](s32) + ; CHECK-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[UV2]](s32), [[UV3]](s32) + ; CHECK-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[UV4]](s32), [[DEF2]](s32) + ; CHECK-NEXT: [[DEF3:%[0-9]+]]:_(<2 x s32>) = G_IMPLICIT_DEF + ; CHECK-NEXT: [[UV5:%[0-9]+]]:_(s32), [[UV6:%[0-9]+]]:_(s32), [[UV7:%[0-9]+]]:_(s32), [[UV8:%[0-9]+]]:_(s32), [[UV9:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[DEF1]](<5 x s32>) + ; CHECK-NEXT: [[BUILD_VECTOR3:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[UV5]](s32), [[UV6]](s32) + ; CHECK-NEXT: [[BUILD_VECTOR4:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[UV7]](s32), [[UV8]](s32) + ; CHECK-NEXT: [[BUILD_VECTOR5:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[UV9]](s32), [[DEF2]](s32) + ; CHECK-NEXT: [[OR:%[0-9]+]]:_(<2 x s32>) = G_OR [[BUILD_VECTOR]], [[BUILD_VECTOR3]] + ; CHECK-NEXT: [[OR1:%[0-9]+]]:_(<2 x s32>) = G_OR [[BUILD_VECTOR1]], [[BUILD_VECTOR4]] + ; CHECK-NEXT: [[OR2:%[0-9]+]]:_(<2 x s32>) = G_OR [[BUILD_VECTOR2]], [[BUILD_VECTOR5]] + ; CHECK-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<10 x s32>) = G_CONCAT_VECTORS [[OR]](<2 x s32>), [[OR1]](<2 x s32>), [[OR2]](<2 x s32>), [[DEF3]](<2 x s32>), [[DEF3]](<2 x s32>) + ; CHECK-NEXT: [[UV10:%[0-9]+]]:_(<5 x s32>), [[UV11:%[0-9]+]]:_(<5 x s32>) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<10 x s32>) + ; CHECK-NEXT: [[DEF4:%[0-9]+]]:_(<8 x s32>) = G_IMPLICIT_DEF + ; CHECK-NEXT: [[INSERT:%[0-9]+]]:_(<8 x s32>) = G_INSERT [[DEF4]], [[UV10]](<5 x s32>), 0 + ; CHECK-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[INSERT]](<8 x s32>) %0:_(<5 x s32>) = G_IMPLICIT_DEF %1:_(<5 x s32>) = G_IMPLICIT_DEF %2:_(<5 x s32>) = G_OR %0, %1 @@ -386,13 +386,13 @@ body: | ; CHECK-LABEL: name: test_or_v2s64 ; CHECK: [[COPY:%[0-9]+]]:_(<2 x s64>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3 - ; CHECK: [[COPY1:%[0-9]+]]:_(<2 x s64>) = COPY $vgpr4_vgpr5_vgpr6_vgpr7 - ; CHECK: [[UV:%[0-9]+]]:_(s64), [[UV1:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[COPY]](<2 x s64>) - ; CHECK: [[UV2:%[0-9]+]]:_(s64), [[UV3:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[COPY1]](<2 x s64>) - ; CHECK: [[OR:%[0-9]+]]:_(s64) = G_OR [[UV]], [[UV2]] - ; CHECK: [[OR1:%[0-9]+]]:_(s64) = G_OR [[UV1]], [[UV3]] - ; CHECK: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s64>) = G_BUILD_VECTOR [[OR]](s64), [[OR1]](s64) - ; CHECK: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<2 x s64>) + ; 
CHECK-NEXT: [[COPY1:%[0-9]+]]:_(<2 x s64>) = COPY $vgpr4_vgpr5_vgpr6_vgpr7 + ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s64), [[UV1:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[COPY]](<2 x s64>) + ; CHECK-NEXT: [[UV2:%[0-9]+]]:_(s64), [[UV3:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[COPY1]](<2 x s64>) + ; CHECK-NEXT: [[OR:%[0-9]+]]:_(s64) = G_OR [[UV]], [[UV2]] + ; CHECK-NEXT: [[OR1:%[0-9]+]]:_(s64) = G_OR [[UV1]], [[UV3]] + ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s64>) = G_BUILD_VECTOR [[OR]](s64), [[OR1]](s64) + ; CHECK-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<2 x s64>) %0:_(<2 x s64>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3 %1:_(<2 x s64>) = COPY $vgpr4_vgpr5_vgpr6_vgpr7 %2:_(<2 x s64>) = G_OR %0, %1 @@ -407,9 +407,9 @@ body: | ; CHECK-LABEL: name: test_or_v2s16 ; CHECK: [[COPY:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0 - ; CHECK: [[COPY1:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr1 - ; CHECK: [[OR:%[0-9]+]]:_(<2 x s16>) = G_OR [[COPY]], [[COPY1]] - ; CHECK: $vgpr0 = COPY [[OR]](<2 x s16>) + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr1 + ; CHECK-NEXT: [[OR:%[0-9]+]]:_(<2 x s16>) = G_OR [[COPY]], [[COPY1]] + ; CHECK-NEXT: $vgpr0 = COPY [[OR]](<2 x s16>) %0:_(<2 x s16>) = COPY $vgpr0 %1:_(<2 x s16>) = COPY $vgpr1 %2:_(<2 x s16>) = G_OR %0, %1 @@ -423,41 +423,41 @@ body: | liveins: $vgpr0_vgpr1_vgpr2, $vgpr3_vgpr4_vgpr5 ; CHECK-LABEL: name: test_or_v3s16 ; CHECK: [[COPY:%[0-9]+]]:_(<6 x s16>) = COPY $vgpr0_vgpr1_vgpr2 - ; CHECK: [[COPY1:%[0-9]+]]:_(<6 x s16>) = COPY $vgpr3_vgpr4_vgpr5 - ; CHECK: [[UV:%[0-9]+]]:_(<3 x s16>), [[UV1:%[0-9]+]]:_(<3 x s16>) = G_UNMERGE_VALUES [[COPY]](<6 x s16>) - ; CHECK: [[UV2:%[0-9]+]]:_(<3 x s16>), [[UV3:%[0-9]+]]:_(<3 x s16>) = G_UNMERGE_VALUES [[COPY1]](<6 x s16>) - ; CHECK: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF - ; CHECK: [[INSERT:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF]], [[UV]](<3 x s16>), 0 - ; CHECK: [[INSERT1:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF]], [[UV2]](<3 x s16>), 0 - ; CHECK: [[OR:%[0-9]+]]:_(<4 x s16>) = G_OR [[INSERT]], [[INSERT1]] - ; CHECK: [[DEF1:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF - ; CHECK: [[UV4:%[0-9]+]]:_(<2 x s16>), [[UV5:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[OR]](<4 x s16>) - ; CHECK: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV4]](<2 x s16>) - ; CHECK: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; CHECK: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) - ; CHECK: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV5]](<2 x s16>) - ; CHECK: [[UV6:%[0-9]+]]:_(<2 x s16>), [[UV7:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF1]](<4 x s16>) - ; CHECK: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[UV6]](<2 x s16>) - ; CHECK: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C]](s32) - ; CHECK: [[BITCAST3:%[0-9]+]]:_(s32) = G_BITCAST [[UV7]](<2 x s16>) - ; CHECK: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 - ; CHECK: [[AND:%[0-9]+]]:_(s32) = G_AND [[BITCAST]], [[C1]] - ; CHECK: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LSHR]], [[C1]] - ; CHECK: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C]](s32) - ; CHECK: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]] - ; CHECK: [[BITCAST4:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32) - ; CHECK: [[AND2:%[0-9]+]]:_(s32) = G_AND [[BITCAST1]], [[C1]] - ; CHECK: [[AND3:%[0-9]+]]:_(s32) = G_AND [[BITCAST2]], [[C1]] - ; CHECK: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C]](s32) - ; CHECK: [[OR2:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL1]] - ; CHECK: [[BITCAST5:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR2]](s32) - ; CHECK: [[AND4:%[0-9]+]]:_(s32) = G_AND [[LSHR1]], [[C1]] - ; CHECK: [[AND5:%[0-9]+]]:_(s32) = G_AND 
[[BITCAST3]], [[C1]] - ; CHECK: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[C]](s32) - ; CHECK: [[OR3:%[0-9]+]]:_(s32) = G_OR [[AND4]], [[SHL2]] - ; CHECK: [[BITCAST6:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR3]](s32) - ; CHECK: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BITCAST4]](<2 x s16>), [[BITCAST5]](<2 x s16>), [[BITCAST6]](<2 x s16>) - ; CHECK: $vgpr0_vgpr1_vgpr2 = COPY [[CONCAT_VECTORS]](<6 x s16>) + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(<6 x s16>) = COPY $vgpr3_vgpr4_vgpr5 + ; CHECK-NEXT: [[UV:%[0-9]+]]:_(<3 x s16>), [[UV1:%[0-9]+]]:_(<3 x s16>) = G_UNMERGE_VALUES [[COPY]](<6 x s16>) + ; CHECK-NEXT: [[UV2:%[0-9]+]]:_(<3 x s16>), [[UV3:%[0-9]+]]:_(<3 x s16>) = G_UNMERGE_VALUES [[COPY1]](<6 x s16>) + ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF + ; CHECK-NEXT: [[INSERT:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF]], [[UV]](<3 x s16>), 0 + ; CHECK-NEXT: [[INSERT1:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF]], [[UV2]](<3 x s16>), 0 + ; CHECK-NEXT: [[OR:%[0-9]+]]:_(<4 x s16>) = G_OR [[INSERT]], [[INSERT1]] + ; CHECK-NEXT: [[DEF1:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF + ; CHECK-NEXT: [[UV4:%[0-9]+]]:_(<2 x s16>), [[UV5:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[OR]](<4 x s16>) + ; CHECK-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV4]](<2 x s16>) + ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; CHECK-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) + ; CHECK-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV5]](<2 x s16>) + ; CHECK-NEXT: [[UV6:%[0-9]+]]:_(<2 x s16>), [[UV7:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF1]](<4 x s16>) + ; CHECK-NEXT: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[UV6]](<2 x s16>) + ; CHECK-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C]](s32) + ; CHECK-NEXT: [[BITCAST3:%[0-9]+]]:_(s32) = G_BITCAST [[UV7]](<2 x s16>) + ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 + ; CHECK-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[BITCAST]], [[C1]] + ; CHECK-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LSHR]], [[C1]] + ; CHECK-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C]](s32) + ; CHECK-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]] + ; CHECK-NEXT: [[BITCAST4:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32) + ; CHECK-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[BITCAST1]], [[C1]] + ; CHECK-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[BITCAST2]], [[C1]] + ; CHECK-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C]](s32) + ; CHECK-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL1]] + ; CHECK-NEXT: [[BITCAST5:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR2]](s32) + ; CHECK-NEXT: [[AND4:%[0-9]+]]:_(s32) = G_AND [[LSHR1]], [[C1]] + ; CHECK-NEXT: [[AND5:%[0-9]+]]:_(s32) = G_AND [[BITCAST3]], [[C1]] + ; CHECK-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[C]](s32) + ; CHECK-NEXT: [[OR3:%[0-9]+]]:_(s32) = G_OR [[AND4]], [[SHL2]] + ; CHECK-NEXT: [[BITCAST6:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR3]](s32) + ; CHECK-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BITCAST4]](<2 x s16>), [[BITCAST5]](<2 x s16>), [[BITCAST6]](<2 x s16>) + ; CHECK-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[CONCAT_VECTORS]](<6 x s16>) %0:_(<6 x s16>) = COPY $vgpr0_vgpr1_vgpr2 %1:_(<6 x s16>) = COPY $vgpr3_vgpr4_vgpr5 %2:_(<3 x s16>), %3:_(<3 x s16>) = G_UNMERGE_VALUES %0 @@ -477,9 +477,9 @@ body: | ; CHECK-LABEL: name: test_or_v4s16 ; CHECK: [[COPY:%[0-9]+]]:_(<4 x s16>) = COPY $vgpr0_vgpr1 - ; CHECK: [[COPY1:%[0-9]+]]:_(<4 x s16>) = COPY $vgpr2_vgpr3 - ; CHECK: [[OR:%[0-9]+]]:_(<4 x s16>) = G_OR [[COPY]], [[COPY1]] - ; CHECK: 
$vgpr0_vgpr1 = COPY [[OR]](<4 x s16>) + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(<4 x s16>) = COPY $vgpr2_vgpr3 + ; CHECK-NEXT: [[OR:%[0-9]+]]:_(<4 x s16>) = G_OR [[COPY]], [[COPY1]] + ; CHECK-NEXT: $vgpr0_vgpr1 = COPY [[OR]](<4 x s16>) %0:_(<4 x s16>) = COPY $vgpr0_vgpr1 %1:_(<4 x s16>) = COPY $vgpr2_vgpr3 %2:_(<4 x s16>) = G_OR %0, %1 @@ -493,54 +493,54 @@ body: | ; CHECK-LABEL: name: test_or_v5s16 ; CHECK: [[DEF:%[0-9]+]]:_(<6 x s16>) = G_IMPLICIT_DEF - ; CHECK: [[DEF1:%[0-9]+]]:_(<6 x s16>) = G_IMPLICIT_DEF - ; CHECK: [[CONCAT_VECTORS:%[0-9]+]]:_(<30 x s16>) = G_CONCAT_VECTORS [[DEF]](<6 x s16>), [[DEF1]](<6 x s16>), [[DEF1]](<6 x s16>), [[DEF1]](<6 x s16>), [[DEF1]](<6 x s16>) - ; CHECK: [[UV:%[0-9]+]]:_(<5 x s16>), [[UV1:%[0-9]+]]:_(<5 x s16>), [[UV2:%[0-9]+]]:_(<5 x s16>), [[UV3:%[0-9]+]]:_(<5 x s16>), [[UV4:%[0-9]+]]:_(<5 x s16>), [[UV5:%[0-9]+]]:_(<5 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<30 x s16>) - ; CHECK: [[DEF2:%[0-9]+]]:_(<6 x s16>) = G_IMPLICIT_DEF - ; CHECK: [[CONCAT_VECTORS1:%[0-9]+]]:_(<30 x s16>) = G_CONCAT_VECTORS [[DEF2]](<6 x s16>), [[DEF1]](<6 x s16>), [[DEF1]](<6 x s16>), [[DEF1]](<6 x s16>), [[DEF1]](<6 x s16>) - ; CHECK: [[UV6:%[0-9]+]]:_(<5 x s16>), [[UV7:%[0-9]+]]:_(<5 x s16>), [[UV8:%[0-9]+]]:_(<5 x s16>), [[UV9:%[0-9]+]]:_(<5 x s16>), [[UV10:%[0-9]+]]:_(<5 x s16>), [[UV11:%[0-9]+]]:_(<5 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS1]](<30 x s16>) - ; CHECK: [[INSERT:%[0-9]+]]:_(<6 x s16>) = G_INSERT [[DEF1]], [[UV]](<5 x s16>), 0 - ; CHECK: [[INSERT1:%[0-9]+]]:_(<6 x s16>) = G_INSERT [[DEF1]], [[UV6]](<5 x s16>), 0 - ; CHECK: [[UV12:%[0-9]+]]:_(<3 x s16>), [[UV13:%[0-9]+]]:_(<3 x s16>) = G_UNMERGE_VALUES [[INSERT]](<6 x s16>) - ; CHECK: [[UV14:%[0-9]+]]:_(<3 x s16>), [[UV15:%[0-9]+]]:_(<3 x s16>) = G_UNMERGE_VALUES [[INSERT1]](<6 x s16>) - ; CHECK: [[DEF3:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF - ; CHECK: [[INSERT2:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF3]], [[UV12]](<3 x s16>), 0 - ; CHECK: [[INSERT3:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF3]], [[UV14]](<3 x s16>), 0 - ; CHECK: [[OR:%[0-9]+]]:_(<4 x s16>) = G_OR [[INSERT2]], [[INSERT3]] - ; CHECK: [[INSERT4:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF3]], [[UV13]](<3 x s16>), 0 - ; CHECK: [[INSERT5:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF3]], [[UV15]](<3 x s16>), 0 - ; CHECK: [[OR1:%[0-9]+]]:_(<4 x s16>) = G_OR [[INSERT4]], [[INSERT5]] - ; CHECK: [[UV16:%[0-9]+]]:_(<2 x s16>), [[UV17:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[OR]](<4 x s16>) - ; CHECK: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV16]](<2 x s16>) - ; CHECK: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; CHECK: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) - ; CHECK: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV17]](<2 x s16>) - ; CHECK: [[UV18:%[0-9]+]]:_(<2 x s16>), [[UV19:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[OR1]](<4 x s16>) - ; CHECK: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[UV18]](<2 x s16>) - ; CHECK: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C]](s32) - ; CHECK: [[BITCAST3:%[0-9]+]]:_(s32) = G_BITCAST [[UV19]](<2 x s16>) - ; CHECK: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 - ; CHECK: [[AND:%[0-9]+]]:_(s32) = G_AND [[BITCAST]], [[C1]] - ; CHECK: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LSHR]], [[C1]] - ; CHECK: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C]](s32) - ; CHECK: [[OR2:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]] - ; CHECK: [[BITCAST4:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR2]](s32) - ; CHECK: [[AND2:%[0-9]+]]:_(s32) = G_AND [[BITCAST1]], [[C1]] - ; CHECK: [[AND3:%[0-9]+]]:_(s32) = G_AND [[BITCAST2]], [[C1]] - ; CHECK: 
[[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C]](s32) - ; CHECK: [[OR3:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL1]] - ; CHECK: [[BITCAST5:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR3]](s32) - ; CHECK: [[AND4:%[0-9]+]]:_(s32) = G_AND [[LSHR1]], [[C1]] - ; CHECK: [[AND5:%[0-9]+]]:_(s32) = G_AND [[BITCAST3]], [[C1]] - ; CHECK: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[C]](s32) - ; CHECK: [[OR4:%[0-9]+]]:_(s32) = G_OR [[AND4]], [[SHL2]] - ; CHECK: [[BITCAST6:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR4]](s32) - ; CHECK: [[CONCAT_VECTORS2:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BITCAST4]](<2 x s16>), [[BITCAST5]](<2 x s16>), [[BITCAST6]](<2 x s16>) - ; CHECK: [[CONCAT_VECTORS3:%[0-9]+]]:_(<30 x s16>) = G_CONCAT_VECTORS [[CONCAT_VECTORS2]](<6 x s16>), [[DEF1]](<6 x s16>), [[DEF1]](<6 x s16>), [[DEF1]](<6 x s16>), [[DEF1]](<6 x s16>) - ; CHECK: [[UV20:%[0-9]+]]:_(<5 x s16>), [[UV21:%[0-9]+]]:_(<5 x s16>), [[UV22:%[0-9]+]]:_(<5 x s16>), [[UV23:%[0-9]+]]:_(<5 x s16>), [[UV24:%[0-9]+]]:_(<5 x s16>), [[UV25:%[0-9]+]]:_(<5 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS3]](<30 x s16>) - ; CHECK: [[DEF4:%[0-9]+]]:_(<8 x s16>) = G_IMPLICIT_DEF - ; CHECK: [[INSERT6:%[0-9]+]]:_(<8 x s16>) = G_INSERT [[DEF4]], [[UV20]](<5 x s16>), 0 - ; CHECK: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[INSERT6]](<8 x s16>) + ; CHECK-NEXT: [[DEF1:%[0-9]+]]:_(<6 x s16>) = G_IMPLICIT_DEF + ; CHECK-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<30 x s16>) = G_CONCAT_VECTORS [[DEF]](<6 x s16>), [[DEF1]](<6 x s16>), [[DEF1]](<6 x s16>), [[DEF1]](<6 x s16>), [[DEF1]](<6 x s16>) + ; CHECK-NEXT: [[UV:%[0-9]+]]:_(<5 x s16>), [[UV1:%[0-9]+]]:_(<5 x s16>), [[UV2:%[0-9]+]]:_(<5 x s16>), [[UV3:%[0-9]+]]:_(<5 x s16>), [[UV4:%[0-9]+]]:_(<5 x s16>), [[UV5:%[0-9]+]]:_(<5 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<30 x s16>) + ; CHECK-NEXT: [[DEF2:%[0-9]+]]:_(<6 x s16>) = G_IMPLICIT_DEF + ; CHECK-NEXT: [[CONCAT_VECTORS1:%[0-9]+]]:_(<30 x s16>) = G_CONCAT_VECTORS [[DEF2]](<6 x s16>), [[DEF1]](<6 x s16>), [[DEF1]](<6 x s16>), [[DEF1]](<6 x s16>), [[DEF1]](<6 x s16>) + ; CHECK-NEXT: [[UV6:%[0-9]+]]:_(<5 x s16>), [[UV7:%[0-9]+]]:_(<5 x s16>), [[UV8:%[0-9]+]]:_(<5 x s16>), [[UV9:%[0-9]+]]:_(<5 x s16>), [[UV10:%[0-9]+]]:_(<5 x s16>), [[UV11:%[0-9]+]]:_(<5 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS1]](<30 x s16>) + ; CHECK-NEXT: [[INSERT:%[0-9]+]]:_(<6 x s16>) = G_INSERT [[DEF1]], [[UV]](<5 x s16>), 0 + ; CHECK-NEXT: [[INSERT1:%[0-9]+]]:_(<6 x s16>) = G_INSERT [[DEF1]], [[UV6]](<5 x s16>), 0 + ; CHECK-NEXT: [[UV12:%[0-9]+]]:_(<3 x s16>), [[UV13:%[0-9]+]]:_(<3 x s16>) = G_UNMERGE_VALUES [[INSERT]](<6 x s16>) + ; CHECK-NEXT: [[UV14:%[0-9]+]]:_(<3 x s16>), [[UV15:%[0-9]+]]:_(<3 x s16>) = G_UNMERGE_VALUES [[INSERT1]](<6 x s16>) + ; CHECK-NEXT: [[DEF3:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF + ; CHECK-NEXT: [[INSERT2:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF3]], [[UV12]](<3 x s16>), 0 + ; CHECK-NEXT: [[INSERT3:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF3]], [[UV14]](<3 x s16>), 0 + ; CHECK-NEXT: [[OR:%[0-9]+]]:_(<4 x s16>) = G_OR [[INSERT2]], [[INSERT3]] + ; CHECK-NEXT: [[INSERT4:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF3]], [[UV13]](<3 x s16>), 0 + ; CHECK-NEXT: [[INSERT5:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF3]], [[UV15]](<3 x s16>), 0 + ; CHECK-NEXT: [[OR1:%[0-9]+]]:_(<4 x s16>) = G_OR [[INSERT4]], [[INSERT5]] + ; CHECK-NEXT: [[UV16:%[0-9]+]]:_(<2 x s16>), [[UV17:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[OR]](<4 x s16>) + ; CHECK-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV16]](<2 x s16>) + ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; CHECK-NEXT: 
[[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) + ; CHECK-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV17]](<2 x s16>) + ; CHECK-NEXT: [[UV18:%[0-9]+]]:_(<2 x s16>), [[UV19:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[OR1]](<4 x s16>) + ; CHECK-NEXT: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[UV18]](<2 x s16>) + ; CHECK-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C]](s32) + ; CHECK-NEXT: [[BITCAST3:%[0-9]+]]:_(s32) = G_BITCAST [[UV19]](<2 x s16>) + ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 + ; CHECK-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[BITCAST]], [[C1]] + ; CHECK-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LSHR]], [[C1]] + ; CHECK-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C]](s32) + ; CHECK-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]] + ; CHECK-NEXT: [[BITCAST4:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR2]](s32) + ; CHECK-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[BITCAST1]], [[C1]] + ; CHECK-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[BITCAST2]], [[C1]] + ; CHECK-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C]](s32) + ; CHECK-NEXT: [[OR3:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL1]] + ; CHECK-NEXT: [[BITCAST5:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR3]](s32) + ; CHECK-NEXT: [[AND4:%[0-9]+]]:_(s32) = G_AND [[LSHR1]], [[C1]] + ; CHECK-NEXT: [[AND5:%[0-9]+]]:_(s32) = G_AND [[BITCAST3]], [[C1]] + ; CHECK-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[C]](s32) + ; CHECK-NEXT: [[OR4:%[0-9]+]]:_(s32) = G_OR [[AND4]], [[SHL2]] + ; CHECK-NEXT: [[BITCAST6:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR4]](s32) + ; CHECK-NEXT: [[CONCAT_VECTORS2:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BITCAST4]](<2 x s16>), [[BITCAST5]](<2 x s16>), [[BITCAST6]](<2 x s16>) + ; CHECK-NEXT: [[CONCAT_VECTORS3:%[0-9]+]]:_(<30 x s16>) = G_CONCAT_VECTORS [[CONCAT_VECTORS2]](<6 x s16>), [[DEF1]](<6 x s16>), [[DEF1]](<6 x s16>), [[DEF1]](<6 x s16>), [[DEF1]](<6 x s16>) + ; CHECK-NEXT: [[UV20:%[0-9]+]]:_(<5 x s16>), [[UV21:%[0-9]+]]:_(<5 x s16>), [[UV22:%[0-9]+]]:_(<5 x s16>), [[UV23:%[0-9]+]]:_(<5 x s16>), [[UV24:%[0-9]+]]:_(<5 x s16>), [[UV25:%[0-9]+]]:_(<5 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS3]](<30 x s16>) + ; CHECK-NEXT: [[DEF4:%[0-9]+]]:_(<8 x s16>) = G_IMPLICIT_DEF + ; CHECK-NEXT: [[INSERT6:%[0-9]+]]:_(<8 x s16>) = G_INSERT [[DEF4]], [[UV20]](<5 x s16>), 0 + ; CHECK-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[INSERT6]](<8 x s16>) %0:_(<5 x s16>) = G_IMPLICIT_DEF %1:_(<5 x s16>) = G_IMPLICIT_DEF %2:_(<5 x s16>) = G_OR %0, %1 @@ -556,33 +556,33 @@ body: | ; CHECK-LABEL: name: test_or_v3s8 ; CHECK: [[DEF:%[0-9]+]]:_(<3 x s8>) = G_IMPLICIT_DEF - ; CHECK: [[DEF1:%[0-9]+]]:_(<3 x s8>) = G_IMPLICIT_DEF - ; CHECK: [[DEF2:%[0-9]+]]:_(<4 x s8>) = G_IMPLICIT_DEF - ; CHECK: [[INSERT:%[0-9]+]]:_(<4 x s8>) = G_INSERT [[DEF2]], [[DEF]](<3 x s8>), 0 - ; CHECK: [[INSERT1:%[0-9]+]]:_(<4 x s8>) = G_INSERT [[DEF2]], [[DEF1]](<3 x s8>), 0 - ; CHECK: [[UV:%[0-9]+]]:_(s8), [[UV1:%[0-9]+]]:_(s8), [[UV2:%[0-9]+]]:_(s8), [[UV3:%[0-9]+]]:_(s8) = G_UNMERGE_VALUES [[INSERT]](<4 x s8>) - ; CHECK: [[UV4:%[0-9]+]]:_(s8), [[UV5:%[0-9]+]]:_(s8), [[UV6:%[0-9]+]]:_(s8), [[UV7:%[0-9]+]]:_(s8) = G_UNMERGE_VALUES [[INSERT1]](<4 x s8>) - ; CHECK: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[UV]](s8) - ; CHECK: [[ANYEXT1:%[0-9]+]]:_(s32) = G_ANYEXT [[UV4]](s8) - ; CHECK: [[OR:%[0-9]+]]:_(s32) = G_OR [[ANYEXT]], [[ANYEXT1]] - ; CHECK: [[TRUNC:%[0-9]+]]:_(s8) = G_TRUNC [[OR]](s32) - ; CHECK: [[ANYEXT2:%[0-9]+]]:_(s32) = G_ANYEXT [[UV1]](s8) - ; CHECK: [[ANYEXT3:%[0-9]+]]:_(s32) = G_ANYEXT [[UV5]](s8) - ; CHECK: 
[[OR1:%[0-9]+]]:_(s32) = G_OR [[ANYEXT2]], [[ANYEXT3]] - ; CHECK: [[TRUNC1:%[0-9]+]]:_(s8) = G_TRUNC [[OR1]](s32) - ; CHECK: [[ANYEXT4:%[0-9]+]]:_(s32) = G_ANYEXT [[UV2]](s8) - ; CHECK: [[ANYEXT5:%[0-9]+]]:_(s32) = G_ANYEXT [[UV6]](s8) - ; CHECK: [[OR2:%[0-9]+]]:_(s32) = G_OR [[ANYEXT4]], [[ANYEXT5]] - ; CHECK: [[TRUNC2:%[0-9]+]]:_(s8) = G_TRUNC [[OR2]](s32) - ; CHECK: [[ANYEXT6:%[0-9]+]]:_(s32) = G_ANYEXT [[UV3]](s8) - ; CHECK: [[ANYEXT7:%[0-9]+]]:_(s32) = G_ANYEXT [[UV7]](s8) - ; CHECK: [[OR3:%[0-9]+]]:_(s32) = G_OR [[ANYEXT6]], [[ANYEXT7]] - ; CHECK: [[TRUNC3:%[0-9]+]]:_(s8) = G_TRUNC [[OR3]](s32) - ; CHECK: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s8>) = G_BUILD_VECTOR [[TRUNC]](s8), [[TRUNC1]](s8), [[TRUNC2]](s8), [[TRUNC3]](s8) - ; CHECK: [[CONCAT_VECTORS:%[0-9]+]]:_(<12 x s8>) = G_CONCAT_VECTORS [[BUILD_VECTOR]](<4 x s8>), [[DEF2]](<4 x s8>), [[DEF2]](<4 x s8>) - ; CHECK: [[UV8:%[0-9]+]]:_(<3 x s8>), [[UV9:%[0-9]+]]:_(<3 x s8>), [[UV10:%[0-9]+]]:_(<3 x s8>), [[UV11:%[0-9]+]]:_(<3 x s8>) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<12 x s8>) - ; CHECK: [[ANYEXT8:%[0-9]+]]:_(<3 x s32>) = G_ANYEXT [[UV8]](<3 x s8>) - ; CHECK: $vgpr0_vgpr1_vgpr2 = COPY [[ANYEXT8]](<3 x s32>) + ; CHECK-NEXT: [[DEF1:%[0-9]+]]:_(<3 x s8>) = G_IMPLICIT_DEF + ; CHECK-NEXT: [[DEF2:%[0-9]+]]:_(<4 x s8>) = G_IMPLICIT_DEF + ; CHECK-NEXT: [[INSERT:%[0-9]+]]:_(<4 x s8>) = G_INSERT [[DEF2]], [[DEF]](<3 x s8>), 0 + ; CHECK-NEXT: [[INSERT1:%[0-9]+]]:_(<4 x s8>) = G_INSERT [[DEF2]], [[DEF1]](<3 x s8>), 0 + ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s8), [[UV1:%[0-9]+]]:_(s8), [[UV2:%[0-9]+]]:_(s8), [[UV3:%[0-9]+]]:_(s8) = G_UNMERGE_VALUES [[INSERT]](<4 x s8>) + ; CHECK-NEXT: [[UV4:%[0-9]+]]:_(s8), [[UV5:%[0-9]+]]:_(s8), [[UV6:%[0-9]+]]:_(s8), [[UV7:%[0-9]+]]:_(s8) = G_UNMERGE_VALUES [[INSERT1]](<4 x s8>) + ; CHECK-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[UV]](s8) + ; CHECK-NEXT: [[ANYEXT1:%[0-9]+]]:_(s32) = G_ANYEXT [[UV4]](s8) + ; CHECK-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[ANYEXT]], [[ANYEXT1]] + ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(s8) = G_TRUNC [[OR]](s32) + ; CHECK-NEXT: [[ANYEXT2:%[0-9]+]]:_(s32) = G_ANYEXT [[UV1]](s8) + ; CHECK-NEXT: [[ANYEXT3:%[0-9]+]]:_(s32) = G_ANYEXT [[UV5]](s8) + ; CHECK-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[ANYEXT2]], [[ANYEXT3]] + ; CHECK-NEXT: [[TRUNC1:%[0-9]+]]:_(s8) = G_TRUNC [[OR1]](s32) + ; CHECK-NEXT: [[ANYEXT4:%[0-9]+]]:_(s32) = G_ANYEXT [[UV2]](s8) + ; CHECK-NEXT: [[ANYEXT5:%[0-9]+]]:_(s32) = G_ANYEXT [[UV6]](s8) + ; CHECK-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[ANYEXT4]], [[ANYEXT5]] + ; CHECK-NEXT: [[TRUNC2:%[0-9]+]]:_(s8) = G_TRUNC [[OR2]](s32) + ; CHECK-NEXT: [[ANYEXT6:%[0-9]+]]:_(s32) = G_ANYEXT [[UV3]](s8) + ; CHECK-NEXT: [[ANYEXT7:%[0-9]+]]:_(s32) = G_ANYEXT [[UV7]](s8) + ; CHECK-NEXT: [[OR3:%[0-9]+]]:_(s32) = G_OR [[ANYEXT6]], [[ANYEXT7]] + ; CHECK-NEXT: [[TRUNC3:%[0-9]+]]:_(s8) = G_TRUNC [[OR3]](s32) + ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s8>) = G_BUILD_VECTOR [[TRUNC]](s8), [[TRUNC1]](s8), [[TRUNC2]](s8), [[TRUNC3]](s8) + ; CHECK-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<12 x s8>) = G_CONCAT_VECTORS [[BUILD_VECTOR]](<4 x s8>), [[DEF2]](<4 x s8>), [[DEF2]](<4 x s8>) + ; CHECK-NEXT: [[UV8:%[0-9]+]]:_(<3 x s8>), [[UV9:%[0-9]+]]:_(<3 x s8>), [[UV10:%[0-9]+]]:_(<3 x s8>), [[UV11:%[0-9]+]]:_(<3 x s8>) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<12 x s8>) + ; CHECK-NEXT: [[ANYEXT8:%[0-9]+]]:_(<3 x s32>) = G_ANYEXT [[UV8]](<3 x s8>) + ; CHECK-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[ANYEXT8]](<3 x s32>) %0:_(<3 x s8>) = G_IMPLICIT_DEF %1:_(<3 x s8>) = G_IMPLICIT_DEF %2:_(<3 x s8>) = G_OR %0, %1 @@ -597,15 +597,15 
@@ body: | ; CHECK-LABEL: name: test_or_v4s8 ; CHECK: [[DEF:%[0-9]+]]:_(<4 x s32>) = G_IMPLICIT_DEF - ; CHECK: [[DEF1:%[0-9]+]]:_(<4 x s32>) = G_IMPLICIT_DEF - ; CHECK: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[DEF]](<4 x s32>) - ; CHECK: [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32), [[UV6:%[0-9]+]]:_(s32), [[UV7:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[DEF1]](<4 x s32>) - ; CHECK: [[OR:%[0-9]+]]:_(s32) = G_OR [[UV]], [[UV4]] - ; CHECK: [[OR1:%[0-9]+]]:_(s32) = G_OR [[UV1]], [[UV5]] - ; CHECK: [[OR2:%[0-9]+]]:_(s32) = G_OR [[UV2]], [[UV6]] - ; CHECK: [[OR3:%[0-9]+]]:_(s32) = G_OR [[UV3]], [[UV7]] - ; CHECK: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[OR]](s32), [[OR1]](s32), [[OR2]](s32), [[OR3]](s32) - ; CHECK: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<4 x s32>) + ; CHECK-NEXT: [[DEF1:%[0-9]+]]:_(<4 x s32>) = G_IMPLICIT_DEF + ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[DEF]](<4 x s32>) + ; CHECK-NEXT: [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32), [[UV6:%[0-9]+]]:_(s32), [[UV7:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[DEF1]](<4 x s32>) + ; CHECK-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[UV]], [[UV4]] + ; CHECK-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[UV1]], [[UV5]] + ; CHECK-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[UV2]], [[UV6]] + ; CHECK-NEXT: [[OR3:%[0-9]+]]:_(s32) = G_OR [[UV3]], [[UV7]] + ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[OR]](s32), [[OR1]](s32), [[OR2]](s32), [[OR3]](s32) + ; CHECK-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<4 x s32>) %0:_(<4 x s8>) = G_IMPLICIT_DEF %1:_(<4 x s8>) = G_IMPLICIT_DEF %2:_(<4 x s8>) = G_OR %0, %1 diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-phi.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-phi.mir index 23b7328027af..7b867c833e1f 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-phi.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-phi.mir @@ -8,22 +8,26 @@ tracksRegLiveness: true body: | ; CHECK-LABEL: name: test_phi_s32 ; CHECK: bb.0: - ; CHECK: successors: %bb.1(0x40000000), %bb.2(0x40000000) - ; CHECK: liveins: $vgpr0, $vgpr1 - ; CHECK: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; CHECK: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; CHECK: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 - ; CHECK: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[COPY1]](s32), [[C]] - ; CHECK: G_BRCOND [[ICMP]](s1), %bb.1 - ; CHECK: G_BR %bb.2 - ; CHECK: bb.1: - ; CHECK: successors: %bb.2(0x80000000) - ; CHECK: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[COPY]], [[COPY]] - ; CHECK: G_BR %bb.2 - ; CHECK: bb.2: - ; CHECK: [[PHI:%[0-9]+]]:_(s32) = G_PHI [[COPY]](s32), %bb.0, [[ADD]](s32), %bb.1 - ; CHECK: $vgpr0 = COPY [[PHI]](s32) - ; CHECK: S_SETPC_B64 undef $sgpr30_sgpr31 + ; CHECK-NEXT: successors: %bb.1(0x40000000), %bb.2(0x40000000) + ; CHECK-NEXT: liveins: $vgpr0, $vgpr1 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 + ; CHECK-NEXT: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[COPY1]](s32), [[C]] + ; CHECK-NEXT: G_BRCOND [[ICMP]](s1), %bb.1 + ; CHECK-NEXT: G_BR %bb.2 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: bb.1: + ; CHECK-NEXT: successors: %bb.2(0x80000000) + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[COPY]], [[COPY]] + ; CHECK-NEXT: G_BR %bb.2 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: 
bb.2: + ; CHECK-NEXT: [[PHI:%[0-9]+]]:_(s32) = G_PHI [[COPY]](s32), %bb.0, [[ADD]](s32), %bb.1 + ; CHECK-NEXT: $vgpr0 = COPY [[PHI]](s32) + ; CHECK-NEXT: S_SETPC_B64 undef $sgpr30_sgpr31 bb.0: successors: %bb.1, %bb.2 liveins: $vgpr0, $vgpr1 @@ -54,34 +58,38 @@ tracksRegLiveness: true body: | ; CHECK-LABEL: name: test_phi_v2s16 ; CHECK: bb.0: - ; CHECK: successors: %bb.1(0x40000000), %bb.2(0x40000000) - ; CHECK: liveins: $vgpr0, $vgpr1 - ; CHECK: [[COPY:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0 - ; CHECK: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; CHECK: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 - ; CHECK: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[COPY1]](s32), [[C]] - ; CHECK: G_BRCOND [[ICMP]](s1), %bb.1 - ; CHECK: G_BR %bb.2 - ; CHECK: bb.1: - ; CHECK: successors: %bb.2(0x80000000) - ; CHECK: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY]](<2 x s16>) - ; CHECK: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; CHECK: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C1]](s32) - ; CHECK: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[COPY]](<2 x s16>) - ; CHECK: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C1]](s32) - ; CHECK: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[BITCAST]], [[BITCAST1]] - ; CHECK: [[ADD1:%[0-9]+]]:_(s32) = G_ADD [[LSHR]], [[LSHR1]] - ; CHECK: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 - ; CHECK: [[AND:%[0-9]+]]:_(s32) = G_AND [[ADD]], [[C2]] - ; CHECK: [[AND1:%[0-9]+]]:_(s32) = G_AND [[ADD1]], [[C2]] - ; CHECK: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C1]](s32) - ; CHECK: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]] - ; CHECK: [[BITCAST2:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) - ; CHECK: G_BR %bb.2 - ; CHECK: bb.2: - ; CHECK: [[PHI:%[0-9]+]]:_(<2 x s16>) = G_PHI [[COPY]](<2 x s16>), %bb.0, [[BITCAST2]](<2 x s16>), %bb.1 - ; CHECK: $vgpr0 = COPY [[PHI]](<2 x s16>) - ; CHECK: S_SETPC_B64 undef $sgpr30_sgpr31 + ; CHECK-NEXT: successors: %bb.1(0x40000000), %bb.2(0x40000000) + ; CHECK-NEXT: liveins: $vgpr0, $vgpr1 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 + ; CHECK-NEXT: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[COPY1]](s32), [[C]] + ; CHECK-NEXT: G_BRCOND [[ICMP]](s1), %bb.1 + ; CHECK-NEXT: G_BR %bb.2 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: bb.1: + ; CHECK-NEXT: successors: %bb.2(0x80000000) + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY]](<2 x s16>) + ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; CHECK-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C1]](s32) + ; CHECK-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[COPY]](<2 x s16>) + ; CHECK-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C1]](s32) + ; CHECK-NEXT: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[BITCAST]], [[BITCAST1]] + ; CHECK-NEXT: [[ADD1:%[0-9]+]]:_(s32) = G_ADD [[LSHR]], [[LSHR1]] + ; CHECK-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 + ; CHECK-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[ADD]], [[C2]] + ; CHECK-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[ADD1]], [[C2]] + ; CHECK-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C1]](s32) + ; CHECK-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]] + ; CHECK-NEXT: [[BITCAST2:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) + ; CHECK-NEXT: G_BR %bb.2 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: bb.2: + ; CHECK-NEXT: [[PHI:%[0-9]+]]:_(<2 x s16>) = G_PHI [[COPY]](<2 x s16>), %bb.0, [[BITCAST2]](<2 x s16>), %bb.1 + ; CHECK-NEXT: $vgpr0 = COPY [[PHI]](<2 x s16>) + 
; CHECK-NEXT: S_SETPC_B64 undef $sgpr30_sgpr31 bb.0: successors: %bb.1, %bb.2 liveins: $vgpr0, $vgpr1 @@ -112,81 +120,85 @@ tracksRegLiveness: true body: | ; CHECK-LABEL: name: test_phi_v3s16 ; CHECK: bb.0: - ; CHECK: successors: %bb.1(0x40000000), %bb.2(0x40000000) - ; CHECK: liveins: $vgpr0_vgpr1, $vgpr2 - ; CHECK: [[COPY:%[0-9]+]]:_(<4 x s16>) = COPY $vgpr0_vgpr1 - ; CHECK: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; CHECK: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 - ; CHECK: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[COPY1]](s32), [[C]] - ; CHECK: [[EXTRACT:%[0-9]+]]:_(<3 x s16>) = G_EXTRACT [[COPY]](<4 x s16>), 0 - ; CHECK: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF - ; CHECK: [[INSERT:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF]], [[EXTRACT]](<3 x s16>), 0 - ; CHECK: G_BRCOND [[ICMP]](s1), %bb.1 - ; CHECK: G_BR %bb.2 - ; CHECK: bb.1: - ; CHECK: successors: %bb.2(0x80000000) - ; CHECK: [[DEF1:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF - ; CHECK: [[INSERT1:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF1]], [[EXTRACT]](<3 x s16>), 0 - ; CHECK: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[INSERT1]](<4 x s16>) - ; CHECK: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>) - ; CHECK: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; CHECK: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C1]](s32) - ; CHECK: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>) - ; CHECK: [[INSERT2:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF1]], [[EXTRACT]](<3 x s16>), 0 - ; CHECK: [[UV2:%[0-9]+]]:_(<2 x s16>), [[UV3:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[INSERT2]](<4 x s16>) - ; CHECK: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[UV2]](<2 x s16>) - ; CHECK: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C1]](s32) - ; CHECK: [[BITCAST3:%[0-9]+]]:_(s32) = G_BITCAST [[UV3]](<2 x s16>) - ; CHECK: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[BITCAST]], [[BITCAST2]] - ; CHECK: [[ADD1:%[0-9]+]]:_(s32) = G_ADD [[LSHR]], [[LSHR1]] - ; CHECK: [[ADD2:%[0-9]+]]:_(s32) = G_ADD [[BITCAST1]], [[BITCAST3]] - ; CHECK: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 - ; CHECK: [[AND:%[0-9]+]]:_(s32) = G_AND [[ADD]], [[C2]] - ; CHECK: [[AND1:%[0-9]+]]:_(s32) = G_AND [[ADD1]], [[C2]] - ; CHECK: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C1]](s32) - ; CHECK: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]] - ; CHECK: [[BITCAST4:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) - ; CHECK: [[AND2:%[0-9]+]]:_(s32) = G_AND [[ADD2]], [[C2]] - ; CHECK: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 - ; CHECK: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[C3]], [[C1]](s32) - ; CHECK: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL1]] - ; CHECK: [[BITCAST5:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32) - ; CHECK: [[DEF2:%[0-9]+]]:_(<2 x s16>) = G_IMPLICIT_DEF - ; CHECK: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BITCAST4]](<2 x s16>), [[BITCAST5]](<2 x s16>), [[DEF2]](<2 x s16>) - ; CHECK: [[UV4:%[0-9]+]]:_(<3 x s16>), [[UV5:%[0-9]+]]:_(<3 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<6 x s16>) - ; CHECK: [[INSERT3:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF1]], [[UV4]](<3 x s16>), 0 - ; CHECK: G_BR %bb.2 - ; CHECK: bb.2: - ; CHECK: [[PHI:%[0-9]+]]:_(<4 x s16>) = G_PHI [[INSERT]](<4 x s16>), %bb.0, [[INSERT3]](<4 x s16>), %bb.1 - ; CHECK: [[DEF3:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF - ; CHECK: [[UV6:%[0-9]+]]:_(<2 x s16>), [[UV7:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[PHI]](<4 x s16>) - ; CHECK: [[BITCAST6:%[0-9]+]]:_(s32) = G_BITCAST [[UV6]](<2 x s16>) - ; CHECK: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; CHECK: 
[[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST6]], [[C4]](s32) - ; CHECK: [[BITCAST7:%[0-9]+]]:_(s32) = G_BITCAST [[UV7]](<2 x s16>) - ; CHECK: [[UV8:%[0-9]+]]:_(<2 x s16>), [[UV9:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF3]](<4 x s16>) - ; CHECK: [[BITCAST8:%[0-9]+]]:_(s32) = G_BITCAST [[UV8]](<2 x s16>) - ; CHECK: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST8]], [[C4]](s32) - ; CHECK: [[BITCAST9:%[0-9]+]]:_(s32) = G_BITCAST [[UV9]](<2 x s16>) - ; CHECK: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 - ; CHECK: [[AND3:%[0-9]+]]:_(s32) = G_AND [[BITCAST6]], [[C5]] - ; CHECK: [[AND4:%[0-9]+]]:_(s32) = G_AND [[LSHR2]], [[C5]] - ; CHECK: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND4]], [[C4]](s32) - ; CHECK: [[OR2:%[0-9]+]]:_(s32) = G_OR [[AND3]], [[SHL2]] - ; CHECK: [[BITCAST10:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR2]](s32) - ; CHECK: [[AND5:%[0-9]+]]:_(s32) = G_AND [[BITCAST7]], [[C5]] - ; CHECK: [[AND6:%[0-9]+]]:_(s32) = G_AND [[BITCAST8]], [[C5]] - ; CHECK: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[AND6]], [[C4]](s32) - ; CHECK: [[OR3:%[0-9]+]]:_(s32) = G_OR [[AND5]], [[SHL3]] - ; CHECK: [[BITCAST11:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR3]](s32) - ; CHECK: [[AND7:%[0-9]+]]:_(s32) = G_AND [[LSHR3]], [[C5]] - ; CHECK: [[AND8:%[0-9]+]]:_(s32) = G_AND [[BITCAST9]], [[C5]] - ; CHECK: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[AND8]], [[C4]](s32) - ; CHECK: [[OR4:%[0-9]+]]:_(s32) = G_OR [[AND7]], [[SHL4]] - ; CHECK: [[BITCAST12:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR4]](s32) - ; CHECK: [[CONCAT_VECTORS1:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BITCAST10]](<2 x s16>), [[BITCAST11]](<2 x s16>), [[BITCAST12]](<2 x s16>) - ; CHECK: $vgpr0_vgpr1_vgpr2 = COPY [[CONCAT_VECTORS1]](<6 x s16>) - ; CHECK: S_SETPC_B64 undef $sgpr30_sgpr31 + ; CHECK-NEXT: successors: %bb.1(0x40000000), %bb.2(0x40000000) + ; CHECK-NEXT: liveins: $vgpr0_vgpr1, $vgpr2 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(<4 x s16>) = COPY $vgpr0_vgpr1 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr2 + ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 + ; CHECK-NEXT: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[COPY1]](s32), [[C]] + ; CHECK-NEXT: [[EXTRACT:%[0-9]+]]:_(<3 x s16>) = G_EXTRACT [[COPY]](<4 x s16>), 0 + ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF + ; CHECK-NEXT: [[INSERT:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF]], [[EXTRACT]](<3 x s16>), 0 + ; CHECK-NEXT: G_BRCOND [[ICMP]](s1), %bb.1 + ; CHECK-NEXT: G_BR %bb.2 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: bb.1: + ; CHECK-NEXT: successors: %bb.2(0x80000000) + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[DEF1:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF + ; CHECK-NEXT: [[INSERT1:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF1]], [[EXTRACT]](<3 x s16>), 0 + ; CHECK-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[INSERT1]](<4 x s16>) + ; CHECK-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>) + ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; CHECK-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C1]](s32) + ; CHECK-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>) + ; CHECK-NEXT: [[INSERT2:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF1]], [[EXTRACT]](<3 x s16>), 0 + ; CHECK-NEXT: [[UV2:%[0-9]+]]:_(<2 x s16>), [[UV3:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[INSERT2]](<4 x s16>) + ; CHECK-NEXT: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[UV2]](<2 x s16>) + ; CHECK-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C1]](s32) + ; CHECK-NEXT: [[BITCAST3:%[0-9]+]]:_(s32) = G_BITCAST [[UV3]](<2 x s16>) + ; 
CHECK-NEXT: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[BITCAST]], [[BITCAST2]] + ; CHECK-NEXT: [[ADD1:%[0-9]+]]:_(s32) = G_ADD [[LSHR]], [[LSHR1]] + ; CHECK-NEXT: [[ADD2:%[0-9]+]]:_(s32) = G_ADD [[BITCAST1]], [[BITCAST3]] + ; CHECK-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 + ; CHECK-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[ADD]], [[C2]] + ; CHECK-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[ADD1]], [[C2]] + ; CHECK-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C1]](s32) + ; CHECK-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]] + ; CHECK-NEXT: [[BITCAST4:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) + ; CHECK-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[ADD2]], [[C2]] + ; CHECK-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 + ; CHECK-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[C3]], [[C1]](s32) + ; CHECK-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL1]] + ; CHECK-NEXT: [[BITCAST5:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32) + ; CHECK-NEXT: [[DEF2:%[0-9]+]]:_(<2 x s16>) = G_IMPLICIT_DEF + ; CHECK-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BITCAST4]](<2 x s16>), [[BITCAST5]](<2 x s16>), [[DEF2]](<2 x s16>) + ; CHECK-NEXT: [[UV4:%[0-9]+]]:_(<3 x s16>), [[UV5:%[0-9]+]]:_(<3 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<6 x s16>) + ; CHECK-NEXT: [[INSERT3:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF1]], [[UV4]](<3 x s16>), 0 + ; CHECK-NEXT: G_BR %bb.2 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: bb.2: + ; CHECK-NEXT: [[PHI:%[0-9]+]]:_(<4 x s16>) = G_PHI [[INSERT]](<4 x s16>), %bb.0, [[INSERT3]](<4 x s16>), %bb.1 + ; CHECK-NEXT: [[DEF3:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF + ; CHECK-NEXT: [[UV6:%[0-9]+]]:_(<2 x s16>), [[UV7:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[PHI]](<4 x s16>) + ; CHECK-NEXT: [[BITCAST6:%[0-9]+]]:_(s32) = G_BITCAST [[UV6]](<2 x s16>) + ; CHECK-NEXT: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; CHECK-NEXT: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST6]], [[C4]](s32) + ; CHECK-NEXT: [[BITCAST7:%[0-9]+]]:_(s32) = G_BITCAST [[UV7]](<2 x s16>) + ; CHECK-NEXT: [[UV8:%[0-9]+]]:_(<2 x s16>), [[UV9:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF3]](<4 x s16>) + ; CHECK-NEXT: [[BITCAST8:%[0-9]+]]:_(s32) = G_BITCAST [[UV8]](<2 x s16>) + ; CHECK-NEXT: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST8]], [[C4]](s32) + ; CHECK-NEXT: [[BITCAST9:%[0-9]+]]:_(s32) = G_BITCAST [[UV9]](<2 x s16>) + ; CHECK-NEXT: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 + ; CHECK-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[BITCAST6]], [[C5]] + ; CHECK-NEXT: [[AND4:%[0-9]+]]:_(s32) = G_AND [[LSHR2]], [[C5]] + ; CHECK-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND4]], [[C4]](s32) + ; CHECK-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[AND3]], [[SHL2]] + ; CHECK-NEXT: [[BITCAST10:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR2]](s32) + ; CHECK-NEXT: [[AND5:%[0-9]+]]:_(s32) = G_AND [[BITCAST7]], [[C5]] + ; CHECK-NEXT: [[AND6:%[0-9]+]]:_(s32) = G_AND [[BITCAST8]], [[C5]] + ; CHECK-NEXT: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[AND6]], [[C4]](s32) + ; CHECK-NEXT: [[OR3:%[0-9]+]]:_(s32) = G_OR [[AND5]], [[SHL3]] + ; CHECK-NEXT: [[BITCAST11:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR3]](s32) + ; CHECK-NEXT: [[AND7:%[0-9]+]]:_(s32) = G_AND [[LSHR3]], [[C5]] + ; CHECK-NEXT: [[AND8:%[0-9]+]]:_(s32) = G_AND [[BITCAST9]], [[C5]] + ; CHECK-NEXT: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[AND8]], [[C4]](s32) + ; CHECK-NEXT: [[OR4:%[0-9]+]]:_(s32) = G_OR [[AND7]], [[SHL4]] + ; CHECK-NEXT: [[BITCAST12:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR4]](s32) + ; CHECK-NEXT: [[CONCAT_VECTORS1:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BITCAST10]](<2 x s16>), 
[[BITCAST11]](<2 x s16>), [[BITCAST12]](<2 x s16>) + ; CHECK-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[CONCAT_VECTORS1]](<6 x s16>) + ; CHECK-NEXT: S_SETPC_B64 undef $sgpr30_sgpr31 bb.0: successors: %bb.1, %bb.2 liveins: $vgpr0_vgpr1, $vgpr2 @@ -221,48 +233,52 @@ tracksRegLiveness: true body: | ; CHECK-LABEL: name: test_phi_v4s16 ; CHECK: bb.0: - ; CHECK: successors: %bb.1(0x40000000), %bb.2(0x40000000) - ; CHECK: liveins: $vgpr0_vgpr1, $vgpr2 - ; CHECK: [[COPY:%[0-9]+]]:_(<4 x s16>) = COPY $vgpr0_vgpr1 - ; CHECK: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; CHECK: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 - ; CHECK: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[COPY1]](s32), [[C]] - ; CHECK: G_BRCOND [[ICMP]](s1), %bb.1 - ; CHECK: G_BR %bb.2 - ; CHECK: bb.1: - ; CHECK: successors: %bb.2(0x80000000) - ; CHECK: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[COPY]](<4 x s16>) - ; CHECK: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>) - ; CHECK: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; CHECK: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C1]](s32) - ; CHECK: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>) - ; CHECK: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C1]](s32) - ; CHECK: [[UV2:%[0-9]+]]:_(<2 x s16>), [[UV3:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[COPY]](<4 x s16>) - ; CHECK: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[UV2]](<2 x s16>) - ; CHECK: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C1]](s32) - ; CHECK: [[BITCAST3:%[0-9]+]]:_(s32) = G_BITCAST [[UV3]](<2 x s16>) - ; CHECK: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST3]], [[C1]](s32) - ; CHECK: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[BITCAST]], [[BITCAST2]] - ; CHECK: [[ADD1:%[0-9]+]]:_(s32) = G_ADD [[LSHR]], [[LSHR2]] - ; CHECK: [[ADD2:%[0-9]+]]:_(s32) = G_ADD [[BITCAST1]], [[BITCAST3]] - ; CHECK: [[ADD3:%[0-9]+]]:_(s32) = G_ADD [[LSHR1]], [[LSHR3]] - ; CHECK: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 - ; CHECK: [[AND:%[0-9]+]]:_(s32) = G_AND [[ADD]], [[C2]] - ; CHECK: [[AND1:%[0-9]+]]:_(s32) = G_AND [[ADD1]], [[C2]] - ; CHECK: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C1]](s32) - ; CHECK: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]] - ; CHECK: [[BITCAST4:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) - ; CHECK: [[AND2:%[0-9]+]]:_(s32) = G_AND [[ADD2]], [[C2]] - ; CHECK: [[AND3:%[0-9]+]]:_(s32) = G_AND [[ADD3]], [[C2]] - ; CHECK: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C1]](s32) - ; CHECK: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL1]] - ; CHECK: [[BITCAST5:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32) - ; CHECK: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BITCAST4]](<2 x s16>), [[BITCAST5]](<2 x s16>) - ; CHECK: G_BR %bb.2 - ; CHECK: bb.2: - ; CHECK: [[PHI:%[0-9]+]]:_(<4 x s16>) = G_PHI [[COPY]](<4 x s16>), %bb.0, [[CONCAT_VECTORS]](<4 x s16>), %bb.1 - ; CHECK: $vgpr0_vgpr1 = COPY [[PHI]](<4 x s16>) - ; CHECK: S_SETPC_B64 undef $sgpr30_sgpr31 + ; CHECK-NEXT: successors: %bb.1(0x40000000), %bb.2(0x40000000) + ; CHECK-NEXT: liveins: $vgpr0_vgpr1, $vgpr2 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(<4 x s16>) = COPY $vgpr0_vgpr1 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr2 + ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 + ; CHECK-NEXT: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[COPY1]](s32), [[C]] + ; CHECK-NEXT: G_BRCOND [[ICMP]](s1), %bb.1 + ; CHECK-NEXT: G_BR %bb.2 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: bb.1: + ; CHECK-NEXT: successors: %bb.2(0x80000000) + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[UV:%[0-9]+]]:_(<2 
x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[COPY]](<4 x s16>) + ; CHECK-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>) + ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; CHECK-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C1]](s32) + ; CHECK-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>) + ; CHECK-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C1]](s32) + ; CHECK-NEXT: [[UV2:%[0-9]+]]:_(<2 x s16>), [[UV3:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[COPY]](<4 x s16>) + ; CHECK-NEXT: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[UV2]](<2 x s16>) + ; CHECK-NEXT: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C1]](s32) + ; CHECK-NEXT: [[BITCAST3:%[0-9]+]]:_(s32) = G_BITCAST [[UV3]](<2 x s16>) + ; CHECK-NEXT: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST3]], [[C1]](s32) + ; CHECK-NEXT: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[BITCAST]], [[BITCAST2]] + ; CHECK-NEXT: [[ADD1:%[0-9]+]]:_(s32) = G_ADD [[LSHR]], [[LSHR2]] + ; CHECK-NEXT: [[ADD2:%[0-9]+]]:_(s32) = G_ADD [[BITCAST1]], [[BITCAST3]] + ; CHECK-NEXT: [[ADD3:%[0-9]+]]:_(s32) = G_ADD [[LSHR1]], [[LSHR3]] + ; CHECK-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 + ; CHECK-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[ADD]], [[C2]] + ; CHECK-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[ADD1]], [[C2]] + ; CHECK-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C1]](s32) + ; CHECK-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]] + ; CHECK-NEXT: [[BITCAST4:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) + ; CHECK-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[ADD2]], [[C2]] + ; CHECK-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[ADD3]], [[C2]] + ; CHECK-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C1]](s32) + ; CHECK-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL1]] + ; CHECK-NEXT: [[BITCAST5:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32) + ; CHECK-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BITCAST4]](<2 x s16>), [[BITCAST5]](<2 x s16>) + ; CHECK-NEXT: G_BR %bb.2 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: bb.2: + ; CHECK-NEXT: [[PHI:%[0-9]+]]:_(<4 x s16>) = G_PHI [[COPY]](<4 x s16>), %bb.0, [[CONCAT_VECTORS]](<4 x s16>), %bb.1 + ; CHECK-NEXT: $vgpr0_vgpr1 = COPY [[PHI]](<4 x s16>) + ; CHECK-NEXT: S_SETPC_B64 undef $sgpr30_sgpr31 bb.0: successors: %bb.1, %bb.2 liveins: $vgpr0_vgpr1, $vgpr2 @@ -293,26 +309,30 @@ tracksRegLiveness: true body: | ; CHECK-LABEL: name: test_phi_v2s32 ; CHECK: bb.0: - ; CHECK: successors: %bb.1(0x40000000), %bb.2(0x40000000) - ; CHECK: liveins: $vgpr0_vgpr1, $vgpr2 - ; CHECK: [[COPY:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr0_vgpr1 - ; CHECK: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; CHECK: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 - ; CHECK: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[COPY1]](s32), [[C]] - ; CHECK: G_BRCOND [[ICMP]](s1), %bb.1 - ; CHECK: G_BR %bb.2 - ; CHECK: bb.1: - ; CHECK: successors: %bb.2(0x80000000) - ; CHECK: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<2 x s32>) - ; CHECK: [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<2 x s32>) - ; CHECK: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[UV]], [[UV2]] - ; CHECK: [[ADD1:%[0-9]+]]:_(s32) = G_ADD [[UV1]], [[UV3]] - ; CHECK: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[ADD]](s32), [[ADD1]](s32) - ; CHECK: G_BR %bb.2 - ; CHECK: bb.2: - ; CHECK: [[PHI:%[0-9]+]]:_(<2 x s32>) = G_PHI [[COPY]](<2 x s32>), %bb.0, [[BUILD_VECTOR]](<2 x s32>), %bb.1 - ; CHECK: $vgpr0_vgpr1 = COPY [[PHI]](<2 x s32>) - ; CHECK: S_SETPC_B64 undef 
$sgpr30_sgpr31 + ; CHECK-NEXT: successors: %bb.1(0x40000000), %bb.2(0x40000000) + ; CHECK-NEXT: liveins: $vgpr0_vgpr1, $vgpr2 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr0_vgpr1 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr2 + ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 + ; CHECK-NEXT: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[COPY1]](s32), [[C]] + ; CHECK-NEXT: G_BRCOND [[ICMP]](s1), %bb.1 + ; CHECK-NEXT: G_BR %bb.2 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: bb.1: + ; CHECK-NEXT: successors: %bb.2(0x80000000) + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<2 x s32>) + ; CHECK-NEXT: [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<2 x s32>) + ; CHECK-NEXT: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[UV]], [[UV2]] + ; CHECK-NEXT: [[ADD1:%[0-9]+]]:_(s32) = G_ADD [[UV1]], [[UV3]] + ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[ADD]](s32), [[ADD1]](s32) + ; CHECK-NEXT: G_BR %bb.2 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: bb.2: + ; CHECK-NEXT: [[PHI:%[0-9]+]]:_(<2 x s32>) = G_PHI [[COPY]](<2 x s32>), %bb.0, [[BUILD_VECTOR]](<2 x s32>), %bb.1 + ; CHECK-NEXT: $vgpr0_vgpr1 = COPY [[PHI]](<2 x s32>) + ; CHECK-NEXT: S_SETPC_B64 undef $sgpr30_sgpr31 bb.0: successors: %bb.1, %bb.2 liveins: $vgpr0_vgpr1, $vgpr2 @@ -343,27 +363,31 @@ tracksRegLiveness: true body: | ; CHECK-LABEL: name: test_phi_v3s32 ; CHECK: bb.0: - ; CHECK: successors: %bb.1(0x40000000), %bb.2(0x40000000) - ; CHECK: liveins: $vgpr0_vgpr1_vgpr2, $vgpr3 - ; CHECK: [[COPY:%[0-9]+]]:_(<3 x s32>) = COPY $vgpr0_vgpr1_vgpr2 - ; CHECK: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr3 - ; CHECK: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 - ; CHECK: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[COPY1]](s32), [[C]] - ; CHECK: G_BRCOND [[ICMP]](s1), %bb.1 - ; CHECK: G_BR %bb.2 - ; CHECK: bb.1: - ; CHECK: successors: %bb.2(0x80000000) - ; CHECK: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<3 x s32>) - ; CHECK: [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<3 x s32>) - ; CHECK: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[UV]], [[UV3]] - ; CHECK: [[ADD1:%[0-9]+]]:_(s32) = G_ADD [[UV1]], [[UV4]] - ; CHECK: [[ADD2:%[0-9]+]]:_(s32) = G_ADD [[UV2]], [[UV5]] - ; CHECK: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[ADD]](s32), [[ADD1]](s32), [[ADD2]](s32) - ; CHECK: G_BR %bb.2 - ; CHECK: bb.2: - ; CHECK: [[PHI:%[0-9]+]]:_(<3 x s32>) = G_PHI [[COPY]](<3 x s32>), %bb.0, [[BUILD_VECTOR]](<3 x s32>), %bb.1 - ; CHECK: $vgpr0_vgpr1_vgpr2 = COPY [[PHI]](<3 x s32>) - ; CHECK: S_SETPC_B64 undef $sgpr30_sgpr31 + ; CHECK-NEXT: successors: %bb.1(0x40000000), %bb.2(0x40000000) + ; CHECK-NEXT: liveins: $vgpr0_vgpr1_vgpr2, $vgpr3 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(<3 x s32>) = COPY $vgpr0_vgpr1_vgpr2 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr3 + ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 + ; CHECK-NEXT: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[COPY1]](s32), [[C]] + ; CHECK-NEXT: G_BRCOND [[ICMP]](s1), %bb.1 + ; CHECK-NEXT: G_BR %bb.2 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: bb.1: + ; CHECK-NEXT: successors: %bb.2(0x80000000) + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<3 x s32>) + ; CHECK-NEXT: [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32) = 
G_UNMERGE_VALUES [[COPY]](<3 x s32>) + ; CHECK-NEXT: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[UV]], [[UV3]] + ; CHECK-NEXT: [[ADD1:%[0-9]+]]:_(s32) = G_ADD [[UV1]], [[UV4]] + ; CHECK-NEXT: [[ADD2:%[0-9]+]]:_(s32) = G_ADD [[UV2]], [[UV5]] + ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[ADD]](s32), [[ADD1]](s32), [[ADD2]](s32) + ; CHECK-NEXT: G_BR %bb.2 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: bb.2: + ; CHECK-NEXT: [[PHI:%[0-9]+]]:_(<3 x s32>) = G_PHI [[COPY]](<3 x s32>), %bb.0, [[BUILD_VECTOR]](<3 x s32>), %bb.1 + ; CHECK-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[PHI]](<3 x s32>) + ; CHECK-NEXT: S_SETPC_B64 undef $sgpr30_sgpr31 bb.0: successors: %bb.1, %bb.2 liveins: $vgpr0_vgpr1_vgpr2, $vgpr3 @@ -394,28 +418,32 @@ tracksRegLiveness: true body: | ; CHECK-LABEL: name: test_phi_v4s32 ; CHECK: bb.0: - ; CHECK: successors: %bb.1(0x40000000), %bb.2(0x40000000) - ; CHECK: liveins: $vgpr0_vgpr1_vgpr2_vgpr3, $vgpr4 - ; CHECK: [[COPY:%[0-9]+]]:_(<4 x s32>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3 - ; CHECK: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr4 - ; CHECK: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 - ; CHECK: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[COPY1]](s32), [[C]] - ; CHECK: G_BRCOND [[ICMP]](s1), %bb.1 - ; CHECK: G_BR %bb.2 - ; CHECK: bb.1: - ; CHECK: successors: %bb.2(0x80000000) - ; CHECK: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<4 x s32>) - ; CHECK: [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32), [[UV6:%[0-9]+]]:_(s32), [[UV7:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<4 x s32>) - ; CHECK: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[UV]], [[UV4]] - ; CHECK: [[ADD1:%[0-9]+]]:_(s32) = G_ADD [[UV1]], [[UV5]] - ; CHECK: [[ADD2:%[0-9]+]]:_(s32) = G_ADD [[UV2]], [[UV6]] - ; CHECK: [[ADD3:%[0-9]+]]:_(s32) = G_ADD [[UV3]], [[UV7]] - ; CHECK: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[ADD]](s32), [[ADD1]](s32), [[ADD2]](s32), [[ADD3]](s32) - ; CHECK: G_BR %bb.2 - ; CHECK: bb.2: - ; CHECK: [[PHI:%[0-9]+]]:_(<4 x s32>) = G_PHI [[COPY]](<4 x s32>), %bb.0, [[BUILD_VECTOR]](<4 x s32>), %bb.1 - ; CHECK: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[PHI]](<4 x s32>) - ; CHECK: S_SETPC_B64 undef $sgpr30_sgpr31 + ; CHECK-NEXT: successors: %bb.1(0x40000000), %bb.2(0x40000000) + ; CHECK-NEXT: liveins: $vgpr0_vgpr1_vgpr2_vgpr3, $vgpr4 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(<4 x s32>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr4 + ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 + ; CHECK-NEXT: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[COPY1]](s32), [[C]] + ; CHECK-NEXT: G_BRCOND [[ICMP]](s1), %bb.1 + ; CHECK-NEXT: G_BR %bb.2 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: bb.1: + ; CHECK-NEXT: successors: %bb.2(0x80000000) + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<4 x s32>) + ; CHECK-NEXT: [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32), [[UV6:%[0-9]+]]:_(s32), [[UV7:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<4 x s32>) + ; CHECK-NEXT: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[UV]], [[UV4]] + ; CHECK-NEXT: [[ADD1:%[0-9]+]]:_(s32) = G_ADD [[UV1]], [[UV5]] + ; CHECK-NEXT: [[ADD2:%[0-9]+]]:_(s32) = G_ADD [[UV2]], [[UV6]] + ; CHECK-NEXT: [[ADD3:%[0-9]+]]:_(s32) = G_ADD [[UV3]], [[UV7]] + ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[ADD]](s32), [[ADD1]](s32), [[ADD2]](s32), [[ADD3]](s32) + ; CHECK-NEXT: G_BR %bb.2 + ; CHECK-NEXT: {{ $}} + ; 
CHECK-NEXT: bb.2: + ; CHECK-NEXT: [[PHI:%[0-9]+]]:_(<4 x s32>) = G_PHI [[COPY]](<4 x s32>), %bb.0, [[BUILD_VECTOR]](<4 x s32>), %bb.1 + ; CHECK-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[PHI]](<4 x s32>) + ; CHECK-NEXT: S_SETPC_B64 undef $sgpr30_sgpr31 bb.0: successors: %bb.1, %bb.2 liveins: $vgpr0_vgpr1_vgpr2_vgpr3, $vgpr4 @@ -446,32 +474,36 @@ tracksRegLiveness: true body: | ; CHECK-LABEL: name: test_phi_v8s32 ; CHECK: bb.0: - ; CHECK: successors: %bb.1(0x40000000), %bb.2(0x40000000) - ; CHECK: liveins: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7, $vgpr8 - ; CHECK: [[COPY:%[0-9]+]]:_(<8 x s32>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 - ; CHECK: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr8 - ; CHECK: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 - ; CHECK: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[COPY1]](s32), [[C]] - ; CHECK: G_BRCOND [[ICMP]](s1), %bb.1 - ; CHECK: G_BR %bb.2 - ; CHECK: bb.1: - ; CHECK: successors: %bb.2(0x80000000) - ; CHECK: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32), [[UV6:%[0-9]+]]:_(s32), [[UV7:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<8 x s32>) - ; CHECK: [[UV8:%[0-9]+]]:_(s32), [[UV9:%[0-9]+]]:_(s32), [[UV10:%[0-9]+]]:_(s32), [[UV11:%[0-9]+]]:_(s32), [[UV12:%[0-9]+]]:_(s32), [[UV13:%[0-9]+]]:_(s32), [[UV14:%[0-9]+]]:_(s32), [[UV15:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<8 x s32>) - ; CHECK: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[UV]], [[UV8]] - ; CHECK: [[ADD1:%[0-9]+]]:_(s32) = G_ADD [[UV1]], [[UV9]] - ; CHECK: [[ADD2:%[0-9]+]]:_(s32) = G_ADD [[UV2]], [[UV10]] - ; CHECK: [[ADD3:%[0-9]+]]:_(s32) = G_ADD [[UV3]], [[UV11]] - ; CHECK: [[ADD4:%[0-9]+]]:_(s32) = G_ADD [[UV4]], [[UV12]] - ; CHECK: [[ADD5:%[0-9]+]]:_(s32) = G_ADD [[UV5]], [[UV13]] - ; CHECK: [[ADD6:%[0-9]+]]:_(s32) = G_ADD [[UV6]], [[UV14]] - ; CHECK: [[ADD7:%[0-9]+]]:_(s32) = G_ADD [[UV7]], [[UV15]] - ; CHECK: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[ADD]](s32), [[ADD1]](s32), [[ADD2]](s32), [[ADD3]](s32), [[ADD4]](s32), [[ADD5]](s32), [[ADD6]](s32), [[ADD7]](s32) - ; CHECK: G_BR %bb.2 - ; CHECK: bb.2: - ; CHECK: [[PHI:%[0-9]+]]:_(<8 x s32>) = G_PHI [[COPY]](<8 x s32>), %bb.0, [[BUILD_VECTOR]](<8 x s32>), %bb.1 - ; CHECK: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[PHI]](<8 x s32>) - ; CHECK: S_SETPC_B64 undef $sgpr30_sgpr31 + ; CHECK-NEXT: successors: %bb.1(0x40000000), %bb.2(0x40000000) + ; CHECK-NEXT: liveins: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7, $vgpr8 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(<8 x s32>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr8 + ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 + ; CHECK-NEXT: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[COPY1]](s32), [[C]] + ; CHECK-NEXT: G_BRCOND [[ICMP]](s1), %bb.1 + ; CHECK-NEXT: G_BR %bb.2 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: bb.1: + ; CHECK-NEXT: successors: %bb.2(0x80000000) + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32), [[UV6:%[0-9]+]]:_(s32), [[UV7:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<8 x s32>) + ; CHECK-NEXT: [[UV8:%[0-9]+]]:_(s32), [[UV9:%[0-9]+]]:_(s32), [[UV10:%[0-9]+]]:_(s32), [[UV11:%[0-9]+]]:_(s32), [[UV12:%[0-9]+]]:_(s32), [[UV13:%[0-9]+]]:_(s32), [[UV14:%[0-9]+]]:_(s32), [[UV15:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<8 x s32>) + ; 
CHECK-NEXT: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[UV]], [[UV8]] + ; CHECK-NEXT: [[ADD1:%[0-9]+]]:_(s32) = G_ADD [[UV1]], [[UV9]] + ; CHECK-NEXT: [[ADD2:%[0-9]+]]:_(s32) = G_ADD [[UV2]], [[UV10]] + ; CHECK-NEXT: [[ADD3:%[0-9]+]]:_(s32) = G_ADD [[UV3]], [[UV11]] + ; CHECK-NEXT: [[ADD4:%[0-9]+]]:_(s32) = G_ADD [[UV4]], [[UV12]] + ; CHECK-NEXT: [[ADD5:%[0-9]+]]:_(s32) = G_ADD [[UV5]], [[UV13]] + ; CHECK-NEXT: [[ADD6:%[0-9]+]]:_(s32) = G_ADD [[UV6]], [[UV14]] + ; CHECK-NEXT: [[ADD7:%[0-9]+]]:_(s32) = G_ADD [[UV7]], [[UV15]] + ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[ADD]](s32), [[ADD1]](s32), [[ADD2]](s32), [[ADD3]](s32), [[ADD4]](s32), [[ADD5]](s32), [[ADD6]](s32), [[ADD7]](s32) + ; CHECK-NEXT: G_BR %bb.2 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: bb.2: + ; CHECK-NEXT: [[PHI:%[0-9]+]]:_(<8 x s32>) = G_PHI [[COPY]](<8 x s32>), %bb.0, [[BUILD_VECTOR]](<8 x s32>), %bb.1 + ; CHECK-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[PHI]](<8 x s32>) + ; CHECK-NEXT: S_SETPC_B64 undef $sgpr30_sgpr31 bb.0: successors: %bb.1, %bb.2 liveins: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7, $vgpr8 @@ -502,39 +534,43 @@ tracksRegLiveness: true body: | ; CHECK-LABEL: name: test_phi_v16s32 ; CHECK: bb.0: - ; CHECK: successors: %bb.1(0x40000000), %bb.2(0x40000000) - ; CHECK: liveins: $vgpr0_vgpr1_vgpr2_vgpr3, $vgpr4 - ; CHECK: [[DEF:%[0-9]+]]:_(<16 x s32>) = G_IMPLICIT_DEF - ; CHECK: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr4 - ; CHECK: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 - ; CHECK: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[COPY]](s32), [[C]] - ; CHECK: G_BRCOND [[ICMP]](s1), %bb.1 - ; CHECK: G_BR %bb.2 - ; CHECK: bb.1: - ; CHECK: successors: %bb.2(0x80000000) - ; CHECK: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32), [[UV6:%[0-9]+]]:_(s32), [[UV7:%[0-9]+]]:_(s32), [[UV8:%[0-9]+]]:_(s32), [[UV9:%[0-9]+]]:_(s32), [[UV10:%[0-9]+]]:_(s32), [[UV11:%[0-9]+]]:_(s32), [[UV12:%[0-9]+]]:_(s32), [[UV13:%[0-9]+]]:_(s32), [[UV14:%[0-9]+]]:_(s32), [[UV15:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[DEF]](<16 x s32>) - ; CHECK: [[UV16:%[0-9]+]]:_(s32), [[UV17:%[0-9]+]]:_(s32), [[UV18:%[0-9]+]]:_(s32), [[UV19:%[0-9]+]]:_(s32), [[UV20:%[0-9]+]]:_(s32), [[UV21:%[0-9]+]]:_(s32), [[UV22:%[0-9]+]]:_(s32), [[UV23:%[0-9]+]]:_(s32), [[UV24:%[0-9]+]]:_(s32), [[UV25:%[0-9]+]]:_(s32), [[UV26:%[0-9]+]]:_(s32), [[UV27:%[0-9]+]]:_(s32), [[UV28:%[0-9]+]]:_(s32), [[UV29:%[0-9]+]]:_(s32), [[UV30:%[0-9]+]]:_(s32), [[UV31:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[DEF]](<16 x s32>) - ; CHECK: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[UV]], [[UV16]] - ; CHECK: [[ADD1:%[0-9]+]]:_(s32) = G_ADD [[UV1]], [[UV17]] - ; CHECK: [[ADD2:%[0-9]+]]:_(s32) = G_ADD [[UV2]], [[UV18]] - ; CHECK: [[ADD3:%[0-9]+]]:_(s32) = G_ADD [[UV3]], [[UV19]] - ; CHECK: [[ADD4:%[0-9]+]]:_(s32) = G_ADD [[UV4]], [[UV20]] - ; CHECK: [[ADD5:%[0-9]+]]:_(s32) = G_ADD [[UV5]], [[UV21]] - ; CHECK: [[ADD6:%[0-9]+]]:_(s32) = G_ADD [[UV6]], [[UV22]] - ; CHECK: [[ADD7:%[0-9]+]]:_(s32) = G_ADD [[UV7]], [[UV23]] - ; CHECK: [[ADD8:%[0-9]+]]:_(s32) = G_ADD [[UV8]], [[UV24]] - ; CHECK: [[ADD9:%[0-9]+]]:_(s32) = G_ADD [[UV9]], [[UV25]] - ; CHECK: [[ADD10:%[0-9]+]]:_(s32) = G_ADD [[UV10]], [[UV26]] - ; CHECK: [[ADD11:%[0-9]+]]:_(s32) = G_ADD [[UV11]], [[UV27]] - ; CHECK: [[ADD12:%[0-9]+]]:_(s32) = G_ADD [[UV12]], [[UV28]] - ; CHECK: [[ADD13:%[0-9]+]]:_(s32) = G_ADD [[UV13]], [[UV29]] - ; CHECK: [[ADD14:%[0-9]+]]:_(s32) = G_ADD [[UV14]], [[UV30]] - ; CHECK: [[ADD15:%[0-9]+]]:_(s32) 
= G_ADD [[UV15]], [[UV31]] - ; CHECK: [[BUILD_VECTOR:%[0-9]+]]:_(<16 x s32>) = G_BUILD_VECTOR [[ADD]](s32), [[ADD1]](s32), [[ADD2]](s32), [[ADD3]](s32), [[ADD4]](s32), [[ADD5]](s32), [[ADD6]](s32), [[ADD7]](s32), [[ADD8]](s32), [[ADD9]](s32), [[ADD10]](s32), [[ADD11]](s32), [[ADD12]](s32), [[ADD13]](s32), [[ADD14]](s32), [[ADD15]](s32) - ; CHECK: G_BR %bb.2 - ; CHECK: bb.2: - ; CHECK: [[PHI:%[0-9]+]]:_(<16 x s32>) = G_PHI [[DEF]](<16 x s32>), %bb.0, [[BUILD_VECTOR]](<16 x s32>), %bb.1 - ; CHECK: S_SETPC_B64 undef $sgpr30_sgpr31, implicit [[PHI]](<16 x s32>) + ; CHECK-NEXT: successors: %bb.1(0x40000000), %bb.2(0x40000000) + ; CHECK-NEXT: liveins: $vgpr0_vgpr1_vgpr2_vgpr3, $vgpr4 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(<16 x s32>) = G_IMPLICIT_DEF + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr4 + ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 + ; CHECK-NEXT: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[COPY]](s32), [[C]] + ; CHECK-NEXT: G_BRCOND [[ICMP]](s1), %bb.1 + ; CHECK-NEXT: G_BR %bb.2 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: bb.1: + ; CHECK-NEXT: successors: %bb.2(0x80000000) + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32), [[UV6:%[0-9]+]]:_(s32), [[UV7:%[0-9]+]]:_(s32), [[UV8:%[0-9]+]]:_(s32), [[UV9:%[0-9]+]]:_(s32), [[UV10:%[0-9]+]]:_(s32), [[UV11:%[0-9]+]]:_(s32), [[UV12:%[0-9]+]]:_(s32), [[UV13:%[0-9]+]]:_(s32), [[UV14:%[0-9]+]]:_(s32), [[UV15:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[DEF]](<16 x s32>) + ; CHECK-NEXT: [[UV16:%[0-9]+]]:_(s32), [[UV17:%[0-9]+]]:_(s32), [[UV18:%[0-9]+]]:_(s32), [[UV19:%[0-9]+]]:_(s32), [[UV20:%[0-9]+]]:_(s32), [[UV21:%[0-9]+]]:_(s32), [[UV22:%[0-9]+]]:_(s32), [[UV23:%[0-9]+]]:_(s32), [[UV24:%[0-9]+]]:_(s32), [[UV25:%[0-9]+]]:_(s32), [[UV26:%[0-9]+]]:_(s32), [[UV27:%[0-9]+]]:_(s32), [[UV28:%[0-9]+]]:_(s32), [[UV29:%[0-9]+]]:_(s32), [[UV30:%[0-9]+]]:_(s32), [[UV31:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[DEF]](<16 x s32>) + ; CHECK-NEXT: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[UV]], [[UV16]] + ; CHECK-NEXT: [[ADD1:%[0-9]+]]:_(s32) = G_ADD [[UV1]], [[UV17]] + ; CHECK-NEXT: [[ADD2:%[0-9]+]]:_(s32) = G_ADD [[UV2]], [[UV18]] + ; CHECK-NEXT: [[ADD3:%[0-9]+]]:_(s32) = G_ADD [[UV3]], [[UV19]] + ; CHECK-NEXT: [[ADD4:%[0-9]+]]:_(s32) = G_ADD [[UV4]], [[UV20]] + ; CHECK-NEXT: [[ADD5:%[0-9]+]]:_(s32) = G_ADD [[UV5]], [[UV21]] + ; CHECK-NEXT: [[ADD6:%[0-9]+]]:_(s32) = G_ADD [[UV6]], [[UV22]] + ; CHECK-NEXT: [[ADD7:%[0-9]+]]:_(s32) = G_ADD [[UV7]], [[UV23]] + ; CHECK-NEXT: [[ADD8:%[0-9]+]]:_(s32) = G_ADD [[UV8]], [[UV24]] + ; CHECK-NEXT: [[ADD9:%[0-9]+]]:_(s32) = G_ADD [[UV9]], [[UV25]] + ; CHECK-NEXT: [[ADD10:%[0-9]+]]:_(s32) = G_ADD [[UV10]], [[UV26]] + ; CHECK-NEXT: [[ADD11:%[0-9]+]]:_(s32) = G_ADD [[UV11]], [[UV27]] + ; CHECK-NEXT: [[ADD12:%[0-9]+]]:_(s32) = G_ADD [[UV12]], [[UV28]] + ; CHECK-NEXT: [[ADD13:%[0-9]+]]:_(s32) = G_ADD [[UV13]], [[UV29]] + ; CHECK-NEXT: [[ADD14:%[0-9]+]]:_(s32) = G_ADD [[UV14]], [[UV30]] + ; CHECK-NEXT: [[ADD15:%[0-9]+]]:_(s32) = G_ADD [[UV15]], [[UV31]] + ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<16 x s32>) = G_BUILD_VECTOR [[ADD]](s32), [[ADD1]](s32), [[ADD2]](s32), [[ADD3]](s32), [[ADD4]](s32), [[ADD5]](s32), [[ADD6]](s32), [[ADD7]](s32), [[ADD8]](s32), [[ADD9]](s32), [[ADD10]](s32), [[ADD11]](s32), [[ADD12]](s32), [[ADD13]](s32), [[ADD14]](s32), [[ADD15]](s32) + ; CHECK-NEXT: G_BR %bb.2 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: bb.2: + ; CHECK-NEXT: [[PHI:%[0-9]+]]:_(<16 x s32>) = 
G_PHI [[DEF]](<16 x s32>), %bb.0, [[BUILD_VECTOR]](<16 x s32>), %bb.1 + ; CHECK-NEXT: S_SETPC_B64 undef $sgpr30_sgpr31, implicit [[PHI]](<16 x s32>) bb.0: successors: %bb.1, %bb.2 liveins: $vgpr0_vgpr1_vgpr2_vgpr3, $vgpr4 @@ -565,55 +601,59 @@ tracksRegLiveness: true body: | ; CHECK-LABEL: name: test_phi_v32s32 ; CHECK: bb.0: - ; CHECK: successors: %bb.1(0x40000000), %bb.2(0x40000000) - ; CHECK: liveins: $vgpr0_vgpr1_vgpr2_vgpr3, $vgpr4 - ; CHECK: [[DEF:%[0-9]+]]:_(<32 x s32>) = G_IMPLICIT_DEF - ; CHECK: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr4 - ; CHECK: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 - ; CHECK: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[COPY]](s32), [[C]] - ; CHECK: G_BRCOND [[ICMP]](s1), %bb.1 - ; CHECK: G_BR %bb.2 - ; CHECK: bb.1: - ; CHECK: successors: %bb.2(0x80000000) - ; CHECK: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32), [[UV6:%[0-9]+]]:_(s32), [[UV7:%[0-9]+]]:_(s32), [[UV8:%[0-9]+]]:_(s32), [[UV9:%[0-9]+]]:_(s32), [[UV10:%[0-9]+]]:_(s32), [[UV11:%[0-9]+]]:_(s32), [[UV12:%[0-9]+]]:_(s32), [[UV13:%[0-9]+]]:_(s32), [[UV14:%[0-9]+]]:_(s32), [[UV15:%[0-9]+]]:_(s32), [[UV16:%[0-9]+]]:_(s32), [[UV17:%[0-9]+]]:_(s32), [[UV18:%[0-9]+]]:_(s32), [[UV19:%[0-9]+]]:_(s32), [[UV20:%[0-9]+]]:_(s32), [[UV21:%[0-9]+]]:_(s32), [[UV22:%[0-9]+]]:_(s32), [[UV23:%[0-9]+]]:_(s32), [[UV24:%[0-9]+]]:_(s32), [[UV25:%[0-9]+]]:_(s32), [[UV26:%[0-9]+]]:_(s32), [[UV27:%[0-9]+]]:_(s32), [[UV28:%[0-9]+]]:_(s32), [[UV29:%[0-9]+]]:_(s32), [[UV30:%[0-9]+]]:_(s32), [[UV31:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[DEF]](<32 x s32>) - ; CHECK: [[UV32:%[0-9]+]]:_(s32), [[UV33:%[0-9]+]]:_(s32), [[UV34:%[0-9]+]]:_(s32), [[UV35:%[0-9]+]]:_(s32), [[UV36:%[0-9]+]]:_(s32), [[UV37:%[0-9]+]]:_(s32), [[UV38:%[0-9]+]]:_(s32), [[UV39:%[0-9]+]]:_(s32), [[UV40:%[0-9]+]]:_(s32), [[UV41:%[0-9]+]]:_(s32), [[UV42:%[0-9]+]]:_(s32), [[UV43:%[0-9]+]]:_(s32), [[UV44:%[0-9]+]]:_(s32), [[UV45:%[0-9]+]]:_(s32), [[UV46:%[0-9]+]]:_(s32), [[UV47:%[0-9]+]]:_(s32), [[UV48:%[0-9]+]]:_(s32), [[UV49:%[0-9]+]]:_(s32), [[UV50:%[0-9]+]]:_(s32), [[UV51:%[0-9]+]]:_(s32), [[UV52:%[0-9]+]]:_(s32), [[UV53:%[0-9]+]]:_(s32), [[UV54:%[0-9]+]]:_(s32), [[UV55:%[0-9]+]]:_(s32), [[UV56:%[0-9]+]]:_(s32), [[UV57:%[0-9]+]]:_(s32), [[UV58:%[0-9]+]]:_(s32), [[UV59:%[0-9]+]]:_(s32), [[UV60:%[0-9]+]]:_(s32), [[UV61:%[0-9]+]]:_(s32), [[UV62:%[0-9]+]]:_(s32), [[UV63:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[DEF]](<32 x s32>) - ; CHECK: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[UV]], [[UV32]] - ; CHECK: [[ADD1:%[0-9]+]]:_(s32) = G_ADD [[UV1]], [[UV33]] - ; CHECK: [[ADD2:%[0-9]+]]:_(s32) = G_ADD [[UV2]], [[UV34]] - ; CHECK: [[ADD3:%[0-9]+]]:_(s32) = G_ADD [[UV3]], [[UV35]] - ; CHECK: [[ADD4:%[0-9]+]]:_(s32) = G_ADD [[UV4]], [[UV36]] - ; CHECK: [[ADD5:%[0-9]+]]:_(s32) = G_ADD [[UV5]], [[UV37]] - ; CHECK: [[ADD6:%[0-9]+]]:_(s32) = G_ADD [[UV6]], [[UV38]] - ; CHECK: [[ADD7:%[0-9]+]]:_(s32) = G_ADD [[UV7]], [[UV39]] - ; CHECK: [[ADD8:%[0-9]+]]:_(s32) = G_ADD [[UV8]], [[UV40]] - ; CHECK: [[ADD9:%[0-9]+]]:_(s32) = G_ADD [[UV9]], [[UV41]] - ; CHECK: [[ADD10:%[0-9]+]]:_(s32) = G_ADD [[UV10]], [[UV42]] - ; CHECK: [[ADD11:%[0-9]+]]:_(s32) = G_ADD [[UV11]], [[UV43]] - ; CHECK: [[ADD12:%[0-9]+]]:_(s32) = G_ADD [[UV12]], [[UV44]] - ; CHECK: [[ADD13:%[0-9]+]]:_(s32) = G_ADD [[UV13]], [[UV45]] - ; CHECK: [[ADD14:%[0-9]+]]:_(s32) = G_ADD [[UV14]], [[UV46]] - ; CHECK: [[ADD15:%[0-9]+]]:_(s32) = G_ADD [[UV15]], [[UV47]] - ; CHECK: [[ADD16:%[0-9]+]]:_(s32) = G_ADD [[UV16]], [[UV48]] - ; CHECK: 
[[ADD17:%[0-9]+]]:_(s32) = G_ADD [[UV17]], [[UV49]] - ; CHECK: [[ADD18:%[0-9]+]]:_(s32) = G_ADD [[UV18]], [[UV50]] - ; CHECK: [[ADD19:%[0-9]+]]:_(s32) = G_ADD [[UV19]], [[UV51]] - ; CHECK: [[ADD20:%[0-9]+]]:_(s32) = G_ADD [[UV20]], [[UV52]] - ; CHECK: [[ADD21:%[0-9]+]]:_(s32) = G_ADD [[UV21]], [[UV53]] - ; CHECK: [[ADD22:%[0-9]+]]:_(s32) = G_ADD [[UV22]], [[UV54]] - ; CHECK: [[ADD23:%[0-9]+]]:_(s32) = G_ADD [[UV23]], [[UV55]] - ; CHECK: [[ADD24:%[0-9]+]]:_(s32) = G_ADD [[UV24]], [[UV56]] - ; CHECK: [[ADD25:%[0-9]+]]:_(s32) = G_ADD [[UV25]], [[UV57]] - ; CHECK: [[ADD26:%[0-9]+]]:_(s32) = G_ADD [[UV26]], [[UV58]] - ; CHECK: [[ADD27:%[0-9]+]]:_(s32) = G_ADD [[UV27]], [[UV59]] - ; CHECK: [[ADD28:%[0-9]+]]:_(s32) = G_ADD [[UV28]], [[UV60]] - ; CHECK: [[ADD29:%[0-9]+]]:_(s32) = G_ADD [[UV29]], [[UV61]] - ; CHECK: [[ADD30:%[0-9]+]]:_(s32) = G_ADD [[UV30]], [[UV62]] - ; CHECK: [[ADD31:%[0-9]+]]:_(s32) = G_ADD [[UV31]], [[UV63]] - ; CHECK: [[BUILD_VECTOR:%[0-9]+]]:_(<32 x s32>) = G_BUILD_VECTOR [[ADD]](s32), [[ADD1]](s32), [[ADD2]](s32), [[ADD3]](s32), [[ADD4]](s32), [[ADD5]](s32), [[ADD6]](s32), [[ADD7]](s32), [[ADD8]](s32), [[ADD9]](s32), [[ADD10]](s32), [[ADD11]](s32), [[ADD12]](s32), [[ADD13]](s32), [[ADD14]](s32), [[ADD15]](s32), [[ADD16]](s32), [[ADD17]](s32), [[ADD18]](s32), [[ADD19]](s32), [[ADD20]](s32), [[ADD21]](s32), [[ADD22]](s32), [[ADD23]](s32), [[ADD24]](s32), [[ADD25]](s32), [[ADD26]](s32), [[ADD27]](s32), [[ADD28]](s32), [[ADD29]](s32), [[ADD30]](s32), [[ADD31]](s32) - ; CHECK: G_BR %bb.2 - ; CHECK: bb.2: - ; CHECK: [[PHI:%[0-9]+]]:_(<32 x s32>) = G_PHI [[DEF]](<32 x s32>), %bb.0, [[BUILD_VECTOR]](<32 x s32>), %bb.1 - ; CHECK: S_SETPC_B64 undef $sgpr30_sgpr31, implicit [[PHI]](<32 x s32>) + ; CHECK-NEXT: successors: %bb.1(0x40000000), %bb.2(0x40000000) + ; CHECK-NEXT: liveins: $vgpr0_vgpr1_vgpr2_vgpr3, $vgpr4 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(<32 x s32>) = G_IMPLICIT_DEF + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr4 + ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 + ; CHECK-NEXT: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[COPY]](s32), [[C]] + ; CHECK-NEXT: G_BRCOND [[ICMP]](s1), %bb.1 + ; CHECK-NEXT: G_BR %bb.2 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: bb.1: + ; CHECK-NEXT: successors: %bb.2(0x80000000) + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32), [[UV6:%[0-9]+]]:_(s32), [[UV7:%[0-9]+]]:_(s32), [[UV8:%[0-9]+]]:_(s32), [[UV9:%[0-9]+]]:_(s32), [[UV10:%[0-9]+]]:_(s32), [[UV11:%[0-9]+]]:_(s32), [[UV12:%[0-9]+]]:_(s32), [[UV13:%[0-9]+]]:_(s32), [[UV14:%[0-9]+]]:_(s32), [[UV15:%[0-9]+]]:_(s32), [[UV16:%[0-9]+]]:_(s32), [[UV17:%[0-9]+]]:_(s32), [[UV18:%[0-9]+]]:_(s32), [[UV19:%[0-9]+]]:_(s32), [[UV20:%[0-9]+]]:_(s32), [[UV21:%[0-9]+]]:_(s32), [[UV22:%[0-9]+]]:_(s32), [[UV23:%[0-9]+]]:_(s32), [[UV24:%[0-9]+]]:_(s32), [[UV25:%[0-9]+]]:_(s32), [[UV26:%[0-9]+]]:_(s32), [[UV27:%[0-9]+]]:_(s32), [[UV28:%[0-9]+]]:_(s32), [[UV29:%[0-9]+]]:_(s32), [[UV30:%[0-9]+]]:_(s32), [[UV31:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[DEF]](<32 x s32>) + ; CHECK-NEXT: [[UV32:%[0-9]+]]:_(s32), [[UV33:%[0-9]+]]:_(s32), [[UV34:%[0-9]+]]:_(s32), [[UV35:%[0-9]+]]:_(s32), [[UV36:%[0-9]+]]:_(s32), [[UV37:%[0-9]+]]:_(s32), [[UV38:%[0-9]+]]:_(s32), [[UV39:%[0-9]+]]:_(s32), [[UV40:%[0-9]+]]:_(s32), [[UV41:%[0-9]+]]:_(s32), [[UV42:%[0-9]+]]:_(s32), [[UV43:%[0-9]+]]:_(s32), [[UV44:%[0-9]+]]:_(s32), [[UV45:%[0-9]+]]:_(s32), [[UV46:%[0-9]+]]:_(s32), 
[[UV47:%[0-9]+]]:_(s32), [[UV48:%[0-9]+]]:_(s32), [[UV49:%[0-9]+]]:_(s32), [[UV50:%[0-9]+]]:_(s32), [[UV51:%[0-9]+]]:_(s32), [[UV52:%[0-9]+]]:_(s32), [[UV53:%[0-9]+]]:_(s32), [[UV54:%[0-9]+]]:_(s32), [[UV55:%[0-9]+]]:_(s32), [[UV56:%[0-9]+]]:_(s32), [[UV57:%[0-9]+]]:_(s32), [[UV58:%[0-9]+]]:_(s32), [[UV59:%[0-9]+]]:_(s32), [[UV60:%[0-9]+]]:_(s32), [[UV61:%[0-9]+]]:_(s32), [[UV62:%[0-9]+]]:_(s32), [[UV63:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[DEF]](<32 x s32>) + ; CHECK-NEXT: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[UV]], [[UV32]] + ; CHECK-NEXT: [[ADD1:%[0-9]+]]:_(s32) = G_ADD [[UV1]], [[UV33]] + ; CHECK-NEXT: [[ADD2:%[0-9]+]]:_(s32) = G_ADD [[UV2]], [[UV34]] + ; CHECK-NEXT: [[ADD3:%[0-9]+]]:_(s32) = G_ADD [[UV3]], [[UV35]] + ; CHECK-NEXT: [[ADD4:%[0-9]+]]:_(s32) = G_ADD [[UV4]], [[UV36]] + ; CHECK-NEXT: [[ADD5:%[0-9]+]]:_(s32) = G_ADD [[UV5]], [[UV37]] + ; CHECK-NEXT: [[ADD6:%[0-9]+]]:_(s32) = G_ADD [[UV6]], [[UV38]] + ; CHECK-NEXT: [[ADD7:%[0-9]+]]:_(s32) = G_ADD [[UV7]], [[UV39]] + ; CHECK-NEXT: [[ADD8:%[0-9]+]]:_(s32) = G_ADD [[UV8]], [[UV40]] + ; CHECK-NEXT: [[ADD9:%[0-9]+]]:_(s32) = G_ADD [[UV9]], [[UV41]] + ; CHECK-NEXT: [[ADD10:%[0-9]+]]:_(s32) = G_ADD [[UV10]], [[UV42]] + ; CHECK-NEXT: [[ADD11:%[0-9]+]]:_(s32) = G_ADD [[UV11]], [[UV43]] + ; CHECK-NEXT: [[ADD12:%[0-9]+]]:_(s32) = G_ADD [[UV12]], [[UV44]] + ; CHECK-NEXT: [[ADD13:%[0-9]+]]:_(s32) = G_ADD [[UV13]], [[UV45]] + ; CHECK-NEXT: [[ADD14:%[0-9]+]]:_(s32) = G_ADD [[UV14]], [[UV46]] + ; CHECK-NEXT: [[ADD15:%[0-9]+]]:_(s32) = G_ADD [[UV15]], [[UV47]] + ; CHECK-NEXT: [[ADD16:%[0-9]+]]:_(s32) = G_ADD [[UV16]], [[UV48]] + ; CHECK-NEXT: [[ADD17:%[0-9]+]]:_(s32) = G_ADD [[UV17]], [[UV49]] + ; CHECK-NEXT: [[ADD18:%[0-9]+]]:_(s32) = G_ADD [[UV18]], [[UV50]] + ; CHECK-NEXT: [[ADD19:%[0-9]+]]:_(s32) = G_ADD [[UV19]], [[UV51]] + ; CHECK-NEXT: [[ADD20:%[0-9]+]]:_(s32) = G_ADD [[UV20]], [[UV52]] + ; CHECK-NEXT: [[ADD21:%[0-9]+]]:_(s32) = G_ADD [[UV21]], [[UV53]] + ; CHECK-NEXT: [[ADD22:%[0-9]+]]:_(s32) = G_ADD [[UV22]], [[UV54]] + ; CHECK-NEXT: [[ADD23:%[0-9]+]]:_(s32) = G_ADD [[UV23]], [[UV55]] + ; CHECK-NEXT: [[ADD24:%[0-9]+]]:_(s32) = G_ADD [[UV24]], [[UV56]] + ; CHECK-NEXT: [[ADD25:%[0-9]+]]:_(s32) = G_ADD [[UV25]], [[UV57]] + ; CHECK-NEXT: [[ADD26:%[0-9]+]]:_(s32) = G_ADD [[UV26]], [[UV58]] + ; CHECK-NEXT: [[ADD27:%[0-9]+]]:_(s32) = G_ADD [[UV27]], [[UV59]] + ; CHECK-NEXT: [[ADD28:%[0-9]+]]:_(s32) = G_ADD [[UV28]], [[UV60]] + ; CHECK-NEXT: [[ADD29:%[0-9]+]]:_(s32) = G_ADD [[UV29]], [[UV61]] + ; CHECK-NEXT: [[ADD30:%[0-9]+]]:_(s32) = G_ADD [[UV30]], [[UV62]] + ; CHECK-NEXT: [[ADD31:%[0-9]+]]:_(s32) = G_ADD [[UV31]], [[UV63]] + ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<32 x s32>) = G_BUILD_VECTOR [[ADD]](s32), [[ADD1]](s32), [[ADD2]](s32), [[ADD3]](s32), [[ADD4]](s32), [[ADD5]](s32), [[ADD6]](s32), [[ADD7]](s32), [[ADD8]](s32), [[ADD9]](s32), [[ADD10]](s32), [[ADD11]](s32), [[ADD12]](s32), [[ADD13]](s32), [[ADD14]](s32), [[ADD15]](s32), [[ADD16]](s32), [[ADD17]](s32), [[ADD18]](s32), [[ADD19]](s32), [[ADD20]](s32), [[ADD21]](s32), [[ADD22]](s32), [[ADD23]](s32), [[ADD24]](s32), [[ADD25]](s32), [[ADD26]](s32), [[ADD27]](s32), [[ADD28]](s32), [[ADD29]](s32), [[ADD30]](s32), [[ADD31]](s32) + ; CHECK-NEXT: G_BR %bb.2 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: bb.2: + ; CHECK-NEXT: [[PHI:%[0-9]+]]:_(<32 x s32>) = G_PHI [[DEF]](<32 x s32>), %bb.0, [[BUILD_VECTOR]](<32 x s32>), %bb.1 + ; CHECK-NEXT: S_SETPC_B64 undef $sgpr30_sgpr31, implicit [[PHI]](<32 x s32>) bb.0: successors: %bb.1, %bb.2 liveins: $vgpr0_vgpr1_vgpr2_vgpr3, $vgpr4 @@ -644,100 +684,104 @@ 
tracksRegLiveness: true body: | ; CHECK-LABEL: name: test_phi_v64s32 ; CHECK: bb.0: - ; CHECK: successors: %bb.1(0x40000000), %bb.2(0x40000000) - ; CHECK: liveins: $vgpr0_vgpr1_vgpr2_vgpr3, $vgpr4 - ; CHECK: [[DEF:%[0-9]+]]:_(<16 x s32>) = G_IMPLICIT_DEF - ; CHECK: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr4 - ; CHECK: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 - ; CHECK: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[COPY]](s32), [[C]] - ; CHECK: G_BRCOND [[ICMP]](s1), %bb.1 - ; CHECK: G_BR %bb.2 - ; CHECK: bb.1: - ; CHECK: successors: %bb.2(0x80000000) - ; CHECK: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32), [[UV6:%[0-9]+]]:_(s32), [[UV7:%[0-9]+]]:_(s32), [[UV8:%[0-9]+]]:_(s32), [[UV9:%[0-9]+]]:_(s32), [[UV10:%[0-9]+]]:_(s32), [[UV11:%[0-9]+]]:_(s32), [[UV12:%[0-9]+]]:_(s32), [[UV13:%[0-9]+]]:_(s32), [[UV14:%[0-9]+]]:_(s32), [[UV15:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[DEF]](<16 x s32>) - ; CHECK: [[UV16:%[0-9]+]]:_(s32), [[UV17:%[0-9]+]]:_(s32), [[UV18:%[0-9]+]]:_(s32), [[UV19:%[0-9]+]]:_(s32), [[UV20:%[0-9]+]]:_(s32), [[UV21:%[0-9]+]]:_(s32), [[UV22:%[0-9]+]]:_(s32), [[UV23:%[0-9]+]]:_(s32), [[UV24:%[0-9]+]]:_(s32), [[UV25:%[0-9]+]]:_(s32), [[UV26:%[0-9]+]]:_(s32), [[UV27:%[0-9]+]]:_(s32), [[UV28:%[0-9]+]]:_(s32), [[UV29:%[0-9]+]]:_(s32), [[UV30:%[0-9]+]]:_(s32), [[UV31:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[DEF]](<16 x s32>) - ; CHECK: [[UV32:%[0-9]+]]:_(s32), [[UV33:%[0-9]+]]:_(s32), [[UV34:%[0-9]+]]:_(s32), [[UV35:%[0-9]+]]:_(s32), [[UV36:%[0-9]+]]:_(s32), [[UV37:%[0-9]+]]:_(s32), [[UV38:%[0-9]+]]:_(s32), [[UV39:%[0-9]+]]:_(s32), [[UV40:%[0-9]+]]:_(s32), [[UV41:%[0-9]+]]:_(s32), [[UV42:%[0-9]+]]:_(s32), [[UV43:%[0-9]+]]:_(s32), [[UV44:%[0-9]+]]:_(s32), [[UV45:%[0-9]+]]:_(s32), [[UV46:%[0-9]+]]:_(s32), [[UV47:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[DEF]](<16 x s32>) - ; CHECK: [[UV48:%[0-9]+]]:_(s32), [[UV49:%[0-9]+]]:_(s32), [[UV50:%[0-9]+]]:_(s32), [[UV51:%[0-9]+]]:_(s32), [[UV52:%[0-9]+]]:_(s32), [[UV53:%[0-9]+]]:_(s32), [[UV54:%[0-9]+]]:_(s32), [[UV55:%[0-9]+]]:_(s32), [[UV56:%[0-9]+]]:_(s32), [[UV57:%[0-9]+]]:_(s32), [[UV58:%[0-9]+]]:_(s32), [[UV59:%[0-9]+]]:_(s32), [[UV60:%[0-9]+]]:_(s32), [[UV61:%[0-9]+]]:_(s32), [[UV62:%[0-9]+]]:_(s32), [[UV63:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[DEF]](<16 x s32>) - ; CHECK: [[UV64:%[0-9]+]]:_(s32), [[UV65:%[0-9]+]]:_(s32), [[UV66:%[0-9]+]]:_(s32), [[UV67:%[0-9]+]]:_(s32), [[UV68:%[0-9]+]]:_(s32), [[UV69:%[0-9]+]]:_(s32), [[UV70:%[0-9]+]]:_(s32), [[UV71:%[0-9]+]]:_(s32), [[UV72:%[0-9]+]]:_(s32), [[UV73:%[0-9]+]]:_(s32), [[UV74:%[0-9]+]]:_(s32), [[UV75:%[0-9]+]]:_(s32), [[UV76:%[0-9]+]]:_(s32), [[UV77:%[0-9]+]]:_(s32), [[UV78:%[0-9]+]]:_(s32), [[UV79:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[DEF]](<16 x s32>) - ; CHECK: [[UV80:%[0-9]+]]:_(s32), [[UV81:%[0-9]+]]:_(s32), [[UV82:%[0-9]+]]:_(s32), [[UV83:%[0-9]+]]:_(s32), [[UV84:%[0-9]+]]:_(s32), [[UV85:%[0-9]+]]:_(s32), [[UV86:%[0-9]+]]:_(s32), [[UV87:%[0-9]+]]:_(s32), [[UV88:%[0-9]+]]:_(s32), [[UV89:%[0-9]+]]:_(s32), [[UV90:%[0-9]+]]:_(s32), [[UV91:%[0-9]+]]:_(s32), [[UV92:%[0-9]+]]:_(s32), [[UV93:%[0-9]+]]:_(s32), [[UV94:%[0-9]+]]:_(s32), [[UV95:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[DEF]](<16 x s32>) - ; CHECK: [[UV96:%[0-9]+]]:_(s32), [[UV97:%[0-9]+]]:_(s32), [[UV98:%[0-9]+]]:_(s32), [[UV99:%[0-9]+]]:_(s32), [[UV100:%[0-9]+]]:_(s32), [[UV101:%[0-9]+]]:_(s32), [[UV102:%[0-9]+]]:_(s32), [[UV103:%[0-9]+]]:_(s32), [[UV104:%[0-9]+]]:_(s32), [[UV105:%[0-9]+]]:_(s32), [[UV106:%[0-9]+]]:_(s32), [[UV107:%[0-9]+]]:_(s32), 
[[UV108:%[0-9]+]]:_(s32), [[UV109:%[0-9]+]]:_(s32), [[UV110:%[0-9]+]]:_(s32), [[UV111:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[DEF]](<16 x s32>) - ; CHECK: [[UV112:%[0-9]+]]:_(s32), [[UV113:%[0-9]+]]:_(s32), [[UV114:%[0-9]+]]:_(s32), [[UV115:%[0-9]+]]:_(s32), [[UV116:%[0-9]+]]:_(s32), [[UV117:%[0-9]+]]:_(s32), [[UV118:%[0-9]+]]:_(s32), [[UV119:%[0-9]+]]:_(s32), [[UV120:%[0-9]+]]:_(s32), [[UV121:%[0-9]+]]:_(s32), [[UV122:%[0-9]+]]:_(s32), [[UV123:%[0-9]+]]:_(s32), [[UV124:%[0-9]+]]:_(s32), [[UV125:%[0-9]+]]:_(s32), [[UV126:%[0-9]+]]:_(s32), [[UV127:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[DEF]](<16 x s32>) - ; CHECK: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[UV]], [[UV64]] - ; CHECK: [[ADD1:%[0-9]+]]:_(s32) = G_ADD [[UV1]], [[UV65]] - ; CHECK: [[ADD2:%[0-9]+]]:_(s32) = G_ADD [[UV2]], [[UV66]] - ; CHECK: [[ADD3:%[0-9]+]]:_(s32) = G_ADD [[UV3]], [[UV67]] - ; CHECK: [[ADD4:%[0-9]+]]:_(s32) = G_ADD [[UV4]], [[UV68]] - ; CHECK: [[ADD5:%[0-9]+]]:_(s32) = G_ADD [[UV5]], [[UV69]] - ; CHECK: [[ADD6:%[0-9]+]]:_(s32) = G_ADD [[UV6]], [[UV70]] - ; CHECK: [[ADD7:%[0-9]+]]:_(s32) = G_ADD [[UV7]], [[UV71]] - ; CHECK: [[ADD8:%[0-9]+]]:_(s32) = G_ADD [[UV8]], [[UV72]] - ; CHECK: [[ADD9:%[0-9]+]]:_(s32) = G_ADD [[UV9]], [[UV73]] - ; CHECK: [[ADD10:%[0-9]+]]:_(s32) = G_ADD [[UV10]], [[UV74]] - ; CHECK: [[ADD11:%[0-9]+]]:_(s32) = G_ADD [[UV11]], [[UV75]] - ; CHECK: [[ADD12:%[0-9]+]]:_(s32) = G_ADD [[UV12]], [[UV76]] - ; CHECK: [[ADD13:%[0-9]+]]:_(s32) = G_ADD [[UV13]], [[UV77]] - ; CHECK: [[ADD14:%[0-9]+]]:_(s32) = G_ADD [[UV14]], [[UV78]] - ; CHECK: [[ADD15:%[0-9]+]]:_(s32) = G_ADD [[UV15]], [[UV79]] - ; CHECK: [[ADD16:%[0-9]+]]:_(s32) = G_ADD [[UV16]], [[UV80]] - ; CHECK: [[ADD17:%[0-9]+]]:_(s32) = G_ADD [[UV17]], [[UV81]] - ; CHECK: [[ADD18:%[0-9]+]]:_(s32) = G_ADD [[UV18]], [[UV82]] - ; CHECK: [[ADD19:%[0-9]+]]:_(s32) = G_ADD [[UV19]], [[UV83]] - ; CHECK: [[ADD20:%[0-9]+]]:_(s32) = G_ADD [[UV20]], [[UV84]] - ; CHECK: [[ADD21:%[0-9]+]]:_(s32) = G_ADD [[UV21]], [[UV85]] - ; CHECK: [[ADD22:%[0-9]+]]:_(s32) = G_ADD [[UV22]], [[UV86]] - ; CHECK: [[ADD23:%[0-9]+]]:_(s32) = G_ADD [[UV23]], [[UV87]] - ; CHECK: [[ADD24:%[0-9]+]]:_(s32) = G_ADD [[UV24]], [[UV88]] - ; CHECK: [[ADD25:%[0-9]+]]:_(s32) = G_ADD [[UV25]], [[UV89]] - ; CHECK: [[ADD26:%[0-9]+]]:_(s32) = G_ADD [[UV26]], [[UV90]] - ; CHECK: [[ADD27:%[0-9]+]]:_(s32) = G_ADD [[UV27]], [[UV91]] - ; CHECK: [[ADD28:%[0-9]+]]:_(s32) = G_ADD [[UV28]], [[UV92]] - ; CHECK: [[ADD29:%[0-9]+]]:_(s32) = G_ADD [[UV29]], [[UV93]] - ; CHECK: [[ADD30:%[0-9]+]]:_(s32) = G_ADD [[UV30]], [[UV94]] - ; CHECK: [[ADD31:%[0-9]+]]:_(s32) = G_ADD [[UV31]], [[UV95]] - ; CHECK: [[ADD32:%[0-9]+]]:_(s32) = G_ADD [[UV32]], [[UV96]] - ; CHECK: [[ADD33:%[0-9]+]]:_(s32) = G_ADD [[UV33]], [[UV97]] - ; CHECK: [[ADD34:%[0-9]+]]:_(s32) = G_ADD [[UV34]], [[UV98]] - ; CHECK: [[ADD35:%[0-9]+]]:_(s32) = G_ADD [[UV35]], [[UV99]] - ; CHECK: [[ADD36:%[0-9]+]]:_(s32) = G_ADD [[UV36]], [[UV100]] - ; CHECK: [[ADD37:%[0-9]+]]:_(s32) = G_ADD [[UV37]], [[UV101]] - ; CHECK: [[ADD38:%[0-9]+]]:_(s32) = G_ADD [[UV38]], [[UV102]] - ; CHECK: [[ADD39:%[0-9]+]]:_(s32) = G_ADD [[UV39]], [[UV103]] - ; CHECK: [[ADD40:%[0-9]+]]:_(s32) = G_ADD [[UV40]], [[UV104]] - ; CHECK: [[ADD41:%[0-9]+]]:_(s32) = G_ADD [[UV41]], [[UV105]] - ; CHECK: [[ADD42:%[0-9]+]]:_(s32) = G_ADD [[UV42]], [[UV106]] - ; CHECK: [[ADD43:%[0-9]+]]:_(s32) = G_ADD [[UV43]], [[UV107]] - ; CHECK: [[ADD44:%[0-9]+]]:_(s32) = G_ADD [[UV44]], [[UV108]] - ; CHECK: [[ADD45:%[0-9]+]]:_(s32) = G_ADD [[UV45]], [[UV109]] - ; CHECK: [[ADD46:%[0-9]+]]:_(s32) = G_ADD [[UV46]], 
[[UV110]] - ; CHECK: [[ADD47:%[0-9]+]]:_(s32) = G_ADD [[UV47]], [[UV111]] - ; CHECK: [[ADD48:%[0-9]+]]:_(s32) = G_ADD [[UV48]], [[UV112]] - ; CHECK: [[ADD49:%[0-9]+]]:_(s32) = G_ADD [[UV49]], [[UV113]] - ; CHECK: [[ADD50:%[0-9]+]]:_(s32) = G_ADD [[UV50]], [[UV114]] - ; CHECK: [[ADD51:%[0-9]+]]:_(s32) = G_ADD [[UV51]], [[UV115]] - ; CHECK: [[ADD52:%[0-9]+]]:_(s32) = G_ADD [[UV52]], [[UV116]] - ; CHECK: [[ADD53:%[0-9]+]]:_(s32) = G_ADD [[UV53]], [[UV117]] - ; CHECK: [[ADD54:%[0-9]+]]:_(s32) = G_ADD [[UV54]], [[UV118]] - ; CHECK: [[ADD55:%[0-9]+]]:_(s32) = G_ADD [[UV55]], [[UV119]] - ; CHECK: [[ADD56:%[0-9]+]]:_(s32) = G_ADD [[UV56]], [[UV120]] - ; CHECK: [[ADD57:%[0-9]+]]:_(s32) = G_ADD [[UV57]], [[UV121]] - ; CHECK: [[ADD58:%[0-9]+]]:_(s32) = G_ADD [[UV58]], [[UV122]] - ; CHECK: [[ADD59:%[0-9]+]]:_(s32) = G_ADD [[UV59]], [[UV123]] - ; CHECK: [[ADD60:%[0-9]+]]:_(s32) = G_ADD [[UV60]], [[UV124]] - ; CHECK: [[ADD61:%[0-9]+]]:_(s32) = G_ADD [[UV61]], [[UV125]] - ; CHECK: [[ADD62:%[0-9]+]]:_(s32) = G_ADD [[UV62]], [[UV126]] - ; CHECK: [[ADD63:%[0-9]+]]:_(s32) = G_ADD [[UV63]], [[UV127]] - ; CHECK: [[BUILD_VECTOR:%[0-9]+]]:_(<16 x s32>) = G_BUILD_VECTOR [[ADD]](s32), [[ADD1]](s32), [[ADD2]](s32), [[ADD3]](s32), [[ADD4]](s32), [[ADD5]](s32), [[ADD6]](s32), [[ADD7]](s32), [[ADD8]](s32), [[ADD9]](s32), [[ADD10]](s32), [[ADD11]](s32), [[ADD12]](s32), [[ADD13]](s32), [[ADD14]](s32), [[ADD15]](s32) - ; CHECK: [[BUILD_VECTOR1:%[0-9]+]]:_(<16 x s32>) = G_BUILD_VECTOR [[ADD16]](s32), [[ADD17]](s32), [[ADD18]](s32), [[ADD19]](s32), [[ADD20]](s32), [[ADD21]](s32), [[ADD22]](s32), [[ADD23]](s32), [[ADD24]](s32), [[ADD25]](s32), [[ADD26]](s32), [[ADD27]](s32), [[ADD28]](s32), [[ADD29]](s32), [[ADD30]](s32), [[ADD31]](s32) - ; CHECK: [[BUILD_VECTOR2:%[0-9]+]]:_(<16 x s32>) = G_BUILD_VECTOR [[ADD32]](s32), [[ADD33]](s32), [[ADD34]](s32), [[ADD35]](s32), [[ADD36]](s32), [[ADD37]](s32), [[ADD38]](s32), [[ADD39]](s32), [[ADD40]](s32), [[ADD41]](s32), [[ADD42]](s32), [[ADD43]](s32), [[ADD44]](s32), [[ADD45]](s32), [[ADD46]](s32), [[ADD47]](s32) - ; CHECK: [[BUILD_VECTOR3:%[0-9]+]]:_(<16 x s32>) = G_BUILD_VECTOR [[ADD48]](s32), [[ADD49]](s32), [[ADD50]](s32), [[ADD51]](s32), [[ADD52]](s32), [[ADD53]](s32), [[ADD54]](s32), [[ADD55]](s32), [[ADD56]](s32), [[ADD57]](s32), [[ADD58]](s32), [[ADD59]](s32), [[ADD60]](s32), [[ADD61]](s32), [[ADD62]](s32), [[ADD63]](s32) - ; CHECK: G_BR %bb.2 - ; CHECK: bb.2: - ; CHECK: [[PHI:%[0-9]+]]:_(<16 x s32>) = G_PHI [[DEF]](<16 x s32>), %bb.0, [[BUILD_VECTOR]](<16 x s32>), %bb.1 - ; CHECK: [[PHI1:%[0-9]+]]:_(<16 x s32>) = G_PHI [[DEF]](<16 x s32>), %bb.0, [[BUILD_VECTOR1]](<16 x s32>), %bb.1 - ; CHECK: [[PHI2:%[0-9]+]]:_(<16 x s32>) = G_PHI [[DEF]](<16 x s32>), %bb.0, [[BUILD_VECTOR2]](<16 x s32>), %bb.1 - ; CHECK: [[PHI3:%[0-9]+]]:_(<16 x s32>) = G_PHI [[DEF]](<16 x s32>), %bb.0, [[BUILD_VECTOR3]](<16 x s32>), %bb.1 - ; CHECK: [[CONCAT_VECTORS:%[0-9]+]]:_(<64 x s32>) = G_CONCAT_VECTORS [[PHI]](<16 x s32>), [[PHI1]](<16 x s32>), [[PHI2]](<16 x s32>), [[PHI3]](<16 x s32>) - ; CHECK: S_SETPC_B64 undef $sgpr30_sgpr31, implicit [[CONCAT_VECTORS]](<64 x s32>) + ; CHECK-NEXT: successors: %bb.1(0x40000000), %bb.2(0x40000000) + ; CHECK-NEXT: liveins: $vgpr0_vgpr1_vgpr2_vgpr3, $vgpr4 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(<16 x s32>) = G_IMPLICIT_DEF + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr4 + ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 + ; CHECK-NEXT: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[COPY]](s32), [[C]] + ; CHECK-NEXT: G_BRCOND 
[[ICMP]](s1), %bb.1 + ; CHECK-NEXT: G_BR %bb.2 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: bb.1: + ; CHECK-NEXT: successors: %bb.2(0x80000000) + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32), [[UV6:%[0-9]+]]:_(s32), [[UV7:%[0-9]+]]:_(s32), [[UV8:%[0-9]+]]:_(s32), [[UV9:%[0-9]+]]:_(s32), [[UV10:%[0-9]+]]:_(s32), [[UV11:%[0-9]+]]:_(s32), [[UV12:%[0-9]+]]:_(s32), [[UV13:%[0-9]+]]:_(s32), [[UV14:%[0-9]+]]:_(s32), [[UV15:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[DEF]](<16 x s32>) + ; CHECK-NEXT: [[UV16:%[0-9]+]]:_(s32), [[UV17:%[0-9]+]]:_(s32), [[UV18:%[0-9]+]]:_(s32), [[UV19:%[0-9]+]]:_(s32), [[UV20:%[0-9]+]]:_(s32), [[UV21:%[0-9]+]]:_(s32), [[UV22:%[0-9]+]]:_(s32), [[UV23:%[0-9]+]]:_(s32), [[UV24:%[0-9]+]]:_(s32), [[UV25:%[0-9]+]]:_(s32), [[UV26:%[0-9]+]]:_(s32), [[UV27:%[0-9]+]]:_(s32), [[UV28:%[0-9]+]]:_(s32), [[UV29:%[0-9]+]]:_(s32), [[UV30:%[0-9]+]]:_(s32), [[UV31:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[DEF]](<16 x s32>) + ; CHECK-NEXT: [[UV32:%[0-9]+]]:_(s32), [[UV33:%[0-9]+]]:_(s32), [[UV34:%[0-9]+]]:_(s32), [[UV35:%[0-9]+]]:_(s32), [[UV36:%[0-9]+]]:_(s32), [[UV37:%[0-9]+]]:_(s32), [[UV38:%[0-9]+]]:_(s32), [[UV39:%[0-9]+]]:_(s32), [[UV40:%[0-9]+]]:_(s32), [[UV41:%[0-9]+]]:_(s32), [[UV42:%[0-9]+]]:_(s32), [[UV43:%[0-9]+]]:_(s32), [[UV44:%[0-9]+]]:_(s32), [[UV45:%[0-9]+]]:_(s32), [[UV46:%[0-9]+]]:_(s32), [[UV47:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[DEF]](<16 x s32>) + ; CHECK-NEXT: [[UV48:%[0-9]+]]:_(s32), [[UV49:%[0-9]+]]:_(s32), [[UV50:%[0-9]+]]:_(s32), [[UV51:%[0-9]+]]:_(s32), [[UV52:%[0-9]+]]:_(s32), [[UV53:%[0-9]+]]:_(s32), [[UV54:%[0-9]+]]:_(s32), [[UV55:%[0-9]+]]:_(s32), [[UV56:%[0-9]+]]:_(s32), [[UV57:%[0-9]+]]:_(s32), [[UV58:%[0-9]+]]:_(s32), [[UV59:%[0-9]+]]:_(s32), [[UV60:%[0-9]+]]:_(s32), [[UV61:%[0-9]+]]:_(s32), [[UV62:%[0-9]+]]:_(s32), [[UV63:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[DEF]](<16 x s32>) + ; CHECK-NEXT: [[UV64:%[0-9]+]]:_(s32), [[UV65:%[0-9]+]]:_(s32), [[UV66:%[0-9]+]]:_(s32), [[UV67:%[0-9]+]]:_(s32), [[UV68:%[0-9]+]]:_(s32), [[UV69:%[0-9]+]]:_(s32), [[UV70:%[0-9]+]]:_(s32), [[UV71:%[0-9]+]]:_(s32), [[UV72:%[0-9]+]]:_(s32), [[UV73:%[0-9]+]]:_(s32), [[UV74:%[0-9]+]]:_(s32), [[UV75:%[0-9]+]]:_(s32), [[UV76:%[0-9]+]]:_(s32), [[UV77:%[0-9]+]]:_(s32), [[UV78:%[0-9]+]]:_(s32), [[UV79:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[DEF]](<16 x s32>) + ; CHECK-NEXT: [[UV80:%[0-9]+]]:_(s32), [[UV81:%[0-9]+]]:_(s32), [[UV82:%[0-9]+]]:_(s32), [[UV83:%[0-9]+]]:_(s32), [[UV84:%[0-9]+]]:_(s32), [[UV85:%[0-9]+]]:_(s32), [[UV86:%[0-9]+]]:_(s32), [[UV87:%[0-9]+]]:_(s32), [[UV88:%[0-9]+]]:_(s32), [[UV89:%[0-9]+]]:_(s32), [[UV90:%[0-9]+]]:_(s32), [[UV91:%[0-9]+]]:_(s32), [[UV92:%[0-9]+]]:_(s32), [[UV93:%[0-9]+]]:_(s32), [[UV94:%[0-9]+]]:_(s32), [[UV95:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[DEF]](<16 x s32>) + ; CHECK-NEXT: [[UV96:%[0-9]+]]:_(s32), [[UV97:%[0-9]+]]:_(s32), [[UV98:%[0-9]+]]:_(s32), [[UV99:%[0-9]+]]:_(s32), [[UV100:%[0-9]+]]:_(s32), [[UV101:%[0-9]+]]:_(s32), [[UV102:%[0-9]+]]:_(s32), [[UV103:%[0-9]+]]:_(s32), [[UV104:%[0-9]+]]:_(s32), [[UV105:%[0-9]+]]:_(s32), [[UV106:%[0-9]+]]:_(s32), [[UV107:%[0-9]+]]:_(s32), [[UV108:%[0-9]+]]:_(s32), [[UV109:%[0-9]+]]:_(s32), [[UV110:%[0-9]+]]:_(s32), [[UV111:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[DEF]](<16 x s32>) + ; CHECK-NEXT: [[UV112:%[0-9]+]]:_(s32), [[UV113:%[0-9]+]]:_(s32), [[UV114:%[0-9]+]]:_(s32), [[UV115:%[0-9]+]]:_(s32), [[UV116:%[0-9]+]]:_(s32), [[UV117:%[0-9]+]]:_(s32), [[UV118:%[0-9]+]]:_(s32), 
[[UV119:%[0-9]+]]:_(s32), [[UV120:%[0-9]+]]:_(s32), [[UV121:%[0-9]+]]:_(s32), [[UV122:%[0-9]+]]:_(s32), [[UV123:%[0-9]+]]:_(s32), [[UV124:%[0-9]+]]:_(s32), [[UV125:%[0-9]+]]:_(s32), [[UV126:%[0-9]+]]:_(s32), [[UV127:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[DEF]](<16 x s32>) + ; CHECK-NEXT: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[UV]], [[UV64]] + ; CHECK-NEXT: [[ADD1:%[0-9]+]]:_(s32) = G_ADD [[UV1]], [[UV65]] + ; CHECK-NEXT: [[ADD2:%[0-9]+]]:_(s32) = G_ADD [[UV2]], [[UV66]] + ; CHECK-NEXT: [[ADD3:%[0-9]+]]:_(s32) = G_ADD [[UV3]], [[UV67]] + ; CHECK-NEXT: [[ADD4:%[0-9]+]]:_(s32) = G_ADD [[UV4]], [[UV68]] + ; CHECK-NEXT: [[ADD5:%[0-9]+]]:_(s32) = G_ADD [[UV5]], [[UV69]] + ; CHECK-NEXT: [[ADD6:%[0-9]+]]:_(s32) = G_ADD [[UV6]], [[UV70]] + ; CHECK-NEXT: [[ADD7:%[0-9]+]]:_(s32) = G_ADD [[UV7]], [[UV71]] + ; CHECK-NEXT: [[ADD8:%[0-9]+]]:_(s32) = G_ADD [[UV8]], [[UV72]] + ; CHECK-NEXT: [[ADD9:%[0-9]+]]:_(s32) = G_ADD [[UV9]], [[UV73]] + ; CHECK-NEXT: [[ADD10:%[0-9]+]]:_(s32) = G_ADD [[UV10]], [[UV74]] + ; CHECK-NEXT: [[ADD11:%[0-9]+]]:_(s32) = G_ADD [[UV11]], [[UV75]] + ; CHECK-NEXT: [[ADD12:%[0-9]+]]:_(s32) = G_ADD [[UV12]], [[UV76]] + ; CHECK-NEXT: [[ADD13:%[0-9]+]]:_(s32) = G_ADD [[UV13]], [[UV77]] + ; CHECK-NEXT: [[ADD14:%[0-9]+]]:_(s32) = G_ADD [[UV14]], [[UV78]] + ; CHECK-NEXT: [[ADD15:%[0-9]+]]:_(s32) = G_ADD [[UV15]], [[UV79]] + ; CHECK-NEXT: [[ADD16:%[0-9]+]]:_(s32) = G_ADD [[UV16]], [[UV80]] + ; CHECK-NEXT: [[ADD17:%[0-9]+]]:_(s32) = G_ADD [[UV17]], [[UV81]] + ; CHECK-NEXT: [[ADD18:%[0-9]+]]:_(s32) = G_ADD [[UV18]], [[UV82]] + ; CHECK-NEXT: [[ADD19:%[0-9]+]]:_(s32) = G_ADD [[UV19]], [[UV83]] + ; CHECK-NEXT: [[ADD20:%[0-9]+]]:_(s32) = G_ADD [[UV20]], [[UV84]] + ; CHECK-NEXT: [[ADD21:%[0-9]+]]:_(s32) = G_ADD [[UV21]], [[UV85]] + ; CHECK-NEXT: [[ADD22:%[0-9]+]]:_(s32) = G_ADD [[UV22]], [[UV86]] + ; CHECK-NEXT: [[ADD23:%[0-9]+]]:_(s32) = G_ADD [[UV23]], [[UV87]] + ; CHECK-NEXT: [[ADD24:%[0-9]+]]:_(s32) = G_ADD [[UV24]], [[UV88]] + ; CHECK-NEXT: [[ADD25:%[0-9]+]]:_(s32) = G_ADD [[UV25]], [[UV89]] + ; CHECK-NEXT: [[ADD26:%[0-9]+]]:_(s32) = G_ADD [[UV26]], [[UV90]] + ; CHECK-NEXT: [[ADD27:%[0-9]+]]:_(s32) = G_ADD [[UV27]], [[UV91]] + ; CHECK-NEXT: [[ADD28:%[0-9]+]]:_(s32) = G_ADD [[UV28]], [[UV92]] + ; CHECK-NEXT: [[ADD29:%[0-9]+]]:_(s32) = G_ADD [[UV29]], [[UV93]] + ; CHECK-NEXT: [[ADD30:%[0-9]+]]:_(s32) = G_ADD [[UV30]], [[UV94]] + ; CHECK-NEXT: [[ADD31:%[0-9]+]]:_(s32) = G_ADD [[UV31]], [[UV95]] + ; CHECK-NEXT: [[ADD32:%[0-9]+]]:_(s32) = G_ADD [[UV32]], [[UV96]] + ; CHECK-NEXT: [[ADD33:%[0-9]+]]:_(s32) = G_ADD [[UV33]], [[UV97]] + ; CHECK-NEXT: [[ADD34:%[0-9]+]]:_(s32) = G_ADD [[UV34]], [[UV98]] + ; CHECK-NEXT: [[ADD35:%[0-9]+]]:_(s32) = G_ADD [[UV35]], [[UV99]] + ; CHECK-NEXT: [[ADD36:%[0-9]+]]:_(s32) = G_ADD [[UV36]], [[UV100]] + ; CHECK-NEXT: [[ADD37:%[0-9]+]]:_(s32) = G_ADD [[UV37]], [[UV101]] + ; CHECK-NEXT: [[ADD38:%[0-9]+]]:_(s32) = G_ADD [[UV38]], [[UV102]] + ; CHECK-NEXT: [[ADD39:%[0-9]+]]:_(s32) = G_ADD [[UV39]], [[UV103]] + ; CHECK-NEXT: [[ADD40:%[0-9]+]]:_(s32) = G_ADD [[UV40]], [[UV104]] + ; CHECK-NEXT: [[ADD41:%[0-9]+]]:_(s32) = G_ADD [[UV41]], [[UV105]] + ; CHECK-NEXT: [[ADD42:%[0-9]+]]:_(s32) = G_ADD [[UV42]], [[UV106]] + ; CHECK-NEXT: [[ADD43:%[0-9]+]]:_(s32) = G_ADD [[UV43]], [[UV107]] + ; CHECK-NEXT: [[ADD44:%[0-9]+]]:_(s32) = G_ADD [[UV44]], [[UV108]] + ; CHECK-NEXT: [[ADD45:%[0-9]+]]:_(s32) = G_ADD [[UV45]], [[UV109]] + ; CHECK-NEXT: [[ADD46:%[0-9]+]]:_(s32) = G_ADD [[UV46]], [[UV110]] + ; CHECK-NEXT: [[ADD47:%[0-9]+]]:_(s32) = G_ADD [[UV47]], [[UV111]] + ; CHECK-NEXT: 
[[ADD48:%[0-9]+]]:_(s32) = G_ADD [[UV48]], [[UV112]] + ; CHECK-NEXT: [[ADD49:%[0-9]+]]:_(s32) = G_ADD [[UV49]], [[UV113]] + ; CHECK-NEXT: [[ADD50:%[0-9]+]]:_(s32) = G_ADD [[UV50]], [[UV114]] + ; CHECK-NEXT: [[ADD51:%[0-9]+]]:_(s32) = G_ADD [[UV51]], [[UV115]] + ; CHECK-NEXT: [[ADD52:%[0-9]+]]:_(s32) = G_ADD [[UV52]], [[UV116]] + ; CHECK-NEXT: [[ADD53:%[0-9]+]]:_(s32) = G_ADD [[UV53]], [[UV117]] + ; CHECK-NEXT: [[ADD54:%[0-9]+]]:_(s32) = G_ADD [[UV54]], [[UV118]] + ; CHECK-NEXT: [[ADD55:%[0-9]+]]:_(s32) = G_ADD [[UV55]], [[UV119]] + ; CHECK-NEXT: [[ADD56:%[0-9]+]]:_(s32) = G_ADD [[UV56]], [[UV120]] + ; CHECK-NEXT: [[ADD57:%[0-9]+]]:_(s32) = G_ADD [[UV57]], [[UV121]] + ; CHECK-NEXT: [[ADD58:%[0-9]+]]:_(s32) = G_ADD [[UV58]], [[UV122]] + ; CHECK-NEXT: [[ADD59:%[0-9]+]]:_(s32) = G_ADD [[UV59]], [[UV123]] + ; CHECK-NEXT: [[ADD60:%[0-9]+]]:_(s32) = G_ADD [[UV60]], [[UV124]] + ; CHECK-NEXT: [[ADD61:%[0-9]+]]:_(s32) = G_ADD [[UV61]], [[UV125]] + ; CHECK-NEXT: [[ADD62:%[0-9]+]]:_(s32) = G_ADD [[UV62]], [[UV126]] + ; CHECK-NEXT: [[ADD63:%[0-9]+]]:_(s32) = G_ADD [[UV63]], [[UV127]] + ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<16 x s32>) = G_BUILD_VECTOR [[ADD]](s32), [[ADD1]](s32), [[ADD2]](s32), [[ADD3]](s32), [[ADD4]](s32), [[ADD5]](s32), [[ADD6]](s32), [[ADD7]](s32), [[ADD8]](s32), [[ADD9]](s32), [[ADD10]](s32), [[ADD11]](s32), [[ADD12]](s32), [[ADD13]](s32), [[ADD14]](s32), [[ADD15]](s32) + ; CHECK-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<16 x s32>) = G_BUILD_VECTOR [[ADD16]](s32), [[ADD17]](s32), [[ADD18]](s32), [[ADD19]](s32), [[ADD20]](s32), [[ADD21]](s32), [[ADD22]](s32), [[ADD23]](s32), [[ADD24]](s32), [[ADD25]](s32), [[ADD26]](s32), [[ADD27]](s32), [[ADD28]](s32), [[ADD29]](s32), [[ADD30]](s32), [[ADD31]](s32) + ; CHECK-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<16 x s32>) = G_BUILD_VECTOR [[ADD32]](s32), [[ADD33]](s32), [[ADD34]](s32), [[ADD35]](s32), [[ADD36]](s32), [[ADD37]](s32), [[ADD38]](s32), [[ADD39]](s32), [[ADD40]](s32), [[ADD41]](s32), [[ADD42]](s32), [[ADD43]](s32), [[ADD44]](s32), [[ADD45]](s32), [[ADD46]](s32), [[ADD47]](s32) + ; CHECK-NEXT: [[BUILD_VECTOR3:%[0-9]+]]:_(<16 x s32>) = G_BUILD_VECTOR [[ADD48]](s32), [[ADD49]](s32), [[ADD50]](s32), [[ADD51]](s32), [[ADD52]](s32), [[ADD53]](s32), [[ADD54]](s32), [[ADD55]](s32), [[ADD56]](s32), [[ADD57]](s32), [[ADD58]](s32), [[ADD59]](s32), [[ADD60]](s32), [[ADD61]](s32), [[ADD62]](s32), [[ADD63]](s32) + ; CHECK-NEXT: G_BR %bb.2 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: bb.2: + ; CHECK-NEXT: [[PHI:%[0-9]+]]:_(<16 x s32>) = G_PHI [[DEF]](<16 x s32>), %bb.0, [[BUILD_VECTOR]](<16 x s32>), %bb.1 + ; CHECK-NEXT: [[PHI1:%[0-9]+]]:_(<16 x s32>) = G_PHI [[DEF]](<16 x s32>), %bb.0, [[BUILD_VECTOR1]](<16 x s32>), %bb.1 + ; CHECK-NEXT: [[PHI2:%[0-9]+]]:_(<16 x s32>) = G_PHI [[DEF]](<16 x s32>), %bb.0, [[BUILD_VECTOR2]](<16 x s32>), %bb.1 + ; CHECK-NEXT: [[PHI3:%[0-9]+]]:_(<16 x s32>) = G_PHI [[DEF]](<16 x s32>), %bb.0, [[BUILD_VECTOR3]](<16 x s32>), %bb.1 + ; CHECK-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<64 x s32>) = G_CONCAT_VECTORS [[PHI]](<16 x s32>), [[PHI1]](<16 x s32>), [[PHI2]](<16 x s32>), [[PHI3]](<16 x s32>) + ; CHECK-NEXT: S_SETPC_B64 undef $sgpr30_sgpr31, implicit [[CONCAT_VECTORS]](<64 x s32>) bb.0: successors: %bb.1, %bb.2 liveins: $vgpr0_vgpr1_vgpr2_vgpr3, $vgpr4 @@ -768,26 +812,30 @@ tracksRegLiveness: true body: | ; CHECK-LABEL: name: test_phi_s64 ; CHECK: bb.0: - ; CHECK: successors: %bb.1(0x40000000), %bb.2(0x40000000) - ; CHECK: liveins: $vgpr0_vgpr1, $vgpr2 - ; CHECK: [[COPY:%[0-9]+]]:_(s64) = COPY $vgpr0_vgpr1 - ; CHECK: 
[[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; CHECK: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 - ; CHECK: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[COPY1]](s32), [[C]] - ; CHECK: G_BRCOND [[ICMP]](s1), %bb.1 - ; CHECK: G_BR %bb.2 - ; CHECK: bb.1: - ; CHECK: successors: %bb.2(0x80000000) - ; CHECK: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](s64) - ; CHECK: [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](s64) - ; CHECK: [[UADDO:%[0-9]+]]:_(s32), [[UADDO1:%[0-9]+]]:_(s1) = G_UADDO [[UV]], [[UV2]] - ; CHECK: [[UADDE:%[0-9]+]]:_(s32), [[UADDE1:%[0-9]+]]:_(s1) = G_UADDE [[UV1]], [[UV3]], [[UADDO1]] - ; CHECK: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UADDO]](s32), [[UADDE]](s32) - ; CHECK: G_BR %bb.2 - ; CHECK: bb.2: - ; CHECK: [[PHI:%[0-9]+]]:_(s64) = G_PHI [[COPY]](s64), %bb.0, [[MV]](s64), %bb.1 - ; CHECK: $vgpr0_vgpr1 = COPY [[PHI]](s64) - ; CHECK: S_SETPC_B64 undef $sgpr30_sgpr31 + ; CHECK-NEXT: successors: %bb.1(0x40000000), %bb.2(0x40000000) + ; CHECK-NEXT: liveins: $vgpr0_vgpr1, $vgpr2 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s64) = COPY $vgpr0_vgpr1 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr2 + ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 + ; CHECK-NEXT: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[COPY1]](s32), [[C]] + ; CHECK-NEXT: G_BRCOND [[ICMP]](s1), %bb.1 + ; CHECK-NEXT: G_BR %bb.2 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: bb.1: + ; CHECK-NEXT: successors: %bb.2(0x80000000) + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](s64) + ; CHECK-NEXT: [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](s64) + ; CHECK-NEXT: [[UADDO:%[0-9]+]]:_(s32), [[UADDO1:%[0-9]+]]:_(s1) = G_UADDO [[UV]], [[UV2]] + ; CHECK-NEXT: [[UADDE:%[0-9]+]]:_(s32), [[UADDE1:%[0-9]+]]:_(s1) = G_UADDE [[UV1]], [[UV3]], [[UADDO1]] + ; CHECK-NEXT: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UADDO]](s32), [[UADDE]](s32) + ; CHECK-NEXT: G_BR %bb.2 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: bb.2: + ; CHECK-NEXT: [[PHI:%[0-9]+]]:_(s64) = G_PHI [[COPY]](s64), %bb.0, [[MV]](s64), %bb.1 + ; CHECK-NEXT: $vgpr0_vgpr1 = COPY [[PHI]](s64) + ; CHECK-NEXT: S_SETPC_B64 undef $sgpr30_sgpr31 bb.0: successors: %bb.1, %bb.2 liveins: $vgpr0_vgpr1, $vgpr2 @@ -818,34 +866,38 @@ tracksRegLiveness: true body: | ; CHECK-LABEL: name: test_phi_v2s64 ; CHECK: bb.0: - ; CHECK: successors: %bb.1(0x40000000), %bb.2(0x40000000) - ; CHECK: liveins: $vgpr0_vgpr1_vgpr2_vgpr3, $vgpr4 - ; CHECK: [[COPY:%[0-9]+]]:_(<2 x s64>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3 - ; CHECK: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr4 - ; CHECK: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 - ; CHECK: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[COPY1]](s32), [[C]] - ; CHECK: G_BRCOND [[ICMP]](s1), %bb.1 - ; CHECK: G_BR %bb.2 - ; CHECK: bb.1: - ; CHECK: successors: %bb.2(0x80000000) - ; CHECK: [[UV:%[0-9]+]]:_(s64), [[UV1:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[COPY]](<2 x s64>) - ; CHECK: [[UV2:%[0-9]+]]:_(s64), [[UV3:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[COPY]](<2 x s64>) - ; CHECK: [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[UV]](s64) - ; CHECK: [[UV6:%[0-9]+]]:_(s32), [[UV7:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[UV2]](s64) - ; CHECK: [[UADDO:%[0-9]+]]:_(s32), [[UADDO1:%[0-9]+]]:_(s1) = G_UADDO [[UV4]], [[UV6]] - ; CHECK: [[UADDE:%[0-9]+]]:_(s32), [[UADDE1:%[0-9]+]]:_(s1) = G_UADDE [[UV5]], [[UV7]], [[UADDO1]] - ; CHECK: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UADDO]](s32), [[UADDE]](s32) - ; 
CHECK: [[UV8:%[0-9]+]]:_(s32), [[UV9:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[UV1]](s64) - ; CHECK: [[UV10:%[0-9]+]]:_(s32), [[UV11:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[UV3]](s64) - ; CHECK: [[UADDO2:%[0-9]+]]:_(s32), [[UADDO3:%[0-9]+]]:_(s1) = G_UADDO [[UV8]], [[UV10]] - ; CHECK: [[UADDE2:%[0-9]+]]:_(s32), [[UADDE3:%[0-9]+]]:_(s1) = G_UADDE [[UV9]], [[UV11]], [[UADDO3]] - ; CHECK: [[MV1:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UADDO2]](s32), [[UADDE2]](s32) - ; CHECK: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s64>) = G_BUILD_VECTOR [[MV]](s64), [[MV1]](s64) - ; CHECK: G_BR %bb.2 - ; CHECK: bb.2: - ; CHECK: [[PHI:%[0-9]+]]:_(<2 x s64>) = G_PHI [[COPY]](<2 x s64>), %bb.0, [[BUILD_VECTOR]](<2 x s64>), %bb.1 - ; CHECK: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[PHI]](<2 x s64>) - ; CHECK: S_SETPC_B64 undef $sgpr30_sgpr31 + ; CHECK-NEXT: successors: %bb.1(0x40000000), %bb.2(0x40000000) + ; CHECK-NEXT: liveins: $vgpr0_vgpr1_vgpr2_vgpr3, $vgpr4 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(<2 x s64>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr4 + ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 + ; CHECK-NEXT: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[COPY1]](s32), [[C]] + ; CHECK-NEXT: G_BRCOND [[ICMP]](s1), %bb.1 + ; CHECK-NEXT: G_BR %bb.2 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: bb.1: + ; CHECK-NEXT: successors: %bb.2(0x80000000) + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s64), [[UV1:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[COPY]](<2 x s64>) + ; CHECK-NEXT: [[UV2:%[0-9]+]]:_(s64), [[UV3:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[COPY]](<2 x s64>) + ; CHECK-NEXT: [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[UV]](s64) + ; CHECK-NEXT: [[UV6:%[0-9]+]]:_(s32), [[UV7:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[UV2]](s64) + ; CHECK-NEXT: [[UADDO:%[0-9]+]]:_(s32), [[UADDO1:%[0-9]+]]:_(s1) = G_UADDO [[UV4]], [[UV6]] + ; CHECK-NEXT: [[UADDE:%[0-9]+]]:_(s32), [[UADDE1:%[0-9]+]]:_(s1) = G_UADDE [[UV5]], [[UV7]], [[UADDO1]] + ; CHECK-NEXT: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UADDO]](s32), [[UADDE]](s32) + ; CHECK-NEXT: [[UV8:%[0-9]+]]:_(s32), [[UV9:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[UV1]](s64) + ; CHECK-NEXT: [[UV10:%[0-9]+]]:_(s32), [[UV11:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[UV3]](s64) + ; CHECK-NEXT: [[UADDO2:%[0-9]+]]:_(s32), [[UADDO3:%[0-9]+]]:_(s1) = G_UADDO [[UV8]], [[UV10]] + ; CHECK-NEXT: [[UADDE2:%[0-9]+]]:_(s32), [[UADDE3:%[0-9]+]]:_(s1) = G_UADDE [[UV9]], [[UV11]], [[UADDO3]] + ; CHECK-NEXT: [[MV1:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UADDO2]](s32), [[UADDE2]](s32) + ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s64>) = G_BUILD_VECTOR [[MV]](s64), [[MV1]](s64) + ; CHECK-NEXT: G_BR %bb.2 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: bb.2: + ; CHECK-NEXT: [[PHI:%[0-9]+]]:_(<2 x s64>) = G_PHI [[COPY]](<2 x s64>), %bb.0, [[BUILD_VECTOR]](<2 x s64>), %bb.1 + ; CHECK-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[PHI]](<2 x s64>) + ; CHECK-NEXT: S_SETPC_B64 undef $sgpr30_sgpr31 bb.0: successors: %bb.1, %bb.2 liveins: $vgpr0_vgpr1_vgpr2_vgpr3, $vgpr4 @@ -876,42 +928,46 @@ tracksRegLiveness: true body: | ; CHECK-LABEL: name: test_phi_v3s64 ; CHECK: bb.0: - ; CHECK: successors: %bb.1(0x40000000), %bb.2(0x40000000) - ; CHECK: liveins: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7, $vgpr8 - ; CHECK: [[COPY:%[0-9]+]]:_(<4 x s64>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 - ; CHECK: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr8 - ; CHECK: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 - ; CHECK: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), 
[[COPY1]](s32), [[C]] - ; CHECK: [[EXTRACT:%[0-9]+]]:_(<3 x s64>) = G_EXTRACT [[COPY]](<4 x s64>), 0 - ; CHECK: G_BRCOND [[ICMP]](s1), %bb.1 - ; CHECK: G_BR %bb.2 - ; CHECK: bb.1: - ; CHECK: successors: %bb.2(0x80000000) - ; CHECK: [[UV:%[0-9]+]]:_(s64), [[UV1:%[0-9]+]]:_(s64), [[UV2:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[EXTRACT]](<3 x s64>) - ; CHECK: [[UV3:%[0-9]+]]:_(s64), [[UV4:%[0-9]+]]:_(s64), [[UV5:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[EXTRACT]](<3 x s64>) - ; CHECK: [[UV6:%[0-9]+]]:_(s32), [[UV7:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[UV]](s64) - ; CHECK: [[UV8:%[0-9]+]]:_(s32), [[UV9:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[UV3]](s64) - ; CHECK: [[UADDO:%[0-9]+]]:_(s32), [[UADDO1:%[0-9]+]]:_(s1) = G_UADDO [[UV6]], [[UV8]] - ; CHECK: [[UADDE:%[0-9]+]]:_(s32), [[UADDE1:%[0-9]+]]:_(s1) = G_UADDE [[UV7]], [[UV9]], [[UADDO1]] - ; CHECK: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UADDO]](s32), [[UADDE]](s32) - ; CHECK: [[UV10:%[0-9]+]]:_(s32), [[UV11:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[UV1]](s64) - ; CHECK: [[UV12:%[0-9]+]]:_(s32), [[UV13:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[UV4]](s64) - ; CHECK: [[UADDO2:%[0-9]+]]:_(s32), [[UADDO3:%[0-9]+]]:_(s1) = G_UADDO [[UV10]], [[UV12]] - ; CHECK: [[UADDE2:%[0-9]+]]:_(s32), [[UADDE3:%[0-9]+]]:_(s1) = G_UADDE [[UV11]], [[UV13]], [[UADDO3]] - ; CHECK: [[MV1:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UADDO2]](s32), [[UADDE2]](s32) - ; CHECK: [[UV14:%[0-9]+]]:_(s32), [[UV15:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[UV2]](s64) - ; CHECK: [[UV16:%[0-9]+]]:_(s32), [[UV17:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[UV5]](s64) - ; CHECK: [[UADDO4:%[0-9]+]]:_(s32), [[UADDO5:%[0-9]+]]:_(s1) = G_UADDO [[UV14]], [[UV16]] - ; CHECK: [[UADDE4:%[0-9]+]]:_(s32), [[UADDE5:%[0-9]+]]:_(s1) = G_UADDE [[UV15]], [[UV17]], [[UADDO5]] - ; CHECK: [[MV2:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UADDO4]](s32), [[UADDE4]](s32) - ; CHECK: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s64>) = G_BUILD_VECTOR [[MV]](s64), [[MV1]](s64), [[MV2]](s64) - ; CHECK: G_BR %bb.2 - ; CHECK: bb.2: - ; CHECK: [[PHI:%[0-9]+]]:_(<3 x s64>) = G_PHI [[EXTRACT]](<3 x s64>), %bb.0, [[BUILD_VECTOR]](<3 x s64>), %bb.1 - ; CHECK: [[DEF:%[0-9]+]]:_(<4 x s64>) = G_IMPLICIT_DEF - ; CHECK: [[INSERT:%[0-9]+]]:_(<4 x s64>) = G_INSERT [[DEF]], [[PHI]](<3 x s64>), 0 - ; CHECK: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[INSERT]](<4 x s64>) - ; CHECK: S_SETPC_B64 undef $sgpr30_sgpr31 + ; CHECK-NEXT: successors: %bb.1(0x40000000), %bb.2(0x40000000) + ; CHECK-NEXT: liveins: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7, $vgpr8 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(<4 x s64>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr8 + ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 + ; CHECK-NEXT: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[COPY1]](s32), [[C]] + ; CHECK-NEXT: [[EXTRACT:%[0-9]+]]:_(<3 x s64>) = G_EXTRACT [[COPY]](<4 x s64>), 0 + ; CHECK-NEXT: G_BRCOND [[ICMP]](s1), %bb.1 + ; CHECK-NEXT: G_BR %bb.2 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: bb.1: + ; CHECK-NEXT: successors: %bb.2(0x80000000) + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s64), [[UV1:%[0-9]+]]:_(s64), [[UV2:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[EXTRACT]](<3 x s64>) + ; CHECK-NEXT: [[UV3:%[0-9]+]]:_(s64), [[UV4:%[0-9]+]]:_(s64), [[UV5:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[EXTRACT]](<3 x s64>) + ; CHECK-NEXT: [[UV6:%[0-9]+]]:_(s32), [[UV7:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[UV]](s64) + ; CHECK-NEXT: [[UV8:%[0-9]+]]:_(s32), [[UV9:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES 
[[UV3]](s64) + ; CHECK-NEXT: [[UADDO:%[0-9]+]]:_(s32), [[UADDO1:%[0-9]+]]:_(s1) = G_UADDO [[UV6]], [[UV8]] + ; CHECK-NEXT: [[UADDE:%[0-9]+]]:_(s32), [[UADDE1:%[0-9]+]]:_(s1) = G_UADDE [[UV7]], [[UV9]], [[UADDO1]] + ; CHECK-NEXT: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UADDO]](s32), [[UADDE]](s32) + ; CHECK-NEXT: [[UV10:%[0-9]+]]:_(s32), [[UV11:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[UV1]](s64) + ; CHECK-NEXT: [[UV12:%[0-9]+]]:_(s32), [[UV13:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[UV4]](s64) + ; CHECK-NEXT: [[UADDO2:%[0-9]+]]:_(s32), [[UADDO3:%[0-9]+]]:_(s1) = G_UADDO [[UV10]], [[UV12]] + ; CHECK-NEXT: [[UADDE2:%[0-9]+]]:_(s32), [[UADDE3:%[0-9]+]]:_(s1) = G_UADDE [[UV11]], [[UV13]], [[UADDO3]] + ; CHECK-NEXT: [[MV1:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UADDO2]](s32), [[UADDE2]](s32) + ; CHECK-NEXT: [[UV14:%[0-9]+]]:_(s32), [[UV15:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[UV2]](s64) + ; CHECK-NEXT: [[UV16:%[0-9]+]]:_(s32), [[UV17:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[UV5]](s64) + ; CHECK-NEXT: [[UADDO4:%[0-9]+]]:_(s32), [[UADDO5:%[0-9]+]]:_(s1) = G_UADDO [[UV14]], [[UV16]] + ; CHECK-NEXT: [[UADDE4:%[0-9]+]]:_(s32), [[UADDE5:%[0-9]+]]:_(s1) = G_UADDE [[UV15]], [[UV17]], [[UADDO5]] + ; CHECK-NEXT: [[MV2:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UADDO4]](s32), [[UADDE4]](s32) + ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s64>) = G_BUILD_VECTOR [[MV]](s64), [[MV1]](s64), [[MV2]](s64) + ; CHECK-NEXT: G_BR %bb.2 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: bb.2: + ; CHECK-NEXT: [[PHI:%[0-9]+]]:_(<3 x s64>) = G_PHI [[EXTRACT]](<3 x s64>), %bb.0, [[BUILD_VECTOR]](<3 x s64>), %bb.1 + ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(<4 x s64>) = G_IMPLICIT_DEF + ; CHECK-NEXT: [[INSERT:%[0-9]+]]:_(<4 x s64>) = G_INSERT [[DEF]], [[PHI]](<3 x s64>), 0 + ; CHECK-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[INSERT]](<4 x s64>) + ; CHECK-NEXT: S_SETPC_B64 undef $sgpr30_sgpr31 bb.0: successors: %bb.1, %bb.2 liveins: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7, $vgpr8 @@ -945,23 +1001,27 @@ tracksRegLiveness: true body: | ; CHECK-LABEL: name: test_phi_p3 ; CHECK: bb.0: - ; CHECK: successors: %bb.1(0x40000000), %bb.2(0x40000000) - ; CHECK: liveins: $vgpr0, $vgpr1 - ; CHECK: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 - ; CHECK: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; CHECK: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 - ; CHECK: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[COPY1]](s32), [[C]] - ; CHECK: G_BRCOND [[ICMP]](s1), %bb.1 - ; CHECK: G_BR %bb.2 - ; CHECK: bb.1: - ; CHECK: successors: %bb.2(0x80000000) - ; CHECK: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 - ; CHECK: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C1]](s32) - ; CHECK: G_BR %bb.2 - ; CHECK: bb.2: - ; CHECK: [[PHI:%[0-9]+]]:_(p3) = G_PHI [[COPY]](p3), %bb.0, [[PTR_ADD]](p3), %bb.1 - ; CHECK: $vgpr0 = COPY [[PHI]](p3) - ; CHECK: S_SETPC_B64 undef $sgpr30_sgpr31 + ; CHECK-NEXT: successors: %bb.1(0x40000000), %bb.2(0x40000000) + ; CHECK-NEXT: liveins: $vgpr0, $vgpr1 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 + ; CHECK-NEXT: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[COPY1]](s32), [[C]] + ; CHECK-NEXT: G_BRCOND [[ICMP]](s1), %bb.1 + ; CHECK-NEXT: G_BR %bb.2 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: bb.1: + ; CHECK-NEXT: successors: %bb.2(0x80000000) + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 + ; CHECK-NEXT: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C1]](s32) + ; CHECK-NEXT: 
G_BR %bb.2 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: bb.2: + ; CHECK-NEXT: [[PHI:%[0-9]+]]:_(p3) = G_PHI [[COPY]](p3), %bb.0, [[PTR_ADD]](p3), %bb.1 + ; CHECK-NEXT: $vgpr0 = COPY [[PHI]](p3) + ; CHECK-NEXT: S_SETPC_B64 undef $sgpr30_sgpr31 bb.0: successors: %bb.1, %bb.2 liveins: $vgpr0, $vgpr1 @@ -993,23 +1053,27 @@ tracksRegLiveness: true body: | ; CHECK-LABEL: name: test_phi_p5 ; CHECK: bb.0: - ; CHECK: successors: %bb.1(0x40000000), %bb.2(0x40000000) - ; CHECK: liveins: $vgpr0, $vgpr1 - ; CHECK: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 - ; CHECK: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; CHECK: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 - ; CHECK: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[COPY1]](s32), [[C]] - ; CHECK: G_BRCOND [[ICMP]](s1), %bb.1 - ; CHECK: G_BR %bb.2 - ; CHECK: bb.1: - ; CHECK: successors: %bb.2(0x80000000) - ; CHECK: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 - ; CHECK: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C1]](s32) - ; CHECK: G_BR %bb.2 - ; CHECK: bb.2: - ; CHECK: [[PHI:%[0-9]+]]:_(p5) = G_PHI [[COPY]](p5), %bb.0, [[PTR_ADD]](p5), %bb.1 - ; CHECK: $vgpr0 = COPY [[PHI]](p5) - ; CHECK: S_SETPC_B64 undef $sgpr30_sgpr31 + ; CHECK-NEXT: successors: %bb.1(0x40000000), %bb.2(0x40000000) + ; CHECK-NEXT: liveins: $vgpr0, $vgpr1 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 + ; CHECK-NEXT: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[COPY1]](s32), [[C]] + ; CHECK-NEXT: G_BRCOND [[ICMP]](s1), %bb.1 + ; CHECK-NEXT: G_BR %bb.2 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: bb.1: + ; CHECK-NEXT: successors: %bb.2(0x80000000) + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 + ; CHECK-NEXT: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C1]](s32) + ; CHECK-NEXT: G_BR %bb.2 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: bb.2: + ; CHECK-NEXT: [[PHI:%[0-9]+]]:_(p5) = G_PHI [[COPY]](p5), %bb.0, [[PTR_ADD]](p5), %bb.1 + ; CHECK-NEXT: $vgpr0 = COPY [[PHI]](p5) + ; CHECK-NEXT: S_SETPC_B64 undef $sgpr30_sgpr31 bb.0: successors: %bb.1, %bb.2 liveins: $vgpr0, $vgpr1 @@ -1041,23 +1105,27 @@ tracksRegLiveness: true body: | ; CHECK-LABEL: name: test_phi_p0 ; CHECK: bb.0: - ; CHECK: successors: %bb.1(0x40000000), %bb.2(0x40000000) - ; CHECK: liveins: $vgpr0_vgpr1, $vgpr2 - ; CHECK: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1 - ; CHECK: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; CHECK: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 - ; CHECK: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[COPY1]](s32), [[C]] - ; CHECK: G_BRCOND [[ICMP]](s1), %bb.1 - ; CHECK: G_BR %bb.2 - ; CHECK: bb.1: - ; CHECK: successors: %bb.2(0x80000000) - ; CHECK: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 8 - ; CHECK: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C1]](s64) - ; CHECK: G_BR %bb.2 - ; CHECK: bb.2: - ; CHECK: [[PHI:%[0-9]+]]:_(p0) = G_PHI [[COPY]](p0), %bb.0, [[PTR_ADD]](p0), %bb.1 - ; CHECK: $vgpr0_vgpr1 = COPY [[PHI]](p0) - ; CHECK: S_SETPC_B64 undef $sgpr30_sgpr31 + ; CHECK-NEXT: successors: %bb.1(0x40000000), %bb.2(0x40000000) + ; CHECK-NEXT: liveins: $vgpr0_vgpr1, $vgpr2 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr2 + ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 + ; CHECK-NEXT: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[COPY1]](s32), [[C]] + ; CHECK-NEXT: G_BRCOND [[ICMP]](s1), %bb.1 + ; CHECK-NEXT: G_BR %bb.2 + ; CHECK-NEXT: {{ $}} + ; 
CHECK-NEXT: bb.1: + ; CHECK-NEXT: successors: %bb.2(0x80000000) + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 8 + ; CHECK-NEXT: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C1]](s64) + ; CHECK-NEXT: G_BR %bb.2 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: bb.2: + ; CHECK-NEXT: [[PHI:%[0-9]+]]:_(p0) = G_PHI [[COPY]](p0), %bb.0, [[PTR_ADD]](p0), %bb.1 + ; CHECK-NEXT: $vgpr0_vgpr1 = COPY [[PHI]](p0) + ; CHECK-NEXT: S_SETPC_B64 undef $sgpr30_sgpr31 bb.0: successors: %bb.1, %bb.2 liveins: $vgpr0_vgpr1, $vgpr2 @@ -1089,23 +1157,27 @@ tracksRegLiveness: true body: | ; CHECK-LABEL: name: test_phi_p1 ; CHECK: bb.0: - ; CHECK: successors: %bb.1(0x40000000), %bb.2(0x40000000) - ; CHECK: liveins: $vgpr0_vgpr1, $vgpr2 - ; CHECK: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; CHECK: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; CHECK: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 - ; CHECK: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[COPY1]](s32), [[C]] - ; CHECK: G_BRCOND [[ICMP]](s1), %bb.1 - ; CHECK: G_BR %bb.2 - ; CHECK: bb.1: - ; CHECK: successors: %bb.2(0x80000000) - ; CHECK: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 8 - ; CHECK: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C1]](s64) - ; CHECK: G_BR %bb.2 - ; CHECK: bb.2: - ; CHECK: [[PHI:%[0-9]+]]:_(p1) = G_PHI [[COPY]](p1), %bb.0, [[PTR_ADD]](p1), %bb.1 - ; CHECK: $vgpr0_vgpr1 = COPY [[PHI]](p1) - ; CHECK: S_SETPC_B64 undef $sgpr30_sgpr31 + ; CHECK-NEXT: successors: %bb.1(0x40000000), %bb.2(0x40000000) + ; CHECK-NEXT: liveins: $vgpr0_vgpr1, $vgpr2 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr2 + ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 + ; CHECK-NEXT: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[COPY1]](s32), [[C]] + ; CHECK-NEXT: G_BRCOND [[ICMP]](s1), %bb.1 + ; CHECK-NEXT: G_BR %bb.2 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: bb.1: + ; CHECK-NEXT: successors: %bb.2(0x80000000) + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 8 + ; CHECK-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C1]](s64) + ; CHECK-NEXT: G_BR %bb.2 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: bb.2: + ; CHECK-NEXT: [[PHI:%[0-9]+]]:_(p1) = G_PHI [[COPY]](p1), %bb.0, [[PTR_ADD]](p1), %bb.1 + ; CHECK-NEXT: $vgpr0_vgpr1 = COPY [[PHI]](p1) + ; CHECK-NEXT: S_SETPC_B64 undef $sgpr30_sgpr31 bb.0: successors: %bb.1, %bb.2 liveins: $vgpr0_vgpr1, $vgpr2 @@ -1137,23 +1209,27 @@ tracksRegLiveness: true body: | ; CHECK-LABEL: name: test_phi_p4 ; CHECK: bb.0: - ; CHECK: successors: %bb.1(0x40000000), %bb.2(0x40000000) - ; CHECK: liveins: $vgpr0_vgpr1, $vgpr2 - ; CHECK: [[COPY:%[0-9]+]]:_(p4) = COPY $vgpr0_vgpr1 - ; CHECK: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; CHECK: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 - ; CHECK: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[COPY1]](s32), [[C]] - ; CHECK: G_BRCOND [[ICMP]](s1), %bb.1 - ; CHECK: G_BR %bb.2 - ; CHECK: bb.1: - ; CHECK: successors: %bb.2(0x80000000) - ; CHECK: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 8 - ; CHECK: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C1]](s64) - ; CHECK: G_BR %bb.2 - ; CHECK: bb.2: - ; CHECK: [[PHI:%[0-9]+]]:_(p4) = G_PHI [[COPY]](p4), %bb.0, [[PTR_ADD]](p4), %bb.1 - ; CHECK: $vgpr0_vgpr1 = COPY [[PHI]](p4) - ; CHECK: S_SETPC_B64 undef $sgpr30_sgpr31 + ; CHECK-NEXT: successors: %bb.1(0x40000000), %bb.2(0x40000000) + ; CHECK-NEXT: liveins: $vgpr0_vgpr1, $vgpr2 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(p4) = COPY $vgpr0_vgpr1 + ; 
CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr2 + ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 + ; CHECK-NEXT: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[COPY1]](s32), [[C]] + ; CHECK-NEXT: G_BRCOND [[ICMP]](s1), %bb.1 + ; CHECK-NEXT: G_BR %bb.2 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: bb.1: + ; CHECK-NEXT: successors: %bb.2(0x80000000) + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 8 + ; CHECK-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C1]](s64) + ; CHECK-NEXT: G_BR %bb.2 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: bb.2: + ; CHECK-NEXT: [[PHI:%[0-9]+]]:_(p4) = G_PHI [[COPY]](p4), %bb.0, [[PTR_ADD]](p4), %bb.1 + ; CHECK-NEXT: $vgpr0_vgpr1 = COPY [[PHI]](p4) + ; CHECK-NEXT: S_SETPC_B64 undef $sgpr30_sgpr31 bb.0: successors: %bb.1, %bb.2 liveins: $vgpr0_vgpr1, $vgpr2 @@ -1185,22 +1261,26 @@ tracksRegLiveness: true body: | ; CHECK-LABEL: name: test_phi_p9999 ; CHECK: bb.0: - ; CHECK: successors: %bb.1(0x40000000), %bb.2(0x40000000) - ; CHECK: liveins: $vgpr0_vgpr1, $vgpr2 - ; CHECK: [[COPY:%[0-9]+]]:_(p9999) = COPY $vgpr0_vgpr1 - ; CHECK: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; CHECK: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 - ; CHECK: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[COPY1]](s32), [[C]] - ; CHECK: G_BRCOND [[ICMP]](s1), %bb.1 - ; CHECK: G_BR %bb.2 - ; CHECK: bb.1: - ; CHECK: successors: %bb.2(0x80000000) - ; CHECK: [[DEF:%[0-9]+]]:_(p9999) = G_IMPLICIT_DEF - ; CHECK: G_BR %bb.2 - ; CHECK: bb.2: - ; CHECK: [[PHI:%[0-9]+]]:_(p9999) = G_PHI [[COPY]](p9999), %bb.0, [[DEF]](p9999), %bb.1 - ; CHECK: $vgpr0_vgpr1 = COPY [[PHI]](p9999) - ; CHECK: S_SETPC_B64 undef $sgpr30_sgpr31 + ; CHECK-NEXT: successors: %bb.1(0x40000000), %bb.2(0x40000000) + ; CHECK-NEXT: liveins: $vgpr0_vgpr1, $vgpr2 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(p9999) = COPY $vgpr0_vgpr1 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr2 + ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 + ; CHECK-NEXT: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[COPY1]](s32), [[C]] + ; CHECK-NEXT: G_BRCOND [[ICMP]](s1), %bb.1 + ; CHECK-NEXT: G_BR %bb.2 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: bb.1: + ; CHECK-NEXT: successors: %bb.2(0x80000000) + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p9999) = G_IMPLICIT_DEF + ; CHECK-NEXT: G_BR %bb.2 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: bb.2: + ; CHECK-NEXT: [[PHI:%[0-9]+]]:_(p9999) = G_PHI [[COPY]](p9999), %bb.0, [[DEF]](p9999), %bb.1 + ; CHECK-NEXT: $vgpr0_vgpr1 = COPY [[PHI]](p9999) + ; CHECK-NEXT: S_SETPC_B64 undef $sgpr30_sgpr31 bb.0: successors: %bb.1, %bb.2 liveins: $vgpr0_vgpr1, $vgpr2 @@ -1231,24 +1311,28 @@ tracksRegLiveness: true body: | ; CHECK-LABEL: name: test_phi_s1 ; CHECK: bb.0: - ; CHECK: successors: %bb.1(0x40000000), %bb.2(0x40000000) - ; CHECK: liveins: $vgpr0, $vgpr1 - ; CHECK: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; CHECK: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; CHECK: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 - ; CHECK: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[COPY1]](s32), [[C]] - ; CHECK: [[TRUNC:%[0-9]+]]:_(s1) = G_TRUNC [[COPY1]](s32) - ; CHECK: G_BRCOND [[ICMP]](s1), %bb.1 - ; CHECK: G_BR %bb.2 - ; CHECK: bb.1: - ; CHECK: successors: %bb.2(0x80000000) - ; CHECK: [[DEF:%[0-9]+]]:_(s1) = G_IMPLICIT_DEF - ; CHECK: G_BR %bb.2 - ; CHECK: bb.2: - ; CHECK: [[PHI:%[0-9]+]]:_(s1) = G_PHI [[TRUNC]](s1), %bb.0, [[DEF]](s1), %bb.1 - ; CHECK: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[PHI]](s1) - ; CHECK: $vgpr0 = COPY [[ZEXT]](s32) - ; CHECK: S_SETPC_B64 undef $sgpr30_sgpr31 + ; 
CHECK-NEXT: successors: %bb.1(0x40000000), %bb.2(0x40000000) + ; CHECK-NEXT: liveins: $vgpr0, $vgpr1 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 + ; CHECK-NEXT: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[COPY1]](s32), [[C]] + ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(s1) = G_TRUNC [[COPY1]](s32) + ; CHECK-NEXT: G_BRCOND [[ICMP]](s1), %bb.1 + ; CHECK-NEXT: G_BR %bb.2 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: bb.1: + ; CHECK-NEXT: successors: %bb.2(0x80000000) + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(s1) = G_IMPLICIT_DEF + ; CHECK-NEXT: G_BR %bb.2 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: bb.2: + ; CHECK-NEXT: [[PHI:%[0-9]+]]:_(s1) = G_PHI [[TRUNC]](s1), %bb.0, [[DEF]](s1), %bb.1 + ; CHECK-NEXT: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[PHI]](s1) + ; CHECK-NEXT: $vgpr0 = COPY [[ZEXT]](s32) + ; CHECK-NEXT: S_SETPC_B64 undef $sgpr30_sgpr31 bb.0: successors: %bb.1, %bb.2 liveins: $vgpr0, $vgpr1 @@ -1281,27 +1365,31 @@ tracksRegLiveness: true body: | ; CHECK-LABEL: name: test_phi_s7 ; CHECK: bb.0: - ; CHECK: successors: %bb.1(0x40000000), %bb.2(0x40000000) - ; CHECK: liveins: $vgpr0, $vgpr1 - ; CHECK: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; CHECK: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; CHECK: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 - ; CHECK: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[COPY1]](s32), [[C]] - ; CHECK: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY1]](s32) - ; CHECK: G_BRCOND [[ICMP]](s1), %bb.1 - ; CHECK: G_BR %bb.2 - ; CHECK: bb.1: - ; CHECK: successors: %bb.2(0x80000000) - ; CHECK: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF - ; CHECK: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[DEF]](s32) - ; CHECK: G_BR %bb.2 - ; CHECK: bb.2: - ; CHECK: [[PHI:%[0-9]+]]:_(s16) = G_PHI [[TRUNC]](s16), %bb.0, [[TRUNC1]](s16), %bb.1 - ; CHECK: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 127 - ; CHECK: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[PHI]](s16) - ; CHECK: [[AND:%[0-9]+]]:_(s32) = G_AND [[ANYEXT]], [[C1]] - ; CHECK: $vgpr0 = COPY [[AND]](s32) - ; CHECK: S_SETPC_B64 undef $sgpr30_sgpr31 + ; CHECK-NEXT: successors: %bb.1(0x40000000), %bb.2(0x40000000) + ; CHECK-NEXT: liveins: $vgpr0, $vgpr1 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 + ; CHECK-NEXT: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[COPY1]](s32), [[C]] + ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY1]](s32) + ; CHECK-NEXT: G_BRCOND [[ICMP]](s1), %bb.1 + ; CHECK-NEXT: G_BR %bb.2 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: bb.1: + ; CHECK-NEXT: successors: %bb.2(0x80000000) + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF + ; CHECK-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[DEF]](s32) + ; CHECK-NEXT: G_BR %bb.2 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: bb.2: + ; CHECK-NEXT: [[PHI:%[0-9]+]]:_(s16) = G_PHI [[TRUNC]](s16), %bb.0, [[TRUNC1]](s16), %bb.1 + ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 127 + ; CHECK-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[PHI]](s16) + ; CHECK-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[ANYEXT]], [[C1]] + ; CHECK-NEXT: $vgpr0 = COPY [[AND]](s32) + ; CHECK-NEXT: S_SETPC_B64 undef $sgpr30_sgpr31 bb.0: successors: %bb.1, %bb.2 liveins: $vgpr0, $vgpr1 @@ -1334,27 +1422,31 @@ tracksRegLiveness: true body: | ; CHECK-LABEL: name: test_phi_s8 ; CHECK: bb.0: - ; CHECK: successors: %bb.1(0x40000000), 
%bb.2(0x40000000) - ; CHECK: liveins: $vgpr0, $vgpr1 - ; CHECK: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; CHECK: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; CHECK: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 - ; CHECK: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[COPY1]](s32), [[C]] - ; CHECK: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY1]](s32) - ; CHECK: G_BRCOND [[ICMP]](s1), %bb.1 - ; CHECK: G_BR %bb.2 - ; CHECK: bb.1: - ; CHECK: successors: %bb.2(0x80000000) - ; CHECK: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF - ; CHECK: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[DEF]](s32) - ; CHECK: G_BR %bb.2 - ; CHECK: bb.2: - ; CHECK: [[PHI:%[0-9]+]]:_(s16) = G_PHI [[TRUNC]](s16), %bb.0, [[TRUNC1]](s16), %bb.1 - ; CHECK: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 255 - ; CHECK: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[PHI]](s16) - ; CHECK: [[AND:%[0-9]+]]:_(s32) = G_AND [[ANYEXT]], [[C1]] - ; CHECK: $vgpr0 = COPY [[AND]](s32) - ; CHECK: S_SETPC_B64 undef $sgpr30_sgpr31 + ; CHECK-NEXT: successors: %bb.1(0x40000000), %bb.2(0x40000000) + ; CHECK-NEXT: liveins: $vgpr0, $vgpr1 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 + ; CHECK-NEXT: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[COPY1]](s32), [[C]] + ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY1]](s32) + ; CHECK-NEXT: G_BRCOND [[ICMP]](s1), %bb.1 + ; CHECK-NEXT: G_BR %bb.2 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: bb.1: + ; CHECK-NEXT: successors: %bb.2(0x80000000) + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF + ; CHECK-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[DEF]](s32) + ; CHECK-NEXT: G_BR %bb.2 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: bb.2: + ; CHECK-NEXT: [[PHI:%[0-9]+]]:_(s16) = G_PHI [[TRUNC]](s16), %bb.0, [[TRUNC1]](s16), %bb.1 + ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 255 + ; CHECK-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[PHI]](s16) + ; CHECK-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[ANYEXT]], [[C1]] + ; CHECK-NEXT: $vgpr0 = COPY [[AND]](s32) + ; CHECK-NEXT: S_SETPC_B64 undef $sgpr30_sgpr31 bb.0: successors: %bb.1, %bb.2 liveins: $vgpr0, $vgpr1 @@ -1387,24 +1479,28 @@ tracksRegLiveness: true body: | ; CHECK-LABEL: name: test_phi_s16 ; CHECK: bb.0: - ; CHECK: successors: %bb.1(0x40000000), %bb.2(0x40000000) - ; CHECK: liveins: $vgpr0, $vgpr1 - ; CHECK: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; CHECK: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; CHECK: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 - ; CHECK: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[COPY1]](s32), [[C]] - ; CHECK: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY1]](s32) - ; CHECK: G_BRCOND [[ICMP]](s1), %bb.1 - ; CHECK: G_BR %bb.2 - ; CHECK: bb.1: - ; CHECK: successors: %bb.2(0x80000000) - ; CHECK: [[DEF:%[0-9]+]]:_(s16) = G_IMPLICIT_DEF - ; CHECK: G_BR %bb.2 - ; CHECK: bb.2: - ; CHECK: [[PHI:%[0-9]+]]:_(s16) = G_PHI [[TRUNC]](s16), %bb.0, [[DEF]](s16), %bb.1 - ; CHECK: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[PHI]](s16) - ; CHECK: $vgpr0 = COPY [[ZEXT]](s32) - ; CHECK: S_SETPC_B64 undef $sgpr30_sgpr31 + ; CHECK-NEXT: successors: %bb.1(0x40000000), %bb.2(0x40000000) + ; CHECK-NEXT: liveins: $vgpr0, $vgpr1 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 + ; CHECK-NEXT: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[COPY1]](s32), [[C]] + ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = 
G_TRUNC [[COPY1]](s32) + ; CHECK-NEXT: G_BRCOND [[ICMP]](s1), %bb.1 + ; CHECK-NEXT: G_BR %bb.2 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: bb.1: + ; CHECK-NEXT: successors: %bb.2(0x80000000) + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(s16) = G_IMPLICIT_DEF + ; CHECK-NEXT: G_BR %bb.2 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: bb.2: + ; CHECK-NEXT: [[PHI:%[0-9]+]]:_(s16) = G_PHI [[TRUNC]](s16), %bb.0, [[DEF]](s16), %bb.1 + ; CHECK-NEXT: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[PHI]](s16) + ; CHECK-NEXT: $vgpr0 = COPY [[ZEXT]](s32) + ; CHECK-NEXT: S_SETPC_B64 undef $sgpr30_sgpr31 bb.0: successors: %bb.1, %bb.2 liveins: $vgpr0, $vgpr1 @@ -1437,28 +1533,32 @@ tracksRegLiveness: true body: | ; CHECK-LABEL: name: test_phi_s128 ; CHECK: bb.0: - ; CHECK: successors: %bb.1(0x40000000), %bb.2(0x40000000) - ; CHECK: liveins: $vgpr0_vgpr1_vgpr2_vgpr3, $vgpr4 - ; CHECK: [[COPY:%[0-9]+]]:_(s128) = COPY $vgpr0_vgpr1_vgpr2_vgpr3 - ; CHECK: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr4 - ; CHECK: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 - ; CHECK: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[COPY1]](s32), [[C]] - ; CHECK: G_BRCOND [[ICMP]](s1), %bb.1 - ; CHECK: G_BR %bb.2 - ; CHECK: bb.1: - ; CHECK: successors: %bb.2(0x80000000) - ; CHECK: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](s128) - ; CHECK: [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32), [[UV6:%[0-9]+]]:_(s32), [[UV7:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](s128) - ; CHECK: [[UADDO:%[0-9]+]]:_(s32), [[UADDO1:%[0-9]+]]:_(s1) = G_UADDO [[UV]], [[UV4]] - ; CHECK: [[UADDE:%[0-9]+]]:_(s32), [[UADDE1:%[0-9]+]]:_(s1) = G_UADDE [[UV1]], [[UV5]], [[UADDO1]] - ; CHECK: [[UADDE2:%[0-9]+]]:_(s32), [[UADDE3:%[0-9]+]]:_(s1) = G_UADDE [[UV2]], [[UV6]], [[UADDE1]] - ; CHECK: [[UADDE4:%[0-9]+]]:_(s32), [[UADDE5:%[0-9]+]]:_(s1) = G_UADDE [[UV3]], [[UV7]], [[UADDE3]] - ; CHECK: [[MV:%[0-9]+]]:_(s128) = G_MERGE_VALUES [[UADDO]](s32), [[UADDE]](s32), [[UADDE2]](s32), [[UADDE4]](s32) - ; CHECK: G_BR %bb.2 - ; CHECK: bb.2: - ; CHECK: [[PHI:%[0-9]+]]:_(s128) = G_PHI [[COPY]](s128), %bb.0, [[MV]](s128), %bb.1 - ; CHECK: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[PHI]](s128) - ; CHECK: S_SETPC_B64 undef $sgpr30_sgpr31 + ; CHECK-NEXT: successors: %bb.1(0x40000000), %bb.2(0x40000000) + ; CHECK-NEXT: liveins: $vgpr0_vgpr1_vgpr2_vgpr3, $vgpr4 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s128) = COPY $vgpr0_vgpr1_vgpr2_vgpr3 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr4 + ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 + ; CHECK-NEXT: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[COPY1]](s32), [[C]] + ; CHECK-NEXT: G_BRCOND [[ICMP]](s1), %bb.1 + ; CHECK-NEXT: G_BR %bb.2 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: bb.1: + ; CHECK-NEXT: successors: %bb.2(0x80000000) + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](s128) + ; CHECK-NEXT: [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32), [[UV6:%[0-9]+]]:_(s32), [[UV7:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](s128) + ; CHECK-NEXT: [[UADDO:%[0-9]+]]:_(s32), [[UADDO1:%[0-9]+]]:_(s1) = G_UADDO [[UV]], [[UV4]] + ; CHECK-NEXT: [[UADDE:%[0-9]+]]:_(s32), [[UADDE1:%[0-9]+]]:_(s1) = G_UADDE [[UV1]], [[UV5]], [[UADDO1]] + ; CHECK-NEXT: [[UADDE2:%[0-9]+]]:_(s32), [[UADDE3:%[0-9]+]]:_(s1) = G_UADDE [[UV2]], [[UV6]], [[UADDE1]] + ; CHECK-NEXT: [[UADDE4:%[0-9]+]]:_(s32), [[UADDE5:%[0-9]+]]:_(s1) = G_UADDE [[UV3]], [[UV7]], [[UADDE3]] + ; CHECK-NEXT: 
[[MV:%[0-9]+]]:_(s128) = G_MERGE_VALUES [[UADDO]](s32), [[UADDE]](s32), [[UADDE2]](s32), [[UADDE4]](s32) + ; CHECK-NEXT: G_BR %bb.2 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: bb.2: + ; CHECK-NEXT: [[PHI:%[0-9]+]]:_(s128) = G_PHI [[COPY]](s128), %bb.0, [[MV]](s128), %bb.1 + ; CHECK-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[PHI]](s128) + ; CHECK-NEXT: S_SETPC_B64 undef $sgpr30_sgpr31 bb.0: successors: %bb.1, %bb.2 liveins: $vgpr0_vgpr1_vgpr2_vgpr3, $vgpr4 @@ -1489,32 +1589,36 @@ tracksRegLiveness: true body: | ; CHECK-LABEL: name: test_phi_s256 ; CHECK: bb.0: - ; CHECK: successors: %bb.1(0x40000000), %bb.2(0x40000000) - ; CHECK: liveins: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7, $vgpr8 - ; CHECK: [[COPY:%[0-9]+]]:_(s256) = COPY $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 - ; CHECK: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr8 - ; CHECK: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 - ; CHECK: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[COPY1]](s32), [[C]] - ; CHECK: G_BRCOND [[ICMP]](s1), %bb.1 - ; CHECK: G_BR %bb.2 - ; CHECK: bb.1: - ; CHECK: successors: %bb.2(0x80000000) - ; CHECK: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32), [[UV6:%[0-9]+]]:_(s32), [[UV7:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](s256) - ; CHECK: [[UV8:%[0-9]+]]:_(s32), [[UV9:%[0-9]+]]:_(s32), [[UV10:%[0-9]+]]:_(s32), [[UV11:%[0-9]+]]:_(s32), [[UV12:%[0-9]+]]:_(s32), [[UV13:%[0-9]+]]:_(s32), [[UV14:%[0-9]+]]:_(s32), [[UV15:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](s256) - ; CHECK: [[UADDO:%[0-9]+]]:_(s32), [[UADDO1:%[0-9]+]]:_(s1) = G_UADDO [[UV]], [[UV8]] - ; CHECK: [[UADDE:%[0-9]+]]:_(s32), [[UADDE1:%[0-9]+]]:_(s1) = G_UADDE [[UV1]], [[UV9]], [[UADDO1]] - ; CHECK: [[UADDE2:%[0-9]+]]:_(s32), [[UADDE3:%[0-9]+]]:_(s1) = G_UADDE [[UV2]], [[UV10]], [[UADDE1]] - ; CHECK: [[UADDE4:%[0-9]+]]:_(s32), [[UADDE5:%[0-9]+]]:_(s1) = G_UADDE [[UV3]], [[UV11]], [[UADDE3]] - ; CHECK: [[UADDE6:%[0-9]+]]:_(s32), [[UADDE7:%[0-9]+]]:_(s1) = G_UADDE [[UV4]], [[UV12]], [[UADDE5]] - ; CHECK: [[UADDE8:%[0-9]+]]:_(s32), [[UADDE9:%[0-9]+]]:_(s1) = G_UADDE [[UV5]], [[UV13]], [[UADDE7]] - ; CHECK: [[UADDE10:%[0-9]+]]:_(s32), [[UADDE11:%[0-9]+]]:_(s1) = G_UADDE [[UV6]], [[UV14]], [[UADDE9]] - ; CHECK: [[UADDE12:%[0-9]+]]:_(s32), [[UADDE13:%[0-9]+]]:_(s1) = G_UADDE [[UV7]], [[UV15]], [[UADDE11]] - ; CHECK: [[MV:%[0-9]+]]:_(s256) = G_MERGE_VALUES [[UADDO]](s32), [[UADDE]](s32), [[UADDE2]](s32), [[UADDE4]](s32), [[UADDE6]](s32), [[UADDE8]](s32), [[UADDE10]](s32), [[UADDE12]](s32) - ; CHECK: G_BR %bb.2 - ; CHECK: bb.2: - ; CHECK: [[PHI:%[0-9]+]]:_(s256) = G_PHI [[COPY]](s256), %bb.0, [[MV]](s256), %bb.1 - ; CHECK: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[PHI]](s256) - ; CHECK: S_SETPC_B64 undef $sgpr30_sgpr31 + ; CHECK-NEXT: successors: %bb.1(0x40000000), %bb.2(0x40000000) + ; CHECK-NEXT: liveins: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7, $vgpr8 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s256) = COPY $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr8 + ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 + ; CHECK-NEXT: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[COPY1]](s32), [[C]] + ; CHECK-NEXT: G_BRCOND [[ICMP]](s1), %bb.1 + ; CHECK-NEXT: G_BR %bb.2 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: bb.1: + ; CHECK-NEXT: successors: %bb.2(0x80000000) + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), 
[[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32), [[UV6:%[0-9]+]]:_(s32), [[UV7:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](s256) + ; CHECK-NEXT: [[UV8:%[0-9]+]]:_(s32), [[UV9:%[0-9]+]]:_(s32), [[UV10:%[0-9]+]]:_(s32), [[UV11:%[0-9]+]]:_(s32), [[UV12:%[0-9]+]]:_(s32), [[UV13:%[0-9]+]]:_(s32), [[UV14:%[0-9]+]]:_(s32), [[UV15:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](s256) + ; CHECK-NEXT: [[UADDO:%[0-9]+]]:_(s32), [[UADDO1:%[0-9]+]]:_(s1) = G_UADDO [[UV]], [[UV8]] + ; CHECK-NEXT: [[UADDE:%[0-9]+]]:_(s32), [[UADDE1:%[0-9]+]]:_(s1) = G_UADDE [[UV1]], [[UV9]], [[UADDO1]] + ; CHECK-NEXT: [[UADDE2:%[0-9]+]]:_(s32), [[UADDE3:%[0-9]+]]:_(s1) = G_UADDE [[UV2]], [[UV10]], [[UADDE1]] + ; CHECK-NEXT: [[UADDE4:%[0-9]+]]:_(s32), [[UADDE5:%[0-9]+]]:_(s1) = G_UADDE [[UV3]], [[UV11]], [[UADDE3]] + ; CHECK-NEXT: [[UADDE6:%[0-9]+]]:_(s32), [[UADDE7:%[0-9]+]]:_(s1) = G_UADDE [[UV4]], [[UV12]], [[UADDE5]] + ; CHECK-NEXT: [[UADDE8:%[0-9]+]]:_(s32), [[UADDE9:%[0-9]+]]:_(s1) = G_UADDE [[UV5]], [[UV13]], [[UADDE7]] + ; CHECK-NEXT: [[UADDE10:%[0-9]+]]:_(s32), [[UADDE11:%[0-9]+]]:_(s1) = G_UADDE [[UV6]], [[UV14]], [[UADDE9]] + ; CHECK-NEXT: [[UADDE12:%[0-9]+]]:_(s32), [[UADDE13:%[0-9]+]]:_(s1) = G_UADDE [[UV7]], [[UV15]], [[UADDE11]] + ; CHECK-NEXT: [[MV:%[0-9]+]]:_(s256) = G_MERGE_VALUES [[UADDO]](s32), [[UADDE]](s32), [[UADDE2]](s32), [[UADDE4]](s32), [[UADDE6]](s32), [[UADDE8]](s32), [[UADDE10]](s32), [[UADDE12]](s32) + ; CHECK-NEXT: G_BR %bb.2 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: bb.2: + ; CHECK-NEXT: [[PHI:%[0-9]+]]:_(s256) = G_PHI [[COPY]](s256), %bb.0, [[MV]](s256), %bb.1 + ; CHECK-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[PHI]](s256) + ; CHECK-NEXT: S_SETPC_B64 undef $sgpr30_sgpr31 bb.0: successors: %bb.1, %bb.2 liveins: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7, $vgpr8 @@ -1545,54 +1649,58 @@ tracksRegLiveness: true body: | ; CHECK-LABEL: name: test_phi_v2s1 ; CHECK: bb.0: - ; CHECK: successors: %bb.1(0x40000000), %bb.2(0x40000000) - ; CHECK: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3 - ; CHECK: [[COPY:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0 - ; CHECK: [[COPY1:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr1 - ; CHECK: [[COPY2:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr2 - ; CHECK: [[COPY3:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; CHECK: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 - ; CHECK: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY]](<2 x s16>) - ; CHECK: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; CHECK: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C1]](s32) - ; CHECK: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[COPY1]](<2 x s16>) - ; CHECK: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C1]](s32) - ; CHECK: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 - ; CHECK: [[AND:%[0-9]+]]:_(s32) = G_AND [[BITCAST]], [[C2]] - ; CHECK: [[AND1:%[0-9]+]]:_(s32) = G_AND [[BITCAST1]], [[C2]] - ; CHECK: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[AND]](s32), [[AND1]] - ; CHECK: [[AND2:%[0-9]+]]:_(s32) = G_AND [[LSHR]], [[C2]] - ; CHECK: [[AND3:%[0-9]+]]:_(s32) = G_AND [[LSHR1]], [[C2]] - ; CHECK: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[AND2]](s32), [[AND3]] - ; CHECK: [[ICMP2:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[COPY3]](s32), [[C]] - ; CHECK: G_BRCOND [[ICMP2]](s1), %bb.1 - ; CHECK: G_BR %bb.2 - ; CHECK: bb.1: - ; CHECK: successors: %bb.2(0x80000000) - ; CHECK: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[COPY]](<2 x s16>) - ; CHECK: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; CHECK: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C3]](s32) - ; CHECK: [[BITCAST3:%[0-9]+]]:_(s32) 
= G_BITCAST [[COPY2]](<2 x s16>) - ; CHECK: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST3]], [[C3]](s32) - ; CHECK: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 - ; CHECK: [[AND4:%[0-9]+]]:_(s32) = G_AND [[BITCAST2]], [[C4]] - ; CHECK: [[AND5:%[0-9]+]]:_(s32) = G_AND [[BITCAST3]], [[C4]] - ; CHECK: [[ICMP3:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[AND4]](s32), [[AND5]] - ; CHECK: [[AND6:%[0-9]+]]:_(s32) = G_AND [[LSHR2]], [[C4]] - ; CHECK: [[AND7:%[0-9]+]]:_(s32) = G_AND [[LSHR3]], [[C4]] - ; CHECK: [[ICMP4:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[AND6]](s32), [[AND7]] - ; CHECK: G_BR %bb.2 - ; CHECK: bb.2: - ; CHECK: [[PHI:%[0-9]+]]:_(s1) = G_PHI [[ICMP]](s1), %bb.0, [[ICMP3]](s1), %bb.1 - ; CHECK: [[PHI1:%[0-9]+]]:_(s1) = G_PHI [[ICMP1]](s1), %bb.0, [[ICMP4]](s1), %bb.1 - ; CHECK: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[PHI]](s1) - ; CHECK: [[ANYEXT1:%[0-9]+]]:_(s32) = G_ANYEXT [[PHI1]](s1) - ; CHECK: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 - ; CHECK: [[AND8:%[0-9]+]]:_(s32) = G_AND [[ANYEXT]], [[C5]] - ; CHECK: [[AND9:%[0-9]+]]:_(s32) = G_AND [[ANYEXT1]], [[C5]] - ; CHECK: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[AND8]](s32), [[AND9]](s32) - ; CHECK: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x s32>) - ; CHECK: S_SETPC_B64 undef $sgpr30_sgpr31 + ; CHECK-NEXT: successors: %bb.1(0x40000000), %bb.2(0x40000000) + ; CHECK-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr1 + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr2 + ; CHECK-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 + ; CHECK-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY]](<2 x s16>) + ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; CHECK-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C1]](s32) + ; CHECK-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[COPY1]](<2 x s16>) + ; CHECK-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C1]](s32) + ; CHECK-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 + ; CHECK-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[BITCAST]], [[C2]] + ; CHECK-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[BITCAST1]], [[C2]] + ; CHECK-NEXT: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[AND]](s32), [[AND1]] + ; CHECK-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[LSHR]], [[C2]] + ; CHECK-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[LSHR1]], [[C2]] + ; CHECK-NEXT: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[AND2]](s32), [[AND3]] + ; CHECK-NEXT: [[ICMP2:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[COPY3]](s32), [[C]] + ; CHECK-NEXT: G_BRCOND [[ICMP2]](s1), %bb.1 + ; CHECK-NEXT: G_BR %bb.2 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: bb.1: + ; CHECK-NEXT: successors: %bb.2(0x80000000) + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[COPY]](<2 x s16>) + ; CHECK-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; CHECK-NEXT: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C3]](s32) + ; CHECK-NEXT: [[BITCAST3:%[0-9]+]]:_(s32) = G_BITCAST [[COPY2]](<2 x s16>) + ; CHECK-NEXT: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST3]], [[C3]](s32) + ; CHECK-NEXT: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 + ; CHECK-NEXT: [[AND4:%[0-9]+]]:_(s32) = G_AND [[BITCAST2]], [[C4]] + ; CHECK-NEXT: [[AND5:%[0-9]+]]:_(s32) = G_AND [[BITCAST3]], [[C4]] + ; CHECK-NEXT: [[ICMP3:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[AND4]](s32), [[AND5]] + ; CHECK-NEXT: [[AND6:%[0-9]+]]:_(s32) = G_AND [[LSHR2]], 
[[C4]] + ; CHECK-NEXT: [[AND7:%[0-9]+]]:_(s32) = G_AND [[LSHR3]], [[C4]] + ; CHECK-NEXT: [[ICMP4:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[AND6]](s32), [[AND7]] + ; CHECK-NEXT: G_BR %bb.2 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: bb.2: + ; CHECK-NEXT: [[PHI:%[0-9]+]]:_(s1) = G_PHI [[ICMP]](s1), %bb.0, [[ICMP3]](s1), %bb.1 + ; CHECK-NEXT: [[PHI1:%[0-9]+]]:_(s1) = G_PHI [[ICMP1]](s1), %bb.0, [[ICMP4]](s1), %bb.1 + ; CHECK-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[PHI]](s1) + ; CHECK-NEXT: [[ANYEXT1:%[0-9]+]]:_(s32) = G_ANYEXT [[PHI1]](s1) + ; CHECK-NEXT: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 + ; CHECK-NEXT: [[AND8:%[0-9]+]]:_(s32) = G_AND [[ANYEXT]], [[C5]] + ; CHECK-NEXT: [[AND9:%[0-9]+]]:_(s32) = G_AND [[ANYEXT1]], [[C5]] + ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[AND8]](s32), [[AND9]](s32) + ; CHECK-NEXT: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x s32>) + ; CHECK-NEXT: S_SETPC_B64 undef $sgpr30_sgpr31 bb.0: successors: %bb.1, %bb.2 liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3 diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-saddo.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-saddo.mir index 6c17acdde8e2..da1491884bfd 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-saddo.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-saddo.mir @@ -1,5 +1,5 @@ # NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py -# RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=tahiti -O0 -run-pass=legalizer -global-isel-abort=0 -o - %s | FileCheck %s +# RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=tahiti -O0 -run-pass=legalizer -o - %s | FileCheck %s --- name: test_saddo_s7 @@ -9,21 +9,21 @@ body: | ; CHECK-LABEL: name: test_saddo_s7 ; CHECK: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; CHECK: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; CHECK: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[COPY]], [[COPY1]] - ; CHECK: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 - ; CHECK: [[SEXT_INREG:%[0-9]+]]:_(s32) = G_SEXT_INREG [[ADD]], 7 - ; CHECK: [[SEXT_INREG1:%[0-9]+]]:_(s32) = G_SEXT_INREG [[COPY]], 7 - ; CHECK: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(slt), [[SEXT_INREG]](s32), [[SEXT_INREG1]] - ; CHECK: [[SEXT_INREG2:%[0-9]+]]:_(s32) = G_SEXT_INREG [[COPY1]], 7 - ; CHECK: [[COPY2:%[0-9]+]]:_(s32) = COPY [[C]](s32) - ; CHECK: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(slt), [[SEXT_INREG2]](s32), [[COPY2]] - ; CHECK: [[XOR:%[0-9]+]]:_(s1) = G_XOR [[ICMP1]], [[ICMP]] - ; CHECK: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 127 - ; CHECK: [[AND:%[0-9]+]]:_(s32) = G_AND [[ADD]], [[C1]] - ; CHECK: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[XOR]](s1) - ; CHECK: $vgpr0 = COPY [[AND]](s32) - ; CHECK: $vgpr1 = COPY [[ZEXT]](s32) + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; CHECK-NEXT: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[COPY]], [[COPY1]] + ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 + ; CHECK-NEXT: [[SEXT_INREG:%[0-9]+]]:_(s32) = G_SEXT_INREG [[ADD]], 7 + ; CHECK-NEXT: [[SEXT_INREG1:%[0-9]+]]:_(s32) = G_SEXT_INREG [[COPY]], 7 + ; CHECK-NEXT: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(slt), [[SEXT_INREG]](s32), [[SEXT_INREG1]] + ; CHECK-NEXT: [[SEXT_INREG2:%[0-9]+]]:_(s32) = G_SEXT_INREG [[COPY1]], 7 + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY [[C]](s32) + ; CHECK-NEXT: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(slt), [[SEXT_INREG2]](s32), [[COPY2]] + ; CHECK-NEXT: [[XOR:%[0-9]+]]:_(s1) = G_XOR [[ICMP1]], [[ICMP]] + ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 127 + ; CHECK-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[ADD]], [[C1]] + ; CHECK-NEXT: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT 
[[XOR]](s1) + ; CHECK-NEXT: $vgpr0 = COPY [[AND]](s32) + ; CHECK-NEXT: $vgpr1 = COPY [[ZEXT]](s32) %0:_(s32) = COPY $vgpr0 %1:_(s32) = COPY $vgpr1 %2:_(s7) = G_TRUNC %0 @@ -43,18 +43,18 @@ body: | ; CHECK-LABEL: name: test_saddo_s16 ; CHECK: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; CHECK: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; CHECK: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[COPY]], [[COPY1]] - ; CHECK: [[SEXT_INREG:%[0-9]+]]:_(s32) = G_SEXT_INREG [[ADD]], 16 - ; CHECK: [[SEXT_INREG1:%[0-9]+]]:_(s32) = G_SEXT_INREG [[COPY]], 16 - ; CHECK: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(slt), [[SEXT_INREG]](s32), [[SEXT_INREG1]] - ; CHECK: [[SEXT_INREG2:%[0-9]+]]:_(s32) = G_SEXT_INREG [[COPY1]], 16 - ; CHECK: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 - ; CHECK: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(slt), [[SEXT_INREG2]](s32), [[C]] - ; CHECK: [[XOR:%[0-9]+]]:_(s1) = G_XOR [[ICMP1]], [[ICMP]] - ; CHECK: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[XOR]](s1) - ; CHECK: $vgpr0 = COPY [[ADD]](s32) - ; CHECK: $vgpr1 = COPY [[ZEXT]](s32) + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; CHECK-NEXT: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[COPY]], [[COPY1]] + ; CHECK-NEXT: [[SEXT_INREG:%[0-9]+]]:_(s32) = G_SEXT_INREG [[ADD]], 16 + ; CHECK-NEXT: [[SEXT_INREG1:%[0-9]+]]:_(s32) = G_SEXT_INREG [[COPY]], 16 + ; CHECK-NEXT: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(slt), [[SEXT_INREG]](s32), [[SEXT_INREG1]] + ; CHECK-NEXT: [[SEXT_INREG2:%[0-9]+]]:_(s32) = G_SEXT_INREG [[COPY1]], 16 + ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 + ; CHECK-NEXT: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(slt), [[SEXT_INREG2]](s32), [[C]] + ; CHECK-NEXT: [[XOR:%[0-9]+]]:_(s1) = G_XOR [[ICMP1]], [[ICMP]] + ; CHECK-NEXT: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[XOR]](s1) + ; CHECK-NEXT: $vgpr0 = COPY [[ADD]](s32) + ; CHECK-NEXT: $vgpr1 = COPY [[ZEXT]](s32) %0:_(s32) = COPY $vgpr0 %1:_(s32) = COPY $vgpr1 %2:_(s16) = G_TRUNC %0 @@ -74,15 +74,15 @@ body: | ; CHECK-LABEL: name: test_saddo_s32 ; CHECK: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; CHECK: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; CHECK: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[COPY]], [[COPY1]] - ; CHECK: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 - ; CHECK: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(slt), [[ADD]](s32), [[COPY]] - ; CHECK: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(slt), [[COPY1]](s32), [[C]] - ; CHECK: [[XOR:%[0-9]+]]:_(s1) = G_XOR [[ICMP1]], [[ICMP]] - ; CHECK: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[XOR]](s1) - ; CHECK: $vgpr0 = COPY [[ADD]](s32) - ; CHECK: $vgpr1 = COPY [[ZEXT]](s32) + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; CHECK-NEXT: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[COPY]], [[COPY1]] + ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 + ; CHECK-NEXT: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(slt), [[ADD]](s32), [[COPY]] + ; CHECK-NEXT: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(slt), [[COPY1]](s32), [[C]] + ; CHECK-NEXT: [[XOR:%[0-9]+]]:_(s1) = G_XOR [[ICMP1]], [[ICMP]] + ; CHECK-NEXT: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[XOR]](s1) + ; CHECK-NEXT: $vgpr0 = COPY [[ADD]](s32) + ; CHECK-NEXT: $vgpr1 = COPY [[ZEXT]](s32) %0:_(s32) = COPY $vgpr0 %1:_(s32) = COPY $vgpr1 %2:_(s32), %3:_(s1) = G_SADDO %0, %1 @@ -99,19 +99,19 @@ body: | ; CHECK-LABEL: name: test_saddo_s64 ; CHECK: [[COPY:%[0-9]+]]:_(s64) = COPY $vgpr0_vgpr1 - ; CHECK: [[COPY1:%[0-9]+]]:_(s64) = COPY $vgpr2_vgpr3 - ; CHECK: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](s64) - ; CHECK: [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](s64) - ; CHECK: 
[[UADDO:%[0-9]+]]:_(s32), [[UADDO1:%[0-9]+]]:_(s1) = G_UADDO [[UV]], [[UV2]] - ; CHECK: [[UADDE:%[0-9]+]]:_(s32), [[UADDE1:%[0-9]+]]:_(s1) = G_UADDE [[UV1]], [[UV3]], [[UADDO1]] - ; CHECK: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UADDO]](s32), [[UADDE]](s32) - ; CHECK: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 - ; CHECK: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(slt), [[MV]](s64), [[COPY]] - ; CHECK: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(slt), [[COPY1]](s64), [[C]] - ; CHECK: [[XOR:%[0-9]+]]:_(s1) = G_XOR [[ICMP1]], [[ICMP]] - ; CHECK: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[XOR]](s1) - ; CHECK: $vgpr0_vgpr1 = COPY [[MV]](s64) - ; CHECK: $vgpr2 = COPY [[ZEXT]](s32) + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s64) = COPY $vgpr2_vgpr3 + ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](s64) + ; CHECK-NEXT: [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](s64) + ; CHECK-NEXT: [[UADDO:%[0-9]+]]:_(s32), [[UADDO1:%[0-9]+]]:_(s1) = G_UADDO [[UV]], [[UV2]] + ; CHECK-NEXT: [[UADDE:%[0-9]+]]:_(s32), [[UADDE1:%[0-9]+]]:_(s1) = G_UADDE [[UV1]], [[UV3]], [[UADDO1]] + ; CHECK-NEXT: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UADDO]](s32), [[UADDE]](s32) + ; CHECK-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 + ; CHECK-NEXT: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(slt), [[MV]](s64), [[COPY]] + ; CHECK-NEXT: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(slt), [[COPY1]](s64), [[C]] + ; CHECK-NEXT: [[XOR:%[0-9]+]]:_(s1) = G_XOR [[ICMP1]], [[ICMP]] + ; CHECK-NEXT: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[XOR]](s1) + ; CHECK-NEXT: $vgpr0_vgpr1 = COPY [[MV]](s64) + ; CHECK-NEXT: $vgpr2 = COPY [[ZEXT]](s32) %0:_(s64) = COPY $vgpr0_vgpr1 %1:_(s64) = COPY $vgpr2_vgpr3 %2:_(s64), %3:_(s1) = G_SADDO %0, %1 @@ -128,46 +128,46 @@ body: | ; CHECK-LABEL: name: test_saddo_v2s16 ; CHECK: [[COPY:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0 - ; CHECK: [[COPY1:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr1 - ; CHECK: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY]](<2 x s16>) - ; CHECK: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; CHECK: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) - ; CHECK: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[COPY1]](<2 x s16>) - ; CHECK: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C]](s32) - ; CHECK: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[BITCAST]], [[BITCAST1]] - ; CHECK: [[ADD1:%[0-9]+]]:_(s32) = G_ADD [[LSHR]], [[LSHR1]] - ; CHECK: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 - ; CHECK: [[AND:%[0-9]+]]:_(s32) = G_AND [[ADD]], [[C1]] - ; CHECK: [[AND1:%[0-9]+]]:_(s32) = G_AND [[ADD1]], [[C1]] - ; CHECK: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C]](s32) - ; CHECK: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]] - ; CHECK: [[BITCAST2:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) - ; CHECK: [[BITCAST3:%[0-9]+]]:_(s32) = G_BITCAST [[COPY]](<2 x s16>) - ; CHECK: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST3]], [[C]](s32) - ; CHECK: [[SEXT_INREG:%[0-9]+]]:_(s32) = G_SEXT_INREG [[ADD]], 16 - ; CHECK: [[SEXT_INREG1:%[0-9]+]]:_(s32) = G_SEXT_INREG [[BITCAST3]], 16 - ; CHECK: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(slt), [[SEXT_INREG]](s32), [[SEXT_INREG1]] - ; CHECK: [[SEXT_INREG2:%[0-9]+]]:_(s32) = G_SEXT_INREG [[ADD1]], 16 - ; CHECK: [[SEXT_INREG3:%[0-9]+]]:_(s32) = G_SEXT_INREG [[LSHR2]], 16 - ; CHECK: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(slt), [[SEXT_INREG2]](s32), [[SEXT_INREG3]] - ; CHECK: [[BITCAST4:%[0-9]+]]:_(s32) = G_BITCAST [[COPY1]](<2 x s16>) - ; CHECK: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST4]], [[C]](s32) - ; CHECK: [[SEXT_INREG4:%[0-9]+]]:_(s32) = 
G_SEXT_INREG [[BITCAST4]], 16 - ; CHECK: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 - ; CHECK: [[COPY2:%[0-9]+]]:_(s32) = COPY [[C2]](s32) - ; CHECK: [[ICMP2:%[0-9]+]]:_(s1) = G_ICMP intpred(slt), [[SEXT_INREG4]](s32), [[COPY2]] - ; CHECK: [[SEXT_INREG5:%[0-9]+]]:_(s32) = G_SEXT_INREG [[LSHR3]], 16 - ; CHECK: [[ICMP3:%[0-9]+]]:_(s1) = G_ICMP intpred(slt), [[SEXT_INREG5]](s32), [[C2]] - ; CHECK: [[XOR:%[0-9]+]]:_(s1) = G_XOR [[ICMP2]], [[ICMP]] - ; CHECK: [[XOR1:%[0-9]+]]:_(s1) = G_XOR [[ICMP3]], [[ICMP1]] - ; CHECK: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[XOR]](s1) - ; CHECK: [[ANYEXT1:%[0-9]+]]:_(s32) = G_ANYEXT [[XOR1]](s1) - ; CHECK: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 - ; CHECK: [[AND2:%[0-9]+]]:_(s32) = G_AND [[ANYEXT]], [[C3]] - ; CHECK: [[AND3:%[0-9]+]]:_(s32) = G_AND [[ANYEXT1]], [[C3]] - ; CHECK: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[AND2]](s32), [[AND3]](s32) - ; CHECK: $vgpr0 = COPY [[BITCAST2]](<2 x s16>) - ; CHECK: $vgpr1_vgpr2 = COPY [[BUILD_VECTOR]](<2 x s32>) + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr1 + ; CHECK-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY]](<2 x s16>) + ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; CHECK-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) + ; CHECK-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[COPY1]](<2 x s16>) + ; CHECK-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C]](s32) + ; CHECK-NEXT: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[BITCAST]], [[BITCAST1]] + ; CHECK-NEXT: [[ADD1:%[0-9]+]]:_(s32) = G_ADD [[LSHR]], [[LSHR1]] + ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 + ; CHECK-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[ADD]], [[C1]] + ; CHECK-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[ADD1]], [[C1]] + ; CHECK-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C]](s32) + ; CHECK-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]] + ; CHECK-NEXT: [[BITCAST2:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) + ; CHECK-NEXT: [[BITCAST3:%[0-9]+]]:_(s32) = G_BITCAST [[COPY]](<2 x s16>) + ; CHECK-NEXT: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST3]], [[C]](s32) + ; CHECK-NEXT: [[SEXT_INREG:%[0-9]+]]:_(s32) = G_SEXT_INREG [[ADD]], 16 + ; CHECK-NEXT: [[SEXT_INREG1:%[0-9]+]]:_(s32) = G_SEXT_INREG [[BITCAST3]], 16 + ; CHECK-NEXT: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(slt), [[SEXT_INREG]](s32), [[SEXT_INREG1]] + ; CHECK-NEXT: [[SEXT_INREG2:%[0-9]+]]:_(s32) = G_SEXT_INREG [[ADD1]], 16 + ; CHECK-NEXT: [[SEXT_INREG3:%[0-9]+]]:_(s32) = G_SEXT_INREG [[LSHR2]], 16 + ; CHECK-NEXT: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(slt), [[SEXT_INREG2]](s32), [[SEXT_INREG3]] + ; CHECK-NEXT: [[BITCAST4:%[0-9]+]]:_(s32) = G_BITCAST [[COPY1]](<2 x s16>) + ; CHECK-NEXT: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST4]], [[C]](s32) + ; CHECK-NEXT: [[SEXT_INREG4:%[0-9]+]]:_(s32) = G_SEXT_INREG [[BITCAST4]], 16 + ; CHECK-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY [[C2]](s32) + ; CHECK-NEXT: [[ICMP2:%[0-9]+]]:_(s1) = G_ICMP intpred(slt), [[SEXT_INREG4]](s32), [[COPY2]] + ; CHECK-NEXT: [[SEXT_INREG5:%[0-9]+]]:_(s32) = G_SEXT_INREG [[LSHR3]], 16 + ; CHECK-NEXT: [[ICMP3:%[0-9]+]]:_(s1) = G_ICMP intpred(slt), [[SEXT_INREG5]](s32), [[C2]] + ; CHECK-NEXT: [[XOR:%[0-9]+]]:_(s1) = G_XOR [[ICMP2]], [[ICMP]] + ; CHECK-NEXT: [[XOR1:%[0-9]+]]:_(s1) = G_XOR [[ICMP3]], [[ICMP1]] + ; CHECK-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[XOR]](s1) + ; CHECK-NEXT: [[ANYEXT1:%[0-9]+]]:_(s32) = G_ANYEXT [[XOR1]](s1) + ; CHECK-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 + ; 
CHECK-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[ANYEXT]], [[C3]] + ; CHECK-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[ANYEXT1]], [[C3]] + ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[AND2]](s32), [[AND3]](s32) + ; CHECK-NEXT: $vgpr0 = COPY [[BITCAST2]](<2 x s16>) + ; CHECK-NEXT: $vgpr1_vgpr2 = COPY [[BUILD_VECTOR]](<2 x s32>) %0:_(<2 x s16>) = COPY $vgpr0 %1:_(<2 x s16>) = COPY $vgpr1 %2:_(<2 x s16>), %3:_(<2 x s1>) = G_SADDO %0, %1 @@ -183,80 +183,80 @@ body: | liveins: $vgpr0_vgpr1_vgpr2, $vgpr3_vgpr4_vgpr5 ; CHECK-LABEL: name: test_saddo_v3s16 ; CHECK: [[COPY:%[0-9]+]]:_(<6 x s16>) = COPY $vgpr0_vgpr1_vgpr2 - ; CHECK: [[COPY1:%[0-9]+]]:_(<6 x s16>) = COPY $vgpr3_vgpr4_vgpr5 - ; CHECK: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>), [[UV2:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[COPY]](<6 x s16>) - ; CHECK: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>) - ; CHECK: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; CHECK: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) - ; CHECK: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>) - ; CHECK: [[UV3:%[0-9]+]]:_(<2 x s16>), [[UV4:%[0-9]+]]:_(<2 x s16>), [[UV5:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[COPY1]](<6 x s16>) - ; CHECK: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[UV3]](<2 x s16>) - ; CHECK: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C]](s32) - ; CHECK: [[BITCAST3:%[0-9]+]]:_(s32) = G_BITCAST [[UV4]](<2 x s16>) - ; CHECK: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[BITCAST]], [[BITCAST2]] - ; CHECK: [[ADD1:%[0-9]+]]:_(s32) = G_ADD [[LSHR]], [[LSHR1]] - ; CHECK: [[ADD2:%[0-9]+]]:_(s32) = G_ADD [[BITCAST1]], [[BITCAST3]] - ; CHECK: [[UV6:%[0-9]+]]:_(<2 x s16>), [[UV7:%[0-9]+]]:_(<2 x s16>), [[UV8:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[COPY]](<6 x s16>) - ; CHECK: [[BITCAST4:%[0-9]+]]:_(s32) = G_BITCAST [[UV6]](<2 x s16>) - ; CHECK: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST4]], [[C]](s32) - ; CHECK: [[BITCAST5:%[0-9]+]]:_(s32) = G_BITCAST [[UV7]](<2 x s16>) - ; CHECK: [[SEXT_INREG:%[0-9]+]]:_(s32) = G_SEXT_INREG [[ADD]], 16 - ; CHECK: [[SEXT_INREG1:%[0-9]+]]:_(s32) = G_SEXT_INREG [[BITCAST4]], 16 - ; CHECK: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(slt), [[SEXT_INREG]](s32), [[SEXT_INREG1]] - ; CHECK: [[SEXT_INREG2:%[0-9]+]]:_(s32) = G_SEXT_INREG [[ADD1]], 16 - ; CHECK: [[SEXT_INREG3:%[0-9]+]]:_(s32) = G_SEXT_INREG [[LSHR2]], 16 - ; CHECK: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(slt), [[SEXT_INREG2]](s32), [[SEXT_INREG3]] - ; CHECK: [[SEXT_INREG4:%[0-9]+]]:_(s32) = G_SEXT_INREG [[ADD2]], 16 - ; CHECK: [[SEXT_INREG5:%[0-9]+]]:_(s32) = G_SEXT_INREG [[BITCAST5]], 16 - ; CHECK: [[ICMP2:%[0-9]+]]:_(s1) = G_ICMP intpred(slt), [[SEXT_INREG4]](s32), [[SEXT_INREG5]] - ; CHECK: [[UV9:%[0-9]+]]:_(<2 x s16>), [[UV10:%[0-9]+]]:_(<2 x s16>), [[UV11:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[COPY1]](<6 x s16>) - ; CHECK: [[BITCAST6:%[0-9]+]]:_(s32) = G_BITCAST [[UV9]](<2 x s16>) - ; CHECK: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST6]], [[C]](s32) - ; CHECK: [[BITCAST7:%[0-9]+]]:_(s32) = G_BITCAST [[UV10]](<2 x s16>) - ; CHECK: [[SEXT_INREG6:%[0-9]+]]:_(s32) = G_SEXT_INREG [[BITCAST6]], 16 - ; CHECK: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 - ; CHECK: [[COPY2:%[0-9]+]]:_(s32) = COPY [[C1]](s32) - ; CHECK: [[ICMP3:%[0-9]+]]:_(s1) = G_ICMP intpred(slt), [[SEXT_INREG6]](s32), [[COPY2]] - ; CHECK: [[SEXT_INREG7:%[0-9]+]]:_(s32) = G_SEXT_INREG [[LSHR3]], 16 - ; CHECK: [[COPY3:%[0-9]+]]:_(s32) = COPY [[C1]](s32) - ; CHECK: [[ICMP4:%[0-9]+]]:_(s1) = G_ICMP intpred(slt), [[SEXT_INREG7]](s32), 
[[COPY3]] - ; CHECK: [[SEXT_INREG8:%[0-9]+]]:_(s32) = G_SEXT_INREG [[BITCAST7]], 16 - ; CHECK: [[ICMP5:%[0-9]+]]:_(s1) = G_ICMP intpred(slt), [[SEXT_INREG8]](s32), [[C1]] - ; CHECK: [[XOR:%[0-9]+]]:_(s1) = G_XOR [[ICMP3]], [[ICMP]] - ; CHECK: [[XOR1:%[0-9]+]]:_(s1) = G_XOR [[ICMP4]], [[ICMP1]] - ; CHECK: [[XOR2:%[0-9]+]]:_(s1) = G_XOR [[ICMP5]], [[ICMP2]] - ; CHECK: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[XOR]](s1) - ; CHECK: [[ANYEXT1:%[0-9]+]]:_(s32) = G_ANYEXT [[XOR1]](s1) - ; CHECK: [[ANYEXT2:%[0-9]+]]:_(s32) = G_ANYEXT [[XOR2]](s1) - ; CHECK: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF - ; CHECK: [[UV12:%[0-9]+]]:_(<2 x s16>), [[UV13:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<4 x s16>) - ; CHECK: [[BITCAST8:%[0-9]+]]:_(s32) = G_BITCAST [[UV12]](<2 x s16>) - ; CHECK: [[LSHR4:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST8]], [[C]](s32) - ; CHECK: [[BITCAST9:%[0-9]+]]:_(s32) = G_BITCAST [[UV13]](<2 x s16>) - ; CHECK: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 - ; CHECK: [[AND:%[0-9]+]]:_(s32) = G_AND [[ADD]], [[C2]] - ; CHECK: [[AND1:%[0-9]+]]:_(s32) = G_AND [[ADD1]], [[C2]] - ; CHECK: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C]](s32) - ; CHECK: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]] - ; CHECK: [[BITCAST10:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) - ; CHECK: [[AND2:%[0-9]+]]:_(s32) = G_AND [[ADD2]], [[C2]] - ; CHECK: [[AND3:%[0-9]+]]:_(s32) = G_AND [[BITCAST8]], [[C2]] - ; CHECK: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C]](s32) - ; CHECK: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL1]] - ; CHECK: [[BITCAST11:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32) - ; CHECK: [[AND4:%[0-9]+]]:_(s32) = G_AND [[LSHR4]], [[C2]] - ; CHECK: [[AND5:%[0-9]+]]:_(s32) = G_AND [[BITCAST9]], [[C2]] - ; CHECK: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[C]](s32) - ; CHECK: [[OR2:%[0-9]+]]:_(s32) = G_OR [[AND4]], [[SHL2]] - ; CHECK: [[BITCAST12:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR2]](s32) - ; CHECK: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BITCAST10]](<2 x s16>), [[BITCAST11]](<2 x s16>), [[BITCAST12]](<2 x s16>) - ; CHECK: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 - ; CHECK: [[AND6:%[0-9]+]]:_(s32) = G_AND [[ANYEXT]], [[C3]] - ; CHECK: [[AND7:%[0-9]+]]:_(s32) = G_AND [[ANYEXT1]], [[C3]] - ; CHECK: [[AND8:%[0-9]+]]:_(s32) = G_AND [[ANYEXT2]], [[C3]] - ; CHECK: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[AND6]](s32), [[AND7]](s32), [[AND8]](s32) - ; CHECK: $vgpr0_vgpr1_vgpr2 = COPY [[CONCAT_VECTORS]](<6 x s16>) - ; CHECK: $vgpr0_vgpr1_vgpr2 = COPY [[BUILD_VECTOR]](<3 x s32>) + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(<6 x s16>) = COPY $vgpr3_vgpr4_vgpr5 + ; CHECK-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>), [[UV2:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[COPY]](<6 x s16>) + ; CHECK-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>) + ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; CHECK-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) + ; CHECK-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>) + ; CHECK-NEXT: [[UV3:%[0-9]+]]:_(<2 x s16>), [[UV4:%[0-9]+]]:_(<2 x s16>), [[UV5:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[COPY1]](<6 x s16>) + ; CHECK-NEXT: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[UV3]](<2 x s16>) + ; CHECK-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C]](s32) + ; CHECK-NEXT: [[BITCAST3:%[0-9]+]]:_(s32) = G_BITCAST [[UV4]](<2 x s16>) + ; CHECK-NEXT: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[BITCAST]], [[BITCAST2]] + ; CHECK-NEXT: [[ADD1:%[0-9]+]]:_(s32) = G_ADD [[LSHR]], 
[[LSHR1]] + ; CHECK-NEXT: [[ADD2:%[0-9]+]]:_(s32) = G_ADD [[BITCAST1]], [[BITCAST3]] + ; CHECK-NEXT: [[UV6:%[0-9]+]]:_(<2 x s16>), [[UV7:%[0-9]+]]:_(<2 x s16>), [[UV8:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[COPY]](<6 x s16>) + ; CHECK-NEXT: [[BITCAST4:%[0-9]+]]:_(s32) = G_BITCAST [[UV6]](<2 x s16>) + ; CHECK-NEXT: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST4]], [[C]](s32) + ; CHECK-NEXT: [[BITCAST5:%[0-9]+]]:_(s32) = G_BITCAST [[UV7]](<2 x s16>) + ; CHECK-NEXT: [[SEXT_INREG:%[0-9]+]]:_(s32) = G_SEXT_INREG [[ADD]], 16 + ; CHECK-NEXT: [[SEXT_INREG1:%[0-9]+]]:_(s32) = G_SEXT_INREG [[BITCAST4]], 16 + ; CHECK-NEXT: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(slt), [[SEXT_INREG]](s32), [[SEXT_INREG1]] + ; CHECK-NEXT: [[SEXT_INREG2:%[0-9]+]]:_(s32) = G_SEXT_INREG [[ADD1]], 16 + ; CHECK-NEXT: [[SEXT_INREG3:%[0-9]+]]:_(s32) = G_SEXT_INREG [[LSHR2]], 16 + ; CHECK-NEXT: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(slt), [[SEXT_INREG2]](s32), [[SEXT_INREG3]] + ; CHECK-NEXT: [[SEXT_INREG4:%[0-9]+]]:_(s32) = G_SEXT_INREG [[ADD2]], 16 + ; CHECK-NEXT: [[SEXT_INREG5:%[0-9]+]]:_(s32) = G_SEXT_INREG [[BITCAST5]], 16 + ; CHECK-NEXT: [[ICMP2:%[0-9]+]]:_(s1) = G_ICMP intpred(slt), [[SEXT_INREG4]](s32), [[SEXT_INREG5]] + ; CHECK-NEXT: [[UV9:%[0-9]+]]:_(<2 x s16>), [[UV10:%[0-9]+]]:_(<2 x s16>), [[UV11:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[COPY1]](<6 x s16>) + ; CHECK-NEXT: [[BITCAST6:%[0-9]+]]:_(s32) = G_BITCAST [[UV9]](<2 x s16>) + ; CHECK-NEXT: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST6]], [[C]](s32) + ; CHECK-NEXT: [[BITCAST7:%[0-9]+]]:_(s32) = G_BITCAST [[UV10]](<2 x s16>) + ; CHECK-NEXT: [[SEXT_INREG6:%[0-9]+]]:_(s32) = G_SEXT_INREG [[BITCAST6]], 16 + ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY [[C1]](s32) + ; CHECK-NEXT: [[ICMP3:%[0-9]+]]:_(s1) = G_ICMP intpred(slt), [[SEXT_INREG6]](s32), [[COPY2]] + ; CHECK-NEXT: [[SEXT_INREG7:%[0-9]+]]:_(s32) = G_SEXT_INREG [[LSHR3]], 16 + ; CHECK-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY [[C1]](s32) + ; CHECK-NEXT: [[ICMP4:%[0-9]+]]:_(s1) = G_ICMP intpred(slt), [[SEXT_INREG7]](s32), [[COPY3]] + ; CHECK-NEXT: [[SEXT_INREG8:%[0-9]+]]:_(s32) = G_SEXT_INREG [[BITCAST7]], 16 + ; CHECK-NEXT: [[ICMP5:%[0-9]+]]:_(s1) = G_ICMP intpred(slt), [[SEXT_INREG8]](s32), [[C1]] + ; CHECK-NEXT: [[XOR:%[0-9]+]]:_(s1) = G_XOR [[ICMP3]], [[ICMP]] + ; CHECK-NEXT: [[XOR1:%[0-9]+]]:_(s1) = G_XOR [[ICMP4]], [[ICMP1]] + ; CHECK-NEXT: [[XOR2:%[0-9]+]]:_(s1) = G_XOR [[ICMP5]], [[ICMP2]] + ; CHECK-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[XOR]](s1) + ; CHECK-NEXT: [[ANYEXT1:%[0-9]+]]:_(s32) = G_ANYEXT [[XOR1]](s1) + ; CHECK-NEXT: [[ANYEXT2:%[0-9]+]]:_(s32) = G_ANYEXT [[XOR2]](s1) + ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF + ; CHECK-NEXT: [[UV12:%[0-9]+]]:_(<2 x s16>), [[UV13:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<4 x s16>) + ; CHECK-NEXT: [[BITCAST8:%[0-9]+]]:_(s32) = G_BITCAST [[UV12]](<2 x s16>) + ; CHECK-NEXT: [[LSHR4:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST8]], [[C]](s32) + ; CHECK-NEXT: [[BITCAST9:%[0-9]+]]:_(s32) = G_BITCAST [[UV13]](<2 x s16>) + ; CHECK-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 + ; CHECK-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[ADD]], [[C2]] + ; CHECK-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[ADD1]], [[C2]] + ; CHECK-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C]](s32) + ; CHECK-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]] + ; CHECK-NEXT: [[BITCAST10:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) + ; CHECK-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[ADD2]], [[C2]] + ; 
CHECK-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[BITCAST8]], [[C2]] + ; CHECK-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C]](s32) + ; CHECK-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL1]] + ; CHECK-NEXT: [[BITCAST11:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32) + ; CHECK-NEXT: [[AND4:%[0-9]+]]:_(s32) = G_AND [[LSHR4]], [[C2]] + ; CHECK-NEXT: [[AND5:%[0-9]+]]:_(s32) = G_AND [[BITCAST9]], [[C2]] + ; CHECK-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[C]](s32) + ; CHECK-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[AND4]], [[SHL2]] + ; CHECK-NEXT: [[BITCAST12:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR2]](s32) + ; CHECK-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BITCAST10]](<2 x s16>), [[BITCAST11]](<2 x s16>), [[BITCAST12]](<2 x s16>) + ; CHECK-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 + ; CHECK-NEXT: [[AND6:%[0-9]+]]:_(s32) = G_AND [[ANYEXT]], [[C3]] + ; CHECK-NEXT: [[AND7:%[0-9]+]]:_(s32) = G_AND [[ANYEXT1]], [[C3]] + ; CHECK-NEXT: [[AND8:%[0-9]+]]:_(s32) = G_AND [[ANYEXT2]], [[C3]] + ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[AND6]](s32), [[AND7]](s32), [[AND8]](s32) + ; CHECK-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[CONCAT_VECTORS]](<6 x s16>) + ; CHECK-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[BUILD_VECTOR]](<3 x s32>) %0:_(<6 x s16>) = COPY $vgpr0_vgpr1_vgpr2 %1:_(<6 x s16>) = COPY $vgpr3_vgpr4_vgpr5 %2:_(<3 x s16>), %3:_(<3 x s16>) = G_UNMERGE_VALUES %0 @@ -277,84 +277,84 @@ body: | ; CHECK-LABEL: name: test_saddo_v4s16 ; CHECK: [[COPY:%[0-9]+]]:_(<4 x s16>) = COPY $vgpr0_vgpr1 - ; CHECK: [[COPY1:%[0-9]+]]:_(<4 x s16>) = COPY $vgpr1_vgpr2 - ; CHECK: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[COPY]](<4 x s16>) - ; CHECK: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>) - ; CHECK: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; CHECK: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) - ; CHECK: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>) - ; CHECK: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C]](s32) - ; CHECK: [[UV2:%[0-9]+]]:_(<2 x s16>), [[UV3:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[COPY1]](<4 x s16>) - ; CHECK: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[UV2]](<2 x s16>) - ; CHECK: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C]](s32) - ; CHECK: [[BITCAST3:%[0-9]+]]:_(s32) = G_BITCAST [[UV3]](<2 x s16>) - ; CHECK: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST3]], [[C]](s32) - ; CHECK: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[BITCAST]], [[BITCAST2]] - ; CHECK: [[ADD1:%[0-9]+]]:_(s32) = G_ADD [[LSHR]], [[LSHR2]] - ; CHECK: [[ADD2:%[0-9]+]]:_(s32) = G_ADD [[BITCAST1]], [[BITCAST3]] - ; CHECK: [[ADD3:%[0-9]+]]:_(s32) = G_ADD [[LSHR1]], [[LSHR3]] - ; CHECK: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 - ; CHECK: [[AND:%[0-9]+]]:_(s32) = G_AND [[ADD]], [[C1]] - ; CHECK: [[AND1:%[0-9]+]]:_(s32) = G_AND [[ADD1]], [[C1]] - ; CHECK: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C]](s32) - ; CHECK: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]] - ; CHECK: [[BITCAST4:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) - ; CHECK: [[AND2:%[0-9]+]]:_(s32) = G_AND [[ADD2]], [[C1]] - ; CHECK: [[AND3:%[0-9]+]]:_(s32) = G_AND [[ADD3]], [[C1]] - ; CHECK: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C]](s32) - ; CHECK: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL1]] - ; CHECK: [[BITCAST5:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32) - ; CHECK: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BITCAST4]](<2 x s16>), [[BITCAST5]](<2 x s16>) - ; CHECK: [[UV4:%[0-9]+]]:_(<2 x s16>), 
[[UV5:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[COPY]](<4 x s16>) - ; CHECK: [[BITCAST6:%[0-9]+]]:_(s32) = G_BITCAST [[UV4]](<2 x s16>) - ; CHECK: [[LSHR4:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST6]], [[C]](s32) - ; CHECK: [[BITCAST7:%[0-9]+]]:_(s32) = G_BITCAST [[UV5]](<2 x s16>) - ; CHECK: [[LSHR5:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST7]], [[C]](s32) - ; CHECK: [[SEXT_INREG:%[0-9]+]]:_(s32) = G_SEXT_INREG [[ADD]], 16 - ; CHECK: [[SEXT_INREG1:%[0-9]+]]:_(s32) = G_SEXT_INREG [[BITCAST6]], 16 - ; CHECK: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(slt), [[SEXT_INREG]](s32), [[SEXT_INREG1]] - ; CHECK: [[SEXT_INREG2:%[0-9]+]]:_(s32) = G_SEXT_INREG [[ADD1]], 16 - ; CHECK: [[SEXT_INREG3:%[0-9]+]]:_(s32) = G_SEXT_INREG [[LSHR4]], 16 - ; CHECK: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(slt), [[SEXT_INREG2]](s32), [[SEXT_INREG3]] - ; CHECK: [[SEXT_INREG4:%[0-9]+]]:_(s32) = G_SEXT_INREG [[ADD2]], 16 - ; CHECK: [[SEXT_INREG5:%[0-9]+]]:_(s32) = G_SEXT_INREG [[BITCAST7]], 16 - ; CHECK: [[ICMP2:%[0-9]+]]:_(s1) = G_ICMP intpred(slt), [[SEXT_INREG4]](s32), [[SEXT_INREG5]] - ; CHECK: [[SEXT_INREG6:%[0-9]+]]:_(s32) = G_SEXT_INREG [[ADD3]], 16 - ; CHECK: [[SEXT_INREG7:%[0-9]+]]:_(s32) = G_SEXT_INREG [[LSHR5]], 16 - ; CHECK: [[ICMP3:%[0-9]+]]:_(s1) = G_ICMP intpred(slt), [[SEXT_INREG6]](s32), [[SEXT_INREG7]] - ; CHECK: [[UV6:%[0-9]+]]:_(<2 x s16>), [[UV7:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[COPY1]](<4 x s16>) - ; CHECK: [[BITCAST8:%[0-9]+]]:_(s32) = G_BITCAST [[UV6]](<2 x s16>) - ; CHECK: [[LSHR6:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST8]], [[C]](s32) - ; CHECK: [[BITCAST9:%[0-9]+]]:_(s32) = G_BITCAST [[UV7]](<2 x s16>) - ; CHECK: [[LSHR7:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST9]], [[C]](s32) - ; CHECK: [[SEXT_INREG8:%[0-9]+]]:_(s32) = G_SEXT_INREG [[BITCAST8]], 16 - ; CHECK: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 - ; CHECK: [[COPY2:%[0-9]+]]:_(s32) = COPY [[C2]](s32) - ; CHECK: [[ICMP4:%[0-9]+]]:_(s1) = G_ICMP intpred(slt), [[SEXT_INREG8]](s32), [[COPY2]] - ; CHECK: [[SEXT_INREG9:%[0-9]+]]:_(s32) = G_SEXT_INREG [[LSHR6]], 16 - ; CHECK: [[COPY3:%[0-9]+]]:_(s32) = COPY [[C2]](s32) - ; CHECK: [[ICMP5:%[0-9]+]]:_(s1) = G_ICMP intpred(slt), [[SEXT_INREG9]](s32), [[COPY3]] - ; CHECK: [[SEXT_INREG10:%[0-9]+]]:_(s32) = G_SEXT_INREG [[BITCAST9]], 16 - ; CHECK: [[COPY4:%[0-9]+]]:_(s32) = COPY [[C2]](s32) - ; CHECK: [[ICMP6:%[0-9]+]]:_(s1) = G_ICMP intpred(slt), [[SEXT_INREG10]](s32), [[COPY4]] - ; CHECK: [[SEXT_INREG11:%[0-9]+]]:_(s32) = G_SEXT_INREG [[LSHR7]], 16 - ; CHECK: [[ICMP7:%[0-9]+]]:_(s1) = G_ICMP intpred(slt), [[SEXT_INREG11]](s32), [[C2]] - ; CHECK: [[XOR:%[0-9]+]]:_(s1) = G_XOR [[ICMP4]], [[ICMP]] - ; CHECK: [[XOR1:%[0-9]+]]:_(s1) = G_XOR [[ICMP5]], [[ICMP1]] - ; CHECK: [[XOR2:%[0-9]+]]:_(s1) = G_XOR [[ICMP6]], [[ICMP2]] - ; CHECK: [[XOR3:%[0-9]+]]:_(s1) = G_XOR [[ICMP7]], [[ICMP3]] - ; CHECK: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[XOR]](s1) - ; CHECK: [[ANYEXT1:%[0-9]+]]:_(s32) = G_ANYEXT [[XOR1]](s1) - ; CHECK: [[ANYEXT2:%[0-9]+]]:_(s32) = G_ANYEXT [[XOR2]](s1) - ; CHECK: [[ANYEXT3:%[0-9]+]]:_(s32) = G_ANYEXT [[XOR3]](s1) - ; CHECK: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 - ; CHECK: [[AND4:%[0-9]+]]:_(s32) = G_AND [[ANYEXT]], [[C3]] - ; CHECK: [[AND5:%[0-9]+]]:_(s32) = G_AND [[ANYEXT1]], [[C3]] - ; CHECK: [[AND6:%[0-9]+]]:_(s32) = G_AND [[ANYEXT2]], [[C3]] - ; CHECK: [[AND7:%[0-9]+]]:_(s32) = G_AND [[ANYEXT3]], [[C3]] - ; CHECK: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[AND4]](s32), [[AND5]](s32), [[AND6]](s32), [[AND7]](s32) - ; CHECK: $vgpr0_vgpr1 = COPY [[CONCAT_VECTORS]](<4 x s16>) - ; CHECK: 
$vgpr2_vgpr3_vgpr4_vgpr5 = COPY [[BUILD_VECTOR]](<4 x s32>) + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(<4 x s16>) = COPY $vgpr1_vgpr2 + ; CHECK-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[COPY]](<4 x s16>) + ; CHECK-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>) + ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; CHECK-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) + ; CHECK-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>) + ; CHECK-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C]](s32) + ; CHECK-NEXT: [[UV2:%[0-9]+]]:_(<2 x s16>), [[UV3:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[COPY1]](<4 x s16>) + ; CHECK-NEXT: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[UV2]](<2 x s16>) + ; CHECK-NEXT: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C]](s32) + ; CHECK-NEXT: [[BITCAST3:%[0-9]+]]:_(s32) = G_BITCAST [[UV3]](<2 x s16>) + ; CHECK-NEXT: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST3]], [[C]](s32) + ; CHECK-NEXT: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[BITCAST]], [[BITCAST2]] + ; CHECK-NEXT: [[ADD1:%[0-9]+]]:_(s32) = G_ADD [[LSHR]], [[LSHR2]] + ; CHECK-NEXT: [[ADD2:%[0-9]+]]:_(s32) = G_ADD [[BITCAST1]], [[BITCAST3]] + ; CHECK-NEXT: [[ADD3:%[0-9]+]]:_(s32) = G_ADD [[LSHR1]], [[LSHR3]] + ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 + ; CHECK-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[ADD]], [[C1]] + ; CHECK-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[ADD1]], [[C1]] + ; CHECK-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C]](s32) + ; CHECK-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]] + ; CHECK-NEXT: [[BITCAST4:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) + ; CHECK-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[ADD2]], [[C1]] + ; CHECK-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[ADD3]], [[C1]] + ; CHECK-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C]](s32) + ; CHECK-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL1]] + ; CHECK-NEXT: [[BITCAST5:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32) + ; CHECK-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BITCAST4]](<2 x s16>), [[BITCAST5]](<2 x s16>) + ; CHECK-NEXT: [[UV4:%[0-9]+]]:_(<2 x s16>), [[UV5:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[COPY]](<4 x s16>) + ; CHECK-NEXT: [[BITCAST6:%[0-9]+]]:_(s32) = G_BITCAST [[UV4]](<2 x s16>) + ; CHECK-NEXT: [[LSHR4:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST6]], [[C]](s32) + ; CHECK-NEXT: [[BITCAST7:%[0-9]+]]:_(s32) = G_BITCAST [[UV5]](<2 x s16>) + ; CHECK-NEXT: [[LSHR5:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST7]], [[C]](s32) + ; CHECK-NEXT: [[SEXT_INREG:%[0-9]+]]:_(s32) = G_SEXT_INREG [[ADD]], 16 + ; CHECK-NEXT: [[SEXT_INREG1:%[0-9]+]]:_(s32) = G_SEXT_INREG [[BITCAST6]], 16 + ; CHECK-NEXT: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(slt), [[SEXT_INREG]](s32), [[SEXT_INREG1]] + ; CHECK-NEXT: [[SEXT_INREG2:%[0-9]+]]:_(s32) = G_SEXT_INREG [[ADD1]], 16 + ; CHECK-NEXT: [[SEXT_INREG3:%[0-9]+]]:_(s32) = G_SEXT_INREG [[LSHR4]], 16 + ; CHECK-NEXT: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(slt), [[SEXT_INREG2]](s32), [[SEXT_INREG3]] + ; CHECK-NEXT: [[SEXT_INREG4:%[0-9]+]]:_(s32) = G_SEXT_INREG [[ADD2]], 16 + ; CHECK-NEXT: [[SEXT_INREG5:%[0-9]+]]:_(s32) = G_SEXT_INREG [[BITCAST7]], 16 + ; CHECK-NEXT: [[ICMP2:%[0-9]+]]:_(s1) = G_ICMP intpred(slt), [[SEXT_INREG4]](s32), [[SEXT_INREG5]] + ; CHECK-NEXT: [[SEXT_INREG6:%[0-9]+]]:_(s32) = G_SEXT_INREG [[ADD3]], 16 + ; CHECK-NEXT: [[SEXT_INREG7:%[0-9]+]]:_(s32) = G_SEXT_INREG [[LSHR5]], 16 + ; CHECK-NEXT: [[ICMP3:%[0-9]+]]:_(s1) = G_ICMP intpred(slt), 
[[SEXT_INREG6]](s32), [[SEXT_INREG7]] + ; CHECK-NEXT: [[UV6:%[0-9]+]]:_(<2 x s16>), [[UV7:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[COPY1]](<4 x s16>) + ; CHECK-NEXT: [[BITCAST8:%[0-9]+]]:_(s32) = G_BITCAST [[UV6]](<2 x s16>) + ; CHECK-NEXT: [[LSHR6:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST8]], [[C]](s32) + ; CHECK-NEXT: [[BITCAST9:%[0-9]+]]:_(s32) = G_BITCAST [[UV7]](<2 x s16>) + ; CHECK-NEXT: [[LSHR7:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST9]], [[C]](s32) + ; CHECK-NEXT: [[SEXT_INREG8:%[0-9]+]]:_(s32) = G_SEXT_INREG [[BITCAST8]], 16 + ; CHECK-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY [[C2]](s32) + ; CHECK-NEXT: [[ICMP4:%[0-9]+]]:_(s1) = G_ICMP intpred(slt), [[SEXT_INREG8]](s32), [[COPY2]] + ; CHECK-NEXT: [[SEXT_INREG9:%[0-9]+]]:_(s32) = G_SEXT_INREG [[LSHR6]], 16 + ; CHECK-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY [[C2]](s32) + ; CHECK-NEXT: [[ICMP5:%[0-9]+]]:_(s1) = G_ICMP intpred(slt), [[SEXT_INREG9]](s32), [[COPY3]] + ; CHECK-NEXT: [[SEXT_INREG10:%[0-9]+]]:_(s32) = G_SEXT_INREG [[BITCAST9]], 16 + ; CHECK-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY [[C2]](s32) + ; CHECK-NEXT: [[ICMP6:%[0-9]+]]:_(s1) = G_ICMP intpred(slt), [[SEXT_INREG10]](s32), [[COPY4]] + ; CHECK-NEXT: [[SEXT_INREG11:%[0-9]+]]:_(s32) = G_SEXT_INREG [[LSHR7]], 16 + ; CHECK-NEXT: [[ICMP7:%[0-9]+]]:_(s1) = G_ICMP intpred(slt), [[SEXT_INREG11]](s32), [[C2]] + ; CHECK-NEXT: [[XOR:%[0-9]+]]:_(s1) = G_XOR [[ICMP4]], [[ICMP]] + ; CHECK-NEXT: [[XOR1:%[0-9]+]]:_(s1) = G_XOR [[ICMP5]], [[ICMP1]] + ; CHECK-NEXT: [[XOR2:%[0-9]+]]:_(s1) = G_XOR [[ICMP6]], [[ICMP2]] + ; CHECK-NEXT: [[XOR3:%[0-9]+]]:_(s1) = G_XOR [[ICMP7]], [[ICMP3]] + ; CHECK-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[XOR]](s1) + ; CHECK-NEXT: [[ANYEXT1:%[0-9]+]]:_(s32) = G_ANYEXT [[XOR1]](s1) + ; CHECK-NEXT: [[ANYEXT2:%[0-9]+]]:_(s32) = G_ANYEXT [[XOR2]](s1) + ; CHECK-NEXT: [[ANYEXT3:%[0-9]+]]:_(s32) = G_ANYEXT [[XOR3]](s1) + ; CHECK-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 + ; CHECK-NEXT: [[AND4:%[0-9]+]]:_(s32) = G_AND [[ANYEXT]], [[C3]] + ; CHECK-NEXT: [[AND5:%[0-9]+]]:_(s32) = G_AND [[ANYEXT1]], [[C3]] + ; CHECK-NEXT: [[AND6:%[0-9]+]]:_(s32) = G_AND [[ANYEXT2]], [[C3]] + ; CHECK-NEXT: [[AND7:%[0-9]+]]:_(s32) = G_AND [[ANYEXT3]], [[C3]] + ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[AND4]](s32), [[AND5]](s32), [[AND6]](s32), [[AND7]](s32) + ; CHECK-NEXT: $vgpr0_vgpr1 = COPY [[CONCAT_VECTORS]](<4 x s16>) + ; CHECK-NEXT: $vgpr2_vgpr3_vgpr4_vgpr5 = COPY [[BUILD_VECTOR]](<4 x s32>) %0:_(<4 x s16>) = COPY $vgpr0_vgpr1 %1:_(<4 x s16>) = COPY $vgpr1_vgpr2 %2:_(<4 x s16>), %3:_(<4 x s1>) = G_SADDO %0, %1 @@ -371,29 +371,29 @@ body: | ; CHECK-LABEL: name: test_saddo_v2s32 ; CHECK: [[COPY:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr0_vgpr1 - ; CHECK: [[COPY1:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr2_vgpr3 - ; CHECK: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<2 x s32>) - ; CHECK: [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](<2 x s32>) - ; CHECK: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[UV]], [[UV2]] - ; CHECK: [[ADD1:%[0-9]+]]:_(s32) = G_ADD [[UV1]], [[UV3]] - ; CHECK: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[ADD]](s32), [[ADD1]](s32) - ; CHECK: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 - ; CHECK: [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<2 x s32>) - ; CHECK: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(slt), [[ADD]](s32), [[UV4]] - ; CHECK: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(slt), [[ADD1]](s32), [[UV5]] - ; CHECK: 
[[UV6:%[0-9]+]]:_(s32), [[UV7:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](<2 x s32>) - ; CHECK: [[ICMP2:%[0-9]+]]:_(s1) = G_ICMP intpred(slt), [[UV6]](s32), [[C]] - ; CHECK: [[ICMP3:%[0-9]+]]:_(s1) = G_ICMP intpred(slt), [[UV7]](s32), [[C]] - ; CHECK: [[XOR:%[0-9]+]]:_(s1) = G_XOR [[ICMP2]], [[ICMP]] - ; CHECK: [[XOR1:%[0-9]+]]:_(s1) = G_XOR [[ICMP3]], [[ICMP1]] - ; CHECK: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[XOR]](s1) - ; CHECK: [[ANYEXT1:%[0-9]+]]:_(s32) = G_ANYEXT [[XOR1]](s1) - ; CHECK: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 - ; CHECK: [[AND:%[0-9]+]]:_(s32) = G_AND [[ANYEXT]], [[C1]] - ; CHECK: [[AND1:%[0-9]+]]:_(s32) = G_AND [[ANYEXT1]], [[C1]] - ; CHECK: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[AND]](s32), [[AND1]](s32) - ; CHECK: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x s32>) - ; CHECK: $vgpr2_vgpr3 = COPY [[BUILD_VECTOR1]](<2 x s32>) + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr2_vgpr3 + ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<2 x s32>) + ; CHECK-NEXT: [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](<2 x s32>) + ; CHECK-NEXT: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[UV]], [[UV2]] + ; CHECK-NEXT: [[ADD1:%[0-9]+]]:_(s32) = G_ADD [[UV1]], [[UV3]] + ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[ADD]](s32), [[ADD1]](s32) + ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 + ; CHECK-NEXT: [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<2 x s32>) + ; CHECK-NEXT: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(slt), [[ADD]](s32), [[UV4]] + ; CHECK-NEXT: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(slt), [[ADD1]](s32), [[UV5]] + ; CHECK-NEXT: [[UV6:%[0-9]+]]:_(s32), [[UV7:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](<2 x s32>) + ; CHECK-NEXT: [[ICMP2:%[0-9]+]]:_(s1) = G_ICMP intpred(slt), [[UV6]](s32), [[C]] + ; CHECK-NEXT: [[ICMP3:%[0-9]+]]:_(s1) = G_ICMP intpred(slt), [[UV7]](s32), [[C]] + ; CHECK-NEXT: [[XOR:%[0-9]+]]:_(s1) = G_XOR [[ICMP2]], [[ICMP]] + ; CHECK-NEXT: [[XOR1:%[0-9]+]]:_(s1) = G_XOR [[ICMP3]], [[ICMP1]] + ; CHECK-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[XOR]](s1) + ; CHECK-NEXT: [[ANYEXT1:%[0-9]+]]:_(s32) = G_ANYEXT [[XOR1]](s1) + ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 + ; CHECK-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[ANYEXT]], [[C1]] + ; CHECK-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[ANYEXT1]], [[C1]] + ; CHECK-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[AND]](s32), [[AND1]](s32) + ; CHECK-NEXT: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x s32>) + ; CHECK-NEXT: $vgpr2_vgpr3 = COPY [[BUILD_VECTOR1]](<2 x s32>) %0:_(<2 x s32>) = COPY $vgpr0_vgpr1 %1:_(<2 x s32>) = COPY $vgpr2_vgpr3 %2:_(<2 x s32>), %3:_(<2 x s1>) = G_SADDO %0, %1 diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-saddsat.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-saddsat.mir index 0330579b6144..7b0547e8b03b 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-saddsat.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-saddsat.mir @@ -1,8 +1,8 @@ # NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py -# RUN: llc -global-isel-abort=0 -march=amdgcn -mcpu=tahiti -run-pass=legalizer %s -o - | FileCheck -check-prefix=GFX6 %s -# RUN: llc -global-isel-abort=0 -march=amdgcn -mcpu=fiji -run-pass=legalizer %s -o - | FileCheck -check-prefix=GFX8 %s -# RUN: llc -global-isel-abort=0 -march=amdgcn -mcpu=gfx900 -run-pass=legalizer %s -o - | FileCheck -check-prefix=GFX9 %s -# RUN: llc 
-global-isel-abort=0 -march=amdgcn -mcpu=gfx1010 -run-pass=legalizer %s -o - | FileCheck -check-prefix=GFX9 %s +# RUN: llc -march=amdgcn -mcpu=tahiti -run-pass=legalizer %s -o - | FileCheck -check-prefix=GFX6 %s +# RUN: llc -march=amdgcn -mcpu=fiji -run-pass=legalizer %s -o - | FileCheck -check-prefix=GFX8 %s +# RUN: llc -march=amdgcn -mcpu=gfx900 -run-pass=legalizer %s -o - | FileCheck -check-prefix=GFX9 %s +# RUN: llc -march=amdgcn -mcpu=gfx1010 -run-pass=legalizer %s -o - | FileCheck -check-prefix=GFX9 %s --- name: saddsat_s7 @@ -12,55 +12,55 @@ body: | ; GFX6-LABEL: name: saddsat_s7 ; GFX6: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX6: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX6: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 25 - ; GFX6: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY]], [[C]](s32) - ; GFX6: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[COPY1]], [[C]](s32) - ; GFX6: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 2147483647 - ; GFX6: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 -2147483648 - ; GFX6: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 - ; GFX6: [[SMAX:%[0-9]+]]:_(s32) = G_SMAX [[SHL]], [[C3]] - ; GFX6: [[SUB:%[0-9]+]]:_(s32) = G_SUB [[C1]], [[SMAX]] - ; GFX6: [[SMIN:%[0-9]+]]:_(s32) = G_SMIN [[SHL]], [[C3]] - ; GFX6: [[SUB1:%[0-9]+]]:_(s32) = G_SUB [[C2]], [[SMIN]] - ; GFX6: [[SMAX1:%[0-9]+]]:_(s32) = G_SMAX [[SUB1]], [[SHL1]] - ; GFX6: [[SMIN1:%[0-9]+]]:_(s32) = G_SMIN [[SMAX1]], [[SUB]] - ; GFX6: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[SHL]], [[SMIN1]] - ; GFX6: [[ASHR:%[0-9]+]]:_(s32) = G_ASHR [[ADD]], [[C]](s32) - ; GFX6: $vgpr0 = COPY [[ASHR]](s32) + ; GFX6-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; GFX6-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 25 + ; GFX6-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY]], [[C]](s32) + ; GFX6-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[COPY1]], [[C]](s32) + ; GFX6-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 2147483647 + ; GFX6-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 -2147483648 + ; GFX6-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 + ; GFX6-NEXT: [[SMAX:%[0-9]+]]:_(s32) = G_SMAX [[SHL]], [[C3]] + ; GFX6-NEXT: [[SUB:%[0-9]+]]:_(s32) = G_SUB [[C1]], [[SMAX]] + ; GFX6-NEXT: [[SMIN:%[0-9]+]]:_(s32) = G_SMIN [[SHL]], [[C3]] + ; GFX6-NEXT: [[SUB1:%[0-9]+]]:_(s32) = G_SUB [[C2]], [[SMIN]] + ; GFX6-NEXT: [[SMAX1:%[0-9]+]]:_(s32) = G_SMAX [[SUB1]], [[SHL1]] + ; GFX6-NEXT: [[SMIN1:%[0-9]+]]:_(s32) = G_SMIN [[SMAX1]], [[SUB]] + ; GFX6-NEXT: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[SHL]], [[SMIN1]] + ; GFX6-NEXT: [[ASHR:%[0-9]+]]:_(s32) = G_ASHR [[ADD]], [[C]](s32) + ; GFX6-NEXT: $vgpr0 = COPY [[ASHR]](s32) ; GFX8-LABEL: name: saddsat_s7 ; GFX8: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX8: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX8: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY]](s32) - ; GFX8: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY1]](s32) - ; GFX8: [[C:%[0-9]+]]:_(s16) = G_CONSTANT i16 9 - ; GFX8: [[SHL:%[0-9]+]]:_(s16) = G_SHL [[TRUNC]], [[C]](s16) - ; GFX8: [[SHL1:%[0-9]+]]:_(s16) = G_SHL [[TRUNC1]], [[C]](s16) - ; GFX8: [[C1:%[0-9]+]]:_(s16) = G_CONSTANT i16 32767 - ; GFX8: [[C2:%[0-9]+]]:_(s16) = G_CONSTANT i16 -32768 - ; GFX8: [[C3:%[0-9]+]]:_(s16) = G_CONSTANT i16 0 - ; GFX8: [[SMAX:%[0-9]+]]:_(s16) = G_SMAX [[SHL]], [[C3]] - ; GFX8: [[SUB:%[0-9]+]]:_(s16) = G_SUB [[C1]], [[SMAX]] - ; GFX8: [[SMIN:%[0-9]+]]:_(s16) = G_SMIN [[SHL]], [[C3]] - ; GFX8: [[SUB1:%[0-9]+]]:_(s16) = G_SUB [[C2]], [[SMIN]] - ; GFX8: [[SMAX1:%[0-9]+]]:_(s16) = G_SMAX [[SUB1]], [[SHL1]] - ; GFX8: [[SMIN1:%[0-9]+]]:_(s16) = G_SMIN [[SMAX1]], [[SUB]] - ; GFX8: [[ADD:%[0-9]+]]:_(s16) = G_ADD 
[[SHL]], [[SMIN1]] - ; GFX8: [[ASHR:%[0-9]+]]:_(s16) = G_ASHR [[ADD]], [[C]](s16) - ; GFX8: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[ASHR]](s16) - ; GFX8: $vgpr0 = COPY [[ANYEXT]](s32) + ; GFX8-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; GFX8-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY]](s32) + ; GFX8-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY1]](s32) + ; GFX8-NEXT: [[C:%[0-9]+]]:_(s16) = G_CONSTANT i16 9 + ; GFX8-NEXT: [[SHL:%[0-9]+]]:_(s16) = G_SHL [[TRUNC]], [[C]](s16) + ; GFX8-NEXT: [[SHL1:%[0-9]+]]:_(s16) = G_SHL [[TRUNC1]], [[C]](s16) + ; GFX8-NEXT: [[C1:%[0-9]+]]:_(s16) = G_CONSTANT i16 32767 + ; GFX8-NEXT: [[C2:%[0-9]+]]:_(s16) = G_CONSTANT i16 -32768 + ; GFX8-NEXT: [[C3:%[0-9]+]]:_(s16) = G_CONSTANT i16 0 + ; GFX8-NEXT: [[SMAX:%[0-9]+]]:_(s16) = G_SMAX [[SHL]], [[C3]] + ; GFX8-NEXT: [[SUB:%[0-9]+]]:_(s16) = G_SUB [[C1]], [[SMAX]] + ; GFX8-NEXT: [[SMIN:%[0-9]+]]:_(s16) = G_SMIN [[SHL]], [[C3]] + ; GFX8-NEXT: [[SUB1:%[0-9]+]]:_(s16) = G_SUB [[C2]], [[SMIN]] + ; GFX8-NEXT: [[SMAX1:%[0-9]+]]:_(s16) = G_SMAX [[SUB1]], [[SHL1]] + ; GFX8-NEXT: [[SMIN1:%[0-9]+]]:_(s16) = G_SMIN [[SMAX1]], [[SUB]] + ; GFX8-NEXT: [[ADD:%[0-9]+]]:_(s16) = G_ADD [[SHL]], [[SMIN1]] + ; GFX8-NEXT: [[ASHR:%[0-9]+]]:_(s16) = G_ASHR [[ADD]], [[C]](s16) + ; GFX8-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[ASHR]](s16) + ; GFX8-NEXT: $vgpr0 = COPY [[ANYEXT]](s32) ; GFX9-LABEL: name: saddsat_s7 ; GFX9: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX9: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX9: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY]](s32) - ; GFX9: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY1]](s32) - ; GFX9: [[C:%[0-9]+]]:_(s16) = G_CONSTANT i16 9 - ; GFX9: [[SHL:%[0-9]+]]:_(s16) = G_SHL [[TRUNC]], [[C]](s16) - ; GFX9: [[SHL1:%[0-9]+]]:_(s16) = G_SHL [[TRUNC1]], [[C]](s16) - ; GFX9: [[SADDSAT:%[0-9]+]]:_(s16) = G_SADDSAT [[SHL]], [[SHL1]] - ; GFX9: [[ASHR:%[0-9]+]]:_(s16) = G_ASHR [[SADDSAT]], [[C]](s16) - ; GFX9: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[ASHR]](s16) - ; GFX9: $vgpr0 = COPY [[ANYEXT]](s32) + ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; GFX9-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY]](s32) + ; GFX9-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY1]](s32) + ; GFX9-NEXT: [[C:%[0-9]+]]:_(s16) = G_CONSTANT i16 9 + ; GFX9-NEXT: [[SHL:%[0-9]+]]:_(s16) = G_SHL [[TRUNC]], [[C]](s16) + ; GFX9-NEXT: [[SHL1:%[0-9]+]]:_(s16) = G_SHL [[TRUNC1]], [[C]](s16) + ; GFX9-NEXT: [[SADDSAT:%[0-9]+]]:_(s16) = G_SADDSAT [[SHL]], [[SHL1]] + ; GFX9-NEXT: [[ASHR:%[0-9]+]]:_(s16) = G_ASHR [[SADDSAT]], [[C]](s16) + ; GFX9-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[ASHR]](s16) + ; GFX9-NEXT: $vgpr0 = COPY [[ANYEXT]](s32) %0:_(s32) = COPY $vgpr0 %1:_(s32) = COPY $vgpr1 %2:_(s7) = G_TRUNC %0 @@ -78,55 +78,55 @@ body: | ; GFX6-LABEL: name: saddsat_s8 ; GFX6: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX6: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX6: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 24 - ; GFX6: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY]], [[C]](s32) - ; GFX6: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[COPY1]], [[C]](s32) - ; GFX6: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 2147483647 - ; GFX6: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 -2147483648 - ; GFX6: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 - ; GFX6: [[SMAX:%[0-9]+]]:_(s32) = G_SMAX [[SHL]], [[C3]] - ; GFX6: [[SUB:%[0-9]+]]:_(s32) = G_SUB [[C1]], [[SMAX]] - ; GFX6: [[SMIN:%[0-9]+]]:_(s32) = G_SMIN [[SHL]], [[C3]] - ; GFX6: [[SUB1:%[0-9]+]]:_(s32) = G_SUB [[C2]], [[SMIN]] - ; GFX6: [[SMAX1:%[0-9]+]]:_(s32) = G_SMAX [[SUB1]], [[SHL1]] - ; GFX6: 
[[SMIN1:%[0-9]+]]:_(s32) = G_SMIN [[SMAX1]], [[SUB]] - ; GFX6: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[SHL]], [[SMIN1]] - ; GFX6: [[ASHR:%[0-9]+]]:_(s32) = G_ASHR [[ADD]], [[C]](s32) - ; GFX6: $vgpr0 = COPY [[ASHR]](s32) + ; GFX6-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; GFX6-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 24 + ; GFX6-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY]], [[C]](s32) + ; GFX6-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[COPY1]], [[C]](s32) + ; GFX6-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 2147483647 + ; GFX6-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 -2147483648 + ; GFX6-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 + ; GFX6-NEXT: [[SMAX:%[0-9]+]]:_(s32) = G_SMAX [[SHL]], [[C3]] + ; GFX6-NEXT: [[SUB:%[0-9]+]]:_(s32) = G_SUB [[C1]], [[SMAX]] + ; GFX6-NEXT: [[SMIN:%[0-9]+]]:_(s32) = G_SMIN [[SHL]], [[C3]] + ; GFX6-NEXT: [[SUB1:%[0-9]+]]:_(s32) = G_SUB [[C2]], [[SMIN]] + ; GFX6-NEXT: [[SMAX1:%[0-9]+]]:_(s32) = G_SMAX [[SUB1]], [[SHL1]] + ; GFX6-NEXT: [[SMIN1:%[0-9]+]]:_(s32) = G_SMIN [[SMAX1]], [[SUB]] + ; GFX6-NEXT: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[SHL]], [[SMIN1]] + ; GFX6-NEXT: [[ASHR:%[0-9]+]]:_(s32) = G_ASHR [[ADD]], [[C]](s32) + ; GFX6-NEXT: $vgpr0 = COPY [[ASHR]](s32) ; GFX8-LABEL: name: saddsat_s8 ; GFX8: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX8: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX8: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY]](s32) - ; GFX8: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY1]](s32) - ; GFX8: [[C:%[0-9]+]]:_(s16) = G_CONSTANT i16 8 - ; GFX8: [[SHL:%[0-9]+]]:_(s16) = G_SHL [[TRUNC]], [[C]](s16) - ; GFX8: [[SHL1:%[0-9]+]]:_(s16) = G_SHL [[TRUNC1]], [[C]](s16) - ; GFX8: [[C1:%[0-9]+]]:_(s16) = G_CONSTANT i16 32767 - ; GFX8: [[C2:%[0-9]+]]:_(s16) = G_CONSTANT i16 -32768 - ; GFX8: [[C3:%[0-9]+]]:_(s16) = G_CONSTANT i16 0 - ; GFX8: [[SMAX:%[0-9]+]]:_(s16) = G_SMAX [[SHL]], [[C3]] - ; GFX8: [[SUB:%[0-9]+]]:_(s16) = G_SUB [[C1]], [[SMAX]] - ; GFX8: [[SMIN:%[0-9]+]]:_(s16) = G_SMIN [[SHL]], [[C3]] - ; GFX8: [[SUB1:%[0-9]+]]:_(s16) = G_SUB [[C2]], [[SMIN]] - ; GFX8: [[SMAX1:%[0-9]+]]:_(s16) = G_SMAX [[SUB1]], [[SHL1]] - ; GFX8: [[SMIN1:%[0-9]+]]:_(s16) = G_SMIN [[SMAX1]], [[SUB]] - ; GFX8: [[ADD:%[0-9]+]]:_(s16) = G_ADD [[SHL]], [[SMIN1]] - ; GFX8: [[ASHR:%[0-9]+]]:_(s16) = G_ASHR [[ADD]], [[C]](s16) - ; GFX8: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[ASHR]](s16) - ; GFX8: $vgpr0 = COPY [[ANYEXT]](s32) + ; GFX8-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; GFX8-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY]](s32) + ; GFX8-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY1]](s32) + ; GFX8-NEXT: [[C:%[0-9]+]]:_(s16) = G_CONSTANT i16 8 + ; GFX8-NEXT: [[SHL:%[0-9]+]]:_(s16) = G_SHL [[TRUNC]], [[C]](s16) + ; GFX8-NEXT: [[SHL1:%[0-9]+]]:_(s16) = G_SHL [[TRUNC1]], [[C]](s16) + ; GFX8-NEXT: [[C1:%[0-9]+]]:_(s16) = G_CONSTANT i16 32767 + ; GFX8-NEXT: [[C2:%[0-9]+]]:_(s16) = G_CONSTANT i16 -32768 + ; GFX8-NEXT: [[C3:%[0-9]+]]:_(s16) = G_CONSTANT i16 0 + ; GFX8-NEXT: [[SMAX:%[0-9]+]]:_(s16) = G_SMAX [[SHL]], [[C3]] + ; GFX8-NEXT: [[SUB:%[0-9]+]]:_(s16) = G_SUB [[C1]], [[SMAX]] + ; GFX8-NEXT: [[SMIN:%[0-9]+]]:_(s16) = G_SMIN [[SHL]], [[C3]] + ; GFX8-NEXT: [[SUB1:%[0-9]+]]:_(s16) = G_SUB [[C2]], [[SMIN]] + ; GFX8-NEXT: [[SMAX1:%[0-9]+]]:_(s16) = G_SMAX [[SUB1]], [[SHL1]] + ; GFX8-NEXT: [[SMIN1:%[0-9]+]]:_(s16) = G_SMIN [[SMAX1]], [[SUB]] + ; GFX8-NEXT: [[ADD:%[0-9]+]]:_(s16) = G_ADD [[SHL]], [[SMIN1]] + ; GFX8-NEXT: [[ASHR:%[0-9]+]]:_(s16) = G_ASHR [[ADD]], [[C]](s16) + ; GFX8-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[ASHR]](s16) + ; GFX8-NEXT: $vgpr0 = 
COPY [[ANYEXT]](s32) ; GFX9-LABEL: name: saddsat_s8 ; GFX9: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX9: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX9: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY]](s32) - ; GFX9: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY1]](s32) - ; GFX9: [[C:%[0-9]+]]:_(s16) = G_CONSTANT i16 8 - ; GFX9: [[SHL:%[0-9]+]]:_(s16) = G_SHL [[TRUNC]], [[C]](s16) - ; GFX9: [[SHL1:%[0-9]+]]:_(s16) = G_SHL [[TRUNC1]], [[C]](s16) - ; GFX9: [[SADDSAT:%[0-9]+]]:_(s16) = G_SADDSAT [[SHL]], [[SHL1]] - ; GFX9: [[ASHR:%[0-9]+]]:_(s16) = G_ASHR [[SADDSAT]], [[C]](s16) - ; GFX9: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[ASHR]](s16) - ; GFX9: $vgpr0 = COPY [[ANYEXT]](s32) + ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; GFX9-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY]](s32) + ; GFX9-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY1]](s32) + ; GFX9-NEXT: [[C:%[0-9]+]]:_(s16) = G_CONSTANT i16 8 + ; GFX9-NEXT: [[SHL:%[0-9]+]]:_(s16) = G_SHL [[TRUNC]], [[C]](s16) + ; GFX9-NEXT: [[SHL1:%[0-9]+]]:_(s16) = G_SHL [[TRUNC1]], [[C]](s16) + ; GFX9-NEXT: [[SADDSAT:%[0-9]+]]:_(s16) = G_SADDSAT [[SHL]], [[SHL1]] + ; GFX9-NEXT: [[ASHR:%[0-9]+]]:_(s16) = G_ASHR [[SADDSAT]], [[C]](s16) + ; GFX9-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[ASHR]](s16) + ; GFX9-NEXT: $vgpr0 = COPY [[ANYEXT]](s32) %0:_(s32) = COPY $vgpr0 %1:_(s32) = COPY $vgpr1 %2:_(s8) = G_TRUNC %0 @@ -144,113 +144,113 @@ body: | ; GFX6-LABEL: name: saddsat_v2s8 ; GFX6: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX6: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX6: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 - ; GFX6: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[COPY]], [[C]](s32) - ; GFX6: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 24 - ; GFX6: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[COPY1]], [[C]](s32) - ; GFX6: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY]], [[C1]](s32) - ; GFX6: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[COPY1]], [[C1]](s32) - ; GFX6: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 2147483647 - ; GFX6: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 -2147483648 - ; GFX6: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 - ; GFX6: [[SMAX:%[0-9]+]]:_(s32) = G_SMAX [[SHL]], [[C4]] - ; GFX6: [[SUB:%[0-9]+]]:_(s32) = G_SUB [[C2]], [[SMAX]] - ; GFX6: [[SMIN:%[0-9]+]]:_(s32) = G_SMIN [[SHL]], [[C4]] - ; GFX6: [[SUB1:%[0-9]+]]:_(s32) = G_SUB [[C3]], [[SMIN]] - ; GFX6: [[SMAX1:%[0-9]+]]:_(s32) = G_SMAX [[SUB1]], [[SHL1]] - ; GFX6: [[SMIN1:%[0-9]+]]:_(s32) = G_SMIN [[SMAX1]], [[SUB]] - ; GFX6: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[SHL]], [[SMIN1]] - ; GFX6: [[ASHR:%[0-9]+]]:_(s32) = G_ASHR [[ADD]], [[C1]](s32) - ; GFX6: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[LSHR]], [[C1]](s32) - ; GFX6: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[LSHR1]], [[C1]](s32) - ; GFX6: [[SMAX2:%[0-9]+]]:_(s32) = G_SMAX [[SHL2]], [[C4]] - ; GFX6: [[SUB2:%[0-9]+]]:_(s32) = G_SUB [[C2]], [[SMAX2]] - ; GFX6: [[SMIN2:%[0-9]+]]:_(s32) = G_SMIN [[SHL2]], [[C4]] - ; GFX6: [[SUB3:%[0-9]+]]:_(s32) = G_SUB [[C3]], [[SMIN2]] - ; GFX6: [[SMAX3:%[0-9]+]]:_(s32) = G_SMAX [[SUB3]], [[SHL3]] - ; GFX6: [[SMIN3:%[0-9]+]]:_(s32) = G_SMIN [[SMAX3]], [[SUB2]] - ; GFX6: [[ADD1:%[0-9]+]]:_(s32) = G_ADD [[SHL2]], [[SMIN3]] - ; GFX6: [[ASHR1:%[0-9]+]]:_(s32) = G_ASHR [[ADD1]], [[C1]](s32) - ; GFX6: [[C5:%[0-9]+]]:_(s16) = G_CONSTANT i16 255 - ; GFX6: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[ASHR]](s32) - ; GFX6: [[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC]], [[C5]] - ; GFX6: [[COPY2:%[0-9]+]]:_(s32) = COPY [[C]](s32) - ; GFX6: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 255 - ; GFX6: [[AND1:%[0-9]+]]:_(s32) = G_AND [[ASHR1]], [[C6]] - ; GFX6: 
[[SHL4:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[COPY2]](s32) - ; GFX6: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[SHL4]](s32) - ; GFX6: [[OR:%[0-9]+]]:_(s16) = G_OR [[AND]], [[TRUNC1]] - ; GFX6: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[OR]](s16) - ; GFX6: $vgpr0 = COPY [[ANYEXT]](s32) + ; GFX6-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; GFX6-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 + ; GFX6-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[COPY]], [[C]](s32) + ; GFX6-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 24 + ; GFX6-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[COPY1]], [[C]](s32) + ; GFX6-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY]], [[C1]](s32) + ; GFX6-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[COPY1]], [[C1]](s32) + ; GFX6-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 2147483647 + ; GFX6-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 -2147483648 + ; GFX6-NEXT: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 + ; GFX6-NEXT: [[SMAX:%[0-9]+]]:_(s32) = G_SMAX [[SHL]], [[C4]] + ; GFX6-NEXT: [[SUB:%[0-9]+]]:_(s32) = G_SUB [[C2]], [[SMAX]] + ; GFX6-NEXT: [[SMIN:%[0-9]+]]:_(s32) = G_SMIN [[SHL]], [[C4]] + ; GFX6-NEXT: [[SUB1:%[0-9]+]]:_(s32) = G_SUB [[C3]], [[SMIN]] + ; GFX6-NEXT: [[SMAX1:%[0-9]+]]:_(s32) = G_SMAX [[SUB1]], [[SHL1]] + ; GFX6-NEXT: [[SMIN1:%[0-9]+]]:_(s32) = G_SMIN [[SMAX1]], [[SUB]] + ; GFX6-NEXT: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[SHL]], [[SMIN1]] + ; GFX6-NEXT: [[ASHR:%[0-9]+]]:_(s32) = G_ASHR [[ADD]], [[C1]](s32) + ; GFX6-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[LSHR]], [[C1]](s32) + ; GFX6-NEXT: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[LSHR1]], [[C1]](s32) + ; GFX6-NEXT: [[SMAX2:%[0-9]+]]:_(s32) = G_SMAX [[SHL2]], [[C4]] + ; GFX6-NEXT: [[SUB2:%[0-9]+]]:_(s32) = G_SUB [[C2]], [[SMAX2]] + ; GFX6-NEXT: [[SMIN2:%[0-9]+]]:_(s32) = G_SMIN [[SHL2]], [[C4]] + ; GFX6-NEXT: [[SUB3:%[0-9]+]]:_(s32) = G_SUB [[C3]], [[SMIN2]] + ; GFX6-NEXT: [[SMAX3:%[0-9]+]]:_(s32) = G_SMAX [[SUB3]], [[SHL3]] + ; GFX6-NEXT: [[SMIN3:%[0-9]+]]:_(s32) = G_SMIN [[SMAX3]], [[SUB2]] + ; GFX6-NEXT: [[ADD1:%[0-9]+]]:_(s32) = G_ADD [[SHL2]], [[SMIN3]] + ; GFX6-NEXT: [[ASHR1:%[0-9]+]]:_(s32) = G_ASHR [[ADD1]], [[C1]](s32) + ; GFX6-NEXT: [[C5:%[0-9]+]]:_(s16) = G_CONSTANT i16 255 + ; GFX6-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[ASHR]](s32) + ; GFX6-NEXT: [[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC]], [[C5]] + ; GFX6-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY [[C]](s32) + ; GFX6-NEXT: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 255 + ; GFX6-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[ASHR1]], [[C6]] + ; GFX6-NEXT: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[COPY2]](s32) + ; GFX6-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[SHL4]](s32) + ; GFX6-NEXT: [[OR:%[0-9]+]]:_(s16) = G_OR [[AND]], [[TRUNC1]] + ; GFX6-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[OR]](s16) + ; GFX6-NEXT: $vgpr0 = COPY [[ANYEXT]](s32) ; GFX8-LABEL: name: saddsat_v2s8 ; GFX8: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX8: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX8: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 - ; GFX8: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[COPY]], [[C]](s32) - ; GFX8: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[COPY1]], [[C]](s32) - ; GFX8: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY]](s32) - ; GFX8: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY1]](s32) - ; GFX8: [[C1:%[0-9]+]]:_(s16) = G_CONSTANT i16 8 - ; GFX8: [[SHL:%[0-9]+]]:_(s16) = G_SHL [[TRUNC]], [[C1]](s16) - ; GFX8: [[SHL1:%[0-9]+]]:_(s16) = G_SHL [[TRUNC1]], [[C1]](s16) - ; GFX8: [[C2:%[0-9]+]]:_(s16) = G_CONSTANT i16 32767 - ; GFX8: [[C3:%[0-9]+]]:_(s16) = G_CONSTANT i16 -32768 - ; GFX8: [[C4:%[0-9]+]]:_(s16) = G_CONSTANT i16 0 
- ; GFX8: [[SMAX:%[0-9]+]]:_(s16) = G_SMAX [[SHL]], [[C4]] - ; GFX8: [[SUB:%[0-9]+]]:_(s16) = G_SUB [[C2]], [[SMAX]] - ; GFX8: [[SMIN:%[0-9]+]]:_(s16) = G_SMIN [[SHL]], [[C4]] - ; GFX8: [[SUB1:%[0-9]+]]:_(s16) = G_SUB [[C3]], [[SMIN]] - ; GFX8: [[SMAX1:%[0-9]+]]:_(s16) = G_SMAX [[SUB1]], [[SHL1]] - ; GFX8: [[SMIN1:%[0-9]+]]:_(s16) = G_SMIN [[SMAX1]], [[SUB]] - ; GFX8: [[ADD:%[0-9]+]]:_(s16) = G_ADD [[SHL]], [[SMIN1]] - ; GFX8: [[ASHR:%[0-9]+]]:_(s16) = G_ASHR [[ADD]], [[C1]](s16) - ; GFX8: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) - ; GFX8: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR1]](s32) - ; GFX8: [[SHL2:%[0-9]+]]:_(s16) = G_SHL [[TRUNC2]], [[C1]](s16) - ; GFX8: [[SHL3:%[0-9]+]]:_(s16) = G_SHL [[TRUNC3]], [[C1]](s16) - ; GFX8: [[SMAX2:%[0-9]+]]:_(s16) = G_SMAX [[SHL2]], [[C4]] - ; GFX8: [[SUB2:%[0-9]+]]:_(s16) = G_SUB [[C2]], [[SMAX2]] - ; GFX8: [[SMIN2:%[0-9]+]]:_(s16) = G_SMIN [[SHL2]], [[C4]] - ; GFX8: [[SUB3:%[0-9]+]]:_(s16) = G_SUB [[C3]], [[SMIN2]] - ; GFX8: [[SMAX3:%[0-9]+]]:_(s16) = G_SMAX [[SUB3]], [[SHL3]] - ; GFX8: [[SMIN3:%[0-9]+]]:_(s16) = G_SMIN [[SMAX3]], [[SUB2]] - ; GFX8: [[ADD1:%[0-9]+]]:_(s16) = G_ADD [[SHL2]], [[SMIN3]] - ; GFX8: [[ASHR1:%[0-9]+]]:_(s16) = G_ASHR [[ADD1]], [[C1]](s16) - ; GFX8: [[C5:%[0-9]+]]:_(s16) = G_CONSTANT i16 255 - ; GFX8: [[AND:%[0-9]+]]:_(s16) = G_AND [[ASHR]], [[C5]] - ; GFX8: [[AND1:%[0-9]+]]:_(s16) = G_AND [[ASHR1]], [[C5]] - ; GFX8: [[SHL4:%[0-9]+]]:_(s16) = G_SHL [[AND1]], [[C1]](s16) - ; GFX8: [[OR:%[0-9]+]]:_(s16) = G_OR [[AND]], [[SHL4]] - ; GFX8: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[OR]](s16) - ; GFX8: $vgpr0 = COPY [[ANYEXT]](s32) + ; GFX8-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; GFX8-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 + ; GFX8-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[COPY]], [[C]](s32) + ; GFX8-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[COPY1]], [[C]](s32) + ; GFX8-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY]](s32) + ; GFX8-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY1]](s32) + ; GFX8-NEXT: [[C1:%[0-9]+]]:_(s16) = G_CONSTANT i16 8 + ; GFX8-NEXT: [[SHL:%[0-9]+]]:_(s16) = G_SHL [[TRUNC]], [[C1]](s16) + ; GFX8-NEXT: [[SHL1:%[0-9]+]]:_(s16) = G_SHL [[TRUNC1]], [[C1]](s16) + ; GFX8-NEXT: [[C2:%[0-9]+]]:_(s16) = G_CONSTANT i16 32767 + ; GFX8-NEXT: [[C3:%[0-9]+]]:_(s16) = G_CONSTANT i16 -32768 + ; GFX8-NEXT: [[C4:%[0-9]+]]:_(s16) = G_CONSTANT i16 0 + ; GFX8-NEXT: [[SMAX:%[0-9]+]]:_(s16) = G_SMAX [[SHL]], [[C4]] + ; GFX8-NEXT: [[SUB:%[0-9]+]]:_(s16) = G_SUB [[C2]], [[SMAX]] + ; GFX8-NEXT: [[SMIN:%[0-9]+]]:_(s16) = G_SMIN [[SHL]], [[C4]] + ; GFX8-NEXT: [[SUB1:%[0-9]+]]:_(s16) = G_SUB [[C3]], [[SMIN]] + ; GFX8-NEXT: [[SMAX1:%[0-9]+]]:_(s16) = G_SMAX [[SUB1]], [[SHL1]] + ; GFX8-NEXT: [[SMIN1:%[0-9]+]]:_(s16) = G_SMIN [[SMAX1]], [[SUB]] + ; GFX8-NEXT: [[ADD:%[0-9]+]]:_(s16) = G_ADD [[SHL]], [[SMIN1]] + ; GFX8-NEXT: [[ASHR:%[0-9]+]]:_(s16) = G_ASHR [[ADD]], [[C1]](s16) + ; GFX8-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) + ; GFX8-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR1]](s32) + ; GFX8-NEXT: [[SHL2:%[0-9]+]]:_(s16) = G_SHL [[TRUNC2]], [[C1]](s16) + ; GFX8-NEXT: [[SHL3:%[0-9]+]]:_(s16) = G_SHL [[TRUNC3]], [[C1]](s16) + ; GFX8-NEXT: [[SMAX2:%[0-9]+]]:_(s16) = G_SMAX [[SHL2]], [[C4]] + ; GFX8-NEXT: [[SUB2:%[0-9]+]]:_(s16) = G_SUB [[C2]], [[SMAX2]] + ; GFX8-NEXT: [[SMIN2:%[0-9]+]]:_(s16) = G_SMIN [[SHL2]], [[C4]] + ; GFX8-NEXT: [[SUB3:%[0-9]+]]:_(s16) = G_SUB [[C3]], [[SMIN2]] + ; GFX8-NEXT: [[SMAX3:%[0-9]+]]:_(s16) = G_SMAX [[SUB3]], [[SHL3]] + ; GFX8-NEXT: [[SMIN3:%[0-9]+]]:_(s16) = G_SMIN 
[[SMAX3]], [[SUB2]] + ; GFX8-NEXT: [[ADD1:%[0-9]+]]:_(s16) = G_ADD [[SHL2]], [[SMIN3]] + ; GFX8-NEXT: [[ASHR1:%[0-9]+]]:_(s16) = G_ASHR [[ADD1]], [[C1]](s16) + ; GFX8-NEXT: [[C5:%[0-9]+]]:_(s16) = G_CONSTANT i16 255 + ; GFX8-NEXT: [[AND:%[0-9]+]]:_(s16) = G_AND [[ASHR]], [[C5]] + ; GFX8-NEXT: [[AND1:%[0-9]+]]:_(s16) = G_AND [[ASHR1]], [[C5]] + ; GFX8-NEXT: [[SHL4:%[0-9]+]]:_(s16) = G_SHL [[AND1]], [[C1]](s16) + ; GFX8-NEXT: [[OR:%[0-9]+]]:_(s16) = G_OR [[AND]], [[SHL4]] + ; GFX8-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[OR]](s16) + ; GFX8-NEXT: $vgpr0 = COPY [[ANYEXT]](s32) ; GFX9-LABEL: name: saddsat_v2s8 ; GFX9: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX9: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX9: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 - ; GFX9: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[COPY]], [[C]](s32) - ; GFX9: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; GFX9: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[COPY1]], [[C]](s32) - ; GFX9: [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY]](s32), [[LSHR]](s32) - ; GFX9: [[BUILD_VECTOR_TRUNC1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY1]](s32), [[LSHR1]](s32) - ; GFX9: [[C2:%[0-9]+]]:_(s16) = G_CONSTANT i16 8 - ; GFX9: [[COPY2:%[0-9]+]]:_(s32) = COPY [[C]](s32) - ; GFX9: [[BUILD_VECTOR_TRUNC2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY2]](s32), [[COPY2]](s32) - ; GFX9: [[SHL:%[0-9]+]]:_(<2 x s16>) = G_SHL [[BUILD_VECTOR_TRUNC]], [[BUILD_VECTOR_TRUNC2]](<2 x s16>) - ; GFX9: [[SHL1:%[0-9]+]]:_(<2 x s16>) = G_SHL [[BUILD_VECTOR_TRUNC1]], [[BUILD_VECTOR_TRUNC2]](<2 x s16>) - ; GFX9: [[SADDSAT:%[0-9]+]]:_(<2 x s16>) = G_SADDSAT [[SHL]], [[SHL1]] - ; GFX9: [[ASHR:%[0-9]+]]:_(<2 x s16>) = G_ASHR [[SADDSAT]], [[BUILD_VECTOR_TRUNC2]](<2 x s16>) - ; GFX9: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[ASHR]](<2 x s16>) - ; GFX9: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST]](s32) - ; GFX9: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C1]](s32) - ; GFX9: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR2]](s32) - ; GFX9: [[C3:%[0-9]+]]:_(s16) = G_CONSTANT i16 255 - ; GFX9: [[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC]], [[C3]] - ; GFX9: [[AND1:%[0-9]+]]:_(s16) = G_AND [[TRUNC1]], [[C3]] - ; GFX9: [[SHL2:%[0-9]+]]:_(s16) = G_SHL [[AND1]], [[C2]](s16) - ; GFX9: [[OR:%[0-9]+]]:_(s16) = G_OR [[AND]], [[SHL2]] - ; GFX9: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[OR]](s16) - ; GFX9: $vgpr0 = COPY [[ANYEXT]](s32) + ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; GFX9-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 + ; GFX9-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[COPY]], [[C]](s32) + ; GFX9-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; GFX9-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[COPY1]], [[C]](s32) + ; GFX9-NEXT: [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY]](s32), [[LSHR]](s32) + ; GFX9-NEXT: [[BUILD_VECTOR_TRUNC1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY1]](s32), [[LSHR1]](s32) + ; GFX9-NEXT: [[C2:%[0-9]+]]:_(s16) = G_CONSTANT i16 8 + ; GFX9-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY [[C]](s32) + ; GFX9-NEXT: [[BUILD_VECTOR_TRUNC2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY2]](s32), [[COPY2]](s32) + ; GFX9-NEXT: [[SHL:%[0-9]+]]:_(<2 x s16>) = G_SHL [[BUILD_VECTOR_TRUNC]], [[BUILD_VECTOR_TRUNC2]](<2 x s16>) + ; GFX9-NEXT: [[SHL1:%[0-9]+]]:_(<2 x s16>) = G_SHL [[BUILD_VECTOR_TRUNC1]], [[BUILD_VECTOR_TRUNC2]](<2 x s16>) + ; GFX9-NEXT: [[SADDSAT:%[0-9]+]]:_(<2 x s16>) = G_SADDSAT [[SHL]], [[SHL1]] + ; GFX9-NEXT: [[ASHR:%[0-9]+]]:_(<2 x s16>) = G_ASHR [[SADDSAT]], 
[[BUILD_VECTOR_TRUNC2]](<2 x s16>) + ; GFX9-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[ASHR]](<2 x s16>) + ; GFX9-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST]](s32) + ; GFX9-NEXT: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C1]](s32) + ; GFX9-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR2]](s32) + ; GFX9-NEXT: [[C3:%[0-9]+]]:_(s16) = G_CONSTANT i16 255 + ; GFX9-NEXT: [[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC]], [[C3]] + ; GFX9-NEXT: [[AND1:%[0-9]+]]:_(s16) = G_AND [[TRUNC1]], [[C3]] + ; GFX9-NEXT: [[SHL2:%[0-9]+]]:_(s16) = G_SHL [[AND1]], [[C2]](s16) + ; GFX9-NEXT: [[OR:%[0-9]+]]:_(s16) = G_OR [[AND]], [[SHL2]] + ; GFX9-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[OR]](s16) + ; GFX9-NEXT: $vgpr0 = COPY [[ANYEXT]](s32) %0:_(s32) = COPY $vgpr0 %1:_(s32) = COPY $vgpr1 %2:_(s16) = G_TRUNC %0 @@ -271,47 +271,47 @@ body: | ; GFX6-LABEL: name: saddsat_s16 ; GFX6: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX6: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX6: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; GFX6: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY]], [[C]](s32) - ; GFX6: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[COPY1]], [[C]](s32) - ; GFX6: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 2147483647 - ; GFX6: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 -2147483648 - ; GFX6: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 - ; GFX6: [[SMAX:%[0-9]+]]:_(s32) = G_SMAX [[SHL]], [[C3]] - ; GFX6: [[SUB:%[0-9]+]]:_(s32) = G_SUB [[C1]], [[SMAX]] - ; GFX6: [[SMIN:%[0-9]+]]:_(s32) = G_SMIN [[SHL]], [[C3]] - ; GFX6: [[SUB1:%[0-9]+]]:_(s32) = G_SUB [[C2]], [[SMIN]] - ; GFX6: [[SMAX1:%[0-9]+]]:_(s32) = G_SMAX [[SUB1]], [[SHL1]] - ; GFX6: [[SMIN1:%[0-9]+]]:_(s32) = G_SMIN [[SMAX1]], [[SUB]] - ; GFX6: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[SHL]], [[SMIN1]] - ; GFX6: [[ASHR:%[0-9]+]]:_(s32) = G_ASHR [[ADD]], [[C]](s32) - ; GFX6: $vgpr0 = COPY [[ASHR]](s32) + ; GFX6-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; GFX6-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; GFX6-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY]], [[C]](s32) + ; GFX6-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[COPY1]], [[C]](s32) + ; GFX6-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 2147483647 + ; GFX6-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 -2147483648 + ; GFX6-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 + ; GFX6-NEXT: [[SMAX:%[0-9]+]]:_(s32) = G_SMAX [[SHL]], [[C3]] + ; GFX6-NEXT: [[SUB:%[0-9]+]]:_(s32) = G_SUB [[C1]], [[SMAX]] + ; GFX6-NEXT: [[SMIN:%[0-9]+]]:_(s32) = G_SMIN [[SHL]], [[C3]] + ; GFX6-NEXT: [[SUB1:%[0-9]+]]:_(s32) = G_SUB [[C2]], [[SMIN]] + ; GFX6-NEXT: [[SMAX1:%[0-9]+]]:_(s32) = G_SMAX [[SUB1]], [[SHL1]] + ; GFX6-NEXT: [[SMIN1:%[0-9]+]]:_(s32) = G_SMIN [[SMAX1]], [[SUB]] + ; GFX6-NEXT: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[SHL]], [[SMIN1]] + ; GFX6-NEXT: [[ASHR:%[0-9]+]]:_(s32) = G_ASHR [[ADD]], [[C]](s32) + ; GFX6-NEXT: $vgpr0 = COPY [[ASHR]](s32) ; GFX8-LABEL: name: saddsat_s16 ; GFX8: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX8: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX8: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY]](s32) - ; GFX8: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY1]](s32) - ; GFX8: [[C:%[0-9]+]]:_(s16) = G_CONSTANT i16 32767 - ; GFX8: [[C1:%[0-9]+]]:_(s16) = G_CONSTANT i16 -32768 - ; GFX8: [[C2:%[0-9]+]]:_(s16) = G_CONSTANT i16 0 - ; GFX8: [[SMAX:%[0-9]+]]:_(s16) = G_SMAX [[TRUNC]], [[C2]] - ; GFX8: [[SUB:%[0-9]+]]:_(s16) = G_SUB [[C]], [[SMAX]] - ; GFX8: [[SMIN:%[0-9]+]]:_(s16) = G_SMIN [[TRUNC]], [[C2]] - ; GFX8: [[SUB1:%[0-9]+]]:_(s16) = G_SUB [[C1]], [[SMIN]] - ; GFX8: [[SMAX1:%[0-9]+]]:_(s16) = G_SMAX [[SUB1]], [[TRUNC1]] 
- ; GFX8: [[SMIN1:%[0-9]+]]:_(s16) = G_SMIN [[SMAX1]], [[SUB]] - ; GFX8: [[ADD:%[0-9]+]]:_(s16) = G_ADD [[TRUNC]], [[SMIN1]] - ; GFX8: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[ADD]](s16) - ; GFX8: $vgpr0 = COPY [[ANYEXT]](s32) + ; GFX8-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; GFX8-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY]](s32) + ; GFX8-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY1]](s32) + ; GFX8-NEXT: [[C:%[0-9]+]]:_(s16) = G_CONSTANT i16 32767 + ; GFX8-NEXT: [[C1:%[0-9]+]]:_(s16) = G_CONSTANT i16 -32768 + ; GFX8-NEXT: [[C2:%[0-9]+]]:_(s16) = G_CONSTANT i16 0 + ; GFX8-NEXT: [[SMAX:%[0-9]+]]:_(s16) = G_SMAX [[TRUNC]], [[C2]] + ; GFX8-NEXT: [[SUB:%[0-9]+]]:_(s16) = G_SUB [[C]], [[SMAX]] + ; GFX8-NEXT: [[SMIN:%[0-9]+]]:_(s16) = G_SMIN [[TRUNC]], [[C2]] + ; GFX8-NEXT: [[SUB1:%[0-9]+]]:_(s16) = G_SUB [[C1]], [[SMIN]] + ; GFX8-NEXT: [[SMAX1:%[0-9]+]]:_(s16) = G_SMAX [[SUB1]], [[TRUNC1]] + ; GFX8-NEXT: [[SMIN1:%[0-9]+]]:_(s16) = G_SMIN [[SMAX1]], [[SUB]] + ; GFX8-NEXT: [[ADD:%[0-9]+]]:_(s16) = G_ADD [[TRUNC]], [[SMIN1]] + ; GFX8-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[ADD]](s16) + ; GFX8-NEXT: $vgpr0 = COPY [[ANYEXT]](s32) ; GFX9-LABEL: name: saddsat_s16 ; GFX9: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX9: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX9: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY]](s32) - ; GFX9: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY1]](s32) - ; GFX9: [[SADDSAT:%[0-9]+]]:_(s16) = G_SADDSAT [[TRUNC]], [[TRUNC1]] - ; GFX9: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[SADDSAT]](s16) - ; GFX9: $vgpr0 = COPY [[ANYEXT]](s32) + ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; GFX9-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY]](s32) + ; GFX9-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY1]](s32) + ; GFX9-NEXT: [[SADDSAT:%[0-9]+]]:_(s16) = G_SADDSAT [[TRUNC]], [[TRUNC1]] + ; GFX9-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[SADDSAT]](s16) + ; GFX9-NEXT: $vgpr0 = COPY [[ANYEXT]](s32) %0:_(s32) = COPY $vgpr0 %1:_(s32) = COPY $vgpr1 %2:_(s16) = G_TRUNC %0 @@ -329,82 +329,82 @@ body: | ; GFX6-LABEL: name: saddsat_v2s16 ; GFX6: [[COPY:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0 - ; GFX6: [[COPY1:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr1 - ; GFX6: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY]](<2 x s16>) - ; GFX6: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; GFX6: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) - ; GFX6: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[COPY1]](<2 x s16>) - ; GFX6: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C]](s32) - ; GFX6: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[BITCAST]], [[C]](s32) - ; GFX6: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[BITCAST1]], [[C]](s32) - ; GFX6: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 2147483647 - ; GFX6: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 -2147483648 - ; GFX6: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 - ; GFX6: [[SMAX:%[0-9]+]]:_(s32) = G_SMAX [[SHL]], [[C3]] - ; GFX6: [[SUB:%[0-9]+]]:_(s32) = G_SUB [[C1]], [[SMAX]] - ; GFX6: [[SMIN:%[0-9]+]]:_(s32) = G_SMIN [[SHL]], [[C3]] - ; GFX6: [[SUB1:%[0-9]+]]:_(s32) = G_SUB [[C2]], [[SMIN]] - ; GFX6: [[SMAX1:%[0-9]+]]:_(s32) = G_SMAX [[SUB1]], [[SHL1]] - ; GFX6: [[SMIN1:%[0-9]+]]:_(s32) = G_SMIN [[SMAX1]], [[SUB]] - ; GFX6: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[SHL]], [[SMIN1]] - ; GFX6: [[ASHR:%[0-9]+]]:_(s32) = G_ASHR [[ADD]], [[C]](s32) - ; GFX6: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[LSHR]], [[C]](s32) - ; GFX6: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[LSHR1]], [[C]](s32) - ; GFX6: [[SMAX2:%[0-9]+]]:_(s32) = G_SMAX [[SHL2]], [[C3]] - ; GFX6: [[SUB2:%[0-9]+]]:_(s32) = G_SUB [[C1]], 
[[SMAX2]] - ; GFX6: [[SMIN2:%[0-9]+]]:_(s32) = G_SMIN [[SHL2]], [[C3]] - ; GFX6: [[SUB3:%[0-9]+]]:_(s32) = G_SUB [[C2]], [[SMIN2]] - ; GFX6: [[SMAX3:%[0-9]+]]:_(s32) = G_SMAX [[SUB3]], [[SHL3]] - ; GFX6: [[SMIN3:%[0-9]+]]:_(s32) = G_SMIN [[SMAX3]], [[SUB2]] - ; GFX6: [[ADD1:%[0-9]+]]:_(s32) = G_ADD [[SHL2]], [[SMIN3]] - ; GFX6: [[ASHR1:%[0-9]+]]:_(s32) = G_ASHR [[ADD1]], [[C]](s32) - ; GFX6: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 - ; GFX6: [[AND:%[0-9]+]]:_(s32) = G_AND [[ASHR]], [[C4]] - ; GFX6: [[AND1:%[0-9]+]]:_(s32) = G_AND [[ASHR1]], [[C4]] - ; GFX6: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C]](s32) - ; GFX6: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL4]] - ; GFX6: [[BITCAST2:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) - ; GFX6: $vgpr0 = COPY [[BITCAST2]](<2 x s16>) + ; GFX6-NEXT: [[COPY1:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr1 + ; GFX6-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY]](<2 x s16>) + ; GFX6-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; GFX6-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) + ; GFX6-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[COPY1]](<2 x s16>) + ; GFX6-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C]](s32) + ; GFX6-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[BITCAST]], [[C]](s32) + ; GFX6-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[BITCAST1]], [[C]](s32) + ; GFX6-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 2147483647 + ; GFX6-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 -2147483648 + ; GFX6-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 + ; GFX6-NEXT: [[SMAX:%[0-9]+]]:_(s32) = G_SMAX [[SHL]], [[C3]] + ; GFX6-NEXT: [[SUB:%[0-9]+]]:_(s32) = G_SUB [[C1]], [[SMAX]] + ; GFX6-NEXT: [[SMIN:%[0-9]+]]:_(s32) = G_SMIN [[SHL]], [[C3]] + ; GFX6-NEXT: [[SUB1:%[0-9]+]]:_(s32) = G_SUB [[C2]], [[SMIN]] + ; GFX6-NEXT: [[SMAX1:%[0-9]+]]:_(s32) = G_SMAX [[SUB1]], [[SHL1]] + ; GFX6-NEXT: [[SMIN1:%[0-9]+]]:_(s32) = G_SMIN [[SMAX1]], [[SUB]] + ; GFX6-NEXT: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[SHL]], [[SMIN1]] + ; GFX6-NEXT: [[ASHR:%[0-9]+]]:_(s32) = G_ASHR [[ADD]], [[C]](s32) + ; GFX6-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[LSHR]], [[C]](s32) + ; GFX6-NEXT: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[LSHR1]], [[C]](s32) + ; GFX6-NEXT: [[SMAX2:%[0-9]+]]:_(s32) = G_SMAX [[SHL2]], [[C3]] + ; GFX6-NEXT: [[SUB2:%[0-9]+]]:_(s32) = G_SUB [[C1]], [[SMAX2]] + ; GFX6-NEXT: [[SMIN2:%[0-9]+]]:_(s32) = G_SMIN [[SHL2]], [[C3]] + ; GFX6-NEXT: [[SUB3:%[0-9]+]]:_(s32) = G_SUB [[C2]], [[SMIN2]] + ; GFX6-NEXT: [[SMAX3:%[0-9]+]]:_(s32) = G_SMAX [[SUB3]], [[SHL3]] + ; GFX6-NEXT: [[SMIN3:%[0-9]+]]:_(s32) = G_SMIN [[SMAX3]], [[SUB2]] + ; GFX6-NEXT: [[ADD1:%[0-9]+]]:_(s32) = G_ADD [[SHL2]], [[SMIN3]] + ; GFX6-NEXT: [[ASHR1:%[0-9]+]]:_(s32) = G_ASHR [[ADD1]], [[C]](s32) + ; GFX6-NEXT: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 + ; GFX6-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[ASHR]], [[C4]] + ; GFX6-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[ASHR1]], [[C4]] + ; GFX6-NEXT: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C]](s32) + ; GFX6-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL4]] + ; GFX6-NEXT: [[BITCAST2:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) + ; GFX6-NEXT: $vgpr0 = COPY [[BITCAST2]](<2 x s16>) ; GFX8-LABEL: name: saddsat_v2s16 ; GFX8: [[COPY:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0 - ; GFX8: [[COPY1:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr1 - ; GFX8: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY]](<2 x s16>) - ; GFX8: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST]](s32) - ; GFX8: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; GFX8: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], 
[[C]](s32) - ; GFX8: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) - ; GFX8: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[COPY1]](<2 x s16>) - ; GFX8: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST1]](s32) - ; GFX8: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C]](s32) - ; GFX8: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR1]](s32) - ; GFX8: [[C1:%[0-9]+]]:_(s16) = G_CONSTANT i16 32767 - ; GFX8: [[C2:%[0-9]+]]:_(s16) = G_CONSTANT i16 -32768 - ; GFX8: [[C3:%[0-9]+]]:_(s16) = G_CONSTANT i16 0 - ; GFX8: [[SMAX:%[0-9]+]]:_(s16) = G_SMAX [[TRUNC]], [[C3]] - ; GFX8: [[SUB:%[0-9]+]]:_(s16) = G_SUB [[C1]], [[SMAX]] - ; GFX8: [[SMIN:%[0-9]+]]:_(s16) = G_SMIN [[TRUNC]], [[C3]] - ; GFX8: [[SUB1:%[0-9]+]]:_(s16) = G_SUB [[C2]], [[SMIN]] - ; GFX8: [[SMAX1:%[0-9]+]]:_(s16) = G_SMAX [[SUB1]], [[TRUNC2]] - ; GFX8: [[SMIN1:%[0-9]+]]:_(s16) = G_SMIN [[SMAX1]], [[SUB]] - ; GFX8: [[ADD:%[0-9]+]]:_(s16) = G_ADD [[TRUNC]], [[SMIN1]] - ; GFX8: [[SMAX2:%[0-9]+]]:_(s16) = G_SMAX [[TRUNC1]], [[C3]] - ; GFX8: [[SUB2:%[0-9]+]]:_(s16) = G_SUB [[C1]], [[SMAX2]] - ; GFX8: [[SMIN2:%[0-9]+]]:_(s16) = G_SMIN [[TRUNC1]], [[C3]] - ; GFX8: [[SUB3:%[0-9]+]]:_(s16) = G_SUB [[C2]], [[SMIN2]] - ; GFX8: [[SMAX3:%[0-9]+]]:_(s16) = G_SMAX [[SUB3]], [[TRUNC3]] - ; GFX8: [[SMIN3:%[0-9]+]]:_(s16) = G_SMIN [[SMAX3]], [[SUB2]] - ; GFX8: [[ADD1:%[0-9]+]]:_(s16) = G_ADD [[TRUNC1]], [[SMIN3]] - ; GFX8: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[ADD]](s16) - ; GFX8: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[ADD1]](s16) - ; GFX8: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C]](s32) - ; GFX8: [[OR:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL]] - ; GFX8: [[BITCAST2:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) - ; GFX8: $vgpr0 = COPY [[BITCAST2]](<2 x s16>) + ; GFX8-NEXT: [[COPY1:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr1 + ; GFX8-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY]](<2 x s16>) + ; GFX8-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST]](s32) + ; GFX8-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; GFX8-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) + ; GFX8-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) + ; GFX8-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[COPY1]](<2 x s16>) + ; GFX8-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST1]](s32) + ; GFX8-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C]](s32) + ; GFX8-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR1]](s32) + ; GFX8-NEXT: [[C1:%[0-9]+]]:_(s16) = G_CONSTANT i16 32767 + ; GFX8-NEXT: [[C2:%[0-9]+]]:_(s16) = G_CONSTANT i16 -32768 + ; GFX8-NEXT: [[C3:%[0-9]+]]:_(s16) = G_CONSTANT i16 0 + ; GFX8-NEXT: [[SMAX:%[0-9]+]]:_(s16) = G_SMAX [[TRUNC]], [[C3]] + ; GFX8-NEXT: [[SUB:%[0-9]+]]:_(s16) = G_SUB [[C1]], [[SMAX]] + ; GFX8-NEXT: [[SMIN:%[0-9]+]]:_(s16) = G_SMIN [[TRUNC]], [[C3]] + ; GFX8-NEXT: [[SUB1:%[0-9]+]]:_(s16) = G_SUB [[C2]], [[SMIN]] + ; GFX8-NEXT: [[SMAX1:%[0-9]+]]:_(s16) = G_SMAX [[SUB1]], [[TRUNC2]] + ; GFX8-NEXT: [[SMIN1:%[0-9]+]]:_(s16) = G_SMIN [[SMAX1]], [[SUB]] + ; GFX8-NEXT: [[ADD:%[0-9]+]]:_(s16) = G_ADD [[TRUNC]], [[SMIN1]] + ; GFX8-NEXT: [[SMAX2:%[0-9]+]]:_(s16) = G_SMAX [[TRUNC1]], [[C3]] + ; GFX8-NEXT: [[SUB2:%[0-9]+]]:_(s16) = G_SUB [[C1]], [[SMAX2]] + ; GFX8-NEXT: [[SMIN2:%[0-9]+]]:_(s16) = G_SMIN [[TRUNC1]], [[C3]] + ; GFX8-NEXT: [[SUB3:%[0-9]+]]:_(s16) = G_SUB [[C2]], [[SMIN2]] + ; GFX8-NEXT: [[SMAX3:%[0-9]+]]:_(s16) = G_SMAX [[SUB3]], [[TRUNC3]] + ; GFX8-NEXT: [[SMIN3:%[0-9]+]]:_(s16) = G_SMIN [[SMAX3]], [[SUB2]] + ; GFX8-NEXT: [[ADD1:%[0-9]+]]:_(s16) = G_ADD [[TRUNC1]], [[SMIN3]] + ; GFX8-NEXT: [[ZEXT:%[0-9]+]]:_(s32) = 
G_ZEXT [[ADD]](s16) + ; GFX8-NEXT: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[ADD1]](s16) + ; GFX8-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C]](s32) + ; GFX8-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL]] + ; GFX8-NEXT: [[BITCAST2:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) + ; GFX8-NEXT: $vgpr0 = COPY [[BITCAST2]](<2 x s16>) ; GFX9-LABEL: name: saddsat_v2s16 ; GFX9: [[COPY:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0 - ; GFX9: [[COPY1:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr1 - ; GFX9: [[SADDSAT:%[0-9]+]]:_(<2 x s16>) = G_SADDSAT [[COPY]], [[COPY1]] - ; GFX9: $vgpr0 = COPY [[SADDSAT]](<2 x s16>) + ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr1 + ; GFX9-NEXT: [[SADDSAT:%[0-9]+]]:_(<2 x s16>) = G_SADDSAT [[COPY]], [[COPY1]] + ; GFX9-NEXT: $vgpr0 = COPY [[SADDSAT]](<2 x s16>) %0:_(<2 x s16>) = COPY $vgpr0 %1:_(<2 x s16>) = COPY $vgpr1 %2:_(<2 x s16>) = G_SADDSAT %0, %1 @@ -419,169 +419,169 @@ body: | ; GFX6-LABEL: name: saddsat_v3s16 ; GFX6: [[COPY:%[0-9]+]]:_(<6 x s16>) = COPY $vgpr0_vgpr1_vgpr2 - ; GFX6: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>), [[UV2:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[COPY]](<6 x s16>) - ; GFX6: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>) - ; GFX6: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; GFX6: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) - ; GFX6: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>) - ; GFX6: [[UV3:%[0-9]+]]:_(<2 x s16>), [[UV4:%[0-9]+]]:_(<2 x s16>), [[UV5:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[COPY]](<6 x s16>) - ; GFX6: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[UV4]](<2 x s16>) - ; GFX6: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C]](s32) - ; GFX6: [[BITCAST3:%[0-9]+]]:_(s32) = G_BITCAST [[UV5]](<2 x s16>) - ; GFX6: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST3]], [[C]](s32) - ; GFX6: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[BITCAST]], [[C]](s32) - ; GFX6: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[LSHR1]], [[C]](s32) - ; GFX6: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 2147483647 - ; GFX6: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 -2147483648 - ; GFX6: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 - ; GFX6: [[SMAX:%[0-9]+]]:_(s32) = G_SMAX [[SHL]], [[C3]] - ; GFX6: [[SUB:%[0-9]+]]:_(s32) = G_SUB [[C1]], [[SMAX]] - ; GFX6: [[SMIN:%[0-9]+]]:_(s32) = G_SMIN [[SHL]], [[C3]] - ; GFX6: [[SUB1:%[0-9]+]]:_(s32) = G_SUB [[C2]], [[SMIN]] - ; GFX6: [[SMAX1:%[0-9]+]]:_(s32) = G_SMAX [[SUB1]], [[SHL1]] - ; GFX6: [[SMIN1:%[0-9]+]]:_(s32) = G_SMIN [[SMAX1]], [[SUB]] - ; GFX6: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[SHL]], [[SMIN1]] - ; GFX6: [[ASHR:%[0-9]+]]:_(s32) = G_ASHR [[ADD]], [[C]](s32) - ; GFX6: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[LSHR]], [[C]](s32) - ; GFX6: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[BITCAST3]], [[C]](s32) - ; GFX6: [[SMAX2:%[0-9]+]]:_(s32) = G_SMAX [[SHL2]], [[C3]] - ; GFX6: [[SUB2:%[0-9]+]]:_(s32) = G_SUB [[C1]], [[SMAX2]] - ; GFX6: [[SMIN2:%[0-9]+]]:_(s32) = G_SMIN [[SHL2]], [[C3]] - ; GFX6: [[SUB3:%[0-9]+]]:_(s32) = G_SUB [[C2]], [[SMIN2]] - ; GFX6: [[SMAX3:%[0-9]+]]:_(s32) = G_SMAX [[SUB3]], [[SHL3]] - ; GFX6: [[SMIN3:%[0-9]+]]:_(s32) = G_SMIN [[SMAX3]], [[SUB2]] - ; GFX6: [[ADD1:%[0-9]+]]:_(s32) = G_ADD [[SHL2]], [[SMIN3]] - ; GFX6: [[ASHR1:%[0-9]+]]:_(s32) = G_ASHR [[ADD1]], [[C]](s32) - ; GFX6: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[BITCAST1]], [[C]](s32) - ; GFX6: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[LSHR2]], [[C]](s32) - ; GFX6: [[SMAX4:%[0-9]+]]:_(s32) = G_SMAX [[SHL4]], [[C3]] - ; GFX6: [[SUB4:%[0-9]+]]:_(s32) = G_SUB [[C1]], [[SMAX4]] - ; GFX6: [[SMIN4:%[0-9]+]]:_(s32) = G_SMIN [[SHL4]], [[C3]] - ; 
GFX6: [[SUB5:%[0-9]+]]:_(s32) = G_SUB [[C2]], [[SMIN4]] - ; GFX6: [[SMAX5:%[0-9]+]]:_(s32) = G_SMAX [[SUB5]], [[SHL5]] - ; GFX6: [[SMIN5:%[0-9]+]]:_(s32) = G_SMIN [[SMAX5]], [[SUB4]] - ; GFX6: [[ADD2:%[0-9]+]]:_(s32) = G_ADD [[SHL4]], [[SMIN5]] - ; GFX6: [[ASHR2:%[0-9]+]]:_(s32) = G_ASHR [[ADD2]], [[C]](s32) - ; GFX6: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF - ; GFX6: [[UV6:%[0-9]+]]:_(<2 x s16>), [[UV7:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<4 x s16>) - ; GFX6: [[BITCAST4:%[0-9]+]]:_(s32) = G_BITCAST [[UV6]](<2 x s16>) - ; GFX6: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST4]], [[C]](s32) - ; GFX6: [[BITCAST5:%[0-9]+]]:_(s32) = G_BITCAST [[UV7]](<2 x s16>) - ; GFX6: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 - ; GFX6: [[AND:%[0-9]+]]:_(s32) = G_AND [[ASHR]], [[C4]] - ; GFX6: [[AND1:%[0-9]+]]:_(s32) = G_AND [[ASHR1]], [[C4]] - ; GFX6: [[SHL6:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C]](s32) - ; GFX6: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL6]] - ; GFX6: [[BITCAST6:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) - ; GFX6: [[AND2:%[0-9]+]]:_(s32) = G_AND [[ASHR2]], [[C4]] - ; GFX6: [[AND3:%[0-9]+]]:_(s32) = G_AND [[BITCAST4]], [[C4]] - ; GFX6: [[SHL7:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C]](s32) - ; GFX6: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL7]] - ; GFX6: [[BITCAST7:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32) - ; GFX6: [[AND4:%[0-9]+]]:_(s32) = G_AND [[LSHR3]], [[C4]] - ; GFX6: [[AND5:%[0-9]+]]:_(s32) = G_AND [[BITCAST5]], [[C4]] - ; GFX6: [[SHL8:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[C]](s32) - ; GFX6: [[OR2:%[0-9]+]]:_(s32) = G_OR [[AND4]], [[SHL8]] - ; GFX6: [[BITCAST8:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR2]](s32) - ; GFX6: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BITCAST6]](<2 x s16>), [[BITCAST7]](<2 x s16>), [[BITCAST8]](<2 x s16>) - ; GFX6: $vgpr0_vgpr1_vgpr2 = COPY [[CONCAT_VECTORS]](<6 x s16>) + ; GFX6-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>), [[UV2:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[COPY]](<6 x s16>) + ; GFX6-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>) + ; GFX6-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; GFX6-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) + ; GFX6-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>) + ; GFX6-NEXT: [[UV3:%[0-9]+]]:_(<2 x s16>), [[UV4:%[0-9]+]]:_(<2 x s16>), [[UV5:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[COPY]](<6 x s16>) + ; GFX6-NEXT: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[UV4]](<2 x s16>) + ; GFX6-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C]](s32) + ; GFX6-NEXT: [[BITCAST3:%[0-9]+]]:_(s32) = G_BITCAST [[UV5]](<2 x s16>) + ; GFX6-NEXT: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST3]], [[C]](s32) + ; GFX6-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[BITCAST]], [[C]](s32) + ; GFX6-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[LSHR1]], [[C]](s32) + ; GFX6-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 2147483647 + ; GFX6-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 -2147483648 + ; GFX6-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 + ; GFX6-NEXT: [[SMAX:%[0-9]+]]:_(s32) = G_SMAX [[SHL]], [[C3]] + ; GFX6-NEXT: [[SUB:%[0-9]+]]:_(s32) = G_SUB [[C1]], [[SMAX]] + ; GFX6-NEXT: [[SMIN:%[0-9]+]]:_(s32) = G_SMIN [[SHL]], [[C3]] + ; GFX6-NEXT: [[SUB1:%[0-9]+]]:_(s32) = G_SUB [[C2]], [[SMIN]] + ; GFX6-NEXT: [[SMAX1:%[0-9]+]]:_(s32) = G_SMAX [[SUB1]], [[SHL1]] + ; GFX6-NEXT: [[SMIN1:%[0-9]+]]:_(s32) = G_SMIN [[SMAX1]], [[SUB]] + ; GFX6-NEXT: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[SHL]], [[SMIN1]] + ; GFX6-NEXT: [[ASHR:%[0-9]+]]:_(s32) 
= G_ASHR [[ADD]], [[C]](s32) + ; GFX6-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[LSHR]], [[C]](s32) + ; GFX6-NEXT: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[BITCAST3]], [[C]](s32) + ; GFX6-NEXT: [[SMAX2:%[0-9]+]]:_(s32) = G_SMAX [[SHL2]], [[C3]] + ; GFX6-NEXT: [[SUB2:%[0-9]+]]:_(s32) = G_SUB [[C1]], [[SMAX2]] + ; GFX6-NEXT: [[SMIN2:%[0-9]+]]:_(s32) = G_SMIN [[SHL2]], [[C3]] + ; GFX6-NEXT: [[SUB3:%[0-9]+]]:_(s32) = G_SUB [[C2]], [[SMIN2]] + ; GFX6-NEXT: [[SMAX3:%[0-9]+]]:_(s32) = G_SMAX [[SUB3]], [[SHL3]] + ; GFX6-NEXT: [[SMIN3:%[0-9]+]]:_(s32) = G_SMIN [[SMAX3]], [[SUB2]] + ; GFX6-NEXT: [[ADD1:%[0-9]+]]:_(s32) = G_ADD [[SHL2]], [[SMIN3]] + ; GFX6-NEXT: [[ASHR1:%[0-9]+]]:_(s32) = G_ASHR [[ADD1]], [[C]](s32) + ; GFX6-NEXT: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[BITCAST1]], [[C]](s32) + ; GFX6-NEXT: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[LSHR2]], [[C]](s32) + ; GFX6-NEXT: [[SMAX4:%[0-9]+]]:_(s32) = G_SMAX [[SHL4]], [[C3]] + ; GFX6-NEXT: [[SUB4:%[0-9]+]]:_(s32) = G_SUB [[C1]], [[SMAX4]] + ; GFX6-NEXT: [[SMIN4:%[0-9]+]]:_(s32) = G_SMIN [[SHL4]], [[C3]] + ; GFX6-NEXT: [[SUB5:%[0-9]+]]:_(s32) = G_SUB [[C2]], [[SMIN4]] + ; GFX6-NEXT: [[SMAX5:%[0-9]+]]:_(s32) = G_SMAX [[SUB5]], [[SHL5]] + ; GFX6-NEXT: [[SMIN5:%[0-9]+]]:_(s32) = G_SMIN [[SMAX5]], [[SUB4]] + ; GFX6-NEXT: [[ADD2:%[0-9]+]]:_(s32) = G_ADD [[SHL4]], [[SMIN5]] + ; GFX6-NEXT: [[ASHR2:%[0-9]+]]:_(s32) = G_ASHR [[ADD2]], [[C]](s32) + ; GFX6-NEXT: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF + ; GFX6-NEXT: [[UV6:%[0-9]+]]:_(<2 x s16>), [[UV7:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<4 x s16>) + ; GFX6-NEXT: [[BITCAST4:%[0-9]+]]:_(s32) = G_BITCAST [[UV6]](<2 x s16>) + ; GFX6-NEXT: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST4]], [[C]](s32) + ; GFX6-NEXT: [[BITCAST5:%[0-9]+]]:_(s32) = G_BITCAST [[UV7]](<2 x s16>) + ; GFX6-NEXT: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 + ; GFX6-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[ASHR]], [[C4]] + ; GFX6-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[ASHR1]], [[C4]] + ; GFX6-NEXT: [[SHL6:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C]](s32) + ; GFX6-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL6]] + ; GFX6-NEXT: [[BITCAST6:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) + ; GFX6-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[ASHR2]], [[C4]] + ; GFX6-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[BITCAST4]], [[C4]] + ; GFX6-NEXT: [[SHL7:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C]](s32) + ; GFX6-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL7]] + ; GFX6-NEXT: [[BITCAST7:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32) + ; GFX6-NEXT: [[AND4:%[0-9]+]]:_(s32) = G_AND [[LSHR3]], [[C4]] + ; GFX6-NEXT: [[AND5:%[0-9]+]]:_(s32) = G_AND [[BITCAST5]], [[C4]] + ; GFX6-NEXT: [[SHL8:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[C]](s32) + ; GFX6-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[AND4]], [[SHL8]] + ; GFX6-NEXT: [[BITCAST8:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR2]](s32) + ; GFX6-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BITCAST6]](<2 x s16>), [[BITCAST7]](<2 x s16>), [[BITCAST8]](<2 x s16>) + ; GFX6-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[CONCAT_VECTORS]](<6 x s16>) ; GFX8-LABEL: name: saddsat_v3s16 ; GFX8: [[COPY:%[0-9]+]]:_(<6 x s16>) = COPY $vgpr0_vgpr1_vgpr2 - ; GFX8: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>), [[UV2:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[COPY]](<6 x s16>) - ; GFX8: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>) - ; GFX8: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST]](s32) - ; GFX8: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; GFX8: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) - ; 
GFX8: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) - ; GFX8: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>) - ; GFX8: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST1]](s32) - ; GFX8: [[UV3:%[0-9]+]]:_(<2 x s16>), [[UV4:%[0-9]+]]:_(<2 x s16>), [[UV5:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[COPY]](<6 x s16>) - ; GFX8: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[UV4]](<2 x s16>) - ; GFX8: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C]](s32) - ; GFX8: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR1]](s32) - ; GFX8: [[BITCAST3:%[0-9]+]]:_(s32) = G_BITCAST [[UV5]](<2 x s16>) - ; GFX8: [[TRUNC4:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST3]](s32) - ; GFX8: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST3]], [[C]](s32) - ; GFX8: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR2]](s32) - ; GFX8: [[C1:%[0-9]+]]:_(s16) = G_CONSTANT i16 32767 - ; GFX8: [[C2:%[0-9]+]]:_(s16) = G_CONSTANT i16 -32768 - ; GFX8: [[C3:%[0-9]+]]:_(s16) = G_CONSTANT i16 0 - ; GFX8: [[SMAX:%[0-9]+]]:_(s16) = G_SMAX [[TRUNC]], [[C3]] - ; GFX8: [[SUB:%[0-9]+]]:_(s16) = G_SUB [[C1]], [[SMAX]] - ; GFX8: [[SMIN:%[0-9]+]]:_(s16) = G_SMIN [[TRUNC]], [[C3]] - ; GFX8: [[SUB1:%[0-9]+]]:_(s16) = G_SUB [[C2]], [[SMIN]] - ; GFX8: [[SMAX1:%[0-9]+]]:_(s16) = G_SMAX [[SUB1]], [[TRUNC3]] - ; GFX8: [[SMIN1:%[0-9]+]]:_(s16) = G_SMIN [[SMAX1]], [[SUB]] - ; GFX8: [[ADD:%[0-9]+]]:_(s16) = G_ADD [[TRUNC]], [[SMIN1]] - ; GFX8: [[SMAX2:%[0-9]+]]:_(s16) = G_SMAX [[TRUNC1]], [[C3]] - ; GFX8: [[SUB2:%[0-9]+]]:_(s16) = G_SUB [[C1]], [[SMAX2]] - ; GFX8: [[SMIN2:%[0-9]+]]:_(s16) = G_SMIN [[TRUNC1]], [[C3]] - ; GFX8: [[SUB3:%[0-9]+]]:_(s16) = G_SUB [[C2]], [[SMIN2]] - ; GFX8: [[SMAX3:%[0-9]+]]:_(s16) = G_SMAX [[SUB3]], [[TRUNC4]] - ; GFX8: [[SMIN3:%[0-9]+]]:_(s16) = G_SMIN [[SMAX3]], [[SUB2]] - ; GFX8: [[ADD1:%[0-9]+]]:_(s16) = G_ADD [[TRUNC1]], [[SMIN3]] - ; GFX8: [[SMAX4:%[0-9]+]]:_(s16) = G_SMAX [[TRUNC2]], [[C3]] - ; GFX8: [[SUB4:%[0-9]+]]:_(s16) = G_SUB [[C1]], [[SMAX4]] - ; GFX8: [[SMIN4:%[0-9]+]]:_(s16) = G_SMIN [[TRUNC2]], [[C3]] - ; GFX8: [[SUB5:%[0-9]+]]:_(s16) = G_SUB [[C2]], [[SMIN4]] - ; GFX8: [[SMAX5:%[0-9]+]]:_(s16) = G_SMAX [[SUB5]], [[TRUNC5]] - ; GFX8: [[SMIN5:%[0-9]+]]:_(s16) = G_SMIN [[SMAX5]], [[SUB4]] - ; GFX8: [[ADD2:%[0-9]+]]:_(s16) = G_ADD [[TRUNC2]], [[SMIN5]] - ; GFX8: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF - ; GFX8: [[UV6:%[0-9]+]]:_(<2 x s16>), [[UV7:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<4 x s16>) - ; GFX8: [[BITCAST4:%[0-9]+]]:_(s32) = G_BITCAST [[UV6]](<2 x s16>) - ; GFX8: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST4]], [[C]](s32) - ; GFX8: [[BITCAST5:%[0-9]+]]:_(s32) = G_BITCAST [[UV7]](<2 x s16>) - ; GFX8: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[ADD]](s16) - ; GFX8: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[ADD1]](s16) - ; GFX8: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C]](s32) - ; GFX8: [[OR:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL]] - ; GFX8: [[BITCAST6:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) - ; GFX8: [[ZEXT2:%[0-9]+]]:_(s32) = G_ZEXT [[ADD2]](s16) - ; GFX8: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 - ; GFX8: [[AND:%[0-9]+]]:_(s32) = G_AND [[BITCAST4]], [[C4]] - ; GFX8: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND]], [[C]](s32) - ; GFX8: [[OR1:%[0-9]+]]:_(s32) = G_OR [[ZEXT2]], [[SHL1]] - ; GFX8: [[BITCAST7:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32) - ; GFX8: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LSHR3]], [[C4]] - ; GFX8: [[AND2:%[0-9]+]]:_(s32) = G_AND [[BITCAST5]], [[C4]] - ; GFX8: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND2]], [[C]](s32) - ; GFX8: [[OR2:%[0-9]+]]:_(s32) = G_OR [[AND1]], [[SHL2]] - ; GFX8: 
[[BITCAST8:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR2]](s32) - ; GFX8: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BITCAST6]](<2 x s16>), [[BITCAST7]](<2 x s16>), [[BITCAST8]](<2 x s16>) - ; GFX8: $vgpr0_vgpr1_vgpr2 = COPY [[CONCAT_VECTORS]](<6 x s16>) + ; GFX8-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>), [[UV2:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[COPY]](<6 x s16>) + ; GFX8-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>) + ; GFX8-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST]](s32) + ; GFX8-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; GFX8-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) + ; GFX8-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) + ; GFX8-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>) + ; GFX8-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST1]](s32) + ; GFX8-NEXT: [[UV3:%[0-9]+]]:_(<2 x s16>), [[UV4:%[0-9]+]]:_(<2 x s16>), [[UV5:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[COPY]](<6 x s16>) + ; GFX8-NEXT: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[UV4]](<2 x s16>) + ; GFX8-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C]](s32) + ; GFX8-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR1]](s32) + ; GFX8-NEXT: [[BITCAST3:%[0-9]+]]:_(s32) = G_BITCAST [[UV5]](<2 x s16>) + ; GFX8-NEXT: [[TRUNC4:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST3]](s32) + ; GFX8-NEXT: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST3]], [[C]](s32) + ; GFX8-NEXT: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR2]](s32) + ; GFX8-NEXT: [[C1:%[0-9]+]]:_(s16) = G_CONSTANT i16 32767 + ; GFX8-NEXT: [[C2:%[0-9]+]]:_(s16) = G_CONSTANT i16 -32768 + ; GFX8-NEXT: [[C3:%[0-9]+]]:_(s16) = G_CONSTANT i16 0 + ; GFX8-NEXT: [[SMAX:%[0-9]+]]:_(s16) = G_SMAX [[TRUNC]], [[C3]] + ; GFX8-NEXT: [[SUB:%[0-9]+]]:_(s16) = G_SUB [[C1]], [[SMAX]] + ; GFX8-NEXT: [[SMIN:%[0-9]+]]:_(s16) = G_SMIN [[TRUNC]], [[C3]] + ; GFX8-NEXT: [[SUB1:%[0-9]+]]:_(s16) = G_SUB [[C2]], [[SMIN]] + ; GFX8-NEXT: [[SMAX1:%[0-9]+]]:_(s16) = G_SMAX [[SUB1]], [[TRUNC3]] + ; GFX8-NEXT: [[SMIN1:%[0-9]+]]:_(s16) = G_SMIN [[SMAX1]], [[SUB]] + ; GFX8-NEXT: [[ADD:%[0-9]+]]:_(s16) = G_ADD [[TRUNC]], [[SMIN1]] + ; GFX8-NEXT: [[SMAX2:%[0-9]+]]:_(s16) = G_SMAX [[TRUNC1]], [[C3]] + ; GFX8-NEXT: [[SUB2:%[0-9]+]]:_(s16) = G_SUB [[C1]], [[SMAX2]] + ; GFX8-NEXT: [[SMIN2:%[0-9]+]]:_(s16) = G_SMIN [[TRUNC1]], [[C3]] + ; GFX8-NEXT: [[SUB3:%[0-9]+]]:_(s16) = G_SUB [[C2]], [[SMIN2]] + ; GFX8-NEXT: [[SMAX3:%[0-9]+]]:_(s16) = G_SMAX [[SUB3]], [[TRUNC4]] + ; GFX8-NEXT: [[SMIN3:%[0-9]+]]:_(s16) = G_SMIN [[SMAX3]], [[SUB2]] + ; GFX8-NEXT: [[ADD1:%[0-9]+]]:_(s16) = G_ADD [[TRUNC1]], [[SMIN3]] + ; GFX8-NEXT: [[SMAX4:%[0-9]+]]:_(s16) = G_SMAX [[TRUNC2]], [[C3]] + ; GFX8-NEXT: [[SUB4:%[0-9]+]]:_(s16) = G_SUB [[C1]], [[SMAX4]] + ; GFX8-NEXT: [[SMIN4:%[0-9]+]]:_(s16) = G_SMIN [[TRUNC2]], [[C3]] + ; GFX8-NEXT: [[SUB5:%[0-9]+]]:_(s16) = G_SUB [[C2]], [[SMIN4]] + ; GFX8-NEXT: [[SMAX5:%[0-9]+]]:_(s16) = G_SMAX [[SUB5]], [[TRUNC5]] + ; GFX8-NEXT: [[SMIN5:%[0-9]+]]:_(s16) = G_SMIN [[SMAX5]], [[SUB4]] + ; GFX8-NEXT: [[ADD2:%[0-9]+]]:_(s16) = G_ADD [[TRUNC2]], [[SMIN5]] + ; GFX8-NEXT: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF + ; GFX8-NEXT: [[UV6:%[0-9]+]]:_(<2 x s16>), [[UV7:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<4 x s16>) + ; GFX8-NEXT: [[BITCAST4:%[0-9]+]]:_(s32) = G_BITCAST [[UV6]](<2 x s16>) + ; GFX8-NEXT: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST4]], [[C]](s32) + ; GFX8-NEXT: [[BITCAST5:%[0-9]+]]:_(s32) = G_BITCAST [[UV7]](<2 x s16>) + ; GFX8-NEXT: [[ZEXT:%[0-9]+]]:_(s32) = 
G_ZEXT [[ADD]](s16) + ; GFX8-NEXT: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[ADD1]](s16) + ; GFX8-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C]](s32) + ; GFX8-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL]] + ; GFX8-NEXT: [[BITCAST6:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) + ; GFX8-NEXT: [[ZEXT2:%[0-9]+]]:_(s32) = G_ZEXT [[ADD2]](s16) + ; GFX8-NEXT: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 + ; GFX8-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[BITCAST4]], [[C4]] + ; GFX8-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND]], [[C]](s32) + ; GFX8-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[ZEXT2]], [[SHL1]] + ; GFX8-NEXT: [[BITCAST7:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32) + ; GFX8-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LSHR3]], [[C4]] + ; GFX8-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[BITCAST5]], [[C4]] + ; GFX8-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND2]], [[C]](s32) + ; GFX8-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[AND1]], [[SHL2]] + ; GFX8-NEXT: [[BITCAST8:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR2]](s32) + ; GFX8-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BITCAST6]](<2 x s16>), [[BITCAST7]](<2 x s16>), [[BITCAST8]](<2 x s16>) + ; GFX8-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[CONCAT_VECTORS]](<6 x s16>) ; GFX9-LABEL: name: saddsat_v3s16 ; GFX9: [[COPY:%[0-9]+]]:_(<6 x s16>) = COPY $vgpr0_vgpr1_vgpr2 - ; GFX9: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>), [[UV2:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[COPY]](<6 x s16>) - ; GFX9: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>) - ; GFX9: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; GFX9: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) - ; GFX9: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>) - ; GFX9: [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[BITCAST]](s32), [[LSHR]](s32) - ; GFX9: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF - ; GFX9: [[BUILD_VECTOR_TRUNC1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[BITCAST1]](s32), [[DEF]](s32) - ; GFX9: [[UV3:%[0-9]+]]:_(<2 x s16>), [[UV4:%[0-9]+]]:_(<2 x s16>), [[UV5:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[COPY]](<6 x s16>) - ; GFX9: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[UV4]](<2 x s16>) - ; GFX9: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C]](s32) - ; GFX9: [[BITCAST3:%[0-9]+]]:_(s32) = G_BITCAST [[UV5]](<2 x s16>) - ; GFX9: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST3]], [[C]](s32) - ; GFX9: [[BUILD_VECTOR_TRUNC2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[LSHR1]](s32), [[BITCAST3]](s32) - ; GFX9: [[BUILD_VECTOR_TRUNC3:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[LSHR2]](s32), [[DEF]](s32) - ; GFX9: [[SADDSAT:%[0-9]+]]:_(<2 x s16>) = G_SADDSAT [[BUILD_VECTOR_TRUNC]], [[BUILD_VECTOR_TRUNC2]] - ; GFX9: [[SADDSAT1:%[0-9]+]]:_(<2 x s16>) = G_SADDSAT [[BUILD_VECTOR_TRUNC1]], [[BUILD_VECTOR_TRUNC3]] - ; GFX9: [[DEF1:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF - ; GFX9: [[BITCAST4:%[0-9]+]]:_(s32) = G_BITCAST [[SADDSAT]](<2 x s16>) - ; GFX9: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST4]], [[C]](s32) - ; GFX9: [[BITCAST5:%[0-9]+]]:_(s32) = G_BITCAST [[SADDSAT1]](<2 x s16>) - ; GFX9: [[UV6:%[0-9]+]]:_(<2 x s16>), [[UV7:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF1]](<4 x s16>) - ; GFX9: [[BITCAST6:%[0-9]+]]:_(s32) = G_BITCAST [[UV6]](<2 x s16>) - ; GFX9: [[LSHR4:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST6]], [[C]](s32) - ; GFX9: [[BITCAST7:%[0-9]+]]:_(s32) = G_BITCAST [[UV7]](<2 x s16>) - ; GFX9: [[BUILD_VECTOR_TRUNC4:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[BITCAST4]](s32), [[LSHR3]](s32) - ; GFX9: 
[[BUILD_VECTOR_TRUNC5:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[BITCAST5]](s32), [[BITCAST6]](s32) - ; GFX9: [[BUILD_VECTOR_TRUNC6:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[LSHR4]](s32), [[BITCAST7]](s32) - ; GFX9: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR_TRUNC4]](<2 x s16>), [[BUILD_VECTOR_TRUNC5]](<2 x s16>), [[BUILD_VECTOR_TRUNC6]](<2 x s16>) - ; GFX9: $vgpr0_vgpr1_vgpr2 = COPY [[CONCAT_VECTORS]](<6 x s16>) + ; GFX9-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>), [[UV2:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[COPY]](<6 x s16>) + ; GFX9-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>) + ; GFX9-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; GFX9-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) + ; GFX9-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>) + ; GFX9-NEXT: [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[BITCAST]](s32), [[LSHR]](s32) + ; GFX9-NEXT: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF + ; GFX9-NEXT: [[BUILD_VECTOR_TRUNC1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[BITCAST1]](s32), [[DEF]](s32) + ; GFX9-NEXT: [[UV3:%[0-9]+]]:_(<2 x s16>), [[UV4:%[0-9]+]]:_(<2 x s16>), [[UV5:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[COPY]](<6 x s16>) + ; GFX9-NEXT: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[UV4]](<2 x s16>) + ; GFX9-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C]](s32) + ; GFX9-NEXT: [[BITCAST3:%[0-9]+]]:_(s32) = G_BITCAST [[UV5]](<2 x s16>) + ; GFX9-NEXT: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST3]], [[C]](s32) + ; GFX9-NEXT: [[BUILD_VECTOR_TRUNC2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[LSHR1]](s32), [[BITCAST3]](s32) + ; GFX9-NEXT: [[BUILD_VECTOR_TRUNC3:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[LSHR2]](s32), [[DEF]](s32) + ; GFX9-NEXT: [[SADDSAT:%[0-9]+]]:_(<2 x s16>) = G_SADDSAT [[BUILD_VECTOR_TRUNC]], [[BUILD_VECTOR_TRUNC2]] + ; GFX9-NEXT: [[SADDSAT1:%[0-9]+]]:_(<2 x s16>) = G_SADDSAT [[BUILD_VECTOR_TRUNC1]], [[BUILD_VECTOR_TRUNC3]] + ; GFX9-NEXT: [[DEF1:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF + ; GFX9-NEXT: [[BITCAST4:%[0-9]+]]:_(s32) = G_BITCAST [[SADDSAT]](<2 x s16>) + ; GFX9-NEXT: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST4]], [[C]](s32) + ; GFX9-NEXT: [[BITCAST5:%[0-9]+]]:_(s32) = G_BITCAST [[SADDSAT1]](<2 x s16>) + ; GFX9-NEXT: [[UV6:%[0-9]+]]:_(<2 x s16>), [[UV7:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF1]](<4 x s16>) + ; GFX9-NEXT: [[BITCAST6:%[0-9]+]]:_(s32) = G_BITCAST [[UV6]](<2 x s16>) + ; GFX9-NEXT: [[LSHR4:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST6]], [[C]](s32) + ; GFX9-NEXT: [[BITCAST7:%[0-9]+]]:_(s32) = G_BITCAST [[UV7]](<2 x s16>) + ; GFX9-NEXT: [[BUILD_VECTOR_TRUNC4:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[BITCAST4]](s32), [[LSHR3]](s32) + ; GFX9-NEXT: [[BUILD_VECTOR_TRUNC5:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[BITCAST5]](s32), [[BITCAST6]](s32) + ; GFX9-NEXT: [[BUILD_VECTOR_TRUNC6:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[LSHR4]](s32), [[BITCAST7]](s32) + ; GFX9-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR_TRUNC4]](<2 x s16>), [[BUILD_VECTOR_TRUNC5]](<2 x s16>), [[BUILD_VECTOR_TRUNC6]](<2 x s16>) + ; GFX9-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[CONCAT_VECTORS]](<6 x s16>) %0:_(<6 x s16>) = COPY $vgpr0_vgpr1_vgpr2 %1:_(<3 x s16>), %2:_(<3 x s16>) = G_UNMERGE_VALUES %0 %3:_(<3 x s16>) = G_SADDSAT %1, %2 @@ -598,148 +598,148 @@ body: | ; GFX6-LABEL: name: saddsat_v4s16 ; GFX6: [[COPY:%[0-9]+]]:_(<4 x s16>) = COPY $vgpr0_vgpr1 - ; GFX6: 
[[COPY1:%[0-9]+]]:_(<4 x s16>) = COPY $vgpr2_vgpr3 - ; GFX6: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[COPY]](<4 x s16>) - ; GFX6: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>) - ; GFX6: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; GFX6: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) - ; GFX6: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>) - ; GFX6: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C]](s32) - ; GFX6: [[UV2:%[0-9]+]]:_(<2 x s16>), [[UV3:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[COPY1]](<4 x s16>) - ; GFX6: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[UV2]](<2 x s16>) - ; GFX6: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C]](s32) - ; GFX6: [[BITCAST3:%[0-9]+]]:_(s32) = G_BITCAST [[UV3]](<2 x s16>) - ; GFX6: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST3]], [[C]](s32) - ; GFX6: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[BITCAST]], [[C]](s32) - ; GFX6: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[BITCAST2]], [[C]](s32) - ; GFX6: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 2147483647 - ; GFX6: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 -2147483648 - ; GFX6: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 - ; GFX6: [[SMAX:%[0-9]+]]:_(s32) = G_SMAX [[SHL]], [[C3]] - ; GFX6: [[SUB:%[0-9]+]]:_(s32) = G_SUB [[C1]], [[SMAX]] - ; GFX6: [[SMIN:%[0-9]+]]:_(s32) = G_SMIN [[SHL]], [[C3]] - ; GFX6: [[SUB1:%[0-9]+]]:_(s32) = G_SUB [[C2]], [[SMIN]] - ; GFX6: [[SMAX1:%[0-9]+]]:_(s32) = G_SMAX [[SUB1]], [[SHL1]] - ; GFX6: [[SMIN1:%[0-9]+]]:_(s32) = G_SMIN [[SMAX1]], [[SUB]] - ; GFX6: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[SHL]], [[SMIN1]] - ; GFX6: [[ASHR:%[0-9]+]]:_(s32) = G_ASHR [[ADD]], [[C]](s32) - ; GFX6: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[LSHR]], [[C]](s32) - ; GFX6: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[LSHR2]], [[C]](s32) - ; GFX6: [[SMAX2:%[0-9]+]]:_(s32) = G_SMAX [[SHL2]], [[C3]] - ; GFX6: [[SUB2:%[0-9]+]]:_(s32) = G_SUB [[C1]], [[SMAX2]] - ; GFX6: [[SMIN2:%[0-9]+]]:_(s32) = G_SMIN [[SHL2]], [[C3]] - ; GFX6: [[SUB3:%[0-9]+]]:_(s32) = G_SUB [[C2]], [[SMIN2]] - ; GFX6: [[SMAX3:%[0-9]+]]:_(s32) = G_SMAX [[SUB3]], [[SHL3]] - ; GFX6: [[SMIN3:%[0-9]+]]:_(s32) = G_SMIN [[SMAX3]], [[SUB2]] - ; GFX6: [[ADD1:%[0-9]+]]:_(s32) = G_ADD [[SHL2]], [[SMIN3]] - ; GFX6: [[ASHR1:%[0-9]+]]:_(s32) = G_ASHR [[ADD1]], [[C]](s32) - ; GFX6: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[BITCAST1]], [[C]](s32) - ; GFX6: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[BITCAST3]], [[C]](s32) - ; GFX6: [[SMAX4:%[0-9]+]]:_(s32) = G_SMAX [[SHL4]], [[C3]] - ; GFX6: [[SUB4:%[0-9]+]]:_(s32) = G_SUB [[C1]], [[SMAX4]] - ; GFX6: [[SMIN4:%[0-9]+]]:_(s32) = G_SMIN [[SHL4]], [[C3]] - ; GFX6: [[SUB5:%[0-9]+]]:_(s32) = G_SUB [[C2]], [[SMIN4]] - ; GFX6: [[SMAX5:%[0-9]+]]:_(s32) = G_SMAX [[SUB5]], [[SHL5]] - ; GFX6: [[SMIN5:%[0-9]+]]:_(s32) = G_SMIN [[SMAX5]], [[SUB4]] - ; GFX6: [[ADD2:%[0-9]+]]:_(s32) = G_ADD [[SHL4]], [[SMIN5]] - ; GFX6: [[ASHR2:%[0-9]+]]:_(s32) = G_ASHR [[ADD2]], [[C]](s32) - ; GFX6: [[SHL6:%[0-9]+]]:_(s32) = G_SHL [[LSHR1]], [[C]](s32) - ; GFX6: [[SHL7:%[0-9]+]]:_(s32) = G_SHL [[LSHR3]], [[C]](s32) - ; GFX6: [[SMAX6:%[0-9]+]]:_(s32) = G_SMAX [[SHL6]], [[C3]] - ; GFX6: [[SUB6:%[0-9]+]]:_(s32) = G_SUB [[C1]], [[SMAX6]] - ; GFX6: [[SMIN6:%[0-9]+]]:_(s32) = G_SMIN [[SHL6]], [[C3]] - ; GFX6: [[SUB7:%[0-9]+]]:_(s32) = G_SUB [[C2]], [[SMIN6]] - ; GFX6: [[SMAX7:%[0-9]+]]:_(s32) = G_SMAX [[SUB7]], [[SHL7]] - ; GFX6: [[SMIN7:%[0-9]+]]:_(s32) = G_SMIN [[SMAX7]], [[SUB6]] - ; GFX6: [[ADD3:%[0-9]+]]:_(s32) = G_ADD [[SHL6]], [[SMIN7]] - ; GFX6: [[ASHR3:%[0-9]+]]:_(s32) = G_ASHR [[ADD3]], [[C]](s32) - ; GFX6: 
[[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 - ; GFX6: [[AND:%[0-9]+]]:_(s32) = G_AND [[ASHR]], [[C4]] - ; GFX6: [[AND1:%[0-9]+]]:_(s32) = G_AND [[ASHR1]], [[C4]] - ; GFX6: [[SHL8:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C]](s32) - ; GFX6: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL8]] - ; GFX6: [[BITCAST4:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) - ; GFX6: [[AND2:%[0-9]+]]:_(s32) = G_AND [[ASHR2]], [[C4]] - ; GFX6: [[AND3:%[0-9]+]]:_(s32) = G_AND [[ASHR3]], [[C4]] - ; GFX6: [[SHL9:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C]](s32) - ; GFX6: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL9]] - ; GFX6: [[BITCAST5:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32) - ; GFX6: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BITCAST4]](<2 x s16>), [[BITCAST5]](<2 x s16>) - ; GFX6: $vgpr0_vgpr1 = COPY [[CONCAT_VECTORS]](<4 x s16>) + ; GFX6-NEXT: [[COPY1:%[0-9]+]]:_(<4 x s16>) = COPY $vgpr2_vgpr3 + ; GFX6-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[COPY]](<4 x s16>) + ; GFX6-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>) + ; GFX6-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; GFX6-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) + ; GFX6-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>) + ; GFX6-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C]](s32) + ; GFX6-NEXT: [[UV2:%[0-9]+]]:_(<2 x s16>), [[UV3:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[COPY1]](<4 x s16>) + ; GFX6-NEXT: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[UV2]](<2 x s16>) + ; GFX6-NEXT: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C]](s32) + ; GFX6-NEXT: [[BITCAST3:%[0-9]+]]:_(s32) = G_BITCAST [[UV3]](<2 x s16>) + ; GFX6-NEXT: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST3]], [[C]](s32) + ; GFX6-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[BITCAST]], [[C]](s32) + ; GFX6-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[BITCAST2]], [[C]](s32) + ; GFX6-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 2147483647 + ; GFX6-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 -2147483648 + ; GFX6-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 + ; GFX6-NEXT: [[SMAX:%[0-9]+]]:_(s32) = G_SMAX [[SHL]], [[C3]] + ; GFX6-NEXT: [[SUB:%[0-9]+]]:_(s32) = G_SUB [[C1]], [[SMAX]] + ; GFX6-NEXT: [[SMIN:%[0-9]+]]:_(s32) = G_SMIN [[SHL]], [[C3]] + ; GFX6-NEXT: [[SUB1:%[0-9]+]]:_(s32) = G_SUB [[C2]], [[SMIN]] + ; GFX6-NEXT: [[SMAX1:%[0-9]+]]:_(s32) = G_SMAX [[SUB1]], [[SHL1]] + ; GFX6-NEXT: [[SMIN1:%[0-9]+]]:_(s32) = G_SMIN [[SMAX1]], [[SUB]] + ; GFX6-NEXT: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[SHL]], [[SMIN1]] + ; GFX6-NEXT: [[ASHR:%[0-9]+]]:_(s32) = G_ASHR [[ADD]], [[C]](s32) + ; GFX6-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[LSHR]], [[C]](s32) + ; GFX6-NEXT: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[LSHR2]], [[C]](s32) + ; GFX6-NEXT: [[SMAX2:%[0-9]+]]:_(s32) = G_SMAX [[SHL2]], [[C3]] + ; GFX6-NEXT: [[SUB2:%[0-9]+]]:_(s32) = G_SUB [[C1]], [[SMAX2]] + ; GFX6-NEXT: [[SMIN2:%[0-9]+]]:_(s32) = G_SMIN [[SHL2]], [[C3]] + ; GFX6-NEXT: [[SUB3:%[0-9]+]]:_(s32) = G_SUB [[C2]], [[SMIN2]] + ; GFX6-NEXT: [[SMAX3:%[0-9]+]]:_(s32) = G_SMAX [[SUB3]], [[SHL3]] + ; GFX6-NEXT: [[SMIN3:%[0-9]+]]:_(s32) = G_SMIN [[SMAX3]], [[SUB2]] + ; GFX6-NEXT: [[ADD1:%[0-9]+]]:_(s32) = G_ADD [[SHL2]], [[SMIN3]] + ; GFX6-NEXT: [[ASHR1:%[0-9]+]]:_(s32) = G_ASHR [[ADD1]], [[C]](s32) + ; GFX6-NEXT: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[BITCAST1]], [[C]](s32) + ; GFX6-NEXT: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[BITCAST3]], [[C]](s32) + ; GFX6-NEXT: [[SMAX4:%[0-9]+]]:_(s32) = G_SMAX [[SHL4]], [[C3]] + ; GFX6-NEXT: [[SUB4:%[0-9]+]]:_(s32) = 
G_SUB [[C1]], [[SMAX4]] + ; GFX6-NEXT: [[SMIN4:%[0-9]+]]:_(s32) = G_SMIN [[SHL4]], [[C3]] + ; GFX6-NEXT: [[SUB5:%[0-9]+]]:_(s32) = G_SUB [[C2]], [[SMIN4]] + ; GFX6-NEXT: [[SMAX5:%[0-9]+]]:_(s32) = G_SMAX [[SUB5]], [[SHL5]] + ; GFX6-NEXT: [[SMIN5:%[0-9]+]]:_(s32) = G_SMIN [[SMAX5]], [[SUB4]] + ; GFX6-NEXT: [[ADD2:%[0-9]+]]:_(s32) = G_ADD [[SHL4]], [[SMIN5]] + ; GFX6-NEXT: [[ASHR2:%[0-9]+]]:_(s32) = G_ASHR [[ADD2]], [[C]](s32) + ; GFX6-NEXT: [[SHL6:%[0-9]+]]:_(s32) = G_SHL [[LSHR1]], [[C]](s32) + ; GFX6-NEXT: [[SHL7:%[0-9]+]]:_(s32) = G_SHL [[LSHR3]], [[C]](s32) + ; GFX6-NEXT: [[SMAX6:%[0-9]+]]:_(s32) = G_SMAX [[SHL6]], [[C3]] + ; GFX6-NEXT: [[SUB6:%[0-9]+]]:_(s32) = G_SUB [[C1]], [[SMAX6]] + ; GFX6-NEXT: [[SMIN6:%[0-9]+]]:_(s32) = G_SMIN [[SHL6]], [[C3]] + ; GFX6-NEXT: [[SUB7:%[0-9]+]]:_(s32) = G_SUB [[C2]], [[SMIN6]] + ; GFX6-NEXT: [[SMAX7:%[0-9]+]]:_(s32) = G_SMAX [[SUB7]], [[SHL7]] + ; GFX6-NEXT: [[SMIN7:%[0-9]+]]:_(s32) = G_SMIN [[SMAX7]], [[SUB6]] + ; GFX6-NEXT: [[ADD3:%[0-9]+]]:_(s32) = G_ADD [[SHL6]], [[SMIN7]] + ; GFX6-NEXT: [[ASHR3:%[0-9]+]]:_(s32) = G_ASHR [[ADD3]], [[C]](s32) + ; GFX6-NEXT: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 + ; GFX6-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[ASHR]], [[C4]] + ; GFX6-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[ASHR1]], [[C4]] + ; GFX6-NEXT: [[SHL8:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C]](s32) + ; GFX6-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL8]] + ; GFX6-NEXT: [[BITCAST4:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) + ; GFX6-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[ASHR2]], [[C4]] + ; GFX6-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[ASHR3]], [[C4]] + ; GFX6-NEXT: [[SHL9:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C]](s32) + ; GFX6-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL9]] + ; GFX6-NEXT: [[BITCAST5:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32) + ; GFX6-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BITCAST4]](<2 x s16>), [[BITCAST5]](<2 x s16>) + ; GFX6-NEXT: $vgpr0_vgpr1 = COPY [[CONCAT_VECTORS]](<4 x s16>) ; GFX8-LABEL: name: saddsat_v4s16 ; GFX8: [[COPY:%[0-9]+]]:_(<4 x s16>) = COPY $vgpr0_vgpr1 - ; GFX8: [[COPY1:%[0-9]+]]:_(<4 x s16>) = COPY $vgpr2_vgpr3 - ; GFX8: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[COPY]](<4 x s16>) - ; GFX8: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>) - ; GFX8: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST]](s32) - ; GFX8: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; GFX8: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) - ; GFX8: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) - ; GFX8: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>) - ; GFX8: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST1]](s32) - ; GFX8: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C]](s32) - ; GFX8: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR1]](s32) - ; GFX8: [[UV2:%[0-9]+]]:_(<2 x s16>), [[UV3:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[COPY1]](<4 x s16>) - ; GFX8: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[UV2]](<2 x s16>) - ; GFX8: [[TRUNC4:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST2]](s32) - ; GFX8: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C]](s32) - ; GFX8: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR2]](s32) - ; GFX8: [[BITCAST3:%[0-9]+]]:_(s32) = G_BITCAST [[UV3]](<2 x s16>) - ; GFX8: [[TRUNC6:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST3]](s32) - ; GFX8: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST3]], [[C]](s32) - ; GFX8: [[TRUNC7:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR3]](s32) - ; GFX8: [[C1:%[0-9]+]]:_(s16) = G_CONSTANT i16 32767 - ; GFX8: 
[[C2:%[0-9]+]]:_(s16) = G_CONSTANT i16 -32768 - ; GFX8: [[C3:%[0-9]+]]:_(s16) = G_CONSTANT i16 0 - ; GFX8: [[SMAX:%[0-9]+]]:_(s16) = G_SMAX [[TRUNC]], [[C3]] - ; GFX8: [[SUB:%[0-9]+]]:_(s16) = G_SUB [[C1]], [[SMAX]] - ; GFX8: [[SMIN:%[0-9]+]]:_(s16) = G_SMIN [[TRUNC]], [[C3]] - ; GFX8: [[SUB1:%[0-9]+]]:_(s16) = G_SUB [[C2]], [[SMIN]] - ; GFX8: [[SMAX1:%[0-9]+]]:_(s16) = G_SMAX [[SUB1]], [[TRUNC4]] - ; GFX8: [[SMIN1:%[0-9]+]]:_(s16) = G_SMIN [[SMAX1]], [[SUB]] - ; GFX8: [[ADD:%[0-9]+]]:_(s16) = G_ADD [[TRUNC]], [[SMIN1]] - ; GFX8: [[SMAX2:%[0-9]+]]:_(s16) = G_SMAX [[TRUNC1]], [[C3]] - ; GFX8: [[SUB2:%[0-9]+]]:_(s16) = G_SUB [[C1]], [[SMAX2]] - ; GFX8: [[SMIN2:%[0-9]+]]:_(s16) = G_SMIN [[TRUNC1]], [[C3]] - ; GFX8: [[SUB3:%[0-9]+]]:_(s16) = G_SUB [[C2]], [[SMIN2]] - ; GFX8: [[SMAX3:%[0-9]+]]:_(s16) = G_SMAX [[SUB3]], [[TRUNC5]] - ; GFX8: [[SMIN3:%[0-9]+]]:_(s16) = G_SMIN [[SMAX3]], [[SUB2]] - ; GFX8: [[ADD1:%[0-9]+]]:_(s16) = G_ADD [[TRUNC1]], [[SMIN3]] - ; GFX8: [[SMAX4:%[0-9]+]]:_(s16) = G_SMAX [[TRUNC2]], [[C3]] - ; GFX8: [[SUB4:%[0-9]+]]:_(s16) = G_SUB [[C1]], [[SMAX4]] - ; GFX8: [[SMIN4:%[0-9]+]]:_(s16) = G_SMIN [[TRUNC2]], [[C3]] - ; GFX8: [[SUB5:%[0-9]+]]:_(s16) = G_SUB [[C2]], [[SMIN4]] - ; GFX8: [[SMAX5:%[0-9]+]]:_(s16) = G_SMAX [[SUB5]], [[TRUNC6]] - ; GFX8: [[SMIN5:%[0-9]+]]:_(s16) = G_SMIN [[SMAX5]], [[SUB4]] - ; GFX8: [[ADD2:%[0-9]+]]:_(s16) = G_ADD [[TRUNC2]], [[SMIN5]] - ; GFX8: [[SMAX6:%[0-9]+]]:_(s16) = G_SMAX [[TRUNC3]], [[C3]] - ; GFX8: [[SUB6:%[0-9]+]]:_(s16) = G_SUB [[C1]], [[SMAX6]] - ; GFX8: [[SMIN6:%[0-9]+]]:_(s16) = G_SMIN [[TRUNC3]], [[C3]] - ; GFX8: [[SUB7:%[0-9]+]]:_(s16) = G_SUB [[C2]], [[SMIN6]] - ; GFX8: [[SMAX7:%[0-9]+]]:_(s16) = G_SMAX [[SUB7]], [[TRUNC7]] - ; GFX8: [[SMIN7:%[0-9]+]]:_(s16) = G_SMIN [[SMAX7]], [[SUB6]] - ; GFX8: [[ADD3:%[0-9]+]]:_(s16) = G_ADD [[TRUNC3]], [[SMIN7]] - ; GFX8: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[ADD]](s16) - ; GFX8: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[ADD1]](s16) - ; GFX8: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C]](s32) - ; GFX8: [[OR:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL]] - ; GFX8: [[BITCAST4:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) - ; GFX8: [[ZEXT2:%[0-9]+]]:_(s32) = G_ZEXT [[ADD2]](s16) - ; GFX8: [[ZEXT3:%[0-9]+]]:_(s32) = G_ZEXT [[ADD3]](s16) - ; GFX8: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[ZEXT3]], [[C]](s32) - ; GFX8: [[OR1:%[0-9]+]]:_(s32) = G_OR [[ZEXT2]], [[SHL1]] - ; GFX8: [[BITCAST5:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32) - ; GFX8: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BITCAST4]](<2 x s16>), [[BITCAST5]](<2 x s16>) - ; GFX8: $vgpr0_vgpr1 = COPY [[CONCAT_VECTORS]](<4 x s16>) + ; GFX8-NEXT: [[COPY1:%[0-9]+]]:_(<4 x s16>) = COPY $vgpr2_vgpr3 + ; GFX8-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[COPY]](<4 x s16>) + ; GFX8-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>) + ; GFX8-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST]](s32) + ; GFX8-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; GFX8-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) + ; GFX8-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) + ; GFX8-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>) + ; GFX8-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST1]](s32) + ; GFX8-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C]](s32) + ; GFX8-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR1]](s32) + ; GFX8-NEXT: [[UV2:%[0-9]+]]:_(<2 x s16>), [[UV3:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[COPY1]](<4 x s16>) + ; GFX8-NEXT: 
[[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[UV2]](<2 x s16>) + ; GFX8-NEXT: [[TRUNC4:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST2]](s32) + ; GFX8-NEXT: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C]](s32) + ; GFX8-NEXT: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR2]](s32) + ; GFX8-NEXT: [[BITCAST3:%[0-9]+]]:_(s32) = G_BITCAST [[UV3]](<2 x s16>) + ; GFX8-NEXT: [[TRUNC6:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST3]](s32) + ; GFX8-NEXT: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST3]], [[C]](s32) + ; GFX8-NEXT: [[TRUNC7:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR3]](s32) + ; GFX8-NEXT: [[C1:%[0-9]+]]:_(s16) = G_CONSTANT i16 32767 + ; GFX8-NEXT: [[C2:%[0-9]+]]:_(s16) = G_CONSTANT i16 -32768 + ; GFX8-NEXT: [[C3:%[0-9]+]]:_(s16) = G_CONSTANT i16 0 + ; GFX8-NEXT: [[SMAX:%[0-9]+]]:_(s16) = G_SMAX [[TRUNC]], [[C3]] + ; GFX8-NEXT: [[SUB:%[0-9]+]]:_(s16) = G_SUB [[C1]], [[SMAX]] + ; GFX8-NEXT: [[SMIN:%[0-9]+]]:_(s16) = G_SMIN [[TRUNC]], [[C3]] + ; GFX8-NEXT: [[SUB1:%[0-9]+]]:_(s16) = G_SUB [[C2]], [[SMIN]] + ; GFX8-NEXT: [[SMAX1:%[0-9]+]]:_(s16) = G_SMAX [[SUB1]], [[TRUNC4]] + ; GFX8-NEXT: [[SMIN1:%[0-9]+]]:_(s16) = G_SMIN [[SMAX1]], [[SUB]] + ; GFX8-NEXT: [[ADD:%[0-9]+]]:_(s16) = G_ADD [[TRUNC]], [[SMIN1]] + ; GFX8-NEXT: [[SMAX2:%[0-9]+]]:_(s16) = G_SMAX [[TRUNC1]], [[C3]] + ; GFX8-NEXT: [[SUB2:%[0-9]+]]:_(s16) = G_SUB [[C1]], [[SMAX2]] + ; GFX8-NEXT: [[SMIN2:%[0-9]+]]:_(s16) = G_SMIN [[TRUNC1]], [[C3]] + ; GFX8-NEXT: [[SUB3:%[0-9]+]]:_(s16) = G_SUB [[C2]], [[SMIN2]] + ; GFX8-NEXT: [[SMAX3:%[0-9]+]]:_(s16) = G_SMAX [[SUB3]], [[TRUNC5]] + ; GFX8-NEXT: [[SMIN3:%[0-9]+]]:_(s16) = G_SMIN [[SMAX3]], [[SUB2]] + ; GFX8-NEXT: [[ADD1:%[0-9]+]]:_(s16) = G_ADD [[TRUNC1]], [[SMIN3]] + ; GFX8-NEXT: [[SMAX4:%[0-9]+]]:_(s16) = G_SMAX [[TRUNC2]], [[C3]] + ; GFX8-NEXT: [[SUB4:%[0-9]+]]:_(s16) = G_SUB [[C1]], [[SMAX4]] + ; GFX8-NEXT: [[SMIN4:%[0-9]+]]:_(s16) = G_SMIN [[TRUNC2]], [[C3]] + ; GFX8-NEXT: [[SUB5:%[0-9]+]]:_(s16) = G_SUB [[C2]], [[SMIN4]] + ; GFX8-NEXT: [[SMAX5:%[0-9]+]]:_(s16) = G_SMAX [[SUB5]], [[TRUNC6]] + ; GFX8-NEXT: [[SMIN5:%[0-9]+]]:_(s16) = G_SMIN [[SMAX5]], [[SUB4]] + ; GFX8-NEXT: [[ADD2:%[0-9]+]]:_(s16) = G_ADD [[TRUNC2]], [[SMIN5]] + ; GFX8-NEXT: [[SMAX6:%[0-9]+]]:_(s16) = G_SMAX [[TRUNC3]], [[C3]] + ; GFX8-NEXT: [[SUB6:%[0-9]+]]:_(s16) = G_SUB [[C1]], [[SMAX6]] + ; GFX8-NEXT: [[SMIN6:%[0-9]+]]:_(s16) = G_SMIN [[TRUNC3]], [[C3]] + ; GFX8-NEXT: [[SUB7:%[0-9]+]]:_(s16) = G_SUB [[C2]], [[SMIN6]] + ; GFX8-NEXT: [[SMAX7:%[0-9]+]]:_(s16) = G_SMAX [[SUB7]], [[TRUNC7]] + ; GFX8-NEXT: [[SMIN7:%[0-9]+]]:_(s16) = G_SMIN [[SMAX7]], [[SUB6]] + ; GFX8-NEXT: [[ADD3:%[0-9]+]]:_(s16) = G_ADD [[TRUNC3]], [[SMIN7]] + ; GFX8-NEXT: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[ADD]](s16) + ; GFX8-NEXT: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[ADD1]](s16) + ; GFX8-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C]](s32) + ; GFX8-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL]] + ; GFX8-NEXT: [[BITCAST4:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) + ; GFX8-NEXT: [[ZEXT2:%[0-9]+]]:_(s32) = G_ZEXT [[ADD2]](s16) + ; GFX8-NEXT: [[ZEXT3:%[0-9]+]]:_(s32) = G_ZEXT [[ADD3]](s16) + ; GFX8-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[ZEXT3]], [[C]](s32) + ; GFX8-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[ZEXT2]], [[SHL1]] + ; GFX8-NEXT: [[BITCAST5:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32) + ; GFX8-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BITCAST4]](<2 x s16>), [[BITCAST5]](<2 x s16>) + ; GFX8-NEXT: $vgpr0_vgpr1 = COPY [[CONCAT_VECTORS]](<4 x s16>) ; GFX9-LABEL: name: saddsat_v4s16 ; GFX9: [[COPY:%[0-9]+]]:_(<4 x s16>) = COPY $vgpr0_vgpr1 
- ; GFX9: [[COPY1:%[0-9]+]]:_(<4 x s16>) = COPY $vgpr2_vgpr3 - ; GFX9: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[COPY]](<4 x s16>) - ; GFX9: [[UV2:%[0-9]+]]:_(<2 x s16>), [[UV3:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[COPY1]](<4 x s16>) - ; GFX9: [[SADDSAT:%[0-9]+]]:_(<2 x s16>) = G_SADDSAT [[UV]], [[UV2]] - ; GFX9: [[SADDSAT1:%[0-9]+]]:_(<2 x s16>) = G_SADDSAT [[UV1]], [[UV3]] - ; GFX9: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[SADDSAT]](<2 x s16>), [[SADDSAT1]](<2 x s16>) - ; GFX9: $vgpr0_vgpr1 = COPY [[CONCAT_VECTORS]](<4 x s16>) + ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(<4 x s16>) = COPY $vgpr2_vgpr3 + ; GFX9-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[COPY]](<4 x s16>) + ; GFX9-NEXT: [[UV2:%[0-9]+]]:_(<2 x s16>), [[UV3:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[COPY1]](<4 x s16>) + ; GFX9-NEXT: [[SADDSAT:%[0-9]+]]:_(<2 x s16>) = G_SADDSAT [[UV]], [[UV2]] + ; GFX9-NEXT: [[SADDSAT1:%[0-9]+]]:_(<2 x s16>) = G_SADDSAT [[UV1]], [[UV3]] + ; GFX9-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[SADDSAT]](<2 x s16>), [[SADDSAT1]](<2 x s16>) + ; GFX9-NEXT: $vgpr0_vgpr1 = COPY [[CONCAT_VECTORS]](<4 x s16>) %0:_(<4 x s16>) = COPY $vgpr0_vgpr1 %1:_(<4 x s16>) = COPY $vgpr2_vgpr3 %2:_(<4 x s16>) = G_SADDSAT %0, %1 @@ -754,37 +754,37 @@ body: | ; GFX6-LABEL: name: saddsat_s32 ; GFX6: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX6: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX6: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 2147483647 - ; GFX6: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 -2147483648 - ; GFX6: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 - ; GFX6: [[SMAX:%[0-9]+]]:_(s32) = G_SMAX [[COPY]], [[C2]] - ; GFX6: [[SUB:%[0-9]+]]:_(s32) = G_SUB [[C]], [[SMAX]] - ; GFX6: [[SMIN:%[0-9]+]]:_(s32) = G_SMIN [[COPY]], [[C2]] - ; GFX6: [[SUB1:%[0-9]+]]:_(s32) = G_SUB [[C1]], [[SMIN]] - ; GFX6: [[SMAX1:%[0-9]+]]:_(s32) = G_SMAX [[SUB1]], [[COPY1]] - ; GFX6: [[SMIN1:%[0-9]+]]:_(s32) = G_SMIN [[SMAX1]], [[SUB]] - ; GFX6: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[COPY]], [[SMIN1]] - ; GFX6: $vgpr0 = COPY [[ADD]](s32) + ; GFX6-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; GFX6-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 2147483647 + ; GFX6-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 -2147483648 + ; GFX6-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 + ; GFX6-NEXT: [[SMAX:%[0-9]+]]:_(s32) = G_SMAX [[COPY]], [[C2]] + ; GFX6-NEXT: [[SUB:%[0-9]+]]:_(s32) = G_SUB [[C]], [[SMAX]] + ; GFX6-NEXT: [[SMIN:%[0-9]+]]:_(s32) = G_SMIN [[COPY]], [[C2]] + ; GFX6-NEXT: [[SUB1:%[0-9]+]]:_(s32) = G_SUB [[C1]], [[SMIN]] + ; GFX6-NEXT: [[SMAX1:%[0-9]+]]:_(s32) = G_SMAX [[SUB1]], [[COPY1]] + ; GFX6-NEXT: [[SMIN1:%[0-9]+]]:_(s32) = G_SMIN [[SMAX1]], [[SUB]] + ; GFX6-NEXT: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[COPY]], [[SMIN1]] + ; GFX6-NEXT: $vgpr0 = COPY [[ADD]](s32) ; GFX8-LABEL: name: saddsat_s32 ; GFX8: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX8: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX8: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 2147483647 - ; GFX8: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 -2147483648 - ; GFX8: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 - ; GFX8: [[SMAX:%[0-9]+]]:_(s32) = G_SMAX [[COPY]], [[C2]] - ; GFX8: [[SUB:%[0-9]+]]:_(s32) = G_SUB [[C]], [[SMAX]] - ; GFX8: [[SMIN:%[0-9]+]]:_(s32) = G_SMIN [[COPY]], [[C2]] - ; GFX8: [[SUB1:%[0-9]+]]:_(s32) = G_SUB [[C1]], [[SMIN]] - ; GFX8: [[SMAX1:%[0-9]+]]:_(s32) = G_SMAX [[SUB1]], [[COPY1]] - ; GFX8: [[SMIN1:%[0-9]+]]:_(s32) = G_SMIN [[SMAX1]], [[SUB]] - ; GFX8: 
[[ADD:%[0-9]+]]:_(s32) = G_ADD [[COPY]], [[SMIN1]] - ; GFX8: $vgpr0 = COPY [[ADD]](s32) + ; GFX8-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; GFX8-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 2147483647 + ; GFX8-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 -2147483648 + ; GFX8-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 + ; GFX8-NEXT: [[SMAX:%[0-9]+]]:_(s32) = G_SMAX [[COPY]], [[C2]] + ; GFX8-NEXT: [[SUB:%[0-9]+]]:_(s32) = G_SUB [[C]], [[SMAX]] + ; GFX8-NEXT: [[SMIN:%[0-9]+]]:_(s32) = G_SMIN [[COPY]], [[C2]] + ; GFX8-NEXT: [[SUB1:%[0-9]+]]:_(s32) = G_SUB [[C1]], [[SMIN]] + ; GFX8-NEXT: [[SMAX1:%[0-9]+]]:_(s32) = G_SMAX [[SUB1]], [[COPY1]] + ; GFX8-NEXT: [[SMIN1:%[0-9]+]]:_(s32) = G_SMIN [[SMAX1]], [[SUB]] + ; GFX8-NEXT: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[COPY]], [[SMIN1]] + ; GFX8-NEXT: $vgpr0 = COPY [[ADD]](s32) ; GFX9-LABEL: name: saddsat_s32 ; GFX9: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX9: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX9: [[SADDSAT:%[0-9]+]]:_(s32) = G_SADDSAT [[COPY]], [[COPY1]] - ; GFX9: $vgpr0 = COPY [[SADDSAT]](s32) + ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; GFX9-NEXT: [[SADDSAT:%[0-9]+]]:_(s32) = G_SADDSAT [[COPY]], [[COPY1]] + ; GFX9-NEXT: $vgpr0 = COPY [[SADDSAT]](s32) %0:_(s32) = COPY $vgpr0 %1:_(s32) = COPY $vgpr1 %2:_(s32) = G_SADDSAT %0, %1 @@ -799,61 +799,61 @@ body: | ; GFX6-LABEL: name: saddsat_v2s32 ; GFX6: [[COPY:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr0_vgpr1 - ; GFX6: [[COPY1:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr2_vgpr3 - ; GFX6: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<2 x s32>) - ; GFX6: [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](<2 x s32>) - ; GFX6: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 2147483647 - ; GFX6: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 -2147483648 - ; GFX6: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 - ; GFX6: [[SMAX:%[0-9]+]]:_(s32) = G_SMAX [[UV]], [[C2]] - ; GFX6: [[SUB:%[0-9]+]]:_(s32) = G_SUB [[C]], [[SMAX]] - ; GFX6: [[SMIN:%[0-9]+]]:_(s32) = G_SMIN [[UV]], [[C2]] - ; GFX6: [[SUB1:%[0-9]+]]:_(s32) = G_SUB [[C1]], [[SMIN]] - ; GFX6: [[SMAX1:%[0-9]+]]:_(s32) = G_SMAX [[SUB1]], [[UV2]] - ; GFX6: [[SMIN1:%[0-9]+]]:_(s32) = G_SMIN [[SMAX1]], [[SUB]] - ; GFX6: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[UV]], [[SMIN1]] - ; GFX6: [[SMAX2:%[0-9]+]]:_(s32) = G_SMAX [[UV1]], [[C2]] - ; GFX6: [[SUB2:%[0-9]+]]:_(s32) = G_SUB [[C]], [[SMAX2]] - ; GFX6: [[SMIN2:%[0-9]+]]:_(s32) = G_SMIN [[UV1]], [[C2]] - ; GFX6: [[SUB3:%[0-9]+]]:_(s32) = G_SUB [[C1]], [[SMIN2]] - ; GFX6: [[SMAX3:%[0-9]+]]:_(s32) = G_SMAX [[SUB3]], [[UV3]] - ; GFX6: [[SMIN3:%[0-9]+]]:_(s32) = G_SMIN [[SMAX3]], [[SUB2]] - ; GFX6: [[ADD1:%[0-9]+]]:_(s32) = G_ADD [[UV1]], [[SMIN3]] - ; GFX6: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[ADD]](s32), [[ADD1]](s32) - ; GFX6: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x s32>) + ; GFX6-NEXT: [[COPY1:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr2_vgpr3 + ; GFX6-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<2 x s32>) + ; GFX6-NEXT: [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](<2 x s32>) + ; GFX6-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 2147483647 + ; GFX6-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 -2147483648 + ; GFX6-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 + ; GFX6-NEXT: [[SMAX:%[0-9]+]]:_(s32) = G_SMAX [[UV]], [[C2]] + ; GFX6-NEXT: [[SUB:%[0-9]+]]:_(s32) = G_SUB [[C]], [[SMAX]] + ; GFX6-NEXT: [[SMIN:%[0-9]+]]:_(s32) = G_SMIN [[UV]], [[C2]] + ; GFX6-NEXT: [[SUB1:%[0-9]+]]:_(s32) = G_SUB [[C1]], 
[[SMIN]] + ; GFX6-NEXT: [[SMAX1:%[0-9]+]]:_(s32) = G_SMAX [[SUB1]], [[UV2]] + ; GFX6-NEXT: [[SMIN1:%[0-9]+]]:_(s32) = G_SMIN [[SMAX1]], [[SUB]] + ; GFX6-NEXT: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[UV]], [[SMIN1]] + ; GFX6-NEXT: [[SMAX2:%[0-9]+]]:_(s32) = G_SMAX [[UV1]], [[C2]] + ; GFX6-NEXT: [[SUB2:%[0-9]+]]:_(s32) = G_SUB [[C]], [[SMAX2]] + ; GFX6-NEXT: [[SMIN2:%[0-9]+]]:_(s32) = G_SMIN [[UV1]], [[C2]] + ; GFX6-NEXT: [[SUB3:%[0-9]+]]:_(s32) = G_SUB [[C1]], [[SMIN2]] + ; GFX6-NEXT: [[SMAX3:%[0-9]+]]:_(s32) = G_SMAX [[SUB3]], [[UV3]] + ; GFX6-NEXT: [[SMIN3:%[0-9]+]]:_(s32) = G_SMIN [[SMAX3]], [[SUB2]] + ; GFX6-NEXT: [[ADD1:%[0-9]+]]:_(s32) = G_ADD [[UV1]], [[SMIN3]] + ; GFX6-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[ADD]](s32), [[ADD1]](s32) + ; GFX6-NEXT: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x s32>) ; GFX8-LABEL: name: saddsat_v2s32 ; GFX8: [[COPY:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr0_vgpr1 - ; GFX8: [[COPY1:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr2_vgpr3 - ; GFX8: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<2 x s32>) - ; GFX8: [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](<2 x s32>) - ; GFX8: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 2147483647 - ; GFX8: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 -2147483648 - ; GFX8: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 - ; GFX8: [[SMAX:%[0-9]+]]:_(s32) = G_SMAX [[UV]], [[C2]] - ; GFX8: [[SUB:%[0-9]+]]:_(s32) = G_SUB [[C]], [[SMAX]] - ; GFX8: [[SMIN:%[0-9]+]]:_(s32) = G_SMIN [[UV]], [[C2]] - ; GFX8: [[SUB1:%[0-9]+]]:_(s32) = G_SUB [[C1]], [[SMIN]] - ; GFX8: [[SMAX1:%[0-9]+]]:_(s32) = G_SMAX [[SUB1]], [[UV2]] - ; GFX8: [[SMIN1:%[0-9]+]]:_(s32) = G_SMIN [[SMAX1]], [[SUB]] - ; GFX8: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[UV]], [[SMIN1]] - ; GFX8: [[SMAX2:%[0-9]+]]:_(s32) = G_SMAX [[UV1]], [[C2]] - ; GFX8: [[SUB2:%[0-9]+]]:_(s32) = G_SUB [[C]], [[SMAX2]] - ; GFX8: [[SMIN2:%[0-9]+]]:_(s32) = G_SMIN [[UV1]], [[C2]] - ; GFX8: [[SUB3:%[0-9]+]]:_(s32) = G_SUB [[C1]], [[SMIN2]] - ; GFX8: [[SMAX3:%[0-9]+]]:_(s32) = G_SMAX [[SUB3]], [[UV3]] - ; GFX8: [[SMIN3:%[0-9]+]]:_(s32) = G_SMIN [[SMAX3]], [[SUB2]] - ; GFX8: [[ADD1:%[0-9]+]]:_(s32) = G_ADD [[UV1]], [[SMIN3]] - ; GFX8: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[ADD]](s32), [[ADD1]](s32) - ; GFX8: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x s32>) + ; GFX8-NEXT: [[COPY1:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr2_vgpr3 + ; GFX8-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<2 x s32>) + ; GFX8-NEXT: [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](<2 x s32>) + ; GFX8-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 2147483647 + ; GFX8-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 -2147483648 + ; GFX8-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 + ; GFX8-NEXT: [[SMAX:%[0-9]+]]:_(s32) = G_SMAX [[UV]], [[C2]] + ; GFX8-NEXT: [[SUB:%[0-9]+]]:_(s32) = G_SUB [[C]], [[SMAX]] + ; GFX8-NEXT: [[SMIN:%[0-9]+]]:_(s32) = G_SMIN [[UV]], [[C2]] + ; GFX8-NEXT: [[SUB1:%[0-9]+]]:_(s32) = G_SUB [[C1]], [[SMIN]] + ; GFX8-NEXT: [[SMAX1:%[0-9]+]]:_(s32) = G_SMAX [[SUB1]], [[UV2]] + ; GFX8-NEXT: [[SMIN1:%[0-9]+]]:_(s32) = G_SMIN [[SMAX1]], [[SUB]] + ; GFX8-NEXT: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[UV]], [[SMIN1]] + ; GFX8-NEXT: [[SMAX2:%[0-9]+]]:_(s32) = G_SMAX [[UV1]], [[C2]] + ; GFX8-NEXT: [[SUB2:%[0-9]+]]:_(s32) = G_SUB [[C]], [[SMAX2]] + ; GFX8-NEXT: [[SMIN2:%[0-9]+]]:_(s32) = G_SMIN [[UV1]], [[C2]] + ; GFX8-NEXT: [[SUB3:%[0-9]+]]:_(s32) = G_SUB [[C1]], [[SMIN2]] + ; GFX8-NEXT: [[SMAX3:%[0-9]+]]:_(s32) = G_SMAX 
[[SUB3]], [[UV3]] + ; GFX8-NEXT: [[SMIN3:%[0-9]+]]:_(s32) = G_SMIN [[SMAX3]], [[SUB2]] + ; GFX8-NEXT: [[ADD1:%[0-9]+]]:_(s32) = G_ADD [[UV1]], [[SMIN3]] + ; GFX8-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[ADD]](s32), [[ADD1]](s32) + ; GFX8-NEXT: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x s32>) ; GFX9-LABEL: name: saddsat_v2s32 ; GFX9: [[COPY:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr0_vgpr1 - ; GFX9: [[COPY1:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr2_vgpr3 - ; GFX9: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<2 x s32>) - ; GFX9: [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](<2 x s32>) - ; GFX9: [[SADDSAT:%[0-9]+]]:_(s32) = G_SADDSAT [[UV]], [[UV2]] - ; GFX9: [[SADDSAT1:%[0-9]+]]:_(s32) = G_SADDSAT [[UV1]], [[UV3]] - ; GFX9: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[SADDSAT]](s32), [[SADDSAT1]](s32) - ; GFX9: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x s32>) + ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr2_vgpr3 + ; GFX9-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<2 x s32>) + ; GFX9-NEXT: [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](<2 x s32>) + ; GFX9-NEXT: [[SADDSAT:%[0-9]+]]:_(s32) = G_SADDSAT [[UV]], [[UV2]] + ; GFX9-NEXT: [[SADDSAT1:%[0-9]+]]:_(s32) = G_SADDSAT [[UV1]], [[UV3]] + ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[SADDSAT]](s32), [[SADDSAT1]](s32) + ; GFX9-NEXT: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x s32>) %0:_(<2 x s32>) = COPY $vgpr0_vgpr1 %1:_(<2 x s32>) = COPY $vgpr2_vgpr3 %2:_(<2 x s32>) = G_SADDSAT %0, %1 @@ -868,70 +868,70 @@ body: | ; GFX6-LABEL: name: saddsat_s64 ; GFX6: [[COPY:%[0-9]+]]:_(s64) = COPY $vgpr0_vgpr1 - ; GFX6: [[COPY1:%[0-9]+]]:_(s64) = COPY $vgpr2_vgpr3 - ; GFX6: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](s64) - ; GFX6: [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](s64) - ; GFX6: [[UADDO:%[0-9]+]]:_(s32), [[UADDO1:%[0-9]+]]:_(s1) = G_UADDO [[UV]], [[UV2]] - ; GFX6: [[UADDE:%[0-9]+]]:_(s32), [[UADDE1:%[0-9]+]]:_(s1) = G_UADDE [[UV1]], [[UV3]], [[UADDO1]] - ; GFX6: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UADDO]](s32), [[UADDE]](s32) - ; GFX6: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 - ; GFX6: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(slt), [[MV]](s64), [[COPY]] - ; GFX6: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(slt), [[COPY1]](s64), [[C]] - ; GFX6: [[XOR:%[0-9]+]]:_(s1) = G_XOR [[ICMP1]], [[ICMP]] - ; GFX6: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 63 - ; GFX6: [[ASHR:%[0-9]+]]:_(s64) = G_ASHR [[MV]], [[C1]](s32) - ; GFX6: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 -9223372036854775808 - ; GFX6: [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[ASHR]](s64) - ; GFX6: [[UV6:%[0-9]+]]:_(s32), [[UV7:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[C2]](s64) - ; GFX6: [[UADDO2:%[0-9]+]]:_(s32), [[UADDO3:%[0-9]+]]:_(s1) = G_UADDO [[UV4]], [[UV6]] - ; GFX6: [[UADDE2:%[0-9]+]]:_(s32), [[UADDE3:%[0-9]+]]:_(s1) = G_UADDE [[UV5]], [[UV7]], [[UADDO3]] - ; GFX6: [[MV1:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UADDO2]](s32), [[UADDE2]](s32) - ; GFX6: [[SELECT:%[0-9]+]]:_(s64) = G_SELECT [[XOR]](s1), [[MV1]], [[MV]] - ; GFX6: $vgpr0_vgpr1 = COPY [[SELECT]](s64) + ; GFX6-NEXT: [[COPY1:%[0-9]+]]:_(s64) = COPY $vgpr2_vgpr3 + ; GFX6-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](s64) + ; GFX6-NEXT: [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](s64) + ; GFX6-NEXT: 
[[UADDO:%[0-9]+]]:_(s32), [[UADDO1:%[0-9]+]]:_(s1) = G_UADDO [[UV]], [[UV2]] + ; GFX6-NEXT: [[UADDE:%[0-9]+]]:_(s32), [[UADDE1:%[0-9]+]]:_(s1) = G_UADDE [[UV1]], [[UV3]], [[UADDO1]] + ; GFX6-NEXT: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UADDO]](s32), [[UADDE]](s32) + ; GFX6-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 + ; GFX6-NEXT: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(slt), [[MV]](s64), [[COPY]] + ; GFX6-NEXT: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(slt), [[COPY1]](s64), [[C]] + ; GFX6-NEXT: [[XOR:%[0-9]+]]:_(s1) = G_XOR [[ICMP1]], [[ICMP]] + ; GFX6-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 63 + ; GFX6-NEXT: [[ASHR:%[0-9]+]]:_(s64) = G_ASHR [[MV]], [[C1]](s32) + ; GFX6-NEXT: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 -9223372036854775808 + ; GFX6-NEXT: [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[ASHR]](s64) + ; GFX6-NEXT: [[UV6:%[0-9]+]]:_(s32), [[UV7:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[C2]](s64) + ; GFX6-NEXT: [[UADDO2:%[0-9]+]]:_(s32), [[UADDO3:%[0-9]+]]:_(s1) = G_UADDO [[UV4]], [[UV6]] + ; GFX6-NEXT: [[UADDE2:%[0-9]+]]:_(s32), [[UADDE3:%[0-9]+]]:_(s1) = G_UADDE [[UV5]], [[UV7]], [[UADDO3]] + ; GFX6-NEXT: [[MV1:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UADDO2]](s32), [[UADDE2]](s32) + ; GFX6-NEXT: [[SELECT:%[0-9]+]]:_(s64) = G_SELECT [[XOR]](s1), [[MV1]], [[MV]] + ; GFX6-NEXT: $vgpr0_vgpr1 = COPY [[SELECT]](s64) ; GFX8-LABEL: name: saddsat_s64 ; GFX8: [[COPY:%[0-9]+]]:_(s64) = COPY $vgpr0_vgpr1 - ; GFX8: [[COPY1:%[0-9]+]]:_(s64) = COPY $vgpr2_vgpr3 - ; GFX8: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](s64) - ; GFX8: [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](s64) - ; GFX8: [[UADDO:%[0-9]+]]:_(s32), [[UADDO1:%[0-9]+]]:_(s1) = G_UADDO [[UV]], [[UV2]] - ; GFX8: [[UADDE:%[0-9]+]]:_(s32), [[UADDE1:%[0-9]+]]:_(s1) = G_UADDE [[UV1]], [[UV3]], [[UADDO1]] - ; GFX8: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UADDO]](s32), [[UADDE]](s32) - ; GFX8: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 - ; GFX8: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(slt), [[MV]](s64), [[COPY]] - ; GFX8: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(slt), [[COPY1]](s64), [[C]] - ; GFX8: [[XOR:%[0-9]+]]:_(s1) = G_XOR [[ICMP1]], [[ICMP]] - ; GFX8: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 63 - ; GFX8: [[ASHR:%[0-9]+]]:_(s64) = G_ASHR [[MV]], [[C1]](s32) - ; GFX8: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 -9223372036854775808 - ; GFX8: [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[ASHR]](s64) - ; GFX8: [[UV6:%[0-9]+]]:_(s32), [[UV7:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[C2]](s64) - ; GFX8: [[UADDO2:%[0-9]+]]:_(s32), [[UADDO3:%[0-9]+]]:_(s1) = G_UADDO [[UV4]], [[UV6]] - ; GFX8: [[UADDE2:%[0-9]+]]:_(s32), [[UADDE3:%[0-9]+]]:_(s1) = G_UADDE [[UV5]], [[UV7]], [[UADDO3]] - ; GFX8: [[MV1:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UADDO2]](s32), [[UADDE2]](s32) - ; GFX8: [[SELECT:%[0-9]+]]:_(s64) = G_SELECT [[XOR]](s1), [[MV1]], [[MV]] - ; GFX8: $vgpr0_vgpr1 = COPY [[SELECT]](s64) + ; GFX8-NEXT: [[COPY1:%[0-9]+]]:_(s64) = COPY $vgpr2_vgpr3 + ; GFX8-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](s64) + ; GFX8-NEXT: [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](s64) + ; GFX8-NEXT: [[UADDO:%[0-9]+]]:_(s32), [[UADDO1:%[0-9]+]]:_(s1) = G_UADDO [[UV]], [[UV2]] + ; GFX8-NEXT: [[UADDE:%[0-9]+]]:_(s32), [[UADDE1:%[0-9]+]]:_(s1) = G_UADDE [[UV1]], [[UV3]], [[UADDO1]] + ; GFX8-NEXT: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UADDO]](s32), [[UADDE]](s32) + ; GFX8-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT 
i64 0 + ; GFX8-NEXT: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(slt), [[MV]](s64), [[COPY]] + ; GFX8-NEXT: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(slt), [[COPY1]](s64), [[C]] + ; GFX8-NEXT: [[XOR:%[0-9]+]]:_(s1) = G_XOR [[ICMP1]], [[ICMP]] + ; GFX8-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 63 + ; GFX8-NEXT: [[ASHR:%[0-9]+]]:_(s64) = G_ASHR [[MV]], [[C1]](s32) + ; GFX8-NEXT: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 -9223372036854775808 + ; GFX8-NEXT: [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[ASHR]](s64) + ; GFX8-NEXT: [[UV6:%[0-9]+]]:_(s32), [[UV7:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[C2]](s64) + ; GFX8-NEXT: [[UADDO2:%[0-9]+]]:_(s32), [[UADDO3:%[0-9]+]]:_(s1) = G_UADDO [[UV4]], [[UV6]] + ; GFX8-NEXT: [[UADDE2:%[0-9]+]]:_(s32), [[UADDE3:%[0-9]+]]:_(s1) = G_UADDE [[UV5]], [[UV7]], [[UADDO3]] + ; GFX8-NEXT: [[MV1:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UADDO2]](s32), [[UADDE2]](s32) + ; GFX8-NEXT: [[SELECT:%[0-9]+]]:_(s64) = G_SELECT [[XOR]](s1), [[MV1]], [[MV]] + ; GFX8-NEXT: $vgpr0_vgpr1 = COPY [[SELECT]](s64) ; GFX9-LABEL: name: saddsat_s64 ; GFX9: [[COPY:%[0-9]+]]:_(s64) = COPY $vgpr0_vgpr1 - ; GFX9: [[COPY1:%[0-9]+]]:_(s64) = COPY $vgpr2_vgpr3 - ; GFX9: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](s64) - ; GFX9: [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](s64) - ; GFX9: [[UADDO:%[0-9]+]]:_(s32), [[UADDO1:%[0-9]+]]:_(s1) = G_UADDO [[UV]], [[UV2]] - ; GFX9: [[UADDE:%[0-9]+]]:_(s32), [[UADDE1:%[0-9]+]]:_(s1) = G_UADDE [[UV1]], [[UV3]], [[UADDO1]] - ; GFX9: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UADDO]](s32), [[UADDE]](s32) - ; GFX9: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 - ; GFX9: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(slt), [[MV]](s64), [[COPY]] - ; GFX9: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(slt), [[COPY1]](s64), [[C]] - ; GFX9: [[XOR:%[0-9]+]]:_(s1) = G_XOR [[ICMP1]], [[ICMP]] - ; GFX9: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 63 - ; GFX9: [[ASHR:%[0-9]+]]:_(s64) = G_ASHR [[MV]], [[C1]](s32) - ; GFX9: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 -9223372036854775808 - ; GFX9: [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[ASHR]](s64) - ; GFX9: [[UV6:%[0-9]+]]:_(s32), [[UV7:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[C2]](s64) - ; GFX9: [[UADDO2:%[0-9]+]]:_(s32), [[UADDO3:%[0-9]+]]:_(s1) = G_UADDO [[UV4]], [[UV6]] - ; GFX9: [[UADDE2:%[0-9]+]]:_(s32), [[UADDE3:%[0-9]+]]:_(s1) = G_UADDE [[UV5]], [[UV7]], [[UADDO3]] - ; GFX9: [[MV1:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UADDO2]](s32), [[UADDE2]](s32) - ; GFX9: [[SELECT:%[0-9]+]]:_(s64) = G_SELECT [[XOR]](s1), [[MV1]], [[MV]] - ; GFX9: $vgpr0_vgpr1 = COPY [[SELECT]](s64) + ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(s64) = COPY $vgpr2_vgpr3 + ; GFX9-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](s64) + ; GFX9-NEXT: [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](s64) + ; GFX9-NEXT: [[UADDO:%[0-9]+]]:_(s32), [[UADDO1:%[0-9]+]]:_(s1) = G_UADDO [[UV]], [[UV2]] + ; GFX9-NEXT: [[UADDE:%[0-9]+]]:_(s32), [[UADDE1:%[0-9]+]]:_(s1) = G_UADDE [[UV1]], [[UV3]], [[UADDO1]] + ; GFX9-NEXT: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UADDO]](s32), [[UADDE]](s32) + ; GFX9-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 + ; GFX9-NEXT: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(slt), [[MV]](s64), [[COPY]] + ; GFX9-NEXT: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(slt), [[COPY1]](s64), [[C]] + ; GFX9-NEXT: [[XOR:%[0-9]+]]:_(s1) = G_XOR [[ICMP1]], [[ICMP]] + ; GFX9-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 63 + ; GFX9-NEXT: 
[[ASHR:%[0-9]+]]:_(s64) = G_ASHR [[MV]], [[C1]](s32) + ; GFX9-NEXT: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 -9223372036854775808 + ; GFX9-NEXT: [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[ASHR]](s64) + ; GFX9-NEXT: [[UV6:%[0-9]+]]:_(s32), [[UV7:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[C2]](s64) + ; GFX9-NEXT: [[UADDO2:%[0-9]+]]:_(s32), [[UADDO3:%[0-9]+]]:_(s1) = G_UADDO [[UV4]], [[UV6]] + ; GFX9-NEXT: [[UADDE2:%[0-9]+]]:_(s32), [[UADDE3:%[0-9]+]]:_(s1) = G_UADDE [[UV5]], [[UV7]], [[UADDO3]] + ; GFX9-NEXT: [[MV1:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UADDO2]](s32), [[UADDE2]](s32) + ; GFX9-NEXT: [[SELECT:%[0-9]+]]:_(s64) = G_SELECT [[XOR]](s1), [[MV1]], [[MV]] + ; GFX9-NEXT: $vgpr0_vgpr1 = COPY [[SELECT]](s64) %0:_(s64) = COPY $vgpr0_vgpr1 %1:_(s64) = COPY $vgpr2_vgpr3 %2:_(s64) = G_SADDSAT %0, %1 @@ -946,124 +946,124 @@ body: | ; GFX6-LABEL: name: saddsat_v2s64 ; GFX6: [[COPY:%[0-9]+]]:_(<2 x s64>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3 - ; GFX6: [[COPY1:%[0-9]+]]:_(<2 x s64>) = COPY $vgpr4_vgpr5_vgpr6_vgpr7 - ; GFX6: [[UV:%[0-9]+]]:_(s64), [[UV1:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[COPY]](<2 x s64>) - ; GFX6: [[UV2:%[0-9]+]]:_(s64), [[UV3:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[COPY1]](<2 x s64>) - ; GFX6: [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[UV]](s64) - ; GFX6: [[UV6:%[0-9]+]]:_(s32), [[UV7:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[UV2]](s64) - ; GFX6: [[UADDO:%[0-9]+]]:_(s32), [[UADDO1:%[0-9]+]]:_(s1) = G_UADDO [[UV4]], [[UV6]] - ; GFX6: [[UADDE:%[0-9]+]]:_(s32), [[UADDE1:%[0-9]+]]:_(s1) = G_UADDE [[UV5]], [[UV7]], [[UADDO1]] - ; GFX6: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UADDO]](s32), [[UADDE]](s32) - ; GFX6: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 - ; GFX6: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(slt), [[MV]](s64), [[UV]] - ; GFX6: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(slt), [[UV2]](s64), [[C]] - ; GFX6: [[XOR:%[0-9]+]]:_(s1) = G_XOR [[ICMP1]], [[ICMP]] - ; GFX6: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 63 - ; GFX6: [[ASHR:%[0-9]+]]:_(s64) = G_ASHR [[MV]], [[C1]](s32) - ; GFX6: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 -9223372036854775808 - ; GFX6: [[UV8:%[0-9]+]]:_(s32), [[UV9:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[ASHR]](s64) - ; GFX6: [[UV10:%[0-9]+]]:_(s32), [[UV11:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[C2]](s64) - ; GFX6: [[UADDO2:%[0-9]+]]:_(s32), [[UADDO3:%[0-9]+]]:_(s1) = G_UADDO [[UV8]], [[UV10]] - ; GFX6: [[UADDE2:%[0-9]+]]:_(s32), [[UADDE3:%[0-9]+]]:_(s1) = G_UADDE [[UV9]], [[UV11]], [[UADDO3]] - ; GFX6: [[MV1:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UADDO2]](s32), [[UADDE2]](s32) - ; GFX6: [[SELECT:%[0-9]+]]:_(s64) = G_SELECT [[XOR]](s1), [[MV1]], [[MV]] - ; GFX6: [[UV12:%[0-9]+]]:_(s32), [[UV13:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[UV1]](s64) - ; GFX6: [[UV14:%[0-9]+]]:_(s32), [[UV15:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[UV3]](s64) - ; GFX6: [[UADDO4:%[0-9]+]]:_(s32), [[UADDO5:%[0-9]+]]:_(s1) = G_UADDO [[UV12]], [[UV14]] - ; GFX6: [[UADDE4:%[0-9]+]]:_(s32), [[UADDE5:%[0-9]+]]:_(s1) = G_UADDE [[UV13]], [[UV15]], [[UADDO5]] - ; GFX6: [[MV2:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UADDO4]](s32), [[UADDE4]](s32) - ; GFX6: [[ICMP2:%[0-9]+]]:_(s1) = G_ICMP intpred(slt), [[MV2]](s64), [[UV1]] - ; GFX6: [[ICMP3:%[0-9]+]]:_(s1) = G_ICMP intpred(slt), [[UV3]](s64), [[C]] - ; GFX6: [[XOR1:%[0-9]+]]:_(s1) = G_XOR [[ICMP3]], [[ICMP2]] - ; GFX6: [[ASHR1:%[0-9]+]]:_(s64) = G_ASHR [[MV2]], [[C1]](s32) - ; GFX6: [[UV16:%[0-9]+]]:_(s32), [[UV17:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[ASHR1]](s64) - ; GFX6: [[UV18:%[0-9]+]]:_(s32), [[UV19:%[0-9]+]]:_(s32) = 
G_UNMERGE_VALUES [[C2]](s64) - ; GFX6: [[UADDO6:%[0-9]+]]:_(s32), [[UADDO7:%[0-9]+]]:_(s1) = G_UADDO [[UV16]], [[UV18]] - ; GFX6: [[UADDE6:%[0-9]+]]:_(s32), [[UADDE7:%[0-9]+]]:_(s1) = G_UADDE [[UV17]], [[UV19]], [[UADDO7]] - ; GFX6: [[MV3:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UADDO6]](s32), [[UADDE6]](s32) - ; GFX6: [[SELECT1:%[0-9]+]]:_(s64) = G_SELECT [[XOR1]](s1), [[MV3]], [[MV2]] - ; GFX6: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s64>) = G_BUILD_VECTOR [[SELECT]](s64), [[SELECT1]](s64) - ; GFX6: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<2 x s64>) + ; GFX6-NEXT: [[COPY1:%[0-9]+]]:_(<2 x s64>) = COPY $vgpr4_vgpr5_vgpr6_vgpr7 + ; GFX6-NEXT: [[UV:%[0-9]+]]:_(s64), [[UV1:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[COPY]](<2 x s64>) + ; GFX6-NEXT: [[UV2:%[0-9]+]]:_(s64), [[UV3:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[COPY1]](<2 x s64>) + ; GFX6-NEXT: [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[UV]](s64) + ; GFX6-NEXT: [[UV6:%[0-9]+]]:_(s32), [[UV7:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[UV2]](s64) + ; GFX6-NEXT: [[UADDO:%[0-9]+]]:_(s32), [[UADDO1:%[0-9]+]]:_(s1) = G_UADDO [[UV4]], [[UV6]] + ; GFX6-NEXT: [[UADDE:%[0-9]+]]:_(s32), [[UADDE1:%[0-9]+]]:_(s1) = G_UADDE [[UV5]], [[UV7]], [[UADDO1]] + ; GFX6-NEXT: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UADDO]](s32), [[UADDE]](s32) + ; GFX6-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 + ; GFX6-NEXT: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(slt), [[MV]](s64), [[UV]] + ; GFX6-NEXT: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(slt), [[UV2]](s64), [[C]] + ; GFX6-NEXT: [[XOR:%[0-9]+]]:_(s1) = G_XOR [[ICMP1]], [[ICMP]] + ; GFX6-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 63 + ; GFX6-NEXT: [[ASHR:%[0-9]+]]:_(s64) = G_ASHR [[MV]], [[C1]](s32) + ; GFX6-NEXT: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 -9223372036854775808 + ; GFX6-NEXT: [[UV8:%[0-9]+]]:_(s32), [[UV9:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[ASHR]](s64) + ; GFX6-NEXT: [[UV10:%[0-9]+]]:_(s32), [[UV11:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[C2]](s64) + ; GFX6-NEXT: [[UADDO2:%[0-9]+]]:_(s32), [[UADDO3:%[0-9]+]]:_(s1) = G_UADDO [[UV8]], [[UV10]] + ; GFX6-NEXT: [[UADDE2:%[0-9]+]]:_(s32), [[UADDE3:%[0-9]+]]:_(s1) = G_UADDE [[UV9]], [[UV11]], [[UADDO3]] + ; GFX6-NEXT: [[MV1:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UADDO2]](s32), [[UADDE2]](s32) + ; GFX6-NEXT: [[SELECT:%[0-9]+]]:_(s64) = G_SELECT [[XOR]](s1), [[MV1]], [[MV]] + ; GFX6-NEXT: [[UV12:%[0-9]+]]:_(s32), [[UV13:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[UV1]](s64) + ; GFX6-NEXT: [[UV14:%[0-9]+]]:_(s32), [[UV15:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[UV3]](s64) + ; GFX6-NEXT: [[UADDO4:%[0-9]+]]:_(s32), [[UADDO5:%[0-9]+]]:_(s1) = G_UADDO [[UV12]], [[UV14]] + ; GFX6-NEXT: [[UADDE4:%[0-9]+]]:_(s32), [[UADDE5:%[0-9]+]]:_(s1) = G_UADDE [[UV13]], [[UV15]], [[UADDO5]] + ; GFX6-NEXT: [[MV2:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UADDO4]](s32), [[UADDE4]](s32) + ; GFX6-NEXT: [[ICMP2:%[0-9]+]]:_(s1) = G_ICMP intpred(slt), [[MV2]](s64), [[UV1]] + ; GFX6-NEXT: [[ICMP3:%[0-9]+]]:_(s1) = G_ICMP intpred(slt), [[UV3]](s64), [[C]] + ; GFX6-NEXT: [[XOR1:%[0-9]+]]:_(s1) = G_XOR [[ICMP3]], [[ICMP2]] + ; GFX6-NEXT: [[ASHR1:%[0-9]+]]:_(s64) = G_ASHR [[MV2]], [[C1]](s32) + ; GFX6-NEXT: [[UV16:%[0-9]+]]:_(s32), [[UV17:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[ASHR1]](s64) + ; GFX6-NEXT: [[UV18:%[0-9]+]]:_(s32), [[UV19:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[C2]](s64) + ; GFX6-NEXT: [[UADDO6:%[0-9]+]]:_(s32), [[UADDO7:%[0-9]+]]:_(s1) = G_UADDO [[UV16]], [[UV18]] + ; GFX6-NEXT: [[UADDE6:%[0-9]+]]:_(s32), [[UADDE7:%[0-9]+]]:_(s1) = G_UADDE [[UV17]], [[UV19]], [[UADDO7]] + ; GFX6-NEXT: 
[[MV3:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UADDO6]](s32), [[UADDE6]](s32) + ; GFX6-NEXT: [[SELECT1:%[0-9]+]]:_(s64) = G_SELECT [[XOR1]](s1), [[MV3]], [[MV2]] + ; GFX6-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s64>) = G_BUILD_VECTOR [[SELECT]](s64), [[SELECT1]](s64) + ; GFX6-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<2 x s64>) ; GFX8-LABEL: name: saddsat_v2s64 ; GFX8: [[COPY:%[0-9]+]]:_(<2 x s64>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3 - ; GFX8: [[COPY1:%[0-9]+]]:_(<2 x s64>) = COPY $vgpr4_vgpr5_vgpr6_vgpr7 - ; GFX8: [[UV:%[0-9]+]]:_(s64), [[UV1:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[COPY]](<2 x s64>) - ; GFX8: [[UV2:%[0-9]+]]:_(s64), [[UV3:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[COPY1]](<2 x s64>) - ; GFX8: [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[UV]](s64) - ; GFX8: [[UV6:%[0-9]+]]:_(s32), [[UV7:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[UV2]](s64) - ; GFX8: [[UADDO:%[0-9]+]]:_(s32), [[UADDO1:%[0-9]+]]:_(s1) = G_UADDO [[UV4]], [[UV6]] - ; GFX8: [[UADDE:%[0-9]+]]:_(s32), [[UADDE1:%[0-9]+]]:_(s1) = G_UADDE [[UV5]], [[UV7]], [[UADDO1]] - ; GFX8: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UADDO]](s32), [[UADDE]](s32) - ; GFX8: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 - ; GFX8: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(slt), [[MV]](s64), [[UV]] - ; GFX8: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(slt), [[UV2]](s64), [[C]] - ; GFX8: [[XOR:%[0-9]+]]:_(s1) = G_XOR [[ICMP1]], [[ICMP]] - ; GFX8: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 63 - ; GFX8: [[ASHR:%[0-9]+]]:_(s64) = G_ASHR [[MV]], [[C1]](s32) - ; GFX8: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 -9223372036854775808 - ; GFX8: [[UV8:%[0-9]+]]:_(s32), [[UV9:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[ASHR]](s64) - ; GFX8: [[UV10:%[0-9]+]]:_(s32), [[UV11:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[C2]](s64) - ; GFX8: [[UADDO2:%[0-9]+]]:_(s32), [[UADDO3:%[0-9]+]]:_(s1) = G_UADDO [[UV8]], [[UV10]] - ; GFX8: [[UADDE2:%[0-9]+]]:_(s32), [[UADDE3:%[0-9]+]]:_(s1) = G_UADDE [[UV9]], [[UV11]], [[UADDO3]] - ; GFX8: [[MV1:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UADDO2]](s32), [[UADDE2]](s32) - ; GFX8: [[SELECT:%[0-9]+]]:_(s64) = G_SELECT [[XOR]](s1), [[MV1]], [[MV]] - ; GFX8: [[UV12:%[0-9]+]]:_(s32), [[UV13:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[UV1]](s64) - ; GFX8: [[UV14:%[0-9]+]]:_(s32), [[UV15:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[UV3]](s64) - ; GFX8: [[UADDO4:%[0-9]+]]:_(s32), [[UADDO5:%[0-9]+]]:_(s1) = G_UADDO [[UV12]], [[UV14]] - ; GFX8: [[UADDE4:%[0-9]+]]:_(s32), [[UADDE5:%[0-9]+]]:_(s1) = G_UADDE [[UV13]], [[UV15]], [[UADDO5]] - ; GFX8: [[MV2:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UADDO4]](s32), [[UADDE4]](s32) - ; GFX8: [[ICMP2:%[0-9]+]]:_(s1) = G_ICMP intpred(slt), [[MV2]](s64), [[UV1]] - ; GFX8: [[ICMP3:%[0-9]+]]:_(s1) = G_ICMP intpred(slt), [[UV3]](s64), [[C]] - ; GFX8: [[XOR1:%[0-9]+]]:_(s1) = G_XOR [[ICMP3]], [[ICMP2]] - ; GFX8: [[ASHR1:%[0-9]+]]:_(s64) = G_ASHR [[MV2]], [[C1]](s32) - ; GFX8: [[UV16:%[0-9]+]]:_(s32), [[UV17:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[ASHR1]](s64) - ; GFX8: [[UV18:%[0-9]+]]:_(s32), [[UV19:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[C2]](s64) - ; GFX8: [[UADDO6:%[0-9]+]]:_(s32), [[UADDO7:%[0-9]+]]:_(s1) = G_UADDO [[UV16]], [[UV18]] - ; GFX8: [[UADDE6:%[0-9]+]]:_(s32), [[UADDE7:%[0-9]+]]:_(s1) = G_UADDE [[UV17]], [[UV19]], [[UADDO7]] - ; GFX8: [[MV3:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UADDO6]](s32), [[UADDE6]](s32) - ; GFX8: [[SELECT1:%[0-9]+]]:_(s64) = G_SELECT [[XOR1]](s1), [[MV3]], [[MV2]] - ; GFX8: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s64>) = G_BUILD_VECTOR [[SELECT]](s64), [[SELECT1]](s64) - ; GFX8: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY 
[[BUILD_VECTOR]](<2 x s64>) + ; GFX8-NEXT: [[COPY1:%[0-9]+]]:_(<2 x s64>) = COPY $vgpr4_vgpr5_vgpr6_vgpr7 + ; GFX8-NEXT: [[UV:%[0-9]+]]:_(s64), [[UV1:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[COPY]](<2 x s64>) + ; GFX8-NEXT: [[UV2:%[0-9]+]]:_(s64), [[UV3:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[COPY1]](<2 x s64>) + ; GFX8-NEXT: [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[UV]](s64) + ; GFX8-NEXT: [[UV6:%[0-9]+]]:_(s32), [[UV7:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[UV2]](s64) + ; GFX8-NEXT: [[UADDO:%[0-9]+]]:_(s32), [[UADDO1:%[0-9]+]]:_(s1) = G_UADDO [[UV4]], [[UV6]] + ; GFX8-NEXT: [[UADDE:%[0-9]+]]:_(s32), [[UADDE1:%[0-9]+]]:_(s1) = G_UADDE [[UV5]], [[UV7]], [[UADDO1]] + ; GFX8-NEXT: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UADDO]](s32), [[UADDE]](s32) + ; GFX8-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 + ; GFX8-NEXT: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(slt), [[MV]](s64), [[UV]] + ; GFX8-NEXT: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(slt), [[UV2]](s64), [[C]] + ; GFX8-NEXT: [[XOR:%[0-9]+]]:_(s1) = G_XOR [[ICMP1]], [[ICMP]] + ; GFX8-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 63 + ; GFX8-NEXT: [[ASHR:%[0-9]+]]:_(s64) = G_ASHR [[MV]], [[C1]](s32) + ; GFX8-NEXT: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 -9223372036854775808 + ; GFX8-NEXT: [[UV8:%[0-9]+]]:_(s32), [[UV9:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[ASHR]](s64) + ; GFX8-NEXT: [[UV10:%[0-9]+]]:_(s32), [[UV11:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[C2]](s64) + ; GFX8-NEXT: [[UADDO2:%[0-9]+]]:_(s32), [[UADDO3:%[0-9]+]]:_(s1) = G_UADDO [[UV8]], [[UV10]] + ; GFX8-NEXT: [[UADDE2:%[0-9]+]]:_(s32), [[UADDE3:%[0-9]+]]:_(s1) = G_UADDE [[UV9]], [[UV11]], [[UADDO3]] + ; GFX8-NEXT: [[MV1:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UADDO2]](s32), [[UADDE2]](s32) + ; GFX8-NEXT: [[SELECT:%[0-9]+]]:_(s64) = G_SELECT [[XOR]](s1), [[MV1]], [[MV]] + ; GFX8-NEXT: [[UV12:%[0-9]+]]:_(s32), [[UV13:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[UV1]](s64) + ; GFX8-NEXT: [[UV14:%[0-9]+]]:_(s32), [[UV15:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[UV3]](s64) + ; GFX8-NEXT: [[UADDO4:%[0-9]+]]:_(s32), [[UADDO5:%[0-9]+]]:_(s1) = G_UADDO [[UV12]], [[UV14]] + ; GFX8-NEXT: [[UADDE4:%[0-9]+]]:_(s32), [[UADDE5:%[0-9]+]]:_(s1) = G_UADDE [[UV13]], [[UV15]], [[UADDO5]] + ; GFX8-NEXT: [[MV2:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UADDO4]](s32), [[UADDE4]](s32) + ; GFX8-NEXT: [[ICMP2:%[0-9]+]]:_(s1) = G_ICMP intpred(slt), [[MV2]](s64), [[UV1]] + ; GFX8-NEXT: [[ICMP3:%[0-9]+]]:_(s1) = G_ICMP intpred(slt), [[UV3]](s64), [[C]] + ; GFX8-NEXT: [[XOR1:%[0-9]+]]:_(s1) = G_XOR [[ICMP3]], [[ICMP2]] + ; GFX8-NEXT: [[ASHR1:%[0-9]+]]:_(s64) = G_ASHR [[MV2]], [[C1]](s32) + ; GFX8-NEXT: [[UV16:%[0-9]+]]:_(s32), [[UV17:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[ASHR1]](s64) + ; GFX8-NEXT: [[UV18:%[0-9]+]]:_(s32), [[UV19:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[C2]](s64) + ; GFX8-NEXT: [[UADDO6:%[0-9]+]]:_(s32), [[UADDO7:%[0-9]+]]:_(s1) = G_UADDO [[UV16]], [[UV18]] + ; GFX8-NEXT: [[UADDE6:%[0-9]+]]:_(s32), [[UADDE7:%[0-9]+]]:_(s1) = G_UADDE [[UV17]], [[UV19]], [[UADDO7]] + ; GFX8-NEXT: [[MV3:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UADDO6]](s32), [[UADDE6]](s32) + ; GFX8-NEXT: [[SELECT1:%[0-9]+]]:_(s64) = G_SELECT [[XOR1]](s1), [[MV3]], [[MV2]] + ; GFX8-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s64>) = G_BUILD_VECTOR [[SELECT]](s64), [[SELECT1]](s64) + ; GFX8-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<2 x s64>) ; GFX9-LABEL: name: saddsat_v2s64 ; GFX9: [[COPY:%[0-9]+]]:_(<2 x s64>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3 - ; GFX9: [[COPY1:%[0-9]+]]:_(<2 x s64>) = COPY $vgpr4_vgpr5_vgpr6_vgpr7 - ; GFX9: 
[[UV:%[0-9]+]]:_(s64), [[UV1:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[COPY]](<2 x s64>) - ; GFX9: [[UV2:%[0-9]+]]:_(s64), [[UV3:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[COPY1]](<2 x s64>) - ; GFX9: [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[UV]](s64) - ; GFX9: [[UV6:%[0-9]+]]:_(s32), [[UV7:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[UV2]](s64) - ; GFX9: [[UADDO:%[0-9]+]]:_(s32), [[UADDO1:%[0-9]+]]:_(s1) = G_UADDO [[UV4]], [[UV6]] - ; GFX9: [[UADDE:%[0-9]+]]:_(s32), [[UADDE1:%[0-9]+]]:_(s1) = G_UADDE [[UV5]], [[UV7]], [[UADDO1]] - ; GFX9: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UADDO]](s32), [[UADDE]](s32) - ; GFX9: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 - ; GFX9: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(slt), [[MV]](s64), [[UV]] - ; GFX9: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(slt), [[UV2]](s64), [[C]] - ; GFX9: [[XOR:%[0-9]+]]:_(s1) = G_XOR [[ICMP1]], [[ICMP]] - ; GFX9: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 63 - ; GFX9: [[ASHR:%[0-9]+]]:_(s64) = G_ASHR [[MV]], [[C1]](s32) - ; GFX9: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 -9223372036854775808 - ; GFX9: [[UV8:%[0-9]+]]:_(s32), [[UV9:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[ASHR]](s64) - ; GFX9: [[UV10:%[0-9]+]]:_(s32), [[UV11:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[C2]](s64) - ; GFX9: [[UADDO2:%[0-9]+]]:_(s32), [[UADDO3:%[0-9]+]]:_(s1) = G_UADDO [[UV8]], [[UV10]] - ; GFX9: [[UADDE2:%[0-9]+]]:_(s32), [[UADDE3:%[0-9]+]]:_(s1) = G_UADDE [[UV9]], [[UV11]], [[UADDO3]] - ; GFX9: [[MV1:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UADDO2]](s32), [[UADDE2]](s32) - ; GFX9: [[SELECT:%[0-9]+]]:_(s64) = G_SELECT [[XOR]](s1), [[MV1]], [[MV]] - ; GFX9: [[UV12:%[0-9]+]]:_(s32), [[UV13:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[UV1]](s64) - ; GFX9: [[UV14:%[0-9]+]]:_(s32), [[UV15:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[UV3]](s64) - ; GFX9: [[UADDO4:%[0-9]+]]:_(s32), [[UADDO5:%[0-9]+]]:_(s1) = G_UADDO [[UV12]], [[UV14]] - ; GFX9: [[UADDE4:%[0-9]+]]:_(s32), [[UADDE5:%[0-9]+]]:_(s1) = G_UADDE [[UV13]], [[UV15]], [[UADDO5]] - ; GFX9: [[MV2:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UADDO4]](s32), [[UADDE4]](s32) - ; GFX9: [[ICMP2:%[0-9]+]]:_(s1) = G_ICMP intpred(slt), [[MV2]](s64), [[UV1]] - ; GFX9: [[ICMP3:%[0-9]+]]:_(s1) = G_ICMP intpred(slt), [[UV3]](s64), [[C]] - ; GFX9: [[XOR1:%[0-9]+]]:_(s1) = G_XOR [[ICMP3]], [[ICMP2]] - ; GFX9: [[ASHR1:%[0-9]+]]:_(s64) = G_ASHR [[MV2]], [[C1]](s32) - ; GFX9: [[UV16:%[0-9]+]]:_(s32), [[UV17:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[ASHR1]](s64) - ; GFX9: [[UV18:%[0-9]+]]:_(s32), [[UV19:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[C2]](s64) - ; GFX9: [[UADDO6:%[0-9]+]]:_(s32), [[UADDO7:%[0-9]+]]:_(s1) = G_UADDO [[UV16]], [[UV18]] - ; GFX9: [[UADDE6:%[0-9]+]]:_(s32), [[UADDE7:%[0-9]+]]:_(s1) = G_UADDE [[UV17]], [[UV19]], [[UADDO7]] - ; GFX9: [[MV3:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UADDO6]](s32), [[UADDE6]](s32) - ; GFX9: [[SELECT1:%[0-9]+]]:_(s64) = G_SELECT [[XOR1]](s1), [[MV3]], [[MV2]] - ; GFX9: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s64>) = G_BUILD_VECTOR [[SELECT]](s64), [[SELECT1]](s64) - ; GFX9: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<2 x s64>) + ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(<2 x s64>) = COPY $vgpr4_vgpr5_vgpr6_vgpr7 + ; GFX9-NEXT: [[UV:%[0-9]+]]:_(s64), [[UV1:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[COPY]](<2 x s64>) + ; GFX9-NEXT: [[UV2:%[0-9]+]]:_(s64), [[UV3:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[COPY1]](<2 x s64>) + ; GFX9-NEXT: [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[UV]](s64) + ; GFX9-NEXT: [[UV6:%[0-9]+]]:_(s32), [[UV7:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[UV2]](s64) + ; GFX9-NEXT: 
[[UADDO:%[0-9]+]]:_(s32), [[UADDO1:%[0-9]+]]:_(s1) = G_UADDO [[UV4]], [[UV6]] + ; GFX9-NEXT: [[UADDE:%[0-9]+]]:_(s32), [[UADDE1:%[0-9]+]]:_(s1) = G_UADDE [[UV5]], [[UV7]], [[UADDO1]] + ; GFX9-NEXT: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UADDO]](s32), [[UADDE]](s32) + ; GFX9-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 + ; GFX9-NEXT: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(slt), [[MV]](s64), [[UV]] + ; GFX9-NEXT: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(slt), [[UV2]](s64), [[C]] + ; GFX9-NEXT: [[XOR:%[0-9]+]]:_(s1) = G_XOR [[ICMP1]], [[ICMP]] + ; GFX9-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 63 + ; GFX9-NEXT: [[ASHR:%[0-9]+]]:_(s64) = G_ASHR [[MV]], [[C1]](s32) + ; GFX9-NEXT: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 -9223372036854775808 + ; GFX9-NEXT: [[UV8:%[0-9]+]]:_(s32), [[UV9:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[ASHR]](s64) + ; GFX9-NEXT: [[UV10:%[0-9]+]]:_(s32), [[UV11:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[C2]](s64) + ; GFX9-NEXT: [[UADDO2:%[0-9]+]]:_(s32), [[UADDO3:%[0-9]+]]:_(s1) = G_UADDO [[UV8]], [[UV10]] + ; GFX9-NEXT: [[UADDE2:%[0-9]+]]:_(s32), [[UADDE3:%[0-9]+]]:_(s1) = G_UADDE [[UV9]], [[UV11]], [[UADDO3]] + ; GFX9-NEXT: [[MV1:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UADDO2]](s32), [[UADDE2]](s32) + ; GFX9-NEXT: [[SELECT:%[0-9]+]]:_(s64) = G_SELECT [[XOR]](s1), [[MV1]], [[MV]] + ; GFX9-NEXT: [[UV12:%[0-9]+]]:_(s32), [[UV13:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[UV1]](s64) + ; GFX9-NEXT: [[UV14:%[0-9]+]]:_(s32), [[UV15:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[UV3]](s64) + ; GFX9-NEXT: [[UADDO4:%[0-9]+]]:_(s32), [[UADDO5:%[0-9]+]]:_(s1) = G_UADDO [[UV12]], [[UV14]] + ; GFX9-NEXT: [[UADDE4:%[0-9]+]]:_(s32), [[UADDE5:%[0-9]+]]:_(s1) = G_UADDE [[UV13]], [[UV15]], [[UADDO5]] + ; GFX9-NEXT: [[MV2:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UADDO4]](s32), [[UADDE4]](s32) + ; GFX9-NEXT: [[ICMP2:%[0-9]+]]:_(s1) = G_ICMP intpred(slt), [[MV2]](s64), [[UV1]] + ; GFX9-NEXT: [[ICMP3:%[0-9]+]]:_(s1) = G_ICMP intpred(slt), [[UV3]](s64), [[C]] + ; GFX9-NEXT: [[XOR1:%[0-9]+]]:_(s1) = G_XOR [[ICMP3]], [[ICMP2]] + ; GFX9-NEXT: [[ASHR1:%[0-9]+]]:_(s64) = G_ASHR [[MV2]], [[C1]](s32) + ; GFX9-NEXT: [[UV16:%[0-9]+]]:_(s32), [[UV17:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[ASHR1]](s64) + ; GFX9-NEXT: [[UV18:%[0-9]+]]:_(s32), [[UV19:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[C2]](s64) + ; GFX9-NEXT: [[UADDO6:%[0-9]+]]:_(s32), [[UADDO7:%[0-9]+]]:_(s1) = G_UADDO [[UV16]], [[UV18]] + ; GFX9-NEXT: [[UADDE6:%[0-9]+]]:_(s32), [[UADDE7:%[0-9]+]]:_(s1) = G_UADDE [[UV17]], [[UV19]], [[UADDO7]] + ; GFX9-NEXT: [[MV3:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UADDO6]](s32), [[UADDE6]](s32) + ; GFX9-NEXT: [[SELECT1:%[0-9]+]]:_(s64) = G_SELECT [[XOR1]](s1), [[MV3]], [[MV2]] + ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s64>) = G_BUILD_VECTOR [[SELECT]](s64), [[SELECT1]](s64) + ; GFX9-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<2 x s64>) %0:_(<2 x s64>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3 %1:_(<2 x s64>) = COPY $vgpr4_vgpr5_vgpr6_vgpr7 %2:_(<2 x s64>) = G_SADDSAT %0, %1 diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-sbfx.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-sbfx.mir index 88d6a71d1383..b0afa6227800 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-sbfx.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-sbfx.mir @@ -1,7 +1,7 @@ # NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py -# RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=fiji -O0 -run-pass=legalizer -global-isel-abort=0 %s -o - | FileCheck --check-prefix=GCN %s -# RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx900 -O0 
-run-pass=legalizer -global-isel-abort=0 %s -o - | FileCheck --check-prefix=GCN %s -# RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx1010 -O0 -run-pass=legalizer -global-isel-abort=0 %s -o - | FileCheck --check-prefix=GCN %s +# RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=fiji -O0 -run-pass=legalizer %s -o - | FileCheck --check-prefix=GCN %s +# RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx900 -O0 -run-pass=legalizer %s -o - | FileCheck --check-prefix=GCN %s +# RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx1010 -O0 -run-pass=legalizer %s -o - | FileCheck --check-prefix=GCN %s ... --- name: test_sbfx_s32 diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-sdiv.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-sdiv.mir index 3b969e94d5ba..b2e80985ddf5 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-sdiv.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-sdiv.mir @@ -1,8 +1,8 @@ # NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py -# RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=tahiti -O0 -run-pass=legalizer -global-isel-abort=0 -o - %s | FileCheck -check-prefix=GFX6 %s -# RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=fiji -O0 -run-pass=legalizer -global-isel-abort=0 -o - %s | FileCheck -check-prefix=GFX8 %s -# RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx900 -O0 -run-pass=legalizer -global-isel-abort=0 -o - %s | FileCheck -check-prefix=GFX9 %s -# RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx1010 -O0 -run-pass=legalizer -global-isel-abort=0 -o - %s | FileCheck -check-prefix=GFX9 %s +# RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=tahiti -O0 -run-pass=legalizer -o - %s | FileCheck -check-prefix=GFX6 %s +# RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=fiji -O0 -run-pass=legalizer -o - %s | FileCheck -check-prefix=GFX8 %s +# RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx900 -O0 -run-pass=legalizer -o - %s | FileCheck -check-prefix=GFX9 %s +# RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx1010 -O0 -run-pass=legalizer -o - %s | FileCheck -check-prefix=GFX9 %s --- name: test_sdiv_s32 diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-sext-inreg.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-sext-inreg.mir index 93224e57d07b..46a4616d1933 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-sext-inreg.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-sext-inreg.mir @@ -1,8 +1,8 @@ # NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py -# RUN: llc -O0 -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx900 -run-pass=legalizer -global-isel-abort=0 %s -o - | FileCheck -check-prefix=GFX9 %s -# RUN: llc -O0 -mtriple=amdgcn-mesa-mesa3d -mcpu=fiji -run-pass=legalizer -global-isel-abort=0 %s -o - | FileCheck -check-prefix=GFX8 %s -# RUN: llc -O0 -mtriple=amdgcn-mesa-mesa3d -mcpu=tahiti -run-pass=legalizer -global-isel-abort=0 %s -o - | FileCheck -check-prefix=GFX6 %s -# RUN: llc -O0 -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx1010 -run-pass=legalizer -global-isel-abort=0 %s -o - | FileCheck -check-prefix=GFX9 %s +# RUN: llc -O0 -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx900 -run-pass=legalizer %s -o - | FileCheck -check-prefix=GFX9 %s +# RUN: llc -O0 -mtriple=amdgcn-mesa-mesa3d -mcpu=fiji -run-pass=legalizer %s -o - | FileCheck -check-prefix=GFX8 %s +# RUN: llc -O0 -mtriple=amdgcn-mesa-mesa3d -mcpu=tahiti -run-pass=legalizer %s -o - | FileCheck -check-prefix=GFX6 %s +# RUN: llc -O0 -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx1010 -run-pass=legalizer %s -o - | FileCheck -check-prefix=GFX9 %s --- name: test_sext_inreg_s32_1 diff --git 
a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-smax.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-smax.mir index 70633234f553..f336e7b5aa8f 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-smax.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-smax.mir @@ -12,19 +12,19 @@ body: | ; SI-LABEL: name: test_smax_s32 ; SI: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; SI: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; SI: [[SMAX:%[0-9]+]]:_(s32) = G_SMAX [[COPY]], [[COPY1]] - ; SI: $vgpr0 = COPY [[SMAX]](s32) + ; SI-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; SI-NEXT: [[SMAX:%[0-9]+]]:_(s32) = G_SMAX [[COPY]], [[COPY1]] + ; SI-NEXT: $vgpr0 = COPY [[SMAX]](s32) ; VI-LABEL: name: test_smax_s32 ; VI: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; VI: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; VI: [[SMAX:%[0-9]+]]:_(s32) = G_SMAX [[COPY]], [[COPY1]] - ; VI: $vgpr0 = COPY [[SMAX]](s32) + ; VI-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; VI-NEXT: [[SMAX:%[0-9]+]]:_(s32) = G_SMAX [[COPY]], [[COPY1]] + ; VI-NEXT: $vgpr0 = COPY [[SMAX]](s32) ; GFX9-LABEL: name: test_smax_s32 ; GFX9: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX9: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX9: [[SMAX:%[0-9]+]]:_(s32) = G_SMAX [[COPY]], [[COPY1]] - ; GFX9: $vgpr0 = COPY [[SMAX]](s32) + ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; GFX9-NEXT: [[SMAX:%[0-9]+]]:_(s32) = G_SMAX [[COPY]], [[COPY1]] + ; GFX9-NEXT: $vgpr0 = COPY [[SMAX]](s32) %0:_(s32) = COPY $vgpr0 %1:_(s32) = COPY $vgpr1 %2:_(s32) = G_SMAX %0, %1 @@ -39,22 +39,22 @@ body: | ; SI-LABEL: name: test_smax_s64 ; SI: [[COPY:%[0-9]+]]:_(s64) = COPY $vgpr0_vgpr1 - ; SI: [[COPY1:%[0-9]+]]:_(s64) = COPY $vgpr2_vgpr3 - ; SI: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(sgt), [[COPY]](s64), [[COPY1]] - ; SI: [[SELECT:%[0-9]+]]:_(s64) = G_SELECT [[ICMP]](s1), [[COPY]], [[COPY1]] - ; SI: $vgpr0_vgpr1 = COPY [[SELECT]](s64) + ; SI-NEXT: [[COPY1:%[0-9]+]]:_(s64) = COPY $vgpr2_vgpr3 + ; SI-NEXT: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(sgt), [[COPY]](s64), [[COPY1]] + ; SI-NEXT: [[SELECT:%[0-9]+]]:_(s64) = G_SELECT [[ICMP]](s1), [[COPY]], [[COPY1]] + ; SI-NEXT: $vgpr0_vgpr1 = COPY [[SELECT]](s64) ; VI-LABEL: name: test_smax_s64 ; VI: [[COPY:%[0-9]+]]:_(s64) = COPY $vgpr0_vgpr1 - ; VI: [[COPY1:%[0-9]+]]:_(s64) = COPY $vgpr2_vgpr3 - ; VI: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(sgt), [[COPY]](s64), [[COPY1]] - ; VI: [[SELECT:%[0-9]+]]:_(s64) = G_SELECT [[ICMP]](s1), [[COPY]], [[COPY1]] - ; VI: $vgpr0_vgpr1 = COPY [[SELECT]](s64) + ; VI-NEXT: [[COPY1:%[0-9]+]]:_(s64) = COPY $vgpr2_vgpr3 + ; VI-NEXT: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(sgt), [[COPY]](s64), [[COPY1]] + ; VI-NEXT: [[SELECT:%[0-9]+]]:_(s64) = G_SELECT [[ICMP]](s1), [[COPY]], [[COPY1]] + ; VI-NEXT: $vgpr0_vgpr1 = COPY [[SELECT]](s64) ; GFX9-LABEL: name: test_smax_s64 ; GFX9: [[COPY:%[0-9]+]]:_(s64) = COPY $vgpr0_vgpr1 - ; GFX9: [[COPY1:%[0-9]+]]:_(s64) = COPY $vgpr2_vgpr3 - ; GFX9: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(sgt), [[COPY]](s64), [[COPY1]] - ; GFX9: [[SELECT:%[0-9]+]]:_(s64) = G_SELECT [[ICMP]](s1), [[COPY]], [[COPY1]] - ; GFX9: $vgpr0_vgpr1 = COPY [[SELECT]](s64) + ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(s64) = COPY $vgpr2_vgpr3 + ; GFX9-NEXT: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(sgt), [[COPY]](s64), [[COPY1]] + ; GFX9-NEXT: [[SELECT:%[0-9]+]]:_(s64) = G_SELECT [[ICMP]](s1), [[COPY]], [[COPY1]] + ; GFX9-NEXT: $vgpr0_vgpr1 = COPY [[SELECT]](s64) %0:_(s64) = COPY $vgpr0_vgpr1 %1:_(s64) = COPY $vgpr2_vgpr3 %2:_(s64) = G_SMAX %0, %1 @@ -69,27 +69,27 @@ body: | ; SI-LABEL: name: 
test_smax_s16 ; SI: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; SI: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; SI: [[SEXT_INREG:%[0-9]+]]:_(s32) = G_SEXT_INREG [[COPY]], 16 - ; SI: [[SEXT_INREG1:%[0-9]+]]:_(s32) = G_SEXT_INREG [[COPY1]], 16 - ; SI: [[SMAX:%[0-9]+]]:_(s32) = G_SMAX [[SEXT_INREG]], [[SEXT_INREG1]] - ; SI: $vgpr0 = COPY [[SMAX]](s32) + ; SI-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; SI-NEXT: [[SEXT_INREG:%[0-9]+]]:_(s32) = G_SEXT_INREG [[COPY]], 16 + ; SI-NEXT: [[SEXT_INREG1:%[0-9]+]]:_(s32) = G_SEXT_INREG [[COPY1]], 16 + ; SI-NEXT: [[SMAX:%[0-9]+]]:_(s32) = G_SMAX [[SEXT_INREG]], [[SEXT_INREG1]] + ; SI-NEXT: $vgpr0 = COPY [[SMAX]](s32) ; VI-LABEL: name: test_smax_s16 ; VI: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; VI: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; VI: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY]](s32) - ; VI: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY1]](s32) - ; VI: [[SMAX:%[0-9]+]]:_(s16) = G_SMAX [[TRUNC]], [[TRUNC1]] - ; VI: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[SMAX]](s16) - ; VI: $vgpr0 = COPY [[ANYEXT]](s32) + ; VI-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; VI-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY]](s32) + ; VI-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY1]](s32) + ; VI-NEXT: [[SMAX:%[0-9]+]]:_(s16) = G_SMAX [[TRUNC]], [[TRUNC1]] + ; VI-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[SMAX]](s16) + ; VI-NEXT: $vgpr0 = COPY [[ANYEXT]](s32) ; GFX9-LABEL: name: test_smax_s16 ; GFX9: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX9: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX9: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY]](s32) - ; GFX9: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY1]](s32) - ; GFX9: [[SMAX:%[0-9]+]]:_(s16) = G_SMAX [[TRUNC]], [[TRUNC1]] - ; GFX9: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[SMAX]](s16) - ; GFX9: $vgpr0 = COPY [[ANYEXT]](s32) + ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; GFX9-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY]](s32) + ; GFX9-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY1]](s32) + ; GFX9-NEXT: [[SMAX:%[0-9]+]]:_(s16) = G_SMAX [[TRUNC]], [[TRUNC1]] + ; GFX9-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[SMAX]](s16) + ; GFX9-NEXT: $vgpr0 = COPY [[ANYEXT]](s32) %0:_(s32) = COPY $vgpr0 %1:_(s32) = COPY $vgpr1 %2:_(s16) = G_TRUNC %0 @@ -107,34 +107,34 @@ body: | ; SI-LABEL: name: test_smax_s8 ; SI: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; SI: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; SI: [[SEXT_INREG:%[0-9]+]]:_(s32) = G_SEXT_INREG [[COPY]], 8 - ; SI: [[SEXT_INREG1:%[0-9]+]]:_(s32) = G_SEXT_INREG [[COPY1]], 8 - ; SI: [[SMAX:%[0-9]+]]:_(s32) = G_SMAX [[SEXT_INREG]], [[SEXT_INREG1]] - ; SI: $vgpr0 = COPY [[SMAX]](s32) + ; SI-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; SI-NEXT: [[SEXT_INREG:%[0-9]+]]:_(s32) = G_SEXT_INREG [[COPY]], 8 + ; SI-NEXT: [[SEXT_INREG1:%[0-9]+]]:_(s32) = G_SEXT_INREG [[COPY1]], 8 + ; SI-NEXT: [[SMAX:%[0-9]+]]:_(s32) = G_SMAX [[SEXT_INREG]], [[SEXT_INREG1]] + ; SI-NEXT: $vgpr0 = COPY [[SMAX]](s32) ; VI-LABEL: name: test_smax_s8 ; VI: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; VI: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; VI: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY]](s32) - ; VI: [[C:%[0-9]+]]:_(s16) = G_CONSTANT i16 8 - ; VI: [[SHL:%[0-9]+]]:_(s16) = G_SHL [[TRUNC]], [[C]](s16) - ; VI: [[ASHR:%[0-9]+]]:_(s16) = G_ASHR [[SHL]], [[C]](s16) - ; VI: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY1]](s32) - ; VI: [[SHL1:%[0-9]+]]:_(s16) = G_SHL [[TRUNC1]], [[C]](s16) - ; VI: [[ASHR1:%[0-9]+]]:_(s16) = G_ASHR [[SHL1]], [[C]](s16) - ; VI: [[SMAX:%[0-9]+]]:_(s16) = G_SMAX 
[[ASHR]], [[ASHR1]] - ; VI: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[SMAX]](s16) - ; VI: $vgpr0 = COPY [[ANYEXT]](s32) + ; VI-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; VI-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY]](s32) + ; VI-NEXT: [[C:%[0-9]+]]:_(s16) = G_CONSTANT i16 8 + ; VI-NEXT: [[SHL:%[0-9]+]]:_(s16) = G_SHL [[TRUNC]], [[C]](s16) + ; VI-NEXT: [[ASHR:%[0-9]+]]:_(s16) = G_ASHR [[SHL]], [[C]](s16) + ; VI-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY1]](s32) + ; VI-NEXT: [[SHL1:%[0-9]+]]:_(s16) = G_SHL [[TRUNC1]], [[C]](s16) + ; VI-NEXT: [[ASHR1:%[0-9]+]]:_(s16) = G_ASHR [[SHL1]], [[C]](s16) + ; VI-NEXT: [[SMAX:%[0-9]+]]:_(s16) = G_SMAX [[ASHR]], [[ASHR1]] + ; VI-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[SMAX]](s16) + ; VI-NEXT: $vgpr0 = COPY [[ANYEXT]](s32) ; GFX9-LABEL: name: test_smax_s8 ; GFX9: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX9: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX9: [[SEXT_INREG:%[0-9]+]]:_(s32) = G_SEXT_INREG [[COPY]], 8 - ; GFX9: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[SEXT_INREG]](s32) - ; GFX9: [[SEXT_INREG1:%[0-9]+]]:_(s32) = G_SEXT_INREG [[COPY1]], 8 - ; GFX9: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[SEXT_INREG1]](s32) - ; GFX9: [[SMAX:%[0-9]+]]:_(s16) = G_SMAX [[TRUNC]], [[TRUNC1]] - ; GFX9: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[SMAX]](s16) - ; GFX9: $vgpr0 = COPY [[ANYEXT]](s32) + ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; GFX9-NEXT: [[SEXT_INREG:%[0-9]+]]:_(s32) = G_SEXT_INREG [[COPY]], 8 + ; GFX9-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[SEXT_INREG]](s32) + ; GFX9-NEXT: [[SEXT_INREG1:%[0-9]+]]:_(s32) = G_SEXT_INREG [[COPY1]], 8 + ; GFX9-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[SEXT_INREG1]](s32) + ; GFX9-NEXT: [[SMAX:%[0-9]+]]:_(s16) = G_SMAX [[TRUNC]], [[TRUNC1]] + ; GFX9-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[SMAX]](s16) + ; GFX9-NEXT: $vgpr0 = COPY [[ANYEXT]](s32) %0:_(s32) = COPY $vgpr0 %1:_(s32) = COPY $vgpr1 %2:_(s8) = G_TRUNC %0 @@ -152,25 +152,25 @@ body: | ; SI-LABEL: name: test_smax_s17 ; SI: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; SI: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; SI: [[SEXT_INREG:%[0-9]+]]:_(s32) = G_SEXT_INREG [[COPY]], 17 - ; SI: [[SEXT_INREG1:%[0-9]+]]:_(s32) = G_SEXT_INREG [[COPY1]], 17 - ; SI: [[SMAX:%[0-9]+]]:_(s32) = G_SMAX [[SEXT_INREG]], [[SEXT_INREG1]] - ; SI: $vgpr0 = COPY [[SMAX]](s32) + ; SI-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; SI-NEXT: [[SEXT_INREG:%[0-9]+]]:_(s32) = G_SEXT_INREG [[COPY]], 17 + ; SI-NEXT: [[SEXT_INREG1:%[0-9]+]]:_(s32) = G_SEXT_INREG [[COPY1]], 17 + ; SI-NEXT: [[SMAX:%[0-9]+]]:_(s32) = G_SMAX [[SEXT_INREG]], [[SEXT_INREG1]] + ; SI-NEXT: $vgpr0 = COPY [[SMAX]](s32) ; VI-LABEL: name: test_smax_s17 ; VI: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; VI: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; VI: [[SEXT_INREG:%[0-9]+]]:_(s32) = G_SEXT_INREG [[COPY]], 17 - ; VI: [[SEXT_INREG1:%[0-9]+]]:_(s32) = G_SEXT_INREG [[COPY1]], 17 - ; VI: [[SMAX:%[0-9]+]]:_(s32) = G_SMAX [[SEXT_INREG]], [[SEXT_INREG1]] - ; VI: $vgpr0 = COPY [[SMAX]](s32) + ; VI-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; VI-NEXT: [[SEXT_INREG:%[0-9]+]]:_(s32) = G_SEXT_INREG [[COPY]], 17 + ; VI-NEXT: [[SEXT_INREG1:%[0-9]+]]:_(s32) = G_SEXT_INREG [[COPY1]], 17 + ; VI-NEXT: [[SMAX:%[0-9]+]]:_(s32) = G_SMAX [[SEXT_INREG]], [[SEXT_INREG1]] + ; VI-NEXT: $vgpr0 = COPY [[SMAX]](s32) ; GFX9-LABEL: name: test_smax_s17 ; GFX9: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX9: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX9: [[SEXT_INREG:%[0-9]+]]:_(s32) = G_SEXT_INREG [[COPY]], 17 - ; GFX9: 
[[SEXT_INREG1:%[0-9]+]]:_(s32) = G_SEXT_INREG [[COPY1]], 17 - ; GFX9: [[SMAX:%[0-9]+]]:_(s32) = G_SMAX [[SEXT_INREG]], [[SEXT_INREG1]] - ; GFX9: $vgpr0 = COPY [[SMAX]](s32) + ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; GFX9-NEXT: [[SEXT_INREG:%[0-9]+]]:_(s32) = G_SEXT_INREG [[COPY]], 17 + ; GFX9-NEXT: [[SEXT_INREG1:%[0-9]+]]:_(s32) = G_SEXT_INREG [[COPY1]], 17 + ; GFX9-NEXT: [[SMAX:%[0-9]+]]:_(s32) = G_SMAX [[SEXT_INREG]], [[SEXT_INREG1]] + ; GFX9-NEXT: $vgpr0 = COPY [[SMAX]](s32) %0:_(s32) = COPY $vgpr0 %1:_(s32) = COPY $vgpr1 %2:_(s17) = G_TRUNC %0 @@ -188,31 +188,31 @@ body: | ; SI-LABEL: name: test_smax_v2s32 ; SI: [[COPY:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr0_vgpr1 - ; SI: [[COPY1:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr2_vgpr3 - ; SI: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<2 x s32>) - ; SI: [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](<2 x s32>) - ; SI: [[SMAX:%[0-9]+]]:_(s32) = G_SMAX [[UV]], [[UV2]] - ; SI: [[SMAX1:%[0-9]+]]:_(s32) = G_SMAX [[UV1]], [[UV3]] - ; SI: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[SMAX]](s32), [[SMAX1]](s32) - ; SI: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x s32>) + ; SI-NEXT: [[COPY1:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr2_vgpr3 + ; SI-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<2 x s32>) + ; SI-NEXT: [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](<2 x s32>) + ; SI-NEXT: [[SMAX:%[0-9]+]]:_(s32) = G_SMAX [[UV]], [[UV2]] + ; SI-NEXT: [[SMAX1:%[0-9]+]]:_(s32) = G_SMAX [[UV1]], [[UV3]] + ; SI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[SMAX]](s32), [[SMAX1]](s32) + ; SI-NEXT: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x s32>) ; VI-LABEL: name: test_smax_v2s32 ; VI: [[COPY:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr0_vgpr1 - ; VI: [[COPY1:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr2_vgpr3 - ; VI: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<2 x s32>) - ; VI: [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](<2 x s32>) - ; VI: [[SMAX:%[0-9]+]]:_(s32) = G_SMAX [[UV]], [[UV2]] - ; VI: [[SMAX1:%[0-9]+]]:_(s32) = G_SMAX [[UV1]], [[UV3]] - ; VI: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[SMAX]](s32), [[SMAX1]](s32) - ; VI: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x s32>) + ; VI-NEXT: [[COPY1:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr2_vgpr3 + ; VI-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<2 x s32>) + ; VI-NEXT: [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](<2 x s32>) + ; VI-NEXT: [[SMAX:%[0-9]+]]:_(s32) = G_SMAX [[UV]], [[UV2]] + ; VI-NEXT: [[SMAX1:%[0-9]+]]:_(s32) = G_SMAX [[UV1]], [[UV3]] + ; VI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[SMAX]](s32), [[SMAX1]](s32) + ; VI-NEXT: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x s32>) ; GFX9-LABEL: name: test_smax_v2s32 ; GFX9: [[COPY:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr0_vgpr1 - ; GFX9: [[COPY1:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr2_vgpr3 - ; GFX9: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<2 x s32>) - ; GFX9: [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](<2 x s32>) - ; GFX9: [[SMAX:%[0-9]+]]:_(s32) = G_SMAX [[UV]], [[UV2]] - ; GFX9: [[SMAX1:%[0-9]+]]:_(s32) = G_SMAX [[UV1]], [[UV3]] - ; GFX9: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[SMAX]](s32), [[SMAX1]](s32) - ; GFX9: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x s32>) + ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(<2 
x s32>) = COPY $vgpr2_vgpr3 + ; GFX9-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<2 x s32>) + ; GFX9-NEXT: [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](<2 x s32>) + ; GFX9-NEXT: [[SMAX:%[0-9]+]]:_(s32) = G_SMAX [[UV]], [[UV2]] + ; GFX9-NEXT: [[SMAX1:%[0-9]+]]:_(s32) = G_SMAX [[UV1]], [[UV3]] + ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[SMAX]](s32), [[SMAX1]](s32) + ; GFX9-NEXT: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x s32>) %0:_(<2 x s32>) = COPY $vgpr0_vgpr1 %1:_(<2 x s32>) = COPY $vgpr2_vgpr3 %2:_(<2 x s32>) = G_SMAX %0, %1 @@ -227,34 +227,34 @@ body: | ; SI-LABEL: name: test_smax_v3s32 ; SI: [[COPY:%[0-9]+]]:_(<3 x s32>) = COPY $vgpr0_vgpr1_vgpr2 - ; SI: [[COPY1:%[0-9]+]]:_(<3 x s32>) = COPY $vgpr3_vgpr4_vgpr5 - ; SI: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<3 x s32>) - ; SI: [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](<3 x s32>) - ; SI: [[SMAX:%[0-9]+]]:_(s32) = G_SMAX [[UV]], [[UV3]] - ; SI: [[SMAX1:%[0-9]+]]:_(s32) = G_SMAX [[UV1]], [[UV4]] - ; SI: [[SMAX2:%[0-9]+]]:_(s32) = G_SMAX [[UV2]], [[UV5]] - ; SI: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[SMAX]](s32), [[SMAX1]](s32), [[SMAX2]](s32) - ; SI: $vgpr0_vgpr1_vgpr2 = COPY [[BUILD_VECTOR]](<3 x s32>) + ; SI-NEXT: [[COPY1:%[0-9]+]]:_(<3 x s32>) = COPY $vgpr3_vgpr4_vgpr5 + ; SI-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<3 x s32>) + ; SI-NEXT: [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](<3 x s32>) + ; SI-NEXT: [[SMAX:%[0-9]+]]:_(s32) = G_SMAX [[UV]], [[UV3]] + ; SI-NEXT: [[SMAX1:%[0-9]+]]:_(s32) = G_SMAX [[UV1]], [[UV4]] + ; SI-NEXT: [[SMAX2:%[0-9]+]]:_(s32) = G_SMAX [[UV2]], [[UV5]] + ; SI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[SMAX]](s32), [[SMAX1]](s32), [[SMAX2]](s32) + ; SI-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[BUILD_VECTOR]](<3 x s32>) ; VI-LABEL: name: test_smax_v3s32 ; VI: [[COPY:%[0-9]+]]:_(<3 x s32>) = COPY $vgpr0_vgpr1_vgpr2 - ; VI: [[COPY1:%[0-9]+]]:_(<3 x s32>) = COPY $vgpr3_vgpr4_vgpr5 - ; VI: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<3 x s32>) - ; VI: [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](<3 x s32>) - ; VI: [[SMAX:%[0-9]+]]:_(s32) = G_SMAX [[UV]], [[UV3]] - ; VI: [[SMAX1:%[0-9]+]]:_(s32) = G_SMAX [[UV1]], [[UV4]] - ; VI: [[SMAX2:%[0-9]+]]:_(s32) = G_SMAX [[UV2]], [[UV5]] - ; VI: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[SMAX]](s32), [[SMAX1]](s32), [[SMAX2]](s32) - ; VI: $vgpr0_vgpr1_vgpr2 = COPY [[BUILD_VECTOR]](<3 x s32>) + ; VI-NEXT: [[COPY1:%[0-9]+]]:_(<3 x s32>) = COPY $vgpr3_vgpr4_vgpr5 + ; VI-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<3 x s32>) + ; VI-NEXT: [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](<3 x s32>) + ; VI-NEXT: [[SMAX:%[0-9]+]]:_(s32) = G_SMAX [[UV]], [[UV3]] + ; VI-NEXT: [[SMAX1:%[0-9]+]]:_(s32) = G_SMAX [[UV1]], [[UV4]] + ; VI-NEXT: [[SMAX2:%[0-9]+]]:_(s32) = G_SMAX [[UV2]], [[UV5]] + ; VI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[SMAX]](s32), [[SMAX1]](s32), [[SMAX2]](s32) + ; VI-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[BUILD_VECTOR]](<3 x s32>) ; GFX9-LABEL: name: test_smax_v3s32 ; 
GFX9: [[COPY:%[0-9]+]]:_(<3 x s32>) = COPY $vgpr0_vgpr1_vgpr2 - ; GFX9: [[COPY1:%[0-9]+]]:_(<3 x s32>) = COPY $vgpr3_vgpr4_vgpr5 - ; GFX9: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<3 x s32>) - ; GFX9: [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](<3 x s32>) - ; GFX9: [[SMAX:%[0-9]+]]:_(s32) = G_SMAX [[UV]], [[UV3]] - ; GFX9: [[SMAX1:%[0-9]+]]:_(s32) = G_SMAX [[UV1]], [[UV4]] - ; GFX9: [[SMAX2:%[0-9]+]]:_(s32) = G_SMAX [[UV2]], [[UV5]] - ; GFX9: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[SMAX]](s32), [[SMAX1]](s32), [[SMAX2]](s32) - ; GFX9: $vgpr0_vgpr1_vgpr2 = COPY [[BUILD_VECTOR]](<3 x s32>) + ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(<3 x s32>) = COPY $vgpr3_vgpr4_vgpr5 + ; GFX9-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<3 x s32>) + ; GFX9-NEXT: [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](<3 x s32>) + ; GFX9-NEXT: [[SMAX:%[0-9]+]]:_(s32) = G_SMAX [[UV]], [[UV3]] + ; GFX9-NEXT: [[SMAX1:%[0-9]+]]:_(s32) = G_SMAX [[UV1]], [[UV4]] + ; GFX9-NEXT: [[SMAX2:%[0-9]+]]:_(s32) = G_SMAX [[UV2]], [[UV5]] + ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[SMAX]](s32), [[SMAX1]](s32), [[SMAX2]](s32) + ; GFX9-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[BUILD_VECTOR]](<3 x s32>) %0:_(<3 x s32>) = COPY $vgpr0_vgpr1_vgpr2 %1:_(<3 x s32>) = COPY $vgpr3_vgpr4_vgpr5 %2:_(<3 x s32>) = G_SMAX %0, %1 @@ -269,50 +269,50 @@ body: | ; SI-LABEL: name: test_smax_v2s16 ; SI: [[COPY:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0 - ; SI: [[COPY1:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr1 - ; SI: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY]](<2 x s16>) - ; SI: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; SI: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) - ; SI: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[COPY1]](<2 x s16>) - ; SI: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C]](s32) - ; SI: [[SEXT_INREG:%[0-9]+]]:_(s32) = G_SEXT_INREG [[BITCAST]], 16 - ; SI: [[SEXT_INREG1:%[0-9]+]]:_(s32) = G_SEXT_INREG [[BITCAST1]], 16 - ; SI: [[SMAX:%[0-9]+]]:_(s32) = G_SMAX [[SEXT_INREG]], [[SEXT_INREG1]] - ; SI: [[SEXT_INREG2:%[0-9]+]]:_(s32) = G_SEXT_INREG [[LSHR]], 16 - ; SI: [[SEXT_INREG3:%[0-9]+]]:_(s32) = G_SEXT_INREG [[LSHR1]], 16 - ; SI: [[SMAX1:%[0-9]+]]:_(s32) = G_SMAX [[SEXT_INREG2]], [[SEXT_INREG3]] - ; SI: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 - ; SI: [[AND:%[0-9]+]]:_(s32) = G_AND [[SMAX]], [[C1]] - ; SI: [[AND1:%[0-9]+]]:_(s32) = G_AND [[SMAX1]], [[C1]] - ; SI: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C]](s32) - ; SI: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]] - ; SI: [[BITCAST2:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) - ; SI: $vgpr0 = COPY [[BITCAST2]](<2 x s16>) + ; SI-NEXT: [[COPY1:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr1 + ; SI-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY]](<2 x s16>) + ; SI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; SI-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) + ; SI-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[COPY1]](<2 x s16>) + ; SI-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C]](s32) + ; SI-NEXT: [[SEXT_INREG:%[0-9]+]]:_(s32) = G_SEXT_INREG [[BITCAST]], 16 + ; SI-NEXT: [[SEXT_INREG1:%[0-9]+]]:_(s32) = G_SEXT_INREG [[BITCAST1]], 16 + ; SI-NEXT: [[SMAX:%[0-9]+]]:_(s32) = G_SMAX [[SEXT_INREG]], [[SEXT_INREG1]] + ; SI-NEXT: [[SEXT_INREG2:%[0-9]+]]:_(s32) = G_SEXT_INREG [[LSHR]], 16 + ; 
SI-NEXT: [[SEXT_INREG3:%[0-9]+]]:_(s32) = G_SEXT_INREG [[LSHR1]], 16 + ; SI-NEXT: [[SMAX1:%[0-9]+]]:_(s32) = G_SMAX [[SEXT_INREG2]], [[SEXT_INREG3]] + ; SI-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 + ; SI-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[SMAX]], [[C1]] + ; SI-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[SMAX1]], [[C1]] + ; SI-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C]](s32) + ; SI-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]] + ; SI-NEXT: [[BITCAST2:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) + ; SI-NEXT: $vgpr0 = COPY [[BITCAST2]](<2 x s16>) ; VI-LABEL: name: test_smax_v2s16 ; VI: [[COPY:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0 - ; VI: [[COPY1:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr1 - ; VI: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY]](<2 x s16>) - ; VI: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST]](s32) - ; VI: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; VI: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) - ; VI: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) - ; VI: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[COPY1]](<2 x s16>) - ; VI: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST1]](s32) - ; VI: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C]](s32) - ; VI: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR1]](s32) - ; VI: [[SMAX:%[0-9]+]]:_(s16) = G_SMAX [[TRUNC]], [[TRUNC2]] - ; VI: [[SMAX1:%[0-9]+]]:_(s16) = G_SMAX [[TRUNC1]], [[TRUNC3]] - ; VI: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[SMAX]](s16) - ; VI: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[SMAX1]](s16) - ; VI: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C]](s32) - ; VI: [[OR:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL]] - ; VI: [[BITCAST2:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) - ; VI: $vgpr0 = COPY [[BITCAST2]](<2 x s16>) + ; VI-NEXT: [[COPY1:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr1 + ; VI-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY]](<2 x s16>) + ; VI-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST]](s32) + ; VI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; VI-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) + ; VI-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) + ; VI-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[COPY1]](<2 x s16>) + ; VI-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST1]](s32) + ; VI-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C]](s32) + ; VI-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR1]](s32) + ; VI-NEXT: [[SMAX:%[0-9]+]]:_(s16) = G_SMAX [[TRUNC]], [[TRUNC2]] + ; VI-NEXT: [[SMAX1:%[0-9]+]]:_(s16) = G_SMAX [[TRUNC1]], [[TRUNC3]] + ; VI-NEXT: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[SMAX]](s16) + ; VI-NEXT: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[SMAX1]](s16) + ; VI-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C]](s32) + ; VI-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL]] + ; VI-NEXT: [[BITCAST2:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) + ; VI-NEXT: $vgpr0 = COPY [[BITCAST2]](<2 x s16>) ; GFX9-LABEL: name: test_smax_v2s16 ; GFX9: [[COPY:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0 - ; GFX9: [[COPY1:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr1 - ; GFX9: [[SMAX:%[0-9]+]]:_(<2 x s16>) = G_SMAX [[COPY]], [[COPY1]] - ; GFX9: $vgpr0 = COPY [[SMAX]](<2 x s16>) + ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr1 + ; GFX9-NEXT: [[SMAX:%[0-9]+]]:_(<2 x s16>) = G_SMAX [[COPY]], [[COPY1]] + ; GFX9-NEXT: $vgpr0 = COPY [[SMAX]](<2 x s16>) %0:_(<2 x s16>) = COPY $vgpr0 %1:_(<2 x s16>) = COPY $vgpr1 %2:_(<2 x s16>) = G_SMAX %0, %1 @@ -327,73 +327,73 @@ body: | ; SI-LABEL: name: test_smax_v3s16 ; SI: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF - 
; SI: [[DEF1:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF - ; SI: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<4 x s16>) - ; SI: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>) - ; SI: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; SI: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) - ; SI: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>) - ; SI: [[UV2:%[0-9]+]]:_(<2 x s16>), [[UV3:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF1]](<4 x s16>) - ; SI: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[UV2]](<2 x s16>) - ; SI: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C]](s32) - ; SI: [[BITCAST3:%[0-9]+]]:_(s32) = G_BITCAST [[UV3]](<2 x s16>) - ; SI: [[SEXT_INREG:%[0-9]+]]:_(s32) = G_SEXT_INREG [[BITCAST]], 16 - ; SI: [[SEXT_INREG1:%[0-9]+]]:_(s32) = G_SEXT_INREG [[BITCAST2]], 16 - ; SI: [[SMAX:%[0-9]+]]:_(s32) = G_SMAX [[SEXT_INREG]], [[SEXT_INREG1]] - ; SI: [[SEXT_INREG2:%[0-9]+]]:_(s32) = G_SEXT_INREG [[LSHR]], 16 - ; SI: [[SEXT_INREG3:%[0-9]+]]:_(s32) = G_SEXT_INREG [[LSHR1]], 16 - ; SI: [[SMAX1:%[0-9]+]]:_(s32) = G_SMAX [[SEXT_INREG2]], [[SEXT_INREG3]] - ; SI: [[SEXT_INREG4:%[0-9]+]]:_(s32) = G_SEXT_INREG [[BITCAST1]], 16 - ; SI: [[SEXT_INREG5:%[0-9]+]]:_(s32) = G_SEXT_INREG [[BITCAST3]], 16 - ; SI: [[SMAX2:%[0-9]+]]:_(s32) = G_SMAX [[SEXT_INREG4]], [[SEXT_INREG5]] - ; SI: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[SMAX]](s32), [[SMAX1]](s32), [[SMAX2]](s32) - ; SI: S_NOP 0, implicit [[BUILD_VECTOR]](<3 x s32>) + ; SI-NEXT: [[DEF1:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF + ; SI-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<4 x s16>) + ; SI-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>) + ; SI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; SI-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) + ; SI-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>) + ; SI-NEXT: [[UV2:%[0-9]+]]:_(<2 x s16>), [[UV3:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF1]](<4 x s16>) + ; SI-NEXT: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[UV2]](<2 x s16>) + ; SI-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C]](s32) + ; SI-NEXT: [[BITCAST3:%[0-9]+]]:_(s32) = G_BITCAST [[UV3]](<2 x s16>) + ; SI-NEXT: [[SEXT_INREG:%[0-9]+]]:_(s32) = G_SEXT_INREG [[BITCAST]], 16 + ; SI-NEXT: [[SEXT_INREG1:%[0-9]+]]:_(s32) = G_SEXT_INREG [[BITCAST2]], 16 + ; SI-NEXT: [[SMAX:%[0-9]+]]:_(s32) = G_SMAX [[SEXT_INREG]], [[SEXT_INREG1]] + ; SI-NEXT: [[SEXT_INREG2:%[0-9]+]]:_(s32) = G_SEXT_INREG [[LSHR]], 16 + ; SI-NEXT: [[SEXT_INREG3:%[0-9]+]]:_(s32) = G_SEXT_INREG [[LSHR1]], 16 + ; SI-NEXT: [[SMAX1:%[0-9]+]]:_(s32) = G_SMAX [[SEXT_INREG2]], [[SEXT_INREG3]] + ; SI-NEXT: [[SEXT_INREG4:%[0-9]+]]:_(s32) = G_SEXT_INREG [[BITCAST1]], 16 + ; SI-NEXT: [[SEXT_INREG5:%[0-9]+]]:_(s32) = G_SEXT_INREG [[BITCAST3]], 16 + ; SI-NEXT: [[SMAX2:%[0-9]+]]:_(s32) = G_SMAX [[SEXT_INREG4]], [[SEXT_INREG5]] + ; SI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[SMAX]](s32), [[SMAX1]](s32), [[SMAX2]](s32) + ; SI-NEXT: S_NOP 0, implicit [[BUILD_VECTOR]](<3 x s32>) ; VI-LABEL: name: test_smax_v3s16 ; VI: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF - ; VI: [[DEF1:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF - ; VI: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<4 x s16>) - ; VI: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>) - ; VI: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST]](s32) - ; VI: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - 
; VI: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) - ; VI: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) - ; VI: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>) - ; VI: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST1]](s32) - ; VI: [[UV2:%[0-9]+]]:_(<2 x s16>), [[UV3:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF1]](<4 x s16>) - ; VI: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[UV2]](<2 x s16>) - ; VI: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST2]](s32) - ; VI: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C]](s32) - ; VI: [[TRUNC4:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR1]](s32) - ; VI: [[BITCAST3:%[0-9]+]]:_(s32) = G_BITCAST [[UV3]](<2 x s16>) - ; VI: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST3]](s32) - ; VI: [[SMAX:%[0-9]+]]:_(s16) = G_SMAX [[TRUNC]], [[TRUNC3]] - ; VI: [[SMAX1:%[0-9]+]]:_(s16) = G_SMAX [[TRUNC1]], [[TRUNC4]] - ; VI: [[SMAX2:%[0-9]+]]:_(s16) = G_SMAX [[TRUNC2]], [[TRUNC5]] - ; VI: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[SMAX]](s16) - ; VI: [[ANYEXT1:%[0-9]+]]:_(s32) = G_ANYEXT [[SMAX1]](s16) - ; VI: [[ANYEXT2:%[0-9]+]]:_(s32) = G_ANYEXT [[SMAX2]](s16) - ; VI: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[ANYEXT]](s32), [[ANYEXT1]](s32), [[ANYEXT2]](s32) - ; VI: S_NOP 0, implicit [[BUILD_VECTOR]](<3 x s32>) + ; VI-NEXT: [[DEF1:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF + ; VI-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<4 x s16>) + ; VI-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>) + ; VI-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST]](s32) + ; VI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; VI-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) + ; VI-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) + ; VI-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>) + ; VI-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST1]](s32) + ; VI-NEXT: [[UV2:%[0-9]+]]:_(<2 x s16>), [[UV3:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF1]](<4 x s16>) + ; VI-NEXT: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[UV2]](<2 x s16>) + ; VI-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST2]](s32) + ; VI-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C]](s32) + ; VI-NEXT: [[TRUNC4:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR1]](s32) + ; VI-NEXT: [[BITCAST3:%[0-9]+]]:_(s32) = G_BITCAST [[UV3]](<2 x s16>) + ; VI-NEXT: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST3]](s32) + ; VI-NEXT: [[SMAX:%[0-9]+]]:_(s16) = G_SMAX [[TRUNC]], [[TRUNC3]] + ; VI-NEXT: [[SMAX1:%[0-9]+]]:_(s16) = G_SMAX [[TRUNC1]], [[TRUNC4]] + ; VI-NEXT: [[SMAX2:%[0-9]+]]:_(s16) = G_SMAX [[TRUNC2]], [[TRUNC5]] + ; VI-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[SMAX]](s16) + ; VI-NEXT: [[ANYEXT1:%[0-9]+]]:_(s32) = G_ANYEXT [[SMAX1]](s16) + ; VI-NEXT: [[ANYEXT2:%[0-9]+]]:_(s32) = G_ANYEXT [[SMAX2]](s16) + ; VI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[ANYEXT]](s32), [[ANYEXT1]](s32), [[ANYEXT2]](s32) + ; VI-NEXT: S_NOP 0, implicit [[BUILD_VECTOR]](<3 x s32>) ; GFX9-LABEL: name: test_smax_v3s16 ; GFX9: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF - ; GFX9: [[DEF1:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF - ; GFX9: [[CONCAT_VECTORS:%[0-9]+]]:_(<12 x s16>) = G_CONCAT_VECTORS [[DEF]](<4 x s16>), [[DEF1]](<4 x s16>), [[DEF1]](<4 x s16>) - ; GFX9: [[UV:%[0-9]+]]:_(<3 x s16>), [[UV1:%[0-9]+]]:_(<3 x s16>), [[UV2:%[0-9]+]]:_(<3 x s16>), [[UV3:%[0-9]+]]:_(<3 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<12 x s16>) - ; GFX9: [[DEF2:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF - ; GFX9: 
[[CONCAT_VECTORS1:%[0-9]+]]:_(<12 x s16>) = G_CONCAT_VECTORS [[DEF2]](<4 x s16>), [[DEF1]](<4 x s16>), [[DEF1]](<4 x s16>) - ; GFX9: [[UV4:%[0-9]+]]:_(<3 x s16>), [[UV5:%[0-9]+]]:_(<3 x s16>), [[UV6:%[0-9]+]]:_(<3 x s16>), [[UV7:%[0-9]+]]:_(<3 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS1]](<12 x s16>) - ; GFX9: [[INSERT:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF1]], [[UV]](<3 x s16>), 0 - ; GFX9: [[INSERT1:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF1]], [[UV4]](<3 x s16>), 0 - ; GFX9: [[UV8:%[0-9]+]]:_(<2 x s16>), [[UV9:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[INSERT]](<4 x s16>) - ; GFX9: [[UV10:%[0-9]+]]:_(<2 x s16>), [[UV11:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[INSERT1]](<4 x s16>) - ; GFX9: [[SMAX:%[0-9]+]]:_(<2 x s16>) = G_SMAX [[UV8]], [[UV10]] - ; GFX9: [[SMAX1:%[0-9]+]]:_(<2 x s16>) = G_SMAX [[UV9]], [[UV11]] - ; GFX9: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[SMAX]](<2 x s16>) - ; GFX9: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; GFX9: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) - ; GFX9: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[SMAX1]](<2 x s16>) - ; GFX9: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[BITCAST]](s32), [[LSHR]](s32), [[BITCAST1]](s32) - ; GFX9: S_NOP 0, implicit [[BUILD_VECTOR]](<3 x s32>) + ; GFX9-NEXT: [[DEF1:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF + ; GFX9-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<12 x s16>) = G_CONCAT_VECTORS [[DEF]](<4 x s16>), [[DEF1]](<4 x s16>), [[DEF1]](<4 x s16>) + ; GFX9-NEXT: [[UV:%[0-9]+]]:_(<3 x s16>), [[UV1:%[0-9]+]]:_(<3 x s16>), [[UV2:%[0-9]+]]:_(<3 x s16>), [[UV3:%[0-9]+]]:_(<3 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<12 x s16>) + ; GFX9-NEXT: [[DEF2:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF + ; GFX9-NEXT: [[CONCAT_VECTORS1:%[0-9]+]]:_(<12 x s16>) = G_CONCAT_VECTORS [[DEF2]](<4 x s16>), [[DEF1]](<4 x s16>), [[DEF1]](<4 x s16>) + ; GFX9-NEXT: [[UV4:%[0-9]+]]:_(<3 x s16>), [[UV5:%[0-9]+]]:_(<3 x s16>), [[UV6:%[0-9]+]]:_(<3 x s16>), [[UV7:%[0-9]+]]:_(<3 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS1]](<12 x s16>) + ; GFX9-NEXT: [[INSERT:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF1]], [[UV]](<3 x s16>), 0 + ; GFX9-NEXT: [[INSERT1:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF1]], [[UV4]](<3 x s16>), 0 + ; GFX9-NEXT: [[UV8:%[0-9]+]]:_(<2 x s16>), [[UV9:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[INSERT]](<4 x s16>) + ; GFX9-NEXT: [[UV10:%[0-9]+]]:_(<2 x s16>), [[UV11:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[INSERT1]](<4 x s16>) + ; GFX9-NEXT: [[SMAX:%[0-9]+]]:_(<2 x s16>) = G_SMAX [[UV8]], [[UV10]] + ; GFX9-NEXT: [[SMAX1:%[0-9]+]]:_(<2 x s16>) = G_SMAX [[UV9]], [[UV11]] + ; GFX9-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[SMAX]](<2 x s16>) + ; GFX9-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; GFX9-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) + ; GFX9-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[SMAX1]](<2 x s16>) + ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[BITCAST]](s32), [[LSHR]](s32), [[BITCAST1]](s32) + ; GFX9-NEXT: S_NOP 0, implicit [[BUILD_VECTOR]](<3 x s32>) %0:_(<3 x s16>) = G_IMPLICIT_DEF %1:_(<3 x s16>) = G_IMPLICIT_DEF %2:_(<3 x s16>) = G_SMAX %0, %1 @@ -409,90 +409,90 @@ body: | ; SI-LABEL: name: test_smax_v4s16 ; SI: [[COPY:%[0-9]+]]:_(<4 x s16>) = COPY $vgpr0_vgpr1 - ; SI: [[COPY1:%[0-9]+]]:_(<4 x s16>) = COPY $vgpr2_vgpr3 - ; SI: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[COPY]](<4 x s16>) - ; SI: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>) - ; SI: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; SI: 
[[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) - ; SI: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>) - ; SI: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C]](s32) - ; SI: [[UV2:%[0-9]+]]:_(<2 x s16>), [[UV3:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[COPY1]](<4 x s16>) - ; SI: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[UV2]](<2 x s16>) - ; SI: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C]](s32) - ; SI: [[BITCAST3:%[0-9]+]]:_(s32) = G_BITCAST [[UV3]](<2 x s16>) - ; SI: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST3]], [[C]](s32) - ; SI: [[SEXT_INREG:%[0-9]+]]:_(s32) = G_SEXT_INREG [[BITCAST]], 16 - ; SI: [[SEXT_INREG1:%[0-9]+]]:_(s32) = G_SEXT_INREG [[BITCAST2]], 16 - ; SI: [[SMAX:%[0-9]+]]:_(s32) = G_SMAX [[SEXT_INREG]], [[SEXT_INREG1]] - ; SI: [[SEXT_INREG2:%[0-9]+]]:_(s32) = G_SEXT_INREG [[LSHR]], 16 - ; SI: [[SEXT_INREG3:%[0-9]+]]:_(s32) = G_SEXT_INREG [[LSHR2]], 16 - ; SI: [[SMAX1:%[0-9]+]]:_(s32) = G_SMAX [[SEXT_INREG2]], [[SEXT_INREG3]] - ; SI: [[SEXT_INREG4:%[0-9]+]]:_(s32) = G_SEXT_INREG [[BITCAST1]], 16 - ; SI: [[SEXT_INREG5:%[0-9]+]]:_(s32) = G_SEXT_INREG [[BITCAST3]], 16 - ; SI: [[SMAX2:%[0-9]+]]:_(s32) = G_SMAX [[SEXT_INREG4]], [[SEXT_INREG5]] - ; SI: [[SEXT_INREG6:%[0-9]+]]:_(s32) = G_SEXT_INREG [[LSHR1]], 16 - ; SI: [[SEXT_INREG7:%[0-9]+]]:_(s32) = G_SEXT_INREG [[LSHR3]], 16 - ; SI: [[SMAX3:%[0-9]+]]:_(s32) = G_SMAX [[SEXT_INREG6]], [[SEXT_INREG7]] - ; SI: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 - ; SI: [[AND:%[0-9]+]]:_(s32) = G_AND [[SMAX]], [[C1]] - ; SI: [[AND1:%[0-9]+]]:_(s32) = G_AND [[SMAX1]], [[C1]] - ; SI: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C]](s32) - ; SI: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]] - ; SI: [[BITCAST4:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) - ; SI: [[AND2:%[0-9]+]]:_(s32) = G_AND [[SMAX2]], [[C1]] - ; SI: [[AND3:%[0-9]+]]:_(s32) = G_AND [[SMAX3]], [[C1]] - ; SI: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C]](s32) - ; SI: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL1]] - ; SI: [[BITCAST5:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32) - ; SI: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BITCAST4]](<2 x s16>), [[BITCAST5]](<2 x s16>) - ; SI: $vgpr0_vgpr1 = COPY [[CONCAT_VECTORS]](<4 x s16>) + ; SI-NEXT: [[COPY1:%[0-9]+]]:_(<4 x s16>) = COPY $vgpr2_vgpr3 + ; SI-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[COPY]](<4 x s16>) + ; SI-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>) + ; SI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; SI-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) + ; SI-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>) + ; SI-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C]](s32) + ; SI-NEXT: [[UV2:%[0-9]+]]:_(<2 x s16>), [[UV3:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[COPY1]](<4 x s16>) + ; SI-NEXT: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[UV2]](<2 x s16>) + ; SI-NEXT: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C]](s32) + ; SI-NEXT: [[BITCAST3:%[0-9]+]]:_(s32) = G_BITCAST [[UV3]](<2 x s16>) + ; SI-NEXT: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST3]], [[C]](s32) + ; SI-NEXT: [[SEXT_INREG:%[0-9]+]]:_(s32) = G_SEXT_INREG [[BITCAST]], 16 + ; SI-NEXT: [[SEXT_INREG1:%[0-9]+]]:_(s32) = G_SEXT_INREG [[BITCAST2]], 16 + ; SI-NEXT: [[SMAX:%[0-9]+]]:_(s32) = G_SMAX [[SEXT_INREG]], [[SEXT_INREG1]] + ; SI-NEXT: [[SEXT_INREG2:%[0-9]+]]:_(s32) = G_SEXT_INREG [[LSHR]], 16 + ; SI-NEXT: [[SEXT_INREG3:%[0-9]+]]:_(s32) = G_SEXT_INREG [[LSHR2]], 16 + ; SI-NEXT: 
[[SMAX1:%[0-9]+]]:_(s32) = G_SMAX [[SEXT_INREG2]], [[SEXT_INREG3]] + ; SI-NEXT: [[SEXT_INREG4:%[0-9]+]]:_(s32) = G_SEXT_INREG [[BITCAST1]], 16 + ; SI-NEXT: [[SEXT_INREG5:%[0-9]+]]:_(s32) = G_SEXT_INREG [[BITCAST3]], 16 + ; SI-NEXT: [[SMAX2:%[0-9]+]]:_(s32) = G_SMAX [[SEXT_INREG4]], [[SEXT_INREG5]] + ; SI-NEXT: [[SEXT_INREG6:%[0-9]+]]:_(s32) = G_SEXT_INREG [[LSHR1]], 16 + ; SI-NEXT: [[SEXT_INREG7:%[0-9]+]]:_(s32) = G_SEXT_INREG [[LSHR3]], 16 + ; SI-NEXT: [[SMAX3:%[0-9]+]]:_(s32) = G_SMAX [[SEXT_INREG6]], [[SEXT_INREG7]] + ; SI-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 + ; SI-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[SMAX]], [[C1]] + ; SI-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[SMAX1]], [[C1]] + ; SI-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C]](s32) + ; SI-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]] + ; SI-NEXT: [[BITCAST4:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) + ; SI-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[SMAX2]], [[C1]] + ; SI-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[SMAX3]], [[C1]] + ; SI-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C]](s32) + ; SI-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL1]] + ; SI-NEXT: [[BITCAST5:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32) + ; SI-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BITCAST4]](<2 x s16>), [[BITCAST5]](<2 x s16>) + ; SI-NEXT: $vgpr0_vgpr1 = COPY [[CONCAT_VECTORS]](<4 x s16>) ; VI-LABEL: name: test_smax_v4s16 ; VI: [[COPY:%[0-9]+]]:_(<4 x s16>) = COPY $vgpr0_vgpr1 - ; VI: [[COPY1:%[0-9]+]]:_(<4 x s16>) = COPY $vgpr2_vgpr3 - ; VI: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[COPY]](<4 x s16>) - ; VI: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>) - ; VI: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST]](s32) - ; VI: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; VI: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) - ; VI: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) - ; VI: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>) - ; VI: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST1]](s32) - ; VI: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C]](s32) - ; VI: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR1]](s32) - ; VI: [[UV2:%[0-9]+]]:_(<2 x s16>), [[UV3:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[COPY1]](<4 x s16>) - ; VI: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[UV2]](<2 x s16>) - ; VI: [[TRUNC4:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST2]](s32) - ; VI: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C]](s32) - ; VI: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR2]](s32) - ; VI: [[BITCAST3:%[0-9]+]]:_(s32) = G_BITCAST [[UV3]](<2 x s16>) - ; VI: [[TRUNC6:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST3]](s32) - ; VI: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST3]], [[C]](s32) - ; VI: [[TRUNC7:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR3]](s32) - ; VI: [[SMAX:%[0-9]+]]:_(s16) = G_SMAX [[TRUNC]], [[TRUNC4]] - ; VI: [[SMAX1:%[0-9]+]]:_(s16) = G_SMAX [[TRUNC1]], [[TRUNC5]] - ; VI: [[SMAX2:%[0-9]+]]:_(s16) = G_SMAX [[TRUNC2]], [[TRUNC6]] - ; VI: [[SMAX3:%[0-9]+]]:_(s16) = G_SMAX [[TRUNC3]], [[TRUNC7]] - ; VI: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[SMAX]](s16) - ; VI: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[SMAX1]](s16) - ; VI: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C]](s32) - ; VI: [[OR:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL]] - ; VI: [[BITCAST4:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) - ; VI: [[ZEXT2:%[0-9]+]]:_(s32) = G_ZEXT [[SMAX2]](s16) - ; VI: [[ZEXT3:%[0-9]+]]:_(s32) = G_ZEXT [[SMAX3]](s16) - ; VI: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[ZEXT3]], 
[[C]](s32) - ; VI: [[OR1:%[0-9]+]]:_(s32) = G_OR [[ZEXT2]], [[SHL1]] - ; VI: [[BITCAST5:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32) - ; VI: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BITCAST4]](<2 x s16>), [[BITCAST5]](<2 x s16>) - ; VI: $vgpr0_vgpr1 = COPY [[CONCAT_VECTORS]](<4 x s16>) + ; VI-NEXT: [[COPY1:%[0-9]+]]:_(<4 x s16>) = COPY $vgpr2_vgpr3 + ; VI-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[COPY]](<4 x s16>) + ; VI-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>) + ; VI-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST]](s32) + ; VI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; VI-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) + ; VI-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) + ; VI-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>) + ; VI-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST1]](s32) + ; VI-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C]](s32) + ; VI-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR1]](s32) + ; VI-NEXT: [[UV2:%[0-9]+]]:_(<2 x s16>), [[UV3:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[COPY1]](<4 x s16>) + ; VI-NEXT: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[UV2]](<2 x s16>) + ; VI-NEXT: [[TRUNC4:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST2]](s32) + ; VI-NEXT: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C]](s32) + ; VI-NEXT: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR2]](s32) + ; VI-NEXT: [[BITCAST3:%[0-9]+]]:_(s32) = G_BITCAST [[UV3]](<2 x s16>) + ; VI-NEXT: [[TRUNC6:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST3]](s32) + ; VI-NEXT: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST3]], [[C]](s32) + ; VI-NEXT: [[TRUNC7:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR3]](s32) + ; VI-NEXT: [[SMAX:%[0-9]+]]:_(s16) = G_SMAX [[TRUNC]], [[TRUNC4]] + ; VI-NEXT: [[SMAX1:%[0-9]+]]:_(s16) = G_SMAX [[TRUNC1]], [[TRUNC5]] + ; VI-NEXT: [[SMAX2:%[0-9]+]]:_(s16) = G_SMAX [[TRUNC2]], [[TRUNC6]] + ; VI-NEXT: [[SMAX3:%[0-9]+]]:_(s16) = G_SMAX [[TRUNC3]], [[TRUNC7]] + ; VI-NEXT: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[SMAX]](s16) + ; VI-NEXT: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[SMAX1]](s16) + ; VI-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C]](s32) + ; VI-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL]] + ; VI-NEXT: [[BITCAST4:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) + ; VI-NEXT: [[ZEXT2:%[0-9]+]]:_(s32) = G_ZEXT [[SMAX2]](s16) + ; VI-NEXT: [[ZEXT3:%[0-9]+]]:_(s32) = G_ZEXT [[SMAX3]](s16) + ; VI-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[ZEXT3]], [[C]](s32) + ; VI-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[ZEXT2]], [[SHL1]] + ; VI-NEXT: [[BITCAST5:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32) + ; VI-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BITCAST4]](<2 x s16>), [[BITCAST5]](<2 x s16>) + ; VI-NEXT: $vgpr0_vgpr1 = COPY [[CONCAT_VECTORS]](<4 x s16>) ; GFX9-LABEL: name: test_smax_v4s16 ; GFX9: [[COPY:%[0-9]+]]:_(<4 x s16>) = COPY $vgpr0_vgpr1 - ; GFX9: [[COPY1:%[0-9]+]]:_(<4 x s16>) = COPY $vgpr2_vgpr3 - ; GFX9: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[COPY]](<4 x s16>) - ; GFX9: [[UV2:%[0-9]+]]:_(<2 x s16>), [[UV3:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[COPY1]](<4 x s16>) - ; GFX9: [[SMAX:%[0-9]+]]:_(<2 x s16>) = G_SMAX [[UV]], [[UV2]] - ; GFX9: [[SMAX1:%[0-9]+]]:_(<2 x s16>) = G_SMAX [[UV1]], [[UV3]] - ; GFX9: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[SMAX]](<2 x s16>), [[SMAX1]](<2 x s16>) - ; GFX9: $vgpr0_vgpr1 = COPY [[CONCAT_VECTORS]](<4 x s16>) + ; GFX9-NEXT: 
[[COPY1:%[0-9]+]]:_(<4 x s16>) = COPY $vgpr2_vgpr3 + ; GFX9-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[COPY]](<4 x s16>) + ; GFX9-NEXT: [[UV2:%[0-9]+]]:_(<2 x s16>), [[UV3:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[COPY1]](<4 x s16>) + ; GFX9-NEXT: [[SMAX:%[0-9]+]]:_(<2 x s16>) = G_SMAX [[UV]], [[UV2]] + ; GFX9-NEXT: [[SMAX1:%[0-9]+]]:_(<2 x s16>) = G_SMAX [[UV1]], [[UV3]] + ; GFX9-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[SMAX]](<2 x s16>), [[SMAX1]](<2 x s16>) + ; GFX9-NEXT: $vgpr0_vgpr1 = COPY [[CONCAT_VECTORS]](<4 x s16>) %0:_(<4 x s16>) = COPY $vgpr0_vgpr1 %1:_(<4 x s16>) = COPY $vgpr2_vgpr3 %2:_(<4 x s16>) = G_SMAX %0, %1 diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-smin.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-smin.mir index c693402b7005..60ae4743796c 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-smin.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-smin.mir @@ -12,19 +12,19 @@ body: | ; SI-LABEL: name: test_smin_s32 ; SI: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; SI: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; SI: [[SMIN:%[0-9]+]]:_(s32) = G_SMIN [[COPY]], [[COPY1]] - ; SI: $vgpr0 = COPY [[SMIN]](s32) + ; SI-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; SI-NEXT: [[SMIN:%[0-9]+]]:_(s32) = G_SMIN [[COPY]], [[COPY1]] + ; SI-NEXT: $vgpr0 = COPY [[SMIN]](s32) ; VI-LABEL: name: test_smin_s32 ; VI: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; VI: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; VI: [[SMIN:%[0-9]+]]:_(s32) = G_SMIN [[COPY]], [[COPY1]] - ; VI: $vgpr0 = COPY [[SMIN]](s32) + ; VI-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; VI-NEXT: [[SMIN:%[0-9]+]]:_(s32) = G_SMIN [[COPY]], [[COPY1]] + ; VI-NEXT: $vgpr0 = COPY [[SMIN]](s32) ; GFX9-LABEL: name: test_smin_s32 ; GFX9: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX9: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX9: [[SMIN:%[0-9]+]]:_(s32) = G_SMIN [[COPY]], [[COPY1]] - ; GFX9: $vgpr0 = COPY [[SMIN]](s32) + ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; GFX9-NEXT: [[SMIN:%[0-9]+]]:_(s32) = G_SMIN [[COPY]], [[COPY1]] + ; GFX9-NEXT: $vgpr0 = COPY [[SMIN]](s32) %0:_(s32) = COPY $vgpr0 %1:_(s32) = COPY $vgpr1 %2:_(s32) = G_SMIN %0, %1 @@ -39,22 +39,22 @@ body: | ; SI-LABEL: name: test_smin_s64 ; SI: [[COPY:%[0-9]+]]:_(s64) = COPY $vgpr0_vgpr1 - ; SI: [[COPY1:%[0-9]+]]:_(s64) = COPY $vgpr2_vgpr3 - ; SI: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(slt), [[COPY]](s64), [[COPY1]] - ; SI: [[SELECT:%[0-9]+]]:_(s64) = G_SELECT [[ICMP]](s1), [[COPY]], [[COPY1]] - ; SI: $vgpr0_vgpr1 = COPY [[SELECT]](s64) + ; SI-NEXT: [[COPY1:%[0-9]+]]:_(s64) = COPY $vgpr2_vgpr3 + ; SI-NEXT: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(slt), [[COPY]](s64), [[COPY1]] + ; SI-NEXT: [[SELECT:%[0-9]+]]:_(s64) = G_SELECT [[ICMP]](s1), [[COPY]], [[COPY1]] + ; SI-NEXT: $vgpr0_vgpr1 = COPY [[SELECT]](s64) ; VI-LABEL: name: test_smin_s64 ; VI: [[COPY:%[0-9]+]]:_(s64) = COPY $vgpr0_vgpr1 - ; VI: [[COPY1:%[0-9]+]]:_(s64) = COPY $vgpr2_vgpr3 - ; VI: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(slt), [[COPY]](s64), [[COPY1]] - ; VI: [[SELECT:%[0-9]+]]:_(s64) = G_SELECT [[ICMP]](s1), [[COPY]], [[COPY1]] - ; VI: $vgpr0_vgpr1 = COPY [[SELECT]](s64) + ; VI-NEXT: [[COPY1:%[0-9]+]]:_(s64) = COPY $vgpr2_vgpr3 + ; VI-NEXT: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(slt), [[COPY]](s64), [[COPY1]] + ; VI-NEXT: [[SELECT:%[0-9]+]]:_(s64) = G_SELECT [[ICMP]](s1), [[COPY]], [[COPY1]] + ; VI-NEXT: $vgpr0_vgpr1 = COPY [[SELECT]](s64) ; GFX9-LABEL: name: test_smin_s64 ; GFX9: 
[[COPY:%[0-9]+]]:_(s64) = COPY $vgpr0_vgpr1 - ; GFX9: [[COPY1:%[0-9]+]]:_(s64) = COPY $vgpr2_vgpr3 - ; GFX9: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(slt), [[COPY]](s64), [[COPY1]] - ; GFX9: [[SELECT:%[0-9]+]]:_(s64) = G_SELECT [[ICMP]](s1), [[COPY]], [[COPY1]] - ; GFX9: $vgpr0_vgpr1 = COPY [[SELECT]](s64) + ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(s64) = COPY $vgpr2_vgpr3 + ; GFX9-NEXT: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(slt), [[COPY]](s64), [[COPY1]] + ; GFX9-NEXT: [[SELECT:%[0-9]+]]:_(s64) = G_SELECT [[ICMP]](s1), [[COPY]], [[COPY1]] + ; GFX9-NEXT: $vgpr0_vgpr1 = COPY [[SELECT]](s64) %0:_(s64) = COPY $vgpr0_vgpr1 %1:_(s64) = COPY $vgpr2_vgpr3 %2:_(s64) = G_SMIN %0, %1 @@ -69,27 +69,27 @@ body: | ; SI-LABEL: name: test_smin_s16 ; SI: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; SI: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; SI: [[SEXT_INREG:%[0-9]+]]:_(s32) = G_SEXT_INREG [[COPY]], 16 - ; SI: [[SEXT_INREG1:%[0-9]+]]:_(s32) = G_SEXT_INREG [[COPY1]], 16 - ; SI: [[SMIN:%[0-9]+]]:_(s32) = G_SMIN [[SEXT_INREG]], [[SEXT_INREG1]] - ; SI: $vgpr0 = COPY [[SMIN]](s32) + ; SI-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; SI-NEXT: [[SEXT_INREG:%[0-9]+]]:_(s32) = G_SEXT_INREG [[COPY]], 16 + ; SI-NEXT: [[SEXT_INREG1:%[0-9]+]]:_(s32) = G_SEXT_INREG [[COPY1]], 16 + ; SI-NEXT: [[SMIN:%[0-9]+]]:_(s32) = G_SMIN [[SEXT_INREG]], [[SEXT_INREG1]] + ; SI-NEXT: $vgpr0 = COPY [[SMIN]](s32) ; VI-LABEL: name: test_smin_s16 ; VI: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; VI: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; VI: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY]](s32) - ; VI: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY1]](s32) - ; VI: [[SMIN:%[0-9]+]]:_(s16) = G_SMIN [[TRUNC]], [[TRUNC1]] - ; VI: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[SMIN]](s16) - ; VI: $vgpr0 = COPY [[ANYEXT]](s32) + ; VI-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; VI-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY]](s32) + ; VI-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY1]](s32) + ; VI-NEXT: [[SMIN:%[0-9]+]]:_(s16) = G_SMIN [[TRUNC]], [[TRUNC1]] + ; VI-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[SMIN]](s16) + ; VI-NEXT: $vgpr0 = COPY [[ANYEXT]](s32) ; GFX9-LABEL: name: test_smin_s16 ; GFX9: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX9: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX9: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY]](s32) - ; GFX9: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY1]](s32) - ; GFX9: [[SMIN:%[0-9]+]]:_(s16) = G_SMIN [[TRUNC]], [[TRUNC1]] - ; GFX9: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[SMIN]](s16) - ; GFX9: $vgpr0 = COPY [[ANYEXT]](s32) + ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; GFX9-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY]](s32) + ; GFX9-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY1]](s32) + ; GFX9-NEXT: [[SMIN:%[0-9]+]]:_(s16) = G_SMIN [[TRUNC]], [[TRUNC1]] + ; GFX9-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[SMIN]](s16) + ; GFX9-NEXT: $vgpr0 = COPY [[ANYEXT]](s32) %0:_(s32) = COPY $vgpr0 %1:_(s32) = COPY $vgpr1 %2:_(s16) = G_TRUNC %0 @@ -107,34 +107,34 @@ body: | ; SI-LABEL: name: test_smin_s8 ; SI: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; SI: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; SI: [[SEXT_INREG:%[0-9]+]]:_(s32) = G_SEXT_INREG [[COPY]], 8 - ; SI: [[SEXT_INREG1:%[0-9]+]]:_(s32) = G_SEXT_INREG [[COPY1]], 8 - ; SI: [[SMIN:%[0-9]+]]:_(s32) = G_SMIN [[SEXT_INREG]], [[SEXT_INREG1]] - ; SI: $vgpr0 = COPY [[SMIN]](s32) + ; SI-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; SI-NEXT: [[SEXT_INREG:%[0-9]+]]:_(s32) = G_SEXT_INREG [[COPY]], 8 + ; SI-NEXT: [[SEXT_INREG1:%[0-9]+]]:_(s32) = 
G_SEXT_INREG [[COPY1]], 8 + ; SI-NEXT: [[SMIN:%[0-9]+]]:_(s32) = G_SMIN [[SEXT_INREG]], [[SEXT_INREG1]] + ; SI-NEXT: $vgpr0 = COPY [[SMIN]](s32) ; VI-LABEL: name: test_smin_s8 ; VI: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; VI: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; VI: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY]](s32) - ; VI: [[C:%[0-9]+]]:_(s16) = G_CONSTANT i16 8 - ; VI: [[SHL:%[0-9]+]]:_(s16) = G_SHL [[TRUNC]], [[C]](s16) - ; VI: [[ASHR:%[0-9]+]]:_(s16) = G_ASHR [[SHL]], [[C]](s16) - ; VI: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY1]](s32) - ; VI: [[SHL1:%[0-9]+]]:_(s16) = G_SHL [[TRUNC1]], [[C]](s16) - ; VI: [[ASHR1:%[0-9]+]]:_(s16) = G_ASHR [[SHL1]], [[C]](s16) - ; VI: [[SMIN:%[0-9]+]]:_(s16) = G_SMIN [[ASHR]], [[ASHR1]] - ; VI: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[SMIN]](s16) - ; VI: $vgpr0 = COPY [[ANYEXT]](s32) + ; VI-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; VI-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY]](s32) + ; VI-NEXT: [[C:%[0-9]+]]:_(s16) = G_CONSTANT i16 8 + ; VI-NEXT: [[SHL:%[0-9]+]]:_(s16) = G_SHL [[TRUNC]], [[C]](s16) + ; VI-NEXT: [[ASHR:%[0-9]+]]:_(s16) = G_ASHR [[SHL]], [[C]](s16) + ; VI-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY1]](s32) + ; VI-NEXT: [[SHL1:%[0-9]+]]:_(s16) = G_SHL [[TRUNC1]], [[C]](s16) + ; VI-NEXT: [[ASHR1:%[0-9]+]]:_(s16) = G_ASHR [[SHL1]], [[C]](s16) + ; VI-NEXT: [[SMIN:%[0-9]+]]:_(s16) = G_SMIN [[ASHR]], [[ASHR1]] + ; VI-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[SMIN]](s16) + ; VI-NEXT: $vgpr0 = COPY [[ANYEXT]](s32) ; GFX9-LABEL: name: test_smin_s8 ; GFX9: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX9: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX9: [[SEXT_INREG:%[0-9]+]]:_(s32) = G_SEXT_INREG [[COPY]], 8 - ; GFX9: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[SEXT_INREG]](s32) - ; GFX9: [[SEXT_INREG1:%[0-9]+]]:_(s32) = G_SEXT_INREG [[COPY1]], 8 - ; GFX9: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[SEXT_INREG1]](s32) - ; GFX9: [[SMIN:%[0-9]+]]:_(s16) = G_SMIN [[TRUNC]], [[TRUNC1]] - ; GFX9: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[SMIN]](s16) - ; GFX9: $vgpr0 = COPY [[ANYEXT]](s32) + ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; GFX9-NEXT: [[SEXT_INREG:%[0-9]+]]:_(s32) = G_SEXT_INREG [[COPY]], 8 + ; GFX9-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[SEXT_INREG]](s32) + ; GFX9-NEXT: [[SEXT_INREG1:%[0-9]+]]:_(s32) = G_SEXT_INREG [[COPY1]], 8 + ; GFX9-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[SEXT_INREG1]](s32) + ; GFX9-NEXT: [[SMIN:%[0-9]+]]:_(s16) = G_SMIN [[TRUNC]], [[TRUNC1]] + ; GFX9-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[SMIN]](s16) + ; GFX9-NEXT: $vgpr0 = COPY [[ANYEXT]](s32) %0:_(s32) = COPY $vgpr0 %1:_(s32) = COPY $vgpr1 %2:_(s8) = G_TRUNC %0 @@ -152,25 +152,25 @@ body: | ; SI-LABEL: name: test_smin_s17 ; SI: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; SI: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; SI: [[SEXT_INREG:%[0-9]+]]:_(s32) = G_SEXT_INREG [[COPY]], 17 - ; SI: [[SEXT_INREG1:%[0-9]+]]:_(s32) = G_SEXT_INREG [[COPY1]], 17 - ; SI: [[SMIN:%[0-9]+]]:_(s32) = G_SMIN [[SEXT_INREG]], [[SEXT_INREG1]] - ; SI: $vgpr0 = COPY [[SMIN]](s32) + ; SI-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; SI-NEXT: [[SEXT_INREG:%[0-9]+]]:_(s32) = G_SEXT_INREG [[COPY]], 17 + ; SI-NEXT: [[SEXT_INREG1:%[0-9]+]]:_(s32) = G_SEXT_INREG [[COPY1]], 17 + ; SI-NEXT: [[SMIN:%[0-9]+]]:_(s32) = G_SMIN [[SEXT_INREG]], [[SEXT_INREG1]] + ; SI-NEXT: $vgpr0 = COPY [[SMIN]](s32) ; VI-LABEL: name: test_smin_s17 ; VI: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; VI: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; VI: [[SEXT_INREG:%[0-9]+]]:_(s32) = G_SEXT_INREG 
[[COPY]], 17 - ; VI: [[SEXT_INREG1:%[0-9]+]]:_(s32) = G_SEXT_INREG [[COPY1]], 17 - ; VI: [[SMIN:%[0-9]+]]:_(s32) = G_SMIN [[SEXT_INREG]], [[SEXT_INREG1]] - ; VI: $vgpr0 = COPY [[SMIN]](s32) + ; VI-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; VI-NEXT: [[SEXT_INREG:%[0-9]+]]:_(s32) = G_SEXT_INREG [[COPY]], 17 + ; VI-NEXT: [[SEXT_INREG1:%[0-9]+]]:_(s32) = G_SEXT_INREG [[COPY1]], 17 + ; VI-NEXT: [[SMIN:%[0-9]+]]:_(s32) = G_SMIN [[SEXT_INREG]], [[SEXT_INREG1]] + ; VI-NEXT: $vgpr0 = COPY [[SMIN]](s32) ; GFX9-LABEL: name: test_smin_s17 ; GFX9: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX9: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX9: [[SEXT_INREG:%[0-9]+]]:_(s32) = G_SEXT_INREG [[COPY]], 17 - ; GFX9: [[SEXT_INREG1:%[0-9]+]]:_(s32) = G_SEXT_INREG [[COPY1]], 17 - ; GFX9: [[SMIN:%[0-9]+]]:_(s32) = G_SMIN [[SEXT_INREG]], [[SEXT_INREG1]] - ; GFX9: $vgpr0 = COPY [[SMIN]](s32) + ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; GFX9-NEXT: [[SEXT_INREG:%[0-9]+]]:_(s32) = G_SEXT_INREG [[COPY]], 17 + ; GFX9-NEXT: [[SEXT_INREG1:%[0-9]+]]:_(s32) = G_SEXT_INREG [[COPY1]], 17 + ; GFX9-NEXT: [[SMIN:%[0-9]+]]:_(s32) = G_SMIN [[SEXT_INREG]], [[SEXT_INREG1]] + ; GFX9-NEXT: $vgpr0 = COPY [[SMIN]](s32) %0:_(s32) = COPY $vgpr0 %1:_(s32) = COPY $vgpr1 %2:_(s17) = G_TRUNC %0 @@ -188,31 +188,31 @@ body: | ; SI-LABEL: name: test_smin_v2s32 ; SI: [[COPY:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr0_vgpr1 - ; SI: [[COPY1:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr2_vgpr3 - ; SI: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<2 x s32>) - ; SI: [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](<2 x s32>) - ; SI: [[SMIN:%[0-9]+]]:_(s32) = G_SMIN [[UV]], [[UV2]] - ; SI: [[SMIN1:%[0-9]+]]:_(s32) = G_SMIN [[UV1]], [[UV3]] - ; SI: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[SMIN]](s32), [[SMIN1]](s32) - ; SI: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x s32>) + ; SI-NEXT: [[COPY1:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr2_vgpr3 + ; SI-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<2 x s32>) + ; SI-NEXT: [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](<2 x s32>) + ; SI-NEXT: [[SMIN:%[0-9]+]]:_(s32) = G_SMIN [[UV]], [[UV2]] + ; SI-NEXT: [[SMIN1:%[0-9]+]]:_(s32) = G_SMIN [[UV1]], [[UV3]] + ; SI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[SMIN]](s32), [[SMIN1]](s32) + ; SI-NEXT: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x s32>) ; VI-LABEL: name: test_smin_v2s32 ; VI: [[COPY:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr0_vgpr1 - ; VI: [[COPY1:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr2_vgpr3 - ; VI: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<2 x s32>) - ; VI: [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](<2 x s32>) - ; VI: [[SMIN:%[0-9]+]]:_(s32) = G_SMIN [[UV]], [[UV2]] - ; VI: [[SMIN1:%[0-9]+]]:_(s32) = G_SMIN [[UV1]], [[UV3]] - ; VI: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[SMIN]](s32), [[SMIN1]](s32) - ; VI: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x s32>) + ; VI-NEXT: [[COPY1:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr2_vgpr3 + ; VI-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<2 x s32>) + ; VI-NEXT: [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](<2 x s32>) + ; VI-NEXT: [[SMIN:%[0-9]+]]:_(s32) = G_SMIN [[UV]], [[UV2]] + ; VI-NEXT: [[SMIN1:%[0-9]+]]:_(s32) = G_SMIN [[UV1]], [[UV3]] + ; VI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[SMIN]](s32), [[SMIN1]](s32) + ; 
VI-NEXT: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x s32>) ; GFX9-LABEL: name: test_smin_v2s32 ; GFX9: [[COPY:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr0_vgpr1 - ; GFX9: [[COPY1:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr2_vgpr3 - ; GFX9: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<2 x s32>) - ; GFX9: [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](<2 x s32>) - ; GFX9: [[SMIN:%[0-9]+]]:_(s32) = G_SMIN [[UV]], [[UV2]] - ; GFX9: [[SMIN1:%[0-9]+]]:_(s32) = G_SMIN [[UV1]], [[UV3]] - ; GFX9: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[SMIN]](s32), [[SMIN1]](s32) - ; GFX9: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x s32>) + ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr2_vgpr3 + ; GFX9-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<2 x s32>) + ; GFX9-NEXT: [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](<2 x s32>) + ; GFX9-NEXT: [[SMIN:%[0-9]+]]:_(s32) = G_SMIN [[UV]], [[UV2]] + ; GFX9-NEXT: [[SMIN1:%[0-9]+]]:_(s32) = G_SMIN [[UV1]], [[UV3]] + ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[SMIN]](s32), [[SMIN1]](s32) + ; GFX9-NEXT: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x s32>) %0:_(<2 x s32>) = COPY $vgpr0_vgpr1 %1:_(<2 x s32>) = COPY $vgpr2_vgpr3 %2:_(<2 x s32>) = G_SMIN %0, %1 @@ -227,34 +227,34 @@ body: | ; SI-LABEL: name: test_smin_v3s32 ; SI: [[COPY:%[0-9]+]]:_(<3 x s32>) = COPY $vgpr0_vgpr1_vgpr2 - ; SI: [[COPY1:%[0-9]+]]:_(<3 x s32>) = COPY $vgpr3_vgpr4_vgpr5 - ; SI: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<3 x s32>) - ; SI: [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](<3 x s32>) - ; SI: [[SMIN:%[0-9]+]]:_(s32) = G_SMIN [[UV]], [[UV3]] - ; SI: [[SMIN1:%[0-9]+]]:_(s32) = G_SMIN [[UV1]], [[UV4]] - ; SI: [[SMIN2:%[0-9]+]]:_(s32) = G_SMIN [[UV2]], [[UV5]] - ; SI: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[SMIN]](s32), [[SMIN1]](s32), [[SMIN2]](s32) - ; SI: $vgpr0_vgpr1_vgpr2 = COPY [[BUILD_VECTOR]](<3 x s32>) + ; SI-NEXT: [[COPY1:%[0-9]+]]:_(<3 x s32>) = COPY $vgpr3_vgpr4_vgpr5 + ; SI-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<3 x s32>) + ; SI-NEXT: [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](<3 x s32>) + ; SI-NEXT: [[SMIN:%[0-9]+]]:_(s32) = G_SMIN [[UV]], [[UV3]] + ; SI-NEXT: [[SMIN1:%[0-9]+]]:_(s32) = G_SMIN [[UV1]], [[UV4]] + ; SI-NEXT: [[SMIN2:%[0-9]+]]:_(s32) = G_SMIN [[UV2]], [[UV5]] + ; SI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[SMIN]](s32), [[SMIN1]](s32), [[SMIN2]](s32) + ; SI-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[BUILD_VECTOR]](<3 x s32>) ; VI-LABEL: name: test_smin_v3s32 ; VI: [[COPY:%[0-9]+]]:_(<3 x s32>) = COPY $vgpr0_vgpr1_vgpr2 - ; VI: [[COPY1:%[0-9]+]]:_(<3 x s32>) = COPY $vgpr3_vgpr4_vgpr5 - ; VI: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<3 x s32>) - ; VI: [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](<3 x s32>) - ; VI: [[SMIN:%[0-9]+]]:_(s32) = G_SMIN [[UV]], [[UV3]] - ; VI: [[SMIN1:%[0-9]+]]:_(s32) = G_SMIN [[UV1]], [[UV4]] - ; VI: [[SMIN2:%[0-9]+]]:_(s32) = G_SMIN [[UV2]], [[UV5]] - ; VI: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[SMIN]](s32), [[SMIN1]](s32), [[SMIN2]](s32) - ; VI: $vgpr0_vgpr1_vgpr2 = COPY [[BUILD_VECTOR]](<3 x s32>) + ; 
VI-NEXT: [[COPY1:%[0-9]+]]:_(<3 x s32>) = COPY $vgpr3_vgpr4_vgpr5 + ; VI-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<3 x s32>) + ; VI-NEXT: [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](<3 x s32>) + ; VI-NEXT: [[SMIN:%[0-9]+]]:_(s32) = G_SMIN [[UV]], [[UV3]] + ; VI-NEXT: [[SMIN1:%[0-9]+]]:_(s32) = G_SMIN [[UV1]], [[UV4]] + ; VI-NEXT: [[SMIN2:%[0-9]+]]:_(s32) = G_SMIN [[UV2]], [[UV5]] + ; VI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[SMIN]](s32), [[SMIN1]](s32), [[SMIN2]](s32) + ; VI-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[BUILD_VECTOR]](<3 x s32>) ; GFX9-LABEL: name: test_smin_v3s32 ; GFX9: [[COPY:%[0-9]+]]:_(<3 x s32>) = COPY $vgpr0_vgpr1_vgpr2 - ; GFX9: [[COPY1:%[0-9]+]]:_(<3 x s32>) = COPY $vgpr3_vgpr4_vgpr5 - ; GFX9: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<3 x s32>) - ; GFX9: [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](<3 x s32>) - ; GFX9: [[SMIN:%[0-9]+]]:_(s32) = G_SMIN [[UV]], [[UV3]] - ; GFX9: [[SMIN1:%[0-9]+]]:_(s32) = G_SMIN [[UV1]], [[UV4]] - ; GFX9: [[SMIN2:%[0-9]+]]:_(s32) = G_SMIN [[UV2]], [[UV5]] - ; GFX9: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[SMIN]](s32), [[SMIN1]](s32), [[SMIN2]](s32) - ; GFX9: $vgpr0_vgpr1_vgpr2 = COPY [[BUILD_VECTOR]](<3 x s32>) + ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(<3 x s32>) = COPY $vgpr3_vgpr4_vgpr5 + ; GFX9-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<3 x s32>) + ; GFX9-NEXT: [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](<3 x s32>) + ; GFX9-NEXT: [[SMIN:%[0-9]+]]:_(s32) = G_SMIN [[UV]], [[UV3]] + ; GFX9-NEXT: [[SMIN1:%[0-9]+]]:_(s32) = G_SMIN [[UV1]], [[UV4]] + ; GFX9-NEXT: [[SMIN2:%[0-9]+]]:_(s32) = G_SMIN [[UV2]], [[UV5]] + ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[SMIN]](s32), [[SMIN1]](s32), [[SMIN2]](s32) + ; GFX9-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[BUILD_VECTOR]](<3 x s32>) %0:_(<3 x s32>) = COPY $vgpr0_vgpr1_vgpr2 %1:_(<3 x s32>) = COPY $vgpr3_vgpr4_vgpr5 %2:_(<3 x s32>) = G_SMIN %0, %1 @@ -269,50 +269,50 @@ body: | ; SI-LABEL: name: test_smin_v2s16 ; SI: [[COPY:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0 - ; SI: [[COPY1:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr1 - ; SI: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY]](<2 x s16>) - ; SI: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; SI: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) - ; SI: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[COPY1]](<2 x s16>) - ; SI: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C]](s32) - ; SI: [[SEXT_INREG:%[0-9]+]]:_(s32) = G_SEXT_INREG [[BITCAST]], 16 - ; SI: [[SEXT_INREG1:%[0-9]+]]:_(s32) = G_SEXT_INREG [[BITCAST1]], 16 - ; SI: [[SMIN:%[0-9]+]]:_(s32) = G_SMIN [[SEXT_INREG]], [[SEXT_INREG1]] - ; SI: [[SEXT_INREG2:%[0-9]+]]:_(s32) = G_SEXT_INREG [[LSHR]], 16 - ; SI: [[SEXT_INREG3:%[0-9]+]]:_(s32) = G_SEXT_INREG [[LSHR1]], 16 - ; SI: [[SMIN1:%[0-9]+]]:_(s32) = G_SMIN [[SEXT_INREG2]], [[SEXT_INREG3]] - ; SI: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 - ; SI: [[AND:%[0-9]+]]:_(s32) = G_AND [[SMIN]], [[C1]] - ; SI: [[AND1:%[0-9]+]]:_(s32) = G_AND [[SMIN1]], [[C1]] - ; SI: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C]](s32) - ; SI: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]] - ; SI: [[BITCAST2:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) - ; SI: $vgpr0 = COPY 
[[BITCAST2]](<2 x s16>) + ; SI-NEXT: [[COPY1:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr1 + ; SI-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY]](<2 x s16>) + ; SI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; SI-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) + ; SI-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[COPY1]](<2 x s16>) + ; SI-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C]](s32) + ; SI-NEXT: [[SEXT_INREG:%[0-9]+]]:_(s32) = G_SEXT_INREG [[BITCAST]], 16 + ; SI-NEXT: [[SEXT_INREG1:%[0-9]+]]:_(s32) = G_SEXT_INREG [[BITCAST1]], 16 + ; SI-NEXT: [[SMIN:%[0-9]+]]:_(s32) = G_SMIN [[SEXT_INREG]], [[SEXT_INREG1]] + ; SI-NEXT: [[SEXT_INREG2:%[0-9]+]]:_(s32) = G_SEXT_INREG [[LSHR]], 16 + ; SI-NEXT: [[SEXT_INREG3:%[0-9]+]]:_(s32) = G_SEXT_INREG [[LSHR1]], 16 + ; SI-NEXT: [[SMIN1:%[0-9]+]]:_(s32) = G_SMIN [[SEXT_INREG2]], [[SEXT_INREG3]] + ; SI-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 + ; SI-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[SMIN]], [[C1]] + ; SI-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[SMIN1]], [[C1]] + ; SI-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C]](s32) + ; SI-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]] + ; SI-NEXT: [[BITCAST2:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) + ; SI-NEXT: $vgpr0 = COPY [[BITCAST2]](<2 x s16>) ; VI-LABEL: name: test_smin_v2s16 ; VI: [[COPY:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0 - ; VI: [[COPY1:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr1 - ; VI: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY]](<2 x s16>) - ; VI: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST]](s32) - ; VI: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; VI: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) - ; VI: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) - ; VI: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[COPY1]](<2 x s16>) - ; VI: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST1]](s32) - ; VI: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C]](s32) - ; VI: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR1]](s32) - ; VI: [[SMIN:%[0-9]+]]:_(s16) = G_SMIN [[TRUNC]], [[TRUNC2]] - ; VI: [[SMIN1:%[0-9]+]]:_(s16) = G_SMIN [[TRUNC1]], [[TRUNC3]] - ; VI: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[SMIN]](s16) - ; VI: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[SMIN1]](s16) - ; VI: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C]](s32) - ; VI: [[OR:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL]] - ; VI: [[BITCAST2:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) - ; VI: $vgpr0 = COPY [[BITCAST2]](<2 x s16>) + ; VI-NEXT: [[COPY1:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr1 + ; VI-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY]](<2 x s16>) + ; VI-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST]](s32) + ; VI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; VI-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) + ; VI-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) + ; VI-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[COPY1]](<2 x s16>) + ; VI-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST1]](s32) + ; VI-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C]](s32) + ; VI-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR1]](s32) + ; VI-NEXT: [[SMIN:%[0-9]+]]:_(s16) = G_SMIN [[TRUNC]], [[TRUNC2]] + ; VI-NEXT: [[SMIN1:%[0-9]+]]:_(s16) = G_SMIN [[TRUNC1]], [[TRUNC3]] + ; VI-NEXT: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[SMIN]](s16) + ; VI-NEXT: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[SMIN1]](s16) + ; VI-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C]](s32) + ; VI-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL]] + ; VI-NEXT: [[BITCAST2:%[0-9]+]]:_(<2 x 
s16>) = G_BITCAST [[OR]](s32) + ; VI-NEXT: $vgpr0 = COPY [[BITCAST2]](<2 x s16>) ; GFX9-LABEL: name: test_smin_v2s16 ; GFX9: [[COPY:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0 - ; GFX9: [[COPY1:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr1 - ; GFX9: [[SMIN:%[0-9]+]]:_(<2 x s16>) = G_SMIN [[COPY]], [[COPY1]] - ; GFX9: $vgpr0 = COPY [[SMIN]](<2 x s16>) + ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr1 + ; GFX9-NEXT: [[SMIN:%[0-9]+]]:_(<2 x s16>) = G_SMIN [[COPY]], [[COPY1]] + ; GFX9-NEXT: $vgpr0 = COPY [[SMIN]](<2 x s16>) %0:_(<2 x s16>) = COPY $vgpr0 %1:_(<2 x s16>) = COPY $vgpr1 %2:_(<2 x s16>) = G_SMIN %0, %1 @@ -327,73 +327,73 @@ body: | ; SI-LABEL: name: test_smin_v3s16 ; SI: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF - ; SI: [[DEF1:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF - ; SI: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<4 x s16>) - ; SI: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>) - ; SI: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; SI: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) - ; SI: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>) - ; SI: [[UV2:%[0-9]+]]:_(<2 x s16>), [[UV3:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF1]](<4 x s16>) - ; SI: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[UV2]](<2 x s16>) - ; SI: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C]](s32) - ; SI: [[BITCAST3:%[0-9]+]]:_(s32) = G_BITCAST [[UV3]](<2 x s16>) - ; SI: [[SEXT_INREG:%[0-9]+]]:_(s32) = G_SEXT_INREG [[BITCAST]], 16 - ; SI: [[SEXT_INREG1:%[0-9]+]]:_(s32) = G_SEXT_INREG [[BITCAST2]], 16 - ; SI: [[SMIN:%[0-9]+]]:_(s32) = G_SMIN [[SEXT_INREG]], [[SEXT_INREG1]] - ; SI: [[SEXT_INREG2:%[0-9]+]]:_(s32) = G_SEXT_INREG [[LSHR]], 16 - ; SI: [[SEXT_INREG3:%[0-9]+]]:_(s32) = G_SEXT_INREG [[LSHR1]], 16 - ; SI: [[SMIN1:%[0-9]+]]:_(s32) = G_SMIN [[SEXT_INREG2]], [[SEXT_INREG3]] - ; SI: [[SEXT_INREG4:%[0-9]+]]:_(s32) = G_SEXT_INREG [[BITCAST1]], 16 - ; SI: [[SEXT_INREG5:%[0-9]+]]:_(s32) = G_SEXT_INREG [[BITCAST3]], 16 - ; SI: [[SMIN2:%[0-9]+]]:_(s32) = G_SMIN [[SEXT_INREG4]], [[SEXT_INREG5]] - ; SI: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[SMIN]](s32), [[SMIN1]](s32), [[SMIN2]](s32) - ; SI: S_NOP 0, implicit [[BUILD_VECTOR]](<3 x s32>) + ; SI-NEXT: [[DEF1:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF + ; SI-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<4 x s16>) + ; SI-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>) + ; SI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; SI-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) + ; SI-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>) + ; SI-NEXT: [[UV2:%[0-9]+]]:_(<2 x s16>), [[UV3:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF1]](<4 x s16>) + ; SI-NEXT: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[UV2]](<2 x s16>) + ; SI-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C]](s32) + ; SI-NEXT: [[BITCAST3:%[0-9]+]]:_(s32) = G_BITCAST [[UV3]](<2 x s16>) + ; SI-NEXT: [[SEXT_INREG:%[0-9]+]]:_(s32) = G_SEXT_INREG [[BITCAST]], 16 + ; SI-NEXT: [[SEXT_INREG1:%[0-9]+]]:_(s32) = G_SEXT_INREG [[BITCAST2]], 16 + ; SI-NEXT: [[SMIN:%[0-9]+]]:_(s32) = G_SMIN [[SEXT_INREG]], [[SEXT_INREG1]] + ; SI-NEXT: [[SEXT_INREG2:%[0-9]+]]:_(s32) = G_SEXT_INREG [[LSHR]], 16 + ; SI-NEXT: [[SEXT_INREG3:%[0-9]+]]:_(s32) = G_SEXT_INREG [[LSHR1]], 16 + ; SI-NEXT: [[SMIN1:%[0-9]+]]:_(s32) = G_SMIN [[SEXT_INREG2]], [[SEXT_INREG3]] + ; SI-NEXT: [[SEXT_INREG4:%[0-9]+]]:_(s32) = G_SEXT_INREG [[BITCAST1]], 16 + ; SI-NEXT: 
[[SEXT_INREG5:%[0-9]+]]:_(s32) = G_SEXT_INREG [[BITCAST3]], 16 + ; SI-NEXT: [[SMIN2:%[0-9]+]]:_(s32) = G_SMIN [[SEXT_INREG4]], [[SEXT_INREG5]] + ; SI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[SMIN]](s32), [[SMIN1]](s32), [[SMIN2]](s32) + ; SI-NEXT: S_NOP 0, implicit [[BUILD_VECTOR]](<3 x s32>) ; VI-LABEL: name: test_smin_v3s16 ; VI: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF - ; VI: [[DEF1:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF - ; VI: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<4 x s16>) - ; VI: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>) - ; VI: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST]](s32) - ; VI: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; VI: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) - ; VI: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) - ; VI: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>) - ; VI: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST1]](s32) - ; VI: [[UV2:%[0-9]+]]:_(<2 x s16>), [[UV3:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF1]](<4 x s16>) - ; VI: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[UV2]](<2 x s16>) - ; VI: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST2]](s32) - ; VI: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C]](s32) - ; VI: [[TRUNC4:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR1]](s32) - ; VI: [[BITCAST3:%[0-9]+]]:_(s32) = G_BITCAST [[UV3]](<2 x s16>) - ; VI: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST3]](s32) - ; VI: [[SMIN:%[0-9]+]]:_(s16) = G_SMIN [[TRUNC]], [[TRUNC3]] - ; VI: [[SMIN1:%[0-9]+]]:_(s16) = G_SMIN [[TRUNC1]], [[TRUNC4]] - ; VI: [[SMIN2:%[0-9]+]]:_(s16) = G_SMIN [[TRUNC2]], [[TRUNC5]] - ; VI: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[SMIN]](s16) - ; VI: [[ANYEXT1:%[0-9]+]]:_(s32) = G_ANYEXT [[SMIN1]](s16) - ; VI: [[ANYEXT2:%[0-9]+]]:_(s32) = G_ANYEXT [[SMIN2]](s16) - ; VI: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[ANYEXT]](s32), [[ANYEXT1]](s32), [[ANYEXT2]](s32) - ; VI: S_NOP 0, implicit [[BUILD_VECTOR]](<3 x s32>) + ; VI-NEXT: [[DEF1:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF + ; VI-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<4 x s16>) + ; VI-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>) + ; VI-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST]](s32) + ; VI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; VI-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) + ; VI-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) + ; VI-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>) + ; VI-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST1]](s32) + ; VI-NEXT: [[UV2:%[0-9]+]]:_(<2 x s16>), [[UV3:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF1]](<4 x s16>) + ; VI-NEXT: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[UV2]](<2 x s16>) + ; VI-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST2]](s32) + ; VI-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C]](s32) + ; VI-NEXT: [[TRUNC4:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR1]](s32) + ; VI-NEXT: [[BITCAST3:%[0-9]+]]:_(s32) = G_BITCAST [[UV3]](<2 x s16>) + ; VI-NEXT: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST3]](s32) + ; VI-NEXT: [[SMIN:%[0-9]+]]:_(s16) = G_SMIN [[TRUNC]], [[TRUNC3]] + ; VI-NEXT: [[SMIN1:%[0-9]+]]:_(s16) = G_SMIN [[TRUNC1]], [[TRUNC4]] + ; VI-NEXT: [[SMIN2:%[0-9]+]]:_(s16) = G_SMIN [[TRUNC2]], [[TRUNC5]] + ; VI-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[SMIN]](s16) + ; VI-NEXT: [[ANYEXT1:%[0-9]+]]:_(s32) = G_ANYEXT [[SMIN1]](s16) + ; VI-NEXT: [[ANYEXT2:%[0-9]+]]:_(s32) = 
G_ANYEXT [[SMIN2]](s16) + ; VI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[ANYEXT]](s32), [[ANYEXT1]](s32), [[ANYEXT2]](s32) + ; VI-NEXT: S_NOP 0, implicit [[BUILD_VECTOR]](<3 x s32>) ; GFX9-LABEL: name: test_smin_v3s16 ; GFX9: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF - ; GFX9: [[DEF1:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF - ; GFX9: [[CONCAT_VECTORS:%[0-9]+]]:_(<12 x s16>) = G_CONCAT_VECTORS [[DEF]](<4 x s16>), [[DEF1]](<4 x s16>), [[DEF1]](<4 x s16>) - ; GFX9: [[UV:%[0-9]+]]:_(<3 x s16>), [[UV1:%[0-9]+]]:_(<3 x s16>), [[UV2:%[0-9]+]]:_(<3 x s16>), [[UV3:%[0-9]+]]:_(<3 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<12 x s16>) - ; GFX9: [[DEF2:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF - ; GFX9: [[CONCAT_VECTORS1:%[0-9]+]]:_(<12 x s16>) = G_CONCAT_VECTORS [[DEF2]](<4 x s16>), [[DEF1]](<4 x s16>), [[DEF1]](<4 x s16>) - ; GFX9: [[UV4:%[0-9]+]]:_(<3 x s16>), [[UV5:%[0-9]+]]:_(<3 x s16>), [[UV6:%[0-9]+]]:_(<3 x s16>), [[UV7:%[0-9]+]]:_(<3 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS1]](<12 x s16>) - ; GFX9: [[INSERT:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF1]], [[UV]](<3 x s16>), 0 - ; GFX9: [[INSERT1:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF1]], [[UV4]](<3 x s16>), 0 - ; GFX9: [[UV8:%[0-9]+]]:_(<2 x s16>), [[UV9:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[INSERT]](<4 x s16>) - ; GFX9: [[UV10:%[0-9]+]]:_(<2 x s16>), [[UV11:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[INSERT1]](<4 x s16>) - ; GFX9: [[SMIN:%[0-9]+]]:_(<2 x s16>) = G_SMIN [[UV8]], [[UV10]] - ; GFX9: [[SMIN1:%[0-9]+]]:_(<2 x s16>) = G_SMIN [[UV9]], [[UV11]] - ; GFX9: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[SMIN]](<2 x s16>) - ; GFX9: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; GFX9: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) - ; GFX9: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[SMIN1]](<2 x s16>) - ; GFX9: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[BITCAST]](s32), [[LSHR]](s32), [[BITCAST1]](s32) - ; GFX9: S_NOP 0, implicit [[BUILD_VECTOR]](<3 x s32>) + ; GFX9-NEXT: [[DEF1:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF + ; GFX9-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<12 x s16>) = G_CONCAT_VECTORS [[DEF]](<4 x s16>), [[DEF1]](<4 x s16>), [[DEF1]](<4 x s16>) + ; GFX9-NEXT: [[UV:%[0-9]+]]:_(<3 x s16>), [[UV1:%[0-9]+]]:_(<3 x s16>), [[UV2:%[0-9]+]]:_(<3 x s16>), [[UV3:%[0-9]+]]:_(<3 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<12 x s16>) + ; GFX9-NEXT: [[DEF2:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF + ; GFX9-NEXT: [[CONCAT_VECTORS1:%[0-9]+]]:_(<12 x s16>) = G_CONCAT_VECTORS [[DEF2]](<4 x s16>), [[DEF1]](<4 x s16>), [[DEF1]](<4 x s16>) + ; GFX9-NEXT: [[UV4:%[0-9]+]]:_(<3 x s16>), [[UV5:%[0-9]+]]:_(<3 x s16>), [[UV6:%[0-9]+]]:_(<3 x s16>), [[UV7:%[0-9]+]]:_(<3 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS1]](<12 x s16>) + ; GFX9-NEXT: [[INSERT:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF1]], [[UV]](<3 x s16>), 0 + ; GFX9-NEXT: [[INSERT1:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF1]], [[UV4]](<3 x s16>), 0 + ; GFX9-NEXT: [[UV8:%[0-9]+]]:_(<2 x s16>), [[UV9:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[INSERT]](<4 x s16>) + ; GFX9-NEXT: [[UV10:%[0-9]+]]:_(<2 x s16>), [[UV11:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[INSERT1]](<4 x s16>) + ; GFX9-NEXT: [[SMIN:%[0-9]+]]:_(<2 x s16>) = G_SMIN [[UV8]], [[UV10]] + ; GFX9-NEXT: [[SMIN1:%[0-9]+]]:_(<2 x s16>) = G_SMIN [[UV9]], [[UV11]] + ; GFX9-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[SMIN]](<2 x s16>) + ; GFX9-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; GFX9-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) + ; GFX9-NEXT: 
[[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[SMIN1]](<2 x s16>) + ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[BITCAST]](s32), [[LSHR]](s32), [[BITCAST1]](s32) + ; GFX9-NEXT: S_NOP 0, implicit [[BUILD_VECTOR]](<3 x s32>) %0:_(<3 x s16>) = G_IMPLICIT_DEF %1:_(<3 x s16>) = G_IMPLICIT_DEF %2:_(<3 x s16>) = G_SMIN %0, %1 @@ -409,90 +409,90 @@ body: | ; SI-LABEL: name: test_smin_v4s16 ; SI: [[COPY:%[0-9]+]]:_(<4 x s16>) = COPY $vgpr0_vgpr1 - ; SI: [[COPY1:%[0-9]+]]:_(<4 x s16>) = COPY $vgpr2_vgpr3 - ; SI: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[COPY]](<4 x s16>) - ; SI: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>) - ; SI: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; SI: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) - ; SI: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>) - ; SI: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C]](s32) - ; SI: [[UV2:%[0-9]+]]:_(<2 x s16>), [[UV3:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[COPY1]](<4 x s16>) - ; SI: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[UV2]](<2 x s16>) - ; SI: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C]](s32) - ; SI: [[BITCAST3:%[0-9]+]]:_(s32) = G_BITCAST [[UV3]](<2 x s16>) - ; SI: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST3]], [[C]](s32) - ; SI: [[SEXT_INREG:%[0-9]+]]:_(s32) = G_SEXT_INREG [[BITCAST]], 16 - ; SI: [[SEXT_INREG1:%[0-9]+]]:_(s32) = G_SEXT_INREG [[BITCAST2]], 16 - ; SI: [[SMIN:%[0-9]+]]:_(s32) = G_SMIN [[SEXT_INREG]], [[SEXT_INREG1]] - ; SI: [[SEXT_INREG2:%[0-9]+]]:_(s32) = G_SEXT_INREG [[LSHR]], 16 - ; SI: [[SEXT_INREG3:%[0-9]+]]:_(s32) = G_SEXT_INREG [[LSHR2]], 16 - ; SI: [[SMIN1:%[0-9]+]]:_(s32) = G_SMIN [[SEXT_INREG2]], [[SEXT_INREG3]] - ; SI: [[SEXT_INREG4:%[0-9]+]]:_(s32) = G_SEXT_INREG [[BITCAST1]], 16 - ; SI: [[SEXT_INREG5:%[0-9]+]]:_(s32) = G_SEXT_INREG [[BITCAST3]], 16 - ; SI: [[SMIN2:%[0-9]+]]:_(s32) = G_SMIN [[SEXT_INREG4]], [[SEXT_INREG5]] - ; SI: [[SEXT_INREG6:%[0-9]+]]:_(s32) = G_SEXT_INREG [[LSHR1]], 16 - ; SI: [[SEXT_INREG7:%[0-9]+]]:_(s32) = G_SEXT_INREG [[LSHR3]], 16 - ; SI: [[SMIN3:%[0-9]+]]:_(s32) = G_SMIN [[SEXT_INREG6]], [[SEXT_INREG7]] - ; SI: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 - ; SI: [[AND:%[0-9]+]]:_(s32) = G_AND [[SMIN]], [[C1]] - ; SI: [[AND1:%[0-9]+]]:_(s32) = G_AND [[SMIN1]], [[C1]] - ; SI: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C]](s32) - ; SI: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]] - ; SI: [[BITCAST4:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) - ; SI: [[AND2:%[0-9]+]]:_(s32) = G_AND [[SMIN2]], [[C1]] - ; SI: [[AND3:%[0-9]+]]:_(s32) = G_AND [[SMIN3]], [[C1]] - ; SI: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C]](s32) - ; SI: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL1]] - ; SI: [[BITCAST5:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32) - ; SI: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BITCAST4]](<2 x s16>), [[BITCAST5]](<2 x s16>) - ; SI: $vgpr0_vgpr1 = COPY [[CONCAT_VECTORS]](<4 x s16>) + ; SI-NEXT: [[COPY1:%[0-9]+]]:_(<4 x s16>) = COPY $vgpr2_vgpr3 + ; SI-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[COPY]](<4 x s16>) + ; SI-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>) + ; SI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; SI-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) + ; SI-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>) + ; SI-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C]](s32) + ; SI-NEXT: [[UV2:%[0-9]+]]:_(<2 x s16>), 
[[UV3:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[COPY1]](<4 x s16>) + ; SI-NEXT: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[UV2]](<2 x s16>) + ; SI-NEXT: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C]](s32) + ; SI-NEXT: [[BITCAST3:%[0-9]+]]:_(s32) = G_BITCAST [[UV3]](<2 x s16>) + ; SI-NEXT: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST3]], [[C]](s32) + ; SI-NEXT: [[SEXT_INREG:%[0-9]+]]:_(s32) = G_SEXT_INREG [[BITCAST]], 16 + ; SI-NEXT: [[SEXT_INREG1:%[0-9]+]]:_(s32) = G_SEXT_INREG [[BITCAST2]], 16 + ; SI-NEXT: [[SMIN:%[0-9]+]]:_(s32) = G_SMIN [[SEXT_INREG]], [[SEXT_INREG1]] + ; SI-NEXT: [[SEXT_INREG2:%[0-9]+]]:_(s32) = G_SEXT_INREG [[LSHR]], 16 + ; SI-NEXT: [[SEXT_INREG3:%[0-9]+]]:_(s32) = G_SEXT_INREG [[LSHR2]], 16 + ; SI-NEXT: [[SMIN1:%[0-9]+]]:_(s32) = G_SMIN [[SEXT_INREG2]], [[SEXT_INREG3]] + ; SI-NEXT: [[SEXT_INREG4:%[0-9]+]]:_(s32) = G_SEXT_INREG [[BITCAST1]], 16 + ; SI-NEXT: [[SEXT_INREG5:%[0-9]+]]:_(s32) = G_SEXT_INREG [[BITCAST3]], 16 + ; SI-NEXT: [[SMIN2:%[0-9]+]]:_(s32) = G_SMIN [[SEXT_INREG4]], [[SEXT_INREG5]] + ; SI-NEXT: [[SEXT_INREG6:%[0-9]+]]:_(s32) = G_SEXT_INREG [[LSHR1]], 16 + ; SI-NEXT: [[SEXT_INREG7:%[0-9]+]]:_(s32) = G_SEXT_INREG [[LSHR3]], 16 + ; SI-NEXT: [[SMIN3:%[0-9]+]]:_(s32) = G_SMIN [[SEXT_INREG6]], [[SEXT_INREG7]] + ; SI-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 + ; SI-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[SMIN]], [[C1]] + ; SI-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[SMIN1]], [[C1]] + ; SI-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C]](s32) + ; SI-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]] + ; SI-NEXT: [[BITCAST4:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) + ; SI-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[SMIN2]], [[C1]] + ; SI-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[SMIN3]], [[C1]] + ; SI-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C]](s32) + ; SI-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL1]] + ; SI-NEXT: [[BITCAST5:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32) + ; SI-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BITCAST4]](<2 x s16>), [[BITCAST5]](<2 x s16>) + ; SI-NEXT: $vgpr0_vgpr1 = COPY [[CONCAT_VECTORS]](<4 x s16>) ; VI-LABEL: name: test_smin_v4s16 ; VI: [[COPY:%[0-9]+]]:_(<4 x s16>) = COPY $vgpr0_vgpr1 - ; VI: [[COPY1:%[0-9]+]]:_(<4 x s16>) = COPY $vgpr2_vgpr3 - ; VI: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[COPY]](<4 x s16>) - ; VI: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>) - ; VI: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST]](s32) - ; VI: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; VI: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) - ; VI: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) - ; VI: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>) - ; VI: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST1]](s32) - ; VI: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C]](s32) - ; VI: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR1]](s32) - ; VI: [[UV2:%[0-9]+]]:_(<2 x s16>), [[UV3:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[COPY1]](<4 x s16>) - ; VI: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[UV2]](<2 x s16>) - ; VI: [[TRUNC4:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST2]](s32) - ; VI: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C]](s32) - ; VI: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR2]](s32) - ; VI: [[BITCAST3:%[0-9]+]]:_(s32) = G_BITCAST [[UV3]](<2 x s16>) - ; VI: [[TRUNC6:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST3]](s32) - ; VI: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST3]], [[C]](s32) - ; VI: [[TRUNC7:%[0-9]+]]:_(s16) = 
G_TRUNC [[LSHR3]](s32) - ; VI: [[SMIN:%[0-9]+]]:_(s16) = G_SMIN [[TRUNC]], [[TRUNC4]] - ; VI: [[SMIN1:%[0-9]+]]:_(s16) = G_SMIN [[TRUNC1]], [[TRUNC5]] - ; VI: [[SMIN2:%[0-9]+]]:_(s16) = G_SMIN [[TRUNC2]], [[TRUNC6]] - ; VI: [[SMIN3:%[0-9]+]]:_(s16) = G_SMIN [[TRUNC3]], [[TRUNC7]] - ; VI: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[SMIN]](s16) - ; VI: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[SMIN1]](s16) - ; VI: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C]](s32) - ; VI: [[OR:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL]] - ; VI: [[BITCAST4:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) - ; VI: [[ZEXT2:%[0-9]+]]:_(s32) = G_ZEXT [[SMIN2]](s16) - ; VI: [[ZEXT3:%[0-9]+]]:_(s32) = G_ZEXT [[SMIN3]](s16) - ; VI: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[ZEXT3]], [[C]](s32) - ; VI: [[OR1:%[0-9]+]]:_(s32) = G_OR [[ZEXT2]], [[SHL1]] - ; VI: [[BITCAST5:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32) - ; VI: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BITCAST4]](<2 x s16>), [[BITCAST5]](<2 x s16>) - ; VI: $vgpr0_vgpr1 = COPY [[CONCAT_VECTORS]](<4 x s16>) + ; VI-NEXT: [[COPY1:%[0-9]+]]:_(<4 x s16>) = COPY $vgpr2_vgpr3 + ; VI-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[COPY]](<4 x s16>) + ; VI-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>) + ; VI-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST]](s32) + ; VI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; VI-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) + ; VI-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) + ; VI-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>) + ; VI-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST1]](s32) + ; VI-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C]](s32) + ; VI-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR1]](s32) + ; VI-NEXT: [[UV2:%[0-9]+]]:_(<2 x s16>), [[UV3:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[COPY1]](<4 x s16>) + ; VI-NEXT: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[UV2]](<2 x s16>) + ; VI-NEXT: [[TRUNC4:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST2]](s32) + ; VI-NEXT: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C]](s32) + ; VI-NEXT: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR2]](s32) + ; VI-NEXT: [[BITCAST3:%[0-9]+]]:_(s32) = G_BITCAST [[UV3]](<2 x s16>) + ; VI-NEXT: [[TRUNC6:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST3]](s32) + ; VI-NEXT: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST3]], [[C]](s32) + ; VI-NEXT: [[TRUNC7:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR3]](s32) + ; VI-NEXT: [[SMIN:%[0-9]+]]:_(s16) = G_SMIN [[TRUNC]], [[TRUNC4]] + ; VI-NEXT: [[SMIN1:%[0-9]+]]:_(s16) = G_SMIN [[TRUNC1]], [[TRUNC5]] + ; VI-NEXT: [[SMIN2:%[0-9]+]]:_(s16) = G_SMIN [[TRUNC2]], [[TRUNC6]] + ; VI-NEXT: [[SMIN3:%[0-9]+]]:_(s16) = G_SMIN [[TRUNC3]], [[TRUNC7]] + ; VI-NEXT: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[SMIN]](s16) + ; VI-NEXT: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[SMIN1]](s16) + ; VI-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C]](s32) + ; VI-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL]] + ; VI-NEXT: [[BITCAST4:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) + ; VI-NEXT: [[ZEXT2:%[0-9]+]]:_(s32) = G_ZEXT [[SMIN2]](s16) + ; VI-NEXT: [[ZEXT3:%[0-9]+]]:_(s32) = G_ZEXT [[SMIN3]](s16) + ; VI-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[ZEXT3]], [[C]](s32) + ; VI-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[ZEXT2]], [[SHL1]] + ; VI-NEXT: [[BITCAST5:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32) + ; VI-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BITCAST4]](<2 x s16>), [[BITCAST5]](<2 x s16>) + ; VI-NEXT: $vgpr0_vgpr1 = 
COPY [[CONCAT_VECTORS]](<4 x s16>) ; GFX9-LABEL: name: test_smin_v4s16 ; GFX9: [[COPY:%[0-9]+]]:_(<4 x s16>) = COPY $vgpr0_vgpr1 - ; GFX9: [[COPY1:%[0-9]+]]:_(<4 x s16>) = COPY $vgpr2_vgpr3 - ; GFX9: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[COPY]](<4 x s16>) - ; GFX9: [[UV2:%[0-9]+]]:_(<2 x s16>), [[UV3:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[COPY1]](<4 x s16>) - ; GFX9: [[SMIN:%[0-9]+]]:_(<2 x s16>) = G_SMIN [[UV]], [[UV2]] - ; GFX9: [[SMIN1:%[0-9]+]]:_(<2 x s16>) = G_SMIN [[UV1]], [[UV3]] - ; GFX9: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[SMIN]](<2 x s16>), [[SMIN1]](<2 x s16>) - ; GFX9: $vgpr0_vgpr1 = COPY [[CONCAT_VECTORS]](<4 x s16>) + ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(<4 x s16>) = COPY $vgpr2_vgpr3 + ; GFX9-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[COPY]](<4 x s16>) + ; GFX9-NEXT: [[UV2:%[0-9]+]]:_(<2 x s16>), [[UV3:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[COPY1]](<4 x s16>) + ; GFX9-NEXT: [[SMIN:%[0-9]+]]:_(<2 x s16>) = G_SMIN [[UV]], [[UV2]] + ; GFX9-NEXT: [[SMIN1:%[0-9]+]]:_(<2 x s16>) = G_SMIN [[UV1]], [[UV3]] + ; GFX9-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[SMIN]](<2 x s16>), [[SMIN1]](<2 x s16>) + ; GFX9-NEXT: $vgpr0_vgpr1 = COPY [[CONCAT_VECTORS]](<4 x s16>) %0:_(<4 x s16>) = COPY $vgpr0_vgpr1 %1:_(<4 x s16>) = COPY $vgpr2_vgpr3 %2:_(<4 x s16>) = G_SMIN %0, %1 diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-srem.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-srem.mir index cc23f0e64e56..841aac4304bc 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-srem.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-srem.mir @@ -1,8 +1,8 @@ # NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py -# RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=tahiti -O0 -run-pass=legalizer -global-isel-abort=0 -o - %s | FileCheck -check-prefix=GFX6 %s -# RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=fiji -O0 -run-pass=legalizer -global-isel-abort=0 -o - %s | FileCheck -check-prefix=GFX8 %s -# RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx900 -O0 -run-pass=legalizer -global-isel-abort=0 -o - %s | FileCheck -check-prefix=GFX9 %s -# RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx1010 -O0 -run-pass=legalizer -global-isel-abort=0 -o - %s | FileCheck -check-prefix=GFX9 %s +# RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=tahiti -O0 -run-pass=legalizer -o - %s | FileCheck -check-prefix=GFX6 %s +# RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=fiji -O0 -run-pass=legalizer -o - %s | FileCheck -check-prefix=GFX8 %s +# RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx900 -O0 -run-pass=legalizer -o - %s | FileCheck -check-prefix=GFX9 %s +# RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx1010 -O0 -run-pass=legalizer -o - %s | FileCheck -check-prefix=GFX9 %s --- name: test_srem_s32 diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-sshlsat.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-sshlsat.mir index 9ad2e5c30258..21a553538466 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-sshlsat.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-sshlsat.mir @@ -1,8 +1,8 @@ # NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py -# RUN: llc -global-isel-abort=0 -march=amdgcn -mcpu=tahiti -run-pass=legalizer %s -o - | FileCheck -check-prefix=GFX6 %s -# RUN: llc -global-isel-abort=0 -march=amdgcn -mcpu=fiji -run-pass=legalizer %s -o - | FileCheck -check-prefix=GFX8 %s -# RUN: llc -global-isel-abort=0 -march=amdgcn -mcpu=gfx900 
-run-pass=legalizer %s -o - | FileCheck -check-prefix=GFX9 %s -# RUN: llc -global-isel-abort=0 -march=amdgcn -mcpu=gfx1010 -run-pass=legalizer %s -o - | FileCheck -check-prefix=GFX9 %s +# RUN: llc -march=amdgcn -mcpu=tahiti -run-pass=legalizer %s -o - | FileCheck -check-prefix=GFX6 %s +# RUN: llc -march=amdgcn -mcpu=fiji -run-pass=legalizer %s -o - | FileCheck -check-prefix=GFX8 %s +# RUN: llc -march=amdgcn -mcpu=gfx900 -run-pass=legalizer %s -o - | FileCheck -check-prefix=GFX9 %s +# RUN: llc -march=amdgcn -mcpu=gfx1010 -run-pass=legalizer %s -o - | FileCheck -check-prefix=GFX9 %s --- name: sshlsat_s7 @@ -12,64 +12,64 @@ body: | ; GFX6-LABEL: name: sshlsat_s7 ; GFX6: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX6: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX6: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 127 - ; GFX6: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C]] - ; GFX6: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 25 - ; GFX6: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY]], [[C1]](s32) - ; GFX6: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[SHL]], [[AND]](s32) - ; GFX6: [[ASHR:%[0-9]+]]:_(s32) = G_ASHR [[SHL1]], [[AND]](s32) - ; GFX6: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 -2147483648 - ; GFX6: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 2147483647 - ; GFX6: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 - ; GFX6: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(slt), [[SHL]](s32), [[C4]] - ; GFX6: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[ICMP]](s1), [[C2]], [[C3]] - ; GFX6: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[SHL]](s32), [[ASHR]] - ; GFX6: [[SELECT1:%[0-9]+]]:_(s32) = G_SELECT [[ICMP1]](s1), [[SELECT]], [[SHL1]] - ; GFX6: [[ASHR1:%[0-9]+]]:_(s32) = G_ASHR [[SELECT1]], [[C1]](s32) - ; GFX6: $vgpr0 = COPY [[ASHR1]](s32) + ; GFX6-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; GFX6-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 127 + ; GFX6-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C]] + ; GFX6-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 25 + ; GFX6-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY]], [[C1]](s32) + ; GFX6-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[SHL]], [[AND]](s32) + ; GFX6-NEXT: [[ASHR:%[0-9]+]]:_(s32) = G_ASHR [[SHL1]], [[AND]](s32) + ; GFX6-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 -2147483648 + ; GFX6-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 2147483647 + ; GFX6-NEXT: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 + ; GFX6-NEXT: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(slt), [[SHL]](s32), [[C4]] + ; GFX6-NEXT: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[ICMP]](s1), [[C2]], [[C3]] + ; GFX6-NEXT: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[SHL]](s32), [[ASHR]] + ; GFX6-NEXT: [[SELECT1:%[0-9]+]]:_(s32) = G_SELECT [[ICMP1]](s1), [[SELECT]], [[SHL1]] + ; GFX6-NEXT: [[ASHR1:%[0-9]+]]:_(s32) = G_ASHR [[SELECT1]], [[C1]](s32) + ; GFX6-NEXT: $vgpr0 = COPY [[ASHR1]](s32) ; GFX8-LABEL: name: sshlsat_s7 ; GFX8: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX8: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX8: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY]](s32) - ; GFX8: [[C:%[0-9]+]]:_(s16) = G_CONSTANT i16 127 - ; GFX8: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY1]](s32) - ; GFX8: [[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC1]], [[C]] - ; GFX8: [[C1:%[0-9]+]]:_(s16) = G_CONSTANT i16 9 - ; GFX8: [[SHL:%[0-9]+]]:_(s16) = G_SHL [[TRUNC]], [[C1]](s16) - ; GFX8: [[SHL1:%[0-9]+]]:_(s16) = G_SHL [[SHL]], [[AND]](s16) - ; GFX8: [[ASHR:%[0-9]+]]:_(s16) = G_ASHR [[SHL1]], [[AND]](s16) - ; GFX8: [[C2:%[0-9]+]]:_(s16) = G_CONSTANT i16 -32768 - ; GFX8: [[C3:%[0-9]+]]:_(s16) = G_CONSTANT i16 32767 - ; GFX8: [[C4:%[0-9]+]]:_(s16) = G_CONSTANT 
i16 0 - ; GFX8: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(slt), [[SHL]](s16), [[C4]] - ; GFX8: [[SELECT:%[0-9]+]]:_(s16) = G_SELECT [[ICMP]](s1), [[C2]], [[C3]] - ; GFX8: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[SHL]](s16), [[ASHR]] - ; GFX8: [[SELECT1:%[0-9]+]]:_(s16) = G_SELECT [[ICMP1]](s1), [[SELECT]], [[SHL1]] - ; GFX8: [[ASHR1:%[0-9]+]]:_(s16) = G_ASHR [[SELECT1]], [[C1]](s16) - ; GFX8: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[ASHR1]](s16) - ; GFX8: $vgpr0 = COPY [[ANYEXT]](s32) + ; GFX8-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; GFX8-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY]](s32) + ; GFX8-NEXT: [[C:%[0-9]+]]:_(s16) = G_CONSTANT i16 127 + ; GFX8-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY1]](s32) + ; GFX8-NEXT: [[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC1]], [[C]] + ; GFX8-NEXT: [[C1:%[0-9]+]]:_(s16) = G_CONSTANT i16 9 + ; GFX8-NEXT: [[SHL:%[0-9]+]]:_(s16) = G_SHL [[TRUNC]], [[C1]](s16) + ; GFX8-NEXT: [[SHL1:%[0-9]+]]:_(s16) = G_SHL [[SHL]], [[AND]](s16) + ; GFX8-NEXT: [[ASHR:%[0-9]+]]:_(s16) = G_ASHR [[SHL1]], [[AND]](s16) + ; GFX8-NEXT: [[C2:%[0-9]+]]:_(s16) = G_CONSTANT i16 -32768 + ; GFX8-NEXT: [[C3:%[0-9]+]]:_(s16) = G_CONSTANT i16 32767 + ; GFX8-NEXT: [[C4:%[0-9]+]]:_(s16) = G_CONSTANT i16 0 + ; GFX8-NEXT: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(slt), [[SHL]](s16), [[C4]] + ; GFX8-NEXT: [[SELECT:%[0-9]+]]:_(s16) = G_SELECT [[ICMP]](s1), [[C2]], [[C3]] + ; GFX8-NEXT: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[SHL]](s16), [[ASHR]] + ; GFX8-NEXT: [[SELECT1:%[0-9]+]]:_(s16) = G_SELECT [[ICMP1]](s1), [[SELECT]], [[SHL1]] + ; GFX8-NEXT: [[ASHR1:%[0-9]+]]:_(s16) = G_ASHR [[SELECT1]], [[C1]](s16) + ; GFX8-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[ASHR1]](s16) + ; GFX8-NEXT: $vgpr0 = COPY [[ANYEXT]](s32) ; GFX9-LABEL: name: sshlsat_s7 ; GFX9: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX9: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX9: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY]](s32) - ; GFX9: [[C:%[0-9]+]]:_(s16) = G_CONSTANT i16 127 - ; GFX9: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY1]](s32) - ; GFX9: [[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC1]], [[C]] - ; GFX9: [[C1:%[0-9]+]]:_(s16) = G_CONSTANT i16 9 - ; GFX9: [[SHL:%[0-9]+]]:_(s16) = G_SHL [[TRUNC]], [[C1]](s16) - ; GFX9: [[SHL1:%[0-9]+]]:_(s16) = G_SHL [[SHL]], [[AND]](s16) - ; GFX9: [[ASHR:%[0-9]+]]:_(s16) = G_ASHR [[SHL1]], [[AND]](s16) - ; GFX9: [[C2:%[0-9]+]]:_(s16) = G_CONSTANT i16 -32768 - ; GFX9: [[C3:%[0-9]+]]:_(s16) = G_CONSTANT i16 32767 - ; GFX9: [[C4:%[0-9]+]]:_(s16) = G_CONSTANT i16 0 - ; GFX9: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(slt), [[SHL]](s16), [[C4]] - ; GFX9: [[SELECT:%[0-9]+]]:_(s16) = G_SELECT [[ICMP]](s1), [[C2]], [[C3]] - ; GFX9: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[SHL]](s16), [[ASHR]] - ; GFX9: [[SELECT1:%[0-9]+]]:_(s16) = G_SELECT [[ICMP1]](s1), [[SELECT]], [[SHL1]] - ; GFX9: [[ASHR1:%[0-9]+]]:_(s16) = G_ASHR [[SELECT1]], [[C1]](s16) - ; GFX9: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[ASHR1]](s16) - ; GFX9: $vgpr0 = COPY [[ANYEXT]](s32) + ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; GFX9-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY]](s32) + ; GFX9-NEXT: [[C:%[0-9]+]]:_(s16) = G_CONSTANT i16 127 + ; GFX9-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY1]](s32) + ; GFX9-NEXT: [[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC1]], [[C]] + ; GFX9-NEXT: [[C1:%[0-9]+]]:_(s16) = G_CONSTANT i16 9 + ; GFX9-NEXT: [[SHL:%[0-9]+]]:_(s16) = G_SHL [[TRUNC]], [[C1]](s16) + ; GFX9-NEXT: [[SHL1:%[0-9]+]]:_(s16) = G_SHL [[SHL]], [[AND]](s16) + ; GFX9-NEXT: [[ASHR:%[0-9]+]]:_(s16) = 
G_ASHR [[SHL1]], [[AND]](s16) + ; GFX9-NEXT: [[C2:%[0-9]+]]:_(s16) = G_CONSTANT i16 -32768 + ; GFX9-NEXT: [[C3:%[0-9]+]]:_(s16) = G_CONSTANT i16 32767 + ; GFX9-NEXT: [[C4:%[0-9]+]]:_(s16) = G_CONSTANT i16 0 + ; GFX9-NEXT: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(slt), [[SHL]](s16), [[C4]] + ; GFX9-NEXT: [[SELECT:%[0-9]+]]:_(s16) = G_SELECT [[ICMP]](s1), [[C2]], [[C3]] + ; GFX9-NEXT: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[SHL]](s16), [[ASHR]] + ; GFX9-NEXT: [[SELECT1:%[0-9]+]]:_(s16) = G_SELECT [[ICMP1]](s1), [[SELECT]], [[SHL1]] + ; GFX9-NEXT: [[ASHR1:%[0-9]+]]:_(s16) = G_ASHR [[SELECT1]], [[C1]](s16) + ; GFX9-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[ASHR1]](s16) + ; GFX9-NEXT: $vgpr0 = COPY [[ANYEXT]](s32) %0:_(s32) = COPY $vgpr0 %1:_(s32) = COPY $vgpr1 %2:_(s7) = G_TRUNC %0 @@ -87,64 +87,64 @@ body: | ; GFX6-LABEL: name: sshlsat_s8 ; GFX6: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX6: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX6: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 255 - ; GFX6: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C]] - ; GFX6: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 24 - ; GFX6: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY]], [[C1]](s32) - ; GFX6: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[SHL]], [[AND]](s32) - ; GFX6: [[ASHR:%[0-9]+]]:_(s32) = G_ASHR [[SHL1]], [[AND]](s32) - ; GFX6: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 -2147483648 - ; GFX6: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 2147483647 - ; GFX6: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 - ; GFX6: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(slt), [[SHL]](s32), [[C4]] - ; GFX6: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[ICMP]](s1), [[C2]], [[C3]] - ; GFX6: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[SHL]](s32), [[ASHR]] - ; GFX6: [[SELECT1:%[0-9]+]]:_(s32) = G_SELECT [[ICMP1]](s1), [[SELECT]], [[SHL1]] - ; GFX6: [[ASHR1:%[0-9]+]]:_(s32) = G_ASHR [[SELECT1]], [[C1]](s32) - ; GFX6: $vgpr0 = COPY [[ASHR1]](s32) + ; GFX6-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; GFX6-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 255 + ; GFX6-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C]] + ; GFX6-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 24 + ; GFX6-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY]], [[C1]](s32) + ; GFX6-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[SHL]], [[AND]](s32) + ; GFX6-NEXT: [[ASHR:%[0-9]+]]:_(s32) = G_ASHR [[SHL1]], [[AND]](s32) + ; GFX6-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 -2147483648 + ; GFX6-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 2147483647 + ; GFX6-NEXT: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 + ; GFX6-NEXT: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(slt), [[SHL]](s32), [[C4]] + ; GFX6-NEXT: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[ICMP]](s1), [[C2]], [[C3]] + ; GFX6-NEXT: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[SHL]](s32), [[ASHR]] + ; GFX6-NEXT: [[SELECT1:%[0-9]+]]:_(s32) = G_SELECT [[ICMP1]](s1), [[SELECT]], [[SHL1]] + ; GFX6-NEXT: [[ASHR1:%[0-9]+]]:_(s32) = G_ASHR [[SELECT1]], [[C1]](s32) + ; GFX6-NEXT: $vgpr0 = COPY [[ASHR1]](s32) ; GFX8-LABEL: name: sshlsat_s8 ; GFX8: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX8: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX8: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY]](s32) - ; GFX8: [[C:%[0-9]+]]:_(s16) = G_CONSTANT i16 255 - ; GFX8: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY1]](s32) - ; GFX8: [[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC1]], [[C]] - ; GFX8: [[C1:%[0-9]+]]:_(s16) = G_CONSTANT i16 8 - ; GFX8: [[SHL:%[0-9]+]]:_(s16) = G_SHL [[TRUNC]], [[C1]](s16) - ; GFX8: [[SHL1:%[0-9]+]]:_(s16) = G_SHL [[SHL]], [[AND]](s16) - ; GFX8: [[ASHR:%[0-9]+]]:_(s16) 
= G_ASHR [[SHL1]], [[AND]](s16) - ; GFX8: [[C2:%[0-9]+]]:_(s16) = G_CONSTANT i16 -32768 - ; GFX8: [[C3:%[0-9]+]]:_(s16) = G_CONSTANT i16 32767 - ; GFX8: [[C4:%[0-9]+]]:_(s16) = G_CONSTANT i16 0 - ; GFX8: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(slt), [[SHL]](s16), [[C4]] - ; GFX8: [[SELECT:%[0-9]+]]:_(s16) = G_SELECT [[ICMP]](s1), [[C2]], [[C3]] - ; GFX8: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[SHL]](s16), [[ASHR]] - ; GFX8: [[SELECT1:%[0-9]+]]:_(s16) = G_SELECT [[ICMP1]](s1), [[SELECT]], [[SHL1]] - ; GFX8: [[ASHR1:%[0-9]+]]:_(s16) = G_ASHR [[SELECT1]], [[C1]](s16) - ; GFX8: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[ASHR1]](s16) - ; GFX8: $vgpr0 = COPY [[ANYEXT]](s32) + ; GFX8-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; GFX8-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY]](s32) + ; GFX8-NEXT: [[C:%[0-9]+]]:_(s16) = G_CONSTANT i16 255 + ; GFX8-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY1]](s32) + ; GFX8-NEXT: [[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC1]], [[C]] + ; GFX8-NEXT: [[C1:%[0-9]+]]:_(s16) = G_CONSTANT i16 8 + ; GFX8-NEXT: [[SHL:%[0-9]+]]:_(s16) = G_SHL [[TRUNC]], [[C1]](s16) + ; GFX8-NEXT: [[SHL1:%[0-9]+]]:_(s16) = G_SHL [[SHL]], [[AND]](s16) + ; GFX8-NEXT: [[ASHR:%[0-9]+]]:_(s16) = G_ASHR [[SHL1]], [[AND]](s16) + ; GFX8-NEXT: [[C2:%[0-9]+]]:_(s16) = G_CONSTANT i16 -32768 + ; GFX8-NEXT: [[C3:%[0-9]+]]:_(s16) = G_CONSTANT i16 32767 + ; GFX8-NEXT: [[C4:%[0-9]+]]:_(s16) = G_CONSTANT i16 0 + ; GFX8-NEXT: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(slt), [[SHL]](s16), [[C4]] + ; GFX8-NEXT: [[SELECT:%[0-9]+]]:_(s16) = G_SELECT [[ICMP]](s1), [[C2]], [[C3]] + ; GFX8-NEXT: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[SHL]](s16), [[ASHR]] + ; GFX8-NEXT: [[SELECT1:%[0-9]+]]:_(s16) = G_SELECT [[ICMP1]](s1), [[SELECT]], [[SHL1]] + ; GFX8-NEXT: [[ASHR1:%[0-9]+]]:_(s16) = G_ASHR [[SELECT1]], [[C1]](s16) + ; GFX8-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[ASHR1]](s16) + ; GFX8-NEXT: $vgpr0 = COPY [[ANYEXT]](s32) ; GFX9-LABEL: name: sshlsat_s8 ; GFX9: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX9: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX9: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY]](s32) - ; GFX9: [[C:%[0-9]+]]:_(s16) = G_CONSTANT i16 255 - ; GFX9: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY1]](s32) - ; GFX9: [[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC1]], [[C]] - ; GFX9: [[C1:%[0-9]+]]:_(s16) = G_CONSTANT i16 8 - ; GFX9: [[SHL:%[0-9]+]]:_(s16) = G_SHL [[TRUNC]], [[C1]](s16) - ; GFX9: [[SHL1:%[0-9]+]]:_(s16) = G_SHL [[SHL]], [[AND]](s16) - ; GFX9: [[ASHR:%[0-9]+]]:_(s16) = G_ASHR [[SHL1]], [[AND]](s16) - ; GFX9: [[C2:%[0-9]+]]:_(s16) = G_CONSTANT i16 -32768 - ; GFX9: [[C3:%[0-9]+]]:_(s16) = G_CONSTANT i16 32767 - ; GFX9: [[C4:%[0-9]+]]:_(s16) = G_CONSTANT i16 0 - ; GFX9: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(slt), [[SHL]](s16), [[C4]] - ; GFX9: [[SELECT:%[0-9]+]]:_(s16) = G_SELECT [[ICMP]](s1), [[C2]], [[C3]] - ; GFX9: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[SHL]](s16), [[ASHR]] - ; GFX9: [[SELECT1:%[0-9]+]]:_(s16) = G_SELECT [[ICMP1]](s1), [[SELECT]], [[SHL1]] - ; GFX9: [[ASHR1:%[0-9]+]]:_(s16) = G_ASHR [[SELECT1]], [[C1]](s16) - ; GFX9: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[ASHR1]](s16) - ; GFX9: $vgpr0 = COPY [[ANYEXT]](s32) + ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; GFX9-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY]](s32) + ; GFX9-NEXT: [[C:%[0-9]+]]:_(s16) = G_CONSTANT i16 255 + ; GFX9-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY1]](s32) + ; GFX9-NEXT: [[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC1]], [[C]] + ; GFX9-NEXT: [[C1:%[0-9]+]]:_(s16) = 
G_CONSTANT i16 8 + ; GFX9-NEXT: [[SHL:%[0-9]+]]:_(s16) = G_SHL [[TRUNC]], [[C1]](s16) + ; GFX9-NEXT: [[SHL1:%[0-9]+]]:_(s16) = G_SHL [[SHL]], [[AND]](s16) + ; GFX9-NEXT: [[ASHR:%[0-9]+]]:_(s16) = G_ASHR [[SHL1]], [[AND]](s16) + ; GFX9-NEXT: [[C2:%[0-9]+]]:_(s16) = G_CONSTANT i16 -32768 + ; GFX9-NEXT: [[C3:%[0-9]+]]:_(s16) = G_CONSTANT i16 32767 + ; GFX9-NEXT: [[C4:%[0-9]+]]:_(s16) = G_CONSTANT i16 0 + ; GFX9-NEXT: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(slt), [[SHL]](s16), [[C4]] + ; GFX9-NEXT: [[SELECT:%[0-9]+]]:_(s16) = G_SELECT [[ICMP]](s1), [[C2]], [[C3]] + ; GFX9-NEXT: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[SHL]](s16), [[ASHR]] + ; GFX9-NEXT: [[SELECT1:%[0-9]+]]:_(s16) = G_SELECT [[ICMP1]](s1), [[SELECT]], [[SHL1]] + ; GFX9-NEXT: [[ASHR1:%[0-9]+]]:_(s16) = G_ASHR [[SELECT1]], [[C1]](s16) + ; GFX9-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[ASHR1]](s16) + ; GFX9-NEXT: $vgpr0 = COPY [[ANYEXT]](s32) %0:_(s32) = COPY $vgpr0 %1:_(s32) = COPY $vgpr1 %2:_(s8) = G_TRUNC %0 @@ -162,121 +162,121 @@ body: | ; GFX6-LABEL: name: sshlsat_v2s8 ; GFX6: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX6: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX6: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 - ; GFX6: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[COPY]], [[C]](s32) - ; GFX6: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 24 - ; GFX6: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[COPY1]], [[C]](s32) - ; GFX6: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 255 - ; GFX6: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C2]] - ; GFX6: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY]], [[C1]](s32) - ; GFX6: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[SHL]], [[AND]](s32) - ; GFX6: [[ASHR:%[0-9]+]]:_(s32) = G_ASHR [[SHL1]], [[AND]](s32) - ; GFX6: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 -2147483648 - ; GFX6: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 2147483647 - ; GFX6: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 - ; GFX6: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(slt), [[SHL]](s32), [[C5]] - ; GFX6: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[ICMP]](s1), [[C3]], [[C4]] - ; GFX6: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[SHL]](s32), [[ASHR]] - ; GFX6: [[SELECT1:%[0-9]+]]:_(s32) = G_SELECT [[ICMP1]](s1), [[SELECT]], [[SHL1]] - ; GFX6: [[ASHR1:%[0-9]+]]:_(s32) = G_ASHR [[SELECT1]], [[C1]](s32) - ; GFX6: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LSHR1]], [[C2]] - ; GFX6: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[LSHR]], [[C1]](s32) - ; GFX6: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[SHL2]], [[AND1]](s32) - ; GFX6: [[ASHR2:%[0-9]+]]:_(s32) = G_ASHR [[SHL3]], [[AND1]](s32) - ; GFX6: [[ICMP2:%[0-9]+]]:_(s1) = G_ICMP intpred(slt), [[SHL2]](s32), [[C5]] - ; GFX6: [[SELECT2:%[0-9]+]]:_(s32) = G_SELECT [[ICMP2]](s1), [[C3]], [[C4]] - ; GFX6: [[ICMP3:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[SHL2]](s32), [[ASHR2]] - ; GFX6: [[SELECT3:%[0-9]+]]:_(s32) = G_SELECT [[ICMP3]](s1), [[SELECT2]], [[SHL3]] - ; GFX6: [[ASHR3:%[0-9]+]]:_(s32) = G_ASHR [[SELECT3]], [[C1]](s32) - ; GFX6: [[C6:%[0-9]+]]:_(s16) = G_CONSTANT i16 255 - ; GFX6: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[ASHR1]](s32) - ; GFX6: [[AND2:%[0-9]+]]:_(s16) = G_AND [[TRUNC]], [[C6]] - ; GFX6: [[COPY2:%[0-9]+]]:_(s32) = COPY [[C]](s32) - ; GFX6: [[AND3:%[0-9]+]]:_(s32) = G_AND [[ASHR3]], [[C2]] - ; GFX6: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[COPY2]](s32) - ; GFX6: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[SHL4]](s32) - ; GFX6: [[OR:%[0-9]+]]:_(s16) = G_OR [[AND2]], [[TRUNC1]] - ; GFX6: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[OR]](s16) - ; GFX6: $vgpr0 = COPY [[ANYEXT]](s32) + ; GFX6-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; GFX6-NEXT: 
[[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 + ; GFX6-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[COPY]], [[C]](s32) + ; GFX6-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 24 + ; GFX6-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[COPY1]], [[C]](s32) + ; GFX6-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 255 + ; GFX6-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C2]] + ; GFX6-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY]], [[C1]](s32) + ; GFX6-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[SHL]], [[AND]](s32) + ; GFX6-NEXT: [[ASHR:%[0-9]+]]:_(s32) = G_ASHR [[SHL1]], [[AND]](s32) + ; GFX6-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 -2147483648 + ; GFX6-NEXT: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 2147483647 + ; GFX6-NEXT: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 + ; GFX6-NEXT: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(slt), [[SHL]](s32), [[C5]] + ; GFX6-NEXT: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[ICMP]](s1), [[C3]], [[C4]] + ; GFX6-NEXT: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[SHL]](s32), [[ASHR]] + ; GFX6-NEXT: [[SELECT1:%[0-9]+]]:_(s32) = G_SELECT [[ICMP1]](s1), [[SELECT]], [[SHL1]] + ; GFX6-NEXT: [[ASHR1:%[0-9]+]]:_(s32) = G_ASHR [[SELECT1]], [[C1]](s32) + ; GFX6-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LSHR1]], [[C2]] + ; GFX6-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[LSHR]], [[C1]](s32) + ; GFX6-NEXT: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[SHL2]], [[AND1]](s32) + ; GFX6-NEXT: [[ASHR2:%[0-9]+]]:_(s32) = G_ASHR [[SHL3]], [[AND1]](s32) + ; GFX6-NEXT: [[ICMP2:%[0-9]+]]:_(s1) = G_ICMP intpred(slt), [[SHL2]](s32), [[C5]] + ; GFX6-NEXT: [[SELECT2:%[0-9]+]]:_(s32) = G_SELECT [[ICMP2]](s1), [[C3]], [[C4]] + ; GFX6-NEXT: [[ICMP3:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[SHL2]](s32), [[ASHR2]] + ; GFX6-NEXT: [[SELECT3:%[0-9]+]]:_(s32) = G_SELECT [[ICMP3]](s1), [[SELECT2]], [[SHL3]] + ; GFX6-NEXT: [[ASHR3:%[0-9]+]]:_(s32) = G_ASHR [[SELECT3]], [[C1]](s32) + ; GFX6-NEXT: [[C6:%[0-9]+]]:_(s16) = G_CONSTANT i16 255 + ; GFX6-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[ASHR1]](s32) + ; GFX6-NEXT: [[AND2:%[0-9]+]]:_(s16) = G_AND [[TRUNC]], [[C6]] + ; GFX6-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY [[C]](s32) + ; GFX6-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[ASHR3]], [[C2]] + ; GFX6-NEXT: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[COPY2]](s32) + ; GFX6-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[SHL4]](s32) + ; GFX6-NEXT: [[OR:%[0-9]+]]:_(s16) = G_OR [[AND2]], [[TRUNC1]] + ; GFX6-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[OR]](s16) + ; GFX6-NEXT: $vgpr0 = COPY [[ANYEXT]](s32) ; GFX8-LABEL: name: sshlsat_v2s8 ; GFX8: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX8: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX8: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 - ; GFX8: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[COPY]], [[C]](s32) - ; GFX8: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[COPY1]], [[C]](s32) - ; GFX8: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY]](s32) - ; GFX8: [[C1:%[0-9]+]]:_(s16) = G_CONSTANT i16 255 - ; GFX8: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY1]](s32) - ; GFX8: [[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC1]], [[C1]] - ; GFX8: [[C2:%[0-9]+]]:_(s16) = G_CONSTANT i16 8 - ; GFX8: [[SHL:%[0-9]+]]:_(s16) = G_SHL [[TRUNC]], [[C2]](s16) - ; GFX8: [[SHL1:%[0-9]+]]:_(s16) = G_SHL [[SHL]], [[AND]](s16) - ; GFX8: [[ASHR:%[0-9]+]]:_(s16) = G_ASHR [[SHL1]], [[AND]](s16) - ; GFX8: [[C3:%[0-9]+]]:_(s16) = G_CONSTANT i16 -32768 - ; GFX8: [[C4:%[0-9]+]]:_(s16) = G_CONSTANT i16 32767 - ; GFX8: [[C5:%[0-9]+]]:_(s16) = G_CONSTANT i16 0 - ; GFX8: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(slt), [[SHL]](s16), [[C5]] - ; GFX8: [[SELECT:%[0-9]+]]:_(s16) = G_SELECT 
[[ICMP]](s1), [[C3]], [[C4]] - ; GFX8: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[SHL]](s16), [[ASHR]] - ; GFX8: [[SELECT1:%[0-9]+]]:_(s16) = G_SELECT [[ICMP1]](s1), [[SELECT]], [[SHL1]] - ; GFX8: [[ASHR1:%[0-9]+]]:_(s16) = G_ASHR [[SELECT1]], [[C2]](s16) - ; GFX8: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) - ; GFX8: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR1]](s32) - ; GFX8: [[AND1:%[0-9]+]]:_(s16) = G_AND [[TRUNC3]], [[C1]] - ; GFX8: [[SHL2:%[0-9]+]]:_(s16) = G_SHL [[TRUNC2]], [[C2]](s16) - ; GFX8: [[SHL3:%[0-9]+]]:_(s16) = G_SHL [[SHL2]], [[AND1]](s16) - ; GFX8: [[ASHR2:%[0-9]+]]:_(s16) = G_ASHR [[SHL3]], [[AND1]](s16) - ; GFX8: [[ICMP2:%[0-9]+]]:_(s1) = G_ICMP intpred(slt), [[SHL2]](s16), [[C5]] - ; GFX8: [[SELECT2:%[0-9]+]]:_(s16) = G_SELECT [[ICMP2]](s1), [[C3]], [[C4]] - ; GFX8: [[ICMP3:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[SHL2]](s16), [[ASHR2]] - ; GFX8: [[SELECT3:%[0-9]+]]:_(s16) = G_SELECT [[ICMP3]](s1), [[SELECT2]], [[SHL3]] - ; GFX8: [[ASHR3:%[0-9]+]]:_(s16) = G_ASHR [[SELECT3]], [[C2]](s16) - ; GFX8: [[AND2:%[0-9]+]]:_(s16) = G_AND [[ASHR1]], [[C1]] - ; GFX8: [[AND3:%[0-9]+]]:_(s16) = G_AND [[ASHR3]], [[C1]] - ; GFX8: [[SHL4:%[0-9]+]]:_(s16) = G_SHL [[AND3]], [[C2]](s16) - ; GFX8: [[OR:%[0-9]+]]:_(s16) = G_OR [[AND2]], [[SHL4]] - ; GFX8: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[OR]](s16) - ; GFX8: $vgpr0 = COPY [[ANYEXT]](s32) + ; GFX8-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; GFX8-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 + ; GFX8-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[COPY]], [[C]](s32) + ; GFX8-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[COPY1]], [[C]](s32) + ; GFX8-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY]](s32) + ; GFX8-NEXT: [[C1:%[0-9]+]]:_(s16) = G_CONSTANT i16 255 + ; GFX8-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY1]](s32) + ; GFX8-NEXT: [[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC1]], [[C1]] + ; GFX8-NEXT: [[C2:%[0-9]+]]:_(s16) = G_CONSTANT i16 8 + ; GFX8-NEXT: [[SHL:%[0-9]+]]:_(s16) = G_SHL [[TRUNC]], [[C2]](s16) + ; GFX8-NEXT: [[SHL1:%[0-9]+]]:_(s16) = G_SHL [[SHL]], [[AND]](s16) + ; GFX8-NEXT: [[ASHR:%[0-9]+]]:_(s16) = G_ASHR [[SHL1]], [[AND]](s16) + ; GFX8-NEXT: [[C3:%[0-9]+]]:_(s16) = G_CONSTANT i16 -32768 + ; GFX8-NEXT: [[C4:%[0-9]+]]:_(s16) = G_CONSTANT i16 32767 + ; GFX8-NEXT: [[C5:%[0-9]+]]:_(s16) = G_CONSTANT i16 0 + ; GFX8-NEXT: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(slt), [[SHL]](s16), [[C5]] + ; GFX8-NEXT: [[SELECT:%[0-9]+]]:_(s16) = G_SELECT [[ICMP]](s1), [[C3]], [[C4]] + ; GFX8-NEXT: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[SHL]](s16), [[ASHR]] + ; GFX8-NEXT: [[SELECT1:%[0-9]+]]:_(s16) = G_SELECT [[ICMP1]](s1), [[SELECT]], [[SHL1]] + ; GFX8-NEXT: [[ASHR1:%[0-9]+]]:_(s16) = G_ASHR [[SELECT1]], [[C2]](s16) + ; GFX8-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) + ; GFX8-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR1]](s32) + ; GFX8-NEXT: [[AND1:%[0-9]+]]:_(s16) = G_AND [[TRUNC3]], [[C1]] + ; GFX8-NEXT: [[SHL2:%[0-9]+]]:_(s16) = G_SHL [[TRUNC2]], [[C2]](s16) + ; GFX8-NEXT: [[SHL3:%[0-9]+]]:_(s16) = G_SHL [[SHL2]], [[AND1]](s16) + ; GFX8-NEXT: [[ASHR2:%[0-9]+]]:_(s16) = G_ASHR [[SHL3]], [[AND1]](s16) + ; GFX8-NEXT: [[ICMP2:%[0-9]+]]:_(s1) = G_ICMP intpred(slt), [[SHL2]](s16), [[C5]] + ; GFX8-NEXT: [[SELECT2:%[0-9]+]]:_(s16) = G_SELECT [[ICMP2]](s1), [[C3]], [[C4]] + ; GFX8-NEXT: [[ICMP3:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[SHL2]](s16), [[ASHR2]] + ; GFX8-NEXT: [[SELECT3:%[0-9]+]]:_(s16) = G_SELECT [[ICMP3]](s1), [[SELECT2]], [[SHL3]] + ; GFX8-NEXT: [[ASHR3:%[0-9]+]]:_(s16) = G_ASHR [[SELECT3]], 
[[C2]](s16) + ; GFX8-NEXT: [[AND2:%[0-9]+]]:_(s16) = G_AND [[ASHR1]], [[C1]] + ; GFX8-NEXT: [[AND3:%[0-9]+]]:_(s16) = G_AND [[ASHR3]], [[C1]] + ; GFX8-NEXT: [[SHL4:%[0-9]+]]:_(s16) = G_SHL [[AND3]], [[C2]](s16) + ; GFX8-NEXT: [[OR:%[0-9]+]]:_(s16) = G_OR [[AND2]], [[SHL4]] + ; GFX8-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[OR]](s16) + ; GFX8-NEXT: $vgpr0 = COPY [[ANYEXT]](s32) ; GFX9-LABEL: name: sshlsat_v2s8 ; GFX9: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX9: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX9: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 - ; GFX9: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[COPY]], [[C]](s32) - ; GFX9: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[COPY1]], [[C]](s32) - ; GFX9: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY]](s32) - ; GFX9: [[C1:%[0-9]+]]:_(s16) = G_CONSTANT i16 255 - ; GFX9: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY1]](s32) - ; GFX9: [[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC1]], [[C1]] - ; GFX9: [[C2:%[0-9]+]]:_(s16) = G_CONSTANT i16 8 - ; GFX9: [[SHL:%[0-9]+]]:_(s16) = G_SHL [[TRUNC]], [[C2]](s16) - ; GFX9: [[SHL1:%[0-9]+]]:_(s16) = G_SHL [[SHL]], [[AND]](s16) - ; GFX9: [[ASHR:%[0-9]+]]:_(s16) = G_ASHR [[SHL1]], [[AND]](s16) - ; GFX9: [[C3:%[0-9]+]]:_(s16) = G_CONSTANT i16 -32768 - ; GFX9: [[C4:%[0-9]+]]:_(s16) = G_CONSTANT i16 32767 - ; GFX9: [[C5:%[0-9]+]]:_(s16) = G_CONSTANT i16 0 - ; GFX9: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(slt), [[SHL]](s16), [[C5]] - ; GFX9: [[SELECT:%[0-9]+]]:_(s16) = G_SELECT [[ICMP]](s1), [[C3]], [[C4]] - ; GFX9: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[SHL]](s16), [[ASHR]] - ; GFX9: [[SELECT1:%[0-9]+]]:_(s16) = G_SELECT [[ICMP1]](s1), [[SELECT]], [[SHL1]] - ; GFX9: [[ASHR1:%[0-9]+]]:_(s16) = G_ASHR [[SELECT1]], [[C2]](s16) - ; GFX9: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) - ; GFX9: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR1]](s32) - ; GFX9: [[AND1:%[0-9]+]]:_(s16) = G_AND [[TRUNC3]], [[C1]] - ; GFX9: [[SHL2:%[0-9]+]]:_(s16) = G_SHL [[TRUNC2]], [[C2]](s16) - ; GFX9: [[SHL3:%[0-9]+]]:_(s16) = G_SHL [[SHL2]], [[AND1]](s16) - ; GFX9: [[ASHR2:%[0-9]+]]:_(s16) = G_ASHR [[SHL3]], [[AND1]](s16) - ; GFX9: [[ICMP2:%[0-9]+]]:_(s1) = G_ICMP intpred(slt), [[SHL2]](s16), [[C5]] - ; GFX9: [[SELECT2:%[0-9]+]]:_(s16) = G_SELECT [[ICMP2]](s1), [[C3]], [[C4]] - ; GFX9: [[ICMP3:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[SHL2]](s16), [[ASHR2]] - ; GFX9: [[SELECT3:%[0-9]+]]:_(s16) = G_SELECT [[ICMP3]](s1), [[SELECT2]], [[SHL3]] - ; GFX9: [[ASHR3:%[0-9]+]]:_(s16) = G_ASHR [[SELECT3]], [[C2]](s16) - ; GFX9: [[AND2:%[0-9]+]]:_(s16) = G_AND [[ASHR1]], [[C1]] - ; GFX9: [[AND3:%[0-9]+]]:_(s16) = G_AND [[ASHR3]], [[C1]] - ; GFX9: [[SHL4:%[0-9]+]]:_(s16) = G_SHL [[AND3]], [[C2]](s16) - ; GFX9: [[OR:%[0-9]+]]:_(s16) = G_OR [[AND2]], [[SHL4]] - ; GFX9: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[OR]](s16) - ; GFX9: $vgpr0 = COPY [[ANYEXT]](s32) + ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; GFX9-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 + ; GFX9-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[COPY]], [[C]](s32) + ; GFX9-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[COPY1]], [[C]](s32) + ; GFX9-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY]](s32) + ; GFX9-NEXT: [[C1:%[0-9]+]]:_(s16) = G_CONSTANT i16 255 + ; GFX9-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY1]](s32) + ; GFX9-NEXT: [[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC1]], [[C1]] + ; GFX9-NEXT: [[C2:%[0-9]+]]:_(s16) = G_CONSTANT i16 8 + ; GFX9-NEXT: [[SHL:%[0-9]+]]:_(s16) = G_SHL [[TRUNC]], [[C2]](s16) + ; GFX9-NEXT: [[SHL1:%[0-9]+]]:_(s16) = G_SHL [[SHL]], [[AND]](s16) + ; GFX9-NEXT: 
[[ASHR:%[0-9]+]]:_(s16) = G_ASHR [[SHL1]], [[AND]](s16) + ; GFX9-NEXT: [[C3:%[0-9]+]]:_(s16) = G_CONSTANT i16 -32768 + ; GFX9-NEXT: [[C4:%[0-9]+]]:_(s16) = G_CONSTANT i16 32767 + ; GFX9-NEXT: [[C5:%[0-9]+]]:_(s16) = G_CONSTANT i16 0 + ; GFX9-NEXT: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(slt), [[SHL]](s16), [[C5]] + ; GFX9-NEXT: [[SELECT:%[0-9]+]]:_(s16) = G_SELECT [[ICMP]](s1), [[C3]], [[C4]] + ; GFX9-NEXT: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[SHL]](s16), [[ASHR]] + ; GFX9-NEXT: [[SELECT1:%[0-9]+]]:_(s16) = G_SELECT [[ICMP1]](s1), [[SELECT]], [[SHL1]] + ; GFX9-NEXT: [[ASHR1:%[0-9]+]]:_(s16) = G_ASHR [[SELECT1]], [[C2]](s16) + ; GFX9-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) + ; GFX9-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR1]](s32) + ; GFX9-NEXT: [[AND1:%[0-9]+]]:_(s16) = G_AND [[TRUNC3]], [[C1]] + ; GFX9-NEXT: [[SHL2:%[0-9]+]]:_(s16) = G_SHL [[TRUNC2]], [[C2]](s16) + ; GFX9-NEXT: [[SHL3:%[0-9]+]]:_(s16) = G_SHL [[SHL2]], [[AND1]](s16) + ; GFX9-NEXT: [[ASHR2:%[0-9]+]]:_(s16) = G_ASHR [[SHL3]], [[AND1]](s16) + ; GFX9-NEXT: [[ICMP2:%[0-9]+]]:_(s1) = G_ICMP intpred(slt), [[SHL2]](s16), [[C5]] + ; GFX9-NEXT: [[SELECT2:%[0-9]+]]:_(s16) = G_SELECT [[ICMP2]](s1), [[C3]], [[C4]] + ; GFX9-NEXT: [[ICMP3:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[SHL2]](s16), [[ASHR2]] + ; GFX9-NEXT: [[SELECT3:%[0-9]+]]:_(s16) = G_SELECT [[ICMP3]](s1), [[SELECT2]], [[SHL3]] + ; GFX9-NEXT: [[ASHR3:%[0-9]+]]:_(s16) = G_ASHR [[SELECT3]], [[C2]](s16) + ; GFX9-NEXT: [[AND2:%[0-9]+]]:_(s16) = G_AND [[ASHR1]], [[C1]] + ; GFX9-NEXT: [[AND3:%[0-9]+]]:_(s16) = G_AND [[ASHR3]], [[C1]] + ; GFX9-NEXT: [[SHL4:%[0-9]+]]:_(s16) = G_SHL [[AND3]], [[C2]](s16) + ; GFX9-NEXT: [[OR:%[0-9]+]]:_(s16) = G_OR [[AND2]], [[SHL4]] + ; GFX9-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[OR]](s16) + ; GFX9-NEXT: $vgpr0 = COPY [[ANYEXT]](s32) %0:_(s32) = COPY $vgpr0 %1:_(s32) = COPY $vgpr1 %2:_(s16) = G_TRUNC %0 @@ -297,54 +297,54 @@ body: | ; GFX6-LABEL: name: sshlsat_s16 ; GFX6: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX6: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX6: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 - ; GFX6: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C]] - ; GFX6: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; GFX6: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY]], [[C1]](s32) - ; GFX6: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[SHL]], [[AND]](s32) - ; GFX6: [[ASHR:%[0-9]+]]:_(s32) = G_ASHR [[SHL1]], [[AND]](s32) - ; GFX6: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 -2147483648 - ; GFX6: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 2147483647 - ; GFX6: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 - ; GFX6: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(slt), [[SHL]](s32), [[C4]] - ; GFX6: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[ICMP]](s1), [[C2]], [[C3]] - ; GFX6: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[SHL]](s32), [[ASHR]] - ; GFX6: [[SELECT1:%[0-9]+]]:_(s32) = G_SELECT [[ICMP1]](s1), [[SELECT]], [[SHL1]] - ; GFX6: [[ASHR1:%[0-9]+]]:_(s32) = G_ASHR [[SELECT1]], [[C1]](s32) - ; GFX6: $vgpr0 = COPY [[ASHR1]](s32) + ; GFX6-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; GFX6-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 + ; GFX6-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C]] + ; GFX6-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; GFX6-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY]], [[C1]](s32) + ; GFX6-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[SHL]], [[AND]](s32) + ; GFX6-NEXT: [[ASHR:%[0-9]+]]:_(s32) = G_ASHR [[SHL1]], [[AND]](s32) + ; GFX6-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 -2147483648 + ; GFX6-NEXT: 
[[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 2147483647 + ; GFX6-NEXT: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 + ; GFX6-NEXT: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(slt), [[SHL]](s32), [[C4]] + ; GFX6-NEXT: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[ICMP]](s1), [[C2]], [[C3]] + ; GFX6-NEXT: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[SHL]](s32), [[ASHR]] + ; GFX6-NEXT: [[SELECT1:%[0-9]+]]:_(s32) = G_SELECT [[ICMP1]](s1), [[SELECT]], [[SHL1]] + ; GFX6-NEXT: [[ASHR1:%[0-9]+]]:_(s32) = G_ASHR [[SELECT1]], [[C1]](s32) + ; GFX6-NEXT: $vgpr0 = COPY [[ASHR1]](s32) ; GFX8-LABEL: name: sshlsat_s16 ; GFX8: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX8: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX8: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY]](s32) - ; GFX8: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY1]](s32) - ; GFX8: [[SHL:%[0-9]+]]:_(s16) = G_SHL [[TRUNC]], [[TRUNC1]](s16) - ; GFX8: [[ASHR:%[0-9]+]]:_(s16) = G_ASHR [[SHL]], [[TRUNC1]](s16) - ; GFX8: [[C:%[0-9]+]]:_(s16) = G_CONSTANT i16 -32768 - ; GFX8: [[C1:%[0-9]+]]:_(s16) = G_CONSTANT i16 32767 - ; GFX8: [[C2:%[0-9]+]]:_(s16) = G_CONSTANT i16 0 - ; GFX8: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(slt), [[TRUNC]](s16), [[C2]] - ; GFX8: [[SELECT:%[0-9]+]]:_(s16) = G_SELECT [[ICMP]](s1), [[C]], [[C1]] - ; GFX8: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[TRUNC]](s16), [[ASHR]] - ; GFX8: [[SELECT1:%[0-9]+]]:_(s16) = G_SELECT [[ICMP1]](s1), [[SELECT]], [[SHL]] - ; GFX8: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[SELECT1]](s16) - ; GFX8: $vgpr0 = COPY [[ANYEXT]](s32) + ; GFX8-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; GFX8-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY]](s32) + ; GFX8-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY1]](s32) + ; GFX8-NEXT: [[SHL:%[0-9]+]]:_(s16) = G_SHL [[TRUNC]], [[TRUNC1]](s16) + ; GFX8-NEXT: [[ASHR:%[0-9]+]]:_(s16) = G_ASHR [[SHL]], [[TRUNC1]](s16) + ; GFX8-NEXT: [[C:%[0-9]+]]:_(s16) = G_CONSTANT i16 -32768 + ; GFX8-NEXT: [[C1:%[0-9]+]]:_(s16) = G_CONSTANT i16 32767 + ; GFX8-NEXT: [[C2:%[0-9]+]]:_(s16) = G_CONSTANT i16 0 + ; GFX8-NEXT: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(slt), [[TRUNC]](s16), [[C2]] + ; GFX8-NEXT: [[SELECT:%[0-9]+]]:_(s16) = G_SELECT [[ICMP]](s1), [[C]], [[C1]] + ; GFX8-NEXT: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[TRUNC]](s16), [[ASHR]] + ; GFX8-NEXT: [[SELECT1:%[0-9]+]]:_(s16) = G_SELECT [[ICMP1]](s1), [[SELECT]], [[SHL]] + ; GFX8-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[SELECT1]](s16) + ; GFX8-NEXT: $vgpr0 = COPY [[ANYEXT]](s32) ; GFX9-LABEL: name: sshlsat_s16 ; GFX9: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX9: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX9: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY]](s32) - ; GFX9: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY1]](s32) - ; GFX9: [[SHL:%[0-9]+]]:_(s16) = G_SHL [[TRUNC]], [[TRUNC1]](s16) - ; GFX9: [[ASHR:%[0-9]+]]:_(s16) = G_ASHR [[SHL]], [[TRUNC1]](s16) - ; GFX9: [[C:%[0-9]+]]:_(s16) = G_CONSTANT i16 -32768 - ; GFX9: [[C1:%[0-9]+]]:_(s16) = G_CONSTANT i16 32767 - ; GFX9: [[C2:%[0-9]+]]:_(s16) = G_CONSTANT i16 0 - ; GFX9: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(slt), [[TRUNC]](s16), [[C2]] - ; GFX9: [[SELECT:%[0-9]+]]:_(s16) = G_SELECT [[ICMP]](s1), [[C]], [[C1]] - ; GFX9: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[TRUNC]](s16), [[ASHR]] - ; GFX9: [[SELECT1:%[0-9]+]]:_(s16) = G_SELECT [[ICMP1]](s1), [[SELECT]], [[SHL]] - ; GFX9: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[SELECT1]](s16) - ; GFX9: $vgpr0 = COPY [[ANYEXT]](s32) + ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; GFX9-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC 
[[COPY]](s32) + ; GFX9-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY1]](s32) + ; GFX9-NEXT: [[SHL:%[0-9]+]]:_(s16) = G_SHL [[TRUNC]], [[TRUNC1]](s16) + ; GFX9-NEXT: [[ASHR:%[0-9]+]]:_(s16) = G_ASHR [[SHL]], [[TRUNC1]](s16) + ; GFX9-NEXT: [[C:%[0-9]+]]:_(s16) = G_CONSTANT i16 -32768 + ; GFX9-NEXT: [[C1:%[0-9]+]]:_(s16) = G_CONSTANT i16 32767 + ; GFX9-NEXT: [[C2:%[0-9]+]]:_(s16) = G_CONSTANT i16 0 + ; GFX9-NEXT: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(slt), [[TRUNC]](s16), [[C2]] + ; GFX9-NEXT: [[SELECT:%[0-9]+]]:_(s16) = G_SELECT [[ICMP]](s1), [[C]], [[C1]] + ; GFX9-NEXT: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[TRUNC]](s16), [[ASHR]] + ; GFX9-NEXT: [[SELECT1:%[0-9]+]]:_(s16) = G_SELECT [[ICMP1]](s1), [[SELECT]], [[SHL]] + ; GFX9-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[SELECT1]](s16) + ; GFX9-NEXT: $vgpr0 = COPY [[ANYEXT]](s32) %0:_(s32) = COPY $vgpr0 %1:_(s32) = COPY $vgpr1 %2:_(s16) = G_TRUNC %0 @@ -362,104 +362,104 @@ body: | ; GFX6-LABEL: name: sshlsat_v2s16 ; GFX6: [[COPY:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0 - ; GFX6: [[COPY1:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr1 - ; GFX6: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY]](<2 x s16>) - ; GFX6: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; GFX6: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) - ; GFX6: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[COPY1]](<2 x s16>) - ; GFX6: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C]](s32) - ; GFX6: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 - ; GFX6: [[AND:%[0-9]+]]:_(s32) = G_AND [[BITCAST1]], [[C1]] - ; GFX6: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[BITCAST]], [[C]](s32) - ; GFX6: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[SHL]], [[AND]](s32) - ; GFX6: [[ASHR:%[0-9]+]]:_(s32) = G_ASHR [[SHL1]], [[AND]](s32) - ; GFX6: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 -2147483648 - ; GFX6: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 2147483647 - ; GFX6: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 - ; GFX6: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(slt), [[SHL]](s32), [[C4]] - ; GFX6: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[ICMP]](s1), [[C2]], [[C3]] - ; GFX6: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[SHL]](s32), [[ASHR]] - ; GFX6: [[SELECT1:%[0-9]+]]:_(s32) = G_SELECT [[ICMP1]](s1), [[SELECT]], [[SHL1]] - ; GFX6: [[ASHR1:%[0-9]+]]:_(s32) = G_ASHR [[SELECT1]], [[C]](s32) - ; GFX6: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LSHR1]], [[C1]] - ; GFX6: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[LSHR]], [[C]](s32) - ; GFX6: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[SHL2]], [[AND1]](s32) - ; GFX6: [[ASHR2:%[0-9]+]]:_(s32) = G_ASHR [[SHL3]], [[AND1]](s32) - ; GFX6: [[ICMP2:%[0-9]+]]:_(s1) = G_ICMP intpred(slt), [[SHL2]](s32), [[C4]] - ; GFX6: [[SELECT2:%[0-9]+]]:_(s32) = G_SELECT [[ICMP2]](s1), [[C2]], [[C3]] - ; GFX6: [[ICMP3:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[SHL2]](s32), [[ASHR2]] - ; GFX6: [[SELECT3:%[0-9]+]]:_(s32) = G_SELECT [[ICMP3]](s1), [[SELECT2]], [[SHL3]] - ; GFX6: [[ASHR3:%[0-9]+]]:_(s32) = G_ASHR [[SELECT3]], [[C]](s32) - ; GFX6: [[AND2:%[0-9]+]]:_(s32) = G_AND [[ASHR1]], [[C1]] - ; GFX6: [[AND3:%[0-9]+]]:_(s32) = G_AND [[ASHR3]], [[C1]] - ; GFX6: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C]](s32) - ; GFX6: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL4]] - ; GFX6: [[BITCAST2:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) - ; GFX6: $vgpr0 = COPY [[BITCAST2]](<2 x s16>) + ; GFX6-NEXT: [[COPY1:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr1 + ; GFX6-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY]](<2 x s16>) + ; GFX6-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; GFX6-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], 
[[C]](s32) + ; GFX6-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[COPY1]](<2 x s16>) + ; GFX6-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C]](s32) + ; GFX6-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 + ; GFX6-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[BITCAST1]], [[C1]] + ; GFX6-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[BITCAST]], [[C]](s32) + ; GFX6-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[SHL]], [[AND]](s32) + ; GFX6-NEXT: [[ASHR:%[0-9]+]]:_(s32) = G_ASHR [[SHL1]], [[AND]](s32) + ; GFX6-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 -2147483648 + ; GFX6-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 2147483647 + ; GFX6-NEXT: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 + ; GFX6-NEXT: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(slt), [[SHL]](s32), [[C4]] + ; GFX6-NEXT: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[ICMP]](s1), [[C2]], [[C3]] + ; GFX6-NEXT: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[SHL]](s32), [[ASHR]] + ; GFX6-NEXT: [[SELECT1:%[0-9]+]]:_(s32) = G_SELECT [[ICMP1]](s1), [[SELECT]], [[SHL1]] + ; GFX6-NEXT: [[ASHR1:%[0-9]+]]:_(s32) = G_ASHR [[SELECT1]], [[C]](s32) + ; GFX6-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LSHR1]], [[C1]] + ; GFX6-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[LSHR]], [[C]](s32) + ; GFX6-NEXT: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[SHL2]], [[AND1]](s32) + ; GFX6-NEXT: [[ASHR2:%[0-9]+]]:_(s32) = G_ASHR [[SHL3]], [[AND1]](s32) + ; GFX6-NEXT: [[ICMP2:%[0-9]+]]:_(s1) = G_ICMP intpred(slt), [[SHL2]](s32), [[C4]] + ; GFX6-NEXT: [[SELECT2:%[0-9]+]]:_(s32) = G_SELECT [[ICMP2]](s1), [[C2]], [[C3]] + ; GFX6-NEXT: [[ICMP3:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[SHL2]](s32), [[ASHR2]] + ; GFX6-NEXT: [[SELECT3:%[0-9]+]]:_(s32) = G_SELECT [[ICMP3]](s1), [[SELECT2]], [[SHL3]] + ; GFX6-NEXT: [[ASHR3:%[0-9]+]]:_(s32) = G_ASHR [[SELECT3]], [[C]](s32) + ; GFX6-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[ASHR1]], [[C1]] + ; GFX6-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[ASHR3]], [[C1]] + ; GFX6-NEXT: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C]](s32) + ; GFX6-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL4]] + ; GFX6-NEXT: [[BITCAST2:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) + ; GFX6-NEXT: $vgpr0 = COPY [[BITCAST2]](<2 x s16>) ; GFX8-LABEL: name: sshlsat_v2s16 ; GFX8: [[COPY:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0 - ; GFX8: [[COPY1:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr1 - ; GFX8: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY]](<2 x s16>) - ; GFX8: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST]](s32) - ; GFX8: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; GFX8: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) - ; GFX8: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) - ; GFX8: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[COPY1]](<2 x s16>) - ; GFX8: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST1]](s32) - ; GFX8: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C]](s32) - ; GFX8: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR1]](s32) - ; GFX8: [[SHL:%[0-9]+]]:_(s16) = G_SHL [[TRUNC]], [[TRUNC2]](s16) - ; GFX8: [[ASHR:%[0-9]+]]:_(s16) = G_ASHR [[SHL]], [[TRUNC2]](s16) - ; GFX8: [[C1:%[0-9]+]]:_(s16) = G_CONSTANT i16 -32768 - ; GFX8: [[C2:%[0-9]+]]:_(s16) = G_CONSTANT i16 32767 - ; GFX8: [[C3:%[0-9]+]]:_(s16) = G_CONSTANT i16 0 - ; GFX8: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(slt), [[TRUNC]](s16), [[C3]] - ; GFX8: [[SELECT:%[0-9]+]]:_(s16) = G_SELECT [[ICMP]](s1), [[C1]], [[C2]] - ; GFX8: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[TRUNC]](s16), [[ASHR]] - ; GFX8: [[SELECT1:%[0-9]+]]:_(s16) = G_SELECT [[ICMP1]](s1), [[SELECT]], [[SHL]] - ; GFX8: [[SHL1:%[0-9]+]]:_(s16) = G_SHL 
[[TRUNC1]], [[TRUNC3]](s16) - ; GFX8: [[ASHR1:%[0-9]+]]:_(s16) = G_ASHR [[SHL1]], [[TRUNC3]](s16) - ; GFX8: [[ICMP2:%[0-9]+]]:_(s1) = G_ICMP intpred(slt), [[TRUNC1]](s16), [[C3]] - ; GFX8: [[SELECT2:%[0-9]+]]:_(s16) = G_SELECT [[ICMP2]](s1), [[C1]], [[C2]] - ; GFX8: [[ICMP3:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[TRUNC1]](s16), [[ASHR1]] - ; GFX8: [[SELECT3:%[0-9]+]]:_(s16) = G_SELECT [[ICMP3]](s1), [[SELECT2]], [[SHL1]] - ; GFX8: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[SELECT1]](s16) - ; GFX8: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[SELECT3]](s16) - ; GFX8: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C]](s32) - ; GFX8: [[OR:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL2]] - ; GFX8: [[BITCAST2:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) - ; GFX8: $vgpr0 = COPY [[BITCAST2]](<2 x s16>) + ; GFX8-NEXT: [[COPY1:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr1 + ; GFX8-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY]](<2 x s16>) + ; GFX8-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST]](s32) + ; GFX8-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; GFX8-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) + ; GFX8-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) + ; GFX8-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[COPY1]](<2 x s16>) + ; GFX8-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST1]](s32) + ; GFX8-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C]](s32) + ; GFX8-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR1]](s32) + ; GFX8-NEXT: [[SHL:%[0-9]+]]:_(s16) = G_SHL [[TRUNC]], [[TRUNC2]](s16) + ; GFX8-NEXT: [[ASHR:%[0-9]+]]:_(s16) = G_ASHR [[SHL]], [[TRUNC2]](s16) + ; GFX8-NEXT: [[C1:%[0-9]+]]:_(s16) = G_CONSTANT i16 -32768 + ; GFX8-NEXT: [[C2:%[0-9]+]]:_(s16) = G_CONSTANT i16 32767 + ; GFX8-NEXT: [[C3:%[0-9]+]]:_(s16) = G_CONSTANT i16 0 + ; GFX8-NEXT: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(slt), [[TRUNC]](s16), [[C3]] + ; GFX8-NEXT: [[SELECT:%[0-9]+]]:_(s16) = G_SELECT [[ICMP]](s1), [[C1]], [[C2]] + ; GFX8-NEXT: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[TRUNC]](s16), [[ASHR]] + ; GFX8-NEXT: [[SELECT1:%[0-9]+]]:_(s16) = G_SELECT [[ICMP1]](s1), [[SELECT]], [[SHL]] + ; GFX8-NEXT: [[SHL1:%[0-9]+]]:_(s16) = G_SHL [[TRUNC1]], [[TRUNC3]](s16) + ; GFX8-NEXT: [[ASHR1:%[0-9]+]]:_(s16) = G_ASHR [[SHL1]], [[TRUNC3]](s16) + ; GFX8-NEXT: [[ICMP2:%[0-9]+]]:_(s1) = G_ICMP intpred(slt), [[TRUNC1]](s16), [[C3]] + ; GFX8-NEXT: [[SELECT2:%[0-9]+]]:_(s16) = G_SELECT [[ICMP2]](s1), [[C1]], [[C2]] + ; GFX8-NEXT: [[ICMP3:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[TRUNC1]](s16), [[ASHR1]] + ; GFX8-NEXT: [[SELECT3:%[0-9]+]]:_(s16) = G_SELECT [[ICMP3]](s1), [[SELECT2]], [[SHL1]] + ; GFX8-NEXT: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[SELECT1]](s16) + ; GFX8-NEXT: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[SELECT3]](s16) + ; GFX8-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C]](s32) + ; GFX8-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL2]] + ; GFX8-NEXT: [[BITCAST2:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) + ; GFX8-NEXT: $vgpr0 = COPY [[BITCAST2]](<2 x s16>) ; GFX9-LABEL: name: sshlsat_v2s16 ; GFX9: [[COPY:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0 - ; GFX9: [[COPY1:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr1 - ; GFX9: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY]](<2 x s16>) - ; GFX9: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST]](s32) - ; GFX9: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; GFX9: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) - ; GFX9: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) - ; GFX9: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[COPY1]](<2 x s16>) - ; GFX9: 
[[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST1]](s32) - ; GFX9: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C]](s32) - ; GFX9: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR1]](s32) - ; GFX9: [[SHL:%[0-9]+]]:_(s16) = G_SHL [[TRUNC]], [[TRUNC2]](s16) - ; GFX9: [[ASHR:%[0-9]+]]:_(s16) = G_ASHR [[SHL]], [[TRUNC2]](s16) - ; GFX9: [[C1:%[0-9]+]]:_(s16) = G_CONSTANT i16 -32768 - ; GFX9: [[C2:%[0-9]+]]:_(s16) = G_CONSTANT i16 32767 - ; GFX9: [[C3:%[0-9]+]]:_(s16) = G_CONSTANT i16 0 - ; GFX9: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(slt), [[TRUNC]](s16), [[C3]] - ; GFX9: [[SELECT:%[0-9]+]]:_(s16) = G_SELECT [[ICMP]](s1), [[C1]], [[C2]] - ; GFX9: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[TRUNC]](s16), [[ASHR]] - ; GFX9: [[SELECT1:%[0-9]+]]:_(s16) = G_SELECT [[ICMP1]](s1), [[SELECT]], [[SHL]] - ; GFX9: [[SHL1:%[0-9]+]]:_(s16) = G_SHL [[TRUNC1]], [[TRUNC3]](s16) - ; GFX9: [[ASHR1:%[0-9]+]]:_(s16) = G_ASHR [[SHL1]], [[TRUNC3]](s16) - ; GFX9: [[ICMP2:%[0-9]+]]:_(s1) = G_ICMP intpred(slt), [[TRUNC1]](s16), [[C3]] - ; GFX9: [[SELECT2:%[0-9]+]]:_(s16) = G_SELECT [[ICMP2]](s1), [[C1]], [[C2]] - ; GFX9: [[ICMP3:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[TRUNC1]](s16), [[ASHR1]] - ; GFX9: [[SELECT3:%[0-9]+]]:_(s16) = G_SELECT [[ICMP3]](s1), [[SELECT2]], [[SHL1]] - ; GFX9: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[SELECT1]](s16) - ; GFX9: [[ANYEXT1:%[0-9]+]]:_(s32) = G_ANYEXT [[SELECT3]](s16) - ; GFX9: [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[ANYEXT]](s32), [[ANYEXT1]](s32) - ; GFX9: $vgpr0 = COPY [[BUILD_VECTOR_TRUNC]](<2 x s16>) + ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr1 + ; GFX9-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY]](<2 x s16>) + ; GFX9-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST]](s32) + ; GFX9-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; GFX9-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) + ; GFX9-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) + ; GFX9-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[COPY1]](<2 x s16>) + ; GFX9-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST1]](s32) + ; GFX9-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C]](s32) + ; GFX9-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR1]](s32) + ; GFX9-NEXT: [[SHL:%[0-9]+]]:_(s16) = G_SHL [[TRUNC]], [[TRUNC2]](s16) + ; GFX9-NEXT: [[ASHR:%[0-9]+]]:_(s16) = G_ASHR [[SHL]], [[TRUNC2]](s16) + ; GFX9-NEXT: [[C1:%[0-9]+]]:_(s16) = G_CONSTANT i16 -32768 + ; GFX9-NEXT: [[C2:%[0-9]+]]:_(s16) = G_CONSTANT i16 32767 + ; GFX9-NEXT: [[C3:%[0-9]+]]:_(s16) = G_CONSTANT i16 0 + ; GFX9-NEXT: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(slt), [[TRUNC]](s16), [[C3]] + ; GFX9-NEXT: [[SELECT:%[0-9]+]]:_(s16) = G_SELECT [[ICMP]](s1), [[C1]], [[C2]] + ; GFX9-NEXT: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[TRUNC]](s16), [[ASHR]] + ; GFX9-NEXT: [[SELECT1:%[0-9]+]]:_(s16) = G_SELECT [[ICMP1]](s1), [[SELECT]], [[SHL]] + ; GFX9-NEXT: [[SHL1:%[0-9]+]]:_(s16) = G_SHL [[TRUNC1]], [[TRUNC3]](s16) + ; GFX9-NEXT: [[ASHR1:%[0-9]+]]:_(s16) = G_ASHR [[SHL1]], [[TRUNC3]](s16) + ; GFX9-NEXT: [[ICMP2:%[0-9]+]]:_(s1) = G_ICMP intpred(slt), [[TRUNC1]](s16), [[C3]] + ; GFX9-NEXT: [[SELECT2:%[0-9]+]]:_(s16) = G_SELECT [[ICMP2]](s1), [[C1]], [[C2]] + ; GFX9-NEXT: [[ICMP3:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[TRUNC1]](s16), [[ASHR1]] + ; GFX9-NEXT: [[SELECT3:%[0-9]+]]:_(s16) = G_SELECT [[ICMP3]](s1), [[SELECT2]], [[SHL1]] + ; GFX9-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[SELECT1]](s16) + ; GFX9-NEXT: [[ANYEXT1:%[0-9]+]]:_(s32) = G_ANYEXT [[SELECT3]](s16) + ; 
GFX9-NEXT: [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[ANYEXT]](s32), [[ANYEXT1]](s32) + ; GFX9-NEXT: $vgpr0 = COPY [[BUILD_VECTOR_TRUNC]](<2 x s16>) %0:_(<2 x s16>) = COPY $vgpr0 %1:_(<2 x s16>) = COPY $vgpr1 %2:_(<2 x s16>) = G_SSHLSAT %0, %1 @@ -474,183 +474,183 @@ body: | ; GFX6-LABEL: name: sshlsat_v3s16 ; GFX6: [[COPY:%[0-9]+]]:_(<6 x s16>) = COPY $vgpr0_vgpr1_vgpr2 - ; GFX6: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>), [[UV2:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[COPY]](<6 x s16>) - ; GFX6: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>) - ; GFX6: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; GFX6: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) - ; GFX6: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>) - ; GFX6: [[UV3:%[0-9]+]]:_(<2 x s16>), [[UV4:%[0-9]+]]:_(<2 x s16>), [[UV5:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[COPY]](<6 x s16>) - ; GFX6: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[UV4]](<2 x s16>) - ; GFX6: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C]](s32) - ; GFX6: [[BITCAST3:%[0-9]+]]:_(s32) = G_BITCAST [[UV5]](<2 x s16>) - ; GFX6: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST3]], [[C]](s32) - ; GFX6: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 - ; GFX6: [[AND:%[0-9]+]]:_(s32) = G_AND [[LSHR1]], [[C1]] - ; GFX6: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[BITCAST]], [[C]](s32) - ; GFX6: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[SHL]], [[AND]](s32) - ; GFX6: [[ASHR:%[0-9]+]]:_(s32) = G_ASHR [[SHL1]], [[AND]](s32) - ; GFX6: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 -2147483648 - ; GFX6: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 2147483647 - ; GFX6: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 - ; GFX6: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(slt), [[SHL]](s32), [[C4]] - ; GFX6: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[ICMP]](s1), [[C2]], [[C3]] - ; GFX6: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[SHL]](s32), [[ASHR]] - ; GFX6: [[SELECT1:%[0-9]+]]:_(s32) = G_SELECT [[ICMP1]](s1), [[SELECT]], [[SHL1]] - ; GFX6: [[ASHR1:%[0-9]+]]:_(s32) = G_ASHR [[SELECT1]], [[C]](s32) - ; GFX6: [[AND1:%[0-9]+]]:_(s32) = G_AND [[BITCAST3]], [[C1]] - ; GFX6: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[LSHR]], [[C]](s32) - ; GFX6: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[SHL2]], [[AND1]](s32) - ; GFX6: [[ASHR2:%[0-9]+]]:_(s32) = G_ASHR [[SHL3]], [[AND1]](s32) - ; GFX6: [[ICMP2:%[0-9]+]]:_(s1) = G_ICMP intpred(slt), [[SHL2]](s32), [[C4]] - ; GFX6: [[SELECT2:%[0-9]+]]:_(s32) = G_SELECT [[ICMP2]](s1), [[C2]], [[C3]] - ; GFX6: [[ICMP3:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[SHL2]](s32), [[ASHR2]] - ; GFX6: [[SELECT3:%[0-9]+]]:_(s32) = G_SELECT [[ICMP3]](s1), [[SELECT2]], [[SHL3]] - ; GFX6: [[ASHR3:%[0-9]+]]:_(s32) = G_ASHR [[SELECT3]], [[C]](s32) - ; GFX6: [[AND2:%[0-9]+]]:_(s32) = G_AND [[LSHR2]], [[C1]] - ; GFX6: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[BITCAST1]], [[C]](s32) - ; GFX6: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[SHL4]], [[AND2]](s32) - ; GFX6: [[ASHR4:%[0-9]+]]:_(s32) = G_ASHR [[SHL5]], [[AND2]](s32) - ; GFX6: [[ICMP4:%[0-9]+]]:_(s1) = G_ICMP intpred(slt), [[SHL4]](s32), [[C4]] - ; GFX6: [[SELECT4:%[0-9]+]]:_(s32) = G_SELECT [[ICMP4]](s1), [[C2]], [[C3]] - ; GFX6: [[ICMP5:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[SHL4]](s32), [[ASHR4]] - ; GFX6: [[SELECT5:%[0-9]+]]:_(s32) = G_SELECT [[ICMP5]](s1), [[SELECT4]], [[SHL5]] - ; GFX6: [[ASHR5:%[0-9]+]]:_(s32) = G_ASHR [[SELECT5]], [[C]](s32) - ; GFX6: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF - ; GFX6: [[UV6:%[0-9]+]]:_(<2 x s16>), [[UV7:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<4 x s16>) - ; GFX6: 
[[BITCAST4:%[0-9]+]]:_(s32) = G_BITCAST [[UV6]](<2 x s16>) - ; GFX6: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST4]], [[C]](s32) - ; GFX6: [[BITCAST5:%[0-9]+]]:_(s32) = G_BITCAST [[UV7]](<2 x s16>) - ; GFX6: [[AND3:%[0-9]+]]:_(s32) = G_AND [[ASHR1]], [[C1]] - ; GFX6: [[AND4:%[0-9]+]]:_(s32) = G_AND [[ASHR3]], [[C1]] - ; GFX6: [[SHL6:%[0-9]+]]:_(s32) = G_SHL [[AND4]], [[C]](s32) - ; GFX6: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND3]], [[SHL6]] - ; GFX6: [[BITCAST6:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) - ; GFX6: [[AND5:%[0-9]+]]:_(s32) = G_AND [[ASHR5]], [[C1]] - ; GFX6: [[AND6:%[0-9]+]]:_(s32) = G_AND [[BITCAST4]], [[C1]] - ; GFX6: [[SHL7:%[0-9]+]]:_(s32) = G_SHL [[AND6]], [[C]](s32) - ; GFX6: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND5]], [[SHL7]] - ; GFX6: [[BITCAST7:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32) - ; GFX6: [[AND7:%[0-9]+]]:_(s32) = G_AND [[LSHR3]], [[C1]] - ; GFX6: [[AND8:%[0-9]+]]:_(s32) = G_AND [[BITCAST5]], [[C1]] - ; GFX6: [[SHL8:%[0-9]+]]:_(s32) = G_SHL [[AND8]], [[C]](s32) - ; GFX6: [[OR2:%[0-9]+]]:_(s32) = G_OR [[AND7]], [[SHL8]] - ; GFX6: [[BITCAST8:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR2]](s32) - ; GFX6: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BITCAST6]](<2 x s16>), [[BITCAST7]](<2 x s16>), [[BITCAST8]](<2 x s16>) - ; GFX6: $vgpr0_vgpr1_vgpr2 = COPY [[CONCAT_VECTORS]](<6 x s16>) + ; GFX6-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>), [[UV2:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[COPY]](<6 x s16>) + ; GFX6-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>) + ; GFX6-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; GFX6-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) + ; GFX6-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>) + ; GFX6-NEXT: [[UV3:%[0-9]+]]:_(<2 x s16>), [[UV4:%[0-9]+]]:_(<2 x s16>), [[UV5:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[COPY]](<6 x s16>) + ; GFX6-NEXT: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[UV4]](<2 x s16>) + ; GFX6-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C]](s32) + ; GFX6-NEXT: [[BITCAST3:%[0-9]+]]:_(s32) = G_BITCAST [[UV5]](<2 x s16>) + ; GFX6-NEXT: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST3]], [[C]](s32) + ; GFX6-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 + ; GFX6-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[LSHR1]], [[C1]] + ; GFX6-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[BITCAST]], [[C]](s32) + ; GFX6-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[SHL]], [[AND]](s32) + ; GFX6-NEXT: [[ASHR:%[0-9]+]]:_(s32) = G_ASHR [[SHL1]], [[AND]](s32) + ; GFX6-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 -2147483648 + ; GFX6-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 2147483647 + ; GFX6-NEXT: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 + ; GFX6-NEXT: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(slt), [[SHL]](s32), [[C4]] + ; GFX6-NEXT: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[ICMP]](s1), [[C2]], [[C3]] + ; GFX6-NEXT: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[SHL]](s32), [[ASHR]] + ; GFX6-NEXT: [[SELECT1:%[0-9]+]]:_(s32) = G_SELECT [[ICMP1]](s1), [[SELECT]], [[SHL1]] + ; GFX6-NEXT: [[ASHR1:%[0-9]+]]:_(s32) = G_ASHR [[SELECT1]], [[C]](s32) + ; GFX6-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[BITCAST3]], [[C1]] + ; GFX6-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[LSHR]], [[C]](s32) + ; GFX6-NEXT: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[SHL2]], [[AND1]](s32) + ; GFX6-NEXT: [[ASHR2:%[0-9]+]]:_(s32) = G_ASHR [[SHL3]], [[AND1]](s32) + ; GFX6-NEXT: [[ICMP2:%[0-9]+]]:_(s1) = G_ICMP intpred(slt), [[SHL2]](s32), [[C4]] + ; GFX6-NEXT: [[SELECT2:%[0-9]+]]:_(s32) = G_SELECT 
[[ICMP2]](s1), [[C2]], [[C3]] + ; GFX6-NEXT: [[ICMP3:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[SHL2]](s32), [[ASHR2]] + ; GFX6-NEXT: [[SELECT3:%[0-9]+]]:_(s32) = G_SELECT [[ICMP3]](s1), [[SELECT2]], [[SHL3]] + ; GFX6-NEXT: [[ASHR3:%[0-9]+]]:_(s32) = G_ASHR [[SELECT3]], [[C]](s32) + ; GFX6-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[LSHR2]], [[C1]] + ; GFX6-NEXT: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[BITCAST1]], [[C]](s32) + ; GFX6-NEXT: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[SHL4]], [[AND2]](s32) + ; GFX6-NEXT: [[ASHR4:%[0-9]+]]:_(s32) = G_ASHR [[SHL5]], [[AND2]](s32) + ; GFX6-NEXT: [[ICMP4:%[0-9]+]]:_(s1) = G_ICMP intpred(slt), [[SHL4]](s32), [[C4]] + ; GFX6-NEXT: [[SELECT4:%[0-9]+]]:_(s32) = G_SELECT [[ICMP4]](s1), [[C2]], [[C3]] + ; GFX6-NEXT: [[ICMP5:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[SHL4]](s32), [[ASHR4]] + ; GFX6-NEXT: [[SELECT5:%[0-9]+]]:_(s32) = G_SELECT [[ICMP5]](s1), [[SELECT4]], [[SHL5]] + ; GFX6-NEXT: [[ASHR5:%[0-9]+]]:_(s32) = G_ASHR [[SELECT5]], [[C]](s32) + ; GFX6-NEXT: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF + ; GFX6-NEXT: [[UV6:%[0-9]+]]:_(<2 x s16>), [[UV7:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<4 x s16>) + ; GFX6-NEXT: [[BITCAST4:%[0-9]+]]:_(s32) = G_BITCAST [[UV6]](<2 x s16>) + ; GFX6-NEXT: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST4]], [[C]](s32) + ; GFX6-NEXT: [[BITCAST5:%[0-9]+]]:_(s32) = G_BITCAST [[UV7]](<2 x s16>) + ; GFX6-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[ASHR1]], [[C1]] + ; GFX6-NEXT: [[AND4:%[0-9]+]]:_(s32) = G_AND [[ASHR3]], [[C1]] + ; GFX6-NEXT: [[SHL6:%[0-9]+]]:_(s32) = G_SHL [[AND4]], [[C]](s32) + ; GFX6-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND3]], [[SHL6]] + ; GFX6-NEXT: [[BITCAST6:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) + ; GFX6-NEXT: [[AND5:%[0-9]+]]:_(s32) = G_AND [[ASHR5]], [[C1]] + ; GFX6-NEXT: [[AND6:%[0-9]+]]:_(s32) = G_AND [[BITCAST4]], [[C1]] + ; GFX6-NEXT: [[SHL7:%[0-9]+]]:_(s32) = G_SHL [[AND6]], [[C]](s32) + ; GFX6-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND5]], [[SHL7]] + ; GFX6-NEXT: [[BITCAST7:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32) + ; GFX6-NEXT: [[AND7:%[0-9]+]]:_(s32) = G_AND [[LSHR3]], [[C1]] + ; GFX6-NEXT: [[AND8:%[0-9]+]]:_(s32) = G_AND [[BITCAST5]], [[C1]] + ; GFX6-NEXT: [[SHL8:%[0-9]+]]:_(s32) = G_SHL [[AND8]], [[C]](s32) + ; GFX6-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[AND7]], [[SHL8]] + ; GFX6-NEXT: [[BITCAST8:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR2]](s32) + ; GFX6-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BITCAST6]](<2 x s16>), [[BITCAST7]](<2 x s16>), [[BITCAST8]](<2 x s16>) + ; GFX6-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[CONCAT_VECTORS]](<6 x s16>) ; GFX8-LABEL: name: sshlsat_v3s16 ; GFX8: [[COPY:%[0-9]+]]:_(<6 x s16>) = COPY $vgpr0_vgpr1_vgpr2 - ; GFX8: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>), [[UV2:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[COPY]](<6 x s16>) - ; GFX8: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>) - ; GFX8: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST]](s32) - ; GFX8: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; GFX8: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) - ; GFX8: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) - ; GFX8: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>) - ; GFX8: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST1]](s32) - ; GFX8: [[UV3:%[0-9]+]]:_(<2 x s16>), [[UV4:%[0-9]+]]:_(<2 x s16>), [[UV5:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[COPY]](<6 x s16>) - ; GFX8: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[UV4]](<2 x s16>) - ; GFX8: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C]](s32) - 
; GFX8: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR1]](s32) - ; GFX8: [[BITCAST3:%[0-9]+]]:_(s32) = G_BITCAST [[UV5]](<2 x s16>) - ; GFX8: [[TRUNC4:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST3]](s32) - ; GFX8: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST3]], [[C]](s32) - ; GFX8: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR2]](s32) - ; GFX8: [[SHL:%[0-9]+]]:_(s16) = G_SHL [[TRUNC]], [[TRUNC3]](s16) - ; GFX8: [[ASHR:%[0-9]+]]:_(s16) = G_ASHR [[SHL]], [[TRUNC3]](s16) - ; GFX8: [[C1:%[0-9]+]]:_(s16) = G_CONSTANT i16 -32768 - ; GFX8: [[C2:%[0-9]+]]:_(s16) = G_CONSTANT i16 32767 - ; GFX8: [[C3:%[0-9]+]]:_(s16) = G_CONSTANT i16 0 - ; GFX8: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(slt), [[TRUNC]](s16), [[C3]] - ; GFX8: [[SELECT:%[0-9]+]]:_(s16) = G_SELECT [[ICMP]](s1), [[C1]], [[C2]] - ; GFX8: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[TRUNC]](s16), [[ASHR]] - ; GFX8: [[SELECT1:%[0-9]+]]:_(s16) = G_SELECT [[ICMP1]](s1), [[SELECT]], [[SHL]] - ; GFX8: [[SHL1:%[0-9]+]]:_(s16) = G_SHL [[TRUNC1]], [[TRUNC4]](s16) - ; GFX8: [[ASHR1:%[0-9]+]]:_(s16) = G_ASHR [[SHL1]], [[TRUNC4]](s16) - ; GFX8: [[ICMP2:%[0-9]+]]:_(s1) = G_ICMP intpred(slt), [[TRUNC1]](s16), [[C3]] - ; GFX8: [[SELECT2:%[0-9]+]]:_(s16) = G_SELECT [[ICMP2]](s1), [[C1]], [[C2]] - ; GFX8: [[ICMP3:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[TRUNC1]](s16), [[ASHR1]] - ; GFX8: [[SELECT3:%[0-9]+]]:_(s16) = G_SELECT [[ICMP3]](s1), [[SELECT2]], [[SHL1]] - ; GFX8: [[SHL2:%[0-9]+]]:_(s16) = G_SHL [[TRUNC2]], [[TRUNC5]](s16) - ; GFX8: [[ASHR2:%[0-9]+]]:_(s16) = G_ASHR [[SHL2]], [[TRUNC5]](s16) - ; GFX8: [[ICMP4:%[0-9]+]]:_(s1) = G_ICMP intpred(slt), [[TRUNC2]](s16), [[C3]] - ; GFX8: [[SELECT4:%[0-9]+]]:_(s16) = G_SELECT [[ICMP4]](s1), [[C1]], [[C2]] - ; GFX8: [[ICMP5:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[TRUNC2]](s16), [[ASHR2]] - ; GFX8: [[SELECT5:%[0-9]+]]:_(s16) = G_SELECT [[ICMP5]](s1), [[SELECT4]], [[SHL2]] - ; GFX8: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF - ; GFX8: [[UV6:%[0-9]+]]:_(<2 x s16>), [[UV7:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<4 x s16>) - ; GFX8: [[BITCAST4:%[0-9]+]]:_(s32) = G_BITCAST [[UV6]](<2 x s16>) - ; GFX8: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST4]], [[C]](s32) - ; GFX8: [[BITCAST5:%[0-9]+]]:_(s32) = G_BITCAST [[UV7]](<2 x s16>) - ; GFX8: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[SELECT1]](s16) - ; GFX8: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[SELECT3]](s16) - ; GFX8: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C]](s32) - ; GFX8: [[OR:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL3]] - ; GFX8: [[BITCAST6:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) - ; GFX8: [[ZEXT2:%[0-9]+]]:_(s32) = G_ZEXT [[SELECT5]](s16) - ; GFX8: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 - ; GFX8: [[AND:%[0-9]+]]:_(s32) = G_AND [[BITCAST4]], [[C4]] - ; GFX8: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[AND]], [[C]](s32) - ; GFX8: [[OR1:%[0-9]+]]:_(s32) = G_OR [[ZEXT2]], [[SHL4]] - ; GFX8: [[BITCAST7:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32) - ; GFX8: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LSHR3]], [[C4]] - ; GFX8: [[AND2:%[0-9]+]]:_(s32) = G_AND [[BITCAST5]], [[C4]] - ; GFX8: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[AND2]], [[C]](s32) - ; GFX8: [[OR2:%[0-9]+]]:_(s32) = G_OR [[AND1]], [[SHL5]] - ; GFX8: [[BITCAST8:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR2]](s32) - ; GFX8: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BITCAST6]](<2 x s16>), [[BITCAST7]](<2 x s16>), [[BITCAST8]](<2 x s16>) - ; GFX8: $vgpr0_vgpr1_vgpr2 = COPY [[CONCAT_VECTORS]](<6 x s16>) + ; GFX8-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>), [[UV2:%[0-9]+]]:_(<2 x s16>) = 
G_UNMERGE_VALUES [[COPY]](<6 x s16>) + ; GFX8-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>) + ; GFX8-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST]](s32) + ; GFX8-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; GFX8-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) + ; GFX8-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) + ; GFX8-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>) + ; GFX8-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST1]](s32) + ; GFX8-NEXT: [[UV3:%[0-9]+]]:_(<2 x s16>), [[UV4:%[0-9]+]]:_(<2 x s16>), [[UV5:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[COPY]](<6 x s16>) + ; GFX8-NEXT: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[UV4]](<2 x s16>) + ; GFX8-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C]](s32) + ; GFX8-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR1]](s32) + ; GFX8-NEXT: [[BITCAST3:%[0-9]+]]:_(s32) = G_BITCAST [[UV5]](<2 x s16>) + ; GFX8-NEXT: [[TRUNC4:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST3]](s32) + ; GFX8-NEXT: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST3]], [[C]](s32) + ; GFX8-NEXT: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR2]](s32) + ; GFX8-NEXT: [[SHL:%[0-9]+]]:_(s16) = G_SHL [[TRUNC]], [[TRUNC3]](s16) + ; GFX8-NEXT: [[ASHR:%[0-9]+]]:_(s16) = G_ASHR [[SHL]], [[TRUNC3]](s16) + ; GFX8-NEXT: [[C1:%[0-9]+]]:_(s16) = G_CONSTANT i16 -32768 + ; GFX8-NEXT: [[C2:%[0-9]+]]:_(s16) = G_CONSTANT i16 32767 + ; GFX8-NEXT: [[C3:%[0-9]+]]:_(s16) = G_CONSTANT i16 0 + ; GFX8-NEXT: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(slt), [[TRUNC]](s16), [[C3]] + ; GFX8-NEXT: [[SELECT:%[0-9]+]]:_(s16) = G_SELECT [[ICMP]](s1), [[C1]], [[C2]] + ; GFX8-NEXT: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[TRUNC]](s16), [[ASHR]] + ; GFX8-NEXT: [[SELECT1:%[0-9]+]]:_(s16) = G_SELECT [[ICMP1]](s1), [[SELECT]], [[SHL]] + ; GFX8-NEXT: [[SHL1:%[0-9]+]]:_(s16) = G_SHL [[TRUNC1]], [[TRUNC4]](s16) + ; GFX8-NEXT: [[ASHR1:%[0-9]+]]:_(s16) = G_ASHR [[SHL1]], [[TRUNC4]](s16) + ; GFX8-NEXT: [[ICMP2:%[0-9]+]]:_(s1) = G_ICMP intpred(slt), [[TRUNC1]](s16), [[C3]] + ; GFX8-NEXT: [[SELECT2:%[0-9]+]]:_(s16) = G_SELECT [[ICMP2]](s1), [[C1]], [[C2]] + ; GFX8-NEXT: [[ICMP3:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[TRUNC1]](s16), [[ASHR1]] + ; GFX8-NEXT: [[SELECT3:%[0-9]+]]:_(s16) = G_SELECT [[ICMP3]](s1), [[SELECT2]], [[SHL1]] + ; GFX8-NEXT: [[SHL2:%[0-9]+]]:_(s16) = G_SHL [[TRUNC2]], [[TRUNC5]](s16) + ; GFX8-NEXT: [[ASHR2:%[0-9]+]]:_(s16) = G_ASHR [[SHL2]], [[TRUNC5]](s16) + ; GFX8-NEXT: [[ICMP4:%[0-9]+]]:_(s1) = G_ICMP intpred(slt), [[TRUNC2]](s16), [[C3]] + ; GFX8-NEXT: [[SELECT4:%[0-9]+]]:_(s16) = G_SELECT [[ICMP4]](s1), [[C1]], [[C2]] + ; GFX8-NEXT: [[ICMP5:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[TRUNC2]](s16), [[ASHR2]] + ; GFX8-NEXT: [[SELECT5:%[0-9]+]]:_(s16) = G_SELECT [[ICMP5]](s1), [[SELECT4]], [[SHL2]] + ; GFX8-NEXT: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF + ; GFX8-NEXT: [[UV6:%[0-9]+]]:_(<2 x s16>), [[UV7:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<4 x s16>) + ; GFX8-NEXT: [[BITCAST4:%[0-9]+]]:_(s32) = G_BITCAST [[UV6]](<2 x s16>) + ; GFX8-NEXT: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST4]], [[C]](s32) + ; GFX8-NEXT: [[BITCAST5:%[0-9]+]]:_(s32) = G_BITCAST [[UV7]](<2 x s16>) + ; GFX8-NEXT: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[SELECT1]](s16) + ; GFX8-NEXT: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[SELECT3]](s16) + ; GFX8-NEXT: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C]](s32) + ; GFX8-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL3]] + ; GFX8-NEXT: [[BITCAST6:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) + ; GFX8-NEXT: 
[[ZEXT2:%[0-9]+]]:_(s32) = G_ZEXT [[SELECT5]](s16) + ; GFX8-NEXT: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 + ; GFX8-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[BITCAST4]], [[C4]] + ; GFX8-NEXT: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[AND]], [[C]](s32) + ; GFX8-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[ZEXT2]], [[SHL4]] + ; GFX8-NEXT: [[BITCAST7:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32) + ; GFX8-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LSHR3]], [[C4]] + ; GFX8-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[BITCAST5]], [[C4]] + ; GFX8-NEXT: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[AND2]], [[C]](s32) + ; GFX8-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[AND1]], [[SHL5]] + ; GFX8-NEXT: [[BITCAST8:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR2]](s32) + ; GFX8-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BITCAST6]](<2 x s16>), [[BITCAST7]](<2 x s16>), [[BITCAST8]](<2 x s16>) + ; GFX8-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[CONCAT_VECTORS]](<6 x s16>) ; GFX9-LABEL: name: sshlsat_v3s16 ; GFX9: [[COPY:%[0-9]+]]:_(<6 x s16>) = COPY $vgpr0_vgpr1_vgpr2 - ; GFX9: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>), [[UV2:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[COPY]](<6 x s16>) - ; GFX9: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>) - ; GFX9: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST]](s32) - ; GFX9: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; GFX9: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) - ; GFX9: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) - ; GFX9: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>) - ; GFX9: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST1]](s32) - ; GFX9: [[UV3:%[0-9]+]]:_(<2 x s16>), [[UV4:%[0-9]+]]:_(<2 x s16>), [[UV5:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[COPY]](<6 x s16>) - ; GFX9: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[UV4]](<2 x s16>) - ; GFX9: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C]](s32) - ; GFX9: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR1]](s32) - ; GFX9: [[BITCAST3:%[0-9]+]]:_(s32) = G_BITCAST [[UV5]](<2 x s16>) - ; GFX9: [[TRUNC4:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST3]](s32) - ; GFX9: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST3]], [[C]](s32) - ; GFX9: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR2]](s32) - ; GFX9: [[SHL:%[0-9]+]]:_(s16) = G_SHL [[TRUNC]], [[TRUNC3]](s16) - ; GFX9: [[ASHR:%[0-9]+]]:_(s16) = G_ASHR [[SHL]], [[TRUNC3]](s16) - ; GFX9: [[C1:%[0-9]+]]:_(s16) = G_CONSTANT i16 -32768 - ; GFX9: [[C2:%[0-9]+]]:_(s16) = G_CONSTANT i16 32767 - ; GFX9: [[C3:%[0-9]+]]:_(s16) = G_CONSTANT i16 0 - ; GFX9: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(slt), [[TRUNC]](s16), [[C3]] - ; GFX9: [[SELECT:%[0-9]+]]:_(s16) = G_SELECT [[ICMP]](s1), [[C1]], [[C2]] - ; GFX9: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[TRUNC]](s16), [[ASHR]] - ; GFX9: [[SELECT1:%[0-9]+]]:_(s16) = G_SELECT [[ICMP1]](s1), [[SELECT]], [[SHL]] - ; GFX9: [[SHL1:%[0-9]+]]:_(s16) = G_SHL [[TRUNC1]], [[TRUNC4]](s16) - ; GFX9: [[ASHR1:%[0-9]+]]:_(s16) = G_ASHR [[SHL1]], [[TRUNC4]](s16) - ; GFX9: [[ICMP2:%[0-9]+]]:_(s1) = G_ICMP intpred(slt), [[TRUNC1]](s16), [[C3]] - ; GFX9: [[SELECT2:%[0-9]+]]:_(s16) = G_SELECT [[ICMP2]](s1), [[C1]], [[C2]] - ; GFX9: [[ICMP3:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[TRUNC1]](s16), [[ASHR1]] - ; GFX9: [[SELECT3:%[0-9]+]]:_(s16) = G_SELECT [[ICMP3]](s1), [[SELECT2]], [[SHL1]] - ; GFX9: [[SHL2:%[0-9]+]]:_(s16) = G_SHL [[TRUNC2]], [[TRUNC5]](s16) - ; GFX9: [[ASHR2:%[0-9]+]]:_(s16) = G_ASHR [[SHL2]], [[TRUNC5]](s16) - ; GFX9: [[ICMP4:%[0-9]+]]:_(s1) = G_ICMP intpred(slt), [[TRUNC2]](s16), [[C3]] - ; GFX9: 
[[SELECT4:%[0-9]+]]:_(s16) = G_SELECT [[ICMP4]](s1), [[C1]], [[C2]] - ; GFX9: [[ICMP5:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[TRUNC2]](s16), [[ASHR2]] - ; GFX9: [[SELECT5:%[0-9]+]]:_(s16) = G_SELECT [[ICMP5]](s1), [[SELECT4]], [[SHL2]] - ; GFX9: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF - ; GFX9: [[UV6:%[0-9]+]]:_(<2 x s16>), [[UV7:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<4 x s16>) - ; GFX9: [[BITCAST4:%[0-9]+]]:_(s32) = G_BITCAST [[UV6]](<2 x s16>) - ; GFX9: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST4]], [[C]](s32) - ; GFX9: [[BITCAST5:%[0-9]+]]:_(s32) = G_BITCAST [[UV7]](<2 x s16>) - ; GFX9: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[SELECT1]](s16) - ; GFX9: [[ANYEXT1:%[0-9]+]]:_(s32) = G_ANYEXT [[SELECT3]](s16) - ; GFX9: [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[ANYEXT]](s32), [[ANYEXT1]](s32) - ; GFX9: [[ANYEXT2:%[0-9]+]]:_(s32) = G_ANYEXT [[SELECT5]](s16) - ; GFX9: [[BUILD_VECTOR_TRUNC1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[ANYEXT2]](s32), [[BITCAST4]](s32) - ; GFX9: [[BUILD_VECTOR_TRUNC2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[LSHR3]](s32), [[BITCAST5]](s32) - ; GFX9: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR_TRUNC]](<2 x s16>), [[BUILD_VECTOR_TRUNC1]](<2 x s16>), [[BUILD_VECTOR_TRUNC2]](<2 x s16>) - ; GFX9: $vgpr0_vgpr1_vgpr2 = COPY [[CONCAT_VECTORS]](<6 x s16>) + ; GFX9-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>), [[UV2:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[COPY]](<6 x s16>) + ; GFX9-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>) + ; GFX9-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST]](s32) + ; GFX9-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; GFX9-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) + ; GFX9-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) + ; GFX9-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>) + ; GFX9-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST1]](s32) + ; GFX9-NEXT: [[UV3:%[0-9]+]]:_(<2 x s16>), [[UV4:%[0-9]+]]:_(<2 x s16>), [[UV5:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[COPY]](<6 x s16>) + ; GFX9-NEXT: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[UV4]](<2 x s16>) + ; GFX9-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C]](s32) + ; GFX9-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR1]](s32) + ; GFX9-NEXT: [[BITCAST3:%[0-9]+]]:_(s32) = G_BITCAST [[UV5]](<2 x s16>) + ; GFX9-NEXT: [[TRUNC4:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST3]](s32) + ; GFX9-NEXT: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST3]], [[C]](s32) + ; GFX9-NEXT: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR2]](s32) + ; GFX9-NEXT: [[SHL:%[0-9]+]]:_(s16) = G_SHL [[TRUNC]], [[TRUNC3]](s16) + ; GFX9-NEXT: [[ASHR:%[0-9]+]]:_(s16) = G_ASHR [[SHL]], [[TRUNC3]](s16) + ; GFX9-NEXT: [[C1:%[0-9]+]]:_(s16) = G_CONSTANT i16 -32768 + ; GFX9-NEXT: [[C2:%[0-9]+]]:_(s16) = G_CONSTANT i16 32767 + ; GFX9-NEXT: [[C3:%[0-9]+]]:_(s16) = G_CONSTANT i16 0 + ; GFX9-NEXT: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(slt), [[TRUNC]](s16), [[C3]] + ; GFX9-NEXT: [[SELECT:%[0-9]+]]:_(s16) = G_SELECT [[ICMP]](s1), [[C1]], [[C2]] + ; GFX9-NEXT: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[TRUNC]](s16), [[ASHR]] + ; GFX9-NEXT: [[SELECT1:%[0-9]+]]:_(s16) = G_SELECT [[ICMP1]](s1), [[SELECT]], [[SHL]] + ; GFX9-NEXT: [[SHL1:%[0-9]+]]:_(s16) = G_SHL [[TRUNC1]], [[TRUNC4]](s16) + ; GFX9-NEXT: [[ASHR1:%[0-9]+]]:_(s16) = G_ASHR [[SHL1]], [[TRUNC4]](s16) + ; GFX9-NEXT: [[ICMP2:%[0-9]+]]:_(s1) = G_ICMP intpred(slt), [[TRUNC1]](s16), [[C3]] + ; GFX9-NEXT: 
[[SELECT2:%[0-9]+]]:_(s16) = G_SELECT [[ICMP2]](s1), [[C1]], [[C2]] + ; GFX9-NEXT: [[ICMP3:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[TRUNC1]](s16), [[ASHR1]] + ; GFX9-NEXT: [[SELECT3:%[0-9]+]]:_(s16) = G_SELECT [[ICMP3]](s1), [[SELECT2]], [[SHL1]] + ; GFX9-NEXT: [[SHL2:%[0-9]+]]:_(s16) = G_SHL [[TRUNC2]], [[TRUNC5]](s16) + ; GFX9-NEXT: [[ASHR2:%[0-9]+]]:_(s16) = G_ASHR [[SHL2]], [[TRUNC5]](s16) + ; GFX9-NEXT: [[ICMP4:%[0-9]+]]:_(s1) = G_ICMP intpred(slt), [[TRUNC2]](s16), [[C3]] + ; GFX9-NEXT: [[SELECT4:%[0-9]+]]:_(s16) = G_SELECT [[ICMP4]](s1), [[C1]], [[C2]] + ; GFX9-NEXT: [[ICMP5:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[TRUNC2]](s16), [[ASHR2]] + ; GFX9-NEXT: [[SELECT5:%[0-9]+]]:_(s16) = G_SELECT [[ICMP5]](s1), [[SELECT4]], [[SHL2]] + ; GFX9-NEXT: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF + ; GFX9-NEXT: [[UV6:%[0-9]+]]:_(<2 x s16>), [[UV7:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<4 x s16>) + ; GFX9-NEXT: [[BITCAST4:%[0-9]+]]:_(s32) = G_BITCAST [[UV6]](<2 x s16>) + ; GFX9-NEXT: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST4]], [[C]](s32) + ; GFX9-NEXT: [[BITCAST5:%[0-9]+]]:_(s32) = G_BITCAST [[UV7]](<2 x s16>) + ; GFX9-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[SELECT1]](s16) + ; GFX9-NEXT: [[ANYEXT1:%[0-9]+]]:_(s32) = G_ANYEXT [[SELECT3]](s16) + ; GFX9-NEXT: [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[ANYEXT]](s32), [[ANYEXT1]](s32) + ; GFX9-NEXT: [[ANYEXT2:%[0-9]+]]:_(s32) = G_ANYEXT [[SELECT5]](s16) + ; GFX9-NEXT: [[BUILD_VECTOR_TRUNC1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[ANYEXT2]](s32), [[BITCAST4]](s32) + ; GFX9-NEXT: [[BUILD_VECTOR_TRUNC2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[LSHR3]](s32), [[BITCAST5]](s32) + ; GFX9-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR_TRUNC]](<2 x s16>), [[BUILD_VECTOR_TRUNC1]](<2 x s16>), [[BUILD_VECTOR_TRUNC2]](<2 x s16>) + ; GFX9-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[CONCAT_VECTORS]](<6 x s16>) %0:_(<6 x s16>) = COPY $vgpr0_vgpr1_vgpr2 %1:_(<3 x s16>), %2:_(<3 x s16>) = G_UNMERGE_VALUES %0 %3:_(<3 x s16>) = G_SSHLSAT %1, %2 @@ -667,188 +667,188 @@ body: | ; GFX6-LABEL: name: sshlsat_v4s16 ; GFX6: [[COPY:%[0-9]+]]:_(<4 x s16>) = COPY $vgpr0_vgpr1 - ; GFX6: [[COPY1:%[0-9]+]]:_(<4 x s16>) = COPY $vgpr2_vgpr3 - ; GFX6: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[COPY]](<4 x s16>) - ; GFX6: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>) - ; GFX6: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; GFX6: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) - ; GFX6: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>) - ; GFX6: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C]](s32) - ; GFX6: [[UV2:%[0-9]+]]:_(<2 x s16>), [[UV3:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[COPY1]](<4 x s16>) - ; GFX6: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[UV2]](<2 x s16>) - ; GFX6: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C]](s32) - ; GFX6: [[BITCAST3:%[0-9]+]]:_(s32) = G_BITCAST [[UV3]](<2 x s16>) - ; GFX6: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST3]], [[C]](s32) - ; GFX6: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 - ; GFX6: [[AND:%[0-9]+]]:_(s32) = G_AND [[BITCAST2]], [[C1]] - ; GFX6: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[BITCAST]], [[C]](s32) - ; GFX6: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[SHL]], [[AND]](s32) - ; GFX6: [[ASHR:%[0-9]+]]:_(s32) = G_ASHR [[SHL1]], [[AND]](s32) - ; GFX6: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 -2147483648 - ; GFX6: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 2147483647 - ; GFX6: [[C4:%[0-9]+]]:_(s32) = 
G_CONSTANT i32 0 - ; GFX6: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(slt), [[SHL]](s32), [[C4]] - ; GFX6: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[ICMP]](s1), [[C2]], [[C3]] - ; GFX6: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[SHL]](s32), [[ASHR]] - ; GFX6: [[SELECT1:%[0-9]+]]:_(s32) = G_SELECT [[ICMP1]](s1), [[SELECT]], [[SHL1]] - ; GFX6: [[ASHR1:%[0-9]+]]:_(s32) = G_ASHR [[SELECT1]], [[C]](s32) - ; GFX6: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LSHR2]], [[C1]] - ; GFX6: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[LSHR]], [[C]](s32) - ; GFX6: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[SHL2]], [[AND1]](s32) - ; GFX6: [[ASHR2:%[0-9]+]]:_(s32) = G_ASHR [[SHL3]], [[AND1]](s32) - ; GFX6: [[ICMP2:%[0-9]+]]:_(s1) = G_ICMP intpred(slt), [[SHL2]](s32), [[C4]] - ; GFX6: [[SELECT2:%[0-9]+]]:_(s32) = G_SELECT [[ICMP2]](s1), [[C2]], [[C3]] - ; GFX6: [[ICMP3:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[SHL2]](s32), [[ASHR2]] - ; GFX6: [[SELECT3:%[0-9]+]]:_(s32) = G_SELECT [[ICMP3]](s1), [[SELECT2]], [[SHL3]] - ; GFX6: [[ASHR3:%[0-9]+]]:_(s32) = G_ASHR [[SELECT3]], [[C]](s32) - ; GFX6: [[AND2:%[0-9]+]]:_(s32) = G_AND [[BITCAST3]], [[C1]] - ; GFX6: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[BITCAST1]], [[C]](s32) - ; GFX6: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[SHL4]], [[AND2]](s32) - ; GFX6: [[ASHR4:%[0-9]+]]:_(s32) = G_ASHR [[SHL5]], [[AND2]](s32) - ; GFX6: [[ICMP4:%[0-9]+]]:_(s1) = G_ICMP intpred(slt), [[SHL4]](s32), [[C4]] - ; GFX6: [[SELECT4:%[0-9]+]]:_(s32) = G_SELECT [[ICMP4]](s1), [[C2]], [[C3]] - ; GFX6: [[ICMP5:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[SHL4]](s32), [[ASHR4]] - ; GFX6: [[SELECT5:%[0-9]+]]:_(s32) = G_SELECT [[ICMP5]](s1), [[SELECT4]], [[SHL5]] - ; GFX6: [[ASHR5:%[0-9]+]]:_(s32) = G_ASHR [[SELECT5]], [[C]](s32) - ; GFX6: [[AND3:%[0-9]+]]:_(s32) = G_AND [[LSHR3]], [[C1]] - ; GFX6: [[SHL6:%[0-9]+]]:_(s32) = G_SHL [[LSHR1]], [[C]](s32) - ; GFX6: [[SHL7:%[0-9]+]]:_(s32) = G_SHL [[SHL6]], [[AND3]](s32) - ; GFX6: [[ASHR6:%[0-9]+]]:_(s32) = G_ASHR [[SHL7]], [[AND3]](s32) - ; GFX6: [[ICMP6:%[0-9]+]]:_(s1) = G_ICMP intpred(slt), [[SHL6]](s32), [[C4]] - ; GFX6: [[SELECT6:%[0-9]+]]:_(s32) = G_SELECT [[ICMP6]](s1), [[C2]], [[C3]] - ; GFX6: [[ICMP7:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[SHL6]](s32), [[ASHR6]] - ; GFX6: [[SELECT7:%[0-9]+]]:_(s32) = G_SELECT [[ICMP7]](s1), [[SELECT6]], [[SHL7]] - ; GFX6: [[ASHR7:%[0-9]+]]:_(s32) = G_ASHR [[SELECT7]], [[C]](s32) - ; GFX6: [[AND4:%[0-9]+]]:_(s32) = G_AND [[ASHR1]], [[C1]] - ; GFX6: [[AND5:%[0-9]+]]:_(s32) = G_AND [[ASHR3]], [[C1]] - ; GFX6: [[SHL8:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[C]](s32) - ; GFX6: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND4]], [[SHL8]] - ; GFX6: [[BITCAST4:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) - ; GFX6: [[AND6:%[0-9]+]]:_(s32) = G_AND [[ASHR5]], [[C1]] - ; GFX6: [[AND7:%[0-9]+]]:_(s32) = G_AND [[ASHR7]], [[C1]] - ; GFX6: [[SHL9:%[0-9]+]]:_(s32) = G_SHL [[AND7]], [[C]](s32) - ; GFX6: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND6]], [[SHL9]] - ; GFX6: [[BITCAST5:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32) - ; GFX6: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BITCAST4]](<2 x s16>), [[BITCAST5]](<2 x s16>) - ; GFX6: $vgpr0_vgpr1 = COPY [[CONCAT_VECTORS]](<4 x s16>) + ; GFX6-NEXT: [[COPY1:%[0-9]+]]:_(<4 x s16>) = COPY $vgpr2_vgpr3 + ; GFX6-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[COPY]](<4 x s16>) + ; GFX6-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>) + ; GFX6-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; GFX6-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) + ; GFX6-NEXT: 
[[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>) + ; GFX6-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C]](s32) + ; GFX6-NEXT: [[UV2:%[0-9]+]]:_(<2 x s16>), [[UV3:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[COPY1]](<4 x s16>) + ; GFX6-NEXT: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[UV2]](<2 x s16>) + ; GFX6-NEXT: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C]](s32) + ; GFX6-NEXT: [[BITCAST3:%[0-9]+]]:_(s32) = G_BITCAST [[UV3]](<2 x s16>) + ; GFX6-NEXT: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST3]], [[C]](s32) + ; GFX6-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 + ; GFX6-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[BITCAST2]], [[C1]] + ; GFX6-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[BITCAST]], [[C]](s32) + ; GFX6-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[SHL]], [[AND]](s32) + ; GFX6-NEXT: [[ASHR:%[0-9]+]]:_(s32) = G_ASHR [[SHL1]], [[AND]](s32) + ; GFX6-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 -2147483648 + ; GFX6-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 2147483647 + ; GFX6-NEXT: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 + ; GFX6-NEXT: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(slt), [[SHL]](s32), [[C4]] + ; GFX6-NEXT: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[ICMP]](s1), [[C2]], [[C3]] + ; GFX6-NEXT: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[SHL]](s32), [[ASHR]] + ; GFX6-NEXT: [[SELECT1:%[0-9]+]]:_(s32) = G_SELECT [[ICMP1]](s1), [[SELECT]], [[SHL1]] + ; GFX6-NEXT: [[ASHR1:%[0-9]+]]:_(s32) = G_ASHR [[SELECT1]], [[C]](s32) + ; GFX6-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LSHR2]], [[C1]] + ; GFX6-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[LSHR]], [[C]](s32) + ; GFX6-NEXT: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[SHL2]], [[AND1]](s32) + ; GFX6-NEXT: [[ASHR2:%[0-9]+]]:_(s32) = G_ASHR [[SHL3]], [[AND1]](s32) + ; GFX6-NEXT: [[ICMP2:%[0-9]+]]:_(s1) = G_ICMP intpred(slt), [[SHL2]](s32), [[C4]] + ; GFX6-NEXT: [[SELECT2:%[0-9]+]]:_(s32) = G_SELECT [[ICMP2]](s1), [[C2]], [[C3]] + ; GFX6-NEXT: [[ICMP3:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[SHL2]](s32), [[ASHR2]] + ; GFX6-NEXT: [[SELECT3:%[0-9]+]]:_(s32) = G_SELECT [[ICMP3]](s1), [[SELECT2]], [[SHL3]] + ; GFX6-NEXT: [[ASHR3:%[0-9]+]]:_(s32) = G_ASHR [[SELECT3]], [[C]](s32) + ; GFX6-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[BITCAST3]], [[C1]] + ; GFX6-NEXT: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[BITCAST1]], [[C]](s32) + ; GFX6-NEXT: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[SHL4]], [[AND2]](s32) + ; GFX6-NEXT: [[ASHR4:%[0-9]+]]:_(s32) = G_ASHR [[SHL5]], [[AND2]](s32) + ; GFX6-NEXT: [[ICMP4:%[0-9]+]]:_(s1) = G_ICMP intpred(slt), [[SHL4]](s32), [[C4]] + ; GFX6-NEXT: [[SELECT4:%[0-9]+]]:_(s32) = G_SELECT [[ICMP4]](s1), [[C2]], [[C3]] + ; GFX6-NEXT: [[ICMP5:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[SHL4]](s32), [[ASHR4]] + ; GFX6-NEXT: [[SELECT5:%[0-9]+]]:_(s32) = G_SELECT [[ICMP5]](s1), [[SELECT4]], [[SHL5]] + ; GFX6-NEXT: [[ASHR5:%[0-9]+]]:_(s32) = G_ASHR [[SELECT5]], [[C]](s32) + ; GFX6-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[LSHR3]], [[C1]] + ; GFX6-NEXT: [[SHL6:%[0-9]+]]:_(s32) = G_SHL [[LSHR1]], [[C]](s32) + ; GFX6-NEXT: [[SHL7:%[0-9]+]]:_(s32) = G_SHL [[SHL6]], [[AND3]](s32) + ; GFX6-NEXT: [[ASHR6:%[0-9]+]]:_(s32) = G_ASHR [[SHL7]], [[AND3]](s32) + ; GFX6-NEXT: [[ICMP6:%[0-9]+]]:_(s1) = G_ICMP intpred(slt), [[SHL6]](s32), [[C4]] + ; GFX6-NEXT: [[SELECT6:%[0-9]+]]:_(s32) = G_SELECT [[ICMP6]](s1), [[C2]], [[C3]] + ; GFX6-NEXT: [[ICMP7:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[SHL6]](s32), [[ASHR6]] + ; GFX6-NEXT: [[SELECT7:%[0-9]+]]:_(s32) = G_SELECT [[ICMP7]](s1), [[SELECT6]], [[SHL7]] + ; GFX6-NEXT: [[ASHR7:%[0-9]+]]:_(s32) = G_ASHR [[SELECT7]], 
[[C]](s32) + ; GFX6-NEXT: [[AND4:%[0-9]+]]:_(s32) = G_AND [[ASHR1]], [[C1]] + ; GFX6-NEXT: [[AND5:%[0-9]+]]:_(s32) = G_AND [[ASHR3]], [[C1]] + ; GFX6-NEXT: [[SHL8:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[C]](s32) + ; GFX6-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND4]], [[SHL8]] + ; GFX6-NEXT: [[BITCAST4:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) + ; GFX6-NEXT: [[AND6:%[0-9]+]]:_(s32) = G_AND [[ASHR5]], [[C1]] + ; GFX6-NEXT: [[AND7:%[0-9]+]]:_(s32) = G_AND [[ASHR7]], [[C1]] + ; GFX6-NEXT: [[SHL9:%[0-9]+]]:_(s32) = G_SHL [[AND7]], [[C]](s32) + ; GFX6-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND6]], [[SHL9]] + ; GFX6-NEXT: [[BITCAST5:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32) + ; GFX6-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BITCAST4]](<2 x s16>), [[BITCAST5]](<2 x s16>) + ; GFX6-NEXT: $vgpr0_vgpr1 = COPY [[CONCAT_VECTORS]](<4 x s16>) ; GFX8-LABEL: name: sshlsat_v4s16 ; GFX8: [[COPY:%[0-9]+]]:_(<4 x s16>) = COPY $vgpr0_vgpr1 - ; GFX8: [[COPY1:%[0-9]+]]:_(<4 x s16>) = COPY $vgpr2_vgpr3 - ; GFX8: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[COPY]](<4 x s16>) - ; GFX8: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>) - ; GFX8: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST]](s32) - ; GFX8: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; GFX8: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) - ; GFX8: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) - ; GFX8: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>) - ; GFX8: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST1]](s32) - ; GFX8: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C]](s32) - ; GFX8: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR1]](s32) - ; GFX8: [[UV2:%[0-9]+]]:_(<2 x s16>), [[UV3:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[COPY1]](<4 x s16>) - ; GFX8: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[UV2]](<2 x s16>) - ; GFX8: [[TRUNC4:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST2]](s32) - ; GFX8: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C]](s32) - ; GFX8: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR2]](s32) - ; GFX8: [[BITCAST3:%[0-9]+]]:_(s32) = G_BITCAST [[UV3]](<2 x s16>) - ; GFX8: [[TRUNC6:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST3]](s32) - ; GFX8: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST3]], [[C]](s32) - ; GFX8: [[TRUNC7:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR3]](s32) - ; GFX8: [[SHL:%[0-9]+]]:_(s16) = G_SHL [[TRUNC]], [[TRUNC4]](s16) - ; GFX8: [[ASHR:%[0-9]+]]:_(s16) = G_ASHR [[SHL]], [[TRUNC4]](s16) - ; GFX8: [[C1:%[0-9]+]]:_(s16) = G_CONSTANT i16 -32768 - ; GFX8: [[C2:%[0-9]+]]:_(s16) = G_CONSTANT i16 32767 - ; GFX8: [[C3:%[0-9]+]]:_(s16) = G_CONSTANT i16 0 - ; GFX8: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(slt), [[TRUNC]](s16), [[C3]] - ; GFX8: [[SELECT:%[0-9]+]]:_(s16) = G_SELECT [[ICMP]](s1), [[C1]], [[C2]] - ; GFX8: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[TRUNC]](s16), [[ASHR]] - ; GFX8: [[SELECT1:%[0-9]+]]:_(s16) = G_SELECT [[ICMP1]](s1), [[SELECT]], [[SHL]] - ; GFX8: [[SHL1:%[0-9]+]]:_(s16) = G_SHL [[TRUNC1]], [[TRUNC5]](s16) - ; GFX8: [[ASHR1:%[0-9]+]]:_(s16) = G_ASHR [[SHL1]], [[TRUNC5]](s16) - ; GFX8: [[ICMP2:%[0-9]+]]:_(s1) = G_ICMP intpred(slt), [[TRUNC1]](s16), [[C3]] - ; GFX8: [[SELECT2:%[0-9]+]]:_(s16) = G_SELECT [[ICMP2]](s1), [[C1]], [[C2]] - ; GFX8: [[ICMP3:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[TRUNC1]](s16), [[ASHR1]] - ; GFX8: [[SELECT3:%[0-9]+]]:_(s16) = G_SELECT [[ICMP3]](s1), [[SELECT2]], [[SHL1]] - ; GFX8: [[SHL2:%[0-9]+]]:_(s16) = G_SHL [[TRUNC2]], [[TRUNC6]](s16) - ; GFX8: [[ASHR2:%[0-9]+]]:_(s16) = G_ASHR [[SHL2]], 
[[TRUNC6]](s16) - ; GFX8: [[ICMP4:%[0-9]+]]:_(s1) = G_ICMP intpred(slt), [[TRUNC2]](s16), [[C3]] - ; GFX8: [[SELECT4:%[0-9]+]]:_(s16) = G_SELECT [[ICMP4]](s1), [[C1]], [[C2]] - ; GFX8: [[ICMP5:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[TRUNC2]](s16), [[ASHR2]] - ; GFX8: [[SELECT5:%[0-9]+]]:_(s16) = G_SELECT [[ICMP5]](s1), [[SELECT4]], [[SHL2]] - ; GFX8: [[SHL3:%[0-9]+]]:_(s16) = G_SHL [[TRUNC3]], [[TRUNC7]](s16) - ; GFX8: [[ASHR3:%[0-9]+]]:_(s16) = G_ASHR [[SHL3]], [[TRUNC7]](s16) - ; GFX8: [[ICMP6:%[0-9]+]]:_(s1) = G_ICMP intpred(slt), [[TRUNC3]](s16), [[C3]] - ; GFX8: [[SELECT6:%[0-9]+]]:_(s16) = G_SELECT [[ICMP6]](s1), [[C1]], [[C2]] - ; GFX8: [[ICMP7:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[TRUNC3]](s16), [[ASHR3]] - ; GFX8: [[SELECT7:%[0-9]+]]:_(s16) = G_SELECT [[ICMP7]](s1), [[SELECT6]], [[SHL3]] - ; GFX8: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[SELECT1]](s16) - ; GFX8: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[SELECT3]](s16) - ; GFX8: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C]](s32) - ; GFX8: [[OR:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL4]] - ; GFX8: [[BITCAST4:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) - ; GFX8: [[ZEXT2:%[0-9]+]]:_(s32) = G_ZEXT [[SELECT5]](s16) - ; GFX8: [[ZEXT3:%[0-9]+]]:_(s32) = G_ZEXT [[SELECT7]](s16) - ; GFX8: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[ZEXT3]], [[C]](s32) - ; GFX8: [[OR1:%[0-9]+]]:_(s32) = G_OR [[ZEXT2]], [[SHL5]] - ; GFX8: [[BITCAST5:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32) - ; GFX8: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BITCAST4]](<2 x s16>), [[BITCAST5]](<2 x s16>) - ; GFX8: $vgpr0_vgpr1 = COPY [[CONCAT_VECTORS]](<4 x s16>) + ; GFX8-NEXT: [[COPY1:%[0-9]+]]:_(<4 x s16>) = COPY $vgpr2_vgpr3 + ; GFX8-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[COPY]](<4 x s16>) + ; GFX8-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>) + ; GFX8-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST]](s32) + ; GFX8-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; GFX8-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) + ; GFX8-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) + ; GFX8-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>) + ; GFX8-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST1]](s32) + ; GFX8-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C]](s32) + ; GFX8-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR1]](s32) + ; GFX8-NEXT: [[UV2:%[0-9]+]]:_(<2 x s16>), [[UV3:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[COPY1]](<4 x s16>) + ; GFX8-NEXT: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[UV2]](<2 x s16>) + ; GFX8-NEXT: [[TRUNC4:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST2]](s32) + ; GFX8-NEXT: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C]](s32) + ; GFX8-NEXT: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR2]](s32) + ; GFX8-NEXT: [[BITCAST3:%[0-9]+]]:_(s32) = G_BITCAST [[UV3]](<2 x s16>) + ; GFX8-NEXT: [[TRUNC6:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST3]](s32) + ; GFX8-NEXT: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST3]], [[C]](s32) + ; GFX8-NEXT: [[TRUNC7:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR3]](s32) + ; GFX8-NEXT: [[SHL:%[0-9]+]]:_(s16) = G_SHL [[TRUNC]], [[TRUNC4]](s16) + ; GFX8-NEXT: [[ASHR:%[0-9]+]]:_(s16) = G_ASHR [[SHL]], [[TRUNC4]](s16) + ; GFX8-NEXT: [[C1:%[0-9]+]]:_(s16) = G_CONSTANT i16 -32768 + ; GFX8-NEXT: [[C2:%[0-9]+]]:_(s16) = G_CONSTANT i16 32767 + ; GFX8-NEXT: [[C3:%[0-9]+]]:_(s16) = G_CONSTANT i16 0 + ; GFX8-NEXT: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(slt), [[TRUNC]](s16), [[C3]] + ; GFX8-NEXT: [[SELECT:%[0-9]+]]:_(s16) = G_SELECT 
[[ICMP]](s1), [[C1]], [[C2]] + ; GFX8-NEXT: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[TRUNC]](s16), [[ASHR]] + ; GFX8-NEXT: [[SELECT1:%[0-9]+]]:_(s16) = G_SELECT [[ICMP1]](s1), [[SELECT]], [[SHL]] + ; GFX8-NEXT: [[SHL1:%[0-9]+]]:_(s16) = G_SHL [[TRUNC1]], [[TRUNC5]](s16) + ; GFX8-NEXT: [[ASHR1:%[0-9]+]]:_(s16) = G_ASHR [[SHL1]], [[TRUNC5]](s16) + ; GFX8-NEXT: [[ICMP2:%[0-9]+]]:_(s1) = G_ICMP intpred(slt), [[TRUNC1]](s16), [[C3]] + ; GFX8-NEXT: [[SELECT2:%[0-9]+]]:_(s16) = G_SELECT [[ICMP2]](s1), [[C1]], [[C2]] + ; GFX8-NEXT: [[ICMP3:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[TRUNC1]](s16), [[ASHR1]] + ; GFX8-NEXT: [[SELECT3:%[0-9]+]]:_(s16) = G_SELECT [[ICMP3]](s1), [[SELECT2]], [[SHL1]] + ; GFX8-NEXT: [[SHL2:%[0-9]+]]:_(s16) = G_SHL [[TRUNC2]], [[TRUNC6]](s16) + ; GFX8-NEXT: [[ASHR2:%[0-9]+]]:_(s16) = G_ASHR [[SHL2]], [[TRUNC6]](s16) + ; GFX8-NEXT: [[ICMP4:%[0-9]+]]:_(s1) = G_ICMP intpred(slt), [[TRUNC2]](s16), [[C3]] + ; GFX8-NEXT: [[SELECT4:%[0-9]+]]:_(s16) = G_SELECT [[ICMP4]](s1), [[C1]], [[C2]] + ; GFX8-NEXT: [[ICMP5:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[TRUNC2]](s16), [[ASHR2]] + ; GFX8-NEXT: [[SELECT5:%[0-9]+]]:_(s16) = G_SELECT [[ICMP5]](s1), [[SELECT4]], [[SHL2]] + ; GFX8-NEXT: [[SHL3:%[0-9]+]]:_(s16) = G_SHL [[TRUNC3]], [[TRUNC7]](s16) + ; GFX8-NEXT: [[ASHR3:%[0-9]+]]:_(s16) = G_ASHR [[SHL3]], [[TRUNC7]](s16) + ; GFX8-NEXT: [[ICMP6:%[0-9]+]]:_(s1) = G_ICMP intpred(slt), [[TRUNC3]](s16), [[C3]] + ; GFX8-NEXT: [[SELECT6:%[0-9]+]]:_(s16) = G_SELECT [[ICMP6]](s1), [[C1]], [[C2]] + ; GFX8-NEXT: [[ICMP7:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[TRUNC3]](s16), [[ASHR3]] + ; GFX8-NEXT: [[SELECT7:%[0-9]+]]:_(s16) = G_SELECT [[ICMP7]](s1), [[SELECT6]], [[SHL3]] + ; GFX8-NEXT: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[SELECT1]](s16) + ; GFX8-NEXT: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[SELECT3]](s16) + ; GFX8-NEXT: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C]](s32) + ; GFX8-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL4]] + ; GFX8-NEXT: [[BITCAST4:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) + ; GFX8-NEXT: [[ZEXT2:%[0-9]+]]:_(s32) = G_ZEXT [[SELECT5]](s16) + ; GFX8-NEXT: [[ZEXT3:%[0-9]+]]:_(s32) = G_ZEXT [[SELECT7]](s16) + ; GFX8-NEXT: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[ZEXT3]], [[C]](s32) + ; GFX8-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[ZEXT2]], [[SHL5]] + ; GFX8-NEXT: [[BITCAST5:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32) + ; GFX8-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BITCAST4]](<2 x s16>), [[BITCAST5]](<2 x s16>) + ; GFX8-NEXT: $vgpr0_vgpr1 = COPY [[CONCAT_VECTORS]](<4 x s16>) ; GFX9-LABEL: name: sshlsat_v4s16 ; GFX9: [[COPY:%[0-9]+]]:_(<4 x s16>) = COPY $vgpr0_vgpr1 - ; GFX9: [[COPY1:%[0-9]+]]:_(<4 x s16>) = COPY $vgpr2_vgpr3 - ; GFX9: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[COPY]](<4 x s16>) - ; GFX9: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>) - ; GFX9: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST]](s32) - ; GFX9: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; GFX9: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) - ; GFX9: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) - ; GFX9: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>) - ; GFX9: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST1]](s32) - ; GFX9: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C]](s32) - ; GFX9: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR1]](s32) - ; GFX9: [[UV2:%[0-9]+]]:_(<2 x s16>), [[UV3:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[COPY1]](<4 x s16>) - ; GFX9: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST 
[[UV2]](<2 x s16>) - ; GFX9: [[TRUNC4:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST2]](s32) - ; GFX9: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C]](s32) - ; GFX9: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR2]](s32) - ; GFX9: [[BITCAST3:%[0-9]+]]:_(s32) = G_BITCAST [[UV3]](<2 x s16>) - ; GFX9: [[TRUNC6:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST3]](s32) - ; GFX9: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST3]], [[C]](s32) - ; GFX9: [[TRUNC7:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR3]](s32) - ; GFX9: [[SHL:%[0-9]+]]:_(s16) = G_SHL [[TRUNC]], [[TRUNC4]](s16) - ; GFX9: [[ASHR:%[0-9]+]]:_(s16) = G_ASHR [[SHL]], [[TRUNC4]](s16) - ; GFX9: [[C1:%[0-9]+]]:_(s16) = G_CONSTANT i16 -32768 - ; GFX9: [[C2:%[0-9]+]]:_(s16) = G_CONSTANT i16 32767 - ; GFX9: [[C3:%[0-9]+]]:_(s16) = G_CONSTANT i16 0 - ; GFX9: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(slt), [[TRUNC]](s16), [[C3]] - ; GFX9: [[SELECT:%[0-9]+]]:_(s16) = G_SELECT [[ICMP]](s1), [[C1]], [[C2]] - ; GFX9: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[TRUNC]](s16), [[ASHR]] - ; GFX9: [[SELECT1:%[0-9]+]]:_(s16) = G_SELECT [[ICMP1]](s1), [[SELECT]], [[SHL]] - ; GFX9: [[SHL1:%[0-9]+]]:_(s16) = G_SHL [[TRUNC1]], [[TRUNC5]](s16) - ; GFX9: [[ASHR1:%[0-9]+]]:_(s16) = G_ASHR [[SHL1]], [[TRUNC5]](s16) - ; GFX9: [[ICMP2:%[0-9]+]]:_(s1) = G_ICMP intpred(slt), [[TRUNC1]](s16), [[C3]] - ; GFX9: [[SELECT2:%[0-9]+]]:_(s16) = G_SELECT [[ICMP2]](s1), [[C1]], [[C2]] - ; GFX9: [[ICMP3:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[TRUNC1]](s16), [[ASHR1]] - ; GFX9: [[SELECT3:%[0-9]+]]:_(s16) = G_SELECT [[ICMP3]](s1), [[SELECT2]], [[SHL1]] - ; GFX9: [[SHL2:%[0-9]+]]:_(s16) = G_SHL [[TRUNC2]], [[TRUNC6]](s16) - ; GFX9: [[ASHR2:%[0-9]+]]:_(s16) = G_ASHR [[SHL2]], [[TRUNC6]](s16) - ; GFX9: [[ICMP4:%[0-9]+]]:_(s1) = G_ICMP intpred(slt), [[TRUNC2]](s16), [[C3]] - ; GFX9: [[SELECT4:%[0-9]+]]:_(s16) = G_SELECT [[ICMP4]](s1), [[C1]], [[C2]] - ; GFX9: [[ICMP5:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[TRUNC2]](s16), [[ASHR2]] - ; GFX9: [[SELECT5:%[0-9]+]]:_(s16) = G_SELECT [[ICMP5]](s1), [[SELECT4]], [[SHL2]] - ; GFX9: [[SHL3:%[0-9]+]]:_(s16) = G_SHL [[TRUNC3]], [[TRUNC7]](s16) - ; GFX9: [[ASHR3:%[0-9]+]]:_(s16) = G_ASHR [[SHL3]], [[TRUNC7]](s16) - ; GFX9: [[ICMP6:%[0-9]+]]:_(s1) = G_ICMP intpred(slt), [[TRUNC3]](s16), [[C3]] - ; GFX9: [[SELECT6:%[0-9]+]]:_(s16) = G_SELECT [[ICMP6]](s1), [[C1]], [[C2]] - ; GFX9: [[ICMP7:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[TRUNC3]](s16), [[ASHR3]] - ; GFX9: [[SELECT7:%[0-9]+]]:_(s16) = G_SELECT [[ICMP7]](s1), [[SELECT6]], [[SHL3]] - ; GFX9: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[SELECT1]](s16) - ; GFX9: [[ANYEXT1:%[0-9]+]]:_(s32) = G_ANYEXT [[SELECT3]](s16) - ; GFX9: [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[ANYEXT]](s32), [[ANYEXT1]](s32) - ; GFX9: [[ANYEXT2:%[0-9]+]]:_(s32) = G_ANYEXT [[SELECT5]](s16) - ; GFX9: [[ANYEXT3:%[0-9]+]]:_(s32) = G_ANYEXT [[SELECT7]](s16) - ; GFX9: [[BUILD_VECTOR_TRUNC1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[ANYEXT2]](s32), [[ANYEXT3]](s32) - ; GFX9: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR_TRUNC]](<2 x s16>), [[BUILD_VECTOR_TRUNC1]](<2 x s16>) - ; GFX9: $vgpr0_vgpr1 = COPY [[CONCAT_VECTORS]](<4 x s16>) + ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(<4 x s16>) = COPY $vgpr2_vgpr3 + ; GFX9-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[COPY]](<4 x s16>) + ; GFX9-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>) + ; GFX9-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST]](s32) + ; GFX9-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; 
GFX9-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) + ; GFX9-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) + ; GFX9-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>) + ; GFX9-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST1]](s32) + ; GFX9-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C]](s32) + ; GFX9-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR1]](s32) + ; GFX9-NEXT: [[UV2:%[0-9]+]]:_(<2 x s16>), [[UV3:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[COPY1]](<4 x s16>) + ; GFX9-NEXT: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[UV2]](<2 x s16>) + ; GFX9-NEXT: [[TRUNC4:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST2]](s32) + ; GFX9-NEXT: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C]](s32) + ; GFX9-NEXT: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR2]](s32) + ; GFX9-NEXT: [[BITCAST3:%[0-9]+]]:_(s32) = G_BITCAST [[UV3]](<2 x s16>) + ; GFX9-NEXT: [[TRUNC6:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST3]](s32) + ; GFX9-NEXT: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST3]], [[C]](s32) + ; GFX9-NEXT: [[TRUNC7:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR3]](s32) + ; GFX9-NEXT: [[SHL:%[0-9]+]]:_(s16) = G_SHL [[TRUNC]], [[TRUNC4]](s16) + ; GFX9-NEXT: [[ASHR:%[0-9]+]]:_(s16) = G_ASHR [[SHL]], [[TRUNC4]](s16) + ; GFX9-NEXT: [[C1:%[0-9]+]]:_(s16) = G_CONSTANT i16 -32768 + ; GFX9-NEXT: [[C2:%[0-9]+]]:_(s16) = G_CONSTANT i16 32767 + ; GFX9-NEXT: [[C3:%[0-9]+]]:_(s16) = G_CONSTANT i16 0 + ; GFX9-NEXT: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(slt), [[TRUNC]](s16), [[C3]] + ; GFX9-NEXT: [[SELECT:%[0-9]+]]:_(s16) = G_SELECT [[ICMP]](s1), [[C1]], [[C2]] + ; GFX9-NEXT: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[TRUNC]](s16), [[ASHR]] + ; GFX9-NEXT: [[SELECT1:%[0-9]+]]:_(s16) = G_SELECT [[ICMP1]](s1), [[SELECT]], [[SHL]] + ; GFX9-NEXT: [[SHL1:%[0-9]+]]:_(s16) = G_SHL [[TRUNC1]], [[TRUNC5]](s16) + ; GFX9-NEXT: [[ASHR1:%[0-9]+]]:_(s16) = G_ASHR [[SHL1]], [[TRUNC5]](s16) + ; GFX9-NEXT: [[ICMP2:%[0-9]+]]:_(s1) = G_ICMP intpred(slt), [[TRUNC1]](s16), [[C3]] + ; GFX9-NEXT: [[SELECT2:%[0-9]+]]:_(s16) = G_SELECT [[ICMP2]](s1), [[C1]], [[C2]] + ; GFX9-NEXT: [[ICMP3:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[TRUNC1]](s16), [[ASHR1]] + ; GFX9-NEXT: [[SELECT3:%[0-9]+]]:_(s16) = G_SELECT [[ICMP3]](s1), [[SELECT2]], [[SHL1]] + ; GFX9-NEXT: [[SHL2:%[0-9]+]]:_(s16) = G_SHL [[TRUNC2]], [[TRUNC6]](s16) + ; GFX9-NEXT: [[ASHR2:%[0-9]+]]:_(s16) = G_ASHR [[SHL2]], [[TRUNC6]](s16) + ; GFX9-NEXT: [[ICMP4:%[0-9]+]]:_(s1) = G_ICMP intpred(slt), [[TRUNC2]](s16), [[C3]] + ; GFX9-NEXT: [[SELECT4:%[0-9]+]]:_(s16) = G_SELECT [[ICMP4]](s1), [[C1]], [[C2]] + ; GFX9-NEXT: [[ICMP5:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[TRUNC2]](s16), [[ASHR2]] + ; GFX9-NEXT: [[SELECT5:%[0-9]+]]:_(s16) = G_SELECT [[ICMP5]](s1), [[SELECT4]], [[SHL2]] + ; GFX9-NEXT: [[SHL3:%[0-9]+]]:_(s16) = G_SHL [[TRUNC3]], [[TRUNC7]](s16) + ; GFX9-NEXT: [[ASHR3:%[0-9]+]]:_(s16) = G_ASHR [[SHL3]], [[TRUNC7]](s16) + ; GFX9-NEXT: [[ICMP6:%[0-9]+]]:_(s1) = G_ICMP intpred(slt), [[TRUNC3]](s16), [[C3]] + ; GFX9-NEXT: [[SELECT6:%[0-9]+]]:_(s16) = G_SELECT [[ICMP6]](s1), [[C1]], [[C2]] + ; GFX9-NEXT: [[ICMP7:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[TRUNC3]](s16), [[ASHR3]] + ; GFX9-NEXT: [[SELECT7:%[0-9]+]]:_(s16) = G_SELECT [[ICMP7]](s1), [[SELECT6]], [[SHL3]] + ; GFX9-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[SELECT1]](s16) + ; GFX9-NEXT: [[ANYEXT1:%[0-9]+]]:_(s32) = G_ANYEXT [[SELECT3]](s16) + ; GFX9-NEXT: [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[ANYEXT]](s32), [[ANYEXT1]](s32) + ; GFX9-NEXT: [[ANYEXT2:%[0-9]+]]:_(s32) = 
G_ANYEXT [[SELECT5]](s16) + ; GFX9-NEXT: [[ANYEXT3:%[0-9]+]]:_(s32) = G_ANYEXT [[SELECT7]](s16) + ; GFX9-NEXT: [[BUILD_VECTOR_TRUNC1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[ANYEXT2]](s32), [[ANYEXT3]](s32) + ; GFX9-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR_TRUNC]](<2 x s16>), [[BUILD_VECTOR_TRUNC1]](<2 x s16>) + ; GFX9-NEXT: $vgpr0_vgpr1 = COPY [[CONCAT_VECTORS]](<4 x s16>) %0:_(<4 x s16>) = COPY $vgpr0_vgpr1 %1:_(<4 x s16>) = COPY $vgpr2_vgpr3 %2:_(<4 x s16>) = G_SSHLSAT %0, %1 @@ -863,43 +863,43 @@ body: | ; GFX6-LABEL: name: sshlsat_s32 ; GFX6: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX6: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX6: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY]], [[COPY1]](s32) - ; GFX6: [[ASHR:%[0-9]+]]:_(s32) = G_ASHR [[SHL]], [[COPY1]](s32) - ; GFX6: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 -2147483648 - ; GFX6: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 2147483647 - ; GFX6: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 - ; GFX6: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(slt), [[COPY]](s32), [[C2]] - ; GFX6: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[ICMP]](s1), [[C]], [[C1]] - ; GFX6: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[COPY]](s32), [[ASHR]] - ; GFX6: [[SELECT1:%[0-9]+]]:_(s32) = G_SELECT [[ICMP1]](s1), [[SELECT]], [[SHL]] - ; GFX6: $vgpr0 = COPY [[SELECT1]](s32) + ; GFX6-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; GFX6-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY]], [[COPY1]](s32) + ; GFX6-NEXT: [[ASHR:%[0-9]+]]:_(s32) = G_ASHR [[SHL]], [[COPY1]](s32) + ; GFX6-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 -2147483648 + ; GFX6-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 2147483647 + ; GFX6-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 + ; GFX6-NEXT: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(slt), [[COPY]](s32), [[C2]] + ; GFX6-NEXT: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[ICMP]](s1), [[C]], [[C1]] + ; GFX6-NEXT: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[COPY]](s32), [[ASHR]] + ; GFX6-NEXT: [[SELECT1:%[0-9]+]]:_(s32) = G_SELECT [[ICMP1]](s1), [[SELECT]], [[SHL]] + ; GFX6-NEXT: $vgpr0 = COPY [[SELECT1]](s32) ; GFX8-LABEL: name: sshlsat_s32 ; GFX8: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX8: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX8: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY]], [[COPY1]](s32) - ; GFX8: [[ASHR:%[0-9]+]]:_(s32) = G_ASHR [[SHL]], [[COPY1]](s32) - ; GFX8: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 -2147483648 - ; GFX8: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 2147483647 - ; GFX8: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 - ; GFX8: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(slt), [[COPY]](s32), [[C2]] - ; GFX8: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[ICMP]](s1), [[C]], [[C1]] - ; GFX8: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[COPY]](s32), [[ASHR]] - ; GFX8: [[SELECT1:%[0-9]+]]:_(s32) = G_SELECT [[ICMP1]](s1), [[SELECT]], [[SHL]] - ; GFX8: $vgpr0 = COPY [[SELECT1]](s32) + ; GFX8-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; GFX8-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY]], [[COPY1]](s32) + ; GFX8-NEXT: [[ASHR:%[0-9]+]]:_(s32) = G_ASHR [[SHL]], [[COPY1]](s32) + ; GFX8-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 -2147483648 + ; GFX8-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 2147483647 + ; GFX8-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 + ; GFX8-NEXT: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(slt), [[COPY]](s32), [[C2]] + ; GFX8-NEXT: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[ICMP]](s1), [[C]], [[C1]] + ; GFX8-NEXT: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[COPY]](s32), [[ASHR]] + ; GFX8-NEXT: 
[[SELECT1:%[0-9]+]]:_(s32) = G_SELECT [[ICMP1]](s1), [[SELECT]], [[SHL]] + ; GFX8-NEXT: $vgpr0 = COPY [[SELECT1]](s32) ; GFX9-LABEL: name: sshlsat_s32 ; GFX9: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX9: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX9: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY]], [[COPY1]](s32) - ; GFX9: [[ASHR:%[0-9]+]]:_(s32) = G_ASHR [[SHL]], [[COPY1]](s32) - ; GFX9: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 -2147483648 - ; GFX9: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 2147483647 - ; GFX9: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 - ; GFX9: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(slt), [[COPY]](s32), [[C2]] - ; GFX9: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[ICMP]](s1), [[C]], [[C1]] - ; GFX9: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[COPY]](s32), [[ASHR]] - ; GFX9: [[SELECT1:%[0-9]+]]:_(s32) = G_SELECT [[ICMP1]](s1), [[SELECT]], [[SHL]] - ; GFX9: $vgpr0 = COPY [[SELECT1]](s32) + ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; GFX9-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY]], [[COPY1]](s32) + ; GFX9-NEXT: [[ASHR:%[0-9]+]]:_(s32) = G_ASHR [[SHL]], [[COPY1]](s32) + ; GFX9-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 -2147483648 + ; GFX9-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 2147483647 + ; GFX9-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 + ; GFX9-NEXT: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(slt), [[COPY]](s32), [[C2]] + ; GFX9-NEXT: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[ICMP]](s1), [[C]], [[C1]] + ; GFX9-NEXT: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[COPY]](s32), [[ASHR]] + ; GFX9-NEXT: [[SELECT1:%[0-9]+]]:_(s32) = G_SELECT [[ICMP1]](s1), [[SELECT]], [[SHL]] + ; GFX9-NEXT: $vgpr0 = COPY [[SELECT1]](s32) %0:_(s32) = COPY $vgpr0 %1:_(s32) = COPY $vgpr1 %2:_(s32) = G_SSHLSAT %0, %1 @@ -914,70 +914,70 @@ body: | ; GFX6-LABEL: name: sshlsat_v2s32 ; GFX6: [[COPY:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr0_vgpr1 - ; GFX6: [[COPY1:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr2_vgpr3 - ; GFX6: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<2 x s32>) - ; GFX6: [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](<2 x s32>) - ; GFX6: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[UV]], [[UV2]](s32) - ; GFX6: [[ASHR:%[0-9]+]]:_(s32) = G_ASHR [[SHL]], [[UV2]](s32) - ; GFX6: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 -2147483648 - ; GFX6: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 2147483647 - ; GFX6: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 - ; GFX6: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(slt), [[UV]](s32), [[C2]] - ; GFX6: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[ICMP]](s1), [[C]], [[C1]] - ; GFX6: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[UV]](s32), [[ASHR]] - ; GFX6: [[SELECT1:%[0-9]+]]:_(s32) = G_SELECT [[ICMP1]](s1), [[SELECT]], [[SHL]] - ; GFX6: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[UV1]], [[UV3]](s32) - ; GFX6: [[ASHR1:%[0-9]+]]:_(s32) = G_ASHR [[SHL1]], [[UV3]](s32) - ; GFX6: [[ICMP2:%[0-9]+]]:_(s1) = G_ICMP intpred(slt), [[UV1]](s32), [[C2]] - ; GFX6: [[SELECT2:%[0-9]+]]:_(s32) = G_SELECT [[ICMP2]](s1), [[C]], [[C1]] - ; GFX6: [[ICMP3:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[UV1]](s32), [[ASHR1]] - ; GFX6: [[SELECT3:%[0-9]+]]:_(s32) = G_SELECT [[ICMP3]](s1), [[SELECT2]], [[SHL1]] - ; GFX6: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[SELECT1]](s32), [[SELECT3]](s32) - ; GFX6: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x s32>) + ; GFX6-NEXT: [[COPY1:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr2_vgpr3 + ; GFX6-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<2 x s32>) + ; GFX6-NEXT: [[UV2:%[0-9]+]]:_(s32), 
[[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](<2 x s32>) + ; GFX6-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[UV]], [[UV2]](s32) + ; GFX6-NEXT: [[ASHR:%[0-9]+]]:_(s32) = G_ASHR [[SHL]], [[UV2]](s32) + ; GFX6-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 -2147483648 + ; GFX6-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 2147483647 + ; GFX6-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 + ; GFX6-NEXT: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(slt), [[UV]](s32), [[C2]] + ; GFX6-NEXT: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[ICMP]](s1), [[C]], [[C1]] + ; GFX6-NEXT: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[UV]](s32), [[ASHR]] + ; GFX6-NEXT: [[SELECT1:%[0-9]+]]:_(s32) = G_SELECT [[ICMP1]](s1), [[SELECT]], [[SHL]] + ; GFX6-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[UV1]], [[UV3]](s32) + ; GFX6-NEXT: [[ASHR1:%[0-9]+]]:_(s32) = G_ASHR [[SHL1]], [[UV3]](s32) + ; GFX6-NEXT: [[ICMP2:%[0-9]+]]:_(s1) = G_ICMP intpred(slt), [[UV1]](s32), [[C2]] + ; GFX6-NEXT: [[SELECT2:%[0-9]+]]:_(s32) = G_SELECT [[ICMP2]](s1), [[C]], [[C1]] + ; GFX6-NEXT: [[ICMP3:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[UV1]](s32), [[ASHR1]] + ; GFX6-NEXT: [[SELECT3:%[0-9]+]]:_(s32) = G_SELECT [[ICMP3]](s1), [[SELECT2]], [[SHL1]] + ; GFX6-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[SELECT1]](s32), [[SELECT3]](s32) + ; GFX6-NEXT: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x s32>) ; GFX8-LABEL: name: sshlsat_v2s32 ; GFX8: [[COPY:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr0_vgpr1 - ; GFX8: [[COPY1:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr2_vgpr3 - ; GFX8: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<2 x s32>) - ; GFX8: [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](<2 x s32>) - ; GFX8: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[UV]], [[UV2]](s32) - ; GFX8: [[ASHR:%[0-9]+]]:_(s32) = G_ASHR [[SHL]], [[UV2]](s32) - ; GFX8: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 -2147483648 - ; GFX8: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 2147483647 - ; GFX8: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 - ; GFX8: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(slt), [[UV]](s32), [[C2]] - ; GFX8: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[ICMP]](s1), [[C]], [[C1]] - ; GFX8: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[UV]](s32), [[ASHR]] - ; GFX8: [[SELECT1:%[0-9]+]]:_(s32) = G_SELECT [[ICMP1]](s1), [[SELECT]], [[SHL]] - ; GFX8: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[UV1]], [[UV3]](s32) - ; GFX8: [[ASHR1:%[0-9]+]]:_(s32) = G_ASHR [[SHL1]], [[UV3]](s32) - ; GFX8: [[ICMP2:%[0-9]+]]:_(s1) = G_ICMP intpred(slt), [[UV1]](s32), [[C2]] - ; GFX8: [[SELECT2:%[0-9]+]]:_(s32) = G_SELECT [[ICMP2]](s1), [[C]], [[C1]] - ; GFX8: [[ICMP3:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[UV1]](s32), [[ASHR1]] - ; GFX8: [[SELECT3:%[0-9]+]]:_(s32) = G_SELECT [[ICMP3]](s1), [[SELECT2]], [[SHL1]] - ; GFX8: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[SELECT1]](s32), [[SELECT3]](s32) - ; GFX8: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x s32>) + ; GFX8-NEXT: [[COPY1:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr2_vgpr3 + ; GFX8-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<2 x s32>) + ; GFX8-NEXT: [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](<2 x s32>) + ; GFX8-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[UV]], [[UV2]](s32) + ; GFX8-NEXT: [[ASHR:%[0-9]+]]:_(s32) = G_ASHR [[SHL]], [[UV2]](s32) + ; GFX8-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 -2147483648 + ; GFX8-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 2147483647 + ; GFX8-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 + ; GFX8-NEXT: 
[[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(slt), [[UV]](s32), [[C2]] + ; GFX8-NEXT: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[ICMP]](s1), [[C]], [[C1]] + ; GFX8-NEXT: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[UV]](s32), [[ASHR]] + ; GFX8-NEXT: [[SELECT1:%[0-9]+]]:_(s32) = G_SELECT [[ICMP1]](s1), [[SELECT]], [[SHL]] + ; GFX8-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[UV1]], [[UV3]](s32) + ; GFX8-NEXT: [[ASHR1:%[0-9]+]]:_(s32) = G_ASHR [[SHL1]], [[UV3]](s32) + ; GFX8-NEXT: [[ICMP2:%[0-9]+]]:_(s1) = G_ICMP intpred(slt), [[UV1]](s32), [[C2]] + ; GFX8-NEXT: [[SELECT2:%[0-9]+]]:_(s32) = G_SELECT [[ICMP2]](s1), [[C]], [[C1]] + ; GFX8-NEXT: [[ICMP3:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[UV1]](s32), [[ASHR1]] + ; GFX8-NEXT: [[SELECT3:%[0-9]+]]:_(s32) = G_SELECT [[ICMP3]](s1), [[SELECT2]], [[SHL1]] + ; GFX8-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[SELECT1]](s32), [[SELECT3]](s32) + ; GFX8-NEXT: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x s32>) ; GFX9-LABEL: name: sshlsat_v2s32 ; GFX9: [[COPY:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr0_vgpr1 - ; GFX9: [[COPY1:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr2_vgpr3 - ; GFX9: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<2 x s32>) - ; GFX9: [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](<2 x s32>) - ; GFX9: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[UV]], [[UV2]](s32) - ; GFX9: [[ASHR:%[0-9]+]]:_(s32) = G_ASHR [[SHL]], [[UV2]](s32) - ; GFX9: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 -2147483648 - ; GFX9: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 2147483647 - ; GFX9: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 - ; GFX9: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(slt), [[UV]](s32), [[C2]] - ; GFX9: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[ICMP]](s1), [[C]], [[C1]] - ; GFX9: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[UV]](s32), [[ASHR]] - ; GFX9: [[SELECT1:%[0-9]+]]:_(s32) = G_SELECT [[ICMP1]](s1), [[SELECT]], [[SHL]] - ; GFX9: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[UV1]], [[UV3]](s32) - ; GFX9: [[ASHR1:%[0-9]+]]:_(s32) = G_ASHR [[SHL1]], [[UV3]](s32) - ; GFX9: [[ICMP2:%[0-9]+]]:_(s1) = G_ICMP intpred(slt), [[UV1]](s32), [[C2]] - ; GFX9: [[SELECT2:%[0-9]+]]:_(s32) = G_SELECT [[ICMP2]](s1), [[C]], [[C1]] - ; GFX9: [[ICMP3:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[UV1]](s32), [[ASHR1]] - ; GFX9: [[SELECT3:%[0-9]+]]:_(s32) = G_SELECT [[ICMP3]](s1), [[SELECT2]], [[SHL1]] - ; GFX9: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[SELECT1]](s32), [[SELECT3]](s32) - ; GFX9: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x s32>) + ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr2_vgpr3 + ; GFX9-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<2 x s32>) + ; GFX9-NEXT: [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](<2 x s32>) + ; GFX9-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[UV]], [[UV2]](s32) + ; GFX9-NEXT: [[ASHR:%[0-9]+]]:_(s32) = G_ASHR [[SHL]], [[UV2]](s32) + ; GFX9-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 -2147483648 + ; GFX9-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 2147483647 + ; GFX9-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 + ; GFX9-NEXT: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(slt), [[UV]](s32), [[C2]] + ; GFX9-NEXT: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[ICMP]](s1), [[C]], [[C1]] + ; GFX9-NEXT: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[UV]](s32), [[ASHR]] + ; GFX9-NEXT: [[SELECT1:%[0-9]+]]:_(s32) = G_SELECT [[ICMP1]](s1), [[SELECT]], [[SHL]] + ; GFX9-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[UV1]], [[UV3]](s32) + ; GFX9-NEXT: 
[[ASHR1:%[0-9]+]]:_(s32) = G_ASHR [[SHL1]], [[UV3]](s32) + ; GFX9-NEXT: [[ICMP2:%[0-9]+]]:_(s1) = G_ICMP intpred(slt), [[UV1]](s32), [[C2]] + ; GFX9-NEXT: [[SELECT2:%[0-9]+]]:_(s32) = G_SELECT [[ICMP2]](s1), [[C]], [[C1]] + ; GFX9-NEXT: [[ICMP3:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[UV1]](s32), [[ASHR1]] + ; GFX9-NEXT: [[SELECT3:%[0-9]+]]:_(s32) = G_SELECT [[ICMP3]](s1), [[SELECT2]], [[SHL1]] + ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[SELECT1]](s32), [[SELECT3]](s32) + ; GFX9-NEXT: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x s32>) %0:_(<2 x s32>) = COPY $vgpr0_vgpr1 %1:_(<2 x s32>) = COPY $vgpr2_vgpr3 %2:_(<2 x s32>) = G_SSHLSAT %0, %1 @@ -992,46 +992,46 @@ body: | ; GFX6-LABEL: name: sshlsat_s64 ; GFX6: [[COPY:%[0-9]+]]:_(s64) = COPY $vgpr0_vgpr1 - ; GFX6: [[COPY1:%[0-9]+]]:_(s64) = COPY $vgpr2_vgpr3 - ; GFX6: [[TRUNC:%[0-9]+]]:_(s32) = G_TRUNC [[COPY1]](s64) - ; GFX6: [[SHL:%[0-9]+]]:_(s64) = G_SHL [[COPY]], [[TRUNC]](s32) - ; GFX6: [[ASHR:%[0-9]+]]:_(s64) = G_ASHR [[SHL]], [[TRUNC]](s32) - ; GFX6: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 -9223372036854775808 - ; GFX6: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 9223372036854775807 - ; GFX6: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 - ; GFX6: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(slt), [[COPY]](s64), [[C2]] - ; GFX6: [[SELECT:%[0-9]+]]:_(s64) = G_SELECT [[ICMP]](s1), [[C]], [[C1]] - ; GFX6: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[COPY]](s64), [[ASHR]] - ; GFX6: [[SELECT1:%[0-9]+]]:_(s64) = G_SELECT [[ICMP1]](s1), [[SELECT]], [[SHL]] - ; GFX6: $vgpr0_vgpr1 = COPY [[SELECT1]](s64) + ; GFX6-NEXT: [[COPY1:%[0-9]+]]:_(s64) = COPY $vgpr2_vgpr3 + ; GFX6-NEXT: [[TRUNC:%[0-9]+]]:_(s32) = G_TRUNC [[COPY1]](s64) + ; GFX6-NEXT: [[SHL:%[0-9]+]]:_(s64) = G_SHL [[COPY]], [[TRUNC]](s32) + ; GFX6-NEXT: [[ASHR:%[0-9]+]]:_(s64) = G_ASHR [[SHL]], [[TRUNC]](s32) + ; GFX6-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 -9223372036854775808 + ; GFX6-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 9223372036854775807 + ; GFX6-NEXT: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 + ; GFX6-NEXT: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(slt), [[COPY]](s64), [[C2]] + ; GFX6-NEXT: [[SELECT:%[0-9]+]]:_(s64) = G_SELECT [[ICMP]](s1), [[C]], [[C1]] + ; GFX6-NEXT: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[COPY]](s64), [[ASHR]] + ; GFX6-NEXT: [[SELECT1:%[0-9]+]]:_(s64) = G_SELECT [[ICMP1]](s1), [[SELECT]], [[SHL]] + ; GFX6-NEXT: $vgpr0_vgpr1 = COPY [[SELECT1]](s64) ; GFX8-LABEL: name: sshlsat_s64 ; GFX8: [[COPY:%[0-9]+]]:_(s64) = COPY $vgpr0_vgpr1 - ; GFX8: [[COPY1:%[0-9]+]]:_(s64) = COPY $vgpr2_vgpr3 - ; GFX8: [[TRUNC:%[0-9]+]]:_(s32) = G_TRUNC [[COPY1]](s64) - ; GFX8: [[SHL:%[0-9]+]]:_(s64) = G_SHL [[COPY]], [[TRUNC]](s32) - ; GFX8: [[ASHR:%[0-9]+]]:_(s64) = G_ASHR [[SHL]], [[TRUNC]](s32) - ; GFX8: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 -9223372036854775808 - ; GFX8: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 9223372036854775807 - ; GFX8: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 - ; GFX8: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(slt), [[COPY]](s64), [[C2]] - ; GFX8: [[SELECT:%[0-9]+]]:_(s64) = G_SELECT [[ICMP]](s1), [[C]], [[C1]] - ; GFX8: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[COPY]](s64), [[ASHR]] - ; GFX8: [[SELECT1:%[0-9]+]]:_(s64) = G_SELECT [[ICMP1]](s1), [[SELECT]], [[SHL]] - ; GFX8: $vgpr0_vgpr1 = COPY [[SELECT1]](s64) + ; GFX8-NEXT: [[COPY1:%[0-9]+]]:_(s64) = COPY $vgpr2_vgpr3 + ; GFX8-NEXT: [[TRUNC:%[0-9]+]]:_(s32) = G_TRUNC [[COPY1]](s64) + ; GFX8-NEXT: [[SHL:%[0-9]+]]:_(s64) = G_SHL [[COPY]], [[TRUNC]](s32) + ; GFX8-NEXT: 
[[ASHR:%[0-9]+]]:_(s64) = G_ASHR [[SHL]], [[TRUNC]](s32) + ; GFX8-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 -9223372036854775808 + ; GFX8-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 9223372036854775807 + ; GFX8-NEXT: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 + ; GFX8-NEXT: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(slt), [[COPY]](s64), [[C2]] + ; GFX8-NEXT: [[SELECT:%[0-9]+]]:_(s64) = G_SELECT [[ICMP]](s1), [[C]], [[C1]] + ; GFX8-NEXT: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[COPY]](s64), [[ASHR]] + ; GFX8-NEXT: [[SELECT1:%[0-9]+]]:_(s64) = G_SELECT [[ICMP1]](s1), [[SELECT]], [[SHL]] + ; GFX8-NEXT: $vgpr0_vgpr1 = COPY [[SELECT1]](s64) ; GFX9-LABEL: name: sshlsat_s64 ; GFX9: [[COPY:%[0-9]+]]:_(s64) = COPY $vgpr0_vgpr1 - ; GFX9: [[COPY1:%[0-9]+]]:_(s64) = COPY $vgpr2_vgpr3 - ; GFX9: [[TRUNC:%[0-9]+]]:_(s32) = G_TRUNC [[COPY1]](s64) - ; GFX9: [[SHL:%[0-9]+]]:_(s64) = G_SHL [[COPY]], [[TRUNC]](s32) - ; GFX9: [[ASHR:%[0-9]+]]:_(s64) = G_ASHR [[SHL]], [[TRUNC]](s32) - ; GFX9: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 -9223372036854775808 - ; GFX9: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 9223372036854775807 - ; GFX9: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 - ; GFX9: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(slt), [[COPY]](s64), [[C2]] - ; GFX9: [[SELECT:%[0-9]+]]:_(s64) = G_SELECT [[ICMP]](s1), [[C]], [[C1]] - ; GFX9: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[COPY]](s64), [[ASHR]] - ; GFX9: [[SELECT1:%[0-9]+]]:_(s64) = G_SELECT [[ICMP1]](s1), [[SELECT]], [[SHL]] - ; GFX9: $vgpr0_vgpr1 = COPY [[SELECT1]](s64) + ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(s64) = COPY $vgpr2_vgpr3 + ; GFX9-NEXT: [[TRUNC:%[0-9]+]]:_(s32) = G_TRUNC [[COPY1]](s64) + ; GFX9-NEXT: [[SHL:%[0-9]+]]:_(s64) = G_SHL [[COPY]], [[TRUNC]](s32) + ; GFX9-NEXT: [[ASHR:%[0-9]+]]:_(s64) = G_ASHR [[SHL]], [[TRUNC]](s32) + ; GFX9-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 -9223372036854775808 + ; GFX9-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 9223372036854775807 + ; GFX9-NEXT: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 + ; GFX9-NEXT: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(slt), [[COPY]](s64), [[C2]] + ; GFX9-NEXT: [[SELECT:%[0-9]+]]:_(s64) = G_SELECT [[ICMP]](s1), [[C]], [[C1]] + ; GFX9-NEXT: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[COPY]](s64), [[ASHR]] + ; GFX9-NEXT: [[SELECT1:%[0-9]+]]:_(s64) = G_SELECT [[ICMP1]](s1), [[SELECT]], [[SHL]] + ; GFX9-NEXT: $vgpr0_vgpr1 = COPY [[SELECT1]](s64) %0:_(s64) = COPY $vgpr0_vgpr1 %1:_(s64) = COPY $vgpr2_vgpr3 %2:_(s64) = G_SSHLSAT %0, %1 @@ -1046,76 +1046,76 @@ body: | ; GFX6-LABEL: name: sshlsat_v2s64 ; GFX6: [[COPY:%[0-9]+]]:_(<2 x s64>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3 - ; GFX6: [[COPY1:%[0-9]+]]:_(<2 x s64>) = COPY $vgpr4_vgpr5_vgpr6_vgpr7 - ; GFX6: [[UV:%[0-9]+]]:_(s64), [[UV1:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[COPY]](<2 x s64>) - ; GFX6: [[UV2:%[0-9]+]]:_(s64), [[UV3:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[COPY1]](<2 x s64>) - ; GFX6: [[TRUNC:%[0-9]+]]:_(s32) = G_TRUNC [[UV2]](s64) - ; GFX6: [[SHL:%[0-9]+]]:_(s64) = G_SHL [[UV]], [[TRUNC]](s32) - ; GFX6: [[ASHR:%[0-9]+]]:_(s64) = G_ASHR [[SHL]], [[TRUNC]](s32) - ; GFX6: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 -9223372036854775808 - ; GFX6: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 9223372036854775807 - ; GFX6: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 - ; GFX6: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(slt), [[UV]](s64), [[C2]] - ; GFX6: [[SELECT:%[0-9]+]]:_(s64) = G_SELECT [[ICMP]](s1), [[C]], [[C1]] - ; GFX6: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[UV]](s64), [[ASHR]] - ; GFX6: [[SELECT1:%[0-9]+]]:_(s64) = G_SELECT [[ICMP1]](s1), 
[[SELECT]], [[SHL]] - ; GFX6: [[TRUNC1:%[0-9]+]]:_(s32) = G_TRUNC [[UV3]](s64) - ; GFX6: [[SHL1:%[0-9]+]]:_(s64) = G_SHL [[UV1]], [[TRUNC1]](s32) - ; GFX6: [[ASHR1:%[0-9]+]]:_(s64) = G_ASHR [[SHL1]], [[TRUNC1]](s32) - ; GFX6: [[ICMP2:%[0-9]+]]:_(s1) = G_ICMP intpred(slt), [[UV1]](s64), [[C2]] - ; GFX6: [[SELECT2:%[0-9]+]]:_(s64) = G_SELECT [[ICMP2]](s1), [[C]], [[C1]] - ; GFX6: [[ICMP3:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[UV1]](s64), [[ASHR1]] - ; GFX6: [[SELECT3:%[0-9]+]]:_(s64) = G_SELECT [[ICMP3]](s1), [[SELECT2]], [[SHL1]] - ; GFX6: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s64>) = G_BUILD_VECTOR [[SELECT1]](s64), [[SELECT3]](s64) - ; GFX6: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<2 x s64>) + ; GFX6-NEXT: [[COPY1:%[0-9]+]]:_(<2 x s64>) = COPY $vgpr4_vgpr5_vgpr6_vgpr7 + ; GFX6-NEXT: [[UV:%[0-9]+]]:_(s64), [[UV1:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[COPY]](<2 x s64>) + ; GFX6-NEXT: [[UV2:%[0-9]+]]:_(s64), [[UV3:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[COPY1]](<2 x s64>) + ; GFX6-NEXT: [[TRUNC:%[0-9]+]]:_(s32) = G_TRUNC [[UV2]](s64) + ; GFX6-NEXT: [[SHL:%[0-9]+]]:_(s64) = G_SHL [[UV]], [[TRUNC]](s32) + ; GFX6-NEXT: [[ASHR:%[0-9]+]]:_(s64) = G_ASHR [[SHL]], [[TRUNC]](s32) + ; GFX6-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 -9223372036854775808 + ; GFX6-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 9223372036854775807 + ; GFX6-NEXT: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 + ; GFX6-NEXT: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(slt), [[UV]](s64), [[C2]] + ; GFX6-NEXT: [[SELECT:%[0-9]+]]:_(s64) = G_SELECT [[ICMP]](s1), [[C]], [[C1]] + ; GFX6-NEXT: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[UV]](s64), [[ASHR]] + ; GFX6-NEXT: [[SELECT1:%[0-9]+]]:_(s64) = G_SELECT [[ICMP1]](s1), [[SELECT]], [[SHL]] + ; GFX6-NEXT: [[TRUNC1:%[0-9]+]]:_(s32) = G_TRUNC [[UV3]](s64) + ; GFX6-NEXT: [[SHL1:%[0-9]+]]:_(s64) = G_SHL [[UV1]], [[TRUNC1]](s32) + ; GFX6-NEXT: [[ASHR1:%[0-9]+]]:_(s64) = G_ASHR [[SHL1]], [[TRUNC1]](s32) + ; GFX6-NEXT: [[ICMP2:%[0-9]+]]:_(s1) = G_ICMP intpred(slt), [[UV1]](s64), [[C2]] + ; GFX6-NEXT: [[SELECT2:%[0-9]+]]:_(s64) = G_SELECT [[ICMP2]](s1), [[C]], [[C1]] + ; GFX6-NEXT: [[ICMP3:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[UV1]](s64), [[ASHR1]] + ; GFX6-NEXT: [[SELECT3:%[0-9]+]]:_(s64) = G_SELECT [[ICMP3]](s1), [[SELECT2]], [[SHL1]] + ; GFX6-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s64>) = G_BUILD_VECTOR [[SELECT1]](s64), [[SELECT3]](s64) + ; GFX6-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<2 x s64>) ; GFX8-LABEL: name: sshlsat_v2s64 ; GFX8: [[COPY:%[0-9]+]]:_(<2 x s64>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3 - ; GFX8: [[COPY1:%[0-9]+]]:_(<2 x s64>) = COPY $vgpr4_vgpr5_vgpr6_vgpr7 - ; GFX8: [[UV:%[0-9]+]]:_(s64), [[UV1:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[COPY]](<2 x s64>) - ; GFX8: [[UV2:%[0-9]+]]:_(s64), [[UV3:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[COPY1]](<2 x s64>) - ; GFX8: [[TRUNC:%[0-9]+]]:_(s32) = G_TRUNC [[UV2]](s64) - ; GFX8: [[SHL:%[0-9]+]]:_(s64) = G_SHL [[UV]], [[TRUNC]](s32) - ; GFX8: [[ASHR:%[0-9]+]]:_(s64) = G_ASHR [[SHL]], [[TRUNC]](s32) - ; GFX8: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 -9223372036854775808 - ; GFX8: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 9223372036854775807 - ; GFX8: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 - ; GFX8: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(slt), [[UV]](s64), [[C2]] - ; GFX8: [[SELECT:%[0-9]+]]:_(s64) = G_SELECT [[ICMP]](s1), [[C]], [[C1]] - ; GFX8: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[UV]](s64), [[ASHR]] - ; GFX8: [[SELECT1:%[0-9]+]]:_(s64) = G_SELECT [[ICMP1]](s1), [[SELECT]], [[SHL]] - ; GFX8: [[TRUNC1:%[0-9]+]]:_(s32) = 
G_TRUNC [[UV3]](s64) - ; GFX8: [[SHL1:%[0-9]+]]:_(s64) = G_SHL [[UV1]], [[TRUNC1]](s32) - ; GFX8: [[ASHR1:%[0-9]+]]:_(s64) = G_ASHR [[SHL1]], [[TRUNC1]](s32) - ; GFX8: [[ICMP2:%[0-9]+]]:_(s1) = G_ICMP intpred(slt), [[UV1]](s64), [[C2]] - ; GFX8: [[SELECT2:%[0-9]+]]:_(s64) = G_SELECT [[ICMP2]](s1), [[C]], [[C1]] - ; GFX8: [[ICMP3:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[UV1]](s64), [[ASHR1]] - ; GFX8: [[SELECT3:%[0-9]+]]:_(s64) = G_SELECT [[ICMP3]](s1), [[SELECT2]], [[SHL1]] - ; GFX8: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s64>) = G_BUILD_VECTOR [[SELECT1]](s64), [[SELECT3]](s64) - ; GFX8: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<2 x s64>) + ; GFX8-NEXT: [[COPY1:%[0-9]+]]:_(<2 x s64>) = COPY $vgpr4_vgpr5_vgpr6_vgpr7 + ; GFX8-NEXT: [[UV:%[0-9]+]]:_(s64), [[UV1:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[COPY]](<2 x s64>) + ; GFX8-NEXT: [[UV2:%[0-9]+]]:_(s64), [[UV3:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[COPY1]](<2 x s64>) + ; GFX8-NEXT: [[TRUNC:%[0-9]+]]:_(s32) = G_TRUNC [[UV2]](s64) + ; GFX8-NEXT: [[SHL:%[0-9]+]]:_(s64) = G_SHL [[UV]], [[TRUNC]](s32) + ; GFX8-NEXT: [[ASHR:%[0-9]+]]:_(s64) = G_ASHR [[SHL]], [[TRUNC]](s32) + ; GFX8-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 -9223372036854775808 + ; GFX8-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 9223372036854775807 + ; GFX8-NEXT: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 + ; GFX8-NEXT: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(slt), [[UV]](s64), [[C2]] + ; GFX8-NEXT: [[SELECT:%[0-9]+]]:_(s64) = G_SELECT [[ICMP]](s1), [[C]], [[C1]] + ; GFX8-NEXT: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[UV]](s64), [[ASHR]] + ; GFX8-NEXT: [[SELECT1:%[0-9]+]]:_(s64) = G_SELECT [[ICMP1]](s1), [[SELECT]], [[SHL]] + ; GFX8-NEXT: [[TRUNC1:%[0-9]+]]:_(s32) = G_TRUNC [[UV3]](s64) + ; GFX8-NEXT: [[SHL1:%[0-9]+]]:_(s64) = G_SHL [[UV1]], [[TRUNC1]](s32) + ; GFX8-NEXT: [[ASHR1:%[0-9]+]]:_(s64) = G_ASHR [[SHL1]], [[TRUNC1]](s32) + ; GFX8-NEXT: [[ICMP2:%[0-9]+]]:_(s1) = G_ICMP intpred(slt), [[UV1]](s64), [[C2]] + ; GFX8-NEXT: [[SELECT2:%[0-9]+]]:_(s64) = G_SELECT [[ICMP2]](s1), [[C]], [[C1]] + ; GFX8-NEXT: [[ICMP3:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[UV1]](s64), [[ASHR1]] + ; GFX8-NEXT: [[SELECT3:%[0-9]+]]:_(s64) = G_SELECT [[ICMP3]](s1), [[SELECT2]], [[SHL1]] + ; GFX8-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s64>) = G_BUILD_VECTOR [[SELECT1]](s64), [[SELECT3]](s64) + ; GFX8-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<2 x s64>) ; GFX9-LABEL: name: sshlsat_v2s64 ; GFX9: [[COPY:%[0-9]+]]:_(<2 x s64>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3 - ; GFX9: [[COPY1:%[0-9]+]]:_(<2 x s64>) = COPY $vgpr4_vgpr5_vgpr6_vgpr7 - ; GFX9: [[UV:%[0-9]+]]:_(s64), [[UV1:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[COPY]](<2 x s64>) - ; GFX9: [[UV2:%[0-9]+]]:_(s64), [[UV3:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[COPY1]](<2 x s64>) - ; GFX9: [[TRUNC:%[0-9]+]]:_(s32) = G_TRUNC [[UV2]](s64) - ; GFX9: [[SHL:%[0-9]+]]:_(s64) = G_SHL [[UV]], [[TRUNC]](s32) - ; GFX9: [[ASHR:%[0-9]+]]:_(s64) = G_ASHR [[SHL]], [[TRUNC]](s32) - ; GFX9: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 -9223372036854775808 - ; GFX9: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 9223372036854775807 - ; GFX9: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 - ; GFX9: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(slt), [[UV]](s64), [[C2]] - ; GFX9: [[SELECT:%[0-9]+]]:_(s64) = G_SELECT [[ICMP]](s1), [[C]], [[C1]] - ; GFX9: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[UV]](s64), [[ASHR]] - ; GFX9: [[SELECT1:%[0-9]+]]:_(s64) = G_SELECT [[ICMP1]](s1), [[SELECT]], [[SHL]] - ; GFX9: [[TRUNC1:%[0-9]+]]:_(s32) = G_TRUNC [[UV3]](s64) - ; GFX9: [[SHL1:%[0-9]+]]:_(s64) = 
G_SHL [[UV1]], [[TRUNC1]](s32) - ; GFX9: [[ASHR1:%[0-9]+]]:_(s64) = G_ASHR [[SHL1]], [[TRUNC1]](s32) - ; GFX9: [[ICMP2:%[0-9]+]]:_(s1) = G_ICMP intpred(slt), [[UV1]](s64), [[C2]] - ; GFX9: [[SELECT2:%[0-9]+]]:_(s64) = G_SELECT [[ICMP2]](s1), [[C]], [[C1]] - ; GFX9: [[ICMP3:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[UV1]](s64), [[ASHR1]] - ; GFX9: [[SELECT3:%[0-9]+]]:_(s64) = G_SELECT [[ICMP3]](s1), [[SELECT2]], [[SHL1]] - ; GFX9: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s64>) = G_BUILD_VECTOR [[SELECT1]](s64), [[SELECT3]](s64) - ; GFX9: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<2 x s64>) + ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(<2 x s64>) = COPY $vgpr4_vgpr5_vgpr6_vgpr7 + ; GFX9-NEXT: [[UV:%[0-9]+]]:_(s64), [[UV1:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[COPY]](<2 x s64>) + ; GFX9-NEXT: [[UV2:%[0-9]+]]:_(s64), [[UV3:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[COPY1]](<2 x s64>) + ; GFX9-NEXT: [[TRUNC:%[0-9]+]]:_(s32) = G_TRUNC [[UV2]](s64) + ; GFX9-NEXT: [[SHL:%[0-9]+]]:_(s64) = G_SHL [[UV]], [[TRUNC]](s32) + ; GFX9-NEXT: [[ASHR:%[0-9]+]]:_(s64) = G_ASHR [[SHL]], [[TRUNC]](s32) + ; GFX9-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 -9223372036854775808 + ; GFX9-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 9223372036854775807 + ; GFX9-NEXT: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 + ; GFX9-NEXT: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(slt), [[UV]](s64), [[C2]] + ; GFX9-NEXT: [[SELECT:%[0-9]+]]:_(s64) = G_SELECT [[ICMP]](s1), [[C]], [[C1]] + ; GFX9-NEXT: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[UV]](s64), [[ASHR]] + ; GFX9-NEXT: [[SELECT1:%[0-9]+]]:_(s64) = G_SELECT [[ICMP1]](s1), [[SELECT]], [[SHL]] + ; GFX9-NEXT: [[TRUNC1:%[0-9]+]]:_(s32) = G_TRUNC [[UV3]](s64) + ; GFX9-NEXT: [[SHL1:%[0-9]+]]:_(s64) = G_SHL [[UV1]], [[TRUNC1]](s32) + ; GFX9-NEXT: [[ASHR1:%[0-9]+]]:_(s64) = G_ASHR [[SHL1]], [[TRUNC1]](s32) + ; GFX9-NEXT: [[ICMP2:%[0-9]+]]:_(s1) = G_ICMP intpred(slt), [[UV1]](s64), [[C2]] + ; GFX9-NEXT: [[SELECT2:%[0-9]+]]:_(s64) = G_SELECT [[ICMP2]](s1), [[C]], [[C1]] + ; GFX9-NEXT: [[ICMP3:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[UV1]](s64), [[ASHR1]] + ; GFX9-NEXT: [[SELECT3:%[0-9]+]]:_(s64) = G_SELECT [[ICMP3]](s1), [[SELECT2]], [[SHL1]] + ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s64>) = G_BUILD_VECTOR [[SELECT1]](s64), [[SELECT3]](s64) + ; GFX9-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<2 x s64>) %0:_(<2 x s64>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3 %1:_(<2 x s64>) = COPY $vgpr4_vgpr5_vgpr6_vgpr7 %2:_(<2 x s64>) = G_SSHLSAT %0, %1 diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-ssubo.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-ssubo.mir index 348838dbdac7..8a4f380c02ae 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-ssubo.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-ssubo.mir @@ -1,5 +1,5 @@ # NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py -# RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=tahiti -O0 -run-pass=legalizer -global-isel-abort=0 -o - %s | FileCheck %s +# RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=tahiti -O0 -run-pass=legalizer -o - %s | FileCheck %s --- name: test_ssubo_s7 @@ -9,21 +9,21 @@ body: | ; CHECK-LABEL: name: test_ssubo_s7 ; CHECK: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; CHECK: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; CHECK: [[SUB:%[0-9]+]]:_(s32) = G_SUB [[COPY]], [[COPY1]] - ; CHECK: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 - ; CHECK: [[SEXT_INREG:%[0-9]+]]:_(s32) = G_SEXT_INREG [[SUB]], 7 - ; CHECK: [[SEXT_INREG1:%[0-9]+]]:_(s32) = G_SEXT_INREG [[COPY]], 7 - ; CHECK: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP 
intpred(slt), [[SEXT_INREG]](s32), [[SEXT_INREG1]] - ; CHECK: [[SEXT_INREG2:%[0-9]+]]:_(s32) = G_SEXT_INREG [[COPY1]], 7 - ; CHECK: [[COPY2:%[0-9]+]]:_(s32) = COPY [[C]](s32) - ; CHECK: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(sgt), [[SEXT_INREG2]](s32), [[COPY2]] - ; CHECK: [[XOR:%[0-9]+]]:_(s1) = G_XOR [[ICMP1]], [[ICMP]] - ; CHECK: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 127 - ; CHECK: [[AND:%[0-9]+]]:_(s32) = G_AND [[SUB]], [[C1]] - ; CHECK: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[XOR]](s1) - ; CHECK: $vgpr0 = COPY [[AND]](s32) - ; CHECK: $vgpr1 = COPY [[ZEXT]](s32) + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; CHECK-NEXT: [[SUB:%[0-9]+]]:_(s32) = G_SUB [[COPY]], [[COPY1]] + ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 + ; CHECK-NEXT: [[SEXT_INREG:%[0-9]+]]:_(s32) = G_SEXT_INREG [[SUB]], 7 + ; CHECK-NEXT: [[SEXT_INREG1:%[0-9]+]]:_(s32) = G_SEXT_INREG [[COPY]], 7 + ; CHECK-NEXT: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(slt), [[SEXT_INREG]](s32), [[SEXT_INREG1]] + ; CHECK-NEXT: [[SEXT_INREG2:%[0-9]+]]:_(s32) = G_SEXT_INREG [[COPY1]], 7 + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY [[C]](s32) + ; CHECK-NEXT: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(sgt), [[SEXT_INREG2]](s32), [[COPY2]] + ; CHECK-NEXT: [[XOR:%[0-9]+]]:_(s1) = G_XOR [[ICMP1]], [[ICMP]] + ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 127 + ; CHECK-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[SUB]], [[C1]] + ; CHECK-NEXT: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[XOR]](s1) + ; CHECK-NEXT: $vgpr0 = COPY [[AND]](s32) + ; CHECK-NEXT: $vgpr1 = COPY [[ZEXT]](s32) %0:_(s32) = COPY $vgpr0 %1:_(s32) = COPY $vgpr1 %2:_(s7) = G_TRUNC %0 @@ -43,18 +43,18 @@ body: | ; CHECK-LABEL: name: test_ssubo_s16 ; CHECK: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; CHECK: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; CHECK: [[SUB:%[0-9]+]]:_(s32) = G_SUB [[COPY]], [[COPY1]] - ; CHECK: [[SEXT_INREG:%[0-9]+]]:_(s32) = G_SEXT_INREG [[SUB]], 16 - ; CHECK: [[SEXT_INREG1:%[0-9]+]]:_(s32) = G_SEXT_INREG [[COPY]], 16 - ; CHECK: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(slt), [[SEXT_INREG]](s32), [[SEXT_INREG1]] - ; CHECK: [[SEXT_INREG2:%[0-9]+]]:_(s32) = G_SEXT_INREG [[COPY1]], 16 - ; CHECK: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 - ; CHECK: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(sgt), [[SEXT_INREG2]](s32), [[C]] - ; CHECK: [[XOR:%[0-9]+]]:_(s1) = G_XOR [[ICMP1]], [[ICMP]] - ; CHECK: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[XOR]](s1) - ; CHECK: $vgpr0 = COPY [[SUB]](s32) - ; CHECK: $vgpr1 = COPY [[ZEXT]](s32) + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; CHECK-NEXT: [[SUB:%[0-9]+]]:_(s32) = G_SUB [[COPY]], [[COPY1]] + ; CHECK-NEXT: [[SEXT_INREG:%[0-9]+]]:_(s32) = G_SEXT_INREG [[SUB]], 16 + ; CHECK-NEXT: [[SEXT_INREG1:%[0-9]+]]:_(s32) = G_SEXT_INREG [[COPY]], 16 + ; CHECK-NEXT: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(slt), [[SEXT_INREG]](s32), [[SEXT_INREG1]] + ; CHECK-NEXT: [[SEXT_INREG2:%[0-9]+]]:_(s32) = G_SEXT_INREG [[COPY1]], 16 + ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 + ; CHECK-NEXT: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(sgt), [[SEXT_INREG2]](s32), [[C]] + ; CHECK-NEXT: [[XOR:%[0-9]+]]:_(s1) = G_XOR [[ICMP1]], [[ICMP]] + ; CHECK-NEXT: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[XOR]](s1) + ; CHECK-NEXT: $vgpr0 = COPY [[SUB]](s32) + ; CHECK-NEXT: $vgpr1 = COPY [[ZEXT]](s32) %0:_(s32) = COPY $vgpr0 %1:_(s32) = COPY $vgpr1 %2:_(s16) = G_TRUNC %0 @@ -74,15 +74,15 @@ body: | ; CHECK-LABEL: name: test_ssubo_s32 ; CHECK: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; CHECK: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; CHECK: 
[[SUB:%[0-9]+]]:_(s32) = G_SUB [[COPY]], [[COPY1]] - ; CHECK: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 - ; CHECK: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(slt), [[SUB]](s32), [[COPY]] - ; CHECK: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(sgt), [[COPY1]](s32), [[C]] - ; CHECK: [[XOR:%[0-9]+]]:_(s1) = G_XOR [[ICMP1]], [[ICMP]] - ; CHECK: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[XOR]](s1) - ; CHECK: $vgpr0 = COPY [[SUB]](s32) - ; CHECK: $vgpr1 = COPY [[ZEXT]](s32) + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; CHECK-NEXT: [[SUB:%[0-9]+]]:_(s32) = G_SUB [[COPY]], [[COPY1]] + ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 + ; CHECK-NEXT: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(slt), [[SUB]](s32), [[COPY]] + ; CHECK-NEXT: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(sgt), [[COPY1]](s32), [[C]] + ; CHECK-NEXT: [[XOR:%[0-9]+]]:_(s1) = G_XOR [[ICMP1]], [[ICMP]] + ; CHECK-NEXT: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[XOR]](s1) + ; CHECK-NEXT: $vgpr0 = COPY [[SUB]](s32) + ; CHECK-NEXT: $vgpr1 = COPY [[ZEXT]](s32) %0:_(s32) = COPY $vgpr0 %1:_(s32) = COPY $vgpr1 %2:_(s32), %3:_(s1) = G_SSUBO %0, %1 @@ -99,19 +99,19 @@ body: | ; CHECK-LABEL: name: test_ssubo_s64 ; CHECK: [[COPY:%[0-9]+]]:_(s64) = COPY $vgpr0_vgpr1 - ; CHECK: [[COPY1:%[0-9]+]]:_(s64) = COPY $vgpr2_vgpr3 - ; CHECK: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](s64) - ; CHECK: [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](s64) - ; CHECK: [[USUBO:%[0-9]+]]:_(s32), [[USUBO1:%[0-9]+]]:_(s1) = G_USUBO [[UV]], [[UV2]] - ; CHECK: [[USUBE:%[0-9]+]]:_(s32), [[USUBE1:%[0-9]+]]:_(s1) = G_USUBE [[UV1]], [[UV3]], [[USUBO1]] - ; CHECK: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[USUBO]](s32), [[USUBE]](s32) - ; CHECK: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 - ; CHECK: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(slt), [[MV]](s64), [[COPY]] - ; CHECK: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(sgt), [[COPY1]](s64), [[C]] - ; CHECK: [[XOR:%[0-9]+]]:_(s1) = G_XOR [[ICMP1]], [[ICMP]] - ; CHECK: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[XOR]](s1) - ; CHECK: $vgpr0_vgpr1 = COPY [[MV]](s64) - ; CHECK: $vgpr2 = COPY [[ZEXT]](s32) + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s64) = COPY $vgpr2_vgpr3 + ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](s64) + ; CHECK-NEXT: [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](s64) + ; CHECK-NEXT: [[USUBO:%[0-9]+]]:_(s32), [[USUBO1:%[0-9]+]]:_(s1) = G_USUBO [[UV]], [[UV2]] + ; CHECK-NEXT: [[USUBE:%[0-9]+]]:_(s32), [[USUBE1:%[0-9]+]]:_(s1) = G_USUBE [[UV1]], [[UV3]], [[USUBO1]] + ; CHECK-NEXT: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[USUBO]](s32), [[USUBE]](s32) + ; CHECK-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 + ; CHECK-NEXT: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(slt), [[MV]](s64), [[COPY]] + ; CHECK-NEXT: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(sgt), [[COPY1]](s64), [[C]] + ; CHECK-NEXT: [[XOR:%[0-9]+]]:_(s1) = G_XOR [[ICMP1]], [[ICMP]] + ; CHECK-NEXT: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[XOR]](s1) + ; CHECK-NEXT: $vgpr0_vgpr1 = COPY [[MV]](s64) + ; CHECK-NEXT: $vgpr2 = COPY [[ZEXT]](s32) %0:_(s64) = COPY $vgpr0_vgpr1 %1:_(s64) = COPY $vgpr2_vgpr3 %2:_(s64), %3:_(s1) = G_SSUBO %0, %1 @@ -128,46 +128,46 @@ body: | ; CHECK-LABEL: name: test_ssubo_v2s16 ; CHECK: [[COPY:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0 - ; CHECK: [[COPY1:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr1 - ; CHECK: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY]](<2 x s16>) - ; CHECK: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; CHECK: [[LSHR:%[0-9]+]]:_(s32) = 
G_LSHR [[BITCAST]], [[C]](s32) - ; CHECK: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[COPY1]](<2 x s16>) - ; CHECK: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C]](s32) - ; CHECK: [[SUB:%[0-9]+]]:_(s32) = G_SUB [[BITCAST]], [[BITCAST1]] - ; CHECK: [[SUB1:%[0-9]+]]:_(s32) = G_SUB [[LSHR]], [[LSHR1]] - ; CHECK: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 - ; CHECK: [[AND:%[0-9]+]]:_(s32) = G_AND [[SUB]], [[C1]] - ; CHECK: [[AND1:%[0-9]+]]:_(s32) = G_AND [[SUB1]], [[C1]] - ; CHECK: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C]](s32) - ; CHECK: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]] - ; CHECK: [[BITCAST2:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) - ; CHECK: [[BITCAST3:%[0-9]+]]:_(s32) = G_BITCAST [[COPY]](<2 x s16>) - ; CHECK: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST3]], [[C]](s32) - ; CHECK: [[SEXT_INREG:%[0-9]+]]:_(s32) = G_SEXT_INREG [[SUB]], 16 - ; CHECK: [[SEXT_INREG1:%[0-9]+]]:_(s32) = G_SEXT_INREG [[BITCAST3]], 16 - ; CHECK: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(slt), [[SEXT_INREG]](s32), [[SEXT_INREG1]] - ; CHECK: [[SEXT_INREG2:%[0-9]+]]:_(s32) = G_SEXT_INREG [[SUB1]], 16 - ; CHECK: [[SEXT_INREG3:%[0-9]+]]:_(s32) = G_SEXT_INREG [[LSHR2]], 16 - ; CHECK: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(slt), [[SEXT_INREG2]](s32), [[SEXT_INREG3]] - ; CHECK: [[BITCAST4:%[0-9]+]]:_(s32) = G_BITCAST [[COPY1]](<2 x s16>) - ; CHECK: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST4]], [[C]](s32) - ; CHECK: [[SEXT_INREG4:%[0-9]+]]:_(s32) = G_SEXT_INREG [[BITCAST4]], 16 - ; CHECK: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 - ; CHECK: [[COPY2:%[0-9]+]]:_(s32) = COPY [[C2]](s32) - ; CHECK: [[ICMP2:%[0-9]+]]:_(s1) = G_ICMP intpred(sgt), [[SEXT_INREG4]](s32), [[COPY2]] - ; CHECK: [[SEXT_INREG5:%[0-9]+]]:_(s32) = G_SEXT_INREG [[LSHR3]], 16 - ; CHECK: [[ICMP3:%[0-9]+]]:_(s1) = G_ICMP intpred(sgt), [[SEXT_INREG5]](s32), [[C2]] - ; CHECK: [[XOR:%[0-9]+]]:_(s1) = G_XOR [[ICMP2]], [[ICMP]] - ; CHECK: [[XOR1:%[0-9]+]]:_(s1) = G_XOR [[ICMP3]], [[ICMP1]] - ; CHECK: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[XOR]](s1) - ; CHECK: [[ANYEXT1:%[0-9]+]]:_(s32) = G_ANYEXT [[XOR1]](s1) - ; CHECK: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 - ; CHECK: [[AND2:%[0-9]+]]:_(s32) = G_AND [[ANYEXT]], [[C3]] - ; CHECK: [[AND3:%[0-9]+]]:_(s32) = G_AND [[ANYEXT1]], [[C3]] - ; CHECK: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[AND2]](s32), [[AND3]](s32) - ; CHECK: $vgpr0 = COPY [[BITCAST2]](<2 x s16>) - ; CHECK: $vgpr1_vgpr2 = COPY [[BUILD_VECTOR]](<2 x s32>) + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr1 + ; CHECK-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY]](<2 x s16>) + ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; CHECK-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) + ; CHECK-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[COPY1]](<2 x s16>) + ; CHECK-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C]](s32) + ; CHECK-NEXT: [[SUB:%[0-9]+]]:_(s32) = G_SUB [[BITCAST]], [[BITCAST1]] + ; CHECK-NEXT: [[SUB1:%[0-9]+]]:_(s32) = G_SUB [[LSHR]], [[LSHR1]] + ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 + ; CHECK-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[SUB]], [[C1]] + ; CHECK-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[SUB1]], [[C1]] + ; CHECK-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C]](s32) + ; CHECK-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]] + ; CHECK-NEXT: [[BITCAST2:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) + ; CHECK-NEXT: [[BITCAST3:%[0-9]+]]:_(s32) = G_BITCAST [[COPY]](<2 x s16>) + ; CHECK-NEXT: [[LSHR2:%[0-9]+]]:_(s32) = 
G_LSHR [[BITCAST3]], [[C]](s32) + ; CHECK-NEXT: [[SEXT_INREG:%[0-9]+]]:_(s32) = G_SEXT_INREG [[SUB]], 16 + ; CHECK-NEXT: [[SEXT_INREG1:%[0-9]+]]:_(s32) = G_SEXT_INREG [[BITCAST3]], 16 + ; CHECK-NEXT: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(slt), [[SEXT_INREG]](s32), [[SEXT_INREG1]] + ; CHECK-NEXT: [[SEXT_INREG2:%[0-9]+]]:_(s32) = G_SEXT_INREG [[SUB1]], 16 + ; CHECK-NEXT: [[SEXT_INREG3:%[0-9]+]]:_(s32) = G_SEXT_INREG [[LSHR2]], 16 + ; CHECK-NEXT: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(slt), [[SEXT_INREG2]](s32), [[SEXT_INREG3]] + ; CHECK-NEXT: [[BITCAST4:%[0-9]+]]:_(s32) = G_BITCAST [[COPY1]](<2 x s16>) + ; CHECK-NEXT: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST4]], [[C]](s32) + ; CHECK-NEXT: [[SEXT_INREG4:%[0-9]+]]:_(s32) = G_SEXT_INREG [[BITCAST4]], 16 + ; CHECK-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY [[C2]](s32) + ; CHECK-NEXT: [[ICMP2:%[0-9]+]]:_(s1) = G_ICMP intpred(sgt), [[SEXT_INREG4]](s32), [[COPY2]] + ; CHECK-NEXT: [[SEXT_INREG5:%[0-9]+]]:_(s32) = G_SEXT_INREG [[LSHR3]], 16 + ; CHECK-NEXT: [[ICMP3:%[0-9]+]]:_(s1) = G_ICMP intpred(sgt), [[SEXT_INREG5]](s32), [[C2]] + ; CHECK-NEXT: [[XOR:%[0-9]+]]:_(s1) = G_XOR [[ICMP2]], [[ICMP]] + ; CHECK-NEXT: [[XOR1:%[0-9]+]]:_(s1) = G_XOR [[ICMP3]], [[ICMP1]] + ; CHECK-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[XOR]](s1) + ; CHECK-NEXT: [[ANYEXT1:%[0-9]+]]:_(s32) = G_ANYEXT [[XOR1]](s1) + ; CHECK-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 + ; CHECK-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[ANYEXT]], [[C3]] + ; CHECK-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[ANYEXT1]], [[C3]] + ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[AND2]](s32), [[AND3]](s32) + ; CHECK-NEXT: $vgpr0 = COPY [[BITCAST2]](<2 x s16>) + ; CHECK-NEXT: $vgpr1_vgpr2 = COPY [[BUILD_VECTOR]](<2 x s32>) %0:_(<2 x s16>) = COPY $vgpr0 %1:_(<2 x s16>) = COPY $vgpr1 %2:_(<2 x s16>), %3:_(<2 x s1>) = G_SSUBO %0, %1 @@ -183,80 +183,80 @@ body: | liveins: $vgpr0_vgpr1_vgpr2, $vgpr3_vgpr4_vgpr5 ; CHECK-LABEL: name: test_ssubo_v3s16 ; CHECK: [[COPY:%[0-9]+]]:_(<6 x s16>) = COPY $vgpr0_vgpr1_vgpr2 - ; CHECK: [[COPY1:%[0-9]+]]:_(<6 x s16>) = COPY $vgpr3_vgpr4_vgpr5 - ; CHECK: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>), [[UV2:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[COPY]](<6 x s16>) - ; CHECK: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>) - ; CHECK: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; CHECK: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) - ; CHECK: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>) - ; CHECK: [[UV3:%[0-9]+]]:_(<2 x s16>), [[UV4:%[0-9]+]]:_(<2 x s16>), [[UV5:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[COPY1]](<6 x s16>) - ; CHECK: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[UV3]](<2 x s16>) - ; CHECK: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C]](s32) - ; CHECK: [[BITCAST3:%[0-9]+]]:_(s32) = G_BITCAST [[UV4]](<2 x s16>) - ; CHECK: [[SUB:%[0-9]+]]:_(s32) = G_SUB [[BITCAST]], [[BITCAST2]] - ; CHECK: [[SUB1:%[0-9]+]]:_(s32) = G_SUB [[LSHR]], [[LSHR1]] - ; CHECK: [[SUB2:%[0-9]+]]:_(s32) = G_SUB [[BITCAST1]], [[BITCAST3]] - ; CHECK: [[UV6:%[0-9]+]]:_(<2 x s16>), [[UV7:%[0-9]+]]:_(<2 x s16>), [[UV8:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[COPY]](<6 x s16>) - ; CHECK: [[BITCAST4:%[0-9]+]]:_(s32) = G_BITCAST [[UV6]](<2 x s16>) - ; CHECK: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST4]], [[C]](s32) - ; CHECK: [[BITCAST5:%[0-9]+]]:_(s32) = G_BITCAST [[UV7]](<2 x s16>) - ; CHECK: [[SEXT_INREG:%[0-9]+]]:_(s32) = G_SEXT_INREG [[SUB]], 16 - ; CHECK: 
[[SEXT_INREG1:%[0-9]+]]:_(s32) = G_SEXT_INREG [[BITCAST4]], 16 - ; CHECK: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(slt), [[SEXT_INREG]](s32), [[SEXT_INREG1]] - ; CHECK: [[SEXT_INREG2:%[0-9]+]]:_(s32) = G_SEXT_INREG [[SUB1]], 16 - ; CHECK: [[SEXT_INREG3:%[0-9]+]]:_(s32) = G_SEXT_INREG [[LSHR2]], 16 - ; CHECK: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(slt), [[SEXT_INREG2]](s32), [[SEXT_INREG3]] - ; CHECK: [[SEXT_INREG4:%[0-9]+]]:_(s32) = G_SEXT_INREG [[SUB2]], 16 - ; CHECK: [[SEXT_INREG5:%[0-9]+]]:_(s32) = G_SEXT_INREG [[BITCAST5]], 16 - ; CHECK: [[ICMP2:%[0-9]+]]:_(s1) = G_ICMP intpred(slt), [[SEXT_INREG4]](s32), [[SEXT_INREG5]] - ; CHECK: [[UV9:%[0-9]+]]:_(<2 x s16>), [[UV10:%[0-9]+]]:_(<2 x s16>), [[UV11:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[COPY1]](<6 x s16>) - ; CHECK: [[BITCAST6:%[0-9]+]]:_(s32) = G_BITCAST [[UV9]](<2 x s16>) - ; CHECK: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST6]], [[C]](s32) - ; CHECK: [[BITCAST7:%[0-9]+]]:_(s32) = G_BITCAST [[UV10]](<2 x s16>) - ; CHECK: [[SEXT_INREG6:%[0-9]+]]:_(s32) = G_SEXT_INREG [[BITCAST6]], 16 - ; CHECK: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 - ; CHECK: [[COPY2:%[0-9]+]]:_(s32) = COPY [[C1]](s32) - ; CHECK: [[ICMP3:%[0-9]+]]:_(s1) = G_ICMP intpred(sgt), [[SEXT_INREG6]](s32), [[COPY2]] - ; CHECK: [[SEXT_INREG7:%[0-9]+]]:_(s32) = G_SEXT_INREG [[LSHR3]], 16 - ; CHECK: [[COPY3:%[0-9]+]]:_(s32) = COPY [[C1]](s32) - ; CHECK: [[ICMP4:%[0-9]+]]:_(s1) = G_ICMP intpred(sgt), [[SEXT_INREG7]](s32), [[COPY3]] - ; CHECK: [[SEXT_INREG8:%[0-9]+]]:_(s32) = G_SEXT_INREG [[BITCAST7]], 16 - ; CHECK: [[ICMP5:%[0-9]+]]:_(s1) = G_ICMP intpred(sgt), [[SEXT_INREG8]](s32), [[C1]] - ; CHECK: [[XOR:%[0-9]+]]:_(s1) = G_XOR [[ICMP3]], [[ICMP]] - ; CHECK: [[XOR1:%[0-9]+]]:_(s1) = G_XOR [[ICMP4]], [[ICMP1]] - ; CHECK: [[XOR2:%[0-9]+]]:_(s1) = G_XOR [[ICMP5]], [[ICMP2]] - ; CHECK: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[XOR]](s1) - ; CHECK: [[ANYEXT1:%[0-9]+]]:_(s32) = G_ANYEXT [[XOR1]](s1) - ; CHECK: [[ANYEXT2:%[0-9]+]]:_(s32) = G_ANYEXT [[XOR2]](s1) - ; CHECK: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF - ; CHECK: [[UV12:%[0-9]+]]:_(<2 x s16>), [[UV13:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<4 x s16>) - ; CHECK: [[BITCAST8:%[0-9]+]]:_(s32) = G_BITCAST [[UV12]](<2 x s16>) - ; CHECK: [[LSHR4:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST8]], [[C]](s32) - ; CHECK: [[BITCAST9:%[0-9]+]]:_(s32) = G_BITCAST [[UV13]](<2 x s16>) - ; CHECK: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 - ; CHECK: [[AND:%[0-9]+]]:_(s32) = G_AND [[SUB]], [[C2]] - ; CHECK: [[AND1:%[0-9]+]]:_(s32) = G_AND [[SUB1]], [[C2]] - ; CHECK: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C]](s32) - ; CHECK: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]] - ; CHECK: [[BITCAST10:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) - ; CHECK: [[AND2:%[0-9]+]]:_(s32) = G_AND [[SUB2]], [[C2]] - ; CHECK: [[AND3:%[0-9]+]]:_(s32) = G_AND [[BITCAST8]], [[C2]] - ; CHECK: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C]](s32) - ; CHECK: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL1]] - ; CHECK: [[BITCAST11:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32) - ; CHECK: [[AND4:%[0-9]+]]:_(s32) = G_AND [[LSHR4]], [[C2]] - ; CHECK: [[AND5:%[0-9]+]]:_(s32) = G_AND [[BITCAST9]], [[C2]] - ; CHECK: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[C]](s32) - ; CHECK: [[OR2:%[0-9]+]]:_(s32) = G_OR [[AND4]], [[SHL2]] - ; CHECK: [[BITCAST12:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR2]](s32) - ; CHECK: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BITCAST10]](<2 x s16>), [[BITCAST11]](<2 x s16>), [[BITCAST12]](<2 x s16>) - ; CHECK: 
[[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 - ; CHECK: [[AND6:%[0-9]+]]:_(s32) = G_AND [[ANYEXT]], [[C3]] - ; CHECK: [[AND7:%[0-9]+]]:_(s32) = G_AND [[ANYEXT1]], [[C3]] - ; CHECK: [[AND8:%[0-9]+]]:_(s32) = G_AND [[ANYEXT2]], [[C3]] - ; CHECK: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[AND6]](s32), [[AND7]](s32), [[AND8]](s32) - ; CHECK: $vgpr0_vgpr1_vgpr2 = COPY [[CONCAT_VECTORS]](<6 x s16>) - ; CHECK: $vgpr0_vgpr1_vgpr2 = COPY [[BUILD_VECTOR]](<3 x s32>) + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(<6 x s16>) = COPY $vgpr3_vgpr4_vgpr5 + ; CHECK-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>), [[UV2:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[COPY]](<6 x s16>) + ; CHECK-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>) + ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; CHECK-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) + ; CHECK-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>) + ; CHECK-NEXT: [[UV3:%[0-9]+]]:_(<2 x s16>), [[UV4:%[0-9]+]]:_(<2 x s16>), [[UV5:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[COPY1]](<6 x s16>) + ; CHECK-NEXT: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[UV3]](<2 x s16>) + ; CHECK-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C]](s32) + ; CHECK-NEXT: [[BITCAST3:%[0-9]+]]:_(s32) = G_BITCAST [[UV4]](<2 x s16>) + ; CHECK-NEXT: [[SUB:%[0-9]+]]:_(s32) = G_SUB [[BITCAST]], [[BITCAST2]] + ; CHECK-NEXT: [[SUB1:%[0-9]+]]:_(s32) = G_SUB [[LSHR]], [[LSHR1]] + ; CHECK-NEXT: [[SUB2:%[0-9]+]]:_(s32) = G_SUB [[BITCAST1]], [[BITCAST3]] + ; CHECK-NEXT: [[UV6:%[0-9]+]]:_(<2 x s16>), [[UV7:%[0-9]+]]:_(<2 x s16>), [[UV8:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[COPY]](<6 x s16>) + ; CHECK-NEXT: [[BITCAST4:%[0-9]+]]:_(s32) = G_BITCAST [[UV6]](<2 x s16>) + ; CHECK-NEXT: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST4]], [[C]](s32) + ; CHECK-NEXT: [[BITCAST5:%[0-9]+]]:_(s32) = G_BITCAST [[UV7]](<2 x s16>) + ; CHECK-NEXT: [[SEXT_INREG:%[0-9]+]]:_(s32) = G_SEXT_INREG [[SUB]], 16 + ; CHECK-NEXT: [[SEXT_INREG1:%[0-9]+]]:_(s32) = G_SEXT_INREG [[BITCAST4]], 16 + ; CHECK-NEXT: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(slt), [[SEXT_INREG]](s32), [[SEXT_INREG1]] + ; CHECK-NEXT: [[SEXT_INREG2:%[0-9]+]]:_(s32) = G_SEXT_INREG [[SUB1]], 16 + ; CHECK-NEXT: [[SEXT_INREG3:%[0-9]+]]:_(s32) = G_SEXT_INREG [[LSHR2]], 16 + ; CHECK-NEXT: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(slt), [[SEXT_INREG2]](s32), [[SEXT_INREG3]] + ; CHECK-NEXT: [[SEXT_INREG4:%[0-9]+]]:_(s32) = G_SEXT_INREG [[SUB2]], 16 + ; CHECK-NEXT: [[SEXT_INREG5:%[0-9]+]]:_(s32) = G_SEXT_INREG [[BITCAST5]], 16 + ; CHECK-NEXT: [[ICMP2:%[0-9]+]]:_(s1) = G_ICMP intpred(slt), [[SEXT_INREG4]](s32), [[SEXT_INREG5]] + ; CHECK-NEXT: [[UV9:%[0-9]+]]:_(<2 x s16>), [[UV10:%[0-9]+]]:_(<2 x s16>), [[UV11:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[COPY1]](<6 x s16>) + ; CHECK-NEXT: [[BITCAST6:%[0-9]+]]:_(s32) = G_BITCAST [[UV9]](<2 x s16>) + ; CHECK-NEXT: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST6]], [[C]](s32) + ; CHECK-NEXT: [[BITCAST7:%[0-9]+]]:_(s32) = G_BITCAST [[UV10]](<2 x s16>) + ; CHECK-NEXT: [[SEXT_INREG6:%[0-9]+]]:_(s32) = G_SEXT_INREG [[BITCAST6]], 16 + ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY [[C1]](s32) + ; CHECK-NEXT: [[ICMP3:%[0-9]+]]:_(s1) = G_ICMP intpred(sgt), [[SEXT_INREG6]](s32), [[COPY2]] + ; CHECK-NEXT: [[SEXT_INREG7:%[0-9]+]]:_(s32) = G_SEXT_INREG [[LSHR3]], 16 + ; CHECK-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY [[C1]](s32) + ; CHECK-NEXT: [[ICMP4:%[0-9]+]]:_(s1) = G_ICMP intpred(sgt), 
[[SEXT_INREG7]](s32), [[COPY3]] + ; CHECK-NEXT: [[SEXT_INREG8:%[0-9]+]]:_(s32) = G_SEXT_INREG [[BITCAST7]], 16 + ; CHECK-NEXT: [[ICMP5:%[0-9]+]]:_(s1) = G_ICMP intpred(sgt), [[SEXT_INREG8]](s32), [[C1]] + ; CHECK-NEXT: [[XOR:%[0-9]+]]:_(s1) = G_XOR [[ICMP3]], [[ICMP]] + ; CHECK-NEXT: [[XOR1:%[0-9]+]]:_(s1) = G_XOR [[ICMP4]], [[ICMP1]] + ; CHECK-NEXT: [[XOR2:%[0-9]+]]:_(s1) = G_XOR [[ICMP5]], [[ICMP2]] + ; CHECK-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[XOR]](s1) + ; CHECK-NEXT: [[ANYEXT1:%[0-9]+]]:_(s32) = G_ANYEXT [[XOR1]](s1) + ; CHECK-NEXT: [[ANYEXT2:%[0-9]+]]:_(s32) = G_ANYEXT [[XOR2]](s1) + ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF + ; CHECK-NEXT: [[UV12:%[0-9]+]]:_(<2 x s16>), [[UV13:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<4 x s16>) + ; CHECK-NEXT: [[BITCAST8:%[0-9]+]]:_(s32) = G_BITCAST [[UV12]](<2 x s16>) + ; CHECK-NEXT: [[LSHR4:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST8]], [[C]](s32) + ; CHECK-NEXT: [[BITCAST9:%[0-9]+]]:_(s32) = G_BITCAST [[UV13]](<2 x s16>) + ; CHECK-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 + ; CHECK-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[SUB]], [[C2]] + ; CHECK-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[SUB1]], [[C2]] + ; CHECK-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C]](s32) + ; CHECK-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]] + ; CHECK-NEXT: [[BITCAST10:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) + ; CHECK-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[SUB2]], [[C2]] + ; CHECK-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[BITCAST8]], [[C2]] + ; CHECK-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C]](s32) + ; CHECK-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL1]] + ; CHECK-NEXT: [[BITCAST11:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32) + ; CHECK-NEXT: [[AND4:%[0-9]+]]:_(s32) = G_AND [[LSHR4]], [[C2]] + ; CHECK-NEXT: [[AND5:%[0-9]+]]:_(s32) = G_AND [[BITCAST9]], [[C2]] + ; CHECK-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[C]](s32) + ; CHECK-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[AND4]], [[SHL2]] + ; CHECK-NEXT: [[BITCAST12:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR2]](s32) + ; CHECK-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BITCAST10]](<2 x s16>), [[BITCAST11]](<2 x s16>), [[BITCAST12]](<2 x s16>) + ; CHECK-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 + ; CHECK-NEXT: [[AND6:%[0-9]+]]:_(s32) = G_AND [[ANYEXT]], [[C3]] + ; CHECK-NEXT: [[AND7:%[0-9]+]]:_(s32) = G_AND [[ANYEXT1]], [[C3]] + ; CHECK-NEXT: [[AND8:%[0-9]+]]:_(s32) = G_AND [[ANYEXT2]], [[C3]] + ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[AND6]](s32), [[AND7]](s32), [[AND8]](s32) + ; CHECK-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[CONCAT_VECTORS]](<6 x s16>) + ; CHECK-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[BUILD_VECTOR]](<3 x s32>) %0:_(<6 x s16>) = COPY $vgpr0_vgpr1_vgpr2 %1:_(<6 x s16>) = COPY $vgpr3_vgpr4_vgpr5 %2:_(<3 x s16>), %3:_(<3 x s16>) = G_UNMERGE_VALUES %0 @@ -277,84 +277,84 @@ body: | ; CHECK-LABEL: name: test_ssubo_v4s16 ; CHECK: [[COPY:%[0-9]+]]:_(<4 x s16>) = COPY $vgpr0_vgpr1 - ; CHECK: [[COPY1:%[0-9]+]]:_(<4 x s16>) = COPY $vgpr1_vgpr2 - ; CHECK: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[COPY]](<4 x s16>) - ; CHECK: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>) - ; CHECK: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; CHECK: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) - ; CHECK: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>) - ; CHECK: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C]](s32) - ; CHECK: 
[[UV2:%[0-9]+]]:_(<2 x s16>), [[UV3:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[COPY1]](<4 x s16>) - ; CHECK: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[UV2]](<2 x s16>) - ; CHECK: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C]](s32) - ; CHECK: [[BITCAST3:%[0-9]+]]:_(s32) = G_BITCAST [[UV3]](<2 x s16>) - ; CHECK: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST3]], [[C]](s32) - ; CHECK: [[SUB:%[0-9]+]]:_(s32) = G_SUB [[BITCAST]], [[BITCAST2]] - ; CHECK: [[SUB1:%[0-9]+]]:_(s32) = G_SUB [[LSHR]], [[LSHR2]] - ; CHECK: [[SUB2:%[0-9]+]]:_(s32) = G_SUB [[BITCAST1]], [[BITCAST3]] - ; CHECK: [[SUB3:%[0-9]+]]:_(s32) = G_SUB [[LSHR1]], [[LSHR3]] - ; CHECK: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 - ; CHECK: [[AND:%[0-9]+]]:_(s32) = G_AND [[SUB]], [[C1]] - ; CHECK: [[AND1:%[0-9]+]]:_(s32) = G_AND [[SUB1]], [[C1]] - ; CHECK: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C]](s32) - ; CHECK: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]] - ; CHECK: [[BITCAST4:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) - ; CHECK: [[AND2:%[0-9]+]]:_(s32) = G_AND [[SUB2]], [[C1]] - ; CHECK: [[AND3:%[0-9]+]]:_(s32) = G_AND [[SUB3]], [[C1]] - ; CHECK: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C]](s32) - ; CHECK: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL1]] - ; CHECK: [[BITCAST5:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32) - ; CHECK: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BITCAST4]](<2 x s16>), [[BITCAST5]](<2 x s16>) - ; CHECK: [[UV4:%[0-9]+]]:_(<2 x s16>), [[UV5:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[COPY]](<4 x s16>) - ; CHECK: [[BITCAST6:%[0-9]+]]:_(s32) = G_BITCAST [[UV4]](<2 x s16>) - ; CHECK: [[LSHR4:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST6]], [[C]](s32) - ; CHECK: [[BITCAST7:%[0-9]+]]:_(s32) = G_BITCAST [[UV5]](<2 x s16>) - ; CHECK: [[LSHR5:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST7]], [[C]](s32) - ; CHECK: [[SEXT_INREG:%[0-9]+]]:_(s32) = G_SEXT_INREG [[SUB]], 16 - ; CHECK: [[SEXT_INREG1:%[0-9]+]]:_(s32) = G_SEXT_INREG [[BITCAST6]], 16 - ; CHECK: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(slt), [[SEXT_INREG]](s32), [[SEXT_INREG1]] - ; CHECK: [[SEXT_INREG2:%[0-9]+]]:_(s32) = G_SEXT_INREG [[SUB1]], 16 - ; CHECK: [[SEXT_INREG3:%[0-9]+]]:_(s32) = G_SEXT_INREG [[LSHR4]], 16 - ; CHECK: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(slt), [[SEXT_INREG2]](s32), [[SEXT_INREG3]] - ; CHECK: [[SEXT_INREG4:%[0-9]+]]:_(s32) = G_SEXT_INREG [[SUB2]], 16 - ; CHECK: [[SEXT_INREG5:%[0-9]+]]:_(s32) = G_SEXT_INREG [[BITCAST7]], 16 - ; CHECK: [[ICMP2:%[0-9]+]]:_(s1) = G_ICMP intpred(slt), [[SEXT_INREG4]](s32), [[SEXT_INREG5]] - ; CHECK: [[SEXT_INREG6:%[0-9]+]]:_(s32) = G_SEXT_INREG [[SUB3]], 16 - ; CHECK: [[SEXT_INREG7:%[0-9]+]]:_(s32) = G_SEXT_INREG [[LSHR5]], 16 - ; CHECK: [[ICMP3:%[0-9]+]]:_(s1) = G_ICMP intpred(slt), [[SEXT_INREG6]](s32), [[SEXT_INREG7]] - ; CHECK: [[UV6:%[0-9]+]]:_(<2 x s16>), [[UV7:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[COPY1]](<4 x s16>) - ; CHECK: [[BITCAST8:%[0-9]+]]:_(s32) = G_BITCAST [[UV6]](<2 x s16>) - ; CHECK: [[LSHR6:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST8]], [[C]](s32) - ; CHECK: [[BITCAST9:%[0-9]+]]:_(s32) = G_BITCAST [[UV7]](<2 x s16>) - ; CHECK: [[LSHR7:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST9]], [[C]](s32) - ; CHECK: [[SEXT_INREG8:%[0-9]+]]:_(s32) = G_SEXT_INREG [[BITCAST8]], 16 - ; CHECK: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 - ; CHECK: [[COPY2:%[0-9]+]]:_(s32) = COPY [[C2]](s32) - ; CHECK: [[ICMP4:%[0-9]+]]:_(s1) = G_ICMP intpred(sgt), [[SEXT_INREG8]](s32), [[COPY2]] - ; CHECK: [[SEXT_INREG9:%[0-9]+]]:_(s32) = G_SEXT_INREG [[LSHR6]], 16 - ; CHECK: [[COPY3:%[0-9]+]]:_(s32) = COPY 
[[C2]](s32) - ; CHECK: [[ICMP5:%[0-9]+]]:_(s1) = G_ICMP intpred(sgt), [[SEXT_INREG9]](s32), [[COPY3]] - ; CHECK: [[SEXT_INREG10:%[0-9]+]]:_(s32) = G_SEXT_INREG [[BITCAST9]], 16 - ; CHECK: [[COPY4:%[0-9]+]]:_(s32) = COPY [[C2]](s32) - ; CHECK: [[ICMP6:%[0-9]+]]:_(s1) = G_ICMP intpred(sgt), [[SEXT_INREG10]](s32), [[COPY4]] - ; CHECK: [[SEXT_INREG11:%[0-9]+]]:_(s32) = G_SEXT_INREG [[LSHR7]], 16 - ; CHECK: [[ICMP7:%[0-9]+]]:_(s1) = G_ICMP intpred(sgt), [[SEXT_INREG11]](s32), [[C2]] - ; CHECK: [[XOR:%[0-9]+]]:_(s1) = G_XOR [[ICMP4]], [[ICMP]] - ; CHECK: [[XOR1:%[0-9]+]]:_(s1) = G_XOR [[ICMP5]], [[ICMP1]] - ; CHECK: [[XOR2:%[0-9]+]]:_(s1) = G_XOR [[ICMP6]], [[ICMP2]] - ; CHECK: [[XOR3:%[0-9]+]]:_(s1) = G_XOR [[ICMP7]], [[ICMP3]] - ; CHECK: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[XOR]](s1) - ; CHECK: [[ANYEXT1:%[0-9]+]]:_(s32) = G_ANYEXT [[XOR1]](s1) - ; CHECK: [[ANYEXT2:%[0-9]+]]:_(s32) = G_ANYEXT [[XOR2]](s1) - ; CHECK: [[ANYEXT3:%[0-9]+]]:_(s32) = G_ANYEXT [[XOR3]](s1) - ; CHECK: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 - ; CHECK: [[AND4:%[0-9]+]]:_(s32) = G_AND [[ANYEXT]], [[C3]] - ; CHECK: [[AND5:%[0-9]+]]:_(s32) = G_AND [[ANYEXT1]], [[C3]] - ; CHECK: [[AND6:%[0-9]+]]:_(s32) = G_AND [[ANYEXT2]], [[C3]] - ; CHECK: [[AND7:%[0-9]+]]:_(s32) = G_AND [[ANYEXT3]], [[C3]] - ; CHECK: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[AND4]](s32), [[AND5]](s32), [[AND6]](s32), [[AND7]](s32) - ; CHECK: $vgpr0_vgpr1 = COPY [[CONCAT_VECTORS]](<4 x s16>) - ; CHECK: $vgpr2_vgpr3_vgpr4_vgpr5 = COPY [[BUILD_VECTOR]](<4 x s32>) + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(<4 x s16>) = COPY $vgpr1_vgpr2 + ; CHECK-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[COPY]](<4 x s16>) + ; CHECK-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>) + ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; CHECK-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) + ; CHECK-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>) + ; CHECK-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C]](s32) + ; CHECK-NEXT: [[UV2:%[0-9]+]]:_(<2 x s16>), [[UV3:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[COPY1]](<4 x s16>) + ; CHECK-NEXT: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[UV2]](<2 x s16>) + ; CHECK-NEXT: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C]](s32) + ; CHECK-NEXT: [[BITCAST3:%[0-9]+]]:_(s32) = G_BITCAST [[UV3]](<2 x s16>) + ; CHECK-NEXT: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST3]], [[C]](s32) + ; CHECK-NEXT: [[SUB:%[0-9]+]]:_(s32) = G_SUB [[BITCAST]], [[BITCAST2]] + ; CHECK-NEXT: [[SUB1:%[0-9]+]]:_(s32) = G_SUB [[LSHR]], [[LSHR2]] + ; CHECK-NEXT: [[SUB2:%[0-9]+]]:_(s32) = G_SUB [[BITCAST1]], [[BITCAST3]] + ; CHECK-NEXT: [[SUB3:%[0-9]+]]:_(s32) = G_SUB [[LSHR1]], [[LSHR3]] + ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 + ; CHECK-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[SUB]], [[C1]] + ; CHECK-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[SUB1]], [[C1]] + ; CHECK-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C]](s32) + ; CHECK-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]] + ; CHECK-NEXT: [[BITCAST4:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) + ; CHECK-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[SUB2]], [[C1]] + ; CHECK-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[SUB3]], [[C1]] + ; CHECK-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C]](s32) + ; CHECK-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL1]] + ; CHECK-NEXT: [[BITCAST5:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32) + ; CHECK-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x 
s16>) = G_CONCAT_VECTORS [[BITCAST4]](<2 x s16>), [[BITCAST5]](<2 x s16>) + ; CHECK-NEXT: [[UV4:%[0-9]+]]:_(<2 x s16>), [[UV5:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[COPY]](<4 x s16>) + ; CHECK-NEXT: [[BITCAST6:%[0-9]+]]:_(s32) = G_BITCAST [[UV4]](<2 x s16>) + ; CHECK-NEXT: [[LSHR4:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST6]], [[C]](s32) + ; CHECK-NEXT: [[BITCAST7:%[0-9]+]]:_(s32) = G_BITCAST [[UV5]](<2 x s16>) + ; CHECK-NEXT: [[LSHR5:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST7]], [[C]](s32) + ; CHECK-NEXT: [[SEXT_INREG:%[0-9]+]]:_(s32) = G_SEXT_INREG [[SUB]], 16 + ; CHECK-NEXT: [[SEXT_INREG1:%[0-9]+]]:_(s32) = G_SEXT_INREG [[BITCAST6]], 16 + ; CHECK-NEXT: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(slt), [[SEXT_INREG]](s32), [[SEXT_INREG1]] + ; CHECK-NEXT: [[SEXT_INREG2:%[0-9]+]]:_(s32) = G_SEXT_INREG [[SUB1]], 16 + ; CHECK-NEXT: [[SEXT_INREG3:%[0-9]+]]:_(s32) = G_SEXT_INREG [[LSHR4]], 16 + ; CHECK-NEXT: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(slt), [[SEXT_INREG2]](s32), [[SEXT_INREG3]] + ; CHECK-NEXT: [[SEXT_INREG4:%[0-9]+]]:_(s32) = G_SEXT_INREG [[SUB2]], 16 + ; CHECK-NEXT: [[SEXT_INREG5:%[0-9]+]]:_(s32) = G_SEXT_INREG [[BITCAST7]], 16 + ; CHECK-NEXT: [[ICMP2:%[0-9]+]]:_(s1) = G_ICMP intpred(slt), [[SEXT_INREG4]](s32), [[SEXT_INREG5]] + ; CHECK-NEXT: [[SEXT_INREG6:%[0-9]+]]:_(s32) = G_SEXT_INREG [[SUB3]], 16 + ; CHECK-NEXT: [[SEXT_INREG7:%[0-9]+]]:_(s32) = G_SEXT_INREG [[LSHR5]], 16 + ; CHECK-NEXT: [[ICMP3:%[0-9]+]]:_(s1) = G_ICMP intpred(slt), [[SEXT_INREG6]](s32), [[SEXT_INREG7]] + ; CHECK-NEXT: [[UV6:%[0-9]+]]:_(<2 x s16>), [[UV7:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[COPY1]](<4 x s16>) + ; CHECK-NEXT: [[BITCAST8:%[0-9]+]]:_(s32) = G_BITCAST [[UV6]](<2 x s16>) + ; CHECK-NEXT: [[LSHR6:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST8]], [[C]](s32) + ; CHECK-NEXT: [[BITCAST9:%[0-9]+]]:_(s32) = G_BITCAST [[UV7]](<2 x s16>) + ; CHECK-NEXT: [[LSHR7:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST9]], [[C]](s32) + ; CHECK-NEXT: [[SEXT_INREG8:%[0-9]+]]:_(s32) = G_SEXT_INREG [[BITCAST8]], 16 + ; CHECK-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY [[C2]](s32) + ; CHECK-NEXT: [[ICMP4:%[0-9]+]]:_(s1) = G_ICMP intpred(sgt), [[SEXT_INREG8]](s32), [[COPY2]] + ; CHECK-NEXT: [[SEXT_INREG9:%[0-9]+]]:_(s32) = G_SEXT_INREG [[LSHR6]], 16 + ; CHECK-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY [[C2]](s32) + ; CHECK-NEXT: [[ICMP5:%[0-9]+]]:_(s1) = G_ICMP intpred(sgt), [[SEXT_INREG9]](s32), [[COPY3]] + ; CHECK-NEXT: [[SEXT_INREG10:%[0-9]+]]:_(s32) = G_SEXT_INREG [[BITCAST9]], 16 + ; CHECK-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY [[C2]](s32) + ; CHECK-NEXT: [[ICMP6:%[0-9]+]]:_(s1) = G_ICMP intpred(sgt), [[SEXT_INREG10]](s32), [[COPY4]] + ; CHECK-NEXT: [[SEXT_INREG11:%[0-9]+]]:_(s32) = G_SEXT_INREG [[LSHR7]], 16 + ; CHECK-NEXT: [[ICMP7:%[0-9]+]]:_(s1) = G_ICMP intpred(sgt), [[SEXT_INREG11]](s32), [[C2]] + ; CHECK-NEXT: [[XOR:%[0-9]+]]:_(s1) = G_XOR [[ICMP4]], [[ICMP]] + ; CHECK-NEXT: [[XOR1:%[0-9]+]]:_(s1) = G_XOR [[ICMP5]], [[ICMP1]] + ; CHECK-NEXT: [[XOR2:%[0-9]+]]:_(s1) = G_XOR [[ICMP6]], [[ICMP2]] + ; CHECK-NEXT: [[XOR3:%[0-9]+]]:_(s1) = G_XOR [[ICMP7]], [[ICMP3]] + ; CHECK-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[XOR]](s1) + ; CHECK-NEXT: [[ANYEXT1:%[0-9]+]]:_(s32) = G_ANYEXT [[XOR1]](s1) + ; CHECK-NEXT: [[ANYEXT2:%[0-9]+]]:_(s32) = G_ANYEXT [[XOR2]](s1) + ; CHECK-NEXT: [[ANYEXT3:%[0-9]+]]:_(s32) = G_ANYEXT [[XOR3]](s1) + ; CHECK-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 + ; CHECK-NEXT: [[AND4:%[0-9]+]]:_(s32) = G_AND [[ANYEXT]], [[C3]] + ; CHECK-NEXT: [[AND5:%[0-9]+]]:_(s32) = G_AND 
[[ANYEXT1]], [[C3]] + ; CHECK-NEXT: [[AND6:%[0-9]+]]:_(s32) = G_AND [[ANYEXT2]], [[C3]] + ; CHECK-NEXT: [[AND7:%[0-9]+]]:_(s32) = G_AND [[ANYEXT3]], [[C3]] + ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[AND4]](s32), [[AND5]](s32), [[AND6]](s32), [[AND7]](s32) + ; CHECK-NEXT: $vgpr0_vgpr1 = COPY [[CONCAT_VECTORS]](<4 x s16>) + ; CHECK-NEXT: $vgpr2_vgpr3_vgpr4_vgpr5 = COPY [[BUILD_VECTOR]](<4 x s32>) %0:_(<4 x s16>) = COPY $vgpr0_vgpr1 %1:_(<4 x s16>) = COPY $vgpr1_vgpr2 %2:_(<4 x s16>), %3:_(<4 x s1>) = G_SSUBO %0, %1 @@ -371,29 +371,29 @@ body: | ; CHECK-LABEL: name: test_ssubo_v2s32 ; CHECK: [[COPY:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr0_vgpr1 - ; CHECK: [[COPY1:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr2_vgpr3 - ; CHECK: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<2 x s32>) - ; CHECK: [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](<2 x s32>) - ; CHECK: [[SUB:%[0-9]+]]:_(s32) = G_SUB [[UV]], [[UV2]] - ; CHECK: [[SUB1:%[0-9]+]]:_(s32) = G_SUB [[UV1]], [[UV3]] - ; CHECK: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[SUB]](s32), [[SUB1]](s32) - ; CHECK: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 - ; CHECK: [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<2 x s32>) - ; CHECK: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(slt), [[SUB]](s32), [[UV4]] - ; CHECK: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(slt), [[SUB1]](s32), [[UV5]] - ; CHECK: [[UV6:%[0-9]+]]:_(s32), [[UV7:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](<2 x s32>) - ; CHECK: [[ICMP2:%[0-9]+]]:_(s1) = G_ICMP intpred(sgt), [[UV6]](s32), [[C]] - ; CHECK: [[ICMP3:%[0-9]+]]:_(s1) = G_ICMP intpred(sgt), [[UV7]](s32), [[C]] - ; CHECK: [[XOR:%[0-9]+]]:_(s1) = G_XOR [[ICMP2]], [[ICMP]] - ; CHECK: [[XOR1:%[0-9]+]]:_(s1) = G_XOR [[ICMP3]], [[ICMP1]] - ; CHECK: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[XOR]](s1) - ; CHECK: [[ANYEXT1:%[0-9]+]]:_(s32) = G_ANYEXT [[XOR1]](s1) - ; CHECK: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 - ; CHECK: [[AND:%[0-9]+]]:_(s32) = G_AND [[ANYEXT]], [[C1]] - ; CHECK: [[AND1:%[0-9]+]]:_(s32) = G_AND [[ANYEXT1]], [[C1]] - ; CHECK: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[AND]](s32), [[AND1]](s32) - ; CHECK: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x s32>) - ; CHECK: $vgpr2_vgpr3 = COPY [[BUILD_VECTOR1]](<2 x s32>) + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr2_vgpr3 + ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<2 x s32>) + ; CHECK-NEXT: [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](<2 x s32>) + ; CHECK-NEXT: [[SUB:%[0-9]+]]:_(s32) = G_SUB [[UV]], [[UV2]] + ; CHECK-NEXT: [[SUB1:%[0-9]+]]:_(s32) = G_SUB [[UV1]], [[UV3]] + ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[SUB]](s32), [[SUB1]](s32) + ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 + ; CHECK-NEXT: [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<2 x s32>) + ; CHECK-NEXT: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(slt), [[SUB]](s32), [[UV4]] + ; CHECK-NEXT: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(slt), [[SUB1]](s32), [[UV5]] + ; CHECK-NEXT: [[UV6:%[0-9]+]]:_(s32), [[UV7:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](<2 x s32>) + ; CHECK-NEXT: [[ICMP2:%[0-9]+]]:_(s1) = G_ICMP intpred(sgt), [[UV6]](s32), [[C]] + ; CHECK-NEXT: [[ICMP3:%[0-9]+]]:_(s1) = G_ICMP intpred(sgt), [[UV7]](s32), [[C]] + ; CHECK-NEXT: [[XOR:%[0-9]+]]:_(s1) = G_XOR [[ICMP2]], [[ICMP]] + ; CHECK-NEXT: [[XOR1:%[0-9]+]]:_(s1) = G_XOR 
[[ICMP3]], [[ICMP1]] + ; CHECK-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[XOR]](s1) + ; CHECK-NEXT: [[ANYEXT1:%[0-9]+]]:_(s32) = G_ANYEXT [[XOR1]](s1) + ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 + ; CHECK-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[ANYEXT]], [[C1]] + ; CHECK-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[ANYEXT1]], [[C1]] + ; CHECK-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[AND]](s32), [[AND1]](s32) + ; CHECK-NEXT: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x s32>) + ; CHECK-NEXT: $vgpr2_vgpr3 = COPY [[BUILD_VECTOR1]](<2 x s32>) %0:_(<2 x s32>) = COPY $vgpr0_vgpr1 %1:_(<2 x s32>) = COPY $vgpr2_vgpr3 %2:_(<2 x s32>), %3:_(<2 x s1>) = G_SSUBO %0, %1 diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-ssubsat.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-ssubsat.mir index 4fdea4bcf996..e2516e5d79f7 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-ssubsat.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-ssubsat.mir @@ -1,8 +1,8 @@ # NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py -# RUN: llc -global-isel-abort=0 -march=amdgcn -mcpu=tahiti -run-pass=legalizer %s -o - | FileCheck -check-prefix=GFX6 %s -# RUN: llc -global-isel-abort=0 -march=amdgcn -mcpu=fiji -run-pass=legalizer %s -o - | FileCheck -check-prefix=GFX8 %s -# RUN: llc -global-isel-abort=0 -march=amdgcn -mcpu=gfx900 -run-pass=legalizer %s -o - | FileCheck -check-prefix=GFX9 %s -# RUN: llc -global-isel-abort=0 -march=amdgcn -mcpu=gfx1010 -run-pass=legalizer %s -o - | FileCheck -check-prefix=GFX9 %s +# RUN: llc -march=amdgcn -mcpu=tahiti -run-pass=legalizer %s -o - | FileCheck -check-prefix=GFX6 %s +# RUN: llc -march=amdgcn -mcpu=fiji -run-pass=legalizer %s -o - | FileCheck -check-prefix=GFX8 %s +# RUN: llc -march=amdgcn -mcpu=gfx900 -run-pass=legalizer %s -o - | FileCheck -check-prefix=GFX9 %s +# RUN: llc -march=amdgcn -mcpu=gfx1010 -run-pass=legalizer %s -o - | FileCheck -check-prefix=GFX9 %s --- name: ssubsat_s7 @@ -12,55 +12,55 @@ body: | ; GFX6-LABEL: name: ssubsat_s7 ; GFX6: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX6: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX6: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 25 - ; GFX6: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY]], [[C]](s32) - ; GFX6: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[COPY1]], [[C]](s32) - ; GFX6: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 2147483647 - ; GFX6: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 -2147483648 - ; GFX6: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 -1 - ; GFX6: [[SMAX:%[0-9]+]]:_(s32) = G_SMAX [[SHL]], [[C3]] - ; GFX6: [[SUB:%[0-9]+]]:_(s32) = G_SUB [[SMAX]], [[C1]] - ; GFX6: [[SMIN:%[0-9]+]]:_(s32) = G_SMIN [[SHL]], [[C3]] - ; GFX6: [[SUB1:%[0-9]+]]:_(s32) = G_SUB [[SMIN]], [[C2]] - ; GFX6: [[SMAX1:%[0-9]+]]:_(s32) = G_SMAX [[SUB]], [[SHL1]] - ; GFX6: [[SMIN1:%[0-9]+]]:_(s32) = G_SMIN [[SMAX1]], [[SUB1]] - ; GFX6: [[SUB2:%[0-9]+]]:_(s32) = G_SUB [[SHL]], [[SMIN1]] - ; GFX6: [[ASHR:%[0-9]+]]:_(s32) = G_ASHR [[SUB2]], [[C]](s32) - ; GFX6: $vgpr0 = COPY [[ASHR]](s32) + ; GFX6-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; GFX6-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 25 + ; GFX6-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY]], [[C]](s32) + ; GFX6-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[COPY1]], [[C]](s32) + ; GFX6-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 2147483647 + ; GFX6-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 -2147483648 + ; GFX6-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 -1 + ; GFX6-NEXT: [[SMAX:%[0-9]+]]:_(s32) = G_SMAX [[SHL]], [[C3]] + ; GFX6-NEXT: 
[[SUB:%[0-9]+]]:_(s32) = G_SUB [[SMAX]], [[C1]] + ; GFX6-NEXT: [[SMIN:%[0-9]+]]:_(s32) = G_SMIN [[SHL]], [[C3]] + ; GFX6-NEXT: [[SUB1:%[0-9]+]]:_(s32) = G_SUB [[SMIN]], [[C2]] + ; GFX6-NEXT: [[SMAX1:%[0-9]+]]:_(s32) = G_SMAX [[SUB]], [[SHL1]] + ; GFX6-NEXT: [[SMIN1:%[0-9]+]]:_(s32) = G_SMIN [[SMAX1]], [[SUB1]] + ; GFX6-NEXT: [[SUB2:%[0-9]+]]:_(s32) = G_SUB [[SHL]], [[SMIN1]] + ; GFX6-NEXT: [[ASHR:%[0-9]+]]:_(s32) = G_ASHR [[SUB2]], [[C]](s32) + ; GFX6-NEXT: $vgpr0 = COPY [[ASHR]](s32) ; GFX8-LABEL: name: ssubsat_s7 ; GFX8: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX8: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX8: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY]](s32) - ; GFX8: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY1]](s32) - ; GFX8: [[C:%[0-9]+]]:_(s16) = G_CONSTANT i16 9 - ; GFX8: [[SHL:%[0-9]+]]:_(s16) = G_SHL [[TRUNC]], [[C]](s16) - ; GFX8: [[SHL1:%[0-9]+]]:_(s16) = G_SHL [[TRUNC1]], [[C]](s16) - ; GFX8: [[C1:%[0-9]+]]:_(s16) = G_CONSTANT i16 32767 - ; GFX8: [[C2:%[0-9]+]]:_(s16) = G_CONSTANT i16 -32768 - ; GFX8: [[C3:%[0-9]+]]:_(s16) = G_CONSTANT i16 -1 - ; GFX8: [[SMAX:%[0-9]+]]:_(s16) = G_SMAX [[SHL]], [[C3]] - ; GFX8: [[SUB:%[0-9]+]]:_(s16) = G_SUB [[SMAX]], [[C1]] - ; GFX8: [[SMIN:%[0-9]+]]:_(s16) = G_SMIN [[SHL]], [[C3]] - ; GFX8: [[SUB1:%[0-9]+]]:_(s16) = G_SUB [[SMIN]], [[C2]] - ; GFX8: [[SMAX1:%[0-9]+]]:_(s16) = G_SMAX [[SUB]], [[SHL1]] - ; GFX8: [[SMIN1:%[0-9]+]]:_(s16) = G_SMIN [[SMAX1]], [[SUB1]] - ; GFX8: [[SUB2:%[0-9]+]]:_(s16) = G_SUB [[SHL]], [[SMIN1]] - ; GFX8: [[ASHR:%[0-9]+]]:_(s16) = G_ASHR [[SUB2]], [[C]](s16) - ; GFX8: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[ASHR]](s16) - ; GFX8: $vgpr0 = COPY [[ANYEXT]](s32) + ; GFX8-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; GFX8-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY]](s32) + ; GFX8-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY1]](s32) + ; GFX8-NEXT: [[C:%[0-9]+]]:_(s16) = G_CONSTANT i16 9 + ; GFX8-NEXT: [[SHL:%[0-9]+]]:_(s16) = G_SHL [[TRUNC]], [[C]](s16) + ; GFX8-NEXT: [[SHL1:%[0-9]+]]:_(s16) = G_SHL [[TRUNC1]], [[C]](s16) + ; GFX8-NEXT: [[C1:%[0-9]+]]:_(s16) = G_CONSTANT i16 32767 + ; GFX8-NEXT: [[C2:%[0-9]+]]:_(s16) = G_CONSTANT i16 -32768 + ; GFX8-NEXT: [[C3:%[0-9]+]]:_(s16) = G_CONSTANT i16 -1 + ; GFX8-NEXT: [[SMAX:%[0-9]+]]:_(s16) = G_SMAX [[SHL]], [[C3]] + ; GFX8-NEXT: [[SUB:%[0-9]+]]:_(s16) = G_SUB [[SMAX]], [[C1]] + ; GFX8-NEXT: [[SMIN:%[0-9]+]]:_(s16) = G_SMIN [[SHL]], [[C3]] + ; GFX8-NEXT: [[SUB1:%[0-9]+]]:_(s16) = G_SUB [[SMIN]], [[C2]] + ; GFX8-NEXT: [[SMAX1:%[0-9]+]]:_(s16) = G_SMAX [[SUB]], [[SHL1]] + ; GFX8-NEXT: [[SMIN1:%[0-9]+]]:_(s16) = G_SMIN [[SMAX1]], [[SUB1]] + ; GFX8-NEXT: [[SUB2:%[0-9]+]]:_(s16) = G_SUB [[SHL]], [[SMIN1]] + ; GFX8-NEXT: [[ASHR:%[0-9]+]]:_(s16) = G_ASHR [[SUB2]], [[C]](s16) + ; GFX8-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[ASHR]](s16) + ; GFX8-NEXT: $vgpr0 = COPY [[ANYEXT]](s32) ; GFX9-LABEL: name: ssubsat_s7 ; GFX9: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX9: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX9: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY]](s32) - ; GFX9: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY1]](s32) - ; GFX9: [[C:%[0-9]+]]:_(s16) = G_CONSTANT i16 9 - ; GFX9: [[SHL:%[0-9]+]]:_(s16) = G_SHL [[TRUNC]], [[C]](s16) - ; GFX9: [[SHL1:%[0-9]+]]:_(s16) = G_SHL [[TRUNC1]], [[C]](s16) - ; GFX9: [[SSUBSAT:%[0-9]+]]:_(s16) = G_SSUBSAT [[SHL]], [[SHL1]] - ; GFX9: [[ASHR:%[0-9]+]]:_(s16) = G_ASHR [[SSUBSAT]], [[C]](s16) - ; GFX9: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[ASHR]](s16) - ; GFX9: $vgpr0 = COPY [[ANYEXT]](s32) + ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(s32) 
= COPY $vgpr1 + ; GFX9-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY]](s32) + ; GFX9-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY1]](s32) + ; GFX9-NEXT: [[C:%[0-9]+]]:_(s16) = G_CONSTANT i16 9 + ; GFX9-NEXT: [[SHL:%[0-9]+]]:_(s16) = G_SHL [[TRUNC]], [[C]](s16) + ; GFX9-NEXT: [[SHL1:%[0-9]+]]:_(s16) = G_SHL [[TRUNC1]], [[C]](s16) + ; GFX9-NEXT: [[SSUBSAT:%[0-9]+]]:_(s16) = G_SSUBSAT [[SHL]], [[SHL1]] + ; GFX9-NEXT: [[ASHR:%[0-9]+]]:_(s16) = G_ASHR [[SSUBSAT]], [[C]](s16) + ; GFX9-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[ASHR]](s16) + ; GFX9-NEXT: $vgpr0 = COPY [[ANYEXT]](s32) %0:_(s32) = COPY $vgpr0 %1:_(s32) = COPY $vgpr1 %2:_(s7) = G_TRUNC %0 @@ -78,55 +78,55 @@ body: | ; GFX6-LABEL: name: ssubsat_s8 ; GFX6: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX6: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX6: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 24 - ; GFX6: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY]], [[C]](s32) - ; GFX6: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[COPY1]], [[C]](s32) - ; GFX6: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 2147483647 - ; GFX6: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 -2147483648 - ; GFX6: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 -1 - ; GFX6: [[SMAX:%[0-9]+]]:_(s32) = G_SMAX [[SHL]], [[C3]] - ; GFX6: [[SUB:%[0-9]+]]:_(s32) = G_SUB [[SMAX]], [[C1]] - ; GFX6: [[SMIN:%[0-9]+]]:_(s32) = G_SMIN [[SHL]], [[C3]] - ; GFX6: [[SUB1:%[0-9]+]]:_(s32) = G_SUB [[SMIN]], [[C2]] - ; GFX6: [[SMAX1:%[0-9]+]]:_(s32) = G_SMAX [[SUB]], [[SHL1]] - ; GFX6: [[SMIN1:%[0-9]+]]:_(s32) = G_SMIN [[SMAX1]], [[SUB1]] - ; GFX6: [[SUB2:%[0-9]+]]:_(s32) = G_SUB [[SHL]], [[SMIN1]] - ; GFX6: [[ASHR:%[0-9]+]]:_(s32) = G_ASHR [[SUB2]], [[C]](s32) - ; GFX6: $vgpr0 = COPY [[ASHR]](s32) + ; GFX6-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; GFX6-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 24 + ; GFX6-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY]], [[C]](s32) + ; GFX6-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[COPY1]], [[C]](s32) + ; GFX6-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 2147483647 + ; GFX6-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 -2147483648 + ; GFX6-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 -1 + ; GFX6-NEXT: [[SMAX:%[0-9]+]]:_(s32) = G_SMAX [[SHL]], [[C3]] + ; GFX6-NEXT: [[SUB:%[0-9]+]]:_(s32) = G_SUB [[SMAX]], [[C1]] + ; GFX6-NEXT: [[SMIN:%[0-9]+]]:_(s32) = G_SMIN [[SHL]], [[C3]] + ; GFX6-NEXT: [[SUB1:%[0-9]+]]:_(s32) = G_SUB [[SMIN]], [[C2]] + ; GFX6-NEXT: [[SMAX1:%[0-9]+]]:_(s32) = G_SMAX [[SUB]], [[SHL1]] + ; GFX6-NEXT: [[SMIN1:%[0-9]+]]:_(s32) = G_SMIN [[SMAX1]], [[SUB1]] + ; GFX6-NEXT: [[SUB2:%[0-9]+]]:_(s32) = G_SUB [[SHL]], [[SMIN1]] + ; GFX6-NEXT: [[ASHR:%[0-9]+]]:_(s32) = G_ASHR [[SUB2]], [[C]](s32) + ; GFX6-NEXT: $vgpr0 = COPY [[ASHR]](s32) ; GFX8-LABEL: name: ssubsat_s8 ; GFX8: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX8: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX8: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY]](s32) - ; GFX8: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY1]](s32) - ; GFX8: [[C:%[0-9]+]]:_(s16) = G_CONSTANT i16 8 - ; GFX8: [[SHL:%[0-9]+]]:_(s16) = G_SHL [[TRUNC]], [[C]](s16) - ; GFX8: [[SHL1:%[0-9]+]]:_(s16) = G_SHL [[TRUNC1]], [[C]](s16) - ; GFX8: [[C1:%[0-9]+]]:_(s16) = G_CONSTANT i16 32767 - ; GFX8: [[C2:%[0-9]+]]:_(s16) = G_CONSTANT i16 -32768 - ; GFX8: [[C3:%[0-9]+]]:_(s16) = G_CONSTANT i16 -1 - ; GFX8: [[SMAX:%[0-9]+]]:_(s16) = G_SMAX [[SHL]], [[C3]] - ; GFX8: [[SUB:%[0-9]+]]:_(s16) = G_SUB [[SMAX]], [[C1]] - ; GFX8: [[SMIN:%[0-9]+]]:_(s16) = G_SMIN [[SHL]], [[C3]] - ; GFX8: [[SUB1:%[0-9]+]]:_(s16) = G_SUB [[SMIN]], [[C2]] - ; GFX8: [[SMAX1:%[0-9]+]]:_(s16) = 
G_SMAX [[SUB]], [[SHL1]] - ; GFX8: [[SMIN1:%[0-9]+]]:_(s16) = G_SMIN [[SMAX1]], [[SUB1]] - ; GFX8: [[SUB2:%[0-9]+]]:_(s16) = G_SUB [[SHL]], [[SMIN1]] - ; GFX8: [[ASHR:%[0-9]+]]:_(s16) = G_ASHR [[SUB2]], [[C]](s16) - ; GFX8: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[ASHR]](s16) - ; GFX8: $vgpr0 = COPY [[ANYEXT]](s32) + ; GFX8-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; GFX8-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY]](s32) + ; GFX8-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY1]](s32) + ; GFX8-NEXT: [[C:%[0-9]+]]:_(s16) = G_CONSTANT i16 8 + ; GFX8-NEXT: [[SHL:%[0-9]+]]:_(s16) = G_SHL [[TRUNC]], [[C]](s16) + ; GFX8-NEXT: [[SHL1:%[0-9]+]]:_(s16) = G_SHL [[TRUNC1]], [[C]](s16) + ; GFX8-NEXT: [[C1:%[0-9]+]]:_(s16) = G_CONSTANT i16 32767 + ; GFX8-NEXT: [[C2:%[0-9]+]]:_(s16) = G_CONSTANT i16 -32768 + ; GFX8-NEXT: [[C3:%[0-9]+]]:_(s16) = G_CONSTANT i16 -1 + ; GFX8-NEXT: [[SMAX:%[0-9]+]]:_(s16) = G_SMAX [[SHL]], [[C3]] + ; GFX8-NEXT: [[SUB:%[0-9]+]]:_(s16) = G_SUB [[SMAX]], [[C1]] + ; GFX8-NEXT: [[SMIN:%[0-9]+]]:_(s16) = G_SMIN [[SHL]], [[C3]] + ; GFX8-NEXT: [[SUB1:%[0-9]+]]:_(s16) = G_SUB [[SMIN]], [[C2]] + ; GFX8-NEXT: [[SMAX1:%[0-9]+]]:_(s16) = G_SMAX [[SUB]], [[SHL1]] + ; GFX8-NEXT: [[SMIN1:%[0-9]+]]:_(s16) = G_SMIN [[SMAX1]], [[SUB1]] + ; GFX8-NEXT: [[SUB2:%[0-9]+]]:_(s16) = G_SUB [[SHL]], [[SMIN1]] + ; GFX8-NEXT: [[ASHR:%[0-9]+]]:_(s16) = G_ASHR [[SUB2]], [[C]](s16) + ; GFX8-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[ASHR]](s16) + ; GFX8-NEXT: $vgpr0 = COPY [[ANYEXT]](s32) ; GFX9-LABEL: name: ssubsat_s8 ; GFX9: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX9: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX9: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY]](s32) - ; GFX9: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY1]](s32) - ; GFX9: [[C:%[0-9]+]]:_(s16) = G_CONSTANT i16 8 - ; GFX9: [[SHL:%[0-9]+]]:_(s16) = G_SHL [[TRUNC]], [[C]](s16) - ; GFX9: [[SHL1:%[0-9]+]]:_(s16) = G_SHL [[TRUNC1]], [[C]](s16) - ; GFX9: [[SSUBSAT:%[0-9]+]]:_(s16) = G_SSUBSAT [[SHL]], [[SHL1]] - ; GFX9: [[ASHR:%[0-9]+]]:_(s16) = G_ASHR [[SSUBSAT]], [[C]](s16) - ; GFX9: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[ASHR]](s16) - ; GFX9: $vgpr0 = COPY [[ANYEXT]](s32) + ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; GFX9-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY]](s32) + ; GFX9-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY1]](s32) + ; GFX9-NEXT: [[C:%[0-9]+]]:_(s16) = G_CONSTANT i16 8 + ; GFX9-NEXT: [[SHL:%[0-9]+]]:_(s16) = G_SHL [[TRUNC]], [[C]](s16) + ; GFX9-NEXT: [[SHL1:%[0-9]+]]:_(s16) = G_SHL [[TRUNC1]], [[C]](s16) + ; GFX9-NEXT: [[SSUBSAT:%[0-9]+]]:_(s16) = G_SSUBSAT [[SHL]], [[SHL1]] + ; GFX9-NEXT: [[ASHR:%[0-9]+]]:_(s16) = G_ASHR [[SSUBSAT]], [[C]](s16) + ; GFX9-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[ASHR]](s16) + ; GFX9-NEXT: $vgpr0 = COPY [[ANYEXT]](s32) %0:_(s32) = COPY $vgpr0 %1:_(s32) = COPY $vgpr1 %2:_(s8) = G_TRUNC %0 @@ -144,113 +144,113 @@ body: | ; GFX6-LABEL: name: ssubsat_v2s8 ; GFX6: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX6: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX6: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 - ; GFX6: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[COPY]], [[C]](s32) - ; GFX6: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 24 - ; GFX6: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[COPY1]], [[C]](s32) - ; GFX6: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY]], [[C1]](s32) - ; GFX6: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[COPY1]], [[C1]](s32) - ; GFX6: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 2147483647 - ; GFX6: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 -2147483648 - ; GFX6: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT 
i32 -1 - ; GFX6: [[SMAX:%[0-9]+]]:_(s32) = G_SMAX [[SHL]], [[C4]] - ; GFX6: [[SUB:%[0-9]+]]:_(s32) = G_SUB [[SMAX]], [[C2]] - ; GFX6: [[SMIN:%[0-9]+]]:_(s32) = G_SMIN [[SHL]], [[C4]] - ; GFX6: [[SUB1:%[0-9]+]]:_(s32) = G_SUB [[SMIN]], [[C3]] - ; GFX6: [[SMAX1:%[0-9]+]]:_(s32) = G_SMAX [[SUB]], [[SHL1]] - ; GFX6: [[SMIN1:%[0-9]+]]:_(s32) = G_SMIN [[SMAX1]], [[SUB1]] - ; GFX6: [[SUB2:%[0-9]+]]:_(s32) = G_SUB [[SHL]], [[SMIN1]] - ; GFX6: [[ASHR:%[0-9]+]]:_(s32) = G_ASHR [[SUB2]], [[C1]](s32) - ; GFX6: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[LSHR]], [[C1]](s32) - ; GFX6: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[LSHR1]], [[C1]](s32) - ; GFX6: [[SMAX2:%[0-9]+]]:_(s32) = G_SMAX [[SHL2]], [[C4]] - ; GFX6: [[SUB3:%[0-9]+]]:_(s32) = G_SUB [[SMAX2]], [[C2]] - ; GFX6: [[SMIN2:%[0-9]+]]:_(s32) = G_SMIN [[SHL2]], [[C4]] - ; GFX6: [[SUB4:%[0-9]+]]:_(s32) = G_SUB [[SMIN2]], [[C3]] - ; GFX6: [[SMAX3:%[0-9]+]]:_(s32) = G_SMAX [[SUB3]], [[SHL3]] - ; GFX6: [[SMIN3:%[0-9]+]]:_(s32) = G_SMIN [[SMAX3]], [[SUB4]] - ; GFX6: [[SUB5:%[0-9]+]]:_(s32) = G_SUB [[SHL2]], [[SMIN3]] - ; GFX6: [[ASHR1:%[0-9]+]]:_(s32) = G_ASHR [[SUB5]], [[C1]](s32) - ; GFX6: [[C5:%[0-9]+]]:_(s16) = G_CONSTANT i16 255 - ; GFX6: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[ASHR]](s32) - ; GFX6: [[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC]], [[C5]] - ; GFX6: [[COPY2:%[0-9]+]]:_(s32) = COPY [[C]](s32) - ; GFX6: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 255 - ; GFX6: [[AND1:%[0-9]+]]:_(s32) = G_AND [[ASHR1]], [[C6]] - ; GFX6: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[COPY2]](s32) - ; GFX6: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[SHL4]](s32) - ; GFX6: [[OR:%[0-9]+]]:_(s16) = G_OR [[AND]], [[TRUNC1]] - ; GFX6: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[OR]](s16) - ; GFX6: $vgpr0 = COPY [[ANYEXT]](s32) + ; GFX6-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; GFX6-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 + ; GFX6-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[COPY]], [[C]](s32) + ; GFX6-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 24 + ; GFX6-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[COPY1]], [[C]](s32) + ; GFX6-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY]], [[C1]](s32) + ; GFX6-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[COPY1]], [[C1]](s32) + ; GFX6-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 2147483647 + ; GFX6-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 -2147483648 + ; GFX6-NEXT: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 -1 + ; GFX6-NEXT: [[SMAX:%[0-9]+]]:_(s32) = G_SMAX [[SHL]], [[C4]] + ; GFX6-NEXT: [[SUB:%[0-9]+]]:_(s32) = G_SUB [[SMAX]], [[C2]] + ; GFX6-NEXT: [[SMIN:%[0-9]+]]:_(s32) = G_SMIN [[SHL]], [[C4]] + ; GFX6-NEXT: [[SUB1:%[0-9]+]]:_(s32) = G_SUB [[SMIN]], [[C3]] + ; GFX6-NEXT: [[SMAX1:%[0-9]+]]:_(s32) = G_SMAX [[SUB]], [[SHL1]] + ; GFX6-NEXT: [[SMIN1:%[0-9]+]]:_(s32) = G_SMIN [[SMAX1]], [[SUB1]] + ; GFX6-NEXT: [[SUB2:%[0-9]+]]:_(s32) = G_SUB [[SHL]], [[SMIN1]] + ; GFX6-NEXT: [[ASHR:%[0-9]+]]:_(s32) = G_ASHR [[SUB2]], [[C1]](s32) + ; GFX6-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[LSHR]], [[C1]](s32) + ; GFX6-NEXT: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[LSHR1]], [[C1]](s32) + ; GFX6-NEXT: [[SMAX2:%[0-9]+]]:_(s32) = G_SMAX [[SHL2]], [[C4]] + ; GFX6-NEXT: [[SUB3:%[0-9]+]]:_(s32) = G_SUB [[SMAX2]], [[C2]] + ; GFX6-NEXT: [[SMIN2:%[0-9]+]]:_(s32) = G_SMIN [[SHL2]], [[C4]] + ; GFX6-NEXT: [[SUB4:%[0-9]+]]:_(s32) = G_SUB [[SMIN2]], [[C3]] + ; GFX6-NEXT: [[SMAX3:%[0-9]+]]:_(s32) = G_SMAX [[SUB3]], [[SHL3]] + ; GFX6-NEXT: [[SMIN3:%[0-9]+]]:_(s32) = G_SMIN [[SMAX3]], [[SUB4]] + ; GFX6-NEXT: [[SUB5:%[0-9]+]]:_(s32) = G_SUB [[SHL2]], [[SMIN3]] + ; GFX6-NEXT: [[ASHR1:%[0-9]+]]:_(s32) = G_ASHR 
[[SUB5]], [[C1]](s32) + ; GFX6-NEXT: [[C5:%[0-9]+]]:_(s16) = G_CONSTANT i16 255 + ; GFX6-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[ASHR]](s32) + ; GFX6-NEXT: [[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC]], [[C5]] + ; GFX6-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY [[C]](s32) + ; GFX6-NEXT: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 255 + ; GFX6-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[ASHR1]], [[C6]] + ; GFX6-NEXT: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[COPY2]](s32) + ; GFX6-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[SHL4]](s32) + ; GFX6-NEXT: [[OR:%[0-9]+]]:_(s16) = G_OR [[AND]], [[TRUNC1]] + ; GFX6-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[OR]](s16) + ; GFX6-NEXT: $vgpr0 = COPY [[ANYEXT]](s32) ; GFX8-LABEL: name: ssubsat_v2s8 ; GFX8: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX8: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX8: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 - ; GFX8: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[COPY]], [[C]](s32) - ; GFX8: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[COPY1]], [[C]](s32) - ; GFX8: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY]](s32) - ; GFX8: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY1]](s32) - ; GFX8: [[C1:%[0-9]+]]:_(s16) = G_CONSTANT i16 8 - ; GFX8: [[SHL:%[0-9]+]]:_(s16) = G_SHL [[TRUNC]], [[C1]](s16) - ; GFX8: [[SHL1:%[0-9]+]]:_(s16) = G_SHL [[TRUNC1]], [[C1]](s16) - ; GFX8: [[C2:%[0-9]+]]:_(s16) = G_CONSTANT i16 32767 - ; GFX8: [[C3:%[0-9]+]]:_(s16) = G_CONSTANT i16 -32768 - ; GFX8: [[C4:%[0-9]+]]:_(s16) = G_CONSTANT i16 -1 - ; GFX8: [[SMAX:%[0-9]+]]:_(s16) = G_SMAX [[SHL]], [[C4]] - ; GFX8: [[SUB:%[0-9]+]]:_(s16) = G_SUB [[SMAX]], [[C2]] - ; GFX8: [[SMIN:%[0-9]+]]:_(s16) = G_SMIN [[SHL]], [[C4]] - ; GFX8: [[SUB1:%[0-9]+]]:_(s16) = G_SUB [[SMIN]], [[C3]] - ; GFX8: [[SMAX1:%[0-9]+]]:_(s16) = G_SMAX [[SUB]], [[SHL1]] - ; GFX8: [[SMIN1:%[0-9]+]]:_(s16) = G_SMIN [[SMAX1]], [[SUB1]] - ; GFX8: [[SUB2:%[0-9]+]]:_(s16) = G_SUB [[SHL]], [[SMIN1]] - ; GFX8: [[ASHR:%[0-9]+]]:_(s16) = G_ASHR [[SUB2]], [[C1]](s16) - ; GFX8: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) - ; GFX8: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR1]](s32) - ; GFX8: [[SHL2:%[0-9]+]]:_(s16) = G_SHL [[TRUNC2]], [[C1]](s16) - ; GFX8: [[SHL3:%[0-9]+]]:_(s16) = G_SHL [[TRUNC3]], [[C1]](s16) - ; GFX8: [[SMAX2:%[0-9]+]]:_(s16) = G_SMAX [[SHL2]], [[C4]] - ; GFX8: [[SUB3:%[0-9]+]]:_(s16) = G_SUB [[SMAX2]], [[C2]] - ; GFX8: [[SMIN2:%[0-9]+]]:_(s16) = G_SMIN [[SHL2]], [[C4]] - ; GFX8: [[SUB4:%[0-9]+]]:_(s16) = G_SUB [[SMIN2]], [[C3]] - ; GFX8: [[SMAX3:%[0-9]+]]:_(s16) = G_SMAX [[SUB3]], [[SHL3]] - ; GFX8: [[SMIN3:%[0-9]+]]:_(s16) = G_SMIN [[SMAX3]], [[SUB4]] - ; GFX8: [[SUB5:%[0-9]+]]:_(s16) = G_SUB [[SHL2]], [[SMIN3]] - ; GFX8: [[ASHR1:%[0-9]+]]:_(s16) = G_ASHR [[SUB5]], [[C1]](s16) - ; GFX8: [[C5:%[0-9]+]]:_(s16) = G_CONSTANT i16 255 - ; GFX8: [[AND:%[0-9]+]]:_(s16) = G_AND [[ASHR]], [[C5]] - ; GFX8: [[AND1:%[0-9]+]]:_(s16) = G_AND [[ASHR1]], [[C5]] - ; GFX8: [[SHL4:%[0-9]+]]:_(s16) = G_SHL [[AND1]], [[C1]](s16) - ; GFX8: [[OR:%[0-9]+]]:_(s16) = G_OR [[AND]], [[SHL4]] - ; GFX8: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[OR]](s16) - ; GFX8: $vgpr0 = COPY [[ANYEXT]](s32) + ; GFX8-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; GFX8-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 + ; GFX8-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[COPY]], [[C]](s32) + ; GFX8-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[COPY1]], [[C]](s32) + ; GFX8-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY]](s32) + ; GFX8-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY1]](s32) + ; GFX8-NEXT: [[C1:%[0-9]+]]:_(s16) = G_CONSTANT i16 8 + ; GFX8-NEXT: 
[[SHL:%[0-9]+]]:_(s16) = G_SHL [[TRUNC]], [[C1]](s16) + ; GFX8-NEXT: [[SHL1:%[0-9]+]]:_(s16) = G_SHL [[TRUNC1]], [[C1]](s16) + ; GFX8-NEXT: [[C2:%[0-9]+]]:_(s16) = G_CONSTANT i16 32767 + ; GFX8-NEXT: [[C3:%[0-9]+]]:_(s16) = G_CONSTANT i16 -32768 + ; GFX8-NEXT: [[C4:%[0-9]+]]:_(s16) = G_CONSTANT i16 -1 + ; GFX8-NEXT: [[SMAX:%[0-9]+]]:_(s16) = G_SMAX [[SHL]], [[C4]] + ; GFX8-NEXT: [[SUB:%[0-9]+]]:_(s16) = G_SUB [[SMAX]], [[C2]] + ; GFX8-NEXT: [[SMIN:%[0-9]+]]:_(s16) = G_SMIN [[SHL]], [[C4]] + ; GFX8-NEXT: [[SUB1:%[0-9]+]]:_(s16) = G_SUB [[SMIN]], [[C3]] + ; GFX8-NEXT: [[SMAX1:%[0-9]+]]:_(s16) = G_SMAX [[SUB]], [[SHL1]] + ; GFX8-NEXT: [[SMIN1:%[0-9]+]]:_(s16) = G_SMIN [[SMAX1]], [[SUB1]] + ; GFX8-NEXT: [[SUB2:%[0-9]+]]:_(s16) = G_SUB [[SHL]], [[SMIN1]] + ; GFX8-NEXT: [[ASHR:%[0-9]+]]:_(s16) = G_ASHR [[SUB2]], [[C1]](s16) + ; GFX8-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) + ; GFX8-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR1]](s32) + ; GFX8-NEXT: [[SHL2:%[0-9]+]]:_(s16) = G_SHL [[TRUNC2]], [[C1]](s16) + ; GFX8-NEXT: [[SHL3:%[0-9]+]]:_(s16) = G_SHL [[TRUNC3]], [[C1]](s16) + ; GFX8-NEXT: [[SMAX2:%[0-9]+]]:_(s16) = G_SMAX [[SHL2]], [[C4]] + ; GFX8-NEXT: [[SUB3:%[0-9]+]]:_(s16) = G_SUB [[SMAX2]], [[C2]] + ; GFX8-NEXT: [[SMIN2:%[0-9]+]]:_(s16) = G_SMIN [[SHL2]], [[C4]] + ; GFX8-NEXT: [[SUB4:%[0-9]+]]:_(s16) = G_SUB [[SMIN2]], [[C3]] + ; GFX8-NEXT: [[SMAX3:%[0-9]+]]:_(s16) = G_SMAX [[SUB3]], [[SHL3]] + ; GFX8-NEXT: [[SMIN3:%[0-9]+]]:_(s16) = G_SMIN [[SMAX3]], [[SUB4]] + ; GFX8-NEXT: [[SUB5:%[0-9]+]]:_(s16) = G_SUB [[SHL2]], [[SMIN3]] + ; GFX8-NEXT: [[ASHR1:%[0-9]+]]:_(s16) = G_ASHR [[SUB5]], [[C1]](s16) + ; GFX8-NEXT: [[C5:%[0-9]+]]:_(s16) = G_CONSTANT i16 255 + ; GFX8-NEXT: [[AND:%[0-9]+]]:_(s16) = G_AND [[ASHR]], [[C5]] + ; GFX8-NEXT: [[AND1:%[0-9]+]]:_(s16) = G_AND [[ASHR1]], [[C5]] + ; GFX8-NEXT: [[SHL4:%[0-9]+]]:_(s16) = G_SHL [[AND1]], [[C1]](s16) + ; GFX8-NEXT: [[OR:%[0-9]+]]:_(s16) = G_OR [[AND]], [[SHL4]] + ; GFX8-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[OR]](s16) + ; GFX8-NEXT: $vgpr0 = COPY [[ANYEXT]](s32) ; GFX9-LABEL: name: ssubsat_v2s8 ; GFX9: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX9: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX9: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 - ; GFX9: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[COPY]], [[C]](s32) - ; GFX9: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; GFX9: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[COPY1]], [[C]](s32) - ; GFX9: [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY]](s32), [[LSHR]](s32) - ; GFX9: [[BUILD_VECTOR_TRUNC1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY1]](s32), [[LSHR1]](s32) - ; GFX9: [[C2:%[0-9]+]]:_(s16) = G_CONSTANT i16 8 - ; GFX9: [[COPY2:%[0-9]+]]:_(s32) = COPY [[C]](s32) - ; GFX9: [[BUILD_VECTOR_TRUNC2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY2]](s32), [[COPY2]](s32) - ; GFX9: [[SHL:%[0-9]+]]:_(<2 x s16>) = G_SHL [[BUILD_VECTOR_TRUNC]], [[BUILD_VECTOR_TRUNC2]](<2 x s16>) - ; GFX9: [[SHL1:%[0-9]+]]:_(<2 x s16>) = G_SHL [[BUILD_VECTOR_TRUNC1]], [[BUILD_VECTOR_TRUNC2]](<2 x s16>) - ; GFX9: [[SSUBSAT:%[0-9]+]]:_(<2 x s16>) = G_SSUBSAT [[SHL]], [[SHL1]] - ; GFX9: [[ASHR:%[0-9]+]]:_(<2 x s16>) = G_ASHR [[SSUBSAT]], [[BUILD_VECTOR_TRUNC2]](<2 x s16>) - ; GFX9: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[ASHR]](<2 x s16>) - ; GFX9: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST]](s32) - ; GFX9: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C1]](s32) - ; GFX9: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR2]](s32) - ; GFX9: [[C3:%[0-9]+]]:_(s16) = G_CONSTANT i16 255 - 
; GFX9: [[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC]], [[C3]] - ; GFX9: [[AND1:%[0-9]+]]:_(s16) = G_AND [[TRUNC1]], [[C3]] - ; GFX9: [[SHL2:%[0-9]+]]:_(s16) = G_SHL [[AND1]], [[C2]](s16) - ; GFX9: [[OR:%[0-9]+]]:_(s16) = G_OR [[AND]], [[SHL2]] - ; GFX9: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[OR]](s16) - ; GFX9: $vgpr0 = COPY [[ANYEXT]](s32) + ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; GFX9-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 + ; GFX9-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[COPY]], [[C]](s32) + ; GFX9-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; GFX9-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[COPY1]], [[C]](s32) + ; GFX9-NEXT: [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY]](s32), [[LSHR]](s32) + ; GFX9-NEXT: [[BUILD_VECTOR_TRUNC1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY1]](s32), [[LSHR1]](s32) + ; GFX9-NEXT: [[C2:%[0-9]+]]:_(s16) = G_CONSTANT i16 8 + ; GFX9-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY [[C]](s32) + ; GFX9-NEXT: [[BUILD_VECTOR_TRUNC2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY2]](s32), [[COPY2]](s32) + ; GFX9-NEXT: [[SHL:%[0-9]+]]:_(<2 x s16>) = G_SHL [[BUILD_VECTOR_TRUNC]], [[BUILD_VECTOR_TRUNC2]](<2 x s16>) + ; GFX9-NEXT: [[SHL1:%[0-9]+]]:_(<2 x s16>) = G_SHL [[BUILD_VECTOR_TRUNC1]], [[BUILD_VECTOR_TRUNC2]](<2 x s16>) + ; GFX9-NEXT: [[SSUBSAT:%[0-9]+]]:_(<2 x s16>) = G_SSUBSAT [[SHL]], [[SHL1]] + ; GFX9-NEXT: [[ASHR:%[0-9]+]]:_(<2 x s16>) = G_ASHR [[SSUBSAT]], [[BUILD_VECTOR_TRUNC2]](<2 x s16>) + ; GFX9-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[ASHR]](<2 x s16>) + ; GFX9-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST]](s32) + ; GFX9-NEXT: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C1]](s32) + ; GFX9-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR2]](s32) + ; GFX9-NEXT: [[C3:%[0-9]+]]:_(s16) = G_CONSTANT i16 255 + ; GFX9-NEXT: [[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC]], [[C3]] + ; GFX9-NEXT: [[AND1:%[0-9]+]]:_(s16) = G_AND [[TRUNC1]], [[C3]] + ; GFX9-NEXT: [[SHL2:%[0-9]+]]:_(s16) = G_SHL [[AND1]], [[C2]](s16) + ; GFX9-NEXT: [[OR:%[0-9]+]]:_(s16) = G_OR [[AND]], [[SHL2]] + ; GFX9-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[OR]](s16) + ; GFX9-NEXT: $vgpr0 = COPY [[ANYEXT]](s32) %0:_(s32) = COPY $vgpr0 %1:_(s32) = COPY $vgpr1 %2:_(s16) = G_TRUNC %0 @@ -271,47 +271,47 @@ body: | ; GFX6-LABEL: name: ssubsat_s16 ; GFX6: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX6: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX6: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; GFX6: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY]], [[C]](s32) - ; GFX6: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[COPY1]], [[C]](s32) - ; GFX6: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 2147483647 - ; GFX6: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 -2147483648 - ; GFX6: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 -1 - ; GFX6: [[SMAX:%[0-9]+]]:_(s32) = G_SMAX [[SHL]], [[C3]] - ; GFX6: [[SUB:%[0-9]+]]:_(s32) = G_SUB [[SMAX]], [[C1]] - ; GFX6: [[SMIN:%[0-9]+]]:_(s32) = G_SMIN [[SHL]], [[C3]] - ; GFX6: [[SUB1:%[0-9]+]]:_(s32) = G_SUB [[SMIN]], [[C2]] - ; GFX6: [[SMAX1:%[0-9]+]]:_(s32) = G_SMAX [[SUB]], [[SHL1]] - ; GFX6: [[SMIN1:%[0-9]+]]:_(s32) = G_SMIN [[SMAX1]], [[SUB1]] - ; GFX6: [[SUB2:%[0-9]+]]:_(s32) = G_SUB [[SHL]], [[SMIN1]] - ; GFX6: [[ASHR:%[0-9]+]]:_(s32) = G_ASHR [[SUB2]], [[C]](s32) - ; GFX6: $vgpr0 = COPY [[ASHR]](s32) + ; GFX6-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; GFX6-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; GFX6-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY]], [[C]](s32) + ; GFX6-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[COPY1]], 
[[C]](s32) + ; GFX6-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 2147483647 + ; GFX6-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 -2147483648 + ; GFX6-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 -1 + ; GFX6-NEXT: [[SMAX:%[0-9]+]]:_(s32) = G_SMAX [[SHL]], [[C3]] + ; GFX6-NEXT: [[SUB:%[0-9]+]]:_(s32) = G_SUB [[SMAX]], [[C1]] + ; GFX6-NEXT: [[SMIN:%[0-9]+]]:_(s32) = G_SMIN [[SHL]], [[C3]] + ; GFX6-NEXT: [[SUB1:%[0-9]+]]:_(s32) = G_SUB [[SMIN]], [[C2]] + ; GFX6-NEXT: [[SMAX1:%[0-9]+]]:_(s32) = G_SMAX [[SUB]], [[SHL1]] + ; GFX6-NEXT: [[SMIN1:%[0-9]+]]:_(s32) = G_SMIN [[SMAX1]], [[SUB1]] + ; GFX6-NEXT: [[SUB2:%[0-9]+]]:_(s32) = G_SUB [[SHL]], [[SMIN1]] + ; GFX6-NEXT: [[ASHR:%[0-9]+]]:_(s32) = G_ASHR [[SUB2]], [[C]](s32) + ; GFX6-NEXT: $vgpr0 = COPY [[ASHR]](s32) ; GFX8-LABEL: name: ssubsat_s16 ; GFX8: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX8: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX8: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY]](s32) - ; GFX8: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY1]](s32) - ; GFX8: [[C:%[0-9]+]]:_(s16) = G_CONSTANT i16 32767 - ; GFX8: [[C1:%[0-9]+]]:_(s16) = G_CONSTANT i16 -32768 - ; GFX8: [[C2:%[0-9]+]]:_(s16) = G_CONSTANT i16 -1 - ; GFX8: [[SMAX:%[0-9]+]]:_(s16) = G_SMAX [[TRUNC]], [[C2]] - ; GFX8: [[SUB:%[0-9]+]]:_(s16) = G_SUB [[SMAX]], [[C]] - ; GFX8: [[SMIN:%[0-9]+]]:_(s16) = G_SMIN [[TRUNC]], [[C2]] - ; GFX8: [[SUB1:%[0-9]+]]:_(s16) = G_SUB [[SMIN]], [[C1]] - ; GFX8: [[SMAX1:%[0-9]+]]:_(s16) = G_SMAX [[SUB]], [[TRUNC1]] - ; GFX8: [[SMIN1:%[0-9]+]]:_(s16) = G_SMIN [[SMAX1]], [[SUB1]] - ; GFX8: [[SUB2:%[0-9]+]]:_(s16) = G_SUB [[TRUNC]], [[SMIN1]] - ; GFX8: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[SUB2]](s16) - ; GFX8: $vgpr0 = COPY [[ANYEXT]](s32) + ; GFX8-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; GFX8-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY]](s32) + ; GFX8-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY1]](s32) + ; GFX8-NEXT: [[C:%[0-9]+]]:_(s16) = G_CONSTANT i16 32767 + ; GFX8-NEXT: [[C1:%[0-9]+]]:_(s16) = G_CONSTANT i16 -32768 + ; GFX8-NEXT: [[C2:%[0-9]+]]:_(s16) = G_CONSTANT i16 -1 + ; GFX8-NEXT: [[SMAX:%[0-9]+]]:_(s16) = G_SMAX [[TRUNC]], [[C2]] + ; GFX8-NEXT: [[SUB:%[0-9]+]]:_(s16) = G_SUB [[SMAX]], [[C]] + ; GFX8-NEXT: [[SMIN:%[0-9]+]]:_(s16) = G_SMIN [[TRUNC]], [[C2]] + ; GFX8-NEXT: [[SUB1:%[0-9]+]]:_(s16) = G_SUB [[SMIN]], [[C1]] + ; GFX8-NEXT: [[SMAX1:%[0-9]+]]:_(s16) = G_SMAX [[SUB]], [[TRUNC1]] + ; GFX8-NEXT: [[SMIN1:%[0-9]+]]:_(s16) = G_SMIN [[SMAX1]], [[SUB1]] + ; GFX8-NEXT: [[SUB2:%[0-9]+]]:_(s16) = G_SUB [[TRUNC]], [[SMIN1]] + ; GFX8-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[SUB2]](s16) + ; GFX8-NEXT: $vgpr0 = COPY [[ANYEXT]](s32) ; GFX9-LABEL: name: ssubsat_s16 ; GFX9: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX9: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX9: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY]](s32) - ; GFX9: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY1]](s32) - ; GFX9: [[SSUBSAT:%[0-9]+]]:_(s16) = G_SSUBSAT [[TRUNC]], [[TRUNC1]] - ; GFX9: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[SSUBSAT]](s16) - ; GFX9: $vgpr0 = COPY [[ANYEXT]](s32) + ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; GFX9-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY]](s32) + ; GFX9-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY1]](s32) + ; GFX9-NEXT: [[SSUBSAT:%[0-9]+]]:_(s16) = G_SSUBSAT [[TRUNC]], [[TRUNC1]] + ; GFX9-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[SSUBSAT]](s16) + ; GFX9-NEXT: $vgpr0 = COPY [[ANYEXT]](s32) %0:_(s32) = COPY $vgpr0 %1:_(s32) = COPY $vgpr1 %2:_(s16) = G_TRUNC %0 @@ -329,82 +329,82 @@ body: | ; GFX6-LABEL: 
name: ssubsat_v2s16 ; GFX6: [[COPY:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0 - ; GFX6: [[COPY1:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr1 - ; GFX6: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY]](<2 x s16>) - ; GFX6: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; GFX6: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) - ; GFX6: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[COPY1]](<2 x s16>) - ; GFX6: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C]](s32) - ; GFX6: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[BITCAST]], [[C]](s32) - ; GFX6: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[BITCAST1]], [[C]](s32) - ; GFX6: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 2147483647 - ; GFX6: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 -2147483648 - ; GFX6: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 -1 - ; GFX6: [[SMAX:%[0-9]+]]:_(s32) = G_SMAX [[SHL]], [[C3]] - ; GFX6: [[SUB:%[0-9]+]]:_(s32) = G_SUB [[SMAX]], [[C1]] - ; GFX6: [[SMIN:%[0-9]+]]:_(s32) = G_SMIN [[SHL]], [[C3]] - ; GFX6: [[SUB1:%[0-9]+]]:_(s32) = G_SUB [[SMIN]], [[C2]] - ; GFX6: [[SMAX1:%[0-9]+]]:_(s32) = G_SMAX [[SUB]], [[SHL1]] - ; GFX6: [[SMIN1:%[0-9]+]]:_(s32) = G_SMIN [[SMAX1]], [[SUB1]] - ; GFX6: [[SUB2:%[0-9]+]]:_(s32) = G_SUB [[SHL]], [[SMIN1]] - ; GFX6: [[ASHR:%[0-9]+]]:_(s32) = G_ASHR [[SUB2]], [[C]](s32) - ; GFX6: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[LSHR]], [[C]](s32) - ; GFX6: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[LSHR1]], [[C]](s32) - ; GFX6: [[SMAX2:%[0-9]+]]:_(s32) = G_SMAX [[SHL2]], [[C3]] - ; GFX6: [[SUB3:%[0-9]+]]:_(s32) = G_SUB [[SMAX2]], [[C1]] - ; GFX6: [[SMIN2:%[0-9]+]]:_(s32) = G_SMIN [[SHL2]], [[C3]] - ; GFX6: [[SUB4:%[0-9]+]]:_(s32) = G_SUB [[SMIN2]], [[C2]] - ; GFX6: [[SMAX3:%[0-9]+]]:_(s32) = G_SMAX [[SUB3]], [[SHL3]] - ; GFX6: [[SMIN3:%[0-9]+]]:_(s32) = G_SMIN [[SMAX3]], [[SUB4]] - ; GFX6: [[SUB5:%[0-9]+]]:_(s32) = G_SUB [[SHL2]], [[SMIN3]] - ; GFX6: [[ASHR1:%[0-9]+]]:_(s32) = G_ASHR [[SUB5]], [[C]](s32) - ; GFX6: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 - ; GFX6: [[AND:%[0-9]+]]:_(s32) = G_AND [[ASHR]], [[C4]] - ; GFX6: [[AND1:%[0-9]+]]:_(s32) = G_AND [[ASHR1]], [[C4]] - ; GFX6: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C]](s32) - ; GFX6: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL4]] - ; GFX6: [[BITCAST2:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) - ; GFX6: $vgpr0 = COPY [[BITCAST2]](<2 x s16>) + ; GFX6-NEXT: [[COPY1:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr1 + ; GFX6-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY]](<2 x s16>) + ; GFX6-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; GFX6-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) + ; GFX6-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[COPY1]](<2 x s16>) + ; GFX6-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C]](s32) + ; GFX6-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[BITCAST]], [[C]](s32) + ; GFX6-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[BITCAST1]], [[C]](s32) + ; GFX6-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 2147483647 + ; GFX6-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 -2147483648 + ; GFX6-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 -1 + ; GFX6-NEXT: [[SMAX:%[0-9]+]]:_(s32) = G_SMAX [[SHL]], [[C3]] + ; GFX6-NEXT: [[SUB:%[0-9]+]]:_(s32) = G_SUB [[SMAX]], [[C1]] + ; GFX6-NEXT: [[SMIN:%[0-9]+]]:_(s32) = G_SMIN [[SHL]], [[C3]] + ; GFX6-NEXT: [[SUB1:%[0-9]+]]:_(s32) = G_SUB [[SMIN]], [[C2]] + ; GFX6-NEXT: [[SMAX1:%[0-9]+]]:_(s32) = G_SMAX [[SUB]], [[SHL1]] + ; GFX6-NEXT: [[SMIN1:%[0-9]+]]:_(s32) = G_SMIN [[SMAX1]], [[SUB1]] + ; GFX6-NEXT: [[SUB2:%[0-9]+]]:_(s32) = G_SUB [[SHL]], [[SMIN1]] + ; GFX6-NEXT: [[ASHR:%[0-9]+]]:_(s32) = G_ASHR [[SUB2]], [[C]](s32) + ; 
GFX6-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[LSHR]], [[C]](s32) + ; GFX6-NEXT: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[LSHR1]], [[C]](s32) + ; GFX6-NEXT: [[SMAX2:%[0-9]+]]:_(s32) = G_SMAX [[SHL2]], [[C3]] + ; GFX6-NEXT: [[SUB3:%[0-9]+]]:_(s32) = G_SUB [[SMAX2]], [[C1]] + ; GFX6-NEXT: [[SMIN2:%[0-9]+]]:_(s32) = G_SMIN [[SHL2]], [[C3]] + ; GFX6-NEXT: [[SUB4:%[0-9]+]]:_(s32) = G_SUB [[SMIN2]], [[C2]] + ; GFX6-NEXT: [[SMAX3:%[0-9]+]]:_(s32) = G_SMAX [[SUB3]], [[SHL3]] + ; GFX6-NEXT: [[SMIN3:%[0-9]+]]:_(s32) = G_SMIN [[SMAX3]], [[SUB4]] + ; GFX6-NEXT: [[SUB5:%[0-9]+]]:_(s32) = G_SUB [[SHL2]], [[SMIN3]] + ; GFX6-NEXT: [[ASHR1:%[0-9]+]]:_(s32) = G_ASHR [[SUB5]], [[C]](s32) + ; GFX6-NEXT: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 + ; GFX6-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[ASHR]], [[C4]] + ; GFX6-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[ASHR1]], [[C4]] + ; GFX6-NEXT: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C]](s32) + ; GFX6-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL4]] + ; GFX6-NEXT: [[BITCAST2:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) + ; GFX6-NEXT: $vgpr0 = COPY [[BITCAST2]](<2 x s16>) ; GFX8-LABEL: name: ssubsat_v2s16 ; GFX8: [[COPY:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0 - ; GFX8: [[COPY1:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr1 - ; GFX8: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY]](<2 x s16>) - ; GFX8: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST]](s32) - ; GFX8: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; GFX8: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) - ; GFX8: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) - ; GFX8: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[COPY1]](<2 x s16>) - ; GFX8: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST1]](s32) - ; GFX8: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C]](s32) - ; GFX8: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR1]](s32) - ; GFX8: [[C1:%[0-9]+]]:_(s16) = G_CONSTANT i16 32767 - ; GFX8: [[C2:%[0-9]+]]:_(s16) = G_CONSTANT i16 -32768 - ; GFX8: [[C3:%[0-9]+]]:_(s16) = G_CONSTANT i16 -1 - ; GFX8: [[SMAX:%[0-9]+]]:_(s16) = G_SMAX [[TRUNC]], [[C3]] - ; GFX8: [[SUB:%[0-9]+]]:_(s16) = G_SUB [[SMAX]], [[C1]] - ; GFX8: [[SMIN:%[0-9]+]]:_(s16) = G_SMIN [[TRUNC]], [[C3]] - ; GFX8: [[SUB1:%[0-9]+]]:_(s16) = G_SUB [[SMIN]], [[C2]] - ; GFX8: [[SMAX1:%[0-9]+]]:_(s16) = G_SMAX [[SUB]], [[TRUNC2]] - ; GFX8: [[SMIN1:%[0-9]+]]:_(s16) = G_SMIN [[SMAX1]], [[SUB1]] - ; GFX8: [[SUB2:%[0-9]+]]:_(s16) = G_SUB [[TRUNC]], [[SMIN1]] - ; GFX8: [[SMAX2:%[0-9]+]]:_(s16) = G_SMAX [[TRUNC1]], [[C3]] - ; GFX8: [[SUB3:%[0-9]+]]:_(s16) = G_SUB [[SMAX2]], [[C1]] - ; GFX8: [[SMIN2:%[0-9]+]]:_(s16) = G_SMIN [[TRUNC1]], [[C3]] - ; GFX8: [[SUB4:%[0-9]+]]:_(s16) = G_SUB [[SMIN2]], [[C2]] - ; GFX8: [[SMAX3:%[0-9]+]]:_(s16) = G_SMAX [[SUB3]], [[TRUNC3]] - ; GFX8: [[SMIN3:%[0-9]+]]:_(s16) = G_SMIN [[SMAX3]], [[SUB4]] - ; GFX8: [[SUB5:%[0-9]+]]:_(s16) = G_SUB [[TRUNC1]], [[SMIN3]] - ; GFX8: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[SUB2]](s16) - ; GFX8: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[SUB5]](s16) - ; GFX8: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C]](s32) - ; GFX8: [[OR:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL]] - ; GFX8: [[BITCAST2:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) - ; GFX8: $vgpr0 = COPY [[BITCAST2]](<2 x s16>) + ; GFX8-NEXT: [[COPY1:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr1 + ; GFX8-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY]](<2 x s16>) + ; GFX8-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST]](s32) + ; GFX8-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; GFX8-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) + ; GFX8-NEXT: 
[[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) + ; GFX8-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[COPY1]](<2 x s16>) + ; GFX8-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST1]](s32) + ; GFX8-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C]](s32) + ; GFX8-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR1]](s32) + ; GFX8-NEXT: [[C1:%[0-9]+]]:_(s16) = G_CONSTANT i16 32767 + ; GFX8-NEXT: [[C2:%[0-9]+]]:_(s16) = G_CONSTANT i16 -32768 + ; GFX8-NEXT: [[C3:%[0-9]+]]:_(s16) = G_CONSTANT i16 -1 + ; GFX8-NEXT: [[SMAX:%[0-9]+]]:_(s16) = G_SMAX [[TRUNC]], [[C3]] + ; GFX8-NEXT: [[SUB:%[0-9]+]]:_(s16) = G_SUB [[SMAX]], [[C1]] + ; GFX8-NEXT: [[SMIN:%[0-9]+]]:_(s16) = G_SMIN [[TRUNC]], [[C3]] + ; GFX8-NEXT: [[SUB1:%[0-9]+]]:_(s16) = G_SUB [[SMIN]], [[C2]] + ; GFX8-NEXT: [[SMAX1:%[0-9]+]]:_(s16) = G_SMAX [[SUB]], [[TRUNC2]] + ; GFX8-NEXT: [[SMIN1:%[0-9]+]]:_(s16) = G_SMIN [[SMAX1]], [[SUB1]] + ; GFX8-NEXT: [[SUB2:%[0-9]+]]:_(s16) = G_SUB [[TRUNC]], [[SMIN1]] + ; GFX8-NEXT: [[SMAX2:%[0-9]+]]:_(s16) = G_SMAX [[TRUNC1]], [[C3]] + ; GFX8-NEXT: [[SUB3:%[0-9]+]]:_(s16) = G_SUB [[SMAX2]], [[C1]] + ; GFX8-NEXT: [[SMIN2:%[0-9]+]]:_(s16) = G_SMIN [[TRUNC1]], [[C3]] + ; GFX8-NEXT: [[SUB4:%[0-9]+]]:_(s16) = G_SUB [[SMIN2]], [[C2]] + ; GFX8-NEXT: [[SMAX3:%[0-9]+]]:_(s16) = G_SMAX [[SUB3]], [[TRUNC3]] + ; GFX8-NEXT: [[SMIN3:%[0-9]+]]:_(s16) = G_SMIN [[SMAX3]], [[SUB4]] + ; GFX8-NEXT: [[SUB5:%[0-9]+]]:_(s16) = G_SUB [[TRUNC1]], [[SMIN3]] + ; GFX8-NEXT: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[SUB2]](s16) + ; GFX8-NEXT: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[SUB5]](s16) + ; GFX8-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C]](s32) + ; GFX8-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL]] + ; GFX8-NEXT: [[BITCAST2:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) + ; GFX8-NEXT: $vgpr0 = COPY [[BITCAST2]](<2 x s16>) ; GFX9-LABEL: name: ssubsat_v2s16 ; GFX9: [[COPY:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0 - ; GFX9: [[COPY1:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr1 - ; GFX9: [[SSUBSAT:%[0-9]+]]:_(<2 x s16>) = G_SSUBSAT [[COPY]], [[COPY1]] - ; GFX9: $vgpr0 = COPY [[SSUBSAT]](<2 x s16>) + ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr1 + ; GFX9-NEXT: [[SSUBSAT:%[0-9]+]]:_(<2 x s16>) = G_SSUBSAT [[COPY]], [[COPY1]] + ; GFX9-NEXT: $vgpr0 = COPY [[SSUBSAT]](<2 x s16>) %0:_(<2 x s16>) = COPY $vgpr0 %1:_(<2 x s16>) = COPY $vgpr1 %2:_(<2 x s16>) = G_SSUBSAT %0, %1 @@ -419,169 +419,169 @@ body: | ; GFX6-LABEL: name: ssubsat_v3s16 ; GFX6: [[COPY:%[0-9]+]]:_(<6 x s16>) = COPY $vgpr0_vgpr1_vgpr2 - ; GFX6: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>), [[UV2:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[COPY]](<6 x s16>) - ; GFX6: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>) - ; GFX6: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; GFX6: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) - ; GFX6: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>) - ; GFX6: [[UV3:%[0-9]+]]:_(<2 x s16>), [[UV4:%[0-9]+]]:_(<2 x s16>), [[UV5:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[COPY]](<6 x s16>) - ; GFX6: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[UV4]](<2 x s16>) - ; GFX6: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C]](s32) - ; GFX6: [[BITCAST3:%[0-9]+]]:_(s32) = G_BITCAST [[UV5]](<2 x s16>) - ; GFX6: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST3]], [[C]](s32) - ; GFX6: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[BITCAST]], [[C]](s32) - ; GFX6: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[LSHR1]], [[C]](s32) - ; GFX6: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 2147483647 - ; GFX6: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT 
i32 -2147483648 - ; GFX6: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 -1 - ; GFX6: [[SMAX:%[0-9]+]]:_(s32) = G_SMAX [[SHL]], [[C3]] - ; GFX6: [[SUB:%[0-9]+]]:_(s32) = G_SUB [[SMAX]], [[C1]] - ; GFX6: [[SMIN:%[0-9]+]]:_(s32) = G_SMIN [[SHL]], [[C3]] - ; GFX6: [[SUB1:%[0-9]+]]:_(s32) = G_SUB [[SMIN]], [[C2]] - ; GFX6: [[SMAX1:%[0-9]+]]:_(s32) = G_SMAX [[SUB]], [[SHL1]] - ; GFX6: [[SMIN1:%[0-9]+]]:_(s32) = G_SMIN [[SMAX1]], [[SUB1]] - ; GFX6: [[SUB2:%[0-9]+]]:_(s32) = G_SUB [[SHL]], [[SMIN1]] - ; GFX6: [[ASHR:%[0-9]+]]:_(s32) = G_ASHR [[SUB2]], [[C]](s32) - ; GFX6: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[LSHR]], [[C]](s32) - ; GFX6: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[BITCAST3]], [[C]](s32) - ; GFX6: [[SMAX2:%[0-9]+]]:_(s32) = G_SMAX [[SHL2]], [[C3]] - ; GFX6: [[SUB3:%[0-9]+]]:_(s32) = G_SUB [[SMAX2]], [[C1]] - ; GFX6: [[SMIN2:%[0-9]+]]:_(s32) = G_SMIN [[SHL2]], [[C3]] - ; GFX6: [[SUB4:%[0-9]+]]:_(s32) = G_SUB [[SMIN2]], [[C2]] - ; GFX6: [[SMAX3:%[0-9]+]]:_(s32) = G_SMAX [[SUB3]], [[SHL3]] - ; GFX6: [[SMIN3:%[0-9]+]]:_(s32) = G_SMIN [[SMAX3]], [[SUB4]] - ; GFX6: [[SUB5:%[0-9]+]]:_(s32) = G_SUB [[SHL2]], [[SMIN3]] - ; GFX6: [[ASHR1:%[0-9]+]]:_(s32) = G_ASHR [[SUB5]], [[C]](s32) - ; GFX6: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[BITCAST1]], [[C]](s32) - ; GFX6: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[LSHR2]], [[C]](s32) - ; GFX6: [[SMAX4:%[0-9]+]]:_(s32) = G_SMAX [[SHL4]], [[C3]] - ; GFX6: [[SUB6:%[0-9]+]]:_(s32) = G_SUB [[SMAX4]], [[C1]] - ; GFX6: [[SMIN4:%[0-9]+]]:_(s32) = G_SMIN [[SHL4]], [[C3]] - ; GFX6: [[SUB7:%[0-9]+]]:_(s32) = G_SUB [[SMIN4]], [[C2]] - ; GFX6: [[SMAX5:%[0-9]+]]:_(s32) = G_SMAX [[SUB6]], [[SHL5]] - ; GFX6: [[SMIN5:%[0-9]+]]:_(s32) = G_SMIN [[SMAX5]], [[SUB7]] - ; GFX6: [[SUB8:%[0-9]+]]:_(s32) = G_SUB [[SHL4]], [[SMIN5]] - ; GFX6: [[ASHR2:%[0-9]+]]:_(s32) = G_ASHR [[SUB8]], [[C]](s32) - ; GFX6: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF - ; GFX6: [[UV6:%[0-9]+]]:_(<2 x s16>), [[UV7:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<4 x s16>) - ; GFX6: [[BITCAST4:%[0-9]+]]:_(s32) = G_BITCAST [[UV6]](<2 x s16>) - ; GFX6: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST4]], [[C]](s32) - ; GFX6: [[BITCAST5:%[0-9]+]]:_(s32) = G_BITCAST [[UV7]](<2 x s16>) - ; GFX6: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 - ; GFX6: [[AND:%[0-9]+]]:_(s32) = G_AND [[ASHR]], [[C4]] - ; GFX6: [[AND1:%[0-9]+]]:_(s32) = G_AND [[ASHR1]], [[C4]] - ; GFX6: [[SHL6:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C]](s32) - ; GFX6: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL6]] - ; GFX6: [[BITCAST6:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) - ; GFX6: [[AND2:%[0-9]+]]:_(s32) = G_AND [[ASHR2]], [[C4]] - ; GFX6: [[AND3:%[0-9]+]]:_(s32) = G_AND [[BITCAST4]], [[C4]] - ; GFX6: [[SHL7:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C]](s32) - ; GFX6: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL7]] - ; GFX6: [[BITCAST7:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32) - ; GFX6: [[AND4:%[0-9]+]]:_(s32) = G_AND [[LSHR3]], [[C4]] - ; GFX6: [[AND5:%[0-9]+]]:_(s32) = G_AND [[BITCAST5]], [[C4]] - ; GFX6: [[SHL8:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[C]](s32) - ; GFX6: [[OR2:%[0-9]+]]:_(s32) = G_OR [[AND4]], [[SHL8]] - ; GFX6: [[BITCAST8:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR2]](s32) - ; GFX6: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BITCAST6]](<2 x s16>), [[BITCAST7]](<2 x s16>), [[BITCAST8]](<2 x s16>) - ; GFX6: $vgpr0_vgpr1_vgpr2 = COPY [[CONCAT_VECTORS]](<6 x s16>) + ; GFX6-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>), [[UV2:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[COPY]](<6 x s16>) + ; GFX6-NEXT: 
[[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>) + ; GFX6-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; GFX6-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) + ; GFX6-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>) + ; GFX6-NEXT: [[UV3:%[0-9]+]]:_(<2 x s16>), [[UV4:%[0-9]+]]:_(<2 x s16>), [[UV5:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[COPY]](<6 x s16>) + ; GFX6-NEXT: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[UV4]](<2 x s16>) + ; GFX6-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C]](s32) + ; GFX6-NEXT: [[BITCAST3:%[0-9]+]]:_(s32) = G_BITCAST [[UV5]](<2 x s16>) + ; GFX6-NEXT: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST3]], [[C]](s32) + ; GFX6-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[BITCAST]], [[C]](s32) + ; GFX6-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[LSHR1]], [[C]](s32) + ; GFX6-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 2147483647 + ; GFX6-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 -2147483648 + ; GFX6-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 -1 + ; GFX6-NEXT: [[SMAX:%[0-9]+]]:_(s32) = G_SMAX [[SHL]], [[C3]] + ; GFX6-NEXT: [[SUB:%[0-9]+]]:_(s32) = G_SUB [[SMAX]], [[C1]] + ; GFX6-NEXT: [[SMIN:%[0-9]+]]:_(s32) = G_SMIN [[SHL]], [[C3]] + ; GFX6-NEXT: [[SUB1:%[0-9]+]]:_(s32) = G_SUB [[SMIN]], [[C2]] + ; GFX6-NEXT: [[SMAX1:%[0-9]+]]:_(s32) = G_SMAX [[SUB]], [[SHL1]] + ; GFX6-NEXT: [[SMIN1:%[0-9]+]]:_(s32) = G_SMIN [[SMAX1]], [[SUB1]] + ; GFX6-NEXT: [[SUB2:%[0-9]+]]:_(s32) = G_SUB [[SHL]], [[SMIN1]] + ; GFX6-NEXT: [[ASHR:%[0-9]+]]:_(s32) = G_ASHR [[SUB2]], [[C]](s32) + ; GFX6-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[LSHR]], [[C]](s32) + ; GFX6-NEXT: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[BITCAST3]], [[C]](s32) + ; GFX6-NEXT: [[SMAX2:%[0-9]+]]:_(s32) = G_SMAX [[SHL2]], [[C3]] + ; GFX6-NEXT: [[SUB3:%[0-9]+]]:_(s32) = G_SUB [[SMAX2]], [[C1]] + ; GFX6-NEXT: [[SMIN2:%[0-9]+]]:_(s32) = G_SMIN [[SHL2]], [[C3]] + ; GFX6-NEXT: [[SUB4:%[0-9]+]]:_(s32) = G_SUB [[SMIN2]], [[C2]] + ; GFX6-NEXT: [[SMAX3:%[0-9]+]]:_(s32) = G_SMAX [[SUB3]], [[SHL3]] + ; GFX6-NEXT: [[SMIN3:%[0-9]+]]:_(s32) = G_SMIN [[SMAX3]], [[SUB4]] + ; GFX6-NEXT: [[SUB5:%[0-9]+]]:_(s32) = G_SUB [[SHL2]], [[SMIN3]] + ; GFX6-NEXT: [[ASHR1:%[0-9]+]]:_(s32) = G_ASHR [[SUB5]], [[C]](s32) + ; GFX6-NEXT: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[BITCAST1]], [[C]](s32) + ; GFX6-NEXT: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[LSHR2]], [[C]](s32) + ; GFX6-NEXT: [[SMAX4:%[0-9]+]]:_(s32) = G_SMAX [[SHL4]], [[C3]] + ; GFX6-NEXT: [[SUB6:%[0-9]+]]:_(s32) = G_SUB [[SMAX4]], [[C1]] + ; GFX6-NEXT: [[SMIN4:%[0-9]+]]:_(s32) = G_SMIN [[SHL4]], [[C3]] + ; GFX6-NEXT: [[SUB7:%[0-9]+]]:_(s32) = G_SUB [[SMIN4]], [[C2]] + ; GFX6-NEXT: [[SMAX5:%[0-9]+]]:_(s32) = G_SMAX [[SUB6]], [[SHL5]] + ; GFX6-NEXT: [[SMIN5:%[0-9]+]]:_(s32) = G_SMIN [[SMAX5]], [[SUB7]] + ; GFX6-NEXT: [[SUB8:%[0-9]+]]:_(s32) = G_SUB [[SHL4]], [[SMIN5]] + ; GFX6-NEXT: [[ASHR2:%[0-9]+]]:_(s32) = G_ASHR [[SUB8]], [[C]](s32) + ; GFX6-NEXT: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF + ; GFX6-NEXT: [[UV6:%[0-9]+]]:_(<2 x s16>), [[UV7:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<4 x s16>) + ; GFX6-NEXT: [[BITCAST4:%[0-9]+]]:_(s32) = G_BITCAST [[UV6]](<2 x s16>) + ; GFX6-NEXT: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST4]], [[C]](s32) + ; GFX6-NEXT: [[BITCAST5:%[0-9]+]]:_(s32) = G_BITCAST [[UV7]](<2 x s16>) + ; GFX6-NEXT: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 + ; GFX6-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[ASHR]], [[C4]] + ; GFX6-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[ASHR1]], [[C4]] + ; GFX6-NEXT: [[SHL6:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C]](s32) 
+ ; GFX6-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL6]] + ; GFX6-NEXT: [[BITCAST6:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) + ; GFX6-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[ASHR2]], [[C4]] + ; GFX6-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[BITCAST4]], [[C4]] + ; GFX6-NEXT: [[SHL7:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C]](s32) + ; GFX6-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL7]] + ; GFX6-NEXT: [[BITCAST7:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32) + ; GFX6-NEXT: [[AND4:%[0-9]+]]:_(s32) = G_AND [[LSHR3]], [[C4]] + ; GFX6-NEXT: [[AND5:%[0-9]+]]:_(s32) = G_AND [[BITCAST5]], [[C4]] + ; GFX6-NEXT: [[SHL8:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[C]](s32) + ; GFX6-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[AND4]], [[SHL8]] + ; GFX6-NEXT: [[BITCAST8:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR2]](s32) + ; GFX6-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BITCAST6]](<2 x s16>), [[BITCAST7]](<2 x s16>), [[BITCAST8]](<2 x s16>) + ; GFX6-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[CONCAT_VECTORS]](<6 x s16>) ; GFX8-LABEL: name: ssubsat_v3s16 ; GFX8: [[COPY:%[0-9]+]]:_(<6 x s16>) = COPY $vgpr0_vgpr1_vgpr2 - ; GFX8: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>), [[UV2:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[COPY]](<6 x s16>) - ; GFX8: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>) - ; GFX8: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST]](s32) - ; GFX8: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; GFX8: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) - ; GFX8: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) - ; GFX8: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>) - ; GFX8: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST1]](s32) - ; GFX8: [[UV3:%[0-9]+]]:_(<2 x s16>), [[UV4:%[0-9]+]]:_(<2 x s16>), [[UV5:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[COPY]](<6 x s16>) - ; GFX8: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[UV4]](<2 x s16>) - ; GFX8: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C]](s32) - ; GFX8: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR1]](s32) - ; GFX8: [[BITCAST3:%[0-9]+]]:_(s32) = G_BITCAST [[UV5]](<2 x s16>) - ; GFX8: [[TRUNC4:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST3]](s32) - ; GFX8: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST3]], [[C]](s32) - ; GFX8: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR2]](s32) - ; GFX8: [[C1:%[0-9]+]]:_(s16) = G_CONSTANT i16 32767 - ; GFX8: [[C2:%[0-9]+]]:_(s16) = G_CONSTANT i16 -32768 - ; GFX8: [[C3:%[0-9]+]]:_(s16) = G_CONSTANT i16 -1 - ; GFX8: [[SMAX:%[0-9]+]]:_(s16) = G_SMAX [[TRUNC]], [[C3]] - ; GFX8: [[SUB:%[0-9]+]]:_(s16) = G_SUB [[SMAX]], [[C1]] - ; GFX8: [[SMIN:%[0-9]+]]:_(s16) = G_SMIN [[TRUNC]], [[C3]] - ; GFX8: [[SUB1:%[0-9]+]]:_(s16) = G_SUB [[SMIN]], [[C2]] - ; GFX8: [[SMAX1:%[0-9]+]]:_(s16) = G_SMAX [[SUB]], [[TRUNC3]] - ; GFX8: [[SMIN1:%[0-9]+]]:_(s16) = G_SMIN [[SMAX1]], [[SUB1]] - ; GFX8: [[SUB2:%[0-9]+]]:_(s16) = G_SUB [[TRUNC]], [[SMIN1]] - ; GFX8: [[SMAX2:%[0-9]+]]:_(s16) = G_SMAX [[TRUNC1]], [[C3]] - ; GFX8: [[SUB3:%[0-9]+]]:_(s16) = G_SUB [[SMAX2]], [[C1]] - ; GFX8: [[SMIN2:%[0-9]+]]:_(s16) = G_SMIN [[TRUNC1]], [[C3]] - ; GFX8: [[SUB4:%[0-9]+]]:_(s16) = G_SUB [[SMIN2]], [[C2]] - ; GFX8: [[SMAX3:%[0-9]+]]:_(s16) = G_SMAX [[SUB3]], [[TRUNC4]] - ; GFX8: [[SMIN3:%[0-9]+]]:_(s16) = G_SMIN [[SMAX3]], [[SUB4]] - ; GFX8: [[SUB5:%[0-9]+]]:_(s16) = G_SUB [[TRUNC1]], [[SMIN3]] - ; GFX8: [[SMAX4:%[0-9]+]]:_(s16) = G_SMAX [[TRUNC2]], [[C3]] - ; GFX8: [[SUB6:%[0-9]+]]:_(s16) = G_SUB [[SMAX4]], [[C1]] - ; GFX8: [[SMIN4:%[0-9]+]]:_(s16) = G_SMIN [[TRUNC2]], [[C3]] - ; GFX8: 
[[SUB7:%[0-9]+]]:_(s16) = G_SUB [[SMIN4]], [[C2]] - ; GFX8: [[SMAX5:%[0-9]+]]:_(s16) = G_SMAX [[SUB6]], [[TRUNC5]] - ; GFX8: [[SMIN5:%[0-9]+]]:_(s16) = G_SMIN [[SMAX5]], [[SUB7]] - ; GFX8: [[SUB8:%[0-9]+]]:_(s16) = G_SUB [[TRUNC2]], [[SMIN5]] - ; GFX8: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF - ; GFX8: [[UV6:%[0-9]+]]:_(<2 x s16>), [[UV7:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<4 x s16>) - ; GFX8: [[BITCAST4:%[0-9]+]]:_(s32) = G_BITCAST [[UV6]](<2 x s16>) - ; GFX8: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST4]], [[C]](s32) - ; GFX8: [[BITCAST5:%[0-9]+]]:_(s32) = G_BITCAST [[UV7]](<2 x s16>) - ; GFX8: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[SUB2]](s16) - ; GFX8: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[SUB5]](s16) - ; GFX8: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C]](s32) - ; GFX8: [[OR:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL]] - ; GFX8: [[BITCAST6:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) - ; GFX8: [[ZEXT2:%[0-9]+]]:_(s32) = G_ZEXT [[SUB8]](s16) - ; GFX8: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 - ; GFX8: [[AND:%[0-9]+]]:_(s32) = G_AND [[BITCAST4]], [[C4]] - ; GFX8: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND]], [[C]](s32) - ; GFX8: [[OR1:%[0-9]+]]:_(s32) = G_OR [[ZEXT2]], [[SHL1]] - ; GFX8: [[BITCAST7:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32) - ; GFX8: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LSHR3]], [[C4]] - ; GFX8: [[AND2:%[0-9]+]]:_(s32) = G_AND [[BITCAST5]], [[C4]] - ; GFX8: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND2]], [[C]](s32) - ; GFX8: [[OR2:%[0-9]+]]:_(s32) = G_OR [[AND1]], [[SHL2]] - ; GFX8: [[BITCAST8:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR2]](s32) - ; GFX8: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BITCAST6]](<2 x s16>), [[BITCAST7]](<2 x s16>), [[BITCAST8]](<2 x s16>) - ; GFX8: $vgpr0_vgpr1_vgpr2 = COPY [[CONCAT_VECTORS]](<6 x s16>) + ; GFX8-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>), [[UV2:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[COPY]](<6 x s16>) + ; GFX8-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>) + ; GFX8-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST]](s32) + ; GFX8-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; GFX8-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) + ; GFX8-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) + ; GFX8-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>) + ; GFX8-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST1]](s32) + ; GFX8-NEXT: [[UV3:%[0-9]+]]:_(<2 x s16>), [[UV4:%[0-9]+]]:_(<2 x s16>), [[UV5:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[COPY]](<6 x s16>) + ; GFX8-NEXT: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[UV4]](<2 x s16>) + ; GFX8-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C]](s32) + ; GFX8-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR1]](s32) + ; GFX8-NEXT: [[BITCAST3:%[0-9]+]]:_(s32) = G_BITCAST [[UV5]](<2 x s16>) + ; GFX8-NEXT: [[TRUNC4:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST3]](s32) + ; GFX8-NEXT: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST3]], [[C]](s32) + ; GFX8-NEXT: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR2]](s32) + ; GFX8-NEXT: [[C1:%[0-9]+]]:_(s16) = G_CONSTANT i16 32767 + ; GFX8-NEXT: [[C2:%[0-9]+]]:_(s16) = G_CONSTANT i16 -32768 + ; GFX8-NEXT: [[C3:%[0-9]+]]:_(s16) = G_CONSTANT i16 -1 + ; GFX8-NEXT: [[SMAX:%[0-9]+]]:_(s16) = G_SMAX [[TRUNC]], [[C3]] + ; GFX8-NEXT: [[SUB:%[0-9]+]]:_(s16) = G_SUB [[SMAX]], [[C1]] + ; GFX8-NEXT: [[SMIN:%[0-9]+]]:_(s16) = G_SMIN [[TRUNC]], [[C3]] + ; GFX8-NEXT: [[SUB1:%[0-9]+]]:_(s16) = G_SUB [[SMIN]], [[C2]] + ; GFX8-NEXT: [[SMAX1:%[0-9]+]]:_(s16) = G_SMAX 
[[SUB]], [[TRUNC3]] + ; GFX8-NEXT: [[SMIN1:%[0-9]+]]:_(s16) = G_SMIN [[SMAX1]], [[SUB1]] + ; GFX8-NEXT: [[SUB2:%[0-9]+]]:_(s16) = G_SUB [[TRUNC]], [[SMIN1]] + ; GFX8-NEXT: [[SMAX2:%[0-9]+]]:_(s16) = G_SMAX [[TRUNC1]], [[C3]] + ; GFX8-NEXT: [[SUB3:%[0-9]+]]:_(s16) = G_SUB [[SMAX2]], [[C1]] + ; GFX8-NEXT: [[SMIN2:%[0-9]+]]:_(s16) = G_SMIN [[TRUNC1]], [[C3]] + ; GFX8-NEXT: [[SUB4:%[0-9]+]]:_(s16) = G_SUB [[SMIN2]], [[C2]] + ; GFX8-NEXT: [[SMAX3:%[0-9]+]]:_(s16) = G_SMAX [[SUB3]], [[TRUNC4]] + ; GFX8-NEXT: [[SMIN3:%[0-9]+]]:_(s16) = G_SMIN [[SMAX3]], [[SUB4]] + ; GFX8-NEXT: [[SUB5:%[0-9]+]]:_(s16) = G_SUB [[TRUNC1]], [[SMIN3]] + ; GFX8-NEXT: [[SMAX4:%[0-9]+]]:_(s16) = G_SMAX [[TRUNC2]], [[C3]] + ; GFX8-NEXT: [[SUB6:%[0-9]+]]:_(s16) = G_SUB [[SMAX4]], [[C1]] + ; GFX8-NEXT: [[SMIN4:%[0-9]+]]:_(s16) = G_SMIN [[TRUNC2]], [[C3]] + ; GFX8-NEXT: [[SUB7:%[0-9]+]]:_(s16) = G_SUB [[SMIN4]], [[C2]] + ; GFX8-NEXT: [[SMAX5:%[0-9]+]]:_(s16) = G_SMAX [[SUB6]], [[TRUNC5]] + ; GFX8-NEXT: [[SMIN5:%[0-9]+]]:_(s16) = G_SMIN [[SMAX5]], [[SUB7]] + ; GFX8-NEXT: [[SUB8:%[0-9]+]]:_(s16) = G_SUB [[TRUNC2]], [[SMIN5]] + ; GFX8-NEXT: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF + ; GFX8-NEXT: [[UV6:%[0-9]+]]:_(<2 x s16>), [[UV7:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<4 x s16>) + ; GFX8-NEXT: [[BITCAST4:%[0-9]+]]:_(s32) = G_BITCAST [[UV6]](<2 x s16>) + ; GFX8-NEXT: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST4]], [[C]](s32) + ; GFX8-NEXT: [[BITCAST5:%[0-9]+]]:_(s32) = G_BITCAST [[UV7]](<2 x s16>) + ; GFX8-NEXT: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[SUB2]](s16) + ; GFX8-NEXT: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[SUB5]](s16) + ; GFX8-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C]](s32) + ; GFX8-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL]] + ; GFX8-NEXT: [[BITCAST6:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) + ; GFX8-NEXT: [[ZEXT2:%[0-9]+]]:_(s32) = G_ZEXT [[SUB8]](s16) + ; GFX8-NEXT: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 + ; GFX8-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[BITCAST4]], [[C4]] + ; GFX8-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND]], [[C]](s32) + ; GFX8-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[ZEXT2]], [[SHL1]] + ; GFX8-NEXT: [[BITCAST7:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32) + ; GFX8-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LSHR3]], [[C4]] + ; GFX8-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[BITCAST5]], [[C4]] + ; GFX8-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND2]], [[C]](s32) + ; GFX8-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[AND1]], [[SHL2]] + ; GFX8-NEXT: [[BITCAST8:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR2]](s32) + ; GFX8-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BITCAST6]](<2 x s16>), [[BITCAST7]](<2 x s16>), [[BITCAST8]](<2 x s16>) + ; GFX8-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[CONCAT_VECTORS]](<6 x s16>) ; GFX9-LABEL: name: ssubsat_v3s16 ; GFX9: [[COPY:%[0-9]+]]:_(<6 x s16>) = COPY $vgpr0_vgpr1_vgpr2 - ; GFX9: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>), [[UV2:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[COPY]](<6 x s16>) - ; GFX9: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>) - ; GFX9: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; GFX9: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) - ; GFX9: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>) - ; GFX9: [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[BITCAST]](s32), [[LSHR]](s32) - ; GFX9: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF - ; GFX9: [[BUILD_VECTOR_TRUNC1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[BITCAST1]](s32), [[DEF]](s32) - ; GFX9: 
[[UV3:%[0-9]+]]:_(<2 x s16>), [[UV4:%[0-9]+]]:_(<2 x s16>), [[UV5:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[COPY]](<6 x s16>) - ; GFX9: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[UV4]](<2 x s16>) - ; GFX9: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C]](s32) - ; GFX9: [[BITCAST3:%[0-9]+]]:_(s32) = G_BITCAST [[UV5]](<2 x s16>) - ; GFX9: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST3]], [[C]](s32) - ; GFX9: [[BUILD_VECTOR_TRUNC2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[LSHR1]](s32), [[BITCAST3]](s32) - ; GFX9: [[BUILD_VECTOR_TRUNC3:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[LSHR2]](s32), [[DEF]](s32) - ; GFX9: [[SSUBSAT:%[0-9]+]]:_(<2 x s16>) = G_SSUBSAT [[BUILD_VECTOR_TRUNC]], [[BUILD_VECTOR_TRUNC2]] - ; GFX9: [[SSUBSAT1:%[0-9]+]]:_(<2 x s16>) = G_SSUBSAT [[BUILD_VECTOR_TRUNC1]], [[BUILD_VECTOR_TRUNC3]] - ; GFX9: [[DEF1:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF - ; GFX9: [[BITCAST4:%[0-9]+]]:_(s32) = G_BITCAST [[SSUBSAT]](<2 x s16>) - ; GFX9: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST4]], [[C]](s32) - ; GFX9: [[BITCAST5:%[0-9]+]]:_(s32) = G_BITCAST [[SSUBSAT1]](<2 x s16>) - ; GFX9: [[UV6:%[0-9]+]]:_(<2 x s16>), [[UV7:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF1]](<4 x s16>) - ; GFX9: [[BITCAST6:%[0-9]+]]:_(s32) = G_BITCAST [[UV6]](<2 x s16>) - ; GFX9: [[LSHR4:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST6]], [[C]](s32) - ; GFX9: [[BITCAST7:%[0-9]+]]:_(s32) = G_BITCAST [[UV7]](<2 x s16>) - ; GFX9: [[BUILD_VECTOR_TRUNC4:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[BITCAST4]](s32), [[LSHR3]](s32) - ; GFX9: [[BUILD_VECTOR_TRUNC5:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[BITCAST5]](s32), [[BITCAST6]](s32) - ; GFX9: [[BUILD_VECTOR_TRUNC6:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[LSHR4]](s32), [[BITCAST7]](s32) - ; GFX9: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR_TRUNC4]](<2 x s16>), [[BUILD_VECTOR_TRUNC5]](<2 x s16>), [[BUILD_VECTOR_TRUNC6]](<2 x s16>) - ; GFX9: $vgpr0_vgpr1_vgpr2 = COPY [[CONCAT_VECTORS]](<6 x s16>) + ; GFX9-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>), [[UV2:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[COPY]](<6 x s16>) + ; GFX9-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>) + ; GFX9-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; GFX9-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) + ; GFX9-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>) + ; GFX9-NEXT: [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[BITCAST]](s32), [[LSHR]](s32) + ; GFX9-NEXT: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF + ; GFX9-NEXT: [[BUILD_VECTOR_TRUNC1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[BITCAST1]](s32), [[DEF]](s32) + ; GFX9-NEXT: [[UV3:%[0-9]+]]:_(<2 x s16>), [[UV4:%[0-9]+]]:_(<2 x s16>), [[UV5:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[COPY]](<6 x s16>) + ; GFX9-NEXT: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[UV4]](<2 x s16>) + ; GFX9-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C]](s32) + ; GFX9-NEXT: [[BITCAST3:%[0-9]+]]:_(s32) = G_BITCAST [[UV5]](<2 x s16>) + ; GFX9-NEXT: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST3]], [[C]](s32) + ; GFX9-NEXT: [[BUILD_VECTOR_TRUNC2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[LSHR1]](s32), [[BITCAST3]](s32) + ; GFX9-NEXT: [[BUILD_VECTOR_TRUNC3:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[LSHR2]](s32), [[DEF]](s32) + ; GFX9-NEXT: [[SSUBSAT:%[0-9]+]]:_(<2 x s16>) = G_SSUBSAT [[BUILD_VECTOR_TRUNC]], [[BUILD_VECTOR_TRUNC2]] + ; GFX9-NEXT: [[SSUBSAT1:%[0-9]+]]:_(<2 x s16>) = G_SSUBSAT 
[[BUILD_VECTOR_TRUNC1]], [[BUILD_VECTOR_TRUNC3]] + ; GFX9-NEXT: [[DEF1:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF + ; GFX9-NEXT: [[BITCAST4:%[0-9]+]]:_(s32) = G_BITCAST [[SSUBSAT]](<2 x s16>) + ; GFX9-NEXT: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST4]], [[C]](s32) + ; GFX9-NEXT: [[BITCAST5:%[0-9]+]]:_(s32) = G_BITCAST [[SSUBSAT1]](<2 x s16>) + ; GFX9-NEXT: [[UV6:%[0-9]+]]:_(<2 x s16>), [[UV7:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF1]](<4 x s16>) + ; GFX9-NEXT: [[BITCAST6:%[0-9]+]]:_(s32) = G_BITCAST [[UV6]](<2 x s16>) + ; GFX9-NEXT: [[LSHR4:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST6]], [[C]](s32) + ; GFX9-NEXT: [[BITCAST7:%[0-9]+]]:_(s32) = G_BITCAST [[UV7]](<2 x s16>) + ; GFX9-NEXT: [[BUILD_VECTOR_TRUNC4:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[BITCAST4]](s32), [[LSHR3]](s32) + ; GFX9-NEXT: [[BUILD_VECTOR_TRUNC5:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[BITCAST5]](s32), [[BITCAST6]](s32) + ; GFX9-NEXT: [[BUILD_VECTOR_TRUNC6:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[LSHR4]](s32), [[BITCAST7]](s32) + ; GFX9-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR_TRUNC4]](<2 x s16>), [[BUILD_VECTOR_TRUNC5]](<2 x s16>), [[BUILD_VECTOR_TRUNC6]](<2 x s16>) + ; GFX9-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[CONCAT_VECTORS]](<6 x s16>) %0:_(<6 x s16>) = COPY $vgpr0_vgpr1_vgpr2 %1:_(<3 x s16>), %2:_(<3 x s16>) = G_UNMERGE_VALUES %0 %3:_(<3 x s16>) = G_SSUBSAT %1, %2 @@ -598,148 +598,148 @@ body: | ; GFX6-LABEL: name: ssubsat_v4s16 ; GFX6: [[COPY:%[0-9]+]]:_(<4 x s16>) = COPY $vgpr0_vgpr1 - ; GFX6: [[COPY1:%[0-9]+]]:_(<4 x s16>) = COPY $vgpr2_vgpr3 - ; GFX6: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[COPY]](<4 x s16>) - ; GFX6: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>) - ; GFX6: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; GFX6: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) - ; GFX6: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>) - ; GFX6: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C]](s32) - ; GFX6: [[UV2:%[0-9]+]]:_(<2 x s16>), [[UV3:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[COPY1]](<4 x s16>) - ; GFX6: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[UV2]](<2 x s16>) - ; GFX6: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C]](s32) - ; GFX6: [[BITCAST3:%[0-9]+]]:_(s32) = G_BITCAST [[UV3]](<2 x s16>) - ; GFX6: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST3]], [[C]](s32) - ; GFX6: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[BITCAST]], [[C]](s32) - ; GFX6: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[BITCAST2]], [[C]](s32) - ; GFX6: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 2147483647 - ; GFX6: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 -2147483648 - ; GFX6: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 -1 - ; GFX6: [[SMAX:%[0-9]+]]:_(s32) = G_SMAX [[SHL]], [[C3]] - ; GFX6: [[SUB:%[0-9]+]]:_(s32) = G_SUB [[SMAX]], [[C1]] - ; GFX6: [[SMIN:%[0-9]+]]:_(s32) = G_SMIN [[SHL]], [[C3]] - ; GFX6: [[SUB1:%[0-9]+]]:_(s32) = G_SUB [[SMIN]], [[C2]] - ; GFX6: [[SMAX1:%[0-9]+]]:_(s32) = G_SMAX [[SUB]], [[SHL1]] - ; GFX6: [[SMIN1:%[0-9]+]]:_(s32) = G_SMIN [[SMAX1]], [[SUB1]] - ; GFX6: [[SUB2:%[0-9]+]]:_(s32) = G_SUB [[SHL]], [[SMIN1]] - ; GFX6: [[ASHR:%[0-9]+]]:_(s32) = G_ASHR [[SUB2]], [[C]](s32) - ; GFX6: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[LSHR]], [[C]](s32) - ; GFX6: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[LSHR2]], [[C]](s32) - ; GFX6: [[SMAX2:%[0-9]+]]:_(s32) = G_SMAX [[SHL2]], [[C3]] - ; GFX6: [[SUB3:%[0-9]+]]:_(s32) = G_SUB [[SMAX2]], [[C1]] - ; GFX6: [[SMIN2:%[0-9]+]]:_(s32) = G_SMIN [[SHL2]], [[C3]] - ; GFX6: [[SUB4:%[0-9]+]]:_(s32) = 
G_SUB [[SMIN2]], [[C2]] - ; GFX6: [[SMAX3:%[0-9]+]]:_(s32) = G_SMAX [[SUB3]], [[SHL3]] - ; GFX6: [[SMIN3:%[0-9]+]]:_(s32) = G_SMIN [[SMAX3]], [[SUB4]] - ; GFX6: [[SUB5:%[0-9]+]]:_(s32) = G_SUB [[SHL2]], [[SMIN3]] - ; GFX6: [[ASHR1:%[0-9]+]]:_(s32) = G_ASHR [[SUB5]], [[C]](s32) - ; GFX6: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[BITCAST1]], [[C]](s32) - ; GFX6: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[BITCAST3]], [[C]](s32) - ; GFX6: [[SMAX4:%[0-9]+]]:_(s32) = G_SMAX [[SHL4]], [[C3]] - ; GFX6: [[SUB6:%[0-9]+]]:_(s32) = G_SUB [[SMAX4]], [[C1]] - ; GFX6: [[SMIN4:%[0-9]+]]:_(s32) = G_SMIN [[SHL4]], [[C3]] - ; GFX6: [[SUB7:%[0-9]+]]:_(s32) = G_SUB [[SMIN4]], [[C2]] - ; GFX6: [[SMAX5:%[0-9]+]]:_(s32) = G_SMAX [[SUB6]], [[SHL5]] - ; GFX6: [[SMIN5:%[0-9]+]]:_(s32) = G_SMIN [[SMAX5]], [[SUB7]] - ; GFX6: [[SUB8:%[0-9]+]]:_(s32) = G_SUB [[SHL4]], [[SMIN5]] - ; GFX6: [[ASHR2:%[0-9]+]]:_(s32) = G_ASHR [[SUB8]], [[C]](s32) - ; GFX6: [[SHL6:%[0-9]+]]:_(s32) = G_SHL [[LSHR1]], [[C]](s32) - ; GFX6: [[SHL7:%[0-9]+]]:_(s32) = G_SHL [[LSHR3]], [[C]](s32) - ; GFX6: [[SMAX6:%[0-9]+]]:_(s32) = G_SMAX [[SHL6]], [[C3]] - ; GFX6: [[SUB9:%[0-9]+]]:_(s32) = G_SUB [[SMAX6]], [[C1]] - ; GFX6: [[SMIN6:%[0-9]+]]:_(s32) = G_SMIN [[SHL6]], [[C3]] - ; GFX6: [[SUB10:%[0-9]+]]:_(s32) = G_SUB [[SMIN6]], [[C2]] - ; GFX6: [[SMAX7:%[0-9]+]]:_(s32) = G_SMAX [[SUB9]], [[SHL7]] - ; GFX6: [[SMIN7:%[0-9]+]]:_(s32) = G_SMIN [[SMAX7]], [[SUB10]] - ; GFX6: [[SUB11:%[0-9]+]]:_(s32) = G_SUB [[SHL6]], [[SMIN7]] - ; GFX6: [[ASHR3:%[0-9]+]]:_(s32) = G_ASHR [[SUB11]], [[C]](s32) - ; GFX6: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 - ; GFX6: [[AND:%[0-9]+]]:_(s32) = G_AND [[ASHR]], [[C4]] - ; GFX6: [[AND1:%[0-9]+]]:_(s32) = G_AND [[ASHR1]], [[C4]] - ; GFX6: [[SHL8:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C]](s32) - ; GFX6: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL8]] - ; GFX6: [[BITCAST4:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) - ; GFX6: [[AND2:%[0-9]+]]:_(s32) = G_AND [[ASHR2]], [[C4]] - ; GFX6: [[AND3:%[0-9]+]]:_(s32) = G_AND [[ASHR3]], [[C4]] - ; GFX6: [[SHL9:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C]](s32) - ; GFX6: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL9]] - ; GFX6: [[BITCAST5:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32) - ; GFX6: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BITCAST4]](<2 x s16>), [[BITCAST5]](<2 x s16>) - ; GFX6: $vgpr0_vgpr1 = COPY [[CONCAT_VECTORS]](<4 x s16>) + ; GFX6-NEXT: [[COPY1:%[0-9]+]]:_(<4 x s16>) = COPY $vgpr2_vgpr3 + ; GFX6-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[COPY]](<4 x s16>) + ; GFX6-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>) + ; GFX6-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; GFX6-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) + ; GFX6-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>) + ; GFX6-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C]](s32) + ; GFX6-NEXT: [[UV2:%[0-9]+]]:_(<2 x s16>), [[UV3:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[COPY1]](<4 x s16>) + ; GFX6-NEXT: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[UV2]](<2 x s16>) + ; GFX6-NEXT: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C]](s32) + ; GFX6-NEXT: [[BITCAST3:%[0-9]+]]:_(s32) = G_BITCAST [[UV3]](<2 x s16>) + ; GFX6-NEXT: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST3]], [[C]](s32) + ; GFX6-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[BITCAST]], [[C]](s32) + ; GFX6-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[BITCAST2]], [[C]](s32) + ; GFX6-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 2147483647 + ; GFX6-NEXT: 
[[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 -2147483648 + ; GFX6-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 -1 + ; GFX6-NEXT: [[SMAX:%[0-9]+]]:_(s32) = G_SMAX [[SHL]], [[C3]] + ; GFX6-NEXT: [[SUB:%[0-9]+]]:_(s32) = G_SUB [[SMAX]], [[C1]] + ; GFX6-NEXT: [[SMIN:%[0-9]+]]:_(s32) = G_SMIN [[SHL]], [[C3]] + ; GFX6-NEXT: [[SUB1:%[0-9]+]]:_(s32) = G_SUB [[SMIN]], [[C2]] + ; GFX6-NEXT: [[SMAX1:%[0-9]+]]:_(s32) = G_SMAX [[SUB]], [[SHL1]] + ; GFX6-NEXT: [[SMIN1:%[0-9]+]]:_(s32) = G_SMIN [[SMAX1]], [[SUB1]] + ; GFX6-NEXT: [[SUB2:%[0-9]+]]:_(s32) = G_SUB [[SHL]], [[SMIN1]] + ; GFX6-NEXT: [[ASHR:%[0-9]+]]:_(s32) = G_ASHR [[SUB2]], [[C]](s32) + ; GFX6-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[LSHR]], [[C]](s32) + ; GFX6-NEXT: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[LSHR2]], [[C]](s32) + ; GFX6-NEXT: [[SMAX2:%[0-9]+]]:_(s32) = G_SMAX [[SHL2]], [[C3]] + ; GFX6-NEXT: [[SUB3:%[0-9]+]]:_(s32) = G_SUB [[SMAX2]], [[C1]] + ; GFX6-NEXT: [[SMIN2:%[0-9]+]]:_(s32) = G_SMIN [[SHL2]], [[C3]] + ; GFX6-NEXT: [[SUB4:%[0-9]+]]:_(s32) = G_SUB [[SMIN2]], [[C2]] + ; GFX6-NEXT: [[SMAX3:%[0-9]+]]:_(s32) = G_SMAX [[SUB3]], [[SHL3]] + ; GFX6-NEXT: [[SMIN3:%[0-9]+]]:_(s32) = G_SMIN [[SMAX3]], [[SUB4]] + ; GFX6-NEXT: [[SUB5:%[0-9]+]]:_(s32) = G_SUB [[SHL2]], [[SMIN3]] + ; GFX6-NEXT: [[ASHR1:%[0-9]+]]:_(s32) = G_ASHR [[SUB5]], [[C]](s32) + ; GFX6-NEXT: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[BITCAST1]], [[C]](s32) + ; GFX6-NEXT: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[BITCAST3]], [[C]](s32) + ; GFX6-NEXT: [[SMAX4:%[0-9]+]]:_(s32) = G_SMAX [[SHL4]], [[C3]] + ; GFX6-NEXT: [[SUB6:%[0-9]+]]:_(s32) = G_SUB [[SMAX4]], [[C1]] + ; GFX6-NEXT: [[SMIN4:%[0-9]+]]:_(s32) = G_SMIN [[SHL4]], [[C3]] + ; GFX6-NEXT: [[SUB7:%[0-9]+]]:_(s32) = G_SUB [[SMIN4]], [[C2]] + ; GFX6-NEXT: [[SMAX5:%[0-9]+]]:_(s32) = G_SMAX [[SUB6]], [[SHL5]] + ; GFX6-NEXT: [[SMIN5:%[0-9]+]]:_(s32) = G_SMIN [[SMAX5]], [[SUB7]] + ; GFX6-NEXT: [[SUB8:%[0-9]+]]:_(s32) = G_SUB [[SHL4]], [[SMIN5]] + ; GFX6-NEXT: [[ASHR2:%[0-9]+]]:_(s32) = G_ASHR [[SUB8]], [[C]](s32) + ; GFX6-NEXT: [[SHL6:%[0-9]+]]:_(s32) = G_SHL [[LSHR1]], [[C]](s32) + ; GFX6-NEXT: [[SHL7:%[0-9]+]]:_(s32) = G_SHL [[LSHR3]], [[C]](s32) + ; GFX6-NEXT: [[SMAX6:%[0-9]+]]:_(s32) = G_SMAX [[SHL6]], [[C3]] + ; GFX6-NEXT: [[SUB9:%[0-9]+]]:_(s32) = G_SUB [[SMAX6]], [[C1]] + ; GFX6-NEXT: [[SMIN6:%[0-9]+]]:_(s32) = G_SMIN [[SHL6]], [[C3]] + ; GFX6-NEXT: [[SUB10:%[0-9]+]]:_(s32) = G_SUB [[SMIN6]], [[C2]] + ; GFX6-NEXT: [[SMAX7:%[0-9]+]]:_(s32) = G_SMAX [[SUB9]], [[SHL7]] + ; GFX6-NEXT: [[SMIN7:%[0-9]+]]:_(s32) = G_SMIN [[SMAX7]], [[SUB10]] + ; GFX6-NEXT: [[SUB11:%[0-9]+]]:_(s32) = G_SUB [[SHL6]], [[SMIN7]] + ; GFX6-NEXT: [[ASHR3:%[0-9]+]]:_(s32) = G_ASHR [[SUB11]], [[C]](s32) + ; GFX6-NEXT: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 + ; GFX6-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[ASHR]], [[C4]] + ; GFX6-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[ASHR1]], [[C4]] + ; GFX6-NEXT: [[SHL8:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C]](s32) + ; GFX6-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL8]] + ; GFX6-NEXT: [[BITCAST4:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) + ; GFX6-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[ASHR2]], [[C4]] + ; GFX6-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[ASHR3]], [[C4]] + ; GFX6-NEXT: [[SHL9:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C]](s32) + ; GFX6-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL9]] + ; GFX6-NEXT: [[BITCAST5:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32) + ; GFX6-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BITCAST4]](<2 x s16>), [[BITCAST5]](<2 x s16>) + ; GFX6-NEXT: $vgpr0_vgpr1 = COPY 
[[CONCAT_VECTORS]](<4 x s16>) ; GFX8-LABEL: name: ssubsat_v4s16 ; GFX8: [[COPY:%[0-9]+]]:_(<4 x s16>) = COPY $vgpr0_vgpr1 - ; GFX8: [[COPY1:%[0-9]+]]:_(<4 x s16>) = COPY $vgpr2_vgpr3 - ; GFX8: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[COPY]](<4 x s16>) - ; GFX8: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>) - ; GFX8: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST]](s32) - ; GFX8: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; GFX8: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) - ; GFX8: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) - ; GFX8: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>) - ; GFX8: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST1]](s32) - ; GFX8: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C]](s32) - ; GFX8: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR1]](s32) - ; GFX8: [[UV2:%[0-9]+]]:_(<2 x s16>), [[UV3:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[COPY1]](<4 x s16>) - ; GFX8: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[UV2]](<2 x s16>) - ; GFX8: [[TRUNC4:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST2]](s32) - ; GFX8: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C]](s32) - ; GFX8: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR2]](s32) - ; GFX8: [[BITCAST3:%[0-9]+]]:_(s32) = G_BITCAST [[UV3]](<2 x s16>) - ; GFX8: [[TRUNC6:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST3]](s32) - ; GFX8: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST3]], [[C]](s32) - ; GFX8: [[TRUNC7:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR3]](s32) - ; GFX8: [[C1:%[0-9]+]]:_(s16) = G_CONSTANT i16 32767 - ; GFX8: [[C2:%[0-9]+]]:_(s16) = G_CONSTANT i16 -32768 - ; GFX8: [[C3:%[0-9]+]]:_(s16) = G_CONSTANT i16 -1 - ; GFX8: [[SMAX:%[0-9]+]]:_(s16) = G_SMAX [[TRUNC]], [[C3]] - ; GFX8: [[SUB:%[0-9]+]]:_(s16) = G_SUB [[SMAX]], [[C1]] - ; GFX8: [[SMIN:%[0-9]+]]:_(s16) = G_SMIN [[TRUNC]], [[C3]] - ; GFX8: [[SUB1:%[0-9]+]]:_(s16) = G_SUB [[SMIN]], [[C2]] - ; GFX8: [[SMAX1:%[0-9]+]]:_(s16) = G_SMAX [[SUB]], [[TRUNC4]] - ; GFX8: [[SMIN1:%[0-9]+]]:_(s16) = G_SMIN [[SMAX1]], [[SUB1]] - ; GFX8: [[SUB2:%[0-9]+]]:_(s16) = G_SUB [[TRUNC]], [[SMIN1]] - ; GFX8: [[SMAX2:%[0-9]+]]:_(s16) = G_SMAX [[TRUNC1]], [[C3]] - ; GFX8: [[SUB3:%[0-9]+]]:_(s16) = G_SUB [[SMAX2]], [[C1]] - ; GFX8: [[SMIN2:%[0-9]+]]:_(s16) = G_SMIN [[TRUNC1]], [[C3]] - ; GFX8: [[SUB4:%[0-9]+]]:_(s16) = G_SUB [[SMIN2]], [[C2]] - ; GFX8: [[SMAX3:%[0-9]+]]:_(s16) = G_SMAX [[SUB3]], [[TRUNC5]] - ; GFX8: [[SMIN3:%[0-9]+]]:_(s16) = G_SMIN [[SMAX3]], [[SUB4]] - ; GFX8: [[SUB5:%[0-9]+]]:_(s16) = G_SUB [[TRUNC1]], [[SMIN3]] - ; GFX8: [[SMAX4:%[0-9]+]]:_(s16) = G_SMAX [[TRUNC2]], [[C3]] - ; GFX8: [[SUB6:%[0-9]+]]:_(s16) = G_SUB [[SMAX4]], [[C1]] - ; GFX8: [[SMIN4:%[0-9]+]]:_(s16) = G_SMIN [[TRUNC2]], [[C3]] - ; GFX8: [[SUB7:%[0-9]+]]:_(s16) = G_SUB [[SMIN4]], [[C2]] - ; GFX8: [[SMAX5:%[0-9]+]]:_(s16) = G_SMAX [[SUB6]], [[TRUNC6]] - ; GFX8: [[SMIN5:%[0-9]+]]:_(s16) = G_SMIN [[SMAX5]], [[SUB7]] - ; GFX8: [[SUB8:%[0-9]+]]:_(s16) = G_SUB [[TRUNC2]], [[SMIN5]] - ; GFX8: [[SMAX6:%[0-9]+]]:_(s16) = G_SMAX [[TRUNC3]], [[C3]] - ; GFX8: [[SUB9:%[0-9]+]]:_(s16) = G_SUB [[SMAX6]], [[C1]] - ; GFX8: [[SMIN6:%[0-9]+]]:_(s16) = G_SMIN [[TRUNC3]], [[C3]] - ; GFX8: [[SUB10:%[0-9]+]]:_(s16) = G_SUB [[SMIN6]], [[C2]] - ; GFX8: [[SMAX7:%[0-9]+]]:_(s16) = G_SMAX [[SUB9]], [[TRUNC7]] - ; GFX8: [[SMIN7:%[0-9]+]]:_(s16) = G_SMIN [[SMAX7]], [[SUB10]] - ; GFX8: [[SUB11:%[0-9]+]]:_(s16) = G_SUB [[TRUNC3]], [[SMIN7]] - ; GFX8: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[SUB2]](s16) - ; GFX8: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[SUB5]](s16) - ; GFX8: 
[[SHL:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C]](s32) - ; GFX8: [[OR:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL]] - ; GFX8: [[BITCAST4:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) - ; GFX8: [[ZEXT2:%[0-9]+]]:_(s32) = G_ZEXT [[SUB8]](s16) - ; GFX8: [[ZEXT3:%[0-9]+]]:_(s32) = G_ZEXT [[SUB11]](s16) - ; GFX8: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[ZEXT3]], [[C]](s32) - ; GFX8: [[OR1:%[0-9]+]]:_(s32) = G_OR [[ZEXT2]], [[SHL1]] - ; GFX8: [[BITCAST5:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32) - ; GFX8: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BITCAST4]](<2 x s16>), [[BITCAST5]](<2 x s16>) - ; GFX8: $vgpr0_vgpr1 = COPY [[CONCAT_VECTORS]](<4 x s16>) + ; GFX8-NEXT: [[COPY1:%[0-9]+]]:_(<4 x s16>) = COPY $vgpr2_vgpr3 + ; GFX8-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[COPY]](<4 x s16>) + ; GFX8-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>) + ; GFX8-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST]](s32) + ; GFX8-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; GFX8-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) + ; GFX8-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) + ; GFX8-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>) + ; GFX8-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST1]](s32) + ; GFX8-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C]](s32) + ; GFX8-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR1]](s32) + ; GFX8-NEXT: [[UV2:%[0-9]+]]:_(<2 x s16>), [[UV3:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[COPY1]](<4 x s16>) + ; GFX8-NEXT: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[UV2]](<2 x s16>) + ; GFX8-NEXT: [[TRUNC4:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST2]](s32) + ; GFX8-NEXT: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C]](s32) + ; GFX8-NEXT: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR2]](s32) + ; GFX8-NEXT: [[BITCAST3:%[0-9]+]]:_(s32) = G_BITCAST [[UV3]](<2 x s16>) + ; GFX8-NEXT: [[TRUNC6:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST3]](s32) + ; GFX8-NEXT: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST3]], [[C]](s32) + ; GFX8-NEXT: [[TRUNC7:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR3]](s32) + ; GFX8-NEXT: [[C1:%[0-9]+]]:_(s16) = G_CONSTANT i16 32767 + ; GFX8-NEXT: [[C2:%[0-9]+]]:_(s16) = G_CONSTANT i16 -32768 + ; GFX8-NEXT: [[C3:%[0-9]+]]:_(s16) = G_CONSTANT i16 -1 + ; GFX8-NEXT: [[SMAX:%[0-9]+]]:_(s16) = G_SMAX [[TRUNC]], [[C3]] + ; GFX8-NEXT: [[SUB:%[0-9]+]]:_(s16) = G_SUB [[SMAX]], [[C1]] + ; GFX8-NEXT: [[SMIN:%[0-9]+]]:_(s16) = G_SMIN [[TRUNC]], [[C3]] + ; GFX8-NEXT: [[SUB1:%[0-9]+]]:_(s16) = G_SUB [[SMIN]], [[C2]] + ; GFX8-NEXT: [[SMAX1:%[0-9]+]]:_(s16) = G_SMAX [[SUB]], [[TRUNC4]] + ; GFX8-NEXT: [[SMIN1:%[0-9]+]]:_(s16) = G_SMIN [[SMAX1]], [[SUB1]] + ; GFX8-NEXT: [[SUB2:%[0-9]+]]:_(s16) = G_SUB [[TRUNC]], [[SMIN1]] + ; GFX8-NEXT: [[SMAX2:%[0-9]+]]:_(s16) = G_SMAX [[TRUNC1]], [[C3]] + ; GFX8-NEXT: [[SUB3:%[0-9]+]]:_(s16) = G_SUB [[SMAX2]], [[C1]] + ; GFX8-NEXT: [[SMIN2:%[0-9]+]]:_(s16) = G_SMIN [[TRUNC1]], [[C3]] + ; GFX8-NEXT: [[SUB4:%[0-9]+]]:_(s16) = G_SUB [[SMIN2]], [[C2]] + ; GFX8-NEXT: [[SMAX3:%[0-9]+]]:_(s16) = G_SMAX [[SUB3]], [[TRUNC5]] + ; GFX8-NEXT: [[SMIN3:%[0-9]+]]:_(s16) = G_SMIN [[SMAX3]], [[SUB4]] + ; GFX8-NEXT: [[SUB5:%[0-9]+]]:_(s16) = G_SUB [[TRUNC1]], [[SMIN3]] + ; GFX8-NEXT: [[SMAX4:%[0-9]+]]:_(s16) = G_SMAX [[TRUNC2]], [[C3]] + ; GFX8-NEXT: [[SUB6:%[0-9]+]]:_(s16) = G_SUB [[SMAX4]], [[C1]] + ; GFX8-NEXT: [[SMIN4:%[0-9]+]]:_(s16) = G_SMIN [[TRUNC2]], [[C3]] + ; GFX8-NEXT: [[SUB7:%[0-9]+]]:_(s16) = G_SUB [[SMIN4]], [[C2]] + ; GFX8-NEXT: 
[[SMAX5:%[0-9]+]]:_(s16) = G_SMAX [[SUB6]], [[TRUNC6]] + ; GFX8-NEXT: [[SMIN5:%[0-9]+]]:_(s16) = G_SMIN [[SMAX5]], [[SUB7]] + ; GFX8-NEXT: [[SUB8:%[0-9]+]]:_(s16) = G_SUB [[TRUNC2]], [[SMIN5]] + ; GFX8-NEXT: [[SMAX6:%[0-9]+]]:_(s16) = G_SMAX [[TRUNC3]], [[C3]] + ; GFX8-NEXT: [[SUB9:%[0-9]+]]:_(s16) = G_SUB [[SMAX6]], [[C1]] + ; GFX8-NEXT: [[SMIN6:%[0-9]+]]:_(s16) = G_SMIN [[TRUNC3]], [[C3]] + ; GFX8-NEXT: [[SUB10:%[0-9]+]]:_(s16) = G_SUB [[SMIN6]], [[C2]] + ; GFX8-NEXT: [[SMAX7:%[0-9]+]]:_(s16) = G_SMAX [[SUB9]], [[TRUNC7]] + ; GFX8-NEXT: [[SMIN7:%[0-9]+]]:_(s16) = G_SMIN [[SMAX7]], [[SUB10]] + ; GFX8-NEXT: [[SUB11:%[0-9]+]]:_(s16) = G_SUB [[TRUNC3]], [[SMIN7]] + ; GFX8-NEXT: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[SUB2]](s16) + ; GFX8-NEXT: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[SUB5]](s16) + ; GFX8-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C]](s32) + ; GFX8-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL]] + ; GFX8-NEXT: [[BITCAST4:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) + ; GFX8-NEXT: [[ZEXT2:%[0-9]+]]:_(s32) = G_ZEXT [[SUB8]](s16) + ; GFX8-NEXT: [[ZEXT3:%[0-9]+]]:_(s32) = G_ZEXT [[SUB11]](s16) + ; GFX8-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[ZEXT3]], [[C]](s32) + ; GFX8-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[ZEXT2]], [[SHL1]] + ; GFX8-NEXT: [[BITCAST5:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32) + ; GFX8-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BITCAST4]](<2 x s16>), [[BITCAST5]](<2 x s16>) + ; GFX8-NEXT: $vgpr0_vgpr1 = COPY [[CONCAT_VECTORS]](<4 x s16>) ; GFX9-LABEL: name: ssubsat_v4s16 ; GFX9: [[COPY:%[0-9]+]]:_(<4 x s16>) = COPY $vgpr0_vgpr1 - ; GFX9: [[COPY1:%[0-9]+]]:_(<4 x s16>) = COPY $vgpr2_vgpr3 - ; GFX9: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[COPY]](<4 x s16>) - ; GFX9: [[UV2:%[0-9]+]]:_(<2 x s16>), [[UV3:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[COPY1]](<4 x s16>) - ; GFX9: [[SSUBSAT:%[0-9]+]]:_(<2 x s16>) = G_SSUBSAT [[UV]], [[UV2]] - ; GFX9: [[SSUBSAT1:%[0-9]+]]:_(<2 x s16>) = G_SSUBSAT [[UV1]], [[UV3]] - ; GFX9: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[SSUBSAT]](<2 x s16>), [[SSUBSAT1]](<2 x s16>) - ; GFX9: $vgpr0_vgpr1 = COPY [[CONCAT_VECTORS]](<4 x s16>) + ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(<4 x s16>) = COPY $vgpr2_vgpr3 + ; GFX9-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[COPY]](<4 x s16>) + ; GFX9-NEXT: [[UV2:%[0-9]+]]:_(<2 x s16>), [[UV3:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[COPY1]](<4 x s16>) + ; GFX9-NEXT: [[SSUBSAT:%[0-9]+]]:_(<2 x s16>) = G_SSUBSAT [[UV]], [[UV2]] + ; GFX9-NEXT: [[SSUBSAT1:%[0-9]+]]:_(<2 x s16>) = G_SSUBSAT [[UV1]], [[UV3]] + ; GFX9-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[SSUBSAT]](<2 x s16>), [[SSUBSAT1]](<2 x s16>) + ; GFX9-NEXT: $vgpr0_vgpr1 = COPY [[CONCAT_VECTORS]](<4 x s16>) %0:_(<4 x s16>) = COPY $vgpr0_vgpr1 %1:_(<4 x s16>) = COPY $vgpr2_vgpr3 %2:_(<4 x s16>) = G_SSUBSAT %0, %1 @@ -754,37 +754,37 @@ body: | ; GFX6-LABEL: name: ssubsat_s32 ; GFX6: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX6: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX6: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 2147483647 - ; GFX6: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 -2147483648 - ; GFX6: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 -1 - ; GFX6: [[SMAX:%[0-9]+]]:_(s32) = G_SMAX [[COPY]], [[C2]] - ; GFX6: [[SUB:%[0-9]+]]:_(s32) = G_SUB [[SMAX]], [[C]] - ; GFX6: [[SMIN:%[0-9]+]]:_(s32) = G_SMIN [[COPY]], [[C2]] - ; GFX6: [[SUB1:%[0-9]+]]:_(s32) = G_SUB [[SMIN]], [[C1]] - ; GFX6: [[SMAX1:%[0-9]+]]:_(s32) = 
G_SMAX [[SUB]], [[COPY1]] - ; GFX6: [[SMIN1:%[0-9]+]]:_(s32) = G_SMIN [[SMAX1]], [[SUB1]] - ; GFX6: [[SUB2:%[0-9]+]]:_(s32) = G_SUB [[COPY]], [[SMIN1]] - ; GFX6: $vgpr0 = COPY [[SUB2]](s32) + ; GFX6-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; GFX6-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 2147483647 + ; GFX6-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 -2147483648 + ; GFX6-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 -1 + ; GFX6-NEXT: [[SMAX:%[0-9]+]]:_(s32) = G_SMAX [[COPY]], [[C2]] + ; GFX6-NEXT: [[SUB:%[0-9]+]]:_(s32) = G_SUB [[SMAX]], [[C]] + ; GFX6-NEXT: [[SMIN:%[0-9]+]]:_(s32) = G_SMIN [[COPY]], [[C2]] + ; GFX6-NEXT: [[SUB1:%[0-9]+]]:_(s32) = G_SUB [[SMIN]], [[C1]] + ; GFX6-NEXT: [[SMAX1:%[0-9]+]]:_(s32) = G_SMAX [[SUB]], [[COPY1]] + ; GFX6-NEXT: [[SMIN1:%[0-9]+]]:_(s32) = G_SMIN [[SMAX1]], [[SUB1]] + ; GFX6-NEXT: [[SUB2:%[0-9]+]]:_(s32) = G_SUB [[COPY]], [[SMIN1]] + ; GFX6-NEXT: $vgpr0 = COPY [[SUB2]](s32) ; GFX8-LABEL: name: ssubsat_s32 ; GFX8: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX8: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX8: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 2147483647 - ; GFX8: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 -2147483648 - ; GFX8: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 -1 - ; GFX8: [[SMAX:%[0-9]+]]:_(s32) = G_SMAX [[COPY]], [[C2]] - ; GFX8: [[SUB:%[0-9]+]]:_(s32) = G_SUB [[SMAX]], [[C]] - ; GFX8: [[SMIN:%[0-9]+]]:_(s32) = G_SMIN [[COPY]], [[C2]] - ; GFX8: [[SUB1:%[0-9]+]]:_(s32) = G_SUB [[SMIN]], [[C1]] - ; GFX8: [[SMAX1:%[0-9]+]]:_(s32) = G_SMAX [[SUB]], [[COPY1]] - ; GFX8: [[SMIN1:%[0-9]+]]:_(s32) = G_SMIN [[SMAX1]], [[SUB1]] - ; GFX8: [[SUB2:%[0-9]+]]:_(s32) = G_SUB [[COPY]], [[SMIN1]] - ; GFX8: $vgpr0 = COPY [[SUB2]](s32) + ; GFX8-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; GFX8-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 2147483647 + ; GFX8-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 -2147483648 + ; GFX8-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 -1 + ; GFX8-NEXT: [[SMAX:%[0-9]+]]:_(s32) = G_SMAX [[COPY]], [[C2]] + ; GFX8-NEXT: [[SUB:%[0-9]+]]:_(s32) = G_SUB [[SMAX]], [[C]] + ; GFX8-NEXT: [[SMIN:%[0-9]+]]:_(s32) = G_SMIN [[COPY]], [[C2]] + ; GFX8-NEXT: [[SUB1:%[0-9]+]]:_(s32) = G_SUB [[SMIN]], [[C1]] + ; GFX8-NEXT: [[SMAX1:%[0-9]+]]:_(s32) = G_SMAX [[SUB]], [[COPY1]] + ; GFX8-NEXT: [[SMIN1:%[0-9]+]]:_(s32) = G_SMIN [[SMAX1]], [[SUB1]] + ; GFX8-NEXT: [[SUB2:%[0-9]+]]:_(s32) = G_SUB [[COPY]], [[SMIN1]] + ; GFX8-NEXT: $vgpr0 = COPY [[SUB2]](s32) ; GFX9-LABEL: name: ssubsat_s32 ; GFX9: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX9: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX9: [[SSUBSAT:%[0-9]+]]:_(s32) = G_SSUBSAT [[COPY]], [[COPY1]] - ; GFX9: $vgpr0 = COPY [[SSUBSAT]](s32) + ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; GFX9-NEXT: [[SSUBSAT:%[0-9]+]]:_(s32) = G_SSUBSAT [[COPY]], [[COPY1]] + ; GFX9-NEXT: $vgpr0 = COPY [[SSUBSAT]](s32) %0:_(s32) = COPY $vgpr0 %1:_(s32) = COPY $vgpr1 %2:_(s32) = G_SSUBSAT %0, %1 @@ -799,61 +799,61 @@ body: | ; GFX6-LABEL: name: ssubsat_v2s32 ; GFX6: [[COPY:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr0_vgpr1 - ; GFX6: [[COPY1:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr2_vgpr3 - ; GFX6: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<2 x s32>) - ; GFX6: [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](<2 x s32>) - ; GFX6: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 2147483647 - ; GFX6: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 -2147483648 - ; GFX6: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 -1 - ; GFX6: [[SMAX:%[0-9]+]]:_(s32) = G_SMAX [[UV]], [[C2]] - ; GFX6: 
[[SUB:%[0-9]+]]:_(s32) = G_SUB [[SMAX]], [[C]] - ; GFX6: [[SMIN:%[0-9]+]]:_(s32) = G_SMIN [[UV]], [[C2]] - ; GFX6: [[SUB1:%[0-9]+]]:_(s32) = G_SUB [[SMIN]], [[C1]] - ; GFX6: [[SMAX1:%[0-9]+]]:_(s32) = G_SMAX [[SUB]], [[UV2]] - ; GFX6: [[SMIN1:%[0-9]+]]:_(s32) = G_SMIN [[SMAX1]], [[SUB1]] - ; GFX6: [[SUB2:%[0-9]+]]:_(s32) = G_SUB [[UV]], [[SMIN1]] - ; GFX6: [[SMAX2:%[0-9]+]]:_(s32) = G_SMAX [[UV1]], [[C2]] - ; GFX6: [[SUB3:%[0-9]+]]:_(s32) = G_SUB [[SMAX2]], [[C]] - ; GFX6: [[SMIN2:%[0-9]+]]:_(s32) = G_SMIN [[UV1]], [[C2]] - ; GFX6: [[SUB4:%[0-9]+]]:_(s32) = G_SUB [[SMIN2]], [[C1]] - ; GFX6: [[SMAX3:%[0-9]+]]:_(s32) = G_SMAX [[SUB3]], [[UV3]] - ; GFX6: [[SMIN3:%[0-9]+]]:_(s32) = G_SMIN [[SMAX3]], [[SUB4]] - ; GFX6: [[SUB5:%[0-9]+]]:_(s32) = G_SUB [[UV1]], [[SMIN3]] - ; GFX6: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[SUB2]](s32), [[SUB5]](s32) - ; GFX6: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x s32>) + ; GFX6-NEXT: [[COPY1:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr2_vgpr3 + ; GFX6-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<2 x s32>) + ; GFX6-NEXT: [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](<2 x s32>) + ; GFX6-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 2147483647 + ; GFX6-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 -2147483648 + ; GFX6-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 -1 + ; GFX6-NEXT: [[SMAX:%[0-9]+]]:_(s32) = G_SMAX [[UV]], [[C2]] + ; GFX6-NEXT: [[SUB:%[0-9]+]]:_(s32) = G_SUB [[SMAX]], [[C]] + ; GFX6-NEXT: [[SMIN:%[0-9]+]]:_(s32) = G_SMIN [[UV]], [[C2]] + ; GFX6-NEXT: [[SUB1:%[0-9]+]]:_(s32) = G_SUB [[SMIN]], [[C1]] + ; GFX6-NEXT: [[SMAX1:%[0-9]+]]:_(s32) = G_SMAX [[SUB]], [[UV2]] + ; GFX6-NEXT: [[SMIN1:%[0-9]+]]:_(s32) = G_SMIN [[SMAX1]], [[SUB1]] + ; GFX6-NEXT: [[SUB2:%[0-9]+]]:_(s32) = G_SUB [[UV]], [[SMIN1]] + ; GFX6-NEXT: [[SMAX2:%[0-9]+]]:_(s32) = G_SMAX [[UV1]], [[C2]] + ; GFX6-NEXT: [[SUB3:%[0-9]+]]:_(s32) = G_SUB [[SMAX2]], [[C]] + ; GFX6-NEXT: [[SMIN2:%[0-9]+]]:_(s32) = G_SMIN [[UV1]], [[C2]] + ; GFX6-NEXT: [[SUB4:%[0-9]+]]:_(s32) = G_SUB [[SMIN2]], [[C1]] + ; GFX6-NEXT: [[SMAX3:%[0-9]+]]:_(s32) = G_SMAX [[SUB3]], [[UV3]] + ; GFX6-NEXT: [[SMIN3:%[0-9]+]]:_(s32) = G_SMIN [[SMAX3]], [[SUB4]] + ; GFX6-NEXT: [[SUB5:%[0-9]+]]:_(s32) = G_SUB [[UV1]], [[SMIN3]] + ; GFX6-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[SUB2]](s32), [[SUB5]](s32) + ; GFX6-NEXT: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x s32>) ; GFX8-LABEL: name: ssubsat_v2s32 ; GFX8: [[COPY:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr0_vgpr1 - ; GFX8: [[COPY1:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr2_vgpr3 - ; GFX8: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<2 x s32>) - ; GFX8: [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](<2 x s32>) - ; GFX8: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 2147483647 - ; GFX8: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 -2147483648 - ; GFX8: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 -1 - ; GFX8: [[SMAX:%[0-9]+]]:_(s32) = G_SMAX [[UV]], [[C2]] - ; GFX8: [[SUB:%[0-9]+]]:_(s32) = G_SUB [[SMAX]], [[C]] - ; GFX8: [[SMIN:%[0-9]+]]:_(s32) = G_SMIN [[UV]], [[C2]] - ; GFX8: [[SUB1:%[0-9]+]]:_(s32) = G_SUB [[SMIN]], [[C1]] - ; GFX8: [[SMAX1:%[0-9]+]]:_(s32) = G_SMAX [[SUB]], [[UV2]] - ; GFX8: [[SMIN1:%[0-9]+]]:_(s32) = G_SMIN [[SMAX1]], [[SUB1]] - ; GFX8: [[SUB2:%[0-9]+]]:_(s32) = G_SUB [[UV]], [[SMIN1]] - ; GFX8: [[SMAX2:%[0-9]+]]:_(s32) = G_SMAX [[UV1]], [[C2]] - ; GFX8: [[SUB3:%[0-9]+]]:_(s32) = G_SUB [[SMAX2]], [[C]] - ; GFX8: [[SMIN2:%[0-9]+]]:_(s32) = G_SMIN 
[[UV1]], [[C2]] - ; GFX8: [[SUB4:%[0-9]+]]:_(s32) = G_SUB [[SMIN2]], [[C1]] - ; GFX8: [[SMAX3:%[0-9]+]]:_(s32) = G_SMAX [[SUB3]], [[UV3]] - ; GFX8: [[SMIN3:%[0-9]+]]:_(s32) = G_SMIN [[SMAX3]], [[SUB4]] - ; GFX8: [[SUB5:%[0-9]+]]:_(s32) = G_SUB [[UV1]], [[SMIN3]] - ; GFX8: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[SUB2]](s32), [[SUB5]](s32) - ; GFX8: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x s32>) + ; GFX8-NEXT: [[COPY1:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr2_vgpr3 + ; GFX8-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<2 x s32>) + ; GFX8-NEXT: [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](<2 x s32>) + ; GFX8-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 2147483647 + ; GFX8-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 -2147483648 + ; GFX8-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 -1 + ; GFX8-NEXT: [[SMAX:%[0-9]+]]:_(s32) = G_SMAX [[UV]], [[C2]] + ; GFX8-NEXT: [[SUB:%[0-9]+]]:_(s32) = G_SUB [[SMAX]], [[C]] + ; GFX8-NEXT: [[SMIN:%[0-9]+]]:_(s32) = G_SMIN [[UV]], [[C2]] + ; GFX8-NEXT: [[SUB1:%[0-9]+]]:_(s32) = G_SUB [[SMIN]], [[C1]] + ; GFX8-NEXT: [[SMAX1:%[0-9]+]]:_(s32) = G_SMAX [[SUB]], [[UV2]] + ; GFX8-NEXT: [[SMIN1:%[0-9]+]]:_(s32) = G_SMIN [[SMAX1]], [[SUB1]] + ; GFX8-NEXT: [[SUB2:%[0-9]+]]:_(s32) = G_SUB [[UV]], [[SMIN1]] + ; GFX8-NEXT: [[SMAX2:%[0-9]+]]:_(s32) = G_SMAX [[UV1]], [[C2]] + ; GFX8-NEXT: [[SUB3:%[0-9]+]]:_(s32) = G_SUB [[SMAX2]], [[C]] + ; GFX8-NEXT: [[SMIN2:%[0-9]+]]:_(s32) = G_SMIN [[UV1]], [[C2]] + ; GFX8-NEXT: [[SUB4:%[0-9]+]]:_(s32) = G_SUB [[SMIN2]], [[C1]] + ; GFX8-NEXT: [[SMAX3:%[0-9]+]]:_(s32) = G_SMAX [[SUB3]], [[UV3]] + ; GFX8-NEXT: [[SMIN3:%[0-9]+]]:_(s32) = G_SMIN [[SMAX3]], [[SUB4]] + ; GFX8-NEXT: [[SUB5:%[0-9]+]]:_(s32) = G_SUB [[UV1]], [[SMIN3]] + ; GFX8-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[SUB2]](s32), [[SUB5]](s32) + ; GFX8-NEXT: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x s32>) ; GFX9-LABEL: name: ssubsat_v2s32 ; GFX9: [[COPY:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr0_vgpr1 - ; GFX9: [[COPY1:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr2_vgpr3 - ; GFX9: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<2 x s32>) - ; GFX9: [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](<2 x s32>) - ; GFX9: [[SSUBSAT:%[0-9]+]]:_(s32) = G_SSUBSAT [[UV]], [[UV2]] - ; GFX9: [[SSUBSAT1:%[0-9]+]]:_(s32) = G_SSUBSAT [[UV1]], [[UV3]] - ; GFX9: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[SSUBSAT]](s32), [[SSUBSAT1]](s32) - ; GFX9: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x s32>) + ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr2_vgpr3 + ; GFX9-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<2 x s32>) + ; GFX9-NEXT: [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](<2 x s32>) + ; GFX9-NEXT: [[SSUBSAT:%[0-9]+]]:_(s32) = G_SSUBSAT [[UV]], [[UV2]] + ; GFX9-NEXT: [[SSUBSAT1:%[0-9]+]]:_(s32) = G_SSUBSAT [[UV1]], [[UV3]] + ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[SSUBSAT]](s32), [[SSUBSAT1]](s32) + ; GFX9-NEXT: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x s32>) %0:_(<2 x s32>) = COPY $vgpr0_vgpr1 %1:_(<2 x s32>) = COPY $vgpr2_vgpr3 %2:_(<2 x s32>) = G_SSUBSAT %0, %1 @@ -868,70 +868,70 @@ body: | ; GFX6-LABEL: name: ssubsat_s64 ; GFX6: [[COPY:%[0-9]+]]:_(s64) = COPY $vgpr0_vgpr1 - ; GFX6: [[COPY1:%[0-9]+]]:_(s64) = COPY $vgpr2_vgpr3 - ; GFX6: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](s64) - ; GFX6: 
[[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](s64) - ; GFX6: [[USUBO:%[0-9]+]]:_(s32), [[USUBO1:%[0-9]+]]:_(s1) = G_USUBO [[UV]], [[UV2]] - ; GFX6: [[USUBE:%[0-9]+]]:_(s32), [[USUBE1:%[0-9]+]]:_(s1) = G_USUBE [[UV1]], [[UV3]], [[USUBO1]] - ; GFX6: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[USUBO]](s32), [[USUBE]](s32) - ; GFX6: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 - ; GFX6: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(slt), [[MV]](s64), [[COPY]] - ; GFX6: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(sgt), [[COPY1]](s64), [[C]] - ; GFX6: [[XOR:%[0-9]+]]:_(s1) = G_XOR [[ICMP1]], [[ICMP]] - ; GFX6: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 63 - ; GFX6: [[ASHR:%[0-9]+]]:_(s64) = G_ASHR [[MV]], [[C1]](s32) - ; GFX6: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 -9223372036854775808 - ; GFX6: [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[ASHR]](s64) - ; GFX6: [[UV6:%[0-9]+]]:_(s32), [[UV7:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[C2]](s64) - ; GFX6: [[UADDO:%[0-9]+]]:_(s32), [[UADDO1:%[0-9]+]]:_(s1) = G_UADDO [[UV4]], [[UV6]] - ; GFX6: [[UADDE:%[0-9]+]]:_(s32), [[UADDE1:%[0-9]+]]:_(s1) = G_UADDE [[UV5]], [[UV7]], [[UADDO1]] - ; GFX6: [[MV1:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UADDO]](s32), [[UADDE]](s32) - ; GFX6: [[SELECT:%[0-9]+]]:_(s64) = G_SELECT [[XOR]](s1), [[MV1]], [[MV]] - ; GFX6: $vgpr0_vgpr1 = COPY [[SELECT]](s64) + ; GFX6-NEXT: [[COPY1:%[0-9]+]]:_(s64) = COPY $vgpr2_vgpr3 + ; GFX6-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](s64) + ; GFX6-NEXT: [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](s64) + ; GFX6-NEXT: [[USUBO:%[0-9]+]]:_(s32), [[USUBO1:%[0-9]+]]:_(s1) = G_USUBO [[UV]], [[UV2]] + ; GFX6-NEXT: [[USUBE:%[0-9]+]]:_(s32), [[USUBE1:%[0-9]+]]:_(s1) = G_USUBE [[UV1]], [[UV3]], [[USUBO1]] + ; GFX6-NEXT: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[USUBO]](s32), [[USUBE]](s32) + ; GFX6-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 + ; GFX6-NEXT: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(slt), [[MV]](s64), [[COPY]] + ; GFX6-NEXT: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(sgt), [[COPY1]](s64), [[C]] + ; GFX6-NEXT: [[XOR:%[0-9]+]]:_(s1) = G_XOR [[ICMP1]], [[ICMP]] + ; GFX6-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 63 + ; GFX6-NEXT: [[ASHR:%[0-9]+]]:_(s64) = G_ASHR [[MV]], [[C1]](s32) + ; GFX6-NEXT: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 -9223372036854775808 + ; GFX6-NEXT: [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[ASHR]](s64) + ; GFX6-NEXT: [[UV6:%[0-9]+]]:_(s32), [[UV7:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[C2]](s64) + ; GFX6-NEXT: [[UADDO:%[0-9]+]]:_(s32), [[UADDO1:%[0-9]+]]:_(s1) = G_UADDO [[UV4]], [[UV6]] + ; GFX6-NEXT: [[UADDE:%[0-9]+]]:_(s32), [[UADDE1:%[0-9]+]]:_(s1) = G_UADDE [[UV5]], [[UV7]], [[UADDO1]] + ; GFX6-NEXT: [[MV1:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UADDO]](s32), [[UADDE]](s32) + ; GFX6-NEXT: [[SELECT:%[0-9]+]]:_(s64) = G_SELECT [[XOR]](s1), [[MV1]], [[MV]] + ; GFX6-NEXT: $vgpr0_vgpr1 = COPY [[SELECT]](s64) ; GFX8-LABEL: name: ssubsat_s64 ; GFX8: [[COPY:%[0-9]+]]:_(s64) = COPY $vgpr0_vgpr1 - ; GFX8: [[COPY1:%[0-9]+]]:_(s64) = COPY $vgpr2_vgpr3 - ; GFX8: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](s64) - ; GFX8: [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](s64) - ; GFX8: [[USUBO:%[0-9]+]]:_(s32), [[USUBO1:%[0-9]+]]:_(s1) = G_USUBO [[UV]], [[UV2]] - ; GFX8: [[USUBE:%[0-9]+]]:_(s32), [[USUBE1:%[0-9]+]]:_(s1) = G_USUBE [[UV1]], [[UV3]], [[USUBO1]] - ; GFX8: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES 
[[USUBO]](s32), [[USUBE]](s32) - ; GFX8: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 - ; GFX8: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(slt), [[MV]](s64), [[COPY]] - ; GFX8: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(sgt), [[COPY1]](s64), [[C]] - ; GFX8: [[XOR:%[0-9]+]]:_(s1) = G_XOR [[ICMP1]], [[ICMP]] - ; GFX8: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 63 - ; GFX8: [[ASHR:%[0-9]+]]:_(s64) = G_ASHR [[MV]], [[C1]](s32) - ; GFX8: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 -9223372036854775808 - ; GFX8: [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[ASHR]](s64) - ; GFX8: [[UV6:%[0-9]+]]:_(s32), [[UV7:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[C2]](s64) - ; GFX8: [[UADDO:%[0-9]+]]:_(s32), [[UADDO1:%[0-9]+]]:_(s1) = G_UADDO [[UV4]], [[UV6]] - ; GFX8: [[UADDE:%[0-9]+]]:_(s32), [[UADDE1:%[0-9]+]]:_(s1) = G_UADDE [[UV5]], [[UV7]], [[UADDO1]] - ; GFX8: [[MV1:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UADDO]](s32), [[UADDE]](s32) - ; GFX8: [[SELECT:%[0-9]+]]:_(s64) = G_SELECT [[XOR]](s1), [[MV1]], [[MV]] - ; GFX8: $vgpr0_vgpr1 = COPY [[SELECT]](s64) + ; GFX8-NEXT: [[COPY1:%[0-9]+]]:_(s64) = COPY $vgpr2_vgpr3 + ; GFX8-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](s64) + ; GFX8-NEXT: [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](s64) + ; GFX8-NEXT: [[USUBO:%[0-9]+]]:_(s32), [[USUBO1:%[0-9]+]]:_(s1) = G_USUBO [[UV]], [[UV2]] + ; GFX8-NEXT: [[USUBE:%[0-9]+]]:_(s32), [[USUBE1:%[0-9]+]]:_(s1) = G_USUBE [[UV1]], [[UV3]], [[USUBO1]] + ; GFX8-NEXT: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[USUBO]](s32), [[USUBE]](s32) + ; GFX8-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 + ; GFX8-NEXT: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(slt), [[MV]](s64), [[COPY]] + ; GFX8-NEXT: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(sgt), [[COPY1]](s64), [[C]] + ; GFX8-NEXT: [[XOR:%[0-9]+]]:_(s1) = G_XOR [[ICMP1]], [[ICMP]] + ; GFX8-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 63 + ; GFX8-NEXT: [[ASHR:%[0-9]+]]:_(s64) = G_ASHR [[MV]], [[C1]](s32) + ; GFX8-NEXT: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 -9223372036854775808 + ; GFX8-NEXT: [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[ASHR]](s64) + ; GFX8-NEXT: [[UV6:%[0-9]+]]:_(s32), [[UV7:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[C2]](s64) + ; GFX8-NEXT: [[UADDO:%[0-9]+]]:_(s32), [[UADDO1:%[0-9]+]]:_(s1) = G_UADDO [[UV4]], [[UV6]] + ; GFX8-NEXT: [[UADDE:%[0-9]+]]:_(s32), [[UADDE1:%[0-9]+]]:_(s1) = G_UADDE [[UV5]], [[UV7]], [[UADDO1]] + ; GFX8-NEXT: [[MV1:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UADDO]](s32), [[UADDE]](s32) + ; GFX8-NEXT: [[SELECT:%[0-9]+]]:_(s64) = G_SELECT [[XOR]](s1), [[MV1]], [[MV]] + ; GFX8-NEXT: $vgpr0_vgpr1 = COPY [[SELECT]](s64) ; GFX9-LABEL: name: ssubsat_s64 ; GFX9: [[COPY:%[0-9]+]]:_(s64) = COPY $vgpr0_vgpr1 - ; GFX9: [[COPY1:%[0-9]+]]:_(s64) = COPY $vgpr2_vgpr3 - ; GFX9: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](s64) - ; GFX9: [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](s64) - ; GFX9: [[USUBO:%[0-9]+]]:_(s32), [[USUBO1:%[0-9]+]]:_(s1) = G_USUBO [[UV]], [[UV2]] - ; GFX9: [[USUBE:%[0-9]+]]:_(s32), [[USUBE1:%[0-9]+]]:_(s1) = G_USUBE [[UV1]], [[UV3]], [[USUBO1]] - ; GFX9: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[USUBO]](s32), [[USUBE]](s32) - ; GFX9: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 - ; GFX9: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(slt), [[MV]](s64), [[COPY]] - ; GFX9: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(sgt), [[COPY1]](s64), [[C]] - ; GFX9: [[XOR:%[0-9]+]]:_(s1) = G_XOR [[ICMP1]], [[ICMP]] - ; GFX9: 
[[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 63 - ; GFX9: [[ASHR:%[0-9]+]]:_(s64) = G_ASHR [[MV]], [[C1]](s32) - ; GFX9: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 -9223372036854775808 - ; GFX9: [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[ASHR]](s64) - ; GFX9: [[UV6:%[0-9]+]]:_(s32), [[UV7:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[C2]](s64) - ; GFX9: [[UADDO:%[0-9]+]]:_(s32), [[UADDO1:%[0-9]+]]:_(s1) = G_UADDO [[UV4]], [[UV6]] - ; GFX9: [[UADDE:%[0-9]+]]:_(s32), [[UADDE1:%[0-9]+]]:_(s1) = G_UADDE [[UV5]], [[UV7]], [[UADDO1]] - ; GFX9: [[MV1:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UADDO]](s32), [[UADDE]](s32) - ; GFX9: [[SELECT:%[0-9]+]]:_(s64) = G_SELECT [[XOR]](s1), [[MV1]], [[MV]] - ; GFX9: $vgpr0_vgpr1 = COPY [[SELECT]](s64) + ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(s64) = COPY $vgpr2_vgpr3 + ; GFX9-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](s64) + ; GFX9-NEXT: [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](s64) + ; GFX9-NEXT: [[USUBO:%[0-9]+]]:_(s32), [[USUBO1:%[0-9]+]]:_(s1) = G_USUBO [[UV]], [[UV2]] + ; GFX9-NEXT: [[USUBE:%[0-9]+]]:_(s32), [[USUBE1:%[0-9]+]]:_(s1) = G_USUBE [[UV1]], [[UV3]], [[USUBO1]] + ; GFX9-NEXT: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[USUBO]](s32), [[USUBE]](s32) + ; GFX9-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 + ; GFX9-NEXT: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(slt), [[MV]](s64), [[COPY]] + ; GFX9-NEXT: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(sgt), [[COPY1]](s64), [[C]] + ; GFX9-NEXT: [[XOR:%[0-9]+]]:_(s1) = G_XOR [[ICMP1]], [[ICMP]] + ; GFX9-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 63 + ; GFX9-NEXT: [[ASHR:%[0-9]+]]:_(s64) = G_ASHR [[MV]], [[C1]](s32) + ; GFX9-NEXT: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 -9223372036854775808 + ; GFX9-NEXT: [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[ASHR]](s64) + ; GFX9-NEXT: [[UV6:%[0-9]+]]:_(s32), [[UV7:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[C2]](s64) + ; GFX9-NEXT: [[UADDO:%[0-9]+]]:_(s32), [[UADDO1:%[0-9]+]]:_(s1) = G_UADDO [[UV4]], [[UV6]] + ; GFX9-NEXT: [[UADDE:%[0-9]+]]:_(s32), [[UADDE1:%[0-9]+]]:_(s1) = G_UADDE [[UV5]], [[UV7]], [[UADDO1]] + ; GFX9-NEXT: [[MV1:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UADDO]](s32), [[UADDE]](s32) + ; GFX9-NEXT: [[SELECT:%[0-9]+]]:_(s64) = G_SELECT [[XOR]](s1), [[MV1]], [[MV]] + ; GFX9-NEXT: $vgpr0_vgpr1 = COPY [[SELECT]](s64) %0:_(s64) = COPY $vgpr0_vgpr1 %1:_(s64) = COPY $vgpr2_vgpr3 %2:_(s64) = G_SSUBSAT %0, %1 @@ -946,124 +946,124 @@ body: | ; GFX6-LABEL: name: ssubsat_v2s64 ; GFX6: [[COPY:%[0-9]+]]:_(<2 x s64>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3 - ; GFX6: [[COPY1:%[0-9]+]]:_(<2 x s64>) = COPY $vgpr4_vgpr5_vgpr6_vgpr7 - ; GFX6: [[UV:%[0-9]+]]:_(s64), [[UV1:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[COPY]](<2 x s64>) - ; GFX6: [[UV2:%[0-9]+]]:_(s64), [[UV3:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[COPY1]](<2 x s64>) - ; GFX6: [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[UV]](s64) - ; GFX6: [[UV6:%[0-9]+]]:_(s32), [[UV7:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[UV2]](s64) - ; GFX6: [[USUBO:%[0-9]+]]:_(s32), [[USUBO1:%[0-9]+]]:_(s1) = G_USUBO [[UV4]], [[UV6]] - ; GFX6: [[USUBE:%[0-9]+]]:_(s32), [[USUBE1:%[0-9]+]]:_(s1) = G_USUBE [[UV5]], [[UV7]], [[USUBO1]] - ; GFX6: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[USUBO]](s32), [[USUBE]](s32) - ; GFX6: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 - ; GFX6: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(slt), [[MV]](s64), [[UV]] - ; GFX6: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(sgt), [[UV2]](s64), [[C]] - ; GFX6: [[XOR:%[0-9]+]]:_(s1) = G_XOR 
[[ICMP1]], [[ICMP]] - ; GFX6: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 63 - ; GFX6: [[ASHR:%[0-9]+]]:_(s64) = G_ASHR [[MV]], [[C1]](s32) - ; GFX6: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 -9223372036854775808 - ; GFX6: [[UV8:%[0-9]+]]:_(s32), [[UV9:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[ASHR]](s64) - ; GFX6: [[UV10:%[0-9]+]]:_(s32), [[UV11:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[C2]](s64) - ; GFX6: [[UADDO:%[0-9]+]]:_(s32), [[UADDO1:%[0-9]+]]:_(s1) = G_UADDO [[UV8]], [[UV10]] - ; GFX6: [[UADDE:%[0-9]+]]:_(s32), [[UADDE1:%[0-9]+]]:_(s1) = G_UADDE [[UV9]], [[UV11]], [[UADDO1]] - ; GFX6: [[MV1:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UADDO]](s32), [[UADDE]](s32) - ; GFX6: [[SELECT:%[0-9]+]]:_(s64) = G_SELECT [[XOR]](s1), [[MV1]], [[MV]] - ; GFX6: [[UV12:%[0-9]+]]:_(s32), [[UV13:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[UV1]](s64) - ; GFX6: [[UV14:%[0-9]+]]:_(s32), [[UV15:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[UV3]](s64) - ; GFX6: [[USUBO2:%[0-9]+]]:_(s32), [[USUBO3:%[0-9]+]]:_(s1) = G_USUBO [[UV12]], [[UV14]] - ; GFX6: [[USUBE2:%[0-9]+]]:_(s32), [[USUBE3:%[0-9]+]]:_(s1) = G_USUBE [[UV13]], [[UV15]], [[USUBO3]] - ; GFX6: [[MV2:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[USUBO2]](s32), [[USUBE2]](s32) - ; GFX6: [[ICMP2:%[0-9]+]]:_(s1) = G_ICMP intpred(slt), [[MV2]](s64), [[UV1]] - ; GFX6: [[ICMP3:%[0-9]+]]:_(s1) = G_ICMP intpred(sgt), [[UV3]](s64), [[C]] - ; GFX6: [[XOR1:%[0-9]+]]:_(s1) = G_XOR [[ICMP3]], [[ICMP2]] - ; GFX6: [[ASHR1:%[0-9]+]]:_(s64) = G_ASHR [[MV2]], [[C1]](s32) - ; GFX6: [[UV16:%[0-9]+]]:_(s32), [[UV17:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[ASHR1]](s64) - ; GFX6: [[UV18:%[0-9]+]]:_(s32), [[UV19:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[C2]](s64) - ; GFX6: [[UADDO2:%[0-9]+]]:_(s32), [[UADDO3:%[0-9]+]]:_(s1) = G_UADDO [[UV16]], [[UV18]] - ; GFX6: [[UADDE2:%[0-9]+]]:_(s32), [[UADDE3:%[0-9]+]]:_(s1) = G_UADDE [[UV17]], [[UV19]], [[UADDO3]] - ; GFX6: [[MV3:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UADDO2]](s32), [[UADDE2]](s32) - ; GFX6: [[SELECT1:%[0-9]+]]:_(s64) = G_SELECT [[XOR1]](s1), [[MV3]], [[MV2]] - ; GFX6: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s64>) = G_BUILD_VECTOR [[SELECT]](s64), [[SELECT1]](s64) - ; GFX6: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<2 x s64>) + ; GFX6-NEXT: [[COPY1:%[0-9]+]]:_(<2 x s64>) = COPY $vgpr4_vgpr5_vgpr6_vgpr7 + ; GFX6-NEXT: [[UV:%[0-9]+]]:_(s64), [[UV1:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[COPY]](<2 x s64>) + ; GFX6-NEXT: [[UV2:%[0-9]+]]:_(s64), [[UV3:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[COPY1]](<2 x s64>) + ; GFX6-NEXT: [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[UV]](s64) + ; GFX6-NEXT: [[UV6:%[0-9]+]]:_(s32), [[UV7:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[UV2]](s64) + ; GFX6-NEXT: [[USUBO:%[0-9]+]]:_(s32), [[USUBO1:%[0-9]+]]:_(s1) = G_USUBO [[UV4]], [[UV6]] + ; GFX6-NEXT: [[USUBE:%[0-9]+]]:_(s32), [[USUBE1:%[0-9]+]]:_(s1) = G_USUBE [[UV5]], [[UV7]], [[USUBO1]] + ; GFX6-NEXT: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[USUBO]](s32), [[USUBE]](s32) + ; GFX6-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 + ; GFX6-NEXT: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(slt), [[MV]](s64), [[UV]] + ; GFX6-NEXT: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(sgt), [[UV2]](s64), [[C]] + ; GFX6-NEXT: [[XOR:%[0-9]+]]:_(s1) = G_XOR [[ICMP1]], [[ICMP]] + ; GFX6-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 63 + ; GFX6-NEXT: [[ASHR:%[0-9]+]]:_(s64) = G_ASHR [[MV]], [[C1]](s32) + ; GFX6-NEXT: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 -9223372036854775808 + ; GFX6-NEXT: [[UV8:%[0-9]+]]:_(s32), [[UV9:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[ASHR]](s64) + ; GFX6-NEXT: [[UV10:%[0-9]+]]:_(s32), 
[[UV11:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[C2]](s64) + ; GFX6-NEXT: [[UADDO:%[0-9]+]]:_(s32), [[UADDO1:%[0-9]+]]:_(s1) = G_UADDO [[UV8]], [[UV10]] + ; GFX6-NEXT: [[UADDE:%[0-9]+]]:_(s32), [[UADDE1:%[0-9]+]]:_(s1) = G_UADDE [[UV9]], [[UV11]], [[UADDO1]] + ; GFX6-NEXT: [[MV1:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UADDO]](s32), [[UADDE]](s32) + ; GFX6-NEXT: [[SELECT:%[0-9]+]]:_(s64) = G_SELECT [[XOR]](s1), [[MV1]], [[MV]] + ; GFX6-NEXT: [[UV12:%[0-9]+]]:_(s32), [[UV13:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[UV1]](s64) + ; GFX6-NEXT: [[UV14:%[0-9]+]]:_(s32), [[UV15:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[UV3]](s64) + ; GFX6-NEXT: [[USUBO2:%[0-9]+]]:_(s32), [[USUBO3:%[0-9]+]]:_(s1) = G_USUBO [[UV12]], [[UV14]] + ; GFX6-NEXT: [[USUBE2:%[0-9]+]]:_(s32), [[USUBE3:%[0-9]+]]:_(s1) = G_USUBE [[UV13]], [[UV15]], [[USUBO3]] + ; GFX6-NEXT: [[MV2:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[USUBO2]](s32), [[USUBE2]](s32) + ; GFX6-NEXT: [[ICMP2:%[0-9]+]]:_(s1) = G_ICMP intpred(slt), [[MV2]](s64), [[UV1]] + ; GFX6-NEXT: [[ICMP3:%[0-9]+]]:_(s1) = G_ICMP intpred(sgt), [[UV3]](s64), [[C]] + ; GFX6-NEXT: [[XOR1:%[0-9]+]]:_(s1) = G_XOR [[ICMP3]], [[ICMP2]] + ; GFX6-NEXT: [[ASHR1:%[0-9]+]]:_(s64) = G_ASHR [[MV2]], [[C1]](s32) + ; GFX6-NEXT: [[UV16:%[0-9]+]]:_(s32), [[UV17:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[ASHR1]](s64) + ; GFX6-NEXT: [[UV18:%[0-9]+]]:_(s32), [[UV19:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[C2]](s64) + ; GFX6-NEXT: [[UADDO2:%[0-9]+]]:_(s32), [[UADDO3:%[0-9]+]]:_(s1) = G_UADDO [[UV16]], [[UV18]] + ; GFX6-NEXT: [[UADDE2:%[0-9]+]]:_(s32), [[UADDE3:%[0-9]+]]:_(s1) = G_UADDE [[UV17]], [[UV19]], [[UADDO3]] + ; GFX6-NEXT: [[MV3:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UADDO2]](s32), [[UADDE2]](s32) + ; GFX6-NEXT: [[SELECT1:%[0-9]+]]:_(s64) = G_SELECT [[XOR1]](s1), [[MV3]], [[MV2]] + ; GFX6-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s64>) = G_BUILD_VECTOR [[SELECT]](s64), [[SELECT1]](s64) + ; GFX6-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<2 x s64>) ; GFX8-LABEL: name: ssubsat_v2s64 ; GFX8: [[COPY:%[0-9]+]]:_(<2 x s64>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3 - ; GFX8: [[COPY1:%[0-9]+]]:_(<2 x s64>) = COPY $vgpr4_vgpr5_vgpr6_vgpr7 - ; GFX8: [[UV:%[0-9]+]]:_(s64), [[UV1:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[COPY]](<2 x s64>) - ; GFX8: [[UV2:%[0-9]+]]:_(s64), [[UV3:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[COPY1]](<2 x s64>) - ; GFX8: [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[UV]](s64) - ; GFX8: [[UV6:%[0-9]+]]:_(s32), [[UV7:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[UV2]](s64) - ; GFX8: [[USUBO:%[0-9]+]]:_(s32), [[USUBO1:%[0-9]+]]:_(s1) = G_USUBO [[UV4]], [[UV6]] - ; GFX8: [[USUBE:%[0-9]+]]:_(s32), [[USUBE1:%[0-9]+]]:_(s1) = G_USUBE [[UV5]], [[UV7]], [[USUBO1]] - ; GFX8: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[USUBO]](s32), [[USUBE]](s32) - ; GFX8: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 - ; GFX8: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(slt), [[MV]](s64), [[UV]] - ; GFX8: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(sgt), [[UV2]](s64), [[C]] - ; GFX8: [[XOR:%[0-9]+]]:_(s1) = G_XOR [[ICMP1]], [[ICMP]] - ; GFX8: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 63 - ; GFX8: [[ASHR:%[0-9]+]]:_(s64) = G_ASHR [[MV]], [[C1]](s32) - ; GFX8: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 -9223372036854775808 - ; GFX8: [[UV8:%[0-9]+]]:_(s32), [[UV9:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[ASHR]](s64) - ; GFX8: [[UV10:%[0-9]+]]:_(s32), [[UV11:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[C2]](s64) - ; GFX8: [[UADDO:%[0-9]+]]:_(s32), [[UADDO1:%[0-9]+]]:_(s1) = G_UADDO [[UV8]], [[UV10]] - ; GFX8: [[UADDE:%[0-9]+]]:_(s32), [[UADDE1:%[0-9]+]]:_(s1) = 
G_UADDE [[UV9]], [[UV11]], [[UADDO1]] - ; GFX8: [[MV1:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UADDO]](s32), [[UADDE]](s32) - ; GFX8: [[SELECT:%[0-9]+]]:_(s64) = G_SELECT [[XOR]](s1), [[MV1]], [[MV]] - ; GFX8: [[UV12:%[0-9]+]]:_(s32), [[UV13:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[UV1]](s64) - ; GFX8: [[UV14:%[0-9]+]]:_(s32), [[UV15:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[UV3]](s64) - ; GFX8: [[USUBO2:%[0-9]+]]:_(s32), [[USUBO3:%[0-9]+]]:_(s1) = G_USUBO [[UV12]], [[UV14]] - ; GFX8: [[USUBE2:%[0-9]+]]:_(s32), [[USUBE3:%[0-9]+]]:_(s1) = G_USUBE [[UV13]], [[UV15]], [[USUBO3]] - ; GFX8: [[MV2:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[USUBO2]](s32), [[USUBE2]](s32) - ; GFX8: [[ICMP2:%[0-9]+]]:_(s1) = G_ICMP intpred(slt), [[MV2]](s64), [[UV1]] - ; GFX8: [[ICMP3:%[0-9]+]]:_(s1) = G_ICMP intpred(sgt), [[UV3]](s64), [[C]] - ; GFX8: [[XOR1:%[0-9]+]]:_(s1) = G_XOR [[ICMP3]], [[ICMP2]] - ; GFX8: [[ASHR1:%[0-9]+]]:_(s64) = G_ASHR [[MV2]], [[C1]](s32) - ; GFX8: [[UV16:%[0-9]+]]:_(s32), [[UV17:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[ASHR1]](s64) - ; GFX8: [[UV18:%[0-9]+]]:_(s32), [[UV19:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[C2]](s64) - ; GFX8: [[UADDO2:%[0-9]+]]:_(s32), [[UADDO3:%[0-9]+]]:_(s1) = G_UADDO [[UV16]], [[UV18]] - ; GFX8: [[UADDE2:%[0-9]+]]:_(s32), [[UADDE3:%[0-9]+]]:_(s1) = G_UADDE [[UV17]], [[UV19]], [[UADDO3]] - ; GFX8: [[MV3:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UADDO2]](s32), [[UADDE2]](s32) - ; GFX8: [[SELECT1:%[0-9]+]]:_(s64) = G_SELECT [[XOR1]](s1), [[MV3]], [[MV2]] - ; GFX8: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s64>) = G_BUILD_VECTOR [[SELECT]](s64), [[SELECT1]](s64) - ; GFX8: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<2 x s64>) + ; GFX8-NEXT: [[COPY1:%[0-9]+]]:_(<2 x s64>) = COPY $vgpr4_vgpr5_vgpr6_vgpr7 + ; GFX8-NEXT: [[UV:%[0-9]+]]:_(s64), [[UV1:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[COPY]](<2 x s64>) + ; GFX8-NEXT: [[UV2:%[0-9]+]]:_(s64), [[UV3:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[COPY1]](<2 x s64>) + ; GFX8-NEXT: [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[UV]](s64) + ; GFX8-NEXT: [[UV6:%[0-9]+]]:_(s32), [[UV7:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[UV2]](s64) + ; GFX8-NEXT: [[USUBO:%[0-9]+]]:_(s32), [[USUBO1:%[0-9]+]]:_(s1) = G_USUBO [[UV4]], [[UV6]] + ; GFX8-NEXT: [[USUBE:%[0-9]+]]:_(s32), [[USUBE1:%[0-9]+]]:_(s1) = G_USUBE [[UV5]], [[UV7]], [[USUBO1]] + ; GFX8-NEXT: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[USUBO]](s32), [[USUBE]](s32) + ; GFX8-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 + ; GFX8-NEXT: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(slt), [[MV]](s64), [[UV]] + ; GFX8-NEXT: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(sgt), [[UV2]](s64), [[C]] + ; GFX8-NEXT: [[XOR:%[0-9]+]]:_(s1) = G_XOR [[ICMP1]], [[ICMP]] + ; GFX8-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 63 + ; GFX8-NEXT: [[ASHR:%[0-9]+]]:_(s64) = G_ASHR [[MV]], [[C1]](s32) + ; GFX8-NEXT: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 -9223372036854775808 + ; GFX8-NEXT: [[UV8:%[0-9]+]]:_(s32), [[UV9:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[ASHR]](s64) + ; GFX8-NEXT: [[UV10:%[0-9]+]]:_(s32), [[UV11:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[C2]](s64) + ; GFX8-NEXT: [[UADDO:%[0-9]+]]:_(s32), [[UADDO1:%[0-9]+]]:_(s1) = G_UADDO [[UV8]], [[UV10]] + ; GFX8-NEXT: [[UADDE:%[0-9]+]]:_(s32), [[UADDE1:%[0-9]+]]:_(s1) = G_UADDE [[UV9]], [[UV11]], [[UADDO1]] + ; GFX8-NEXT: [[MV1:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UADDO]](s32), [[UADDE]](s32) + ; GFX8-NEXT: [[SELECT:%[0-9]+]]:_(s64) = G_SELECT [[XOR]](s1), [[MV1]], [[MV]] + ; GFX8-NEXT: [[UV12:%[0-9]+]]:_(s32), [[UV13:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[UV1]](s64) + ; GFX8-NEXT: 
[[UV14:%[0-9]+]]:_(s32), [[UV15:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[UV3]](s64) + ; GFX8-NEXT: [[USUBO2:%[0-9]+]]:_(s32), [[USUBO3:%[0-9]+]]:_(s1) = G_USUBO [[UV12]], [[UV14]] + ; GFX8-NEXT: [[USUBE2:%[0-9]+]]:_(s32), [[USUBE3:%[0-9]+]]:_(s1) = G_USUBE [[UV13]], [[UV15]], [[USUBO3]] + ; GFX8-NEXT: [[MV2:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[USUBO2]](s32), [[USUBE2]](s32) + ; GFX8-NEXT: [[ICMP2:%[0-9]+]]:_(s1) = G_ICMP intpred(slt), [[MV2]](s64), [[UV1]] + ; GFX8-NEXT: [[ICMP3:%[0-9]+]]:_(s1) = G_ICMP intpred(sgt), [[UV3]](s64), [[C]] + ; GFX8-NEXT: [[XOR1:%[0-9]+]]:_(s1) = G_XOR [[ICMP3]], [[ICMP2]] + ; GFX8-NEXT: [[ASHR1:%[0-9]+]]:_(s64) = G_ASHR [[MV2]], [[C1]](s32) + ; GFX8-NEXT: [[UV16:%[0-9]+]]:_(s32), [[UV17:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[ASHR1]](s64) + ; GFX8-NEXT: [[UV18:%[0-9]+]]:_(s32), [[UV19:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[C2]](s64) + ; GFX8-NEXT: [[UADDO2:%[0-9]+]]:_(s32), [[UADDO3:%[0-9]+]]:_(s1) = G_UADDO [[UV16]], [[UV18]] + ; GFX8-NEXT: [[UADDE2:%[0-9]+]]:_(s32), [[UADDE3:%[0-9]+]]:_(s1) = G_UADDE [[UV17]], [[UV19]], [[UADDO3]] + ; GFX8-NEXT: [[MV3:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UADDO2]](s32), [[UADDE2]](s32) + ; GFX8-NEXT: [[SELECT1:%[0-9]+]]:_(s64) = G_SELECT [[XOR1]](s1), [[MV3]], [[MV2]] + ; GFX8-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s64>) = G_BUILD_VECTOR [[SELECT]](s64), [[SELECT1]](s64) + ; GFX8-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<2 x s64>) ; GFX9-LABEL: name: ssubsat_v2s64 ; GFX9: [[COPY:%[0-9]+]]:_(<2 x s64>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3 - ; GFX9: [[COPY1:%[0-9]+]]:_(<2 x s64>) = COPY $vgpr4_vgpr5_vgpr6_vgpr7 - ; GFX9: [[UV:%[0-9]+]]:_(s64), [[UV1:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[COPY]](<2 x s64>) - ; GFX9: [[UV2:%[0-9]+]]:_(s64), [[UV3:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[COPY1]](<2 x s64>) - ; GFX9: [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[UV]](s64) - ; GFX9: [[UV6:%[0-9]+]]:_(s32), [[UV7:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[UV2]](s64) - ; GFX9: [[USUBO:%[0-9]+]]:_(s32), [[USUBO1:%[0-9]+]]:_(s1) = G_USUBO [[UV4]], [[UV6]] - ; GFX9: [[USUBE:%[0-9]+]]:_(s32), [[USUBE1:%[0-9]+]]:_(s1) = G_USUBE [[UV5]], [[UV7]], [[USUBO1]] - ; GFX9: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[USUBO]](s32), [[USUBE]](s32) - ; GFX9: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 - ; GFX9: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(slt), [[MV]](s64), [[UV]] - ; GFX9: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(sgt), [[UV2]](s64), [[C]] - ; GFX9: [[XOR:%[0-9]+]]:_(s1) = G_XOR [[ICMP1]], [[ICMP]] - ; GFX9: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 63 - ; GFX9: [[ASHR:%[0-9]+]]:_(s64) = G_ASHR [[MV]], [[C1]](s32) - ; GFX9: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 -9223372036854775808 - ; GFX9: [[UV8:%[0-9]+]]:_(s32), [[UV9:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[ASHR]](s64) - ; GFX9: [[UV10:%[0-9]+]]:_(s32), [[UV11:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[C2]](s64) - ; GFX9: [[UADDO:%[0-9]+]]:_(s32), [[UADDO1:%[0-9]+]]:_(s1) = G_UADDO [[UV8]], [[UV10]] - ; GFX9: [[UADDE:%[0-9]+]]:_(s32), [[UADDE1:%[0-9]+]]:_(s1) = G_UADDE [[UV9]], [[UV11]], [[UADDO1]] - ; GFX9: [[MV1:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UADDO]](s32), [[UADDE]](s32) - ; GFX9: [[SELECT:%[0-9]+]]:_(s64) = G_SELECT [[XOR]](s1), [[MV1]], [[MV]] - ; GFX9: [[UV12:%[0-9]+]]:_(s32), [[UV13:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[UV1]](s64) - ; GFX9: [[UV14:%[0-9]+]]:_(s32), [[UV15:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[UV3]](s64) - ; GFX9: [[USUBO2:%[0-9]+]]:_(s32), [[USUBO3:%[0-9]+]]:_(s1) = G_USUBO [[UV12]], [[UV14]] - ; GFX9: [[USUBE2:%[0-9]+]]:_(s32), [[USUBE3:%[0-9]+]]:_(s1) = 
G_USUBE [[UV13]], [[UV15]], [[USUBO3]] - ; GFX9: [[MV2:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[USUBO2]](s32), [[USUBE2]](s32) - ; GFX9: [[ICMP2:%[0-9]+]]:_(s1) = G_ICMP intpred(slt), [[MV2]](s64), [[UV1]] - ; GFX9: [[ICMP3:%[0-9]+]]:_(s1) = G_ICMP intpred(sgt), [[UV3]](s64), [[C]] - ; GFX9: [[XOR1:%[0-9]+]]:_(s1) = G_XOR [[ICMP3]], [[ICMP2]] - ; GFX9: [[ASHR1:%[0-9]+]]:_(s64) = G_ASHR [[MV2]], [[C1]](s32) - ; GFX9: [[UV16:%[0-9]+]]:_(s32), [[UV17:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[ASHR1]](s64) - ; GFX9: [[UV18:%[0-9]+]]:_(s32), [[UV19:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[C2]](s64) - ; GFX9: [[UADDO2:%[0-9]+]]:_(s32), [[UADDO3:%[0-9]+]]:_(s1) = G_UADDO [[UV16]], [[UV18]] - ; GFX9: [[UADDE2:%[0-9]+]]:_(s32), [[UADDE3:%[0-9]+]]:_(s1) = G_UADDE [[UV17]], [[UV19]], [[UADDO3]] - ; GFX9: [[MV3:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UADDO2]](s32), [[UADDE2]](s32) - ; GFX9: [[SELECT1:%[0-9]+]]:_(s64) = G_SELECT [[XOR1]](s1), [[MV3]], [[MV2]] - ; GFX9: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s64>) = G_BUILD_VECTOR [[SELECT]](s64), [[SELECT1]](s64) - ; GFX9: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<2 x s64>) + ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(<2 x s64>) = COPY $vgpr4_vgpr5_vgpr6_vgpr7 + ; GFX9-NEXT: [[UV:%[0-9]+]]:_(s64), [[UV1:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[COPY]](<2 x s64>) + ; GFX9-NEXT: [[UV2:%[0-9]+]]:_(s64), [[UV3:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[COPY1]](<2 x s64>) + ; GFX9-NEXT: [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[UV]](s64) + ; GFX9-NEXT: [[UV6:%[0-9]+]]:_(s32), [[UV7:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[UV2]](s64) + ; GFX9-NEXT: [[USUBO:%[0-9]+]]:_(s32), [[USUBO1:%[0-9]+]]:_(s1) = G_USUBO [[UV4]], [[UV6]] + ; GFX9-NEXT: [[USUBE:%[0-9]+]]:_(s32), [[USUBE1:%[0-9]+]]:_(s1) = G_USUBE [[UV5]], [[UV7]], [[USUBO1]] + ; GFX9-NEXT: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[USUBO]](s32), [[USUBE]](s32) + ; GFX9-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 + ; GFX9-NEXT: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(slt), [[MV]](s64), [[UV]] + ; GFX9-NEXT: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(sgt), [[UV2]](s64), [[C]] + ; GFX9-NEXT: [[XOR:%[0-9]+]]:_(s1) = G_XOR [[ICMP1]], [[ICMP]] + ; GFX9-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 63 + ; GFX9-NEXT: [[ASHR:%[0-9]+]]:_(s64) = G_ASHR [[MV]], [[C1]](s32) + ; GFX9-NEXT: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 -9223372036854775808 + ; GFX9-NEXT: [[UV8:%[0-9]+]]:_(s32), [[UV9:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[ASHR]](s64) + ; GFX9-NEXT: [[UV10:%[0-9]+]]:_(s32), [[UV11:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[C2]](s64) + ; GFX9-NEXT: [[UADDO:%[0-9]+]]:_(s32), [[UADDO1:%[0-9]+]]:_(s1) = G_UADDO [[UV8]], [[UV10]] + ; GFX9-NEXT: [[UADDE:%[0-9]+]]:_(s32), [[UADDE1:%[0-9]+]]:_(s1) = G_UADDE [[UV9]], [[UV11]], [[UADDO1]] + ; GFX9-NEXT: [[MV1:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UADDO]](s32), [[UADDE]](s32) + ; GFX9-NEXT: [[SELECT:%[0-9]+]]:_(s64) = G_SELECT [[XOR]](s1), [[MV1]], [[MV]] + ; GFX9-NEXT: [[UV12:%[0-9]+]]:_(s32), [[UV13:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[UV1]](s64) + ; GFX9-NEXT: [[UV14:%[0-9]+]]:_(s32), [[UV15:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[UV3]](s64) + ; GFX9-NEXT: [[USUBO2:%[0-9]+]]:_(s32), [[USUBO3:%[0-9]+]]:_(s1) = G_USUBO [[UV12]], [[UV14]] + ; GFX9-NEXT: [[USUBE2:%[0-9]+]]:_(s32), [[USUBE3:%[0-9]+]]:_(s1) = G_USUBE [[UV13]], [[UV15]], [[USUBO3]] + ; GFX9-NEXT: [[MV2:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[USUBO2]](s32), [[USUBE2]](s32) + ; GFX9-NEXT: [[ICMP2:%[0-9]+]]:_(s1) = G_ICMP intpred(slt), [[MV2]](s64), [[UV1]] + ; GFX9-NEXT: [[ICMP3:%[0-9]+]]:_(s1) = G_ICMP intpred(sgt), [[UV3]](s64), [[C]] + 
; GFX9-NEXT: [[XOR1:%[0-9]+]]:_(s1) = G_XOR [[ICMP3]], [[ICMP2]] + ; GFX9-NEXT: [[ASHR1:%[0-9]+]]:_(s64) = G_ASHR [[MV2]], [[C1]](s32) + ; GFX9-NEXT: [[UV16:%[0-9]+]]:_(s32), [[UV17:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[ASHR1]](s64) + ; GFX9-NEXT: [[UV18:%[0-9]+]]:_(s32), [[UV19:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[C2]](s64) + ; GFX9-NEXT: [[UADDO2:%[0-9]+]]:_(s32), [[UADDO3:%[0-9]+]]:_(s1) = G_UADDO [[UV16]], [[UV18]] + ; GFX9-NEXT: [[UADDE2:%[0-9]+]]:_(s32), [[UADDE3:%[0-9]+]]:_(s1) = G_UADDE [[UV17]], [[UV19]], [[UADDO3]] + ; GFX9-NEXT: [[MV3:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UADDO2]](s32), [[UADDE2]](s32) + ; GFX9-NEXT: [[SELECT1:%[0-9]+]]:_(s64) = G_SELECT [[XOR1]](s1), [[MV3]], [[MV2]] + ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s64>) = G_BUILD_VECTOR [[SELECT]](s64), [[SELECT1]](s64) + ; GFX9-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<2 x s64>) %0:_(<2 x s64>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3 %1:_(<2 x s64>) = COPY $vgpr4_vgpr5_vgpr6_vgpr7 %2:_(<2 x s64>) = G_SSUBSAT %0, %1 diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-trunc.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-trunc.mir index 7a2012cf9d27..88cd74b29361 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-trunc.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-trunc.mir @@ -1,5 +1,5 @@ # NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py -# RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=fiji -run-pass=legalizer -global-isel-abort=0 -o - %s | FileCheck %s +# RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=fiji -run-pass=legalizer -o - %s | FileCheck %s --- name: test_trunc_s64_to_s32 diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-uadde.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-uadde.mir index 3401b2857fb2..3eb5179c89c5 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-uadde.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-uadde.mir @@ -1,5 +1,5 @@ # NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py -# RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=fiji -O0 -run-pass=legalizer -global-isel-abort=0 %s -o - | FileCheck %s +# RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=fiji -O0 -run-pass=legalizer %s -o - | FileCheck %s --- name: test_uadde_s32 diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-uaddo.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-uaddo.mir index 9aafa0da26f6..b057afa4f0e5 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-uaddo.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-uaddo.mir @@ -1,5 +1,5 @@ # NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py -# RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=fiji -O0 -run-pass=legalizer -global-isel-abort=0 %s -o - | FileCheck %s +# RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=fiji -O0 -run-pass=legalizer %s -o - | FileCheck %s --- name: test_uaddo_s32 @@ -9,11 +9,11 @@ body: | ; CHECK-LABEL: name: test_uaddo_s32 ; CHECK: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; CHECK: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; CHECK: [[UADDO:%[0-9]+]]:_(s32), [[UADDO1:%[0-9]+]]:_(s1) = G_UADDO [[COPY]], [[COPY1]] - ; CHECK: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO1]](s1) - ; CHECK: $vgpr0 = COPY [[UADDO]](s32) - ; CHECK: $vgpr1 = COPY [[ZEXT]](s32) + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; CHECK-NEXT: [[UADDO:%[0-9]+]]:_(s32), [[UADDO1:%[0-9]+]]:_(s1) = G_UADDO [[COPY]], [[COPY1]] + ; CHECK-NEXT: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO1]](s1) + ; CHECK-NEXT: $vgpr0 = COPY [[UADDO]](s32) + ; CHECK-NEXT: $vgpr1 
= COPY [[ZEXT]](s32) %0:_(s32) = COPY $vgpr0 %1:_(s32) = COPY $vgpr1 %2:_(s32), %3:_(s1) = G_UADDO %0, %1 @@ -30,17 +30,17 @@ body: | ; CHECK-LABEL: name: test_uaddo_s7 ; CHECK: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; CHECK: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; CHECK: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 127 - ; CHECK: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY]], [[C]] - ; CHECK: [[AND1:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C]] - ; CHECK: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[AND]], [[AND1]] - ; CHECK: [[AND2:%[0-9]+]]:_(s32) = G_AND [[ADD]], [[C]] - ; CHECK: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[ADD]](s32), [[AND2]] - ; CHECK: [[AND3:%[0-9]+]]:_(s32) = G_AND [[ADD]], [[C]] - ; CHECK: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[ICMP]](s1) - ; CHECK: $vgpr0 = COPY [[AND3]](s32) - ; CHECK: $vgpr1 = COPY [[ZEXT]](s32) + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 127 + ; CHECK-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY]], [[C]] + ; CHECK-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C]] + ; CHECK-NEXT: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[AND]], [[AND1]] + ; CHECK-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[ADD]], [[C]] + ; CHECK-NEXT: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[ADD]](s32), [[AND2]] + ; CHECK-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[ADD]], [[C]] + ; CHECK-NEXT: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[ICMP]](s1) + ; CHECK-NEXT: $vgpr0 = COPY [[AND3]](s32) + ; CHECK-NEXT: $vgpr1 = COPY [[ZEXT]](s32) %0:_(s32) = COPY $vgpr0 %1:_(s32) = COPY $vgpr1 %2:_(s7) = G_TRUNC %0 @@ -61,17 +61,17 @@ body: | ; CHECK-LABEL: name: test_uaddo_s16 ; CHECK: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; CHECK: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; CHECK: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 - ; CHECK: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY]], [[C]] - ; CHECK: [[AND1:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C]] - ; CHECK: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[AND]], [[AND1]] - ; CHECK: [[AND2:%[0-9]+]]:_(s32) = G_AND [[ADD]], [[C]] - ; CHECK: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[ADD]](s32), [[AND2]] - ; CHECK: [[AND3:%[0-9]+]]:_(s32) = G_AND [[ADD]], [[C]] - ; CHECK: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[ICMP]](s1) - ; CHECK: $vgpr0 = COPY [[AND3]](s32) - ; CHECK: $vgpr1 = COPY [[ZEXT]](s32) + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 + ; CHECK-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY]], [[C]] + ; CHECK-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C]] + ; CHECK-NEXT: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[AND]], [[AND1]] + ; CHECK-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[ADD]], [[C]] + ; CHECK-NEXT: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[ADD]](s32), [[AND2]] + ; CHECK-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[ADD]], [[C]] + ; CHECK-NEXT: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[ICMP]](s1) + ; CHECK-NEXT: $vgpr0 = COPY [[AND3]](s32) + ; CHECK-NEXT: $vgpr1 = COPY [[ZEXT]](s32) %0:_(s32) = COPY $vgpr0 %1:_(s32) = COPY $vgpr1 %2:_(s16) = G_TRUNC %0 @@ -92,16 +92,16 @@ body: | ; CHECK-LABEL: name: test_uaddo_s64 ; CHECK: [[COPY:%[0-9]+]]:_(s64) = COPY $vgpr0_vgpr1 - ; CHECK: [[COPY1:%[0-9]+]]:_(s64) = COPY $vgpr2_vgpr3 - ; CHECK: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](s64) - ; CHECK: [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](s64) - ; CHECK: [[UADDO:%[0-9]+]]:_(s32), [[UADDO1:%[0-9]+]]:_(s1) = G_UADDO [[UV]], [[UV2]] - ; CHECK: [[UADDE:%[0-9]+]]:_(s32), [[UADDE1:%[0-9]+]]:_(s1) = G_UADDE [[UV1]], 
[[UV3]], [[UADDO1]] - ; CHECK: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UADDO]](s32), [[UADDE]](s32) - ; CHECK: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(ult), [[MV]](s64), [[COPY1]] - ; CHECK: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[ICMP]](s1) - ; CHECK: $vgpr0_vgpr1 = COPY [[MV]](s64) - ; CHECK: $vgpr2 = COPY [[ZEXT]](s32) + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s64) = COPY $vgpr2_vgpr3 + ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](s64) + ; CHECK-NEXT: [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](s64) + ; CHECK-NEXT: [[UADDO:%[0-9]+]]:_(s32), [[UADDO1:%[0-9]+]]:_(s1) = G_UADDO [[UV]], [[UV2]] + ; CHECK-NEXT: [[UADDE:%[0-9]+]]:_(s32), [[UADDE1:%[0-9]+]]:_(s1) = G_UADDE [[UV1]], [[UV3]], [[UADDO1]] + ; CHECK-NEXT: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UADDO]](s32), [[UADDE]](s32) + ; CHECK-NEXT: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(ult), [[MV]](s64), [[COPY1]] + ; CHECK-NEXT: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[ICMP]](s1) + ; CHECK-NEXT: $vgpr0_vgpr1 = COPY [[MV]](s64) + ; CHECK-NEXT: $vgpr2 = COPY [[ZEXT]](s32) %0:_(s64) = COPY $vgpr0_vgpr1 %1:_(s64) = COPY $vgpr2_vgpr3 %2:_(s64), %3:_(s1) = G_UADDO %0, %1 @@ -118,37 +118,37 @@ body: | ; CHECK-LABEL: name: test_uaddo_v2s16 ; CHECK: [[COPY:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0 - ; CHECK: [[COPY1:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr1 - ; CHECK: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY]](<2 x s16>) - ; CHECK: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST]](s32) - ; CHECK: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; CHECK: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) - ; CHECK: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) - ; CHECK: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[COPY1]](<2 x s16>) - ; CHECK: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST1]](s32) - ; CHECK: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C]](s32) - ; CHECK: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR1]](s32) - ; CHECK: [[ADD:%[0-9]+]]:_(s16) = G_ADD [[TRUNC]], [[TRUNC2]] - ; CHECK: [[ADD1:%[0-9]+]]:_(s16) = G_ADD [[TRUNC1]], [[TRUNC3]] - ; CHECK: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[ADD]](s16) - ; CHECK: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[ADD1]](s16) - ; CHECK: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C]](s32) - ; CHECK: [[OR:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL]] - ; CHECK: [[BITCAST2:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) - ; CHECK: [[BITCAST3:%[0-9]+]]:_(s32) = G_BITCAST [[COPY1]](<2 x s16>) - ; CHECK: [[TRUNC4:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST3]](s32) - ; CHECK: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST3]], [[C]](s32) - ; CHECK: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR2]](s32) - ; CHECK: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(ult), [[ADD]](s16), [[TRUNC4]] - ; CHECK: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(ult), [[ADD1]](s16), [[TRUNC5]] - ; CHECK: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[ICMP]](s1) - ; CHECK: [[ANYEXT1:%[0-9]+]]:_(s32) = G_ANYEXT [[ICMP1]](s1) - ; CHECK: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 - ; CHECK: [[AND:%[0-9]+]]:_(s32) = G_AND [[ANYEXT]], [[C1]] - ; CHECK: [[AND1:%[0-9]+]]:_(s32) = G_AND [[ANYEXT1]], [[C1]] - ; CHECK: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[AND]](s32), [[AND1]](s32) - ; CHECK: $vgpr0 = COPY [[BITCAST2]](<2 x s16>) - ; CHECK: $vgpr1_vgpr2 = COPY [[BUILD_VECTOR]](<2 x s32>) + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr1 + ; CHECK-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY]](<2 x s16>) + ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST]](s32) + ; CHECK-NEXT: 
[[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; CHECK-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) + ; CHECK-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) + ; CHECK-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[COPY1]](<2 x s16>) + ; CHECK-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST1]](s32) + ; CHECK-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C]](s32) + ; CHECK-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR1]](s32) + ; CHECK-NEXT: [[ADD:%[0-9]+]]:_(s16) = G_ADD [[TRUNC]], [[TRUNC2]] + ; CHECK-NEXT: [[ADD1:%[0-9]+]]:_(s16) = G_ADD [[TRUNC1]], [[TRUNC3]] + ; CHECK-NEXT: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[ADD]](s16) + ; CHECK-NEXT: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[ADD1]](s16) + ; CHECK-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C]](s32) + ; CHECK-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL]] + ; CHECK-NEXT: [[BITCAST2:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) + ; CHECK-NEXT: [[BITCAST3:%[0-9]+]]:_(s32) = G_BITCAST [[COPY1]](<2 x s16>) + ; CHECK-NEXT: [[TRUNC4:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST3]](s32) + ; CHECK-NEXT: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST3]], [[C]](s32) + ; CHECK-NEXT: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR2]](s32) + ; CHECK-NEXT: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(ult), [[ADD]](s16), [[TRUNC4]] + ; CHECK-NEXT: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(ult), [[ADD1]](s16), [[TRUNC5]] + ; CHECK-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[ICMP]](s1) + ; CHECK-NEXT: [[ANYEXT1:%[0-9]+]]:_(s32) = G_ANYEXT [[ICMP1]](s1) + ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 + ; CHECK-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[ANYEXT]], [[C1]] + ; CHECK-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[ANYEXT1]], [[C1]] + ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[AND]](s32), [[AND1]](s32) + ; CHECK-NEXT: $vgpr0 = COPY [[BITCAST2]](<2 x s16>) + ; CHECK-NEXT: $vgpr1_vgpr2 = COPY [[BUILD_VECTOR]](<2 x s32>) %0:_(<2 x s16>) = COPY $vgpr0 %1:_(<2 x s16>) = COPY $vgpr1 %2:_(<2 x s16>), %3:_(<2 x s1>) = G_UADDO %0, %1 @@ -164,67 +164,67 @@ body: | liveins: $vgpr0_vgpr1_vgpr2, $vgpr3_vgpr4_vgpr5 ; CHECK-LABEL: name: test_uaddo_v3s16 ; CHECK: [[COPY:%[0-9]+]]:_(<6 x s16>) = COPY $vgpr0_vgpr1_vgpr2 - ; CHECK: [[COPY1:%[0-9]+]]:_(<6 x s16>) = COPY $vgpr3_vgpr4_vgpr5 - ; CHECK: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>), [[UV2:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[COPY]](<6 x s16>) - ; CHECK: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>) - ; CHECK: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST]](s32) - ; CHECK: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; CHECK: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) - ; CHECK: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) - ; CHECK: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>) - ; CHECK: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST1]](s32) - ; CHECK: [[UV3:%[0-9]+]]:_(<2 x s16>), [[UV4:%[0-9]+]]:_(<2 x s16>), [[UV5:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[COPY1]](<6 x s16>) - ; CHECK: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[UV3]](<2 x s16>) - ; CHECK: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST2]](s32) - ; CHECK: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C]](s32) - ; CHECK: [[TRUNC4:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR1]](s32) - ; CHECK: [[BITCAST3:%[0-9]+]]:_(s32) = G_BITCAST [[UV4]](<2 x s16>) - ; CHECK: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST3]](s32) - ; CHECK: [[ADD:%[0-9]+]]:_(s16) = G_ADD [[TRUNC]], [[TRUNC3]] - ; CHECK: [[ADD1:%[0-9]+]]:_(s16) = G_ADD [[TRUNC1]], 
[[TRUNC4]] - ; CHECK: [[ADD2:%[0-9]+]]:_(s16) = G_ADD [[TRUNC2]], [[TRUNC5]] - ; CHECK: [[UV6:%[0-9]+]]:_(<2 x s16>), [[UV7:%[0-9]+]]:_(<2 x s16>), [[UV8:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[COPY1]](<6 x s16>) - ; CHECK: [[BITCAST4:%[0-9]+]]:_(s32) = G_BITCAST [[UV6]](<2 x s16>) - ; CHECK: [[TRUNC6:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST4]](s32) - ; CHECK: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST4]], [[C]](s32) - ; CHECK: [[TRUNC7:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR2]](s32) - ; CHECK: [[BITCAST5:%[0-9]+]]:_(s32) = G_BITCAST [[UV7]](<2 x s16>) - ; CHECK: [[TRUNC8:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST5]](s32) - ; CHECK: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(ult), [[ADD]](s16), [[TRUNC6]] - ; CHECK: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(ult), [[ADD1]](s16), [[TRUNC7]] - ; CHECK: [[ICMP2:%[0-9]+]]:_(s1) = G_ICMP intpred(ult), [[ADD2]](s16), [[TRUNC8]] - ; CHECK: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[ICMP]](s1) - ; CHECK: [[ANYEXT1:%[0-9]+]]:_(s32) = G_ANYEXT [[ICMP1]](s1) - ; CHECK: [[ANYEXT2:%[0-9]+]]:_(s32) = G_ANYEXT [[ICMP2]](s1) - ; CHECK: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF - ; CHECK: [[UV9:%[0-9]+]]:_(<2 x s16>), [[UV10:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<4 x s16>) - ; CHECK: [[BITCAST6:%[0-9]+]]:_(s32) = G_BITCAST [[UV9]](<2 x s16>) - ; CHECK: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST6]], [[C]](s32) - ; CHECK: [[BITCAST7:%[0-9]+]]:_(s32) = G_BITCAST [[UV10]](<2 x s16>) - ; CHECK: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[ADD]](s16) - ; CHECK: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[ADD1]](s16) - ; CHECK: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C]](s32) - ; CHECK: [[OR:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL]] - ; CHECK: [[BITCAST8:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) - ; CHECK: [[ZEXT2:%[0-9]+]]:_(s32) = G_ZEXT [[ADD2]](s16) - ; CHECK: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 - ; CHECK: [[AND:%[0-9]+]]:_(s32) = G_AND [[BITCAST6]], [[C1]] - ; CHECK: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND]], [[C]](s32) - ; CHECK: [[OR1:%[0-9]+]]:_(s32) = G_OR [[ZEXT2]], [[SHL1]] - ; CHECK: [[BITCAST9:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32) - ; CHECK: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LSHR3]], [[C1]] - ; CHECK: [[AND2:%[0-9]+]]:_(s32) = G_AND [[BITCAST7]], [[C1]] - ; CHECK: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND2]], [[C]](s32) - ; CHECK: [[OR2:%[0-9]+]]:_(s32) = G_OR [[AND1]], [[SHL2]] - ; CHECK: [[BITCAST10:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR2]](s32) - ; CHECK: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BITCAST8]](<2 x s16>), [[BITCAST9]](<2 x s16>), [[BITCAST10]](<2 x s16>) - ; CHECK: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 - ; CHECK: [[AND3:%[0-9]+]]:_(s32) = G_AND [[ANYEXT]], [[C2]] - ; CHECK: [[AND4:%[0-9]+]]:_(s32) = G_AND [[ANYEXT1]], [[C2]] - ; CHECK: [[AND5:%[0-9]+]]:_(s32) = G_AND [[ANYEXT2]], [[C2]] - ; CHECK: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[AND3]](s32), [[AND4]](s32), [[AND5]](s32) - ; CHECK: $vgpr0_vgpr1_vgpr2 = COPY [[CONCAT_VECTORS]](<6 x s16>) - ; CHECK: $vgpr0_vgpr1_vgpr2 = COPY [[BUILD_VECTOR]](<3 x s32>) + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(<6 x s16>) = COPY $vgpr3_vgpr4_vgpr5 + ; CHECK-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>), [[UV2:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[COPY]](<6 x s16>) + ; CHECK-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>) + ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST]](s32) + ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; CHECK-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) + ; 
CHECK-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) + ; CHECK-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>) + ; CHECK-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST1]](s32) + ; CHECK-NEXT: [[UV3:%[0-9]+]]:_(<2 x s16>), [[UV4:%[0-9]+]]:_(<2 x s16>), [[UV5:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[COPY1]](<6 x s16>) + ; CHECK-NEXT: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[UV3]](<2 x s16>) + ; CHECK-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST2]](s32) + ; CHECK-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C]](s32) + ; CHECK-NEXT: [[TRUNC4:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR1]](s32) + ; CHECK-NEXT: [[BITCAST3:%[0-9]+]]:_(s32) = G_BITCAST [[UV4]](<2 x s16>) + ; CHECK-NEXT: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST3]](s32) + ; CHECK-NEXT: [[ADD:%[0-9]+]]:_(s16) = G_ADD [[TRUNC]], [[TRUNC3]] + ; CHECK-NEXT: [[ADD1:%[0-9]+]]:_(s16) = G_ADD [[TRUNC1]], [[TRUNC4]] + ; CHECK-NEXT: [[ADD2:%[0-9]+]]:_(s16) = G_ADD [[TRUNC2]], [[TRUNC5]] + ; CHECK-NEXT: [[UV6:%[0-9]+]]:_(<2 x s16>), [[UV7:%[0-9]+]]:_(<2 x s16>), [[UV8:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[COPY1]](<6 x s16>) + ; CHECK-NEXT: [[BITCAST4:%[0-9]+]]:_(s32) = G_BITCAST [[UV6]](<2 x s16>) + ; CHECK-NEXT: [[TRUNC6:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST4]](s32) + ; CHECK-NEXT: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST4]], [[C]](s32) + ; CHECK-NEXT: [[TRUNC7:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR2]](s32) + ; CHECK-NEXT: [[BITCAST5:%[0-9]+]]:_(s32) = G_BITCAST [[UV7]](<2 x s16>) + ; CHECK-NEXT: [[TRUNC8:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST5]](s32) + ; CHECK-NEXT: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(ult), [[ADD]](s16), [[TRUNC6]] + ; CHECK-NEXT: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(ult), [[ADD1]](s16), [[TRUNC7]] + ; CHECK-NEXT: [[ICMP2:%[0-9]+]]:_(s1) = G_ICMP intpred(ult), [[ADD2]](s16), [[TRUNC8]] + ; CHECK-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[ICMP]](s1) + ; CHECK-NEXT: [[ANYEXT1:%[0-9]+]]:_(s32) = G_ANYEXT [[ICMP1]](s1) + ; CHECK-NEXT: [[ANYEXT2:%[0-9]+]]:_(s32) = G_ANYEXT [[ICMP2]](s1) + ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF + ; CHECK-NEXT: [[UV9:%[0-9]+]]:_(<2 x s16>), [[UV10:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<4 x s16>) + ; CHECK-NEXT: [[BITCAST6:%[0-9]+]]:_(s32) = G_BITCAST [[UV9]](<2 x s16>) + ; CHECK-NEXT: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST6]], [[C]](s32) + ; CHECK-NEXT: [[BITCAST7:%[0-9]+]]:_(s32) = G_BITCAST [[UV10]](<2 x s16>) + ; CHECK-NEXT: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[ADD]](s16) + ; CHECK-NEXT: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[ADD1]](s16) + ; CHECK-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C]](s32) + ; CHECK-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL]] + ; CHECK-NEXT: [[BITCAST8:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) + ; CHECK-NEXT: [[ZEXT2:%[0-9]+]]:_(s32) = G_ZEXT [[ADD2]](s16) + ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 + ; CHECK-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[BITCAST6]], [[C1]] + ; CHECK-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND]], [[C]](s32) + ; CHECK-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[ZEXT2]], [[SHL1]] + ; CHECK-NEXT: [[BITCAST9:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32) + ; CHECK-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LSHR3]], [[C1]] + ; CHECK-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[BITCAST7]], [[C1]] + ; CHECK-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND2]], [[C]](s32) + ; CHECK-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[AND1]], [[SHL2]] + ; CHECK-NEXT: [[BITCAST10:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR2]](s32) + ; CHECK-NEXT: 
[[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BITCAST8]](<2 x s16>), [[BITCAST9]](<2 x s16>), [[BITCAST10]](<2 x s16>) + ; CHECK-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 + ; CHECK-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[ANYEXT]], [[C2]] + ; CHECK-NEXT: [[AND4:%[0-9]+]]:_(s32) = G_AND [[ANYEXT1]], [[C2]] + ; CHECK-NEXT: [[AND5:%[0-9]+]]:_(s32) = G_AND [[ANYEXT2]], [[C2]] + ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[AND3]](s32), [[AND4]](s32), [[AND5]](s32) + ; CHECK-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[CONCAT_VECTORS]](<6 x s16>) + ; CHECK-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[BUILD_VECTOR]](<3 x s32>) %0:_(<6 x s16>) = COPY $vgpr0_vgpr1_vgpr2 %1:_(<6 x s16>) = COPY $vgpr3_vgpr4_vgpr5 %2:_(<3 x s16>), %3:_(<3 x s16>) = G_UNMERGE_VALUES %0 @@ -245,66 +245,66 @@ body: | ; CHECK-LABEL: name: test_uaddo_v4s16 ; CHECK: [[COPY:%[0-9]+]]:_(<4 x s16>) = COPY $vgpr0_vgpr1 - ; CHECK: [[COPY1:%[0-9]+]]:_(<4 x s16>) = COPY $vgpr1_vgpr2 - ; CHECK: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[COPY]](<4 x s16>) - ; CHECK: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>) - ; CHECK: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST]](s32) - ; CHECK: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; CHECK: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) - ; CHECK: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) - ; CHECK: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>) - ; CHECK: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST1]](s32) - ; CHECK: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C]](s32) - ; CHECK: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR1]](s32) - ; CHECK: [[UV2:%[0-9]+]]:_(<2 x s16>), [[UV3:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[COPY1]](<4 x s16>) - ; CHECK: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[UV2]](<2 x s16>) - ; CHECK: [[TRUNC4:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST2]](s32) - ; CHECK: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C]](s32) - ; CHECK: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR2]](s32) - ; CHECK: [[BITCAST3:%[0-9]+]]:_(s32) = G_BITCAST [[UV3]](<2 x s16>) - ; CHECK: [[TRUNC6:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST3]](s32) - ; CHECK: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST3]], [[C]](s32) - ; CHECK: [[TRUNC7:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR3]](s32) - ; CHECK: [[ADD:%[0-9]+]]:_(s16) = G_ADD [[TRUNC]], [[TRUNC4]] - ; CHECK: [[ADD1:%[0-9]+]]:_(s16) = G_ADD [[TRUNC1]], [[TRUNC5]] - ; CHECK: [[ADD2:%[0-9]+]]:_(s16) = G_ADD [[TRUNC2]], [[TRUNC6]] - ; CHECK: [[ADD3:%[0-9]+]]:_(s16) = G_ADD [[TRUNC3]], [[TRUNC7]] - ; CHECK: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[ADD]](s16) - ; CHECK: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[ADD1]](s16) - ; CHECK: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C]](s32) - ; CHECK: [[OR:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL]] - ; CHECK: [[BITCAST4:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) - ; CHECK: [[ZEXT2:%[0-9]+]]:_(s32) = G_ZEXT [[ADD2]](s16) - ; CHECK: [[ZEXT3:%[0-9]+]]:_(s32) = G_ZEXT [[ADD3]](s16) - ; CHECK: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[ZEXT3]], [[C]](s32) - ; CHECK: [[OR1:%[0-9]+]]:_(s32) = G_OR [[ZEXT2]], [[SHL1]] - ; CHECK: [[BITCAST5:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32) - ; CHECK: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BITCAST4]](<2 x s16>), [[BITCAST5]](<2 x s16>) - ; CHECK: [[UV4:%[0-9]+]]:_(<2 x s16>), [[UV5:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[COPY1]](<4 x s16>) - ; CHECK: [[BITCAST6:%[0-9]+]]:_(s32) = G_BITCAST [[UV4]](<2 x s16>) - ; CHECK: [[TRUNC8:%[0-9]+]]:_(s16) = G_TRUNC 
[[BITCAST6]](s32) - ; CHECK: [[LSHR4:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST6]], [[C]](s32) - ; CHECK: [[TRUNC9:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR4]](s32) - ; CHECK: [[BITCAST7:%[0-9]+]]:_(s32) = G_BITCAST [[UV5]](<2 x s16>) - ; CHECK: [[TRUNC10:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST7]](s32) - ; CHECK: [[LSHR5:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST7]], [[C]](s32) - ; CHECK: [[TRUNC11:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR5]](s32) - ; CHECK: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(ult), [[ADD]](s16), [[TRUNC8]] - ; CHECK: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(ult), [[ADD1]](s16), [[TRUNC9]] - ; CHECK: [[ICMP2:%[0-9]+]]:_(s1) = G_ICMP intpred(ult), [[ADD2]](s16), [[TRUNC10]] - ; CHECK: [[ICMP3:%[0-9]+]]:_(s1) = G_ICMP intpred(ult), [[ADD3]](s16), [[TRUNC11]] - ; CHECK: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[ICMP]](s1) - ; CHECK: [[ANYEXT1:%[0-9]+]]:_(s32) = G_ANYEXT [[ICMP1]](s1) - ; CHECK: [[ANYEXT2:%[0-9]+]]:_(s32) = G_ANYEXT [[ICMP2]](s1) - ; CHECK: [[ANYEXT3:%[0-9]+]]:_(s32) = G_ANYEXT [[ICMP3]](s1) - ; CHECK: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 - ; CHECK: [[AND:%[0-9]+]]:_(s32) = G_AND [[ANYEXT]], [[C1]] - ; CHECK: [[AND1:%[0-9]+]]:_(s32) = G_AND [[ANYEXT1]], [[C1]] - ; CHECK: [[AND2:%[0-9]+]]:_(s32) = G_AND [[ANYEXT2]], [[C1]] - ; CHECK: [[AND3:%[0-9]+]]:_(s32) = G_AND [[ANYEXT3]], [[C1]] - ; CHECK: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[AND]](s32), [[AND1]](s32), [[AND2]](s32), [[AND3]](s32) - ; CHECK: $vgpr0_vgpr1 = COPY [[CONCAT_VECTORS]](<4 x s16>) - ; CHECK: $vgpr2_vgpr3_vgpr4_vgpr5 = COPY [[BUILD_VECTOR]](<4 x s32>) + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(<4 x s16>) = COPY $vgpr1_vgpr2 + ; CHECK-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[COPY]](<4 x s16>) + ; CHECK-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>) + ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST]](s32) + ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; CHECK-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) + ; CHECK-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) + ; CHECK-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>) + ; CHECK-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST1]](s32) + ; CHECK-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C]](s32) + ; CHECK-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR1]](s32) + ; CHECK-NEXT: [[UV2:%[0-9]+]]:_(<2 x s16>), [[UV3:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[COPY1]](<4 x s16>) + ; CHECK-NEXT: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[UV2]](<2 x s16>) + ; CHECK-NEXT: [[TRUNC4:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST2]](s32) + ; CHECK-NEXT: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C]](s32) + ; CHECK-NEXT: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR2]](s32) + ; CHECK-NEXT: [[BITCAST3:%[0-9]+]]:_(s32) = G_BITCAST [[UV3]](<2 x s16>) + ; CHECK-NEXT: [[TRUNC6:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST3]](s32) + ; CHECK-NEXT: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST3]], [[C]](s32) + ; CHECK-NEXT: [[TRUNC7:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR3]](s32) + ; CHECK-NEXT: [[ADD:%[0-9]+]]:_(s16) = G_ADD [[TRUNC]], [[TRUNC4]] + ; CHECK-NEXT: [[ADD1:%[0-9]+]]:_(s16) = G_ADD [[TRUNC1]], [[TRUNC5]] + ; CHECK-NEXT: [[ADD2:%[0-9]+]]:_(s16) = G_ADD [[TRUNC2]], [[TRUNC6]] + ; CHECK-NEXT: [[ADD3:%[0-9]+]]:_(s16) = G_ADD [[TRUNC3]], [[TRUNC7]] + ; CHECK-NEXT: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[ADD]](s16) + ; CHECK-NEXT: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[ADD1]](s16) + ; CHECK-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C]](s32) + ; CHECK-NEXT: 
[[OR:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL]] + ; CHECK-NEXT: [[BITCAST4:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) + ; CHECK-NEXT: [[ZEXT2:%[0-9]+]]:_(s32) = G_ZEXT [[ADD2]](s16) + ; CHECK-NEXT: [[ZEXT3:%[0-9]+]]:_(s32) = G_ZEXT [[ADD3]](s16) + ; CHECK-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[ZEXT3]], [[C]](s32) + ; CHECK-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[ZEXT2]], [[SHL1]] + ; CHECK-NEXT: [[BITCAST5:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32) + ; CHECK-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BITCAST4]](<2 x s16>), [[BITCAST5]](<2 x s16>) + ; CHECK-NEXT: [[UV4:%[0-9]+]]:_(<2 x s16>), [[UV5:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[COPY1]](<4 x s16>) + ; CHECK-NEXT: [[BITCAST6:%[0-9]+]]:_(s32) = G_BITCAST [[UV4]](<2 x s16>) + ; CHECK-NEXT: [[TRUNC8:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST6]](s32) + ; CHECK-NEXT: [[LSHR4:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST6]], [[C]](s32) + ; CHECK-NEXT: [[TRUNC9:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR4]](s32) + ; CHECK-NEXT: [[BITCAST7:%[0-9]+]]:_(s32) = G_BITCAST [[UV5]](<2 x s16>) + ; CHECK-NEXT: [[TRUNC10:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST7]](s32) + ; CHECK-NEXT: [[LSHR5:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST7]], [[C]](s32) + ; CHECK-NEXT: [[TRUNC11:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR5]](s32) + ; CHECK-NEXT: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(ult), [[ADD]](s16), [[TRUNC8]] + ; CHECK-NEXT: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(ult), [[ADD1]](s16), [[TRUNC9]] + ; CHECK-NEXT: [[ICMP2:%[0-9]+]]:_(s1) = G_ICMP intpred(ult), [[ADD2]](s16), [[TRUNC10]] + ; CHECK-NEXT: [[ICMP3:%[0-9]+]]:_(s1) = G_ICMP intpred(ult), [[ADD3]](s16), [[TRUNC11]] + ; CHECK-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[ICMP]](s1) + ; CHECK-NEXT: [[ANYEXT1:%[0-9]+]]:_(s32) = G_ANYEXT [[ICMP1]](s1) + ; CHECK-NEXT: [[ANYEXT2:%[0-9]+]]:_(s32) = G_ANYEXT [[ICMP2]](s1) + ; CHECK-NEXT: [[ANYEXT3:%[0-9]+]]:_(s32) = G_ANYEXT [[ICMP3]](s1) + ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 + ; CHECK-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[ANYEXT]], [[C1]] + ; CHECK-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[ANYEXT1]], [[C1]] + ; CHECK-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[ANYEXT2]], [[C1]] + ; CHECK-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[ANYEXT3]], [[C1]] + ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[AND]](s32), [[AND1]](s32), [[AND2]](s32), [[AND3]](s32) + ; CHECK-NEXT: $vgpr0_vgpr1 = COPY [[CONCAT_VECTORS]](<4 x s16>) + ; CHECK-NEXT: $vgpr2_vgpr3_vgpr4_vgpr5 = COPY [[BUILD_VECTOR]](<4 x s32>) %0:_(<4 x s16>) = COPY $vgpr0_vgpr1 %1:_(<4 x s16>) = COPY $vgpr1_vgpr2 %2:_(<4 x s16>), %3:_(<4 x s1>) = G_UADDO %0, %1 @@ -321,23 +321,23 @@ body: | ; CHECK-LABEL: name: test_uaddo_v2s32 ; CHECK: [[COPY:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr0_vgpr1 - ; CHECK: [[COPY1:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr2_vgpr3 - ; CHECK: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<2 x s32>) - ; CHECK: [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](<2 x s32>) - ; CHECK: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[UV]], [[UV2]] - ; CHECK: [[ADD1:%[0-9]+]]:_(s32) = G_ADD [[UV1]], [[UV3]] - ; CHECK: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[ADD]](s32), [[ADD1]](s32) - ; CHECK: [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](<2 x s32>) - ; CHECK: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(ult), [[ADD]](s32), [[UV4]] - ; CHECK: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(ult), [[ADD1]](s32), [[UV5]] - ; CHECK: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[ICMP]](s1) - ; CHECK: 
[[ANYEXT1:%[0-9]+]]:_(s32) = G_ANYEXT [[ICMP1]](s1) - ; CHECK: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 - ; CHECK: [[AND:%[0-9]+]]:_(s32) = G_AND [[ANYEXT]], [[C]] - ; CHECK: [[AND1:%[0-9]+]]:_(s32) = G_AND [[ANYEXT1]], [[C]] - ; CHECK: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[AND]](s32), [[AND1]](s32) - ; CHECK: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x s32>) - ; CHECK: $vgpr2_vgpr3 = COPY [[BUILD_VECTOR1]](<2 x s32>) + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr2_vgpr3 + ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<2 x s32>) + ; CHECK-NEXT: [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](<2 x s32>) + ; CHECK-NEXT: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[UV]], [[UV2]] + ; CHECK-NEXT: [[ADD1:%[0-9]+]]:_(s32) = G_ADD [[UV1]], [[UV3]] + ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[ADD]](s32), [[ADD1]](s32) + ; CHECK-NEXT: [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](<2 x s32>) + ; CHECK-NEXT: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(ult), [[ADD]](s32), [[UV4]] + ; CHECK-NEXT: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(ult), [[ADD1]](s32), [[UV5]] + ; CHECK-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[ICMP]](s1) + ; CHECK-NEXT: [[ANYEXT1:%[0-9]+]]:_(s32) = G_ANYEXT [[ICMP1]](s1) + ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 + ; CHECK-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[ANYEXT]], [[C]] + ; CHECK-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[ANYEXT1]], [[C]] + ; CHECK-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[AND]](s32), [[AND1]](s32) + ; CHECK-NEXT: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x s32>) + ; CHECK-NEXT: $vgpr2_vgpr3 = COPY [[BUILD_VECTOR1]](<2 x s32>) %0:_(<2 x s32>) = COPY $vgpr0_vgpr1 %1:_(<2 x s32>) = COPY $vgpr2_vgpr3 %2:_(<2 x s32>), %3:_(<2 x s1>) = G_UADDO %0, %1 diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-uaddsat.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-uaddsat.mir index c12643df1c6b..411bbc68b47c 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-uaddsat.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-uaddsat.mir @@ -1,8 +1,8 @@ # NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py -# RUN: llc -global-isel-abort=0 -march=amdgcn -mcpu=tahiti -run-pass=legalizer %s -o - | FileCheck -check-prefix=GFX6 %s -# RUN: llc -global-isel-abort=0 -march=amdgcn -mcpu=fiji -run-pass=legalizer %s -o - | FileCheck -check-prefix=GFX8 %s -# RUN: llc -global-isel-abort=0 -march=amdgcn -mcpu=gfx900 -run-pass=legalizer %s -o - | FileCheck -check-prefix=GFX9 %s -# RUN: llc -global-isel-abort=0 -march=amdgcn -mcpu=gfx1010 -run-pass=legalizer %s -o - | FileCheck -check-prefix=GFX9 %s +# RUN: llc -march=amdgcn -mcpu=tahiti -run-pass=legalizer %s -o - | FileCheck -check-prefix=GFX6 %s +# RUN: llc -march=amdgcn -mcpu=fiji -run-pass=legalizer %s -o - | FileCheck -check-prefix=GFX8 %s +# RUN: llc -march=amdgcn -mcpu=gfx900 -run-pass=legalizer %s -o - | FileCheck -check-prefix=GFX9 %s +# RUN: llc -march=amdgcn -mcpu=gfx1010 -run-pass=legalizer %s -o - | FileCheck -check-prefix=GFX9 %s --- name: uaddsat_s7 @@ -12,40 +12,40 @@ body: | ; GFX6-LABEL: name: uaddsat_s7 ; GFX6: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX6: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX6: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 25 - ; GFX6: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY]], [[C]](s32) - ; GFX6: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[COPY1]], [[C]](s32) - ; GFX6: [[C1:%[0-9]+]]:_(s32) = 
G_CONSTANT i32 -1 - ; GFX6: [[XOR:%[0-9]+]]:_(s32) = G_XOR [[SHL]], [[C1]] - ; GFX6: [[UMIN:%[0-9]+]]:_(s32) = G_UMIN [[XOR]], [[SHL1]] - ; GFX6: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[SHL]], [[UMIN]] - ; GFX6: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[ADD]], [[C]](s32) - ; GFX6: $vgpr0 = COPY [[LSHR]](s32) + ; GFX6-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; GFX6-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 25 + ; GFX6-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY]], [[C]](s32) + ; GFX6-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[COPY1]], [[C]](s32) + ; GFX6-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 -1 + ; GFX6-NEXT: [[XOR:%[0-9]+]]:_(s32) = G_XOR [[SHL]], [[C1]] + ; GFX6-NEXT: [[UMIN:%[0-9]+]]:_(s32) = G_UMIN [[XOR]], [[SHL1]] + ; GFX6-NEXT: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[SHL]], [[UMIN]] + ; GFX6-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[ADD]], [[C]](s32) + ; GFX6-NEXT: $vgpr0 = COPY [[LSHR]](s32) ; GFX8-LABEL: name: uaddsat_s7 ; GFX8: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX8: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX8: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY]](s32) - ; GFX8: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY1]](s32) - ; GFX8: [[C:%[0-9]+]]:_(s16) = G_CONSTANT i16 9 - ; GFX8: [[SHL:%[0-9]+]]:_(s16) = G_SHL [[TRUNC]], [[C]](s16) - ; GFX8: [[SHL1:%[0-9]+]]:_(s16) = G_SHL [[TRUNC1]], [[C]](s16) - ; GFX8: [[UADDSAT:%[0-9]+]]:_(s16) = G_UADDSAT [[SHL]], [[SHL1]] - ; GFX8: [[LSHR:%[0-9]+]]:_(s16) = G_LSHR [[UADDSAT]], [[C]](s16) - ; GFX8: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[LSHR]](s16) - ; GFX8: $vgpr0 = COPY [[ANYEXT]](s32) + ; GFX8-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; GFX8-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY]](s32) + ; GFX8-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY1]](s32) + ; GFX8-NEXT: [[C:%[0-9]+]]:_(s16) = G_CONSTANT i16 9 + ; GFX8-NEXT: [[SHL:%[0-9]+]]:_(s16) = G_SHL [[TRUNC]], [[C]](s16) + ; GFX8-NEXT: [[SHL1:%[0-9]+]]:_(s16) = G_SHL [[TRUNC1]], [[C]](s16) + ; GFX8-NEXT: [[UADDSAT:%[0-9]+]]:_(s16) = G_UADDSAT [[SHL]], [[SHL1]] + ; GFX8-NEXT: [[LSHR:%[0-9]+]]:_(s16) = G_LSHR [[UADDSAT]], [[C]](s16) + ; GFX8-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[LSHR]](s16) + ; GFX8-NEXT: $vgpr0 = COPY [[ANYEXT]](s32) ; GFX9-LABEL: name: uaddsat_s7 ; GFX9: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX9: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX9: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY]](s32) - ; GFX9: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY1]](s32) - ; GFX9: [[C:%[0-9]+]]:_(s16) = G_CONSTANT i16 9 - ; GFX9: [[SHL:%[0-9]+]]:_(s16) = G_SHL [[TRUNC]], [[C]](s16) - ; GFX9: [[SHL1:%[0-9]+]]:_(s16) = G_SHL [[TRUNC1]], [[C]](s16) - ; GFX9: [[UADDSAT:%[0-9]+]]:_(s16) = G_UADDSAT [[SHL]], [[SHL1]] - ; GFX9: [[LSHR:%[0-9]+]]:_(s16) = G_LSHR [[UADDSAT]], [[C]](s16) - ; GFX9: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[LSHR]](s16) - ; GFX9: $vgpr0 = COPY [[ANYEXT]](s32) + ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; GFX9-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY]](s32) + ; GFX9-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY1]](s32) + ; GFX9-NEXT: [[C:%[0-9]+]]:_(s16) = G_CONSTANT i16 9 + ; GFX9-NEXT: [[SHL:%[0-9]+]]:_(s16) = G_SHL [[TRUNC]], [[C]](s16) + ; GFX9-NEXT: [[SHL1:%[0-9]+]]:_(s16) = G_SHL [[TRUNC1]], [[C]](s16) + ; GFX9-NEXT: [[UADDSAT:%[0-9]+]]:_(s16) = G_UADDSAT [[SHL]], [[SHL1]] + ; GFX9-NEXT: [[LSHR:%[0-9]+]]:_(s16) = G_LSHR [[UADDSAT]], [[C]](s16) + ; GFX9-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[LSHR]](s16) + ; GFX9-NEXT: $vgpr0 = COPY [[ANYEXT]](s32) %0:_(s32) = COPY $vgpr0 %1:_(s32) = COPY $vgpr1 %2:_(s7) = G_TRUNC %0 
@@ -63,40 +63,40 @@ body: | ; GFX6-LABEL: name: uaddsat_s8 ; GFX6: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX6: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX6: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 24 - ; GFX6: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY]], [[C]](s32) - ; GFX6: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[COPY1]], [[C]](s32) - ; GFX6: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 -1 - ; GFX6: [[XOR:%[0-9]+]]:_(s32) = G_XOR [[SHL]], [[C1]] - ; GFX6: [[UMIN:%[0-9]+]]:_(s32) = G_UMIN [[XOR]], [[SHL1]] - ; GFX6: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[SHL]], [[UMIN]] - ; GFX6: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[ADD]], [[C]](s32) - ; GFX6: $vgpr0 = COPY [[LSHR]](s32) + ; GFX6-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; GFX6-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 24 + ; GFX6-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY]], [[C]](s32) + ; GFX6-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[COPY1]], [[C]](s32) + ; GFX6-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 -1 + ; GFX6-NEXT: [[XOR:%[0-9]+]]:_(s32) = G_XOR [[SHL]], [[C1]] + ; GFX6-NEXT: [[UMIN:%[0-9]+]]:_(s32) = G_UMIN [[XOR]], [[SHL1]] + ; GFX6-NEXT: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[SHL]], [[UMIN]] + ; GFX6-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[ADD]], [[C]](s32) + ; GFX6-NEXT: $vgpr0 = COPY [[LSHR]](s32) ; GFX8-LABEL: name: uaddsat_s8 ; GFX8: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX8: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX8: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY]](s32) - ; GFX8: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY1]](s32) - ; GFX8: [[C:%[0-9]+]]:_(s16) = G_CONSTANT i16 8 - ; GFX8: [[SHL:%[0-9]+]]:_(s16) = G_SHL [[TRUNC]], [[C]](s16) - ; GFX8: [[SHL1:%[0-9]+]]:_(s16) = G_SHL [[TRUNC1]], [[C]](s16) - ; GFX8: [[UADDSAT:%[0-9]+]]:_(s16) = G_UADDSAT [[SHL]], [[SHL1]] - ; GFX8: [[LSHR:%[0-9]+]]:_(s16) = G_LSHR [[UADDSAT]], [[C]](s16) - ; GFX8: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[LSHR]](s16) - ; GFX8: $vgpr0 = COPY [[ANYEXT]](s32) + ; GFX8-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; GFX8-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY]](s32) + ; GFX8-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY1]](s32) + ; GFX8-NEXT: [[C:%[0-9]+]]:_(s16) = G_CONSTANT i16 8 + ; GFX8-NEXT: [[SHL:%[0-9]+]]:_(s16) = G_SHL [[TRUNC]], [[C]](s16) + ; GFX8-NEXT: [[SHL1:%[0-9]+]]:_(s16) = G_SHL [[TRUNC1]], [[C]](s16) + ; GFX8-NEXT: [[UADDSAT:%[0-9]+]]:_(s16) = G_UADDSAT [[SHL]], [[SHL1]] + ; GFX8-NEXT: [[LSHR:%[0-9]+]]:_(s16) = G_LSHR [[UADDSAT]], [[C]](s16) + ; GFX8-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[LSHR]](s16) + ; GFX8-NEXT: $vgpr0 = COPY [[ANYEXT]](s32) ; GFX9-LABEL: name: uaddsat_s8 ; GFX9: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX9: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX9: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY]](s32) - ; GFX9: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY1]](s32) - ; GFX9: [[C:%[0-9]+]]:_(s16) = G_CONSTANT i16 8 - ; GFX9: [[SHL:%[0-9]+]]:_(s16) = G_SHL [[TRUNC]], [[C]](s16) - ; GFX9: [[SHL1:%[0-9]+]]:_(s16) = G_SHL [[TRUNC1]], [[C]](s16) - ; GFX9: [[UADDSAT:%[0-9]+]]:_(s16) = G_UADDSAT [[SHL]], [[SHL1]] - ; GFX9: [[LSHR:%[0-9]+]]:_(s16) = G_LSHR [[UADDSAT]], [[C]](s16) - ; GFX9: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[LSHR]](s16) - ; GFX9: $vgpr0 = COPY [[ANYEXT]](s32) + ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; GFX9-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY]](s32) + ; GFX9-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY1]](s32) + ; GFX9-NEXT: [[C:%[0-9]+]]:_(s16) = G_CONSTANT i16 8 + ; GFX9-NEXT: [[SHL:%[0-9]+]]:_(s16) = G_SHL [[TRUNC]], [[C]](s16) + ; GFX9-NEXT: 
[[SHL1:%[0-9]+]]:_(s16) = G_SHL [[TRUNC1]], [[C]](s16) + ; GFX9-NEXT: [[UADDSAT:%[0-9]+]]:_(s16) = G_UADDSAT [[SHL]], [[SHL1]] + ; GFX9-NEXT: [[LSHR:%[0-9]+]]:_(s16) = G_LSHR [[UADDSAT]], [[C]](s16) + ; GFX9-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[LSHR]](s16) + ; GFX9-NEXT: $vgpr0 = COPY [[ANYEXT]](s32) %0:_(s32) = COPY $vgpr0 %1:_(s32) = COPY $vgpr1 %2:_(s8) = G_TRUNC %0 @@ -114,88 +114,88 @@ body: | ; GFX6-LABEL: name: uaddsat_v2s8 ; GFX6: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX6: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX6: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 - ; GFX6: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[COPY]], [[C]](s32) - ; GFX6: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 24 - ; GFX6: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[COPY1]], [[C]](s32) - ; GFX6: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY]], [[C1]](s32) - ; GFX6: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[COPY1]], [[C1]](s32) - ; GFX6: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 -1 - ; GFX6: [[XOR:%[0-9]+]]:_(s32) = G_XOR [[SHL]], [[C2]] - ; GFX6: [[UMIN:%[0-9]+]]:_(s32) = G_UMIN [[XOR]], [[SHL1]] - ; GFX6: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[SHL]], [[UMIN]] - ; GFX6: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[ADD]], [[C1]](s32) - ; GFX6: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[LSHR]], [[C1]](s32) - ; GFX6: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[LSHR1]], [[C1]](s32) - ; GFX6: [[XOR1:%[0-9]+]]:_(s32) = G_XOR [[SHL2]], [[C2]] - ; GFX6: [[UMIN1:%[0-9]+]]:_(s32) = G_UMIN [[XOR1]], [[SHL3]] - ; GFX6: [[ADD1:%[0-9]+]]:_(s32) = G_ADD [[SHL2]], [[UMIN1]] - ; GFX6: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[ADD1]], [[C1]](s32) - ; GFX6: [[C3:%[0-9]+]]:_(s16) = G_CONSTANT i16 255 - ; GFX6: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR2]](s32) - ; GFX6: [[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC]], [[C3]] - ; GFX6: [[COPY2:%[0-9]+]]:_(s32) = COPY [[C]](s32) - ; GFX6: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 255 - ; GFX6: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LSHR3]], [[C4]] - ; GFX6: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[COPY2]](s32) - ; GFX6: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[SHL4]](s32) - ; GFX6: [[OR:%[0-9]+]]:_(s16) = G_OR [[AND]], [[TRUNC1]] - ; GFX6: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[OR]](s16) - ; GFX6: $vgpr0 = COPY [[ANYEXT]](s32) + ; GFX6-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; GFX6-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 + ; GFX6-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[COPY]], [[C]](s32) + ; GFX6-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 24 + ; GFX6-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[COPY1]], [[C]](s32) + ; GFX6-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY]], [[C1]](s32) + ; GFX6-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[COPY1]], [[C1]](s32) + ; GFX6-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 -1 + ; GFX6-NEXT: [[XOR:%[0-9]+]]:_(s32) = G_XOR [[SHL]], [[C2]] + ; GFX6-NEXT: [[UMIN:%[0-9]+]]:_(s32) = G_UMIN [[XOR]], [[SHL1]] + ; GFX6-NEXT: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[SHL]], [[UMIN]] + ; GFX6-NEXT: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[ADD]], [[C1]](s32) + ; GFX6-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[LSHR]], [[C1]](s32) + ; GFX6-NEXT: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[LSHR1]], [[C1]](s32) + ; GFX6-NEXT: [[XOR1:%[0-9]+]]:_(s32) = G_XOR [[SHL2]], [[C2]] + ; GFX6-NEXT: [[UMIN1:%[0-9]+]]:_(s32) = G_UMIN [[XOR1]], [[SHL3]] + ; GFX6-NEXT: [[ADD1:%[0-9]+]]:_(s32) = G_ADD [[SHL2]], [[UMIN1]] + ; GFX6-NEXT: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[ADD1]], [[C1]](s32) + ; GFX6-NEXT: [[C3:%[0-9]+]]:_(s16) = G_CONSTANT i16 255 + ; GFX6-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR2]](s32) + ; GFX6-NEXT: [[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC]], [[C3]] + 
; GFX6-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY [[C]](s32) + ; GFX6-NEXT: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 255 + ; GFX6-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LSHR3]], [[C4]] + ; GFX6-NEXT: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[COPY2]](s32) + ; GFX6-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[SHL4]](s32) + ; GFX6-NEXT: [[OR:%[0-9]+]]:_(s16) = G_OR [[AND]], [[TRUNC1]] + ; GFX6-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[OR]](s16) + ; GFX6-NEXT: $vgpr0 = COPY [[ANYEXT]](s32) ; GFX8-LABEL: name: uaddsat_v2s8 ; GFX8: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX8: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX8: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 - ; GFX8: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[COPY]], [[C]](s32) - ; GFX8: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[COPY1]], [[C]](s32) - ; GFX8: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY]](s32) - ; GFX8: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY1]](s32) - ; GFX8: [[C1:%[0-9]+]]:_(s16) = G_CONSTANT i16 8 - ; GFX8: [[SHL:%[0-9]+]]:_(s16) = G_SHL [[TRUNC]], [[C1]](s16) - ; GFX8: [[SHL1:%[0-9]+]]:_(s16) = G_SHL [[TRUNC1]], [[C1]](s16) - ; GFX8: [[UADDSAT:%[0-9]+]]:_(s16) = G_UADDSAT [[SHL]], [[SHL1]] - ; GFX8: [[LSHR2:%[0-9]+]]:_(s16) = G_LSHR [[UADDSAT]], [[C1]](s16) - ; GFX8: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) - ; GFX8: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR1]](s32) - ; GFX8: [[SHL2:%[0-9]+]]:_(s16) = G_SHL [[TRUNC2]], [[C1]](s16) - ; GFX8: [[SHL3:%[0-9]+]]:_(s16) = G_SHL [[TRUNC3]], [[C1]](s16) - ; GFX8: [[UADDSAT1:%[0-9]+]]:_(s16) = G_UADDSAT [[SHL2]], [[SHL3]] - ; GFX8: [[LSHR3:%[0-9]+]]:_(s16) = G_LSHR [[UADDSAT1]], [[C1]](s16) - ; GFX8: [[C2:%[0-9]+]]:_(s16) = G_CONSTANT i16 255 - ; GFX8: [[AND:%[0-9]+]]:_(s16) = G_AND [[LSHR2]], [[C2]] - ; GFX8: [[AND1:%[0-9]+]]:_(s16) = G_AND [[LSHR3]], [[C2]] - ; GFX8: [[SHL4:%[0-9]+]]:_(s16) = G_SHL [[AND1]], [[C1]](s16) - ; GFX8: [[OR:%[0-9]+]]:_(s16) = G_OR [[AND]], [[SHL4]] - ; GFX8: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[OR]](s16) - ; GFX8: $vgpr0 = COPY [[ANYEXT]](s32) + ; GFX8-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; GFX8-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 + ; GFX8-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[COPY]], [[C]](s32) + ; GFX8-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[COPY1]], [[C]](s32) + ; GFX8-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY]](s32) + ; GFX8-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY1]](s32) + ; GFX8-NEXT: [[C1:%[0-9]+]]:_(s16) = G_CONSTANT i16 8 + ; GFX8-NEXT: [[SHL:%[0-9]+]]:_(s16) = G_SHL [[TRUNC]], [[C1]](s16) + ; GFX8-NEXT: [[SHL1:%[0-9]+]]:_(s16) = G_SHL [[TRUNC1]], [[C1]](s16) + ; GFX8-NEXT: [[UADDSAT:%[0-9]+]]:_(s16) = G_UADDSAT [[SHL]], [[SHL1]] + ; GFX8-NEXT: [[LSHR2:%[0-9]+]]:_(s16) = G_LSHR [[UADDSAT]], [[C1]](s16) + ; GFX8-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) + ; GFX8-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR1]](s32) + ; GFX8-NEXT: [[SHL2:%[0-9]+]]:_(s16) = G_SHL [[TRUNC2]], [[C1]](s16) + ; GFX8-NEXT: [[SHL3:%[0-9]+]]:_(s16) = G_SHL [[TRUNC3]], [[C1]](s16) + ; GFX8-NEXT: [[UADDSAT1:%[0-9]+]]:_(s16) = G_UADDSAT [[SHL2]], [[SHL3]] + ; GFX8-NEXT: [[LSHR3:%[0-9]+]]:_(s16) = G_LSHR [[UADDSAT1]], [[C1]](s16) + ; GFX8-NEXT: [[C2:%[0-9]+]]:_(s16) = G_CONSTANT i16 255 + ; GFX8-NEXT: [[AND:%[0-9]+]]:_(s16) = G_AND [[LSHR2]], [[C2]] + ; GFX8-NEXT: [[AND1:%[0-9]+]]:_(s16) = G_AND [[LSHR3]], [[C2]] + ; GFX8-NEXT: [[SHL4:%[0-9]+]]:_(s16) = G_SHL [[AND1]], [[C1]](s16) + ; GFX8-NEXT: [[OR:%[0-9]+]]:_(s16) = G_OR [[AND]], [[SHL4]] + ; GFX8-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[OR]](s16) + ; 
GFX8-NEXT: $vgpr0 = COPY [[ANYEXT]](s32) ; GFX9-LABEL: name: uaddsat_v2s8 ; GFX9: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX9: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX9: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 - ; GFX9: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[COPY]], [[C]](s32) - ; GFX9: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; GFX9: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[COPY1]], [[C]](s32) - ; GFX9: [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY]](s32), [[LSHR]](s32) - ; GFX9: [[BUILD_VECTOR_TRUNC1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY1]](s32), [[LSHR1]](s32) - ; GFX9: [[C2:%[0-9]+]]:_(s16) = G_CONSTANT i16 8 - ; GFX9: [[COPY2:%[0-9]+]]:_(s32) = COPY [[C]](s32) - ; GFX9: [[BUILD_VECTOR_TRUNC2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY2]](s32), [[COPY2]](s32) - ; GFX9: [[SHL:%[0-9]+]]:_(<2 x s16>) = G_SHL [[BUILD_VECTOR_TRUNC]], [[BUILD_VECTOR_TRUNC2]](<2 x s16>) - ; GFX9: [[SHL1:%[0-9]+]]:_(<2 x s16>) = G_SHL [[BUILD_VECTOR_TRUNC1]], [[BUILD_VECTOR_TRUNC2]](<2 x s16>) - ; GFX9: [[UADDSAT:%[0-9]+]]:_(<2 x s16>) = G_UADDSAT [[SHL]], [[SHL1]] - ; GFX9: [[LSHR2:%[0-9]+]]:_(<2 x s16>) = G_LSHR [[UADDSAT]], [[BUILD_VECTOR_TRUNC2]](<2 x s16>) - ; GFX9: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[LSHR2]](<2 x s16>) - ; GFX9: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST]](s32) - ; GFX9: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C1]](s32) - ; GFX9: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR3]](s32) - ; GFX9: [[C3:%[0-9]+]]:_(s16) = G_CONSTANT i16 255 - ; GFX9: [[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC]], [[C3]] - ; GFX9: [[AND1:%[0-9]+]]:_(s16) = G_AND [[TRUNC1]], [[C3]] - ; GFX9: [[SHL2:%[0-9]+]]:_(s16) = G_SHL [[AND1]], [[C2]](s16) - ; GFX9: [[OR:%[0-9]+]]:_(s16) = G_OR [[AND]], [[SHL2]] - ; GFX9: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[OR]](s16) - ; GFX9: $vgpr0 = COPY [[ANYEXT]](s32) + ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; GFX9-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 + ; GFX9-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[COPY]], [[C]](s32) + ; GFX9-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; GFX9-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[COPY1]], [[C]](s32) + ; GFX9-NEXT: [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY]](s32), [[LSHR]](s32) + ; GFX9-NEXT: [[BUILD_VECTOR_TRUNC1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY1]](s32), [[LSHR1]](s32) + ; GFX9-NEXT: [[C2:%[0-9]+]]:_(s16) = G_CONSTANT i16 8 + ; GFX9-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY [[C]](s32) + ; GFX9-NEXT: [[BUILD_VECTOR_TRUNC2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY2]](s32), [[COPY2]](s32) + ; GFX9-NEXT: [[SHL:%[0-9]+]]:_(<2 x s16>) = G_SHL [[BUILD_VECTOR_TRUNC]], [[BUILD_VECTOR_TRUNC2]](<2 x s16>) + ; GFX9-NEXT: [[SHL1:%[0-9]+]]:_(<2 x s16>) = G_SHL [[BUILD_VECTOR_TRUNC1]], [[BUILD_VECTOR_TRUNC2]](<2 x s16>) + ; GFX9-NEXT: [[UADDSAT:%[0-9]+]]:_(<2 x s16>) = G_UADDSAT [[SHL]], [[SHL1]] + ; GFX9-NEXT: [[LSHR2:%[0-9]+]]:_(<2 x s16>) = G_LSHR [[UADDSAT]], [[BUILD_VECTOR_TRUNC2]](<2 x s16>) + ; GFX9-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[LSHR2]](<2 x s16>) + ; GFX9-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST]](s32) + ; GFX9-NEXT: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C1]](s32) + ; GFX9-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR3]](s32) + ; GFX9-NEXT: [[C3:%[0-9]+]]:_(s16) = G_CONSTANT i16 255 + ; GFX9-NEXT: [[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC]], [[C3]] + ; GFX9-NEXT: [[AND1:%[0-9]+]]:_(s16) = G_AND [[TRUNC1]], [[C3]] + ; GFX9-NEXT: [[SHL2:%[0-9]+]]:_(s16) = G_SHL 
[[AND1]], [[C2]](s16) + ; GFX9-NEXT: [[OR:%[0-9]+]]:_(s16) = G_OR [[AND]], [[SHL2]] + ; GFX9-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[OR]](s16) + ; GFX9-NEXT: $vgpr0 = COPY [[ANYEXT]](s32) %0:_(s32) = COPY $vgpr0 %1:_(s32) = COPY $vgpr1 %2:_(s16) = G_TRUNC %0 @@ -216,32 +216,32 @@ body: | ; GFX6-LABEL: name: uaddsat_s16 ; GFX6: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX6: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX6: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; GFX6: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY]], [[C]](s32) - ; GFX6: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[COPY1]], [[C]](s32) - ; GFX6: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 -1 - ; GFX6: [[XOR:%[0-9]+]]:_(s32) = G_XOR [[SHL]], [[C1]] - ; GFX6: [[UMIN:%[0-9]+]]:_(s32) = G_UMIN [[XOR]], [[SHL1]] - ; GFX6: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[SHL]], [[UMIN]] - ; GFX6: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[ADD]], [[C]](s32) - ; GFX6: $vgpr0 = COPY [[LSHR]](s32) + ; GFX6-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; GFX6-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; GFX6-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY]], [[C]](s32) + ; GFX6-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[COPY1]], [[C]](s32) + ; GFX6-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 -1 + ; GFX6-NEXT: [[XOR:%[0-9]+]]:_(s32) = G_XOR [[SHL]], [[C1]] + ; GFX6-NEXT: [[UMIN:%[0-9]+]]:_(s32) = G_UMIN [[XOR]], [[SHL1]] + ; GFX6-NEXT: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[SHL]], [[UMIN]] + ; GFX6-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[ADD]], [[C]](s32) + ; GFX6-NEXT: $vgpr0 = COPY [[LSHR]](s32) ; GFX8-LABEL: name: uaddsat_s16 ; GFX8: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX8: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX8: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY]](s32) - ; GFX8: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY1]](s32) - ; GFX8: [[UADDSAT:%[0-9]+]]:_(s16) = G_UADDSAT [[TRUNC]], [[TRUNC1]] - ; GFX8: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[UADDSAT]](s16) - ; GFX8: $vgpr0 = COPY [[ANYEXT]](s32) + ; GFX8-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; GFX8-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY]](s32) + ; GFX8-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY1]](s32) + ; GFX8-NEXT: [[UADDSAT:%[0-9]+]]:_(s16) = G_UADDSAT [[TRUNC]], [[TRUNC1]] + ; GFX8-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[UADDSAT]](s16) + ; GFX8-NEXT: $vgpr0 = COPY [[ANYEXT]](s32) ; GFX9-LABEL: name: uaddsat_s16 ; GFX9: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX9: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX9: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY]](s32) - ; GFX9: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY1]](s32) - ; GFX9: [[UADDSAT:%[0-9]+]]:_(s16) = G_UADDSAT [[TRUNC]], [[TRUNC1]] - ; GFX9: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[UADDSAT]](s16) - ; GFX9: $vgpr0 = COPY [[ANYEXT]](s32) + ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; GFX9-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY]](s32) + ; GFX9-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY1]](s32) + ; GFX9-NEXT: [[UADDSAT:%[0-9]+]]:_(s16) = G_UADDSAT [[TRUNC]], [[TRUNC1]] + ; GFX9-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[UADDSAT]](s16) + ; GFX9-NEXT: $vgpr0 = COPY [[ANYEXT]](s32) %0:_(s32) = COPY $vgpr0 %1:_(s32) = COPY $vgpr1 %2:_(s16) = G_TRUNC %0 @@ -259,57 +259,57 @@ body: | ; GFX6-LABEL: name: uaddsat_v2s16 ; GFX6: [[COPY:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0 - ; GFX6: [[COPY1:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr1 - ; GFX6: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY]](<2 x s16>) - ; GFX6: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; GFX6: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) 
- ; GFX6: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[COPY1]](<2 x s16>) - ; GFX6: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C]](s32) - ; GFX6: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[BITCAST]], [[C]](s32) - ; GFX6: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[BITCAST1]], [[C]](s32) - ; GFX6: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 -1 - ; GFX6: [[XOR:%[0-9]+]]:_(s32) = G_XOR [[SHL]], [[C1]] - ; GFX6: [[UMIN:%[0-9]+]]:_(s32) = G_UMIN [[XOR]], [[SHL1]] - ; GFX6: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[SHL]], [[UMIN]] - ; GFX6: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[ADD]], [[C]](s32) - ; GFX6: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[LSHR]], [[C]](s32) - ; GFX6: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[LSHR1]], [[C]](s32) - ; GFX6: [[XOR1:%[0-9]+]]:_(s32) = G_XOR [[SHL2]], [[C1]] - ; GFX6: [[UMIN1:%[0-9]+]]:_(s32) = G_UMIN [[XOR1]], [[SHL3]] - ; GFX6: [[ADD1:%[0-9]+]]:_(s32) = G_ADD [[SHL2]], [[UMIN1]] - ; GFX6: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[ADD1]], [[C]](s32) - ; GFX6: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 - ; GFX6: [[AND:%[0-9]+]]:_(s32) = G_AND [[LSHR2]], [[C2]] - ; GFX6: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LSHR3]], [[C2]] - ; GFX6: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C]](s32) - ; GFX6: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL4]] - ; GFX6: [[BITCAST2:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) - ; GFX6: $vgpr0 = COPY [[BITCAST2]](<2 x s16>) + ; GFX6-NEXT: [[COPY1:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr1 + ; GFX6-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY]](<2 x s16>) + ; GFX6-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; GFX6-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) + ; GFX6-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[COPY1]](<2 x s16>) + ; GFX6-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C]](s32) + ; GFX6-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[BITCAST]], [[C]](s32) + ; GFX6-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[BITCAST1]], [[C]](s32) + ; GFX6-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 -1 + ; GFX6-NEXT: [[XOR:%[0-9]+]]:_(s32) = G_XOR [[SHL]], [[C1]] + ; GFX6-NEXT: [[UMIN:%[0-9]+]]:_(s32) = G_UMIN [[XOR]], [[SHL1]] + ; GFX6-NEXT: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[SHL]], [[UMIN]] + ; GFX6-NEXT: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[ADD]], [[C]](s32) + ; GFX6-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[LSHR]], [[C]](s32) + ; GFX6-NEXT: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[LSHR1]], [[C]](s32) + ; GFX6-NEXT: [[XOR1:%[0-9]+]]:_(s32) = G_XOR [[SHL2]], [[C1]] + ; GFX6-NEXT: [[UMIN1:%[0-9]+]]:_(s32) = G_UMIN [[XOR1]], [[SHL3]] + ; GFX6-NEXT: [[ADD1:%[0-9]+]]:_(s32) = G_ADD [[SHL2]], [[UMIN1]] + ; GFX6-NEXT: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[ADD1]], [[C]](s32) + ; GFX6-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 + ; GFX6-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[LSHR2]], [[C2]] + ; GFX6-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LSHR3]], [[C2]] + ; GFX6-NEXT: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C]](s32) + ; GFX6-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL4]] + ; GFX6-NEXT: [[BITCAST2:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) + ; GFX6-NEXT: $vgpr0 = COPY [[BITCAST2]](<2 x s16>) ; GFX8-LABEL: name: uaddsat_v2s16 ; GFX8: [[COPY:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0 - ; GFX8: [[COPY1:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr1 - ; GFX8: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY]](<2 x s16>) - ; GFX8: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST]](s32) - ; GFX8: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; GFX8: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) - ; GFX8: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) - ; GFX8: 
[[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[COPY1]](<2 x s16>) - ; GFX8: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST1]](s32) - ; GFX8: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C]](s32) - ; GFX8: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR1]](s32) - ; GFX8: [[UADDSAT:%[0-9]+]]:_(s16) = G_UADDSAT [[TRUNC]], [[TRUNC2]] - ; GFX8: [[UADDSAT1:%[0-9]+]]:_(s16) = G_UADDSAT [[TRUNC1]], [[TRUNC3]] - ; GFX8: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[UADDSAT]](s16) - ; GFX8: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[UADDSAT1]](s16) - ; GFX8: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C]](s32) - ; GFX8: [[OR:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL]] - ; GFX8: [[BITCAST2:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) - ; GFX8: $vgpr0 = COPY [[BITCAST2]](<2 x s16>) + ; GFX8-NEXT: [[COPY1:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr1 + ; GFX8-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY]](<2 x s16>) + ; GFX8-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST]](s32) + ; GFX8-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; GFX8-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) + ; GFX8-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) + ; GFX8-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[COPY1]](<2 x s16>) + ; GFX8-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST1]](s32) + ; GFX8-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C]](s32) + ; GFX8-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR1]](s32) + ; GFX8-NEXT: [[UADDSAT:%[0-9]+]]:_(s16) = G_UADDSAT [[TRUNC]], [[TRUNC2]] + ; GFX8-NEXT: [[UADDSAT1:%[0-9]+]]:_(s16) = G_UADDSAT [[TRUNC1]], [[TRUNC3]] + ; GFX8-NEXT: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[UADDSAT]](s16) + ; GFX8-NEXT: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[UADDSAT1]](s16) + ; GFX8-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C]](s32) + ; GFX8-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL]] + ; GFX8-NEXT: [[BITCAST2:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) + ; GFX8-NEXT: $vgpr0 = COPY [[BITCAST2]](<2 x s16>) ; GFX9-LABEL: name: uaddsat_v2s16 ; GFX9: [[COPY:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0 - ; GFX9: [[COPY1:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr1 - ; GFX9: [[UADDSAT:%[0-9]+]]:_(<2 x s16>) = G_UADDSAT [[COPY]], [[COPY1]] - ; GFX9: $vgpr0 = COPY [[UADDSAT]](<2 x s16>) + ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr1 + ; GFX9-NEXT: [[UADDSAT:%[0-9]+]]:_(<2 x s16>) = G_UADDSAT [[COPY]], [[COPY1]] + ; GFX9-NEXT: $vgpr0 = COPY [[UADDSAT]](<2 x s16>) %0:_(<2 x s16>) = COPY $vgpr0 %1:_(<2 x s16>) = COPY $vgpr1 %2:_(<2 x s16>) = G_UADDSAT %0, %1 @@ -324,134 +324,134 @@ body: | ; GFX6-LABEL: name: uaddsat_v3s16 ; GFX6: [[COPY:%[0-9]+]]:_(<6 x s16>) = COPY $vgpr0_vgpr1_vgpr2 - ; GFX6: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>), [[UV2:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[COPY]](<6 x s16>) - ; GFX6: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>) - ; GFX6: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; GFX6: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) - ; GFX6: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>) - ; GFX6: [[UV3:%[0-9]+]]:_(<2 x s16>), [[UV4:%[0-9]+]]:_(<2 x s16>), [[UV5:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[COPY]](<6 x s16>) - ; GFX6: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[UV4]](<2 x s16>) - ; GFX6: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C]](s32) - ; GFX6: [[BITCAST3:%[0-9]+]]:_(s32) = G_BITCAST [[UV5]](<2 x s16>) - ; GFX6: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST3]], [[C]](s32) - ; GFX6: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[BITCAST]], [[C]](s32) - ; GFX6: 
[[SHL1:%[0-9]+]]:_(s32) = G_SHL [[LSHR1]], [[C]](s32) - ; GFX6: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 -1 - ; GFX6: [[XOR:%[0-9]+]]:_(s32) = G_XOR [[SHL]], [[C1]] - ; GFX6: [[UMIN:%[0-9]+]]:_(s32) = G_UMIN [[XOR]], [[SHL1]] - ; GFX6: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[SHL]], [[UMIN]] - ; GFX6: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[ADD]], [[C]](s32) - ; GFX6: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[LSHR]], [[C]](s32) - ; GFX6: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[BITCAST3]], [[C]](s32) - ; GFX6: [[XOR1:%[0-9]+]]:_(s32) = G_XOR [[SHL2]], [[C1]] - ; GFX6: [[UMIN1:%[0-9]+]]:_(s32) = G_UMIN [[XOR1]], [[SHL3]] - ; GFX6: [[ADD1:%[0-9]+]]:_(s32) = G_ADD [[SHL2]], [[UMIN1]] - ; GFX6: [[LSHR4:%[0-9]+]]:_(s32) = G_LSHR [[ADD1]], [[C]](s32) - ; GFX6: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[BITCAST1]], [[C]](s32) - ; GFX6: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[LSHR2]], [[C]](s32) - ; GFX6: [[XOR2:%[0-9]+]]:_(s32) = G_XOR [[SHL4]], [[C1]] - ; GFX6: [[UMIN2:%[0-9]+]]:_(s32) = G_UMIN [[XOR2]], [[SHL5]] - ; GFX6: [[ADD2:%[0-9]+]]:_(s32) = G_ADD [[SHL4]], [[UMIN2]] - ; GFX6: [[LSHR5:%[0-9]+]]:_(s32) = G_LSHR [[ADD2]], [[C]](s32) - ; GFX6: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF - ; GFX6: [[UV6:%[0-9]+]]:_(<2 x s16>), [[UV7:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<4 x s16>) - ; GFX6: [[BITCAST4:%[0-9]+]]:_(s32) = G_BITCAST [[UV6]](<2 x s16>) - ; GFX6: [[LSHR6:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST4]], [[C]](s32) - ; GFX6: [[BITCAST5:%[0-9]+]]:_(s32) = G_BITCAST [[UV7]](<2 x s16>) - ; GFX6: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 - ; GFX6: [[AND:%[0-9]+]]:_(s32) = G_AND [[LSHR3]], [[C2]] - ; GFX6: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LSHR4]], [[C2]] - ; GFX6: [[SHL6:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C]](s32) - ; GFX6: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL6]] - ; GFX6: [[BITCAST6:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) - ; GFX6: [[AND2:%[0-9]+]]:_(s32) = G_AND [[LSHR5]], [[C2]] - ; GFX6: [[AND3:%[0-9]+]]:_(s32) = G_AND [[BITCAST4]], [[C2]] - ; GFX6: [[SHL7:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C]](s32) - ; GFX6: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL7]] - ; GFX6: [[BITCAST7:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32) - ; GFX6: [[AND4:%[0-9]+]]:_(s32) = G_AND [[LSHR6]], [[C2]] - ; GFX6: [[AND5:%[0-9]+]]:_(s32) = G_AND [[BITCAST5]], [[C2]] - ; GFX6: [[SHL8:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[C]](s32) - ; GFX6: [[OR2:%[0-9]+]]:_(s32) = G_OR [[AND4]], [[SHL8]] - ; GFX6: [[BITCAST8:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR2]](s32) - ; GFX6: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BITCAST6]](<2 x s16>), [[BITCAST7]](<2 x s16>), [[BITCAST8]](<2 x s16>) - ; GFX6: $vgpr0_vgpr1_vgpr2 = COPY [[CONCAT_VECTORS]](<6 x s16>) + ; GFX6-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>), [[UV2:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[COPY]](<6 x s16>) + ; GFX6-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>) + ; GFX6-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; GFX6-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) + ; GFX6-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>) + ; GFX6-NEXT: [[UV3:%[0-9]+]]:_(<2 x s16>), [[UV4:%[0-9]+]]:_(<2 x s16>), [[UV5:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[COPY]](<6 x s16>) + ; GFX6-NEXT: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[UV4]](<2 x s16>) + ; GFX6-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C]](s32) + ; GFX6-NEXT: [[BITCAST3:%[0-9]+]]:_(s32) = G_BITCAST [[UV5]](<2 x s16>) + ; GFX6-NEXT: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST3]], [[C]](s32) + ; GFX6-NEXT: 
[[SHL:%[0-9]+]]:_(s32) = G_SHL [[BITCAST]], [[C]](s32) + ; GFX6-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[LSHR1]], [[C]](s32) + ; GFX6-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 -1 + ; GFX6-NEXT: [[XOR:%[0-9]+]]:_(s32) = G_XOR [[SHL]], [[C1]] + ; GFX6-NEXT: [[UMIN:%[0-9]+]]:_(s32) = G_UMIN [[XOR]], [[SHL1]] + ; GFX6-NEXT: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[SHL]], [[UMIN]] + ; GFX6-NEXT: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[ADD]], [[C]](s32) + ; GFX6-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[LSHR]], [[C]](s32) + ; GFX6-NEXT: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[BITCAST3]], [[C]](s32) + ; GFX6-NEXT: [[XOR1:%[0-9]+]]:_(s32) = G_XOR [[SHL2]], [[C1]] + ; GFX6-NEXT: [[UMIN1:%[0-9]+]]:_(s32) = G_UMIN [[XOR1]], [[SHL3]] + ; GFX6-NEXT: [[ADD1:%[0-9]+]]:_(s32) = G_ADD [[SHL2]], [[UMIN1]] + ; GFX6-NEXT: [[LSHR4:%[0-9]+]]:_(s32) = G_LSHR [[ADD1]], [[C]](s32) + ; GFX6-NEXT: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[BITCAST1]], [[C]](s32) + ; GFX6-NEXT: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[LSHR2]], [[C]](s32) + ; GFX6-NEXT: [[XOR2:%[0-9]+]]:_(s32) = G_XOR [[SHL4]], [[C1]] + ; GFX6-NEXT: [[UMIN2:%[0-9]+]]:_(s32) = G_UMIN [[XOR2]], [[SHL5]] + ; GFX6-NEXT: [[ADD2:%[0-9]+]]:_(s32) = G_ADD [[SHL4]], [[UMIN2]] + ; GFX6-NEXT: [[LSHR5:%[0-9]+]]:_(s32) = G_LSHR [[ADD2]], [[C]](s32) + ; GFX6-NEXT: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF + ; GFX6-NEXT: [[UV6:%[0-9]+]]:_(<2 x s16>), [[UV7:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<4 x s16>) + ; GFX6-NEXT: [[BITCAST4:%[0-9]+]]:_(s32) = G_BITCAST [[UV6]](<2 x s16>) + ; GFX6-NEXT: [[LSHR6:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST4]], [[C]](s32) + ; GFX6-NEXT: [[BITCAST5:%[0-9]+]]:_(s32) = G_BITCAST [[UV7]](<2 x s16>) + ; GFX6-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 + ; GFX6-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[LSHR3]], [[C2]] + ; GFX6-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LSHR4]], [[C2]] + ; GFX6-NEXT: [[SHL6:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C]](s32) + ; GFX6-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL6]] + ; GFX6-NEXT: [[BITCAST6:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) + ; GFX6-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[LSHR5]], [[C2]] + ; GFX6-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[BITCAST4]], [[C2]] + ; GFX6-NEXT: [[SHL7:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C]](s32) + ; GFX6-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL7]] + ; GFX6-NEXT: [[BITCAST7:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32) + ; GFX6-NEXT: [[AND4:%[0-9]+]]:_(s32) = G_AND [[LSHR6]], [[C2]] + ; GFX6-NEXT: [[AND5:%[0-9]+]]:_(s32) = G_AND [[BITCAST5]], [[C2]] + ; GFX6-NEXT: [[SHL8:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[C]](s32) + ; GFX6-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[AND4]], [[SHL8]] + ; GFX6-NEXT: [[BITCAST8:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR2]](s32) + ; GFX6-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BITCAST6]](<2 x s16>), [[BITCAST7]](<2 x s16>), [[BITCAST8]](<2 x s16>) + ; GFX6-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[CONCAT_VECTORS]](<6 x s16>) ; GFX8-LABEL: name: uaddsat_v3s16 ; GFX8: [[COPY:%[0-9]+]]:_(<6 x s16>) = COPY $vgpr0_vgpr1_vgpr2 - ; GFX8: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>), [[UV2:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[COPY]](<6 x s16>) - ; GFX8: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>) - ; GFX8: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST]](s32) - ; GFX8: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; GFX8: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) - ; GFX8: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) - ; GFX8: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x 
s16>) - ; GFX8: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST1]](s32) - ; GFX8: [[UV3:%[0-9]+]]:_(<2 x s16>), [[UV4:%[0-9]+]]:_(<2 x s16>), [[UV5:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[COPY]](<6 x s16>) - ; GFX8: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[UV4]](<2 x s16>) - ; GFX8: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C]](s32) - ; GFX8: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR1]](s32) - ; GFX8: [[BITCAST3:%[0-9]+]]:_(s32) = G_BITCAST [[UV5]](<2 x s16>) - ; GFX8: [[TRUNC4:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST3]](s32) - ; GFX8: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST3]], [[C]](s32) - ; GFX8: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR2]](s32) - ; GFX8: [[UADDSAT:%[0-9]+]]:_(s16) = G_UADDSAT [[TRUNC]], [[TRUNC3]] - ; GFX8: [[UADDSAT1:%[0-9]+]]:_(s16) = G_UADDSAT [[TRUNC1]], [[TRUNC4]] - ; GFX8: [[UADDSAT2:%[0-9]+]]:_(s16) = G_UADDSAT [[TRUNC2]], [[TRUNC5]] - ; GFX8: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF - ; GFX8: [[UV6:%[0-9]+]]:_(<2 x s16>), [[UV7:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<4 x s16>) - ; GFX8: [[BITCAST4:%[0-9]+]]:_(s32) = G_BITCAST [[UV6]](<2 x s16>) - ; GFX8: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST4]], [[C]](s32) - ; GFX8: [[BITCAST5:%[0-9]+]]:_(s32) = G_BITCAST [[UV7]](<2 x s16>) - ; GFX8: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[UADDSAT]](s16) - ; GFX8: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[UADDSAT1]](s16) - ; GFX8: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C]](s32) - ; GFX8: [[OR:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL]] - ; GFX8: [[BITCAST6:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) - ; GFX8: [[ZEXT2:%[0-9]+]]:_(s32) = G_ZEXT [[UADDSAT2]](s16) - ; GFX8: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 - ; GFX8: [[AND:%[0-9]+]]:_(s32) = G_AND [[BITCAST4]], [[C1]] - ; GFX8: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND]], [[C]](s32) - ; GFX8: [[OR1:%[0-9]+]]:_(s32) = G_OR [[ZEXT2]], [[SHL1]] - ; GFX8: [[BITCAST7:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32) - ; GFX8: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LSHR3]], [[C1]] - ; GFX8: [[AND2:%[0-9]+]]:_(s32) = G_AND [[BITCAST5]], [[C1]] - ; GFX8: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND2]], [[C]](s32) - ; GFX8: [[OR2:%[0-9]+]]:_(s32) = G_OR [[AND1]], [[SHL2]] - ; GFX8: [[BITCAST8:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR2]](s32) - ; GFX8: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BITCAST6]](<2 x s16>), [[BITCAST7]](<2 x s16>), [[BITCAST8]](<2 x s16>) - ; GFX8: $vgpr0_vgpr1_vgpr2 = COPY [[CONCAT_VECTORS]](<6 x s16>) + ; GFX8-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>), [[UV2:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[COPY]](<6 x s16>) + ; GFX8-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>) + ; GFX8-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST]](s32) + ; GFX8-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; GFX8-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) + ; GFX8-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) + ; GFX8-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>) + ; GFX8-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST1]](s32) + ; GFX8-NEXT: [[UV3:%[0-9]+]]:_(<2 x s16>), [[UV4:%[0-9]+]]:_(<2 x s16>), [[UV5:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[COPY]](<6 x s16>) + ; GFX8-NEXT: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[UV4]](<2 x s16>) + ; GFX8-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C]](s32) + ; GFX8-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR1]](s32) + ; GFX8-NEXT: [[BITCAST3:%[0-9]+]]:_(s32) = G_BITCAST [[UV5]](<2 x s16>) + ; GFX8-NEXT: [[TRUNC4:%[0-9]+]]:_(s16) = 
G_TRUNC [[BITCAST3]](s32) + ; GFX8-NEXT: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST3]], [[C]](s32) + ; GFX8-NEXT: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR2]](s32) + ; GFX8-NEXT: [[UADDSAT:%[0-9]+]]:_(s16) = G_UADDSAT [[TRUNC]], [[TRUNC3]] + ; GFX8-NEXT: [[UADDSAT1:%[0-9]+]]:_(s16) = G_UADDSAT [[TRUNC1]], [[TRUNC4]] + ; GFX8-NEXT: [[UADDSAT2:%[0-9]+]]:_(s16) = G_UADDSAT [[TRUNC2]], [[TRUNC5]] + ; GFX8-NEXT: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF + ; GFX8-NEXT: [[UV6:%[0-9]+]]:_(<2 x s16>), [[UV7:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<4 x s16>) + ; GFX8-NEXT: [[BITCAST4:%[0-9]+]]:_(s32) = G_BITCAST [[UV6]](<2 x s16>) + ; GFX8-NEXT: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST4]], [[C]](s32) + ; GFX8-NEXT: [[BITCAST5:%[0-9]+]]:_(s32) = G_BITCAST [[UV7]](<2 x s16>) + ; GFX8-NEXT: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[UADDSAT]](s16) + ; GFX8-NEXT: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[UADDSAT1]](s16) + ; GFX8-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C]](s32) + ; GFX8-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL]] + ; GFX8-NEXT: [[BITCAST6:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) + ; GFX8-NEXT: [[ZEXT2:%[0-9]+]]:_(s32) = G_ZEXT [[UADDSAT2]](s16) + ; GFX8-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 + ; GFX8-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[BITCAST4]], [[C1]] + ; GFX8-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND]], [[C]](s32) + ; GFX8-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[ZEXT2]], [[SHL1]] + ; GFX8-NEXT: [[BITCAST7:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32) + ; GFX8-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LSHR3]], [[C1]] + ; GFX8-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[BITCAST5]], [[C1]] + ; GFX8-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND2]], [[C]](s32) + ; GFX8-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[AND1]], [[SHL2]] + ; GFX8-NEXT: [[BITCAST8:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR2]](s32) + ; GFX8-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BITCAST6]](<2 x s16>), [[BITCAST7]](<2 x s16>), [[BITCAST8]](<2 x s16>) + ; GFX8-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[CONCAT_VECTORS]](<6 x s16>) ; GFX9-LABEL: name: uaddsat_v3s16 ; GFX9: [[COPY:%[0-9]+]]:_(<6 x s16>) = COPY $vgpr0_vgpr1_vgpr2 - ; GFX9: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>), [[UV2:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[COPY]](<6 x s16>) - ; GFX9: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>) - ; GFX9: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; GFX9: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) - ; GFX9: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>) - ; GFX9: [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[BITCAST]](s32), [[LSHR]](s32) - ; GFX9: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF - ; GFX9: [[BUILD_VECTOR_TRUNC1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[BITCAST1]](s32), [[DEF]](s32) - ; GFX9: [[UV3:%[0-9]+]]:_(<2 x s16>), [[UV4:%[0-9]+]]:_(<2 x s16>), [[UV5:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[COPY]](<6 x s16>) - ; GFX9: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[UV4]](<2 x s16>) - ; GFX9: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C]](s32) - ; GFX9: [[BITCAST3:%[0-9]+]]:_(s32) = G_BITCAST [[UV5]](<2 x s16>) - ; GFX9: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST3]], [[C]](s32) - ; GFX9: [[BUILD_VECTOR_TRUNC2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[LSHR1]](s32), [[BITCAST3]](s32) - ; GFX9: [[BUILD_VECTOR_TRUNC3:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[LSHR2]](s32), [[DEF]](s32) - ; GFX9: [[UADDSAT:%[0-9]+]]:_(<2 x s16>) = G_UADDSAT 
[[BUILD_VECTOR_TRUNC]], [[BUILD_VECTOR_TRUNC2]] - ; GFX9: [[UADDSAT1:%[0-9]+]]:_(<2 x s16>) = G_UADDSAT [[BUILD_VECTOR_TRUNC1]], [[BUILD_VECTOR_TRUNC3]] - ; GFX9: [[DEF1:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF - ; GFX9: [[BITCAST4:%[0-9]+]]:_(s32) = G_BITCAST [[UADDSAT]](<2 x s16>) - ; GFX9: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST4]], [[C]](s32) - ; GFX9: [[BITCAST5:%[0-9]+]]:_(s32) = G_BITCAST [[UADDSAT1]](<2 x s16>) - ; GFX9: [[UV6:%[0-9]+]]:_(<2 x s16>), [[UV7:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF1]](<4 x s16>) - ; GFX9: [[BITCAST6:%[0-9]+]]:_(s32) = G_BITCAST [[UV6]](<2 x s16>) - ; GFX9: [[LSHR4:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST6]], [[C]](s32) - ; GFX9: [[BITCAST7:%[0-9]+]]:_(s32) = G_BITCAST [[UV7]](<2 x s16>) - ; GFX9: [[BUILD_VECTOR_TRUNC4:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[BITCAST4]](s32), [[LSHR3]](s32) - ; GFX9: [[BUILD_VECTOR_TRUNC5:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[BITCAST5]](s32), [[BITCAST6]](s32) - ; GFX9: [[BUILD_VECTOR_TRUNC6:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[LSHR4]](s32), [[BITCAST7]](s32) - ; GFX9: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR_TRUNC4]](<2 x s16>), [[BUILD_VECTOR_TRUNC5]](<2 x s16>), [[BUILD_VECTOR_TRUNC6]](<2 x s16>) - ; GFX9: $vgpr0_vgpr1_vgpr2 = COPY [[CONCAT_VECTORS]](<6 x s16>) + ; GFX9-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>), [[UV2:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[COPY]](<6 x s16>) + ; GFX9-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>) + ; GFX9-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; GFX9-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) + ; GFX9-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>) + ; GFX9-NEXT: [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[BITCAST]](s32), [[LSHR]](s32) + ; GFX9-NEXT: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF + ; GFX9-NEXT: [[BUILD_VECTOR_TRUNC1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[BITCAST1]](s32), [[DEF]](s32) + ; GFX9-NEXT: [[UV3:%[0-9]+]]:_(<2 x s16>), [[UV4:%[0-9]+]]:_(<2 x s16>), [[UV5:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[COPY]](<6 x s16>) + ; GFX9-NEXT: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[UV4]](<2 x s16>) + ; GFX9-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C]](s32) + ; GFX9-NEXT: [[BITCAST3:%[0-9]+]]:_(s32) = G_BITCAST [[UV5]](<2 x s16>) + ; GFX9-NEXT: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST3]], [[C]](s32) + ; GFX9-NEXT: [[BUILD_VECTOR_TRUNC2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[LSHR1]](s32), [[BITCAST3]](s32) + ; GFX9-NEXT: [[BUILD_VECTOR_TRUNC3:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[LSHR2]](s32), [[DEF]](s32) + ; GFX9-NEXT: [[UADDSAT:%[0-9]+]]:_(<2 x s16>) = G_UADDSAT [[BUILD_VECTOR_TRUNC]], [[BUILD_VECTOR_TRUNC2]] + ; GFX9-NEXT: [[UADDSAT1:%[0-9]+]]:_(<2 x s16>) = G_UADDSAT [[BUILD_VECTOR_TRUNC1]], [[BUILD_VECTOR_TRUNC3]] + ; GFX9-NEXT: [[DEF1:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF + ; GFX9-NEXT: [[BITCAST4:%[0-9]+]]:_(s32) = G_BITCAST [[UADDSAT]](<2 x s16>) + ; GFX9-NEXT: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST4]], [[C]](s32) + ; GFX9-NEXT: [[BITCAST5:%[0-9]+]]:_(s32) = G_BITCAST [[UADDSAT1]](<2 x s16>) + ; GFX9-NEXT: [[UV6:%[0-9]+]]:_(<2 x s16>), [[UV7:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF1]](<4 x s16>) + ; GFX9-NEXT: [[BITCAST6:%[0-9]+]]:_(s32) = G_BITCAST [[UV6]](<2 x s16>) + ; GFX9-NEXT: [[LSHR4:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST6]], [[C]](s32) + ; GFX9-NEXT: [[BITCAST7:%[0-9]+]]:_(s32) = G_BITCAST [[UV7]](<2 x s16>) + ; GFX9-NEXT: 
[[BUILD_VECTOR_TRUNC4:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[BITCAST4]](s32), [[LSHR3]](s32) + ; GFX9-NEXT: [[BUILD_VECTOR_TRUNC5:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[BITCAST5]](s32), [[BITCAST6]](s32) + ; GFX9-NEXT: [[BUILD_VECTOR_TRUNC6:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[LSHR4]](s32), [[BITCAST7]](s32) + ; GFX9-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR_TRUNC4]](<2 x s16>), [[BUILD_VECTOR_TRUNC5]](<2 x s16>), [[BUILD_VECTOR_TRUNC6]](<2 x s16>) + ; GFX9-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[CONCAT_VECTORS]](<6 x s16>) %0:_(<6 x s16>) = COPY $vgpr0_vgpr1_vgpr2 %1:_(<3 x s16>), %2:_(<3 x s16>) = G_UNMERGE_VALUES %0 %3:_(<3 x s16>) = G_UADDSAT %1, %2 @@ -468,103 +468,103 @@ body: | ; GFX6-LABEL: name: uaddsat_v4s16 ; GFX6: [[COPY:%[0-9]+]]:_(<4 x s16>) = COPY $vgpr0_vgpr1 - ; GFX6: [[COPY1:%[0-9]+]]:_(<4 x s16>) = COPY $vgpr2_vgpr3 - ; GFX6: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[COPY]](<4 x s16>) - ; GFX6: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>) - ; GFX6: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; GFX6: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) - ; GFX6: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>) - ; GFX6: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C]](s32) - ; GFX6: [[UV2:%[0-9]+]]:_(<2 x s16>), [[UV3:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[COPY1]](<4 x s16>) - ; GFX6: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[UV2]](<2 x s16>) - ; GFX6: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C]](s32) - ; GFX6: [[BITCAST3:%[0-9]+]]:_(s32) = G_BITCAST [[UV3]](<2 x s16>) - ; GFX6: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST3]], [[C]](s32) - ; GFX6: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[BITCAST]], [[C]](s32) - ; GFX6: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[BITCAST2]], [[C]](s32) - ; GFX6: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 -1 - ; GFX6: [[XOR:%[0-9]+]]:_(s32) = G_XOR [[SHL]], [[C1]] - ; GFX6: [[UMIN:%[0-9]+]]:_(s32) = G_UMIN [[XOR]], [[SHL1]] - ; GFX6: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[SHL]], [[UMIN]] - ; GFX6: [[LSHR4:%[0-9]+]]:_(s32) = G_LSHR [[ADD]], [[C]](s32) - ; GFX6: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[LSHR]], [[C]](s32) - ; GFX6: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[LSHR2]], [[C]](s32) - ; GFX6: [[XOR1:%[0-9]+]]:_(s32) = G_XOR [[SHL2]], [[C1]] - ; GFX6: [[UMIN1:%[0-9]+]]:_(s32) = G_UMIN [[XOR1]], [[SHL3]] - ; GFX6: [[ADD1:%[0-9]+]]:_(s32) = G_ADD [[SHL2]], [[UMIN1]] - ; GFX6: [[LSHR5:%[0-9]+]]:_(s32) = G_LSHR [[ADD1]], [[C]](s32) - ; GFX6: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[BITCAST1]], [[C]](s32) - ; GFX6: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[BITCAST3]], [[C]](s32) - ; GFX6: [[XOR2:%[0-9]+]]:_(s32) = G_XOR [[SHL4]], [[C1]] - ; GFX6: [[UMIN2:%[0-9]+]]:_(s32) = G_UMIN [[XOR2]], [[SHL5]] - ; GFX6: [[ADD2:%[0-9]+]]:_(s32) = G_ADD [[SHL4]], [[UMIN2]] - ; GFX6: [[LSHR6:%[0-9]+]]:_(s32) = G_LSHR [[ADD2]], [[C]](s32) - ; GFX6: [[SHL6:%[0-9]+]]:_(s32) = G_SHL [[LSHR1]], [[C]](s32) - ; GFX6: [[SHL7:%[0-9]+]]:_(s32) = G_SHL [[LSHR3]], [[C]](s32) - ; GFX6: [[XOR3:%[0-9]+]]:_(s32) = G_XOR [[SHL6]], [[C1]] - ; GFX6: [[UMIN3:%[0-9]+]]:_(s32) = G_UMIN [[XOR3]], [[SHL7]] - ; GFX6: [[ADD3:%[0-9]+]]:_(s32) = G_ADD [[SHL6]], [[UMIN3]] - ; GFX6: [[LSHR7:%[0-9]+]]:_(s32) = G_LSHR [[ADD3]], [[C]](s32) - ; GFX6: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 - ; GFX6: [[AND:%[0-9]+]]:_(s32) = G_AND [[LSHR4]], [[C2]] - ; GFX6: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LSHR5]], [[C2]] - ; GFX6: [[SHL8:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C]](s32) - ; GFX6: [[OR:%[0-9]+]]:_(s32) = 
G_OR [[AND]], [[SHL8]] - ; GFX6: [[BITCAST4:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) - ; GFX6: [[AND2:%[0-9]+]]:_(s32) = G_AND [[LSHR6]], [[C2]] - ; GFX6: [[AND3:%[0-9]+]]:_(s32) = G_AND [[LSHR7]], [[C2]] - ; GFX6: [[SHL9:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C]](s32) - ; GFX6: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL9]] - ; GFX6: [[BITCAST5:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32) - ; GFX6: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BITCAST4]](<2 x s16>), [[BITCAST5]](<2 x s16>) - ; GFX6: $vgpr0_vgpr1 = COPY [[CONCAT_VECTORS]](<4 x s16>) + ; GFX6-NEXT: [[COPY1:%[0-9]+]]:_(<4 x s16>) = COPY $vgpr2_vgpr3 + ; GFX6-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[COPY]](<4 x s16>) + ; GFX6-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>) + ; GFX6-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; GFX6-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) + ; GFX6-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>) + ; GFX6-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C]](s32) + ; GFX6-NEXT: [[UV2:%[0-9]+]]:_(<2 x s16>), [[UV3:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[COPY1]](<4 x s16>) + ; GFX6-NEXT: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[UV2]](<2 x s16>) + ; GFX6-NEXT: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C]](s32) + ; GFX6-NEXT: [[BITCAST3:%[0-9]+]]:_(s32) = G_BITCAST [[UV3]](<2 x s16>) + ; GFX6-NEXT: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST3]], [[C]](s32) + ; GFX6-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[BITCAST]], [[C]](s32) + ; GFX6-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[BITCAST2]], [[C]](s32) + ; GFX6-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 -1 + ; GFX6-NEXT: [[XOR:%[0-9]+]]:_(s32) = G_XOR [[SHL]], [[C1]] + ; GFX6-NEXT: [[UMIN:%[0-9]+]]:_(s32) = G_UMIN [[XOR]], [[SHL1]] + ; GFX6-NEXT: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[SHL]], [[UMIN]] + ; GFX6-NEXT: [[LSHR4:%[0-9]+]]:_(s32) = G_LSHR [[ADD]], [[C]](s32) + ; GFX6-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[LSHR]], [[C]](s32) + ; GFX6-NEXT: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[LSHR2]], [[C]](s32) + ; GFX6-NEXT: [[XOR1:%[0-9]+]]:_(s32) = G_XOR [[SHL2]], [[C1]] + ; GFX6-NEXT: [[UMIN1:%[0-9]+]]:_(s32) = G_UMIN [[XOR1]], [[SHL3]] + ; GFX6-NEXT: [[ADD1:%[0-9]+]]:_(s32) = G_ADD [[SHL2]], [[UMIN1]] + ; GFX6-NEXT: [[LSHR5:%[0-9]+]]:_(s32) = G_LSHR [[ADD1]], [[C]](s32) + ; GFX6-NEXT: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[BITCAST1]], [[C]](s32) + ; GFX6-NEXT: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[BITCAST3]], [[C]](s32) + ; GFX6-NEXT: [[XOR2:%[0-9]+]]:_(s32) = G_XOR [[SHL4]], [[C1]] + ; GFX6-NEXT: [[UMIN2:%[0-9]+]]:_(s32) = G_UMIN [[XOR2]], [[SHL5]] + ; GFX6-NEXT: [[ADD2:%[0-9]+]]:_(s32) = G_ADD [[SHL4]], [[UMIN2]] + ; GFX6-NEXT: [[LSHR6:%[0-9]+]]:_(s32) = G_LSHR [[ADD2]], [[C]](s32) + ; GFX6-NEXT: [[SHL6:%[0-9]+]]:_(s32) = G_SHL [[LSHR1]], [[C]](s32) + ; GFX6-NEXT: [[SHL7:%[0-9]+]]:_(s32) = G_SHL [[LSHR3]], [[C]](s32) + ; GFX6-NEXT: [[XOR3:%[0-9]+]]:_(s32) = G_XOR [[SHL6]], [[C1]] + ; GFX6-NEXT: [[UMIN3:%[0-9]+]]:_(s32) = G_UMIN [[XOR3]], [[SHL7]] + ; GFX6-NEXT: [[ADD3:%[0-9]+]]:_(s32) = G_ADD [[SHL6]], [[UMIN3]] + ; GFX6-NEXT: [[LSHR7:%[0-9]+]]:_(s32) = G_LSHR [[ADD3]], [[C]](s32) + ; GFX6-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 + ; GFX6-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[LSHR4]], [[C2]] + ; GFX6-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LSHR5]], [[C2]] + ; GFX6-NEXT: [[SHL8:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C]](s32) + ; GFX6-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL8]] + ; GFX6-NEXT: 
[[BITCAST4:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) + ; GFX6-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[LSHR6]], [[C2]] + ; GFX6-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[LSHR7]], [[C2]] + ; GFX6-NEXT: [[SHL9:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C]](s32) + ; GFX6-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL9]] + ; GFX6-NEXT: [[BITCAST5:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32) + ; GFX6-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BITCAST4]](<2 x s16>), [[BITCAST5]](<2 x s16>) + ; GFX6-NEXT: $vgpr0_vgpr1 = COPY [[CONCAT_VECTORS]](<4 x s16>) ; GFX8-LABEL: name: uaddsat_v4s16 ; GFX8: [[COPY:%[0-9]+]]:_(<4 x s16>) = COPY $vgpr0_vgpr1 - ; GFX8: [[COPY1:%[0-9]+]]:_(<4 x s16>) = COPY $vgpr2_vgpr3 - ; GFX8: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[COPY]](<4 x s16>) - ; GFX8: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>) - ; GFX8: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST]](s32) - ; GFX8: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; GFX8: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) - ; GFX8: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) - ; GFX8: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>) - ; GFX8: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST1]](s32) - ; GFX8: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C]](s32) - ; GFX8: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR1]](s32) - ; GFX8: [[UV2:%[0-9]+]]:_(<2 x s16>), [[UV3:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[COPY1]](<4 x s16>) - ; GFX8: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[UV2]](<2 x s16>) - ; GFX8: [[TRUNC4:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST2]](s32) - ; GFX8: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C]](s32) - ; GFX8: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR2]](s32) - ; GFX8: [[BITCAST3:%[0-9]+]]:_(s32) = G_BITCAST [[UV3]](<2 x s16>) - ; GFX8: [[TRUNC6:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST3]](s32) - ; GFX8: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST3]], [[C]](s32) - ; GFX8: [[TRUNC7:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR3]](s32) - ; GFX8: [[UADDSAT:%[0-9]+]]:_(s16) = G_UADDSAT [[TRUNC]], [[TRUNC4]] - ; GFX8: [[UADDSAT1:%[0-9]+]]:_(s16) = G_UADDSAT [[TRUNC1]], [[TRUNC5]] - ; GFX8: [[UADDSAT2:%[0-9]+]]:_(s16) = G_UADDSAT [[TRUNC2]], [[TRUNC6]] - ; GFX8: [[UADDSAT3:%[0-9]+]]:_(s16) = G_UADDSAT [[TRUNC3]], [[TRUNC7]] - ; GFX8: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[UADDSAT]](s16) - ; GFX8: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[UADDSAT1]](s16) - ; GFX8: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C]](s32) - ; GFX8: [[OR:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL]] - ; GFX8: [[BITCAST4:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) - ; GFX8: [[ZEXT2:%[0-9]+]]:_(s32) = G_ZEXT [[UADDSAT2]](s16) - ; GFX8: [[ZEXT3:%[0-9]+]]:_(s32) = G_ZEXT [[UADDSAT3]](s16) - ; GFX8: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[ZEXT3]], [[C]](s32) - ; GFX8: [[OR1:%[0-9]+]]:_(s32) = G_OR [[ZEXT2]], [[SHL1]] - ; GFX8: [[BITCAST5:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32) - ; GFX8: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BITCAST4]](<2 x s16>), [[BITCAST5]](<2 x s16>) - ; GFX8: $vgpr0_vgpr1 = COPY [[CONCAT_VECTORS]](<4 x s16>) + ; GFX8-NEXT: [[COPY1:%[0-9]+]]:_(<4 x s16>) = COPY $vgpr2_vgpr3 + ; GFX8-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[COPY]](<4 x s16>) + ; GFX8-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>) + ; GFX8-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST]](s32) + ; GFX8-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; GFX8-NEXT: [[LSHR:%[0-9]+]]:_(s32) 
= G_LSHR [[BITCAST]], [[C]](s32) + ; GFX8-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) + ; GFX8-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>) + ; GFX8-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST1]](s32) + ; GFX8-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C]](s32) + ; GFX8-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR1]](s32) + ; GFX8-NEXT: [[UV2:%[0-9]+]]:_(<2 x s16>), [[UV3:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[COPY1]](<4 x s16>) + ; GFX8-NEXT: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[UV2]](<2 x s16>) + ; GFX8-NEXT: [[TRUNC4:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST2]](s32) + ; GFX8-NEXT: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C]](s32) + ; GFX8-NEXT: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR2]](s32) + ; GFX8-NEXT: [[BITCAST3:%[0-9]+]]:_(s32) = G_BITCAST [[UV3]](<2 x s16>) + ; GFX8-NEXT: [[TRUNC6:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST3]](s32) + ; GFX8-NEXT: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST3]], [[C]](s32) + ; GFX8-NEXT: [[TRUNC7:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR3]](s32) + ; GFX8-NEXT: [[UADDSAT:%[0-9]+]]:_(s16) = G_UADDSAT [[TRUNC]], [[TRUNC4]] + ; GFX8-NEXT: [[UADDSAT1:%[0-9]+]]:_(s16) = G_UADDSAT [[TRUNC1]], [[TRUNC5]] + ; GFX8-NEXT: [[UADDSAT2:%[0-9]+]]:_(s16) = G_UADDSAT [[TRUNC2]], [[TRUNC6]] + ; GFX8-NEXT: [[UADDSAT3:%[0-9]+]]:_(s16) = G_UADDSAT [[TRUNC3]], [[TRUNC7]] + ; GFX8-NEXT: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[UADDSAT]](s16) + ; GFX8-NEXT: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[UADDSAT1]](s16) + ; GFX8-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C]](s32) + ; GFX8-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL]] + ; GFX8-NEXT: [[BITCAST4:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) + ; GFX8-NEXT: [[ZEXT2:%[0-9]+]]:_(s32) = G_ZEXT [[UADDSAT2]](s16) + ; GFX8-NEXT: [[ZEXT3:%[0-9]+]]:_(s32) = G_ZEXT [[UADDSAT3]](s16) + ; GFX8-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[ZEXT3]], [[C]](s32) + ; GFX8-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[ZEXT2]], [[SHL1]] + ; GFX8-NEXT: [[BITCAST5:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32) + ; GFX8-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BITCAST4]](<2 x s16>), [[BITCAST5]](<2 x s16>) + ; GFX8-NEXT: $vgpr0_vgpr1 = COPY [[CONCAT_VECTORS]](<4 x s16>) ; GFX9-LABEL: name: uaddsat_v4s16 ; GFX9: [[COPY:%[0-9]+]]:_(<4 x s16>) = COPY $vgpr0_vgpr1 - ; GFX9: [[COPY1:%[0-9]+]]:_(<4 x s16>) = COPY $vgpr2_vgpr3 - ; GFX9: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[COPY]](<4 x s16>) - ; GFX9: [[UV2:%[0-9]+]]:_(<2 x s16>), [[UV3:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[COPY1]](<4 x s16>) - ; GFX9: [[UADDSAT:%[0-9]+]]:_(<2 x s16>) = G_UADDSAT [[UV]], [[UV2]] - ; GFX9: [[UADDSAT1:%[0-9]+]]:_(<2 x s16>) = G_UADDSAT [[UV1]], [[UV3]] - ; GFX9: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[UADDSAT]](<2 x s16>), [[UADDSAT1]](<2 x s16>) - ; GFX9: $vgpr0_vgpr1 = COPY [[CONCAT_VECTORS]](<4 x s16>) + ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(<4 x s16>) = COPY $vgpr2_vgpr3 + ; GFX9-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[COPY]](<4 x s16>) + ; GFX9-NEXT: [[UV2:%[0-9]+]]:_(<2 x s16>), [[UV3:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[COPY1]](<4 x s16>) + ; GFX9-NEXT: [[UADDSAT:%[0-9]+]]:_(<2 x s16>) = G_UADDSAT [[UV]], [[UV2]] + ; GFX9-NEXT: [[UADDSAT1:%[0-9]+]]:_(<2 x s16>) = G_UADDSAT [[UV1]], [[UV3]] + ; GFX9-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[UADDSAT]](<2 x s16>), [[UADDSAT1]](<2 x s16>) + ; GFX9-NEXT: $vgpr0_vgpr1 = COPY [[CONCAT_VECTORS]](<4 x 
s16>) %0:_(<4 x s16>) = COPY $vgpr0_vgpr1 %1:_(<4 x s16>) = COPY $vgpr2_vgpr3 %2:_(<4 x s16>) = G_UADDSAT %0, %1 @@ -579,22 +579,22 @@ body: | ; GFX6-LABEL: name: uaddsat_s32 ; GFX6: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX6: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX6: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 -1 - ; GFX6: [[XOR:%[0-9]+]]:_(s32) = G_XOR [[COPY]], [[C]] - ; GFX6: [[UMIN:%[0-9]+]]:_(s32) = G_UMIN [[XOR]], [[COPY1]] - ; GFX6: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[COPY]], [[UMIN]] - ; GFX6: $vgpr0 = COPY [[ADD]](s32) + ; GFX6-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; GFX6-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 -1 + ; GFX6-NEXT: [[XOR:%[0-9]+]]:_(s32) = G_XOR [[COPY]], [[C]] + ; GFX6-NEXT: [[UMIN:%[0-9]+]]:_(s32) = G_UMIN [[XOR]], [[COPY1]] + ; GFX6-NEXT: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[COPY]], [[UMIN]] + ; GFX6-NEXT: $vgpr0 = COPY [[ADD]](s32) ; GFX8-LABEL: name: uaddsat_s32 ; GFX8: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX8: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX8: [[UADDSAT:%[0-9]+]]:_(s32) = G_UADDSAT [[COPY]], [[COPY1]] - ; GFX8: $vgpr0 = COPY [[UADDSAT]](s32) + ; GFX8-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; GFX8-NEXT: [[UADDSAT:%[0-9]+]]:_(s32) = G_UADDSAT [[COPY]], [[COPY1]] + ; GFX8-NEXT: $vgpr0 = COPY [[UADDSAT]](s32) ; GFX9-LABEL: name: uaddsat_s32 ; GFX9: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX9: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX9: [[UADDSAT:%[0-9]+]]:_(s32) = G_UADDSAT [[COPY]], [[COPY1]] - ; GFX9: $vgpr0 = COPY [[UADDSAT]](s32) + ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; GFX9-NEXT: [[UADDSAT:%[0-9]+]]:_(s32) = G_UADDSAT [[COPY]], [[COPY1]] + ; GFX9-NEXT: $vgpr0 = COPY [[UADDSAT]](s32) %0:_(s32) = COPY $vgpr0 %1:_(s32) = COPY $vgpr1 %2:_(s32) = G_UADDSAT %0, %1 @@ -609,36 +609,36 @@ body: | ; GFX6-LABEL: name: uaddsat_v2s32 ; GFX6: [[COPY:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr0_vgpr1 - ; GFX6: [[COPY1:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr2_vgpr3 - ; GFX6: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<2 x s32>) - ; GFX6: [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](<2 x s32>) - ; GFX6: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 -1 - ; GFX6: [[XOR:%[0-9]+]]:_(s32) = G_XOR [[UV]], [[C]] - ; GFX6: [[UMIN:%[0-9]+]]:_(s32) = G_UMIN [[XOR]], [[UV2]] - ; GFX6: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[UV]], [[UMIN]] - ; GFX6: [[XOR1:%[0-9]+]]:_(s32) = G_XOR [[UV1]], [[C]] - ; GFX6: [[UMIN1:%[0-9]+]]:_(s32) = G_UMIN [[XOR1]], [[UV3]] - ; GFX6: [[ADD1:%[0-9]+]]:_(s32) = G_ADD [[UV1]], [[UMIN1]] - ; GFX6: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[ADD]](s32), [[ADD1]](s32) - ; GFX6: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x s32>) + ; GFX6-NEXT: [[COPY1:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr2_vgpr3 + ; GFX6-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<2 x s32>) + ; GFX6-NEXT: [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](<2 x s32>) + ; GFX6-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 -1 + ; GFX6-NEXT: [[XOR:%[0-9]+]]:_(s32) = G_XOR [[UV]], [[C]] + ; GFX6-NEXT: [[UMIN:%[0-9]+]]:_(s32) = G_UMIN [[XOR]], [[UV2]] + ; GFX6-NEXT: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[UV]], [[UMIN]] + ; GFX6-NEXT: [[XOR1:%[0-9]+]]:_(s32) = G_XOR [[UV1]], [[C]] + ; GFX6-NEXT: [[UMIN1:%[0-9]+]]:_(s32) = G_UMIN [[XOR1]], [[UV3]] + ; GFX6-NEXT: [[ADD1:%[0-9]+]]:_(s32) = G_ADD [[UV1]], [[UMIN1]] + ; GFX6-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[ADD]](s32), [[ADD1]](s32) + ; GFX6-NEXT: $vgpr0_vgpr1 = COPY 
[[BUILD_VECTOR]](<2 x s32>) ; GFX8-LABEL: name: uaddsat_v2s32 ; GFX8: [[COPY:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr0_vgpr1 - ; GFX8: [[COPY1:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr2_vgpr3 - ; GFX8: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<2 x s32>) - ; GFX8: [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](<2 x s32>) - ; GFX8: [[UADDSAT:%[0-9]+]]:_(s32) = G_UADDSAT [[UV]], [[UV2]] - ; GFX8: [[UADDSAT1:%[0-9]+]]:_(s32) = G_UADDSAT [[UV1]], [[UV3]] - ; GFX8: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[UADDSAT]](s32), [[UADDSAT1]](s32) - ; GFX8: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x s32>) + ; GFX8-NEXT: [[COPY1:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr2_vgpr3 + ; GFX8-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<2 x s32>) + ; GFX8-NEXT: [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](<2 x s32>) + ; GFX8-NEXT: [[UADDSAT:%[0-9]+]]:_(s32) = G_UADDSAT [[UV]], [[UV2]] + ; GFX8-NEXT: [[UADDSAT1:%[0-9]+]]:_(s32) = G_UADDSAT [[UV1]], [[UV3]] + ; GFX8-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[UADDSAT]](s32), [[UADDSAT1]](s32) + ; GFX8-NEXT: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x s32>) ; GFX9-LABEL: name: uaddsat_v2s32 ; GFX9: [[COPY:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr0_vgpr1 - ; GFX9: [[COPY1:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr2_vgpr3 - ; GFX9: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<2 x s32>) - ; GFX9: [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](<2 x s32>) - ; GFX9: [[UADDSAT:%[0-9]+]]:_(s32) = G_UADDSAT [[UV]], [[UV2]] - ; GFX9: [[UADDSAT1:%[0-9]+]]:_(s32) = G_UADDSAT [[UV1]], [[UV3]] - ; GFX9: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[UADDSAT]](s32), [[UADDSAT1]](s32) - ; GFX9: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x s32>) + ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr2_vgpr3 + ; GFX9-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<2 x s32>) + ; GFX9-NEXT: [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](<2 x s32>) + ; GFX9-NEXT: [[UADDSAT:%[0-9]+]]:_(s32) = G_UADDSAT [[UV]], [[UV2]] + ; GFX9-NEXT: [[UADDSAT1:%[0-9]+]]:_(s32) = G_UADDSAT [[UV1]], [[UV3]] + ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[UADDSAT]](s32), [[UADDSAT1]](s32) + ; GFX9-NEXT: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x s32>) %0:_(<2 x s32>) = COPY $vgpr0_vgpr1 %1:_(<2 x s32>) = COPY $vgpr2_vgpr3 %2:_(<2 x s32>) = G_UADDSAT %0, %1 @@ -653,40 +653,40 @@ body: | ; GFX6-LABEL: name: uaddsat_s64 ; GFX6: [[COPY:%[0-9]+]]:_(s64) = COPY $vgpr0_vgpr1 - ; GFX6: [[COPY1:%[0-9]+]]:_(s64) = COPY $vgpr2_vgpr3 - ; GFX6: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](s64) - ; GFX6: [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](s64) - ; GFX6: [[UADDO:%[0-9]+]]:_(s32), [[UADDO1:%[0-9]+]]:_(s1) = G_UADDO [[UV]], [[UV2]] - ; GFX6: [[UADDE:%[0-9]+]]:_(s32), [[UADDE1:%[0-9]+]]:_(s1) = G_UADDE [[UV1]], [[UV3]], [[UADDO1]] - ; GFX6: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UADDO]](s32), [[UADDE]](s32) - ; GFX6: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(ult), [[MV]](s64), [[COPY1]] - ; GFX6: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 -1 - ; GFX6: [[SELECT:%[0-9]+]]:_(s64) = G_SELECT [[ICMP]](s1), [[C]], [[MV]] - ; GFX6: $vgpr0_vgpr1 = COPY [[SELECT]](s64) + ; GFX6-NEXT: [[COPY1:%[0-9]+]]:_(s64) = COPY $vgpr2_vgpr3 + ; GFX6-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = 
G_UNMERGE_VALUES [[COPY]](s64) + ; GFX6-NEXT: [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](s64) + ; GFX6-NEXT: [[UADDO:%[0-9]+]]:_(s32), [[UADDO1:%[0-9]+]]:_(s1) = G_UADDO [[UV]], [[UV2]] + ; GFX6-NEXT: [[UADDE:%[0-9]+]]:_(s32), [[UADDE1:%[0-9]+]]:_(s1) = G_UADDE [[UV1]], [[UV3]], [[UADDO1]] + ; GFX6-NEXT: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UADDO]](s32), [[UADDE]](s32) + ; GFX6-NEXT: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(ult), [[MV]](s64), [[COPY1]] + ; GFX6-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 -1 + ; GFX6-NEXT: [[SELECT:%[0-9]+]]:_(s64) = G_SELECT [[ICMP]](s1), [[C]], [[MV]] + ; GFX6-NEXT: $vgpr0_vgpr1 = COPY [[SELECT]](s64) ; GFX8-LABEL: name: uaddsat_s64 ; GFX8: [[COPY:%[0-9]+]]:_(s64) = COPY $vgpr0_vgpr1 - ; GFX8: [[COPY1:%[0-9]+]]:_(s64) = COPY $vgpr2_vgpr3 - ; GFX8: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](s64) - ; GFX8: [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](s64) - ; GFX8: [[UADDO:%[0-9]+]]:_(s32), [[UADDO1:%[0-9]+]]:_(s1) = G_UADDO [[UV]], [[UV2]] - ; GFX8: [[UADDE:%[0-9]+]]:_(s32), [[UADDE1:%[0-9]+]]:_(s1) = G_UADDE [[UV1]], [[UV3]], [[UADDO1]] - ; GFX8: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UADDO]](s32), [[UADDE]](s32) - ; GFX8: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(ult), [[MV]](s64), [[COPY1]] - ; GFX8: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 -1 - ; GFX8: [[SELECT:%[0-9]+]]:_(s64) = G_SELECT [[ICMP]](s1), [[C]], [[MV]] - ; GFX8: $vgpr0_vgpr1 = COPY [[SELECT]](s64) + ; GFX8-NEXT: [[COPY1:%[0-9]+]]:_(s64) = COPY $vgpr2_vgpr3 + ; GFX8-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](s64) + ; GFX8-NEXT: [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](s64) + ; GFX8-NEXT: [[UADDO:%[0-9]+]]:_(s32), [[UADDO1:%[0-9]+]]:_(s1) = G_UADDO [[UV]], [[UV2]] + ; GFX8-NEXT: [[UADDE:%[0-9]+]]:_(s32), [[UADDE1:%[0-9]+]]:_(s1) = G_UADDE [[UV1]], [[UV3]], [[UADDO1]] + ; GFX8-NEXT: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UADDO]](s32), [[UADDE]](s32) + ; GFX8-NEXT: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(ult), [[MV]](s64), [[COPY1]] + ; GFX8-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 -1 + ; GFX8-NEXT: [[SELECT:%[0-9]+]]:_(s64) = G_SELECT [[ICMP]](s1), [[C]], [[MV]] + ; GFX8-NEXT: $vgpr0_vgpr1 = COPY [[SELECT]](s64) ; GFX9-LABEL: name: uaddsat_s64 ; GFX9: [[COPY:%[0-9]+]]:_(s64) = COPY $vgpr0_vgpr1 - ; GFX9: [[COPY1:%[0-9]+]]:_(s64) = COPY $vgpr2_vgpr3 - ; GFX9: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](s64) - ; GFX9: [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](s64) - ; GFX9: [[UADDO:%[0-9]+]]:_(s32), [[UADDO1:%[0-9]+]]:_(s1) = G_UADDO [[UV]], [[UV2]] - ; GFX9: [[UADDE:%[0-9]+]]:_(s32), [[UADDE1:%[0-9]+]]:_(s1) = G_UADDE [[UV1]], [[UV3]], [[UADDO1]] - ; GFX9: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UADDO]](s32), [[UADDE]](s32) - ; GFX9: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(ult), [[MV]](s64), [[COPY1]] - ; GFX9: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 -1 - ; GFX9: [[SELECT:%[0-9]+]]:_(s64) = G_SELECT [[ICMP]](s1), [[C]], [[MV]] - ; GFX9: $vgpr0_vgpr1 = COPY [[SELECT]](s64) + ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(s64) = COPY $vgpr2_vgpr3 + ; GFX9-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](s64) + ; GFX9-NEXT: [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](s64) + ; GFX9-NEXT: [[UADDO:%[0-9]+]]:_(s32), [[UADDO1:%[0-9]+]]:_(s1) = G_UADDO [[UV]], [[UV2]] + ; GFX9-NEXT: [[UADDE:%[0-9]+]]:_(s32), 
[[UADDE1:%[0-9]+]]:_(s1) = G_UADDE [[UV1]], [[UV3]], [[UADDO1]] + ; GFX9-NEXT: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UADDO]](s32), [[UADDE]](s32) + ; GFX9-NEXT: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(ult), [[MV]](s64), [[COPY1]] + ; GFX9-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 -1 + ; GFX9-NEXT: [[SELECT:%[0-9]+]]:_(s64) = G_SELECT [[ICMP]](s1), [[C]], [[MV]] + ; GFX9-NEXT: $vgpr0_vgpr1 = COPY [[SELECT]](s64) %0:_(s64) = COPY $vgpr0_vgpr1 %1:_(s64) = COPY $vgpr2_vgpr3 %2:_(s64) = G_UADDSAT %0, %1 @@ -701,70 +701,70 @@ body: | ; GFX6-LABEL: name: uaddsat_v2s64 ; GFX6: [[COPY:%[0-9]+]]:_(<2 x s64>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3 - ; GFX6: [[COPY1:%[0-9]+]]:_(<2 x s64>) = COPY $vgpr4_vgpr5_vgpr6_vgpr7 - ; GFX6: [[UV:%[0-9]+]]:_(s64), [[UV1:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[COPY]](<2 x s64>) - ; GFX6: [[UV2:%[0-9]+]]:_(s64), [[UV3:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[COPY1]](<2 x s64>) - ; GFX6: [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[UV]](s64) - ; GFX6: [[UV6:%[0-9]+]]:_(s32), [[UV7:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[UV2]](s64) - ; GFX6: [[UADDO:%[0-9]+]]:_(s32), [[UADDO1:%[0-9]+]]:_(s1) = G_UADDO [[UV4]], [[UV6]] - ; GFX6: [[UADDE:%[0-9]+]]:_(s32), [[UADDE1:%[0-9]+]]:_(s1) = G_UADDE [[UV5]], [[UV7]], [[UADDO1]] - ; GFX6: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UADDO]](s32), [[UADDE]](s32) - ; GFX6: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(ult), [[MV]](s64), [[UV2]] - ; GFX6: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 -1 - ; GFX6: [[SELECT:%[0-9]+]]:_(s64) = G_SELECT [[ICMP]](s1), [[C]], [[MV]] - ; GFX6: [[UV8:%[0-9]+]]:_(s32), [[UV9:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[UV1]](s64) - ; GFX6: [[UV10:%[0-9]+]]:_(s32), [[UV11:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[UV3]](s64) - ; GFX6: [[UADDO2:%[0-9]+]]:_(s32), [[UADDO3:%[0-9]+]]:_(s1) = G_UADDO [[UV8]], [[UV10]] - ; GFX6: [[UADDE2:%[0-9]+]]:_(s32), [[UADDE3:%[0-9]+]]:_(s1) = G_UADDE [[UV9]], [[UV11]], [[UADDO3]] - ; GFX6: [[MV1:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UADDO2]](s32), [[UADDE2]](s32) - ; GFX6: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(ult), [[MV1]](s64), [[UV3]] - ; GFX6: [[SELECT1:%[0-9]+]]:_(s64) = G_SELECT [[ICMP1]](s1), [[C]], [[MV1]] - ; GFX6: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s64>) = G_BUILD_VECTOR [[SELECT]](s64), [[SELECT1]](s64) - ; GFX6: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<2 x s64>) + ; GFX6-NEXT: [[COPY1:%[0-9]+]]:_(<2 x s64>) = COPY $vgpr4_vgpr5_vgpr6_vgpr7 + ; GFX6-NEXT: [[UV:%[0-9]+]]:_(s64), [[UV1:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[COPY]](<2 x s64>) + ; GFX6-NEXT: [[UV2:%[0-9]+]]:_(s64), [[UV3:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[COPY1]](<2 x s64>) + ; GFX6-NEXT: [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[UV]](s64) + ; GFX6-NEXT: [[UV6:%[0-9]+]]:_(s32), [[UV7:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[UV2]](s64) + ; GFX6-NEXT: [[UADDO:%[0-9]+]]:_(s32), [[UADDO1:%[0-9]+]]:_(s1) = G_UADDO [[UV4]], [[UV6]] + ; GFX6-NEXT: [[UADDE:%[0-9]+]]:_(s32), [[UADDE1:%[0-9]+]]:_(s1) = G_UADDE [[UV5]], [[UV7]], [[UADDO1]] + ; GFX6-NEXT: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UADDO]](s32), [[UADDE]](s32) + ; GFX6-NEXT: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(ult), [[MV]](s64), [[UV2]] + ; GFX6-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 -1 + ; GFX6-NEXT: [[SELECT:%[0-9]+]]:_(s64) = G_SELECT [[ICMP]](s1), [[C]], [[MV]] + ; GFX6-NEXT: [[UV8:%[0-9]+]]:_(s32), [[UV9:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[UV1]](s64) + ; GFX6-NEXT: [[UV10:%[0-9]+]]:_(s32), [[UV11:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[UV3]](s64) + ; GFX6-NEXT: [[UADDO2:%[0-9]+]]:_(s32), 
[[UADDO3:%[0-9]+]]:_(s1) = G_UADDO [[UV8]], [[UV10]] + ; GFX6-NEXT: [[UADDE2:%[0-9]+]]:_(s32), [[UADDE3:%[0-9]+]]:_(s1) = G_UADDE [[UV9]], [[UV11]], [[UADDO3]] + ; GFX6-NEXT: [[MV1:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UADDO2]](s32), [[UADDE2]](s32) + ; GFX6-NEXT: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(ult), [[MV1]](s64), [[UV3]] + ; GFX6-NEXT: [[SELECT1:%[0-9]+]]:_(s64) = G_SELECT [[ICMP1]](s1), [[C]], [[MV1]] + ; GFX6-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s64>) = G_BUILD_VECTOR [[SELECT]](s64), [[SELECT1]](s64) + ; GFX6-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<2 x s64>) ; GFX8-LABEL: name: uaddsat_v2s64 ; GFX8: [[COPY:%[0-9]+]]:_(<2 x s64>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3 - ; GFX8: [[COPY1:%[0-9]+]]:_(<2 x s64>) = COPY $vgpr4_vgpr5_vgpr6_vgpr7 - ; GFX8: [[UV:%[0-9]+]]:_(s64), [[UV1:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[COPY]](<2 x s64>) - ; GFX8: [[UV2:%[0-9]+]]:_(s64), [[UV3:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[COPY1]](<2 x s64>) - ; GFX8: [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[UV]](s64) - ; GFX8: [[UV6:%[0-9]+]]:_(s32), [[UV7:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[UV2]](s64) - ; GFX8: [[UADDO:%[0-9]+]]:_(s32), [[UADDO1:%[0-9]+]]:_(s1) = G_UADDO [[UV4]], [[UV6]] - ; GFX8: [[UADDE:%[0-9]+]]:_(s32), [[UADDE1:%[0-9]+]]:_(s1) = G_UADDE [[UV5]], [[UV7]], [[UADDO1]] - ; GFX8: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UADDO]](s32), [[UADDE]](s32) - ; GFX8: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(ult), [[MV]](s64), [[UV2]] - ; GFX8: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 -1 - ; GFX8: [[SELECT:%[0-9]+]]:_(s64) = G_SELECT [[ICMP]](s1), [[C]], [[MV]] - ; GFX8: [[UV8:%[0-9]+]]:_(s32), [[UV9:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[UV1]](s64) - ; GFX8: [[UV10:%[0-9]+]]:_(s32), [[UV11:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[UV3]](s64) - ; GFX8: [[UADDO2:%[0-9]+]]:_(s32), [[UADDO3:%[0-9]+]]:_(s1) = G_UADDO [[UV8]], [[UV10]] - ; GFX8: [[UADDE2:%[0-9]+]]:_(s32), [[UADDE3:%[0-9]+]]:_(s1) = G_UADDE [[UV9]], [[UV11]], [[UADDO3]] - ; GFX8: [[MV1:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UADDO2]](s32), [[UADDE2]](s32) - ; GFX8: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(ult), [[MV1]](s64), [[UV3]] - ; GFX8: [[SELECT1:%[0-9]+]]:_(s64) = G_SELECT [[ICMP1]](s1), [[C]], [[MV1]] - ; GFX8: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s64>) = G_BUILD_VECTOR [[SELECT]](s64), [[SELECT1]](s64) - ; GFX8: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<2 x s64>) + ; GFX8-NEXT: [[COPY1:%[0-9]+]]:_(<2 x s64>) = COPY $vgpr4_vgpr5_vgpr6_vgpr7 + ; GFX8-NEXT: [[UV:%[0-9]+]]:_(s64), [[UV1:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[COPY]](<2 x s64>) + ; GFX8-NEXT: [[UV2:%[0-9]+]]:_(s64), [[UV3:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[COPY1]](<2 x s64>) + ; GFX8-NEXT: [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[UV]](s64) + ; GFX8-NEXT: [[UV6:%[0-9]+]]:_(s32), [[UV7:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[UV2]](s64) + ; GFX8-NEXT: [[UADDO:%[0-9]+]]:_(s32), [[UADDO1:%[0-9]+]]:_(s1) = G_UADDO [[UV4]], [[UV6]] + ; GFX8-NEXT: [[UADDE:%[0-9]+]]:_(s32), [[UADDE1:%[0-9]+]]:_(s1) = G_UADDE [[UV5]], [[UV7]], [[UADDO1]] + ; GFX8-NEXT: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UADDO]](s32), [[UADDE]](s32) + ; GFX8-NEXT: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(ult), [[MV]](s64), [[UV2]] + ; GFX8-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 -1 + ; GFX8-NEXT: [[SELECT:%[0-9]+]]:_(s64) = G_SELECT [[ICMP]](s1), [[C]], [[MV]] + ; GFX8-NEXT: [[UV8:%[0-9]+]]:_(s32), [[UV9:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[UV1]](s64) + ; GFX8-NEXT: [[UV10:%[0-9]+]]:_(s32), [[UV11:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[UV3]](s64) + ; 
GFX8-NEXT: [[UADDO2:%[0-9]+]]:_(s32), [[UADDO3:%[0-9]+]]:_(s1) = G_UADDO [[UV8]], [[UV10]] + ; GFX8-NEXT: [[UADDE2:%[0-9]+]]:_(s32), [[UADDE3:%[0-9]+]]:_(s1) = G_UADDE [[UV9]], [[UV11]], [[UADDO3]] + ; GFX8-NEXT: [[MV1:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UADDO2]](s32), [[UADDE2]](s32) + ; GFX8-NEXT: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(ult), [[MV1]](s64), [[UV3]] + ; GFX8-NEXT: [[SELECT1:%[0-9]+]]:_(s64) = G_SELECT [[ICMP1]](s1), [[C]], [[MV1]] + ; GFX8-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s64>) = G_BUILD_VECTOR [[SELECT]](s64), [[SELECT1]](s64) + ; GFX8-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<2 x s64>) ; GFX9-LABEL: name: uaddsat_v2s64 ; GFX9: [[COPY:%[0-9]+]]:_(<2 x s64>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3 - ; GFX9: [[COPY1:%[0-9]+]]:_(<2 x s64>) = COPY $vgpr4_vgpr5_vgpr6_vgpr7 - ; GFX9: [[UV:%[0-9]+]]:_(s64), [[UV1:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[COPY]](<2 x s64>) - ; GFX9: [[UV2:%[0-9]+]]:_(s64), [[UV3:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[COPY1]](<2 x s64>) - ; GFX9: [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[UV]](s64) - ; GFX9: [[UV6:%[0-9]+]]:_(s32), [[UV7:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[UV2]](s64) - ; GFX9: [[UADDO:%[0-9]+]]:_(s32), [[UADDO1:%[0-9]+]]:_(s1) = G_UADDO [[UV4]], [[UV6]] - ; GFX9: [[UADDE:%[0-9]+]]:_(s32), [[UADDE1:%[0-9]+]]:_(s1) = G_UADDE [[UV5]], [[UV7]], [[UADDO1]] - ; GFX9: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UADDO]](s32), [[UADDE]](s32) - ; GFX9: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(ult), [[MV]](s64), [[UV2]] - ; GFX9: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 -1 - ; GFX9: [[SELECT:%[0-9]+]]:_(s64) = G_SELECT [[ICMP]](s1), [[C]], [[MV]] - ; GFX9: [[UV8:%[0-9]+]]:_(s32), [[UV9:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[UV1]](s64) - ; GFX9: [[UV10:%[0-9]+]]:_(s32), [[UV11:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[UV3]](s64) - ; GFX9: [[UADDO2:%[0-9]+]]:_(s32), [[UADDO3:%[0-9]+]]:_(s1) = G_UADDO [[UV8]], [[UV10]] - ; GFX9: [[UADDE2:%[0-9]+]]:_(s32), [[UADDE3:%[0-9]+]]:_(s1) = G_UADDE [[UV9]], [[UV11]], [[UADDO3]] - ; GFX9: [[MV1:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UADDO2]](s32), [[UADDE2]](s32) - ; GFX9: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(ult), [[MV1]](s64), [[UV3]] - ; GFX9: [[SELECT1:%[0-9]+]]:_(s64) = G_SELECT [[ICMP1]](s1), [[C]], [[MV1]] - ; GFX9: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s64>) = G_BUILD_VECTOR [[SELECT]](s64), [[SELECT1]](s64) - ; GFX9: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<2 x s64>) + ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(<2 x s64>) = COPY $vgpr4_vgpr5_vgpr6_vgpr7 + ; GFX9-NEXT: [[UV:%[0-9]+]]:_(s64), [[UV1:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[COPY]](<2 x s64>) + ; GFX9-NEXT: [[UV2:%[0-9]+]]:_(s64), [[UV3:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[COPY1]](<2 x s64>) + ; GFX9-NEXT: [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[UV]](s64) + ; GFX9-NEXT: [[UV6:%[0-9]+]]:_(s32), [[UV7:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[UV2]](s64) + ; GFX9-NEXT: [[UADDO:%[0-9]+]]:_(s32), [[UADDO1:%[0-9]+]]:_(s1) = G_UADDO [[UV4]], [[UV6]] + ; GFX9-NEXT: [[UADDE:%[0-9]+]]:_(s32), [[UADDE1:%[0-9]+]]:_(s1) = G_UADDE [[UV5]], [[UV7]], [[UADDO1]] + ; GFX9-NEXT: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UADDO]](s32), [[UADDE]](s32) + ; GFX9-NEXT: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(ult), [[MV]](s64), [[UV2]] + ; GFX9-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 -1 + ; GFX9-NEXT: [[SELECT:%[0-9]+]]:_(s64) = G_SELECT [[ICMP]](s1), [[C]], [[MV]] + ; GFX9-NEXT: [[UV8:%[0-9]+]]:_(s32), [[UV9:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[UV1]](s64) + ; GFX9-NEXT: [[UV10:%[0-9]+]]:_(s32), 
[[UV11:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[UV3]](s64) + ; GFX9-NEXT: [[UADDO2:%[0-9]+]]:_(s32), [[UADDO3:%[0-9]+]]:_(s1) = G_UADDO [[UV8]], [[UV10]] + ; GFX9-NEXT: [[UADDE2:%[0-9]+]]:_(s32), [[UADDE3:%[0-9]+]]:_(s1) = G_UADDE [[UV9]], [[UV11]], [[UADDO3]] + ; GFX9-NEXT: [[MV1:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UADDO2]](s32), [[UADDE2]](s32) + ; GFX9-NEXT: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(ult), [[MV1]](s64), [[UV3]] + ; GFX9-NEXT: [[SELECT1:%[0-9]+]]:_(s64) = G_SELECT [[ICMP1]](s1), [[C]], [[MV1]] + ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s64>) = G_BUILD_VECTOR [[SELECT]](s64), [[SELECT1]](s64) + ; GFX9-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<2 x s64>) %0:_(<2 x s64>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3 %1:_(<2 x s64>) = COPY $vgpr4_vgpr5_vgpr6_vgpr7 %2:_(<2 x s64>) = G_UADDSAT %0, %1 diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-ubfx.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-ubfx.mir index 25136c1db223..f63455446f24 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-ubfx.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-ubfx.mir @@ -1,7 +1,7 @@ # NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py -# RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=fiji -O0 -run-pass=legalizer -global-isel-abort=0 %s -o - | FileCheck --check-prefix=GCN %s -# RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx900 -O0 -run-pass=legalizer -global-isel-abort=0 %s -o - | FileCheck --check-prefix=GCN %s -# RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx1010 -O0 -run-pass=legalizer -global-isel-abort=0 %s -o - | FileCheck --check-prefix=GCN %s +# RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=fiji -O0 -run-pass=legalizer %s -o - | FileCheck --check-prefix=GCN %s +# RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx900 -O0 -run-pass=legalizer %s -o - | FileCheck --check-prefix=GCN %s +# RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx1010 -O0 -run-pass=legalizer %s -o - | FileCheck --check-prefix=GCN %s ... 
--- name: test_ubfx_s32 diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-umax.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-umax.mir index e350fca51c26..d3d1c0b65d98 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-umax.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-umax.mir @@ -12,19 +12,19 @@ body: | ; SI-LABEL: name: test_umax_s32 ; SI: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; SI: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; SI: [[UMAX:%[0-9]+]]:_(s32) = G_UMAX [[COPY]], [[COPY1]] - ; SI: $vgpr0 = COPY [[UMAX]](s32) + ; SI-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; SI-NEXT: [[UMAX:%[0-9]+]]:_(s32) = G_UMAX [[COPY]], [[COPY1]] + ; SI-NEXT: $vgpr0 = COPY [[UMAX]](s32) ; VI-LABEL: name: test_umax_s32 ; VI: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; VI: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; VI: [[UMAX:%[0-9]+]]:_(s32) = G_UMAX [[COPY]], [[COPY1]] - ; VI: $vgpr0 = COPY [[UMAX]](s32) + ; VI-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; VI-NEXT: [[UMAX:%[0-9]+]]:_(s32) = G_UMAX [[COPY]], [[COPY1]] + ; VI-NEXT: $vgpr0 = COPY [[UMAX]](s32) ; GFX9-LABEL: name: test_umax_s32 ; GFX9: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX9: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX9: [[UMAX:%[0-9]+]]:_(s32) = G_UMAX [[COPY]], [[COPY1]] - ; GFX9: $vgpr0 = COPY [[UMAX]](s32) + ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; GFX9-NEXT: [[UMAX:%[0-9]+]]:_(s32) = G_UMAX [[COPY]], [[COPY1]] + ; GFX9-NEXT: $vgpr0 = COPY [[UMAX]](s32) %0:_(s32) = COPY $vgpr0 %1:_(s32) = COPY $vgpr1 %2:_(s32) = G_UMAX %0, %1 @@ -39,22 +39,22 @@ body: | ; SI-LABEL: name: test_umax_s64 ; SI: [[COPY:%[0-9]+]]:_(s64) = COPY $vgpr0_vgpr1 - ; SI: [[COPY1:%[0-9]+]]:_(s64) = COPY $vgpr2_vgpr3 - ; SI: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(ugt), [[COPY]](s64), [[COPY1]] - ; SI: [[SELECT:%[0-9]+]]:_(s64) = G_SELECT [[ICMP]](s1), [[COPY]], [[COPY1]] - ; SI: $vgpr0_vgpr1 = COPY [[SELECT]](s64) + ; SI-NEXT: [[COPY1:%[0-9]+]]:_(s64) = COPY $vgpr2_vgpr3 + ; SI-NEXT: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(ugt), [[COPY]](s64), [[COPY1]] + ; SI-NEXT: [[SELECT:%[0-9]+]]:_(s64) = G_SELECT [[ICMP]](s1), [[COPY]], [[COPY1]] + ; SI-NEXT: $vgpr0_vgpr1 = COPY [[SELECT]](s64) ; VI-LABEL: name: test_umax_s64 ; VI: [[COPY:%[0-9]+]]:_(s64) = COPY $vgpr0_vgpr1 - ; VI: [[COPY1:%[0-9]+]]:_(s64) = COPY $vgpr2_vgpr3 - ; VI: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(ugt), [[COPY]](s64), [[COPY1]] - ; VI: [[SELECT:%[0-9]+]]:_(s64) = G_SELECT [[ICMP]](s1), [[COPY]], [[COPY1]] - ; VI: $vgpr0_vgpr1 = COPY [[SELECT]](s64) + ; VI-NEXT: [[COPY1:%[0-9]+]]:_(s64) = COPY $vgpr2_vgpr3 + ; VI-NEXT: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(ugt), [[COPY]](s64), [[COPY1]] + ; VI-NEXT: [[SELECT:%[0-9]+]]:_(s64) = G_SELECT [[ICMP]](s1), [[COPY]], [[COPY1]] + ; VI-NEXT: $vgpr0_vgpr1 = COPY [[SELECT]](s64) ; GFX9-LABEL: name: test_umax_s64 ; GFX9: [[COPY:%[0-9]+]]:_(s64) = COPY $vgpr0_vgpr1 - ; GFX9: [[COPY1:%[0-9]+]]:_(s64) = COPY $vgpr2_vgpr3 - ; GFX9: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(ugt), [[COPY]](s64), [[COPY1]] - ; GFX9: [[SELECT:%[0-9]+]]:_(s64) = G_SELECT [[ICMP]](s1), [[COPY]], [[COPY1]] - ; GFX9: $vgpr0_vgpr1 = COPY [[SELECT]](s64) + ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(s64) = COPY $vgpr2_vgpr3 + ; GFX9-NEXT: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(ugt), [[COPY]](s64), [[COPY1]] + ; GFX9-NEXT: [[SELECT:%[0-9]+]]:_(s64) = G_SELECT [[ICMP]](s1), [[COPY]], [[COPY1]] + ; GFX9-NEXT: $vgpr0_vgpr1 = COPY [[SELECT]](s64) %0:_(s64) = COPY $vgpr0_vgpr1 %1:_(s64) = COPY $vgpr2_vgpr3 %2:_(s64) = G_UMAX %0, %1 @@ -69,28 +69,28 @@ 
body: | ; SI-LABEL: name: test_umax_s16 ; SI: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; SI: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; SI: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 - ; SI: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY]], [[C]] - ; SI: [[AND1:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C]] - ; SI: [[UMAX:%[0-9]+]]:_(s32) = G_UMAX [[AND]], [[AND1]] - ; SI: $vgpr0 = COPY [[UMAX]](s32) + ; SI-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; SI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 + ; SI-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY]], [[C]] + ; SI-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C]] + ; SI-NEXT: [[UMAX:%[0-9]+]]:_(s32) = G_UMAX [[AND]], [[AND1]] + ; SI-NEXT: $vgpr0 = COPY [[UMAX]](s32) ; VI-LABEL: name: test_umax_s16 ; VI: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; VI: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; VI: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY]](s32) - ; VI: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY1]](s32) - ; VI: [[UMAX:%[0-9]+]]:_(s16) = G_UMAX [[TRUNC]], [[TRUNC1]] - ; VI: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[UMAX]](s16) - ; VI: $vgpr0 = COPY [[ANYEXT]](s32) + ; VI-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; VI-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY]](s32) + ; VI-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY1]](s32) + ; VI-NEXT: [[UMAX:%[0-9]+]]:_(s16) = G_UMAX [[TRUNC]], [[TRUNC1]] + ; VI-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[UMAX]](s16) + ; VI-NEXT: $vgpr0 = COPY [[ANYEXT]](s32) ; GFX9-LABEL: name: test_umax_s16 ; GFX9: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX9: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX9: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY]](s32) - ; GFX9: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY1]](s32) - ; GFX9: [[UMAX:%[0-9]+]]:_(s16) = G_UMAX [[TRUNC]], [[TRUNC1]] - ; GFX9: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[UMAX]](s16) - ; GFX9: $vgpr0 = COPY [[ANYEXT]](s32) + ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; GFX9-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY]](s32) + ; GFX9-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY1]](s32) + ; GFX9-NEXT: [[UMAX:%[0-9]+]]:_(s16) = G_UMAX [[TRUNC]], [[TRUNC1]] + ; GFX9-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[UMAX]](s16) + ; GFX9-NEXT: $vgpr0 = COPY [[ANYEXT]](s32) %0:_(s32) = COPY $vgpr0 %1:_(s32) = COPY $vgpr1 %2:_(s16) = G_TRUNC %0 @@ -108,34 +108,34 @@ body: | ; SI-LABEL: name: test_umax_s8 ; SI: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; SI: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; SI: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 255 - ; SI: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY]], [[C]] - ; SI: [[AND1:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C]] - ; SI: [[UMAX:%[0-9]+]]:_(s32) = G_UMAX [[AND]], [[AND1]] - ; SI: $vgpr0 = COPY [[UMAX]](s32) + ; SI-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; SI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 255 + ; SI-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY]], [[C]] + ; SI-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C]] + ; SI-NEXT: [[UMAX:%[0-9]+]]:_(s32) = G_UMAX [[AND]], [[AND1]] + ; SI-NEXT: $vgpr0 = COPY [[UMAX]](s32) ; VI-LABEL: name: test_umax_s8 ; VI: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; VI: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; VI: [[C:%[0-9]+]]:_(s16) = G_CONSTANT i16 255 - ; VI: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY]](s32) - ; VI: [[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC]], [[C]] - ; VI: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY1]](s32) - ; VI: [[AND1:%[0-9]+]]:_(s16) = G_AND [[TRUNC1]], [[C]] - ; VI: [[UMAX:%[0-9]+]]:_(s16) = G_UMAX [[AND]], [[AND1]] - ; VI: 
[[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[UMAX]](s16) - ; VI: $vgpr0 = COPY [[ANYEXT]](s32) + ; VI-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; VI-NEXT: [[C:%[0-9]+]]:_(s16) = G_CONSTANT i16 255 + ; VI-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY]](s32) + ; VI-NEXT: [[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC]], [[C]] + ; VI-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY1]](s32) + ; VI-NEXT: [[AND1:%[0-9]+]]:_(s16) = G_AND [[TRUNC1]], [[C]] + ; VI-NEXT: [[UMAX:%[0-9]+]]:_(s16) = G_UMAX [[AND]], [[AND1]] + ; VI-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[UMAX]](s16) + ; VI-NEXT: $vgpr0 = COPY [[ANYEXT]](s32) ; GFX9-LABEL: name: test_umax_s8 ; GFX9: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX9: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX9: [[C:%[0-9]+]]:_(s16) = G_CONSTANT i16 255 - ; GFX9: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY]](s32) - ; GFX9: [[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC]], [[C]] - ; GFX9: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY1]](s32) - ; GFX9: [[AND1:%[0-9]+]]:_(s16) = G_AND [[TRUNC1]], [[C]] - ; GFX9: [[UMAX:%[0-9]+]]:_(s16) = G_UMAX [[AND]], [[AND1]] - ; GFX9: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[UMAX]](s16) - ; GFX9: $vgpr0 = COPY [[ANYEXT]](s32) + ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; GFX9-NEXT: [[C:%[0-9]+]]:_(s16) = G_CONSTANT i16 255 + ; GFX9-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY]](s32) + ; GFX9-NEXT: [[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC]], [[C]] + ; GFX9-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY1]](s32) + ; GFX9-NEXT: [[AND1:%[0-9]+]]:_(s16) = G_AND [[TRUNC1]], [[C]] + ; GFX9-NEXT: [[UMAX:%[0-9]+]]:_(s16) = G_UMAX [[AND]], [[AND1]] + ; GFX9-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[UMAX]](s16) + ; GFX9-NEXT: $vgpr0 = COPY [[ANYEXT]](s32) %0:_(s32) = COPY $vgpr0 %1:_(s32) = COPY $vgpr1 %2:_(s8) = G_TRUNC %0 @@ -153,28 +153,28 @@ body: | ; SI-LABEL: name: test_umax_s17 ; SI: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; SI: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; SI: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 131071 - ; SI: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY]], [[C]] - ; SI: [[AND1:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C]] - ; SI: [[UMAX:%[0-9]+]]:_(s32) = G_UMAX [[AND]], [[AND1]] - ; SI: $vgpr0 = COPY [[UMAX]](s32) + ; SI-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; SI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 131071 + ; SI-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY]], [[C]] + ; SI-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C]] + ; SI-NEXT: [[UMAX:%[0-9]+]]:_(s32) = G_UMAX [[AND]], [[AND1]] + ; SI-NEXT: $vgpr0 = COPY [[UMAX]](s32) ; VI-LABEL: name: test_umax_s17 ; VI: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; VI: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; VI: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 131071 - ; VI: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY]], [[C]] - ; VI: [[AND1:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C]] - ; VI: [[UMAX:%[0-9]+]]:_(s32) = G_UMAX [[AND]], [[AND1]] - ; VI: $vgpr0 = COPY [[UMAX]](s32) + ; VI-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; VI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 131071 + ; VI-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY]], [[C]] + ; VI-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C]] + ; VI-NEXT: [[UMAX:%[0-9]+]]:_(s32) = G_UMAX [[AND]], [[AND1]] + ; VI-NEXT: $vgpr0 = COPY [[UMAX]](s32) ; GFX9-LABEL: name: test_umax_s17 ; GFX9: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX9: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX9: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 131071 - ; GFX9: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY]], [[C]] - ; GFX9: 
[[AND1:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C]] - ; GFX9: [[UMAX:%[0-9]+]]:_(s32) = G_UMAX [[AND]], [[AND1]] - ; GFX9: $vgpr0 = COPY [[UMAX]](s32) + ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; GFX9-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 131071 + ; GFX9-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY]], [[C]] + ; GFX9-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C]] + ; GFX9-NEXT: [[UMAX:%[0-9]+]]:_(s32) = G_UMAX [[AND]], [[AND1]] + ; GFX9-NEXT: $vgpr0 = COPY [[UMAX]](s32) %0:_(s32) = COPY $vgpr0 %1:_(s32) = COPY $vgpr1 %2:_(s17) = G_TRUNC %0 @@ -192,31 +192,31 @@ body: | ; SI-LABEL: name: test_umax_v2s32 ; SI: [[COPY:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr0_vgpr1 - ; SI: [[COPY1:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr2_vgpr3 - ; SI: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<2 x s32>) - ; SI: [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](<2 x s32>) - ; SI: [[UMAX:%[0-9]+]]:_(s32) = G_UMAX [[UV]], [[UV2]] - ; SI: [[UMAX1:%[0-9]+]]:_(s32) = G_UMAX [[UV1]], [[UV3]] - ; SI: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[UMAX]](s32), [[UMAX1]](s32) - ; SI: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x s32>) + ; SI-NEXT: [[COPY1:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr2_vgpr3 + ; SI-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<2 x s32>) + ; SI-NEXT: [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](<2 x s32>) + ; SI-NEXT: [[UMAX:%[0-9]+]]:_(s32) = G_UMAX [[UV]], [[UV2]] + ; SI-NEXT: [[UMAX1:%[0-9]+]]:_(s32) = G_UMAX [[UV1]], [[UV3]] + ; SI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[UMAX]](s32), [[UMAX1]](s32) + ; SI-NEXT: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x s32>) ; VI-LABEL: name: test_umax_v2s32 ; VI: [[COPY:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr0_vgpr1 - ; VI: [[COPY1:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr2_vgpr3 - ; VI: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<2 x s32>) - ; VI: [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](<2 x s32>) - ; VI: [[UMAX:%[0-9]+]]:_(s32) = G_UMAX [[UV]], [[UV2]] - ; VI: [[UMAX1:%[0-9]+]]:_(s32) = G_UMAX [[UV1]], [[UV3]] - ; VI: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[UMAX]](s32), [[UMAX1]](s32) - ; VI: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x s32>) + ; VI-NEXT: [[COPY1:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr2_vgpr3 + ; VI-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<2 x s32>) + ; VI-NEXT: [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](<2 x s32>) + ; VI-NEXT: [[UMAX:%[0-9]+]]:_(s32) = G_UMAX [[UV]], [[UV2]] + ; VI-NEXT: [[UMAX1:%[0-9]+]]:_(s32) = G_UMAX [[UV1]], [[UV3]] + ; VI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[UMAX]](s32), [[UMAX1]](s32) + ; VI-NEXT: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x s32>) ; GFX9-LABEL: name: test_umax_v2s32 ; GFX9: [[COPY:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr0_vgpr1 - ; GFX9: [[COPY1:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr2_vgpr3 - ; GFX9: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<2 x s32>) - ; GFX9: [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](<2 x s32>) - ; GFX9: [[UMAX:%[0-9]+]]:_(s32) = G_UMAX [[UV]], [[UV2]] - ; GFX9: [[UMAX1:%[0-9]+]]:_(s32) = G_UMAX [[UV1]], [[UV3]] - ; GFX9: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[UMAX]](s32), [[UMAX1]](s32) - ; GFX9: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x s32>) + ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(<2 
x s32>) = COPY $vgpr2_vgpr3 + ; GFX9-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<2 x s32>) + ; GFX9-NEXT: [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](<2 x s32>) + ; GFX9-NEXT: [[UMAX:%[0-9]+]]:_(s32) = G_UMAX [[UV]], [[UV2]] + ; GFX9-NEXT: [[UMAX1:%[0-9]+]]:_(s32) = G_UMAX [[UV1]], [[UV3]] + ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[UMAX]](s32), [[UMAX1]](s32) + ; GFX9-NEXT: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x s32>) %0:_(<2 x s32>) = COPY $vgpr0_vgpr1 %1:_(<2 x s32>) = COPY $vgpr2_vgpr3 %2:_(<2 x s32>) = G_UMAX %0, %1 @@ -231,34 +231,34 @@ body: | ; SI-LABEL: name: test_umax_v3s32 ; SI: [[COPY:%[0-9]+]]:_(<3 x s32>) = COPY $vgpr0_vgpr1_vgpr2 - ; SI: [[COPY1:%[0-9]+]]:_(<3 x s32>) = COPY $vgpr3_vgpr4_vgpr5 - ; SI: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<3 x s32>) - ; SI: [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](<3 x s32>) - ; SI: [[UMAX:%[0-9]+]]:_(s32) = G_UMAX [[UV]], [[UV3]] - ; SI: [[UMAX1:%[0-9]+]]:_(s32) = G_UMAX [[UV1]], [[UV4]] - ; SI: [[UMAX2:%[0-9]+]]:_(s32) = G_UMAX [[UV2]], [[UV5]] - ; SI: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[UMAX]](s32), [[UMAX1]](s32), [[UMAX2]](s32) - ; SI: $vgpr0_vgpr1_vgpr2 = COPY [[BUILD_VECTOR]](<3 x s32>) + ; SI-NEXT: [[COPY1:%[0-9]+]]:_(<3 x s32>) = COPY $vgpr3_vgpr4_vgpr5 + ; SI-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<3 x s32>) + ; SI-NEXT: [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](<3 x s32>) + ; SI-NEXT: [[UMAX:%[0-9]+]]:_(s32) = G_UMAX [[UV]], [[UV3]] + ; SI-NEXT: [[UMAX1:%[0-9]+]]:_(s32) = G_UMAX [[UV1]], [[UV4]] + ; SI-NEXT: [[UMAX2:%[0-9]+]]:_(s32) = G_UMAX [[UV2]], [[UV5]] + ; SI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[UMAX]](s32), [[UMAX1]](s32), [[UMAX2]](s32) + ; SI-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[BUILD_VECTOR]](<3 x s32>) ; VI-LABEL: name: test_umax_v3s32 ; VI: [[COPY:%[0-9]+]]:_(<3 x s32>) = COPY $vgpr0_vgpr1_vgpr2 - ; VI: [[COPY1:%[0-9]+]]:_(<3 x s32>) = COPY $vgpr3_vgpr4_vgpr5 - ; VI: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<3 x s32>) - ; VI: [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](<3 x s32>) - ; VI: [[UMAX:%[0-9]+]]:_(s32) = G_UMAX [[UV]], [[UV3]] - ; VI: [[UMAX1:%[0-9]+]]:_(s32) = G_UMAX [[UV1]], [[UV4]] - ; VI: [[UMAX2:%[0-9]+]]:_(s32) = G_UMAX [[UV2]], [[UV5]] - ; VI: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[UMAX]](s32), [[UMAX1]](s32), [[UMAX2]](s32) - ; VI: $vgpr0_vgpr1_vgpr2 = COPY [[BUILD_VECTOR]](<3 x s32>) + ; VI-NEXT: [[COPY1:%[0-9]+]]:_(<3 x s32>) = COPY $vgpr3_vgpr4_vgpr5 + ; VI-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<3 x s32>) + ; VI-NEXT: [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](<3 x s32>) + ; VI-NEXT: [[UMAX:%[0-9]+]]:_(s32) = G_UMAX [[UV]], [[UV3]] + ; VI-NEXT: [[UMAX1:%[0-9]+]]:_(s32) = G_UMAX [[UV1]], [[UV4]] + ; VI-NEXT: [[UMAX2:%[0-9]+]]:_(s32) = G_UMAX [[UV2]], [[UV5]] + ; VI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[UMAX]](s32), [[UMAX1]](s32), [[UMAX2]](s32) + ; VI-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[BUILD_VECTOR]](<3 x s32>) ; GFX9-LABEL: name: test_umax_v3s32 ; 
GFX9: [[COPY:%[0-9]+]]:_(<3 x s32>) = COPY $vgpr0_vgpr1_vgpr2 - ; GFX9: [[COPY1:%[0-9]+]]:_(<3 x s32>) = COPY $vgpr3_vgpr4_vgpr5 - ; GFX9: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<3 x s32>) - ; GFX9: [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](<3 x s32>) - ; GFX9: [[UMAX:%[0-9]+]]:_(s32) = G_UMAX [[UV]], [[UV3]] - ; GFX9: [[UMAX1:%[0-9]+]]:_(s32) = G_UMAX [[UV1]], [[UV4]] - ; GFX9: [[UMAX2:%[0-9]+]]:_(s32) = G_UMAX [[UV2]], [[UV5]] - ; GFX9: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[UMAX]](s32), [[UMAX1]](s32), [[UMAX2]](s32) - ; GFX9: $vgpr0_vgpr1_vgpr2 = COPY [[BUILD_VECTOR]](<3 x s32>) + ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(<3 x s32>) = COPY $vgpr3_vgpr4_vgpr5 + ; GFX9-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<3 x s32>) + ; GFX9-NEXT: [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](<3 x s32>) + ; GFX9-NEXT: [[UMAX:%[0-9]+]]:_(s32) = G_UMAX [[UV]], [[UV3]] + ; GFX9-NEXT: [[UMAX1:%[0-9]+]]:_(s32) = G_UMAX [[UV1]], [[UV4]] + ; GFX9-NEXT: [[UMAX2:%[0-9]+]]:_(s32) = G_UMAX [[UV2]], [[UV5]] + ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[UMAX]](s32), [[UMAX1]](s32), [[UMAX2]](s32) + ; GFX9-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[BUILD_VECTOR]](<3 x s32>) %0:_(<3 x s32>) = COPY $vgpr0_vgpr1_vgpr2 %1:_(<3 x s32>) = COPY $vgpr3_vgpr4_vgpr5 %2:_(<3 x s32>) = G_UMAX %0, %1 @@ -273,50 +273,50 @@ body: | ; SI-LABEL: name: test_umax_v2s16 ; SI: [[COPY:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0 - ; SI: [[COPY1:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr1 - ; SI: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY]](<2 x s16>) - ; SI: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; SI: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) - ; SI: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[COPY1]](<2 x s16>) - ; SI: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C]](s32) - ; SI: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 - ; SI: [[AND:%[0-9]+]]:_(s32) = G_AND [[BITCAST]], [[C1]] - ; SI: [[AND1:%[0-9]+]]:_(s32) = G_AND [[BITCAST1]], [[C1]] - ; SI: [[UMAX:%[0-9]+]]:_(s32) = G_UMAX [[AND]], [[AND1]] - ; SI: [[AND2:%[0-9]+]]:_(s32) = G_AND [[LSHR]], [[C1]] - ; SI: [[AND3:%[0-9]+]]:_(s32) = G_AND [[LSHR1]], [[C1]] - ; SI: [[UMAX1:%[0-9]+]]:_(s32) = G_UMAX [[AND2]], [[AND3]] - ; SI: [[AND4:%[0-9]+]]:_(s32) = G_AND [[UMAX]], [[C1]] - ; SI: [[AND5:%[0-9]+]]:_(s32) = G_AND [[UMAX1]], [[C1]] - ; SI: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[C]](s32) - ; SI: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND4]], [[SHL]] - ; SI: [[BITCAST2:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) - ; SI: $vgpr0 = COPY [[BITCAST2]](<2 x s16>) + ; SI-NEXT: [[COPY1:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr1 + ; SI-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY]](<2 x s16>) + ; SI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; SI-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) + ; SI-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[COPY1]](<2 x s16>) + ; SI-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C]](s32) + ; SI-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 + ; SI-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[BITCAST]], [[C1]] + ; SI-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[BITCAST1]], [[C1]] + ; SI-NEXT: [[UMAX:%[0-9]+]]:_(s32) = G_UMAX [[AND]], [[AND1]] + ; SI-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[LSHR]], [[C1]] + ; SI-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[LSHR1]], [[C1]] 
+ ; SI-NEXT: [[UMAX1:%[0-9]+]]:_(s32) = G_UMAX [[AND2]], [[AND3]] + ; SI-NEXT: [[AND4:%[0-9]+]]:_(s32) = G_AND [[UMAX]], [[C1]] + ; SI-NEXT: [[AND5:%[0-9]+]]:_(s32) = G_AND [[UMAX1]], [[C1]] + ; SI-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[C]](s32) + ; SI-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND4]], [[SHL]] + ; SI-NEXT: [[BITCAST2:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) + ; SI-NEXT: $vgpr0 = COPY [[BITCAST2]](<2 x s16>) ; VI-LABEL: name: test_umax_v2s16 ; VI: [[COPY:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0 - ; VI: [[COPY1:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr1 - ; VI: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY]](<2 x s16>) - ; VI: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST]](s32) - ; VI: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; VI: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) - ; VI: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) - ; VI: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[COPY1]](<2 x s16>) - ; VI: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST1]](s32) - ; VI: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C]](s32) - ; VI: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR1]](s32) - ; VI: [[UMAX:%[0-9]+]]:_(s16) = G_UMAX [[TRUNC]], [[TRUNC2]] - ; VI: [[UMAX1:%[0-9]+]]:_(s16) = G_UMAX [[TRUNC1]], [[TRUNC3]] - ; VI: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[UMAX]](s16) - ; VI: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[UMAX1]](s16) - ; VI: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C]](s32) - ; VI: [[OR:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL]] - ; VI: [[BITCAST2:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) - ; VI: $vgpr0 = COPY [[BITCAST2]](<2 x s16>) + ; VI-NEXT: [[COPY1:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr1 + ; VI-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY]](<2 x s16>) + ; VI-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST]](s32) + ; VI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; VI-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) + ; VI-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) + ; VI-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[COPY1]](<2 x s16>) + ; VI-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST1]](s32) + ; VI-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C]](s32) + ; VI-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR1]](s32) + ; VI-NEXT: [[UMAX:%[0-9]+]]:_(s16) = G_UMAX [[TRUNC]], [[TRUNC2]] + ; VI-NEXT: [[UMAX1:%[0-9]+]]:_(s16) = G_UMAX [[TRUNC1]], [[TRUNC3]] + ; VI-NEXT: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[UMAX]](s16) + ; VI-NEXT: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[UMAX1]](s16) + ; VI-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C]](s32) + ; VI-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL]] + ; VI-NEXT: [[BITCAST2:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) + ; VI-NEXT: $vgpr0 = COPY [[BITCAST2]](<2 x s16>) ; GFX9-LABEL: name: test_umax_v2s16 ; GFX9: [[COPY:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0 - ; GFX9: [[COPY1:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr1 - ; GFX9: [[UMAX:%[0-9]+]]:_(<2 x s16>) = G_UMAX [[COPY]], [[COPY1]] - ; GFX9: $vgpr0 = COPY [[UMAX]](<2 x s16>) + ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr1 + ; GFX9-NEXT: [[UMAX:%[0-9]+]]:_(<2 x s16>) = G_UMAX [[COPY]], [[COPY1]] + ; GFX9-NEXT: $vgpr0 = COPY [[UMAX]](<2 x s16>) %0:_(<2 x s16>) = COPY $vgpr0 %1:_(<2 x s16>) = COPY $vgpr1 %2:_(<2 x s16>) = G_UMAX %0, %1 @@ -331,74 +331,74 @@ body: | ; SI-LABEL: name: test_umax_v3s16 ; SI: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF - ; SI: [[DEF1:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF - ; SI: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES 
[[DEF]](<4 x s16>) - ; SI: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>) - ; SI: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; SI: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) - ; SI: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>) - ; SI: [[UV2:%[0-9]+]]:_(<2 x s16>), [[UV3:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF1]](<4 x s16>) - ; SI: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[UV2]](<2 x s16>) - ; SI: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C]](s32) - ; SI: [[BITCAST3:%[0-9]+]]:_(s32) = G_BITCAST [[UV3]](<2 x s16>) - ; SI: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 - ; SI: [[AND:%[0-9]+]]:_(s32) = G_AND [[BITCAST]], [[C1]] - ; SI: [[AND1:%[0-9]+]]:_(s32) = G_AND [[BITCAST2]], [[C1]] - ; SI: [[UMAX:%[0-9]+]]:_(s32) = G_UMAX [[AND]], [[AND1]] - ; SI: [[AND2:%[0-9]+]]:_(s32) = G_AND [[LSHR]], [[C1]] - ; SI: [[AND3:%[0-9]+]]:_(s32) = G_AND [[LSHR1]], [[C1]] - ; SI: [[UMAX1:%[0-9]+]]:_(s32) = G_UMAX [[AND2]], [[AND3]] - ; SI: [[AND4:%[0-9]+]]:_(s32) = G_AND [[BITCAST1]], [[C1]] - ; SI: [[AND5:%[0-9]+]]:_(s32) = G_AND [[BITCAST3]], [[C1]] - ; SI: [[UMAX2:%[0-9]+]]:_(s32) = G_UMAX [[AND4]], [[AND5]] - ; SI: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[UMAX]](s32), [[UMAX1]](s32), [[UMAX2]](s32) - ; SI: S_NOP 0, implicit [[BUILD_VECTOR]](<3 x s32>) + ; SI-NEXT: [[DEF1:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF + ; SI-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<4 x s16>) + ; SI-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>) + ; SI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; SI-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) + ; SI-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>) + ; SI-NEXT: [[UV2:%[0-9]+]]:_(<2 x s16>), [[UV3:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF1]](<4 x s16>) + ; SI-NEXT: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[UV2]](<2 x s16>) + ; SI-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C]](s32) + ; SI-NEXT: [[BITCAST3:%[0-9]+]]:_(s32) = G_BITCAST [[UV3]](<2 x s16>) + ; SI-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 + ; SI-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[BITCAST]], [[C1]] + ; SI-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[BITCAST2]], [[C1]] + ; SI-NEXT: [[UMAX:%[0-9]+]]:_(s32) = G_UMAX [[AND]], [[AND1]] + ; SI-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[LSHR]], [[C1]] + ; SI-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[LSHR1]], [[C1]] + ; SI-NEXT: [[UMAX1:%[0-9]+]]:_(s32) = G_UMAX [[AND2]], [[AND3]] + ; SI-NEXT: [[AND4:%[0-9]+]]:_(s32) = G_AND [[BITCAST1]], [[C1]] + ; SI-NEXT: [[AND5:%[0-9]+]]:_(s32) = G_AND [[BITCAST3]], [[C1]] + ; SI-NEXT: [[UMAX2:%[0-9]+]]:_(s32) = G_UMAX [[AND4]], [[AND5]] + ; SI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[UMAX]](s32), [[UMAX1]](s32), [[UMAX2]](s32) + ; SI-NEXT: S_NOP 0, implicit [[BUILD_VECTOR]](<3 x s32>) ; VI-LABEL: name: test_umax_v3s16 ; VI: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF - ; VI: [[DEF1:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF - ; VI: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<4 x s16>) - ; VI: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>) - ; VI: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST]](s32) - ; VI: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; VI: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) - ; VI: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) - ; VI: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>) - ; VI: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC 
[[BITCAST1]](s32) - ; VI: [[UV2:%[0-9]+]]:_(<2 x s16>), [[UV3:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF1]](<4 x s16>) - ; VI: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[UV2]](<2 x s16>) - ; VI: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST2]](s32) - ; VI: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C]](s32) - ; VI: [[TRUNC4:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR1]](s32) - ; VI: [[BITCAST3:%[0-9]+]]:_(s32) = G_BITCAST [[UV3]](<2 x s16>) - ; VI: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST3]](s32) - ; VI: [[UMAX:%[0-9]+]]:_(s16) = G_UMAX [[TRUNC]], [[TRUNC3]] - ; VI: [[UMAX1:%[0-9]+]]:_(s16) = G_UMAX [[TRUNC1]], [[TRUNC4]] - ; VI: [[UMAX2:%[0-9]+]]:_(s16) = G_UMAX [[TRUNC2]], [[TRUNC5]] - ; VI: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[UMAX]](s16) - ; VI: [[ANYEXT1:%[0-9]+]]:_(s32) = G_ANYEXT [[UMAX1]](s16) - ; VI: [[ANYEXT2:%[0-9]+]]:_(s32) = G_ANYEXT [[UMAX2]](s16) - ; VI: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[ANYEXT]](s32), [[ANYEXT1]](s32), [[ANYEXT2]](s32) - ; VI: S_NOP 0, implicit [[BUILD_VECTOR]](<3 x s32>) + ; VI-NEXT: [[DEF1:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF + ; VI-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<4 x s16>) + ; VI-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>) + ; VI-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST]](s32) + ; VI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; VI-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) + ; VI-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) + ; VI-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>) + ; VI-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST1]](s32) + ; VI-NEXT: [[UV2:%[0-9]+]]:_(<2 x s16>), [[UV3:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF1]](<4 x s16>) + ; VI-NEXT: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[UV2]](<2 x s16>) + ; VI-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST2]](s32) + ; VI-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C]](s32) + ; VI-NEXT: [[TRUNC4:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR1]](s32) + ; VI-NEXT: [[BITCAST3:%[0-9]+]]:_(s32) = G_BITCAST [[UV3]](<2 x s16>) + ; VI-NEXT: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST3]](s32) + ; VI-NEXT: [[UMAX:%[0-9]+]]:_(s16) = G_UMAX [[TRUNC]], [[TRUNC3]] + ; VI-NEXT: [[UMAX1:%[0-9]+]]:_(s16) = G_UMAX [[TRUNC1]], [[TRUNC4]] + ; VI-NEXT: [[UMAX2:%[0-9]+]]:_(s16) = G_UMAX [[TRUNC2]], [[TRUNC5]] + ; VI-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[UMAX]](s16) + ; VI-NEXT: [[ANYEXT1:%[0-9]+]]:_(s32) = G_ANYEXT [[UMAX1]](s16) + ; VI-NEXT: [[ANYEXT2:%[0-9]+]]:_(s32) = G_ANYEXT [[UMAX2]](s16) + ; VI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[ANYEXT]](s32), [[ANYEXT1]](s32), [[ANYEXT2]](s32) + ; VI-NEXT: S_NOP 0, implicit [[BUILD_VECTOR]](<3 x s32>) ; GFX9-LABEL: name: test_umax_v3s16 ; GFX9: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF - ; GFX9: [[DEF1:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF - ; GFX9: [[CONCAT_VECTORS:%[0-9]+]]:_(<12 x s16>) = G_CONCAT_VECTORS [[DEF]](<4 x s16>), [[DEF1]](<4 x s16>), [[DEF1]](<4 x s16>) - ; GFX9: [[UV:%[0-9]+]]:_(<3 x s16>), [[UV1:%[0-9]+]]:_(<3 x s16>), [[UV2:%[0-9]+]]:_(<3 x s16>), [[UV3:%[0-9]+]]:_(<3 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<12 x s16>) - ; GFX9: [[DEF2:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF - ; GFX9: [[CONCAT_VECTORS1:%[0-9]+]]:_(<12 x s16>) = G_CONCAT_VECTORS [[DEF2]](<4 x s16>), [[DEF1]](<4 x s16>), [[DEF1]](<4 x s16>) - ; GFX9: [[UV4:%[0-9]+]]:_(<3 x s16>), [[UV5:%[0-9]+]]:_(<3 x s16>), [[UV6:%[0-9]+]]:_(<3 x s16>), 
[[UV7:%[0-9]+]]:_(<3 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS1]](<12 x s16>) - ; GFX9: [[INSERT:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF1]], [[UV]](<3 x s16>), 0 - ; GFX9: [[INSERT1:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF1]], [[UV4]](<3 x s16>), 0 - ; GFX9: [[UV8:%[0-9]+]]:_(<2 x s16>), [[UV9:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[INSERT]](<4 x s16>) - ; GFX9: [[UV10:%[0-9]+]]:_(<2 x s16>), [[UV11:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[INSERT1]](<4 x s16>) - ; GFX9: [[UMAX:%[0-9]+]]:_(<2 x s16>) = G_UMAX [[UV8]], [[UV10]] - ; GFX9: [[UMAX1:%[0-9]+]]:_(<2 x s16>) = G_UMAX [[UV9]], [[UV11]] - ; GFX9: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UMAX]](<2 x s16>) - ; GFX9: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; GFX9: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) - ; GFX9: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UMAX1]](<2 x s16>) - ; GFX9: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[BITCAST]](s32), [[LSHR]](s32), [[BITCAST1]](s32) - ; GFX9: S_NOP 0, implicit [[BUILD_VECTOR]](<3 x s32>) + ; GFX9-NEXT: [[DEF1:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF + ; GFX9-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<12 x s16>) = G_CONCAT_VECTORS [[DEF]](<4 x s16>), [[DEF1]](<4 x s16>), [[DEF1]](<4 x s16>) + ; GFX9-NEXT: [[UV:%[0-9]+]]:_(<3 x s16>), [[UV1:%[0-9]+]]:_(<3 x s16>), [[UV2:%[0-9]+]]:_(<3 x s16>), [[UV3:%[0-9]+]]:_(<3 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<12 x s16>) + ; GFX9-NEXT: [[DEF2:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF + ; GFX9-NEXT: [[CONCAT_VECTORS1:%[0-9]+]]:_(<12 x s16>) = G_CONCAT_VECTORS [[DEF2]](<4 x s16>), [[DEF1]](<4 x s16>), [[DEF1]](<4 x s16>) + ; GFX9-NEXT: [[UV4:%[0-9]+]]:_(<3 x s16>), [[UV5:%[0-9]+]]:_(<3 x s16>), [[UV6:%[0-9]+]]:_(<3 x s16>), [[UV7:%[0-9]+]]:_(<3 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS1]](<12 x s16>) + ; GFX9-NEXT: [[INSERT:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF1]], [[UV]](<3 x s16>), 0 + ; GFX9-NEXT: [[INSERT1:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF1]], [[UV4]](<3 x s16>), 0 + ; GFX9-NEXT: [[UV8:%[0-9]+]]:_(<2 x s16>), [[UV9:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[INSERT]](<4 x s16>) + ; GFX9-NEXT: [[UV10:%[0-9]+]]:_(<2 x s16>), [[UV11:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[INSERT1]](<4 x s16>) + ; GFX9-NEXT: [[UMAX:%[0-9]+]]:_(<2 x s16>) = G_UMAX [[UV8]], [[UV10]] + ; GFX9-NEXT: [[UMAX1:%[0-9]+]]:_(<2 x s16>) = G_UMAX [[UV9]], [[UV11]] + ; GFX9-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UMAX]](<2 x s16>) + ; GFX9-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; GFX9-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) + ; GFX9-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UMAX1]](<2 x s16>) + ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[BITCAST]](s32), [[LSHR]](s32), [[BITCAST1]](s32) + ; GFX9-NEXT: S_NOP 0, implicit [[BUILD_VECTOR]](<3 x s32>) %0:_(<3 x s16>) = G_IMPLICIT_DEF %1:_(<3 x s16>) = G_IMPLICIT_DEF %2:_(<3 x s16>) = G_UMAX %0, %1 @@ -414,90 +414,90 @@ body: | ; SI-LABEL: name: test_umax_v4s16 ; SI: [[COPY:%[0-9]+]]:_(<4 x s16>) = COPY $vgpr0_vgpr1 - ; SI: [[COPY1:%[0-9]+]]:_(<4 x s16>) = COPY $vgpr2_vgpr3 - ; SI: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[COPY]](<4 x s16>) - ; SI: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>) - ; SI: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; SI: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) - ; SI: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>) - ; SI: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C]](s32) - ; SI: [[UV2:%[0-9]+]]:_(<2 x s16>), 
[[UV3:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[COPY1]](<4 x s16>) - ; SI: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[UV2]](<2 x s16>) - ; SI: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C]](s32) - ; SI: [[BITCAST3:%[0-9]+]]:_(s32) = G_BITCAST [[UV3]](<2 x s16>) - ; SI: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST3]], [[C]](s32) - ; SI: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 - ; SI: [[AND:%[0-9]+]]:_(s32) = G_AND [[BITCAST]], [[C1]] - ; SI: [[AND1:%[0-9]+]]:_(s32) = G_AND [[BITCAST2]], [[C1]] - ; SI: [[UMAX:%[0-9]+]]:_(s32) = G_UMAX [[AND]], [[AND1]] - ; SI: [[AND2:%[0-9]+]]:_(s32) = G_AND [[LSHR]], [[C1]] - ; SI: [[AND3:%[0-9]+]]:_(s32) = G_AND [[LSHR2]], [[C1]] - ; SI: [[UMAX1:%[0-9]+]]:_(s32) = G_UMAX [[AND2]], [[AND3]] - ; SI: [[AND4:%[0-9]+]]:_(s32) = G_AND [[BITCAST1]], [[C1]] - ; SI: [[AND5:%[0-9]+]]:_(s32) = G_AND [[BITCAST3]], [[C1]] - ; SI: [[UMAX2:%[0-9]+]]:_(s32) = G_UMAX [[AND4]], [[AND5]] - ; SI: [[AND6:%[0-9]+]]:_(s32) = G_AND [[LSHR1]], [[C1]] - ; SI: [[AND7:%[0-9]+]]:_(s32) = G_AND [[LSHR3]], [[C1]] - ; SI: [[UMAX3:%[0-9]+]]:_(s32) = G_UMAX [[AND6]], [[AND7]] - ; SI: [[AND8:%[0-9]+]]:_(s32) = G_AND [[UMAX]], [[C1]] - ; SI: [[AND9:%[0-9]+]]:_(s32) = G_AND [[UMAX1]], [[C1]] - ; SI: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND9]], [[C]](s32) - ; SI: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND8]], [[SHL]] - ; SI: [[BITCAST4:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) - ; SI: [[AND10:%[0-9]+]]:_(s32) = G_AND [[UMAX2]], [[C1]] - ; SI: [[AND11:%[0-9]+]]:_(s32) = G_AND [[UMAX3]], [[C1]] - ; SI: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND11]], [[C]](s32) - ; SI: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND10]], [[SHL1]] - ; SI: [[BITCAST5:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32) - ; SI: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BITCAST4]](<2 x s16>), [[BITCAST5]](<2 x s16>) - ; SI: $vgpr0_vgpr1 = COPY [[CONCAT_VECTORS]](<4 x s16>) + ; SI-NEXT: [[COPY1:%[0-9]+]]:_(<4 x s16>) = COPY $vgpr2_vgpr3 + ; SI-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[COPY]](<4 x s16>) + ; SI-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>) + ; SI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; SI-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) + ; SI-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>) + ; SI-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C]](s32) + ; SI-NEXT: [[UV2:%[0-9]+]]:_(<2 x s16>), [[UV3:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[COPY1]](<4 x s16>) + ; SI-NEXT: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[UV2]](<2 x s16>) + ; SI-NEXT: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C]](s32) + ; SI-NEXT: [[BITCAST3:%[0-9]+]]:_(s32) = G_BITCAST [[UV3]](<2 x s16>) + ; SI-NEXT: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST3]], [[C]](s32) + ; SI-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 + ; SI-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[BITCAST]], [[C1]] + ; SI-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[BITCAST2]], [[C1]] + ; SI-NEXT: [[UMAX:%[0-9]+]]:_(s32) = G_UMAX [[AND]], [[AND1]] + ; SI-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[LSHR]], [[C1]] + ; SI-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[LSHR2]], [[C1]] + ; SI-NEXT: [[UMAX1:%[0-9]+]]:_(s32) = G_UMAX [[AND2]], [[AND3]] + ; SI-NEXT: [[AND4:%[0-9]+]]:_(s32) = G_AND [[BITCAST1]], [[C1]] + ; SI-NEXT: [[AND5:%[0-9]+]]:_(s32) = G_AND [[BITCAST3]], [[C1]] + ; SI-NEXT: [[UMAX2:%[0-9]+]]:_(s32) = G_UMAX [[AND4]], [[AND5]] + ; SI-NEXT: [[AND6:%[0-9]+]]:_(s32) = G_AND [[LSHR1]], [[C1]] + ; SI-NEXT: [[AND7:%[0-9]+]]:_(s32) = G_AND [[LSHR3]], 
[[C1]] + ; SI-NEXT: [[UMAX3:%[0-9]+]]:_(s32) = G_UMAX [[AND6]], [[AND7]] + ; SI-NEXT: [[AND8:%[0-9]+]]:_(s32) = G_AND [[UMAX]], [[C1]] + ; SI-NEXT: [[AND9:%[0-9]+]]:_(s32) = G_AND [[UMAX1]], [[C1]] + ; SI-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND9]], [[C]](s32) + ; SI-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND8]], [[SHL]] + ; SI-NEXT: [[BITCAST4:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) + ; SI-NEXT: [[AND10:%[0-9]+]]:_(s32) = G_AND [[UMAX2]], [[C1]] + ; SI-NEXT: [[AND11:%[0-9]+]]:_(s32) = G_AND [[UMAX3]], [[C1]] + ; SI-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND11]], [[C]](s32) + ; SI-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND10]], [[SHL1]] + ; SI-NEXT: [[BITCAST5:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32) + ; SI-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BITCAST4]](<2 x s16>), [[BITCAST5]](<2 x s16>) + ; SI-NEXT: $vgpr0_vgpr1 = COPY [[CONCAT_VECTORS]](<4 x s16>) ; VI-LABEL: name: test_umax_v4s16 ; VI: [[COPY:%[0-9]+]]:_(<4 x s16>) = COPY $vgpr0_vgpr1 - ; VI: [[COPY1:%[0-9]+]]:_(<4 x s16>) = COPY $vgpr2_vgpr3 - ; VI: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[COPY]](<4 x s16>) - ; VI: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>) - ; VI: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST]](s32) - ; VI: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; VI: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) - ; VI: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) - ; VI: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>) - ; VI: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST1]](s32) - ; VI: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C]](s32) - ; VI: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR1]](s32) - ; VI: [[UV2:%[0-9]+]]:_(<2 x s16>), [[UV3:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[COPY1]](<4 x s16>) - ; VI: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[UV2]](<2 x s16>) - ; VI: [[TRUNC4:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST2]](s32) - ; VI: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C]](s32) - ; VI: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR2]](s32) - ; VI: [[BITCAST3:%[0-9]+]]:_(s32) = G_BITCAST [[UV3]](<2 x s16>) - ; VI: [[TRUNC6:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST3]](s32) - ; VI: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST3]], [[C]](s32) - ; VI: [[TRUNC7:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR3]](s32) - ; VI: [[UMAX:%[0-9]+]]:_(s16) = G_UMAX [[TRUNC]], [[TRUNC4]] - ; VI: [[UMAX1:%[0-9]+]]:_(s16) = G_UMAX [[TRUNC1]], [[TRUNC5]] - ; VI: [[UMAX2:%[0-9]+]]:_(s16) = G_UMAX [[TRUNC2]], [[TRUNC6]] - ; VI: [[UMAX3:%[0-9]+]]:_(s16) = G_UMAX [[TRUNC3]], [[TRUNC7]] - ; VI: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[UMAX]](s16) - ; VI: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[UMAX1]](s16) - ; VI: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C]](s32) - ; VI: [[OR:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL]] - ; VI: [[BITCAST4:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) - ; VI: [[ZEXT2:%[0-9]+]]:_(s32) = G_ZEXT [[UMAX2]](s16) - ; VI: [[ZEXT3:%[0-9]+]]:_(s32) = G_ZEXT [[UMAX3]](s16) - ; VI: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[ZEXT3]], [[C]](s32) - ; VI: [[OR1:%[0-9]+]]:_(s32) = G_OR [[ZEXT2]], [[SHL1]] - ; VI: [[BITCAST5:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32) - ; VI: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BITCAST4]](<2 x s16>), [[BITCAST5]](<2 x s16>) - ; VI: $vgpr0_vgpr1 = COPY [[CONCAT_VECTORS]](<4 x s16>) + ; VI-NEXT: [[COPY1:%[0-9]+]]:_(<4 x s16>) = COPY $vgpr2_vgpr3 + ; VI-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[COPY]](<4 x s16>) + ; VI-NEXT: 
[[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>) + ; VI-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST]](s32) + ; VI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; VI-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) + ; VI-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) + ; VI-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>) + ; VI-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST1]](s32) + ; VI-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C]](s32) + ; VI-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR1]](s32) + ; VI-NEXT: [[UV2:%[0-9]+]]:_(<2 x s16>), [[UV3:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[COPY1]](<4 x s16>) + ; VI-NEXT: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[UV2]](<2 x s16>) + ; VI-NEXT: [[TRUNC4:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST2]](s32) + ; VI-NEXT: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C]](s32) + ; VI-NEXT: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR2]](s32) + ; VI-NEXT: [[BITCAST3:%[0-9]+]]:_(s32) = G_BITCAST [[UV3]](<2 x s16>) + ; VI-NEXT: [[TRUNC6:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST3]](s32) + ; VI-NEXT: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST3]], [[C]](s32) + ; VI-NEXT: [[TRUNC7:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR3]](s32) + ; VI-NEXT: [[UMAX:%[0-9]+]]:_(s16) = G_UMAX [[TRUNC]], [[TRUNC4]] + ; VI-NEXT: [[UMAX1:%[0-9]+]]:_(s16) = G_UMAX [[TRUNC1]], [[TRUNC5]] + ; VI-NEXT: [[UMAX2:%[0-9]+]]:_(s16) = G_UMAX [[TRUNC2]], [[TRUNC6]] + ; VI-NEXT: [[UMAX3:%[0-9]+]]:_(s16) = G_UMAX [[TRUNC3]], [[TRUNC7]] + ; VI-NEXT: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[UMAX]](s16) + ; VI-NEXT: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[UMAX1]](s16) + ; VI-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C]](s32) + ; VI-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL]] + ; VI-NEXT: [[BITCAST4:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) + ; VI-NEXT: [[ZEXT2:%[0-9]+]]:_(s32) = G_ZEXT [[UMAX2]](s16) + ; VI-NEXT: [[ZEXT3:%[0-9]+]]:_(s32) = G_ZEXT [[UMAX3]](s16) + ; VI-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[ZEXT3]], [[C]](s32) + ; VI-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[ZEXT2]], [[SHL1]] + ; VI-NEXT: [[BITCAST5:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32) + ; VI-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BITCAST4]](<2 x s16>), [[BITCAST5]](<2 x s16>) + ; VI-NEXT: $vgpr0_vgpr1 = COPY [[CONCAT_VECTORS]](<4 x s16>) ; GFX9-LABEL: name: test_umax_v4s16 ; GFX9: [[COPY:%[0-9]+]]:_(<4 x s16>) = COPY $vgpr0_vgpr1 - ; GFX9: [[COPY1:%[0-9]+]]:_(<4 x s16>) = COPY $vgpr2_vgpr3 - ; GFX9: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[COPY]](<4 x s16>) - ; GFX9: [[UV2:%[0-9]+]]:_(<2 x s16>), [[UV3:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[COPY1]](<4 x s16>) - ; GFX9: [[UMAX:%[0-9]+]]:_(<2 x s16>) = G_UMAX [[UV]], [[UV2]] - ; GFX9: [[UMAX1:%[0-9]+]]:_(<2 x s16>) = G_UMAX [[UV1]], [[UV3]] - ; GFX9: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[UMAX]](<2 x s16>), [[UMAX1]](<2 x s16>) - ; GFX9: $vgpr0_vgpr1 = COPY [[CONCAT_VECTORS]](<4 x s16>) + ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(<4 x s16>) = COPY $vgpr2_vgpr3 + ; GFX9-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[COPY]](<4 x s16>) + ; GFX9-NEXT: [[UV2:%[0-9]+]]:_(<2 x s16>), [[UV3:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[COPY1]](<4 x s16>) + ; GFX9-NEXT: [[UMAX:%[0-9]+]]:_(<2 x s16>) = G_UMAX [[UV]], [[UV2]] + ; GFX9-NEXT: [[UMAX1:%[0-9]+]]:_(<2 x s16>) = G_UMAX [[UV1]], [[UV3]] + ; GFX9-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[UMAX]](<2 x 
s16>), [[UMAX1]](<2 x s16>) + ; GFX9-NEXT: $vgpr0_vgpr1 = COPY [[CONCAT_VECTORS]](<4 x s16>) %0:_(<4 x s16>) = COPY $vgpr0_vgpr1 %1:_(<4 x s16>) = COPY $vgpr2_vgpr3 %2:_(<4 x s16>) = G_UMAX %0, %1 diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-umin.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-umin.mir index c0a2192a7945..ec873d966ffe 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-umin.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-umin.mir @@ -12,19 +12,19 @@ body: | ; SI-LABEL: name: test_umin_s32 ; SI: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; SI: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; SI: [[UMIN:%[0-9]+]]:_(s32) = G_UMIN [[COPY]], [[COPY1]] - ; SI: $vgpr0 = COPY [[UMIN]](s32) + ; SI-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; SI-NEXT: [[UMIN:%[0-9]+]]:_(s32) = G_UMIN [[COPY]], [[COPY1]] + ; SI-NEXT: $vgpr0 = COPY [[UMIN]](s32) ; VI-LABEL: name: test_umin_s32 ; VI: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; VI: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; VI: [[UMIN:%[0-9]+]]:_(s32) = G_UMIN [[COPY]], [[COPY1]] - ; VI: $vgpr0 = COPY [[UMIN]](s32) + ; VI-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; VI-NEXT: [[UMIN:%[0-9]+]]:_(s32) = G_UMIN [[COPY]], [[COPY1]] + ; VI-NEXT: $vgpr0 = COPY [[UMIN]](s32) ; GFX9-LABEL: name: test_umin_s32 ; GFX9: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX9: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX9: [[UMIN:%[0-9]+]]:_(s32) = G_UMIN [[COPY]], [[COPY1]] - ; GFX9: $vgpr0 = COPY [[UMIN]](s32) + ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; GFX9-NEXT: [[UMIN:%[0-9]+]]:_(s32) = G_UMIN [[COPY]], [[COPY1]] + ; GFX9-NEXT: $vgpr0 = COPY [[UMIN]](s32) %0:_(s32) = COPY $vgpr0 %1:_(s32) = COPY $vgpr1 %2:_(s32) = G_UMIN %0, %1 @@ -39,22 +39,22 @@ body: | ; SI-LABEL: name: test_umin_s64 ; SI: [[COPY:%[0-9]+]]:_(s64) = COPY $vgpr0_vgpr1 - ; SI: [[COPY1:%[0-9]+]]:_(s64) = COPY $vgpr2_vgpr3 - ; SI: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(ult), [[COPY]](s64), [[COPY1]] - ; SI: [[SELECT:%[0-9]+]]:_(s64) = G_SELECT [[ICMP]](s1), [[COPY]], [[COPY1]] - ; SI: $vgpr0_vgpr1 = COPY [[SELECT]](s64) + ; SI-NEXT: [[COPY1:%[0-9]+]]:_(s64) = COPY $vgpr2_vgpr3 + ; SI-NEXT: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(ult), [[COPY]](s64), [[COPY1]] + ; SI-NEXT: [[SELECT:%[0-9]+]]:_(s64) = G_SELECT [[ICMP]](s1), [[COPY]], [[COPY1]] + ; SI-NEXT: $vgpr0_vgpr1 = COPY [[SELECT]](s64) ; VI-LABEL: name: test_umin_s64 ; VI: [[COPY:%[0-9]+]]:_(s64) = COPY $vgpr0_vgpr1 - ; VI: [[COPY1:%[0-9]+]]:_(s64) = COPY $vgpr2_vgpr3 - ; VI: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(ult), [[COPY]](s64), [[COPY1]] - ; VI: [[SELECT:%[0-9]+]]:_(s64) = G_SELECT [[ICMP]](s1), [[COPY]], [[COPY1]] - ; VI: $vgpr0_vgpr1 = COPY [[SELECT]](s64) + ; VI-NEXT: [[COPY1:%[0-9]+]]:_(s64) = COPY $vgpr2_vgpr3 + ; VI-NEXT: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(ult), [[COPY]](s64), [[COPY1]] + ; VI-NEXT: [[SELECT:%[0-9]+]]:_(s64) = G_SELECT [[ICMP]](s1), [[COPY]], [[COPY1]] + ; VI-NEXT: $vgpr0_vgpr1 = COPY [[SELECT]](s64) ; GFX9-LABEL: name: test_umin_s64 ; GFX9: [[COPY:%[0-9]+]]:_(s64) = COPY $vgpr0_vgpr1 - ; GFX9: [[COPY1:%[0-9]+]]:_(s64) = COPY $vgpr2_vgpr3 - ; GFX9: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(ult), [[COPY]](s64), [[COPY1]] - ; GFX9: [[SELECT:%[0-9]+]]:_(s64) = G_SELECT [[ICMP]](s1), [[COPY]], [[COPY1]] - ; GFX9: $vgpr0_vgpr1 = COPY [[SELECT]](s64) + ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(s64) = COPY $vgpr2_vgpr3 + ; GFX9-NEXT: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(ult), [[COPY]](s64), [[COPY1]] + ; GFX9-NEXT: [[SELECT:%[0-9]+]]:_(s64) = G_SELECT [[ICMP]](s1), 
[[COPY]], [[COPY1]] + ; GFX9-NEXT: $vgpr0_vgpr1 = COPY [[SELECT]](s64) %0:_(s64) = COPY $vgpr0_vgpr1 %1:_(s64) = COPY $vgpr2_vgpr3 %2:_(s64) = G_UMIN %0, %1 @@ -69,28 +69,28 @@ body: | ; SI-LABEL: name: test_umin_s16 ; SI: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; SI: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; SI: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 - ; SI: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY]], [[C]] - ; SI: [[AND1:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C]] - ; SI: [[UMIN:%[0-9]+]]:_(s32) = G_UMIN [[AND]], [[AND1]] - ; SI: $vgpr0 = COPY [[UMIN]](s32) + ; SI-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; SI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 + ; SI-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY]], [[C]] + ; SI-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C]] + ; SI-NEXT: [[UMIN:%[0-9]+]]:_(s32) = G_UMIN [[AND]], [[AND1]] + ; SI-NEXT: $vgpr0 = COPY [[UMIN]](s32) ; VI-LABEL: name: test_umin_s16 ; VI: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; VI: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; VI: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY]](s32) - ; VI: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY1]](s32) - ; VI: [[UMIN:%[0-9]+]]:_(s16) = G_UMIN [[TRUNC]], [[TRUNC1]] - ; VI: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[UMIN]](s16) - ; VI: $vgpr0 = COPY [[ANYEXT]](s32) + ; VI-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; VI-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY]](s32) + ; VI-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY1]](s32) + ; VI-NEXT: [[UMIN:%[0-9]+]]:_(s16) = G_UMIN [[TRUNC]], [[TRUNC1]] + ; VI-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[UMIN]](s16) + ; VI-NEXT: $vgpr0 = COPY [[ANYEXT]](s32) ; GFX9-LABEL: name: test_umin_s16 ; GFX9: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX9: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX9: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY]](s32) - ; GFX9: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY1]](s32) - ; GFX9: [[UMIN:%[0-9]+]]:_(s16) = G_UMIN [[TRUNC]], [[TRUNC1]] - ; GFX9: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[UMIN]](s16) - ; GFX9: $vgpr0 = COPY [[ANYEXT]](s32) + ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; GFX9-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY]](s32) + ; GFX9-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY1]](s32) + ; GFX9-NEXT: [[UMIN:%[0-9]+]]:_(s16) = G_UMIN [[TRUNC]], [[TRUNC1]] + ; GFX9-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[UMIN]](s16) + ; GFX9-NEXT: $vgpr0 = COPY [[ANYEXT]](s32) %0:_(s32) = COPY $vgpr0 %1:_(s32) = COPY $vgpr1 %2:_(s16) = G_TRUNC %0 @@ -108,34 +108,34 @@ body: | ; SI-LABEL: name: test_umin_s8 ; SI: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; SI: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; SI: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 255 - ; SI: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY]], [[C]] - ; SI: [[AND1:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C]] - ; SI: [[UMIN:%[0-9]+]]:_(s32) = G_UMIN [[AND]], [[AND1]] - ; SI: $vgpr0 = COPY [[UMIN]](s32) + ; SI-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; SI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 255 + ; SI-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY]], [[C]] + ; SI-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C]] + ; SI-NEXT: [[UMIN:%[0-9]+]]:_(s32) = G_UMIN [[AND]], [[AND1]] + ; SI-NEXT: $vgpr0 = COPY [[UMIN]](s32) ; VI-LABEL: name: test_umin_s8 ; VI: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; VI: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; VI: [[C:%[0-9]+]]:_(s16) = G_CONSTANT i16 255 - ; VI: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY]](s32) - ; VI: [[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC]], [[C]] - ; VI: 
[[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY1]](s32) - ; VI: [[AND1:%[0-9]+]]:_(s16) = G_AND [[TRUNC1]], [[C]] - ; VI: [[UMIN:%[0-9]+]]:_(s16) = G_UMIN [[AND]], [[AND1]] - ; VI: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[UMIN]](s16) - ; VI: $vgpr0 = COPY [[ANYEXT]](s32) + ; VI-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; VI-NEXT: [[C:%[0-9]+]]:_(s16) = G_CONSTANT i16 255 + ; VI-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY]](s32) + ; VI-NEXT: [[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC]], [[C]] + ; VI-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY1]](s32) + ; VI-NEXT: [[AND1:%[0-9]+]]:_(s16) = G_AND [[TRUNC1]], [[C]] + ; VI-NEXT: [[UMIN:%[0-9]+]]:_(s16) = G_UMIN [[AND]], [[AND1]] + ; VI-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[UMIN]](s16) + ; VI-NEXT: $vgpr0 = COPY [[ANYEXT]](s32) ; GFX9-LABEL: name: test_umin_s8 ; GFX9: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX9: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX9: [[C:%[0-9]+]]:_(s16) = G_CONSTANT i16 255 - ; GFX9: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY]](s32) - ; GFX9: [[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC]], [[C]] - ; GFX9: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY1]](s32) - ; GFX9: [[AND1:%[0-9]+]]:_(s16) = G_AND [[TRUNC1]], [[C]] - ; GFX9: [[UMIN:%[0-9]+]]:_(s16) = G_UMIN [[AND]], [[AND1]] - ; GFX9: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[UMIN]](s16) - ; GFX9: $vgpr0 = COPY [[ANYEXT]](s32) + ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; GFX9-NEXT: [[C:%[0-9]+]]:_(s16) = G_CONSTANT i16 255 + ; GFX9-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY]](s32) + ; GFX9-NEXT: [[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC]], [[C]] + ; GFX9-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY1]](s32) + ; GFX9-NEXT: [[AND1:%[0-9]+]]:_(s16) = G_AND [[TRUNC1]], [[C]] + ; GFX9-NEXT: [[UMIN:%[0-9]+]]:_(s16) = G_UMIN [[AND]], [[AND1]] + ; GFX9-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[UMIN]](s16) + ; GFX9-NEXT: $vgpr0 = COPY [[ANYEXT]](s32) %0:_(s32) = COPY $vgpr0 %1:_(s32) = COPY $vgpr1 %2:_(s8) = G_TRUNC %0 @@ -153,28 +153,28 @@ body: | ; SI-LABEL: name: test_umin_s17 ; SI: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; SI: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; SI: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 131071 - ; SI: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY]], [[C]] - ; SI: [[AND1:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C]] - ; SI: [[UMIN:%[0-9]+]]:_(s32) = G_UMIN [[AND]], [[AND1]] - ; SI: $vgpr0 = COPY [[UMIN]](s32) + ; SI-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; SI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 131071 + ; SI-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY]], [[C]] + ; SI-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C]] + ; SI-NEXT: [[UMIN:%[0-9]+]]:_(s32) = G_UMIN [[AND]], [[AND1]] + ; SI-NEXT: $vgpr0 = COPY [[UMIN]](s32) ; VI-LABEL: name: test_umin_s17 ; VI: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; VI: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; VI: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 131071 - ; VI: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY]], [[C]] - ; VI: [[AND1:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C]] - ; VI: [[UMIN:%[0-9]+]]:_(s32) = G_UMIN [[AND]], [[AND1]] - ; VI: $vgpr0 = COPY [[UMIN]](s32) + ; VI-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; VI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 131071 + ; VI-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY]], [[C]] + ; VI-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C]] + ; VI-NEXT: [[UMIN:%[0-9]+]]:_(s32) = G_UMIN [[AND]], [[AND1]] + ; VI-NEXT: $vgpr0 = COPY [[UMIN]](s32) ; GFX9-LABEL: name: test_umin_s17 ; GFX9: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX9: 
[[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX9: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 131071 - ; GFX9: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY]], [[C]] - ; GFX9: [[AND1:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C]] - ; GFX9: [[UMIN:%[0-9]+]]:_(s32) = G_UMIN [[AND]], [[AND1]] - ; GFX9: $vgpr0 = COPY [[UMIN]](s32) + ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; GFX9-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 131071 + ; GFX9-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY]], [[C]] + ; GFX9-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C]] + ; GFX9-NEXT: [[UMIN:%[0-9]+]]:_(s32) = G_UMIN [[AND]], [[AND1]] + ; GFX9-NEXT: $vgpr0 = COPY [[UMIN]](s32) %0:_(s32) = COPY $vgpr0 %1:_(s32) = COPY $vgpr1 %2:_(s17) = G_TRUNC %0 @@ -192,31 +192,31 @@ body: | ; SI-LABEL: name: test_umin_v2s32 ; SI: [[COPY:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr0_vgpr1 - ; SI: [[COPY1:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr2_vgpr3 - ; SI: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<2 x s32>) - ; SI: [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](<2 x s32>) - ; SI: [[UMIN:%[0-9]+]]:_(s32) = G_UMIN [[UV]], [[UV2]] - ; SI: [[UMIN1:%[0-9]+]]:_(s32) = G_UMIN [[UV1]], [[UV3]] - ; SI: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[UMIN]](s32), [[UMIN1]](s32) - ; SI: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x s32>) + ; SI-NEXT: [[COPY1:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr2_vgpr3 + ; SI-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<2 x s32>) + ; SI-NEXT: [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](<2 x s32>) + ; SI-NEXT: [[UMIN:%[0-9]+]]:_(s32) = G_UMIN [[UV]], [[UV2]] + ; SI-NEXT: [[UMIN1:%[0-9]+]]:_(s32) = G_UMIN [[UV1]], [[UV3]] + ; SI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[UMIN]](s32), [[UMIN1]](s32) + ; SI-NEXT: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x s32>) ; VI-LABEL: name: test_umin_v2s32 ; VI: [[COPY:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr0_vgpr1 - ; VI: [[COPY1:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr2_vgpr3 - ; VI: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<2 x s32>) - ; VI: [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](<2 x s32>) - ; VI: [[UMIN:%[0-9]+]]:_(s32) = G_UMIN [[UV]], [[UV2]] - ; VI: [[UMIN1:%[0-9]+]]:_(s32) = G_UMIN [[UV1]], [[UV3]] - ; VI: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[UMIN]](s32), [[UMIN1]](s32) - ; VI: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x s32>) + ; VI-NEXT: [[COPY1:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr2_vgpr3 + ; VI-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<2 x s32>) + ; VI-NEXT: [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](<2 x s32>) + ; VI-NEXT: [[UMIN:%[0-9]+]]:_(s32) = G_UMIN [[UV]], [[UV2]] + ; VI-NEXT: [[UMIN1:%[0-9]+]]:_(s32) = G_UMIN [[UV1]], [[UV3]] + ; VI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[UMIN]](s32), [[UMIN1]](s32) + ; VI-NEXT: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x s32>) ; GFX9-LABEL: name: test_umin_v2s32 ; GFX9: [[COPY:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr0_vgpr1 - ; GFX9: [[COPY1:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr2_vgpr3 - ; GFX9: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<2 x s32>) - ; GFX9: [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](<2 x s32>) - ; GFX9: [[UMIN:%[0-9]+]]:_(s32) = G_UMIN [[UV]], [[UV2]] - ; GFX9: [[UMIN1:%[0-9]+]]:_(s32) = G_UMIN [[UV1]], [[UV3]] - ; GFX9: 
[[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[UMIN]](s32), [[UMIN1]](s32) - ; GFX9: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x s32>) + ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr2_vgpr3 + ; GFX9-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<2 x s32>) + ; GFX9-NEXT: [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](<2 x s32>) + ; GFX9-NEXT: [[UMIN:%[0-9]+]]:_(s32) = G_UMIN [[UV]], [[UV2]] + ; GFX9-NEXT: [[UMIN1:%[0-9]+]]:_(s32) = G_UMIN [[UV1]], [[UV3]] + ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[UMIN]](s32), [[UMIN1]](s32) + ; GFX9-NEXT: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x s32>) %0:_(<2 x s32>) = COPY $vgpr0_vgpr1 %1:_(<2 x s32>) = COPY $vgpr2_vgpr3 %2:_(<2 x s32>) = G_UMIN %0, %1 @@ -231,34 +231,34 @@ body: | ; SI-LABEL: name: test_umin_v3s32 ; SI: [[COPY:%[0-9]+]]:_(<3 x s32>) = COPY $vgpr0_vgpr1_vgpr2 - ; SI: [[COPY1:%[0-9]+]]:_(<3 x s32>) = COPY $vgpr3_vgpr4_vgpr5 - ; SI: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<3 x s32>) - ; SI: [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](<3 x s32>) - ; SI: [[UMIN:%[0-9]+]]:_(s32) = G_UMIN [[UV]], [[UV3]] - ; SI: [[UMIN1:%[0-9]+]]:_(s32) = G_UMIN [[UV1]], [[UV4]] - ; SI: [[UMIN2:%[0-9]+]]:_(s32) = G_UMIN [[UV2]], [[UV5]] - ; SI: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[UMIN]](s32), [[UMIN1]](s32), [[UMIN2]](s32) - ; SI: $vgpr0_vgpr1_vgpr2 = COPY [[BUILD_VECTOR]](<3 x s32>) + ; SI-NEXT: [[COPY1:%[0-9]+]]:_(<3 x s32>) = COPY $vgpr3_vgpr4_vgpr5 + ; SI-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<3 x s32>) + ; SI-NEXT: [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](<3 x s32>) + ; SI-NEXT: [[UMIN:%[0-9]+]]:_(s32) = G_UMIN [[UV]], [[UV3]] + ; SI-NEXT: [[UMIN1:%[0-9]+]]:_(s32) = G_UMIN [[UV1]], [[UV4]] + ; SI-NEXT: [[UMIN2:%[0-9]+]]:_(s32) = G_UMIN [[UV2]], [[UV5]] + ; SI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[UMIN]](s32), [[UMIN1]](s32), [[UMIN2]](s32) + ; SI-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[BUILD_VECTOR]](<3 x s32>) ; VI-LABEL: name: test_umin_v3s32 ; VI: [[COPY:%[0-9]+]]:_(<3 x s32>) = COPY $vgpr0_vgpr1_vgpr2 - ; VI: [[COPY1:%[0-9]+]]:_(<3 x s32>) = COPY $vgpr3_vgpr4_vgpr5 - ; VI: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<3 x s32>) - ; VI: [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](<3 x s32>) - ; VI: [[UMIN:%[0-9]+]]:_(s32) = G_UMIN [[UV]], [[UV3]] - ; VI: [[UMIN1:%[0-9]+]]:_(s32) = G_UMIN [[UV1]], [[UV4]] - ; VI: [[UMIN2:%[0-9]+]]:_(s32) = G_UMIN [[UV2]], [[UV5]] - ; VI: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[UMIN]](s32), [[UMIN1]](s32), [[UMIN2]](s32) - ; VI: $vgpr0_vgpr1_vgpr2 = COPY [[BUILD_VECTOR]](<3 x s32>) + ; VI-NEXT: [[COPY1:%[0-9]+]]:_(<3 x s32>) = COPY $vgpr3_vgpr4_vgpr5 + ; VI-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<3 x s32>) + ; VI-NEXT: [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](<3 x s32>) + ; VI-NEXT: [[UMIN:%[0-9]+]]:_(s32) = G_UMIN [[UV]], [[UV3]] + ; VI-NEXT: [[UMIN1:%[0-9]+]]:_(s32) = G_UMIN [[UV1]], [[UV4]] + ; VI-NEXT: [[UMIN2:%[0-9]+]]:_(s32) = G_UMIN [[UV2]], [[UV5]] + ; VI-NEXT: 
[[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[UMIN]](s32), [[UMIN1]](s32), [[UMIN2]](s32) + ; VI-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[BUILD_VECTOR]](<3 x s32>) ; GFX9-LABEL: name: test_umin_v3s32 ; GFX9: [[COPY:%[0-9]+]]:_(<3 x s32>) = COPY $vgpr0_vgpr1_vgpr2 - ; GFX9: [[COPY1:%[0-9]+]]:_(<3 x s32>) = COPY $vgpr3_vgpr4_vgpr5 - ; GFX9: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<3 x s32>) - ; GFX9: [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](<3 x s32>) - ; GFX9: [[UMIN:%[0-9]+]]:_(s32) = G_UMIN [[UV]], [[UV3]] - ; GFX9: [[UMIN1:%[0-9]+]]:_(s32) = G_UMIN [[UV1]], [[UV4]] - ; GFX9: [[UMIN2:%[0-9]+]]:_(s32) = G_UMIN [[UV2]], [[UV5]] - ; GFX9: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[UMIN]](s32), [[UMIN1]](s32), [[UMIN2]](s32) - ; GFX9: $vgpr0_vgpr1_vgpr2 = COPY [[BUILD_VECTOR]](<3 x s32>) + ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(<3 x s32>) = COPY $vgpr3_vgpr4_vgpr5 + ; GFX9-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<3 x s32>) + ; GFX9-NEXT: [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](<3 x s32>) + ; GFX9-NEXT: [[UMIN:%[0-9]+]]:_(s32) = G_UMIN [[UV]], [[UV3]] + ; GFX9-NEXT: [[UMIN1:%[0-9]+]]:_(s32) = G_UMIN [[UV1]], [[UV4]] + ; GFX9-NEXT: [[UMIN2:%[0-9]+]]:_(s32) = G_UMIN [[UV2]], [[UV5]] + ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[UMIN]](s32), [[UMIN1]](s32), [[UMIN2]](s32) + ; GFX9-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[BUILD_VECTOR]](<3 x s32>) %0:_(<3 x s32>) = COPY $vgpr0_vgpr1_vgpr2 %1:_(<3 x s32>) = COPY $vgpr3_vgpr4_vgpr5 %2:_(<3 x s32>) = G_UMIN %0, %1 @@ -273,50 +273,50 @@ body: | ; SI-LABEL: name: test_umin_v2s16 ; SI: [[COPY:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0 - ; SI: [[COPY1:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr1 - ; SI: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY]](<2 x s16>) - ; SI: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; SI: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) - ; SI: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[COPY1]](<2 x s16>) - ; SI: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C]](s32) - ; SI: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 - ; SI: [[AND:%[0-9]+]]:_(s32) = G_AND [[BITCAST]], [[C1]] - ; SI: [[AND1:%[0-9]+]]:_(s32) = G_AND [[BITCAST1]], [[C1]] - ; SI: [[UMIN:%[0-9]+]]:_(s32) = G_UMIN [[AND]], [[AND1]] - ; SI: [[AND2:%[0-9]+]]:_(s32) = G_AND [[LSHR]], [[C1]] - ; SI: [[AND3:%[0-9]+]]:_(s32) = G_AND [[LSHR1]], [[C1]] - ; SI: [[UMIN1:%[0-9]+]]:_(s32) = G_UMIN [[AND2]], [[AND3]] - ; SI: [[AND4:%[0-9]+]]:_(s32) = G_AND [[UMIN]], [[C1]] - ; SI: [[AND5:%[0-9]+]]:_(s32) = G_AND [[UMIN1]], [[C1]] - ; SI: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[C]](s32) - ; SI: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND4]], [[SHL]] - ; SI: [[BITCAST2:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) - ; SI: $vgpr0 = COPY [[BITCAST2]](<2 x s16>) + ; SI-NEXT: [[COPY1:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr1 + ; SI-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY]](<2 x s16>) + ; SI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; SI-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) + ; SI-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[COPY1]](<2 x s16>) + ; SI-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C]](s32) + ; SI-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 + ; SI-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[BITCAST]], [[C1]] + ; SI-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND 
[[BITCAST1]], [[C1]] + ; SI-NEXT: [[UMIN:%[0-9]+]]:_(s32) = G_UMIN [[AND]], [[AND1]] + ; SI-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[LSHR]], [[C1]] + ; SI-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[LSHR1]], [[C1]] + ; SI-NEXT: [[UMIN1:%[0-9]+]]:_(s32) = G_UMIN [[AND2]], [[AND3]] + ; SI-NEXT: [[AND4:%[0-9]+]]:_(s32) = G_AND [[UMIN]], [[C1]] + ; SI-NEXT: [[AND5:%[0-9]+]]:_(s32) = G_AND [[UMIN1]], [[C1]] + ; SI-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[C]](s32) + ; SI-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND4]], [[SHL]] + ; SI-NEXT: [[BITCAST2:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) + ; SI-NEXT: $vgpr0 = COPY [[BITCAST2]](<2 x s16>) ; VI-LABEL: name: test_umin_v2s16 ; VI: [[COPY:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0 - ; VI: [[COPY1:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr1 - ; VI: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY]](<2 x s16>) - ; VI: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST]](s32) - ; VI: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; VI: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) - ; VI: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) - ; VI: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[COPY1]](<2 x s16>) - ; VI: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST1]](s32) - ; VI: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C]](s32) - ; VI: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR1]](s32) - ; VI: [[UMIN:%[0-9]+]]:_(s16) = G_UMIN [[TRUNC]], [[TRUNC2]] - ; VI: [[UMIN1:%[0-9]+]]:_(s16) = G_UMIN [[TRUNC1]], [[TRUNC3]] - ; VI: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[UMIN]](s16) - ; VI: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[UMIN1]](s16) - ; VI: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C]](s32) - ; VI: [[OR:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL]] - ; VI: [[BITCAST2:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) - ; VI: $vgpr0 = COPY [[BITCAST2]](<2 x s16>) + ; VI-NEXT: [[COPY1:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr1 + ; VI-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY]](<2 x s16>) + ; VI-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST]](s32) + ; VI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; VI-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) + ; VI-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) + ; VI-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[COPY1]](<2 x s16>) + ; VI-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST1]](s32) + ; VI-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C]](s32) + ; VI-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR1]](s32) + ; VI-NEXT: [[UMIN:%[0-9]+]]:_(s16) = G_UMIN [[TRUNC]], [[TRUNC2]] + ; VI-NEXT: [[UMIN1:%[0-9]+]]:_(s16) = G_UMIN [[TRUNC1]], [[TRUNC3]] + ; VI-NEXT: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[UMIN]](s16) + ; VI-NEXT: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[UMIN1]](s16) + ; VI-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C]](s32) + ; VI-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL]] + ; VI-NEXT: [[BITCAST2:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) + ; VI-NEXT: $vgpr0 = COPY [[BITCAST2]](<2 x s16>) ; GFX9-LABEL: name: test_umin_v2s16 ; GFX9: [[COPY:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0 - ; GFX9: [[COPY1:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr1 - ; GFX9: [[UMIN:%[0-9]+]]:_(<2 x s16>) = G_UMIN [[COPY]], [[COPY1]] - ; GFX9: $vgpr0 = COPY [[UMIN]](<2 x s16>) + ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr1 + ; GFX9-NEXT: [[UMIN:%[0-9]+]]:_(<2 x s16>) = G_UMIN [[COPY]], [[COPY1]] + ; GFX9-NEXT: $vgpr0 = COPY [[UMIN]](<2 x s16>) %0:_(<2 x s16>) = COPY $vgpr0 %1:_(<2 x s16>) = COPY $vgpr1 %2:_(<2 x s16>) = G_UMIN %0, %1 @@ -331,74 +331,74 @@ body: | ; SI-LABEL: name: 
test_umin_v3s16 ; SI: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF - ; SI: [[DEF1:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF - ; SI: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<4 x s16>) - ; SI: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>) - ; SI: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; SI: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) - ; SI: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>) - ; SI: [[UV2:%[0-9]+]]:_(<2 x s16>), [[UV3:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF1]](<4 x s16>) - ; SI: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[UV2]](<2 x s16>) - ; SI: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C]](s32) - ; SI: [[BITCAST3:%[0-9]+]]:_(s32) = G_BITCAST [[UV3]](<2 x s16>) - ; SI: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 - ; SI: [[AND:%[0-9]+]]:_(s32) = G_AND [[BITCAST]], [[C1]] - ; SI: [[AND1:%[0-9]+]]:_(s32) = G_AND [[BITCAST2]], [[C1]] - ; SI: [[UMIN:%[0-9]+]]:_(s32) = G_UMIN [[AND]], [[AND1]] - ; SI: [[AND2:%[0-9]+]]:_(s32) = G_AND [[LSHR]], [[C1]] - ; SI: [[AND3:%[0-9]+]]:_(s32) = G_AND [[LSHR1]], [[C1]] - ; SI: [[UMIN1:%[0-9]+]]:_(s32) = G_UMIN [[AND2]], [[AND3]] - ; SI: [[AND4:%[0-9]+]]:_(s32) = G_AND [[BITCAST1]], [[C1]] - ; SI: [[AND5:%[0-9]+]]:_(s32) = G_AND [[BITCAST3]], [[C1]] - ; SI: [[UMIN2:%[0-9]+]]:_(s32) = G_UMIN [[AND4]], [[AND5]] - ; SI: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[UMIN]](s32), [[UMIN1]](s32), [[UMIN2]](s32) - ; SI: S_NOP 0, implicit [[BUILD_VECTOR]](<3 x s32>) + ; SI-NEXT: [[DEF1:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF + ; SI-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<4 x s16>) + ; SI-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>) + ; SI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; SI-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) + ; SI-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>) + ; SI-NEXT: [[UV2:%[0-9]+]]:_(<2 x s16>), [[UV3:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF1]](<4 x s16>) + ; SI-NEXT: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[UV2]](<2 x s16>) + ; SI-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C]](s32) + ; SI-NEXT: [[BITCAST3:%[0-9]+]]:_(s32) = G_BITCAST [[UV3]](<2 x s16>) + ; SI-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 + ; SI-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[BITCAST]], [[C1]] + ; SI-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[BITCAST2]], [[C1]] + ; SI-NEXT: [[UMIN:%[0-9]+]]:_(s32) = G_UMIN [[AND]], [[AND1]] + ; SI-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[LSHR]], [[C1]] + ; SI-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[LSHR1]], [[C1]] + ; SI-NEXT: [[UMIN1:%[0-9]+]]:_(s32) = G_UMIN [[AND2]], [[AND3]] + ; SI-NEXT: [[AND4:%[0-9]+]]:_(s32) = G_AND [[BITCAST1]], [[C1]] + ; SI-NEXT: [[AND5:%[0-9]+]]:_(s32) = G_AND [[BITCAST3]], [[C1]] + ; SI-NEXT: [[UMIN2:%[0-9]+]]:_(s32) = G_UMIN [[AND4]], [[AND5]] + ; SI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[UMIN]](s32), [[UMIN1]](s32), [[UMIN2]](s32) + ; SI-NEXT: S_NOP 0, implicit [[BUILD_VECTOR]](<3 x s32>) ; VI-LABEL: name: test_umin_v3s16 ; VI: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF - ; VI: [[DEF1:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF - ; VI: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<4 x s16>) - ; VI: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>) - ; VI: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST]](s32) - ; VI: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; VI: 
[[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) - ; VI: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) - ; VI: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>) - ; VI: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST1]](s32) - ; VI: [[UV2:%[0-9]+]]:_(<2 x s16>), [[UV3:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF1]](<4 x s16>) - ; VI: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[UV2]](<2 x s16>) - ; VI: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST2]](s32) - ; VI: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C]](s32) - ; VI: [[TRUNC4:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR1]](s32) - ; VI: [[BITCAST3:%[0-9]+]]:_(s32) = G_BITCAST [[UV3]](<2 x s16>) - ; VI: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST3]](s32) - ; VI: [[UMIN:%[0-9]+]]:_(s16) = G_UMIN [[TRUNC]], [[TRUNC3]] - ; VI: [[UMIN1:%[0-9]+]]:_(s16) = G_UMIN [[TRUNC1]], [[TRUNC4]] - ; VI: [[UMIN2:%[0-9]+]]:_(s16) = G_UMIN [[TRUNC2]], [[TRUNC5]] - ; VI: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[UMIN]](s16) - ; VI: [[ANYEXT1:%[0-9]+]]:_(s32) = G_ANYEXT [[UMIN1]](s16) - ; VI: [[ANYEXT2:%[0-9]+]]:_(s32) = G_ANYEXT [[UMIN2]](s16) - ; VI: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[ANYEXT]](s32), [[ANYEXT1]](s32), [[ANYEXT2]](s32) - ; VI: S_NOP 0, implicit [[BUILD_VECTOR]](<3 x s32>) + ; VI-NEXT: [[DEF1:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF + ; VI-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<4 x s16>) + ; VI-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>) + ; VI-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST]](s32) + ; VI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; VI-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) + ; VI-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) + ; VI-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>) + ; VI-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST1]](s32) + ; VI-NEXT: [[UV2:%[0-9]+]]:_(<2 x s16>), [[UV3:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF1]](<4 x s16>) + ; VI-NEXT: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[UV2]](<2 x s16>) + ; VI-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST2]](s32) + ; VI-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C]](s32) + ; VI-NEXT: [[TRUNC4:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR1]](s32) + ; VI-NEXT: [[BITCAST3:%[0-9]+]]:_(s32) = G_BITCAST [[UV3]](<2 x s16>) + ; VI-NEXT: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST3]](s32) + ; VI-NEXT: [[UMIN:%[0-9]+]]:_(s16) = G_UMIN [[TRUNC]], [[TRUNC3]] + ; VI-NEXT: [[UMIN1:%[0-9]+]]:_(s16) = G_UMIN [[TRUNC1]], [[TRUNC4]] + ; VI-NEXT: [[UMIN2:%[0-9]+]]:_(s16) = G_UMIN [[TRUNC2]], [[TRUNC5]] + ; VI-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[UMIN]](s16) + ; VI-NEXT: [[ANYEXT1:%[0-9]+]]:_(s32) = G_ANYEXT [[UMIN1]](s16) + ; VI-NEXT: [[ANYEXT2:%[0-9]+]]:_(s32) = G_ANYEXT [[UMIN2]](s16) + ; VI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[ANYEXT]](s32), [[ANYEXT1]](s32), [[ANYEXT2]](s32) + ; VI-NEXT: S_NOP 0, implicit [[BUILD_VECTOR]](<3 x s32>) ; GFX9-LABEL: name: test_umin_v3s16 ; GFX9: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF - ; GFX9: [[DEF1:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF - ; GFX9: [[CONCAT_VECTORS:%[0-9]+]]:_(<12 x s16>) = G_CONCAT_VECTORS [[DEF]](<4 x s16>), [[DEF1]](<4 x s16>), [[DEF1]](<4 x s16>) - ; GFX9: [[UV:%[0-9]+]]:_(<3 x s16>), [[UV1:%[0-9]+]]:_(<3 x s16>), [[UV2:%[0-9]+]]:_(<3 x s16>), [[UV3:%[0-9]+]]:_(<3 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<12 x s16>) - ; GFX9: [[DEF2:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF - ; GFX9: 
[[CONCAT_VECTORS1:%[0-9]+]]:_(<12 x s16>) = G_CONCAT_VECTORS [[DEF2]](<4 x s16>), [[DEF1]](<4 x s16>), [[DEF1]](<4 x s16>) - ; GFX9: [[UV4:%[0-9]+]]:_(<3 x s16>), [[UV5:%[0-9]+]]:_(<3 x s16>), [[UV6:%[0-9]+]]:_(<3 x s16>), [[UV7:%[0-9]+]]:_(<3 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS1]](<12 x s16>) - ; GFX9: [[INSERT:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF1]], [[UV]](<3 x s16>), 0 - ; GFX9: [[INSERT1:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF1]], [[UV4]](<3 x s16>), 0 - ; GFX9: [[UV8:%[0-9]+]]:_(<2 x s16>), [[UV9:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[INSERT]](<4 x s16>) - ; GFX9: [[UV10:%[0-9]+]]:_(<2 x s16>), [[UV11:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[INSERT1]](<4 x s16>) - ; GFX9: [[UMIN:%[0-9]+]]:_(<2 x s16>) = G_UMIN [[UV8]], [[UV10]] - ; GFX9: [[UMIN1:%[0-9]+]]:_(<2 x s16>) = G_UMIN [[UV9]], [[UV11]] - ; GFX9: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UMIN]](<2 x s16>) - ; GFX9: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; GFX9: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) - ; GFX9: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UMIN1]](<2 x s16>) - ; GFX9: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[BITCAST]](s32), [[LSHR]](s32), [[BITCAST1]](s32) - ; GFX9: S_NOP 0, implicit [[BUILD_VECTOR]](<3 x s32>) + ; GFX9-NEXT: [[DEF1:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF + ; GFX9-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<12 x s16>) = G_CONCAT_VECTORS [[DEF]](<4 x s16>), [[DEF1]](<4 x s16>), [[DEF1]](<4 x s16>) + ; GFX9-NEXT: [[UV:%[0-9]+]]:_(<3 x s16>), [[UV1:%[0-9]+]]:_(<3 x s16>), [[UV2:%[0-9]+]]:_(<3 x s16>), [[UV3:%[0-9]+]]:_(<3 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<12 x s16>) + ; GFX9-NEXT: [[DEF2:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF + ; GFX9-NEXT: [[CONCAT_VECTORS1:%[0-9]+]]:_(<12 x s16>) = G_CONCAT_VECTORS [[DEF2]](<4 x s16>), [[DEF1]](<4 x s16>), [[DEF1]](<4 x s16>) + ; GFX9-NEXT: [[UV4:%[0-9]+]]:_(<3 x s16>), [[UV5:%[0-9]+]]:_(<3 x s16>), [[UV6:%[0-9]+]]:_(<3 x s16>), [[UV7:%[0-9]+]]:_(<3 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS1]](<12 x s16>) + ; GFX9-NEXT: [[INSERT:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF1]], [[UV]](<3 x s16>), 0 + ; GFX9-NEXT: [[INSERT1:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF1]], [[UV4]](<3 x s16>), 0 + ; GFX9-NEXT: [[UV8:%[0-9]+]]:_(<2 x s16>), [[UV9:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[INSERT]](<4 x s16>) + ; GFX9-NEXT: [[UV10:%[0-9]+]]:_(<2 x s16>), [[UV11:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[INSERT1]](<4 x s16>) + ; GFX9-NEXT: [[UMIN:%[0-9]+]]:_(<2 x s16>) = G_UMIN [[UV8]], [[UV10]] + ; GFX9-NEXT: [[UMIN1:%[0-9]+]]:_(<2 x s16>) = G_UMIN [[UV9]], [[UV11]] + ; GFX9-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UMIN]](<2 x s16>) + ; GFX9-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; GFX9-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) + ; GFX9-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UMIN1]](<2 x s16>) + ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[BITCAST]](s32), [[LSHR]](s32), [[BITCAST1]](s32) + ; GFX9-NEXT: S_NOP 0, implicit [[BUILD_VECTOR]](<3 x s32>) %0:_(<3 x s16>) = G_IMPLICIT_DEF %1:_(<3 x s16>) = G_IMPLICIT_DEF %2:_(<3 x s16>) = G_UMIN %0, %1 @@ -414,90 +414,90 @@ body: | ; SI-LABEL: name: test_umin_v4s16 ; SI: [[COPY:%[0-9]+]]:_(<4 x s16>) = COPY $vgpr0_vgpr1 - ; SI: [[COPY1:%[0-9]+]]:_(<4 x s16>) = COPY $vgpr2_vgpr3 - ; SI: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[COPY]](<4 x s16>) - ; SI: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>) - ; SI: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; SI: 
[[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) - ; SI: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>) - ; SI: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C]](s32) - ; SI: [[UV2:%[0-9]+]]:_(<2 x s16>), [[UV3:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[COPY1]](<4 x s16>) - ; SI: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[UV2]](<2 x s16>) - ; SI: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C]](s32) - ; SI: [[BITCAST3:%[0-9]+]]:_(s32) = G_BITCAST [[UV3]](<2 x s16>) - ; SI: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST3]], [[C]](s32) - ; SI: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 - ; SI: [[AND:%[0-9]+]]:_(s32) = G_AND [[BITCAST]], [[C1]] - ; SI: [[AND1:%[0-9]+]]:_(s32) = G_AND [[BITCAST2]], [[C1]] - ; SI: [[UMIN:%[0-9]+]]:_(s32) = G_UMIN [[AND]], [[AND1]] - ; SI: [[AND2:%[0-9]+]]:_(s32) = G_AND [[LSHR]], [[C1]] - ; SI: [[AND3:%[0-9]+]]:_(s32) = G_AND [[LSHR2]], [[C1]] - ; SI: [[UMIN1:%[0-9]+]]:_(s32) = G_UMIN [[AND2]], [[AND3]] - ; SI: [[AND4:%[0-9]+]]:_(s32) = G_AND [[BITCAST1]], [[C1]] - ; SI: [[AND5:%[0-9]+]]:_(s32) = G_AND [[BITCAST3]], [[C1]] - ; SI: [[UMIN2:%[0-9]+]]:_(s32) = G_UMIN [[AND4]], [[AND5]] - ; SI: [[AND6:%[0-9]+]]:_(s32) = G_AND [[LSHR1]], [[C1]] - ; SI: [[AND7:%[0-9]+]]:_(s32) = G_AND [[LSHR3]], [[C1]] - ; SI: [[UMIN3:%[0-9]+]]:_(s32) = G_UMIN [[AND6]], [[AND7]] - ; SI: [[AND8:%[0-9]+]]:_(s32) = G_AND [[UMIN]], [[C1]] - ; SI: [[AND9:%[0-9]+]]:_(s32) = G_AND [[UMIN1]], [[C1]] - ; SI: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND9]], [[C]](s32) - ; SI: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND8]], [[SHL]] - ; SI: [[BITCAST4:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) - ; SI: [[AND10:%[0-9]+]]:_(s32) = G_AND [[UMIN2]], [[C1]] - ; SI: [[AND11:%[0-9]+]]:_(s32) = G_AND [[UMIN3]], [[C1]] - ; SI: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND11]], [[C]](s32) - ; SI: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND10]], [[SHL1]] - ; SI: [[BITCAST5:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32) - ; SI: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BITCAST4]](<2 x s16>), [[BITCAST5]](<2 x s16>) - ; SI: $vgpr0_vgpr1 = COPY [[CONCAT_VECTORS]](<4 x s16>) + ; SI-NEXT: [[COPY1:%[0-9]+]]:_(<4 x s16>) = COPY $vgpr2_vgpr3 + ; SI-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[COPY]](<4 x s16>) + ; SI-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>) + ; SI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; SI-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) + ; SI-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>) + ; SI-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C]](s32) + ; SI-NEXT: [[UV2:%[0-9]+]]:_(<2 x s16>), [[UV3:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[COPY1]](<4 x s16>) + ; SI-NEXT: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[UV2]](<2 x s16>) + ; SI-NEXT: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C]](s32) + ; SI-NEXT: [[BITCAST3:%[0-9]+]]:_(s32) = G_BITCAST [[UV3]](<2 x s16>) + ; SI-NEXT: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST3]], [[C]](s32) + ; SI-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 + ; SI-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[BITCAST]], [[C1]] + ; SI-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[BITCAST2]], [[C1]] + ; SI-NEXT: [[UMIN:%[0-9]+]]:_(s32) = G_UMIN [[AND]], [[AND1]] + ; SI-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[LSHR]], [[C1]] + ; SI-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[LSHR2]], [[C1]] + ; SI-NEXT: [[UMIN1:%[0-9]+]]:_(s32) = G_UMIN [[AND2]], [[AND3]] + ; SI-NEXT: [[AND4:%[0-9]+]]:_(s32) = G_AND [[BITCAST1]], [[C1]] + ; SI-NEXT: 
[[AND5:%[0-9]+]]:_(s32) = G_AND [[BITCAST3]], [[C1]] + ; SI-NEXT: [[UMIN2:%[0-9]+]]:_(s32) = G_UMIN [[AND4]], [[AND5]] + ; SI-NEXT: [[AND6:%[0-9]+]]:_(s32) = G_AND [[LSHR1]], [[C1]] + ; SI-NEXT: [[AND7:%[0-9]+]]:_(s32) = G_AND [[LSHR3]], [[C1]] + ; SI-NEXT: [[UMIN3:%[0-9]+]]:_(s32) = G_UMIN [[AND6]], [[AND7]] + ; SI-NEXT: [[AND8:%[0-9]+]]:_(s32) = G_AND [[UMIN]], [[C1]] + ; SI-NEXT: [[AND9:%[0-9]+]]:_(s32) = G_AND [[UMIN1]], [[C1]] + ; SI-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND9]], [[C]](s32) + ; SI-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND8]], [[SHL]] + ; SI-NEXT: [[BITCAST4:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) + ; SI-NEXT: [[AND10:%[0-9]+]]:_(s32) = G_AND [[UMIN2]], [[C1]] + ; SI-NEXT: [[AND11:%[0-9]+]]:_(s32) = G_AND [[UMIN3]], [[C1]] + ; SI-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND11]], [[C]](s32) + ; SI-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND10]], [[SHL1]] + ; SI-NEXT: [[BITCAST5:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32) + ; SI-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BITCAST4]](<2 x s16>), [[BITCAST5]](<2 x s16>) + ; SI-NEXT: $vgpr0_vgpr1 = COPY [[CONCAT_VECTORS]](<4 x s16>) ; VI-LABEL: name: test_umin_v4s16 ; VI: [[COPY:%[0-9]+]]:_(<4 x s16>) = COPY $vgpr0_vgpr1 - ; VI: [[COPY1:%[0-9]+]]:_(<4 x s16>) = COPY $vgpr2_vgpr3 - ; VI: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[COPY]](<4 x s16>) - ; VI: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>) - ; VI: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST]](s32) - ; VI: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; VI: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) - ; VI: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) - ; VI: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>) - ; VI: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST1]](s32) - ; VI: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C]](s32) - ; VI: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR1]](s32) - ; VI: [[UV2:%[0-9]+]]:_(<2 x s16>), [[UV3:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[COPY1]](<4 x s16>) - ; VI: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[UV2]](<2 x s16>) - ; VI: [[TRUNC4:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST2]](s32) - ; VI: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C]](s32) - ; VI: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR2]](s32) - ; VI: [[BITCAST3:%[0-9]+]]:_(s32) = G_BITCAST [[UV3]](<2 x s16>) - ; VI: [[TRUNC6:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST3]](s32) - ; VI: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST3]], [[C]](s32) - ; VI: [[TRUNC7:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR3]](s32) - ; VI: [[UMIN:%[0-9]+]]:_(s16) = G_UMIN [[TRUNC]], [[TRUNC4]] - ; VI: [[UMIN1:%[0-9]+]]:_(s16) = G_UMIN [[TRUNC1]], [[TRUNC5]] - ; VI: [[UMIN2:%[0-9]+]]:_(s16) = G_UMIN [[TRUNC2]], [[TRUNC6]] - ; VI: [[UMIN3:%[0-9]+]]:_(s16) = G_UMIN [[TRUNC3]], [[TRUNC7]] - ; VI: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[UMIN]](s16) - ; VI: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[UMIN1]](s16) - ; VI: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C]](s32) - ; VI: [[OR:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL]] - ; VI: [[BITCAST4:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) - ; VI: [[ZEXT2:%[0-9]+]]:_(s32) = G_ZEXT [[UMIN2]](s16) - ; VI: [[ZEXT3:%[0-9]+]]:_(s32) = G_ZEXT [[UMIN3]](s16) - ; VI: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[ZEXT3]], [[C]](s32) - ; VI: [[OR1:%[0-9]+]]:_(s32) = G_OR [[ZEXT2]], [[SHL1]] - ; VI: [[BITCAST5:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32) - ; VI: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BITCAST4]](<2 x s16>), [[BITCAST5]](<2 x s16>) - ; VI: 
$vgpr0_vgpr1 = COPY [[CONCAT_VECTORS]](<4 x s16>) + ; VI-NEXT: [[COPY1:%[0-9]+]]:_(<4 x s16>) = COPY $vgpr2_vgpr3 + ; VI-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[COPY]](<4 x s16>) + ; VI-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>) + ; VI-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST]](s32) + ; VI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; VI-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) + ; VI-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) + ; VI-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>) + ; VI-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST1]](s32) + ; VI-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C]](s32) + ; VI-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR1]](s32) + ; VI-NEXT: [[UV2:%[0-9]+]]:_(<2 x s16>), [[UV3:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[COPY1]](<4 x s16>) + ; VI-NEXT: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[UV2]](<2 x s16>) + ; VI-NEXT: [[TRUNC4:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST2]](s32) + ; VI-NEXT: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C]](s32) + ; VI-NEXT: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR2]](s32) + ; VI-NEXT: [[BITCAST3:%[0-9]+]]:_(s32) = G_BITCAST [[UV3]](<2 x s16>) + ; VI-NEXT: [[TRUNC6:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST3]](s32) + ; VI-NEXT: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST3]], [[C]](s32) + ; VI-NEXT: [[TRUNC7:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR3]](s32) + ; VI-NEXT: [[UMIN:%[0-9]+]]:_(s16) = G_UMIN [[TRUNC]], [[TRUNC4]] + ; VI-NEXT: [[UMIN1:%[0-9]+]]:_(s16) = G_UMIN [[TRUNC1]], [[TRUNC5]] + ; VI-NEXT: [[UMIN2:%[0-9]+]]:_(s16) = G_UMIN [[TRUNC2]], [[TRUNC6]] + ; VI-NEXT: [[UMIN3:%[0-9]+]]:_(s16) = G_UMIN [[TRUNC3]], [[TRUNC7]] + ; VI-NEXT: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[UMIN]](s16) + ; VI-NEXT: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[UMIN1]](s16) + ; VI-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C]](s32) + ; VI-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL]] + ; VI-NEXT: [[BITCAST4:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) + ; VI-NEXT: [[ZEXT2:%[0-9]+]]:_(s32) = G_ZEXT [[UMIN2]](s16) + ; VI-NEXT: [[ZEXT3:%[0-9]+]]:_(s32) = G_ZEXT [[UMIN3]](s16) + ; VI-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[ZEXT3]], [[C]](s32) + ; VI-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[ZEXT2]], [[SHL1]] + ; VI-NEXT: [[BITCAST5:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32) + ; VI-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BITCAST4]](<2 x s16>), [[BITCAST5]](<2 x s16>) + ; VI-NEXT: $vgpr0_vgpr1 = COPY [[CONCAT_VECTORS]](<4 x s16>) ; GFX9-LABEL: name: test_umin_v4s16 ; GFX9: [[COPY:%[0-9]+]]:_(<4 x s16>) = COPY $vgpr0_vgpr1 - ; GFX9: [[COPY1:%[0-9]+]]:_(<4 x s16>) = COPY $vgpr2_vgpr3 - ; GFX9: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[COPY]](<4 x s16>) - ; GFX9: [[UV2:%[0-9]+]]:_(<2 x s16>), [[UV3:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[COPY1]](<4 x s16>) - ; GFX9: [[UMIN:%[0-9]+]]:_(<2 x s16>) = G_UMIN [[UV]], [[UV2]] - ; GFX9: [[UMIN1:%[0-9]+]]:_(<2 x s16>) = G_UMIN [[UV1]], [[UV3]] - ; GFX9: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[UMIN]](<2 x s16>), [[UMIN1]](<2 x s16>) - ; GFX9: $vgpr0_vgpr1 = COPY [[CONCAT_VECTORS]](<4 x s16>) + ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(<4 x s16>) = COPY $vgpr2_vgpr3 + ; GFX9-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[COPY]](<4 x s16>) + ; GFX9-NEXT: [[UV2:%[0-9]+]]:_(<2 x s16>), [[UV3:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[COPY1]](<4 x 
s16>) + ; GFX9-NEXT: [[UMIN:%[0-9]+]]:_(<2 x s16>) = G_UMIN [[UV]], [[UV2]] + ; GFX9-NEXT: [[UMIN1:%[0-9]+]]:_(<2 x s16>) = G_UMIN [[UV1]], [[UV3]] + ; GFX9-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[UMIN]](<2 x s16>), [[UMIN1]](<2 x s16>) + ; GFX9-NEXT: $vgpr0_vgpr1 = COPY [[CONCAT_VECTORS]](<4 x s16>) %0:_(<4 x s16>) = COPY $vgpr0_vgpr1 %1:_(<4 x s16>) = COPY $vgpr2_vgpr3 %2:_(<4 x s16>) = G_UMIN %0, %1 diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-umulh.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-umulh.mir index 3c66fb4e4d60..830c6459e428 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-umulh.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-umulh.mir @@ -10,14 +10,14 @@ body: | ; GFX8-LABEL: name: test_umulh_s32 ; GFX8: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX8: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX8: [[UMULH:%[0-9]+]]:_(s32) = G_UMULH [[COPY]], [[COPY1]] - ; GFX8: $vgpr0 = COPY [[UMULH]](s32) + ; GFX8-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; GFX8-NEXT: [[UMULH:%[0-9]+]]:_(s32) = G_UMULH [[COPY]], [[COPY1]] + ; GFX8-NEXT: $vgpr0 = COPY [[UMULH]](s32) ; GFX9-LABEL: name: test_umulh_s32 ; GFX9: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX9: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX9: [[UMULH:%[0-9]+]]:_(s32) = G_UMULH [[COPY]], [[COPY1]] - ; GFX9: $vgpr0 = COPY [[UMULH]](s32) + ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; GFX9-NEXT: [[UMULH:%[0-9]+]]:_(s32) = G_UMULH [[COPY]], [[COPY1]] + ; GFX9-NEXT: $vgpr0 = COPY [[UMULH]](s32) %0:_(s32) = COPY $vgpr0 %1:_(s32) = COPY $vgpr1 %2:_(s32) = G_UMULH %0, %1 @@ -32,22 +32,22 @@ body: | ; GFX8-LABEL: name: test_umulh_v2s32 ; GFX8: [[COPY:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr0_vgpr1 - ; GFX8: [[COPY1:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr2_vgpr3 - ; GFX8: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<2 x s32>) - ; GFX8: [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](<2 x s32>) - ; GFX8: [[UMULH:%[0-9]+]]:_(s32) = G_UMULH [[UV]], [[UV2]] - ; GFX8: [[UMULH1:%[0-9]+]]:_(s32) = G_UMULH [[UV1]], [[UV3]] - ; GFX8: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[UMULH]](s32), [[UMULH1]](s32) - ; GFX8: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x s32>) + ; GFX8-NEXT: [[COPY1:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr2_vgpr3 + ; GFX8-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<2 x s32>) + ; GFX8-NEXT: [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](<2 x s32>) + ; GFX8-NEXT: [[UMULH:%[0-9]+]]:_(s32) = G_UMULH [[UV]], [[UV2]] + ; GFX8-NEXT: [[UMULH1:%[0-9]+]]:_(s32) = G_UMULH [[UV1]], [[UV3]] + ; GFX8-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[UMULH]](s32), [[UMULH1]](s32) + ; GFX8-NEXT: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x s32>) ; GFX9-LABEL: name: test_umulh_v2s32 ; GFX9: [[COPY:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr0_vgpr1 - ; GFX9: [[COPY1:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr2_vgpr3 - ; GFX9: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<2 x s32>) - ; GFX9: [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](<2 x s32>) - ; GFX9: [[UMULH:%[0-9]+]]:_(s32) = G_UMULH [[UV]], [[UV2]] - ; GFX9: [[UMULH1:%[0-9]+]]:_(s32) = G_UMULH [[UV1]], [[UV3]] - ; GFX9: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[UMULH]](s32), [[UMULH1]](s32) - ; GFX9: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x s32>) + ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr2_vgpr3 + ; 
GFX9-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<2 x s32>) + ; GFX9-NEXT: [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](<2 x s32>) + ; GFX9-NEXT: [[UMULH:%[0-9]+]]:_(s32) = G_UMULH [[UV]], [[UV2]] + ; GFX9-NEXT: [[UMULH1:%[0-9]+]]:_(s32) = G_UMULH [[UV1]], [[UV3]] + ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[UMULH]](s32), [[UMULH1]](s32) + ; GFX9-NEXT: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x s32>) %0:_(<2 x s32>) = COPY $vgpr0_vgpr1 %1:_(<2 x s32>) = COPY $vgpr2_vgpr3 %2:_(<2 x s32>) = G_UMULH %0, %1 @@ -62,60 +62,60 @@ body: | ; GFX8-LABEL: name: test_umulh_s64 ; GFX8: [[COPY:%[0-9]+]]:_(s64) = COPY $vgpr0_vgpr1 - ; GFX8: [[COPY1:%[0-9]+]]:_(s64) = COPY $vgpr2_vgpr3 - ; GFX8: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](s64) - ; GFX8: [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](s64) - ; GFX8: [[MUL:%[0-9]+]]:_(s32) = G_MUL [[UV1]], [[UV2]] - ; GFX8: [[MUL1:%[0-9]+]]:_(s32) = G_MUL [[UV]], [[UV3]] - ; GFX8: [[UMULH:%[0-9]+]]:_(s32) = G_UMULH [[UV]], [[UV2]] - ; GFX8: [[UADDO:%[0-9]+]]:_(s32), [[UADDO1:%[0-9]+]]:_(s1) = G_UADDO [[MUL]], [[MUL1]] - ; GFX8: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO1]](s1) - ; GFX8: [[UADDO2:%[0-9]+]]:_(s32), [[UADDO3:%[0-9]+]]:_(s1) = G_UADDO [[UADDO]], [[UMULH]] - ; GFX8: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO3]](s1) - ; GFX8: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[ZEXT]], [[ZEXT1]] - ; GFX8: [[MUL2:%[0-9]+]]:_(s32) = G_MUL [[UV1]], [[UV3]] - ; GFX8: [[UMULH1:%[0-9]+]]:_(s32) = G_UMULH [[UV1]], [[UV2]] - ; GFX8: [[UMULH2:%[0-9]+]]:_(s32) = G_UMULH [[UV]], [[UV3]] - ; GFX8: [[UADDO4:%[0-9]+]]:_(s32), [[UADDO5:%[0-9]+]]:_(s1) = G_UADDO [[MUL2]], [[UMULH1]] - ; GFX8: [[ZEXT2:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO5]](s1) - ; GFX8: [[UADDO6:%[0-9]+]]:_(s32), [[UADDO7:%[0-9]+]]:_(s1) = G_UADDO [[UADDO4]], [[UMULH2]] - ; GFX8: [[ZEXT3:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO7]](s1) - ; GFX8: [[ADD1:%[0-9]+]]:_(s32) = G_ADD [[ZEXT2]], [[ZEXT3]] - ; GFX8: [[UADDO8:%[0-9]+]]:_(s32), [[UADDO9:%[0-9]+]]:_(s1) = G_UADDO [[UADDO6]], [[ADD]] - ; GFX8: [[ZEXT4:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO9]](s1) - ; GFX8: [[ADD2:%[0-9]+]]:_(s32) = G_ADD [[ADD1]], [[ZEXT4]] - ; GFX8: [[UMULH3:%[0-9]+]]:_(s32) = G_UMULH [[UV1]], [[UV3]] - ; GFX8: [[ADD3:%[0-9]+]]:_(s32) = G_ADD [[UMULH3]], [[ADD2]] - ; GFX8: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UADDO8]](s32), [[ADD3]](s32) - ; GFX8: $vgpr0_vgpr1 = COPY [[MV]](s64) + ; GFX8-NEXT: [[COPY1:%[0-9]+]]:_(s64) = COPY $vgpr2_vgpr3 + ; GFX8-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](s64) + ; GFX8-NEXT: [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](s64) + ; GFX8-NEXT: [[MUL:%[0-9]+]]:_(s32) = G_MUL [[UV1]], [[UV2]] + ; GFX8-NEXT: [[MUL1:%[0-9]+]]:_(s32) = G_MUL [[UV]], [[UV3]] + ; GFX8-NEXT: [[UMULH:%[0-9]+]]:_(s32) = G_UMULH [[UV]], [[UV2]] + ; GFX8-NEXT: [[UADDO:%[0-9]+]]:_(s32), [[UADDO1:%[0-9]+]]:_(s1) = G_UADDO [[MUL]], [[MUL1]] + ; GFX8-NEXT: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO1]](s1) + ; GFX8-NEXT: [[UADDO2:%[0-9]+]]:_(s32), [[UADDO3:%[0-9]+]]:_(s1) = G_UADDO [[UADDO]], [[UMULH]] + ; GFX8-NEXT: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO3]](s1) + ; GFX8-NEXT: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[ZEXT]], [[ZEXT1]] + ; GFX8-NEXT: [[MUL2:%[0-9]+]]:_(s32) = G_MUL [[UV1]], [[UV3]] + ; GFX8-NEXT: [[UMULH1:%[0-9]+]]:_(s32) = G_UMULH [[UV1]], [[UV2]] + ; GFX8-NEXT: [[UMULH2:%[0-9]+]]:_(s32) = G_UMULH [[UV]], [[UV3]] + ; GFX8-NEXT: 
[[UADDO4:%[0-9]+]]:_(s32), [[UADDO5:%[0-9]+]]:_(s1) = G_UADDO [[MUL2]], [[UMULH1]] + ; GFX8-NEXT: [[ZEXT2:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO5]](s1) + ; GFX8-NEXT: [[UADDO6:%[0-9]+]]:_(s32), [[UADDO7:%[0-9]+]]:_(s1) = G_UADDO [[UADDO4]], [[UMULH2]] + ; GFX8-NEXT: [[ZEXT3:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO7]](s1) + ; GFX8-NEXT: [[ADD1:%[0-9]+]]:_(s32) = G_ADD [[ZEXT2]], [[ZEXT3]] + ; GFX8-NEXT: [[UADDO8:%[0-9]+]]:_(s32), [[UADDO9:%[0-9]+]]:_(s1) = G_UADDO [[UADDO6]], [[ADD]] + ; GFX8-NEXT: [[ZEXT4:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO9]](s1) + ; GFX8-NEXT: [[ADD2:%[0-9]+]]:_(s32) = G_ADD [[ADD1]], [[ZEXT4]] + ; GFX8-NEXT: [[UMULH3:%[0-9]+]]:_(s32) = G_UMULH [[UV1]], [[UV3]] + ; GFX8-NEXT: [[ADD3:%[0-9]+]]:_(s32) = G_ADD [[UMULH3]], [[ADD2]] + ; GFX8-NEXT: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UADDO8]](s32), [[ADD3]](s32) + ; GFX8-NEXT: $vgpr0_vgpr1 = COPY [[MV]](s64) ; GFX9-LABEL: name: test_umulh_s64 ; GFX9: [[COPY:%[0-9]+]]:_(s64) = COPY $vgpr0_vgpr1 - ; GFX9: [[COPY1:%[0-9]+]]:_(s64) = COPY $vgpr2_vgpr3 - ; GFX9: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](s64) - ; GFX9: [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](s64) - ; GFX9: [[MUL:%[0-9]+]]:_(s32) = G_MUL [[UV1]], [[UV2]] - ; GFX9: [[MUL1:%[0-9]+]]:_(s32) = G_MUL [[UV]], [[UV3]] - ; GFX9: [[UMULH:%[0-9]+]]:_(s32) = G_UMULH [[UV]], [[UV2]] - ; GFX9: [[UADDO:%[0-9]+]]:_(s32), [[UADDO1:%[0-9]+]]:_(s1) = G_UADDO [[MUL]], [[MUL1]] - ; GFX9: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO1]](s1) - ; GFX9: [[UADDO2:%[0-9]+]]:_(s32), [[UADDO3:%[0-9]+]]:_(s1) = G_UADDO [[UADDO]], [[UMULH]] - ; GFX9: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO3]](s1) - ; GFX9: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[ZEXT]], [[ZEXT1]] - ; GFX9: [[MUL2:%[0-9]+]]:_(s32) = G_MUL [[UV1]], [[UV3]] - ; GFX9: [[UMULH1:%[0-9]+]]:_(s32) = G_UMULH [[UV1]], [[UV2]] - ; GFX9: [[UMULH2:%[0-9]+]]:_(s32) = G_UMULH [[UV]], [[UV3]] - ; GFX9: [[UADDO4:%[0-9]+]]:_(s32), [[UADDO5:%[0-9]+]]:_(s1) = G_UADDO [[MUL2]], [[UMULH1]] - ; GFX9: [[ZEXT2:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO5]](s1) - ; GFX9: [[UADDO6:%[0-9]+]]:_(s32), [[UADDO7:%[0-9]+]]:_(s1) = G_UADDO [[UADDO4]], [[UMULH2]] - ; GFX9: [[ZEXT3:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO7]](s1) - ; GFX9: [[ADD1:%[0-9]+]]:_(s32) = G_ADD [[ZEXT2]], [[ZEXT3]] - ; GFX9: [[UADDO8:%[0-9]+]]:_(s32), [[UADDO9:%[0-9]+]]:_(s1) = G_UADDO [[UADDO6]], [[ADD]] - ; GFX9: [[ZEXT4:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO9]](s1) - ; GFX9: [[ADD2:%[0-9]+]]:_(s32) = G_ADD [[ADD1]], [[ZEXT4]] - ; GFX9: [[UMULH3:%[0-9]+]]:_(s32) = G_UMULH [[UV1]], [[UV3]] - ; GFX9: [[ADD3:%[0-9]+]]:_(s32) = G_ADD [[UMULH3]], [[ADD2]] - ; GFX9: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UADDO8]](s32), [[ADD3]](s32) - ; GFX9: $vgpr0_vgpr1 = COPY [[MV]](s64) + ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(s64) = COPY $vgpr2_vgpr3 + ; GFX9-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](s64) + ; GFX9-NEXT: [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](s64) + ; GFX9-NEXT: [[MUL:%[0-9]+]]:_(s32) = G_MUL [[UV1]], [[UV2]] + ; GFX9-NEXT: [[MUL1:%[0-9]+]]:_(s32) = G_MUL [[UV]], [[UV3]] + ; GFX9-NEXT: [[UMULH:%[0-9]+]]:_(s32) = G_UMULH [[UV]], [[UV2]] + ; GFX9-NEXT: [[UADDO:%[0-9]+]]:_(s32), [[UADDO1:%[0-9]+]]:_(s1) = G_UADDO [[MUL]], [[MUL1]] + ; GFX9-NEXT: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO1]](s1) + ; GFX9-NEXT: [[UADDO2:%[0-9]+]]:_(s32), [[UADDO3:%[0-9]+]]:_(s1) = G_UADDO [[UADDO]], [[UMULH]] + ; GFX9-NEXT: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO3]](s1) + ; GFX9-NEXT: [[ADD:%[0-9]+]]:_(s32) = G_ADD 
[[ZEXT]], [[ZEXT1]] + ; GFX9-NEXT: [[MUL2:%[0-9]+]]:_(s32) = G_MUL [[UV1]], [[UV3]] + ; GFX9-NEXT: [[UMULH1:%[0-9]+]]:_(s32) = G_UMULH [[UV1]], [[UV2]] + ; GFX9-NEXT: [[UMULH2:%[0-9]+]]:_(s32) = G_UMULH [[UV]], [[UV3]] + ; GFX9-NEXT: [[UADDO4:%[0-9]+]]:_(s32), [[UADDO5:%[0-9]+]]:_(s1) = G_UADDO [[MUL2]], [[UMULH1]] + ; GFX9-NEXT: [[ZEXT2:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO5]](s1) + ; GFX9-NEXT: [[UADDO6:%[0-9]+]]:_(s32), [[UADDO7:%[0-9]+]]:_(s1) = G_UADDO [[UADDO4]], [[UMULH2]] + ; GFX9-NEXT: [[ZEXT3:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO7]](s1) + ; GFX9-NEXT: [[ADD1:%[0-9]+]]:_(s32) = G_ADD [[ZEXT2]], [[ZEXT3]] + ; GFX9-NEXT: [[UADDO8:%[0-9]+]]:_(s32), [[UADDO9:%[0-9]+]]:_(s1) = G_UADDO [[UADDO6]], [[ADD]] + ; GFX9-NEXT: [[ZEXT4:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO9]](s1) + ; GFX9-NEXT: [[ADD2:%[0-9]+]]:_(s32) = G_ADD [[ADD1]], [[ZEXT4]] + ; GFX9-NEXT: [[UMULH3:%[0-9]+]]:_(s32) = G_UMULH [[UV1]], [[UV3]] + ; GFX9-NEXT: [[ADD3:%[0-9]+]]:_(s32) = G_ADD [[UMULH3]], [[ADD2]] + ; GFX9-NEXT: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UADDO8]](s32), [[ADD3]](s32) + ; GFX9-NEXT: $vgpr0_vgpr1 = COPY [[MV]](s64) %0:_(s64) = COPY $vgpr0_vgpr1 %1:_(s64) = COPY $vgpr2_vgpr3 %2:_(s64) = G_UMULH %0, %1 @@ -130,114 +130,114 @@ body: | ; GFX8-LABEL: name: test_umulh_v2s64 ; GFX8: [[COPY:%[0-9]+]]:_(<2 x s64>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3 - ; GFX8: [[COPY1:%[0-9]+]]:_(<2 x s64>) = COPY $vgpr4_vgpr5_vgpr6_vgpr7 - ; GFX8: [[UV:%[0-9]+]]:_(s64), [[UV1:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[COPY]](<2 x s64>) - ; GFX8: [[UV2:%[0-9]+]]:_(s64), [[UV3:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[COPY1]](<2 x s64>) - ; GFX8: [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[UV]](s64) - ; GFX8: [[UV6:%[0-9]+]]:_(s32), [[UV7:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[UV2]](s64) - ; GFX8: [[MUL:%[0-9]+]]:_(s32) = G_MUL [[UV5]], [[UV6]] - ; GFX8: [[MUL1:%[0-9]+]]:_(s32) = G_MUL [[UV4]], [[UV7]] - ; GFX8: [[UMULH:%[0-9]+]]:_(s32) = G_UMULH [[UV4]], [[UV6]] - ; GFX8: [[UADDO:%[0-9]+]]:_(s32), [[UADDO1:%[0-9]+]]:_(s1) = G_UADDO [[MUL]], [[MUL1]] - ; GFX8: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO1]](s1) - ; GFX8: [[UADDO2:%[0-9]+]]:_(s32), [[UADDO3:%[0-9]+]]:_(s1) = G_UADDO [[UADDO]], [[UMULH]] - ; GFX8: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO3]](s1) - ; GFX8: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[ZEXT]], [[ZEXT1]] - ; GFX8: [[MUL2:%[0-9]+]]:_(s32) = G_MUL [[UV5]], [[UV7]] - ; GFX8: [[UMULH1:%[0-9]+]]:_(s32) = G_UMULH [[UV5]], [[UV6]] - ; GFX8: [[UMULH2:%[0-9]+]]:_(s32) = G_UMULH [[UV4]], [[UV7]] - ; GFX8: [[UADDO4:%[0-9]+]]:_(s32), [[UADDO5:%[0-9]+]]:_(s1) = G_UADDO [[MUL2]], [[UMULH1]] - ; GFX8: [[ZEXT2:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO5]](s1) - ; GFX8: [[UADDO6:%[0-9]+]]:_(s32), [[UADDO7:%[0-9]+]]:_(s1) = G_UADDO [[UADDO4]], [[UMULH2]] - ; GFX8: [[ZEXT3:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO7]](s1) - ; GFX8: [[ADD1:%[0-9]+]]:_(s32) = G_ADD [[ZEXT2]], [[ZEXT3]] - ; GFX8: [[UADDO8:%[0-9]+]]:_(s32), [[UADDO9:%[0-9]+]]:_(s1) = G_UADDO [[UADDO6]], [[ADD]] - ; GFX8: [[ZEXT4:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO9]](s1) - ; GFX8: [[ADD2:%[0-9]+]]:_(s32) = G_ADD [[ADD1]], [[ZEXT4]] - ; GFX8: [[UMULH3:%[0-9]+]]:_(s32) = G_UMULH [[UV5]], [[UV7]] - ; GFX8: [[ADD3:%[0-9]+]]:_(s32) = G_ADD [[UMULH3]], [[ADD2]] - ; GFX8: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UADDO8]](s32), [[ADD3]](s32) - ; GFX8: [[UV8:%[0-9]+]]:_(s32), [[UV9:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[UV1]](s64) - ; GFX8: [[UV10:%[0-9]+]]:_(s32), [[UV11:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[UV3]](s64) - ; GFX8: [[MUL3:%[0-9]+]]:_(s32) = G_MUL [[UV9]], [[UV10]] - ; GFX8: 
[[MUL4:%[0-9]+]]:_(s32) = G_MUL [[UV8]], [[UV11]] - ; GFX8: [[UMULH4:%[0-9]+]]:_(s32) = G_UMULH [[UV8]], [[UV10]] - ; GFX8: [[UADDO10:%[0-9]+]]:_(s32), [[UADDO11:%[0-9]+]]:_(s1) = G_UADDO [[MUL3]], [[MUL4]] - ; GFX8: [[ZEXT5:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO11]](s1) - ; GFX8: [[UADDO12:%[0-9]+]]:_(s32), [[UADDO13:%[0-9]+]]:_(s1) = G_UADDO [[UADDO10]], [[UMULH4]] - ; GFX8: [[ZEXT6:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO13]](s1) - ; GFX8: [[ADD4:%[0-9]+]]:_(s32) = G_ADD [[ZEXT5]], [[ZEXT6]] - ; GFX8: [[MUL5:%[0-9]+]]:_(s32) = G_MUL [[UV9]], [[UV11]] - ; GFX8: [[UMULH5:%[0-9]+]]:_(s32) = G_UMULH [[UV9]], [[UV10]] - ; GFX8: [[UMULH6:%[0-9]+]]:_(s32) = G_UMULH [[UV8]], [[UV11]] - ; GFX8: [[UADDO14:%[0-9]+]]:_(s32), [[UADDO15:%[0-9]+]]:_(s1) = G_UADDO [[MUL5]], [[UMULH5]] - ; GFX8: [[ZEXT7:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO15]](s1) - ; GFX8: [[UADDO16:%[0-9]+]]:_(s32), [[UADDO17:%[0-9]+]]:_(s1) = G_UADDO [[UADDO14]], [[UMULH6]] - ; GFX8: [[ZEXT8:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO17]](s1) - ; GFX8: [[ADD5:%[0-9]+]]:_(s32) = G_ADD [[ZEXT7]], [[ZEXT8]] - ; GFX8: [[UADDO18:%[0-9]+]]:_(s32), [[UADDO19:%[0-9]+]]:_(s1) = G_UADDO [[UADDO16]], [[ADD4]] - ; GFX8: [[ZEXT9:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO19]](s1) - ; GFX8: [[ADD6:%[0-9]+]]:_(s32) = G_ADD [[ADD5]], [[ZEXT9]] - ; GFX8: [[UMULH7:%[0-9]+]]:_(s32) = G_UMULH [[UV9]], [[UV11]] - ; GFX8: [[ADD7:%[0-9]+]]:_(s32) = G_ADD [[UMULH7]], [[ADD6]] - ; GFX8: [[MV1:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UADDO18]](s32), [[ADD7]](s32) - ; GFX8: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s64>) = G_BUILD_VECTOR [[MV]](s64), [[MV1]](s64) - ; GFX8: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<2 x s64>) + ; GFX8-NEXT: [[COPY1:%[0-9]+]]:_(<2 x s64>) = COPY $vgpr4_vgpr5_vgpr6_vgpr7 + ; GFX8-NEXT: [[UV:%[0-9]+]]:_(s64), [[UV1:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[COPY]](<2 x s64>) + ; GFX8-NEXT: [[UV2:%[0-9]+]]:_(s64), [[UV3:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[COPY1]](<2 x s64>) + ; GFX8-NEXT: [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[UV]](s64) + ; GFX8-NEXT: [[UV6:%[0-9]+]]:_(s32), [[UV7:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[UV2]](s64) + ; GFX8-NEXT: [[MUL:%[0-9]+]]:_(s32) = G_MUL [[UV5]], [[UV6]] + ; GFX8-NEXT: [[MUL1:%[0-9]+]]:_(s32) = G_MUL [[UV4]], [[UV7]] + ; GFX8-NEXT: [[UMULH:%[0-9]+]]:_(s32) = G_UMULH [[UV4]], [[UV6]] + ; GFX8-NEXT: [[UADDO:%[0-9]+]]:_(s32), [[UADDO1:%[0-9]+]]:_(s1) = G_UADDO [[MUL]], [[MUL1]] + ; GFX8-NEXT: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO1]](s1) + ; GFX8-NEXT: [[UADDO2:%[0-9]+]]:_(s32), [[UADDO3:%[0-9]+]]:_(s1) = G_UADDO [[UADDO]], [[UMULH]] + ; GFX8-NEXT: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO3]](s1) + ; GFX8-NEXT: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[ZEXT]], [[ZEXT1]] + ; GFX8-NEXT: [[MUL2:%[0-9]+]]:_(s32) = G_MUL [[UV5]], [[UV7]] + ; GFX8-NEXT: [[UMULH1:%[0-9]+]]:_(s32) = G_UMULH [[UV5]], [[UV6]] + ; GFX8-NEXT: [[UMULH2:%[0-9]+]]:_(s32) = G_UMULH [[UV4]], [[UV7]] + ; GFX8-NEXT: [[UADDO4:%[0-9]+]]:_(s32), [[UADDO5:%[0-9]+]]:_(s1) = G_UADDO [[MUL2]], [[UMULH1]] + ; GFX8-NEXT: [[ZEXT2:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO5]](s1) + ; GFX8-NEXT: [[UADDO6:%[0-9]+]]:_(s32), [[UADDO7:%[0-9]+]]:_(s1) = G_UADDO [[UADDO4]], [[UMULH2]] + ; GFX8-NEXT: [[ZEXT3:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO7]](s1) + ; GFX8-NEXT: [[ADD1:%[0-9]+]]:_(s32) = G_ADD [[ZEXT2]], [[ZEXT3]] + ; GFX8-NEXT: [[UADDO8:%[0-9]+]]:_(s32), [[UADDO9:%[0-9]+]]:_(s1) = G_UADDO [[UADDO6]], [[ADD]] + ; GFX8-NEXT: [[ZEXT4:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO9]](s1) + ; GFX8-NEXT: [[ADD2:%[0-9]+]]:_(s32) = G_ADD [[ADD1]], [[ZEXT4]] + ; GFX8-NEXT: [[UMULH3:%[0-9]+]]:_(s32) = 
G_UMULH [[UV5]], [[UV7]] + ; GFX8-NEXT: [[ADD3:%[0-9]+]]:_(s32) = G_ADD [[UMULH3]], [[ADD2]] + ; GFX8-NEXT: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UADDO8]](s32), [[ADD3]](s32) + ; GFX8-NEXT: [[UV8:%[0-9]+]]:_(s32), [[UV9:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[UV1]](s64) + ; GFX8-NEXT: [[UV10:%[0-9]+]]:_(s32), [[UV11:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[UV3]](s64) + ; GFX8-NEXT: [[MUL3:%[0-9]+]]:_(s32) = G_MUL [[UV9]], [[UV10]] + ; GFX8-NEXT: [[MUL4:%[0-9]+]]:_(s32) = G_MUL [[UV8]], [[UV11]] + ; GFX8-NEXT: [[UMULH4:%[0-9]+]]:_(s32) = G_UMULH [[UV8]], [[UV10]] + ; GFX8-NEXT: [[UADDO10:%[0-9]+]]:_(s32), [[UADDO11:%[0-9]+]]:_(s1) = G_UADDO [[MUL3]], [[MUL4]] + ; GFX8-NEXT: [[ZEXT5:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO11]](s1) + ; GFX8-NEXT: [[UADDO12:%[0-9]+]]:_(s32), [[UADDO13:%[0-9]+]]:_(s1) = G_UADDO [[UADDO10]], [[UMULH4]] + ; GFX8-NEXT: [[ZEXT6:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO13]](s1) + ; GFX8-NEXT: [[ADD4:%[0-9]+]]:_(s32) = G_ADD [[ZEXT5]], [[ZEXT6]] + ; GFX8-NEXT: [[MUL5:%[0-9]+]]:_(s32) = G_MUL [[UV9]], [[UV11]] + ; GFX8-NEXT: [[UMULH5:%[0-9]+]]:_(s32) = G_UMULH [[UV9]], [[UV10]] + ; GFX8-NEXT: [[UMULH6:%[0-9]+]]:_(s32) = G_UMULH [[UV8]], [[UV11]] + ; GFX8-NEXT: [[UADDO14:%[0-9]+]]:_(s32), [[UADDO15:%[0-9]+]]:_(s1) = G_UADDO [[MUL5]], [[UMULH5]] + ; GFX8-NEXT: [[ZEXT7:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO15]](s1) + ; GFX8-NEXT: [[UADDO16:%[0-9]+]]:_(s32), [[UADDO17:%[0-9]+]]:_(s1) = G_UADDO [[UADDO14]], [[UMULH6]] + ; GFX8-NEXT: [[ZEXT8:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO17]](s1) + ; GFX8-NEXT: [[ADD5:%[0-9]+]]:_(s32) = G_ADD [[ZEXT7]], [[ZEXT8]] + ; GFX8-NEXT: [[UADDO18:%[0-9]+]]:_(s32), [[UADDO19:%[0-9]+]]:_(s1) = G_UADDO [[UADDO16]], [[ADD4]] + ; GFX8-NEXT: [[ZEXT9:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO19]](s1) + ; GFX8-NEXT: [[ADD6:%[0-9]+]]:_(s32) = G_ADD [[ADD5]], [[ZEXT9]] + ; GFX8-NEXT: [[UMULH7:%[0-9]+]]:_(s32) = G_UMULH [[UV9]], [[UV11]] + ; GFX8-NEXT: [[ADD7:%[0-9]+]]:_(s32) = G_ADD [[UMULH7]], [[ADD6]] + ; GFX8-NEXT: [[MV1:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UADDO18]](s32), [[ADD7]](s32) + ; GFX8-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s64>) = G_BUILD_VECTOR [[MV]](s64), [[MV1]](s64) + ; GFX8-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<2 x s64>) ; GFX9-LABEL: name: test_umulh_v2s64 ; GFX9: [[COPY:%[0-9]+]]:_(<2 x s64>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3 - ; GFX9: [[COPY1:%[0-9]+]]:_(<2 x s64>) = COPY $vgpr4_vgpr5_vgpr6_vgpr7 - ; GFX9: [[UV:%[0-9]+]]:_(s64), [[UV1:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[COPY]](<2 x s64>) - ; GFX9: [[UV2:%[0-9]+]]:_(s64), [[UV3:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[COPY1]](<2 x s64>) - ; GFX9: [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[UV]](s64) - ; GFX9: [[UV6:%[0-9]+]]:_(s32), [[UV7:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[UV2]](s64) - ; GFX9: [[MUL:%[0-9]+]]:_(s32) = G_MUL [[UV5]], [[UV6]] - ; GFX9: [[MUL1:%[0-9]+]]:_(s32) = G_MUL [[UV4]], [[UV7]] - ; GFX9: [[UMULH:%[0-9]+]]:_(s32) = G_UMULH [[UV4]], [[UV6]] - ; GFX9: [[UADDO:%[0-9]+]]:_(s32), [[UADDO1:%[0-9]+]]:_(s1) = G_UADDO [[MUL]], [[MUL1]] - ; GFX9: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO1]](s1) - ; GFX9: [[UADDO2:%[0-9]+]]:_(s32), [[UADDO3:%[0-9]+]]:_(s1) = G_UADDO [[UADDO]], [[UMULH]] - ; GFX9: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO3]](s1) - ; GFX9: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[ZEXT]], [[ZEXT1]] - ; GFX9: [[MUL2:%[0-9]+]]:_(s32) = G_MUL [[UV5]], [[UV7]] - ; GFX9: [[UMULH1:%[0-9]+]]:_(s32) = G_UMULH [[UV5]], [[UV6]] - ; GFX9: [[UMULH2:%[0-9]+]]:_(s32) = G_UMULH [[UV4]], [[UV7]] - ; GFX9: [[UADDO4:%[0-9]+]]:_(s32), [[UADDO5:%[0-9]+]]:_(s1) = G_UADDO [[MUL2]], 
[[UMULH1]] - ; GFX9: [[ZEXT2:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO5]](s1) - ; GFX9: [[UADDO6:%[0-9]+]]:_(s32), [[UADDO7:%[0-9]+]]:_(s1) = G_UADDO [[UADDO4]], [[UMULH2]] - ; GFX9: [[ZEXT3:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO7]](s1) - ; GFX9: [[ADD1:%[0-9]+]]:_(s32) = G_ADD [[ZEXT2]], [[ZEXT3]] - ; GFX9: [[UADDO8:%[0-9]+]]:_(s32), [[UADDO9:%[0-9]+]]:_(s1) = G_UADDO [[UADDO6]], [[ADD]] - ; GFX9: [[ZEXT4:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO9]](s1) - ; GFX9: [[ADD2:%[0-9]+]]:_(s32) = G_ADD [[ADD1]], [[ZEXT4]] - ; GFX9: [[UMULH3:%[0-9]+]]:_(s32) = G_UMULH [[UV5]], [[UV7]] - ; GFX9: [[ADD3:%[0-9]+]]:_(s32) = G_ADD [[UMULH3]], [[ADD2]] - ; GFX9: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UADDO8]](s32), [[ADD3]](s32) - ; GFX9: [[UV8:%[0-9]+]]:_(s32), [[UV9:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[UV1]](s64) - ; GFX9: [[UV10:%[0-9]+]]:_(s32), [[UV11:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[UV3]](s64) - ; GFX9: [[MUL3:%[0-9]+]]:_(s32) = G_MUL [[UV9]], [[UV10]] - ; GFX9: [[MUL4:%[0-9]+]]:_(s32) = G_MUL [[UV8]], [[UV11]] - ; GFX9: [[UMULH4:%[0-9]+]]:_(s32) = G_UMULH [[UV8]], [[UV10]] - ; GFX9: [[UADDO10:%[0-9]+]]:_(s32), [[UADDO11:%[0-9]+]]:_(s1) = G_UADDO [[MUL3]], [[MUL4]] - ; GFX9: [[ZEXT5:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO11]](s1) - ; GFX9: [[UADDO12:%[0-9]+]]:_(s32), [[UADDO13:%[0-9]+]]:_(s1) = G_UADDO [[UADDO10]], [[UMULH4]] - ; GFX9: [[ZEXT6:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO13]](s1) - ; GFX9: [[ADD4:%[0-9]+]]:_(s32) = G_ADD [[ZEXT5]], [[ZEXT6]] - ; GFX9: [[MUL5:%[0-9]+]]:_(s32) = G_MUL [[UV9]], [[UV11]] - ; GFX9: [[UMULH5:%[0-9]+]]:_(s32) = G_UMULH [[UV9]], [[UV10]] - ; GFX9: [[UMULH6:%[0-9]+]]:_(s32) = G_UMULH [[UV8]], [[UV11]] - ; GFX9: [[UADDO14:%[0-9]+]]:_(s32), [[UADDO15:%[0-9]+]]:_(s1) = G_UADDO [[MUL5]], [[UMULH5]] - ; GFX9: [[ZEXT7:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO15]](s1) - ; GFX9: [[UADDO16:%[0-9]+]]:_(s32), [[UADDO17:%[0-9]+]]:_(s1) = G_UADDO [[UADDO14]], [[UMULH6]] - ; GFX9: [[ZEXT8:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO17]](s1) - ; GFX9: [[ADD5:%[0-9]+]]:_(s32) = G_ADD [[ZEXT7]], [[ZEXT8]] - ; GFX9: [[UADDO18:%[0-9]+]]:_(s32), [[UADDO19:%[0-9]+]]:_(s1) = G_UADDO [[UADDO16]], [[ADD4]] - ; GFX9: [[ZEXT9:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO19]](s1) - ; GFX9: [[ADD6:%[0-9]+]]:_(s32) = G_ADD [[ADD5]], [[ZEXT9]] - ; GFX9: [[UMULH7:%[0-9]+]]:_(s32) = G_UMULH [[UV9]], [[UV11]] - ; GFX9: [[ADD7:%[0-9]+]]:_(s32) = G_ADD [[UMULH7]], [[ADD6]] - ; GFX9: [[MV1:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UADDO18]](s32), [[ADD7]](s32) - ; GFX9: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s64>) = G_BUILD_VECTOR [[MV]](s64), [[MV1]](s64) - ; GFX9: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<2 x s64>) + ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(<2 x s64>) = COPY $vgpr4_vgpr5_vgpr6_vgpr7 + ; GFX9-NEXT: [[UV:%[0-9]+]]:_(s64), [[UV1:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[COPY]](<2 x s64>) + ; GFX9-NEXT: [[UV2:%[0-9]+]]:_(s64), [[UV3:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[COPY1]](<2 x s64>) + ; GFX9-NEXT: [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[UV]](s64) + ; GFX9-NEXT: [[UV6:%[0-9]+]]:_(s32), [[UV7:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[UV2]](s64) + ; GFX9-NEXT: [[MUL:%[0-9]+]]:_(s32) = G_MUL [[UV5]], [[UV6]] + ; GFX9-NEXT: [[MUL1:%[0-9]+]]:_(s32) = G_MUL [[UV4]], [[UV7]] + ; GFX9-NEXT: [[UMULH:%[0-9]+]]:_(s32) = G_UMULH [[UV4]], [[UV6]] + ; GFX9-NEXT: [[UADDO:%[0-9]+]]:_(s32), [[UADDO1:%[0-9]+]]:_(s1) = G_UADDO [[MUL]], [[MUL1]] + ; GFX9-NEXT: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO1]](s1) + ; GFX9-NEXT: [[UADDO2:%[0-9]+]]:_(s32), [[UADDO3:%[0-9]+]]:_(s1) = G_UADDO [[UADDO]], [[UMULH]] + ; GFX9-NEXT: [[ZEXT1:%[0-9]+]]:_(s32) = 
G_ZEXT [[UADDO3]](s1) + ; GFX9-NEXT: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[ZEXT]], [[ZEXT1]] + ; GFX9-NEXT: [[MUL2:%[0-9]+]]:_(s32) = G_MUL [[UV5]], [[UV7]] + ; GFX9-NEXT: [[UMULH1:%[0-9]+]]:_(s32) = G_UMULH [[UV5]], [[UV6]] + ; GFX9-NEXT: [[UMULH2:%[0-9]+]]:_(s32) = G_UMULH [[UV4]], [[UV7]] + ; GFX9-NEXT: [[UADDO4:%[0-9]+]]:_(s32), [[UADDO5:%[0-9]+]]:_(s1) = G_UADDO [[MUL2]], [[UMULH1]] + ; GFX9-NEXT: [[ZEXT2:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO5]](s1) + ; GFX9-NEXT: [[UADDO6:%[0-9]+]]:_(s32), [[UADDO7:%[0-9]+]]:_(s1) = G_UADDO [[UADDO4]], [[UMULH2]] + ; GFX9-NEXT: [[ZEXT3:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO7]](s1) + ; GFX9-NEXT: [[ADD1:%[0-9]+]]:_(s32) = G_ADD [[ZEXT2]], [[ZEXT3]] + ; GFX9-NEXT: [[UADDO8:%[0-9]+]]:_(s32), [[UADDO9:%[0-9]+]]:_(s1) = G_UADDO [[UADDO6]], [[ADD]] + ; GFX9-NEXT: [[ZEXT4:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO9]](s1) + ; GFX9-NEXT: [[ADD2:%[0-9]+]]:_(s32) = G_ADD [[ADD1]], [[ZEXT4]] + ; GFX9-NEXT: [[UMULH3:%[0-9]+]]:_(s32) = G_UMULH [[UV5]], [[UV7]] + ; GFX9-NEXT: [[ADD3:%[0-9]+]]:_(s32) = G_ADD [[UMULH3]], [[ADD2]] + ; GFX9-NEXT: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UADDO8]](s32), [[ADD3]](s32) + ; GFX9-NEXT: [[UV8:%[0-9]+]]:_(s32), [[UV9:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[UV1]](s64) + ; GFX9-NEXT: [[UV10:%[0-9]+]]:_(s32), [[UV11:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[UV3]](s64) + ; GFX9-NEXT: [[MUL3:%[0-9]+]]:_(s32) = G_MUL [[UV9]], [[UV10]] + ; GFX9-NEXT: [[MUL4:%[0-9]+]]:_(s32) = G_MUL [[UV8]], [[UV11]] + ; GFX9-NEXT: [[UMULH4:%[0-9]+]]:_(s32) = G_UMULH [[UV8]], [[UV10]] + ; GFX9-NEXT: [[UADDO10:%[0-9]+]]:_(s32), [[UADDO11:%[0-9]+]]:_(s1) = G_UADDO [[MUL3]], [[MUL4]] + ; GFX9-NEXT: [[ZEXT5:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO11]](s1) + ; GFX9-NEXT: [[UADDO12:%[0-9]+]]:_(s32), [[UADDO13:%[0-9]+]]:_(s1) = G_UADDO [[UADDO10]], [[UMULH4]] + ; GFX9-NEXT: [[ZEXT6:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO13]](s1) + ; GFX9-NEXT: [[ADD4:%[0-9]+]]:_(s32) = G_ADD [[ZEXT5]], [[ZEXT6]] + ; GFX9-NEXT: [[MUL5:%[0-9]+]]:_(s32) = G_MUL [[UV9]], [[UV11]] + ; GFX9-NEXT: [[UMULH5:%[0-9]+]]:_(s32) = G_UMULH [[UV9]], [[UV10]] + ; GFX9-NEXT: [[UMULH6:%[0-9]+]]:_(s32) = G_UMULH [[UV8]], [[UV11]] + ; GFX9-NEXT: [[UADDO14:%[0-9]+]]:_(s32), [[UADDO15:%[0-9]+]]:_(s1) = G_UADDO [[MUL5]], [[UMULH5]] + ; GFX9-NEXT: [[ZEXT7:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO15]](s1) + ; GFX9-NEXT: [[UADDO16:%[0-9]+]]:_(s32), [[UADDO17:%[0-9]+]]:_(s1) = G_UADDO [[UADDO14]], [[UMULH6]] + ; GFX9-NEXT: [[ZEXT8:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO17]](s1) + ; GFX9-NEXT: [[ADD5:%[0-9]+]]:_(s32) = G_ADD [[ZEXT7]], [[ZEXT8]] + ; GFX9-NEXT: [[UADDO18:%[0-9]+]]:_(s32), [[UADDO19:%[0-9]+]]:_(s1) = G_UADDO [[UADDO16]], [[ADD4]] + ; GFX9-NEXT: [[ZEXT9:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO19]](s1) + ; GFX9-NEXT: [[ADD6:%[0-9]+]]:_(s32) = G_ADD [[ADD5]], [[ZEXT9]] + ; GFX9-NEXT: [[UMULH7:%[0-9]+]]:_(s32) = G_UMULH [[UV9]], [[UV11]] + ; GFX9-NEXT: [[ADD7:%[0-9]+]]:_(s32) = G_ADD [[UMULH7]], [[ADD6]] + ; GFX9-NEXT: [[MV1:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UADDO18]](s32), [[ADD7]](s32) + ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s64>) = G_BUILD_VECTOR [[MV]](s64), [[MV1]](s64) + ; GFX9-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<2 x s64>) %0:_(<2 x s64>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3 %1:_(<2 x s64>) = COPY $vgpr4_vgpr5_vgpr6_vgpr7 %2:_(<2 x s64>) = G_UMULH %0, %1 @@ -252,26 +252,26 @@ body: | ; GFX8-LABEL: name: test_umulh_s16 ; GFX8: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX8: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX8: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 - ; GFX8: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY]], [[C]] - ; 
GFX8: [[AND1:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C]] - ; GFX8: [[MUL:%[0-9]+]]:_(s32) = G_MUL [[AND]], [[AND1]] - ; GFX8: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; GFX8: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[MUL]], [[C1]](s32) - ; GFX8: [[AND2:%[0-9]+]]:_(s32) = G_AND [[LSHR]], [[C]] - ; GFX8: $vgpr0 = COPY [[AND2]](s32) + ; GFX8-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; GFX8-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 + ; GFX8-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY]], [[C]] + ; GFX8-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C]] + ; GFX8-NEXT: [[MUL:%[0-9]+]]:_(s32) = G_MUL [[AND]], [[AND1]] + ; GFX8-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; GFX8-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[MUL]], [[C1]](s32) + ; GFX8-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[LSHR]], [[C]] + ; GFX8-NEXT: $vgpr0 = COPY [[AND2]](s32) ; GFX9-LABEL: name: test_umulh_s16 ; GFX9: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX9: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX9: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 - ; GFX9: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY]], [[C]] - ; GFX9: [[AND1:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C]] - ; GFX9: [[MUL:%[0-9]+]]:_(s32) = G_MUL [[AND]], [[AND1]] - ; GFX9: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; GFX9: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[MUL]], [[C1]](s32) - ; GFX9: [[AND2:%[0-9]+]]:_(s32) = G_AND [[LSHR]], [[C]] - ; GFX9: $vgpr0 = COPY [[AND2]](s32) + ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; GFX9-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 + ; GFX9-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY]], [[C]] + ; GFX9-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C]] + ; GFX9-NEXT: [[MUL:%[0-9]+]]:_(s32) = G_MUL [[AND]], [[AND1]] + ; GFX9-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; GFX9-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[MUL]], [[C1]](s32) + ; GFX9-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[LSHR]], [[C]] + ; GFX9-NEXT: $vgpr0 = COPY [[AND2]](s32) %0:_(s32) = COPY $vgpr0 %1:_(s32) = COPY $vgpr1 %2:_(s16) = G_TRUNC %0 @@ -289,34 +289,34 @@ body: | ; GFX8-LABEL: name: test_umulh_s8 ; GFX8: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX8: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX8: [[C:%[0-9]+]]:_(s16) = G_CONSTANT i16 255 - ; GFX8: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY]](s32) - ; GFX8: [[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC]], [[C]] - ; GFX8: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY1]](s32) - ; GFX8: [[AND1:%[0-9]+]]:_(s16) = G_AND [[TRUNC1]], [[C]] - ; GFX8: [[MUL:%[0-9]+]]:_(s16) = G_MUL [[AND]], [[AND1]] - ; GFX8: [[C1:%[0-9]+]]:_(s16) = G_CONSTANT i16 8 - ; GFX8: [[LSHR:%[0-9]+]]:_(s16) = G_LSHR [[MUL]], [[C1]](s16) - ; GFX8: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 255 - ; GFX8: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[LSHR]](s16) - ; GFX8: [[AND2:%[0-9]+]]:_(s32) = G_AND [[ANYEXT]], [[C2]] - ; GFX8: $vgpr0 = COPY [[AND2]](s32) + ; GFX8-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; GFX8-NEXT: [[C:%[0-9]+]]:_(s16) = G_CONSTANT i16 255 + ; GFX8-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY]](s32) + ; GFX8-NEXT: [[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC]], [[C]] + ; GFX8-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY1]](s32) + ; GFX8-NEXT: [[AND1:%[0-9]+]]:_(s16) = G_AND [[TRUNC1]], [[C]] + ; GFX8-NEXT: [[MUL:%[0-9]+]]:_(s16) = G_MUL [[AND]], [[AND1]] + ; GFX8-NEXT: [[C1:%[0-9]+]]:_(s16) = G_CONSTANT i16 8 + ; GFX8-NEXT: [[LSHR:%[0-9]+]]:_(s16) = G_LSHR [[MUL]], [[C1]](s16) + ; GFX8-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 255 + ; GFX8-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT 
[[LSHR]](s16) + ; GFX8-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[ANYEXT]], [[C2]] + ; GFX8-NEXT: $vgpr0 = COPY [[AND2]](s32) ; GFX9-LABEL: name: test_umulh_s8 ; GFX9: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX9: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX9: [[C:%[0-9]+]]:_(s16) = G_CONSTANT i16 255 - ; GFX9: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY]](s32) - ; GFX9: [[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC]], [[C]] - ; GFX9: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY1]](s32) - ; GFX9: [[AND1:%[0-9]+]]:_(s16) = G_AND [[TRUNC1]], [[C]] - ; GFX9: [[MUL:%[0-9]+]]:_(s16) = G_MUL [[AND]], [[AND1]] - ; GFX9: [[C1:%[0-9]+]]:_(s16) = G_CONSTANT i16 8 - ; GFX9: [[LSHR:%[0-9]+]]:_(s16) = G_LSHR [[MUL]], [[C1]](s16) - ; GFX9: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 255 - ; GFX9: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[LSHR]](s16) - ; GFX9: [[AND2:%[0-9]+]]:_(s32) = G_AND [[ANYEXT]], [[C2]] - ; GFX9: $vgpr0 = COPY [[AND2]](s32) + ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; GFX9-NEXT: [[C:%[0-9]+]]:_(s16) = G_CONSTANT i16 255 + ; GFX9-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY]](s32) + ; GFX9-NEXT: [[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC]], [[C]] + ; GFX9-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY1]](s32) + ; GFX9-NEXT: [[AND1:%[0-9]+]]:_(s16) = G_AND [[TRUNC1]], [[C]] + ; GFX9-NEXT: [[MUL:%[0-9]+]]:_(s16) = G_MUL [[AND]], [[AND1]] + ; GFX9-NEXT: [[C1:%[0-9]+]]:_(s16) = G_CONSTANT i16 8 + ; GFX9-NEXT: [[LSHR:%[0-9]+]]:_(s16) = G_LSHR [[MUL]], [[C1]](s16) + ; GFX9-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 255 + ; GFX9-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[LSHR]](s16) + ; GFX9-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[ANYEXT]], [[C2]] + ; GFX9-NEXT: $vgpr0 = COPY [[AND2]](s32) %0:_(s32) = COPY $vgpr0 %1:_(s32) = COPY $vgpr1 %2:_(s8) = G_TRUNC %0 @@ -333,52 +333,52 @@ body: | liveins: $vgpr0_vgpr1, $vgpr2_vgpr3 ; GFX8-LABEL: name: test_umulh_v2s16 ; GFX8: [[COPY:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr0_vgpr1 - ; GFX8: [[COPY1:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr2_vgpr3 - ; GFX8: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<2 x s32>) - ; GFX8: [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](<2 x s32>) - ; GFX8: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 - ; GFX8: [[AND:%[0-9]+]]:_(s32) = G_AND [[UV]], [[C]] - ; GFX8: [[AND1:%[0-9]+]]:_(s32) = G_AND [[UV2]], [[C]] - ; GFX8: [[MUL:%[0-9]+]]:_(s32) = G_MUL [[AND]], [[AND1]] - ; GFX8: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; GFX8: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[MUL]], [[C1]](s32) - ; GFX8: [[AND2:%[0-9]+]]:_(s32) = G_AND [[UV1]], [[C]] - ; GFX8: [[AND3:%[0-9]+]]:_(s32) = G_AND [[UV3]], [[C]] - ; GFX8: [[MUL1:%[0-9]+]]:_(s32) = G_MUL [[AND2]], [[AND3]] - ; GFX8: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[MUL1]], [[C1]](s32) - ; GFX8: [[AND4:%[0-9]+]]:_(s32) = G_AND [[LSHR]], [[C]] - ; GFX8: [[AND5:%[0-9]+]]:_(s32) = G_AND [[LSHR1]], [[C]] - ; GFX8: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[C1]](s32) - ; GFX8: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND4]], [[SHL]] - ; GFX8: [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) - ; GFX8: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[BITCAST]](<2 x s16>) - ; GFX8: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C1]](s32) - ; GFX8: [[AND6:%[0-9]+]]:_(s32) = G_AND [[BITCAST1]], [[C]] - ; GFX8: [[AND7:%[0-9]+]]:_(s32) = G_AND [[LSHR2]], [[C]] - ; GFX8: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[AND6]](s32), [[AND7]](s32) - ; GFX8: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x s32>) + ; GFX8-NEXT: [[COPY1:%[0-9]+]]:_(<2 x s32>) = 
COPY $vgpr2_vgpr3 + ; GFX8-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<2 x s32>) + ; GFX8-NEXT: [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](<2 x s32>) + ; GFX8-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 + ; GFX8-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[UV]], [[C]] + ; GFX8-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[UV2]], [[C]] + ; GFX8-NEXT: [[MUL:%[0-9]+]]:_(s32) = G_MUL [[AND]], [[AND1]] + ; GFX8-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; GFX8-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[MUL]], [[C1]](s32) + ; GFX8-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[UV1]], [[C]] + ; GFX8-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[UV3]], [[C]] + ; GFX8-NEXT: [[MUL1:%[0-9]+]]:_(s32) = G_MUL [[AND2]], [[AND3]] + ; GFX8-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[MUL1]], [[C1]](s32) + ; GFX8-NEXT: [[AND4:%[0-9]+]]:_(s32) = G_AND [[LSHR]], [[C]] + ; GFX8-NEXT: [[AND5:%[0-9]+]]:_(s32) = G_AND [[LSHR1]], [[C]] + ; GFX8-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[C1]](s32) + ; GFX8-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND4]], [[SHL]] + ; GFX8-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) + ; GFX8-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[BITCAST]](<2 x s16>) + ; GFX8-NEXT: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C1]](s32) + ; GFX8-NEXT: [[AND6:%[0-9]+]]:_(s32) = G_AND [[BITCAST1]], [[C]] + ; GFX8-NEXT: [[AND7:%[0-9]+]]:_(s32) = G_AND [[LSHR2]], [[C]] + ; GFX8-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[AND6]](s32), [[AND7]](s32) + ; GFX8-NEXT: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x s32>) ; GFX9-LABEL: name: test_umulh_v2s16 ; GFX9: [[COPY:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr0_vgpr1 - ; GFX9: [[COPY1:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr2_vgpr3 - ; GFX9: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<2 x s32>) - ; GFX9: [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](<2 x s32>) - ; GFX9: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 - ; GFX9: [[AND:%[0-9]+]]:_(s32) = G_AND [[UV]], [[C]] - ; GFX9: [[AND1:%[0-9]+]]:_(s32) = G_AND [[UV2]], [[C]] - ; GFX9: [[MUL:%[0-9]+]]:_(s32) = G_MUL [[AND]], [[AND1]] - ; GFX9: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; GFX9: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[MUL]], [[C1]](s32) - ; GFX9: [[AND2:%[0-9]+]]:_(s32) = G_AND [[UV1]], [[C]] - ; GFX9: [[AND3:%[0-9]+]]:_(s32) = G_AND [[UV3]], [[C]] - ; GFX9: [[MUL1:%[0-9]+]]:_(s32) = G_MUL [[AND2]], [[AND3]] - ; GFX9: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[MUL1]], [[C1]](s32) - ; GFX9: [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[LSHR]](s32), [[LSHR1]](s32) - ; GFX9: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[BUILD_VECTOR_TRUNC]](<2 x s16>) - ; GFX9: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C1]](s32) - ; GFX9: [[AND4:%[0-9]+]]:_(s32) = G_AND [[BITCAST]], [[C]] - ; GFX9: [[AND5:%[0-9]+]]:_(s32) = G_AND [[LSHR2]], [[C]] - ; GFX9: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[AND4]](s32), [[AND5]](s32) - ; GFX9: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x s32>) + ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr2_vgpr3 + ; GFX9-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<2 x s32>) + ; GFX9-NEXT: [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](<2 x s32>) + ; GFX9-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 + ; GFX9-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[UV]], [[C]] + ; GFX9-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[UV2]], [[C]] + ; GFX9-NEXT: 
[[MUL:%[0-9]+]]:_(s32) = G_MUL [[AND]], [[AND1]] + ; GFX9-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; GFX9-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[MUL]], [[C1]](s32) + ; GFX9-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[UV1]], [[C]] + ; GFX9-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[UV3]], [[C]] + ; GFX9-NEXT: [[MUL1:%[0-9]+]]:_(s32) = G_MUL [[AND2]], [[AND3]] + ; GFX9-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[MUL1]], [[C1]](s32) + ; GFX9-NEXT: [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[LSHR]](s32), [[LSHR1]](s32) + ; GFX9-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[BUILD_VECTOR_TRUNC]](<2 x s16>) + ; GFX9-NEXT: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C1]](s32) + ; GFX9-NEXT: [[AND4:%[0-9]+]]:_(s32) = G_AND [[BITCAST]], [[C]] + ; GFX9-NEXT: [[AND5:%[0-9]+]]:_(s32) = G_AND [[LSHR2]], [[C]] + ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[AND4]](s32), [[AND5]](s32) + ; GFX9-NEXT: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x s32>) %0:_(<2 x s32>) = COPY $vgpr0_vgpr1 %1:_(<2 x s32>) = COPY $vgpr2_vgpr3 %2:_(<2 x s16>) = G_TRUNC %0 @@ -395,117 +395,117 @@ body: | liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5 ; GFX8-LABEL: name: test_umulh_v3s8 ; GFX8: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX8: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX8: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; GFX8: [[COPY3:%[0-9]+]]:_(s32) = COPY $vgpr3 - ; GFX8: [[COPY4:%[0-9]+]]:_(s32) = COPY $vgpr4 - ; GFX8: [[COPY5:%[0-9]+]]:_(s32) = COPY $vgpr5 - ; GFX8: [[C:%[0-9]+]]:_(s16) = G_CONSTANT i16 255 - ; GFX8: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY]](s32) - ; GFX8: [[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC]], [[C]] - ; GFX8: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY3]](s32) - ; GFX8: [[AND1:%[0-9]+]]:_(s16) = G_AND [[TRUNC1]], [[C]] - ; GFX8: [[MUL:%[0-9]+]]:_(s16) = G_MUL [[AND]], [[AND1]] - ; GFX8: [[C1:%[0-9]+]]:_(s16) = G_CONSTANT i16 8 - ; GFX8: [[LSHR:%[0-9]+]]:_(s16) = G_LSHR [[MUL]], [[C1]](s16) - ; GFX8: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[COPY1]](s32) - ; GFX8: [[AND2:%[0-9]+]]:_(s16) = G_AND [[TRUNC2]], [[C]] - ; GFX8: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[COPY4]](s32) - ; GFX8: [[AND3:%[0-9]+]]:_(s16) = G_AND [[TRUNC3]], [[C]] - ; GFX8: [[MUL1:%[0-9]+]]:_(s16) = G_MUL [[AND2]], [[AND3]] - ; GFX8: [[LSHR1:%[0-9]+]]:_(s16) = G_LSHR [[MUL1]], [[C1]](s16) - ; GFX8: [[TRUNC4:%[0-9]+]]:_(s16) = G_TRUNC [[COPY2]](s32) - ; GFX8: [[AND4:%[0-9]+]]:_(s16) = G_AND [[TRUNC4]], [[C]] - ; GFX8: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[COPY5]](s32) - ; GFX8: [[AND5:%[0-9]+]]:_(s16) = G_AND [[TRUNC5]], [[C]] - ; GFX8: [[MUL2:%[0-9]+]]:_(s16) = G_MUL [[AND4]], [[AND5]] - ; GFX8: [[LSHR2:%[0-9]+]]:_(s16) = G_LSHR [[MUL2]], [[C1]](s16) - ; GFX8: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF - ; GFX8: [[AND6:%[0-9]+]]:_(s16) = G_AND [[LSHR]], [[C]] - ; GFX8: [[AND7:%[0-9]+]]:_(s16) = G_AND [[LSHR1]], [[C]] - ; GFX8: [[SHL:%[0-9]+]]:_(s16) = G_SHL [[AND7]], [[C1]](s16) - ; GFX8: [[OR:%[0-9]+]]:_(s16) = G_OR [[AND6]], [[SHL]] - ; GFX8: [[AND8:%[0-9]+]]:_(s16) = G_AND [[LSHR2]], [[C]] - ; GFX8: [[TRUNC6:%[0-9]+]]:_(s16) = G_TRUNC [[DEF]](s32) - ; GFX8: [[AND9:%[0-9]+]]:_(s16) = G_AND [[TRUNC6]], [[C]] - ; GFX8: [[SHL1:%[0-9]+]]:_(s16) = G_SHL [[AND9]], [[C1]](s16) - ; GFX8: [[OR1:%[0-9]+]]:_(s16) = G_OR [[AND8]], [[SHL1]] - ; GFX8: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[OR]](s16) - ; GFX8: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[OR1]](s16) - ; GFX8: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; GFX8: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C2]](s32) - ; GFX8: 
[[OR2:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL2]] - ; GFX8: $vgpr0 = COPY [[OR2]](s32) + ; GFX8-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; GFX8-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 + ; GFX8-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $vgpr3 + ; GFX8-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $vgpr4 + ; GFX8-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $vgpr5 + ; GFX8-NEXT: [[C:%[0-9]+]]:_(s16) = G_CONSTANT i16 255 + ; GFX8-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY]](s32) + ; GFX8-NEXT: [[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC]], [[C]] + ; GFX8-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY3]](s32) + ; GFX8-NEXT: [[AND1:%[0-9]+]]:_(s16) = G_AND [[TRUNC1]], [[C]] + ; GFX8-NEXT: [[MUL:%[0-9]+]]:_(s16) = G_MUL [[AND]], [[AND1]] + ; GFX8-NEXT: [[C1:%[0-9]+]]:_(s16) = G_CONSTANT i16 8 + ; GFX8-NEXT: [[LSHR:%[0-9]+]]:_(s16) = G_LSHR [[MUL]], [[C1]](s16) + ; GFX8-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[COPY1]](s32) + ; GFX8-NEXT: [[AND2:%[0-9]+]]:_(s16) = G_AND [[TRUNC2]], [[C]] + ; GFX8-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[COPY4]](s32) + ; GFX8-NEXT: [[AND3:%[0-9]+]]:_(s16) = G_AND [[TRUNC3]], [[C]] + ; GFX8-NEXT: [[MUL1:%[0-9]+]]:_(s16) = G_MUL [[AND2]], [[AND3]] + ; GFX8-NEXT: [[LSHR1:%[0-9]+]]:_(s16) = G_LSHR [[MUL1]], [[C1]](s16) + ; GFX8-NEXT: [[TRUNC4:%[0-9]+]]:_(s16) = G_TRUNC [[COPY2]](s32) + ; GFX8-NEXT: [[AND4:%[0-9]+]]:_(s16) = G_AND [[TRUNC4]], [[C]] + ; GFX8-NEXT: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[COPY5]](s32) + ; GFX8-NEXT: [[AND5:%[0-9]+]]:_(s16) = G_AND [[TRUNC5]], [[C]] + ; GFX8-NEXT: [[MUL2:%[0-9]+]]:_(s16) = G_MUL [[AND4]], [[AND5]] + ; GFX8-NEXT: [[LSHR2:%[0-9]+]]:_(s16) = G_LSHR [[MUL2]], [[C1]](s16) + ; GFX8-NEXT: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF + ; GFX8-NEXT: [[AND6:%[0-9]+]]:_(s16) = G_AND [[LSHR]], [[C]] + ; GFX8-NEXT: [[AND7:%[0-9]+]]:_(s16) = G_AND [[LSHR1]], [[C]] + ; GFX8-NEXT: [[SHL:%[0-9]+]]:_(s16) = G_SHL [[AND7]], [[C1]](s16) + ; GFX8-NEXT: [[OR:%[0-9]+]]:_(s16) = G_OR [[AND6]], [[SHL]] + ; GFX8-NEXT: [[AND8:%[0-9]+]]:_(s16) = G_AND [[LSHR2]], [[C]] + ; GFX8-NEXT: [[TRUNC6:%[0-9]+]]:_(s16) = G_TRUNC [[DEF]](s32) + ; GFX8-NEXT: [[AND9:%[0-9]+]]:_(s16) = G_AND [[TRUNC6]], [[C]] + ; GFX8-NEXT: [[SHL1:%[0-9]+]]:_(s16) = G_SHL [[AND9]], [[C1]](s16) + ; GFX8-NEXT: [[OR1:%[0-9]+]]:_(s16) = G_OR [[AND8]], [[SHL1]] + ; GFX8-NEXT: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[OR]](s16) + ; GFX8-NEXT: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[OR1]](s16) + ; GFX8-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; GFX8-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C2]](s32) + ; GFX8-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL2]] + ; GFX8-NEXT: $vgpr0 = COPY [[OR2]](s32) ; GFX9-LABEL: name: test_umulh_v3s8 ; GFX9: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX9: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX9: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; GFX9: [[COPY3:%[0-9]+]]:_(s32) = COPY $vgpr3 - ; GFX9: [[COPY4:%[0-9]+]]:_(s32) = COPY $vgpr4 - ; GFX9: [[COPY5:%[0-9]+]]:_(s32) = COPY $vgpr5 - ; GFX9: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF - ; GFX9: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY]](s32) - ; GFX9: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY1]](s32) - ; GFX9: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[COPY2]](s32) - ; GFX9: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[DEF]](s32) - ; GFX9: [[TRUNC4:%[0-9]+]]:_(s16) = G_TRUNC [[COPY3]](s32) - ; GFX9: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[COPY4]](s32) - ; GFX9: [[TRUNC6:%[0-9]+]]:_(s16) = G_TRUNC [[COPY5]](s32) - ; GFX9: [[TRUNC7:%[0-9]+]]:_(s16) = G_TRUNC [[DEF]](s32) - ; GFX9: [[C:%[0-9]+]]:_(s16) = G_CONSTANT 
i16 255 - ; GFX9: [[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC]], [[C]] - ; GFX9: [[AND1:%[0-9]+]]:_(s16) = G_AND [[TRUNC1]], [[C]] - ; GFX9: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[AND]](s16) - ; GFX9: [[ANYEXT1:%[0-9]+]]:_(s32) = G_ANYEXT [[AND1]](s16) - ; GFX9: [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[ANYEXT]](s32), [[ANYEXT1]](s32) - ; GFX9: [[AND2:%[0-9]+]]:_(s16) = G_AND [[TRUNC4]], [[C]] - ; GFX9: [[AND3:%[0-9]+]]:_(s16) = G_AND [[TRUNC5]], [[C]] - ; GFX9: [[ANYEXT2:%[0-9]+]]:_(s32) = G_ANYEXT [[AND2]](s16) - ; GFX9: [[ANYEXT3:%[0-9]+]]:_(s32) = G_ANYEXT [[AND3]](s16) - ; GFX9: [[BUILD_VECTOR_TRUNC1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[ANYEXT2]](s32), [[ANYEXT3]](s32) - ; GFX9: [[MUL:%[0-9]+]]:_(<2 x s16>) = G_MUL [[BUILD_VECTOR_TRUNC]], [[BUILD_VECTOR_TRUNC1]] - ; GFX9: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 - ; GFX9: [[COPY6:%[0-9]+]]:_(s32) = COPY [[C1]](s32) - ; GFX9: [[BUILD_VECTOR_TRUNC2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY6]](s32), [[C1]](s32) - ; GFX9: [[LSHR:%[0-9]+]]:_(<2 x s16>) = G_LSHR [[MUL]], [[BUILD_VECTOR_TRUNC2]](<2 x s16>) - ; GFX9: [[AND4:%[0-9]+]]:_(s16) = G_AND [[TRUNC2]], [[C]] - ; GFX9: [[AND5:%[0-9]+]]:_(s16) = G_AND [[TRUNC3]], [[C]] - ; GFX9: [[ANYEXT4:%[0-9]+]]:_(s32) = G_ANYEXT [[AND4]](s16) - ; GFX9: [[ANYEXT5:%[0-9]+]]:_(s32) = G_ANYEXT [[AND5]](s16) - ; GFX9: [[BUILD_VECTOR_TRUNC3:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[ANYEXT4]](s32), [[ANYEXT5]](s32) - ; GFX9: [[AND6:%[0-9]+]]:_(s16) = G_AND [[TRUNC6]], [[C]] - ; GFX9: [[AND7:%[0-9]+]]:_(s16) = G_AND [[TRUNC7]], [[C]] - ; GFX9: [[ANYEXT6:%[0-9]+]]:_(s32) = G_ANYEXT [[AND6]](s16) - ; GFX9: [[ANYEXT7:%[0-9]+]]:_(s32) = G_ANYEXT [[AND7]](s16) - ; GFX9: [[BUILD_VECTOR_TRUNC4:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[ANYEXT6]](s32), [[ANYEXT7]](s32) - ; GFX9: [[MUL1:%[0-9]+]]:_(<2 x s16>) = G_MUL [[BUILD_VECTOR_TRUNC3]], [[BUILD_VECTOR_TRUNC4]] - ; GFX9: [[COPY7:%[0-9]+]]:_(s32) = COPY [[C1]](s32) - ; GFX9: [[COPY8:%[0-9]+]]:_(s32) = COPY [[C1]](s32) - ; GFX9: [[BUILD_VECTOR_TRUNC5:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY7]](s32), [[COPY8]](s32) - ; GFX9: [[LSHR1:%[0-9]+]]:_(<2 x s16>) = G_LSHR [[MUL1]], [[BUILD_VECTOR_TRUNC5]](<2 x s16>) - ; GFX9: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[LSHR]](<2 x s16>) - ; GFX9: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; GFX9: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C2]](s32) - ; GFX9: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[LSHR1]](<2 x s16>) - ; GFX9: [[DEF1:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF - ; GFX9: [[TRUNC8:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST]](s32) - ; GFX9: [[AND8:%[0-9]+]]:_(s16) = G_AND [[TRUNC8]], [[C]] - ; GFX9: [[TRUNC9:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR2]](s32) - ; GFX9: [[AND9:%[0-9]+]]:_(s16) = G_AND [[TRUNC9]], [[C]] - ; GFX9: [[C3:%[0-9]+]]:_(s16) = G_CONSTANT i16 8 - ; GFX9: [[SHL:%[0-9]+]]:_(s16) = G_SHL [[AND9]], [[C3]](s16) - ; GFX9: [[OR:%[0-9]+]]:_(s16) = G_OR [[AND8]], [[SHL]] - ; GFX9: [[TRUNC10:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST1]](s32) - ; GFX9: [[AND10:%[0-9]+]]:_(s16) = G_AND [[TRUNC10]], [[C]] - ; GFX9: [[TRUNC11:%[0-9]+]]:_(s16) = G_TRUNC [[DEF1]](s32) - ; GFX9: [[AND11:%[0-9]+]]:_(s16) = G_AND [[TRUNC11]], [[C]] - ; GFX9: [[SHL1:%[0-9]+]]:_(s16) = G_SHL [[AND11]], [[C3]](s16) - ; GFX9: [[OR1:%[0-9]+]]:_(s16) = G_OR [[AND10]], [[SHL1]] - ; GFX9: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[OR]](s16) - ; GFX9: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[OR1]](s16) - ; GFX9: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C2]](s32) - ; GFX9: [[OR2:%[0-9]+]]:_(s32) = 
G_OR [[ZEXT]], [[SHL2]] - ; GFX9: $vgpr0 = COPY [[OR2]](s32) + ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; GFX9-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 + ; GFX9-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $vgpr3 + ; GFX9-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $vgpr4 + ; GFX9-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $vgpr5 + ; GFX9-NEXT: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF + ; GFX9-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY]](s32) + ; GFX9-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY1]](s32) + ; GFX9-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[COPY2]](s32) + ; GFX9-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[DEF]](s32) + ; GFX9-NEXT: [[TRUNC4:%[0-9]+]]:_(s16) = G_TRUNC [[COPY3]](s32) + ; GFX9-NEXT: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[COPY4]](s32) + ; GFX9-NEXT: [[TRUNC6:%[0-9]+]]:_(s16) = G_TRUNC [[COPY5]](s32) + ; GFX9-NEXT: [[TRUNC7:%[0-9]+]]:_(s16) = G_TRUNC [[DEF]](s32) + ; GFX9-NEXT: [[C:%[0-9]+]]:_(s16) = G_CONSTANT i16 255 + ; GFX9-NEXT: [[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC]], [[C]] + ; GFX9-NEXT: [[AND1:%[0-9]+]]:_(s16) = G_AND [[TRUNC1]], [[C]] + ; GFX9-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[AND]](s16) + ; GFX9-NEXT: [[ANYEXT1:%[0-9]+]]:_(s32) = G_ANYEXT [[AND1]](s16) + ; GFX9-NEXT: [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[ANYEXT]](s32), [[ANYEXT1]](s32) + ; GFX9-NEXT: [[AND2:%[0-9]+]]:_(s16) = G_AND [[TRUNC4]], [[C]] + ; GFX9-NEXT: [[AND3:%[0-9]+]]:_(s16) = G_AND [[TRUNC5]], [[C]] + ; GFX9-NEXT: [[ANYEXT2:%[0-9]+]]:_(s32) = G_ANYEXT [[AND2]](s16) + ; GFX9-NEXT: [[ANYEXT3:%[0-9]+]]:_(s32) = G_ANYEXT [[AND3]](s16) + ; GFX9-NEXT: [[BUILD_VECTOR_TRUNC1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[ANYEXT2]](s32), [[ANYEXT3]](s32) + ; GFX9-NEXT: [[MUL:%[0-9]+]]:_(<2 x s16>) = G_MUL [[BUILD_VECTOR_TRUNC]], [[BUILD_VECTOR_TRUNC1]] + ; GFX9-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 + ; GFX9-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY [[C1]](s32) + ; GFX9-NEXT: [[BUILD_VECTOR_TRUNC2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY6]](s32), [[C1]](s32) + ; GFX9-NEXT: [[LSHR:%[0-9]+]]:_(<2 x s16>) = G_LSHR [[MUL]], [[BUILD_VECTOR_TRUNC2]](<2 x s16>) + ; GFX9-NEXT: [[AND4:%[0-9]+]]:_(s16) = G_AND [[TRUNC2]], [[C]] + ; GFX9-NEXT: [[AND5:%[0-9]+]]:_(s16) = G_AND [[TRUNC3]], [[C]] + ; GFX9-NEXT: [[ANYEXT4:%[0-9]+]]:_(s32) = G_ANYEXT [[AND4]](s16) + ; GFX9-NEXT: [[ANYEXT5:%[0-9]+]]:_(s32) = G_ANYEXT [[AND5]](s16) + ; GFX9-NEXT: [[BUILD_VECTOR_TRUNC3:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[ANYEXT4]](s32), [[ANYEXT5]](s32) + ; GFX9-NEXT: [[AND6:%[0-9]+]]:_(s16) = G_AND [[TRUNC6]], [[C]] + ; GFX9-NEXT: [[AND7:%[0-9]+]]:_(s16) = G_AND [[TRUNC7]], [[C]] + ; GFX9-NEXT: [[ANYEXT6:%[0-9]+]]:_(s32) = G_ANYEXT [[AND6]](s16) + ; GFX9-NEXT: [[ANYEXT7:%[0-9]+]]:_(s32) = G_ANYEXT [[AND7]](s16) + ; GFX9-NEXT: [[BUILD_VECTOR_TRUNC4:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[ANYEXT6]](s32), [[ANYEXT7]](s32) + ; GFX9-NEXT: [[MUL1:%[0-9]+]]:_(<2 x s16>) = G_MUL [[BUILD_VECTOR_TRUNC3]], [[BUILD_VECTOR_TRUNC4]] + ; GFX9-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY [[C1]](s32) + ; GFX9-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY [[C1]](s32) + ; GFX9-NEXT: [[BUILD_VECTOR_TRUNC5:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY7]](s32), [[COPY8]](s32) + ; GFX9-NEXT: [[LSHR1:%[0-9]+]]:_(<2 x s16>) = G_LSHR [[MUL1]], [[BUILD_VECTOR_TRUNC5]](<2 x s16>) + ; GFX9-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[LSHR]](<2 x s16>) + ; GFX9-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; GFX9-NEXT: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C2]](s32) + ; 
GFX9-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[LSHR1]](<2 x s16>) + ; GFX9-NEXT: [[DEF1:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF + ; GFX9-NEXT: [[TRUNC8:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST]](s32) + ; GFX9-NEXT: [[AND8:%[0-9]+]]:_(s16) = G_AND [[TRUNC8]], [[C]] + ; GFX9-NEXT: [[TRUNC9:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR2]](s32) + ; GFX9-NEXT: [[AND9:%[0-9]+]]:_(s16) = G_AND [[TRUNC9]], [[C]] + ; GFX9-NEXT: [[C3:%[0-9]+]]:_(s16) = G_CONSTANT i16 8 + ; GFX9-NEXT: [[SHL:%[0-9]+]]:_(s16) = G_SHL [[AND9]], [[C3]](s16) + ; GFX9-NEXT: [[OR:%[0-9]+]]:_(s16) = G_OR [[AND8]], [[SHL]] + ; GFX9-NEXT: [[TRUNC10:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST1]](s32) + ; GFX9-NEXT: [[AND10:%[0-9]+]]:_(s16) = G_AND [[TRUNC10]], [[C]] + ; GFX9-NEXT: [[TRUNC11:%[0-9]+]]:_(s16) = G_TRUNC [[DEF1]](s32) + ; GFX9-NEXT: [[AND11:%[0-9]+]]:_(s16) = G_AND [[TRUNC11]], [[C]] + ; GFX9-NEXT: [[SHL1:%[0-9]+]]:_(s16) = G_SHL [[AND11]], [[C3]](s16) + ; GFX9-NEXT: [[OR1:%[0-9]+]]:_(s16) = G_OR [[AND10]], [[SHL1]] + ; GFX9-NEXT: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[OR]](s16) + ; GFX9-NEXT: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[OR1]](s16) + ; GFX9-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C2]](s32) + ; GFX9-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL2]] + ; GFX9-NEXT: $vgpr0 = COPY [[OR2]](s32) %0:_(s32) = COPY $vgpr0 %1:_(s32) = COPY $vgpr1 %2:_(s32) = COPY $vgpr2 @@ -534,66 +534,66 @@ body: | liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3 ; GFX8-LABEL: name: test_umulh_v2s8 ; GFX8: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX8: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX8: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; GFX8: [[COPY3:%[0-9]+]]:_(s32) = COPY $vgpr3 - ; GFX8: [[C:%[0-9]+]]:_(s16) = G_CONSTANT i16 255 - ; GFX8: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY]](s32) - ; GFX8: [[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC]], [[C]] - ; GFX8: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY2]](s32) - ; GFX8: [[AND1:%[0-9]+]]:_(s16) = G_AND [[TRUNC1]], [[C]] - ; GFX8: [[MUL:%[0-9]+]]:_(s16) = G_MUL [[AND]], [[AND1]] - ; GFX8: [[C1:%[0-9]+]]:_(s16) = G_CONSTANT i16 8 - ; GFX8: [[LSHR:%[0-9]+]]:_(s16) = G_LSHR [[MUL]], [[C1]](s16) - ; GFX8: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[COPY1]](s32) - ; GFX8: [[AND2:%[0-9]+]]:_(s16) = G_AND [[TRUNC2]], [[C]] - ; GFX8: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[COPY3]](s32) - ; GFX8: [[AND3:%[0-9]+]]:_(s16) = G_AND [[TRUNC3]], [[C]] - ; GFX8: [[MUL1:%[0-9]+]]:_(s16) = G_MUL [[AND2]], [[AND3]] - ; GFX8: [[LSHR1:%[0-9]+]]:_(s16) = G_LSHR [[MUL1]], [[C1]](s16) - ; GFX8: [[AND4:%[0-9]+]]:_(s16) = G_AND [[LSHR]], [[C]] - ; GFX8: [[AND5:%[0-9]+]]:_(s16) = G_AND [[LSHR1]], [[C]] - ; GFX8: [[SHL:%[0-9]+]]:_(s16) = G_SHL [[AND5]], [[C1]](s16) - ; GFX8: [[OR:%[0-9]+]]:_(s16) = G_OR [[AND4]], [[SHL]] - ; GFX8: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[OR]](s16) - ; GFX8: $vgpr0 = COPY [[ANYEXT]](s32) + ; GFX8-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; GFX8-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 + ; GFX8-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $vgpr3 + ; GFX8-NEXT: [[C:%[0-9]+]]:_(s16) = G_CONSTANT i16 255 + ; GFX8-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY]](s32) + ; GFX8-NEXT: [[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC]], [[C]] + ; GFX8-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY2]](s32) + ; GFX8-NEXT: [[AND1:%[0-9]+]]:_(s16) = G_AND [[TRUNC1]], [[C]] + ; GFX8-NEXT: [[MUL:%[0-9]+]]:_(s16) = G_MUL [[AND]], [[AND1]] + ; GFX8-NEXT: [[C1:%[0-9]+]]:_(s16) = G_CONSTANT i16 8 + ; GFX8-NEXT: [[LSHR:%[0-9]+]]:_(s16) = G_LSHR [[MUL]], [[C1]](s16) + ; GFX8-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[COPY1]](s32) + 
; GFX8-NEXT: [[AND2:%[0-9]+]]:_(s16) = G_AND [[TRUNC2]], [[C]] + ; GFX8-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[COPY3]](s32) + ; GFX8-NEXT: [[AND3:%[0-9]+]]:_(s16) = G_AND [[TRUNC3]], [[C]] + ; GFX8-NEXT: [[MUL1:%[0-9]+]]:_(s16) = G_MUL [[AND2]], [[AND3]] + ; GFX8-NEXT: [[LSHR1:%[0-9]+]]:_(s16) = G_LSHR [[MUL1]], [[C1]](s16) + ; GFX8-NEXT: [[AND4:%[0-9]+]]:_(s16) = G_AND [[LSHR]], [[C]] + ; GFX8-NEXT: [[AND5:%[0-9]+]]:_(s16) = G_AND [[LSHR1]], [[C]] + ; GFX8-NEXT: [[SHL:%[0-9]+]]:_(s16) = G_SHL [[AND5]], [[C1]](s16) + ; GFX8-NEXT: [[OR:%[0-9]+]]:_(s16) = G_OR [[AND4]], [[SHL]] + ; GFX8-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[OR]](s16) + ; GFX8-NEXT: $vgpr0 = COPY [[ANYEXT]](s32) ; GFX9-LABEL: name: test_umulh_v2s8 ; GFX9: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX9: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX9: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; GFX9: [[COPY3:%[0-9]+]]:_(s32) = COPY $vgpr3 - ; GFX9: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY]](s32) - ; GFX9: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY1]](s32) - ; GFX9: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[COPY2]](s32) - ; GFX9: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[COPY3]](s32) - ; GFX9: [[C:%[0-9]+]]:_(s16) = G_CONSTANT i16 255 - ; GFX9: [[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC]], [[C]] - ; GFX9: [[AND1:%[0-9]+]]:_(s16) = G_AND [[TRUNC1]], [[C]] - ; GFX9: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[AND]](s16) - ; GFX9: [[ANYEXT1:%[0-9]+]]:_(s32) = G_ANYEXT [[AND1]](s16) - ; GFX9: [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[ANYEXT]](s32), [[ANYEXT1]](s32) - ; GFX9: [[AND2:%[0-9]+]]:_(s16) = G_AND [[TRUNC2]], [[C]] - ; GFX9: [[AND3:%[0-9]+]]:_(s16) = G_AND [[TRUNC3]], [[C]] - ; GFX9: [[ANYEXT2:%[0-9]+]]:_(s32) = G_ANYEXT [[AND2]](s16) - ; GFX9: [[ANYEXT3:%[0-9]+]]:_(s32) = G_ANYEXT [[AND3]](s16) - ; GFX9: [[BUILD_VECTOR_TRUNC1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[ANYEXT2]](s32), [[ANYEXT3]](s32) - ; GFX9: [[MUL:%[0-9]+]]:_(<2 x s16>) = G_MUL [[BUILD_VECTOR_TRUNC]], [[BUILD_VECTOR_TRUNC1]] - ; GFX9: [[C1:%[0-9]+]]:_(s16) = G_CONSTANT i16 8 - ; GFX9: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 - ; GFX9: [[COPY4:%[0-9]+]]:_(s32) = COPY [[C2]](s32) - ; GFX9: [[BUILD_VECTOR_TRUNC2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY4]](s32), [[C2]](s32) - ; GFX9: [[LSHR:%[0-9]+]]:_(<2 x s16>) = G_LSHR [[MUL]], [[BUILD_VECTOR_TRUNC2]](<2 x s16>) - ; GFX9: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[LSHR]](<2 x s16>) - ; GFX9: [[TRUNC4:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST]](s32) - ; GFX9: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; GFX9: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C3]](s32) - ; GFX9: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR1]](s32) - ; GFX9: [[AND4:%[0-9]+]]:_(s16) = G_AND [[TRUNC4]], [[C]] - ; GFX9: [[AND5:%[0-9]+]]:_(s16) = G_AND [[TRUNC5]], [[C]] - ; GFX9: [[SHL:%[0-9]+]]:_(s16) = G_SHL [[AND5]], [[C1]](s16) - ; GFX9: [[OR:%[0-9]+]]:_(s16) = G_OR [[AND4]], [[SHL]] - ; GFX9: [[ANYEXT4:%[0-9]+]]:_(s32) = G_ANYEXT [[OR]](s16) - ; GFX9: $vgpr0 = COPY [[ANYEXT4]](s32) + ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; GFX9-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 + ; GFX9-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $vgpr3 + ; GFX9-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY]](s32) + ; GFX9-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY1]](s32) + ; GFX9-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[COPY2]](s32) + ; GFX9-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[COPY3]](s32) + ; GFX9-NEXT: [[C:%[0-9]+]]:_(s16) = G_CONSTANT i16 255 + ; GFX9-NEXT: [[AND:%[0-9]+]]:_(s16) = 
G_AND [[TRUNC]], [[C]] + ; GFX9-NEXT: [[AND1:%[0-9]+]]:_(s16) = G_AND [[TRUNC1]], [[C]] + ; GFX9-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[AND]](s16) + ; GFX9-NEXT: [[ANYEXT1:%[0-9]+]]:_(s32) = G_ANYEXT [[AND1]](s16) + ; GFX9-NEXT: [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[ANYEXT]](s32), [[ANYEXT1]](s32) + ; GFX9-NEXT: [[AND2:%[0-9]+]]:_(s16) = G_AND [[TRUNC2]], [[C]] + ; GFX9-NEXT: [[AND3:%[0-9]+]]:_(s16) = G_AND [[TRUNC3]], [[C]] + ; GFX9-NEXT: [[ANYEXT2:%[0-9]+]]:_(s32) = G_ANYEXT [[AND2]](s16) + ; GFX9-NEXT: [[ANYEXT3:%[0-9]+]]:_(s32) = G_ANYEXT [[AND3]](s16) + ; GFX9-NEXT: [[BUILD_VECTOR_TRUNC1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[ANYEXT2]](s32), [[ANYEXT3]](s32) + ; GFX9-NEXT: [[MUL:%[0-9]+]]:_(<2 x s16>) = G_MUL [[BUILD_VECTOR_TRUNC]], [[BUILD_VECTOR_TRUNC1]] + ; GFX9-NEXT: [[C1:%[0-9]+]]:_(s16) = G_CONSTANT i16 8 + ; GFX9-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 + ; GFX9-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY [[C2]](s32) + ; GFX9-NEXT: [[BUILD_VECTOR_TRUNC2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY4]](s32), [[C2]](s32) + ; GFX9-NEXT: [[LSHR:%[0-9]+]]:_(<2 x s16>) = G_LSHR [[MUL]], [[BUILD_VECTOR_TRUNC2]](<2 x s16>) + ; GFX9-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[LSHR]](<2 x s16>) + ; GFX9-NEXT: [[TRUNC4:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST]](s32) + ; GFX9-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; GFX9-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C3]](s32) + ; GFX9-NEXT: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR1]](s32) + ; GFX9-NEXT: [[AND4:%[0-9]+]]:_(s16) = G_AND [[TRUNC4]], [[C]] + ; GFX9-NEXT: [[AND5:%[0-9]+]]:_(s16) = G_AND [[TRUNC5]], [[C]] + ; GFX9-NEXT: [[SHL:%[0-9]+]]:_(s16) = G_SHL [[AND5]], [[C1]](s16) + ; GFX9-NEXT: [[OR:%[0-9]+]]:_(s16) = G_OR [[AND4]], [[SHL]] + ; GFX9-NEXT: [[ANYEXT4:%[0-9]+]]:_(s32) = G_ANYEXT [[OR]](s16) + ; GFX9-NEXT: $vgpr0 = COPY [[ANYEXT4]](s32) %0:_(s32) = COPY $vgpr0 %1:_(s32) = COPY $vgpr1 %2:_(s32) = COPY $vgpr2 @@ -618,125 +618,125 @@ body: | liveins: $vgpr0, $vgpr1 ; GFX8-LABEL: name: test_umulh_v4s8 ; GFX8: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX8: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX8: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 - ; GFX8: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[COPY]], [[C]](s32) - ; GFX8: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; GFX8: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[COPY]], [[C1]](s32) - ; GFX8: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 24 - ; GFX8: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[COPY]], [[C2]](s32) - ; GFX8: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[COPY1]], [[C]](s32) - ; GFX8: [[LSHR4:%[0-9]+]]:_(s32) = G_LSHR [[COPY1]], [[C1]](s32) - ; GFX8: [[LSHR5:%[0-9]+]]:_(s32) = G_LSHR [[COPY1]], [[C2]](s32) - ; GFX8: [[C3:%[0-9]+]]:_(s16) = G_CONSTANT i16 255 - ; GFX8: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY]](s32) - ; GFX8: [[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC]], [[C3]] - ; GFX8: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY1]](s32) - ; GFX8: [[AND1:%[0-9]+]]:_(s16) = G_AND [[TRUNC1]], [[C3]] - ; GFX8: [[MUL:%[0-9]+]]:_(s16) = G_MUL [[AND]], [[AND1]] - ; GFX8: [[C4:%[0-9]+]]:_(s16) = G_CONSTANT i16 8 - ; GFX8: [[LSHR6:%[0-9]+]]:_(s16) = G_LSHR [[MUL]], [[C4]](s16) - ; GFX8: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) - ; GFX8: [[AND2:%[0-9]+]]:_(s16) = G_AND [[TRUNC2]], [[C3]] - ; GFX8: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR3]](s32) - ; GFX8: [[AND3:%[0-9]+]]:_(s16) = G_AND [[TRUNC3]], [[C3]] - ; GFX8: [[MUL1:%[0-9]+]]:_(s16) = G_MUL [[AND2]], [[AND3]] - ; GFX8: [[LSHR7:%[0-9]+]]:_(s16) = G_LSHR [[MUL1]], [[C4]](s16) 
- ; GFX8: [[TRUNC4:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR1]](s32) - ; GFX8: [[AND4:%[0-9]+]]:_(s16) = G_AND [[TRUNC4]], [[C3]] - ; GFX8: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR4]](s32) - ; GFX8: [[AND5:%[0-9]+]]:_(s16) = G_AND [[TRUNC5]], [[C3]] - ; GFX8: [[MUL2:%[0-9]+]]:_(s16) = G_MUL [[AND4]], [[AND5]] - ; GFX8: [[LSHR8:%[0-9]+]]:_(s16) = G_LSHR [[MUL2]], [[C4]](s16) - ; GFX8: [[TRUNC6:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR2]](s32) - ; GFX8: [[AND6:%[0-9]+]]:_(s16) = G_AND [[TRUNC6]], [[C3]] - ; GFX8: [[TRUNC7:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR5]](s32) - ; GFX8: [[AND7:%[0-9]+]]:_(s16) = G_AND [[TRUNC7]], [[C3]] - ; GFX8: [[MUL3:%[0-9]+]]:_(s16) = G_MUL [[AND6]], [[AND7]] - ; GFX8: [[LSHR9:%[0-9]+]]:_(s16) = G_LSHR [[MUL3]], [[C4]](s16) - ; GFX8: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 255 - ; GFX8: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[LSHR6]](s16) - ; GFX8: [[AND8:%[0-9]+]]:_(s32) = G_AND [[ANYEXT]], [[C5]] - ; GFX8: [[ANYEXT1:%[0-9]+]]:_(s32) = G_ANYEXT [[LSHR7]](s16) - ; GFX8: [[AND9:%[0-9]+]]:_(s32) = G_AND [[ANYEXT1]], [[C5]] - ; GFX8: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND9]], [[C]](s32) - ; GFX8: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND8]], [[SHL]] - ; GFX8: [[ANYEXT2:%[0-9]+]]:_(s32) = G_ANYEXT [[LSHR8]](s16) - ; GFX8: [[AND10:%[0-9]+]]:_(s32) = G_AND [[ANYEXT2]], [[C5]] - ; GFX8: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND10]], [[C1]](s32) - ; GFX8: [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]] - ; GFX8: [[ANYEXT3:%[0-9]+]]:_(s32) = G_ANYEXT [[LSHR9]](s16) - ; GFX8: [[AND11:%[0-9]+]]:_(s32) = G_AND [[ANYEXT3]], [[C5]] - ; GFX8: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND11]], [[C2]](s32) - ; GFX8: [[OR2:%[0-9]+]]:_(s32) = G_OR [[OR1]], [[SHL2]] - ; GFX8: $vgpr0 = COPY [[OR2]](s32) + ; GFX8-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; GFX8-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 + ; GFX8-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[COPY]], [[C]](s32) + ; GFX8-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; GFX8-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[COPY]], [[C1]](s32) + ; GFX8-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 24 + ; GFX8-NEXT: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[COPY]], [[C2]](s32) + ; GFX8-NEXT: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[COPY1]], [[C]](s32) + ; GFX8-NEXT: [[LSHR4:%[0-9]+]]:_(s32) = G_LSHR [[COPY1]], [[C1]](s32) + ; GFX8-NEXT: [[LSHR5:%[0-9]+]]:_(s32) = G_LSHR [[COPY1]], [[C2]](s32) + ; GFX8-NEXT: [[C3:%[0-9]+]]:_(s16) = G_CONSTANT i16 255 + ; GFX8-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY]](s32) + ; GFX8-NEXT: [[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC]], [[C3]] + ; GFX8-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY1]](s32) + ; GFX8-NEXT: [[AND1:%[0-9]+]]:_(s16) = G_AND [[TRUNC1]], [[C3]] + ; GFX8-NEXT: [[MUL:%[0-9]+]]:_(s16) = G_MUL [[AND]], [[AND1]] + ; GFX8-NEXT: [[C4:%[0-9]+]]:_(s16) = G_CONSTANT i16 8 + ; GFX8-NEXT: [[LSHR6:%[0-9]+]]:_(s16) = G_LSHR [[MUL]], [[C4]](s16) + ; GFX8-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) + ; GFX8-NEXT: [[AND2:%[0-9]+]]:_(s16) = G_AND [[TRUNC2]], [[C3]] + ; GFX8-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR3]](s32) + ; GFX8-NEXT: [[AND3:%[0-9]+]]:_(s16) = G_AND [[TRUNC3]], [[C3]] + ; GFX8-NEXT: [[MUL1:%[0-9]+]]:_(s16) = G_MUL [[AND2]], [[AND3]] + ; GFX8-NEXT: [[LSHR7:%[0-9]+]]:_(s16) = G_LSHR [[MUL1]], [[C4]](s16) + ; GFX8-NEXT: [[TRUNC4:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR1]](s32) + ; GFX8-NEXT: [[AND4:%[0-9]+]]:_(s16) = G_AND [[TRUNC4]], [[C3]] + ; GFX8-NEXT: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR4]](s32) + ; GFX8-NEXT: [[AND5:%[0-9]+]]:_(s16) = G_AND [[TRUNC5]], [[C3]] + ; GFX8-NEXT: 
[[MUL2:%[0-9]+]]:_(s16) = G_MUL [[AND4]], [[AND5]] + ; GFX8-NEXT: [[LSHR8:%[0-9]+]]:_(s16) = G_LSHR [[MUL2]], [[C4]](s16) + ; GFX8-NEXT: [[TRUNC6:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR2]](s32) + ; GFX8-NEXT: [[AND6:%[0-9]+]]:_(s16) = G_AND [[TRUNC6]], [[C3]] + ; GFX8-NEXT: [[TRUNC7:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR5]](s32) + ; GFX8-NEXT: [[AND7:%[0-9]+]]:_(s16) = G_AND [[TRUNC7]], [[C3]] + ; GFX8-NEXT: [[MUL3:%[0-9]+]]:_(s16) = G_MUL [[AND6]], [[AND7]] + ; GFX8-NEXT: [[LSHR9:%[0-9]+]]:_(s16) = G_LSHR [[MUL3]], [[C4]](s16) + ; GFX8-NEXT: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 255 + ; GFX8-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[LSHR6]](s16) + ; GFX8-NEXT: [[AND8:%[0-9]+]]:_(s32) = G_AND [[ANYEXT]], [[C5]] + ; GFX8-NEXT: [[ANYEXT1:%[0-9]+]]:_(s32) = G_ANYEXT [[LSHR7]](s16) + ; GFX8-NEXT: [[AND9:%[0-9]+]]:_(s32) = G_AND [[ANYEXT1]], [[C5]] + ; GFX8-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND9]], [[C]](s32) + ; GFX8-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND8]], [[SHL]] + ; GFX8-NEXT: [[ANYEXT2:%[0-9]+]]:_(s32) = G_ANYEXT [[LSHR8]](s16) + ; GFX8-NEXT: [[AND10:%[0-9]+]]:_(s32) = G_AND [[ANYEXT2]], [[C5]] + ; GFX8-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND10]], [[C1]](s32) + ; GFX8-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]] + ; GFX8-NEXT: [[ANYEXT3:%[0-9]+]]:_(s32) = G_ANYEXT [[LSHR9]](s16) + ; GFX8-NEXT: [[AND11:%[0-9]+]]:_(s32) = G_AND [[ANYEXT3]], [[C5]] + ; GFX8-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND11]], [[C2]](s32) + ; GFX8-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[OR1]], [[SHL2]] + ; GFX8-NEXT: $vgpr0 = COPY [[OR2]](s32) ; GFX9-LABEL: name: test_umulh_v4s8 ; GFX9: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX9: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX9: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 - ; GFX9: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[COPY]], [[C]](s32) - ; GFX9: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; GFX9: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[COPY]], [[C1]](s32) - ; GFX9: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 24 - ; GFX9: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[COPY]], [[C2]](s32) - ; GFX9: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[COPY1]], [[C]](s32) - ; GFX9: [[LSHR4:%[0-9]+]]:_(s32) = G_LSHR [[COPY1]], [[C1]](s32) - ; GFX9: [[LSHR5:%[0-9]+]]:_(s32) = G_LSHR [[COPY1]], [[C2]](s32) - ; GFX9: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY]](s32) - ; GFX9: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) - ; GFX9: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR1]](s32) - ; GFX9: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR2]](s32) - ; GFX9: [[TRUNC4:%[0-9]+]]:_(s16) = G_TRUNC [[COPY1]](s32) - ; GFX9: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR3]](s32) - ; GFX9: [[TRUNC6:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR4]](s32) - ; GFX9: [[TRUNC7:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR5]](s32) - ; GFX9: [[C3:%[0-9]+]]:_(s16) = G_CONSTANT i16 255 - ; GFX9: [[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC]], [[C3]] - ; GFX9: [[AND1:%[0-9]+]]:_(s16) = G_AND [[TRUNC1]], [[C3]] - ; GFX9: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[AND]](s16) - ; GFX9: [[ANYEXT1:%[0-9]+]]:_(s32) = G_ANYEXT [[AND1]](s16) - ; GFX9: [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[ANYEXT]](s32), [[ANYEXT1]](s32) - ; GFX9: [[AND2:%[0-9]+]]:_(s16) = G_AND [[TRUNC4]], [[C3]] - ; GFX9: [[AND3:%[0-9]+]]:_(s16) = G_AND [[TRUNC5]], [[C3]] - ; GFX9: [[ANYEXT2:%[0-9]+]]:_(s32) = G_ANYEXT [[AND2]](s16) - ; GFX9: [[ANYEXT3:%[0-9]+]]:_(s32) = G_ANYEXT [[AND3]](s16) - ; GFX9: [[BUILD_VECTOR_TRUNC1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[ANYEXT2]](s32), [[ANYEXT3]](s32) - ; GFX9: [[MUL:%[0-9]+]]:_(<2 x s16>) = G_MUL 
[[BUILD_VECTOR_TRUNC]], [[BUILD_VECTOR_TRUNC1]] - ; GFX9: [[COPY2:%[0-9]+]]:_(s32) = COPY [[C]](s32) - ; GFX9: [[COPY3:%[0-9]+]]:_(s32) = COPY [[C]](s32) - ; GFX9: [[BUILD_VECTOR_TRUNC2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY2]](s32), [[COPY3]](s32) - ; GFX9: [[LSHR6:%[0-9]+]]:_(<2 x s16>) = G_LSHR [[MUL]], [[BUILD_VECTOR_TRUNC2]](<2 x s16>) - ; GFX9: [[AND4:%[0-9]+]]:_(s16) = G_AND [[TRUNC2]], [[C3]] - ; GFX9: [[AND5:%[0-9]+]]:_(s16) = G_AND [[TRUNC3]], [[C3]] - ; GFX9: [[ANYEXT4:%[0-9]+]]:_(s32) = G_ANYEXT [[AND4]](s16) - ; GFX9: [[ANYEXT5:%[0-9]+]]:_(s32) = G_ANYEXT [[AND5]](s16) - ; GFX9: [[BUILD_VECTOR_TRUNC3:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[ANYEXT4]](s32), [[ANYEXT5]](s32) - ; GFX9: [[AND6:%[0-9]+]]:_(s16) = G_AND [[TRUNC6]], [[C3]] - ; GFX9: [[AND7:%[0-9]+]]:_(s16) = G_AND [[TRUNC7]], [[C3]] - ; GFX9: [[ANYEXT6:%[0-9]+]]:_(s32) = G_ANYEXT [[AND6]](s16) - ; GFX9: [[ANYEXT7:%[0-9]+]]:_(s32) = G_ANYEXT [[AND7]](s16) - ; GFX9: [[BUILD_VECTOR_TRUNC4:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[ANYEXT6]](s32), [[ANYEXT7]](s32) - ; GFX9: [[MUL1:%[0-9]+]]:_(<2 x s16>) = G_MUL [[BUILD_VECTOR_TRUNC3]], [[BUILD_VECTOR_TRUNC4]] - ; GFX9: [[COPY4:%[0-9]+]]:_(s32) = COPY [[C]](s32) - ; GFX9: [[COPY5:%[0-9]+]]:_(s32) = COPY [[C]](s32) - ; GFX9: [[BUILD_VECTOR_TRUNC5:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY4]](s32), [[COPY5]](s32) - ; GFX9: [[LSHR7:%[0-9]+]]:_(<2 x s16>) = G_LSHR [[MUL1]], [[BUILD_VECTOR_TRUNC5]](<2 x s16>) - ; GFX9: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[LSHR6]](<2 x s16>) - ; GFX9: [[LSHR8:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C1]](s32) - ; GFX9: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[LSHR7]](<2 x s16>) - ; GFX9: [[LSHR9:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C1]](s32) - ; GFX9: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 255 - ; GFX9: [[AND8:%[0-9]+]]:_(s32) = G_AND [[BITCAST]], [[C4]] - ; GFX9: [[AND9:%[0-9]+]]:_(s32) = G_AND [[LSHR8]], [[C4]] - ; GFX9: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND9]], [[C]](s32) - ; GFX9: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND8]], [[SHL]] - ; GFX9: [[AND10:%[0-9]+]]:_(s32) = G_AND [[BITCAST1]], [[C4]] - ; GFX9: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND10]], [[C1]](s32) - ; GFX9: [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]] - ; GFX9: [[AND11:%[0-9]+]]:_(s32) = G_AND [[LSHR9]], [[C4]] - ; GFX9: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND11]], [[C2]](s32) - ; GFX9: [[OR2:%[0-9]+]]:_(s32) = G_OR [[OR1]], [[SHL2]] - ; GFX9: $vgpr0 = COPY [[OR2]](s32) + ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; GFX9-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 + ; GFX9-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[COPY]], [[C]](s32) + ; GFX9-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; GFX9-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[COPY]], [[C1]](s32) + ; GFX9-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 24 + ; GFX9-NEXT: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[COPY]], [[C2]](s32) + ; GFX9-NEXT: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[COPY1]], [[C]](s32) + ; GFX9-NEXT: [[LSHR4:%[0-9]+]]:_(s32) = G_LSHR [[COPY1]], [[C1]](s32) + ; GFX9-NEXT: [[LSHR5:%[0-9]+]]:_(s32) = G_LSHR [[COPY1]], [[C2]](s32) + ; GFX9-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY]](s32) + ; GFX9-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) + ; GFX9-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR1]](s32) + ; GFX9-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR2]](s32) + ; GFX9-NEXT: [[TRUNC4:%[0-9]+]]:_(s16) = G_TRUNC [[COPY1]](s32) + ; GFX9-NEXT: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR3]](s32) + ; GFX9-NEXT: [[TRUNC6:%[0-9]+]]:_(s16) = 
G_TRUNC [[LSHR4]](s32) + ; GFX9-NEXT: [[TRUNC7:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR5]](s32) + ; GFX9-NEXT: [[C3:%[0-9]+]]:_(s16) = G_CONSTANT i16 255 + ; GFX9-NEXT: [[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC]], [[C3]] + ; GFX9-NEXT: [[AND1:%[0-9]+]]:_(s16) = G_AND [[TRUNC1]], [[C3]] + ; GFX9-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[AND]](s16) + ; GFX9-NEXT: [[ANYEXT1:%[0-9]+]]:_(s32) = G_ANYEXT [[AND1]](s16) + ; GFX9-NEXT: [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[ANYEXT]](s32), [[ANYEXT1]](s32) + ; GFX9-NEXT: [[AND2:%[0-9]+]]:_(s16) = G_AND [[TRUNC4]], [[C3]] + ; GFX9-NEXT: [[AND3:%[0-9]+]]:_(s16) = G_AND [[TRUNC5]], [[C3]] + ; GFX9-NEXT: [[ANYEXT2:%[0-9]+]]:_(s32) = G_ANYEXT [[AND2]](s16) + ; GFX9-NEXT: [[ANYEXT3:%[0-9]+]]:_(s32) = G_ANYEXT [[AND3]](s16) + ; GFX9-NEXT: [[BUILD_VECTOR_TRUNC1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[ANYEXT2]](s32), [[ANYEXT3]](s32) + ; GFX9-NEXT: [[MUL:%[0-9]+]]:_(<2 x s16>) = G_MUL [[BUILD_VECTOR_TRUNC]], [[BUILD_VECTOR_TRUNC1]] + ; GFX9-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY [[C]](s32) + ; GFX9-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY [[C]](s32) + ; GFX9-NEXT: [[BUILD_VECTOR_TRUNC2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY2]](s32), [[COPY3]](s32) + ; GFX9-NEXT: [[LSHR6:%[0-9]+]]:_(<2 x s16>) = G_LSHR [[MUL]], [[BUILD_VECTOR_TRUNC2]](<2 x s16>) + ; GFX9-NEXT: [[AND4:%[0-9]+]]:_(s16) = G_AND [[TRUNC2]], [[C3]] + ; GFX9-NEXT: [[AND5:%[0-9]+]]:_(s16) = G_AND [[TRUNC3]], [[C3]] + ; GFX9-NEXT: [[ANYEXT4:%[0-9]+]]:_(s32) = G_ANYEXT [[AND4]](s16) + ; GFX9-NEXT: [[ANYEXT5:%[0-9]+]]:_(s32) = G_ANYEXT [[AND5]](s16) + ; GFX9-NEXT: [[BUILD_VECTOR_TRUNC3:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[ANYEXT4]](s32), [[ANYEXT5]](s32) + ; GFX9-NEXT: [[AND6:%[0-9]+]]:_(s16) = G_AND [[TRUNC6]], [[C3]] + ; GFX9-NEXT: [[AND7:%[0-9]+]]:_(s16) = G_AND [[TRUNC7]], [[C3]] + ; GFX9-NEXT: [[ANYEXT6:%[0-9]+]]:_(s32) = G_ANYEXT [[AND6]](s16) + ; GFX9-NEXT: [[ANYEXT7:%[0-9]+]]:_(s32) = G_ANYEXT [[AND7]](s16) + ; GFX9-NEXT: [[BUILD_VECTOR_TRUNC4:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[ANYEXT6]](s32), [[ANYEXT7]](s32) + ; GFX9-NEXT: [[MUL1:%[0-9]+]]:_(<2 x s16>) = G_MUL [[BUILD_VECTOR_TRUNC3]], [[BUILD_VECTOR_TRUNC4]] + ; GFX9-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY [[C]](s32) + ; GFX9-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY [[C]](s32) + ; GFX9-NEXT: [[BUILD_VECTOR_TRUNC5:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY4]](s32), [[COPY5]](s32) + ; GFX9-NEXT: [[LSHR7:%[0-9]+]]:_(<2 x s16>) = G_LSHR [[MUL1]], [[BUILD_VECTOR_TRUNC5]](<2 x s16>) + ; GFX9-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[LSHR6]](<2 x s16>) + ; GFX9-NEXT: [[LSHR8:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C1]](s32) + ; GFX9-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[LSHR7]](<2 x s16>) + ; GFX9-NEXT: [[LSHR9:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C1]](s32) + ; GFX9-NEXT: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 255 + ; GFX9-NEXT: [[AND8:%[0-9]+]]:_(s32) = G_AND [[BITCAST]], [[C4]] + ; GFX9-NEXT: [[AND9:%[0-9]+]]:_(s32) = G_AND [[LSHR8]], [[C4]] + ; GFX9-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND9]], [[C]](s32) + ; GFX9-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND8]], [[SHL]] + ; GFX9-NEXT: [[AND10:%[0-9]+]]:_(s32) = G_AND [[BITCAST1]], [[C4]] + ; GFX9-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND10]], [[C1]](s32) + ; GFX9-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]] + ; GFX9-NEXT: [[AND11:%[0-9]+]]:_(s32) = G_AND [[LSHR9]], [[C4]] + ; GFX9-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND11]], [[C2]](s32) + ; GFX9-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[OR1]], 
[[SHL2]] + ; GFX9-NEXT: $vgpr0 = COPY [[OR2]](s32) %0:_(s32) = COPY $vgpr0 %1:_(s32) = COPY $vgpr1 %2:_(s8), %3:_(s8), %4:_(s8), %5:_(s8) = G_UNMERGE_VALUES %0 diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-ushlsat.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-ushlsat.mir index 3ed9e0cd6afb..7c93db9ba2af 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-ushlsat.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-ushlsat.mir @@ -1,8 +1,8 @@ # NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py -# RUN: llc -global-isel-abort=0 -march=amdgcn -mcpu=tahiti -run-pass=legalizer %s -o - | FileCheck -check-prefix=GFX6 %s -# RUN: llc -global-isel-abort=0 -march=amdgcn -mcpu=fiji -run-pass=legalizer %s -o - | FileCheck -check-prefix=GFX8 %s -# RUN: llc -global-isel-abort=0 -march=amdgcn -mcpu=gfx900 -run-pass=legalizer %s -o - | FileCheck -check-prefix=GFX9 %s -# RUN: llc -global-isel-abort=0 -march=amdgcn -mcpu=gfx1010 -run-pass=legalizer %s -o - | FileCheck -check-prefix=GFX9 %s +# RUN: llc -march=amdgcn -mcpu=tahiti -run-pass=legalizer %s -o - | FileCheck -check-prefix=GFX6 %s +# RUN: llc -march=amdgcn -mcpu=fiji -run-pass=legalizer %s -o - | FileCheck -check-prefix=GFX8 %s +# RUN: llc -march=amdgcn -mcpu=gfx900 -run-pass=legalizer %s -o - | FileCheck -check-prefix=GFX9 %s +# RUN: llc -march=amdgcn -mcpu=gfx1010 -run-pass=legalizer %s -o - | FileCheck -check-prefix=GFX9 %s --- name: ushlsat_s7 @@ -12,52 +12,52 @@ body: | ; GFX6-LABEL: name: ushlsat_s7 ; GFX6: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX6: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX6: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 127 - ; GFX6: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C]] - ; GFX6: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 25 - ; GFX6: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY]], [[C1]](s32) - ; GFX6: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[SHL]], [[AND]](s32) - ; GFX6: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[SHL1]], [[AND]](s32) - ; GFX6: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 -1 - ; GFX6: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[SHL]](s32), [[LSHR]] - ; GFX6: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[ICMP]](s1), [[C2]], [[SHL1]] - ; GFX6: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[SELECT]], [[C1]](s32) - ; GFX6: $vgpr0 = COPY [[LSHR1]](s32) + ; GFX6-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; GFX6-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 127 + ; GFX6-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C]] + ; GFX6-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 25 + ; GFX6-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY]], [[C1]](s32) + ; GFX6-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[SHL]], [[AND]](s32) + ; GFX6-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[SHL1]], [[AND]](s32) + ; GFX6-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 -1 + ; GFX6-NEXT: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[SHL]](s32), [[LSHR]] + ; GFX6-NEXT: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[ICMP]](s1), [[C2]], [[SHL1]] + ; GFX6-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[SELECT]], [[C1]](s32) + ; GFX6-NEXT: $vgpr0 = COPY [[LSHR1]](s32) ; GFX8-LABEL: name: ushlsat_s7 ; GFX8: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX8: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX8: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY]](s32) - ; GFX8: [[C:%[0-9]+]]:_(s16) = G_CONSTANT i16 127 - ; GFX8: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY1]](s32) - ; GFX8: [[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC1]], [[C]] - ; GFX8: [[C1:%[0-9]+]]:_(s16) = G_CONSTANT i16 9 - ; GFX8: [[SHL:%[0-9]+]]:_(s16) = G_SHL [[TRUNC]], [[C1]](s16) - ; GFX8: 
[[SHL1:%[0-9]+]]:_(s16) = G_SHL [[SHL]], [[AND]](s16) - ; GFX8: [[LSHR:%[0-9]+]]:_(s16) = G_LSHR [[SHL1]], [[AND]](s16) - ; GFX8: [[C2:%[0-9]+]]:_(s16) = G_CONSTANT i16 -1 - ; GFX8: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[SHL]](s16), [[LSHR]] - ; GFX8: [[SELECT:%[0-9]+]]:_(s16) = G_SELECT [[ICMP]](s1), [[C2]], [[SHL1]] - ; GFX8: [[LSHR1:%[0-9]+]]:_(s16) = G_LSHR [[SELECT]], [[C1]](s16) - ; GFX8: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[LSHR1]](s16) - ; GFX8: $vgpr0 = COPY [[ANYEXT]](s32) + ; GFX8-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; GFX8-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY]](s32) + ; GFX8-NEXT: [[C:%[0-9]+]]:_(s16) = G_CONSTANT i16 127 + ; GFX8-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY1]](s32) + ; GFX8-NEXT: [[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC1]], [[C]] + ; GFX8-NEXT: [[C1:%[0-9]+]]:_(s16) = G_CONSTANT i16 9 + ; GFX8-NEXT: [[SHL:%[0-9]+]]:_(s16) = G_SHL [[TRUNC]], [[C1]](s16) + ; GFX8-NEXT: [[SHL1:%[0-9]+]]:_(s16) = G_SHL [[SHL]], [[AND]](s16) + ; GFX8-NEXT: [[LSHR:%[0-9]+]]:_(s16) = G_LSHR [[SHL1]], [[AND]](s16) + ; GFX8-NEXT: [[C2:%[0-9]+]]:_(s16) = G_CONSTANT i16 -1 + ; GFX8-NEXT: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[SHL]](s16), [[LSHR]] + ; GFX8-NEXT: [[SELECT:%[0-9]+]]:_(s16) = G_SELECT [[ICMP]](s1), [[C2]], [[SHL1]] + ; GFX8-NEXT: [[LSHR1:%[0-9]+]]:_(s16) = G_LSHR [[SELECT]], [[C1]](s16) + ; GFX8-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[LSHR1]](s16) + ; GFX8-NEXT: $vgpr0 = COPY [[ANYEXT]](s32) ; GFX9-LABEL: name: ushlsat_s7 ; GFX9: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX9: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX9: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY]](s32) - ; GFX9: [[C:%[0-9]+]]:_(s16) = G_CONSTANT i16 127 - ; GFX9: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY1]](s32) - ; GFX9: [[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC1]], [[C]] - ; GFX9: [[C1:%[0-9]+]]:_(s16) = G_CONSTANT i16 9 - ; GFX9: [[SHL:%[0-9]+]]:_(s16) = G_SHL [[TRUNC]], [[C1]](s16) - ; GFX9: [[SHL1:%[0-9]+]]:_(s16) = G_SHL [[SHL]], [[AND]](s16) - ; GFX9: [[LSHR:%[0-9]+]]:_(s16) = G_LSHR [[SHL1]], [[AND]](s16) - ; GFX9: [[C2:%[0-9]+]]:_(s16) = G_CONSTANT i16 -1 - ; GFX9: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[SHL]](s16), [[LSHR]] - ; GFX9: [[SELECT:%[0-9]+]]:_(s16) = G_SELECT [[ICMP]](s1), [[C2]], [[SHL1]] - ; GFX9: [[LSHR1:%[0-9]+]]:_(s16) = G_LSHR [[SELECT]], [[C1]](s16) - ; GFX9: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[LSHR1]](s16) - ; GFX9: $vgpr0 = COPY [[ANYEXT]](s32) + ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; GFX9-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY]](s32) + ; GFX9-NEXT: [[C:%[0-9]+]]:_(s16) = G_CONSTANT i16 127 + ; GFX9-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY1]](s32) + ; GFX9-NEXT: [[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC1]], [[C]] + ; GFX9-NEXT: [[C1:%[0-9]+]]:_(s16) = G_CONSTANT i16 9 + ; GFX9-NEXT: [[SHL:%[0-9]+]]:_(s16) = G_SHL [[TRUNC]], [[C1]](s16) + ; GFX9-NEXT: [[SHL1:%[0-9]+]]:_(s16) = G_SHL [[SHL]], [[AND]](s16) + ; GFX9-NEXT: [[LSHR:%[0-9]+]]:_(s16) = G_LSHR [[SHL1]], [[AND]](s16) + ; GFX9-NEXT: [[C2:%[0-9]+]]:_(s16) = G_CONSTANT i16 -1 + ; GFX9-NEXT: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[SHL]](s16), [[LSHR]] + ; GFX9-NEXT: [[SELECT:%[0-9]+]]:_(s16) = G_SELECT [[ICMP]](s1), [[C2]], [[SHL1]] + ; GFX9-NEXT: [[LSHR1:%[0-9]+]]:_(s16) = G_LSHR [[SELECT]], [[C1]](s16) + ; GFX9-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[LSHR1]](s16) + ; GFX9-NEXT: $vgpr0 = COPY [[ANYEXT]](s32) %0:_(s32) = COPY $vgpr0 %1:_(s32) = COPY $vgpr1 %2:_(s7) = G_TRUNC %0 @@ -75,52 +75,52 @@ body: | ; GFX6-LABEL: name: 
ushlsat_s8 ; GFX6: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX6: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX6: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 255 - ; GFX6: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C]] - ; GFX6: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 24 - ; GFX6: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY]], [[C1]](s32) - ; GFX6: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[SHL]], [[AND]](s32) - ; GFX6: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[SHL1]], [[AND]](s32) - ; GFX6: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 -1 - ; GFX6: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[SHL]](s32), [[LSHR]] - ; GFX6: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[ICMP]](s1), [[C2]], [[SHL1]] - ; GFX6: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[SELECT]], [[C1]](s32) - ; GFX6: $vgpr0 = COPY [[LSHR1]](s32) + ; GFX6-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; GFX6-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 255 + ; GFX6-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C]] + ; GFX6-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 24 + ; GFX6-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY]], [[C1]](s32) + ; GFX6-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[SHL]], [[AND]](s32) + ; GFX6-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[SHL1]], [[AND]](s32) + ; GFX6-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 -1 + ; GFX6-NEXT: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[SHL]](s32), [[LSHR]] + ; GFX6-NEXT: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[ICMP]](s1), [[C2]], [[SHL1]] + ; GFX6-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[SELECT]], [[C1]](s32) + ; GFX6-NEXT: $vgpr0 = COPY [[LSHR1]](s32) ; GFX8-LABEL: name: ushlsat_s8 ; GFX8: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX8: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX8: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY]](s32) - ; GFX8: [[C:%[0-9]+]]:_(s16) = G_CONSTANT i16 255 - ; GFX8: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY1]](s32) - ; GFX8: [[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC1]], [[C]] - ; GFX8: [[C1:%[0-9]+]]:_(s16) = G_CONSTANT i16 8 - ; GFX8: [[SHL:%[0-9]+]]:_(s16) = G_SHL [[TRUNC]], [[C1]](s16) - ; GFX8: [[SHL1:%[0-9]+]]:_(s16) = G_SHL [[SHL]], [[AND]](s16) - ; GFX8: [[LSHR:%[0-9]+]]:_(s16) = G_LSHR [[SHL1]], [[AND]](s16) - ; GFX8: [[C2:%[0-9]+]]:_(s16) = G_CONSTANT i16 -1 - ; GFX8: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[SHL]](s16), [[LSHR]] - ; GFX8: [[SELECT:%[0-9]+]]:_(s16) = G_SELECT [[ICMP]](s1), [[C2]], [[SHL1]] - ; GFX8: [[LSHR1:%[0-9]+]]:_(s16) = G_LSHR [[SELECT]], [[C1]](s16) - ; GFX8: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[LSHR1]](s16) - ; GFX8: $vgpr0 = COPY [[ANYEXT]](s32) + ; GFX8-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; GFX8-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY]](s32) + ; GFX8-NEXT: [[C:%[0-9]+]]:_(s16) = G_CONSTANT i16 255 + ; GFX8-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY1]](s32) + ; GFX8-NEXT: [[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC1]], [[C]] + ; GFX8-NEXT: [[C1:%[0-9]+]]:_(s16) = G_CONSTANT i16 8 + ; GFX8-NEXT: [[SHL:%[0-9]+]]:_(s16) = G_SHL [[TRUNC]], [[C1]](s16) + ; GFX8-NEXT: [[SHL1:%[0-9]+]]:_(s16) = G_SHL [[SHL]], [[AND]](s16) + ; GFX8-NEXT: [[LSHR:%[0-9]+]]:_(s16) = G_LSHR [[SHL1]], [[AND]](s16) + ; GFX8-NEXT: [[C2:%[0-9]+]]:_(s16) = G_CONSTANT i16 -1 + ; GFX8-NEXT: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[SHL]](s16), [[LSHR]] + ; GFX8-NEXT: [[SELECT:%[0-9]+]]:_(s16) = G_SELECT [[ICMP]](s1), [[C2]], [[SHL1]] + ; GFX8-NEXT: [[LSHR1:%[0-9]+]]:_(s16) = G_LSHR [[SELECT]], [[C1]](s16) + ; GFX8-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[LSHR1]](s16) + ; GFX8-NEXT: $vgpr0 = COPY [[ANYEXT]](s32) ; GFX9-LABEL: name: ushlsat_s8 ; GFX9: 
[[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX9: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX9: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY]](s32) - ; GFX9: [[C:%[0-9]+]]:_(s16) = G_CONSTANT i16 255 - ; GFX9: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY1]](s32) - ; GFX9: [[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC1]], [[C]] - ; GFX9: [[C1:%[0-9]+]]:_(s16) = G_CONSTANT i16 8 - ; GFX9: [[SHL:%[0-9]+]]:_(s16) = G_SHL [[TRUNC]], [[C1]](s16) - ; GFX9: [[SHL1:%[0-9]+]]:_(s16) = G_SHL [[SHL]], [[AND]](s16) - ; GFX9: [[LSHR:%[0-9]+]]:_(s16) = G_LSHR [[SHL1]], [[AND]](s16) - ; GFX9: [[C2:%[0-9]+]]:_(s16) = G_CONSTANT i16 -1 - ; GFX9: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[SHL]](s16), [[LSHR]] - ; GFX9: [[SELECT:%[0-9]+]]:_(s16) = G_SELECT [[ICMP]](s1), [[C2]], [[SHL1]] - ; GFX9: [[LSHR1:%[0-9]+]]:_(s16) = G_LSHR [[SELECT]], [[C1]](s16) - ; GFX9: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[LSHR1]](s16) - ; GFX9: $vgpr0 = COPY [[ANYEXT]](s32) + ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; GFX9-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY]](s32) + ; GFX9-NEXT: [[C:%[0-9]+]]:_(s16) = G_CONSTANT i16 255 + ; GFX9-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY1]](s32) + ; GFX9-NEXT: [[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC1]], [[C]] + ; GFX9-NEXT: [[C1:%[0-9]+]]:_(s16) = G_CONSTANT i16 8 + ; GFX9-NEXT: [[SHL:%[0-9]+]]:_(s16) = G_SHL [[TRUNC]], [[C1]](s16) + ; GFX9-NEXT: [[SHL1:%[0-9]+]]:_(s16) = G_SHL [[SHL]], [[AND]](s16) + ; GFX9-NEXT: [[LSHR:%[0-9]+]]:_(s16) = G_LSHR [[SHL1]], [[AND]](s16) + ; GFX9-NEXT: [[C2:%[0-9]+]]:_(s16) = G_CONSTANT i16 -1 + ; GFX9-NEXT: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[SHL]](s16), [[LSHR]] + ; GFX9-NEXT: [[SELECT:%[0-9]+]]:_(s16) = G_SELECT [[ICMP]](s1), [[C2]], [[SHL1]] + ; GFX9-NEXT: [[LSHR1:%[0-9]+]]:_(s16) = G_LSHR [[SELECT]], [[C1]](s16) + ; GFX9-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[LSHR1]](s16) + ; GFX9-NEXT: $vgpr0 = COPY [[ANYEXT]](s32) %0:_(s32) = COPY $vgpr0 %1:_(s32) = COPY $vgpr1 %2:_(s8) = G_TRUNC %0 @@ -138,103 +138,103 @@ body: | ; GFX6-LABEL: name: ushlsat_v2s8 ; GFX6: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX6: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX6: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 - ; GFX6: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[COPY]], [[C]](s32) - ; GFX6: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 24 - ; GFX6: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[COPY1]], [[C]](s32) - ; GFX6: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 255 - ; GFX6: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C2]] - ; GFX6: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY]], [[C1]](s32) - ; GFX6: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[SHL]], [[AND]](s32) - ; GFX6: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[SHL1]], [[AND]](s32) - ; GFX6: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 -1 - ; GFX6: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[SHL]](s32), [[LSHR2]] - ; GFX6: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[ICMP]](s1), [[C3]], [[SHL1]] - ; GFX6: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[SELECT]], [[C1]](s32) - ; GFX6: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LSHR1]], [[C2]] - ; GFX6: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[LSHR]], [[C1]](s32) - ; GFX6: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[SHL2]], [[AND1]](s32) - ; GFX6: [[LSHR4:%[0-9]+]]:_(s32) = G_LSHR [[SHL3]], [[AND1]](s32) - ; GFX6: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[SHL2]](s32), [[LSHR4]] - ; GFX6: [[SELECT1:%[0-9]+]]:_(s32) = G_SELECT [[ICMP1]](s1), [[C3]], [[SHL3]] - ; GFX6: [[LSHR5:%[0-9]+]]:_(s32) = G_LSHR [[SELECT1]], [[C1]](s32) - ; GFX6: [[C4:%[0-9]+]]:_(s16) = G_CONSTANT i16 255 - ; GFX6: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC 
[[LSHR3]](s32) - ; GFX6: [[AND2:%[0-9]+]]:_(s16) = G_AND [[TRUNC]], [[C4]] - ; GFX6: [[COPY2:%[0-9]+]]:_(s32) = COPY [[C]](s32) - ; GFX6: [[AND3:%[0-9]+]]:_(s32) = G_AND [[LSHR5]], [[C2]] - ; GFX6: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[COPY2]](s32) - ; GFX6: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[SHL4]](s32) - ; GFX6: [[OR:%[0-9]+]]:_(s16) = G_OR [[AND2]], [[TRUNC1]] - ; GFX6: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[OR]](s16) - ; GFX6: $vgpr0 = COPY [[ANYEXT]](s32) + ; GFX6-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; GFX6-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 + ; GFX6-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[COPY]], [[C]](s32) + ; GFX6-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 24 + ; GFX6-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[COPY1]], [[C]](s32) + ; GFX6-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 255 + ; GFX6-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C2]] + ; GFX6-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY]], [[C1]](s32) + ; GFX6-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[SHL]], [[AND]](s32) + ; GFX6-NEXT: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[SHL1]], [[AND]](s32) + ; GFX6-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 -1 + ; GFX6-NEXT: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[SHL]](s32), [[LSHR2]] + ; GFX6-NEXT: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[ICMP]](s1), [[C3]], [[SHL1]] + ; GFX6-NEXT: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[SELECT]], [[C1]](s32) + ; GFX6-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LSHR1]], [[C2]] + ; GFX6-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[LSHR]], [[C1]](s32) + ; GFX6-NEXT: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[SHL2]], [[AND1]](s32) + ; GFX6-NEXT: [[LSHR4:%[0-9]+]]:_(s32) = G_LSHR [[SHL3]], [[AND1]](s32) + ; GFX6-NEXT: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[SHL2]](s32), [[LSHR4]] + ; GFX6-NEXT: [[SELECT1:%[0-9]+]]:_(s32) = G_SELECT [[ICMP1]](s1), [[C3]], [[SHL3]] + ; GFX6-NEXT: [[LSHR5:%[0-9]+]]:_(s32) = G_LSHR [[SELECT1]], [[C1]](s32) + ; GFX6-NEXT: [[C4:%[0-9]+]]:_(s16) = G_CONSTANT i16 255 + ; GFX6-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR3]](s32) + ; GFX6-NEXT: [[AND2:%[0-9]+]]:_(s16) = G_AND [[TRUNC]], [[C4]] + ; GFX6-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY [[C]](s32) + ; GFX6-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[LSHR5]], [[C2]] + ; GFX6-NEXT: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[COPY2]](s32) + ; GFX6-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[SHL4]](s32) + ; GFX6-NEXT: [[OR:%[0-9]+]]:_(s16) = G_OR [[AND2]], [[TRUNC1]] + ; GFX6-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[OR]](s16) + ; GFX6-NEXT: $vgpr0 = COPY [[ANYEXT]](s32) ; GFX8-LABEL: name: ushlsat_v2s8 ; GFX8: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX8: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX8: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 - ; GFX8: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[COPY]], [[C]](s32) - ; GFX8: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[COPY1]], [[C]](s32) - ; GFX8: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY]](s32) - ; GFX8: [[C1:%[0-9]+]]:_(s16) = G_CONSTANT i16 255 - ; GFX8: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY1]](s32) - ; GFX8: [[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC1]], [[C1]] - ; GFX8: [[C2:%[0-9]+]]:_(s16) = G_CONSTANT i16 8 - ; GFX8: [[SHL:%[0-9]+]]:_(s16) = G_SHL [[TRUNC]], [[C2]](s16) - ; GFX8: [[SHL1:%[0-9]+]]:_(s16) = G_SHL [[SHL]], [[AND]](s16) - ; GFX8: [[LSHR2:%[0-9]+]]:_(s16) = G_LSHR [[SHL1]], [[AND]](s16) - ; GFX8: [[C3:%[0-9]+]]:_(s16) = G_CONSTANT i16 -1 - ; GFX8: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[SHL]](s16), [[LSHR2]] - ; GFX8: [[SELECT:%[0-9]+]]:_(s16) = G_SELECT [[ICMP]](s1), [[C3]], [[SHL1]] - ; 
GFX8: [[LSHR3:%[0-9]+]]:_(s16) = G_LSHR [[SELECT]], [[C2]](s16) - ; GFX8: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) - ; GFX8: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR1]](s32) - ; GFX8: [[AND1:%[0-9]+]]:_(s16) = G_AND [[TRUNC3]], [[C1]] - ; GFX8: [[SHL2:%[0-9]+]]:_(s16) = G_SHL [[TRUNC2]], [[C2]](s16) - ; GFX8: [[SHL3:%[0-9]+]]:_(s16) = G_SHL [[SHL2]], [[AND1]](s16) - ; GFX8: [[LSHR4:%[0-9]+]]:_(s16) = G_LSHR [[SHL3]], [[AND1]](s16) - ; GFX8: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[SHL2]](s16), [[LSHR4]] - ; GFX8: [[SELECT1:%[0-9]+]]:_(s16) = G_SELECT [[ICMP1]](s1), [[C3]], [[SHL3]] - ; GFX8: [[LSHR5:%[0-9]+]]:_(s16) = G_LSHR [[SELECT1]], [[C2]](s16) - ; GFX8: [[AND2:%[0-9]+]]:_(s16) = G_AND [[LSHR3]], [[C1]] - ; GFX8: [[AND3:%[0-9]+]]:_(s16) = G_AND [[LSHR5]], [[C1]] - ; GFX8: [[SHL4:%[0-9]+]]:_(s16) = G_SHL [[AND3]], [[C2]](s16) - ; GFX8: [[OR:%[0-9]+]]:_(s16) = G_OR [[AND2]], [[SHL4]] - ; GFX8: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[OR]](s16) - ; GFX8: $vgpr0 = COPY [[ANYEXT]](s32) + ; GFX8-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; GFX8-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 + ; GFX8-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[COPY]], [[C]](s32) + ; GFX8-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[COPY1]], [[C]](s32) + ; GFX8-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY]](s32) + ; GFX8-NEXT: [[C1:%[0-9]+]]:_(s16) = G_CONSTANT i16 255 + ; GFX8-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY1]](s32) + ; GFX8-NEXT: [[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC1]], [[C1]] + ; GFX8-NEXT: [[C2:%[0-9]+]]:_(s16) = G_CONSTANT i16 8 + ; GFX8-NEXT: [[SHL:%[0-9]+]]:_(s16) = G_SHL [[TRUNC]], [[C2]](s16) + ; GFX8-NEXT: [[SHL1:%[0-9]+]]:_(s16) = G_SHL [[SHL]], [[AND]](s16) + ; GFX8-NEXT: [[LSHR2:%[0-9]+]]:_(s16) = G_LSHR [[SHL1]], [[AND]](s16) + ; GFX8-NEXT: [[C3:%[0-9]+]]:_(s16) = G_CONSTANT i16 -1 + ; GFX8-NEXT: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[SHL]](s16), [[LSHR2]] + ; GFX8-NEXT: [[SELECT:%[0-9]+]]:_(s16) = G_SELECT [[ICMP]](s1), [[C3]], [[SHL1]] + ; GFX8-NEXT: [[LSHR3:%[0-9]+]]:_(s16) = G_LSHR [[SELECT]], [[C2]](s16) + ; GFX8-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) + ; GFX8-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR1]](s32) + ; GFX8-NEXT: [[AND1:%[0-9]+]]:_(s16) = G_AND [[TRUNC3]], [[C1]] + ; GFX8-NEXT: [[SHL2:%[0-9]+]]:_(s16) = G_SHL [[TRUNC2]], [[C2]](s16) + ; GFX8-NEXT: [[SHL3:%[0-9]+]]:_(s16) = G_SHL [[SHL2]], [[AND1]](s16) + ; GFX8-NEXT: [[LSHR4:%[0-9]+]]:_(s16) = G_LSHR [[SHL3]], [[AND1]](s16) + ; GFX8-NEXT: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[SHL2]](s16), [[LSHR4]] + ; GFX8-NEXT: [[SELECT1:%[0-9]+]]:_(s16) = G_SELECT [[ICMP1]](s1), [[C3]], [[SHL3]] + ; GFX8-NEXT: [[LSHR5:%[0-9]+]]:_(s16) = G_LSHR [[SELECT1]], [[C2]](s16) + ; GFX8-NEXT: [[AND2:%[0-9]+]]:_(s16) = G_AND [[LSHR3]], [[C1]] + ; GFX8-NEXT: [[AND3:%[0-9]+]]:_(s16) = G_AND [[LSHR5]], [[C1]] + ; GFX8-NEXT: [[SHL4:%[0-9]+]]:_(s16) = G_SHL [[AND3]], [[C2]](s16) + ; GFX8-NEXT: [[OR:%[0-9]+]]:_(s16) = G_OR [[AND2]], [[SHL4]] + ; GFX8-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[OR]](s16) + ; GFX8-NEXT: $vgpr0 = COPY [[ANYEXT]](s32) ; GFX9-LABEL: name: ushlsat_v2s8 ; GFX9: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX9: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX9: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 - ; GFX9: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[COPY]], [[C]](s32) - ; GFX9: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[COPY1]], [[C]](s32) - ; GFX9: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY]](s32) - ; GFX9: [[C1:%[0-9]+]]:_(s16) = G_CONSTANT i16 255 - ; GFX9: 
[[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY1]](s32) - ; GFX9: [[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC1]], [[C1]] - ; GFX9: [[C2:%[0-9]+]]:_(s16) = G_CONSTANT i16 8 - ; GFX9: [[SHL:%[0-9]+]]:_(s16) = G_SHL [[TRUNC]], [[C2]](s16) - ; GFX9: [[SHL1:%[0-9]+]]:_(s16) = G_SHL [[SHL]], [[AND]](s16) - ; GFX9: [[LSHR2:%[0-9]+]]:_(s16) = G_LSHR [[SHL1]], [[AND]](s16) - ; GFX9: [[C3:%[0-9]+]]:_(s16) = G_CONSTANT i16 -1 - ; GFX9: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[SHL]](s16), [[LSHR2]] - ; GFX9: [[SELECT:%[0-9]+]]:_(s16) = G_SELECT [[ICMP]](s1), [[C3]], [[SHL1]] - ; GFX9: [[LSHR3:%[0-9]+]]:_(s16) = G_LSHR [[SELECT]], [[C2]](s16) - ; GFX9: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) - ; GFX9: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR1]](s32) - ; GFX9: [[AND1:%[0-9]+]]:_(s16) = G_AND [[TRUNC3]], [[C1]] - ; GFX9: [[SHL2:%[0-9]+]]:_(s16) = G_SHL [[TRUNC2]], [[C2]](s16) - ; GFX9: [[SHL3:%[0-9]+]]:_(s16) = G_SHL [[SHL2]], [[AND1]](s16) - ; GFX9: [[LSHR4:%[0-9]+]]:_(s16) = G_LSHR [[SHL3]], [[AND1]](s16) - ; GFX9: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[SHL2]](s16), [[LSHR4]] - ; GFX9: [[SELECT1:%[0-9]+]]:_(s16) = G_SELECT [[ICMP1]](s1), [[C3]], [[SHL3]] - ; GFX9: [[LSHR5:%[0-9]+]]:_(s16) = G_LSHR [[SELECT1]], [[C2]](s16) - ; GFX9: [[AND2:%[0-9]+]]:_(s16) = G_AND [[LSHR3]], [[C1]] - ; GFX9: [[AND3:%[0-9]+]]:_(s16) = G_AND [[LSHR5]], [[C1]] - ; GFX9: [[SHL4:%[0-9]+]]:_(s16) = G_SHL [[AND3]], [[C2]](s16) - ; GFX9: [[OR:%[0-9]+]]:_(s16) = G_OR [[AND2]], [[SHL4]] - ; GFX9: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[OR]](s16) - ; GFX9: $vgpr0 = COPY [[ANYEXT]](s32) + ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; GFX9-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 + ; GFX9-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[COPY]], [[C]](s32) + ; GFX9-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[COPY1]], [[C]](s32) + ; GFX9-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY]](s32) + ; GFX9-NEXT: [[C1:%[0-9]+]]:_(s16) = G_CONSTANT i16 255 + ; GFX9-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY1]](s32) + ; GFX9-NEXT: [[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC1]], [[C1]] + ; GFX9-NEXT: [[C2:%[0-9]+]]:_(s16) = G_CONSTANT i16 8 + ; GFX9-NEXT: [[SHL:%[0-9]+]]:_(s16) = G_SHL [[TRUNC]], [[C2]](s16) + ; GFX9-NEXT: [[SHL1:%[0-9]+]]:_(s16) = G_SHL [[SHL]], [[AND]](s16) + ; GFX9-NEXT: [[LSHR2:%[0-9]+]]:_(s16) = G_LSHR [[SHL1]], [[AND]](s16) + ; GFX9-NEXT: [[C3:%[0-9]+]]:_(s16) = G_CONSTANT i16 -1 + ; GFX9-NEXT: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[SHL]](s16), [[LSHR2]] + ; GFX9-NEXT: [[SELECT:%[0-9]+]]:_(s16) = G_SELECT [[ICMP]](s1), [[C3]], [[SHL1]] + ; GFX9-NEXT: [[LSHR3:%[0-9]+]]:_(s16) = G_LSHR [[SELECT]], [[C2]](s16) + ; GFX9-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) + ; GFX9-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR1]](s32) + ; GFX9-NEXT: [[AND1:%[0-9]+]]:_(s16) = G_AND [[TRUNC3]], [[C1]] + ; GFX9-NEXT: [[SHL2:%[0-9]+]]:_(s16) = G_SHL [[TRUNC2]], [[C2]](s16) + ; GFX9-NEXT: [[SHL3:%[0-9]+]]:_(s16) = G_SHL [[SHL2]], [[AND1]](s16) + ; GFX9-NEXT: [[LSHR4:%[0-9]+]]:_(s16) = G_LSHR [[SHL3]], [[AND1]](s16) + ; GFX9-NEXT: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[SHL2]](s16), [[LSHR4]] + ; GFX9-NEXT: [[SELECT1:%[0-9]+]]:_(s16) = G_SELECT [[ICMP1]](s1), [[C3]], [[SHL3]] + ; GFX9-NEXT: [[LSHR5:%[0-9]+]]:_(s16) = G_LSHR [[SELECT1]], [[C2]](s16) + ; GFX9-NEXT: [[AND2:%[0-9]+]]:_(s16) = G_AND [[LSHR3]], [[C1]] + ; GFX9-NEXT: [[AND3:%[0-9]+]]:_(s16) = G_AND [[LSHR5]], [[C1]] + ; GFX9-NEXT: [[SHL4:%[0-9]+]]:_(s16) = G_SHL [[AND3]], [[C2]](s16) + ; GFX9-NEXT: [[OR:%[0-9]+]]:_(s16) = G_OR 
[[AND2]], [[SHL4]] + ; GFX9-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[OR]](s16) + ; GFX9-NEXT: $vgpr0 = COPY [[ANYEXT]](s32) %0:_(s32) = COPY $vgpr0 %1:_(s32) = COPY $vgpr1 %2:_(s16) = G_TRUNC %0 @@ -255,42 +255,42 @@ body: | ; GFX6-LABEL: name: ushlsat_s16 ; GFX6: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX6: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX6: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 - ; GFX6: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C]] - ; GFX6: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; GFX6: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY]], [[C1]](s32) - ; GFX6: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[SHL]], [[AND]](s32) - ; GFX6: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[SHL1]], [[AND]](s32) - ; GFX6: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 -1 - ; GFX6: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[SHL]](s32), [[LSHR]] - ; GFX6: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[ICMP]](s1), [[C2]], [[SHL1]] - ; GFX6: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[SELECT]], [[C1]](s32) - ; GFX6: $vgpr0 = COPY [[LSHR1]](s32) + ; GFX6-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; GFX6-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 + ; GFX6-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C]] + ; GFX6-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; GFX6-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY]], [[C1]](s32) + ; GFX6-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[SHL]], [[AND]](s32) + ; GFX6-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[SHL1]], [[AND]](s32) + ; GFX6-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 -1 + ; GFX6-NEXT: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[SHL]](s32), [[LSHR]] + ; GFX6-NEXT: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[ICMP]](s1), [[C2]], [[SHL1]] + ; GFX6-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[SELECT]], [[C1]](s32) + ; GFX6-NEXT: $vgpr0 = COPY [[LSHR1]](s32) ; GFX8-LABEL: name: ushlsat_s16 ; GFX8: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX8: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX8: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY]](s32) - ; GFX8: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY1]](s32) - ; GFX8: [[SHL:%[0-9]+]]:_(s16) = G_SHL [[TRUNC]], [[TRUNC1]](s16) - ; GFX8: [[LSHR:%[0-9]+]]:_(s16) = G_LSHR [[SHL]], [[TRUNC1]](s16) - ; GFX8: [[C:%[0-9]+]]:_(s16) = G_CONSTANT i16 -1 - ; GFX8: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[TRUNC]](s16), [[LSHR]] - ; GFX8: [[SELECT:%[0-9]+]]:_(s16) = G_SELECT [[ICMP]](s1), [[C]], [[SHL]] - ; GFX8: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[SELECT]](s16) - ; GFX8: $vgpr0 = COPY [[ANYEXT]](s32) + ; GFX8-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; GFX8-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY]](s32) + ; GFX8-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY1]](s32) + ; GFX8-NEXT: [[SHL:%[0-9]+]]:_(s16) = G_SHL [[TRUNC]], [[TRUNC1]](s16) + ; GFX8-NEXT: [[LSHR:%[0-9]+]]:_(s16) = G_LSHR [[SHL]], [[TRUNC1]](s16) + ; GFX8-NEXT: [[C:%[0-9]+]]:_(s16) = G_CONSTANT i16 -1 + ; GFX8-NEXT: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[TRUNC]](s16), [[LSHR]] + ; GFX8-NEXT: [[SELECT:%[0-9]+]]:_(s16) = G_SELECT [[ICMP]](s1), [[C]], [[SHL]] + ; GFX8-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[SELECT]](s16) + ; GFX8-NEXT: $vgpr0 = COPY [[ANYEXT]](s32) ; GFX9-LABEL: name: ushlsat_s16 ; GFX9: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX9: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX9: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY]](s32) - ; GFX9: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY1]](s32) - ; GFX9: [[SHL:%[0-9]+]]:_(s16) = G_SHL [[TRUNC]], [[TRUNC1]](s16) - ; GFX9: [[LSHR:%[0-9]+]]:_(s16) = G_LSHR [[SHL]], [[TRUNC1]](s16) - ; 
GFX9: [[C:%[0-9]+]]:_(s16) = G_CONSTANT i16 -1 - ; GFX9: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[TRUNC]](s16), [[LSHR]] - ; GFX9: [[SELECT:%[0-9]+]]:_(s16) = G_SELECT [[ICMP]](s1), [[C]], [[SHL]] - ; GFX9: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[SELECT]](s16) - ; GFX9: $vgpr0 = COPY [[ANYEXT]](s32) + ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; GFX9-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY]](s32) + ; GFX9-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY1]](s32) + ; GFX9-NEXT: [[SHL:%[0-9]+]]:_(s16) = G_SHL [[TRUNC]], [[TRUNC1]](s16) + ; GFX9-NEXT: [[LSHR:%[0-9]+]]:_(s16) = G_LSHR [[SHL]], [[TRUNC1]](s16) + ; GFX9-NEXT: [[C:%[0-9]+]]:_(s16) = G_CONSTANT i16 -1 + ; GFX9-NEXT: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[TRUNC]](s16), [[LSHR]] + ; GFX9-NEXT: [[SELECT:%[0-9]+]]:_(s16) = G_SELECT [[ICMP]](s1), [[C]], [[SHL]] + ; GFX9-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[SELECT]](s16) + ; GFX9-NEXT: $vgpr0 = COPY [[ANYEXT]](s32) %0:_(s32) = COPY $vgpr0 %1:_(s32) = COPY $vgpr1 %2:_(s16) = G_TRUNC %0 @@ -308,86 +308,86 @@ body: | ; GFX6-LABEL: name: ushlsat_v2s16 ; GFX6: [[COPY:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0 - ; GFX6: [[COPY1:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr1 - ; GFX6: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY]](<2 x s16>) - ; GFX6: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; GFX6: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) - ; GFX6: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[COPY1]](<2 x s16>) - ; GFX6: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C]](s32) - ; GFX6: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 - ; GFX6: [[AND:%[0-9]+]]:_(s32) = G_AND [[BITCAST1]], [[C1]] - ; GFX6: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[BITCAST]], [[C]](s32) - ; GFX6: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[SHL]], [[AND]](s32) - ; GFX6: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[SHL1]], [[AND]](s32) - ; GFX6: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 -1 - ; GFX6: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[SHL]](s32), [[LSHR2]] - ; GFX6: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[ICMP]](s1), [[C2]], [[SHL1]] - ; GFX6: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[SELECT]], [[C]](s32) - ; GFX6: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LSHR1]], [[C1]] - ; GFX6: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[LSHR]], [[C]](s32) - ; GFX6: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[SHL2]], [[AND1]](s32) - ; GFX6: [[LSHR4:%[0-9]+]]:_(s32) = G_LSHR [[SHL3]], [[AND1]](s32) - ; GFX6: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[SHL2]](s32), [[LSHR4]] - ; GFX6: [[SELECT1:%[0-9]+]]:_(s32) = G_SELECT [[ICMP1]](s1), [[C2]], [[SHL3]] - ; GFX6: [[LSHR5:%[0-9]+]]:_(s32) = G_LSHR [[SELECT1]], [[C]](s32) - ; GFX6: [[AND2:%[0-9]+]]:_(s32) = G_AND [[LSHR3]], [[C1]] - ; GFX6: [[AND3:%[0-9]+]]:_(s32) = G_AND [[LSHR5]], [[C1]] - ; GFX6: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C]](s32) - ; GFX6: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL4]] - ; GFX6: [[BITCAST2:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) - ; GFX6: $vgpr0 = COPY [[BITCAST2]](<2 x s16>) + ; GFX6-NEXT: [[COPY1:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr1 + ; GFX6-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY]](<2 x s16>) + ; GFX6-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; GFX6-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) + ; GFX6-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[COPY1]](<2 x s16>) + ; GFX6-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C]](s32) + ; GFX6-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 + ; GFX6-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[BITCAST1]], [[C1]] + ; GFX6-NEXT: [[SHL:%[0-9]+]]:_(s32) = 
G_SHL [[BITCAST]], [[C]](s32) + ; GFX6-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[SHL]], [[AND]](s32) + ; GFX6-NEXT: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[SHL1]], [[AND]](s32) + ; GFX6-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 -1 + ; GFX6-NEXT: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[SHL]](s32), [[LSHR2]] + ; GFX6-NEXT: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[ICMP]](s1), [[C2]], [[SHL1]] + ; GFX6-NEXT: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[SELECT]], [[C]](s32) + ; GFX6-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LSHR1]], [[C1]] + ; GFX6-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[LSHR]], [[C]](s32) + ; GFX6-NEXT: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[SHL2]], [[AND1]](s32) + ; GFX6-NEXT: [[LSHR4:%[0-9]+]]:_(s32) = G_LSHR [[SHL3]], [[AND1]](s32) + ; GFX6-NEXT: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[SHL2]](s32), [[LSHR4]] + ; GFX6-NEXT: [[SELECT1:%[0-9]+]]:_(s32) = G_SELECT [[ICMP1]](s1), [[C2]], [[SHL3]] + ; GFX6-NEXT: [[LSHR5:%[0-9]+]]:_(s32) = G_LSHR [[SELECT1]], [[C]](s32) + ; GFX6-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[LSHR3]], [[C1]] + ; GFX6-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[LSHR5]], [[C1]] + ; GFX6-NEXT: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C]](s32) + ; GFX6-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL4]] + ; GFX6-NEXT: [[BITCAST2:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) + ; GFX6-NEXT: $vgpr0 = COPY [[BITCAST2]](<2 x s16>) ; GFX8-LABEL: name: ushlsat_v2s16 ; GFX8: [[COPY:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0 - ; GFX8: [[COPY1:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr1 - ; GFX8: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY]](<2 x s16>) - ; GFX8: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST]](s32) - ; GFX8: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; GFX8: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) - ; GFX8: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) - ; GFX8: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[COPY1]](<2 x s16>) - ; GFX8: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST1]](s32) - ; GFX8: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C]](s32) - ; GFX8: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR1]](s32) - ; GFX8: [[SHL:%[0-9]+]]:_(s16) = G_SHL [[TRUNC]], [[TRUNC2]](s16) - ; GFX8: [[LSHR2:%[0-9]+]]:_(s16) = G_LSHR [[SHL]], [[TRUNC2]](s16) - ; GFX8: [[C1:%[0-9]+]]:_(s16) = G_CONSTANT i16 -1 - ; GFX8: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[TRUNC]](s16), [[LSHR2]] - ; GFX8: [[SELECT:%[0-9]+]]:_(s16) = G_SELECT [[ICMP]](s1), [[C1]], [[SHL]] - ; GFX8: [[SHL1:%[0-9]+]]:_(s16) = G_SHL [[TRUNC1]], [[TRUNC3]](s16) - ; GFX8: [[LSHR3:%[0-9]+]]:_(s16) = G_LSHR [[SHL1]], [[TRUNC3]](s16) - ; GFX8: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[TRUNC1]](s16), [[LSHR3]] - ; GFX8: [[SELECT1:%[0-9]+]]:_(s16) = G_SELECT [[ICMP1]](s1), [[C1]], [[SHL1]] - ; GFX8: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[SELECT]](s16) - ; GFX8: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[SELECT1]](s16) - ; GFX8: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C]](s32) - ; GFX8: [[OR:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL2]] - ; GFX8: [[BITCAST2:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) - ; GFX8: $vgpr0 = COPY [[BITCAST2]](<2 x s16>) + ; GFX8-NEXT: [[COPY1:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr1 + ; GFX8-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY]](<2 x s16>) + ; GFX8-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST]](s32) + ; GFX8-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; GFX8-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) + ; GFX8-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) + ; GFX8-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST 
[[COPY1]](<2 x s16>) + ; GFX8-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST1]](s32) + ; GFX8-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C]](s32) + ; GFX8-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR1]](s32) + ; GFX8-NEXT: [[SHL:%[0-9]+]]:_(s16) = G_SHL [[TRUNC]], [[TRUNC2]](s16) + ; GFX8-NEXT: [[LSHR2:%[0-9]+]]:_(s16) = G_LSHR [[SHL]], [[TRUNC2]](s16) + ; GFX8-NEXT: [[C1:%[0-9]+]]:_(s16) = G_CONSTANT i16 -1 + ; GFX8-NEXT: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[TRUNC]](s16), [[LSHR2]] + ; GFX8-NEXT: [[SELECT:%[0-9]+]]:_(s16) = G_SELECT [[ICMP]](s1), [[C1]], [[SHL]] + ; GFX8-NEXT: [[SHL1:%[0-9]+]]:_(s16) = G_SHL [[TRUNC1]], [[TRUNC3]](s16) + ; GFX8-NEXT: [[LSHR3:%[0-9]+]]:_(s16) = G_LSHR [[SHL1]], [[TRUNC3]](s16) + ; GFX8-NEXT: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[TRUNC1]](s16), [[LSHR3]] + ; GFX8-NEXT: [[SELECT1:%[0-9]+]]:_(s16) = G_SELECT [[ICMP1]](s1), [[C1]], [[SHL1]] + ; GFX8-NEXT: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[SELECT]](s16) + ; GFX8-NEXT: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[SELECT1]](s16) + ; GFX8-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C]](s32) + ; GFX8-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL2]] + ; GFX8-NEXT: [[BITCAST2:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) + ; GFX8-NEXT: $vgpr0 = COPY [[BITCAST2]](<2 x s16>) ; GFX9-LABEL: name: ushlsat_v2s16 ; GFX9: [[COPY:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0 - ; GFX9: [[COPY1:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr1 - ; GFX9: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY]](<2 x s16>) - ; GFX9: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST]](s32) - ; GFX9: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; GFX9: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) - ; GFX9: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) - ; GFX9: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[COPY1]](<2 x s16>) - ; GFX9: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST1]](s32) - ; GFX9: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C]](s32) - ; GFX9: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR1]](s32) - ; GFX9: [[SHL:%[0-9]+]]:_(s16) = G_SHL [[TRUNC]], [[TRUNC2]](s16) - ; GFX9: [[LSHR2:%[0-9]+]]:_(s16) = G_LSHR [[SHL]], [[TRUNC2]](s16) - ; GFX9: [[C1:%[0-9]+]]:_(s16) = G_CONSTANT i16 -1 - ; GFX9: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[TRUNC]](s16), [[LSHR2]] - ; GFX9: [[SELECT:%[0-9]+]]:_(s16) = G_SELECT [[ICMP]](s1), [[C1]], [[SHL]] - ; GFX9: [[SHL1:%[0-9]+]]:_(s16) = G_SHL [[TRUNC1]], [[TRUNC3]](s16) - ; GFX9: [[LSHR3:%[0-9]+]]:_(s16) = G_LSHR [[SHL1]], [[TRUNC3]](s16) - ; GFX9: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[TRUNC1]](s16), [[LSHR3]] - ; GFX9: [[SELECT1:%[0-9]+]]:_(s16) = G_SELECT [[ICMP1]](s1), [[C1]], [[SHL1]] - ; GFX9: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[SELECT]](s16) - ; GFX9: [[ANYEXT1:%[0-9]+]]:_(s32) = G_ANYEXT [[SELECT1]](s16) - ; GFX9: [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[ANYEXT]](s32), [[ANYEXT1]](s32) - ; GFX9: $vgpr0 = COPY [[BUILD_VECTOR_TRUNC]](<2 x s16>) + ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr1 + ; GFX9-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY]](<2 x s16>) + ; GFX9-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST]](s32) + ; GFX9-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; GFX9-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) + ; GFX9-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) + ; GFX9-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[COPY1]](<2 x s16>) + ; GFX9-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST1]](s32) + ; GFX9-NEXT: [[LSHR1:%[0-9]+]]:_(s32) 
= G_LSHR [[BITCAST1]], [[C]](s32) + ; GFX9-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR1]](s32) + ; GFX9-NEXT: [[SHL:%[0-9]+]]:_(s16) = G_SHL [[TRUNC]], [[TRUNC2]](s16) + ; GFX9-NEXT: [[LSHR2:%[0-9]+]]:_(s16) = G_LSHR [[SHL]], [[TRUNC2]](s16) + ; GFX9-NEXT: [[C1:%[0-9]+]]:_(s16) = G_CONSTANT i16 -1 + ; GFX9-NEXT: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[TRUNC]](s16), [[LSHR2]] + ; GFX9-NEXT: [[SELECT:%[0-9]+]]:_(s16) = G_SELECT [[ICMP]](s1), [[C1]], [[SHL]] + ; GFX9-NEXT: [[SHL1:%[0-9]+]]:_(s16) = G_SHL [[TRUNC1]], [[TRUNC3]](s16) + ; GFX9-NEXT: [[LSHR3:%[0-9]+]]:_(s16) = G_LSHR [[SHL1]], [[TRUNC3]](s16) + ; GFX9-NEXT: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[TRUNC1]](s16), [[LSHR3]] + ; GFX9-NEXT: [[SELECT1:%[0-9]+]]:_(s16) = G_SELECT [[ICMP1]](s1), [[C1]], [[SHL1]] + ; GFX9-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[SELECT]](s16) + ; GFX9-NEXT: [[ANYEXT1:%[0-9]+]]:_(s32) = G_ANYEXT [[SELECT1]](s16) + ; GFX9-NEXT: [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[ANYEXT]](s32), [[ANYEXT1]](s32) + ; GFX9-NEXT: $vgpr0 = COPY [[BUILD_VECTOR_TRUNC]](<2 x s16>) %0:_(<2 x s16>) = COPY $vgpr0 %1:_(<2 x s16>) = COPY $vgpr1 %2:_(<2 x s16>) = G_USHLSAT %0, %1 @@ -402,159 +402,159 @@ body: | ; GFX6-LABEL: name: ushlsat_v3s16 ; GFX6: [[COPY:%[0-9]+]]:_(<6 x s16>) = COPY $vgpr0_vgpr1_vgpr2 - ; GFX6: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>), [[UV2:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[COPY]](<6 x s16>) - ; GFX6: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>) - ; GFX6: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; GFX6: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) - ; GFX6: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>) - ; GFX6: [[UV3:%[0-9]+]]:_(<2 x s16>), [[UV4:%[0-9]+]]:_(<2 x s16>), [[UV5:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[COPY]](<6 x s16>) - ; GFX6: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[UV4]](<2 x s16>) - ; GFX6: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C]](s32) - ; GFX6: [[BITCAST3:%[0-9]+]]:_(s32) = G_BITCAST [[UV5]](<2 x s16>) - ; GFX6: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST3]], [[C]](s32) - ; GFX6: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 - ; GFX6: [[AND:%[0-9]+]]:_(s32) = G_AND [[LSHR1]], [[C1]] - ; GFX6: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[BITCAST]], [[C]](s32) - ; GFX6: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[SHL]], [[AND]](s32) - ; GFX6: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[SHL1]], [[AND]](s32) - ; GFX6: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 -1 - ; GFX6: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[SHL]](s32), [[LSHR3]] - ; GFX6: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[ICMP]](s1), [[C2]], [[SHL1]] - ; GFX6: [[LSHR4:%[0-9]+]]:_(s32) = G_LSHR [[SELECT]], [[C]](s32) - ; GFX6: [[AND1:%[0-9]+]]:_(s32) = G_AND [[BITCAST3]], [[C1]] - ; GFX6: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[LSHR]], [[C]](s32) - ; GFX6: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[SHL2]], [[AND1]](s32) - ; GFX6: [[LSHR5:%[0-9]+]]:_(s32) = G_LSHR [[SHL3]], [[AND1]](s32) - ; GFX6: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[SHL2]](s32), [[LSHR5]] - ; GFX6: [[SELECT1:%[0-9]+]]:_(s32) = G_SELECT [[ICMP1]](s1), [[C2]], [[SHL3]] - ; GFX6: [[LSHR6:%[0-9]+]]:_(s32) = G_LSHR [[SELECT1]], [[C]](s32) - ; GFX6: [[AND2:%[0-9]+]]:_(s32) = G_AND [[LSHR2]], [[C1]] - ; GFX6: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[BITCAST1]], [[C]](s32) - ; GFX6: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[SHL4]], [[AND2]](s32) - ; GFX6: [[LSHR7:%[0-9]+]]:_(s32) = G_LSHR [[SHL5]], [[AND2]](s32) - ; GFX6: [[ICMP2:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[SHL4]](s32), 
[[LSHR7]] - ; GFX6: [[SELECT2:%[0-9]+]]:_(s32) = G_SELECT [[ICMP2]](s1), [[C2]], [[SHL5]] - ; GFX6: [[LSHR8:%[0-9]+]]:_(s32) = G_LSHR [[SELECT2]], [[C]](s32) - ; GFX6: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF - ; GFX6: [[UV6:%[0-9]+]]:_(<2 x s16>), [[UV7:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<4 x s16>) - ; GFX6: [[BITCAST4:%[0-9]+]]:_(s32) = G_BITCAST [[UV6]](<2 x s16>) - ; GFX6: [[LSHR9:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST4]], [[C]](s32) - ; GFX6: [[BITCAST5:%[0-9]+]]:_(s32) = G_BITCAST [[UV7]](<2 x s16>) - ; GFX6: [[AND3:%[0-9]+]]:_(s32) = G_AND [[LSHR4]], [[C1]] - ; GFX6: [[AND4:%[0-9]+]]:_(s32) = G_AND [[LSHR6]], [[C1]] - ; GFX6: [[SHL6:%[0-9]+]]:_(s32) = G_SHL [[AND4]], [[C]](s32) - ; GFX6: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND3]], [[SHL6]] - ; GFX6: [[BITCAST6:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) - ; GFX6: [[AND5:%[0-9]+]]:_(s32) = G_AND [[LSHR8]], [[C1]] - ; GFX6: [[AND6:%[0-9]+]]:_(s32) = G_AND [[BITCAST4]], [[C1]] - ; GFX6: [[SHL7:%[0-9]+]]:_(s32) = G_SHL [[AND6]], [[C]](s32) - ; GFX6: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND5]], [[SHL7]] - ; GFX6: [[BITCAST7:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32) - ; GFX6: [[AND7:%[0-9]+]]:_(s32) = G_AND [[LSHR9]], [[C1]] - ; GFX6: [[AND8:%[0-9]+]]:_(s32) = G_AND [[BITCAST5]], [[C1]] - ; GFX6: [[SHL8:%[0-9]+]]:_(s32) = G_SHL [[AND8]], [[C]](s32) - ; GFX6: [[OR2:%[0-9]+]]:_(s32) = G_OR [[AND7]], [[SHL8]] - ; GFX6: [[BITCAST8:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR2]](s32) - ; GFX6: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BITCAST6]](<2 x s16>), [[BITCAST7]](<2 x s16>), [[BITCAST8]](<2 x s16>) - ; GFX6: $vgpr0_vgpr1_vgpr2 = COPY [[CONCAT_VECTORS]](<6 x s16>) + ; GFX6-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>), [[UV2:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[COPY]](<6 x s16>) + ; GFX6-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>) + ; GFX6-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; GFX6-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) + ; GFX6-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>) + ; GFX6-NEXT: [[UV3:%[0-9]+]]:_(<2 x s16>), [[UV4:%[0-9]+]]:_(<2 x s16>), [[UV5:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[COPY]](<6 x s16>) + ; GFX6-NEXT: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[UV4]](<2 x s16>) + ; GFX6-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C]](s32) + ; GFX6-NEXT: [[BITCAST3:%[0-9]+]]:_(s32) = G_BITCAST [[UV5]](<2 x s16>) + ; GFX6-NEXT: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST3]], [[C]](s32) + ; GFX6-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 + ; GFX6-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[LSHR1]], [[C1]] + ; GFX6-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[BITCAST]], [[C]](s32) + ; GFX6-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[SHL]], [[AND]](s32) + ; GFX6-NEXT: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[SHL1]], [[AND]](s32) + ; GFX6-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 -1 + ; GFX6-NEXT: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[SHL]](s32), [[LSHR3]] + ; GFX6-NEXT: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[ICMP]](s1), [[C2]], [[SHL1]] + ; GFX6-NEXT: [[LSHR4:%[0-9]+]]:_(s32) = G_LSHR [[SELECT]], [[C]](s32) + ; GFX6-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[BITCAST3]], [[C1]] + ; GFX6-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[LSHR]], [[C]](s32) + ; GFX6-NEXT: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[SHL2]], [[AND1]](s32) + ; GFX6-NEXT: [[LSHR5:%[0-9]+]]:_(s32) = G_LSHR [[SHL3]], [[AND1]](s32) + ; GFX6-NEXT: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[SHL2]](s32), [[LSHR5]] + ; GFX6-NEXT: 
[[SELECT1:%[0-9]+]]:_(s32) = G_SELECT [[ICMP1]](s1), [[C2]], [[SHL3]] + ; GFX6-NEXT: [[LSHR6:%[0-9]+]]:_(s32) = G_LSHR [[SELECT1]], [[C]](s32) + ; GFX6-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[LSHR2]], [[C1]] + ; GFX6-NEXT: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[BITCAST1]], [[C]](s32) + ; GFX6-NEXT: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[SHL4]], [[AND2]](s32) + ; GFX6-NEXT: [[LSHR7:%[0-9]+]]:_(s32) = G_LSHR [[SHL5]], [[AND2]](s32) + ; GFX6-NEXT: [[ICMP2:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[SHL4]](s32), [[LSHR7]] + ; GFX6-NEXT: [[SELECT2:%[0-9]+]]:_(s32) = G_SELECT [[ICMP2]](s1), [[C2]], [[SHL5]] + ; GFX6-NEXT: [[LSHR8:%[0-9]+]]:_(s32) = G_LSHR [[SELECT2]], [[C]](s32) + ; GFX6-NEXT: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF + ; GFX6-NEXT: [[UV6:%[0-9]+]]:_(<2 x s16>), [[UV7:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<4 x s16>) + ; GFX6-NEXT: [[BITCAST4:%[0-9]+]]:_(s32) = G_BITCAST [[UV6]](<2 x s16>) + ; GFX6-NEXT: [[LSHR9:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST4]], [[C]](s32) + ; GFX6-NEXT: [[BITCAST5:%[0-9]+]]:_(s32) = G_BITCAST [[UV7]](<2 x s16>) + ; GFX6-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[LSHR4]], [[C1]] + ; GFX6-NEXT: [[AND4:%[0-9]+]]:_(s32) = G_AND [[LSHR6]], [[C1]] + ; GFX6-NEXT: [[SHL6:%[0-9]+]]:_(s32) = G_SHL [[AND4]], [[C]](s32) + ; GFX6-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND3]], [[SHL6]] + ; GFX6-NEXT: [[BITCAST6:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) + ; GFX6-NEXT: [[AND5:%[0-9]+]]:_(s32) = G_AND [[LSHR8]], [[C1]] + ; GFX6-NEXT: [[AND6:%[0-9]+]]:_(s32) = G_AND [[BITCAST4]], [[C1]] + ; GFX6-NEXT: [[SHL7:%[0-9]+]]:_(s32) = G_SHL [[AND6]], [[C]](s32) + ; GFX6-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND5]], [[SHL7]] + ; GFX6-NEXT: [[BITCAST7:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32) + ; GFX6-NEXT: [[AND7:%[0-9]+]]:_(s32) = G_AND [[LSHR9]], [[C1]] + ; GFX6-NEXT: [[AND8:%[0-9]+]]:_(s32) = G_AND [[BITCAST5]], [[C1]] + ; GFX6-NEXT: [[SHL8:%[0-9]+]]:_(s32) = G_SHL [[AND8]], [[C]](s32) + ; GFX6-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[AND7]], [[SHL8]] + ; GFX6-NEXT: [[BITCAST8:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR2]](s32) + ; GFX6-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BITCAST6]](<2 x s16>), [[BITCAST7]](<2 x s16>), [[BITCAST8]](<2 x s16>) + ; GFX6-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[CONCAT_VECTORS]](<6 x s16>) ; GFX8-LABEL: name: ushlsat_v3s16 ; GFX8: [[COPY:%[0-9]+]]:_(<6 x s16>) = COPY $vgpr0_vgpr1_vgpr2 - ; GFX8: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>), [[UV2:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[COPY]](<6 x s16>) - ; GFX8: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>) - ; GFX8: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST]](s32) - ; GFX8: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; GFX8: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) - ; GFX8: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) - ; GFX8: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>) - ; GFX8: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST1]](s32) - ; GFX8: [[UV3:%[0-9]+]]:_(<2 x s16>), [[UV4:%[0-9]+]]:_(<2 x s16>), [[UV5:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[COPY]](<6 x s16>) - ; GFX8: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[UV4]](<2 x s16>) - ; GFX8: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C]](s32) - ; GFX8: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR1]](s32) - ; GFX8: [[BITCAST3:%[0-9]+]]:_(s32) = G_BITCAST [[UV5]](<2 x s16>) - ; GFX8: [[TRUNC4:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST3]](s32) - ; GFX8: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST3]], [[C]](s32) - ; GFX8: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC 
[[LSHR2]](s32) - ; GFX8: [[SHL:%[0-9]+]]:_(s16) = G_SHL [[TRUNC]], [[TRUNC3]](s16) - ; GFX8: [[LSHR3:%[0-9]+]]:_(s16) = G_LSHR [[SHL]], [[TRUNC3]](s16) - ; GFX8: [[C1:%[0-9]+]]:_(s16) = G_CONSTANT i16 -1 - ; GFX8: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[TRUNC]](s16), [[LSHR3]] - ; GFX8: [[SELECT:%[0-9]+]]:_(s16) = G_SELECT [[ICMP]](s1), [[C1]], [[SHL]] - ; GFX8: [[SHL1:%[0-9]+]]:_(s16) = G_SHL [[TRUNC1]], [[TRUNC4]](s16) - ; GFX8: [[LSHR4:%[0-9]+]]:_(s16) = G_LSHR [[SHL1]], [[TRUNC4]](s16) - ; GFX8: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[TRUNC1]](s16), [[LSHR4]] - ; GFX8: [[SELECT1:%[0-9]+]]:_(s16) = G_SELECT [[ICMP1]](s1), [[C1]], [[SHL1]] - ; GFX8: [[SHL2:%[0-9]+]]:_(s16) = G_SHL [[TRUNC2]], [[TRUNC5]](s16) - ; GFX8: [[LSHR5:%[0-9]+]]:_(s16) = G_LSHR [[SHL2]], [[TRUNC5]](s16) - ; GFX8: [[ICMP2:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[TRUNC2]](s16), [[LSHR5]] - ; GFX8: [[SELECT2:%[0-9]+]]:_(s16) = G_SELECT [[ICMP2]](s1), [[C1]], [[SHL2]] - ; GFX8: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF - ; GFX8: [[UV6:%[0-9]+]]:_(<2 x s16>), [[UV7:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<4 x s16>) - ; GFX8: [[BITCAST4:%[0-9]+]]:_(s32) = G_BITCAST [[UV6]](<2 x s16>) - ; GFX8: [[LSHR6:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST4]], [[C]](s32) - ; GFX8: [[BITCAST5:%[0-9]+]]:_(s32) = G_BITCAST [[UV7]](<2 x s16>) - ; GFX8: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[SELECT]](s16) - ; GFX8: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[SELECT1]](s16) - ; GFX8: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C]](s32) - ; GFX8: [[OR:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL3]] - ; GFX8: [[BITCAST6:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) - ; GFX8: [[ZEXT2:%[0-9]+]]:_(s32) = G_ZEXT [[SELECT2]](s16) - ; GFX8: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 - ; GFX8: [[AND:%[0-9]+]]:_(s32) = G_AND [[BITCAST4]], [[C2]] - ; GFX8: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[AND]], [[C]](s32) - ; GFX8: [[OR1:%[0-9]+]]:_(s32) = G_OR [[ZEXT2]], [[SHL4]] - ; GFX8: [[BITCAST7:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32) - ; GFX8: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LSHR6]], [[C2]] - ; GFX8: [[AND2:%[0-9]+]]:_(s32) = G_AND [[BITCAST5]], [[C2]] - ; GFX8: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[AND2]], [[C]](s32) - ; GFX8: [[OR2:%[0-9]+]]:_(s32) = G_OR [[AND1]], [[SHL5]] - ; GFX8: [[BITCAST8:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR2]](s32) - ; GFX8: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BITCAST6]](<2 x s16>), [[BITCAST7]](<2 x s16>), [[BITCAST8]](<2 x s16>) - ; GFX8: $vgpr0_vgpr1_vgpr2 = COPY [[CONCAT_VECTORS]](<6 x s16>) + ; GFX8-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>), [[UV2:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[COPY]](<6 x s16>) + ; GFX8-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>) + ; GFX8-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST]](s32) + ; GFX8-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; GFX8-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) + ; GFX8-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) + ; GFX8-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>) + ; GFX8-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST1]](s32) + ; GFX8-NEXT: [[UV3:%[0-9]+]]:_(<2 x s16>), [[UV4:%[0-9]+]]:_(<2 x s16>), [[UV5:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[COPY]](<6 x s16>) + ; GFX8-NEXT: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[UV4]](<2 x s16>) + ; GFX8-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C]](s32) + ; GFX8-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR1]](s32) + ; GFX8-NEXT: 
[[BITCAST3:%[0-9]+]]:_(s32) = G_BITCAST [[UV5]](<2 x s16>) + ; GFX8-NEXT: [[TRUNC4:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST3]](s32) + ; GFX8-NEXT: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST3]], [[C]](s32) + ; GFX8-NEXT: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR2]](s32) + ; GFX8-NEXT: [[SHL:%[0-9]+]]:_(s16) = G_SHL [[TRUNC]], [[TRUNC3]](s16) + ; GFX8-NEXT: [[LSHR3:%[0-9]+]]:_(s16) = G_LSHR [[SHL]], [[TRUNC3]](s16) + ; GFX8-NEXT: [[C1:%[0-9]+]]:_(s16) = G_CONSTANT i16 -1 + ; GFX8-NEXT: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[TRUNC]](s16), [[LSHR3]] + ; GFX8-NEXT: [[SELECT:%[0-9]+]]:_(s16) = G_SELECT [[ICMP]](s1), [[C1]], [[SHL]] + ; GFX8-NEXT: [[SHL1:%[0-9]+]]:_(s16) = G_SHL [[TRUNC1]], [[TRUNC4]](s16) + ; GFX8-NEXT: [[LSHR4:%[0-9]+]]:_(s16) = G_LSHR [[SHL1]], [[TRUNC4]](s16) + ; GFX8-NEXT: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[TRUNC1]](s16), [[LSHR4]] + ; GFX8-NEXT: [[SELECT1:%[0-9]+]]:_(s16) = G_SELECT [[ICMP1]](s1), [[C1]], [[SHL1]] + ; GFX8-NEXT: [[SHL2:%[0-9]+]]:_(s16) = G_SHL [[TRUNC2]], [[TRUNC5]](s16) + ; GFX8-NEXT: [[LSHR5:%[0-9]+]]:_(s16) = G_LSHR [[SHL2]], [[TRUNC5]](s16) + ; GFX8-NEXT: [[ICMP2:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[TRUNC2]](s16), [[LSHR5]] + ; GFX8-NEXT: [[SELECT2:%[0-9]+]]:_(s16) = G_SELECT [[ICMP2]](s1), [[C1]], [[SHL2]] + ; GFX8-NEXT: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF + ; GFX8-NEXT: [[UV6:%[0-9]+]]:_(<2 x s16>), [[UV7:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<4 x s16>) + ; GFX8-NEXT: [[BITCAST4:%[0-9]+]]:_(s32) = G_BITCAST [[UV6]](<2 x s16>) + ; GFX8-NEXT: [[LSHR6:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST4]], [[C]](s32) + ; GFX8-NEXT: [[BITCAST5:%[0-9]+]]:_(s32) = G_BITCAST [[UV7]](<2 x s16>) + ; GFX8-NEXT: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[SELECT]](s16) + ; GFX8-NEXT: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[SELECT1]](s16) + ; GFX8-NEXT: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C]](s32) + ; GFX8-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL3]] + ; GFX8-NEXT: [[BITCAST6:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) + ; GFX8-NEXT: [[ZEXT2:%[0-9]+]]:_(s32) = G_ZEXT [[SELECT2]](s16) + ; GFX8-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 + ; GFX8-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[BITCAST4]], [[C2]] + ; GFX8-NEXT: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[AND]], [[C]](s32) + ; GFX8-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[ZEXT2]], [[SHL4]] + ; GFX8-NEXT: [[BITCAST7:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32) + ; GFX8-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LSHR6]], [[C2]] + ; GFX8-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[BITCAST5]], [[C2]] + ; GFX8-NEXT: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[AND2]], [[C]](s32) + ; GFX8-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[AND1]], [[SHL5]] + ; GFX8-NEXT: [[BITCAST8:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR2]](s32) + ; GFX8-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BITCAST6]](<2 x s16>), [[BITCAST7]](<2 x s16>), [[BITCAST8]](<2 x s16>) + ; GFX8-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[CONCAT_VECTORS]](<6 x s16>) ; GFX9-LABEL: name: ushlsat_v3s16 ; GFX9: [[COPY:%[0-9]+]]:_(<6 x s16>) = COPY $vgpr0_vgpr1_vgpr2 - ; GFX9: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>), [[UV2:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[COPY]](<6 x s16>) - ; GFX9: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>) - ; GFX9: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST]](s32) - ; GFX9: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; GFX9: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) - ; GFX9: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) - ; GFX9: [[BITCAST1:%[0-9]+]]:_(s32) = 
G_BITCAST [[UV1]](<2 x s16>) - ; GFX9: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST1]](s32) - ; GFX9: [[UV3:%[0-9]+]]:_(<2 x s16>), [[UV4:%[0-9]+]]:_(<2 x s16>), [[UV5:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[COPY]](<6 x s16>) - ; GFX9: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[UV4]](<2 x s16>) - ; GFX9: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C]](s32) - ; GFX9: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR1]](s32) - ; GFX9: [[BITCAST3:%[0-9]+]]:_(s32) = G_BITCAST [[UV5]](<2 x s16>) - ; GFX9: [[TRUNC4:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST3]](s32) - ; GFX9: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST3]], [[C]](s32) - ; GFX9: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR2]](s32) - ; GFX9: [[SHL:%[0-9]+]]:_(s16) = G_SHL [[TRUNC]], [[TRUNC3]](s16) - ; GFX9: [[LSHR3:%[0-9]+]]:_(s16) = G_LSHR [[SHL]], [[TRUNC3]](s16) - ; GFX9: [[C1:%[0-9]+]]:_(s16) = G_CONSTANT i16 -1 - ; GFX9: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[TRUNC]](s16), [[LSHR3]] - ; GFX9: [[SELECT:%[0-9]+]]:_(s16) = G_SELECT [[ICMP]](s1), [[C1]], [[SHL]] - ; GFX9: [[SHL1:%[0-9]+]]:_(s16) = G_SHL [[TRUNC1]], [[TRUNC4]](s16) - ; GFX9: [[LSHR4:%[0-9]+]]:_(s16) = G_LSHR [[SHL1]], [[TRUNC4]](s16) - ; GFX9: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[TRUNC1]](s16), [[LSHR4]] - ; GFX9: [[SELECT1:%[0-9]+]]:_(s16) = G_SELECT [[ICMP1]](s1), [[C1]], [[SHL1]] - ; GFX9: [[SHL2:%[0-9]+]]:_(s16) = G_SHL [[TRUNC2]], [[TRUNC5]](s16) - ; GFX9: [[LSHR5:%[0-9]+]]:_(s16) = G_LSHR [[SHL2]], [[TRUNC5]](s16) - ; GFX9: [[ICMP2:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[TRUNC2]](s16), [[LSHR5]] - ; GFX9: [[SELECT2:%[0-9]+]]:_(s16) = G_SELECT [[ICMP2]](s1), [[C1]], [[SHL2]] - ; GFX9: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF - ; GFX9: [[UV6:%[0-9]+]]:_(<2 x s16>), [[UV7:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<4 x s16>) - ; GFX9: [[BITCAST4:%[0-9]+]]:_(s32) = G_BITCAST [[UV6]](<2 x s16>) - ; GFX9: [[LSHR6:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST4]], [[C]](s32) - ; GFX9: [[BITCAST5:%[0-9]+]]:_(s32) = G_BITCAST [[UV7]](<2 x s16>) - ; GFX9: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[SELECT]](s16) - ; GFX9: [[ANYEXT1:%[0-9]+]]:_(s32) = G_ANYEXT [[SELECT1]](s16) - ; GFX9: [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[ANYEXT]](s32), [[ANYEXT1]](s32) - ; GFX9: [[ANYEXT2:%[0-9]+]]:_(s32) = G_ANYEXT [[SELECT2]](s16) - ; GFX9: [[BUILD_VECTOR_TRUNC1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[ANYEXT2]](s32), [[BITCAST4]](s32) - ; GFX9: [[BUILD_VECTOR_TRUNC2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[LSHR6]](s32), [[BITCAST5]](s32) - ; GFX9: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR_TRUNC]](<2 x s16>), [[BUILD_VECTOR_TRUNC1]](<2 x s16>), [[BUILD_VECTOR_TRUNC2]](<2 x s16>) - ; GFX9: $vgpr0_vgpr1_vgpr2 = COPY [[CONCAT_VECTORS]](<6 x s16>) + ; GFX9-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>), [[UV2:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[COPY]](<6 x s16>) + ; GFX9-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>) + ; GFX9-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST]](s32) + ; GFX9-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; GFX9-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) + ; GFX9-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) + ; GFX9-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>) + ; GFX9-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST1]](s32) + ; GFX9-NEXT: [[UV3:%[0-9]+]]:_(<2 x s16>), [[UV4:%[0-9]+]]:_(<2 x s16>), [[UV5:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[COPY]](<6 x s16>) + ; 
GFX9-NEXT: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[UV4]](<2 x s16>) + ; GFX9-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C]](s32) + ; GFX9-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR1]](s32) + ; GFX9-NEXT: [[BITCAST3:%[0-9]+]]:_(s32) = G_BITCAST [[UV5]](<2 x s16>) + ; GFX9-NEXT: [[TRUNC4:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST3]](s32) + ; GFX9-NEXT: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST3]], [[C]](s32) + ; GFX9-NEXT: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR2]](s32) + ; GFX9-NEXT: [[SHL:%[0-9]+]]:_(s16) = G_SHL [[TRUNC]], [[TRUNC3]](s16) + ; GFX9-NEXT: [[LSHR3:%[0-9]+]]:_(s16) = G_LSHR [[SHL]], [[TRUNC3]](s16) + ; GFX9-NEXT: [[C1:%[0-9]+]]:_(s16) = G_CONSTANT i16 -1 + ; GFX9-NEXT: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[TRUNC]](s16), [[LSHR3]] + ; GFX9-NEXT: [[SELECT:%[0-9]+]]:_(s16) = G_SELECT [[ICMP]](s1), [[C1]], [[SHL]] + ; GFX9-NEXT: [[SHL1:%[0-9]+]]:_(s16) = G_SHL [[TRUNC1]], [[TRUNC4]](s16) + ; GFX9-NEXT: [[LSHR4:%[0-9]+]]:_(s16) = G_LSHR [[SHL1]], [[TRUNC4]](s16) + ; GFX9-NEXT: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[TRUNC1]](s16), [[LSHR4]] + ; GFX9-NEXT: [[SELECT1:%[0-9]+]]:_(s16) = G_SELECT [[ICMP1]](s1), [[C1]], [[SHL1]] + ; GFX9-NEXT: [[SHL2:%[0-9]+]]:_(s16) = G_SHL [[TRUNC2]], [[TRUNC5]](s16) + ; GFX9-NEXT: [[LSHR5:%[0-9]+]]:_(s16) = G_LSHR [[SHL2]], [[TRUNC5]](s16) + ; GFX9-NEXT: [[ICMP2:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[TRUNC2]](s16), [[LSHR5]] + ; GFX9-NEXT: [[SELECT2:%[0-9]+]]:_(s16) = G_SELECT [[ICMP2]](s1), [[C1]], [[SHL2]] + ; GFX9-NEXT: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF + ; GFX9-NEXT: [[UV6:%[0-9]+]]:_(<2 x s16>), [[UV7:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<4 x s16>) + ; GFX9-NEXT: [[BITCAST4:%[0-9]+]]:_(s32) = G_BITCAST [[UV6]](<2 x s16>) + ; GFX9-NEXT: [[LSHR6:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST4]], [[C]](s32) + ; GFX9-NEXT: [[BITCAST5:%[0-9]+]]:_(s32) = G_BITCAST [[UV7]](<2 x s16>) + ; GFX9-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[SELECT]](s16) + ; GFX9-NEXT: [[ANYEXT1:%[0-9]+]]:_(s32) = G_ANYEXT [[SELECT1]](s16) + ; GFX9-NEXT: [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[ANYEXT]](s32), [[ANYEXT1]](s32) + ; GFX9-NEXT: [[ANYEXT2:%[0-9]+]]:_(s32) = G_ANYEXT [[SELECT2]](s16) + ; GFX9-NEXT: [[BUILD_VECTOR_TRUNC1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[ANYEXT2]](s32), [[BITCAST4]](s32) + ; GFX9-NEXT: [[BUILD_VECTOR_TRUNC2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[LSHR6]](s32), [[BITCAST5]](s32) + ; GFX9-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR_TRUNC]](<2 x s16>), [[BUILD_VECTOR_TRUNC1]](<2 x s16>), [[BUILD_VECTOR_TRUNC2]](<2 x s16>) + ; GFX9-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[CONCAT_VECTORS]](<6 x s16>) %0:_(<6 x s16>) = COPY $vgpr0_vgpr1_vgpr2 %1:_(<3 x s16>), %2:_(<3 x s16>) = G_UNMERGE_VALUES %0 %3:_(<3 x s16>) = G_USHLSAT %1, %2 @@ -571,158 +571,158 @@ body: | ; GFX6-LABEL: name: ushlsat_v4s16 ; GFX6: [[COPY:%[0-9]+]]:_(<4 x s16>) = COPY $vgpr0_vgpr1 - ; GFX6: [[COPY1:%[0-9]+]]:_(<4 x s16>) = COPY $vgpr2_vgpr3 - ; GFX6: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[COPY]](<4 x s16>) - ; GFX6: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>) - ; GFX6: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; GFX6: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) - ; GFX6: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>) - ; GFX6: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C]](s32) - ; GFX6: [[UV2:%[0-9]+]]:_(<2 x s16>), [[UV3:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES 
[[COPY1]](<4 x s16>) - ; GFX6: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[UV2]](<2 x s16>) - ; GFX6: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C]](s32) - ; GFX6: [[BITCAST3:%[0-9]+]]:_(s32) = G_BITCAST [[UV3]](<2 x s16>) - ; GFX6: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST3]], [[C]](s32) - ; GFX6: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 - ; GFX6: [[AND:%[0-9]+]]:_(s32) = G_AND [[BITCAST2]], [[C1]] - ; GFX6: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[BITCAST]], [[C]](s32) - ; GFX6: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[SHL]], [[AND]](s32) - ; GFX6: [[LSHR4:%[0-9]+]]:_(s32) = G_LSHR [[SHL1]], [[AND]](s32) - ; GFX6: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 -1 - ; GFX6: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[SHL]](s32), [[LSHR4]] - ; GFX6: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[ICMP]](s1), [[C2]], [[SHL1]] - ; GFX6: [[LSHR5:%[0-9]+]]:_(s32) = G_LSHR [[SELECT]], [[C]](s32) - ; GFX6: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LSHR2]], [[C1]] - ; GFX6: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[LSHR]], [[C]](s32) - ; GFX6: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[SHL2]], [[AND1]](s32) - ; GFX6: [[LSHR6:%[0-9]+]]:_(s32) = G_LSHR [[SHL3]], [[AND1]](s32) - ; GFX6: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[SHL2]](s32), [[LSHR6]] - ; GFX6: [[SELECT1:%[0-9]+]]:_(s32) = G_SELECT [[ICMP1]](s1), [[C2]], [[SHL3]] - ; GFX6: [[LSHR7:%[0-9]+]]:_(s32) = G_LSHR [[SELECT1]], [[C]](s32) - ; GFX6: [[AND2:%[0-9]+]]:_(s32) = G_AND [[BITCAST3]], [[C1]] - ; GFX6: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[BITCAST1]], [[C]](s32) - ; GFX6: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[SHL4]], [[AND2]](s32) - ; GFX6: [[LSHR8:%[0-9]+]]:_(s32) = G_LSHR [[SHL5]], [[AND2]](s32) - ; GFX6: [[ICMP2:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[SHL4]](s32), [[LSHR8]] - ; GFX6: [[SELECT2:%[0-9]+]]:_(s32) = G_SELECT [[ICMP2]](s1), [[C2]], [[SHL5]] - ; GFX6: [[LSHR9:%[0-9]+]]:_(s32) = G_LSHR [[SELECT2]], [[C]](s32) - ; GFX6: [[AND3:%[0-9]+]]:_(s32) = G_AND [[LSHR3]], [[C1]] - ; GFX6: [[SHL6:%[0-9]+]]:_(s32) = G_SHL [[LSHR1]], [[C]](s32) - ; GFX6: [[SHL7:%[0-9]+]]:_(s32) = G_SHL [[SHL6]], [[AND3]](s32) - ; GFX6: [[LSHR10:%[0-9]+]]:_(s32) = G_LSHR [[SHL7]], [[AND3]](s32) - ; GFX6: [[ICMP3:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[SHL6]](s32), [[LSHR10]] - ; GFX6: [[SELECT3:%[0-9]+]]:_(s32) = G_SELECT [[ICMP3]](s1), [[C2]], [[SHL7]] - ; GFX6: [[LSHR11:%[0-9]+]]:_(s32) = G_LSHR [[SELECT3]], [[C]](s32) - ; GFX6: [[AND4:%[0-9]+]]:_(s32) = G_AND [[LSHR5]], [[C1]] - ; GFX6: [[AND5:%[0-9]+]]:_(s32) = G_AND [[LSHR7]], [[C1]] - ; GFX6: [[SHL8:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[C]](s32) - ; GFX6: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND4]], [[SHL8]] - ; GFX6: [[BITCAST4:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) - ; GFX6: [[AND6:%[0-9]+]]:_(s32) = G_AND [[LSHR9]], [[C1]] - ; GFX6: [[AND7:%[0-9]+]]:_(s32) = G_AND [[LSHR11]], [[C1]] - ; GFX6: [[SHL9:%[0-9]+]]:_(s32) = G_SHL [[AND7]], [[C]](s32) - ; GFX6: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND6]], [[SHL9]] - ; GFX6: [[BITCAST5:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32) - ; GFX6: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BITCAST4]](<2 x s16>), [[BITCAST5]](<2 x s16>) - ; GFX6: $vgpr0_vgpr1 = COPY [[CONCAT_VECTORS]](<4 x s16>) + ; GFX6-NEXT: [[COPY1:%[0-9]+]]:_(<4 x s16>) = COPY $vgpr2_vgpr3 + ; GFX6-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[COPY]](<4 x s16>) + ; GFX6-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>) + ; GFX6-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; GFX6-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) + ; GFX6-NEXT: 
[[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>) + ; GFX6-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C]](s32) + ; GFX6-NEXT: [[UV2:%[0-9]+]]:_(<2 x s16>), [[UV3:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[COPY1]](<4 x s16>) + ; GFX6-NEXT: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[UV2]](<2 x s16>) + ; GFX6-NEXT: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C]](s32) + ; GFX6-NEXT: [[BITCAST3:%[0-9]+]]:_(s32) = G_BITCAST [[UV3]](<2 x s16>) + ; GFX6-NEXT: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST3]], [[C]](s32) + ; GFX6-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 + ; GFX6-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[BITCAST2]], [[C1]] + ; GFX6-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[BITCAST]], [[C]](s32) + ; GFX6-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[SHL]], [[AND]](s32) + ; GFX6-NEXT: [[LSHR4:%[0-9]+]]:_(s32) = G_LSHR [[SHL1]], [[AND]](s32) + ; GFX6-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 -1 + ; GFX6-NEXT: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[SHL]](s32), [[LSHR4]] + ; GFX6-NEXT: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[ICMP]](s1), [[C2]], [[SHL1]] + ; GFX6-NEXT: [[LSHR5:%[0-9]+]]:_(s32) = G_LSHR [[SELECT]], [[C]](s32) + ; GFX6-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LSHR2]], [[C1]] + ; GFX6-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[LSHR]], [[C]](s32) + ; GFX6-NEXT: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[SHL2]], [[AND1]](s32) + ; GFX6-NEXT: [[LSHR6:%[0-9]+]]:_(s32) = G_LSHR [[SHL3]], [[AND1]](s32) + ; GFX6-NEXT: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[SHL2]](s32), [[LSHR6]] + ; GFX6-NEXT: [[SELECT1:%[0-9]+]]:_(s32) = G_SELECT [[ICMP1]](s1), [[C2]], [[SHL3]] + ; GFX6-NEXT: [[LSHR7:%[0-9]+]]:_(s32) = G_LSHR [[SELECT1]], [[C]](s32) + ; GFX6-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[BITCAST3]], [[C1]] + ; GFX6-NEXT: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[BITCAST1]], [[C]](s32) + ; GFX6-NEXT: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[SHL4]], [[AND2]](s32) + ; GFX6-NEXT: [[LSHR8:%[0-9]+]]:_(s32) = G_LSHR [[SHL5]], [[AND2]](s32) + ; GFX6-NEXT: [[ICMP2:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[SHL4]](s32), [[LSHR8]] + ; GFX6-NEXT: [[SELECT2:%[0-9]+]]:_(s32) = G_SELECT [[ICMP2]](s1), [[C2]], [[SHL5]] + ; GFX6-NEXT: [[LSHR9:%[0-9]+]]:_(s32) = G_LSHR [[SELECT2]], [[C]](s32) + ; GFX6-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[LSHR3]], [[C1]] + ; GFX6-NEXT: [[SHL6:%[0-9]+]]:_(s32) = G_SHL [[LSHR1]], [[C]](s32) + ; GFX6-NEXT: [[SHL7:%[0-9]+]]:_(s32) = G_SHL [[SHL6]], [[AND3]](s32) + ; GFX6-NEXT: [[LSHR10:%[0-9]+]]:_(s32) = G_LSHR [[SHL7]], [[AND3]](s32) + ; GFX6-NEXT: [[ICMP3:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[SHL6]](s32), [[LSHR10]] + ; GFX6-NEXT: [[SELECT3:%[0-9]+]]:_(s32) = G_SELECT [[ICMP3]](s1), [[C2]], [[SHL7]] + ; GFX6-NEXT: [[LSHR11:%[0-9]+]]:_(s32) = G_LSHR [[SELECT3]], [[C]](s32) + ; GFX6-NEXT: [[AND4:%[0-9]+]]:_(s32) = G_AND [[LSHR5]], [[C1]] + ; GFX6-NEXT: [[AND5:%[0-9]+]]:_(s32) = G_AND [[LSHR7]], [[C1]] + ; GFX6-NEXT: [[SHL8:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[C]](s32) + ; GFX6-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND4]], [[SHL8]] + ; GFX6-NEXT: [[BITCAST4:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) + ; GFX6-NEXT: [[AND6:%[0-9]+]]:_(s32) = G_AND [[LSHR9]], [[C1]] + ; GFX6-NEXT: [[AND7:%[0-9]+]]:_(s32) = G_AND [[LSHR11]], [[C1]] + ; GFX6-NEXT: [[SHL9:%[0-9]+]]:_(s32) = G_SHL [[AND7]], [[C]](s32) + ; GFX6-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND6]], [[SHL9]] + ; GFX6-NEXT: [[BITCAST5:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32) + ; GFX6-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BITCAST4]](<2 x s16>), [[BITCAST5]](<2 x s16>) + ; 
GFX6-NEXT: $vgpr0_vgpr1 = COPY [[CONCAT_VECTORS]](<4 x s16>) ; GFX8-LABEL: name: ushlsat_v4s16 ; GFX8: [[COPY:%[0-9]+]]:_(<4 x s16>) = COPY $vgpr0_vgpr1 - ; GFX8: [[COPY1:%[0-9]+]]:_(<4 x s16>) = COPY $vgpr2_vgpr3 - ; GFX8: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[COPY]](<4 x s16>) - ; GFX8: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>) - ; GFX8: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST]](s32) - ; GFX8: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; GFX8: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) - ; GFX8: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) - ; GFX8: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>) - ; GFX8: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST1]](s32) - ; GFX8: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C]](s32) - ; GFX8: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR1]](s32) - ; GFX8: [[UV2:%[0-9]+]]:_(<2 x s16>), [[UV3:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[COPY1]](<4 x s16>) - ; GFX8: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[UV2]](<2 x s16>) - ; GFX8: [[TRUNC4:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST2]](s32) - ; GFX8: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C]](s32) - ; GFX8: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR2]](s32) - ; GFX8: [[BITCAST3:%[0-9]+]]:_(s32) = G_BITCAST [[UV3]](<2 x s16>) - ; GFX8: [[TRUNC6:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST3]](s32) - ; GFX8: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST3]], [[C]](s32) - ; GFX8: [[TRUNC7:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR3]](s32) - ; GFX8: [[SHL:%[0-9]+]]:_(s16) = G_SHL [[TRUNC]], [[TRUNC4]](s16) - ; GFX8: [[LSHR4:%[0-9]+]]:_(s16) = G_LSHR [[SHL]], [[TRUNC4]](s16) - ; GFX8: [[C1:%[0-9]+]]:_(s16) = G_CONSTANT i16 -1 - ; GFX8: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[TRUNC]](s16), [[LSHR4]] - ; GFX8: [[SELECT:%[0-9]+]]:_(s16) = G_SELECT [[ICMP]](s1), [[C1]], [[SHL]] - ; GFX8: [[SHL1:%[0-9]+]]:_(s16) = G_SHL [[TRUNC1]], [[TRUNC5]](s16) - ; GFX8: [[LSHR5:%[0-9]+]]:_(s16) = G_LSHR [[SHL1]], [[TRUNC5]](s16) - ; GFX8: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[TRUNC1]](s16), [[LSHR5]] - ; GFX8: [[SELECT1:%[0-9]+]]:_(s16) = G_SELECT [[ICMP1]](s1), [[C1]], [[SHL1]] - ; GFX8: [[SHL2:%[0-9]+]]:_(s16) = G_SHL [[TRUNC2]], [[TRUNC6]](s16) - ; GFX8: [[LSHR6:%[0-9]+]]:_(s16) = G_LSHR [[SHL2]], [[TRUNC6]](s16) - ; GFX8: [[ICMP2:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[TRUNC2]](s16), [[LSHR6]] - ; GFX8: [[SELECT2:%[0-9]+]]:_(s16) = G_SELECT [[ICMP2]](s1), [[C1]], [[SHL2]] - ; GFX8: [[SHL3:%[0-9]+]]:_(s16) = G_SHL [[TRUNC3]], [[TRUNC7]](s16) - ; GFX8: [[LSHR7:%[0-9]+]]:_(s16) = G_LSHR [[SHL3]], [[TRUNC7]](s16) - ; GFX8: [[ICMP3:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[TRUNC3]](s16), [[LSHR7]] - ; GFX8: [[SELECT3:%[0-9]+]]:_(s16) = G_SELECT [[ICMP3]](s1), [[C1]], [[SHL3]] - ; GFX8: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[SELECT]](s16) - ; GFX8: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[SELECT1]](s16) - ; GFX8: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C]](s32) - ; GFX8: [[OR:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL4]] - ; GFX8: [[BITCAST4:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) - ; GFX8: [[ZEXT2:%[0-9]+]]:_(s32) = G_ZEXT [[SELECT2]](s16) - ; GFX8: [[ZEXT3:%[0-9]+]]:_(s32) = G_ZEXT [[SELECT3]](s16) - ; GFX8: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[ZEXT3]], [[C]](s32) - ; GFX8: [[OR1:%[0-9]+]]:_(s32) = G_OR [[ZEXT2]], [[SHL5]] - ; GFX8: [[BITCAST5:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32) - ; GFX8: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BITCAST4]](<2 x s16>), [[BITCAST5]](<2 x s16>) - ; GFX8: $vgpr0_vgpr1 = COPY 
[[CONCAT_VECTORS]](<4 x s16>) + ; GFX8-NEXT: [[COPY1:%[0-9]+]]:_(<4 x s16>) = COPY $vgpr2_vgpr3 + ; GFX8-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[COPY]](<4 x s16>) + ; GFX8-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>) + ; GFX8-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST]](s32) + ; GFX8-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; GFX8-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) + ; GFX8-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) + ; GFX8-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>) + ; GFX8-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST1]](s32) + ; GFX8-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C]](s32) + ; GFX8-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR1]](s32) + ; GFX8-NEXT: [[UV2:%[0-9]+]]:_(<2 x s16>), [[UV3:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[COPY1]](<4 x s16>) + ; GFX8-NEXT: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[UV2]](<2 x s16>) + ; GFX8-NEXT: [[TRUNC4:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST2]](s32) + ; GFX8-NEXT: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C]](s32) + ; GFX8-NEXT: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR2]](s32) + ; GFX8-NEXT: [[BITCAST3:%[0-9]+]]:_(s32) = G_BITCAST [[UV3]](<2 x s16>) + ; GFX8-NEXT: [[TRUNC6:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST3]](s32) + ; GFX8-NEXT: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST3]], [[C]](s32) + ; GFX8-NEXT: [[TRUNC7:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR3]](s32) + ; GFX8-NEXT: [[SHL:%[0-9]+]]:_(s16) = G_SHL [[TRUNC]], [[TRUNC4]](s16) + ; GFX8-NEXT: [[LSHR4:%[0-9]+]]:_(s16) = G_LSHR [[SHL]], [[TRUNC4]](s16) + ; GFX8-NEXT: [[C1:%[0-9]+]]:_(s16) = G_CONSTANT i16 -1 + ; GFX8-NEXT: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[TRUNC]](s16), [[LSHR4]] + ; GFX8-NEXT: [[SELECT:%[0-9]+]]:_(s16) = G_SELECT [[ICMP]](s1), [[C1]], [[SHL]] + ; GFX8-NEXT: [[SHL1:%[0-9]+]]:_(s16) = G_SHL [[TRUNC1]], [[TRUNC5]](s16) + ; GFX8-NEXT: [[LSHR5:%[0-9]+]]:_(s16) = G_LSHR [[SHL1]], [[TRUNC5]](s16) + ; GFX8-NEXT: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[TRUNC1]](s16), [[LSHR5]] + ; GFX8-NEXT: [[SELECT1:%[0-9]+]]:_(s16) = G_SELECT [[ICMP1]](s1), [[C1]], [[SHL1]] + ; GFX8-NEXT: [[SHL2:%[0-9]+]]:_(s16) = G_SHL [[TRUNC2]], [[TRUNC6]](s16) + ; GFX8-NEXT: [[LSHR6:%[0-9]+]]:_(s16) = G_LSHR [[SHL2]], [[TRUNC6]](s16) + ; GFX8-NEXT: [[ICMP2:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[TRUNC2]](s16), [[LSHR6]] + ; GFX8-NEXT: [[SELECT2:%[0-9]+]]:_(s16) = G_SELECT [[ICMP2]](s1), [[C1]], [[SHL2]] + ; GFX8-NEXT: [[SHL3:%[0-9]+]]:_(s16) = G_SHL [[TRUNC3]], [[TRUNC7]](s16) + ; GFX8-NEXT: [[LSHR7:%[0-9]+]]:_(s16) = G_LSHR [[SHL3]], [[TRUNC7]](s16) + ; GFX8-NEXT: [[ICMP3:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[TRUNC3]](s16), [[LSHR7]] + ; GFX8-NEXT: [[SELECT3:%[0-9]+]]:_(s16) = G_SELECT [[ICMP3]](s1), [[C1]], [[SHL3]] + ; GFX8-NEXT: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[SELECT]](s16) + ; GFX8-NEXT: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[SELECT1]](s16) + ; GFX8-NEXT: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C]](s32) + ; GFX8-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL4]] + ; GFX8-NEXT: [[BITCAST4:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) + ; GFX8-NEXT: [[ZEXT2:%[0-9]+]]:_(s32) = G_ZEXT [[SELECT2]](s16) + ; GFX8-NEXT: [[ZEXT3:%[0-9]+]]:_(s32) = G_ZEXT [[SELECT3]](s16) + ; GFX8-NEXT: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[ZEXT3]], [[C]](s32) + ; GFX8-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[ZEXT2]], [[SHL5]] + ; GFX8-NEXT: [[BITCAST5:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32) + ; GFX8-NEXT: 
[[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BITCAST4]](<2 x s16>), [[BITCAST5]](<2 x s16>) + ; GFX8-NEXT: $vgpr0_vgpr1 = COPY [[CONCAT_VECTORS]](<4 x s16>) ; GFX9-LABEL: name: ushlsat_v4s16 ; GFX9: [[COPY:%[0-9]+]]:_(<4 x s16>) = COPY $vgpr0_vgpr1 - ; GFX9: [[COPY1:%[0-9]+]]:_(<4 x s16>) = COPY $vgpr2_vgpr3 - ; GFX9: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[COPY]](<4 x s16>) - ; GFX9: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>) - ; GFX9: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST]](s32) - ; GFX9: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; GFX9: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) - ; GFX9: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) - ; GFX9: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>) - ; GFX9: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST1]](s32) - ; GFX9: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C]](s32) - ; GFX9: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR1]](s32) - ; GFX9: [[UV2:%[0-9]+]]:_(<2 x s16>), [[UV3:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[COPY1]](<4 x s16>) - ; GFX9: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[UV2]](<2 x s16>) - ; GFX9: [[TRUNC4:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST2]](s32) - ; GFX9: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C]](s32) - ; GFX9: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR2]](s32) - ; GFX9: [[BITCAST3:%[0-9]+]]:_(s32) = G_BITCAST [[UV3]](<2 x s16>) - ; GFX9: [[TRUNC6:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST3]](s32) - ; GFX9: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST3]], [[C]](s32) - ; GFX9: [[TRUNC7:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR3]](s32) - ; GFX9: [[SHL:%[0-9]+]]:_(s16) = G_SHL [[TRUNC]], [[TRUNC4]](s16) - ; GFX9: [[LSHR4:%[0-9]+]]:_(s16) = G_LSHR [[SHL]], [[TRUNC4]](s16) - ; GFX9: [[C1:%[0-9]+]]:_(s16) = G_CONSTANT i16 -1 - ; GFX9: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[TRUNC]](s16), [[LSHR4]] - ; GFX9: [[SELECT:%[0-9]+]]:_(s16) = G_SELECT [[ICMP]](s1), [[C1]], [[SHL]] - ; GFX9: [[SHL1:%[0-9]+]]:_(s16) = G_SHL [[TRUNC1]], [[TRUNC5]](s16) - ; GFX9: [[LSHR5:%[0-9]+]]:_(s16) = G_LSHR [[SHL1]], [[TRUNC5]](s16) - ; GFX9: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[TRUNC1]](s16), [[LSHR5]] - ; GFX9: [[SELECT1:%[0-9]+]]:_(s16) = G_SELECT [[ICMP1]](s1), [[C1]], [[SHL1]] - ; GFX9: [[SHL2:%[0-9]+]]:_(s16) = G_SHL [[TRUNC2]], [[TRUNC6]](s16) - ; GFX9: [[LSHR6:%[0-9]+]]:_(s16) = G_LSHR [[SHL2]], [[TRUNC6]](s16) - ; GFX9: [[ICMP2:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[TRUNC2]](s16), [[LSHR6]] - ; GFX9: [[SELECT2:%[0-9]+]]:_(s16) = G_SELECT [[ICMP2]](s1), [[C1]], [[SHL2]] - ; GFX9: [[SHL3:%[0-9]+]]:_(s16) = G_SHL [[TRUNC3]], [[TRUNC7]](s16) - ; GFX9: [[LSHR7:%[0-9]+]]:_(s16) = G_LSHR [[SHL3]], [[TRUNC7]](s16) - ; GFX9: [[ICMP3:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[TRUNC3]](s16), [[LSHR7]] - ; GFX9: [[SELECT3:%[0-9]+]]:_(s16) = G_SELECT [[ICMP3]](s1), [[C1]], [[SHL3]] - ; GFX9: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[SELECT]](s16) - ; GFX9: [[ANYEXT1:%[0-9]+]]:_(s32) = G_ANYEXT [[SELECT1]](s16) - ; GFX9: [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[ANYEXT]](s32), [[ANYEXT1]](s32) - ; GFX9: [[ANYEXT2:%[0-9]+]]:_(s32) = G_ANYEXT [[SELECT2]](s16) - ; GFX9: [[ANYEXT3:%[0-9]+]]:_(s32) = G_ANYEXT [[SELECT3]](s16) - ; GFX9: [[BUILD_VECTOR_TRUNC1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[ANYEXT2]](s32), [[ANYEXT3]](s32) - ; GFX9: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR_TRUNC]](<2 x s16>), [[BUILD_VECTOR_TRUNC1]](<2 x s16>) - ; GFX9: $vgpr0_vgpr1 = COPY 
[[CONCAT_VECTORS]](<4 x s16>) + ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(<4 x s16>) = COPY $vgpr2_vgpr3 + ; GFX9-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[COPY]](<4 x s16>) + ; GFX9-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>) + ; GFX9-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST]](s32) + ; GFX9-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; GFX9-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) + ; GFX9-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) + ; GFX9-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>) + ; GFX9-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST1]](s32) + ; GFX9-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C]](s32) + ; GFX9-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR1]](s32) + ; GFX9-NEXT: [[UV2:%[0-9]+]]:_(<2 x s16>), [[UV3:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[COPY1]](<4 x s16>) + ; GFX9-NEXT: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[UV2]](<2 x s16>) + ; GFX9-NEXT: [[TRUNC4:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST2]](s32) + ; GFX9-NEXT: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C]](s32) + ; GFX9-NEXT: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR2]](s32) + ; GFX9-NEXT: [[BITCAST3:%[0-9]+]]:_(s32) = G_BITCAST [[UV3]](<2 x s16>) + ; GFX9-NEXT: [[TRUNC6:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST3]](s32) + ; GFX9-NEXT: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST3]], [[C]](s32) + ; GFX9-NEXT: [[TRUNC7:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR3]](s32) + ; GFX9-NEXT: [[SHL:%[0-9]+]]:_(s16) = G_SHL [[TRUNC]], [[TRUNC4]](s16) + ; GFX9-NEXT: [[LSHR4:%[0-9]+]]:_(s16) = G_LSHR [[SHL]], [[TRUNC4]](s16) + ; GFX9-NEXT: [[C1:%[0-9]+]]:_(s16) = G_CONSTANT i16 -1 + ; GFX9-NEXT: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[TRUNC]](s16), [[LSHR4]] + ; GFX9-NEXT: [[SELECT:%[0-9]+]]:_(s16) = G_SELECT [[ICMP]](s1), [[C1]], [[SHL]] + ; GFX9-NEXT: [[SHL1:%[0-9]+]]:_(s16) = G_SHL [[TRUNC1]], [[TRUNC5]](s16) + ; GFX9-NEXT: [[LSHR5:%[0-9]+]]:_(s16) = G_LSHR [[SHL1]], [[TRUNC5]](s16) + ; GFX9-NEXT: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[TRUNC1]](s16), [[LSHR5]] + ; GFX9-NEXT: [[SELECT1:%[0-9]+]]:_(s16) = G_SELECT [[ICMP1]](s1), [[C1]], [[SHL1]] + ; GFX9-NEXT: [[SHL2:%[0-9]+]]:_(s16) = G_SHL [[TRUNC2]], [[TRUNC6]](s16) + ; GFX9-NEXT: [[LSHR6:%[0-9]+]]:_(s16) = G_LSHR [[SHL2]], [[TRUNC6]](s16) + ; GFX9-NEXT: [[ICMP2:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[TRUNC2]](s16), [[LSHR6]] + ; GFX9-NEXT: [[SELECT2:%[0-9]+]]:_(s16) = G_SELECT [[ICMP2]](s1), [[C1]], [[SHL2]] + ; GFX9-NEXT: [[SHL3:%[0-9]+]]:_(s16) = G_SHL [[TRUNC3]], [[TRUNC7]](s16) + ; GFX9-NEXT: [[LSHR7:%[0-9]+]]:_(s16) = G_LSHR [[SHL3]], [[TRUNC7]](s16) + ; GFX9-NEXT: [[ICMP3:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[TRUNC3]](s16), [[LSHR7]] + ; GFX9-NEXT: [[SELECT3:%[0-9]+]]:_(s16) = G_SELECT [[ICMP3]](s1), [[C1]], [[SHL3]] + ; GFX9-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[SELECT]](s16) + ; GFX9-NEXT: [[ANYEXT1:%[0-9]+]]:_(s32) = G_ANYEXT [[SELECT1]](s16) + ; GFX9-NEXT: [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[ANYEXT]](s32), [[ANYEXT1]](s32) + ; GFX9-NEXT: [[ANYEXT2:%[0-9]+]]:_(s32) = G_ANYEXT [[SELECT2]](s16) + ; GFX9-NEXT: [[ANYEXT3:%[0-9]+]]:_(s32) = G_ANYEXT [[SELECT3]](s16) + ; GFX9-NEXT: [[BUILD_VECTOR_TRUNC1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[ANYEXT2]](s32), [[ANYEXT3]](s32) + ; GFX9-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR_TRUNC]](<2 x s16>), [[BUILD_VECTOR_TRUNC1]](<2 x s16>) + ; GFX9-NEXT: $vgpr0_vgpr1 = COPY 
[[CONCAT_VECTORS]](<4 x s16>) %0:_(<4 x s16>) = COPY $vgpr0_vgpr1 %1:_(<4 x s16>) = COPY $vgpr2_vgpr3 %2:_(<4 x s16>) = G_USHLSAT %0, %1 @@ -737,31 +737,31 @@ body: | ; GFX6-LABEL: name: ushlsat_s32 ; GFX6: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX6: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX6: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY]], [[COPY1]](s32) - ; GFX6: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[SHL]], [[COPY1]](s32) - ; GFX6: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 -1 - ; GFX6: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[COPY]](s32), [[LSHR]] - ; GFX6: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[ICMP]](s1), [[C]], [[SHL]] - ; GFX6: $vgpr0 = COPY [[SELECT]](s32) + ; GFX6-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; GFX6-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY]], [[COPY1]](s32) + ; GFX6-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[SHL]], [[COPY1]](s32) + ; GFX6-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 -1 + ; GFX6-NEXT: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[COPY]](s32), [[LSHR]] + ; GFX6-NEXT: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[ICMP]](s1), [[C]], [[SHL]] + ; GFX6-NEXT: $vgpr0 = COPY [[SELECT]](s32) ; GFX8-LABEL: name: ushlsat_s32 ; GFX8: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX8: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX8: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY]], [[COPY1]](s32) - ; GFX8: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[SHL]], [[COPY1]](s32) - ; GFX8: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 -1 - ; GFX8: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[COPY]](s32), [[LSHR]] - ; GFX8: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[ICMP]](s1), [[C]], [[SHL]] - ; GFX8: $vgpr0 = COPY [[SELECT]](s32) + ; GFX8-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; GFX8-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY]], [[COPY1]](s32) + ; GFX8-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[SHL]], [[COPY1]](s32) + ; GFX8-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 -1 + ; GFX8-NEXT: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[COPY]](s32), [[LSHR]] + ; GFX8-NEXT: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[ICMP]](s1), [[C]], [[SHL]] + ; GFX8-NEXT: $vgpr0 = COPY [[SELECT]](s32) ; GFX9-LABEL: name: ushlsat_s32 ; GFX9: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX9: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX9: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY]], [[COPY1]](s32) - ; GFX9: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[SHL]], [[COPY1]](s32) - ; GFX9: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 -1 - ; GFX9: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[COPY]](s32), [[LSHR]] - ; GFX9: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[ICMP]](s1), [[C]], [[SHL]] - ; GFX9: $vgpr0 = COPY [[SELECT]](s32) + ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; GFX9-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY]], [[COPY1]](s32) + ; GFX9-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[SHL]], [[COPY1]](s32) + ; GFX9-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 -1 + ; GFX9-NEXT: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[COPY]](s32), [[LSHR]] + ; GFX9-NEXT: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[ICMP]](s1), [[C]], [[SHL]] + ; GFX9-NEXT: $vgpr0 = COPY [[SELECT]](s32) %0:_(s32) = COPY $vgpr0 %1:_(s32) = COPY $vgpr1 %2:_(s32) = G_USHLSAT %0, %1 @@ -776,52 +776,52 @@ body: | ; GFX6-LABEL: name: ushlsat_v2s32 ; GFX6: [[COPY:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr0_vgpr1 - ; GFX6: [[COPY1:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr2_vgpr3 - ; GFX6: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<2 x s32>) - ; GFX6: [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](<2 x s32>) - ; GFX6: 
[[SHL:%[0-9]+]]:_(s32) = G_SHL [[UV]], [[UV2]](s32) - ; GFX6: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[SHL]], [[UV2]](s32) - ; GFX6: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 -1 - ; GFX6: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[UV]](s32), [[LSHR]] - ; GFX6: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[ICMP]](s1), [[C]], [[SHL]] - ; GFX6: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[UV1]], [[UV3]](s32) - ; GFX6: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[SHL1]], [[UV3]](s32) - ; GFX6: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[UV1]](s32), [[LSHR1]] - ; GFX6: [[SELECT1:%[0-9]+]]:_(s32) = G_SELECT [[ICMP1]](s1), [[C]], [[SHL1]] - ; GFX6: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[SELECT]](s32), [[SELECT1]](s32) - ; GFX6: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x s32>) + ; GFX6-NEXT: [[COPY1:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr2_vgpr3 + ; GFX6-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<2 x s32>) + ; GFX6-NEXT: [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](<2 x s32>) + ; GFX6-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[UV]], [[UV2]](s32) + ; GFX6-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[SHL]], [[UV2]](s32) + ; GFX6-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 -1 + ; GFX6-NEXT: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[UV]](s32), [[LSHR]] + ; GFX6-NEXT: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[ICMP]](s1), [[C]], [[SHL]] + ; GFX6-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[UV1]], [[UV3]](s32) + ; GFX6-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[SHL1]], [[UV3]](s32) + ; GFX6-NEXT: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[UV1]](s32), [[LSHR1]] + ; GFX6-NEXT: [[SELECT1:%[0-9]+]]:_(s32) = G_SELECT [[ICMP1]](s1), [[C]], [[SHL1]] + ; GFX6-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[SELECT]](s32), [[SELECT1]](s32) + ; GFX6-NEXT: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x s32>) ; GFX8-LABEL: name: ushlsat_v2s32 ; GFX8: [[COPY:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr0_vgpr1 - ; GFX8: [[COPY1:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr2_vgpr3 - ; GFX8: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<2 x s32>) - ; GFX8: [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](<2 x s32>) - ; GFX8: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[UV]], [[UV2]](s32) - ; GFX8: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[SHL]], [[UV2]](s32) - ; GFX8: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 -1 - ; GFX8: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[UV]](s32), [[LSHR]] - ; GFX8: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[ICMP]](s1), [[C]], [[SHL]] - ; GFX8: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[UV1]], [[UV3]](s32) - ; GFX8: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[SHL1]], [[UV3]](s32) - ; GFX8: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[UV1]](s32), [[LSHR1]] - ; GFX8: [[SELECT1:%[0-9]+]]:_(s32) = G_SELECT [[ICMP1]](s1), [[C]], [[SHL1]] - ; GFX8: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[SELECT]](s32), [[SELECT1]](s32) - ; GFX8: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x s32>) + ; GFX8-NEXT: [[COPY1:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr2_vgpr3 + ; GFX8-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<2 x s32>) + ; GFX8-NEXT: [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](<2 x s32>) + ; GFX8-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[UV]], [[UV2]](s32) + ; GFX8-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[SHL]], [[UV2]](s32) + ; GFX8-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 -1 + ; GFX8-NEXT: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[UV]](s32), [[LSHR]] + ; GFX8-NEXT: 
[[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[ICMP]](s1), [[C]], [[SHL]] + ; GFX8-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[UV1]], [[UV3]](s32) + ; GFX8-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[SHL1]], [[UV3]](s32) + ; GFX8-NEXT: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[UV1]](s32), [[LSHR1]] + ; GFX8-NEXT: [[SELECT1:%[0-9]+]]:_(s32) = G_SELECT [[ICMP1]](s1), [[C]], [[SHL1]] + ; GFX8-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[SELECT]](s32), [[SELECT1]](s32) + ; GFX8-NEXT: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x s32>) ; GFX9-LABEL: name: ushlsat_v2s32 ; GFX9: [[COPY:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr0_vgpr1 - ; GFX9: [[COPY1:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr2_vgpr3 - ; GFX9: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<2 x s32>) - ; GFX9: [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](<2 x s32>) - ; GFX9: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[UV]], [[UV2]](s32) - ; GFX9: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[SHL]], [[UV2]](s32) - ; GFX9: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 -1 - ; GFX9: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[UV]](s32), [[LSHR]] - ; GFX9: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[ICMP]](s1), [[C]], [[SHL]] - ; GFX9: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[UV1]], [[UV3]](s32) - ; GFX9: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[SHL1]], [[UV3]](s32) - ; GFX9: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[UV1]](s32), [[LSHR1]] - ; GFX9: [[SELECT1:%[0-9]+]]:_(s32) = G_SELECT [[ICMP1]](s1), [[C]], [[SHL1]] - ; GFX9: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[SELECT]](s32), [[SELECT1]](s32) - ; GFX9: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x s32>) + ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr2_vgpr3 + ; GFX9-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<2 x s32>) + ; GFX9-NEXT: [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](<2 x s32>) + ; GFX9-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[UV]], [[UV2]](s32) + ; GFX9-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[SHL]], [[UV2]](s32) + ; GFX9-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 -1 + ; GFX9-NEXT: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[UV]](s32), [[LSHR]] + ; GFX9-NEXT: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[ICMP]](s1), [[C]], [[SHL]] + ; GFX9-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[UV1]], [[UV3]](s32) + ; GFX9-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[SHL1]], [[UV3]](s32) + ; GFX9-NEXT: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[UV1]](s32), [[LSHR1]] + ; GFX9-NEXT: [[SELECT1:%[0-9]+]]:_(s32) = G_SELECT [[ICMP1]](s1), [[C]], [[SHL1]] + ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[SELECT]](s32), [[SELECT1]](s32) + ; GFX9-NEXT: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x s32>) %0:_(<2 x s32>) = COPY $vgpr0_vgpr1 %1:_(<2 x s32>) = COPY $vgpr2_vgpr3 %2:_(<2 x s32>) = G_USHLSAT %0, %1 @@ -836,34 +836,34 @@ body: | ; GFX6-LABEL: name: ushlsat_s64 ; GFX6: [[COPY:%[0-9]+]]:_(s64) = COPY $vgpr0_vgpr1 - ; GFX6: [[COPY1:%[0-9]+]]:_(s64) = COPY $vgpr2_vgpr3 - ; GFX6: [[TRUNC:%[0-9]+]]:_(s32) = G_TRUNC [[COPY1]](s64) - ; GFX6: [[SHL:%[0-9]+]]:_(s64) = G_SHL [[COPY]], [[TRUNC]](s32) - ; GFX6: [[LSHR:%[0-9]+]]:_(s64) = G_LSHR [[SHL]], [[TRUNC]](s32) - ; GFX6: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 -1 - ; GFX6: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[COPY]](s64), [[LSHR]] - ; GFX6: [[SELECT:%[0-9]+]]:_(s64) = G_SELECT [[ICMP]](s1), [[C]], [[SHL]] - ; GFX6: $vgpr0_vgpr1 = COPY [[SELECT]](s64) + ; GFX6-NEXT: [[COPY1:%[0-9]+]]:_(s64) = COPY $vgpr2_vgpr3 + ; 
GFX6-NEXT: [[TRUNC:%[0-9]+]]:_(s32) = G_TRUNC [[COPY1]](s64) + ; GFX6-NEXT: [[SHL:%[0-9]+]]:_(s64) = G_SHL [[COPY]], [[TRUNC]](s32) + ; GFX6-NEXT: [[LSHR:%[0-9]+]]:_(s64) = G_LSHR [[SHL]], [[TRUNC]](s32) + ; GFX6-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 -1 + ; GFX6-NEXT: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[COPY]](s64), [[LSHR]] + ; GFX6-NEXT: [[SELECT:%[0-9]+]]:_(s64) = G_SELECT [[ICMP]](s1), [[C]], [[SHL]] + ; GFX6-NEXT: $vgpr0_vgpr1 = COPY [[SELECT]](s64) ; GFX8-LABEL: name: ushlsat_s64 ; GFX8: [[COPY:%[0-9]+]]:_(s64) = COPY $vgpr0_vgpr1 - ; GFX8: [[COPY1:%[0-9]+]]:_(s64) = COPY $vgpr2_vgpr3 - ; GFX8: [[TRUNC:%[0-9]+]]:_(s32) = G_TRUNC [[COPY1]](s64) - ; GFX8: [[SHL:%[0-9]+]]:_(s64) = G_SHL [[COPY]], [[TRUNC]](s32) - ; GFX8: [[LSHR:%[0-9]+]]:_(s64) = G_LSHR [[SHL]], [[TRUNC]](s32) - ; GFX8: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 -1 - ; GFX8: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[COPY]](s64), [[LSHR]] - ; GFX8: [[SELECT:%[0-9]+]]:_(s64) = G_SELECT [[ICMP]](s1), [[C]], [[SHL]] - ; GFX8: $vgpr0_vgpr1 = COPY [[SELECT]](s64) + ; GFX8-NEXT: [[COPY1:%[0-9]+]]:_(s64) = COPY $vgpr2_vgpr3 + ; GFX8-NEXT: [[TRUNC:%[0-9]+]]:_(s32) = G_TRUNC [[COPY1]](s64) + ; GFX8-NEXT: [[SHL:%[0-9]+]]:_(s64) = G_SHL [[COPY]], [[TRUNC]](s32) + ; GFX8-NEXT: [[LSHR:%[0-9]+]]:_(s64) = G_LSHR [[SHL]], [[TRUNC]](s32) + ; GFX8-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 -1 + ; GFX8-NEXT: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[COPY]](s64), [[LSHR]] + ; GFX8-NEXT: [[SELECT:%[0-9]+]]:_(s64) = G_SELECT [[ICMP]](s1), [[C]], [[SHL]] + ; GFX8-NEXT: $vgpr0_vgpr1 = COPY [[SELECT]](s64) ; GFX9-LABEL: name: ushlsat_s64 ; GFX9: [[COPY:%[0-9]+]]:_(s64) = COPY $vgpr0_vgpr1 - ; GFX9: [[COPY1:%[0-9]+]]:_(s64) = COPY $vgpr2_vgpr3 - ; GFX9: [[TRUNC:%[0-9]+]]:_(s32) = G_TRUNC [[COPY1]](s64) - ; GFX9: [[SHL:%[0-9]+]]:_(s64) = G_SHL [[COPY]], [[TRUNC]](s32) - ; GFX9: [[LSHR:%[0-9]+]]:_(s64) = G_LSHR [[SHL]], [[TRUNC]](s32) - ; GFX9: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 -1 - ; GFX9: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[COPY]](s64), [[LSHR]] - ; GFX9: [[SELECT:%[0-9]+]]:_(s64) = G_SELECT [[ICMP]](s1), [[C]], [[SHL]] - ; GFX9: $vgpr0_vgpr1 = COPY [[SELECT]](s64) + ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(s64) = COPY $vgpr2_vgpr3 + ; GFX9-NEXT: [[TRUNC:%[0-9]+]]:_(s32) = G_TRUNC [[COPY1]](s64) + ; GFX9-NEXT: [[SHL:%[0-9]+]]:_(s64) = G_SHL [[COPY]], [[TRUNC]](s32) + ; GFX9-NEXT: [[LSHR:%[0-9]+]]:_(s64) = G_LSHR [[SHL]], [[TRUNC]](s32) + ; GFX9-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 -1 + ; GFX9-NEXT: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[COPY]](s64), [[LSHR]] + ; GFX9-NEXT: [[SELECT:%[0-9]+]]:_(s64) = G_SELECT [[ICMP]](s1), [[C]], [[SHL]] + ; GFX9-NEXT: $vgpr0_vgpr1 = COPY [[SELECT]](s64) %0:_(s64) = COPY $vgpr0_vgpr1 %1:_(s64) = COPY $vgpr2_vgpr3 %2:_(s64) = G_USHLSAT %0, %1 @@ -878,58 +878,58 @@ body: | ; GFX6-LABEL: name: ushlsat_v2s64 ; GFX6: [[COPY:%[0-9]+]]:_(<2 x s64>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3 - ; GFX6: [[COPY1:%[0-9]+]]:_(<2 x s64>) = COPY $vgpr4_vgpr5_vgpr6_vgpr7 - ; GFX6: [[UV:%[0-9]+]]:_(s64), [[UV1:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[COPY]](<2 x s64>) - ; GFX6: [[UV2:%[0-9]+]]:_(s64), [[UV3:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[COPY1]](<2 x s64>) - ; GFX6: [[TRUNC:%[0-9]+]]:_(s32) = G_TRUNC [[UV2]](s64) - ; GFX6: [[SHL:%[0-9]+]]:_(s64) = G_SHL [[UV]], [[TRUNC]](s32) - ; GFX6: [[LSHR:%[0-9]+]]:_(s64) = G_LSHR [[SHL]], [[TRUNC]](s32) - ; GFX6: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 -1 - ; GFX6: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[UV]](s64), [[LSHR]] - ; GFX6: 
[[SELECT:%[0-9]+]]:_(s64) = G_SELECT [[ICMP]](s1), [[C]], [[SHL]] - ; GFX6: [[TRUNC1:%[0-9]+]]:_(s32) = G_TRUNC [[UV3]](s64) - ; GFX6: [[SHL1:%[0-9]+]]:_(s64) = G_SHL [[UV1]], [[TRUNC1]](s32) - ; GFX6: [[LSHR1:%[0-9]+]]:_(s64) = G_LSHR [[SHL1]], [[TRUNC1]](s32) - ; GFX6: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[UV1]](s64), [[LSHR1]] - ; GFX6: [[SELECT1:%[0-9]+]]:_(s64) = G_SELECT [[ICMP1]](s1), [[C]], [[SHL1]] - ; GFX6: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s64>) = G_BUILD_VECTOR [[SELECT]](s64), [[SELECT1]](s64) - ; GFX6: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<2 x s64>) + ; GFX6-NEXT: [[COPY1:%[0-9]+]]:_(<2 x s64>) = COPY $vgpr4_vgpr5_vgpr6_vgpr7 + ; GFX6-NEXT: [[UV:%[0-9]+]]:_(s64), [[UV1:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[COPY]](<2 x s64>) + ; GFX6-NEXT: [[UV2:%[0-9]+]]:_(s64), [[UV3:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[COPY1]](<2 x s64>) + ; GFX6-NEXT: [[TRUNC:%[0-9]+]]:_(s32) = G_TRUNC [[UV2]](s64) + ; GFX6-NEXT: [[SHL:%[0-9]+]]:_(s64) = G_SHL [[UV]], [[TRUNC]](s32) + ; GFX6-NEXT: [[LSHR:%[0-9]+]]:_(s64) = G_LSHR [[SHL]], [[TRUNC]](s32) + ; GFX6-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 -1 + ; GFX6-NEXT: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[UV]](s64), [[LSHR]] + ; GFX6-NEXT: [[SELECT:%[0-9]+]]:_(s64) = G_SELECT [[ICMP]](s1), [[C]], [[SHL]] + ; GFX6-NEXT: [[TRUNC1:%[0-9]+]]:_(s32) = G_TRUNC [[UV3]](s64) + ; GFX6-NEXT: [[SHL1:%[0-9]+]]:_(s64) = G_SHL [[UV1]], [[TRUNC1]](s32) + ; GFX6-NEXT: [[LSHR1:%[0-9]+]]:_(s64) = G_LSHR [[SHL1]], [[TRUNC1]](s32) + ; GFX6-NEXT: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[UV1]](s64), [[LSHR1]] + ; GFX6-NEXT: [[SELECT1:%[0-9]+]]:_(s64) = G_SELECT [[ICMP1]](s1), [[C]], [[SHL1]] + ; GFX6-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s64>) = G_BUILD_VECTOR [[SELECT]](s64), [[SELECT1]](s64) + ; GFX6-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<2 x s64>) ; GFX8-LABEL: name: ushlsat_v2s64 ; GFX8: [[COPY:%[0-9]+]]:_(<2 x s64>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3 - ; GFX8: [[COPY1:%[0-9]+]]:_(<2 x s64>) = COPY $vgpr4_vgpr5_vgpr6_vgpr7 - ; GFX8: [[UV:%[0-9]+]]:_(s64), [[UV1:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[COPY]](<2 x s64>) - ; GFX8: [[UV2:%[0-9]+]]:_(s64), [[UV3:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[COPY1]](<2 x s64>) - ; GFX8: [[TRUNC:%[0-9]+]]:_(s32) = G_TRUNC [[UV2]](s64) - ; GFX8: [[SHL:%[0-9]+]]:_(s64) = G_SHL [[UV]], [[TRUNC]](s32) - ; GFX8: [[LSHR:%[0-9]+]]:_(s64) = G_LSHR [[SHL]], [[TRUNC]](s32) - ; GFX8: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 -1 - ; GFX8: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[UV]](s64), [[LSHR]] - ; GFX8: [[SELECT:%[0-9]+]]:_(s64) = G_SELECT [[ICMP]](s1), [[C]], [[SHL]] - ; GFX8: [[TRUNC1:%[0-9]+]]:_(s32) = G_TRUNC [[UV3]](s64) - ; GFX8: [[SHL1:%[0-9]+]]:_(s64) = G_SHL [[UV1]], [[TRUNC1]](s32) - ; GFX8: [[LSHR1:%[0-9]+]]:_(s64) = G_LSHR [[SHL1]], [[TRUNC1]](s32) - ; GFX8: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[UV1]](s64), [[LSHR1]] - ; GFX8: [[SELECT1:%[0-9]+]]:_(s64) = G_SELECT [[ICMP1]](s1), [[C]], [[SHL1]] - ; GFX8: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s64>) = G_BUILD_VECTOR [[SELECT]](s64), [[SELECT1]](s64) - ; GFX8: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<2 x s64>) + ; GFX8-NEXT: [[COPY1:%[0-9]+]]:_(<2 x s64>) = COPY $vgpr4_vgpr5_vgpr6_vgpr7 + ; GFX8-NEXT: [[UV:%[0-9]+]]:_(s64), [[UV1:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[COPY]](<2 x s64>) + ; GFX8-NEXT: [[UV2:%[0-9]+]]:_(s64), [[UV3:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[COPY1]](<2 x s64>) + ; GFX8-NEXT: [[TRUNC:%[0-9]+]]:_(s32) = G_TRUNC [[UV2]](s64) + ; GFX8-NEXT: [[SHL:%[0-9]+]]:_(s64) = G_SHL [[UV]], [[TRUNC]](s32) + ; 
GFX8-NEXT: [[LSHR:%[0-9]+]]:_(s64) = G_LSHR [[SHL]], [[TRUNC]](s32) + ; GFX8-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 -1 + ; GFX8-NEXT: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[UV]](s64), [[LSHR]] + ; GFX8-NEXT: [[SELECT:%[0-9]+]]:_(s64) = G_SELECT [[ICMP]](s1), [[C]], [[SHL]] + ; GFX8-NEXT: [[TRUNC1:%[0-9]+]]:_(s32) = G_TRUNC [[UV3]](s64) + ; GFX8-NEXT: [[SHL1:%[0-9]+]]:_(s64) = G_SHL [[UV1]], [[TRUNC1]](s32) + ; GFX8-NEXT: [[LSHR1:%[0-9]+]]:_(s64) = G_LSHR [[SHL1]], [[TRUNC1]](s32) + ; GFX8-NEXT: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[UV1]](s64), [[LSHR1]] + ; GFX8-NEXT: [[SELECT1:%[0-9]+]]:_(s64) = G_SELECT [[ICMP1]](s1), [[C]], [[SHL1]] + ; GFX8-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s64>) = G_BUILD_VECTOR [[SELECT]](s64), [[SELECT1]](s64) + ; GFX8-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<2 x s64>) ; GFX9-LABEL: name: ushlsat_v2s64 ; GFX9: [[COPY:%[0-9]+]]:_(<2 x s64>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3 - ; GFX9: [[COPY1:%[0-9]+]]:_(<2 x s64>) = COPY $vgpr4_vgpr5_vgpr6_vgpr7 - ; GFX9: [[UV:%[0-9]+]]:_(s64), [[UV1:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[COPY]](<2 x s64>) - ; GFX9: [[UV2:%[0-9]+]]:_(s64), [[UV3:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[COPY1]](<2 x s64>) - ; GFX9: [[TRUNC:%[0-9]+]]:_(s32) = G_TRUNC [[UV2]](s64) - ; GFX9: [[SHL:%[0-9]+]]:_(s64) = G_SHL [[UV]], [[TRUNC]](s32) - ; GFX9: [[LSHR:%[0-9]+]]:_(s64) = G_LSHR [[SHL]], [[TRUNC]](s32) - ; GFX9: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 -1 - ; GFX9: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[UV]](s64), [[LSHR]] - ; GFX9: [[SELECT:%[0-9]+]]:_(s64) = G_SELECT [[ICMP]](s1), [[C]], [[SHL]] - ; GFX9: [[TRUNC1:%[0-9]+]]:_(s32) = G_TRUNC [[UV3]](s64) - ; GFX9: [[SHL1:%[0-9]+]]:_(s64) = G_SHL [[UV1]], [[TRUNC1]](s32) - ; GFX9: [[LSHR1:%[0-9]+]]:_(s64) = G_LSHR [[SHL1]], [[TRUNC1]](s32) - ; GFX9: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[UV1]](s64), [[LSHR1]] - ; GFX9: [[SELECT1:%[0-9]+]]:_(s64) = G_SELECT [[ICMP1]](s1), [[C]], [[SHL1]] - ; GFX9: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s64>) = G_BUILD_VECTOR [[SELECT]](s64), [[SELECT1]](s64) - ; GFX9: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<2 x s64>) + ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(<2 x s64>) = COPY $vgpr4_vgpr5_vgpr6_vgpr7 + ; GFX9-NEXT: [[UV:%[0-9]+]]:_(s64), [[UV1:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[COPY]](<2 x s64>) + ; GFX9-NEXT: [[UV2:%[0-9]+]]:_(s64), [[UV3:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[COPY1]](<2 x s64>) + ; GFX9-NEXT: [[TRUNC:%[0-9]+]]:_(s32) = G_TRUNC [[UV2]](s64) + ; GFX9-NEXT: [[SHL:%[0-9]+]]:_(s64) = G_SHL [[UV]], [[TRUNC]](s32) + ; GFX9-NEXT: [[LSHR:%[0-9]+]]:_(s64) = G_LSHR [[SHL]], [[TRUNC]](s32) + ; GFX9-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 -1 + ; GFX9-NEXT: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[UV]](s64), [[LSHR]] + ; GFX9-NEXT: [[SELECT:%[0-9]+]]:_(s64) = G_SELECT [[ICMP]](s1), [[C]], [[SHL]] + ; GFX9-NEXT: [[TRUNC1:%[0-9]+]]:_(s32) = G_TRUNC [[UV3]](s64) + ; GFX9-NEXT: [[SHL1:%[0-9]+]]:_(s64) = G_SHL [[UV1]], [[TRUNC1]](s32) + ; GFX9-NEXT: [[LSHR1:%[0-9]+]]:_(s64) = G_LSHR [[SHL1]], [[TRUNC1]](s32) + ; GFX9-NEXT: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[UV1]](s64), [[LSHR1]] + ; GFX9-NEXT: [[SELECT1:%[0-9]+]]:_(s64) = G_SELECT [[ICMP1]](s1), [[C]], [[SHL1]] + ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s64>) = G_BUILD_VECTOR [[SELECT]](s64), [[SELECT1]](s64) + ; GFX9-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<2 x s64>) %0:_(<2 x s64>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3 %1:_(<2 x s64>) = COPY $vgpr4_vgpr5_vgpr6_vgpr7 %2:_(<2 x s64>) = G_USHLSAT %0, %1 diff --git 
a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-usube.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-usube.mir index bd4cfd255870..818e990bbd1b 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-usube.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-usube.mir @@ -1,5 +1,5 @@ # NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py -# RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=fiji -O0 -run-pass=legalizer -global-isel-abort=0 %s -o - | FileCheck %s +# RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=fiji -O0 -run-pass=legalizer %s -o - | FileCheck %s --- name: test_usube_s32 diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-usubo.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-usubo.mir index 9512db51141a..1099626dd503 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-usubo.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-usubo.mir @@ -1,5 +1,5 @@ # NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py -# RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=fiji -O0 -run-pass=legalizer -global-isel-abort=0 %s -o - | FileCheck %s +# RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=fiji -O0 -run-pass=legalizer %s -o - | FileCheck %s --- name: test_usubo_s32 @@ -9,11 +9,11 @@ body: | ; CHECK-LABEL: name: test_usubo_s32 ; CHECK: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; CHECK: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; CHECK: [[USUBO:%[0-9]+]]:_(s32), [[USUBO1:%[0-9]+]]:_(s1) = G_USUBO [[COPY]], [[COPY1]] - ; CHECK: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[USUBO1]](s1) - ; CHECK: $vgpr0 = COPY [[USUBO]](s32) - ; CHECK: $vgpr1 = COPY [[ZEXT]](s32) + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; CHECK-NEXT: [[USUBO:%[0-9]+]]:_(s32), [[USUBO1:%[0-9]+]]:_(s1) = G_USUBO [[COPY]], [[COPY1]] + ; CHECK-NEXT: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[USUBO1]](s1) + ; CHECK-NEXT: $vgpr0 = COPY [[USUBO]](s32) + ; CHECK-NEXT: $vgpr1 = COPY [[ZEXT]](s32) %0:_(s32) = COPY $vgpr0 %1:_(s32) = COPY $vgpr1 %2:_(s32), %3:_(s1) = G_USUBO %0, %1 @@ -30,17 +30,17 @@ body: | ; CHECK-LABEL: name: test_usubo_s7 ; CHECK: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; CHECK: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; CHECK: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 127 - ; CHECK: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY]], [[C]] - ; CHECK: [[AND1:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C]] - ; CHECK: [[SUB:%[0-9]+]]:_(s32) = G_SUB [[AND]], [[AND1]] - ; CHECK: [[AND2:%[0-9]+]]:_(s32) = G_AND [[SUB]], [[C]] - ; CHECK: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[SUB]](s32), [[AND2]] - ; CHECK: [[AND3:%[0-9]+]]:_(s32) = G_AND [[SUB]], [[C]] - ; CHECK: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[ICMP]](s1) - ; CHECK: $vgpr0 = COPY [[AND3]](s32) - ; CHECK: $vgpr1 = COPY [[ZEXT]](s32) + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 127 + ; CHECK-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY]], [[C]] + ; CHECK-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C]] + ; CHECK-NEXT: [[SUB:%[0-9]+]]:_(s32) = G_SUB [[AND]], [[AND1]] + ; CHECK-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[SUB]], [[C]] + ; CHECK-NEXT: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[SUB]](s32), [[AND2]] + ; CHECK-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[SUB]], [[C]] + ; CHECK-NEXT: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[ICMP]](s1) + ; CHECK-NEXT: $vgpr0 = COPY [[AND3]](s32) + ; CHECK-NEXT: $vgpr1 = COPY [[ZEXT]](s32) %0:_(s32) = COPY $vgpr0 %1:_(s32) = COPY $vgpr1 %2:_(s7) = G_TRUNC %0 @@ -61,17 +61,17 @@ body: | ; CHECK-LABEL: name: test_usubo_s16 ; CHECK: 
[[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; CHECK: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; CHECK: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 - ; CHECK: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY]], [[C]] - ; CHECK: [[AND1:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C]] - ; CHECK: [[SUB:%[0-9]+]]:_(s32) = G_SUB [[AND]], [[AND1]] - ; CHECK: [[AND2:%[0-9]+]]:_(s32) = G_AND [[SUB]], [[C]] - ; CHECK: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[SUB]](s32), [[AND2]] - ; CHECK: [[AND3:%[0-9]+]]:_(s32) = G_AND [[SUB]], [[C]] - ; CHECK: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[ICMP]](s1) - ; CHECK: $vgpr0 = COPY [[AND3]](s32) - ; CHECK: $vgpr1 = COPY [[ZEXT]](s32) + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 + ; CHECK-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY]], [[C]] + ; CHECK-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C]] + ; CHECK-NEXT: [[SUB:%[0-9]+]]:_(s32) = G_SUB [[AND]], [[AND1]] + ; CHECK-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[SUB]], [[C]] + ; CHECK-NEXT: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[SUB]](s32), [[AND2]] + ; CHECK-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[SUB]], [[C]] + ; CHECK-NEXT: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[ICMP]](s1) + ; CHECK-NEXT: $vgpr0 = COPY [[AND3]](s32) + ; CHECK-NEXT: $vgpr1 = COPY [[ZEXT]](s32) %0:_(s32) = COPY $vgpr0 %1:_(s32) = COPY $vgpr1 %2:_(s16) = G_TRUNC %0 @@ -92,16 +92,16 @@ body: | ; CHECK-LABEL: name: test_usubo_s64 ; CHECK: [[COPY:%[0-9]+]]:_(s64) = COPY $vgpr0_vgpr1 - ; CHECK: [[COPY1:%[0-9]+]]:_(s64) = COPY $vgpr2_vgpr3 - ; CHECK: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](s64) - ; CHECK: [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](s64) - ; CHECK: [[USUBO:%[0-9]+]]:_(s32), [[USUBO1:%[0-9]+]]:_(s1) = G_USUBO [[UV]], [[UV2]] - ; CHECK: [[USUBE:%[0-9]+]]:_(s32), [[USUBE1:%[0-9]+]]:_(s1) = G_USUBE [[UV1]], [[UV3]], [[USUBO1]] - ; CHECK: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[USUBO]](s32), [[USUBE]](s32) - ; CHECK: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(ult), [[COPY]](s64), [[COPY1]] - ; CHECK: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[ICMP]](s1) - ; CHECK: $vgpr0_vgpr1 = COPY [[MV]](s64) - ; CHECK: $vgpr2 = COPY [[ZEXT]](s32) + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s64) = COPY $vgpr2_vgpr3 + ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](s64) + ; CHECK-NEXT: [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](s64) + ; CHECK-NEXT: [[USUBO:%[0-9]+]]:_(s32), [[USUBO1:%[0-9]+]]:_(s1) = G_USUBO [[UV]], [[UV2]] + ; CHECK-NEXT: [[USUBE:%[0-9]+]]:_(s32), [[USUBE1:%[0-9]+]]:_(s1) = G_USUBE [[UV1]], [[UV3]], [[USUBO1]] + ; CHECK-NEXT: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[USUBO]](s32), [[USUBE]](s32) + ; CHECK-NEXT: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(ult), [[COPY]](s64), [[COPY1]] + ; CHECK-NEXT: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[ICMP]](s1) + ; CHECK-NEXT: $vgpr0_vgpr1 = COPY [[MV]](s64) + ; CHECK-NEXT: $vgpr2 = COPY [[ZEXT]](s32) %0:_(s64) = COPY $vgpr0_vgpr1 %1:_(s64) = COPY $vgpr2_vgpr3 %2:_(s64), %3:_(s1) = G_USUBO %0, %1 @@ -118,41 +118,41 @@ body: | ; CHECK-LABEL: name: test_usubo_v2s16 ; CHECK: [[COPY:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0 - ; CHECK: [[COPY1:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr1 - ; CHECK: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY]](<2 x s16>) - ; CHECK: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST]](s32) - ; CHECK: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; CHECK: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) - ; CHECK: 
[[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) - ; CHECK: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[COPY1]](<2 x s16>) - ; CHECK: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST1]](s32) - ; CHECK: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C]](s32) - ; CHECK: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR1]](s32) - ; CHECK: [[SUB:%[0-9]+]]:_(s16) = G_SUB [[TRUNC]], [[TRUNC2]] - ; CHECK: [[SUB1:%[0-9]+]]:_(s16) = G_SUB [[TRUNC1]], [[TRUNC3]] - ; CHECK: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[SUB]](s16) - ; CHECK: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[SUB1]](s16) - ; CHECK: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C]](s32) - ; CHECK: [[OR:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL]] - ; CHECK: [[BITCAST2:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) - ; CHECK: [[BITCAST3:%[0-9]+]]:_(s32) = G_BITCAST [[COPY]](<2 x s16>) - ; CHECK: [[TRUNC4:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST3]](s32) - ; CHECK: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST3]], [[C]](s32) - ; CHECK: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR2]](s32) - ; CHECK: [[BITCAST4:%[0-9]+]]:_(s32) = G_BITCAST [[COPY1]](<2 x s16>) - ; CHECK: [[TRUNC6:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST4]](s32) - ; CHECK: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST4]], [[C]](s32) - ; CHECK: [[TRUNC7:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR3]](s32) - ; CHECK: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(ult), [[TRUNC4]](s16), [[TRUNC6]] - ; CHECK: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(ult), [[TRUNC5]](s16), [[TRUNC7]] - ; CHECK: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[ICMP]](s1) - ; CHECK: [[ANYEXT1:%[0-9]+]]:_(s32) = G_ANYEXT [[ICMP1]](s1) - ; CHECK: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 - ; CHECK: [[AND:%[0-9]+]]:_(s32) = G_AND [[ANYEXT]], [[C1]] - ; CHECK: [[AND1:%[0-9]+]]:_(s32) = G_AND [[ANYEXT1]], [[C1]] - ; CHECK: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[AND]](s32), [[AND1]](s32) - ; CHECK: $vgpr0 = COPY [[BITCAST2]](<2 x s16>) - ; CHECK: $vgpr1_vgpr2 = COPY [[BUILD_VECTOR]](<2 x s32>) + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr1 + ; CHECK-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY]](<2 x s16>) + ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST]](s32) + ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; CHECK-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) + ; CHECK-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) + ; CHECK-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[COPY1]](<2 x s16>) + ; CHECK-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST1]](s32) + ; CHECK-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C]](s32) + ; CHECK-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR1]](s32) + ; CHECK-NEXT: [[SUB:%[0-9]+]]:_(s16) = G_SUB [[TRUNC]], [[TRUNC2]] + ; CHECK-NEXT: [[SUB1:%[0-9]+]]:_(s16) = G_SUB [[TRUNC1]], [[TRUNC3]] + ; CHECK-NEXT: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[SUB]](s16) + ; CHECK-NEXT: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[SUB1]](s16) + ; CHECK-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C]](s32) + ; CHECK-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL]] + ; CHECK-NEXT: [[BITCAST2:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) + ; CHECK-NEXT: [[BITCAST3:%[0-9]+]]:_(s32) = G_BITCAST [[COPY]](<2 x s16>) + ; CHECK-NEXT: [[TRUNC4:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST3]](s32) + ; CHECK-NEXT: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST3]], [[C]](s32) + ; CHECK-NEXT: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR2]](s32) + ; CHECK-NEXT: [[BITCAST4:%[0-9]+]]:_(s32) = G_BITCAST [[COPY1]](<2 x s16>) + ; CHECK-NEXT: [[TRUNC6:%[0-9]+]]:_(s16) = G_TRUNC 
[[BITCAST4]](s32) + ; CHECK-NEXT: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST4]], [[C]](s32) + ; CHECK-NEXT: [[TRUNC7:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR3]](s32) + ; CHECK-NEXT: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(ult), [[TRUNC4]](s16), [[TRUNC6]] + ; CHECK-NEXT: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(ult), [[TRUNC5]](s16), [[TRUNC7]] + ; CHECK-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[ICMP]](s1) + ; CHECK-NEXT: [[ANYEXT1:%[0-9]+]]:_(s32) = G_ANYEXT [[ICMP1]](s1) + ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 + ; CHECK-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[ANYEXT]], [[C1]] + ; CHECK-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[ANYEXT1]], [[C1]] + ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[AND]](s32), [[AND1]](s32) + ; CHECK-NEXT: $vgpr0 = COPY [[BITCAST2]](<2 x s16>) + ; CHECK-NEXT: $vgpr1_vgpr2 = COPY [[BUILD_VECTOR]](<2 x s32>) %0:_(<2 x s16>) = COPY $vgpr0 %1:_(<2 x s16>) = COPY $vgpr1 %2:_(<2 x s16>), %3:_(<2 x s1>) = G_USUBO %0, %1 @@ -168,74 +168,74 @@ body: | liveins: $vgpr0_vgpr1_vgpr2, $vgpr3_vgpr4_vgpr5 ; CHECK-LABEL: name: test_usubo_v3s16 ; CHECK: [[COPY:%[0-9]+]]:_(<6 x s16>) = COPY $vgpr0_vgpr1_vgpr2 - ; CHECK: [[COPY1:%[0-9]+]]:_(<6 x s16>) = COPY $vgpr3_vgpr4_vgpr5 - ; CHECK: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>), [[UV2:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[COPY]](<6 x s16>) - ; CHECK: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>) - ; CHECK: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST]](s32) - ; CHECK: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; CHECK: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) - ; CHECK: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) - ; CHECK: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>) - ; CHECK: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST1]](s32) - ; CHECK: [[UV3:%[0-9]+]]:_(<2 x s16>), [[UV4:%[0-9]+]]:_(<2 x s16>), [[UV5:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[COPY1]](<6 x s16>) - ; CHECK: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[UV3]](<2 x s16>) - ; CHECK: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST2]](s32) - ; CHECK: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C]](s32) - ; CHECK: [[TRUNC4:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR1]](s32) - ; CHECK: [[BITCAST3:%[0-9]+]]:_(s32) = G_BITCAST [[UV4]](<2 x s16>) - ; CHECK: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST3]](s32) - ; CHECK: [[SUB:%[0-9]+]]:_(s16) = G_SUB [[TRUNC]], [[TRUNC3]] - ; CHECK: [[SUB1:%[0-9]+]]:_(s16) = G_SUB [[TRUNC1]], [[TRUNC4]] - ; CHECK: [[SUB2:%[0-9]+]]:_(s16) = G_SUB [[TRUNC2]], [[TRUNC5]] - ; CHECK: [[UV6:%[0-9]+]]:_(<2 x s16>), [[UV7:%[0-9]+]]:_(<2 x s16>), [[UV8:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[COPY]](<6 x s16>) - ; CHECK: [[BITCAST4:%[0-9]+]]:_(s32) = G_BITCAST [[UV6]](<2 x s16>) - ; CHECK: [[TRUNC6:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST4]](s32) - ; CHECK: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST4]], [[C]](s32) - ; CHECK: [[TRUNC7:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR2]](s32) - ; CHECK: [[BITCAST5:%[0-9]+]]:_(s32) = G_BITCAST [[UV7]](<2 x s16>) - ; CHECK: [[TRUNC8:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST5]](s32) - ; CHECK: [[UV9:%[0-9]+]]:_(<2 x s16>), [[UV10:%[0-9]+]]:_(<2 x s16>), [[UV11:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[COPY1]](<6 x s16>) - ; CHECK: [[BITCAST6:%[0-9]+]]:_(s32) = G_BITCAST [[UV9]](<2 x s16>) - ; CHECK: [[TRUNC9:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST6]](s32) - ; CHECK: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST6]], [[C]](s32) - ; CHECK: [[TRUNC10:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR3]](s32) - ; CHECK: [[BITCAST7:%[0-9]+]]:_(s32) 
= G_BITCAST [[UV10]](<2 x s16>) - ; CHECK: [[TRUNC11:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST7]](s32) - ; CHECK: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(ult), [[TRUNC6]](s16), [[TRUNC9]] - ; CHECK: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(ult), [[TRUNC7]](s16), [[TRUNC10]] - ; CHECK: [[ICMP2:%[0-9]+]]:_(s1) = G_ICMP intpred(ult), [[TRUNC8]](s16), [[TRUNC11]] - ; CHECK: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[ICMP]](s1) - ; CHECK: [[ANYEXT1:%[0-9]+]]:_(s32) = G_ANYEXT [[ICMP1]](s1) - ; CHECK: [[ANYEXT2:%[0-9]+]]:_(s32) = G_ANYEXT [[ICMP2]](s1) - ; CHECK: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF - ; CHECK: [[UV12:%[0-9]+]]:_(<2 x s16>), [[UV13:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<4 x s16>) - ; CHECK: [[BITCAST8:%[0-9]+]]:_(s32) = G_BITCAST [[UV12]](<2 x s16>) - ; CHECK: [[LSHR4:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST8]], [[C]](s32) - ; CHECK: [[BITCAST9:%[0-9]+]]:_(s32) = G_BITCAST [[UV13]](<2 x s16>) - ; CHECK: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[SUB]](s16) - ; CHECK: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[SUB1]](s16) - ; CHECK: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C]](s32) - ; CHECK: [[OR:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL]] - ; CHECK: [[BITCAST10:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) - ; CHECK: [[ZEXT2:%[0-9]+]]:_(s32) = G_ZEXT [[SUB2]](s16) - ; CHECK: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 - ; CHECK: [[AND:%[0-9]+]]:_(s32) = G_AND [[BITCAST8]], [[C1]] - ; CHECK: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND]], [[C]](s32) - ; CHECK: [[OR1:%[0-9]+]]:_(s32) = G_OR [[ZEXT2]], [[SHL1]] - ; CHECK: [[BITCAST11:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32) - ; CHECK: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LSHR4]], [[C1]] - ; CHECK: [[AND2:%[0-9]+]]:_(s32) = G_AND [[BITCAST9]], [[C1]] - ; CHECK: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND2]], [[C]](s32) - ; CHECK: [[OR2:%[0-9]+]]:_(s32) = G_OR [[AND1]], [[SHL2]] - ; CHECK: [[BITCAST12:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR2]](s32) - ; CHECK: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BITCAST10]](<2 x s16>), [[BITCAST11]](<2 x s16>), [[BITCAST12]](<2 x s16>) - ; CHECK: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 - ; CHECK: [[AND3:%[0-9]+]]:_(s32) = G_AND [[ANYEXT]], [[C2]] - ; CHECK: [[AND4:%[0-9]+]]:_(s32) = G_AND [[ANYEXT1]], [[C2]] - ; CHECK: [[AND5:%[0-9]+]]:_(s32) = G_AND [[ANYEXT2]], [[C2]] - ; CHECK: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[AND3]](s32), [[AND4]](s32), [[AND5]](s32) - ; CHECK: $vgpr0_vgpr1_vgpr2 = COPY [[CONCAT_VECTORS]](<6 x s16>) - ; CHECK: $vgpr0_vgpr1_vgpr2 = COPY [[BUILD_VECTOR]](<3 x s32>) + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(<6 x s16>) = COPY $vgpr3_vgpr4_vgpr5 + ; CHECK-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>), [[UV2:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[COPY]](<6 x s16>) + ; CHECK-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>) + ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST]](s32) + ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; CHECK-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) + ; CHECK-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) + ; CHECK-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>) + ; CHECK-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST1]](s32) + ; CHECK-NEXT: [[UV3:%[0-9]+]]:_(<2 x s16>), [[UV4:%[0-9]+]]:_(<2 x s16>), [[UV5:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[COPY1]](<6 x s16>) + ; CHECK-NEXT: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[UV3]](<2 x s16>) + ; CHECK-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST2]](s32) + ; 
CHECK-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C]](s32) + ; CHECK-NEXT: [[TRUNC4:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR1]](s32) + ; CHECK-NEXT: [[BITCAST3:%[0-9]+]]:_(s32) = G_BITCAST [[UV4]](<2 x s16>) + ; CHECK-NEXT: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST3]](s32) + ; CHECK-NEXT: [[SUB:%[0-9]+]]:_(s16) = G_SUB [[TRUNC]], [[TRUNC3]] + ; CHECK-NEXT: [[SUB1:%[0-9]+]]:_(s16) = G_SUB [[TRUNC1]], [[TRUNC4]] + ; CHECK-NEXT: [[SUB2:%[0-9]+]]:_(s16) = G_SUB [[TRUNC2]], [[TRUNC5]] + ; CHECK-NEXT: [[UV6:%[0-9]+]]:_(<2 x s16>), [[UV7:%[0-9]+]]:_(<2 x s16>), [[UV8:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[COPY]](<6 x s16>) + ; CHECK-NEXT: [[BITCAST4:%[0-9]+]]:_(s32) = G_BITCAST [[UV6]](<2 x s16>) + ; CHECK-NEXT: [[TRUNC6:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST4]](s32) + ; CHECK-NEXT: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST4]], [[C]](s32) + ; CHECK-NEXT: [[TRUNC7:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR2]](s32) + ; CHECK-NEXT: [[BITCAST5:%[0-9]+]]:_(s32) = G_BITCAST [[UV7]](<2 x s16>) + ; CHECK-NEXT: [[TRUNC8:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST5]](s32) + ; CHECK-NEXT: [[UV9:%[0-9]+]]:_(<2 x s16>), [[UV10:%[0-9]+]]:_(<2 x s16>), [[UV11:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[COPY1]](<6 x s16>) + ; CHECK-NEXT: [[BITCAST6:%[0-9]+]]:_(s32) = G_BITCAST [[UV9]](<2 x s16>) + ; CHECK-NEXT: [[TRUNC9:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST6]](s32) + ; CHECK-NEXT: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST6]], [[C]](s32) + ; CHECK-NEXT: [[TRUNC10:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR3]](s32) + ; CHECK-NEXT: [[BITCAST7:%[0-9]+]]:_(s32) = G_BITCAST [[UV10]](<2 x s16>) + ; CHECK-NEXT: [[TRUNC11:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST7]](s32) + ; CHECK-NEXT: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(ult), [[TRUNC6]](s16), [[TRUNC9]] + ; CHECK-NEXT: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(ult), [[TRUNC7]](s16), [[TRUNC10]] + ; CHECK-NEXT: [[ICMP2:%[0-9]+]]:_(s1) = G_ICMP intpred(ult), [[TRUNC8]](s16), [[TRUNC11]] + ; CHECK-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[ICMP]](s1) + ; CHECK-NEXT: [[ANYEXT1:%[0-9]+]]:_(s32) = G_ANYEXT [[ICMP1]](s1) + ; CHECK-NEXT: [[ANYEXT2:%[0-9]+]]:_(s32) = G_ANYEXT [[ICMP2]](s1) + ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF + ; CHECK-NEXT: [[UV12:%[0-9]+]]:_(<2 x s16>), [[UV13:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<4 x s16>) + ; CHECK-NEXT: [[BITCAST8:%[0-9]+]]:_(s32) = G_BITCAST [[UV12]](<2 x s16>) + ; CHECK-NEXT: [[LSHR4:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST8]], [[C]](s32) + ; CHECK-NEXT: [[BITCAST9:%[0-9]+]]:_(s32) = G_BITCAST [[UV13]](<2 x s16>) + ; CHECK-NEXT: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[SUB]](s16) + ; CHECK-NEXT: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[SUB1]](s16) + ; CHECK-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C]](s32) + ; CHECK-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL]] + ; CHECK-NEXT: [[BITCAST10:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) + ; CHECK-NEXT: [[ZEXT2:%[0-9]+]]:_(s32) = G_ZEXT [[SUB2]](s16) + ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 + ; CHECK-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[BITCAST8]], [[C1]] + ; CHECK-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND]], [[C]](s32) + ; CHECK-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[ZEXT2]], [[SHL1]] + ; CHECK-NEXT: [[BITCAST11:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32) + ; CHECK-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LSHR4]], [[C1]] + ; CHECK-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[BITCAST9]], [[C1]] + ; CHECK-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND2]], [[C]](s32) + ; CHECK-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[AND1]], [[SHL2]] + ; CHECK-NEXT: 
[[BITCAST12:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR2]](s32) + ; CHECK-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BITCAST10]](<2 x s16>), [[BITCAST11]](<2 x s16>), [[BITCAST12]](<2 x s16>) + ; CHECK-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 + ; CHECK-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[ANYEXT]], [[C2]] + ; CHECK-NEXT: [[AND4:%[0-9]+]]:_(s32) = G_AND [[ANYEXT1]], [[C2]] + ; CHECK-NEXT: [[AND5:%[0-9]+]]:_(s32) = G_AND [[ANYEXT2]], [[C2]] + ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[AND3]](s32), [[AND4]](s32), [[AND5]](s32) + ; CHECK-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[CONCAT_VECTORS]](<6 x s16>) + ; CHECK-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[BUILD_VECTOR]](<3 x s32>) %0:_(<6 x s16>) = COPY $vgpr0_vgpr1_vgpr2 %1:_(<6 x s16>) = COPY $vgpr3_vgpr4_vgpr5 %2:_(<3 x s16>), %3:_(<3 x s16>) = G_UNMERGE_VALUES %0 @@ -256,75 +256,75 @@ body: | ; CHECK-LABEL: name: test_usubo_v4s16 ; CHECK: [[COPY:%[0-9]+]]:_(<4 x s16>) = COPY $vgpr0_vgpr1 - ; CHECK: [[COPY1:%[0-9]+]]:_(<4 x s16>) = COPY $vgpr1_vgpr2 - ; CHECK: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[COPY]](<4 x s16>) - ; CHECK: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>) - ; CHECK: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST]](s32) - ; CHECK: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; CHECK: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) - ; CHECK: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) - ; CHECK: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>) - ; CHECK: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST1]](s32) - ; CHECK: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C]](s32) - ; CHECK: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR1]](s32) - ; CHECK: [[UV2:%[0-9]+]]:_(<2 x s16>), [[UV3:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[COPY1]](<4 x s16>) - ; CHECK: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[UV2]](<2 x s16>) - ; CHECK: [[TRUNC4:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST2]](s32) - ; CHECK: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C]](s32) - ; CHECK: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR2]](s32) - ; CHECK: [[BITCAST3:%[0-9]+]]:_(s32) = G_BITCAST [[UV3]](<2 x s16>) - ; CHECK: [[TRUNC6:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST3]](s32) - ; CHECK: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST3]], [[C]](s32) - ; CHECK: [[TRUNC7:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR3]](s32) - ; CHECK: [[SUB:%[0-9]+]]:_(s16) = G_SUB [[TRUNC]], [[TRUNC4]] - ; CHECK: [[SUB1:%[0-9]+]]:_(s16) = G_SUB [[TRUNC1]], [[TRUNC5]] - ; CHECK: [[SUB2:%[0-9]+]]:_(s16) = G_SUB [[TRUNC2]], [[TRUNC6]] - ; CHECK: [[SUB3:%[0-9]+]]:_(s16) = G_SUB [[TRUNC3]], [[TRUNC7]] - ; CHECK: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[SUB]](s16) - ; CHECK: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[SUB1]](s16) - ; CHECK: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C]](s32) - ; CHECK: [[OR:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL]] - ; CHECK: [[BITCAST4:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) - ; CHECK: [[ZEXT2:%[0-9]+]]:_(s32) = G_ZEXT [[SUB2]](s16) - ; CHECK: [[ZEXT3:%[0-9]+]]:_(s32) = G_ZEXT [[SUB3]](s16) - ; CHECK: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[ZEXT3]], [[C]](s32) - ; CHECK: [[OR1:%[0-9]+]]:_(s32) = G_OR [[ZEXT2]], [[SHL1]] - ; CHECK: [[BITCAST5:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32) - ; CHECK: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BITCAST4]](<2 x s16>), [[BITCAST5]](<2 x s16>) - ; CHECK: [[UV4:%[0-9]+]]:_(<2 x s16>), [[UV5:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[COPY]](<4 x s16>) - ; CHECK: [[BITCAST6:%[0-9]+]]:_(s32) = G_BITCAST 
[[UV4]](<2 x s16>) - ; CHECK: [[TRUNC8:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST6]](s32) - ; CHECK: [[LSHR4:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST6]], [[C]](s32) - ; CHECK: [[TRUNC9:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR4]](s32) - ; CHECK: [[BITCAST7:%[0-9]+]]:_(s32) = G_BITCAST [[UV5]](<2 x s16>) - ; CHECK: [[TRUNC10:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST7]](s32) - ; CHECK: [[LSHR5:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST7]], [[C]](s32) - ; CHECK: [[TRUNC11:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR5]](s32) - ; CHECK: [[UV6:%[0-9]+]]:_(<2 x s16>), [[UV7:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[COPY1]](<4 x s16>) - ; CHECK: [[BITCAST8:%[0-9]+]]:_(s32) = G_BITCAST [[UV6]](<2 x s16>) - ; CHECK: [[TRUNC12:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST8]](s32) - ; CHECK: [[LSHR6:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST8]], [[C]](s32) - ; CHECK: [[TRUNC13:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR6]](s32) - ; CHECK: [[BITCAST9:%[0-9]+]]:_(s32) = G_BITCAST [[UV7]](<2 x s16>) - ; CHECK: [[TRUNC14:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST9]](s32) - ; CHECK: [[LSHR7:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST9]], [[C]](s32) - ; CHECK: [[TRUNC15:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR7]](s32) - ; CHECK: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(ult), [[TRUNC8]](s16), [[TRUNC12]] - ; CHECK: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(ult), [[TRUNC9]](s16), [[TRUNC13]] - ; CHECK: [[ICMP2:%[0-9]+]]:_(s1) = G_ICMP intpred(ult), [[TRUNC10]](s16), [[TRUNC14]] - ; CHECK: [[ICMP3:%[0-9]+]]:_(s1) = G_ICMP intpred(ult), [[TRUNC11]](s16), [[TRUNC15]] - ; CHECK: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[ICMP]](s1) - ; CHECK: [[ANYEXT1:%[0-9]+]]:_(s32) = G_ANYEXT [[ICMP1]](s1) - ; CHECK: [[ANYEXT2:%[0-9]+]]:_(s32) = G_ANYEXT [[ICMP2]](s1) - ; CHECK: [[ANYEXT3:%[0-9]+]]:_(s32) = G_ANYEXT [[ICMP3]](s1) - ; CHECK: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 - ; CHECK: [[AND:%[0-9]+]]:_(s32) = G_AND [[ANYEXT]], [[C1]] - ; CHECK: [[AND1:%[0-9]+]]:_(s32) = G_AND [[ANYEXT1]], [[C1]] - ; CHECK: [[AND2:%[0-9]+]]:_(s32) = G_AND [[ANYEXT2]], [[C1]] - ; CHECK: [[AND3:%[0-9]+]]:_(s32) = G_AND [[ANYEXT3]], [[C1]] - ; CHECK: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[AND]](s32), [[AND1]](s32), [[AND2]](s32), [[AND3]](s32) - ; CHECK: $vgpr0_vgpr1 = COPY [[CONCAT_VECTORS]](<4 x s16>) - ; CHECK: $vgpr2_vgpr3_vgpr4_vgpr5 = COPY [[BUILD_VECTOR]](<4 x s32>) + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(<4 x s16>) = COPY $vgpr1_vgpr2 + ; CHECK-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[COPY]](<4 x s16>) + ; CHECK-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>) + ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST]](s32) + ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; CHECK-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) + ; CHECK-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) + ; CHECK-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>) + ; CHECK-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST1]](s32) + ; CHECK-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C]](s32) + ; CHECK-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR1]](s32) + ; CHECK-NEXT: [[UV2:%[0-9]+]]:_(<2 x s16>), [[UV3:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[COPY1]](<4 x s16>) + ; CHECK-NEXT: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[UV2]](<2 x s16>) + ; CHECK-NEXT: [[TRUNC4:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST2]](s32) + ; CHECK-NEXT: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C]](s32) + ; CHECK-NEXT: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR2]](s32) + ; CHECK-NEXT: [[BITCAST3:%[0-9]+]]:_(s32) = G_BITCAST 
[[UV3]](<2 x s16>) + ; CHECK-NEXT: [[TRUNC6:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST3]](s32) + ; CHECK-NEXT: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST3]], [[C]](s32) + ; CHECK-NEXT: [[TRUNC7:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR3]](s32) + ; CHECK-NEXT: [[SUB:%[0-9]+]]:_(s16) = G_SUB [[TRUNC]], [[TRUNC4]] + ; CHECK-NEXT: [[SUB1:%[0-9]+]]:_(s16) = G_SUB [[TRUNC1]], [[TRUNC5]] + ; CHECK-NEXT: [[SUB2:%[0-9]+]]:_(s16) = G_SUB [[TRUNC2]], [[TRUNC6]] + ; CHECK-NEXT: [[SUB3:%[0-9]+]]:_(s16) = G_SUB [[TRUNC3]], [[TRUNC7]] + ; CHECK-NEXT: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[SUB]](s16) + ; CHECK-NEXT: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[SUB1]](s16) + ; CHECK-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C]](s32) + ; CHECK-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL]] + ; CHECK-NEXT: [[BITCAST4:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) + ; CHECK-NEXT: [[ZEXT2:%[0-9]+]]:_(s32) = G_ZEXT [[SUB2]](s16) + ; CHECK-NEXT: [[ZEXT3:%[0-9]+]]:_(s32) = G_ZEXT [[SUB3]](s16) + ; CHECK-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[ZEXT3]], [[C]](s32) + ; CHECK-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[ZEXT2]], [[SHL1]] + ; CHECK-NEXT: [[BITCAST5:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32) + ; CHECK-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BITCAST4]](<2 x s16>), [[BITCAST5]](<2 x s16>) + ; CHECK-NEXT: [[UV4:%[0-9]+]]:_(<2 x s16>), [[UV5:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[COPY]](<4 x s16>) + ; CHECK-NEXT: [[BITCAST6:%[0-9]+]]:_(s32) = G_BITCAST [[UV4]](<2 x s16>) + ; CHECK-NEXT: [[TRUNC8:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST6]](s32) + ; CHECK-NEXT: [[LSHR4:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST6]], [[C]](s32) + ; CHECK-NEXT: [[TRUNC9:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR4]](s32) + ; CHECK-NEXT: [[BITCAST7:%[0-9]+]]:_(s32) = G_BITCAST [[UV5]](<2 x s16>) + ; CHECK-NEXT: [[TRUNC10:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST7]](s32) + ; CHECK-NEXT: [[LSHR5:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST7]], [[C]](s32) + ; CHECK-NEXT: [[TRUNC11:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR5]](s32) + ; CHECK-NEXT: [[UV6:%[0-9]+]]:_(<2 x s16>), [[UV7:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[COPY1]](<4 x s16>) + ; CHECK-NEXT: [[BITCAST8:%[0-9]+]]:_(s32) = G_BITCAST [[UV6]](<2 x s16>) + ; CHECK-NEXT: [[TRUNC12:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST8]](s32) + ; CHECK-NEXT: [[LSHR6:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST8]], [[C]](s32) + ; CHECK-NEXT: [[TRUNC13:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR6]](s32) + ; CHECK-NEXT: [[BITCAST9:%[0-9]+]]:_(s32) = G_BITCAST [[UV7]](<2 x s16>) + ; CHECK-NEXT: [[TRUNC14:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST9]](s32) + ; CHECK-NEXT: [[LSHR7:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST9]], [[C]](s32) + ; CHECK-NEXT: [[TRUNC15:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR7]](s32) + ; CHECK-NEXT: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(ult), [[TRUNC8]](s16), [[TRUNC12]] + ; CHECK-NEXT: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(ult), [[TRUNC9]](s16), [[TRUNC13]] + ; CHECK-NEXT: [[ICMP2:%[0-9]+]]:_(s1) = G_ICMP intpred(ult), [[TRUNC10]](s16), [[TRUNC14]] + ; CHECK-NEXT: [[ICMP3:%[0-9]+]]:_(s1) = G_ICMP intpred(ult), [[TRUNC11]](s16), [[TRUNC15]] + ; CHECK-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[ICMP]](s1) + ; CHECK-NEXT: [[ANYEXT1:%[0-9]+]]:_(s32) = G_ANYEXT [[ICMP1]](s1) + ; CHECK-NEXT: [[ANYEXT2:%[0-9]+]]:_(s32) = G_ANYEXT [[ICMP2]](s1) + ; CHECK-NEXT: [[ANYEXT3:%[0-9]+]]:_(s32) = G_ANYEXT [[ICMP3]](s1) + ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 + ; CHECK-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[ANYEXT]], [[C1]] + ; CHECK-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[ANYEXT1]], [[C1]] + ; CHECK-NEXT: 
[[AND2:%[0-9]+]]:_(s32) = G_AND [[ANYEXT2]], [[C1]] + ; CHECK-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[ANYEXT3]], [[C1]] + ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[AND]](s32), [[AND1]](s32), [[AND2]](s32), [[AND3]](s32) + ; CHECK-NEXT: $vgpr0_vgpr1 = COPY [[CONCAT_VECTORS]](<4 x s16>) + ; CHECK-NEXT: $vgpr2_vgpr3_vgpr4_vgpr5 = COPY [[BUILD_VECTOR]](<4 x s32>) %0:_(<4 x s16>) = COPY $vgpr0_vgpr1 %1:_(<4 x s16>) = COPY $vgpr1_vgpr2 %2:_(<4 x s16>), %3:_(<4 x s1>) = G_USUBO %0, %1 @@ -341,24 +341,24 @@ body: | ; CHECK-LABEL: name: test_usubo_v2s32 ; CHECK: [[COPY:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr0_vgpr1 - ; CHECK: [[COPY1:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr2_vgpr3 - ; CHECK: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<2 x s32>) - ; CHECK: [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](<2 x s32>) - ; CHECK: [[SUB:%[0-9]+]]:_(s32) = G_SUB [[UV]], [[UV2]] - ; CHECK: [[SUB1:%[0-9]+]]:_(s32) = G_SUB [[UV1]], [[UV3]] - ; CHECK: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[SUB]](s32), [[SUB1]](s32) - ; CHECK: [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<2 x s32>) - ; CHECK: [[UV6:%[0-9]+]]:_(s32), [[UV7:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](<2 x s32>) - ; CHECK: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(ult), [[UV4]](s32), [[UV6]] - ; CHECK: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(ult), [[UV5]](s32), [[UV7]] - ; CHECK: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[ICMP]](s1) - ; CHECK: [[ANYEXT1:%[0-9]+]]:_(s32) = G_ANYEXT [[ICMP1]](s1) - ; CHECK: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 - ; CHECK: [[AND:%[0-9]+]]:_(s32) = G_AND [[ANYEXT]], [[C]] - ; CHECK: [[AND1:%[0-9]+]]:_(s32) = G_AND [[ANYEXT1]], [[C]] - ; CHECK: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[AND]](s32), [[AND1]](s32) - ; CHECK: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x s32>) - ; CHECK: $vgpr2_vgpr3 = COPY [[BUILD_VECTOR1]](<2 x s32>) + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr2_vgpr3 + ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<2 x s32>) + ; CHECK-NEXT: [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](<2 x s32>) + ; CHECK-NEXT: [[SUB:%[0-9]+]]:_(s32) = G_SUB [[UV]], [[UV2]] + ; CHECK-NEXT: [[SUB1:%[0-9]+]]:_(s32) = G_SUB [[UV1]], [[UV3]] + ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[SUB]](s32), [[SUB1]](s32) + ; CHECK-NEXT: [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<2 x s32>) + ; CHECK-NEXT: [[UV6:%[0-9]+]]:_(s32), [[UV7:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](<2 x s32>) + ; CHECK-NEXT: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(ult), [[UV4]](s32), [[UV6]] + ; CHECK-NEXT: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(ult), [[UV5]](s32), [[UV7]] + ; CHECK-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[ICMP]](s1) + ; CHECK-NEXT: [[ANYEXT1:%[0-9]+]]:_(s32) = G_ANYEXT [[ICMP1]](s1) + ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 + ; CHECK-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[ANYEXT]], [[C]] + ; CHECK-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[ANYEXT1]], [[C]] + ; CHECK-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[AND]](s32), [[AND1]](s32) + ; CHECK-NEXT: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x s32>) + ; CHECK-NEXT: $vgpr2_vgpr3 = COPY [[BUILD_VECTOR1]](<2 x s32>) %0:_(<2 x s32>) = COPY $vgpr0_vgpr1 %1:_(<2 x s32>) = COPY $vgpr2_vgpr3 %2:_(<2 x s32>), %3:_(<2 x s1>) = G_USUBO %0, %1 diff --git 
a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-usubsat.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-usubsat.mir index 23558fb72c06..6221a15da5d5 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-usubsat.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-usubsat.mir @@ -1,8 +1,8 @@ # NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py -# RUN: llc -global-isel-abort=0 -march=amdgcn -mcpu=tahiti -run-pass=legalizer %s -o - | FileCheck -check-prefix=GFX6 %s -# RUN: llc -global-isel-abort=0 -march=amdgcn -mcpu=fiji -run-pass=legalizer %s -o - | FileCheck -check-prefix=GFX8 %s -# RUN: llc -global-isel-abort=0 -march=amdgcn -mcpu=gfx900 -run-pass=legalizer %s -o - | FileCheck -check-prefix=GFX9 %s -# RUN: llc -global-isel-abort=0 -march=amdgcn -mcpu=gfx1010 -run-pass=legalizer %s -o - | FileCheck -check-prefix=GFX9 %s +# RUN: llc -march=amdgcn -mcpu=tahiti -run-pass=legalizer %s -o - | FileCheck -check-prefix=GFX6 %s +# RUN: llc -march=amdgcn -mcpu=fiji -run-pass=legalizer %s -o - | FileCheck -check-prefix=GFX8 %s +# RUN: llc -march=amdgcn -mcpu=gfx900 -run-pass=legalizer %s -o - | FileCheck -check-prefix=GFX9 %s +# RUN: llc -march=amdgcn -mcpu=gfx1010 -run-pass=legalizer %s -o - | FileCheck -check-prefix=GFX9 %s --- name: usubsat_s7 @@ -12,38 +12,38 @@ body: | ; GFX6-LABEL: name: usubsat_s7 ; GFX6: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX6: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX6: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 25 - ; GFX6: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY]], [[C]](s32) - ; GFX6: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[COPY1]], [[C]](s32) - ; GFX6: [[UMIN:%[0-9]+]]:_(s32) = G_UMIN [[SHL]], [[SHL1]] - ; GFX6: [[SUB:%[0-9]+]]:_(s32) = G_SUB [[SHL]], [[UMIN]] - ; GFX6: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[SUB]], [[C]](s32) - ; GFX6: $vgpr0 = COPY [[LSHR]](s32) + ; GFX6-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; GFX6-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 25 + ; GFX6-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY]], [[C]](s32) + ; GFX6-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[COPY1]], [[C]](s32) + ; GFX6-NEXT: [[UMIN:%[0-9]+]]:_(s32) = G_UMIN [[SHL]], [[SHL1]] + ; GFX6-NEXT: [[SUB:%[0-9]+]]:_(s32) = G_SUB [[SHL]], [[UMIN]] + ; GFX6-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[SUB]], [[C]](s32) + ; GFX6-NEXT: $vgpr0 = COPY [[LSHR]](s32) ; GFX8-LABEL: name: usubsat_s7 ; GFX8: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX8: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX8: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY]](s32) - ; GFX8: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY1]](s32) - ; GFX8: [[C:%[0-9]+]]:_(s16) = G_CONSTANT i16 9 - ; GFX8: [[SHL:%[0-9]+]]:_(s16) = G_SHL [[TRUNC]], [[C]](s16) - ; GFX8: [[SHL1:%[0-9]+]]:_(s16) = G_SHL [[TRUNC1]], [[C]](s16) - ; GFX8: [[USUBSAT:%[0-9]+]]:_(s16) = G_USUBSAT [[SHL]], [[SHL1]] - ; GFX8: [[LSHR:%[0-9]+]]:_(s16) = G_LSHR [[USUBSAT]], [[C]](s16) - ; GFX8: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[LSHR]](s16) - ; GFX8: $vgpr0 = COPY [[ANYEXT]](s32) + ; GFX8-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; GFX8-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY]](s32) + ; GFX8-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY1]](s32) + ; GFX8-NEXT: [[C:%[0-9]+]]:_(s16) = G_CONSTANT i16 9 + ; GFX8-NEXT: [[SHL:%[0-9]+]]:_(s16) = G_SHL [[TRUNC]], [[C]](s16) + ; GFX8-NEXT: [[SHL1:%[0-9]+]]:_(s16) = G_SHL [[TRUNC1]], [[C]](s16) + ; GFX8-NEXT: [[USUBSAT:%[0-9]+]]:_(s16) = G_USUBSAT [[SHL]], [[SHL1]] + ; GFX8-NEXT: [[LSHR:%[0-9]+]]:_(s16) = G_LSHR [[USUBSAT]], [[C]](s16) + ; GFX8-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = 
G_ANYEXT [[LSHR]](s16) + ; GFX8-NEXT: $vgpr0 = COPY [[ANYEXT]](s32) ; GFX9-LABEL: name: usubsat_s7 ; GFX9: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX9: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX9: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY]](s32) - ; GFX9: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY1]](s32) - ; GFX9: [[C:%[0-9]+]]:_(s16) = G_CONSTANT i16 9 - ; GFX9: [[SHL:%[0-9]+]]:_(s16) = G_SHL [[TRUNC]], [[C]](s16) - ; GFX9: [[SHL1:%[0-9]+]]:_(s16) = G_SHL [[TRUNC1]], [[C]](s16) - ; GFX9: [[USUBSAT:%[0-9]+]]:_(s16) = G_USUBSAT [[SHL]], [[SHL1]] - ; GFX9: [[LSHR:%[0-9]+]]:_(s16) = G_LSHR [[USUBSAT]], [[C]](s16) - ; GFX9: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[LSHR]](s16) - ; GFX9: $vgpr0 = COPY [[ANYEXT]](s32) + ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; GFX9-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY]](s32) + ; GFX9-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY1]](s32) + ; GFX9-NEXT: [[C:%[0-9]+]]:_(s16) = G_CONSTANT i16 9 + ; GFX9-NEXT: [[SHL:%[0-9]+]]:_(s16) = G_SHL [[TRUNC]], [[C]](s16) + ; GFX9-NEXT: [[SHL1:%[0-9]+]]:_(s16) = G_SHL [[TRUNC1]], [[C]](s16) + ; GFX9-NEXT: [[USUBSAT:%[0-9]+]]:_(s16) = G_USUBSAT [[SHL]], [[SHL1]] + ; GFX9-NEXT: [[LSHR:%[0-9]+]]:_(s16) = G_LSHR [[USUBSAT]], [[C]](s16) + ; GFX9-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[LSHR]](s16) + ; GFX9-NEXT: $vgpr0 = COPY [[ANYEXT]](s32) %0:_(s32) = COPY $vgpr0 %1:_(s32) = COPY $vgpr1 %2:_(s7) = G_TRUNC %0 @@ -61,38 +61,38 @@ body: | ; GFX6-LABEL: name: usubsat_s8 ; GFX6: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX6: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX6: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 24 - ; GFX6: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY]], [[C]](s32) - ; GFX6: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[COPY1]], [[C]](s32) - ; GFX6: [[UMIN:%[0-9]+]]:_(s32) = G_UMIN [[SHL]], [[SHL1]] - ; GFX6: [[SUB:%[0-9]+]]:_(s32) = G_SUB [[SHL]], [[UMIN]] - ; GFX6: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[SUB]], [[C]](s32) - ; GFX6: $vgpr0 = COPY [[LSHR]](s32) + ; GFX6-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; GFX6-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 24 + ; GFX6-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY]], [[C]](s32) + ; GFX6-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[COPY1]], [[C]](s32) + ; GFX6-NEXT: [[UMIN:%[0-9]+]]:_(s32) = G_UMIN [[SHL]], [[SHL1]] + ; GFX6-NEXT: [[SUB:%[0-9]+]]:_(s32) = G_SUB [[SHL]], [[UMIN]] + ; GFX6-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[SUB]], [[C]](s32) + ; GFX6-NEXT: $vgpr0 = COPY [[LSHR]](s32) ; GFX8-LABEL: name: usubsat_s8 ; GFX8: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX8: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX8: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY]](s32) - ; GFX8: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY1]](s32) - ; GFX8: [[C:%[0-9]+]]:_(s16) = G_CONSTANT i16 8 - ; GFX8: [[SHL:%[0-9]+]]:_(s16) = G_SHL [[TRUNC]], [[C]](s16) - ; GFX8: [[SHL1:%[0-9]+]]:_(s16) = G_SHL [[TRUNC1]], [[C]](s16) - ; GFX8: [[USUBSAT:%[0-9]+]]:_(s16) = G_USUBSAT [[SHL]], [[SHL1]] - ; GFX8: [[LSHR:%[0-9]+]]:_(s16) = G_LSHR [[USUBSAT]], [[C]](s16) - ; GFX8: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[LSHR]](s16) - ; GFX8: $vgpr0 = COPY [[ANYEXT]](s32) + ; GFX8-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; GFX8-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY]](s32) + ; GFX8-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY1]](s32) + ; GFX8-NEXT: [[C:%[0-9]+]]:_(s16) = G_CONSTANT i16 8 + ; GFX8-NEXT: [[SHL:%[0-9]+]]:_(s16) = G_SHL [[TRUNC]], [[C]](s16) + ; GFX8-NEXT: [[SHL1:%[0-9]+]]:_(s16) = G_SHL [[TRUNC1]], [[C]](s16) + ; GFX8-NEXT: [[USUBSAT:%[0-9]+]]:_(s16) = 
G_USUBSAT [[SHL]], [[SHL1]] + ; GFX8-NEXT: [[LSHR:%[0-9]+]]:_(s16) = G_LSHR [[USUBSAT]], [[C]](s16) + ; GFX8-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[LSHR]](s16) + ; GFX8-NEXT: $vgpr0 = COPY [[ANYEXT]](s32) ; GFX9-LABEL: name: usubsat_s8 ; GFX9: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX9: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX9: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY]](s32) - ; GFX9: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY1]](s32) - ; GFX9: [[C:%[0-9]+]]:_(s16) = G_CONSTANT i16 8 - ; GFX9: [[SHL:%[0-9]+]]:_(s16) = G_SHL [[TRUNC]], [[C]](s16) - ; GFX9: [[SHL1:%[0-9]+]]:_(s16) = G_SHL [[TRUNC1]], [[C]](s16) - ; GFX9: [[USUBSAT:%[0-9]+]]:_(s16) = G_USUBSAT [[SHL]], [[SHL1]] - ; GFX9: [[LSHR:%[0-9]+]]:_(s16) = G_LSHR [[USUBSAT]], [[C]](s16) - ; GFX9: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[LSHR]](s16) - ; GFX9: $vgpr0 = COPY [[ANYEXT]](s32) + ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; GFX9-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY]](s32) + ; GFX9-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY1]](s32) + ; GFX9-NEXT: [[C:%[0-9]+]]:_(s16) = G_CONSTANT i16 8 + ; GFX9-NEXT: [[SHL:%[0-9]+]]:_(s16) = G_SHL [[TRUNC]], [[C]](s16) + ; GFX9-NEXT: [[SHL1:%[0-9]+]]:_(s16) = G_SHL [[TRUNC1]], [[C]](s16) + ; GFX9-NEXT: [[USUBSAT:%[0-9]+]]:_(s16) = G_USUBSAT [[SHL]], [[SHL1]] + ; GFX9-NEXT: [[LSHR:%[0-9]+]]:_(s16) = G_LSHR [[USUBSAT]], [[C]](s16) + ; GFX9-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[LSHR]](s16) + ; GFX9-NEXT: $vgpr0 = COPY [[ANYEXT]](s32) %0:_(s32) = COPY $vgpr0 %1:_(s32) = COPY $vgpr1 %2:_(s8) = G_TRUNC %0 @@ -110,85 +110,85 @@ body: | ; GFX6-LABEL: name: usubsat_v2s8 ; GFX6: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX6: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX6: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 - ; GFX6: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[COPY]], [[C]](s32) - ; GFX6: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 24 - ; GFX6: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[COPY1]], [[C]](s32) - ; GFX6: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY]], [[C1]](s32) - ; GFX6: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[COPY1]], [[C1]](s32) - ; GFX6: [[UMIN:%[0-9]+]]:_(s32) = G_UMIN [[SHL]], [[SHL1]] - ; GFX6: [[SUB:%[0-9]+]]:_(s32) = G_SUB [[SHL]], [[UMIN]] - ; GFX6: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[SUB]], [[C1]](s32) - ; GFX6: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[LSHR]], [[C1]](s32) - ; GFX6: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[LSHR1]], [[C1]](s32) - ; GFX6: [[UMIN1:%[0-9]+]]:_(s32) = G_UMIN [[SHL2]], [[SHL3]] - ; GFX6: [[SUB1:%[0-9]+]]:_(s32) = G_SUB [[SHL2]], [[UMIN1]] - ; GFX6: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[SUB1]], [[C1]](s32) - ; GFX6: [[C2:%[0-9]+]]:_(s16) = G_CONSTANT i16 255 - ; GFX6: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR2]](s32) - ; GFX6: [[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC]], [[C2]] - ; GFX6: [[COPY2:%[0-9]+]]:_(s32) = COPY [[C]](s32) - ; GFX6: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 255 - ; GFX6: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LSHR3]], [[C3]] - ; GFX6: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[COPY2]](s32) - ; GFX6: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[SHL4]](s32) - ; GFX6: [[OR:%[0-9]+]]:_(s16) = G_OR [[AND]], [[TRUNC1]] - ; GFX6: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[OR]](s16) - ; GFX6: $vgpr0 = COPY [[ANYEXT]](s32) + ; GFX6-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; GFX6-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 + ; GFX6-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[COPY]], [[C]](s32) + ; GFX6-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 24 + ; GFX6-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[COPY1]], [[C]](s32) + ; GFX6-NEXT: 
[[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY]], [[C1]](s32) + ; GFX6-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[COPY1]], [[C1]](s32) + ; GFX6-NEXT: [[UMIN:%[0-9]+]]:_(s32) = G_UMIN [[SHL]], [[SHL1]] + ; GFX6-NEXT: [[SUB:%[0-9]+]]:_(s32) = G_SUB [[SHL]], [[UMIN]] + ; GFX6-NEXT: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[SUB]], [[C1]](s32) + ; GFX6-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[LSHR]], [[C1]](s32) + ; GFX6-NEXT: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[LSHR1]], [[C1]](s32) + ; GFX6-NEXT: [[UMIN1:%[0-9]+]]:_(s32) = G_UMIN [[SHL2]], [[SHL3]] + ; GFX6-NEXT: [[SUB1:%[0-9]+]]:_(s32) = G_SUB [[SHL2]], [[UMIN1]] + ; GFX6-NEXT: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[SUB1]], [[C1]](s32) + ; GFX6-NEXT: [[C2:%[0-9]+]]:_(s16) = G_CONSTANT i16 255 + ; GFX6-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR2]](s32) + ; GFX6-NEXT: [[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC]], [[C2]] + ; GFX6-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY [[C]](s32) + ; GFX6-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 255 + ; GFX6-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LSHR3]], [[C3]] + ; GFX6-NEXT: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[COPY2]](s32) + ; GFX6-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[SHL4]](s32) + ; GFX6-NEXT: [[OR:%[0-9]+]]:_(s16) = G_OR [[AND]], [[TRUNC1]] + ; GFX6-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[OR]](s16) + ; GFX6-NEXT: $vgpr0 = COPY [[ANYEXT]](s32) ; GFX8-LABEL: name: usubsat_v2s8 ; GFX8: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX8: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX8: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 - ; GFX8: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[COPY]], [[C]](s32) - ; GFX8: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[COPY1]], [[C]](s32) - ; GFX8: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY]](s32) - ; GFX8: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY1]](s32) - ; GFX8: [[C1:%[0-9]+]]:_(s16) = G_CONSTANT i16 8 - ; GFX8: [[SHL:%[0-9]+]]:_(s16) = G_SHL [[TRUNC]], [[C1]](s16) - ; GFX8: [[SHL1:%[0-9]+]]:_(s16) = G_SHL [[TRUNC1]], [[C1]](s16) - ; GFX8: [[USUBSAT:%[0-9]+]]:_(s16) = G_USUBSAT [[SHL]], [[SHL1]] - ; GFX8: [[LSHR2:%[0-9]+]]:_(s16) = G_LSHR [[USUBSAT]], [[C1]](s16) - ; GFX8: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) - ; GFX8: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR1]](s32) - ; GFX8: [[SHL2:%[0-9]+]]:_(s16) = G_SHL [[TRUNC2]], [[C1]](s16) - ; GFX8: [[SHL3:%[0-9]+]]:_(s16) = G_SHL [[TRUNC3]], [[C1]](s16) - ; GFX8: [[USUBSAT1:%[0-9]+]]:_(s16) = G_USUBSAT [[SHL2]], [[SHL3]] - ; GFX8: [[LSHR3:%[0-9]+]]:_(s16) = G_LSHR [[USUBSAT1]], [[C1]](s16) - ; GFX8: [[C2:%[0-9]+]]:_(s16) = G_CONSTANT i16 255 - ; GFX8: [[AND:%[0-9]+]]:_(s16) = G_AND [[LSHR2]], [[C2]] - ; GFX8: [[AND1:%[0-9]+]]:_(s16) = G_AND [[LSHR3]], [[C2]] - ; GFX8: [[SHL4:%[0-9]+]]:_(s16) = G_SHL [[AND1]], [[C1]](s16) - ; GFX8: [[OR:%[0-9]+]]:_(s16) = G_OR [[AND]], [[SHL4]] - ; GFX8: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[OR]](s16) - ; GFX8: $vgpr0 = COPY [[ANYEXT]](s32) + ; GFX8-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; GFX8-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 + ; GFX8-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[COPY]], [[C]](s32) + ; GFX8-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[COPY1]], [[C]](s32) + ; GFX8-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY]](s32) + ; GFX8-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY1]](s32) + ; GFX8-NEXT: [[C1:%[0-9]+]]:_(s16) = G_CONSTANT i16 8 + ; GFX8-NEXT: [[SHL:%[0-9]+]]:_(s16) = G_SHL [[TRUNC]], [[C1]](s16) + ; GFX8-NEXT: [[SHL1:%[0-9]+]]:_(s16) = G_SHL [[TRUNC1]], [[C1]](s16) + ; GFX8-NEXT: [[USUBSAT:%[0-9]+]]:_(s16) = G_USUBSAT [[SHL]], [[SHL1]] + ; GFX8-NEXT: 
[[LSHR2:%[0-9]+]]:_(s16) = G_LSHR [[USUBSAT]], [[C1]](s16) + ; GFX8-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) + ; GFX8-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR1]](s32) + ; GFX8-NEXT: [[SHL2:%[0-9]+]]:_(s16) = G_SHL [[TRUNC2]], [[C1]](s16) + ; GFX8-NEXT: [[SHL3:%[0-9]+]]:_(s16) = G_SHL [[TRUNC3]], [[C1]](s16) + ; GFX8-NEXT: [[USUBSAT1:%[0-9]+]]:_(s16) = G_USUBSAT [[SHL2]], [[SHL3]] + ; GFX8-NEXT: [[LSHR3:%[0-9]+]]:_(s16) = G_LSHR [[USUBSAT1]], [[C1]](s16) + ; GFX8-NEXT: [[C2:%[0-9]+]]:_(s16) = G_CONSTANT i16 255 + ; GFX8-NEXT: [[AND:%[0-9]+]]:_(s16) = G_AND [[LSHR2]], [[C2]] + ; GFX8-NEXT: [[AND1:%[0-9]+]]:_(s16) = G_AND [[LSHR3]], [[C2]] + ; GFX8-NEXT: [[SHL4:%[0-9]+]]:_(s16) = G_SHL [[AND1]], [[C1]](s16) + ; GFX8-NEXT: [[OR:%[0-9]+]]:_(s16) = G_OR [[AND]], [[SHL4]] + ; GFX8-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[OR]](s16) + ; GFX8-NEXT: $vgpr0 = COPY [[ANYEXT]](s32) ; GFX9-LABEL: name: usubsat_v2s8 ; GFX9: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX9: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX9: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 - ; GFX9: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[COPY]], [[C]](s32) - ; GFX9: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; GFX9: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[COPY1]], [[C]](s32) - ; GFX9: [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY]](s32), [[LSHR]](s32) - ; GFX9: [[BUILD_VECTOR_TRUNC1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY1]](s32), [[LSHR1]](s32) - ; GFX9: [[C2:%[0-9]+]]:_(s16) = G_CONSTANT i16 8 - ; GFX9: [[COPY2:%[0-9]+]]:_(s32) = COPY [[C]](s32) - ; GFX9: [[BUILD_VECTOR_TRUNC2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY2]](s32), [[COPY2]](s32) - ; GFX9: [[SHL:%[0-9]+]]:_(<2 x s16>) = G_SHL [[BUILD_VECTOR_TRUNC]], [[BUILD_VECTOR_TRUNC2]](<2 x s16>) - ; GFX9: [[SHL1:%[0-9]+]]:_(<2 x s16>) = G_SHL [[BUILD_VECTOR_TRUNC1]], [[BUILD_VECTOR_TRUNC2]](<2 x s16>) - ; GFX9: [[USUBSAT:%[0-9]+]]:_(<2 x s16>) = G_USUBSAT [[SHL]], [[SHL1]] - ; GFX9: [[LSHR2:%[0-9]+]]:_(<2 x s16>) = G_LSHR [[USUBSAT]], [[BUILD_VECTOR_TRUNC2]](<2 x s16>) - ; GFX9: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[LSHR2]](<2 x s16>) - ; GFX9: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST]](s32) - ; GFX9: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C1]](s32) - ; GFX9: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR3]](s32) - ; GFX9: [[C3:%[0-9]+]]:_(s16) = G_CONSTANT i16 255 - ; GFX9: [[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC]], [[C3]] - ; GFX9: [[AND1:%[0-9]+]]:_(s16) = G_AND [[TRUNC1]], [[C3]] - ; GFX9: [[SHL2:%[0-9]+]]:_(s16) = G_SHL [[AND1]], [[C2]](s16) - ; GFX9: [[OR:%[0-9]+]]:_(s16) = G_OR [[AND]], [[SHL2]] - ; GFX9: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[OR]](s16) - ; GFX9: $vgpr0 = COPY [[ANYEXT]](s32) + ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; GFX9-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 + ; GFX9-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[COPY]], [[C]](s32) + ; GFX9-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; GFX9-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[COPY1]], [[C]](s32) + ; GFX9-NEXT: [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY]](s32), [[LSHR]](s32) + ; GFX9-NEXT: [[BUILD_VECTOR_TRUNC1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY1]](s32), [[LSHR1]](s32) + ; GFX9-NEXT: [[C2:%[0-9]+]]:_(s16) = G_CONSTANT i16 8 + ; GFX9-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY [[C]](s32) + ; GFX9-NEXT: [[BUILD_VECTOR_TRUNC2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY2]](s32), [[COPY2]](s32) + ; GFX9-NEXT: [[SHL:%[0-9]+]]:_(<2 x s16>) = G_SHL 
[[BUILD_VECTOR_TRUNC]], [[BUILD_VECTOR_TRUNC2]](<2 x s16>) + ; GFX9-NEXT: [[SHL1:%[0-9]+]]:_(<2 x s16>) = G_SHL [[BUILD_VECTOR_TRUNC1]], [[BUILD_VECTOR_TRUNC2]](<2 x s16>) + ; GFX9-NEXT: [[USUBSAT:%[0-9]+]]:_(<2 x s16>) = G_USUBSAT [[SHL]], [[SHL1]] + ; GFX9-NEXT: [[LSHR2:%[0-9]+]]:_(<2 x s16>) = G_LSHR [[USUBSAT]], [[BUILD_VECTOR_TRUNC2]](<2 x s16>) + ; GFX9-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[LSHR2]](<2 x s16>) + ; GFX9-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST]](s32) + ; GFX9-NEXT: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C1]](s32) + ; GFX9-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR3]](s32) + ; GFX9-NEXT: [[C3:%[0-9]+]]:_(s16) = G_CONSTANT i16 255 + ; GFX9-NEXT: [[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC]], [[C3]] + ; GFX9-NEXT: [[AND1:%[0-9]+]]:_(s16) = G_AND [[TRUNC1]], [[C3]] + ; GFX9-NEXT: [[SHL2:%[0-9]+]]:_(s16) = G_SHL [[AND1]], [[C2]](s16) + ; GFX9-NEXT: [[OR:%[0-9]+]]:_(s16) = G_OR [[AND]], [[SHL2]] + ; GFX9-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[OR]](s16) + ; GFX9-NEXT: $vgpr0 = COPY [[ANYEXT]](s32) %0:_(s32) = COPY $vgpr0 %1:_(s32) = COPY $vgpr1 %2:_(s16) = G_TRUNC %0 @@ -209,30 +209,30 @@ body: | ; GFX6-LABEL: name: usubsat_s16 ; GFX6: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX6: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX6: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; GFX6: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY]], [[C]](s32) - ; GFX6: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[COPY1]], [[C]](s32) - ; GFX6: [[UMIN:%[0-9]+]]:_(s32) = G_UMIN [[SHL]], [[SHL1]] - ; GFX6: [[SUB:%[0-9]+]]:_(s32) = G_SUB [[SHL]], [[UMIN]] - ; GFX6: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[SUB]], [[C]](s32) - ; GFX6: $vgpr0 = COPY [[LSHR]](s32) + ; GFX6-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; GFX6-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; GFX6-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY]], [[C]](s32) + ; GFX6-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[COPY1]], [[C]](s32) + ; GFX6-NEXT: [[UMIN:%[0-9]+]]:_(s32) = G_UMIN [[SHL]], [[SHL1]] + ; GFX6-NEXT: [[SUB:%[0-9]+]]:_(s32) = G_SUB [[SHL]], [[UMIN]] + ; GFX6-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[SUB]], [[C]](s32) + ; GFX6-NEXT: $vgpr0 = COPY [[LSHR]](s32) ; GFX8-LABEL: name: usubsat_s16 ; GFX8: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX8: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX8: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY]](s32) - ; GFX8: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY1]](s32) - ; GFX8: [[USUBSAT:%[0-9]+]]:_(s16) = G_USUBSAT [[TRUNC]], [[TRUNC1]] - ; GFX8: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[USUBSAT]](s16) - ; GFX8: $vgpr0 = COPY [[ANYEXT]](s32) + ; GFX8-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; GFX8-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY]](s32) + ; GFX8-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY1]](s32) + ; GFX8-NEXT: [[USUBSAT:%[0-9]+]]:_(s16) = G_USUBSAT [[TRUNC]], [[TRUNC1]] + ; GFX8-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[USUBSAT]](s16) + ; GFX8-NEXT: $vgpr0 = COPY [[ANYEXT]](s32) ; GFX9-LABEL: name: usubsat_s16 ; GFX9: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX9: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX9: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY]](s32) - ; GFX9: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY1]](s32) - ; GFX9: [[USUBSAT:%[0-9]+]]:_(s16) = G_USUBSAT [[TRUNC]], [[TRUNC1]] - ; GFX9: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[USUBSAT]](s16) - ; GFX9: $vgpr0 = COPY [[ANYEXT]](s32) + ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; GFX9-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY]](s32) + ; GFX9-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = 
G_TRUNC [[COPY1]](s32) + ; GFX9-NEXT: [[USUBSAT:%[0-9]+]]:_(s16) = G_USUBSAT [[TRUNC]], [[TRUNC1]] + ; GFX9-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[USUBSAT]](s16) + ; GFX9-NEXT: $vgpr0 = COPY [[ANYEXT]](s32) %0:_(s32) = COPY $vgpr0 %1:_(s32) = COPY $vgpr1 %2:_(s16) = G_TRUNC %0 @@ -250,54 +250,54 @@ body: | ; GFX6-LABEL: name: usubsat_v2s16 ; GFX6: [[COPY:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0 - ; GFX6: [[COPY1:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr1 - ; GFX6: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY]](<2 x s16>) - ; GFX6: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; GFX6: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) - ; GFX6: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[COPY1]](<2 x s16>) - ; GFX6: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C]](s32) - ; GFX6: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[BITCAST]], [[C]](s32) - ; GFX6: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[BITCAST1]], [[C]](s32) - ; GFX6: [[UMIN:%[0-9]+]]:_(s32) = G_UMIN [[SHL]], [[SHL1]] - ; GFX6: [[SUB:%[0-9]+]]:_(s32) = G_SUB [[SHL]], [[UMIN]] - ; GFX6: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[SUB]], [[C]](s32) - ; GFX6: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[LSHR]], [[C]](s32) - ; GFX6: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[LSHR1]], [[C]](s32) - ; GFX6: [[UMIN1:%[0-9]+]]:_(s32) = G_UMIN [[SHL2]], [[SHL3]] - ; GFX6: [[SUB1:%[0-9]+]]:_(s32) = G_SUB [[SHL2]], [[UMIN1]] - ; GFX6: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[SUB1]], [[C]](s32) - ; GFX6: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 - ; GFX6: [[AND:%[0-9]+]]:_(s32) = G_AND [[LSHR2]], [[C1]] - ; GFX6: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LSHR3]], [[C1]] - ; GFX6: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C]](s32) - ; GFX6: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL4]] - ; GFX6: [[BITCAST2:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) - ; GFX6: $vgpr0 = COPY [[BITCAST2]](<2 x s16>) + ; GFX6-NEXT: [[COPY1:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr1 + ; GFX6-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY]](<2 x s16>) + ; GFX6-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; GFX6-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) + ; GFX6-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[COPY1]](<2 x s16>) + ; GFX6-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C]](s32) + ; GFX6-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[BITCAST]], [[C]](s32) + ; GFX6-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[BITCAST1]], [[C]](s32) + ; GFX6-NEXT: [[UMIN:%[0-9]+]]:_(s32) = G_UMIN [[SHL]], [[SHL1]] + ; GFX6-NEXT: [[SUB:%[0-9]+]]:_(s32) = G_SUB [[SHL]], [[UMIN]] + ; GFX6-NEXT: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[SUB]], [[C]](s32) + ; GFX6-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[LSHR]], [[C]](s32) + ; GFX6-NEXT: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[LSHR1]], [[C]](s32) + ; GFX6-NEXT: [[UMIN1:%[0-9]+]]:_(s32) = G_UMIN [[SHL2]], [[SHL3]] + ; GFX6-NEXT: [[SUB1:%[0-9]+]]:_(s32) = G_SUB [[SHL2]], [[UMIN1]] + ; GFX6-NEXT: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[SUB1]], [[C]](s32) + ; GFX6-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 + ; GFX6-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[LSHR2]], [[C1]] + ; GFX6-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LSHR3]], [[C1]] + ; GFX6-NEXT: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C]](s32) + ; GFX6-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL4]] + ; GFX6-NEXT: [[BITCAST2:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) + ; GFX6-NEXT: $vgpr0 = COPY [[BITCAST2]](<2 x s16>) ; GFX8-LABEL: name: usubsat_v2s16 ; GFX8: [[COPY:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0 - ; GFX8: [[COPY1:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr1 - ; GFX8: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST 
[[COPY]](<2 x s16>) - ; GFX8: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST]](s32) - ; GFX8: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; GFX8: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) - ; GFX8: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) - ; GFX8: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[COPY1]](<2 x s16>) - ; GFX8: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST1]](s32) - ; GFX8: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C]](s32) - ; GFX8: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR1]](s32) - ; GFX8: [[USUBSAT:%[0-9]+]]:_(s16) = G_USUBSAT [[TRUNC]], [[TRUNC2]] - ; GFX8: [[USUBSAT1:%[0-9]+]]:_(s16) = G_USUBSAT [[TRUNC1]], [[TRUNC3]] - ; GFX8: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[USUBSAT]](s16) - ; GFX8: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[USUBSAT1]](s16) - ; GFX8: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C]](s32) - ; GFX8: [[OR:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL]] - ; GFX8: [[BITCAST2:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) - ; GFX8: $vgpr0 = COPY [[BITCAST2]](<2 x s16>) + ; GFX8-NEXT: [[COPY1:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr1 + ; GFX8-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY]](<2 x s16>) + ; GFX8-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST]](s32) + ; GFX8-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; GFX8-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) + ; GFX8-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) + ; GFX8-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[COPY1]](<2 x s16>) + ; GFX8-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST1]](s32) + ; GFX8-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C]](s32) + ; GFX8-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR1]](s32) + ; GFX8-NEXT: [[USUBSAT:%[0-9]+]]:_(s16) = G_USUBSAT [[TRUNC]], [[TRUNC2]] + ; GFX8-NEXT: [[USUBSAT1:%[0-9]+]]:_(s16) = G_USUBSAT [[TRUNC1]], [[TRUNC3]] + ; GFX8-NEXT: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[USUBSAT]](s16) + ; GFX8-NEXT: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[USUBSAT1]](s16) + ; GFX8-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C]](s32) + ; GFX8-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL]] + ; GFX8-NEXT: [[BITCAST2:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) + ; GFX8-NEXT: $vgpr0 = COPY [[BITCAST2]](<2 x s16>) ; GFX9-LABEL: name: usubsat_v2s16 ; GFX9: [[COPY:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0 - ; GFX9: [[COPY1:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr1 - ; GFX9: [[USUBSAT:%[0-9]+]]:_(<2 x s16>) = G_USUBSAT [[COPY]], [[COPY1]] - ; GFX9: $vgpr0 = COPY [[USUBSAT]](<2 x s16>) + ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr1 + ; GFX9-NEXT: [[USUBSAT:%[0-9]+]]:_(<2 x s16>) = G_USUBSAT [[COPY]], [[COPY1]] + ; GFX9-NEXT: $vgpr0 = COPY [[USUBSAT]](<2 x s16>) %0:_(<2 x s16>) = COPY $vgpr0 %1:_(<2 x s16>) = COPY $vgpr1 %2:_(<2 x s16>) = G_USUBSAT %0, %1 @@ -312,130 +312,130 @@ body: | ; GFX6-LABEL: name: usubsat_v3s16 ; GFX6: [[COPY:%[0-9]+]]:_(<6 x s16>) = COPY $vgpr0_vgpr1_vgpr2 - ; GFX6: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>), [[UV2:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[COPY]](<6 x s16>) - ; GFX6: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>) - ; GFX6: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; GFX6: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) - ; GFX6: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>) - ; GFX6: [[UV3:%[0-9]+]]:_(<2 x s16>), [[UV4:%[0-9]+]]:_(<2 x s16>), [[UV5:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[COPY]](<6 x s16>) - ; GFX6: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[UV4]](<2 x s16>) - ; GFX6: 
[[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C]](s32) - ; GFX6: [[BITCAST3:%[0-9]+]]:_(s32) = G_BITCAST [[UV5]](<2 x s16>) - ; GFX6: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST3]], [[C]](s32) - ; GFX6: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[BITCAST]], [[C]](s32) - ; GFX6: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[LSHR1]], [[C]](s32) - ; GFX6: [[UMIN:%[0-9]+]]:_(s32) = G_UMIN [[SHL]], [[SHL1]] - ; GFX6: [[SUB:%[0-9]+]]:_(s32) = G_SUB [[SHL]], [[UMIN]] - ; GFX6: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[SUB]], [[C]](s32) - ; GFX6: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[LSHR]], [[C]](s32) - ; GFX6: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[BITCAST3]], [[C]](s32) - ; GFX6: [[UMIN1:%[0-9]+]]:_(s32) = G_UMIN [[SHL2]], [[SHL3]] - ; GFX6: [[SUB1:%[0-9]+]]:_(s32) = G_SUB [[SHL2]], [[UMIN1]] - ; GFX6: [[LSHR4:%[0-9]+]]:_(s32) = G_LSHR [[SUB1]], [[C]](s32) - ; GFX6: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[BITCAST1]], [[C]](s32) - ; GFX6: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[LSHR2]], [[C]](s32) - ; GFX6: [[UMIN2:%[0-9]+]]:_(s32) = G_UMIN [[SHL4]], [[SHL5]] - ; GFX6: [[SUB2:%[0-9]+]]:_(s32) = G_SUB [[SHL4]], [[UMIN2]] - ; GFX6: [[LSHR5:%[0-9]+]]:_(s32) = G_LSHR [[SUB2]], [[C]](s32) - ; GFX6: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF - ; GFX6: [[UV6:%[0-9]+]]:_(<2 x s16>), [[UV7:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<4 x s16>) - ; GFX6: [[BITCAST4:%[0-9]+]]:_(s32) = G_BITCAST [[UV6]](<2 x s16>) - ; GFX6: [[LSHR6:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST4]], [[C]](s32) - ; GFX6: [[BITCAST5:%[0-9]+]]:_(s32) = G_BITCAST [[UV7]](<2 x s16>) - ; GFX6: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 - ; GFX6: [[AND:%[0-9]+]]:_(s32) = G_AND [[LSHR3]], [[C1]] - ; GFX6: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LSHR4]], [[C1]] - ; GFX6: [[SHL6:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C]](s32) - ; GFX6: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL6]] - ; GFX6: [[BITCAST6:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) - ; GFX6: [[AND2:%[0-9]+]]:_(s32) = G_AND [[LSHR5]], [[C1]] - ; GFX6: [[AND3:%[0-9]+]]:_(s32) = G_AND [[BITCAST4]], [[C1]] - ; GFX6: [[SHL7:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C]](s32) - ; GFX6: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL7]] - ; GFX6: [[BITCAST7:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32) - ; GFX6: [[AND4:%[0-9]+]]:_(s32) = G_AND [[LSHR6]], [[C1]] - ; GFX6: [[AND5:%[0-9]+]]:_(s32) = G_AND [[BITCAST5]], [[C1]] - ; GFX6: [[SHL8:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[C]](s32) - ; GFX6: [[OR2:%[0-9]+]]:_(s32) = G_OR [[AND4]], [[SHL8]] - ; GFX6: [[BITCAST8:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR2]](s32) - ; GFX6: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BITCAST6]](<2 x s16>), [[BITCAST7]](<2 x s16>), [[BITCAST8]](<2 x s16>) - ; GFX6: $vgpr0_vgpr1_vgpr2 = COPY [[CONCAT_VECTORS]](<6 x s16>) + ; GFX6-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>), [[UV2:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[COPY]](<6 x s16>) + ; GFX6-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>) + ; GFX6-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; GFX6-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) + ; GFX6-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>) + ; GFX6-NEXT: [[UV3:%[0-9]+]]:_(<2 x s16>), [[UV4:%[0-9]+]]:_(<2 x s16>), [[UV5:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[COPY]](<6 x s16>) + ; GFX6-NEXT: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[UV4]](<2 x s16>) + ; GFX6-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C]](s32) + ; GFX6-NEXT: [[BITCAST3:%[0-9]+]]:_(s32) = G_BITCAST [[UV5]](<2 x s16>) + ; GFX6-NEXT: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR 
[[BITCAST3]], [[C]](s32) + ; GFX6-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[BITCAST]], [[C]](s32) + ; GFX6-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[LSHR1]], [[C]](s32) + ; GFX6-NEXT: [[UMIN:%[0-9]+]]:_(s32) = G_UMIN [[SHL]], [[SHL1]] + ; GFX6-NEXT: [[SUB:%[0-9]+]]:_(s32) = G_SUB [[SHL]], [[UMIN]] + ; GFX6-NEXT: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[SUB]], [[C]](s32) + ; GFX6-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[LSHR]], [[C]](s32) + ; GFX6-NEXT: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[BITCAST3]], [[C]](s32) + ; GFX6-NEXT: [[UMIN1:%[0-9]+]]:_(s32) = G_UMIN [[SHL2]], [[SHL3]] + ; GFX6-NEXT: [[SUB1:%[0-9]+]]:_(s32) = G_SUB [[SHL2]], [[UMIN1]] + ; GFX6-NEXT: [[LSHR4:%[0-9]+]]:_(s32) = G_LSHR [[SUB1]], [[C]](s32) + ; GFX6-NEXT: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[BITCAST1]], [[C]](s32) + ; GFX6-NEXT: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[LSHR2]], [[C]](s32) + ; GFX6-NEXT: [[UMIN2:%[0-9]+]]:_(s32) = G_UMIN [[SHL4]], [[SHL5]] + ; GFX6-NEXT: [[SUB2:%[0-9]+]]:_(s32) = G_SUB [[SHL4]], [[UMIN2]] + ; GFX6-NEXT: [[LSHR5:%[0-9]+]]:_(s32) = G_LSHR [[SUB2]], [[C]](s32) + ; GFX6-NEXT: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF + ; GFX6-NEXT: [[UV6:%[0-9]+]]:_(<2 x s16>), [[UV7:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<4 x s16>) + ; GFX6-NEXT: [[BITCAST4:%[0-9]+]]:_(s32) = G_BITCAST [[UV6]](<2 x s16>) + ; GFX6-NEXT: [[LSHR6:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST4]], [[C]](s32) + ; GFX6-NEXT: [[BITCAST5:%[0-9]+]]:_(s32) = G_BITCAST [[UV7]](<2 x s16>) + ; GFX6-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 + ; GFX6-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[LSHR3]], [[C1]] + ; GFX6-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LSHR4]], [[C1]] + ; GFX6-NEXT: [[SHL6:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C]](s32) + ; GFX6-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL6]] + ; GFX6-NEXT: [[BITCAST6:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) + ; GFX6-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[LSHR5]], [[C1]] + ; GFX6-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[BITCAST4]], [[C1]] + ; GFX6-NEXT: [[SHL7:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C]](s32) + ; GFX6-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL7]] + ; GFX6-NEXT: [[BITCAST7:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32) + ; GFX6-NEXT: [[AND4:%[0-9]+]]:_(s32) = G_AND [[LSHR6]], [[C1]] + ; GFX6-NEXT: [[AND5:%[0-9]+]]:_(s32) = G_AND [[BITCAST5]], [[C1]] + ; GFX6-NEXT: [[SHL8:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[C]](s32) + ; GFX6-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[AND4]], [[SHL8]] + ; GFX6-NEXT: [[BITCAST8:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR2]](s32) + ; GFX6-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BITCAST6]](<2 x s16>), [[BITCAST7]](<2 x s16>), [[BITCAST8]](<2 x s16>) + ; GFX6-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[CONCAT_VECTORS]](<6 x s16>) ; GFX8-LABEL: name: usubsat_v3s16 ; GFX8: [[COPY:%[0-9]+]]:_(<6 x s16>) = COPY $vgpr0_vgpr1_vgpr2 - ; GFX8: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>), [[UV2:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[COPY]](<6 x s16>) - ; GFX8: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>) - ; GFX8: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST]](s32) - ; GFX8: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; GFX8: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) - ; GFX8: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) - ; GFX8: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>) - ; GFX8: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST1]](s32) - ; GFX8: [[UV3:%[0-9]+]]:_(<2 x s16>), [[UV4:%[0-9]+]]:_(<2 x s16>), [[UV5:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[COPY]](<6 x s16>) - ; 
GFX8: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[UV4]](<2 x s16>) - ; GFX8: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C]](s32) - ; GFX8: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR1]](s32) - ; GFX8: [[BITCAST3:%[0-9]+]]:_(s32) = G_BITCAST [[UV5]](<2 x s16>) - ; GFX8: [[TRUNC4:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST3]](s32) - ; GFX8: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST3]], [[C]](s32) - ; GFX8: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR2]](s32) - ; GFX8: [[USUBSAT:%[0-9]+]]:_(s16) = G_USUBSAT [[TRUNC]], [[TRUNC3]] - ; GFX8: [[USUBSAT1:%[0-9]+]]:_(s16) = G_USUBSAT [[TRUNC1]], [[TRUNC4]] - ; GFX8: [[USUBSAT2:%[0-9]+]]:_(s16) = G_USUBSAT [[TRUNC2]], [[TRUNC5]] - ; GFX8: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF - ; GFX8: [[UV6:%[0-9]+]]:_(<2 x s16>), [[UV7:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<4 x s16>) - ; GFX8: [[BITCAST4:%[0-9]+]]:_(s32) = G_BITCAST [[UV6]](<2 x s16>) - ; GFX8: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST4]], [[C]](s32) - ; GFX8: [[BITCAST5:%[0-9]+]]:_(s32) = G_BITCAST [[UV7]](<2 x s16>) - ; GFX8: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[USUBSAT]](s16) - ; GFX8: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[USUBSAT1]](s16) - ; GFX8: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C]](s32) - ; GFX8: [[OR:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL]] - ; GFX8: [[BITCAST6:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) - ; GFX8: [[ZEXT2:%[0-9]+]]:_(s32) = G_ZEXT [[USUBSAT2]](s16) - ; GFX8: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 - ; GFX8: [[AND:%[0-9]+]]:_(s32) = G_AND [[BITCAST4]], [[C1]] - ; GFX8: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND]], [[C]](s32) - ; GFX8: [[OR1:%[0-9]+]]:_(s32) = G_OR [[ZEXT2]], [[SHL1]] - ; GFX8: [[BITCAST7:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32) - ; GFX8: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LSHR3]], [[C1]] - ; GFX8: [[AND2:%[0-9]+]]:_(s32) = G_AND [[BITCAST5]], [[C1]] - ; GFX8: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND2]], [[C]](s32) - ; GFX8: [[OR2:%[0-9]+]]:_(s32) = G_OR [[AND1]], [[SHL2]] - ; GFX8: [[BITCAST8:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR2]](s32) - ; GFX8: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BITCAST6]](<2 x s16>), [[BITCAST7]](<2 x s16>), [[BITCAST8]](<2 x s16>) - ; GFX8: $vgpr0_vgpr1_vgpr2 = COPY [[CONCAT_VECTORS]](<6 x s16>) + ; GFX8-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>), [[UV2:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[COPY]](<6 x s16>) + ; GFX8-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>) + ; GFX8-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST]](s32) + ; GFX8-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; GFX8-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) + ; GFX8-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) + ; GFX8-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>) + ; GFX8-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST1]](s32) + ; GFX8-NEXT: [[UV3:%[0-9]+]]:_(<2 x s16>), [[UV4:%[0-9]+]]:_(<2 x s16>), [[UV5:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[COPY]](<6 x s16>) + ; GFX8-NEXT: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[UV4]](<2 x s16>) + ; GFX8-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C]](s32) + ; GFX8-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR1]](s32) + ; GFX8-NEXT: [[BITCAST3:%[0-9]+]]:_(s32) = G_BITCAST [[UV5]](<2 x s16>) + ; GFX8-NEXT: [[TRUNC4:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST3]](s32) + ; GFX8-NEXT: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST3]], [[C]](s32) + ; GFX8-NEXT: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR2]](s32) + ; GFX8-NEXT: [[USUBSAT:%[0-9]+]]:_(s16) = 
G_USUBSAT [[TRUNC]], [[TRUNC3]] + ; GFX8-NEXT: [[USUBSAT1:%[0-9]+]]:_(s16) = G_USUBSAT [[TRUNC1]], [[TRUNC4]] + ; GFX8-NEXT: [[USUBSAT2:%[0-9]+]]:_(s16) = G_USUBSAT [[TRUNC2]], [[TRUNC5]] + ; GFX8-NEXT: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF + ; GFX8-NEXT: [[UV6:%[0-9]+]]:_(<2 x s16>), [[UV7:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<4 x s16>) + ; GFX8-NEXT: [[BITCAST4:%[0-9]+]]:_(s32) = G_BITCAST [[UV6]](<2 x s16>) + ; GFX8-NEXT: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST4]], [[C]](s32) + ; GFX8-NEXT: [[BITCAST5:%[0-9]+]]:_(s32) = G_BITCAST [[UV7]](<2 x s16>) + ; GFX8-NEXT: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[USUBSAT]](s16) + ; GFX8-NEXT: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[USUBSAT1]](s16) + ; GFX8-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C]](s32) + ; GFX8-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL]] + ; GFX8-NEXT: [[BITCAST6:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) + ; GFX8-NEXT: [[ZEXT2:%[0-9]+]]:_(s32) = G_ZEXT [[USUBSAT2]](s16) + ; GFX8-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 + ; GFX8-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[BITCAST4]], [[C1]] + ; GFX8-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND]], [[C]](s32) + ; GFX8-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[ZEXT2]], [[SHL1]] + ; GFX8-NEXT: [[BITCAST7:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32) + ; GFX8-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LSHR3]], [[C1]] + ; GFX8-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[BITCAST5]], [[C1]] + ; GFX8-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND2]], [[C]](s32) + ; GFX8-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[AND1]], [[SHL2]] + ; GFX8-NEXT: [[BITCAST8:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR2]](s32) + ; GFX8-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BITCAST6]](<2 x s16>), [[BITCAST7]](<2 x s16>), [[BITCAST8]](<2 x s16>) + ; GFX8-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[CONCAT_VECTORS]](<6 x s16>) ; GFX9-LABEL: name: usubsat_v3s16 ; GFX9: [[COPY:%[0-9]+]]:_(<6 x s16>) = COPY $vgpr0_vgpr1_vgpr2 - ; GFX9: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>), [[UV2:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[COPY]](<6 x s16>) - ; GFX9: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>) - ; GFX9: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; GFX9: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) - ; GFX9: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>) - ; GFX9: [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[BITCAST]](s32), [[LSHR]](s32) - ; GFX9: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF - ; GFX9: [[BUILD_VECTOR_TRUNC1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[BITCAST1]](s32), [[DEF]](s32) - ; GFX9: [[UV3:%[0-9]+]]:_(<2 x s16>), [[UV4:%[0-9]+]]:_(<2 x s16>), [[UV5:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[COPY]](<6 x s16>) - ; GFX9: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[UV4]](<2 x s16>) - ; GFX9: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C]](s32) - ; GFX9: [[BITCAST3:%[0-9]+]]:_(s32) = G_BITCAST [[UV5]](<2 x s16>) - ; GFX9: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST3]], [[C]](s32) - ; GFX9: [[BUILD_VECTOR_TRUNC2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[LSHR1]](s32), [[BITCAST3]](s32) - ; GFX9: [[BUILD_VECTOR_TRUNC3:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[LSHR2]](s32), [[DEF]](s32) - ; GFX9: [[USUBSAT:%[0-9]+]]:_(<2 x s16>) = G_USUBSAT [[BUILD_VECTOR_TRUNC]], [[BUILD_VECTOR_TRUNC2]] - ; GFX9: [[USUBSAT1:%[0-9]+]]:_(<2 x s16>) = G_USUBSAT [[BUILD_VECTOR_TRUNC1]], [[BUILD_VECTOR_TRUNC3]] - ; GFX9: [[DEF1:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF - ; GFX9: 
[[BITCAST4:%[0-9]+]]:_(s32) = G_BITCAST [[USUBSAT]](<2 x s16>) - ; GFX9: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST4]], [[C]](s32) - ; GFX9: [[BITCAST5:%[0-9]+]]:_(s32) = G_BITCAST [[USUBSAT1]](<2 x s16>) - ; GFX9: [[UV6:%[0-9]+]]:_(<2 x s16>), [[UV7:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF1]](<4 x s16>) - ; GFX9: [[BITCAST6:%[0-9]+]]:_(s32) = G_BITCAST [[UV6]](<2 x s16>) - ; GFX9: [[LSHR4:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST6]], [[C]](s32) - ; GFX9: [[BITCAST7:%[0-9]+]]:_(s32) = G_BITCAST [[UV7]](<2 x s16>) - ; GFX9: [[BUILD_VECTOR_TRUNC4:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[BITCAST4]](s32), [[LSHR3]](s32) - ; GFX9: [[BUILD_VECTOR_TRUNC5:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[BITCAST5]](s32), [[BITCAST6]](s32) - ; GFX9: [[BUILD_VECTOR_TRUNC6:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[LSHR4]](s32), [[BITCAST7]](s32) - ; GFX9: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR_TRUNC4]](<2 x s16>), [[BUILD_VECTOR_TRUNC5]](<2 x s16>), [[BUILD_VECTOR_TRUNC6]](<2 x s16>) - ; GFX9: $vgpr0_vgpr1_vgpr2 = COPY [[CONCAT_VECTORS]](<6 x s16>) + ; GFX9-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>), [[UV2:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[COPY]](<6 x s16>) + ; GFX9-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>) + ; GFX9-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; GFX9-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) + ; GFX9-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>) + ; GFX9-NEXT: [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[BITCAST]](s32), [[LSHR]](s32) + ; GFX9-NEXT: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF + ; GFX9-NEXT: [[BUILD_VECTOR_TRUNC1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[BITCAST1]](s32), [[DEF]](s32) + ; GFX9-NEXT: [[UV3:%[0-9]+]]:_(<2 x s16>), [[UV4:%[0-9]+]]:_(<2 x s16>), [[UV5:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[COPY]](<6 x s16>) + ; GFX9-NEXT: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[UV4]](<2 x s16>) + ; GFX9-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C]](s32) + ; GFX9-NEXT: [[BITCAST3:%[0-9]+]]:_(s32) = G_BITCAST [[UV5]](<2 x s16>) + ; GFX9-NEXT: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST3]], [[C]](s32) + ; GFX9-NEXT: [[BUILD_VECTOR_TRUNC2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[LSHR1]](s32), [[BITCAST3]](s32) + ; GFX9-NEXT: [[BUILD_VECTOR_TRUNC3:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[LSHR2]](s32), [[DEF]](s32) + ; GFX9-NEXT: [[USUBSAT:%[0-9]+]]:_(<2 x s16>) = G_USUBSAT [[BUILD_VECTOR_TRUNC]], [[BUILD_VECTOR_TRUNC2]] + ; GFX9-NEXT: [[USUBSAT1:%[0-9]+]]:_(<2 x s16>) = G_USUBSAT [[BUILD_VECTOR_TRUNC1]], [[BUILD_VECTOR_TRUNC3]] + ; GFX9-NEXT: [[DEF1:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF + ; GFX9-NEXT: [[BITCAST4:%[0-9]+]]:_(s32) = G_BITCAST [[USUBSAT]](<2 x s16>) + ; GFX9-NEXT: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST4]], [[C]](s32) + ; GFX9-NEXT: [[BITCAST5:%[0-9]+]]:_(s32) = G_BITCAST [[USUBSAT1]](<2 x s16>) + ; GFX9-NEXT: [[UV6:%[0-9]+]]:_(<2 x s16>), [[UV7:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF1]](<4 x s16>) + ; GFX9-NEXT: [[BITCAST6:%[0-9]+]]:_(s32) = G_BITCAST [[UV6]](<2 x s16>) + ; GFX9-NEXT: [[LSHR4:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST6]], [[C]](s32) + ; GFX9-NEXT: [[BITCAST7:%[0-9]+]]:_(s32) = G_BITCAST [[UV7]](<2 x s16>) + ; GFX9-NEXT: [[BUILD_VECTOR_TRUNC4:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[BITCAST4]](s32), [[LSHR3]](s32) + ; GFX9-NEXT: [[BUILD_VECTOR_TRUNC5:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[BITCAST5]](s32), [[BITCAST6]](s32) + 
; GFX9-NEXT: [[BUILD_VECTOR_TRUNC6:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[LSHR4]](s32), [[BITCAST7]](s32) + ; GFX9-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR_TRUNC4]](<2 x s16>), [[BUILD_VECTOR_TRUNC5]](<2 x s16>), [[BUILD_VECTOR_TRUNC6]](<2 x s16>) + ; GFX9-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[CONCAT_VECTORS]](<6 x s16>) %0:_(<6 x s16>) = COPY $vgpr0_vgpr1_vgpr2 %1:_(<3 x s16>), %2:_(<3 x s16>) = G_UNMERGE_VALUES %0 %3:_(<3 x s16>) = G_USUBSAT %1, %2 @@ -452,98 +452,98 @@ body: | ; GFX6-LABEL: name: usubsat_v4s16 ; GFX6: [[COPY:%[0-9]+]]:_(<4 x s16>) = COPY $vgpr0_vgpr1 - ; GFX6: [[COPY1:%[0-9]+]]:_(<4 x s16>) = COPY $vgpr2_vgpr3 - ; GFX6: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[COPY]](<4 x s16>) - ; GFX6: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>) - ; GFX6: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; GFX6: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) - ; GFX6: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>) - ; GFX6: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C]](s32) - ; GFX6: [[UV2:%[0-9]+]]:_(<2 x s16>), [[UV3:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[COPY1]](<4 x s16>) - ; GFX6: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[UV2]](<2 x s16>) - ; GFX6: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C]](s32) - ; GFX6: [[BITCAST3:%[0-9]+]]:_(s32) = G_BITCAST [[UV3]](<2 x s16>) - ; GFX6: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST3]], [[C]](s32) - ; GFX6: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[BITCAST]], [[C]](s32) - ; GFX6: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[BITCAST2]], [[C]](s32) - ; GFX6: [[UMIN:%[0-9]+]]:_(s32) = G_UMIN [[SHL]], [[SHL1]] - ; GFX6: [[SUB:%[0-9]+]]:_(s32) = G_SUB [[SHL]], [[UMIN]] - ; GFX6: [[LSHR4:%[0-9]+]]:_(s32) = G_LSHR [[SUB]], [[C]](s32) - ; GFX6: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[LSHR]], [[C]](s32) - ; GFX6: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[LSHR2]], [[C]](s32) - ; GFX6: [[UMIN1:%[0-9]+]]:_(s32) = G_UMIN [[SHL2]], [[SHL3]] - ; GFX6: [[SUB1:%[0-9]+]]:_(s32) = G_SUB [[SHL2]], [[UMIN1]] - ; GFX6: [[LSHR5:%[0-9]+]]:_(s32) = G_LSHR [[SUB1]], [[C]](s32) - ; GFX6: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[BITCAST1]], [[C]](s32) - ; GFX6: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[BITCAST3]], [[C]](s32) - ; GFX6: [[UMIN2:%[0-9]+]]:_(s32) = G_UMIN [[SHL4]], [[SHL5]] - ; GFX6: [[SUB2:%[0-9]+]]:_(s32) = G_SUB [[SHL4]], [[UMIN2]] - ; GFX6: [[LSHR6:%[0-9]+]]:_(s32) = G_LSHR [[SUB2]], [[C]](s32) - ; GFX6: [[SHL6:%[0-9]+]]:_(s32) = G_SHL [[LSHR1]], [[C]](s32) - ; GFX6: [[SHL7:%[0-9]+]]:_(s32) = G_SHL [[LSHR3]], [[C]](s32) - ; GFX6: [[UMIN3:%[0-9]+]]:_(s32) = G_UMIN [[SHL6]], [[SHL7]] - ; GFX6: [[SUB3:%[0-9]+]]:_(s32) = G_SUB [[SHL6]], [[UMIN3]] - ; GFX6: [[LSHR7:%[0-9]+]]:_(s32) = G_LSHR [[SUB3]], [[C]](s32) - ; GFX6: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 - ; GFX6: [[AND:%[0-9]+]]:_(s32) = G_AND [[LSHR4]], [[C1]] - ; GFX6: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LSHR5]], [[C1]] - ; GFX6: [[SHL8:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C]](s32) - ; GFX6: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL8]] - ; GFX6: [[BITCAST4:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) - ; GFX6: [[AND2:%[0-9]+]]:_(s32) = G_AND [[LSHR6]], [[C1]] - ; GFX6: [[AND3:%[0-9]+]]:_(s32) = G_AND [[LSHR7]], [[C1]] - ; GFX6: [[SHL9:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C]](s32) - ; GFX6: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL9]] - ; GFX6: [[BITCAST5:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32) - ; GFX6: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BITCAST4]](<2 x s16>), [[BITCAST5]](<2 
x s16>) - ; GFX6: $vgpr0_vgpr1 = COPY [[CONCAT_VECTORS]](<4 x s16>) + ; GFX6-NEXT: [[COPY1:%[0-9]+]]:_(<4 x s16>) = COPY $vgpr2_vgpr3 + ; GFX6-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[COPY]](<4 x s16>) + ; GFX6-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>) + ; GFX6-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; GFX6-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) + ; GFX6-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>) + ; GFX6-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C]](s32) + ; GFX6-NEXT: [[UV2:%[0-9]+]]:_(<2 x s16>), [[UV3:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[COPY1]](<4 x s16>) + ; GFX6-NEXT: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[UV2]](<2 x s16>) + ; GFX6-NEXT: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C]](s32) + ; GFX6-NEXT: [[BITCAST3:%[0-9]+]]:_(s32) = G_BITCAST [[UV3]](<2 x s16>) + ; GFX6-NEXT: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST3]], [[C]](s32) + ; GFX6-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[BITCAST]], [[C]](s32) + ; GFX6-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[BITCAST2]], [[C]](s32) + ; GFX6-NEXT: [[UMIN:%[0-9]+]]:_(s32) = G_UMIN [[SHL]], [[SHL1]] + ; GFX6-NEXT: [[SUB:%[0-9]+]]:_(s32) = G_SUB [[SHL]], [[UMIN]] + ; GFX6-NEXT: [[LSHR4:%[0-9]+]]:_(s32) = G_LSHR [[SUB]], [[C]](s32) + ; GFX6-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[LSHR]], [[C]](s32) + ; GFX6-NEXT: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[LSHR2]], [[C]](s32) + ; GFX6-NEXT: [[UMIN1:%[0-9]+]]:_(s32) = G_UMIN [[SHL2]], [[SHL3]] + ; GFX6-NEXT: [[SUB1:%[0-9]+]]:_(s32) = G_SUB [[SHL2]], [[UMIN1]] + ; GFX6-NEXT: [[LSHR5:%[0-9]+]]:_(s32) = G_LSHR [[SUB1]], [[C]](s32) + ; GFX6-NEXT: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[BITCAST1]], [[C]](s32) + ; GFX6-NEXT: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[BITCAST3]], [[C]](s32) + ; GFX6-NEXT: [[UMIN2:%[0-9]+]]:_(s32) = G_UMIN [[SHL4]], [[SHL5]] + ; GFX6-NEXT: [[SUB2:%[0-9]+]]:_(s32) = G_SUB [[SHL4]], [[UMIN2]] + ; GFX6-NEXT: [[LSHR6:%[0-9]+]]:_(s32) = G_LSHR [[SUB2]], [[C]](s32) + ; GFX6-NEXT: [[SHL6:%[0-9]+]]:_(s32) = G_SHL [[LSHR1]], [[C]](s32) + ; GFX6-NEXT: [[SHL7:%[0-9]+]]:_(s32) = G_SHL [[LSHR3]], [[C]](s32) + ; GFX6-NEXT: [[UMIN3:%[0-9]+]]:_(s32) = G_UMIN [[SHL6]], [[SHL7]] + ; GFX6-NEXT: [[SUB3:%[0-9]+]]:_(s32) = G_SUB [[SHL6]], [[UMIN3]] + ; GFX6-NEXT: [[LSHR7:%[0-9]+]]:_(s32) = G_LSHR [[SUB3]], [[C]](s32) + ; GFX6-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 + ; GFX6-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[LSHR4]], [[C1]] + ; GFX6-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LSHR5]], [[C1]] + ; GFX6-NEXT: [[SHL8:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C]](s32) + ; GFX6-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL8]] + ; GFX6-NEXT: [[BITCAST4:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) + ; GFX6-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[LSHR6]], [[C1]] + ; GFX6-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[LSHR7]], [[C1]] + ; GFX6-NEXT: [[SHL9:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C]](s32) + ; GFX6-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL9]] + ; GFX6-NEXT: [[BITCAST5:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32) + ; GFX6-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BITCAST4]](<2 x s16>), [[BITCAST5]](<2 x s16>) + ; GFX6-NEXT: $vgpr0_vgpr1 = COPY [[CONCAT_VECTORS]](<4 x s16>) ; GFX8-LABEL: name: usubsat_v4s16 ; GFX8: [[COPY:%[0-9]+]]:_(<4 x s16>) = COPY $vgpr0_vgpr1 - ; GFX8: [[COPY1:%[0-9]+]]:_(<4 x s16>) = COPY $vgpr2_vgpr3 - ; GFX8: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[COPY]](<4 x s16>) 
- ; GFX8: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>) - ; GFX8: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST]](s32) - ; GFX8: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; GFX8: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) - ; GFX8: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) - ; GFX8: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>) - ; GFX8: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST1]](s32) - ; GFX8: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C]](s32) - ; GFX8: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR1]](s32) - ; GFX8: [[UV2:%[0-9]+]]:_(<2 x s16>), [[UV3:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[COPY1]](<4 x s16>) - ; GFX8: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[UV2]](<2 x s16>) - ; GFX8: [[TRUNC4:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST2]](s32) - ; GFX8: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C]](s32) - ; GFX8: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR2]](s32) - ; GFX8: [[BITCAST3:%[0-9]+]]:_(s32) = G_BITCAST [[UV3]](<2 x s16>) - ; GFX8: [[TRUNC6:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST3]](s32) - ; GFX8: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST3]], [[C]](s32) - ; GFX8: [[TRUNC7:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR3]](s32) - ; GFX8: [[USUBSAT:%[0-9]+]]:_(s16) = G_USUBSAT [[TRUNC]], [[TRUNC4]] - ; GFX8: [[USUBSAT1:%[0-9]+]]:_(s16) = G_USUBSAT [[TRUNC1]], [[TRUNC5]] - ; GFX8: [[USUBSAT2:%[0-9]+]]:_(s16) = G_USUBSAT [[TRUNC2]], [[TRUNC6]] - ; GFX8: [[USUBSAT3:%[0-9]+]]:_(s16) = G_USUBSAT [[TRUNC3]], [[TRUNC7]] - ; GFX8: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[USUBSAT]](s16) - ; GFX8: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[USUBSAT1]](s16) - ; GFX8: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C]](s32) - ; GFX8: [[OR:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL]] - ; GFX8: [[BITCAST4:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) - ; GFX8: [[ZEXT2:%[0-9]+]]:_(s32) = G_ZEXT [[USUBSAT2]](s16) - ; GFX8: [[ZEXT3:%[0-9]+]]:_(s32) = G_ZEXT [[USUBSAT3]](s16) - ; GFX8: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[ZEXT3]], [[C]](s32) - ; GFX8: [[OR1:%[0-9]+]]:_(s32) = G_OR [[ZEXT2]], [[SHL1]] - ; GFX8: [[BITCAST5:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32) - ; GFX8: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BITCAST4]](<2 x s16>), [[BITCAST5]](<2 x s16>) - ; GFX8: $vgpr0_vgpr1 = COPY [[CONCAT_VECTORS]](<4 x s16>) + ; GFX8-NEXT: [[COPY1:%[0-9]+]]:_(<4 x s16>) = COPY $vgpr2_vgpr3 + ; GFX8-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[COPY]](<4 x s16>) + ; GFX8-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>) + ; GFX8-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST]](s32) + ; GFX8-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; GFX8-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) + ; GFX8-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) + ; GFX8-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>) + ; GFX8-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST1]](s32) + ; GFX8-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C]](s32) + ; GFX8-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR1]](s32) + ; GFX8-NEXT: [[UV2:%[0-9]+]]:_(<2 x s16>), [[UV3:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[COPY1]](<4 x s16>) + ; GFX8-NEXT: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[UV2]](<2 x s16>) + ; GFX8-NEXT: [[TRUNC4:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST2]](s32) + ; GFX8-NEXT: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C]](s32) + ; GFX8-NEXT: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR2]](s32) + ; GFX8-NEXT: [[BITCAST3:%[0-9]+]]:_(s32) = G_BITCAST 
[[UV3]](<2 x s16>) + ; GFX8-NEXT: [[TRUNC6:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST3]](s32) + ; GFX8-NEXT: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST3]], [[C]](s32) + ; GFX8-NEXT: [[TRUNC7:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR3]](s32) + ; GFX8-NEXT: [[USUBSAT:%[0-9]+]]:_(s16) = G_USUBSAT [[TRUNC]], [[TRUNC4]] + ; GFX8-NEXT: [[USUBSAT1:%[0-9]+]]:_(s16) = G_USUBSAT [[TRUNC1]], [[TRUNC5]] + ; GFX8-NEXT: [[USUBSAT2:%[0-9]+]]:_(s16) = G_USUBSAT [[TRUNC2]], [[TRUNC6]] + ; GFX8-NEXT: [[USUBSAT3:%[0-9]+]]:_(s16) = G_USUBSAT [[TRUNC3]], [[TRUNC7]] + ; GFX8-NEXT: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[USUBSAT]](s16) + ; GFX8-NEXT: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[USUBSAT1]](s16) + ; GFX8-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C]](s32) + ; GFX8-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL]] + ; GFX8-NEXT: [[BITCAST4:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) + ; GFX8-NEXT: [[ZEXT2:%[0-9]+]]:_(s32) = G_ZEXT [[USUBSAT2]](s16) + ; GFX8-NEXT: [[ZEXT3:%[0-9]+]]:_(s32) = G_ZEXT [[USUBSAT3]](s16) + ; GFX8-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[ZEXT3]], [[C]](s32) + ; GFX8-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[ZEXT2]], [[SHL1]] + ; GFX8-NEXT: [[BITCAST5:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32) + ; GFX8-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BITCAST4]](<2 x s16>), [[BITCAST5]](<2 x s16>) + ; GFX8-NEXT: $vgpr0_vgpr1 = COPY [[CONCAT_VECTORS]](<4 x s16>) ; GFX9-LABEL: name: usubsat_v4s16 ; GFX9: [[COPY:%[0-9]+]]:_(<4 x s16>) = COPY $vgpr0_vgpr1 - ; GFX9: [[COPY1:%[0-9]+]]:_(<4 x s16>) = COPY $vgpr2_vgpr3 - ; GFX9: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[COPY]](<4 x s16>) - ; GFX9: [[UV2:%[0-9]+]]:_(<2 x s16>), [[UV3:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[COPY1]](<4 x s16>) - ; GFX9: [[USUBSAT:%[0-9]+]]:_(<2 x s16>) = G_USUBSAT [[UV]], [[UV2]] - ; GFX9: [[USUBSAT1:%[0-9]+]]:_(<2 x s16>) = G_USUBSAT [[UV1]], [[UV3]] - ; GFX9: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[USUBSAT]](<2 x s16>), [[USUBSAT1]](<2 x s16>) - ; GFX9: $vgpr0_vgpr1 = COPY [[CONCAT_VECTORS]](<4 x s16>) + ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(<4 x s16>) = COPY $vgpr2_vgpr3 + ; GFX9-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[COPY]](<4 x s16>) + ; GFX9-NEXT: [[UV2:%[0-9]+]]:_(<2 x s16>), [[UV3:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[COPY1]](<4 x s16>) + ; GFX9-NEXT: [[USUBSAT:%[0-9]+]]:_(<2 x s16>) = G_USUBSAT [[UV]], [[UV2]] + ; GFX9-NEXT: [[USUBSAT1:%[0-9]+]]:_(<2 x s16>) = G_USUBSAT [[UV1]], [[UV3]] + ; GFX9-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[USUBSAT]](<2 x s16>), [[USUBSAT1]](<2 x s16>) + ; GFX9-NEXT: $vgpr0_vgpr1 = COPY [[CONCAT_VECTORS]](<4 x s16>) %0:_(<4 x s16>) = COPY $vgpr0_vgpr1 %1:_(<4 x s16>) = COPY $vgpr2_vgpr3 %2:_(<4 x s16>) = G_USUBSAT %0, %1 @@ -558,20 +558,20 @@ body: | ; GFX6-LABEL: name: usubsat_s32 ; GFX6: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX6: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX6: [[UMIN:%[0-9]+]]:_(s32) = G_UMIN [[COPY]], [[COPY1]] - ; GFX6: [[SUB:%[0-9]+]]:_(s32) = G_SUB [[COPY]], [[UMIN]] - ; GFX6: $vgpr0 = COPY [[SUB]](s32) + ; GFX6-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; GFX6-NEXT: [[UMIN:%[0-9]+]]:_(s32) = G_UMIN [[COPY]], [[COPY1]] + ; GFX6-NEXT: [[SUB:%[0-9]+]]:_(s32) = G_SUB [[COPY]], [[UMIN]] + ; GFX6-NEXT: $vgpr0 = COPY [[SUB]](s32) ; GFX8-LABEL: name: usubsat_s32 ; GFX8: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX8: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX8: [[USUBSAT:%[0-9]+]]:_(s32) = G_USUBSAT 
[[COPY]], [[COPY1]] - ; GFX8: $vgpr0 = COPY [[USUBSAT]](s32) + ; GFX8-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; GFX8-NEXT: [[USUBSAT:%[0-9]+]]:_(s32) = G_USUBSAT [[COPY]], [[COPY1]] + ; GFX8-NEXT: $vgpr0 = COPY [[USUBSAT]](s32) ; GFX9-LABEL: name: usubsat_s32 ; GFX9: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX9: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX9: [[USUBSAT:%[0-9]+]]:_(s32) = G_USUBSAT [[COPY]], [[COPY1]] - ; GFX9: $vgpr0 = COPY [[USUBSAT]](s32) + ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; GFX9-NEXT: [[USUBSAT:%[0-9]+]]:_(s32) = G_USUBSAT [[COPY]], [[COPY1]] + ; GFX9-NEXT: $vgpr0 = COPY [[USUBSAT]](s32) %0:_(s32) = COPY $vgpr0 %1:_(s32) = COPY $vgpr1 %2:_(s32) = G_USUBSAT %0, %1 @@ -586,33 +586,33 @@ body: | ; GFX6-LABEL: name: usubsat_v2s32 ; GFX6: [[COPY:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr0_vgpr1 - ; GFX6: [[COPY1:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr2_vgpr3 - ; GFX6: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<2 x s32>) - ; GFX6: [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](<2 x s32>) - ; GFX6: [[UMIN:%[0-9]+]]:_(s32) = G_UMIN [[UV]], [[UV2]] - ; GFX6: [[SUB:%[0-9]+]]:_(s32) = G_SUB [[UV]], [[UMIN]] - ; GFX6: [[UMIN1:%[0-9]+]]:_(s32) = G_UMIN [[UV1]], [[UV3]] - ; GFX6: [[SUB1:%[0-9]+]]:_(s32) = G_SUB [[UV1]], [[UMIN1]] - ; GFX6: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[SUB]](s32), [[SUB1]](s32) - ; GFX6: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x s32>) + ; GFX6-NEXT: [[COPY1:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr2_vgpr3 + ; GFX6-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<2 x s32>) + ; GFX6-NEXT: [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](<2 x s32>) + ; GFX6-NEXT: [[UMIN:%[0-9]+]]:_(s32) = G_UMIN [[UV]], [[UV2]] + ; GFX6-NEXT: [[SUB:%[0-9]+]]:_(s32) = G_SUB [[UV]], [[UMIN]] + ; GFX6-NEXT: [[UMIN1:%[0-9]+]]:_(s32) = G_UMIN [[UV1]], [[UV3]] + ; GFX6-NEXT: [[SUB1:%[0-9]+]]:_(s32) = G_SUB [[UV1]], [[UMIN1]] + ; GFX6-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[SUB]](s32), [[SUB1]](s32) + ; GFX6-NEXT: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x s32>) ; GFX8-LABEL: name: usubsat_v2s32 ; GFX8: [[COPY:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr0_vgpr1 - ; GFX8: [[COPY1:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr2_vgpr3 - ; GFX8: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<2 x s32>) - ; GFX8: [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](<2 x s32>) - ; GFX8: [[USUBSAT:%[0-9]+]]:_(s32) = G_USUBSAT [[UV]], [[UV2]] - ; GFX8: [[USUBSAT1:%[0-9]+]]:_(s32) = G_USUBSAT [[UV1]], [[UV3]] - ; GFX8: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[USUBSAT]](s32), [[USUBSAT1]](s32) - ; GFX8: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x s32>) + ; GFX8-NEXT: [[COPY1:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr2_vgpr3 + ; GFX8-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<2 x s32>) + ; GFX8-NEXT: [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](<2 x s32>) + ; GFX8-NEXT: [[USUBSAT:%[0-9]+]]:_(s32) = G_USUBSAT [[UV]], [[UV2]] + ; GFX8-NEXT: [[USUBSAT1:%[0-9]+]]:_(s32) = G_USUBSAT [[UV1]], [[UV3]] + ; GFX8-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[USUBSAT]](s32), [[USUBSAT1]](s32) + ; GFX8-NEXT: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x s32>) ; GFX9-LABEL: name: usubsat_v2s32 ; GFX9: [[COPY:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr0_vgpr1 - ; GFX9: [[COPY1:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr2_vgpr3 - ; GFX9: 
[[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<2 x s32>) - ; GFX9: [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](<2 x s32>) - ; GFX9: [[USUBSAT:%[0-9]+]]:_(s32) = G_USUBSAT [[UV]], [[UV2]] - ; GFX9: [[USUBSAT1:%[0-9]+]]:_(s32) = G_USUBSAT [[UV1]], [[UV3]] - ; GFX9: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[USUBSAT]](s32), [[USUBSAT1]](s32) - ; GFX9: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x s32>) + ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr2_vgpr3 + ; GFX9-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<2 x s32>) + ; GFX9-NEXT: [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](<2 x s32>) + ; GFX9-NEXT: [[USUBSAT:%[0-9]+]]:_(s32) = G_USUBSAT [[UV]], [[UV2]] + ; GFX9-NEXT: [[USUBSAT1:%[0-9]+]]:_(s32) = G_USUBSAT [[UV1]], [[UV3]] + ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[USUBSAT]](s32), [[USUBSAT1]](s32) + ; GFX9-NEXT: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x s32>) %0:_(<2 x s32>) = COPY $vgpr0_vgpr1 %1:_(<2 x s32>) = COPY $vgpr2_vgpr3 %2:_(<2 x s32>) = G_USUBSAT %0, %1 @@ -627,40 +627,40 @@ body: | ; GFX6-LABEL: name: usubsat_s64 ; GFX6: [[COPY:%[0-9]+]]:_(s64) = COPY $vgpr0_vgpr1 - ; GFX6: [[COPY1:%[0-9]+]]:_(s64) = COPY $vgpr2_vgpr3 - ; GFX6: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](s64) - ; GFX6: [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](s64) - ; GFX6: [[USUBO:%[0-9]+]]:_(s32), [[USUBO1:%[0-9]+]]:_(s1) = G_USUBO [[UV]], [[UV2]] - ; GFX6: [[USUBE:%[0-9]+]]:_(s32), [[USUBE1:%[0-9]+]]:_(s1) = G_USUBE [[UV1]], [[UV3]], [[USUBO1]] - ; GFX6: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[USUBO]](s32), [[USUBE]](s32) - ; GFX6: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(ult), [[COPY]](s64), [[COPY1]] - ; GFX6: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 - ; GFX6: [[SELECT:%[0-9]+]]:_(s64) = G_SELECT [[ICMP]](s1), [[C]], [[MV]] - ; GFX6: $vgpr0_vgpr1 = COPY [[SELECT]](s64) + ; GFX6-NEXT: [[COPY1:%[0-9]+]]:_(s64) = COPY $vgpr2_vgpr3 + ; GFX6-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](s64) + ; GFX6-NEXT: [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](s64) + ; GFX6-NEXT: [[USUBO:%[0-9]+]]:_(s32), [[USUBO1:%[0-9]+]]:_(s1) = G_USUBO [[UV]], [[UV2]] + ; GFX6-NEXT: [[USUBE:%[0-9]+]]:_(s32), [[USUBE1:%[0-9]+]]:_(s1) = G_USUBE [[UV1]], [[UV3]], [[USUBO1]] + ; GFX6-NEXT: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[USUBO]](s32), [[USUBE]](s32) + ; GFX6-NEXT: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(ult), [[COPY]](s64), [[COPY1]] + ; GFX6-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 + ; GFX6-NEXT: [[SELECT:%[0-9]+]]:_(s64) = G_SELECT [[ICMP]](s1), [[C]], [[MV]] + ; GFX6-NEXT: $vgpr0_vgpr1 = COPY [[SELECT]](s64) ; GFX8-LABEL: name: usubsat_s64 ; GFX8: [[COPY:%[0-9]+]]:_(s64) = COPY $vgpr0_vgpr1 - ; GFX8: [[COPY1:%[0-9]+]]:_(s64) = COPY $vgpr2_vgpr3 - ; GFX8: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](s64) - ; GFX8: [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](s64) - ; GFX8: [[USUBO:%[0-9]+]]:_(s32), [[USUBO1:%[0-9]+]]:_(s1) = G_USUBO [[UV]], [[UV2]] - ; GFX8: [[USUBE:%[0-9]+]]:_(s32), [[USUBE1:%[0-9]+]]:_(s1) = G_USUBE [[UV1]], [[UV3]], [[USUBO1]] - ; GFX8: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[USUBO]](s32), [[USUBE]](s32) - ; GFX8: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(ult), [[COPY]](s64), [[COPY1]] - ; GFX8: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 - ; 
GFX8: [[SELECT:%[0-9]+]]:_(s64) = G_SELECT [[ICMP]](s1), [[C]], [[MV]] - ; GFX8: $vgpr0_vgpr1 = COPY [[SELECT]](s64) + ; GFX8-NEXT: [[COPY1:%[0-9]+]]:_(s64) = COPY $vgpr2_vgpr3 + ; GFX8-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](s64) + ; GFX8-NEXT: [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](s64) + ; GFX8-NEXT: [[USUBO:%[0-9]+]]:_(s32), [[USUBO1:%[0-9]+]]:_(s1) = G_USUBO [[UV]], [[UV2]] + ; GFX8-NEXT: [[USUBE:%[0-9]+]]:_(s32), [[USUBE1:%[0-9]+]]:_(s1) = G_USUBE [[UV1]], [[UV3]], [[USUBO1]] + ; GFX8-NEXT: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[USUBO]](s32), [[USUBE]](s32) + ; GFX8-NEXT: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(ult), [[COPY]](s64), [[COPY1]] + ; GFX8-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 + ; GFX8-NEXT: [[SELECT:%[0-9]+]]:_(s64) = G_SELECT [[ICMP]](s1), [[C]], [[MV]] + ; GFX8-NEXT: $vgpr0_vgpr1 = COPY [[SELECT]](s64) ; GFX9-LABEL: name: usubsat_s64 ; GFX9: [[COPY:%[0-9]+]]:_(s64) = COPY $vgpr0_vgpr1 - ; GFX9: [[COPY1:%[0-9]+]]:_(s64) = COPY $vgpr2_vgpr3 - ; GFX9: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](s64) - ; GFX9: [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](s64) - ; GFX9: [[USUBO:%[0-9]+]]:_(s32), [[USUBO1:%[0-9]+]]:_(s1) = G_USUBO [[UV]], [[UV2]] - ; GFX9: [[USUBE:%[0-9]+]]:_(s32), [[USUBE1:%[0-9]+]]:_(s1) = G_USUBE [[UV1]], [[UV3]], [[USUBO1]] - ; GFX9: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[USUBO]](s32), [[USUBE]](s32) - ; GFX9: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(ult), [[COPY]](s64), [[COPY1]] - ; GFX9: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 - ; GFX9: [[SELECT:%[0-9]+]]:_(s64) = G_SELECT [[ICMP]](s1), [[C]], [[MV]] - ; GFX9: $vgpr0_vgpr1 = COPY [[SELECT]](s64) + ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(s64) = COPY $vgpr2_vgpr3 + ; GFX9-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](s64) + ; GFX9-NEXT: [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](s64) + ; GFX9-NEXT: [[USUBO:%[0-9]+]]:_(s32), [[USUBO1:%[0-9]+]]:_(s1) = G_USUBO [[UV]], [[UV2]] + ; GFX9-NEXT: [[USUBE:%[0-9]+]]:_(s32), [[USUBE1:%[0-9]+]]:_(s1) = G_USUBE [[UV1]], [[UV3]], [[USUBO1]] + ; GFX9-NEXT: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[USUBO]](s32), [[USUBE]](s32) + ; GFX9-NEXT: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(ult), [[COPY]](s64), [[COPY1]] + ; GFX9-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 + ; GFX9-NEXT: [[SELECT:%[0-9]+]]:_(s64) = G_SELECT [[ICMP]](s1), [[C]], [[MV]] + ; GFX9-NEXT: $vgpr0_vgpr1 = COPY [[SELECT]](s64) %0:_(s64) = COPY $vgpr0_vgpr1 %1:_(s64) = COPY $vgpr2_vgpr3 %2:_(s64) = G_USUBSAT %0, %1 @@ -675,70 +675,70 @@ body: | ; GFX6-LABEL: name: usubsat_v2s64 ; GFX6: [[COPY:%[0-9]+]]:_(<2 x s64>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3 - ; GFX6: [[COPY1:%[0-9]+]]:_(<2 x s64>) = COPY $vgpr4_vgpr5_vgpr6_vgpr7 - ; GFX6: [[UV:%[0-9]+]]:_(s64), [[UV1:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[COPY]](<2 x s64>) - ; GFX6: [[UV2:%[0-9]+]]:_(s64), [[UV3:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[COPY1]](<2 x s64>) - ; GFX6: [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[UV]](s64) - ; GFX6: [[UV6:%[0-9]+]]:_(s32), [[UV7:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[UV2]](s64) - ; GFX6: [[USUBO:%[0-9]+]]:_(s32), [[USUBO1:%[0-9]+]]:_(s1) = G_USUBO [[UV4]], [[UV6]] - ; GFX6: [[USUBE:%[0-9]+]]:_(s32), [[USUBE1:%[0-9]+]]:_(s1) = G_USUBE [[UV5]], [[UV7]], [[USUBO1]] - ; GFX6: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[USUBO]](s32), [[USUBE]](s32) - ; GFX6: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(ult), 
[[UV]](s64), [[UV2]] - ; GFX6: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 - ; GFX6: [[SELECT:%[0-9]+]]:_(s64) = G_SELECT [[ICMP]](s1), [[C]], [[MV]] - ; GFX6: [[UV8:%[0-9]+]]:_(s32), [[UV9:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[UV1]](s64) - ; GFX6: [[UV10:%[0-9]+]]:_(s32), [[UV11:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[UV3]](s64) - ; GFX6: [[USUBO2:%[0-9]+]]:_(s32), [[USUBO3:%[0-9]+]]:_(s1) = G_USUBO [[UV8]], [[UV10]] - ; GFX6: [[USUBE2:%[0-9]+]]:_(s32), [[USUBE3:%[0-9]+]]:_(s1) = G_USUBE [[UV9]], [[UV11]], [[USUBO3]] - ; GFX6: [[MV1:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[USUBO2]](s32), [[USUBE2]](s32) - ; GFX6: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(ult), [[UV1]](s64), [[UV3]] - ; GFX6: [[SELECT1:%[0-9]+]]:_(s64) = G_SELECT [[ICMP1]](s1), [[C]], [[MV1]] - ; GFX6: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s64>) = G_BUILD_VECTOR [[SELECT]](s64), [[SELECT1]](s64) - ; GFX6: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<2 x s64>) + ; GFX6-NEXT: [[COPY1:%[0-9]+]]:_(<2 x s64>) = COPY $vgpr4_vgpr5_vgpr6_vgpr7 + ; GFX6-NEXT: [[UV:%[0-9]+]]:_(s64), [[UV1:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[COPY]](<2 x s64>) + ; GFX6-NEXT: [[UV2:%[0-9]+]]:_(s64), [[UV3:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[COPY1]](<2 x s64>) + ; GFX6-NEXT: [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[UV]](s64) + ; GFX6-NEXT: [[UV6:%[0-9]+]]:_(s32), [[UV7:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[UV2]](s64) + ; GFX6-NEXT: [[USUBO:%[0-9]+]]:_(s32), [[USUBO1:%[0-9]+]]:_(s1) = G_USUBO [[UV4]], [[UV6]] + ; GFX6-NEXT: [[USUBE:%[0-9]+]]:_(s32), [[USUBE1:%[0-9]+]]:_(s1) = G_USUBE [[UV5]], [[UV7]], [[USUBO1]] + ; GFX6-NEXT: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[USUBO]](s32), [[USUBE]](s32) + ; GFX6-NEXT: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(ult), [[UV]](s64), [[UV2]] + ; GFX6-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 + ; GFX6-NEXT: [[SELECT:%[0-9]+]]:_(s64) = G_SELECT [[ICMP]](s1), [[C]], [[MV]] + ; GFX6-NEXT: [[UV8:%[0-9]+]]:_(s32), [[UV9:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[UV1]](s64) + ; GFX6-NEXT: [[UV10:%[0-9]+]]:_(s32), [[UV11:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[UV3]](s64) + ; GFX6-NEXT: [[USUBO2:%[0-9]+]]:_(s32), [[USUBO3:%[0-9]+]]:_(s1) = G_USUBO [[UV8]], [[UV10]] + ; GFX6-NEXT: [[USUBE2:%[0-9]+]]:_(s32), [[USUBE3:%[0-9]+]]:_(s1) = G_USUBE [[UV9]], [[UV11]], [[USUBO3]] + ; GFX6-NEXT: [[MV1:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[USUBO2]](s32), [[USUBE2]](s32) + ; GFX6-NEXT: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(ult), [[UV1]](s64), [[UV3]] + ; GFX6-NEXT: [[SELECT1:%[0-9]+]]:_(s64) = G_SELECT [[ICMP1]](s1), [[C]], [[MV1]] + ; GFX6-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s64>) = G_BUILD_VECTOR [[SELECT]](s64), [[SELECT1]](s64) + ; GFX6-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<2 x s64>) ; GFX8-LABEL: name: usubsat_v2s64 ; GFX8: [[COPY:%[0-9]+]]:_(<2 x s64>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3 - ; GFX8: [[COPY1:%[0-9]+]]:_(<2 x s64>) = COPY $vgpr4_vgpr5_vgpr6_vgpr7 - ; GFX8: [[UV:%[0-9]+]]:_(s64), [[UV1:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[COPY]](<2 x s64>) - ; GFX8: [[UV2:%[0-9]+]]:_(s64), [[UV3:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[COPY1]](<2 x s64>) - ; GFX8: [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[UV]](s64) - ; GFX8: [[UV6:%[0-9]+]]:_(s32), [[UV7:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[UV2]](s64) - ; GFX8: [[USUBO:%[0-9]+]]:_(s32), [[USUBO1:%[0-9]+]]:_(s1) = G_USUBO [[UV4]], [[UV6]] - ; GFX8: [[USUBE:%[0-9]+]]:_(s32), [[USUBE1:%[0-9]+]]:_(s1) = G_USUBE [[UV5]], [[UV7]], [[USUBO1]] - ; GFX8: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[USUBO]](s32), [[USUBE]](s32) - ; GFX8: 
[[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(ult), [[UV]](s64), [[UV2]] - ; GFX8: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 - ; GFX8: [[SELECT:%[0-9]+]]:_(s64) = G_SELECT [[ICMP]](s1), [[C]], [[MV]] - ; GFX8: [[UV8:%[0-9]+]]:_(s32), [[UV9:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[UV1]](s64) - ; GFX8: [[UV10:%[0-9]+]]:_(s32), [[UV11:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[UV3]](s64) - ; GFX8: [[USUBO2:%[0-9]+]]:_(s32), [[USUBO3:%[0-9]+]]:_(s1) = G_USUBO [[UV8]], [[UV10]] - ; GFX8: [[USUBE2:%[0-9]+]]:_(s32), [[USUBE3:%[0-9]+]]:_(s1) = G_USUBE [[UV9]], [[UV11]], [[USUBO3]] - ; GFX8: [[MV1:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[USUBO2]](s32), [[USUBE2]](s32) - ; GFX8: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(ult), [[UV1]](s64), [[UV3]] - ; GFX8: [[SELECT1:%[0-9]+]]:_(s64) = G_SELECT [[ICMP1]](s1), [[C]], [[MV1]] - ; GFX8: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s64>) = G_BUILD_VECTOR [[SELECT]](s64), [[SELECT1]](s64) - ; GFX8: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<2 x s64>) + ; GFX8-NEXT: [[COPY1:%[0-9]+]]:_(<2 x s64>) = COPY $vgpr4_vgpr5_vgpr6_vgpr7 + ; GFX8-NEXT: [[UV:%[0-9]+]]:_(s64), [[UV1:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[COPY]](<2 x s64>) + ; GFX8-NEXT: [[UV2:%[0-9]+]]:_(s64), [[UV3:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[COPY1]](<2 x s64>) + ; GFX8-NEXT: [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[UV]](s64) + ; GFX8-NEXT: [[UV6:%[0-9]+]]:_(s32), [[UV7:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[UV2]](s64) + ; GFX8-NEXT: [[USUBO:%[0-9]+]]:_(s32), [[USUBO1:%[0-9]+]]:_(s1) = G_USUBO [[UV4]], [[UV6]] + ; GFX8-NEXT: [[USUBE:%[0-9]+]]:_(s32), [[USUBE1:%[0-9]+]]:_(s1) = G_USUBE [[UV5]], [[UV7]], [[USUBO1]] + ; GFX8-NEXT: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[USUBO]](s32), [[USUBE]](s32) + ; GFX8-NEXT: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(ult), [[UV]](s64), [[UV2]] + ; GFX8-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 + ; GFX8-NEXT: [[SELECT:%[0-9]+]]:_(s64) = G_SELECT [[ICMP]](s1), [[C]], [[MV]] + ; GFX8-NEXT: [[UV8:%[0-9]+]]:_(s32), [[UV9:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[UV1]](s64) + ; GFX8-NEXT: [[UV10:%[0-9]+]]:_(s32), [[UV11:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[UV3]](s64) + ; GFX8-NEXT: [[USUBO2:%[0-9]+]]:_(s32), [[USUBO3:%[0-9]+]]:_(s1) = G_USUBO [[UV8]], [[UV10]] + ; GFX8-NEXT: [[USUBE2:%[0-9]+]]:_(s32), [[USUBE3:%[0-9]+]]:_(s1) = G_USUBE [[UV9]], [[UV11]], [[USUBO3]] + ; GFX8-NEXT: [[MV1:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[USUBO2]](s32), [[USUBE2]](s32) + ; GFX8-NEXT: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(ult), [[UV1]](s64), [[UV3]] + ; GFX8-NEXT: [[SELECT1:%[0-9]+]]:_(s64) = G_SELECT [[ICMP1]](s1), [[C]], [[MV1]] + ; GFX8-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s64>) = G_BUILD_VECTOR [[SELECT]](s64), [[SELECT1]](s64) + ; GFX8-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<2 x s64>) ; GFX9-LABEL: name: usubsat_v2s64 ; GFX9: [[COPY:%[0-9]+]]:_(<2 x s64>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3 - ; GFX9: [[COPY1:%[0-9]+]]:_(<2 x s64>) = COPY $vgpr4_vgpr5_vgpr6_vgpr7 - ; GFX9: [[UV:%[0-9]+]]:_(s64), [[UV1:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[COPY]](<2 x s64>) - ; GFX9: [[UV2:%[0-9]+]]:_(s64), [[UV3:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[COPY1]](<2 x s64>) - ; GFX9: [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[UV]](s64) - ; GFX9: [[UV6:%[0-9]+]]:_(s32), [[UV7:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[UV2]](s64) - ; GFX9: [[USUBO:%[0-9]+]]:_(s32), [[USUBO1:%[0-9]+]]:_(s1) = G_USUBO [[UV4]], [[UV6]] - ; GFX9: [[USUBE:%[0-9]+]]:_(s32), [[USUBE1:%[0-9]+]]:_(s1) = G_USUBE [[UV5]], [[UV7]], [[USUBO1]] - ; GFX9: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES 
[[USUBO]](s32), [[USUBE]](s32) - ; GFX9: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(ult), [[UV]](s64), [[UV2]] - ; GFX9: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 - ; GFX9: [[SELECT:%[0-9]+]]:_(s64) = G_SELECT [[ICMP]](s1), [[C]], [[MV]] - ; GFX9: [[UV8:%[0-9]+]]:_(s32), [[UV9:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[UV1]](s64) - ; GFX9: [[UV10:%[0-9]+]]:_(s32), [[UV11:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[UV3]](s64) - ; GFX9: [[USUBO2:%[0-9]+]]:_(s32), [[USUBO3:%[0-9]+]]:_(s1) = G_USUBO [[UV8]], [[UV10]] - ; GFX9: [[USUBE2:%[0-9]+]]:_(s32), [[USUBE3:%[0-9]+]]:_(s1) = G_USUBE [[UV9]], [[UV11]], [[USUBO3]] - ; GFX9: [[MV1:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[USUBO2]](s32), [[USUBE2]](s32) - ; GFX9: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(ult), [[UV1]](s64), [[UV3]] - ; GFX9: [[SELECT1:%[0-9]+]]:_(s64) = G_SELECT [[ICMP1]](s1), [[C]], [[MV1]] - ; GFX9: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s64>) = G_BUILD_VECTOR [[SELECT]](s64), [[SELECT1]](s64) - ; GFX9: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<2 x s64>) + ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(<2 x s64>) = COPY $vgpr4_vgpr5_vgpr6_vgpr7 + ; GFX9-NEXT: [[UV:%[0-9]+]]:_(s64), [[UV1:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[COPY]](<2 x s64>) + ; GFX9-NEXT: [[UV2:%[0-9]+]]:_(s64), [[UV3:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[COPY1]](<2 x s64>) + ; GFX9-NEXT: [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[UV]](s64) + ; GFX9-NEXT: [[UV6:%[0-9]+]]:_(s32), [[UV7:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[UV2]](s64) + ; GFX9-NEXT: [[USUBO:%[0-9]+]]:_(s32), [[USUBO1:%[0-9]+]]:_(s1) = G_USUBO [[UV4]], [[UV6]] + ; GFX9-NEXT: [[USUBE:%[0-9]+]]:_(s32), [[USUBE1:%[0-9]+]]:_(s1) = G_USUBE [[UV5]], [[UV7]], [[USUBO1]] + ; GFX9-NEXT: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[USUBO]](s32), [[USUBE]](s32) + ; GFX9-NEXT: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(ult), [[UV]](s64), [[UV2]] + ; GFX9-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 + ; GFX9-NEXT: [[SELECT:%[0-9]+]]:_(s64) = G_SELECT [[ICMP]](s1), [[C]], [[MV]] + ; GFX9-NEXT: [[UV8:%[0-9]+]]:_(s32), [[UV9:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[UV1]](s64) + ; GFX9-NEXT: [[UV10:%[0-9]+]]:_(s32), [[UV11:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[UV3]](s64) + ; GFX9-NEXT: [[USUBO2:%[0-9]+]]:_(s32), [[USUBO3:%[0-9]+]]:_(s1) = G_USUBO [[UV8]], [[UV10]] + ; GFX9-NEXT: [[USUBE2:%[0-9]+]]:_(s32), [[USUBE3:%[0-9]+]]:_(s1) = G_USUBE [[UV9]], [[UV11]], [[USUBO3]] + ; GFX9-NEXT: [[MV1:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[USUBO2]](s32), [[USUBE2]](s32) + ; GFX9-NEXT: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(ult), [[UV1]](s64), [[UV3]] + ; GFX9-NEXT: [[SELECT1:%[0-9]+]]:_(s64) = G_SELECT [[ICMP1]](s1), [[C]], [[MV1]] + ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s64>) = G_BUILD_VECTOR [[SELECT]](s64), [[SELECT1]](s64) + ; GFX9-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<2 x s64>) %0:_(<2 x s64>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3 %1:_(<2 x s64>) = COPY $vgpr4_vgpr5_vgpr6_vgpr7 %2:_(<2 x s64>) = G_USUBSAT %0, %1 diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-vector-args-gfx7.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-vector-args-gfx7.mir new file mode 100644 index 000000000000..075e09d18df6 --- /dev/null +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-vector-args-gfx7.mir @@ -0,0 +1,355 @@ +# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py +# RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=hawaii -run-pass=legalizer -verify-machineinstrs -o - %s | FileCheck -check-prefix=GFX7 %s + +--- | + + define <2 x i16> @and_v2i16(<2 x i16> %a, <2 x i16> %b) #0 { + %and = and <2 x 
i16> %a, %b + ret <2 x i16> %and + } + + define <3 x i16> @add_v3i16(<3 x i16> %a, <3 x i16> %b) #0 { + %add = add <3 x i16> %a, %b + ret <3 x i16> %add + } + + define <3 x i16> @shl_v3i16(<3 x i16> %a, <3 x i16> %b) #0 { + %shl = shl <3 x i16> %a, %b + ret <3 x i16> %shl + } + + define <4 x half> @fma_v4f16(<4 x half> %a, <4 x half> %b, <4 x half> %c) { + %fma = call <4 x half> @llvm.fma.v4f16(<4 x half> %a, <4 x half> %b, <4 x half> %c) + ret <4 x half> %fma + } + + define amdgpu_ps <5 x half> @maxnum_v5i16(<5 x half> %a, <5 x half> %b) { + %fma = call <5 x half> @llvm.maxnum.v5f16(<5 x half> %a, <5 x half> %b) + ret <5 x half> %fma + } + + declare <4 x half> @llvm.fma.v4f16(<4 x half>, <4 x half>, <4 x half>) + declare <5 x half> @llvm.maxnum.v5f16(<5 x half>, <5 x half>) +... + +--- +name: and_v2i16 +body: | + bb.1: + liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $sgpr30_sgpr31 + + ; GFX7-LABEL: name: and_v2i16 + ; GFX7: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 + ; GFX7-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; GFX7-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32) + ; GFX7-NEXT: [[TRUNC:%[0-9]+]]:_(<2 x s16>) = G_TRUNC [[BUILD_VECTOR]](<2 x s32>) + ; GFX7-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 + ; GFX7-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $vgpr3 + ; GFX7-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[COPY2]](s32), [[COPY3]](s32) + ; GFX7-NEXT: [[TRUNC1:%[0-9]+]]:_(<2 x s16>) = G_TRUNC [[BUILD_VECTOR1]](<2 x s32>) + ; GFX7-NEXT: [[COPY4:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 + ; GFX7-NEXT: [[AND:%[0-9]+]]:_(<2 x s16>) = G_AND [[TRUNC]], [[TRUNC1]] + ; GFX7-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[AND]](<2 x s16>) + ; GFX7-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; GFX7-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) + ; GFX7-NEXT: $vgpr0 = COPY [[BITCAST]](s32) + ; GFX7-NEXT: $vgpr1 = COPY [[LSHR]](s32) + ; GFX7-NEXT: [[COPY5:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY4]] + ; GFX7-NEXT: S_SETPC_B64_return [[COPY5]], implicit $vgpr0, implicit $vgpr1 + %3:_(s32) = COPY $vgpr0 + %4:_(s32) = COPY $vgpr1 + %5:_(<2 x s32>) = G_BUILD_VECTOR %3(s32), %4(s32) + %0:_(<2 x s16>) = G_TRUNC %5(<2 x s32>) + %6:_(s32) = COPY $vgpr2 + %7:_(s32) = COPY $vgpr3 + %8:_(<2 x s32>) = G_BUILD_VECTOR %6(s32), %7(s32) + %1:_(<2 x s16>) = G_TRUNC %8(<2 x s32>) + %2:sgpr_64 = COPY $sgpr30_sgpr31 + %9:_(<2 x s16>) = G_AND %0, %1 + %13:_(s16), %14:_(s16) = G_UNMERGE_VALUES %9(<2 x s16>) + %11:_(s32) = G_ANYEXT %13(s16) + %12:_(s32) = G_ANYEXT %14(s16) + $vgpr0 = COPY %11(s32) + $vgpr1 = COPY %12(s32) + %10:ccr_sgpr_64 = COPY %2 + S_SETPC_B64_return %10, implicit $vgpr0, implicit $vgpr1 + +... 
+ +--- +name: add_v3i16 +body: | + bb.1: + liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $sgpr30_sgpr31 + + ; GFX7-LABEL: name: add_v3i16 + ; GFX7: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 + ; GFX7-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; GFX7-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 + ; GFX7-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $vgpr3 + ; GFX7-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $vgpr4 + ; GFX7-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $vgpr5 + ; GFX7-NEXT: [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 + ; GFX7-NEXT: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[COPY]], [[COPY3]] + ; GFX7-NEXT: [[ADD1:%[0-9]+]]:_(s32) = G_ADD [[COPY1]], [[COPY4]] + ; GFX7-NEXT: [[ADD2:%[0-9]+]]:_(s32) = G_ADD [[COPY2]], [[COPY5]] + ; GFX7-NEXT: $vgpr0 = COPY [[ADD]](s32) + ; GFX7-NEXT: $vgpr1 = COPY [[ADD1]](s32) + ; GFX7-NEXT: $vgpr2 = COPY [[ADD2]](s32) + ; GFX7-NEXT: [[COPY7:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY6]] + ; GFX7-NEXT: S_SETPC_B64_return [[COPY7]], implicit $vgpr0, implicit $vgpr1, implicit $vgpr2 + %3:_(s32) = COPY $vgpr0 + %4:_(s32) = COPY $vgpr1 + %5:_(s32) = COPY $vgpr2 + %6:_(<3 x s32>) = G_BUILD_VECTOR %3(s32), %4(s32), %5(s32) + %0:_(<3 x s16>) = G_TRUNC %6(<3 x s32>) + %7:_(s32) = COPY $vgpr3 + %8:_(s32) = COPY $vgpr4 + %9:_(s32) = COPY $vgpr5 + %10:_(<3 x s32>) = G_BUILD_VECTOR %7(s32), %8(s32), %9(s32) + %1:_(<3 x s16>) = G_TRUNC %10(<3 x s32>) + %2:sgpr_64 = COPY $sgpr30_sgpr31 + %11:_(<3 x s16>) = G_ADD %0, %1 + %16:_(s16), %17:_(s16), %18:_(s16) = G_UNMERGE_VALUES %11(<3 x s16>) + %13:_(s32) = G_ANYEXT %16(s16) + %14:_(s32) = G_ANYEXT %17(s16) + %15:_(s32) = G_ANYEXT %18(s16) + $vgpr0 = COPY %13(s32) + $vgpr1 = COPY %14(s32) + $vgpr2 = COPY %15(s32) + %12:ccr_sgpr_64 = COPY %2 + S_SETPC_B64_return %12, implicit $vgpr0, implicit $vgpr1, implicit $vgpr2 + +... 
+ +--- +name: shl_v3i16 +body: | + bb.1: + liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $sgpr30_sgpr31 + + ; GFX7-LABEL: name: shl_v3i16 + ; GFX7: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 + ; GFX7-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; GFX7-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 + ; GFX7-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $vgpr3 + ; GFX7-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $vgpr4 + ; GFX7-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $vgpr5 + ; GFX7-NEXT: [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 + ; GFX7-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 + ; GFX7-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY3]], [[C]] + ; GFX7-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY]], [[AND]](s32) + ; GFX7-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[COPY4]], [[C]] + ; GFX7-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[COPY1]], [[AND1]](s32) + ; GFX7-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[COPY5]], [[C]] + ; GFX7-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[COPY2]], [[AND2]](s32) + ; GFX7-NEXT: $vgpr0 = COPY [[SHL]](s32) + ; GFX7-NEXT: $vgpr1 = COPY [[SHL1]](s32) + ; GFX7-NEXT: $vgpr2 = COPY [[SHL2]](s32) + ; GFX7-NEXT: [[COPY7:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY6]] + ; GFX7-NEXT: S_SETPC_B64_return [[COPY7]], implicit $vgpr0, implicit $vgpr1, implicit $vgpr2 + %3:_(s32) = COPY $vgpr0 + %4:_(s32) = COPY $vgpr1 + %5:_(s32) = COPY $vgpr2 + %6:_(<3 x s32>) = G_BUILD_VECTOR %3(s32), %4(s32), %5(s32) + %0:_(<3 x s16>) = G_TRUNC %6(<3 x s32>) + %7:_(s32) = COPY $vgpr3 + %8:_(s32) = COPY $vgpr4 + %9:_(s32) = COPY $vgpr5 + %10:_(<3 x s32>) = G_BUILD_VECTOR %7(s32), %8(s32), %9(s32) + %1:_(<3 x s16>) = G_TRUNC %10(<3 x s32>) + %2:sgpr_64 = COPY $sgpr30_sgpr31 + %11:_(<3 x s16>) = G_SHL %0, %1(<3 x s16>) + %16:_(s16), %17:_(s16), %18:_(s16) = G_UNMERGE_VALUES %11(<3 x s16>) + %13:_(s32) = G_ANYEXT %16(s16) + %14:_(s32) = G_ANYEXT %17(s16) + %15:_(s32) = G_ANYEXT %18(s16) + $vgpr0 = COPY %13(s32) + $vgpr1 = COPY %14(s32) + $vgpr2 = COPY %15(s32) + %12:ccr_sgpr_64 = COPY %2 + S_SETPC_B64_return %12, implicit $vgpr0, implicit $vgpr1, implicit $vgpr2 + +... 
+ +--- +name: fma_v4f16 +body: | + bb.1: + liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $vgpr8, $vgpr9, $vgpr10, $vgpr11, $sgpr30_sgpr31 + + ; GFX7-LABEL: name: fma_v4f16 + ; GFX7: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 + ; GFX7-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; GFX7-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 + ; GFX7-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $vgpr3 + ; GFX7-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $vgpr4 + ; GFX7-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $vgpr5 + ; GFX7-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $vgpr6 + ; GFX7-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $vgpr7 + ; GFX7-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr8 + ; GFX7-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr9 + ; GFX7-NEXT: [[COPY10:%[0-9]+]]:_(s32) = COPY $vgpr10 + ; GFX7-NEXT: [[COPY11:%[0-9]+]]:_(s32) = COPY $vgpr11 + ; GFX7-NEXT: [[COPY12:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 + ; GFX7-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY]](s32) + ; GFX7-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY1]](s32) + ; GFX7-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[COPY2]](s32) + ; GFX7-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[COPY3]](s32) + ; GFX7-NEXT: [[TRUNC4:%[0-9]+]]:_(s16) = G_TRUNC [[COPY4]](s32) + ; GFX7-NEXT: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[COPY5]](s32) + ; GFX7-NEXT: [[TRUNC6:%[0-9]+]]:_(s16) = G_TRUNC [[COPY6]](s32) + ; GFX7-NEXT: [[TRUNC7:%[0-9]+]]:_(s16) = G_TRUNC [[COPY7]](s32) + ; GFX7-NEXT: [[TRUNC8:%[0-9]+]]:_(s16) = G_TRUNC [[COPY8]](s32) + ; GFX7-NEXT: [[TRUNC9:%[0-9]+]]:_(s16) = G_TRUNC [[COPY9]](s32) + ; GFX7-NEXT: [[TRUNC10:%[0-9]+]]:_(s16) = G_TRUNC [[COPY10]](s32) + ; GFX7-NEXT: [[TRUNC11:%[0-9]+]]:_(s16) = G_TRUNC [[COPY11]](s32) + ; GFX7-NEXT: [[FPEXT:%[0-9]+]]:_(s32) = G_FPEXT [[TRUNC]](s16) + ; GFX7-NEXT: [[FPEXT1:%[0-9]+]]:_(s32) = G_FPEXT [[TRUNC4]](s16) + ; GFX7-NEXT: [[FPEXT2:%[0-9]+]]:_(s32) = G_FPEXT [[TRUNC8]](s16) + ; GFX7-NEXT: [[FMA:%[0-9]+]]:_(s32) = G_FMA [[FPEXT]], [[FPEXT1]], [[FPEXT2]] + ; GFX7-NEXT: [[FPTRUNC:%[0-9]+]]:_(s16) = G_FPTRUNC [[FMA]](s32) + ; GFX7-NEXT: [[FPEXT3:%[0-9]+]]:_(s32) = G_FPEXT [[TRUNC1]](s16) + ; GFX7-NEXT: [[FPEXT4:%[0-9]+]]:_(s32) = G_FPEXT [[TRUNC5]](s16) + ; GFX7-NEXT: [[FPEXT5:%[0-9]+]]:_(s32) = G_FPEXT [[TRUNC9]](s16) + ; GFX7-NEXT: [[FMA1:%[0-9]+]]:_(s32) = G_FMA [[FPEXT3]], [[FPEXT4]], [[FPEXT5]] + ; GFX7-NEXT: [[FPTRUNC1:%[0-9]+]]:_(s16) = G_FPTRUNC [[FMA1]](s32) + ; GFX7-NEXT: [[FPEXT6:%[0-9]+]]:_(s32) = G_FPEXT [[TRUNC2]](s16) + ; GFX7-NEXT: [[FPEXT7:%[0-9]+]]:_(s32) = G_FPEXT [[TRUNC6]](s16) + ; GFX7-NEXT: [[FPEXT8:%[0-9]+]]:_(s32) = G_FPEXT [[TRUNC10]](s16) + ; GFX7-NEXT: [[FMA2:%[0-9]+]]:_(s32) = G_FMA [[FPEXT6]], [[FPEXT7]], [[FPEXT8]] + ; GFX7-NEXT: [[FPTRUNC2:%[0-9]+]]:_(s16) = G_FPTRUNC [[FMA2]](s32) + ; GFX7-NEXT: [[FPEXT9:%[0-9]+]]:_(s32) = G_FPEXT [[TRUNC3]](s16) + ; GFX7-NEXT: [[FPEXT10:%[0-9]+]]:_(s32) = G_FPEXT [[TRUNC7]](s16) + ; GFX7-NEXT: [[FPEXT11:%[0-9]+]]:_(s32) = G_FPEXT [[TRUNC11]](s16) + ; GFX7-NEXT: [[FMA3:%[0-9]+]]:_(s32) = G_FMA [[FPEXT9]], [[FPEXT10]], [[FPEXT11]] + ; GFX7-NEXT: [[FPTRUNC3:%[0-9]+]]:_(s16) = G_FPTRUNC [[FMA3]](s32) + ; GFX7-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[FPTRUNC]](s16) + ; GFX7-NEXT: [[ANYEXT1:%[0-9]+]]:_(s32) = G_ANYEXT [[FPTRUNC1]](s16) + ; GFX7-NEXT: [[ANYEXT2:%[0-9]+]]:_(s32) = G_ANYEXT [[FPTRUNC2]](s16) + ; GFX7-NEXT: [[ANYEXT3:%[0-9]+]]:_(s32) = G_ANYEXT [[FPTRUNC3]](s16) + ; GFX7-NEXT: $vgpr0 = COPY [[ANYEXT]](s32) + ; GFX7-NEXT: $vgpr1 = COPY [[ANYEXT1]](s32) + ; GFX7-NEXT: $vgpr2 = COPY [[ANYEXT2]](s32) + ; GFX7-NEXT: $vgpr3 = 
COPY [[ANYEXT3]](s32) + ; GFX7-NEXT: [[COPY13:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY12]] + ; GFX7-NEXT: S_SETPC_B64_return [[COPY13]], implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3 + %4:_(s32) = COPY $vgpr0 + %5:_(s32) = COPY $vgpr1 + %6:_(s32) = COPY $vgpr2 + %7:_(s32) = COPY $vgpr3 + %8:_(<4 x s32>) = G_BUILD_VECTOR %4(s32), %5(s32), %6(s32), %7(s32) + %0:_(<4 x s16>) = G_TRUNC %8(<4 x s32>) + %9:_(s32) = COPY $vgpr4 + %10:_(s32) = COPY $vgpr5 + %11:_(s32) = COPY $vgpr6 + %12:_(s32) = COPY $vgpr7 + %13:_(<4 x s32>) = G_BUILD_VECTOR %9(s32), %10(s32), %11(s32), %12(s32) + %1:_(<4 x s16>) = G_TRUNC %13(<4 x s32>) + %14:_(s32) = COPY $vgpr8 + %15:_(s32) = COPY $vgpr9 + %16:_(s32) = COPY $vgpr10 + %17:_(s32) = COPY $vgpr11 + %18:_(<4 x s32>) = G_BUILD_VECTOR %14(s32), %15(s32), %16(s32), %17(s32) + %2:_(<4 x s16>) = G_TRUNC %18(<4 x s32>) + %3:sgpr_64 = COPY $sgpr30_sgpr31 + %19:_(<4 x s16>) = G_FMA %0, %1, %2 + %25:_(s16), %26:_(s16), %27:_(s16), %28:_(s16) = G_UNMERGE_VALUES %19(<4 x s16>) + %21:_(s32) = G_ANYEXT %25(s16) + %22:_(s32) = G_ANYEXT %26(s16) + %23:_(s32) = G_ANYEXT %27(s16) + %24:_(s32) = G_ANYEXT %28(s16) + $vgpr0 = COPY %21(s32) + $vgpr1 = COPY %22(s32) + $vgpr2 = COPY %23(s32) + $vgpr3 = COPY %24(s32) + %20:ccr_sgpr_64 = COPY %3 + S_SETPC_B64_return %20, implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3 +... + +--- +name: maxnum_v5i16 +body: | + bb.1: + liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $vgpr8, $vgpr9 + + ; GFX7-LABEL: name: maxnum_v5i16 + ; GFX7: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 + ; GFX7-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; GFX7-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 + ; GFX7-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $vgpr3 + ; GFX7-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $vgpr4 + ; GFX7-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $vgpr5 + ; GFX7-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $vgpr6 + ; GFX7-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $vgpr7 + ; GFX7-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr8 + ; GFX7-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr9 + ; GFX7-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY]](s32) + ; GFX7-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY1]](s32) + ; GFX7-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[COPY2]](s32) + ; GFX7-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[COPY3]](s32) + ; GFX7-NEXT: [[TRUNC4:%[0-9]+]]:_(s16) = G_TRUNC [[COPY4]](s32) + ; GFX7-NEXT: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[COPY5]](s32) + ; GFX7-NEXT: [[TRUNC6:%[0-9]+]]:_(s16) = G_TRUNC [[COPY6]](s32) + ; GFX7-NEXT: [[TRUNC7:%[0-9]+]]:_(s16) = G_TRUNC [[COPY7]](s32) + ; GFX7-NEXT: [[TRUNC8:%[0-9]+]]:_(s16) = G_TRUNC [[COPY8]](s32) + ; GFX7-NEXT: [[TRUNC9:%[0-9]+]]:_(s16) = G_TRUNC [[COPY9]](s32) + ; GFX7-NEXT: [[FPEXT:%[0-9]+]]:_(s32) = G_FPEXT [[TRUNC]](s16) + ; GFX7-NEXT: [[FPEXT1:%[0-9]+]]:_(s32) = G_FPEXT [[TRUNC5]](s16) + ; GFX7-NEXT: [[FMAXNUM_IEEE:%[0-9]+]]:_(s32) = G_FMAXNUM_IEEE [[FPEXT]], [[FPEXT1]] + ; GFX7-NEXT: [[FPTRUNC:%[0-9]+]]:_(s16) = G_FPTRUNC [[FMAXNUM_IEEE]](s32) + ; GFX7-NEXT: [[FPEXT2:%[0-9]+]]:_(s32) = G_FPEXT [[TRUNC1]](s16) + ; GFX7-NEXT: [[FPEXT3:%[0-9]+]]:_(s32) = G_FPEXT [[TRUNC6]](s16) + ; GFX7-NEXT: [[FMAXNUM_IEEE1:%[0-9]+]]:_(s32) = G_FMAXNUM_IEEE [[FPEXT2]], [[FPEXT3]] + ; GFX7-NEXT: [[FPTRUNC1:%[0-9]+]]:_(s16) = G_FPTRUNC [[FMAXNUM_IEEE1]](s32) + ; GFX7-NEXT: [[FPEXT4:%[0-9]+]]:_(s32) = G_FPEXT [[TRUNC2]](s16) + ; GFX7-NEXT: [[FPEXT5:%[0-9]+]]:_(s32) = G_FPEXT [[TRUNC7]](s16) + ; GFX7-NEXT: [[FMAXNUM_IEEE2:%[0-9]+]]:_(s32) = G_FMAXNUM_IEEE [[FPEXT4]], 
[[FPEXT5]] + ; GFX7-NEXT: [[FPTRUNC2:%[0-9]+]]:_(s16) = G_FPTRUNC [[FMAXNUM_IEEE2]](s32) + ; GFX7-NEXT: [[FPEXT6:%[0-9]+]]:_(s32) = G_FPEXT [[TRUNC3]](s16) + ; GFX7-NEXT: [[FPEXT7:%[0-9]+]]:_(s32) = G_FPEXT [[TRUNC8]](s16) + ; GFX7-NEXT: [[FMAXNUM_IEEE3:%[0-9]+]]:_(s32) = G_FMAXNUM_IEEE [[FPEXT6]], [[FPEXT7]] + ; GFX7-NEXT: [[FPTRUNC3:%[0-9]+]]:_(s16) = G_FPTRUNC [[FMAXNUM_IEEE3]](s32) + ; GFX7-NEXT: [[FPEXT8:%[0-9]+]]:_(s32) = G_FPEXT [[TRUNC4]](s16) + ; GFX7-NEXT: [[FPEXT9:%[0-9]+]]:_(s32) = G_FPEXT [[TRUNC9]](s16) + ; GFX7-NEXT: [[FMAXNUM_IEEE4:%[0-9]+]]:_(s32) = G_FMAXNUM_IEEE [[FPEXT8]], [[FPEXT9]] + ; GFX7-NEXT: [[FPTRUNC4:%[0-9]+]]:_(s16) = G_FPTRUNC [[FMAXNUM_IEEE4]](s32) + ; GFX7-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[FPTRUNC]](s16) + ; GFX7-NEXT: [[ANYEXT1:%[0-9]+]]:_(s32) = G_ANYEXT [[FPTRUNC1]](s16) + ; GFX7-NEXT: [[ANYEXT2:%[0-9]+]]:_(s32) = G_ANYEXT [[FPTRUNC2]](s16) + ; GFX7-NEXT: [[ANYEXT3:%[0-9]+]]:_(s32) = G_ANYEXT [[FPTRUNC3]](s16) + ; GFX7-NEXT: [[ANYEXT4:%[0-9]+]]:_(s32) = G_ANYEXT [[FPTRUNC4]](s16) + ; GFX7-NEXT: $vgpr0 = COPY [[ANYEXT]](s32) + ; GFX7-NEXT: $vgpr1 = COPY [[ANYEXT1]](s32) + ; GFX7-NEXT: $vgpr2 = COPY [[ANYEXT2]](s32) + ; GFX7-NEXT: $vgpr3 = COPY [[ANYEXT3]](s32) + ; GFX7-NEXT: $vgpr4 = COPY [[ANYEXT4]](s32) + ; GFX7-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4 + %2:_(s32) = COPY $vgpr0 + %3:_(s32) = COPY $vgpr1 + %4:_(s32) = COPY $vgpr2 + %5:_(s32) = COPY $vgpr3 + %6:_(s32) = COPY $vgpr4 + %7:_(<5 x s32>) = G_BUILD_VECTOR %2(s32), %3(s32), %4(s32), %5(s32), %6(s32) + %0:_(<5 x s16>) = G_TRUNC %7(<5 x s32>) + %8:_(s32) = COPY $vgpr5 + %9:_(s32) = COPY $vgpr6 + %10:_(s32) = COPY $vgpr7 + %11:_(s32) = COPY $vgpr8 + %12:_(s32) = COPY $vgpr9 + %13:_(<5 x s32>) = G_BUILD_VECTOR %8(s32), %9(s32), %10(s32), %11(s32), %12(s32) + %1:_(<5 x s16>) = G_TRUNC %13(<5 x s32>) + %15:_(<5 x s16>) = G_FMAXNUM %0, %1 + %21:_(s16), %22:_(s16), %23:_(s16), %24:_(s16), %25:_(s16) = G_UNMERGE_VALUES %15(<5 x s16>) + %16:_(s32) = G_ANYEXT %21(s16) + %17:_(s32) = G_ANYEXT %22(s16) + %18:_(s32) = G_ANYEXT %23(s16) + %19:_(s32) = G_ANYEXT %24(s16) + %20:_(s32) = G_ANYEXT %25(s16) + $vgpr0 = COPY %16(s32) + $vgpr1 = COPY %17(s32) + $vgpr2 = COPY %18(s32) + $vgpr3 = COPY %19(s32) + $vgpr4 = COPY %20(s32) + SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4 +... 
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-vector-args-gfx8-plus.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-vector-args-gfx8-plus.mir new file mode 100644 index 000000000000..3d718081ba92 --- /dev/null +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-vector-args-gfx8-plus.mir @@ -0,0 +1,531 @@ +# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py +# RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=fiji -run-pass=legalizer -global-isel-abort=0 -verify-machineinstrs -o - %s | FileCheck -check-prefix=GFX8 %s +# RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx900 -run-pass=legalizer -global-isel-abort=0 -verify-machineinstrs -o - %s | FileCheck -check-prefix=GFX9 %s + +--- | + + define <2 x i16> @and_v2i16(<2 x i16> %a, <2 x i16> %b) #0 { + %and = and <2 x i16> %a, %b + ret <2 x i16> %and + } + + define <3 x i16> @add_v3i16(<3 x i16> %a, <3 x i16> %b) #0 { + %add = add <3 x i16> %a, %b + ret <3 x i16> %add + } + + define <3 x i16> @shl_v3i16(<3 x i16> %a, <3 x i16> %b) #0 { + %shl = shl <3 x i16> %a, %b + ret <3 x i16> %shl + } + + define <4 x half> @fma_v4f16(<4 x half> %a, <4 x half> %b, <4 x half> %c) { + %fma = call <4 x half> @llvm.fma.v4f16(<4 x half> %a, <4 x half> %b, <4 x half> %c) + ret <4 x half> %fma + } + + define amdgpu_ps <5 x half> @maxnum_v5i16(<5 x half> %a, <5 x half> %b) { + %fma = call <5 x half> @llvm.maxnum.v5f16(<5 x half> %a, <5 x half> %b) + ret <5 x half> %fma + } + + declare <4 x half> @llvm.fma.v4f16(<4 x half>, <4 x half>, <4 x half>) + declare <5 x half> @llvm.maxnum.v5f16(<5 x half>, <5 x half>) +... + +--- +name: and_v2i16 +body: | + bb.1: + liveins: $vgpr0, $vgpr1, $sgpr30_sgpr31 + + ; GFX8-LABEL: name: and_v2i16 + ; GFX8: [[COPY:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0 + ; GFX8-NEXT: [[COPY1:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr1 + ; GFX8-NEXT: [[COPY2:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 + ; GFX8-NEXT: [[AND:%[0-9]+]]:_(<2 x s16>) = G_AND [[COPY]], [[COPY1]] + ; GFX8-NEXT: $vgpr0 = COPY [[AND]](<2 x s16>) + ; GFX8-NEXT: [[COPY3:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY2]] + ; GFX8-NEXT: S_SETPC_B64_return [[COPY3]], implicit $vgpr0 + ; GFX9-LABEL: name: and_v2i16 + ; GFX9: [[COPY:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0 + ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr1 + ; GFX9-NEXT: [[COPY2:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 + ; GFX9-NEXT: [[AND:%[0-9]+]]:_(<2 x s16>) = G_AND [[COPY]], [[COPY1]] + ; GFX9-NEXT: $vgpr0 = COPY [[AND]](<2 x s16>) + ; GFX9-NEXT: [[COPY3:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY2]] + ; GFX9-NEXT: S_SETPC_B64_return [[COPY3]], implicit $vgpr0 + %0:_(<2 x s16>) = COPY $vgpr0 + %1:_(<2 x s16>) = COPY $vgpr1 + %2:sgpr_64 = COPY $sgpr30_sgpr31 + %3:_(<2 x s16>) = G_AND %0, %1 + $vgpr0 = COPY %3(<2 x s16>) + %4:ccr_sgpr_64 = COPY %2 + S_SETPC_B64_return %4, implicit $vgpr0 +... 
+ +--- +name: add_v3i16 +body: | + bb.1: + liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $sgpr30_sgpr31 + + ; GFX8-LABEL: name: add_v3i16 + ; GFX8: [[COPY:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0 + ; GFX8-NEXT: [[COPY1:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr1 + ; GFX8-NEXT: [[DEF:%[0-9]+]]:_(<2 x s16>) = G_IMPLICIT_DEF + ; GFX8-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[COPY]](<2 x s16>), [[COPY1]](<2 x s16>), [[DEF]](<2 x s16>) + ; GFX8-NEXT: [[BITCAST:%[0-9]+]]:_(s96) = G_BITCAST [[CONCAT_VECTORS]](<6 x s16>) + ; GFX8-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[BITCAST]](s96) + ; GFX8-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[UV]](s32) + ; GFX8-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; GFX8-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[UV]], [[C]](s32) + ; GFX8-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) + ; GFX8-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[UV1]](s32) + ; GFX8-NEXT: [[COPY2:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr2 + ; GFX8-NEXT: [[COPY3:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr3 + ; GFX8-NEXT: [[CONCAT_VECTORS1:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[COPY2]](<2 x s16>), [[COPY3]](<2 x s16>), [[DEF]](<2 x s16>) + ; GFX8-NEXT: [[BITCAST1:%[0-9]+]]:_(s96) = G_BITCAST [[CONCAT_VECTORS1]](<6 x s16>) + ; GFX8-NEXT: [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[BITCAST1]](s96) + ; GFX8-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[UV3]](s32) + ; GFX8-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[UV3]], [[C]](s32) + ; GFX8-NEXT: [[TRUNC4:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR1]](s32) + ; GFX8-NEXT: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[UV4]](s32) + ; GFX8-NEXT: [[COPY4:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 + ; GFX8-NEXT: [[ADD:%[0-9]+]]:_(s16) = G_ADD [[TRUNC]], [[TRUNC3]] + ; GFX8-NEXT: [[ADD1:%[0-9]+]]:_(s16) = G_ADD [[TRUNC1]], [[TRUNC4]] + ; GFX8-NEXT: [[ADD2:%[0-9]+]]:_(s16) = G_ADD [[TRUNC2]], [[TRUNC5]] + ; GFX8-NEXT: [[DEF1:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF + ; GFX8-NEXT: [[UV6:%[0-9]+]]:_(<2 x s16>), [[UV7:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF1]](<4 x s16>) + ; GFX8-NEXT: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[UV6]](<2 x s16>) + ; GFX8-NEXT: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[ADD]](s16) + ; GFX8-NEXT: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[ADD1]](s16) + ; GFX8-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C]](s32) + ; GFX8-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL]] + ; GFX8-NEXT: [[BITCAST3:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) + ; GFX8-NEXT: [[ZEXT2:%[0-9]+]]:_(s32) = G_ZEXT [[ADD2]](s16) + ; GFX8-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 + ; GFX8-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[BITCAST2]], [[C1]] + ; GFX8-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND]], [[C]](s32) + ; GFX8-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[ZEXT2]], [[SHL1]] + ; GFX8-NEXT: [[BITCAST4:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32) + ; GFX8-NEXT: $vgpr0 = COPY [[BITCAST3]](<2 x s16>) + ; GFX8-NEXT: $vgpr1 = COPY [[BITCAST4]](<2 x s16>) + ; GFX8-NEXT: [[COPY5:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY4]] + ; GFX8-NEXT: S_SETPC_B64_return [[COPY5]], implicit $vgpr0, implicit $vgpr1 + ; GFX9-LABEL: name: add_v3i16 + ; GFX9: [[COPY:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0 + ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr1 + ; GFX9-NEXT: [[DEF:%[0-9]+]]:_(<2 x s16>) = G_IMPLICIT_DEF + ; GFX9-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[COPY]](<2 x s16>), [[COPY1]](<2 x s16>), [[DEF]](<2 x s16>) + ; GFX9-NEXT: 
[[BITCAST:%[0-9]+]]:_(s96) = G_BITCAST [[CONCAT_VECTORS]](<6 x s16>) + ; GFX9-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[BITCAST]](s96) + ; GFX9-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; GFX9-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[UV]], [[C]](s32) + ; GFX9-NEXT: [[COPY2:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr2 + ; GFX9-NEXT: [[COPY3:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr3 + ; GFX9-NEXT: [[CONCAT_VECTORS1:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[COPY2]](<2 x s16>), [[COPY3]](<2 x s16>), [[DEF]](<2 x s16>) + ; GFX9-NEXT: [[BITCAST1:%[0-9]+]]:_(s96) = G_BITCAST [[CONCAT_VECTORS1]](<6 x s16>) + ; GFX9-NEXT: [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[BITCAST1]](s96) + ; GFX9-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[UV3]], [[C]](s32) + ; GFX9-NEXT: [[COPY4:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 + ; GFX9-NEXT: [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[UV]](s32), [[LSHR]](s32) + ; GFX9-NEXT: [[DEF1:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF + ; GFX9-NEXT: [[BUILD_VECTOR_TRUNC1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[UV1]](s32), [[DEF1]](s32) + ; GFX9-NEXT: [[BUILD_VECTOR_TRUNC2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[UV3]](s32), [[LSHR1]](s32) + ; GFX9-NEXT: [[BUILD_VECTOR_TRUNC3:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[UV4]](s32), [[DEF1]](s32) + ; GFX9-NEXT: [[ADD:%[0-9]+]]:_(<2 x s16>) = G_ADD [[BUILD_VECTOR_TRUNC]], [[BUILD_VECTOR_TRUNC2]] + ; GFX9-NEXT: [[ADD1:%[0-9]+]]:_(<2 x s16>) = G_ADD [[BUILD_VECTOR_TRUNC1]], [[BUILD_VECTOR_TRUNC3]] + ; GFX9-NEXT: [[DEF2:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF + ; GFX9-NEXT: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[ADD1]](<2 x s16>) + ; GFX9-NEXT: [[UV6:%[0-9]+]]:_(<2 x s16>), [[UV7:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF2]](<4 x s16>) + ; GFX9-NEXT: [[BITCAST3:%[0-9]+]]:_(s32) = G_BITCAST [[UV6]](<2 x s16>) + ; GFX9-NEXT: [[BUILD_VECTOR_TRUNC4:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[BITCAST2]](s32), [[BITCAST3]](s32) + ; GFX9-NEXT: $vgpr0 = COPY [[ADD]](<2 x s16>) + ; GFX9-NEXT: $vgpr1 = COPY [[BUILD_VECTOR_TRUNC4]](<2 x s16>) + ; GFX9-NEXT: [[COPY5:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY4]] + ; GFX9-NEXT: S_SETPC_B64_return [[COPY5]], implicit $vgpr0, implicit $vgpr1 + %3:_(<2 x s16>) = COPY $vgpr0 + %4:_(<2 x s16>) = COPY $vgpr1 + %5:_(<2 x s16>) = G_IMPLICIT_DEF + %6:_(<6 x s16>) = G_CONCAT_VECTORS %3(<2 x s16>), %4(<2 x s16>), %5(<2 x s16>) + %19:_(s96) = G_BITCAST %6(<6 x s16>) + %20:_(s48) = G_TRUNC %19(s96) + %0:_(<3 x s16>) = G_BITCAST %20(s48) + %8:_(<2 x s16>) = COPY $vgpr2 + %9:_(<2 x s16>) = COPY $vgpr3 + %10:_(<6 x s16>) = G_CONCAT_VECTORS %8(<2 x s16>), %9(<2 x s16>), %5(<2 x s16>) + %21:_(s96) = G_BITCAST %10(<6 x s16>) + %22:_(s48) = G_TRUNC %21(s96) + %1:_(<3 x s16>) = G_BITCAST %22(s48) + %2:sgpr_64 = COPY $sgpr30_sgpr31 + %12:_(<3 x s16>) = G_ADD %0, %1 + %16:_(<3 x s16>) = G_IMPLICIT_DEF + %17:_(<6 x s16>) = G_CONCAT_VECTORS %12(<3 x s16>), %16(<3 x s16>) + %14:_(<2 x s16>), %15:_(<2 x s16>), %18:_(<2 x s16>) = G_UNMERGE_VALUES %17(<6 x s16>) + $vgpr0 = COPY %14(<2 x s16>) + $vgpr1 = COPY %15(<2 x s16>) + %13:ccr_sgpr_64 = COPY %2 + S_SETPC_B64_return %13, implicit $vgpr0, implicit $vgpr1 +... 
+ +--- +name: shl_v3i16 +body: | + bb.1: + liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $sgpr30_sgpr31 + + ; GFX8-LABEL: name: shl_v3i16 + ; GFX8: [[COPY:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0 + ; GFX8-NEXT: [[COPY1:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr1 + ; GFX8-NEXT: [[DEF:%[0-9]+]]:_(<2 x s16>) = G_IMPLICIT_DEF + ; GFX8-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[COPY]](<2 x s16>), [[COPY1]](<2 x s16>), [[DEF]](<2 x s16>) + ; GFX8-NEXT: [[BITCAST:%[0-9]+]]:_(s96) = G_BITCAST [[CONCAT_VECTORS]](<6 x s16>) + ; GFX8-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[BITCAST]](s96) + ; GFX8-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[UV]](s32) + ; GFX8-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; GFX8-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[UV]], [[C]](s32) + ; GFX8-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) + ; GFX8-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[UV1]](s32) + ; GFX8-NEXT: [[COPY2:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr2 + ; GFX8-NEXT: [[COPY3:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr3 + ; GFX8-NEXT: [[CONCAT_VECTORS1:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[COPY2]](<2 x s16>), [[COPY3]](<2 x s16>), [[DEF]](<2 x s16>) + ; GFX8-NEXT: [[BITCAST1:%[0-9]+]]:_(s96) = G_BITCAST [[CONCAT_VECTORS1]](<6 x s16>) + ; GFX8-NEXT: [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[BITCAST1]](s96) + ; GFX8-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[UV3]](s32) + ; GFX8-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[UV3]], [[C]](s32) + ; GFX8-NEXT: [[TRUNC4:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR1]](s32) + ; GFX8-NEXT: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[UV4]](s32) + ; GFX8-NEXT: [[COPY4:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 + ; GFX8-NEXT: [[SHL:%[0-9]+]]:_(s16) = G_SHL [[TRUNC]], [[TRUNC3]](s16) + ; GFX8-NEXT: [[SHL1:%[0-9]+]]:_(s16) = G_SHL [[TRUNC1]], [[TRUNC4]](s16) + ; GFX8-NEXT: [[SHL2:%[0-9]+]]:_(s16) = G_SHL [[TRUNC2]], [[TRUNC5]](s16) + ; GFX8-NEXT: [[DEF1:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF + ; GFX8-NEXT: [[UV6:%[0-9]+]]:_(<2 x s16>), [[UV7:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF1]](<4 x s16>) + ; GFX8-NEXT: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[UV6]](<2 x s16>) + ; GFX8-NEXT: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[SHL]](s16) + ; GFX8-NEXT: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[SHL1]](s16) + ; GFX8-NEXT: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C]](s32) + ; GFX8-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL3]] + ; GFX8-NEXT: [[BITCAST3:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) + ; GFX8-NEXT: [[ZEXT2:%[0-9]+]]:_(s32) = G_ZEXT [[SHL2]](s16) + ; GFX8-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 + ; GFX8-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[BITCAST2]], [[C1]] + ; GFX8-NEXT: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[AND]], [[C]](s32) + ; GFX8-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[ZEXT2]], [[SHL4]] + ; GFX8-NEXT: [[BITCAST4:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32) + ; GFX8-NEXT: $vgpr0 = COPY [[BITCAST3]](<2 x s16>) + ; GFX8-NEXT: $vgpr1 = COPY [[BITCAST4]](<2 x s16>) + ; GFX8-NEXT: [[COPY5:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY4]] + ; GFX8-NEXT: S_SETPC_B64_return [[COPY5]], implicit $vgpr0, implicit $vgpr1 + ; GFX9-LABEL: name: shl_v3i16 + ; GFX9: [[COPY:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0 + ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr1 + ; GFX9-NEXT: [[DEF:%[0-9]+]]:_(<2 x s16>) = G_IMPLICIT_DEF + ; GFX9-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[COPY]](<2 x s16>), [[COPY1]](<2 x s16>), [[DEF]](<2 x s16>) + ; 
GFX9-NEXT: [[BITCAST:%[0-9]+]]:_(s96) = G_BITCAST [[CONCAT_VECTORS]](<6 x s16>) + ; GFX9-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[BITCAST]](s96) + ; GFX9-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; GFX9-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[UV]], [[C]](s32) + ; GFX9-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[UV1]](s32) + ; GFX9-NEXT: [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[UV]](s32), [[LSHR]](s32) + ; GFX9-NEXT: [[DEF1:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF + ; GFX9-NEXT: [[BUILD_VECTOR_TRUNC1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[UV1]](s32), [[DEF1]](s32) + ; GFX9-NEXT: [[CONCAT_VECTORS1:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR_TRUNC]](<2 x s16>), [[BUILD_VECTOR_TRUNC1]](<2 x s16>), [[DEF]](<2 x s16>) + ; GFX9-NEXT: [[UV3:%[0-9]+]]:_(<3 x s16>), [[UV4:%[0-9]+]]:_(<3 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS1]](<6 x s16>) + ; GFX9-NEXT: [[COPY2:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr2 + ; GFX9-NEXT: [[COPY3:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr3 + ; GFX9-NEXT: [[CONCAT_VECTORS2:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[COPY2]](<2 x s16>), [[COPY3]](<2 x s16>), [[DEF]](<2 x s16>) + ; GFX9-NEXT: [[BITCAST1:%[0-9]+]]:_(s96) = G_BITCAST [[CONCAT_VECTORS2]](<6 x s16>) + ; GFX9-NEXT: [[UV5:%[0-9]+]]:_(s32), [[UV6:%[0-9]+]]:_(s32), [[UV7:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[BITCAST1]](s96) + ; GFX9-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[UV5]], [[C]](s32) + ; GFX9-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[UV6]](s32) + ; GFX9-NEXT: [[BUILD_VECTOR_TRUNC2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[UV5]](s32), [[LSHR1]](s32) + ; GFX9-NEXT: [[BUILD_VECTOR_TRUNC3:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[UV6]](s32), [[DEF1]](s32) + ; GFX9-NEXT: [[CONCAT_VECTORS3:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR_TRUNC2]](<2 x s16>), [[BUILD_VECTOR_TRUNC3]](<2 x s16>), [[DEF]](<2 x s16>) + ; GFX9-NEXT: [[UV8:%[0-9]+]]:_(<3 x s16>), [[UV9:%[0-9]+]]:_(<3 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS3]](<6 x s16>) + ; GFX9-NEXT: [[COPY4:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 + ; GFX9-NEXT: [[DEF2:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF + ; GFX9-NEXT: [[INSERT:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF2]], [[UV3]](<3 x s16>), 0 + ; GFX9-NEXT: [[EXTRACT:%[0-9]+]]:_(<2 x s16>) = G_EXTRACT [[INSERT]](<4 x s16>), 0 + ; GFX9-NEXT: [[COPY5:%[0-9]+]]:_(s16) = COPY [[TRUNC]](s16) + ; GFX9-NEXT: [[INSERT1:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF2]], [[UV8]](<3 x s16>), 0 + ; GFX9-NEXT: [[EXTRACT1:%[0-9]+]]:_(<2 x s16>) = G_EXTRACT [[INSERT1]](<4 x s16>), 0 + ; GFX9-NEXT: [[COPY6:%[0-9]+]]:_(s16) = COPY [[TRUNC1]](s16) + ; GFX9-NEXT: [[SHL:%[0-9]+]]:_(<2 x s16>) = G_SHL [[EXTRACT]], [[EXTRACT1]](<2 x s16>) + ; GFX9-NEXT: [[SHL1:%[0-9]+]]:_(s16) = G_SHL [[COPY5]], [[COPY6]](s16) + ; GFX9-NEXT: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[SHL]](<2 x s16>) + ; GFX9-NEXT: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C]](s32) + ; GFX9-NEXT: [[DEF3:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF + ; GFX9-NEXT: [[UV10:%[0-9]+]]:_(<2 x s16>), [[UV11:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF3]](<4 x s16>) + ; GFX9-NEXT: [[BITCAST3:%[0-9]+]]:_(s32) = G_BITCAST [[UV10]](<2 x s16>) + ; GFX9-NEXT: [[BUILD_VECTOR_TRUNC4:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[BITCAST2]](s32), [[LSHR2]](s32) + ; GFX9-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[SHL1]](s16) + ; GFX9-NEXT: [[BUILD_VECTOR_TRUNC5:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[ANYEXT]](s32), [[BITCAST3]](s32) + ; GFX9-NEXT: $vgpr0 = 
COPY [[BUILD_VECTOR_TRUNC4]](<2 x s16>) + ; GFX9-NEXT: $vgpr1 = COPY [[BUILD_VECTOR_TRUNC5]](<2 x s16>) + ; GFX9-NEXT: [[COPY7:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY4]] + ; GFX9-NEXT: S_SETPC_B64_return [[COPY7]], implicit $vgpr0, implicit $vgpr1 + %3:_(<2 x s16>) = COPY $vgpr0 + %4:_(<2 x s16>) = COPY $vgpr1 + %5:_(<2 x s16>) = G_IMPLICIT_DEF + %6:_(<6 x s16>) = G_CONCAT_VECTORS %3(<2 x s16>), %4(<2 x s16>), %5(<2 x s16>) + %19:_(s96) = G_BITCAST %6(<6 x s16>) + %20:_(s48) = G_TRUNC %19(s96) + %0:_(<3 x s16>) = G_BITCAST %20(s48) + %8:_(<2 x s16>) = COPY $vgpr2 + %9:_(<2 x s16>) = COPY $vgpr3 + %10:_(<6 x s16>) = G_CONCAT_VECTORS %8(<2 x s16>), %9(<2 x s16>), %5(<2 x s16>) + %21:_(s96) = G_BITCAST %10(<6 x s16>) + %22:_(s48) = G_TRUNC %21(s96) + %1:_(<3 x s16>) = G_BITCAST %22(s48) + %2:sgpr_64 = COPY $sgpr30_sgpr31 + %12:_(<3 x s16>) = G_SHL %0, %1(<3 x s16>) + %16:_(<3 x s16>) = G_IMPLICIT_DEF + %17:_(<6 x s16>) = G_CONCAT_VECTORS %12(<3 x s16>), %16(<3 x s16>) + %14:_(<2 x s16>), %15:_(<2 x s16>), %18:_(<2 x s16>) = G_UNMERGE_VALUES %17(<6 x s16>) + $vgpr0 = COPY %14(<2 x s16>) + $vgpr1 = COPY %15(<2 x s16>) + %13:ccr_sgpr_64 = COPY %2 + S_SETPC_B64_return %13, implicit $vgpr0, implicit $vgpr1 +... + +--- +name: fma_v4f16 +body: | + bb.1: + liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $sgpr30_sgpr31 + + ; GFX8-LABEL: name: fma_v4f16 + ; GFX8: [[COPY:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0 + ; GFX8-NEXT: [[COPY1:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr1 + ; GFX8-NEXT: [[COPY2:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr2 + ; GFX8-NEXT: [[COPY3:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr3 + ; GFX8-NEXT: [[COPY4:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr4 + ; GFX8-NEXT: [[COPY5:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr5 + ; GFX8-NEXT: [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 + ; GFX8-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY]](<2 x s16>) + ; GFX8-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST]](s32) + ; GFX8-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; GFX8-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) + ; GFX8-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) + ; GFX8-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[COPY1]](<2 x s16>) + ; GFX8-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST1]](s32) + ; GFX8-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C]](s32) + ; GFX8-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR1]](s32) + ; GFX8-NEXT: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[COPY2]](<2 x s16>) + ; GFX8-NEXT: [[TRUNC4:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST2]](s32) + ; GFX8-NEXT: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C]](s32) + ; GFX8-NEXT: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR2]](s32) + ; GFX8-NEXT: [[BITCAST3:%[0-9]+]]:_(s32) = G_BITCAST [[COPY3]](<2 x s16>) + ; GFX8-NEXT: [[TRUNC6:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST3]](s32) + ; GFX8-NEXT: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST3]], [[C]](s32) + ; GFX8-NEXT: [[TRUNC7:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR3]](s32) + ; GFX8-NEXT: [[BITCAST4:%[0-9]+]]:_(s32) = G_BITCAST [[COPY4]](<2 x s16>) + ; GFX8-NEXT: [[TRUNC8:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST4]](s32) + ; GFX8-NEXT: [[LSHR4:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST4]], [[C]](s32) + ; GFX8-NEXT: [[TRUNC9:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR4]](s32) + ; GFX8-NEXT: [[BITCAST5:%[0-9]+]]:_(s32) = G_BITCAST [[COPY5]](<2 x s16>) + ; GFX8-NEXT: [[TRUNC10:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST5]](s32) + ; GFX8-NEXT: [[LSHR5:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST5]], [[C]](s32) + ; GFX8-NEXT: [[TRUNC11:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR5]](s32) + ; 
GFX8-NEXT: [[FMA:%[0-9]+]]:_(s16) = G_FMA [[TRUNC]], [[TRUNC4]], [[TRUNC8]] + ; GFX8-NEXT: [[FMA1:%[0-9]+]]:_(s16) = G_FMA [[TRUNC1]], [[TRUNC5]], [[TRUNC9]] + ; GFX8-NEXT: [[FMA2:%[0-9]+]]:_(s16) = G_FMA [[TRUNC2]], [[TRUNC6]], [[TRUNC10]] + ; GFX8-NEXT: [[FMA3:%[0-9]+]]:_(s16) = G_FMA [[TRUNC3]], [[TRUNC7]], [[TRUNC11]] + ; GFX8-NEXT: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[FMA]](s16) + ; GFX8-NEXT: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[FMA1]](s16) + ; GFX8-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C]](s32) + ; GFX8-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL]] + ; GFX8-NEXT: [[BITCAST6:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) + ; GFX8-NEXT: [[ZEXT2:%[0-9]+]]:_(s32) = G_ZEXT [[FMA2]](s16) + ; GFX8-NEXT: [[ZEXT3:%[0-9]+]]:_(s32) = G_ZEXT [[FMA3]](s16) + ; GFX8-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[ZEXT3]], [[C]](s32) + ; GFX8-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[ZEXT2]], [[SHL1]] + ; GFX8-NEXT: [[BITCAST7:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32) + ; GFX8-NEXT: $vgpr0 = COPY [[BITCAST6]](<2 x s16>) + ; GFX8-NEXT: $vgpr1 = COPY [[BITCAST7]](<2 x s16>) + ; GFX8-NEXT: [[COPY7:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY6]] + ; GFX8-NEXT: S_SETPC_B64_return [[COPY7]], implicit $vgpr0, implicit $vgpr1 + ; GFX9-LABEL: name: fma_v4f16 + ; GFX9: [[COPY:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0 + ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr1 + ; GFX9-NEXT: [[COPY2:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr2 + ; GFX9-NEXT: [[COPY3:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr3 + ; GFX9-NEXT: [[COPY4:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr4 + ; GFX9-NEXT: [[COPY5:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr5 + ; GFX9-NEXT: [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 + ; GFX9-NEXT: [[FMA:%[0-9]+]]:_(<2 x s16>) = G_FMA [[COPY]], [[COPY2]], [[COPY4]] + ; GFX9-NEXT: [[FMA1:%[0-9]+]]:_(<2 x s16>) = G_FMA [[COPY1]], [[COPY3]], [[COPY5]] + ; GFX9-NEXT: $vgpr0 = COPY [[FMA]](<2 x s16>) + ; GFX9-NEXT: $vgpr1 = COPY [[FMA1]](<2 x s16>) + ; GFX9-NEXT: [[COPY7:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY6]] + ; GFX9-NEXT: S_SETPC_B64_return [[COPY7]], implicit $vgpr0, implicit $vgpr1 + %4:_(<2 x s16>) = COPY $vgpr0 + %5:_(<2 x s16>) = COPY $vgpr1 + %0:_(<4 x s16>) = G_CONCAT_VECTORS %4(<2 x s16>), %5(<2 x s16>) + %6:_(<2 x s16>) = COPY $vgpr2 + %7:_(<2 x s16>) = COPY $vgpr3 + %1:_(<4 x s16>) = G_CONCAT_VECTORS %6(<2 x s16>), %7(<2 x s16>) + %8:_(<2 x s16>) = COPY $vgpr4 + %9:_(<2 x s16>) = COPY $vgpr5 + %2:_(<4 x s16>) = G_CONCAT_VECTORS %8(<2 x s16>), %9(<2 x s16>) + %3:sgpr_64 = COPY $sgpr30_sgpr31 + %10:_(<4 x s16>) = G_FMA %0, %1, %2 + %12:_(<2 x s16>), %13:_(<2 x s16>) = G_UNMERGE_VALUES %10(<4 x s16>) + $vgpr0 = COPY %12(<2 x s16>) + $vgpr1 = COPY %13(<2 x s16>) + %11:ccr_sgpr_64 = COPY %3 + S_SETPC_B64_return %11, implicit $vgpr0, implicit $vgpr1 +... 
+ +--- +name: maxnum_v5i16 +body: | + bb.1: + liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5 + + ; GFX8-LABEL: name: maxnum_v5i16 + ; GFX8: [[COPY:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0 + ; GFX8-NEXT: [[COPY1:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr1 + ; GFX8-NEXT: [[COPY2:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr2 + ; GFX8-NEXT: [[DEF:%[0-9]+]]:_(<2 x s16>) = G_IMPLICIT_DEF + ; GFX8-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<10 x s16>) = G_CONCAT_VECTORS [[COPY]](<2 x s16>), [[COPY1]](<2 x s16>), [[COPY2]](<2 x s16>), [[DEF]](<2 x s16>), [[DEF]](<2 x s16>) + ; GFX8-NEXT: [[BITCAST:%[0-9]+]]:_(s160) = G_BITCAST [[CONCAT_VECTORS]](<10 x s16>) + ; GFX8-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[BITCAST]](s160) + ; GFX8-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[UV]](s32) + ; GFX8-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; GFX8-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[UV]], [[C]](s32) + ; GFX8-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) + ; GFX8-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[UV1]](s32) + ; GFX8-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[UV1]], [[C]](s32) + ; GFX8-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR1]](s32) + ; GFX8-NEXT: [[TRUNC4:%[0-9]+]]:_(s16) = G_TRUNC [[UV2]](s32) + ; GFX8-NEXT: [[COPY3:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr3 + ; GFX8-NEXT: [[COPY4:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr4 + ; GFX8-NEXT: [[COPY5:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr5 + ; GFX8-NEXT: [[CONCAT_VECTORS1:%[0-9]+]]:_(<10 x s16>) = G_CONCAT_VECTORS [[COPY3]](<2 x s16>), [[COPY4]](<2 x s16>), [[COPY5]](<2 x s16>), [[DEF]](<2 x s16>), [[DEF]](<2 x s16>) + ; GFX8-NEXT: [[BITCAST1:%[0-9]+]]:_(s160) = G_BITCAST [[CONCAT_VECTORS1]](<10 x s16>) + ; GFX8-NEXT: [[UV5:%[0-9]+]]:_(s32), [[UV6:%[0-9]+]]:_(s32), [[UV7:%[0-9]+]]:_(s32), [[UV8:%[0-9]+]]:_(s32), [[UV9:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[BITCAST1]](s160) + ; GFX8-NEXT: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[UV5]](s32) + ; GFX8-NEXT: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[UV5]], [[C]](s32) + ; GFX8-NEXT: [[TRUNC6:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR2]](s32) + ; GFX8-NEXT: [[TRUNC7:%[0-9]+]]:_(s16) = G_TRUNC [[UV6]](s32) + ; GFX8-NEXT: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[UV6]], [[C]](s32) + ; GFX8-NEXT: [[TRUNC8:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR3]](s32) + ; GFX8-NEXT: [[TRUNC9:%[0-9]+]]:_(s16) = G_TRUNC [[UV7]](s32) + ; GFX8-NEXT: [[FCANONICALIZE:%[0-9]+]]:_(s16) = G_FCANONICALIZE [[TRUNC]] + ; GFX8-NEXT: [[FCANONICALIZE1:%[0-9]+]]:_(s16) = G_FCANONICALIZE [[TRUNC5]] + ; GFX8-NEXT: [[FMAXNUM_IEEE:%[0-9]+]]:_(s16) = G_FMAXNUM_IEEE [[FCANONICALIZE]], [[FCANONICALIZE1]] + ; GFX8-NEXT: [[FCANONICALIZE2:%[0-9]+]]:_(s16) = G_FCANONICALIZE [[TRUNC1]] + ; GFX8-NEXT: [[FCANONICALIZE3:%[0-9]+]]:_(s16) = G_FCANONICALIZE [[TRUNC6]] + ; GFX8-NEXT: [[FMAXNUM_IEEE1:%[0-9]+]]:_(s16) = G_FMAXNUM_IEEE [[FCANONICALIZE2]], [[FCANONICALIZE3]] + ; GFX8-NEXT: [[FCANONICALIZE4:%[0-9]+]]:_(s16) = G_FCANONICALIZE [[TRUNC2]] + ; GFX8-NEXT: [[FCANONICALIZE5:%[0-9]+]]:_(s16) = G_FCANONICALIZE [[TRUNC7]] + ; GFX8-NEXT: [[FMAXNUM_IEEE2:%[0-9]+]]:_(s16) = G_FMAXNUM_IEEE [[FCANONICALIZE4]], [[FCANONICALIZE5]] + ; GFX8-NEXT: [[FCANONICALIZE6:%[0-9]+]]:_(s16) = G_FCANONICALIZE [[TRUNC3]] + ; GFX8-NEXT: [[FCANONICALIZE7:%[0-9]+]]:_(s16) = G_FCANONICALIZE [[TRUNC8]] + ; GFX8-NEXT: [[FMAXNUM_IEEE3:%[0-9]+]]:_(s16) = G_FMAXNUM_IEEE [[FCANONICALIZE6]], [[FCANONICALIZE7]] + ; GFX8-NEXT: [[FCANONICALIZE8:%[0-9]+]]:_(s16) = G_FCANONICALIZE [[TRUNC4]] + ; GFX8-NEXT: 
[[FCANONICALIZE9:%[0-9]+]]:_(s16) = G_FCANONICALIZE [[TRUNC9]] + ; GFX8-NEXT: [[FMAXNUM_IEEE4:%[0-9]+]]:_(s16) = G_FMAXNUM_IEEE [[FCANONICALIZE8]], [[FCANONICALIZE9]] + ; GFX8-NEXT: [[DEF1:%[0-9]+]]:_(<6 x s16>) = G_IMPLICIT_DEF + ; GFX8-NEXT: [[UV10:%[0-9]+]]:_(<2 x s16>), [[UV11:%[0-9]+]]:_(<2 x s16>), [[UV12:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF1]](<6 x s16>) + ; GFX8-NEXT: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[UV10]](<2 x s16>) + ; GFX8-NEXT: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[FMAXNUM_IEEE]](s16) + ; GFX8-NEXT: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[FMAXNUM_IEEE1]](s16) + ; GFX8-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C]](s32) + ; GFX8-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL]] + ; GFX8-NEXT: [[BITCAST3:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) + ; GFX8-NEXT: [[ZEXT2:%[0-9]+]]:_(s32) = G_ZEXT [[FMAXNUM_IEEE2]](s16) + ; GFX8-NEXT: [[ZEXT3:%[0-9]+]]:_(s32) = G_ZEXT [[FMAXNUM_IEEE3]](s16) + ; GFX8-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[ZEXT3]], [[C]](s32) + ; GFX8-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[ZEXT2]], [[SHL1]] + ; GFX8-NEXT: [[BITCAST4:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32) + ; GFX8-NEXT: [[ZEXT4:%[0-9]+]]:_(s32) = G_ZEXT [[FMAXNUM_IEEE4]](s16) + ; GFX8-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 + ; GFX8-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[BITCAST2]], [[C1]] + ; GFX8-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND]], [[C]](s32) + ; GFX8-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[ZEXT4]], [[SHL2]] + ; GFX8-NEXT: [[BITCAST5:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR2]](s32) + ; GFX8-NEXT: $vgpr0 = COPY [[BITCAST3]](<2 x s16>) + ; GFX8-NEXT: $vgpr1 = COPY [[BITCAST4]](<2 x s16>) + ; GFX8-NEXT: $vgpr2 = COPY [[BITCAST5]](<2 x s16>) + ; GFX8-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2 + ; GFX9-LABEL: name: maxnum_v5i16 + ; GFX9: [[COPY:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0 + ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr1 + ; GFX9-NEXT: [[COPY2:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr2 + ; GFX9-NEXT: [[DEF:%[0-9]+]]:_(<2 x s16>) = G_IMPLICIT_DEF + ; GFX9-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<10 x s16>) = G_CONCAT_VECTORS [[COPY]](<2 x s16>), [[COPY1]](<2 x s16>), [[COPY2]](<2 x s16>), [[DEF]](<2 x s16>), [[DEF]](<2 x s16>) + ; GFX9-NEXT: [[BITCAST:%[0-9]+]]:_(s160) = G_BITCAST [[CONCAT_VECTORS]](<10 x s16>) + ; GFX9-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[BITCAST]](s160) + ; GFX9-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; GFX9-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[UV]], [[C]](s32) + ; GFX9-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[UV1]], [[C]](s32) + ; GFX9-NEXT: [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[UV]](s32), [[LSHR]](s32) + ; GFX9-NEXT: [[BUILD_VECTOR_TRUNC1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[UV1]](s32), [[LSHR1]](s32) + ; GFX9-NEXT: [[DEF1:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF + ; GFX9-NEXT: [[BUILD_VECTOR_TRUNC2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[UV2]](s32), [[DEF1]](s32) + ; GFX9-NEXT: [[CONCAT_VECTORS1:%[0-9]+]]:_(<10 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR_TRUNC]](<2 x s16>), [[BUILD_VECTOR_TRUNC1]](<2 x s16>), [[BUILD_VECTOR_TRUNC2]](<2 x s16>), [[DEF]](<2 x s16>), [[DEF]](<2 x s16>) + ; GFX9-NEXT: [[UV5:%[0-9]+]]:_(<5 x s16>), [[UV6:%[0-9]+]]:_(<5 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS1]](<10 x s16>) + ; GFX9-NEXT: [[COPY3:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr3 + ; GFX9-NEXT: [[COPY4:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr4 + ; 
GFX9-NEXT: [[COPY5:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr5 + ; GFX9-NEXT: [[CONCAT_VECTORS2:%[0-9]+]]:_(<10 x s16>) = G_CONCAT_VECTORS [[COPY3]](<2 x s16>), [[COPY4]](<2 x s16>), [[COPY5]](<2 x s16>), [[DEF]](<2 x s16>), [[DEF]](<2 x s16>) + ; GFX9-NEXT: [[BITCAST1:%[0-9]+]]:_(s160) = G_BITCAST [[CONCAT_VECTORS2]](<10 x s16>) + ; GFX9-NEXT: [[UV7:%[0-9]+]]:_(s32), [[UV8:%[0-9]+]]:_(s32), [[UV9:%[0-9]+]]:_(s32), [[UV10:%[0-9]+]]:_(s32), [[UV11:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[BITCAST1]](s160) + ; GFX9-NEXT: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[UV7]], [[C]](s32) + ; GFX9-NEXT: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[UV8]], [[C]](s32) + ; GFX9-NEXT: [[BUILD_VECTOR_TRUNC3:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[UV7]](s32), [[LSHR2]](s32) + ; GFX9-NEXT: [[BUILD_VECTOR_TRUNC4:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[UV8]](s32), [[LSHR3]](s32) + ; GFX9-NEXT: [[BUILD_VECTOR_TRUNC5:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[UV9]](s32), [[DEF1]](s32) + ; GFX9-NEXT: [[CONCAT_VECTORS3:%[0-9]+]]:_(<10 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR_TRUNC3]](<2 x s16>), [[BUILD_VECTOR_TRUNC4]](<2 x s16>), [[BUILD_VECTOR_TRUNC5]](<2 x s16>), [[DEF]](<2 x s16>), [[DEF]](<2 x s16>) + ; GFX9-NEXT: [[UV12:%[0-9]+]]:_(<5 x s16>), [[UV13:%[0-9]+]]:_(<5 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS3]](<10 x s16>) + ; GFX9-NEXT: [[DEF2:%[0-9]+]]:_(<6 x s16>) = G_IMPLICIT_DEF + ; GFX9-NEXT: [[INSERT:%[0-9]+]]:_(<6 x s16>) = G_INSERT [[DEF2]], [[UV5]](<5 x s16>), 0 + ; GFX9-NEXT: [[INSERT1:%[0-9]+]]:_(<6 x s16>) = G_INSERT [[DEF2]], [[UV12]](<5 x s16>), 0 + ; GFX9-NEXT: [[UV14:%[0-9]+]]:_(<2 x s16>), [[UV15:%[0-9]+]]:_(<2 x s16>), [[UV16:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[INSERT]](<6 x s16>) + ; GFX9-NEXT: [[UV17:%[0-9]+]]:_(<2 x s16>), [[UV18:%[0-9]+]]:_(<2 x s16>), [[UV19:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[INSERT1]](<6 x s16>) + ; GFX9-NEXT: [[FCANONICALIZE:%[0-9]+]]:_(<2 x s16>) = G_FCANONICALIZE [[UV14]] + ; GFX9-NEXT: [[FCANONICALIZE1:%[0-9]+]]:_(<2 x s16>) = G_FCANONICALIZE [[UV17]] + ; GFX9-NEXT: [[FMAXNUM_IEEE:%[0-9]+]]:_(<2 x s16>) = G_FMAXNUM_IEEE [[FCANONICALIZE]], [[FCANONICALIZE1]] + ; GFX9-NEXT: [[FCANONICALIZE2:%[0-9]+]]:_(<2 x s16>) = G_FCANONICALIZE [[UV15]] + ; GFX9-NEXT: [[FCANONICALIZE3:%[0-9]+]]:_(<2 x s16>) = G_FCANONICALIZE [[UV18]] + ; GFX9-NEXT: [[FMAXNUM_IEEE1:%[0-9]+]]:_(<2 x s16>) = G_FMAXNUM_IEEE [[FCANONICALIZE2]], [[FCANONICALIZE3]] + ; GFX9-NEXT: [[FCANONICALIZE4:%[0-9]+]]:_(<2 x s16>) = G_FCANONICALIZE [[UV16]] + ; GFX9-NEXT: [[FCANONICALIZE5:%[0-9]+]]:_(<2 x s16>) = G_FCANONICALIZE [[UV19]] + ; GFX9-NEXT: [[FMAXNUM_IEEE2:%[0-9]+]]:_(<2 x s16>) = G_FMAXNUM_IEEE [[FCANONICALIZE4]], [[FCANONICALIZE5]] + ; GFX9-NEXT: [[DEF3:%[0-9]+]]:_(<6 x s16>) = G_IMPLICIT_DEF + ; GFX9-NEXT: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[FMAXNUM_IEEE2]](<2 x s16>) + ; GFX9-NEXT: [[UV20:%[0-9]+]]:_(<2 x s16>), [[UV21:%[0-9]+]]:_(<2 x s16>), [[UV22:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF3]](<6 x s16>) + ; GFX9-NEXT: [[BITCAST3:%[0-9]+]]:_(s32) = G_BITCAST [[UV20]](<2 x s16>) + ; GFX9-NEXT: [[BUILD_VECTOR_TRUNC6:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[BITCAST2]](s32), [[BITCAST3]](s32) + ; GFX9-NEXT: $vgpr0 = COPY [[FMAXNUM_IEEE]](<2 x s16>) + ; GFX9-NEXT: $vgpr1 = COPY [[FMAXNUM_IEEE1]](<2 x s16>) + ; GFX9-NEXT: $vgpr2 = COPY [[BUILD_VECTOR_TRUNC6]](<2 x s16>) + ; GFX9-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2 + %2:_(<2 x s16>) = COPY $vgpr0 + %3:_(<2 x s16>) = COPY $vgpr1 + %4:_(<2 x s16>) = COPY $vgpr2 + %5:_(<2 x s16>) = 
G_IMPLICIT_DEF + %6:_(<10 x s16>) = G_CONCAT_VECTORS %2(<2 x s16>), %3(<2 x s16>), %4(<2 x s16>), %5(<2 x s16>), %5(<2 x s16>) + %22:_(s160) = G_BITCAST %6(<10 x s16>) + %23:_(s80) = G_TRUNC %22(s160) + %0:_(<5 x s16>) = G_BITCAST %23(s80) + %8:_(<2 x s16>) = COPY $vgpr3 + %9:_(<2 x s16>) = COPY $vgpr4 + %10:_(<2 x s16>) = COPY $vgpr5 + %11:_(<10 x s16>) = G_CONCAT_VECTORS %8(<2 x s16>), %9(<2 x s16>), %10(<2 x s16>), %5(<2 x s16>), %5(<2 x s16>) + %24:_(s160) = G_BITCAST %11(<10 x s16>) + %25:_(s80) = G_TRUNC %24(s160) + %1:_(<5 x s16>) = G_BITCAST %25(s80) + %14:_(<5 x s16>) = G_FMAXNUM %0, %1 + %18:_(<5 x s16>) = G_IMPLICIT_DEF + %19:_(<10 x s16>) = G_CONCAT_VECTORS %14(<5 x s16>), %18(<5 x s16>) + %15:_(<2 x s16>), %16:_(<2 x s16>), %17:_(<2 x s16>), %20:_(<2 x s16>), %21:_(<2 x s16>) = G_UNMERGE_VALUES %19(<10 x s16>) + $vgpr0 = COPY %15(<2 x s16>) + $vgpr1 = COPY %16(<2 x s16>) + $vgpr2 = COPY %17(<2 x s16>) + SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2 +... diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-amdgcn.s.buffer.load.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-amdgcn.s.buffer.load.ll index 925c7bd4a17e..169507619a12 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-amdgcn.s.buffer.load.ll +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-amdgcn.s.buffer.load.ll @@ -6,32 +6,34 @@ define amdgpu_ps i32 @s_buffer_load_i32(<4 x i32> inreg %rsrc, i32 inreg %soffset) { ; CHECK-LABEL: name: s_buffer_load_i32 ; CHECK: bb.1 (%ir-block.0): - ; CHECK: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6 - ; CHECK: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr2 - ; CHECK: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr3 - ; CHECK: [[COPY2:%[0-9]+]]:sgpr(s32) = COPY $sgpr4 - ; CHECK: [[COPY3:%[0-9]+]]:sgpr(s32) = COPY $sgpr5 - ; CHECK: [[BUILD_VECTOR:%[0-9]+]]:sgpr(<4 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32) - ; CHECK: [[COPY4:%[0-9]+]]:sgpr(s32) = COPY $sgpr6 - ; CHECK: [[AMDGPU_S_BUFFER_LOAD:%[0-9]+]]:sgpr(s32) = G_AMDGPU_S_BUFFER_LOAD [[BUILD_VECTOR]](<4 x s32>), [[COPY4]](s32), 0 :: (dereferenceable invariant load (s32)) - ; CHECK: [[COPY5:%[0-9]+]]:vgpr(s32) = COPY [[AMDGPU_S_BUFFER_LOAD]](s32) - ; CHECK: [[INT:%[0-9]+]]:sgpr(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.readfirstlane), [[COPY5]](s32) - ; CHECK: $sgpr0 = COPY [[INT]](s32) - ; CHECK: SI_RETURN_TO_EPILOG implicit $sgpr0 + ; CHECK-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr2 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr3 + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:sgpr(s32) = COPY $sgpr4 + ; CHECK-NEXT: [[COPY3:%[0-9]+]]:sgpr(s32) = COPY $sgpr5 + ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:sgpr(<4 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32) + ; CHECK-NEXT: [[COPY4:%[0-9]+]]:sgpr(s32) = COPY $sgpr6 + ; CHECK-NEXT: [[AMDGPU_S_BUFFER_LOAD:%[0-9]+]]:sgpr(s32) = G_AMDGPU_S_BUFFER_LOAD [[BUILD_VECTOR]](<4 x s32>), [[COPY4]](s32), 0 :: (dereferenceable invariant load (s32)) + ; CHECK-NEXT: [[COPY5:%[0-9]+]]:vgpr(s32) = COPY [[AMDGPU_S_BUFFER_LOAD]](s32) + ; CHECK-NEXT: [[INT:%[0-9]+]]:sgpr(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.readfirstlane), [[COPY5]](s32) + ; CHECK-NEXT: $sgpr0 = COPY [[INT]](s32) + ; CHECK-NEXT: SI_RETURN_TO_EPILOG implicit $sgpr0 ; GREEDY-LABEL: name: s_buffer_load_i32 ; GREEDY: bb.1 (%ir-block.0): - ; GREEDY: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6 - ; GREEDY: 
[[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr2 - ; GREEDY: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr3 - ; GREEDY: [[COPY2:%[0-9]+]]:sgpr(s32) = COPY $sgpr4 - ; GREEDY: [[COPY3:%[0-9]+]]:sgpr(s32) = COPY $sgpr5 - ; GREEDY: [[BUILD_VECTOR:%[0-9]+]]:sgpr(<4 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32) - ; GREEDY: [[COPY4:%[0-9]+]]:sgpr(s32) = COPY $sgpr6 - ; GREEDY: [[AMDGPU_S_BUFFER_LOAD:%[0-9]+]]:sgpr(s32) = G_AMDGPU_S_BUFFER_LOAD [[BUILD_VECTOR]](<4 x s32>), [[COPY4]](s32), 0 :: (dereferenceable invariant load (s32)) - ; GREEDY: [[COPY5:%[0-9]+]]:vgpr(s32) = COPY [[AMDGPU_S_BUFFER_LOAD]](s32) - ; GREEDY: [[INT:%[0-9]+]]:sgpr(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.readfirstlane), [[COPY5]](s32) - ; GREEDY: $sgpr0 = COPY [[INT]](s32) - ; GREEDY: SI_RETURN_TO_EPILOG implicit $sgpr0 + ; GREEDY-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6 + ; GREEDY-NEXT: {{ $}} + ; GREEDY-NEXT: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr2 + ; GREEDY-NEXT: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr3 + ; GREEDY-NEXT: [[COPY2:%[0-9]+]]:sgpr(s32) = COPY $sgpr4 + ; GREEDY-NEXT: [[COPY3:%[0-9]+]]:sgpr(s32) = COPY $sgpr5 + ; GREEDY-NEXT: [[BUILD_VECTOR:%[0-9]+]]:sgpr(<4 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32) + ; GREEDY-NEXT: [[COPY4:%[0-9]+]]:sgpr(s32) = COPY $sgpr6 + ; GREEDY-NEXT: [[AMDGPU_S_BUFFER_LOAD:%[0-9]+]]:sgpr(s32) = G_AMDGPU_S_BUFFER_LOAD [[BUILD_VECTOR]](<4 x s32>), [[COPY4]](s32), 0 :: (dereferenceable invariant load (s32)) + ; GREEDY-NEXT: [[COPY5:%[0-9]+]]:vgpr(s32) = COPY [[AMDGPU_S_BUFFER_LOAD]](s32) + ; GREEDY-NEXT: [[INT:%[0-9]+]]:sgpr(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.readfirstlane), [[COPY5]](s32) + ; GREEDY-NEXT: $sgpr0 = COPY [[INT]](s32) + ; GREEDY-NEXT: SI_RETURN_TO_EPILOG implicit $sgpr0 %val = call i32 @llvm.amdgcn.s.buffer.load.i32(<4 x i32> %rsrc, i32 %soffset, i32 0) ret i32 %val } @@ -39,40 +41,42 @@ define amdgpu_ps i32 @s_buffer_load_i32(<4 x i32> inreg %rsrc, i32 inreg %soffse define amdgpu_ps <2 x i32> @s_buffer_load_v2i32(<4 x i32> inreg %rsrc, i32 inreg %soffset) { ; CHECK-LABEL: name: s_buffer_load_v2i32 ; CHECK: bb.1 (%ir-block.0): - ; CHECK: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6 - ; CHECK: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr2 - ; CHECK: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr3 - ; CHECK: [[COPY2:%[0-9]+]]:sgpr(s32) = COPY $sgpr4 - ; CHECK: [[COPY3:%[0-9]+]]:sgpr(s32) = COPY $sgpr5 - ; CHECK: [[BUILD_VECTOR:%[0-9]+]]:sgpr(<4 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32) - ; CHECK: [[COPY4:%[0-9]+]]:sgpr(s32) = COPY $sgpr6 - ; CHECK: [[AMDGPU_S_BUFFER_LOAD:%[0-9]+]]:sgpr(<2 x s32>) = G_AMDGPU_S_BUFFER_LOAD [[BUILD_VECTOR]](<4 x s32>), [[COPY4]](s32), 0 :: (dereferenceable invariant load (s64), align 4) - ; CHECK: [[UV:%[0-9]+]]:sgpr(s32), [[UV1:%[0-9]+]]:sgpr(s32) = G_UNMERGE_VALUES [[AMDGPU_S_BUFFER_LOAD]](<2 x s32>) - ; CHECK: [[COPY5:%[0-9]+]]:vgpr(s32) = COPY [[UV]](s32) - ; CHECK: [[INT:%[0-9]+]]:sgpr(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.readfirstlane), [[COPY5]](s32) - ; CHECK: $sgpr0 = COPY [[INT]](s32) - ; CHECK: [[COPY6:%[0-9]+]]:vgpr(s32) = COPY [[UV1]](s32) - ; CHECK: [[INT1:%[0-9]+]]:sgpr(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.readfirstlane), [[COPY6]](s32) - ; CHECK: $sgpr1 = COPY [[INT1]](s32) - ; CHECK: SI_RETURN_TO_EPILOG implicit $sgpr0, implicit $sgpr1 + ; CHECK-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr2 + ; 
CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr3 + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:sgpr(s32) = COPY $sgpr4 + ; CHECK-NEXT: [[COPY3:%[0-9]+]]:sgpr(s32) = COPY $sgpr5 + ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:sgpr(<4 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32) + ; CHECK-NEXT: [[COPY4:%[0-9]+]]:sgpr(s32) = COPY $sgpr6 + ; CHECK-NEXT: [[AMDGPU_S_BUFFER_LOAD:%[0-9]+]]:sgpr(<2 x s32>) = G_AMDGPU_S_BUFFER_LOAD [[BUILD_VECTOR]](<4 x s32>), [[COPY4]](s32), 0 :: (dereferenceable invariant load (s64), align 4) + ; CHECK-NEXT: [[UV:%[0-9]+]]:sgpr(s32), [[UV1:%[0-9]+]]:sgpr(s32) = G_UNMERGE_VALUES [[AMDGPU_S_BUFFER_LOAD]](<2 x s32>) + ; CHECK-NEXT: [[COPY5:%[0-9]+]]:vgpr(s32) = COPY [[UV]](s32) + ; CHECK-NEXT: [[INT:%[0-9]+]]:sgpr(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.readfirstlane), [[COPY5]](s32) + ; CHECK-NEXT: $sgpr0 = COPY [[INT]](s32) + ; CHECK-NEXT: [[COPY6:%[0-9]+]]:vgpr(s32) = COPY [[UV1]](s32) + ; CHECK-NEXT: [[INT1:%[0-9]+]]:sgpr(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.readfirstlane), [[COPY6]](s32) + ; CHECK-NEXT: $sgpr1 = COPY [[INT1]](s32) + ; CHECK-NEXT: SI_RETURN_TO_EPILOG implicit $sgpr0, implicit $sgpr1 ; GREEDY-LABEL: name: s_buffer_load_v2i32 ; GREEDY: bb.1 (%ir-block.0): - ; GREEDY: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6 - ; GREEDY: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr2 - ; GREEDY: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr3 - ; GREEDY: [[COPY2:%[0-9]+]]:sgpr(s32) = COPY $sgpr4 - ; GREEDY: [[COPY3:%[0-9]+]]:sgpr(s32) = COPY $sgpr5 - ; GREEDY: [[BUILD_VECTOR:%[0-9]+]]:sgpr(<4 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32) - ; GREEDY: [[COPY4:%[0-9]+]]:sgpr(s32) = COPY $sgpr6 - ; GREEDY: [[AMDGPU_S_BUFFER_LOAD:%[0-9]+]]:sgpr(<2 x s32>) = G_AMDGPU_S_BUFFER_LOAD [[BUILD_VECTOR]](<4 x s32>), [[COPY4]](s32), 0 :: (dereferenceable invariant load (s64), align 4) - ; GREEDY: [[UV:%[0-9]+]]:sgpr(s32), [[UV1:%[0-9]+]]:sgpr(s32) = G_UNMERGE_VALUES [[AMDGPU_S_BUFFER_LOAD]](<2 x s32>) - ; GREEDY: [[COPY5:%[0-9]+]]:vgpr(s32) = COPY [[UV]](s32) - ; GREEDY: [[INT:%[0-9]+]]:sgpr(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.readfirstlane), [[COPY5]](s32) - ; GREEDY: $sgpr0 = COPY [[INT]](s32) - ; GREEDY: [[COPY6:%[0-9]+]]:vgpr(s32) = COPY [[UV1]](s32) - ; GREEDY: [[INT1:%[0-9]+]]:sgpr(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.readfirstlane), [[COPY6]](s32) - ; GREEDY: $sgpr1 = COPY [[INT1]](s32) - ; GREEDY: SI_RETURN_TO_EPILOG implicit $sgpr0, implicit $sgpr1 + ; GREEDY-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6 + ; GREEDY-NEXT: {{ $}} + ; GREEDY-NEXT: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr2 + ; GREEDY-NEXT: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr3 + ; GREEDY-NEXT: [[COPY2:%[0-9]+]]:sgpr(s32) = COPY $sgpr4 + ; GREEDY-NEXT: [[COPY3:%[0-9]+]]:sgpr(s32) = COPY $sgpr5 + ; GREEDY-NEXT: [[BUILD_VECTOR:%[0-9]+]]:sgpr(<4 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32) + ; GREEDY-NEXT: [[COPY4:%[0-9]+]]:sgpr(s32) = COPY $sgpr6 + ; GREEDY-NEXT: [[AMDGPU_S_BUFFER_LOAD:%[0-9]+]]:sgpr(<2 x s32>) = G_AMDGPU_S_BUFFER_LOAD [[BUILD_VECTOR]](<4 x s32>), [[COPY4]](s32), 0 :: (dereferenceable invariant load (s64), align 4) + ; GREEDY-NEXT: [[UV:%[0-9]+]]:sgpr(s32), [[UV1:%[0-9]+]]:sgpr(s32) = G_UNMERGE_VALUES [[AMDGPU_S_BUFFER_LOAD]](<2 x s32>) + ; GREEDY-NEXT: [[COPY5:%[0-9]+]]:vgpr(s32) = COPY [[UV]](s32) + ; GREEDY-NEXT: [[INT:%[0-9]+]]:sgpr(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.readfirstlane), [[COPY5]](s32) + ; GREEDY-NEXT: $sgpr0 = COPY [[INT]](s32) + ; 
GREEDY-NEXT: [[COPY6:%[0-9]+]]:vgpr(s32) = COPY [[UV1]](s32) + ; GREEDY-NEXT: [[INT1:%[0-9]+]]:sgpr(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.readfirstlane), [[COPY6]](s32) + ; GREEDY-NEXT: $sgpr1 = COPY [[INT1]](s32) + ; GREEDY-NEXT: SI_RETURN_TO_EPILOG implicit $sgpr0, implicit $sgpr1 %val = call <2 x i32> @llvm.amdgcn.s.buffer.load.v2i32(<4 x i32> %rsrc, i32 %soffset, i32 0) ret <2 x i32> %val } @@ -80,56 +84,58 @@ define amdgpu_ps <2 x i32> @s_buffer_load_v2i32(<4 x i32> inreg %rsrc, i32 inreg define amdgpu_ps <3 x i32> @s_buffer_load_v3i32(<4 x i32> inreg %rsrc, i32 inreg %soffset) { ; CHECK-LABEL: name: s_buffer_load_v3i32 ; CHECK: bb.1 (%ir-block.0): - ; CHECK: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6 - ; CHECK: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr2 - ; CHECK: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr3 - ; CHECK: [[COPY2:%[0-9]+]]:sgpr(s32) = COPY $sgpr4 - ; CHECK: [[COPY3:%[0-9]+]]:sgpr(s32) = COPY $sgpr5 - ; CHECK: [[BUILD_VECTOR:%[0-9]+]]:sgpr(<4 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32) - ; CHECK: [[COPY4:%[0-9]+]]:sgpr(s32) = COPY $sgpr6 - ; CHECK: [[AMDGPU_S_BUFFER_LOAD:%[0-9]+]]:sgpr(<4 x s32>) = G_AMDGPU_S_BUFFER_LOAD [[BUILD_VECTOR]](<4 x s32>), [[COPY4]](s32), 0 :: (dereferenceable invariant load (s96), align 4) - ; CHECK: [[DEF:%[0-9]+]]:sgpr(<4 x s32>) = G_IMPLICIT_DEF - ; CHECK: [[CONCAT_VECTORS:%[0-9]+]]:sgpr(<12 x s32>) = G_CONCAT_VECTORS [[AMDGPU_S_BUFFER_LOAD]](<4 x s32>), [[DEF]](<4 x s32>), [[DEF]](<4 x s32>) - ; CHECK: [[BITCAST:%[0-9]+]]:sgpr(s384) = G_BITCAST [[CONCAT_VECTORS]](<12 x s32>) - ; CHECK: [[TRUNC:%[0-9]+]]:sgpr(s96) = G_TRUNC [[BITCAST]](s384) - ; CHECK: [[BITCAST1:%[0-9]+]]:sgpr(<3 x s32>) = G_BITCAST [[TRUNC]](s96) - ; CHECK: [[UV:%[0-9]+]]:sgpr(s32), [[UV1:%[0-9]+]]:sgpr(s32), [[UV2:%[0-9]+]]:sgpr(s32) = G_UNMERGE_VALUES [[BITCAST1]](<3 x s32>) - ; CHECK: [[COPY5:%[0-9]+]]:vgpr(s32) = COPY [[UV]](s32) - ; CHECK: [[INT:%[0-9]+]]:sgpr(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.readfirstlane), [[COPY5]](s32) - ; CHECK: $sgpr0 = COPY [[INT]](s32) - ; CHECK: [[COPY6:%[0-9]+]]:vgpr(s32) = COPY [[UV1]](s32) - ; CHECK: [[INT1:%[0-9]+]]:sgpr(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.readfirstlane), [[COPY6]](s32) - ; CHECK: $sgpr1 = COPY [[INT1]](s32) - ; CHECK: [[COPY7:%[0-9]+]]:vgpr(s32) = COPY [[UV2]](s32) - ; CHECK: [[INT2:%[0-9]+]]:sgpr(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.readfirstlane), [[COPY7]](s32) - ; CHECK: $sgpr2 = COPY [[INT2]](s32) - ; CHECK: SI_RETURN_TO_EPILOG implicit $sgpr0, implicit $sgpr1, implicit $sgpr2 + ; CHECK-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr2 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr3 + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:sgpr(s32) = COPY $sgpr4 + ; CHECK-NEXT: [[COPY3:%[0-9]+]]:sgpr(s32) = COPY $sgpr5 + ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:sgpr(<4 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32) + ; CHECK-NEXT: [[COPY4:%[0-9]+]]:sgpr(s32) = COPY $sgpr6 + ; CHECK-NEXT: [[AMDGPU_S_BUFFER_LOAD:%[0-9]+]]:sgpr(<4 x s32>) = G_AMDGPU_S_BUFFER_LOAD [[BUILD_VECTOR]](<4 x s32>), [[COPY4]](s32), 0 :: (dereferenceable invariant load (s96), align 4) + ; CHECK-NEXT: [[DEF:%[0-9]+]]:sgpr(<4 x s32>) = G_IMPLICIT_DEF + ; CHECK-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:sgpr(<12 x s32>) = G_CONCAT_VECTORS [[AMDGPU_S_BUFFER_LOAD]](<4 x s32>), [[DEF]](<4 x s32>), [[DEF]](<4 x s32>) + ; CHECK-NEXT: [[BITCAST:%[0-9]+]]:sgpr(s384) = G_BITCAST 
[[CONCAT_VECTORS]](<12 x s32>) + ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:sgpr(s96) = G_TRUNC [[BITCAST]](s384) + ; CHECK-NEXT: [[BITCAST1:%[0-9]+]]:sgpr(<3 x s32>) = G_BITCAST [[TRUNC]](s96) + ; CHECK-NEXT: [[UV:%[0-9]+]]:sgpr(s32), [[UV1:%[0-9]+]]:sgpr(s32), [[UV2:%[0-9]+]]:sgpr(s32) = G_UNMERGE_VALUES [[BITCAST1]](<3 x s32>) + ; CHECK-NEXT: [[COPY5:%[0-9]+]]:vgpr(s32) = COPY [[UV]](s32) + ; CHECK-NEXT: [[INT:%[0-9]+]]:sgpr(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.readfirstlane), [[COPY5]](s32) + ; CHECK-NEXT: $sgpr0 = COPY [[INT]](s32) + ; CHECK-NEXT: [[COPY6:%[0-9]+]]:vgpr(s32) = COPY [[UV1]](s32) + ; CHECK-NEXT: [[INT1:%[0-9]+]]:sgpr(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.readfirstlane), [[COPY6]](s32) + ; CHECK-NEXT: $sgpr1 = COPY [[INT1]](s32) + ; CHECK-NEXT: [[COPY7:%[0-9]+]]:vgpr(s32) = COPY [[UV2]](s32) + ; CHECK-NEXT: [[INT2:%[0-9]+]]:sgpr(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.readfirstlane), [[COPY7]](s32) + ; CHECK-NEXT: $sgpr2 = COPY [[INT2]](s32) + ; CHECK-NEXT: SI_RETURN_TO_EPILOG implicit $sgpr0, implicit $sgpr1, implicit $sgpr2 ; GREEDY-LABEL: name: s_buffer_load_v3i32 ; GREEDY: bb.1 (%ir-block.0): - ; GREEDY: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6 - ; GREEDY: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr2 - ; GREEDY: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr3 - ; GREEDY: [[COPY2:%[0-9]+]]:sgpr(s32) = COPY $sgpr4 - ; GREEDY: [[COPY3:%[0-9]+]]:sgpr(s32) = COPY $sgpr5 - ; GREEDY: [[BUILD_VECTOR:%[0-9]+]]:sgpr(<4 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32) - ; GREEDY: [[COPY4:%[0-9]+]]:sgpr(s32) = COPY $sgpr6 - ; GREEDY: [[AMDGPU_S_BUFFER_LOAD:%[0-9]+]]:sgpr(<4 x s32>) = G_AMDGPU_S_BUFFER_LOAD [[BUILD_VECTOR]](<4 x s32>), [[COPY4]](s32), 0 :: (dereferenceable invariant load (s96), align 4) - ; GREEDY: [[DEF:%[0-9]+]]:sgpr(<4 x s32>) = G_IMPLICIT_DEF - ; GREEDY: [[CONCAT_VECTORS:%[0-9]+]]:sgpr(<12 x s32>) = G_CONCAT_VECTORS [[AMDGPU_S_BUFFER_LOAD]](<4 x s32>), [[DEF]](<4 x s32>), [[DEF]](<4 x s32>) - ; GREEDY: [[BITCAST:%[0-9]+]]:sgpr(s384) = G_BITCAST [[CONCAT_VECTORS]](<12 x s32>) - ; GREEDY: [[TRUNC:%[0-9]+]]:sgpr(s96) = G_TRUNC [[BITCAST]](s384) - ; GREEDY: [[BITCAST1:%[0-9]+]]:sgpr(<3 x s32>) = G_BITCAST [[TRUNC]](s96) - ; GREEDY: [[UV:%[0-9]+]]:sgpr(s32), [[UV1:%[0-9]+]]:sgpr(s32), [[UV2:%[0-9]+]]:sgpr(s32) = G_UNMERGE_VALUES [[BITCAST1]](<3 x s32>) - ; GREEDY: [[COPY5:%[0-9]+]]:vgpr(s32) = COPY [[UV]](s32) - ; GREEDY: [[INT:%[0-9]+]]:sgpr(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.readfirstlane), [[COPY5]](s32) - ; GREEDY: $sgpr0 = COPY [[INT]](s32) - ; GREEDY: [[COPY6:%[0-9]+]]:vgpr(s32) = COPY [[UV1]](s32) - ; GREEDY: [[INT1:%[0-9]+]]:sgpr(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.readfirstlane), [[COPY6]](s32) - ; GREEDY: $sgpr1 = COPY [[INT1]](s32) - ; GREEDY: [[COPY7:%[0-9]+]]:vgpr(s32) = COPY [[UV2]](s32) - ; GREEDY: [[INT2:%[0-9]+]]:sgpr(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.readfirstlane), [[COPY7]](s32) - ; GREEDY: $sgpr2 = COPY [[INT2]](s32) - ; GREEDY: SI_RETURN_TO_EPILOG implicit $sgpr0, implicit $sgpr1, implicit $sgpr2 + ; GREEDY-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6 + ; GREEDY-NEXT: {{ $}} + ; GREEDY-NEXT: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr2 + ; GREEDY-NEXT: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr3 + ; GREEDY-NEXT: [[COPY2:%[0-9]+]]:sgpr(s32) = COPY $sgpr4 + ; GREEDY-NEXT: [[COPY3:%[0-9]+]]:sgpr(s32) = COPY $sgpr5 + ; GREEDY-NEXT: [[BUILD_VECTOR:%[0-9]+]]:sgpr(<4 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32) + ; GREEDY-NEXT: 
[[COPY4:%[0-9]+]]:sgpr(s32) = COPY $sgpr6 + ; GREEDY-NEXT: [[AMDGPU_S_BUFFER_LOAD:%[0-9]+]]:sgpr(<4 x s32>) = G_AMDGPU_S_BUFFER_LOAD [[BUILD_VECTOR]](<4 x s32>), [[COPY4]](s32), 0 :: (dereferenceable invariant load (s96), align 4) + ; GREEDY-NEXT: [[DEF:%[0-9]+]]:sgpr(<4 x s32>) = G_IMPLICIT_DEF + ; GREEDY-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:sgpr(<12 x s32>) = G_CONCAT_VECTORS [[AMDGPU_S_BUFFER_LOAD]](<4 x s32>), [[DEF]](<4 x s32>), [[DEF]](<4 x s32>) + ; GREEDY-NEXT: [[BITCAST:%[0-9]+]]:sgpr(s384) = G_BITCAST [[CONCAT_VECTORS]](<12 x s32>) + ; GREEDY-NEXT: [[TRUNC:%[0-9]+]]:sgpr(s96) = G_TRUNC [[BITCAST]](s384) + ; GREEDY-NEXT: [[BITCAST1:%[0-9]+]]:sgpr(<3 x s32>) = G_BITCAST [[TRUNC]](s96) + ; GREEDY-NEXT: [[UV:%[0-9]+]]:sgpr(s32), [[UV1:%[0-9]+]]:sgpr(s32), [[UV2:%[0-9]+]]:sgpr(s32) = G_UNMERGE_VALUES [[BITCAST1]](<3 x s32>) + ; GREEDY-NEXT: [[COPY5:%[0-9]+]]:vgpr(s32) = COPY [[UV]](s32) + ; GREEDY-NEXT: [[INT:%[0-9]+]]:sgpr(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.readfirstlane), [[COPY5]](s32) + ; GREEDY-NEXT: $sgpr0 = COPY [[INT]](s32) + ; GREEDY-NEXT: [[COPY6:%[0-9]+]]:vgpr(s32) = COPY [[UV1]](s32) + ; GREEDY-NEXT: [[INT1:%[0-9]+]]:sgpr(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.readfirstlane), [[COPY6]](s32) + ; GREEDY-NEXT: $sgpr1 = COPY [[INT1]](s32) + ; GREEDY-NEXT: [[COPY7:%[0-9]+]]:vgpr(s32) = COPY [[UV2]](s32) + ; GREEDY-NEXT: [[INT2:%[0-9]+]]:sgpr(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.readfirstlane), [[COPY7]](s32) + ; GREEDY-NEXT: $sgpr2 = COPY [[INT2]](s32) + ; GREEDY-NEXT: SI_RETURN_TO_EPILOG implicit $sgpr0, implicit $sgpr1, implicit $sgpr2 %val = call <3 x i32> @llvm.amdgcn.s.buffer.load.v3i32(<4 x i32> %rsrc, i32 %soffset, i32 0) ret <3 x i32> %val } @@ -137,76 +143,78 @@ define amdgpu_ps <3 x i32> @s_buffer_load_v3i32(<4 x i32> inreg %rsrc, i32 inreg define amdgpu_ps <8 x i32> @s_buffer_load_v8i32(<4 x i32> inreg %rsrc, i32 inreg %soffset) { ; CHECK-LABEL: name: s_buffer_load_v8i32 ; CHECK: bb.1 (%ir-block.0): - ; CHECK: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6 - ; CHECK: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr2 - ; CHECK: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr3 - ; CHECK: [[COPY2:%[0-9]+]]:sgpr(s32) = COPY $sgpr4 - ; CHECK: [[COPY3:%[0-9]+]]:sgpr(s32) = COPY $sgpr5 - ; CHECK: [[BUILD_VECTOR:%[0-9]+]]:sgpr(<4 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32) - ; CHECK: [[COPY4:%[0-9]+]]:sgpr(s32) = COPY $sgpr6 - ; CHECK: [[AMDGPU_S_BUFFER_LOAD:%[0-9]+]]:sgpr(<8 x s32>) = G_AMDGPU_S_BUFFER_LOAD [[BUILD_VECTOR]](<4 x s32>), [[COPY4]](s32), 0 :: (dereferenceable invariant load (s256), align 4) - ; CHECK: [[UV:%[0-9]+]]:sgpr(s32), [[UV1:%[0-9]+]]:sgpr(s32), [[UV2:%[0-9]+]]:sgpr(s32), [[UV3:%[0-9]+]]:sgpr(s32), [[UV4:%[0-9]+]]:sgpr(s32), [[UV5:%[0-9]+]]:sgpr(s32), [[UV6:%[0-9]+]]:sgpr(s32), [[UV7:%[0-9]+]]:sgpr(s32) = G_UNMERGE_VALUES [[AMDGPU_S_BUFFER_LOAD]](<8 x s32>) - ; CHECK: [[COPY5:%[0-9]+]]:vgpr(s32) = COPY [[UV]](s32) - ; CHECK: [[INT:%[0-9]+]]:sgpr(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.readfirstlane), [[COPY5]](s32) - ; CHECK: $sgpr0 = COPY [[INT]](s32) - ; CHECK: [[COPY6:%[0-9]+]]:vgpr(s32) = COPY [[UV1]](s32) - ; CHECK: [[INT1:%[0-9]+]]:sgpr(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.readfirstlane), [[COPY6]](s32) - ; CHECK: $sgpr1 = COPY [[INT1]](s32) - ; CHECK: [[COPY7:%[0-9]+]]:vgpr(s32) = COPY [[UV2]](s32) - ; CHECK: [[INT2:%[0-9]+]]:sgpr(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.readfirstlane), [[COPY7]](s32) - ; CHECK: $sgpr2 = COPY [[INT2]](s32) - ; CHECK: [[COPY8:%[0-9]+]]:vgpr(s32) = COPY [[UV3]](s32) - 
; CHECK: [[INT3:%[0-9]+]]:sgpr(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.readfirstlane), [[COPY8]](s32) - ; CHECK: $sgpr3 = COPY [[INT3]](s32) - ; CHECK: [[COPY9:%[0-9]+]]:vgpr(s32) = COPY [[UV4]](s32) - ; CHECK: [[INT4:%[0-9]+]]:sgpr(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.readfirstlane), [[COPY9]](s32) - ; CHECK: $sgpr4 = COPY [[INT4]](s32) - ; CHECK: [[COPY10:%[0-9]+]]:vgpr(s32) = COPY [[UV5]](s32) - ; CHECK: [[INT5:%[0-9]+]]:sgpr(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.readfirstlane), [[COPY10]](s32) - ; CHECK: $sgpr5 = COPY [[INT5]](s32) - ; CHECK: [[COPY11:%[0-9]+]]:vgpr(s32) = COPY [[UV6]](s32) - ; CHECK: [[INT6:%[0-9]+]]:sgpr(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.readfirstlane), [[COPY11]](s32) - ; CHECK: $sgpr6 = COPY [[INT6]](s32) - ; CHECK: [[COPY12:%[0-9]+]]:vgpr(s32) = COPY [[UV7]](s32) - ; CHECK: [[INT7:%[0-9]+]]:sgpr(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.readfirstlane), [[COPY12]](s32) - ; CHECK: $sgpr7 = COPY [[INT7]](s32) - ; CHECK: SI_RETURN_TO_EPILOG implicit $sgpr0, implicit $sgpr1, implicit $sgpr2, implicit $sgpr3, implicit $sgpr4, implicit $sgpr5, implicit $sgpr6, implicit $sgpr7 + ; CHECK-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr2 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr3 + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:sgpr(s32) = COPY $sgpr4 + ; CHECK-NEXT: [[COPY3:%[0-9]+]]:sgpr(s32) = COPY $sgpr5 + ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:sgpr(<4 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32) + ; CHECK-NEXT: [[COPY4:%[0-9]+]]:sgpr(s32) = COPY $sgpr6 + ; CHECK-NEXT: [[AMDGPU_S_BUFFER_LOAD:%[0-9]+]]:sgpr(<8 x s32>) = G_AMDGPU_S_BUFFER_LOAD [[BUILD_VECTOR]](<4 x s32>), [[COPY4]](s32), 0 :: (dereferenceable invariant load (s256), align 4) + ; CHECK-NEXT: [[UV:%[0-9]+]]:sgpr(s32), [[UV1:%[0-9]+]]:sgpr(s32), [[UV2:%[0-9]+]]:sgpr(s32), [[UV3:%[0-9]+]]:sgpr(s32), [[UV4:%[0-9]+]]:sgpr(s32), [[UV5:%[0-9]+]]:sgpr(s32), [[UV6:%[0-9]+]]:sgpr(s32), [[UV7:%[0-9]+]]:sgpr(s32) = G_UNMERGE_VALUES [[AMDGPU_S_BUFFER_LOAD]](<8 x s32>) + ; CHECK-NEXT: [[COPY5:%[0-9]+]]:vgpr(s32) = COPY [[UV]](s32) + ; CHECK-NEXT: [[INT:%[0-9]+]]:sgpr(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.readfirstlane), [[COPY5]](s32) + ; CHECK-NEXT: $sgpr0 = COPY [[INT]](s32) + ; CHECK-NEXT: [[COPY6:%[0-9]+]]:vgpr(s32) = COPY [[UV1]](s32) + ; CHECK-NEXT: [[INT1:%[0-9]+]]:sgpr(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.readfirstlane), [[COPY6]](s32) + ; CHECK-NEXT: $sgpr1 = COPY [[INT1]](s32) + ; CHECK-NEXT: [[COPY7:%[0-9]+]]:vgpr(s32) = COPY [[UV2]](s32) + ; CHECK-NEXT: [[INT2:%[0-9]+]]:sgpr(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.readfirstlane), [[COPY7]](s32) + ; CHECK-NEXT: $sgpr2 = COPY [[INT2]](s32) + ; CHECK-NEXT: [[COPY8:%[0-9]+]]:vgpr(s32) = COPY [[UV3]](s32) + ; CHECK-NEXT: [[INT3:%[0-9]+]]:sgpr(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.readfirstlane), [[COPY8]](s32) + ; CHECK-NEXT: $sgpr3 = COPY [[INT3]](s32) + ; CHECK-NEXT: [[COPY9:%[0-9]+]]:vgpr(s32) = COPY [[UV4]](s32) + ; CHECK-NEXT: [[INT4:%[0-9]+]]:sgpr(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.readfirstlane), [[COPY9]](s32) + ; CHECK-NEXT: $sgpr4 = COPY [[INT4]](s32) + ; CHECK-NEXT: [[COPY10:%[0-9]+]]:vgpr(s32) = COPY [[UV5]](s32) + ; CHECK-NEXT: [[INT5:%[0-9]+]]:sgpr(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.readfirstlane), [[COPY10]](s32) + ; CHECK-NEXT: $sgpr5 = COPY [[INT5]](s32) + ; CHECK-NEXT: [[COPY11:%[0-9]+]]:vgpr(s32) = COPY [[UV6]](s32) + ; CHECK-NEXT: [[INT6:%[0-9]+]]:sgpr(s32) = G_INTRINSIC 
intrinsic(@llvm.amdgcn.readfirstlane), [[COPY11]](s32) + ; CHECK-NEXT: $sgpr6 = COPY [[INT6]](s32) + ; CHECK-NEXT: [[COPY12:%[0-9]+]]:vgpr(s32) = COPY [[UV7]](s32) + ; CHECK-NEXT: [[INT7:%[0-9]+]]:sgpr(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.readfirstlane), [[COPY12]](s32) + ; CHECK-NEXT: $sgpr7 = COPY [[INT7]](s32) + ; CHECK-NEXT: SI_RETURN_TO_EPILOG implicit $sgpr0, implicit $sgpr1, implicit $sgpr2, implicit $sgpr3, implicit $sgpr4, implicit $sgpr5, implicit $sgpr6, implicit $sgpr7 ; GREEDY-LABEL: name: s_buffer_load_v8i32 ; GREEDY: bb.1 (%ir-block.0): - ; GREEDY: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6 - ; GREEDY: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr2 - ; GREEDY: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr3 - ; GREEDY: [[COPY2:%[0-9]+]]:sgpr(s32) = COPY $sgpr4 - ; GREEDY: [[COPY3:%[0-9]+]]:sgpr(s32) = COPY $sgpr5 - ; GREEDY: [[BUILD_VECTOR:%[0-9]+]]:sgpr(<4 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32) - ; GREEDY: [[COPY4:%[0-9]+]]:sgpr(s32) = COPY $sgpr6 - ; GREEDY: [[AMDGPU_S_BUFFER_LOAD:%[0-9]+]]:sgpr(<8 x s32>) = G_AMDGPU_S_BUFFER_LOAD [[BUILD_VECTOR]](<4 x s32>), [[COPY4]](s32), 0 :: (dereferenceable invariant load (s256), align 4) - ; GREEDY: [[UV:%[0-9]+]]:sgpr(s32), [[UV1:%[0-9]+]]:sgpr(s32), [[UV2:%[0-9]+]]:sgpr(s32), [[UV3:%[0-9]+]]:sgpr(s32), [[UV4:%[0-9]+]]:sgpr(s32), [[UV5:%[0-9]+]]:sgpr(s32), [[UV6:%[0-9]+]]:sgpr(s32), [[UV7:%[0-9]+]]:sgpr(s32) = G_UNMERGE_VALUES [[AMDGPU_S_BUFFER_LOAD]](<8 x s32>) - ; GREEDY: [[COPY5:%[0-9]+]]:vgpr(s32) = COPY [[UV]](s32) - ; GREEDY: [[INT:%[0-9]+]]:sgpr(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.readfirstlane), [[COPY5]](s32) - ; GREEDY: $sgpr0 = COPY [[INT]](s32) - ; GREEDY: [[COPY6:%[0-9]+]]:vgpr(s32) = COPY [[UV1]](s32) - ; GREEDY: [[INT1:%[0-9]+]]:sgpr(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.readfirstlane), [[COPY6]](s32) - ; GREEDY: $sgpr1 = COPY [[INT1]](s32) - ; GREEDY: [[COPY7:%[0-9]+]]:vgpr(s32) = COPY [[UV2]](s32) - ; GREEDY: [[INT2:%[0-9]+]]:sgpr(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.readfirstlane), [[COPY7]](s32) - ; GREEDY: $sgpr2 = COPY [[INT2]](s32) - ; GREEDY: [[COPY8:%[0-9]+]]:vgpr(s32) = COPY [[UV3]](s32) - ; GREEDY: [[INT3:%[0-9]+]]:sgpr(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.readfirstlane), [[COPY8]](s32) - ; GREEDY: $sgpr3 = COPY [[INT3]](s32) - ; GREEDY: [[COPY9:%[0-9]+]]:vgpr(s32) = COPY [[UV4]](s32) - ; GREEDY: [[INT4:%[0-9]+]]:sgpr(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.readfirstlane), [[COPY9]](s32) - ; GREEDY: $sgpr4 = COPY [[INT4]](s32) - ; GREEDY: [[COPY10:%[0-9]+]]:vgpr(s32) = COPY [[UV5]](s32) - ; GREEDY: [[INT5:%[0-9]+]]:sgpr(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.readfirstlane), [[COPY10]](s32) - ; GREEDY: $sgpr5 = COPY [[INT5]](s32) - ; GREEDY: [[COPY11:%[0-9]+]]:vgpr(s32) = COPY [[UV6]](s32) - ; GREEDY: [[INT6:%[0-9]+]]:sgpr(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.readfirstlane), [[COPY11]](s32) - ; GREEDY: $sgpr6 = COPY [[INT6]](s32) - ; GREEDY: [[COPY12:%[0-9]+]]:vgpr(s32) = COPY [[UV7]](s32) - ; GREEDY: [[INT7:%[0-9]+]]:sgpr(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.readfirstlane), [[COPY12]](s32) - ; GREEDY: $sgpr7 = COPY [[INT7]](s32) - ; GREEDY: SI_RETURN_TO_EPILOG implicit $sgpr0, implicit $sgpr1, implicit $sgpr2, implicit $sgpr3, implicit $sgpr4, implicit $sgpr5, implicit $sgpr6, implicit $sgpr7 + ; GREEDY-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6 + ; GREEDY-NEXT: {{ $}} + ; GREEDY-NEXT: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr2 + ; GREEDY-NEXT: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr3 + ; GREEDY-NEXT: 
[[COPY2:%[0-9]+]]:sgpr(s32) = COPY $sgpr4 + ; GREEDY-NEXT: [[COPY3:%[0-9]+]]:sgpr(s32) = COPY $sgpr5 + ; GREEDY-NEXT: [[BUILD_VECTOR:%[0-9]+]]:sgpr(<4 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32) + ; GREEDY-NEXT: [[COPY4:%[0-9]+]]:sgpr(s32) = COPY $sgpr6 + ; GREEDY-NEXT: [[AMDGPU_S_BUFFER_LOAD:%[0-9]+]]:sgpr(<8 x s32>) = G_AMDGPU_S_BUFFER_LOAD [[BUILD_VECTOR]](<4 x s32>), [[COPY4]](s32), 0 :: (dereferenceable invariant load (s256), align 4) + ; GREEDY-NEXT: [[UV:%[0-9]+]]:sgpr(s32), [[UV1:%[0-9]+]]:sgpr(s32), [[UV2:%[0-9]+]]:sgpr(s32), [[UV3:%[0-9]+]]:sgpr(s32), [[UV4:%[0-9]+]]:sgpr(s32), [[UV5:%[0-9]+]]:sgpr(s32), [[UV6:%[0-9]+]]:sgpr(s32), [[UV7:%[0-9]+]]:sgpr(s32) = G_UNMERGE_VALUES [[AMDGPU_S_BUFFER_LOAD]](<8 x s32>) + ; GREEDY-NEXT: [[COPY5:%[0-9]+]]:vgpr(s32) = COPY [[UV]](s32) + ; GREEDY-NEXT: [[INT:%[0-9]+]]:sgpr(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.readfirstlane), [[COPY5]](s32) + ; GREEDY-NEXT: $sgpr0 = COPY [[INT]](s32) + ; GREEDY-NEXT: [[COPY6:%[0-9]+]]:vgpr(s32) = COPY [[UV1]](s32) + ; GREEDY-NEXT: [[INT1:%[0-9]+]]:sgpr(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.readfirstlane), [[COPY6]](s32) + ; GREEDY-NEXT: $sgpr1 = COPY [[INT1]](s32) + ; GREEDY-NEXT: [[COPY7:%[0-9]+]]:vgpr(s32) = COPY [[UV2]](s32) + ; GREEDY-NEXT: [[INT2:%[0-9]+]]:sgpr(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.readfirstlane), [[COPY7]](s32) + ; GREEDY-NEXT: $sgpr2 = COPY [[INT2]](s32) + ; GREEDY-NEXT: [[COPY8:%[0-9]+]]:vgpr(s32) = COPY [[UV3]](s32) + ; GREEDY-NEXT: [[INT3:%[0-9]+]]:sgpr(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.readfirstlane), [[COPY8]](s32) + ; GREEDY-NEXT: $sgpr3 = COPY [[INT3]](s32) + ; GREEDY-NEXT: [[COPY9:%[0-9]+]]:vgpr(s32) = COPY [[UV4]](s32) + ; GREEDY-NEXT: [[INT4:%[0-9]+]]:sgpr(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.readfirstlane), [[COPY9]](s32) + ; GREEDY-NEXT: $sgpr4 = COPY [[INT4]](s32) + ; GREEDY-NEXT: [[COPY10:%[0-9]+]]:vgpr(s32) = COPY [[UV5]](s32) + ; GREEDY-NEXT: [[INT5:%[0-9]+]]:sgpr(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.readfirstlane), [[COPY10]](s32) + ; GREEDY-NEXT: $sgpr5 = COPY [[INT5]](s32) + ; GREEDY-NEXT: [[COPY11:%[0-9]+]]:vgpr(s32) = COPY [[UV6]](s32) + ; GREEDY-NEXT: [[INT6:%[0-9]+]]:sgpr(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.readfirstlane), [[COPY11]](s32) + ; GREEDY-NEXT: $sgpr6 = COPY [[INT6]](s32) + ; GREEDY-NEXT: [[COPY12:%[0-9]+]]:vgpr(s32) = COPY [[UV7]](s32) + ; GREEDY-NEXT: [[INT7:%[0-9]+]]:sgpr(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.readfirstlane), [[COPY12]](s32) + ; GREEDY-NEXT: $sgpr7 = COPY [[INT7]](s32) + ; GREEDY-NEXT: SI_RETURN_TO_EPILOG implicit $sgpr0, implicit $sgpr1, implicit $sgpr2, implicit $sgpr3, implicit $sgpr4, implicit $sgpr5, implicit $sgpr6, implicit $sgpr7 %val = call <8 x i32> @llvm.amdgcn.s.buffer.load.v8i32(<4 x i32> %rsrc, i32 %soffset, i32 0) ret <8 x i32> %val } @@ -214,124 +222,126 @@ define amdgpu_ps <8 x i32> @s_buffer_load_v8i32(<4 x i32> inreg %rsrc, i32 inreg define amdgpu_ps <16 x i32> @s_buffer_load_v16i32(<4 x i32> inreg %rsrc, i32 inreg %soffset) { ; CHECK-LABEL: name: s_buffer_load_v16i32 ; CHECK: bb.1 (%ir-block.0): - ; CHECK: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6 - ; CHECK: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr2 - ; CHECK: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr3 - ; CHECK: [[COPY2:%[0-9]+]]:sgpr(s32) = COPY $sgpr4 - ; CHECK: [[COPY3:%[0-9]+]]:sgpr(s32) = COPY $sgpr5 - ; CHECK: [[BUILD_VECTOR:%[0-9]+]]:sgpr(<4 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32) - ; CHECK: [[COPY4:%[0-9]+]]:sgpr(s32) = 
COPY $sgpr6 - ; CHECK: [[AMDGPU_S_BUFFER_LOAD:%[0-9]+]]:sgpr(<16 x s32>) = G_AMDGPU_S_BUFFER_LOAD [[BUILD_VECTOR]](<4 x s32>), [[COPY4]](s32), 0 :: (dereferenceable invariant load (s512), align 4) - ; CHECK: [[UV:%[0-9]+]]:sgpr(s32), [[UV1:%[0-9]+]]:sgpr(s32), [[UV2:%[0-9]+]]:sgpr(s32), [[UV3:%[0-9]+]]:sgpr(s32), [[UV4:%[0-9]+]]:sgpr(s32), [[UV5:%[0-9]+]]:sgpr(s32), [[UV6:%[0-9]+]]:sgpr(s32), [[UV7:%[0-9]+]]:sgpr(s32), [[UV8:%[0-9]+]]:sgpr(s32), [[UV9:%[0-9]+]]:sgpr(s32), [[UV10:%[0-9]+]]:sgpr(s32), [[UV11:%[0-9]+]]:sgpr(s32), [[UV12:%[0-9]+]]:sgpr(s32), [[UV13:%[0-9]+]]:sgpr(s32), [[UV14:%[0-9]+]]:sgpr(s32), [[UV15:%[0-9]+]]:sgpr(s32) = G_UNMERGE_VALUES [[AMDGPU_S_BUFFER_LOAD]](<16 x s32>) - ; CHECK: [[COPY5:%[0-9]+]]:vgpr(s32) = COPY [[UV]](s32) - ; CHECK: [[INT:%[0-9]+]]:sgpr(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.readfirstlane), [[COPY5]](s32) - ; CHECK: $sgpr0 = COPY [[INT]](s32) - ; CHECK: [[COPY6:%[0-9]+]]:vgpr(s32) = COPY [[UV1]](s32) - ; CHECK: [[INT1:%[0-9]+]]:sgpr(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.readfirstlane), [[COPY6]](s32) - ; CHECK: $sgpr1 = COPY [[INT1]](s32) - ; CHECK: [[COPY7:%[0-9]+]]:vgpr(s32) = COPY [[UV2]](s32) - ; CHECK: [[INT2:%[0-9]+]]:sgpr(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.readfirstlane), [[COPY7]](s32) - ; CHECK: $sgpr2 = COPY [[INT2]](s32) - ; CHECK: [[COPY8:%[0-9]+]]:vgpr(s32) = COPY [[UV3]](s32) - ; CHECK: [[INT3:%[0-9]+]]:sgpr(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.readfirstlane), [[COPY8]](s32) - ; CHECK: $sgpr3 = COPY [[INT3]](s32) - ; CHECK: [[COPY9:%[0-9]+]]:vgpr(s32) = COPY [[UV4]](s32) - ; CHECK: [[INT4:%[0-9]+]]:sgpr(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.readfirstlane), [[COPY9]](s32) - ; CHECK: $sgpr4 = COPY [[INT4]](s32) - ; CHECK: [[COPY10:%[0-9]+]]:vgpr(s32) = COPY [[UV5]](s32) - ; CHECK: [[INT5:%[0-9]+]]:sgpr(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.readfirstlane), [[COPY10]](s32) - ; CHECK: $sgpr5 = COPY [[INT5]](s32) - ; CHECK: [[COPY11:%[0-9]+]]:vgpr(s32) = COPY [[UV6]](s32) - ; CHECK: [[INT6:%[0-9]+]]:sgpr(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.readfirstlane), [[COPY11]](s32) - ; CHECK: $sgpr6 = COPY [[INT6]](s32) - ; CHECK: [[COPY12:%[0-9]+]]:vgpr(s32) = COPY [[UV7]](s32) - ; CHECK: [[INT7:%[0-9]+]]:sgpr(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.readfirstlane), [[COPY12]](s32) - ; CHECK: $sgpr7 = COPY [[INT7]](s32) - ; CHECK: [[COPY13:%[0-9]+]]:vgpr(s32) = COPY [[UV8]](s32) - ; CHECK: [[INT8:%[0-9]+]]:sgpr(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.readfirstlane), [[COPY13]](s32) - ; CHECK: $sgpr8 = COPY [[INT8]](s32) - ; CHECK: [[COPY14:%[0-9]+]]:vgpr(s32) = COPY [[UV9]](s32) - ; CHECK: [[INT9:%[0-9]+]]:sgpr(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.readfirstlane), [[COPY14]](s32) - ; CHECK: $sgpr9 = COPY [[INT9]](s32) - ; CHECK: [[COPY15:%[0-9]+]]:vgpr(s32) = COPY [[UV10]](s32) - ; CHECK: [[INT10:%[0-9]+]]:sgpr(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.readfirstlane), [[COPY15]](s32) - ; CHECK: $sgpr10 = COPY [[INT10]](s32) - ; CHECK: [[COPY16:%[0-9]+]]:vgpr(s32) = COPY [[UV11]](s32) - ; CHECK: [[INT11:%[0-9]+]]:sgpr(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.readfirstlane), [[COPY16]](s32) - ; CHECK: $sgpr11 = COPY [[INT11]](s32) - ; CHECK: [[COPY17:%[0-9]+]]:vgpr(s32) = COPY [[UV12]](s32) - ; CHECK: [[INT12:%[0-9]+]]:sgpr(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.readfirstlane), [[COPY17]](s32) - ; CHECK: $sgpr12 = COPY [[INT12]](s32) - ; CHECK: [[COPY18:%[0-9]+]]:vgpr(s32) = COPY [[UV13]](s32) - ; CHECK: [[INT13:%[0-9]+]]:sgpr(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.readfirstlane), [[COPY18]](s32) - ; 
CHECK: $sgpr13 = COPY [[INT13]](s32) - ; CHECK: [[COPY19:%[0-9]+]]:vgpr(s32) = COPY [[UV14]](s32) - ; CHECK: [[INT14:%[0-9]+]]:sgpr(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.readfirstlane), [[COPY19]](s32) - ; CHECK: $sgpr14 = COPY [[INT14]](s32) - ; CHECK: [[COPY20:%[0-9]+]]:vgpr(s32) = COPY [[UV15]](s32) - ; CHECK: [[INT15:%[0-9]+]]:sgpr(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.readfirstlane), [[COPY20]](s32) - ; CHECK: $sgpr15 = COPY [[INT15]](s32) - ; CHECK: SI_RETURN_TO_EPILOG implicit $sgpr0, implicit $sgpr1, implicit $sgpr2, implicit $sgpr3, implicit $sgpr4, implicit $sgpr5, implicit $sgpr6, implicit $sgpr7, implicit $sgpr8, implicit $sgpr9, implicit $sgpr10, implicit $sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15 + ; CHECK-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr2 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr3 + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:sgpr(s32) = COPY $sgpr4 + ; CHECK-NEXT: [[COPY3:%[0-9]+]]:sgpr(s32) = COPY $sgpr5 + ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:sgpr(<4 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32) + ; CHECK-NEXT: [[COPY4:%[0-9]+]]:sgpr(s32) = COPY $sgpr6 + ; CHECK-NEXT: [[AMDGPU_S_BUFFER_LOAD:%[0-9]+]]:sgpr(<16 x s32>) = G_AMDGPU_S_BUFFER_LOAD [[BUILD_VECTOR]](<4 x s32>), [[COPY4]](s32), 0 :: (dereferenceable invariant load (s512), align 4) + ; CHECK-NEXT: [[UV:%[0-9]+]]:sgpr(s32), [[UV1:%[0-9]+]]:sgpr(s32), [[UV2:%[0-9]+]]:sgpr(s32), [[UV3:%[0-9]+]]:sgpr(s32), [[UV4:%[0-9]+]]:sgpr(s32), [[UV5:%[0-9]+]]:sgpr(s32), [[UV6:%[0-9]+]]:sgpr(s32), [[UV7:%[0-9]+]]:sgpr(s32), [[UV8:%[0-9]+]]:sgpr(s32), [[UV9:%[0-9]+]]:sgpr(s32), [[UV10:%[0-9]+]]:sgpr(s32), [[UV11:%[0-9]+]]:sgpr(s32), [[UV12:%[0-9]+]]:sgpr(s32), [[UV13:%[0-9]+]]:sgpr(s32), [[UV14:%[0-9]+]]:sgpr(s32), [[UV15:%[0-9]+]]:sgpr(s32) = G_UNMERGE_VALUES [[AMDGPU_S_BUFFER_LOAD]](<16 x s32>) + ; CHECK-NEXT: [[COPY5:%[0-9]+]]:vgpr(s32) = COPY [[UV]](s32) + ; CHECK-NEXT: [[INT:%[0-9]+]]:sgpr(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.readfirstlane), [[COPY5]](s32) + ; CHECK-NEXT: $sgpr0 = COPY [[INT]](s32) + ; CHECK-NEXT: [[COPY6:%[0-9]+]]:vgpr(s32) = COPY [[UV1]](s32) + ; CHECK-NEXT: [[INT1:%[0-9]+]]:sgpr(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.readfirstlane), [[COPY6]](s32) + ; CHECK-NEXT: $sgpr1 = COPY [[INT1]](s32) + ; CHECK-NEXT: [[COPY7:%[0-9]+]]:vgpr(s32) = COPY [[UV2]](s32) + ; CHECK-NEXT: [[INT2:%[0-9]+]]:sgpr(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.readfirstlane), [[COPY7]](s32) + ; CHECK-NEXT: $sgpr2 = COPY [[INT2]](s32) + ; CHECK-NEXT: [[COPY8:%[0-9]+]]:vgpr(s32) = COPY [[UV3]](s32) + ; CHECK-NEXT: [[INT3:%[0-9]+]]:sgpr(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.readfirstlane), [[COPY8]](s32) + ; CHECK-NEXT: $sgpr3 = COPY [[INT3]](s32) + ; CHECK-NEXT: [[COPY9:%[0-9]+]]:vgpr(s32) = COPY [[UV4]](s32) + ; CHECK-NEXT: [[INT4:%[0-9]+]]:sgpr(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.readfirstlane), [[COPY9]](s32) + ; CHECK-NEXT: $sgpr4 = COPY [[INT4]](s32) + ; CHECK-NEXT: [[COPY10:%[0-9]+]]:vgpr(s32) = COPY [[UV5]](s32) + ; CHECK-NEXT: [[INT5:%[0-9]+]]:sgpr(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.readfirstlane), [[COPY10]](s32) + ; CHECK-NEXT: $sgpr5 = COPY [[INT5]](s32) + ; CHECK-NEXT: [[COPY11:%[0-9]+]]:vgpr(s32) = COPY [[UV6]](s32) + ; CHECK-NEXT: [[INT6:%[0-9]+]]:sgpr(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.readfirstlane), [[COPY11]](s32) + ; CHECK-NEXT: $sgpr6 = COPY [[INT6]](s32) + ; CHECK-NEXT: [[COPY12:%[0-9]+]]:vgpr(s32) 
= COPY [[UV7]](s32) + ; CHECK-NEXT: [[INT7:%[0-9]+]]:sgpr(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.readfirstlane), [[COPY12]](s32) + ; CHECK-NEXT: $sgpr7 = COPY [[INT7]](s32) + ; CHECK-NEXT: [[COPY13:%[0-9]+]]:vgpr(s32) = COPY [[UV8]](s32) + ; CHECK-NEXT: [[INT8:%[0-9]+]]:sgpr(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.readfirstlane), [[COPY13]](s32) + ; CHECK-NEXT: $sgpr8 = COPY [[INT8]](s32) + ; CHECK-NEXT: [[COPY14:%[0-9]+]]:vgpr(s32) = COPY [[UV9]](s32) + ; CHECK-NEXT: [[INT9:%[0-9]+]]:sgpr(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.readfirstlane), [[COPY14]](s32) + ; CHECK-NEXT: $sgpr9 = COPY [[INT9]](s32) + ; CHECK-NEXT: [[COPY15:%[0-9]+]]:vgpr(s32) = COPY [[UV10]](s32) + ; CHECK-NEXT: [[INT10:%[0-9]+]]:sgpr(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.readfirstlane), [[COPY15]](s32) + ; CHECK-NEXT: $sgpr10 = COPY [[INT10]](s32) + ; CHECK-NEXT: [[COPY16:%[0-9]+]]:vgpr(s32) = COPY [[UV11]](s32) + ; CHECK-NEXT: [[INT11:%[0-9]+]]:sgpr(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.readfirstlane), [[COPY16]](s32) + ; CHECK-NEXT: $sgpr11 = COPY [[INT11]](s32) + ; CHECK-NEXT: [[COPY17:%[0-9]+]]:vgpr(s32) = COPY [[UV12]](s32) + ; CHECK-NEXT: [[INT12:%[0-9]+]]:sgpr(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.readfirstlane), [[COPY17]](s32) + ; CHECK-NEXT: $sgpr12 = COPY [[INT12]](s32) + ; CHECK-NEXT: [[COPY18:%[0-9]+]]:vgpr(s32) = COPY [[UV13]](s32) + ; CHECK-NEXT: [[INT13:%[0-9]+]]:sgpr(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.readfirstlane), [[COPY18]](s32) + ; CHECK-NEXT: $sgpr13 = COPY [[INT13]](s32) + ; CHECK-NEXT: [[COPY19:%[0-9]+]]:vgpr(s32) = COPY [[UV14]](s32) + ; CHECK-NEXT: [[INT14:%[0-9]+]]:sgpr(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.readfirstlane), [[COPY19]](s32) + ; CHECK-NEXT: $sgpr14 = COPY [[INT14]](s32) + ; CHECK-NEXT: [[COPY20:%[0-9]+]]:vgpr(s32) = COPY [[UV15]](s32) + ; CHECK-NEXT: [[INT15:%[0-9]+]]:sgpr(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.readfirstlane), [[COPY20]](s32) + ; CHECK-NEXT: $sgpr15 = COPY [[INT15]](s32) + ; CHECK-NEXT: SI_RETURN_TO_EPILOG implicit $sgpr0, implicit $sgpr1, implicit $sgpr2, implicit $sgpr3, implicit $sgpr4, implicit $sgpr5, implicit $sgpr6, implicit $sgpr7, implicit $sgpr8, implicit $sgpr9, implicit $sgpr10, implicit $sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15 ; GREEDY-LABEL: name: s_buffer_load_v16i32 ; GREEDY: bb.1 (%ir-block.0): - ; GREEDY: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6 - ; GREEDY: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr2 - ; GREEDY: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr3 - ; GREEDY: [[COPY2:%[0-9]+]]:sgpr(s32) = COPY $sgpr4 - ; GREEDY: [[COPY3:%[0-9]+]]:sgpr(s32) = COPY $sgpr5 - ; GREEDY: [[BUILD_VECTOR:%[0-9]+]]:sgpr(<4 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32) - ; GREEDY: [[COPY4:%[0-9]+]]:sgpr(s32) = COPY $sgpr6 - ; GREEDY: [[AMDGPU_S_BUFFER_LOAD:%[0-9]+]]:sgpr(<16 x s32>) = G_AMDGPU_S_BUFFER_LOAD [[BUILD_VECTOR]](<4 x s32>), [[COPY4]](s32), 0 :: (dereferenceable invariant load (s512), align 4) - ; GREEDY: [[UV:%[0-9]+]]:sgpr(s32), [[UV1:%[0-9]+]]:sgpr(s32), [[UV2:%[0-9]+]]:sgpr(s32), [[UV3:%[0-9]+]]:sgpr(s32), [[UV4:%[0-9]+]]:sgpr(s32), [[UV5:%[0-9]+]]:sgpr(s32), [[UV6:%[0-9]+]]:sgpr(s32), [[UV7:%[0-9]+]]:sgpr(s32), [[UV8:%[0-9]+]]:sgpr(s32), [[UV9:%[0-9]+]]:sgpr(s32), [[UV10:%[0-9]+]]:sgpr(s32), [[UV11:%[0-9]+]]:sgpr(s32), [[UV12:%[0-9]+]]:sgpr(s32), [[UV13:%[0-9]+]]:sgpr(s32), [[UV14:%[0-9]+]]:sgpr(s32), [[UV15:%[0-9]+]]:sgpr(s32) = G_UNMERGE_VALUES [[AMDGPU_S_BUFFER_LOAD]](<16 x s32>) - ; GREEDY: [[COPY5:%[0-9]+]]:vgpr(s32) = COPY 
[[UV]](s32) - ; GREEDY: [[INT:%[0-9]+]]:sgpr(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.readfirstlane), [[COPY5]](s32) - ; GREEDY: $sgpr0 = COPY [[INT]](s32) - ; GREEDY: [[COPY6:%[0-9]+]]:vgpr(s32) = COPY [[UV1]](s32) - ; GREEDY: [[INT1:%[0-9]+]]:sgpr(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.readfirstlane), [[COPY6]](s32) - ; GREEDY: $sgpr1 = COPY [[INT1]](s32) - ; GREEDY: [[COPY7:%[0-9]+]]:vgpr(s32) = COPY [[UV2]](s32) - ; GREEDY: [[INT2:%[0-9]+]]:sgpr(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.readfirstlane), [[COPY7]](s32) - ; GREEDY: $sgpr2 = COPY [[INT2]](s32) - ; GREEDY: [[COPY8:%[0-9]+]]:vgpr(s32) = COPY [[UV3]](s32) - ; GREEDY: [[INT3:%[0-9]+]]:sgpr(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.readfirstlane), [[COPY8]](s32) - ; GREEDY: $sgpr3 = COPY [[INT3]](s32) - ; GREEDY: [[COPY9:%[0-9]+]]:vgpr(s32) = COPY [[UV4]](s32) - ; GREEDY: [[INT4:%[0-9]+]]:sgpr(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.readfirstlane), [[COPY9]](s32) - ; GREEDY: $sgpr4 = COPY [[INT4]](s32) - ; GREEDY: [[COPY10:%[0-9]+]]:vgpr(s32) = COPY [[UV5]](s32) - ; GREEDY: [[INT5:%[0-9]+]]:sgpr(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.readfirstlane), [[COPY10]](s32) - ; GREEDY: $sgpr5 = COPY [[INT5]](s32) - ; GREEDY: [[COPY11:%[0-9]+]]:vgpr(s32) = COPY [[UV6]](s32) - ; GREEDY: [[INT6:%[0-9]+]]:sgpr(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.readfirstlane), [[COPY11]](s32) - ; GREEDY: $sgpr6 = COPY [[INT6]](s32) - ; GREEDY: [[COPY12:%[0-9]+]]:vgpr(s32) = COPY [[UV7]](s32) - ; GREEDY: [[INT7:%[0-9]+]]:sgpr(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.readfirstlane), [[COPY12]](s32) - ; GREEDY: $sgpr7 = COPY [[INT7]](s32) - ; GREEDY: [[COPY13:%[0-9]+]]:vgpr(s32) = COPY [[UV8]](s32) - ; GREEDY: [[INT8:%[0-9]+]]:sgpr(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.readfirstlane), [[COPY13]](s32) - ; GREEDY: $sgpr8 = COPY [[INT8]](s32) - ; GREEDY: [[COPY14:%[0-9]+]]:vgpr(s32) = COPY [[UV9]](s32) - ; GREEDY: [[INT9:%[0-9]+]]:sgpr(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.readfirstlane), [[COPY14]](s32) - ; GREEDY: $sgpr9 = COPY [[INT9]](s32) - ; GREEDY: [[COPY15:%[0-9]+]]:vgpr(s32) = COPY [[UV10]](s32) - ; GREEDY: [[INT10:%[0-9]+]]:sgpr(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.readfirstlane), [[COPY15]](s32) - ; GREEDY: $sgpr10 = COPY [[INT10]](s32) - ; GREEDY: [[COPY16:%[0-9]+]]:vgpr(s32) = COPY [[UV11]](s32) - ; GREEDY: [[INT11:%[0-9]+]]:sgpr(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.readfirstlane), [[COPY16]](s32) - ; GREEDY: $sgpr11 = COPY [[INT11]](s32) - ; GREEDY: [[COPY17:%[0-9]+]]:vgpr(s32) = COPY [[UV12]](s32) - ; GREEDY: [[INT12:%[0-9]+]]:sgpr(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.readfirstlane), [[COPY17]](s32) - ; GREEDY: $sgpr12 = COPY [[INT12]](s32) - ; GREEDY: [[COPY18:%[0-9]+]]:vgpr(s32) = COPY [[UV13]](s32) - ; GREEDY: [[INT13:%[0-9]+]]:sgpr(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.readfirstlane), [[COPY18]](s32) - ; GREEDY: $sgpr13 = COPY [[INT13]](s32) - ; GREEDY: [[COPY19:%[0-9]+]]:vgpr(s32) = COPY [[UV14]](s32) - ; GREEDY: [[INT14:%[0-9]+]]:sgpr(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.readfirstlane), [[COPY19]](s32) - ; GREEDY: $sgpr14 = COPY [[INT14]](s32) - ; GREEDY: [[COPY20:%[0-9]+]]:vgpr(s32) = COPY [[UV15]](s32) - ; GREEDY: [[INT15:%[0-9]+]]:sgpr(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.readfirstlane), [[COPY20]](s32) - ; GREEDY: $sgpr15 = COPY [[INT15]](s32) - ; GREEDY: SI_RETURN_TO_EPILOG implicit $sgpr0, implicit $sgpr1, implicit $sgpr2, implicit $sgpr3, implicit $sgpr4, implicit $sgpr5, implicit $sgpr6, implicit $sgpr7, implicit $sgpr8, implicit $sgpr9, implicit $sgpr10, implicit $sgpr11, implicit 
$sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15 + ; GREEDY-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6 + ; GREEDY-NEXT: {{ $}} + ; GREEDY-NEXT: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr2 + ; GREEDY-NEXT: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr3 + ; GREEDY-NEXT: [[COPY2:%[0-9]+]]:sgpr(s32) = COPY $sgpr4 + ; GREEDY-NEXT: [[COPY3:%[0-9]+]]:sgpr(s32) = COPY $sgpr5 + ; GREEDY-NEXT: [[BUILD_VECTOR:%[0-9]+]]:sgpr(<4 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32) + ; GREEDY-NEXT: [[COPY4:%[0-9]+]]:sgpr(s32) = COPY $sgpr6 + ; GREEDY-NEXT: [[AMDGPU_S_BUFFER_LOAD:%[0-9]+]]:sgpr(<16 x s32>) = G_AMDGPU_S_BUFFER_LOAD [[BUILD_VECTOR]](<4 x s32>), [[COPY4]](s32), 0 :: (dereferenceable invariant load (s512), align 4) + ; GREEDY-NEXT: [[UV:%[0-9]+]]:sgpr(s32), [[UV1:%[0-9]+]]:sgpr(s32), [[UV2:%[0-9]+]]:sgpr(s32), [[UV3:%[0-9]+]]:sgpr(s32), [[UV4:%[0-9]+]]:sgpr(s32), [[UV5:%[0-9]+]]:sgpr(s32), [[UV6:%[0-9]+]]:sgpr(s32), [[UV7:%[0-9]+]]:sgpr(s32), [[UV8:%[0-9]+]]:sgpr(s32), [[UV9:%[0-9]+]]:sgpr(s32), [[UV10:%[0-9]+]]:sgpr(s32), [[UV11:%[0-9]+]]:sgpr(s32), [[UV12:%[0-9]+]]:sgpr(s32), [[UV13:%[0-9]+]]:sgpr(s32), [[UV14:%[0-9]+]]:sgpr(s32), [[UV15:%[0-9]+]]:sgpr(s32) = G_UNMERGE_VALUES [[AMDGPU_S_BUFFER_LOAD]](<16 x s32>) + ; GREEDY-NEXT: [[COPY5:%[0-9]+]]:vgpr(s32) = COPY [[UV]](s32) + ; GREEDY-NEXT: [[INT:%[0-9]+]]:sgpr(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.readfirstlane), [[COPY5]](s32) + ; GREEDY-NEXT: $sgpr0 = COPY [[INT]](s32) + ; GREEDY-NEXT: [[COPY6:%[0-9]+]]:vgpr(s32) = COPY [[UV1]](s32) + ; GREEDY-NEXT: [[INT1:%[0-9]+]]:sgpr(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.readfirstlane), [[COPY6]](s32) + ; GREEDY-NEXT: $sgpr1 = COPY [[INT1]](s32) + ; GREEDY-NEXT: [[COPY7:%[0-9]+]]:vgpr(s32) = COPY [[UV2]](s32) + ; GREEDY-NEXT: [[INT2:%[0-9]+]]:sgpr(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.readfirstlane), [[COPY7]](s32) + ; GREEDY-NEXT: $sgpr2 = COPY [[INT2]](s32) + ; GREEDY-NEXT: [[COPY8:%[0-9]+]]:vgpr(s32) = COPY [[UV3]](s32) + ; GREEDY-NEXT: [[INT3:%[0-9]+]]:sgpr(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.readfirstlane), [[COPY8]](s32) + ; GREEDY-NEXT: $sgpr3 = COPY [[INT3]](s32) + ; GREEDY-NEXT: [[COPY9:%[0-9]+]]:vgpr(s32) = COPY [[UV4]](s32) + ; GREEDY-NEXT: [[INT4:%[0-9]+]]:sgpr(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.readfirstlane), [[COPY9]](s32) + ; GREEDY-NEXT: $sgpr4 = COPY [[INT4]](s32) + ; GREEDY-NEXT: [[COPY10:%[0-9]+]]:vgpr(s32) = COPY [[UV5]](s32) + ; GREEDY-NEXT: [[INT5:%[0-9]+]]:sgpr(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.readfirstlane), [[COPY10]](s32) + ; GREEDY-NEXT: $sgpr5 = COPY [[INT5]](s32) + ; GREEDY-NEXT: [[COPY11:%[0-9]+]]:vgpr(s32) = COPY [[UV6]](s32) + ; GREEDY-NEXT: [[INT6:%[0-9]+]]:sgpr(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.readfirstlane), [[COPY11]](s32) + ; GREEDY-NEXT: $sgpr6 = COPY [[INT6]](s32) + ; GREEDY-NEXT: [[COPY12:%[0-9]+]]:vgpr(s32) = COPY [[UV7]](s32) + ; GREEDY-NEXT: [[INT7:%[0-9]+]]:sgpr(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.readfirstlane), [[COPY12]](s32) + ; GREEDY-NEXT: $sgpr7 = COPY [[INT7]](s32) + ; GREEDY-NEXT: [[COPY13:%[0-9]+]]:vgpr(s32) = COPY [[UV8]](s32) + ; GREEDY-NEXT: [[INT8:%[0-9]+]]:sgpr(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.readfirstlane), [[COPY13]](s32) + ; GREEDY-NEXT: $sgpr8 = COPY [[INT8]](s32) + ; GREEDY-NEXT: [[COPY14:%[0-9]+]]:vgpr(s32) = COPY [[UV9]](s32) + ; GREEDY-NEXT: [[INT9:%[0-9]+]]:sgpr(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.readfirstlane), [[COPY14]](s32) + ; GREEDY-NEXT: $sgpr9 = COPY [[INT9]](s32) + ; GREEDY-NEXT: [[COPY15:%[0-9]+]]:vgpr(s32) 
= COPY [[UV10]](s32) + ; GREEDY-NEXT: [[INT10:%[0-9]+]]:sgpr(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.readfirstlane), [[COPY15]](s32) + ; GREEDY-NEXT: $sgpr10 = COPY [[INT10]](s32) + ; GREEDY-NEXT: [[COPY16:%[0-9]+]]:vgpr(s32) = COPY [[UV11]](s32) + ; GREEDY-NEXT: [[INT11:%[0-9]+]]:sgpr(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.readfirstlane), [[COPY16]](s32) + ; GREEDY-NEXT: $sgpr11 = COPY [[INT11]](s32) + ; GREEDY-NEXT: [[COPY17:%[0-9]+]]:vgpr(s32) = COPY [[UV12]](s32) + ; GREEDY-NEXT: [[INT12:%[0-9]+]]:sgpr(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.readfirstlane), [[COPY17]](s32) + ; GREEDY-NEXT: $sgpr12 = COPY [[INT12]](s32) + ; GREEDY-NEXT: [[COPY18:%[0-9]+]]:vgpr(s32) = COPY [[UV13]](s32) + ; GREEDY-NEXT: [[INT13:%[0-9]+]]:sgpr(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.readfirstlane), [[COPY18]](s32) + ; GREEDY-NEXT: $sgpr13 = COPY [[INT13]](s32) + ; GREEDY-NEXT: [[COPY19:%[0-9]+]]:vgpr(s32) = COPY [[UV14]](s32) + ; GREEDY-NEXT: [[INT14:%[0-9]+]]:sgpr(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.readfirstlane), [[COPY19]](s32) + ; GREEDY-NEXT: $sgpr14 = COPY [[INT14]](s32) + ; GREEDY-NEXT: [[COPY20:%[0-9]+]]:vgpr(s32) = COPY [[UV15]](s32) + ; GREEDY-NEXT: [[INT15:%[0-9]+]]:sgpr(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.readfirstlane), [[COPY20]](s32) + ; GREEDY-NEXT: $sgpr15 = COPY [[INT15]](s32) + ; GREEDY-NEXT: SI_RETURN_TO_EPILOG implicit $sgpr0, implicit $sgpr1, implicit $sgpr2, implicit $sgpr3, implicit $sgpr4, implicit $sgpr5, implicit $sgpr6, implicit $sgpr7, implicit $sgpr8, implicit $sgpr9, implicit $sgpr10, implicit $sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15 %val = call <16 x i32> @llvm.amdgcn.s.buffer.load.v16i32(<4 x i32> %rsrc, i32 %soffset, i32 0) ret <16 x i32> %val } @@ -340,32 +350,34 @@ define amdgpu_ps <16 x i32> @s_buffer_load_v16i32(<4 x i32> inreg %rsrc, i32 inr define amdgpu_ps float @s_buffer_load_f32_vgpr_offset(<4 x i32> inreg %rsrc, i32 %soffset) { ; CHECK-LABEL: name: s_buffer_load_f32_vgpr_offset ; CHECK: bb.1 (%ir-block.0): - ; CHECK: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $vgpr0 - ; CHECK: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr2 - ; CHECK: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr3 - ; CHECK: [[COPY2:%[0-9]+]]:sgpr(s32) = COPY $sgpr4 - ; CHECK: [[COPY3:%[0-9]+]]:sgpr(s32) = COPY $sgpr5 - ; CHECK: [[BUILD_VECTOR:%[0-9]+]]:sgpr(<4 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32) - ; CHECK: [[COPY4:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 - ; CHECK: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 0 - ; CHECK: [[C1:%[0-9]+]]:vgpr(s32) = G_CONSTANT i32 0 - ; CHECK: [[AMDGPU_BUFFER_LOAD:%[0-9]+]]:vgpr(s32) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR]](<4 x s32>), [[C1]](s32), [[COPY4]], [[C]], 0, 0, 0 :: (dereferenceable invariant load (s32)) - ; CHECK: $vgpr0 = COPY [[AMDGPU_BUFFER_LOAD]](s32) - ; CHECK: SI_RETURN_TO_EPILOG implicit $vgpr0 + ; CHECK-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $vgpr0 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr2 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr3 + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:sgpr(s32) = COPY $sgpr4 + ; CHECK-NEXT: [[COPY3:%[0-9]+]]:sgpr(s32) = COPY $sgpr5 + ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:sgpr(<4 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32) + ; CHECK-NEXT: [[COPY4:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 + ; CHECK-NEXT: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 0 + ; CHECK-NEXT: [[C1:%[0-9]+]]:vgpr(s32) = G_CONSTANT i32 0 + ; CHECK-NEXT: 
[[AMDGPU_BUFFER_LOAD:%[0-9]+]]:vgpr(s32) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR]](<4 x s32>), [[C1]](s32), [[COPY4]], [[C]], 0, 0, 0 :: (dereferenceable invariant load (s32)) + ; CHECK-NEXT: $vgpr0 = COPY [[AMDGPU_BUFFER_LOAD]](s32) + ; CHECK-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0 ; GREEDY-LABEL: name: s_buffer_load_f32_vgpr_offset ; GREEDY: bb.1 (%ir-block.0): - ; GREEDY: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $vgpr0 - ; GREEDY: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr2 - ; GREEDY: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr3 - ; GREEDY: [[COPY2:%[0-9]+]]:sgpr(s32) = COPY $sgpr4 - ; GREEDY: [[COPY3:%[0-9]+]]:sgpr(s32) = COPY $sgpr5 - ; GREEDY: [[BUILD_VECTOR:%[0-9]+]]:sgpr(<4 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32) - ; GREEDY: [[COPY4:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 - ; GREEDY: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 0 - ; GREEDY: [[C1:%[0-9]+]]:vgpr(s32) = G_CONSTANT i32 0 - ; GREEDY: [[AMDGPU_BUFFER_LOAD:%[0-9]+]]:vgpr(s32) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR]](<4 x s32>), [[C1]](s32), [[COPY4]], [[C]], 0, 0, 0 :: (dereferenceable invariant load (s32)) - ; GREEDY: $vgpr0 = COPY [[AMDGPU_BUFFER_LOAD]](s32) - ; GREEDY: SI_RETURN_TO_EPILOG implicit $vgpr0 + ; GREEDY-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $vgpr0 + ; GREEDY-NEXT: {{ $}} + ; GREEDY-NEXT: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr2 + ; GREEDY-NEXT: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr3 + ; GREEDY-NEXT: [[COPY2:%[0-9]+]]:sgpr(s32) = COPY $sgpr4 + ; GREEDY-NEXT: [[COPY3:%[0-9]+]]:sgpr(s32) = COPY $sgpr5 + ; GREEDY-NEXT: [[BUILD_VECTOR:%[0-9]+]]:sgpr(<4 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32) + ; GREEDY-NEXT: [[COPY4:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 + ; GREEDY-NEXT: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 0 + ; GREEDY-NEXT: [[C1:%[0-9]+]]:vgpr(s32) = G_CONSTANT i32 0 + ; GREEDY-NEXT: [[AMDGPU_BUFFER_LOAD:%[0-9]+]]:vgpr(s32) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR]](<4 x s32>), [[C1]](s32), [[COPY4]], [[C]], 0, 0, 0 :: (dereferenceable invariant load (s32)) + ; GREEDY-NEXT: $vgpr0 = COPY [[AMDGPU_BUFFER_LOAD]](s32) + ; GREEDY-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0 %val = call float @llvm.amdgcn.s.buffer.load.f32(<4 x i32> %rsrc, i32 %soffset, i32 0) ret float %val } @@ -373,36 +385,38 @@ define amdgpu_ps float @s_buffer_load_f32_vgpr_offset(<4 x i32> inreg %rsrc, i32 define amdgpu_ps <2 x float> @s_buffer_load_v2f32_vgpr_offset(<4 x i32> inreg %rsrc, i32 %soffset) { ; CHECK-LABEL: name: s_buffer_load_v2f32_vgpr_offset ; CHECK: bb.1 (%ir-block.0): - ; CHECK: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $vgpr0 - ; CHECK: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr2 - ; CHECK: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr3 - ; CHECK: [[COPY2:%[0-9]+]]:sgpr(s32) = COPY $sgpr4 - ; CHECK: [[COPY3:%[0-9]+]]:sgpr(s32) = COPY $sgpr5 - ; CHECK: [[BUILD_VECTOR:%[0-9]+]]:sgpr(<4 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32) - ; CHECK: [[COPY4:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 - ; CHECK: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 0 - ; CHECK: [[C1:%[0-9]+]]:vgpr(s32) = G_CONSTANT i32 0 - ; CHECK: [[AMDGPU_BUFFER_LOAD:%[0-9]+]]:vgpr(<2 x s32>) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR]](<4 x s32>), [[C1]](s32), [[COPY4]], [[C]], 0, 0, 0 :: (dereferenceable invariant load (s64), align 4) - ; CHECK: [[UV:%[0-9]+]]:vgpr(s32), [[UV1:%[0-9]+]]:vgpr(s32) = G_UNMERGE_VALUES [[AMDGPU_BUFFER_LOAD]](<2 x s32>) - ; CHECK: $vgpr0 = COPY [[UV]](s32) - ; CHECK: $vgpr1 = COPY [[UV1]](s32) - ; CHECK: SI_RETURN_TO_EPILOG implicit 
$vgpr0, implicit $vgpr1 + ; CHECK-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $vgpr0 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr2 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr3 + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:sgpr(s32) = COPY $sgpr4 + ; CHECK-NEXT: [[COPY3:%[0-9]+]]:sgpr(s32) = COPY $sgpr5 + ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:sgpr(<4 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32) + ; CHECK-NEXT: [[COPY4:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 + ; CHECK-NEXT: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 0 + ; CHECK-NEXT: [[C1:%[0-9]+]]:vgpr(s32) = G_CONSTANT i32 0 + ; CHECK-NEXT: [[AMDGPU_BUFFER_LOAD:%[0-9]+]]:vgpr(<2 x s32>) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR]](<4 x s32>), [[C1]](s32), [[COPY4]], [[C]], 0, 0, 0 :: (dereferenceable invariant load (s64), align 4) + ; CHECK-NEXT: [[UV:%[0-9]+]]:vgpr(s32), [[UV1:%[0-9]+]]:vgpr(s32) = G_UNMERGE_VALUES [[AMDGPU_BUFFER_LOAD]](<2 x s32>) + ; CHECK-NEXT: $vgpr0 = COPY [[UV]](s32) + ; CHECK-NEXT: $vgpr1 = COPY [[UV1]](s32) + ; CHECK-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1 ; GREEDY-LABEL: name: s_buffer_load_v2f32_vgpr_offset ; GREEDY: bb.1 (%ir-block.0): - ; GREEDY: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $vgpr0 - ; GREEDY: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr2 - ; GREEDY: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr3 - ; GREEDY: [[COPY2:%[0-9]+]]:sgpr(s32) = COPY $sgpr4 - ; GREEDY: [[COPY3:%[0-9]+]]:sgpr(s32) = COPY $sgpr5 - ; GREEDY: [[BUILD_VECTOR:%[0-9]+]]:sgpr(<4 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32) - ; GREEDY: [[COPY4:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 - ; GREEDY: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 0 - ; GREEDY: [[C1:%[0-9]+]]:vgpr(s32) = G_CONSTANT i32 0 - ; GREEDY: [[AMDGPU_BUFFER_LOAD:%[0-9]+]]:vgpr(<2 x s32>) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR]](<4 x s32>), [[C1]](s32), [[COPY4]], [[C]], 0, 0, 0 :: (dereferenceable invariant load (s64), align 4) - ; GREEDY: [[UV:%[0-9]+]]:vgpr(s32), [[UV1:%[0-9]+]]:vgpr(s32) = G_UNMERGE_VALUES [[AMDGPU_BUFFER_LOAD]](<2 x s32>) - ; GREEDY: $vgpr0 = COPY [[UV]](s32) - ; GREEDY: $vgpr1 = COPY [[UV1]](s32) - ; GREEDY: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1 + ; GREEDY-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $vgpr0 + ; GREEDY-NEXT: {{ $}} + ; GREEDY-NEXT: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr2 + ; GREEDY-NEXT: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr3 + ; GREEDY-NEXT: [[COPY2:%[0-9]+]]:sgpr(s32) = COPY $sgpr4 + ; GREEDY-NEXT: [[COPY3:%[0-9]+]]:sgpr(s32) = COPY $sgpr5 + ; GREEDY-NEXT: [[BUILD_VECTOR:%[0-9]+]]:sgpr(<4 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32) + ; GREEDY-NEXT: [[COPY4:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 + ; GREEDY-NEXT: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 0 + ; GREEDY-NEXT: [[C1:%[0-9]+]]:vgpr(s32) = G_CONSTANT i32 0 + ; GREEDY-NEXT: [[AMDGPU_BUFFER_LOAD:%[0-9]+]]:vgpr(<2 x s32>) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR]](<4 x s32>), [[C1]](s32), [[COPY4]], [[C]], 0, 0, 0 :: (dereferenceable invariant load (s64), align 4) + ; GREEDY-NEXT: [[UV:%[0-9]+]]:vgpr(s32), [[UV1:%[0-9]+]]:vgpr(s32) = G_UNMERGE_VALUES [[AMDGPU_BUFFER_LOAD]](<2 x s32>) + ; GREEDY-NEXT: $vgpr0 = COPY [[UV]](s32) + ; GREEDY-NEXT: $vgpr1 = COPY [[UV1]](s32) + ; GREEDY-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1 %val = call <2 x float> @llvm.amdgcn.s.buffer.load.v2f32(<4 x i32> %rsrc, i32 %soffset, i32 0) ret <2 x float> %val } @@ -410,52 +424,54 @@ define amdgpu_ps <2 x float> 
@s_buffer_load_v2f32_vgpr_offset(<4 x i32> inreg %r define amdgpu_ps <3 x float> @s_buffer_load_v3f32_vgpr_offset(<4 x i32> inreg %rsrc, i32 %soffset) { ; CHECK-LABEL: name: s_buffer_load_v3f32_vgpr_offset ; CHECK: bb.1 (%ir-block.0): - ; CHECK: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $vgpr0 - ; CHECK: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr2 - ; CHECK: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr3 - ; CHECK: [[COPY2:%[0-9]+]]:sgpr(s32) = COPY $sgpr4 - ; CHECK: [[COPY3:%[0-9]+]]:sgpr(s32) = COPY $sgpr5 - ; CHECK: [[BUILD_VECTOR:%[0-9]+]]:sgpr(<4 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32) - ; CHECK: [[COPY4:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 - ; CHECK: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 0 - ; CHECK: [[C1:%[0-9]+]]:vgpr(s32) = G_CONSTANT i32 0 - ; CHECK: [[AMDGPU_BUFFER_LOAD:%[0-9]+]]:vgpr(<4 x s32>) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR]](<4 x s32>), [[C1]](s32), [[COPY4]], [[C]], 0, 0, 0 :: (dereferenceable invariant load (s128), align 4) - ; CHECK: [[DEF:%[0-9]+]]:sgpr(<4 x s32>) = G_IMPLICIT_DEF - ; CHECK: [[COPY5:%[0-9]+]]:vgpr(<4 x s32>) = COPY [[DEF]](<4 x s32>) - ; CHECK: [[COPY6:%[0-9]+]]:vgpr(<4 x s32>) = COPY [[DEF]](<4 x s32>) - ; CHECK: [[CONCAT_VECTORS:%[0-9]+]]:vgpr(<12 x s32>) = G_CONCAT_VECTORS [[AMDGPU_BUFFER_LOAD]](<4 x s32>), [[COPY5]](<4 x s32>), [[COPY6]](<4 x s32>) - ; CHECK: [[BITCAST:%[0-9]+]]:vgpr(s384) = G_BITCAST [[CONCAT_VECTORS]](<12 x s32>) - ; CHECK: [[TRUNC:%[0-9]+]]:vgpr(s96) = G_TRUNC [[BITCAST]](s384) - ; CHECK: [[BITCAST1:%[0-9]+]]:vgpr(<3 x s32>) = G_BITCAST [[TRUNC]](s96) - ; CHECK: [[UV:%[0-9]+]]:vgpr(s32), [[UV1:%[0-9]+]]:vgpr(s32), [[UV2:%[0-9]+]]:vgpr(s32) = G_UNMERGE_VALUES [[BITCAST1]](<3 x s32>) - ; CHECK: $vgpr0 = COPY [[UV]](s32) - ; CHECK: $vgpr1 = COPY [[UV1]](s32) - ; CHECK: $vgpr2 = COPY [[UV2]](s32) - ; CHECK: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2 + ; CHECK-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $vgpr0 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr2 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr3 + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:sgpr(s32) = COPY $sgpr4 + ; CHECK-NEXT: [[COPY3:%[0-9]+]]:sgpr(s32) = COPY $sgpr5 + ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:sgpr(<4 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32) + ; CHECK-NEXT: [[COPY4:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 + ; CHECK-NEXT: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 0 + ; CHECK-NEXT: [[C1:%[0-9]+]]:vgpr(s32) = G_CONSTANT i32 0 + ; CHECK-NEXT: [[AMDGPU_BUFFER_LOAD:%[0-9]+]]:vgpr(<4 x s32>) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR]](<4 x s32>), [[C1]](s32), [[COPY4]], [[C]], 0, 0, 0 :: (dereferenceable invariant load (s128), align 4) + ; CHECK-NEXT: [[DEF:%[0-9]+]]:sgpr(<4 x s32>) = G_IMPLICIT_DEF + ; CHECK-NEXT: [[COPY5:%[0-9]+]]:vgpr(<4 x s32>) = COPY [[DEF]](<4 x s32>) + ; CHECK-NEXT: [[COPY6:%[0-9]+]]:vgpr(<4 x s32>) = COPY [[DEF]](<4 x s32>) + ; CHECK-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:vgpr(<12 x s32>) = G_CONCAT_VECTORS [[AMDGPU_BUFFER_LOAD]](<4 x s32>), [[COPY5]](<4 x s32>), [[COPY6]](<4 x s32>) + ; CHECK-NEXT: [[BITCAST:%[0-9]+]]:vgpr(s384) = G_BITCAST [[CONCAT_VECTORS]](<12 x s32>) + ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:vgpr(s96) = G_TRUNC [[BITCAST]](s384) + ; CHECK-NEXT: [[BITCAST1:%[0-9]+]]:vgpr(<3 x s32>) = G_BITCAST [[TRUNC]](s96) + ; CHECK-NEXT: [[UV:%[0-9]+]]:vgpr(s32), [[UV1:%[0-9]+]]:vgpr(s32), [[UV2:%[0-9]+]]:vgpr(s32) = G_UNMERGE_VALUES [[BITCAST1]](<3 x s32>) + ; CHECK-NEXT: $vgpr0 = COPY [[UV]](s32) + ; 
CHECK-NEXT: $vgpr1 = COPY [[UV1]](s32) + ; CHECK-NEXT: $vgpr2 = COPY [[UV2]](s32) + ; CHECK-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2 ; GREEDY-LABEL: name: s_buffer_load_v3f32_vgpr_offset ; GREEDY: bb.1 (%ir-block.0): - ; GREEDY: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $vgpr0 - ; GREEDY: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr2 - ; GREEDY: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr3 - ; GREEDY: [[COPY2:%[0-9]+]]:sgpr(s32) = COPY $sgpr4 - ; GREEDY: [[COPY3:%[0-9]+]]:sgpr(s32) = COPY $sgpr5 - ; GREEDY: [[BUILD_VECTOR:%[0-9]+]]:sgpr(<4 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32) - ; GREEDY: [[COPY4:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 - ; GREEDY: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 0 - ; GREEDY: [[C1:%[0-9]+]]:vgpr(s32) = G_CONSTANT i32 0 - ; GREEDY: [[AMDGPU_BUFFER_LOAD:%[0-9]+]]:vgpr(<4 x s32>) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR]](<4 x s32>), [[C1]](s32), [[COPY4]], [[C]], 0, 0, 0 :: (dereferenceable invariant load (s128), align 4) - ; GREEDY: [[DEF:%[0-9]+]]:sgpr(<4 x s32>) = G_IMPLICIT_DEF - ; GREEDY: [[COPY5:%[0-9]+]]:vgpr(<4 x s32>) = COPY [[DEF]](<4 x s32>) - ; GREEDY: [[COPY6:%[0-9]+]]:vgpr(<4 x s32>) = COPY [[DEF]](<4 x s32>) - ; GREEDY: [[CONCAT_VECTORS:%[0-9]+]]:vgpr(<12 x s32>) = G_CONCAT_VECTORS [[AMDGPU_BUFFER_LOAD]](<4 x s32>), [[COPY5]](<4 x s32>), [[COPY6]](<4 x s32>) - ; GREEDY: [[BITCAST:%[0-9]+]]:vgpr(s384) = G_BITCAST [[CONCAT_VECTORS]](<12 x s32>) - ; GREEDY: [[TRUNC:%[0-9]+]]:vgpr(s96) = G_TRUNC [[BITCAST]](s384) - ; GREEDY: [[BITCAST1:%[0-9]+]]:vgpr(<3 x s32>) = G_BITCAST [[TRUNC]](s96) - ; GREEDY: [[UV:%[0-9]+]]:vgpr(s32), [[UV1:%[0-9]+]]:vgpr(s32), [[UV2:%[0-9]+]]:vgpr(s32) = G_UNMERGE_VALUES [[BITCAST1]](<3 x s32>) - ; GREEDY: $vgpr0 = COPY [[UV]](s32) - ; GREEDY: $vgpr1 = COPY [[UV1]](s32) - ; GREEDY: $vgpr2 = COPY [[UV2]](s32) - ; GREEDY: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2 + ; GREEDY-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $vgpr0 + ; GREEDY-NEXT: {{ $}} + ; GREEDY-NEXT: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr2 + ; GREEDY-NEXT: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr3 + ; GREEDY-NEXT: [[COPY2:%[0-9]+]]:sgpr(s32) = COPY $sgpr4 + ; GREEDY-NEXT: [[COPY3:%[0-9]+]]:sgpr(s32) = COPY $sgpr5 + ; GREEDY-NEXT: [[BUILD_VECTOR:%[0-9]+]]:sgpr(<4 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32) + ; GREEDY-NEXT: [[COPY4:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 + ; GREEDY-NEXT: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 0 + ; GREEDY-NEXT: [[C1:%[0-9]+]]:vgpr(s32) = G_CONSTANT i32 0 + ; GREEDY-NEXT: [[AMDGPU_BUFFER_LOAD:%[0-9]+]]:vgpr(<4 x s32>) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR]](<4 x s32>), [[C1]](s32), [[COPY4]], [[C]], 0, 0, 0 :: (dereferenceable invariant load (s128), align 4) + ; GREEDY-NEXT: [[DEF:%[0-9]+]]:sgpr(<4 x s32>) = G_IMPLICIT_DEF + ; GREEDY-NEXT: [[COPY5:%[0-9]+]]:vgpr(<4 x s32>) = COPY [[DEF]](<4 x s32>) + ; GREEDY-NEXT: [[COPY6:%[0-9]+]]:vgpr(<4 x s32>) = COPY [[DEF]](<4 x s32>) + ; GREEDY-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:vgpr(<12 x s32>) = G_CONCAT_VECTORS [[AMDGPU_BUFFER_LOAD]](<4 x s32>), [[COPY5]](<4 x s32>), [[COPY6]](<4 x s32>) + ; GREEDY-NEXT: [[BITCAST:%[0-9]+]]:vgpr(s384) = G_BITCAST [[CONCAT_VECTORS]](<12 x s32>) + ; GREEDY-NEXT: [[TRUNC:%[0-9]+]]:vgpr(s96) = G_TRUNC [[BITCAST]](s384) + ; GREEDY-NEXT: [[BITCAST1:%[0-9]+]]:vgpr(<3 x s32>) = G_BITCAST [[TRUNC]](s96) + ; GREEDY-NEXT: [[UV:%[0-9]+]]:vgpr(s32), [[UV1:%[0-9]+]]:vgpr(s32), [[UV2:%[0-9]+]]:vgpr(s32) = G_UNMERGE_VALUES [[BITCAST1]](<3 x 
s32>) + ; GREEDY-NEXT: $vgpr0 = COPY [[UV]](s32) + ; GREEDY-NEXT: $vgpr1 = COPY [[UV1]](s32) + ; GREEDY-NEXT: $vgpr2 = COPY [[UV2]](s32) + ; GREEDY-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2 %val = call <3 x float> @llvm.amdgcn.s.buffer.load.v3f32(<4 x i32> %rsrc, i32 %soffset, i32 0) ret <3 x float> %val } @@ -463,40 +479,42 @@ define amdgpu_ps <3 x float> @s_buffer_load_v3f32_vgpr_offset(<4 x i32> inreg %r define amdgpu_ps <4 x float> @s_buffer_load_v4f32_vgpr_offset(<4 x i32> inreg %rsrc, i32 %soffset) { ; CHECK-LABEL: name: s_buffer_load_v4f32_vgpr_offset ; CHECK: bb.1 (%ir-block.0): - ; CHECK: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $vgpr0 - ; CHECK: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr2 - ; CHECK: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr3 - ; CHECK: [[COPY2:%[0-9]+]]:sgpr(s32) = COPY $sgpr4 - ; CHECK: [[COPY3:%[0-9]+]]:sgpr(s32) = COPY $sgpr5 - ; CHECK: [[BUILD_VECTOR:%[0-9]+]]:sgpr(<4 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32) - ; CHECK: [[COPY4:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 - ; CHECK: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 0 - ; CHECK: [[C1:%[0-9]+]]:vgpr(s32) = G_CONSTANT i32 0 - ; CHECK: [[AMDGPU_BUFFER_LOAD:%[0-9]+]]:vgpr(<4 x s32>) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR]](<4 x s32>), [[C1]](s32), [[COPY4]], [[C]], 0, 0, 0 :: (dereferenceable invariant load (s128), align 4) - ; CHECK: [[UV:%[0-9]+]]:vgpr(s32), [[UV1:%[0-9]+]]:vgpr(s32), [[UV2:%[0-9]+]]:vgpr(s32), [[UV3:%[0-9]+]]:vgpr(s32) = G_UNMERGE_VALUES [[AMDGPU_BUFFER_LOAD]](<4 x s32>) - ; CHECK: $vgpr0 = COPY [[UV]](s32) - ; CHECK: $vgpr1 = COPY [[UV1]](s32) - ; CHECK: $vgpr2 = COPY [[UV2]](s32) - ; CHECK: $vgpr3 = COPY [[UV3]](s32) - ; CHECK: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3 + ; CHECK-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $vgpr0 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr2 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr3 + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:sgpr(s32) = COPY $sgpr4 + ; CHECK-NEXT: [[COPY3:%[0-9]+]]:sgpr(s32) = COPY $sgpr5 + ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:sgpr(<4 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32) + ; CHECK-NEXT: [[COPY4:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 + ; CHECK-NEXT: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 0 + ; CHECK-NEXT: [[C1:%[0-9]+]]:vgpr(s32) = G_CONSTANT i32 0 + ; CHECK-NEXT: [[AMDGPU_BUFFER_LOAD:%[0-9]+]]:vgpr(<4 x s32>) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR]](<4 x s32>), [[C1]](s32), [[COPY4]], [[C]], 0, 0, 0 :: (dereferenceable invariant load (s128), align 4) + ; CHECK-NEXT: [[UV:%[0-9]+]]:vgpr(s32), [[UV1:%[0-9]+]]:vgpr(s32), [[UV2:%[0-9]+]]:vgpr(s32), [[UV3:%[0-9]+]]:vgpr(s32) = G_UNMERGE_VALUES [[AMDGPU_BUFFER_LOAD]](<4 x s32>) + ; CHECK-NEXT: $vgpr0 = COPY [[UV]](s32) + ; CHECK-NEXT: $vgpr1 = COPY [[UV1]](s32) + ; CHECK-NEXT: $vgpr2 = COPY [[UV2]](s32) + ; CHECK-NEXT: $vgpr3 = COPY [[UV3]](s32) + ; CHECK-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3 ; GREEDY-LABEL: name: s_buffer_load_v4f32_vgpr_offset ; GREEDY: bb.1 (%ir-block.0): - ; GREEDY: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $vgpr0 - ; GREEDY: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr2 - ; GREEDY: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr3 - ; GREEDY: [[COPY2:%[0-9]+]]:sgpr(s32) = COPY $sgpr4 - ; GREEDY: [[COPY3:%[0-9]+]]:sgpr(s32) = COPY $sgpr5 - ; GREEDY: [[BUILD_VECTOR:%[0-9]+]]:sgpr(<4 x s32>) = G_BUILD_VECTOR [[COPY]](s32), 
[[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32) - ; GREEDY: [[COPY4:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 - ; GREEDY: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 0 - ; GREEDY: [[C1:%[0-9]+]]:vgpr(s32) = G_CONSTANT i32 0 - ; GREEDY: [[AMDGPU_BUFFER_LOAD:%[0-9]+]]:vgpr(<4 x s32>) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR]](<4 x s32>), [[C1]](s32), [[COPY4]], [[C]], 0, 0, 0 :: (dereferenceable invariant load (s128), align 4) - ; GREEDY: [[UV:%[0-9]+]]:vgpr(s32), [[UV1:%[0-9]+]]:vgpr(s32), [[UV2:%[0-9]+]]:vgpr(s32), [[UV3:%[0-9]+]]:vgpr(s32) = G_UNMERGE_VALUES [[AMDGPU_BUFFER_LOAD]](<4 x s32>) - ; GREEDY: $vgpr0 = COPY [[UV]](s32) - ; GREEDY: $vgpr1 = COPY [[UV1]](s32) - ; GREEDY: $vgpr2 = COPY [[UV2]](s32) - ; GREEDY: $vgpr3 = COPY [[UV3]](s32) - ; GREEDY: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3 + ; GREEDY-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $vgpr0 + ; GREEDY-NEXT: {{ $}} + ; GREEDY-NEXT: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr2 + ; GREEDY-NEXT: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr3 + ; GREEDY-NEXT: [[COPY2:%[0-9]+]]:sgpr(s32) = COPY $sgpr4 + ; GREEDY-NEXT: [[COPY3:%[0-9]+]]:sgpr(s32) = COPY $sgpr5 + ; GREEDY-NEXT: [[BUILD_VECTOR:%[0-9]+]]:sgpr(<4 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32) + ; GREEDY-NEXT: [[COPY4:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 + ; GREEDY-NEXT: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 0 + ; GREEDY-NEXT: [[C1:%[0-9]+]]:vgpr(s32) = G_CONSTANT i32 0 + ; GREEDY-NEXT: [[AMDGPU_BUFFER_LOAD:%[0-9]+]]:vgpr(<4 x s32>) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR]](<4 x s32>), [[C1]](s32), [[COPY4]], [[C]], 0, 0, 0 :: (dereferenceable invariant load (s128), align 4) + ; GREEDY-NEXT: [[UV:%[0-9]+]]:vgpr(s32), [[UV1:%[0-9]+]]:vgpr(s32), [[UV2:%[0-9]+]]:vgpr(s32), [[UV3:%[0-9]+]]:vgpr(s32) = G_UNMERGE_VALUES [[AMDGPU_BUFFER_LOAD]](<4 x s32>) + ; GREEDY-NEXT: $vgpr0 = COPY [[UV]](s32) + ; GREEDY-NEXT: $vgpr1 = COPY [[UV1]](s32) + ; GREEDY-NEXT: $vgpr2 = COPY [[UV2]](s32) + ; GREEDY-NEXT: $vgpr3 = COPY [[UV3]](s32) + ; GREEDY-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3 %val = call <4 x float> @llvm.amdgcn.s.buffer.load.v4f32(<4 x i32> %rsrc, i32 %soffset, i32 0) ret <4 x float> %val } @@ -504,52 +522,54 @@ define amdgpu_ps <4 x float> @s_buffer_load_v4f32_vgpr_offset(<4 x i32> inreg %r define amdgpu_ps <8 x float> @s_buffer_load_v8f32_vgpr_offset(<4 x i32> inreg %rsrc, i32 %soffset) { ; CHECK-LABEL: name: s_buffer_load_v8f32_vgpr_offset ; CHECK: bb.1 (%ir-block.0): - ; CHECK: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $vgpr0 - ; CHECK: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr2 - ; CHECK: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr3 - ; CHECK: [[COPY2:%[0-9]+]]:sgpr(s32) = COPY $sgpr4 - ; CHECK: [[COPY3:%[0-9]+]]:sgpr(s32) = COPY $sgpr5 - ; CHECK: [[BUILD_VECTOR:%[0-9]+]]:sgpr(<4 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32) - ; CHECK: [[COPY4:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 - ; CHECK: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 0 - ; CHECK: [[C1:%[0-9]+]]:vgpr(s32) = G_CONSTANT i32 0 - ; CHECK: [[AMDGPU_BUFFER_LOAD:%[0-9]+]]:vgpr(<4 x s32>) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR]](<4 x s32>), [[C1]](s32), [[COPY4]], [[C]], 0, 0, 0 :: (dereferenceable invariant load (s128), align 4) - ; CHECK: [[AMDGPU_BUFFER_LOAD1:%[0-9]+]]:vgpr(<4 x s32>) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR]](<4 x s32>), [[C1]](s32), [[COPY4]], [[C]], 16, 0, 0 :: (dereferenceable invariant load (s128), align 4) - ; CHECK: 
[[CONCAT_VECTORS:%[0-9]+]]:vgpr(<8 x s32>) = G_CONCAT_VECTORS [[AMDGPU_BUFFER_LOAD]](<4 x s32>), [[AMDGPU_BUFFER_LOAD1]](<4 x s32>) - ; CHECK: [[UV:%[0-9]+]]:vgpr(s32), [[UV1:%[0-9]+]]:vgpr(s32), [[UV2:%[0-9]+]]:vgpr(s32), [[UV3:%[0-9]+]]:vgpr(s32), [[UV4:%[0-9]+]]:vgpr(s32), [[UV5:%[0-9]+]]:vgpr(s32), [[UV6:%[0-9]+]]:vgpr(s32), [[UV7:%[0-9]+]]:vgpr(s32) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<8 x s32>) - ; CHECK: $vgpr0 = COPY [[UV]](s32) - ; CHECK: $vgpr1 = COPY [[UV1]](s32) - ; CHECK: $vgpr2 = COPY [[UV2]](s32) - ; CHECK: $vgpr3 = COPY [[UV3]](s32) - ; CHECK: $vgpr4 = COPY [[UV4]](s32) - ; CHECK: $vgpr5 = COPY [[UV5]](s32) - ; CHECK: $vgpr6 = COPY [[UV6]](s32) - ; CHECK: $vgpr7 = COPY [[UV7]](s32) - ; CHECK: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7 + ; CHECK-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $vgpr0 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr2 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr3 + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:sgpr(s32) = COPY $sgpr4 + ; CHECK-NEXT: [[COPY3:%[0-9]+]]:sgpr(s32) = COPY $sgpr5 + ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:sgpr(<4 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32) + ; CHECK-NEXT: [[COPY4:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 + ; CHECK-NEXT: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 0 + ; CHECK-NEXT: [[C1:%[0-9]+]]:vgpr(s32) = G_CONSTANT i32 0 + ; CHECK-NEXT: [[AMDGPU_BUFFER_LOAD:%[0-9]+]]:vgpr(<4 x s32>) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR]](<4 x s32>), [[C1]](s32), [[COPY4]], [[C]], 0, 0, 0 :: (dereferenceable invariant load (s128), align 4) + ; CHECK-NEXT: [[AMDGPU_BUFFER_LOAD1:%[0-9]+]]:vgpr(<4 x s32>) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR]](<4 x s32>), [[C1]](s32), [[COPY4]], [[C]], 16, 0, 0 :: (dereferenceable invariant load (s128), align 4) + ; CHECK-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:vgpr(<8 x s32>) = G_CONCAT_VECTORS [[AMDGPU_BUFFER_LOAD]](<4 x s32>), [[AMDGPU_BUFFER_LOAD1]](<4 x s32>) + ; CHECK-NEXT: [[UV:%[0-9]+]]:vgpr(s32), [[UV1:%[0-9]+]]:vgpr(s32), [[UV2:%[0-9]+]]:vgpr(s32), [[UV3:%[0-9]+]]:vgpr(s32), [[UV4:%[0-9]+]]:vgpr(s32), [[UV5:%[0-9]+]]:vgpr(s32), [[UV6:%[0-9]+]]:vgpr(s32), [[UV7:%[0-9]+]]:vgpr(s32) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<8 x s32>) + ; CHECK-NEXT: $vgpr0 = COPY [[UV]](s32) + ; CHECK-NEXT: $vgpr1 = COPY [[UV1]](s32) + ; CHECK-NEXT: $vgpr2 = COPY [[UV2]](s32) + ; CHECK-NEXT: $vgpr3 = COPY [[UV3]](s32) + ; CHECK-NEXT: $vgpr4 = COPY [[UV4]](s32) + ; CHECK-NEXT: $vgpr5 = COPY [[UV5]](s32) + ; CHECK-NEXT: $vgpr6 = COPY [[UV6]](s32) + ; CHECK-NEXT: $vgpr7 = COPY [[UV7]](s32) + ; CHECK-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7 ; GREEDY-LABEL: name: s_buffer_load_v8f32_vgpr_offset ; GREEDY: bb.1 (%ir-block.0): - ; GREEDY: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $vgpr0 - ; GREEDY: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr2 - ; GREEDY: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr3 - ; GREEDY: [[COPY2:%[0-9]+]]:sgpr(s32) = COPY $sgpr4 - ; GREEDY: [[COPY3:%[0-9]+]]:sgpr(s32) = COPY $sgpr5 - ; GREEDY: [[BUILD_VECTOR:%[0-9]+]]:sgpr(<4 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32) - ; GREEDY: [[COPY4:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 - ; GREEDY: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 0 - ; GREEDY: [[C1:%[0-9]+]]:vgpr(s32) = G_CONSTANT i32 0 - ; GREEDY: 
[[AMDGPU_BUFFER_LOAD:%[0-9]+]]:vgpr(<4 x s32>) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR]](<4 x s32>), [[C1]](s32), [[COPY4]], [[C]], 0, 0, 0 :: (dereferenceable invariant load (s128), align 4) - ; GREEDY: [[AMDGPU_BUFFER_LOAD1:%[0-9]+]]:vgpr(<4 x s32>) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR]](<4 x s32>), [[C1]](s32), [[COPY4]], [[C]], 16, 0, 0 :: (dereferenceable invariant load (s128), align 4) - ; GREEDY: [[CONCAT_VECTORS:%[0-9]+]]:vgpr(<8 x s32>) = G_CONCAT_VECTORS [[AMDGPU_BUFFER_LOAD]](<4 x s32>), [[AMDGPU_BUFFER_LOAD1]](<4 x s32>) - ; GREEDY: [[UV:%[0-9]+]]:vgpr(s32), [[UV1:%[0-9]+]]:vgpr(s32), [[UV2:%[0-9]+]]:vgpr(s32), [[UV3:%[0-9]+]]:vgpr(s32), [[UV4:%[0-9]+]]:vgpr(s32), [[UV5:%[0-9]+]]:vgpr(s32), [[UV6:%[0-9]+]]:vgpr(s32), [[UV7:%[0-9]+]]:vgpr(s32) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<8 x s32>) - ; GREEDY: $vgpr0 = COPY [[UV]](s32) - ; GREEDY: $vgpr1 = COPY [[UV1]](s32) - ; GREEDY: $vgpr2 = COPY [[UV2]](s32) - ; GREEDY: $vgpr3 = COPY [[UV3]](s32) - ; GREEDY: $vgpr4 = COPY [[UV4]](s32) - ; GREEDY: $vgpr5 = COPY [[UV5]](s32) - ; GREEDY: $vgpr6 = COPY [[UV6]](s32) - ; GREEDY: $vgpr7 = COPY [[UV7]](s32) - ; GREEDY: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7 + ; GREEDY-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $vgpr0 + ; GREEDY-NEXT: {{ $}} + ; GREEDY-NEXT: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr2 + ; GREEDY-NEXT: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr3 + ; GREEDY-NEXT: [[COPY2:%[0-9]+]]:sgpr(s32) = COPY $sgpr4 + ; GREEDY-NEXT: [[COPY3:%[0-9]+]]:sgpr(s32) = COPY $sgpr5 + ; GREEDY-NEXT: [[BUILD_VECTOR:%[0-9]+]]:sgpr(<4 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32) + ; GREEDY-NEXT: [[COPY4:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 + ; GREEDY-NEXT: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 0 + ; GREEDY-NEXT: [[C1:%[0-9]+]]:vgpr(s32) = G_CONSTANT i32 0 + ; GREEDY-NEXT: [[AMDGPU_BUFFER_LOAD:%[0-9]+]]:vgpr(<4 x s32>) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR]](<4 x s32>), [[C1]](s32), [[COPY4]], [[C]], 0, 0, 0 :: (dereferenceable invariant load (s128), align 4) + ; GREEDY-NEXT: [[AMDGPU_BUFFER_LOAD1:%[0-9]+]]:vgpr(<4 x s32>) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR]](<4 x s32>), [[C1]](s32), [[COPY4]], [[C]], 16, 0, 0 :: (dereferenceable invariant load (s128), align 4) + ; GREEDY-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:vgpr(<8 x s32>) = G_CONCAT_VECTORS [[AMDGPU_BUFFER_LOAD]](<4 x s32>), [[AMDGPU_BUFFER_LOAD1]](<4 x s32>) + ; GREEDY-NEXT: [[UV:%[0-9]+]]:vgpr(s32), [[UV1:%[0-9]+]]:vgpr(s32), [[UV2:%[0-9]+]]:vgpr(s32), [[UV3:%[0-9]+]]:vgpr(s32), [[UV4:%[0-9]+]]:vgpr(s32), [[UV5:%[0-9]+]]:vgpr(s32), [[UV6:%[0-9]+]]:vgpr(s32), [[UV7:%[0-9]+]]:vgpr(s32) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<8 x s32>) + ; GREEDY-NEXT: $vgpr0 = COPY [[UV]](s32) + ; GREEDY-NEXT: $vgpr1 = COPY [[UV1]](s32) + ; GREEDY-NEXT: $vgpr2 = COPY [[UV2]](s32) + ; GREEDY-NEXT: $vgpr3 = COPY [[UV3]](s32) + ; GREEDY-NEXT: $vgpr4 = COPY [[UV4]](s32) + ; GREEDY-NEXT: $vgpr5 = COPY [[UV5]](s32) + ; GREEDY-NEXT: $vgpr6 = COPY [[UV6]](s32) + ; GREEDY-NEXT: $vgpr7 = COPY [[UV7]](s32) + ; GREEDY-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7 %val = call <8 x float> @llvm.amdgcn.s.buffer.load.v8f32(<4 x i32> %rsrc, i32 %soffset, i32 0) ret <8 x float> %val } @@ -557,72 +577,74 @@ define amdgpu_ps <8 x float> @s_buffer_load_v8f32_vgpr_offset(<4 x i32> inreg %r define amdgpu_ps <16 x float> 
@s_buffer_load_v16f32_vgpr_offset(<4 x i32> inreg %rsrc, i32 %soffset) { ; CHECK-LABEL: name: s_buffer_load_v16f32_vgpr_offset ; CHECK: bb.1 (%ir-block.0): - ; CHECK: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $vgpr0 - ; CHECK: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr2 - ; CHECK: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr3 - ; CHECK: [[COPY2:%[0-9]+]]:sgpr(s32) = COPY $sgpr4 - ; CHECK: [[COPY3:%[0-9]+]]:sgpr(s32) = COPY $sgpr5 - ; CHECK: [[BUILD_VECTOR:%[0-9]+]]:sgpr(<4 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32) - ; CHECK: [[COPY4:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 - ; CHECK: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 0 - ; CHECK: [[C1:%[0-9]+]]:vgpr(s32) = G_CONSTANT i32 0 - ; CHECK: [[AMDGPU_BUFFER_LOAD:%[0-9]+]]:vgpr(<4 x s32>) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR]](<4 x s32>), [[C1]](s32), [[COPY4]], [[C]], 0, 0, 0 :: (dereferenceable invariant load (s128), align 4) - ; CHECK: [[AMDGPU_BUFFER_LOAD1:%[0-9]+]]:vgpr(<4 x s32>) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR]](<4 x s32>), [[C1]](s32), [[COPY4]], [[C]], 16, 0, 0 :: (dereferenceable invariant load (s128), align 4) - ; CHECK: [[AMDGPU_BUFFER_LOAD2:%[0-9]+]]:vgpr(<4 x s32>) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR]](<4 x s32>), [[C1]](s32), [[COPY4]], [[C]], 32, 0, 0 :: (dereferenceable invariant load (s128) from unknown-address + 16, align 4) - ; CHECK: [[AMDGPU_BUFFER_LOAD3:%[0-9]+]]:vgpr(<4 x s32>) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR]](<4 x s32>), [[C1]](s32), [[COPY4]], [[C]], 48, 0, 0 :: (dereferenceable invariant load (s128) from unknown-address + 48, align 4) - ; CHECK: [[CONCAT_VECTORS:%[0-9]+]]:vgpr(<16 x s32>) = G_CONCAT_VECTORS [[AMDGPU_BUFFER_LOAD]](<4 x s32>), [[AMDGPU_BUFFER_LOAD1]](<4 x s32>), [[AMDGPU_BUFFER_LOAD2]](<4 x s32>), [[AMDGPU_BUFFER_LOAD3]](<4 x s32>) - ; CHECK: [[UV:%[0-9]+]]:vgpr(s32), [[UV1:%[0-9]+]]:vgpr(s32), [[UV2:%[0-9]+]]:vgpr(s32), [[UV3:%[0-9]+]]:vgpr(s32), [[UV4:%[0-9]+]]:vgpr(s32), [[UV5:%[0-9]+]]:vgpr(s32), [[UV6:%[0-9]+]]:vgpr(s32), [[UV7:%[0-9]+]]:vgpr(s32), [[UV8:%[0-9]+]]:vgpr(s32), [[UV9:%[0-9]+]]:vgpr(s32), [[UV10:%[0-9]+]]:vgpr(s32), [[UV11:%[0-9]+]]:vgpr(s32), [[UV12:%[0-9]+]]:vgpr(s32), [[UV13:%[0-9]+]]:vgpr(s32), [[UV14:%[0-9]+]]:vgpr(s32), [[UV15:%[0-9]+]]:vgpr(s32) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<16 x s32>) - ; CHECK: $vgpr0 = COPY [[UV]](s32) - ; CHECK: $vgpr1 = COPY [[UV1]](s32) - ; CHECK: $vgpr2 = COPY [[UV2]](s32) - ; CHECK: $vgpr3 = COPY [[UV3]](s32) - ; CHECK: $vgpr4 = COPY [[UV4]](s32) - ; CHECK: $vgpr5 = COPY [[UV5]](s32) - ; CHECK: $vgpr6 = COPY [[UV6]](s32) - ; CHECK: $vgpr7 = COPY [[UV7]](s32) - ; CHECK: $vgpr8 = COPY [[UV8]](s32) - ; CHECK: $vgpr9 = COPY [[UV9]](s32) - ; CHECK: $vgpr10 = COPY [[UV10]](s32) - ; CHECK: $vgpr11 = COPY [[UV11]](s32) - ; CHECK: $vgpr12 = COPY [[UV12]](s32) - ; CHECK: $vgpr13 = COPY [[UV13]](s32) - ; CHECK: $vgpr14 = COPY [[UV14]](s32) - ; CHECK: $vgpr15 = COPY [[UV15]](s32) - ; CHECK: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7, implicit $vgpr8, implicit $vgpr9, implicit $vgpr10, implicit $vgpr11, implicit $vgpr12, implicit $vgpr13, implicit $vgpr14, implicit $vgpr15 + ; CHECK-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $vgpr0 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr2 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr3 + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:sgpr(s32) = COPY $sgpr4 + ; CHECK-NEXT: [[COPY3:%[0-9]+]]:sgpr(s32) = COPY $sgpr5 + ; CHECK-NEXT: 
[[BUILD_VECTOR:%[0-9]+]]:sgpr(<4 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32) + ; CHECK-NEXT: [[COPY4:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 + ; CHECK-NEXT: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 0 + ; CHECK-NEXT: [[C1:%[0-9]+]]:vgpr(s32) = G_CONSTANT i32 0 + ; CHECK-NEXT: [[AMDGPU_BUFFER_LOAD:%[0-9]+]]:vgpr(<4 x s32>) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR]](<4 x s32>), [[C1]](s32), [[COPY4]], [[C]], 0, 0, 0 :: (dereferenceable invariant load (s128), align 4) + ; CHECK-NEXT: [[AMDGPU_BUFFER_LOAD1:%[0-9]+]]:vgpr(<4 x s32>) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR]](<4 x s32>), [[C1]](s32), [[COPY4]], [[C]], 16, 0, 0 :: (dereferenceable invariant load (s128), align 4) + ; CHECK-NEXT: [[AMDGPU_BUFFER_LOAD2:%[0-9]+]]:vgpr(<4 x s32>) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR]](<4 x s32>), [[C1]](s32), [[COPY4]], [[C]], 32, 0, 0 :: (dereferenceable invariant load (s128) from unknown-address + 16, align 4) + ; CHECK-NEXT: [[AMDGPU_BUFFER_LOAD3:%[0-9]+]]:vgpr(<4 x s32>) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR]](<4 x s32>), [[C1]](s32), [[COPY4]], [[C]], 48, 0, 0 :: (dereferenceable invariant load (s128) from unknown-address + 48, align 4) + ; CHECK-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:vgpr(<16 x s32>) = G_CONCAT_VECTORS [[AMDGPU_BUFFER_LOAD]](<4 x s32>), [[AMDGPU_BUFFER_LOAD1]](<4 x s32>), [[AMDGPU_BUFFER_LOAD2]](<4 x s32>), [[AMDGPU_BUFFER_LOAD3]](<4 x s32>) + ; CHECK-NEXT: [[UV:%[0-9]+]]:vgpr(s32), [[UV1:%[0-9]+]]:vgpr(s32), [[UV2:%[0-9]+]]:vgpr(s32), [[UV3:%[0-9]+]]:vgpr(s32), [[UV4:%[0-9]+]]:vgpr(s32), [[UV5:%[0-9]+]]:vgpr(s32), [[UV6:%[0-9]+]]:vgpr(s32), [[UV7:%[0-9]+]]:vgpr(s32), [[UV8:%[0-9]+]]:vgpr(s32), [[UV9:%[0-9]+]]:vgpr(s32), [[UV10:%[0-9]+]]:vgpr(s32), [[UV11:%[0-9]+]]:vgpr(s32), [[UV12:%[0-9]+]]:vgpr(s32), [[UV13:%[0-9]+]]:vgpr(s32), [[UV14:%[0-9]+]]:vgpr(s32), [[UV15:%[0-9]+]]:vgpr(s32) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<16 x s32>) + ; CHECK-NEXT: $vgpr0 = COPY [[UV]](s32) + ; CHECK-NEXT: $vgpr1 = COPY [[UV1]](s32) + ; CHECK-NEXT: $vgpr2 = COPY [[UV2]](s32) + ; CHECK-NEXT: $vgpr3 = COPY [[UV3]](s32) + ; CHECK-NEXT: $vgpr4 = COPY [[UV4]](s32) + ; CHECK-NEXT: $vgpr5 = COPY [[UV5]](s32) + ; CHECK-NEXT: $vgpr6 = COPY [[UV6]](s32) + ; CHECK-NEXT: $vgpr7 = COPY [[UV7]](s32) + ; CHECK-NEXT: $vgpr8 = COPY [[UV8]](s32) + ; CHECK-NEXT: $vgpr9 = COPY [[UV9]](s32) + ; CHECK-NEXT: $vgpr10 = COPY [[UV10]](s32) + ; CHECK-NEXT: $vgpr11 = COPY [[UV11]](s32) + ; CHECK-NEXT: $vgpr12 = COPY [[UV12]](s32) + ; CHECK-NEXT: $vgpr13 = COPY [[UV13]](s32) + ; CHECK-NEXT: $vgpr14 = COPY [[UV14]](s32) + ; CHECK-NEXT: $vgpr15 = COPY [[UV15]](s32) + ; CHECK-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7, implicit $vgpr8, implicit $vgpr9, implicit $vgpr10, implicit $vgpr11, implicit $vgpr12, implicit $vgpr13, implicit $vgpr14, implicit $vgpr15 ; GREEDY-LABEL: name: s_buffer_load_v16f32_vgpr_offset ; GREEDY: bb.1 (%ir-block.0): - ; GREEDY: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $vgpr0 - ; GREEDY: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr2 - ; GREEDY: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr3 - ; GREEDY: [[COPY2:%[0-9]+]]:sgpr(s32) = COPY $sgpr4 - ; GREEDY: [[COPY3:%[0-9]+]]:sgpr(s32) = COPY $sgpr5 - ; GREEDY: [[BUILD_VECTOR:%[0-9]+]]:sgpr(<4 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32) - ; GREEDY: [[COPY4:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 - ; GREEDY: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 0 - ; GREEDY: [[C1:%[0-9]+]]:vgpr(s32) = 
G_CONSTANT i32 0 - ; GREEDY: [[AMDGPU_BUFFER_LOAD:%[0-9]+]]:vgpr(<4 x s32>) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR]](<4 x s32>), [[C1]](s32), [[COPY4]], [[C]], 0, 0, 0 :: (dereferenceable invariant load (s128), align 4) - ; GREEDY: [[AMDGPU_BUFFER_LOAD1:%[0-9]+]]:vgpr(<4 x s32>) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR]](<4 x s32>), [[C1]](s32), [[COPY4]], [[C]], 16, 0, 0 :: (dereferenceable invariant load (s128), align 4) - ; GREEDY: [[AMDGPU_BUFFER_LOAD2:%[0-9]+]]:vgpr(<4 x s32>) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR]](<4 x s32>), [[C1]](s32), [[COPY4]], [[C]], 32, 0, 0 :: (dereferenceable invariant load (s128) from unknown-address + 16, align 4) - ; GREEDY: [[AMDGPU_BUFFER_LOAD3:%[0-9]+]]:vgpr(<4 x s32>) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR]](<4 x s32>), [[C1]](s32), [[COPY4]], [[C]], 48, 0, 0 :: (dereferenceable invariant load (s128) from unknown-address + 48, align 4) - ; GREEDY: [[CONCAT_VECTORS:%[0-9]+]]:vgpr(<16 x s32>) = G_CONCAT_VECTORS [[AMDGPU_BUFFER_LOAD]](<4 x s32>), [[AMDGPU_BUFFER_LOAD1]](<4 x s32>), [[AMDGPU_BUFFER_LOAD2]](<4 x s32>), [[AMDGPU_BUFFER_LOAD3]](<4 x s32>) - ; GREEDY: [[UV:%[0-9]+]]:vgpr(s32), [[UV1:%[0-9]+]]:vgpr(s32), [[UV2:%[0-9]+]]:vgpr(s32), [[UV3:%[0-9]+]]:vgpr(s32), [[UV4:%[0-9]+]]:vgpr(s32), [[UV5:%[0-9]+]]:vgpr(s32), [[UV6:%[0-9]+]]:vgpr(s32), [[UV7:%[0-9]+]]:vgpr(s32), [[UV8:%[0-9]+]]:vgpr(s32), [[UV9:%[0-9]+]]:vgpr(s32), [[UV10:%[0-9]+]]:vgpr(s32), [[UV11:%[0-9]+]]:vgpr(s32), [[UV12:%[0-9]+]]:vgpr(s32), [[UV13:%[0-9]+]]:vgpr(s32), [[UV14:%[0-9]+]]:vgpr(s32), [[UV15:%[0-9]+]]:vgpr(s32) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<16 x s32>) - ; GREEDY: $vgpr0 = COPY [[UV]](s32) - ; GREEDY: $vgpr1 = COPY [[UV1]](s32) - ; GREEDY: $vgpr2 = COPY [[UV2]](s32) - ; GREEDY: $vgpr3 = COPY [[UV3]](s32) - ; GREEDY: $vgpr4 = COPY [[UV4]](s32) - ; GREEDY: $vgpr5 = COPY [[UV5]](s32) - ; GREEDY: $vgpr6 = COPY [[UV6]](s32) - ; GREEDY: $vgpr7 = COPY [[UV7]](s32) - ; GREEDY: $vgpr8 = COPY [[UV8]](s32) - ; GREEDY: $vgpr9 = COPY [[UV9]](s32) - ; GREEDY: $vgpr10 = COPY [[UV10]](s32) - ; GREEDY: $vgpr11 = COPY [[UV11]](s32) - ; GREEDY: $vgpr12 = COPY [[UV12]](s32) - ; GREEDY: $vgpr13 = COPY [[UV13]](s32) - ; GREEDY: $vgpr14 = COPY [[UV14]](s32) - ; GREEDY: $vgpr15 = COPY [[UV15]](s32) - ; GREEDY: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7, implicit $vgpr8, implicit $vgpr9, implicit $vgpr10, implicit $vgpr11, implicit $vgpr12, implicit $vgpr13, implicit $vgpr14, implicit $vgpr15 + ; GREEDY-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $vgpr0 + ; GREEDY-NEXT: {{ $}} + ; GREEDY-NEXT: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr2 + ; GREEDY-NEXT: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr3 + ; GREEDY-NEXT: [[COPY2:%[0-9]+]]:sgpr(s32) = COPY $sgpr4 + ; GREEDY-NEXT: [[COPY3:%[0-9]+]]:sgpr(s32) = COPY $sgpr5 + ; GREEDY-NEXT: [[BUILD_VECTOR:%[0-9]+]]:sgpr(<4 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32) + ; GREEDY-NEXT: [[COPY4:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 + ; GREEDY-NEXT: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 0 + ; GREEDY-NEXT: [[C1:%[0-9]+]]:vgpr(s32) = G_CONSTANT i32 0 + ; GREEDY-NEXT: [[AMDGPU_BUFFER_LOAD:%[0-9]+]]:vgpr(<4 x s32>) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR]](<4 x s32>), [[C1]](s32), [[COPY4]], [[C]], 0, 0, 0 :: (dereferenceable invariant load (s128), align 4) + ; GREEDY-NEXT: [[AMDGPU_BUFFER_LOAD1:%[0-9]+]]:vgpr(<4 x s32>) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR]](<4 x s32>), [[C1]](s32), [[COPY4]], [[C]], 16, 0, 0 :: 
(dereferenceable invariant load (s128), align 4) + ; GREEDY-NEXT: [[AMDGPU_BUFFER_LOAD2:%[0-9]+]]:vgpr(<4 x s32>) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR]](<4 x s32>), [[C1]](s32), [[COPY4]], [[C]], 32, 0, 0 :: (dereferenceable invariant load (s128) from unknown-address + 16, align 4) + ; GREEDY-NEXT: [[AMDGPU_BUFFER_LOAD3:%[0-9]+]]:vgpr(<4 x s32>) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR]](<4 x s32>), [[C1]](s32), [[COPY4]], [[C]], 48, 0, 0 :: (dereferenceable invariant load (s128) from unknown-address + 48, align 4) + ; GREEDY-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:vgpr(<16 x s32>) = G_CONCAT_VECTORS [[AMDGPU_BUFFER_LOAD]](<4 x s32>), [[AMDGPU_BUFFER_LOAD1]](<4 x s32>), [[AMDGPU_BUFFER_LOAD2]](<4 x s32>), [[AMDGPU_BUFFER_LOAD3]](<4 x s32>) + ; GREEDY-NEXT: [[UV:%[0-9]+]]:vgpr(s32), [[UV1:%[0-9]+]]:vgpr(s32), [[UV2:%[0-9]+]]:vgpr(s32), [[UV3:%[0-9]+]]:vgpr(s32), [[UV4:%[0-9]+]]:vgpr(s32), [[UV5:%[0-9]+]]:vgpr(s32), [[UV6:%[0-9]+]]:vgpr(s32), [[UV7:%[0-9]+]]:vgpr(s32), [[UV8:%[0-9]+]]:vgpr(s32), [[UV9:%[0-9]+]]:vgpr(s32), [[UV10:%[0-9]+]]:vgpr(s32), [[UV11:%[0-9]+]]:vgpr(s32), [[UV12:%[0-9]+]]:vgpr(s32), [[UV13:%[0-9]+]]:vgpr(s32), [[UV14:%[0-9]+]]:vgpr(s32), [[UV15:%[0-9]+]]:vgpr(s32) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<16 x s32>) + ; GREEDY-NEXT: $vgpr0 = COPY [[UV]](s32) + ; GREEDY-NEXT: $vgpr1 = COPY [[UV1]](s32) + ; GREEDY-NEXT: $vgpr2 = COPY [[UV2]](s32) + ; GREEDY-NEXT: $vgpr3 = COPY [[UV3]](s32) + ; GREEDY-NEXT: $vgpr4 = COPY [[UV4]](s32) + ; GREEDY-NEXT: $vgpr5 = COPY [[UV5]](s32) + ; GREEDY-NEXT: $vgpr6 = COPY [[UV6]](s32) + ; GREEDY-NEXT: $vgpr7 = COPY [[UV7]](s32) + ; GREEDY-NEXT: $vgpr8 = COPY [[UV8]](s32) + ; GREEDY-NEXT: $vgpr9 = COPY [[UV9]](s32) + ; GREEDY-NEXT: $vgpr10 = COPY [[UV10]](s32) + ; GREEDY-NEXT: $vgpr11 = COPY [[UV11]](s32) + ; GREEDY-NEXT: $vgpr12 = COPY [[UV12]](s32) + ; GREEDY-NEXT: $vgpr13 = COPY [[UV13]](s32) + ; GREEDY-NEXT: $vgpr14 = COPY [[UV14]](s32) + ; GREEDY-NEXT: $vgpr15 = COPY [[UV15]](s32) + ; GREEDY-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7, implicit $vgpr8, implicit $vgpr9, implicit $vgpr10, implicit $vgpr11, implicit $vgpr12, implicit $vgpr13, implicit $vgpr14, implicit $vgpr15 %val = call <16 x float> @llvm.amdgcn.s.buffer.load.v16f32(<4 x i32> %rsrc, i32 %soffset, i32 0) ret <16 x float> %val } @@ -630,36 +652,38 @@ define amdgpu_ps <16 x float> @s_buffer_load_v16f32_vgpr_offset(<4 x i32> inreg define amdgpu_ps void @s_buffer_load_i96_vgpr_offset(<4 x i32> inreg %rsrc, i32 %soffset) { ; CHECK-LABEL: name: s_buffer_load_i96_vgpr_offset ; CHECK: bb.1 (%ir-block.0): - ; CHECK: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $vgpr0 - ; CHECK: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr2 - ; CHECK: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr3 - ; CHECK: [[COPY2:%[0-9]+]]:sgpr(s32) = COPY $sgpr4 - ; CHECK: [[COPY3:%[0-9]+]]:sgpr(s32) = COPY $sgpr5 - ; CHECK: [[BUILD_VECTOR:%[0-9]+]]:sgpr(<4 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32) - ; CHECK: [[COPY4:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 - ; CHECK: [[DEF:%[0-9]+]]:sgpr(p1) = G_IMPLICIT_DEF - ; CHECK: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 0 - ; CHECK: [[C1:%[0-9]+]]:vgpr(s32) = G_CONSTANT i32 0 - ; CHECK: [[AMDGPU_BUFFER_LOAD:%[0-9]+]]:vgpr(s128) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR]](<4 x s32>), [[C1]](s32), [[COPY4]], [[C]], 0, 0, 0 :: (dereferenceable invariant load (s128), align 4) - ; CHECK: [[TRUNC:%[0-9]+]]:vgpr(s96) = G_TRUNC 
[[AMDGPU_BUFFER_LOAD]](s128) - ; CHECK: G_STORE [[TRUNC]](s96), [[DEF]](p1) :: (store (s96) into `i96 addrspace(1)* undef`, align 8, addrspace 1) - ; CHECK: S_ENDPGM 0 + ; CHECK-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $vgpr0 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr2 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr3 + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:sgpr(s32) = COPY $sgpr4 + ; CHECK-NEXT: [[COPY3:%[0-9]+]]:sgpr(s32) = COPY $sgpr5 + ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:sgpr(<4 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32) + ; CHECK-NEXT: [[COPY4:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 + ; CHECK-NEXT: [[DEF:%[0-9]+]]:sgpr(p1) = G_IMPLICIT_DEF + ; CHECK-NEXT: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 0 + ; CHECK-NEXT: [[C1:%[0-9]+]]:vgpr(s32) = G_CONSTANT i32 0 + ; CHECK-NEXT: [[AMDGPU_BUFFER_LOAD:%[0-9]+]]:vgpr(s128) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR]](<4 x s32>), [[C1]](s32), [[COPY4]], [[C]], 0, 0, 0 :: (dereferenceable invariant load (s128), align 4) + ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:vgpr(s96) = G_TRUNC [[AMDGPU_BUFFER_LOAD]](s128) + ; CHECK-NEXT: G_STORE [[TRUNC]](s96), [[DEF]](p1) :: (store (s96) into `i96 addrspace(1)* undef`, align 8, addrspace 1) + ; CHECK-NEXT: S_ENDPGM 0 ; GREEDY-LABEL: name: s_buffer_load_i96_vgpr_offset ; GREEDY: bb.1 (%ir-block.0): - ; GREEDY: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $vgpr0 - ; GREEDY: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr2 - ; GREEDY: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr3 - ; GREEDY: [[COPY2:%[0-9]+]]:sgpr(s32) = COPY $sgpr4 - ; GREEDY: [[COPY3:%[0-9]+]]:sgpr(s32) = COPY $sgpr5 - ; GREEDY: [[BUILD_VECTOR:%[0-9]+]]:sgpr(<4 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32) - ; GREEDY: [[COPY4:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 - ; GREEDY: [[DEF:%[0-9]+]]:sgpr(p1) = G_IMPLICIT_DEF - ; GREEDY: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 0 - ; GREEDY: [[C1:%[0-9]+]]:vgpr(s32) = G_CONSTANT i32 0 - ; GREEDY: [[AMDGPU_BUFFER_LOAD:%[0-9]+]]:vgpr(s128) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR]](<4 x s32>), [[C1]](s32), [[COPY4]], [[C]], 0, 0, 0 :: (dereferenceable invariant load (s128), align 4) - ; GREEDY: [[TRUNC:%[0-9]+]]:vgpr(s96) = G_TRUNC [[AMDGPU_BUFFER_LOAD]](s128) - ; GREEDY: G_STORE [[TRUNC]](s96), [[DEF]](p1) :: (store (s96) into `i96 addrspace(1)* undef`, align 8, addrspace 1) - ; GREEDY: S_ENDPGM 0 + ; GREEDY-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $vgpr0 + ; GREEDY-NEXT: {{ $}} + ; GREEDY-NEXT: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr2 + ; GREEDY-NEXT: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr3 + ; GREEDY-NEXT: [[COPY2:%[0-9]+]]:sgpr(s32) = COPY $sgpr4 + ; GREEDY-NEXT: [[COPY3:%[0-9]+]]:sgpr(s32) = COPY $sgpr5 + ; GREEDY-NEXT: [[BUILD_VECTOR:%[0-9]+]]:sgpr(<4 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32) + ; GREEDY-NEXT: [[COPY4:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 + ; GREEDY-NEXT: [[DEF:%[0-9]+]]:sgpr(p1) = G_IMPLICIT_DEF + ; GREEDY-NEXT: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 0 + ; GREEDY-NEXT: [[C1:%[0-9]+]]:vgpr(s32) = G_CONSTANT i32 0 + ; GREEDY-NEXT: [[AMDGPU_BUFFER_LOAD:%[0-9]+]]:vgpr(s128) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR]](<4 x s32>), [[C1]](s32), [[COPY4]], [[C]], 0, 0, 0 :: (dereferenceable invariant load (s128), align 4) + ; GREEDY-NEXT: [[TRUNC:%[0-9]+]]:vgpr(s96) = G_TRUNC [[AMDGPU_BUFFER_LOAD]](s128) + ; GREEDY-NEXT: G_STORE [[TRUNC]](s96), [[DEF]](p1) :: (store (s96) into `i96 addrspace(1)* undef`, align 8, addrspace 1) + ; GREEDY-NEXT: S_ENDPGM 0 
%val = call i96 @llvm.amdgcn.s.buffer.load.i96(<4 x i32> %rsrc, i32 %soffset, i32 0) store i96 %val, i96 addrspace(1)* undef ret void @@ -669,46 +693,48 @@ define amdgpu_ps void @s_buffer_load_i96_vgpr_offset(<4 x i32> inreg %rsrc, i32 define amdgpu_ps void @s_buffer_load_i256_vgpr_offset(<4 x i32> inreg %rsrc, i32 %soffset) { ; CHECK-LABEL: name: s_buffer_load_i256_vgpr_offset ; CHECK: bb.1 (%ir-block.0): - ; CHECK: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $vgpr0 - ; CHECK: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr2 - ; CHECK: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr3 - ; CHECK: [[COPY2:%[0-9]+]]:sgpr(s32) = COPY $sgpr4 - ; CHECK: [[COPY3:%[0-9]+]]:sgpr(s32) = COPY $sgpr5 - ; CHECK: [[BUILD_VECTOR:%[0-9]+]]:sgpr(<4 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32) - ; CHECK: [[COPY4:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 - ; CHECK: [[DEF:%[0-9]+]]:sgpr(p1) = G_IMPLICIT_DEF - ; CHECK: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 0 - ; CHECK: [[C1:%[0-9]+]]:vgpr(s32) = G_CONSTANT i32 0 - ; CHECK: [[AMDGPU_BUFFER_LOAD:%[0-9]+]]:vgpr(s128) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR]](<4 x s32>), [[C1]](s32), [[COPY4]], [[C]], 0, 0, 0 :: (dereferenceable invariant load (s128), align 4) - ; CHECK: [[AMDGPU_BUFFER_LOAD1:%[0-9]+]]:vgpr(s128) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR]](<4 x s32>), [[C1]](s32), [[COPY4]], [[C]], 16, 0, 0 :: (dereferenceable invariant load (s128), align 4) - ; CHECK: [[MV:%[0-9]+]]:vgpr(s256) = G_MERGE_VALUES [[AMDGPU_BUFFER_LOAD]](s128), [[AMDGPU_BUFFER_LOAD1]](s128) - ; CHECK: [[UV:%[0-9]+]]:vgpr(s128), [[UV1:%[0-9]+]]:vgpr(s128) = G_UNMERGE_VALUES [[MV]](s256) - ; CHECK: G_STORE [[UV]](s128), [[DEF]](p1) :: (store (s128) into `i256 addrspace(1)* undef`, align 8, addrspace 1) - ; CHECK: [[C2:%[0-9]+]]:sgpr(s64) = G_CONSTANT i64 16 - ; CHECK: [[PTR_ADD:%[0-9]+]]:sgpr(p1) = G_PTR_ADD [[DEF]], [[C2]](s64) - ; CHECK: G_STORE [[UV1]](s128), [[PTR_ADD]](p1) :: (store (s128) into `i256 addrspace(1)* undef` + 16, align 8, addrspace 1) - ; CHECK: S_ENDPGM 0 + ; CHECK-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $vgpr0 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr2 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr3 + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:sgpr(s32) = COPY $sgpr4 + ; CHECK-NEXT: [[COPY3:%[0-9]+]]:sgpr(s32) = COPY $sgpr5 + ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:sgpr(<4 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32) + ; CHECK-NEXT: [[COPY4:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 + ; CHECK-NEXT: [[DEF:%[0-9]+]]:sgpr(p1) = G_IMPLICIT_DEF + ; CHECK-NEXT: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 0 + ; CHECK-NEXT: [[C1:%[0-9]+]]:vgpr(s32) = G_CONSTANT i32 0 + ; CHECK-NEXT: [[AMDGPU_BUFFER_LOAD:%[0-9]+]]:vgpr(s128) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR]](<4 x s32>), [[C1]](s32), [[COPY4]], [[C]], 0, 0, 0 :: (dereferenceable invariant load (s128), align 4) + ; CHECK-NEXT: [[AMDGPU_BUFFER_LOAD1:%[0-9]+]]:vgpr(s128) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR]](<4 x s32>), [[C1]](s32), [[COPY4]], [[C]], 16, 0, 0 :: (dereferenceable invariant load (s128), align 4) + ; CHECK-NEXT: [[MV:%[0-9]+]]:vgpr(s256) = G_MERGE_VALUES [[AMDGPU_BUFFER_LOAD]](s128), [[AMDGPU_BUFFER_LOAD1]](s128) + ; CHECK-NEXT: [[UV:%[0-9]+]]:vgpr(s128), [[UV1:%[0-9]+]]:vgpr(s128) = G_UNMERGE_VALUES [[MV]](s256) + ; CHECK-NEXT: G_STORE [[UV]](s128), [[DEF]](p1) :: (store (s128) into `i256 addrspace(1)* undef`, align 8, addrspace 1) + ; CHECK-NEXT: [[C2:%[0-9]+]]:sgpr(s64) = G_CONSTANT i64 16 + ; CHECK-NEXT: 
[[PTR_ADD:%[0-9]+]]:sgpr(p1) = G_PTR_ADD [[DEF]], [[C2]](s64) + ; CHECK-NEXT: G_STORE [[UV1]](s128), [[PTR_ADD]](p1) :: (store (s128) into `i256 addrspace(1)* undef` + 16, align 8, addrspace 1) + ; CHECK-NEXT: S_ENDPGM 0 ; GREEDY-LABEL: name: s_buffer_load_i256_vgpr_offset ; GREEDY: bb.1 (%ir-block.0): - ; GREEDY: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $vgpr0 - ; GREEDY: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr2 - ; GREEDY: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr3 - ; GREEDY: [[COPY2:%[0-9]+]]:sgpr(s32) = COPY $sgpr4 - ; GREEDY: [[COPY3:%[0-9]+]]:sgpr(s32) = COPY $sgpr5 - ; GREEDY: [[BUILD_VECTOR:%[0-9]+]]:sgpr(<4 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32) - ; GREEDY: [[COPY4:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 - ; GREEDY: [[DEF:%[0-9]+]]:sgpr(p1) = G_IMPLICIT_DEF - ; GREEDY: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 0 - ; GREEDY: [[C1:%[0-9]+]]:vgpr(s32) = G_CONSTANT i32 0 - ; GREEDY: [[AMDGPU_BUFFER_LOAD:%[0-9]+]]:vgpr(s128) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR]](<4 x s32>), [[C1]](s32), [[COPY4]], [[C]], 0, 0, 0 :: (dereferenceable invariant load (s128), align 4) - ; GREEDY: [[AMDGPU_BUFFER_LOAD1:%[0-9]+]]:vgpr(s128) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR]](<4 x s32>), [[C1]](s32), [[COPY4]], [[C]], 16, 0, 0 :: (dereferenceable invariant load (s128), align 4) - ; GREEDY: [[MV:%[0-9]+]]:vgpr(s256) = G_MERGE_VALUES [[AMDGPU_BUFFER_LOAD]](s128), [[AMDGPU_BUFFER_LOAD1]](s128) - ; GREEDY: [[UV:%[0-9]+]]:vgpr(s128), [[UV1:%[0-9]+]]:vgpr(s128) = G_UNMERGE_VALUES [[MV]](s256) - ; GREEDY: G_STORE [[UV]](s128), [[DEF]](p1) :: (store (s128) into `i256 addrspace(1)* undef`, align 8, addrspace 1) - ; GREEDY: [[C2:%[0-9]+]]:sgpr(s64) = G_CONSTANT i64 16 - ; GREEDY: [[PTR_ADD:%[0-9]+]]:sgpr(p1) = G_PTR_ADD [[DEF]], [[C2]](s64) - ; GREEDY: G_STORE [[UV1]](s128), [[PTR_ADD]](p1) :: (store (s128) into `i256 addrspace(1)* undef` + 16, align 8, addrspace 1) - ; GREEDY: S_ENDPGM 0 + ; GREEDY-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $vgpr0 + ; GREEDY-NEXT: {{ $}} + ; GREEDY-NEXT: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr2 + ; GREEDY-NEXT: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr3 + ; GREEDY-NEXT: [[COPY2:%[0-9]+]]:sgpr(s32) = COPY $sgpr4 + ; GREEDY-NEXT: [[COPY3:%[0-9]+]]:sgpr(s32) = COPY $sgpr5 + ; GREEDY-NEXT: [[BUILD_VECTOR:%[0-9]+]]:sgpr(<4 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32) + ; GREEDY-NEXT: [[COPY4:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 + ; GREEDY-NEXT: [[DEF:%[0-9]+]]:sgpr(p1) = G_IMPLICIT_DEF + ; GREEDY-NEXT: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 0 + ; GREEDY-NEXT: [[C1:%[0-9]+]]:vgpr(s32) = G_CONSTANT i32 0 + ; GREEDY-NEXT: [[AMDGPU_BUFFER_LOAD:%[0-9]+]]:vgpr(s128) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR]](<4 x s32>), [[C1]](s32), [[COPY4]], [[C]], 0, 0, 0 :: (dereferenceable invariant load (s128), align 4) + ; GREEDY-NEXT: [[AMDGPU_BUFFER_LOAD1:%[0-9]+]]:vgpr(s128) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR]](<4 x s32>), [[C1]](s32), [[COPY4]], [[C]], 16, 0, 0 :: (dereferenceable invariant load (s128), align 4) + ; GREEDY-NEXT: [[MV:%[0-9]+]]:vgpr(s256) = G_MERGE_VALUES [[AMDGPU_BUFFER_LOAD]](s128), [[AMDGPU_BUFFER_LOAD1]](s128) + ; GREEDY-NEXT: [[UV:%[0-9]+]]:vgpr(s128), [[UV1:%[0-9]+]]:vgpr(s128) = G_UNMERGE_VALUES [[MV]](s256) + ; GREEDY-NEXT: G_STORE [[UV]](s128), [[DEF]](p1) :: (store (s128) into `i256 addrspace(1)* undef`, align 8, addrspace 1) + ; GREEDY-NEXT: [[C2:%[0-9]+]]:sgpr(s64) = G_CONSTANT i64 16 + ; GREEDY-NEXT: [[PTR_ADD:%[0-9]+]]:sgpr(p1) = G_PTR_ADD [[DEF]], [[C2]](s64) + ; GREEDY-NEXT: 
G_STORE [[UV1]](s128), [[PTR_ADD]](p1) :: (store (s128) into `i256 addrspace(1)* undef` + 16, align 8, addrspace 1) + ; GREEDY-NEXT: S_ENDPGM 0 %val = call i256 @llvm.amdgcn.s.buffer.load.i256(<4 x i32> %rsrc, i32 %soffset, i32 0) store i256 %val, i256 addrspace(1)* undef ret void @@ -718,62 +744,64 @@ define amdgpu_ps void @s_buffer_load_i256_vgpr_offset(<4 x i32> inreg %rsrc, i32 define amdgpu_ps void @s_buffer_load_i512_vgpr_offset(<4 x i32> inreg %rsrc, i32 %soffset) { ; CHECK-LABEL: name: s_buffer_load_i512_vgpr_offset ; CHECK: bb.1 (%ir-block.0): - ; CHECK: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $vgpr0 - ; CHECK: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr2 - ; CHECK: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr3 - ; CHECK: [[COPY2:%[0-9]+]]:sgpr(s32) = COPY $sgpr4 - ; CHECK: [[COPY3:%[0-9]+]]:sgpr(s32) = COPY $sgpr5 - ; CHECK: [[BUILD_VECTOR:%[0-9]+]]:sgpr(<4 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32) - ; CHECK: [[COPY4:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 - ; CHECK: [[DEF:%[0-9]+]]:sgpr(p1) = G_IMPLICIT_DEF - ; CHECK: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 0 - ; CHECK: [[C1:%[0-9]+]]:vgpr(s32) = G_CONSTANT i32 0 - ; CHECK: [[AMDGPU_BUFFER_LOAD:%[0-9]+]]:vgpr(s128) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR]](<4 x s32>), [[C1]](s32), [[COPY4]], [[C]], 0, 0, 0 :: (dereferenceable invariant load (s128), align 4) - ; CHECK: [[AMDGPU_BUFFER_LOAD1:%[0-9]+]]:vgpr(s128) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR]](<4 x s32>), [[C1]](s32), [[COPY4]], [[C]], 16, 0, 0 :: (dereferenceable invariant load (s128), align 4) - ; CHECK: [[AMDGPU_BUFFER_LOAD2:%[0-9]+]]:vgpr(s128) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR]](<4 x s32>), [[C1]](s32), [[COPY4]], [[C]], 32, 0, 0 :: (dereferenceable invariant load (s128) from unknown-address + 16, align 4) - ; CHECK: [[AMDGPU_BUFFER_LOAD3:%[0-9]+]]:vgpr(s128) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR]](<4 x s32>), [[C1]](s32), [[COPY4]], [[C]], 48, 0, 0 :: (dereferenceable invariant load (s128) from unknown-address + 48, align 4) - ; CHECK: [[MV:%[0-9]+]]:vgpr(s512) = G_MERGE_VALUES [[AMDGPU_BUFFER_LOAD]](s128), [[AMDGPU_BUFFER_LOAD1]](s128), [[AMDGPU_BUFFER_LOAD2]](s128), [[AMDGPU_BUFFER_LOAD3]](s128) - ; CHECK: [[UV:%[0-9]+]]:vgpr(s128), [[UV1:%[0-9]+]]:vgpr(s128), [[UV2:%[0-9]+]]:vgpr(s128), [[UV3:%[0-9]+]]:vgpr(s128) = G_UNMERGE_VALUES [[MV]](s512) - ; CHECK: G_STORE [[UV]](s128), [[DEF]](p1) :: (store (s128) into `i512 addrspace(1)* undef`, align 8, addrspace 1) - ; CHECK: [[C2:%[0-9]+]]:sgpr(s64) = G_CONSTANT i64 16 - ; CHECK: [[PTR_ADD:%[0-9]+]]:sgpr(p1) = G_PTR_ADD [[DEF]], [[C2]](s64) - ; CHECK: G_STORE [[UV1]](s128), [[PTR_ADD]](p1) :: (store (s128) into `i512 addrspace(1)* undef` + 16, align 8, addrspace 1) - ; CHECK: [[C3:%[0-9]+]]:sgpr(s64) = G_CONSTANT i64 32 - ; CHECK: [[PTR_ADD1:%[0-9]+]]:sgpr(p1) = G_PTR_ADD [[DEF]], [[C3]](s64) - ; CHECK: G_STORE [[UV2]](s128), [[PTR_ADD1]](p1) :: (store (s128) into `i512 addrspace(1)* undef` + 32, align 8, addrspace 1) - ; CHECK: [[C4:%[0-9]+]]:sgpr(s64) = G_CONSTANT i64 48 - ; CHECK: [[PTR_ADD2:%[0-9]+]]:sgpr(p1) = G_PTR_ADD [[DEF]], [[C4]](s64) - ; CHECK: G_STORE [[UV3]](s128), [[PTR_ADD2]](p1) :: (store (s128) into `i512 addrspace(1)* undef` + 48, align 8, addrspace 1) - ; CHECK: S_ENDPGM 0 + ; CHECK-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $vgpr0 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr2 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr3 + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:sgpr(s32) = COPY $sgpr4 + ; CHECK-NEXT: 
[[COPY3:%[0-9]+]]:sgpr(s32) = COPY $sgpr5 + ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:sgpr(<4 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32) + ; CHECK-NEXT: [[COPY4:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 + ; CHECK-NEXT: [[DEF:%[0-9]+]]:sgpr(p1) = G_IMPLICIT_DEF + ; CHECK-NEXT: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 0 + ; CHECK-NEXT: [[C1:%[0-9]+]]:vgpr(s32) = G_CONSTANT i32 0 + ; CHECK-NEXT: [[AMDGPU_BUFFER_LOAD:%[0-9]+]]:vgpr(s128) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR]](<4 x s32>), [[C1]](s32), [[COPY4]], [[C]], 0, 0, 0 :: (dereferenceable invariant load (s128), align 4) + ; CHECK-NEXT: [[AMDGPU_BUFFER_LOAD1:%[0-9]+]]:vgpr(s128) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR]](<4 x s32>), [[C1]](s32), [[COPY4]], [[C]], 16, 0, 0 :: (dereferenceable invariant load (s128), align 4) + ; CHECK-NEXT: [[AMDGPU_BUFFER_LOAD2:%[0-9]+]]:vgpr(s128) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR]](<4 x s32>), [[C1]](s32), [[COPY4]], [[C]], 32, 0, 0 :: (dereferenceable invariant load (s128) from unknown-address + 16, align 4) + ; CHECK-NEXT: [[AMDGPU_BUFFER_LOAD3:%[0-9]+]]:vgpr(s128) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR]](<4 x s32>), [[C1]](s32), [[COPY4]], [[C]], 48, 0, 0 :: (dereferenceable invariant load (s128) from unknown-address + 48, align 4) + ; CHECK-NEXT: [[MV:%[0-9]+]]:vgpr(s512) = G_MERGE_VALUES [[AMDGPU_BUFFER_LOAD]](s128), [[AMDGPU_BUFFER_LOAD1]](s128), [[AMDGPU_BUFFER_LOAD2]](s128), [[AMDGPU_BUFFER_LOAD3]](s128) + ; CHECK-NEXT: [[UV:%[0-9]+]]:vgpr(s128), [[UV1:%[0-9]+]]:vgpr(s128), [[UV2:%[0-9]+]]:vgpr(s128), [[UV3:%[0-9]+]]:vgpr(s128) = G_UNMERGE_VALUES [[MV]](s512) + ; CHECK-NEXT: G_STORE [[UV]](s128), [[DEF]](p1) :: (store (s128) into `i512 addrspace(1)* undef`, align 8, addrspace 1) + ; CHECK-NEXT: [[C2:%[0-9]+]]:sgpr(s64) = G_CONSTANT i64 16 + ; CHECK-NEXT: [[PTR_ADD:%[0-9]+]]:sgpr(p1) = G_PTR_ADD [[DEF]], [[C2]](s64) + ; CHECK-NEXT: G_STORE [[UV1]](s128), [[PTR_ADD]](p1) :: (store (s128) into `i512 addrspace(1)* undef` + 16, align 8, addrspace 1) + ; CHECK-NEXT: [[C3:%[0-9]+]]:sgpr(s64) = G_CONSTANT i64 32 + ; CHECK-NEXT: [[PTR_ADD1:%[0-9]+]]:sgpr(p1) = G_PTR_ADD [[DEF]], [[C3]](s64) + ; CHECK-NEXT: G_STORE [[UV2]](s128), [[PTR_ADD1]](p1) :: (store (s128) into `i512 addrspace(1)* undef` + 32, align 8, addrspace 1) + ; CHECK-NEXT: [[C4:%[0-9]+]]:sgpr(s64) = G_CONSTANT i64 48 + ; CHECK-NEXT: [[PTR_ADD2:%[0-9]+]]:sgpr(p1) = G_PTR_ADD [[DEF]], [[C4]](s64) + ; CHECK-NEXT: G_STORE [[UV3]](s128), [[PTR_ADD2]](p1) :: (store (s128) into `i512 addrspace(1)* undef` + 48, align 8, addrspace 1) + ; CHECK-NEXT: S_ENDPGM 0 ; GREEDY-LABEL: name: s_buffer_load_i512_vgpr_offset ; GREEDY: bb.1 (%ir-block.0): - ; GREEDY: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $vgpr0 - ; GREEDY: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr2 - ; GREEDY: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr3 - ; GREEDY: [[COPY2:%[0-9]+]]:sgpr(s32) = COPY $sgpr4 - ; GREEDY: [[COPY3:%[0-9]+]]:sgpr(s32) = COPY $sgpr5 - ; GREEDY: [[BUILD_VECTOR:%[0-9]+]]:sgpr(<4 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32) - ; GREEDY: [[COPY4:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 - ; GREEDY: [[DEF:%[0-9]+]]:sgpr(p1) = G_IMPLICIT_DEF - ; GREEDY: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 0 - ; GREEDY: [[C1:%[0-9]+]]:vgpr(s32) = G_CONSTANT i32 0 - ; GREEDY: [[AMDGPU_BUFFER_LOAD:%[0-9]+]]:vgpr(s128) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR]](<4 x s32>), [[C1]](s32), [[COPY4]], [[C]], 0, 0, 0 :: (dereferenceable invariant load (s128), align 4) - ; GREEDY: [[AMDGPU_BUFFER_LOAD1:%[0-9]+]]:vgpr(s128) = 
G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR]](<4 x s32>), [[C1]](s32), [[COPY4]], [[C]], 16, 0, 0 :: (dereferenceable invariant load (s128), align 4) - ; GREEDY: [[AMDGPU_BUFFER_LOAD2:%[0-9]+]]:vgpr(s128) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR]](<4 x s32>), [[C1]](s32), [[COPY4]], [[C]], 32, 0, 0 :: (dereferenceable invariant load (s128) from unknown-address + 16, align 4) - ; GREEDY: [[AMDGPU_BUFFER_LOAD3:%[0-9]+]]:vgpr(s128) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR]](<4 x s32>), [[C1]](s32), [[COPY4]], [[C]], 48, 0, 0 :: (dereferenceable invariant load (s128) from unknown-address + 48, align 4) - ; GREEDY: [[MV:%[0-9]+]]:vgpr(s512) = G_MERGE_VALUES [[AMDGPU_BUFFER_LOAD]](s128), [[AMDGPU_BUFFER_LOAD1]](s128), [[AMDGPU_BUFFER_LOAD2]](s128), [[AMDGPU_BUFFER_LOAD3]](s128) - ; GREEDY: [[UV:%[0-9]+]]:vgpr(s128), [[UV1:%[0-9]+]]:vgpr(s128), [[UV2:%[0-9]+]]:vgpr(s128), [[UV3:%[0-9]+]]:vgpr(s128) = G_UNMERGE_VALUES [[MV]](s512) - ; GREEDY: G_STORE [[UV]](s128), [[DEF]](p1) :: (store (s128) into `i512 addrspace(1)* undef`, align 8, addrspace 1) - ; GREEDY: [[C2:%[0-9]+]]:sgpr(s64) = G_CONSTANT i64 16 - ; GREEDY: [[PTR_ADD:%[0-9]+]]:sgpr(p1) = G_PTR_ADD [[DEF]], [[C2]](s64) - ; GREEDY: G_STORE [[UV1]](s128), [[PTR_ADD]](p1) :: (store (s128) into `i512 addrspace(1)* undef` + 16, align 8, addrspace 1) - ; GREEDY: [[C3:%[0-9]+]]:sgpr(s64) = G_CONSTANT i64 32 - ; GREEDY: [[PTR_ADD1:%[0-9]+]]:sgpr(p1) = G_PTR_ADD [[DEF]], [[C3]](s64) - ; GREEDY: G_STORE [[UV2]](s128), [[PTR_ADD1]](p1) :: (store (s128) into `i512 addrspace(1)* undef` + 32, align 8, addrspace 1) - ; GREEDY: [[C4:%[0-9]+]]:sgpr(s64) = G_CONSTANT i64 48 - ; GREEDY: [[PTR_ADD2:%[0-9]+]]:sgpr(p1) = G_PTR_ADD [[DEF]], [[C4]](s64) - ; GREEDY: G_STORE [[UV3]](s128), [[PTR_ADD2]](p1) :: (store (s128) into `i512 addrspace(1)* undef` + 48, align 8, addrspace 1) - ; GREEDY: S_ENDPGM 0 + ; GREEDY-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $vgpr0 + ; GREEDY-NEXT: {{ $}} + ; GREEDY-NEXT: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr2 + ; GREEDY-NEXT: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr3 + ; GREEDY-NEXT: [[COPY2:%[0-9]+]]:sgpr(s32) = COPY $sgpr4 + ; GREEDY-NEXT: [[COPY3:%[0-9]+]]:sgpr(s32) = COPY $sgpr5 + ; GREEDY-NEXT: [[BUILD_VECTOR:%[0-9]+]]:sgpr(<4 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32) + ; GREEDY-NEXT: [[COPY4:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 + ; GREEDY-NEXT: [[DEF:%[0-9]+]]:sgpr(p1) = G_IMPLICIT_DEF + ; GREEDY-NEXT: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 0 + ; GREEDY-NEXT: [[C1:%[0-9]+]]:vgpr(s32) = G_CONSTANT i32 0 + ; GREEDY-NEXT: [[AMDGPU_BUFFER_LOAD:%[0-9]+]]:vgpr(s128) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR]](<4 x s32>), [[C1]](s32), [[COPY4]], [[C]], 0, 0, 0 :: (dereferenceable invariant load (s128), align 4) + ; GREEDY-NEXT: [[AMDGPU_BUFFER_LOAD1:%[0-9]+]]:vgpr(s128) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR]](<4 x s32>), [[C1]](s32), [[COPY4]], [[C]], 16, 0, 0 :: (dereferenceable invariant load (s128), align 4) + ; GREEDY-NEXT: [[AMDGPU_BUFFER_LOAD2:%[0-9]+]]:vgpr(s128) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR]](<4 x s32>), [[C1]](s32), [[COPY4]], [[C]], 32, 0, 0 :: (dereferenceable invariant load (s128) from unknown-address + 16, align 4) + ; GREEDY-NEXT: [[AMDGPU_BUFFER_LOAD3:%[0-9]+]]:vgpr(s128) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR]](<4 x s32>), [[C1]](s32), [[COPY4]], [[C]], 48, 0, 0 :: (dereferenceable invariant load (s128) from unknown-address + 48, align 4) + ; GREEDY-NEXT: [[MV:%[0-9]+]]:vgpr(s512) = G_MERGE_VALUES [[AMDGPU_BUFFER_LOAD]](s128), [[AMDGPU_BUFFER_LOAD1]](s128), 
[[AMDGPU_BUFFER_LOAD2]](s128), [[AMDGPU_BUFFER_LOAD3]](s128) + ; GREEDY-NEXT: [[UV:%[0-9]+]]:vgpr(s128), [[UV1:%[0-9]+]]:vgpr(s128), [[UV2:%[0-9]+]]:vgpr(s128), [[UV3:%[0-9]+]]:vgpr(s128) = G_UNMERGE_VALUES [[MV]](s512) + ; GREEDY-NEXT: G_STORE [[UV]](s128), [[DEF]](p1) :: (store (s128) into `i512 addrspace(1)* undef`, align 8, addrspace 1) + ; GREEDY-NEXT: [[C2:%[0-9]+]]:sgpr(s64) = G_CONSTANT i64 16 + ; GREEDY-NEXT: [[PTR_ADD:%[0-9]+]]:sgpr(p1) = G_PTR_ADD [[DEF]], [[C2]](s64) + ; GREEDY-NEXT: G_STORE [[UV1]](s128), [[PTR_ADD]](p1) :: (store (s128) into `i512 addrspace(1)* undef` + 16, align 8, addrspace 1) + ; GREEDY-NEXT: [[C3:%[0-9]+]]:sgpr(s64) = G_CONSTANT i64 32 + ; GREEDY-NEXT: [[PTR_ADD1:%[0-9]+]]:sgpr(p1) = G_PTR_ADD [[DEF]], [[C3]](s64) + ; GREEDY-NEXT: G_STORE [[UV2]](s128), [[PTR_ADD1]](p1) :: (store (s128) into `i512 addrspace(1)* undef` + 32, align 8, addrspace 1) + ; GREEDY-NEXT: [[C4:%[0-9]+]]:sgpr(s64) = G_CONSTANT i64 48 + ; GREEDY-NEXT: [[PTR_ADD2:%[0-9]+]]:sgpr(p1) = G_PTR_ADD [[DEF]], [[C4]](s64) + ; GREEDY-NEXT: G_STORE [[UV3]](s128), [[PTR_ADD2]](p1) :: (store (s128) into `i512 addrspace(1)* undef` + 48, align 8, addrspace 1) + ; GREEDY-NEXT: S_ENDPGM 0 %val = call i512 @llvm.amdgcn.s.buffer.load.i512(<4 x i32> %rsrc, i32 %soffset, i32 0) store i512 %val, i512 addrspace(1)* undef ret void @@ -783,46 +811,48 @@ define amdgpu_ps void @s_buffer_load_i512_vgpr_offset(<4 x i32> inreg %rsrc, i32 define amdgpu_ps void @s_buffer_load_v16i16_vgpr_offset(<4 x i32> inreg %rsrc, i32 %soffset) { ; CHECK-LABEL: name: s_buffer_load_v16i16_vgpr_offset ; CHECK: bb.1 (%ir-block.0): - ; CHECK: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $vgpr0 - ; CHECK: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr2 - ; CHECK: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr3 - ; CHECK: [[COPY2:%[0-9]+]]:sgpr(s32) = COPY $sgpr4 - ; CHECK: [[COPY3:%[0-9]+]]:sgpr(s32) = COPY $sgpr5 - ; CHECK: [[BUILD_VECTOR:%[0-9]+]]:sgpr(<4 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32) - ; CHECK: [[COPY4:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 - ; CHECK: [[DEF:%[0-9]+]]:sgpr(p1) = G_IMPLICIT_DEF - ; CHECK: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 0 - ; CHECK: [[C1:%[0-9]+]]:vgpr(s32) = G_CONSTANT i32 0 - ; CHECK: [[AMDGPU_BUFFER_LOAD:%[0-9]+]]:vgpr(<8 x s16>) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR]](<4 x s32>), [[C1]](s32), [[COPY4]], [[C]], 0, 0, 0 :: (dereferenceable invariant load (s128), align 4) - ; CHECK: [[AMDGPU_BUFFER_LOAD1:%[0-9]+]]:vgpr(<8 x s16>) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR]](<4 x s32>), [[C1]](s32), [[COPY4]], [[C]], 16, 0, 0 :: (dereferenceable invariant load (s128), align 4) - ; CHECK: [[CONCAT_VECTORS:%[0-9]+]]:vgpr(<16 x s16>) = G_CONCAT_VECTORS [[AMDGPU_BUFFER_LOAD]](<8 x s16>), [[AMDGPU_BUFFER_LOAD1]](<8 x s16>) - ; CHECK: [[UV:%[0-9]+]]:vgpr(<8 x s16>), [[UV1:%[0-9]+]]:vgpr(<8 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<16 x s16>) - ; CHECK: G_STORE [[UV]](<8 x s16>), [[DEF]](p1) :: (store (<8 x s16>) into `<16 x i16> addrspace(1)* undef`, align 32, addrspace 1) - ; CHECK: [[C2:%[0-9]+]]:sgpr(s64) = G_CONSTANT i64 16 - ; CHECK: [[PTR_ADD:%[0-9]+]]:sgpr(p1) = G_PTR_ADD [[DEF]], [[C2]](s64) - ; CHECK: G_STORE [[UV1]](<8 x s16>), [[PTR_ADD]](p1) :: (store (<8 x s16>) into `<16 x i16> addrspace(1)* undef` + 16, basealign 32, addrspace 1) - ; CHECK: S_ENDPGM 0 + ; CHECK-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $vgpr0 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr2 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr3 + ; 
CHECK-NEXT: [[COPY2:%[0-9]+]]:sgpr(s32) = COPY $sgpr4 + ; CHECK-NEXT: [[COPY3:%[0-9]+]]:sgpr(s32) = COPY $sgpr5 + ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:sgpr(<4 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32) + ; CHECK-NEXT: [[COPY4:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 + ; CHECK-NEXT: [[DEF:%[0-9]+]]:sgpr(p1) = G_IMPLICIT_DEF + ; CHECK-NEXT: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 0 + ; CHECK-NEXT: [[C1:%[0-9]+]]:vgpr(s32) = G_CONSTANT i32 0 + ; CHECK-NEXT: [[AMDGPU_BUFFER_LOAD:%[0-9]+]]:vgpr(<8 x s16>) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR]](<4 x s32>), [[C1]](s32), [[COPY4]], [[C]], 0, 0, 0 :: (dereferenceable invariant load (s128), align 4) + ; CHECK-NEXT: [[AMDGPU_BUFFER_LOAD1:%[0-9]+]]:vgpr(<8 x s16>) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR]](<4 x s32>), [[C1]](s32), [[COPY4]], [[C]], 16, 0, 0 :: (dereferenceable invariant load (s128), align 4) + ; CHECK-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:vgpr(<16 x s16>) = G_CONCAT_VECTORS [[AMDGPU_BUFFER_LOAD]](<8 x s16>), [[AMDGPU_BUFFER_LOAD1]](<8 x s16>) + ; CHECK-NEXT: [[UV:%[0-9]+]]:vgpr(<8 x s16>), [[UV1:%[0-9]+]]:vgpr(<8 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<16 x s16>) + ; CHECK-NEXT: G_STORE [[UV]](<8 x s16>), [[DEF]](p1) :: (store (<8 x s16>) into `<16 x i16> addrspace(1)* undef`, align 32, addrspace 1) + ; CHECK-NEXT: [[C2:%[0-9]+]]:sgpr(s64) = G_CONSTANT i64 16 + ; CHECK-NEXT: [[PTR_ADD:%[0-9]+]]:sgpr(p1) = G_PTR_ADD [[DEF]], [[C2]](s64) + ; CHECK-NEXT: G_STORE [[UV1]](<8 x s16>), [[PTR_ADD]](p1) :: (store (<8 x s16>) into `<16 x i16> addrspace(1)* undef` + 16, basealign 32, addrspace 1) + ; CHECK-NEXT: S_ENDPGM 0 ; GREEDY-LABEL: name: s_buffer_load_v16i16_vgpr_offset ; GREEDY: bb.1 (%ir-block.0): - ; GREEDY: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $vgpr0 - ; GREEDY: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr2 - ; GREEDY: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr3 - ; GREEDY: [[COPY2:%[0-9]+]]:sgpr(s32) = COPY $sgpr4 - ; GREEDY: [[COPY3:%[0-9]+]]:sgpr(s32) = COPY $sgpr5 - ; GREEDY: [[BUILD_VECTOR:%[0-9]+]]:sgpr(<4 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32) - ; GREEDY: [[COPY4:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 - ; GREEDY: [[DEF:%[0-9]+]]:sgpr(p1) = G_IMPLICIT_DEF - ; GREEDY: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 0 - ; GREEDY: [[C1:%[0-9]+]]:vgpr(s32) = G_CONSTANT i32 0 - ; GREEDY: [[AMDGPU_BUFFER_LOAD:%[0-9]+]]:vgpr(<8 x s16>) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR]](<4 x s32>), [[C1]](s32), [[COPY4]], [[C]], 0, 0, 0 :: (dereferenceable invariant load (s128), align 4) - ; GREEDY: [[AMDGPU_BUFFER_LOAD1:%[0-9]+]]:vgpr(<8 x s16>) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR]](<4 x s32>), [[C1]](s32), [[COPY4]], [[C]], 16, 0, 0 :: (dereferenceable invariant load (s128), align 4) - ; GREEDY: [[CONCAT_VECTORS:%[0-9]+]]:vgpr(<16 x s16>) = G_CONCAT_VECTORS [[AMDGPU_BUFFER_LOAD]](<8 x s16>), [[AMDGPU_BUFFER_LOAD1]](<8 x s16>) - ; GREEDY: [[UV:%[0-9]+]]:vgpr(<8 x s16>), [[UV1:%[0-9]+]]:vgpr(<8 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<16 x s16>) - ; GREEDY: G_STORE [[UV]](<8 x s16>), [[DEF]](p1) :: (store (<8 x s16>) into `<16 x i16> addrspace(1)* undef`, align 32, addrspace 1) - ; GREEDY: [[C2:%[0-9]+]]:sgpr(s64) = G_CONSTANT i64 16 - ; GREEDY: [[PTR_ADD:%[0-9]+]]:sgpr(p1) = G_PTR_ADD [[DEF]], [[C2]](s64) - ; GREEDY: G_STORE [[UV1]](<8 x s16>), [[PTR_ADD]](p1) :: (store (<8 x s16>) into `<16 x i16> addrspace(1)* undef` + 16, basealign 32, addrspace 1) - ; GREEDY: S_ENDPGM 0 + ; GREEDY-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $vgpr0 + ; GREEDY-NEXT: {{ $}} + ; 
GREEDY-NEXT: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr2 + ; GREEDY-NEXT: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr3 + ; GREEDY-NEXT: [[COPY2:%[0-9]+]]:sgpr(s32) = COPY $sgpr4 + ; GREEDY-NEXT: [[COPY3:%[0-9]+]]:sgpr(s32) = COPY $sgpr5 + ; GREEDY-NEXT: [[BUILD_VECTOR:%[0-9]+]]:sgpr(<4 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32) + ; GREEDY-NEXT: [[COPY4:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 + ; GREEDY-NEXT: [[DEF:%[0-9]+]]:sgpr(p1) = G_IMPLICIT_DEF + ; GREEDY-NEXT: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 0 + ; GREEDY-NEXT: [[C1:%[0-9]+]]:vgpr(s32) = G_CONSTANT i32 0 + ; GREEDY-NEXT: [[AMDGPU_BUFFER_LOAD:%[0-9]+]]:vgpr(<8 x s16>) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR]](<4 x s32>), [[C1]](s32), [[COPY4]], [[C]], 0, 0, 0 :: (dereferenceable invariant load (s128), align 4) + ; GREEDY-NEXT: [[AMDGPU_BUFFER_LOAD1:%[0-9]+]]:vgpr(<8 x s16>) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR]](<4 x s32>), [[C1]](s32), [[COPY4]], [[C]], 16, 0, 0 :: (dereferenceable invariant load (s128), align 4) + ; GREEDY-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:vgpr(<16 x s16>) = G_CONCAT_VECTORS [[AMDGPU_BUFFER_LOAD]](<8 x s16>), [[AMDGPU_BUFFER_LOAD1]](<8 x s16>) + ; GREEDY-NEXT: [[UV:%[0-9]+]]:vgpr(<8 x s16>), [[UV1:%[0-9]+]]:vgpr(<8 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<16 x s16>) + ; GREEDY-NEXT: G_STORE [[UV]](<8 x s16>), [[DEF]](p1) :: (store (<8 x s16>) into `<16 x i16> addrspace(1)* undef`, align 32, addrspace 1) + ; GREEDY-NEXT: [[C2:%[0-9]+]]:sgpr(s64) = G_CONSTANT i64 16 + ; GREEDY-NEXT: [[PTR_ADD:%[0-9]+]]:sgpr(p1) = G_PTR_ADD [[DEF]], [[C2]](s64) + ; GREEDY-NEXT: G_STORE [[UV1]](<8 x s16>), [[PTR_ADD]](p1) :: (store (<8 x s16>) into `<16 x i16> addrspace(1)* undef` + 16, basealign 32, addrspace 1) + ; GREEDY-NEXT: S_ENDPGM 0 %val = call <16 x i16> @llvm.amdgcn.s.buffer.load.v16i16(<4 x i32> %rsrc, i32 %soffset, i32 0) store <16 x i16> %val, <16 x i16> addrspace(1)* undef ret void @@ -832,62 +862,64 @@ define amdgpu_ps void @s_buffer_load_v16i16_vgpr_offset(<4 x i32> inreg %rsrc, i define amdgpu_ps void @s_buffer_load_v32i16_vgpr_offset(<4 x i32> inreg %rsrc, i32 %soffset) { ; CHECK-LABEL: name: s_buffer_load_v32i16_vgpr_offset ; CHECK: bb.1 (%ir-block.0): - ; CHECK: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $vgpr0 - ; CHECK: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr2 - ; CHECK: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr3 - ; CHECK: [[COPY2:%[0-9]+]]:sgpr(s32) = COPY $sgpr4 - ; CHECK: [[COPY3:%[0-9]+]]:sgpr(s32) = COPY $sgpr5 - ; CHECK: [[BUILD_VECTOR:%[0-9]+]]:sgpr(<4 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32) - ; CHECK: [[COPY4:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 - ; CHECK: [[DEF:%[0-9]+]]:sgpr(p1) = G_IMPLICIT_DEF - ; CHECK: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 0 - ; CHECK: [[C1:%[0-9]+]]:vgpr(s32) = G_CONSTANT i32 0 - ; CHECK: [[AMDGPU_BUFFER_LOAD:%[0-9]+]]:vgpr(<8 x s16>) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR]](<4 x s32>), [[C1]](s32), [[COPY4]], [[C]], 0, 0, 0 :: (dereferenceable invariant load (s128), align 4) - ; CHECK: [[AMDGPU_BUFFER_LOAD1:%[0-9]+]]:vgpr(<8 x s16>) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR]](<4 x s32>), [[C1]](s32), [[COPY4]], [[C]], 16, 0, 0 :: (dereferenceable invariant load (s128), align 4) - ; CHECK: [[AMDGPU_BUFFER_LOAD2:%[0-9]+]]:vgpr(<8 x s16>) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR]](<4 x s32>), [[C1]](s32), [[COPY4]], [[C]], 32, 0, 0 :: (dereferenceable invariant load (s128) from unknown-address + 16, align 4) - ; CHECK: [[AMDGPU_BUFFER_LOAD3:%[0-9]+]]:vgpr(<8 x s16>) = G_AMDGPU_BUFFER_LOAD 
[[BUILD_VECTOR]](<4 x s32>), [[C1]](s32), [[COPY4]], [[C]], 48, 0, 0 :: (dereferenceable invariant load (s128) from unknown-address + 48, align 4) - ; CHECK: [[CONCAT_VECTORS:%[0-9]+]]:vgpr(<32 x s16>) = G_CONCAT_VECTORS [[AMDGPU_BUFFER_LOAD]](<8 x s16>), [[AMDGPU_BUFFER_LOAD1]](<8 x s16>), [[AMDGPU_BUFFER_LOAD2]](<8 x s16>), [[AMDGPU_BUFFER_LOAD3]](<8 x s16>) - ; CHECK: [[UV:%[0-9]+]]:vgpr(<8 x s16>), [[UV1:%[0-9]+]]:vgpr(<8 x s16>), [[UV2:%[0-9]+]]:vgpr(<8 x s16>), [[UV3:%[0-9]+]]:vgpr(<8 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<32 x s16>) - ; CHECK: G_STORE [[UV]](<8 x s16>), [[DEF]](p1) :: (store (<8 x s16>) into `<32 x i16> addrspace(1)* undef`, align 64, addrspace 1) - ; CHECK: [[C2:%[0-9]+]]:sgpr(s64) = G_CONSTANT i64 16 - ; CHECK: [[PTR_ADD:%[0-9]+]]:sgpr(p1) = G_PTR_ADD [[DEF]], [[C2]](s64) - ; CHECK: G_STORE [[UV1]](<8 x s16>), [[PTR_ADD]](p1) :: (store (<8 x s16>) into `<32 x i16> addrspace(1)* undef` + 16, basealign 64, addrspace 1) - ; CHECK: [[C3:%[0-9]+]]:sgpr(s64) = G_CONSTANT i64 32 - ; CHECK: [[PTR_ADD1:%[0-9]+]]:sgpr(p1) = G_PTR_ADD [[DEF]], [[C3]](s64) - ; CHECK: G_STORE [[UV2]](<8 x s16>), [[PTR_ADD1]](p1) :: (store (<8 x s16>) into `<32 x i16> addrspace(1)* undef` + 32, align 32, basealign 64, addrspace 1) - ; CHECK: [[C4:%[0-9]+]]:sgpr(s64) = G_CONSTANT i64 48 - ; CHECK: [[PTR_ADD2:%[0-9]+]]:sgpr(p1) = G_PTR_ADD [[DEF]], [[C4]](s64) - ; CHECK: G_STORE [[UV3]](<8 x s16>), [[PTR_ADD2]](p1) :: (store (<8 x s16>) into `<32 x i16> addrspace(1)* undef` + 48, basealign 64, addrspace 1) - ; CHECK: S_ENDPGM 0 + ; CHECK-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $vgpr0 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr2 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr3 + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:sgpr(s32) = COPY $sgpr4 + ; CHECK-NEXT: [[COPY3:%[0-9]+]]:sgpr(s32) = COPY $sgpr5 + ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:sgpr(<4 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32) + ; CHECK-NEXT: [[COPY4:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 + ; CHECK-NEXT: [[DEF:%[0-9]+]]:sgpr(p1) = G_IMPLICIT_DEF + ; CHECK-NEXT: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 0 + ; CHECK-NEXT: [[C1:%[0-9]+]]:vgpr(s32) = G_CONSTANT i32 0 + ; CHECK-NEXT: [[AMDGPU_BUFFER_LOAD:%[0-9]+]]:vgpr(<8 x s16>) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR]](<4 x s32>), [[C1]](s32), [[COPY4]], [[C]], 0, 0, 0 :: (dereferenceable invariant load (s128), align 4) + ; CHECK-NEXT: [[AMDGPU_BUFFER_LOAD1:%[0-9]+]]:vgpr(<8 x s16>) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR]](<4 x s32>), [[C1]](s32), [[COPY4]], [[C]], 16, 0, 0 :: (dereferenceable invariant load (s128), align 4) + ; CHECK-NEXT: [[AMDGPU_BUFFER_LOAD2:%[0-9]+]]:vgpr(<8 x s16>) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR]](<4 x s32>), [[C1]](s32), [[COPY4]], [[C]], 32, 0, 0 :: (dereferenceable invariant load (s128) from unknown-address + 16, align 4) + ; CHECK-NEXT: [[AMDGPU_BUFFER_LOAD3:%[0-9]+]]:vgpr(<8 x s16>) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR]](<4 x s32>), [[C1]](s32), [[COPY4]], [[C]], 48, 0, 0 :: (dereferenceable invariant load (s128) from unknown-address + 48, align 4) + ; CHECK-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:vgpr(<32 x s16>) = G_CONCAT_VECTORS [[AMDGPU_BUFFER_LOAD]](<8 x s16>), [[AMDGPU_BUFFER_LOAD1]](<8 x s16>), [[AMDGPU_BUFFER_LOAD2]](<8 x s16>), [[AMDGPU_BUFFER_LOAD3]](<8 x s16>) + ; CHECK-NEXT: [[UV:%[0-9]+]]:vgpr(<8 x s16>), [[UV1:%[0-9]+]]:vgpr(<8 x s16>), [[UV2:%[0-9]+]]:vgpr(<8 x s16>), [[UV3:%[0-9]+]]:vgpr(<8 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<32 x s16>) + ; 
CHECK-NEXT: G_STORE [[UV]](<8 x s16>), [[DEF]](p1) :: (store (<8 x s16>) into `<32 x i16> addrspace(1)* undef`, align 64, addrspace 1) + ; CHECK-NEXT: [[C2:%[0-9]+]]:sgpr(s64) = G_CONSTANT i64 16 + ; CHECK-NEXT: [[PTR_ADD:%[0-9]+]]:sgpr(p1) = G_PTR_ADD [[DEF]], [[C2]](s64) + ; CHECK-NEXT: G_STORE [[UV1]](<8 x s16>), [[PTR_ADD]](p1) :: (store (<8 x s16>) into `<32 x i16> addrspace(1)* undef` + 16, basealign 64, addrspace 1) + ; CHECK-NEXT: [[C3:%[0-9]+]]:sgpr(s64) = G_CONSTANT i64 32 + ; CHECK-NEXT: [[PTR_ADD1:%[0-9]+]]:sgpr(p1) = G_PTR_ADD [[DEF]], [[C3]](s64) + ; CHECK-NEXT: G_STORE [[UV2]](<8 x s16>), [[PTR_ADD1]](p1) :: (store (<8 x s16>) into `<32 x i16> addrspace(1)* undef` + 32, align 32, basealign 64, addrspace 1) + ; CHECK-NEXT: [[C4:%[0-9]+]]:sgpr(s64) = G_CONSTANT i64 48 + ; CHECK-NEXT: [[PTR_ADD2:%[0-9]+]]:sgpr(p1) = G_PTR_ADD [[DEF]], [[C4]](s64) + ; CHECK-NEXT: G_STORE [[UV3]](<8 x s16>), [[PTR_ADD2]](p1) :: (store (<8 x s16>) into `<32 x i16> addrspace(1)* undef` + 48, basealign 64, addrspace 1) + ; CHECK-NEXT: S_ENDPGM 0 ; GREEDY-LABEL: name: s_buffer_load_v32i16_vgpr_offset ; GREEDY: bb.1 (%ir-block.0): - ; GREEDY: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $vgpr0 - ; GREEDY: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr2 - ; GREEDY: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr3 - ; GREEDY: [[COPY2:%[0-9]+]]:sgpr(s32) = COPY $sgpr4 - ; GREEDY: [[COPY3:%[0-9]+]]:sgpr(s32) = COPY $sgpr5 - ; GREEDY: [[BUILD_VECTOR:%[0-9]+]]:sgpr(<4 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32) - ; GREEDY: [[COPY4:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 - ; GREEDY: [[DEF:%[0-9]+]]:sgpr(p1) = G_IMPLICIT_DEF - ; GREEDY: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 0 - ; GREEDY: [[C1:%[0-9]+]]:vgpr(s32) = G_CONSTANT i32 0 - ; GREEDY: [[AMDGPU_BUFFER_LOAD:%[0-9]+]]:vgpr(<8 x s16>) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR]](<4 x s32>), [[C1]](s32), [[COPY4]], [[C]], 0, 0, 0 :: (dereferenceable invariant load (s128), align 4) - ; GREEDY: [[AMDGPU_BUFFER_LOAD1:%[0-9]+]]:vgpr(<8 x s16>) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR]](<4 x s32>), [[C1]](s32), [[COPY4]], [[C]], 16, 0, 0 :: (dereferenceable invariant load (s128), align 4) - ; GREEDY: [[AMDGPU_BUFFER_LOAD2:%[0-9]+]]:vgpr(<8 x s16>) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR]](<4 x s32>), [[C1]](s32), [[COPY4]], [[C]], 32, 0, 0 :: (dereferenceable invariant load (s128) from unknown-address + 16, align 4) - ; GREEDY: [[AMDGPU_BUFFER_LOAD3:%[0-9]+]]:vgpr(<8 x s16>) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR]](<4 x s32>), [[C1]](s32), [[COPY4]], [[C]], 48, 0, 0 :: (dereferenceable invariant load (s128) from unknown-address + 48, align 4) - ; GREEDY: [[CONCAT_VECTORS:%[0-9]+]]:vgpr(<32 x s16>) = G_CONCAT_VECTORS [[AMDGPU_BUFFER_LOAD]](<8 x s16>), [[AMDGPU_BUFFER_LOAD1]](<8 x s16>), [[AMDGPU_BUFFER_LOAD2]](<8 x s16>), [[AMDGPU_BUFFER_LOAD3]](<8 x s16>) - ; GREEDY: [[UV:%[0-9]+]]:vgpr(<8 x s16>), [[UV1:%[0-9]+]]:vgpr(<8 x s16>), [[UV2:%[0-9]+]]:vgpr(<8 x s16>), [[UV3:%[0-9]+]]:vgpr(<8 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<32 x s16>) - ; GREEDY: G_STORE [[UV]](<8 x s16>), [[DEF]](p1) :: (store (<8 x s16>) into `<32 x i16> addrspace(1)* undef`, align 64, addrspace 1) - ; GREEDY: [[C2:%[0-9]+]]:sgpr(s64) = G_CONSTANT i64 16 - ; GREEDY: [[PTR_ADD:%[0-9]+]]:sgpr(p1) = G_PTR_ADD [[DEF]], [[C2]](s64) - ; GREEDY: G_STORE [[UV1]](<8 x s16>), [[PTR_ADD]](p1) :: (store (<8 x s16>) into `<32 x i16> addrspace(1)* undef` + 16, basealign 64, addrspace 1) - ; GREEDY: [[C3:%[0-9]+]]:sgpr(s64) = G_CONSTANT i64 32 - ; GREEDY: [[PTR_ADD1:%[0-9]+]]:sgpr(p1) = 
G_PTR_ADD [[DEF]], [[C3]](s64) - ; GREEDY: G_STORE [[UV2]](<8 x s16>), [[PTR_ADD1]](p1) :: (store (<8 x s16>) into `<32 x i16> addrspace(1)* undef` + 32, align 32, basealign 64, addrspace 1) - ; GREEDY: [[C4:%[0-9]+]]:sgpr(s64) = G_CONSTANT i64 48 - ; GREEDY: [[PTR_ADD2:%[0-9]+]]:sgpr(p1) = G_PTR_ADD [[DEF]], [[C4]](s64) - ; GREEDY: G_STORE [[UV3]](<8 x s16>), [[PTR_ADD2]](p1) :: (store (<8 x s16>) into `<32 x i16> addrspace(1)* undef` + 48, basealign 64, addrspace 1) - ; GREEDY: S_ENDPGM 0 + ; GREEDY-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $vgpr0 + ; GREEDY-NEXT: {{ $}} + ; GREEDY-NEXT: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr2 + ; GREEDY-NEXT: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr3 + ; GREEDY-NEXT: [[COPY2:%[0-9]+]]:sgpr(s32) = COPY $sgpr4 + ; GREEDY-NEXT: [[COPY3:%[0-9]+]]:sgpr(s32) = COPY $sgpr5 + ; GREEDY-NEXT: [[BUILD_VECTOR:%[0-9]+]]:sgpr(<4 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32) + ; GREEDY-NEXT: [[COPY4:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 + ; GREEDY-NEXT: [[DEF:%[0-9]+]]:sgpr(p1) = G_IMPLICIT_DEF + ; GREEDY-NEXT: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 0 + ; GREEDY-NEXT: [[C1:%[0-9]+]]:vgpr(s32) = G_CONSTANT i32 0 + ; GREEDY-NEXT: [[AMDGPU_BUFFER_LOAD:%[0-9]+]]:vgpr(<8 x s16>) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR]](<4 x s32>), [[C1]](s32), [[COPY4]], [[C]], 0, 0, 0 :: (dereferenceable invariant load (s128), align 4) + ; GREEDY-NEXT: [[AMDGPU_BUFFER_LOAD1:%[0-9]+]]:vgpr(<8 x s16>) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR]](<4 x s32>), [[C1]](s32), [[COPY4]], [[C]], 16, 0, 0 :: (dereferenceable invariant load (s128), align 4) + ; GREEDY-NEXT: [[AMDGPU_BUFFER_LOAD2:%[0-9]+]]:vgpr(<8 x s16>) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR]](<4 x s32>), [[C1]](s32), [[COPY4]], [[C]], 32, 0, 0 :: (dereferenceable invariant load (s128) from unknown-address + 16, align 4) + ; GREEDY-NEXT: [[AMDGPU_BUFFER_LOAD3:%[0-9]+]]:vgpr(<8 x s16>) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR]](<4 x s32>), [[C1]](s32), [[COPY4]], [[C]], 48, 0, 0 :: (dereferenceable invariant load (s128) from unknown-address + 48, align 4) + ; GREEDY-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:vgpr(<32 x s16>) = G_CONCAT_VECTORS [[AMDGPU_BUFFER_LOAD]](<8 x s16>), [[AMDGPU_BUFFER_LOAD1]](<8 x s16>), [[AMDGPU_BUFFER_LOAD2]](<8 x s16>), [[AMDGPU_BUFFER_LOAD3]](<8 x s16>) + ; GREEDY-NEXT: [[UV:%[0-9]+]]:vgpr(<8 x s16>), [[UV1:%[0-9]+]]:vgpr(<8 x s16>), [[UV2:%[0-9]+]]:vgpr(<8 x s16>), [[UV3:%[0-9]+]]:vgpr(<8 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<32 x s16>) + ; GREEDY-NEXT: G_STORE [[UV]](<8 x s16>), [[DEF]](p1) :: (store (<8 x s16>) into `<32 x i16> addrspace(1)* undef`, align 64, addrspace 1) + ; GREEDY-NEXT: [[C2:%[0-9]+]]:sgpr(s64) = G_CONSTANT i64 16 + ; GREEDY-NEXT: [[PTR_ADD:%[0-9]+]]:sgpr(p1) = G_PTR_ADD [[DEF]], [[C2]](s64) + ; GREEDY-NEXT: G_STORE [[UV1]](<8 x s16>), [[PTR_ADD]](p1) :: (store (<8 x s16>) into `<32 x i16> addrspace(1)* undef` + 16, basealign 64, addrspace 1) + ; GREEDY-NEXT: [[C3:%[0-9]+]]:sgpr(s64) = G_CONSTANT i64 32 + ; GREEDY-NEXT: [[PTR_ADD1:%[0-9]+]]:sgpr(p1) = G_PTR_ADD [[DEF]], [[C3]](s64) + ; GREEDY-NEXT: G_STORE [[UV2]](<8 x s16>), [[PTR_ADD1]](p1) :: (store (<8 x s16>) into `<32 x i16> addrspace(1)* undef` + 32, align 32, basealign 64, addrspace 1) + ; GREEDY-NEXT: [[C4:%[0-9]+]]:sgpr(s64) = G_CONSTANT i64 48 + ; GREEDY-NEXT: [[PTR_ADD2:%[0-9]+]]:sgpr(p1) = G_PTR_ADD [[DEF]], [[C4]](s64) + ; GREEDY-NEXT: G_STORE [[UV3]](<8 x s16>), [[PTR_ADD2]](p1) :: (store (<8 x s16>) into `<32 x i16> addrspace(1)* undef` + 48, basealign 64, addrspace 1) + ; 
GREEDY-NEXT: S_ENDPGM 0 %val = call <32 x i16> @llvm.amdgcn.s.buffer.load.v32i16(<4 x i32> %rsrc, i32 %soffset, i32 0) store <32 x i16> %val, <32 x i16> addrspace(1)* undef ret void @@ -897,46 +929,48 @@ define amdgpu_ps void @s_buffer_load_v32i16_vgpr_offset(<4 x i32> inreg %rsrc, i define amdgpu_ps void @s_buffer_load_v4i64_vgpr_offset(<4 x i32> inreg %rsrc, i32 %soffset) { ; CHECK-LABEL: name: s_buffer_load_v4i64_vgpr_offset ; CHECK: bb.1 (%ir-block.0): - ; CHECK: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $vgpr0 - ; CHECK: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr2 - ; CHECK: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr3 - ; CHECK: [[COPY2:%[0-9]+]]:sgpr(s32) = COPY $sgpr4 - ; CHECK: [[COPY3:%[0-9]+]]:sgpr(s32) = COPY $sgpr5 - ; CHECK: [[BUILD_VECTOR:%[0-9]+]]:sgpr(<4 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32) - ; CHECK: [[COPY4:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 - ; CHECK: [[DEF:%[0-9]+]]:sgpr(p1) = G_IMPLICIT_DEF - ; CHECK: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 0 - ; CHECK: [[C1:%[0-9]+]]:vgpr(s32) = G_CONSTANT i32 0 - ; CHECK: [[AMDGPU_BUFFER_LOAD:%[0-9]+]]:vgpr(<2 x s64>) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR]](<4 x s32>), [[C1]](s32), [[COPY4]], [[C]], 0, 0, 0 :: (dereferenceable invariant load (s128), align 4) - ; CHECK: [[AMDGPU_BUFFER_LOAD1:%[0-9]+]]:vgpr(<2 x s64>) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR]](<4 x s32>), [[C1]](s32), [[COPY4]], [[C]], 16, 0, 0 :: (dereferenceable invariant load (s128), align 4) - ; CHECK: [[CONCAT_VECTORS:%[0-9]+]]:vgpr(<4 x s64>) = G_CONCAT_VECTORS [[AMDGPU_BUFFER_LOAD]](<2 x s64>), [[AMDGPU_BUFFER_LOAD1]](<2 x s64>) - ; CHECK: [[UV:%[0-9]+]]:vgpr(<2 x s64>), [[UV1:%[0-9]+]]:vgpr(<2 x s64>) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<4 x s64>) - ; CHECK: G_STORE [[UV]](<2 x s64>), [[DEF]](p1) :: (store (<2 x s64>) into `<4 x i64> addrspace(1)* undef`, align 32, addrspace 1) - ; CHECK: [[C2:%[0-9]+]]:sgpr(s64) = G_CONSTANT i64 16 - ; CHECK: [[PTR_ADD:%[0-9]+]]:sgpr(p1) = G_PTR_ADD [[DEF]], [[C2]](s64) - ; CHECK: G_STORE [[UV1]](<2 x s64>), [[PTR_ADD]](p1) :: (store (<2 x s64>) into `<4 x i64> addrspace(1)* undef` + 16, basealign 32, addrspace 1) - ; CHECK: S_ENDPGM 0 + ; CHECK-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $vgpr0 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr2 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr3 + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:sgpr(s32) = COPY $sgpr4 + ; CHECK-NEXT: [[COPY3:%[0-9]+]]:sgpr(s32) = COPY $sgpr5 + ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:sgpr(<4 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32) + ; CHECK-NEXT: [[COPY4:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 + ; CHECK-NEXT: [[DEF:%[0-9]+]]:sgpr(p1) = G_IMPLICIT_DEF + ; CHECK-NEXT: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 0 + ; CHECK-NEXT: [[C1:%[0-9]+]]:vgpr(s32) = G_CONSTANT i32 0 + ; CHECK-NEXT: [[AMDGPU_BUFFER_LOAD:%[0-9]+]]:vgpr(<2 x s64>) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR]](<4 x s32>), [[C1]](s32), [[COPY4]], [[C]], 0, 0, 0 :: (dereferenceable invariant load (s128), align 4) + ; CHECK-NEXT: [[AMDGPU_BUFFER_LOAD1:%[0-9]+]]:vgpr(<2 x s64>) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR]](<4 x s32>), [[C1]](s32), [[COPY4]], [[C]], 16, 0, 0 :: (dereferenceable invariant load (s128), align 4) + ; CHECK-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:vgpr(<4 x s64>) = G_CONCAT_VECTORS [[AMDGPU_BUFFER_LOAD]](<2 x s64>), [[AMDGPU_BUFFER_LOAD1]](<2 x s64>) + ; CHECK-NEXT: [[UV:%[0-9]+]]:vgpr(<2 x s64>), [[UV1:%[0-9]+]]:vgpr(<2 x s64>) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<4 x 
s64>) + ; CHECK-NEXT: G_STORE [[UV]](<2 x s64>), [[DEF]](p1) :: (store (<2 x s64>) into `<4 x i64> addrspace(1)* undef`, align 32, addrspace 1) + ; CHECK-NEXT: [[C2:%[0-9]+]]:sgpr(s64) = G_CONSTANT i64 16 + ; CHECK-NEXT: [[PTR_ADD:%[0-9]+]]:sgpr(p1) = G_PTR_ADD [[DEF]], [[C2]](s64) + ; CHECK-NEXT: G_STORE [[UV1]](<2 x s64>), [[PTR_ADD]](p1) :: (store (<2 x s64>) into `<4 x i64> addrspace(1)* undef` + 16, basealign 32, addrspace 1) + ; CHECK-NEXT: S_ENDPGM 0 ; GREEDY-LABEL: name: s_buffer_load_v4i64_vgpr_offset ; GREEDY: bb.1 (%ir-block.0): - ; GREEDY: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $vgpr0 - ; GREEDY: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr2 - ; GREEDY: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr3 - ; GREEDY: [[COPY2:%[0-9]+]]:sgpr(s32) = COPY $sgpr4 - ; GREEDY: [[COPY3:%[0-9]+]]:sgpr(s32) = COPY $sgpr5 - ; GREEDY: [[BUILD_VECTOR:%[0-9]+]]:sgpr(<4 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32) - ; GREEDY: [[COPY4:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 - ; GREEDY: [[DEF:%[0-9]+]]:sgpr(p1) = G_IMPLICIT_DEF - ; GREEDY: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 0 - ; GREEDY: [[C1:%[0-9]+]]:vgpr(s32) = G_CONSTANT i32 0 - ; GREEDY: [[AMDGPU_BUFFER_LOAD:%[0-9]+]]:vgpr(<2 x s64>) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR]](<4 x s32>), [[C1]](s32), [[COPY4]], [[C]], 0, 0, 0 :: (dereferenceable invariant load (s128), align 4) - ; GREEDY: [[AMDGPU_BUFFER_LOAD1:%[0-9]+]]:vgpr(<2 x s64>) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR]](<4 x s32>), [[C1]](s32), [[COPY4]], [[C]], 16, 0, 0 :: (dereferenceable invariant load (s128), align 4) - ; GREEDY: [[CONCAT_VECTORS:%[0-9]+]]:vgpr(<4 x s64>) = G_CONCAT_VECTORS [[AMDGPU_BUFFER_LOAD]](<2 x s64>), [[AMDGPU_BUFFER_LOAD1]](<2 x s64>) - ; GREEDY: [[UV:%[0-9]+]]:vgpr(<2 x s64>), [[UV1:%[0-9]+]]:vgpr(<2 x s64>) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<4 x s64>) - ; GREEDY: G_STORE [[UV]](<2 x s64>), [[DEF]](p1) :: (store (<2 x s64>) into `<4 x i64> addrspace(1)* undef`, align 32, addrspace 1) - ; GREEDY: [[C2:%[0-9]+]]:sgpr(s64) = G_CONSTANT i64 16 - ; GREEDY: [[PTR_ADD:%[0-9]+]]:sgpr(p1) = G_PTR_ADD [[DEF]], [[C2]](s64) - ; GREEDY: G_STORE [[UV1]](<2 x s64>), [[PTR_ADD]](p1) :: (store (<2 x s64>) into `<4 x i64> addrspace(1)* undef` + 16, basealign 32, addrspace 1) - ; GREEDY: S_ENDPGM 0 + ; GREEDY-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $vgpr0 + ; GREEDY-NEXT: {{ $}} + ; GREEDY-NEXT: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr2 + ; GREEDY-NEXT: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr3 + ; GREEDY-NEXT: [[COPY2:%[0-9]+]]:sgpr(s32) = COPY $sgpr4 + ; GREEDY-NEXT: [[COPY3:%[0-9]+]]:sgpr(s32) = COPY $sgpr5 + ; GREEDY-NEXT: [[BUILD_VECTOR:%[0-9]+]]:sgpr(<4 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32) + ; GREEDY-NEXT: [[COPY4:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 + ; GREEDY-NEXT: [[DEF:%[0-9]+]]:sgpr(p1) = G_IMPLICIT_DEF + ; GREEDY-NEXT: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 0 + ; GREEDY-NEXT: [[C1:%[0-9]+]]:vgpr(s32) = G_CONSTANT i32 0 + ; GREEDY-NEXT: [[AMDGPU_BUFFER_LOAD:%[0-9]+]]:vgpr(<2 x s64>) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR]](<4 x s32>), [[C1]](s32), [[COPY4]], [[C]], 0, 0, 0 :: (dereferenceable invariant load (s128), align 4) + ; GREEDY-NEXT: [[AMDGPU_BUFFER_LOAD1:%[0-9]+]]:vgpr(<2 x s64>) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR]](<4 x s32>), [[C1]](s32), [[COPY4]], [[C]], 16, 0, 0 :: (dereferenceable invariant load (s128), align 4) + ; GREEDY-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:vgpr(<4 x s64>) = G_CONCAT_VECTORS [[AMDGPU_BUFFER_LOAD]](<2 x s64>), [[AMDGPU_BUFFER_LOAD1]](<2 x s64>) + ; 
GREEDY-NEXT: [[UV:%[0-9]+]]:vgpr(<2 x s64>), [[UV1:%[0-9]+]]:vgpr(<2 x s64>) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<4 x s64>) + ; GREEDY-NEXT: G_STORE [[UV]](<2 x s64>), [[DEF]](p1) :: (store (<2 x s64>) into `<4 x i64> addrspace(1)* undef`, align 32, addrspace 1) + ; GREEDY-NEXT: [[C2:%[0-9]+]]:sgpr(s64) = G_CONSTANT i64 16 + ; GREEDY-NEXT: [[PTR_ADD:%[0-9]+]]:sgpr(p1) = G_PTR_ADD [[DEF]], [[C2]](s64) + ; GREEDY-NEXT: G_STORE [[UV1]](<2 x s64>), [[PTR_ADD]](p1) :: (store (<2 x s64>) into `<4 x i64> addrspace(1)* undef` + 16, basealign 32, addrspace 1) + ; GREEDY-NEXT: S_ENDPGM 0 %val = call <4 x i64> @llvm.amdgcn.s.buffer.load.v4i64(<4 x i32> %rsrc, i32 %soffset, i32 0) store <4 x i64> %val, <4 x i64> addrspace(1)* undef ret void @@ -946,62 +980,64 @@ define amdgpu_ps void @s_buffer_load_v4i64_vgpr_offset(<4 x i32> inreg %rsrc, i3 define amdgpu_ps void @s_buffer_load_v8i64_vgpr_offset(<4 x i32> inreg %rsrc, i32 %soffset) { ; CHECK-LABEL: name: s_buffer_load_v8i64_vgpr_offset ; CHECK: bb.1 (%ir-block.0): - ; CHECK: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $vgpr0 - ; CHECK: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr2 - ; CHECK: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr3 - ; CHECK: [[COPY2:%[0-9]+]]:sgpr(s32) = COPY $sgpr4 - ; CHECK: [[COPY3:%[0-9]+]]:sgpr(s32) = COPY $sgpr5 - ; CHECK: [[BUILD_VECTOR:%[0-9]+]]:sgpr(<4 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32) - ; CHECK: [[COPY4:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 - ; CHECK: [[DEF:%[0-9]+]]:sgpr(p1) = G_IMPLICIT_DEF - ; CHECK: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 0 - ; CHECK: [[C1:%[0-9]+]]:vgpr(s32) = G_CONSTANT i32 0 - ; CHECK: [[AMDGPU_BUFFER_LOAD:%[0-9]+]]:vgpr(<2 x s64>) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR]](<4 x s32>), [[C1]](s32), [[COPY4]], [[C]], 0, 0, 0 :: (dereferenceable invariant load (s128), align 4) - ; CHECK: [[AMDGPU_BUFFER_LOAD1:%[0-9]+]]:vgpr(<2 x s64>) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR]](<4 x s32>), [[C1]](s32), [[COPY4]], [[C]], 16, 0, 0 :: (dereferenceable invariant load (s128), align 4) - ; CHECK: [[AMDGPU_BUFFER_LOAD2:%[0-9]+]]:vgpr(<2 x s64>) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR]](<4 x s32>), [[C1]](s32), [[COPY4]], [[C]], 32, 0, 0 :: (dereferenceable invariant load (s128) from unknown-address + 16, align 4) - ; CHECK: [[AMDGPU_BUFFER_LOAD3:%[0-9]+]]:vgpr(<2 x s64>) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR]](<4 x s32>), [[C1]](s32), [[COPY4]], [[C]], 48, 0, 0 :: (dereferenceable invariant load (s128) from unknown-address + 48, align 4) - ; CHECK: [[CONCAT_VECTORS:%[0-9]+]]:vgpr(<8 x s64>) = G_CONCAT_VECTORS [[AMDGPU_BUFFER_LOAD]](<2 x s64>), [[AMDGPU_BUFFER_LOAD1]](<2 x s64>), [[AMDGPU_BUFFER_LOAD2]](<2 x s64>), [[AMDGPU_BUFFER_LOAD3]](<2 x s64>) - ; CHECK: [[UV:%[0-9]+]]:vgpr(<2 x s64>), [[UV1:%[0-9]+]]:vgpr(<2 x s64>), [[UV2:%[0-9]+]]:vgpr(<2 x s64>), [[UV3:%[0-9]+]]:vgpr(<2 x s64>) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<8 x s64>) - ; CHECK: G_STORE [[UV]](<2 x s64>), [[DEF]](p1) :: (store (<2 x s64>) into `<8 x i64> addrspace(1)* undef`, align 64, addrspace 1) - ; CHECK: [[C2:%[0-9]+]]:sgpr(s64) = G_CONSTANT i64 16 - ; CHECK: [[PTR_ADD:%[0-9]+]]:sgpr(p1) = G_PTR_ADD [[DEF]], [[C2]](s64) - ; CHECK: G_STORE [[UV1]](<2 x s64>), [[PTR_ADD]](p1) :: (store (<2 x s64>) into `<8 x i64> addrspace(1)* undef` + 16, basealign 64, addrspace 1) - ; CHECK: [[C3:%[0-9]+]]:sgpr(s64) = G_CONSTANT i64 32 - ; CHECK: [[PTR_ADD1:%[0-9]+]]:sgpr(p1) = G_PTR_ADD [[DEF]], [[C3]](s64) - ; CHECK: G_STORE [[UV2]](<2 x s64>), [[PTR_ADD1]](p1) :: (store (<2 x s64>) into `<8 x i64> addrspace(1)* 
undef` + 32, align 32, basealign 64, addrspace 1) - ; CHECK: [[C4:%[0-9]+]]:sgpr(s64) = G_CONSTANT i64 48 - ; CHECK: [[PTR_ADD2:%[0-9]+]]:sgpr(p1) = G_PTR_ADD [[DEF]], [[C4]](s64) - ; CHECK: G_STORE [[UV3]](<2 x s64>), [[PTR_ADD2]](p1) :: (store (<2 x s64>) into `<8 x i64> addrspace(1)* undef` + 48, basealign 64, addrspace 1) - ; CHECK: S_ENDPGM 0 + ; CHECK-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $vgpr0 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr2 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr3 + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:sgpr(s32) = COPY $sgpr4 + ; CHECK-NEXT: [[COPY3:%[0-9]+]]:sgpr(s32) = COPY $sgpr5 + ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:sgpr(<4 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32) + ; CHECK-NEXT: [[COPY4:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 + ; CHECK-NEXT: [[DEF:%[0-9]+]]:sgpr(p1) = G_IMPLICIT_DEF + ; CHECK-NEXT: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 0 + ; CHECK-NEXT: [[C1:%[0-9]+]]:vgpr(s32) = G_CONSTANT i32 0 + ; CHECK-NEXT: [[AMDGPU_BUFFER_LOAD:%[0-9]+]]:vgpr(<2 x s64>) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR]](<4 x s32>), [[C1]](s32), [[COPY4]], [[C]], 0, 0, 0 :: (dereferenceable invariant load (s128), align 4) + ; CHECK-NEXT: [[AMDGPU_BUFFER_LOAD1:%[0-9]+]]:vgpr(<2 x s64>) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR]](<4 x s32>), [[C1]](s32), [[COPY4]], [[C]], 16, 0, 0 :: (dereferenceable invariant load (s128), align 4) + ; CHECK-NEXT: [[AMDGPU_BUFFER_LOAD2:%[0-9]+]]:vgpr(<2 x s64>) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR]](<4 x s32>), [[C1]](s32), [[COPY4]], [[C]], 32, 0, 0 :: (dereferenceable invariant load (s128) from unknown-address + 16, align 4) + ; CHECK-NEXT: [[AMDGPU_BUFFER_LOAD3:%[0-9]+]]:vgpr(<2 x s64>) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR]](<4 x s32>), [[C1]](s32), [[COPY4]], [[C]], 48, 0, 0 :: (dereferenceable invariant load (s128) from unknown-address + 48, align 4) + ; CHECK-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:vgpr(<8 x s64>) = G_CONCAT_VECTORS [[AMDGPU_BUFFER_LOAD]](<2 x s64>), [[AMDGPU_BUFFER_LOAD1]](<2 x s64>), [[AMDGPU_BUFFER_LOAD2]](<2 x s64>), [[AMDGPU_BUFFER_LOAD3]](<2 x s64>) + ; CHECK-NEXT: [[UV:%[0-9]+]]:vgpr(<2 x s64>), [[UV1:%[0-9]+]]:vgpr(<2 x s64>), [[UV2:%[0-9]+]]:vgpr(<2 x s64>), [[UV3:%[0-9]+]]:vgpr(<2 x s64>) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<8 x s64>) + ; CHECK-NEXT: G_STORE [[UV]](<2 x s64>), [[DEF]](p1) :: (store (<2 x s64>) into `<8 x i64> addrspace(1)* undef`, align 64, addrspace 1) + ; CHECK-NEXT: [[C2:%[0-9]+]]:sgpr(s64) = G_CONSTANT i64 16 + ; CHECK-NEXT: [[PTR_ADD:%[0-9]+]]:sgpr(p1) = G_PTR_ADD [[DEF]], [[C2]](s64) + ; CHECK-NEXT: G_STORE [[UV1]](<2 x s64>), [[PTR_ADD]](p1) :: (store (<2 x s64>) into `<8 x i64> addrspace(1)* undef` + 16, basealign 64, addrspace 1) + ; CHECK-NEXT: [[C3:%[0-9]+]]:sgpr(s64) = G_CONSTANT i64 32 + ; CHECK-NEXT: [[PTR_ADD1:%[0-9]+]]:sgpr(p1) = G_PTR_ADD [[DEF]], [[C3]](s64) + ; CHECK-NEXT: G_STORE [[UV2]](<2 x s64>), [[PTR_ADD1]](p1) :: (store (<2 x s64>) into `<8 x i64> addrspace(1)* undef` + 32, align 32, basealign 64, addrspace 1) + ; CHECK-NEXT: [[C4:%[0-9]+]]:sgpr(s64) = G_CONSTANT i64 48 + ; CHECK-NEXT: [[PTR_ADD2:%[0-9]+]]:sgpr(p1) = G_PTR_ADD [[DEF]], [[C4]](s64) + ; CHECK-NEXT: G_STORE [[UV3]](<2 x s64>), [[PTR_ADD2]](p1) :: (store (<2 x s64>) into `<8 x i64> addrspace(1)* undef` + 48, basealign 64, addrspace 1) + ; CHECK-NEXT: S_ENDPGM 0 ; GREEDY-LABEL: name: s_buffer_load_v8i64_vgpr_offset ; GREEDY: bb.1 (%ir-block.0): - ; GREEDY: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $vgpr0 - ; GREEDY: 
[[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr2 - ; GREEDY: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr3 - ; GREEDY: [[COPY2:%[0-9]+]]:sgpr(s32) = COPY $sgpr4 - ; GREEDY: [[COPY3:%[0-9]+]]:sgpr(s32) = COPY $sgpr5 - ; GREEDY: [[BUILD_VECTOR:%[0-9]+]]:sgpr(<4 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32) - ; GREEDY: [[COPY4:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 - ; GREEDY: [[DEF:%[0-9]+]]:sgpr(p1) = G_IMPLICIT_DEF - ; GREEDY: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 0 - ; GREEDY: [[C1:%[0-9]+]]:vgpr(s32) = G_CONSTANT i32 0 - ; GREEDY: [[AMDGPU_BUFFER_LOAD:%[0-9]+]]:vgpr(<2 x s64>) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR]](<4 x s32>), [[C1]](s32), [[COPY4]], [[C]], 0, 0, 0 :: (dereferenceable invariant load (s128), align 4) - ; GREEDY: [[AMDGPU_BUFFER_LOAD1:%[0-9]+]]:vgpr(<2 x s64>) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR]](<4 x s32>), [[C1]](s32), [[COPY4]], [[C]], 16, 0, 0 :: (dereferenceable invariant load (s128), align 4) - ; GREEDY: [[AMDGPU_BUFFER_LOAD2:%[0-9]+]]:vgpr(<2 x s64>) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR]](<4 x s32>), [[C1]](s32), [[COPY4]], [[C]], 32, 0, 0 :: (dereferenceable invariant load (s128) from unknown-address + 16, align 4) - ; GREEDY: [[AMDGPU_BUFFER_LOAD3:%[0-9]+]]:vgpr(<2 x s64>) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR]](<4 x s32>), [[C1]](s32), [[COPY4]], [[C]], 48, 0, 0 :: (dereferenceable invariant load (s128) from unknown-address + 48, align 4) - ; GREEDY: [[CONCAT_VECTORS:%[0-9]+]]:vgpr(<8 x s64>) = G_CONCAT_VECTORS [[AMDGPU_BUFFER_LOAD]](<2 x s64>), [[AMDGPU_BUFFER_LOAD1]](<2 x s64>), [[AMDGPU_BUFFER_LOAD2]](<2 x s64>), [[AMDGPU_BUFFER_LOAD3]](<2 x s64>) - ; GREEDY: [[UV:%[0-9]+]]:vgpr(<2 x s64>), [[UV1:%[0-9]+]]:vgpr(<2 x s64>), [[UV2:%[0-9]+]]:vgpr(<2 x s64>), [[UV3:%[0-9]+]]:vgpr(<2 x s64>) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<8 x s64>) - ; GREEDY: G_STORE [[UV]](<2 x s64>), [[DEF]](p1) :: (store (<2 x s64>) into `<8 x i64> addrspace(1)* undef`, align 64, addrspace 1) - ; GREEDY: [[C2:%[0-9]+]]:sgpr(s64) = G_CONSTANT i64 16 - ; GREEDY: [[PTR_ADD:%[0-9]+]]:sgpr(p1) = G_PTR_ADD [[DEF]], [[C2]](s64) - ; GREEDY: G_STORE [[UV1]](<2 x s64>), [[PTR_ADD]](p1) :: (store (<2 x s64>) into `<8 x i64> addrspace(1)* undef` + 16, basealign 64, addrspace 1) - ; GREEDY: [[C3:%[0-9]+]]:sgpr(s64) = G_CONSTANT i64 32 - ; GREEDY: [[PTR_ADD1:%[0-9]+]]:sgpr(p1) = G_PTR_ADD [[DEF]], [[C3]](s64) - ; GREEDY: G_STORE [[UV2]](<2 x s64>), [[PTR_ADD1]](p1) :: (store (<2 x s64>) into `<8 x i64> addrspace(1)* undef` + 32, align 32, basealign 64, addrspace 1) - ; GREEDY: [[C4:%[0-9]+]]:sgpr(s64) = G_CONSTANT i64 48 - ; GREEDY: [[PTR_ADD2:%[0-9]+]]:sgpr(p1) = G_PTR_ADD [[DEF]], [[C4]](s64) - ; GREEDY: G_STORE [[UV3]](<2 x s64>), [[PTR_ADD2]](p1) :: (store (<2 x s64>) into `<8 x i64> addrspace(1)* undef` + 48, basealign 64, addrspace 1) - ; GREEDY: S_ENDPGM 0 + ; GREEDY-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $vgpr0 + ; GREEDY-NEXT: {{ $}} + ; GREEDY-NEXT: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr2 + ; GREEDY-NEXT: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr3 + ; GREEDY-NEXT: [[COPY2:%[0-9]+]]:sgpr(s32) = COPY $sgpr4 + ; GREEDY-NEXT: [[COPY3:%[0-9]+]]:sgpr(s32) = COPY $sgpr5 + ; GREEDY-NEXT: [[BUILD_VECTOR:%[0-9]+]]:sgpr(<4 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32) + ; GREEDY-NEXT: [[COPY4:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 + ; GREEDY-NEXT: [[DEF:%[0-9]+]]:sgpr(p1) = G_IMPLICIT_DEF + ; GREEDY-NEXT: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 0 + ; GREEDY-NEXT: [[C1:%[0-9]+]]:vgpr(s32) = G_CONSTANT i32 0 + ; GREEDY-NEXT: 
[[AMDGPU_BUFFER_LOAD:%[0-9]+]]:vgpr(<2 x s64>) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR]](<4 x s32>), [[C1]](s32), [[COPY4]], [[C]], 0, 0, 0 :: (dereferenceable invariant load (s128), align 4) + ; GREEDY-NEXT: [[AMDGPU_BUFFER_LOAD1:%[0-9]+]]:vgpr(<2 x s64>) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR]](<4 x s32>), [[C1]](s32), [[COPY4]], [[C]], 16, 0, 0 :: (dereferenceable invariant load (s128), align 4) + ; GREEDY-NEXT: [[AMDGPU_BUFFER_LOAD2:%[0-9]+]]:vgpr(<2 x s64>) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR]](<4 x s32>), [[C1]](s32), [[COPY4]], [[C]], 32, 0, 0 :: (dereferenceable invariant load (s128) from unknown-address + 16, align 4) + ; GREEDY-NEXT: [[AMDGPU_BUFFER_LOAD3:%[0-9]+]]:vgpr(<2 x s64>) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR]](<4 x s32>), [[C1]](s32), [[COPY4]], [[C]], 48, 0, 0 :: (dereferenceable invariant load (s128) from unknown-address + 48, align 4) + ; GREEDY-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:vgpr(<8 x s64>) = G_CONCAT_VECTORS [[AMDGPU_BUFFER_LOAD]](<2 x s64>), [[AMDGPU_BUFFER_LOAD1]](<2 x s64>), [[AMDGPU_BUFFER_LOAD2]](<2 x s64>), [[AMDGPU_BUFFER_LOAD3]](<2 x s64>) + ; GREEDY-NEXT: [[UV:%[0-9]+]]:vgpr(<2 x s64>), [[UV1:%[0-9]+]]:vgpr(<2 x s64>), [[UV2:%[0-9]+]]:vgpr(<2 x s64>), [[UV3:%[0-9]+]]:vgpr(<2 x s64>) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<8 x s64>) + ; GREEDY-NEXT: G_STORE [[UV]](<2 x s64>), [[DEF]](p1) :: (store (<2 x s64>) into `<8 x i64> addrspace(1)* undef`, align 64, addrspace 1) + ; GREEDY-NEXT: [[C2:%[0-9]+]]:sgpr(s64) = G_CONSTANT i64 16 + ; GREEDY-NEXT: [[PTR_ADD:%[0-9]+]]:sgpr(p1) = G_PTR_ADD [[DEF]], [[C2]](s64) + ; GREEDY-NEXT: G_STORE [[UV1]](<2 x s64>), [[PTR_ADD]](p1) :: (store (<2 x s64>) into `<8 x i64> addrspace(1)* undef` + 16, basealign 64, addrspace 1) + ; GREEDY-NEXT: [[C3:%[0-9]+]]:sgpr(s64) = G_CONSTANT i64 32 + ; GREEDY-NEXT: [[PTR_ADD1:%[0-9]+]]:sgpr(p1) = G_PTR_ADD [[DEF]], [[C3]](s64) + ; GREEDY-NEXT: G_STORE [[UV2]](<2 x s64>), [[PTR_ADD1]](p1) :: (store (<2 x s64>) into `<8 x i64> addrspace(1)* undef` + 32, align 32, basealign 64, addrspace 1) + ; GREEDY-NEXT: [[C4:%[0-9]+]]:sgpr(s64) = G_CONSTANT i64 48 + ; GREEDY-NEXT: [[PTR_ADD2:%[0-9]+]]:sgpr(p1) = G_PTR_ADD [[DEF]], [[C4]](s64) + ; GREEDY-NEXT: G_STORE [[UV3]](<2 x s64>), [[PTR_ADD2]](p1) :: (store (<2 x s64>) into `<8 x i64> addrspace(1)* undef` + 48, basealign 64, addrspace 1) + ; GREEDY-NEXT: S_ENDPGM 0 %val = call <8 x i64> @llvm.amdgcn.s.buffer.load.v8i64(<4 x i32> %rsrc, i32 %soffset, i32 0) store <8 x i64> %val, <8 x i64> addrspace(1)* undef ret void @@ -1011,46 +1047,48 @@ define amdgpu_ps void @s_buffer_load_v8i64_vgpr_offset(<4 x i32> inreg %rsrc, i3 define amdgpu_ps void @s_buffer_load_v4p1_vgpr_offset(<4 x i32> inreg %rsrc, i32 %soffset) { ; CHECK-LABEL: name: s_buffer_load_v4p1_vgpr_offset ; CHECK: bb.1 (%ir-block.0): - ; CHECK: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $vgpr0 - ; CHECK: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr2 - ; CHECK: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr3 - ; CHECK: [[COPY2:%[0-9]+]]:sgpr(s32) = COPY $sgpr4 - ; CHECK: [[COPY3:%[0-9]+]]:sgpr(s32) = COPY $sgpr5 - ; CHECK: [[BUILD_VECTOR:%[0-9]+]]:sgpr(<4 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32) - ; CHECK: [[COPY4:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 - ; CHECK: [[DEF:%[0-9]+]]:sgpr(p1) = G_IMPLICIT_DEF - ; CHECK: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 0 - ; CHECK: [[C1:%[0-9]+]]:vgpr(s32) = G_CONSTANT i32 0 - ; CHECK: [[AMDGPU_BUFFER_LOAD:%[0-9]+]]:vgpr(<2 x p1>) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR]](<4 x s32>), [[C1]](s32), [[COPY4]], [[C]], 0, 0, 0 :: 
(dereferenceable invariant load (s128), align 4) - ; CHECK: [[AMDGPU_BUFFER_LOAD1:%[0-9]+]]:vgpr(<2 x p1>) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR]](<4 x s32>), [[C1]](s32), [[COPY4]], [[C]], 16, 0, 0 :: (dereferenceable invariant load (s128), align 4) - ; CHECK: [[CONCAT_VECTORS:%[0-9]+]]:vgpr(<4 x p1>) = G_CONCAT_VECTORS [[AMDGPU_BUFFER_LOAD]](<2 x p1>), [[AMDGPU_BUFFER_LOAD1]](<2 x p1>) - ; CHECK: [[UV:%[0-9]+]]:vgpr(<2 x p1>), [[UV1:%[0-9]+]]:vgpr(<2 x p1>) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<4 x p1>) - ; CHECK: G_STORE [[UV]](<2 x p1>), [[DEF]](p1) :: (store (<2 x p1>) into `<4 x i8 addrspace(1)*> addrspace(1)* undef`, align 32, addrspace 1) - ; CHECK: [[C2:%[0-9]+]]:sgpr(s64) = G_CONSTANT i64 16 - ; CHECK: [[PTR_ADD:%[0-9]+]]:sgpr(p1) = G_PTR_ADD [[DEF]], [[C2]](s64) - ; CHECK: G_STORE [[UV1]](<2 x p1>), [[PTR_ADD]](p1) :: (store (<2 x p1>) into `<4 x i8 addrspace(1)*> addrspace(1)* undef` + 16, basealign 32, addrspace 1) - ; CHECK: S_ENDPGM 0 + ; CHECK-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $vgpr0 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr2 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr3 + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:sgpr(s32) = COPY $sgpr4 + ; CHECK-NEXT: [[COPY3:%[0-9]+]]:sgpr(s32) = COPY $sgpr5 + ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:sgpr(<4 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32) + ; CHECK-NEXT: [[COPY4:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 + ; CHECK-NEXT: [[DEF:%[0-9]+]]:sgpr(p1) = G_IMPLICIT_DEF + ; CHECK-NEXT: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 0 + ; CHECK-NEXT: [[C1:%[0-9]+]]:vgpr(s32) = G_CONSTANT i32 0 + ; CHECK-NEXT: [[AMDGPU_BUFFER_LOAD:%[0-9]+]]:vgpr(<2 x p1>) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR]](<4 x s32>), [[C1]](s32), [[COPY4]], [[C]], 0, 0, 0 :: (dereferenceable invariant load (s128), align 4) + ; CHECK-NEXT: [[AMDGPU_BUFFER_LOAD1:%[0-9]+]]:vgpr(<2 x p1>) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR]](<4 x s32>), [[C1]](s32), [[COPY4]], [[C]], 16, 0, 0 :: (dereferenceable invariant load (s128), align 4) + ; CHECK-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:vgpr(<4 x p1>) = G_CONCAT_VECTORS [[AMDGPU_BUFFER_LOAD]](<2 x p1>), [[AMDGPU_BUFFER_LOAD1]](<2 x p1>) + ; CHECK-NEXT: [[UV:%[0-9]+]]:vgpr(<2 x p1>), [[UV1:%[0-9]+]]:vgpr(<2 x p1>) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<4 x p1>) + ; CHECK-NEXT: G_STORE [[UV]](<2 x p1>), [[DEF]](p1) :: (store (<2 x p1>) into `<4 x i8 addrspace(1)*> addrspace(1)* undef`, align 32, addrspace 1) + ; CHECK-NEXT: [[C2:%[0-9]+]]:sgpr(s64) = G_CONSTANT i64 16 + ; CHECK-NEXT: [[PTR_ADD:%[0-9]+]]:sgpr(p1) = G_PTR_ADD [[DEF]], [[C2]](s64) + ; CHECK-NEXT: G_STORE [[UV1]](<2 x p1>), [[PTR_ADD]](p1) :: (store (<2 x p1>) into `<4 x i8 addrspace(1)*> addrspace(1)* undef` + 16, basealign 32, addrspace 1) + ; CHECK-NEXT: S_ENDPGM 0 ; GREEDY-LABEL: name: s_buffer_load_v4p1_vgpr_offset ; GREEDY: bb.1 (%ir-block.0): - ; GREEDY: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $vgpr0 - ; GREEDY: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr2 - ; GREEDY: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr3 - ; GREEDY: [[COPY2:%[0-9]+]]:sgpr(s32) = COPY $sgpr4 - ; GREEDY: [[COPY3:%[0-9]+]]:sgpr(s32) = COPY $sgpr5 - ; GREEDY: [[BUILD_VECTOR:%[0-9]+]]:sgpr(<4 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32) - ; GREEDY: [[COPY4:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 - ; GREEDY: [[DEF:%[0-9]+]]:sgpr(p1) = G_IMPLICIT_DEF - ; GREEDY: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 0 - ; GREEDY: [[C1:%[0-9]+]]:vgpr(s32) = G_CONSTANT i32 0 - ; GREEDY: 
[[AMDGPU_BUFFER_LOAD:%[0-9]+]]:vgpr(<2 x p1>) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR]](<4 x s32>), [[C1]](s32), [[COPY4]], [[C]], 0, 0, 0 :: (dereferenceable invariant load (s128), align 4) - ; GREEDY: [[AMDGPU_BUFFER_LOAD1:%[0-9]+]]:vgpr(<2 x p1>) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR]](<4 x s32>), [[C1]](s32), [[COPY4]], [[C]], 16, 0, 0 :: (dereferenceable invariant load (s128), align 4) - ; GREEDY: [[CONCAT_VECTORS:%[0-9]+]]:vgpr(<4 x p1>) = G_CONCAT_VECTORS [[AMDGPU_BUFFER_LOAD]](<2 x p1>), [[AMDGPU_BUFFER_LOAD1]](<2 x p1>) - ; GREEDY: [[UV:%[0-9]+]]:vgpr(<2 x p1>), [[UV1:%[0-9]+]]:vgpr(<2 x p1>) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<4 x p1>) - ; GREEDY: G_STORE [[UV]](<2 x p1>), [[DEF]](p1) :: (store (<2 x p1>) into `<4 x i8 addrspace(1)*> addrspace(1)* undef`, align 32, addrspace 1) - ; GREEDY: [[C2:%[0-9]+]]:sgpr(s64) = G_CONSTANT i64 16 - ; GREEDY: [[PTR_ADD:%[0-9]+]]:sgpr(p1) = G_PTR_ADD [[DEF]], [[C2]](s64) - ; GREEDY: G_STORE [[UV1]](<2 x p1>), [[PTR_ADD]](p1) :: (store (<2 x p1>) into `<4 x i8 addrspace(1)*> addrspace(1)* undef` + 16, basealign 32, addrspace 1) - ; GREEDY: S_ENDPGM 0 + ; GREEDY-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $vgpr0 + ; GREEDY-NEXT: {{ $}} + ; GREEDY-NEXT: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr2 + ; GREEDY-NEXT: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr3 + ; GREEDY-NEXT: [[COPY2:%[0-9]+]]:sgpr(s32) = COPY $sgpr4 + ; GREEDY-NEXT: [[COPY3:%[0-9]+]]:sgpr(s32) = COPY $sgpr5 + ; GREEDY-NEXT: [[BUILD_VECTOR:%[0-9]+]]:sgpr(<4 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32) + ; GREEDY-NEXT: [[COPY4:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 + ; GREEDY-NEXT: [[DEF:%[0-9]+]]:sgpr(p1) = G_IMPLICIT_DEF + ; GREEDY-NEXT: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 0 + ; GREEDY-NEXT: [[C1:%[0-9]+]]:vgpr(s32) = G_CONSTANT i32 0 + ; GREEDY-NEXT: [[AMDGPU_BUFFER_LOAD:%[0-9]+]]:vgpr(<2 x p1>) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR]](<4 x s32>), [[C1]](s32), [[COPY4]], [[C]], 0, 0, 0 :: (dereferenceable invariant load (s128), align 4) + ; GREEDY-NEXT: [[AMDGPU_BUFFER_LOAD1:%[0-9]+]]:vgpr(<2 x p1>) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR]](<4 x s32>), [[C1]](s32), [[COPY4]], [[C]], 16, 0, 0 :: (dereferenceable invariant load (s128), align 4) + ; GREEDY-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:vgpr(<4 x p1>) = G_CONCAT_VECTORS [[AMDGPU_BUFFER_LOAD]](<2 x p1>), [[AMDGPU_BUFFER_LOAD1]](<2 x p1>) + ; GREEDY-NEXT: [[UV:%[0-9]+]]:vgpr(<2 x p1>), [[UV1:%[0-9]+]]:vgpr(<2 x p1>) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<4 x p1>) + ; GREEDY-NEXT: G_STORE [[UV]](<2 x p1>), [[DEF]](p1) :: (store (<2 x p1>) into `<4 x i8 addrspace(1)*> addrspace(1)* undef`, align 32, addrspace 1) + ; GREEDY-NEXT: [[C2:%[0-9]+]]:sgpr(s64) = G_CONSTANT i64 16 + ; GREEDY-NEXT: [[PTR_ADD:%[0-9]+]]:sgpr(p1) = G_PTR_ADD [[DEF]], [[C2]](s64) + ; GREEDY-NEXT: G_STORE [[UV1]](<2 x p1>), [[PTR_ADD]](p1) :: (store (<2 x p1>) into `<4 x i8 addrspace(1)*> addrspace(1)* undef` + 16, basealign 32, addrspace 1) + ; GREEDY-NEXT: S_ENDPGM 0 %val = call <4 x i8 addrspace(1)*> @llvm.amdgcn.s.buffer.load.v4p1i8(<4 x i32> %rsrc, i32 %soffset, i32 0) store <4 x i8 addrspace(1)*> %val, <4 x i8 addrspace(1)*> addrspace(1)* undef ret void @@ -1060,62 +1098,64 @@ define amdgpu_ps void @s_buffer_load_v4p1_vgpr_offset(<4 x i32> inreg %rsrc, i32 define amdgpu_ps void @s_buffer_load_v8p1_vgpr_offset(<4 x i32> inreg %rsrc, i32 %soffset) { ; CHECK-LABEL: name: s_buffer_load_v8p1_vgpr_offset ; CHECK: bb.1 (%ir-block.0): - ; CHECK: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $vgpr0 - ; CHECK: [[COPY:%[0-9]+]]:sgpr(s32) = 
COPY $sgpr2 - ; CHECK: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr3 - ; CHECK: [[COPY2:%[0-9]+]]:sgpr(s32) = COPY $sgpr4 - ; CHECK: [[COPY3:%[0-9]+]]:sgpr(s32) = COPY $sgpr5 - ; CHECK: [[BUILD_VECTOR:%[0-9]+]]:sgpr(<4 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32) - ; CHECK: [[COPY4:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 - ; CHECK: [[DEF:%[0-9]+]]:sgpr(p1) = G_IMPLICIT_DEF - ; CHECK: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 0 - ; CHECK: [[C1:%[0-9]+]]:vgpr(s32) = G_CONSTANT i32 0 - ; CHECK: [[AMDGPU_BUFFER_LOAD:%[0-9]+]]:vgpr(<2 x p1>) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR]](<4 x s32>), [[C1]](s32), [[COPY4]], [[C]], 0, 0, 0 :: (dereferenceable invariant load (s128), align 4) - ; CHECK: [[AMDGPU_BUFFER_LOAD1:%[0-9]+]]:vgpr(<2 x p1>) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR]](<4 x s32>), [[C1]](s32), [[COPY4]], [[C]], 16, 0, 0 :: (dereferenceable invariant load (s128), align 4) - ; CHECK: [[AMDGPU_BUFFER_LOAD2:%[0-9]+]]:vgpr(<2 x p1>) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR]](<4 x s32>), [[C1]](s32), [[COPY4]], [[C]], 32, 0, 0 :: (dereferenceable invariant load (s128) from unknown-address + 16, align 4) - ; CHECK: [[AMDGPU_BUFFER_LOAD3:%[0-9]+]]:vgpr(<2 x p1>) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR]](<4 x s32>), [[C1]](s32), [[COPY4]], [[C]], 48, 0, 0 :: (dereferenceable invariant load (s128) from unknown-address + 48, align 4) - ; CHECK: [[CONCAT_VECTORS:%[0-9]+]]:vgpr(<8 x p1>) = G_CONCAT_VECTORS [[AMDGPU_BUFFER_LOAD]](<2 x p1>), [[AMDGPU_BUFFER_LOAD1]](<2 x p1>), [[AMDGPU_BUFFER_LOAD2]](<2 x p1>), [[AMDGPU_BUFFER_LOAD3]](<2 x p1>) - ; CHECK: [[UV:%[0-9]+]]:vgpr(<2 x p1>), [[UV1:%[0-9]+]]:vgpr(<2 x p1>), [[UV2:%[0-9]+]]:vgpr(<2 x p1>), [[UV3:%[0-9]+]]:vgpr(<2 x p1>) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<8 x p1>) - ; CHECK: G_STORE [[UV]](<2 x p1>), [[DEF]](p1) :: (store (<2 x p1>) into `<8 x i8 addrspace(1)*> addrspace(1)* undef`, align 64, addrspace 1) - ; CHECK: [[C2:%[0-9]+]]:sgpr(s64) = G_CONSTANT i64 16 - ; CHECK: [[PTR_ADD:%[0-9]+]]:sgpr(p1) = G_PTR_ADD [[DEF]], [[C2]](s64) - ; CHECK: G_STORE [[UV1]](<2 x p1>), [[PTR_ADD]](p1) :: (store (<2 x p1>) into `<8 x i8 addrspace(1)*> addrspace(1)* undef` + 16, basealign 64, addrspace 1) - ; CHECK: [[C3:%[0-9]+]]:sgpr(s64) = G_CONSTANT i64 32 - ; CHECK: [[PTR_ADD1:%[0-9]+]]:sgpr(p1) = G_PTR_ADD [[DEF]], [[C3]](s64) - ; CHECK: G_STORE [[UV2]](<2 x p1>), [[PTR_ADD1]](p1) :: (store (<2 x p1>) into `<8 x i8 addrspace(1)*> addrspace(1)* undef` + 32, align 32, basealign 64, addrspace 1) - ; CHECK: [[C4:%[0-9]+]]:sgpr(s64) = G_CONSTANT i64 48 - ; CHECK: [[PTR_ADD2:%[0-9]+]]:sgpr(p1) = G_PTR_ADD [[DEF]], [[C4]](s64) - ; CHECK: G_STORE [[UV3]](<2 x p1>), [[PTR_ADD2]](p1) :: (store (<2 x p1>) into `<8 x i8 addrspace(1)*> addrspace(1)* undef` + 48, basealign 64, addrspace 1) - ; CHECK: S_ENDPGM 0 + ; CHECK-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $vgpr0 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr2 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr3 + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:sgpr(s32) = COPY $sgpr4 + ; CHECK-NEXT: [[COPY3:%[0-9]+]]:sgpr(s32) = COPY $sgpr5 + ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:sgpr(<4 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32) + ; CHECK-NEXT: [[COPY4:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 + ; CHECK-NEXT: [[DEF:%[0-9]+]]:sgpr(p1) = G_IMPLICIT_DEF + ; CHECK-NEXT: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 0 + ; CHECK-NEXT: [[C1:%[0-9]+]]:vgpr(s32) = G_CONSTANT i32 0 + ; CHECK-NEXT: [[AMDGPU_BUFFER_LOAD:%[0-9]+]]:vgpr(<2 
x p1>) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR]](<4 x s32>), [[C1]](s32), [[COPY4]], [[C]], 0, 0, 0 :: (dereferenceable invariant load (s128), align 4) + ; CHECK-NEXT: [[AMDGPU_BUFFER_LOAD1:%[0-9]+]]:vgpr(<2 x p1>) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR]](<4 x s32>), [[C1]](s32), [[COPY4]], [[C]], 16, 0, 0 :: (dereferenceable invariant load (s128), align 4) + ; CHECK-NEXT: [[AMDGPU_BUFFER_LOAD2:%[0-9]+]]:vgpr(<2 x p1>) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR]](<4 x s32>), [[C1]](s32), [[COPY4]], [[C]], 32, 0, 0 :: (dereferenceable invariant load (s128) from unknown-address + 16, align 4) + ; CHECK-NEXT: [[AMDGPU_BUFFER_LOAD3:%[0-9]+]]:vgpr(<2 x p1>) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR]](<4 x s32>), [[C1]](s32), [[COPY4]], [[C]], 48, 0, 0 :: (dereferenceable invariant load (s128) from unknown-address + 48, align 4) + ; CHECK-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:vgpr(<8 x p1>) = G_CONCAT_VECTORS [[AMDGPU_BUFFER_LOAD]](<2 x p1>), [[AMDGPU_BUFFER_LOAD1]](<2 x p1>), [[AMDGPU_BUFFER_LOAD2]](<2 x p1>), [[AMDGPU_BUFFER_LOAD3]](<2 x p1>) + ; CHECK-NEXT: [[UV:%[0-9]+]]:vgpr(<2 x p1>), [[UV1:%[0-9]+]]:vgpr(<2 x p1>), [[UV2:%[0-9]+]]:vgpr(<2 x p1>), [[UV3:%[0-9]+]]:vgpr(<2 x p1>) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<8 x p1>) + ; CHECK-NEXT: G_STORE [[UV]](<2 x p1>), [[DEF]](p1) :: (store (<2 x p1>) into `<8 x i8 addrspace(1)*> addrspace(1)* undef`, align 64, addrspace 1) + ; CHECK-NEXT: [[C2:%[0-9]+]]:sgpr(s64) = G_CONSTANT i64 16 + ; CHECK-NEXT: [[PTR_ADD:%[0-9]+]]:sgpr(p1) = G_PTR_ADD [[DEF]], [[C2]](s64) + ; CHECK-NEXT: G_STORE [[UV1]](<2 x p1>), [[PTR_ADD]](p1) :: (store (<2 x p1>) into `<8 x i8 addrspace(1)*> addrspace(1)* undef` + 16, basealign 64, addrspace 1) + ; CHECK-NEXT: [[C3:%[0-9]+]]:sgpr(s64) = G_CONSTANT i64 32 + ; CHECK-NEXT: [[PTR_ADD1:%[0-9]+]]:sgpr(p1) = G_PTR_ADD [[DEF]], [[C3]](s64) + ; CHECK-NEXT: G_STORE [[UV2]](<2 x p1>), [[PTR_ADD1]](p1) :: (store (<2 x p1>) into `<8 x i8 addrspace(1)*> addrspace(1)* undef` + 32, align 32, basealign 64, addrspace 1) + ; CHECK-NEXT: [[C4:%[0-9]+]]:sgpr(s64) = G_CONSTANT i64 48 + ; CHECK-NEXT: [[PTR_ADD2:%[0-9]+]]:sgpr(p1) = G_PTR_ADD [[DEF]], [[C4]](s64) + ; CHECK-NEXT: G_STORE [[UV3]](<2 x p1>), [[PTR_ADD2]](p1) :: (store (<2 x p1>) into `<8 x i8 addrspace(1)*> addrspace(1)* undef` + 48, basealign 64, addrspace 1) + ; CHECK-NEXT: S_ENDPGM 0 ; GREEDY-LABEL: name: s_buffer_load_v8p1_vgpr_offset ; GREEDY: bb.1 (%ir-block.0): - ; GREEDY: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $vgpr0 - ; GREEDY: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr2 - ; GREEDY: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr3 - ; GREEDY: [[COPY2:%[0-9]+]]:sgpr(s32) = COPY $sgpr4 - ; GREEDY: [[COPY3:%[0-9]+]]:sgpr(s32) = COPY $sgpr5 - ; GREEDY: [[BUILD_VECTOR:%[0-9]+]]:sgpr(<4 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32) - ; GREEDY: [[COPY4:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 - ; GREEDY: [[DEF:%[0-9]+]]:sgpr(p1) = G_IMPLICIT_DEF - ; GREEDY: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 0 - ; GREEDY: [[C1:%[0-9]+]]:vgpr(s32) = G_CONSTANT i32 0 - ; GREEDY: [[AMDGPU_BUFFER_LOAD:%[0-9]+]]:vgpr(<2 x p1>) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR]](<4 x s32>), [[C1]](s32), [[COPY4]], [[C]], 0, 0, 0 :: (dereferenceable invariant load (s128), align 4) - ; GREEDY: [[AMDGPU_BUFFER_LOAD1:%[0-9]+]]:vgpr(<2 x p1>) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR]](<4 x s32>), [[C1]](s32), [[COPY4]], [[C]], 16, 0, 0 :: (dereferenceable invariant load (s128), align 4) - ; GREEDY: [[AMDGPU_BUFFER_LOAD2:%[0-9]+]]:vgpr(<2 x p1>) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR]](<4 x s32>), 
[[C1]](s32), [[COPY4]], [[C]], 32, 0, 0 :: (dereferenceable invariant load (s128) from unknown-address + 16, align 4) - ; GREEDY: [[AMDGPU_BUFFER_LOAD3:%[0-9]+]]:vgpr(<2 x p1>) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR]](<4 x s32>), [[C1]](s32), [[COPY4]], [[C]], 48, 0, 0 :: (dereferenceable invariant load (s128) from unknown-address + 48, align 4) - ; GREEDY: [[CONCAT_VECTORS:%[0-9]+]]:vgpr(<8 x p1>) = G_CONCAT_VECTORS [[AMDGPU_BUFFER_LOAD]](<2 x p1>), [[AMDGPU_BUFFER_LOAD1]](<2 x p1>), [[AMDGPU_BUFFER_LOAD2]](<2 x p1>), [[AMDGPU_BUFFER_LOAD3]](<2 x p1>) - ; GREEDY: [[UV:%[0-9]+]]:vgpr(<2 x p1>), [[UV1:%[0-9]+]]:vgpr(<2 x p1>), [[UV2:%[0-9]+]]:vgpr(<2 x p1>), [[UV3:%[0-9]+]]:vgpr(<2 x p1>) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<8 x p1>) - ; GREEDY: G_STORE [[UV]](<2 x p1>), [[DEF]](p1) :: (store (<2 x p1>) into `<8 x i8 addrspace(1)*> addrspace(1)* undef`, align 64, addrspace 1) - ; GREEDY: [[C2:%[0-9]+]]:sgpr(s64) = G_CONSTANT i64 16 - ; GREEDY: [[PTR_ADD:%[0-9]+]]:sgpr(p1) = G_PTR_ADD [[DEF]], [[C2]](s64) - ; GREEDY: G_STORE [[UV1]](<2 x p1>), [[PTR_ADD]](p1) :: (store (<2 x p1>) into `<8 x i8 addrspace(1)*> addrspace(1)* undef` + 16, basealign 64, addrspace 1) - ; GREEDY: [[C3:%[0-9]+]]:sgpr(s64) = G_CONSTANT i64 32 - ; GREEDY: [[PTR_ADD1:%[0-9]+]]:sgpr(p1) = G_PTR_ADD [[DEF]], [[C3]](s64) - ; GREEDY: G_STORE [[UV2]](<2 x p1>), [[PTR_ADD1]](p1) :: (store (<2 x p1>) into `<8 x i8 addrspace(1)*> addrspace(1)* undef` + 32, align 32, basealign 64, addrspace 1) - ; GREEDY: [[C4:%[0-9]+]]:sgpr(s64) = G_CONSTANT i64 48 - ; GREEDY: [[PTR_ADD2:%[0-9]+]]:sgpr(p1) = G_PTR_ADD [[DEF]], [[C4]](s64) - ; GREEDY: G_STORE [[UV3]](<2 x p1>), [[PTR_ADD2]](p1) :: (store (<2 x p1>) into `<8 x i8 addrspace(1)*> addrspace(1)* undef` + 48, basealign 64, addrspace 1) - ; GREEDY: S_ENDPGM 0 + ; GREEDY-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $vgpr0 + ; GREEDY-NEXT: {{ $}} + ; GREEDY-NEXT: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr2 + ; GREEDY-NEXT: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr3 + ; GREEDY-NEXT: [[COPY2:%[0-9]+]]:sgpr(s32) = COPY $sgpr4 + ; GREEDY-NEXT: [[COPY3:%[0-9]+]]:sgpr(s32) = COPY $sgpr5 + ; GREEDY-NEXT: [[BUILD_VECTOR:%[0-9]+]]:sgpr(<4 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32) + ; GREEDY-NEXT: [[COPY4:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 + ; GREEDY-NEXT: [[DEF:%[0-9]+]]:sgpr(p1) = G_IMPLICIT_DEF + ; GREEDY-NEXT: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 0 + ; GREEDY-NEXT: [[C1:%[0-9]+]]:vgpr(s32) = G_CONSTANT i32 0 + ; GREEDY-NEXT: [[AMDGPU_BUFFER_LOAD:%[0-9]+]]:vgpr(<2 x p1>) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR]](<4 x s32>), [[C1]](s32), [[COPY4]], [[C]], 0, 0, 0 :: (dereferenceable invariant load (s128), align 4) + ; GREEDY-NEXT: [[AMDGPU_BUFFER_LOAD1:%[0-9]+]]:vgpr(<2 x p1>) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR]](<4 x s32>), [[C1]](s32), [[COPY4]], [[C]], 16, 0, 0 :: (dereferenceable invariant load (s128), align 4) + ; GREEDY-NEXT: [[AMDGPU_BUFFER_LOAD2:%[0-9]+]]:vgpr(<2 x p1>) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR]](<4 x s32>), [[C1]](s32), [[COPY4]], [[C]], 32, 0, 0 :: (dereferenceable invariant load (s128) from unknown-address + 16, align 4) + ; GREEDY-NEXT: [[AMDGPU_BUFFER_LOAD3:%[0-9]+]]:vgpr(<2 x p1>) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR]](<4 x s32>), [[C1]](s32), [[COPY4]], [[C]], 48, 0, 0 :: (dereferenceable invariant load (s128) from unknown-address + 48, align 4) + ; GREEDY-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:vgpr(<8 x p1>) = G_CONCAT_VECTORS [[AMDGPU_BUFFER_LOAD]](<2 x p1>), [[AMDGPU_BUFFER_LOAD1]](<2 x p1>), [[AMDGPU_BUFFER_LOAD2]](<2 x 
p1>), [[AMDGPU_BUFFER_LOAD3]](<2 x p1>) + ; GREEDY-NEXT: [[UV:%[0-9]+]]:vgpr(<2 x p1>), [[UV1:%[0-9]+]]:vgpr(<2 x p1>), [[UV2:%[0-9]+]]:vgpr(<2 x p1>), [[UV3:%[0-9]+]]:vgpr(<2 x p1>) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<8 x p1>) + ; GREEDY-NEXT: G_STORE [[UV]](<2 x p1>), [[DEF]](p1) :: (store (<2 x p1>) into `<8 x i8 addrspace(1)*> addrspace(1)* undef`, align 64, addrspace 1) + ; GREEDY-NEXT: [[C2:%[0-9]+]]:sgpr(s64) = G_CONSTANT i64 16 + ; GREEDY-NEXT: [[PTR_ADD:%[0-9]+]]:sgpr(p1) = G_PTR_ADD [[DEF]], [[C2]](s64) + ; GREEDY-NEXT: G_STORE [[UV1]](<2 x p1>), [[PTR_ADD]](p1) :: (store (<2 x p1>) into `<8 x i8 addrspace(1)*> addrspace(1)* undef` + 16, basealign 64, addrspace 1) + ; GREEDY-NEXT: [[C3:%[0-9]+]]:sgpr(s64) = G_CONSTANT i64 32 + ; GREEDY-NEXT: [[PTR_ADD1:%[0-9]+]]:sgpr(p1) = G_PTR_ADD [[DEF]], [[C3]](s64) + ; GREEDY-NEXT: G_STORE [[UV2]](<2 x p1>), [[PTR_ADD1]](p1) :: (store (<2 x p1>) into `<8 x i8 addrspace(1)*> addrspace(1)* undef` + 32, align 32, basealign 64, addrspace 1) + ; GREEDY-NEXT: [[C4:%[0-9]+]]:sgpr(s64) = G_CONSTANT i64 48 + ; GREEDY-NEXT: [[PTR_ADD2:%[0-9]+]]:sgpr(p1) = G_PTR_ADD [[DEF]], [[C4]](s64) + ; GREEDY-NEXT: G_STORE [[UV3]](<2 x p1>), [[PTR_ADD2]](p1) :: (store (<2 x p1>) into `<8 x i8 addrspace(1)*> addrspace(1)* undef` + 48, basealign 64, addrspace 1) + ; GREEDY-NEXT: S_ENDPGM 0 %val = call <8 x i8 addrspace(1)*> @llvm.amdgcn.s.buffer.load.v8p1i8(<4 x i32> %rsrc, i32 %soffset, i32 0) store <8 x i8 addrspace(1)*> %val, <8 x i8 addrspace(1)*> addrspace(1)* undef ret void @@ -1124,38 +1164,40 @@ define amdgpu_ps void @s_buffer_load_v8p1_vgpr_offset(<4 x i32> inreg %rsrc, i32 define amdgpu_ps float @s_buffer_load_f32_vgpr_offset_add_4092(<4 x i32> inreg %rsrc, i32 %soffset.base) { ; CHECK-LABEL: name: s_buffer_load_f32_vgpr_offset_add_4092 ; CHECK: bb.1 (%ir-block.0): - ; CHECK: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $vgpr0 - ; CHECK: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr2 - ; CHECK: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr3 - ; CHECK: [[COPY2:%[0-9]+]]:sgpr(s32) = COPY $sgpr4 - ; CHECK: [[COPY3:%[0-9]+]]:sgpr(s32) = COPY $sgpr5 - ; CHECK: [[BUILD_VECTOR:%[0-9]+]]:sgpr(<4 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32) - ; CHECK: [[COPY4:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 - ; CHECK: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 4092 - ; CHECK: [[COPY5:%[0-9]+]]:vgpr(s32) = COPY [[C]](s32) - ; CHECK: [[ADD:%[0-9]+]]:vgpr(s32) = G_ADD [[COPY4]], [[COPY5]] - ; CHECK: [[C1:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 0 - ; CHECK: [[C2:%[0-9]+]]:vgpr(s32) = G_CONSTANT i32 0 - ; CHECK: [[AMDGPU_BUFFER_LOAD:%[0-9]+]]:vgpr(s32) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR]](<4 x s32>), [[C2]](s32), [[COPY4]], [[C1]], 4092, 0, 0 :: (dereferenceable invariant load (s32)) - ; CHECK: $vgpr0 = COPY [[AMDGPU_BUFFER_LOAD]](s32) - ; CHECK: SI_RETURN_TO_EPILOG implicit $vgpr0 + ; CHECK-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $vgpr0 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr2 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr3 + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:sgpr(s32) = COPY $sgpr4 + ; CHECK-NEXT: [[COPY3:%[0-9]+]]:sgpr(s32) = COPY $sgpr5 + ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:sgpr(<4 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32) + ; CHECK-NEXT: [[COPY4:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 + ; CHECK-NEXT: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 4092 + ; CHECK-NEXT: [[COPY5:%[0-9]+]]:vgpr(s32) = COPY [[C]](s32) + ; CHECK-NEXT: [[ADD:%[0-9]+]]:vgpr(s32) = G_ADD 
[[COPY4]], [[COPY5]] + ; CHECK-NEXT: [[C1:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 0 + ; CHECK-NEXT: [[C2:%[0-9]+]]:vgpr(s32) = G_CONSTANT i32 0 + ; CHECK-NEXT: [[AMDGPU_BUFFER_LOAD:%[0-9]+]]:vgpr(s32) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR]](<4 x s32>), [[C2]](s32), [[COPY4]], [[C1]], 4092, 0, 0 :: (dereferenceable invariant load (s32)) + ; CHECK-NEXT: $vgpr0 = COPY [[AMDGPU_BUFFER_LOAD]](s32) + ; CHECK-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0 ; GREEDY-LABEL: name: s_buffer_load_f32_vgpr_offset_add_4092 ; GREEDY: bb.1 (%ir-block.0): - ; GREEDY: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $vgpr0 - ; GREEDY: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr2 - ; GREEDY: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr3 - ; GREEDY: [[COPY2:%[0-9]+]]:sgpr(s32) = COPY $sgpr4 - ; GREEDY: [[COPY3:%[0-9]+]]:sgpr(s32) = COPY $sgpr5 - ; GREEDY: [[BUILD_VECTOR:%[0-9]+]]:sgpr(<4 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32) - ; GREEDY: [[COPY4:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 - ; GREEDY: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 4092 - ; GREEDY: [[COPY5:%[0-9]+]]:vgpr(s32) = COPY [[C]](s32) - ; GREEDY: [[ADD:%[0-9]+]]:vgpr(s32) = G_ADD [[COPY4]], [[COPY5]] - ; GREEDY: [[C1:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 0 - ; GREEDY: [[C2:%[0-9]+]]:vgpr(s32) = G_CONSTANT i32 0 - ; GREEDY: [[AMDGPU_BUFFER_LOAD:%[0-9]+]]:vgpr(s32) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR]](<4 x s32>), [[C2]](s32), [[COPY4]], [[C1]], 4092, 0, 0 :: (dereferenceable invariant load (s32)) - ; GREEDY: $vgpr0 = COPY [[AMDGPU_BUFFER_LOAD]](s32) - ; GREEDY: SI_RETURN_TO_EPILOG implicit $vgpr0 + ; GREEDY-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $vgpr0 + ; GREEDY-NEXT: {{ $}} + ; GREEDY-NEXT: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr2 + ; GREEDY-NEXT: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr3 + ; GREEDY-NEXT: [[COPY2:%[0-9]+]]:sgpr(s32) = COPY $sgpr4 + ; GREEDY-NEXT: [[COPY3:%[0-9]+]]:sgpr(s32) = COPY $sgpr5 + ; GREEDY-NEXT: [[BUILD_VECTOR:%[0-9]+]]:sgpr(<4 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32) + ; GREEDY-NEXT: [[COPY4:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 + ; GREEDY-NEXT: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 4092 + ; GREEDY-NEXT: [[COPY5:%[0-9]+]]:vgpr(s32) = COPY [[C]](s32) + ; GREEDY-NEXT: [[ADD:%[0-9]+]]:vgpr(s32) = G_ADD [[COPY4]], [[COPY5]] + ; GREEDY-NEXT: [[C1:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 0 + ; GREEDY-NEXT: [[C2:%[0-9]+]]:vgpr(s32) = G_CONSTANT i32 0 + ; GREEDY-NEXT: [[AMDGPU_BUFFER_LOAD:%[0-9]+]]:vgpr(s32) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR]](<4 x s32>), [[C2]](s32), [[COPY4]], [[C1]], 4092, 0, 0 :: (dereferenceable invariant load (s32)) + ; GREEDY-NEXT: $vgpr0 = COPY [[AMDGPU_BUFFER_LOAD]](s32) + ; GREEDY-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0 %soffset = add i32 %soffset.base, 4092 %val = call float @llvm.amdgcn.s.buffer.load.f32(<4 x i32> %rsrc, i32 %soffset, i32 0) ret float %val @@ -1164,38 +1206,40 @@ define amdgpu_ps float @s_buffer_load_f32_vgpr_offset_add_4092(<4 x i32> inreg % define amdgpu_ps float @s_buffer_load_f32_vgpr_offset_add_4095(<4 x i32> inreg %rsrc, i32 %soffset.base) { ; CHECK-LABEL: name: s_buffer_load_f32_vgpr_offset_add_4095 ; CHECK: bb.1 (%ir-block.0): - ; CHECK: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $vgpr0 - ; CHECK: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr2 - ; CHECK: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr3 - ; CHECK: [[COPY2:%[0-9]+]]:sgpr(s32) = COPY $sgpr4 - ; CHECK: [[COPY3:%[0-9]+]]:sgpr(s32) = COPY $sgpr5 - ; CHECK: [[BUILD_VECTOR:%[0-9]+]]:sgpr(<4 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), 
[[COPY2]](s32), [[COPY3]](s32) - ; CHECK: [[COPY4:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 - ; CHECK: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 4095 - ; CHECK: [[COPY5:%[0-9]+]]:vgpr(s32) = COPY [[C]](s32) - ; CHECK: [[ADD:%[0-9]+]]:vgpr(s32) = G_ADD [[COPY4]], [[COPY5]] - ; CHECK: [[C1:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 0 - ; CHECK: [[C2:%[0-9]+]]:vgpr(s32) = G_CONSTANT i32 0 - ; CHECK: [[AMDGPU_BUFFER_LOAD:%[0-9]+]]:vgpr(s32) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR]](<4 x s32>), [[C2]](s32), [[COPY4]], [[C1]], 4095, 0, 0 :: (dereferenceable invariant load (s32)) - ; CHECK: $vgpr0 = COPY [[AMDGPU_BUFFER_LOAD]](s32) - ; CHECK: SI_RETURN_TO_EPILOG implicit $vgpr0 + ; CHECK-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $vgpr0 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr2 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr3 + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:sgpr(s32) = COPY $sgpr4 + ; CHECK-NEXT: [[COPY3:%[0-9]+]]:sgpr(s32) = COPY $sgpr5 + ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:sgpr(<4 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32) + ; CHECK-NEXT: [[COPY4:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 + ; CHECK-NEXT: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 4095 + ; CHECK-NEXT: [[COPY5:%[0-9]+]]:vgpr(s32) = COPY [[C]](s32) + ; CHECK-NEXT: [[ADD:%[0-9]+]]:vgpr(s32) = G_ADD [[COPY4]], [[COPY5]] + ; CHECK-NEXT: [[C1:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 0 + ; CHECK-NEXT: [[C2:%[0-9]+]]:vgpr(s32) = G_CONSTANT i32 0 + ; CHECK-NEXT: [[AMDGPU_BUFFER_LOAD:%[0-9]+]]:vgpr(s32) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR]](<4 x s32>), [[C2]](s32), [[COPY4]], [[C1]], 4095, 0, 0 :: (dereferenceable invariant load (s32)) + ; CHECK-NEXT: $vgpr0 = COPY [[AMDGPU_BUFFER_LOAD]](s32) + ; CHECK-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0 ; GREEDY-LABEL: name: s_buffer_load_f32_vgpr_offset_add_4095 ; GREEDY: bb.1 (%ir-block.0): - ; GREEDY: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $vgpr0 - ; GREEDY: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr2 - ; GREEDY: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr3 - ; GREEDY: [[COPY2:%[0-9]+]]:sgpr(s32) = COPY $sgpr4 - ; GREEDY: [[COPY3:%[0-9]+]]:sgpr(s32) = COPY $sgpr5 - ; GREEDY: [[BUILD_VECTOR:%[0-9]+]]:sgpr(<4 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32) - ; GREEDY: [[COPY4:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 - ; GREEDY: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 4095 - ; GREEDY: [[COPY5:%[0-9]+]]:vgpr(s32) = COPY [[C]](s32) - ; GREEDY: [[ADD:%[0-9]+]]:vgpr(s32) = G_ADD [[COPY4]], [[COPY5]] - ; GREEDY: [[C1:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 0 - ; GREEDY: [[C2:%[0-9]+]]:vgpr(s32) = G_CONSTANT i32 0 - ; GREEDY: [[AMDGPU_BUFFER_LOAD:%[0-9]+]]:vgpr(s32) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR]](<4 x s32>), [[C2]](s32), [[COPY4]], [[C1]], 4095, 0, 0 :: (dereferenceable invariant load (s32)) - ; GREEDY: $vgpr0 = COPY [[AMDGPU_BUFFER_LOAD]](s32) - ; GREEDY: SI_RETURN_TO_EPILOG implicit $vgpr0 + ; GREEDY-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $vgpr0 + ; GREEDY-NEXT: {{ $}} + ; GREEDY-NEXT: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr2 + ; GREEDY-NEXT: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr3 + ; GREEDY-NEXT: [[COPY2:%[0-9]+]]:sgpr(s32) = COPY $sgpr4 + ; GREEDY-NEXT: [[COPY3:%[0-9]+]]:sgpr(s32) = COPY $sgpr5 + ; GREEDY-NEXT: [[BUILD_VECTOR:%[0-9]+]]:sgpr(<4 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32) + ; GREEDY-NEXT: [[COPY4:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 + ; GREEDY-NEXT: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 4095 + ; GREEDY-NEXT: 
[[COPY5:%[0-9]+]]:vgpr(s32) = COPY [[C]](s32) + ; GREEDY-NEXT: [[ADD:%[0-9]+]]:vgpr(s32) = G_ADD [[COPY4]], [[COPY5]] + ; GREEDY-NEXT: [[C1:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 0 + ; GREEDY-NEXT: [[C2:%[0-9]+]]:vgpr(s32) = G_CONSTANT i32 0 + ; GREEDY-NEXT: [[AMDGPU_BUFFER_LOAD:%[0-9]+]]:vgpr(s32) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR]](<4 x s32>), [[C2]](s32), [[COPY4]], [[C1]], 4095, 0, 0 :: (dereferenceable invariant load (s32)) + ; GREEDY-NEXT: $vgpr0 = COPY [[AMDGPU_BUFFER_LOAD]](s32) + ; GREEDY-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0 %soffset = add i32 %soffset.base, 4095 %val = call float @llvm.amdgcn.s.buffer.load.f32(<4 x i32> %rsrc, i32 %soffset, i32 0) ret float %val @@ -1204,36 +1248,38 @@ define amdgpu_ps float @s_buffer_load_f32_vgpr_offset_add_4095(<4 x i32> inreg % define amdgpu_ps float @s_buffer_load_f32_vgpr_offset_add_4096(<4 x i32> inreg %rsrc, i32 %soffset.base) { ; CHECK-LABEL: name: s_buffer_load_f32_vgpr_offset_add_4096 ; CHECK: bb.1 (%ir-block.0): - ; CHECK: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $vgpr0 - ; CHECK: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr2 - ; CHECK: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr3 - ; CHECK: [[COPY2:%[0-9]+]]:sgpr(s32) = COPY $sgpr4 - ; CHECK: [[COPY3:%[0-9]+]]:sgpr(s32) = COPY $sgpr5 - ; CHECK: [[BUILD_VECTOR:%[0-9]+]]:sgpr(<4 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32) - ; CHECK: [[COPY4:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 - ; CHECK: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 4096 - ; CHECK: [[COPY5:%[0-9]+]]:vgpr(s32) = COPY [[C]](s32) - ; CHECK: [[ADD:%[0-9]+]]:vgpr(s32) = G_ADD [[COPY4]], [[COPY5]] - ; CHECK: [[C1:%[0-9]+]]:vgpr(s32) = G_CONSTANT i32 0 - ; CHECK: [[AMDGPU_BUFFER_LOAD:%[0-9]+]]:vgpr(s32) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR]](<4 x s32>), [[C1]](s32), [[COPY4]], [[C]], 0, 0, 0 :: (dereferenceable invariant load (s32)) - ; CHECK: $vgpr0 = COPY [[AMDGPU_BUFFER_LOAD]](s32) - ; CHECK: SI_RETURN_TO_EPILOG implicit $vgpr0 + ; CHECK-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $vgpr0 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr2 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr3 + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:sgpr(s32) = COPY $sgpr4 + ; CHECK-NEXT: [[COPY3:%[0-9]+]]:sgpr(s32) = COPY $sgpr5 + ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:sgpr(<4 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32) + ; CHECK-NEXT: [[COPY4:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 + ; CHECK-NEXT: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 4096 + ; CHECK-NEXT: [[COPY5:%[0-9]+]]:vgpr(s32) = COPY [[C]](s32) + ; CHECK-NEXT: [[ADD:%[0-9]+]]:vgpr(s32) = G_ADD [[COPY4]], [[COPY5]] + ; CHECK-NEXT: [[C1:%[0-9]+]]:vgpr(s32) = G_CONSTANT i32 0 + ; CHECK-NEXT: [[AMDGPU_BUFFER_LOAD:%[0-9]+]]:vgpr(s32) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR]](<4 x s32>), [[C1]](s32), [[COPY4]], [[C]], 0, 0, 0 :: (dereferenceable invariant load (s32)) + ; CHECK-NEXT: $vgpr0 = COPY [[AMDGPU_BUFFER_LOAD]](s32) + ; CHECK-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0 ; GREEDY-LABEL: name: s_buffer_load_f32_vgpr_offset_add_4096 ; GREEDY: bb.1 (%ir-block.0): - ; GREEDY: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $vgpr0 - ; GREEDY: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr2 - ; GREEDY: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr3 - ; GREEDY: [[COPY2:%[0-9]+]]:sgpr(s32) = COPY $sgpr4 - ; GREEDY: [[COPY3:%[0-9]+]]:sgpr(s32) = COPY $sgpr5 - ; GREEDY: [[BUILD_VECTOR:%[0-9]+]]:sgpr(<4 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32) - ; GREEDY: 
[[COPY4:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 - ; GREEDY: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 4096 - ; GREEDY: [[COPY5:%[0-9]+]]:vgpr(s32) = COPY [[C]](s32) - ; GREEDY: [[ADD:%[0-9]+]]:vgpr(s32) = G_ADD [[COPY4]], [[COPY5]] - ; GREEDY: [[C1:%[0-9]+]]:vgpr(s32) = G_CONSTANT i32 0 - ; GREEDY: [[AMDGPU_BUFFER_LOAD:%[0-9]+]]:vgpr(s32) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR]](<4 x s32>), [[C1]](s32), [[COPY4]], [[C]], 0, 0, 0 :: (dereferenceable invariant load (s32)) - ; GREEDY: $vgpr0 = COPY [[AMDGPU_BUFFER_LOAD]](s32) - ; GREEDY: SI_RETURN_TO_EPILOG implicit $vgpr0 + ; GREEDY-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $vgpr0 + ; GREEDY-NEXT: {{ $}} + ; GREEDY-NEXT: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr2 + ; GREEDY-NEXT: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr3 + ; GREEDY-NEXT: [[COPY2:%[0-9]+]]:sgpr(s32) = COPY $sgpr4 + ; GREEDY-NEXT: [[COPY3:%[0-9]+]]:sgpr(s32) = COPY $sgpr5 + ; GREEDY-NEXT: [[BUILD_VECTOR:%[0-9]+]]:sgpr(<4 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32) + ; GREEDY-NEXT: [[COPY4:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 + ; GREEDY-NEXT: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 4096 + ; GREEDY-NEXT: [[COPY5:%[0-9]+]]:vgpr(s32) = COPY [[C]](s32) + ; GREEDY-NEXT: [[ADD:%[0-9]+]]:vgpr(s32) = G_ADD [[COPY4]], [[COPY5]] + ; GREEDY-NEXT: [[C1:%[0-9]+]]:vgpr(s32) = G_CONSTANT i32 0 + ; GREEDY-NEXT: [[AMDGPU_BUFFER_LOAD:%[0-9]+]]:vgpr(s32) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR]](<4 x s32>), [[C1]](s32), [[COPY4]], [[C]], 0, 0, 0 :: (dereferenceable invariant load (s32)) + ; GREEDY-NEXT: $vgpr0 = COPY [[AMDGPU_BUFFER_LOAD]](s32) + ; GREEDY-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0 %soffset = add i32 %soffset.base, 4096 %val = call float @llvm.amdgcn.s.buffer.load.f32(<4 x i32> %rsrc, i32 %soffset, i32 0) ret float %val @@ -1243,58 +1289,60 @@ define amdgpu_ps float @s_buffer_load_f32_vgpr_offset_add_4096(<4 x i32> inreg % define amdgpu_ps <8 x float> @s_buffer_load_v8f32_vgpr_offset_add_4064(<4 x i32> inreg %rsrc, i32 %soffset.base) { ; CHECK-LABEL: name: s_buffer_load_v8f32_vgpr_offset_add_4064 ; CHECK: bb.1 (%ir-block.0): - ; CHECK: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $vgpr0 - ; CHECK: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr2 - ; CHECK: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr3 - ; CHECK: [[COPY2:%[0-9]+]]:sgpr(s32) = COPY $sgpr4 - ; CHECK: [[COPY3:%[0-9]+]]:sgpr(s32) = COPY $sgpr5 - ; CHECK: [[BUILD_VECTOR:%[0-9]+]]:sgpr(<4 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32) - ; CHECK: [[COPY4:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 - ; CHECK: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 4064 - ; CHECK: [[COPY5:%[0-9]+]]:vgpr(s32) = COPY [[C]](s32) - ; CHECK: [[ADD:%[0-9]+]]:vgpr(s32) = G_ADD [[COPY4]], [[COPY5]] - ; CHECK: [[C1:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 0 - ; CHECK: [[C2:%[0-9]+]]:vgpr(s32) = G_CONSTANT i32 0 - ; CHECK: [[AMDGPU_BUFFER_LOAD:%[0-9]+]]:vgpr(<4 x s32>) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR]](<4 x s32>), [[C2]](s32), [[COPY4]], [[C1]], 4064, 0, 0 :: (dereferenceable invariant load (s128), align 4) - ; CHECK: [[AMDGPU_BUFFER_LOAD1:%[0-9]+]]:vgpr(<4 x s32>) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR]](<4 x s32>), [[C2]](s32), [[COPY4]], [[C1]], 4080, 0, 0 :: (dereferenceable invariant load (s128), align 4) - ; CHECK: [[CONCAT_VECTORS:%[0-9]+]]:vgpr(<8 x s32>) = G_CONCAT_VECTORS [[AMDGPU_BUFFER_LOAD]](<4 x s32>), [[AMDGPU_BUFFER_LOAD1]](<4 x s32>) - ; CHECK: [[UV:%[0-9]+]]:vgpr(s32), [[UV1:%[0-9]+]]:vgpr(s32), [[UV2:%[0-9]+]]:vgpr(s32), [[UV3:%[0-9]+]]:vgpr(s32), [[UV4:%[0-9]+]]:vgpr(s32), 
[[UV5:%[0-9]+]]:vgpr(s32), [[UV6:%[0-9]+]]:vgpr(s32), [[UV7:%[0-9]+]]:vgpr(s32) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<8 x s32>) - ; CHECK: $vgpr0 = COPY [[UV]](s32) - ; CHECK: $vgpr1 = COPY [[UV1]](s32) - ; CHECK: $vgpr2 = COPY [[UV2]](s32) - ; CHECK: $vgpr3 = COPY [[UV3]](s32) - ; CHECK: $vgpr4 = COPY [[UV4]](s32) - ; CHECK: $vgpr5 = COPY [[UV5]](s32) - ; CHECK: $vgpr6 = COPY [[UV6]](s32) - ; CHECK: $vgpr7 = COPY [[UV7]](s32) - ; CHECK: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7 + ; CHECK-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $vgpr0 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr2 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr3 + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:sgpr(s32) = COPY $sgpr4 + ; CHECK-NEXT: [[COPY3:%[0-9]+]]:sgpr(s32) = COPY $sgpr5 + ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:sgpr(<4 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32) + ; CHECK-NEXT: [[COPY4:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 + ; CHECK-NEXT: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 4064 + ; CHECK-NEXT: [[COPY5:%[0-9]+]]:vgpr(s32) = COPY [[C]](s32) + ; CHECK-NEXT: [[ADD:%[0-9]+]]:vgpr(s32) = G_ADD [[COPY4]], [[COPY5]] + ; CHECK-NEXT: [[C1:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 0 + ; CHECK-NEXT: [[C2:%[0-9]+]]:vgpr(s32) = G_CONSTANT i32 0 + ; CHECK-NEXT: [[AMDGPU_BUFFER_LOAD:%[0-9]+]]:vgpr(<4 x s32>) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR]](<4 x s32>), [[C2]](s32), [[COPY4]], [[C1]], 4064, 0, 0 :: (dereferenceable invariant load (s128), align 4) + ; CHECK-NEXT: [[AMDGPU_BUFFER_LOAD1:%[0-9]+]]:vgpr(<4 x s32>) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR]](<4 x s32>), [[C2]](s32), [[COPY4]], [[C1]], 4080, 0, 0 :: (dereferenceable invariant load (s128), align 4) + ; CHECK-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:vgpr(<8 x s32>) = G_CONCAT_VECTORS [[AMDGPU_BUFFER_LOAD]](<4 x s32>), [[AMDGPU_BUFFER_LOAD1]](<4 x s32>) + ; CHECK-NEXT: [[UV:%[0-9]+]]:vgpr(s32), [[UV1:%[0-9]+]]:vgpr(s32), [[UV2:%[0-9]+]]:vgpr(s32), [[UV3:%[0-9]+]]:vgpr(s32), [[UV4:%[0-9]+]]:vgpr(s32), [[UV5:%[0-9]+]]:vgpr(s32), [[UV6:%[0-9]+]]:vgpr(s32), [[UV7:%[0-9]+]]:vgpr(s32) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<8 x s32>) + ; CHECK-NEXT: $vgpr0 = COPY [[UV]](s32) + ; CHECK-NEXT: $vgpr1 = COPY [[UV1]](s32) + ; CHECK-NEXT: $vgpr2 = COPY [[UV2]](s32) + ; CHECK-NEXT: $vgpr3 = COPY [[UV3]](s32) + ; CHECK-NEXT: $vgpr4 = COPY [[UV4]](s32) + ; CHECK-NEXT: $vgpr5 = COPY [[UV5]](s32) + ; CHECK-NEXT: $vgpr6 = COPY [[UV6]](s32) + ; CHECK-NEXT: $vgpr7 = COPY [[UV7]](s32) + ; CHECK-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7 ; GREEDY-LABEL: name: s_buffer_load_v8f32_vgpr_offset_add_4064 ; GREEDY: bb.1 (%ir-block.0): - ; GREEDY: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $vgpr0 - ; GREEDY: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr2 - ; GREEDY: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr3 - ; GREEDY: [[COPY2:%[0-9]+]]:sgpr(s32) = COPY $sgpr4 - ; GREEDY: [[COPY3:%[0-9]+]]:sgpr(s32) = COPY $sgpr5 - ; GREEDY: [[BUILD_VECTOR:%[0-9]+]]:sgpr(<4 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32) - ; GREEDY: [[COPY4:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 - ; GREEDY: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 4064 - ; GREEDY: [[COPY5:%[0-9]+]]:vgpr(s32) = COPY [[C]](s32) - ; GREEDY: [[ADD:%[0-9]+]]:vgpr(s32) = G_ADD [[COPY4]], [[COPY5]] - ; GREEDY: 
[[C1:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 0 - ; GREEDY: [[C2:%[0-9]+]]:vgpr(s32) = G_CONSTANT i32 0 - ; GREEDY: [[AMDGPU_BUFFER_LOAD:%[0-9]+]]:vgpr(<4 x s32>) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR]](<4 x s32>), [[C2]](s32), [[COPY4]], [[C1]], 4064, 0, 0 :: (dereferenceable invariant load (s128), align 4) - ; GREEDY: [[AMDGPU_BUFFER_LOAD1:%[0-9]+]]:vgpr(<4 x s32>) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR]](<4 x s32>), [[C2]](s32), [[COPY4]], [[C1]], 4080, 0, 0 :: (dereferenceable invariant load (s128), align 4) - ; GREEDY: [[CONCAT_VECTORS:%[0-9]+]]:vgpr(<8 x s32>) = G_CONCAT_VECTORS [[AMDGPU_BUFFER_LOAD]](<4 x s32>), [[AMDGPU_BUFFER_LOAD1]](<4 x s32>) - ; GREEDY: [[UV:%[0-9]+]]:vgpr(s32), [[UV1:%[0-9]+]]:vgpr(s32), [[UV2:%[0-9]+]]:vgpr(s32), [[UV3:%[0-9]+]]:vgpr(s32), [[UV4:%[0-9]+]]:vgpr(s32), [[UV5:%[0-9]+]]:vgpr(s32), [[UV6:%[0-9]+]]:vgpr(s32), [[UV7:%[0-9]+]]:vgpr(s32) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<8 x s32>) - ; GREEDY: $vgpr0 = COPY [[UV]](s32) - ; GREEDY: $vgpr1 = COPY [[UV1]](s32) - ; GREEDY: $vgpr2 = COPY [[UV2]](s32) - ; GREEDY: $vgpr3 = COPY [[UV3]](s32) - ; GREEDY: $vgpr4 = COPY [[UV4]](s32) - ; GREEDY: $vgpr5 = COPY [[UV5]](s32) - ; GREEDY: $vgpr6 = COPY [[UV6]](s32) - ; GREEDY: $vgpr7 = COPY [[UV7]](s32) - ; GREEDY: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7 + ; GREEDY-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $vgpr0 + ; GREEDY-NEXT: {{ $}} + ; GREEDY-NEXT: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr2 + ; GREEDY-NEXT: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr3 + ; GREEDY-NEXT: [[COPY2:%[0-9]+]]:sgpr(s32) = COPY $sgpr4 + ; GREEDY-NEXT: [[COPY3:%[0-9]+]]:sgpr(s32) = COPY $sgpr5 + ; GREEDY-NEXT: [[BUILD_VECTOR:%[0-9]+]]:sgpr(<4 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32) + ; GREEDY-NEXT: [[COPY4:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 + ; GREEDY-NEXT: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 4064 + ; GREEDY-NEXT: [[COPY5:%[0-9]+]]:vgpr(s32) = COPY [[C]](s32) + ; GREEDY-NEXT: [[ADD:%[0-9]+]]:vgpr(s32) = G_ADD [[COPY4]], [[COPY5]] + ; GREEDY-NEXT: [[C1:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 0 + ; GREEDY-NEXT: [[C2:%[0-9]+]]:vgpr(s32) = G_CONSTANT i32 0 + ; GREEDY-NEXT: [[AMDGPU_BUFFER_LOAD:%[0-9]+]]:vgpr(<4 x s32>) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR]](<4 x s32>), [[C2]](s32), [[COPY4]], [[C1]], 4064, 0, 0 :: (dereferenceable invariant load (s128), align 4) + ; GREEDY-NEXT: [[AMDGPU_BUFFER_LOAD1:%[0-9]+]]:vgpr(<4 x s32>) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR]](<4 x s32>), [[C2]](s32), [[COPY4]], [[C1]], 4080, 0, 0 :: (dereferenceable invariant load (s128), align 4) + ; GREEDY-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:vgpr(<8 x s32>) = G_CONCAT_VECTORS [[AMDGPU_BUFFER_LOAD]](<4 x s32>), [[AMDGPU_BUFFER_LOAD1]](<4 x s32>) + ; GREEDY-NEXT: [[UV:%[0-9]+]]:vgpr(s32), [[UV1:%[0-9]+]]:vgpr(s32), [[UV2:%[0-9]+]]:vgpr(s32), [[UV3:%[0-9]+]]:vgpr(s32), [[UV4:%[0-9]+]]:vgpr(s32), [[UV5:%[0-9]+]]:vgpr(s32), [[UV6:%[0-9]+]]:vgpr(s32), [[UV7:%[0-9]+]]:vgpr(s32) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<8 x s32>) + ; GREEDY-NEXT: $vgpr0 = COPY [[UV]](s32) + ; GREEDY-NEXT: $vgpr1 = COPY [[UV1]](s32) + ; GREEDY-NEXT: $vgpr2 = COPY [[UV2]](s32) + ; GREEDY-NEXT: $vgpr3 = COPY [[UV3]](s32) + ; GREEDY-NEXT: $vgpr4 = COPY [[UV4]](s32) + ; GREEDY-NEXT: $vgpr5 = COPY [[UV5]](s32) + ; GREEDY-NEXT: $vgpr6 = COPY [[UV6]](s32) + ; GREEDY-NEXT: $vgpr7 = COPY [[UV7]](s32) + ; GREEDY-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit 
$vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7 %soffset = add i32 %soffset.base, 4064 %val = call <8 x float> @llvm.amdgcn.s.buffer.load.v8f32(<4 x i32> %rsrc, i32 %soffset, i32 0) ret <8 x float> %val @@ -1304,56 +1352,58 @@ define amdgpu_ps <8 x float> @s_buffer_load_v8f32_vgpr_offset_add_4064(<4 x i32> define amdgpu_ps <8 x float> @s_buffer_load_v8f32_vgpr_offset_add_4068(<4 x i32> inreg %rsrc, i32 %soffset.base) { ; CHECK-LABEL: name: s_buffer_load_v8f32_vgpr_offset_add_4068 ; CHECK: bb.1 (%ir-block.0): - ; CHECK: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $vgpr0 - ; CHECK: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr2 - ; CHECK: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr3 - ; CHECK: [[COPY2:%[0-9]+]]:sgpr(s32) = COPY $sgpr4 - ; CHECK: [[COPY3:%[0-9]+]]:sgpr(s32) = COPY $sgpr5 - ; CHECK: [[BUILD_VECTOR:%[0-9]+]]:sgpr(<4 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32) - ; CHECK: [[COPY4:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 - ; CHECK: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 4068 - ; CHECK: [[COPY5:%[0-9]+]]:vgpr(s32) = COPY [[C]](s32) - ; CHECK: [[ADD:%[0-9]+]]:vgpr(s32) = G_ADD [[COPY4]], [[COPY5]] - ; CHECK: [[C1:%[0-9]+]]:vgpr(s32) = G_CONSTANT i32 0 - ; CHECK: [[AMDGPU_BUFFER_LOAD:%[0-9]+]]:vgpr(<4 x s32>) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR]](<4 x s32>), [[C1]](s32), [[COPY4]], [[C]], 0, 0, 0 :: (dereferenceable invariant load (s128), align 4) - ; CHECK: [[AMDGPU_BUFFER_LOAD1:%[0-9]+]]:vgpr(<4 x s32>) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR]](<4 x s32>), [[C1]](s32), [[COPY4]], [[C]], 16, 0, 0 :: (dereferenceable invariant load (s128), align 4) - ; CHECK: [[CONCAT_VECTORS:%[0-9]+]]:vgpr(<8 x s32>) = G_CONCAT_VECTORS [[AMDGPU_BUFFER_LOAD]](<4 x s32>), [[AMDGPU_BUFFER_LOAD1]](<4 x s32>) - ; CHECK: [[UV:%[0-9]+]]:vgpr(s32), [[UV1:%[0-9]+]]:vgpr(s32), [[UV2:%[0-9]+]]:vgpr(s32), [[UV3:%[0-9]+]]:vgpr(s32), [[UV4:%[0-9]+]]:vgpr(s32), [[UV5:%[0-9]+]]:vgpr(s32), [[UV6:%[0-9]+]]:vgpr(s32), [[UV7:%[0-9]+]]:vgpr(s32) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<8 x s32>) - ; CHECK: $vgpr0 = COPY [[UV]](s32) - ; CHECK: $vgpr1 = COPY [[UV1]](s32) - ; CHECK: $vgpr2 = COPY [[UV2]](s32) - ; CHECK: $vgpr3 = COPY [[UV3]](s32) - ; CHECK: $vgpr4 = COPY [[UV4]](s32) - ; CHECK: $vgpr5 = COPY [[UV5]](s32) - ; CHECK: $vgpr6 = COPY [[UV6]](s32) - ; CHECK: $vgpr7 = COPY [[UV7]](s32) - ; CHECK: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7 + ; CHECK-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $vgpr0 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr2 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr3 + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:sgpr(s32) = COPY $sgpr4 + ; CHECK-NEXT: [[COPY3:%[0-9]+]]:sgpr(s32) = COPY $sgpr5 + ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:sgpr(<4 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32) + ; CHECK-NEXT: [[COPY4:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 + ; CHECK-NEXT: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 4068 + ; CHECK-NEXT: [[COPY5:%[0-9]+]]:vgpr(s32) = COPY [[C]](s32) + ; CHECK-NEXT: [[ADD:%[0-9]+]]:vgpr(s32) = G_ADD [[COPY4]], [[COPY5]] + ; CHECK-NEXT: [[C1:%[0-9]+]]:vgpr(s32) = G_CONSTANT i32 0 + ; CHECK-NEXT: [[AMDGPU_BUFFER_LOAD:%[0-9]+]]:vgpr(<4 x s32>) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR]](<4 x s32>), [[C1]](s32), [[COPY4]], [[C]], 0, 0, 0 :: (dereferenceable invariant load (s128), align 4) + ; CHECK-NEXT: [[AMDGPU_BUFFER_LOAD1:%[0-9]+]]:vgpr(<4 x 
s32>) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR]](<4 x s32>), [[C1]](s32), [[COPY4]], [[C]], 16, 0, 0 :: (dereferenceable invariant load (s128), align 4) + ; CHECK-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:vgpr(<8 x s32>) = G_CONCAT_VECTORS [[AMDGPU_BUFFER_LOAD]](<4 x s32>), [[AMDGPU_BUFFER_LOAD1]](<4 x s32>) + ; CHECK-NEXT: [[UV:%[0-9]+]]:vgpr(s32), [[UV1:%[0-9]+]]:vgpr(s32), [[UV2:%[0-9]+]]:vgpr(s32), [[UV3:%[0-9]+]]:vgpr(s32), [[UV4:%[0-9]+]]:vgpr(s32), [[UV5:%[0-9]+]]:vgpr(s32), [[UV6:%[0-9]+]]:vgpr(s32), [[UV7:%[0-9]+]]:vgpr(s32) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<8 x s32>) + ; CHECK-NEXT: $vgpr0 = COPY [[UV]](s32) + ; CHECK-NEXT: $vgpr1 = COPY [[UV1]](s32) + ; CHECK-NEXT: $vgpr2 = COPY [[UV2]](s32) + ; CHECK-NEXT: $vgpr3 = COPY [[UV3]](s32) + ; CHECK-NEXT: $vgpr4 = COPY [[UV4]](s32) + ; CHECK-NEXT: $vgpr5 = COPY [[UV5]](s32) + ; CHECK-NEXT: $vgpr6 = COPY [[UV6]](s32) + ; CHECK-NEXT: $vgpr7 = COPY [[UV7]](s32) + ; CHECK-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7 ; GREEDY-LABEL: name: s_buffer_load_v8f32_vgpr_offset_add_4068 ; GREEDY: bb.1 (%ir-block.0): - ; GREEDY: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $vgpr0 - ; GREEDY: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr2 - ; GREEDY: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr3 - ; GREEDY: [[COPY2:%[0-9]+]]:sgpr(s32) = COPY $sgpr4 - ; GREEDY: [[COPY3:%[0-9]+]]:sgpr(s32) = COPY $sgpr5 - ; GREEDY: [[BUILD_VECTOR:%[0-9]+]]:sgpr(<4 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32) - ; GREEDY: [[COPY4:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 - ; GREEDY: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 4068 - ; GREEDY: [[COPY5:%[0-9]+]]:vgpr(s32) = COPY [[C]](s32) - ; GREEDY: [[ADD:%[0-9]+]]:vgpr(s32) = G_ADD [[COPY4]], [[COPY5]] - ; GREEDY: [[C1:%[0-9]+]]:vgpr(s32) = G_CONSTANT i32 0 - ; GREEDY: [[AMDGPU_BUFFER_LOAD:%[0-9]+]]:vgpr(<4 x s32>) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR]](<4 x s32>), [[C1]](s32), [[COPY4]], [[C]], 0, 0, 0 :: (dereferenceable invariant load (s128), align 4) - ; GREEDY: [[AMDGPU_BUFFER_LOAD1:%[0-9]+]]:vgpr(<4 x s32>) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR]](<4 x s32>), [[C1]](s32), [[COPY4]], [[C]], 16, 0, 0 :: (dereferenceable invariant load (s128), align 4) - ; GREEDY: [[CONCAT_VECTORS:%[0-9]+]]:vgpr(<8 x s32>) = G_CONCAT_VECTORS [[AMDGPU_BUFFER_LOAD]](<4 x s32>), [[AMDGPU_BUFFER_LOAD1]](<4 x s32>) - ; GREEDY: [[UV:%[0-9]+]]:vgpr(s32), [[UV1:%[0-9]+]]:vgpr(s32), [[UV2:%[0-9]+]]:vgpr(s32), [[UV3:%[0-9]+]]:vgpr(s32), [[UV4:%[0-9]+]]:vgpr(s32), [[UV5:%[0-9]+]]:vgpr(s32), [[UV6:%[0-9]+]]:vgpr(s32), [[UV7:%[0-9]+]]:vgpr(s32) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<8 x s32>) - ; GREEDY: $vgpr0 = COPY [[UV]](s32) - ; GREEDY: $vgpr1 = COPY [[UV1]](s32) - ; GREEDY: $vgpr2 = COPY [[UV2]](s32) - ; GREEDY: $vgpr3 = COPY [[UV3]](s32) - ; GREEDY: $vgpr4 = COPY [[UV4]](s32) - ; GREEDY: $vgpr5 = COPY [[UV5]](s32) - ; GREEDY: $vgpr6 = COPY [[UV6]](s32) - ; GREEDY: $vgpr7 = COPY [[UV7]](s32) - ; GREEDY: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7 + ; GREEDY-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $vgpr0 + ; GREEDY-NEXT: {{ $}} + ; GREEDY-NEXT: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr2 + ; GREEDY-NEXT: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr3 + ; GREEDY-NEXT: [[COPY2:%[0-9]+]]:sgpr(s32) = COPY $sgpr4 + ; GREEDY-NEXT: [[COPY3:%[0-9]+]]:sgpr(s32) = COPY $sgpr5 + ; GREEDY-NEXT: 
[[BUILD_VECTOR:%[0-9]+]]:sgpr(<4 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32) + ; GREEDY-NEXT: [[COPY4:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 + ; GREEDY-NEXT: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 4068 + ; GREEDY-NEXT: [[COPY5:%[0-9]+]]:vgpr(s32) = COPY [[C]](s32) + ; GREEDY-NEXT: [[ADD:%[0-9]+]]:vgpr(s32) = G_ADD [[COPY4]], [[COPY5]] + ; GREEDY-NEXT: [[C1:%[0-9]+]]:vgpr(s32) = G_CONSTANT i32 0 + ; GREEDY-NEXT: [[AMDGPU_BUFFER_LOAD:%[0-9]+]]:vgpr(<4 x s32>) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR]](<4 x s32>), [[C1]](s32), [[COPY4]], [[C]], 0, 0, 0 :: (dereferenceable invariant load (s128), align 4) + ; GREEDY-NEXT: [[AMDGPU_BUFFER_LOAD1:%[0-9]+]]:vgpr(<4 x s32>) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR]](<4 x s32>), [[C1]](s32), [[COPY4]], [[C]], 16, 0, 0 :: (dereferenceable invariant load (s128), align 4) + ; GREEDY-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:vgpr(<8 x s32>) = G_CONCAT_VECTORS [[AMDGPU_BUFFER_LOAD]](<4 x s32>), [[AMDGPU_BUFFER_LOAD1]](<4 x s32>) + ; GREEDY-NEXT: [[UV:%[0-9]+]]:vgpr(s32), [[UV1:%[0-9]+]]:vgpr(s32), [[UV2:%[0-9]+]]:vgpr(s32), [[UV3:%[0-9]+]]:vgpr(s32), [[UV4:%[0-9]+]]:vgpr(s32), [[UV5:%[0-9]+]]:vgpr(s32), [[UV6:%[0-9]+]]:vgpr(s32), [[UV7:%[0-9]+]]:vgpr(s32) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<8 x s32>) + ; GREEDY-NEXT: $vgpr0 = COPY [[UV]](s32) + ; GREEDY-NEXT: $vgpr1 = COPY [[UV1]](s32) + ; GREEDY-NEXT: $vgpr2 = COPY [[UV2]](s32) + ; GREEDY-NEXT: $vgpr3 = COPY [[UV3]](s32) + ; GREEDY-NEXT: $vgpr4 = COPY [[UV4]](s32) + ; GREEDY-NEXT: $vgpr5 = COPY [[UV5]](s32) + ; GREEDY-NEXT: $vgpr6 = COPY [[UV6]](s32) + ; GREEDY-NEXT: $vgpr7 = COPY [[UV7]](s32) + ; GREEDY-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7 %soffset = add i32 %soffset.base, 4068 %val = call <8 x float> @llvm.amdgcn.s.buffer.load.v8f32(<4 x i32> %rsrc, i32 %soffset, i32 0) ret <8 x float> %val @@ -1362,78 +1412,80 @@ define amdgpu_ps <8 x float> @s_buffer_load_v8f32_vgpr_offset_add_4068(<4 x i32> define amdgpu_ps <16 x float> @s_buffer_load_v16f32_vgpr_offset_add_4032(<4 x i32> inreg %rsrc, i32 %soffset.base) { ; CHECK-LABEL: name: s_buffer_load_v16f32_vgpr_offset_add_4032 ; CHECK: bb.1 (%ir-block.0): - ; CHECK: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $vgpr0 - ; CHECK: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr2 - ; CHECK: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr3 - ; CHECK: [[COPY2:%[0-9]+]]:sgpr(s32) = COPY $sgpr4 - ; CHECK: [[COPY3:%[0-9]+]]:sgpr(s32) = COPY $sgpr5 - ; CHECK: [[BUILD_VECTOR:%[0-9]+]]:sgpr(<4 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32) - ; CHECK: [[COPY4:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 - ; CHECK: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 4032 - ; CHECK: [[COPY5:%[0-9]+]]:vgpr(s32) = COPY [[C]](s32) - ; CHECK: [[ADD:%[0-9]+]]:vgpr(s32) = G_ADD [[COPY4]], [[COPY5]] - ; CHECK: [[C1:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 0 - ; CHECK: [[C2:%[0-9]+]]:vgpr(s32) = G_CONSTANT i32 0 - ; CHECK: [[AMDGPU_BUFFER_LOAD:%[0-9]+]]:vgpr(<4 x s32>) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR]](<4 x s32>), [[C2]](s32), [[COPY4]], [[C1]], 4032, 0, 0 :: (dereferenceable invariant load (s128), align 4) - ; CHECK: [[AMDGPU_BUFFER_LOAD1:%[0-9]+]]:vgpr(<4 x s32>) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR]](<4 x s32>), [[C2]](s32), [[COPY4]], [[C1]], 4048, 0, 0 :: (dereferenceable invariant load (s128), align 4) - ; CHECK: [[AMDGPU_BUFFER_LOAD2:%[0-9]+]]:vgpr(<4 x s32>) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR]](<4 x s32>), [[C2]](s32), 
[[COPY4]], [[C1]], 4064, 0, 0 :: (dereferenceable invariant load (s128) from unknown-address + 16, align 4) - ; CHECK: [[AMDGPU_BUFFER_LOAD3:%[0-9]+]]:vgpr(<4 x s32>) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR]](<4 x s32>), [[C2]](s32), [[COPY4]], [[C1]], 4080, 0, 0 :: (dereferenceable invariant load (s128) from unknown-address + 48, align 4) - ; CHECK: [[CONCAT_VECTORS:%[0-9]+]]:vgpr(<16 x s32>) = G_CONCAT_VECTORS [[AMDGPU_BUFFER_LOAD]](<4 x s32>), [[AMDGPU_BUFFER_LOAD1]](<4 x s32>), [[AMDGPU_BUFFER_LOAD2]](<4 x s32>), [[AMDGPU_BUFFER_LOAD3]](<4 x s32>) - ; CHECK: [[UV:%[0-9]+]]:vgpr(s32), [[UV1:%[0-9]+]]:vgpr(s32), [[UV2:%[0-9]+]]:vgpr(s32), [[UV3:%[0-9]+]]:vgpr(s32), [[UV4:%[0-9]+]]:vgpr(s32), [[UV5:%[0-9]+]]:vgpr(s32), [[UV6:%[0-9]+]]:vgpr(s32), [[UV7:%[0-9]+]]:vgpr(s32), [[UV8:%[0-9]+]]:vgpr(s32), [[UV9:%[0-9]+]]:vgpr(s32), [[UV10:%[0-9]+]]:vgpr(s32), [[UV11:%[0-9]+]]:vgpr(s32), [[UV12:%[0-9]+]]:vgpr(s32), [[UV13:%[0-9]+]]:vgpr(s32), [[UV14:%[0-9]+]]:vgpr(s32), [[UV15:%[0-9]+]]:vgpr(s32) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<16 x s32>) - ; CHECK: $vgpr0 = COPY [[UV]](s32) - ; CHECK: $vgpr1 = COPY [[UV1]](s32) - ; CHECK: $vgpr2 = COPY [[UV2]](s32) - ; CHECK: $vgpr3 = COPY [[UV3]](s32) - ; CHECK: $vgpr4 = COPY [[UV4]](s32) - ; CHECK: $vgpr5 = COPY [[UV5]](s32) - ; CHECK: $vgpr6 = COPY [[UV6]](s32) - ; CHECK: $vgpr7 = COPY [[UV7]](s32) - ; CHECK: $vgpr8 = COPY [[UV8]](s32) - ; CHECK: $vgpr9 = COPY [[UV9]](s32) - ; CHECK: $vgpr10 = COPY [[UV10]](s32) - ; CHECK: $vgpr11 = COPY [[UV11]](s32) - ; CHECK: $vgpr12 = COPY [[UV12]](s32) - ; CHECK: $vgpr13 = COPY [[UV13]](s32) - ; CHECK: $vgpr14 = COPY [[UV14]](s32) - ; CHECK: $vgpr15 = COPY [[UV15]](s32) - ; CHECK: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7, implicit $vgpr8, implicit $vgpr9, implicit $vgpr10, implicit $vgpr11, implicit $vgpr12, implicit $vgpr13, implicit $vgpr14, implicit $vgpr15 + ; CHECK-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $vgpr0 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr2 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr3 + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:sgpr(s32) = COPY $sgpr4 + ; CHECK-NEXT: [[COPY3:%[0-9]+]]:sgpr(s32) = COPY $sgpr5 + ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:sgpr(<4 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32) + ; CHECK-NEXT: [[COPY4:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 + ; CHECK-NEXT: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 4032 + ; CHECK-NEXT: [[COPY5:%[0-9]+]]:vgpr(s32) = COPY [[C]](s32) + ; CHECK-NEXT: [[ADD:%[0-9]+]]:vgpr(s32) = G_ADD [[COPY4]], [[COPY5]] + ; CHECK-NEXT: [[C1:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 0 + ; CHECK-NEXT: [[C2:%[0-9]+]]:vgpr(s32) = G_CONSTANT i32 0 + ; CHECK-NEXT: [[AMDGPU_BUFFER_LOAD:%[0-9]+]]:vgpr(<4 x s32>) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR]](<4 x s32>), [[C2]](s32), [[COPY4]], [[C1]], 4032, 0, 0 :: (dereferenceable invariant load (s128), align 4) + ; CHECK-NEXT: [[AMDGPU_BUFFER_LOAD1:%[0-9]+]]:vgpr(<4 x s32>) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR]](<4 x s32>), [[C2]](s32), [[COPY4]], [[C1]], 4048, 0, 0 :: (dereferenceable invariant load (s128), align 4) + ; CHECK-NEXT: [[AMDGPU_BUFFER_LOAD2:%[0-9]+]]:vgpr(<4 x s32>) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR]](<4 x s32>), [[C2]](s32), [[COPY4]], [[C1]], 4064, 0, 0 :: (dereferenceable invariant load (s128) from unknown-address + 16, align 4) + ; CHECK-NEXT: [[AMDGPU_BUFFER_LOAD3:%[0-9]+]]:vgpr(<4 x s32>) = G_AMDGPU_BUFFER_LOAD 
[[BUILD_VECTOR]](<4 x s32>), [[C2]](s32), [[COPY4]], [[C1]], 4080, 0, 0 :: (dereferenceable invariant load (s128) from unknown-address + 48, align 4) + ; CHECK-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:vgpr(<16 x s32>) = G_CONCAT_VECTORS [[AMDGPU_BUFFER_LOAD]](<4 x s32>), [[AMDGPU_BUFFER_LOAD1]](<4 x s32>), [[AMDGPU_BUFFER_LOAD2]](<4 x s32>), [[AMDGPU_BUFFER_LOAD3]](<4 x s32>) + ; CHECK-NEXT: [[UV:%[0-9]+]]:vgpr(s32), [[UV1:%[0-9]+]]:vgpr(s32), [[UV2:%[0-9]+]]:vgpr(s32), [[UV3:%[0-9]+]]:vgpr(s32), [[UV4:%[0-9]+]]:vgpr(s32), [[UV5:%[0-9]+]]:vgpr(s32), [[UV6:%[0-9]+]]:vgpr(s32), [[UV7:%[0-9]+]]:vgpr(s32), [[UV8:%[0-9]+]]:vgpr(s32), [[UV9:%[0-9]+]]:vgpr(s32), [[UV10:%[0-9]+]]:vgpr(s32), [[UV11:%[0-9]+]]:vgpr(s32), [[UV12:%[0-9]+]]:vgpr(s32), [[UV13:%[0-9]+]]:vgpr(s32), [[UV14:%[0-9]+]]:vgpr(s32), [[UV15:%[0-9]+]]:vgpr(s32) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<16 x s32>) + ; CHECK-NEXT: $vgpr0 = COPY [[UV]](s32) + ; CHECK-NEXT: $vgpr1 = COPY [[UV1]](s32) + ; CHECK-NEXT: $vgpr2 = COPY [[UV2]](s32) + ; CHECK-NEXT: $vgpr3 = COPY [[UV3]](s32) + ; CHECK-NEXT: $vgpr4 = COPY [[UV4]](s32) + ; CHECK-NEXT: $vgpr5 = COPY [[UV5]](s32) + ; CHECK-NEXT: $vgpr6 = COPY [[UV6]](s32) + ; CHECK-NEXT: $vgpr7 = COPY [[UV7]](s32) + ; CHECK-NEXT: $vgpr8 = COPY [[UV8]](s32) + ; CHECK-NEXT: $vgpr9 = COPY [[UV9]](s32) + ; CHECK-NEXT: $vgpr10 = COPY [[UV10]](s32) + ; CHECK-NEXT: $vgpr11 = COPY [[UV11]](s32) + ; CHECK-NEXT: $vgpr12 = COPY [[UV12]](s32) + ; CHECK-NEXT: $vgpr13 = COPY [[UV13]](s32) + ; CHECK-NEXT: $vgpr14 = COPY [[UV14]](s32) + ; CHECK-NEXT: $vgpr15 = COPY [[UV15]](s32) + ; CHECK-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7, implicit $vgpr8, implicit $vgpr9, implicit $vgpr10, implicit $vgpr11, implicit $vgpr12, implicit $vgpr13, implicit $vgpr14, implicit $vgpr15 ; GREEDY-LABEL: name: s_buffer_load_v16f32_vgpr_offset_add_4032 ; GREEDY: bb.1 (%ir-block.0): - ; GREEDY: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $vgpr0 - ; GREEDY: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr2 - ; GREEDY: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr3 - ; GREEDY: [[COPY2:%[0-9]+]]:sgpr(s32) = COPY $sgpr4 - ; GREEDY: [[COPY3:%[0-9]+]]:sgpr(s32) = COPY $sgpr5 - ; GREEDY: [[BUILD_VECTOR:%[0-9]+]]:sgpr(<4 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32) - ; GREEDY: [[COPY4:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 - ; GREEDY: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 4032 - ; GREEDY: [[COPY5:%[0-9]+]]:vgpr(s32) = COPY [[C]](s32) - ; GREEDY: [[ADD:%[0-9]+]]:vgpr(s32) = G_ADD [[COPY4]], [[COPY5]] - ; GREEDY: [[C1:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 0 - ; GREEDY: [[C2:%[0-9]+]]:vgpr(s32) = G_CONSTANT i32 0 - ; GREEDY: [[AMDGPU_BUFFER_LOAD:%[0-9]+]]:vgpr(<4 x s32>) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR]](<4 x s32>), [[C2]](s32), [[COPY4]], [[C1]], 4032, 0, 0 :: (dereferenceable invariant load (s128), align 4) - ; GREEDY: [[AMDGPU_BUFFER_LOAD1:%[0-9]+]]:vgpr(<4 x s32>) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR]](<4 x s32>), [[C2]](s32), [[COPY4]], [[C1]], 4048, 0, 0 :: (dereferenceable invariant load (s128), align 4) - ; GREEDY: [[AMDGPU_BUFFER_LOAD2:%[0-9]+]]:vgpr(<4 x s32>) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR]](<4 x s32>), [[C2]](s32), [[COPY4]], [[C1]], 4064, 0, 0 :: (dereferenceable invariant load (s128) from unknown-address + 16, align 4) - ; GREEDY: [[AMDGPU_BUFFER_LOAD3:%[0-9]+]]:vgpr(<4 x s32>) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR]](<4 x s32>), [[C2]](s32), [[COPY4]], [[C1]], 4080, 0, 0 :: 
(dereferenceable invariant load (s128) from unknown-address + 48, align 4) - ; GREEDY: [[CONCAT_VECTORS:%[0-9]+]]:vgpr(<16 x s32>) = G_CONCAT_VECTORS [[AMDGPU_BUFFER_LOAD]](<4 x s32>), [[AMDGPU_BUFFER_LOAD1]](<4 x s32>), [[AMDGPU_BUFFER_LOAD2]](<4 x s32>), [[AMDGPU_BUFFER_LOAD3]](<4 x s32>) - ; GREEDY: [[UV:%[0-9]+]]:vgpr(s32), [[UV1:%[0-9]+]]:vgpr(s32), [[UV2:%[0-9]+]]:vgpr(s32), [[UV3:%[0-9]+]]:vgpr(s32), [[UV4:%[0-9]+]]:vgpr(s32), [[UV5:%[0-9]+]]:vgpr(s32), [[UV6:%[0-9]+]]:vgpr(s32), [[UV7:%[0-9]+]]:vgpr(s32), [[UV8:%[0-9]+]]:vgpr(s32), [[UV9:%[0-9]+]]:vgpr(s32), [[UV10:%[0-9]+]]:vgpr(s32), [[UV11:%[0-9]+]]:vgpr(s32), [[UV12:%[0-9]+]]:vgpr(s32), [[UV13:%[0-9]+]]:vgpr(s32), [[UV14:%[0-9]+]]:vgpr(s32), [[UV15:%[0-9]+]]:vgpr(s32) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<16 x s32>) - ; GREEDY: $vgpr0 = COPY [[UV]](s32) - ; GREEDY: $vgpr1 = COPY [[UV1]](s32) - ; GREEDY: $vgpr2 = COPY [[UV2]](s32) - ; GREEDY: $vgpr3 = COPY [[UV3]](s32) - ; GREEDY: $vgpr4 = COPY [[UV4]](s32) - ; GREEDY: $vgpr5 = COPY [[UV5]](s32) - ; GREEDY: $vgpr6 = COPY [[UV6]](s32) - ; GREEDY: $vgpr7 = COPY [[UV7]](s32) - ; GREEDY: $vgpr8 = COPY [[UV8]](s32) - ; GREEDY: $vgpr9 = COPY [[UV9]](s32) - ; GREEDY: $vgpr10 = COPY [[UV10]](s32) - ; GREEDY: $vgpr11 = COPY [[UV11]](s32) - ; GREEDY: $vgpr12 = COPY [[UV12]](s32) - ; GREEDY: $vgpr13 = COPY [[UV13]](s32) - ; GREEDY: $vgpr14 = COPY [[UV14]](s32) - ; GREEDY: $vgpr15 = COPY [[UV15]](s32) - ; GREEDY: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7, implicit $vgpr8, implicit $vgpr9, implicit $vgpr10, implicit $vgpr11, implicit $vgpr12, implicit $vgpr13, implicit $vgpr14, implicit $vgpr15 + ; GREEDY-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $vgpr0 + ; GREEDY-NEXT: {{ $}} + ; GREEDY-NEXT: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr2 + ; GREEDY-NEXT: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr3 + ; GREEDY-NEXT: [[COPY2:%[0-9]+]]:sgpr(s32) = COPY $sgpr4 + ; GREEDY-NEXT: [[COPY3:%[0-9]+]]:sgpr(s32) = COPY $sgpr5 + ; GREEDY-NEXT: [[BUILD_VECTOR:%[0-9]+]]:sgpr(<4 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32) + ; GREEDY-NEXT: [[COPY4:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 + ; GREEDY-NEXT: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 4032 + ; GREEDY-NEXT: [[COPY5:%[0-9]+]]:vgpr(s32) = COPY [[C]](s32) + ; GREEDY-NEXT: [[ADD:%[0-9]+]]:vgpr(s32) = G_ADD [[COPY4]], [[COPY5]] + ; GREEDY-NEXT: [[C1:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 0 + ; GREEDY-NEXT: [[C2:%[0-9]+]]:vgpr(s32) = G_CONSTANT i32 0 + ; GREEDY-NEXT: [[AMDGPU_BUFFER_LOAD:%[0-9]+]]:vgpr(<4 x s32>) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR]](<4 x s32>), [[C2]](s32), [[COPY4]], [[C1]], 4032, 0, 0 :: (dereferenceable invariant load (s128), align 4) + ; GREEDY-NEXT: [[AMDGPU_BUFFER_LOAD1:%[0-9]+]]:vgpr(<4 x s32>) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR]](<4 x s32>), [[C2]](s32), [[COPY4]], [[C1]], 4048, 0, 0 :: (dereferenceable invariant load (s128), align 4) + ; GREEDY-NEXT: [[AMDGPU_BUFFER_LOAD2:%[0-9]+]]:vgpr(<4 x s32>) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR]](<4 x s32>), [[C2]](s32), [[COPY4]], [[C1]], 4064, 0, 0 :: (dereferenceable invariant load (s128) from unknown-address + 16, align 4) + ; GREEDY-NEXT: [[AMDGPU_BUFFER_LOAD3:%[0-9]+]]:vgpr(<4 x s32>) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR]](<4 x s32>), [[C2]](s32), [[COPY4]], [[C1]], 4080, 0, 0 :: (dereferenceable invariant load (s128) from unknown-address + 48, align 4) + ; GREEDY-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:vgpr(<16 x s32>) = G_CONCAT_VECTORS 
[[AMDGPU_BUFFER_LOAD]](<4 x s32>), [[AMDGPU_BUFFER_LOAD1]](<4 x s32>), [[AMDGPU_BUFFER_LOAD2]](<4 x s32>), [[AMDGPU_BUFFER_LOAD3]](<4 x s32>) + ; GREEDY-NEXT: [[UV:%[0-9]+]]:vgpr(s32), [[UV1:%[0-9]+]]:vgpr(s32), [[UV2:%[0-9]+]]:vgpr(s32), [[UV3:%[0-9]+]]:vgpr(s32), [[UV4:%[0-9]+]]:vgpr(s32), [[UV5:%[0-9]+]]:vgpr(s32), [[UV6:%[0-9]+]]:vgpr(s32), [[UV7:%[0-9]+]]:vgpr(s32), [[UV8:%[0-9]+]]:vgpr(s32), [[UV9:%[0-9]+]]:vgpr(s32), [[UV10:%[0-9]+]]:vgpr(s32), [[UV11:%[0-9]+]]:vgpr(s32), [[UV12:%[0-9]+]]:vgpr(s32), [[UV13:%[0-9]+]]:vgpr(s32), [[UV14:%[0-9]+]]:vgpr(s32), [[UV15:%[0-9]+]]:vgpr(s32) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<16 x s32>) + ; GREEDY-NEXT: $vgpr0 = COPY [[UV]](s32) + ; GREEDY-NEXT: $vgpr1 = COPY [[UV1]](s32) + ; GREEDY-NEXT: $vgpr2 = COPY [[UV2]](s32) + ; GREEDY-NEXT: $vgpr3 = COPY [[UV3]](s32) + ; GREEDY-NEXT: $vgpr4 = COPY [[UV4]](s32) + ; GREEDY-NEXT: $vgpr5 = COPY [[UV5]](s32) + ; GREEDY-NEXT: $vgpr6 = COPY [[UV6]](s32) + ; GREEDY-NEXT: $vgpr7 = COPY [[UV7]](s32) + ; GREEDY-NEXT: $vgpr8 = COPY [[UV8]](s32) + ; GREEDY-NEXT: $vgpr9 = COPY [[UV9]](s32) + ; GREEDY-NEXT: $vgpr10 = COPY [[UV10]](s32) + ; GREEDY-NEXT: $vgpr11 = COPY [[UV11]](s32) + ; GREEDY-NEXT: $vgpr12 = COPY [[UV12]](s32) + ; GREEDY-NEXT: $vgpr13 = COPY [[UV13]](s32) + ; GREEDY-NEXT: $vgpr14 = COPY [[UV14]](s32) + ; GREEDY-NEXT: $vgpr15 = COPY [[UV15]](s32) + ; GREEDY-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7, implicit $vgpr8, implicit $vgpr9, implicit $vgpr10, implicit $vgpr11, implicit $vgpr12, implicit $vgpr13, implicit $vgpr14, implicit $vgpr15 %soffset = add i32 %soffset.base, 4032 %val = call <16 x float> @llvm.amdgcn.s.buffer.load.v16f32(<4 x i32> %rsrc, i32 %soffset, i32 0) ret <16 x float> %val @@ -1442,76 +1494,78 @@ define amdgpu_ps <16 x float> @s_buffer_load_v16f32_vgpr_offset_add_4032(<4 x i3 define amdgpu_ps <16 x float> @s_buffer_load_v16f32_vgpr_offset_add_4036(<4 x i32> inreg %rsrc, i32 %soffset.base) { ; CHECK-LABEL: name: s_buffer_load_v16f32_vgpr_offset_add_4036 ; CHECK: bb.1 (%ir-block.0): - ; CHECK: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $vgpr0 - ; CHECK: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr2 - ; CHECK: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr3 - ; CHECK: [[COPY2:%[0-9]+]]:sgpr(s32) = COPY $sgpr4 - ; CHECK: [[COPY3:%[0-9]+]]:sgpr(s32) = COPY $sgpr5 - ; CHECK: [[BUILD_VECTOR:%[0-9]+]]:sgpr(<4 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32) - ; CHECK: [[COPY4:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 - ; CHECK: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 4036 - ; CHECK: [[COPY5:%[0-9]+]]:vgpr(s32) = COPY [[C]](s32) - ; CHECK: [[ADD:%[0-9]+]]:vgpr(s32) = G_ADD [[COPY4]], [[COPY5]] - ; CHECK: [[C1:%[0-9]+]]:vgpr(s32) = G_CONSTANT i32 0 - ; CHECK: [[AMDGPU_BUFFER_LOAD:%[0-9]+]]:vgpr(<4 x s32>) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR]](<4 x s32>), [[C1]](s32), [[COPY4]], [[C]], 0, 0, 0 :: (dereferenceable invariant load (s128), align 4) - ; CHECK: [[AMDGPU_BUFFER_LOAD1:%[0-9]+]]:vgpr(<4 x s32>) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR]](<4 x s32>), [[C1]](s32), [[COPY4]], [[C]], 16, 0, 0 :: (dereferenceable invariant load (s128), align 4) - ; CHECK: [[AMDGPU_BUFFER_LOAD2:%[0-9]+]]:vgpr(<4 x s32>) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR]](<4 x s32>), [[C1]](s32), [[COPY4]], [[C]], 32, 0, 0 :: (dereferenceable invariant load (s128) from unknown-address + 16, align 4) - ; CHECK: [[AMDGPU_BUFFER_LOAD3:%[0-9]+]]:vgpr(<4 x s32>) = G_AMDGPU_BUFFER_LOAD 
[[BUILD_VECTOR]](<4 x s32>), [[C1]](s32), [[COPY4]], [[C]], 48, 0, 0 :: (dereferenceable invariant load (s128) from unknown-address + 48, align 4) - ; CHECK: [[CONCAT_VECTORS:%[0-9]+]]:vgpr(<16 x s32>) = G_CONCAT_VECTORS [[AMDGPU_BUFFER_LOAD]](<4 x s32>), [[AMDGPU_BUFFER_LOAD1]](<4 x s32>), [[AMDGPU_BUFFER_LOAD2]](<4 x s32>), [[AMDGPU_BUFFER_LOAD3]](<4 x s32>) - ; CHECK: [[UV:%[0-9]+]]:vgpr(s32), [[UV1:%[0-9]+]]:vgpr(s32), [[UV2:%[0-9]+]]:vgpr(s32), [[UV3:%[0-9]+]]:vgpr(s32), [[UV4:%[0-9]+]]:vgpr(s32), [[UV5:%[0-9]+]]:vgpr(s32), [[UV6:%[0-9]+]]:vgpr(s32), [[UV7:%[0-9]+]]:vgpr(s32), [[UV8:%[0-9]+]]:vgpr(s32), [[UV9:%[0-9]+]]:vgpr(s32), [[UV10:%[0-9]+]]:vgpr(s32), [[UV11:%[0-9]+]]:vgpr(s32), [[UV12:%[0-9]+]]:vgpr(s32), [[UV13:%[0-9]+]]:vgpr(s32), [[UV14:%[0-9]+]]:vgpr(s32), [[UV15:%[0-9]+]]:vgpr(s32) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<16 x s32>) - ; CHECK: $vgpr0 = COPY [[UV]](s32) - ; CHECK: $vgpr1 = COPY [[UV1]](s32) - ; CHECK: $vgpr2 = COPY [[UV2]](s32) - ; CHECK: $vgpr3 = COPY [[UV3]](s32) - ; CHECK: $vgpr4 = COPY [[UV4]](s32) - ; CHECK: $vgpr5 = COPY [[UV5]](s32) - ; CHECK: $vgpr6 = COPY [[UV6]](s32) - ; CHECK: $vgpr7 = COPY [[UV7]](s32) - ; CHECK: $vgpr8 = COPY [[UV8]](s32) - ; CHECK: $vgpr9 = COPY [[UV9]](s32) - ; CHECK: $vgpr10 = COPY [[UV10]](s32) - ; CHECK: $vgpr11 = COPY [[UV11]](s32) - ; CHECK: $vgpr12 = COPY [[UV12]](s32) - ; CHECK: $vgpr13 = COPY [[UV13]](s32) - ; CHECK: $vgpr14 = COPY [[UV14]](s32) - ; CHECK: $vgpr15 = COPY [[UV15]](s32) - ; CHECK: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7, implicit $vgpr8, implicit $vgpr9, implicit $vgpr10, implicit $vgpr11, implicit $vgpr12, implicit $vgpr13, implicit $vgpr14, implicit $vgpr15 + ; CHECK-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $vgpr0 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr2 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr3 + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:sgpr(s32) = COPY $sgpr4 + ; CHECK-NEXT: [[COPY3:%[0-9]+]]:sgpr(s32) = COPY $sgpr5 + ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:sgpr(<4 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32) + ; CHECK-NEXT: [[COPY4:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 + ; CHECK-NEXT: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 4036 + ; CHECK-NEXT: [[COPY5:%[0-9]+]]:vgpr(s32) = COPY [[C]](s32) + ; CHECK-NEXT: [[ADD:%[0-9]+]]:vgpr(s32) = G_ADD [[COPY4]], [[COPY5]] + ; CHECK-NEXT: [[C1:%[0-9]+]]:vgpr(s32) = G_CONSTANT i32 0 + ; CHECK-NEXT: [[AMDGPU_BUFFER_LOAD:%[0-9]+]]:vgpr(<4 x s32>) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR]](<4 x s32>), [[C1]](s32), [[COPY4]], [[C]], 0, 0, 0 :: (dereferenceable invariant load (s128), align 4) + ; CHECK-NEXT: [[AMDGPU_BUFFER_LOAD1:%[0-9]+]]:vgpr(<4 x s32>) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR]](<4 x s32>), [[C1]](s32), [[COPY4]], [[C]], 16, 0, 0 :: (dereferenceable invariant load (s128), align 4) + ; CHECK-NEXT: [[AMDGPU_BUFFER_LOAD2:%[0-9]+]]:vgpr(<4 x s32>) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR]](<4 x s32>), [[C1]](s32), [[COPY4]], [[C]], 32, 0, 0 :: (dereferenceable invariant load (s128) from unknown-address + 16, align 4) + ; CHECK-NEXT: [[AMDGPU_BUFFER_LOAD3:%[0-9]+]]:vgpr(<4 x s32>) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR]](<4 x s32>), [[C1]](s32), [[COPY4]], [[C]], 48, 0, 0 :: (dereferenceable invariant load (s128) from unknown-address + 48, align 4) + ; CHECK-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:vgpr(<16 x s32>) = G_CONCAT_VECTORS [[AMDGPU_BUFFER_LOAD]](<4 x s32>), 
[[AMDGPU_BUFFER_LOAD1]](<4 x s32>), [[AMDGPU_BUFFER_LOAD2]](<4 x s32>), [[AMDGPU_BUFFER_LOAD3]](<4 x s32>) + ; CHECK-NEXT: [[UV:%[0-9]+]]:vgpr(s32), [[UV1:%[0-9]+]]:vgpr(s32), [[UV2:%[0-9]+]]:vgpr(s32), [[UV3:%[0-9]+]]:vgpr(s32), [[UV4:%[0-9]+]]:vgpr(s32), [[UV5:%[0-9]+]]:vgpr(s32), [[UV6:%[0-9]+]]:vgpr(s32), [[UV7:%[0-9]+]]:vgpr(s32), [[UV8:%[0-9]+]]:vgpr(s32), [[UV9:%[0-9]+]]:vgpr(s32), [[UV10:%[0-9]+]]:vgpr(s32), [[UV11:%[0-9]+]]:vgpr(s32), [[UV12:%[0-9]+]]:vgpr(s32), [[UV13:%[0-9]+]]:vgpr(s32), [[UV14:%[0-9]+]]:vgpr(s32), [[UV15:%[0-9]+]]:vgpr(s32) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<16 x s32>) + ; CHECK-NEXT: $vgpr0 = COPY [[UV]](s32) + ; CHECK-NEXT: $vgpr1 = COPY [[UV1]](s32) + ; CHECK-NEXT: $vgpr2 = COPY [[UV2]](s32) + ; CHECK-NEXT: $vgpr3 = COPY [[UV3]](s32) + ; CHECK-NEXT: $vgpr4 = COPY [[UV4]](s32) + ; CHECK-NEXT: $vgpr5 = COPY [[UV5]](s32) + ; CHECK-NEXT: $vgpr6 = COPY [[UV6]](s32) + ; CHECK-NEXT: $vgpr7 = COPY [[UV7]](s32) + ; CHECK-NEXT: $vgpr8 = COPY [[UV8]](s32) + ; CHECK-NEXT: $vgpr9 = COPY [[UV9]](s32) + ; CHECK-NEXT: $vgpr10 = COPY [[UV10]](s32) + ; CHECK-NEXT: $vgpr11 = COPY [[UV11]](s32) + ; CHECK-NEXT: $vgpr12 = COPY [[UV12]](s32) + ; CHECK-NEXT: $vgpr13 = COPY [[UV13]](s32) + ; CHECK-NEXT: $vgpr14 = COPY [[UV14]](s32) + ; CHECK-NEXT: $vgpr15 = COPY [[UV15]](s32) + ; CHECK-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7, implicit $vgpr8, implicit $vgpr9, implicit $vgpr10, implicit $vgpr11, implicit $vgpr12, implicit $vgpr13, implicit $vgpr14, implicit $vgpr15 ; GREEDY-LABEL: name: s_buffer_load_v16f32_vgpr_offset_add_4036 ; GREEDY: bb.1 (%ir-block.0): - ; GREEDY: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $vgpr0 - ; GREEDY: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr2 - ; GREEDY: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr3 - ; GREEDY: [[COPY2:%[0-9]+]]:sgpr(s32) = COPY $sgpr4 - ; GREEDY: [[COPY3:%[0-9]+]]:sgpr(s32) = COPY $sgpr5 - ; GREEDY: [[BUILD_VECTOR:%[0-9]+]]:sgpr(<4 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32) - ; GREEDY: [[COPY4:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 - ; GREEDY: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 4036 - ; GREEDY: [[COPY5:%[0-9]+]]:vgpr(s32) = COPY [[C]](s32) - ; GREEDY: [[ADD:%[0-9]+]]:vgpr(s32) = G_ADD [[COPY4]], [[COPY5]] - ; GREEDY: [[C1:%[0-9]+]]:vgpr(s32) = G_CONSTANT i32 0 - ; GREEDY: [[AMDGPU_BUFFER_LOAD:%[0-9]+]]:vgpr(<4 x s32>) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR]](<4 x s32>), [[C1]](s32), [[COPY4]], [[C]], 0, 0, 0 :: (dereferenceable invariant load (s128), align 4) - ; GREEDY: [[AMDGPU_BUFFER_LOAD1:%[0-9]+]]:vgpr(<4 x s32>) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR]](<4 x s32>), [[C1]](s32), [[COPY4]], [[C]], 16, 0, 0 :: (dereferenceable invariant load (s128), align 4) - ; GREEDY: [[AMDGPU_BUFFER_LOAD2:%[0-9]+]]:vgpr(<4 x s32>) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR]](<4 x s32>), [[C1]](s32), [[COPY4]], [[C]], 32, 0, 0 :: (dereferenceable invariant load (s128) from unknown-address + 16, align 4) - ; GREEDY: [[AMDGPU_BUFFER_LOAD3:%[0-9]+]]:vgpr(<4 x s32>) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR]](<4 x s32>), [[C1]](s32), [[COPY4]], [[C]], 48, 0, 0 :: (dereferenceable invariant load (s128) from unknown-address + 48, align 4) - ; GREEDY: [[CONCAT_VECTORS:%[0-9]+]]:vgpr(<16 x s32>) = G_CONCAT_VECTORS [[AMDGPU_BUFFER_LOAD]](<4 x s32>), [[AMDGPU_BUFFER_LOAD1]](<4 x s32>), [[AMDGPU_BUFFER_LOAD2]](<4 x s32>), [[AMDGPU_BUFFER_LOAD3]](<4 x s32>) - ; GREEDY: [[UV:%[0-9]+]]:vgpr(s32), 
[[UV1:%[0-9]+]]:vgpr(s32), [[UV2:%[0-9]+]]:vgpr(s32), [[UV3:%[0-9]+]]:vgpr(s32), [[UV4:%[0-9]+]]:vgpr(s32), [[UV5:%[0-9]+]]:vgpr(s32), [[UV6:%[0-9]+]]:vgpr(s32), [[UV7:%[0-9]+]]:vgpr(s32), [[UV8:%[0-9]+]]:vgpr(s32), [[UV9:%[0-9]+]]:vgpr(s32), [[UV10:%[0-9]+]]:vgpr(s32), [[UV11:%[0-9]+]]:vgpr(s32), [[UV12:%[0-9]+]]:vgpr(s32), [[UV13:%[0-9]+]]:vgpr(s32), [[UV14:%[0-9]+]]:vgpr(s32), [[UV15:%[0-9]+]]:vgpr(s32) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<16 x s32>) - ; GREEDY: $vgpr0 = COPY [[UV]](s32) - ; GREEDY: $vgpr1 = COPY [[UV1]](s32) - ; GREEDY: $vgpr2 = COPY [[UV2]](s32) - ; GREEDY: $vgpr3 = COPY [[UV3]](s32) - ; GREEDY: $vgpr4 = COPY [[UV4]](s32) - ; GREEDY: $vgpr5 = COPY [[UV5]](s32) - ; GREEDY: $vgpr6 = COPY [[UV6]](s32) - ; GREEDY: $vgpr7 = COPY [[UV7]](s32) - ; GREEDY: $vgpr8 = COPY [[UV8]](s32) - ; GREEDY: $vgpr9 = COPY [[UV9]](s32) - ; GREEDY: $vgpr10 = COPY [[UV10]](s32) - ; GREEDY: $vgpr11 = COPY [[UV11]](s32) - ; GREEDY: $vgpr12 = COPY [[UV12]](s32) - ; GREEDY: $vgpr13 = COPY [[UV13]](s32) - ; GREEDY: $vgpr14 = COPY [[UV14]](s32) - ; GREEDY: $vgpr15 = COPY [[UV15]](s32) - ; GREEDY: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7, implicit $vgpr8, implicit $vgpr9, implicit $vgpr10, implicit $vgpr11, implicit $vgpr12, implicit $vgpr13, implicit $vgpr14, implicit $vgpr15 + ; GREEDY-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $vgpr0 + ; GREEDY-NEXT: {{ $}} + ; GREEDY-NEXT: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr2 + ; GREEDY-NEXT: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr3 + ; GREEDY-NEXT: [[COPY2:%[0-9]+]]:sgpr(s32) = COPY $sgpr4 + ; GREEDY-NEXT: [[COPY3:%[0-9]+]]:sgpr(s32) = COPY $sgpr5 + ; GREEDY-NEXT: [[BUILD_VECTOR:%[0-9]+]]:sgpr(<4 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32) + ; GREEDY-NEXT: [[COPY4:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 + ; GREEDY-NEXT: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 4036 + ; GREEDY-NEXT: [[COPY5:%[0-9]+]]:vgpr(s32) = COPY [[C]](s32) + ; GREEDY-NEXT: [[ADD:%[0-9]+]]:vgpr(s32) = G_ADD [[COPY4]], [[COPY5]] + ; GREEDY-NEXT: [[C1:%[0-9]+]]:vgpr(s32) = G_CONSTANT i32 0 + ; GREEDY-NEXT: [[AMDGPU_BUFFER_LOAD:%[0-9]+]]:vgpr(<4 x s32>) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR]](<4 x s32>), [[C1]](s32), [[COPY4]], [[C]], 0, 0, 0 :: (dereferenceable invariant load (s128), align 4) + ; GREEDY-NEXT: [[AMDGPU_BUFFER_LOAD1:%[0-9]+]]:vgpr(<4 x s32>) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR]](<4 x s32>), [[C1]](s32), [[COPY4]], [[C]], 16, 0, 0 :: (dereferenceable invariant load (s128), align 4) + ; GREEDY-NEXT: [[AMDGPU_BUFFER_LOAD2:%[0-9]+]]:vgpr(<4 x s32>) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR]](<4 x s32>), [[C1]](s32), [[COPY4]], [[C]], 32, 0, 0 :: (dereferenceable invariant load (s128) from unknown-address + 16, align 4) + ; GREEDY-NEXT: [[AMDGPU_BUFFER_LOAD3:%[0-9]+]]:vgpr(<4 x s32>) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR]](<4 x s32>), [[C1]](s32), [[COPY4]], [[C]], 48, 0, 0 :: (dereferenceable invariant load (s128) from unknown-address + 48, align 4) + ; GREEDY-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:vgpr(<16 x s32>) = G_CONCAT_VECTORS [[AMDGPU_BUFFER_LOAD]](<4 x s32>), [[AMDGPU_BUFFER_LOAD1]](<4 x s32>), [[AMDGPU_BUFFER_LOAD2]](<4 x s32>), [[AMDGPU_BUFFER_LOAD3]](<4 x s32>) + ; GREEDY-NEXT: [[UV:%[0-9]+]]:vgpr(s32), [[UV1:%[0-9]+]]:vgpr(s32), [[UV2:%[0-9]+]]:vgpr(s32), [[UV3:%[0-9]+]]:vgpr(s32), [[UV4:%[0-9]+]]:vgpr(s32), [[UV5:%[0-9]+]]:vgpr(s32), [[UV6:%[0-9]+]]:vgpr(s32), [[UV7:%[0-9]+]]:vgpr(s32), [[UV8:%[0-9]+]]:vgpr(s32), 
[[UV9:%[0-9]+]]:vgpr(s32), [[UV10:%[0-9]+]]:vgpr(s32), [[UV11:%[0-9]+]]:vgpr(s32), [[UV12:%[0-9]+]]:vgpr(s32), [[UV13:%[0-9]+]]:vgpr(s32), [[UV14:%[0-9]+]]:vgpr(s32), [[UV15:%[0-9]+]]:vgpr(s32) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<16 x s32>) + ; GREEDY-NEXT: $vgpr0 = COPY [[UV]](s32) + ; GREEDY-NEXT: $vgpr1 = COPY [[UV1]](s32) + ; GREEDY-NEXT: $vgpr2 = COPY [[UV2]](s32) + ; GREEDY-NEXT: $vgpr3 = COPY [[UV3]](s32) + ; GREEDY-NEXT: $vgpr4 = COPY [[UV4]](s32) + ; GREEDY-NEXT: $vgpr5 = COPY [[UV5]](s32) + ; GREEDY-NEXT: $vgpr6 = COPY [[UV6]](s32) + ; GREEDY-NEXT: $vgpr7 = COPY [[UV7]](s32) + ; GREEDY-NEXT: $vgpr8 = COPY [[UV8]](s32) + ; GREEDY-NEXT: $vgpr9 = COPY [[UV9]](s32) + ; GREEDY-NEXT: $vgpr10 = COPY [[UV10]](s32) + ; GREEDY-NEXT: $vgpr11 = COPY [[UV11]](s32) + ; GREEDY-NEXT: $vgpr12 = COPY [[UV12]](s32) + ; GREEDY-NEXT: $vgpr13 = COPY [[UV13]](s32) + ; GREEDY-NEXT: $vgpr14 = COPY [[UV14]](s32) + ; GREEDY-NEXT: $vgpr15 = COPY [[UV15]](s32) + ; GREEDY-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7, implicit $vgpr8, implicit $vgpr9, implicit $vgpr10, implicit $vgpr11, implicit $vgpr12, implicit $vgpr13, implicit $vgpr14, implicit $vgpr15 %soffset = add i32 %soffset.base, 4036 %val = call <16 x float> @llvm.amdgcn.s.buffer.load.v16f32(<4 x i32> %rsrc, i32 %soffset, i32 0) ret <16 x float> %val @@ -1521,78 +1575,88 @@ define amdgpu_ps <16 x float> @s_buffer_load_v16f32_vgpr_offset_add_4036(<4 x i3 define amdgpu_ps float @s_buffer_load_f32_vgpr_rsrc(<4 x i32> %rsrc, i32 inreg %soffset) { ; CHECK-LABEL: name: s_buffer_load_f32_vgpr_rsrc ; CHECK: bb.1 (%ir-block.0): - ; CHECK: liveins: $sgpr2, $vgpr0, $vgpr1, $vgpr2, $vgpr3 - ; CHECK: [[COPY:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 - ; CHECK: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY $vgpr1 - ; CHECK: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY $vgpr2 - ; CHECK: [[COPY3:%[0-9]+]]:vgpr(s32) = COPY $vgpr3 - ; CHECK: [[BUILD_VECTOR:%[0-9]+]]:vgpr(<4 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32) - ; CHECK: [[COPY4:%[0-9]+]]:sgpr(s32) = COPY $sgpr2 - ; CHECK: [[COPY5:%[0-9]+]]:vgpr(s32) = COPY [[COPY4]](s32) - ; CHECK: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 0 - ; CHECK: [[C1:%[0-9]+]]:vgpr(s32) = G_CONSTANT i32 0 - ; CHECK: [[DEF:%[0-9]+]]:sreg_64_xexec = IMPLICIT_DEF - ; CHECK: [[UV:%[0-9]+]]:vreg_64(s64), [[UV1:%[0-9]+]]:vreg_64(s64) = G_UNMERGE_VALUES [[BUILD_VECTOR]](<4 x s32>) - ; CHECK: [[S_MOV_B64_term:%[0-9]+]]:sreg_64_xexec = S_MOV_B64_term $exec - ; CHECK: bb.2: - ; CHECK: successors: %bb.3, %bb.2 - ; CHECK: [[PHI:%[0-9]+]]:sreg_64_xexec = PHI [[DEF]], %bb.1, %15, %bb.2 - ; CHECK: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[UV]].sub0(s64), implicit $exec - ; CHECK: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[UV]].sub1(s64), implicit $exec - ; CHECK: [[MV:%[0-9]+]]:sreg_64_xexec(s64) = G_MERGE_VALUES [[V_READFIRSTLANE_B32_]](s32), [[V_READFIRSTLANE_B32_1]](s32) - ; CHECK: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[MV]](s64), [[UV]](s64), implicit $exec - ; CHECK: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[UV1]].sub0(s64), implicit $exec - ; CHECK: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[UV1]].sub1(s64), implicit $exec - ; CHECK: [[MV1:%[0-9]+]]:sreg_64_xexec(s64) = G_MERGE_VALUES [[V_READFIRSTLANE_B32_2]](s32), [[V_READFIRSTLANE_B32_3]](s32) - ; CHECK: 
[[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[MV1]](s64), [[UV1]](s64), implicit $exec - ; CHECK: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_1]], [[V_CMP_EQ_U64_e64_]], implicit-def $scc - ; CHECK: [[BUILD_VECTOR1:%[0-9]+]]:sgpr(<4 x s32>) = G_BUILD_VECTOR [[V_READFIRSTLANE_B32_]](s32), [[V_READFIRSTLANE_B32_1]](s32), [[V_READFIRSTLANE_B32_2]](s32), [[V_READFIRSTLANE_B32_3]](s32) - ; CHECK: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_]], implicit-def $exec, implicit-def $scc, implicit $exec - ; CHECK: [[AMDGPU_BUFFER_LOAD:%[0-9]+]]:vgpr(s32) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR1]](<4 x s32>), [[C1]](s32), [[COPY5]], [[C]], 0, 0, 0 :: (dereferenceable invariant load (s32)) - ; CHECK: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc - ; CHECK: SI_WATERFALL_LOOP %bb.2, implicit $exec - ; CHECK: bb.3: - ; CHECK: $exec = S_MOV_B64_term [[S_MOV_B64_term]] - ; CHECK: bb.4: - ; CHECK: $vgpr0 = COPY [[AMDGPU_BUFFER_LOAD]](s32) - ; CHECK: SI_RETURN_TO_EPILOG implicit $vgpr0 + ; CHECK-NEXT: liveins: $sgpr2, $vgpr0, $vgpr1, $vgpr2, $vgpr3 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY $vgpr1 + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY $vgpr2 + ; CHECK-NEXT: [[COPY3:%[0-9]+]]:vgpr(s32) = COPY $vgpr3 + ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:vgpr(<4 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32) + ; CHECK-NEXT: [[COPY4:%[0-9]+]]:sgpr(s32) = COPY $sgpr2 + ; CHECK-NEXT: [[COPY5:%[0-9]+]]:vgpr(s32) = COPY [[COPY4]](s32) + ; CHECK-NEXT: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 0 + ; CHECK-NEXT: [[C1:%[0-9]+]]:vgpr(s32) = G_CONSTANT i32 0 + ; CHECK-NEXT: [[DEF:%[0-9]+]]:sreg_64_xexec = IMPLICIT_DEF + ; CHECK-NEXT: [[UV:%[0-9]+]]:vreg_64(s64), [[UV1:%[0-9]+]]:vreg_64(s64) = G_UNMERGE_VALUES [[BUILD_VECTOR]](<4 x s32>) + ; CHECK-NEXT: [[S_MOV_B64_term:%[0-9]+]]:sreg_64_xexec = S_MOV_B64_term $exec + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: bb.2: + ; CHECK-NEXT: successors: %bb.3, %bb.2 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[PHI:%[0-9]+]]:sreg_64_xexec = PHI [[DEF]], %bb.1, %15, %bb.2 + ; CHECK-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[UV]].sub0(s64), implicit $exec + ; CHECK-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[UV]].sub1(s64), implicit $exec + ; CHECK-NEXT: [[MV:%[0-9]+]]:sreg_64_xexec(s64) = G_MERGE_VALUES [[V_READFIRSTLANE_B32_]](s32), [[V_READFIRSTLANE_B32_1]](s32) + ; CHECK-NEXT: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[MV]](s64), [[UV]](s64), implicit $exec + ; CHECK-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[UV1]].sub0(s64), implicit $exec + ; CHECK-NEXT: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[UV1]].sub1(s64), implicit $exec + ; CHECK-NEXT: [[MV1:%[0-9]+]]:sreg_64_xexec(s64) = G_MERGE_VALUES [[V_READFIRSTLANE_B32_2]](s32), [[V_READFIRSTLANE_B32_3]](s32) + ; CHECK-NEXT: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[MV1]](s64), [[UV1]](s64), implicit $exec + ; CHECK-NEXT: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_1]], [[V_CMP_EQ_U64_e64_]], implicit-def $scc + ; CHECK-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:sgpr(<4 x s32>) = G_BUILD_VECTOR [[V_READFIRSTLANE_B32_]](s32), [[V_READFIRSTLANE_B32_1]](s32), [[V_READFIRSTLANE_B32_2]](s32), 
[[V_READFIRSTLANE_B32_3]](s32) + ; CHECK-NEXT: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_]], implicit-def $exec, implicit-def $scc, implicit $exec + ; CHECK-NEXT: [[AMDGPU_BUFFER_LOAD:%[0-9]+]]:vgpr(s32) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR1]](<4 x s32>), [[C1]](s32), [[COPY5]], [[C]], 0, 0, 0 :: (dereferenceable invariant load (s32)) + ; CHECK-NEXT: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc + ; CHECK-NEXT: SI_WATERFALL_LOOP %bb.2, implicit $exec + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: bb.3: + ; CHECK-NEXT: $exec = S_MOV_B64_term [[S_MOV_B64_term]] + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: bb.4: + ; CHECK-NEXT: $vgpr0 = COPY [[AMDGPU_BUFFER_LOAD]](s32) + ; CHECK-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0 ; GREEDY-LABEL: name: s_buffer_load_f32_vgpr_rsrc ; GREEDY: bb.1 (%ir-block.0): - ; GREEDY: liveins: $sgpr2, $vgpr0, $vgpr1, $vgpr2, $vgpr3 - ; GREEDY: [[COPY:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 - ; GREEDY: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY $vgpr1 - ; GREEDY: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY $vgpr2 - ; GREEDY: [[COPY3:%[0-9]+]]:vgpr(s32) = COPY $vgpr3 - ; GREEDY: [[BUILD_VECTOR:%[0-9]+]]:vgpr(<4 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32) - ; GREEDY: [[COPY4:%[0-9]+]]:sgpr(s32) = COPY $sgpr2 - ; GREEDY: [[COPY5:%[0-9]+]]:vgpr(s32) = COPY [[COPY4]](s32) - ; GREEDY: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 0 - ; GREEDY: [[C1:%[0-9]+]]:vgpr(s32) = G_CONSTANT i32 0 - ; GREEDY: [[DEF:%[0-9]+]]:sreg_64_xexec = IMPLICIT_DEF - ; GREEDY: [[UV:%[0-9]+]]:vreg_64(s64), [[UV1:%[0-9]+]]:vreg_64(s64) = G_UNMERGE_VALUES [[BUILD_VECTOR]](<4 x s32>) - ; GREEDY: [[S_MOV_B64_term:%[0-9]+]]:sreg_64_xexec = S_MOV_B64_term $exec - ; GREEDY: bb.2: - ; GREEDY: successors: %bb.3, %bb.2 - ; GREEDY: [[PHI:%[0-9]+]]:sreg_64_xexec = PHI [[DEF]], %bb.1, %15, %bb.2 - ; GREEDY: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[UV]].sub0(s64), implicit $exec - ; GREEDY: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[UV]].sub1(s64), implicit $exec - ; GREEDY: [[MV:%[0-9]+]]:sreg_64_xexec(s64) = G_MERGE_VALUES [[V_READFIRSTLANE_B32_]](s32), [[V_READFIRSTLANE_B32_1]](s32) - ; GREEDY: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[MV]](s64), [[UV]](s64), implicit $exec - ; GREEDY: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[UV1]].sub0(s64), implicit $exec - ; GREEDY: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[UV1]].sub1(s64), implicit $exec - ; GREEDY: [[MV1:%[0-9]+]]:sreg_64_xexec(s64) = G_MERGE_VALUES [[V_READFIRSTLANE_B32_2]](s32), [[V_READFIRSTLANE_B32_3]](s32) - ; GREEDY: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[MV1]](s64), [[UV1]](s64), implicit $exec - ; GREEDY: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_1]], [[V_CMP_EQ_U64_e64_]], implicit-def $scc - ; GREEDY: [[BUILD_VECTOR1:%[0-9]+]]:sgpr(<4 x s32>) = G_BUILD_VECTOR [[V_READFIRSTLANE_B32_]](s32), [[V_READFIRSTLANE_B32_1]](s32), [[V_READFIRSTLANE_B32_2]](s32), [[V_READFIRSTLANE_B32_3]](s32) - ; GREEDY: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_]], implicit-def $exec, implicit-def $scc, implicit $exec - ; GREEDY: [[AMDGPU_BUFFER_LOAD:%[0-9]+]]:vgpr(s32) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR1]](<4 x s32>), [[C1]](s32), [[COPY5]], [[C]], 0, 0, 0 :: (dereferenceable invariant load (s32)) - ; GREEDY: $exec = S_XOR_B64_term $exec, 
[[S_AND_SAVEEXEC_B64_]], implicit-def $scc - ; GREEDY: SI_WATERFALL_LOOP %bb.2, implicit $exec - ; GREEDY: bb.3: - ; GREEDY: $exec = S_MOV_B64_term [[S_MOV_B64_term]] - ; GREEDY: bb.4: - ; GREEDY: $vgpr0 = COPY [[AMDGPU_BUFFER_LOAD]](s32) - ; GREEDY: SI_RETURN_TO_EPILOG implicit $vgpr0 + ; GREEDY-NEXT: liveins: $sgpr2, $vgpr0, $vgpr1, $vgpr2, $vgpr3 + ; GREEDY-NEXT: {{ $}} + ; GREEDY-NEXT: [[COPY:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 + ; GREEDY-NEXT: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY $vgpr1 + ; GREEDY-NEXT: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY $vgpr2 + ; GREEDY-NEXT: [[COPY3:%[0-9]+]]:vgpr(s32) = COPY $vgpr3 + ; GREEDY-NEXT: [[BUILD_VECTOR:%[0-9]+]]:vgpr(<4 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32) + ; GREEDY-NEXT: [[COPY4:%[0-9]+]]:sgpr(s32) = COPY $sgpr2 + ; GREEDY-NEXT: [[COPY5:%[0-9]+]]:vgpr(s32) = COPY [[COPY4]](s32) + ; GREEDY-NEXT: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 0 + ; GREEDY-NEXT: [[C1:%[0-9]+]]:vgpr(s32) = G_CONSTANT i32 0 + ; GREEDY-NEXT: [[DEF:%[0-9]+]]:sreg_64_xexec = IMPLICIT_DEF + ; GREEDY-NEXT: [[UV:%[0-9]+]]:vreg_64(s64), [[UV1:%[0-9]+]]:vreg_64(s64) = G_UNMERGE_VALUES [[BUILD_VECTOR]](<4 x s32>) + ; GREEDY-NEXT: [[S_MOV_B64_term:%[0-9]+]]:sreg_64_xexec = S_MOV_B64_term $exec + ; GREEDY-NEXT: {{ $}} + ; GREEDY-NEXT: bb.2: + ; GREEDY-NEXT: successors: %bb.3, %bb.2 + ; GREEDY-NEXT: {{ $}} + ; GREEDY-NEXT: [[PHI:%[0-9]+]]:sreg_64_xexec = PHI [[DEF]], %bb.1, %15, %bb.2 + ; GREEDY-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[UV]].sub0(s64), implicit $exec + ; GREEDY-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[UV]].sub1(s64), implicit $exec + ; GREEDY-NEXT: [[MV:%[0-9]+]]:sreg_64_xexec(s64) = G_MERGE_VALUES [[V_READFIRSTLANE_B32_]](s32), [[V_READFIRSTLANE_B32_1]](s32) + ; GREEDY-NEXT: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[MV]](s64), [[UV]](s64), implicit $exec + ; GREEDY-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[UV1]].sub0(s64), implicit $exec + ; GREEDY-NEXT: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[UV1]].sub1(s64), implicit $exec + ; GREEDY-NEXT: [[MV1:%[0-9]+]]:sreg_64_xexec(s64) = G_MERGE_VALUES [[V_READFIRSTLANE_B32_2]](s32), [[V_READFIRSTLANE_B32_3]](s32) + ; GREEDY-NEXT: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[MV1]](s64), [[UV1]](s64), implicit $exec + ; GREEDY-NEXT: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_1]], [[V_CMP_EQ_U64_e64_]], implicit-def $scc + ; GREEDY-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:sgpr(<4 x s32>) = G_BUILD_VECTOR [[V_READFIRSTLANE_B32_]](s32), [[V_READFIRSTLANE_B32_1]](s32), [[V_READFIRSTLANE_B32_2]](s32), [[V_READFIRSTLANE_B32_3]](s32) + ; GREEDY-NEXT: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_]], implicit-def $exec, implicit-def $scc, implicit $exec + ; GREEDY-NEXT: [[AMDGPU_BUFFER_LOAD:%[0-9]+]]:vgpr(s32) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR1]](<4 x s32>), [[C1]](s32), [[COPY5]], [[C]], 0, 0, 0 :: (dereferenceable invariant load (s32)) + ; GREEDY-NEXT: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc + ; GREEDY-NEXT: SI_WATERFALL_LOOP %bb.2, implicit $exec + ; GREEDY-NEXT: {{ $}} + ; GREEDY-NEXT: bb.3: + ; GREEDY-NEXT: $exec = S_MOV_B64_term [[S_MOV_B64_term]] + ; GREEDY-NEXT: {{ $}} + ; GREEDY-NEXT: bb.4: + ; GREEDY-NEXT: $vgpr0 = COPY [[AMDGPU_BUFFER_LOAD]](s32) + ; GREEDY-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0 
%val = call float @llvm.amdgcn.s.buffer.load.f32(<4 x i32> %rsrc, i32 %soffset, i32 0) ret float %val } @@ -1601,80 +1665,90 @@ define amdgpu_ps float @s_buffer_load_f32_vgpr_rsrc(<4 x i32> %rsrc, i32 inreg % define amdgpu_ps float @s_buffer_load_f32_vgpr_rsrc_soffset_add_4092(<4 x i32> %rsrc, i32 inreg %soffset.base) { ; CHECK-LABEL: name: s_buffer_load_f32_vgpr_rsrc_soffset_add_4092 ; CHECK: bb.1 (%ir-block.0): - ; CHECK: liveins: $sgpr2, $vgpr0, $vgpr1, $vgpr2, $vgpr3 - ; CHECK: [[COPY:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 - ; CHECK: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY $vgpr1 - ; CHECK: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY $vgpr2 - ; CHECK: [[COPY3:%[0-9]+]]:vgpr(s32) = COPY $vgpr3 - ; CHECK: [[BUILD_VECTOR:%[0-9]+]]:vgpr(<4 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32) - ; CHECK: [[COPY4:%[0-9]+]]:sgpr(s32) = COPY $sgpr2 - ; CHECK: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 4092 - ; CHECK: [[ADD:%[0-9]+]]:sgpr(s32) = G_ADD [[COPY4]], [[C]] - ; CHECK: [[C1:%[0-9]+]]:vgpr(s32) = G_CONSTANT i32 0 - ; CHECK: [[C2:%[0-9]+]]:vgpr(s32) = G_CONSTANT i32 0 - ; CHECK: [[DEF:%[0-9]+]]:sreg_64_xexec = IMPLICIT_DEF - ; CHECK: [[UV:%[0-9]+]]:vreg_64(s64), [[UV1:%[0-9]+]]:vreg_64(s64) = G_UNMERGE_VALUES [[BUILD_VECTOR]](<4 x s32>) - ; CHECK: [[S_MOV_B64_term:%[0-9]+]]:sreg_64_xexec = S_MOV_B64_term $exec - ; CHECK: bb.2: - ; CHECK: successors: %bb.3, %bb.2 - ; CHECK: [[PHI:%[0-9]+]]:sreg_64_xexec = PHI [[DEF]], %bb.1, %16, %bb.2 - ; CHECK: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[UV]].sub0(s64), implicit $exec - ; CHECK: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[UV]].sub1(s64), implicit $exec - ; CHECK: [[MV:%[0-9]+]]:sreg_64_xexec(s64) = G_MERGE_VALUES [[V_READFIRSTLANE_B32_]](s32), [[V_READFIRSTLANE_B32_1]](s32) - ; CHECK: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[MV]](s64), [[UV]](s64), implicit $exec - ; CHECK: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[UV1]].sub0(s64), implicit $exec - ; CHECK: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[UV1]].sub1(s64), implicit $exec - ; CHECK: [[MV1:%[0-9]+]]:sreg_64_xexec(s64) = G_MERGE_VALUES [[V_READFIRSTLANE_B32_2]](s32), [[V_READFIRSTLANE_B32_3]](s32) - ; CHECK: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[MV1]](s64), [[UV1]](s64), implicit $exec - ; CHECK: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_1]], [[V_CMP_EQ_U64_e64_]], implicit-def $scc - ; CHECK: [[BUILD_VECTOR1:%[0-9]+]]:sgpr(<4 x s32>) = G_BUILD_VECTOR [[V_READFIRSTLANE_B32_]](s32), [[V_READFIRSTLANE_B32_1]](s32), [[V_READFIRSTLANE_B32_2]](s32), [[V_READFIRSTLANE_B32_3]](s32) - ; CHECK: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_]], implicit-def $exec, implicit-def $scc, implicit $exec - ; CHECK: [[AMDGPU_BUFFER_LOAD:%[0-9]+]]:vgpr(s32) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR1]](<4 x s32>), [[C2]](s32), [[C1]], [[COPY4]], 4092, 0, 0 :: (dereferenceable invariant load (s32)) - ; CHECK: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc - ; CHECK: SI_WATERFALL_LOOP %bb.2, implicit $exec - ; CHECK: bb.3: - ; CHECK: $exec = S_MOV_B64_term [[S_MOV_B64_term]] - ; CHECK: bb.4: - ; CHECK: $vgpr0 = COPY [[AMDGPU_BUFFER_LOAD]](s32) - ; CHECK: SI_RETURN_TO_EPILOG implicit $vgpr0 + ; CHECK-NEXT: liveins: $sgpr2, $vgpr0, $vgpr1, $vgpr2, $vgpr3 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr(s32) = COPY 
$vgpr0 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY $vgpr1 + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY $vgpr2 + ; CHECK-NEXT: [[COPY3:%[0-9]+]]:vgpr(s32) = COPY $vgpr3 + ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:vgpr(<4 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32) + ; CHECK-NEXT: [[COPY4:%[0-9]+]]:sgpr(s32) = COPY $sgpr2 + ; CHECK-NEXT: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 4092 + ; CHECK-NEXT: [[ADD:%[0-9]+]]:sgpr(s32) = G_ADD [[COPY4]], [[C]] + ; CHECK-NEXT: [[C1:%[0-9]+]]:vgpr(s32) = G_CONSTANT i32 0 + ; CHECK-NEXT: [[C2:%[0-9]+]]:vgpr(s32) = G_CONSTANT i32 0 + ; CHECK-NEXT: [[DEF:%[0-9]+]]:sreg_64_xexec = IMPLICIT_DEF + ; CHECK-NEXT: [[UV:%[0-9]+]]:vreg_64(s64), [[UV1:%[0-9]+]]:vreg_64(s64) = G_UNMERGE_VALUES [[BUILD_VECTOR]](<4 x s32>) + ; CHECK-NEXT: [[S_MOV_B64_term:%[0-9]+]]:sreg_64_xexec = S_MOV_B64_term $exec + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: bb.2: + ; CHECK-NEXT: successors: %bb.3, %bb.2 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[PHI:%[0-9]+]]:sreg_64_xexec = PHI [[DEF]], %bb.1, %16, %bb.2 + ; CHECK-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[UV]].sub0(s64), implicit $exec + ; CHECK-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[UV]].sub1(s64), implicit $exec + ; CHECK-NEXT: [[MV:%[0-9]+]]:sreg_64_xexec(s64) = G_MERGE_VALUES [[V_READFIRSTLANE_B32_]](s32), [[V_READFIRSTLANE_B32_1]](s32) + ; CHECK-NEXT: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[MV]](s64), [[UV]](s64), implicit $exec + ; CHECK-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[UV1]].sub0(s64), implicit $exec + ; CHECK-NEXT: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[UV1]].sub1(s64), implicit $exec + ; CHECK-NEXT: [[MV1:%[0-9]+]]:sreg_64_xexec(s64) = G_MERGE_VALUES [[V_READFIRSTLANE_B32_2]](s32), [[V_READFIRSTLANE_B32_3]](s32) + ; CHECK-NEXT: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[MV1]](s64), [[UV1]](s64), implicit $exec + ; CHECK-NEXT: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_1]], [[V_CMP_EQ_U64_e64_]], implicit-def $scc + ; CHECK-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:sgpr(<4 x s32>) = G_BUILD_VECTOR [[V_READFIRSTLANE_B32_]](s32), [[V_READFIRSTLANE_B32_1]](s32), [[V_READFIRSTLANE_B32_2]](s32), [[V_READFIRSTLANE_B32_3]](s32) + ; CHECK-NEXT: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_]], implicit-def $exec, implicit-def $scc, implicit $exec + ; CHECK-NEXT: [[AMDGPU_BUFFER_LOAD:%[0-9]+]]:vgpr(s32) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR1]](<4 x s32>), [[C2]](s32), [[C1]], [[COPY4]], 4092, 0, 0 :: (dereferenceable invariant load (s32)) + ; CHECK-NEXT: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc + ; CHECK-NEXT: SI_WATERFALL_LOOP %bb.2, implicit $exec + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: bb.3: + ; CHECK-NEXT: $exec = S_MOV_B64_term [[S_MOV_B64_term]] + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: bb.4: + ; CHECK-NEXT: $vgpr0 = COPY [[AMDGPU_BUFFER_LOAD]](s32) + ; CHECK-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0 ; GREEDY-LABEL: name: s_buffer_load_f32_vgpr_rsrc_soffset_add_4092 ; GREEDY: bb.1 (%ir-block.0): - ; GREEDY: liveins: $sgpr2, $vgpr0, $vgpr1, $vgpr2, $vgpr3 - ; GREEDY: [[COPY:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 - ; GREEDY: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY $vgpr1 - ; GREEDY: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY $vgpr2 - ; GREEDY: [[COPY3:%[0-9]+]]:vgpr(s32) = COPY $vgpr3 - ; GREEDY: 
[[BUILD_VECTOR:%[0-9]+]]:vgpr(<4 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32) - ; GREEDY: [[COPY4:%[0-9]+]]:sgpr(s32) = COPY $sgpr2 - ; GREEDY: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 4092 - ; GREEDY: [[ADD:%[0-9]+]]:sgpr(s32) = G_ADD [[COPY4]], [[C]] - ; GREEDY: [[C1:%[0-9]+]]:vgpr(s32) = G_CONSTANT i32 0 - ; GREEDY: [[C2:%[0-9]+]]:vgpr(s32) = G_CONSTANT i32 0 - ; GREEDY: [[DEF:%[0-9]+]]:sreg_64_xexec = IMPLICIT_DEF - ; GREEDY: [[UV:%[0-9]+]]:vreg_64(s64), [[UV1:%[0-9]+]]:vreg_64(s64) = G_UNMERGE_VALUES [[BUILD_VECTOR]](<4 x s32>) - ; GREEDY: [[S_MOV_B64_term:%[0-9]+]]:sreg_64_xexec = S_MOV_B64_term $exec - ; GREEDY: bb.2: - ; GREEDY: successors: %bb.3, %bb.2 - ; GREEDY: [[PHI:%[0-9]+]]:sreg_64_xexec = PHI [[DEF]], %bb.1, %16, %bb.2 - ; GREEDY: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[UV]].sub0(s64), implicit $exec - ; GREEDY: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[UV]].sub1(s64), implicit $exec - ; GREEDY: [[MV:%[0-9]+]]:sreg_64_xexec(s64) = G_MERGE_VALUES [[V_READFIRSTLANE_B32_]](s32), [[V_READFIRSTLANE_B32_1]](s32) - ; GREEDY: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[MV]](s64), [[UV]](s64), implicit $exec - ; GREEDY: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[UV1]].sub0(s64), implicit $exec - ; GREEDY: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[UV1]].sub1(s64), implicit $exec - ; GREEDY: [[MV1:%[0-9]+]]:sreg_64_xexec(s64) = G_MERGE_VALUES [[V_READFIRSTLANE_B32_2]](s32), [[V_READFIRSTLANE_B32_3]](s32) - ; GREEDY: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[MV1]](s64), [[UV1]](s64), implicit $exec - ; GREEDY: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_1]], [[V_CMP_EQ_U64_e64_]], implicit-def $scc - ; GREEDY: [[BUILD_VECTOR1:%[0-9]+]]:sgpr(<4 x s32>) = G_BUILD_VECTOR [[V_READFIRSTLANE_B32_]](s32), [[V_READFIRSTLANE_B32_1]](s32), [[V_READFIRSTLANE_B32_2]](s32), [[V_READFIRSTLANE_B32_3]](s32) - ; GREEDY: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_]], implicit-def $exec, implicit-def $scc, implicit $exec - ; GREEDY: [[AMDGPU_BUFFER_LOAD:%[0-9]+]]:vgpr(s32) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR1]](<4 x s32>), [[C2]](s32), [[C1]], [[COPY4]], 4092, 0, 0 :: (dereferenceable invariant load (s32)) - ; GREEDY: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc - ; GREEDY: SI_WATERFALL_LOOP %bb.2, implicit $exec - ; GREEDY: bb.3: - ; GREEDY: $exec = S_MOV_B64_term [[S_MOV_B64_term]] - ; GREEDY: bb.4: - ; GREEDY: $vgpr0 = COPY [[AMDGPU_BUFFER_LOAD]](s32) - ; GREEDY: SI_RETURN_TO_EPILOG implicit $vgpr0 + ; GREEDY-NEXT: liveins: $sgpr2, $vgpr0, $vgpr1, $vgpr2, $vgpr3 + ; GREEDY-NEXT: {{ $}} + ; GREEDY-NEXT: [[COPY:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 + ; GREEDY-NEXT: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY $vgpr1 + ; GREEDY-NEXT: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY $vgpr2 + ; GREEDY-NEXT: [[COPY3:%[0-9]+]]:vgpr(s32) = COPY $vgpr3 + ; GREEDY-NEXT: [[BUILD_VECTOR:%[0-9]+]]:vgpr(<4 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32) + ; GREEDY-NEXT: [[COPY4:%[0-9]+]]:sgpr(s32) = COPY $sgpr2 + ; GREEDY-NEXT: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 4092 + ; GREEDY-NEXT: [[ADD:%[0-9]+]]:sgpr(s32) = G_ADD [[COPY4]], [[C]] + ; GREEDY-NEXT: [[C1:%[0-9]+]]:vgpr(s32) = G_CONSTANT i32 0 + ; GREEDY-NEXT: [[C2:%[0-9]+]]:vgpr(s32) = G_CONSTANT i32 0 + ; GREEDY-NEXT: 
[[DEF:%[0-9]+]]:sreg_64_xexec = IMPLICIT_DEF + ; GREEDY-NEXT: [[UV:%[0-9]+]]:vreg_64(s64), [[UV1:%[0-9]+]]:vreg_64(s64) = G_UNMERGE_VALUES [[BUILD_VECTOR]](<4 x s32>) + ; GREEDY-NEXT: [[S_MOV_B64_term:%[0-9]+]]:sreg_64_xexec = S_MOV_B64_term $exec + ; GREEDY-NEXT: {{ $}} + ; GREEDY-NEXT: bb.2: + ; GREEDY-NEXT: successors: %bb.3, %bb.2 + ; GREEDY-NEXT: {{ $}} + ; GREEDY-NEXT: [[PHI:%[0-9]+]]:sreg_64_xexec = PHI [[DEF]], %bb.1, %16, %bb.2 + ; GREEDY-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[UV]].sub0(s64), implicit $exec + ; GREEDY-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[UV]].sub1(s64), implicit $exec + ; GREEDY-NEXT: [[MV:%[0-9]+]]:sreg_64_xexec(s64) = G_MERGE_VALUES [[V_READFIRSTLANE_B32_]](s32), [[V_READFIRSTLANE_B32_1]](s32) + ; GREEDY-NEXT: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[MV]](s64), [[UV]](s64), implicit $exec + ; GREEDY-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[UV1]].sub0(s64), implicit $exec + ; GREEDY-NEXT: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[UV1]].sub1(s64), implicit $exec + ; GREEDY-NEXT: [[MV1:%[0-9]+]]:sreg_64_xexec(s64) = G_MERGE_VALUES [[V_READFIRSTLANE_B32_2]](s32), [[V_READFIRSTLANE_B32_3]](s32) + ; GREEDY-NEXT: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[MV1]](s64), [[UV1]](s64), implicit $exec + ; GREEDY-NEXT: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_1]], [[V_CMP_EQ_U64_e64_]], implicit-def $scc + ; GREEDY-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:sgpr(<4 x s32>) = G_BUILD_VECTOR [[V_READFIRSTLANE_B32_]](s32), [[V_READFIRSTLANE_B32_1]](s32), [[V_READFIRSTLANE_B32_2]](s32), [[V_READFIRSTLANE_B32_3]](s32) + ; GREEDY-NEXT: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_]], implicit-def $exec, implicit-def $scc, implicit $exec + ; GREEDY-NEXT: [[AMDGPU_BUFFER_LOAD:%[0-9]+]]:vgpr(s32) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR1]](<4 x s32>), [[C2]](s32), [[C1]], [[COPY4]], 4092, 0, 0 :: (dereferenceable invariant load (s32)) + ; GREEDY-NEXT: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc + ; GREEDY-NEXT: SI_WATERFALL_LOOP %bb.2, implicit $exec + ; GREEDY-NEXT: {{ $}} + ; GREEDY-NEXT: bb.3: + ; GREEDY-NEXT: $exec = S_MOV_B64_term [[S_MOV_B64_term]] + ; GREEDY-NEXT: {{ $}} + ; GREEDY-NEXT: bb.4: + ; GREEDY-NEXT: $vgpr0 = COPY [[AMDGPU_BUFFER_LOAD]](s32) + ; GREEDY-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0 %soffset = add i32 %soffset.base, 4092 %val = call float @llvm.amdgcn.s.buffer.load.f32(<4 x i32> %rsrc, i32 %soffset, i32 0) ret float %val @@ -1684,82 +1758,92 @@ define amdgpu_ps float @s_buffer_load_f32_vgpr_rsrc_soffset_add_4092(<4 x i32> % define amdgpu_ps float @s_buffer_load_f32_vgpr_rsrc_soffset_add_4096(<4 x i32> %rsrc, i32 inreg %soffset.base) { ; CHECK-LABEL: name: s_buffer_load_f32_vgpr_rsrc_soffset_add_4096 ; CHECK: bb.1 (%ir-block.0): - ; CHECK: liveins: $sgpr2, $vgpr0, $vgpr1, $vgpr2, $vgpr3 - ; CHECK: [[COPY:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 - ; CHECK: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY $vgpr1 - ; CHECK: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY $vgpr2 - ; CHECK: [[COPY3:%[0-9]+]]:vgpr(s32) = COPY $vgpr3 - ; CHECK: [[BUILD_VECTOR:%[0-9]+]]:vgpr(<4 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32) - ; CHECK: [[COPY4:%[0-9]+]]:sgpr(s32) = COPY $sgpr2 - ; CHECK: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 4096 - ; CHECK: [[ADD:%[0-9]+]]:sgpr(s32) = G_ADD 
[[COPY4]], [[C]] - ; CHECK: [[COPY5:%[0-9]+]]:vgpr(s32) = COPY [[ADD]](s32) - ; CHECK: [[C1:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 0 - ; CHECK: [[C2:%[0-9]+]]:vgpr(s32) = G_CONSTANT i32 0 - ; CHECK: [[DEF:%[0-9]+]]:sreg_64_xexec = IMPLICIT_DEF - ; CHECK: [[UV:%[0-9]+]]:vreg_64(s64), [[UV1:%[0-9]+]]:vreg_64(s64) = G_UNMERGE_VALUES [[BUILD_VECTOR]](<4 x s32>) - ; CHECK: [[S_MOV_B64_term:%[0-9]+]]:sreg_64_xexec = S_MOV_B64_term $exec - ; CHECK: bb.2: - ; CHECK: successors: %bb.3, %bb.2 - ; CHECK: [[PHI:%[0-9]+]]:sreg_64_xexec = PHI [[DEF]], %bb.1, %17, %bb.2 - ; CHECK: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[UV]].sub0(s64), implicit $exec - ; CHECK: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[UV]].sub1(s64), implicit $exec - ; CHECK: [[MV:%[0-9]+]]:sreg_64_xexec(s64) = G_MERGE_VALUES [[V_READFIRSTLANE_B32_]](s32), [[V_READFIRSTLANE_B32_1]](s32) - ; CHECK: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[MV]](s64), [[UV]](s64), implicit $exec - ; CHECK: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[UV1]].sub0(s64), implicit $exec - ; CHECK: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[UV1]].sub1(s64), implicit $exec - ; CHECK: [[MV1:%[0-9]+]]:sreg_64_xexec(s64) = G_MERGE_VALUES [[V_READFIRSTLANE_B32_2]](s32), [[V_READFIRSTLANE_B32_3]](s32) - ; CHECK: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[MV1]](s64), [[UV1]](s64), implicit $exec - ; CHECK: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_1]], [[V_CMP_EQ_U64_e64_]], implicit-def $scc - ; CHECK: [[BUILD_VECTOR1:%[0-9]+]]:sgpr(<4 x s32>) = G_BUILD_VECTOR [[V_READFIRSTLANE_B32_]](s32), [[V_READFIRSTLANE_B32_1]](s32), [[V_READFIRSTLANE_B32_2]](s32), [[V_READFIRSTLANE_B32_3]](s32) - ; CHECK: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_]], implicit-def $exec, implicit-def $scc, implicit $exec - ; CHECK: [[AMDGPU_BUFFER_LOAD:%[0-9]+]]:vgpr(s32) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR1]](<4 x s32>), [[C2]](s32), [[COPY5]], [[C1]], 0, 0, 0 :: (dereferenceable invariant load (s32)) - ; CHECK: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc - ; CHECK: SI_WATERFALL_LOOP %bb.2, implicit $exec - ; CHECK: bb.3: - ; CHECK: $exec = S_MOV_B64_term [[S_MOV_B64_term]] - ; CHECK: bb.4: - ; CHECK: $vgpr0 = COPY [[AMDGPU_BUFFER_LOAD]](s32) - ; CHECK: SI_RETURN_TO_EPILOG implicit $vgpr0 + ; CHECK-NEXT: liveins: $sgpr2, $vgpr0, $vgpr1, $vgpr2, $vgpr3 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY $vgpr1 + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY $vgpr2 + ; CHECK-NEXT: [[COPY3:%[0-9]+]]:vgpr(s32) = COPY $vgpr3 + ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:vgpr(<4 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32) + ; CHECK-NEXT: [[COPY4:%[0-9]+]]:sgpr(s32) = COPY $sgpr2 + ; CHECK-NEXT: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 4096 + ; CHECK-NEXT: [[ADD:%[0-9]+]]:sgpr(s32) = G_ADD [[COPY4]], [[C]] + ; CHECK-NEXT: [[COPY5:%[0-9]+]]:vgpr(s32) = COPY [[ADD]](s32) + ; CHECK-NEXT: [[C1:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 0 + ; CHECK-NEXT: [[C2:%[0-9]+]]:vgpr(s32) = G_CONSTANT i32 0 + ; CHECK-NEXT: [[DEF:%[0-9]+]]:sreg_64_xexec = IMPLICIT_DEF + ; CHECK-NEXT: [[UV:%[0-9]+]]:vreg_64(s64), [[UV1:%[0-9]+]]:vreg_64(s64) = G_UNMERGE_VALUES [[BUILD_VECTOR]](<4 x s32>) + ; CHECK-NEXT: 
[[S_MOV_B64_term:%[0-9]+]]:sreg_64_xexec = S_MOV_B64_term $exec + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: bb.2: + ; CHECK-NEXT: successors: %bb.3, %bb.2 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[PHI:%[0-9]+]]:sreg_64_xexec = PHI [[DEF]], %bb.1, %17, %bb.2 + ; CHECK-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[UV]].sub0(s64), implicit $exec + ; CHECK-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[UV]].sub1(s64), implicit $exec + ; CHECK-NEXT: [[MV:%[0-9]+]]:sreg_64_xexec(s64) = G_MERGE_VALUES [[V_READFIRSTLANE_B32_]](s32), [[V_READFIRSTLANE_B32_1]](s32) + ; CHECK-NEXT: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[MV]](s64), [[UV]](s64), implicit $exec + ; CHECK-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[UV1]].sub0(s64), implicit $exec + ; CHECK-NEXT: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[UV1]].sub1(s64), implicit $exec + ; CHECK-NEXT: [[MV1:%[0-9]+]]:sreg_64_xexec(s64) = G_MERGE_VALUES [[V_READFIRSTLANE_B32_2]](s32), [[V_READFIRSTLANE_B32_3]](s32) + ; CHECK-NEXT: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[MV1]](s64), [[UV1]](s64), implicit $exec + ; CHECK-NEXT: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_1]], [[V_CMP_EQ_U64_e64_]], implicit-def $scc + ; CHECK-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:sgpr(<4 x s32>) = G_BUILD_VECTOR [[V_READFIRSTLANE_B32_]](s32), [[V_READFIRSTLANE_B32_1]](s32), [[V_READFIRSTLANE_B32_2]](s32), [[V_READFIRSTLANE_B32_3]](s32) + ; CHECK-NEXT: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_]], implicit-def $exec, implicit-def $scc, implicit $exec + ; CHECK-NEXT: [[AMDGPU_BUFFER_LOAD:%[0-9]+]]:vgpr(s32) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR1]](<4 x s32>), [[C2]](s32), [[COPY5]], [[C1]], 0, 0, 0 :: (dereferenceable invariant load (s32)) + ; CHECK-NEXT: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc + ; CHECK-NEXT: SI_WATERFALL_LOOP %bb.2, implicit $exec + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: bb.3: + ; CHECK-NEXT: $exec = S_MOV_B64_term [[S_MOV_B64_term]] + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: bb.4: + ; CHECK-NEXT: $vgpr0 = COPY [[AMDGPU_BUFFER_LOAD]](s32) + ; CHECK-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0 ; GREEDY-LABEL: name: s_buffer_load_f32_vgpr_rsrc_soffset_add_4096 ; GREEDY: bb.1 (%ir-block.0): - ; GREEDY: liveins: $sgpr2, $vgpr0, $vgpr1, $vgpr2, $vgpr3 - ; GREEDY: [[COPY:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 - ; GREEDY: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY $vgpr1 - ; GREEDY: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY $vgpr2 - ; GREEDY: [[COPY3:%[0-9]+]]:vgpr(s32) = COPY $vgpr3 - ; GREEDY: [[BUILD_VECTOR:%[0-9]+]]:vgpr(<4 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32) - ; GREEDY: [[COPY4:%[0-9]+]]:sgpr(s32) = COPY $sgpr2 - ; GREEDY: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 4096 - ; GREEDY: [[ADD:%[0-9]+]]:sgpr(s32) = G_ADD [[COPY4]], [[C]] - ; GREEDY: [[COPY5:%[0-9]+]]:vgpr(s32) = COPY [[ADD]](s32) - ; GREEDY: [[C1:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 0 - ; GREEDY: [[C2:%[0-9]+]]:vgpr(s32) = G_CONSTANT i32 0 - ; GREEDY: [[DEF:%[0-9]+]]:sreg_64_xexec = IMPLICIT_DEF - ; GREEDY: [[UV:%[0-9]+]]:vreg_64(s64), [[UV1:%[0-9]+]]:vreg_64(s64) = G_UNMERGE_VALUES [[BUILD_VECTOR]](<4 x s32>) - ; GREEDY: [[S_MOV_B64_term:%[0-9]+]]:sreg_64_xexec = S_MOV_B64_term $exec - ; GREEDY: bb.2: - ; GREEDY: successors: %bb.3, %bb.2 - ; GREEDY: [[PHI:%[0-9]+]]:sreg_64_xexec = PHI [[DEF]], %bb.1, %17, 
%bb.2 - ; GREEDY: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[UV]].sub0(s64), implicit $exec - ; GREEDY: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[UV]].sub1(s64), implicit $exec - ; GREEDY: [[MV:%[0-9]+]]:sreg_64_xexec(s64) = G_MERGE_VALUES [[V_READFIRSTLANE_B32_]](s32), [[V_READFIRSTLANE_B32_1]](s32) - ; GREEDY: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[MV]](s64), [[UV]](s64), implicit $exec - ; GREEDY: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[UV1]].sub0(s64), implicit $exec - ; GREEDY: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[UV1]].sub1(s64), implicit $exec - ; GREEDY: [[MV1:%[0-9]+]]:sreg_64_xexec(s64) = G_MERGE_VALUES [[V_READFIRSTLANE_B32_2]](s32), [[V_READFIRSTLANE_B32_3]](s32) - ; GREEDY: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[MV1]](s64), [[UV1]](s64), implicit $exec - ; GREEDY: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_1]], [[V_CMP_EQ_U64_e64_]], implicit-def $scc - ; GREEDY: [[BUILD_VECTOR1:%[0-9]+]]:sgpr(<4 x s32>) = G_BUILD_VECTOR [[V_READFIRSTLANE_B32_]](s32), [[V_READFIRSTLANE_B32_1]](s32), [[V_READFIRSTLANE_B32_2]](s32), [[V_READFIRSTLANE_B32_3]](s32) - ; GREEDY: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_]], implicit-def $exec, implicit-def $scc, implicit $exec - ; GREEDY: [[AMDGPU_BUFFER_LOAD:%[0-9]+]]:vgpr(s32) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR1]](<4 x s32>), [[C2]](s32), [[COPY5]], [[C1]], 0, 0, 0 :: (dereferenceable invariant load (s32)) - ; GREEDY: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc - ; GREEDY: SI_WATERFALL_LOOP %bb.2, implicit $exec - ; GREEDY: bb.3: - ; GREEDY: $exec = S_MOV_B64_term [[S_MOV_B64_term]] - ; GREEDY: bb.4: - ; GREEDY: $vgpr0 = COPY [[AMDGPU_BUFFER_LOAD]](s32) - ; GREEDY: SI_RETURN_TO_EPILOG implicit $vgpr0 + ; GREEDY-NEXT: liveins: $sgpr2, $vgpr0, $vgpr1, $vgpr2, $vgpr3 + ; GREEDY-NEXT: {{ $}} + ; GREEDY-NEXT: [[COPY:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 + ; GREEDY-NEXT: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY $vgpr1 + ; GREEDY-NEXT: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY $vgpr2 + ; GREEDY-NEXT: [[COPY3:%[0-9]+]]:vgpr(s32) = COPY $vgpr3 + ; GREEDY-NEXT: [[BUILD_VECTOR:%[0-9]+]]:vgpr(<4 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32) + ; GREEDY-NEXT: [[COPY4:%[0-9]+]]:sgpr(s32) = COPY $sgpr2 + ; GREEDY-NEXT: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 4096 + ; GREEDY-NEXT: [[ADD:%[0-9]+]]:sgpr(s32) = G_ADD [[COPY4]], [[C]] + ; GREEDY-NEXT: [[COPY5:%[0-9]+]]:vgpr(s32) = COPY [[ADD]](s32) + ; GREEDY-NEXT: [[C1:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 0 + ; GREEDY-NEXT: [[C2:%[0-9]+]]:vgpr(s32) = G_CONSTANT i32 0 + ; GREEDY-NEXT: [[DEF:%[0-9]+]]:sreg_64_xexec = IMPLICIT_DEF + ; GREEDY-NEXT: [[UV:%[0-9]+]]:vreg_64(s64), [[UV1:%[0-9]+]]:vreg_64(s64) = G_UNMERGE_VALUES [[BUILD_VECTOR]](<4 x s32>) + ; GREEDY-NEXT: [[S_MOV_B64_term:%[0-9]+]]:sreg_64_xexec = S_MOV_B64_term $exec + ; GREEDY-NEXT: {{ $}} + ; GREEDY-NEXT: bb.2: + ; GREEDY-NEXT: successors: %bb.3, %bb.2 + ; GREEDY-NEXT: {{ $}} + ; GREEDY-NEXT: [[PHI:%[0-9]+]]:sreg_64_xexec = PHI [[DEF]], %bb.1, %17, %bb.2 + ; GREEDY-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[UV]].sub0(s64), implicit $exec + ; GREEDY-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[UV]].sub1(s64), implicit $exec + ; GREEDY-NEXT: [[MV:%[0-9]+]]:sreg_64_xexec(s64) 
= G_MERGE_VALUES [[V_READFIRSTLANE_B32_]](s32), [[V_READFIRSTLANE_B32_1]](s32) + ; GREEDY-NEXT: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[MV]](s64), [[UV]](s64), implicit $exec + ; GREEDY-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[UV1]].sub0(s64), implicit $exec + ; GREEDY-NEXT: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[UV1]].sub1(s64), implicit $exec + ; GREEDY-NEXT: [[MV1:%[0-9]+]]:sreg_64_xexec(s64) = G_MERGE_VALUES [[V_READFIRSTLANE_B32_2]](s32), [[V_READFIRSTLANE_B32_3]](s32) + ; GREEDY-NEXT: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[MV1]](s64), [[UV1]](s64), implicit $exec + ; GREEDY-NEXT: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_1]], [[V_CMP_EQ_U64_e64_]], implicit-def $scc + ; GREEDY-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:sgpr(<4 x s32>) = G_BUILD_VECTOR [[V_READFIRSTLANE_B32_]](s32), [[V_READFIRSTLANE_B32_1]](s32), [[V_READFIRSTLANE_B32_2]](s32), [[V_READFIRSTLANE_B32_3]](s32) + ; GREEDY-NEXT: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_]], implicit-def $exec, implicit-def $scc, implicit $exec + ; GREEDY-NEXT: [[AMDGPU_BUFFER_LOAD:%[0-9]+]]:vgpr(s32) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR1]](<4 x s32>), [[C2]](s32), [[COPY5]], [[C1]], 0, 0, 0 :: (dereferenceable invariant load (s32)) + ; GREEDY-NEXT: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc + ; GREEDY-NEXT: SI_WATERFALL_LOOP %bb.2, implicit $exec + ; GREEDY-NEXT: {{ $}} + ; GREEDY-NEXT: bb.3: + ; GREEDY-NEXT: $exec = S_MOV_B64_term [[S_MOV_B64_term]] + ; GREEDY-NEXT: {{ $}} + ; GREEDY-NEXT: bb.4: + ; GREEDY-NEXT: $vgpr0 = COPY [[AMDGPU_BUFFER_LOAD]](s32) + ; GREEDY-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0 %soffset = add i32 %soffset.base, 4096 %val = call float @llvm.amdgcn.s.buffer.load.f32(<4 x i32> %rsrc, i32 %soffset, i32 0) ret float %val @@ -1769,78 +1853,88 @@ define amdgpu_ps float @s_buffer_load_f32_vgpr_rsrc_soffset_add_4096(<4 x i32> % define amdgpu_ps float @s_buffer_load_f32_vgpr_rsrc_offset_4095(<4 x i32> %rsrc) { ; CHECK-LABEL: name: s_buffer_load_f32_vgpr_rsrc_offset_4095 ; CHECK: bb.1 (%ir-block.0): - ; CHECK: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3 - ; CHECK: [[COPY:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 - ; CHECK: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY $vgpr1 - ; CHECK: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY $vgpr2 - ; CHECK: [[COPY3:%[0-9]+]]:vgpr(s32) = COPY $vgpr3 - ; CHECK: [[BUILD_VECTOR:%[0-9]+]]:vgpr(<4 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32) - ; CHECK: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 4095 - ; CHECK: [[C1:%[0-9]+]]:vgpr(s32) = G_CONSTANT i32 0 - ; CHECK: [[C2:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 0 - ; CHECK: [[C3:%[0-9]+]]:vgpr(s32) = G_CONSTANT i32 0 - ; CHECK: [[DEF:%[0-9]+]]:sreg_64_xexec = IMPLICIT_DEF - ; CHECK: [[UV:%[0-9]+]]:vreg_64(s64), [[UV1:%[0-9]+]]:vreg_64(s64) = G_UNMERGE_VALUES [[BUILD_VECTOR]](<4 x s32>) - ; CHECK: [[S_MOV_B64_term:%[0-9]+]]:sreg_64_xexec = S_MOV_B64_term $exec - ; CHECK: bb.2: - ; CHECK: successors: %bb.3, %bb.2 - ; CHECK: [[PHI:%[0-9]+]]:sreg_64_xexec = PHI [[DEF]], %bb.1, %15, %bb.2 - ; CHECK: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[UV]].sub0(s64), implicit $exec - ; CHECK: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[UV]].sub1(s64), implicit $exec - ; CHECK: [[MV:%[0-9]+]]:sreg_64_xexec(s64) = G_MERGE_VALUES [[V_READFIRSTLANE_B32_]](s32), 
[[V_READFIRSTLANE_B32_1]](s32) - ; CHECK: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[MV]](s64), [[UV]](s64), implicit $exec - ; CHECK: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[UV1]].sub0(s64), implicit $exec - ; CHECK: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[UV1]].sub1(s64), implicit $exec - ; CHECK: [[MV1:%[0-9]+]]:sreg_64_xexec(s64) = G_MERGE_VALUES [[V_READFIRSTLANE_B32_2]](s32), [[V_READFIRSTLANE_B32_3]](s32) - ; CHECK: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[MV1]](s64), [[UV1]](s64), implicit $exec - ; CHECK: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_1]], [[V_CMP_EQ_U64_e64_]], implicit-def $scc - ; CHECK: [[BUILD_VECTOR1:%[0-9]+]]:sgpr(<4 x s32>) = G_BUILD_VECTOR [[V_READFIRSTLANE_B32_]](s32), [[V_READFIRSTLANE_B32_1]](s32), [[V_READFIRSTLANE_B32_2]](s32), [[V_READFIRSTLANE_B32_3]](s32) - ; CHECK: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_]], implicit-def $exec, implicit-def $scc, implicit $exec - ; CHECK: [[AMDGPU_BUFFER_LOAD:%[0-9]+]]:vgpr(s32) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR1]](<4 x s32>), [[C3]](s32), [[C1]], [[C2]], 4095, 0, 0 :: (dereferenceable invariant load (s32) from unknown-address + 4095, align 1) - ; CHECK: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc - ; CHECK: SI_WATERFALL_LOOP %bb.2, implicit $exec - ; CHECK: bb.3: - ; CHECK: $exec = S_MOV_B64_term [[S_MOV_B64_term]] - ; CHECK: bb.4: - ; CHECK: $vgpr0 = COPY [[AMDGPU_BUFFER_LOAD]](s32) - ; CHECK: SI_RETURN_TO_EPILOG implicit $vgpr0 + ; CHECK-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY $vgpr1 + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY $vgpr2 + ; CHECK-NEXT: [[COPY3:%[0-9]+]]:vgpr(s32) = COPY $vgpr3 + ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:vgpr(<4 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32) + ; CHECK-NEXT: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 4095 + ; CHECK-NEXT: [[C1:%[0-9]+]]:vgpr(s32) = G_CONSTANT i32 0 + ; CHECK-NEXT: [[C2:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 0 + ; CHECK-NEXT: [[C3:%[0-9]+]]:vgpr(s32) = G_CONSTANT i32 0 + ; CHECK-NEXT: [[DEF:%[0-9]+]]:sreg_64_xexec = IMPLICIT_DEF + ; CHECK-NEXT: [[UV:%[0-9]+]]:vreg_64(s64), [[UV1:%[0-9]+]]:vreg_64(s64) = G_UNMERGE_VALUES [[BUILD_VECTOR]](<4 x s32>) + ; CHECK-NEXT: [[S_MOV_B64_term:%[0-9]+]]:sreg_64_xexec = S_MOV_B64_term $exec + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: bb.2: + ; CHECK-NEXT: successors: %bb.3, %bb.2 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[PHI:%[0-9]+]]:sreg_64_xexec = PHI [[DEF]], %bb.1, %15, %bb.2 + ; CHECK-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[UV]].sub0(s64), implicit $exec + ; CHECK-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[UV]].sub1(s64), implicit $exec + ; CHECK-NEXT: [[MV:%[0-9]+]]:sreg_64_xexec(s64) = G_MERGE_VALUES [[V_READFIRSTLANE_B32_]](s32), [[V_READFIRSTLANE_B32_1]](s32) + ; CHECK-NEXT: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[MV]](s64), [[UV]](s64), implicit $exec + ; CHECK-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[UV1]].sub0(s64), implicit $exec + ; CHECK-NEXT: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[UV1]].sub1(s64), implicit $exec + ; CHECK-NEXT: 
[[MV1:%[0-9]+]]:sreg_64_xexec(s64) = G_MERGE_VALUES [[V_READFIRSTLANE_B32_2]](s32), [[V_READFIRSTLANE_B32_3]](s32) + ; CHECK-NEXT: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[MV1]](s64), [[UV1]](s64), implicit $exec + ; CHECK-NEXT: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_1]], [[V_CMP_EQ_U64_e64_]], implicit-def $scc + ; CHECK-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:sgpr(<4 x s32>) = G_BUILD_VECTOR [[V_READFIRSTLANE_B32_]](s32), [[V_READFIRSTLANE_B32_1]](s32), [[V_READFIRSTLANE_B32_2]](s32), [[V_READFIRSTLANE_B32_3]](s32) + ; CHECK-NEXT: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_]], implicit-def $exec, implicit-def $scc, implicit $exec + ; CHECK-NEXT: [[AMDGPU_BUFFER_LOAD:%[0-9]+]]:vgpr(s32) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR1]](<4 x s32>), [[C3]](s32), [[C1]], [[C2]], 4095, 0, 0 :: (dereferenceable invariant load (s32) from unknown-address + 4095, align 1) + ; CHECK-NEXT: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc + ; CHECK-NEXT: SI_WATERFALL_LOOP %bb.2, implicit $exec + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: bb.3: + ; CHECK-NEXT: $exec = S_MOV_B64_term [[S_MOV_B64_term]] + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: bb.4: + ; CHECK-NEXT: $vgpr0 = COPY [[AMDGPU_BUFFER_LOAD]](s32) + ; CHECK-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0 ; GREEDY-LABEL: name: s_buffer_load_f32_vgpr_rsrc_offset_4095 ; GREEDY: bb.1 (%ir-block.0): - ; GREEDY: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3 - ; GREEDY: [[COPY:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 - ; GREEDY: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY $vgpr1 - ; GREEDY: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY $vgpr2 - ; GREEDY: [[COPY3:%[0-9]+]]:vgpr(s32) = COPY $vgpr3 - ; GREEDY: [[BUILD_VECTOR:%[0-9]+]]:vgpr(<4 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32) - ; GREEDY: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 4095 - ; GREEDY: [[C1:%[0-9]+]]:vgpr(s32) = G_CONSTANT i32 0 - ; GREEDY: [[C2:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 0 - ; GREEDY: [[C3:%[0-9]+]]:vgpr(s32) = G_CONSTANT i32 0 - ; GREEDY: [[DEF:%[0-9]+]]:sreg_64_xexec = IMPLICIT_DEF - ; GREEDY: [[UV:%[0-9]+]]:vreg_64(s64), [[UV1:%[0-9]+]]:vreg_64(s64) = G_UNMERGE_VALUES [[BUILD_VECTOR]](<4 x s32>) - ; GREEDY: [[S_MOV_B64_term:%[0-9]+]]:sreg_64_xexec = S_MOV_B64_term $exec - ; GREEDY: bb.2: - ; GREEDY: successors: %bb.3, %bb.2 - ; GREEDY: [[PHI:%[0-9]+]]:sreg_64_xexec = PHI [[DEF]], %bb.1, %15, %bb.2 - ; GREEDY: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[UV]].sub0(s64), implicit $exec - ; GREEDY: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[UV]].sub1(s64), implicit $exec - ; GREEDY: [[MV:%[0-9]+]]:sreg_64_xexec(s64) = G_MERGE_VALUES [[V_READFIRSTLANE_B32_]](s32), [[V_READFIRSTLANE_B32_1]](s32) - ; GREEDY: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[MV]](s64), [[UV]](s64), implicit $exec - ; GREEDY: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[UV1]].sub0(s64), implicit $exec - ; GREEDY: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[UV1]].sub1(s64), implicit $exec - ; GREEDY: [[MV1:%[0-9]+]]:sreg_64_xexec(s64) = G_MERGE_VALUES [[V_READFIRSTLANE_B32_2]](s32), [[V_READFIRSTLANE_B32_3]](s32) - ; GREEDY: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[MV1]](s64), [[UV1]](s64), implicit $exec - ; GREEDY: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_1]], [[V_CMP_EQ_U64_e64_]], implicit-def $scc - ; 
GREEDY: [[BUILD_VECTOR1:%[0-9]+]]:sgpr(<4 x s32>) = G_BUILD_VECTOR [[V_READFIRSTLANE_B32_]](s32), [[V_READFIRSTLANE_B32_1]](s32), [[V_READFIRSTLANE_B32_2]](s32), [[V_READFIRSTLANE_B32_3]](s32) - ; GREEDY: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_]], implicit-def $exec, implicit-def $scc, implicit $exec - ; GREEDY: [[AMDGPU_BUFFER_LOAD:%[0-9]+]]:vgpr(s32) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR1]](<4 x s32>), [[C3]](s32), [[C1]], [[C2]], 4095, 0, 0 :: (dereferenceable invariant load (s32) from unknown-address + 4095, align 1) - ; GREEDY: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc - ; GREEDY: SI_WATERFALL_LOOP %bb.2, implicit $exec - ; GREEDY: bb.3: - ; GREEDY: $exec = S_MOV_B64_term [[S_MOV_B64_term]] - ; GREEDY: bb.4: - ; GREEDY: $vgpr0 = COPY [[AMDGPU_BUFFER_LOAD]](s32) - ; GREEDY: SI_RETURN_TO_EPILOG implicit $vgpr0 + ; GREEDY-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3 + ; GREEDY-NEXT: {{ $}} + ; GREEDY-NEXT: [[COPY:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 + ; GREEDY-NEXT: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY $vgpr1 + ; GREEDY-NEXT: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY $vgpr2 + ; GREEDY-NEXT: [[COPY3:%[0-9]+]]:vgpr(s32) = COPY $vgpr3 + ; GREEDY-NEXT: [[BUILD_VECTOR:%[0-9]+]]:vgpr(<4 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32) + ; GREEDY-NEXT: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 4095 + ; GREEDY-NEXT: [[C1:%[0-9]+]]:vgpr(s32) = G_CONSTANT i32 0 + ; GREEDY-NEXT: [[C2:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 0 + ; GREEDY-NEXT: [[C3:%[0-9]+]]:vgpr(s32) = G_CONSTANT i32 0 + ; GREEDY-NEXT: [[DEF:%[0-9]+]]:sreg_64_xexec = IMPLICIT_DEF + ; GREEDY-NEXT: [[UV:%[0-9]+]]:vreg_64(s64), [[UV1:%[0-9]+]]:vreg_64(s64) = G_UNMERGE_VALUES [[BUILD_VECTOR]](<4 x s32>) + ; GREEDY-NEXT: [[S_MOV_B64_term:%[0-9]+]]:sreg_64_xexec = S_MOV_B64_term $exec + ; GREEDY-NEXT: {{ $}} + ; GREEDY-NEXT: bb.2: + ; GREEDY-NEXT: successors: %bb.3, %bb.2 + ; GREEDY-NEXT: {{ $}} + ; GREEDY-NEXT: [[PHI:%[0-9]+]]:sreg_64_xexec = PHI [[DEF]], %bb.1, %15, %bb.2 + ; GREEDY-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[UV]].sub0(s64), implicit $exec + ; GREEDY-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[UV]].sub1(s64), implicit $exec + ; GREEDY-NEXT: [[MV:%[0-9]+]]:sreg_64_xexec(s64) = G_MERGE_VALUES [[V_READFIRSTLANE_B32_]](s32), [[V_READFIRSTLANE_B32_1]](s32) + ; GREEDY-NEXT: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[MV]](s64), [[UV]](s64), implicit $exec + ; GREEDY-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[UV1]].sub0(s64), implicit $exec + ; GREEDY-NEXT: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[UV1]].sub1(s64), implicit $exec + ; GREEDY-NEXT: [[MV1:%[0-9]+]]:sreg_64_xexec(s64) = G_MERGE_VALUES [[V_READFIRSTLANE_B32_2]](s32), [[V_READFIRSTLANE_B32_3]](s32) + ; GREEDY-NEXT: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[MV1]](s64), [[UV1]](s64), implicit $exec + ; GREEDY-NEXT: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_1]], [[V_CMP_EQ_U64_e64_]], implicit-def $scc + ; GREEDY-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:sgpr(<4 x s32>) = G_BUILD_VECTOR [[V_READFIRSTLANE_B32_]](s32), [[V_READFIRSTLANE_B32_1]](s32), [[V_READFIRSTLANE_B32_2]](s32), [[V_READFIRSTLANE_B32_3]](s32) + ; GREEDY-NEXT: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_]], implicit-def $exec, implicit-def $scc, implicit $exec 
+ ; GREEDY-NEXT: [[AMDGPU_BUFFER_LOAD:%[0-9]+]]:vgpr(s32) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR1]](<4 x s32>), [[C3]](s32), [[C1]], [[C2]], 4095, 0, 0 :: (dereferenceable invariant load (s32) from unknown-address + 4095, align 1) + ; GREEDY-NEXT: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc + ; GREEDY-NEXT: SI_WATERFALL_LOOP %bb.2, implicit $exec + ; GREEDY-NEXT: {{ $}} + ; GREEDY-NEXT: bb.3: + ; GREEDY-NEXT: $exec = S_MOV_B64_term [[S_MOV_B64_term]] + ; GREEDY-NEXT: {{ $}} + ; GREEDY-NEXT: bb.4: + ; GREEDY-NEXT: $vgpr0 = COPY [[AMDGPU_BUFFER_LOAD]](s32) + ; GREEDY-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0 %val = call float @llvm.amdgcn.s.buffer.load.f32(<4 x i32> %rsrc, i32 4095, i32 0) ret float %val } @@ -1849,78 +1943,88 @@ define amdgpu_ps float @s_buffer_load_f32_vgpr_rsrc_offset_4095(<4 x i32> %rsrc) define amdgpu_ps float @s_buffer_load_f32_vgpr_rsrc_offset_4096(<4 x i32> %rsrc) { ; CHECK-LABEL: name: s_buffer_load_f32_vgpr_rsrc_offset_4096 ; CHECK: bb.1 (%ir-block.0): - ; CHECK: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3 - ; CHECK: [[COPY:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 - ; CHECK: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY $vgpr1 - ; CHECK: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY $vgpr2 - ; CHECK: [[COPY3:%[0-9]+]]:vgpr(s32) = COPY $vgpr3 - ; CHECK: [[BUILD_VECTOR:%[0-9]+]]:vgpr(<4 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32) - ; CHECK: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 4096 - ; CHECK: [[COPY4:%[0-9]+]]:vgpr(s32) = COPY [[C]](s32) - ; CHECK: [[C1:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 0 - ; CHECK: [[C2:%[0-9]+]]:vgpr(s32) = G_CONSTANT i32 0 - ; CHECK: [[DEF:%[0-9]+]]:sreg_64_xexec = IMPLICIT_DEF - ; CHECK: [[UV:%[0-9]+]]:vreg_64(s64), [[UV1:%[0-9]+]]:vreg_64(s64) = G_UNMERGE_VALUES [[BUILD_VECTOR]](<4 x s32>) - ; CHECK: [[S_MOV_B64_term:%[0-9]+]]:sreg_64_xexec = S_MOV_B64_term $exec - ; CHECK: bb.2: - ; CHECK: successors: %bb.3, %bb.2 - ; CHECK: [[PHI:%[0-9]+]]:sreg_64_xexec = PHI [[DEF]], %bb.1, %15, %bb.2 - ; CHECK: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[UV]].sub0(s64), implicit $exec - ; CHECK: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[UV]].sub1(s64), implicit $exec - ; CHECK: [[MV:%[0-9]+]]:sreg_64_xexec(s64) = G_MERGE_VALUES [[V_READFIRSTLANE_B32_]](s32), [[V_READFIRSTLANE_B32_1]](s32) - ; CHECK: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[MV]](s64), [[UV]](s64), implicit $exec - ; CHECK: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[UV1]].sub0(s64), implicit $exec - ; CHECK: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[UV1]].sub1(s64), implicit $exec - ; CHECK: [[MV1:%[0-9]+]]:sreg_64_xexec(s64) = G_MERGE_VALUES [[V_READFIRSTLANE_B32_2]](s32), [[V_READFIRSTLANE_B32_3]](s32) - ; CHECK: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[MV1]](s64), [[UV1]](s64), implicit $exec - ; CHECK: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_1]], [[V_CMP_EQ_U64_e64_]], implicit-def $scc - ; CHECK: [[BUILD_VECTOR1:%[0-9]+]]:sgpr(<4 x s32>) = G_BUILD_VECTOR [[V_READFIRSTLANE_B32_]](s32), [[V_READFIRSTLANE_B32_1]](s32), [[V_READFIRSTLANE_B32_2]](s32), [[V_READFIRSTLANE_B32_3]](s32) - ; CHECK: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_]], implicit-def $exec, implicit-def $scc, implicit $exec - ; CHECK: [[AMDGPU_BUFFER_LOAD:%[0-9]+]]:vgpr(s32) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR1]](<4 x s32>), 
[[C2]](s32), [[COPY4]], [[C1]], 0, 0, 0 :: (dereferenceable invariant load (s32)) - ; CHECK: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc - ; CHECK: SI_WATERFALL_LOOP %bb.2, implicit $exec - ; CHECK: bb.3: - ; CHECK: $exec = S_MOV_B64_term [[S_MOV_B64_term]] - ; CHECK: bb.4: - ; CHECK: $vgpr0 = COPY [[AMDGPU_BUFFER_LOAD]](s32) - ; CHECK: SI_RETURN_TO_EPILOG implicit $vgpr0 + ; CHECK-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY $vgpr1 + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY $vgpr2 + ; CHECK-NEXT: [[COPY3:%[0-9]+]]:vgpr(s32) = COPY $vgpr3 + ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:vgpr(<4 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32) + ; CHECK-NEXT: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 4096 + ; CHECK-NEXT: [[COPY4:%[0-9]+]]:vgpr(s32) = COPY [[C]](s32) + ; CHECK-NEXT: [[C1:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 0 + ; CHECK-NEXT: [[C2:%[0-9]+]]:vgpr(s32) = G_CONSTANT i32 0 + ; CHECK-NEXT: [[DEF:%[0-9]+]]:sreg_64_xexec = IMPLICIT_DEF + ; CHECK-NEXT: [[UV:%[0-9]+]]:vreg_64(s64), [[UV1:%[0-9]+]]:vreg_64(s64) = G_UNMERGE_VALUES [[BUILD_VECTOR]](<4 x s32>) + ; CHECK-NEXT: [[S_MOV_B64_term:%[0-9]+]]:sreg_64_xexec = S_MOV_B64_term $exec + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: bb.2: + ; CHECK-NEXT: successors: %bb.3, %bb.2 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[PHI:%[0-9]+]]:sreg_64_xexec = PHI [[DEF]], %bb.1, %15, %bb.2 + ; CHECK-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[UV]].sub0(s64), implicit $exec + ; CHECK-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[UV]].sub1(s64), implicit $exec + ; CHECK-NEXT: [[MV:%[0-9]+]]:sreg_64_xexec(s64) = G_MERGE_VALUES [[V_READFIRSTLANE_B32_]](s32), [[V_READFIRSTLANE_B32_1]](s32) + ; CHECK-NEXT: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[MV]](s64), [[UV]](s64), implicit $exec + ; CHECK-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[UV1]].sub0(s64), implicit $exec + ; CHECK-NEXT: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[UV1]].sub1(s64), implicit $exec + ; CHECK-NEXT: [[MV1:%[0-9]+]]:sreg_64_xexec(s64) = G_MERGE_VALUES [[V_READFIRSTLANE_B32_2]](s32), [[V_READFIRSTLANE_B32_3]](s32) + ; CHECK-NEXT: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[MV1]](s64), [[UV1]](s64), implicit $exec + ; CHECK-NEXT: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_1]], [[V_CMP_EQ_U64_e64_]], implicit-def $scc + ; CHECK-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:sgpr(<4 x s32>) = G_BUILD_VECTOR [[V_READFIRSTLANE_B32_]](s32), [[V_READFIRSTLANE_B32_1]](s32), [[V_READFIRSTLANE_B32_2]](s32), [[V_READFIRSTLANE_B32_3]](s32) + ; CHECK-NEXT: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_]], implicit-def $exec, implicit-def $scc, implicit $exec + ; CHECK-NEXT: [[AMDGPU_BUFFER_LOAD:%[0-9]+]]:vgpr(s32) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR1]](<4 x s32>), [[C2]](s32), [[COPY4]], [[C1]], 0, 0, 0 :: (dereferenceable invariant load (s32)) + ; CHECK-NEXT: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc + ; CHECK-NEXT: SI_WATERFALL_LOOP %bb.2, implicit $exec + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: bb.3: + ; CHECK-NEXT: $exec = S_MOV_B64_term [[S_MOV_B64_term]] + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: bb.4: + ; CHECK-NEXT: $vgpr0 = COPY 
[[AMDGPU_BUFFER_LOAD]](s32) + ; CHECK-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0 ; GREEDY-LABEL: name: s_buffer_load_f32_vgpr_rsrc_offset_4096 ; GREEDY: bb.1 (%ir-block.0): - ; GREEDY: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3 - ; GREEDY: [[COPY:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 - ; GREEDY: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY $vgpr1 - ; GREEDY: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY $vgpr2 - ; GREEDY: [[COPY3:%[0-9]+]]:vgpr(s32) = COPY $vgpr3 - ; GREEDY: [[BUILD_VECTOR:%[0-9]+]]:vgpr(<4 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32) - ; GREEDY: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 4096 - ; GREEDY: [[COPY4:%[0-9]+]]:vgpr(s32) = COPY [[C]](s32) - ; GREEDY: [[C1:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 0 - ; GREEDY: [[C2:%[0-9]+]]:vgpr(s32) = G_CONSTANT i32 0 - ; GREEDY: [[DEF:%[0-9]+]]:sreg_64_xexec = IMPLICIT_DEF - ; GREEDY: [[UV:%[0-9]+]]:vreg_64(s64), [[UV1:%[0-9]+]]:vreg_64(s64) = G_UNMERGE_VALUES [[BUILD_VECTOR]](<4 x s32>) - ; GREEDY: [[S_MOV_B64_term:%[0-9]+]]:sreg_64_xexec = S_MOV_B64_term $exec - ; GREEDY: bb.2: - ; GREEDY: successors: %bb.3, %bb.2 - ; GREEDY: [[PHI:%[0-9]+]]:sreg_64_xexec = PHI [[DEF]], %bb.1, %15, %bb.2 - ; GREEDY: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[UV]].sub0(s64), implicit $exec - ; GREEDY: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[UV]].sub1(s64), implicit $exec - ; GREEDY: [[MV:%[0-9]+]]:sreg_64_xexec(s64) = G_MERGE_VALUES [[V_READFIRSTLANE_B32_]](s32), [[V_READFIRSTLANE_B32_1]](s32) - ; GREEDY: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[MV]](s64), [[UV]](s64), implicit $exec - ; GREEDY: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[UV1]].sub0(s64), implicit $exec - ; GREEDY: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[UV1]].sub1(s64), implicit $exec - ; GREEDY: [[MV1:%[0-9]+]]:sreg_64_xexec(s64) = G_MERGE_VALUES [[V_READFIRSTLANE_B32_2]](s32), [[V_READFIRSTLANE_B32_3]](s32) - ; GREEDY: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[MV1]](s64), [[UV1]](s64), implicit $exec - ; GREEDY: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_1]], [[V_CMP_EQ_U64_e64_]], implicit-def $scc - ; GREEDY: [[BUILD_VECTOR1:%[0-9]+]]:sgpr(<4 x s32>) = G_BUILD_VECTOR [[V_READFIRSTLANE_B32_]](s32), [[V_READFIRSTLANE_B32_1]](s32), [[V_READFIRSTLANE_B32_2]](s32), [[V_READFIRSTLANE_B32_3]](s32) - ; GREEDY: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_]], implicit-def $exec, implicit-def $scc, implicit $exec - ; GREEDY: [[AMDGPU_BUFFER_LOAD:%[0-9]+]]:vgpr(s32) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR1]](<4 x s32>), [[C2]](s32), [[COPY4]], [[C1]], 0, 0, 0 :: (dereferenceable invariant load (s32)) - ; GREEDY: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc - ; GREEDY: SI_WATERFALL_LOOP %bb.2, implicit $exec - ; GREEDY: bb.3: - ; GREEDY: $exec = S_MOV_B64_term [[S_MOV_B64_term]] - ; GREEDY: bb.4: - ; GREEDY: $vgpr0 = COPY [[AMDGPU_BUFFER_LOAD]](s32) - ; GREEDY: SI_RETURN_TO_EPILOG implicit $vgpr0 + ; GREEDY-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3 + ; GREEDY-NEXT: {{ $}} + ; GREEDY-NEXT: [[COPY:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 + ; GREEDY-NEXT: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY $vgpr1 + ; GREEDY-NEXT: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY $vgpr2 + ; GREEDY-NEXT: [[COPY3:%[0-9]+]]:vgpr(s32) = COPY $vgpr3 + ; GREEDY-NEXT: [[BUILD_VECTOR:%[0-9]+]]:vgpr(<4 x s32>) = G_BUILD_VECTOR [[COPY]](s32), 
[[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32) + ; GREEDY-NEXT: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 4096 + ; GREEDY-NEXT: [[COPY4:%[0-9]+]]:vgpr(s32) = COPY [[C]](s32) + ; GREEDY-NEXT: [[C1:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 0 + ; GREEDY-NEXT: [[C2:%[0-9]+]]:vgpr(s32) = G_CONSTANT i32 0 + ; GREEDY-NEXT: [[DEF:%[0-9]+]]:sreg_64_xexec = IMPLICIT_DEF + ; GREEDY-NEXT: [[UV:%[0-9]+]]:vreg_64(s64), [[UV1:%[0-9]+]]:vreg_64(s64) = G_UNMERGE_VALUES [[BUILD_VECTOR]](<4 x s32>) + ; GREEDY-NEXT: [[S_MOV_B64_term:%[0-9]+]]:sreg_64_xexec = S_MOV_B64_term $exec + ; GREEDY-NEXT: {{ $}} + ; GREEDY-NEXT: bb.2: + ; GREEDY-NEXT: successors: %bb.3, %bb.2 + ; GREEDY-NEXT: {{ $}} + ; GREEDY-NEXT: [[PHI:%[0-9]+]]:sreg_64_xexec = PHI [[DEF]], %bb.1, %15, %bb.2 + ; GREEDY-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[UV]].sub0(s64), implicit $exec + ; GREEDY-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[UV]].sub1(s64), implicit $exec + ; GREEDY-NEXT: [[MV:%[0-9]+]]:sreg_64_xexec(s64) = G_MERGE_VALUES [[V_READFIRSTLANE_B32_]](s32), [[V_READFIRSTLANE_B32_1]](s32) + ; GREEDY-NEXT: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[MV]](s64), [[UV]](s64), implicit $exec + ; GREEDY-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[UV1]].sub0(s64), implicit $exec + ; GREEDY-NEXT: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[UV1]].sub1(s64), implicit $exec + ; GREEDY-NEXT: [[MV1:%[0-9]+]]:sreg_64_xexec(s64) = G_MERGE_VALUES [[V_READFIRSTLANE_B32_2]](s32), [[V_READFIRSTLANE_B32_3]](s32) + ; GREEDY-NEXT: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[MV1]](s64), [[UV1]](s64), implicit $exec + ; GREEDY-NEXT: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_1]], [[V_CMP_EQ_U64_e64_]], implicit-def $scc + ; GREEDY-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:sgpr(<4 x s32>) = G_BUILD_VECTOR [[V_READFIRSTLANE_B32_]](s32), [[V_READFIRSTLANE_B32_1]](s32), [[V_READFIRSTLANE_B32_2]](s32), [[V_READFIRSTLANE_B32_3]](s32) + ; GREEDY-NEXT: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_]], implicit-def $exec, implicit-def $scc, implicit $exec + ; GREEDY-NEXT: [[AMDGPU_BUFFER_LOAD:%[0-9]+]]:vgpr(s32) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR1]](<4 x s32>), [[C2]](s32), [[COPY4]], [[C1]], 0, 0, 0 :: (dereferenceable invariant load (s32)) + ; GREEDY-NEXT: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc + ; GREEDY-NEXT: SI_WATERFALL_LOOP %bb.2, implicit $exec + ; GREEDY-NEXT: {{ $}} + ; GREEDY-NEXT: bb.3: + ; GREEDY-NEXT: $exec = S_MOV_B64_term [[S_MOV_B64_term]] + ; GREEDY-NEXT: {{ $}} + ; GREEDY-NEXT: bb.4: + ; GREEDY-NEXT: $vgpr0 = COPY [[AMDGPU_BUFFER_LOAD]](s32) + ; GREEDY-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0 %val = call float @llvm.amdgcn.s.buffer.load.f32(<4 x i32> %rsrc, i32 4096, i32 0) ret float %val } @@ -1930,100 +2034,110 @@ define amdgpu_ps float @s_buffer_load_f32_vgpr_rsrc_offset_4096(<4 x i32> %rsrc) define amdgpu_ps <8 x float> @s_buffer_load_v8f32_vgpr_rsrc_add_4064(<4 x i32> %rsrc, i32 inreg %soffset.base) { ; CHECK-LABEL: name: s_buffer_load_v8f32_vgpr_rsrc_add_4064 ; CHECK: bb.1 (%ir-block.0): - ; CHECK: liveins: $sgpr2, $vgpr0, $vgpr1, $vgpr2, $vgpr3 - ; CHECK: [[COPY:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 - ; CHECK: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY $vgpr1 - ; CHECK: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY $vgpr2 - ; CHECK: [[COPY3:%[0-9]+]]:vgpr(s32) = COPY $vgpr3 - ; CHECK: 
[[BUILD_VECTOR:%[0-9]+]]:vgpr(<4 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32) - ; CHECK: [[COPY4:%[0-9]+]]:sgpr(s32) = COPY $sgpr2 - ; CHECK: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 4064 - ; CHECK: [[ADD:%[0-9]+]]:sgpr(s32) = G_ADD [[COPY4]], [[C]] - ; CHECK: [[C1:%[0-9]+]]:vgpr(s32) = G_CONSTANT i32 0 - ; CHECK: [[C2:%[0-9]+]]:vgpr(s32) = G_CONSTANT i32 0 - ; CHECK: [[DEF:%[0-9]+]]:sreg_64_xexec = IMPLICIT_DEF - ; CHECK: [[UV:%[0-9]+]]:vreg_64(s64), [[UV1:%[0-9]+]]:vreg_64(s64) = G_UNMERGE_VALUES [[BUILD_VECTOR]](<4 x s32>) - ; CHECK: [[S_MOV_B64_term:%[0-9]+]]:sreg_64_xexec = S_MOV_B64_term $exec - ; CHECK: bb.2: - ; CHECK: successors: %bb.3, %bb.2 - ; CHECK: [[PHI:%[0-9]+]]:sreg_64_xexec = PHI [[DEF]], %bb.1, %26, %bb.2 - ; CHECK: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[UV]].sub0(s64), implicit $exec - ; CHECK: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[UV]].sub1(s64), implicit $exec - ; CHECK: [[MV:%[0-9]+]]:sreg_64_xexec(s64) = G_MERGE_VALUES [[V_READFIRSTLANE_B32_]](s32), [[V_READFIRSTLANE_B32_1]](s32) - ; CHECK: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[MV]](s64), [[UV]](s64), implicit $exec - ; CHECK: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[UV1]].sub0(s64), implicit $exec - ; CHECK: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[UV1]].sub1(s64), implicit $exec - ; CHECK: [[MV1:%[0-9]+]]:sreg_64_xexec(s64) = G_MERGE_VALUES [[V_READFIRSTLANE_B32_2]](s32), [[V_READFIRSTLANE_B32_3]](s32) - ; CHECK: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[MV1]](s64), [[UV1]](s64), implicit $exec - ; CHECK: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_1]], [[V_CMP_EQ_U64_e64_]], implicit-def $scc - ; CHECK: [[BUILD_VECTOR1:%[0-9]+]]:sgpr(<4 x s32>) = G_BUILD_VECTOR [[V_READFIRSTLANE_B32_]](s32), [[V_READFIRSTLANE_B32_1]](s32), [[V_READFIRSTLANE_B32_2]](s32), [[V_READFIRSTLANE_B32_3]](s32) - ; CHECK: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_]], implicit-def $exec, implicit-def $scc, implicit $exec - ; CHECK: [[AMDGPU_BUFFER_LOAD:%[0-9]+]]:vgpr(<4 x s32>) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR1]](<4 x s32>), [[C2]](s32), [[C1]], [[COPY4]], 4064, 0, 0 :: (dereferenceable invariant load (s128), align 4) - ; CHECK: [[AMDGPU_BUFFER_LOAD1:%[0-9]+]]:vgpr(<4 x s32>) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR1]](<4 x s32>), [[C2]](s32), [[C1]], [[COPY4]], 4080, 0, 0 :: (dereferenceable invariant load (s128), align 4) - ; CHECK: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc - ; CHECK: SI_WATERFALL_LOOP %bb.2, implicit $exec - ; CHECK: bb.3: - ; CHECK: $exec = S_MOV_B64_term [[S_MOV_B64_term]] - ; CHECK: bb.4: - ; CHECK: [[CONCAT_VECTORS:%[0-9]+]]:vgpr(<8 x s32>) = G_CONCAT_VECTORS [[AMDGPU_BUFFER_LOAD]](<4 x s32>), [[AMDGPU_BUFFER_LOAD1]](<4 x s32>) - ; CHECK: [[UV2:%[0-9]+]]:vgpr(s32), [[UV3:%[0-9]+]]:vgpr(s32), [[UV4:%[0-9]+]]:vgpr(s32), [[UV5:%[0-9]+]]:vgpr(s32), [[UV6:%[0-9]+]]:vgpr(s32), [[UV7:%[0-9]+]]:vgpr(s32), [[UV8:%[0-9]+]]:vgpr(s32), [[UV9:%[0-9]+]]:vgpr(s32) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<8 x s32>) - ; CHECK: $vgpr0 = COPY [[UV2]](s32) - ; CHECK: $vgpr1 = COPY [[UV3]](s32) - ; CHECK: $vgpr2 = COPY [[UV4]](s32) - ; CHECK: $vgpr3 = COPY [[UV5]](s32) - ; CHECK: $vgpr4 = COPY [[UV6]](s32) - ; CHECK: $vgpr5 = COPY [[UV7]](s32) - ; CHECK: $vgpr6 = COPY [[UV8]](s32) - ; CHECK: $vgpr7 = COPY 
[[UV9]](s32) - ; CHECK: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7 + ; CHECK-NEXT: liveins: $sgpr2, $vgpr0, $vgpr1, $vgpr2, $vgpr3 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY $vgpr1 + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY $vgpr2 + ; CHECK-NEXT: [[COPY3:%[0-9]+]]:vgpr(s32) = COPY $vgpr3 + ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:vgpr(<4 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32) + ; CHECK-NEXT: [[COPY4:%[0-9]+]]:sgpr(s32) = COPY $sgpr2 + ; CHECK-NEXT: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 4064 + ; CHECK-NEXT: [[ADD:%[0-9]+]]:sgpr(s32) = G_ADD [[COPY4]], [[C]] + ; CHECK-NEXT: [[C1:%[0-9]+]]:vgpr(s32) = G_CONSTANT i32 0 + ; CHECK-NEXT: [[C2:%[0-9]+]]:vgpr(s32) = G_CONSTANT i32 0 + ; CHECK-NEXT: [[DEF:%[0-9]+]]:sreg_64_xexec = IMPLICIT_DEF + ; CHECK-NEXT: [[UV:%[0-9]+]]:vreg_64(s64), [[UV1:%[0-9]+]]:vreg_64(s64) = G_UNMERGE_VALUES [[BUILD_VECTOR]](<4 x s32>) + ; CHECK-NEXT: [[S_MOV_B64_term:%[0-9]+]]:sreg_64_xexec = S_MOV_B64_term $exec + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: bb.2: + ; CHECK-NEXT: successors: %bb.3, %bb.2 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[PHI:%[0-9]+]]:sreg_64_xexec = PHI [[DEF]], %bb.1, %26, %bb.2 + ; CHECK-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[UV]].sub0(s64), implicit $exec + ; CHECK-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[UV]].sub1(s64), implicit $exec + ; CHECK-NEXT: [[MV:%[0-9]+]]:sreg_64_xexec(s64) = G_MERGE_VALUES [[V_READFIRSTLANE_B32_]](s32), [[V_READFIRSTLANE_B32_1]](s32) + ; CHECK-NEXT: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[MV]](s64), [[UV]](s64), implicit $exec + ; CHECK-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[UV1]].sub0(s64), implicit $exec + ; CHECK-NEXT: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[UV1]].sub1(s64), implicit $exec + ; CHECK-NEXT: [[MV1:%[0-9]+]]:sreg_64_xexec(s64) = G_MERGE_VALUES [[V_READFIRSTLANE_B32_2]](s32), [[V_READFIRSTLANE_B32_3]](s32) + ; CHECK-NEXT: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[MV1]](s64), [[UV1]](s64), implicit $exec + ; CHECK-NEXT: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_1]], [[V_CMP_EQ_U64_e64_]], implicit-def $scc + ; CHECK-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:sgpr(<4 x s32>) = G_BUILD_VECTOR [[V_READFIRSTLANE_B32_]](s32), [[V_READFIRSTLANE_B32_1]](s32), [[V_READFIRSTLANE_B32_2]](s32), [[V_READFIRSTLANE_B32_3]](s32) + ; CHECK-NEXT: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_]], implicit-def $exec, implicit-def $scc, implicit $exec + ; CHECK-NEXT: [[AMDGPU_BUFFER_LOAD:%[0-9]+]]:vgpr(<4 x s32>) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR1]](<4 x s32>), [[C2]](s32), [[C1]], [[COPY4]], 4064, 0, 0 :: (dereferenceable invariant load (s128), align 4) + ; CHECK-NEXT: [[AMDGPU_BUFFER_LOAD1:%[0-9]+]]:vgpr(<4 x s32>) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR1]](<4 x s32>), [[C2]](s32), [[C1]], [[COPY4]], 4080, 0, 0 :: (dereferenceable invariant load (s128), align 4) + ; CHECK-NEXT: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc + ; CHECK-NEXT: SI_WATERFALL_LOOP %bb.2, implicit $exec + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: bb.3: + ; CHECK-NEXT: $exec = S_MOV_B64_term [[S_MOV_B64_term]] + ; 
CHECK-NEXT: {{ $}} + ; CHECK-NEXT: bb.4: + ; CHECK-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:vgpr(<8 x s32>) = G_CONCAT_VECTORS [[AMDGPU_BUFFER_LOAD]](<4 x s32>), [[AMDGPU_BUFFER_LOAD1]](<4 x s32>) + ; CHECK-NEXT: [[UV2:%[0-9]+]]:vgpr(s32), [[UV3:%[0-9]+]]:vgpr(s32), [[UV4:%[0-9]+]]:vgpr(s32), [[UV5:%[0-9]+]]:vgpr(s32), [[UV6:%[0-9]+]]:vgpr(s32), [[UV7:%[0-9]+]]:vgpr(s32), [[UV8:%[0-9]+]]:vgpr(s32), [[UV9:%[0-9]+]]:vgpr(s32) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<8 x s32>) + ; CHECK-NEXT: $vgpr0 = COPY [[UV2]](s32) + ; CHECK-NEXT: $vgpr1 = COPY [[UV3]](s32) + ; CHECK-NEXT: $vgpr2 = COPY [[UV4]](s32) + ; CHECK-NEXT: $vgpr3 = COPY [[UV5]](s32) + ; CHECK-NEXT: $vgpr4 = COPY [[UV6]](s32) + ; CHECK-NEXT: $vgpr5 = COPY [[UV7]](s32) + ; CHECK-NEXT: $vgpr6 = COPY [[UV8]](s32) + ; CHECK-NEXT: $vgpr7 = COPY [[UV9]](s32) + ; CHECK-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7 ; GREEDY-LABEL: name: s_buffer_load_v8f32_vgpr_rsrc_add_4064 ; GREEDY: bb.1 (%ir-block.0): - ; GREEDY: liveins: $sgpr2, $vgpr0, $vgpr1, $vgpr2, $vgpr3 - ; GREEDY: [[COPY:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 - ; GREEDY: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY $vgpr1 - ; GREEDY: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY $vgpr2 - ; GREEDY: [[COPY3:%[0-9]+]]:vgpr(s32) = COPY $vgpr3 - ; GREEDY: [[BUILD_VECTOR:%[0-9]+]]:vgpr(<4 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32) - ; GREEDY: [[COPY4:%[0-9]+]]:sgpr(s32) = COPY $sgpr2 - ; GREEDY: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 4064 - ; GREEDY: [[ADD:%[0-9]+]]:sgpr(s32) = G_ADD [[COPY4]], [[C]] - ; GREEDY: [[C1:%[0-9]+]]:vgpr(s32) = G_CONSTANT i32 0 - ; GREEDY: [[C2:%[0-9]+]]:vgpr(s32) = G_CONSTANT i32 0 - ; GREEDY: [[DEF:%[0-9]+]]:sreg_64_xexec = IMPLICIT_DEF - ; GREEDY: [[UV:%[0-9]+]]:vreg_64(s64), [[UV1:%[0-9]+]]:vreg_64(s64) = G_UNMERGE_VALUES [[BUILD_VECTOR]](<4 x s32>) - ; GREEDY: [[S_MOV_B64_term:%[0-9]+]]:sreg_64_xexec = S_MOV_B64_term $exec - ; GREEDY: bb.2: - ; GREEDY: successors: %bb.3, %bb.2 - ; GREEDY: [[PHI:%[0-9]+]]:sreg_64_xexec = PHI [[DEF]], %bb.1, %26, %bb.2 - ; GREEDY: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[UV]].sub0(s64), implicit $exec - ; GREEDY: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[UV]].sub1(s64), implicit $exec - ; GREEDY: [[MV:%[0-9]+]]:sreg_64_xexec(s64) = G_MERGE_VALUES [[V_READFIRSTLANE_B32_]](s32), [[V_READFIRSTLANE_B32_1]](s32) - ; GREEDY: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[MV]](s64), [[UV]](s64), implicit $exec - ; GREEDY: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[UV1]].sub0(s64), implicit $exec - ; GREEDY: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[UV1]].sub1(s64), implicit $exec - ; GREEDY: [[MV1:%[0-9]+]]:sreg_64_xexec(s64) = G_MERGE_VALUES [[V_READFIRSTLANE_B32_2]](s32), [[V_READFIRSTLANE_B32_3]](s32) - ; GREEDY: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[MV1]](s64), [[UV1]](s64), implicit $exec - ; GREEDY: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_1]], [[V_CMP_EQ_U64_e64_]], implicit-def $scc - ; GREEDY: [[BUILD_VECTOR1:%[0-9]+]]:sgpr(<4 x s32>) = G_BUILD_VECTOR [[V_READFIRSTLANE_B32_]](s32), [[V_READFIRSTLANE_B32_1]](s32), [[V_READFIRSTLANE_B32_2]](s32), [[V_READFIRSTLANE_B32_3]](s32) - ; GREEDY: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_]], implicit-def $exec, 
implicit-def $scc, implicit $exec - ; GREEDY: [[AMDGPU_BUFFER_LOAD:%[0-9]+]]:vgpr(<4 x s32>) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR1]](<4 x s32>), [[C2]](s32), [[C1]], [[COPY4]], 4064, 0, 0 :: (dereferenceable invariant load (s128), align 4) - ; GREEDY: [[AMDGPU_BUFFER_LOAD1:%[0-9]+]]:vgpr(<4 x s32>) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR1]](<4 x s32>), [[C2]](s32), [[C1]], [[COPY4]], 4080, 0, 0 :: (dereferenceable invariant load (s128), align 4) - ; GREEDY: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc - ; GREEDY: SI_WATERFALL_LOOP %bb.2, implicit $exec - ; GREEDY: bb.3: - ; GREEDY: $exec = S_MOV_B64_term [[S_MOV_B64_term]] - ; GREEDY: bb.4: - ; GREEDY: [[CONCAT_VECTORS:%[0-9]+]]:vgpr(<8 x s32>) = G_CONCAT_VECTORS [[AMDGPU_BUFFER_LOAD]](<4 x s32>), [[AMDGPU_BUFFER_LOAD1]](<4 x s32>) - ; GREEDY: [[UV2:%[0-9]+]]:vgpr(s32), [[UV3:%[0-9]+]]:vgpr(s32), [[UV4:%[0-9]+]]:vgpr(s32), [[UV5:%[0-9]+]]:vgpr(s32), [[UV6:%[0-9]+]]:vgpr(s32), [[UV7:%[0-9]+]]:vgpr(s32), [[UV8:%[0-9]+]]:vgpr(s32), [[UV9:%[0-9]+]]:vgpr(s32) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<8 x s32>) - ; GREEDY: $vgpr0 = COPY [[UV2]](s32) - ; GREEDY: $vgpr1 = COPY [[UV3]](s32) - ; GREEDY: $vgpr2 = COPY [[UV4]](s32) - ; GREEDY: $vgpr3 = COPY [[UV5]](s32) - ; GREEDY: $vgpr4 = COPY [[UV6]](s32) - ; GREEDY: $vgpr5 = COPY [[UV7]](s32) - ; GREEDY: $vgpr6 = COPY [[UV8]](s32) - ; GREEDY: $vgpr7 = COPY [[UV9]](s32) - ; GREEDY: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7 + ; GREEDY-NEXT: liveins: $sgpr2, $vgpr0, $vgpr1, $vgpr2, $vgpr3 + ; GREEDY-NEXT: {{ $}} + ; GREEDY-NEXT: [[COPY:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 + ; GREEDY-NEXT: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY $vgpr1 + ; GREEDY-NEXT: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY $vgpr2 + ; GREEDY-NEXT: [[COPY3:%[0-9]+]]:vgpr(s32) = COPY $vgpr3 + ; GREEDY-NEXT: [[BUILD_VECTOR:%[0-9]+]]:vgpr(<4 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32) + ; GREEDY-NEXT: [[COPY4:%[0-9]+]]:sgpr(s32) = COPY $sgpr2 + ; GREEDY-NEXT: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 4064 + ; GREEDY-NEXT: [[ADD:%[0-9]+]]:sgpr(s32) = G_ADD [[COPY4]], [[C]] + ; GREEDY-NEXT: [[C1:%[0-9]+]]:vgpr(s32) = G_CONSTANT i32 0 + ; GREEDY-NEXT: [[C2:%[0-9]+]]:vgpr(s32) = G_CONSTANT i32 0 + ; GREEDY-NEXT: [[DEF:%[0-9]+]]:sreg_64_xexec = IMPLICIT_DEF + ; GREEDY-NEXT: [[UV:%[0-9]+]]:vreg_64(s64), [[UV1:%[0-9]+]]:vreg_64(s64) = G_UNMERGE_VALUES [[BUILD_VECTOR]](<4 x s32>) + ; GREEDY-NEXT: [[S_MOV_B64_term:%[0-9]+]]:sreg_64_xexec = S_MOV_B64_term $exec + ; GREEDY-NEXT: {{ $}} + ; GREEDY-NEXT: bb.2: + ; GREEDY-NEXT: successors: %bb.3, %bb.2 + ; GREEDY-NEXT: {{ $}} + ; GREEDY-NEXT: [[PHI:%[0-9]+]]:sreg_64_xexec = PHI [[DEF]], %bb.1, %26, %bb.2 + ; GREEDY-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[UV]].sub0(s64), implicit $exec + ; GREEDY-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[UV]].sub1(s64), implicit $exec + ; GREEDY-NEXT: [[MV:%[0-9]+]]:sreg_64_xexec(s64) = G_MERGE_VALUES [[V_READFIRSTLANE_B32_]](s32), [[V_READFIRSTLANE_B32_1]](s32) + ; GREEDY-NEXT: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[MV]](s64), [[UV]](s64), implicit $exec + ; GREEDY-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[UV1]].sub0(s64), implicit $exec + ; GREEDY-NEXT: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[UV1]].sub1(s64), implicit $exec + ; 
GREEDY-NEXT: [[MV1:%[0-9]+]]:sreg_64_xexec(s64) = G_MERGE_VALUES [[V_READFIRSTLANE_B32_2]](s32), [[V_READFIRSTLANE_B32_3]](s32) + ; GREEDY-NEXT: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[MV1]](s64), [[UV1]](s64), implicit $exec + ; GREEDY-NEXT: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_1]], [[V_CMP_EQ_U64_e64_]], implicit-def $scc + ; GREEDY-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:sgpr(<4 x s32>) = G_BUILD_VECTOR [[V_READFIRSTLANE_B32_]](s32), [[V_READFIRSTLANE_B32_1]](s32), [[V_READFIRSTLANE_B32_2]](s32), [[V_READFIRSTLANE_B32_3]](s32) + ; GREEDY-NEXT: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_]], implicit-def $exec, implicit-def $scc, implicit $exec + ; GREEDY-NEXT: [[AMDGPU_BUFFER_LOAD:%[0-9]+]]:vgpr(<4 x s32>) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR1]](<4 x s32>), [[C2]](s32), [[C1]], [[COPY4]], 4064, 0, 0 :: (dereferenceable invariant load (s128), align 4) + ; GREEDY-NEXT: [[AMDGPU_BUFFER_LOAD1:%[0-9]+]]:vgpr(<4 x s32>) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR1]](<4 x s32>), [[C2]](s32), [[C1]], [[COPY4]], 4080, 0, 0 :: (dereferenceable invariant load (s128), align 4) + ; GREEDY-NEXT: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc + ; GREEDY-NEXT: SI_WATERFALL_LOOP %bb.2, implicit $exec + ; GREEDY-NEXT: {{ $}} + ; GREEDY-NEXT: bb.3: + ; GREEDY-NEXT: $exec = S_MOV_B64_term [[S_MOV_B64_term]] + ; GREEDY-NEXT: {{ $}} + ; GREEDY-NEXT: bb.4: + ; GREEDY-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:vgpr(<8 x s32>) = G_CONCAT_VECTORS [[AMDGPU_BUFFER_LOAD]](<4 x s32>), [[AMDGPU_BUFFER_LOAD1]](<4 x s32>) + ; GREEDY-NEXT: [[UV2:%[0-9]+]]:vgpr(s32), [[UV3:%[0-9]+]]:vgpr(s32), [[UV4:%[0-9]+]]:vgpr(s32), [[UV5:%[0-9]+]]:vgpr(s32), [[UV6:%[0-9]+]]:vgpr(s32), [[UV7:%[0-9]+]]:vgpr(s32), [[UV8:%[0-9]+]]:vgpr(s32), [[UV9:%[0-9]+]]:vgpr(s32) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<8 x s32>) + ; GREEDY-NEXT: $vgpr0 = COPY [[UV2]](s32) + ; GREEDY-NEXT: $vgpr1 = COPY [[UV3]](s32) + ; GREEDY-NEXT: $vgpr2 = COPY [[UV4]](s32) + ; GREEDY-NEXT: $vgpr3 = COPY [[UV5]](s32) + ; GREEDY-NEXT: $vgpr4 = COPY [[UV6]](s32) + ; GREEDY-NEXT: $vgpr5 = COPY [[UV7]](s32) + ; GREEDY-NEXT: $vgpr6 = COPY [[UV8]](s32) + ; GREEDY-NEXT: $vgpr7 = COPY [[UV9]](s32) + ; GREEDY-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7 %soffset = add i32 %soffset.base, 4064 %val = call <8 x float> @llvm.amdgcn.s.buffer.load.v8f32(<4 x i32> %rsrc, i32 %soffset, i32 0) ret <8 x float> %val @@ -2034,102 +2148,112 @@ define amdgpu_ps <8 x float> @s_buffer_load_v8f32_vgpr_rsrc_add_4064(<4 x i32> % define amdgpu_ps <8 x float> @s_buffer_load_v8f32_vgpr_rsrc_add_4068(<4 x i32> %rsrc, i32 inreg %soffset.base) { ; CHECK-LABEL: name: s_buffer_load_v8f32_vgpr_rsrc_add_4068 ; CHECK: bb.1 (%ir-block.0): - ; CHECK: liveins: $sgpr2, $vgpr0, $vgpr1, $vgpr2, $vgpr3 - ; CHECK: [[COPY:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 - ; CHECK: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY $vgpr1 - ; CHECK: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY $vgpr2 - ; CHECK: [[COPY3:%[0-9]+]]:vgpr(s32) = COPY $vgpr3 - ; CHECK: [[BUILD_VECTOR:%[0-9]+]]:vgpr(<4 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32) - ; CHECK: [[COPY4:%[0-9]+]]:sgpr(s32) = COPY $sgpr2 - ; CHECK: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 4068 - ; CHECK: [[ADD:%[0-9]+]]:sgpr(s32) = G_ADD [[COPY4]], [[C]] - ; CHECK: [[COPY5:%[0-9]+]]:vgpr(s32) = COPY [[ADD]](s32) - ; CHECK: 
[[C1:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 0 - ; CHECK: [[C2:%[0-9]+]]:vgpr(s32) = G_CONSTANT i32 0 - ; CHECK: [[DEF:%[0-9]+]]:sreg_64_xexec = IMPLICIT_DEF - ; CHECK: [[UV:%[0-9]+]]:vreg_64(s64), [[UV1:%[0-9]+]]:vreg_64(s64) = G_UNMERGE_VALUES [[BUILD_VECTOR]](<4 x s32>) - ; CHECK: [[S_MOV_B64_term:%[0-9]+]]:sreg_64_xexec = S_MOV_B64_term $exec - ; CHECK: bb.2: - ; CHECK: successors: %bb.3, %bb.2 - ; CHECK: [[PHI:%[0-9]+]]:sreg_64_xexec = PHI [[DEF]], %bb.1, %27, %bb.2 - ; CHECK: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[UV]].sub0(s64), implicit $exec - ; CHECK: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[UV]].sub1(s64), implicit $exec - ; CHECK: [[MV:%[0-9]+]]:sreg_64_xexec(s64) = G_MERGE_VALUES [[V_READFIRSTLANE_B32_]](s32), [[V_READFIRSTLANE_B32_1]](s32) - ; CHECK: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[MV]](s64), [[UV]](s64), implicit $exec - ; CHECK: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[UV1]].sub0(s64), implicit $exec - ; CHECK: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[UV1]].sub1(s64), implicit $exec - ; CHECK: [[MV1:%[0-9]+]]:sreg_64_xexec(s64) = G_MERGE_VALUES [[V_READFIRSTLANE_B32_2]](s32), [[V_READFIRSTLANE_B32_3]](s32) - ; CHECK: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[MV1]](s64), [[UV1]](s64), implicit $exec - ; CHECK: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_1]], [[V_CMP_EQ_U64_e64_]], implicit-def $scc - ; CHECK: [[BUILD_VECTOR1:%[0-9]+]]:sgpr(<4 x s32>) = G_BUILD_VECTOR [[V_READFIRSTLANE_B32_]](s32), [[V_READFIRSTLANE_B32_1]](s32), [[V_READFIRSTLANE_B32_2]](s32), [[V_READFIRSTLANE_B32_3]](s32) - ; CHECK: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_]], implicit-def $exec, implicit-def $scc, implicit $exec - ; CHECK: [[AMDGPU_BUFFER_LOAD:%[0-9]+]]:vgpr(<4 x s32>) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR1]](<4 x s32>), [[C2]](s32), [[COPY5]], [[C1]], 0, 0, 0 :: (dereferenceable invariant load (s128), align 4) - ; CHECK: [[AMDGPU_BUFFER_LOAD1:%[0-9]+]]:vgpr(<4 x s32>) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR1]](<4 x s32>), [[C2]](s32), [[COPY5]], [[C1]], 16, 0, 0 :: (dereferenceable invariant load (s128), align 4) - ; CHECK: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc - ; CHECK: SI_WATERFALL_LOOP %bb.2, implicit $exec - ; CHECK: bb.3: - ; CHECK: $exec = S_MOV_B64_term [[S_MOV_B64_term]] - ; CHECK: bb.4: - ; CHECK: [[CONCAT_VECTORS:%[0-9]+]]:vgpr(<8 x s32>) = G_CONCAT_VECTORS [[AMDGPU_BUFFER_LOAD]](<4 x s32>), [[AMDGPU_BUFFER_LOAD1]](<4 x s32>) - ; CHECK: [[UV2:%[0-9]+]]:vgpr(s32), [[UV3:%[0-9]+]]:vgpr(s32), [[UV4:%[0-9]+]]:vgpr(s32), [[UV5:%[0-9]+]]:vgpr(s32), [[UV6:%[0-9]+]]:vgpr(s32), [[UV7:%[0-9]+]]:vgpr(s32), [[UV8:%[0-9]+]]:vgpr(s32), [[UV9:%[0-9]+]]:vgpr(s32) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<8 x s32>) - ; CHECK: $vgpr0 = COPY [[UV2]](s32) - ; CHECK: $vgpr1 = COPY [[UV3]](s32) - ; CHECK: $vgpr2 = COPY [[UV4]](s32) - ; CHECK: $vgpr3 = COPY [[UV5]](s32) - ; CHECK: $vgpr4 = COPY [[UV6]](s32) - ; CHECK: $vgpr5 = COPY [[UV7]](s32) - ; CHECK: $vgpr6 = COPY [[UV8]](s32) - ; CHECK: $vgpr7 = COPY [[UV9]](s32) - ; CHECK: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7 + ; CHECK-NEXT: liveins: $sgpr2, $vgpr0, $vgpr1, $vgpr2, $vgpr3 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr(s32) = 
COPY $vgpr0 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY $vgpr1 + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY $vgpr2 + ; CHECK-NEXT: [[COPY3:%[0-9]+]]:vgpr(s32) = COPY $vgpr3 + ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:vgpr(<4 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32) + ; CHECK-NEXT: [[COPY4:%[0-9]+]]:sgpr(s32) = COPY $sgpr2 + ; CHECK-NEXT: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 4068 + ; CHECK-NEXT: [[ADD:%[0-9]+]]:sgpr(s32) = G_ADD [[COPY4]], [[C]] + ; CHECK-NEXT: [[COPY5:%[0-9]+]]:vgpr(s32) = COPY [[ADD]](s32) + ; CHECK-NEXT: [[C1:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 0 + ; CHECK-NEXT: [[C2:%[0-9]+]]:vgpr(s32) = G_CONSTANT i32 0 + ; CHECK-NEXT: [[DEF:%[0-9]+]]:sreg_64_xexec = IMPLICIT_DEF + ; CHECK-NEXT: [[UV:%[0-9]+]]:vreg_64(s64), [[UV1:%[0-9]+]]:vreg_64(s64) = G_UNMERGE_VALUES [[BUILD_VECTOR]](<4 x s32>) + ; CHECK-NEXT: [[S_MOV_B64_term:%[0-9]+]]:sreg_64_xexec = S_MOV_B64_term $exec + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: bb.2: + ; CHECK-NEXT: successors: %bb.3, %bb.2 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[PHI:%[0-9]+]]:sreg_64_xexec = PHI [[DEF]], %bb.1, %27, %bb.2 + ; CHECK-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[UV]].sub0(s64), implicit $exec + ; CHECK-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[UV]].sub1(s64), implicit $exec + ; CHECK-NEXT: [[MV:%[0-9]+]]:sreg_64_xexec(s64) = G_MERGE_VALUES [[V_READFIRSTLANE_B32_]](s32), [[V_READFIRSTLANE_B32_1]](s32) + ; CHECK-NEXT: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[MV]](s64), [[UV]](s64), implicit $exec + ; CHECK-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[UV1]].sub0(s64), implicit $exec + ; CHECK-NEXT: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[UV1]].sub1(s64), implicit $exec + ; CHECK-NEXT: [[MV1:%[0-9]+]]:sreg_64_xexec(s64) = G_MERGE_VALUES [[V_READFIRSTLANE_B32_2]](s32), [[V_READFIRSTLANE_B32_3]](s32) + ; CHECK-NEXT: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[MV1]](s64), [[UV1]](s64), implicit $exec + ; CHECK-NEXT: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_1]], [[V_CMP_EQ_U64_e64_]], implicit-def $scc + ; CHECK-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:sgpr(<4 x s32>) = G_BUILD_VECTOR [[V_READFIRSTLANE_B32_]](s32), [[V_READFIRSTLANE_B32_1]](s32), [[V_READFIRSTLANE_B32_2]](s32), [[V_READFIRSTLANE_B32_3]](s32) + ; CHECK-NEXT: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_]], implicit-def $exec, implicit-def $scc, implicit $exec + ; CHECK-NEXT: [[AMDGPU_BUFFER_LOAD:%[0-9]+]]:vgpr(<4 x s32>) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR1]](<4 x s32>), [[C2]](s32), [[COPY5]], [[C1]], 0, 0, 0 :: (dereferenceable invariant load (s128), align 4) + ; CHECK-NEXT: [[AMDGPU_BUFFER_LOAD1:%[0-9]+]]:vgpr(<4 x s32>) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR1]](<4 x s32>), [[C2]](s32), [[COPY5]], [[C1]], 16, 0, 0 :: (dereferenceable invariant load (s128), align 4) + ; CHECK-NEXT: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc + ; CHECK-NEXT: SI_WATERFALL_LOOP %bb.2, implicit $exec + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: bb.3: + ; CHECK-NEXT: $exec = S_MOV_B64_term [[S_MOV_B64_term]] + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: bb.4: + ; CHECK-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:vgpr(<8 x s32>) = G_CONCAT_VECTORS [[AMDGPU_BUFFER_LOAD]](<4 x s32>), [[AMDGPU_BUFFER_LOAD1]](<4 x s32>) + ; CHECK-NEXT: [[UV2:%[0-9]+]]:vgpr(s32), [[UV3:%[0-9]+]]:vgpr(s32), 
[[UV4:%[0-9]+]]:vgpr(s32), [[UV5:%[0-9]+]]:vgpr(s32), [[UV6:%[0-9]+]]:vgpr(s32), [[UV7:%[0-9]+]]:vgpr(s32), [[UV8:%[0-9]+]]:vgpr(s32), [[UV9:%[0-9]+]]:vgpr(s32) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<8 x s32>) + ; CHECK-NEXT: $vgpr0 = COPY [[UV2]](s32) + ; CHECK-NEXT: $vgpr1 = COPY [[UV3]](s32) + ; CHECK-NEXT: $vgpr2 = COPY [[UV4]](s32) + ; CHECK-NEXT: $vgpr3 = COPY [[UV5]](s32) + ; CHECK-NEXT: $vgpr4 = COPY [[UV6]](s32) + ; CHECK-NEXT: $vgpr5 = COPY [[UV7]](s32) + ; CHECK-NEXT: $vgpr6 = COPY [[UV8]](s32) + ; CHECK-NEXT: $vgpr7 = COPY [[UV9]](s32) + ; CHECK-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7 ; GREEDY-LABEL: name: s_buffer_load_v8f32_vgpr_rsrc_add_4068 ; GREEDY: bb.1 (%ir-block.0): - ; GREEDY: liveins: $sgpr2, $vgpr0, $vgpr1, $vgpr2, $vgpr3 - ; GREEDY: [[COPY:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 - ; GREEDY: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY $vgpr1 - ; GREEDY: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY $vgpr2 - ; GREEDY: [[COPY3:%[0-9]+]]:vgpr(s32) = COPY $vgpr3 - ; GREEDY: [[BUILD_VECTOR:%[0-9]+]]:vgpr(<4 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32) - ; GREEDY: [[COPY4:%[0-9]+]]:sgpr(s32) = COPY $sgpr2 - ; GREEDY: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 4068 - ; GREEDY: [[ADD:%[0-9]+]]:sgpr(s32) = G_ADD [[COPY4]], [[C]] - ; GREEDY: [[COPY5:%[0-9]+]]:vgpr(s32) = COPY [[ADD]](s32) - ; GREEDY: [[C1:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 0 - ; GREEDY: [[C2:%[0-9]+]]:vgpr(s32) = G_CONSTANT i32 0 - ; GREEDY: [[DEF:%[0-9]+]]:sreg_64_xexec = IMPLICIT_DEF - ; GREEDY: [[UV:%[0-9]+]]:vreg_64(s64), [[UV1:%[0-9]+]]:vreg_64(s64) = G_UNMERGE_VALUES [[BUILD_VECTOR]](<4 x s32>) - ; GREEDY: [[S_MOV_B64_term:%[0-9]+]]:sreg_64_xexec = S_MOV_B64_term $exec - ; GREEDY: bb.2: - ; GREEDY: successors: %bb.3, %bb.2 - ; GREEDY: [[PHI:%[0-9]+]]:sreg_64_xexec = PHI [[DEF]], %bb.1, %27, %bb.2 - ; GREEDY: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[UV]].sub0(s64), implicit $exec - ; GREEDY: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[UV]].sub1(s64), implicit $exec - ; GREEDY: [[MV:%[0-9]+]]:sreg_64_xexec(s64) = G_MERGE_VALUES [[V_READFIRSTLANE_B32_]](s32), [[V_READFIRSTLANE_B32_1]](s32) - ; GREEDY: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[MV]](s64), [[UV]](s64), implicit $exec - ; GREEDY: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[UV1]].sub0(s64), implicit $exec - ; GREEDY: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[UV1]].sub1(s64), implicit $exec - ; GREEDY: [[MV1:%[0-9]+]]:sreg_64_xexec(s64) = G_MERGE_VALUES [[V_READFIRSTLANE_B32_2]](s32), [[V_READFIRSTLANE_B32_3]](s32) - ; GREEDY: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[MV1]](s64), [[UV1]](s64), implicit $exec - ; GREEDY: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_1]], [[V_CMP_EQ_U64_e64_]], implicit-def $scc - ; GREEDY: [[BUILD_VECTOR1:%[0-9]+]]:sgpr(<4 x s32>) = G_BUILD_VECTOR [[V_READFIRSTLANE_B32_]](s32), [[V_READFIRSTLANE_B32_1]](s32), [[V_READFIRSTLANE_B32_2]](s32), [[V_READFIRSTLANE_B32_3]](s32) - ; GREEDY: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_]], implicit-def $exec, implicit-def $scc, implicit $exec - ; GREEDY: [[AMDGPU_BUFFER_LOAD:%[0-9]+]]:vgpr(<4 x s32>) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR1]](<4 x s32>), [[C2]](s32), [[COPY5]], [[C1]], 0, 0, 0 :: 
(dereferenceable invariant load (s128), align 4) - ; GREEDY: [[AMDGPU_BUFFER_LOAD1:%[0-9]+]]:vgpr(<4 x s32>) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR1]](<4 x s32>), [[C2]](s32), [[COPY5]], [[C1]], 16, 0, 0 :: (dereferenceable invariant load (s128), align 4) - ; GREEDY: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc - ; GREEDY: SI_WATERFALL_LOOP %bb.2, implicit $exec - ; GREEDY: bb.3: - ; GREEDY: $exec = S_MOV_B64_term [[S_MOV_B64_term]] - ; GREEDY: bb.4: - ; GREEDY: [[CONCAT_VECTORS:%[0-9]+]]:vgpr(<8 x s32>) = G_CONCAT_VECTORS [[AMDGPU_BUFFER_LOAD]](<4 x s32>), [[AMDGPU_BUFFER_LOAD1]](<4 x s32>) - ; GREEDY: [[UV2:%[0-9]+]]:vgpr(s32), [[UV3:%[0-9]+]]:vgpr(s32), [[UV4:%[0-9]+]]:vgpr(s32), [[UV5:%[0-9]+]]:vgpr(s32), [[UV6:%[0-9]+]]:vgpr(s32), [[UV7:%[0-9]+]]:vgpr(s32), [[UV8:%[0-9]+]]:vgpr(s32), [[UV9:%[0-9]+]]:vgpr(s32) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<8 x s32>) - ; GREEDY: $vgpr0 = COPY [[UV2]](s32) - ; GREEDY: $vgpr1 = COPY [[UV3]](s32) - ; GREEDY: $vgpr2 = COPY [[UV4]](s32) - ; GREEDY: $vgpr3 = COPY [[UV5]](s32) - ; GREEDY: $vgpr4 = COPY [[UV6]](s32) - ; GREEDY: $vgpr5 = COPY [[UV7]](s32) - ; GREEDY: $vgpr6 = COPY [[UV8]](s32) - ; GREEDY: $vgpr7 = COPY [[UV9]](s32) - ; GREEDY: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7 + ; GREEDY-NEXT: liveins: $sgpr2, $vgpr0, $vgpr1, $vgpr2, $vgpr3 + ; GREEDY-NEXT: {{ $}} + ; GREEDY-NEXT: [[COPY:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 + ; GREEDY-NEXT: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY $vgpr1 + ; GREEDY-NEXT: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY $vgpr2 + ; GREEDY-NEXT: [[COPY3:%[0-9]+]]:vgpr(s32) = COPY $vgpr3 + ; GREEDY-NEXT: [[BUILD_VECTOR:%[0-9]+]]:vgpr(<4 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32) + ; GREEDY-NEXT: [[COPY4:%[0-9]+]]:sgpr(s32) = COPY $sgpr2 + ; GREEDY-NEXT: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 4068 + ; GREEDY-NEXT: [[ADD:%[0-9]+]]:sgpr(s32) = G_ADD [[COPY4]], [[C]] + ; GREEDY-NEXT: [[COPY5:%[0-9]+]]:vgpr(s32) = COPY [[ADD]](s32) + ; GREEDY-NEXT: [[C1:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 0 + ; GREEDY-NEXT: [[C2:%[0-9]+]]:vgpr(s32) = G_CONSTANT i32 0 + ; GREEDY-NEXT: [[DEF:%[0-9]+]]:sreg_64_xexec = IMPLICIT_DEF + ; GREEDY-NEXT: [[UV:%[0-9]+]]:vreg_64(s64), [[UV1:%[0-9]+]]:vreg_64(s64) = G_UNMERGE_VALUES [[BUILD_VECTOR]](<4 x s32>) + ; GREEDY-NEXT: [[S_MOV_B64_term:%[0-9]+]]:sreg_64_xexec = S_MOV_B64_term $exec + ; GREEDY-NEXT: {{ $}} + ; GREEDY-NEXT: bb.2: + ; GREEDY-NEXT: successors: %bb.3, %bb.2 + ; GREEDY-NEXT: {{ $}} + ; GREEDY-NEXT: [[PHI:%[0-9]+]]:sreg_64_xexec = PHI [[DEF]], %bb.1, %27, %bb.2 + ; GREEDY-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[UV]].sub0(s64), implicit $exec + ; GREEDY-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[UV]].sub1(s64), implicit $exec + ; GREEDY-NEXT: [[MV:%[0-9]+]]:sreg_64_xexec(s64) = G_MERGE_VALUES [[V_READFIRSTLANE_B32_]](s32), [[V_READFIRSTLANE_B32_1]](s32) + ; GREEDY-NEXT: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[MV]](s64), [[UV]](s64), implicit $exec + ; GREEDY-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[UV1]].sub0(s64), implicit $exec + ; GREEDY-NEXT: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[UV1]].sub1(s64), implicit $exec + ; GREEDY-NEXT: [[MV1:%[0-9]+]]:sreg_64_xexec(s64) = G_MERGE_VALUES [[V_READFIRSTLANE_B32_2]](s32), [[V_READFIRSTLANE_B32_3]](s32) + 
; GREEDY-NEXT: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[MV1]](s64), [[UV1]](s64), implicit $exec + ; GREEDY-NEXT: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_1]], [[V_CMP_EQ_U64_e64_]], implicit-def $scc + ; GREEDY-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:sgpr(<4 x s32>) = G_BUILD_VECTOR [[V_READFIRSTLANE_B32_]](s32), [[V_READFIRSTLANE_B32_1]](s32), [[V_READFIRSTLANE_B32_2]](s32), [[V_READFIRSTLANE_B32_3]](s32) + ; GREEDY-NEXT: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_]], implicit-def $exec, implicit-def $scc, implicit $exec + ; GREEDY-NEXT: [[AMDGPU_BUFFER_LOAD:%[0-9]+]]:vgpr(<4 x s32>) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR1]](<4 x s32>), [[C2]](s32), [[COPY5]], [[C1]], 0, 0, 0 :: (dereferenceable invariant load (s128), align 4) + ; GREEDY-NEXT: [[AMDGPU_BUFFER_LOAD1:%[0-9]+]]:vgpr(<4 x s32>) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR1]](<4 x s32>), [[C2]](s32), [[COPY5]], [[C1]], 16, 0, 0 :: (dereferenceable invariant load (s128), align 4) + ; GREEDY-NEXT: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc + ; GREEDY-NEXT: SI_WATERFALL_LOOP %bb.2, implicit $exec + ; GREEDY-NEXT: {{ $}} + ; GREEDY-NEXT: bb.3: + ; GREEDY-NEXT: $exec = S_MOV_B64_term [[S_MOV_B64_term]] + ; GREEDY-NEXT: {{ $}} + ; GREEDY-NEXT: bb.4: + ; GREEDY-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:vgpr(<8 x s32>) = G_CONCAT_VECTORS [[AMDGPU_BUFFER_LOAD]](<4 x s32>), [[AMDGPU_BUFFER_LOAD1]](<4 x s32>) + ; GREEDY-NEXT: [[UV2:%[0-9]+]]:vgpr(s32), [[UV3:%[0-9]+]]:vgpr(s32), [[UV4:%[0-9]+]]:vgpr(s32), [[UV5:%[0-9]+]]:vgpr(s32), [[UV6:%[0-9]+]]:vgpr(s32), [[UV7:%[0-9]+]]:vgpr(s32), [[UV8:%[0-9]+]]:vgpr(s32), [[UV9:%[0-9]+]]:vgpr(s32) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<8 x s32>) + ; GREEDY-NEXT: $vgpr0 = COPY [[UV2]](s32) + ; GREEDY-NEXT: $vgpr1 = COPY [[UV3]](s32) + ; GREEDY-NEXT: $vgpr2 = COPY [[UV4]](s32) + ; GREEDY-NEXT: $vgpr3 = COPY [[UV5]](s32) + ; GREEDY-NEXT: $vgpr4 = COPY [[UV6]](s32) + ; GREEDY-NEXT: $vgpr5 = COPY [[UV7]](s32) + ; GREEDY-NEXT: $vgpr6 = COPY [[UV8]](s32) + ; GREEDY-NEXT: $vgpr7 = COPY [[UV9]](s32) + ; GREEDY-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7 %soffset = add i32 %soffset.base, 4068 %val = call <8 x float> @llvm.amdgcn.s.buffer.load.v8f32(<4 x i32> %rsrc, i32 %soffset, i32 0) ret <8 x float> %val @@ -2138,102 +2262,112 @@ define amdgpu_ps <8 x float> @s_buffer_load_v8f32_vgpr_rsrc_add_4068(<4 x i32> % define amdgpu_ps <8 x float> @s_buffer_load_v8f32_vgpr_rsrc_add_4096(<4 x i32> %rsrc, i32 inreg %soffset.base) { ; CHECK-LABEL: name: s_buffer_load_v8f32_vgpr_rsrc_add_4096 ; CHECK: bb.1 (%ir-block.0): - ; CHECK: liveins: $sgpr2, $vgpr0, $vgpr1, $vgpr2, $vgpr3 - ; CHECK: [[COPY:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 - ; CHECK: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY $vgpr1 - ; CHECK: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY $vgpr2 - ; CHECK: [[COPY3:%[0-9]+]]:vgpr(s32) = COPY $vgpr3 - ; CHECK: [[BUILD_VECTOR:%[0-9]+]]:vgpr(<4 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32) - ; CHECK: [[COPY4:%[0-9]+]]:sgpr(s32) = COPY $sgpr2 - ; CHECK: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 4096 - ; CHECK: [[ADD:%[0-9]+]]:sgpr(s32) = G_ADD [[COPY4]], [[C]] - ; CHECK: [[COPY5:%[0-9]+]]:vgpr(s32) = COPY [[ADD]](s32) - ; CHECK: [[C1:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 0 - ; CHECK: [[C2:%[0-9]+]]:vgpr(s32) = G_CONSTANT i32 0 - ; CHECK: [[DEF:%[0-9]+]]:sreg_64_xexec = IMPLICIT_DEF - 
; CHECK: [[UV:%[0-9]+]]:vreg_64(s64), [[UV1:%[0-9]+]]:vreg_64(s64) = G_UNMERGE_VALUES [[BUILD_VECTOR]](<4 x s32>) - ; CHECK: [[S_MOV_B64_term:%[0-9]+]]:sreg_64_xexec = S_MOV_B64_term $exec - ; CHECK: bb.2: - ; CHECK: successors: %bb.3, %bb.2 - ; CHECK: [[PHI:%[0-9]+]]:sreg_64_xexec = PHI [[DEF]], %bb.1, %27, %bb.2 - ; CHECK: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[UV]].sub0(s64), implicit $exec - ; CHECK: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[UV]].sub1(s64), implicit $exec - ; CHECK: [[MV:%[0-9]+]]:sreg_64_xexec(s64) = G_MERGE_VALUES [[V_READFIRSTLANE_B32_]](s32), [[V_READFIRSTLANE_B32_1]](s32) - ; CHECK: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[MV]](s64), [[UV]](s64), implicit $exec - ; CHECK: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[UV1]].sub0(s64), implicit $exec - ; CHECK: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[UV1]].sub1(s64), implicit $exec - ; CHECK: [[MV1:%[0-9]+]]:sreg_64_xexec(s64) = G_MERGE_VALUES [[V_READFIRSTLANE_B32_2]](s32), [[V_READFIRSTLANE_B32_3]](s32) - ; CHECK: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[MV1]](s64), [[UV1]](s64), implicit $exec - ; CHECK: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_1]], [[V_CMP_EQ_U64_e64_]], implicit-def $scc - ; CHECK: [[BUILD_VECTOR1:%[0-9]+]]:sgpr(<4 x s32>) = G_BUILD_VECTOR [[V_READFIRSTLANE_B32_]](s32), [[V_READFIRSTLANE_B32_1]](s32), [[V_READFIRSTLANE_B32_2]](s32), [[V_READFIRSTLANE_B32_3]](s32) - ; CHECK: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_]], implicit-def $exec, implicit-def $scc, implicit $exec - ; CHECK: [[AMDGPU_BUFFER_LOAD:%[0-9]+]]:vgpr(<4 x s32>) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR1]](<4 x s32>), [[C2]](s32), [[COPY5]], [[C1]], 0, 0, 0 :: (dereferenceable invariant load (s128), align 4) - ; CHECK: [[AMDGPU_BUFFER_LOAD1:%[0-9]+]]:vgpr(<4 x s32>) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR1]](<4 x s32>), [[C2]](s32), [[COPY5]], [[C1]], 16, 0, 0 :: (dereferenceable invariant load (s128), align 4) - ; CHECK: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc - ; CHECK: SI_WATERFALL_LOOP %bb.2, implicit $exec - ; CHECK: bb.3: - ; CHECK: $exec = S_MOV_B64_term [[S_MOV_B64_term]] - ; CHECK: bb.4: - ; CHECK: [[CONCAT_VECTORS:%[0-9]+]]:vgpr(<8 x s32>) = G_CONCAT_VECTORS [[AMDGPU_BUFFER_LOAD]](<4 x s32>), [[AMDGPU_BUFFER_LOAD1]](<4 x s32>) - ; CHECK: [[UV2:%[0-9]+]]:vgpr(s32), [[UV3:%[0-9]+]]:vgpr(s32), [[UV4:%[0-9]+]]:vgpr(s32), [[UV5:%[0-9]+]]:vgpr(s32), [[UV6:%[0-9]+]]:vgpr(s32), [[UV7:%[0-9]+]]:vgpr(s32), [[UV8:%[0-9]+]]:vgpr(s32), [[UV9:%[0-9]+]]:vgpr(s32) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<8 x s32>) - ; CHECK: $vgpr0 = COPY [[UV2]](s32) - ; CHECK: $vgpr1 = COPY [[UV3]](s32) - ; CHECK: $vgpr2 = COPY [[UV4]](s32) - ; CHECK: $vgpr3 = COPY [[UV5]](s32) - ; CHECK: $vgpr4 = COPY [[UV6]](s32) - ; CHECK: $vgpr5 = COPY [[UV7]](s32) - ; CHECK: $vgpr6 = COPY [[UV8]](s32) - ; CHECK: $vgpr7 = COPY [[UV9]](s32) - ; CHECK: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7 + ; CHECK-NEXT: liveins: $sgpr2, $vgpr0, $vgpr1, $vgpr2, $vgpr3 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY $vgpr1 + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY $vgpr2 + ; CHECK-NEXT: 
[[COPY3:%[0-9]+]]:vgpr(s32) = COPY $vgpr3 + ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:vgpr(<4 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32) + ; CHECK-NEXT: [[COPY4:%[0-9]+]]:sgpr(s32) = COPY $sgpr2 + ; CHECK-NEXT: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 4096 + ; CHECK-NEXT: [[ADD:%[0-9]+]]:sgpr(s32) = G_ADD [[COPY4]], [[C]] + ; CHECK-NEXT: [[COPY5:%[0-9]+]]:vgpr(s32) = COPY [[ADD]](s32) + ; CHECK-NEXT: [[C1:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 0 + ; CHECK-NEXT: [[C2:%[0-9]+]]:vgpr(s32) = G_CONSTANT i32 0 + ; CHECK-NEXT: [[DEF:%[0-9]+]]:sreg_64_xexec = IMPLICIT_DEF + ; CHECK-NEXT: [[UV:%[0-9]+]]:vreg_64(s64), [[UV1:%[0-9]+]]:vreg_64(s64) = G_UNMERGE_VALUES [[BUILD_VECTOR]](<4 x s32>) + ; CHECK-NEXT: [[S_MOV_B64_term:%[0-9]+]]:sreg_64_xexec = S_MOV_B64_term $exec + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: bb.2: + ; CHECK-NEXT: successors: %bb.3, %bb.2 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[PHI:%[0-9]+]]:sreg_64_xexec = PHI [[DEF]], %bb.1, %27, %bb.2 + ; CHECK-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[UV]].sub0(s64), implicit $exec + ; CHECK-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[UV]].sub1(s64), implicit $exec + ; CHECK-NEXT: [[MV:%[0-9]+]]:sreg_64_xexec(s64) = G_MERGE_VALUES [[V_READFIRSTLANE_B32_]](s32), [[V_READFIRSTLANE_B32_1]](s32) + ; CHECK-NEXT: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[MV]](s64), [[UV]](s64), implicit $exec + ; CHECK-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[UV1]].sub0(s64), implicit $exec + ; CHECK-NEXT: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[UV1]].sub1(s64), implicit $exec + ; CHECK-NEXT: [[MV1:%[0-9]+]]:sreg_64_xexec(s64) = G_MERGE_VALUES [[V_READFIRSTLANE_B32_2]](s32), [[V_READFIRSTLANE_B32_3]](s32) + ; CHECK-NEXT: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[MV1]](s64), [[UV1]](s64), implicit $exec + ; CHECK-NEXT: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_1]], [[V_CMP_EQ_U64_e64_]], implicit-def $scc + ; CHECK-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:sgpr(<4 x s32>) = G_BUILD_VECTOR [[V_READFIRSTLANE_B32_]](s32), [[V_READFIRSTLANE_B32_1]](s32), [[V_READFIRSTLANE_B32_2]](s32), [[V_READFIRSTLANE_B32_3]](s32) + ; CHECK-NEXT: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_]], implicit-def $exec, implicit-def $scc, implicit $exec + ; CHECK-NEXT: [[AMDGPU_BUFFER_LOAD:%[0-9]+]]:vgpr(<4 x s32>) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR1]](<4 x s32>), [[C2]](s32), [[COPY5]], [[C1]], 0, 0, 0 :: (dereferenceable invariant load (s128), align 4) + ; CHECK-NEXT: [[AMDGPU_BUFFER_LOAD1:%[0-9]+]]:vgpr(<4 x s32>) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR1]](<4 x s32>), [[C2]](s32), [[COPY5]], [[C1]], 16, 0, 0 :: (dereferenceable invariant load (s128), align 4) + ; CHECK-NEXT: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc + ; CHECK-NEXT: SI_WATERFALL_LOOP %bb.2, implicit $exec + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: bb.3: + ; CHECK-NEXT: $exec = S_MOV_B64_term [[S_MOV_B64_term]] + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: bb.4: + ; CHECK-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:vgpr(<8 x s32>) = G_CONCAT_VECTORS [[AMDGPU_BUFFER_LOAD]](<4 x s32>), [[AMDGPU_BUFFER_LOAD1]](<4 x s32>) + ; CHECK-NEXT: [[UV2:%[0-9]+]]:vgpr(s32), [[UV3:%[0-9]+]]:vgpr(s32), [[UV4:%[0-9]+]]:vgpr(s32), [[UV5:%[0-9]+]]:vgpr(s32), [[UV6:%[0-9]+]]:vgpr(s32), [[UV7:%[0-9]+]]:vgpr(s32), [[UV8:%[0-9]+]]:vgpr(s32), 
[[UV9:%[0-9]+]]:vgpr(s32) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<8 x s32>) + ; CHECK-NEXT: $vgpr0 = COPY [[UV2]](s32) + ; CHECK-NEXT: $vgpr1 = COPY [[UV3]](s32) + ; CHECK-NEXT: $vgpr2 = COPY [[UV4]](s32) + ; CHECK-NEXT: $vgpr3 = COPY [[UV5]](s32) + ; CHECK-NEXT: $vgpr4 = COPY [[UV6]](s32) + ; CHECK-NEXT: $vgpr5 = COPY [[UV7]](s32) + ; CHECK-NEXT: $vgpr6 = COPY [[UV8]](s32) + ; CHECK-NEXT: $vgpr7 = COPY [[UV9]](s32) + ; CHECK-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7 ; GREEDY-LABEL: name: s_buffer_load_v8f32_vgpr_rsrc_add_4096 ; GREEDY: bb.1 (%ir-block.0): - ; GREEDY: liveins: $sgpr2, $vgpr0, $vgpr1, $vgpr2, $vgpr3 - ; GREEDY: [[COPY:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 - ; GREEDY: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY $vgpr1 - ; GREEDY: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY $vgpr2 - ; GREEDY: [[COPY3:%[0-9]+]]:vgpr(s32) = COPY $vgpr3 - ; GREEDY: [[BUILD_VECTOR:%[0-9]+]]:vgpr(<4 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32) - ; GREEDY: [[COPY4:%[0-9]+]]:sgpr(s32) = COPY $sgpr2 - ; GREEDY: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 4096 - ; GREEDY: [[ADD:%[0-9]+]]:sgpr(s32) = G_ADD [[COPY4]], [[C]] - ; GREEDY: [[COPY5:%[0-9]+]]:vgpr(s32) = COPY [[ADD]](s32) - ; GREEDY: [[C1:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 0 - ; GREEDY: [[C2:%[0-9]+]]:vgpr(s32) = G_CONSTANT i32 0 - ; GREEDY: [[DEF:%[0-9]+]]:sreg_64_xexec = IMPLICIT_DEF - ; GREEDY: [[UV:%[0-9]+]]:vreg_64(s64), [[UV1:%[0-9]+]]:vreg_64(s64) = G_UNMERGE_VALUES [[BUILD_VECTOR]](<4 x s32>) - ; GREEDY: [[S_MOV_B64_term:%[0-9]+]]:sreg_64_xexec = S_MOV_B64_term $exec - ; GREEDY: bb.2: - ; GREEDY: successors: %bb.3, %bb.2 - ; GREEDY: [[PHI:%[0-9]+]]:sreg_64_xexec = PHI [[DEF]], %bb.1, %27, %bb.2 - ; GREEDY: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[UV]].sub0(s64), implicit $exec - ; GREEDY: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[UV]].sub1(s64), implicit $exec - ; GREEDY: [[MV:%[0-9]+]]:sreg_64_xexec(s64) = G_MERGE_VALUES [[V_READFIRSTLANE_B32_]](s32), [[V_READFIRSTLANE_B32_1]](s32) - ; GREEDY: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[MV]](s64), [[UV]](s64), implicit $exec - ; GREEDY: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[UV1]].sub0(s64), implicit $exec - ; GREEDY: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[UV1]].sub1(s64), implicit $exec - ; GREEDY: [[MV1:%[0-9]+]]:sreg_64_xexec(s64) = G_MERGE_VALUES [[V_READFIRSTLANE_B32_2]](s32), [[V_READFIRSTLANE_B32_3]](s32) - ; GREEDY: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[MV1]](s64), [[UV1]](s64), implicit $exec - ; GREEDY: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_1]], [[V_CMP_EQ_U64_e64_]], implicit-def $scc - ; GREEDY: [[BUILD_VECTOR1:%[0-9]+]]:sgpr(<4 x s32>) = G_BUILD_VECTOR [[V_READFIRSTLANE_B32_]](s32), [[V_READFIRSTLANE_B32_1]](s32), [[V_READFIRSTLANE_B32_2]](s32), [[V_READFIRSTLANE_B32_3]](s32) - ; GREEDY: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_]], implicit-def $exec, implicit-def $scc, implicit $exec - ; GREEDY: [[AMDGPU_BUFFER_LOAD:%[0-9]+]]:vgpr(<4 x s32>) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR1]](<4 x s32>), [[C2]](s32), [[COPY5]], [[C1]], 0, 0, 0 :: (dereferenceable invariant load (s128), align 4) - ; GREEDY: [[AMDGPU_BUFFER_LOAD1:%[0-9]+]]:vgpr(<4 x s32>) = G_AMDGPU_BUFFER_LOAD 
[[BUILD_VECTOR1]](<4 x s32>), [[C2]](s32), [[COPY5]], [[C1]], 16, 0, 0 :: (dereferenceable invariant load (s128), align 4) - ; GREEDY: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc - ; GREEDY: SI_WATERFALL_LOOP %bb.2, implicit $exec - ; GREEDY: bb.3: - ; GREEDY: $exec = S_MOV_B64_term [[S_MOV_B64_term]] - ; GREEDY: bb.4: - ; GREEDY: [[CONCAT_VECTORS:%[0-9]+]]:vgpr(<8 x s32>) = G_CONCAT_VECTORS [[AMDGPU_BUFFER_LOAD]](<4 x s32>), [[AMDGPU_BUFFER_LOAD1]](<4 x s32>) - ; GREEDY: [[UV2:%[0-9]+]]:vgpr(s32), [[UV3:%[0-9]+]]:vgpr(s32), [[UV4:%[0-9]+]]:vgpr(s32), [[UV5:%[0-9]+]]:vgpr(s32), [[UV6:%[0-9]+]]:vgpr(s32), [[UV7:%[0-9]+]]:vgpr(s32), [[UV8:%[0-9]+]]:vgpr(s32), [[UV9:%[0-9]+]]:vgpr(s32) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<8 x s32>) - ; GREEDY: $vgpr0 = COPY [[UV2]](s32) - ; GREEDY: $vgpr1 = COPY [[UV3]](s32) - ; GREEDY: $vgpr2 = COPY [[UV4]](s32) - ; GREEDY: $vgpr3 = COPY [[UV5]](s32) - ; GREEDY: $vgpr4 = COPY [[UV6]](s32) - ; GREEDY: $vgpr5 = COPY [[UV7]](s32) - ; GREEDY: $vgpr6 = COPY [[UV8]](s32) - ; GREEDY: $vgpr7 = COPY [[UV9]](s32) - ; GREEDY: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7 + ; GREEDY-NEXT: liveins: $sgpr2, $vgpr0, $vgpr1, $vgpr2, $vgpr3 + ; GREEDY-NEXT: {{ $}} + ; GREEDY-NEXT: [[COPY:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 + ; GREEDY-NEXT: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY $vgpr1 + ; GREEDY-NEXT: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY $vgpr2 + ; GREEDY-NEXT: [[COPY3:%[0-9]+]]:vgpr(s32) = COPY $vgpr3 + ; GREEDY-NEXT: [[BUILD_VECTOR:%[0-9]+]]:vgpr(<4 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32) + ; GREEDY-NEXT: [[COPY4:%[0-9]+]]:sgpr(s32) = COPY $sgpr2 + ; GREEDY-NEXT: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 4096 + ; GREEDY-NEXT: [[ADD:%[0-9]+]]:sgpr(s32) = G_ADD [[COPY4]], [[C]] + ; GREEDY-NEXT: [[COPY5:%[0-9]+]]:vgpr(s32) = COPY [[ADD]](s32) + ; GREEDY-NEXT: [[C1:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 0 + ; GREEDY-NEXT: [[C2:%[0-9]+]]:vgpr(s32) = G_CONSTANT i32 0 + ; GREEDY-NEXT: [[DEF:%[0-9]+]]:sreg_64_xexec = IMPLICIT_DEF + ; GREEDY-NEXT: [[UV:%[0-9]+]]:vreg_64(s64), [[UV1:%[0-9]+]]:vreg_64(s64) = G_UNMERGE_VALUES [[BUILD_VECTOR]](<4 x s32>) + ; GREEDY-NEXT: [[S_MOV_B64_term:%[0-9]+]]:sreg_64_xexec = S_MOV_B64_term $exec + ; GREEDY-NEXT: {{ $}} + ; GREEDY-NEXT: bb.2: + ; GREEDY-NEXT: successors: %bb.3, %bb.2 + ; GREEDY-NEXT: {{ $}} + ; GREEDY-NEXT: [[PHI:%[0-9]+]]:sreg_64_xexec = PHI [[DEF]], %bb.1, %27, %bb.2 + ; GREEDY-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[UV]].sub0(s64), implicit $exec + ; GREEDY-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[UV]].sub1(s64), implicit $exec + ; GREEDY-NEXT: [[MV:%[0-9]+]]:sreg_64_xexec(s64) = G_MERGE_VALUES [[V_READFIRSTLANE_B32_]](s32), [[V_READFIRSTLANE_B32_1]](s32) + ; GREEDY-NEXT: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[MV]](s64), [[UV]](s64), implicit $exec + ; GREEDY-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[UV1]].sub0(s64), implicit $exec + ; GREEDY-NEXT: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[UV1]].sub1(s64), implicit $exec + ; GREEDY-NEXT: [[MV1:%[0-9]+]]:sreg_64_xexec(s64) = G_MERGE_VALUES [[V_READFIRSTLANE_B32_2]](s32), [[V_READFIRSTLANE_B32_3]](s32) + ; GREEDY-NEXT: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[MV1]](s64), [[UV1]](s64), implicit $exec + ; 
GREEDY-NEXT: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_1]], [[V_CMP_EQ_U64_e64_]], implicit-def $scc + ; GREEDY-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:sgpr(<4 x s32>) = G_BUILD_VECTOR [[V_READFIRSTLANE_B32_]](s32), [[V_READFIRSTLANE_B32_1]](s32), [[V_READFIRSTLANE_B32_2]](s32), [[V_READFIRSTLANE_B32_3]](s32) + ; GREEDY-NEXT: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_]], implicit-def $exec, implicit-def $scc, implicit $exec + ; GREEDY-NEXT: [[AMDGPU_BUFFER_LOAD:%[0-9]+]]:vgpr(<4 x s32>) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR1]](<4 x s32>), [[C2]](s32), [[COPY5]], [[C1]], 0, 0, 0 :: (dereferenceable invariant load (s128), align 4) + ; GREEDY-NEXT: [[AMDGPU_BUFFER_LOAD1:%[0-9]+]]:vgpr(<4 x s32>) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR1]](<4 x s32>), [[C2]](s32), [[COPY5]], [[C1]], 16, 0, 0 :: (dereferenceable invariant load (s128), align 4) + ; GREEDY-NEXT: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc + ; GREEDY-NEXT: SI_WATERFALL_LOOP %bb.2, implicit $exec + ; GREEDY-NEXT: {{ $}} + ; GREEDY-NEXT: bb.3: + ; GREEDY-NEXT: $exec = S_MOV_B64_term [[S_MOV_B64_term]] + ; GREEDY-NEXT: {{ $}} + ; GREEDY-NEXT: bb.4: + ; GREEDY-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:vgpr(<8 x s32>) = G_CONCAT_VECTORS [[AMDGPU_BUFFER_LOAD]](<4 x s32>), [[AMDGPU_BUFFER_LOAD1]](<4 x s32>) + ; GREEDY-NEXT: [[UV2:%[0-9]+]]:vgpr(s32), [[UV3:%[0-9]+]]:vgpr(s32), [[UV4:%[0-9]+]]:vgpr(s32), [[UV5:%[0-9]+]]:vgpr(s32), [[UV6:%[0-9]+]]:vgpr(s32), [[UV7:%[0-9]+]]:vgpr(s32), [[UV8:%[0-9]+]]:vgpr(s32), [[UV9:%[0-9]+]]:vgpr(s32) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<8 x s32>) + ; GREEDY-NEXT: $vgpr0 = COPY [[UV2]](s32) + ; GREEDY-NEXT: $vgpr1 = COPY [[UV3]](s32) + ; GREEDY-NEXT: $vgpr2 = COPY [[UV4]](s32) + ; GREEDY-NEXT: $vgpr3 = COPY [[UV5]](s32) + ; GREEDY-NEXT: $vgpr4 = COPY [[UV6]](s32) + ; GREEDY-NEXT: $vgpr5 = COPY [[UV7]](s32) + ; GREEDY-NEXT: $vgpr6 = COPY [[UV8]](s32) + ; GREEDY-NEXT: $vgpr7 = COPY [[UV9]](s32) + ; GREEDY-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7 %soffset = add i32 %soffset.base, 4096 %val = call <8 x float> @llvm.amdgcn.s.buffer.load.v8f32(<4 x i32> %rsrc, i32 %soffset, i32 0) ret <8 x float> %val @@ -2242,100 +2376,110 @@ define amdgpu_ps <8 x float> @s_buffer_load_v8f32_vgpr_rsrc_add_4096(<4 x i32> % define amdgpu_ps <8 x float> @s_buffer_load_v8f32_vgpr_offset_vgpr_rsrc_add_5000(<4 x i32> %rsrc, i32 %offset.base) { ; CHECK-LABEL: name: s_buffer_load_v8f32_vgpr_offset_vgpr_rsrc_add_5000 ; CHECK: bb.1 (%ir-block.0): - ; CHECK: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4 - ; CHECK: [[COPY:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 - ; CHECK: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY $vgpr1 - ; CHECK: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY $vgpr2 - ; CHECK: [[COPY3:%[0-9]+]]:vgpr(s32) = COPY $vgpr3 - ; CHECK: [[BUILD_VECTOR:%[0-9]+]]:vgpr(<4 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32) - ; CHECK: [[COPY4:%[0-9]+]]:vgpr(s32) = COPY $vgpr4 - ; CHECK: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 5000 - ; CHECK: [[COPY5:%[0-9]+]]:vgpr(s32) = COPY [[C]](s32) - ; CHECK: [[ADD:%[0-9]+]]:vgpr(s32) = G_ADD [[COPY4]], [[COPY5]] - ; CHECK: [[C1:%[0-9]+]]:vgpr(s32) = G_CONSTANT i32 0 - ; CHECK: [[DEF:%[0-9]+]]:sreg_64_xexec = IMPLICIT_DEF - ; CHECK: [[UV:%[0-9]+]]:vreg_64(s64), [[UV1:%[0-9]+]]:vreg_64(s64) = G_UNMERGE_VALUES [[BUILD_VECTOR]](<4 x s32>) - ; CHECK: 
[[S_MOV_B64_term:%[0-9]+]]:sreg_64_xexec = S_MOV_B64_term $exec - ; CHECK: bb.2: - ; CHECK: successors: %bb.3, %bb.2 - ; CHECK: [[PHI:%[0-9]+]]:sreg_64_xexec = PHI [[DEF]], %bb.1, %26, %bb.2 - ; CHECK: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[UV]].sub0(s64), implicit $exec - ; CHECK: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[UV]].sub1(s64), implicit $exec - ; CHECK: [[MV:%[0-9]+]]:sreg_64_xexec(s64) = G_MERGE_VALUES [[V_READFIRSTLANE_B32_]](s32), [[V_READFIRSTLANE_B32_1]](s32) - ; CHECK: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[MV]](s64), [[UV]](s64), implicit $exec - ; CHECK: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[UV1]].sub0(s64), implicit $exec - ; CHECK: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[UV1]].sub1(s64), implicit $exec - ; CHECK: [[MV1:%[0-9]+]]:sreg_64_xexec(s64) = G_MERGE_VALUES [[V_READFIRSTLANE_B32_2]](s32), [[V_READFIRSTLANE_B32_3]](s32) - ; CHECK: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[MV1]](s64), [[UV1]](s64), implicit $exec - ; CHECK: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_1]], [[V_CMP_EQ_U64_e64_]], implicit-def $scc - ; CHECK: [[BUILD_VECTOR1:%[0-9]+]]:sgpr(<4 x s32>) = G_BUILD_VECTOR [[V_READFIRSTLANE_B32_]](s32), [[V_READFIRSTLANE_B32_1]](s32), [[V_READFIRSTLANE_B32_2]](s32), [[V_READFIRSTLANE_B32_3]](s32) - ; CHECK: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_]], implicit-def $exec, implicit-def $scc, implicit $exec - ; CHECK: [[AMDGPU_BUFFER_LOAD:%[0-9]+]]:vgpr(<4 x s32>) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR1]](<4 x s32>), [[C1]](s32), [[COPY4]], [[C]], 0, 0, 0 :: (dereferenceable invariant load (s128), align 4) - ; CHECK: [[AMDGPU_BUFFER_LOAD1:%[0-9]+]]:vgpr(<4 x s32>) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR1]](<4 x s32>), [[C1]](s32), [[COPY4]], [[C]], 16, 0, 0 :: (dereferenceable invariant load (s128), align 4) - ; CHECK: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc - ; CHECK: SI_WATERFALL_LOOP %bb.2, implicit $exec - ; CHECK: bb.3: - ; CHECK: $exec = S_MOV_B64_term [[S_MOV_B64_term]] - ; CHECK: bb.4: - ; CHECK: [[CONCAT_VECTORS:%[0-9]+]]:vgpr(<8 x s32>) = G_CONCAT_VECTORS [[AMDGPU_BUFFER_LOAD]](<4 x s32>), [[AMDGPU_BUFFER_LOAD1]](<4 x s32>) - ; CHECK: [[UV2:%[0-9]+]]:vgpr(s32), [[UV3:%[0-9]+]]:vgpr(s32), [[UV4:%[0-9]+]]:vgpr(s32), [[UV5:%[0-9]+]]:vgpr(s32), [[UV6:%[0-9]+]]:vgpr(s32), [[UV7:%[0-9]+]]:vgpr(s32), [[UV8:%[0-9]+]]:vgpr(s32), [[UV9:%[0-9]+]]:vgpr(s32) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<8 x s32>) - ; CHECK: $vgpr0 = COPY [[UV2]](s32) - ; CHECK: $vgpr1 = COPY [[UV3]](s32) - ; CHECK: $vgpr2 = COPY [[UV4]](s32) - ; CHECK: $vgpr3 = COPY [[UV5]](s32) - ; CHECK: $vgpr4 = COPY [[UV6]](s32) - ; CHECK: $vgpr5 = COPY [[UV7]](s32) - ; CHECK: $vgpr6 = COPY [[UV8]](s32) - ; CHECK: $vgpr7 = COPY [[UV9]](s32) - ; CHECK: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7 + ; CHECK-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY $vgpr1 + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY $vgpr2 + ; CHECK-NEXT: [[COPY3:%[0-9]+]]:vgpr(s32) = COPY $vgpr3 + ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:vgpr(<4 x s32>) = G_BUILD_VECTOR [[COPY]](s32), 
[[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32) + ; CHECK-NEXT: [[COPY4:%[0-9]+]]:vgpr(s32) = COPY $vgpr4 + ; CHECK-NEXT: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 5000 + ; CHECK-NEXT: [[COPY5:%[0-9]+]]:vgpr(s32) = COPY [[C]](s32) + ; CHECK-NEXT: [[ADD:%[0-9]+]]:vgpr(s32) = G_ADD [[COPY4]], [[COPY5]] + ; CHECK-NEXT: [[C1:%[0-9]+]]:vgpr(s32) = G_CONSTANT i32 0 + ; CHECK-NEXT: [[DEF:%[0-9]+]]:sreg_64_xexec = IMPLICIT_DEF + ; CHECK-NEXT: [[UV:%[0-9]+]]:vreg_64(s64), [[UV1:%[0-9]+]]:vreg_64(s64) = G_UNMERGE_VALUES [[BUILD_VECTOR]](<4 x s32>) + ; CHECK-NEXT: [[S_MOV_B64_term:%[0-9]+]]:sreg_64_xexec = S_MOV_B64_term $exec + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: bb.2: + ; CHECK-NEXT: successors: %bb.3, %bb.2 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[PHI:%[0-9]+]]:sreg_64_xexec = PHI [[DEF]], %bb.1, %26, %bb.2 + ; CHECK-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[UV]].sub0(s64), implicit $exec + ; CHECK-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[UV]].sub1(s64), implicit $exec + ; CHECK-NEXT: [[MV:%[0-9]+]]:sreg_64_xexec(s64) = G_MERGE_VALUES [[V_READFIRSTLANE_B32_]](s32), [[V_READFIRSTLANE_B32_1]](s32) + ; CHECK-NEXT: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[MV]](s64), [[UV]](s64), implicit $exec + ; CHECK-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[UV1]].sub0(s64), implicit $exec + ; CHECK-NEXT: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[UV1]].sub1(s64), implicit $exec + ; CHECK-NEXT: [[MV1:%[0-9]+]]:sreg_64_xexec(s64) = G_MERGE_VALUES [[V_READFIRSTLANE_B32_2]](s32), [[V_READFIRSTLANE_B32_3]](s32) + ; CHECK-NEXT: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[MV1]](s64), [[UV1]](s64), implicit $exec + ; CHECK-NEXT: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_1]], [[V_CMP_EQ_U64_e64_]], implicit-def $scc + ; CHECK-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:sgpr(<4 x s32>) = G_BUILD_VECTOR [[V_READFIRSTLANE_B32_]](s32), [[V_READFIRSTLANE_B32_1]](s32), [[V_READFIRSTLANE_B32_2]](s32), [[V_READFIRSTLANE_B32_3]](s32) + ; CHECK-NEXT: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_]], implicit-def $exec, implicit-def $scc, implicit $exec + ; CHECK-NEXT: [[AMDGPU_BUFFER_LOAD:%[0-9]+]]:vgpr(<4 x s32>) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR1]](<4 x s32>), [[C1]](s32), [[COPY4]], [[C]], 0, 0, 0 :: (dereferenceable invariant load (s128), align 4) + ; CHECK-NEXT: [[AMDGPU_BUFFER_LOAD1:%[0-9]+]]:vgpr(<4 x s32>) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR1]](<4 x s32>), [[C1]](s32), [[COPY4]], [[C]], 16, 0, 0 :: (dereferenceable invariant load (s128), align 4) + ; CHECK-NEXT: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc + ; CHECK-NEXT: SI_WATERFALL_LOOP %bb.2, implicit $exec + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: bb.3: + ; CHECK-NEXT: $exec = S_MOV_B64_term [[S_MOV_B64_term]] + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: bb.4: + ; CHECK-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:vgpr(<8 x s32>) = G_CONCAT_VECTORS [[AMDGPU_BUFFER_LOAD]](<4 x s32>), [[AMDGPU_BUFFER_LOAD1]](<4 x s32>) + ; CHECK-NEXT: [[UV2:%[0-9]+]]:vgpr(s32), [[UV3:%[0-9]+]]:vgpr(s32), [[UV4:%[0-9]+]]:vgpr(s32), [[UV5:%[0-9]+]]:vgpr(s32), [[UV6:%[0-9]+]]:vgpr(s32), [[UV7:%[0-9]+]]:vgpr(s32), [[UV8:%[0-9]+]]:vgpr(s32), [[UV9:%[0-9]+]]:vgpr(s32) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<8 x s32>) + ; CHECK-NEXT: $vgpr0 = COPY [[UV2]](s32) + ; CHECK-NEXT: $vgpr1 = COPY [[UV3]](s32) + ; CHECK-NEXT: $vgpr2 = COPY 
[[UV4]](s32) + ; CHECK-NEXT: $vgpr3 = COPY [[UV5]](s32) + ; CHECK-NEXT: $vgpr4 = COPY [[UV6]](s32) + ; CHECK-NEXT: $vgpr5 = COPY [[UV7]](s32) + ; CHECK-NEXT: $vgpr6 = COPY [[UV8]](s32) + ; CHECK-NEXT: $vgpr7 = COPY [[UV9]](s32) + ; CHECK-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7 ; GREEDY-LABEL: name: s_buffer_load_v8f32_vgpr_offset_vgpr_rsrc_add_5000 ; GREEDY: bb.1 (%ir-block.0): - ; GREEDY: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4 - ; GREEDY: [[COPY:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 - ; GREEDY: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY $vgpr1 - ; GREEDY: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY $vgpr2 - ; GREEDY: [[COPY3:%[0-9]+]]:vgpr(s32) = COPY $vgpr3 - ; GREEDY: [[BUILD_VECTOR:%[0-9]+]]:vgpr(<4 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32) - ; GREEDY: [[COPY4:%[0-9]+]]:vgpr(s32) = COPY $vgpr4 - ; GREEDY: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 5000 - ; GREEDY: [[COPY5:%[0-9]+]]:vgpr(s32) = COPY [[C]](s32) - ; GREEDY: [[ADD:%[0-9]+]]:vgpr(s32) = G_ADD [[COPY4]], [[COPY5]] - ; GREEDY: [[C1:%[0-9]+]]:vgpr(s32) = G_CONSTANT i32 0 - ; GREEDY: [[DEF:%[0-9]+]]:sreg_64_xexec = IMPLICIT_DEF - ; GREEDY: [[UV:%[0-9]+]]:vreg_64(s64), [[UV1:%[0-9]+]]:vreg_64(s64) = G_UNMERGE_VALUES [[BUILD_VECTOR]](<4 x s32>) - ; GREEDY: [[S_MOV_B64_term:%[0-9]+]]:sreg_64_xexec = S_MOV_B64_term $exec - ; GREEDY: bb.2: - ; GREEDY: successors: %bb.3, %bb.2 - ; GREEDY: [[PHI:%[0-9]+]]:sreg_64_xexec = PHI [[DEF]], %bb.1, %26, %bb.2 - ; GREEDY: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[UV]].sub0(s64), implicit $exec - ; GREEDY: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[UV]].sub1(s64), implicit $exec - ; GREEDY: [[MV:%[0-9]+]]:sreg_64_xexec(s64) = G_MERGE_VALUES [[V_READFIRSTLANE_B32_]](s32), [[V_READFIRSTLANE_B32_1]](s32) - ; GREEDY: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[MV]](s64), [[UV]](s64), implicit $exec - ; GREEDY: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[UV1]].sub0(s64), implicit $exec - ; GREEDY: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[UV1]].sub1(s64), implicit $exec - ; GREEDY: [[MV1:%[0-9]+]]:sreg_64_xexec(s64) = G_MERGE_VALUES [[V_READFIRSTLANE_B32_2]](s32), [[V_READFIRSTLANE_B32_3]](s32) - ; GREEDY: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[MV1]](s64), [[UV1]](s64), implicit $exec - ; GREEDY: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_1]], [[V_CMP_EQ_U64_e64_]], implicit-def $scc - ; GREEDY: [[BUILD_VECTOR1:%[0-9]+]]:sgpr(<4 x s32>) = G_BUILD_VECTOR [[V_READFIRSTLANE_B32_]](s32), [[V_READFIRSTLANE_B32_1]](s32), [[V_READFIRSTLANE_B32_2]](s32), [[V_READFIRSTLANE_B32_3]](s32) - ; GREEDY: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_]], implicit-def $exec, implicit-def $scc, implicit $exec - ; GREEDY: [[AMDGPU_BUFFER_LOAD:%[0-9]+]]:vgpr(<4 x s32>) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR1]](<4 x s32>), [[C1]](s32), [[COPY4]], [[C]], 0, 0, 0 :: (dereferenceable invariant load (s128), align 4) - ; GREEDY: [[AMDGPU_BUFFER_LOAD1:%[0-9]+]]:vgpr(<4 x s32>) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR1]](<4 x s32>), [[C1]](s32), [[COPY4]], [[C]], 16, 0, 0 :: (dereferenceable invariant load (s128), align 4) - ; GREEDY: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc - ; GREEDY: SI_WATERFALL_LOOP %bb.2, 
implicit $exec - ; GREEDY: bb.3: - ; GREEDY: $exec = S_MOV_B64_term [[S_MOV_B64_term]] - ; GREEDY: bb.4: - ; GREEDY: [[CONCAT_VECTORS:%[0-9]+]]:vgpr(<8 x s32>) = G_CONCAT_VECTORS [[AMDGPU_BUFFER_LOAD]](<4 x s32>), [[AMDGPU_BUFFER_LOAD1]](<4 x s32>) - ; GREEDY: [[UV2:%[0-9]+]]:vgpr(s32), [[UV3:%[0-9]+]]:vgpr(s32), [[UV4:%[0-9]+]]:vgpr(s32), [[UV5:%[0-9]+]]:vgpr(s32), [[UV6:%[0-9]+]]:vgpr(s32), [[UV7:%[0-9]+]]:vgpr(s32), [[UV8:%[0-9]+]]:vgpr(s32), [[UV9:%[0-9]+]]:vgpr(s32) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<8 x s32>) - ; GREEDY: $vgpr0 = COPY [[UV2]](s32) - ; GREEDY: $vgpr1 = COPY [[UV3]](s32) - ; GREEDY: $vgpr2 = COPY [[UV4]](s32) - ; GREEDY: $vgpr3 = COPY [[UV5]](s32) - ; GREEDY: $vgpr4 = COPY [[UV6]](s32) - ; GREEDY: $vgpr5 = COPY [[UV7]](s32) - ; GREEDY: $vgpr6 = COPY [[UV8]](s32) - ; GREEDY: $vgpr7 = COPY [[UV9]](s32) - ; GREEDY: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7 + ; GREEDY-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4 + ; GREEDY-NEXT: {{ $}} + ; GREEDY-NEXT: [[COPY:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 + ; GREEDY-NEXT: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY $vgpr1 + ; GREEDY-NEXT: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY $vgpr2 + ; GREEDY-NEXT: [[COPY3:%[0-9]+]]:vgpr(s32) = COPY $vgpr3 + ; GREEDY-NEXT: [[BUILD_VECTOR:%[0-9]+]]:vgpr(<4 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32) + ; GREEDY-NEXT: [[COPY4:%[0-9]+]]:vgpr(s32) = COPY $vgpr4 + ; GREEDY-NEXT: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 5000 + ; GREEDY-NEXT: [[COPY5:%[0-9]+]]:vgpr(s32) = COPY [[C]](s32) + ; GREEDY-NEXT: [[ADD:%[0-9]+]]:vgpr(s32) = G_ADD [[COPY4]], [[COPY5]] + ; GREEDY-NEXT: [[C1:%[0-9]+]]:vgpr(s32) = G_CONSTANT i32 0 + ; GREEDY-NEXT: [[DEF:%[0-9]+]]:sreg_64_xexec = IMPLICIT_DEF + ; GREEDY-NEXT: [[UV:%[0-9]+]]:vreg_64(s64), [[UV1:%[0-9]+]]:vreg_64(s64) = G_UNMERGE_VALUES [[BUILD_VECTOR]](<4 x s32>) + ; GREEDY-NEXT: [[S_MOV_B64_term:%[0-9]+]]:sreg_64_xexec = S_MOV_B64_term $exec + ; GREEDY-NEXT: {{ $}} + ; GREEDY-NEXT: bb.2: + ; GREEDY-NEXT: successors: %bb.3, %bb.2 + ; GREEDY-NEXT: {{ $}} + ; GREEDY-NEXT: [[PHI:%[0-9]+]]:sreg_64_xexec = PHI [[DEF]], %bb.1, %26, %bb.2 + ; GREEDY-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[UV]].sub0(s64), implicit $exec + ; GREEDY-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[UV]].sub1(s64), implicit $exec + ; GREEDY-NEXT: [[MV:%[0-9]+]]:sreg_64_xexec(s64) = G_MERGE_VALUES [[V_READFIRSTLANE_B32_]](s32), [[V_READFIRSTLANE_B32_1]](s32) + ; GREEDY-NEXT: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[MV]](s64), [[UV]](s64), implicit $exec + ; GREEDY-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[UV1]].sub0(s64), implicit $exec + ; GREEDY-NEXT: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[UV1]].sub1(s64), implicit $exec + ; GREEDY-NEXT: [[MV1:%[0-9]+]]:sreg_64_xexec(s64) = G_MERGE_VALUES [[V_READFIRSTLANE_B32_2]](s32), [[V_READFIRSTLANE_B32_3]](s32) + ; GREEDY-NEXT: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[MV1]](s64), [[UV1]](s64), implicit $exec + ; GREEDY-NEXT: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_1]], [[V_CMP_EQ_U64_e64_]], implicit-def $scc + ; GREEDY-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:sgpr(<4 x s32>) = G_BUILD_VECTOR [[V_READFIRSTLANE_B32_]](s32), [[V_READFIRSTLANE_B32_1]](s32), [[V_READFIRSTLANE_B32_2]](s32), 
[[V_READFIRSTLANE_B32_3]](s32) + ; GREEDY-NEXT: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_]], implicit-def $exec, implicit-def $scc, implicit $exec + ; GREEDY-NEXT: [[AMDGPU_BUFFER_LOAD:%[0-9]+]]:vgpr(<4 x s32>) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR1]](<4 x s32>), [[C1]](s32), [[COPY4]], [[C]], 0, 0, 0 :: (dereferenceable invariant load (s128), align 4) + ; GREEDY-NEXT: [[AMDGPU_BUFFER_LOAD1:%[0-9]+]]:vgpr(<4 x s32>) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR1]](<4 x s32>), [[C1]](s32), [[COPY4]], [[C]], 16, 0, 0 :: (dereferenceable invariant load (s128), align 4) + ; GREEDY-NEXT: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc + ; GREEDY-NEXT: SI_WATERFALL_LOOP %bb.2, implicit $exec + ; GREEDY-NEXT: {{ $}} + ; GREEDY-NEXT: bb.3: + ; GREEDY-NEXT: $exec = S_MOV_B64_term [[S_MOV_B64_term]] + ; GREEDY-NEXT: {{ $}} + ; GREEDY-NEXT: bb.4: + ; GREEDY-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:vgpr(<8 x s32>) = G_CONCAT_VECTORS [[AMDGPU_BUFFER_LOAD]](<4 x s32>), [[AMDGPU_BUFFER_LOAD1]](<4 x s32>) + ; GREEDY-NEXT: [[UV2:%[0-9]+]]:vgpr(s32), [[UV3:%[0-9]+]]:vgpr(s32), [[UV4:%[0-9]+]]:vgpr(s32), [[UV5:%[0-9]+]]:vgpr(s32), [[UV6:%[0-9]+]]:vgpr(s32), [[UV7:%[0-9]+]]:vgpr(s32), [[UV8:%[0-9]+]]:vgpr(s32), [[UV9:%[0-9]+]]:vgpr(s32) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<8 x s32>) + ; GREEDY-NEXT: $vgpr0 = COPY [[UV2]](s32) + ; GREEDY-NEXT: $vgpr1 = COPY [[UV3]](s32) + ; GREEDY-NEXT: $vgpr2 = COPY [[UV4]](s32) + ; GREEDY-NEXT: $vgpr3 = COPY [[UV5]](s32) + ; GREEDY-NEXT: $vgpr4 = COPY [[UV6]](s32) + ; GREEDY-NEXT: $vgpr5 = COPY [[UV7]](s32) + ; GREEDY-NEXT: $vgpr6 = COPY [[UV8]](s32) + ; GREEDY-NEXT: $vgpr7 = COPY [[UV9]](s32) + ; GREEDY-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7 %soffset = add i32 %offset.base, 5000 %val = call <8 x float> @llvm.amdgcn.s.buffer.load.v8f32(<4 x i32> %rsrc, i32 %soffset, i32 0) ret <8 x float> %val @@ -2344,100 +2488,110 @@ define amdgpu_ps <8 x float> @s_buffer_load_v8f32_vgpr_offset_vgpr_rsrc_add_5000 define amdgpu_ps <8 x float> @s_buffer_load_v8f32_vgpr_offset_vgpr_rsrc_add_4076(<4 x i32> %rsrc, i32 %offset.base) { ; CHECK-LABEL: name: s_buffer_load_v8f32_vgpr_offset_vgpr_rsrc_add_4076 ; CHECK: bb.1 (%ir-block.0): - ; CHECK: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4 - ; CHECK: [[COPY:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 - ; CHECK: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY $vgpr1 - ; CHECK: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY $vgpr2 - ; CHECK: [[COPY3:%[0-9]+]]:vgpr(s32) = COPY $vgpr3 - ; CHECK: [[BUILD_VECTOR:%[0-9]+]]:vgpr(<4 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32) - ; CHECK: [[COPY4:%[0-9]+]]:vgpr(s32) = COPY $vgpr4 - ; CHECK: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 4076 - ; CHECK: [[COPY5:%[0-9]+]]:vgpr(s32) = COPY [[C]](s32) - ; CHECK: [[ADD:%[0-9]+]]:vgpr(s32) = G_ADD [[COPY4]], [[COPY5]] - ; CHECK: [[C1:%[0-9]+]]:vgpr(s32) = G_CONSTANT i32 0 - ; CHECK: [[DEF:%[0-9]+]]:sreg_64_xexec = IMPLICIT_DEF - ; CHECK: [[UV:%[0-9]+]]:vreg_64(s64), [[UV1:%[0-9]+]]:vreg_64(s64) = G_UNMERGE_VALUES [[BUILD_VECTOR]](<4 x s32>) - ; CHECK: [[S_MOV_B64_term:%[0-9]+]]:sreg_64_xexec = S_MOV_B64_term $exec - ; CHECK: bb.2: - ; CHECK: successors: %bb.3, %bb.2 - ; CHECK: [[PHI:%[0-9]+]]:sreg_64_xexec = PHI [[DEF]], %bb.1, %26, %bb.2 - ; CHECK: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[UV]].sub0(s64), implicit $exec - ; CHECK: 
[[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[UV]].sub1(s64), implicit $exec - ; CHECK: [[MV:%[0-9]+]]:sreg_64_xexec(s64) = G_MERGE_VALUES [[V_READFIRSTLANE_B32_]](s32), [[V_READFIRSTLANE_B32_1]](s32) - ; CHECK: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[MV]](s64), [[UV]](s64), implicit $exec - ; CHECK: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[UV1]].sub0(s64), implicit $exec - ; CHECK: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[UV1]].sub1(s64), implicit $exec - ; CHECK: [[MV1:%[0-9]+]]:sreg_64_xexec(s64) = G_MERGE_VALUES [[V_READFIRSTLANE_B32_2]](s32), [[V_READFIRSTLANE_B32_3]](s32) - ; CHECK: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[MV1]](s64), [[UV1]](s64), implicit $exec - ; CHECK: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_1]], [[V_CMP_EQ_U64_e64_]], implicit-def $scc - ; CHECK: [[BUILD_VECTOR1:%[0-9]+]]:sgpr(<4 x s32>) = G_BUILD_VECTOR [[V_READFIRSTLANE_B32_]](s32), [[V_READFIRSTLANE_B32_1]](s32), [[V_READFIRSTLANE_B32_2]](s32), [[V_READFIRSTLANE_B32_3]](s32) - ; CHECK: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_]], implicit-def $exec, implicit-def $scc, implicit $exec - ; CHECK: [[AMDGPU_BUFFER_LOAD:%[0-9]+]]:vgpr(<4 x s32>) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR1]](<4 x s32>), [[C1]](s32), [[COPY4]], [[C]], 0, 0, 0 :: (dereferenceable invariant load (s128), align 4) - ; CHECK: [[AMDGPU_BUFFER_LOAD1:%[0-9]+]]:vgpr(<4 x s32>) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR1]](<4 x s32>), [[C1]](s32), [[COPY4]], [[C]], 16, 0, 0 :: (dereferenceable invariant load (s128), align 4) - ; CHECK: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc - ; CHECK: SI_WATERFALL_LOOP %bb.2, implicit $exec - ; CHECK: bb.3: - ; CHECK: $exec = S_MOV_B64_term [[S_MOV_B64_term]] - ; CHECK: bb.4: - ; CHECK: [[CONCAT_VECTORS:%[0-9]+]]:vgpr(<8 x s32>) = G_CONCAT_VECTORS [[AMDGPU_BUFFER_LOAD]](<4 x s32>), [[AMDGPU_BUFFER_LOAD1]](<4 x s32>) - ; CHECK: [[UV2:%[0-9]+]]:vgpr(s32), [[UV3:%[0-9]+]]:vgpr(s32), [[UV4:%[0-9]+]]:vgpr(s32), [[UV5:%[0-9]+]]:vgpr(s32), [[UV6:%[0-9]+]]:vgpr(s32), [[UV7:%[0-9]+]]:vgpr(s32), [[UV8:%[0-9]+]]:vgpr(s32), [[UV9:%[0-9]+]]:vgpr(s32) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<8 x s32>) - ; CHECK: $vgpr0 = COPY [[UV2]](s32) - ; CHECK: $vgpr1 = COPY [[UV3]](s32) - ; CHECK: $vgpr2 = COPY [[UV4]](s32) - ; CHECK: $vgpr3 = COPY [[UV5]](s32) - ; CHECK: $vgpr4 = COPY [[UV6]](s32) - ; CHECK: $vgpr5 = COPY [[UV7]](s32) - ; CHECK: $vgpr6 = COPY [[UV8]](s32) - ; CHECK: $vgpr7 = COPY [[UV9]](s32) - ; CHECK: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7 + ; CHECK-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY $vgpr1 + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY $vgpr2 + ; CHECK-NEXT: [[COPY3:%[0-9]+]]:vgpr(s32) = COPY $vgpr3 + ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:vgpr(<4 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32) + ; CHECK-NEXT: [[COPY4:%[0-9]+]]:vgpr(s32) = COPY $vgpr4 + ; CHECK-NEXT: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 4076 + ; CHECK-NEXT: [[COPY5:%[0-9]+]]:vgpr(s32) = COPY [[C]](s32) + ; CHECK-NEXT: [[ADD:%[0-9]+]]:vgpr(s32) = G_ADD [[COPY4]], [[COPY5]] + ; CHECK-NEXT: 
[[C1:%[0-9]+]]:vgpr(s32) = G_CONSTANT i32 0 + ; CHECK-NEXT: [[DEF:%[0-9]+]]:sreg_64_xexec = IMPLICIT_DEF + ; CHECK-NEXT: [[UV:%[0-9]+]]:vreg_64(s64), [[UV1:%[0-9]+]]:vreg_64(s64) = G_UNMERGE_VALUES [[BUILD_VECTOR]](<4 x s32>) + ; CHECK-NEXT: [[S_MOV_B64_term:%[0-9]+]]:sreg_64_xexec = S_MOV_B64_term $exec + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: bb.2: + ; CHECK-NEXT: successors: %bb.3, %bb.2 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[PHI:%[0-9]+]]:sreg_64_xexec = PHI [[DEF]], %bb.1, %26, %bb.2 + ; CHECK-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[UV]].sub0(s64), implicit $exec + ; CHECK-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[UV]].sub1(s64), implicit $exec + ; CHECK-NEXT: [[MV:%[0-9]+]]:sreg_64_xexec(s64) = G_MERGE_VALUES [[V_READFIRSTLANE_B32_]](s32), [[V_READFIRSTLANE_B32_1]](s32) + ; CHECK-NEXT: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[MV]](s64), [[UV]](s64), implicit $exec + ; CHECK-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[UV1]].sub0(s64), implicit $exec + ; CHECK-NEXT: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[UV1]].sub1(s64), implicit $exec + ; CHECK-NEXT: [[MV1:%[0-9]+]]:sreg_64_xexec(s64) = G_MERGE_VALUES [[V_READFIRSTLANE_B32_2]](s32), [[V_READFIRSTLANE_B32_3]](s32) + ; CHECK-NEXT: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[MV1]](s64), [[UV1]](s64), implicit $exec + ; CHECK-NEXT: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_1]], [[V_CMP_EQ_U64_e64_]], implicit-def $scc + ; CHECK-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:sgpr(<4 x s32>) = G_BUILD_VECTOR [[V_READFIRSTLANE_B32_]](s32), [[V_READFIRSTLANE_B32_1]](s32), [[V_READFIRSTLANE_B32_2]](s32), [[V_READFIRSTLANE_B32_3]](s32) + ; CHECK-NEXT: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_]], implicit-def $exec, implicit-def $scc, implicit $exec + ; CHECK-NEXT: [[AMDGPU_BUFFER_LOAD:%[0-9]+]]:vgpr(<4 x s32>) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR1]](<4 x s32>), [[C1]](s32), [[COPY4]], [[C]], 0, 0, 0 :: (dereferenceable invariant load (s128), align 4) + ; CHECK-NEXT: [[AMDGPU_BUFFER_LOAD1:%[0-9]+]]:vgpr(<4 x s32>) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR1]](<4 x s32>), [[C1]](s32), [[COPY4]], [[C]], 16, 0, 0 :: (dereferenceable invariant load (s128), align 4) + ; CHECK-NEXT: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc + ; CHECK-NEXT: SI_WATERFALL_LOOP %bb.2, implicit $exec + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: bb.3: + ; CHECK-NEXT: $exec = S_MOV_B64_term [[S_MOV_B64_term]] + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: bb.4: + ; CHECK-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:vgpr(<8 x s32>) = G_CONCAT_VECTORS [[AMDGPU_BUFFER_LOAD]](<4 x s32>), [[AMDGPU_BUFFER_LOAD1]](<4 x s32>) + ; CHECK-NEXT: [[UV2:%[0-9]+]]:vgpr(s32), [[UV3:%[0-9]+]]:vgpr(s32), [[UV4:%[0-9]+]]:vgpr(s32), [[UV5:%[0-9]+]]:vgpr(s32), [[UV6:%[0-9]+]]:vgpr(s32), [[UV7:%[0-9]+]]:vgpr(s32), [[UV8:%[0-9]+]]:vgpr(s32), [[UV9:%[0-9]+]]:vgpr(s32) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<8 x s32>) + ; CHECK-NEXT: $vgpr0 = COPY [[UV2]](s32) + ; CHECK-NEXT: $vgpr1 = COPY [[UV3]](s32) + ; CHECK-NEXT: $vgpr2 = COPY [[UV4]](s32) + ; CHECK-NEXT: $vgpr3 = COPY [[UV5]](s32) + ; CHECK-NEXT: $vgpr4 = COPY [[UV6]](s32) + ; CHECK-NEXT: $vgpr5 = COPY [[UV7]](s32) + ; CHECK-NEXT: $vgpr6 = COPY [[UV8]](s32) + ; CHECK-NEXT: $vgpr7 = COPY [[UV9]](s32) + ; CHECK-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit 
$vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7 ; GREEDY-LABEL: name: s_buffer_load_v8f32_vgpr_offset_vgpr_rsrc_add_4076 ; GREEDY: bb.1 (%ir-block.0): - ; GREEDY: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4 - ; GREEDY: [[COPY:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 - ; GREEDY: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY $vgpr1 - ; GREEDY: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY $vgpr2 - ; GREEDY: [[COPY3:%[0-9]+]]:vgpr(s32) = COPY $vgpr3 - ; GREEDY: [[BUILD_VECTOR:%[0-9]+]]:vgpr(<4 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32) - ; GREEDY: [[COPY4:%[0-9]+]]:vgpr(s32) = COPY $vgpr4 - ; GREEDY: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 4076 - ; GREEDY: [[COPY5:%[0-9]+]]:vgpr(s32) = COPY [[C]](s32) - ; GREEDY: [[ADD:%[0-9]+]]:vgpr(s32) = G_ADD [[COPY4]], [[COPY5]] - ; GREEDY: [[C1:%[0-9]+]]:vgpr(s32) = G_CONSTANT i32 0 - ; GREEDY: [[DEF:%[0-9]+]]:sreg_64_xexec = IMPLICIT_DEF - ; GREEDY: [[UV:%[0-9]+]]:vreg_64(s64), [[UV1:%[0-9]+]]:vreg_64(s64) = G_UNMERGE_VALUES [[BUILD_VECTOR]](<4 x s32>) - ; GREEDY: [[S_MOV_B64_term:%[0-9]+]]:sreg_64_xexec = S_MOV_B64_term $exec - ; GREEDY: bb.2: - ; GREEDY: successors: %bb.3, %bb.2 - ; GREEDY: [[PHI:%[0-9]+]]:sreg_64_xexec = PHI [[DEF]], %bb.1, %26, %bb.2 - ; GREEDY: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[UV]].sub0(s64), implicit $exec - ; GREEDY: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[UV]].sub1(s64), implicit $exec - ; GREEDY: [[MV:%[0-9]+]]:sreg_64_xexec(s64) = G_MERGE_VALUES [[V_READFIRSTLANE_B32_]](s32), [[V_READFIRSTLANE_B32_1]](s32) - ; GREEDY: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[MV]](s64), [[UV]](s64), implicit $exec - ; GREEDY: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[UV1]].sub0(s64), implicit $exec - ; GREEDY: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[UV1]].sub1(s64), implicit $exec - ; GREEDY: [[MV1:%[0-9]+]]:sreg_64_xexec(s64) = G_MERGE_VALUES [[V_READFIRSTLANE_B32_2]](s32), [[V_READFIRSTLANE_B32_3]](s32) - ; GREEDY: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[MV1]](s64), [[UV1]](s64), implicit $exec - ; GREEDY: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_1]], [[V_CMP_EQ_U64_e64_]], implicit-def $scc - ; GREEDY: [[BUILD_VECTOR1:%[0-9]+]]:sgpr(<4 x s32>) = G_BUILD_VECTOR [[V_READFIRSTLANE_B32_]](s32), [[V_READFIRSTLANE_B32_1]](s32), [[V_READFIRSTLANE_B32_2]](s32), [[V_READFIRSTLANE_B32_3]](s32) - ; GREEDY: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_]], implicit-def $exec, implicit-def $scc, implicit $exec - ; GREEDY: [[AMDGPU_BUFFER_LOAD:%[0-9]+]]:vgpr(<4 x s32>) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR1]](<4 x s32>), [[C1]](s32), [[COPY4]], [[C]], 0, 0, 0 :: (dereferenceable invariant load (s128), align 4) - ; GREEDY: [[AMDGPU_BUFFER_LOAD1:%[0-9]+]]:vgpr(<4 x s32>) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR1]](<4 x s32>), [[C1]](s32), [[COPY4]], [[C]], 16, 0, 0 :: (dereferenceable invariant load (s128), align 4) - ; GREEDY: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc - ; GREEDY: SI_WATERFALL_LOOP %bb.2, implicit $exec - ; GREEDY: bb.3: - ; GREEDY: $exec = S_MOV_B64_term [[S_MOV_B64_term]] - ; GREEDY: bb.4: - ; GREEDY: [[CONCAT_VECTORS:%[0-9]+]]:vgpr(<8 x s32>) = G_CONCAT_VECTORS [[AMDGPU_BUFFER_LOAD]](<4 x s32>), [[AMDGPU_BUFFER_LOAD1]](<4 x s32>) - ; GREEDY: [[UV2:%[0-9]+]]:vgpr(s32), [[UV3:%[0-9]+]]:vgpr(s32), 
[[UV4:%[0-9]+]]:vgpr(s32), [[UV5:%[0-9]+]]:vgpr(s32), [[UV6:%[0-9]+]]:vgpr(s32), [[UV7:%[0-9]+]]:vgpr(s32), [[UV8:%[0-9]+]]:vgpr(s32), [[UV9:%[0-9]+]]:vgpr(s32) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<8 x s32>) - ; GREEDY: $vgpr0 = COPY [[UV2]](s32) - ; GREEDY: $vgpr1 = COPY [[UV3]](s32) - ; GREEDY: $vgpr2 = COPY [[UV4]](s32) - ; GREEDY: $vgpr3 = COPY [[UV5]](s32) - ; GREEDY: $vgpr4 = COPY [[UV6]](s32) - ; GREEDY: $vgpr5 = COPY [[UV7]](s32) - ; GREEDY: $vgpr6 = COPY [[UV8]](s32) - ; GREEDY: $vgpr7 = COPY [[UV9]](s32) - ; GREEDY: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7 + ; GREEDY-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4 + ; GREEDY-NEXT: {{ $}} + ; GREEDY-NEXT: [[COPY:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 + ; GREEDY-NEXT: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY $vgpr1 + ; GREEDY-NEXT: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY $vgpr2 + ; GREEDY-NEXT: [[COPY3:%[0-9]+]]:vgpr(s32) = COPY $vgpr3 + ; GREEDY-NEXT: [[BUILD_VECTOR:%[0-9]+]]:vgpr(<4 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32) + ; GREEDY-NEXT: [[COPY4:%[0-9]+]]:vgpr(s32) = COPY $vgpr4 + ; GREEDY-NEXT: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 4076 + ; GREEDY-NEXT: [[COPY5:%[0-9]+]]:vgpr(s32) = COPY [[C]](s32) + ; GREEDY-NEXT: [[ADD:%[0-9]+]]:vgpr(s32) = G_ADD [[COPY4]], [[COPY5]] + ; GREEDY-NEXT: [[C1:%[0-9]+]]:vgpr(s32) = G_CONSTANT i32 0 + ; GREEDY-NEXT: [[DEF:%[0-9]+]]:sreg_64_xexec = IMPLICIT_DEF + ; GREEDY-NEXT: [[UV:%[0-9]+]]:vreg_64(s64), [[UV1:%[0-9]+]]:vreg_64(s64) = G_UNMERGE_VALUES [[BUILD_VECTOR]](<4 x s32>) + ; GREEDY-NEXT: [[S_MOV_B64_term:%[0-9]+]]:sreg_64_xexec = S_MOV_B64_term $exec + ; GREEDY-NEXT: {{ $}} + ; GREEDY-NEXT: bb.2: + ; GREEDY-NEXT: successors: %bb.3, %bb.2 + ; GREEDY-NEXT: {{ $}} + ; GREEDY-NEXT: [[PHI:%[0-9]+]]:sreg_64_xexec = PHI [[DEF]], %bb.1, %26, %bb.2 + ; GREEDY-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[UV]].sub0(s64), implicit $exec + ; GREEDY-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[UV]].sub1(s64), implicit $exec + ; GREEDY-NEXT: [[MV:%[0-9]+]]:sreg_64_xexec(s64) = G_MERGE_VALUES [[V_READFIRSTLANE_B32_]](s32), [[V_READFIRSTLANE_B32_1]](s32) + ; GREEDY-NEXT: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[MV]](s64), [[UV]](s64), implicit $exec + ; GREEDY-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[UV1]].sub0(s64), implicit $exec + ; GREEDY-NEXT: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[UV1]].sub1(s64), implicit $exec + ; GREEDY-NEXT: [[MV1:%[0-9]+]]:sreg_64_xexec(s64) = G_MERGE_VALUES [[V_READFIRSTLANE_B32_2]](s32), [[V_READFIRSTLANE_B32_3]](s32) + ; GREEDY-NEXT: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[MV1]](s64), [[UV1]](s64), implicit $exec + ; GREEDY-NEXT: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_1]], [[V_CMP_EQ_U64_e64_]], implicit-def $scc + ; GREEDY-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:sgpr(<4 x s32>) = G_BUILD_VECTOR [[V_READFIRSTLANE_B32_]](s32), [[V_READFIRSTLANE_B32_1]](s32), [[V_READFIRSTLANE_B32_2]](s32), [[V_READFIRSTLANE_B32_3]](s32) + ; GREEDY-NEXT: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_]], implicit-def $exec, implicit-def $scc, implicit $exec + ; GREEDY-NEXT: [[AMDGPU_BUFFER_LOAD:%[0-9]+]]:vgpr(<4 x s32>) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR1]](<4 x s32>), [[C1]](s32), 
[[COPY4]], [[C]], 0, 0, 0 :: (dereferenceable invariant load (s128), align 4) + ; GREEDY-NEXT: [[AMDGPU_BUFFER_LOAD1:%[0-9]+]]:vgpr(<4 x s32>) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR1]](<4 x s32>), [[C1]](s32), [[COPY4]], [[C]], 16, 0, 0 :: (dereferenceable invariant load (s128), align 4) + ; GREEDY-NEXT: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc + ; GREEDY-NEXT: SI_WATERFALL_LOOP %bb.2, implicit $exec + ; GREEDY-NEXT: {{ $}} + ; GREEDY-NEXT: bb.3: + ; GREEDY-NEXT: $exec = S_MOV_B64_term [[S_MOV_B64_term]] + ; GREEDY-NEXT: {{ $}} + ; GREEDY-NEXT: bb.4: + ; GREEDY-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:vgpr(<8 x s32>) = G_CONCAT_VECTORS [[AMDGPU_BUFFER_LOAD]](<4 x s32>), [[AMDGPU_BUFFER_LOAD1]](<4 x s32>) + ; GREEDY-NEXT: [[UV2:%[0-9]+]]:vgpr(s32), [[UV3:%[0-9]+]]:vgpr(s32), [[UV4:%[0-9]+]]:vgpr(s32), [[UV5:%[0-9]+]]:vgpr(s32), [[UV6:%[0-9]+]]:vgpr(s32), [[UV7:%[0-9]+]]:vgpr(s32), [[UV8:%[0-9]+]]:vgpr(s32), [[UV9:%[0-9]+]]:vgpr(s32) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<8 x s32>) + ; GREEDY-NEXT: $vgpr0 = COPY [[UV2]](s32) + ; GREEDY-NEXT: $vgpr1 = COPY [[UV3]](s32) + ; GREEDY-NEXT: $vgpr2 = COPY [[UV4]](s32) + ; GREEDY-NEXT: $vgpr3 = COPY [[UV5]](s32) + ; GREEDY-NEXT: $vgpr4 = COPY [[UV6]](s32) + ; GREEDY-NEXT: $vgpr5 = COPY [[UV7]](s32) + ; GREEDY-NEXT: $vgpr6 = COPY [[UV8]](s32) + ; GREEDY-NEXT: $vgpr7 = COPY [[UV9]](s32) + ; GREEDY-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7 %soffset = add i32 %offset.base, 4076 %val = call <8 x float> @llvm.amdgcn.s.buffer.load.v8f32(<4 x i32> %rsrc, i32 %soffset, i32 0) ret <8 x float> %val @@ -2446,100 +2600,110 @@ define amdgpu_ps <8 x float> @s_buffer_load_v8f32_vgpr_offset_vgpr_rsrc_add_4076 define amdgpu_ps <8 x float> @s_buffer_load_v8f32_vgpr_offset_vgpr_rsrc_add_4080(<4 x i32> %rsrc, i32 %offset.base) { ; CHECK-LABEL: name: s_buffer_load_v8f32_vgpr_offset_vgpr_rsrc_add_4080 ; CHECK: bb.1 (%ir-block.0): - ; CHECK: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4 - ; CHECK: [[COPY:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 - ; CHECK: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY $vgpr1 - ; CHECK: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY $vgpr2 - ; CHECK: [[COPY3:%[0-9]+]]:vgpr(s32) = COPY $vgpr3 - ; CHECK: [[BUILD_VECTOR:%[0-9]+]]:vgpr(<4 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32) - ; CHECK: [[COPY4:%[0-9]+]]:vgpr(s32) = COPY $vgpr4 - ; CHECK: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 4080 - ; CHECK: [[COPY5:%[0-9]+]]:vgpr(s32) = COPY [[C]](s32) - ; CHECK: [[ADD:%[0-9]+]]:vgpr(s32) = G_ADD [[COPY4]], [[COPY5]] - ; CHECK: [[C1:%[0-9]+]]:vgpr(s32) = G_CONSTANT i32 0 - ; CHECK: [[DEF:%[0-9]+]]:sreg_64_xexec = IMPLICIT_DEF - ; CHECK: [[UV:%[0-9]+]]:vreg_64(s64), [[UV1:%[0-9]+]]:vreg_64(s64) = G_UNMERGE_VALUES [[BUILD_VECTOR]](<4 x s32>) - ; CHECK: [[S_MOV_B64_term:%[0-9]+]]:sreg_64_xexec = S_MOV_B64_term $exec - ; CHECK: bb.2: - ; CHECK: successors: %bb.3, %bb.2 - ; CHECK: [[PHI:%[0-9]+]]:sreg_64_xexec = PHI [[DEF]], %bb.1, %26, %bb.2 - ; CHECK: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[UV]].sub0(s64), implicit $exec - ; CHECK: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[UV]].sub1(s64), implicit $exec - ; CHECK: [[MV:%[0-9]+]]:sreg_64_xexec(s64) = G_MERGE_VALUES [[V_READFIRSTLANE_B32_]](s32), [[V_READFIRSTLANE_B32_1]](s32) - ; CHECK: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[MV]](s64), [[UV]](s64), implicit 
$exec - ; CHECK: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[UV1]].sub0(s64), implicit $exec - ; CHECK: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[UV1]].sub1(s64), implicit $exec - ; CHECK: [[MV1:%[0-9]+]]:sreg_64_xexec(s64) = G_MERGE_VALUES [[V_READFIRSTLANE_B32_2]](s32), [[V_READFIRSTLANE_B32_3]](s32) - ; CHECK: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[MV1]](s64), [[UV1]](s64), implicit $exec - ; CHECK: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_1]], [[V_CMP_EQ_U64_e64_]], implicit-def $scc - ; CHECK: [[BUILD_VECTOR1:%[0-9]+]]:sgpr(<4 x s32>) = G_BUILD_VECTOR [[V_READFIRSTLANE_B32_]](s32), [[V_READFIRSTLANE_B32_1]](s32), [[V_READFIRSTLANE_B32_2]](s32), [[V_READFIRSTLANE_B32_3]](s32) - ; CHECK: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_]], implicit-def $exec, implicit-def $scc, implicit $exec - ; CHECK: [[AMDGPU_BUFFER_LOAD:%[0-9]+]]:vgpr(<4 x s32>) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR1]](<4 x s32>), [[C1]](s32), [[COPY4]], [[C]], 0, 0, 0 :: (dereferenceable invariant load (s128), align 4) - ; CHECK: [[AMDGPU_BUFFER_LOAD1:%[0-9]+]]:vgpr(<4 x s32>) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR1]](<4 x s32>), [[C1]](s32), [[COPY4]], [[C]], 16, 0, 0 :: (dereferenceable invariant load (s128), align 4) - ; CHECK: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc - ; CHECK: SI_WATERFALL_LOOP %bb.2, implicit $exec - ; CHECK: bb.3: - ; CHECK: $exec = S_MOV_B64_term [[S_MOV_B64_term]] - ; CHECK: bb.4: - ; CHECK: [[CONCAT_VECTORS:%[0-9]+]]:vgpr(<8 x s32>) = G_CONCAT_VECTORS [[AMDGPU_BUFFER_LOAD]](<4 x s32>), [[AMDGPU_BUFFER_LOAD1]](<4 x s32>) - ; CHECK: [[UV2:%[0-9]+]]:vgpr(s32), [[UV3:%[0-9]+]]:vgpr(s32), [[UV4:%[0-9]+]]:vgpr(s32), [[UV5:%[0-9]+]]:vgpr(s32), [[UV6:%[0-9]+]]:vgpr(s32), [[UV7:%[0-9]+]]:vgpr(s32), [[UV8:%[0-9]+]]:vgpr(s32), [[UV9:%[0-9]+]]:vgpr(s32) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<8 x s32>) - ; CHECK: $vgpr0 = COPY [[UV2]](s32) - ; CHECK: $vgpr1 = COPY [[UV3]](s32) - ; CHECK: $vgpr2 = COPY [[UV4]](s32) - ; CHECK: $vgpr3 = COPY [[UV5]](s32) - ; CHECK: $vgpr4 = COPY [[UV6]](s32) - ; CHECK: $vgpr5 = COPY [[UV7]](s32) - ; CHECK: $vgpr6 = COPY [[UV8]](s32) - ; CHECK: $vgpr7 = COPY [[UV9]](s32) - ; CHECK: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7 + ; CHECK-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY $vgpr1 + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY $vgpr2 + ; CHECK-NEXT: [[COPY3:%[0-9]+]]:vgpr(s32) = COPY $vgpr3 + ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:vgpr(<4 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32) + ; CHECK-NEXT: [[COPY4:%[0-9]+]]:vgpr(s32) = COPY $vgpr4 + ; CHECK-NEXT: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 4080 + ; CHECK-NEXT: [[COPY5:%[0-9]+]]:vgpr(s32) = COPY [[C]](s32) + ; CHECK-NEXT: [[ADD:%[0-9]+]]:vgpr(s32) = G_ADD [[COPY4]], [[COPY5]] + ; CHECK-NEXT: [[C1:%[0-9]+]]:vgpr(s32) = G_CONSTANT i32 0 + ; CHECK-NEXT: [[DEF:%[0-9]+]]:sreg_64_xexec = IMPLICIT_DEF + ; CHECK-NEXT: [[UV:%[0-9]+]]:vreg_64(s64), [[UV1:%[0-9]+]]:vreg_64(s64) = G_UNMERGE_VALUES [[BUILD_VECTOR]](<4 x s32>) + ; CHECK-NEXT: [[S_MOV_B64_term:%[0-9]+]]:sreg_64_xexec = S_MOV_B64_term $exec + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: bb.2: + 
; CHECK-NEXT: successors: %bb.3, %bb.2 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[PHI:%[0-9]+]]:sreg_64_xexec = PHI [[DEF]], %bb.1, %26, %bb.2 + ; CHECK-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[UV]].sub0(s64), implicit $exec + ; CHECK-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[UV]].sub1(s64), implicit $exec + ; CHECK-NEXT: [[MV:%[0-9]+]]:sreg_64_xexec(s64) = G_MERGE_VALUES [[V_READFIRSTLANE_B32_]](s32), [[V_READFIRSTLANE_B32_1]](s32) + ; CHECK-NEXT: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[MV]](s64), [[UV]](s64), implicit $exec + ; CHECK-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[UV1]].sub0(s64), implicit $exec + ; CHECK-NEXT: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[UV1]].sub1(s64), implicit $exec + ; CHECK-NEXT: [[MV1:%[0-9]+]]:sreg_64_xexec(s64) = G_MERGE_VALUES [[V_READFIRSTLANE_B32_2]](s32), [[V_READFIRSTLANE_B32_3]](s32) + ; CHECK-NEXT: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[MV1]](s64), [[UV1]](s64), implicit $exec + ; CHECK-NEXT: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_1]], [[V_CMP_EQ_U64_e64_]], implicit-def $scc + ; CHECK-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:sgpr(<4 x s32>) = G_BUILD_VECTOR [[V_READFIRSTLANE_B32_]](s32), [[V_READFIRSTLANE_B32_1]](s32), [[V_READFIRSTLANE_B32_2]](s32), [[V_READFIRSTLANE_B32_3]](s32) + ; CHECK-NEXT: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_]], implicit-def $exec, implicit-def $scc, implicit $exec + ; CHECK-NEXT: [[AMDGPU_BUFFER_LOAD:%[0-9]+]]:vgpr(<4 x s32>) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR1]](<4 x s32>), [[C1]](s32), [[COPY4]], [[C]], 0, 0, 0 :: (dereferenceable invariant load (s128), align 4) + ; CHECK-NEXT: [[AMDGPU_BUFFER_LOAD1:%[0-9]+]]:vgpr(<4 x s32>) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR1]](<4 x s32>), [[C1]](s32), [[COPY4]], [[C]], 16, 0, 0 :: (dereferenceable invariant load (s128), align 4) + ; CHECK-NEXT: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc + ; CHECK-NEXT: SI_WATERFALL_LOOP %bb.2, implicit $exec + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: bb.3: + ; CHECK-NEXT: $exec = S_MOV_B64_term [[S_MOV_B64_term]] + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: bb.4: + ; CHECK-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:vgpr(<8 x s32>) = G_CONCAT_VECTORS [[AMDGPU_BUFFER_LOAD]](<4 x s32>), [[AMDGPU_BUFFER_LOAD1]](<4 x s32>) + ; CHECK-NEXT: [[UV2:%[0-9]+]]:vgpr(s32), [[UV3:%[0-9]+]]:vgpr(s32), [[UV4:%[0-9]+]]:vgpr(s32), [[UV5:%[0-9]+]]:vgpr(s32), [[UV6:%[0-9]+]]:vgpr(s32), [[UV7:%[0-9]+]]:vgpr(s32), [[UV8:%[0-9]+]]:vgpr(s32), [[UV9:%[0-9]+]]:vgpr(s32) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<8 x s32>) + ; CHECK-NEXT: $vgpr0 = COPY [[UV2]](s32) + ; CHECK-NEXT: $vgpr1 = COPY [[UV3]](s32) + ; CHECK-NEXT: $vgpr2 = COPY [[UV4]](s32) + ; CHECK-NEXT: $vgpr3 = COPY [[UV5]](s32) + ; CHECK-NEXT: $vgpr4 = COPY [[UV6]](s32) + ; CHECK-NEXT: $vgpr5 = COPY [[UV7]](s32) + ; CHECK-NEXT: $vgpr6 = COPY [[UV8]](s32) + ; CHECK-NEXT: $vgpr7 = COPY [[UV9]](s32) + ; CHECK-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7 ; GREEDY-LABEL: name: s_buffer_load_v8f32_vgpr_offset_vgpr_rsrc_add_4080 ; GREEDY: bb.1 (%ir-block.0): - ; GREEDY: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4 - ; GREEDY: [[COPY:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 - ; GREEDY: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY $vgpr1 - ; 
GREEDY: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY $vgpr2 - ; GREEDY: [[COPY3:%[0-9]+]]:vgpr(s32) = COPY $vgpr3 - ; GREEDY: [[BUILD_VECTOR:%[0-9]+]]:vgpr(<4 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32) - ; GREEDY: [[COPY4:%[0-9]+]]:vgpr(s32) = COPY $vgpr4 - ; GREEDY: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 4080 - ; GREEDY: [[COPY5:%[0-9]+]]:vgpr(s32) = COPY [[C]](s32) - ; GREEDY: [[ADD:%[0-9]+]]:vgpr(s32) = G_ADD [[COPY4]], [[COPY5]] - ; GREEDY: [[C1:%[0-9]+]]:vgpr(s32) = G_CONSTANT i32 0 - ; GREEDY: [[DEF:%[0-9]+]]:sreg_64_xexec = IMPLICIT_DEF - ; GREEDY: [[UV:%[0-9]+]]:vreg_64(s64), [[UV1:%[0-9]+]]:vreg_64(s64) = G_UNMERGE_VALUES [[BUILD_VECTOR]](<4 x s32>) - ; GREEDY: [[S_MOV_B64_term:%[0-9]+]]:sreg_64_xexec = S_MOV_B64_term $exec - ; GREEDY: bb.2: - ; GREEDY: successors: %bb.3, %bb.2 - ; GREEDY: [[PHI:%[0-9]+]]:sreg_64_xexec = PHI [[DEF]], %bb.1, %26, %bb.2 - ; GREEDY: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[UV]].sub0(s64), implicit $exec - ; GREEDY: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[UV]].sub1(s64), implicit $exec - ; GREEDY: [[MV:%[0-9]+]]:sreg_64_xexec(s64) = G_MERGE_VALUES [[V_READFIRSTLANE_B32_]](s32), [[V_READFIRSTLANE_B32_1]](s32) - ; GREEDY: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[MV]](s64), [[UV]](s64), implicit $exec - ; GREEDY: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[UV1]].sub0(s64), implicit $exec - ; GREEDY: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[UV1]].sub1(s64), implicit $exec - ; GREEDY: [[MV1:%[0-9]+]]:sreg_64_xexec(s64) = G_MERGE_VALUES [[V_READFIRSTLANE_B32_2]](s32), [[V_READFIRSTLANE_B32_3]](s32) - ; GREEDY: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[MV1]](s64), [[UV1]](s64), implicit $exec - ; GREEDY: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_1]], [[V_CMP_EQ_U64_e64_]], implicit-def $scc - ; GREEDY: [[BUILD_VECTOR1:%[0-9]+]]:sgpr(<4 x s32>) = G_BUILD_VECTOR [[V_READFIRSTLANE_B32_]](s32), [[V_READFIRSTLANE_B32_1]](s32), [[V_READFIRSTLANE_B32_2]](s32), [[V_READFIRSTLANE_B32_3]](s32) - ; GREEDY: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_]], implicit-def $exec, implicit-def $scc, implicit $exec - ; GREEDY: [[AMDGPU_BUFFER_LOAD:%[0-9]+]]:vgpr(<4 x s32>) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR1]](<4 x s32>), [[C1]](s32), [[COPY4]], [[C]], 0, 0, 0 :: (dereferenceable invariant load (s128), align 4) - ; GREEDY: [[AMDGPU_BUFFER_LOAD1:%[0-9]+]]:vgpr(<4 x s32>) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR1]](<4 x s32>), [[C1]](s32), [[COPY4]], [[C]], 16, 0, 0 :: (dereferenceable invariant load (s128), align 4) - ; GREEDY: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc - ; GREEDY: SI_WATERFALL_LOOP %bb.2, implicit $exec - ; GREEDY: bb.3: - ; GREEDY: $exec = S_MOV_B64_term [[S_MOV_B64_term]] - ; GREEDY: bb.4: - ; GREEDY: [[CONCAT_VECTORS:%[0-9]+]]:vgpr(<8 x s32>) = G_CONCAT_VECTORS [[AMDGPU_BUFFER_LOAD]](<4 x s32>), [[AMDGPU_BUFFER_LOAD1]](<4 x s32>) - ; GREEDY: [[UV2:%[0-9]+]]:vgpr(s32), [[UV3:%[0-9]+]]:vgpr(s32), [[UV4:%[0-9]+]]:vgpr(s32), [[UV5:%[0-9]+]]:vgpr(s32), [[UV6:%[0-9]+]]:vgpr(s32), [[UV7:%[0-9]+]]:vgpr(s32), [[UV8:%[0-9]+]]:vgpr(s32), [[UV9:%[0-9]+]]:vgpr(s32) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<8 x s32>) - ; GREEDY: $vgpr0 = COPY [[UV2]](s32) - ; GREEDY: $vgpr1 = COPY [[UV3]](s32) - ; GREEDY: $vgpr2 = COPY [[UV4]](s32) - ; GREEDY: $vgpr3 = COPY 
[[UV5]](s32) - ; GREEDY: $vgpr4 = COPY [[UV6]](s32) - ; GREEDY: $vgpr5 = COPY [[UV7]](s32) - ; GREEDY: $vgpr6 = COPY [[UV8]](s32) - ; GREEDY: $vgpr7 = COPY [[UV9]](s32) - ; GREEDY: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7 + ; GREEDY-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4 + ; GREEDY-NEXT: {{ $}} + ; GREEDY-NEXT: [[COPY:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 + ; GREEDY-NEXT: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY $vgpr1 + ; GREEDY-NEXT: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY $vgpr2 + ; GREEDY-NEXT: [[COPY3:%[0-9]+]]:vgpr(s32) = COPY $vgpr3 + ; GREEDY-NEXT: [[BUILD_VECTOR:%[0-9]+]]:vgpr(<4 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32) + ; GREEDY-NEXT: [[COPY4:%[0-9]+]]:vgpr(s32) = COPY $vgpr4 + ; GREEDY-NEXT: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 4080 + ; GREEDY-NEXT: [[COPY5:%[0-9]+]]:vgpr(s32) = COPY [[C]](s32) + ; GREEDY-NEXT: [[ADD:%[0-9]+]]:vgpr(s32) = G_ADD [[COPY4]], [[COPY5]] + ; GREEDY-NEXT: [[C1:%[0-9]+]]:vgpr(s32) = G_CONSTANT i32 0 + ; GREEDY-NEXT: [[DEF:%[0-9]+]]:sreg_64_xexec = IMPLICIT_DEF + ; GREEDY-NEXT: [[UV:%[0-9]+]]:vreg_64(s64), [[UV1:%[0-9]+]]:vreg_64(s64) = G_UNMERGE_VALUES [[BUILD_VECTOR]](<4 x s32>) + ; GREEDY-NEXT: [[S_MOV_B64_term:%[0-9]+]]:sreg_64_xexec = S_MOV_B64_term $exec + ; GREEDY-NEXT: {{ $}} + ; GREEDY-NEXT: bb.2: + ; GREEDY-NEXT: successors: %bb.3, %bb.2 + ; GREEDY-NEXT: {{ $}} + ; GREEDY-NEXT: [[PHI:%[0-9]+]]:sreg_64_xexec = PHI [[DEF]], %bb.1, %26, %bb.2 + ; GREEDY-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[UV]].sub0(s64), implicit $exec + ; GREEDY-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[UV]].sub1(s64), implicit $exec + ; GREEDY-NEXT: [[MV:%[0-9]+]]:sreg_64_xexec(s64) = G_MERGE_VALUES [[V_READFIRSTLANE_B32_]](s32), [[V_READFIRSTLANE_B32_1]](s32) + ; GREEDY-NEXT: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[MV]](s64), [[UV]](s64), implicit $exec + ; GREEDY-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[UV1]].sub0(s64), implicit $exec + ; GREEDY-NEXT: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[UV1]].sub1(s64), implicit $exec + ; GREEDY-NEXT: [[MV1:%[0-9]+]]:sreg_64_xexec(s64) = G_MERGE_VALUES [[V_READFIRSTLANE_B32_2]](s32), [[V_READFIRSTLANE_B32_3]](s32) + ; GREEDY-NEXT: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[MV1]](s64), [[UV1]](s64), implicit $exec + ; GREEDY-NEXT: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_1]], [[V_CMP_EQ_U64_e64_]], implicit-def $scc + ; GREEDY-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:sgpr(<4 x s32>) = G_BUILD_VECTOR [[V_READFIRSTLANE_B32_]](s32), [[V_READFIRSTLANE_B32_1]](s32), [[V_READFIRSTLANE_B32_2]](s32), [[V_READFIRSTLANE_B32_3]](s32) + ; GREEDY-NEXT: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_]], implicit-def $exec, implicit-def $scc, implicit $exec + ; GREEDY-NEXT: [[AMDGPU_BUFFER_LOAD:%[0-9]+]]:vgpr(<4 x s32>) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR1]](<4 x s32>), [[C1]](s32), [[COPY4]], [[C]], 0, 0, 0 :: (dereferenceable invariant load (s128), align 4) + ; GREEDY-NEXT: [[AMDGPU_BUFFER_LOAD1:%[0-9]+]]:vgpr(<4 x s32>) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR1]](<4 x s32>), [[C1]](s32), [[COPY4]], [[C]], 16, 0, 0 :: (dereferenceable invariant load (s128), align 4) + ; GREEDY-NEXT: $exec = S_XOR_B64_term $exec, 
[[S_AND_SAVEEXEC_B64_]], implicit-def $scc + ; GREEDY-NEXT: SI_WATERFALL_LOOP %bb.2, implicit $exec + ; GREEDY-NEXT: {{ $}} + ; GREEDY-NEXT: bb.3: + ; GREEDY-NEXT: $exec = S_MOV_B64_term [[S_MOV_B64_term]] + ; GREEDY-NEXT: {{ $}} + ; GREEDY-NEXT: bb.4: + ; GREEDY-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:vgpr(<8 x s32>) = G_CONCAT_VECTORS [[AMDGPU_BUFFER_LOAD]](<4 x s32>), [[AMDGPU_BUFFER_LOAD1]](<4 x s32>) + ; GREEDY-NEXT: [[UV2:%[0-9]+]]:vgpr(s32), [[UV3:%[0-9]+]]:vgpr(s32), [[UV4:%[0-9]+]]:vgpr(s32), [[UV5:%[0-9]+]]:vgpr(s32), [[UV6:%[0-9]+]]:vgpr(s32), [[UV7:%[0-9]+]]:vgpr(s32), [[UV8:%[0-9]+]]:vgpr(s32), [[UV9:%[0-9]+]]:vgpr(s32) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<8 x s32>) + ; GREEDY-NEXT: $vgpr0 = COPY [[UV2]](s32) + ; GREEDY-NEXT: $vgpr1 = COPY [[UV3]](s32) + ; GREEDY-NEXT: $vgpr2 = COPY [[UV4]](s32) + ; GREEDY-NEXT: $vgpr3 = COPY [[UV5]](s32) + ; GREEDY-NEXT: $vgpr4 = COPY [[UV6]](s32) + ; GREEDY-NEXT: $vgpr5 = COPY [[UV7]](s32) + ; GREEDY-NEXT: $vgpr6 = COPY [[UV8]](s32) + ; GREEDY-NEXT: $vgpr7 = COPY [[UV9]](s32) + ; GREEDY-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7 %soffset = add i32 %offset.base, 4080 %val = call <8 x float> @llvm.amdgcn.s.buffer.load.v8f32(<4 x i32> %rsrc, i32 %soffset, i32 0) ret <8 x float> %val @@ -2548,98 +2712,108 @@ define amdgpu_ps <8 x float> @s_buffer_load_v8f32_vgpr_offset_vgpr_rsrc_add_4080 define amdgpu_ps <8 x float> @s_buffer_load_v8f32_vgpr_offset_vgpr_rsrc_offset_4064(<4 x i32> %rsrc, i32 %offset.base) { ; CHECK-LABEL: name: s_buffer_load_v8f32_vgpr_offset_vgpr_rsrc_offset_4064 ; CHECK: bb.1 (%ir-block.0): - ; CHECK: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3 - ; CHECK: [[COPY:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 - ; CHECK: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY $vgpr1 - ; CHECK: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY $vgpr2 - ; CHECK: [[COPY3:%[0-9]+]]:vgpr(s32) = COPY $vgpr3 - ; CHECK: [[BUILD_VECTOR:%[0-9]+]]:vgpr(<4 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32) - ; CHECK: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 4064 - ; CHECK: [[C1:%[0-9]+]]:vgpr(s32) = G_CONSTANT i32 0 - ; CHECK: [[C2:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 0 - ; CHECK: [[C3:%[0-9]+]]:vgpr(s32) = G_CONSTANT i32 0 - ; CHECK: [[DEF:%[0-9]+]]:sreg_64_xexec = IMPLICIT_DEF - ; CHECK: [[UV:%[0-9]+]]:vreg_64(s64), [[UV1:%[0-9]+]]:vreg_64(s64) = G_UNMERGE_VALUES [[BUILD_VECTOR]](<4 x s32>) - ; CHECK: [[S_MOV_B64_term:%[0-9]+]]:sreg_64_xexec = S_MOV_B64_term $exec - ; CHECK: bb.2: - ; CHECK: successors: %bb.3, %bb.2 - ; CHECK: [[PHI:%[0-9]+]]:sreg_64_xexec = PHI [[DEF]], %bb.1, %26, %bb.2 - ; CHECK: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[UV]].sub0(s64), implicit $exec - ; CHECK: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[UV]].sub1(s64), implicit $exec - ; CHECK: [[MV:%[0-9]+]]:sreg_64_xexec(s64) = G_MERGE_VALUES [[V_READFIRSTLANE_B32_]](s32), [[V_READFIRSTLANE_B32_1]](s32) - ; CHECK: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[MV]](s64), [[UV]](s64), implicit $exec - ; CHECK: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[UV1]].sub0(s64), implicit $exec - ; CHECK: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[UV1]].sub1(s64), implicit $exec - ; CHECK: [[MV1:%[0-9]+]]:sreg_64_xexec(s64) = G_MERGE_VALUES [[V_READFIRSTLANE_B32_2]](s32), [[V_READFIRSTLANE_B32_3]](s32) - ; CHECK: 
[[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[MV1]](s64), [[UV1]](s64), implicit $exec - ; CHECK: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_1]], [[V_CMP_EQ_U64_e64_]], implicit-def $scc - ; CHECK: [[BUILD_VECTOR1:%[0-9]+]]:sgpr(<4 x s32>) = G_BUILD_VECTOR [[V_READFIRSTLANE_B32_]](s32), [[V_READFIRSTLANE_B32_1]](s32), [[V_READFIRSTLANE_B32_2]](s32), [[V_READFIRSTLANE_B32_3]](s32) - ; CHECK: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_]], implicit-def $exec, implicit-def $scc, implicit $exec - ; CHECK: [[AMDGPU_BUFFER_LOAD:%[0-9]+]]:vgpr(<4 x s32>) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR1]](<4 x s32>), [[C3]](s32), [[C1]], [[C2]], 4064, 0, 0 :: (dereferenceable invariant load (s128) from unknown-address + 4064, align 4) - ; CHECK: [[AMDGPU_BUFFER_LOAD1:%[0-9]+]]:vgpr(<4 x s32>) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR1]](<4 x s32>), [[C3]](s32), [[C1]], [[C2]], 4080, 0, 0 :: (dereferenceable invariant load (s128) from unknown-address + 4064, align 4) - ; CHECK: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc - ; CHECK: SI_WATERFALL_LOOP %bb.2, implicit $exec - ; CHECK: bb.3: - ; CHECK: $exec = S_MOV_B64_term [[S_MOV_B64_term]] - ; CHECK: bb.4: - ; CHECK: [[CONCAT_VECTORS:%[0-9]+]]:vgpr(<8 x s32>) = G_CONCAT_VECTORS [[AMDGPU_BUFFER_LOAD]](<4 x s32>), [[AMDGPU_BUFFER_LOAD1]](<4 x s32>) - ; CHECK: [[UV2:%[0-9]+]]:vgpr(s32), [[UV3:%[0-9]+]]:vgpr(s32), [[UV4:%[0-9]+]]:vgpr(s32), [[UV5:%[0-9]+]]:vgpr(s32), [[UV6:%[0-9]+]]:vgpr(s32), [[UV7:%[0-9]+]]:vgpr(s32), [[UV8:%[0-9]+]]:vgpr(s32), [[UV9:%[0-9]+]]:vgpr(s32) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<8 x s32>) - ; CHECK: $vgpr0 = COPY [[UV2]](s32) - ; CHECK: $vgpr1 = COPY [[UV3]](s32) - ; CHECK: $vgpr2 = COPY [[UV4]](s32) - ; CHECK: $vgpr3 = COPY [[UV5]](s32) - ; CHECK: $vgpr4 = COPY [[UV6]](s32) - ; CHECK: $vgpr5 = COPY [[UV7]](s32) - ; CHECK: $vgpr6 = COPY [[UV8]](s32) - ; CHECK: $vgpr7 = COPY [[UV9]](s32) - ; CHECK: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7 + ; CHECK-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY $vgpr1 + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY $vgpr2 + ; CHECK-NEXT: [[COPY3:%[0-9]+]]:vgpr(s32) = COPY $vgpr3 + ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:vgpr(<4 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32) + ; CHECK-NEXT: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 4064 + ; CHECK-NEXT: [[C1:%[0-9]+]]:vgpr(s32) = G_CONSTANT i32 0 + ; CHECK-NEXT: [[C2:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 0 + ; CHECK-NEXT: [[C3:%[0-9]+]]:vgpr(s32) = G_CONSTANT i32 0 + ; CHECK-NEXT: [[DEF:%[0-9]+]]:sreg_64_xexec = IMPLICIT_DEF + ; CHECK-NEXT: [[UV:%[0-9]+]]:vreg_64(s64), [[UV1:%[0-9]+]]:vreg_64(s64) = G_UNMERGE_VALUES [[BUILD_VECTOR]](<4 x s32>) + ; CHECK-NEXT: [[S_MOV_B64_term:%[0-9]+]]:sreg_64_xexec = S_MOV_B64_term $exec + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: bb.2: + ; CHECK-NEXT: successors: %bb.3, %bb.2 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[PHI:%[0-9]+]]:sreg_64_xexec = PHI [[DEF]], %bb.1, %26, %bb.2 + ; CHECK-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[UV]].sub0(s64), implicit $exec + ; CHECK-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[UV]].sub1(s64), implicit $exec + ; CHECK-NEXT: 
[[MV:%[0-9]+]]:sreg_64_xexec(s64) = G_MERGE_VALUES [[V_READFIRSTLANE_B32_]](s32), [[V_READFIRSTLANE_B32_1]](s32) + ; CHECK-NEXT: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[MV]](s64), [[UV]](s64), implicit $exec + ; CHECK-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[UV1]].sub0(s64), implicit $exec + ; CHECK-NEXT: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[UV1]].sub1(s64), implicit $exec + ; CHECK-NEXT: [[MV1:%[0-9]+]]:sreg_64_xexec(s64) = G_MERGE_VALUES [[V_READFIRSTLANE_B32_2]](s32), [[V_READFIRSTLANE_B32_3]](s32) + ; CHECK-NEXT: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[MV1]](s64), [[UV1]](s64), implicit $exec + ; CHECK-NEXT: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_1]], [[V_CMP_EQ_U64_e64_]], implicit-def $scc + ; CHECK-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:sgpr(<4 x s32>) = G_BUILD_VECTOR [[V_READFIRSTLANE_B32_]](s32), [[V_READFIRSTLANE_B32_1]](s32), [[V_READFIRSTLANE_B32_2]](s32), [[V_READFIRSTLANE_B32_3]](s32) + ; CHECK-NEXT: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_]], implicit-def $exec, implicit-def $scc, implicit $exec + ; CHECK-NEXT: [[AMDGPU_BUFFER_LOAD:%[0-9]+]]:vgpr(<4 x s32>) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR1]](<4 x s32>), [[C3]](s32), [[C1]], [[C2]], 4064, 0, 0 :: (dereferenceable invariant load (s128) from unknown-address + 4064, align 4) + ; CHECK-NEXT: [[AMDGPU_BUFFER_LOAD1:%[0-9]+]]:vgpr(<4 x s32>) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR1]](<4 x s32>), [[C3]](s32), [[C1]], [[C2]], 4080, 0, 0 :: (dereferenceable invariant load (s128) from unknown-address + 4064, align 4) + ; CHECK-NEXT: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc + ; CHECK-NEXT: SI_WATERFALL_LOOP %bb.2, implicit $exec + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: bb.3: + ; CHECK-NEXT: $exec = S_MOV_B64_term [[S_MOV_B64_term]] + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: bb.4: + ; CHECK-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:vgpr(<8 x s32>) = G_CONCAT_VECTORS [[AMDGPU_BUFFER_LOAD]](<4 x s32>), [[AMDGPU_BUFFER_LOAD1]](<4 x s32>) + ; CHECK-NEXT: [[UV2:%[0-9]+]]:vgpr(s32), [[UV3:%[0-9]+]]:vgpr(s32), [[UV4:%[0-9]+]]:vgpr(s32), [[UV5:%[0-9]+]]:vgpr(s32), [[UV6:%[0-9]+]]:vgpr(s32), [[UV7:%[0-9]+]]:vgpr(s32), [[UV8:%[0-9]+]]:vgpr(s32), [[UV9:%[0-9]+]]:vgpr(s32) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<8 x s32>) + ; CHECK-NEXT: $vgpr0 = COPY [[UV2]](s32) + ; CHECK-NEXT: $vgpr1 = COPY [[UV3]](s32) + ; CHECK-NEXT: $vgpr2 = COPY [[UV4]](s32) + ; CHECK-NEXT: $vgpr3 = COPY [[UV5]](s32) + ; CHECK-NEXT: $vgpr4 = COPY [[UV6]](s32) + ; CHECK-NEXT: $vgpr5 = COPY [[UV7]](s32) + ; CHECK-NEXT: $vgpr6 = COPY [[UV8]](s32) + ; CHECK-NEXT: $vgpr7 = COPY [[UV9]](s32) + ; CHECK-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7 ; GREEDY-LABEL: name: s_buffer_load_v8f32_vgpr_offset_vgpr_rsrc_offset_4064 ; GREEDY: bb.1 (%ir-block.0): - ; GREEDY: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3 - ; GREEDY: [[COPY:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 - ; GREEDY: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY $vgpr1 - ; GREEDY: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY $vgpr2 - ; GREEDY: [[COPY3:%[0-9]+]]:vgpr(s32) = COPY $vgpr3 - ; GREEDY: [[BUILD_VECTOR:%[0-9]+]]:vgpr(<4 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32) - ; GREEDY: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 4064 - ; GREEDY: [[C1:%[0-9]+]]:vgpr(s32) = G_CONSTANT i32 0 - 
; GREEDY: [[C2:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 0 - ; GREEDY: [[C3:%[0-9]+]]:vgpr(s32) = G_CONSTANT i32 0 - ; GREEDY: [[DEF:%[0-9]+]]:sreg_64_xexec = IMPLICIT_DEF - ; GREEDY: [[UV:%[0-9]+]]:vreg_64(s64), [[UV1:%[0-9]+]]:vreg_64(s64) = G_UNMERGE_VALUES [[BUILD_VECTOR]](<4 x s32>) - ; GREEDY: [[S_MOV_B64_term:%[0-9]+]]:sreg_64_xexec = S_MOV_B64_term $exec - ; GREEDY: bb.2: - ; GREEDY: successors: %bb.3, %bb.2 - ; GREEDY: [[PHI:%[0-9]+]]:sreg_64_xexec = PHI [[DEF]], %bb.1, %26, %bb.2 - ; GREEDY: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[UV]].sub0(s64), implicit $exec - ; GREEDY: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[UV]].sub1(s64), implicit $exec - ; GREEDY: [[MV:%[0-9]+]]:sreg_64_xexec(s64) = G_MERGE_VALUES [[V_READFIRSTLANE_B32_]](s32), [[V_READFIRSTLANE_B32_1]](s32) - ; GREEDY: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[MV]](s64), [[UV]](s64), implicit $exec - ; GREEDY: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[UV1]].sub0(s64), implicit $exec - ; GREEDY: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[UV1]].sub1(s64), implicit $exec - ; GREEDY: [[MV1:%[0-9]+]]:sreg_64_xexec(s64) = G_MERGE_VALUES [[V_READFIRSTLANE_B32_2]](s32), [[V_READFIRSTLANE_B32_3]](s32) - ; GREEDY: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[MV1]](s64), [[UV1]](s64), implicit $exec - ; GREEDY: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_1]], [[V_CMP_EQ_U64_e64_]], implicit-def $scc - ; GREEDY: [[BUILD_VECTOR1:%[0-9]+]]:sgpr(<4 x s32>) = G_BUILD_VECTOR [[V_READFIRSTLANE_B32_]](s32), [[V_READFIRSTLANE_B32_1]](s32), [[V_READFIRSTLANE_B32_2]](s32), [[V_READFIRSTLANE_B32_3]](s32) - ; GREEDY: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_]], implicit-def $exec, implicit-def $scc, implicit $exec - ; GREEDY: [[AMDGPU_BUFFER_LOAD:%[0-9]+]]:vgpr(<4 x s32>) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR1]](<4 x s32>), [[C3]](s32), [[C1]], [[C2]], 4064, 0, 0 :: (dereferenceable invariant load (s128) from unknown-address + 4064, align 4) - ; GREEDY: [[AMDGPU_BUFFER_LOAD1:%[0-9]+]]:vgpr(<4 x s32>) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR1]](<4 x s32>), [[C3]](s32), [[C1]], [[C2]], 4080, 0, 0 :: (dereferenceable invariant load (s128) from unknown-address + 4064, align 4) - ; GREEDY: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc - ; GREEDY: SI_WATERFALL_LOOP %bb.2, implicit $exec - ; GREEDY: bb.3: - ; GREEDY: $exec = S_MOV_B64_term [[S_MOV_B64_term]] - ; GREEDY: bb.4: - ; GREEDY: [[CONCAT_VECTORS:%[0-9]+]]:vgpr(<8 x s32>) = G_CONCAT_VECTORS [[AMDGPU_BUFFER_LOAD]](<4 x s32>), [[AMDGPU_BUFFER_LOAD1]](<4 x s32>) - ; GREEDY: [[UV2:%[0-9]+]]:vgpr(s32), [[UV3:%[0-9]+]]:vgpr(s32), [[UV4:%[0-9]+]]:vgpr(s32), [[UV5:%[0-9]+]]:vgpr(s32), [[UV6:%[0-9]+]]:vgpr(s32), [[UV7:%[0-9]+]]:vgpr(s32), [[UV8:%[0-9]+]]:vgpr(s32), [[UV9:%[0-9]+]]:vgpr(s32) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<8 x s32>) - ; GREEDY: $vgpr0 = COPY [[UV2]](s32) - ; GREEDY: $vgpr1 = COPY [[UV3]](s32) - ; GREEDY: $vgpr2 = COPY [[UV4]](s32) - ; GREEDY: $vgpr3 = COPY [[UV5]](s32) - ; GREEDY: $vgpr4 = COPY [[UV6]](s32) - ; GREEDY: $vgpr5 = COPY [[UV7]](s32) - ; GREEDY: $vgpr6 = COPY [[UV8]](s32) - ; GREEDY: $vgpr7 = COPY [[UV9]](s32) - ; GREEDY: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7 + ; GREEDY-NEXT: liveins: $vgpr0, 
$vgpr1, $vgpr2, $vgpr3 + ; GREEDY-NEXT: {{ $}} + ; GREEDY-NEXT: [[COPY:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 + ; GREEDY-NEXT: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY $vgpr1 + ; GREEDY-NEXT: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY $vgpr2 + ; GREEDY-NEXT: [[COPY3:%[0-9]+]]:vgpr(s32) = COPY $vgpr3 + ; GREEDY-NEXT: [[BUILD_VECTOR:%[0-9]+]]:vgpr(<4 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32) + ; GREEDY-NEXT: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 4064 + ; GREEDY-NEXT: [[C1:%[0-9]+]]:vgpr(s32) = G_CONSTANT i32 0 + ; GREEDY-NEXT: [[C2:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 0 + ; GREEDY-NEXT: [[C3:%[0-9]+]]:vgpr(s32) = G_CONSTANT i32 0 + ; GREEDY-NEXT: [[DEF:%[0-9]+]]:sreg_64_xexec = IMPLICIT_DEF + ; GREEDY-NEXT: [[UV:%[0-9]+]]:vreg_64(s64), [[UV1:%[0-9]+]]:vreg_64(s64) = G_UNMERGE_VALUES [[BUILD_VECTOR]](<4 x s32>) + ; GREEDY-NEXT: [[S_MOV_B64_term:%[0-9]+]]:sreg_64_xexec = S_MOV_B64_term $exec + ; GREEDY-NEXT: {{ $}} + ; GREEDY-NEXT: bb.2: + ; GREEDY-NEXT: successors: %bb.3, %bb.2 + ; GREEDY-NEXT: {{ $}} + ; GREEDY-NEXT: [[PHI:%[0-9]+]]:sreg_64_xexec = PHI [[DEF]], %bb.1, %26, %bb.2 + ; GREEDY-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[UV]].sub0(s64), implicit $exec + ; GREEDY-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[UV]].sub1(s64), implicit $exec + ; GREEDY-NEXT: [[MV:%[0-9]+]]:sreg_64_xexec(s64) = G_MERGE_VALUES [[V_READFIRSTLANE_B32_]](s32), [[V_READFIRSTLANE_B32_1]](s32) + ; GREEDY-NEXT: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[MV]](s64), [[UV]](s64), implicit $exec + ; GREEDY-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[UV1]].sub0(s64), implicit $exec + ; GREEDY-NEXT: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[UV1]].sub1(s64), implicit $exec + ; GREEDY-NEXT: [[MV1:%[0-9]+]]:sreg_64_xexec(s64) = G_MERGE_VALUES [[V_READFIRSTLANE_B32_2]](s32), [[V_READFIRSTLANE_B32_3]](s32) + ; GREEDY-NEXT: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[MV1]](s64), [[UV1]](s64), implicit $exec + ; GREEDY-NEXT: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_1]], [[V_CMP_EQ_U64_e64_]], implicit-def $scc + ; GREEDY-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:sgpr(<4 x s32>) = G_BUILD_VECTOR [[V_READFIRSTLANE_B32_]](s32), [[V_READFIRSTLANE_B32_1]](s32), [[V_READFIRSTLANE_B32_2]](s32), [[V_READFIRSTLANE_B32_3]](s32) + ; GREEDY-NEXT: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_]], implicit-def $exec, implicit-def $scc, implicit $exec + ; GREEDY-NEXT: [[AMDGPU_BUFFER_LOAD:%[0-9]+]]:vgpr(<4 x s32>) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR1]](<4 x s32>), [[C3]](s32), [[C1]], [[C2]], 4064, 0, 0 :: (dereferenceable invariant load (s128) from unknown-address + 4064, align 4) + ; GREEDY-NEXT: [[AMDGPU_BUFFER_LOAD1:%[0-9]+]]:vgpr(<4 x s32>) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR1]](<4 x s32>), [[C3]](s32), [[C1]], [[C2]], 4080, 0, 0 :: (dereferenceable invariant load (s128) from unknown-address + 4064, align 4) + ; GREEDY-NEXT: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc + ; GREEDY-NEXT: SI_WATERFALL_LOOP %bb.2, implicit $exec + ; GREEDY-NEXT: {{ $}} + ; GREEDY-NEXT: bb.3: + ; GREEDY-NEXT: $exec = S_MOV_B64_term [[S_MOV_B64_term]] + ; GREEDY-NEXT: {{ $}} + ; GREEDY-NEXT: bb.4: + ; GREEDY-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:vgpr(<8 x s32>) = G_CONCAT_VECTORS [[AMDGPU_BUFFER_LOAD]](<4 x s32>), [[AMDGPU_BUFFER_LOAD1]](<4 x s32>) + ; 
GREEDY-NEXT: [[UV2:%[0-9]+]]:vgpr(s32), [[UV3:%[0-9]+]]:vgpr(s32), [[UV4:%[0-9]+]]:vgpr(s32), [[UV5:%[0-9]+]]:vgpr(s32), [[UV6:%[0-9]+]]:vgpr(s32), [[UV7:%[0-9]+]]:vgpr(s32), [[UV8:%[0-9]+]]:vgpr(s32), [[UV9:%[0-9]+]]:vgpr(s32) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<8 x s32>) + ; GREEDY-NEXT: $vgpr0 = COPY [[UV2]](s32) + ; GREEDY-NEXT: $vgpr1 = COPY [[UV3]](s32) + ; GREEDY-NEXT: $vgpr2 = COPY [[UV4]](s32) + ; GREEDY-NEXT: $vgpr3 = COPY [[UV5]](s32) + ; GREEDY-NEXT: $vgpr4 = COPY [[UV6]](s32) + ; GREEDY-NEXT: $vgpr5 = COPY [[UV7]](s32) + ; GREEDY-NEXT: $vgpr6 = COPY [[UV8]](s32) + ; GREEDY-NEXT: $vgpr7 = COPY [[UV9]](s32) + ; GREEDY-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7 %val = call <8 x float> @llvm.amdgcn.s.buffer.load.v8f32(<4 x i32> %rsrc, i32 4064, i32 0) ret <8 x float> %val } @@ -2647,36 +2821,38 @@ define amdgpu_ps <8 x float> @s_buffer_load_v8f32_vgpr_offset_vgpr_rsrc_offset_4 define amdgpu_ps float @s_buffer_load_f32_offset_add_vgpr_sgpr(<4 x i32> inreg %rsrc, i32 %offset.v, i32 inreg %offset.s) { ; CHECK-LABEL: name: s_buffer_load_f32_offset_add_vgpr_sgpr ; CHECK: bb.1 (%ir-block.0): - ; CHECK: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $vgpr0 - ; CHECK: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr2 - ; CHECK: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr3 - ; CHECK: [[COPY2:%[0-9]+]]:sgpr(s32) = COPY $sgpr4 - ; CHECK: [[COPY3:%[0-9]+]]:sgpr(s32) = COPY $sgpr5 - ; CHECK: [[BUILD_VECTOR:%[0-9]+]]:sgpr(<4 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32) - ; CHECK: [[COPY4:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 - ; CHECK: [[COPY5:%[0-9]+]]:sgpr(s32) = COPY $sgpr6 - ; CHECK: [[COPY6:%[0-9]+]]:vgpr(s32) = COPY [[COPY5]](s32) - ; CHECK: [[ADD:%[0-9]+]]:vgpr(s32) = G_ADD [[COPY4]], [[COPY6]] - ; CHECK: [[C:%[0-9]+]]:vgpr(s32) = G_CONSTANT i32 0 - ; CHECK: [[AMDGPU_BUFFER_LOAD:%[0-9]+]]:vgpr(s32) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR]](<4 x s32>), [[C]](s32), [[COPY4]], [[COPY5]], 0, 0, 0 :: (dereferenceable invariant load (s32)) - ; CHECK: $vgpr0 = COPY [[AMDGPU_BUFFER_LOAD]](s32) - ; CHECK: SI_RETURN_TO_EPILOG implicit $vgpr0 + ; CHECK-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $vgpr0 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr2 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr3 + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:sgpr(s32) = COPY $sgpr4 + ; CHECK-NEXT: [[COPY3:%[0-9]+]]:sgpr(s32) = COPY $sgpr5 + ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:sgpr(<4 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32) + ; CHECK-NEXT: [[COPY4:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 + ; CHECK-NEXT: [[COPY5:%[0-9]+]]:sgpr(s32) = COPY $sgpr6 + ; CHECK-NEXT: [[COPY6:%[0-9]+]]:vgpr(s32) = COPY [[COPY5]](s32) + ; CHECK-NEXT: [[ADD:%[0-9]+]]:vgpr(s32) = G_ADD [[COPY4]], [[COPY6]] + ; CHECK-NEXT: [[C:%[0-9]+]]:vgpr(s32) = G_CONSTANT i32 0 + ; CHECK-NEXT: [[AMDGPU_BUFFER_LOAD:%[0-9]+]]:vgpr(s32) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR]](<4 x s32>), [[C]](s32), [[COPY4]], [[COPY5]], 0, 0, 0 :: (dereferenceable invariant load (s32)) + ; CHECK-NEXT: $vgpr0 = COPY [[AMDGPU_BUFFER_LOAD]](s32) + ; CHECK-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0 ; GREEDY-LABEL: name: s_buffer_load_f32_offset_add_vgpr_sgpr ; GREEDY: bb.1 (%ir-block.0): - ; GREEDY: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $vgpr0 - ; GREEDY: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr2 - ; GREEDY: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY 
$sgpr3 - ; GREEDY: [[COPY2:%[0-9]+]]:sgpr(s32) = COPY $sgpr4 - ; GREEDY: [[COPY3:%[0-9]+]]:sgpr(s32) = COPY $sgpr5 - ; GREEDY: [[BUILD_VECTOR:%[0-9]+]]:sgpr(<4 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32) - ; GREEDY: [[COPY4:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 - ; GREEDY: [[COPY5:%[0-9]+]]:sgpr(s32) = COPY $sgpr6 - ; GREEDY: [[COPY6:%[0-9]+]]:vgpr(s32) = COPY [[COPY5]](s32) - ; GREEDY: [[ADD:%[0-9]+]]:vgpr(s32) = G_ADD [[COPY4]], [[COPY6]] - ; GREEDY: [[C:%[0-9]+]]:vgpr(s32) = G_CONSTANT i32 0 - ; GREEDY: [[AMDGPU_BUFFER_LOAD:%[0-9]+]]:vgpr(s32) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR]](<4 x s32>), [[C]](s32), [[COPY4]], [[COPY5]], 0, 0, 0 :: (dereferenceable invariant load (s32)) - ; GREEDY: $vgpr0 = COPY [[AMDGPU_BUFFER_LOAD]](s32) - ; GREEDY: SI_RETURN_TO_EPILOG implicit $vgpr0 + ; GREEDY-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $vgpr0 + ; GREEDY-NEXT: {{ $}} + ; GREEDY-NEXT: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr2 + ; GREEDY-NEXT: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr3 + ; GREEDY-NEXT: [[COPY2:%[0-9]+]]:sgpr(s32) = COPY $sgpr4 + ; GREEDY-NEXT: [[COPY3:%[0-9]+]]:sgpr(s32) = COPY $sgpr5 + ; GREEDY-NEXT: [[BUILD_VECTOR:%[0-9]+]]:sgpr(<4 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32) + ; GREEDY-NEXT: [[COPY4:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 + ; GREEDY-NEXT: [[COPY5:%[0-9]+]]:sgpr(s32) = COPY $sgpr6 + ; GREEDY-NEXT: [[COPY6:%[0-9]+]]:vgpr(s32) = COPY [[COPY5]](s32) + ; GREEDY-NEXT: [[ADD:%[0-9]+]]:vgpr(s32) = G_ADD [[COPY4]], [[COPY6]] + ; GREEDY-NEXT: [[C:%[0-9]+]]:vgpr(s32) = G_CONSTANT i32 0 + ; GREEDY-NEXT: [[AMDGPU_BUFFER_LOAD:%[0-9]+]]:vgpr(s32) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR]](<4 x s32>), [[C]](s32), [[COPY4]], [[COPY5]], 0, 0, 0 :: (dereferenceable invariant load (s32)) + ; GREEDY-NEXT: $vgpr0 = COPY [[AMDGPU_BUFFER_LOAD]](s32) + ; GREEDY-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0 %offset = add i32 %offset.v, %offset.s %val = call float @llvm.amdgcn.s.buffer.load.f32(<4 x i32> %rsrc, i32 %offset, i32 0) ret float %val @@ -2685,36 +2861,38 @@ define amdgpu_ps float @s_buffer_load_f32_offset_add_vgpr_sgpr(<4 x i32> inreg % define amdgpu_ps float @s_buffer_load_f32_offset_add_sgpr_vgpr(<4 x i32> inreg %rsrc, i32 %offset.v, i32 inreg %offset.s) { ; CHECK-LABEL: name: s_buffer_load_f32_offset_add_sgpr_vgpr ; CHECK: bb.1 (%ir-block.0): - ; CHECK: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $vgpr0 - ; CHECK: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr2 - ; CHECK: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr3 - ; CHECK: [[COPY2:%[0-9]+]]:sgpr(s32) = COPY $sgpr4 - ; CHECK: [[COPY3:%[0-9]+]]:sgpr(s32) = COPY $sgpr5 - ; CHECK: [[BUILD_VECTOR:%[0-9]+]]:sgpr(<4 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32) - ; CHECK: [[COPY4:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 - ; CHECK: [[COPY5:%[0-9]+]]:sgpr(s32) = COPY $sgpr6 - ; CHECK: [[COPY6:%[0-9]+]]:vgpr(s32) = COPY [[COPY5]](s32) - ; CHECK: [[ADD:%[0-9]+]]:vgpr(s32) = G_ADD [[COPY6]], [[COPY4]] - ; CHECK: [[C:%[0-9]+]]:vgpr(s32) = G_CONSTANT i32 0 - ; CHECK: [[AMDGPU_BUFFER_LOAD:%[0-9]+]]:vgpr(s32) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR]](<4 x s32>), [[C]](s32), [[COPY4]], [[COPY5]], 0, 0, 0 :: (dereferenceable invariant load (s32)) - ; CHECK: $vgpr0 = COPY [[AMDGPU_BUFFER_LOAD]](s32) - ; CHECK: SI_RETURN_TO_EPILOG implicit $vgpr0 + ; CHECK-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $vgpr0 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr2 + ; CHECK-NEXT: 
[[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr3 + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:sgpr(s32) = COPY $sgpr4 + ; CHECK-NEXT: [[COPY3:%[0-9]+]]:sgpr(s32) = COPY $sgpr5 + ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:sgpr(<4 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32) + ; CHECK-NEXT: [[COPY4:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 + ; CHECK-NEXT: [[COPY5:%[0-9]+]]:sgpr(s32) = COPY $sgpr6 + ; CHECK-NEXT: [[COPY6:%[0-9]+]]:vgpr(s32) = COPY [[COPY5]](s32) + ; CHECK-NEXT: [[ADD:%[0-9]+]]:vgpr(s32) = G_ADD [[COPY6]], [[COPY4]] + ; CHECK-NEXT: [[C:%[0-9]+]]:vgpr(s32) = G_CONSTANT i32 0 + ; CHECK-NEXT: [[AMDGPU_BUFFER_LOAD:%[0-9]+]]:vgpr(s32) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR]](<4 x s32>), [[C]](s32), [[COPY4]], [[COPY5]], 0, 0, 0 :: (dereferenceable invariant load (s32)) + ; CHECK-NEXT: $vgpr0 = COPY [[AMDGPU_BUFFER_LOAD]](s32) + ; CHECK-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0 ; GREEDY-LABEL: name: s_buffer_load_f32_offset_add_sgpr_vgpr ; GREEDY: bb.1 (%ir-block.0): - ; GREEDY: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $vgpr0 - ; GREEDY: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr2 - ; GREEDY: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr3 - ; GREEDY: [[COPY2:%[0-9]+]]:sgpr(s32) = COPY $sgpr4 - ; GREEDY: [[COPY3:%[0-9]+]]:sgpr(s32) = COPY $sgpr5 - ; GREEDY: [[BUILD_VECTOR:%[0-9]+]]:sgpr(<4 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32) - ; GREEDY: [[COPY4:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 - ; GREEDY: [[COPY5:%[0-9]+]]:sgpr(s32) = COPY $sgpr6 - ; GREEDY: [[COPY6:%[0-9]+]]:vgpr(s32) = COPY [[COPY5]](s32) - ; GREEDY: [[ADD:%[0-9]+]]:vgpr(s32) = G_ADD [[COPY6]], [[COPY4]] - ; GREEDY: [[C:%[0-9]+]]:vgpr(s32) = G_CONSTANT i32 0 - ; GREEDY: [[AMDGPU_BUFFER_LOAD:%[0-9]+]]:vgpr(s32) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR]](<4 x s32>), [[C]](s32), [[COPY4]], [[COPY5]], 0, 0, 0 :: (dereferenceable invariant load (s32)) - ; GREEDY: $vgpr0 = COPY [[AMDGPU_BUFFER_LOAD]](s32) - ; GREEDY: SI_RETURN_TO_EPILOG implicit $vgpr0 + ; GREEDY-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $vgpr0 + ; GREEDY-NEXT: {{ $}} + ; GREEDY-NEXT: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr2 + ; GREEDY-NEXT: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr3 + ; GREEDY-NEXT: [[COPY2:%[0-9]+]]:sgpr(s32) = COPY $sgpr4 + ; GREEDY-NEXT: [[COPY3:%[0-9]+]]:sgpr(s32) = COPY $sgpr5 + ; GREEDY-NEXT: [[BUILD_VECTOR:%[0-9]+]]:sgpr(<4 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32) + ; GREEDY-NEXT: [[COPY4:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 + ; GREEDY-NEXT: [[COPY5:%[0-9]+]]:sgpr(s32) = COPY $sgpr6 + ; GREEDY-NEXT: [[COPY6:%[0-9]+]]:vgpr(s32) = COPY [[COPY5]](s32) + ; GREEDY-NEXT: [[ADD:%[0-9]+]]:vgpr(s32) = G_ADD [[COPY6]], [[COPY4]] + ; GREEDY-NEXT: [[C:%[0-9]+]]:vgpr(s32) = G_CONSTANT i32 0 + ; GREEDY-NEXT: [[AMDGPU_BUFFER_LOAD:%[0-9]+]]:vgpr(s32) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR]](<4 x s32>), [[C]](s32), [[COPY4]], [[COPY5]], 0, 0, 0 :: (dereferenceable invariant load (s32)) + ; GREEDY-NEXT: $vgpr0 = COPY [[AMDGPU_BUFFER_LOAD]](s32) + ; GREEDY-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0 %offset = add i32 %offset.s, %offset.v %val = call float @llvm.amdgcn.s.buffer.load.f32(<4 x i32> %rsrc, i32 %offset, i32 0) ret float %val @@ -2723,44 +2901,46 @@ define amdgpu_ps float @s_buffer_load_f32_offset_add_sgpr_vgpr(<4 x i32> inreg % define amdgpu_ps float @s_buffer_load_f32_offset_add_vgpr_sgpr_imm(<4 x i32> inreg %rsrc, i32 %offset.v, i32 inreg %offset.s) { ; CHECK-LABEL: name: s_buffer_load_f32_offset_add_vgpr_sgpr_imm ; CHECK: bb.1 
(%ir-block.0): - ; CHECK: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $vgpr0 - ; CHECK: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr2 - ; CHECK: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr3 - ; CHECK: [[COPY2:%[0-9]+]]:sgpr(s32) = COPY $sgpr4 - ; CHECK: [[COPY3:%[0-9]+]]:sgpr(s32) = COPY $sgpr5 - ; CHECK: [[BUILD_VECTOR:%[0-9]+]]:sgpr(<4 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32) - ; CHECK: [[COPY4:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 - ; CHECK: [[COPY5:%[0-9]+]]:sgpr(s32) = COPY $sgpr6 - ; CHECK: [[COPY6:%[0-9]+]]:vgpr(s32) = COPY [[COPY5]](s32) - ; CHECK: [[ADD:%[0-9]+]]:vgpr(s32) = G_ADD [[COPY4]], [[COPY6]] - ; CHECK: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 1024 - ; CHECK: [[COPY7:%[0-9]+]]:vgpr(s32) = COPY [[C]](s32) - ; CHECK: [[ADD1:%[0-9]+]]:vgpr(s32) = G_ADD [[ADD]], [[COPY7]] - ; CHECK: [[C1:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 0 - ; CHECK: [[C2:%[0-9]+]]:vgpr(s32) = G_CONSTANT i32 0 - ; CHECK: [[AMDGPU_BUFFER_LOAD:%[0-9]+]]:vgpr(s32) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR]](<4 x s32>), [[C2]](s32), [[ADD]], [[C1]], 1024, 0, 0 :: (dereferenceable invariant load (s32)) - ; CHECK: $vgpr0 = COPY [[AMDGPU_BUFFER_LOAD]](s32) - ; CHECK: SI_RETURN_TO_EPILOG implicit $vgpr0 + ; CHECK-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $vgpr0 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr2 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr3 + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:sgpr(s32) = COPY $sgpr4 + ; CHECK-NEXT: [[COPY3:%[0-9]+]]:sgpr(s32) = COPY $sgpr5 + ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:sgpr(<4 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32) + ; CHECK-NEXT: [[COPY4:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 + ; CHECK-NEXT: [[COPY5:%[0-9]+]]:sgpr(s32) = COPY $sgpr6 + ; CHECK-NEXT: [[COPY6:%[0-9]+]]:vgpr(s32) = COPY [[COPY5]](s32) + ; CHECK-NEXT: [[ADD:%[0-9]+]]:vgpr(s32) = G_ADD [[COPY4]], [[COPY6]] + ; CHECK-NEXT: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 1024 + ; CHECK-NEXT: [[COPY7:%[0-9]+]]:vgpr(s32) = COPY [[C]](s32) + ; CHECK-NEXT: [[ADD1:%[0-9]+]]:vgpr(s32) = G_ADD [[ADD]], [[COPY7]] + ; CHECK-NEXT: [[C1:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 0 + ; CHECK-NEXT: [[C2:%[0-9]+]]:vgpr(s32) = G_CONSTANT i32 0 + ; CHECK-NEXT: [[AMDGPU_BUFFER_LOAD:%[0-9]+]]:vgpr(s32) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR]](<4 x s32>), [[C2]](s32), [[ADD]], [[C1]], 1024, 0, 0 :: (dereferenceable invariant load (s32)) + ; CHECK-NEXT: $vgpr0 = COPY [[AMDGPU_BUFFER_LOAD]](s32) + ; CHECK-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0 ; GREEDY-LABEL: name: s_buffer_load_f32_offset_add_vgpr_sgpr_imm ; GREEDY: bb.1 (%ir-block.0): - ; GREEDY: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $vgpr0 - ; GREEDY: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr2 - ; GREEDY: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr3 - ; GREEDY: [[COPY2:%[0-9]+]]:sgpr(s32) = COPY $sgpr4 - ; GREEDY: [[COPY3:%[0-9]+]]:sgpr(s32) = COPY $sgpr5 - ; GREEDY: [[BUILD_VECTOR:%[0-9]+]]:sgpr(<4 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32) - ; GREEDY: [[COPY4:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 - ; GREEDY: [[COPY5:%[0-9]+]]:sgpr(s32) = COPY $sgpr6 - ; GREEDY: [[COPY6:%[0-9]+]]:vgpr(s32) = COPY [[COPY5]](s32) - ; GREEDY: [[ADD:%[0-9]+]]:vgpr(s32) = G_ADD [[COPY4]], [[COPY6]] - ; GREEDY: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 1024 - ; GREEDY: [[COPY7:%[0-9]+]]:vgpr(s32) = COPY [[C]](s32) - ; GREEDY: [[ADD1:%[0-9]+]]:vgpr(s32) = G_ADD [[ADD]], [[COPY7]] - ; GREEDY: [[C1:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 0 - ; 
GREEDY: [[C2:%[0-9]+]]:vgpr(s32) = G_CONSTANT i32 0 - ; GREEDY: [[AMDGPU_BUFFER_LOAD:%[0-9]+]]:vgpr(s32) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR]](<4 x s32>), [[C2]](s32), [[ADD]], [[C1]], 1024, 0, 0 :: (dereferenceable invariant load (s32)) - ; GREEDY: $vgpr0 = COPY [[AMDGPU_BUFFER_LOAD]](s32) - ; GREEDY: SI_RETURN_TO_EPILOG implicit $vgpr0 + ; GREEDY-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $vgpr0 + ; GREEDY-NEXT: {{ $}} + ; GREEDY-NEXT: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr2 + ; GREEDY-NEXT: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr3 + ; GREEDY-NEXT: [[COPY2:%[0-9]+]]:sgpr(s32) = COPY $sgpr4 + ; GREEDY-NEXT: [[COPY3:%[0-9]+]]:sgpr(s32) = COPY $sgpr5 + ; GREEDY-NEXT: [[BUILD_VECTOR:%[0-9]+]]:sgpr(<4 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32) + ; GREEDY-NEXT: [[COPY4:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 + ; GREEDY-NEXT: [[COPY5:%[0-9]+]]:sgpr(s32) = COPY $sgpr6 + ; GREEDY-NEXT: [[COPY6:%[0-9]+]]:vgpr(s32) = COPY [[COPY5]](s32) + ; GREEDY-NEXT: [[ADD:%[0-9]+]]:vgpr(s32) = G_ADD [[COPY4]], [[COPY6]] + ; GREEDY-NEXT: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 1024 + ; GREEDY-NEXT: [[COPY7:%[0-9]+]]:vgpr(s32) = COPY [[C]](s32) + ; GREEDY-NEXT: [[ADD1:%[0-9]+]]:vgpr(s32) = G_ADD [[ADD]], [[COPY7]] + ; GREEDY-NEXT: [[C1:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 0 + ; GREEDY-NEXT: [[C2:%[0-9]+]]:vgpr(s32) = G_CONSTANT i32 0 + ; GREEDY-NEXT: [[AMDGPU_BUFFER_LOAD:%[0-9]+]]:vgpr(s32) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR]](<4 x s32>), [[C2]](s32), [[ADD]], [[C1]], 1024, 0, 0 :: (dereferenceable invariant load (s32)) + ; GREEDY-NEXT: $vgpr0 = COPY [[AMDGPU_BUFFER_LOAD]](s32) + ; GREEDY-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0 %offset.base = add i32 %offset.v, %offset.s %offset = add i32 %offset.base, 1024 %val = call float @llvm.amdgcn.s.buffer.load.f32(<4 x i32> %rsrc, i32 %offset, i32 0) @@ -2770,44 +2950,46 @@ define amdgpu_ps float @s_buffer_load_f32_offset_add_vgpr_sgpr_imm(<4 x i32> inr define amdgpu_ps float @s_buffer_load_f32_offset_add_sgpr_vgpr_imm(<4 x i32> inreg %rsrc, i32 %offset.v, i32 inreg %offset.s) { ; CHECK-LABEL: name: s_buffer_load_f32_offset_add_sgpr_vgpr_imm ; CHECK: bb.1 (%ir-block.0): - ; CHECK: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $vgpr0 - ; CHECK: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr2 - ; CHECK: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr3 - ; CHECK: [[COPY2:%[0-9]+]]:sgpr(s32) = COPY $sgpr4 - ; CHECK: [[COPY3:%[0-9]+]]:sgpr(s32) = COPY $sgpr5 - ; CHECK: [[BUILD_VECTOR:%[0-9]+]]:sgpr(<4 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32) - ; CHECK: [[COPY4:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 - ; CHECK: [[COPY5:%[0-9]+]]:sgpr(s32) = COPY $sgpr6 - ; CHECK: [[COPY6:%[0-9]+]]:vgpr(s32) = COPY [[COPY5]](s32) - ; CHECK: [[ADD:%[0-9]+]]:vgpr(s32) = G_ADD [[COPY6]], [[COPY4]] - ; CHECK: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 1024 - ; CHECK: [[COPY7:%[0-9]+]]:vgpr(s32) = COPY [[C]](s32) - ; CHECK: [[ADD1:%[0-9]+]]:vgpr(s32) = G_ADD [[ADD]], [[COPY7]] - ; CHECK: [[C1:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 0 - ; CHECK: [[C2:%[0-9]+]]:vgpr(s32) = G_CONSTANT i32 0 - ; CHECK: [[AMDGPU_BUFFER_LOAD:%[0-9]+]]:vgpr(s32) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR]](<4 x s32>), [[C2]](s32), [[ADD]], [[C1]], 1024, 0, 0 :: (dereferenceable invariant load (s32)) - ; CHECK: $vgpr0 = COPY [[AMDGPU_BUFFER_LOAD]](s32) - ; CHECK: SI_RETURN_TO_EPILOG implicit $vgpr0 + ; CHECK-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $vgpr0 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr2 
+ ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr3 + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:sgpr(s32) = COPY $sgpr4 + ; CHECK-NEXT: [[COPY3:%[0-9]+]]:sgpr(s32) = COPY $sgpr5 + ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:sgpr(<4 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32) + ; CHECK-NEXT: [[COPY4:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 + ; CHECK-NEXT: [[COPY5:%[0-9]+]]:sgpr(s32) = COPY $sgpr6 + ; CHECK-NEXT: [[COPY6:%[0-9]+]]:vgpr(s32) = COPY [[COPY5]](s32) + ; CHECK-NEXT: [[ADD:%[0-9]+]]:vgpr(s32) = G_ADD [[COPY6]], [[COPY4]] + ; CHECK-NEXT: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 1024 + ; CHECK-NEXT: [[COPY7:%[0-9]+]]:vgpr(s32) = COPY [[C]](s32) + ; CHECK-NEXT: [[ADD1:%[0-9]+]]:vgpr(s32) = G_ADD [[ADD]], [[COPY7]] + ; CHECK-NEXT: [[C1:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 0 + ; CHECK-NEXT: [[C2:%[0-9]+]]:vgpr(s32) = G_CONSTANT i32 0 + ; CHECK-NEXT: [[AMDGPU_BUFFER_LOAD:%[0-9]+]]:vgpr(s32) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR]](<4 x s32>), [[C2]](s32), [[ADD]], [[C1]], 1024, 0, 0 :: (dereferenceable invariant load (s32)) + ; CHECK-NEXT: $vgpr0 = COPY [[AMDGPU_BUFFER_LOAD]](s32) + ; CHECK-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0 ; GREEDY-LABEL: name: s_buffer_load_f32_offset_add_sgpr_vgpr_imm ; GREEDY: bb.1 (%ir-block.0): - ; GREEDY: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $vgpr0 - ; GREEDY: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr2 - ; GREEDY: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr3 - ; GREEDY: [[COPY2:%[0-9]+]]:sgpr(s32) = COPY $sgpr4 - ; GREEDY: [[COPY3:%[0-9]+]]:sgpr(s32) = COPY $sgpr5 - ; GREEDY: [[BUILD_VECTOR:%[0-9]+]]:sgpr(<4 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32) - ; GREEDY: [[COPY4:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 - ; GREEDY: [[COPY5:%[0-9]+]]:sgpr(s32) = COPY $sgpr6 - ; GREEDY: [[COPY6:%[0-9]+]]:vgpr(s32) = COPY [[COPY5]](s32) - ; GREEDY: [[ADD:%[0-9]+]]:vgpr(s32) = G_ADD [[COPY6]], [[COPY4]] - ; GREEDY: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 1024 - ; GREEDY: [[COPY7:%[0-9]+]]:vgpr(s32) = COPY [[C]](s32) - ; GREEDY: [[ADD1:%[0-9]+]]:vgpr(s32) = G_ADD [[ADD]], [[COPY7]] - ; GREEDY: [[C1:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 0 - ; GREEDY: [[C2:%[0-9]+]]:vgpr(s32) = G_CONSTANT i32 0 - ; GREEDY: [[AMDGPU_BUFFER_LOAD:%[0-9]+]]:vgpr(s32) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR]](<4 x s32>), [[C2]](s32), [[ADD]], [[C1]], 1024, 0, 0 :: (dereferenceable invariant load (s32)) - ; GREEDY: $vgpr0 = COPY [[AMDGPU_BUFFER_LOAD]](s32) - ; GREEDY: SI_RETURN_TO_EPILOG implicit $vgpr0 + ; GREEDY-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $vgpr0 + ; GREEDY-NEXT: {{ $}} + ; GREEDY-NEXT: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr2 + ; GREEDY-NEXT: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr3 + ; GREEDY-NEXT: [[COPY2:%[0-9]+]]:sgpr(s32) = COPY $sgpr4 + ; GREEDY-NEXT: [[COPY3:%[0-9]+]]:sgpr(s32) = COPY $sgpr5 + ; GREEDY-NEXT: [[BUILD_VECTOR:%[0-9]+]]:sgpr(<4 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32) + ; GREEDY-NEXT: [[COPY4:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 + ; GREEDY-NEXT: [[COPY5:%[0-9]+]]:sgpr(s32) = COPY $sgpr6 + ; GREEDY-NEXT: [[COPY6:%[0-9]+]]:vgpr(s32) = COPY [[COPY5]](s32) + ; GREEDY-NEXT: [[ADD:%[0-9]+]]:vgpr(s32) = G_ADD [[COPY6]], [[COPY4]] + ; GREEDY-NEXT: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 1024 + ; GREEDY-NEXT: [[COPY7:%[0-9]+]]:vgpr(s32) = COPY [[C]](s32) + ; GREEDY-NEXT: [[ADD1:%[0-9]+]]:vgpr(s32) = G_ADD [[ADD]], [[COPY7]] + ; GREEDY-NEXT: [[C1:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 0 + ; GREEDY-NEXT: [[C2:%[0-9]+]]:vgpr(s32) = G_CONSTANT i32 0 
+ ; GREEDY-NEXT: [[AMDGPU_BUFFER_LOAD:%[0-9]+]]:vgpr(s32) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR]](<4 x s32>), [[C2]](s32), [[ADD]], [[C1]], 1024, 0, 0 :: (dereferenceable invariant load (s32)) + ; GREEDY-NEXT: $vgpr0 = COPY [[AMDGPU_BUFFER_LOAD]](s32) + ; GREEDY-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0 %offset.base = add i32 %offset.s, %offset.v %offset = add i32 %offset.base, 1024 %val = call float @llvm.amdgcn.s.buffer.load.f32(<4 x i32> %rsrc, i32 %offset, i32 0) @@ -2818,40 +3000,42 @@ define amdgpu_ps float @s_buffer_load_f32_offset_add_sgpr_vgpr_imm(<4 x i32> inr define amdgpu_ps float @s_buffer_load_f32_offset_add_imm_sgpr_vgpr(<4 x i32> inreg %rsrc, i32 %offset.v, i32 inreg %offset.s) { ; CHECK-LABEL: name: s_buffer_load_f32_offset_add_imm_sgpr_vgpr ; CHECK: bb.1 (%ir-block.0): - ; CHECK: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $vgpr0 - ; CHECK: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr2 - ; CHECK: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr3 - ; CHECK: [[COPY2:%[0-9]+]]:sgpr(s32) = COPY $sgpr4 - ; CHECK: [[COPY3:%[0-9]+]]:sgpr(s32) = COPY $sgpr5 - ; CHECK: [[BUILD_VECTOR:%[0-9]+]]:sgpr(<4 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32) - ; CHECK: [[COPY4:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 - ; CHECK: [[COPY5:%[0-9]+]]:sgpr(s32) = COPY $sgpr6 - ; CHECK: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 1024 - ; CHECK: [[ADD:%[0-9]+]]:sgpr(s32) = G_ADD [[COPY5]], [[C]] - ; CHECK: [[COPY6:%[0-9]+]]:vgpr(s32) = COPY [[ADD]](s32) - ; CHECK: [[ADD1:%[0-9]+]]:vgpr(s32) = G_ADD [[COPY6]], [[COPY4]] - ; CHECK: [[C1:%[0-9]+]]:vgpr(s32) = G_CONSTANT i32 0 - ; CHECK: [[AMDGPU_BUFFER_LOAD:%[0-9]+]]:vgpr(s32) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR]](<4 x s32>), [[C1]](s32), [[COPY4]], [[ADD]], 0, 0, 0 :: (dereferenceable invariant load (s32)) - ; CHECK: $vgpr0 = COPY [[AMDGPU_BUFFER_LOAD]](s32) - ; CHECK: SI_RETURN_TO_EPILOG implicit $vgpr0 + ; CHECK-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $vgpr0 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr2 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr3 + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:sgpr(s32) = COPY $sgpr4 + ; CHECK-NEXT: [[COPY3:%[0-9]+]]:sgpr(s32) = COPY $sgpr5 + ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:sgpr(<4 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32) + ; CHECK-NEXT: [[COPY4:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 + ; CHECK-NEXT: [[COPY5:%[0-9]+]]:sgpr(s32) = COPY $sgpr6 + ; CHECK-NEXT: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 1024 + ; CHECK-NEXT: [[ADD:%[0-9]+]]:sgpr(s32) = G_ADD [[COPY5]], [[C]] + ; CHECK-NEXT: [[COPY6:%[0-9]+]]:vgpr(s32) = COPY [[ADD]](s32) + ; CHECK-NEXT: [[ADD1:%[0-9]+]]:vgpr(s32) = G_ADD [[COPY6]], [[COPY4]] + ; CHECK-NEXT: [[C1:%[0-9]+]]:vgpr(s32) = G_CONSTANT i32 0 + ; CHECK-NEXT: [[AMDGPU_BUFFER_LOAD:%[0-9]+]]:vgpr(s32) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR]](<4 x s32>), [[C1]](s32), [[COPY4]], [[ADD]], 0, 0, 0 :: (dereferenceable invariant load (s32)) + ; CHECK-NEXT: $vgpr0 = COPY [[AMDGPU_BUFFER_LOAD]](s32) + ; CHECK-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0 ; GREEDY-LABEL: name: s_buffer_load_f32_offset_add_imm_sgpr_vgpr ; GREEDY: bb.1 (%ir-block.0): - ; GREEDY: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $vgpr0 - ; GREEDY: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr2 - ; GREEDY: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr3 - ; GREEDY: [[COPY2:%[0-9]+]]:sgpr(s32) = COPY $sgpr4 - ; GREEDY: [[COPY3:%[0-9]+]]:sgpr(s32) = COPY $sgpr5 - ; GREEDY: [[BUILD_VECTOR:%[0-9]+]]:sgpr(<4 x s32>) = G_BUILD_VECTOR 
[[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32) - ; GREEDY: [[COPY4:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 - ; GREEDY: [[COPY5:%[0-9]+]]:sgpr(s32) = COPY $sgpr6 - ; GREEDY: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 1024 - ; GREEDY: [[ADD:%[0-9]+]]:sgpr(s32) = G_ADD [[COPY5]], [[C]] - ; GREEDY: [[COPY6:%[0-9]+]]:vgpr(s32) = COPY [[ADD]](s32) - ; GREEDY: [[ADD1:%[0-9]+]]:vgpr(s32) = G_ADD [[COPY6]], [[COPY4]] - ; GREEDY: [[C1:%[0-9]+]]:vgpr(s32) = G_CONSTANT i32 0 - ; GREEDY: [[AMDGPU_BUFFER_LOAD:%[0-9]+]]:vgpr(s32) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR]](<4 x s32>), [[C1]](s32), [[COPY4]], [[ADD]], 0, 0, 0 :: (dereferenceable invariant load (s32)) - ; GREEDY: $vgpr0 = COPY [[AMDGPU_BUFFER_LOAD]](s32) - ; GREEDY: SI_RETURN_TO_EPILOG implicit $vgpr0 + ; GREEDY-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $vgpr0 + ; GREEDY-NEXT: {{ $}} + ; GREEDY-NEXT: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr2 + ; GREEDY-NEXT: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr3 + ; GREEDY-NEXT: [[COPY2:%[0-9]+]]:sgpr(s32) = COPY $sgpr4 + ; GREEDY-NEXT: [[COPY3:%[0-9]+]]:sgpr(s32) = COPY $sgpr5 + ; GREEDY-NEXT: [[BUILD_VECTOR:%[0-9]+]]:sgpr(<4 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32) + ; GREEDY-NEXT: [[COPY4:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 + ; GREEDY-NEXT: [[COPY5:%[0-9]+]]:sgpr(s32) = COPY $sgpr6 + ; GREEDY-NEXT: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 1024 + ; GREEDY-NEXT: [[ADD:%[0-9]+]]:sgpr(s32) = G_ADD [[COPY5]], [[C]] + ; GREEDY-NEXT: [[COPY6:%[0-9]+]]:vgpr(s32) = COPY [[ADD]](s32) + ; GREEDY-NEXT: [[ADD1:%[0-9]+]]:vgpr(s32) = G_ADD [[COPY6]], [[COPY4]] + ; GREEDY-NEXT: [[C1:%[0-9]+]]:vgpr(s32) = G_CONSTANT i32 0 + ; GREEDY-NEXT: [[AMDGPU_BUFFER_LOAD:%[0-9]+]]:vgpr(s32) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR]](<4 x s32>), [[C1]](s32), [[COPY4]], [[ADD]], 0, 0, 0 :: (dereferenceable invariant load (s32)) + ; GREEDY-NEXT: $vgpr0 = COPY [[AMDGPU_BUFFER_LOAD]](s32) + ; GREEDY-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0 %offset.base = add i32 %offset.s, 1024 %offset = add i32 %offset.base, %offset.v %val = call float @llvm.amdgcn.s.buffer.load.f32(<4 x i32> %rsrc, i32 %offset, i32 0) @@ -2861,42 +3045,44 @@ define amdgpu_ps float @s_buffer_load_f32_offset_add_imm_sgpr_vgpr(<4 x i32> inr define amdgpu_ps float @s_buffer_load_f32_offset_add_imm_vgpr_sgpr(<4 x i32> inreg %rsrc, i32 %offset.v, i32 inreg %offset.s) { ; CHECK-LABEL: name: s_buffer_load_f32_offset_add_imm_vgpr_sgpr ; CHECK: bb.1 (%ir-block.0): - ; CHECK: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $vgpr0 - ; CHECK: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr2 - ; CHECK: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr3 - ; CHECK: [[COPY2:%[0-9]+]]:sgpr(s32) = COPY $sgpr4 - ; CHECK: [[COPY3:%[0-9]+]]:sgpr(s32) = COPY $sgpr5 - ; CHECK: [[BUILD_VECTOR:%[0-9]+]]:sgpr(<4 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32) - ; CHECK: [[COPY4:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 - ; CHECK: [[COPY5:%[0-9]+]]:sgpr(s32) = COPY $sgpr6 - ; CHECK: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 1024 - ; CHECK: [[COPY6:%[0-9]+]]:vgpr(s32) = COPY [[C]](s32) - ; CHECK: [[ADD:%[0-9]+]]:vgpr(s32) = G_ADD [[COPY4]], [[COPY6]] - ; CHECK: [[COPY7:%[0-9]+]]:vgpr(s32) = COPY [[COPY5]](s32) - ; CHECK: [[ADD1:%[0-9]+]]:vgpr(s32) = G_ADD [[ADD]], [[COPY7]] - ; CHECK: [[C1:%[0-9]+]]:vgpr(s32) = G_CONSTANT i32 0 - ; CHECK: [[AMDGPU_BUFFER_LOAD:%[0-9]+]]:vgpr(s32) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR]](<4 x s32>), [[C1]](s32), [[ADD]], [[COPY5]], 0, 0, 0 :: (dereferenceable invariant load (s32)) - ; CHECK: 
$vgpr0 = COPY [[AMDGPU_BUFFER_LOAD]](s32) - ; CHECK: SI_RETURN_TO_EPILOG implicit $vgpr0 + ; CHECK-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $vgpr0 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr2 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr3 + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:sgpr(s32) = COPY $sgpr4 + ; CHECK-NEXT: [[COPY3:%[0-9]+]]:sgpr(s32) = COPY $sgpr5 + ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:sgpr(<4 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32) + ; CHECK-NEXT: [[COPY4:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 + ; CHECK-NEXT: [[COPY5:%[0-9]+]]:sgpr(s32) = COPY $sgpr6 + ; CHECK-NEXT: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 1024 + ; CHECK-NEXT: [[COPY6:%[0-9]+]]:vgpr(s32) = COPY [[C]](s32) + ; CHECK-NEXT: [[ADD:%[0-9]+]]:vgpr(s32) = G_ADD [[COPY4]], [[COPY6]] + ; CHECK-NEXT: [[COPY7:%[0-9]+]]:vgpr(s32) = COPY [[COPY5]](s32) + ; CHECK-NEXT: [[ADD1:%[0-9]+]]:vgpr(s32) = G_ADD [[ADD]], [[COPY7]] + ; CHECK-NEXT: [[C1:%[0-9]+]]:vgpr(s32) = G_CONSTANT i32 0 + ; CHECK-NEXT: [[AMDGPU_BUFFER_LOAD:%[0-9]+]]:vgpr(s32) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR]](<4 x s32>), [[C1]](s32), [[ADD]], [[COPY5]], 0, 0, 0 :: (dereferenceable invariant load (s32)) + ; CHECK-NEXT: $vgpr0 = COPY [[AMDGPU_BUFFER_LOAD]](s32) + ; CHECK-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0 ; GREEDY-LABEL: name: s_buffer_load_f32_offset_add_imm_vgpr_sgpr ; GREEDY: bb.1 (%ir-block.0): - ; GREEDY: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $vgpr0 - ; GREEDY: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr2 - ; GREEDY: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr3 - ; GREEDY: [[COPY2:%[0-9]+]]:sgpr(s32) = COPY $sgpr4 - ; GREEDY: [[COPY3:%[0-9]+]]:sgpr(s32) = COPY $sgpr5 - ; GREEDY: [[BUILD_VECTOR:%[0-9]+]]:sgpr(<4 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32) - ; GREEDY: [[COPY4:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 - ; GREEDY: [[COPY5:%[0-9]+]]:sgpr(s32) = COPY $sgpr6 - ; GREEDY: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 1024 - ; GREEDY: [[COPY6:%[0-9]+]]:vgpr(s32) = COPY [[C]](s32) - ; GREEDY: [[ADD:%[0-9]+]]:vgpr(s32) = G_ADD [[COPY4]], [[COPY6]] - ; GREEDY: [[COPY7:%[0-9]+]]:vgpr(s32) = COPY [[COPY5]](s32) - ; GREEDY: [[ADD1:%[0-9]+]]:vgpr(s32) = G_ADD [[ADD]], [[COPY7]] - ; GREEDY: [[C1:%[0-9]+]]:vgpr(s32) = G_CONSTANT i32 0 - ; GREEDY: [[AMDGPU_BUFFER_LOAD:%[0-9]+]]:vgpr(s32) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR]](<4 x s32>), [[C1]](s32), [[ADD]], [[COPY5]], 0, 0, 0 :: (dereferenceable invariant load (s32)) - ; GREEDY: $vgpr0 = COPY [[AMDGPU_BUFFER_LOAD]](s32) - ; GREEDY: SI_RETURN_TO_EPILOG implicit $vgpr0 + ; GREEDY-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $vgpr0 + ; GREEDY-NEXT: {{ $}} + ; GREEDY-NEXT: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr2 + ; GREEDY-NEXT: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr3 + ; GREEDY-NEXT: [[COPY2:%[0-9]+]]:sgpr(s32) = COPY $sgpr4 + ; GREEDY-NEXT: [[COPY3:%[0-9]+]]:sgpr(s32) = COPY $sgpr5 + ; GREEDY-NEXT: [[BUILD_VECTOR:%[0-9]+]]:sgpr(<4 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32) + ; GREEDY-NEXT: [[COPY4:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 + ; GREEDY-NEXT: [[COPY5:%[0-9]+]]:sgpr(s32) = COPY $sgpr6 + ; GREEDY-NEXT: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 1024 + ; GREEDY-NEXT: [[COPY6:%[0-9]+]]:vgpr(s32) = COPY [[C]](s32) + ; GREEDY-NEXT: [[ADD:%[0-9]+]]:vgpr(s32) = G_ADD [[COPY4]], [[COPY6]] + ; GREEDY-NEXT: [[COPY7:%[0-9]+]]:vgpr(s32) = COPY [[COPY5]](s32) + ; GREEDY-NEXT: [[ADD1:%[0-9]+]]:vgpr(s32) = G_ADD [[ADD]], 
[[COPY7]] + ; GREEDY-NEXT: [[C1:%[0-9]+]]:vgpr(s32) = G_CONSTANT i32 0 + ; GREEDY-NEXT: [[AMDGPU_BUFFER_LOAD:%[0-9]+]]:vgpr(s32) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR]](<4 x s32>), [[C1]](s32), [[ADD]], [[COPY5]], 0, 0, 0 :: (dereferenceable invariant load (s32)) + ; GREEDY-NEXT: $vgpr0 = COPY [[AMDGPU_BUFFER_LOAD]](s32) + ; GREEDY-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0 %offset.base = add i32 %offset.v, 1024 %offset = add i32 %offset.base, %offset.s %val = call float @llvm.amdgcn.s.buffer.load.f32(<4 x i32> %rsrc, i32 %offset, i32 0) diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-load.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-load.mir index 6ab65dbd4821..3036df730a46 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-load.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-load.mir @@ -115,12 +115,12 @@ body: | liveins: $sgpr0_sgpr1 ; CHECK-LABEL: name: load_global_v8i32_non_uniform ; CHECK: [[COPY:%[0-9]+]]:sgpr(p1) = COPY $sgpr0_sgpr1 - ; CHECK: [[COPY1:%[0-9]+]]:vgpr(p1) = COPY [[COPY]](p1) - ; CHECK: [[LOAD:%[0-9]+]]:vgpr(<4 x s32>) = G_LOAD [[COPY]](p1) :: (load (<4 x s32>) from %ir.global.not.uniform.v8i32, align 32, addrspace 1) - ; CHECK: [[C:%[0-9]+]]:vgpr(s64) = G_CONSTANT i64 16 - ; CHECK: [[PTR_ADD:%[0-9]+]]:vgpr(p1) = G_PTR_ADD [[COPY]], [[C]](s64) - ; CHECK: [[LOAD1:%[0-9]+]]:vgpr(<4 x s32>) = G_LOAD [[PTR_ADD]](p1) :: (load (<4 x s32>) from %ir.global.not.uniform.v8i32 + 16, basealign 32, addrspace 1) - ; CHECK: [[CONCAT_VECTORS:%[0-9]+]]:vgpr(<8 x s32>) = G_CONCAT_VECTORS [[LOAD]](<4 x s32>), [[LOAD1]](<4 x s32>) + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr(p1) = COPY [[COPY]](p1) + ; CHECK-NEXT: [[LOAD:%[0-9]+]]:vgpr(<4 x s32>) = G_LOAD [[COPY]](p1) :: (load (<4 x s32>) from %ir.global.not.uniform.v8i32, align 32, addrspace 1) + ; CHECK-NEXT: [[C:%[0-9]+]]:vgpr(s64) = G_CONSTANT i64 16 + ; CHECK-NEXT: [[PTR_ADD:%[0-9]+]]:vgpr(p1) = G_PTR_ADD [[COPY]], [[C]](s64) + ; CHECK-NEXT: [[LOAD1:%[0-9]+]]:vgpr(<4 x s32>) = G_LOAD [[PTR_ADD]](p1) :: (load (<4 x s32>) from %ir.global.not.uniform.v8i32 + 16, basealign 32, addrspace 1) + ; CHECK-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:vgpr(<8 x s32>) = G_CONCAT_VECTORS [[LOAD]](<4 x s32>), [[LOAD1]](<4 x s32>) %0:_(p1) = COPY $sgpr0_sgpr1 %1:_(<8 x s32>) = G_LOAD %0 :: (load (<8 x s32>) from %ir.global.not.uniform.v8i32) ... 
@@ -135,12 +135,12 @@ body: | ; CHECK-LABEL: name: load_global_v4i64_non_uniform ; CHECK: [[COPY:%[0-9]+]]:sgpr(p1) = COPY $sgpr0_sgpr1 - ; CHECK: [[COPY1:%[0-9]+]]:vgpr(p1) = COPY [[COPY]](p1) - ; CHECK: [[LOAD:%[0-9]+]]:vgpr(<2 x s64>) = G_LOAD [[COPY]](p1) :: (load (<2 x s64>) from %ir.global.not.uniform.v4i64, align 32, addrspace 1) - ; CHECK: [[C:%[0-9]+]]:vgpr(s64) = G_CONSTANT i64 16 - ; CHECK: [[PTR_ADD:%[0-9]+]]:vgpr(p1) = G_PTR_ADD [[COPY]], [[C]](s64) - ; CHECK: [[LOAD1:%[0-9]+]]:vgpr(<2 x s64>) = G_LOAD [[PTR_ADD]](p1) :: (load (<2 x s64>) from %ir.global.not.uniform.v4i64 + 16, basealign 32, addrspace 1) - ; CHECK: [[CONCAT_VECTORS:%[0-9]+]]:vgpr(<4 x s64>) = G_CONCAT_VECTORS [[LOAD]](<2 x s64>), [[LOAD1]](<2 x s64>) + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr(p1) = COPY [[COPY]](p1) + ; CHECK-NEXT: [[LOAD:%[0-9]+]]:vgpr(<2 x s64>) = G_LOAD [[COPY]](p1) :: (load (<2 x s64>) from %ir.global.not.uniform.v4i64, align 32, addrspace 1) + ; CHECK-NEXT: [[C:%[0-9]+]]:vgpr(s64) = G_CONSTANT i64 16 + ; CHECK-NEXT: [[PTR_ADD:%[0-9]+]]:vgpr(p1) = G_PTR_ADD [[COPY]], [[C]](s64) + ; CHECK-NEXT: [[LOAD1:%[0-9]+]]:vgpr(<2 x s64>) = G_LOAD [[PTR_ADD]](p1) :: (load (<2 x s64>) from %ir.global.not.uniform.v4i64 + 16, basealign 32, addrspace 1) + ; CHECK-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:vgpr(<4 x s64>) = G_CONCAT_VECTORS [[LOAD]](<2 x s64>), [[LOAD1]](<2 x s64>) %0:_(p1) = COPY $sgpr0_sgpr1 %1:_(<4 x s64>) = G_LOAD %0 :: (load (<4 x s64>) from %ir.global.not.uniform.v4i64) ... @@ -154,18 +154,18 @@ body: | liveins: $sgpr0_sgpr1 ; CHECK-LABEL: name: load_global_v16i32_non_uniform ; CHECK: [[COPY:%[0-9]+]]:sgpr(p1) = COPY $sgpr0_sgpr1 - ; CHECK: [[COPY1:%[0-9]+]]:vgpr(p1) = COPY [[COPY]](p1) - ; CHECK: [[LOAD:%[0-9]+]]:vgpr(<4 x s32>) = G_LOAD [[COPY]](p1) :: (load (<4 x s32>) from %ir.global.not.uniform.v16i32, align 64, addrspace 1) - ; CHECK: [[C:%[0-9]+]]:vgpr(s64) = G_CONSTANT i64 16 - ; CHECK: [[PTR_ADD:%[0-9]+]]:vgpr(p1) = G_PTR_ADD [[COPY]], [[C]](s64) - ; CHECK: [[LOAD1:%[0-9]+]]:vgpr(<4 x s32>) = G_LOAD [[PTR_ADD]](p1) :: (load (<4 x s32>) from %ir.global.not.uniform.v16i32 + 16, basealign 64, addrspace 1) - ; CHECK: [[C1:%[0-9]+]]:vgpr(s64) = G_CONSTANT i64 32 - ; CHECK: [[PTR_ADD1:%[0-9]+]]:vgpr(p1) = G_PTR_ADD [[COPY]], [[C1]](s64) - ; CHECK: [[LOAD2:%[0-9]+]]:vgpr(<4 x s32>) = G_LOAD [[PTR_ADD1]](p1) :: (load (<4 x s32>) from %ir.global.not.uniform.v16i32 + 32, align 32, basealign 64, addrspace 1) - ; CHECK: [[C2:%[0-9]+]]:vgpr(s64) = G_CONSTANT i64 48 - ; CHECK: [[PTR_ADD2:%[0-9]+]]:vgpr(p1) = G_PTR_ADD [[COPY]], [[C2]](s64) - ; CHECK: [[LOAD3:%[0-9]+]]:vgpr(<4 x s32>) = G_LOAD [[PTR_ADD2]](p1) :: (load (<4 x s32>) from %ir.global.not.uniform.v16i32 + 48, basealign 64, addrspace 1) - ; CHECK: [[CONCAT_VECTORS:%[0-9]+]]:vgpr(<16 x s32>) = G_CONCAT_VECTORS [[LOAD]](<4 x s32>), [[LOAD1]](<4 x s32>), [[LOAD2]](<4 x s32>), [[LOAD3]](<4 x s32>) + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr(p1) = COPY [[COPY]](p1) + ; CHECK-NEXT: [[LOAD:%[0-9]+]]:vgpr(<4 x s32>) = G_LOAD [[COPY]](p1) :: (load (<4 x s32>) from %ir.global.not.uniform.v16i32, align 64, addrspace 1) + ; CHECK-NEXT: [[C:%[0-9]+]]:vgpr(s64) = G_CONSTANT i64 16 + ; CHECK-NEXT: [[PTR_ADD:%[0-9]+]]:vgpr(p1) = G_PTR_ADD [[COPY]], [[C]](s64) + ; CHECK-NEXT: [[LOAD1:%[0-9]+]]:vgpr(<4 x s32>) = G_LOAD [[PTR_ADD]](p1) :: (load (<4 x s32>) from %ir.global.not.uniform.v16i32 + 16, basealign 64, addrspace 1) + ; CHECK-NEXT: [[C1:%[0-9]+]]:vgpr(s64) = G_CONSTANT i64 32 + ; CHECK-NEXT: [[PTR_ADD1:%[0-9]+]]:vgpr(p1) = G_PTR_ADD [[COPY]], [[C1]](s64) + ; 
CHECK-NEXT: [[LOAD2:%[0-9]+]]:vgpr(<4 x s32>) = G_LOAD [[PTR_ADD1]](p1) :: (load (<4 x s32>) from %ir.global.not.uniform.v16i32 + 32, align 32, basealign 64, addrspace 1) + ; CHECK-NEXT: [[C2:%[0-9]+]]:vgpr(s64) = G_CONSTANT i64 48 + ; CHECK-NEXT: [[PTR_ADD2:%[0-9]+]]:vgpr(p1) = G_PTR_ADD [[COPY]], [[C2]](s64) + ; CHECK-NEXT: [[LOAD3:%[0-9]+]]:vgpr(<4 x s32>) = G_LOAD [[PTR_ADD2]](p1) :: (load (<4 x s32>) from %ir.global.not.uniform.v16i32 + 48, basealign 64, addrspace 1) + ; CHECK-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:vgpr(<16 x s32>) = G_CONCAT_VECTORS [[LOAD]](<4 x s32>), [[LOAD1]](<4 x s32>), [[LOAD2]](<4 x s32>), [[LOAD3]](<4 x s32>) %0:_(p1) = COPY $sgpr0_sgpr1 %1:_(<16 x s32>) = G_LOAD %0 :: (load (<16 x s32>) from %ir.global.not.uniform.v16i32) ... @@ -179,18 +179,18 @@ body: | liveins: $sgpr0_sgpr1 ; CHECK-LABEL: name: load_global_v8i64_non_uniform ; CHECK: [[COPY:%[0-9]+]]:sgpr(p1) = COPY $sgpr0_sgpr1 - ; CHECK: [[COPY1:%[0-9]+]]:vgpr(p1) = COPY [[COPY]](p1) - ; CHECK: [[LOAD:%[0-9]+]]:vgpr(<2 x s64>) = G_LOAD [[COPY]](p1) :: (load (<2 x s64>) from %ir.global.not.uniform.v8i64, align 64, addrspace 1) - ; CHECK: [[C:%[0-9]+]]:vgpr(s64) = G_CONSTANT i64 16 - ; CHECK: [[PTR_ADD:%[0-9]+]]:vgpr(p1) = G_PTR_ADD [[COPY]], [[C]](s64) - ; CHECK: [[LOAD1:%[0-9]+]]:vgpr(<2 x s64>) = G_LOAD [[PTR_ADD]](p1) :: (load (<2 x s64>) from %ir.global.not.uniform.v8i64 + 16, basealign 64, addrspace 1) - ; CHECK: [[C1:%[0-9]+]]:vgpr(s64) = G_CONSTANT i64 32 - ; CHECK: [[PTR_ADD1:%[0-9]+]]:vgpr(p1) = G_PTR_ADD [[COPY]], [[C1]](s64) - ; CHECK: [[LOAD2:%[0-9]+]]:vgpr(<2 x s64>) = G_LOAD [[PTR_ADD1]](p1) :: (load (<2 x s64>) from %ir.global.not.uniform.v8i64 + 32, align 32, basealign 64, addrspace 1) - ; CHECK: [[C2:%[0-9]+]]:vgpr(s64) = G_CONSTANT i64 48 - ; CHECK: [[PTR_ADD2:%[0-9]+]]:vgpr(p1) = G_PTR_ADD [[COPY]], [[C2]](s64) - ; CHECK: [[LOAD3:%[0-9]+]]:vgpr(<2 x s64>) = G_LOAD [[PTR_ADD2]](p1) :: (load (<2 x s64>) from %ir.global.not.uniform.v8i64 + 48, basealign 64, addrspace 1) - ; CHECK: [[CONCAT_VECTORS:%[0-9]+]]:vgpr(<8 x s64>) = G_CONCAT_VECTORS [[LOAD]](<2 x s64>), [[LOAD1]](<2 x s64>), [[LOAD2]](<2 x s64>), [[LOAD3]](<2 x s64>) + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr(p1) = COPY [[COPY]](p1) + ; CHECK-NEXT: [[LOAD:%[0-9]+]]:vgpr(<2 x s64>) = G_LOAD [[COPY]](p1) :: (load (<2 x s64>) from %ir.global.not.uniform.v8i64, align 64, addrspace 1) + ; CHECK-NEXT: [[C:%[0-9]+]]:vgpr(s64) = G_CONSTANT i64 16 + ; CHECK-NEXT: [[PTR_ADD:%[0-9]+]]:vgpr(p1) = G_PTR_ADD [[COPY]], [[C]](s64) + ; CHECK-NEXT: [[LOAD1:%[0-9]+]]:vgpr(<2 x s64>) = G_LOAD [[PTR_ADD]](p1) :: (load (<2 x s64>) from %ir.global.not.uniform.v8i64 + 16, basealign 64, addrspace 1) + ; CHECK-NEXT: [[C1:%[0-9]+]]:vgpr(s64) = G_CONSTANT i64 32 + ; CHECK-NEXT: [[PTR_ADD1:%[0-9]+]]:vgpr(p1) = G_PTR_ADD [[COPY]], [[C1]](s64) + ; CHECK-NEXT: [[LOAD2:%[0-9]+]]:vgpr(<2 x s64>) = G_LOAD [[PTR_ADD1]](p1) :: (load (<2 x s64>) from %ir.global.not.uniform.v8i64 + 32, align 32, basealign 64, addrspace 1) + ; CHECK-NEXT: [[C2:%[0-9]+]]:vgpr(s64) = G_CONSTANT i64 48 + ; CHECK-NEXT: [[PTR_ADD2:%[0-9]+]]:vgpr(p1) = G_PTR_ADD [[COPY]], [[C2]](s64) + ; CHECK-NEXT: [[LOAD3:%[0-9]+]]:vgpr(<2 x s64>) = G_LOAD [[PTR_ADD2]](p1) :: (load (<2 x s64>) from %ir.global.not.uniform.v8i64 + 48, basealign 64, addrspace 1) + ; CHECK-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:vgpr(<8 x s64>) = G_CONCAT_VECTORS [[LOAD]](<2 x s64>), [[LOAD1]](<2 x s64>), [[LOAD2]](<2 x s64>), [[LOAD3]](<2 x s64>) %0:_(p1) = COPY $sgpr0_sgpr1 %1:_(<8 x s64>) = G_LOAD %0 :: (load (<8 x s64>) from 
%ir.global.not.uniform.v8i64) ... @@ -204,7 +204,7 @@ body: | liveins: $sgpr0_sgpr1 ; CHECK-LABEL: name: load_global_v8i32_uniform ; CHECK: [[COPY:%[0-9]+]]:sgpr(p1) = COPY $sgpr0_sgpr1 - ; CHECK: [[LOAD:%[0-9]+]]:sgpr(<8 x s32>) = G_LOAD [[COPY]](p1) :: (invariant load (<8 x s32>), addrspace 1) + ; CHECK-NEXT: [[LOAD:%[0-9]+]]:sgpr(<8 x s32>) = G_LOAD [[COPY]](p1) :: (invariant load (<8 x s32>), addrspace 1) %0:_(p1) = COPY $sgpr0_sgpr1 %1:_(<8 x s32>) = G_LOAD %0 :: (invariant load (<8 x s32>), addrspace 1) ... @@ -218,7 +218,7 @@ body: | liveins: $sgpr0_sgpr1 ; CHECK-LABEL: name: load_global_v4i64_uniform ; CHECK: [[COPY:%[0-9]+]]:sgpr(p1) = COPY $sgpr0_sgpr1 - ; CHECK: [[LOAD:%[0-9]+]]:sgpr(<4 x s64>) = G_LOAD [[COPY]](p1) :: (invariant load (<4 x s64>), addrspace 1) + ; CHECK-NEXT: [[LOAD:%[0-9]+]]:sgpr(<4 x s64>) = G_LOAD [[COPY]](p1) :: (invariant load (<4 x s64>), addrspace 1) %0:_(p1) = COPY $sgpr0_sgpr1 %1:_(<4 x s64>) = G_LOAD %0 :: (invariant load (<4 x s64>), addrspace 1) ... @@ -232,7 +232,7 @@ body: | liveins: $sgpr0_sgpr1 ; CHECK-LABEL: name: load_global_v16i32_uniform ; CHECK: [[COPY:%[0-9]+]]:sgpr(p1) = COPY $sgpr0_sgpr1 - ; CHECK: [[LOAD:%[0-9]+]]:sgpr(<16 x s32>) = G_LOAD [[COPY]](p1) :: (invariant load (<16 x s32>), addrspace 1) + ; CHECK-NEXT: [[LOAD:%[0-9]+]]:sgpr(<16 x s32>) = G_LOAD [[COPY]](p1) :: (invariant load (<16 x s32>), addrspace 1) %0:_(p1) = COPY $sgpr0_sgpr1 %1:_(<16 x s32>) = G_LOAD %0 :: (invariant load (<16 x s32>), addrspace 1) ... @@ -246,7 +246,7 @@ body: | liveins: $sgpr0_sgpr1 ; CHECK-LABEL: name: load_global_v8i64_uniform ; CHECK: [[COPY:%[0-9]+]]:sgpr(p1) = COPY $sgpr0_sgpr1 - ; CHECK: [[LOAD:%[0-9]+]]:sgpr(<8 x s64>) = G_LOAD [[COPY]](p1) :: (invariant load (<8 x s64>), addrspace 1) + ; CHECK-NEXT: [[LOAD:%[0-9]+]]:sgpr(<8 x s64>) = G_LOAD [[COPY]](p1) :: (invariant load (<8 x s64>), addrspace 1) %0:_(p1) = COPY $sgpr0_sgpr1 %1:_(<8 x s64>) = G_LOAD %0 :: (invariant load (<8 x s64>), addrspace 1) ... @@ -260,12 +260,12 @@ body: | liveins: $sgpr0_sgpr1 ; CHECK-LABEL: name: load_constant_v8i32_non_uniform ; CHECK: [[COPY:%[0-9]+]]:sgpr(p4) = COPY $sgpr0_sgpr1 - ; CHECK: [[COPY1:%[0-9]+]]:vgpr(p4) = COPY [[COPY]](p4) - ; CHECK: [[LOAD:%[0-9]+]]:vgpr(<4 x s32>) = G_LOAD [[COPY]](p4) :: (load (<4 x s32>) from %ir.constant.not.uniform.v8i32, align 32, addrspace 4) - ; CHECK: [[C:%[0-9]+]]:vgpr(s64) = G_CONSTANT i64 16 - ; CHECK: [[PTR_ADD:%[0-9]+]]:vgpr(p4) = G_PTR_ADD [[COPY]], [[C]](s64) - ; CHECK: [[LOAD1:%[0-9]+]]:vgpr(<4 x s32>) = G_LOAD [[PTR_ADD]](p4) :: (load (<4 x s32>) from %ir.constant.not.uniform.v8i32 + 16, basealign 32, addrspace 4) - ; CHECK: [[CONCAT_VECTORS:%[0-9]+]]:vgpr(<8 x s32>) = G_CONCAT_VECTORS [[LOAD]](<4 x s32>), [[LOAD1]](<4 x s32>) + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr(p4) = COPY [[COPY]](p4) + ; CHECK-NEXT: [[LOAD:%[0-9]+]]:vgpr(<4 x s32>) = G_LOAD [[COPY]](p4) :: (load (<4 x s32>) from %ir.constant.not.uniform.v8i32, align 32, addrspace 4) + ; CHECK-NEXT: [[C:%[0-9]+]]:vgpr(s64) = G_CONSTANT i64 16 + ; CHECK-NEXT: [[PTR_ADD:%[0-9]+]]:vgpr(p4) = G_PTR_ADD [[COPY]], [[C]](s64) + ; CHECK-NEXT: [[LOAD1:%[0-9]+]]:vgpr(<4 x s32>) = G_LOAD [[PTR_ADD]](p4) :: (load (<4 x s32>) from %ir.constant.not.uniform.v8i32 + 16, basealign 32, addrspace 4) + ; CHECK-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:vgpr(<8 x s32>) = G_CONCAT_VECTORS [[LOAD]](<4 x s32>), [[LOAD1]](<4 x s32>) %0:_(p4) = COPY $sgpr0_sgpr1 %1:_(<8 x s32>) = G_LOAD %0 :: (load (<8 x s32>) from %ir.constant.not.uniform.v8i32) ... 
@@ -279,12 +279,12 @@ body: | liveins: $sgpr0_sgpr1 ; CHECK-LABEL: name: load_constant_i256_non_uniform ; CHECK: [[COPY:%[0-9]+]]:sgpr(p4) = COPY $sgpr0_sgpr1 - ; CHECK: [[COPY1:%[0-9]+]]:vgpr(p4) = COPY [[COPY]](p4) - ; CHECK: [[LOAD:%[0-9]+]]:vgpr(s128) = G_LOAD [[COPY]](p4) :: (load (s128) from %ir.constant.not.uniform, align 32, addrspace 4) - ; CHECK: [[C:%[0-9]+]]:vgpr(s64) = G_CONSTANT i64 16 - ; CHECK: [[PTR_ADD:%[0-9]+]]:vgpr(p4) = G_PTR_ADD [[COPY]], [[C]](s64) - ; CHECK: [[LOAD1:%[0-9]+]]:vgpr(s128) = G_LOAD [[PTR_ADD]](p4) :: (load (s128) from %ir.constant.not.uniform + 16, basealign 32, addrspace 4) - ; CHECK: [[MV:%[0-9]+]]:vgpr(s256) = G_MERGE_VALUES [[LOAD]](s128), [[LOAD1]](s128) + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr(p4) = COPY [[COPY]](p4) + ; CHECK-NEXT: [[LOAD:%[0-9]+]]:vgpr(s128) = G_LOAD [[COPY]](p4) :: (load (s128) from %ir.constant.not.uniform, align 32, addrspace 4) + ; CHECK-NEXT: [[C:%[0-9]+]]:vgpr(s64) = G_CONSTANT i64 16 + ; CHECK-NEXT: [[PTR_ADD:%[0-9]+]]:vgpr(p4) = G_PTR_ADD [[COPY]], [[C]](s64) + ; CHECK-NEXT: [[LOAD1:%[0-9]+]]:vgpr(s128) = G_LOAD [[PTR_ADD]](p4) :: (load (s128) from %ir.constant.not.uniform + 16, basealign 32, addrspace 4) + ; CHECK-NEXT: [[MV:%[0-9]+]]:vgpr(s256) = G_MERGE_VALUES [[LOAD]](s128), [[LOAD1]](s128) %0:_(p4) = COPY $sgpr0_sgpr1 %1:_(s256) = G_LOAD %0 :: (load (s256) from %ir.constant.not.uniform) ... @@ -299,12 +299,12 @@ body: | ; CHECK-LABEL: name: load_constant_v16i16_non_uniform ; CHECK: [[COPY:%[0-9]+]]:sgpr(p4) = COPY $sgpr0_sgpr1 - ; CHECK: [[COPY1:%[0-9]+]]:vgpr(p4) = COPY [[COPY]](p4) - ; CHECK: [[LOAD:%[0-9]+]]:vgpr(<8 x s16>) = G_LOAD [[COPY]](p4) :: (load (<8 x s16>) from %ir.constant.not.uniform, align 32, addrspace 4) - ; CHECK: [[C:%[0-9]+]]:vgpr(s64) = G_CONSTANT i64 16 - ; CHECK: [[PTR_ADD:%[0-9]+]]:vgpr(p4) = G_PTR_ADD [[COPY]], [[C]](s64) - ; CHECK: [[LOAD1:%[0-9]+]]:vgpr(<8 x s16>) = G_LOAD [[PTR_ADD]](p4) :: (load (<8 x s16>) from %ir.constant.not.uniform + 16, basealign 32, addrspace 4) - ; CHECK: [[CONCAT_VECTORS:%[0-9]+]]:vgpr(<16 x s16>) = G_CONCAT_VECTORS [[LOAD]](<8 x s16>), [[LOAD1]](<8 x s16>) + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr(p4) = COPY [[COPY]](p4) + ; CHECK-NEXT: [[LOAD:%[0-9]+]]:vgpr(<8 x s16>) = G_LOAD [[COPY]](p4) :: (load (<8 x s16>) from %ir.constant.not.uniform, align 32, addrspace 4) + ; CHECK-NEXT: [[C:%[0-9]+]]:vgpr(s64) = G_CONSTANT i64 16 + ; CHECK-NEXT: [[PTR_ADD:%[0-9]+]]:vgpr(p4) = G_PTR_ADD [[COPY]], [[C]](s64) + ; CHECK-NEXT: [[LOAD1:%[0-9]+]]:vgpr(<8 x s16>) = G_LOAD [[PTR_ADD]](p4) :: (load (<8 x s16>) from %ir.constant.not.uniform + 16, basealign 32, addrspace 4) + ; CHECK-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:vgpr(<16 x s16>) = G_CONCAT_VECTORS [[LOAD]](<8 x s16>), [[LOAD1]](<8 x s16>) %0:_(p4) = COPY $sgpr0_sgpr1 %1:_(<16 x s16>) = G_LOAD %0 :: (load (<16 x s16>) from %ir.constant.not.uniform) ... 
@@ -318,12 +318,12 @@ body: | liveins: $sgpr0_sgpr1 ; CHECK-LABEL: name: load_constant_v4i64_non_uniform ; CHECK: [[COPY:%[0-9]+]]:sgpr(p4) = COPY $sgpr0_sgpr1 - ; CHECK: [[COPY1:%[0-9]+]]:vgpr(p4) = COPY [[COPY]](p4) - ; CHECK: [[LOAD:%[0-9]+]]:vgpr(<2 x s64>) = G_LOAD [[COPY]](p4) :: (load (<2 x s64>) from %ir.constant.not.uniform.v4i64, align 32, addrspace 4) - ; CHECK: [[C:%[0-9]+]]:vgpr(s64) = G_CONSTANT i64 16 - ; CHECK: [[PTR_ADD:%[0-9]+]]:vgpr(p4) = G_PTR_ADD [[COPY]], [[C]](s64) - ; CHECK: [[LOAD1:%[0-9]+]]:vgpr(<2 x s64>) = G_LOAD [[PTR_ADD]](p4) :: (load (<2 x s64>) from %ir.constant.not.uniform.v4i64 + 16, basealign 32, addrspace 4) - ; CHECK: [[CONCAT_VECTORS:%[0-9]+]]:vgpr(<4 x s64>) = G_CONCAT_VECTORS [[LOAD]](<2 x s64>), [[LOAD1]](<2 x s64>) + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr(p4) = COPY [[COPY]](p4) + ; CHECK-NEXT: [[LOAD:%[0-9]+]]:vgpr(<2 x s64>) = G_LOAD [[COPY]](p4) :: (load (<2 x s64>) from %ir.constant.not.uniform.v4i64, align 32, addrspace 4) + ; CHECK-NEXT: [[C:%[0-9]+]]:vgpr(s64) = G_CONSTANT i64 16 + ; CHECK-NEXT: [[PTR_ADD:%[0-9]+]]:vgpr(p4) = G_PTR_ADD [[COPY]], [[C]](s64) + ; CHECK-NEXT: [[LOAD1:%[0-9]+]]:vgpr(<2 x s64>) = G_LOAD [[PTR_ADD]](p4) :: (load (<2 x s64>) from %ir.constant.not.uniform.v4i64 + 16, basealign 32, addrspace 4) + ; CHECK-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:vgpr(<4 x s64>) = G_CONCAT_VECTORS [[LOAD]](<2 x s64>), [[LOAD1]](<2 x s64>) %0:_(p4) = COPY $sgpr0_sgpr1 %1:_(<4 x s64>) = G_LOAD %0 :: (load (<4 x s64>) from %ir.constant.not.uniform.v4i64) ... @@ -337,18 +337,18 @@ body: | liveins: $sgpr0_sgpr1 ; CHECK-LABEL: name: load_constant_v16i32_non_uniform ; CHECK: [[COPY:%[0-9]+]]:sgpr(p4) = COPY $sgpr0_sgpr1 - ; CHECK: [[COPY1:%[0-9]+]]:vgpr(p4) = COPY [[COPY]](p4) - ; CHECK: [[LOAD:%[0-9]+]]:vgpr(<4 x s32>) = G_LOAD [[COPY]](p4) :: (load (<4 x s32>) from %ir.constant.not.uniform.v16i32, align 64, addrspace 4) - ; CHECK: [[C:%[0-9]+]]:vgpr(s64) = G_CONSTANT i64 16 - ; CHECK: [[PTR_ADD:%[0-9]+]]:vgpr(p4) = G_PTR_ADD [[COPY]], [[C]](s64) - ; CHECK: [[LOAD1:%[0-9]+]]:vgpr(<4 x s32>) = G_LOAD [[PTR_ADD]](p4) :: (load (<4 x s32>) from %ir.constant.not.uniform.v16i32 + 16, basealign 64, addrspace 4) - ; CHECK: [[C1:%[0-9]+]]:vgpr(s64) = G_CONSTANT i64 32 - ; CHECK: [[PTR_ADD1:%[0-9]+]]:vgpr(p4) = G_PTR_ADD [[COPY]], [[C1]](s64) - ; CHECK: [[LOAD2:%[0-9]+]]:vgpr(<4 x s32>) = G_LOAD [[PTR_ADD1]](p4) :: (load (<4 x s32>) from %ir.constant.not.uniform.v16i32 + 32, align 32, basealign 64, addrspace 4) - ; CHECK: [[C2:%[0-9]+]]:vgpr(s64) = G_CONSTANT i64 48 - ; CHECK: [[PTR_ADD2:%[0-9]+]]:vgpr(p4) = G_PTR_ADD [[COPY]], [[C2]](s64) - ; CHECK: [[LOAD3:%[0-9]+]]:vgpr(<4 x s32>) = G_LOAD [[PTR_ADD2]](p4) :: (load (<4 x s32>) from %ir.constant.not.uniform.v16i32 + 48, basealign 64, addrspace 4) - ; CHECK: [[CONCAT_VECTORS:%[0-9]+]]:vgpr(<16 x s32>) = G_CONCAT_VECTORS [[LOAD]](<4 x s32>), [[LOAD1]](<4 x s32>), [[LOAD2]](<4 x s32>), [[LOAD3]](<4 x s32>) + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr(p4) = COPY [[COPY]](p4) + ; CHECK-NEXT: [[LOAD:%[0-9]+]]:vgpr(<4 x s32>) = G_LOAD [[COPY]](p4) :: (load (<4 x s32>) from %ir.constant.not.uniform.v16i32, align 64, addrspace 4) + ; CHECK-NEXT: [[C:%[0-9]+]]:vgpr(s64) = G_CONSTANT i64 16 + ; CHECK-NEXT: [[PTR_ADD:%[0-9]+]]:vgpr(p4) = G_PTR_ADD [[COPY]], [[C]](s64) + ; CHECK-NEXT: [[LOAD1:%[0-9]+]]:vgpr(<4 x s32>) = G_LOAD [[PTR_ADD]](p4) :: (load (<4 x s32>) from %ir.constant.not.uniform.v16i32 + 16, basealign 64, addrspace 4) + ; CHECK-NEXT: [[C1:%[0-9]+]]:vgpr(s64) = G_CONSTANT i64 32 + ; CHECK-NEXT: 
[[PTR_ADD1:%[0-9]+]]:vgpr(p4) = G_PTR_ADD [[COPY]], [[C1]](s64) + ; CHECK-NEXT: [[LOAD2:%[0-9]+]]:vgpr(<4 x s32>) = G_LOAD [[PTR_ADD1]](p4) :: (load (<4 x s32>) from %ir.constant.not.uniform.v16i32 + 32, align 32, basealign 64, addrspace 4) + ; CHECK-NEXT: [[C2:%[0-9]+]]:vgpr(s64) = G_CONSTANT i64 48 + ; CHECK-NEXT: [[PTR_ADD2:%[0-9]+]]:vgpr(p4) = G_PTR_ADD [[COPY]], [[C2]](s64) + ; CHECK-NEXT: [[LOAD3:%[0-9]+]]:vgpr(<4 x s32>) = G_LOAD [[PTR_ADD2]](p4) :: (load (<4 x s32>) from %ir.constant.not.uniform.v16i32 + 48, basealign 64, addrspace 4) + ; CHECK-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:vgpr(<16 x s32>) = G_CONCAT_VECTORS [[LOAD]](<4 x s32>), [[LOAD1]](<4 x s32>), [[LOAD2]](<4 x s32>), [[LOAD3]](<4 x s32>) %0:_(p4) = COPY $sgpr0_sgpr1 %1:_(<16 x s32>) = G_LOAD %0 :: (load (<16 x s32>) from %ir.constant.not.uniform.v16i32) ... @@ -362,18 +362,18 @@ body: | liveins: $sgpr0_sgpr1 ; CHECK-LABEL: name: load_constant_v8i64_non_uniform ; CHECK: [[COPY:%[0-9]+]]:sgpr(p4) = COPY $sgpr0_sgpr1 - ; CHECK: [[COPY1:%[0-9]+]]:vgpr(p4) = COPY [[COPY]](p4) - ; CHECK: [[LOAD:%[0-9]+]]:vgpr(<2 x s64>) = G_LOAD [[COPY]](p4) :: (load (<2 x s64>) from %ir.constant.not.uniform.v8i64, align 64, addrspace 4) - ; CHECK: [[C:%[0-9]+]]:vgpr(s64) = G_CONSTANT i64 16 - ; CHECK: [[PTR_ADD:%[0-9]+]]:vgpr(p4) = G_PTR_ADD [[COPY]], [[C]](s64) - ; CHECK: [[LOAD1:%[0-9]+]]:vgpr(<2 x s64>) = G_LOAD [[PTR_ADD]](p4) :: (load (<2 x s64>) from %ir.constant.not.uniform.v8i64 + 16, basealign 64, addrspace 4) - ; CHECK: [[C1:%[0-9]+]]:vgpr(s64) = G_CONSTANT i64 32 - ; CHECK: [[PTR_ADD1:%[0-9]+]]:vgpr(p4) = G_PTR_ADD [[COPY]], [[C1]](s64) - ; CHECK: [[LOAD2:%[0-9]+]]:vgpr(<2 x s64>) = G_LOAD [[PTR_ADD1]](p4) :: (load (<2 x s64>) from %ir.constant.not.uniform.v8i64 + 32, align 32, basealign 64, addrspace 4) - ; CHECK: [[C2:%[0-9]+]]:vgpr(s64) = G_CONSTANT i64 48 - ; CHECK: [[PTR_ADD2:%[0-9]+]]:vgpr(p4) = G_PTR_ADD [[COPY]], [[C2]](s64) - ; CHECK: [[LOAD3:%[0-9]+]]:vgpr(<2 x s64>) = G_LOAD [[PTR_ADD2]](p4) :: (load (<2 x s64>) from %ir.constant.not.uniform.v8i64 + 48, basealign 64, addrspace 4) - ; CHECK: [[CONCAT_VECTORS:%[0-9]+]]:vgpr(<8 x s64>) = G_CONCAT_VECTORS [[LOAD]](<2 x s64>), [[LOAD1]](<2 x s64>), [[LOAD2]](<2 x s64>), [[LOAD3]](<2 x s64>) + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr(p4) = COPY [[COPY]](p4) + ; CHECK-NEXT: [[LOAD:%[0-9]+]]:vgpr(<2 x s64>) = G_LOAD [[COPY]](p4) :: (load (<2 x s64>) from %ir.constant.not.uniform.v8i64, align 64, addrspace 4) + ; CHECK-NEXT: [[C:%[0-9]+]]:vgpr(s64) = G_CONSTANT i64 16 + ; CHECK-NEXT: [[PTR_ADD:%[0-9]+]]:vgpr(p4) = G_PTR_ADD [[COPY]], [[C]](s64) + ; CHECK-NEXT: [[LOAD1:%[0-9]+]]:vgpr(<2 x s64>) = G_LOAD [[PTR_ADD]](p4) :: (load (<2 x s64>) from %ir.constant.not.uniform.v8i64 + 16, basealign 64, addrspace 4) + ; CHECK-NEXT: [[C1:%[0-9]+]]:vgpr(s64) = G_CONSTANT i64 32 + ; CHECK-NEXT: [[PTR_ADD1:%[0-9]+]]:vgpr(p4) = G_PTR_ADD [[COPY]], [[C1]](s64) + ; CHECK-NEXT: [[LOAD2:%[0-9]+]]:vgpr(<2 x s64>) = G_LOAD [[PTR_ADD1]](p4) :: (load (<2 x s64>) from %ir.constant.not.uniform.v8i64 + 32, align 32, basealign 64, addrspace 4) + ; CHECK-NEXT: [[C2:%[0-9]+]]:vgpr(s64) = G_CONSTANT i64 48 + ; CHECK-NEXT: [[PTR_ADD2:%[0-9]+]]:vgpr(p4) = G_PTR_ADD [[COPY]], [[C2]](s64) + ; CHECK-NEXT: [[LOAD3:%[0-9]+]]:vgpr(<2 x s64>) = G_LOAD [[PTR_ADD2]](p4) :: (load (<2 x s64>) from %ir.constant.not.uniform.v8i64 + 48, basealign 64, addrspace 4) + ; CHECK-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:vgpr(<8 x s64>) = G_CONCAT_VECTORS [[LOAD]](<2 x s64>), [[LOAD1]](<2 x s64>), [[LOAD2]](<2 x s64>), [[LOAD3]](<2 x s64>) 
%0:_(p4) = COPY $sgpr0_sgpr1 %1:_(<8 x s64>) = G_LOAD %0 :: (load (<8 x s64>) from %ir.constant.not.uniform.v8i64) ... @@ -387,7 +387,7 @@ body: | liveins: $sgpr0_sgpr1 ; CHECK-LABEL: name: load_constant_v8i32_uniform ; CHECK: [[COPY:%[0-9]+]]:sgpr(p4) = COPY $sgpr0_sgpr1 - ; CHECK: [[LOAD:%[0-9]+]]:sgpr(<8 x s32>) = G_LOAD [[COPY]](p4) :: (load (<8 x s32>), addrspace 4) + ; CHECK-NEXT: [[LOAD:%[0-9]+]]:sgpr(<8 x s32>) = G_LOAD [[COPY]](p4) :: (load (<8 x s32>), addrspace 4) %0:_(p4) = COPY $sgpr0_sgpr1 %1:_(<8 x s32>) = G_LOAD %0 :: (load (<8 x s32>), addrspace 4) ... @@ -401,7 +401,7 @@ body: | liveins: $sgpr0_sgpr1 ; CHECK-LABEL: name: load_constant_v16i16_uniform ; CHECK: [[COPY:%[0-9]+]]:sgpr(p4) = COPY $sgpr0_sgpr1 - ; CHECK: [[LOAD:%[0-9]+]]:sgpr(<16 x s16>) = G_LOAD [[COPY]](p4) :: (load (<16 x s16>), addrspace 4) + ; CHECK-NEXT: [[LOAD:%[0-9]+]]:sgpr(<16 x s16>) = G_LOAD [[COPY]](p4) :: (load (<16 x s16>), addrspace 4) %0:_(p4) = COPY $sgpr0_sgpr1 %1:_(<16 x s16>) = G_LOAD %0 :: (load (<16 x s16>), addrspace 4) ... @@ -415,7 +415,7 @@ body: | liveins: $sgpr0_sgpr1 ; CHECK-LABEL: name: load_constant_v4i64_uniform ; CHECK: [[COPY:%[0-9]+]]:sgpr(p4) = COPY $sgpr0_sgpr1 - ; CHECK: [[LOAD:%[0-9]+]]:sgpr(<4 x s64>) = G_LOAD [[COPY]](p4) :: (load (<4 x s64>), addrspace 4) + ; CHECK-NEXT: [[LOAD:%[0-9]+]]:sgpr(<4 x s64>) = G_LOAD [[COPY]](p4) :: (load (<4 x s64>), addrspace 4) %0:_(p4) = COPY $sgpr0_sgpr1 %1:_(<4 x s64>) = G_LOAD %0 :: (load (<4 x s64>), addrspace 4) ... @@ -429,7 +429,7 @@ body: | liveins: $sgpr0_sgpr1 ; CHECK-LABEL: name: load_constant_v16i32_uniform ; CHECK: [[COPY:%[0-9]+]]:sgpr(p4) = COPY $sgpr0_sgpr1 - ; CHECK: [[LOAD:%[0-9]+]]:sgpr(<16 x s32>) = G_LOAD [[COPY]](p4) :: (load (<16 x s32>), addrspace 4) + ; CHECK-NEXT: [[LOAD:%[0-9]+]]:sgpr(<16 x s32>) = G_LOAD [[COPY]](p4) :: (load (<16 x s32>), addrspace 4) %0:_(p4) = COPY $sgpr0_sgpr1 %1:_(<16 x s32>) = G_LOAD %0 :: (load (<16 x s32>), addrspace 4) ... @@ -443,7 +443,7 @@ body: | liveins: $sgpr0_sgpr1 ; CHECK-LABEL: name: load_constant_v8i64_uniform ; CHECK: [[COPY:%[0-9]+]]:sgpr(p4) = COPY $sgpr0_sgpr1 - ; CHECK: [[LOAD:%[0-9]+]]:sgpr(<8 x s64>) = G_LOAD [[COPY]](p4) :: (load (<8 x s64>), addrspace 4) + ; CHECK-NEXT: [[LOAD:%[0-9]+]]:sgpr(<8 x s64>) = G_LOAD [[COPY]](p4) :: (load (<8 x s64>), addrspace 4) %0:_(p4) = COPY $sgpr0_sgpr1 %1:_(<8 x s64>) = G_LOAD %0 :: (load (<8 x s64>), addrspace 4) ... 
@@ -457,8 +457,8 @@ body: | ; CHECK-LABEL: name: load_local_uniform ; CHECK: [[COPY:%[0-9]+]]:sgpr(p3) = COPY $sgpr0 - ; CHECK: [[COPY1:%[0-9]+]]:vgpr(p3) = COPY [[COPY]](p3) - ; CHECK: [[LOAD:%[0-9]+]]:vgpr(s32) = G_LOAD [[COPY1]](p3) :: (load (s32), addrspace 3) + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr(p3) = COPY [[COPY]](p3) + ; CHECK-NEXT: [[LOAD:%[0-9]+]]:vgpr(s32) = G_LOAD [[COPY1]](p3) :: (load (s32), addrspace 3) %0:_(p3) = COPY $sgpr0 %1:_(s32) = G_LOAD %0 :: (load (s32), addrspace 3) @@ -472,8 +472,8 @@ body: | ; CHECK-LABEL: name: load_region_uniform ; CHECK: [[COPY:%[0-9]+]]:sgpr(p3) = COPY $sgpr0 - ; CHECK: [[COPY1:%[0-9]+]]:vgpr(p3) = COPY [[COPY]](p3) - ; CHECK: [[LOAD:%[0-9]+]]:vgpr(s32) = G_LOAD [[COPY1]](p3) :: (load (s32), addrspace 5) + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr(p3) = COPY [[COPY]](p3) + ; CHECK-NEXT: [[LOAD:%[0-9]+]]:vgpr(s32) = G_LOAD [[COPY1]](p3) :: (load (s32), addrspace 5) %0:_(p3) = COPY $sgpr0 %1:_(s32) = G_LOAD %0 :: (load (s32), addrspace 5) @@ -488,8 +488,8 @@ body: | liveins: $sgpr0_sgpr1 ; CHECK-LABEL: name: extload_constant_i8_to_i32_uniform ; CHECK: [[COPY:%[0-9]+]]:sgpr(p4) = COPY $sgpr0_sgpr1 - ; CHECK: [[COPY1:%[0-9]+]]:vgpr(p4) = COPY [[COPY]](p4) - ; CHECK: [[LOAD:%[0-9]+]]:vgpr(s32) = G_LOAD [[COPY1]](p4) :: (load (s8), addrspace 4) + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr(p4) = COPY [[COPY]](p4) + ; CHECK-NEXT: [[LOAD:%[0-9]+]]:vgpr(s32) = G_LOAD [[COPY1]](p4) :: (load (s8), addrspace 4) %0:_(p4) = COPY $sgpr0_sgpr1 %1:_(s32) = G_LOAD %0 :: (load (s8), addrspace 4, align 1) ... @@ -504,8 +504,8 @@ body: | ; CHECK-LABEL: name: extload_global_i8_to_i32_uniform ; CHECK: [[COPY:%[0-9]+]]:sgpr(p4) = COPY $sgpr0_sgpr1 - ; CHECK: [[COPY1:%[0-9]+]]:vgpr(p4) = COPY [[COPY]](p4) - ; CHECK: [[LOAD:%[0-9]+]]:vgpr(s32) = G_LOAD [[COPY1]](p4) :: (load (s8), addrspace 1) + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr(p4) = COPY [[COPY]](p4) + ; CHECK-NEXT: [[LOAD:%[0-9]+]]:vgpr(s32) = G_LOAD [[COPY1]](p4) :: (load (s8), addrspace 1) %0:_(p4) = COPY $sgpr0_sgpr1 %1:_(s32) = G_LOAD %0 :: (load (s8), addrspace 1, align 1) ... @@ -520,8 +520,8 @@ body: | ; CHECK-LABEL: name: extload_constant_i16_to_i32_uniform ; CHECK: [[COPY:%[0-9]+]]:sgpr(p4) = COPY $sgpr0_sgpr1 - ; CHECK: [[COPY1:%[0-9]+]]:vgpr(p4) = COPY [[COPY]](p4) - ; CHECK: [[LOAD:%[0-9]+]]:vgpr(s32) = G_LOAD [[COPY1]](p4) :: (load (s16), addrspace 4) + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr(p4) = COPY [[COPY]](p4) + ; CHECK-NEXT: [[LOAD:%[0-9]+]]:vgpr(s32) = G_LOAD [[COPY1]](p4) :: (load (s16), addrspace 4) %0:_(p4) = COPY $sgpr0_sgpr1 %1:_(s32) = G_LOAD %0 :: (load (s16), addrspace 4, align 2) ... @@ -536,8 +536,8 @@ body: | ; CHECK-LABEL: name: extload_global_i16_to_i32_uniform ; CHECK: [[COPY:%[0-9]+]]:sgpr(p4) = COPY $sgpr0_sgpr1 - ; CHECK: [[COPY1:%[0-9]+]]:vgpr(p4) = COPY [[COPY]](p4) - ; CHECK: [[LOAD:%[0-9]+]]:vgpr(s32) = G_LOAD [[COPY1]](p4) :: (load (s16), addrspace 1) + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr(p4) = COPY [[COPY]](p4) + ; CHECK-NEXT: [[LOAD:%[0-9]+]]:vgpr(s32) = G_LOAD [[COPY1]](p4) :: (load (s16), addrspace 1) %0:_(p4) = COPY $sgpr0_sgpr1 %1:_(s32) = G_LOAD %0 :: (load (s16), addrspace 1, align 2) ... 
@@ -551,7 +551,7 @@ body: | liveins: $sgpr0_sgpr1 ; CHECK-LABEL: name: load_constant_i32_uniform_align4 ; CHECK: [[COPY:%[0-9]+]]:sgpr(p4) = COPY $sgpr0_sgpr1 - ; CHECK: [[LOAD:%[0-9]+]]:sgpr(s32) = G_LOAD [[COPY]](p4) :: (load (s32), addrspace 4) + ; CHECK-NEXT: [[LOAD:%[0-9]+]]:sgpr(s32) = G_LOAD [[COPY]](p4) :: (load (s32), addrspace 4) %0:_(p4) = COPY $sgpr0_sgpr1 %1:_(s32) = G_LOAD %0 :: (load (s32), addrspace 4, align 4) ... @@ -566,8 +566,8 @@ body: | ; CHECK-LABEL: name: load_constant_i32_uniform_align2 ; CHECK: [[COPY:%[0-9]+]]:sgpr(p4) = COPY $sgpr0_sgpr1 - ; CHECK: [[COPY1:%[0-9]+]]:vgpr(p4) = COPY [[COPY]](p4) - ; CHECK: [[LOAD:%[0-9]+]]:vgpr(s32) = G_LOAD [[COPY1]](p4) :: (load (s32), align 2, addrspace 4) + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr(p4) = COPY [[COPY]](p4) + ; CHECK-NEXT: [[LOAD:%[0-9]+]]:vgpr(s32) = G_LOAD [[COPY1]](p4) :: (load (s32), align 2, addrspace 4) %0:_(p4) = COPY $sgpr0_sgpr1 %1:_(s32) = G_LOAD %0 :: (load (s32), addrspace 4, align 2) ... @@ -582,8 +582,8 @@ body: | ; CHECK-LABEL: name: load_constant_i32_uniform_align1 ; CHECK: [[COPY:%[0-9]+]]:sgpr(p4) = COPY $sgpr0_sgpr1 - ; CHECK: [[COPY1:%[0-9]+]]:vgpr(p4) = COPY [[COPY]](p4) - ; CHECK: [[LOAD:%[0-9]+]]:vgpr(s32) = G_LOAD [[COPY1]](p4) :: (load (s32), align 1, addrspace 4) + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr(p4) = COPY [[COPY]](p4) + ; CHECK-NEXT: [[LOAD:%[0-9]+]]:vgpr(s32) = G_LOAD [[COPY1]](p4) :: (load (s32), align 1, addrspace 4) %0:_(p4) = COPY $sgpr0_sgpr1 %1:_(s32) = G_LOAD %0 :: (load (s32), addrspace 4, align 1) ... @@ -598,8 +598,8 @@ body: | ; CHECK-LABEL: name: load_private_uniform_sgpr_i32 ; CHECK: [[COPY:%[0-9]+]]:sgpr(p5) = COPY $sgpr0 - ; CHECK: [[COPY1:%[0-9]+]]:vgpr(p5) = COPY [[COPY]](p5) - ; CHECK: [[LOAD:%[0-9]+]]:vgpr(s32) = G_LOAD [[COPY1]](p5) :: (load (s32), addrspace 5) + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr(p5) = COPY [[COPY]](p5) + ; CHECK-NEXT: [[LOAD:%[0-9]+]]:vgpr(s32) = G_LOAD [[COPY1]](p5) :: (load (s32), addrspace 5) %0:_(p5) = COPY $sgpr0 %1:_(s32) = G_LOAD %0 :: (load (s32), addrspace 5, align 4) ... @@ -615,12 +615,13 @@ body: | ; CHECK-LABEL: name: load_constant_v8i32_vgpr_crash ; CHECK: liveins: $vgpr0_vgpr1 - ; CHECK: [[COPY:%[0-9]+]]:vgpr(p4) = COPY $vgpr0_vgpr1 - ; CHECK: [[LOAD:%[0-9]+]]:vgpr(<4 x s32>) = G_LOAD [[COPY]](p4) :: (load (<4 x s32>), align 32, addrspace 4) - ; CHECK: [[C:%[0-9]+]]:vgpr(s64) = G_CONSTANT i64 16 - ; CHECK: [[PTR_ADD:%[0-9]+]]:vgpr(p4) = G_PTR_ADD [[COPY]], [[C]](s64) - ; CHECK: [[LOAD1:%[0-9]+]]:vgpr(<4 x s32>) = G_LOAD [[PTR_ADD]](p4) :: (load (<4 x s32>) from unknown-address + 16, addrspace 4) - ; CHECK: [[CONCAT_VECTORS:%[0-9]+]]:vgpr(<8 x s32>) = G_CONCAT_VECTORS [[LOAD]](<4 x s32>), [[LOAD1]](<4 x s32>) + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr(p4) = COPY $vgpr0_vgpr1 + ; CHECK-NEXT: [[LOAD:%[0-9]+]]:vgpr(<4 x s32>) = G_LOAD [[COPY]](p4) :: (load (<4 x s32>), align 32, addrspace 4) + ; CHECK-NEXT: [[C:%[0-9]+]]:vgpr(s64) = G_CONSTANT i64 16 + ; CHECK-NEXT: [[PTR_ADD:%[0-9]+]]:vgpr(p4) = G_PTR_ADD [[COPY]], [[C]](s64) + ; CHECK-NEXT: [[LOAD1:%[0-9]+]]:vgpr(<4 x s32>) = G_LOAD [[PTR_ADD]](p4) :: (load (<4 x s32>) from unknown-address + 16, addrspace 4) + ; CHECK-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:vgpr(<8 x s32>) = G_CONCAT_VECTORS [[LOAD]](<4 x s32>), [[LOAD1]](<4 x s32>) %0:_(p4) = COPY $vgpr0_vgpr1 %1:_(<8 x s32>) = G_LOAD %0 :: (load (<8 x s32>), addrspace 4) ... 
@@ -633,21 +634,24 @@ tracksRegLiveness: true body: | ; CHECK-LABEL: name: load_constant_v8i32_vgpr_crash_loop_phi ; CHECK: bb.0: - ; CHECK: successors: %bb.1(0x80000000) - ; CHECK: liveins: $sgpr0_sgpr1, $sgpr2_sgpr3 - ; CHECK: [[COPY:%[0-9]+]]:sgpr(p4) = COPY $sgpr0_sgpr1 - ; CHECK: [[COPY1:%[0-9]+]]:sgpr(p4) = COPY $sgpr2_sgpr3 - ; CHECK: G_BR %bb.1 - ; CHECK: bb.1: - ; CHECK: successors: %bb.1(0x80000000) - ; CHECK: [[PHI:%[0-9]+]]:vgpr(p4) = G_PHI [[COPY]](p4), %bb.0, %3(p4), %bb.1 - ; CHECK: [[LOAD:%[0-9]+]]:vgpr(<4 x s32>) = G_LOAD [[PHI]](p4) :: (load (<4 x s32>), align 32, addrspace 4) - ; CHECK: [[C:%[0-9]+]]:vgpr(s64) = G_CONSTANT i64 16 - ; CHECK: [[PTR_ADD:%[0-9]+]]:vgpr(p4) = G_PTR_ADD [[PHI]], [[C]](s64) - ; CHECK: [[LOAD1:%[0-9]+]]:vgpr(<4 x s32>) = G_LOAD [[PTR_ADD]](p4) :: (load (<4 x s32>) from unknown-address + 16, addrspace 4) - ; CHECK: [[CONCAT_VECTORS:%[0-9]+]]:vgpr(<8 x s32>) = G_CONCAT_VECTORS [[LOAD]](<4 x s32>), [[LOAD1]](<4 x s32>) - ; CHECK: [[COPY2:%[0-9]+]]:sgpr(p4) = COPY [[COPY1]](p4) - ; CHECK: G_BR %bb.1 + ; CHECK-NEXT: successors: %bb.1(0x80000000) + ; CHECK-NEXT: liveins: $sgpr0_sgpr1, $sgpr2_sgpr3 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr(p4) = COPY $sgpr0_sgpr1 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr(p4) = COPY $sgpr2_sgpr3 + ; CHECK-NEXT: G_BR %bb.1 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: bb.1: + ; CHECK-NEXT: successors: %bb.1(0x80000000) + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[PHI:%[0-9]+]]:vgpr(p4) = G_PHI [[COPY]](p4), %bb.0, %3(p4), %bb.1 + ; CHECK-NEXT: [[LOAD:%[0-9]+]]:vgpr(<4 x s32>) = G_LOAD [[PHI]](p4) :: (load (<4 x s32>), align 32, addrspace 4) + ; CHECK-NEXT: [[C:%[0-9]+]]:vgpr(s64) = G_CONSTANT i64 16 + ; CHECK-NEXT: [[PTR_ADD:%[0-9]+]]:vgpr(p4) = G_PTR_ADD [[PHI]], [[C]](s64) + ; CHECK-NEXT: [[LOAD1:%[0-9]+]]:vgpr(<4 x s32>) = G_LOAD [[PTR_ADD]](p4) :: (load (<4 x s32>) from unknown-address + 16, addrspace 4) + ; CHECK-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:vgpr(<8 x s32>) = G_CONCAT_VECTORS [[LOAD]](<4 x s32>), [[LOAD1]](<4 x s32>) + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:sgpr(p4) = COPY [[COPY1]](p4) + ; CHECK-NEXT: G_BR %bb.1 bb.0: liveins: $sgpr0_sgpr1, $sgpr2_sgpr3 @@ -672,14 +676,14 @@ body: | liveins: $sgpr0_sgpr1 ; CHECK-LABEL: name: load_constant_v3i32_align4 ; CHECK: [[COPY:%[0-9]+]]:sgpr(p4) = COPY $sgpr0_sgpr1 - ; CHECK: [[LOAD:%[0-9]+]]:sgpr(<2 x s32>) = G_LOAD [[COPY]](p4) :: (invariant load (<2 x s32>), align 4, addrspace 4) - ; CHECK: [[C:%[0-9]+]]:sgpr(s64) = G_CONSTANT i64 8 - ; CHECK: [[PTR_ADD:%[0-9]+]]:sgpr(p4) = G_PTR_ADD [[COPY]], [[C]](s64) - ; CHECK: [[LOAD1:%[0-9]+]]:sgpr(s32) = G_LOAD [[PTR_ADD]](p4) :: (invariant load (s32) from unknown-address + 8, addrspace 4) - ; CHECK: [[DEF:%[0-9]+]]:sgpr(<3 x s32>) = G_IMPLICIT_DEF - ; CHECK: [[INSERT:%[0-9]+]]:sgpr(<3 x s32>) = G_INSERT [[DEF]], [[LOAD]](<2 x s32>), 0 - ; CHECK: [[INSERT1:%[0-9]+]]:sgpr(<3 x s32>) = G_INSERT [[INSERT]], [[LOAD1]](s32), 64 - ; CHECK: S_ENDPGM 0, implicit [[INSERT1]](<3 x s32>) + ; CHECK-NEXT: [[LOAD:%[0-9]+]]:sgpr(<2 x s32>) = G_LOAD [[COPY]](p4) :: (invariant load (<2 x s32>), align 4, addrspace 4) + ; CHECK-NEXT: [[C:%[0-9]+]]:sgpr(s64) = G_CONSTANT i64 8 + ; CHECK-NEXT: [[PTR_ADD:%[0-9]+]]:sgpr(p4) = G_PTR_ADD [[COPY]], [[C]](s64) + ; CHECK-NEXT: [[LOAD1:%[0-9]+]]:sgpr(s32) = G_LOAD [[PTR_ADD]](p4) :: (invariant load (s32) from unknown-address + 8, addrspace 4) + ; CHECK-NEXT: [[DEF:%[0-9]+]]:sgpr(<3 x s32>) = G_IMPLICIT_DEF + ; CHECK-NEXT: [[INSERT:%[0-9]+]]:sgpr(<3 x s32>) = G_INSERT [[DEF]], [[LOAD]](<2 x s32>), 0 + ; 
CHECK-NEXT: [[INSERT1:%[0-9]+]]:sgpr(<3 x s32>) = G_INSERT [[INSERT]], [[LOAD1]](s32), 64 + ; CHECK-NEXT: S_ENDPGM 0, implicit [[INSERT1]](<3 x s32>) %0:_(p4) = COPY $sgpr0_sgpr1 %1:_(<3 x s32>) = G_LOAD %0 :: (invariant load (<3 x s32>), addrspace 4, align 4) S_ENDPGM 0, implicit %1 @@ -694,14 +698,14 @@ body: | liveins: $sgpr0_sgpr1 ; CHECK-LABEL: name: load_constant_v3i32_align8 ; CHECK: [[COPY:%[0-9]+]]:sgpr(p4) = COPY $sgpr0_sgpr1 - ; CHECK: [[LOAD:%[0-9]+]]:sgpr(<2 x s32>) = G_LOAD [[COPY]](p4) :: (invariant load (<2 x s32>), addrspace 4) - ; CHECK: [[C:%[0-9]+]]:sgpr(s64) = G_CONSTANT i64 8 - ; CHECK: [[PTR_ADD:%[0-9]+]]:sgpr(p4) = G_PTR_ADD [[COPY]], [[C]](s64) - ; CHECK: [[LOAD1:%[0-9]+]]:sgpr(s32) = G_LOAD [[PTR_ADD]](p4) :: (invariant load (s32) from unknown-address + 8, align 8, addrspace 4) - ; CHECK: [[DEF:%[0-9]+]]:sgpr(<3 x s32>) = G_IMPLICIT_DEF - ; CHECK: [[INSERT:%[0-9]+]]:sgpr(<3 x s32>) = G_INSERT [[DEF]], [[LOAD]](<2 x s32>), 0 - ; CHECK: [[INSERT1:%[0-9]+]]:sgpr(<3 x s32>) = G_INSERT [[INSERT]], [[LOAD1]](s32), 64 - ; CHECK: S_ENDPGM 0, implicit [[INSERT1]](<3 x s32>) + ; CHECK-NEXT: [[LOAD:%[0-9]+]]:sgpr(<2 x s32>) = G_LOAD [[COPY]](p4) :: (invariant load (<2 x s32>), addrspace 4) + ; CHECK-NEXT: [[C:%[0-9]+]]:sgpr(s64) = G_CONSTANT i64 8 + ; CHECK-NEXT: [[PTR_ADD:%[0-9]+]]:sgpr(p4) = G_PTR_ADD [[COPY]], [[C]](s64) + ; CHECK-NEXT: [[LOAD1:%[0-9]+]]:sgpr(s32) = G_LOAD [[PTR_ADD]](p4) :: (invariant load (s32) from unknown-address + 8, align 8, addrspace 4) + ; CHECK-NEXT: [[DEF:%[0-9]+]]:sgpr(<3 x s32>) = G_IMPLICIT_DEF + ; CHECK-NEXT: [[INSERT:%[0-9]+]]:sgpr(<3 x s32>) = G_INSERT [[DEF]], [[LOAD]](<2 x s32>), 0 + ; CHECK-NEXT: [[INSERT1:%[0-9]+]]:sgpr(<3 x s32>) = G_INSERT [[INSERT]], [[LOAD1]](s32), 64 + ; CHECK-NEXT: S_ENDPGM 0, implicit [[INSERT1]](<3 x s32>) %0:_(p4) = COPY $sgpr0_sgpr1 %1:_(<3 x s32>) = G_LOAD %0 :: (invariant load (<3 x s32>), addrspace 4, align 8) S_ENDPGM 0, implicit %1 @@ -716,9 +720,9 @@ body: | liveins: $sgpr0_sgpr1 ; CHECK-LABEL: name: load_constant_v3i32_align16 ; CHECK: [[COPY:%[0-9]+]]:sgpr(p4) = COPY $sgpr0_sgpr1 - ; CHECK: [[LOAD:%[0-9]+]]:sgpr(<4 x s32>) = G_LOAD [[COPY]](p4) :: (invariant load (<4 x s32>), addrspace 4) - ; CHECK: [[EXTRACT:%[0-9]+]]:sgpr(<3 x s32>) = G_EXTRACT [[LOAD]](<4 x s32>), 0 - ; CHECK: S_ENDPGM 0, implicit [[EXTRACT]](<3 x s32>) + ; CHECK-NEXT: [[LOAD:%[0-9]+]]:sgpr(<4 x s32>) = G_LOAD [[COPY]](p4) :: (invariant load (<4 x s32>), addrspace 4) + ; CHECK-NEXT: [[EXTRACT:%[0-9]+]]:sgpr(<3 x s32>) = G_EXTRACT [[LOAD]](<4 x s32>), 0 + ; CHECK-NEXT: S_ENDPGM 0, implicit [[EXTRACT]](<3 x s32>) %0:_(p4) = COPY $sgpr0_sgpr1 %1:_(<3 x s32>) = G_LOAD %0 :: (invariant load (<3 x s32>), addrspace 4, align 16) S_ENDPGM 0, implicit %1 @@ -733,14 +737,14 @@ body: | liveins: $sgpr0_sgpr1 ; CHECK-LABEL: name: load_constant_v6i16_align4 ; CHECK: [[COPY:%[0-9]+]]:sgpr(p4) = COPY $sgpr0_sgpr1 - ; CHECK: [[LOAD:%[0-9]+]]:sgpr(<4 x s16>) = G_LOAD [[COPY]](p4) :: (invariant load (<4 x s16>), align 4, addrspace 4) - ; CHECK: [[C:%[0-9]+]]:sgpr(s64) = G_CONSTANT i64 8 - ; CHECK: [[PTR_ADD:%[0-9]+]]:sgpr(p4) = G_PTR_ADD [[COPY]], [[C]](s64) - ; CHECK: [[LOAD1:%[0-9]+]]:sgpr(<2 x s16>) = G_LOAD [[PTR_ADD]](p4) :: (invariant load (<2 x s16>) from unknown-address + 8, addrspace 4) - ; CHECK: [[DEF:%[0-9]+]]:sgpr(<6 x s16>) = G_IMPLICIT_DEF - ; CHECK: [[INSERT:%[0-9]+]]:sgpr(<6 x s16>) = G_INSERT [[DEF]], [[LOAD]](<4 x s16>), 0 - ; CHECK: [[INSERT1:%[0-9]+]]:sgpr(<6 x s16>) = G_INSERT [[INSERT]], [[LOAD1]](<2 x s16>), 64 - ; CHECK: 
S_ENDPGM 0, implicit [[INSERT1]](<6 x s16>) + ; CHECK-NEXT: [[LOAD:%[0-9]+]]:sgpr(<4 x s16>) = G_LOAD [[COPY]](p4) :: (invariant load (<4 x s16>), align 4, addrspace 4) + ; CHECK-NEXT: [[C:%[0-9]+]]:sgpr(s64) = G_CONSTANT i64 8 + ; CHECK-NEXT: [[PTR_ADD:%[0-9]+]]:sgpr(p4) = G_PTR_ADD [[COPY]], [[C]](s64) + ; CHECK-NEXT: [[LOAD1:%[0-9]+]]:sgpr(<2 x s16>) = G_LOAD [[PTR_ADD]](p4) :: (invariant load (<2 x s16>) from unknown-address + 8, addrspace 4) + ; CHECK-NEXT: [[DEF:%[0-9]+]]:sgpr(<6 x s16>) = G_IMPLICIT_DEF + ; CHECK-NEXT: [[INSERT:%[0-9]+]]:sgpr(<6 x s16>) = G_INSERT [[DEF]], [[LOAD]](<4 x s16>), 0 + ; CHECK-NEXT: [[INSERT1:%[0-9]+]]:sgpr(<6 x s16>) = G_INSERT [[INSERT]], [[LOAD1]](<2 x s16>), 64 + ; CHECK-NEXT: S_ENDPGM 0, implicit [[INSERT1]](<6 x s16>) %0:_(p4) = COPY $sgpr0_sgpr1 %1:_(<6 x s16>) = G_LOAD %0 :: (invariant load (<6 x s16>), addrspace 4, align 4) S_ENDPGM 0, implicit %1 @@ -755,14 +759,14 @@ body: | liveins: $sgpr0_sgpr1 ; CHECK-LABEL: name: load_constant_v6i16_align8 ; CHECK: [[COPY:%[0-9]+]]:sgpr(p4) = COPY $sgpr0_sgpr1 - ; CHECK: [[LOAD:%[0-9]+]]:sgpr(<4 x s16>) = G_LOAD [[COPY]](p4) :: (invariant load (<4 x s16>), addrspace 4) - ; CHECK: [[C:%[0-9]+]]:sgpr(s64) = G_CONSTANT i64 8 - ; CHECK: [[PTR_ADD:%[0-9]+]]:sgpr(p4) = G_PTR_ADD [[COPY]], [[C]](s64) - ; CHECK: [[LOAD1:%[0-9]+]]:sgpr(<2 x s16>) = G_LOAD [[PTR_ADD]](p4) :: (invariant load (<2 x s16>) from unknown-address + 8, align 8, addrspace 4) - ; CHECK: [[DEF:%[0-9]+]]:sgpr(<6 x s16>) = G_IMPLICIT_DEF - ; CHECK: [[INSERT:%[0-9]+]]:sgpr(<6 x s16>) = G_INSERT [[DEF]], [[LOAD]](<4 x s16>), 0 - ; CHECK: [[INSERT1:%[0-9]+]]:sgpr(<6 x s16>) = G_INSERT [[INSERT]], [[LOAD1]](<2 x s16>), 64 - ; CHECK: S_ENDPGM 0, implicit [[INSERT1]](<6 x s16>) + ; CHECK-NEXT: [[LOAD:%[0-9]+]]:sgpr(<4 x s16>) = G_LOAD [[COPY]](p4) :: (invariant load (<4 x s16>), addrspace 4) + ; CHECK-NEXT: [[C:%[0-9]+]]:sgpr(s64) = G_CONSTANT i64 8 + ; CHECK-NEXT: [[PTR_ADD:%[0-9]+]]:sgpr(p4) = G_PTR_ADD [[COPY]], [[C]](s64) + ; CHECK-NEXT: [[LOAD1:%[0-9]+]]:sgpr(<2 x s16>) = G_LOAD [[PTR_ADD]](p4) :: (invariant load (<2 x s16>) from unknown-address + 8, align 8, addrspace 4) + ; CHECK-NEXT: [[DEF:%[0-9]+]]:sgpr(<6 x s16>) = G_IMPLICIT_DEF + ; CHECK-NEXT: [[INSERT:%[0-9]+]]:sgpr(<6 x s16>) = G_INSERT [[DEF]], [[LOAD]](<4 x s16>), 0 + ; CHECK-NEXT: [[INSERT1:%[0-9]+]]:sgpr(<6 x s16>) = G_INSERT [[INSERT]], [[LOAD1]](<2 x s16>), 64 + ; CHECK-NEXT: S_ENDPGM 0, implicit [[INSERT1]](<6 x s16>) %0:_(p4) = COPY $sgpr0_sgpr1 %1:_(<6 x s16>) = G_LOAD %0 :: (invariant load (<6 x s16>), addrspace 4, align 8) S_ENDPGM 0, implicit %1 @@ -777,9 +781,9 @@ body: | liveins: $sgpr0_sgpr1 ; CHECK-LABEL: name: load_constant_v6i16_align16 ; CHECK: [[COPY:%[0-9]+]]:sgpr(p4) = COPY $sgpr0_sgpr1 - ; CHECK: [[LOAD:%[0-9]+]]:sgpr(<8 x s16>) = G_LOAD [[COPY]](p4) :: (invariant load (<8 x s16>), addrspace 4) - ; CHECK: [[EXTRACT:%[0-9]+]]:sgpr(<6 x s16>) = G_EXTRACT [[LOAD]](<8 x s16>), 0 - ; CHECK: S_ENDPGM 0, implicit [[EXTRACT]](<6 x s16>) + ; CHECK-NEXT: [[LOAD:%[0-9]+]]:sgpr(<8 x s16>) = G_LOAD [[COPY]](p4) :: (invariant load (<8 x s16>), addrspace 4) + ; CHECK-NEXT: [[EXTRACT:%[0-9]+]]:sgpr(<6 x s16>) = G_EXTRACT [[LOAD]](<8 x s16>), 0 + ; CHECK-NEXT: S_ENDPGM 0, implicit [[EXTRACT]](<6 x s16>) %0:_(p4) = COPY $sgpr0_sgpr1 %1:_(<6 x s16>) = G_LOAD %0 :: (invariant load (<6 x s16>), addrspace 4, align 16) S_ENDPGM 0, implicit %1 @@ -794,14 +798,14 @@ body: | liveins: $sgpr0_sgpr1 ; CHECK-LABEL: name: load_constant_i96_align4 ; CHECK: [[COPY:%[0-9]+]]:sgpr(p4) = COPY 
$sgpr0_sgpr1 - ; CHECK: [[LOAD:%[0-9]+]]:sgpr(s64) = G_LOAD [[COPY]](p4) :: (invariant load (s64), align 4, addrspace 4) - ; CHECK: [[C:%[0-9]+]]:sgpr(s64) = G_CONSTANT i64 8 - ; CHECK: [[PTR_ADD:%[0-9]+]]:sgpr(p4) = G_PTR_ADD [[COPY]], [[C]](s64) - ; CHECK: [[LOAD1:%[0-9]+]]:sgpr(s32) = G_LOAD [[PTR_ADD]](p4) :: (invariant load (s32) from unknown-address + 8, addrspace 4) - ; CHECK: [[DEF:%[0-9]+]]:sgpr(s96) = G_IMPLICIT_DEF - ; CHECK: [[INSERT:%[0-9]+]]:sgpr(s96) = G_INSERT [[DEF]], [[LOAD]](s64), 0 - ; CHECK: [[INSERT1:%[0-9]+]]:sgpr(s96) = G_INSERT [[INSERT]], [[LOAD1]](s32), 64 - ; CHECK: S_ENDPGM 0, implicit [[INSERT1]](s96) + ; CHECK-NEXT: [[LOAD:%[0-9]+]]:sgpr(s64) = G_LOAD [[COPY]](p4) :: (invariant load (s64), align 4, addrspace 4) + ; CHECK-NEXT: [[C:%[0-9]+]]:sgpr(s64) = G_CONSTANT i64 8 + ; CHECK-NEXT: [[PTR_ADD:%[0-9]+]]:sgpr(p4) = G_PTR_ADD [[COPY]], [[C]](s64) + ; CHECK-NEXT: [[LOAD1:%[0-9]+]]:sgpr(s32) = G_LOAD [[PTR_ADD]](p4) :: (invariant load (s32) from unknown-address + 8, addrspace 4) + ; CHECK-NEXT: [[DEF:%[0-9]+]]:sgpr(s96) = G_IMPLICIT_DEF + ; CHECK-NEXT: [[INSERT:%[0-9]+]]:sgpr(s96) = G_INSERT [[DEF]], [[LOAD]](s64), 0 + ; CHECK-NEXT: [[INSERT1:%[0-9]+]]:sgpr(s96) = G_INSERT [[INSERT]], [[LOAD1]](s32), 64 + ; CHECK-NEXT: S_ENDPGM 0, implicit [[INSERT1]](s96) %0:_(p4) = COPY $sgpr0_sgpr1 %1:_(s96) = G_LOAD %0 :: (invariant load (s96), addrspace 4, align 4) S_ENDPGM 0, implicit %1 @@ -816,14 +820,14 @@ body: | liveins: $sgpr0_sgpr1 ; CHECK-LABEL: name: load_constant_i96_align8 ; CHECK: [[COPY:%[0-9]+]]:sgpr(p4) = COPY $sgpr0_sgpr1 - ; CHECK: [[LOAD:%[0-9]+]]:sgpr(s64) = G_LOAD [[COPY]](p4) :: (invariant load (s64), addrspace 4) - ; CHECK: [[C:%[0-9]+]]:sgpr(s64) = G_CONSTANT i64 8 - ; CHECK: [[PTR_ADD:%[0-9]+]]:sgpr(p4) = G_PTR_ADD [[COPY]], [[C]](s64) - ; CHECK: [[LOAD1:%[0-9]+]]:sgpr(s32) = G_LOAD [[PTR_ADD]](p4) :: (invariant load (s32) from unknown-address + 8, align 8, addrspace 4) - ; CHECK: [[DEF:%[0-9]+]]:sgpr(s96) = G_IMPLICIT_DEF - ; CHECK: [[INSERT:%[0-9]+]]:sgpr(s96) = G_INSERT [[DEF]], [[LOAD]](s64), 0 - ; CHECK: [[INSERT1:%[0-9]+]]:sgpr(s96) = G_INSERT [[INSERT]], [[LOAD1]](s32), 64 - ; CHECK: S_ENDPGM 0, implicit [[INSERT1]](s96) + ; CHECK-NEXT: [[LOAD:%[0-9]+]]:sgpr(s64) = G_LOAD [[COPY]](p4) :: (invariant load (s64), addrspace 4) + ; CHECK-NEXT: [[C:%[0-9]+]]:sgpr(s64) = G_CONSTANT i64 8 + ; CHECK-NEXT: [[PTR_ADD:%[0-9]+]]:sgpr(p4) = G_PTR_ADD [[COPY]], [[C]](s64) + ; CHECK-NEXT: [[LOAD1:%[0-9]+]]:sgpr(s32) = G_LOAD [[PTR_ADD]](p4) :: (invariant load (s32) from unknown-address + 8, align 8, addrspace 4) + ; CHECK-NEXT: [[DEF:%[0-9]+]]:sgpr(s96) = G_IMPLICIT_DEF + ; CHECK-NEXT: [[INSERT:%[0-9]+]]:sgpr(s96) = G_INSERT [[DEF]], [[LOAD]](s64), 0 + ; CHECK-NEXT: [[INSERT1:%[0-9]+]]:sgpr(s96) = G_INSERT [[INSERT]], [[LOAD1]](s32), 64 + ; CHECK-NEXT: S_ENDPGM 0, implicit [[INSERT1]](s96) %0:_(p4) = COPY $sgpr0_sgpr1 %1:_(s96) = G_LOAD %0 :: (invariant load (s96), addrspace 4, align 8) S_ENDPGM 0, implicit %1 @@ -838,9 +842,9 @@ body: | liveins: $sgpr0_sgpr1 ; CHECK-LABEL: name: load_constant_i96_align16 ; CHECK: [[COPY:%[0-9]+]]:sgpr(p4) = COPY $sgpr0_sgpr1 - ; CHECK: [[LOAD:%[0-9]+]]:sgpr(s128) = G_LOAD [[COPY]](p4) :: (invariant load (s128), addrspace 4) - ; CHECK: [[EXTRACT:%[0-9]+]]:sgpr(s96) = G_EXTRACT [[LOAD]](s128), 0 - ; CHECK: S_ENDPGM 0, implicit [[EXTRACT]](s96) + ; CHECK-NEXT: [[LOAD:%[0-9]+]]:sgpr(s128) = G_LOAD [[COPY]](p4) :: (invariant load (s128), addrspace 4) + ; CHECK-NEXT: [[EXTRACT:%[0-9]+]]:sgpr(s96) = G_EXTRACT [[LOAD]](s128), 0 + ; 
CHECK-NEXT: S_ENDPGM 0, implicit [[EXTRACT]](s96) %0:_(p4) = COPY $sgpr0_sgpr1 %1:_(s96) = G_LOAD %0 :: (invariant load (s96), addrspace 4, align 16) S_ENDPGM 0, implicit %1 diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-split-scalar-load-metadata.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-split-scalar-load-metadata.mir index 163a83aac29b..ea5a841e32d9 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-split-scalar-load-metadata.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-split-scalar-load-metadata.mir @@ -30,14 +30,14 @@ body: | ; SI-LABEL: name: split_smrd_load_range ; SI: [[COPY:%[0-9]+]]:sgpr(p4) = COPY $sgpr0_sgpr1 - ; SI: [[LOAD:%[0-9]+]]:sgpr(<2 x s32>) = G_LOAD [[COPY]](p4) :: (load (<2 x s32>), addrspace 4) - ; SI: [[C:%[0-9]+]]:sgpr(s64) = G_CONSTANT i64 8 - ; SI: [[PTR_ADD:%[0-9]+]]:sgpr(p4) = G_PTR_ADD [[COPY]], [[C]](s64) - ; SI: [[LOAD1:%[0-9]+]]:sgpr(s32) = G_LOAD [[PTR_ADD]](p4) :: (load (s32) from unknown-address + 8, align 8, addrspace 4) - ; SI: [[DEF:%[0-9]+]]:sgpr(<3 x s32>) = G_IMPLICIT_DEF - ; SI: [[INSERT:%[0-9]+]]:sgpr(<3 x s32>) = G_INSERT [[DEF]], [[LOAD]](<2 x s32>), 0 - ; SI: [[INSERT1:%[0-9]+]]:sgpr(<3 x s32>) = G_INSERT [[INSERT]], [[LOAD1]](s32), 64 - ; SI: $sgpr0_sgpr1_sgpr2 = COPY [[INSERT1]](<3 x s32>) + ; SI-NEXT: [[LOAD:%[0-9]+]]:sgpr(<2 x s32>) = G_LOAD [[COPY]](p4) :: (load (<2 x s32>), addrspace 4) + ; SI-NEXT: [[C:%[0-9]+]]:sgpr(s64) = G_CONSTANT i64 8 + ; SI-NEXT: [[PTR_ADD:%[0-9]+]]:sgpr(p4) = G_PTR_ADD [[COPY]], [[C]](s64) + ; SI-NEXT: [[LOAD1:%[0-9]+]]:sgpr(s32) = G_LOAD [[PTR_ADD]](p4) :: (load (s32) from unknown-address + 8, align 8, addrspace 4) + ; SI-NEXT: [[DEF:%[0-9]+]]:sgpr(<3 x s32>) = G_IMPLICIT_DEF + ; SI-NEXT: [[INSERT:%[0-9]+]]:sgpr(<3 x s32>) = G_INSERT [[DEF]], [[LOAD]](<2 x s32>), 0 + ; SI-NEXT: [[INSERT1:%[0-9]+]]:sgpr(<3 x s32>) = G_INSERT [[INSERT]], [[LOAD1]](s32), 64 + ; SI-NEXT: $sgpr0_sgpr1_sgpr2 = COPY [[INSERT1]](<3 x s32>) %0:_(p4) = COPY $sgpr0_sgpr1 %1:_(<3 x s32>) = G_LOAD %0 :: (load (<3 x s32>), align 8, addrspace 4, !range !0) $sgpr0_sgpr1_sgpr2 = COPY %1 @@ -53,14 +53,14 @@ body: | ; SI-LABEL: name: split_smrd_load_tbaa ; SI: [[COPY:%[0-9]+]]:sgpr(p4) = COPY $sgpr0_sgpr1 - ; SI: [[LOAD:%[0-9]+]]:sgpr(<2 x s32>) = G_LOAD [[COPY]](p4) :: (load (<2 x s32>), !tbaa !2, addrspace 4) - ; SI: [[C:%[0-9]+]]:sgpr(s64) = G_CONSTANT i64 8 - ; SI: [[PTR_ADD:%[0-9]+]]:sgpr(p4) = G_PTR_ADD [[COPY]], [[C]](s64) - ; SI: [[LOAD1:%[0-9]+]]:sgpr(s32) = G_LOAD [[PTR_ADD]](p4) :: (load (s32) from unknown-address + 8, align 8, !tbaa !2, addrspace 4) - ; SI: [[DEF:%[0-9]+]]:sgpr(<3 x s32>) = G_IMPLICIT_DEF - ; SI: [[INSERT:%[0-9]+]]:sgpr(<3 x s32>) = G_INSERT [[DEF]], [[LOAD]](<2 x s32>), 0 - ; SI: [[INSERT1:%[0-9]+]]:sgpr(<3 x s32>) = G_INSERT [[INSERT]], [[LOAD1]](s32), 64 - ; SI: $sgpr0_sgpr1_sgpr2 = COPY [[INSERT1]](<3 x s32>) + ; SI-NEXT: [[LOAD:%[0-9]+]]:sgpr(<2 x s32>) = G_LOAD [[COPY]](p4) :: (load (<2 x s32>), !tbaa !2, addrspace 4) + ; SI-NEXT: [[C:%[0-9]+]]:sgpr(s64) = G_CONSTANT i64 8 + ; SI-NEXT: [[PTR_ADD:%[0-9]+]]:sgpr(p4) = G_PTR_ADD [[COPY]], [[C]](s64) + ; SI-NEXT: [[LOAD1:%[0-9]+]]:sgpr(s32) = G_LOAD [[PTR_ADD]](p4) :: (load (s32) from unknown-address + 8, align 8, !tbaa !2, addrspace 4) + ; SI-NEXT: [[DEF:%[0-9]+]]:sgpr(<3 x s32>) = G_IMPLICIT_DEF + ; SI-NEXT: [[INSERT:%[0-9]+]]:sgpr(<3 x s32>) = G_INSERT [[DEF]], [[LOAD]](<2 x s32>), 0 + ; SI-NEXT: [[INSERT1:%[0-9]+]]:sgpr(<3 x s32>) = G_INSERT [[INSERT]], [[LOAD1]](s32), 64 + ; SI-NEXT: $sgpr0_sgpr1_sgpr2 = 
COPY [[INSERT1]](<3 x s32>) %0:_(p4) = COPY $sgpr0_sgpr1 %1:_(<3 x s32>) = G_LOAD %0 :: (load (<3 x s32>), align 8, addrspace 4, !tbaa !1) $sgpr0_sgpr1_sgpr2 = COPY %1 From 29f88b93fdbe3e20c35842ca3a6c2a3f1a81cfce Mon Sep 17 00:00:00 2001 From: Petar Avramovic Date: Thu, 23 Dec 2021 14:09:51 +0100 Subject: [PATCH 143/293] [GlobalISel] Rework more/fewer elements for vectors Artifact combiner is not able to access individual elements after using LCMTy style merge/unmerge, extract and insert to change vector number of elements (pad with undef or split to sub-vector instructions). Use unmerge to individual elements instead and then merge elements into requested types. Change argument lowering for vectors and moreElementsVector to use buildPadVectorWithUndefElements and buildDeleteTrailingVectorElements. FewerElementsVector had a few helpers that had different behavior, introduce new helper for most of the opcodes. FewerElementsVector helper is more flexible since it can create leftover instruction smaller then requested type (useful in case target wants to avoid pad with undef and use fewer registers). If target does not want leftover of different type it should call more elements first. Some helpers were performing more elements first to have split without leftover. Opcodes that used this helper use clampMaxNumElementsStrict (does more elements first) in LegalizerInfo to avoid test changes. Fixes failures caused by failing to combine artifacts created during more/fewer elements vector. Differential Revision: https://reviews.llvm.org/D114198 --- .../llvm/CodeGen/GlobalISel/LegalizerHelper.h | 52 +- .../llvm/CodeGen/GlobalISel/LegalizerInfo.h | 38 +- .../CodeGen/GlobalISel/MachineIRBuilder.h | 28 + llvm/include/llvm/CodeGen/GlobalISel/Utils.h | 5 + llvm/lib/CodeGen/GlobalISel/CallLowering.cpp | 36 +- .../CodeGen/GlobalISel/LegalizerHelper.cpp | 1118 +-- .../CodeGen/GlobalISel/MachineIRBuilder.cpp | 48 +- llvm/lib/CodeGen/GlobalISel/Utils.cpp | 15 + .../lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp | 53 +- .../Target/AMDGPU/AMDGPURegisterBankInfo.cpp | 21 +- .../AArch64/GlobalISel/arm64-fallback.ll | 8 - .../GlobalISel/call-lowering-vectors.ll | 5 +- .../AArch64/GlobalISel/legalize-saddsat.mir | 3 +- .../GlobalISel/legalize-shuffle-vector.mir | 16 +- .../AArch64/GlobalISel/legalize-ssubsat.mir | 3 +- .../GlobalISel/artifact-combiner-extract.mir | 9 +- .../GlobalISel/artifact-combiner-zext.mir | 16 +- ...egalization-artifact-combiner-dead-def.mir | 20 +- .../AMDGPU/GlobalISel/combine-fma-add-mul.ll | 176 +- .../AMDGPU/GlobalISel/function-returns.ll | 24 +- .../AMDGPU/GlobalISel/insertelement.large.ll | 12 +- .../AMDGPU/GlobalISel/inst-select-extract.mir | 153 - .../GlobalISel/inst-select-fract.f64.mir | 6 +- .../AMDGPU/GlobalISel/inst-select-insert.mir | 39 - .../GlobalISel/inst-select-insert.xfail.mir | 4 +- .../irtranslator-call-return-values.ll | 16 +- .../AMDGPU/GlobalISel/irtranslator-call.ll | 194 +- .../GlobalISel/irtranslator-function-args.ll | 40 +- .../GlobalISel/irtranslator-sibling-call.ll | 17 +- .../AMDGPU/GlobalISel/legalize-and.mir | 303 +- .../AMDGPU/GlobalISel/legalize-anyext.mir | 5 +- .../AMDGPU/GlobalISel/legalize-ashr.mir | 104 +- .../AMDGPU/GlobalISel/legalize-bitcast.mir | 537 +- .../legalize-extract-vector-elt.mir | 312 +- .../AMDGPU/GlobalISel/legalize-extract.mir | 218 +- .../AMDGPU/GlobalISel/legalize-fabs.mir | 47 +- .../AMDGPU/GlobalISel/legalize-fadd.mir | 8 +- .../AMDGPU/GlobalISel/legalize-fdiv.mir | 10 +- .../AMDGPU/GlobalISel/legalize-fma.mir | 12 +- 
.../AMDGPU/GlobalISel/legalize-fmaxnum.mir | 58 +- .../AMDGPU/GlobalISel/legalize-fminnum.mir | 58 +- .../AMDGPU/GlobalISel/legalize-fmul.mir | 16 +- .../AMDGPU/GlobalISel/legalize-freeze.mir | 140 +- .../AMDGPU/GlobalISel/legalize-fshl.mir | 33 +- .../AMDGPU/GlobalISel/legalize-fshr.mir | 308 +- .../GlobalISel/legalize-implicit-def.mir | 107 +- .../GlobalISel/legalize-insert-vector-elt.mir | 79 +- .../AMDGPU/GlobalISel/legalize-insert.mir | 578 +- .../legalize-llvm.amdgcn.image.load.2d.d16.ll | 336 +- ...legalize-llvm.amdgcn.image.store.2d.d16.ll | 80 +- .../legalize-llvm.amdgcn.s.buffer.load.mir | 75 +- .../GlobalISel/legalize-load-constant.mir | 105 +- .../AMDGPU/GlobalISel/legalize-load-flat.mir | 102 +- .../GlobalISel/legalize-load-global.mir | 1016 +-- .../AMDGPU/GlobalISel/legalize-load-local.mir | 64 +- .../GlobalISel/legalize-load-private.mir | 24 +- .../AMDGPU/GlobalISel/legalize-lshr.mir | 216 +- .../CodeGen/AMDGPU/GlobalISel/legalize-or.mir | 287 +- .../AMDGPU/GlobalISel/legalize-phi.mir | 152 +- .../AMDGPU/GlobalISel/legalize-saddsat.mir | 90 +- .../AMDGPU/GlobalISel/legalize-select.mir | 115 +- .../AMDGPU/GlobalISel/legalize-sext-inreg.mir | 2 +- .../AMDGPU/GlobalISel/legalize-sext.mir | 5 +- .../AMDGPU/GlobalISel/legalize-shl.mir | 104 +- .../GlobalISel/legalize-shuffle-vector.mir | 104 +- .../legalize-shuffle-vector.s16.mir | 108 +- .../AMDGPU/GlobalISel/legalize-smax.mir | 46 +- .../AMDGPU/GlobalISel/legalize-smin.mir | 46 +- .../AMDGPU/GlobalISel/legalize-sshlsat.mir | 82 +- .../AMDGPU/GlobalISel/legalize-ssubsat.mir | 90 +- .../GlobalISel/legalize-store-global.mir | 1192 ++- .../AMDGPU/GlobalISel/legalize-store.mir | 16 +- .../AMDGPU/GlobalISel/legalize-uaddsat.mir | 90 +- .../AMDGPU/GlobalISel/legalize-umax.mir | 46 +- .../AMDGPU/GlobalISel/legalize-umin.mir | 46 +- .../AMDGPU/GlobalISel/legalize-umulh.mir | 72 +- .../AMDGPU/GlobalISel/legalize-ushlsat.mir | 82 +- .../AMDGPU/GlobalISel/legalize-usubsat.mir | 90 +- .../legalize-vector-args-gfx8-plus.mir | 418 +- .../AMDGPU/GlobalISel/legalize-xor.mir | 277 +- .../AMDGPU/GlobalISel/legalize-zext.mir | 41 +- .../llvm.amdgcn.image.load.1d.d16.ll | 27 +- .../GlobalISel/llvm.amdgcn.s.buffer.load.ll | 8119 +++++++++-------- .../AMDGPU/GlobalISel/load-constant.96.ll | 64 +- .../regbankselect-amdgcn.s.buffer.load.ll | 32 +- .../AMDGPU/GlobalISel/regbankselect-load.mir | 58 +- ...gbankselect-split-scalar-load-metadata.mir | 14 +- .../CodeGen/AMDGPU/GlobalISel/sdiv.i64.ll | 946 +- .../CodeGen/AMDGPU/GlobalISel/srem.i64.ll | 910 +- .../CodeGen/AMDGPU/GlobalISel/urem.i64.ll | 509 +- .../test/CodeGen/AMDGPU/fp-min-max-atomics.ll | 33 +- .../CodeGen/AMDGPU/lds-atomic-fmin-fmax.ll | 188 +- llvm/test/CodeGen/AMDGPU/twoaddr-constrain.ll | 15 +- .../GlobalISel/LegalizerHelperTest.cpp | 175 +- 94 files changed, 11048 insertions(+), 10769 deletions(-) diff --git a/llvm/include/llvm/CodeGen/GlobalISel/LegalizerHelper.h b/llvm/include/llvm/CodeGen/GlobalISel/LegalizerHelper.h index 74615c73741a..044f2e22cfdd 100644 --- a/llvm/include/llvm/CodeGen/GlobalISel/LegalizerHelper.h +++ b/llvm/include/llvm/CodeGen/GlobalISel/LegalizerHelper.h @@ -192,6 +192,10 @@ class LegalizerHelper { SmallVectorImpl &VRegs, SmallVectorImpl &LeftoverVRegs); + /// Version which handles irregular sub-vector splits. + void extractVectorParts(Register Reg, unsigned NumElst, + SmallVectorImpl &VRegs); + /// Helper function to build a wide generic register \p DstReg of type \p /// RegTy from smaller parts. 
This will produce a G_MERGE_VALUES, /// G_BUILD_VECTOR, G_CONCAT_VECTORS, or sequence of G_INSERT as appropriate @@ -205,6 +209,11 @@ class LegalizerHelper { LLT PartTy, ArrayRef PartRegs, LLT LeftoverTy = LLT(), ArrayRef LeftoverRegs = {}); + /// Merge \p PartRegs with different types into \p DstReg. + void mergeMixedSubvectors(Register DstReg, ArrayRef PartRegs); + + void appendVectorElts(SmallVectorImpl &Elts, Register Reg); + /// Unmerge \p SrcReg into smaller sized values, and append them to \p /// Parts. The elements of \p Parts will be the greatest common divisor type /// of \p DstTy, \p NarrowTy and the type of \p SrcReg. This will compute and @@ -285,26 +294,18 @@ class LegalizerHelper { /// vector bounds. Register getVectorElementPointer(Register VecPtr, LLT VecTy, Register Index); - LegalizeResult fewerElementsVectorImplicitDef(MachineInstr &MI, - unsigned TypeIdx, LLT NarrowTy); - - /// Legalize a instruction with a vector type where each operand may have a - /// different element type. All type indexes must have the same number of - /// elements. - LegalizeResult fewerElementsVectorMultiEltType(MachineInstr &MI, - unsigned TypeIdx, LLT NarrowTy); + /// Handles most opcodes. Split \p MI into same instruction on sub-vectors or + /// scalars with \p NumElts elements (1 for scalar). Supports uneven splits: + /// there can be leftover sub-vector with fewer then \p NumElts or a leftover + /// scalar. To avoid this use moreElements first and set MI number of elements + /// to multiple of \p NumElts. Non-vector operands that should be used on all + /// sub-instructions without split are listed in \p NonVecOpIndices. + LegalizeResult fewerElementsVectorMultiEltType( + GenericMachineInstr &MI, unsigned NumElts, + std::initializer_list NonVecOpIndices = {}); - LegalizeResult fewerElementsVectorCasts(MachineInstr &MI, unsigned TypeIdx, - LLT NarrowTy); - - LegalizeResult - fewerElementsVectorCmp(MachineInstr &MI, unsigned TypeIdx, LLT NarrowTy); - - LegalizeResult - fewerElementsVectorSelect(MachineInstr &MI, unsigned TypeIdx, LLT NarrowTy); - - LegalizeResult fewerElementsVectorPhi(MachineInstr &MI, - unsigned TypeIdx, LLT NarrowTy); + LegalizeResult fewerElementsVectorPhi(GenericMachineInstr &MI, + unsigned NumElts); LegalizeResult moreElementsVectorPhi(MachineInstr &MI, unsigned TypeIdx, LLT MoreTy); @@ -320,22 +321,9 @@ class LegalizerHelper { unsigned TypeIdx, LLT NarrowTy); - LegalizeResult fewerElementsVectorMulo(MachineInstr &MI, unsigned TypeIdx, - LLT NarrowTy); - LegalizeResult reduceLoadStoreWidth(GLoadStore &MI, unsigned TypeIdx, LLT NarrowTy); - /// Legalize an instruction by reducing the operation width, either by - /// narrowing the type of the operation or by reducing the number of elements - /// of a vector. - /// The used strategy (narrow vs. fewerElements) is decided by \p NarrowTy. - /// Narrow is used if the scalar type of \p NarrowTy and \p DstTy differ, - /// fewerElements is used when the scalar type is the same but the number of - /// elements between \p NarrowTy and \p DstTy differ. 
- LegalizeResult reduceOperationWidth(MachineInstr &MI, unsigned TypeIdx, - LLT NarrowTy); - LegalizeResult fewerElementsVectorSextInReg(MachineInstr &MI, unsigned TypeIdx, LLT NarrowTy); diff --git a/llvm/include/llvm/CodeGen/GlobalISel/LegalizerInfo.h b/llvm/include/llvm/CodeGen/GlobalISel/LegalizerInfo.h index 68c14240ebc7..0b37539030b1 100644 --- a/llvm/include/llvm/CodeGen/GlobalISel/LegalizerInfo.h +++ b/llvm/include/llvm/CodeGen/GlobalISel/LegalizerInfo.h @@ -58,7 +58,10 @@ enum LegalizeAction : std::uint8_t { /// The (vector) operation should be implemented by splitting it into /// sub-vectors where the operation is legal. For example a <8 x s64> add - /// might be implemented as 4 separate <2 x s64> adds. + /// might be implemented as 4 separate <2 x s64> adds. There can be a leftover + /// if there are not enough elements for last sub-vector e.g. <7 x s64> add + /// will be implemented as 3 separate <2 x s64> adds and one s64 add. Leftover + /// types can be avoided by doing MoreElements first. FewerElements, /// The (vector) operation should be implemented by widening the input @@ -1050,6 +1053,26 @@ class LegalizeRuleSet { TypeIdx, LLT::fixed_vector(MinElements, VecTy.getElementType())); }); } + + /// Set number of elements to nearest larger multiple of NumElts. + LegalizeRuleSet &alignNumElementsTo(unsigned TypeIdx, const LLT EltTy, + unsigned NumElts) { + typeIdx(TypeIdx); + return actionIf( + LegalizeAction::MoreElements, + [=](const LegalityQuery &Query) { + LLT VecTy = Query.Types[TypeIdx]; + return VecTy.isVector() && VecTy.getElementType() == EltTy && + (VecTy.getNumElements() % NumElts != 0); + }, + [=](const LegalityQuery &Query) { + LLT VecTy = Query.Types[TypeIdx]; + unsigned NewSize = alignTo(VecTy.getNumElements(), NumElts); + return std::make_pair( + TypeIdx, LLT::fixed_vector(NewSize, VecTy.getElementType())); + }); + } + /// Limit the number of elements in EltTy vectors to at most MaxElements. LegalizeRuleSet &clampMaxNumElements(unsigned TypeIdx, const LLT EltTy, unsigned MaxElements) { @@ -1085,6 +1108,19 @@ class LegalizeRuleSet { .clampMaxNumElements(TypeIdx, EltTy, MaxTy.getNumElements()); } + /// Express \p EltTy vectors strictly using vectors with \p NumElts elements + /// (or scalars when \p NumElts equals 1). + /// First pad with undef elements to nearest larger multiple of \p NumElts. + /// Then perform split with all sub-instructions having the same type. + /// Using clampMaxNumElements (non-strict) can result in leftover instruction + /// with different type (fewer elements then \p NumElts or scalar). + /// No effect if the type is not a vector. + LegalizeRuleSet &clampMaxNumElementsStrict(unsigned TypeIdx, const LLT EltTy, + unsigned NumElts) { + return alignNumElementsTo(TypeIdx, EltTy, NumElts) + .clampMaxNumElements(TypeIdx, EltTy, NumElts); + } + /// Fallback on the previous implementation. This should only be used while /// porting a rule. 
LegalizeRuleSet &fallback() { diff --git a/llvm/include/llvm/CodeGen/GlobalISel/MachineIRBuilder.h b/llvm/include/llvm/CodeGen/GlobalISel/MachineIRBuilder.h index 069f71b54328..fde0cb3cf1af 100644 --- a/llvm/include/llvm/CodeGen/GlobalISel/MachineIRBuilder.h +++ b/llvm/include/llvm/CodeGen/GlobalISel/MachineIRBuilder.h @@ -497,6 +497,34 @@ class MachineIRBuilder { MachineInstrBuilder buildMaskLowPtrBits(const DstOp &Res, const SrcOp &Op0, uint32_t NumBits); + /// Build and insert + /// a, b, ..., x = G_UNMERGE_VALUES \p Op0 + /// \p Res = G_BUILD_VECTOR a, b, ..., x, undef, ..., undef + /// + /// Pad \p Op0 with undef elements to match number of elements in \p Res. + /// + /// \pre setBasicBlock or setMI must have been called. + /// \pre \p Res and \p Op0 must be generic virtual registers with vector type, + /// same vector element type and Op0 must have fewer elements then Res. + /// + /// \return a MachineInstrBuilder for the newly created build vector instr. + MachineInstrBuilder buildPadVectorWithUndefElements(const DstOp &Res, + const SrcOp &Op0); + + /// Build and insert + /// a, b, ..., x, y, z = G_UNMERGE_VALUES \p Op0 + /// \p Res = G_BUILD_VECTOR a, b, ..., x + /// + /// Delete trailing elements in \p Op0 to match number of elements in \p Res. + /// + /// \pre setBasicBlock or setMI must have been called. + /// \pre \p Res and \p Op0 must be generic virtual registers with vector type, + /// same vector element type and Op0 must have more elements then Res. + /// + /// \return a MachineInstrBuilder for the newly created build vector instr. + MachineInstrBuilder buildDeleteTrailingVectorElements(const DstOp &Res, + const SrcOp &Op0); + /// Build and insert \p Res, \p CarryOut = G_UADDO \p Op0, \p Op1 /// /// G_UADDO sets \p Res to \p Op0 + \p Op1 (truncated to the bit width) and diff --git a/llvm/include/llvm/CodeGen/GlobalISel/Utils.h b/llvm/include/llvm/CodeGen/GlobalISel/Utils.h index 4126e2ac7b8f..8fed79585fe9 100644 --- a/llvm/include/llvm/CodeGen/GlobalISel/Utils.h +++ b/llvm/include/llvm/CodeGen/GlobalISel/Utils.h @@ -323,6 +323,11 @@ Register getFunctionLiveInPhysReg(MachineFunction &MF, const TargetInstrInfo &TI LLVM_READNONE LLT getLCMType(LLT OrigTy, LLT TargetTy); +LLVM_READNONE +/// Return smallest type that covers both \p OrigTy and \p TargetTy and is +/// multiple of TargetTy. +LLT getCoverTy(LLT OrigTy, LLT TargetTy); + /// Return a type where the total size is the greatest common divisor of \p /// OrigTy and \p TargetTy. This will try to either change the number of vector /// elements, or bitwidth of scalars. The intent is the result type can be used diff --git a/llvm/lib/CodeGen/GlobalISel/CallLowering.cpp b/llvm/lib/CodeGen/GlobalISel/CallLowering.cpp index 17094a8e44f8..d061664e8c5d 100644 --- a/llvm/lib/CodeGen/GlobalISel/CallLowering.cpp +++ b/llvm/lib/CodeGen/GlobalISel/CallLowering.cpp @@ -256,7 +256,7 @@ mergeVectorRegsToResultRegs(MachineIRBuilder &B, ArrayRef DstRegs, LLT PartLLT = MRI.getType(SrcRegs[0]); // Deal with v3s16 split into v2s16 - LLT LCMTy = getLCMType(LLTy, PartLLT); + LLT LCMTy = getCoverTy(LLTy, PartLLT); if (LCMTy == LLTy) { // Common case where no padding is needed. assert(DstRegs.size() == 1); @@ -267,21 +267,9 @@ mergeVectorRegsToResultRegs(MachineIRBuilder &B, ArrayRef DstRegs, // widening the original value. Register UnmergeSrcReg; if (LCMTy != PartLLT) { - // e.g. 
A <3 x s16> value was split to <2 x s16> - // %register_value0:_(<2 x s16>) - // %register_value1:_(<2 x s16>) - // %undef:_(<2 x s16>) = G_IMPLICIT_DEF - // %concat:_<6 x s16>) = G_CONCAT_VECTORS %reg_value0, %reg_value1, %undef - // %dst_reg:_(<3 x s16>), %dead:_(<3 x s16>) = G_UNMERGE_VALUES %concat - const int NumWide = LCMTy.getSizeInBits() / PartLLT.getSizeInBits(); - Register Undef = B.buildUndef(PartLLT).getReg(0); - - // Build vector of undefs. - SmallVector WidenedSrcs(NumWide, Undef); - - // Replace the first sources with the real registers. - std::copy(SrcRegs.begin(), SrcRegs.end(), WidenedSrcs.begin()); - UnmergeSrcReg = B.buildConcatVectors(LCMTy, WidenedSrcs).getReg(0); + assert(DstRegs.size() == 1); + return B.buildDeleteTrailingVectorElements(DstRegs[0], + B.buildMerge(LCMTy, SrcRegs)); } else { // We don't need to widen anything if we're extracting a scalar which was // promoted to a vector e.g. s8 -> v4s8 -> s8 @@ -298,6 +286,8 @@ mergeVectorRegsToResultRegs(MachineIRBuilder &B, ArrayRef DstRegs, for (int I = DstRegs.size(); I != NumDst; ++I) PadDstRegs[I] = MRI.createGenericVirtualRegister(LLTy); + if (PadDstRegs.size() == 1) + return B.buildDeleteTrailingVectorElements(DstRegs[0], UnmergeSrcReg); return B.buildUnmerge(PadDstRegs, UnmergeSrcReg); } @@ -485,7 +475,7 @@ static void buildCopyToRegs(MachineIRBuilder &B, ArrayRef DstRegs, MachineRegisterInfo &MRI = *B.getMRI(); LLT DstTy = MRI.getType(DstRegs[0]); - LLT LCMTy = getLCMType(SrcTy, PartTy); + LLT LCMTy = getCoverTy(SrcTy, PartTy); const unsigned DstSize = DstTy.getSizeInBits(); const unsigned SrcSize = SrcTy.getSizeInBits(); @@ -493,7 +483,7 @@ static void buildCopyToRegs(MachineIRBuilder &B, ArrayRef DstRegs, Register UnmergeSrc = SrcReg; - if (CoveringSize != SrcSize) { + if (!LCMTy.isVector() && CoveringSize != SrcSize) { // For scalars, it's common to be able to use a simple extension. if (SrcTy.isScalar() && DstTy.isScalar()) { CoveringSize = alignTo(SrcSize, DstSize); @@ -510,14 +500,10 @@ static void buildCopyToRegs(MachineIRBuilder &B, ArrayRef DstRegs, } } - // Unmerge to the original registers and pad with dead defs. - SmallVector UnmergeResults(DstRegs.begin(), DstRegs.end()); - for (unsigned Size = DstSize * DstRegs.size(); Size != CoveringSize; - Size += DstSize) { - UnmergeResults.push_back(MRI.createGenericVirtualRegister(DstTy)); - } + if (LCMTy.isVector() && CoveringSize != SrcSize) + UnmergeSrc = B.buildPadVectorWithUndefElements(LCMTy, SrcReg).getReg(0); - B.buildUnmerge(UnmergeResults, UnmergeSrc); + B.buildUnmerge(DstRegs, UnmergeSrc); } bool CallLowering::determineAndHandleAssignments( diff --git a/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp b/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp index e09cd26eb0c1..6b50fa49c063 100644 --- a/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp +++ b/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp @@ -176,16 +176,18 @@ bool LegalizerHelper::extractParts(Register Reg, LLT RegTy, return true; } + // Perform irregular split. Leftover is last element of RegPieces. 
if (MainTy.isVector()) { - unsigned EltSize = MainTy.getScalarSizeInBits(); - if (LeftoverSize % EltSize != 0) - return false; - LeftoverTy = LLT::scalarOrVector( - ElementCount::getFixed(LeftoverSize / EltSize), EltSize); - } else { - LeftoverTy = LLT::scalar(LeftoverSize); + SmallVector RegPieces; + extractVectorParts(Reg, MainTy.getNumElements(), RegPieces); + for (unsigned i = 0; i < RegPieces.size() - 1; ++i) + VRegs.push_back(RegPieces[i]); + LeftoverRegs.push_back(RegPieces[RegPieces.size() - 1]); + LeftoverTy = MRI.getType(LeftoverRegs[0]); + return true; } + LeftoverTy = LLT::scalar(LeftoverSize); // For irregular sizes, extract the individual parts. for (unsigned I = 0; I != NumParts; ++I) { Register NewReg = MRI.createGenericVirtualRegister(MainTy); @@ -203,6 +205,44 @@ bool LegalizerHelper::extractParts(Register Reg, LLT RegTy, return true; } +void LegalizerHelper::extractVectorParts(Register Reg, unsigned NumElts, + SmallVectorImpl &VRegs) { + LLT RegTy = MRI.getType(Reg); + assert(RegTy.isVector() && "Expected a vector type"); + + LLT EltTy = RegTy.getElementType(); + LLT NarrowTy = (NumElts == 1) ? EltTy : LLT::fixed_vector(NumElts, EltTy); + unsigned RegNumElts = RegTy.getNumElements(); + unsigned LeftoverNumElts = RegNumElts % NumElts; + unsigned NumNarrowTyPieces = RegNumElts / NumElts; + + // Perfect split without leftover + if (LeftoverNumElts == 0) + return extractParts(Reg, NarrowTy, NumNarrowTyPieces, VRegs); + + // Irregular split. Provide direct access to all elements for artifact + // combiner using unmerge to elements. Then build vectors with NumElts + // elements. Remaining element(s) will be (used to build vector) Leftover. + SmallVector Elts; + extractParts(Reg, EltTy, RegNumElts, Elts); + + unsigned Offset = 0; + // Requested sub-vectors of NarrowTy. + for (unsigned i = 0; i < NumNarrowTyPieces; ++i, Offset += NumElts) { + ArrayRef Pieces(&Elts[Offset], NumElts); + VRegs.push_back(MIRBuilder.buildMerge(NarrowTy, Pieces).getReg(0)); + } + + // Leftover element(s). + if (LeftoverNumElts == 1) { + VRegs.push_back(Elts[Offset]); + } else { + LLT LeftoverTy = LLT::fixed_vector(LeftoverNumElts, EltTy); + ArrayRef Pieces(&Elts[Offset], LeftoverNumElts); + VRegs.push_back(MIRBuilder.buildMerge(LeftoverTy, Pieces).getReg(0)); + } +} + void LegalizerHelper::insertParts(Register DstReg, LLT ResultTy, LLT PartTy, ArrayRef PartRegs, @@ -223,6 +263,15 @@ void LegalizerHelper::insertParts(Register DstReg, return; } + // Merge sub-vectors with different number of elements and insert into DstReg. + if (ResultTy.isVector()) { + assert(LeftoverRegs.size() == 1 && "Expected one leftover register"); + SmallVector AllRegs; + for (auto Reg : concat(PartRegs, LeftoverRegs)) + AllRegs.push_back(Reg); + return mergeMixedSubvectors(DstReg, AllRegs); + } + SmallVector GCDRegs; LLT GCDTy = getGCDType(getGCDType(ResultTy, LeftoverTy), PartTy); for (auto PartReg : concat(PartRegs, LeftoverRegs)) @@ -231,6 +280,31 @@ void LegalizerHelper::insertParts(Register DstReg, buildWidenedRemergeToDst(DstReg, ResultLCMTy, GCDRegs); } +void LegalizerHelper::appendVectorElts(SmallVectorImpl &Elts, + Register Reg) { + LLT Ty = MRI.getType(Reg); + SmallVector RegElts; + extractParts(Reg, Ty.getScalarType(), Ty.getNumElements(), RegElts); + Elts.append(RegElts); +} + +/// Merge \p PartRegs with different types into \p DstReg. 
+void LegalizerHelper::mergeMixedSubvectors(Register DstReg, + ArrayRef PartRegs) { + SmallVector AllElts; + for (unsigned i = 0; i < PartRegs.size() - 1; ++i) + appendVectorElts(AllElts, PartRegs[i]); + + Register Leftover = PartRegs[PartRegs.size() - 1]; + if (MRI.getType(Leftover).isScalar()) + AllElts.push_back(Leftover); + else + appendVectorElts(AllElts, Leftover); + + MIRBuilder.buildMerge(DstReg, AllElts); + return; +} + /// Append the result registers of G_UNMERGE_VALUES \p MI to \p Regs. static void getUnmergeResults(SmallVectorImpl &Regs, const MachineInstr &MI) { @@ -916,8 +990,26 @@ LegalizerHelper::LegalizeResult LegalizerHelper::narrowScalar(MachineInstr &MI, return Legalized; } - case TargetOpcode::G_FREEZE: - return reduceOperationWidth(MI, TypeIdx, NarrowTy); + case TargetOpcode::G_FREEZE: { + if (TypeIdx != 0) + return UnableToLegalize; + + LLT Ty = MRI.getType(MI.getOperand(0).getReg()); + // Should widen scalar first + if (Ty.getSizeInBits() % NarrowTy.getSizeInBits() != 0) + return UnableToLegalize; + + auto Unmerge = MIRBuilder.buildUnmerge(NarrowTy, MI.getOperand(1).getReg()); + SmallVector Parts; + for (unsigned i = 0; i < Unmerge->getNumDefs(); ++i) { + Parts.push_back( + MIRBuilder.buildFreeze(NarrowTy, Unmerge.getReg(i)).getReg(0)); + } + + MIRBuilder.buildMerge(MI.getOperand(0).getReg(), Parts); + MI.eraseFromParent(); + return Legalized; + } case TargetOpcode::G_ADD: case TargetOpcode::G_SUB: case TargetOpcode::G_SADDO: @@ -1372,37 +1464,17 @@ void LegalizerHelper::moreElementsVectorDst(MachineInstr &MI, LLT WideTy, unsigned OpIdx) { MachineOperand &MO = MI.getOperand(OpIdx); MIRBuilder.setInsertPt(MIRBuilder.getMBB(), ++MIRBuilder.getInsertPt()); - MO.setReg(widenWithUnmerge(WideTy, MO.getReg())); + Register Dst = MO.getReg(); + Register DstExt = MRI.createGenericVirtualRegister(WideTy); + MO.setReg(DstExt); + MIRBuilder.buildDeleteTrailingVectorElements(Dst, DstExt); } void LegalizerHelper::moreElementsVectorSrc(MachineInstr &MI, LLT MoreTy, unsigned OpIdx) { MachineOperand &MO = MI.getOperand(OpIdx); - - LLT OldTy = MRI.getType(MO.getReg()); - unsigned OldElts = OldTy.getNumElements(); - unsigned NewElts = MoreTy.getNumElements(); - - unsigned NumParts = NewElts / OldElts; - - // Use concat_vectors if the result is a multiple of the number of elements. - if (NumParts * OldElts == NewElts) { - SmallVector Parts; - Parts.push_back(MO.getReg()); - - Register ImpDef = MIRBuilder.buildUndef(OldTy).getReg(0); - for (unsigned I = 1; I != NumParts; ++I) - Parts.push_back(ImpDef); - - auto Concat = MIRBuilder.buildConcatVectors(MoreTy, Parts); - MO.setReg(Concat.getReg(0)); - return; - } - - Register MoreReg = MRI.createGenericVirtualRegister(MoreTy); - Register ImpDef = MIRBuilder.buildUndef(MoreTy).getReg(0); - MIRBuilder.buildInsert(MoreReg, ImpDef, MO.getReg(), 0); - MO.setReg(MoreReg); + SmallVector Regs; + MO.setReg(MIRBuilder.buildPadVectorWithUndefElements(MoreTy, MO).getReg(0)); } void LegalizerHelper::bitcastSrc(MachineInstr &MI, LLT CastTy, unsigned OpIdx) { @@ -3558,20 +3630,81 @@ Register LegalizerHelper::getVectorElementPointer(Register VecPtr, LLT VecTy, return MIRBuilder.buildPtrAdd(PtrTy, VecPtr, Mul).getReg(0); } -LegalizerHelper::LegalizeResult LegalizerHelper::fewerElementsVectorImplicitDef( - MachineInstr &MI, unsigned TypeIdx, LLT NarrowTy) { - Register DstReg = MI.getOperand(0).getReg(); - LLT DstTy = MRI.getType(DstReg); - LLT LCMTy = getLCMType(DstTy, NarrowTy); +/// Check that all vector operands have same number of elements. 
Other operands +/// should be listed in NonVecOp. +static bool hasSameNumEltsOnAllVectorOperands( + GenericMachineInstr &MI, MachineRegisterInfo &MRI, + std::initializer_list NonVecOpIndices) { + if (MI.getNumMemOperands() != 0) + return false; - unsigned NumParts = LCMTy.getSizeInBits() / NarrowTy.getSizeInBits(); + LLT VecTy = MRI.getType(MI.getReg(0)); + if (!VecTy.isVector()) + return false; + unsigned NumElts = VecTy.getNumElements(); - auto NewUndef = MIRBuilder.buildUndef(NarrowTy); - SmallVector Parts(NumParts, NewUndef.getReg(0)); + for (unsigned OpIdx = 1; OpIdx < MI.getNumOperands(); ++OpIdx) { + MachineOperand &Op = MI.getOperand(OpIdx); + if (!Op.isReg()) { + if (!is_contained(NonVecOpIndices, OpIdx)) + return false; + continue; + } - buildWidenedRemergeToDst(DstReg, LCMTy, Parts); - MI.eraseFromParent(); - return Legalized; + LLT Ty = MRI.getType(Op.getReg()); + if (!Ty.isVector()) { + if (!is_contained(NonVecOpIndices, OpIdx)) + return false; + is_contained(NonVecOpIndices, OpIdx); + continue; + } + + if (Ty.getNumElements() != NumElts) + return false; + } + + return true; +} + +/// Fill \p DstOps with DstOps that have same number of elements combined as +/// the Ty. These DstOps have either scalar type when \p NumElts = 1 or are +/// vectors with \p NumElts elements. When Ty.getNumElements() is not multiple +/// of \p NumElts last DstOp (leftover) has fewer then \p NumElts elements. +static void makeDstOps(SmallVectorImpl &DstOps, LLT Ty, + unsigned NumElts) { + LLT LeftoverTy; + assert(Ty.isVector() && "Expected vector type"); + LLT EltTy = Ty.getElementType(); + LLT NarrowTy = (NumElts == 1) ? EltTy : LLT::fixed_vector(NumElts, EltTy); + int NumParts, NumLeftover; + std::tie(NumParts, NumLeftover) = + getNarrowTypeBreakDown(Ty, NarrowTy, LeftoverTy); + + assert(NumParts > 0 && "Error in getNarrowTypeBreakDown"); + for (int i = 0; i < NumParts; ++i) { + DstOps.push_back(NarrowTy); + } + + if (LeftoverTy.isValid()) { + assert(NumLeftover == 1 && "expected exactly one leftover"); + DstOps.push_back(LeftoverTy); + } +} + +/// Operand \p Op is used on \p N sub-instructions. Fill \p Ops with \p N SrcOps +/// made from \p Op depending on operand type. +static void broadcastSrcOp(SmallVectorImpl &Ops, unsigned N, + MachineOperand &Op) { + for (unsigned i = 0; i < N; ++i) { + if (Op.isReg()) + Ops.push_back(Op.getReg()); + else if (Op.isImm()) + Ops.push_back(Op.getImm()); + else if (Op.isPredicate()) + Ops.push_back(static_cast(Op.getPredicate())); + else + llvm_unreachable("Unsupported type"); + } } // Handle splitting vector operations which need to have the same number of @@ -3588,335 +3721,116 @@ LegalizerHelper::LegalizeResult LegalizerHelper::fewerElementsVectorImplicitDef( // s64 = G_SHL s64, s32 LegalizerHelper::LegalizeResult LegalizerHelper::fewerElementsVectorMultiEltType( - MachineInstr &MI, unsigned TypeIdx, LLT NarrowTyArg) { - if (TypeIdx != 0) - return UnableToLegalize; - - const LLT NarrowTy0 = NarrowTyArg; - const Register DstReg = MI.getOperand(0).getReg(); - LLT DstTy = MRI.getType(DstReg); - LLT LeftoverTy0; - - // All of the operands need to have the same number of elements, so if we can - // determine a type breakdown for the result type, we can for all of the - // source types. 
- int NumParts = getNarrowTypeBreakDown(DstTy, NarrowTy0, LeftoverTy0).first; - if (NumParts < 0) - return UnableToLegalize; - - SmallVector NewInsts; - - SmallVector DstRegs, LeftoverDstRegs; - SmallVector PartRegs, LeftoverRegs; - - for (unsigned I = 1, E = MI.getNumOperands(); I != E; ++I) { - Register SrcReg = MI.getOperand(I).getReg(); - LLT SrcTyI = MRI.getType(SrcReg); - const auto NewEC = NarrowTy0.isVector() ? NarrowTy0.getElementCount() - : ElementCount::getFixed(1); - LLT NarrowTyI = LLT::scalarOrVector(NewEC, SrcTyI.getScalarType()); - LLT LeftoverTyI; - - // Split this operand into the requested typed registers, and any leftover - // required to reproduce the original type. - if (!extractParts(SrcReg, SrcTyI, NarrowTyI, LeftoverTyI, PartRegs, - LeftoverRegs)) - return UnableToLegalize; - - if (I == 1) { - // For the first operand, create an instruction for each part and setup - // the result. - for (Register PartReg : PartRegs) { - Register PartDstReg = MRI.createGenericVirtualRegister(NarrowTy0); - NewInsts.push_back(MIRBuilder.buildInstrNoInsert(MI.getOpcode()) - .addDef(PartDstReg) - .addUse(PartReg)); - DstRegs.push_back(PartDstReg); - } - - for (Register LeftoverReg : LeftoverRegs) { - Register PartDstReg = MRI.createGenericVirtualRegister(LeftoverTy0); - NewInsts.push_back(MIRBuilder.buildInstrNoInsert(MI.getOpcode()) - .addDef(PartDstReg) - .addUse(LeftoverReg)); - LeftoverDstRegs.push_back(PartDstReg); - } + GenericMachineInstr &MI, unsigned NumElts, + std::initializer_list NonVecOpIndices) { + assert(hasSameNumEltsOnAllVectorOperands(MI, MRI, NonVecOpIndices) && + "Non-compatible opcode or not specified non-vector operands"); + unsigned OrigNumElts = MRI.getType(MI.getReg(0)).getNumElements(); + + unsigned NumInputs = MI.getNumOperands() - MI.getNumDefs(); + unsigned NumDefs = MI.getNumDefs(); + + // Create DstOps (sub-vectors with NumElts elts + Leftover) for each output. + // Build instructions with DstOps to use instruction found by CSE directly. + // CSE copies found instruction into given vreg when building with vreg dest. + SmallVector, 2> OutputOpsPieces(NumDefs); + // Output registers will be taken from created instructions. + SmallVector, 2> OutputRegs(NumDefs); + for (unsigned i = 0; i < NumDefs; ++i) { + makeDstOps(OutputOpsPieces[i], MRI.getType(MI.getReg(i)), NumElts); + } + + // Split vector input operands into sub-vectors with NumElts elts + Leftover. + // Operands listed in NonVecOpIndices will be used as is without splitting; + // examples: compare predicate in icmp and fcmp (op 1), vector select with i1 + // scalar condition (op 1), immediate in sext_inreg (op 2). + SmallVector, 3> InputOpsPieces(NumInputs); + for (unsigned UseIdx = NumDefs, UseNo = 0; UseIdx < MI.getNumOperands(); + ++UseIdx, ++UseNo) { + if (is_contained(NonVecOpIndices, UseIdx)) { + broadcastSrcOp(InputOpsPieces[UseNo], OutputOpsPieces[0].size(), + MI.getOperand(UseIdx)); } else { - assert(NewInsts.size() == PartRegs.size() + LeftoverRegs.size()); - - // Add the newly created operand splits to the existing instructions. The - // odd-sized pieces are ordered after the requested NarrowTyArg sized - // pieces. 
- unsigned InstCount = 0; - for (unsigned J = 0, JE = PartRegs.size(); J != JE; ++J) - NewInsts[InstCount++].addUse(PartRegs[J]); - for (unsigned J = 0, JE = LeftoverRegs.size(); J != JE; ++J) - NewInsts[InstCount++].addUse(LeftoverRegs[J]); + SmallVector SplitPieces; + extractVectorParts(MI.getReg(UseIdx), NumElts, SplitPieces); + for (auto Reg : SplitPieces) + InputOpsPieces[UseNo].push_back(Reg); } - - PartRegs.clear(); - LeftoverRegs.clear(); } - // Insert the newly built operations and rebuild the result register. - for (auto &MIB : NewInsts) - MIRBuilder.insertInstr(MIB); + unsigned NumLeftovers = OrigNumElts % NumElts ? 1 : 0; - insertParts(DstReg, DstTy, NarrowTy0, DstRegs, LeftoverTy0, LeftoverDstRegs); + // Take i-th piece of each input operand split and build sub-vector/scalar + // instruction. Set i-th DstOp(s) from OutputOpsPieces as destination(s). + for (unsigned i = 0; i < OrigNumElts / NumElts + NumLeftovers; ++i) { + SmallVector Defs; + for (unsigned DstNo = 0; DstNo < NumDefs; ++DstNo) + Defs.push_back(OutputOpsPieces[DstNo][i]); - MI.eraseFromParent(); - return Legalized; -} + SmallVector Uses; + for (unsigned InputNo = 0; InputNo < NumInputs; ++InputNo) + Uses.push_back(InputOpsPieces[InputNo][i]); -LegalizerHelper::LegalizeResult -LegalizerHelper::fewerElementsVectorCasts(MachineInstr &MI, unsigned TypeIdx, - LLT NarrowTy) { - if (TypeIdx != 0) - return UnableToLegalize; - - Register DstReg = MI.getOperand(0).getReg(); - Register SrcReg = MI.getOperand(1).getReg(); - LLT DstTy = MRI.getType(DstReg); - LLT SrcTy = MRI.getType(SrcReg); - - LLT NarrowTy0 = NarrowTy; - LLT NarrowTy1; - unsigned NumParts; - - if (NarrowTy.isVector()) { - // Uneven breakdown not handled. - NumParts = DstTy.getNumElements() / NarrowTy.getNumElements(); - if (NumParts * NarrowTy.getNumElements() != DstTy.getNumElements()) - return UnableToLegalize; - - NarrowTy1 = LLT::vector(NarrowTy.getElementCount(), SrcTy.getElementType()); - } else { - NumParts = DstTy.getNumElements(); - NarrowTy1 = SrcTy.getElementType(); + auto I = MIRBuilder.buildInstr(MI.getOpcode(), Defs, Uses, MI.getFlags()); + for (unsigned DstNo = 0; DstNo < NumDefs; ++DstNo) + OutputRegs[DstNo].push_back(I.getReg(DstNo)); } - SmallVector SrcRegs, DstRegs; - extractParts(SrcReg, NarrowTy1, NumParts, SrcRegs); - - for (unsigned I = 0; I < NumParts; ++I) { - Register DstReg = MRI.createGenericVirtualRegister(NarrowTy0); - MachineInstr *NewInst = - MIRBuilder.buildInstr(MI.getOpcode(), {DstReg}, {SrcRegs[I]}); - - NewInst->setFlags(MI.getFlags()); - DstRegs.push_back(DstReg); + // Merge small outputs into MI's output for each def operand. + if (NumLeftovers) { + for (unsigned i = 0; i < NumDefs; ++i) + mergeMixedSubvectors(MI.getReg(i), OutputRegs[i]); + } else { + for (unsigned i = 0; i < NumDefs; ++i) + MIRBuilder.buildMerge(MI.getReg(i), OutputRegs[i]); } - if (NarrowTy.isVector()) - MIRBuilder.buildConcatVectors(DstReg, DstRegs); - else - MIRBuilder.buildBuildVector(DstReg, DstRegs); - MI.eraseFromParent(); return Legalized; } LegalizerHelper::LegalizeResult -LegalizerHelper::fewerElementsVectorCmp(MachineInstr &MI, unsigned TypeIdx, - LLT NarrowTy) { - Register DstReg = MI.getOperand(0).getReg(); - Register Src0Reg = MI.getOperand(2).getReg(); - LLT DstTy = MRI.getType(DstReg); - LLT SrcTy = MRI.getType(Src0Reg); - - unsigned NumParts; - LLT NarrowTy0, NarrowTy1; - - if (TypeIdx == 0) { - unsigned NewElts = NarrowTy.isVector() ? 
NarrowTy.getNumElements() : 1; - unsigned OldElts = DstTy.getNumElements(); - - NarrowTy0 = NarrowTy; - NumParts = NarrowTy.isVector() ? (OldElts / NewElts) : DstTy.getNumElements(); - NarrowTy1 = NarrowTy.isVector() ? LLT::vector(NarrowTy.getElementCount(), - SrcTy.getScalarSizeInBits()) - : SrcTy.getElementType(); - - } else { - unsigned NewElts = NarrowTy.isVector() ? NarrowTy.getNumElements() : 1; - unsigned OldElts = SrcTy.getNumElements(); - - NumParts = NarrowTy.isVector() ? (OldElts / NewElts) : - NarrowTy.getNumElements(); - NarrowTy0 = - LLT::vector(NarrowTy.getElementCount(), DstTy.getScalarSizeInBits()); - NarrowTy1 = NarrowTy; +LegalizerHelper::fewerElementsVectorPhi(GenericMachineInstr &MI, + unsigned NumElts) { + unsigned OrigNumElts = MRI.getType(MI.getReg(0)).getNumElements(); + + unsigned NumInputs = MI.getNumOperands() - MI.getNumDefs(); + unsigned NumDefs = MI.getNumDefs(); + + SmallVector OutputOpsPieces; + SmallVector OutputRegs; + makeDstOps(OutputOpsPieces, MRI.getType(MI.getReg(0)), NumElts); + + // Instructions that perform register split will be inserted in basic block + // where register is defined (basic block is in the next operand). + SmallVector, 3> InputOpsPieces(NumInputs / 2); + for (unsigned UseIdx = NumDefs, UseNo = 0; UseIdx < MI.getNumOperands(); + UseIdx += 2, ++UseNo) { + MachineBasicBlock &OpMBB = *MI.getOperand(UseIdx + 1).getMBB(); + MIRBuilder.setInsertPt(OpMBB, OpMBB.getFirstTerminator()); + extractVectorParts(MI.getReg(UseIdx), NumElts, InputOpsPieces[UseNo]); } - // FIXME: Don't know how to handle the situation where the small vectors - // aren't all the same size yet. - if (NarrowTy1.isVector() && - NarrowTy1.getNumElements() * NumParts != DstTy.getNumElements()) - return UnableToLegalize; - - CmpInst::Predicate Pred - = static_cast(MI.getOperand(1).getPredicate()); - - SmallVector Src1Regs, Src2Regs, DstRegs; - extractParts(MI.getOperand(2).getReg(), NarrowTy1, NumParts, Src1Regs); - extractParts(MI.getOperand(3).getReg(), NarrowTy1, NumParts, Src2Regs); + // Build PHIs with fewer elements. + unsigned NumLeftovers = OrigNumElts % NumElts ? 
1 : 0; + MIRBuilder.setInsertPt(*MI.getParent(), MI); + for (unsigned i = 0; i < OrigNumElts / NumElts + NumLeftovers; ++i) { + auto Phi = MIRBuilder.buildInstr(TargetOpcode::G_PHI); + Phi.addDef( + MRI.createGenericVirtualRegister(OutputOpsPieces[i].getLLTTy(MRI))); + OutputRegs.push_back(Phi.getReg(0)); - for (unsigned I = 0; I < NumParts; ++I) { - Register DstReg = MRI.createGenericVirtualRegister(NarrowTy0); - DstRegs.push_back(DstReg); - - if (MI.getOpcode() == TargetOpcode::G_ICMP) - MIRBuilder.buildICmp(Pred, DstReg, Src1Regs[I], Src2Regs[I]); - else { - MachineInstr *NewCmp - = MIRBuilder.buildFCmp(Pred, DstReg, Src1Regs[I], Src2Regs[I]); - NewCmp->setFlags(MI.getFlags()); + for (unsigned j = 0; j < NumInputs / 2; ++j) { + Phi.addUse(InputOpsPieces[j][i]); + Phi.add(MI.getOperand(1 + j * 2 + 1)); } } - if (NarrowTy1.isVector()) - MIRBuilder.buildConcatVectors(DstReg, DstRegs); - else - MIRBuilder.buildBuildVector(DstReg, DstRegs); - - MI.eraseFromParent(); - return Legalized; -} - -LegalizerHelper::LegalizeResult -LegalizerHelper::fewerElementsVectorSelect(MachineInstr &MI, unsigned TypeIdx, - LLT NarrowTy) { - Register DstReg = MI.getOperand(0).getReg(); - Register CondReg = MI.getOperand(1).getReg(); - - unsigned NumParts = 0; - LLT NarrowTy0, NarrowTy1; - - LLT DstTy = MRI.getType(DstReg); - LLT CondTy = MRI.getType(CondReg); - unsigned Size = DstTy.getSizeInBits(); - - assert(TypeIdx == 0 || CondTy.isVector()); - - if (TypeIdx == 0) { - NarrowTy0 = NarrowTy; - NarrowTy1 = CondTy; - - unsigned NarrowSize = NarrowTy0.getSizeInBits(); - // FIXME: Don't know how to handle the situation where the small vectors - // aren't all the same size yet. - if (Size % NarrowSize != 0) - return UnableToLegalize; - - NumParts = Size / NarrowSize; - - // Need to break down the condition type - if (CondTy.isVector()) { - if (CondTy.getNumElements() == NumParts) - NarrowTy1 = CondTy.getElementType(); - else - NarrowTy1 = - LLT::vector(CondTy.getElementCount().divideCoefficientBy(NumParts), - CondTy.getScalarSizeInBits()); - } + // Merge small outputs into MI's def. + if (NumLeftovers) { + mergeMixedSubvectors(MI.getReg(0), OutputRegs); } else { - NumParts = CondTy.getNumElements(); - if (NarrowTy.isVector()) { - // TODO: Handle uneven breakdown. - if (NumParts * NarrowTy.getNumElements() != CondTy.getNumElements()) - return UnableToLegalize; - - return UnableToLegalize; - } else { - NarrowTy0 = DstTy.getElementType(); - NarrowTy1 = NarrowTy; - } - } - - SmallVector DstRegs, Src0Regs, Src1Regs, Src2Regs; - if (CondTy.isVector()) - extractParts(MI.getOperand(1).getReg(), NarrowTy1, NumParts, Src0Regs); - - extractParts(MI.getOperand(2).getReg(), NarrowTy0, NumParts, Src1Regs); - extractParts(MI.getOperand(3).getReg(), NarrowTy0, NumParts, Src2Regs); - - for (unsigned i = 0; i < NumParts; ++i) { - Register DstReg = MRI.createGenericVirtualRegister(NarrowTy0); - MIRBuilder.buildSelect(DstReg, CondTy.isVector() ? 
Src0Regs[i] : CondReg, - Src1Regs[i], Src2Regs[i]); - DstRegs.push_back(DstReg); - } - - if (NarrowTy0.isVector()) - MIRBuilder.buildConcatVectors(DstReg, DstRegs); - else - MIRBuilder.buildBuildVector(DstReg, DstRegs); - - MI.eraseFromParent(); - return Legalized; -} - -LegalizerHelper::LegalizeResult -LegalizerHelper::fewerElementsVectorPhi(MachineInstr &MI, unsigned TypeIdx, - LLT NarrowTy) { - const Register DstReg = MI.getOperand(0).getReg(); - LLT PhiTy = MRI.getType(DstReg); - LLT LeftoverTy; - - // All of the operands need to have the same number of elements, so if we can - // determine a type breakdown for the result type, we can for all of the - // source types. - int NumParts, NumLeftover; - std::tie(NumParts, NumLeftover) - = getNarrowTypeBreakDown(PhiTy, NarrowTy, LeftoverTy); - if (NumParts < 0) - return UnableToLegalize; - - SmallVector DstRegs, LeftoverDstRegs; - SmallVector NewInsts; - - const int TotalNumParts = NumParts + NumLeftover; - - // Insert the new phis in the result block first. - for (int I = 0; I != TotalNumParts; ++I) { - LLT Ty = I < NumParts ? NarrowTy : LeftoverTy; - Register PartDstReg = MRI.createGenericVirtualRegister(Ty); - NewInsts.push_back(MIRBuilder.buildInstr(TargetOpcode::G_PHI) - .addDef(PartDstReg)); - if (I < NumParts) - DstRegs.push_back(PartDstReg); - else - LeftoverDstRegs.push_back(PartDstReg); - } - - MachineBasicBlock *MBB = MI.getParent(); - MIRBuilder.setInsertPt(*MBB, MBB->getFirstNonPHI()); - insertParts(DstReg, PhiTy, NarrowTy, DstRegs, LeftoverTy, LeftoverDstRegs); - - SmallVector PartRegs, LeftoverRegs; - - // Insert code to extract the incoming values in each predecessor block. - for (unsigned I = 1, E = MI.getNumOperands(); I != E; I += 2) { - PartRegs.clear(); - LeftoverRegs.clear(); - - Register SrcReg = MI.getOperand(I).getReg(); - MachineBasicBlock &OpMBB = *MI.getOperand(I + 1).getMBB(); - MIRBuilder.setInsertPt(OpMBB, OpMBB.getFirstTerminator()); - - LLT Unused; - if (!extractParts(SrcReg, PhiTy, NarrowTy, Unused, PartRegs, - LeftoverRegs)) - return UnableToLegalize; - - // Add the newly created operand splits to the existing instructions. The - // odd-sized pieces are ordered after the requested NarrowTyArg sized - // pieces. - for (int J = 0; J != TotalNumParts; ++J) { - MachineInstrBuilder MIB = NewInsts[J]; - MIB.addUse(J < NumParts ? PartRegs[J] : LeftoverRegs[J - NumParts]); - MIB.addMBB(&OpMBB); - } + MIRBuilder.buildMerge(MI.getReg(0), OutputRegs); } MI.eraseFromParent(); @@ -3927,27 +3841,36 @@ LegalizerHelper::LegalizeResult LegalizerHelper::fewerElementsVectorUnmergeValues(MachineInstr &MI, unsigned TypeIdx, LLT NarrowTy) { - if (TypeIdx != 1) - return UnableToLegalize; - const int NumDst = MI.getNumOperands() - 1; const Register SrcReg = MI.getOperand(NumDst).getReg(); - LLT SrcTy = MRI.getType(SrcReg); - LLT DstTy = MRI.getType(MI.getOperand(0).getReg()); + LLT SrcTy = MRI.getType(SrcReg); - // TODO: Create sequence of extracts. - if (DstTy == NarrowTy) + if (TypeIdx != 1 || NarrowTy == DstTy) return UnableToLegalize; - LLT GCDTy = getGCDType(SrcTy, NarrowTy); - if (DstTy == GCDTy) { - // This would just be a copy of the same unmerge. - // TODO: Create extracts, pad with undef and create intermediate merges. + // Requires compatible types. Otherwise SrcReg should have been defined by + // merge-like instruction that would get artifact combined. Most likely + // instruction that defines SrcReg has to perform more/fewer elements + // legalization compatible with NarrowTy. 
+ assert(SrcTy.isVector() && NarrowTy.isVector() && "Expected vector types"); + assert((SrcTy.getScalarType() == NarrowTy.getScalarType()) && "bad type"); + + if ((SrcTy.getSizeInBits() % NarrowTy.getSizeInBits() != 0) || + (NarrowTy.getSizeInBits() % DstTy.getSizeInBits() != 0)) return UnableToLegalize; - } - auto Unmerge = MIRBuilder.buildUnmerge(GCDTy, SrcReg); + // This is most likely DstTy (smaller then register size) packed in SrcTy + // (larger then register size) and since unmerge was not combined it will be + // lowered to bit sequence extracts from register. Unpack SrcTy to NarrowTy + // (register size) pieces first. Then unpack each of NarrowTy pieces to DstTy. + + // %1:_(DstTy), %2, %3, %4 = G_UNMERGE_VALUES %0:_(SrcTy) + // + // %5:_(NarrowTy), %6 = G_UNMERGE_VALUES %0:_(SrcTy) - reg sequence + // %1:_(DstTy), %2 = G_UNMERGE_VALUES %5:_(NarrowTy) - sequence of bits in reg + // %3:_(DstTy), %4 = G_UNMERGE_VALUES %6:_(NarrowTy) + auto Unmerge = MIRBuilder.buildUnmerge(NarrowTy, SrcReg); const int NumUnmerge = Unmerge->getNumOperands() - 1; const int PartsPerUnmerge = NumDst / NumUnmerge; @@ -3963,90 +3886,88 @@ LegalizerHelper::fewerElementsVectorUnmergeValues(MachineInstr &MI, return Legalized; } -LegalizerHelper::LegalizeResult -LegalizerHelper::fewerElementsVectorMulo(MachineInstr &MI, unsigned TypeIdx, - LLT NarrowTy) { - Register Result = MI.getOperand(0).getReg(); - Register Overflow = MI.getOperand(1).getReg(); - Register LHS = MI.getOperand(2).getReg(); - Register RHS = MI.getOperand(3).getReg(); - - LLT SrcTy = MRI.getType(LHS); - if (!SrcTy.isVector()) - return UnableToLegalize; - - LLT ElementType = SrcTy.getElementType(); - LLT OverflowElementTy = MRI.getType(Overflow).getElementType(); - const ElementCount NumResult = SrcTy.getElementCount(); - LLT GCDTy = getGCDType(SrcTy, NarrowTy); - - // Unmerge the operands to smaller parts of GCD type. - auto UnmergeLHS = MIRBuilder.buildUnmerge(GCDTy, LHS); - auto UnmergeRHS = MIRBuilder.buildUnmerge(GCDTy, RHS); - - const int NumOps = UnmergeLHS->getNumOperands() - 1; - const ElementCount PartsPerUnmerge = NumResult.divideCoefficientBy(NumOps); - LLT OverflowTy = LLT::scalarOrVector(PartsPerUnmerge, OverflowElementTy); - LLT ResultTy = LLT::scalarOrVector(PartsPerUnmerge, ElementType); - - // Perform the operation over unmerged parts. - SmallVector ResultParts; - SmallVector OverflowParts; - for (int I = 0; I != NumOps; ++I) { - Register Operand1 = UnmergeLHS->getOperand(I).getReg(); - Register Operand2 = UnmergeRHS->getOperand(I).getReg(); - auto PartMul = MIRBuilder.buildInstr(MI.getOpcode(), {ResultTy, OverflowTy}, - {Operand1, Operand2}); - ResultParts.push_back(PartMul->getOperand(0).getReg()); - OverflowParts.push_back(PartMul->getOperand(1).getReg()); - } - - LLT ResultLCMTy = buildLCMMergePieces(SrcTy, NarrowTy, GCDTy, ResultParts); - LLT OverflowLCMTy = - LLT::scalarOrVector(ResultLCMTy.getElementCount(), OverflowElementTy); - - // Recombine the pieces to the original result and overflow registers. - buildWidenedRemergeToDst(Result, ResultLCMTy, ResultParts); - buildWidenedRemergeToDst(Overflow, OverflowLCMTy, OverflowParts); - MI.eraseFromParent(); - return Legalized; -} - -// Handle FewerElementsVector a G_BUILD_VECTOR or G_CONCAT_VECTORS that produces -// a vector -// -// Create a G_BUILD_VECTOR or G_CONCAT_VECTORS of NarrowTy pieces, padding with -// undef as necessary. 
-// -// %3:_(<3 x s16>) = G_BUILD_VECTOR %0, %1, %2 -// -> <2 x s16> -// -// %4:_(s16) = G_IMPLICIT_DEF -// %5:_(<2 x s16>) = G_BUILD_VECTOR %0, %1 -// %6:_(<2 x s16>) = G_BUILD_VECTOR %2, %4 -// %7:_(<2 x s16>) = G_IMPLICIT_DEF -// %8:_(<6 x s16>) = G_CONCAT_VECTORS %5, %6, %7 -// %3:_(<3 x s16>), %8:_(<3 x s16>) = G_UNMERGE_VALUES %8 LegalizerHelper::LegalizeResult LegalizerHelper::fewerElementsVectorMerge(MachineInstr &MI, unsigned TypeIdx, LLT NarrowTy) { Register DstReg = MI.getOperand(0).getReg(); LLT DstTy = MRI.getType(DstReg); LLT SrcTy = MRI.getType(MI.getOperand(1).getReg()); - LLT GCDTy = getGCDType(getGCDType(SrcTy, NarrowTy), DstTy); + // Requires compatible types. Otherwise user of DstReg did not perform unmerge + // that should have been artifact combined. Most likely instruction that uses + // DstReg has to do more/fewer elements legalization compatible with NarrowTy. + assert(DstTy.isVector() && NarrowTy.isVector() && "Expected vector types"); + assert((DstTy.getScalarType() == NarrowTy.getScalarType()) && "bad type"); + if (NarrowTy == SrcTy) + return UnableToLegalize; - // Break into a common type - SmallVector Parts; - for (const MachineOperand &MO : llvm::drop_begin(MI.operands())) - extractGCDType(Parts, GCDTy, MO.getReg()); + // This attempts to lower part of LCMTy merge/unmerge sequence. Intended use + // is for old mir tests. Since the changes to more/fewer elements it should no + // longer be possible to generate MIR like this when starting from llvm-ir + // because LCMTy approach was replaced with merge/unmerge to vector elements. + if (TypeIdx == 1) { + assert(SrcTy.isVector() && "Expected vector types"); + assert((SrcTy.getScalarType() == NarrowTy.getScalarType()) && "bad type"); + if ((DstTy.getSizeInBits() % NarrowTy.getSizeInBits() != 0) || + (NarrowTy.getNumElements() >= SrcTy.getNumElements())) + return UnableToLegalize; + // %2:_(DstTy) = G_CONCAT_VECTORS %0:_(SrcTy), %1:_(SrcTy) + // + // %3:_(EltTy), %4, %5 = G_UNMERGE_VALUES %0:_(SrcTy) + // %6:_(EltTy), %7, %8 = G_UNMERGE_VALUES %1:_(SrcTy) + // %9:_(NarrowTy) = G_BUILD_VECTOR %3:_(EltTy), %4 + // %10:_(NarrowTy) = G_BUILD_VECTOR %5:_(EltTy), %6 + // %11:_(NarrowTy) = G_BUILD_VECTOR %7:_(EltTy), %8 + // %2:_(DstTy) = G_CONCAT_VECTORS %9:_(NarrowTy), %10, %11 + + SmallVector Elts; + LLT EltTy = MRI.getType(MI.getOperand(1).getReg()).getScalarType(); + for (unsigned i = 1; i < MI.getNumOperands(); ++i) { + auto Unmerge = MIRBuilder.buildUnmerge(EltTy, MI.getOperand(i).getReg()); + for (unsigned j = 0; j < Unmerge->getNumDefs(); ++j) + Elts.push_back(Unmerge.getReg(j)); + } - // Build the requested new merge, padding with undef. - LLT LCMTy = buildLCMMergePieces(DstTy, NarrowTy, GCDTy, Parts, - TargetOpcode::G_ANYEXT); + SmallVector NarrowTyElts; + unsigned NumNarrowTyElts = NarrowTy.getNumElements(); + unsigned NumNarrowTyPieces = DstTy.getNumElements() / NumNarrowTyElts; + for (unsigned i = 0, Offset = 0; i < NumNarrowTyPieces; + ++i, Offset += NumNarrowTyElts) { + ArrayRef Pieces(&Elts[Offset], NumNarrowTyElts); + NarrowTyElts.push_back(MIRBuilder.buildMerge(NarrowTy, Pieces).getReg(0)); + } - // Pack into the original result register. 
- buildWidenedRemergeToDst(DstReg, LCMTy, Parts); + MIRBuilder.buildMerge(DstReg, NarrowTyElts); + MI.eraseFromParent(); + return Legalized; + } + + assert(TypeIdx == 0 && "Bad type index"); + if ((NarrowTy.getSizeInBits() % SrcTy.getSizeInBits() != 0) || + (DstTy.getSizeInBits() % NarrowTy.getSizeInBits() != 0)) + return UnableToLegalize; + // This is most likely SrcTy (smaller then register size) packed in DstTy + // (larger then register size) and since merge was not combined it will be + // lowered to bit sequence packing into register. Merge SrcTy to NarrowTy + // (register size) pieces first. Then merge each of NarrowTy pieces to DstTy. + + // %0:_(DstTy) = G_MERGE_VALUES %1:_(SrcTy), %2, %3, %4 + // + // %5:_(NarrowTy) = G_MERGE_VALUES %1:_(SrcTy), %2 - sequence of bits in reg + // %6:_(NarrowTy) = G_MERGE_VALUES %3:_(SrcTy), %4 + // %0:_(DstTy) = G_MERGE_VALUES %5:_(NarrowTy), %6 - reg sequence + SmallVector NarrowTyElts; + unsigned NumParts = DstTy.getNumElements() / NarrowTy.getNumElements(); + unsigned NumSrcElts = SrcTy.isVector() ? SrcTy.getNumElements() : 1; + unsigned NumElts = NarrowTy.getNumElements() / NumSrcElts; + for (unsigned i = 0; i < NumParts; ++i) { + SmallVector Sources; + for (unsigned j = 0; j < NumElts; ++j) + Sources.push_back(MI.getOperand(1 + i * NumElts + j).getReg()); + NarrowTyElts.push_back(MIRBuilder.buildMerge(NarrowTy, Sources).getReg(0)); + } + + MIRBuilder.buildMerge(DstReg, NarrowTyElts); MI.eraseFromParent(); return Legalized; } @@ -4217,164 +4138,15 @@ LegalizerHelper::reduceLoadStoreWidth(GLoadStore &LdStMI, unsigned TypeIdx, return Legalized; } -LegalizerHelper::LegalizeResult -LegalizerHelper::reduceOperationWidth(MachineInstr &MI, unsigned int TypeIdx, - LLT NarrowTy) { - assert(TypeIdx == 0 && "only one type index expected"); - - const unsigned Opc = MI.getOpcode(); - const int NumDefOps = MI.getNumExplicitDefs(); - const int NumSrcOps = MI.getNumOperands() - NumDefOps; - const unsigned Flags = MI.getFlags(); - const unsigned NarrowSize = NarrowTy.getSizeInBits(); - const LLT NarrowScalarTy = LLT::scalar(NarrowSize); - - assert(MI.getNumOperands() <= 4 && "expected instruction with either 1 " - "result and 1-3 sources or 2 results and " - "1-2 sources"); - - SmallVector DstRegs; - for (int I = 0; I < NumDefOps; ++I) - DstRegs.push_back(MI.getOperand(I).getReg()); - - // First of all check whether we are narrowing (changing the element type) - // or reducing the vector elements - const LLT DstTy = MRI.getType(DstRegs[0]); - const bool IsNarrow = NarrowTy.getScalarType() != DstTy.getScalarType(); - - SmallVector ExtractedRegs[3]; - SmallVector Parts; - - // Break down all the sources into NarrowTy pieces we can operate on. This may - // involve creating merges to a wider type, padded with undef. - for (int I = 0; I != NumSrcOps; ++I) { - Register SrcReg = MI.getOperand(I + NumDefOps).getReg(); - LLT SrcTy = MRI.getType(SrcReg); - - // The type to narrow SrcReg to. For narrowing, this is a smaller scalar. - // For fewerElements, this is a smaller vector with the same element type. - LLT OpNarrowTy; - if (IsNarrow) { - OpNarrowTy = NarrowScalarTy; - - // In case of narrowing, we need to cast vectors to scalars for this to - // work properly - // FIXME: Can we do without the bitcast here if we're narrowing? - if (SrcTy.isVector()) { - SrcTy = LLT::scalar(SrcTy.getSizeInBits()); - SrcReg = MIRBuilder.buildBitcast(SrcTy, SrcReg).getReg(0); - } - } else { - auto NarrowEC = NarrowTy.isVector() ? 
NarrowTy.getElementCount() - : ElementCount::getFixed(1); - OpNarrowTy = LLT::scalarOrVector(NarrowEC, SrcTy.getScalarType()); - } - - LLT GCDTy = extractGCDType(ExtractedRegs[I], SrcTy, OpNarrowTy, SrcReg); - - // Build a sequence of NarrowTy pieces in ExtractedRegs for this operand. - buildLCMMergePieces(SrcTy, OpNarrowTy, GCDTy, ExtractedRegs[I], - TargetOpcode::G_ANYEXT); - } - - SmallVector ResultRegs[2]; - - // Input operands for each sub-instruction. - SmallVector InputRegs(NumSrcOps, Register()); - - int NumParts = ExtractedRegs[0].size(); - const unsigned DstSize = DstTy.getSizeInBits(); - const LLT DstScalarTy = LLT::scalar(DstSize); - - // Narrowing needs to use scalar types - LLT DstLCMTy, NarrowDstTy; - if (IsNarrow) { - DstLCMTy = getLCMType(DstScalarTy, NarrowScalarTy); - NarrowDstTy = NarrowScalarTy; - } else { - DstLCMTy = getLCMType(DstTy, NarrowTy); - NarrowDstTy = NarrowTy; - } - - // We widened the source registers to satisfy merge/unmerge size - // constraints. We'll have some extra fully undef parts. - const int NumRealParts = (DstSize + NarrowSize - 1) / NarrowSize; - - for (int I = 0; I != NumRealParts; ++I) { - // Emit this instruction on each of the split pieces. - for (int J = 0; J != NumSrcOps; ++J) - InputRegs[J] = ExtractedRegs[J][I]; - - MachineInstrBuilder Inst; - if (NumDefOps == 1) - Inst = MIRBuilder.buildInstr(Opc, {NarrowDstTy}, InputRegs, Flags); - else - Inst = MIRBuilder.buildInstr(Opc, {NarrowDstTy, NarrowDstTy}, InputRegs, - Flags); - - for (int J = 0; J != NumDefOps; ++J) - ResultRegs[J].push_back(Inst.getReg(J)); - } - - // Fill out the widened result with undef instead of creating instructions - // with undef inputs. - int NumUndefParts = NumParts - NumRealParts; - if (NumUndefParts != 0) { - Register Undef = MIRBuilder.buildUndef(NarrowDstTy).getReg(0); - for (int I = 0; I != NumDefOps; ++I) - ResultRegs[I].append(NumUndefParts, Undef); - } - - // Extract the possibly padded result. Use a scratch register if we need to do - // a final bitcast, otherwise use the original result register. - Register MergeDstReg; - for (int I = 0; I != NumDefOps; ++I) { - if (IsNarrow && DstTy.isVector()) - MergeDstReg = MRI.createGenericVirtualRegister(DstScalarTy); - else - MergeDstReg = DstRegs[I]; - - buildWidenedRemergeToDst(MergeDstReg, DstLCMTy, ResultRegs[I]); - - // Recast to vector if we narrowed a vector - if (IsNarrow && DstTy.isVector()) - MIRBuilder.buildBitcast(DstRegs[I], MergeDstReg); - } - - MI.eraseFromParent(); - return Legalized; -} - -LegalizerHelper::LegalizeResult -LegalizerHelper::fewerElementsVectorSextInReg(MachineInstr &MI, unsigned TypeIdx, - LLT NarrowTy) { - Register DstReg = MI.getOperand(0).getReg(); - Register SrcReg = MI.getOperand(1).getReg(); - int64_t Imm = MI.getOperand(2).getImm(); - - LLT DstTy = MRI.getType(DstReg); - - SmallVector Parts; - LLT GCDTy = extractGCDType(Parts, DstTy, NarrowTy, SrcReg); - LLT LCMTy = buildLCMMergePieces(DstTy, NarrowTy, GCDTy, Parts); - - for (Register &R : Parts) - R = MIRBuilder.buildSExtInReg(NarrowTy, R, Imm).getReg(0); - - buildWidenedRemergeToDst(DstReg, LCMTy, Parts); - - MI.eraseFromParent(); - return Legalized; -} - LegalizerHelper::LegalizeResult LegalizerHelper::fewerElementsVector(MachineInstr &MI, unsigned TypeIdx, LLT NarrowTy) { using namespace TargetOpcode; + GenericMachineInstr &GMI = cast(MI); + unsigned NumElts = NarrowTy.isVector() ? 
NarrowTy.getNumElements() : 1; switch (MI.getOpcode()) { case G_IMPLICIT_DEF: - return fewerElementsVectorImplicitDef(MI, TypeIdx, NarrowTy); case G_TRUNC: case G_AND: case G_OR: @@ -4439,10 +4211,8 @@ LegalizerHelper::fewerElementsVector(MachineInstr &MI, unsigned TypeIdx, case G_SSUBSAT: case G_UADDSAT: case G_USUBSAT: - return reduceOperationWidth(MI, TypeIdx, NarrowTy); case G_UMULO: case G_SMULO: - return fewerElementsVectorMulo(MI, TypeIdx, NarrowTy); case G_SHL: case G_LSHR: case G_ASHR: @@ -4454,7 +4224,6 @@ LegalizerHelper::fewerElementsVector(MachineInstr &MI, unsigned TypeIdx, case G_CTTZ_ZERO_UNDEF: case G_CTPOP: case G_FCOPYSIGN: - return fewerElementsVectorMultiEltType(MI, TypeIdx, NarrowTy); case G_ZEXT: case G_SEXT: case G_ANYEXT: @@ -4467,14 +4236,16 @@ LegalizerHelper::fewerElementsVector(MachineInstr &MI, unsigned TypeIdx, case G_INTTOPTR: case G_PTRTOINT: case G_ADDRSPACE_CAST: - return fewerElementsVectorCasts(MI, TypeIdx, NarrowTy); + return fewerElementsVectorMultiEltType(GMI, NumElts); case G_ICMP: case G_FCMP: - return fewerElementsVectorCmp(MI, TypeIdx, NarrowTy); + return fewerElementsVectorMultiEltType(GMI, NumElts, {1 /*cpm predicate*/}); case G_SELECT: - return fewerElementsVectorSelect(MI, TypeIdx, NarrowTy); + if (MRI.getType(MI.getOperand(1).getReg()).isVector()) + return fewerElementsVectorMultiEltType(GMI, NumElts); + return fewerElementsVectorMultiEltType(GMI, NumElts, {1 /*scalar cond*/}); case G_PHI: - return fewerElementsVectorPhi(MI, TypeIdx, NarrowTy); + return fewerElementsVectorPhi(GMI, NumElts); case G_UNMERGE_VALUES: return fewerElementsVectorUnmergeValues(MI, TypeIdx, NarrowTy); case G_BUILD_VECTOR: @@ -4491,7 +4262,7 @@ LegalizerHelper::fewerElementsVector(MachineInstr &MI, unsigned TypeIdx, case G_STORE: return reduceLoadStoreWidth(cast(MI), TypeIdx, NarrowTy); case G_SEXT_INREG: - return fewerElementsVectorSextInReg(MI, TypeIdx, NarrowTy); + return fewerElementsVectorMultiEltType(GMI, NumElts, {2 /*imm*/}); GISEL_VECREDUCE_CASES_NONSEQ return fewerElementsVectorReductions(MI, TypeIdx, NarrowTy); case G_SHUFFLE_VECTOR: @@ -5053,6 +4824,15 @@ LegalizerHelper::moreElementsVector(MachineInstr &MI, unsigned TypeIdx, case TargetOpcode::G_AND: case TargetOpcode::G_OR: case TargetOpcode::G_XOR: + case TargetOpcode::G_ADD: + case TargetOpcode::G_SUB: + case TargetOpcode::G_MUL: + case TargetOpcode::G_FADD: + case TargetOpcode::G_FMUL: + case TargetOpcode::G_UADDSAT: + case TargetOpcode::G_USUBSAT: + case TargetOpcode::G_SADDSAT: + case TargetOpcode::G_SSUBSAT: case TargetOpcode::G_SMIN: case TargetOpcode::G_SMAX: case TargetOpcode::G_UMIN: @@ -5070,6 +4850,17 @@ LegalizerHelper::moreElementsVector(MachineInstr &MI, unsigned TypeIdx, Observer.changedInstr(MI); return Legalized; } + case TargetOpcode::G_FMA: + case TargetOpcode::G_FSHR: + case TargetOpcode::G_FSHL: { + Observer.changingInstr(MI); + moreElementsVectorSrc(MI, MoreTy, 1); + moreElementsVectorSrc(MI, MoreTy, 2); + moreElementsVectorSrc(MI, MoreTy, 3); + moreElementsVectorDst(MI, MoreTy, 0); + Observer.changedInstr(MI); + return Legalized; + } case TargetOpcode::G_EXTRACT: if (TypeIdx != 1) return UnableToLegalize; @@ -5079,6 +4870,11 @@ LegalizerHelper::moreElementsVector(MachineInstr &MI, unsigned TypeIdx, return Legalized; case TargetOpcode::G_INSERT: case TargetOpcode::G_FREEZE: + case TargetOpcode::G_FNEG: + case TargetOpcode::G_FABS: + case TargetOpcode::G_BSWAP: + case TargetOpcode::G_FCANONICALIZE: + case TargetOpcode::G_SEXT_INREG: if (TypeIdx != 0) return UnableToLegalize; 
Observer.changingInstr(MI); @@ -5098,30 +4894,34 @@ LegalizerHelper::moreElementsVector(MachineInstr &MI, unsigned TypeIdx, moreElementsVectorDst(MI, MoreTy, 0); Observer.changedInstr(MI); return Legalized; - case TargetOpcode::G_UNMERGE_VALUES: { - if (TypeIdx != 1) - return UnableToLegalize; - - LLT DstTy = MRI.getType(MI.getOperand(0).getReg()); - int NumDst = MI.getNumOperands() - 1; - moreElementsVectorSrc(MI, MoreTy, NumDst); - - auto MIB = MIRBuilder.buildInstr(TargetOpcode::G_UNMERGE_VALUES); - for (int I = 0; I != NumDst; ++I) - MIB.addDef(MI.getOperand(I).getReg()); + case TargetOpcode::G_UNMERGE_VALUES: + return UnableToLegalize; + case TargetOpcode::G_PHI: + return moreElementsVectorPhi(MI, TypeIdx, MoreTy); + case TargetOpcode::G_SHUFFLE_VECTOR: + return moreElementsVectorShuffle(MI, TypeIdx, MoreTy); + case TargetOpcode::G_BUILD_VECTOR: { + SmallVector Elts; + for (auto Op : MI.uses()) { + Elts.push_back(Op.getReg()); + } - int NewNumDst = MoreTy.getSizeInBits() / DstTy.getSizeInBits(); - for (int I = NumDst; I != NewNumDst; ++I) - MIB.addDef(MRI.createGenericVirtualRegister(DstTy)); + for (unsigned i = Elts.size(); i < MoreTy.getNumElements(); ++i) { + Elts.push_back(MIRBuilder.buildUndef(MoreTy.getScalarType())); + } - MIB.addUse(MI.getOperand(NumDst).getReg()); + MIRBuilder.buildDeleteTrailingVectorElements( + MI.getOperand(0).getReg(), MIRBuilder.buildInstr(Opc, {MoreTy}, Elts)); MI.eraseFromParent(); return Legalized; } - case TargetOpcode::G_PHI: - return moreElementsVectorPhi(MI, TypeIdx, MoreTy); - case TargetOpcode::G_SHUFFLE_VECTOR: - return moreElementsVectorShuffle(MI, TypeIdx, MoreTy); + case TargetOpcode::G_TRUNC: { + Observer.changingInstr(MI); + moreElementsVectorSrc(MI, MoreTy, 1); + moreElementsVectorDst(MI, MoreTy, 0); + Observer.changedInstr(MI); + return Legalized; + } default: return UnableToLegalize; } @@ -6778,6 +6578,24 @@ LegalizerHelper::lowerExtractInsertVectorElt(MachineInstr &MI) { LLT VecTy = MRI.getType(SrcVec); LLT EltTy = VecTy.getElementType(); + unsigned NumElts = VecTy.getNumElements(); + + int64_t IdxVal; + if (mi_match(Idx, MRI, m_ICst(IdxVal)) && IdxVal <= NumElts) { + SmallVector SrcRegs; + extractParts(SrcVec, EltTy, NumElts, SrcRegs); + + if (InsertVal) { + SrcRegs[IdxVal] = MI.getOperand(2).getReg(); + MIRBuilder.buildMerge(DstReg, SrcRegs); + } else { + MIRBuilder.buildCopy(DstReg, SrcRegs[IdxVal]); + } + + MI.eraseFromParent(); + return Legalized; + } + if (!EltTy.isByteSized()) { // Not implemented. LLVM_DEBUG(dbgs() << "Can't handle non-byte element vectors yet\n"); return UnableToLegalize; @@ -6796,7 +6614,6 @@ LegalizerHelper::lowerExtractInsertVectorElt(MachineInstr &MI) { // if the index is out of bounds. Register EltPtr = getVectorElementPointer(StackTemp.getReg(0), VecTy, Idx); - int64_t IdxVal; if (mi_match(Idx, MRI, m_ICst(IdxVal))) { int64_t Offset = IdxVal * EltBytes; PtrInfo = PtrInfo.getWithOffset(Offset); @@ -6923,6 +6740,32 @@ LegalizerHelper::lowerExtract(MachineInstr &MI) { LLT DstTy = MRI.getType(Dst); LLT SrcTy = MRI.getType(Src); + // Extract sub-vector or one element + if (SrcTy.isVector()) { + unsigned SrcEltSize = SrcTy.getElementType().getSizeInBits(); + unsigned DstSize = DstTy.getSizeInBits(); + + if ((Offset % SrcEltSize == 0) && (DstSize % SrcEltSize == 0) && + (Offset + DstSize <= SrcTy.getSizeInBits())) { + // Unmerge and allow access to each Src element for the artifact combiner. 
+ auto Unmerge = MIRBuilder.buildUnmerge(SrcTy.getElementType(), Src); + + // Take element(s) we need to extract and copy it (merge them). + SmallVector SubVectorElts; + for (unsigned Idx = Offset / SrcEltSize; + Idx < (Offset + DstSize) / SrcEltSize; ++Idx) { + SubVectorElts.push_back(Unmerge.getReg(Idx)); + } + if (SubVectorElts.size() == 1) + MIRBuilder.buildCopy(Dst, SubVectorElts[0]); + else + MIRBuilder.buildMerge(Dst, SubVectorElts); + + MI.eraseFromParent(); + return Legalized; + } + } + if (DstTy.isScalar() && (SrcTy.isScalar() || (SrcTy.isVector() && DstTy == SrcTy.getElementType()))) { @@ -6956,6 +6799,45 @@ LegalizerHelper::LegalizeResult LegalizerHelper::lowerInsert(MachineInstr &MI) { LLT DstTy = MRI.getType(Src); LLT InsertTy = MRI.getType(InsertSrc); + // Insert sub-vector or one element + if (DstTy.isVector() && !InsertTy.isPointer()) { + LLT EltTy = DstTy.getElementType(); + unsigned EltSize = EltTy.getSizeInBits(); + unsigned InsertSize = InsertTy.getSizeInBits(); + + if ((Offset % EltSize == 0) && (InsertSize % EltSize == 0) && + (Offset + InsertSize <= DstTy.getSizeInBits())) { + auto UnmergeSrc = MIRBuilder.buildUnmerge(EltTy, Src); + SmallVector DstElts; + unsigned Idx = 0; + // Elements from Src before insert start Offset + for (; Idx < Offset / EltSize; ++Idx) { + DstElts.push_back(UnmergeSrc.getReg(Idx)); + } + + // Replace elements in Src with elements from InsertSrc + if (InsertTy.getSizeInBits() > EltSize) { + auto UnmergeInsertSrc = MIRBuilder.buildUnmerge(EltTy, InsertSrc); + for (unsigned i = 0; Idx < (Offset + InsertSize) / EltSize; + ++Idx, ++i) { + DstElts.push_back(UnmergeInsertSrc.getReg(i)); + } + } else { + DstElts.push_back(InsertSrc); + ++Idx; + } + + // Remaining elements from Src after insert + for (; Idx < DstTy.getNumElements(); ++Idx) { + DstElts.push_back(UnmergeSrc.getReg(Idx)); + } + + MIRBuilder.buildMerge(Dst, DstElts); + MI.eraseFromParent(); + return Legalized; + } + } + if (InsertTy.isVector() || (DstTy.isVector() && DstTy.getElementType() != InsertTy)) return UnableToLegalize; diff --git a/llvm/lib/CodeGen/GlobalISel/MachineIRBuilder.cpp b/llvm/lib/CodeGen/GlobalISel/MachineIRBuilder.cpp index fb5ed35c1f72..391251886fbb 100644 --- a/llvm/lib/CodeGen/GlobalISel/MachineIRBuilder.cpp +++ b/llvm/lib/CodeGen/GlobalISel/MachineIRBuilder.cpp @@ -215,6 +215,48 @@ MachineInstrBuilder MachineIRBuilder::buildMaskLowPtrBits(const DstOp &Res, return buildPtrMask(Res, Op0, MaskReg); } +MachineInstrBuilder +MachineIRBuilder::buildPadVectorWithUndefElements(const DstOp &Res, + const SrcOp &Op0) { + LLT ResTy = Res.getLLTTy(*getMRI()); + LLT Op0Ty = Op0.getLLTTy(*getMRI()); + + assert((ResTy.isVector() && Op0Ty.isVector()) && "Non vector type"); + assert((ResTy.getElementType() == Op0Ty.getElementType()) && + "Different vector element types"); + assert((ResTy.getNumElements() > Op0Ty.getNumElements()) && + "Op0 has more elements"); + + auto Unmerge = buildUnmerge(Op0Ty.getElementType(), Op0); + SmallVector Regs; + for (auto Op : Unmerge.getInstr()->defs()) + Regs.push_back(Op.getReg()); + Register Undef = buildUndef(Op0Ty.getElementType()).getReg(0); + unsigned NumberOfPadElts = ResTy.getNumElements() - Regs.size(); + for (unsigned i = 0; i < NumberOfPadElts; ++i) + Regs.push_back(Undef); + return buildMerge(Res, Regs); +} + +MachineInstrBuilder +MachineIRBuilder::buildDeleteTrailingVectorElements(const DstOp &Res, + const SrcOp &Op0) { + LLT ResTy = Res.getLLTTy(*getMRI()); + LLT Op0Ty = Op0.getLLTTy(*getMRI()); + + assert((ResTy.isVector() && 
Op0Ty.isVector()) && "Non vector type"); + assert((ResTy.getElementType() == Op0Ty.getElementType()) && + "Different vector element types"); + assert((ResTy.getNumElements() < Op0Ty.getNumElements()) && + "Op0 has fewer elements"); + + SmallVector Regs; + auto Unmerge = buildUnmerge(Op0Ty.getElementType(), Op0); + for (unsigned i = 0; i < ResTy.getNumElements(); ++i) + Regs.push_back(Unmerge.getReg(i)); + return buildMerge(Res, Regs); +} + MachineInstrBuilder MachineIRBuilder::buildBr(MachineBasicBlock &Dest) { return buildInstr(TargetOpcode::G_BR).addMBB(&Dest); } @@ -613,10 +655,8 @@ MachineInstrBuilder MachineIRBuilder::buildUnmerge(ArrayRef Res, MachineInstrBuilder MachineIRBuilder::buildUnmerge(LLT Res, const SrcOp &Op) { unsigned NumReg = Op.getLLTTy(*getMRI()).getSizeInBits() / Res.getSizeInBits(); - SmallVector TmpVec; - for (unsigned I = 0; I != NumReg; ++I) - TmpVec.push_back(getMRI()->createGenericVirtualRegister(Res)); - return buildUnmerge(TmpVec, Op); + SmallVector TmpVec(NumReg, Res); + return buildInstr(TargetOpcode::G_UNMERGE_VALUES, TmpVec, Op); } MachineInstrBuilder MachineIRBuilder::buildUnmerge(ArrayRef Res, diff --git a/llvm/lib/CodeGen/GlobalISel/Utils.cpp b/llvm/lib/CodeGen/GlobalISel/Utils.cpp index 00ee2988d17f..4981a537dc7c 100644 --- a/llvm/lib/CodeGen/GlobalISel/Utils.cpp +++ b/llvm/lib/CodeGen/GlobalISel/Utils.cpp @@ -923,6 +923,21 @@ LLT llvm::getLCMType(LLT OrigTy, LLT TargetTy) { return LLT::scalar(LCMSize); } +LLT llvm::getCoverTy(LLT OrigTy, LLT TargetTy) { + if (!OrigTy.isVector() || !TargetTy.isVector() || OrigTy == TargetTy || + (OrigTy.getScalarSizeInBits() != TargetTy.getScalarSizeInBits())) + return getLCMType(OrigTy, TargetTy); + + unsigned OrigTyNumElts = OrigTy.getNumElements(); + unsigned TargetTyNumElts = TargetTy.getNumElements(); + if (OrigTyNumElts % TargetTyNumElts == 0) + return OrigTy; + + unsigned NumElts = alignTo(OrigTyNumElts, TargetTyNumElts); + return LLT::scalarOrVector(ElementCount::getFixed(NumElts), + OrigTy.getElementType()); +} + LLT llvm::getGCDType(LLT OrigTy, LLT TargetTy) { const unsigned OrigSize = OrigTy.getSizeInBits(); const unsigned TargetSize = TargetTy.getSizeInBits(); diff --git a/llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp b/llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp index 7fd1aa3044fe..5046daaed977 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp +++ b/llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp @@ -533,7 +533,7 @@ AMDGPULegalizerInfo::AMDGPULegalizerInfo(const GCNSubtarget &ST_, getActionDefinitionsBuilder({G_ADD, G_SUB, G_MUL}) .legalFor({S32, S16, V2S16}) .minScalar(0, S16) - .clampMaxNumElements(0, S16, 2) + .clampMaxNumElementsStrict(0, S16, 2) .widenScalarToNextMultipleOf(0, 32) .maxScalar(0, S32) .scalarize(0); @@ -541,7 +541,7 @@ AMDGPULegalizerInfo::AMDGPULegalizerInfo(const GCNSubtarget &ST_, getActionDefinitionsBuilder({G_UADDSAT, G_USUBSAT, G_SADDSAT, G_SSUBSAT}) .legalFor({S32, S16, V2S16}) // Clamp modifier .minScalarOrElt(0, S16) - .clampMaxNumElements(0, S16, 2) + .clampMaxNumElementsStrict(0, S16, 2) .scalarize(0) .widenScalarToNextPow2(0, 32) .lower(); @@ -712,7 +712,7 @@ AMDGPULegalizerInfo::AMDGPULegalizerInfo(const GCNSubtarget &ST_, } if (ST.hasVOP3PInsts()) - FPOpActions.clampMaxNumElements(0, S16, 2); + FPOpActions.clampMaxNumElementsStrict(0, S16, 2); FPOpActions .scalarize(0) @@ -728,7 +728,7 @@ AMDGPULegalizerInfo::AMDGPULegalizerInfo(const GCNSubtarget &ST_, getActionDefinitionsBuilder({G_FNEG, G_FABS}) .legalFor(FPTypesPK16) - .clampMaxNumElements(0, S16, 2) + 
.clampMaxNumElementsStrict(0, S16, 2) .scalarize(0) .clampScalar(0, S16, S64); @@ -965,7 +965,7 @@ AMDGPULegalizerInfo::AMDGPULegalizerInfo(const GCNSubtarget &ST_, if (ST.has16BitInsts()) { getActionDefinitionsBuilder(G_BSWAP) .legalFor({S16, S32, V2S16}) - .clampMaxNumElements(0, S16, 2) + .clampMaxNumElementsStrict(0, S16, 2) // FIXME: Fixing non-power-of-2 before clamp is workaround for // narrowScalar limitation. .widenScalarToNextPow2(0) @@ -1425,6 +1425,13 @@ AMDGPULegalizerInfo::AMDGPULegalizerInfo(const GCNSubtarget &ST_, // FIXME: Doesn't handle extract of illegal sizes. getActionDefinitionsBuilder(Op) .lowerIf(all(typeIs(LitTyIdx, S16), sizeIs(BigTyIdx, 32))) + .lowerIf([=](const LegalityQuery &Query) { + // Sub-vector(or single element) insert and extract. + // TODO: verify immediate offset here since lower only works with + // whole elements. + const LLT BigTy = Query.Types[BigTyIdx]; + return BigTy.isVector(); + }) // FIXME: Multiples of 16 should not be legal. .legalIf([=](const LegalityQuery &Query) { const LLT BigTy = Query.Types[BigTyIdx]; @@ -1583,7 +1590,7 @@ AMDGPULegalizerInfo::AMDGPULegalizerInfo(const GCNSubtarget &ST_, // Prefer to reduce vector widths for 16-bit vectors before lowering, to // get more vector shift opportunities, since we'll get those when // expanded. - .fewerElementsIf(elementTypeIs(0, S16), changeTo(0, V2S16)); + .clampMaxNumElementsStrict(0, S16, 2); } else if (ST.has16BitInsts()) { SextInReg.lowerFor({{S32}, {S64}, {S16}}); } else { @@ -1605,14 +1612,14 @@ AMDGPULegalizerInfo::AMDGPULegalizerInfo(const GCNSubtarget &ST_, getActionDefinitionsBuilder(G_FSHR) .legalFor({{S32, S32}}) .lowerFor({{V2S16, V2S16}}) - .fewerElementsIf(elementTypeIs(0, S16), changeTo(0, V2S16)) + .clampMaxNumElementsStrict(0, S16, 2) .scalarize(0) .lower(); if (ST.hasVOP3PInsts()) { getActionDefinitionsBuilder(G_FSHL) .lowerFor({{V2S16, V2S16}}) - .fewerElementsIf(elementTypeIs(0, S16), changeTo(0, V2S16)) + .clampMaxNumElementsStrict(0, S16, 2) .scalarize(0) .lower(); } else { @@ -2535,10 +2542,8 @@ bool AMDGPULegalizerInfo::legalizeLoad(LegalizerHelper &Helper, } else { // For cases where the widened type isn't a nice register value, unmerge // from a widened register (e.g. <3 x s16> -> <4 x s16>) - B.setInsertPt(B.getMBB(), ++B.getInsertPt()); - WideLoad = Helper.widenWithUnmerge(WideTy, ValReg); - B.setInsertPt(B.getMBB(), MI.getIterator()); - B.buildLoadFromOffset(WideLoad, PtrReg, *MMO, 0); + WideLoad = B.buildLoadFromOffset(WideTy, PtrReg, *MMO, 0).getReg(0); + B.buildDeleteTrailingVectorElements(ValReg, WideLoad); } } @@ -3811,6 +3816,10 @@ Register AMDGPULegalizerInfo::handleD16VData(MachineIRBuilder &B, llvm_unreachable("invalid data type"); } + if (StoreVT == LLT::fixed_vector(3, S16)) { + Reg = B.buildPadVectorWithUndefElements(LLT::fixed_vector(4, S16), Reg) + .getReg(0); + } return Reg; } @@ -4653,9 +4662,23 @@ bool AMDGPULegalizerInfo::legalizeImageIntrinsic( // Deal with the one annoying legal case. 
const LLT V3S16 = LLT::fixed_vector(3, 16); if (Ty == V3S16) { - padWithUndef(ResTy, RegsToCover - ResultRegs.size() + 1); - auto Concat = B.buildConcatVectors(LLT::fixed_vector(6, 16), ResultRegs); - B.buildUnmerge({DstReg, MRI->createGenericVirtualRegister(V3S16)}, Concat); + if (IsTFE) { + if (ResultRegs.size() == 1) { + NewResultReg = ResultRegs[0]; + } else if (ResultRegs.size() == 2) { + LLT V4S16 = LLT::fixed_vector(4, 16); + NewResultReg = B.buildConcatVectors(V4S16, ResultRegs).getReg(0); + } else { + return false; + } + } + + if (MRI->getType(DstReg).getNumElements() < + MRI->getType(NewResultReg).getNumElements()) { + B.buildDeleteTrailingVectorElements(DstReg, NewResultReg); + } else { + B.buildPadVectorWithUndefElements(DstReg, NewResultReg); + } return true; } diff --git a/llvm/lib/Target/AMDGPU/AMDGPURegisterBankInfo.cpp b/llvm/lib/Target/AMDGPU/AMDGPURegisterBankInfo.cpp index 2bb77d9de0b2..c60012bcfe2e 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPURegisterBankInfo.cpp +++ b/llvm/lib/Target/AMDGPU/AMDGPURegisterBankInfo.cpp @@ -1162,18 +1162,25 @@ bool AMDGPURegisterBankInfo::applyMappingLoad(MachineInstr &MI, // 96-bit loads are only available for vector loads. We need to split this // into a 64-bit part, and 32 (unless we can widen to a 128-bit load). if (MMO->getAlign() < Align(16)) { + MachineFunction *MF = MI.getParent()->getParent(); + ApplyRegBankMapping ApplyBank(*this, MRI, DstBank); + MachineIRBuilder B(MI, ApplyBank); + LegalizerHelper Helper(*MF, ApplyBank, B); LLT Part64, Part32; std::tie(Part64, Part32) = splitUnequalType(LoadTy, 64); - auto Load0 = B.buildLoadFromOffset(Part64, PtrReg, *MMO, 0); - auto Load1 = B.buildLoadFromOffset(Part32, PtrReg, *MMO, 8); - - auto Undef = B.buildUndef(LoadTy); - auto Ins0 = B.buildInsert(LoadTy, Undef, Load0, 0); - B.buildInsert(MI.getOperand(0), Ins0, Load1, 64); + if (Helper.reduceLoadStoreWidth(cast(MI), 0, Part64) != + LegalizerHelper::Legalized) + return false; + return true; } else { LLT WiderTy = widen96To128(LoadTy); auto WideLoad = B.buildLoadFromOffset(WiderTy, PtrReg, *MMO, 0); - B.buildExtract(MI.getOperand(0), WideLoad, 0); + if (WiderTy.isScalar()) + B.buildTrunc(MI.getOperand(0), WideLoad); + else { + B.buildDeleteTrailingVectorElements(MI.getOperand(0).getReg(), + WideLoad); + } } } diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/arm64-fallback.ll b/llvm/test/CodeGen/AArch64/GlobalISel/arm64-fallback.ll index 2da155797fa9..13a6fe72c2f0 100644 --- a/llvm/test/CodeGen/AArch64/GlobalISel/arm64-fallback.ll +++ b/llvm/test/CodeGen/AArch64/GlobalISel/arm64-fallback.ll @@ -12,14 +12,6 @@ target datalayout = "e-m:o-i64:64-i128:128-n32:64-S128" target triple = "aarch64--" ; BIG-ENDIAN: unable to translate in big endian mode -; FALLBACK-WITH-REPORT-ERR: remark: :0:0: unable to legalize instruction: %{{[0-9]+}}:_(<28 x s32>) = G_CONCAT_VECTORS %{{[0-9]+}}:_(<4 x s32>), %{{[0-9]+}}:_(<4 x s32>), %{{[0-9]+}}:_(<4 x s32>), %{{[0-9]+}}:_(<4 x s32>), %{{[0-9]+}}:_(<4 x s32>), %{{[0-9]+}}:_(<4 x s32>), %{{[0-9]+}}:_(<4 x s32>) (in function: odd_vector) -; FALLBACK-WITH-REPORT-ERR: warning: Instruction selection used fallback path for odd_vector -; FALLBACK-WITH-REPORT-OUT-LABEL: odd_vector: -define void @odd_vector(<7 x i32>* %addr) { - %vec = load <7 x i32>, <7 x i32>* %addr - store <7 x i32> %vec, <7 x i32>* %addr - ret void -} ; Make sure we don't mess up metadata arguments. 
declare void @llvm.write_register.i64(metadata, i64) diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/call-lowering-vectors.ll b/llvm/test/CodeGen/AArch64/GlobalISel/call-lowering-vectors.ll index 988de92eeacb..6c2d5e6967ba 100644 --- a/llvm/test/CodeGen/AArch64/GlobalISel/call-lowering-vectors.ll +++ b/llvm/test/CodeGen/AArch64/GlobalISel/call-lowering-vectors.ll @@ -58,9 +58,8 @@ define void @test_return_v3f32() { ; CHECK-NEXT: BL @bar, csr_aarch64_aapcs, implicit-def $lr, implicit $sp, implicit $s0, implicit-def $q0 ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(<2 x s64>) = COPY $q0 ; CHECK-NEXT: [[BITCAST:%[0-9]+]]:_(<4 x s32>) = G_BITCAST [[COPY]](<2 x s64>) - ; CHECK-NEXT: [[DEF1:%[0-9]+]]:_(<4 x s32>) = G_IMPLICIT_DEF - ; CHECK-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<12 x s32>) = G_CONCAT_VECTORS [[BITCAST]](<4 x s32>), [[DEF1]](<4 x s32>), [[DEF1]](<4 x s32>) - ; CHECK-NEXT: [[UV:%[0-9]+]]:_(<3 x s32>), [[UV1:%[0-9]+]]:_(<3 x s32>), [[UV2:%[0-9]+]]:_(<3 x s32>), [[UV3:%[0-9]+]]:_(<3 x s32>) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<12 x s32>) + ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[BITCAST]](<4 x s32>) + ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[UV]](s32), [[UV1]](s32), [[UV2]](s32) ; CHECK-NEXT: ADJCALLSTACKUP 0, 0, implicit-def $sp, implicit $sp ; CHECK-NEXT: RET_ReallyLR %call = call <3 x float> @bar(float undef) diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/legalize-saddsat.mir b/llvm/test/CodeGen/AArch64/GlobalISel/legalize-saddsat.mir index 860b5f75b1f8..cfce6fd39022 100644 --- a/llvm/test/CodeGen/AArch64/GlobalISel/legalize-saddsat.mir +++ b/llvm/test/CodeGen/AArch64/GlobalISel/legalize-saddsat.mir @@ -229,11 +229,10 @@ body: | ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(s8) = G_IMPLICIT_DEF ; CHECK-NEXT: [[DEF1:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF ; CHECK-NEXT: [[UV16:%[0-9]+]]:_(s8), [[UV17:%[0-9]+]]:_(s8), [[UV18:%[0-9]+]]:_(s8), [[UV19:%[0-9]+]]:_(s8) = G_UNMERGE_VALUES [[DEF1]](s32) - ; CHECK-NEXT: [[UV20:%[0-9]+]]:_(s8), [[UV21:%[0-9]+]]:_(s8), [[UV22:%[0-9]+]]:_(s8), [[UV23:%[0-9]+]]:_(s8) = G_UNMERGE_VALUES [[DEF1]](s32) ; CHECK-NEXT: [[MV:%[0-9]+]]:_(s32) = G_MERGE_VALUES [[UV4]](s8), [[UV5]](s8), [[UV6]](s8), [[UV7]](s8) ; CHECK-NEXT: [[MV1:%[0-9]+]]:_(s32) = G_MERGE_VALUES [[UV8]](s8), [[UV9]](s8), [[UV10]](s8), [[UV11]](s8) ; CHECK-NEXT: [[MV2:%[0-9]+]]:_(s32) = G_MERGE_VALUES [[UV12]](s8), [[UV13]](s8), [[UV14]](s8), [[DEF]](s8) - ; CHECK-NEXT: [[MV3:%[0-9]+]]:_(s32) = G_MERGE_VALUES [[UV16]](s8), [[UV17]](s8), [[UV18]](s8), [[UV20]](s8) + ; CHECK-NEXT: [[MV3:%[0-9]+]]:_(s32) = G_MERGE_VALUES [[UV16]](s8), [[UV17]](s8), [[UV18]](s8), [[UV16]](s8) ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 40 ; CHECK-NEXT: [[MV4:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[MV]](s32), [[MV1]](s32) ; CHECK-NEXT: [[MV5:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[MV2]](s32), [[MV3]](s32) diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/legalize-shuffle-vector.mir b/llvm/test/CodeGen/AArch64/GlobalISel/legalize-shuffle-vector.mir index 6ea81e73fc7e..bf3619c4210f 100644 --- a/llvm/test/CodeGen/AArch64/GlobalISel/legalize-shuffle-vector.mir +++ b/llvm/test/CodeGen/AArch64/GlobalISel/legalize-shuffle-vector.mir @@ -244,35 +244,37 @@ body: | ; CHECK-NEXT: [[COPY3:%[0-9]+]]:_(s64) = COPY $d3 ; CHECK-NEXT: [[COPY4:%[0-9]+]]:_(s64) = COPY $d4 ; CHECK-NEXT: [[COPY5:%[0-9]+]]:_(s64) = COPY $d5 - ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s64>) = G_BUILD_VECTOR [[COPY2]](s64), [[COPY3]](s64) - ; CHECK-NEXT: 
[[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s64>) = G_BUILD_VECTOR [[COPY4]](s64), [[COPY5]](s64) - ; CHECK-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x s64>) = G_BUILD_VECTOR [[COPY]](s64), [[COPY1]](s64) - ; CHECK-NEXT: [[BUILD_VECTOR3:%[0-9]+]]:_(<2 x s64>) = G_BUILD_VECTOR [[COPY]](s64), [[COPY1]](s64) ; CHECK-NEXT: [[COPY6:%[0-9]+]]:_(s64) = COPY $d6 ; CHECK-NEXT: [[COPY7:%[0-9]+]]:_(s64) = COPY $d7 ; CHECK-NEXT: [[FRAME_INDEX:%[0-9]+]]:_(p0) = G_FRAME_INDEX %fixed-stack.2 ; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(s64) = G_LOAD [[FRAME_INDEX]](p0) :: (invariant load (s64) from %fixed-stack.2, align 16) ; CHECK-NEXT: [[FRAME_INDEX1:%[0-9]+]]:_(p0) = G_FRAME_INDEX %fixed-stack.3 ; CHECK-NEXT: [[LOAD1:%[0-9]+]]:_(s64) = G_LOAD [[FRAME_INDEX1]](p0) :: (invariant load (s64) from %fixed-stack.3) + ; CHECK-NEXT: [[COPY8:%[0-9]+]]:_(p0) = COPY $x0 + ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s64>) = G_BUILD_VECTOR [[COPY2]](s64), [[COPY3]](s64) + ; CHECK-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s64>) = G_BUILD_VECTOR [[COPY4]](s64), [[COPY5]](s64) + ; CHECK-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x s64>) = G_BUILD_VECTOR [[COPY]](s64), [[COPY1]](s64) + ; CHECK-NEXT: [[BUILD_VECTOR3:%[0-9]+]]:_(<2 x s64>) = G_BUILD_VECTOR [[COPY]](s64), [[COPY1]](s64) ; CHECK-NEXT: [[BUILD_VECTOR4:%[0-9]+]]:_(<2 x s64>) = G_BUILD_VECTOR [[COPY6]](s64), [[COPY7]](s64) ; CHECK-NEXT: [[BUILD_VECTOR5:%[0-9]+]]:_(<2 x s64>) = G_BUILD_VECTOR [[LOAD]](s64), [[LOAD1]](s64) - ; CHECK-NEXT: [[COPY8:%[0-9]+]]:_(p0) = COPY $x0 ; CHECK-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 1 ; CHECK-NEXT: [[EVEC:%[0-9]+]]:_(s64) = G_EXTRACT_VECTOR_ELT [[BUILD_VECTOR]](<2 x s64>), [[C]](s64) ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 ; CHECK-NEXT: [[EVEC1:%[0-9]+]]:_(s64) = G_EXTRACT_VECTOR_ELT [[BUILD_VECTOR1]](<2 x s64>), [[C1]](s64) ; CHECK-NEXT: [[EVEC2:%[0-9]+]]:_(s64) = G_EXTRACT_VECTOR_ELT [[BUILD_VECTOR4]](<2 x s64>), [[C]](s64) ; CHECK-NEXT: [[EVEC3:%[0-9]+]]:_(s64) = G_EXTRACT_VECTOR_ELT [[BUILD_VECTOR2]](<2 x s64>), [[C1]](s64) + ; CHECK-NEXT: [[SHUF:%[0-9]+]]:_(<2 x s64>) = G_SHUFFLE_VECTOR [[BUILD_VECTOR3]](<2 x s64>), [[BUILD_VECTOR5]], shufflemask(1, 3) + ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s64), [[UV1:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[SHUF]](<2 x s64>) ; CHECK-NEXT: [[BUILD_VECTOR6:%[0-9]+]]:_(<2 x s64>) = G_BUILD_VECTOR [[EVEC]](s64), [[EVEC1]](s64) ; CHECK-NEXT: [[BUILD_VECTOR7:%[0-9]+]]:_(<2 x s64>) = G_BUILD_VECTOR [[EVEC2]](s64), [[EVEC3]](s64) - ; CHECK-NEXT: [[SHUF:%[0-9]+]]:_(<2 x s64>) = G_SHUFFLE_VECTOR [[BUILD_VECTOR3]](<2 x s64>), [[BUILD_VECTOR5]], shufflemask(1, 3) + ; CHECK-NEXT: [[BUILD_VECTOR8:%[0-9]+]]:_(<2 x s64>) = G_BUILD_VECTOR [[UV]](s64), [[UV1]](s64) ; CHECK-NEXT: G_STORE [[BUILD_VECTOR6]](<2 x s64>), [[COPY8]](p0) :: (store (<2 x s64>), align 64) ; CHECK-NEXT: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 16 ; CHECK-NEXT: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY8]], [[C2]](s64) ; CHECK-NEXT: G_STORE [[BUILD_VECTOR7]](<2 x s64>), [[PTR_ADD]](p0) :: (store (<2 x s64>) into unknown-address + 16) ; CHECK-NEXT: [[C3:%[0-9]+]]:_(s64) = G_CONSTANT i64 32 ; CHECK-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY8]], [[C3]](s64) - ; CHECK-NEXT: G_STORE [[SHUF]](<2 x s64>), [[PTR_ADD1]](p0) :: (store (<2 x s64>) into unknown-address + 32, align 32) + ; CHECK-NEXT: G_STORE [[BUILD_VECTOR8]](<2 x s64>), [[PTR_ADD1]](p0) :: (store (<2 x s64>) into unknown-address + 32, align 32) ; CHECK-NEXT: RET_ReallyLR %3:_(s64) = COPY $d0 %4:_(s64) = COPY $d1 diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/legalize-ssubsat.mir 
b/llvm/test/CodeGen/AArch64/GlobalISel/legalize-ssubsat.mir index 6c5c2b783a49..10d365f993b2 100644 --- a/llvm/test/CodeGen/AArch64/GlobalISel/legalize-ssubsat.mir +++ b/llvm/test/CodeGen/AArch64/GlobalISel/legalize-ssubsat.mir @@ -229,11 +229,10 @@ body: | ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(s8) = G_IMPLICIT_DEF ; CHECK-NEXT: [[DEF1:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF ; CHECK-NEXT: [[UV16:%[0-9]+]]:_(s8), [[UV17:%[0-9]+]]:_(s8), [[UV18:%[0-9]+]]:_(s8), [[UV19:%[0-9]+]]:_(s8) = G_UNMERGE_VALUES [[DEF1]](s32) - ; CHECK-NEXT: [[UV20:%[0-9]+]]:_(s8), [[UV21:%[0-9]+]]:_(s8), [[UV22:%[0-9]+]]:_(s8), [[UV23:%[0-9]+]]:_(s8) = G_UNMERGE_VALUES [[DEF1]](s32) ; CHECK-NEXT: [[MV:%[0-9]+]]:_(s32) = G_MERGE_VALUES [[UV4]](s8), [[UV5]](s8), [[UV6]](s8), [[UV7]](s8) ; CHECK-NEXT: [[MV1:%[0-9]+]]:_(s32) = G_MERGE_VALUES [[UV8]](s8), [[UV9]](s8), [[UV10]](s8), [[UV11]](s8) ; CHECK-NEXT: [[MV2:%[0-9]+]]:_(s32) = G_MERGE_VALUES [[UV12]](s8), [[UV13]](s8), [[UV14]](s8), [[DEF]](s8) - ; CHECK-NEXT: [[MV3:%[0-9]+]]:_(s32) = G_MERGE_VALUES [[UV16]](s8), [[UV17]](s8), [[UV18]](s8), [[UV20]](s8) + ; CHECK-NEXT: [[MV3:%[0-9]+]]:_(s32) = G_MERGE_VALUES [[UV16]](s8), [[UV17]](s8), [[UV18]](s8), [[UV16]](s8) ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 40 ; CHECK-NEXT: [[MV4:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[MV]](s32), [[MV1]](s32) ; CHECK-NEXT: [[MV5:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[MV2]](s32), [[MV3]](s32) diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/artifact-combiner-extract.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/artifact-combiner-extract.mir index b7ec21b576af..336521199709 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/artifact-combiner-extract.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/artifact-combiner-extract.mir @@ -326,8 +326,13 @@ body: | ; CHECK: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 1 ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s64>) = G_BUILD_VECTOR [[C]](s64), [[C1]](s64) - ; CHECK-NEXT: [[EXTRACT:%[0-9]+]]:_(s64) = G_EXTRACT [[BUILD_VECTOR]](<2 x s64>), 32 - ; CHECK-NEXT: $vgpr0_vgpr1 = COPY [[EXTRACT]](s64) + ; CHECK-NEXT: [[BITCAST:%[0-9]+]]:_(s128) = G_BITCAST [[BUILD_VECTOR]](<2 x s64>) + ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s64), [[UV1:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[BITCAST]](s128) + ; CHECK-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 32 + ; CHECK-NEXT: [[LSHR:%[0-9]+]]:_(s64) = G_LSHR [[UV]], [[C2]](s32) + ; CHECK-NEXT: [[SHL:%[0-9]+]]:_(s64) = G_SHL [[UV1]], [[C2]](s32) + ; CHECK-NEXT: [[OR:%[0-9]+]]:_(s64) = G_OR [[LSHR]], [[SHL]] + ; CHECK-NEXT: $vgpr0_vgpr1 = COPY [[OR]](s64) %0:_(s64) = G_CONSTANT i64 0 %1:_(s64) = G_CONSTANT i64 1 %2:_(<2 x s64>) = G_BUILD_VECTOR %0, %1 diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/artifact-combiner-zext.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/artifact-combiner-zext.mir index 71eee774acbe..b0fb33b34061 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/artifact-combiner-zext.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/artifact-combiner-zext.mir @@ -74,17 +74,13 @@ body: | ; CHECK: [[COPY:%[0-9]+]]:_(<3 x s32>) = COPY $vgpr0_vgpr1_vgpr2 ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<3 x s32>) - ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[UV]](s32), [[UV1]](s32) - ; CHECK-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[UV2]](s32), [[DEF]](s32) - ; CHECK-NEXT: 
[[DEF1:%[0-9]+]]:_(<2 x s32>) = G_IMPLICIT_DEF - ; CHECK-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[C]](s32), [[C]](s32) - ; CHECK-NEXT: [[BUILD_VECTOR3:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[C]](s32), [[DEF]](s32) - ; CHECK-NEXT: [[AND:%[0-9]+]]:_(<2 x s32>) = G_AND [[BUILD_VECTOR]], [[BUILD_VECTOR2]] - ; CHECK-NEXT: [[AND1:%[0-9]+]]:_(<2 x s32>) = G_AND [[BUILD_VECTOR1]], [[BUILD_VECTOR3]] - ; CHECK-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s32>) = G_CONCAT_VECTORS [[AND]](<2 x s32>), [[AND1]](<2 x s32>), [[DEF1]](<2 x s32>) - ; CHECK-NEXT: [[UV3:%[0-9]+]]:_(<3 x s32>), [[UV4:%[0-9]+]]:_(<3 x s32>) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<6 x s32>) - ; CHECK-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[UV3]](<3 x s32>) + ; CHECK-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[C]](s32), [[C]](s32) + ; CHECK-NEXT: [[AND:%[0-9]+]]:_(<2 x s32>) = G_AND [[BUILD_VECTOR]], [[BUILD_VECTOR1]] + ; CHECK-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[UV2]], [[C]] + ; CHECK-NEXT: [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AND]](<2 x s32>) + ; CHECK-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[UV3]](s32), [[UV4]](s32), [[AND1]](s32) + ; CHECK-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[BUILD_VECTOR2]](<3 x s32>) %0:_(<3 x s32>) = COPY $vgpr0_vgpr1_vgpr2 %1:_(<3 x s16>) = G_TRUNC %0 %2:_(<3 x s32>) = G_ZEXT %1 diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/bug-legalization-artifact-combiner-dead-def.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/bug-legalization-artifact-combiner-dead-def.mir index f7bf2b25e95b..2ec0cb5a66e1 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/bug-legalization-artifact-combiner-dead-def.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/bug-legalization-artifact-combiner-dead-def.mir @@ -30,12 +30,14 @@ body: | ; GFX10-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 ; GFX10-NEXT: [[MV:%[0-9]+]]:_(p4) = G_MERGE_VALUES [[COPY1]](s32), [[COPY2]](s32) ; GFX10-NEXT: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[MV]](p4) :: (load (<4 x s32>), align 4, addrspace 4) - ; GFX10-NEXT: [[EXTRACT:%[0-9]+]]:_(s32) = G_EXTRACT [[LOAD]](<4 x s32>), 96 - ; GFX10-NEXT: [[EXTRACT1:%[0-9]+]]:_(s32) = G_EXTRACT [[LOAD]](<4 x s32>), 64 - ; GFX10-NEXT: G_STORE [[EXTRACT1]](s32), [[COPY]](p5) :: (store (s32), align 8, addrspace 5) + ; GFX10-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[LOAD]](<4 x s32>) + ; GFX10-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY [[UV3]](s32) + ; GFX10-NEXT: [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32), [[UV6:%[0-9]+]]:_(s32), [[UV7:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[LOAD]](<4 x s32>) + ; GFX10-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY [[UV6]](s32) + ; GFX10-NEXT: G_STORE [[COPY4]](s32), [[COPY]](p5) :: (store (s32), align 8, addrspace 5) ; GFX10-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 ; GFX10-NEXT: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](s32) - ; GFX10-NEXT: G_STORE [[EXTRACT]](s32), [[PTR_ADD]](p5) :: (store (s32) into unknown-address + 4, addrspace 5) + ; GFX10-NEXT: G_STORE [[COPY3]](s32), [[PTR_ADD]](p5) :: (store (s32) into unknown-address + 4, addrspace 5) %0:_(p5) = COPY $vgpr0 %1:_(s32) = COPY $vgpr1 %2:_(s32) = COPY $vgpr2 @@ -62,12 +64,14 @@ body: | ; GFX10-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 ; GFX10-NEXT: [[MV:%[0-9]+]]:_(p4) = G_MERGE_VALUES [[COPY1]](s32), [[COPY2]](s32) ; GFX10-NEXT: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[MV]](p4) :: (load (<4 x s32>), align 4, addrspace 4) - ; GFX10-NEXT: [[EXTRACT:%[0-9]+]]:_(s32) = 
G_EXTRACT [[LOAD]](<4 x s32>), 96 - ; GFX10-NEXT: [[EXTRACT1:%[0-9]+]]:_(s32) = G_EXTRACT [[LOAD]](<4 x s32>), 64 - ; GFX10-NEXT: G_STORE [[EXTRACT1]](s32), [[COPY]](p5) :: (store (s32), align 8, addrspace 5) + ; GFX10-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[LOAD]](<4 x s32>) + ; GFX10-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY [[UV3]](s32) + ; GFX10-NEXT: [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32), [[UV6:%[0-9]+]]:_(s32), [[UV7:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[LOAD]](<4 x s32>) + ; GFX10-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY [[UV6]](s32) + ; GFX10-NEXT: G_STORE [[COPY4]](s32), [[COPY]](p5) :: (store (s32), align 8, addrspace 5) ; GFX10-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 ; GFX10-NEXT: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](s32) - ; GFX10-NEXT: G_STORE [[EXTRACT]](s32), [[PTR_ADD]](p5) :: (store (s32) into unknown-address + 4, addrspace 5) + ; GFX10-NEXT: G_STORE [[COPY3]](s32), [[PTR_ADD]](p5) :: (store (s32) into unknown-address + 4, addrspace 5) %0:_(p5) = COPY $vgpr0 %1:_(s32) = COPY $vgpr1 %2:_(s32) = COPY $vgpr2 diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/combine-fma-add-mul.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/combine-fma-add-mul.ll index 657184620fa6..a407de876bbf 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/combine-fma-add-mul.ll +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/combine-fma-add-mul.ll @@ -628,23 +628,25 @@ define <3 x half> @test_3xhalf_add_mul_rhs(<3 x half> %x, <3 x half> %y, <3 x ha ; GFX9-NEXT: v_lshlrev_b32_e32 v6, 16, v6 ; GFX9-NEXT: v_and_or_b32 v0, v0, v9, v6 ; GFX9-NEXT: v_lshlrev_b32_e32 v6, 16, v7 -; GFX9-NEXT: v_lshrrev_b32_e32 v8, 16, v4 -; GFX9-NEXT: s_lshl_b32 s4, s4, 16 ; GFX9-NEXT: v_and_or_b32 v2, v2, v9, v6 +; GFX9-NEXT: s_lshl_b32 s4, s4, 16 +; GFX9-NEXT: v_pk_mul_f16 v0, v0, v2 +; GFX9-NEXT: v_lshrrev_b32_e32 v8, 16, v4 ; GFX9-NEXT: v_and_or_b32 v1, v1, v9, s4 ; GFX9-NEXT: v_and_or_b32 v3, v3, v9, s4 -; GFX9-NEXT: v_pk_mul_f16 v0, v0, v2 -; GFX9-NEXT: v_lshlrev_b32_e32 v2, 16, v8 +; GFX9-NEXT: v_lshrrev_b32_e32 v2, 16, v0 ; GFX9-NEXT: v_pk_mul_f16 v1, v1, v3 -; GFX9-NEXT: v_and_or_b32 v2, v4, v9, v2 -; GFX9-NEXT: v_lshrrev_b32_e32 v4, 16, v0 -; GFX9-NEXT: v_and_or_b32 v3, v5, v9, s4 -; GFX9-NEXT: v_lshlrev_b32_e32 v4, 16, v4 +; GFX9-NEXT: v_lshlrev_b32_e32 v3, 16, v8 +; GFX9-NEXT: v_lshlrev_b32_e32 v2, 16, v2 +; GFX9-NEXT: v_and_or_b32 v3, v4, v9, v3 +; GFX9-NEXT: v_and_or_b32 v0, v0, v9, v2 +; GFX9-NEXT: v_pk_add_f16 v0, v3, v0 +; GFX9-NEXT: v_and_or_b32 v4, v5, v9, s4 ; GFX9-NEXT: v_and_or_b32 v1, v1, v9, s4 -; GFX9-NEXT: v_and_or_b32 v0, v0, v9, v4 -; GFX9-NEXT: v_pk_add_f16 v1, v3, v1 -; GFX9-NEXT: s_lshl_b32 s4, s4, 16 -; GFX9-NEXT: v_pk_add_f16 v0, v2, v0 +; GFX9-NEXT: v_lshrrev_b32_e32 v2, 16, v0 +; GFX9-NEXT: v_pk_add_f16 v1, v4, v1 +; GFX9-NEXT: v_lshlrev_b32_e32 v2, 16, v2 +; GFX9-NEXT: v_and_or_b32 v0, v0, v9, v2 ; GFX9-NEXT: v_and_or_b32 v1, v1, v9, s4 ; GFX9-NEXT: s_setpc_b64 s[30:31] ; @@ -657,18 +659,20 @@ define <3 x half> @test_3xhalf_add_mul_rhs(<3 x half> %x, <3 x half> %y, <3 x ha ; GFX9-CONTRACT-NEXT: v_lshlrev_b32_e32 v6, 16, v6 ; GFX9-CONTRACT-NEXT: v_lshrrev_b32_e32 v8, 16, v4 ; GFX9-CONTRACT-NEXT: v_and_or_b32 v0, v0, v9, v6 -; GFX9-CONTRACT-NEXT: s_lshl_b32 s4, s4, 16 ; GFX9-CONTRACT-NEXT: v_lshlrev_b32_e32 v6, 16, v7 -; GFX9-CONTRACT-NEXT: v_and_or_b32 v1, v1, v9, s4 ; GFX9-CONTRACT-NEXT: v_and_or_b32 v2, v2, v9, v6 -; GFX9-CONTRACT-NEXT: v_and_or_b32 v3, v3, v9, s4 ; GFX9-CONTRACT-NEXT: 
v_lshlrev_b32_e32 v6, 16, v8 -; GFX9-CONTRACT-NEXT: v_and_or_b32 v5, v5, v9, s4 ; GFX9-CONTRACT-NEXT: v_and_or_b32 v4, v4, v9, v6 -; GFX9-CONTRACT-NEXT: v_pk_fma_f16 v1, v1, v3, v5 ; GFX9-CONTRACT-NEXT: s_lshl_b32 s4, s4, 16 ; GFX9-CONTRACT-NEXT: v_pk_fma_f16 v0, v0, v2, v4 ; GFX9-CONTRACT-NEXT: v_and_or_b32 v1, v1, v9, s4 +; GFX9-CONTRACT-NEXT: v_and_or_b32 v3, v3, v9, s4 +; GFX9-CONTRACT-NEXT: v_and_or_b32 v5, v5, v9, s4 +; GFX9-CONTRACT-NEXT: v_lshrrev_b32_e32 v2, 16, v0 +; GFX9-CONTRACT-NEXT: v_pk_fma_f16 v1, v1, v3, v5 +; GFX9-CONTRACT-NEXT: v_lshlrev_b32_e32 v2, 16, v2 +; GFX9-CONTRACT-NEXT: v_and_or_b32 v0, v0, v9, v2 +; GFX9-CONTRACT-NEXT: v_and_or_b32 v1, v1, v9, s4 ; GFX9-CONTRACT-NEXT: s_setpc_b64 s[30:31] ; ; GFX9-DENORM-LABEL: test_3xhalf_add_mul_rhs: @@ -680,23 +684,25 @@ define <3 x half> @test_3xhalf_add_mul_rhs(<3 x half> %x, <3 x half> %y, <3 x ha ; GFX9-DENORM-NEXT: v_lshlrev_b32_e32 v6, 16, v6 ; GFX9-DENORM-NEXT: v_and_or_b32 v0, v0, v9, v6 ; GFX9-DENORM-NEXT: v_lshlrev_b32_e32 v6, 16, v7 -; GFX9-DENORM-NEXT: v_lshrrev_b32_e32 v8, 16, v4 -; GFX9-DENORM-NEXT: s_lshl_b32 s4, s4, 16 ; GFX9-DENORM-NEXT: v_and_or_b32 v2, v2, v9, v6 +; GFX9-DENORM-NEXT: s_lshl_b32 s4, s4, 16 +; GFX9-DENORM-NEXT: v_pk_mul_f16 v0, v0, v2 +; GFX9-DENORM-NEXT: v_lshrrev_b32_e32 v8, 16, v4 ; GFX9-DENORM-NEXT: v_and_or_b32 v1, v1, v9, s4 ; GFX9-DENORM-NEXT: v_and_or_b32 v3, v3, v9, s4 -; GFX9-DENORM-NEXT: v_pk_mul_f16 v0, v0, v2 -; GFX9-DENORM-NEXT: v_lshlrev_b32_e32 v2, 16, v8 +; GFX9-DENORM-NEXT: v_lshrrev_b32_e32 v2, 16, v0 ; GFX9-DENORM-NEXT: v_pk_mul_f16 v1, v1, v3 -; GFX9-DENORM-NEXT: v_and_or_b32 v2, v4, v9, v2 -; GFX9-DENORM-NEXT: v_lshrrev_b32_e32 v4, 16, v0 -; GFX9-DENORM-NEXT: v_and_or_b32 v3, v5, v9, s4 -; GFX9-DENORM-NEXT: v_lshlrev_b32_e32 v4, 16, v4 +; GFX9-DENORM-NEXT: v_lshlrev_b32_e32 v3, 16, v8 +; GFX9-DENORM-NEXT: v_lshlrev_b32_e32 v2, 16, v2 +; GFX9-DENORM-NEXT: v_and_or_b32 v3, v4, v9, v3 +; GFX9-DENORM-NEXT: v_and_or_b32 v0, v0, v9, v2 +; GFX9-DENORM-NEXT: v_pk_add_f16 v0, v3, v0 +; GFX9-DENORM-NEXT: v_and_or_b32 v4, v5, v9, s4 ; GFX9-DENORM-NEXT: v_and_or_b32 v1, v1, v9, s4 -; GFX9-DENORM-NEXT: v_and_or_b32 v0, v0, v9, v4 -; GFX9-DENORM-NEXT: v_pk_add_f16 v1, v3, v1 -; GFX9-DENORM-NEXT: s_lshl_b32 s4, s4, 16 -; GFX9-DENORM-NEXT: v_pk_add_f16 v0, v2, v0 +; GFX9-DENORM-NEXT: v_lshrrev_b32_e32 v2, 16, v0 +; GFX9-DENORM-NEXT: v_pk_add_f16 v1, v4, v1 +; GFX9-DENORM-NEXT: v_lshlrev_b32_e32 v2, 16, v2 +; GFX9-DENORM-NEXT: v_and_or_b32 v0, v0, v9, v2 ; GFX9-DENORM-NEXT: v_and_or_b32 v1, v1, v9, s4 ; GFX9-DENORM-NEXT: s_setpc_b64 s[30:31] ; @@ -709,18 +715,20 @@ define <3 x half> @test_3xhalf_add_mul_rhs(<3 x half> %x, <3 x half> %y, <3 x ha ; GFX9-UNSAFE-NEXT: v_lshlrev_b32_e32 v6, 16, v6 ; GFX9-UNSAFE-NEXT: v_lshrrev_b32_e32 v8, 16, v4 ; GFX9-UNSAFE-NEXT: v_and_or_b32 v0, v0, v9, v6 -; GFX9-UNSAFE-NEXT: s_lshl_b32 s4, s4, 16 ; GFX9-UNSAFE-NEXT: v_lshlrev_b32_e32 v6, 16, v7 -; GFX9-UNSAFE-NEXT: v_and_or_b32 v1, v1, v9, s4 ; GFX9-UNSAFE-NEXT: v_and_or_b32 v2, v2, v9, v6 -; GFX9-UNSAFE-NEXT: v_and_or_b32 v3, v3, v9, s4 ; GFX9-UNSAFE-NEXT: v_lshlrev_b32_e32 v6, 16, v8 -; GFX9-UNSAFE-NEXT: v_and_or_b32 v5, v5, v9, s4 ; GFX9-UNSAFE-NEXT: v_and_or_b32 v4, v4, v9, v6 -; GFX9-UNSAFE-NEXT: v_pk_fma_f16 v1, v1, v3, v5 ; GFX9-UNSAFE-NEXT: s_lshl_b32 s4, s4, 16 ; GFX9-UNSAFE-NEXT: v_pk_fma_f16 v0, v0, v2, v4 ; GFX9-UNSAFE-NEXT: v_and_or_b32 v1, v1, v9, s4 +; GFX9-UNSAFE-NEXT: v_and_or_b32 v3, v3, v9, s4 +; GFX9-UNSAFE-NEXT: v_and_or_b32 v5, v5, v9, s4 +; GFX9-UNSAFE-NEXT: v_lshrrev_b32_e32 v2, 
16, v0 +; GFX9-UNSAFE-NEXT: v_pk_fma_f16 v1, v1, v3, v5 +; GFX9-UNSAFE-NEXT: v_lshlrev_b32_e32 v2, 16, v2 +; GFX9-UNSAFE-NEXT: v_and_or_b32 v0, v0, v9, v2 +; GFX9-UNSAFE-NEXT: v_and_or_b32 v1, v1, v9, s4 ; GFX9-UNSAFE-NEXT: s_setpc_b64 s[30:31] ; ; GFX10-LABEL: test_3xhalf_add_mul_rhs: @@ -734,23 +742,25 @@ define <3 x half> @test_3xhalf_add_mul_rhs(<3 x half> %x, <3 x half> %y, <3 x ha ; GFX10-NEXT: v_lshlrev_b32_e32 v6, 16, v6 ; GFX10-NEXT: v_lshlrev_b32_e32 v7, 16, v7 ; GFX10-NEXT: v_and_or_b32 v1, v1, v8, s4 +; GFX10-NEXT: v_and_or_b32 v3, v3, v8, s4 ; GFX10-NEXT: v_and_or_b32 v0, v0, v8, v6 ; GFX10-NEXT: v_and_or_b32 v2, v2, v8, v7 +; GFX10-NEXT: v_pk_mul_f16 v1, v1, v3 ; GFX10-NEXT: v_pk_mul_f16 v0, v0, v2 -; GFX10-NEXT: v_and_or_b32 v2, v3, v8, s4 -; GFX10-NEXT: v_lshrrev_b32_e32 v3, 16, v4 +; GFX10-NEXT: v_lshrrev_b32_e32 v2, 16, v4 +; GFX10-NEXT: v_and_or_b32 v1, v1, v8, s4 ; GFX10-NEXT: v_lshrrev_b32_e32 v6, 16, v0 -; GFX10-NEXT: v_pk_mul_f16 v1, v1, v2 +; GFX10-NEXT: v_lshlrev_b32_e32 v2, 16, v2 +; GFX10-NEXT: v_lshlrev_b32_e32 v6, 16, v6 +; GFX10-NEXT: v_and_or_b32 v2, v4, v8, v2 +; GFX10-NEXT: v_and_or_b32 v0, v0, v8, v6 +; GFX10-NEXT: v_pk_add_f16 v0, v2, v0 ; GFX10-NEXT: v_and_or_b32 v2, v5, v8, s4 -; GFX10-NEXT: v_lshlrev_b32_e32 v3, 16, v3 -; GFX10-NEXT: v_lshlrev_b32_e32 v5, 16, v6 -; GFX10-NEXT: v_and_or_b32 v1, v1, v8, s4 -; GFX10-NEXT: s_lshl_b32 s4, s4, 16 -; GFX10-NEXT: v_and_or_b32 v3, v4, v8, v3 -; GFX10-NEXT: v_and_or_b32 v0, v0, v8, v5 +; GFX10-NEXT: v_lshrrev_b32_e32 v3, 16, v0 ; GFX10-NEXT: v_pk_add_f16 v1, v2, v1 -; GFX10-NEXT: v_pk_add_f16 v0, v3, v0 +; GFX10-NEXT: v_lshlrev_b32_e32 v3, 16, v3 ; GFX10-NEXT: v_and_or_b32 v1, v1, v8, s4 +; GFX10-NEXT: v_and_or_b32 v0, v0, v8, v3 ; GFX10-NEXT: s_setpc_b64 s[30:31] ; ; GFX10-CONTRACT-LABEL: test_3xhalf_add_mul_rhs: @@ -758,23 +768,25 @@ define <3 x half> @test_3xhalf_add_mul_rhs(<3 x half> %x, <3 x half> %y, <3 x ha ; GFX10-CONTRACT-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX10-CONTRACT-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-CONTRACT-NEXT: v_lshrrev_b32_e32 v6, 16, v0 -; GFX10-CONTRACT-NEXT: v_mov_b32_e32 v7, 0xffff -; GFX10-CONTRACT-NEXT: v_lshrrev_b32_e32 v8, 16, v2 -; GFX10-CONTRACT-NEXT: v_lshrrev_b32_e32 v9, 16, v4 +; GFX10-CONTRACT-NEXT: v_lshrrev_b32_e32 v7, 16, v2 +; GFX10-CONTRACT-NEXT: v_lshrrev_b32_e32 v8, 16, v4 +; GFX10-CONTRACT-NEXT: v_mov_b32_e32 v9, 0xffff ; GFX10-CONTRACT-NEXT: s_lshl_b32 s4, s4, 16 ; GFX10-CONTRACT-NEXT: v_lshlrev_b32_e32 v6, 16, v6 -; GFX10-CONTRACT-NEXT: v_and_or_b32 v1, v1, v7, s4 +; GFX10-CONTRACT-NEXT: v_lshlrev_b32_e32 v7, 16, v7 ; GFX10-CONTRACT-NEXT: v_lshlrev_b32_e32 v8, 16, v8 -; GFX10-CONTRACT-NEXT: v_and_or_b32 v3, v3, v7, s4 -; GFX10-CONTRACT-NEXT: v_lshlrev_b32_e32 v9, 16, v9 -; GFX10-CONTRACT-NEXT: v_and_or_b32 v5, v5, v7, s4 -; GFX10-CONTRACT-NEXT: v_and_or_b32 v0, v0, v7, v6 -; GFX10-CONTRACT-NEXT: v_and_or_b32 v2, v2, v7, v8 -; GFX10-CONTRACT-NEXT: s_lshl_b32 s4, s4, 16 -; GFX10-CONTRACT-NEXT: v_and_or_b32 v4, v4, v7, v9 -; GFX10-CONTRACT-NEXT: v_pk_fma_f16 v1, v1, v3, v5 +; GFX10-CONTRACT-NEXT: v_and_or_b32 v1, v1, v9, s4 +; GFX10-CONTRACT-NEXT: v_and_or_b32 v0, v0, v9, v6 +; GFX10-CONTRACT-NEXT: v_and_or_b32 v2, v2, v9, v7 +; GFX10-CONTRACT-NEXT: v_and_or_b32 v4, v4, v9, v8 ; GFX10-CONTRACT-NEXT: v_pk_fma_f16 v0, v0, v2, v4 -; GFX10-CONTRACT-NEXT: v_and_or_b32 v1, v1, v7, s4 +; GFX10-CONTRACT-NEXT: v_and_or_b32 v2, v3, v9, s4 +; GFX10-CONTRACT-NEXT: v_and_or_b32 v4, v5, v9, s4 +; GFX10-CONTRACT-NEXT: v_lshrrev_b32_e32 v3, 16, v0 +; 
GFX10-CONTRACT-NEXT: v_pk_fma_f16 v1, v1, v2, v4 +; GFX10-CONTRACT-NEXT: v_lshlrev_b32_e32 v3, 16, v3 +; GFX10-CONTRACT-NEXT: v_and_or_b32 v1, v1, v9, s4 +; GFX10-CONTRACT-NEXT: v_and_or_b32 v0, v0, v9, v3 ; GFX10-CONTRACT-NEXT: s_setpc_b64 s[30:31] ; ; GFX10-DENORM-LABEL: test_3xhalf_add_mul_rhs: @@ -788,23 +800,25 @@ define <3 x half> @test_3xhalf_add_mul_rhs(<3 x half> %x, <3 x half> %y, <3 x ha ; GFX10-DENORM-NEXT: v_lshlrev_b32_e32 v6, 16, v6 ; GFX10-DENORM-NEXT: v_lshlrev_b32_e32 v7, 16, v7 ; GFX10-DENORM-NEXT: v_and_or_b32 v1, v1, v8, s4 +; GFX10-DENORM-NEXT: v_and_or_b32 v3, v3, v8, s4 ; GFX10-DENORM-NEXT: v_and_or_b32 v0, v0, v8, v6 ; GFX10-DENORM-NEXT: v_and_or_b32 v2, v2, v8, v7 +; GFX10-DENORM-NEXT: v_pk_mul_f16 v1, v1, v3 ; GFX10-DENORM-NEXT: v_pk_mul_f16 v0, v0, v2 -; GFX10-DENORM-NEXT: v_and_or_b32 v2, v3, v8, s4 -; GFX10-DENORM-NEXT: v_lshrrev_b32_e32 v3, 16, v4 +; GFX10-DENORM-NEXT: v_lshrrev_b32_e32 v2, 16, v4 +; GFX10-DENORM-NEXT: v_and_or_b32 v1, v1, v8, s4 ; GFX10-DENORM-NEXT: v_lshrrev_b32_e32 v6, 16, v0 -; GFX10-DENORM-NEXT: v_pk_mul_f16 v1, v1, v2 +; GFX10-DENORM-NEXT: v_lshlrev_b32_e32 v2, 16, v2 +; GFX10-DENORM-NEXT: v_lshlrev_b32_e32 v6, 16, v6 +; GFX10-DENORM-NEXT: v_and_or_b32 v2, v4, v8, v2 +; GFX10-DENORM-NEXT: v_and_or_b32 v0, v0, v8, v6 +; GFX10-DENORM-NEXT: v_pk_add_f16 v0, v2, v0 ; GFX10-DENORM-NEXT: v_and_or_b32 v2, v5, v8, s4 -; GFX10-DENORM-NEXT: v_lshlrev_b32_e32 v3, 16, v3 -; GFX10-DENORM-NEXT: v_lshlrev_b32_e32 v5, 16, v6 -; GFX10-DENORM-NEXT: v_and_or_b32 v1, v1, v8, s4 -; GFX10-DENORM-NEXT: s_lshl_b32 s4, s4, 16 -; GFX10-DENORM-NEXT: v_and_or_b32 v3, v4, v8, v3 -; GFX10-DENORM-NEXT: v_and_or_b32 v0, v0, v8, v5 +; GFX10-DENORM-NEXT: v_lshrrev_b32_e32 v3, 16, v0 ; GFX10-DENORM-NEXT: v_pk_add_f16 v1, v2, v1 -; GFX10-DENORM-NEXT: v_pk_add_f16 v0, v3, v0 +; GFX10-DENORM-NEXT: v_lshlrev_b32_e32 v3, 16, v3 ; GFX10-DENORM-NEXT: v_and_or_b32 v1, v1, v8, s4 +; GFX10-DENORM-NEXT: v_and_or_b32 v0, v0, v8, v3 ; GFX10-DENORM-NEXT: s_setpc_b64 s[30:31] ; ; GFX10-UNSAFE-LABEL: test_3xhalf_add_mul_rhs: @@ -812,23 +826,25 @@ define <3 x half> @test_3xhalf_add_mul_rhs(<3 x half> %x, <3 x half> %y, <3 x ha ; GFX10-UNSAFE-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX10-UNSAFE-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-UNSAFE-NEXT: v_lshrrev_b32_e32 v6, 16, v0 -; GFX10-UNSAFE-NEXT: v_mov_b32_e32 v7, 0xffff -; GFX10-UNSAFE-NEXT: v_lshrrev_b32_e32 v8, 16, v2 -; GFX10-UNSAFE-NEXT: v_lshrrev_b32_e32 v9, 16, v4 +; GFX10-UNSAFE-NEXT: v_lshrrev_b32_e32 v7, 16, v2 +; GFX10-UNSAFE-NEXT: v_lshrrev_b32_e32 v8, 16, v4 +; GFX10-UNSAFE-NEXT: v_mov_b32_e32 v9, 0xffff ; GFX10-UNSAFE-NEXT: s_lshl_b32 s4, s4, 16 ; GFX10-UNSAFE-NEXT: v_lshlrev_b32_e32 v6, 16, v6 -; GFX10-UNSAFE-NEXT: v_and_or_b32 v1, v1, v7, s4 +; GFX10-UNSAFE-NEXT: v_lshlrev_b32_e32 v7, 16, v7 ; GFX10-UNSAFE-NEXT: v_lshlrev_b32_e32 v8, 16, v8 -; GFX10-UNSAFE-NEXT: v_and_or_b32 v3, v3, v7, s4 -; GFX10-UNSAFE-NEXT: v_lshlrev_b32_e32 v9, 16, v9 -; GFX10-UNSAFE-NEXT: v_and_or_b32 v5, v5, v7, s4 -; GFX10-UNSAFE-NEXT: v_and_or_b32 v0, v0, v7, v6 -; GFX10-UNSAFE-NEXT: v_and_or_b32 v2, v2, v7, v8 -; GFX10-UNSAFE-NEXT: s_lshl_b32 s4, s4, 16 -; GFX10-UNSAFE-NEXT: v_and_or_b32 v4, v4, v7, v9 -; GFX10-UNSAFE-NEXT: v_pk_fma_f16 v1, v1, v3, v5 +; GFX10-UNSAFE-NEXT: v_and_or_b32 v1, v1, v9, s4 +; GFX10-UNSAFE-NEXT: v_and_or_b32 v0, v0, v9, v6 +; GFX10-UNSAFE-NEXT: v_and_or_b32 v2, v2, v9, v7 +; GFX10-UNSAFE-NEXT: v_and_or_b32 v4, v4, v9, v8 ; GFX10-UNSAFE-NEXT: v_pk_fma_f16 v0, v0, v2, v4 -; GFX10-UNSAFE-NEXT: v_and_or_b32 v1, v1, 
v7, s4 +; GFX10-UNSAFE-NEXT: v_and_or_b32 v2, v3, v9, s4 +; GFX10-UNSAFE-NEXT: v_and_or_b32 v4, v5, v9, s4 +; GFX10-UNSAFE-NEXT: v_lshrrev_b32_e32 v3, 16, v0 +; GFX10-UNSAFE-NEXT: v_pk_fma_f16 v1, v1, v2, v4 +; GFX10-UNSAFE-NEXT: v_lshlrev_b32_e32 v3, 16, v3 +; GFX10-UNSAFE-NEXT: v_and_or_b32 v1, v1, v9, s4 +; GFX10-UNSAFE-NEXT: v_and_or_b32 v0, v0, v9, v3 ; GFX10-UNSAFE-NEXT: s_setpc_b64 s[30:31] .entry: %a = fmul <3 x half> %x, %y diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/function-returns.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/function-returns.ll index 147d60fdac64..122b5e65df6e 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/function-returns.ll +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/function-returns.ll @@ -888,11 +888,12 @@ define <3 x i16> @v3i16_func_void() #0 { ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF ; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(<3 x s16>) = G_LOAD [[DEF]](p1) :: (load (<3 x s16>) from `<3 x i16> addrspace(1)* undef`, align 8, addrspace 1) - ; CHECK-NEXT: [[DEF1:%[0-9]+]]:_(<3 x s16>) = G_IMPLICIT_DEF - ; CHECK-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[LOAD]](<3 x s16>), [[DEF1]](<3 x s16>) - ; CHECK-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>), [[UV2:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<6 x s16>) - ; CHECK-NEXT: $vgpr0 = COPY [[UV]](<2 x s16>) - ; CHECK-NEXT: $vgpr1 = COPY [[UV1]](<2 x s16>) + ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s16), [[UV1:%[0-9]+]]:_(s16), [[UV2:%[0-9]+]]:_(s16) = G_UNMERGE_VALUES [[LOAD]](<3 x s16>) + ; CHECK-NEXT: [[DEF1:%[0-9]+]]:_(s16) = G_IMPLICIT_DEF + ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s16>) = G_BUILD_VECTOR [[UV]](s16), [[UV1]](s16), [[UV2]](s16), [[DEF1]](s16) + ; CHECK-NEXT: [[UV3:%[0-9]+]]:_(<2 x s16>), [[UV4:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[BUILD_VECTOR]](<4 x s16>) + ; CHECK-NEXT: $vgpr0 = COPY [[UV3]](<2 x s16>) + ; CHECK-NEXT: $vgpr1 = COPY [[UV4]](<2 x s16>) ; CHECK-NEXT: [[COPY1:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY]] ; CHECK-NEXT: S_SETPC_B64_return [[COPY1]], implicit $vgpr0, implicit $vgpr1 %val = load <3 x i16>, <3 x i16> addrspace(1)* undef @@ -942,12 +943,13 @@ define <5 x i16> @v5i16_func_void() #0 { ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p4) = G_IMPLICIT_DEF ; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[DEF]](p4) :: (volatile load (p1) from `<5 x i16> addrspace(1)* addrspace(4)* undef`, addrspace 4) ; CHECK-NEXT: [[LOAD1:%[0-9]+]]:_(<5 x s16>) = G_LOAD [[LOAD]](p1) :: (load (<5 x s16>) from %ir.ptr, align 16, addrspace 1) - ; CHECK-NEXT: [[DEF1:%[0-9]+]]:_(<5 x s16>) = G_IMPLICIT_DEF - ; CHECK-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<10 x s16>) = G_CONCAT_VECTORS [[LOAD1]](<5 x s16>), [[DEF1]](<5 x s16>) - ; CHECK-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>), [[UV2:%[0-9]+]]:_(<2 x s16>), [[UV3:%[0-9]+]]:_(<2 x s16>), [[UV4:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<10 x s16>) - ; CHECK-NEXT: $vgpr0 = COPY [[UV]](<2 x s16>) - ; CHECK-NEXT: $vgpr1 = COPY [[UV1]](<2 x s16>) - ; CHECK-NEXT: $vgpr2 = COPY [[UV2]](<2 x s16>) + ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s16), [[UV1:%[0-9]+]]:_(s16), [[UV2:%[0-9]+]]:_(s16), [[UV3:%[0-9]+]]:_(s16), [[UV4:%[0-9]+]]:_(s16) = G_UNMERGE_VALUES [[LOAD1]](<5 x s16>) + ; CHECK-NEXT: [[DEF1:%[0-9]+]]:_(s16) = G_IMPLICIT_DEF + ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<6 x s16>) = G_BUILD_VECTOR [[UV]](s16), [[UV1]](s16), [[UV2]](s16), [[UV3]](s16), [[UV4]](s16), [[DEF1]](s16) + ; CHECK-NEXT: [[UV5:%[0-9]+]]:_(<2 x 
s16>), [[UV6:%[0-9]+]]:_(<2 x s16>), [[UV7:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[BUILD_VECTOR]](<6 x s16>) + ; CHECK-NEXT: $vgpr0 = COPY [[UV5]](<2 x s16>) + ; CHECK-NEXT: $vgpr1 = COPY [[UV6]](<2 x s16>) + ; CHECK-NEXT: $vgpr2 = COPY [[UV7]](<2 x s16>) ; CHECK-NEXT: [[COPY1:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY]] ; CHECK-NEXT: S_SETPC_B64_return [[COPY1]], implicit $vgpr0, implicit $vgpr1, implicit $vgpr2 %ptr = load volatile <5 x i16> addrspace(1)*, <5 x i16> addrspace(1)* addrspace(4)* undef diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/insertelement.large.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/insertelement.large.ll index 3d4acd79132d..e78d959f0af5 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/insertelement.large.ll +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/insertelement.large.ll @@ -14,9 +14,9 @@ define amdgpu_kernel void @v_insert_v64i32_37(<64 x i32> addrspace(1)* %ptr.in, ; GCN-NEXT: global_load_dwordx4 v[44:47], v64, s[0:1] offset:48 ; GCN-NEXT: global_load_dwordx4 v[48:51], v64, s[0:1] offset:64 ; GCN-NEXT: global_load_dwordx4 v[52:55], v64, s[0:1] offset:80 +; GCN-NEXT: global_load_dwordx4 v[0:3], v64, s[0:1] offset:128 ; GCN-NEXT: global_load_dwordx4 v[56:59], v64, s[0:1] offset:96 ; GCN-NEXT: global_load_dwordx4 v[60:63], v64, s[0:1] offset:112 -; GCN-NEXT: global_load_dwordx4 v[0:3], v64, s[0:1] offset:128 ; GCN-NEXT: global_load_dwordx4 v[4:7], v64, s[0:1] offset:144 ; GCN-NEXT: global_load_dwordx4 v[8:11], v64, s[0:1] offset:160 ; GCN-NEXT: global_load_dwordx4 v[12:15], v64, s[0:1] offset:176 @@ -38,8 +38,6 @@ define amdgpu_kernel void @v_insert_v64i32_37(<64 x i32> addrspace(1)* %ptr.in, ; GCN-NEXT: global_store_dwordx4 v64, v[20:23], s[2:3] offset:208 ; GCN-NEXT: s_waitcnt vmcnt(7) ; GCN-NEXT: global_store_dwordx4 v64, v[24:27], s[2:3] offset:224 -; GCN-NEXT: s_waitcnt vmcnt(7) -; GCN-NEXT: global_store_dwordx4 v64, v[28:31], s[2:3] offset:240 ; GCN-NEXT: global_store_dwordx4 v64, v[32:35], s[2:3] ; GCN-NEXT: global_store_dwordx4 v64, v[36:39], s[2:3] offset:16 ; GCN-NEXT: global_store_dwordx4 v64, v[40:43], s[2:3] offset:32 @@ -48,6 +46,8 @@ define amdgpu_kernel void @v_insert_v64i32_37(<64 x i32> addrspace(1)* %ptr.in, ; GCN-NEXT: global_store_dwordx4 v64, v[52:55], s[2:3] offset:80 ; GCN-NEXT: global_store_dwordx4 v64, v[56:59], s[2:3] offset:96 ; GCN-NEXT: global_store_dwordx4 v64, v[60:63], s[2:3] offset:112 +; GCN-NEXT: s_waitcnt vmcnt(15) +; GCN-NEXT: global_store_dwordx4 v64, v[28:31], s[2:3] offset:240 ; GCN-NEXT: s_endpgm ; ; GFX10-LABEL: v_insert_v64i32_37: @@ -58,13 +58,13 @@ define amdgpu_kernel void @v_insert_v64i32_37(<64 x i32> addrspace(1)* %ptr.in, ; GFX10-NEXT: s_clause 0xf ; GFX10-NEXT: global_load_dwordx4 v[32:35], v64, s[0:1] ; GFX10-NEXT: global_load_dwordx4 v[36:39], v64, s[0:1] offset:16 +; GFX10-NEXT: global_load_dwordx4 v[0:3], v64, s[0:1] offset:128 ; GFX10-NEXT: global_load_dwordx4 v[40:43], v64, s[0:1] offset:32 ; GFX10-NEXT: global_load_dwordx4 v[44:47], v64, s[0:1] offset:48 ; GFX10-NEXT: global_load_dwordx4 v[48:51], v64, s[0:1] offset:64 ; GFX10-NEXT: global_load_dwordx4 v[52:55], v64, s[0:1] offset:80 ; GFX10-NEXT: global_load_dwordx4 v[56:59], v64, s[0:1] offset:96 ; GFX10-NEXT: global_load_dwordx4 v[60:63], v64, s[0:1] offset:112 -; GFX10-NEXT: global_load_dwordx4 v[0:3], v64, s[0:1] offset:128 ; GFX10-NEXT: global_load_dwordx4 v[4:7], v64, s[0:1] offset:144 ; GFX10-NEXT: global_load_dwordx4 v[8:11], v64, s[0:1] offset:160 ; GFX10-NEXT: global_load_dwordx4 v[12:15], v64, s[0:1] offset:176 @@ -84,8 +84,6 @@ define 
amdgpu_kernel void @v_insert_v64i32_37(<64 x i32> addrspace(1)* %ptr.in, ; GFX10-NEXT: global_store_dwordx4 v64, v[16:19], s[2:3] offset:192 ; GFX10-NEXT: s_waitcnt vmcnt(2) ; GFX10-NEXT: global_store_dwordx4 v64, v[20:23], s[2:3] offset:208 -; GFX10-NEXT: s_waitcnt vmcnt(1) -; GFX10-NEXT: global_store_dwordx4 v64, v[24:27], s[2:3] offset:224 ; GFX10-NEXT: global_store_dwordx4 v64, v[32:35], s[2:3] ; GFX10-NEXT: global_store_dwordx4 v64, v[36:39], s[2:3] offset:16 ; GFX10-NEXT: global_store_dwordx4 v64, v[40:43], s[2:3] offset:32 @@ -94,6 +92,8 @@ define amdgpu_kernel void @v_insert_v64i32_37(<64 x i32> addrspace(1)* %ptr.in, ; GFX10-NEXT: global_store_dwordx4 v64, v[52:55], s[2:3] offset:80 ; GFX10-NEXT: global_store_dwordx4 v64, v[56:59], s[2:3] offset:96 ; GFX10-NEXT: global_store_dwordx4 v64, v[60:63], s[2:3] offset:112 +; GFX10-NEXT: s_waitcnt vmcnt(1) +; GFX10-NEXT: global_store_dwordx4 v64, v[24:27], s[2:3] offset:224 ; GFX10-NEXT: s_waitcnt vmcnt(0) ; GFX10-NEXT: global_store_dwordx4 v64, v[28:31], s[2:3] offset:240 ; GFX10-NEXT: s_endpgm diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-extract.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-extract.mir index d6d2a74cbcc2..9580520d4216 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-extract.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-extract.mir @@ -198,156 +198,3 @@ body: | S_ENDPGM 0, implicit %1, implicit %2 ... - ---- -name: extract_sgpr_s32_from_v3s32 -legalized: true -regBankSelected: true - -body: | - bb.0: - liveins: $sgpr0_sgpr1_sgpr2 - ; CHECK-LABEL: name: extract_sgpr_s32_from_v3s32 - ; CHECK: [[COPY:%[0-9]+]]:sgpr_96 = COPY $sgpr0_sgpr1_sgpr2 - ; CHECK: [[COPY1:%[0-9]+]]:sreg_32 = COPY [[COPY]].sub1 - ; CHECK: [[COPY2:%[0-9]+]]:sreg_32 = COPY [[COPY]].sub2 - ; CHECK: S_ENDPGM 0, implicit [[COPY]], implicit [[COPY1]], implicit [[COPY2]] - %0:sgpr(<3 x s32>) = COPY $sgpr0_sgpr1_sgpr2 - %1:sgpr(s32) = G_EXTRACT %0, 0 - %2:sgpr(s32) = G_EXTRACT %0, 32 - %3:sgpr(s32) = G_EXTRACT %0, 64 - S_ENDPGM 0, implicit %0, implicit %2, implicit %3 - -... - ---- -name: extract_sgpr_v2s32_from_v3s32 -legalized: true -regBankSelected: true - -body: | - bb.0: - liveins: $sgpr0_sgpr1_sgpr2 - ; CHECK-LABEL: name: extract_sgpr_v2s32_from_v3s32 - ; CHECK: [[COPY:%[0-9]+]]:sgpr_96_with_sub0_sub1 = COPY $sgpr0_sgpr1_sgpr2 - ; CHECK: [[COPY1:%[0-9]+]]:sreg_64 = COPY [[COPY]].sub0_sub1 - ; CHECK: S_ENDPGM 0, implicit [[COPY1]] - %0:sgpr(<3 x s32>) = COPY $sgpr0_sgpr1_sgpr2 - %1:sgpr(<2 x s32>) = G_EXTRACT %0, 0 - S_ENDPGM 0, implicit %1 - -... - ---- -name: extract_sgpr_v3s32_from_v4s32 -legalized: true -regBankSelected: true - -body: | - bb.0: - liveins: $sgpr0_sgpr1_sgpr2_sgpr3 - ; CHECK-LABEL: name: extract_sgpr_v3s32_from_v4s32 - ; CHECK: [[COPY:%[0-9]+]]:sgpr_128_with_sub1_sub2_sub3 = COPY $sgpr0_sgpr1_sgpr2_sgpr3 - ; CHECK: [[COPY1:%[0-9]+]]:sgpr_128_with_sub0_sub1_sub2 = COPY [[COPY]] - ; CHECK: [[COPY2:%[0-9]+]]:sgpr_96 = COPY [[COPY1]].sub0_sub1_sub2 - ; CHECK: [[COPY3:%[0-9]+]]:sgpr_96 = COPY [[COPY]].sub1_sub2_sub3 - ; CHECK: S_ENDPGM 0, implicit [[COPY2]], implicit [[COPY3]] - %0:sgpr(<4 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3 - %1:sgpr(<3 x s32>) = G_EXTRACT %0, 0 - %2:sgpr(<3 x s32>) = G_EXTRACT %0, 32 - S_ENDPGM 0, implicit %1, implicit %2 - -... 
- ---- -name: extract_sgpr_v2s16_from_v4s16_offset0 -legalized: true -regBankSelected: true - -body: | - bb.0: - liveins: $sgpr0_sgpr1 - ; CHECK-LABEL: name: extract_sgpr_v2s16_from_v4s16_offset0 - ; CHECK: [[COPY:%[0-9]+]]:sreg_64 = COPY $sgpr0_sgpr1 - ; CHECK: [[COPY1:%[0-9]+]]:sreg_32 = COPY [[COPY]].sub0 - ; CHECK: S_ENDPGM 0, implicit [[COPY1]] - %0:sgpr(<4 x s16>) = COPY $sgpr0_sgpr1 - %1:sgpr(<2 x s16>) = G_EXTRACT %0, 0 - S_ENDPGM 0, implicit %1 - -... - ---- -name: extract_sgpr_v2s16_from_v4s16_offset32 -legalized: true -regBankSelected: true - -body: | - bb.0: - liveins: $sgpr0_sgpr1 - ; CHECK-LABEL: name: extract_sgpr_v2s16_from_v4s16_offset32 - ; CHECK: [[COPY:%[0-9]+]]:sreg_64 = COPY $sgpr0_sgpr1 - ; CHECK: [[COPY1:%[0-9]+]]:sreg_32 = COPY [[COPY]].sub1 - ; CHECK: S_ENDPGM 0, implicit [[COPY1]] - %0:sgpr(<4 x s16>) = COPY $sgpr0_sgpr1 - %1:sgpr(<2 x s16>) = G_EXTRACT %0, 32 - S_ENDPGM 0, implicit %1 - -... - -# FIXME: Probably should not be legal ---- -name: extract_sgpr_s16_from_v4s16_offset0 -legalized: true -regBankSelected: true - -body: | - bb.0: - liveins: $sgpr0_sgpr1 - ; CHECK-LABEL: name: extract_sgpr_s16_from_v4s16_offset0 - ; CHECK: [[COPY:%[0-9]+]]:sreg_64 = COPY $sgpr0_sgpr1 - ; CHECK: [[COPY1:%[0-9]+]]:sreg_32 = COPY [[COPY]].sub0 - ; CHECK: S_ENDPGM 0, implicit [[COPY1]] - %0:sgpr(<4 x s16>) = COPY $sgpr0_sgpr1 - %1:sgpr(s16) = G_EXTRACT %0, 0 - S_ENDPGM 0, implicit %1 - -... - -# FIXME: Probably should not be legal ---- -name: extract_sgpr_s16_from_v4s16_offset32 -legalized: true -regBankSelected: true - -body: | - bb.0: - liveins: $sgpr0_sgpr1 - ; CHECK-LABEL: name: extract_sgpr_s16_from_v4s16_offset32 - ; CHECK: [[COPY:%[0-9]+]]:sreg_64 = COPY $sgpr0_sgpr1 - ; CHECK: [[COPY1:%[0-9]+]]:sreg_32 = COPY [[COPY]].sub1 - ; CHECK: S_ENDPGM 0, implicit [[COPY1]] - %0:sgpr(<4 x s16>) = COPY $sgpr0_sgpr1 - %1:sgpr(s16) = G_EXTRACT %0, 32 - S_ENDPGM 0, implicit %1 - -... - -# FIXME: Probably should not be legal ---- -name: extract_sgpr_s16_from_v6s16_offset32 -legalized: true -regBankSelected: true - -body: | - bb.0: - liveins: $sgpr0_sgpr1_sgpr2 - ; CHECK-LABEL: name: extract_sgpr_s16_from_v6s16_offset32 - ; CHECK: [[COPY:%[0-9]+]]:sgpr_96 = COPY $sgpr0_sgpr1_sgpr2 - ; CHECK: [[COPY1:%[0-9]+]]:sreg_32 = COPY [[COPY]].sub1 - ; CHECK: S_ENDPGM 0, implicit [[COPY1]] - %0:sgpr(<6 x s16>) = COPY $sgpr0_sgpr1_sgpr2 - %1:sgpr(s16) = G_EXTRACT %0, 32 - S_ENDPGM 0, implicit %1 - -... 
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-fract.f64.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-fract.f64.mir index 4b6628472c8b..7d8e42e39fac 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-fract.f64.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-fract.f64.mir @@ -32,8 +32,7 @@ body: | %7:sgpr(s64) = G_CONSTANT i64 36 %8:sgpr(p4) = G_PTR_ADD %2, %7(s64) %9:sgpr(<2 x s64>) = G_LOAD %8(p4) :: (dereferenceable invariant load (<2 x s64>), align 4, addrspace 4) - %10:sgpr(s64) = G_EXTRACT %9(<2 x s64>), 0 - %13:sgpr(s64) = G_EXTRACT %9(<2 x s64>), 64 + %10:sgpr(s64), %13:sgpr(s64) = G_UNMERGE_VALUES %9(<2 x s64>) %15:sgpr(p1) = G_INTTOPTR %13(s64) %18:sgpr(s64) = G_LOAD %15(p1) :: (load (s64), addrspace 1) %19:sgpr(s64) = G_FCONSTANT double -0.000000e+00 @@ -82,8 +81,7 @@ body: | %7:sgpr(s64) = G_CONSTANT i64 36 %8:sgpr(p4) = G_PTR_ADD %2, %7(s64) %9:sgpr(<2 x s64>) = G_LOAD %8(p4) :: (dereferenceable invariant load (<2 x s64>), align 4, addrspace 4) - %10:sgpr(s64) = G_EXTRACT %9(<2 x s64>), 0 - %13:sgpr(s64) = G_EXTRACT %9(<2 x s64>), 64 + %10:sgpr(s64), %13:sgpr(s64) = G_UNMERGE_VALUES %9(<2 x s64>) %15:sgpr(p1) = G_INTTOPTR %13(s64) %18:sgpr(s64) = G_LOAD %15(p1) :: (load (s64), addrspace 1) %19:sgpr(s64) = G_FABS %18 diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-insert.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-insert.mir index 10b9280837a7..57dbe204ecc9 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-insert.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-insert.mir @@ -566,42 +566,3 @@ body: | %2:vgpr(s256) = G_INSERT %0, %1, 128 S_ENDPGM 0, implicit %2 ... - ---- -name: insert_sgpr_v2s16_to_v4s16_offset0 -legalized: true -regBankSelected: true - -body: | - bb.0: - liveins: $sgpr0_sgpr1, $sgpr2 - ; CHECK-LABEL: name: insert_sgpr_v2s16_to_v4s16_offset0 - ; CHECK: [[COPY:%[0-9]+]]:sreg_64 = COPY $sgpr0_sgpr1 - ; CHECK: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr2 - ; CHECK: [[INSERT_SUBREG:%[0-9]+]]:sreg_64 = INSERT_SUBREG [[COPY]], [[COPY1]], %subreg.sub0 - ; CHECK: S_ENDPGM 0, implicit [[INSERT_SUBREG]] - %0:sgpr(<4 x s16>) = COPY $sgpr0_sgpr1 - %1:sgpr(<2 x s16>) = COPY $sgpr2 - %2:sgpr(<4 x s16>) = G_INSERT %0, %1, 0 - S_ENDPGM 0, implicit %2 - -... - ---- -name: insert_sgpr_v2s16_to_v4s16_offset32 -legalized: true -regBankSelected: true - -body: | - bb.0: - liveins: $sgpr0_sgpr1, $sgpr2 - ; CHECK-LABEL: name: insert_sgpr_v2s16_to_v4s16_offset32 - ; CHECK: [[COPY:%[0-9]+]]:sreg_64 = COPY $sgpr0_sgpr1 - ; CHECK: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr2 - ; CHECK: [[INSERT_SUBREG:%[0-9]+]]:sreg_64 = INSERT_SUBREG [[COPY]], [[COPY1]], %subreg.sub1 - ; CHECK: S_ENDPGM 0, implicit [[INSERT_SUBREG]] - %0:sgpr(<4 x s16>) = COPY $sgpr0_sgpr1 - %1:sgpr(<2 x s16>) = COPY $sgpr2 - %2:sgpr(<4 x s16>) = G_INSERT %0, %1, 32 - S_ENDPGM 0, implicit %2 -... 
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-insert.xfail.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-insert.xfail.mir index 150b341561f9..1720519b89e2 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-insert.xfail.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-insert.xfail.mir @@ -1,7 +1,7 @@ # RUN: llc -march=amdgcn -mcpu=gfx900 -run-pass=instruction-select -global-isel-abort=2 -pass-remarks-missed='gisel*' -verify-machineinstrs -o /dev/null %s 2>&1 | FileCheck -check-prefix=ERR %s -# ERR: remark: :0:0: cannot select: %3:sgpr(<4 x s16>) = G_INSERT %0:sgpr, %2:sgpr(s16), 0 (in function: insert_sgpr_s16_to_v4s16_offset0) -# ERR-NEXT: remark: :0:0: cannot select: %2:sgpr(<16 x s32>) = G_INSERT %0:sgpr, %1:sgpr(<8 x s32>), 0 (in function: insert_sgpr_v8s32_to_v16s32_offset0) +# ERR: remark: :0:0: instruction is not legal: %3:sgpr(<4 x s16>) = G_INSERT %0:sgpr, %2:sgpr(s16), 0 (in function: insert_sgpr_s16_to_v4s16_offset0) +# ERR-NEXT: :0:0: instruction is not legal: %2:sgpr(<16 x s32>) = G_INSERT %0:sgpr, %1:sgpr(<8 x s32>), 0 (in function: insert_sgpr_v8s32_to_v16s32_offset0) # ERR-NOT: remark # FIXME: This 16-bit insert source should not be legal and this test diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-call-return-values.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-call-return-values.ll index 17d8f497cbcd..47461bf5e17d 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-call-return-values.ll +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-call-return-values.ll @@ -2078,11 +2078,11 @@ define amdgpu_kernel void @test_call_external_v3i16_func_void() #0 { ; GCN-NEXT: $sgpr30_sgpr31 = G_SI_CALL [[GV]](p0), @external_v3i16_func_void, csr_amdgpu_highregs, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $vgpr31, implicit-def $vgpr0, implicit-def $vgpr1 ; GCN-NEXT: [[COPY21:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0 ; GCN-NEXT: [[COPY22:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr1 - ; GCN-NEXT: [[DEF1:%[0-9]+]]:_(<2 x s16>) = G_IMPLICIT_DEF - ; GCN-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[COPY21]](<2 x s16>), [[COPY22]](<2 x s16>), [[DEF1]](<2 x s16>) - ; GCN-NEXT: [[UV:%[0-9]+]]:_(<3 x s16>), [[UV1:%[0-9]+]]:_(<3 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<6 x s16>) + ; GCN-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[COPY21]](<2 x s16>), [[COPY22]](<2 x s16>) + ; GCN-NEXT: [[UV:%[0-9]+]]:_(s16), [[UV1:%[0-9]+]]:_(s16), [[UV2:%[0-9]+]]:_(s16), [[UV3:%[0-9]+]]:_(s16) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<4 x s16>) + ; GCN-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s16>) = G_BUILD_VECTOR [[UV]](s16), [[UV1]](s16), [[UV2]](s16) ; GCN-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def $scc - ; GCN-NEXT: G_STORE [[UV]](<3 x s16>), [[DEF]](p1) :: (volatile store (<3 x s16>) into `<3 x i16> addrspace(1)* undef`, align 8, addrspace 1) + ; GCN-NEXT: G_STORE [[BUILD_VECTOR]](<3 x s16>), [[DEF]](p1) :: (volatile store (<3 x s16>) into `<3 x i16> addrspace(1)* undef`, align 8, addrspace 1) ; GCN-NEXT: S_ENDPGM 0 %val = call <3 x i16> @external_v3i16_func_void() store volatile <3 x i16> %val, <3 x i16> addrspace(1)* undef @@ -2252,11 +2252,11 @@ define amdgpu_kernel void @test_call_external_v3f16_func_void() #0 { ; GCN-NEXT: $sgpr30_sgpr31 = G_SI_CALL [[GV]](p0), @external_v3f16_func_void, csr_amdgpu_highregs, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit 
$sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $vgpr31, implicit-def $vgpr0, implicit-def $vgpr1 ; GCN-NEXT: [[COPY21:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0 ; GCN-NEXT: [[COPY22:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr1 - ; GCN-NEXT: [[DEF1:%[0-9]+]]:_(<2 x s16>) = G_IMPLICIT_DEF - ; GCN-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[COPY21]](<2 x s16>), [[COPY22]](<2 x s16>), [[DEF1]](<2 x s16>) - ; GCN-NEXT: [[UV:%[0-9]+]]:_(<3 x s16>), [[UV1:%[0-9]+]]:_(<3 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<6 x s16>) + ; GCN-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[COPY21]](<2 x s16>), [[COPY22]](<2 x s16>) + ; GCN-NEXT: [[UV:%[0-9]+]]:_(s16), [[UV1:%[0-9]+]]:_(s16), [[UV2:%[0-9]+]]:_(s16), [[UV3:%[0-9]+]]:_(s16) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<4 x s16>) + ; GCN-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s16>) = G_BUILD_VECTOR [[UV]](s16), [[UV1]](s16), [[UV2]](s16) ; GCN-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def $scc - ; GCN-NEXT: G_STORE [[UV]](<3 x s16>), [[DEF]](p1) :: (volatile store (<3 x s16>) into `<3 x half> addrspace(1)* undef`, align 8, addrspace 1) + ; GCN-NEXT: G_STORE [[BUILD_VECTOR]](<3 x s16>), [[DEF]](p1) :: (volatile store (<3 x s16>) into `<3 x half> addrspace(1)* undef`, align 8, addrspace 1) ; GCN-NEXT: S_ENDPGM 0 %val = call <3 x half> @external_v3f16_func_void() store volatile <3 x half> %val, <3 x half> addrspace(1)* undef diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-call.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-call.ll index d81a40bfe6d0..419c8f920f48 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-call.ll +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-call.ll @@ -2101,11 +2101,12 @@ define amdgpu_kernel void @test_call_external_void_func_v3i16() #0 { ; CHECK-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 20 ; CHECK-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[COPY19]], [[C2]](s32) ; CHECK-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]] - ; CHECK-NEXT: [[DEF1:%[0-9]+]]:_(<3 x s16>) = G_IMPLICIT_DEF - ; CHECK-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[LOAD]](<3 x s16>), [[DEF1]](<3 x s16>) - ; CHECK-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>), [[UV2:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<6 x s16>) - ; CHECK-NEXT: $vgpr0 = COPY [[UV]](<2 x s16>) - ; CHECK-NEXT: $vgpr1 = COPY [[UV1]](<2 x s16>) + ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s16), [[UV1:%[0-9]+]]:_(s16), [[UV2:%[0-9]+]]:_(s16) = G_UNMERGE_VALUES [[LOAD]](<3 x s16>) + ; CHECK-NEXT: [[DEF1:%[0-9]+]]:_(s16) = G_IMPLICIT_DEF + ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s16>) = G_BUILD_VECTOR [[UV]](s16), [[UV1]](s16), [[UV2]](s16), [[DEF1]](s16) + ; CHECK-NEXT: [[UV3:%[0-9]+]]:_(<2 x s16>), [[UV4:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[BUILD_VECTOR]](<4 x s16>) + ; CHECK-NEXT: $vgpr0 = COPY [[UV3]](<2 x s16>) + ; CHECK-NEXT: $vgpr1 = COPY [[UV4]](<2 x s16>) ; CHECK-NEXT: [[COPY20:%[0-9]+]]:_(<4 x s32>) = COPY $private_rsrc_reg ; CHECK-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY20]](<4 x s32>) ; CHECK-NEXT: $sgpr4_sgpr5 = COPY [[COPY10]](p4) @@ -2161,11 +2162,12 @@ define amdgpu_kernel void @test_call_external_void_func_v3f16() #0 { ; CHECK-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 20 ; CHECK-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[COPY19]], [[C2]](s32) ; CHECK-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]] - ; CHECK-NEXT: [[DEF1:%[0-9]+]]:_(<3 x s16>) 
= G_IMPLICIT_DEF - ; CHECK-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[LOAD]](<3 x s16>), [[DEF1]](<3 x s16>) - ; CHECK-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>), [[UV2:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<6 x s16>) - ; CHECK-NEXT: $vgpr0 = COPY [[UV]](<2 x s16>) - ; CHECK-NEXT: $vgpr1 = COPY [[UV1]](<2 x s16>) + ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s16), [[UV1:%[0-9]+]]:_(s16), [[UV2:%[0-9]+]]:_(s16) = G_UNMERGE_VALUES [[LOAD]](<3 x s16>) + ; CHECK-NEXT: [[DEF1:%[0-9]+]]:_(s16) = G_IMPLICIT_DEF + ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s16>) = G_BUILD_VECTOR [[UV]](s16), [[UV1]](s16), [[UV2]](s16), [[DEF1]](s16) + ; CHECK-NEXT: [[UV3:%[0-9]+]]:_(<2 x s16>), [[UV4:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[BUILD_VECTOR]](<4 x s16>) + ; CHECK-NEXT: $vgpr0 = COPY [[UV3]](<2 x s16>) + ; CHECK-NEXT: $vgpr1 = COPY [[UV4]](<2 x s16>) ; CHECK-NEXT: [[COPY20:%[0-9]+]]:_(<4 x s32>) = COPY $private_rsrc_reg ; CHECK-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY20]](<4 x s32>) ; CHECK-NEXT: $sgpr4_sgpr5 = COPY [[COPY10]](p4) @@ -2339,12 +2341,13 @@ define amdgpu_kernel void @test_call_external_void_func_v5i16() #0 { ; CHECK-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 20 ; CHECK-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[COPY19]], [[C2]](s32) ; CHECK-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]] - ; CHECK-NEXT: [[DEF1:%[0-9]+]]:_(<5 x s16>) = G_IMPLICIT_DEF - ; CHECK-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<10 x s16>) = G_CONCAT_VECTORS [[LOAD]](<5 x s16>), [[DEF1]](<5 x s16>) - ; CHECK-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>), [[UV2:%[0-9]+]]:_(<2 x s16>), [[UV3:%[0-9]+]]:_(<2 x s16>), [[UV4:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<10 x s16>) - ; CHECK-NEXT: $vgpr0 = COPY [[UV]](<2 x s16>) - ; CHECK-NEXT: $vgpr1 = COPY [[UV1]](<2 x s16>) - ; CHECK-NEXT: $vgpr2 = COPY [[UV2]](<2 x s16>) + ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s16), [[UV1:%[0-9]+]]:_(s16), [[UV2:%[0-9]+]]:_(s16), [[UV3:%[0-9]+]]:_(s16), [[UV4:%[0-9]+]]:_(s16) = G_UNMERGE_VALUES [[LOAD]](<5 x s16>) + ; CHECK-NEXT: [[DEF1:%[0-9]+]]:_(s16) = G_IMPLICIT_DEF + ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<6 x s16>) = G_BUILD_VECTOR [[UV]](s16), [[UV1]](s16), [[UV2]](s16), [[UV3]](s16), [[UV4]](s16), [[DEF1]](s16) + ; CHECK-NEXT: [[UV5:%[0-9]+]]:_(<2 x s16>), [[UV6:%[0-9]+]]:_(<2 x s16>), [[UV7:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[BUILD_VECTOR]](<6 x s16>) + ; CHECK-NEXT: $vgpr0 = COPY [[UV5]](<2 x s16>) + ; CHECK-NEXT: $vgpr1 = COPY [[UV6]](<2 x s16>) + ; CHECK-NEXT: $vgpr2 = COPY [[UV7]](<2 x s16>) ; CHECK-NEXT: [[COPY20:%[0-9]+]]:_(<4 x s32>) = COPY $private_rsrc_reg ; CHECK-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY20]](<4 x s32>) ; CHECK-NEXT: $sgpr4_sgpr5 = COPY [[COPY10]](p4) @@ -2400,13 +2403,14 @@ define amdgpu_kernel void @test_call_external_void_func_v7i16() #0 { ; CHECK-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 20 ; CHECK-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[COPY19]], [[C2]](s32) ; CHECK-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]] - ; CHECK-NEXT: [[DEF1:%[0-9]+]]:_(<7 x s16>) = G_IMPLICIT_DEF - ; CHECK-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<14 x s16>) = G_CONCAT_VECTORS [[LOAD]](<7 x s16>), [[DEF1]](<7 x s16>) - ; CHECK-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>), [[UV2:%[0-9]+]]:_(<2 x s16>), [[UV3:%[0-9]+]]:_(<2 x s16>), [[UV4:%[0-9]+]]:_(<2 x s16>), [[UV5:%[0-9]+]]:_(<2 x s16>), [[UV6:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<14 x s16>) - ; CHECK-NEXT: $vgpr0 = COPY 
[[UV]](<2 x s16>) - ; CHECK-NEXT: $vgpr1 = COPY [[UV1]](<2 x s16>) - ; CHECK-NEXT: $vgpr2 = COPY [[UV2]](<2 x s16>) - ; CHECK-NEXT: $vgpr3 = COPY [[UV3]](<2 x s16>) + ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s16), [[UV1:%[0-9]+]]:_(s16), [[UV2:%[0-9]+]]:_(s16), [[UV3:%[0-9]+]]:_(s16), [[UV4:%[0-9]+]]:_(s16), [[UV5:%[0-9]+]]:_(s16), [[UV6:%[0-9]+]]:_(s16) = G_UNMERGE_VALUES [[LOAD]](<7 x s16>) + ; CHECK-NEXT: [[DEF1:%[0-9]+]]:_(s16) = G_IMPLICIT_DEF + ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s16>) = G_BUILD_VECTOR [[UV]](s16), [[UV1]](s16), [[UV2]](s16), [[UV3]](s16), [[UV4]](s16), [[UV5]](s16), [[UV6]](s16), [[DEF1]](s16) + ; CHECK-NEXT: [[UV7:%[0-9]+]]:_(<2 x s16>), [[UV8:%[0-9]+]]:_(<2 x s16>), [[UV9:%[0-9]+]]:_(<2 x s16>), [[UV10:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[BUILD_VECTOR]](<8 x s16>) + ; CHECK-NEXT: $vgpr0 = COPY [[UV7]](<2 x s16>) + ; CHECK-NEXT: $vgpr1 = COPY [[UV8]](<2 x s16>) + ; CHECK-NEXT: $vgpr2 = COPY [[UV9]](<2 x s16>) + ; CHECK-NEXT: $vgpr3 = COPY [[UV10]](<2 x s16>) ; CHECK-NEXT: [[COPY20:%[0-9]+]]:_(<4 x s32>) = COPY $private_rsrc_reg ; CHECK-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY20]](<4 x s32>) ; CHECK-NEXT: $sgpr4_sgpr5 = COPY [[COPY10]](p4) @@ -2462,44 +2466,45 @@ define amdgpu_kernel void @test_call_external_void_func_v63i16() #0 { ; CHECK-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 20 ; CHECK-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[COPY19]], [[C2]](s32) ; CHECK-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]] - ; CHECK-NEXT: [[DEF1:%[0-9]+]]:_(<63 x s16>) = G_IMPLICIT_DEF - ; CHECK-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<126 x s16>) = G_CONCAT_VECTORS [[LOAD]](<63 x s16>), [[DEF1]](<63 x s16>) - ; CHECK-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>), [[UV2:%[0-9]+]]:_(<2 x s16>), [[UV3:%[0-9]+]]:_(<2 x s16>), [[UV4:%[0-9]+]]:_(<2 x s16>), [[UV5:%[0-9]+]]:_(<2 x s16>), [[UV6:%[0-9]+]]:_(<2 x s16>), [[UV7:%[0-9]+]]:_(<2 x s16>), [[UV8:%[0-9]+]]:_(<2 x s16>), [[UV9:%[0-9]+]]:_(<2 x s16>), [[UV10:%[0-9]+]]:_(<2 x s16>), [[UV11:%[0-9]+]]:_(<2 x s16>), [[UV12:%[0-9]+]]:_(<2 x s16>), [[UV13:%[0-9]+]]:_(<2 x s16>), [[UV14:%[0-9]+]]:_(<2 x s16>), [[UV15:%[0-9]+]]:_(<2 x s16>), [[UV16:%[0-9]+]]:_(<2 x s16>), [[UV17:%[0-9]+]]:_(<2 x s16>), [[UV18:%[0-9]+]]:_(<2 x s16>), [[UV19:%[0-9]+]]:_(<2 x s16>), [[UV20:%[0-9]+]]:_(<2 x s16>), [[UV21:%[0-9]+]]:_(<2 x s16>), [[UV22:%[0-9]+]]:_(<2 x s16>), [[UV23:%[0-9]+]]:_(<2 x s16>), [[UV24:%[0-9]+]]:_(<2 x s16>), [[UV25:%[0-9]+]]:_(<2 x s16>), [[UV26:%[0-9]+]]:_(<2 x s16>), [[UV27:%[0-9]+]]:_(<2 x s16>), [[UV28:%[0-9]+]]:_(<2 x s16>), [[UV29:%[0-9]+]]:_(<2 x s16>), [[UV30:%[0-9]+]]:_(<2 x s16>), [[UV31:%[0-9]+]]:_(<2 x s16>), [[UV32:%[0-9]+]]:_(<2 x s16>), [[UV33:%[0-9]+]]:_(<2 x s16>), [[UV34:%[0-9]+]]:_(<2 x s16>), [[UV35:%[0-9]+]]:_(<2 x s16>), [[UV36:%[0-9]+]]:_(<2 x s16>), [[UV37:%[0-9]+]]:_(<2 x s16>), [[UV38:%[0-9]+]]:_(<2 x s16>), [[UV39:%[0-9]+]]:_(<2 x s16>), [[UV40:%[0-9]+]]:_(<2 x s16>), [[UV41:%[0-9]+]]:_(<2 x s16>), [[UV42:%[0-9]+]]:_(<2 x s16>), [[UV43:%[0-9]+]]:_(<2 x s16>), [[UV44:%[0-9]+]]:_(<2 x s16>), [[UV45:%[0-9]+]]:_(<2 x s16>), [[UV46:%[0-9]+]]:_(<2 x s16>), [[UV47:%[0-9]+]]:_(<2 x s16>), [[UV48:%[0-9]+]]:_(<2 x s16>), [[UV49:%[0-9]+]]:_(<2 x s16>), [[UV50:%[0-9]+]]:_(<2 x s16>), [[UV51:%[0-9]+]]:_(<2 x s16>), [[UV52:%[0-9]+]]:_(<2 x s16>), [[UV53:%[0-9]+]]:_(<2 x s16>), [[UV54:%[0-9]+]]:_(<2 x s16>), [[UV55:%[0-9]+]]:_(<2 x s16>), [[UV56:%[0-9]+]]:_(<2 x s16>), [[UV57:%[0-9]+]]:_(<2 x s16>), [[UV58:%[0-9]+]]:_(<2 x s16>), [[UV59:%[0-9]+]]:_(<2 x s16>), [[UV60:%[0-9]+]]:_(<2 x s16>), 
[[UV61:%[0-9]+]]:_(<2 x s16>), [[UV62:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<126 x s16>) + ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s16), [[UV1:%[0-9]+]]:_(s16), [[UV2:%[0-9]+]]:_(s16), [[UV3:%[0-9]+]]:_(s16), [[UV4:%[0-9]+]]:_(s16), [[UV5:%[0-9]+]]:_(s16), [[UV6:%[0-9]+]]:_(s16), [[UV7:%[0-9]+]]:_(s16), [[UV8:%[0-9]+]]:_(s16), [[UV9:%[0-9]+]]:_(s16), [[UV10:%[0-9]+]]:_(s16), [[UV11:%[0-9]+]]:_(s16), [[UV12:%[0-9]+]]:_(s16), [[UV13:%[0-9]+]]:_(s16), [[UV14:%[0-9]+]]:_(s16), [[UV15:%[0-9]+]]:_(s16), [[UV16:%[0-9]+]]:_(s16), [[UV17:%[0-9]+]]:_(s16), [[UV18:%[0-9]+]]:_(s16), [[UV19:%[0-9]+]]:_(s16), [[UV20:%[0-9]+]]:_(s16), [[UV21:%[0-9]+]]:_(s16), [[UV22:%[0-9]+]]:_(s16), [[UV23:%[0-9]+]]:_(s16), [[UV24:%[0-9]+]]:_(s16), [[UV25:%[0-9]+]]:_(s16), [[UV26:%[0-9]+]]:_(s16), [[UV27:%[0-9]+]]:_(s16), [[UV28:%[0-9]+]]:_(s16), [[UV29:%[0-9]+]]:_(s16), [[UV30:%[0-9]+]]:_(s16), [[UV31:%[0-9]+]]:_(s16), [[UV32:%[0-9]+]]:_(s16), [[UV33:%[0-9]+]]:_(s16), [[UV34:%[0-9]+]]:_(s16), [[UV35:%[0-9]+]]:_(s16), [[UV36:%[0-9]+]]:_(s16), [[UV37:%[0-9]+]]:_(s16), [[UV38:%[0-9]+]]:_(s16), [[UV39:%[0-9]+]]:_(s16), [[UV40:%[0-9]+]]:_(s16), [[UV41:%[0-9]+]]:_(s16), [[UV42:%[0-9]+]]:_(s16), [[UV43:%[0-9]+]]:_(s16), [[UV44:%[0-9]+]]:_(s16), [[UV45:%[0-9]+]]:_(s16), [[UV46:%[0-9]+]]:_(s16), [[UV47:%[0-9]+]]:_(s16), [[UV48:%[0-9]+]]:_(s16), [[UV49:%[0-9]+]]:_(s16), [[UV50:%[0-9]+]]:_(s16), [[UV51:%[0-9]+]]:_(s16), [[UV52:%[0-9]+]]:_(s16), [[UV53:%[0-9]+]]:_(s16), [[UV54:%[0-9]+]]:_(s16), [[UV55:%[0-9]+]]:_(s16), [[UV56:%[0-9]+]]:_(s16), [[UV57:%[0-9]+]]:_(s16), [[UV58:%[0-9]+]]:_(s16), [[UV59:%[0-9]+]]:_(s16), [[UV60:%[0-9]+]]:_(s16), [[UV61:%[0-9]+]]:_(s16), [[UV62:%[0-9]+]]:_(s16) = G_UNMERGE_VALUES [[LOAD]](<63 x s16>) + ; CHECK-NEXT: [[DEF1:%[0-9]+]]:_(s16) = G_IMPLICIT_DEF + ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<64 x s16>) = G_BUILD_VECTOR [[UV]](s16), [[UV1]](s16), [[UV2]](s16), [[UV3]](s16), [[UV4]](s16), [[UV5]](s16), [[UV6]](s16), [[UV7]](s16), [[UV8]](s16), [[UV9]](s16), [[UV10]](s16), [[UV11]](s16), [[UV12]](s16), [[UV13]](s16), [[UV14]](s16), [[UV15]](s16), [[UV16]](s16), [[UV17]](s16), [[UV18]](s16), [[UV19]](s16), [[UV20]](s16), [[UV21]](s16), [[UV22]](s16), [[UV23]](s16), [[UV24]](s16), [[UV25]](s16), [[UV26]](s16), [[UV27]](s16), [[UV28]](s16), [[UV29]](s16), [[UV30]](s16), [[UV31]](s16), [[UV32]](s16), [[UV33]](s16), [[UV34]](s16), [[UV35]](s16), [[UV36]](s16), [[UV37]](s16), [[UV38]](s16), [[UV39]](s16), [[UV40]](s16), [[UV41]](s16), [[UV42]](s16), [[UV43]](s16), [[UV44]](s16), [[UV45]](s16), [[UV46]](s16), [[UV47]](s16), [[UV48]](s16), [[UV49]](s16), [[UV50]](s16), [[UV51]](s16), [[UV52]](s16), [[UV53]](s16), [[UV54]](s16), [[UV55]](s16), [[UV56]](s16), [[UV57]](s16), [[UV58]](s16), [[UV59]](s16), [[UV60]](s16), [[UV61]](s16), [[UV62]](s16), [[DEF1]](s16) + ; CHECK-NEXT: [[UV63:%[0-9]+]]:_(<2 x s16>), [[UV64:%[0-9]+]]:_(<2 x s16>), [[UV65:%[0-9]+]]:_(<2 x s16>), [[UV66:%[0-9]+]]:_(<2 x s16>), [[UV67:%[0-9]+]]:_(<2 x s16>), [[UV68:%[0-9]+]]:_(<2 x s16>), [[UV69:%[0-9]+]]:_(<2 x s16>), [[UV70:%[0-9]+]]:_(<2 x s16>), [[UV71:%[0-9]+]]:_(<2 x s16>), [[UV72:%[0-9]+]]:_(<2 x s16>), [[UV73:%[0-9]+]]:_(<2 x s16>), [[UV74:%[0-9]+]]:_(<2 x s16>), [[UV75:%[0-9]+]]:_(<2 x s16>), [[UV76:%[0-9]+]]:_(<2 x s16>), [[UV77:%[0-9]+]]:_(<2 x s16>), [[UV78:%[0-9]+]]:_(<2 x s16>), [[UV79:%[0-9]+]]:_(<2 x s16>), [[UV80:%[0-9]+]]:_(<2 x s16>), [[UV81:%[0-9]+]]:_(<2 x s16>), [[UV82:%[0-9]+]]:_(<2 x s16>), [[UV83:%[0-9]+]]:_(<2 x s16>), [[UV84:%[0-9]+]]:_(<2 x s16>), [[UV85:%[0-9]+]]:_(<2 x s16>), 
[[UV86:%[0-9]+]]:_(<2 x s16>), [[UV87:%[0-9]+]]:_(<2 x s16>), [[UV88:%[0-9]+]]:_(<2 x s16>), [[UV89:%[0-9]+]]:_(<2 x s16>), [[UV90:%[0-9]+]]:_(<2 x s16>), [[UV91:%[0-9]+]]:_(<2 x s16>), [[UV92:%[0-9]+]]:_(<2 x s16>), [[UV93:%[0-9]+]]:_(<2 x s16>), [[UV94:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[BUILD_VECTOR]](<64 x s16>) ; CHECK-NEXT: [[COPY20:%[0-9]+]]:_(p5) = COPY $sp_reg ; CHECK-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 ; CHECK-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY20]], [[C3]](s32) - ; CHECK-NEXT: G_STORE [[UV31]](<2 x s16>), [[PTR_ADD1]](p5) :: (store (<2 x s16>) into stack, align 16, addrspace 5) - ; CHECK-NEXT: $vgpr0 = COPY [[UV]](<2 x s16>) - ; CHECK-NEXT: $vgpr1 = COPY [[UV1]](<2 x s16>) - ; CHECK-NEXT: $vgpr2 = COPY [[UV2]](<2 x s16>) - ; CHECK-NEXT: $vgpr3 = COPY [[UV3]](<2 x s16>) - ; CHECK-NEXT: $vgpr4 = COPY [[UV4]](<2 x s16>) - ; CHECK-NEXT: $vgpr5 = COPY [[UV5]](<2 x s16>) - ; CHECK-NEXT: $vgpr6 = COPY [[UV6]](<2 x s16>) - ; CHECK-NEXT: $vgpr7 = COPY [[UV7]](<2 x s16>) - ; CHECK-NEXT: $vgpr8 = COPY [[UV8]](<2 x s16>) - ; CHECK-NEXT: $vgpr9 = COPY [[UV9]](<2 x s16>) - ; CHECK-NEXT: $vgpr10 = COPY [[UV10]](<2 x s16>) - ; CHECK-NEXT: $vgpr11 = COPY [[UV11]](<2 x s16>) - ; CHECK-NEXT: $vgpr12 = COPY [[UV12]](<2 x s16>) - ; CHECK-NEXT: $vgpr13 = COPY [[UV13]](<2 x s16>) - ; CHECK-NEXT: $vgpr14 = COPY [[UV14]](<2 x s16>) - ; CHECK-NEXT: $vgpr15 = COPY [[UV15]](<2 x s16>) - ; CHECK-NEXT: $vgpr16 = COPY [[UV16]](<2 x s16>) - ; CHECK-NEXT: $vgpr17 = COPY [[UV17]](<2 x s16>) - ; CHECK-NEXT: $vgpr18 = COPY [[UV18]](<2 x s16>) - ; CHECK-NEXT: $vgpr19 = COPY [[UV19]](<2 x s16>) - ; CHECK-NEXT: $vgpr20 = COPY [[UV20]](<2 x s16>) - ; CHECK-NEXT: $vgpr21 = COPY [[UV21]](<2 x s16>) - ; CHECK-NEXT: $vgpr22 = COPY [[UV22]](<2 x s16>) - ; CHECK-NEXT: $vgpr23 = COPY [[UV23]](<2 x s16>) - ; CHECK-NEXT: $vgpr24 = COPY [[UV24]](<2 x s16>) - ; CHECK-NEXT: $vgpr25 = COPY [[UV25]](<2 x s16>) - ; CHECK-NEXT: $vgpr26 = COPY [[UV26]](<2 x s16>) - ; CHECK-NEXT: $vgpr27 = COPY [[UV27]](<2 x s16>) - ; CHECK-NEXT: $vgpr28 = COPY [[UV28]](<2 x s16>) - ; CHECK-NEXT: $vgpr29 = COPY [[UV29]](<2 x s16>) - ; CHECK-NEXT: $vgpr30 = COPY [[UV30]](<2 x s16>) + ; CHECK-NEXT: G_STORE [[UV94]](<2 x s16>), [[PTR_ADD1]](p5) :: (store (<2 x s16>) into stack, align 16, addrspace 5) + ; CHECK-NEXT: $vgpr0 = COPY [[UV63]](<2 x s16>) + ; CHECK-NEXT: $vgpr1 = COPY [[UV64]](<2 x s16>) + ; CHECK-NEXT: $vgpr2 = COPY [[UV65]](<2 x s16>) + ; CHECK-NEXT: $vgpr3 = COPY [[UV66]](<2 x s16>) + ; CHECK-NEXT: $vgpr4 = COPY [[UV67]](<2 x s16>) + ; CHECK-NEXT: $vgpr5 = COPY [[UV68]](<2 x s16>) + ; CHECK-NEXT: $vgpr6 = COPY [[UV69]](<2 x s16>) + ; CHECK-NEXT: $vgpr7 = COPY [[UV70]](<2 x s16>) + ; CHECK-NEXT: $vgpr8 = COPY [[UV71]](<2 x s16>) + ; CHECK-NEXT: $vgpr9 = COPY [[UV72]](<2 x s16>) + ; CHECK-NEXT: $vgpr10 = COPY [[UV73]](<2 x s16>) + ; CHECK-NEXT: $vgpr11 = COPY [[UV74]](<2 x s16>) + ; CHECK-NEXT: $vgpr12 = COPY [[UV75]](<2 x s16>) + ; CHECK-NEXT: $vgpr13 = COPY [[UV76]](<2 x s16>) + ; CHECK-NEXT: $vgpr14 = COPY [[UV77]](<2 x s16>) + ; CHECK-NEXT: $vgpr15 = COPY [[UV78]](<2 x s16>) + ; CHECK-NEXT: $vgpr16 = COPY [[UV79]](<2 x s16>) + ; CHECK-NEXT: $vgpr17 = COPY [[UV80]](<2 x s16>) + ; CHECK-NEXT: $vgpr18 = COPY [[UV81]](<2 x s16>) + ; CHECK-NEXT: $vgpr19 = COPY [[UV82]](<2 x s16>) + ; CHECK-NEXT: $vgpr20 = COPY [[UV83]](<2 x s16>) + ; CHECK-NEXT: $vgpr21 = COPY [[UV84]](<2 x s16>) + ; CHECK-NEXT: $vgpr22 = COPY [[UV85]](<2 x s16>) + ; CHECK-NEXT: $vgpr23 = COPY [[UV86]](<2 x s16>) + ; CHECK-NEXT: $vgpr24 = COPY 
[[UV87]](<2 x s16>) + ; CHECK-NEXT: $vgpr25 = COPY [[UV88]](<2 x s16>) + ; CHECK-NEXT: $vgpr26 = COPY [[UV89]](<2 x s16>) + ; CHECK-NEXT: $vgpr27 = COPY [[UV90]](<2 x s16>) + ; CHECK-NEXT: $vgpr28 = COPY [[UV91]](<2 x s16>) + ; CHECK-NEXT: $vgpr29 = COPY [[UV92]](<2 x s16>) + ; CHECK-NEXT: $vgpr30 = COPY [[UV93]](<2 x s16>) ; CHECK-NEXT: [[COPY21:%[0-9]+]]:_(<4 x s32>) = COPY $private_rsrc_reg ; CHECK-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY21]](<4 x s32>) ; CHECK-NEXT: $sgpr4_sgpr5 = COPY [[COPY10]](p4) @@ -2555,47 +2560,48 @@ define amdgpu_kernel void @test_call_external_void_func_v65i16() #0 { ; CHECK-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 20 ; CHECK-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[COPY19]], [[C2]](s32) ; CHECK-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]] - ; CHECK-NEXT: [[DEF1:%[0-9]+]]:_(<65 x s16>) = G_IMPLICIT_DEF - ; CHECK-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<130 x s16>) = G_CONCAT_VECTORS [[LOAD]](<65 x s16>), [[DEF1]](<65 x s16>) - ; CHECK-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>), [[UV2:%[0-9]+]]:_(<2 x s16>), [[UV3:%[0-9]+]]:_(<2 x s16>), [[UV4:%[0-9]+]]:_(<2 x s16>), [[UV5:%[0-9]+]]:_(<2 x s16>), [[UV6:%[0-9]+]]:_(<2 x s16>), [[UV7:%[0-9]+]]:_(<2 x s16>), [[UV8:%[0-9]+]]:_(<2 x s16>), [[UV9:%[0-9]+]]:_(<2 x s16>), [[UV10:%[0-9]+]]:_(<2 x s16>), [[UV11:%[0-9]+]]:_(<2 x s16>), [[UV12:%[0-9]+]]:_(<2 x s16>), [[UV13:%[0-9]+]]:_(<2 x s16>), [[UV14:%[0-9]+]]:_(<2 x s16>), [[UV15:%[0-9]+]]:_(<2 x s16>), [[UV16:%[0-9]+]]:_(<2 x s16>), [[UV17:%[0-9]+]]:_(<2 x s16>), [[UV18:%[0-9]+]]:_(<2 x s16>), [[UV19:%[0-9]+]]:_(<2 x s16>), [[UV20:%[0-9]+]]:_(<2 x s16>), [[UV21:%[0-9]+]]:_(<2 x s16>), [[UV22:%[0-9]+]]:_(<2 x s16>), [[UV23:%[0-9]+]]:_(<2 x s16>), [[UV24:%[0-9]+]]:_(<2 x s16>), [[UV25:%[0-9]+]]:_(<2 x s16>), [[UV26:%[0-9]+]]:_(<2 x s16>), [[UV27:%[0-9]+]]:_(<2 x s16>), [[UV28:%[0-9]+]]:_(<2 x s16>), [[UV29:%[0-9]+]]:_(<2 x s16>), [[UV30:%[0-9]+]]:_(<2 x s16>), [[UV31:%[0-9]+]]:_(<2 x s16>), [[UV32:%[0-9]+]]:_(<2 x s16>), [[UV33:%[0-9]+]]:_(<2 x s16>), [[UV34:%[0-9]+]]:_(<2 x s16>), [[UV35:%[0-9]+]]:_(<2 x s16>), [[UV36:%[0-9]+]]:_(<2 x s16>), [[UV37:%[0-9]+]]:_(<2 x s16>), [[UV38:%[0-9]+]]:_(<2 x s16>), [[UV39:%[0-9]+]]:_(<2 x s16>), [[UV40:%[0-9]+]]:_(<2 x s16>), [[UV41:%[0-9]+]]:_(<2 x s16>), [[UV42:%[0-9]+]]:_(<2 x s16>), [[UV43:%[0-9]+]]:_(<2 x s16>), [[UV44:%[0-9]+]]:_(<2 x s16>), [[UV45:%[0-9]+]]:_(<2 x s16>), [[UV46:%[0-9]+]]:_(<2 x s16>), [[UV47:%[0-9]+]]:_(<2 x s16>), [[UV48:%[0-9]+]]:_(<2 x s16>), [[UV49:%[0-9]+]]:_(<2 x s16>), [[UV50:%[0-9]+]]:_(<2 x s16>), [[UV51:%[0-9]+]]:_(<2 x s16>), [[UV52:%[0-9]+]]:_(<2 x s16>), [[UV53:%[0-9]+]]:_(<2 x s16>), [[UV54:%[0-9]+]]:_(<2 x s16>), [[UV55:%[0-9]+]]:_(<2 x s16>), [[UV56:%[0-9]+]]:_(<2 x s16>), [[UV57:%[0-9]+]]:_(<2 x s16>), [[UV58:%[0-9]+]]:_(<2 x s16>), [[UV59:%[0-9]+]]:_(<2 x s16>), [[UV60:%[0-9]+]]:_(<2 x s16>), [[UV61:%[0-9]+]]:_(<2 x s16>), [[UV62:%[0-9]+]]:_(<2 x s16>), [[UV63:%[0-9]+]]:_(<2 x s16>), [[UV64:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<130 x s16>) + ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s16), [[UV1:%[0-9]+]]:_(s16), [[UV2:%[0-9]+]]:_(s16), [[UV3:%[0-9]+]]:_(s16), [[UV4:%[0-9]+]]:_(s16), [[UV5:%[0-9]+]]:_(s16), [[UV6:%[0-9]+]]:_(s16), [[UV7:%[0-9]+]]:_(s16), [[UV8:%[0-9]+]]:_(s16), [[UV9:%[0-9]+]]:_(s16), [[UV10:%[0-9]+]]:_(s16), [[UV11:%[0-9]+]]:_(s16), [[UV12:%[0-9]+]]:_(s16), [[UV13:%[0-9]+]]:_(s16), [[UV14:%[0-9]+]]:_(s16), [[UV15:%[0-9]+]]:_(s16), [[UV16:%[0-9]+]]:_(s16), [[UV17:%[0-9]+]]:_(s16), [[UV18:%[0-9]+]]:_(s16), [[UV19:%[0-9]+]]:_(s16), 
[[UV20:%[0-9]+]]:_(s16), [[UV21:%[0-9]+]]:_(s16), [[UV22:%[0-9]+]]:_(s16), [[UV23:%[0-9]+]]:_(s16), [[UV24:%[0-9]+]]:_(s16), [[UV25:%[0-9]+]]:_(s16), [[UV26:%[0-9]+]]:_(s16), [[UV27:%[0-9]+]]:_(s16), [[UV28:%[0-9]+]]:_(s16), [[UV29:%[0-9]+]]:_(s16), [[UV30:%[0-9]+]]:_(s16), [[UV31:%[0-9]+]]:_(s16), [[UV32:%[0-9]+]]:_(s16), [[UV33:%[0-9]+]]:_(s16), [[UV34:%[0-9]+]]:_(s16), [[UV35:%[0-9]+]]:_(s16), [[UV36:%[0-9]+]]:_(s16), [[UV37:%[0-9]+]]:_(s16), [[UV38:%[0-9]+]]:_(s16), [[UV39:%[0-9]+]]:_(s16), [[UV40:%[0-9]+]]:_(s16), [[UV41:%[0-9]+]]:_(s16), [[UV42:%[0-9]+]]:_(s16), [[UV43:%[0-9]+]]:_(s16), [[UV44:%[0-9]+]]:_(s16), [[UV45:%[0-9]+]]:_(s16), [[UV46:%[0-9]+]]:_(s16), [[UV47:%[0-9]+]]:_(s16), [[UV48:%[0-9]+]]:_(s16), [[UV49:%[0-9]+]]:_(s16), [[UV50:%[0-9]+]]:_(s16), [[UV51:%[0-9]+]]:_(s16), [[UV52:%[0-9]+]]:_(s16), [[UV53:%[0-9]+]]:_(s16), [[UV54:%[0-9]+]]:_(s16), [[UV55:%[0-9]+]]:_(s16), [[UV56:%[0-9]+]]:_(s16), [[UV57:%[0-9]+]]:_(s16), [[UV58:%[0-9]+]]:_(s16), [[UV59:%[0-9]+]]:_(s16), [[UV60:%[0-9]+]]:_(s16), [[UV61:%[0-9]+]]:_(s16), [[UV62:%[0-9]+]]:_(s16), [[UV63:%[0-9]+]]:_(s16), [[UV64:%[0-9]+]]:_(s16) = G_UNMERGE_VALUES [[LOAD]](<65 x s16>) + ; CHECK-NEXT: [[DEF1:%[0-9]+]]:_(s16) = G_IMPLICIT_DEF + ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<66 x s16>) = G_BUILD_VECTOR [[UV]](s16), [[UV1]](s16), [[UV2]](s16), [[UV3]](s16), [[UV4]](s16), [[UV5]](s16), [[UV6]](s16), [[UV7]](s16), [[UV8]](s16), [[UV9]](s16), [[UV10]](s16), [[UV11]](s16), [[UV12]](s16), [[UV13]](s16), [[UV14]](s16), [[UV15]](s16), [[UV16]](s16), [[UV17]](s16), [[UV18]](s16), [[UV19]](s16), [[UV20]](s16), [[UV21]](s16), [[UV22]](s16), [[UV23]](s16), [[UV24]](s16), [[UV25]](s16), [[UV26]](s16), [[UV27]](s16), [[UV28]](s16), [[UV29]](s16), [[UV30]](s16), [[UV31]](s16), [[UV32]](s16), [[UV33]](s16), [[UV34]](s16), [[UV35]](s16), [[UV36]](s16), [[UV37]](s16), [[UV38]](s16), [[UV39]](s16), [[UV40]](s16), [[UV41]](s16), [[UV42]](s16), [[UV43]](s16), [[UV44]](s16), [[UV45]](s16), [[UV46]](s16), [[UV47]](s16), [[UV48]](s16), [[UV49]](s16), [[UV50]](s16), [[UV51]](s16), [[UV52]](s16), [[UV53]](s16), [[UV54]](s16), [[UV55]](s16), [[UV56]](s16), [[UV57]](s16), [[UV58]](s16), [[UV59]](s16), [[UV60]](s16), [[UV61]](s16), [[UV62]](s16), [[UV63]](s16), [[UV64]](s16), [[DEF1]](s16) + ; CHECK-NEXT: [[UV65:%[0-9]+]]:_(<2 x s16>), [[UV66:%[0-9]+]]:_(<2 x s16>), [[UV67:%[0-9]+]]:_(<2 x s16>), [[UV68:%[0-9]+]]:_(<2 x s16>), [[UV69:%[0-9]+]]:_(<2 x s16>), [[UV70:%[0-9]+]]:_(<2 x s16>), [[UV71:%[0-9]+]]:_(<2 x s16>), [[UV72:%[0-9]+]]:_(<2 x s16>), [[UV73:%[0-9]+]]:_(<2 x s16>), [[UV74:%[0-9]+]]:_(<2 x s16>), [[UV75:%[0-9]+]]:_(<2 x s16>), [[UV76:%[0-9]+]]:_(<2 x s16>), [[UV77:%[0-9]+]]:_(<2 x s16>), [[UV78:%[0-9]+]]:_(<2 x s16>), [[UV79:%[0-9]+]]:_(<2 x s16>), [[UV80:%[0-9]+]]:_(<2 x s16>), [[UV81:%[0-9]+]]:_(<2 x s16>), [[UV82:%[0-9]+]]:_(<2 x s16>), [[UV83:%[0-9]+]]:_(<2 x s16>), [[UV84:%[0-9]+]]:_(<2 x s16>), [[UV85:%[0-9]+]]:_(<2 x s16>), [[UV86:%[0-9]+]]:_(<2 x s16>), [[UV87:%[0-9]+]]:_(<2 x s16>), [[UV88:%[0-9]+]]:_(<2 x s16>), [[UV89:%[0-9]+]]:_(<2 x s16>), [[UV90:%[0-9]+]]:_(<2 x s16>), [[UV91:%[0-9]+]]:_(<2 x s16>), [[UV92:%[0-9]+]]:_(<2 x s16>), [[UV93:%[0-9]+]]:_(<2 x s16>), [[UV94:%[0-9]+]]:_(<2 x s16>), [[UV95:%[0-9]+]]:_(<2 x s16>), [[UV96:%[0-9]+]]:_(<2 x s16>), [[UV97:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[BUILD_VECTOR]](<66 x s16>) ; CHECK-NEXT: [[COPY20:%[0-9]+]]:_(p5) = COPY $sp_reg ; CHECK-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 ; CHECK-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY20]], [[C3]](s32) - ; 
CHECK-NEXT: G_STORE [[UV31]](<2 x s16>), [[PTR_ADD1]](p5) :: (store (<2 x s16>) into stack, align 16, addrspace 5) + ; CHECK-NEXT: G_STORE [[UV96]](<2 x s16>), [[PTR_ADD1]](p5) :: (store (<2 x s16>) into stack, align 16, addrspace 5) ; CHECK-NEXT: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 ; CHECK-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY20]], [[C4]](s32) - ; CHECK-NEXT: G_STORE [[UV32]](<2 x s16>), [[PTR_ADD2]](p5) :: (store (<2 x s16>) into stack + 4, addrspace 5) - ; CHECK-NEXT: $vgpr0 = COPY [[UV]](<2 x s16>) - ; CHECK-NEXT: $vgpr1 = COPY [[UV1]](<2 x s16>) - ; CHECK-NEXT: $vgpr2 = COPY [[UV2]](<2 x s16>) - ; CHECK-NEXT: $vgpr3 = COPY [[UV3]](<2 x s16>) - ; CHECK-NEXT: $vgpr4 = COPY [[UV4]](<2 x s16>) - ; CHECK-NEXT: $vgpr5 = COPY [[UV5]](<2 x s16>) - ; CHECK-NEXT: $vgpr6 = COPY [[UV6]](<2 x s16>) - ; CHECK-NEXT: $vgpr7 = COPY [[UV7]](<2 x s16>) - ; CHECK-NEXT: $vgpr8 = COPY [[UV8]](<2 x s16>) - ; CHECK-NEXT: $vgpr9 = COPY [[UV9]](<2 x s16>) - ; CHECK-NEXT: $vgpr10 = COPY [[UV10]](<2 x s16>) - ; CHECK-NEXT: $vgpr11 = COPY [[UV11]](<2 x s16>) - ; CHECK-NEXT: $vgpr12 = COPY [[UV12]](<2 x s16>) - ; CHECK-NEXT: $vgpr13 = COPY [[UV13]](<2 x s16>) - ; CHECK-NEXT: $vgpr14 = COPY [[UV14]](<2 x s16>) - ; CHECK-NEXT: $vgpr15 = COPY [[UV15]](<2 x s16>) - ; CHECK-NEXT: $vgpr16 = COPY [[UV16]](<2 x s16>) - ; CHECK-NEXT: $vgpr17 = COPY [[UV17]](<2 x s16>) - ; CHECK-NEXT: $vgpr18 = COPY [[UV18]](<2 x s16>) - ; CHECK-NEXT: $vgpr19 = COPY [[UV19]](<2 x s16>) - ; CHECK-NEXT: $vgpr20 = COPY [[UV20]](<2 x s16>) - ; CHECK-NEXT: $vgpr21 = COPY [[UV21]](<2 x s16>) - ; CHECK-NEXT: $vgpr22 = COPY [[UV22]](<2 x s16>) - ; CHECK-NEXT: $vgpr23 = COPY [[UV23]](<2 x s16>) - ; CHECK-NEXT: $vgpr24 = COPY [[UV24]](<2 x s16>) - ; CHECK-NEXT: $vgpr25 = COPY [[UV25]](<2 x s16>) - ; CHECK-NEXT: $vgpr26 = COPY [[UV26]](<2 x s16>) - ; CHECK-NEXT: $vgpr27 = COPY [[UV27]](<2 x s16>) - ; CHECK-NEXT: $vgpr28 = COPY [[UV28]](<2 x s16>) - ; CHECK-NEXT: $vgpr29 = COPY [[UV29]](<2 x s16>) - ; CHECK-NEXT: $vgpr30 = COPY [[UV30]](<2 x s16>) + ; CHECK-NEXT: G_STORE [[UV97]](<2 x s16>), [[PTR_ADD2]](p5) :: (store (<2 x s16>) into stack + 4, addrspace 5) + ; CHECK-NEXT: $vgpr0 = COPY [[UV65]](<2 x s16>) + ; CHECK-NEXT: $vgpr1 = COPY [[UV66]](<2 x s16>) + ; CHECK-NEXT: $vgpr2 = COPY [[UV67]](<2 x s16>) + ; CHECK-NEXT: $vgpr3 = COPY [[UV68]](<2 x s16>) + ; CHECK-NEXT: $vgpr4 = COPY [[UV69]](<2 x s16>) + ; CHECK-NEXT: $vgpr5 = COPY [[UV70]](<2 x s16>) + ; CHECK-NEXT: $vgpr6 = COPY [[UV71]](<2 x s16>) + ; CHECK-NEXT: $vgpr7 = COPY [[UV72]](<2 x s16>) + ; CHECK-NEXT: $vgpr8 = COPY [[UV73]](<2 x s16>) + ; CHECK-NEXT: $vgpr9 = COPY [[UV74]](<2 x s16>) + ; CHECK-NEXT: $vgpr10 = COPY [[UV75]](<2 x s16>) + ; CHECK-NEXT: $vgpr11 = COPY [[UV76]](<2 x s16>) + ; CHECK-NEXT: $vgpr12 = COPY [[UV77]](<2 x s16>) + ; CHECK-NEXT: $vgpr13 = COPY [[UV78]](<2 x s16>) + ; CHECK-NEXT: $vgpr14 = COPY [[UV79]](<2 x s16>) + ; CHECK-NEXT: $vgpr15 = COPY [[UV80]](<2 x s16>) + ; CHECK-NEXT: $vgpr16 = COPY [[UV81]](<2 x s16>) + ; CHECK-NEXT: $vgpr17 = COPY [[UV82]](<2 x s16>) + ; CHECK-NEXT: $vgpr18 = COPY [[UV83]](<2 x s16>) + ; CHECK-NEXT: $vgpr19 = COPY [[UV84]](<2 x s16>) + ; CHECK-NEXT: $vgpr20 = COPY [[UV85]](<2 x s16>) + ; CHECK-NEXT: $vgpr21 = COPY [[UV86]](<2 x s16>) + ; CHECK-NEXT: $vgpr22 = COPY [[UV87]](<2 x s16>) + ; CHECK-NEXT: $vgpr23 = COPY [[UV88]](<2 x s16>) + ; CHECK-NEXT: $vgpr24 = COPY [[UV89]](<2 x s16>) + ; CHECK-NEXT: $vgpr25 = COPY [[UV90]](<2 x s16>) + ; CHECK-NEXT: $vgpr26 = COPY [[UV91]](<2 x s16>) + ; CHECK-NEXT: $vgpr27 = COPY 
[[UV92]](<2 x s16>) + ; CHECK-NEXT: $vgpr28 = COPY [[UV93]](<2 x s16>) + ; CHECK-NEXT: $vgpr29 = COPY [[UV94]](<2 x s16>) + ; CHECK-NEXT: $vgpr30 = COPY [[UV95]](<2 x s16>) ; CHECK-NEXT: [[COPY21:%[0-9]+]]:_(<4 x s32>) = COPY $private_rsrc_reg ; CHECK-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY21]](<4 x s32>) ; CHECK-NEXT: $sgpr4_sgpr5 = COPY [[COPY10]](p4) diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-function-args.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-function-args.ll index 70a41f9dafaa..f29e6456d090 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-function-args.ll +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-function-args.ll @@ -1230,12 +1230,12 @@ define void @void_func_v3i16(<3 x i16> %arg0) #0 { ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0 ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr1 - ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(<2 x s16>) = G_IMPLICIT_DEF - ; CHECK-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[COPY]](<2 x s16>), [[COPY1]](<2 x s16>), [[DEF]](<2 x s16>) - ; CHECK-NEXT: [[UV:%[0-9]+]]:_(<3 x s16>), [[UV1:%[0-9]+]]:_(<3 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<6 x s16>) + ; CHECK-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[COPY]](<2 x s16>), [[COPY1]](<2 x s16>) + ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s16), [[UV1:%[0-9]+]]:_(s16), [[UV2:%[0-9]+]]:_(s16), [[UV3:%[0-9]+]]:_(s16) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<4 x s16>) + ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s16>) = G_BUILD_VECTOR [[UV]](s16), [[UV1]](s16), [[UV2]](s16) ; CHECK-NEXT: [[COPY2:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 - ; CHECK-NEXT: [[DEF1:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF - ; CHECK-NEXT: G_STORE [[UV]](<3 x s16>), [[DEF1]](p1) :: (store (<3 x s16>) into `<3 x i16> addrspace(1)* undef`, align 8, addrspace 1) + ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF + ; CHECK-NEXT: G_STORE [[BUILD_VECTOR]](<3 x s16>), [[DEF]](p1) :: (store (<3 x s16>) into `<3 x i16> addrspace(1)* undef`, align 8, addrspace 1) ; CHECK-NEXT: [[COPY3:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY2]] ; CHECK-NEXT: S_SETPC_B64_return [[COPY3]] store <3 x i16> %arg0, <3 x i16> addrspace(1)* undef @@ -1267,12 +1267,12 @@ define void @void_func_v5i16(<5 x i16> %arg0) #0 { ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0 ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr1 ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr2 - ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(<2 x s16>) = G_IMPLICIT_DEF - ; CHECK-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<10 x s16>) = G_CONCAT_VECTORS [[COPY]](<2 x s16>), [[COPY1]](<2 x s16>), [[COPY2]](<2 x s16>), [[DEF]](<2 x s16>), [[DEF]](<2 x s16>) - ; CHECK-NEXT: [[UV:%[0-9]+]]:_(<5 x s16>), [[UV1:%[0-9]+]]:_(<5 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<10 x s16>) + ; CHECK-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[COPY]](<2 x s16>), [[COPY1]](<2 x s16>), [[COPY2]](<2 x s16>) + ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s16), [[UV1:%[0-9]+]]:_(s16), [[UV2:%[0-9]+]]:_(s16), [[UV3:%[0-9]+]]:_(s16), [[UV4:%[0-9]+]]:_(s16), [[UV5:%[0-9]+]]:_(s16) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<6 x s16>) + ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<5 x s16>) = G_BUILD_VECTOR [[UV]](s16), [[UV1]](s16), [[UV2]](s16), [[UV3]](s16), [[UV4]](s16) ; CHECK-NEXT: [[COPY3:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 - ; CHECK-NEXT: [[DEF1:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF - ; CHECK-NEXT: G_STORE [[UV]](<5 x s16>), [[DEF1]](p1) :: (store (<5 x s16>) into `<5 x i16> 
addrspace(1)* undef`, align 16, addrspace 1) + ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF + ; CHECK-NEXT: G_STORE [[BUILD_VECTOR]](<5 x s16>), [[DEF]](p1) :: (store (<5 x s16>) into `<5 x i16> addrspace(1)* undef`, align 16, addrspace 1) ; CHECK-NEXT: [[COPY4:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY3]] ; CHECK-NEXT: S_SETPC_B64_return [[COPY4]] store <5 x i16> %arg0, <5 x i16> addrspace(1)* undef @@ -1363,12 +1363,12 @@ define void @void_func_v65i16(<65 x i16> %arg0) #0 { ; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(<2 x s16>) = G_LOAD [[FRAME_INDEX]](p5) :: (invariant load (<2 x s16>) from %fixed-stack.1, align 16, addrspace 5) ; CHECK-NEXT: [[FRAME_INDEX1:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.0 ; CHECK-NEXT: [[LOAD1:%[0-9]+]]:_(<2 x s16>) = G_LOAD [[FRAME_INDEX1]](p5) :: (invariant load (<2 x s16>) from %fixed-stack.0, addrspace 5) - ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(<2 x s16>) = G_IMPLICIT_DEF - ; CHECK-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<130 x s16>) = G_CONCAT_VECTORS [[COPY]](<2 x s16>), [[COPY1]](<2 x s16>), [[COPY2]](<2 x s16>), [[COPY3]](<2 x s16>), [[COPY4]](<2 x s16>), [[COPY5]](<2 x s16>), [[COPY6]](<2 x s16>), [[COPY7]](<2 x s16>), [[COPY8]](<2 x s16>), [[COPY9]](<2 x s16>), [[COPY10]](<2 x s16>), [[COPY11]](<2 x s16>), [[COPY12]](<2 x s16>), [[COPY13]](<2 x s16>), [[COPY14]](<2 x s16>), [[COPY15]](<2 x s16>), [[COPY16]](<2 x s16>), [[COPY17]](<2 x s16>), [[COPY18]](<2 x s16>), [[COPY19]](<2 x s16>), [[COPY20]](<2 x s16>), [[COPY21]](<2 x s16>), [[COPY22]](<2 x s16>), [[COPY23]](<2 x s16>), [[COPY24]](<2 x s16>), [[COPY25]](<2 x s16>), [[COPY26]](<2 x s16>), [[COPY27]](<2 x s16>), [[COPY28]](<2 x s16>), [[COPY29]](<2 x s16>), [[COPY30]](<2 x s16>), [[LOAD]](<2 x s16>), [[LOAD1]](<2 x s16>), [[DEF]](<2 x s16>), [[DEF]](<2 x s16>), [[DEF]](<2 x s16>), [[DEF]](<2 x s16>), [[DEF]](<2 x s16>), [[DEF]](<2 x s16>), [[DEF]](<2 x s16>), [[DEF]](<2 x s16>), [[DEF]](<2 x s16>), [[DEF]](<2 x s16>), [[DEF]](<2 x s16>), [[DEF]](<2 x s16>), [[DEF]](<2 x s16>), [[DEF]](<2 x s16>), [[DEF]](<2 x s16>), [[DEF]](<2 x s16>), [[DEF]](<2 x s16>), [[DEF]](<2 x s16>), [[DEF]](<2 x s16>), [[DEF]](<2 x s16>), [[DEF]](<2 x s16>), [[DEF]](<2 x s16>), [[DEF]](<2 x s16>), [[DEF]](<2 x s16>), [[DEF]](<2 x s16>), [[DEF]](<2 x s16>), [[DEF]](<2 x s16>), [[DEF]](<2 x s16>), [[DEF]](<2 x s16>), [[DEF]](<2 x s16>), [[DEF]](<2 x s16>), [[DEF]](<2 x s16>) - ; CHECK-NEXT: [[UV:%[0-9]+]]:_(<65 x s16>), [[UV1:%[0-9]+]]:_(<65 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<130 x s16>) + ; CHECK-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<66 x s16>) = G_CONCAT_VECTORS [[COPY]](<2 x s16>), [[COPY1]](<2 x s16>), [[COPY2]](<2 x s16>), [[COPY3]](<2 x s16>), [[COPY4]](<2 x s16>), [[COPY5]](<2 x s16>), [[COPY6]](<2 x s16>), [[COPY7]](<2 x s16>), [[COPY8]](<2 x s16>), [[COPY9]](<2 x s16>), [[COPY10]](<2 x s16>), [[COPY11]](<2 x s16>), [[COPY12]](<2 x s16>), [[COPY13]](<2 x s16>), [[COPY14]](<2 x s16>), [[COPY15]](<2 x s16>), [[COPY16]](<2 x s16>), [[COPY17]](<2 x s16>), [[COPY18]](<2 x s16>), [[COPY19]](<2 x s16>), [[COPY20]](<2 x s16>), [[COPY21]](<2 x s16>), [[COPY22]](<2 x s16>), [[COPY23]](<2 x s16>), [[COPY24]](<2 x s16>), [[COPY25]](<2 x s16>), [[COPY26]](<2 x s16>), [[COPY27]](<2 x s16>), [[COPY28]](<2 x s16>), [[COPY29]](<2 x s16>), [[COPY30]](<2 x s16>), [[LOAD]](<2 x s16>), [[LOAD1]](<2 x s16>) + ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s16), [[UV1:%[0-9]+]]:_(s16), [[UV2:%[0-9]+]]:_(s16), [[UV3:%[0-9]+]]:_(s16), [[UV4:%[0-9]+]]:_(s16), [[UV5:%[0-9]+]]:_(s16), [[UV6:%[0-9]+]]:_(s16), [[UV7:%[0-9]+]]:_(s16), [[UV8:%[0-9]+]]:_(s16), 
[[UV9:%[0-9]+]]:_(s16), [[UV10:%[0-9]+]]:_(s16), [[UV11:%[0-9]+]]:_(s16), [[UV12:%[0-9]+]]:_(s16), [[UV13:%[0-9]+]]:_(s16), [[UV14:%[0-9]+]]:_(s16), [[UV15:%[0-9]+]]:_(s16), [[UV16:%[0-9]+]]:_(s16), [[UV17:%[0-9]+]]:_(s16), [[UV18:%[0-9]+]]:_(s16), [[UV19:%[0-9]+]]:_(s16), [[UV20:%[0-9]+]]:_(s16), [[UV21:%[0-9]+]]:_(s16), [[UV22:%[0-9]+]]:_(s16), [[UV23:%[0-9]+]]:_(s16), [[UV24:%[0-9]+]]:_(s16), [[UV25:%[0-9]+]]:_(s16), [[UV26:%[0-9]+]]:_(s16), [[UV27:%[0-9]+]]:_(s16), [[UV28:%[0-9]+]]:_(s16), [[UV29:%[0-9]+]]:_(s16), [[UV30:%[0-9]+]]:_(s16), [[UV31:%[0-9]+]]:_(s16), [[UV32:%[0-9]+]]:_(s16), [[UV33:%[0-9]+]]:_(s16), [[UV34:%[0-9]+]]:_(s16), [[UV35:%[0-9]+]]:_(s16), [[UV36:%[0-9]+]]:_(s16), [[UV37:%[0-9]+]]:_(s16), [[UV38:%[0-9]+]]:_(s16), [[UV39:%[0-9]+]]:_(s16), [[UV40:%[0-9]+]]:_(s16), [[UV41:%[0-9]+]]:_(s16), [[UV42:%[0-9]+]]:_(s16), [[UV43:%[0-9]+]]:_(s16), [[UV44:%[0-9]+]]:_(s16), [[UV45:%[0-9]+]]:_(s16), [[UV46:%[0-9]+]]:_(s16), [[UV47:%[0-9]+]]:_(s16), [[UV48:%[0-9]+]]:_(s16), [[UV49:%[0-9]+]]:_(s16), [[UV50:%[0-9]+]]:_(s16), [[UV51:%[0-9]+]]:_(s16), [[UV52:%[0-9]+]]:_(s16), [[UV53:%[0-9]+]]:_(s16), [[UV54:%[0-9]+]]:_(s16), [[UV55:%[0-9]+]]:_(s16), [[UV56:%[0-9]+]]:_(s16), [[UV57:%[0-9]+]]:_(s16), [[UV58:%[0-9]+]]:_(s16), [[UV59:%[0-9]+]]:_(s16), [[UV60:%[0-9]+]]:_(s16), [[UV61:%[0-9]+]]:_(s16), [[UV62:%[0-9]+]]:_(s16), [[UV63:%[0-9]+]]:_(s16), [[UV64:%[0-9]+]]:_(s16), [[UV65:%[0-9]+]]:_(s16) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<66 x s16>) + ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<65 x s16>) = G_BUILD_VECTOR [[UV]](s16), [[UV1]](s16), [[UV2]](s16), [[UV3]](s16), [[UV4]](s16), [[UV5]](s16), [[UV6]](s16), [[UV7]](s16), [[UV8]](s16), [[UV9]](s16), [[UV10]](s16), [[UV11]](s16), [[UV12]](s16), [[UV13]](s16), [[UV14]](s16), [[UV15]](s16), [[UV16]](s16), [[UV17]](s16), [[UV18]](s16), [[UV19]](s16), [[UV20]](s16), [[UV21]](s16), [[UV22]](s16), [[UV23]](s16), [[UV24]](s16), [[UV25]](s16), [[UV26]](s16), [[UV27]](s16), [[UV28]](s16), [[UV29]](s16), [[UV30]](s16), [[UV31]](s16), [[UV32]](s16), [[UV33]](s16), [[UV34]](s16), [[UV35]](s16), [[UV36]](s16), [[UV37]](s16), [[UV38]](s16), [[UV39]](s16), [[UV40]](s16), [[UV41]](s16), [[UV42]](s16), [[UV43]](s16), [[UV44]](s16), [[UV45]](s16), [[UV46]](s16), [[UV47]](s16), [[UV48]](s16), [[UV49]](s16), [[UV50]](s16), [[UV51]](s16), [[UV52]](s16), [[UV53]](s16), [[UV54]](s16), [[UV55]](s16), [[UV56]](s16), [[UV57]](s16), [[UV58]](s16), [[UV59]](s16), [[UV60]](s16), [[UV61]](s16), [[UV62]](s16), [[UV63]](s16), [[UV64]](s16) ; CHECK-NEXT: [[COPY31:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 - ; CHECK-NEXT: [[DEF1:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF - ; CHECK-NEXT: G_STORE [[UV]](<65 x s16>), [[DEF1]](p1) :: (store (<65 x s16>) into `<65 x i16> addrspace(1)* undef`, align 256, addrspace 1) + ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF + ; CHECK-NEXT: G_STORE [[BUILD_VECTOR]](<65 x s16>), [[DEF]](p1) :: (store (<65 x s16>) into `<65 x i16> addrspace(1)* undef`, align 256, addrspace 1) ; CHECK-NEXT: [[COPY32:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY31]] ; CHECK-NEXT: S_SETPC_B64_return [[COPY32]] store <65 x i16> %arg0, <65 x i16> addrspace(1)* undef @@ -1680,12 +1680,12 @@ define void @void_func_v3f16(<3 x half> %arg0) #0 { ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0 ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr1 - ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(<2 x s16>) = G_IMPLICIT_DEF - ; CHECK-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[COPY]](<2 x s16>), [[COPY1]](<2 x s16>), [[DEF]](<2 x s16>) - 
; CHECK-NEXT: [[UV:%[0-9]+]]:_(<3 x s16>), [[UV1:%[0-9]+]]:_(<3 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<6 x s16>) + ; CHECK-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[COPY]](<2 x s16>), [[COPY1]](<2 x s16>) + ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s16), [[UV1:%[0-9]+]]:_(s16), [[UV2:%[0-9]+]]:_(s16), [[UV3:%[0-9]+]]:_(s16) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<4 x s16>) + ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s16>) = G_BUILD_VECTOR [[UV]](s16), [[UV1]](s16), [[UV2]](s16) ; CHECK-NEXT: [[COPY2:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 - ; CHECK-NEXT: [[DEF1:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF - ; CHECK-NEXT: G_STORE [[UV]](<3 x s16>), [[DEF1]](p1) :: (store (<3 x s16>) into `<3 x half> addrspace(1)* undef`, align 8, addrspace 1) + ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF + ; CHECK-NEXT: G_STORE [[BUILD_VECTOR]](<3 x s16>), [[DEF]](p1) :: (store (<3 x s16>) into `<3 x half> addrspace(1)* undef`, align 8, addrspace 1) ; CHECK-NEXT: [[COPY3:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY2]] ; CHECK-NEXT: S_SETPC_B64_return [[COPY3]] store <3 x half> %arg0, <3 x half> addrspace(1)* undef diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-sibling-call.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-sibling-call.ll index d6da4fca2198..7a0176d6233f 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-sibling-call.ll +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-sibling-call.ll @@ -1383,9 +1383,9 @@ define hidden fastcc <3 x i16> @sibling_call_v3i16_fastcc_v3i16(<3 x i16> %a) #1 ; GCN-NEXT: [[COPY7:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5 ; GCN-NEXT: [[COPY8:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0 ; GCN-NEXT: [[COPY9:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr1 - ; GCN-NEXT: [[DEF:%[0-9]+]]:_(<2 x s16>) = G_IMPLICIT_DEF - ; GCN-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[COPY8]](<2 x s16>), [[COPY9]](<2 x s16>), [[DEF]](<2 x s16>) - ; GCN-NEXT: [[UV:%[0-9]+]]:_(<3 x s16>), [[UV1:%[0-9]+]]:_(<3 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<6 x s16>) + ; GCN-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[COPY8]](<2 x s16>), [[COPY9]](<2 x s16>) + ; GCN-NEXT: [[UV:%[0-9]+]]:_(s16), [[UV1:%[0-9]+]]:_(s16), [[UV2:%[0-9]+]]:_(s16), [[UV3:%[0-9]+]]:_(s16) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<4 x s16>) + ; GCN-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s16>) = G_BUILD_VECTOR [[UV]](s16), [[UV1]](s16), [[UV2]](s16) ; GCN-NEXT: [[COPY10:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 ; GCN-NEXT: [[GV:%[0-9]+]]:sreg_64(p0) = G_GLOBAL_VALUE @v3i16_fastcc_v3i16 ; GCN-NEXT: [[COPY11:%[0-9]+]]:_(p4) = COPY [[COPY7]] @@ -1396,11 +1396,12 @@ define hidden fastcc <3 x i16> @sibling_call_v3i16_fastcc_v3i16(<3 x i16> %a) #1 ; GCN-NEXT: [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY2]] ; GCN-NEXT: [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY1]] ; GCN-NEXT: [[COPY18:%[0-9]+]]:_(s32) = COPY [[COPY]](s32) - ; GCN-NEXT: [[DEF1:%[0-9]+]]:_(<3 x s16>) = G_IMPLICIT_DEF - ; GCN-NEXT: [[CONCAT_VECTORS1:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[UV]](<3 x s16>), [[DEF1]](<3 x s16>) - ; GCN-NEXT: [[UV2:%[0-9]+]]:_(<2 x s16>), [[UV3:%[0-9]+]]:_(<2 x s16>), [[UV4:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS1]](<6 x s16>) - ; GCN-NEXT: $vgpr0 = COPY [[UV2]](<2 x s16>) - ; GCN-NEXT: $vgpr1 = COPY [[UV3]](<2 x s16>) + ; GCN-NEXT: [[UV4:%[0-9]+]]:_(s16), [[UV5:%[0-9]+]]:_(s16), [[UV6:%[0-9]+]]:_(s16) = G_UNMERGE_VALUES [[BUILD_VECTOR]](<3 x s16>) + ; GCN-NEXT: [[DEF:%[0-9]+]]:_(s16) = G_IMPLICIT_DEF + ; GCN-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s16>) = 
G_BUILD_VECTOR [[UV4]](s16), [[UV5]](s16), [[UV6]](s16), [[DEF]](s16) + ; GCN-NEXT: [[UV7:%[0-9]+]]:_(<2 x s16>), [[UV8:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[BUILD_VECTOR1]](<4 x s16>) + ; GCN-NEXT: $vgpr0 = COPY [[UV7]](<2 x s16>) + ; GCN-NEXT: $vgpr1 = COPY [[UV8]](<2 x s16>) ; GCN-NEXT: [[COPY19:%[0-9]+]]:_(<4 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3 ; GCN-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY19]](<4 x s32>) ; GCN-NEXT: $sgpr4_sgpr5 = COPY [[COPY11]](p4) diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-and.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-and.mir index 6d67456bf6f5..9acec3f56876 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-and.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-and.mir @@ -1,5 +1,5 @@ # NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py -# RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=fiji -O0 -run-pass=legalizer -allow-ginsert-as-artifact=0 -global-isel-abort=0 %s -o - | FileCheck %s +# RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=fiji -O0 -run-pass=legalizer %s -o - | FileCheck %s --- name: test_and_s32 @@ -302,18 +302,14 @@ body: | ; CHECK: [[COPY:%[0-9]+]]:_(<3 x s32>) = COPY $vgpr0_vgpr1_vgpr2 ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(<3 x s32>) = COPY $vgpr3_vgpr4_vgpr5 ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<3 x s32>) - ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[UV]](s32), [[UV1]](s32) - ; CHECK-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[UV2]](s32), [[DEF]](s32) - ; CHECK-NEXT: [[DEF1:%[0-9]+]]:_(<2 x s32>) = G_IMPLICIT_DEF ; CHECK-NEXT: [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](<3 x s32>) - ; CHECK-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[UV3]](s32), [[UV4]](s32) - ; CHECK-NEXT: [[BUILD_VECTOR3:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[UV5]](s32), [[DEF]](s32) - ; CHECK-NEXT: [[AND:%[0-9]+]]:_(<2 x s32>) = G_AND [[BUILD_VECTOR]], [[BUILD_VECTOR2]] - ; CHECK-NEXT: [[AND1:%[0-9]+]]:_(<2 x s32>) = G_AND [[BUILD_VECTOR1]], [[BUILD_VECTOR3]] - ; CHECK-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s32>) = G_CONCAT_VECTORS [[AND]](<2 x s32>), [[AND1]](<2 x s32>), [[DEF1]](<2 x s32>) - ; CHECK-NEXT: [[UV6:%[0-9]+]]:_(<3 x s32>), [[UV7:%[0-9]+]]:_(<3 x s32>) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<6 x s32>) - ; CHECK-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[UV6]](<3 x s32>) + ; CHECK-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[UV3]](s32), [[UV4]](s32) + ; CHECK-NEXT: [[AND:%[0-9]+]]:_(<2 x s32>) = G_AND [[BUILD_VECTOR]], [[BUILD_VECTOR1]] + ; CHECK-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[UV2]], [[UV5]] + ; CHECK-NEXT: [[UV6:%[0-9]+]]:_(s32), [[UV7:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AND]](<2 x s32>) + ; CHECK-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[UV6]](s32), [[UV7]](s32), [[AND1]](s32) + ; CHECK-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[BUILD_VECTOR2]](<3 x s32>) %0:_(<3 x s32>) = COPY $vgpr0_vgpr1_vgpr2 %1:_(<3 x s32>) = COPY $vgpr3_vgpr4_vgpr5 %2:_(<3 x s32>) = G_AND %0, %1 @@ -350,23 +346,20 @@ body: | ; CHECK: [[DEF:%[0-9]+]]:_(<5 x s32>) = G_IMPLICIT_DEF ; CHECK-NEXT: [[DEF1:%[0-9]+]]:_(<5 x s32>) = G_IMPLICIT_DEF ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[DEF]](<5 x s32>) - ; CHECK-NEXT: [[DEF2:%[0-9]+]]:_(s32) = 
G_IMPLICIT_DEF ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[UV]](s32), [[UV1]](s32) ; CHECK-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[UV2]](s32), [[UV3]](s32) - ; CHECK-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[UV4]](s32), [[DEF2]](s32) - ; CHECK-NEXT: [[DEF3:%[0-9]+]]:_(<2 x s32>) = G_IMPLICIT_DEF ; CHECK-NEXT: [[UV5:%[0-9]+]]:_(s32), [[UV6:%[0-9]+]]:_(s32), [[UV7:%[0-9]+]]:_(s32), [[UV8:%[0-9]+]]:_(s32), [[UV9:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[DEF1]](<5 x s32>) - ; CHECK-NEXT: [[BUILD_VECTOR3:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[UV5]](s32), [[UV6]](s32) - ; CHECK-NEXT: [[BUILD_VECTOR4:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[UV7]](s32), [[UV8]](s32) - ; CHECK-NEXT: [[BUILD_VECTOR5:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[UV9]](s32), [[DEF2]](s32) - ; CHECK-NEXT: [[AND:%[0-9]+]]:_(<2 x s32>) = G_AND [[BUILD_VECTOR]], [[BUILD_VECTOR3]] - ; CHECK-NEXT: [[AND1:%[0-9]+]]:_(<2 x s32>) = G_AND [[BUILD_VECTOR1]], [[BUILD_VECTOR4]] - ; CHECK-NEXT: [[AND2:%[0-9]+]]:_(<2 x s32>) = G_AND [[BUILD_VECTOR2]], [[BUILD_VECTOR5]] - ; CHECK-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<10 x s32>) = G_CONCAT_VECTORS [[AND]](<2 x s32>), [[AND1]](<2 x s32>), [[AND2]](<2 x s32>), [[DEF3]](<2 x s32>), [[DEF3]](<2 x s32>) - ; CHECK-NEXT: [[UV10:%[0-9]+]]:_(<5 x s32>), [[UV11:%[0-9]+]]:_(<5 x s32>) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<10 x s32>) - ; CHECK-NEXT: [[DEF4:%[0-9]+]]:_(<8 x s32>) = G_IMPLICIT_DEF - ; CHECK-NEXT: [[INSERT:%[0-9]+]]:_(<8 x s32>) = G_INSERT [[DEF4]], [[UV10]](<5 x s32>), 0 - ; CHECK-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[INSERT]](<8 x s32>) + ; CHECK-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[UV5]](s32), [[UV6]](s32) + ; CHECK-NEXT: [[BUILD_VECTOR3:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[UV7]](s32), [[UV8]](s32) + ; CHECK-NEXT: [[AND:%[0-9]+]]:_(<2 x s32>) = G_AND [[BUILD_VECTOR]], [[BUILD_VECTOR2]] + ; CHECK-NEXT: [[AND1:%[0-9]+]]:_(<2 x s32>) = G_AND [[BUILD_VECTOR1]], [[BUILD_VECTOR3]] + ; CHECK-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[UV4]], [[UV9]] + ; CHECK-NEXT: [[UV10:%[0-9]+]]:_(s32), [[UV11:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AND]](<2 x s32>) + ; CHECK-NEXT: [[UV12:%[0-9]+]]:_(s32), [[UV13:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AND1]](<2 x s32>) + ; CHECK-NEXT: [[DEF2:%[0-9]+]]:_(<8 x s32>) = G_IMPLICIT_DEF + ; CHECK-NEXT: [[UV14:%[0-9]+]]:_(s32), [[UV15:%[0-9]+]]:_(s32), [[UV16:%[0-9]+]]:_(s32), [[UV17:%[0-9]+]]:_(s32), [[UV18:%[0-9]+]]:_(s32), [[UV19:%[0-9]+]]:_(s32), [[UV20:%[0-9]+]]:_(s32), [[UV21:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[DEF2]](<8 x s32>) + ; CHECK-NEXT: [[BUILD_VECTOR4:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[UV10]](s32), [[UV11]](s32), [[UV12]](s32), [[UV13]](s32), [[AND2]](s32), [[UV19]](s32), [[UV20]](s32), [[UV21]](s32) + ; CHECK-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[BUILD_VECTOR4]](<8 x s32>) %0:_(<5 x s32>) = G_IMPLICIT_DEF %1:_(<5 x s32>) = G_IMPLICIT_DEF %2:_(<5 x s32>) = G_AND %0, %1 @@ -421,40 +414,65 @@ body: | ; CHECK-LABEL: name: test_and_v3s16 ; CHECK: [[COPY:%[0-9]+]]:_(<6 x s16>) = COPY $vgpr0_vgpr1_vgpr2 ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(<6 x s16>) = COPY $vgpr3_vgpr4_vgpr5 - ; CHECK-NEXT: [[UV:%[0-9]+]]:_(<3 x s16>), [[UV1:%[0-9]+]]:_(<3 x s16>) = G_UNMERGE_VALUES [[COPY]](<6 x s16>) - ; CHECK-NEXT: [[UV2:%[0-9]+]]:_(<3 x s16>), [[UV3:%[0-9]+]]:_(<3 x s16>) = G_UNMERGE_VALUES [[COPY1]](<6 x s16>) - ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF - ; CHECK-NEXT: [[INSERT:%[0-9]+]]:_(<4 
x s16>) = G_INSERT [[DEF]], [[UV]](<3 x s16>), 0 - ; CHECK-NEXT: [[INSERT1:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF]], [[UV2]](<3 x s16>), 0 - ; CHECK-NEXT: [[AND:%[0-9]+]]:_(<4 x s16>) = G_AND [[INSERT]], [[INSERT1]] - ; CHECK-NEXT: [[DEF1:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF - ; CHECK-NEXT: [[UV4:%[0-9]+]]:_(<2 x s16>), [[UV5:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[AND]](<4 x s16>) - ; CHECK-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV4]](<2 x s16>) + ; CHECK-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>), [[UV2:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[COPY]](<6 x s16>) + ; CHECK-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>) ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 ; CHECK-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) - ; CHECK-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV5]](<2 x s16>) - ; CHECK-NEXT: [[UV6:%[0-9]+]]:_(<2 x s16>), [[UV7:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF1]](<4 x s16>) - ; CHECK-NEXT: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[UV6]](<2 x s16>) - ; CHECK-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C]](s32) - ; CHECK-NEXT: [[BITCAST3:%[0-9]+]]:_(s32) = G_BITCAST [[UV7]](<2 x s16>) + ; CHECK-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>) ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 - ; CHECK-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[BITCAST]], [[C1]] - ; CHECK-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[LSHR]], [[C1]] - ; CHECK-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND2]], [[C]](s32) - ; CHECK-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND1]], [[SHL]] - ; CHECK-NEXT: [[BITCAST4:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) - ; CHECK-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[BITCAST1]], [[C1]] - ; CHECK-NEXT: [[AND4:%[0-9]+]]:_(s32) = G_AND [[BITCAST2]], [[C1]] - ; CHECK-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND4]], [[C]](s32) - ; CHECK-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND3]], [[SHL1]] - ; CHECK-NEXT: [[BITCAST5:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32) - ; CHECK-NEXT: [[AND5:%[0-9]+]]:_(s32) = G_AND [[LSHR1]], [[C1]] - ; CHECK-NEXT: [[AND6:%[0-9]+]]:_(s32) = G_AND [[BITCAST3]], [[C1]] - ; CHECK-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND6]], [[C]](s32) - ; CHECK-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[AND5]], [[SHL2]] + ; CHECK-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[BITCAST]], [[C1]] + ; CHECK-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LSHR]], [[C1]] + ; CHECK-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C]](s32) + ; CHECK-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]] + ; CHECK-NEXT: [[BITCAST2:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) + ; CHECK-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[BITCAST1]], [[C1]] + ; CHECK-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY [[C2]](s32) + ; CHECK-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[COPY2]], [[C]](s32) + ; CHECK-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL1]] + ; CHECK-NEXT: [[BITCAST3:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32) + ; CHECK-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BITCAST2]](<2 x s16>), [[BITCAST3]](<2 x s16>) + ; CHECK-NEXT: [[UV3:%[0-9]+]]:_(<2 x s16>), [[UV4:%[0-9]+]]:_(<2 x s16>), [[UV5:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[COPY1]](<6 x s16>) + ; CHECK-NEXT: [[BITCAST4:%[0-9]+]]:_(s32) = G_BITCAST [[UV3]](<2 x s16>) + ; CHECK-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST4]], [[C]](s32) + ; CHECK-NEXT: [[BITCAST5:%[0-9]+]]:_(s32) = G_BITCAST [[UV4]](<2 x s16>) + ; CHECK-NEXT: 
[[AND3:%[0-9]+]]:_(s32) = G_AND [[BITCAST4]], [[C1]] + ; CHECK-NEXT: [[AND4:%[0-9]+]]:_(s32) = G_AND [[LSHR1]], [[C1]] + ; CHECK-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND4]], [[C]](s32) + ; CHECK-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[AND3]], [[SHL2]] ; CHECK-NEXT: [[BITCAST6:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR2]](s32) - ; CHECK-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BITCAST4]](<2 x s16>), [[BITCAST5]](<2 x s16>), [[BITCAST6]](<2 x s16>) - ; CHECK-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[CONCAT_VECTORS]](<6 x s16>) + ; CHECK-NEXT: [[AND5:%[0-9]+]]:_(s32) = G_AND [[BITCAST5]], [[C1]] + ; CHECK-NEXT: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[C2]], [[C]](s32) + ; CHECK-NEXT: [[OR3:%[0-9]+]]:_(s32) = G_OR [[AND5]], [[SHL3]] + ; CHECK-NEXT: [[BITCAST7:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR3]](s32) + ; CHECK-NEXT: [[CONCAT_VECTORS1:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BITCAST6]](<2 x s16>), [[BITCAST7]](<2 x s16>) + ; CHECK-NEXT: [[AND6:%[0-9]+]]:_(<4 x s16>) = G_AND [[CONCAT_VECTORS]], [[CONCAT_VECTORS1]] + ; CHECK-NEXT: [[UV6:%[0-9]+]]:_(<2 x s16>), [[UV7:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[AND6]](<4 x s16>) + ; CHECK-NEXT: [[BITCAST8:%[0-9]+]]:_(s32) = G_BITCAST [[UV6]](<2 x s16>) + ; CHECK-NEXT: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST8]], [[C]](s32) + ; CHECK-NEXT: [[BITCAST9:%[0-9]+]]:_(s32) = G_BITCAST [[UV7]](<2 x s16>) + ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF + ; CHECK-NEXT: [[UV8:%[0-9]+]]:_(<2 x s16>), [[UV9:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<4 x s16>) + ; CHECK-NEXT: [[BITCAST10:%[0-9]+]]:_(s32) = G_BITCAST [[UV8]](<2 x s16>) + ; CHECK-NEXT: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST10]], [[C]](s32) + ; CHECK-NEXT: [[BITCAST11:%[0-9]+]]:_(s32) = G_BITCAST [[UV9]](<2 x s16>) + ; CHECK-NEXT: [[AND7:%[0-9]+]]:_(s32) = G_AND [[BITCAST8]], [[C1]] + ; CHECK-NEXT: [[AND8:%[0-9]+]]:_(s32) = G_AND [[LSHR2]], [[C1]] + ; CHECK-NEXT: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[AND8]], [[C]](s32) + ; CHECK-NEXT: [[OR4:%[0-9]+]]:_(s32) = G_OR [[AND7]], [[SHL4]] + ; CHECK-NEXT: [[BITCAST12:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR4]](s32) + ; CHECK-NEXT: [[AND9:%[0-9]+]]:_(s32) = G_AND [[BITCAST9]], [[C1]] + ; CHECK-NEXT: [[AND10:%[0-9]+]]:_(s32) = G_AND [[BITCAST10]], [[C1]] + ; CHECK-NEXT: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[AND10]], [[C]](s32) + ; CHECK-NEXT: [[OR5:%[0-9]+]]:_(s32) = G_OR [[AND9]], [[SHL5]] + ; CHECK-NEXT: [[BITCAST13:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR5]](s32) + ; CHECK-NEXT: [[AND11:%[0-9]+]]:_(s32) = G_AND [[LSHR3]], [[C1]] + ; CHECK-NEXT: [[AND12:%[0-9]+]]:_(s32) = G_AND [[BITCAST11]], [[C1]] + ; CHECK-NEXT: [[SHL6:%[0-9]+]]:_(s32) = G_SHL [[AND12]], [[C]](s32) + ; CHECK-NEXT: [[OR6:%[0-9]+]]:_(s32) = G_OR [[AND11]], [[SHL6]] + ; CHECK-NEXT: [[BITCAST14:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR6]](s32) + ; CHECK-NEXT: [[CONCAT_VECTORS2:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BITCAST12]](<2 x s16>), [[BITCAST13]](<2 x s16>), [[BITCAST14]](<2 x s16>) + ; CHECK-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[CONCAT_VECTORS2]](<6 x s16>) %0:_(<6 x s16>) = COPY $vgpr0_vgpr1_vgpr2 %1:_(<6 x s16>) = COPY $vgpr3_vgpr4_vgpr5 %2:_(<3 x s16>), %3:_(<3 x s16>) = G_UNMERGE_VALUES %0 @@ -490,54 +508,102 @@ body: | ; CHECK-LABEL: name: test_and_v5s16 ; CHECK: [[DEF:%[0-9]+]]:_(<6 x s16>) = G_IMPLICIT_DEF - ; CHECK-NEXT: [[DEF1:%[0-9]+]]:_(<6 x s16>) = G_IMPLICIT_DEF - ; CHECK-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<30 x s16>) = G_CONCAT_VECTORS [[DEF]](<6 x s16>), [[DEF1]](<6 x s16>), [[DEF1]](<6 x s16>), [[DEF1]](<6 x s16>), [[DEF1]](<6 x 
s16>) - ; CHECK-NEXT: [[UV:%[0-9]+]]:_(<5 x s16>), [[UV1:%[0-9]+]]:_(<5 x s16>), [[UV2:%[0-9]+]]:_(<5 x s16>), [[UV3:%[0-9]+]]:_(<5 x s16>), [[UV4:%[0-9]+]]:_(<5 x s16>), [[UV5:%[0-9]+]]:_(<5 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<30 x s16>) - ; CHECK-NEXT: [[DEF2:%[0-9]+]]:_(<6 x s16>) = G_IMPLICIT_DEF - ; CHECK-NEXT: [[CONCAT_VECTORS1:%[0-9]+]]:_(<30 x s16>) = G_CONCAT_VECTORS [[DEF2]](<6 x s16>), [[DEF1]](<6 x s16>), [[DEF1]](<6 x s16>), [[DEF1]](<6 x s16>), [[DEF1]](<6 x s16>) - ; CHECK-NEXT: [[UV6:%[0-9]+]]:_(<5 x s16>), [[UV7:%[0-9]+]]:_(<5 x s16>), [[UV8:%[0-9]+]]:_(<5 x s16>), [[UV9:%[0-9]+]]:_(<5 x s16>), [[UV10:%[0-9]+]]:_(<5 x s16>), [[UV11:%[0-9]+]]:_(<5 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS1]](<30 x s16>) - ; CHECK-NEXT: [[INSERT:%[0-9]+]]:_(<6 x s16>) = G_INSERT [[DEF1]], [[UV]](<5 x s16>), 0 - ; CHECK-NEXT: [[INSERT1:%[0-9]+]]:_(<6 x s16>) = G_INSERT [[DEF1]], [[UV6]](<5 x s16>), 0 - ; CHECK-NEXT: [[UV12:%[0-9]+]]:_(<3 x s16>), [[UV13:%[0-9]+]]:_(<3 x s16>) = G_UNMERGE_VALUES [[INSERT]](<6 x s16>) - ; CHECK-NEXT: [[UV14:%[0-9]+]]:_(<3 x s16>), [[UV15:%[0-9]+]]:_(<3 x s16>) = G_UNMERGE_VALUES [[INSERT1]](<6 x s16>) - ; CHECK-NEXT: [[DEF3:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF - ; CHECK-NEXT: [[INSERT2:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF3]], [[UV12]](<3 x s16>), 0 - ; CHECK-NEXT: [[INSERT3:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF3]], [[UV14]](<3 x s16>), 0 - ; CHECK-NEXT: [[AND:%[0-9]+]]:_(<4 x s16>) = G_AND [[INSERT2]], [[INSERT3]] - ; CHECK-NEXT: [[INSERT4:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF3]], [[UV13]](<3 x s16>), 0 - ; CHECK-NEXT: [[INSERT5:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF3]], [[UV15]](<3 x s16>), 0 - ; CHECK-NEXT: [[AND1:%[0-9]+]]:_(<4 x s16>) = G_AND [[INSERT4]], [[INSERT5]] - ; CHECK-NEXT: [[UV16:%[0-9]+]]:_(<2 x s16>), [[UV17:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[AND]](<4 x s16>) - ; CHECK-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV16]](<2 x s16>) + ; CHECK-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>), [[UV2:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<6 x s16>) + ; CHECK-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>) ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 ; CHECK-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) - ; CHECK-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV17]](<2 x s16>) - ; CHECK-NEXT: [[UV18:%[0-9]+]]:_(<2 x s16>), [[UV19:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[AND1]](<4 x s16>) - ; CHECK-NEXT: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[UV18]](<2 x s16>) - ; CHECK-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C]](s32) - ; CHECK-NEXT: [[BITCAST3:%[0-9]+]]:_(s32) = G_BITCAST [[UV19]](<2 x s16>) + ; CHECK-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>) + ; CHECK-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C]](s32) + ; CHECK-NEXT: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[UV2]](<2 x s16>) + ; CHECK-NEXT: [[DEF1:%[0-9]+]]:_(<6 x s16>) = G_IMPLICIT_DEF + ; CHECK-NEXT: [[UV3:%[0-9]+]]:_(<2 x s16>), [[UV4:%[0-9]+]]:_(<2 x s16>), [[UV5:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF1]](<6 x s16>) + ; CHECK-NEXT: [[BITCAST3:%[0-9]+]]:_(s32) = G_BITCAST [[UV3]](<2 x s16>) + ; CHECK-NEXT: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST3]], [[C]](s32) + ; CHECK-NEXT: [[BITCAST4:%[0-9]+]]:_(s32) = G_BITCAST [[UV4]](<2 x s16>) + ; CHECK-NEXT: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST4]], [[C]](s32) + ; CHECK-NEXT: [[BITCAST5:%[0-9]+]]:_(s32) = G_BITCAST [[UV5]](<2 x s16>) ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s32) = 
G_CONSTANT i32 65535 - ; CHECK-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[BITCAST]], [[C1]] - ; CHECK-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[LSHR]], [[C1]] - ; CHECK-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C]](s32) - ; CHECK-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL]] - ; CHECK-NEXT: [[BITCAST4:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) - ; CHECK-NEXT: [[AND4:%[0-9]+]]:_(s32) = G_AND [[BITCAST1]], [[C1]] - ; CHECK-NEXT: [[AND5:%[0-9]+]]:_(s32) = G_AND [[BITCAST2]], [[C1]] - ; CHECK-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[C]](s32) - ; CHECK-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND4]], [[SHL1]] - ; CHECK-NEXT: [[BITCAST5:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32) - ; CHECK-NEXT: [[AND6:%[0-9]+]]:_(s32) = G_AND [[LSHR1]], [[C1]] - ; CHECK-NEXT: [[AND7:%[0-9]+]]:_(s32) = G_AND [[BITCAST3]], [[C1]] - ; CHECK-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND7]], [[C]](s32) - ; CHECK-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[AND6]], [[SHL2]] - ; CHECK-NEXT: [[BITCAST6:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR2]](s32) - ; CHECK-NEXT: [[CONCAT_VECTORS2:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BITCAST4]](<2 x s16>), [[BITCAST5]](<2 x s16>), [[BITCAST6]](<2 x s16>) - ; CHECK-NEXT: [[CONCAT_VECTORS3:%[0-9]+]]:_(<30 x s16>) = G_CONCAT_VECTORS [[CONCAT_VECTORS2]](<6 x s16>), [[DEF1]](<6 x s16>), [[DEF1]](<6 x s16>), [[DEF1]](<6 x s16>), [[DEF1]](<6 x s16>) - ; CHECK-NEXT: [[UV20:%[0-9]+]]:_(<5 x s16>), [[UV21:%[0-9]+]]:_(<5 x s16>), [[UV22:%[0-9]+]]:_(<5 x s16>), [[UV23:%[0-9]+]]:_(<5 x s16>), [[UV24:%[0-9]+]]:_(<5 x s16>), [[UV25:%[0-9]+]]:_(<5 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS3]](<30 x s16>) - ; CHECK-NEXT: [[DEF4:%[0-9]+]]:_(<8 x s16>) = G_IMPLICIT_DEF - ; CHECK-NEXT: [[INSERT6:%[0-9]+]]:_(<8 x s16>) = G_INSERT [[DEF4]], [[UV20]](<5 x s16>), 0 - ; CHECK-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[INSERT6]](<8 x s16>) + ; CHECK-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[BITCAST]], [[C1]] + ; CHECK-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LSHR]], [[C1]] + ; CHECK-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C]](s32) + ; CHECK-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]] + ; CHECK-NEXT: [[BITCAST6:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) + ; CHECK-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[BITCAST1]], [[C1]] + ; CHECK-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY [[C2]](s32) + ; CHECK-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[COPY]], [[C]](s32) + ; CHECK-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL1]] + ; CHECK-NEXT: [[BITCAST7:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32) + ; CHECK-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BITCAST6]](<2 x s16>), [[BITCAST7]](<2 x s16>) + ; CHECK-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[BITCAST3]], [[C1]] + ; CHECK-NEXT: [[AND4:%[0-9]+]]:_(s32) = G_AND [[LSHR2]], [[C1]] + ; CHECK-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND4]], [[C]](s32) + ; CHECK-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[AND3]], [[SHL2]] + ; CHECK-NEXT: [[BITCAST8:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR2]](s32) + ; CHECK-NEXT: [[AND5:%[0-9]+]]:_(s32) = G_AND [[BITCAST4]], [[C1]] + ; CHECK-NEXT: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[C2]], [[C]](s32) + ; CHECK-NEXT: [[OR3:%[0-9]+]]:_(s32) = G_OR [[AND5]], [[SHL3]] + ; CHECK-NEXT: [[BITCAST9:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR3]](s32) + ; CHECK-NEXT: [[CONCAT_VECTORS1:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BITCAST8]](<2 x s16>), [[BITCAST9]](<2 x s16>) + ; CHECK-NEXT: [[AND6:%[0-9]+]]:_(<4 x s16>) = G_AND [[CONCAT_VECTORS]], 
[[CONCAT_VECTORS1]] + ; CHECK-NEXT: [[UV6:%[0-9]+]]:_(<2 x s16>), [[UV7:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[AND6]](<4 x s16>) + ; CHECK-NEXT: [[BITCAST10:%[0-9]+]]:_(s32) = G_BITCAST [[UV6]](<2 x s16>) + ; CHECK-NEXT: [[LSHR4:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST10]], [[C]](s32) + ; CHECK-NEXT: [[BITCAST11:%[0-9]+]]:_(s32) = G_BITCAST [[UV7]](<2 x s16>) + ; CHECK-NEXT: [[AND7:%[0-9]+]]:_(s32) = G_AND [[LSHR1]], [[C1]] + ; CHECK-NEXT: [[AND8:%[0-9]+]]:_(s32) = G_AND [[BITCAST2]], [[C1]] + ; CHECK-NEXT: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[AND8]], [[C]](s32) + ; CHECK-NEXT: [[OR4:%[0-9]+]]:_(s32) = G_OR [[AND7]], [[SHL4]] + ; CHECK-NEXT: [[BITCAST12:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR4]](s32) + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY [[C2]](s32) + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY [[C2]](s32) + ; CHECK-NEXT: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[COPY2]], [[C]](s32) + ; CHECK-NEXT: [[OR5:%[0-9]+]]:_(s32) = G_OR [[COPY1]], [[SHL5]] + ; CHECK-NEXT: [[BITCAST13:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR5]](s32) + ; CHECK-NEXT: [[CONCAT_VECTORS2:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BITCAST12]](<2 x s16>), [[BITCAST13]](<2 x s16>) + ; CHECK-NEXT: [[AND9:%[0-9]+]]:_(s32) = G_AND [[LSHR3]], [[C1]] + ; CHECK-NEXT: [[AND10:%[0-9]+]]:_(s32) = G_AND [[BITCAST5]], [[C1]] + ; CHECK-NEXT: [[SHL6:%[0-9]+]]:_(s32) = G_SHL [[AND10]], [[C]](s32) + ; CHECK-NEXT: [[OR6:%[0-9]+]]:_(s32) = G_OR [[AND9]], [[SHL6]] + ; CHECK-NEXT: [[BITCAST14:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR6]](s32) + ; CHECK-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY [[C2]](s32) + ; CHECK-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY [[C2]](s32) + ; CHECK-NEXT: [[SHL7:%[0-9]+]]:_(s32) = G_SHL [[COPY4]], [[C]](s32) + ; CHECK-NEXT: [[OR7:%[0-9]+]]:_(s32) = G_OR [[COPY3]], [[SHL7]] + ; CHECK-NEXT: [[BITCAST15:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR7]](s32) + ; CHECK-NEXT: [[CONCAT_VECTORS3:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BITCAST14]](<2 x s16>), [[BITCAST15]](<2 x s16>) + ; CHECK-NEXT: [[AND11:%[0-9]+]]:_(<4 x s16>) = G_AND [[CONCAT_VECTORS2]], [[CONCAT_VECTORS3]] + ; CHECK-NEXT: [[UV8:%[0-9]+]]:_(<2 x s16>), [[UV9:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[AND11]](<4 x s16>) + ; CHECK-NEXT: [[BITCAST16:%[0-9]+]]:_(s32) = G_BITCAST [[UV8]](<2 x s16>) + ; CHECK-NEXT: [[LSHR5:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST16]], [[C]](s32) + ; CHECK-NEXT: [[DEF2:%[0-9]+]]:_(<8 x s16>) = G_IMPLICIT_DEF + ; CHECK-NEXT: [[UV10:%[0-9]+]]:_(<2 x s16>), [[UV11:%[0-9]+]]:_(<2 x s16>), [[UV12:%[0-9]+]]:_(<2 x s16>), [[UV13:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF2]](<8 x s16>) + ; CHECK-NEXT: [[BITCAST17:%[0-9]+]]:_(s32) = G_BITCAST [[UV12]](<2 x s16>) + ; CHECK-NEXT: [[LSHR6:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST17]], [[C]](s32) + ; CHECK-NEXT: [[BITCAST18:%[0-9]+]]:_(s32) = G_BITCAST [[UV13]](<2 x s16>) + ; CHECK-NEXT: [[LSHR7:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST18]], [[C]](s32) + ; CHECK-NEXT: [[AND12:%[0-9]+]]:_(s32) = G_AND [[BITCAST10]], [[C1]] + ; CHECK-NEXT: [[AND13:%[0-9]+]]:_(s32) = G_AND [[LSHR4]], [[C1]] + ; CHECK-NEXT: [[SHL8:%[0-9]+]]:_(s32) = G_SHL [[AND13]], [[C]](s32) + ; CHECK-NEXT: [[OR8:%[0-9]+]]:_(s32) = G_OR [[AND12]], [[SHL8]] + ; CHECK-NEXT: [[BITCAST19:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR8]](s32) + ; CHECK-NEXT: [[AND14:%[0-9]+]]:_(s32) = G_AND [[BITCAST11]], [[C1]] + ; CHECK-NEXT: [[AND15:%[0-9]+]]:_(s32) = G_AND [[BITCAST16]], [[C1]] + ; CHECK-NEXT: [[SHL9:%[0-9]+]]:_(s32) = G_SHL [[AND15]], [[C]](s32) + ; CHECK-NEXT: [[OR9:%[0-9]+]]:_(s32) = G_OR [[AND14]], [[SHL9]] + ; CHECK-NEXT: [[BITCAST20:%[0-9]+]]:_(<2 x 
s16>) = G_BITCAST [[OR9]](s32) + ; CHECK-NEXT: [[AND16:%[0-9]+]]:_(s32) = G_AND [[LSHR5]], [[C1]] + ; CHECK-NEXT: [[AND17:%[0-9]+]]:_(s32) = G_AND [[LSHR6]], [[C1]] + ; CHECK-NEXT: [[SHL10:%[0-9]+]]:_(s32) = G_SHL [[AND17]], [[C]](s32) + ; CHECK-NEXT: [[OR10:%[0-9]+]]:_(s32) = G_OR [[AND16]], [[SHL10]] + ; CHECK-NEXT: [[BITCAST21:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR10]](s32) + ; CHECK-NEXT: [[AND18:%[0-9]+]]:_(s32) = G_AND [[BITCAST18]], [[C1]] + ; CHECK-NEXT: [[AND19:%[0-9]+]]:_(s32) = G_AND [[LSHR7]], [[C1]] + ; CHECK-NEXT: [[SHL11:%[0-9]+]]:_(s32) = G_SHL [[AND19]], [[C]](s32) + ; CHECK-NEXT: [[OR11:%[0-9]+]]:_(s32) = G_OR [[AND18]], [[SHL11]] + ; CHECK-NEXT: [[BITCAST22:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR11]](s32) + ; CHECK-NEXT: [[CONCAT_VECTORS4:%[0-9]+]]:_(<8 x s16>) = G_CONCAT_VECTORS [[BITCAST19]](<2 x s16>), [[BITCAST20]](<2 x s16>), [[BITCAST21]](<2 x s16>), [[BITCAST22]](<2 x s16>) + ; CHECK-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[CONCAT_VECTORS4]](<8 x s16>) %0:_(<5 x s16>) = G_IMPLICIT_DEF %1:_(<5 x s16>) = G_IMPLICIT_DEF %2:_(<5 x s16>) = G_AND %0, %1 @@ -552,34 +618,15 @@ body: | bb.0: ; CHECK-LABEL: name: test_and_v3s8 - ; CHECK: [[DEF:%[0-9]+]]:_(<3 x s8>) = G_IMPLICIT_DEF - ; CHECK-NEXT: [[DEF1:%[0-9]+]]:_(<3 x s8>) = G_IMPLICIT_DEF - ; CHECK-NEXT: [[DEF2:%[0-9]+]]:_(<4 x s8>) = G_IMPLICIT_DEF - ; CHECK-NEXT: [[INSERT:%[0-9]+]]:_(<4 x s8>) = G_INSERT [[DEF2]], [[DEF]](<3 x s8>), 0 - ; CHECK-NEXT: [[INSERT1:%[0-9]+]]:_(<4 x s8>) = G_INSERT [[DEF2]], [[DEF1]](<3 x s8>), 0 - ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s8), [[UV1:%[0-9]+]]:_(s8), [[UV2:%[0-9]+]]:_(s8), [[UV3:%[0-9]+]]:_(s8) = G_UNMERGE_VALUES [[INSERT]](<4 x s8>) - ; CHECK-NEXT: [[UV4:%[0-9]+]]:_(s8), [[UV5:%[0-9]+]]:_(s8), [[UV6:%[0-9]+]]:_(s8), [[UV7:%[0-9]+]]:_(s8) = G_UNMERGE_VALUES [[INSERT1]](<4 x s8>) - ; CHECK-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[UV]](s8) - ; CHECK-NEXT: [[ANYEXT1:%[0-9]+]]:_(s32) = G_ANYEXT [[UV4]](s8) - ; CHECK-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[ANYEXT]], [[ANYEXT1]] - ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(s8) = G_TRUNC [[AND]](s32) - ; CHECK-NEXT: [[ANYEXT2:%[0-9]+]]:_(s32) = G_ANYEXT [[UV1]](s8) - ; CHECK-NEXT: [[ANYEXT3:%[0-9]+]]:_(s32) = G_ANYEXT [[UV5]](s8) - ; CHECK-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[ANYEXT2]], [[ANYEXT3]] - ; CHECK-NEXT: [[TRUNC1:%[0-9]+]]:_(s8) = G_TRUNC [[AND1]](s32) - ; CHECK-NEXT: [[ANYEXT4:%[0-9]+]]:_(s32) = G_ANYEXT [[UV2]](s8) - ; CHECK-NEXT: [[ANYEXT5:%[0-9]+]]:_(s32) = G_ANYEXT [[UV6]](s8) - ; CHECK-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[ANYEXT4]], [[ANYEXT5]] - ; CHECK-NEXT: [[TRUNC2:%[0-9]+]]:_(s8) = G_TRUNC [[AND2]](s32) - ; CHECK-NEXT: [[ANYEXT6:%[0-9]+]]:_(s32) = G_ANYEXT [[UV3]](s8) - ; CHECK-NEXT: [[ANYEXT7:%[0-9]+]]:_(s32) = G_ANYEXT [[UV7]](s8) - ; CHECK-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[ANYEXT6]], [[ANYEXT7]] - ; CHECK-NEXT: [[TRUNC3:%[0-9]+]]:_(s8) = G_TRUNC [[AND3]](s32) - ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s8>) = G_BUILD_VECTOR [[TRUNC]](s8), [[TRUNC1]](s8), [[TRUNC2]](s8), [[TRUNC3]](s8) - ; CHECK-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<12 x s8>) = G_CONCAT_VECTORS [[BUILD_VECTOR]](<4 x s8>), [[DEF2]](<4 x s8>), [[DEF2]](<4 x s8>) - ; CHECK-NEXT: [[UV8:%[0-9]+]]:_(<3 x s8>), [[UV9:%[0-9]+]]:_(<3 x s8>), [[UV10:%[0-9]+]]:_(<3 x s8>), [[UV11:%[0-9]+]]:_(<3 x s8>) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<12 x s8>) - ; CHECK-NEXT: [[ANYEXT8:%[0-9]+]]:_(<3 x s32>) = G_ANYEXT [[UV8]](<3 x s8>) - ; CHECK-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[ANYEXT8]](<3 x s32>) + ; CHECK: [[DEF:%[0-9]+]]:_(<4 x s32>) = G_IMPLICIT_DEF + ; 
CHECK-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[DEF]](<4 x s32>) + ; CHECK-NEXT: [[DEF1:%[0-9]+]]:_(<4 x s32>) = G_IMPLICIT_DEF + ; CHECK-NEXT: [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32), [[UV6:%[0-9]+]]:_(s32), [[UV7:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[DEF1]](<4 x s32>) + ; CHECK-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[UV]], [[UV4]] + ; CHECK-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[UV1]], [[UV5]] + ; CHECK-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[UV2]], [[UV6]] + ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[AND]](s32), [[AND1]](s32), [[AND2]](s32) + ; CHECK-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[BUILD_VECTOR]](<3 x s32>) %0:_(<3 x s8>) = G_IMPLICIT_DEF %1:_(<3 x s8>) = G_IMPLICIT_DEF %2:_(<3 x s8>) = G_AND %0, %1 diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-anyext.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-anyext.mir index ffbf3f55dfb2..c6f0a6dc349c 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-anyext.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-anyext.mir @@ -114,10 +114,7 @@ body: | ; CHECK-LABEL: name: test_anyext_v3s16_to_v3s32 ; CHECK: [[COPY:%[0-9]+]]:_(<4 x s16>) = COPY $vgpr0_vgpr1 - ; CHECK-NEXT: [[EXTRACT:%[0-9]+]]:_(<3 x s16>) = G_EXTRACT [[COPY]](<4 x s16>), 0 - ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF - ; CHECK-NEXT: [[INSERT:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF]], [[EXTRACT]](<3 x s16>), 0 - ; CHECK-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[INSERT]](<4 x s16>) + ; CHECK-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[COPY]](<4 x s16>) ; CHECK-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>) ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 ; CHECK-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-ashr.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-ashr.mir index 5c982bd92352..b1165aa5cd48 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-ashr.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-ashr.mir @@ -1,8 +1,8 @@ # NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py -# RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=tahiti -O0 -run-pass=legalizer -global-isel-abort=0 -o - %s | FileCheck -check-prefix=SI %s -# RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=fiji -O0 -run-pass=legalizer -global-isel-abort=0 -o - %s | FileCheck -check-prefix=VI %s -# RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx900 -O0 -run-pass=legalizer -global-isel-abort=0 -o - %s | FileCheck -check-prefix=GFX9PLUS %s -# RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx1010 -O0 -run-pass=legalizer -global-isel-abort=0 -o - %s | FileCheck -check-prefix=GFX9PLUS %s +# RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=tahiti -O0 -run-pass=legalizer -o - %s | FileCheck -check-prefix=SI %s +# RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=fiji -O0 -run-pass=legalizer -o - %s | FileCheck -check-prefix=VI %s +# RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx900 -O0 -run-pass=legalizer -o - %s | FileCheck -check-prefix=GFX9PLUS %s +# RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx1010 -O0 -run-pass=legalizer -o - %s | FileCheck -check-prefix=GFX9PLUS %s --- name: test_ashr_s32_s32 @@ -532,43 +532,40 @@ body: | ; SI-LABEL: name: test_ashr_v3s64_v3s32 ; SI: [[COPY:%[0-9]+]]:_(<4 x s64>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 - ; SI-NEXT: 
[[EXTRACT:%[0-9]+]]:_(<3 x s64>) = G_EXTRACT [[COPY]](<4 x s64>), 0 + ; SI-NEXT: [[UV:%[0-9]+]]:_(s64), [[UV1:%[0-9]+]]:_(s64), [[UV2:%[0-9]+]]:_(s64), [[UV3:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[COPY]](<4 x s64>) ; SI-NEXT: [[COPY1:%[0-9]+]]:_(<3 x s32>) = COPY $vgpr8_vgpr9_vgpr10 - ; SI-NEXT: [[UV:%[0-9]+]]:_(s64), [[UV1:%[0-9]+]]:_(s64), [[UV2:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[EXTRACT]](<3 x s64>) - ; SI-NEXT: [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](<3 x s32>) - ; SI-NEXT: [[ASHR:%[0-9]+]]:_(s64) = G_ASHR [[UV]], [[UV3]](s32) - ; SI-NEXT: [[ASHR1:%[0-9]+]]:_(s64) = G_ASHR [[UV1]], [[UV4]](s32) - ; SI-NEXT: [[ASHR2:%[0-9]+]]:_(s64) = G_ASHR [[UV2]], [[UV5]](s32) - ; SI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s64>) = G_BUILD_VECTOR [[ASHR]](s64), [[ASHR1]](s64), [[ASHR2]](s64) + ; SI-NEXT: [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32), [[UV6:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](<3 x s32>) + ; SI-NEXT: [[ASHR:%[0-9]+]]:_(s64) = G_ASHR [[UV]], [[UV4]](s32) + ; SI-NEXT: [[ASHR1:%[0-9]+]]:_(s64) = G_ASHR [[UV1]], [[UV5]](s32) + ; SI-NEXT: [[ASHR2:%[0-9]+]]:_(s64) = G_ASHR [[UV2]], [[UV6]](s32) ; SI-NEXT: [[DEF:%[0-9]+]]:_(<4 x s64>) = G_IMPLICIT_DEF - ; SI-NEXT: [[INSERT:%[0-9]+]]:_(<4 x s64>) = G_INSERT [[DEF]], [[BUILD_VECTOR]](<3 x s64>), 0 - ; SI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[INSERT]](<4 x s64>) + ; SI-NEXT: [[UV7:%[0-9]+]]:_(s64), [[UV8:%[0-9]+]]:_(s64), [[UV9:%[0-9]+]]:_(s64), [[UV10:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[DEF]](<4 x s64>) + ; SI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s64>) = G_BUILD_VECTOR [[ASHR]](s64), [[ASHR1]](s64), [[ASHR2]](s64), [[UV10]](s64) + ; SI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[BUILD_VECTOR]](<4 x s64>) ; VI-LABEL: name: test_ashr_v3s64_v3s32 ; VI: [[COPY:%[0-9]+]]:_(<4 x s64>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 - ; VI-NEXT: [[EXTRACT:%[0-9]+]]:_(<3 x s64>) = G_EXTRACT [[COPY]](<4 x s64>), 0 + ; VI-NEXT: [[UV:%[0-9]+]]:_(s64), [[UV1:%[0-9]+]]:_(s64), [[UV2:%[0-9]+]]:_(s64), [[UV3:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[COPY]](<4 x s64>) ; VI-NEXT: [[COPY1:%[0-9]+]]:_(<3 x s32>) = COPY $vgpr8_vgpr9_vgpr10 - ; VI-NEXT: [[UV:%[0-9]+]]:_(s64), [[UV1:%[0-9]+]]:_(s64), [[UV2:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[EXTRACT]](<3 x s64>) - ; VI-NEXT: [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](<3 x s32>) - ; VI-NEXT: [[ASHR:%[0-9]+]]:_(s64) = G_ASHR [[UV]], [[UV3]](s32) - ; VI-NEXT: [[ASHR1:%[0-9]+]]:_(s64) = G_ASHR [[UV1]], [[UV4]](s32) - ; VI-NEXT: [[ASHR2:%[0-9]+]]:_(s64) = G_ASHR [[UV2]], [[UV5]](s32) - ; VI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s64>) = G_BUILD_VECTOR [[ASHR]](s64), [[ASHR1]](s64), [[ASHR2]](s64) + ; VI-NEXT: [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32), [[UV6:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](<3 x s32>) + ; VI-NEXT: [[ASHR:%[0-9]+]]:_(s64) = G_ASHR [[UV]], [[UV4]](s32) + ; VI-NEXT: [[ASHR1:%[0-9]+]]:_(s64) = G_ASHR [[UV1]], [[UV5]](s32) + ; VI-NEXT: [[ASHR2:%[0-9]+]]:_(s64) = G_ASHR [[UV2]], [[UV6]](s32) ; VI-NEXT: [[DEF:%[0-9]+]]:_(<4 x s64>) = G_IMPLICIT_DEF - ; VI-NEXT: [[INSERT:%[0-9]+]]:_(<4 x s64>) = G_INSERT [[DEF]], [[BUILD_VECTOR]](<3 x s64>), 0 - ; VI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[INSERT]](<4 x s64>) + ; VI-NEXT: [[UV7:%[0-9]+]]:_(s64), [[UV8:%[0-9]+]]:_(s64), [[UV9:%[0-9]+]]:_(s64), [[UV10:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[DEF]](<4 x s64>) + ; VI-NEXT: 
[[BUILD_VECTOR:%[0-9]+]]:_(<4 x s64>) = G_BUILD_VECTOR [[ASHR]](s64), [[ASHR1]](s64), [[ASHR2]](s64), [[UV10]](s64) + ; VI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[BUILD_VECTOR]](<4 x s64>) ; GFX9PLUS-LABEL: name: test_ashr_v3s64_v3s32 ; GFX9PLUS: [[COPY:%[0-9]+]]:_(<4 x s64>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 - ; GFX9PLUS-NEXT: [[EXTRACT:%[0-9]+]]:_(<3 x s64>) = G_EXTRACT [[COPY]](<4 x s64>), 0 + ; GFX9PLUS-NEXT: [[UV:%[0-9]+]]:_(s64), [[UV1:%[0-9]+]]:_(s64), [[UV2:%[0-9]+]]:_(s64), [[UV3:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[COPY]](<4 x s64>) ; GFX9PLUS-NEXT: [[COPY1:%[0-9]+]]:_(<3 x s32>) = COPY $vgpr8_vgpr9_vgpr10 - ; GFX9PLUS-NEXT: [[UV:%[0-9]+]]:_(s64), [[UV1:%[0-9]+]]:_(s64), [[UV2:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[EXTRACT]](<3 x s64>) - ; GFX9PLUS-NEXT: [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](<3 x s32>) - ; GFX9PLUS-NEXT: [[ASHR:%[0-9]+]]:_(s64) = G_ASHR [[UV]], [[UV3]](s32) - ; GFX9PLUS-NEXT: [[ASHR1:%[0-9]+]]:_(s64) = G_ASHR [[UV1]], [[UV4]](s32) - ; GFX9PLUS-NEXT: [[ASHR2:%[0-9]+]]:_(s64) = G_ASHR [[UV2]], [[UV5]](s32) - ; GFX9PLUS-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s64>) = G_BUILD_VECTOR [[ASHR]](s64), [[ASHR1]](s64), [[ASHR2]](s64) + ; GFX9PLUS-NEXT: [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32), [[UV6:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](<3 x s32>) + ; GFX9PLUS-NEXT: [[ASHR:%[0-9]+]]:_(s64) = G_ASHR [[UV]], [[UV4]](s32) + ; GFX9PLUS-NEXT: [[ASHR1:%[0-9]+]]:_(s64) = G_ASHR [[UV1]], [[UV5]](s32) + ; GFX9PLUS-NEXT: [[ASHR2:%[0-9]+]]:_(s64) = G_ASHR [[UV2]], [[UV6]](s32) ; GFX9PLUS-NEXT: [[DEF:%[0-9]+]]:_(<4 x s64>) = G_IMPLICIT_DEF - ; GFX9PLUS-NEXT: [[INSERT:%[0-9]+]]:_(<4 x s64>) = G_INSERT [[DEF]], [[BUILD_VECTOR]](<3 x s64>), 0 - ; GFX9PLUS-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[INSERT]](<4 x s64>) + ; GFX9PLUS-NEXT: [[UV7:%[0-9]+]]:_(s64), [[UV8:%[0-9]+]]:_(s64), [[UV9:%[0-9]+]]:_(s64), [[UV10:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[DEF]](<4 x s64>) + ; GFX9PLUS-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s64>) = G_BUILD_VECTOR [[ASHR]](s64), [[ASHR1]](s64), [[ASHR2]](s64), [[UV10]](s64) + ; GFX9PLUS-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[BUILD_VECTOR]](<4 x s64>) %0:_(<4 x s64>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 %1:_(<3 x s64>) = G_EXTRACT %0, 0 %2:_(<3 x s32>) = COPY $vgpr8_vgpr9_vgpr10 @@ -798,32 +795,33 @@ body: | ; GFX9PLUS-LABEL: name: test_ashr_v3s16_v3s16 ; GFX9PLUS: [[COPY:%[0-9]+]]:_(<6 x s16>) = COPY $vgpr0_vgpr1_vgpr2 ; GFX9PLUS-NEXT: [[COPY1:%[0-9]+]]:_(<6 x s16>) = COPY $vgpr3_vgpr4_vgpr5 - ; GFX9PLUS-NEXT: [[UV:%[0-9]+]]:_(<3 x s16>), [[UV1:%[0-9]+]]:_(<3 x s16>) = G_UNMERGE_VALUES [[COPY]](<6 x s16>) - ; GFX9PLUS-NEXT: [[UV2:%[0-9]+]]:_(<3 x s16>), [[UV3:%[0-9]+]]:_(<3 x s16>) = G_UNMERGE_VALUES [[COPY1]](<6 x s16>) - ; GFX9PLUS-NEXT: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF - ; GFX9PLUS-NEXT: [[INSERT:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF]], [[UV]](<3 x s16>), 0 - ; GFX9PLUS-NEXT: [[EXTRACT:%[0-9]+]]:_(<2 x s16>) = G_EXTRACT [[INSERT]](<4 x s16>), 0 - ; GFX9PLUS-NEXT: [[INSERT1:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF]], [[UV]](<3 x s16>), 0 - ; GFX9PLUS-NEXT: [[EXTRACT1:%[0-9]+]]:_(s16) = G_EXTRACT [[INSERT1]](<4 x s16>), 32 - ; GFX9PLUS-NEXT: [[INSERT2:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF]], [[UV2]](<3 x s16>), 0 - ; GFX9PLUS-NEXT: [[EXTRACT2:%[0-9]+]]:_(<2 x s16>) = G_EXTRACT [[INSERT2]](<4 x s16>), 0 - ; GFX9PLUS-NEXT: [[INSERT3:%[0-9]+]]:_(<4 x s16>) = G_INSERT 
[[DEF]], [[UV2]](<3 x s16>), 0 - ; GFX9PLUS-NEXT: [[EXTRACT3:%[0-9]+]]:_(s16) = G_EXTRACT [[INSERT3]](<4 x s16>), 32 - ; GFX9PLUS-NEXT: [[ASHR:%[0-9]+]]:_(<2 x s16>) = G_ASHR [[EXTRACT]], [[EXTRACT2]](<2 x s16>) - ; GFX9PLUS-NEXT: [[ASHR1:%[0-9]+]]:_(s16) = G_ASHR [[EXTRACT1]], [[EXTRACT3]](s16) - ; GFX9PLUS-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[ASHR]](<2 x s16>) + ; GFX9PLUS-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>), [[UV2:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[COPY]](<6 x s16>) + ; GFX9PLUS-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>) ; GFX9PLUS-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 ; GFX9PLUS-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) - ; GFX9PLUS-NEXT: [[DEF1:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF - ; GFX9PLUS-NEXT: [[UV4:%[0-9]+]]:_(<2 x s16>), [[UV5:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF1]](<4 x s16>) - ; GFX9PLUS-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV4]](<2 x s16>) - ; GFX9PLUS-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C]](s32) - ; GFX9PLUS-NEXT: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[UV5]](<2 x s16>) + ; GFX9PLUS-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>) + ; GFX9PLUS-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST1]](s32) ; GFX9PLUS-NEXT: [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[BITCAST]](s32), [[LSHR]](s32) + ; GFX9PLUS-NEXT: [[UV3:%[0-9]+]]:_(<2 x s16>), [[UV4:%[0-9]+]]:_(<2 x s16>), [[UV5:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[COPY1]](<6 x s16>) + ; GFX9PLUS-NEXT: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[UV3]](<2 x s16>) + ; GFX9PLUS-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C]](s32) + ; GFX9PLUS-NEXT: [[BITCAST3:%[0-9]+]]:_(s32) = G_BITCAST [[UV4]](<2 x s16>) + ; GFX9PLUS-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST3]](s32) + ; GFX9PLUS-NEXT: [[BUILD_VECTOR_TRUNC1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[BITCAST2]](s32), [[LSHR1]](s32) + ; GFX9PLUS-NEXT: [[ASHR:%[0-9]+]]:_(<2 x s16>) = G_ASHR [[BUILD_VECTOR_TRUNC]], [[BUILD_VECTOR_TRUNC1]](<2 x s16>) + ; GFX9PLUS-NEXT: [[ASHR1:%[0-9]+]]:_(s16) = G_ASHR [[TRUNC]], [[TRUNC1]](s16) + ; GFX9PLUS-NEXT: [[BITCAST4:%[0-9]+]]:_(s32) = G_BITCAST [[ASHR]](<2 x s16>) + ; GFX9PLUS-NEXT: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST4]], [[C]](s32) + ; GFX9PLUS-NEXT: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF + ; GFX9PLUS-NEXT: [[UV6:%[0-9]+]]:_(<2 x s16>), [[UV7:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<4 x s16>) + ; GFX9PLUS-NEXT: [[BITCAST5:%[0-9]+]]:_(s32) = G_BITCAST [[UV6]](<2 x s16>) + ; GFX9PLUS-NEXT: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST5]], [[C]](s32) + ; GFX9PLUS-NEXT: [[BITCAST6:%[0-9]+]]:_(s32) = G_BITCAST [[UV7]](<2 x s16>) + ; GFX9PLUS-NEXT: [[BUILD_VECTOR_TRUNC2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[BITCAST4]](s32), [[LSHR2]](s32) ; GFX9PLUS-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[ASHR1]](s16) - ; GFX9PLUS-NEXT: [[BUILD_VECTOR_TRUNC1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[ANYEXT]](s32), [[BITCAST1]](s32) - ; GFX9PLUS-NEXT: [[BUILD_VECTOR_TRUNC2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[LSHR1]](s32), [[BITCAST2]](s32) - ; GFX9PLUS-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR_TRUNC]](<2 x s16>), [[BUILD_VECTOR_TRUNC1]](<2 x s16>), [[BUILD_VECTOR_TRUNC2]](<2 x s16>) + ; GFX9PLUS-NEXT: [[BUILD_VECTOR_TRUNC3:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[ANYEXT]](s32), [[BITCAST5]](s32) + ; GFX9PLUS-NEXT: [[BUILD_VECTOR_TRUNC4:%[0-9]+]]:_(<2 x 
s16>) = G_BUILD_VECTOR_TRUNC [[LSHR3]](s32), [[BITCAST6]](s32) + ; GFX9PLUS-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR_TRUNC2]](<2 x s16>), [[BUILD_VECTOR_TRUNC3]](<2 x s16>), [[BUILD_VECTOR_TRUNC4]](<2 x s16>) ; GFX9PLUS-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[CONCAT_VECTORS]](<6 x s16>) %0:_(<6 x s16>) = COPY $vgpr0_vgpr1_vgpr2 %1:_(<6 x s16>) = COPY $vgpr3_vgpr4_vgpr5 diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-bitcast.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-bitcast.mir index ab6796076810..c7c4e0734200 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-bitcast.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-bitcast.mir @@ -511,10 +511,9 @@ body: | ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 ; CHECK-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; CHECK-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C1]](s32) ; CHECK-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 24 - ; CHECK-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[COPY]](<2 x s16>) - ; CHECK-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C1]](s32) - ; CHECK-NEXT: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C2]](s32) + ; CHECK-NEXT: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C2]](s32) ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST]](s32) ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s16) = COPY [[TRUNC]](s16) ; CHECK-NEXT: [[ADD:%[0-9]+]]:_(s16) = G_ADD [[TRUNC]], [[COPY1]] @@ -549,19 +548,18 @@ body: | ; CHECK-LABEL: name: test_bitcast_v4s8_to_v2s16 ; CHECK: [[COPY:%[0-9]+]]:_(<4 x s32>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3 ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<4 x s32>) - ; CHECK-NEXT: [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32), [[UV6:%[0-9]+]]:_(s32), [[UV7:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<4 x s32>) ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[UV]](s32) - ; CHECK-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[UV4]](s32) - ; CHECK-NEXT: [[ADD:%[0-9]+]]:_(s16) = G_ADD [[TRUNC]], [[TRUNC1]] - ; CHECK-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[UV1]](s32) - ; CHECK-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[UV5]](s32) - ; CHECK-NEXT: [[ADD1:%[0-9]+]]:_(s16) = G_ADD [[TRUNC2]], [[TRUNC3]] - ; CHECK-NEXT: [[TRUNC4:%[0-9]+]]:_(s16) = G_TRUNC [[UV2]](s32) - ; CHECK-NEXT: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[UV6]](s32) - ; CHECK-NEXT: [[ADD2:%[0-9]+]]:_(s16) = G_ADD [[TRUNC4]], [[TRUNC5]] - ; CHECK-NEXT: [[TRUNC6:%[0-9]+]]:_(s16) = G_TRUNC [[UV3]](s32) - ; CHECK-NEXT: [[TRUNC7:%[0-9]+]]:_(s16) = G_TRUNC [[UV7]](s32) - ; CHECK-NEXT: [[ADD3:%[0-9]+]]:_(s16) = G_ADD [[TRUNC6]], [[TRUNC7]] + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s16) = COPY [[TRUNC]](s16) + ; CHECK-NEXT: [[ADD:%[0-9]+]]:_(s16) = G_ADD [[COPY1]], [[TRUNC]] + ; CHECK-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[UV1]](s32) + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(s16) = COPY [[TRUNC1]](s16) + ; CHECK-NEXT: [[ADD1:%[0-9]+]]:_(s16) = G_ADD [[COPY2]], [[TRUNC1]] + ; CHECK-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[UV2]](s32) + ; CHECK-NEXT: [[COPY3:%[0-9]+]]:_(s16) = COPY [[TRUNC2]](s16) + ; CHECK-NEXT: [[ADD2:%[0-9]+]]:_(s16) = G_ADD [[COPY3]], [[TRUNC2]] + ; CHECK-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[UV3]](s32) + ; CHECK-NEXT: [[COPY4:%[0-9]+]]:_(s16) = COPY [[TRUNC3]](s16) + ; CHECK-NEXT: [[ADD3:%[0-9]+]]:_(s16) = G_ADD [[COPY4]], [[TRUNC3]] ; CHECK-NEXT: [[C:%[0-9]+]]:_(s16) = 
G_CONSTANT i16 255 ; CHECK-NEXT: [[AND:%[0-9]+]]:_(s16) = G_AND [[ADD]], [[C]] ; CHECK-NEXT: [[AND1:%[0-9]+]]:_(s16) = G_AND [[ADD1]], [[C]] @@ -602,14 +600,13 @@ body: | ; CHECK-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 12 ; CHECK-NEXT: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C2]](s32) ; CHECK-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; CHECK-NEXT: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C3]](s32) ; CHECK-NEXT: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 20 + ; CHECK-NEXT: [[LSHR4:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C4]](s32) ; CHECK-NEXT: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 24 + ; CHECK-NEXT: [[LSHR5:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C5]](s32) ; CHECK-NEXT: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 28 - ; CHECK-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[COPY]](<2 x s16>) - ; CHECK-NEXT: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C3]](s32) - ; CHECK-NEXT: [[LSHR4:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C4]](s32) - ; CHECK-NEXT: [[LSHR5:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C5]](s32) - ; CHECK-NEXT: [[LSHR6:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C6]](s32) + ; CHECK-NEXT: [[LSHR6:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C6]](s32) ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST]](s32) ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s16) = COPY [[TRUNC]](s16) ; CHECK-NEXT: [[ADD:%[0-9]+]]:_(s16) = G_ADD [[TRUNC]], [[COPY1]] @@ -660,31 +657,30 @@ body: | ; CHECK-LABEL: name: test_bitcast_v8s4_to_v2s16 ; CHECK: [[COPY:%[0-9]+]]:_(<8 x s32>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32), [[UV6:%[0-9]+]]:_(s32), [[UV7:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<8 x s32>) - ; CHECK-NEXT: [[UV8:%[0-9]+]]:_(s32), [[UV9:%[0-9]+]]:_(s32), [[UV10:%[0-9]+]]:_(s32), [[UV11:%[0-9]+]]:_(s32), [[UV12:%[0-9]+]]:_(s32), [[UV13:%[0-9]+]]:_(s32), [[UV14:%[0-9]+]]:_(s32), [[UV15:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<8 x s32>) ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[UV]](s32) - ; CHECK-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[UV8]](s32) - ; CHECK-NEXT: [[ADD:%[0-9]+]]:_(s16) = G_ADD [[TRUNC]], [[TRUNC1]] - ; CHECK-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[UV1]](s32) - ; CHECK-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[UV9]](s32) - ; CHECK-NEXT: [[ADD1:%[0-9]+]]:_(s16) = G_ADD [[TRUNC2]], [[TRUNC3]] - ; CHECK-NEXT: [[TRUNC4:%[0-9]+]]:_(s16) = G_TRUNC [[UV2]](s32) - ; CHECK-NEXT: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[UV10]](s32) - ; CHECK-NEXT: [[ADD2:%[0-9]+]]:_(s16) = G_ADD [[TRUNC4]], [[TRUNC5]] - ; CHECK-NEXT: [[TRUNC6:%[0-9]+]]:_(s16) = G_TRUNC [[UV3]](s32) - ; CHECK-NEXT: [[TRUNC7:%[0-9]+]]:_(s16) = G_TRUNC [[UV11]](s32) - ; CHECK-NEXT: [[ADD3:%[0-9]+]]:_(s16) = G_ADD [[TRUNC6]], [[TRUNC7]] - ; CHECK-NEXT: [[TRUNC8:%[0-9]+]]:_(s16) = G_TRUNC [[UV4]](s32) - ; CHECK-NEXT: [[TRUNC9:%[0-9]+]]:_(s16) = G_TRUNC [[UV12]](s32) - ; CHECK-NEXT: [[ADD4:%[0-9]+]]:_(s16) = G_ADD [[TRUNC8]], [[TRUNC9]] - ; CHECK-NEXT: [[TRUNC10:%[0-9]+]]:_(s16) = G_TRUNC [[UV5]](s32) - ; CHECK-NEXT: [[TRUNC11:%[0-9]+]]:_(s16) = G_TRUNC [[UV13]](s32) - ; CHECK-NEXT: [[ADD5:%[0-9]+]]:_(s16) = G_ADD [[TRUNC10]], [[TRUNC11]] - ; CHECK-NEXT: [[TRUNC12:%[0-9]+]]:_(s16) = G_TRUNC [[UV6]](s32) - ; CHECK-NEXT: [[TRUNC13:%[0-9]+]]:_(s16) = G_TRUNC [[UV14]](s32) - ; CHECK-NEXT: [[ADD6:%[0-9]+]]:_(s16) = G_ADD [[TRUNC12]], [[TRUNC13]] - ; CHECK-NEXT: [[TRUNC14:%[0-9]+]]:_(s16) = G_TRUNC [[UV7]](s32) - ; 
CHECK-NEXT: [[TRUNC15:%[0-9]+]]:_(s16) = G_TRUNC [[UV15]](s32) - ; CHECK-NEXT: [[ADD7:%[0-9]+]]:_(s16) = G_ADD [[TRUNC14]], [[TRUNC15]] + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s16) = COPY [[TRUNC]](s16) + ; CHECK-NEXT: [[ADD:%[0-9]+]]:_(s16) = G_ADD [[COPY1]], [[TRUNC]] + ; CHECK-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[UV1]](s32) + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(s16) = COPY [[TRUNC1]](s16) + ; CHECK-NEXT: [[ADD1:%[0-9]+]]:_(s16) = G_ADD [[COPY2]], [[TRUNC1]] + ; CHECK-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[UV2]](s32) + ; CHECK-NEXT: [[COPY3:%[0-9]+]]:_(s16) = COPY [[TRUNC2]](s16) + ; CHECK-NEXT: [[ADD2:%[0-9]+]]:_(s16) = G_ADD [[COPY3]], [[TRUNC2]] + ; CHECK-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[UV3]](s32) + ; CHECK-NEXT: [[COPY4:%[0-9]+]]:_(s16) = COPY [[TRUNC3]](s16) + ; CHECK-NEXT: [[ADD3:%[0-9]+]]:_(s16) = G_ADD [[COPY4]], [[TRUNC3]] + ; CHECK-NEXT: [[TRUNC4:%[0-9]+]]:_(s16) = G_TRUNC [[UV4]](s32) + ; CHECK-NEXT: [[COPY5:%[0-9]+]]:_(s16) = COPY [[TRUNC4]](s16) + ; CHECK-NEXT: [[ADD4:%[0-9]+]]:_(s16) = G_ADD [[COPY5]], [[TRUNC4]] + ; CHECK-NEXT: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[UV5]](s32) + ; CHECK-NEXT: [[COPY6:%[0-9]+]]:_(s16) = COPY [[TRUNC5]](s16) + ; CHECK-NEXT: [[ADD5:%[0-9]+]]:_(s16) = G_ADD [[COPY6]], [[TRUNC5]] + ; CHECK-NEXT: [[TRUNC6:%[0-9]+]]:_(s16) = G_TRUNC [[UV6]](s32) + ; CHECK-NEXT: [[COPY7:%[0-9]+]]:_(s16) = COPY [[TRUNC6]](s16) + ; CHECK-NEXT: [[ADD6:%[0-9]+]]:_(s16) = G_ADD [[COPY7]], [[TRUNC6]] + ; CHECK-NEXT: [[TRUNC7:%[0-9]+]]:_(s16) = G_TRUNC [[UV7]](s32) + ; CHECK-NEXT: [[COPY8:%[0-9]+]]:_(s16) = COPY [[TRUNC7]](s16) + ; CHECK-NEXT: [[ADD7:%[0-9]+]]:_(s16) = G_ADD [[COPY8]], [[TRUNC7]] ; CHECK-NEXT: [[C:%[0-9]+]]:_(s16) = G_CONSTANT i16 15 ; CHECK-NEXT: [[AND:%[0-9]+]]:_(s16) = G_AND [[ADD]], [[C]] ; CHECK-NEXT: [[AND1:%[0-9]+]]:_(s16) = G_AND [[ADD1]], [[C]] @@ -757,12 +753,11 @@ body: | ; CHECK-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST2]](s32) ; CHECK-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C]](s32) ; CHECK-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR1]](s32) - ; CHECK-NEXT: [[UV2:%[0-9]+]]:_(<2 x s16>), [[UV3:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[BITCAST]](<4 x s16>) - ; CHECK-NEXT: [[BITCAST3:%[0-9]+]]:_(s32) = G_BITCAST [[UV2]](<2 x s16>) + ; CHECK-NEXT: [[BITCAST3:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>) ; CHECK-NEXT: [[TRUNC4:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST3]](s32) ; CHECK-NEXT: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST3]], [[C]](s32) ; CHECK-NEXT: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR2]](s32) - ; CHECK-NEXT: [[BITCAST4:%[0-9]+]]:_(s32) = G_BITCAST [[UV3]](<2 x s16>) + ; CHECK-NEXT: [[BITCAST4:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>) ; CHECK-NEXT: [[TRUNC6:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST4]](s32) ; CHECK-NEXT: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST4]], [[C]](s32) ; CHECK-NEXT: [[TRUNC7:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR3]](s32) @@ -858,6 +853,10 @@ body: | ; CHECK-LABEL: name: test_bitcast_v8s8_to_v2s32 ; CHECK: [[COPY:%[0-9]+]]:_(<8 x s32>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32), [[UV6:%[0-9]+]]:_(s32), [[UV7:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<8 x s32>) + ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(s8) = G_TRUNC [[UV]](s32) + ; CHECK-NEXT: [[TRUNC1:%[0-9]+]]:_(s8) = G_TRUNC [[UV1]](s32) + ; CHECK-NEXT: [[TRUNC2:%[0-9]+]]:_(s8) = G_TRUNC [[UV2]](s32) + ; CHECK-NEXT: [[TRUNC3:%[0-9]+]]:_(s8) = 
G_TRUNC [[UV3]](s32) ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 255 ; CHECK-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[UV]], [[C]] ; CHECK-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[UV1]], [[C]] @@ -872,15 +871,18 @@ body: | ; CHECK-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 24 ; CHECK-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C3]](s32) ; CHECK-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[OR1]], [[SHL2]] - ; CHECK-NEXT: [[UV8:%[0-9]+]]:_(s32), [[UV9:%[0-9]+]]:_(s32), [[UV10:%[0-9]+]]:_(s32), [[UV11:%[0-9]+]]:_(s32), [[UV12:%[0-9]+]]:_(s32), [[UV13:%[0-9]+]]:_(s32), [[UV14:%[0-9]+]]:_(s32), [[UV15:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<8 x s32>) - ; CHECK-NEXT: [[AND4:%[0-9]+]]:_(s32) = G_AND [[UV12]], [[C]] - ; CHECK-NEXT: [[AND5:%[0-9]+]]:_(s32) = G_AND [[UV13]], [[C]] + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s8) = COPY [[TRUNC]](s8) + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(s8) = COPY [[TRUNC1]](s8) + ; CHECK-NEXT: [[COPY3:%[0-9]+]]:_(s8) = COPY [[TRUNC2]](s8) + ; CHECK-NEXT: [[COPY4:%[0-9]+]]:_(s8) = COPY [[TRUNC3]](s8) + ; CHECK-NEXT: [[AND4:%[0-9]+]]:_(s32) = G_AND [[UV4]], [[C]] + ; CHECK-NEXT: [[AND5:%[0-9]+]]:_(s32) = G_AND [[UV5]], [[C]] ; CHECK-NEXT: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[C1]](s32) ; CHECK-NEXT: [[OR3:%[0-9]+]]:_(s32) = G_OR [[AND4]], [[SHL3]] - ; CHECK-NEXT: [[AND6:%[0-9]+]]:_(s32) = G_AND [[UV14]], [[C]] + ; CHECK-NEXT: [[AND6:%[0-9]+]]:_(s32) = G_AND [[UV6]], [[C]] ; CHECK-NEXT: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[AND6]], [[C2]](s32) ; CHECK-NEXT: [[OR4:%[0-9]+]]:_(s32) = G_OR [[OR3]], [[SHL4]] - ; CHECK-NEXT: [[AND7:%[0-9]+]]:_(s32) = G_AND [[UV15]], [[C]] + ; CHECK-NEXT: [[AND7:%[0-9]+]]:_(s32) = G_AND [[UV7]], [[C]] ; CHECK-NEXT: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[AND7]], [[C3]](s32) ; CHECK-NEXT: [[OR5:%[0-9]+]]:_(s32) = G_OR [[OR4]], [[SHL5]] ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[OR2]](s32), [[OR5]](s32) @@ -1106,6 +1108,22 @@ body: | ; CHECK-NEXT: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C]](s32) ; CHECK-NEXT: [[BITCAST3:%[0-9]+]]:_(s32) = G_BITCAST [[UV3]](<2 x s16>) ; CHECK-NEXT: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST3]], [[C]](s32) + ; CHECK-NEXT: [[BITCAST4:%[0-9]+]]:_(s32) = G_BITCAST [[UV4]](<2 x s16>) + ; CHECK-NEXT: [[LSHR4:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST4]], [[C]](s32) + ; CHECK-NEXT: [[BITCAST5:%[0-9]+]]:_(s32) = G_BITCAST [[UV5]](<2 x s16>) + ; CHECK-NEXT: [[LSHR5:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST5]], [[C]](s32) + ; CHECK-NEXT: [[BITCAST6:%[0-9]+]]:_(s32) = G_BITCAST [[UV6]](<2 x s16>) + ; CHECK-NEXT: [[LSHR6:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST6]], [[C]](s32) + ; CHECK-NEXT: [[BITCAST7:%[0-9]+]]:_(s32) = G_BITCAST [[UV7]](<2 x s16>) + ; CHECK-NEXT: [[LSHR7:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST7]], [[C]](s32) + ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(s4) = G_TRUNC [[BITCAST]](s32) + ; CHECK-NEXT: [[TRUNC1:%[0-9]+]]:_(s4) = G_TRUNC [[LSHR]](s32) + ; CHECK-NEXT: [[TRUNC2:%[0-9]+]]:_(s4) = G_TRUNC [[BITCAST1]](s32) + ; CHECK-NEXT: [[TRUNC3:%[0-9]+]]:_(s4) = G_TRUNC [[LSHR1]](s32) + ; CHECK-NEXT: [[TRUNC4:%[0-9]+]]:_(s4) = G_TRUNC [[BITCAST2]](s32) + ; CHECK-NEXT: [[TRUNC5:%[0-9]+]]:_(s4) = G_TRUNC [[LSHR2]](s32) + ; CHECK-NEXT: [[TRUNC6:%[0-9]+]]:_(s4) = G_TRUNC [[BITCAST3]](s32) + ; CHECK-NEXT: [[TRUNC7:%[0-9]+]]:_(s4) = G_TRUNC [[LSHR3]](s32) ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 15 ; CHECK-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[BITCAST]], [[C1]] ; CHECK-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LSHR]], [[C1]] @@ -1135,15 +1153,14 @@ body: | ; CHECK-NEXT: [[C7:%[0-9]+]]:_(s32) = 
G_CONSTANT i32 28 ; CHECK-NEXT: [[SHL6:%[0-9]+]]:_(s32) = G_SHL [[AND7]], [[C7]](s32) ; CHECK-NEXT: [[OR6:%[0-9]+]]:_(s32) = G_OR [[OR5]], [[SHL6]] - ; CHECK-NEXT: [[UV8:%[0-9]+]]:_(<2 x s16>), [[UV9:%[0-9]+]]:_(<2 x s16>), [[UV10:%[0-9]+]]:_(<2 x s16>), [[UV11:%[0-9]+]]:_(<2 x s16>), [[UV12:%[0-9]+]]:_(<2 x s16>), [[UV13:%[0-9]+]]:_(<2 x s16>), [[UV14:%[0-9]+]]:_(<2 x s16>), [[UV15:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[COPY]](<16 x s16>) - ; CHECK-NEXT: [[BITCAST4:%[0-9]+]]:_(s32) = G_BITCAST [[UV12]](<2 x s16>) - ; CHECK-NEXT: [[LSHR4:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST4]], [[C]](s32) - ; CHECK-NEXT: [[BITCAST5:%[0-9]+]]:_(s32) = G_BITCAST [[UV13]](<2 x s16>) - ; CHECK-NEXT: [[LSHR5:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST5]], [[C]](s32) - ; CHECK-NEXT: [[BITCAST6:%[0-9]+]]:_(s32) = G_BITCAST [[UV14]](<2 x s16>) - ; CHECK-NEXT: [[LSHR6:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST6]], [[C]](s32) - ; CHECK-NEXT: [[BITCAST7:%[0-9]+]]:_(s32) = G_BITCAST [[UV15]](<2 x s16>) - ; CHECK-NEXT: [[LSHR7:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST7]], [[C]](s32) + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s4) = COPY [[TRUNC]](s4) + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(s4) = COPY [[TRUNC1]](s4) + ; CHECK-NEXT: [[COPY3:%[0-9]+]]:_(s4) = COPY [[TRUNC2]](s4) + ; CHECK-NEXT: [[COPY4:%[0-9]+]]:_(s4) = COPY [[TRUNC3]](s4) + ; CHECK-NEXT: [[COPY5:%[0-9]+]]:_(s4) = COPY [[TRUNC4]](s4) + ; CHECK-NEXT: [[COPY6:%[0-9]+]]:_(s4) = COPY [[TRUNC5]](s4) + ; CHECK-NEXT: [[COPY7:%[0-9]+]]:_(s4) = COPY [[TRUNC6]](s4) + ; CHECK-NEXT: [[COPY8:%[0-9]+]]:_(s4) = COPY [[TRUNC7]](s4) ; CHECK-NEXT: [[AND8:%[0-9]+]]:_(s32) = G_AND [[BITCAST4]], [[C1]] ; CHECK-NEXT: [[AND9:%[0-9]+]]:_(s32) = G_AND [[LSHR4]], [[C1]] ; CHECK-NEXT: [[SHL7:%[0-9]+]]:_(s32) = G_SHL [[AND9]], [[C2]](s32) @@ -1367,25 +1384,24 @@ body: | ; CHECK-LABEL: name: test_bitcast_v6s8_to_v3s16 ; CHECK: [[COPY:%[0-9]+]]:_(<6 x s32>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5 ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<6 x s32>) - ; CHECK-NEXT: [[UV6:%[0-9]+]]:_(s32), [[UV7:%[0-9]+]]:_(s32), [[UV8:%[0-9]+]]:_(s32), [[UV9:%[0-9]+]]:_(s32), [[UV10:%[0-9]+]]:_(s32), [[UV11:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<6 x s32>) ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[UV]](s32) - ; CHECK-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[UV6]](s32) - ; CHECK-NEXT: [[ADD:%[0-9]+]]:_(s16) = G_ADD [[TRUNC]], [[TRUNC1]] - ; CHECK-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[UV1]](s32) - ; CHECK-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[UV7]](s32) - ; CHECK-NEXT: [[ADD1:%[0-9]+]]:_(s16) = G_ADD [[TRUNC2]], [[TRUNC3]] - ; CHECK-NEXT: [[TRUNC4:%[0-9]+]]:_(s16) = G_TRUNC [[UV2]](s32) - ; CHECK-NEXT: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[UV8]](s32) - ; CHECK-NEXT: [[ADD2:%[0-9]+]]:_(s16) = G_ADD [[TRUNC4]], [[TRUNC5]] - ; CHECK-NEXT: [[TRUNC6:%[0-9]+]]:_(s16) = G_TRUNC [[UV3]](s32) - ; CHECK-NEXT: [[TRUNC7:%[0-9]+]]:_(s16) = G_TRUNC [[UV9]](s32) - ; CHECK-NEXT: [[ADD3:%[0-9]+]]:_(s16) = G_ADD [[TRUNC6]], [[TRUNC7]] - ; CHECK-NEXT: [[TRUNC8:%[0-9]+]]:_(s16) = G_TRUNC [[UV4]](s32) - ; CHECK-NEXT: [[TRUNC9:%[0-9]+]]:_(s16) = G_TRUNC [[UV10]](s32) - ; CHECK-NEXT: [[ADD4:%[0-9]+]]:_(s16) = G_ADD [[TRUNC8]], [[TRUNC9]] - ; CHECK-NEXT: [[TRUNC10:%[0-9]+]]:_(s16) = G_TRUNC [[UV5]](s32) - ; CHECK-NEXT: [[TRUNC11:%[0-9]+]]:_(s16) = G_TRUNC [[UV11]](s32) - ; CHECK-NEXT: [[ADD5:%[0-9]+]]:_(s16) = G_ADD [[TRUNC10]], [[TRUNC11]] + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s16) = 
COPY [[TRUNC]](s16) + ; CHECK-NEXT: [[ADD:%[0-9]+]]:_(s16) = G_ADD [[COPY1]], [[TRUNC]] + ; CHECK-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[UV1]](s32) + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(s16) = COPY [[TRUNC1]](s16) + ; CHECK-NEXT: [[ADD1:%[0-9]+]]:_(s16) = G_ADD [[COPY2]], [[TRUNC1]] + ; CHECK-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[UV2]](s32) + ; CHECK-NEXT: [[COPY3:%[0-9]+]]:_(s16) = COPY [[TRUNC2]](s16) + ; CHECK-NEXT: [[ADD2:%[0-9]+]]:_(s16) = G_ADD [[COPY3]], [[TRUNC2]] + ; CHECK-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[UV3]](s32) + ; CHECK-NEXT: [[COPY4:%[0-9]+]]:_(s16) = COPY [[TRUNC3]](s16) + ; CHECK-NEXT: [[ADD3:%[0-9]+]]:_(s16) = G_ADD [[COPY4]], [[TRUNC3]] + ; CHECK-NEXT: [[TRUNC4:%[0-9]+]]:_(s16) = G_TRUNC [[UV4]](s32) + ; CHECK-NEXT: [[COPY5:%[0-9]+]]:_(s16) = COPY [[TRUNC4]](s16) + ; CHECK-NEXT: [[ADD4:%[0-9]+]]:_(s16) = G_ADD [[COPY5]], [[TRUNC4]] + ; CHECK-NEXT: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[UV5]](s32) + ; CHECK-NEXT: [[COPY6:%[0-9]+]]:_(s16) = COPY [[TRUNC5]](s16) + ; CHECK-NEXT: [[ADD5:%[0-9]+]]:_(s16) = G_ADD [[COPY6]], [[TRUNC5]] ; CHECK-NEXT: [[C:%[0-9]+]]:_(s16) = G_CONSTANT i16 255 ; CHECK-NEXT: [[AND:%[0-9]+]]:_(s16) = G_AND [[ADD]], [[C]] ; CHECK-NEXT: [[AND1:%[0-9]+]]:_(s16) = G_AND [[ADD1]], [[C]] @@ -1433,15 +1449,14 @@ body: | ; CHECK: [[COPY:%[0-9]+]]:_(<3 x s32>) = COPY $vgpr0_vgpr1_vgpr2 ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<3 x s32>) ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[UV]](s32) + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s16) = COPY [[TRUNC]](s16) ; CHECK-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[UV1]](s32) + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(s16) = COPY [[TRUNC1]](s16) ; CHECK-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[UV2]](s32) - ; CHECK-NEXT: [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<3 x s32>) - ; CHECK-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[UV3]](s32) - ; CHECK-NEXT: [[TRUNC4:%[0-9]+]]:_(s16) = G_TRUNC [[UV4]](s32) - ; CHECK-NEXT: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[UV5]](s32) - ; CHECK-NEXT: [[ADD:%[0-9]+]]:_(s16) = G_ADD [[TRUNC]], [[TRUNC3]] - ; CHECK-NEXT: [[ADD1:%[0-9]+]]:_(s16) = G_ADD [[TRUNC1]], [[TRUNC4]] - ; CHECK-NEXT: [[ADD2:%[0-9]+]]:_(s16) = G_ADD [[TRUNC2]], [[TRUNC5]] + ; CHECK-NEXT: [[COPY3:%[0-9]+]]:_(s16) = COPY [[TRUNC2]](s16) + ; CHECK-NEXT: [[ADD:%[0-9]+]]:_(s16) = G_ADD [[COPY1]], [[TRUNC]] + ; CHECK-NEXT: [[ADD1:%[0-9]+]]:_(s16) = G_ADD [[COPY2]], [[TRUNC1]] + ; CHECK-NEXT: [[ADD2:%[0-9]+]]:_(s16) = G_ADD [[COPY3]], [[TRUNC2]] ; CHECK-NEXT: [[C:%[0-9]+]]:_(s16) = G_CONSTANT i16 8 ; CHECK-NEXT: [[LSHR:%[0-9]+]]:_(s16) = G_LSHR [[ADD]], [[C]](s16) ; CHECK-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[ADD]](s16) @@ -1562,30 +1577,38 @@ body: | ; CHECK-LABEL: name: test_bitcast_v16s8_to_v2s64 ; CHECK: [[COPY:%[0-9]+]]:_(<16 x s32>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32), [[UV6:%[0-9]+]]:_(s32), [[UV7:%[0-9]+]]:_(s32), [[UV8:%[0-9]+]]:_(s32), [[UV9:%[0-9]+]]:_(s32), [[UV10:%[0-9]+]]:_(s32), [[UV11:%[0-9]+]]:_(s32), [[UV12:%[0-9]+]]:_(s32), [[UV13:%[0-9]+]]:_(s32), [[UV14:%[0-9]+]]:_(s32), [[UV15:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<16 x s32>) + ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(s8) = G_TRUNC [[UV]](s32) + ; CHECK-NEXT: 
[[TRUNC1:%[0-9]+]]:_(s8) = G_TRUNC [[UV1]](s32) + ; CHECK-NEXT: [[TRUNC2:%[0-9]+]]:_(s8) = G_TRUNC [[UV2]](s32) + ; CHECK-NEXT: [[TRUNC3:%[0-9]+]]:_(s8) = G_TRUNC [[UV3]](s32) + ; CHECK-NEXT: [[TRUNC4:%[0-9]+]]:_(s8) = G_TRUNC [[UV4]](s32) + ; CHECK-NEXT: [[TRUNC5:%[0-9]+]]:_(s8) = G_TRUNC [[UV5]](s32) + ; CHECK-NEXT: [[TRUNC6:%[0-9]+]]:_(s8) = G_TRUNC [[UV6]](s32) + ; CHECK-NEXT: [[TRUNC7:%[0-9]+]]:_(s8) = G_TRUNC [[UV7]](s32) ; CHECK-NEXT: [[C:%[0-9]+]]:_(s16) = G_CONSTANT i16 255 - ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[UV]](s32) - ; CHECK-NEXT: [[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC]], [[C]] - ; CHECK-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[UV1]](s32) - ; CHECK-NEXT: [[AND1:%[0-9]+]]:_(s16) = G_AND [[TRUNC1]], [[C]] + ; CHECK-NEXT: [[TRUNC8:%[0-9]+]]:_(s16) = G_TRUNC [[UV]](s32) + ; CHECK-NEXT: [[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC8]], [[C]] + ; CHECK-NEXT: [[TRUNC9:%[0-9]+]]:_(s16) = G_TRUNC [[UV1]](s32) + ; CHECK-NEXT: [[AND1:%[0-9]+]]:_(s16) = G_AND [[TRUNC9]], [[C]] ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s16) = G_CONSTANT i16 8 ; CHECK-NEXT: [[SHL:%[0-9]+]]:_(s16) = G_SHL [[AND1]], [[C1]](s16) ; CHECK-NEXT: [[OR:%[0-9]+]]:_(s16) = G_OR [[AND]], [[SHL]] - ; CHECK-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[UV2]](s32) - ; CHECK-NEXT: [[AND2:%[0-9]+]]:_(s16) = G_AND [[TRUNC2]], [[C]] - ; CHECK-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[UV3]](s32) - ; CHECK-NEXT: [[AND3:%[0-9]+]]:_(s16) = G_AND [[TRUNC3]], [[C]] + ; CHECK-NEXT: [[TRUNC10:%[0-9]+]]:_(s16) = G_TRUNC [[UV2]](s32) + ; CHECK-NEXT: [[AND2:%[0-9]+]]:_(s16) = G_AND [[TRUNC10]], [[C]] + ; CHECK-NEXT: [[TRUNC11:%[0-9]+]]:_(s16) = G_TRUNC [[UV3]](s32) + ; CHECK-NEXT: [[AND3:%[0-9]+]]:_(s16) = G_AND [[TRUNC11]], [[C]] ; CHECK-NEXT: [[SHL1:%[0-9]+]]:_(s16) = G_SHL [[AND3]], [[C1]](s16) ; CHECK-NEXT: [[OR1:%[0-9]+]]:_(s16) = G_OR [[AND2]], [[SHL1]] - ; CHECK-NEXT: [[TRUNC4:%[0-9]+]]:_(s16) = G_TRUNC [[UV4]](s32) - ; CHECK-NEXT: [[AND4:%[0-9]+]]:_(s16) = G_AND [[TRUNC4]], [[C]] - ; CHECK-NEXT: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[UV5]](s32) - ; CHECK-NEXT: [[AND5:%[0-9]+]]:_(s16) = G_AND [[TRUNC5]], [[C]] + ; CHECK-NEXT: [[TRUNC12:%[0-9]+]]:_(s16) = G_TRUNC [[UV4]](s32) + ; CHECK-NEXT: [[AND4:%[0-9]+]]:_(s16) = G_AND [[TRUNC12]], [[C]] + ; CHECK-NEXT: [[TRUNC13:%[0-9]+]]:_(s16) = G_TRUNC [[UV5]](s32) + ; CHECK-NEXT: [[AND5:%[0-9]+]]:_(s16) = G_AND [[TRUNC13]], [[C]] ; CHECK-NEXT: [[SHL2:%[0-9]+]]:_(s16) = G_SHL [[AND5]], [[C1]](s16) ; CHECK-NEXT: [[OR2:%[0-9]+]]:_(s16) = G_OR [[AND4]], [[SHL2]] - ; CHECK-NEXT: [[TRUNC6:%[0-9]+]]:_(s16) = G_TRUNC [[UV6]](s32) - ; CHECK-NEXT: [[AND6:%[0-9]+]]:_(s16) = G_AND [[TRUNC6]], [[C]] - ; CHECK-NEXT: [[TRUNC7:%[0-9]+]]:_(s16) = G_TRUNC [[UV7]](s32) - ; CHECK-NEXT: [[AND7:%[0-9]+]]:_(s16) = G_AND [[TRUNC7]], [[C]] + ; CHECK-NEXT: [[TRUNC14:%[0-9]+]]:_(s16) = G_TRUNC [[UV6]](s32) + ; CHECK-NEXT: [[AND6:%[0-9]+]]:_(s16) = G_AND [[TRUNC14]], [[C]] + ; CHECK-NEXT: [[TRUNC15:%[0-9]+]]:_(s16) = G_TRUNC [[UV7]](s32) + ; CHECK-NEXT: [[AND7:%[0-9]+]]:_(s16) = G_AND [[TRUNC15]], [[C]] ; CHECK-NEXT: [[SHL3:%[0-9]+]]:_(s16) = G_SHL [[AND7]], [[C1]](s16) ; CHECK-NEXT: [[OR3:%[0-9]+]]:_(s16) = G_OR [[AND6]], [[SHL3]] ; CHECK-NEXT: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[OR]](s16) @@ -1598,29 +1621,36 @@ body: | ; CHECK-NEXT: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[ZEXT3]], [[C2]](s32) ; CHECK-NEXT: [[OR5:%[0-9]+]]:_(s32) = G_OR [[ZEXT2]], [[SHL5]] ; CHECK-NEXT: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[OR4]](s32), [[OR5]](s32) - ; CHECK-NEXT: [[UV16:%[0-9]+]]:_(s32), [[UV17:%[0-9]+]]:_(s32), 
[[UV18:%[0-9]+]]:_(s32), [[UV19:%[0-9]+]]:_(s32), [[UV20:%[0-9]+]]:_(s32), [[UV21:%[0-9]+]]:_(s32), [[UV22:%[0-9]+]]:_(s32), [[UV23:%[0-9]+]]:_(s32), [[UV24:%[0-9]+]]:_(s32), [[UV25:%[0-9]+]]:_(s32), [[UV26:%[0-9]+]]:_(s32), [[UV27:%[0-9]+]]:_(s32), [[UV28:%[0-9]+]]:_(s32), [[UV29:%[0-9]+]]:_(s32), [[UV30:%[0-9]+]]:_(s32), [[UV31:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<16 x s32>) - ; CHECK-NEXT: [[TRUNC8:%[0-9]+]]:_(s16) = G_TRUNC [[UV24]](s32) - ; CHECK-NEXT: [[AND8:%[0-9]+]]:_(s16) = G_AND [[TRUNC8]], [[C]] - ; CHECK-NEXT: [[TRUNC9:%[0-9]+]]:_(s16) = G_TRUNC [[UV25]](s32) - ; CHECK-NEXT: [[AND9:%[0-9]+]]:_(s16) = G_AND [[TRUNC9]], [[C]] + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s8) = COPY [[TRUNC]](s8) + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(s8) = COPY [[TRUNC1]](s8) + ; CHECK-NEXT: [[COPY3:%[0-9]+]]:_(s8) = COPY [[TRUNC2]](s8) + ; CHECK-NEXT: [[COPY4:%[0-9]+]]:_(s8) = COPY [[TRUNC3]](s8) + ; CHECK-NEXT: [[COPY5:%[0-9]+]]:_(s8) = COPY [[TRUNC4]](s8) + ; CHECK-NEXT: [[COPY6:%[0-9]+]]:_(s8) = COPY [[TRUNC5]](s8) + ; CHECK-NEXT: [[COPY7:%[0-9]+]]:_(s8) = COPY [[TRUNC6]](s8) + ; CHECK-NEXT: [[COPY8:%[0-9]+]]:_(s8) = COPY [[TRUNC7]](s8) + ; CHECK-NEXT: [[TRUNC16:%[0-9]+]]:_(s16) = G_TRUNC [[UV8]](s32) + ; CHECK-NEXT: [[AND8:%[0-9]+]]:_(s16) = G_AND [[TRUNC16]], [[C]] + ; CHECK-NEXT: [[TRUNC17:%[0-9]+]]:_(s16) = G_TRUNC [[UV9]](s32) + ; CHECK-NEXT: [[AND9:%[0-9]+]]:_(s16) = G_AND [[TRUNC17]], [[C]] ; CHECK-NEXT: [[SHL6:%[0-9]+]]:_(s16) = G_SHL [[AND9]], [[C1]](s16) ; CHECK-NEXT: [[OR6:%[0-9]+]]:_(s16) = G_OR [[AND8]], [[SHL6]] - ; CHECK-NEXT: [[TRUNC10:%[0-9]+]]:_(s16) = G_TRUNC [[UV26]](s32) - ; CHECK-NEXT: [[AND10:%[0-9]+]]:_(s16) = G_AND [[TRUNC10]], [[C]] - ; CHECK-NEXT: [[TRUNC11:%[0-9]+]]:_(s16) = G_TRUNC [[UV27]](s32) - ; CHECK-NEXT: [[AND11:%[0-9]+]]:_(s16) = G_AND [[TRUNC11]], [[C]] + ; CHECK-NEXT: [[TRUNC18:%[0-9]+]]:_(s16) = G_TRUNC [[UV10]](s32) + ; CHECK-NEXT: [[AND10:%[0-9]+]]:_(s16) = G_AND [[TRUNC18]], [[C]] + ; CHECK-NEXT: [[TRUNC19:%[0-9]+]]:_(s16) = G_TRUNC [[UV11]](s32) + ; CHECK-NEXT: [[AND11:%[0-9]+]]:_(s16) = G_AND [[TRUNC19]], [[C]] ; CHECK-NEXT: [[SHL7:%[0-9]+]]:_(s16) = G_SHL [[AND11]], [[C1]](s16) ; CHECK-NEXT: [[OR7:%[0-9]+]]:_(s16) = G_OR [[AND10]], [[SHL7]] - ; CHECK-NEXT: [[TRUNC12:%[0-9]+]]:_(s16) = G_TRUNC [[UV28]](s32) - ; CHECK-NEXT: [[AND12:%[0-9]+]]:_(s16) = G_AND [[TRUNC12]], [[C]] - ; CHECK-NEXT: [[TRUNC13:%[0-9]+]]:_(s16) = G_TRUNC [[UV29]](s32) - ; CHECK-NEXT: [[AND13:%[0-9]+]]:_(s16) = G_AND [[TRUNC13]], [[C]] + ; CHECK-NEXT: [[TRUNC20:%[0-9]+]]:_(s16) = G_TRUNC [[UV12]](s32) + ; CHECK-NEXT: [[AND12:%[0-9]+]]:_(s16) = G_AND [[TRUNC20]], [[C]] + ; CHECK-NEXT: [[TRUNC21:%[0-9]+]]:_(s16) = G_TRUNC [[UV13]](s32) + ; CHECK-NEXT: [[AND13:%[0-9]+]]:_(s16) = G_AND [[TRUNC21]], [[C]] ; CHECK-NEXT: [[SHL8:%[0-9]+]]:_(s16) = G_SHL [[AND13]], [[C1]](s16) ; CHECK-NEXT: [[OR8:%[0-9]+]]:_(s16) = G_OR [[AND12]], [[SHL8]] - ; CHECK-NEXT: [[TRUNC14:%[0-9]+]]:_(s16) = G_TRUNC [[UV30]](s32) - ; CHECK-NEXT: [[AND14:%[0-9]+]]:_(s16) = G_AND [[TRUNC14]], [[C]] - ; CHECK-NEXT: [[TRUNC15:%[0-9]+]]:_(s16) = G_TRUNC [[UV31]](s32) - ; CHECK-NEXT: [[AND15:%[0-9]+]]:_(s16) = G_AND [[TRUNC15]], [[C]] + ; CHECK-NEXT: [[TRUNC22:%[0-9]+]]:_(s16) = G_TRUNC [[UV14]](s32) + ; CHECK-NEXT: [[AND14:%[0-9]+]]:_(s16) = G_AND [[TRUNC22]], [[C]] + ; CHECK-NEXT: [[TRUNC23:%[0-9]+]]:_(s16) = G_TRUNC [[UV15]](s32) + ; CHECK-NEXT: [[AND15:%[0-9]+]]:_(s16) = G_AND [[TRUNC23]], [[C]] ; CHECK-NEXT: [[SHL9:%[0-9]+]]:_(s16) = G_SHL [[AND15]], [[C1]](s16) ; CHECK-NEXT: [[OR9:%[0-9]+]]:_(s16) = G_OR [[AND14]], 
[[SHL9]] ; CHECK-NEXT: [[ZEXT4:%[0-9]+]]:_(s32) = G_ZEXT [[OR6]](s16) @@ -1745,6 +1775,10 @@ body: | ; CHECK-LABEL: name: test_bitcast_v16s8_to_v4s32 ; CHECK: [[COPY:%[0-9]+]]:_(<16 x s32>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32), [[UV6:%[0-9]+]]:_(s32), [[UV7:%[0-9]+]]:_(s32), [[UV8:%[0-9]+]]:_(s32), [[UV9:%[0-9]+]]:_(s32), [[UV10:%[0-9]+]]:_(s32), [[UV11:%[0-9]+]]:_(s32), [[UV12:%[0-9]+]]:_(s32), [[UV13:%[0-9]+]]:_(s32), [[UV14:%[0-9]+]]:_(s32), [[UV15:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<16 x s32>) + ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(s8) = G_TRUNC [[UV]](s32) + ; CHECK-NEXT: [[TRUNC1:%[0-9]+]]:_(s8) = G_TRUNC [[UV1]](s32) + ; CHECK-NEXT: [[TRUNC2:%[0-9]+]]:_(s8) = G_TRUNC [[UV2]](s32) + ; CHECK-NEXT: [[TRUNC3:%[0-9]+]]:_(s8) = G_TRUNC [[UV3]](s32) ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 255 ; CHECK-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[UV]], [[C]] ; CHECK-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[UV1]], [[C]] @@ -1759,37 +1793,66 @@ body: | ; CHECK-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 24 ; CHECK-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C3]](s32) ; CHECK-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[OR1]], [[SHL2]] - ; CHECK-NEXT: [[UV16:%[0-9]+]]:_(s32), [[UV17:%[0-9]+]]:_(s32), [[UV18:%[0-9]+]]:_(s32), [[UV19:%[0-9]+]]:_(s32), [[UV20:%[0-9]+]]:_(s32), [[UV21:%[0-9]+]]:_(s32), [[UV22:%[0-9]+]]:_(s32), [[UV23:%[0-9]+]]:_(s32), [[UV24:%[0-9]+]]:_(s32), [[UV25:%[0-9]+]]:_(s32), [[UV26:%[0-9]+]]:_(s32), [[UV27:%[0-9]+]]:_(s32), [[UV28:%[0-9]+]]:_(s32), [[UV29:%[0-9]+]]:_(s32), [[UV30:%[0-9]+]]:_(s32), [[UV31:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<16 x s32>) - ; CHECK-NEXT: [[AND4:%[0-9]+]]:_(s32) = G_AND [[UV20]], [[C]] - ; CHECK-NEXT: [[AND5:%[0-9]+]]:_(s32) = G_AND [[UV21]], [[C]] + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s8) = COPY [[TRUNC]](s8) + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(s8) = COPY [[TRUNC1]](s8) + ; CHECK-NEXT: [[COPY3:%[0-9]+]]:_(s8) = COPY [[TRUNC2]](s8) + ; CHECK-NEXT: [[COPY4:%[0-9]+]]:_(s8) = COPY [[TRUNC3]](s8) + ; CHECK-NEXT: [[TRUNC4:%[0-9]+]]:_(s8) = G_TRUNC [[UV4]](s32) + ; CHECK-NEXT: [[TRUNC5:%[0-9]+]]:_(s8) = G_TRUNC [[UV5]](s32) + ; CHECK-NEXT: [[TRUNC6:%[0-9]+]]:_(s8) = G_TRUNC [[UV6]](s32) + ; CHECK-NEXT: [[TRUNC7:%[0-9]+]]:_(s8) = G_TRUNC [[UV7]](s32) + ; CHECK-NEXT: [[AND4:%[0-9]+]]:_(s32) = G_AND [[UV4]], [[C]] + ; CHECK-NEXT: [[AND5:%[0-9]+]]:_(s32) = G_AND [[UV5]], [[C]] ; CHECK-NEXT: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[C1]](s32) ; CHECK-NEXT: [[OR3:%[0-9]+]]:_(s32) = G_OR [[AND4]], [[SHL3]] - ; CHECK-NEXT: [[AND6:%[0-9]+]]:_(s32) = G_AND [[UV22]], [[C]] + ; CHECK-NEXT: [[AND6:%[0-9]+]]:_(s32) = G_AND [[UV6]], [[C]] ; CHECK-NEXT: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[AND6]], [[C2]](s32) ; CHECK-NEXT: [[OR4:%[0-9]+]]:_(s32) = G_OR [[OR3]], [[SHL4]] - ; CHECK-NEXT: [[AND7:%[0-9]+]]:_(s32) = G_AND [[UV23]], [[C]] + ; CHECK-NEXT: [[AND7:%[0-9]+]]:_(s32) = G_AND [[UV7]], [[C]] ; CHECK-NEXT: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[AND7]], [[C3]](s32) ; CHECK-NEXT: [[OR5:%[0-9]+]]:_(s32) = G_OR [[OR4]], [[SHL5]] - ; CHECK-NEXT: [[UV32:%[0-9]+]]:_(s32), [[UV33:%[0-9]+]]:_(s32), [[UV34:%[0-9]+]]:_(s32), [[UV35:%[0-9]+]]:_(s32), [[UV36:%[0-9]+]]:_(s32), [[UV37:%[0-9]+]]:_(s32), [[UV38:%[0-9]+]]:_(s32), [[UV39:%[0-9]+]]:_(s32), [[UV40:%[0-9]+]]:_(s32), [[UV41:%[0-9]+]]:_(s32), [[UV42:%[0-9]+]]:_(s32), 
[[UV43:%[0-9]+]]:_(s32), [[UV44:%[0-9]+]]:_(s32), [[UV45:%[0-9]+]]:_(s32), [[UV46:%[0-9]+]]:_(s32), [[UV47:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<16 x s32>) - ; CHECK-NEXT: [[AND8:%[0-9]+]]:_(s32) = G_AND [[UV40]], [[C]] - ; CHECK-NEXT: [[AND9:%[0-9]+]]:_(s32) = G_AND [[UV41]], [[C]] + ; CHECK-NEXT: [[COPY5:%[0-9]+]]:_(s8) = COPY [[TRUNC]](s8) + ; CHECK-NEXT: [[COPY6:%[0-9]+]]:_(s8) = COPY [[TRUNC1]](s8) + ; CHECK-NEXT: [[COPY7:%[0-9]+]]:_(s8) = COPY [[TRUNC2]](s8) + ; CHECK-NEXT: [[COPY8:%[0-9]+]]:_(s8) = COPY [[TRUNC3]](s8) + ; CHECK-NEXT: [[COPY9:%[0-9]+]]:_(s8) = COPY [[TRUNC4]](s8) + ; CHECK-NEXT: [[COPY10:%[0-9]+]]:_(s8) = COPY [[TRUNC5]](s8) + ; CHECK-NEXT: [[COPY11:%[0-9]+]]:_(s8) = COPY [[TRUNC6]](s8) + ; CHECK-NEXT: [[COPY12:%[0-9]+]]:_(s8) = COPY [[TRUNC7]](s8) + ; CHECK-NEXT: [[TRUNC8:%[0-9]+]]:_(s8) = G_TRUNC [[UV8]](s32) + ; CHECK-NEXT: [[TRUNC9:%[0-9]+]]:_(s8) = G_TRUNC [[UV9]](s32) + ; CHECK-NEXT: [[TRUNC10:%[0-9]+]]:_(s8) = G_TRUNC [[UV10]](s32) + ; CHECK-NEXT: [[TRUNC11:%[0-9]+]]:_(s8) = G_TRUNC [[UV11]](s32) + ; CHECK-NEXT: [[AND8:%[0-9]+]]:_(s32) = G_AND [[UV8]], [[C]] + ; CHECK-NEXT: [[AND9:%[0-9]+]]:_(s32) = G_AND [[UV9]], [[C]] ; CHECK-NEXT: [[SHL6:%[0-9]+]]:_(s32) = G_SHL [[AND9]], [[C1]](s32) ; CHECK-NEXT: [[OR6:%[0-9]+]]:_(s32) = G_OR [[AND8]], [[SHL6]] - ; CHECK-NEXT: [[AND10:%[0-9]+]]:_(s32) = G_AND [[UV42]], [[C]] + ; CHECK-NEXT: [[AND10:%[0-9]+]]:_(s32) = G_AND [[UV10]], [[C]] ; CHECK-NEXT: [[SHL7:%[0-9]+]]:_(s32) = G_SHL [[AND10]], [[C2]](s32) ; CHECK-NEXT: [[OR7:%[0-9]+]]:_(s32) = G_OR [[OR6]], [[SHL7]] - ; CHECK-NEXT: [[AND11:%[0-9]+]]:_(s32) = G_AND [[UV43]], [[C]] + ; CHECK-NEXT: [[AND11:%[0-9]+]]:_(s32) = G_AND [[UV11]], [[C]] ; CHECK-NEXT: [[SHL8:%[0-9]+]]:_(s32) = G_SHL [[AND11]], [[C3]](s32) ; CHECK-NEXT: [[OR8:%[0-9]+]]:_(s32) = G_OR [[OR7]], [[SHL8]] - ; CHECK-NEXT: [[UV48:%[0-9]+]]:_(s32), [[UV49:%[0-9]+]]:_(s32), [[UV50:%[0-9]+]]:_(s32), [[UV51:%[0-9]+]]:_(s32), [[UV52:%[0-9]+]]:_(s32), [[UV53:%[0-9]+]]:_(s32), [[UV54:%[0-9]+]]:_(s32), [[UV55:%[0-9]+]]:_(s32), [[UV56:%[0-9]+]]:_(s32), [[UV57:%[0-9]+]]:_(s32), [[UV58:%[0-9]+]]:_(s32), [[UV59:%[0-9]+]]:_(s32), [[UV60:%[0-9]+]]:_(s32), [[UV61:%[0-9]+]]:_(s32), [[UV62:%[0-9]+]]:_(s32), [[UV63:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<16 x s32>) - ; CHECK-NEXT: [[AND12:%[0-9]+]]:_(s32) = G_AND [[UV60]], [[C]] - ; CHECK-NEXT: [[AND13:%[0-9]+]]:_(s32) = G_AND [[UV61]], [[C]] + ; CHECK-NEXT: [[COPY13:%[0-9]+]]:_(s8) = COPY [[TRUNC]](s8) + ; CHECK-NEXT: [[COPY14:%[0-9]+]]:_(s8) = COPY [[TRUNC1]](s8) + ; CHECK-NEXT: [[COPY15:%[0-9]+]]:_(s8) = COPY [[TRUNC2]](s8) + ; CHECK-NEXT: [[COPY16:%[0-9]+]]:_(s8) = COPY [[TRUNC3]](s8) + ; CHECK-NEXT: [[COPY17:%[0-9]+]]:_(s8) = COPY [[TRUNC4]](s8) + ; CHECK-NEXT: [[COPY18:%[0-9]+]]:_(s8) = COPY [[TRUNC5]](s8) + ; CHECK-NEXT: [[COPY19:%[0-9]+]]:_(s8) = COPY [[TRUNC6]](s8) + ; CHECK-NEXT: [[COPY20:%[0-9]+]]:_(s8) = COPY [[TRUNC7]](s8) + ; CHECK-NEXT: [[COPY21:%[0-9]+]]:_(s8) = COPY [[TRUNC8]](s8) + ; CHECK-NEXT: [[COPY22:%[0-9]+]]:_(s8) = COPY [[TRUNC9]](s8) + ; CHECK-NEXT: [[COPY23:%[0-9]+]]:_(s8) = COPY [[TRUNC10]](s8) + ; CHECK-NEXT: [[COPY24:%[0-9]+]]:_(s8) = COPY [[TRUNC11]](s8) + ; CHECK-NEXT: [[AND12:%[0-9]+]]:_(s32) = G_AND [[UV12]], [[C]] + ; CHECK-NEXT: [[AND13:%[0-9]+]]:_(s32) = G_AND [[UV13]], [[C]] ; CHECK-NEXT: [[SHL9:%[0-9]+]]:_(s32) = G_SHL [[AND13]], [[C1]](s32) ; CHECK-NEXT: [[OR9:%[0-9]+]]:_(s32) = G_OR [[AND12]], [[SHL9]] - ; CHECK-NEXT: [[AND14:%[0-9]+]]:_(s32) = G_AND [[UV62]], [[C]] + ; CHECK-NEXT: [[AND14:%[0-9]+]]:_(s32) = G_AND 
[[UV14]], [[C]] ; CHECK-NEXT: [[SHL10:%[0-9]+]]:_(s32) = G_SHL [[AND14]], [[C2]](s32) ; CHECK-NEXT: [[OR10:%[0-9]+]]:_(s32) = G_OR [[OR9]], [[SHL10]] - ; CHECK-NEXT: [[AND15:%[0-9]+]]:_(s32) = G_AND [[UV63]], [[C]] + ; CHECK-NEXT: [[AND15:%[0-9]+]]:_(s32) = G_AND [[UV15]], [[C]] ; CHECK-NEXT: [[SHL11:%[0-9]+]]:_(s32) = G_SHL [[AND15]], [[C3]](s32) ; CHECK-NEXT: [[OR11:%[0-9]+]]:_(s32) = G_OR [[OR10]], [[SHL11]] ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[OR2]](s32), [[OR5]](s32), [[OR8]](s32), [[OR11]](s32) @@ -1885,61 +1948,124 @@ body: | ; CHECK-LABEL: name: test_bitcast_v16s8_to_v8s16 ; CHECK: [[COPY:%[0-9]+]]:_(<16 x s32>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32), [[UV6:%[0-9]+]]:_(s32), [[UV7:%[0-9]+]]:_(s32), [[UV8:%[0-9]+]]:_(s32), [[UV9:%[0-9]+]]:_(s32), [[UV10:%[0-9]+]]:_(s32), [[UV11:%[0-9]+]]:_(s32), [[UV12:%[0-9]+]]:_(s32), [[UV13:%[0-9]+]]:_(s32), [[UV14:%[0-9]+]]:_(s32), [[UV15:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<16 x s32>) + ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(s8) = G_TRUNC [[UV]](s32) + ; CHECK-NEXT: [[TRUNC1:%[0-9]+]]:_(s8) = G_TRUNC [[UV1]](s32) ; CHECK-NEXT: [[C:%[0-9]+]]:_(s16) = G_CONSTANT i16 255 - ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[UV]](s32) - ; CHECK-NEXT: [[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC]], [[C]] - ; CHECK-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[UV1]](s32) - ; CHECK-NEXT: [[AND1:%[0-9]+]]:_(s16) = G_AND [[TRUNC1]], [[C]] + ; CHECK-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[UV]](s32) + ; CHECK-NEXT: [[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC2]], [[C]] + ; CHECK-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[UV1]](s32) + ; CHECK-NEXT: [[AND1:%[0-9]+]]:_(s16) = G_AND [[TRUNC3]], [[C]] ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s16) = G_CONSTANT i16 8 ; CHECK-NEXT: [[SHL:%[0-9]+]]:_(s16) = G_SHL [[AND1]], [[C1]](s16) ; CHECK-NEXT: [[OR:%[0-9]+]]:_(s16) = G_OR [[AND]], [[SHL]] - ; CHECK-NEXT: [[UV16:%[0-9]+]]:_(s32), [[UV17:%[0-9]+]]:_(s32), [[UV18:%[0-9]+]]:_(s32), [[UV19:%[0-9]+]]:_(s32), [[UV20:%[0-9]+]]:_(s32), [[UV21:%[0-9]+]]:_(s32), [[UV22:%[0-9]+]]:_(s32), [[UV23:%[0-9]+]]:_(s32), [[UV24:%[0-9]+]]:_(s32), [[UV25:%[0-9]+]]:_(s32), [[UV26:%[0-9]+]]:_(s32), [[UV27:%[0-9]+]]:_(s32), [[UV28:%[0-9]+]]:_(s32), [[UV29:%[0-9]+]]:_(s32), [[UV30:%[0-9]+]]:_(s32), [[UV31:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<16 x s32>) - ; CHECK-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[UV18]](s32) - ; CHECK-NEXT: [[AND2:%[0-9]+]]:_(s16) = G_AND [[TRUNC2]], [[C]] - ; CHECK-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[UV19]](s32) - ; CHECK-NEXT: [[AND3:%[0-9]+]]:_(s16) = G_AND [[TRUNC3]], [[C]] + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s8) = COPY [[TRUNC]](s8) + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(s8) = COPY [[TRUNC1]](s8) + ; CHECK-NEXT: [[TRUNC4:%[0-9]+]]:_(s8) = G_TRUNC [[UV2]](s32) + ; CHECK-NEXT: [[TRUNC5:%[0-9]+]]:_(s8) = G_TRUNC [[UV3]](s32) + ; CHECK-NEXT: [[TRUNC6:%[0-9]+]]:_(s16) = G_TRUNC [[UV2]](s32) + ; CHECK-NEXT: [[AND2:%[0-9]+]]:_(s16) = G_AND [[TRUNC6]], [[C]] + ; CHECK-NEXT: [[TRUNC7:%[0-9]+]]:_(s16) = G_TRUNC [[UV3]](s32) + ; CHECK-NEXT: [[AND3:%[0-9]+]]:_(s16) = G_AND [[TRUNC7]], [[C]] ; CHECK-NEXT: [[SHL1:%[0-9]+]]:_(s16) = G_SHL [[AND3]], [[C1]](s16) ; CHECK-NEXT: [[OR1:%[0-9]+]]:_(s16) = G_OR [[AND2]], [[SHL1]] - ; CHECK-NEXT: [[UV32:%[0-9]+]]:_(s32), [[UV33:%[0-9]+]]:_(s32), [[UV34:%[0-9]+]]:_(s32), 
[[UV35:%[0-9]+]]:_(s32), [[UV36:%[0-9]+]]:_(s32), [[UV37:%[0-9]+]]:_(s32), [[UV38:%[0-9]+]]:_(s32), [[UV39:%[0-9]+]]:_(s32), [[UV40:%[0-9]+]]:_(s32), [[UV41:%[0-9]+]]:_(s32), [[UV42:%[0-9]+]]:_(s32), [[UV43:%[0-9]+]]:_(s32), [[UV44:%[0-9]+]]:_(s32), [[UV45:%[0-9]+]]:_(s32), [[UV46:%[0-9]+]]:_(s32), [[UV47:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<16 x s32>) - ; CHECK-NEXT: [[TRUNC4:%[0-9]+]]:_(s16) = G_TRUNC [[UV36]](s32) - ; CHECK-NEXT: [[AND4:%[0-9]+]]:_(s16) = G_AND [[TRUNC4]], [[C]] - ; CHECK-NEXT: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[UV37]](s32) - ; CHECK-NEXT: [[AND5:%[0-9]+]]:_(s16) = G_AND [[TRUNC5]], [[C]] + ; CHECK-NEXT: [[COPY3:%[0-9]+]]:_(s8) = COPY [[TRUNC]](s8) + ; CHECK-NEXT: [[COPY4:%[0-9]+]]:_(s8) = COPY [[TRUNC1]](s8) + ; CHECK-NEXT: [[COPY5:%[0-9]+]]:_(s8) = COPY [[TRUNC4]](s8) + ; CHECK-NEXT: [[COPY6:%[0-9]+]]:_(s8) = COPY [[TRUNC5]](s8) + ; CHECK-NEXT: [[TRUNC8:%[0-9]+]]:_(s8) = G_TRUNC [[UV4]](s32) + ; CHECK-NEXT: [[TRUNC9:%[0-9]+]]:_(s8) = G_TRUNC [[UV5]](s32) + ; CHECK-NEXT: [[TRUNC10:%[0-9]+]]:_(s16) = G_TRUNC [[UV4]](s32) + ; CHECK-NEXT: [[AND4:%[0-9]+]]:_(s16) = G_AND [[TRUNC10]], [[C]] + ; CHECK-NEXT: [[TRUNC11:%[0-9]+]]:_(s16) = G_TRUNC [[UV5]](s32) + ; CHECK-NEXT: [[AND5:%[0-9]+]]:_(s16) = G_AND [[TRUNC11]], [[C]] ; CHECK-NEXT: [[SHL2:%[0-9]+]]:_(s16) = G_SHL [[AND5]], [[C1]](s16) ; CHECK-NEXT: [[OR2:%[0-9]+]]:_(s16) = G_OR [[AND4]], [[SHL2]] - ; CHECK-NEXT: [[UV48:%[0-9]+]]:_(s32), [[UV49:%[0-9]+]]:_(s32), [[UV50:%[0-9]+]]:_(s32), [[UV51:%[0-9]+]]:_(s32), [[UV52:%[0-9]+]]:_(s32), [[UV53:%[0-9]+]]:_(s32), [[UV54:%[0-9]+]]:_(s32), [[UV55:%[0-9]+]]:_(s32), [[UV56:%[0-9]+]]:_(s32), [[UV57:%[0-9]+]]:_(s32), [[UV58:%[0-9]+]]:_(s32), [[UV59:%[0-9]+]]:_(s32), [[UV60:%[0-9]+]]:_(s32), [[UV61:%[0-9]+]]:_(s32), [[UV62:%[0-9]+]]:_(s32), [[UV63:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<16 x s32>) - ; CHECK-NEXT: [[TRUNC6:%[0-9]+]]:_(s16) = G_TRUNC [[UV54]](s32) - ; CHECK-NEXT: [[AND6:%[0-9]+]]:_(s16) = G_AND [[TRUNC6]], [[C]] - ; CHECK-NEXT: [[TRUNC7:%[0-9]+]]:_(s16) = G_TRUNC [[UV55]](s32) - ; CHECK-NEXT: [[AND7:%[0-9]+]]:_(s16) = G_AND [[TRUNC7]], [[C]] + ; CHECK-NEXT: [[COPY7:%[0-9]+]]:_(s8) = COPY [[TRUNC]](s8) + ; CHECK-NEXT: [[COPY8:%[0-9]+]]:_(s8) = COPY [[TRUNC1]](s8) + ; CHECK-NEXT: [[COPY9:%[0-9]+]]:_(s8) = COPY [[TRUNC4]](s8) + ; CHECK-NEXT: [[COPY10:%[0-9]+]]:_(s8) = COPY [[TRUNC5]](s8) + ; CHECK-NEXT: [[COPY11:%[0-9]+]]:_(s8) = COPY [[TRUNC8]](s8) + ; CHECK-NEXT: [[COPY12:%[0-9]+]]:_(s8) = COPY [[TRUNC9]](s8) + ; CHECK-NEXT: [[TRUNC12:%[0-9]+]]:_(s8) = G_TRUNC [[UV6]](s32) + ; CHECK-NEXT: [[TRUNC13:%[0-9]+]]:_(s8) = G_TRUNC [[UV7]](s32) + ; CHECK-NEXT: [[TRUNC14:%[0-9]+]]:_(s16) = G_TRUNC [[UV6]](s32) + ; CHECK-NEXT: [[AND6:%[0-9]+]]:_(s16) = G_AND [[TRUNC14]], [[C]] + ; CHECK-NEXT: [[TRUNC15:%[0-9]+]]:_(s16) = G_TRUNC [[UV7]](s32) + ; CHECK-NEXT: [[AND7:%[0-9]+]]:_(s16) = G_AND [[TRUNC15]], [[C]] ; CHECK-NEXT: [[SHL3:%[0-9]+]]:_(s16) = G_SHL [[AND7]], [[C1]](s16) ; CHECK-NEXT: [[OR3:%[0-9]+]]:_(s16) = G_OR [[AND6]], [[SHL3]] - ; CHECK-NEXT: [[UV64:%[0-9]+]]:_(s32), [[UV65:%[0-9]+]]:_(s32), [[UV66:%[0-9]+]]:_(s32), [[UV67:%[0-9]+]]:_(s32), [[UV68:%[0-9]+]]:_(s32), [[UV69:%[0-9]+]]:_(s32), [[UV70:%[0-9]+]]:_(s32), [[UV71:%[0-9]+]]:_(s32), [[UV72:%[0-9]+]]:_(s32), [[UV73:%[0-9]+]]:_(s32), [[UV74:%[0-9]+]]:_(s32), [[UV75:%[0-9]+]]:_(s32), [[UV76:%[0-9]+]]:_(s32), [[UV77:%[0-9]+]]:_(s32), [[UV78:%[0-9]+]]:_(s32), [[UV79:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<16 x s32>) - ; CHECK-NEXT: [[TRUNC8:%[0-9]+]]:_(s16) = G_TRUNC [[UV72]](s32) - ; 
CHECK-NEXT: [[AND8:%[0-9]+]]:_(s16) = G_AND [[TRUNC8]], [[C]] - ; CHECK-NEXT: [[TRUNC9:%[0-9]+]]:_(s16) = G_TRUNC [[UV73]](s32) - ; CHECK-NEXT: [[AND9:%[0-9]+]]:_(s16) = G_AND [[TRUNC9]], [[C]] + ; CHECK-NEXT: [[COPY13:%[0-9]+]]:_(s8) = COPY [[TRUNC]](s8) + ; CHECK-NEXT: [[COPY14:%[0-9]+]]:_(s8) = COPY [[TRUNC1]](s8) + ; CHECK-NEXT: [[COPY15:%[0-9]+]]:_(s8) = COPY [[TRUNC4]](s8) + ; CHECK-NEXT: [[COPY16:%[0-9]+]]:_(s8) = COPY [[TRUNC5]](s8) + ; CHECK-NEXT: [[COPY17:%[0-9]+]]:_(s8) = COPY [[TRUNC8]](s8) + ; CHECK-NEXT: [[COPY18:%[0-9]+]]:_(s8) = COPY [[TRUNC9]](s8) + ; CHECK-NEXT: [[COPY19:%[0-9]+]]:_(s8) = COPY [[TRUNC12]](s8) + ; CHECK-NEXT: [[COPY20:%[0-9]+]]:_(s8) = COPY [[TRUNC13]](s8) + ; CHECK-NEXT: [[TRUNC16:%[0-9]+]]:_(s8) = G_TRUNC [[UV8]](s32) + ; CHECK-NEXT: [[TRUNC17:%[0-9]+]]:_(s8) = G_TRUNC [[UV9]](s32) + ; CHECK-NEXT: [[TRUNC18:%[0-9]+]]:_(s16) = G_TRUNC [[UV8]](s32) + ; CHECK-NEXT: [[AND8:%[0-9]+]]:_(s16) = G_AND [[TRUNC18]], [[C]] + ; CHECK-NEXT: [[TRUNC19:%[0-9]+]]:_(s16) = G_TRUNC [[UV9]](s32) + ; CHECK-NEXT: [[AND9:%[0-9]+]]:_(s16) = G_AND [[TRUNC19]], [[C]] ; CHECK-NEXT: [[SHL4:%[0-9]+]]:_(s16) = G_SHL [[AND9]], [[C1]](s16) ; CHECK-NEXT: [[OR4:%[0-9]+]]:_(s16) = G_OR [[AND8]], [[SHL4]] - ; CHECK-NEXT: [[UV80:%[0-9]+]]:_(s32), [[UV81:%[0-9]+]]:_(s32), [[UV82:%[0-9]+]]:_(s32), [[UV83:%[0-9]+]]:_(s32), [[UV84:%[0-9]+]]:_(s32), [[UV85:%[0-9]+]]:_(s32), [[UV86:%[0-9]+]]:_(s32), [[UV87:%[0-9]+]]:_(s32), [[UV88:%[0-9]+]]:_(s32), [[UV89:%[0-9]+]]:_(s32), [[UV90:%[0-9]+]]:_(s32), [[UV91:%[0-9]+]]:_(s32), [[UV92:%[0-9]+]]:_(s32), [[UV93:%[0-9]+]]:_(s32), [[UV94:%[0-9]+]]:_(s32), [[UV95:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<16 x s32>) - ; CHECK-NEXT: [[TRUNC10:%[0-9]+]]:_(s16) = G_TRUNC [[UV90]](s32) - ; CHECK-NEXT: [[AND10:%[0-9]+]]:_(s16) = G_AND [[TRUNC10]], [[C]] - ; CHECK-NEXT: [[TRUNC11:%[0-9]+]]:_(s16) = G_TRUNC [[UV91]](s32) - ; CHECK-NEXT: [[AND11:%[0-9]+]]:_(s16) = G_AND [[TRUNC11]], [[C]] + ; CHECK-NEXT: [[COPY21:%[0-9]+]]:_(s8) = COPY [[TRUNC]](s8) + ; CHECK-NEXT: [[COPY22:%[0-9]+]]:_(s8) = COPY [[TRUNC1]](s8) + ; CHECK-NEXT: [[COPY23:%[0-9]+]]:_(s8) = COPY [[TRUNC4]](s8) + ; CHECK-NEXT: [[COPY24:%[0-9]+]]:_(s8) = COPY [[TRUNC5]](s8) + ; CHECK-NEXT: [[COPY25:%[0-9]+]]:_(s8) = COPY [[TRUNC8]](s8) + ; CHECK-NEXT: [[COPY26:%[0-9]+]]:_(s8) = COPY [[TRUNC9]](s8) + ; CHECK-NEXT: [[COPY27:%[0-9]+]]:_(s8) = COPY [[TRUNC12]](s8) + ; CHECK-NEXT: [[COPY28:%[0-9]+]]:_(s8) = COPY [[TRUNC13]](s8) + ; CHECK-NEXT: [[COPY29:%[0-9]+]]:_(s8) = COPY [[TRUNC16]](s8) + ; CHECK-NEXT: [[COPY30:%[0-9]+]]:_(s8) = COPY [[TRUNC17]](s8) + ; CHECK-NEXT: [[TRUNC20:%[0-9]+]]:_(s8) = G_TRUNC [[UV10]](s32) + ; CHECK-NEXT: [[TRUNC21:%[0-9]+]]:_(s8) = G_TRUNC [[UV11]](s32) + ; CHECK-NEXT: [[TRUNC22:%[0-9]+]]:_(s16) = G_TRUNC [[UV10]](s32) + ; CHECK-NEXT: [[AND10:%[0-9]+]]:_(s16) = G_AND [[TRUNC22]], [[C]] + ; CHECK-NEXT: [[TRUNC23:%[0-9]+]]:_(s16) = G_TRUNC [[UV11]](s32) + ; CHECK-NEXT: [[AND11:%[0-9]+]]:_(s16) = G_AND [[TRUNC23]], [[C]] ; CHECK-NEXT: [[SHL5:%[0-9]+]]:_(s16) = G_SHL [[AND11]], [[C1]](s16) ; CHECK-NEXT: [[OR5:%[0-9]+]]:_(s16) = G_OR [[AND10]], [[SHL5]] - ; CHECK-NEXT: [[UV96:%[0-9]+]]:_(s32), [[UV97:%[0-9]+]]:_(s32), [[UV98:%[0-9]+]]:_(s32), [[UV99:%[0-9]+]]:_(s32), [[UV100:%[0-9]+]]:_(s32), [[UV101:%[0-9]+]]:_(s32), [[UV102:%[0-9]+]]:_(s32), [[UV103:%[0-9]+]]:_(s32), [[UV104:%[0-9]+]]:_(s32), [[UV105:%[0-9]+]]:_(s32), [[UV106:%[0-9]+]]:_(s32), [[UV107:%[0-9]+]]:_(s32), [[UV108:%[0-9]+]]:_(s32), [[UV109:%[0-9]+]]:_(s32), [[UV110:%[0-9]+]]:_(s32), [[UV111:%[0-9]+]]:_(s32) = 
G_UNMERGE_VALUES [[COPY]](<16 x s32>) - ; CHECK-NEXT: [[TRUNC12:%[0-9]+]]:_(s16) = G_TRUNC [[UV108]](s32) - ; CHECK-NEXT: [[AND12:%[0-9]+]]:_(s16) = G_AND [[TRUNC12]], [[C]] - ; CHECK-NEXT: [[TRUNC13:%[0-9]+]]:_(s16) = G_TRUNC [[UV109]](s32) - ; CHECK-NEXT: [[AND13:%[0-9]+]]:_(s16) = G_AND [[TRUNC13]], [[C]] + ; CHECK-NEXT: [[COPY31:%[0-9]+]]:_(s8) = COPY [[TRUNC]](s8) + ; CHECK-NEXT: [[COPY32:%[0-9]+]]:_(s8) = COPY [[TRUNC1]](s8) + ; CHECK-NEXT: [[COPY33:%[0-9]+]]:_(s8) = COPY [[TRUNC4]](s8) + ; CHECK-NEXT: [[COPY34:%[0-9]+]]:_(s8) = COPY [[TRUNC5]](s8) + ; CHECK-NEXT: [[COPY35:%[0-9]+]]:_(s8) = COPY [[TRUNC8]](s8) + ; CHECK-NEXT: [[COPY36:%[0-9]+]]:_(s8) = COPY [[TRUNC9]](s8) + ; CHECK-NEXT: [[COPY37:%[0-9]+]]:_(s8) = COPY [[TRUNC12]](s8) + ; CHECK-NEXT: [[COPY38:%[0-9]+]]:_(s8) = COPY [[TRUNC13]](s8) + ; CHECK-NEXT: [[COPY39:%[0-9]+]]:_(s8) = COPY [[TRUNC16]](s8) + ; CHECK-NEXT: [[COPY40:%[0-9]+]]:_(s8) = COPY [[TRUNC17]](s8) + ; CHECK-NEXT: [[COPY41:%[0-9]+]]:_(s8) = COPY [[TRUNC20]](s8) + ; CHECK-NEXT: [[COPY42:%[0-9]+]]:_(s8) = COPY [[TRUNC21]](s8) + ; CHECK-NEXT: [[TRUNC24:%[0-9]+]]:_(s8) = G_TRUNC [[UV12]](s32) + ; CHECK-NEXT: [[TRUNC25:%[0-9]+]]:_(s8) = G_TRUNC [[UV13]](s32) + ; CHECK-NEXT: [[TRUNC26:%[0-9]+]]:_(s16) = G_TRUNC [[UV12]](s32) + ; CHECK-NEXT: [[AND12:%[0-9]+]]:_(s16) = G_AND [[TRUNC26]], [[C]] + ; CHECK-NEXT: [[TRUNC27:%[0-9]+]]:_(s16) = G_TRUNC [[UV13]](s32) + ; CHECK-NEXT: [[AND13:%[0-9]+]]:_(s16) = G_AND [[TRUNC27]], [[C]] ; CHECK-NEXT: [[SHL6:%[0-9]+]]:_(s16) = G_SHL [[AND13]], [[C1]](s16) ; CHECK-NEXT: [[OR6:%[0-9]+]]:_(s16) = G_OR [[AND12]], [[SHL6]] - ; CHECK-NEXT: [[UV112:%[0-9]+]]:_(s32), [[UV113:%[0-9]+]]:_(s32), [[UV114:%[0-9]+]]:_(s32), [[UV115:%[0-9]+]]:_(s32), [[UV116:%[0-9]+]]:_(s32), [[UV117:%[0-9]+]]:_(s32), [[UV118:%[0-9]+]]:_(s32), [[UV119:%[0-9]+]]:_(s32), [[UV120:%[0-9]+]]:_(s32), [[UV121:%[0-9]+]]:_(s32), [[UV122:%[0-9]+]]:_(s32), [[UV123:%[0-9]+]]:_(s32), [[UV124:%[0-9]+]]:_(s32), [[UV125:%[0-9]+]]:_(s32), [[UV126:%[0-9]+]]:_(s32), [[UV127:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<16 x s32>) - ; CHECK-NEXT: [[TRUNC14:%[0-9]+]]:_(s16) = G_TRUNC [[UV126]](s32) - ; CHECK-NEXT: [[AND14:%[0-9]+]]:_(s16) = G_AND [[TRUNC14]], [[C]] - ; CHECK-NEXT: [[TRUNC15:%[0-9]+]]:_(s16) = G_TRUNC [[UV127]](s32) - ; CHECK-NEXT: [[AND15:%[0-9]+]]:_(s16) = G_AND [[TRUNC15]], [[C]] + ; CHECK-NEXT: [[COPY43:%[0-9]+]]:_(s8) = COPY [[TRUNC]](s8) + ; CHECK-NEXT: [[COPY44:%[0-9]+]]:_(s8) = COPY [[TRUNC1]](s8) + ; CHECK-NEXT: [[COPY45:%[0-9]+]]:_(s8) = COPY [[TRUNC4]](s8) + ; CHECK-NEXT: [[COPY46:%[0-9]+]]:_(s8) = COPY [[TRUNC5]](s8) + ; CHECK-NEXT: [[COPY47:%[0-9]+]]:_(s8) = COPY [[TRUNC8]](s8) + ; CHECK-NEXT: [[COPY48:%[0-9]+]]:_(s8) = COPY [[TRUNC9]](s8) + ; CHECK-NEXT: [[COPY49:%[0-9]+]]:_(s8) = COPY [[TRUNC12]](s8) + ; CHECK-NEXT: [[COPY50:%[0-9]+]]:_(s8) = COPY [[TRUNC13]](s8) + ; CHECK-NEXT: [[COPY51:%[0-9]+]]:_(s8) = COPY [[TRUNC16]](s8) + ; CHECK-NEXT: [[COPY52:%[0-9]+]]:_(s8) = COPY [[TRUNC17]](s8) + ; CHECK-NEXT: [[COPY53:%[0-9]+]]:_(s8) = COPY [[TRUNC20]](s8) + ; CHECK-NEXT: [[COPY54:%[0-9]+]]:_(s8) = COPY [[TRUNC21]](s8) + ; CHECK-NEXT: [[COPY55:%[0-9]+]]:_(s8) = COPY [[TRUNC24]](s8) + ; CHECK-NEXT: [[COPY56:%[0-9]+]]:_(s8) = COPY [[TRUNC25]](s8) + ; CHECK-NEXT: [[TRUNC28:%[0-9]+]]:_(s16) = G_TRUNC [[UV14]](s32) + ; CHECK-NEXT: [[AND14:%[0-9]+]]:_(s16) = G_AND [[TRUNC28]], [[C]] + ; CHECK-NEXT: [[TRUNC29:%[0-9]+]]:_(s16) = G_TRUNC [[UV15]](s32) + ; CHECK-NEXT: [[AND15:%[0-9]+]]:_(s16) = G_AND [[TRUNC29]], [[C]] ; CHECK-NEXT: [[SHL7:%[0-9]+]]:_(s16) = G_SHL 
[[AND15]], [[C1]](s16) ; CHECK-NEXT: [[OR7:%[0-9]+]]:_(s16) = G_OR [[AND14]], [[SHL7]] ; CHECK-NEXT: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[OR]](s16) @@ -2210,21 +2336,20 @@ body: | ; CHECK-NEXT: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[ZEXT3]], [[C]](s32) ; CHECK-NEXT: [[OR5:%[0-9]+]]:_(s32) = G_OR [[ZEXT2]], [[SHL5]] ; CHECK-NEXT: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[OR4]](s32), [[OR5]](s32) - ; CHECK-NEXT: [[UV6:%[0-9]+]]:_(<2 x s16>), [[UV7:%[0-9]+]]:_(<2 x s16>), [[UV8:%[0-9]+]]:_(<2 x s16>), [[UV9:%[0-9]+]]:_(<2 x s16>), [[UV10:%[0-9]+]]:_(<2 x s16>), [[UV11:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[COPY]](<12 x s16>) - ; CHECK-NEXT: [[BITCAST4:%[0-9]+]]:_(s32) = G_BITCAST [[UV10]](<2 x s16>) + ; CHECK-NEXT: [[BITCAST4:%[0-9]+]]:_(s32) = G_BITCAST [[UV4]](<2 x s16>) ; CHECK-NEXT: [[TRUNC8:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST4]](s32) ; CHECK-NEXT: [[LSHR4:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST4]], [[C]](s32) ; CHECK-NEXT: [[TRUNC9:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR4]](s32) - ; CHECK-NEXT: [[BITCAST5:%[0-9]+]]:_(s32) = G_BITCAST [[UV11]](<2 x s16>) + ; CHECK-NEXT: [[BITCAST5:%[0-9]+]]:_(s32) = G_BITCAST [[UV5]](<2 x s16>) ; CHECK-NEXT: [[TRUNC10:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST5]](s32) ; CHECK-NEXT: [[LSHR5:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST5]], [[C]](s32) ; CHECK-NEXT: [[TRUNC11:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR5]](s32) - ; CHECK-NEXT: [[UV12:%[0-9]+]]:_(<2 x s16>), [[UV13:%[0-9]+]]:_(<2 x s16>), [[UV14:%[0-9]+]]:_(<2 x s16>), [[UV15:%[0-9]+]]:_(<2 x s16>), [[UV16:%[0-9]+]]:_(<2 x s16>), [[UV17:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[COPY1]](<12 x s16>) - ; CHECK-NEXT: [[BITCAST6:%[0-9]+]]:_(s32) = G_BITCAST [[UV12]](<2 x s16>) + ; CHECK-NEXT: [[UV6:%[0-9]+]]:_(<2 x s16>), [[UV7:%[0-9]+]]:_(<2 x s16>), [[UV8:%[0-9]+]]:_(<2 x s16>), [[UV9:%[0-9]+]]:_(<2 x s16>), [[UV10:%[0-9]+]]:_(<2 x s16>), [[UV11:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[COPY1]](<12 x s16>) + ; CHECK-NEXT: [[BITCAST6:%[0-9]+]]:_(s32) = G_BITCAST [[UV6]](<2 x s16>) ; CHECK-NEXT: [[TRUNC12:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST6]](s32) ; CHECK-NEXT: [[LSHR6:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST6]], [[C]](s32) ; CHECK-NEXT: [[TRUNC13:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR6]](s32) - ; CHECK-NEXT: [[BITCAST7:%[0-9]+]]:_(s32) = G_BITCAST [[UV13]](<2 x s16>) + ; CHECK-NEXT: [[BITCAST7:%[0-9]+]]:_(s32) = G_BITCAST [[UV7]](<2 x s16>) ; CHECK-NEXT: [[TRUNC14:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST7]](s32) ; CHECK-NEXT: [[LSHR7:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST7]], [[C]](s32) ; CHECK-NEXT: [[TRUNC15:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR7]](s32) @@ -2253,20 +2378,19 @@ body: | ; CHECK-NEXT: [[SHL11:%[0-9]+]]:_(s32) = G_SHL [[ZEXT7]], [[C]](s32) ; CHECK-NEXT: [[OR11:%[0-9]+]]:_(s32) = G_OR [[ZEXT6]], [[SHL11]] ; CHECK-NEXT: [[MV1:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[OR10]](s32), [[OR11]](s32) - ; CHECK-NEXT: [[UV18:%[0-9]+]]:_(<2 x s16>), [[UV19:%[0-9]+]]:_(<2 x s16>), [[UV20:%[0-9]+]]:_(<2 x s16>), [[UV21:%[0-9]+]]:_(<2 x s16>), [[UV22:%[0-9]+]]:_(<2 x s16>), [[UV23:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[COPY1]](<12 x s16>) - ; CHECK-NEXT: [[BITCAST8:%[0-9]+]]:_(s32) = G_BITCAST [[UV20]](<2 x s16>) + ; CHECK-NEXT: [[BITCAST8:%[0-9]+]]:_(s32) = G_BITCAST [[UV8]](<2 x s16>) ; CHECK-NEXT: [[TRUNC16:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST8]](s32) ; CHECK-NEXT: [[LSHR8:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST8]], [[C]](s32) ; CHECK-NEXT: [[TRUNC17:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR8]](s32) - ; CHECK-NEXT: [[BITCAST9:%[0-9]+]]:_(s32) = G_BITCAST [[UV21]](<2 x s16>) + ; CHECK-NEXT: [[BITCAST9:%[0-9]+]]:_(s32) = G_BITCAST [[UV9]](<2 x s16>) ; 
CHECK-NEXT: [[TRUNC18:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST9]](s32) ; CHECK-NEXT: [[LSHR9:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST9]], [[C]](s32) ; CHECK-NEXT: [[TRUNC19:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR9]](s32) - ; CHECK-NEXT: [[BITCAST10:%[0-9]+]]:_(s32) = G_BITCAST [[UV22]](<2 x s16>) + ; CHECK-NEXT: [[BITCAST10:%[0-9]+]]:_(s32) = G_BITCAST [[UV10]](<2 x s16>) ; CHECK-NEXT: [[TRUNC20:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST10]](s32) ; CHECK-NEXT: [[LSHR10:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST10]], [[C]](s32) ; CHECK-NEXT: [[TRUNC21:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR10]](s32) - ; CHECK-NEXT: [[BITCAST11:%[0-9]+]]:_(s32) = G_BITCAST [[UV23]](<2 x s16>) + ; CHECK-NEXT: [[BITCAST11:%[0-9]+]]:_(s32) = G_BITCAST [[UV11]](<2 x s16>) ; CHECK-NEXT: [[TRUNC22:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST11]](s32) ; CHECK-NEXT: [[LSHR11:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST11]], [[C]](s32) ; CHECK-NEXT: [[TRUNC23:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR11]](s32) @@ -2388,35 +2512,50 @@ body: | ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 ; CHECK-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) ; CHECK-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) + ; CHECK-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>) + ; CHECK-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST1]](s32) + ; CHECK-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C]](s32) + ; CHECK-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR1]](s32) + ; CHECK-NEXT: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[UV2]](<2 x s16>) + ; CHECK-NEXT: [[TRUNC4:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST2]](s32) + ; CHECK-NEXT: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C]](s32) + ; CHECK-NEXT: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR2]](s32) + ; CHECK-NEXT: [[BITCAST3:%[0-9]+]]:_(s32) = G_BITCAST [[UV3]](<2 x s16>) + ; CHECK-NEXT: [[TRUNC6:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST3]](s32) + ; CHECK-NEXT: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST3]], [[C]](s32) + ; CHECK-NEXT: [[TRUNC7:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR3]](s32) + ; CHECK-NEXT: [[TRUNC8:%[0-9]+]]:_(s8) = G_TRUNC [[BITCAST]](s32) + ; CHECK-NEXT: [[TRUNC9:%[0-9]+]]:_(s8) = G_TRUNC [[LSHR]](s32) ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s16) = G_CONSTANT i16 255 ; CHECK-NEXT: [[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC]], [[C1]] ; CHECK-NEXT: [[AND1:%[0-9]+]]:_(s16) = G_AND [[TRUNC1]], [[C1]] ; CHECK-NEXT: [[C2:%[0-9]+]]:_(s16) = G_CONSTANT i16 8 ; CHECK-NEXT: [[SHL:%[0-9]+]]:_(s16) = G_SHL [[AND1]], [[C2]](s16) ; CHECK-NEXT: [[OR:%[0-9]+]]:_(s16) = G_OR [[AND]], [[SHL]] - ; CHECK-NEXT: [[UV4:%[0-9]+]]:_(<2 x s16>), [[UV5:%[0-9]+]]:_(<2 x s16>), [[UV6:%[0-9]+]]:_(<2 x s16>), [[UV7:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[COPY]](<8 x s16>) - ; CHECK-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV5]](<2 x s16>) - ; CHECK-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST1]](s32) - ; CHECK-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C]](s32) - ; CHECK-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR1]](s32) + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s8) = COPY [[TRUNC8]](s8) + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(s8) = COPY [[TRUNC9]](s8) + ; CHECK-NEXT: [[TRUNC10:%[0-9]+]]:_(s8) = G_TRUNC [[BITCAST1]](s32) + ; CHECK-NEXT: [[TRUNC11:%[0-9]+]]:_(s8) = G_TRUNC [[LSHR1]](s32) ; CHECK-NEXT: [[AND2:%[0-9]+]]:_(s16) = G_AND [[TRUNC2]], [[C1]] ; CHECK-NEXT: [[AND3:%[0-9]+]]:_(s16) = G_AND [[TRUNC3]], [[C1]] ; CHECK-NEXT: [[SHL1:%[0-9]+]]:_(s16) = G_SHL [[AND3]], [[C2]](s16) ; CHECK-NEXT: [[OR1:%[0-9]+]]:_(s16) = G_OR [[AND2]], [[SHL1]] - ; CHECK-NEXT: [[UV8:%[0-9]+]]:_(<2 x s16>), 
[[UV9:%[0-9]+]]:_(<2 x s16>), [[UV10:%[0-9]+]]:_(<2 x s16>), [[UV11:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[COPY]](<8 x s16>) - ; CHECK-NEXT: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[UV10]](<2 x s16>) - ; CHECK-NEXT: [[TRUNC4:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST2]](s32) - ; CHECK-NEXT: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C]](s32) - ; CHECK-NEXT: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR2]](s32) + ; CHECK-NEXT: [[COPY3:%[0-9]+]]:_(s8) = COPY [[TRUNC8]](s8) + ; CHECK-NEXT: [[COPY4:%[0-9]+]]:_(s8) = COPY [[TRUNC9]](s8) + ; CHECK-NEXT: [[COPY5:%[0-9]+]]:_(s8) = COPY [[TRUNC10]](s8) + ; CHECK-NEXT: [[COPY6:%[0-9]+]]:_(s8) = COPY [[TRUNC11]](s8) + ; CHECK-NEXT: [[TRUNC12:%[0-9]+]]:_(s8) = G_TRUNC [[BITCAST2]](s32) + ; CHECK-NEXT: [[TRUNC13:%[0-9]+]]:_(s8) = G_TRUNC [[LSHR2]](s32) ; CHECK-NEXT: [[AND4:%[0-9]+]]:_(s16) = G_AND [[TRUNC4]], [[C1]] ; CHECK-NEXT: [[AND5:%[0-9]+]]:_(s16) = G_AND [[TRUNC5]], [[C1]] ; CHECK-NEXT: [[SHL2:%[0-9]+]]:_(s16) = G_SHL [[AND5]], [[C2]](s16) ; CHECK-NEXT: [[OR2:%[0-9]+]]:_(s16) = G_OR [[AND4]], [[SHL2]] - ; CHECK-NEXT: [[UV12:%[0-9]+]]:_(<2 x s16>), [[UV13:%[0-9]+]]:_(<2 x s16>), [[UV14:%[0-9]+]]:_(<2 x s16>), [[UV15:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[COPY]](<8 x s16>) - ; CHECK-NEXT: [[BITCAST3:%[0-9]+]]:_(s32) = G_BITCAST [[UV15]](<2 x s16>) - ; CHECK-NEXT: [[TRUNC6:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST3]](s32) - ; CHECK-NEXT: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST3]], [[C]](s32) - ; CHECK-NEXT: [[TRUNC7:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR3]](s32) + ; CHECK-NEXT: [[COPY7:%[0-9]+]]:_(s8) = COPY [[TRUNC8]](s8) + ; CHECK-NEXT: [[COPY8:%[0-9]+]]:_(s8) = COPY [[TRUNC9]](s8) + ; CHECK-NEXT: [[COPY9:%[0-9]+]]:_(s8) = COPY [[TRUNC10]](s8) + ; CHECK-NEXT: [[COPY10:%[0-9]+]]:_(s8) = COPY [[TRUNC11]](s8) + ; CHECK-NEXT: [[COPY11:%[0-9]+]]:_(s8) = COPY [[TRUNC12]](s8) + ; CHECK-NEXT: [[COPY12:%[0-9]+]]:_(s8) = COPY [[TRUNC13]](s8) ; CHECK-NEXT: [[AND6:%[0-9]+]]:_(s16) = G_AND [[TRUNC6]], [[C1]] ; CHECK-NEXT: [[AND7:%[0-9]+]]:_(s16) = G_AND [[TRUNC7]], [[C1]] ; CHECK-NEXT: [[SHL3:%[0-9]+]]:_(s16) = G_SHL [[AND7]], [[C2]](s16) diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-extract-vector-elt.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-extract-vector-elt.mir index 55dfdbcc9ec0..c624320344b1 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-extract-vector-elt.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-extract-vector-elt.mir @@ -9,8 +9,9 @@ body: | liveins: $vgpr0_vgpr1 ; CHECK-LABEL: name: extract_vector_elt_0_v2i32 ; CHECK: [[COPY:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr0_vgpr1 - ; CHECK-NEXT: [[EXTRACT:%[0-9]+]]:_(s32) = G_EXTRACT [[COPY]](<2 x s32>), 0 - ; CHECK-NEXT: $vgpr0 = COPY [[EXTRACT]](s32) + ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<2 x s32>) + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY [[UV]](s32) + ; CHECK-NEXT: $vgpr0 = COPY [[COPY1]](s32) %0:_(<2 x s32>) = COPY $vgpr0_vgpr1 %1:_(s32) = G_CONSTANT i32 0 %2:_(s32) = G_EXTRACT_VECTOR_ELT %0, %1 @@ -24,8 +25,9 @@ body: | liveins: $vgpr0_vgpr1 ; CHECK-LABEL: name: extract_vector_elt_1_v2i32 ; CHECK: [[COPY:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr0_vgpr1 - ; CHECK-NEXT: [[EXTRACT:%[0-9]+]]:_(s32) = G_EXTRACT [[COPY]](<2 x s32>), 32 - ; CHECK-NEXT: $vgpr0 = COPY [[EXTRACT]](s32) + ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<2 x s32>) + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY [[UV1]](s32) + ; CHECK-NEXT: $vgpr0 = COPY [[COPY1]](s32) %0:_(<2 x s32>) = COPY 
$vgpr0_vgpr1 %1:_(s32) = G_CONSTANT i32 1 %2:_(s32) = G_EXTRACT_VECTOR_ELT %0, %1 @@ -39,8 +41,9 @@ body: | liveins: $vgpr0_vgpr1 ; CHECK-LABEL: name: extract_vector_elt_2_v2i32 ; CHECK: [[COPY:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr0_vgpr1 - ; CHECK-NEXT: [[EXTRACT:%[0-9]+]]:_(s32) = G_EXTRACT [[COPY]](<2 x s32>), 32 - ; CHECK-NEXT: $vgpr0 = COPY [[EXTRACT]](s32) + ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<2 x s32>) + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY [[UV1]](s32) + ; CHECK-NEXT: $vgpr0 = COPY [[COPY1]](s32) %0:_(<2 x s32>) = COPY $vgpr0_vgpr1 %1:_(s32) = G_CONSTANT i32 1 %2:_(s32) = G_EXTRACT_VECTOR_ELT %0, %1 @@ -54,8 +57,9 @@ body: | liveins: $vgpr0_vgpr1_vgpr2 ; CHECK-LABEL: name: extract_vector_elt_0_v3i32 ; CHECK: [[COPY:%[0-9]+]]:_(<3 x s32>) = COPY $vgpr0_vgpr1_vgpr2 - ; CHECK-NEXT: [[EXTRACT:%[0-9]+]]:_(s32) = G_EXTRACT [[COPY]](<3 x s32>), 0 - ; CHECK-NEXT: $vgpr0 = COPY [[EXTRACT]](s32) + ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<3 x s32>) + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY [[UV]](s32) + ; CHECK-NEXT: $vgpr0 = COPY [[COPY1]](s32) %0:_(<3 x s32>) = COPY $vgpr0_vgpr1_vgpr2 %1:_(s32) = G_CONSTANT i32 0 %2:_(s32) = G_EXTRACT_VECTOR_ELT %0, %1 @@ -69,8 +73,9 @@ body: | liveins: $vgpr0_vgpr1_vgpr2_vgpr3 ; CHECK-LABEL: name: extract_vector_elt_0_v4i32 ; CHECK: [[COPY:%[0-9]+]]:_(<4 x s32>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3 - ; CHECK-NEXT: [[EXTRACT:%[0-9]+]]:_(s32) = G_EXTRACT [[COPY]](<4 x s32>), 0 - ; CHECK-NEXT: $vgpr0 = COPY [[EXTRACT]](s32) + ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<4 x s32>) + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY [[UV]](s32) + ; CHECK-NEXT: $vgpr0 = COPY [[COPY1]](s32) %0:_(<4 x s32>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3 %1:_(s32) = G_CONSTANT i32 0 %2:_(s32) = G_EXTRACT_VECTOR_ELT %0, %1 @@ -205,8 +210,9 @@ body: | ; CHECK-LABEL: name: extract_vector_elt_0_v2i8_i32 ; CHECK: [[DEF:%[0-9]+]]:_(<2 x s32>) = G_IMPLICIT_DEF - ; CHECK-NEXT: [[EXTRACT:%[0-9]+]]:_(s32) = G_EXTRACT [[DEF]](<2 x s32>), 0 - ; CHECK-NEXT: $vgpr0 = COPY [[EXTRACT]](s32) + ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[DEF]](<2 x s32>) + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY [[UV]](s32) + ; CHECK-NEXT: $vgpr0 = COPY [[COPY]](s32) %0:_(<2 x s8>) = G_IMPLICIT_DEF %1:_(s32) = G_CONSTANT i32 0 %2:_(s8) = G_EXTRACT_VECTOR_ELT %0, %1 @@ -241,8 +247,9 @@ body: | ; CHECK-LABEL: name: extract_vector_elt_0_v2i1_i32 ; CHECK: [[DEF:%[0-9]+]]:_(<2 x s32>) = G_IMPLICIT_DEF - ; CHECK-NEXT: [[EXTRACT:%[0-9]+]]:_(s32) = G_EXTRACT [[DEF]](<2 x s32>), 0 - ; CHECK-NEXT: $vgpr0 = COPY [[EXTRACT]](s32) + ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[DEF]](<2 x s32>) + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY [[UV]](s32) + ; CHECK-NEXT: $vgpr0 = COPY [[COPY]](s32) %0:_(<2 x s1>) = G_IMPLICIT_DEF %1:_(s32) = G_CONSTANT i32 0 %2:_(s1) = G_EXTRACT_VECTOR_ELT %0, %1 @@ -258,8 +265,9 @@ body: | ; CHECK-LABEL: name: extract_vector_elt_0_v2i1_i1 ; CHECK: [[DEF:%[0-9]+]]:_(<2 x s32>) = G_IMPLICIT_DEF - ; CHECK-NEXT: [[EXTRACT:%[0-9]+]]:_(s32) = G_EXTRACT [[DEF]](<2 x s32>), 0 - ; CHECK-NEXT: $vgpr0 = COPY [[EXTRACT]](s32) + ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[DEF]](<2 x s32>) + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY [[UV]](s32) + ; CHECK-NEXT: $vgpr0 = COPY 
[[COPY]](s32) %0:_(<2 x s1>) = G_IMPLICIT_DEF %1:_(s1) = G_CONSTANT i1 false %2:_(s1) = G_EXTRACT_VECTOR_ELT %0, %1 @@ -301,11 +309,8 @@ body: | ; CHECK-LABEL: name: extract_vector_elt_v2s8_constidx_0_i32 ; CHECK: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 - ; CHECK-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[COPY]], [[C]](s32) - ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[LSHR]](s32) - ; CHECK-NEXT: [[EXTRACT:%[0-9]+]]:_(s32) = G_EXTRACT [[BUILD_VECTOR]](<2 x s32>), 0 - ; CHECK-NEXT: $vgpr0 = COPY [[EXTRACT]](s32) + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY [[COPY]](s32) + ; CHECK-NEXT: $vgpr0 = COPY [[COPY2]](s32) %0:_(s32) = COPY $vgpr0 %1:_(s32) = COPY $vgpr1 %2:_(s16) = G_TRUNC %0 @@ -328,9 +333,8 @@ body: | ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 ; CHECK-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[COPY]], [[C]](s32) - ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[LSHR]](s32) - ; CHECK-NEXT: [[EXTRACT:%[0-9]+]]:_(s32) = G_EXTRACT [[BUILD_VECTOR]](<2 x s32>), 32 - ; CHECK-NEXT: $vgpr0 = COPY [[EXTRACT]](s32) + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY [[LSHR]](s32) + ; CHECK-NEXT: $vgpr0 = COPY [[COPY2]](s32) %0:_(s32) = COPY $vgpr0 %1:_(s32) = COPY $vgpr1 %2:_(s16) = G_TRUNC %0 @@ -1017,8 +1021,9 @@ body: | ; CHECK-LABEL: name: extract_vector_elt_v3s16_idx0_i32 ; CHECK: [[COPY:%[0-9]+]]:_(<3 x s32>) = COPY $vgpr0_vgpr1_vgpr2 - ; CHECK-NEXT: [[EXTRACT:%[0-9]+]]:_(s32) = G_EXTRACT [[COPY]](<3 x s32>), 0 - ; CHECK-NEXT: $vgpr0 = COPY [[EXTRACT]](s32) + ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<3 x s32>) + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY [[UV]](s32) + ; CHECK-NEXT: $vgpr0 = COPY [[COPY1]](s32) %0:_(<3 x s32>) = COPY $vgpr0_vgpr1_vgpr2 %1:_(s32) = G_CONSTANT i32 0 %2:_(<3 x s16>) = G_TRUNC %0 @@ -1036,8 +1041,9 @@ body: | ; CHECK-LABEL: name: extract_vector_elt_v3s16_idx1_i32 ; CHECK: [[COPY:%[0-9]+]]:_(<3 x s32>) = COPY $vgpr0_vgpr1_vgpr2 - ; CHECK-NEXT: [[EXTRACT:%[0-9]+]]:_(s32) = G_EXTRACT [[COPY]](<3 x s32>), 32 - ; CHECK-NEXT: $vgpr0 = COPY [[EXTRACT]](s32) + ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<3 x s32>) + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY [[UV1]](s32) + ; CHECK-NEXT: $vgpr0 = COPY [[COPY1]](s32) %0:_(<3 x s32>) = COPY $vgpr0_vgpr1_vgpr2 %1:_(s32) = G_CONSTANT i32 1 %2:_(<3 x s16>) = G_TRUNC %0 @@ -1055,8 +1061,9 @@ body: | ; CHECK-LABEL: name: extract_vector_elt_v3s16_idx2_i32 ; CHECK: [[COPY:%[0-9]+]]:_(<3 x s32>) = COPY $vgpr0_vgpr1_vgpr2 - ; CHECK-NEXT: [[EXTRACT:%[0-9]+]]:_(s32) = G_EXTRACT [[COPY]](<3 x s32>), 64 - ; CHECK-NEXT: $vgpr0 = COPY [[EXTRACT]](s32) + ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<3 x s32>) + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY [[UV2]](s32) + ; CHECK-NEXT: $vgpr0 = COPY [[COPY1]](s32) %0:_(<3 x s32>) = COPY $vgpr0_vgpr1_vgpr2 %1:_(s32) = G_CONSTANT i32 2 %2:_(<3 x s16>) = G_TRUNC %0 @@ -1164,8 +1171,9 @@ body: | ; CHECK-LABEL: name: extract_vector_elt_0_v2i64 ; CHECK: [[COPY:%[0-9]+]]:_(<2 x s64>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3 - ; CHECK-NEXT: [[EXTRACT:%[0-9]+]]:_(s64) = G_EXTRACT [[COPY]](<2 x s64>), 0 - ; CHECK-NEXT: $vgpr0_vgpr1 = COPY 
[[EXTRACT]](s64) + ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s64), [[UV1:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[COPY]](<2 x s64>) + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s64) = COPY [[UV]](s64) + ; CHECK-NEXT: $vgpr0_vgpr1 = COPY [[COPY1]](s64) %0:_(<2 x s64>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3 %1:_(s32) = G_CONSTANT i32 0 %2:_(s64) = G_EXTRACT_VECTOR_ELT %0, %1 @@ -1181,8 +1189,9 @@ body: | ; CHECK-LABEL: name: extract_vector_elt_0_v8i64 ; CHECK: [[DEF:%[0-9]+]]:_(<8 x s64>) = G_IMPLICIT_DEF - ; CHECK-NEXT: [[EXTRACT:%[0-9]+]]:_(s64) = G_EXTRACT [[DEF]](<8 x s64>), 0 - ; CHECK-NEXT: $vgpr0_vgpr1 = COPY [[EXTRACT]](s64) + ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s64), [[UV1:%[0-9]+]]:_(s64), [[UV2:%[0-9]+]]:_(s64), [[UV3:%[0-9]+]]:_(s64), [[UV4:%[0-9]+]]:_(s64), [[UV5:%[0-9]+]]:_(s64), [[UV6:%[0-9]+]]:_(s64), [[UV7:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[DEF]](<8 x s64>) + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s64) = COPY [[UV]](s64) + ; CHECK-NEXT: $vgpr0_vgpr1 = COPY [[COPY]](s64) %0:_(<8 x s64>) = G_IMPLICIT_DEF %1:_(s32) = G_CONSTANT i32 0 %2:_(s64) = G_EXTRACT_VECTOR_ELT %0, %1 @@ -1198,8 +1207,9 @@ body: | ; CHECK-LABEL: name: extract_vector_elt_0_v16i64 ; CHECK: [[DEF:%[0-9]+]]:_(<16 x s64>) = G_IMPLICIT_DEF - ; CHECK-NEXT: [[EXTRACT:%[0-9]+]]:_(s64) = G_EXTRACT [[DEF]](<16 x s64>), 0 - ; CHECK-NEXT: $vgpr0_vgpr1 = COPY [[EXTRACT]](s64) + ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s64), [[UV1:%[0-9]+]]:_(s64), [[UV2:%[0-9]+]]:_(s64), [[UV3:%[0-9]+]]:_(s64), [[UV4:%[0-9]+]]:_(s64), [[UV5:%[0-9]+]]:_(s64), [[UV6:%[0-9]+]]:_(s64), [[UV7:%[0-9]+]]:_(s64), [[UV8:%[0-9]+]]:_(s64), [[UV9:%[0-9]+]]:_(s64), [[UV10:%[0-9]+]]:_(s64), [[UV11:%[0-9]+]]:_(s64), [[UV12:%[0-9]+]]:_(s64), [[UV13:%[0-9]+]]:_(s64), [[UV14:%[0-9]+]]:_(s64), [[UV15:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[DEF]](<16 x s64>) + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s64) = COPY [[UV]](s64) + ; CHECK-NEXT: $vgpr0_vgpr1 = COPY [[COPY]](s64) %0:_(<16 x s64>) = G_IMPLICIT_DEF %1:_(s32) = G_CONSTANT i32 0 %2:_(s64) = G_EXTRACT_VECTOR_ELT %0, %1 @@ -1215,8 +1225,9 @@ body: | liveins: $vgpr0_vgpr1_vgpr2_vgpr3 ; CHECK-LABEL: name: extract_vector_elt_look_through_trunc_0_v4i32 ; CHECK: [[COPY:%[0-9]+]]:_(<4 x s32>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3 - ; CHECK-NEXT: [[EXTRACT:%[0-9]+]]:_(s32) = G_EXTRACT [[COPY]](<4 x s32>), 0 - ; CHECK-NEXT: $vgpr0 = COPY [[EXTRACT]](s32) + ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<4 x s32>) + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY [[UV]](s32) + ; CHECK-NEXT: $vgpr0 = COPY [[COPY1]](s32) %0:_(<4 x s32>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3 %1:_(s64) = G_CONSTANT i64 0 %2:_(s32) = G_TRUNC %1 @@ -1234,8 +1245,9 @@ body: | ; CHECK-LABEL: name: extract_vector_elt_7_v64s32 ; CHECK: [[COPY:%[0-9]+]]:_(p1) = COPY $sgpr0_sgpr1 ; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(<16 x s32>) = G_LOAD [[COPY]](p1) :: (load (<16 x s32>), align 4, addrspace 4) - ; CHECK-NEXT: [[EXTRACT:%[0-9]+]]:_(s32) = G_EXTRACT [[LOAD]](<16 x s32>), 224 - ; CHECK-NEXT: S_ENDPGM 0, implicit [[EXTRACT]](s32) + ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32), [[UV6:%[0-9]+]]:_(s32), [[UV7:%[0-9]+]]:_(s32), [[UV8:%[0-9]+]]:_(s32), [[UV9:%[0-9]+]]:_(s32), [[UV10:%[0-9]+]]:_(s32), [[UV11:%[0-9]+]]:_(s32), [[UV12:%[0-9]+]]:_(s32), [[UV13:%[0-9]+]]:_(s32), [[UV14:%[0-9]+]]:_(s32), [[UV15:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[LOAD]](<16 x s32>) + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY [[UV7]](s32) + ; 
CHECK-NEXT: S_ENDPGM 0, implicit [[COPY1]](s32) %0:_(p1) = COPY $sgpr0_sgpr1 %1:_(s32) = G_CONSTANT i32 7 %2:_(<64 x s32>) = G_LOAD %0 :: (load (<64 x s32>), align 4, addrspace 4) @@ -1255,8 +1267,9 @@ body: | ; CHECK-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 128 ; CHECK-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64) ; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(<16 x s32>) = G_LOAD [[PTR_ADD]](p1) :: (load (<16 x s32>) from unknown-address + 128, align 4, addrspace 4) - ; CHECK-NEXT: [[EXTRACT:%[0-9]+]]:_(s32) = G_EXTRACT [[LOAD]](<16 x s32>), 32 - ; CHECK-NEXT: S_ENDPGM 0, implicit [[EXTRACT]](s32) + ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32), [[UV6:%[0-9]+]]:_(s32), [[UV7:%[0-9]+]]:_(s32), [[UV8:%[0-9]+]]:_(s32), [[UV9:%[0-9]+]]:_(s32), [[UV10:%[0-9]+]]:_(s32), [[UV11:%[0-9]+]]:_(s32), [[UV12:%[0-9]+]]:_(s32), [[UV13:%[0-9]+]]:_(s32), [[UV14:%[0-9]+]]:_(s32), [[UV15:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[LOAD]](<16 x s32>) + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY [[UV1]](s32) + ; CHECK-NEXT: S_ENDPGM 0, implicit [[COPY1]](s32) %0:_(p1) = COPY $sgpr0_sgpr1 %1:_(s32) = G_CONSTANT i32 33 %2:_(<64 x s32>) = G_LOAD %0 :: (load (<64 x s32>), align 4, addrspace 4) @@ -1294,218 +1307,13 @@ body: | ; CHECK-LABEL: name: extract_vector_elt_33_v64p3 ; CHECK: [[COPY:%[0-9]+]]:_(p1) = COPY $sgpr0_sgpr1 - ; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(<16 x s32>) = G_LOAD [[COPY]](p1) :: (load (<16 x s32>), align 4, addrspace 4) - ; CHECK-NEXT: [[BITCAST:%[0-9]+]]:_(<16 x p3>) = G_BITCAST [[LOAD]](<16 x s32>) - ; CHECK-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 64 + ; CHECK-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 128 ; CHECK-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64) - ; CHECK-NEXT: [[LOAD1:%[0-9]+]]:_(<16 x s32>) = G_LOAD [[PTR_ADD]](p1) :: (load (<16 x s32>) from unknown-address + 64, align 4, addrspace 4) - ; CHECK-NEXT: [[BITCAST1:%[0-9]+]]:_(<16 x p3>) = G_BITCAST [[LOAD1]](<16 x s32>) - ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 128 - ; CHECK-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C1]](s64) - ; CHECK-NEXT: [[LOAD2:%[0-9]+]]:_(<16 x s32>) = G_LOAD [[PTR_ADD1]](p1) :: (load (<16 x s32>) from unknown-address + 128, align 4, addrspace 4) - ; CHECK-NEXT: [[BITCAST2:%[0-9]+]]:_(<16 x p3>) = G_BITCAST [[LOAD2]](<16 x s32>) - ; CHECK-NEXT: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 192 - ; CHECK-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C2]](s64) - ; CHECK-NEXT: [[LOAD3:%[0-9]+]]:_(<16 x s32>) = G_LOAD [[PTR_ADD2]](p1) :: (load (<16 x s32>) from unknown-address + 192, align 4, addrspace 4) - ; CHECK-NEXT: [[BITCAST3:%[0-9]+]]:_(<16 x p3>) = G_BITCAST [[LOAD3]](<16 x s32>) - ; CHECK-NEXT: [[FRAME_INDEX:%[0-9]+]]:_(p5) = G_FRAME_INDEX %stack.0 + ; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(<16 x s32>) = G_LOAD [[PTR_ADD]](p1) :: (load (<16 x s32>) from unknown-address + 128, align 4, addrspace 4) + ; CHECK-NEXT: [[BITCAST:%[0-9]+]]:_(<16 x p3>) = G_BITCAST [[LOAD]](<16 x s32>) ; CHECK-NEXT: [[UV:%[0-9]+]]:_(p3), [[UV1:%[0-9]+]]:_(p3), [[UV2:%[0-9]+]]:_(p3), [[UV3:%[0-9]+]]:_(p3), [[UV4:%[0-9]+]]:_(p3), [[UV5:%[0-9]+]]:_(p3), [[UV6:%[0-9]+]]:_(p3), [[UV7:%[0-9]+]]:_(p3), [[UV8:%[0-9]+]]:_(p3), [[UV9:%[0-9]+]]:_(p3), [[UV10:%[0-9]+]]:_(p3), [[UV11:%[0-9]+]]:_(p3), [[UV12:%[0-9]+]]:_(p3), [[UV13:%[0-9]+]]:_(p3), [[UV14:%[0-9]+]]:_(p3), [[UV15:%[0-9]+]]:_(p3) = G_UNMERGE_VALUES [[BITCAST]](<16 x p3>) - ; CHECK-NEXT: [[UV16:%[0-9]+]]:_(p3), 
[[UV17:%[0-9]+]]:_(p3), [[UV18:%[0-9]+]]:_(p3), [[UV19:%[0-9]+]]:_(p3), [[UV20:%[0-9]+]]:_(p3), [[UV21:%[0-9]+]]:_(p3), [[UV22:%[0-9]+]]:_(p3), [[UV23:%[0-9]+]]:_(p3), [[UV24:%[0-9]+]]:_(p3), [[UV25:%[0-9]+]]:_(p3), [[UV26:%[0-9]+]]:_(p3), [[UV27:%[0-9]+]]:_(p3), [[UV28:%[0-9]+]]:_(p3), [[UV29:%[0-9]+]]:_(p3), [[UV30:%[0-9]+]]:_(p3), [[UV31:%[0-9]+]]:_(p3) = G_UNMERGE_VALUES [[BITCAST1]](<16 x p3>) - ; CHECK-NEXT: [[UV32:%[0-9]+]]:_(p3), [[UV33:%[0-9]+]]:_(p3), [[UV34:%[0-9]+]]:_(p3), [[UV35:%[0-9]+]]:_(p3), [[UV36:%[0-9]+]]:_(p3), [[UV37:%[0-9]+]]:_(p3), [[UV38:%[0-9]+]]:_(p3), [[UV39:%[0-9]+]]:_(p3), [[UV40:%[0-9]+]]:_(p3), [[UV41:%[0-9]+]]:_(p3), [[UV42:%[0-9]+]]:_(p3), [[UV43:%[0-9]+]]:_(p3), [[UV44:%[0-9]+]]:_(p3), [[UV45:%[0-9]+]]:_(p3), [[UV46:%[0-9]+]]:_(p3), [[UV47:%[0-9]+]]:_(p3) = G_UNMERGE_VALUES [[BITCAST2]](<16 x p3>) - ; CHECK-NEXT: [[UV48:%[0-9]+]]:_(p3), [[UV49:%[0-9]+]]:_(p3), [[UV50:%[0-9]+]]:_(p3), [[UV51:%[0-9]+]]:_(p3), [[UV52:%[0-9]+]]:_(p3), [[UV53:%[0-9]+]]:_(p3), [[UV54:%[0-9]+]]:_(p3), [[UV55:%[0-9]+]]:_(p3), [[UV56:%[0-9]+]]:_(p3), [[UV57:%[0-9]+]]:_(p3), [[UV58:%[0-9]+]]:_(p3), [[UV59:%[0-9]+]]:_(p3), [[UV60:%[0-9]+]]:_(p3), [[UV61:%[0-9]+]]:_(p3), [[UV62:%[0-9]+]]:_(p3), [[UV63:%[0-9]+]]:_(p3) = G_UNMERGE_VALUES [[BITCAST3]](<16 x p3>) - ; CHECK-NEXT: G_STORE [[UV]](p3), [[FRAME_INDEX]](p5) :: (store (p3) into %stack.0, align 256, addrspace 5) - ; CHECK-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 - ; CHECK-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p5) = G_PTR_ADD [[FRAME_INDEX]], [[C3]](s32) - ; CHECK-NEXT: G_STORE [[UV1]](p3), [[PTR_ADD3]](p5) :: (store (p3) into %stack.0 + 4, basealign 256, addrspace 5) - ; CHECK-NEXT: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 - ; CHECK-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p5) = G_PTR_ADD [[FRAME_INDEX]], [[C4]](s32) - ; CHECK-NEXT: G_STORE [[UV2]](p3), [[PTR_ADD4]](p5) :: (store (p3) into %stack.0 + 8, align 8, basealign 256, addrspace 5) - ; CHECK-NEXT: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 12 - ; CHECK-NEXT: [[PTR_ADD5:%[0-9]+]]:_(p5) = G_PTR_ADD [[FRAME_INDEX]], [[C5]](s32) - ; CHECK-NEXT: G_STORE [[UV3]](p3), [[PTR_ADD5]](p5) :: (store (p3) into %stack.0 + 12, basealign 256, addrspace 5) - ; CHECK-NEXT: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; CHECK-NEXT: [[PTR_ADD6:%[0-9]+]]:_(p5) = G_PTR_ADD [[FRAME_INDEX]], [[C6]](s32) - ; CHECK-NEXT: G_STORE [[UV4]](p3), [[PTR_ADD6]](p5) :: (store (p3) into %stack.0 + 16, align 16, basealign 256, addrspace 5) - ; CHECK-NEXT: [[C7:%[0-9]+]]:_(s32) = G_CONSTANT i32 20 - ; CHECK-NEXT: [[PTR_ADD7:%[0-9]+]]:_(p5) = G_PTR_ADD [[FRAME_INDEX]], [[C7]](s32) - ; CHECK-NEXT: G_STORE [[UV5]](p3), [[PTR_ADD7]](p5) :: (store (p3) into %stack.0 + 20, basealign 256, addrspace 5) - ; CHECK-NEXT: [[C8:%[0-9]+]]:_(s32) = G_CONSTANT i32 24 - ; CHECK-NEXT: [[PTR_ADD8:%[0-9]+]]:_(p5) = G_PTR_ADD [[FRAME_INDEX]], [[C8]](s32) - ; CHECK-NEXT: G_STORE [[UV6]](p3), [[PTR_ADD8]](p5) :: (store (p3) into %stack.0 + 24, align 8, basealign 256, addrspace 5) - ; CHECK-NEXT: [[C9:%[0-9]+]]:_(s32) = G_CONSTANT i32 28 - ; CHECK-NEXT: [[PTR_ADD9:%[0-9]+]]:_(p5) = G_PTR_ADD [[FRAME_INDEX]], [[C9]](s32) - ; CHECK-NEXT: G_STORE [[UV7]](p3), [[PTR_ADD9]](p5) :: (store (p3) into %stack.0 + 28, basealign 256, addrspace 5) - ; CHECK-NEXT: [[C10:%[0-9]+]]:_(s32) = G_CONSTANT i32 32 - ; CHECK-NEXT: [[PTR_ADD10:%[0-9]+]]:_(p5) = G_PTR_ADD [[FRAME_INDEX]], [[C10]](s32) - ; CHECK-NEXT: G_STORE [[UV8]](p3), [[PTR_ADD10]](p5) :: (store (p3) into %stack.0 + 32, align 32, basealign 256, addrspace 5) - ; CHECK-NEXT: [[C11:%[0-9]+]]:_(s32) = 
G_CONSTANT i32 36 - ; CHECK-NEXT: [[PTR_ADD11:%[0-9]+]]:_(p5) = G_PTR_ADD [[FRAME_INDEX]], [[C11]](s32) - ; CHECK-NEXT: G_STORE [[UV9]](p3), [[PTR_ADD11]](p5) :: (store (p3) into %stack.0 + 36, basealign 256, addrspace 5) - ; CHECK-NEXT: [[C12:%[0-9]+]]:_(s32) = G_CONSTANT i32 40 - ; CHECK-NEXT: [[PTR_ADD12:%[0-9]+]]:_(p5) = G_PTR_ADD [[FRAME_INDEX]], [[C12]](s32) - ; CHECK-NEXT: G_STORE [[UV10]](p3), [[PTR_ADD12]](p5) :: (store (p3) into %stack.0 + 40, align 8, basealign 256, addrspace 5) - ; CHECK-NEXT: [[C13:%[0-9]+]]:_(s32) = G_CONSTANT i32 44 - ; CHECK-NEXT: [[PTR_ADD13:%[0-9]+]]:_(p5) = G_PTR_ADD [[FRAME_INDEX]], [[C13]](s32) - ; CHECK-NEXT: G_STORE [[UV11]](p3), [[PTR_ADD13]](p5) :: (store (p3) into %stack.0 + 44, basealign 256, addrspace 5) - ; CHECK-NEXT: [[C14:%[0-9]+]]:_(s32) = G_CONSTANT i32 48 - ; CHECK-NEXT: [[PTR_ADD14:%[0-9]+]]:_(p5) = G_PTR_ADD [[FRAME_INDEX]], [[C14]](s32) - ; CHECK-NEXT: G_STORE [[UV12]](p3), [[PTR_ADD14]](p5) :: (store (p3) into %stack.0 + 48, align 16, basealign 256, addrspace 5) - ; CHECK-NEXT: [[C15:%[0-9]+]]:_(s32) = G_CONSTANT i32 52 - ; CHECK-NEXT: [[PTR_ADD15:%[0-9]+]]:_(p5) = G_PTR_ADD [[FRAME_INDEX]], [[C15]](s32) - ; CHECK-NEXT: G_STORE [[UV13]](p3), [[PTR_ADD15]](p5) :: (store (p3) into %stack.0 + 52, basealign 256, addrspace 5) - ; CHECK-NEXT: [[C16:%[0-9]+]]:_(s32) = G_CONSTANT i32 56 - ; CHECK-NEXT: [[PTR_ADD16:%[0-9]+]]:_(p5) = G_PTR_ADD [[FRAME_INDEX]], [[C16]](s32) - ; CHECK-NEXT: G_STORE [[UV14]](p3), [[PTR_ADD16]](p5) :: (store (p3) into %stack.0 + 56, align 8, basealign 256, addrspace 5) - ; CHECK-NEXT: [[C17:%[0-9]+]]:_(s32) = G_CONSTANT i32 60 - ; CHECK-NEXT: [[PTR_ADD17:%[0-9]+]]:_(p5) = G_PTR_ADD [[FRAME_INDEX]], [[C17]](s32) - ; CHECK-NEXT: G_STORE [[UV15]](p3), [[PTR_ADD17]](p5) :: (store (p3) into %stack.0 + 60, basealign 256, addrspace 5) - ; CHECK-NEXT: [[C18:%[0-9]+]]:_(s32) = G_CONSTANT i32 64 - ; CHECK-NEXT: [[PTR_ADD18:%[0-9]+]]:_(p5) = G_PTR_ADD [[FRAME_INDEX]], [[C18]](s32) - ; CHECK-NEXT: G_STORE [[UV16]](p3), [[PTR_ADD18]](p5) :: (store (p3) into %stack.0 + 64, align 64, basealign 256, addrspace 5) - ; CHECK-NEXT: [[C19:%[0-9]+]]:_(s32) = G_CONSTANT i32 68 - ; CHECK-NEXT: [[PTR_ADD19:%[0-9]+]]:_(p5) = G_PTR_ADD [[FRAME_INDEX]], [[C19]](s32) - ; CHECK-NEXT: G_STORE [[UV17]](p3), [[PTR_ADD19]](p5) :: (store (p3) into %stack.0 + 68, basealign 256, addrspace 5) - ; CHECK-NEXT: [[C20:%[0-9]+]]:_(s32) = G_CONSTANT i32 72 - ; CHECK-NEXT: [[PTR_ADD20:%[0-9]+]]:_(p5) = G_PTR_ADD [[FRAME_INDEX]], [[C20]](s32) - ; CHECK-NEXT: G_STORE [[UV18]](p3), [[PTR_ADD20]](p5) :: (store (p3) into %stack.0 + 72, align 8, basealign 256, addrspace 5) - ; CHECK-NEXT: [[C21:%[0-9]+]]:_(s32) = G_CONSTANT i32 76 - ; CHECK-NEXT: [[PTR_ADD21:%[0-9]+]]:_(p5) = G_PTR_ADD [[FRAME_INDEX]], [[C21]](s32) - ; CHECK-NEXT: G_STORE [[UV19]](p3), [[PTR_ADD21]](p5) :: (store (p3) into %stack.0 + 76, basealign 256, addrspace 5) - ; CHECK-NEXT: [[C22:%[0-9]+]]:_(s32) = G_CONSTANT i32 80 - ; CHECK-NEXT: [[PTR_ADD22:%[0-9]+]]:_(p5) = G_PTR_ADD [[FRAME_INDEX]], [[C22]](s32) - ; CHECK-NEXT: G_STORE [[UV20]](p3), [[PTR_ADD22]](p5) :: (store (p3) into %stack.0 + 80, align 16, basealign 256, addrspace 5) - ; CHECK-NEXT: [[C23:%[0-9]+]]:_(s32) = G_CONSTANT i32 84 - ; CHECK-NEXT: [[PTR_ADD23:%[0-9]+]]:_(p5) = G_PTR_ADD [[FRAME_INDEX]], [[C23]](s32) - ; CHECK-NEXT: G_STORE [[UV21]](p3), [[PTR_ADD23]](p5) :: (store (p3) into %stack.0 + 84, basealign 256, addrspace 5) - ; CHECK-NEXT: [[C24:%[0-9]+]]:_(s32) = G_CONSTANT i32 88 - ; CHECK-NEXT: [[PTR_ADD24:%[0-9]+]]:_(p5) = 
G_PTR_ADD [[FRAME_INDEX]], [[C24]](s32) - ; CHECK-NEXT: G_STORE [[UV22]](p3), [[PTR_ADD24]](p5) :: (store (p3) into %stack.0 + 88, align 8, basealign 256, addrspace 5) - ; CHECK-NEXT: [[C25:%[0-9]+]]:_(s32) = G_CONSTANT i32 92 - ; CHECK-NEXT: [[PTR_ADD25:%[0-9]+]]:_(p5) = G_PTR_ADD [[FRAME_INDEX]], [[C25]](s32) - ; CHECK-NEXT: G_STORE [[UV23]](p3), [[PTR_ADD25]](p5) :: (store (p3) into %stack.0 + 92, basealign 256, addrspace 5) - ; CHECK-NEXT: [[C26:%[0-9]+]]:_(s32) = G_CONSTANT i32 96 - ; CHECK-NEXT: [[PTR_ADD26:%[0-9]+]]:_(p5) = G_PTR_ADD [[FRAME_INDEX]], [[C26]](s32) - ; CHECK-NEXT: G_STORE [[UV24]](p3), [[PTR_ADD26]](p5) :: (store (p3) into %stack.0 + 96, align 32, basealign 256, addrspace 5) - ; CHECK-NEXT: [[C27:%[0-9]+]]:_(s32) = G_CONSTANT i32 100 - ; CHECK-NEXT: [[PTR_ADD27:%[0-9]+]]:_(p5) = G_PTR_ADD [[FRAME_INDEX]], [[C27]](s32) - ; CHECK-NEXT: G_STORE [[UV25]](p3), [[PTR_ADD27]](p5) :: (store (p3) into %stack.0 + 100, basealign 256, addrspace 5) - ; CHECK-NEXT: [[C28:%[0-9]+]]:_(s32) = G_CONSTANT i32 104 - ; CHECK-NEXT: [[PTR_ADD28:%[0-9]+]]:_(p5) = G_PTR_ADD [[FRAME_INDEX]], [[C28]](s32) - ; CHECK-NEXT: G_STORE [[UV26]](p3), [[PTR_ADD28]](p5) :: (store (p3) into %stack.0 + 104, align 8, basealign 256, addrspace 5) - ; CHECK-NEXT: [[C29:%[0-9]+]]:_(s32) = G_CONSTANT i32 108 - ; CHECK-NEXT: [[PTR_ADD29:%[0-9]+]]:_(p5) = G_PTR_ADD [[FRAME_INDEX]], [[C29]](s32) - ; CHECK-NEXT: G_STORE [[UV27]](p3), [[PTR_ADD29]](p5) :: (store (p3) into %stack.0 + 108, basealign 256, addrspace 5) - ; CHECK-NEXT: [[C30:%[0-9]+]]:_(s32) = G_CONSTANT i32 112 - ; CHECK-NEXT: [[PTR_ADD30:%[0-9]+]]:_(p5) = G_PTR_ADD [[FRAME_INDEX]], [[C30]](s32) - ; CHECK-NEXT: G_STORE [[UV28]](p3), [[PTR_ADD30]](p5) :: (store (p3) into %stack.0 + 112, align 16, basealign 256, addrspace 5) - ; CHECK-NEXT: [[C31:%[0-9]+]]:_(s32) = G_CONSTANT i32 116 - ; CHECK-NEXT: [[PTR_ADD31:%[0-9]+]]:_(p5) = G_PTR_ADD [[FRAME_INDEX]], [[C31]](s32) - ; CHECK-NEXT: G_STORE [[UV29]](p3), [[PTR_ADD31]](p5) :: (store (p3) into %stack.0 + 116, basealign 256, addrspace 5) - ; CHECK-NEXT: [[C32:%[0-9]+]]:_(s32) = G_CONSTANT i32 120 - ; CHECK-NEXT: [[PTR_ADD32:%[0-9]+]]:_(p5) = G_PTR_ADD [[FRAME_INDEX]], [[C32]](s32) - ; CHECK-NEXT: G_STORE [[UV30]](p3), [[PTR_ADD32]](p5) :: (store (p3) into %stack.0 + 120, align 8, basealign 256, addrspace 5) - ; CHECK-NEXT: [[C33:%[0-9]+]]:_(s32) = G_CONSTANT i32 124 - ; CHECK-NEXT: [[PTR_ADD33:%[0-9]+]]:_(p5) = G_PTR_ADD [[FRAME_INDEX]], [[C33]](s32) - ; CHECK-NEXT: G_STORE [[UV31]](p3), [[PTR_ADD33]](p5) :: (store (p3) into %stack.0 + 124, basealign 256, addrspace 5) - ; CHECK-NEXT: [[C34:%[0-9]+]]:_(s32) = G_CONSTANT i32 128 - ; CHECK-NEXT: [[PTR_ADD34:%[0-9]+]]:_(p5) = G_PTR_ADD [[FRAME_INDEX]], [[C34]](s32) - ; CHECK-NEXT: G_STORE [[UV32]](p3), [[PTR_ADD34]](p5) :: (store (p3) into %stack.0 + 128, align 128, basealign 256, addrspace 5) - ; CHECK-NEXT: [[C35:%[0-9]+]]:_(s32) = G_CONSTANT i32 132 - ; CHECK-NEXT: [[PTR_ADD35:%[0-9]+]]:_(p5) = G_PTR_ADD [[FRAME_INDEX]], [[C35]](s32) - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(p5) = COPY [[PTR_ADD35]](p5) - ; CHECK-NEXT: G_STORE [[UV33]](p3), [[COPY1]](p5) :: (store (p3) into %stack.0 + 132, basealign 256, addrspace 5) - ; CHECK-NEXT: [[C36:%[0-9]+]]:_(s32) = G_CONSTANT i32 136 - ; CHECK-NEXT: [[PTR_ADD36:%[0-9]+]]:_(p5) = G_PTR_ADD [[FRAME_INDEX]], [[C36]](s32) - ; CHECK-NEXT: G_STORE [[UV34]](p3), [[PTR_ADD36]](p5) :: (store (p3) into %stack.0 + 136, align 8, basealign 256, addrspace 5) - ; CHECK-NEXT: [[C37:%[0-9]+]]:_(s32) = G_CONSTANT i32 140 - ; CHECK-NEXT: 
[[PTR_ADD37:%[0-9]+]]:_(p5) = G_PTR_ADD [[FRAME_INDEX]], [[C37]](s32) - ; CHECK-NEXT: G_STORE [[UV35]](p3), [[PTR_ADD37]](p5) :: (store (p3) into %stack.0 + 140, basealign 256, addrspace 5) - ; CHECK-NEXT: [[C38:%[0-9]+]]:_(s32) = G_CONSTANT i32 144 - ; CHECK-NEXT: [[PTR_ADD38:%[0-9]+]]:_(p5) = G_PTR_ADD [[FRAME_INDEX]], [[C38]](s32) - ; CHECK-NEXT: G_STORE [[UV36]](p3), [[PTR_ADD38]](p5) :: (store (p3) into %stack.0 + 144, align 16, basealign 256, addrspace 5) - ; CHECK-NEXT: [[C39:%[0-9]+]]:_(s32) = G_CONSTANT i32 148 - ; CHECK-NEXT: [[PTR_ADD39:%[0-9]+]]:_(p5) = G_PTR_ADD [[FRAME_INDEX]], [[C39]](s32) - ; CHECK-NEXT: G_STORE [[UV37]](p3), [[PTR_ADD39]](p5) :: (store (p3) into %stack.0 + 148, basealign 256, addrspace 5) - ; CHECK-NEXT: [[C40:%[0-9]+]]:_(s32) = G_CONSTANT i32 152 - ; CHECK-NEXT: [[PTR_ADD40:%[0-9]+]]:_(p5) = G_PTR_ADD [[FRAME_INDEX]], [[C40]](s32) - ; CHECK-NEXT: G_STORE [[UV38]](p3), [[PTR_ADD40]](p5) :: (store (p3) into %stack.0 + 152, align 8, basealign 256, addrspace 5) - ; CHECK-NEXT: [[C41:%[0-9]+]]:_(s32) = G_CONSTANT i32 156 - ; CHECK-NEXT: [[PTR_ADD41:%[0-9]+]]:_(p5) = G_PTR_ADD [[FRAME_INDEX]], [[C41]](s32) - ; CHECK-NEXT: G_STORE [[UV39]](p3), [[PTR_ADD41]](p5) :: (store (p3) into %stack.0 + 156, basealign 256, addrspace 5) - ; CHECK-NEXT: [[C42:%[0-9]+]]:_(s32) = G_CONSTANT i32 160 - ; CHECK-NEXT: [[PTR_ADD42:%[0-9]+]]:_(p5) = G_PTR_ADD [[FRAME_INDEX]], [[C42]](s32) - ; CHECK-NEXT: G_STORE [[UV40]](p3), [[PTR_ADD42]](p5) :: (store (p3) into %stack.0 + 160, align 32, basealign 256, addrspace 5) - ; CHECK-NEXT: [[C43:%[0-9]+]]:_(s32) = G_CONSTANT i32 164 - ; CHECK-NEXT: [[PTR_ADD43:%[0-9]+]]:_(p5) = G_PTR_ADD [[FRAME_INDEX]], [[C43]](s32) - ; CHECK-NEXT: G_STORE [[UV41]](p3), [[PTR_ADD43]](p5) :: (store (p3) into %stack.0 + 164, basealign 256, addrspace 5) - ; CHECK-NEXT: [[C44:%[0-9]+]]:_(s32) = G_CONSTANT i32 168 - ; CHECK-NEXT: [[PTR_ADD44:%[0-9]+]]:_(p5) = G_PTR_ADD [[FRAME_INDEX]], [[C44]](s32) - ; CHECK-NEXT: G_STORE [[UV42]](p3), [[PTR_ADD44]](p5) :: (store (p3) into %stack.0 + 168, align 8, basealign 256, addrspace 5) - ; CHECK-NEXT: [[C45:%[0-9]+]]:_(s32) = G_CONSTANT i32 172 - ; CHECK-NEXT: [[PTR_ADD45:%[0-9]+]]:_(p5) = G_PTR_ADD [[FRAME_INDEX]], [[C45]](s32) - ; CHECK-NEXT: G_STORE [[UV43]](p3), [[PTR_ADD45]](p5) :: (store (p3) into %stack.0 + 172, basealign 256, addrspace 5) - ; CHECK-NEXT: [[C46:%[0-9]+]]:_(s32) = G_CONSTANT i32 176 - ; CHECK-NEXT: [[PTR_ADD46:%[0-9]+]]:_(p5) = G_PTR_ADD [[FRAME_INDEX]], [[C46]](s32) - ; CHECK-NEXT: G_STORE [[UV44]](p3), [[PTR_ADD46]](p5) :: (store (p3) into %stack.0 + 176, align 16, basealign 256, addrspace 5) - ; CHECK-NEXT: [[C47:%[0-9]+]]:_(s32) = G_CONSTANT i32 180 - ; CHECK-NEXT: [[PTR_ADD47:%[0-9]+]]:_(p5) = G_PTR_ADD [[FRAME_INDEX]], [[C47]](s32) - ; CHECK-NEXT: G_STORE [[UV45]](p3), [[PTR_ADD47]](p5) :: (store (p3) into %stack.0 + 180, basealign 256, addrspace 5) - ; CHECK-NEXT: [[C48:%[0-9]+]]:_(s32) = G_CONSTANT i32 184 - ; CHECK-NEXT: [[PTR_ADD48:%[0-9]+]]:_(p5) = G_PTR_ADD [[FRAME_INDEX]], [[C48]](s32) - ; CHECK-NEXT: G_STORE [[UV46]](p3), [[PTR_ADD48]](p5) :: (store (p3) into %stack.0 + 184, align 8, basealign 256, addrspace 5) - ; CHECK-NEXT: [[C49:%[0-9]+]]:_(s32) = G_CONSTANT i32 188 - ; CHECK-NEXT: [[PTR_ADD49:%[0-9]+]]:_(p5) = G_PTR_ADD [[FRAME_INDEX]], [[C49]](s32) - ; CHECK-NEXT: G_STORE [[UV47]](p3), [[PTR_ADD49]](p5) :: (store (p3) into %stack.0 + 188, basealign 256, addrspace 5) - ; CHECK-NEXT: [[C50:%[0-9]+]]:_(s32) = G_CONSTANT i32 192 - ; CHECK-NEXT: [[PTR_ADD50:%[0-9]+]]:_(p5) = G_PTR_ADD 
[[FRAME_INDEX]], [[C50]](s32) - ; CHECK-NEXT: G_STORE [[UV48]](p3), [[PTR_ADD50]](p5) :: (store (p3) into %stack.0 + 192, align 64, basealign 256, addrspace 5) - ; CHECK-NEXT: [[C51:%[0-9]+]]:_(s32) = G_CONSTANT i32 196 - ; CHECK-NEXT: [[PTR_ADD51:%[0-9]+]]:_(p5) = G_PTR_ADD [[FRAME_INDEX]], [[C51]](s32) - ; CHECK-NEXT: G_STORE [[UV49]](p3), [[PTR_ADD51]](p5) :: (store (p3) into %stack.0 + 196, basealign 256, addrspace 5) - ; CHECK-NEXT: [[C52:%[0-9]+]]:_(s32) = G_CONSTANT i32 200 - ; CHECK-NEXT: [[PTR_ADD52:%[0-9]+]]:_(p5) = G_PTR_ADD [[FRAME_INDEX]], [[C52]](s32) - ; CHECK-NEXT: G_STORE [[UV50]](p3), [[PTR_ADD52]](p5) :: (store (p3) into %stack.0 + 200, align 8, basealign 256, addrspace 5) - ; CHECK-NEXT: [[C53:%[0-9]+]]:_(s32) = G_CONSTANT i32 204 - ; CHECK-NEXT: [[PTR_ADD53:%[0-9]+]]:_(p5) = G_PTR_ADD [[FRAME_INDEX]], [[C53]](s32) - ; CHECK-NEXT: G_STORE [[UV51]](p3), [[PTR_ADD53]](p5) :: (store (p3) into %stack.0 + 204, basealign 256, addrspace 5) - ; CHECK-NEXT: [[C54:%[0-9]+]]:_(s32) = G_CONSTANT i32 208 - ; CHECK-NEXT: [[PTR_ADD54:%[0-9]+]]:_(p5) = G_PTR_ADD [[FRAME_INDEX]], [[C54]](s32) - ; CHECK-NEXT: G_STORE [[UV52]](p3), [[PTR_ADD54]](p5) :: (store (p3) into %stack.0 + 208, align 16, basealign 256, addrspace 5) - ; CHECK-NEXT: [[C55:%[0-9]+]]:_(s32) = G_CONSTANT i32 212 - ; CHECK-NEXT: [[PTR_ADD55:%[0-9]+]]:_(p5) = G_PTR_ADD [[FRAME_INDEX]], [[C55]](s32) - ; CHECK-NEXT: G_STORE [[UV53]](p3), [[PTR_ADD55]](p5) :: (store (p3) into %stack.0 + 212, basealign 256, addrspace 5) - ; CHECK-NEXT: [[C56:%[0-9]+]]:_(s32) = G_CONSTANT i32 216 - ; CHECK-NEXT: [[PTR_ADD56:%[0-9]+]]:_(p5) = G_PTR_ADD [[FRAME_INDEX]], [[C56]](s32) - ; CHECK-NEXT: G_STORE [[UV54]](p3), [[PTR_ADD56]](p5) :: (store (p3) into %stack.0 + 216, align 8, basealign 256, addrspace 5) - ; CHECK-NEXT: [[C57:%[0-9]+]]:_(s32) = G_CONSTANT i32 220 - ; CHECK-NEXT: [[PTR_ADD57:%[0-9]+]]:_(p5) = G_PTR_ADD [[FRAME_INDEX]], [[C57]](s32) - ; CHECK-NEXT: G_STORE [[UV55]](p3), [[PTR_ADD57]](p5) :: (store (p3) into %stack.0 + 220, basealign 256, addrspace 5) - ; CHECK-NEXT: [[C58:%[0-9]+]]:_(s32) = G_CONSTANT i32 224 - ; CHECK-NEXT: [[PTR_ADD58:%[0-9]+]]:_(p5) = G_PTR_ADD [[FRAME_INDEX]], [[C58]](s32) - ; CHECK-NEXT: G_STORE [[UV56]](p3), [[PTR_ADD58]](p5) :: (store (p3) into %stack.0 + 224, align 32, basealign 256, addrspace 5) - ; CHECK-NEXT: [[C59:%[0-9]+]]:_(s32) = G_CONSTANT i32 228 - ; CHECK-NEXT: [[PTR_ADD59:%[0-9]+]]:_(p5) = G_PTR_ADD [[FRAME_INDEX]], [[C59]](s32) - ; CHECK-NEXT: G_STORE [[UV57]](p3), [[PTR_ADD59]](p5) :: (store (p3) into %stack.0 + 228, basealign 256, addrspace 5) - ; CHECK-NEXT: [[C60:%[0-9]+]]:_(s32) = G_CONSTANT i32 232 - ; CHECK-NEXT: [[PTR_ADD60:%[0-9]+]]:_(p5) = G_PTR_ADD [[FRAME_INDEX]], [[C60]](s32) - ; CHECK-NEXT: G_STORE [[UV58]](p3), [[PTR_ADD60]](p5) :: (store (p3) into %stack.0 + 232, align 8, basealign 256, addrspace 5) - ; CHECK-NEXT: [[C61:%[0-9]+]]:_(s32) = G_CONSTANT i32 236 - ; CHECK-NEXT: [[PTR_ADD61:%[0-9]+]]:_(p5) = G_PTR_ADD [[FRAME_INDEX]], [[C61]](s32) - ; CHECK-NEXT: G_STORE [[UV59]](p3), [[PTR_ADD61]](p5) :: (store (p3) into %stack.0 + 236, basealign 256, addrspace 5) - ; CHECK-NEXT: [[C62:%[0-9]+]]:_(s32) = G_CONSTANT i32 240 - ; CHECK-NEXT: [[PTR_ADD62:%[0-9]+]]:_(p5) = G_PTR_ADD [[FRAME_INDEX]], [[C62]](s32) - ; CHECK-NEXT: G_STORE [[UV60]](p3), [[PTR_ADD62]](p5) :: (store (p3) into %stack.0 + 240, align 16, basealign 256, addrspace 5) - ; CHECK-NEXT: [[C63:%[0-9]+]]:_(s32) = G_CONSTANT i32 244 - ; CHECK-NEXT: [[PTR_ADD63:%[0-9]+]]:_(p5) = G_PTR_ADD [[FRAME_INDEX]], [[C63]](s32) 
- ; CHECK-NEXT: G_STORE [[UV61]](p3), [[PTR_ADD63]](p5) :: (store (p3) into %stack.0 + 244, basealign 256, addrspace 5) - ; CHECK-NEXT: [[C64:%[0-9]+]]:_(s32) = G_CONSTANT i32 248 - ; CHECK-NEXT: [[PTR_ADD64:%[0-9]+]]:_(p5) = G_PTR_ADD [[FRAME_INDEX]], [[C64]](s32) - ; CHECK-NEXT: G_STORE [[UV62]](p3), [[PTR_ADD64]](p5) :: (store (p3) into %stack.0 + 248, align 8, basealign 256, addrspace 5) - ; CHECK-NEXT: [[C65:%[0-9]+]]:_(s32) = G_CONSTANT i32 252 - ; CHECK-NEXT: [[PTR_ADD65:%[0-9]+]]:_(p5) = G_PTR_ADD [[FRAME_INDEX]], [[C65]](s32) - ; CHECK-NEXT: G_STORE [[UV63]](p3), [[PTR_ADD65]](p5) :: (store (p3) into %stack.0 + 252, basealign 256, addrspace 5) - ; CHECK-NEXT: [[LOAD4:%[0-9]+]]:_(p3) = G_LOAD [[PTR_ADD35]](p5) :: (load (p3) from %stack.0 + 132, addrspace 5) - ; CHECK-NEXT: S_ENDPGM 0, implicit [[LOAD4]](p3) + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(p3) = COPY [[UV1]](p3) + ; CHECK-NEXT: S_ENDPGM 0, implicit [[COPY1]](p3) %0:_(p1) = COPY $sgpr0_sgpr1 %1:_(s32) = G_CONSTANT i32 33 %2:_(<64 x p3>) = G_LOAD %0 :: (load (<64 x p3>), align 4, addrspace 4) diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-extract.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-extract.mir index e8e863a964c9..ba80462858f1 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-extract.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-extract.mir @@ -187,8 +187,9 @@ body: | ; CHECK-LABEL: name: test_extract_s32_v2s32_offset0 ; CHECK: [[COPY:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr0_vgpr1 - ; CHECK-NEXT: [[EXTRACT:%[0-9]+]]:_(s32) = G_EXTRACT [[COPY]](<2 x s32>), 0 - ; CHECK-NEXT: $vgpr0 = COPY [[EXTRACT]](s32) + ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<2 x s32>) + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY [[UV]](s32) + ; CHECK-NEXT: $vgpr0 = COPY [[COPY1]](s32) %0:_(<2 x s32>) = COPY $vgpr0_vgpr1 %1:_(s32) = G_EXTRACT %0, 0 $vgpr0 = COPY %1 @@ -201,8 +202,9 @@ body: | ; CHECK-LABEL: name: test_extract_s32_v2s32_offset32 ; CHECK: [[COPY:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr0_vgpr1 - ; CHECK-NEXT: [[EXTRACT:%[0-9]+]]:_(s32) = G_EXTRACT [[COPY]](<2 x s32>), 32 - ; CHECK-NEXT: $vgpr0 = COPY [[EXTRACT]](s32) + ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<2 x s32>) + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY [[UV1]](s32) + ; CHECK-NEXT: $vgpr0 = COPY [[COPY1]](s32) %0:_(<2 x s32>) = COPY $vgpr0_vgpr1 %1:_(s32) = G_EXTRACT %0, 32 $vgpr0 = COPY %1 @@ -215,8 +217,9 @@ body: | ; CHECK-LABEL: name: test_extract_s32_v3s32_offset0 ; CHECK: [[COPY:%[0-9]+]]:_(<3 x s32>) = COPY $vgpr0_vgpr1_vgpr2 - ; CHECK-NEXT: [[EXTRACT:%[0-9]+]]:_(s32) = G_EXTRACT [[COPY]](<3 x s32>), 0 - ; CHECK-NEXT: $vgpr0 = COPY [[EXTRACT]](s32) + ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<3 x s32>) + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY [[UV]](s32) + ; CHECK-NEXT: $vgpr0 = COPY [[COPY1]](s32) %0:_(<3 x s32>) = COPY $vgpr0_vgpr1_vgpr2 %1:_(s32) = G_EXTRACT %0, 0 $vgpr0 = COPY %1 @@ -229,8 +232,9 @@ body: | ; CHECK-LABEL: name: test_extract_s32_v3s32_offset32 ; CHECK: [[COPY:%[0-9]+]]:_(<3 x s32>) = COPY $vgpr0_vgpr1_vgpr2 - ; CHECK-NEXT: [[EXTRACT:%[0-9]+]]:_(s32) = G_EXTRACT [[COPY]](<3 x s32>), 32 - ; CHECK-NEXT: $vgpr0 = COPY [[EXTRACT]](s32) + ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<3 x s32>) + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY [[UV1]](s32) + ; CHECK-NEXT: $vgpr0 = COPY 
[[COPY1]](s32) %0:_(<3 x s32>) = COPY $vgpr0_vgpr1_vgpr2 %1:_(s32) = G_EXTRACT %0, 32 $vgpr0 = COPY %1 @@ -243,8 +247,9 @@ body: | ; CHECK-LABEL: name: test_extract_s32_v3s32_offset64 ; CHECK: [[COPY:%[0-9]+]]:_(<3 x s32>) = COPY $vgpr0_vgpr1_vgpr2 - ; CHECK-NEXT: [[EXTRACT:%[0-9]+]]:_(s32) = G_EXTRACT [[COPY]](<3 x s32>), 64 - ; CHECK-NEXT: $vgpr0 = COPY [[EXTRACT]](s32) + ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<3 x s32>) + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY [[UV2]](s32) + ; CHECK-NEXT: $vgpr0 = COPY [[COPY1]](s32) %0:_(<3 x s32>) = COPY $vgpr0_vgpr1_vgpr2 %1:_(s32) = G_EXTRACT %0, 64 $vgpr0 = COPY %1 @@ -257,8 +262,9 @@ body: | ; CHECK-LABEL: name: test_extract_s32_v4s32_offset0 ; CHECK: [[COPY:%[0-9]+]]:_(<4 x s32>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3 - ; CHECK-NEXT: [[EXTRACT:%[0-9]+]]:_(s32) = G_EXTRACT [[COPY]](<4 x s32>), 0 - ; CHECK-NEXT: $vgpr0 = COPY [[EXTRACT]](s32) + ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<4 x s32>) + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY [[UV]](s32) + ; CHECK-NEXT: $vgpr0 = COPY [[COPY1]](s32) %0:_(<4 x s32>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3 %1:_(s32) = G_EXTRACT %0, 0 $vgpr0 = COPY %1 @@ -313,8 +319,9 @@ body: | ; CHECK-LABEL: name: test_extract_v2s32_v4s32_offset0 ; CHECK: [[COPY:%[0-9]+]]:_(<4 x s32>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3 - ; CHECK-NEXT: [[EXTRACT:%[0-9]+]]:_(<2 x s32>) = G_EXTRACT [[COPY]](<4 x s32>), 0 - ; CHECK-NEXT: $vgpr0_vgpr1 = COPY [[EXTRACT]](<2 x s32>) + ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<4 x s32>) + ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[UV]](s32), [[UV1]](s32) + ; CHECK-NEXT: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x s32>) %0:_(<4 x s32>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3 %1:_(<2 x s32>) = G_EXTRACT %0, 0 $vgpr0_vgpr1 = COPY %1 @@ -328,8 +335,9 @@ body: | ; CHECK-LABEL: name: test_extract_v2s32_v4s32_offset32 ; CHECK: [[COPY:%[0-9]+]]:_(<4 x s32>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3 - ; CHECK-NEXT: [[EXTRACT:%[0-9]+]]:_(<2 x s32>) = G_EXTRACT [[COPY]](<4 x s32>), 32 - ; CHECK-NEXT: $vgpr0_vgpr1 = COPY [[EXTRACT]](<2 x s32>) + ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<4 x s32>) + ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[UV1]](s32), [[UV2]](s32) + ; CHECK-NEXT: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x s32>) %0:_(<4 x s32>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3 %1:_(<2 x s32>) = G_EXTRACT %0, 32 $vgpr0_vgpr1 = COPY %1 @@ -343,8 +351,9 @@ body: | ; CHECK-LABEL: name: test_extract_v2s32_v4s32_offset64 ; CHECK: [[COPY:%[0-9]+]]:_(<4 x s32>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3 - ; CHECK-NEXT: [[EXTRACT:%[0-9]+]]:_(<2 x s32>) = G_EXTRACT [[COPY]](<4 x s32>), 64 - ; CHECK-NEXT: $vgpr0_vgpr1 = COPY [[EXTRACT]](<2 x s32>) + ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<4 x s32>) + ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[UV2]](s32), [[UV3]](s32) + ; CHECK-NEXT: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x s32>) %0:_(<4 x s32>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3 %1:_(<2 x s32>) = G_EXTRACT %0, 64 $vgpr0_vgpr1 = COPY %1 @@ -357,8 +366,9 @@ body: | ; CHECK-LABEL: name: test_extract_s64_v4s32_offset0 ; CHECK: 
[[COPY:%[0-9]+]]:_(<4 x s32>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3 - ; CHECK-NEXT: [[EXTRACT:%[0-9]+]]:_(s64) = G_EXTRACT [[COPY]](<4 x s32>), 0 - ; CHECK-NEXT: $vgpr0_vgpr1 = COPY [[EXTRACT]](s64) + ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<4 x s32>) + ; CHECK-NEXT: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UV]](s32), [[UV1]](s32) + ; CHECK-NEXT: $vgpr0_vgpr1 = COPY [[MV]](s64) %0:_(<4 x s32>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3 %1:_(s64) = G_EXTRACT %0, 0 $vgpr0_vgpr1 = COPY %1 @@ -372,8 +382,9 @@ body: | ; CHECK-LABEL: name: test_extract_s64_v4s32_offset32 ; CHECK: [[COPY:%[0-9]+]]:_(<4 x s32>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3 - ; CHECK-NEXT: [[EXTRACT:%[0-9]+]]:_(s64) = G_EXTRACT [[COPY]](<4 x s32>), 32 - ; CHECK-NEXT: $vgpr0_vgpr1 = COPY [[EXTRACT]](s64) + ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<4 x s32>) + ; CHECK-NEXT: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UV1]](s32), [[UV2]](s32) + ; CHECK-NEXT: $vgpr0_vgpr1 = COPY [[MV]](s64) %0:_(<4 x s32>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3 %1:_(s64) = G_EXTRACT %0, 32 $vgpr0_vgpr1 = COPY %1 @@ -387,8 +398,9 @@ body: | ; CHECK-LABEL: name: test_extract_s64_v4s32_offset64 ; CHECK: [[COPY:%[0-9]+]]:_(<4 x s32>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3 - ; CHECK-NEXT: [[EXTRACT:%[0-9]+]]:_(s64) = G_EXTRACT [[COPY]](<4 x s32>), 64 - ; CHECK-NEXT: $vgpr0_vgpr1 = COPY [[EXTRACT]](s64) + ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<4 x s32>) + ; CHECK-NEXT: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UV2]](s32), [[UV3]](s32) + ; CHECK-NEXT: $vgpr0_vgpr1 = COPY [[MV]](s64) %0:_(<4 x s32>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3 %1:_(s64) = G_EXTRACT %0, 64 $vgpr0_vgpr1 = COPY %1 @@ -401,8 +413,9 @@ body: | ; CHECK-LABEL: name: test_extract_p0_v4s32_offset0 ; CHECK: [[COPY:%[0-9]+]]:_(<4 x s32>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3 - ; CHECK-NEXT: [[EXTRACT:%[0-9]+]]:_(p0) = G_EXTRACT [[COPY]](<4 x s32>), 0 - ; CHECK-NEXT: $vgpr0_vgpr1 = COPY [[EXTRACT]](p0) + ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<4 x s32>) + ; CHECK-NEXT: [[MV:%[0-9]+]]:_(p0) = G_MERGE_VALUES [[UV]](s32), [[UV1]](s32) + ; CHECK-NEXT: $vgpr0_vgpr1 = COPY [[MV]](p0) %0:_(<4 x s32>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3 %1:_(p0) = G_EXTRACT %0, 0 $vgpr0_vgpr1 = COPY %1 @@ -416,8 +429,9 @@ body: | ; CHECK-LABEL: name: test_extract_p0_v4s32_offset32 ; CHECK: [[COPY:%[0-9]+]]:_(<4 x s32>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3 - ; CHECK-NEXT: [[EXTRACT:%[0-9]+]]:_(p0) = G_EXTRACT [[COPY]](<4 x s32>), 32 - ; CHECK-NEXT: $vgpr0_vgpr1 = COPY [[EXTRACT]](p0) + ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<4 x s32>) + ; CHECK-NEXT: [[MV:%[0-9]+]]:_(p0) = G_MERGE_VALUES [[UV1]](s32), [[UV2]](s32) + ; CHECK-NEXT: $vgpr0_vgpr1 = COPY [[MV]](p0) %0:_(<4 x s32>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3 %1:_(p0) = G_EXTRACT %0, 32 $vgpr0_vgpr1 = COPY %1 @@ -431,8 +445,9 @@ body: | ; CHECK-LABEL: name: test_extract_p0_v4s32_offset64 ; CHECK: [[COPY:%[0-9]+]]:_(<4 x s32>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3 - ; CHECK-NEXT: [[EXTRACT:%[0-9]+]]:_(p0) = G_EXTRACT [[COPY]](<4 x s32>), 64 - ; CHECK-NEXT: $vgpr0_vgpr1 = COPY [[EXTRACT]](p0) + ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), 
[[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<4 x s32>) + ; CHECK-NEXT: [[MV:%[0-9]+]]:_(p0) = G_MERGE_VALUES [[UV2]](s32), [[UV3]](s32) + ; CHECK-NEXT: $vgpr0_vgpr1 = COPY [[MV]](p0) %0:_(<4 x s32>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3 %1:_(p0) = G_EXTRACT %0, 64 $vgpr0_vgpr1 = COPY %1 @@ -444,13 +459,8 @@ body: | bb.0: ; CHECK-LABEL: name: extract_s8_v4s8_offset0 ; CHECK: [[DEF:%[0-9]+]]:_(<4 x s32>) = G_IMPLICIT_DEF - ; CHECK-NEXT: [[UV:%[0-9]+]]:_(<2 x s32>), [[UV1:%[0-9]+]]:_(<2 x s32>) = G_UNMERGE_VALUES [[DEF]](<4 x s32>) - ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(<2 x s16>) = G_TRUNC [[UV]](<2 x s32>) - ; CHECK-NEXT: [[TRUNC1:%[0-9]+]]:_(<2 x s16>) = G_TRUNC [[UV1]](<2 x s32>) - ; CHECK-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[TRUNC]](<2 x s16>), [[TRUNC1]](<2 x s16>) - ; CHECK-NEXT: [[EXTRACT:%[0-9]+]]:_(s16) = G_EXTRACT [[CONCAT_VECTORS]](<4 x s16>), 0 - ; CHECK-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[EXTRACT]](s16) - ; CHECK-NEXT: $vgpr0 = COPY [[ANYEXT]](s32) + ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[DEF]](<4 x s32>) + ; CHECK-NEXT: $vgpr0 = COPY [[UV]](s32) %0:_(<4 x s8>) = G_IMPLICIT_DEF %1:_(s8) = G_EXTRACT %0, 0 %2:_(s32) = G_ANYEXT %1 @@ -463,13 +473,8 @@ body: | bb.0: ; CHECK-LABEL: name: extract_s8_v4s8_offset8 ; CHECK: [[DEF:%[0-9]+]]:_(<4 x s32>) = G_IMPLICIT_DEF - ; CHECK-NEXT: [[UV:%[0-9]+]]:_(<2 x s32>), [[UV1:%[0-9]+]]:_(<2 x s32>) = G_UNMERGE_VALUES [[DEF]](<4 x s32>) - ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(<2 x s16>) = G_TRUNC [[UV]](<2 x s32>) - ; CHECK-NEXT: [[TRUNC1:%[0-9]+]]:_(<2 x s16>) = G_TRUNC [[UV1]](<2 x s32>) - ; CHECK-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[TRUNC]](<2 x s16>), [[TRUNC1]](<2 x s16>) - ; CHECK-NEXT: [[EXTRACT:%[0-9]+]]:_(s16) = G_EXTRACT [[CONCAT_VECTORS]](<4 x s16>), 16 - ; CHECK-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[EXTRACT]](s16) - ; CHECK-NEXT: $vgpr0 = COPY [[ANYEXT]](s32) + ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[DEF]](<4 x s32>) + ; CHECK-NEXT: $vgpr0 = COPY [[UV1]](s32) %0:_(<4 x s8>) = G_IMPLICIT_DEF %1:_(s8) = G_EXTRACT %0, 8 %2:_(s32) = G_ANYEXT %1 @@ -482,13 +487,8 @@ body: | bb.0: ; CHECK-LABEL: name: extract_s8_v4s8_offset16 ; CHECK: [[DEF:%[0-9]+]]:_(<4 x s32>) = G_IMPLICIT_DEF - ; CHECK-NEXT: [[UV:%[0-9]+]]:_(<2 x s32>), [[UV1:%[0-9]+]]:_(<2 x s32>) = G_UNMERGE_VALUES [[DEF]](<4 x s32>) - ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(<2 x s16>) = G_TRUNC [[UV]](<2 x s32>) - ; CHECK-NEXT: [[TRUNC1:%[0-9]+]]:_(<2 x s16>) = G_TRUNC [[UV1]](<2 x s32>) - ; CHECK-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[TRUNC]](<2 x s16>), [[TRUNC1]](<2 x s16>) - ; CHECK-NEXT: [[EXTRACT:%[0-9]+]]:_(s16) = G_EXTRACT [[CONCAT_VECTORS]](<4 x s16>), 32 - ; CHECK-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[EXTRACT]](s16) - ; CHECK-NEXT: $vgpr0 = COPY [[ANYEXT]](s32) + ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[DEF]](<4 x s32>) + ; CHECK-NEXT: $vgpr0 = COPY [[UV2]](s32) %0:_(<4 x s8>) = G_IMPLICIT_DEF %1:_(s8) = G_EXTRACT %0, 16 %2:_(s32) = G_ANYEXT %1 @@ -501,13 +501,8 @@ body: | bb.0: ; CHECK-LABEL: name: extract_s8_v4s8_offset24 ; CHECK: [[DEF:%[0-9]+]]:_(<4 x s32>) = G_IMPLICIT_DEF - ; CHECK-NEXT: [[UV:%[0-9]+]]:_(<2 x s32>), [[UV1:%[0-9]+]]:_(<2 x s32>) = G_UNMERGE_VALUES [[DEF]](<4 x s32>) - 
; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(<2 x s16>) = G_TRUNC [[UV]](<2 x s32>) - ; CHECK-NEXT: [[TRUNC1:%[0-9]+]]:_(<2 x s16>) = G_TRUNC [[UV1]](<2 x s32>) - ; CHECK-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[TRUNC]](<2 x s16>), [[TRUNC1]](<2 x s16>) - ; CHECK-NEXT: [[EXTRACT:%[0-9]+]]:_(s16) = G_EXTRACT [[CONCAT_VECTORS]](<4 x s16>), 48 - ; CHECK-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[EXTRACT]](s16) - ; CHECK-NEXT: $vgpr0 = COPY [[ANYEXT]](s32) + ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[DEF]](<4 x s32>) + ; CHECK-NEXT: $vgpr0 = COPY [[UV3]](s32) %0:_(<4 x s8>) = G_IMPLICIT_DEF %1:_(s8) = G_EXTRACT %0, 24 %2:_(s32) = G_ANYEXT %1 @@ -522,26 +517,7 @@ body: | ; CHECK-LABEL: name: extract_s8_v3s8_offset16 ; CHECK: [[DEF:%[0-9]+]]:_(<4 x s32>) = G_IMPLICIT_DEF ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[DEF]](<4 x s32>) - ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 - ; CHECK-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[UV]], [[C]] - ; CHECK-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[UV1]], [[C]] - ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; CHECK-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C1]](s32) - ; CHECK-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]] - ; CHECK-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) - ; CHECK-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[UV2]], [[C]] - ; CHECK-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 - ; CHECK-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[C2]], [[C1]](s32) - ; CHECK-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL1]] - ; CHECK-NEXT: [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32) - ; CHECK-NEXT: [[DEF1:%[0-9]+]]:_(<2 x s16>) = G_IMPLICIT_DEF - ; CHECK-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BITCAST]](<2 x s16>), [[BITCAST1]](<2 x s16>), [[DEF1]](<2 x s16>) - ; CHECK-NEXT: [[UV4:%[0-9]+]]:_(<3 x s16>), [[UV5:%[0-9]+]]:_(<3 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<6 x s16>) - ; CHECK-NEXT: [[DEF2:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF - ; CHECK-NEXT: [[INSERT:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF2]], [[UV4]](<3 x s16>), 0 - ; CHECK-NEXT: [[EXTRACT:%[0-9]+]]:_(s16) = G_EXTRACT [[INSERT]](<4 x s16>), 32 - ; CHECK-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[EXTRACT]](s16) - ; CHECK-NEXT: $vgpr0 = COPY [[ANYEXT]](s32) + ; CHECK-NEXT: $vgpr0 = COPY [[UV2]](s32) %0:_(<3 x s8>) = G_IMPLICIT_DEF %1:_(s8) = G_EXTRACT %0, 16 %2:_(s32) = G_ANYEXT %1 @@ -555,21 +531,7 @@ body: | ; CHECK-LABEL: name: extract_s8_v5s1_offset4 ; CHECK: [[DEF:%[0-9]+]]:_(<5 x s32>) = G_IMPLICIT_DEF ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[DEF]](<5 x s32>) - ; CHECK-NEXT: [[DEF1:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF - ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[UV]](s32), [[UV1]](s32) - ; CHECK-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[UV2]](s32), [[UV3]](s32) - ; CHECK-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[UV4]](s32), [[DEF1]](s32) - ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(<2 x s16>) = G_TRUNC [[BUILD_VECTOR]](<2 x s32>) - ; CHECK-NEXT: [[TRUNC1:%[0-9]+]]:_(<2 x s16>) = G_TRUNC [[BUILD_VECTOR1]](<2 x s32>) - ; CHECK-NEXT: [[TRUNC2:%[0-9]+]]:_(<2 x s16>) = G_TRUNC [[BUILD_VECTOR2]](<2 x s32>) - ; CHECK-NEXT: 
[[DEF2:%[0-9]+]]:_(<2 x s16>) = G_IMPLICIT_DEF - ; CHECK-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<10 x s16>) = G_CONCAT_VECTORS [[TRUNC]](<2 x s16>), [[TRUNC1]](<2 x s16>), [[TRUNC2]](<2 x s16>), [[DEF2]](<2 x s16>), [[DEF2]](<2 x s16>) - ; CHECK-NEXT: [[UV5:%[0-9]+]]:_(<5 x s16>), [[UV6:%[0-9]+]]:_(<5 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<10 x s16>) - ; CHECK-NEXT: [[DEF3:%[0-9]+]]:_(<6 x s16>) = G_IMPLICIT_DEF - ; CHECK-NEXT: [[INSERT:%[0-9]+]]:_(<6 x s16>) = G_INSERT [[DEF3]], [[UV5]](<5 x s16>), 0 - ; CHECK-NEXT: [[EXTRACT:%[0-9]+]]:_(s16) = G_EXTRACT [[INSERT]](<6 x s16>), 64 - ; CHECK-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[EXTRACT]](s16) - ; CHECK-NEXT: $vgpr0 = COPY [[ANYEXT]](s32) + ; CHECK-NEXT: $vgpr0 = COPY [[UV4]](s32) %0:_(<5 x s1>) = G_IMPLICIT_DEF %1:_(s1) = G_EXTRACT %0, 4 %2:_(s32) = G_ANYEXT %1 @@ -582,8 +544,17 @@ body: | bb.0: ; CHECK-LABEL: name: extract_v2s16_v4s16_offset32 ; CHECK: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF - ; CHECK-NEXT: [[EXTRACT:%[0-9]+]]:_(<2 x s16>) = G_EXTRACT [[DEF]](<4 x s16>), 32 - ; CHECK-NEXT: $vgpr0 = COPY [[EXTRACT]](<2 x s16>) + ; CHECK-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<4 x s16>) + ; CHECK-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>) + ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; CHECK-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) + ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 + ; CHECK-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[BITCAST]], [[C1]] + ; CHECK-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LSHR]], [[C1]] + ; CHECK-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C]](s32) + ; CHECK-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]] + ; CHECK-NEXT: [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) + ; CHECK-NEXT: $vgpr0 = COPY [[BITCAST1]](<2 x s16>) %0:_(<4 x s16>) = G_IMPLICIT_DEF %1:_(<2 x s16>) = G_EXTRACT %0, 32 $vgpr0 = COPY %1 @@ -595,8 +566,17 @@ body: | bb.0: ; CHECK-LABEL: name: extract_v2s16_v6s16_offset32 ; CHECK: [[DEF:%[0-9]+]]:_(<6 x s16>) = G_IMPLICIT_DEF - ; CHECK-NEXT: [[EXTRACT:%[0-9]+]]:_(<2 x s16>) = G_EXTRACT [[DEF]](<6 x s16>), 32 - ; CHECK-NEXT: $vgpr0 = COPY [[EXTRACT]](<2 x s16>) + ; CHECK-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>), [[UV2:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<6 x s16>) + ; CHECK-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>) + ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; CHECK-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) + ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 + ; CHECK-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[BITCAST]], [[C1]] + ; CHECK-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LSHR]], [[C1]] + ; CHECK-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C]](s32) + ; CHECK-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]] + ; CHECK-NEXT: [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) + ; CHECK-NEXT: $vgpr0 = COPY [[BITCAST1]](<2 x s16>) %0:_(<6 x s16>) = G_IMPLICIT_DEF %1:_(<2 x s16>) = G_EXTRACT %0, 32 $vgpr0 = COPY %1 @@ -823,13 +803,9 @@ body: | ; CHECK-LABEL: name: extract_s16_v3s16_offset0 ; CHECK: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF - ; CHECK-NEXT: [[DEF1:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF - ; CHECK-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<12 x s16>) = G_CONCAT_VECTORS [[DEF]](<4 x s16>), [[DEF1]](<4 x s16>), [[DEF1]](<4 x s16>) - ; CHECK-NEXT: [[UV:%[0-9]+]]:_(<3 x s16>), [[UV1:%[0-9]+]]:_(<3 x s16>), [[UV2:%[0-9]+]]:_(<3 x 
s16>), [[UV3:%[0-9]+]]:_(<3 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<12 x s16>) - ; CHECK-NEXT: [[INSERT:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF1]], [[UV]](<3 x s16>), 0 - ; CHECK-NEXT: [[EXTRACT:%[0-9]+]]:_(s16) = G_EXTRACT [[INSERT]](<4 x s16>), 0 - ; CHECK-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[EXTRACT]](s16) - ; CHECK-NEXT: $vgpr0 = COPY [[ANYEXT]](s32) + ; CHECK-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<4 x s16>) + ; CHECK-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>) + ; CHECK-NEXT: $vgpr0 = COPY [[BITCAST]](s32) %0:_(<3 x s16>) = G_IMPLICIT_DEF %1:_(s16) = G_EXTRACT %0, 0 %2:_(s32) = G_ANYEXT %1 @@ -908,12 +884,17 @@ body: | ; CHECK-LABEL: name: extract_v2s16_v3s16_offset0 ; CHECK: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF - ; CHECK-NEXT: [[DEF1:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF - ; CHECK-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<12 x s16>) = G_CONCAT_VECTORS [[DEF]](<4 x s16>), [[DEF1]](<4 x s16>), [[DEF1]](<4 x s16>) - ; CHECK-NEXT: [[UV:%[0-9]+]]:_(<3 x s16>), [[UV1:%[0-9]+]]:_(<3 x s16>), [[UV2:%[0-9]+]]:_(<3 x s16>), [[UV3:%[0-9]+]]:_(<3 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<12 x s16>) - ; CHECK-NEXT: [[INSERT:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF1]], [[UV]](<3 x s16>), 0 - ; CHECK-NEXT: [[EXTRACT:%[0-9]+]]:_(<2 x s16>) = G_EXTRACT [[INSERT]](<4 x s16>), 0 - ; CHECK-NEXT: $vgpr0 = COPY [[EXTRACT]](<2 x s16>) + ; CHECK-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<4 x s16>) + ; CHECK-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>) + ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; CHECK-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) + ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 + ; CHECK-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[BITCAST]], [[C1]] + ; CHECK-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LSHR]], [[C1]] + ; CHECK-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C]](s32) + ; CHECK-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]] + ; CHECK-NEXT: [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) + ; CHECK-NEXT: $vgpr0 = COPY [[BITCAST1]](<2 x s16>) %0:_(<3 x s16>) = G_IMPLICIT_DEF %1:_(<2 x s16>) = G_EXTRACT %0, 0 $vgpr0 = COPY %1 @@ -926,12 +907,17 @@ body: | ; CHECK-LABEL: name: extract_v2s16_v5s16_offset0 ; CHECK: [[DEF:%[0-9]+]]:_(<6 x s16>) = G_IMPLICIT_DEF - ; CHECK-NEXT: [[DEF1:%[0-9]+]]:_(<6 x s16>) = G_IMPLICIT_DEF - ; CHECK-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<30 x s16>) = G_CONCAT_VECTORS [[DEF]](<6 x s16>), [[DEF1]](<6 x s16>), [[DEF1]](<6 x s16>), [[DEF1]](<6 x s16>), [[DEF1]](<6 x s16>) - ; CHECK-NEXT: [[UV:%[0-9]+]]:_(<5 x s16>), [[UV1:%[0-9]+]]:_(<5 x s16>), [[UV2:%[0-9]+]]:_(<5 x s16>), [[UV3:%[0-9]+]]:_(<5 x s16>), [[UV4:%[0-9]+]]:_(<5 x s16>), [[UV5:%[0-9]+]]:_(<5 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<30 x s16>) - ; CHECK-NEXT: [[INSERT:%[0-9]+]]:_(<6 x s16>) = G_INSERT [[DEF1]], [[UV]](<5 x s16>), 0 - ; CHECK-NEXT: [[EXTRACT:%[0-9]+]]:_(<2 x s16>) = G_EXTRACT [[INSERT]](<6 x s16>), 0 - ; CHECK-NEXT: $vgpr0 = COPY [[EXTRACT]](<2 x s16>) + ; CHECK-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>), [[UV2:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<6 x s16>) + ; CHECK-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>) + ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; CHECK-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) + ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 + ; 
CHECK-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[BITCAST]], [[C1]] + ; CHECK-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LSHR]], [[C1]] + ; CHECK-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C]](s32) + ; CHECK-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]] + ; CHECK-NEXT: [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) + ; CHECK-NEXT: $vgpr0 = COPY [[BITCAST1]](<2 x s16>) %0:_(<5 x s16>) = G_IMPLICIT_DEF %1:_(<2 x s16>) = G_EXTRACT %0, 0 $vgpr0 = COPY %1 diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-fabs.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-fabs.mir index df4112042a13..16bb69c872ea 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-fabs.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-fabs.mir @@ -231,25 +231,19 @@ body: | ; SI-NEXT: [[BITCAST4:%[0-9]+]]:_(s32) = G_BITCAST [[FABS]](<2 x s16>) ; SI-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST4]], [[C]](s32) ; SI-NEXT: [[BITCAST5:%[0-9]+]]:_(s32) = G_BITCAST [[FABS1]](<2 x s16>) - ; SI-NEXT: [[BITCAST6:%[0-9]+]]:_(s32) = G_BITCAST [[FABS]](<2 x s16>) - ; SI-NEXT: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST6]], [[C]](s32) - ; SI-NEXT: [[BITCAST7:%[0-9]+]]:_(s32) = G_BITCAST [[FABS1]](<2 x s16>) ; SI-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[BITCAST4]], [[C1]] ; SI-NEXT: [[AND4:%[0-9]+]]:_(s32) = G_AND [[LSHR1]], [[C1]] ; SI-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND4]], [[C]](s32) ; SI-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[AND3]], [[SHL2]] - ; SI-NEXT: [[BITCAST8:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR2]](s32) + ; SI-NEXT: [[BITCAST6:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR2]](s32) ; SI-NEXT: [[AND5:%[0-9]+]]:_(s32) = G_AND [[BITCAST5]], [[C1]] - ; SI-NEXT: [[AND6:%[0-9]+]]:_(s32) = G_AND [[BITCAST6]], [[C1]] - ; SI-NEXT: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[AND6]], [[C]](s32) + ; SI-NEXT: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C]](s32) ; SI-NEXT: [[OR3:%[0-9]+]]:_(s32) = G_OR [[AND5]], [[SHL3]] - ; SI-NEXT: [[BITCAST9:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR3]](s32) - ; SI-NEXT: [[AND7:%[0-9]+]]:_(s32) = G_AND [[LSHR2]], [[C1]] - ; SI-NEXT: [[AND8:%[0-9]+]]:_(s32) = G_AND [[BITCAST7]], [[C1]] - ; SI-NEXT: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[AND8]], [[C]](s32) - ; SI-NEXT: [[OR4:%[0-9]+]]:_(s32) = G_OR [[AND7]], [[SHL4]] - ; SI-NEXT: [[BITCAST10:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR4]](s32) - ; SI-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BITCAST8]](<2 x s16>), [[BITCAST9]](<2 x s16>), [[BITCAST10]](<2 x s16>) + ; SI-NEXT: [[BITCAST7:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR3]](s32) + ; SI-NEXT: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[C]](s32) + ; SI-NEXT: [[OR4:%[0-9]+]]:_(s32) = G_OR [[AND4]], [[SHL4]] + ; SI-NEXT: [[BITCAST8:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR4]](s32) + ; SI-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BITCAST6]](<2 x s16>), [[BITCAST7]](<2 x s16>), [[BITCAST8]](<2 x s16>) ; SI-NEXT: S_NOP 0, implicit [[CONCAT_VECTORS]](<6 x s16>) ; VI-LABEL: name: test_fabs_v3s16 ; VI: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF @@ -274,25 +268,19 @@ body: | ; VI-NEXT: [[BITCAST4:%[0-9]+]]:_(s32) = G_BITCAST [[FABS]](<2 x s16>) ; VI-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST4]], [[C]](s32) ; VI-NEXT: [[BITCAST5:%[0-9]+]]:_(s32) = G_BITCAST [[FABS1]](<2 x s16>) - ; VI-NEXT: [[BITCAST6:%[0-9]+]]:_(s32) = G_BITCAST [[FABS]](<2 x s16>) - ; VI-NEXT: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST6]], [[C]](s32) - ; VI-NEXT: [[BITCAST7:%[0-9]+]]:_(s32) = G_BITCAST [[FABS1]](<2 x s16>) ; VI-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND 
[[BITCAST4]], [[C1]] ; VI-NEXT: [[AND4:%[0-9]+]]:_(s32) = G_AND [[LSHR1]], [[C1]] ; VI-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND4]], [[C]](s32) ; VI-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[AND3]], [[SHL2]] - ; VI-NEXT: [[BITCAST8:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR2]](s32) + ; VI-NEXT: [[BITCAST6:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR2]](s32) ; VI-NEXT: [[AND5:%[0-9]+]]:_(s32) = G_AND [[BITCAST5]], [[C1]] - ; VI-NEXT: [[AND6:%[0-9]+]]:_(s32) = G_AND [[BITCAST6]], [[C1]] - ; VI-NEXT: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[AND6]], [[C]](s32) + ; VI-NEXT: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C]](s32) ; VI-NEXT: [[OR3:%[0-9]+]]:_(s32) = G_OR [[AND5]], [[SHL3]] - ; VI-NEXT: [[BITCAST9:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR3]](s32) - ; VI-NEXT: [[AND7:%[0-9]+]]:_(s32) = G_AND [[LSHR2]], [[C1]] - ; VI-NEXT: [[AND8:%[0-9]+]]:_(s32) = G_AND [[BITCAST7]], [[C1]] - ; VI-NEXT: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[AND8]], [[C]](s32) - ; VI-NEXT: [[OR4:%[0-9]+]]:_(s32) = G_OR [[AND7]], [[SHL4]] - ; VI-NEXT: [[BITCAST10:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR4]](s32) - ; VI-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BITCAST8]](<2 x s16>), [[BITCAST9]](<2 x s16>), [[BITCAST10]](<2 x s16>) + ; VI-NEXT: [[BITCAST7:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR3]](s32) + ; VI-NEXT: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[C]](s32) + ; VI-NEXT: [[OR4:%[0-9]+]]:_(s32) = G_OR [[AND4]], [[SHL4]] + ; VI-NEXT: [[BITCAST8:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR4]](s32) + ; VI-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BITCAST6]](<2 x s16>), [[BITCAST7]](<2 x s16>), [[BITCAST8]](<2 x s16>) ; VI-NEXT: S_NOP 0, implicit [[CONCAT_VECTORS]](<6 x s16>) ; GFX9-LABEL: name: test_fabs_v3s16 ; GFX9: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF @@ -309,12 +297,9 @@ body: | ; GFX9-NEXT: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[FABS]](<2 x s16>) ; GFX9-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C]](s32) ; GFX9-NEXT: [[BITCAST3:%[0-9]+]]:_(s32) = G_BITCAST [[FABS1]](<2 x s16>) - ; GFX9-NEXT: [[BITCAST4:%[0-9]+]]:_(s32) = G_BITCAST [[FABS]](<2 x s16>) - ; GFX9-NEXT: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST4]], [[C]](s32) - ; GFX9-NEXT: [[BITCAST5:%[0-9]+]]:_(s32) = G_BITCAST [[FABS1]](<2 x s16>) ; GFX9-NEXT: [[BUILD_VECTOR_TRUNC2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[BITCAST2]](s32), [[LSHR1]](s32) - ; GFX9-NEXT: [[BUILD_VECTOR_TRUNC3:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[BITCAST3]](s32), [[BITCAST4]](s32) - ; GFX9-NEXT: [[BUILD_VECTOR_TRUNC4:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[LSHR2]](s32), [[BITCAST5]](s32) + ; GFX9-NEXT: [[BUILD_VECTOR_TRUNC3:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[BITCAST3]](s32), [[BITCAST2]](s32) + ; GFX9-NEXT: [[BUILD_VECTOR_TRUNC4:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[LSHR1]](s32), [[BITCAST3]](s32) ; GFX9-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR_TRUNC2]](<2 x s16>), [[BUILD_VECTOR_TRUNC3]](<2 x s16>), [[BUILD_VECTOR_TRUNC4]](<2 x s16>) ; GFX9-NEXT: S_NOP 0, implicit [[CONCAT_VECTORS]](<6 x s16>) %0:_(<3 x s16>) = G_IMPLICIT_DEF diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-fadd.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-fadd.mir index e500e59bff7d..aaff85970bec 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-fadd.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-fadd.mir @@ -446,21 +446,21 @@ body: | ; GFX9-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 ; GFX9-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], 
[[C]](s32) ; GFX9-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>) - ; GFX9-NEXT: [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[BITCAST]](s32), [[LSHR]](s32) - ; GFX9-NEXT: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF - ; GFX9-NEXT: [[BUILD_VECTOR_TRUNC1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[BITCAST1]](s32), [[DEF]](s32) ; GFX9-NEXT: [[UV3:%[0-9]+]]:_(<2 x s16>), [[UV4:%[0-9]+]]:_(<2 x s16>), [[UV5:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[COPY1]](<6 x s16>) ; GFX9-NEXT: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[UV3]](<2 x s16>) ; GFX9-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C]](s32) ; GFX9-NEXT: [[BITCAST3:%[0-9]+]]:_(s32) = G_BITCAST [[UV4]](<2 x s16>) + ; GFX9-NEXT: [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[BITCAST]](s32), [[LSHR]](s32) + ; GFX9-NEXT: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF + ; GFX9-NEXT: [[BUILD_VECTOR_TRUNC1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[BITCAST1]](s32), [[DEF]](s32) ; GFX9-NEXT: [[BUILD_VECTOR_TRUNC2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[BITCAST2]](s32), [[LSHR1]](s32) ; GFX9-NEXT: [[BUILD_VECTOR_TRUNC3:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[BITCAST3]](s32), [[DEF]](s32) ; GFX9-NEXT: [[FADD:%[0-9]+]]:_(<2 x s16>) = G_FADD [[BUILD_VECTOR_TRUNC]], [[BUILD_VECTOR_TRUNC2]] ; GFX9-NEXT: [[FADD1:%[0-9]+]]:_(<2 x s16>) = G_FADD [[BUILD_VECTOR_TRUNC1]], [[BUILD_VECTOR_TRUNC3]] - ; GFX9-NEXT: [[DEF1:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF ; GFX9-NEXT: [[BITCAST4:%[0-9]+]]:_(s32) = G_BITCAST [[FADD]](<2 x s16>) ; GFX9-NEXT: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST4]], [[C]](s32) ; GFX9-NEXT: [[BITCAST5:%[0-9]+]]:_(s32) = G_BITCAST [[FADD1]](<2 x s16>) + ; GFX9-NEXT: [[DEF1:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF ; GFX9-NEXT: [[UV6:%[0-9]+]]:_(<2 x s16>), [[UV7:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF1]](<4 x s16>) ; GFX9-NEXT: [[BITCAST6:%[0-9]+]]:_(s32) = G_BITCAST [[UV6]](<2 x s16>) ; GFX9-NEXT: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST6]], [[C]](s32) diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-fdiv.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-fdiv.mir index 4e3a1a74a38c..f230dbc1ae68 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-fdiv.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-fdiv.mir @@ -1382,7 +1382,6 @@ body: | ; SI-LABEL: name: test_fdiv_v3s16 ; SI: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF - ; SI-NEXT: [[DEF1:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF ; SI-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<4 x s16>) ; SI-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>) ; SI-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST]](s32) @@ -1391,6 +1390,7 @@ body: | ; SI-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) ; SI-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>) ; SI-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST1]](s32) + ; SI-NEXT: [[DEF1:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF ; SI-NEXT: [[UV2:%[0-9]+]]:_(<2 x s16>), [[UV3:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF1]](<4 x s16>) ; SI-NEXT: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[UV2]](<2 x s16>) ; SI-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST2]](s32) @@ -1451,7 +1451,6 @@ body: | ; SI-NEXT: S_NOP 0, implicit [[BUILD_VECTOR]](<3 x s32>) ; VI-LABEL: name: test_fdiv_v3s16 ; VI: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF - ; VI-NEXT: [[DEF1:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF ; VI-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), 
[[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<4 x s16>) ; VI-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>) ; VI-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST]](s32) @@ -1460,6 +1459,7 @@ body: | ; VI-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) ; VI-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>) ; VI-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST1]](s32) + ; VI-NEXT: [[DEF1:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF ; VI-NEXT: [[UV2:%[0-9]+]]:_(<2 x s16>), [[UV3:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF1]](<4 x s16>) ; VI-NEXT: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[UV2]](<2 x s16>) ; VI-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST2]](s32) @@ -1492,7 +1492,6 @@ body: | ; VI-NEXT: S_NOP 0, implicit [[BUILD_VECTOR]](<3 x s32>) ; GFX9-LABEL: name: test_fdiv_v3s16 ; GFX9: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF - ; GFX9-NEXT: [[DEF1:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF ; GFX9-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<4 x s16>) ; GFX9-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>) ; GFX9-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST]](s32) @@ -1501,6 +1500,7 @@ body: | ; GFX9-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) ; GFX9-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>) ; GFX9-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST1]](s32) + ; GFX9-NEXT: [[DEF1:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF ; GFX9-NEXT: [[UV2:%[0-9]+]]:_(<2 x s16>), [[UV3:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF1]](<4 x s16>) ; GFX9-NEXT: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[UV2]](<2 x s16>) ; GFX9-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST2]](s32) @@ -1533,7 +1533,6 @@ body: | ; GFX9-NEXT: S_NOP 0, implicit [[BUILD_VECTOR]](<3 x s32>) ; GFX9-UNSAFE-LABEL: name: test_fdiv_v3s16 ; GFX9-UNSAFE: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF - ; GFX9-UNSAFE-NEXT: [[DEF1:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF ; GFX9-UNSAFE-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<4 x s16>) ; GFX9-UNSAFE-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>) ; GFX9-UNSAFE-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST]](s32) @@ -1542,6 +1541,7 @@ body: | ; GFX9-UNSAFE-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) ; GFX9-UNSAFE-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>) ; GFX9-UNSAFE-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST1]](s32) + ; GFX9-UNSAFE-NEXT: [[DEF1:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF ; GFX9-UNSAFE-NEXT: [[UV2:%[0-9]+]]:_(<2 x s16>), [[UV3:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF1]](<4 x s16>) ; GFX9-UNSAFE-NEXT: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[UV2]](<2 x s16>) ; GFX9-UNSAFE-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST2]](s32) @@ -1562,7 +1562,6 @@ body: | ; GFX9-UNSAFE-NEXT: S_NOP 0, implicit [[BUILD_VECTOR]](<3 x s32>) ; GFX10-LABEL: name: test_fdiv_v3s16 ; GFX10: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF - ; GFX10-NEXT: [[DEF1:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF ; GFX10-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<4 x s16>) ; GFX10-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>) ; GFX10-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST]](s32) @@ -1571,6 +1570,7 @@ body: | ; GFX10-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) ; GFX10-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>) ; GFX10-NEXT: 
[[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST1]](s32) + ; GFX10-NEXT: [[DEF1:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF ; GFX10-NEXT: [[UV2:%[0-9]+]]:_(<2 x s16>), [[UV3:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF1]](<4 x s16>) ; GFX10-NEXT: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[UV2]](<2 x s16>) ; GFX10-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST2]](s32) diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-fma.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-fma.mir index bcb249055a24..fed4cfcc793a 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-fma.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-fma.mir @@ -518,27 +518,27 @@ body: | ; GFX9-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 ; GFX9-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) ; GFX9-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>) - ; GFX9-NEXT: [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[BITCAST]](s32), [[LSHR]](s32) - ; GFX9-NEXT: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF - ; GFX9-NEXT: [[BUILD_VECTOR_TRUNC1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[BITCAST1]](s32), [[DEF]](s32) ; GFX9-NEXT: [[UV3:%[0-9]+]]:_(<2 x s16>), [[UV4:%[0-9]+]]:_(<2 x s16>), [[UV5:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[COPY1]](<6 x s16>) ; GFX9-NEXT: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[UV3]](<2 x s16>) ; GFX9-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C]](s32) ; GFX9-NEXT: [[BITCAST3:%[0-9]+]]:_(s32) = G_BITCAST [[UV4]](<2 x s16>) - ; GFX9-NEXT: [[BUILD_VECTOR_TRUNC2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[BITCAST2]](s32), [[LSHR1]](s32) - ; GFX9-NEXT: [[BUILD_VECTOR_TRUNC3:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[BITCAST3]](s32), [[DEF]](s32) ; GFX9-NEXT: [[UV6:%[0-9]+]]:_(<2 x s16>), [[UV7:%[0-9]+]]:_(<2 x s16>), [[UV8:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[COPY2]](<6 x s16>) ; GFX9-NEXT: [[BITCAST4:%[0-9]+]]:_(s32) = G_BITCAST [[UV6]](<2 x s16>) ; GFX9-NEXT: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST4]], [[C]](s32) ; GFX9-NEXT: [[BITCAST5:%[0-9]+]]:_(s32) = G_BITCAST [[UV7]](<2 x s16>) + ; GFX9-NEXT: [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[BITCAST]](s32), [[LSHR]](s32) + ; GFX9-NEXT: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF + ; GFX9-NEXT: [[BUILD_VECTOR_TRUNC1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[BITCAST1]](s32), [[DEF]](s32) + ; GFX9-NEXT: [[BUILD_VECTOR_TRUNC2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[BITCAST2]](s32), [[LSHR1]](s32) + ; GFX9-NEXT: [[BUILD_VECTOR_TRUNC3:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[BITCAST3]](s32), [[DEF]](s32) ; GFX9-NEXT: [[BUILD_VECTOR_TRUNC4:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[BITCAST4]](s32), [[LSHR2]](s32) ; GFX9-NEXT: [[BUILD_VECTOR_TRUNC5:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[BITCAST5]](s32), [[DEF]](s32) ; GFX9-NEXT: [[FMA:%[0-9]+]]:_(<2 x s16>) = G_FMA [[BUILD_VECTOR_TRUNC]], [[BUILD_VECTOR_TRUNC2]], [[BUILD_VECTOR_TRUNC4]] ; GFX9-NEXT: [[FMA1:%[0-9]+]]:_(<2 x s16>) = G_FMA [[BUILD_VECTOR_TRUNC1]], [[BUILD_VECTOR_TRUNC3]], [[BUILD_VECTOR_TRUNC5]] - ; GFX9-NEXT: [[DEF1:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF ; GFX9-NEXT: [[BITCAST6:%[0-9]+]]:_(s32) = G_BITCAST [[FMA]](<2 x s16>) ; GFX9-NEXT: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST6]], [[C]](s32) ; GFX9-NEXT: [[BITCAST7:%[0-9]+]]:_(s32) = G_BITCAST [[FMA1]](<2 x s16>) + ; GFX9-NEXT: [[DEF1:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF ; GFX9-NEXT: [[UV9:%[0-9]+]]:_(<2 x s16>), [[UV10:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF1]](<4 x s16>) ; 
GFX9-NEXT: [[BITCAST8:%[0-9]+]]:_(s32) = G_BITCAST [[UV9]](<2 x s16>) ; GFX9-NEXT: [[LSHR4:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST8]], [[C]](s32) diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-fmaxnum.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-fmaxnum.mir index 3edb265c710d..acae7a10d509 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-fmaxnum.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-fmaxnum.mir @@ -1,8 +1,8 @@ # NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py -# RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=tahiti -run-pass=legalizer -global-isel-abort=0 %s -o - | FileCheck -check-prefix=SI %s -# RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=fiji -run-pass=legalizer -global-isel-abort=0 %s -o - | FileCheck -check-prefix=VI %s -# RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx900 -run-pass=legalizer -global-isel-abort=0 %s -o - | FileCheck -check-prefix=GFX9 %s -# RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx1010 -run-pass=legalizer -global-isel-abort=0 %s -o - | FileCheck -check-prefix=GFX9 %s +# RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=tahiti -run-pass=legalizer %s -o - | FileCheck -check-prefix=SI %s +# RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=fiji -run-pass=legalizer %s -o - | FileCheck -check-prefix=VI %s +# RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx900 -run-pass=legalizer %s -o - | FileCheck -check-prefix=GFX9 %s +# RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx1010 -run-pass=legalizer %s -o - | FileCheck -check-prefix=GFX9 %s --- name: test_fmaxnum_s32_ieee_mode_on @@ -496,32 +496,38 @@ body: | ; GFX9-LABEL: name: test_fmaxnum_v3s16 ; GFX9: [[COPY:%[0-9]+]]:_(<6 x s16>) = COPY $vgpr0_vgpr1_vgpr2 ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(<6 x s16>) = COPY $vgpr3_vgpr4_vgpr5 - ; GFX9-NEXT: [[UV:%[0-9]+]]:_(<3 x s16>), [[UV1:%[0-9]+]]:_(<3 x s16>) = G_UNMERGE_VALUES [[COPY]](<6 x s16>) - ; GFX9-NEXT: [[UV2:%[0-9]+]]:_(<3 x s16>), [[UV3:%[0-9]+]]:_(<3 x s16>) = G_UNMERGE_VALUES [[COPY1]](<6 x s16>) - ; GFX9-NEXT: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF - ; GFX9-NEXT: [[INSERT:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF]], [[UV]](<3 x s16>), 0 - ; GFX9-NEXT: [[INSERT1:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF]], [[UV2]](<3 x s16>), 0 - ; GFX9-NEXT: [[UV4:%[0-9]+]]:_(<2 x s16>), [[UV5:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[INSERT]](<4 x s16>) - ; GFX9-NEXT: [[UV6:%[0-9]+]]:_(<2 x s16>), [[UV7:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[INSERT1]](<4 x s16>) - ; GFX9-NEXT: [[FCANONICALIZE:%[0-9]+]]:_(<2 x s16>) = G_FCANONICALIZE [[UV4]] - ; GFX9-NEXT: [[FCANONICALIZE1:%[0-9]+]]:_(<2 x s16>) = G_FCANONICALIZE [[UV6]] - ; GFX9-NEXT: [[FMAXNUM_IEEE:%[0-9]+]]:_(<2 x s16>) = G_FMAXNUM_IEEE [[FCANONICALIZE]], [[FCANONICALIZE1]] - ; GFX9-NEXT: [[FCANONICALIZE2:%[0-9]+]]:_(<2 x s16>) = G_FCANONICALIZE [[UV5]] - ; GFX9-NEXT: [[FCANONICALIZE3:%[0-9]+]]:_(<2 x s16>) = G_FCANONICALIZE [[UV7]] - ; GFX9-NEXT: [[FMAXNUM_IEEE1:%[0-9]+]]:_(<2 x s16>) = G_FMAXNUM_IEEE [[FCANONICALIZE2]], [[FCANONICALIZE3]] - ; GFX9-NEXT: [[DEF1:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF - ; GFX9-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[FMAXNUM_IEEE]](<2 x s16>) + ; GFX9-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>), [[UV2:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[COPY]](<6 x s16>) + ; GFX9-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>) ; GFX9-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 ; GFX9-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) - ; GFX9-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST 
[[FMAXNUM_IEEE1]](<2 x s16>) - ; GFX9-NEXT: [[UV8:%[0-9]+]]:_(<2 x s16>), [[UV9:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF1]](<4 x s16>) - ; GFX9-NEXT: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[UV8]](<2 x s16>) + ; GFX9-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>) + ; GFX9-NEXT: [[UV3:%[0-9]+]]:_(<2 x s16>), [[UV4:%[0-9]+]]:_(<2 x s16>), [[UV5:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[COPY1]](<6 x s16>) + ; GFX9-NEXT: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[UV3]](<2 x s16>) ; GFX9-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C]](s32) - ; GFX9-NEXT: [[BITCAST3:%[0-9]+]]:_(s32) = G_BITCAST [[UV9]](<2 x s16>) + ; GFX9-NEXT: [[BITCAST3:%[0-9]+]]:_(s32) = G_BITCAST [[UV4]](<2 x s16>) ; GFX9-NEXT: [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[BITCAST]](s32), [[LSHR]](s32) - ; GFX9-NEXT: [[BUILD_VECTOR_TRUNC1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[BITCAST1]](s32), [[BITCAST2]](s32) - ; GFX9-NEXT: [[BUILD_VECTOR_TRUNC2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[LSHR1]](s32), [[BITCAST3]](s32) - ; GFX9-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR_TRUNC]](<2 x s16>), [[BUILD_VECTOR_TRUNC1]](<2 x s16>), [[BUILD_VECTOR_TRUNC2]](<2 x s16>) + ; GFX9-NEXT: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF + ; GFX9-NEXT: [[BUILD_VECTOR_TRUNC1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[BITCAST1]](s32), [[DEF]](s32) + ; GFX9-NEXT: [[BUILD_VECTOR_TRUNC2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[BITCAST2]](s32), [[LSHR1]](s32) + ; GFX9-NEXT: [[BUILD_VECTOR_TRUNC3:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[BITCAST3]](s32), [[DEF]](s32) + ; GFX9-NEXT: [[FCANONICALIZE:%[0-9]+]]:_(<2 x s16>) = G_FCANONICALIZE [[BUILD_VECTOR_TRUNC]] + ; GFX9-NEXT: [[FCANONICALIZE1:%[0-9]+]]:_(<2 x s16>) = G_FCANONICALIZE [[BUILD_VECTOR_TRUNC2]] + ; GFX9-NEXT: [[FMAXNUM_IEEE:%[0-9]+]]:_(<2 x s16>) = G_FMAXNUM_IEEE [[FCANONICALIZE]], [[FCANONICALIZE1]] + ; GFX9-NEXT: [[FCANONICALIZE2:%[0-9]+]]:_(<2 x s16>) = G_FCANONICALIZE [[BUILD_VECTOR_TRUNC1]] + ; GFX9-NEXT: [[FCANONICALIZE3:%[0-9]+]]:_(<2 x s16>) = G_FCANONICALIZE [[BUILD_VECTOR_TRUNC3]] + ; GFX9-NEXT: [[FMAXNUM_IEEE1:%[0-9]+]]:_(<2 x s16>) = G_FMAXNUM_IEEE [[FCANONICALIZE2]], [[FCANONICALIZE3]] + ; GFX9-NEXT: [[BITCAST4:%[0-9]+]]:_(s32) = G_BITCAST [[FMAXNUM_IEEE]](<2 x s16>) + ; GFX9-NEXT: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST4]], [[C]](s32) + ; GFX9-NEXT: [[BITCAST5:%[0-9]+]]:_(s32) = G_BITCAST [[FMAXNUM_IEEE1]](<2 x s16>) + ; GFX9-NEXT: [[DEF1:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF + ; GFX9-NEXT: [[UV6:%[0-9]+]]:_(<2 x s16>), [[UV7:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF1]](<4 x s16>) + ; GFX9-NEXT: [[BITCAST6:%[0-9]+]]:_(s32) = G_BITCAST [[UV6]](<2 x s16>) + ; GFX9-NEXT: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST6]], [[C]](s32) + ; GFX9-NEXT: [[BITCAST7:%[0-9]+]]:_(s32) = G_BITCAST [[UV7]](<2 x s16>) + ; GFX9-NEXT: [[BUILD_VECTOR_TRUNC4:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[BITCAST4]](s32), [[LSHR2]](s32) + ; GFX9-NEXT: [[BUILD_VECTOR_TRUNC5:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[BITCAST5]](s32), [[BITCAST6]](s32) + ; GFX9-NEXT: [[BUILD_VECTOR_TRUNC6:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[LSHR3]](s32), [[BITCAST7]](s32) + ; GFX9-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR_TRUNC4]](<2 x s16>), [[BUILD_VECTOR_TRUNC5]](<2 x s16>), [[BUILD_VECTOR_TRUNC6]](<2 x s16>) ; GFX9-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[CONCAT_VECTORS]](<6 x s16>) %0:_(<6 x s16>) = COPY $vgpr0_vgpr1_vgpr2 %1:_(<6 
x s16>) = COPY $vgpr3_vgpr4_vgpr5 diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-fminnum.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-fminnum.mir index 097b096f82b7..7c8934a28edd 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-fminnum.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-fminnum.mir @@ -1,8 +1,8 @@ # NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py -# RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=tahiti -run-pass=legalizer -global-isel-abort=0 %s -o - | FileCheck -check-prefix=SI %s -# RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=fiji -run-pass=legalizer -global-isel-abort=0 %s -o - | FileCheck -check-prefix=VI %s -# RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx900 -run-pass=legalizer -global-isel-abort=0 %s -o - | FileCheck -check-prefix=GFX9 %s -# RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx1010 -run-pass=legalizer -global-isel-abort=0 %s -o - | FileCheck -check-prefix=GFX9 %s +# RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=tahiti -run-pass=legalizer %s -o - | FileCheck -check-prefix=SI %s +# RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=fiji -run-pass=legalizer %s -o - | FileCheck -check-prefix=VI %s +# RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx900 -run-pass=legalizer %s -o - | FileCheck -check-prefix=GFX9 %s +# RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx1010 -run-pass=legalizer %s -o - | FileCheck -check-prefix=GFX9 %s --- name: test_fminnum_s32_ieee_mode_on @@ -496,32 +496,38 @@ body: | ; GFX9-LABEL: name: test_fminnum_v3s16 ; GFX9: [[COPY:%[0-9]+]]:_(<6 x s16>) = COPY $vgpr0_vgpr1_vgpr2 ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(<6 x s16>) = COPY $vgpr3_vgpr4_vgpr5 - ; GFX9-NEXT: [[UV:%[0-9]+]]:_(<3 x s16>), [[UV1:%[0-9]+]]:_(<3 x s16>) = G_UNMERGE_VALUES [[COPY]](<6 x s16>) - ; GFX9-NEXT: [[UV2:%[0-9]+]]:_(<3 x s16>), [[UV3:%[0-9]+]]:_(<3 x s16>) = G_UNMERGE_VALUES [[COPY1]](<6 x s16>) - ; GFX9-NEXT: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF - ; GFX9-NEXT: [[INSERT:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF]], [[UV]](<3 x s16>), 0 - ; GFX9-NEXT: [[INSERT1:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF]], [[UV2]](<3 x s16>), 0 - ; GFX9-NEXT: [[UV4:%[0-9]+]]:_(<2 x s16>), [[UV5:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[INSERT]](<4 x s16>) - ; GFX9-NEXT: [[UV6:%[0-9]+]]:_(<2 x s16>), [[UV7:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[INSERT1]](<4 x s16>) - ; GFX9-NEXT: [[FCANONICALIZE:%[0-9]+]]:_(<2 x s16>) = G_FCANONICALIZE [[UV4]] - ; GFX9-NEXT: [[FCANONICALIZE1:%[0-9]+]]:_(<2 x s16>) = G_FCANONICALIZE [[UV6]] - ; GFX9-NEXT: [[FMINNUM_IEEE:%[0-9]+]]:_(<2 x s16>) = G_FMINNUM_IEEE [[FCANONICALIZE]], [[FCANONICALIZE1]] - ; GFX9-NEXT: [[FCANONICALIZE2:%[0-9]+]]:_(<2 x s16>) = G_FCANONICALIZE [[UV5]] - ; GFX9-NEXT: [[FCANONICALIZE3:%[0-9]+]]:_(<2 x s16>) = G_FCANONICALIZE [[UV7]] - ; GFX9-NEXT: [[FMINNUM_IEEE1:%[0-9]+]]:_(<2 x s16>) = G_FMINNUM_IEEE [[FCANONICALIZE2]], [[FCANONICALIZE3]] - ; GFX9-NEXT: [[DEF1:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF - ; GFX9-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[FMINNUM_IEEE]](<2 x s16>) + ; GFX9-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>), [[UV2:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[COPY]](<6 x s16>) + ; GFX9-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>) ; GFX9-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 ; GFX9-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) - ; GFX9-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[FMINNUM_IEEE1]](<2 x s16>) - ; GFX9-NEXT: [[UV8:%[0-9]+]]:_(<2 x s16>), [[UV9:%[0-9]+]]:_(<2 x s16>) = 
G_UNMERGE_VALUES [[DEF1]](<4 x s16>) - ; GFX9-NEXT: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[UV8]](<2 x s16>) + ; GFX9-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>) + ; GFX9-NEXT: [[UV3:%[0-9]+]]:_(<2 x s16>), [[UV4:%[0-9]+]]:_(<2 x s16>), [[UV5:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[COPY1]](<6 x s16>) + ; GFX9-NEXT: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[UV3]](<2 x s16>) ; GFX9-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C]](s32) - ; GFX9-NEXT: [[BITCAST3:%[0-9]+]]:_(s32) = G_BITCAST [[UV9]](<2 x s16>) + ; GFX9-NEXT: [[BITCAST3:%[0-9]+]]:_(s32) = G_BITCAST [[UV4]](<2 x s16>) ; GFX9-NEXT: [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[BITCAST]](s32), [[LSHR]](s32) - ; GFX9-NEXT: [[BUILD_VECTOR_TRUNC1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[BITCAST1]](s32), [[BITCAST2]](s32) - ; GFX9-NEXT: [[BUILD_VECTOR_TRUNC2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[LSHR1]](s32), [[BITCAST3]](s32) - ; GFX9-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR_TRUNC]](<2 x s16>), [[BUILD_VECTOR_TRUNC1]](<2 x s16>), [[BUILD_VECTOR_TRUNC2]](<2 x s16>) + ; GFX9-NEXT: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF + ; GFX9-NEXT: [[BUILD_VECTOR_TRUNC1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[BITCAST1]](s32), [[DEF]](s32) + ; GFX9-NEXT: [[BUILD_VECTOR_TRUNC2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[BITCAST2]](s32), [[LSHR1]](s32) + ; GFX9-NEXT: [[BUILD_VECTOR_TRUNC3:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[BITCAST3]](s32), [[DEF]](s32) + ; GFX9-NEXT: [[FCANONICALIZE:%[0-9]+]]:_(<2 x s16>) = G_FCANONICALIZE [[BUILD_VECTOR_TRUNC]] + ; GFX9-NEXT: [[FCANONICALIZE1:%[0-9]+]]:_(<2 x s16>) = G_FCANONICALIZE [[BUILD_VECTOR_TRUNC2]] + ; GFX9-NEXT: [[FMINNUM_IEEE:%[0-9]+]]:_(<2 x s16>) = G_FMINNUM_IEEE [[FCANONICALIZE]], [[FCANONICALIZE1]] + ; GFX9-NEXT: [[FCANONICALIZE2:%[0-9]+]]:_(<2 x s16>) = G_FCANONICALIZE [[BUILD_VECTOR_TRUNC1]] + ; GFX9-NEXT: [[FCANONICALIZE3:%[0-9]+]]:_(<2 x s16>) = G_FCANONICALIZE [[BUILD_VECTOR_TRUNC3]] + ; GFX9-NEXT: [[FMINNUM_IEEE1:%[0-9]+]]:_(<2 x s16>) = G_FMINNUM_IEEE [[FCANONICALIZE2]], [[FCANONICALIZE3]] + ; GFX9-NEXT: [[BITCAST4:%[0-9]+]]:_(s32) = G_BITCAST [[FMINNUM_IEEE]](<2 x s16>) + ; GFX9-NEXT: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST4]], [[C]](s32) + ; GFX9-NEXT: [[BITCAST5:%[0-9]+]]:_(s32) = G_BITCAST [[FMINNUM_IEEE1]](<2 x s16>) + ; GFX9-NEXT: [[DEF1:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF + ; GFX9-NEXT: [[UV6:%[0-9]+]]:_(<2 x s16>), [[UV7:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF1]](<4 x s16>) + ; GFX9-NEXT: [[BITCAST6:%[0-9]+]]:_(s32) = G_BITCAST [[UV6]](<2 x s16>) + ; GFX9-NEXT: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST6]], [[C]](s32) + ; GFX9-NEXT: [[BITCAST7:%[0-9]+]]:_(s32) = G_BITCAST [[UV7]](<2 x s16>) + ; GFX9-NEXT: [[BUILD_VECTOR_TRUNC4:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[BITCAST4]](s32), [[LSHR2]](s32) + ; GFX9-NEXT: [[BUILD_VECTOR_TRUNC5:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[BITCAST5]](s32), [[BITCAST6]](s32) + ; GFX9-NEXT: [[BUILD_VECTOR_TRUNC6:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[LSHR3]](s32), [[BITCAST7]](s32) + ; GFX9-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR_TRUNC4]](<2 x s16>), [[BUILD_VECTOR_TRUNC5]](<2 x s16>), [[BUILD_VECTOR_TRUNC6]](<2 x s16>) ; GFX9-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[CONCAT_VECTORS]](<6 x s16>) %0:_(<6 x s16>) = COPY $vgpr0_vgpr1_vgpr2 %1:_(<6 x s16>) = COPY $vgpr3_vgpr4_vgpr5 diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-fmul.mir 
b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-fmul.mir index 970c9f64565f..5e86ea25e674 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-fmul.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-fmul.mir @@ -1,8 +1,8 @@ # NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py -# RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=tahiti -run-pass=legalizer -global-isel-abort=0 %s -o - | FileCheck -check-prefix=SI %s -# RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=fiji -run-pass=legalizer -global-isel-abort=0 %s -o - | FileCheck -check-prefix=VI %s -# RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx900 -run-pass=legalizer -global-isel-abort=0 %s -o - | FileCheck -check-prefix=GFX9PLUS %s -# RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx1010 -run-pass=legalizer -global-isel-abort=0 %s -o - | FileCheck -check-prefix=GFX9PLUS %s +# RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=tahiti -run-pass=legalizer %s -o - | FileCheck -check-prefix=SI %s +# RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=fiji -run-pass=legalizer %s -o - | FileCheck -check-prefix=VI %s +# RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx900 -run-pass=legalizer %s -o - | FileCheck -check-prefix=GFX9PLUS %s +# RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx1010 -run-pass=legalizer %s -o - | FileCheck -check-prefix=GFX9PLUS %s --- name: test_fmul_s32 @@ -433,21 +433,21 @@ body: | ; GFX9PLUS-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 ; GFX9PLUS-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) ; GFX9PLUS-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>) - ; GFX9PLUS-NEXT: [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[BITCAST]](s32), [[LSHR]](s32) - ; GFX9PLUS-NEXT: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF - ; GFX9PLUS-NEXT: [[BUILD_VECTOR_TRUNC1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[BITCAST1]](s32), [[DEF]](s32) ; GFX9PLUS-NEXT: [[UV3:%[0-9]+]]:_(<2 x s16>), [[UV4:%[0-9]+]]:_(<2 x s16>), [[UV5:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[COPY1]](<6 x s16>) ; GFX9PLUS-NEXT: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[UV3]](<2 x s16>) ; GFX9PLUS-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C]](s32) ; GFX9PLUS-NEXT: [[BITCAST3:%[0-9]+]]:_(s32) = G_BITCAST [[UV4]](<2 x s16>) + ; GFX9PLUS-NEXT: [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[BITCAST]](s32), [[LSHR]](s32) + ; GFX9PLUS-NEXT: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF + ; GFX9PLUS-NEXT: [[BUILD_VECTOR_TRUNC1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[BITCAST1]](s32), [[DEF]](s32) ; GFX9PLUS-NEXT: [[BUILD_VECTOR_TRUNC2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[BITCAST2]](s32), [[LSHR1]](s32) ; GFX9PLUS-NEXT: [[BUILD_VECTOR_TRUNC3:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[BITCAST3]](s32), [[DEF]](s32) ; GFX9PLUS-NEXT: [[FMUL:%[0-9]+]]:_(<2 x s16>) = G_FMUL [[BUILD_VECTOR_TRUNC]], [[BUILD_VECTOR_TRUNC2]] ; GFX9PLUS-NEXT: [[FMUL1:%[0-9]+]]:_(<2 x s16>) = G_FMUL [[BUILD_VECTOR_TRUNC1]], [[BUILD_VECTOR_TRUNC3]] - ; GFX9PLUS-NEXT: [[DEF1:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF ; GFX9PLUS-NEXT: [[BITCAST4:%[0-9]+]]:_(s32) = G_BITCAST [[FMUL]](<2 x s16>) ; GFX9PLUS-NEXT: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST4]], [[C]](s32) ; GFX9PLUS-NEXT: [[BITCAST5:%[0-9]+]]:_(s32) = G_BITCAST [[FMUL1]](<2 x s16>) + ; GFX9PLUS-NEXT: [[DEF1:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF ; GFX9PLUS-NEXT: [[UV6:%[0-9]+]]:_(<2 x s16>), [[UV7:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF1]](<4 x s16>) ; GFX9PLUS-NEXT: [[BITCAST6:%[0-9]+]]:_(s32) = G_BITCAST [[UV6]](<2 x s16>) ; 
GFX9PLUS-NEXT: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST6]], [[C]](s32) diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-freeze.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-freeze.mir index ddf5ec6db384..f0d91b08d007 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-freeze.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-freeze.mir @@ -223,26 +223,9 @@ body: | ; CHECK-LABEL: name: test_freeze_s1056 ; CHECK: [[COPY:%[0-9]+]]:_(s512) = COPY $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 - ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32), [[UV6:%[0-9]+]]:_(s32), [[UV7:%[0-9]+]]:_(s32), [[UV8:%[0-9]+]]:_(s32), [[UV9:%[0-9]+]]:_(s32), [[UV10:%[0-9]+]]:_(s32), [[UV11:%[0-9]+]]:_(s32), [[UV12:%[0-9]+]]:_(s32), [[UV13:%[0-9]+]]:_(s32), [[UV14:%[0-9]+]]:_(s32), [[UV15:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](s512) - ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF - ; CHECK-NEXT: [[DEF1:%[0-9]+]]:_(s64) = G_IMPLICIT_DEF - ; CHECK-NEXT: [[UV16:%[0-9]+]]:_(s32), [[UV17:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[DEF1]](s64) - ; CHECK-NEXT: [[UV18:%[0-9]+]]:_(s32), [[UV19:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[DEF1]](s64) - ; CHECK-NEXT: [[UV20:%[0-9]+]]:_(s32), [[UV21:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[DEF1]](s64) - ; CHECK-NEXT: [[UV22:%[0-9]+]]:_(s32), [[UV23:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[DEF1]](s64) - ; CHECK-NEXT: [[UV24:%[0-9]+]]:_(s32), [[UV25:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[DEF1]](s64) - ; CHECK-NEXT: [[UV26:%[0-9]+]]:_(s32), [[UV27:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[DEF1]](s64) - ; CHECK-NEXT: [[UV28:%[0-9]+]]:_(s32), [[UV29:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[DEF1]](s64) - ; CHECK-NEXT: [[UV30:%[0-9]+]]:_(s32), [[UV31:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[DEF1]](s64) - ; CHECK-NEXT: [[UV32:%[0-9]+]]:_(s32), [[UV33:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[DEF1]](s64) - ; CHECK-NEXT: [[MV:%[0-9]+]]:_(s1024) = G_MERGE_VALUES [[UV]](s32), [[UV1]](s32), [[UV2]](s32), [[UV3]](s32), [[UV4]](s32), [[UV5]](s32), [[UV6]](s32), [[UV7]](s32), [[UV8]](s32), [[UV9]](s32), [[UV10]](s32), [[UV11]](s32), [[UV12]](s32), [[UV13]](s32), [[UV14]](s32), [[UV15]](s32), [[UV16]](s32), [[UV17]](s32), [[UV18]](s32), [[UV19]](s32), [[UV20]](s32), [[UV21]](s32), [[UV22]](s32), [[UV23]](s32), [[UV24]](s32), [[UV25]](s32), [[UV26]](s32), [[UV27]](s32), [[UV28]](s32), [[UV29]](s32), [[UV30]](s32), [[UV31]](s32) - ; CHECK-NEXT: [[MV1:%[0-9]+]]:_(s1024) = G_MERGE_VALUES [[UV32]](s32), [[DEF]](s32), [[DEF]](s32), [[DEF]](s32), [[DEF]](s32), [[DEF]](s32), [[DEF]](s32), [[DEF]](s32), [[DEF]](s32), [[DEF]](s32), [[DEF]](s32), [[DEF]](s32), [[DEF]](s32), [[DEF]](s32), [[DEF]](s32), [[DEF]](s32), [[DEF]](s32), [[DEF]](s32), [[DEF]](s32), [[DEF]](s32), [[DEF]](s32), [[DEF]](s32), [[DEF]](s32), [[DEF]](s32), [[DEF]](s32), [[DEF]](s32), [[DEF]](s32), [[DEF]](s32), [[DEF]](s32), [[DEF]](s32), [[DEF]](s32), [[DEF]](s32) - ; CHECK-NEXT: [[DEF2:%[0-9]+]]:_(s1024) = G_IMPLICIT_DEF - ; CHECK-NEXT: [[FREEZE:%[0-9]+]]:_(s1024) = G_FREEZE [[MV]] - ; CHECK-NEXT: [[FREEZE1:%[0-9]+]]:_(s1024) = G_FREEZE [[MV1]] - ; CHECK-NEXT: [[MV2:%[0-9]+]]:_(s33792) = G_MERGE_VALUES [[FREEZE]](s1024), [[FREEZE1]](s1024), [[DEF2]](s1024), [[DEF2]](s1024), [[DEF2]](s1024), [[DEF2]](s1024), [[DEF2]](s1024), [[DEF2]](s1024), [[DEF2]](s1024), [[DEF2]](s1024), [[DEF2]](s1024), [[DEF2]](s1024), [[DEF2]](s1024), [[DEF2]](s1024), [[DEF2]](s1024), 
[[DEF2]](s1024), [[DEF2]](s1024), [[DEF2]](s1024), [[DEF2]](s1024), [[DEF2]](s1024), [[DEF2]](s1024), [[DEF2]](s1024), [[DEF2]](s1024), [[DEF2]](s1024), [[DEF2]](s1024), [[DEF2]](s1024), [[DEF2]](s1024), [[DEF2]](s1024), [[DEF2]](s1024), [[DEF2]](s1024), [[DEF2]](s1024), [[DEF2]](s1024), [[DEF2]](s1024) - ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(s1056) = G_TRUNC [[MV2]](s33792) - ; CHECK-NEXT: S_NOP 0, implicit [[TRUNC]](s1056) + ; CHECK-NEXT: [[ANYEXT:%[0-9]+]]:_(s1056) = G_ANYEXT [[COPY]](s512) + ; CHECK-NEXT: [[FREEZE:%[0-9]+]]:_(s1056) = G_FREEZE [[ANYEXT]] + ; CHECK-NEXT: S_NOP 0, implicit [[FREEZE]](s1056) %0:_(s512) = COPY $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 %1:_(s1056) = G_ANYEXT %0 %2:_(s1056) = G_FREEZE %1 @@ -417,19 +400,18 @@ body: | ; CHECK-LABEL: name: test_freeze_v33s32 ; CHECK: [[DEF:%[0-9]+]]:_(<16 x s32>) = G_IMPLICIT_DEF + ; CHECK-NEXT: [[DEF1:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32), [[UV6:%[0-9]+]]:_(s32), [[UV7:%[0-9]+]]:_(s32), [[UV8:%[0-9]+]]:_(s32), [[UV9:%[0-9]+]]:_(s32), [[UV10:%[0-9]+]]:_(s32), [[UV11:%[0-9]+]]:_(s32), [[UV12:%[0-9]+]]:_(s32), [[UV13:%[0-9]+]]:_(s32), [[UV14:%[0-9]+]]:_(s32), [[UV15:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[DEF]](<16 x s32>) ; CHECK-NEXT: [[UV16:%[0-9]+]]:_(s32), [[UV17:%[0-9]+]]:_(s32), [[UV18:%[0-9]+]]:_(s32), [[UV19:%[0-9]+]]:_(s32), [[UV20:%[0-9]+]]:_(s32), [[UV21:%[0-9]+]]:_(s32), [[UV22:%[0-9]+]]:_(s32), [[UV23:%[0-9]+]]:_(s32), [[UV24:%[0-9]+]]:_(s32), [[UV25:%[0-9]+]]:_(s32), [[UV26:%[0-9]+]]:_(s32), [[UV27:%[0-9]+]]:_(s32), [[UV28:%[0-9]+]]:_(s32), [[UV29:%[0-9]+]]:_(s32), [[UV30:%[0-9]+]]:_(s32), [[UV31:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[DEF]](<16 x s32>) - ; CHECK-NEXT: [[UV32:%[0-9]+]]:_(s32), [[UV33:%[0-9]+]]:_(s32), [[UV34:%[0-9]+]]:_(s32), [[UV35:%[0-9]+]]:_(s32), [[UV36:%[0-9]+]]:_(s32), [[UV37:%[0-9]+]]:_(s32), [[UV38:%[0-9]+]]:_(s32), [[UV39:%[0-9]+]]:_(s32), [[UV40:%[0-9]+]]:_(s32), [[UV41:%[0-9]+]]:_(s32), [[UV42:%[0-9]+]]:_(s32), [[UV43:%[0-9]+]]:_(s32), [[UV44:%[0-9]+]]:_(s32), [[UV45:%[0-9]+]]:_(s32), [[UV46:%[0-9]+]]:_(s32), [[UV47:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[DEF]](<16 x s32>) - ; CHECK-NEXT: [[DEF1:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<16 x s32>) = G_BUILD_VECTOR [[UV]](s32), [[UV1]](s32), [[UV2]](s32), [[UV3]](s32), [[UV4]](s32), [[UV5]](s32), [[UV6]](s32), [[UV7]](s32), [[UV8]](s32), [[UV9]](s32), [[UV10]](s32), [[UV11]](s32), [[UV12]](s32), [[UV13]](s32), [[UV14]](s32), [[UV15]](s32) ; CHECK-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<16 x s32>) = G_BUILD_VECTOR [[UV16]](s32), [[UV17]](s32), [[UV18]](s32), [[UV19]](s32), [[UV20]](s32), [[UV21]](s32), [[UV22]](s32), [[UV23]](s32), [[UV24]](s32), [[UV25]](s32), [[UV26]](s32), [[UV27]](s32), [[UV28]](s32), [[UV29]](s32), [[UV30]](s32), [[UV31]](s32) - ; CHECK-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<16 x s32>) = G_BUILD_VECTOR [[UV32]](s32), [[DEF1]](s32), [[DEF1]](s32), [[DEF1]](s32), [[DEF1]](s32), [[DEF1]](s32), [[DEF1]](s32), [[DEF1]](s32), [[DEF1]](s32), [[DEF1]](s32), [[DEF1]](s32), [[DEF1]](s32), [[DEF1]](s32), [[DEF1]](s32), [[DEF1]](s32), [[DEF1]](s32) ; CHECK-NEXT: [[FREEZE:%[0-9]+]]:_(<16 x s32>) = G_FREEZE [[BUILD_VECTOR]] ; CHECK-NEXT: [[FREEZE1:%[0-9]+]]:_(<16 x s32>) = G_FREEZE [[BUILD_VECTOR1]] - ; CHECK-NEXT: [[FREEZE2:%[0-9]+]]:_(<16 x s32>) = G_FREEZE [[BUILD_VECTOR2]] - ; CHECK-NEXT: 
[[CONCAT_VECTORS:%[0-9]+]]:_(<528 x s32>) = G_CONCAT_VECTORS [[FREEZE]](<16 x s32>), [[FREEZE1]](<16 x s32>), [[FREEZE2]](<16 x s32>), [[DEF]](<16 x s32>), [[DEF]](<16 x s32>), [[DEF]](<16 x s32>), [[DEF]](<16 x s32>), [[DEF]](<16 x s32>), [[DEF]](<16 x s32>), [[DEF]](<16 x s32>), [[DEF]](<16 x s32>), [[DEF]](<16 x s32>), [[DEF]](<16 x s32>), [[DEF]](<16 x s32>), [[DEF]](<16 x s32>), [[DEF]](<16 x s32>), [[DEF]](<16 x s32>), [[DEF]](<16 x s32>), [[DEF]](<16 x s32>), [[DEF]](<16 x s32>), [[DEF]](<16 x s32>), [[DEF]](<16 x s32>), [[DEF]](<16 x s32>), [[DEF]](<16 x s32>), [[DEF]](<16 x s32>), [[DEF]](<16 x s32>), [[DEF]](<16 x s32>), [[DEF]](<16 x s32>), [[DEF]](<16 x s32>), [[DEF]](<16 x s32>), [[DEF]](<16 x s32>), [[DEF]](<16 x s32>), [[DEF]](<16 x s32>) - ; CHECK-NEXT: [[UV48:%[0-9]+]]:_(<33 x s32>), [[UV49:%[0-9]+]]:_(<33 x s32>), [[UV50:%[0-9]+]]:_(<33 x s32>), [[UV51:%[0-9]+]]:_(<33 x s32>), [[UV52:%[0-9]+]]:_(<33 x s32>), [[UV53:%[0-9]+]]:_(<33 x s32>), [[UV54:%[0-9]+]]:_(<33 x s32>), [[UV55:%[0-9]+]]:_(<33 x s32>), [[UV56:%[0-9]+]]:_(<33 x s32>), [[UV57:%[0-9]+]]:_(<33 x s32>), [[UV58:%[0-9]+]]:_(<33 x s32>), [[UV59:%[0-9]+]]:_(<33 x s32>), [[UV60:%[0-9]+]]:_(<33 x s32>), [[UV61:%[0-9]+]]:_(<33 x s32>), [[UV62:%[0-9]+]]:_(<33 x s32>), [[UV63:%[0-9]+]]:_(<33 x s32>) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<528 x s32>) - ; CHECK-NEXT: S_NOP 0, implicit [[UV48]](<33 x s32>) + ; CHECK-NEXT: [[FREEZE2:%[0-9]+]]:_(s32) = G_FREEZE [[DEF1]] + ; CHECK-NEXT: [[UV32:%[0-9]+]]:_(s32), [[UV33:%[0-9]+]]:_(s32), [[UV34:%[0-9]+]]:_(s32), [[UV35:%[0-9]+]]:_(s32), [[UV36:%[0-9]+]]:_(s32), [[UV37:%[0-9]+]]:_(s32), [[UV38:%[0-9]+]]:_(s32), [[UV39:%[0-9]+]]:_(s32), [[UV40:%[0-9]+]]:_(s32), [[UV41:%[0-9]+]]:_(s32), [[UV42:%[0-9]+]]:_(s32), [[UV43:%[0-9]+]]:_(s32), [[UV44:%[0-9]+]]:_(s32), [[UV45:%[0-9]+]]:_(s32), [[UV46:%[0-9]+]]:_(s32), [[UV47:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[FREEZE]](<16 x s32>) + ; CHECK-NEXT: [[UV48:%[0-9]+]]:_(s32), [[UV49:%[0-9]+]]:_(s32), [[UV50:%[0-9]+]]:_(s32), [[UV51:%[0-9]+]]:_(s32), [[UV52:%[0-9]+]]:_(s32), [[UV53:%[0-9]+]]:_(s32), [[UV54:%[0-9]+]]:_(s32), [[UV55:%[0-9]+]]:_(s32), [[UV56:%[0-9]+]]:_(s32), [[UV57:%[0-9]+]]:_(s32), [[UV58:%[0-9]+]]:_(s32), [[UV59:%[0-9]+]]:_(s32), [[UV60:%[0-9]+]]:_(s32), [[UV61:%[0-9]+]]:_(s32), [[UV62:%[0-9]+]]:_(s32), [[UV63:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[FREEZE1]](<16 x s32>) + ; CHECK-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<33 x s32>) = G_BUILD_VECTOR [[UV32]](s32), [[UV33]](s32), [[UV34]](s32), [[UV35]](s32), [[UV36]](s32), [[UV37]](s32), [[UV38]](s32), [[UV39]](s32), [[UV40]](s32), [[UV41]](s32), [[UV42]](s32), [[UV43]](s32), [[UV44]](s32), [[UV45]](s32), [[UV46]](s32), [[UV47]](s32), [[UV48]](s32), [[UV49]](s32), [[UV50]](s32), [[UV51]](s32), [[UV52]](s32), [[UV53]](s32), [[UV54]](s32), [[UV55]](s32), [[UV56]](s32), [[UV57]](s32), [[UV58]](s32), [[UV59]](s32), [[UV60]](s32), [[UV61]](s32), [[UV62]](s32), [[UV63]](s32), [[FREEZE2]](s32) + ; CHECK-NEXT: S_NOP 0, implicit [[BUILD_VECTOR2]](<33 x s32>) %0:_(<33 x s32>) = G_IMPLICIT_DEF %1:_(<33 x s32>) = G_FREEZE %0 S_NOP 0, implicit %1 @@ -530,16 +512,13 @@ body: | ; CHECK-LABEL: name: test_freeze_v3s8 ; CHECK: [[COPY:%[0-9]+]]:_(<3 x s32>) = COPY $vgpr0_vgpr1_vgpr2 - ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(<3 x s8>) = G_TRUNC [[COPY]](<3 x s32>) - ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(<4 x s8>) = G_IMPLICIT_DEF - ; CHECK-NEXT: [[INSERT:%[0-9]+]]:_(<4 x s8>) = G_INSERT [[DEF]], [[TRUNC]](<3 x s8>), 0 - ; CHECK-NEXT: [[ANYEXT:%[0-9]+]]:_(<4 x s32>) = G_ANYEXT [[INSERT]](<4 x s8>) - ; CHECK-NEXT: 
[[FREEZE:%[0-9]+]]:_(<4 x s32>) = G_FREEZE [[ANYEXT]] - ; CHECK-NEXT: [[TRUNC1:%[0-9]+]]:_(<4 x s8>) = G_TRUNC [[FREEZE]](<4 x s32>) - ; CHECK-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<12 x s8>) = G_CONCAT_VECTORS [[TRUNC1]](<4 x s8>), [[DEF]](<4 x s8>), [[DEF]](<4 x s8>) - ; CHECK-NEXT: [[UV:%[0-9]+]]:_(<3 x s8>), [[UV1:%[0-9]+]]:_(<3 x s8>), [[UV2:%[0-9]+]]:_(<3 x s8>), [[UV3:%[0-9]+]]:_(<3 x s8>) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<12 x s8>) - ; CHECK-NEXT: [[ANYEXT1:%[0-9]+]]:_(<3 x s32>) = G_ANYEXT [[UV]](<3 x s8>) - ; CHECK-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[ANYEXT1]](<3 x s32>) + ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<3 x s32>) + ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF + ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[UV]](s32), [[UV1]](s32), [[UV2]](s32), [[DEF]](s32) + ; CHECK-NEXT: [[FREEZE:%[0-9]+]]:_(<4 x s32>) = G_FREEZE [[BUILD_VECTOR]] + ; CHECK-NEXT: [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32), [[UV6:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[FREEZE]](<4 x s32>) + ; CHECK-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[UV3]](s32), [[UV4]](s32), [[UV5]](s32) + ; CHECK-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[BUILD_VECTOR1]](<3 x s32>) %0:_(<3 x s32>) = COPY $vgpr0_vgpr1_vgpr2 %1:_(<3 x s8>) = G_TRUNC %0 %2:_(<3 x s8>) = G_FREEZE %1 @@ -569,24 +548,26 @@ body: | ; CHECK-LABEL: name: test_freeze_v3s16 ; CHECK: [[COPY:%[0-9]+]]:_(<3 x s32>) = COPY $vgpr0_vgpr1_vgpr2 ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<3 x s32>) - ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF - ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[UV]](s32), [[UV1]](s32) - ; CHECK-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[UV2]](s32), [[DEF]](s32) - ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(<2 x s16>) = G_TRUNC [[BUILD_VECTOR]](<2 x s32>) - ; CHECK-NEXT: [[TRUNC1:%[0-9]+]]:_(<2 x s16>) = G_TRUNC [[BUILD_VECTOR1]](<2 x s32>) - ; CHECK-NEXT: [[DEF1:%[0-9]+]]:_(<2 x s16>) = G_IMPLICIT_DEF - ; CHECK-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[TRUNC]](<2 x s16>), [[TRUNC1]](<2 x s16>), [[DEF1]](<2 x s16>) - ; CHECK-NEXT: [[UV3:%[0-9]+]]:_(<3 x s16>), [[UV4:%[0-9]+]]:_(<3 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<6 x s16>) - ; CHECK-NEXT: [[DEF2:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF - ; CHECK-NEXT: [[INSERT:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF2]], [[UV3]](<3 x s16>), 0 - ; CHECK-NEXT: [[FREEZE:%[0-9]+]]:_(<4 x s16>) = G_FREEZE [[INSERT]] - ; CHECK-NEXT: [[UV5:%[0-9]+]]:_(<2 x s16>), [[UV6:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[FREEZE]](<4 x s16>) - ; CHECK-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV5]](<2 x s16>) - ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; CHECK-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) - ; CHECK-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV6]](<2 x s16>) - ; CHECK-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[BITCAST]](s32), [[LSHR]](s32), [[BITCAST1]](s32) - ; CHECK-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[BUILD_VECTOR2]](<3 x s32>) + ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 + ; CHECK-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[UV]], [[C]] + ; CHECK-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[UV1]], [[C]] + ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; CHECK-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C1]](s32) + ; CHECK-NEXT: 
[[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]] + ; CHECK-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) + ; CHECK-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[UV2]], [[C]] + ; CHECK-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 + ; CHECK-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[C2]], [[C1]](s32) + ; CHECK-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL1]] + ; CHECK-NEXT: [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32) + ; CHECK-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BITCAST]](<2 x s16>), [[BITCAST1]](<2 x s16>) + ; CHECK-NEXT: [[FREEZE:%[0-9]+]]:_(<4 x s16>) = G_FREEZE [[CONCAT_VECTORS]] + ; CHECK-NEXT: [[UV3:%[0-9]+]]:_(<2 x s16>), [[UV4:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[FREEZE]](<4 x s16>) + ; CHECK-NEXT: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[UV3]](<2 x s16>) + ; CHECK-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C1]](s32) + ; CHECK-NEXT: [[BITCAST3:%[0-9]+]]:_(s32) = G_BITCAST [[UV4]](<2 x s16>) + ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[BITCAST2]](s32), [[LSHR]](s32), [[BITCAST3]](s32) + ; CHECK-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[BUILD_VECTOR]](<3 x s32>) %0:_(<3 x s32>) = COPY $vgpr0_vgpr1_vgpr2 %1:_(<3 x s16>) = G_TRUNC %0 %2:_(<3 x s16>) = G_FREEZE %1 @@ -616,28 +597,33 @@ body: | ; CHECK-LABEL: name: test_freeze_v5s16 ; CHECK: [[COPY:%[0-9]+]]:_(<5 x s32>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4 ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<5 x s32>) - ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF - ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[UV]](s32), [[UV1]](s32) - ; CHECK-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[UV2]](s32), [[UV3]](s32) - ; CHECK-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[UV4]](s32), [[DEF]](s32) - ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(<2 x s16>) = G_TRUNC [[BUILD_VECTOR]](<2 x s32>) - ; CHECK-NEXT: [[TRUNC1:%[0-9]+]]:_(<2 x s16>) = G_TRUNC [[BUILD_VECTOR1]](<2 x s32>) - ; CHECK-NEXT: [[TRUNC2:%[0-9]+]]:_(<2 x s16>) = G_TRUNC [[BUILD_VECTOR2]](<2 x s32>) - ; CHECK-NEXT: [[DEF1:%[0-9]+]]:_(<2 x s16>) = G_IMPLICIT_DEF - ; CHECK-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<10 x s16>) = G_CONCAT_VECTORS [[TRUNC]](<2 x s16>), [[TRUNC1]](<2 x s16>), [[TRUNC2]](<2 x s16>), [[DEF1]](<2 x s16>), [[DEF1]](<2 x s16>) - ; CHECK-NEXT: [[UV5:%[0-9]+]]:_(<5 x s16>), [[UV6:%[0-9]+]]:_(<5 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<10 x s16>) - ; CHECK-NEXT: [[DEF2:%[0-9]+]]:_(<6 x s16>) = G_IMPLICIT_DEF - ; CHECK-NEXT: [[INSERT:%[0-9]+]]:_(<6 x s16>) = G_INSERT [[DEF2]], [[UV5]](<5 x s16>), 0 - ; CHECK-NEXT: [[FREEZE:%[0-9]+]]:_(<6 x s16>) = G_FREEZE [[INSERT]] - ; CHECK-NEXT: [[UV7:%[0-9]+]]:_(<2 x s16>), [[UV8:%[0-9]+]]:_(<2 x s16>), [[UV9:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[FREEZE]](<6 x s16>) - ; CHECK-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV7]](<2 x s16>) - ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; CHECK-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) - ; CHECK-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV8]](<2 x s16>) - ; CHECK-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C]](s32) - ; CHECK-NEXT: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[UV9]](<2 x s16>) - ; CHECK-NEXT: [[BUILD_VECTOR3:%[0-9]+]]:_(<5 x s32>) = G_BUILD_VECTOR [[BITCAST]](s32), [[LSHR]](s32), [[BITCAST1]](s32), [[LSHR1]](s32), [[BITCAST2]](s32) - ; 
CHECK-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4 = COPY [[BUILD_VECTOR3]](<5 x s32>) + ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 + ; CHECK-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[UV]], [[C]] + ; CHECK-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[UV1]], [[C]] + ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; CHECK-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C1]](s32) + ; CHECK-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]] + ; CHECK-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) + ; CHECK-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[UV2]], [[C]] + ; CHECK-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[UV3]], [[C]] + ; CHECK-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C1]](s32) + ; CHECK-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL1]] + ; CHECK-NEXT: [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32) + ; CHECK-NEXT: [[AND4:%[0-9]+]]:_(s32) = G_AND [[UV4]], [[C]] + ; CHECK-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 + ; CHECK-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[C2]], [[C1]](s32) + ; CHECK-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[AND4]], [[SHL2]] + ; CHECK-NEXT: [[BITCAST2:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR2]](s32) + ; CHECK-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BITCAST]](<2 x s16>), [[BITCAST1]](<2 x s16>), [[BITCAST2]](<2 x s16>) + ; CHECK-NEXT: [[FREEZE:%[0-9]+]]:_(<6 x s16>) = G_FREEZE [[CONCAT_VECTORS]] + ; CHECK-NEXT: [[UV5:%[0-9]+]]:_(<2 x s16>), [[UV6:%[0-9]+]]:_(<2 x s16>), [[UV7:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[FREEZE]](<6 x s16>) + ; CHECK-NEXT: [[BITCAST3:%[0-9]+]]:_(s32) = G_BITCAST [[UV5]](<2 x s16>) + ; CHECK-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST3]], [[C1]](s32) + ; CHECK-NEXT: [[BITCAST4:%[0-9]+]]:_(s32) = G_BITCAST [[UV6]](<2 x s16>) + ; CHECK-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST4]], [[C1]](s32) + ; CHECK-NEXT: [[BITCAST5:%[0-9]+]]:_(s32) = G_BITCAST [[UV7]](<2 x s16>) + ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<5 x s32>) = G_BUILD_VECTOR [[BITCAST3]](s32), [[LSHR]](s32), [[BITCAST4]](s32), [[LSHR1]](s32), [[BITCAST5]](s32) + ; CHECK-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4 = COPY [[BUILD_VECTOR]](<5 x s32>) %0:_(<5 x s32>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4 %1:_(<5 x s16>) = G_TRUNC %0 %2:_(<5 x s16>) = G_FREEZE %1 diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-fshl.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-fshl.mir index d3d2e0294f7c..ed51693e83a3 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-fshl.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-fshl.mir @@ -798,19 +798,19 @@ body: | ; GFX9-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 ; GFX9-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) ; GFX9-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[COPY1]](<2 x s16>) + ; GFX9-NEXT: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[COPY2]](<2 x s16>) + ; GFX9-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C]](s32) + ; GFX9-NEXT: [[BITCAST3:%[0-9]+]]:_(s32) = G_BITCAST [[COPY3]](<2 x s16>) + ; GFX9-NEXT: [[BITCAST4:%[0-9]+]]:_(s32) = G_BITCAST [[COPY4]](<2 x s16>) + ; GFX9-NEXT: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST4]], [[C]](s32) + ; GFX9-NEXT: [[BITCAST5:%[0-9]+]]:_(s32) = G_BITCAST [[COPY5]](<2 x s16>) ; GFX9-NEXT: [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[BITCAST]](s32), [[LSHR]](s32) ; GFX9-NEXT: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF ; GFX9-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY [[DEF]](s32) ; GFX9-NEXT: [[BUILD_VECTOR_TRUNC1:%[0-9]+]]:_(<2 x s16>) = 
G_BUILD_VECTOR_TRUNC [[BITCAST1]](s32), [[COPY6]](s32) - ; GFX9-NEXT: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[COPY2]](<2 x s16>) - ; GFX9-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C]](s32) - ; GFX9-NEXT: [[BITCAST3:%[0-9]+]]:_(s32) = G_BITCAST [[COPY3]](<2 x s16>) ; GFX9-NEXT: [[BUILD_VECTOR_TRUNC2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[BITCAST2]](s32), [[LSHR1]](s32) ; GFX9-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY [[DEF]](s32) ; GFX9-NEXT: [[BUILD_VECTOR_TRUNC3:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[BITCAST3]](s32), [[COPY7]](s32) - ; GFX9-NEXT: [[BITCAST4:%[0-9]+]]:_(s32) = G_BITCAST [[COPY4]](<2 x s16>) - ; GFX9-NEXT: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST4]], [[C]](s32) - ; GFX9-NEXT: [[BITCAST5:%[0-9]+]]:_(s32) = G_BITCAST [[COPY5]](<2 x s16>) ; GFX9-NEXT: [[BUILD_VECTOR_TRUNC4:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[BITCAST4]](s32), [[LSHR2]](s32) ; GFX9-NEXT: [[BUILD_VECTOR_TRUNC5:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[BITCAST5]](s32), [[DEF]](s32) ; GFX9-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 15 @@ -845,17 +845,20 @@ body: | ; GFX9-NEXT: [[LSHR5:%[0-9]+]]:_(<2 x s16>) = G_LSHR [[BUILD_VECTOR_TRUNC3]], [[BUILD_VECTOR_TRUNC11]](<2 x s16>) ; GFX9-NEXT: [[LSHR6:%[0-9]+]]:_(<2 x s16>) = G_LSHR [[LSHR5]], [[AND3]](<2 x s16>) ; GFX9-NEXT: [[OR1:%[0-9]+]]:_(<2 x s16>) = G_OR [[SHL1]], [[LSHR6]] + ; GFX9-NEXT: [[BITCAST6:%[0-9]+]]:_(s32) = G_BITCAST [[OR]](<2 x s16>) + ; GFX9-NEXT: [[LSHR7:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST6]], [[C]](s32) + ; GFX9-NEXT: [[BITCAST7:%[0-9]+]]:_(s32) = G_BITCAST [[OR1]](<2 x s16>) ; GFX9-NEXT: [[DEF1:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF - ; GFX9-NEXT: [[BITCAST6:%[0-9]+]]:_(s32) = G_BITCAST [[OR1]](<2 x s16>) ; GFX9-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF1]](<4 x s16>) - ; GFX9-NEXT: [[BITCAST7:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>) - ; GFX9-NEXT: [[LSHR7:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST7]], [[C]](s32) - ; GFX9-NEXT: [[BITCAST8:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>) - ; GFX9-NEXT: [[BUILD_VECTOR_TRUNC12:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[BITCAST6]](s32), [[BITCAST7]](s32) - ; GFX9-NEXT: [[BUILD_VECTOR_TRUNC13:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[LSHR7]](s32), [[BITCAST8]](s32) - ; GFX9-NEXT: $vgpr0 = COPY [[OR]](<2 x s16>) - ; GFX9-NEXT: $vgpr1 = COPY [[BUILD_VECTOR_TRUNC12]](<2 x s16>) - ; GFX9-NEXT: $vgpr2 = COPY [[BUILD_VECTOR_TRUNC13]](<2 x s16>) + ; GFX9-NEXT: [[BITCAST8:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>) + ; GFX9-NEXT: [[LSHR8:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST8]], [[C]](s32) + ; GFX9-NEXT: [[BITCAST9:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>) + ; GFX9-NEXT: [[BUILD_VECTOR_TRUNC12:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[BITCAST6]](s32), [[LSHR7]](s32) + ; GFX9-NEXT: [[BUILD_VECTOR_TRUNC13:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[BITCAST7]](s32), [[BITCAST8]](s32) + ; GFX9-NEXT: [[BUILD_VECTOR_TRUNC14:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[LSHR8]](s32), [[BITCAST9]](s32) + ; GFX9-NEXT: $vgpr0 = COPY [[BUILD_VECTOR_TRUNC12]](<2 x s16>) + ; GFX9-NEXT: $vgpr1 = COPY [[BUILD_VECTOR_TRUNC13]](<2 x s16>) + ; GFX9-NEXT: $vgpr2 = COPY [[BUILD_VECTOR_TRUNC14]](<2 x s16>) %0:_(<2 x s16>) = COPY $vgpr0 %1:_(<2 x s16>) = COPY $vgpr1 %2:_(<2 x s16>) = COPY $vgpr2 diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-fshr.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-fshr.mir index e791b23c839f..c2d2557cae8f 100644 --- 
a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-fshr.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-fshr.mir @@ -674,8 +674,7 @@ body: | ; SI-NEXT: [[BITCAST6:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) ; SI-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[BITCAST5]], [[C1]] ; SI-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 - ; SI-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY [[C2]](s32) - ; SI-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[COPY6]], [[C]](s32) + ; SI-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[C2]], [[C]](s32) ; SI-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL1]] ; SI-NEXT: [[BITCAST7:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32) ; SI-NEXT: [[C3:%[0-9]+]]:_(s16) = G_CONSTANT i16 1 @@ -701,22 +700,22 @@ body: | ; SI-NEXT: [[ZEXT2:%[0-9]+]]:_(s32) = G_ZEXT [[AND7]](s16) ; SI-NEXT: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[LSHR]], [[ZEXT2]](s32) ; SI-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[SHL3]](s32) - ; SI-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY [[C6]](s32) + ; SI-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY [[C6]](s32) ; SI-NEXT: [[AND9:%[0-9]+]]:_(s32) = G_AND [[LSHR1]], [[C1]] - ; SI-NEXT: [[LSHR5:%[0-9]+]]:_(s32) = G_LSHR [[AND9]], [[COPY7]](s32) + ; SI-NEXT: [[LSHR5:%[0-9]+]]:_(s32) = G_LSHR [[AND9]], [[COPY6]](s32) ; SI-NEXT: [[ZEXT3:%[0-9]+]]:_(s32) = G_ZEXT [[AND8]](s16) ; SI-NEXT: [[AND10:%[0-9]+]]:_(s32) = G_AND [[LSHR5]], [[C1]] ; SI-NEXT: [[LSHR6:%[0-9]+]]:_(s32) = G_LSHR [[AND10]], [[ZEXT3]](s32) ; SI-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR6]](s32) ; SI-NEXT: [[OR3:%[0-9]+]]:_(s16) = G_OR [[TRUNC2]], [[TRUNC3]] + ; SI-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY [[C6]](s32) + ; SI-NEXT: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[BITCAST2]], [[COPY7]](s32) ; SI-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY [[C6]](s32) - ; SI-NEXT: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[BITCAST2]], [[COPY8]](s32) - ; SI-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY [[C6]](s32) - ; SI-NEXT: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[LSHR1]], [[COPY9]](s32) + ; SI-NEXT: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[LSHR1]], [[COPY8]](s32) + ; SI-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY [[C1]](s32) ; SI-NEXT: [[COPY10:%[0-9]+]]:_(s32) = COPY [[C1]](s32) - ; SI-NEXT: [[COPY11:%[0-9]+]]:_(s32) = COPY [[C1]](s32) - ; SI-NEXT: [[SHL6:%[0-9]+]]:_(s32) = G_SHL [[COPY11]], [[C]](s32) - ; SI-NEXT: [[OR4:%[0-9]+]]:_(s32) = G_OR [[COPY10]], [[SHL6]] + ; SI-NEXT: [[SHL6:%[0-9]+]]:_(s32) = G_SHL [[COPY10]], [[C]](s32) + ; SI-NEXT: [[OR4:%[0-9]+]]:_(s32) = G_OR [[COPY9]], [[SHL6]] ; SI-NEXT: [[BITCAST8:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR4]](s32) ; SI-NEXT: [[XOR2:%[0-9]+]]:_(<2 x s16>) = G_XOR [[BITCAST6]], [[BITCAST8]] ; SI-NEXT: [[BITCAST9:%[0-9]+]]:_(s32) = G_BITCAST [[XOR2]](<2 x s16>) @@ -730,9 +729,9 @@ body: | ; SI-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[OR2]](s16) ; SI-NEXT: [[SHL7:%[0-9]+]]:_(s32) = G_SHL [[ANYEXT]], [[ZEXT4]](s32) ; SI-NEXT: [[TRUNC6:%[0-9]+]]:_(s16) = G_TRUNC [[SHL7]](s32) - ; SI-NEXT: [[COPY12:%[0-9]+]]:_(s32) = COPY [[C6]](s32) + ; SI-NEXT: [[COPY11:%[0-9]+]]:_(s32) = COPY [[C6]](s32) ; SI-NEXT: [[AND13:%[0-9]+]]:_(s32) = G_AND [[SHL4]], [[C1]] - ; SI-NEXT: [[LSHR8:%[0-9]+]]:_(s32) = G_LSHR [[AND13]], [[COPY12]](s32) + ; SI-NEXT: [[LSHR8:%[0-9]+]]:_(s32) = G_LSHR [[AND13]], [[COPY11]](s32) ; SI-NEXT: [[ZEXT5:%[0-9]+]]:_(s32) = G_ZEXT [[AND12]](s16) ; SI-NEXT: [[AND14:%[0-9]+]]:_(s32) = G_AND [[LSHR8]], [[C1]] ; SI-NEXT: [[LSHR9:%[0-9]+]]:_(s32) = G_LSHR [[AND14]], [[ZEXT5]](s32) @@ -745,116 +744,76 @@ body: | ; SI-NEXT: [[ANYEXT1:%[0-9]+]]:_(s32) = G_ANYEXT [[OR3]](s16) ; SI-NEXT: [[SHL8:%[0-9]+]]:_(s32) = G_SHL [[ANYEXT1]], 
[[ZEXT6]](s32) ; SI-NEXT: [[TRUNC8:%[0-9]+]]:_(s16) = G_TRUNC [[SHL8]](s32) - ; SI-NEXT: [[COPY13:%[0-9]+]]:_(s32) = COPY [[C6]](s32) + ; SI-NEXT: [[COPY12:%[0-9]+]]:_(s32) = COPY [[C6]](s32) ; SI-NEXT: [[AND17:%[0-9]+]]:_(s32) = G_AND [[SHL5]], [[C1]] - ; SI-NEXT: [[LSHR10:%[0-9]+]]:_(s32) = G_LSHR [[AND17]], [[COPY13]](s32) + ; SI-NEXT: [[LSHR10:%[0-9]+]]:_(s32) = G_LSHR [[AND17]], [[COPY12]](s32) ; SI-NEXT: [[ZEXT7:%[0-9]+]]:_(s32) = G_ZEXT [[AND16]](s16) ; SI-NEXT: [[AND18:%[0-9]+]]:_(s32) = G_AND [[LSHR10]], [[C1]] ; SI-NEXT: [[LSHR11:%[0-9]+]]:_(s32) = G_LSHR [[AND18]], [[ZEXT7]](s32) ; SI-NEXT: [[TRUNC9:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR11]](s32) ; SI-NEXT: [[OR6:%[0-9]+]]:_(s16) = G_OR [[TRUNC8]], [[TRUNC9]] - ; SI-NEXT: [[ZEXT8:%[0-9]+]]:_(s32) = G_ZEXT [[OR5]](s16) - ; SI-NEXT: [[ZEXT9:%[0-9]+]]:_(s32) = G_ZEXT [[OR6]](s16) - ; SI-NEXT: [[SHL9:%[0-9]+]]:_(s32) = G_SHL [[ZEXT9]], [[C]](s32) - ; SI-NEXT: [[OR7:%[0-9]+]]:_(s32) = G_OR [[ZEXT8]], [[SHL9]] - ; SI-NEXT: [[BITCAST10:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR7]](s32) ; SI-NEXT: [[AND19:%[0-9]+]]:_(s16) = G_AND [[C3]], [[C4]] ; SI-NEXT: [[XOR5:%[0-9]+]]:_(s16) = G_XOR [[C3]], [[C5]] ; SI-NEXT: [[AND20:%[0-9]+]]:_(s16) = G_AND [[XOR5]], [[C4]] - ; SI-NEXT: [[ZEXT10:%[0-9]+]]:_(s32) = G_ZEXT [[AND19]](s16) - ; SI-NEXT: [[SHL10:%[0-9]+]]:_(s32) = G_SHL [[BITCAST1]], [[ZEXT10]](s32) - ; SI-NEXT: [[TRUNC10:%[0-9]+]]:_(s16) = G_TRUNC [[SHL10]](s32) - ; SI-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY [[C6]](s32) + ; SI-NEXT: [[ZEXT8:%[0-9]+]]:_(s32) = G_ZEXT [[AND19]](s16) + ; SI-NEXT: [[SHL9:%[0-9]+]]:_(s32) = G_SHL [[BITCAST1]], [[ZEXT8]](s32) + ; SI-NEXT: [[TRUNC10:%[0-9]+]]:_(s16) = G_TRUNC [[SHL9]](s32) + ; SI-NEXT: [[COPY13:%[0-9]+]]:_(s32) = COPY [[C6]](s32) ; SI-NEXT: [[AND21:%[0-9]+]]:_(s32) = G_AND [[BITCAST3]], [[C1]] - ; SI-NEXT: [[LSHR12:%[0-9]+]]:_(s32) = G_LSHR [[AND21]], [[COPY14]](s32) - ; SI-NEXT: [[ZEXT11:%[0-9]+]]:_(s32) = G_ZEXT [[AND20]](s16) + ; SI-NEXT: [[LSHR12:%[0-9]+]]:_(s32) = G_LSHR [[AND21]], [[COPY13]](s32) + ; SI-NEXT: [[ZEXT9:%[0-9]+]]:_(s32) = G_ZEXT [[AND20]](s16) ; SI-NEXT: [[AND22:%[0-9]+]]:_(s32) = G_AND [[LSHR12]], [[C1]] - ; SI-NEXT: [[LSHR13:%[0-9]+]]:_(s32) = G_LSHR [[AND22]], [[ZEXT11]](s32) + ; SI-NEXT: [[LSHR13:%[0-9]+]]:_(s32) = G_LSHR [[AND22]], [[ZEXT9]](s32) ; SI-NEXT: [[TRUNC11:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR13]](s32) - ; SI-NEXT: [[OR8:%[0-9]+]]:_(s16) = G_OR [[TRUNC10]], [[TRUNC11]] - ; SI-NEXT: [[AND23:%[0-9]+]]:_(s16) = G_AND [[C3]], [[C4]] - ; SI-NEXT: [[XOR6:%[0-9]+]]:_(s16) = G_XOR [[C3]], [[C5]] - ; SI-NEXT: [[AND24:%[0-9]+]]:_(s16) = G_AND [[XOR6]], [[C4]] - ; SI-NEXT: [[ZEXT12:%[0-9]+]]:_(s32) = G_ZEXT [[AND23]](s16) - ; SI-NEXT: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF - ; SI-NEXT: [[SHL11:%[0-9]+]]:_(s32) = G_SHL [[DEF]], [[ZEXT12]](s32) - ; SI-NEXT: [[TRUNC12:%[0-9]+]]:_(s16) = G_TRUNC [[SHL11]](s32) - ; SI-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY [[C6]](s32) - ; SI-NEXT: [[LSHR14:%[0-9]+]]:_(s32) = G_LSHR [[C2]], [[COPY15]](s32) - ; SI-NEXT: [[ZEXT13:%[0-9]+]]:_(s32) = G_ZEXT [[AND24]](s16) - ; SI-NEXT: [[AND25:%[0-9]+]]:_(s32) = G_AND [[LSHR14]], [[C1]] - ; SI-NEXT: [[LSHR15:%[0-9]+]]:_(s32) = G_LSHR [[AND25]], [[ZEXT13]](s32) - ; SI-NEXT: [[TRUNC13:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR15]](s32) - ; SI-NEXT: [[OR9:%[0-9]+]]:_(s16) = G_OR [[TRUNC12]], [[TRUNC13]] - ; SI-NEXT: [[COPY16:%[0-9]+]]:_(s32) = COPY [[C6]](s32) - ; SI-NEXT: [[SHL12:%[0-9]+]]:_(s32) = G_SHL [[BITCAST3]], [[COPY16]](s32) + ; SI-NEXT: [[OR7:%[0-9]+]]:_(s16) = G_OR [[TRUNC10]], [[TRUNC11]] + ; SI-NEXT: 
[[COPY14:%[0-9]+]]:_(s32) = COPY [[C6]](s32) + ; SI-NEXT: [[SHL10:%[0-9]+]]:_(s32) = G_SHL [[BITCAST3]], [[COPY14]](s32) + ; SI-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY [[C1]](s32) + ; SI-NEXT: [[COPY16:%[0-9]+]]:_(s32) = COPY [[C1]](s32) + ; SI-NEXT: [[SHL11:%[0-9]+]]:_(s32) = G_SHL [[COPY16]], [[C]](s32) + ; SI-NEXT: [[OR8:%[0-9]+]]:_(s32) = G_OR [[COPY15]], [[SHL11]] + ; SI-NEXT: [[BITCAST10:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR8]](s32) + ; SI-NEXT: [[XOR6:%[0-9]+]]:_(<2 x s16>) = G_XOR [[BITCAST7]], [[BITCAST10]] + ; SI-NEXT: [[BITCAST11:%[0-9]+]]:_(s32) = G_BITCAST [[XOR6]](<2 x s16>) + ; SI-NEXT: [[TRUNC12:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST11]](s32) + ; SI-NEXT: [[AND23:%[0-9]+]]:_(s16) = G_AND [[TRUNC12]], [[C4]] + ; SI-NEXT: [[XOR7:%[0-9]+]]:_(s16) = G_XOR [[TRUNC12]], [[C5]] + ; SI-NEXT: [[AND24:%[0-9]+]]:_(s16) = G_AND [[XOR7]], [[C4]] + ; SI-NEXT: [[ZEXT10:%[0-9]+]]:_(s32) = G_ZEXT [[AND23]](s16) + ; SI-NEXT: [[ANYEXT2:%[0-9]+]]:_(s32) = G_ANYEXT [[OR7]](s16) + ; SI-NEXT: [[SHL12:%[0-9]+]]:_(s32) = G_SHL [[ANYEXT2]], [[ZEXT10]](s32) + ; SI-NEXT: [[TRUNC13:%[0-9]+]]:_(s16) = G_TRUNC [[SHL12]](s32) ; SI-NEXT: [[COPY17:%[0-9]+]]:_(s32) = COPY [[C6]](s32) - ; SI-NEXT: [[COPY18:%[0-9]+]]:_(s32) = COPY [[DEF]](s32) - ; SI-NEXT: [[SHL13:%[0-9]+]]:_(s32) = G_SHL [[COPY18]], [[COPY17]](s32) - ; SI-NEXT: [[COPY19:%[0-9]+]]:_(s32) = COPY [[C1]](s32) - ; SI-NEXT: [[COPY20:%[0-9]+]]:_(s32) = COPY [[C1]](s32) - ; SI-NEXT: [[SHL14:%[0-9]+]]:_(s32) = G_SHL [[COPY20]], [[C]](s32) - ; SI-NEXT: [[OR10:%[0-9]+]]:_(s32) = G_OR [[COPY19]], [[SHL14]] - ; SI-NEXT: [[BITCAST11:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR10]](s32) - ; SI-NEXT: [[XOR7:%[0-9]+]]:_(<2 x s16>) = G_XOR [[BITCAST7]], [[BITCAST11]] - ; SI-NEXT: [[BITCAST12:%[0-9]+]]:_(s32) = G_BITCAST [[XOR7]](<2 x s16>) - ; SI-NEXT: [[TRUNC14:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST12]](s32) + ; SI-NEXT: [[AND25:%[0-9]+]]:_(s32) = G_AND [[SHL10]], [[C1]] + ; SI-NEXT: [[LSHR14:%[0-9]+]]:_(s32) = G_LSHR [[AND25]], [[COPY17]](s32) + ; SI-NEXT: [[ZEXT11:%[0-9]+]]:_(s32) = G_ZEXT [[AND24]](s16) + ; SI-NEXT: [[AND26:%[0-9]+]]:_(s32) = G_AND [[LSHR14]], [[C1]] + ; SI-NEXT: [[LSHR15:%[0-9]+]]:_(s32) = G_LSHR [[AND26]], [[ZEXT11]](s32) + ; SI-NEXT: [[TRUNC14:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR15]](s32) + ; SI-NEXT: [[OR9:%[0-9]+]]:_(s16) = G_OR [[TRUNC13]], [[TRUNC14]] + ; SI-NEXT: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF + ; SI-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<4 x s16>) + ; SI-NEXT: [[BITCAST12:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>) ; SI-NEXT: [[LSHR16:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST12]], [[C]](s32) - ; SI-NEXT: [[TRUNC15:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR16]](s32) - ; SI-NEXT: [[AND26:%[0-9]+]]:_(s16) = G_AND [[TRUNC14]], [[C4]] - ; SI-NEXT: [[XOR8:%[0-9]+]]:_(s16) = G_XOR [[TRUNC14]], [[C5]] - ; SI-NEXT: [[AND27:%[0-9]+]]:_(s16) = G_AND [[XOR8]], [[C4]] - ; SI-NEXT: [[ZEXT14:%[0-9]+]]:_(s32) = G_ZEXT [[AND26]](s16) - ; SI-NEXT: [[ANYEXT2:%[0-9]+]]:_(s32) = G_ANYEXT [[OR8]](s16) - ; SI-NEXT: [[SHL15:%[0-9]+]]:_(s32) = G_SHL [[ANYEXT2]], [[ZEXT14]](s32) - ; SI-NEXT: [[TRUNC16:%[0-9]+]]:_(s16) = G_TRUNC [[SHL15]](s32) - ; SI-NEXT: [[COPY21:%[0-9]+]]:_(s32) = COPY [[C6]](s32) - ; SI-NEXT: [[AND28:%[0-9]+]]:_(s32) = G_AND [[SHL12]], [[C1]] - ; SI-NEXT: [[LSHR17:%[0-9]+]]:_(s32) = G_LSHR [[AND28]], [[COPY21]](s32) - ; SI-NEXT: [[ZEXT15:%[0-9]+]]:_(s32) = G_ZEXT [[AND27]](s16) - ; SI-NEXT: [[AND29:%[0-9]+]]:_(s32) = G_AND [[LSHR17]], [[C1]] - ; SI-NEXT: [[LSHR18:%[0-9]+]]:_(s32) = G_LSHR 
[[AND29]], [[ZEXT15]](s32) - ; SI-NEXT: [[TRUNC17:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR18]](s32) - ; SI-NEXT: [[OR11:%[0-9]+]]:_(s16) = G_OR [[TRUNC16]], [[TRUNC17]] - ; SI-NEXT: [[AND30:%[0-9]+]]:_(s16) = G_AND [[TRUNC15]], [[C4]] - ; SI-NEXT: [[XOR9:%[0-9]+]]:_(s16) = G_XOR [[TRUNC15]], [[C5]] - ; SI-NEXT: [[AND31:%[0-9]+]]:_(s16) = G_AND [[XOR9]], [[C4]] - ; SI-NEXT: [[ZEXT16:%[0-9]+]]:_(s32) = G_ZEXT [[AND30]](s16) - ; SI-NEXT: [[ANYEXT3:%[0-9]+]]:_(s32) = G_ANYEXT [[OR9]](s16) - ; SI-NEXT: [[SHL16:%[0-9]+]]:_(s32) = G_SHL [[ANYEXT3]], [[ZEXT16]](s32) - ; SI-NEXT: [[TRUNC18:%[0-9]+]]:_(s16) = G_TRUNC [[SHL16]](s32) - ; SI-NEXT: [[COPY22:%[0-9]+]]:_(s32) = COPY [[C6]](s32) - ; SI-NEXT: [[AND32:%[0-9]+]]:_(s32) = G_AND [[SHL13]], [[C1]] - ; SI-NEXT: [[LSHR19:%[0-9]+]]:_(s32) = G_LSHR [[AND32]], [[COPY22]](s32) - ; SI-NEXT: [[ZEXT17:%[0-9]+]]:_(s32) = G_ZEXT [[AND31]](s16) - ; SI-NEXT: [[AND33:%[0-9]+]]:_(s32) = G_AND [[LSHR19]], [[C1]] - ; SI-NEXT: [[LSHR20:%[0-9]+]]:_(s32) = G_LSHR [[AND33]], [[ZEXT17]](s32) - ; SI-NEXT: [[TRUNC19:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR20]](s32) - ; SI-NEXT: [[OR12:%[0-9]+]]:_(s16) = G_OR [[TRUNC18]], [[TRUNC19]] - ; SI-NEXT: [[ZEXT18:%[0-9]+]]:_(s32) = G_ZEXT [[OR11]](s16) - ; SI-NEXT: [[ZEXT19:%[0-9]+]]:_(s32) = G_ZEXT [[OR12]](s16) - ; SI-NEXT: [[SHL17:%[0-9]+]]:_(s32) = G_SHL [[ZEXT19]], [[C]](s32) - ; SI-NEXT: [[OR13:%[0-9]+]]:_(s32) = G_OR [[ZEXT18]], [[SHL17]] - ; SI-NEXT: [[BITCAST13:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR13]](s32) - ; SI-NEXT: [[DEF1:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF - ; SI-NEXT: [[BITCAST14:%[0-9]+]]:_(s32) = G_BITCAST [[BITCAST13]](<2 x s16>) - ; SI-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF1]](<4 x s16>) - ; SI-NEXT: [[BITCAST15:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>) - ; SI-NEXT: [[LSHR21:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST15]], [[C]](s32) - ; SI-NEXT: [[BITCAST16:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>) - ; SI-NEXT: [[AND34:%[0-9]+]]:_(s32) = G_AND [[BITCAST14]], [[C1]] - ; SI-NEXT: [[AND35:%[0-9]+]]:_(s32) = G_AND [[BITCAST15]], [[C1]] - ; SI-NEXT: [[SHL18:%[0-9]+]]:_(s32) = G_SHL [[AND35]], [[C]](s32) - ; SI-NEXT: [[OR14:%[0-9]+]]:_(s32) = G_OR [[AND34]], [[SHL18]] - ; SI-NEXT: [[BITCAST17:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR14]](s32) - ; SI-NEXT: [[AND36:%[0-9]+]]:_(s32) = G_AND [[LSHR21]], [[C1]] - ; SI-NEXT: [[AND37:%[0-9]+]]:_(s32) = G_AND [[BITCAST16]], [[C1]] - ; SI-NEXT: [[SHL19:%[0-9]+]]:_(s32) = G_SHL [[AND37]], [[C]](s32) - ; SI-NEXT: [[OR15:%[0-9]+]]:_(s32) = G_OR [[AND36]], [[SHL19]] - ; SI-NEXT: [[BITCAST18:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR15]](s32) - ; SI-NEXT: $vgpr0 = COPY [[BITCAST10]](<2 x s16>) - ; SI-NEXT: $vgpr1 = COPY [[BITCAST17]](<2 x s16>) - ; SI-NEXT: $vgpr2 = COPY [[BITCAST18]](<2 x s16>) + ; SI-NEXT: [[BITCAST13:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>) + ; SI-NEXT: [[ZEXT12:%[0-9]+]]:_(s32) = G_ZEXT [[OR5]](s16) + ; SI-NEXT: [[ZEXT13:%[0-9]+]]:_(s32) = G_ZEXT [[OR6]](s16) + ; SI-NEXT: [[SHL13:%[0-9]+]]:_(s32) = G_SHL [[ZEXT13]], [[C]](s32) + ; SI-NEXT: [[OR10:%[0-9]+]]:_(s32) = G_OR [[ZEXT12]], [[SHL13]] + ; SI-NEXT: [[BITCAST14:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR10]](s32) + ; SI-NEXT: [[ZEXT14:%[0-9]+]]:_(s32) = G_ZEXT [[OR9]](s16) + ; SI-NEXT: [[AND27:%[0-9]+]]:_(s32) = G_AND [[BITCAST12]], [[C1]] + ; SI-NEXT: [[SHL14:%[0-9]+]]:_(s32) = G_SHL [[AND27]], [[C]](s32) + ; SI-NEXT: [[OR11:%[0-9]+]]:_(s32) = G_OR [[ZEXT14]], [[SHL14]] + ; SI-NEXT: [[BITCAST15:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR11]](s32) + ; SI-NEXT: 
[[AND28:%[0-9]+]]:_(s32) = G_AND [[LSHR16]], [[C1]] + ; SI-NEXT: [[AND29:%[0-9]+]]:_(s32) = G_AND [[BITCAST13]], [[C1]] + ; SI-NEXT: [[SHL15:%[0-9]+]]:_(s32) = G_SHL [[AND29]], [[C]](s32) + ; SI-NEXT: [[OR12:%[0-9]+]]:_(s32) = G_OR [[AND28]], [[SHL15]] + ; SI-NEXT: [[BITCAST16:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR12]](s32) + ; SI-NEXT: $vgpr0 = COPY [[BITCAST14]](<2 x s16>) + ; SI-NEXT: $vgpr1 = COPY [[BITCAST15]](<2 x s16>) + ; SI-NEXT: $vgpr2 = COPY [[BITCAST16]](<2 x s16>) ; VI-LABEL: name: test_fshr_v3s16_v3s16 ; VI: [[COPY:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0 ; VI-NEXT: [[COPY1:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr1 @@ -869,7 +828,6 @@ body: | ; VI-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) ; VI-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[COPY1]](<2 x s16>) ; VI-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST1]](s32) - ; VI-NEXT: [[DEF:%[0-9]+]]:_(s16) = G_IMPLICIT_DEF ; VI-NEXT: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[COPY2]](<2 x s16>) ; VI-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST2]](s32) ; VI-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C]](s32) @@ -932,75 +890,52 @@ body: | ; VI-NEXT: [[LSHR10:%[0-9]+]]:_(s16) = G_LSHR [[SHL5]], [[C3]](s16) ; VI-NEXT: [[LSHR11:%[0-9]+]]:_(s16) = G_LSHR [[LSHR10]], [[AND10]](s16) ; VI-NEXT: [[OR6:%[0-9]+]]:_(s16) = G_OR [[SHL8]], [[LSHR11]] - ; VI-NEXT: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[OR5]](s16) - ; VI-NEXT: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[OR6]](s16) - ; VI-NEXT: [[SHL9:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C]](s32) - ; VI-NEXT: [[OR7:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL9]] - ; VI-NEXT: [[BITCAST10:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR7]](s32) ; VI-NEXT: [[AND11:%[0-9]+]]:_(s16) = G_AND [[C3]], [[C4]] ; VI-NEXT: [[XOR5:%[0-9]+]]:_(s16) = G_XOR [[C3]], [[C5]] ; VI-NEXT: [[AND12:%[0-9]+]]:_(s16) = G_AND [[XOR5]], [[C4]] - ; VI-NEXT: [[SHL10:%[0-9]+]]:_(s16) = G_SHL [[TRUNC2]], [[AND11]](s16) + ; VI-NEXT: [[SHL9:%[0-9]+]]:_(s16) = G_SHL [[TRUNC2]], [[AND11]](s16) ; VI-NEXT: [[LSHR12:%[0-9]+]]:_(s16) = G_LSHR [[TRUNC5]], [[C3]](s16) ; VI-NEXT: [[LSHR13:%[0-9]+]]:_(s16) = G_LSHR [[LSHR12]], [[AND12]](s16) - ; VI-NEXT: [[OR8:%[0-9]+]]:_(s16) = G_OR [[SHL10]], [[LSHR13]] - ; VI-NEXT: [[AND13:%[0-9]+]]:_(s16) = G_AND [[C3]], [[C4]] - ; VI-NEXT: [[XOR6:%[0-9]+]]:_(s16) = G_XOR [[C3]], [[C5]] - ; VI-NEXT: [[AND14:%[0-9]+]]:_(s16) = G_AND [[XOR6]], [[C4]] - ; VI-NEXT: [[SHL11:%[0-9]+]]:_(s16) = G_SHL [[DEF]], [[AND13]](s16) - ; VI-NEXT: [[LSHR14:%[0-9]+]]:_(s16) = G_LSHR [[DEF]], [[C3]](s16) - ; VI-NEXT: [[LSHR15:%[0-9]+]]:_(s16) = G_LSHR [[LSHR14]], [[AND14]](s16) - ; VI-NEXT: [[OR9:%[0-9]+]]:_(s16) = G_OR [[SHL11]], [[LSHR15]] - ; VI-NEXT: [[SHL12:%[0-9]+]]:_(s16) = G_SHL [[TRUNC5]], [[C3]](s16) - ; VI-NEXT: [[SHL13:%[0-9]+]]:_(s16) = G_SHL [[DEF]], [[C3]](s16) + ; VI-NEXT: [[OR7:%[0-9]+]]:_(s16) = G_OR [[SHL9]], [[LSHR13]] + ; VI-NEXT: [[SHL10:%[0-9]+]]:_(s16) = G_SHL [[TRUNC5]], [[C3]](s16) ; VI-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY [[C1]](s32) ; VI-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY [[C1]](s32) - ; VI-NEXT: [[SHL14:%[0-9]+]]:_(s32) = G_SHL [[COPY8]], [[C]](s32) - ; VI-NEXT: [[OR10:%[0-9]+]]:_(s32) = G_OR [[COPY7]], [[SHL14]] - ; VI-NEXT: [[BITCAST11:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR10]](s32) - ; VI-NEXT: [[XOR7:%[0-9]+]]:_(<2 x s16>) = G_XOR [[BITCAST7]], [[BITCAST11]] - ; VI-NEXT: [[BITCAST12:%[0-9]+]]:_(s32) = G_BITCAST [[XOR7]](<2 x s16>) - ; VI-NEXT: [[TRUNC8:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST12]](s32) + ; VI-NEXT: [[SHL11:%[0-9]+]]:_(s32) = G_SHL [[COPY8]], [[C]](s32) + ; VI-NEXT: 
[[OR8:%[0-9]+]]:_(s32) = G_OR [[COPY7]], [[SHL11]] + ; VI-NEXT: [[BITCAST10:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR8]](s32) + ; VI-NEXT: [[XOR6:%[0-9]+]]:_(<2 x s16>) = G_XOR [[BITCAST7]], [[BITCAST10]] + ; VI-NEXT: [[BITCAST11:%[0-9]+]]:_(s32) = G_BITCAST [[XOR6]](<2 x s16>) + ; VI-NEXT: [[TRUNC8:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST11]](s32) + ; VI-NEXT: [[AND13:%[0-9]+]]:_(s16) = G_AND [[TRUNC8]], [[C4]] + ; VI-NEXT: [[XOR7:%[0-9]+]]:_(s16) = G_XOR [[TRUNC8]], [[C5]] + ; VI-NEXT: [[AND14:%[0-9]+]]:_(s16) = G_AND [[XOR7]], [[C4]] + ; VI-NEXT: [[SHL12:%[0-9]+]]:_(s16) = G_SHL [[OR7]], [[AND13]](s16) + ; VI-NEXT: [[LSHR14:%[0-9]+]]:_(s16) = G_LSHR [[SHL10]], [[C3]](s16) + ; VI-NEXT: [[LSHR15:%[0-9]+]]:_(s16) = G_LSHR [[LSHR14]], [[AND14]](s16) + ; VI-NEXT: [[OR9:%[0-9]+]]:_(s16) = G_OR [[SHL12]], [[LSHR15]] + ; VI-NEXT: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF + ; VI-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<4 x s16>) + ; VI-NEXT: [[BITCAST12:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>) ; VI-NEXT: [[LSHR16:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST12]], [[C]](s32) - ; VI-NEXT: [[TRUNC9:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR16]](s32) - ; VI-NEXT: [[AND15:%[0-9]+]]:_(s16) = G_AND [[TRUNC8]], [[C4]] - ; VI-NEXT: [[XOR8:%[0-9]+]]:_(s16) = G_XOR [[TRUNC8]], [[C5]] - ; VI-NEXT: [[AND16:%[0-9]+]]:_(s16) = G_AND [[XOR8]], [[C4]] - ; VI-NEXT: [[SHL15:%[0-9]+]]:_(s16) = G_SHL [[OR8]], [[AND15]](s16) - ; VI-NEXT: [[LSHR17:%[0-9]+]]:_(s16) = G_LSHR [[SHL12]], [[C3]](s16) - ; VI-NEXT: [[LSHR18:%[0-9]+]]:_(s16) = G_LSHR [[LSHR17]], [[AND16]](s16) - ; VI-NEXT: [[OR11:%[0-9]+]]:_(s16) = G_OR [[SHL15]], [[LSHR18]] - ; VI-NEXT: [[AND17:%[0-9]+]]:_(s16) = G_AND [[TRUNC9]], [[C4]] - ; VI-NEXT: [[XOR9:%[0-9]+]]:_(s16) = G_XOR [[TRUNC9]], [[C5]] - ; VI-NEXT: [[AND18:%[0-9]+]]:_(s16) = G_AND [[XOR9]], [[C4]] - ; VI-NEXT: [[SHL16:%[0-9]+]]:_(s16) = G_SHL [[OR9]], [[AND17]](s16) - ; VI-NEXT: [[LSHR19:%[0-9]+]]:_(s16) = G_LSHR [[SHL13]], [[C3]](s16) - ; VI-NEXT: [[LSHR20:%[0-9]+]]:_(s16) = G_LSHR [[LSHR19]], [[AND18]](s16) - ; VI-NEXT: [[OR12:%[0-9]+]]:_(s16) = G_OR [[SHL16]], [[LSHR20]] - ; VI-NEXT: [[ZEXT2:%[0-9]+]]:_(s32) = G_ZEXT [[OR11]](s16) - ; VI-NEXT: [[ZEXT3:%[0-9]+]]:_(s32) = G_ZEXT [[OR12]](s16) - ; VI-NEXT: [[SHL17:%[0-9]+]]:_(s32) = G_SHL [[ZEXT3]], [[C]](s32) - ; VI-NEXT: [[OR13:%[0-9]+]]:_(s32) = G_OR [[ZEXT2]], [[SHL17]] - ; VI-NEXT: [[BITCAST13:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR13]](s32) - ; VI-NEXT: [[DEF1:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF - ; VI-NEXT: [[BITCAST14:%[0-9]+]]:_(s32) = G_BITCAST [[BITCAST13]](<2 x s16>) - ; VI-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF1]](<4 x s16>) - ; VI-NEXT: [[BITCAST15:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>) - ; VI-NEXT: [[LSHR21:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST15]], [[C]](s32) - ; VI-NEXT: [[BITCAST16:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>) - ; VI-NEXT: [[AND19:%[0-9]+]]:_(s32) = G_AND [[BITCAST14]], [[C1]] - ; VI-NEXT: [[AND20:%[0-9]+]]:_(s32) = G_AND [[BITCAST15]], [[C1]] - ; VI-NEXT: [[SHL18:%[0-9]+]]:_(s32) = G_SHL [[AND20]], [[C]](s32) - ; VI-NEXT: [[OR14:%[0-9]+]]:_(s32) = G_OR [[AND19]], [[SHL18]] - ; VI-NEXT: [[BITCAST17:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR14]](s32) - ; VI-NEXT: [[AND21:%[0-9]+]]:_(s32) = G_AND [[LSHR21]], [[C1]] - ; VI-NEXT: [[AND22:%[0-9]+]]:_(s32) = G_AND [[BITCAST16]], [[C1]] - ; VI-NEXT: [[SHL19:%[0-9]+]]:_(s32) = G_SHL [[AND22]], [[C]](s32) - ; VI-NEXT: [[OR15:%[0-9]+]]:_(s32) = G_OR [[AND21]], [[SHL19]] - ; 
VI-NEXT: [[BITCAST18:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR15]](s32) - ; VI-NEXT: $vgpr0 = COPY [[BITCAST10]](<2 x s16>) - ; VI-NEXT: $vgpr1 = COPY [[BITCAST17]](<2 x s16>) - ; VI-NEXT: $vgpr2 = COPY [[BITCAST18]](<2 x s16>) + ; VI-NEXT: [[BITCAST13:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>) + ; VI-NEXT: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[OR5]](s16) + ; VI-NEXT: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[OR6]](s16) + ; VI-NEXT: [[SHL13:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C]](s32) + ; VI-NEXT: [[OR10:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL13]] + ; VI-NEXT: [[BITCAST14:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR10]](s32) + ; VI-NEXT: [[ZEXT2:%[0-9]+]]:_(s32) = G_ZEXT [[OR9]](s16) + ; VI-NEXT: [[AND15:%[0-9]+]]:_(s32) = G_AND [[BITCAST12]], [[C1]] + ; VI-NEXT: [[SHL14:%[0-9]+]]:_(s32) = G_SHL [[AND15]], [[C]](s32) + ; VI-NEXT: [[OR11:%[0-9]+]]:_(s32) = G_OR [[ZEXT2]], [[SHL14]] + ; VI-NEXT: [[BITCAST15:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR11]](s32) + ; VI-NEXT: [[AND16:%[0-9]+]]:_(s32) = G_AND [[LSHR16]], [[C1]] + ; VI-NEXT: [[AND17:%[0-9]+]]:_(s32) = G_AND [[BITCAST13]], [[C1]] + ; VI-NEXT: [[SHL15:%[0-9]+]]:_(s32) = G_SHL [[AND17]], [[C]](s32) + ; VI-NEXT: [[OR12:%[0-9]+]]:_(s32) = G_OR [[AND16]], [[SHL15]] + ; VI-NEXT: [[BITCAST16:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR12]](s32) + ; VI-NEXT: $vgpr0 = COPY [[BITCAST14]](<2 x s16>) + ; VI-NEXT: $vgpr1 = COPY [[BITCAST15]](<2 x s16>) + ; VI-NEXT: $vgpr2 = COPY [[BITCAST16]](<2 x s16>) ; GFX9-LABEL: name: test_fshr_v3s16_v3s16 ; GFX9: [[COPY:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0 ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr1 @@ -1012,19 +947,19 @@ body: | ; GFX9-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 ; GFX9-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) ; GFX9-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[COPY1]](<2 x s16>) + ; GFX9-NEXT: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[COPY2]](<2 x s16>) + ; GFX9-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C]](s32) + ; GFX9-NEXT: [[BITCAST3:%[0-9]+]]:_(s32) = G_BITCAST [[COPY3]](<2 x s16>) + ; GFX9-NEXT: [[BITCAST4:%[0-9]+]]:_(s32) = G_BITCAST [[COPY4]](<2 x s16>) + ; GFX9-NEXT: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST4]], [[C]](s32) + ; GFX9-NEXT: [[BITCAST5:%[0-9]+]]:_(s32) = G_BITCAST [[COPY5]](<2 x s16>) ; GFX9-NEXT: [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[BITCAST]](s32), [[LSHR]](s32) ; GFX9-NEXT: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF ; GFX9-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY [[DEF]](s32) ; GFX9-NEXT: [[BUILD_VECTOR_TRUNC1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[BITCAST1]](s32), [[COPY6]](s32) - ; GFX9-NEXT: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[COPY2]](<2 x s16>) - ; GFX9-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C]](s32) - ; GFX9-NEXT: [[BITCAST3:%[0-9]+]]:_(s32) = G_BITCAST [[COPY3]](<2 x s16>) ; GFX9-NEXT: [[BUILD_VECTOR_TRUNC2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[BITCAST2]](s32), [[LSHR1]](s32) ; GFX9-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY [[DEF]](s32) ; GFX9-NEXT: [[BUILD_VECTOR_TRUNC3:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[BITCAST3]](s32), [[COPY7]](s32) - ; GFX9-NEXT: [[BITCAST4:%[0-9]+]]:_(s32) = G_BITCAST [[COPY4]](<2 x s16>) - ; GFX9-NEXT: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST4]], [[C]](s32) - ; GFX9-NEXT: [[BITCAST5:%[0-9]+]]:_(s32) = G_BITCAST [[COPY5]](<2 x s16>) ; GFX9-NEXT: [[BUILD_VECTOR_TRUNC4:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[BITCAST4]](s32), [[LSHR2]](s32) ; GFX9-NEXT: [[BUILD_VECTOR_TRUNC5:%[0-9]+]]:_(<2 x s16>) = 
G_BUILD_VECTOR_TRUNC [[BITCAST5]](s32), [[DEF]](s32) ; GFX9-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 15 @@ -1059,17 +994,20 @@ body: | ; GFX9-NEXT: [[SHL3:%[0-9]+]]:_(<2 x s16>) = G_SHL [[SHL2]], [[AND3]](<2 x s16>) ; GFX9-NEXT: [[LSHR4:%[0-9]+]]:_(<2 x s16>) = G_LSHR [[BUILD_VECTOR_TRUNC3]], [[AND2]](<2 x s16>) ; GFX9-NEXT: [[OR1:%[0-9]+]]:_(<2 x s16>) = G_OR [[SHL3]], [[LSHR4]] + ; GFX9-NEXT: [[BITCAST6:%[0-9]+]]:_(s32) = G_BITCAST [[OR]](<2 x s16>) + ; GFX9-NEXT: [[LSHR5:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST6]], [[C]](s32) + ; GFX9-NEXT: [[BITCAST7:%[0-9]+]]:_(s32) = G_BITCAST [[OR1]](<2 x s16>) ; GFX9-NEXT: [[DEF1:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF - ; GFX9-NEXT: [[BITCAST6:%[0-9]+]]:_(s32) = G_BITCAST [[OR1]](<2 x s16>) ; GFX9-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF1]](<4 x s16>) - ; GFX9-NEXT: [[BITCAST7:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>) - ; GFX9-NEXT: [[LSHR5:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST7]], [[C]](s32) - ; GFX9-NEXT: [[BITCAST8:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>) - ; GFX9-NEXT: [[BUILD_VECTOR_TRUNC12:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[BITCAST6]](s32), [[BITCAST7]](s32) - ; GFX9-NEXT: [[BUILD_VECTOR_TRUNC13:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[LSHR5]](s32), [[BITCAST8]](s32) - ; GFX9-NEXT: $vgpr0 = COPY [[OR]](<2 x s16>) - ; GFX9-NEXT: $vgpr1 = COPY [[BUILD_VECTOR_TRUNC12]](<2 x s16>) - ; GFX9-NEXT: $vgpr2 = COPY [[BUILD_VECTOR_TRUNC13]](<2 x s16>) + ; GFX9-NEXT: [[BITCAST8:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>) + ; GFX9-NEXT: [[LSHR6:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST8]], [[C]](s32) + ; GFX9-NEXT: [[BITCAST9:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>) + ; GFX9-NEXT: [[BUILD_VECTOR_TRUNC12:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[BITCAST6]](s32), [[LSHR5]](s32) + ; GFX9-NEXT: [[BUILD_VECTOR_TRUNC13:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[BITCAST7]](s32), [[BITCAST8]](s32) + ; GFX9-NEXT: [[BUILD_VECTOR_TRUNC14:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[LSHR6]](s32), [[BITCAST9]](s32) + ; GFX9-NEXT: $vgpr0 = COPY [[BUILD_VECTOR_TRUNC12]](<2 x s16>) + ; GFX9-NEXT: $vgpr1 = COPY [[BUILD_VECTOR_TRUNC13]](<2 x s16>) + ; GFX9-NEXT: $vgpr2 = COPY [[BUILD_VECTOR_TRUNC14]](<2 x s16>) %0:_(<2 x s16>) = COPY $vgpr0 %1:_(<2 x s16>) = COPY $vgpr1 %2:_(<2 x s16>) = COPY $vgpr2 diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-implicit-def.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-implicit-def.mir index 00eed0e2a64c..7d6db970ad6b 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-implicit-def.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-implicit-def.mir @@ -327,11 +327,11 @@ body: | ; CHECK-LABEL: name: test_implicit_def_v33s32 ; CHECK: [[DEF:%[0-9]+]]:_(<16 x s32>) = G_IMPLICIT_DEF + ; CHECK-NEXT: [[DEF1:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32), [[UV6:%[0-9]+]]:_(s32), [[UV7:%[0-9]+]]:_(s32), [[UV8:%[0-9]+]]:_(s32), [[UV9:%[0-9]+]]:_(s32), [[UV10:%[0-9]+]]:_(s32), [[UV11:%[0-9]+]]:_(s32), [[UV12:%[0-9]+]]:_(s32), [[UV13:%[0-9]+]]:_(s32), [[UV14:%[0-9]+]]:_(s32), [[UV15:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[DEF]](<16 x s32>) - ; CHECK-NEXT: [[UV16:%[0-9]+]]:_(s32), [[UV17:%[0-9]+]]:_(s32), [[UV18:%[0-9]+]]:_(s32), [[UV19:%[0-9]+]]:_(s32), [[UV20:%[0-9]+]]:_(s32), [[UV21:%[0-9]+]]:_(s32), [[UV22:%[0-9]+]]:_(s32), [[UV23:%[0-9]+]]:_(s32), [[UV24:%[0-9]+]]:_(s32), 
[[UV25:%[0-9]+]]:_(s32), [[UV26:%[0-9]+]]:_(s32), [[UV27:%[0-9]+]]:_(s32), [[UV28:%[0-9]+]]:_(s32), [[UV29:%[0-9]+]]:_(s32), [[UV30:%[0-9]+]]:_(s32), [[UV31:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[DEF]](<16 x s32>) ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 ; CHECK-NEXT: G_STORE [[UV]](s32), [[COPY]](p1) :: (volatile store (s32), addrspace 1) - ; CHECK-NEXT: G_STORE [[UV16]](s32), [[COPY]](p1) :: (volatile store (s32), addrspace 1) + ; CHECK-NEXT: G_STORE [[DEF1]](s32), [[COPY]](p1) :: (volatile store (s32), addrspace 1) %0:_(<33 x s32>) = G_IMPLICIT_DEF %1:_(s32), %2:_(s32), %3:_(s32), %4:_(s32), %5:_(s32), %6:_(s32), %7:_(s32), %8:_(s32), %9:_(s32), %10:_(s32), %11:_(s32), %12:_(s32), %13:_(s32), %14:_(s32), %15:_(s32), %16:_(s32), %17:_(s32), %18:_(s32), %19:_(s32), %20:_(s32), %21:_(s32), %22:_(s32), %23:_(s32), %24:_(s32), %25:_(s32), %26:_(s32), %27:_(s32), %28:_(s32), %29:_(s32), %30:_(s32), %31:_(s32), %32:_(s32), %33:_(s32) = G_UNMERGE_VALUES %0 %34:_(p1) = COPY $vgpr0_vgpr1 @@ -428,11 +428,28 @@ body: | ; CHECK-LABEL: name: test_implicit_def_v3s16 ; CHECK: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF + ; CHECK-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<4 x s16>) + ; CHECK-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>) + ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; CHECK-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) + ; CHECK-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>) ; CHECK-NEXT: [[DEF1:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF - ; CHECK-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<12 x s16>) = G_CONCAT_VECTORS [[DEF]](<4 x s16>), [[DEF1]](<4 x s16>), [[DEF1]](<4 x s16>) - ; CHECK-NEXT: [[UV:%[0-9]+]]:_(<3 x s16>), [[UV1:%[0-9]+]]:_(<3 x s16>), [[UV2:%[0-9]+]]:_(<3 x s16>), [[UV3:%[0-9]+]]:_(<3 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<12 x s16>) - ; CHECK-NEXT: [[INSERT:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF1]], [[UV]](<3 x s16>), 0 - ; CHECK-NEXT: $vgpr0_vgpr1 = COPY [[INSERT]](<4 x s16>) + ; CHECK-NEXT: [[UV2:%[0-9]+]]:_(<2 x s16>), [[UV3:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF1]](<4 x s16>) + ; CHECK-NEXT: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[UV3]](<2 x s16>) + ; CHECK-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C]](s32) + ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 + ; CHECK-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[BITCAST]], [[C1]] + ; CHECK-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LSHR]], [[C1]] + ; CHECK-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C]](s32) + ; CHECK-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]] + ; CHECK-NEXT: [[BITCAST3:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) + ; CHECK-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[BITCAST1]], [[C1]] + ; CHECK-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[LSHR1]], [[C1]] + ; CHECK-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C]](s32) + ; CHECK-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL1]] + ; CHECK-NEXT: [[BITCAST4:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32) + ; CHECK-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BITCAST3]](<2 x s16>), [[BITCAST4]](<2 x s16>) + ; CHECK-NEXT: $vgpr0_vgpr1 = COPY [[CONCAT_VECTORS]](<4 x s16>) %0:_(<3 x s16>) = G_IMPLICIT_DEF %1:_(<4 x s16>) = G_IMPLICIT_DEF %2:_(<4 x s16>) = G_INSERT %1, %0, 0 @@ -458,12 +475,42 @@ body: | ; CHECK-LABEL: name: test_implicit_def_v5s16 ; CHECK: [[DEF:%[0-9]+]]:_(<6 x s16>) = G_IMPLICIT_DEF - ; CHECK-NEXT: [[DEF1:%[0-9]+]]:_(<6 x s16>) = G_IMPLICIT_DEF - 
; CHECK-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<30 x s16>) = G_CONCAT_VECTORS [[DEF]](<6 x s16>), [[DEF1]](<6 x s16>), [[DEF1]](<6 x s16>), [[DEF1]](<6 x s16>), [[DEF1]](<6 x s16>) - ; CHECK-NEXT: [[UV:%[0-9]+]]:_(<5 x s16>), [[UV1:%[0-9]+]]:_(<5 x s16>), [[UV2:%[0-9]+]]:_(<5 x s16>), [[UV3:%[0-9]+]]:_(<5 x s16>), [[UV4:%[0-9]+]]:_(<5 x s16>), [[UV5:%[0-9]+]]:_(<5 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<30 x s16>) - ; CHECK-NEXT: [[DEF2:%[0-9]+]]:_(<8 x s16>) = G_IMPLICIT_DEF - ; CHECK-NEXT: [[INSERT:%[0-9]+]]:_(<8 x s16>) = G_INSERT [[DEF2]], [[UV]](<5 x s16>), 0 - ; CHECK-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[INSERT]](<8 x s16>) + ; CHECK-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>), [[UV2:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<6 x s16>) + ; CHECK-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>) + ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; CHECK-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) + ; CHECK-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>) + ; CHECK-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C]](s32) + ; CHECK-NEXT: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[UV2]](<2 x s16>) + ; CHECK-NEXT: [[DEF1:%[0-9]+]]:_(<8 x s16>) = G_IMPLICIT_DEF + ; CHECK-NEXT: [[UV3:%[0-9]+]]:_(<2 x s16>), [[UV4:%[0-9]+]]:_(<2 x s16>), [[UV5:%[0-9]+]]:_(<2 x s16>), [[UV6:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF1]](<8 x s16>) + ; CHECK-NEXT: [[BITCAST3:%[0-9]+]]:_(s32) = G_BITCAST [[UV5]](<2 x s16>) + ; CHECK-NEXT: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST3]], [[C]](s32) + ; CHECK-NEXT: [[BITCAST4:%[0-9]+]]:_(s32) = G_BITCAST [[UV6]](<2 x s16>) + ; CHECK-NEXT: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST4]], [[C]](s32) + ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 + ; CHECK-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[BITCAST]], [[C1]] + ; CHECK-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LSHR]], [[C1]] + ; CHECK-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C]](s32) + ; CHECK-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]] + ; CHECK-NEXT: [[BITCAST5:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) + ; CHECK-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[BITCAST1]], [[C1]] + ; CHECK-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[LSHR1]], [[C1]] + ; CHECK-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C]](s32) + ; CHECK-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL1]] + ; CHECK-NEXT: [[BITCAST6:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32) + ; CHECK-NEXT: [[AND4:%[0-9]+]]:_(s32) = G_AND [[BITCAST2]], [[C1]] + ; CHECK-NEXT: [[AND5:%[0-9]+]]:_(s32) = G_AND [[LSHR2]], [[C1]] + ; CHECK-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[C]](s32) + ; CHECK-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[AND4]], [[SHL2]] + ; CHECK-NEXT: [[BITCAST7:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR2]](s32) + ; CHECK-NEXT: [[AND6:%[0-9]+]]:_(s32) = G_AND [[BITCAST4]], [[C1]] + ; CHECK-NEXT: [[AND7:%[0-9]+]]:_(s32) = G_AND [[LSHR3]], [[C1]] + ; CHECK-NEXT: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[AND7]], [[C]](s32) + ; CHECK-NEXT: [[OR3:%[0-9]+]]:_(s32) = G_OR [[AND6]], [[SHL3]] + ; CHECK-NEXT: [[BITCAST8:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR3]](s32) + ; CHECK-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<8 x s16>) = G_CONCAT_VECTORS [[BITCAST5]](<2 x s16>), [[BITCAST6]](<2 x s16>), [[BITCAST7]](<2 x s16>), [[BITCAST8]](<2 x s16>) + ; CHECK-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[CONCAT_VECTORS]](<8 x s16>) %0:_(<5 x s16>) = G_IMPLICIT_DEF %1:_(<8 x s16>) = G_IMPLICIT_DEF %2:_(<8 x s16>) = G_INSERT %1, %0, 0 @@ -478,8 +525,40 
@@ body: | ; CHECK-LABEL: name: test_implicit_def_v6s16 ; CHECK: [[DEF:%[0-9]+]]:_(<6 x s16>) = G_IMPLICIT_DEF ; CHECK-NEXT: [[DEF1:%[0-9]+]]:_(<8 x s16>) = G_IMPLICIT_DEF - ; CHECK-NEXT: [[INSERT:%[0-9]+]]:_(<8 x s16>) = G_INSERT [[DEF1]], [[DEF]](<6 x s16>), 0 - ; CHECK-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[INSERT]](<8 x s16>) + ; CHECK-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>), [[UV2:%[0-9]+]]:_(<2 x s16>), [[UV3:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF1]](<8 x s16>) + ; CHECK-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV3]](<2 x s16>) + ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; CHECK-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) + ; CHECK-NEXT: [[UV4:%[0-9]+]]:_(<2 x s16>), [[UV5:%[0-9]+]]:_(<2 x s16>), [[UV6:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<6 x s16>) + ; CHECK-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV4]](<2 x s16>) + ; CHECK-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C]](s32) + ; CHECK-NEXT: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[UV5]](<2 x s16>) + ; CHECK-NEXT: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C]](s32) + ; CHECK-NEXT: [[BITCAST3:%[0-9]+]]:_(s32) = G_BITCAST [[UV6]](<2 x s16>) + ; CHECK-NEXT: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST3]], [[C]](s32) + ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 + ; CHECK-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[BITCAST1]], [[C1]] + ; CHECK-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LSHR1]], [[C1]] + ; CHECK-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C]](s32) + ; CHECK-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]] + ; CHECK-NEXT: [[BITCAST4:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) + ; CHECK-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[BITCAST2]], [[C1]] + ; CHECK-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[LSHR2]], [[C1]] + ; CHECK-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C]](s32) + ; CHECK-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL1]] + ; CHECK-NEXT: [[BITCAST5:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32) + ; CHECK-NEXT: [[AND4:%[0-9]+]]:_(s32) = G_AND [[BITCAST3]], [[C1]] + ; CHECK-NEXT: [[AND5:%[0-9]+]]:_(s32) = G_AND [[LSHR3]], [[C1]] + ; CHECK-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[C]](s32) + ; CHECK-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[AND4]], [[SHL2]] + ; CHECK-NEXT: [[BITCAST6:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR2]](s32) + ; CHECK-NEXT: [[AND6:%[0-9]+]]:_(s32) = G_AND [[BITCAST]], [[C1]] + ; CHECK-NEXT: [[AND7:%[0-9]+]]:_(s32) = G_AND [[LSHR]], [[C1]] + ; CHECK-NEXT: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[AND7]], [[C]](s32) + ; CHECK-NEXT: [[OR3:%[0-9]+]]:_(s32) = G_OR [[AND6]], [[SHL3]] + ; CHECK-NEXT: [[BITCAST7:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR3]](s32) + ; CHECK-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<8 x s16>) = G_CONCAT_VECTORS [[BITCAST4]](<2 x s16>), [[BITCAST5]](<2 x s16>), [[BITCAST6]](<2 x s16>), [[BITCAST7]](<2 x s16>) + ; CHECK-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[CONCAT_VECTORS]](<8 x s16>) %0:_(<6 x s16>) = G_IMPLICIT_DEF %1:_(<8 x s16>) = G_IMPLICIT_DEF %2:_(<8 x s16>) = G_INSERT %1, %0, 0 diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-insert-vector-elt.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-insert-vector-elt.mir index f5b89b7e9de9..1f037cc5775a 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-insert-vector-elt.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-insert-vector-elt.mir @@ -10,8 +10,9 @@ body: | ; CHECK-LABEL: name: insert_vector_elt_0_v2s32 ; CHECK: [[COPY:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr0_vgpr1 
; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; CHECK-NEXT: [[INSERT:%[0-9]+]]:_(<2 x s32>) = G_INSERT [[COPY]], [[COPY1]](s32), 0 - ; CHECK-NEXT: $vgpr0_vgpr1 = COPY [[INSERT]](<2 x s32>) + ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<2 x s32>) + ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[COPY1]](s32), [[UV1]](s32) + ; CHECK-NEXT: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x s32>) %0:_(<2 x s32>) = COPY $vgpr0_vgpr1 %1:_(s32) = COPY $vgpr2 %2:_(s32) = G_CONSTANT i32 0 @@ -28,8 +29,9 @@ body: | ; CHECK-LABEL: name: insert_vector_elt_1_v2s32 ; CHECK: [[COPY:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr0_vgpr1 ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; CHECK-NEXT: [[INSERT:%[0-9]+]]:_(<2 x s32>) = G_INSERT [[COPY]], [[COPY1]](s32), 32 - ; CHECK-NEXT: $vgpr0_vgpr1 = COPY [[INSERT]](<2 x s32>) + ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<2 x s32>) + ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[UV]](s32), [[COPY1]](s32) + ; CHECK-NEXT: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x s32>) %0:_(<2 x s32>) = COPY $vgpr0_vgpr1 %1:_(s32) = COPY $vgpr2 %2:_(s32) = G_CONSTANT i32 1 @@ -107,8 +109,9 @@ body: | ; CHECK-LABEL: name: insert_vector_elt_0_v16s64 ; CHECK: [[COPY:%[0-9]+]]:_(s64) = COPY $vgpr0_vgpr1 ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(<16 x s64>) = G_IMPLICIT_DEF - ; CHECK-NEXT: [[INSERT:%[0-9]+]]:_(<16 x s64>) = G_INSERT [[DEF]], [[COPY]](s64), 0 - ; CHECK-NEXT: S_ENDPGM 0, implicit [[INSERT]](<16 x s64>) + ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s64), [[UV1:%[0-9]+]]:_(s64), [[UV2:%[0-9]+]]:_(s64), [[UV3:%[0-9]+]]:_(s64), [[UV4:%[0-9]+]]:_(s64), [[UV5:%[0-9]+]]:_(s64), [[UV6:%[0-9]+]]:_(s64), [[UV7:%[0-9]+]]:_(s64), [[UV8:%[0-9]+]]:_(s64), [[UV9:%[0-9]+]]:_(s64), [[UV10:%[0-9]+]]:_(s64), [[UV11:%[0-9]+]]:_(s64), [[UV12:%[0-9]+]]:_(s64), [[UV13:%[0-9]+]]:_(s64), [[UV14:%[0-9]+]]:_(s64), [[UV15:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[DEF]](<16 x s64>) + ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<16 x s64>) = G_BUILD_VECTOR [[COPY]](s64), [[UV1]](s64), [[UV2]](s64), [[UV3]](s64), [[UV4]](s64), [[UV5]](s64), [[UV6]](s64), [[UV7]](s64), [[UV8]](s64), [[UV9]](s64), [[UV10]](s64), [[UV11]](s64), [[UV12]](s64), [[UV13]](s64), [[UV14]](s64), [[UV15]](s64) + ; CHECK-NEXT: S_ENDPGM 0, implicit [[BUILD_VECTOR]](<16 x s64>) %0:_(s64) = COPY $vgpr0_vgpr1 %1:_(<16 x s64>) = G_IMPLICIT_DEF %2:_(s32) = G_CONSTANT i32 0 @@ -126,8 +129,9 @@ body: | ; CHECK-LABEL: name: insert_vector_elt_0_v2s32_s8 ; CHECK: [[COPY:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr0_vgpr1 ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; CHECK-NEXT: [[INSERT:%[0-9]+]]:_(<2 x s32>) = G_INSERT [[COPY]], [[COPY1]](s32), 0 - ; CHECK-NEXT: $vgpr0_vgpr1 = COPY [[INSERT]](<2 x s32>) + ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<2 x s32>) + ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[COPY1]](s32), [[UV1]](s32) + ; CHECK-NEXT: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x s32>) %0:_(<2 x s32>) = COPY $vgpr0_vgpr1 %1:_(s32) = COPY $vgpr2 %2:_(s8) = G_CONSTANT i8 0 @@ -145,8 +149,9 @@ body: | ; CHECK-LABEL: name: insert_vector_elt_0_v2i8_i32 ; CHECK: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(<2 x s32>) = G_IMPLICIT_DEF - ; CHECK-NEXT: [[INSERT:%[0-9]+]]:_(<2 x s32>) = G_INSERT [[DEF]], [[COPY]](s32), 0 - ; CHECK-NEXT: $vgpr0_vgpr1 = COPY [[INSERT]](<2 x s32>) + ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = 
G_UNMERGE_VALUES [[DEF]](<2 x s32>) + ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[UV1]](s32) + ; CHECK-NEXT: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x s32>) %0:_(s32) = COPY $vgpr0 %1:_(s8) = G_TRUNC %0 %2:_(<2 x s8>) = G_IMPLICIT_DEF @@ -166,8 +171,9 @@ body: | ; CHECK-LABEL: name: insert_vector_elt_v4s32_s32_look_through_trunc_0 ; CHECK: [[COPY:%[0-9]+]]:_(<4 x s32>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3 ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr4 - ; CHECK-NEXT: [[INSERT:%[0-9]+]]:_(<4 x s32>) = G_INSERT [[COPY]], [[COPY1]](s32), 0 - ; CHECK-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[INSERT]](<4 x s32>) + ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<4 x s32>) + ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY1]](s32), [[UV1]](s32), [[UV2]](s32), [[UV3]](s32) + ; CHECK-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<4 x s32>) %0:_(<4 x s32>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3 %1:_(s32) = COPY $vgpr4 %2:_(s64) = G_CONSTANT i64 0 @@ -305,55 +311,62 @@ body: | ; CHECK-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C2]](s64) ; CHECK-NEXT: [[LOAD3:%[0-9]+]]:_(<16 x s32>) = G_LOAD [[PTR_ADD2]](p1) :: (load (<16 x s32>) from unknown-address + 192, align 4, addrspace 4) ; CHECK-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 12345 - ; CHECK-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<32 x s32>) = G_CONCAT_VECTORS [[LOAD2]](<16 x s32>), [[LOAD3]](<16 x s32>) - ; CHECK-NEXT: [[INSERT:%[0-9]+]]:_(<32 x s32>) = G_INSERT [[CONCAT_VECTORS]], [[C3]](s32), 32 + ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32), [[UV6:%[0-9]+]]:_(s32), [[UV7:%[0-9]+]]:_(s32), [[UV8:%[0-9]+]]:_(s32), [[UV9:%[0-9]+]]:_(s32), [[UV10:%[0-9]+]]:_(s32), [[UV11:%[0-9]+]]:_(s32), [[UV12:%[0-9]+]]:_(s32), [[UV13:%[0-9]+]]:_(s32), [[UV14:%[0-9]+]]:_(s32), [[UV15:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[LOAD2]](<16 x s32>) + ; CHECK-NEXT: [[UV16:%[0-9]+]]:_(s32), [[UV17:%[0-9]+]]:_(s32), [[UV18:%[0-9]+]]:_(s32), [[UV19:%[0-9]+]]:_(s32), [[UV20:%[0-9]+]]:_(s32), [[UV21:%[0-9]+]]:_(s32), [[UV22:%[0-9]+]]:_(s32), [[UV23:%[0-9]+]]:_(s32), [[UV24:%[0-9]+]]:_(s32), [[UV25:%[0-9]+]]:_(s32), [[UV26:%[0-9]+]]:_(s32), [[UV27:%[0-9]+]]:_(s32), [[UV28:%[0-9]+]]:_(s32), [[UV29:%[0-9]+]]:_(s32), [[UV30:%[0-9]+]]:_(s32), [[UV31:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[LOAD3]](<16 x s32>) + ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[UV]](s32), [[C3]](s32), [[UV2]](s32), [[UV3]](s32) + ; CHECK-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[UV4]](s32), [[UV5]](s32), [[UV6]](s32), [[UV7]](s32) + ; CHECK-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[UV8]](s32), [[UV9]](s32), [[UV10]](s32), [[UV11]](s32) + ; CHECK-NEXT: [[BUILD_VECTOR3:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[UV12]](s32), [[UV13]](s32), [[UV14]](s32), [[UV15]](s32) + ; CHECK-NEXT: [[BUILD_VECTOR4:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[UV16]](s32), [[UV17]](s32), [[UV18]](s32), [[UV19]](s32) + ; CHECK-NEXT: [[BUILD_VECTOR5:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[UV20]](s32), [[UV21]](s32), [[UV22]](s32), [[UV23]](s32) + ; CHECK-NEXT: [[BUILD_VECTOR6:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[UV24]](s32), [[UV25]](s32), [[UV26]](s32), [[UV27]](s32) + ; CHECK-NEXT: [[BUILD_VECTOR7:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[UV28]](s32), [[UV29]](s32), 
[[UV30]](s32), [[UV31]](s32) ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; CHECK-NEXT: [[UV:%[0-9]+]]:_(<4 x s32>), [[UV1:%[0-9]+]]:_(<4 x s32>), [[UV2:%[0-9]+]]:_(<4 x s32>), [[UV3:%[0-9]+]]:_(<4 x s32>) = G_UNMERGE_VALUES [[LOAD]](<16 x s32>) - ; CHECK-NEXT: [[UV4:%[0-9]+]]:_(<4 x s32>), [[UV5:%[0-9]+]]:_(<4 x s32>), [[UV6:%[0-9]+]]:_(<4 x s32>), [[UV7:%[0-9]+]]:_(<4 x s32>) = G_UNMERGE_VALUES [[LOAD1]](<16 x s32>) - ; CHECK-NEXT: [[UV8:%[0-9]+]]:_(<4 x s32>), [[UV9:%[0-9]+]]:_(<4 x s32>), [[UV10:%[0-9]+]]:_(<4 x s32>), [[UV11:%[0-9]+]]:_(<4 x s32>), [[UV12:%[0-9]+]]:_(<4 x s32>), [[UV13:%[0-9]+]]:_(<4 x s32>), [[UV14:%[0-9]+]]:_(<4 x s32>), [[UV15:%[0-9]+]]:_(<4 x s32>) = G_UNMERGE_VALUES [[INSERT]](<32 x s32>) - ; CHECK-NEXT: G_STORE [[UV]](<4 x s32>), [[COPY1]](p1) :: (store (<4 x s32>), align 4, addrspace 1) + ; CHECK-NEXT: [[UV32:%[0-9]+]]:_(<4 x s32>), [[UV33:%[0-9]+]]:_(<4 x s32>), [[UV34:%[0-9]+]]:_(<4 x s32>), [[UV35:%[0-9]+]]:_(<4 x s32>) = G_UNMERGE_VALUES [[LOAD]](<16 x s32>) + ; CHECK-NEXT: [[UV36:%[0-9]+]]:_(<4 x s32>), [[UV37:%[0-9]+]]:_(<4 x s32>), [[UV38:%[0-9]+]]:_(<4 x s32>), [[UV39:%[0-9]+]]:_(<4 x s32>) = G_UNMERGE_VALUES [[LOAD1]](<16 x s32>) + ; CHECK-NEXT: G_STORE [[UV32]](<4 x s32>), [[COPY1]](p1) :: (store (<4 x s32>), align 4, addrspace 1) ; CHECK-NEXT: [[C4:%[0-9]+]]:_(s64) = G_CONSTANT i64 16 ; CHECK-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY1]], [[C4]](s64) - ; CHECK-NEXT: G_STORE [[UV1]](<4 x s32>), [[PTR_ADD3]](p1) :: (store (<4 x s32>) into unknown-address + 16, align 4, addrspace 1) + ; CHECK-NEXT: G_STORE [[UV33]](<4 x s32>), [[PTR_ADD3]](p1) :: (store (<4 x s32>) into unknown-address + 16, align 4, addrspace 1) ; CHECK-NEXT: [[C5:%[0-9]+]]:_(s64) = G_CONSTANT i64 32 ; CHECK-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY1]], [[C5]](s64) - ; CHECK-NEXT: G_STORE [[UV2]](<4 x s32>), [[PTR_ADD4]](p1) :: (store (<4 x s32>) into unknown-address + 32, align 4, addrspace 1) + ; CHECK-NEXT: G_STORE [[UV34]](<4 x s32>), [[PTR_ADD4]](p1) :: (store (<4 x s32>) into unknown-address + 32, align 4, addrspace 1) ; CHECK-NEXT: [[C6:%[0-9]+]]:_(s64) = G_CONSTANT i64 48 ; CHECK-NEXT: [[PTR_ADD5:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY1]], [[C6]](s64) - ; CHECK-NEXT: G_STORE [[UV3]](<4 x s32>), [[PTR_ADD5]](p1) :: (store (<4 x s32>) into unknown-address + 48, align 4, addrspace 1) + ; CHECK-NEXT: G_STORE [[UV35]](<4 x s32>), [[PTR_ADD5]](p1) :: (store (<4 x s32>) into unknown-address + 48, align 4, addrspace 1) ; CHECK-NEXT: [[PTR_ADD6:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY1]], [[C]](s64) - ; CHECK-NEXT: G_STORE [[UV4]](<4 x s32>), [[PTR_ADD6]](p1) :: (store (<4 x s32>) into unknown-address + 64, align 4, addrspace 1) + ; CHECK-NEXT: G_STORE [[UV36]](<4 x s32>), [[PTR_ADD6]](p1) :: (store (<4 x s32>) into unknown-address + 64, align 4, addrspace 1) ; CHECK-NEXT: [[C7:%[0-9]+]]:_(s64) = G_CONSTANT i64 80 ; CHECK-NEXT: [[PTR_ADD7:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY1]], [[C7]](s64) - ; CHECK-NEXT: G_STORE [[UV5]](<4 x s32>), [[PTR_ADD7]](p1) :: (store (<4 x s32>) into unknown-address + 80, align 4, addrspace 1) + ; CHECK-NEXT: G_STORE [[UV37]](<4 x s32>), [[PTR_ADD7]](p1) :: (store (<4 x s32>) into unknown-address + 80, align 4, addrspace 1) ; CHECK-NEXT: [[C8:%[0-9]+]]:_(s64) = G_CONSTANT i64 96 ; CHECK-NEXT: [[PTR_ADD8:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY1]], [[C8]](s64) - ; CHECK-NEXT: G_STORE [[UV6]](<4 x s32>), [[PTR_ADD8]](p1) :: (store (<4 x s32>) into unknown-address + 96, align 4, addrspace 1) + ; CHECK-NEXT: G_STORE [[UV38]](<4 x s32>), 
[[PTR_ADD8]](p1) :: (store (<4 x s32>) into unknown-address + 96, align 4, addrspace 1) ; CHECK-NEXT: [[C9:%[0-9]+]]:_(s64) = G_CONSTANT i64 112 ; CHECK-NEXT: [[PTR_ADD9:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY1]], [[C9]](s64) - ; CHECK-NEXT: G_STORE [[UV7]](<4 x s32>), [[PTR_ADD9]](p1) :: (store (<4 x s32>) into unknown-address + 112, align 4, addrspace 1) + ; CHECK-NEXT: G_STORE [[UV39]](<4 x s32>), [[PTR_ADD9]](p1) :: (store (<4 x s32>) into unknown-address + 112, align 4, addrspace 1) ; CHECK-NEXT: [[PTR_ADD10:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY1]], [[C1]](s64) - ; CHECK-NEXT: G_STORE [[UV8]](<4 x s32>), [[PTR_ADD10]](p1) :: (store (<4 x s32>) into unknown-address + 128, align 4, addrspace 1) + ; CHECK-NEXT: G_STORE [[BUILD_VECTOR]](<4 x s32>), [[PTR_ADD10]](p1) :: (store (<4 x s32>) into unknown-address + 128, align 4, addrspace 1) ; CHECK-NEXT: [[C10:%[0-9]+]]:_(s64) = G_CONSTANT i64 144 ; CHECK-NEXT: [[PTR_ADD11:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY1]], [[C10]](s64) - ; CHECK-NEXT: G_STORE [[UV9]](<4 x s32>), [[PTR_ADD11]](p1) :: (store (<4 x s32>) into unknown-address + 144, align 4, addrspace 1) + ; CHECK-NEXT: G_STORE [[BUILD_VECTOR1]](<4 x s32>), [[PTR_ADD11]](p1) :: (store (<4 x s32>) into unknown-address + 144, align 4, addrspace 1) ; CHECK-NEXT: [[C11:%[0-9]+]]:_(s64) = G_CONSTANT i64 160 ; CHECK-NEXT: [[PTR_ADD12:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY1]], [[C11]](s64) - ; CHECK-NEXT: G_STORE [[UV10]](<4 x s32>), [[PTR_ADD12]](p1) :: (store (<4 x s32>) into unknown-address + 160, align 4, addrspace 1) + ; CHECK-NEXT: G_STORE [[BUILD_VECTOR2]](<4 x s32>), [[PTR_ADD12]](p1) :: (store (<4 x s32>) into unknown-address + 160, align 4, addrspace 1) ; CHECK-NEXT: [[C12:%[0-9]+]]:_(s64) = G_CONSTANT i64 176 ; CHECK-NEXT: [[PTR_ADD13:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY1]], [[C12]](s64) - ; CHECK-NEXT: G_STORE [[UV11]](<4 x s32>), [[PTR_ADD13]](p1) :: (store (<4 x s32>) into unknown-address + 176, align 4, addrspace 1) + ; CHECK-NEXT: G_STORE [[BUILD_VECTOR3]](<4 x s32>), [[PTR_ADD13]](p1) :: (store (<4 x s32>) into unknown-address + 176, align 4, addrspace 1) ; CHECK-NEXT: [[PTR_ADD14:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY1]], [[C2]](s64) - ; CHECK-NEXT: G_STORE [[UV12]](<4 x s32>), [[PTR_ADD14]](p1) :: (store (<4 x s32>) into unknown-address + 192, align 4, addrspace 1) + ; CHECK-NEXT: G_STORE [[BUILD_VECTOR4]](<4 x s32>), [[PTR_ADD14]](p1) :: (store (<4 x s32>) into unknown-address + 192, align 4, addrspace 1) ; CHECK-NEXT: [[C13:%[0-9]+]]:_(s64) = G_CONSTANT i64 208 ; CHECK-NEXT: [[PTR_ADD15:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY1]], [[C13]](s64) - ; CHECK-NEXT: G_STORE [[UV13]](<4 x s32>), [[PTR_ADD15]](p1) :: (store (<4 x s32>) into unknown-address + 208, align 4, addrspace 1) + ; CHECK-NEXT: G_STORE [[BUILD_VECTOR5]](<4 x s32>), [[PTR_ADD15]](p1) :: (store (<4 x s32>) into unknown-address + 208, align 4, addrspace 1) ; CHECK-NEXT: [[C14:%[0-9]+]]:_(s64) = G_CONSTANT i64 224 ; CHECK-NEXT: [[PTR_ADD16:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY1]], [[C14]](s64) - ; CHECK-NEXT: G_STORE [[UV14]](<4 x s32>), [[PTR_ADD16]](p1) :: (store (<4 x s32>) into unknown-address + 224, align 4, addrspace 1) + ; CHECK-NEXT: G_STORE [[BUILD_VECTOR6]](<4 x s32>), [[PTR_ADD16]](p1) :: (store (<4 x s32>) into unknown-address + 224, align 4, addrspace 1) ; CHECK-NEXT: [[C15:%[0-9]+]]:_(s64) = G_CONSTANT i64 240 ; CHECK-NEXT: [[PTR_ADD17:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY1]], [[C15]](s64) - ; CHECK-NEXT: G_STORE [[UV15]](<4 x s32>), [[PTR_ADD17]](p1) :: (store (<4 x s32>) into unknown-address + 240, align 4, addrspace 1) + ; 
CHECK-NEXT: G_STORE [[BUILD_VECTOR7]](<4 x s32>), [[PTR_ADD17]](p1) :: (store (<4 x s32>) into unknown-address + 240, align 4, addrspace 1) %0:_(p1) = COPY $sgpr0_sgpr1 %1:_(s32) = G_CONSTANT i32 33 %2:_(<64 x s32>) = G_LOAD %0 :: (load (<64 x s32>), align 4, addrspace 4) diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-insert.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-insert.mir index 1a8b37cf7d4c..b6e4c91dec08 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-insert.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-insert.mir @@ -409,8 +409,9 @@ body: | ; CHECK-LABEL: name: test_insert_v2s32_s32_offset0 ; CHECK: [[COPY:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr0_vgpr1 ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; CHECK-NEXT: [[INSERT:%[0-9]+]]:_(<2 x s32>) = G_INSERT [[COPY]], [[COPY1]](s32), 0 - ; CHECK-NEXT: $vgpr0_vgpr1 = COPY [[INSERT]](<2 x s32>) + ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<2 x s32>) + ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[COPY1]](s32), [[UV1]](s32) + ; CHECK-NEXT: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x s32>) %0:_(<2 x s32>) = COPY $vgpr0_vgpr1 %1:_(s32) = COPY $vgpr2 %2:_(<2 x s32>) = G_INSERT %0, %1, 0 @@ -425,8 +426,9 @@ body: | ; CHECK-LABEL: name: test_insert_v2s32_s32_offset32 ; CHECK: [[COPY:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr0_vgpr1 ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; CHECK-NEXT: [[INSERT:%[0-9]+]]:_(<2 x s32>) = G_INSERT [[COPY]], [[COPY1]](s32), 32 - ; CHECK-NEXT: $vgpr0_vgpr1 = COPY [[INSERT]](<2 x s32>) + ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<2 x s32>) + ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[UV]](s32), [[COPY1]](s32) + ; CHECK-NEXT: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x s32>) %0:_(<2 x s32>) = COPY $vgpr0_vgpr1 %1:_(s32) = COPY $vgpr2 %2:_(<2 x s32>) = G_INSERT %0, %1, 32 @@ -441,8 +443,9 @@ body: | ; CHECK-LABEL: name: test_insert_v3s32_s32_offset0 ; CHECK: [[COPY:%[0-9]+]]:_(<3 x s32>) = COPY $vgpr0_vgpr1_vgpr2 ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr3 - ; CHECK-NEXT: [[INSERT:%[0-9]+]]:_(<3 x s32>) = G_INSERT [[COPY]], [[COPY1]](s32), 0 - ; CHECK-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[INSERT]](<3 x s32>) + ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<3 x s32>) + ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[COPY1]](s32), [[UV1]](s32), [[UV2]](s32) + ; CHECK-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[BUILD_VECTOR]](<3 x s32>) %0:_(<3 x s32>) = COPY $vgpr0_vgpr1_vgpr2 %1:_(s32) = COPY $vgpr3 %2:_(<3 x s32>) = G_INSERT %0, %1, 0 @@ -457,8 +460,9 @@ body: | ; CHECK-LABEL: name: test_insert_v3s32_s32_offset32 ; CHECK: [[COPY:%[0-9]+]]:_(<3 x s32>) = COPY $vgpr0_vgpr1_vgpr2 ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr3 - ; CHECK-NEXT: [[INSERT:%[0-9]+]]:_(<3 x s32>) = G_INSERT [[COPY]], [[COPY1]](s32), 32 - ; CHECK-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[INSERT]](<3 x s32>) + ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<3 x s32>) + ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[UV]](s32), [[COPY1]](s32), [[UV2]](s32) + ; CHECK-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[BUILD_VECTOR]](<3 x s32>) %0:_(<3 x s32>) = COPY $vgpr0_vgpr1_vgpr2 %1:_(s32) = COPY $vgpr3 %2:_(<3 x s32>) = G_INSERT %0, %1, 32 @@ -473,8 +477,9 @@ body: | ; CHECK-LABEL: name: 
test_insert_v3s32_s32_offset64 ; CHECK: [[COPY:%[0-9]+]]:_(<3 x s32>) = COPY $vgpr0_vgpr1_vgpr2 ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr3 - ; CHECK-NEXT: [[INSERT:%[0-9]+]]:_(<3 x s32>) = G_INSERT [[COPY]], [[COPY1]](s32), 64 - ; CHECK-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[INSERT]](<3 x s32>) + ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<3 x s32>) + ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[UV]](s32), [[UV1]](s32), [[COPY1]](s32) + ; CHECK-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[BUILD_VECTOR]](<3 x s32>) %0:_(<3 x s32>) = COPY $vgpr0_vgpr1_vgpr2 %1:_(s32) = COPY $vgpr3 %2:_(<3 x s32>) = G_INSERT %0, %1, 64 @@ -489,8 +494,9 @@ body: | ; CHECK-LABEL: name: test_insert_v4s32_s32_offset0 ; CHECK: [[COPY:%[0-9]+]]:_(<4 x s32>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3 ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr4 - ; CHECK-NEXT: [[INSERT:%[0-9]+]]:_(<4 x s32>) = G_INSERT [[COPY]], [[COPY1]](s32), 0 - ; CHECK-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[INSERT]](<4 x s32>) + ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<4 x s32>) + ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY1]](s32), [[UV1]](s32), [[UV2]](s32), [[UV3]](s32) + ; CHECK-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<4 x s32>) %0:_(<4 x s32>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3 %1:_(s32) = COPY $vgpr4 %2:_(<4 x s32>) = G_INSERT %0, %1, 0 @@ -505,8 +511,9 @@ body: | ; CHECK-LABEL: name: test_insert_v4s32_s32_offset32 ; CHECK: [[COPY:%[0-9]+]]:_(<4 x s32>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3 ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr4 - ; CHECK-NEXT: [[INSERT:%[0-9]+]]:_(<4 x s32>) = G_INSERT [[COPY]], [[COPY1]](s32), 32 - ; CHECK-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[INSERT]](<4 x s32>) + ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<4 x s32>) + ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[UV]](s32), [[COPY1]](s32), [[UV2]](s32), [[UV3]](s32) + ; CHECK-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<4 x s32>) %0:_(<4 x s32>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3 %1:_(s32) = COPY $vgpr4 %2:_(<4 x s32>) = G_INSERT %0, %1, 32 @@ -521,8 +528,9 @@ body: | ; CHECK-LABEL: name: test_insert_v4s32_s32_offset64 ; CHECK: [[COPY:%[0-9]+]]:_(<4 x s32>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3 ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr4 - ; CHECK-NEXT: [[INSERT:%[0-9]+]]:_(<4 x s32>) = G_INSERT [[COPY]], [[COPY1]](s32), 64 - ; CHECK-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[INSERT]](<4 x s32>) + ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<4 x s32>) + ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[UV]](s32), [[UV1]](s32), [[COPY1]](s32), [[UV3]](s32) + ; CHECK-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<4 x s32>) %0:_(<4 x s32>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3 %1:_(s32) = COPY $vgpr4 %2:_(<4 x s32>) = G_INSERT %0, %1, 64 @@ -537,8 +545,9 @@ body: | ; CHECK-LABEL: name: test_insert_v4s32_s32_offset96 ; CHECK: [[COPY:%[0-9]+]]:_(<4 x s32>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3 ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr4 - ; CHECK-NEXT: [[INSERT:%[0-9]+]]:_(<4 x s32>) = G_INSERT [[COPY]], [[COPY1]](s32), 96 - ; CHECK-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[INSERT]](<4 x s32>) + ; 
CHECK-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<4 x s32>) + ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[UV]](s32), [[UV1]](s32), [[UV2]](s32), [[COPY1]](s32) + ; CHECK-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<4 x s32>) %0:_(<4 x s32>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3 %1:_(s32) = COPY $vgpr4 %2:_(<4 x s32>) = G_INSERT %0, %1, 96 @@ -553,8 +562,10 @@ body: | ; CHECK-LABEL: name: test_insert_v4s32_s64_offset0 ; CHECK: [[COPY:%[0-9]+]]:_(<4 x s32>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3 ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s64) = COPY $vgpr4_vgpr5 - ; CHECK-NEXT: [[INSERT:%[0-9]+]]:_(<4 x s32>) = G_INSERT [[COPY]], [[COPY1]](s64), 0 - ; CHECK-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[INSERT]](<4 x s32>) + ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<4 x s32>) + ; CHECK-NEXT: [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](s64) + ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[UV4]](s32), [[UV5]](s32), [[UV2]](s32), [[UV3]](s32) + ; CHECK-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<4 x s32>) %0:_(<4 x s32>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3 %1:_(s64) = COPY $vgpr4_vgpr5 %2:_(<4 x s32>) = G_INSERT %0, %1, 0 @@ -569,8 +580,10 @@ body: | ; CHECK-LABEL: name: test_insert_v4s32_s64_offset32 ; CHECK: [[COPY:%[0-9]+]]:_(<4 x s32>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3 ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s64) = COPY $vgpr4_vgpr5 - ; CHECK-NEXT: [[INSERT:%[0-9]+]]:_(<4 x s32>) = G_INSERT [[COPY]], [[COPY1]](s64), 32 - ; CHECK-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[INSERT]](<4 x s32>) + ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<4 x s32>) + ; CHECK-NEXT: [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](s64) + ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[UV]](s32), [[UV4]](s32), [[UV5]](s32), [[UV3]](s32) + ; CHECK-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<4 x s32>) %0:_(<4 x s32>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3 %1:_(s64) = COPY $vgpr4_vgpr5 %2:_(<4 x s32>) = G_INSERT %0, %1, 32 @@ -585,8 +598,10 @@ body: | ; CHECK-LABEL: name: test_insert_v4s32_s64_offset64 ; CHECK: [[COPY:%[0-9]+]]:_(<4 x s32>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3 ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s64) = COPY $vgpr4_vgpr5 - ; CHECK-NEXT: [[INSERT:%[0-9]+]]:_(<4 x s32>) = G_INSERT [[COPY]], [[COPY1]](s64), 64 - ; CHECK-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[INSERT]](<4 x s32>) + ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<4 x s32>) + ; CHECK-NEXT: [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](s64) + ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[UV]](s32), [[UV1]](s32), [[UV4]](s32), [[UV5]](s32) + ; CHECK-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<4 x s32>) %0:_(<4 x s32>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3 %1:_(s64) = COPY $vgpr4_vgpr5 %2:_(<4 x s32>) = G_INSERT %0, %1, 64 @@ -601,8 +616,10 @@ body: | ; CHECK-LABEL: name: test_insert_v4s32_s96_offset0 ; CHECK: [[COPY:%[0-9]+]]:_(<4 x s32>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3 ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s96) = COPY $vgpr4_vgpr5_vgpr6 - ; CHECK-NEXT: [[INSERT:%[0-9]+]]:_(<4 x s32>) = G_INSERT [[COPY]], [[COPY1]](s96), 0 
- ; CHECK-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[INSERT]](<4 x s32>) + ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<4 x s32>) + ; CHECK-NEXT: [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32), [[UV6:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](s96) + ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[UV4]](s32), [[UV5]](s32), [[UV6]](s32), [[UV3]](s32) + ; CHECK-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<4 x s32>) %0:_(<4 x s32>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3 %1:_(s96) = COPY $vgpr4_vgpr5_vgpr6 %2:_(<4 x s32>) = G_INSERT %0, %1, 0 @@ -617,8 +634,10 @@ body: | ; CHECK-LABEL: name: test_insert_v4s32_s96_offset32 ; CHECK: [[COPY:%[0-9]+]]:_(<4 x s32>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3 ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s96) = COPY $vgpr4_vgpr5_vgpr6 - ; CHECK-NEXT: [[INSERT:%[0-9]+]]:_(<4 x s32>) = G_INSERT [[COPY]], [[COPY1]](s96), 32 - ; CHECK-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[INSERT]](<4 x s32>) + ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<4 x s32>) + ; CHECK-NEXT: [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32), [[UV6:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](s96) + ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[UV]](s32), [[UV4]](s32), [[UV5]](s32), [[UV6]](s32) + ; CHECK-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<4 x s32>) %0:_(<4 x s32>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3 %1:_(s96) = COPY $vgpr4_vgpr5_vgpr6 %2:_(<4 x s32>) = G_INSERT %0, %1, 32 @@ -633,8 +652,10 @@ body: | ; CHECK-LABEL: name: test_insert_v4s32_v2s32_offset0 ; CHECK: [[COPY:%[0-9]+]]:_(<4 x s32>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3 ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr4_vgpr5 - ; CHECK-NEXT: [[INSERT:%[0-9]+]]:_(<4 x s32>) = G_INSERT [[COPY]], [[COPY1]](<2 x s32>), 0 - ; CHECK-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[INSERT]](<4 x s32>) + ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<4 x s32>) + ; CHECK-NEXT: [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](<2 x s32>) + ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[UV4]](s32), [[UV5]](s32), [[UV2]](s32), [[UV3]](s32) + ; CHECK-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<4 x s32>) %0:_(<4 x s32>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3 %1:_(<2 x s32>) = COPY $vgpr4_vgpr5 %2:_(<4 x s32>) = G_INSERT %0, %1, 0 @@ -649,8 +670,10 @@ body: | ; CHECK-LABEL: name: test_insert_v4s32_v2s32_offset32 ; CHECK: [[COPY:%[0-9]+]]:_(<4 x s32>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3 ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr4_vgpr5 - ; CHECK-NEXT: [[INSERT:%[0-9]+]]:_(<4 x s32>) = G_INSERT [[COPY]], [[COPY1]](<2 x s32>), 32 - ; CHECK-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[INSERT]](<4 x s32>) + ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<4 x s32>) + ; CHECK-NEXT: [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](<2 x s32>) + ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[UV]](s32), [[UV4]](s32), [[UV5]](s32), [[UV3]](s32) + ; CHECK-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<4 x s32>) %0:_(<4 x s32>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3 %1:_(<2 x s32>) = COPY $vgpr4_vgpr5 %2:_(<4 x s32>) = G_INSERT %0, %1, 32 @@ -665,8 
+688,10 @@ body: | ; CHECK-LABEL: name: test_insert_v4s32_v2s32_offset64 ; CHECK: [[COPY:%[0-9]+]]:_(<4 x s32>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3 ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr4_vgpr5 - ; CHECK-NEXT: [[INSERT:%[0-9]+]]:_(<4 x s32>) = G_INSERT [[COPY]], [[COPY1]](<2 x s32>), 64 - ; CHECK-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[INSERT]](<4 x s32>) + ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<4 x s32>) + ; CHECK-NEXT: [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](<2 x s32>) + ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[UV]](s32), [[UV1]](s32), [[UV4]](s32), [[UV5]](s32) + ; CHECK-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<4 x s32>) %0:_(<4 x s32>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3 %1:_(<2 x s32>) = COPY $vgpr4_vgpr5 %2:_(<4 x s32>) = G_INSERT %0, %1, 64 @@ -681,8 +706,10 @@ body: | ; CHECK-LABEL: name: test_insert_v4s32_v3s32_offset0 ; CHECK: [[COPY:%[0-9]+]]:_(<4 x s32>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3 ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(<3 x s32>) = COPY $vgpr4_vgpr5_vgpr6 - ; CHECK-NEXT: [[INSERT:%[0-9]+]]:_(<4 x s32>) = G_INSERT [[COPY]], [[COPY1]](<3 x s32>), 0 - ; CHECK-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[INSERT]](<4 x s32>) + ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<4 x s32>) + ; CHECK-NEXT: [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32), [[UV6:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](<3 x s32>) + ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[UV4]](s32), [[UV5]](s32), [[UV6]](s32), [[UV3]](s32) + ; CHECK-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<4 x s32>) %0:_(<4 x s32>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3 %1:_(<3 x s32>) = COPY $vgpr4_vgpr5_vgpr6 %2:_(<4 x s32>) = G_INSERT %0, %1, 0 @@ -697,8 +724,10 @@ body: | ; CHECK-LABEL: name: test_insert_v4s32_v3s32_offset32 ; CHECK: [[COPY:%[0-9]+]]:_(<4 x s32>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3 ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(<3 x s32>) = COPY $vgpr4_vgpr5_vgpr6 - ; CHECK-NEXT: [[INSERT:%[0-9]+]]:_(<4 x s32>) = G_INSERT [[COPY]], [[COPY1]](<3 x s32>), 32 - ; CHECK-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[INSERT]](<4 x s32>) + ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<4 x s32>) + ; CHECK-NEXT: [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32), [[UV6:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](<3 x s32>) + ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[UV]](s32), [[UV4]](s32), [[UV5]](s32), [[UV6]](s32) + ; CHECK-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<4 x s32>) %0:_(<4 x s32>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3 %1:_(<3 x s32>) = COPY $vgpr4_vgpr5_vgpr6 %2:_(<4 x s32>) = G_INSERT %0, %1, 32 @@ -763,11 +792,13 @@ body: | ; CHECK: [[COPY:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0 ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 ; CHECK-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY]](<2 x s16>) - ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 - ; CHECK-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C]] - ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 -65536 - ; CHECK-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[BITCAST]], [[C1]] - ; CHECK-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND1]], [[AND]] + ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; CHECK-NEXT: 
[[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) + ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 + ; CHECK-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C1]] + ; CHECK-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LSHR]], [[C1]] + ; CHECK-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C]](s32) + ; CHECK-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]] ; CHECK-NEXT: [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) ; CHECK-NEXT: $vgpr0 = COPY [[BITCAST1]](<2 x s16>) %0:_(<2 x s16>) = COPY $vgpr0 @@ -813,11 +844,11 @@ body: | ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 ; CHECK-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY]](<2 x s16>) ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 - ; CHECK-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C]] + ; CHECK-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[BITCAST]], [[C]] + ; CHECK-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C]] ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; CHECK-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND]], [[C1]](s32) - ; CHECK-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[BITCAST]], [[C]] - ; CHECK-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND1]], [[SHL]] + ; CHECK-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C1]](s32) + ; CHECK-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]] ; CHECK-NEXT: [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) ; CHECK-NEXT: $vgpr0 = COPY [[BITCAST1]](<2 x s16>) %0:_(<2 x s16>) = COPY $vgpr0 @@ -834,16 +865,29 @@ body: | ; CHECK-LABEL: name: test_insert_v3s16_s16_offset0 ; CHECK: [[COPY:%[0-9]+]]:_(<4 x s16>) = COPY $vgpr0_vgpr1 - ; CHECK-NEXT: [[EXTRACT:%[0-9]+]]:_(<3 x s16>) = G_EXTRACT [[COPY]](<4 x s16>), 0 + ; CHECK-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[COPY]](<4 x s16>) + ; CHECK-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>) + ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; CHECK-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) + ; CHECK-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>) ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY1]](s32) ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF - ; CHECK-NEXT: [[INSERT:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF]], [[EXTRACT]](<3 x s16>), 0 - ; CHECK-NEXT: [[INSERT1:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[INSERT]], [[TRUNC]](s16), 0 - ; CHECK-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<12 x s16>) = G_CONCAT_VECTORS [[INSERT1]](<4 x s16>), [[DEF]](<4 x s16>), [[DEF]](<4 x s16>) - ; CHECK-NEXT: [[UV:%[0-9]+]]:_(<3 x s16>), [[UV1:%[0-9]+]]:_(<3 x s16>), [[UV2:%[0-9]+]]:_(<3 x s16>), [[UV3:%[0-9]+]]:_(<3 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<12 x s16>) - ; CHECK-NEXT: [[INSERT2:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF]], [[UV]](<3 x s16>), 0 - ; CHECK-NEXT: $vgpr0_vgpr1 = COPY [[INSERT2]](<4 x s16>) + ; CHECK-NEXT: [[UV2:%[0-9]+]]:_(<2 x s16>), [[UV3:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<4 x s16>) + ; CHECK-NEXT: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[UV3]](<2 x s16>) + ; CHECK-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C]](s32) + ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 + ; CHECK-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C1]] + ; CHECK-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LSHR]], [[C1]] + ; CHECK-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C]](s32) + ; CHECK-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]] + ; CHECK-NEXT: 
[[BITCAST3:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) + ; CHECK-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[BITCAST1]], [[C1]] + ; CHECK-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[LSHR1]], [[C1]] + ; CHECK-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C]](s32) + ; CHECK-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL1]] + ; CHECK-NEXT: [[BITCAST4:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32) + ; CHECK-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BITCAST3]](<2 x s16>), [[BITCAST4]](<2 x s16>) + ; CHECK-NEXT: $vgpr0_vgpr1 = COPY [[CONCAT_VECTORS]](<4 x s16>) %0:_(<4 x s16>) = COPY $vgpr0_vgpr1 %1:_(<3 x s16>) = G_EXTRACT %0, 0 %2:_(s32) = COPY $vgpr2 @@ -861,16 +905,28 @@ body: | ; CHECK-LABEL: name: test_insert_v3s16_s16_offset16 ; CHECK: [[COPY:%[0-9]+]]:_(<4 x s16>) = COPY $vgpr0_vgpr1 - ; CHECK-NEXT: [[EXTRACT:%[0-9]+]]:_(<3 x s16>) = G_EXTRACT [[COPY]](<4 x s16>), 0 + ; CHECK-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[COPY]](<4 x s16>) + ; CHECK-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>) + ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; CHECK-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>) ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY1]](s32) ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF - ; CHECK-NEXT: [[INSERT:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF]], [[EXTRACT]](<3 x s16>), 0 - ; CHECK-NEXT: [[INSERT1:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[INSERT]], [[TRUNC]](s16), 16 - ; CHECK-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<12 x s16>) = G_CONCAT_VECTORS [[INSERT1]](<4 x s16>), [[DEF]](<4 x s16>), [[DEF]](<4 x s16>) - ; CHECK-NEXT: [[UV:%[0-9]+]]:_(<3 x s16>), [[UV1:%[0-9]+]]:_(<3 x s16>), [[UV2:%[0-9]+]]:_(<3 x s16>), [[UV3:%[0-9]+]]:_(<3 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<12 x s16>) - ; CHECK-NEXT: [[INSERT2:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF]], [[UV]](<3 x s16>), 0 - ; CHECK-NEXT: $vgpr0_vgpr1 = COPY [[INSERT2]](<4 x s16>) + ; CHECK-NEXT: [[UV2:%[0-9]+]]:_(<2 x s16>), [[UV3:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<4 x s16>) + ; CHECK-NEXT: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[UV3]](<2 x s16>) + ; CHECK-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C]](s32) + ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 + ; CHECK-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[BITCAST]], [[C1]] + ; CHECK-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C1]] + ; CHECK-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C]](s32) + ; CHECK-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]] + ; CHECK-NEXT: [[BITCAST3:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) + ; CHECK-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[BITCAST1]], [[C1]] + ; CHECK-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[LSHR]], [[C1]] + ; CHECK-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C]](s32) + ; CHECK-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL1]] + ; CHECK-NEXT: [[BITCAST4:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32) + ; CHECK-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BITCAST3]](<2 x s16>), [[BITCAST4]](<2 x s16>) + ; CHECK-NEXT: $vgpr0_vgpr1 = COPY [[CONCAT_VECTORS]](<4 x s16>) %0:_(<4 x s16>) = COPY $vgpr0_vgpr1 %1:_(<3 x s16>) = G_EXTRACT %0, 0 %2:_(s32) = COPY $vgpr2 @@ -888,16 +944,28 @@ body: | ; CHECK-LABEL: name: test_insert_v3s16_s16_offset32 ; CHECK: [[COPY:%[0-9]+]]:_(<4 x s16>) = COPY $vgpr0_vgpr1 - ; CHECK-NEXT: [[EXTRACT:%[0-9]+]]:_(<3 x s16>) = G_EXTRACT 
[[COPY]](<4 x s16>), 0 + ; CHECK-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[COPY]](<4 x s16>) + ; CHECK-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>) + ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; CHECK-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY1]](s32) ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF - ; CHECK-NEXT: [[INSERT:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF]], [[EXTRACT]](<3 x s16>), 0 - ; CHECK-NEXT: [[INSERT1:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[INSERT]], [[TRUNC]](s16), 32 - ; CHECK-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<12 x s16>) = G_CONCAT_VECTORS [[INSERT1]](<4 x s16>), [[DEF]](<4 x s16>), [[DEF]](<4 x s16>) - ; CHECK-NEXT: [[UV:%[0-9]+]]:_(<3 x s16>), [[UV1:%[0-9]+]]:_(<3 x s16>), [[UV2:%[0-9]+]]:_(<3 x s16>), [[UV3:%[0-9]+]]:_(<3 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<12 x s16>) - ; CHECK-NEXT: [[INSERT2:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF]], [[UV]](<3 x s16>), 0 - ; CHECK-NEXT: $vgpr0_vgpr1 = COPY [[INSERT2]](<4 x s16>) + ; CHECK-NEXT: [[UV2:%[0-9]+]]:_(<2 x s16>), [[UV3:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<4 x s16>) + ; CHECK-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV3]](<2 x s16>) + ; CHECK-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C]](s32) + ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 + ; CHECK-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[BITCAST]], [[C1]] + ; CHECK-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LSHR]], [[C1]] + ; CHECK-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C]](s32) + ; CHECK-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]] + ; CHECK-NEXT: [[BITCAST2:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) + ; CHECK-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C1]] + ; CHECK-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[LSHR1]], [[C1]] + ; CHECK-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C]](s32) + ; CHECK-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL1]] + ; CHECK-NEXT: [[BITCAST3:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32) + ; CHECK-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BITCAST2]](<2 x s16>), [[BITCAST3]](<2 x s16>) + ; CHECK-NEXT: $vgpr0_vgpr1 = COPY [[CONCAT_VECTORS]](<4 x s16>) %0:_(<4 x s16>) = COPY $vgpr0_vgpr1 %1:_(<3 x s16>) = G_EXTRACT %0, 0 %2:_(s32) = COPY $vgpr2 @@ -915,15 +983,29 @@ body: | ; CHECK-LABEL: name: test_insert_v3s16_v2s16_offset0 ; CHECK: [[COPY:%[0-9]+]]:_(<4 x s16>) = COPY $vgpr0_vgpr1 - ; CHECK-NEXT: [[EXTRACT:%[0-9]+]]:_(<3 x s16>) = G_EXTRACT [[COPY]](<4 x s16>), 0 + ; CHECK-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[COPY]](<4 x s16>) + ; CHECK-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>) + ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr2 + ; CHECK-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[COPY1]](<2 x s16>) + ; CHECK-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C]](s32) ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF - ; CHECK-NEXT: [[INSERT:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF]], [[EXTRACT]](<3 x s16>), 0 - ; CHECK-NEXT: [[INSERT1:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[INSERT]], [[COPY1]](<2 x s16>), 0 - ; CHECK-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<12 x s16>) = G_CONCAT_VECTORS [[INSERT1]](<4 x s16>), [[DEF]](<4 x s16>), [[DEF]](<4 x s16>) - ; CHECK-NEXT: 
[[UV:%[0-9]+]]:_(<3 x s16>), [[UV1:%[0-9]+]]:_(<3 x s16>), [[UV2:%[0-9]+]]:_(<3 x s16>), [[UV3:%[0-9]+]]:_(<3 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<12 x s16>) - ; CHECK-NEXT: [[INSERT2:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF]], [[UV]](<3 x s16>), 0 - ; CHECK-NEXT: $vgpr0_vgpr1 = COPY [[INSERT2]](<4 x s16>) + ; CHECK-NEXT: [[UV2:%[0-9]+]]:_(<2 x s16>), [[UV3:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<4 x s16>) + ; CHECK-NEXT: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[UV3]](<2 x s16>) + ; CHECK-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C]](s32) + ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 + ; CHECK-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[BITCAST1]], [[C1]] + ; CHECK-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LSHR]], [[C1]] + ; CHECK-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C]](s32) + ; CHECK-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]] + ; CHECK-NEXT: [[BITCAST3:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) + ; CHECK-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[BITCAST]], [[C1]] + ; CHECK-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[LSHR1]], [[C1]] + ; CHECK-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C]](s32) + ; CHECK-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL1]] + ; CHECK-NEXT: [[BITCAST4:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32) + ; CHECK-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BITCAST3]](<2 x s16>), [[BITCAST4]](<2 x s16>) + ; CHECK-NEXT: $vgpr0_vgpr1 = COPY [[CONCAT_VECTORS]](<4 x s16>) %0:_(<4 x s16>) = COPY $vgpr0_vgpr1 %1:_(<3 x s16>) = G_EXTRACT %0, 0 %2:_(<2 x s16>) = COPY $vgpr2 @@ -940,15 +1022,29 @@ body: | ; CHECK-LABEL: name: test_insert_v3s16_v2s16_offset16 ; CHECK: [[COPY:%[0-9]+]]:_(<4 x s16>) = COPY $vgpr0_vgpr1 - ; CHECK-NEXT: [[EXTRACT:%[0-9]+]]:_(<3 x s16>) = G_EXTRACT [[COPY]](<4 x s16>), 0 + ; CHECK-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[COPY]](<4 x s16>) + ; CHECK-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>) + ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr2 + ; CHECK-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[COPY1]](<2 x s16>) + ; CHECK-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C]](s32) ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF - ; CHECK-NEXT: [[INSERT:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF]], [[EXTRACT]](<3 x s16>), 0 - ; CHECK-NEXT: [[INSERT1:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[INSERT]], [[COPY1]](<2 x s16>), 16 - ; CHECK-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<12 x s16>) = G_CONCAT_VECTORS [[INSERT1]](<4 x s16>), [[DEF]](<4 x s16>), [[DEF]](<4 x s16>) - ; CHECK-NEXT: [[UV:%[0-9]+]]:_(<3 x s16>), [[UV1:%[0-9]+]]:_(<3 x s16>), [[UV2:%[0-9]+]]:_(<3 x s16>), [[UV3:%[0-9]+]]:_(<3 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<12 x s16>) - ; CHECK-NEXT: [[INSERT2:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF]], [[UV]](<3 x s16>), 0 - ; CHECK-NEXT: $vgpr0_vgpr1 = COPY [[INSERT2]](<4 x s16>) + ; CHECK-NEXT: [[UV2:%[0-9]+]]:_(<2 x s16>), [[UV3:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<4 x s16>) + ; CHECK-NEXT: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[UV3]](<2 x s16>) + ; CHECK-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C]](s32) + ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 + ; CHECK-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[BITCAST]], [[C1]] + ; CHECK-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[BITCAST1]], [[C1]] + ; CHECK-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C]](s32) + ; 
CHECK-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]] + ; CHECK-NEXT: [[BITCAST3:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) + ; CHECK-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[LSHR]], [[C1]] + ; CHECK-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[LSHR1]], [[C1]] + ; CHECK-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C]](s32) + ; CHECK-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL1]] + ; CHECK-NEXT: [[BITCAST4:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32) + ; CHECK-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BITCAST3]](<2 x s16>), [[BITCAST4]](<2 x s16>) + ; CHECK-NEXT: $vgpr0_vgpr1 = COPY [[CONCAT_VECTORS]](<4 x s16>) %0:_(<4 x s16>) = COPY $vgpr0_vgpr1 %1:_(<3 x s16>) = G_EXTRACT %0, 0 %2:_(<2 x s16>) = COPY $vgpr2 @@ -965,15 +1061,28 @@ body: | ; CHECK-LABEL: name: test_insert_v3s16_s32_offset0 ; CHECK: [[COPY:%[0-9]+]]:_(<4 x s16>) = COPY $vgpr0_vgpr1 - ; CHECK-NEXT: [[EXTRACT:%[0-9]+]]:_(<3 x s16>) = G_EXTRACT [[COPY]](<4 x s16>), 0 + ; CHECK-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[COPY]](<4 x s16>) + ; CHECK-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>) + ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr2 + ; CHECK-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[COPY1]], [[C]](s32) ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF - ; CHECK-NEXT: [[INSERT:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF]], [[EXTRACT]](<3 x s16>), 0 - ; CHECK-NEXT: [[INSERT1:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[INSERT]], [[COPY1]](s32), 0 - ; CHECK-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<12 x s16>) = G_CONCAT_VECTORS [[INSERT1]](<4 x s16>), [[DEF]](<4 x s16>), [[DEF]](<4 x s16>) - ; CHECK-NEXT: [[UV:%[0-9]+]]:_(<3 x s16>), [[UV1:%[0-9]+]]:_(<3 x s16>), [[UV2:%[0-9]+]]:_(<3 x s16>), [[UV3:%[0-9]+]]:_(<3 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<12 x s16>) - ; CHECK-NEXT: [[INSERT2:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF]], [[UV]](<3 x s16>), 0 - ; CHECK-NEXT: $vgpr0_vgpr1 = COPY [[INSERT2]](<4 x s16>) + ; CHECK-NEXT: [[UV2:%[0-9]+]]:_(<2 x s16>), [[UV3:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<4 x s16>) + ; CHECK-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV3]](<2 x s16>) + ; CHECK-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C]](s32) + ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 + ; CHECK-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C1]] + ; CHECK-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LSHR]], [[C1]] + ; CHECK-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C]](s32) + ; CHECK-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]] + ; CHECK-NEXT: [[BITCAST2:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) + ; CHECK-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[BITCAST]], [[C1]] + ; CHECK-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[LSHR1]], [[C1]] + ; CHECK-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C]](s32) + ; CHECK-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL1]] + ; CHECK-NEXT: [[BITCAST3:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32) + ; CHECK-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BITCAST2]](<2 x s16>), [[BITCAST3]](<2 x s16>) + ; CHECK-NEXT: $vgpr0_vgpr1 = COPY [[CONCAT_VECTORS]](<4 x s16>) %0:_(<4 x s16>) = COPY $vgpr0_vgpr1 %1:_(<3 x s16>) = G_EXTRACT %0, 0 %2:_(s32) = COPY $vgpr2 @@ -990,15 +1099,28 @@ body: | ; CHECK-LABEL: name: test_insert_v3s16_s32_offset16 ; CHECK: [[COPY:%[0-9]+]]:_(<4 x s16>) = COPY $vgpr0_vgpr1 - ; CHECK-NEXT: [[EXTRACT:%[0-9]+]]:_(<3 x s16>) = 
G_EXTRACT [[COPY]](<4 x s16>), 0 + ; CHECK-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[COPY]](<4 x s16>) + ; CHECK-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>) + ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr2 + ; CHECK-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[COPY1]], [[C]](s32) ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF - ; CHECK-NEXT: [[INSERT:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF]], [[EXTRACT]](<3 x s16>), 0 - ; CHECK-NEXT: [[INSERT1:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[INSERT]], [[COPY1]](s32), 16 - ; CHECK-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<12 x s16>) = G_CONCAT_VECTORS [[INSERT1]](<4 x s16>), [[DEF]](<4 x s16>), [[DEF]](<4 x s16>) - ; CHECK-NEXT: [[UV:%[0-9]+]]:_(<3 x s16>), [[UV1:%[0-9]+]]:_(<3 x s16>), [[UV2:%[0-9]+]]:_(<3 x s16>), [[UV3:%[0-9]+]]:_(<3 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<12 x s16>) - ; CHECK-NEXT: [[INSERT2:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF]], [[UV]](<3 x s16>), 0 - ; CHECK-NEXT: $vgpr0_vgpr1 = COPY [[INSERT2]](<4 x s16>) + ; CHECK-NEXT: [[UV2:%[0-9]+]]:_(<2 x s16>), [[UV3:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<4 x s16>) + ; CHECK-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV3]](<2 x s16>) + ; CHECK-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C]](s32) + ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 + ; CHECK-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[BITCAST]], [[C1]] + ; CHECK-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C1]] + ; CHECK-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C]](s32) + ; CHECK-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]] + ; CHECK-NEXT: [[BITCAST2:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) + ; CHECK-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[LSHR]], [[C1]] + ; CHECK-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[LSHR1]], [[C1]] + ; CHECK-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C]](s32) + ; CHECK-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL1]] + ; CHECK-NEXT: [[BITCAST3:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32) + ; CHECK-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BITCAST2]](<2 x s16>), [[BITCAST3]](<2 x s16>) + ; CHECK-NEXT: $vgpr0_vgpr1 = COPY [[CONCAT_VECTORS]](<4 x s16>) %0:_(<4 x s16>) = COPY $vgpr0_vgpr1 %1:_(<3 x s16>) = G_EXTRACT %0, 0 %2:_(s32) = COPY $vgpr2 @@ -1016,9 +1138,25 @@ body: | ; CHECK-LABEL: name: test_insert_v4s16_s16_offset0 ; CHECK: [[COPY:%[0-9]+]]:_(<4 x s16>) = COPY $vgpr0_vgpr1 ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY1]](s32) - ; CHECK-NEXT: [[INSERT:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[COPY]], [[TRUNC]](s16), 0 - ; CHECK-NEXT: $vgpr0_vgpr1 = COPY [[INSERT]](<4 x s16>) + ; CHECK-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[COPY]](<4 x s16>) + ; CHECK-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>) + ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; CHECK-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) + ; CHECK-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>) + ; CHECK-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C]](s32) + ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 + ; CHECK-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C1]] + ; CHECK-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LSHR]], [[C1]] + ; CHECK-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C]](s32) + ; CHECK-NEXT: 
[[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]] + ; CHECK-NEXT: [[BITCAST2:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) + ; CHECK-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[BITCAST1]], [[C1]] + ; CHECK-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[LSHR1]], [[C1]] + ; CHECK-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C]](s32) + ; CHECK-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL1]] + ; CHECK-NEXT: [[BITCAST3:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32) + ; CHECK-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BITCAST2]](<2 x s16>), [[BITCAST3]](<2 x s16>) + ; CHECK-NEXT: $vgpr0_vgpr1 = COPY [[CONCAT_VECTORS]](<4 x s16>) %0:_(<4 x s16>) = COPY $vgpr0_vgpr1 %1:_(s32) = COPY $vgpr2 %2:_(s16) = G_TRUNC %1 @@ -1034,9 +1172,24 @@ body: | ; CHECK-LABEL: name: test_insert_v4s16_s16_offset16 ; CHECK: [[COPY:%[0-9]+]]:_(<4 x s16>) = COPY $vgpr0_vgpr1 ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY1]](s32) - ; CHECK-NEXT: [[INSERT:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[COPY]], [[TRUNC]](s16), 16 - ; CHECK-NEXT: $vgpr0_vgpr1 = COPY [[INSERT]](<4 x s16>) + ; CHECK-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[COPY]](<4 x s16>) + ; CHECK-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>) + ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; CHECK-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>) + ; CHECK-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C]](s32) + ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 + ; CHECK-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[BITCAST]], [[C1]] + ; CHECK-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C1]] + ; CHECK-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C]](s32) + ; CHECK-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]] + ; CHECK-NEXT: [[BITCAST2:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) + ; CHECK-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[BITCAST1]], [[C1]] + ; CHECK-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[LSHR]], [[C1]] + ; CHECK-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C]](s32) + ; CHECK-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL1]] + ; CHECK-NEXT: [[BITCAST3:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32) + ; CHECK-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BITCAST2]](<2 x s16>), [[BITCAST3]](<2 x s16>) + ; CHECK-NEXT: $vgpr0_vgpr1 = COPY [[CONCAT_VECTORS]](<4 x s16>) %0:_(<4 x s16>) = COPY $vgpr0_vgpr1 %1:_(s32) = COPY $vgpr2 %2:_(s16) = G_TRUNC %1 @@ -1052,9 +1205,25 @@ body: | ; CHECK-LABEL: name: test_insert_v4s16_s16_offset32 ; CHECK: [[COPY:%[0-9]+]]:_(<4 x s16>) = COPY $vgpr0_vgpr1 ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY1]](s32) - ; CHECK-NEXT: [[INSERT:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[COPY]], [[TRUNC]](s16), 32 - ; CHECK-NEXT: $vgpr0_vgpr1 = COPY [[INSERT]](<4 x s16>) + ; CHECK-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[COPY]](<4 x s16>) + ; CHECK-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>) + ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; CHECK-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) + ; CHECK-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>) + ; CHECK-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C]](s32) + ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 + ; CHECK-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[BITCAST]], [[C1]] + ; 
CHECK-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LSHR]], [[C1]] + ; CHECK-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C]](s32) + ; CHECK-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]] + ; CHECK-NEXT: [[BITCAST2:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) + ; CHECK-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C1]] + ; CHECK-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[LSHR1]], [[C1]] + ; CHECK-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C]](s32) + ; CHECK-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL1]] + ; CHECK-NEXT: [[BITCAST3:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32) + ; CHECK-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BITCAST2]](<2 x s16>), [[BITCAST3]](<2 x s16>) + ; CHECK-NEXT: $vgpr0_vgpr1 = COPY [[CONCAT_VECTORS]](<4 x s16>) %0:_(<4 x s16>) = COPY $vgpr0_vgpr1 %1:_(s32) = COPY $vgpr2 %2:_(s16) = G_TRUNC %1 @@ -1070,9 +1239,24 @@ body: | ; CHECK-LABEL: name: test_insert_v4s16_s16_offset48 ; CHECK: [[COPY:%[0-9]+]]:_(<4 x s16>) = COPY $vgpr0_vgpr1 ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY1]](s32) - ; CHECK-NEXT: [[INSERT:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[COPY]], [[TRUNC]](s16), 48 - ; CHECK-NEXT: $vgpr0_vgpr1 = COPY [[INSERT]](<4 x s16>) + ; CHECK-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[COPY]](<4 x s16>) + ; CHECK-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>) + ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; CHECK-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) + ; CHECK-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>) + ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 + ; CHECK-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[BITCAST]], [[C1]] + ; CHECK-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LSHR]], [[C1]] + ; CHECK-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C]](s32) + ; CHECK-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]] + ; CHECK-NEXT: [[BITCAST2:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) + ; CHECK-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[BITCAST1]], [[C1]] + ; CHECK-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C1]] + ; CHECK-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C]](s32) + ; CHECK-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL1]] + ; CHECK-NEXT: [[BITCAST3:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32) + ; CHECK-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BITCAST2]](<2 x s16>), [[BITCAST3]](<2 x s16>) + ; CHECK-NEXT: $vgpr0_vgpr1 = COPY [[CONCAT_VECTORS]](<4 x s16>) %0:_(<4 x s16>) = COPY $vgpr0_vgpr1 %1:_(s32) = COPY $vgpr2 %2:_(s16) = G_TRUNC %1 @@ -1088,8 +1272,25 @@ body: | ; CHECK-LABEL: name: test_insert_v4s16_v2s16_offset0 ; CHECK: [[COPY:%[0-9]+]]:_(<4 x s16>) = COPY $vgpr0_vgpr1 ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr2 - ; CHECK-NEXT: [[INSERT:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[COPY]], [[COPY1]](<2 x s16>), 0 - ; CHECK-NEXT: $vgpr0_vgpr1 = COPY [[INSERT]](<4 x s16>) + ; CHECK-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[COPY]](<4 x s16>) + ; CHECK-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>) + ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; CHECK-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) + ; CHECK-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[COPY1]](<2 x s16>) + ; CHECK-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C]](s32) + ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s32) = 
G_CONSTANT i32 65535 + ; CHECK-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[BITCAST1]], [[C1]] + ; CHECK-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LSHR1]], [[C1]] + ; CHECK-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C]](s32) + ; CHECK-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]] + ; CHECK-NEXT: [[BITCAST2:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) + ; CHECK-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[BITCAST]], [[C1]] + ; CHECK-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[LSHR]], [[C1]] + ; CHECK-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C]](s32) + ; CHECK-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL1]] + ; CHECK-NEXT: [[BITCAST3:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32) + ; CHECK-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BITCAST2]](<2 x s16>), [[BITCAST3]](<2 x s16>) + ; CHECK-NEXT: $vgpr0_vgpr1 = COPY [[CONCAT_VECTORS]](<4 x s16>) %0:_(<4 x s16>) = COPY $vgpr0_vgpr1 %1:_(<2 x s16>) = COPY $vgpr2 %2:_(<4 x s16>) = G_INSERT %0, %1, 0 @@ -1104,8 +1305,26 @@ body: | ; CHECK-LABEL: name: test_insert_v4s16_v2s16_offset16 ; CHECK: [[COPY:%[0-9]+]]:_(<4 x s16>) = COPY $vgpr0_vgpr1 ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr2 - ; CHECK-NEXT: [[INSERT:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[COPY]], [[COPY1]](<2 x s16>), 16 - ; CHECK-NEXT: $vgpr0_vgpr1 = COPY [[INSERT]](<4 x s16>) + ; CHECK-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[COPY]](<4 x s16>) + ; CHECK-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>) + ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; CHECK-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>) + ; CHECK-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C]](s32) + ; CHECK-NEXT: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[COPY1]](<2 x s16>) + ; CHECK-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C]](s32) + ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 + ; CHECK-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[BITCAST]], [[C1]] + ; CHECK-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[BITCAST2]], [[C1]] + ; CHECK-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C]](s32) + ; CHECK-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]] + ; CHECK-NEXT: [[BITCAST3:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) + ; CHECK-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[LSHR1]], [[C1]] + ; CHECK-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[LSHR]], [[C1]] + ; CHECK-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C]](s32) + ; CHECK-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL1]] + ; CHECK-NEXT: [[BITCAST4:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32) + ; CHECK-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BITCAST3]](<2 x s16>), [[BITCAST4]](<2 x s16>) + ; CHECK-NEXT: $vgpr0_vgpr1 = COPY [[CONCAT_VECTORS]](<4 x s16>) %0:_(<4 x s16>) = COPY $vgpr0_vgpr1 %1:_(<2 x s16>) = COPY $vgpr2 %2:_(<4 x s16>) = G_INSERT %0, %1, 16 @@ -1120,8 +1339,25 @@ body: | ; CHECK-LABEL: name: test_insert_v4s16_v2s16_offset32 ; CHECK: [[COPY:%[0-9]+]]:_(<4 x s16>) = COPY $vgpr0_vgpr1 ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr2 - ; CHECK-NEXT: [[INSERT:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[COPY]], [[COPY1]](<2 x s16>), 32 - ; CHECK-NEXT: $vgpr0_vgpr1 = COPY [[INSERT]](<4 x s16>) + ; CHECK-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[COPY]](<4 x s16>) + ; CHECK-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>) + ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; CHECK-NEXT: 
[[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) + ; CHECK-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[COPY1]](<2 x s16>) + ; CHECK-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C]](s32) + ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 + ; CHECK-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[BITCAST]], [[C1]] + ; CHECK-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LSHR]], [[C1]] + ; CHECK-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C]](s32) + ; CHECK-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]] + ; CHECK-NEXT: [[BITCAST2:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) + ; CHECK-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[BITCAST1]], [[C1]] + ; CHECK-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[LSHR1]], [[C1]] + ; CHECK-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C]](s32) + ; CHECK-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL1]] + ; CHECK-NEXT: [[BITCAST3:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32) + ; CHECK-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BITCAST2]](<2 x s16>), [[BITCAST3]](<2 x s16>) + ; CHECK-NEXT: $vgpr0_vgpr1 = COPY [[CONCAT_VECTORS]](<4 x s16>) %0:_(<4 x s16>) = COPY $vgpr0_vgpr1 %1:_(<2 x s16>) = COPY $vgpr2 %2:_(<4 x s16>) = G_INSERT %0, %1, 32 @@ -1136,9 +1372,27 @@ body: | ; CHECK-LABEL: name: test_insert_v4s16_v3s16_offset0 ; CHECK: [[COPY:%[0-9]+]]:_(<4 x s16>) = COPY $vgpr0_vgpr1 ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(<4 x s16>) = COPY $vgpr2_vgpr3 - ; CHECK-NEXT: [[EXTRACT:%[0-9]+]]:_(<3 x s16>) = G_EXTRACT [[COPY1]](<4 x s16>), 0 - ; CHECK-NEXT: [[INSERT:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[COPY]], [[EXTRACT]](<3 x s16>), 0 - ; CHECK-NEXT: $vgpr0_vgpr1 = COPY [[INSERT]](<4 x s16>) + ; CHECK-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[COPY1]](<4 x s16>) + ; CHECK-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>) + ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; CHECK-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) + ; CHECK-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>) + ; CHECK-NEXT: [[UV2:%[0-9]+]]:_(<2 x s16>), [[UV3:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[COPY]](<4 x s16>) + ; CHECK-NEXT: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[UV3]](<2 x s16>) + ; CHECK-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C]](s32) + ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 + ; CHECK-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[BITCAST]], [[C1]] + ; CHECK-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LSHR]], [[C1]] + ; CHECK-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C]](s32) + ; CHECK-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]] + ; CHECK-NEXT: [[BITCAST3:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) + ; CHECK-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[BITCAST1]], [[C1]] + ; CHECK-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[LSHR1]], [[C1]] + ; CHECK-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C]](s32) + ; CHECK-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL1]] + ; CHECK-NEXT: [[BITCAST4:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32) + ; CHECK-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BITCAST3]](<2 x s16>), [[BITCAST4]](<2 x s16>) + ; CHECK-NEXT: $vgpr0_vgpr1 = COPY [[CONCAT_VECTORS]](<4 x s16>) %0:_(<4 x s16>) = COPY $vgpr0_vgpr1 %1:_(<4 x s16>) = COPY $vgpr2_vgpr3 %2:_(<3 x s16>) = G_EXTRACT %1, 0 @@ -1154,9 +1408,26 @@ body: | ; CHECK-LABEL: name: test_insert_v4s16_v3s16_offset16 ; CHECK: [[COPY:%[0-9]+]]:_(<4 x s16>) = COPY $vgpr0_vgpr1 ; CHECK-NEXT: 
[[COPY1:%[0-9]+]]:_(<4 x s16>) = COPY $vgpr2_vgpr3 - ; CHECK-NEXT: [[EXTRACT:%[0-9]+]]:_(<3 x s16>) = G_EXTRACT [[COPY1]](<4 x s16>), 0 - ; CHECK-NEXT: [[INSERT:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[COPY]], [[EXTRACT]](<3 x s16>), 16 - ; CHECK-NEXT: $vgpr0_vgpr1 = COPY [[INSERT]](<4 x s16>) + ; CHECK-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[COPY1]](<4 x s16>) + ; CHECK-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>) + ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; CHECK-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) + ; CHECK-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>) + ; CHECK-NEXT: [[UV2:%[0-9]+]]:_(<2 x s16>), [[UV3:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[COPY]](<4 x s16>) + ; CHECK-NEXT: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[UV2]](<2 x s16>) + ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 + ; CHECK-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[BITCAST2]], [[C1]] + ; CHECK-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[BITCAST]], [[C1]] + ; CHECK-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C]](s32) + ; CHECK-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]] + ; CHECK-NEXT: [[BITCAST3:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) + ; CHECK-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[LSHR]], [[C1]] + ; CHECK-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[BITCAST1]], [[C1]] + ; CHECK-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C]](s32) + ; CHECK-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL1]] + ; CHECK-NEXT: [[BITCAST4:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32) + ; CHECK-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BITCAST3]](<2 x s16>), [[BITCAST4]](<2 x s16>) + ; CHECK-NEXT: $vgpr0_vgpr1 = COPY [[CONCAT_VECTORS]](<4 x s16>) %0:_(<4 x s16>) = COPY $vgpr0_vgpr1 %1:_(<4 x s16>) = COPY $vgpr2_vgpr3 %2:_(<3 x s16>) = G_EXTRACT %1, 0 @@ -1172,8 +1443,24 @@ body: | ; CHECK-LABEL: name: test_insert_v4s16_s32_offset0 ; CHECK: [[COPY:%[0-9]+]]:_(<4 x s16>) = COPY $vgpr0_vgpr1 ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; CHECK-NEXT: [[INSERT:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[COPY]], [[COPY1]](s32), 0 - ; CHECK-NEXT: $vgpr0_vgpr1 = COPY [[INSERT]](<4 x s16>) + ; CHECK-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[COPY]](<4 x s16>) + ; CHECK-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>) + ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; CHECK-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) + ; CHECK-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[COPY1]], [[C]](s32) + ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 + ; CHECK-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C1]] + ; CHECK-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LSHR1]], [[C1]] + ; CHECK-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C]](s32) + ; CHECK-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]] + ; CHECK-NEXT: [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) + ; CHECK-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[BITCAST]], [[C1]] + ; CHECK-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[LSHR]], [[C1]] + ; CHECK-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C]](s32) + ; CHECK-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL1]] + ; CHECK-NEXT: [[BITCAST2:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32) + ; CHECK-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BITCAST1]](<2 x s16>), [[BITCAST2]](<2 x s16>) + ; CHECK-NEXT: $vgpr0_vgpr1 = COPY 
[[CONCAT_VECTORS]](<4 x s16>) %0:_(<4 x s16>) = COPY $vgpr0_vgpr1 %1:_(s32) = COPY $vgpr2 %2:_(<4 x s16>) = G_INSERT %0, %1, 0 @@ -1188,8 +1475,25 @@ body: | ; CHECK-LABEL: name: test_insert_v4s16_s32_offset16 ; CHECK: [[COPY:%[0-9]+]]:_(<4 x s16>) = COPY $vgpr0_vgpr1 ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; CHECK-NEXT: [[INSERT:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[COPY]], [[COPY1]](s32), 16 - ; CHECK-NEXT: $vgpr0_vgpr1 = COPY [[INSERT]](<4 x s16>) + ; CHECK-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[COPY]](<4 x s16>) + ; CHECK-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>) + ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; CHECK-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>) + ; CHECK-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C]](s32) + ; CHECK-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[COPY1]], [[C]](s32) + ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 + ; CHECK-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[BITCAST]], [[C1]] + ; CHECK-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C1]] + ; CHECK-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C]](s32) + ; CHECK-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]] + ; CHECK-NEXT: [[BITCAST2:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) + ; CHECK-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[LSHR1]], [[C1]] + ; CHECK-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[LSHR]], [[C1]] + ; CHECK-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C]](s32) + ; CHECK-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL1]] + ; CHECK-NEXT: [[BITCAST3:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32) + ; CHECK-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BITCAST2]](<2 x s16>), [[BITCAST3]](<2 x s16>) + ; CHECK-NEXT: $vgpr0_vgpr1 = COPY [[CONCAT_VECTORS]](<4 x s16>) %0:_(<4 x s16>) = COPY $vgpr0_vgpr1 %1:_(s32) = COPY $vgpr2 %2:_(<4 x s16>) = G_INSERT %0, %1, 16 @@ -1204,8 +1508,24 @@ body: | ; CHECK-LABEL: name: test_insert_v4s16_s32_offset32 ; CHECK: [[COPY:%[0-9]+]]:_(<4 x s16>) = COPY $vgpr0_vgpr1 ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; CHECK-NEXT: [[INSERT:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[COPY]], [[COPY1]](s32), 32 - ; CHECK-NEXT: $vgpr0_vgpr1 = COPY [[INSERT]](<4 x s16>) + ; CHECK-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[COPY]](<4 x s16>) + ; CHECK-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>) + ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; CHECK-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) + ; CHECK-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[COPY1]], [[C]](s32) + ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 + ; CHECK-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[BITCAST]], [[C1]] + ; CHECK-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LSHR]], [[C1]] + ; CHECK-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C]](s32) + ; CHECK-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]] + ; CHECK-NEXT: [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) + ; CHECK-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C1]] + ; CHECK-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[LSHR1]], [[C1]] + ; CHECK-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C]](s32) + ; CHECK-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL1]] + ; CHECK-NEXT: [[BITCAST2:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32) + ; CHECK-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BITCAST1]](<2 x s16>), [[BITCAST2]](<2 x s16>) + ; 
CHECK-NEXT: $vgpr0_vgpr1 = COPY [[CONCAT_VECTORS]](<4 x s16>) %0:_(<4 x s16>) = COPY $vgpr0_vgpr1 %1:_(s32) = COPY $vgpr2 %2:_(<4 x s16>) = G_INSERT %0, %1, 32 diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-llvm.amdgcn.image.load.2d.d16.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-llvm.amdgcn.image.load.2d.d16.ll index e814e66d5d82..271b7b23380a 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-llvm.amdgcn.image.load.2d.d16.ll +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-llvm.amdgcn.image.load.2d.d16.ll @@ -117,23 +117,20 @@ define amdgpu_ps <3 x half> @image_load_v3f16(<8 x i32> inreg %rsrc, i32 %s, i32 ; UNPACKED-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32) ; UNPACKED-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<3 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.2d), 7, [[BUILD_VECTOR1]](<2 x s32>), $noreg, [[BUILD_VECTOR]](<8 x s32>), 0, 0, 0 :: (dereferenceable load (<3 x s16>) from custom "ImageResource", align 8) ; UNPACKED-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<3 x s32>) - ; UNPACKED-NEXT: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF - ; UNPACKED-NEXT: [[UV3:%[0-9]+]]:_(<2 x s16>), [[UV4:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<4 x s16>) - ; UNPACKED-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV3]](<2 x s16>) - ; UNPACKED-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; UNPACKED-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 - ; UNPACKED-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[UV]], [[C1]] - ; UNPACKED-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[UV1]], [[C1]] - ; UNPACKED-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C]](s32) + ; UNPACKED-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 + ; UNPACKED-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[UV]], [[C]] + ; UNPACKED-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[UV1]], [[C]] + ; UNPACKED-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; UNPACKED-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C1]](s32) ; UNPACKED-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]] - ; UNPACKED-NEXT: [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) - ; UNPACKED-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[UV2]], [[C1]] - ; UNPACKED-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[BITCAST]], [[C1]] - ; UNPACKED-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C]](s32) + ; UNPACKED-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) + ; UNPACKED-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[UV2]], [[C]] + ; UNPACKED-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 + ; UNPACKED-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[C2]], [[C1]](s32) ; UNPACKED-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL1]] - ; UNPACKED-NEXT: [[BITCAST2:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32) - ; UNPACKED-NEXT: $vgpr0 = COPY [[BITCAST1]](<2 x s16>) - ; UNPACKED-NEXT: $vgpr1 = COPY [[BITCAST2]](<2 x s16>) + ; UNPACKED-NEXT: [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32) + ; UNPACKED-NEXT: $vgpr0 = COPY [[BITCAST]](<2 x s16>) + ; UNPACKED-NEXT: $vgpr1 = COPY [[BITCAST1]](<2 x s16>) ; UNPACKED-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1 ; PACKED-LABEL: name: image_load_v3f16 ; PACKED: bb.1 (%ir-block.0): @@ -153,19 +150,23 @@ define amdgpu_ps <3 x half> @image_load_v3f16(<8 x i32> inreg %rsrc, i32 %s, i32 ; PACKED-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32) ; PACKED-NEXT: 
[[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s16>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.2d), 7, [[BUILD_VECTOR1]](<2 x s32>), $noreg, [[BUILD_VECTOR]](<8 x s32>), 0, 0, 0 :: (dereferenceable load (<3 x s16>) from custom "ImageResource", align 8) ; PACKED-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x s16>) - ; PACKED-NEXT: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF - ; PACKED-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>) - ; PACKED-NEXT: [[UV2:%[0-9]+]]:_(<2 x s16>), [[UV3:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<4 x s16>) - ; PACKED-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV2]](<2 x s16>) + ; PACKED-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>) ; PACKED-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; PACKED-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) + ; PACKED-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>) ; PACKED-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 ; PACKED-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[BITCAST]], [[C1]] - ; PACKED-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[BITCAST1]], [[C1]] + ; PACKED-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LSHR]], [[C1]] ; PACKED-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C]](s32) ; PACKED-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]] ; PACKED-NEXT: [[BITCAST2:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) - ; PACKED-NEXT: $vgpr0 = COPY [[UV]](<2 x s16>) - ; PACKED-NEXT: $vgpr1 = COPY [[BITCAST2]](<2 x s16>) + ; PACKED-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[BITCAST1]], [[C1]] + ; PACKED-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 + ; PACKED-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[C2]], [[C]](s32) + ; PACKED-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL1]] + ; PACKED-NEXT: [[BITCAST3:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32) + ; PACKED-NEXT: $vgpr0 = COPY [[BITCAST2]](<2 x s16>) + ; PACKED-NEXT: $vgpr1 = COPY [[BITCAST3]](<2 x s16>) ; PACKED-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1 %tex = call <3 x half> @llvm.amdgcn.image.load.2d.v3f16.i32(i32 7, i32 %s, i32 %t, <8 x i32> %rsrc, i32 0, i32 0) ret <3 x half> %tex @@ -363,23 +364,20 @@ define amdgpu_ps <3 x half> @image_load_tfe_v3f16(<8 x i32> inreg %rsrc, i32 %s, ; UNPACKED-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.2d), 7, [[BUILD_VECTOR1]](<2 x s32>), $noreg, [[BUILD_VECTOR]](<8 x s32>), 1, 0, 0 :: (dereferenceable load (<3 x s16>) from custom "ImageResource", align 8) ; UNPACKED-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x s32>) ; UNPACKED-NEXT: G_STORE [[UV3]](s32), [[DEF]](p1) :: (store (s32) into `i32 addrspace(1)* undef`, addrspace 1) - ; UNPACKED-NEXT: [[DEF1:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF - ; UNPACKED-NEXT: [[UV4:%[0-9]+]]:_(<2 x s16>), [[UV5:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF1]](<4 x s16>) - ; UNPACKED-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV4]](<2 x s16>) - ; UNPACKED-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; UNPACKED-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 - ; UNPACKED-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[UV]], [[C1]] - ; UNPACKED-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[UV1]], [[C1]] - ; UNPACKED-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C]](s32) + ; UNPACKED-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 + ; 
UNPACKED-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[UV]], [[C]] + ; UNPACKED-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[UV1]], [[C]] + ; UNPACKED-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; UNPACKED-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C1]](s32) ; UNPACKED-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]] - ; UNPACKED-NEXT: [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) - ; UNPACKED-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[UV2]], [[C1]] - ; UNPACKED-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[BITCAST]], [[C1]] - ; UNPACKED-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C]](s32) + ; UNPACKED-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) + ; UNPACKED-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[UV2]], [[C]] + ; UNPACKED-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 + ; UNPACKED-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[C2]], [[C1]](s32) ; UNPACKED-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL1]] - ; UNPACKED-NEXT: [[BITCAST2:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32) - ; UNPACKED-NEXT: $vgpr0 = COPY [[BITCAST1]](<2 x s16>) - ; UNPACKED-NEXT: $vgpr1 = COPY [[BITCAST2]](<2 x s16>) + ; UNPACKED-NEXT: [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32) + ; UNPACKED-NEXT: $vgpr0 = COPY [[BITCAST]](<2 x s16>) + ; UNPACKED-NEXT: $vgpr1 = COPY [[BITCAST1]](<2 x s16>) ; UNPACKED-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1 ; PACKED-LABEL: name: image_load_tfe_v3f16 ; PACKED: bb.1 (%ir-block.0): @@ -402,20 +400,24 @@ define amdgpu_ps <3 x half> @image_load_tfe_v3f16(<8 x i32> inreg %rsrc, i32 %s, ; PACKED-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<3 x s32>) ; PACKED-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[UV]](s32) ; PACKED-NEXT: [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[UV1]](s32) - ; PACKED-NEXT: G_STORE [[UV2]](s32), [[DEF]](p1) :: (store (s32) into `i32 addrspace(1)* undef`, addrspace 1) - ; PACKED-NEXT: [[DEF1:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF - ; PACKED-NEXT: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[BITCAST1]](<2 x s16>) - ; PACKED-NEXT: [[UV3:%[0-9]+]]:_(<2 x s16>), [[UV4:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF1]](<4 x s16>) - ; PACKED-NEXT: [[BITCAST3:%[0-9]+]]:_(s32) = G_BITCAST [[UV3]](<2 x s16>) + ; PACKED-NEXT: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[BITCAST]](<2 x s16>) ; PACKED-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; PACKED-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C]](s32) + ; PACKED-NEXT: [[BITCAST3:%[0-9]+]]:_(s32) = G_BITCAST [[BITCAST1]](<2 x s16>) + ; PACKED-NEXT: G_STORE [[UV2]](s32), [[DEF]](p1) :: (store (s32) into `i32 addrspace(1)* undef`, addrspace 1) ; PACKED-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 ; PACKED-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[BITCAST2]], [[C1]] - ; PACKED-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[BITCAST3]], [[C1]] + ; PACKED-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LSHR]], [[C1]] ; PACKED-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C]](s32) ; PACKED-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]] ; PACKED-NEXT: [[BITCAST4:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) - ; PACKED-NEXT: $vgpr0 = COPY [[BITCAST]](<2 x s16>) - ; PACKED-NEXT: $vgpr1 = COPY [[BITCAST4]](<2 x s16>) + ; PACKED-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[BITCAST3]], [[C1]] + ; PACKED-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 + ; PACKED-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[C2]], [[C]](s32) + ; PACKED-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL1]] 
+ ; PACKED-NEXT: [[BITCAST5:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32) + ; PACKED-NEXT: $vgpr0 = COPY [[BITCAST4]](<2 x s16>) + ; PACKED-NEXT: $vgpr1 = COPY [[BITCAST5]](<2 x s16>) ; PACKED-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1 %res = call { <3 x half>, i32 } @llvm.amdgcn.image.load.2d.sl_v3f16i32s.i32(i32 7, i32 %s, i32 %t, <8 x i32> %rsrc, i32 1, i32 0) %tex = extractvalue { <3 x half>, i32 } %res, 0 @@ -607,23 +609,19 @@ define amdgpu_ps <3 x half> @image_load_v3f16_dmask_1100(<8 x i32> inreg %rsrc, ; UNPACKED-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32) ; UNPACKED-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<2 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.2d), 3, [[BUILD_VECTOR1]](<2 x s32>), $noreg, [[BUILD_VECTOR]](<8 x s32>), 0, 0, 0 :: (dereferenceable load (<2 x s16>) from custom "ImageResource") ; UNPACKED-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<2 x s32>) - ; UNPACKED-NEXT: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF - ; UNPACKED-NEXT: [[UV2:%[0-9]+]]:_(<2 x s16>), [[UV3:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<4 x s16>) - ; UNPACKED-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV2]](<2 x s16>) - ; UNPACKED-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; UNPACKED-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 - ; UNPACKED-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[UV]], [[C1]] - ; UNPACKED-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[UV1]], [[C1]] - ; UNPACKED-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C]](s32) + ; UNPACKED-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 + ; UNPACKED-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[UV]], [[C]] + ; UNPACKED-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[UV1]], [[C]] + ; UNPACKED-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; UNPACKED-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C1]](s32) ; UNPACKED-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]] - ; UNPACKED-NEXT: [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) + ; UNPACKED-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) ; UNPACKED-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 - ; UNPACKED-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[BITCAST]], [[C1]] - ; UNPACKED-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND2]], [[C]](s32) + ; UNPACKED-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[C2]], [[C1]](s32) ; UNPACKED-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[C2]], [[SHL1]] - ; UNPACKED-NEXT: [[BITCAST2:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32) - ; UNPACKED-NEXT: $vgpr0 = COPY [[BITCAST1]](<2 x s16>) - ; UNPACKED-NEXT: $vgpr1 = COPY [[BITCAST2]](<2 x s16>) + ; UNPACKED-NEXT: [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32) + ; UNPACKED-NEXT: $vgpr0 = COPY [[BITCAST]](<2 x s16>) + ; UNPACKED-NEXT: $vgpr1 = COPY [[BITCAST1]](<2 x s16>) ; UNPACKED-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1 ; PACKED-LABEL: name: image_load_v3f16_dmask_1100 ; PACKED: bb.1 (%ir-block.0): @@ -642,19 +640,20 @@ define amdgpu_ps <3 x half> @image_load_v3f16_dmask_1100(<8 x i32> inreg %rsrc, ; PACKED-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1 ; PACKED-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32) ; PACKED-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<2 x s16>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.2d), 3, [[BUILD_VECTOR1]](<2 x s32>), $noreg, [[BUILD_VECTOR]](<8 x s32>), 0, 0, 0 :: (dereferenceable load (<2 x s16>) from custom 
"ImageResource") - ; PACKED-NEXT: [[DEF:%[0-9]+]]:_(<2 x s16>) = G_IMPLICIT_DEF - ; PACKED-NEXT: [[DEF1:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF - ; PACKED-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[DEF]](<2 x s16>) - ; PACKED-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF1]](<4 x s16>) - ; PACKED-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>) + ; PACKED-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[AMDGPU_INTRIN_IMAGE_LOAD]](<2 x s16>) ; PACKED-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; PACKED-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) ; PACKED-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 ; PACKED-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[BITCAST]], [[C1]] - ; PACKED-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[BITCAST1]], [[C1]] + ; PACKED-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LSHR]], [[C1]] ; PACKED-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C]](s32) ; PACKED-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]] - ; PACKED-NEXT: [[BITCAST2:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) - ; PACKED-NEXT: $vgpr0 = COPY [[AMDGPU_INTRIN_IMAGE_LOAD]](<2 x s16>) + ; PACKED-NEXT: [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) + ; PACKED-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 + ; PACKED-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[C2]], [[C]](s32) + ; PACKED-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[C2]], [[SHL1]] + ; PACKED-NEXT: [[BITCAST2:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32) + ; PACKED-NEXT: $vgpr0 = COPY [[BITCAST1]](<2 x s16>) ; PACKED-NEXT: $vgpr1 = COPY [[BITCAST2]](<2 x s16>) ; PACKED-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1 %tex = call <3 x half> @llvm.amdgcn.image.load.2d.v3f16.i32(i32 3, i32 %s, i32 %t, <8 x i32> %rsrc, i32 0, i32 0) @@ -679,22 +678,17 @@ define amdgpu_ps <3 x half> @image_load_v3f16_dmask_1000(<8 x i32> inreg %rsrc, ; UNPACKED-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1 ; UNPACKED-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32) ; UNPACKED-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(s32) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.2d), 1, [[BUILD_VECTOR1]](<2 x s32>), $noreg, [[BUILD_VECTOR]](<8 x s32>), 0, 0, 0 :: (dereferenceable load (s16) from custom "ImageResource") - ; UNPACKED-NEXT: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF - ; UNPACKED-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<4 x s16>) - ; UNPACKED-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>) - ; UNPACKED-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; UNPACKED-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 - ; UNPACKED-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[AMDGPU_INTRIN_IMAGE_LOAD]], [[C1]] - ; UNPACKED-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 - ; UNPACKED-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[C2]], [[C]](s32) + ; UNPACKED-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 + ; UNPACKED-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[AMDGPU_INTRIN_IMAGE_LOAD]], [[C]] + ; UNPACKED-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 + ; UNPACKED-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; UNPACKED-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[C1]], [[C2]](s32) ; UNPACKED-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]] - ; UNPACKED-NEXT: [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) - ; UNPACKED-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[BITCAST]], [[C1]] - ; UNPACKED-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C]](s32) - ; 
UNPACKED-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[C2]], [[SHL1]] - ; UNPACKED-NEXT: [[BITCAST2:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32) - ; UNPACKED-NEXT: $vgpr0 = COPY [[BITCAST1]](<2 x s16>) - ; UNPACKED-NEXT: $vgpr1 = COPY [[BITCAST2]](<2 x s16>) + ; UNPACKED-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) + ; UNPACKED-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[C1]], [[SHL]] + ; UNPACKED-NEXT: [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32) + ; UNPACKED-NEXT: $vgpr0 = COPY [[BITCAST]](<2 x s16>) + ; UNPACKED-NEXT: $vgpr1 = COPY [[BITCAST1]](<2 x s16>) ; UNPACKED-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1 ; PACKED-LABEL: name: image_load_v3f16_dmask_1000 ; PACKED: bb.1 (%ir-block.0): @@ -713,19 +707,20 @@ define amdgpu_ps <3 x half> @image_load_v3f16_dmask_1000(<8 x i32> inreg %rsrc, ; PACKED-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1 ; PACKED-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32) ; PACKED-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<2 x s16>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.2d), 1, [[BUILD_VECTOR1]](<2 x s32>), $noreg, [[BUILD_VECTOR]](<8 x s32>), 0, 0, 0 :: (dereferenceable load (s16) from custom "ImageResource") - ; PACKED-NEXT: [[DEF:%[0-9]+]]:_(<2 x s16>) = G_IMPLICIT_DEF - ; PACKED-NEXT: [[DEF1:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF - ; PACKED-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[DEF]](<2 x s16>) - ; PACKED-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF1]](<4 x s16>) - ; PACKED-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>) + ; PACKED-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[AMDGPU_INTRIN_IMAGE_LOAD]](<2 x s16>) ; PACKED-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; PACKED-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) ; PACKED-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 ; PACKED-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[BITCAST]], [[C1]] - ; PACKED-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[BITCAST1]], [[C1]] + ; PACKED-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LSHR]], [[C1]] ; PACKED-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C]](s32) ; PACKED-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]] - ; PACKED-NEXT: [[BITCAST2:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) - ; PACKED-NEXT: $vgpr0 = COPY [[AMDGPU_INTRIN_IMAGE_LOAD]](<2 x s16>) + ; PACKED-NEXT: [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) + ; PACKED-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 + ; PACKED-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[C2]], [[C]](s32) + ; PACKED-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[C2]], [[SHL1]] + ; PACKED-NEXT: [[BITCAST2:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32) + ; PACKED-NEXT: $vgpr0 = COPY [[BITCAST1]](<2 x s16>) ; PACKED-NEXT: $vgpr1 = COPY [[BITCAST2]](<2 x s16>) ; PACKED-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1 %tex = call <3 x half> @llvm.amdgcn.image.load.2d.v3f16.i32(i32 1, i32 %s, i32 %t, <8 x i32> %rsrc, i32 0, i32 0) @@ -740,27 +735,24 @@ define amdgpu_ps <3 x half> @image_load_v3f16_dmask_0000(<8 x i32> inreg %rsrc, ; UNPACKED-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 ; UNPACKED-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 ; UNPACKED-NEXT: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF - ; UNPACKED-NEXT: [[DEF1:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF ; UNPACKED-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<4 x s16>) ; UNPACKED-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = 
G_BITCAST [[UV]](<2 x s16>) ; UNPACKED-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 ; UNPACKED-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) ; UNPACKED-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>) - ; UNPACKED-NEXT: [[UV2:%[0-9]+]]:_(<2 x s16>), [[UV3:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF1]](<4 x s16>) - ; UNPACKED-NEXT: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[UV2]](<2 x s16>) ; UNPACKED-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 ; UNPACKED-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[BITCAST]], [[C1]] ; UNPACKED-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LSHR]], [[C1]] ; UNPACKED-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C]](s32) ; UNPACKED-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]] - ; UNPACKED-NEXT: [[BITCAST3:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) + ; UNPACKED-NEXT: [[BITCAST2:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) ; UNPACKED-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[BITCAST1]], [[C1]] - ; UNPACKED-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[BITCAST2]], [[C1]] - ; UNPACKED-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C]](s32) + ; UNPACKED-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 + ; UNPACKED-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[C2]], [[C]](s32) ; UNPACKED-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL1]] - ; UNPACKED-NEXT: [[BITCAST4:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32) - ; UNPACKED-NEXT: $vgpr0 = COPY [[BITCAST3]](<2 x s16>) - ; UNPACKED-NEXT: $vgpr1 = COPY [[BITCAST4]](<2 x s16>) + ; UNPACKED-NEXT: [[BITCAST3:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32) + ; UNPACKED-NEXT: $vgpr0 = COPY [[BITCAST2]](<2 x s16>) + ; UNPACKED-NEXT: $vgpr1 = COPY [[BITCAST3]](<2 x s16>) ; UNPACKED-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1 ; PACKED-LABEL: name: image_load_v3f16_dmask_0000 ; PACKED: bb.1 (%ir-block.0): @@ -769,27 +761,24 @@ define amdgpu_ps <3 x half> @image_load_v3f16_dmask_0000(<8 x i32> inreg %rsrc, ; PACKED-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 ; PACKED-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 ; PACKED-NEXT: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF - ; PACKED-NEXT: [[DEF1:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF ; PACKED-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<4 x s16>) ; PACKED-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>) ; PACKED-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 ; PACKED-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) ; PACKED-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>) - ; PACKED-NEXT: [[UV2:%[0-9]+]]:_(<2 x s16>), [[UV3:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF1]](<4 x s16>) - ; PACKED-NEXT: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[UV2]](<2 x s16>) ; PACKED-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 ; PACKED-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[BITCAST]], [[C1]] ; PACKED-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LSHR]], [[C1]] ; PACKED-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C]](s32) ; PACKED-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]] - ; PACKED-NEXT: [[BITCAST3:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) + ; PACKED-NEXT: [[BITCAST2:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) ; PACKED-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[BITCAST1]], [[C1]] - ; PACKED-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[BITCAST2]], [[C1]] - ; PACKED-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C]](s32) + ; PACKED-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 + ; PACKED-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL 
[[C2]], [[C]](s32) ; PACKED-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL1]] - ; PACKED-NEXT: [[BITCAST4:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32) - ; PACKED-NEXT: $vgpr0 = COPY [[BITCAST3]](<2 x s16>) - ; PACKED-NEXT: $vgpr1 = COPY [[BITCAST4]](<2 x s16>) + ; PACKED-NEXT: [[BITCAST3:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32) + ; PACKED-NEXT: $vgpr0 = COPY [[BITCAST2]](<2 x s16>) + ; PACKED-NEXT: $vgpr1 = COPY [[BITCAST3]](<2 x s16>) ; PACKED-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1 %tex = call <3 x half> @llvm.amdgcn.image.load.2d.v3f16.i32(i32 0, i32 %s, i32 %t, <8 x i32> %rsrc, i32 0, i32 0) ret <3 x half> %tex @@ -1187,23 +1176,19 @@ define amdgpu_ps <3 x half> @image_load_tfe_v3f16_dmask_1100(<8 x i32> inreg %rs ; UNPACKED-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<3 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.2d), 3, [[BUILD_VECTOR1]](<2 x s32>), $noreg, [[BUILD_VECTOR]](<8 x s32>), 1, 0, 0 :: (dereferenceable load (<2 x s16>) from custom "ImageResource") ; UNPACKED-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<3 x s32>) ; UNPACKED-NEXT: G_STORE [[UV2]](s32), [[DEF]](p1) :: (store (s32) into `i32 addrspace(1)* undef`, addrspace 1) - ; UNPACKED-NEXT: [[DEF1:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF - ; UNPACKED-NEXT: [[UV3:%[0-9]+]]:_(<2 x s16>), [[UV4:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF1]](<4 x s16>) - ; UNPACKED-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV3]](<2 x s16>) - ; UNPACKED-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; UNPACKED-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 - ; UNPACKED-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[UV]], [[C1]] - ; UNPACKED-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[UV1]], [[C1]] - ; UNPACKED-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C]](s32) + ; UNPACKED-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 + ; UNPACKED-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[UV]], [[C]] + ; UNPACKED-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[UV1]], [[C]] + ; UNPACKED-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; UNPACKED-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C1]](s32) ; UNPACKED-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]] - ; UNPACKED-NEXT: [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) + ; UNPACKED-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) ; UNPACKED-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 - ; UNPACKED-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[BITCAST]], [[C1]] - ; UNPACKED-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND2]], [[C]](s32) + ; UNPACKED-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[C2]], [[C1]](s32) ; UNPACKED-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[C2]], [[SHL1]] - ; UNPACKED-NEXT: [[BITCAST2:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32) - ; UNPACKED-NEXT: $vgpr0 = COPY [[BITCAST1]](<2 x s16>) - ; UNPACKED-NEXT: $vgpr1 = COPY [[BITCAST2]](<2 x s16>) + ; UNPACKED-NEXT: [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32) + ; UNPACKED-NEXT: $vgpr0 = COPY [[BITCAST]](<2 x s16>) + ; UNPACKED-NEXT: $vgpr1 = COPY [[BITCAST1]](<2 x s16>) ; UNPACKED-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1 ; PACKED-LABEL: name: image_load_tfe_v3f16_dmask_1100 ; PACKED: bb.1 (%ir-block.0): @@ -1225,20 +1210,21 @@ define amdgpu_ps <3 x half> @image_load_tfe_v3f16_dmask_1100(<8 x i32> inreg %rs ; PACKED-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<2 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.2d), 3, 
[[BUILD_VECTOR1]](<2 x s32>), $noreg, [[BUILD_VECTOR]](<8 x s32>), 1, 0, 0 :: (dereferenceable load (<2 x s16>) from custom "ImageResource") ; PACKED-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<2 x s32>) ; PACKED-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[UV]](s32) - ; PACKED-NEXT: [[DEF1:%[0-9]+]]:_(<2 x s16>) = G_IMPLICIT_DEF - ; PACKED-NEXT: G_STORE [[UV1]](s32), [[DEF]](p1) :: (store (s32) into `i32 addrspace(1)* undef`, addrspace 1) - ; PACKED-NEXT: [[DEF2:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF - ; PACKED-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[DEF1]](<2 x s16>) - ; PACKED-NEXT: [[UV2:%[0-9]+]]:_(<2 x s16>), [[UV3:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF2]](<4 x s16>) - ; PACKED-NEXT: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[UV2]](<2 x s16>) + ; PACKED-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[BITCAST]](<2 x s16>) ; PACKED-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; PACKED-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C]](s32) + ; PACKED-NEXT: G_STORE [[UV1]](s32), [[DEF]](p1) :: (store (s32) into `i32 addrspace(1)* undef`, addrspace 1) ; PACKED-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 ; PACKED-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[BITCAST1]], [[C1]] - ; PACKED-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[BITCAST2]], [[C1]] + ; PACKED-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LSHR]], [[C1]] ; PACKED-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C]](s32) ; PACKED-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]] - ; PACKED-NEXT: [[BITCAST3:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) - ; PACKED-NEXT: $vgpr0 = COPY [[BITCAST]](<2 x s16>) + ; PACKED-NEXT: [[BITCAST2:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) + ; PACKED-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 + ; PACKED-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[C2]], [[C]](s32) + ; PACKED-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[C2]], [[SHL1]] + ; PACKED-NEXT: [[BITCAST3:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32) + ; PACKED-NEXT: $vgpr0 = COPY [[BITCAST2]](<2 x s16>) ; PACKED-NEXT: $vgpr1 = COPY [[BITCAST3]](<2 x s16>) ; PACKED-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1 %res = call { <3 x half>, i32 } @llvm.amdgcn.image.load.2d.sl_v3f16i32s.i32(i32 3, i32 %s, i32 %t, <8 x i32> %rsrc, i32 1, i32 0) @@ -1269,22 +1255,17 @@ define amdgpu_ps <3 x half> @image_load_tfe_v3f16_dmask_1000(<8 x i32> inreg %rs ; UNPACKED-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<2 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.2d), 1, [[BUILD_VECTOR1]](<2 x s32>), $noreg, [[BUILD_VECTOR]](<8 x s32>), 1, 0, 0 :: (dereferenceable load (s16) from custom "ImageResource") ; UNPACKED-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<2 x s32>) ; UNPACKED-NEXT: G_STORE [[UV1]](s32), [[DEF]](p1) :: (store (s32) into `i32 addrspace(1)* undef`, addrspace 1) - ; UNPACKED-NEXT: [[DEF1:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF - ; UNPACKED-NEXT: [[UV2:%[0-9]+]]:_(<2 x s16>), [[UV3:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF1]](<4 x s16>) - ; UNPACKED-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV2]](<2 x s16>) - ; UNPACKED-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; UNPACKED-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 - ; UNPACKED-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[UV]], [[C1]] - ; UNPACKED-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 - ; UNPACKED-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[C2]], [[C]](s32) + ; UNPACKED-NEXT: 
[[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 + ; UNPACKED-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[UV]], [[C]] + ; UNPACKED-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 + ; UNPACKED-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; UNPACKED-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[C1]], [[C2]](s32) ; UNPACKED-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]] - ; UNPACKED-NEXT: [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) - ; UNPACKED-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[BITCAST]], [[C1]] - ; UNPACKED-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C]](s32) - ; UNPACKED-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[C2]], [[SHL1]] - ; UNPACKED-NEXT: [[BITCAST2:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32) - ; UNPACKED-NEXT: $vgpr0 = COPY [[BITCAST1]](<2 x s16>) - ; UNPACKED-NEXT: $vgpr1 = COPY [[BITCAST2]](<2 x s16>) + ; UNPACKED-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) + ; UNPACKED-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[C1]], [[SHL]] + ; UNPACKED-NEXT: [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32) + ; UNPACKED-NEXT: $vgpr0 = COPY [[BITCAST]](<2 x s16>) + ; UNPACKED-NEXT: $vgpr1 = COPY [[BITCAST1]](<2 x s16>) ; UNPACKED-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1 ; PACKED-LABEL: name: image_load_tfe_v3f16_dmask_1000 ; PACKED: bb.1 (%ir-block.0): @@ -1306,20 +1287,21 @@ define amdgpu_ps <3 x half> @image_load_tfe_v3f16_dmask_1000(<8 x i32> inreg %rs ; PACKED-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<2 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.2d), 1, [[BUILD_VECTOR1]](<2 x s32>), $noreg, [[BUILD_VECTOR]](<8 x s32>), 1, 0, 0 :: (dereferenceable load (s16) from custom "ImageResource") ; PACKED-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<2 x s32>) ; PACKED-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[UV]](s32) - ; PACKED-NEXT: [[DEF1:%[0-9]+]]:_(<2 x s16>) = G_IMPLICIT_DEF - ; PACKED-NEXT: G_STORE [[UV1]](s32), [[DEF]](p1) :: (store (s32) into `i32 addrspace(1)* undef`, addrspace 1) - ; PACKED-NEXT: [[DEF2:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF - ; PACKED-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[DEF1]](<2 x s16>) - ; PACKED-NEXT: [[UV2:%[0-9]+]]:_(<2 x s16>), [[UV3:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF2]](<4 x s16>) - ; PACKED-NEXT: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[UV2]](<2 x s16>) + ; PACKED-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[BITCAST]](<2 x s16>) ; PACKED-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; PACKED-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C]](s32) + ; PACKED-NEXT: G_STORE [[UV1]](s32), [[DEF]](p1) :: (store (s32) into `i32 addrspace(1)* undef`, addrspace 1) ; PACKED-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 ; PACKED-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[BITCAST1]], [[C1]] - ; PACKED-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[BITCAST2]], [[C1]] + ; PACKED-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LSHR]], [[C1]] ; PACKED-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C]](s32) ; PACKED-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]] - ; PACKED-NEXT: [[BITCAST3:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) - ; PACKED-NEXT: $vgpr0 = COPY [[BITCAST]](<2 x s16>) + ; PACKED-NEXT: [[BITCAST2:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) + ; PACKED-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 + ; PACKED-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[C2]], [[C]](s32) + ; PACKED-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[C2]], [[SHL1]] + ; PACKED-NEXT: [[BITCAST3:%[0-9]+]]:_(<2 x 
s16>) = G_BITCAST [[OR1]](s32) + ; PACKED-NEXT: $vgpr0 = COPY [[BITCAST2]](<2 x s16>) ; PACKED-NEXT: $vgpr1 = COPY [[BITCAST3]](<2 x s16>) ; PACKED-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1 %res = call { <3 x half>, i32 } @llvm.amdgcn.image.load.2d.sl_v3f16i32s.i32(i32 1, i32 %s, i32 %t, <8 x i32> %rsrc, i32 1, i32 0) @@ -1350,22 +1332,17 @@ define amdgpu_ps <3 x half> @image_load_tfe_v3f16_dmask_0000(<8 x i32> inreg %rs ; UNPACKED-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<2 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.2d), 1, [[BUILD_VECTOR1]](<2 x s32>), $noreg, [[BUILD_VECTOR]](<8 x s32>), 1, 0, 0 :: (dereferenceable load (s16) from custom "ImageResource") ; UNPACKED-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<2 x s32>) ; UNPACKED-NEXT: G_STORE [[UV1]](s32), [[DEF]](p1) :: (store (s32) into `i32 addrspace(1)* undef`, addrspace 1) - ; UNPACKED-NEXT: [[DEF1:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF - ; UNPACKED-NEXT: [[UV2:%[0-9]+]]:_(<2 x s16>), [[UV3:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF1]](<4 x s16>) - ; UNPACKED-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV2]](<2 x s16>) - ; UNPACKED-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; UNPACKED-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 - ; UNPACKED-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[UV]], [[C1]] - ; UNPACKED-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 - ; UNPACKED-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[C2]], [[C]](s32) + ; UNPACKED-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 + ; UNPACKED-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[UV]], [[C]] + ; UNPACKED-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 + ; UNPACKED-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; UNPACKED-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[C1]], [[C2]](s32) ; UNPACKED-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]] - ; UNPACKED-NEXT: [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) - ; UNPACKED-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[BITCAST]], [[C1]] - ; UNPACKED-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C]](s32) - ; UNPACKED-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[C2]], [[SHL1]] - ; UNPACKED-NEXT: [[BITCAST2:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32) - ; UNPACKED-NEXT: $vgpr0 = COPY [[BITCAST1]](<2 x s16>) - ; UNPACKED-NEXT: $vgpr1 = COPY [[BITCAST2]](<2 x s16>) + ; UNPACKED-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) + ; UNPACKED-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[C1]], [[SHL]] + ; UNPACKED-NEXT: [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32) + ; UNPACKED-NEXT: $vgpr0 = COPY [[BITCAST]](<2 x s16>) + ; UNPACKED-NEXT: $vgpr1 = COPY [[BITCAST1]](<2 x s16>) ; UNPACKED-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1 ; PACKED-LABEL: name: image_load_tfe_v3f16_dmask_0000 ; PACKED: bb.1 (%ir-block.0): @@ -1387,20 +1364,21 @@ define amdgpu_ps <3 x half> @image_load_tfe_v3f16_dmask_0000(<8 x i32> inreg %rs ; PACKED-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<2 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.2d), 1, [[BUILD_VECTOR1]](<2 x s32>), $noreg, [[BUILD_VECTOR]](<8 x s32>), 1, 0, 0 :: (dereferenceable load (s16) from custom "ImageResource") ; PACKED-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<2 x s32>) ; PACKED-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[UV]](s32) - ; PACKED-NEXT: [[DEF1:%[0-9]+]]:_(<2 x s16>) = G_IMPLICIT_DEF - ; PACKED-NEXT: G_STORE [[UV1]](s32), [[DEF]](p1) 
:: (store (s32) into `i32 addrspace(1)* undef`, addrspace 1) - ; PACKED-NEXT: [[DEF2:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF - ; PACKED-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[DEF1]](<2 x s16>) - ; PACKED-NEXT: [[UV2:%[0-9]+]]:_(<2 x s16>), [[UV3:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF2]](<4 x s16>) - ; PACKED-NEXT: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[UV2]](<2 x s16>) + ; PACKED-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[BITCAST]](<2 x s16>) ; PACKED-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; PACKED-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C]](s32) + ; PACKED-NEXT: G_STORE [[UV1]](s32), [[DEF]](p1) :: (store (s32) into `i32 addrspace(1)* undef`, addrspace 1) ; PACKED-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 ; PACKED-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[BITCAST1]], [[C1]] - ; PACKED-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[BITCAST2]], [[C1]] + ; PACKED-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LSHR]], [[C1]] ; PACKED-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C]](s32) ; PACKED-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]] - ; PACKED-NEXT: [[BITCAST3:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) - ; PACKED-NEXT: $vgpr0 = COPY [[BITCAST]](<2 x s16>) + ; PACKED-NEXT: [[BITCAST2:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) + ; PACKED-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 + ; PACKED-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[C2]], [[C]](s32) + ; PACKED-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[C2]], [[SHL1]] + ; PACKED-NEXT: [[BITCAST3:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32) + ; PACKED-NEXT: $vgpr0 = COPY [[BITCAST2]](<2 x s16>) ; PACKED-NEXT: $vgpr1 = COPY [[BITCAST3]](<2 x s16>) ; PACKED-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1 %res = call { <3 x half>, i32 } @llvm.amdgcn.image.load.2d.sl_v3f16i32s.i32(i32 0, i32 %s, i32 %t, <8 x i32> %rsrc, i32 1, i32 0) diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-llvm.amdgcn.image.store.2d.d16.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-llvm.amdgcn.image.store.2d.d16.ll index 1d84858080da..862d88de3b5c 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-llvm.amdgcn.image.store.2d.d16.ll +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-llvm.amdgcn.image.store.2d.d16.ll @@ -1,8 +1,8 @@ ; NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py -; RUN: llc -global-isel -mtriple=amdgcn-mesa-mesa3d -mcpu=tonga -stop-after=legalizer -global-isel-abort=0 -o - %s | FileCheck -check-prefix=UNPACKED %s -; RUN: llc -global-isel -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx810 -stop-after=legalizer -global-isel-abort=0 -o - %s | FileCheck -check-prefix=GFX81 %s -; RUN: llc -global-isel -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx900 -stop-after=legalizer -global-isel-abort=0 -o - %s | FileCheck -check-prefix=GFX9 %s -; RUN: llc -global-isel -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx1010 -stop-after=legalizer -global-isel-abort=0 -o - %s | FileCheck -check-prefix=GFX10 %s +; RUN: llc -global-isel -mtriple=amdgcn-mesa-mesa3d -mcpu=tonga -stop-after=legalizer -o - %s | FileCheck -check-prefix=UNPACKED %s +; RUN: llc -global-isel -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx810 -stop-after=legalizer -o - %s | FileCheck -check-prefix=GFX81 %s +; RUN: llc -global-isel -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx900 -stop-after=legalizer -o - %s | FileCheck -check-prefix=GFX9 %s +; RUN: llc -global-isel -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx1010 -stop-after=legalizer -o - %s | FileCheck -check-prefix=GFX10 %s define amdgpu_ps void @image_store_f16(<8 x i32> inreg 
%rsrc, i32 %s, i32 %t, half %data) { ; UNPACKED-LABEL: name: image_store_f16 @@ -195,14 +195,12 @@ define amdgpu_ps void @image_store_v3f16(<8 x i32> inreg %rsrc, i32 %s, i32 %t, ; UNPACKED-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1 ; UNPACKED-NEXT: [[COPY10:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr2 ; UNPACKED-NEXT: [[COPY11:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr3 - ; UNPACKED-NEXT: [[DEF:%[0-9]+]]:_(<2 x s16>) = G_IMPLICIT_DEF - ; UNPACKED-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[COPY10]](<2 x s16>), [[COPY11]](<2 x s16>), [[DEF]](<2 x s16>) - ; UNPACKED-NEXT: [[BITCAST:%[0-9]+]]:_(s96) = G_BITCAST [[CONCAT_VECTORS]](<6 x s16>) - ; UNPACKED-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[BITCAST]](s96) + ; UNPACKED-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY10]](<2 x s16>) ; UNPACKED-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; UNPACKED-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[UV]], [[C]](s32) + ; UNPACKED-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) + ; UNPACKED-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[COPY11]](<2 x s16>) ; UNPACKED-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32) - ; UNPACKED-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[UV]](s32), [[LSHR]](s32), [[UV1]](s32) + ; UNPACKED-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[BITCAST]](s32), [[LSHR]](s32), [[BITCAST1]](s32) ; UNPACKED-NEXT: G_AMDGPU_INTRIN_IMAGE_STORE intrinsic(@llvm.amdgcn.image.store.2d), [[BUILD_VECTOR2]](<3 x s32>), 7, [[BUILD_VECTOR1]](<2 x s32>), $noreg, [[BUILD_VECTOR]](<8 x s32>), 0, 0, 0 :: (dereferenceable store (<3 x s16>) into custom "ImageResource", align 8) ; UNPACKED-NEXT: S_ENDPGM 0 ; GFX81-LABEL: name: image_store_v3f16 @@ -222,29 +220,27 @@ define amdgpu_ps void @image_store_v3f16(<8 x i32> inreg %rsrc, i32 %s, i32 %t, ; GFX81-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1 ; GFX81-NEXT: [[COPY10:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr2 ; GFX81-NEXT: [[COPY11:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr3 - ; GFX81-NEXT: [[DEF:%[0-9]+]]:_(<2 x s16>) = G_IMPLICIT_DEF - ; GFX81-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[COPY10]](<2 x s16>), [[COPY11]](<2 x s16>), [[DEF]](<2 x s16>) - ; GFX81-NEXT: [[BITCAST:%[0-9]+]]:_(s96) = G_BITCAST [[CONCAT_VECTORS]](<6 x s16>) - ; GFX81-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[BITCAST]](s96) + ; GFX81-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY10]](<2 x s16>) ; GFX81-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; GFX81-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[UV]], [[C]](s32) + ; GFX81-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) + ; GFX81-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[COPY11]](<2 x s16>) ; GFX81-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32) ; GFX81-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 - ; GFX81-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[UV]], [[C1]] + ; GFX81-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[BITCAST]], [[C1]] ; GFX81-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LSHR]], [[C1]] ; GFX81-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C]](s32) ; GFX81-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]] - ; GFX81-NEXT: [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) - ; GFX81-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[UV1]], [[C1]] + ; GFX81-NEXT: [[BITCAST2:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) 
+ ; GFX81-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[BITCAST1]], [[C1]] ; GFX81-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 ; GFX81-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[C2]], [[C]](s32) ; GFX81-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL1]] - ; GFX81-NEXT: [[BITCAST2:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32) + ; GFX81-NEXT: [[BITCAST3:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32) ; GFX81-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[C2]], [[SHL1]] - ; GFX81-NEXT: [[BITCAST3:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR2]](s32) - ; GFX81-NEXT: [[CONCAT_VECTORS1:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BITCAST1]](<2 x s16>), [[BITCAST2]](<2 x s16>), [[BITCAST3]](<2 x s16>) - ; GFX81-NEXT: [[BITCAST4:%[0-9]+]]:_(<3 x s32>) = G_BITCAST [[CONCAT_VECTORS1]](<6 x s16>) - ; GFX81-NEXT: G_AMDGPU_INTRIN_IMAGE_STORE intrinsic(@llvm.amdgcn.image.store.2d), [[BITCAST4]](<3 x s32>), 7, [[BUILD_VECTOR1]](<2 x s32>), $noreg, [[BUILD_VECTOR]](<8 x s32>), 0, 0, 0 :: (dereferenceable store (<3 x s16>) into custom "ImageResource", align 8) + ; GFX81-NEXT: [[BITCAST4:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR2]](s32) + ; GFX81-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BITCAST2]](<2 x s16>), [[BITCAST3]](<2 x s16>), [[BITCAST4]](<2 x s16>) + ; GFX81-NEXT: [[BITCAST5:%[0-9]+]]:_(<3 x s32>) = G_BITCAST [[CONCAT_VECTORS]](<6 x s16>) + ; GFX81-NEXT: G_AMDGPU_INTRIN_IMAGE_STORE intrinsic(@llvm.amdgcn.image.store.2d), [[BITCAST5]](<3 x s32>), 7, [[BUILD_VECTOR1]](<2 x s32>), $noreg, [[BUILD_VECTOR]](<8 x s32>), 0, 0, 0 :: (dereferenceable store (<3 x s16>) into custom "ImageResource", align 8) ; GFX81-NEXT: S_ENDPGM 0 ; GFX9-LABEL: name: image_store_v3f16 ; GFX9: bb.1 (%ir-block.0): @@ -263,19 +259,16 @@ define amdgpu_ps void @image_store_v3f16(<8 x i32> inreg %rsrc, i32 %s, i32 %t, ; GFX9-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1 ; GFX9-NEXT: [[COPY10:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr2 ; GFX9-NEXT: [[COPY11:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr3 - ; GFX9-NEXT: [[DEF:%[0-9]+]]:_(<2 x s16>) = G_IMPLICIT_DEF - ; GFX9-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[COPY10]](<2 x s16>), [[COPY11]](<2 x s16>), [[DEF]](<2 x s16>) - ; GFX9-NEXT: [[BITCAST:%[0-9]+]]:_(s96) = G_BITCAST [[CONCAT_VECTORS]](<6 x s16>) - ; GFX9-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[BITCAST]](s96) + ; GFX9-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY10]](<2 x s16>) ; GFX9-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; GFX9-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[UV]], [[C]](s32) - ; GFX9-NEXT: [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[UV]](s32), [[LSHR]](s32) - ; GFX9-NEXT: [[DEF1:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF - ; GFX9-NEXT: [[BUILD_VECTOR_TRUNC1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[UV1]](s32), [[DEF1]](s32) - ; GFX9-NEXT: [[CONCAT_VECTORS1:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR_TRUNC]](<2 x s16>), [[BUILD_VECTOR_TRUNC1]](<2 x s16>), [[DEF]](<2 x s16>) - ; GFX9-NEXT: [[UV3:%[0-9]+]]:_(<3 x s16>), [[UV4:%[0-9]+]]:_(<3 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS1]](<6 x s16>) + ; GFX9-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) + ; GFX9-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[COPY11]](<2 x s16>) ; GFX9-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32) - ; GFX9-NEXT: G_AMDGPU_INTRIN_IMAGE_STORE intrinsic(@llvm.amdgcn.image.store.2d), [[UV3]](<3 x s16>), 7, [[BUILD_VECTOR1]](<2 x s32>), $noreg, 
[[BUILD_VECTOR]](<8 x s32>), 0, 0, 0 :: (dereferenceable store (<3 x s16>) into custom "ImageResource", align 8) + ; GFX9-NEXT: [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[BITCAST]](s32), [[LSHR]](s32) + ; GFX9-NEXT: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF + ; GFX9-NEXT: [[BUILD_VECTOR_TRUNC1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[BITCAST1]](s32), [[DEF]](s32) + ; GFX9-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR_TRUNC]](<2 x s16>), [[BUILD_VECTOR_TRUNC1]](<2 x s16>) + ; GFX9-NEXT: G_AMDGPU_INTRIN_IMAGE_STORE intrinsic(@llvm.amdgcn.image.store.2d), [[CONCAT_VECTORS]](<4 x s16>), 7, [[BUILD_VECTOR1]](<2 x s32>), $noreg, [[BUILD_VECTOR]](<8 x s32>), 0, 0, 0 :: (dereferenceable store (<3 x s16>) into custom "ImageResource", align 8) ; GFX9-NEXT: S_ENDPGM 0 ; GFX10-LABEL: name: image_store_v3f16 ; GFX10: bb.1 (%ir-block.0): @@ -294,19 +287,16 @@ define amdgpu_ps void @image_store_v3f16(<8 x i32> inreg %rsrc, i32 %s, i32 %t, ; GFX10-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1 ; GFX10-NEXT: [[COPY10:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr2 ; GFX10-NEXT: [[COPY11:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr3 - ; GFX10-NEXT: [[DEF:%[0-9]+]]:_(<2 x s16>) = G_IMPLICIT_DEF - ; GFX10-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[COPY10]](<2 x s16>), [[COPY11]](<2 x s16>), [[DEF]](<2 x s16>) - ; GFX10-NEXT: [[BITCAST:%[0-9]+]]:_(s96) = G_BITCAST [[CONCAT_VECTORS]](<6 x s16>) - ; GFX10-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[BITCAST]](s96) + ; GFX10-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY10]](<2 x s16>) ; GFX10-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; GFX10-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[UV]], [[C]](s32) - ; GFX10-NEXT: [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[UV]](s32), [[LSHR]](s32) - ; GFX10-NEXT: [[DEF1:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF - ; GFX10-NEXT: [[BUILD_VECTOR_TRUNC1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[UV1]](s32), [[DEF1]](s32) - ; GFX10-NEXT: [[CONCAT_VECTORS1:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR_TRUNC]](<2 x s16>), [[BUILD_VECTOR_TRUNC1]](<2 x s16>), [[DEF]](<2 x s16>) - ; GFX10-NEXT: [[UV3:%[0-9]+]]:_(<3 x s16>), [[UV4:%[0-9]+]]:_(<3 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS1]](<6 x s16>) + ; GFX10-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) + ; GFX10-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[COPY11]](<2 x s16>) ; GFX10-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32) - ; GFX10-NEXT: G_AMDGPU_INTRIN_IMAGE_STORE intrinsic(@llvm.amdgcn.image.store.2d), [[UV3]](<3 x s16>), 7, [[BUILD_VECTOR1]](<2 x s32>), $noreg, [[BUILD_VECTOR]](<8 x s32>), 0, 0, 0 :: (dereferenceable store (<3 x s16>) into custom "ImageResource", align 8) + ; GFX10-NEXT: [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[BITCAST]](s32), [[LSHR]](s32) + ; GFX10-NEXT: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF + ; GFX10-NEXT: [[BUILD_VECTOR_TRUNC1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[BITCAST1]](s32), [[DEF]](s32) + ; GFX10-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR_TRUNC]](<2 x s16>), [[BUILD_VECTOR_TRUNC1]](<2 x s16>) + ; GFX10-NEXT: G_AMDGPU_INTRIN_IMAGE_STORE intrinsic(@llvm.amdgcn.image.store.2d), [[CONCAT_VECTORS]](<4 x s16>), 7, [[BUILD_VECTOR1]](<2 x s32>), $noreg, [[BUILD_VECTOR]](<8 x s32>), 0, 0, 0 :: (dereferenceable store (<3 x s16>) into custom 
"ImageResource", align 8) ; GFX10-NEXT: S_ENDPGM 0 call void @llvm.amdgcn.image.store.2d.v3f16.i32(<3 x half> %in, i32 7, i32 %s, i32 %t, <8 x i32> %rsrc, i32 0, i32 0) ret void diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-llvm.amdgcn.s.buffer.load.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-llvm.amdgcn.s.buffer.load.mir index 97d0d92cbe3a..27b6aaed74a3 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-llvm.amdgcn.s.buffer.load.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-llvm.amdgcn.s.buffer.load.mir @@ -30,10 +30,9 @@ body: | ; GCN: [[COPY:%[0-9]+]]:_(<4 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3 ; GCN-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 ; GCN-NEXT: [[AMDGPU_S_BUFFER_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_S_BUFFER_LOAD [[COPY]](<4 x s32>), [[C]](s32), 0 :: (dereferenceable invariant load (s96), align 4) - ; GCN-NEXT: [[DEF:%[0-9]+]]:_(<4 x s32>) = G_IMPLICIT_DEF - ; GCN-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<12 x s32>) = G_CONCAT_VECTORS [[AMDGPU_S_BUFFER_LOAD]](<4 x s32>), [[DEF]](<4 x s32>), [[DEF]](<4 x s32>) - ; GCN-NEXT: [[UV:%[0-9]+]]:_(<3 x s32>), [[UV1:%[0-9]+]]:_(<3 x s32>), [[UV2:%[0-9]+]]:_(<3 x s32>), [[UV3:%[0-9]+]]:_(<3 x s32>) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<12 x s32>) - ; GCN-NEXT: S_ENDPGM 0, implicit [[UV]](<3 x s32>) + ; GCN-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_S_BUFFER_LOAD]](<4 x s32>) + ; GCN-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[UV]](s32), [[UV1]](s32), [[UV2]](s32) + ; GCN-NEXT: S_ENDPGM 0, implicit [[BUILD_VECTOR]](<3 x s32>) %0:_(<4 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3 %1:_(s32) = G_CONSTANT i32 0 %2:_(<3 x s32>) = G_INTRINSIC intrinsic(@llvm.amdgcn.s.buffer.load), %0, %1, 0 @@ -51,10 +50,9 @@ body: | ; GCN: [[COPY:%[0-9]+]]:_(<4 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3 ; GCN-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 ; GCN-NEXT: [[AMDGPU_S_BUFFER_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_S_BUFFER_LOAD [[COPY]](<4 x s32>), [[C]](s32), 0 :: (dereferenceable invariant load (s96), align 4) - ; GCN-NEXT: [[DEF:%[0-9]+]]:_(<4 x s32>) = G_IMPLICIT_DEF - ; GCN-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<12 x s32>) = G_CONCAT_VECTORS [[AMDGPU_S_BUFFER_LOAD]](<4 x s32>), [[DEF]](<4 x s32>), [[DEF]](<4 x s32>) - ; GCN-NEXT: [[UV:%[0-9]+]]:_(<3 x s32>), [[UV1:%[0-9]+]]:_(<3 x s32>), [[UV2:%[0-9]+]]:_(<3 x s32>), [[UV3:%[0-9]+]]:_(<3 x s32>) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<12 x s32>) - ; GCN-NEXT: [[BITCAST:%[0-9]+]]:_(<3 x p3>) = G_BITCAST [[UV]](<3 x s32>) + ; GCN-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_S_BUFFER_LOAD]](<4 x s32>) + ; GCN-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[UV]](s32), [[UV1]](s32), [[UV2]](s32) + ; GCN-NEXT: [[BITCAST:%[0-9]+]]:_(<3 x p3>) = G_BITCAST [[BUILD_VECTOR]](<3 x s32>) ; GCN-NEXT: S_ENDPGM 0, implicit [[BITCAST]](<3 x p3>) %0:_(<4 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3 %1:_(s32) = G_CONSTANT i32 0 @@ -73,10 +71,9 @@ body: | ; GCN: [[COPY:%[0-9]+]]:_(<4 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3 ; GCN-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 ; GCN-NEXT: [[AMDGPU_S_BUFFER_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_S_BUFFER_LOAD [[COPY]](<4 x s32>), [[C]](s32), 0 :: (dereferenceable invariant load (s96), align 4) - ; GCN-NEXT: [[DEF:%[0-9]+]]:_(<4 x s32>) = G_IMPLICIT_DEF - ; GCN-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<12 x s32>) = G_CONCAT_VECTORS [[AMDGPU_S_BUFFER_LOAD]](<4 x 
s32>), [[DEF]](<4 x s32>), [[DEF]](<4 x s32>) - ; GCN-NEXT: [[UV:%[0-9]+]]:_(<3 x s32>), [[UV1:%[0-9]+]]:_(<3 x s32>), [[UV2:%[0-9]+]]:_(<3 x s32>), [[UV3:%[0-9]+]]:_(<3 x s32>) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<12 x s32>) - ; GCN-NEXT: [[BITCAST:%[0-9]+]]:_(<6 x s16>) = G_BITCAST [[UV]](<3 x s32>) + ; GCN-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_S_BUFFER_LOAD]](<4 x s32>) + ; GCN-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[UV]](s32), [[UV1]](s32), [[UV2]](s32) + ; GCN-NEXT: [[BITCAST:%[0-9]+]]:_(<6 x s16>) = G_BITCAST [[BUILD_VECTOR]](<3 x s32>) ; GCN-NEXT: S_ENDPGM 0, implicit [[BITCAST]](<6 x s16>) %0:_(<4 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3 %1:_(s32) = G_CONSTANT i32 0 @@ -95,10 +92,9 @@ body: | ; GCN: [[COPY:%[0-9]+]]:_(<4 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3 ; GCN-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 ; GCN-NEXT: [[AMDGPU_S_BUFFER_LOAD:%[0-9]+]]:_(<8 x s32>) = G_AMDGPU_S_BUFFER_LOAD [[COPY]](<4 x s32>), [[C]](s32), 0 :: (dereferenceable invariant load (s192), align 4) - ; GCN-NEXT: [[DEF:%[0-9]+]]:_(<8 x s32>) = G_IMPLICIT_DEF - ; GCN-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<24 x s32>) = G_CONCAT_VECTORS [[AMDGPU_S_BUFFER_LOAD]](<8 x s32>), [[DEF]](<8 x s32>), [[DEF]](<8 x s32>) - ; GCN-NEXT: [[UV:%[0-9]+]]:_(<6 x s32>), [[UV1:%[0-9]+]]:_(<6 x s32>), [[UV2:%[0-9]+]]:_(<6 x s32>), [[UV3:%[0-9]+]]:_(<6 x s32>) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<24 x s32>) - ; GCN-NEXT: S_ENDPGM 0, implicit [[UV]](<6 x s32>) + ; GCN-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32), [[UV6:%[0-9]+]]:_(s32), [[UV7:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_S_BUFFER_LOAD]](<8 x s32>) + ; GCN-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<6 x s32>) = G_BUILD_VECTOR [[UV]](s32), [[UV1]](s32), [[UV2]](s32), [[UV3]](s32), [[UV4]](s32), [[UV5]](s32) + ; GCN-NEXT: S_ENDPGM 0, implicit [[BUILD_VECTOR]](<6 x s32>) %0:_(<4 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3 %1:_(s32) = G_CONSTANT i32 0 %2:_(<6 x s32>) = G_INTRINSIC intrinsic(@llvm.amdgcn.s.buffer.load), %0, %1, 0 @@ -116,10 +112,9 @@ body: | ; GCN: [[COPY:%[0-9]+]]:_(<4 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3 ; GCN-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 ; GCN-NEXT: [[AMDGPU_S_BUFFER_LOAD:%[0-9]+]]:_(<4 x s64>) = G_AMDGPU_S_BUFFER_LOAD [[COPY]](<4 x s32>), [[C]](s32), 0 :: (dereferenceable invariant load (s192), align 4) - ; GCN-NEXT: [[DEF:%[0-9]+]]:_(<4 x s64>) = G_IMPLICIT_DEF - ; GCN-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<12 x s64>) = G_CONCAT_VECTORS [[AMDGPU_S_BUFFER_LOAD]](<4 x s64>), [[DEF]](<4 x s64>), [[DEF]](<4 x s64>) - ; GCN-NEXT: [[UV:%[0-9]+]]:_(<3 x s64>), [[UV1:%[0-9]+]]:_(<3 x s64>), [[UV2:%[0-9]+]]:_(<3 x s64>), [[UV3:%[0-9]+]]:_(<3 x s64>) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<12 x s64>) - ; GCN-NEXT: S_ENDPGM 0, implicit [[UV]](<3 x s64>) + ; GCN-NEXT: [[UV:%[0-9]+]]:_(s64), [[UV1:%[0-9]+]]:_(s64), [[UV2:%[0-9]+]]:_(s64), [[UV3:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[AMDGPU_S_BUFFER_LOAD]](<4 x s64>) + ; GCN-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s64>) = G_BUILD_VECTOR [[UV]](s64), [[UV1]](s64), [[UV2]](s64) + ; GCN-NEXT: S_ENDPGM 0, implicit [[BUILD_VECTOR]](<3 x s64>) %0:_(<4 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3 %1:_(s32) = G_CONSTANT i32 0 %2:_(<3 x s64>) = G_INTRINSIC intrinsic(@llvm.amdgcn.s.buffer.load), %0, %1, 0 @@ -137,24 +132,21 @@ body: | ; GCN: [[COPY:%[0-9]+]]:_(<4 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3 ; 
GCN-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 ; GCN-NEXT: [[AMDGPU_S_BUFFER_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_S_BUFFER_LOAD [[COPY]](<4 x s32>), [[C]](s32), 0 :: (dereferenceable invariant load (s96), align 4) - ; GCN-NEXT: [[DEF:%[0-9]+]]:_(<4 x s32>) = G_IMPLICIT_DEF - ; GCN-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<12 x s32>) = G_CONCAT_VECTORS [[AMDGPU_S_BUFFER_LOAD]](<4 x s32>), [[DEF]](<4 x s32>), [[DEF]](<4 x s32>) - ; GCN-NEXT: [[UV:%[0-9]+]]:_(<3 x s32>), [[UV1:%[0-9]+]]:_(<3 x s32>), [[UV2:%[0-9]+]]:_(<3 x s32>), [[UV3:%[0-9]+]]:_(<3 x s32>) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<12 x s32>) - ; GCN-NEXT: [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32), [[UV6:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[UV]](<3 x s32>) + ; GCN-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_S_BUFFER_LOAD]](<4 x s32>) ; GCN-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 - ; GCN-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[UV4]], [[C1]](s32) + ; GCN-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[UV]], [[C1]](s32) ; GCN-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; GCN-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[UV4]], [[C2]](s32) + ; GCN-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[UV]], [[C2]](s32) ; GCN-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 24 - ; GCN-NEXT: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[UV4]], [[C3]](s32) - ; GCN-NEXT: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[UV5]], [[C1]](s32) - ; GCN-NEXT: [[LSHR4:%[0-9]+]]:_(s32) = G_LSHR [[UV5]], [[C2]](s32) - ; GCN-NEXT: [[LSHR5:%[0-9]+]]:_(s32) = G_LSHR [[UV5]], [[C3]](s32) - ; GCN-NEXT: [[LSHR6:%[0-9]+]]:_(s32) = G_LSHR [[UV6]], [[C1]](s32) - ; GCN-NEXT: [[LSHR7:%[0-9]+]]:_(s32) = G_LSHR [[UV6]], [[C2]](s32) - ; GCN-NEXT: [[LSHR8:%[0-9]+]]:_(s32) = G_LSHR [[UV6]], [[C3]](s32) + ; GCN-NEXT: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[UV]], [[C3]](s32) + ; GCN-NEXT: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[UV1]], [[C1]](s32) + ; GCN-NEXT: [[LSHR4:%[0-9]+]]:_(s32) = G_LSHR [[UV1]], [[C2]](s32) + ; GCN-NEXT: [[LSHR5:%[0-9]+]]:_(s32) = G_LSHR [[UV1]], [[C3]](s32) + ; GCN-NEXT: [[LSHR6:%[0-9]+]]:_(s32) = G_LSHR [[UV2]], [[C1]](s32) + ; GCN-NEXT: [[LSHR7:%[0-9]+]]:_(s32) = G_LSHR [[UV2]], [[C2]](s32) + ; GCN-NEXT: [[LSHR8:%[0-9]+]]:_(s32) = G_LSHR [[UV2]], [[C3]](s32) ; GCN-NEXT: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 - ; GCN-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[UV4]], [[C4]] + ; GCN-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[UV]], [[C4]] ; GCN-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LSHR]], [[C4]] ; GCN-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C2]](s32) ; GCN-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]] @@ -164,7 +156,7 @@ body: | ; GCN-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C2]](s32) ; GCN-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL1]] ; GCN-NEXT: [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32) - ; GCN-NEXT: [[AND4:%[0-9]+]]:_(s32) = G_AND [[UV5]], [[C4]] + ; GCN-NEXT: [[AND4:%[0-9]+]]:_(s32) = G_AND [[UV1]], [[C4]] ; GCN-NEXT: [[AND5:%[0-9]+]]:_(s32) = G_AND [[LSHR3]], [[C4]] ; GCN-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[C2]](s32) ; GCN-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[AND4]], [[SHL2]] @@ -174,7 +166,7 @@ body: | ; GCN-NEXT: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[AND7]], [[C2]](s32) ; GCN-NEXT: [[OR3:%[0-9]+]]:_(s32) = G_OR [[AND6]], [[SHL3]] ; GCN-NEXT: [[BITCAST3:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR3]](s32) - ; GCN-NEXT: [[AND8:%[0-9]+]]:_(s32) = G_AND [[UV6]], [[C4]] + ; GCN-NEXT: [[AND8:%[0-9]+]]:_(s32) = G_AND [[UV2]], [[C4]] ; 
GCN-NEXT: [[AND9:%[0-9]+]]:_(s32) = G_AND [[LSHR6]], [[C4]] ; GCN-NEXT: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[AND9]], [[C2]](s32) ; GCN-NEXT: [[OR4:%[0-9]+]]:_(s32) = G_OR [[AND8]], [[SHL4]] @@ -184,8 +176,8 @@ body: | ; GCN-NEXT: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[AND11]], [[C2]](s32) ; GCN-NEXT: [[OR5:%[0-9]+]]:_(s32) = G_OR [[AND10]], [[SHL5]] ; GCN-NEXT: [[BITCAST5:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR5]](s32) - ; GCN-NEXT: [[CONCAT_VECTORS1:%[0-9]+]]:_(<12 x s16>) = G_CONCAT_VECTORS [[BITCAST]](<2 x s16>), [[BITCAST1]](<2 x s16>), [[BITCAST2]](<2 x s16>), [[BITCAST3]](<2 x s16>), [[BITCAST4]](<2 x s16>), [[BITCAST5]](<2 x s16>) - ; GCN-NEXT: S_ENDPGM 0, implicit [[CONCAT_VECTORS1]](<12 x s16>) + ; GCN-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<12 x s16>) = G_CONCAT_VECTORS [[BITCAST]](<2 x s16>), [[BITCAST1]](<2 x s16>), [[BITCAST2]](<2 x s16>), [[BITCAST3]](<2 x s16>), [[BITCAST4]](<2 x s16>), [[BITCAST5]](<2 x s16>) + ; GCN-NEXT: S_ENDPGM 0, implicit [[CONCAT_VECTORS]](<12 x s16>) %0:_(<4 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3 %1:_(s32) = G_CONSTANT i32 0 %2:_(<12 x s8>) = G_INTRINSIC intrinsic(@llvm.amdgcn.s.buffer.load), %0, %1, 0 @@ -204,10 +196,9 @@ body: | ; GCN: [[COPY:%[0-9]+]]:_(<4 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3 ; GCN-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 ; GCN-NEXT: [[AMDGPU_S_BUFFER_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_S_BUFFER_LOAD [[COPY]](<4 x s32>), [[C]](s32), 0 :: (dereferenceable invariant load (s96), align 4) - ; GCN-NEXT: [[DEF:%[0-9]+]]:_(<4 x s32>) = G_IMPLICIT_DEF - ; GCN-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<12 x s32>) = G_CONCAT_VECTORS [[AMDGPU_S_BUFFER_LOAD]](<4 x s32>), [[DEF]](<4 x s32>), [[DEF]](<4 x s32>) - ; GCN-NEXT: [[UV:%[0-9]+]]:_(<3 x s32>), [[UV1:%[0-9]+]]:_(<3 x s32>), [[UV2:%[0-9]+]]:_(<3 x s32>), [[UV3:%[0-9]+]]:_(<3 x s32>) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<12 x s32>) - ; GCN-NEXT: S_ENDPGM 0, implicit [[UV]](<3 x s32>) + ; GCN-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_S_BUFFER_LOAD]](<4 x s32>) + ; GCN-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[UV]](s32), [[UV1]](s32), [[UV2]](s32) + ; GCN-NEXT: S_ENDPGM 0, implicit [[BUILD_VECTOR]](<3 x s32>) %0:_(<4 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3 %1:_(s32) = G_CONSTANT i32 0 %2:_(<3 x s32>) = G_INTRINSIC intrinsic(@llvm.amdgcn.s.buffer.load), %0, %1, 0 diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-load-constant.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-load-constant.mir index a95c47eb578c..067e66444ac6 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-load-constant.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-load-constant.mir @@ -1160,16 +1160,10 @@ body: | ; CI-NEXT: [[LOAD1:%[0-9]+]]:_(<3 x s32>) = G_LOAD [[PTR_ADD]](p4) :: (load (<3 x s32>) from unknown-address + 16, align 4, addrspace 4) ; CI-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[LOAD]](<4 x s32>) ; CI-NEXT: [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32), [[UV6:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[LOAD1]](<3 x s32>) - ; CI-NEXT: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF - ; CI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[UV]](s32), [[UV1]](s32), [[UV2]](s32) - ; CI-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[UV3]](s32), [[UV4]](s32), [[UV5]](s32) - ; CI-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[UV6]](s32), [[DEF]](s32), [[DEF]](s32) - ; CI-NEXT: 
[[DEF1:%[0-9]+]]:_(<3 x s32>) = G_IMPLICIT_DEF - ; CI-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<21 x s32>) = G_CONCAT_VECTORS [[BUILD_VECTOR]](<3 x s32>), [[BUILD_VECTOR1]](<3 x s32>), [[BUILD_VECTOR2]](<3 x s32>), [[DEF1]](<3 x s32>), [[DEF1]](<3 x s32>), [[DEF1]](<3 x s32>), [[DEF1]](<3 x s32>) - ; CI-NEXT: [[UV7:%[0-9]+]]:_(<7 x s32>), [[UV8:%[0-9]+]]:_(<7 x s32>), [[UV9:%[0-9]+]]:_(<7 x s32>) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<21 x s32>) - ; CI-NEXT: [[BITCAST:%[0-9]+]]:_(s224) = G_BITCAST [[UV7]](<7 x s32>) - ; CI-NEXT: [[DEF2:%[0-9]+]]:_(s256) = G_IMPLICIT_DEF - ; CI-NEXT: [[INSERT:%[0-9]+]]:_(s256) = G_INSERT [[DEF2]], [[BITCAST]](s224), 0 + ; CI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<7 x s32>) = G_BUILD_VECTOR [[UV]](s32), [[UV1]](s32), [[UV2]](s32), [[UV3]](s32), [[UV4]](s32), [[UV5]](s32), [[UV6]](s32) + ; CI-NEXT: [[BITCAST:%[0-9]+]]:_(s224) = G_BITCAST [[BUILD_VECTOR]](<7 x s32>) + ; CI-NEXT: [[DEF:%[0-9]+]]:_(s256) = G_IMPLICIT_DEF + ; CI-NEXT: [[INSERT:%[0-9]+]]:_(s256) = G_INSERT [[DEF]], [[BITCAST]](s224), 0 ; CI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[INSERT]](s256) ; VI-LABEL: name: test_load_constant_s224_align4 ; VI: [[COPY:%[0-9]+]]:_(p4) = COPY $vgpr0_vgpr1 @@ -1179,16 +1173,10 @@ body: | ; VI-NEXT: [[LOAD1:%[0-9]+]]:_(<3 x s32>) = G_LOAD [[PTR_ADD]](p4) :: (load (<3 x s32>) from unknown-address + 16, align 4, addrspace 4) ; VI-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[LOAD]](<4 x s32>) ; VI-NEXT: [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32), [[UV6:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[LOAD1]](<3 x s32>) - ; VI-NEXT: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF - ; VI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[UV]](s32), [[UV1]](s32), [[UV2]](s32) - ; VI-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[UV3]](s32), [[UV4]](s32), [[UV5]](s32) - ; VI-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[UV6]](s32), [[DEF]](s32), [[DEF]](s32) - ; VI-NEXT: [[DEF1:%[0-9]+]]:_(<3 x s32>) = G_IMPLICIT_DEF - ; VI-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<21 x s32>) = G_CONCAT_VECTORS [[BUILD_VECTOR]](<3 x s32>), [[BUILD_VECTOR1]](<3 x s32>), [[BUILD_VECTOR2]](<3 x s32>), [[DEF1]](<3 x s32>), [[DEF1]](<3 x s32>), [[DEF1]](<3 x s32>), [[DEF1]](<3 x s32>) - ; VI-NEXT: [[UV7:%[0-9]+]]:_(<7 x s32>), [[UV8:%[0-9]+]]:_(<7 x s32>), [[UV9:%[0-9]+]]:_(<7 x s32>) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<21 x s32>) - ; VI-NEXT: [[BITCAST:%[0-9]+]]:_(s224) = G_BITCAST [[UV7]](<7 x s32>) - ; VI-NEXT: [[DEF2:%[0-9]+]]:_(s256) = G_IMPLICIT_DEF - ; VI-NEXT: [[INSERT:%[0-9]+]]:_(s256) = G_INSERT [[DEF2]], [[BITCAST]](s224), 0 + ; VI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<7 x s32>) = G_BUILD_VECTOR [[UV]](s32), [[UV1]](s32), [[UV2]](s32), [[UV3]](s32), [[UV4]](s32), [[UV5]](s32), [[UV6]](s32) + ; VI-NEXT: [[BITCAST:%[0-9]+]]:_(s224) = G_BITCAST [[BUILD_VECTOR]](<7 x s32>) + ; VI-NEXT: [[DEF:%[0-9]+]]:_(s256) = G_IMPLICIT_DEF + ; VI-NEXT: [[INSERT:%[0-9]+]]:_(s256) = G_INSERT [[DEF]], [[BITCAST]](s224), 0 ; VI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[INSERT]](s256) ; GFX9-LABEL: name: test_load_constant_s224_align4 ; GFX9: [[COPY:%[0-9]+]]:_(p4) = COPY $vgpr0_vgpr1 @@ -1198,16 +1186,10 @@ body: | ; GFX9-NEXT: [[LOAD1:%[0-9]+]]:_(<3 x s32>) = G_LOAD [[PTR_ADD]](p4) :: (load (<3 x s32>) from unknown-address + 16, align 4, addrspace 4) ; GFX9-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = 
G_UNMERGE_VALUES [[LOAD]](<4 x s32>) ; GFX9-NEXT: [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32), [[UV6:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[LOAD1]](<3 x s32>) - ; GFX9-NEXT: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF - ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[UV]](s32), [[UV1]](s32), [[UV2]](s32) - ; GFX9-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[UV3]](s32), [[UV4]](s32), [[UV5]](s32) - ; GFX9-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[UV6]](s32), [[DEF]](s32), [[DEF]](s32) - ; GFX9-NEXT: [[DEF1:%[0-9]+]]:_(<3 x s32>) = G_IMPLICIT_DEF - ; GFX9-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<21 x s32>) = G_CONCAT_VECTORS [[BUILD_VECTOR]](<3 x s32>), [[BUILD_VECTOR1]](<3 x s32>), [[BUILD_VECTOR2]](<3 x s32>), [[DEF1]](<3 x s32>), [[DEF1]](<3 x s32>), [[DEF1]](<3 x s32>), [[DEF1]](<3 x s32>) - ; GFX9-NEXT: [[UV7:%[0-9]+]]:_(<7 x s32>), [[UV8:%[0-9]+]]:_(<7 x s32>), [[UV9:%[0-9]+]]:_(<7 x s32>) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<21 x s32>) - ; GFX9-NEXT: [[BITCAST:%[0-9]+]]:_(s224) = G_BITCAST [[UV7]](<7 x s32>) - ; GFX9-NEXT: [[DEF2:%[0-9]+]]:_(s256) = G_IMPLICIT_DEF - ; GFX9-NEXT: [[INSERT:%[0-9]+]]:_(s256) = G_INSERT [[DEF2]], [[BITCAST]](s224), 0 + ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<7 x s32>) = G_BUILD_VECTOR [[UV]](s32), [[UV1]](s32), [[UV2]](s32), [[UV3]](s32), [[UV4]](s32), [[UV5]](s32), [[UV6]](s32) + ; GFX9-NEXT: [[BITCAST:%[0-9]+]]:_(s224) = G_BITCAST [[BUILD_VECTOR]](<7 x s32>) + ; GFX9-NEXT: [[DEF:%[0-9]+]]:_(s256) = G_IMPLICIT_DEF + ; GFX9-NEXT: [[INSERT:%[0-9]+]]:_(s256) = G_INSERT [[DEF]], [[BITCAST]](s224), 0 ; GFX9-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[INSERT]](s256) %0:_(p4) = COPY $vgpr0_vgpr1 %1:_(s224) = G_LOAD %0 :: (load (s224), align 4, addrspace 4) @@ -3364,12 +3346,12 @@ body: | ; CI-LABEL: name: test_load_constant_v3s16_align8 ; CI: [[COPY:%[0-9]+]]:_(p4) = COPY $vgpr0_vgpr1 ; CI-NEXT: [[LOAD:%[0-9]+]]:_(<4 x s16>) = G_LOAD [[COPY]](p4) :: (load (<4 x s16>), addrspace 4) - ; CI-NEXT: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF ; CI-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[LOAD]](<4 x s16>) ; CI-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>) ; CI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 ; CI-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) ; CI-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>) + ; CI-NEXT: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF ; CI-NEXT: [[UV2:%[0-9]+]]:_(<2 x s16>), [[UV3:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<4 x s16>) ; CI-NEXT: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[UV2]](<2 x s16>) ; CI-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C]](s32) @@ -3395,12 +3377,12 @@ body: | ; VI-LABEL: name: test_load_constant_v3s16_align8 ; VI: [[COPY:%[0-9]+]]:_(p4) = COPY $vgpr0_vgpr1 ; VI-NEXT: [[LOAD:%[0-9]+]]:_(<4 x s16>) = G_LOAD [[COPY]](p4) :: (load (<4 x s16>), addrspace 4) - ; VI-NEXT: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF ; VI-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[LOAD]](<4 x s16>) ; VI-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>) ; VI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 ; VI-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) ; VI-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>) + ; VI-NEXT: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF ; VI-NEXT: [[UV2:%[0-9]+]]:_(<2 x s16>), [[UV3:%[0-9]+]]:_(<2 x s16>) = 
G_UNMERGE_VALUES [[DEF]](<4 x s16>) ; VI-NEXT: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[UV2]](<2 x s16>) ; VI-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C]](s32) @@ -3426,12 +3408,12 @@ body: | ; GFX9-LABEL: name: test_load_constant_v3s16_align8 ; GFX9: [[COPY:%[0-9]+]]:_(p4) = COPY $vgpr0_vgpr1 ; GFX9-NEXT: [[LOAD:%[0-9]+]]:_(<4 x s16>) = G_LOAD [[COPY]](p4) :: (load (<4 x s16>), addrspace 4) - ; GFX9-NEXT: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF ; GFX9-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[LOAD]](<4 x s16>) ; GFX9-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>) ; GFX9-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 ; GFX9-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) ; GFX9-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>) + ; GFX9-NEXT: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF ; GFX9-NEXT: [[UV2:%[0-9]+]]:_(<2 x s16>), [[UV3:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<4 x s16>) ; GFX9-NEXT: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[UV2]](<2 x s16>) ; GFX9-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C]](s32) @@ -4935,24 +4917,27 @@ body: | ; CI-LABEL: name: test_load_constant_v3s64_align32 ; CI: [[COPY:%[0-9]+]]:_(p4) = COPY $vgpr0_vgpr1 ; CI-NEXT: [[LOAD:%[0-9]+]]:_(<4 x s64>) = G_LOAD [[COPY]](p4) :: (load (<4 x s64>), addrspace 4) - ; CI-NEXT: [[EXTRACT:%[0-9]+]]:_(<3 x s64>) = G_EXTRACT [[LOAD]](<4 x s64>), 0 + ; CI-NEXT: [[UV:%[0-9]+]]:_(s64), [[UV1:%[0-9]+]]:_(s64), [[UV2:%[0-9]+]]:_(s64), [[UV3:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[LOAD]](<4 x s64>) ; CI-NEXT: [[DEF:%[0-9]+]]:_(<4 x s64>) = G_IMPLICIT_DEF - ; CI-NEXT: [[INSERT:%[0-9]+]]:_(<4 x s64>) = G_INSERT [[DEF]], [[EXTRACT]](<3 x s64>), 0 - ; CI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[INSERT]](<4 x s64>) + ; CI-NEXT: [[UV4:%[0-9]+]]:_(s64), [[UV5:%[0-9]+]]:_(s64), [[UV6:%[0-9]+]]:_(s64), [[UV7:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[DEF]](<4 x s64>) + ; CI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s64>) = G_BUILD_VECTOR [[UV]](s64), [[UV1]](s64), [[UV2]](s64), [[UV7]](s64) + ; CI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[BUILD_VECTOR]](<4 x s64>) ; VI-LABEL: name: test_load_constant_v3s64_align32 ; VI: [[COPY:%[0-9]+]]:_(p4) = COPY $vgpr0_vgpr1 ; VI-NEXT: [[LOAD:%[0-9]+]]:_(<4 x s64>) = G_LOAD [[COPY]](p4) :: (load (<4 x s64>), addrspace 4) - ; VI-NEXT: [[EXTRACT:%[0-9]+]]:_(<3 x s64>) = G_EXTRACT [[LOAD]](<4 x s64>), 0 + ; VI-NEXT: [[UV:%[0-9]+]]:_(s64), [[UV1:%[0-9]+]]:_(s64), [[UV2:%[0-9]+]]:_(s64), [[UV3:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[LOAD]](<4 x s64>) ; VI-NEXT: [[DEF:%[0-9]+]]:_(<4 x s64>) = G_IMPLICIT_DEF - ; VI-NEXT: [[INSERT:%[0-9]+]]:_(<4 x s64>) = G_INSERT [[DEF]], [[EXTRACT]](<3 x s64>), 0 - ; VI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[INSERT]](<4 x s64>) + ; VI-NEXT: [[UV4:%[0-9]+]]:_(s64), [[UV5:%[0-9]+]]:_(s64), [[UV6:%[0-9]+]]:_(s64), [[UV7:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[DEF]](<4 x s64>) + ; VI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s64>) = G_BUILD_VECTOR [[UV]](s64), [[UV1]](s64), [[UV2]](s64), [[UV7]](s64) + ; VI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[BUILD_VECTOR]](<4 x s64>) ; GFX9-LABEL: name: test_load_constant_v3s64_align32 ; GFX9: [[COPY:%[0-9]+]]:_(p4) = COPY $vgpr0_vgpr1 ; GFX9-NEXT: [[LOAD:%[0-9]+]]:_(<4 x s64>) = G_LOAD [[COPY]](p4) :: (load (<4 x s64>), addrspace 4) - ; GFX9-NEXT: [[EXTRACT:%[0-9]+]]:_(<3 x s64>) = G_EXTRACT [[LOAD]](<4 x s64>), 0 + ; GFX9-NEXT: 
[[UV:%[0-9]+]]:_(s64), [[UV1:%[0-9]+]]:_(s64), [[UV2:%[0-9]+]]:_(s64), [[UV3:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[LOAD]](<4 x s64>) ; GFX9-NEXT: [[DEF:%[0-9]+]]:_(<4 x s64>) = G_IMPLICIT_DEF - ; GFX9-NEXT: [[INSERT:%[0-9]+]]:_(<4 x s64>) = G_INSERT [[DEF]], [[EXTRACT]](<3 x s64>), 0 - ; GFX9-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[INSERT]](<4 x s64>) + ; GFX9-NEXT: [[UV4:%[0-9]+]]:_(s64), [[UV5:%[0-9]+]]:_(s64), [[UV6:%[0-9]+]]:_(s64), [[UV7:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[DEF]](<4 x s64>) + ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s64>) = G_BUILD_VECTOR [[UV]](s64), [[UV1]](s64), [[UV2]](s64), [[UV7]](s64) + ; GFX9-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[BUILD_VECTOR]](<4 x s64>) %0:_(p4) = COPY $vgpr0_vgpr1 %1:_(<3 x s64>) = G_LOAD %0 :: (load (<3 x s64>), align 32, addrspace 4) %2:_(<4 x s64>) = G_IMPLICIT_DEF @@ -4973,10 +4958,10 @@ body: | ; CI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64) ; CI-NEXT: [[LOAD1:%[0-9]+]]:_(s64) = G_LOAD [[PTR_ADD]](p4) :: (load (s64) from unknown-address + 16, addrspace 4) ; CI-NEXT: [[UV:%[0-9]+]]:_(s64), [[UV1:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[LOAD]](<2 x s64>) - ; CI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s64>) = G_BUILD_VECTOR [[UV]](s64), [[UV1]](s64), [[LOAD1]](s64) ; CI-NEXT: [[DEF:%[0-9]+]]:_(<4 x s64>) = G_IMPLICIT_DEF - ; CI-NEXT: [[INSERT:%[0-9]+]]:_(<4 x s64>) = G_INSERT [[DEF]], [[BUILD_VECTOR]](<3 x s64>), 0 - ; CI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[INSERT]](<4 x s64>) + ; CI-NEXT: [[UV2:%[0-9]+]]:_(s64), [[UV3:%[0-9]+]]:_(s64), [[UV4:%[0-9]+]]:_(s64), [[UV5:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[DEF]](<4 x s64>) + ; CI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s64>) = G_BUILD_VECTOR [[UV]](s64), [[UV1]](s64), [[LOAD1]](s64), [[UV5]](s64) + ; CI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[BUILD_VECTOR]](<4 x s64>) ; VI-LABEL: name: test_load_constant_v3s64_align8 ; VI: [[COPY:%[0-9]+]]:_(p4) = COPY $vgpr0_vgpr1 ; VI-NEXT: [[LOAD:%[0-9]+]]:_(<2 x s64>) = G_LOAD [[COPY]](p4) :: (load (<2 x s64>), align 8, addrspace 4) @@ -4984,10 +4969,10 @@ body: | ; VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64) ; VI-NEXT: [[LOAD1:%[0-9]+]]:_(s64) = G_LOAD [[PTR_ADD]](p4) :: (load (s64) from unknown-address + 16, addrspace 4) ; VI-NEXT: [[UV:%[0-9]+]]:_(s64), [[UV1:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[LOAD]](<2 x s64>) - ; VI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s64>) = G_BUILD_VECTOR [[UV]](s64), [[UV1]](s64), [[LOAD1]](s64) ; VI-NEXT: [[DEF:%[0-9]+]]:_(<4 x s64>) = G_IMPLICIT_DEF - ; VI-NEXT: [[INSERT:%[0-9]+]]:_(<4 x s64>) = G_INSERT [[DEF]], [[BUILD_VECTOR]](<3 x s64>), 0 - ; VI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[INSERT]](<4 x s64>) + ; VI-NEXT: [[UV2:%[0-9]+]]:_(s64), [[UV3:%[0-9]+]]:_(s64), [[UV4:%[0-9]+]]:_(s64), [[UV5:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[DEF]](<4 x s64>) + ; VI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s64>) = G_BUILD_VECTOR [[UV]](s64), [[UV1]](s64), [[LOAD1]](s64), [[UV5]](s64) + ; VI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[BUILD_VECTOR]](<4 x s64>) ; GFX9-LABEL: name: test_load_constant_v3s64_align8 ; GFX9: [[COPY:%[0-9]+]]:_(p4) = COPY $vgpr0_vgpr1 ; GFX9-NEXT: [[LOAD:%[0-9]+]]:_(<2 x s64>) = G_LOAD [[COPY]](p4) :: (load (<2 x s64>), align 8, addrspace 4) @@ -4995,10 +4980,10 @@ body: | ; GFX9-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64) ; GFX9-NEXT: [[LOAD1:%[0-9]+]]:_(s64) = G_LOAD [[PTR_ADD]](p4) :: (load (s64) 
from unknown-address + 16, addrspace 4) ; GFX9-NEXT: [[UV:%[0-9]+]]:_(s64), [[UV1:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[LOAD]](<2 x s64>) - ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s64>) = G_BUILD_VECTOR [[UV]](s64), [[UV1]](s64), [[LOAD1]](s64) ; GFX9-NEXT: [[DEF:%[0-9]+]]:_(<4 x s64>) = G_IMPLICIT_DEF - ; GFX9-NEXT: [[INSERT:%[0-9]+]]:_(<4 x s64>) = G_INSERT [[DEF]], [[BUILD_VECTOR]](<3 x s64>), 0 - ; GFX9-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[INSERT]](<4 x s64>) + ; GFX9-NEXT: [[UV2:%[0-9]+]]:_(s64), [[UV3:%[0-9]+]]:_(s64), [[UV4:%[0-9]+]]:_(s64), [[UV5:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[DEF]](<4 x s64>) + ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s64>) = G_BUILD_VECTOR [[UV]](s64), [[UV1]](s64), [[LOAD1]](s64), [[UV5]](s64) + ; GFX9-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[BUILD_VECTOR]](<4 x s64>) %0:_(p4) = COPY $vgpr0_vgpr1 %1:_(<3 x s64>) = G_LOAD %0 :: (load (<3 x s64>), align 8, addrspace 4) %2:_(<4 x s64>) = G_IMPLICIT_DEF @@ -5119,10 +5104,10 @@ body: | ; CI-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY [[C5]](s32) ; CI-NEXT: [[SHL20:%[0-9]+]]:_(s64) = G_SHL [[ANYEXT2]], [[COPY2]](s32) ; CI-NEXT: [[OR20:%[0-9]+]]:_(s64) = G_OR [[SHL20]], [[ZEXT2]] - ; CI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s64>) = G_BUILD_VECTOR [[OR6]](s64), [[OR13]](s64), [[OR20]](s64) ; CI-NEXT: [[DEF:%[0-9]+]]:_(<4 x s64>) = G_IMPLICIT_DEF - ; CI-NEXT: [[INSERT:%[0-9]+]]:_(<4 x s64>) = G_INSERT [[DEF]], [[BUILD_VECTOR]](<3 x s64>), 0 - ; CI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[INSERT]](<4 x s64>) + ; CI-NEXT: [[UV:%[0-9]+]]:_(s64), [[UV1:%[0-9]+]]:_(s64), [[UV2:%[0-9]+]]:_(s64), [[UV3:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[DEF]](<4 x s64>) + ; CI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s64>) = G_BUILD_VECTOR [[OR6]](s64), [[OR13]](s64), [[OR20]](s64), [[UV3]](s64) + ; CI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[BUILD_VECTOR]](<4 x s64>) ; VI-LABEL: name: test_load_constant_v3s64_align1 ; VI: [[COPY:%[0-9]+]]:_(p4) = COPY $vgpr0_vgpr1 ; VI-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[COPY]](p4) :: (load (s8), addrspace 4) @@ -5230,10 +5215,10 @@ body: | ; VI-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY [[C5]](s32) ; VI-NEXT: [[SHL20:%[0-9]+]]:_(s64) = G_SHL [[ANYEXT2]], [[COPY2]](s32) ; VI-NEXT: [[OR20:%[0-9]+]]:_(s64) = G_OR [[SHL20]], [[ZEXT2]] - ; VI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s64>) = G_BUILD_VECTOR [[OR6]](s64), [[OR13]](s64), [[OR20]](s64) ; VI-NEXT: [[DEF:%[0-9]+]]:_(<4 x s64>) = G_IMPLICIT_DEF - ; VI-NEXT: [[INSERT:%[0-9]+]]:_(<4 x s64>) = G_INSERT [[DEF]], [[BUILD_VECTOR]](<3 x s64>), 0 - ; VI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[INSERT]](<4 x s64>) + ; VI-NEXT: [[UV:%[0-9]+]]:_(s64), [[UV1:%[0-9]+]]:_(s64), [[UV2:%[0-9]+]]:_(s64), [[UV3:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[DEF]](<4 x s64>) + ; VI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s64>) = G_BUILD_VECTOR [[OR6]](s64), [[OR13]](s64), [[OR20]](s64), [[UV3]](s64) + ; VI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[BUILD_VECTOR]](<4 x s64>) ; GFX9-LABEL: name: test_load_constant_v3s64_align1 ; GFX9: [[COPY:%[0-9]+]]:_(p4) = COPY $vgpr0_vgpr1 ; GFX9-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[COPY]](p4) :: (load (s8), addrspace 4) @@ -5341,10 +5326,10 @@ body: | ; GFX9-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY [[C5]](s32) ; GFX9-NEXT: [[SHL20:%[0-9]+]]:_(s64) = G_SHL [[ANYEXT2]], [[COPY2]](s32) ; GFX9-NEXT: [[OR20:%[0-9]+]]:_(s64) = G_OR [[SHL20]], [[ZEXT2]] - ; GFX9-NEXT: 
[[BUILD_VECTOR:%[0-9]+]]:_(<3 x s64>) = G_BUILD_VECTOR [[OR6]](s64), [[OR13]](s64), [[OR20]](s64) ; GFX9-NEXT: [[DEF:%[0-9]+]]:_(<4 x s64>) = G_IMPLICIT_DEF - ; GFX9-NEXT: [[INSERT:%[0-9]+]]:_(<4 x s64>) = G_INSERT [[DEF]], [[BUILD_VECTOR]](<3 x s64>), 0 - ; GFX9-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[INSERT]](<4 x s64>) + ; GFX9-NEXT: [[UV:%[0-9]+]]:_(s64), [[UV1:%[0-9]+]]:_(s64), [[UV2:%[0-9]+]]:_(s64), [[UV3:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[DEF]](<4 x s64>) + ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s64>) = G_BUILD_VECTOR [[OR6]](s64), [[OR13]](s64), [[OR20]](s64), [[UV3]](s64) + ; GFX9-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[BUILD_VECTOR]](<4 x s64>) %0:_(p4) = COPY $vgpr0_vgpr1 %1:_(<3 x s64>) = G_LOAD %0 :: (load (<3 x s64>), align 1, addrspace 4) %2:_(<4 x s64>) = G_IMPLICIT_DEF diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-load-flat.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-load-flat.mir index e08cfd5ffa09..7b047ddd27e3 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-load-flat.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-load-flat.mir @@ -1010,16 +1010,10 @@ body: | ; CI-NEXT: [[LOAD1:%[0-9]+]]:_(<3 x s32>) = G_LOAD [[PTR_ADD]](p0) :: (load (<3 x s32>) from unknown-address + 16, align 4) ; CI-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[LOAD]](<4 x s32>) ; CI-NEXT: [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32), [[UV6:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[LOAD1]](<3 x s32>) - ; CI-NEXT: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF - ; CI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[UV]](s32), [[UV1]](s32), [[UV2]](s32) - ; CI-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[UV3]](s32), [[UV4]](s32), [[UV5]](s32) - ; CI-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[UV6]](s32), [[DEF]](s32), [[DEF]](s32) - ; CI-NEXT: [[DEF1:%[0-9]+]]:_(<3 x s32>) = G_IMPLICIT_DEF - ; CI-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<21 x s32>) = G_CONCAT_VECTORS [[BUILD_VECTOR]](<3 x s32>), [[BUILD_VECTOR1]](<3 x s32>), [[BUILD_VECTOR2]](<3 x s32>), [[DEF1]](<3 x s32>), [[DEF1]](<3 x s32>), [[DEF1]](<3 x s32>), [[DEF1]](<3 x s32>) - ; CI-NEXT: [[UV7:%[0-9]+]]:_(<7 x s32>), [[UV8:%[0-9]+]]:_(<7 x s32>), [[UV9:%[0-9]+]]:_(<7 x s32>) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<21 x s32>) - ; CI-NEXT: [[BITCAST:%[0-9]+]]:_(s224) = G_BITCAST [[UV7]](<7 x s32>) - ; CI-NEXT: [[DEF2:%[0-9]+]]:_(s256) = G_IMPLICIT_DEF - ; CI-NEXT: [[INSERT:%[0-9]+]]:_(s256) = G_INSERT [[DEF2]], [[BITCAST]](s224), 0 + ; CI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<7 x s32>) = G_BUILD_VECTOR [[UV]](s32), [[UV1]](s32), [[UV2]](s32), [[UV3]](s32), [[UV4]](s32), [[UV5]](s32), [[UV6]](s32) + ; CI-NEXT: [[BITCAST:%[0-9]+]]:_(s224) = G_BITCAST [[BUILD_VECTOR]](<7 x s32>) + ; CI-NEXT: [[DEF:%[0-9]+]]:_(s256) = G_IMPLICIT_DEF + ; CI-NEXT: [[INSERT:%[0-9]+]]:_(s256) = G_INSERT [[DEF]], [[BITCAST]](s224), 0 ; CI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[INSERT]](s256) ; VI-LABEL: name: test_load_flat_s224_align4 ; VI: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1 @@ -1029,16 +1023,10 @@ body: | ; VI-NEXT: [[LOAD1:%[0-9]+]]:_(<3 x s32>) = G_LOAD [[PTR_ADD]](p0) :: (load (<3 x s32>) from unknown-address + 16, align 4) ; VI-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[LOAD]](<4 x s32>) ; VI-NEXT: [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32), 
[[UV6:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[LOAD1]](<3 x s32>) - ; VI-NEXT: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF - ; VI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[UV]](s32), [[UV1]](s32), [[UV2]](s32) - ; VI-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[UV3]](s32), [[UV4]](s32), [[UV5]](s32) - ; VI-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[UV6]](s32), [[DEF]](s32), [[DEF]](s32) - ; VI-NEXT: [[DEF1:%[0-9]+]]:_(<3 x s32>) = G_IMPLICIT_DEF - ; VI-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<21 x s32>) = G_CONCAT_VECTORS [[BUILD_VECTOR]](<3 x s32>), [[BUILD_VECTOR1]](<3 x s32>), [[BUILD_VECTOR2]](<3 x s32>), [[DEF1]](<3 x s32>), [[DEF1]](<3 x s32>), [[DEF1]](<3 x s32>), [[DEF1]](<3 x s32>) - ; VI-NEXT: [[UV7:%[0-9]+]]:_(<7 x s32>), [[UV8:%[0-9]+]]:_(<7 x s32>), [[UV9:%[0-9]+]]:_(<7 x s32>) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<21 x s32>) - ; VI-NEXT: [[BITCAST:%[0-9]+]]:_(s224) = G_BITCAST [[UV7]](<7 x s32>) - ; VI-NEXT: [[DEF2:%[0-9]+]]:_(s256) = G_IMPLICIT_DEF - ; VI-NEXT: [[INSERT:%[0-9]+]]:_(s256) = G_INSERT [[DEF2]], [[BITCAST]](s224), 0 + ; VI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<7 x s32>) = G_BUILD_VECTOR [[UV]](s32), [[UV1]](s32), [[UV2]](s32), [[UV3]](s32), [[UV4]](s32), [[UV5]](s32), [[UV6]](s32) + ; VI-NEXT: [[BITCAST:%[0-9]+]]:_(s224) = G_BITCAST [[BUILD_VECTOR]](<7 x s32>) + ; VI-NEXT: [[DEF:%[0-9]+]]:_(s256) = G_IMPLICIT_DEF + ; VI-NEXT: [[INSERT:%[0-9]+]]:_(s256) = G_INSERT [[DEF]], [[BITCAST]](s224), 0 ; VI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[INSERT]](s256) ; GFX9-LABEL: name: test_load_flat_s224_align4 ; GFX9: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1 @@ -1048,16 +1036,10 @@ body: | ; GFX9-NEXT: [[LOAD1:%[0-9]+]]:_(<3 x s32>) = G_LOAD [[PTR_ADD]](p0) :: (load (<3 x s32>) from unknown-address + 16, align 4) ; GFX9-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[LOAD]](<4 x s32>) ; GFX9-NEXT: [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32), [[UV6:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[LOAD1]](<3 x s32>) - ; GFX9-NEXT: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF - ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[UV]](s32), [[UV1]](s32), [[UV2]](s32) - ; GFX9-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[UV3]](s32), [[UV4]](s32), [[UV5]](s32) - ; GFX9-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[UV6]](s32), [[DEF]](s32), [[DEF]](s32) - ; GFX9-NEXT: [[DEF1:%[0-9]+]]:_(<3 x s32>) = G_IMPLICIT_DEF - ; GFX9-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<21 x s32>) = G_CONCAT_VECTORS [[BUILD_VECTOR]](<3 x s32>), [[BUILD_VECTOR1]](<3 x s32>), [[BUILD_VECTOR2]](<3 x s32>), [[DEF1]](<3 x s32>), [[DEF1]](<3 x s32>), [[DEF1]](<3 x s32>), [[DEF1]](<3 x s32>) - ; GFX9-NEXT: [[UV7:%[0-9]+]]:_(<7 x s32>), [[UV8:%[0-9]+]]:_(<7 x s32>), [[UV9:%[0-9]+]]:_(<7 x s32>) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<21 x s32>) - ; GFX9-NEXT: [[BITCAST:%[0-9]+]]:_(s224) = G_BITCAST [[UV7]](<7 x s32>) - ; GFX9-NEXT: [[DEF2:%[0-9]+]]:_(s256) = G_IMPLICIT_DEF - ; GFX9-NEXT: [[INSERT:%[0-9]+]]:_(s256) = G_INSERT [[DEF2]], [[BITCAST]](s224), 0 + ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<7 x s32>) = G_BUILD_VECTOR [[UV]](s32), [[UV1]](s32), [[UV2]](s32), [[UV3]](s32), [[UV4]](s32), [[UV5]](s32), [[UV6]](s32) + ; GFX9-NEXT: [[BITCAST:%[0-9]+]]:_(s224) = G_BITCAST [[BUILD_VECTOR]](<7 x s32>) + ; GFX9-NEXT: [[DEF:%[0-9]+]]:_(s256) = G_IMPLICIT_DEF + ; GFX9-NEXT: [[INSERT:%[0-9]+]]:_(s256) = G_INSERT [[DEF]], [[BITCAST]](s224), 0 
; GFX9-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[INSERT]](s256) %0:_(p0) = COPY $vgpr0_vgpr1 %1:_(s224) = G_LOAD %0 :: (load (s224), align 4, addrspace 0) @@ -3510,12 +3492,12 @@ body: | ; CI-LABEL: name: test_load_flat_v3s16_align8 ; CI: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1 ; CI-NEXT: [[LOAD:%[0-9]+]]:_(<4 x s16>) = G_LOAD [[COPY]](p0) :: (load (<4 x s16>)) - ; CI-NEXT: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF ; CI-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[LOAD]](<4 x s16>) ; CI-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>) ; CI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 ; CI-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) ; CI-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>) + ; CI-NEXT: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF ; CI-NEXT: [[UV2:%[0-9]+]]:_(<2 x s16>), [[UV3:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<4 x s16>) ; CI-NEXT: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[UV2]](<2 x s16>) ; CI-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C]](s32) @@ -3541,12 +3523,12 @@ body: | ; VI-LABEL: name: test_load_flat_v3s16_align8 ; VI: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1 ; VI-NEXT: [[LOAD:%[0-9]+]]:_(<4 x s16>) = G_LOAD [[COPY]](p0) :: (load (<4 x s16>)) - ; VI-NEXT: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF ; VI-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[LOAD]](<4 x s16>) ; VI-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>) ; VI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 ; VI-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) ; VI-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>) + ; VI-NEXT: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF ; VI-NEXT: [[UV2:%[0-9]+]]:_(<2 x s16>), [[UV3:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<4 x s16>) ; VI-NEXT: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[UV2]](<2 x s16>) ; VI-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C]](s32) @@ -3572,12 +3554,12 @@ body: | ; GFX9-LABEL: name: test_load_flat_v3s16_align8 ; GFX9: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1 ; GFX9-NEXT: [[LOAD:%[0-9]+]]:_(<4 x s16>) = G_LOAD [[COPY]](p0) :: (load (<4 x s16>)) - ; GFX9-NEXT: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF ; GFX9-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[LOAD]](<4 x s16>) ; GFX9-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>) ; GFX9-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 ; GFX9-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) ; GFX9-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>) + ; GFX9-NEXT: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF ; GFX9-NEXT: [[UV2:%[0-9]+]]:_(<2 x s16>), [[UV3:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<4 x s16>) ; GFX9-NEXT: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[UV2]](<2 x s16>) ; GFX9-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C]](s32) @@ -4944,10 +4926,10 @@ body: | ; CI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C]](s64) ; CI-NEXT: [[LOAD1:%[0-9]+]]:_(s64) = G_LOAD [[PTR_ADD]](p0) :: (load (s64) from unknown-address + 16, align 16) ; CI-NEXT: [[UV:%[0-9]+]]:_(s64), [[UV1:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[LOAD]](<2 x s64>) - ; CI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s64>) = G_BUILD_VECTOR [[UV]](s64), [[UV1]](s64), [[LOAD1]](s64) ; CI-NEXT: [[DEF:%[0-9]+]]:_(<4 x s64>) = G_IMPLICIT_DEF - ; CI-NEXT: [[INSERT:%[0-9]+]]:_(<4 x 
s64>) = G_INSERT [[DEF]], [[BUILD_VECTOR]](<3 x s64>), 0 - ; CI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[INSERT]](<4 x s64>) + ; CI-NEXT: [[UV2:%[0-9]+]]:_(s64), [[UV3:%[0-9]+]]:_(s64), [[UV4:%[0-9]+]]:_(s64), [[UV5:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[DEF]](<4 x s64>) + ; CI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s64>) = G_BUILD_VECTOR [[UV]](s64), [[UV1]](s64), [[LOAD1]](s64), [[UV5]](s64) + ; CI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[BUILD_VECTOR]](<4 x s64>) ; VI-LABEL: name: test_load_flat_v3s64_align32 ; VI: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1 ; VI-NEXT: [[LOAD:%[0-9]+]]:_(<2 x s64>) = G_LOAD [[COPY]](p0) :: (load (<2 x s64>), align 32) @@ -4955,10 +4937,10 @@ body: | ; VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C]](s64) ; VI-NEXT: [[LOAD1:%[0-9]+]]:_(s64) = G_LOAD [[PTR_ADD]](p0) :: (load (s64) from unknown-address + 16, align 16) ; VI-NEXT: [[UV:%[0-9]+]]:_(s64), [[UV1:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[LOAD]](<2 x s64>) - ; VI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s64>) = G_BUILD_VECTOR [[UV]](s64), [[UV1]](s64), [[LOAD1]](s64) ; VI-NEXT: [[DEF:%[0-9]+]]:_(<4 x s64>) = G_IMPLICIT_DEF - ; VI-NEXT: [[INSERT:%[0-9]+]]:_(<4 x s64>) = G_INSERT [[DEF]], [[BUILD_VECTOR]](<3 x s64>), 0 - ; VI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[INSERT]](<4 x s64>) + ; VI-NEXT: [[UV2:%[0-9]+]]:_(s64), [[UV3:%[0-9]+]]:_(s64), [[UV4:%[0-9]+]]:_(s64), [[UV5:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[DEF]](<4 x s64>) + ; VI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s64>) = G_BUILD_VECTOR [[UV]](s64), [[UV1]](s64), [[LOAD1]](s64), [[UV5]](s64) + ; VI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[BUILD_VECTOR]](<4 x s64>) ; GFX9-LABEL: name: test_load_flat_v3s64_align32 ; GFX9: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1 ; GFX9-NEXT: [[LOAD:%[0-9]+]]:_(<2 x s64>) = G_LOAD [[COPY]](p0) :: (load (<2 x s64>), align 32) @@ -4966,10 +4948,10 @@ body: | ; GFX9-NEXT: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C]](s64) ; GFX9-NEXT: [[LOAD1:%[0-9]+]]:_(s64) = G_LOAD [[PTR_ADD]](p0) :: (load (s64) from unknown-address + 16, align 16) ; GFX9-NEXT: [[UV:%[0-9]+]]:_(s64), [[UV1:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[LOAD]](<2 x s64>) - ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s64>) = G_BUILD_VECTOR [[UV]](s64), [[UV1]](s64), [[LOAD1]](s64) ; GFX9-NEXT: [[DEF:%[0-9]+]]:_(<4 x s64>) = G_IMPLICIT_DEF - ; GFX9-NEXT: [[INSERT:%[0-9]+]]:_(<4 x s64>) = G_INSERT [[DEF]], [[BUILD_VECTOR]](<3 x s64>), 0 - ; GFX9-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[INSERT]](<4 x s64>) + ; GFX9-NEXT: [[UV2:%[0-9]+]]:_(s64), [[UV3:%[0-9]+]]:_(s64), [[UV4:%[0-9]+]]:_(s64), [[UV5:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[DEF]](<4 x s64>) + ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s64>) = G_BUILD_VECTOR [[UV]](s64), [[UV1]](s64), [[LOAD1]](s64), [[UV5]](s64) + ; GFX9-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[BUILD_VECTOR]](<4 x s64>) %0:_(p0) = COPY $vgpr0_vgpr1 %1:_(<3 x s64>) = G_LOAD %0 :: (load (<3 x s64>), align 32, addrspace 0) %2:_(<4 x s64>) = G_IMPLICIT_DEF @@ -4990,10 +4972,10 @@ body: | ; CI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C]](s64) ; CI-NEXT: [[LOAD1:%[0-9]+]]:_(s64) = G_LOAD [[PTR_ADD]](p0) :: (load (s64) from unknown-address + 16) ; CI-NEXT: [[UV:%[0-9]+]]:_(s64), [[UV1:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[LOAD]](<2 x s64>) - ; CI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s64>) = G_BUILD_VECTOR [[UV]](s64), [[UV1]](s64), [[LOAD1]](s64) ; CI-NEXT: 
[[DEF:%[0-9]+]]:_(<4 x s64>) = G_IMPLICIT_DEF - ; CI-NEXT: [[INSERT:%[0-9]+]]:_(<4 x s64>) = G_INSERT [[DEF]], [[BUILD_VECTOR]](<3 x s64>), 0 - ; CI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[INSERT]](<4 x s64>) + ; CI-NEXT: [[UV2:%[0-9]+]]:_(s64), [[UV3:%[0-9]+]]:_(s64), [[UV4:%[0-9]+]]:_(s64), [[UV5:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[DEF]](<4 x s64>) + ; CI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s64>) = G_BUILD_VECTOR [[UV]](s64), [[UV1]](s64), [[LOAD1]](s64), [[UV5]](s64) + ; CI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[BUILD_VECTOR]](<4 x s64>) ; VI-LABEL: name: test_load_flat_v3s64_align8 ; VI: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1 ; VI-NEXT: [[LOAD:%[0-9]+]]:_(<2 x s64>) = G_LOAD [[COPY]](p0) :: (load (<2 x s64>), align 8) @@ -5001,10 +4983,10 @@ body: | ; VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C]](s64) ; VI-NEXT: [[LOAD1:%[0-9]+]]:_(s64) = G_LOAD [[PTR_ADD]](p0) :: (load (s64) from unknown-address + 16) ; VI-NEXT: [[UV:%[0-9]+]]:_(s64), [[UV1:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[LOAD]](<2 x s64>) - ; VI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s64>) = G_BUILD_VECTOR [[UV]](s64), [[UV1]](s64), [[LOAD1]](s64) ; VI-NEXT: [[DEF:%[0-9]+]]:_(<4 x s64>) = G_IMPLICIT_DEF - ; VI-NEXT: [[INSERT:%[0-9]+]]:_(<4 x s64>) = G_INSERT [[DEF]], [[BUILD_VECTOR]](<3 x s64>), 0 - ; VI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[INSERT]](<4 x s64>) + ; VI-NEXT: [[UV2:%[0-9]+]]:_(s64), [[UV3:%[0-9]+]]:_(s64), [[UV4:%[0-9]+]]:_(s64), [[UV5:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[DEF]](<4 x s64>) + ; VI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s64>) = G_BUILD_VECTOR [[UV]](s64), [[UV1]](s64), [[LOAD1]](s64), [[UV5]](s64) + ; VI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[BUILD_VECTOR]](<4 x s64>) ; GFX9-LABEL: name: test_load_flat_v3s64_align8 ; GFX9: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1 ; GFX9-NEXT: [[LOAD:%[0-9]+]]:_(<2 x s64>) = G_LOAD [[COPY]](p0) :: (load (<2 x s64>), align 8) @@ -5012,10 +4994,10 @@ body: | ; GFX9-NEXT: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C]](s64) ; GFX9-NEXT: [[LOAD1:%[0-9]+]]:_(s64) = G_LOAD [[PTR_ADD]](p0) :: (load (s64) from unknown-address + 16) ; GFX9-NEXT: [[UV:%[0-9]+]]:_(s64), [[UV1:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[LOAD]](<2 x s64>) - ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s64>) = G_BUILD_VECTOR [[UV]](s64), [[UV1]](s64), [[LOAD1]](s64) ; GFX9-NEXT: [[DEF:%[0-9]+]]:_(<4 x s64>) = G_IMPLICIT_DEF - ; GFX9-NEXT: [[INSERT:%[0-9]+]]:_(<4 x s64>) = G_INSERT [[DEF]], [[BUILD_VECTOR]](<3 x s64>), 0 - ; GFX9-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[INSERT]](<4 x s64>) + ; GFX9-NEXT: [[UV2:%[0-9]+]]:_(s64), [[UV3:%[0-9]+]]:_(s64), [[UV4:%[0-9]+]]:_(s64), [[UV5:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[DEF]](<4 x s64>) + ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s64>) = G_BUILD_VECTOR [[UV]](s64), [[UV1]](s64), [[LOAD1]](s64), [[UV5]](s64) + ; GFX9-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[BUILD_VECTOR]](<4 x s64>) %0:_(p0) = COPY $vgpr0_vgpr1 %1:_(<3 x s64>) = G_LOAD %0 :: (load (<3 x s64>), align 8, addrspace 0) %2:_(<4 x s64>) = G_IMPLICIT_DEF @@ -5136,10 +5118,10 @@ body: | ; CI-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY [[C5]](s32) ; CI-NEXT: [[SHL20:%[0-9]+]]:_(s64) = G_SHL [[ANYEXT2]], [[COPY2]](s32) ; CI-NEXT: [[OR20:%[0-9]+]]:_(s64) = G_OR [[SHL20]], [[ZEXT2]] - ; CI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s64>) = G_BUILD_VECTOR [[OR6]](s64), [[OR13]](s64), [[OR20]](s64) ; CI-NEXT: [[DEF:%[0-9]+]]:_(<4 x s64>) 
= G_IMPLICIT_DEF - ; CI-NEXT: [[INSERT:%[0-9]+]]:_(<4 x s64>) = G_INSERT [[DEF]], [[BUILD_VECTOR]](<3 x s64>), 0 - ; CI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[INSERT]](<4 x s64>) + ; CI-NEXT: [[UV:%[0-9]+]]:_(s64), [[UV1:%[0-9]+]]:_(s64), [[UV2:%[0-9]+]]:_(s64), [[UV3:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[DEF]](<4 x s64>) + ; CI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s64>) = G_BUILD_VECTOR [[OR6]](s64), [[OR13]](s64), [[OR20]](s64), [[UV3]](s64) + ; CI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[BUILD_VECTOR]](<4 x s64>) ; VI-LABEL: name: test_load_flat_v3s64_align1 ; VI: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1 ; VI-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[COPY]](p0) :: (load (s8)) @@ -5247,10 +5229,10 @@ body: | ; VI-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY [[C5]](s32) ; VI-NEXT: [[SHL20:%[0-9]+]]:_(s64) = G_SHL [[ANYEXT2]], [[COPY2]](s32) ; VI-NEXT: [[OR20:%[0-9]+]]:_(s64) = G_OR [[SHL20]], [[ZEXT2]] - ; VI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s64>) = G_BUILD_VECTOR [[OR6]](s64), [[OR13]](s64), [[OR20]](s64) ; VI-NEXT: [[DEF:%[0-9]+]]:_(<4 x s64>) = G_IMPLICIT_DEF - ; VI-NEXT: [[INSERT:%[0-9]+]]:_(<4 x s64>) = G_INSERT [[DEF]], [[BUILD_VECTOR]](<3 x s64>), 0 - ; VI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[INSERT]](<4 x s64>) + ; VI-NEXT: [[UV:%[0-9]+]]:_(s64), [[UV1:%[0-9]+]]:_(s64), [[UV2:%[0-9]+]]:_(s64), [[UV3:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[DEF]](<4 x s64>) + ; VI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s64>) = G_BUILD_VECTOR [[OR6]](s64), [[OR13]](s64), [[OR20]](s64), [[UV3]](s64) + ; VI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[BUILD_VECTOR]](<4 x s64>) ; GFX9-LABEL: name: test_load_flat_v3s64_align1 ; GFX9: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1 ; GFX9-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[COPY]](p0) :: (load (s8)) @@ -5358,10 +5340,10 @@ body: | ; GFX9-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY [[C5]](s32) ; GFX9-NEXT: [[SHL20:%[0-9]+]]:_(s64) = G_SHL [[ANYEXT2]], [[COPY2]](s32) ; GFX9-NEXT: [[OR20:%[0-9]+]]:_(s64) = G_OR [[SHL20]], [[ZEXT2]] - ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s64>) = G_BUILD_VECTOR [[OR6]](s64), [[OR13]](s64), [[OR20]](s64) ; GFX9-NEXT: [[DEF:%[0-9]+]]:_(<4 x s64>) = G_IMPLICIT_DEF - ; GFX9-NEXT: [[INSERT:%[0-9]+]]:_(<4 x s64>) = G_INSERT [[DEF]], [[BUILD_VECTOR]](<3 x s64>), 0 - ; GFX9-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[INSERT]](<4 x s64>) + ; GFX9-NEXT: [[UV:%[0-9]+]]:_(s64), [[UV1:%[0-9]+]]:_(s64), [[UV2:%[0-9]+]]:_(s64), [[UV3:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[DEF]](<4 x s64>) + ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s64>) = G_BUILD_VECTOR [[OR6]](s64), [[OR13]](s64), [[OR20]](s64), [[UV3]](s64) + ; GFX9-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[BUILD_VECTOR]](<4 x s64>) %0:_(p0) = COPY $vgpr0_vgpr1 %1:_(<3 x s64>) = G_LOAD %0 :: (load (<3 x s64>), align 1, addrspace 0) %2:_(<4 x s64>) = G_IMPLICIT_DEF diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-load-global.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-load-global.mir index 70538d0097a7..acaf1dab33f8 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-load-global.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-load-global.mir @@ -1203,8 +1203,9 @@ body: | ; SI-LABEL: name: test_load_global_s96_align16 ; SI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 ; SI-NEXT: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p1) :: (load (<4 x s32>), addrspace 1) - ; SI-NEXT: [[EXTRACT:%[0-9]+]]:_(<3 x s32>) = 
G_EXTRACT [[LOAD]](<4 x s32>), 0 - ; SI-NEXT: [[BITCAST:%[0-9]+]]:_(s96) = G_BITCAST [[EXTRACT]](<3 x s32>) + ; SI-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[LOAD]](<4 x s32>) + ; SI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[UV]](s32), [[UV1]](s32), [[UV2]](s32) + ; SI-NEXT: [[BITCAST:%[0-9]+]]:_(s96) = G_BITCAST [[BUILD_VECTOR]](<3 x s32>) ; SI-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[BITCAST]](s96) ; CI-HSA-LABEL: name: test_load_global_s96_align16 ; CI-HSA: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 @@ -1775,16 +1776,10 @@ body: | ; SI-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p1) :: (load (s32) from unknown-address + 24, addrspace 1) ; SI-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[LOAD1]](<2 x s32>) ; SI-NEXT: [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[LOAD]](<4 x s32>) - ; SI-NEXT: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF - ; SI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[UV2]](s32), [[UV3]](s32), [[UV4]](s32) - ; SI-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[UV5]](s32), [[UV]](s32), [[UV1]](s32) - ; SI-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[LOAD2]](s32), [[DEF]](s32), [[DEF]](s32) - ; SI-NEXT: [[DEF1:%[0-9]+]]:_(<3 x s32>) = G_IMPLICIT_DEF - ; SI-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<21 x s32>) = G_CONCAT_VECTORS [[BUILD_VECTOR]](<3 x s32>), [[BUILD_VECTOR1]](<3 x s32>), [[BUILD_VECTOR2]](<3 x s32>), [[DEF1]](<3 x s32>), [[DEF1]](<3 x s32>), [[DEF1]](<3 x s32>), [[DEF1]](<3 x s32>) - ; SI-NEXT: [[UV6:%[0-9]+]]:_(<7 x s32>), [[UV7:%[0-9]+]]:_(<7 x s32>), [[UV8:%[0-9]+]]:_(<7 x s32>) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<21 x s32>) - ; SI-NEXT: [[BITCAST:%[0-9]+]]:_(s224) = G_BITCAST [[UV6]](<7 x s32>) - ; SI-NEXT: [[DEF2:%[0-9]+]]:_(s256) = G_IMPLICIT_DEF - ; SI-NEXT: [[INSERT:%[0-9]+]]:_(s256) = G_INSERT [[DEF2]], [[BITCAST]](s224), 0 + ; SI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<7 x s32>) = G_BUILD_VECTOR [[UV2]](s32), [[UV3]](s32), [[UV4]](s32), [[UV5]](s32), [[UV]](s32), [[UV1]](s32), [[LOAD2]](s32) + ; SI-NEXT: [[BITCAST:%[0-9]+]]:_(s224) = G_BITCAST [[BUILD_VECTOR]](<7 x s32>) + ; SI-NEXT: [[DEF:%[0-9]+]]:_(s256) = G_IMPLICIT_DEF + ; SI-NEXT: [[INSERT:%[0-9]+]]:_(s256) = G_INSERT [[DEF]], [[BITCAST]](s224), 0 ; SI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[INSERT]](s256) ; CI-HSA-LABEL: name: test_load_global_s224_align4 ; CI-HSA: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 @@ -1794,16 +1789,10 @@ body: | ; CI-HSA-NEXT: [[LOAD1:%[0-9]+]]:_(<3 x s32>) = G_LOAD [[PTR_ADD]](p1) :: (load (<3 x s32>) from unknown-address + 16, align 4, addrspace 1) ; CI-HSA-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[LOAD]](<4 x s32>) ; CI-HSA-NEXT: [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32), [[UV6:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[LOAD1]](<3 x s32>) - ; CI-HSA-NEXT: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF - ; CI-HSA-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[UV]](s32), [[UV1]](s32), [[UV2]](s32) - ; CI-HSA-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[UV3]](s32), [[UV4]](s32), [[UV5]](s32) - ; CI-HSA-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[UV6]](s32), [[DEF]](s32), [[DEF]](s32) - ; CI-HSA-NEXT: [[DEF1:%[0-9]+]]:_(<3 x s32>) = G_IMPLICIT_DEF - ; CI-HSA-NEXT: 
[[CONCAT_VECTORS:%[0-9]+]]:_(<21 x s32>) = G_CONCAT_VECTORS [[BUILD_VECTOR]](<3 x s32>), [[BUILD_VECTOR1]](<3 x s32>), [[BUILD_VECTOR2]](<3 x s32>), [[DEF1]](<3 x s32>), [[DEF1]](<3 x s32>), [[DEF1]](<3 x s32>), [[DEF1]](<3 x s32>) - ; CI-HSA-NEXT: [[UV7:%[0-9]+]]:_(<7 x s32>), [[UV8:%[0-9]+]]:_(<7 x s32>), [[UV9:%[0-9]+]]:_(<7 x s32>) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<21 x s32>) - ; CI-HSA-NEXT: [[BITCAST:%[0-9]+]]:_(s224) = G_BITCAST [[UV7]](<7 x s32>) - ; CI-HSA-NEXT: [[DEF2:%[0-9]+]]:_(s256) = G_IMPLICIT_DEF - ; CI-HSA-NEXT: [[INSERT:%[0-9]+]]:_(s256) = G_INSERT [[DEF2]], [[BITCAST]](s224), 0 + ; CI-HSA-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<7 x s32>) = G_BUILD_VECTOR [[UV]](s32), [[UV1]](s32), [[UV2]](s32), [[UV3]](s32), [[UV4]](s32), [[UV5]](s32), [[UV6]](s32) + ; CI-HSA-NEXT: [[BITCAST:%[0-9]+]]:_(s224) = G_BITCAST [[BUILD_VECTOR]](<7 x s32>) + ; CI-HSA-NEXT: [[DEF:%[0-9]+]]:_(s256) = G_IMPLICIT_DEF + ; CI-HSA-NEXT: [[INSERT:%[0-9]+]]:_(s256) = G_INSERT [[DEF]], [[BITCAST]](s224), 0 ; CI-HSA-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[INSERT]](s256) ; CI-MESA-LABEL: name: test_load_global_s224_align4 ; CI-MESA: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 @@ -1813,16 +1802,10 @@ body: | ; CI-MESA-NEXT: [[LOAD1:%[0-9]+]]:_(<3 x s32>) = G_LOAD [[PTR_ADD]](p1) :: (load (<3 x s32>) from unknown-address + 16, align 4, addrspace 1) ; CI-MESA-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[LOAD]](<4 x s32>) ; CI-MESA-NEXT: [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32), [[UV6:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[LOAD1]](<3 x s32>) - ; CI-MESA-NEXT: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF - ; CI-MESA-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[UV]](s32), [[UV1]](s32), [[UV2]](s32) - ; CI-MESA-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[UV3]](s32), [[UV4]](s32), [[UV5]](s32) - ; CI-MESA-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[UV6]](s32), [[DEF]](s32), [[DEF]](s32) - ; CI-MESA-NEXT: [[DEF1:%[0-9]+]]:_(<3 x s32>) = G_IMPLICIT_DEF - ; CI-MESA-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<21 x s32>) = G_CONCAT_VECTORS [[BUILD_VECTOR]](<3 x s32>), [[BUILD_VECTOR1]](<3 x s32>), [[BUILD_VECTOR2]](<3 x s32>), [[DEF1]](<3 x s32>), [[DEF1]](<3 x s32>), [[DEF1]](<3 x s32>), [[DEF1]](<3 x s32>) - ; CI-MESA-NEXT: [[UV7:%[0-9]+]]:_(<7 x s32>), [[UV8:%[0-9]+]]:_(<7 x s32>), [[UV9:%[0-9]+]]:_(<7 x s32>) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<21 x s32>) - ; CI-MESA-NEXT: [[BITCAST:%[0-9]+]]:_(s224) = G_BITCAST [[UV7]](<7 x s32>) - ; CI-MESA-NEXT: [[DEF2:%[0-9]+]]:_(s256) = G_IMPLICIT_DEF - ; CI-MESA-NEXT: [[INSERT:%[0-9]+]]:_(s256) = G_INSERT [[DEF2]], [[BITCAST]](s224), 0 + ; CI-MESA-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<7 x s32>) = G_BUILD_VECTOR [[UV]](s32), [[UV1]](s32), [[UV2]](s32), [[UV3]](s32), [[UV4]](s32), [[UV5]](s32), [[UV6]](s32) + ; CI-MESA-NEXT: [[BITCAST:%[0-9]+]]:_(s224) = G_BITCAST [[BUILD_VECTOR]](<7 x s32>) + ; CI-MESA-NEXT: [[DEF:%[0-9]+]]:_(s256) = G_IMPLICIT_DEF + ; CI-MESA-NEXT: [[INSERT:%[0-9]+]]:_(s256) = G_INSERT [[DEF]], [[BITCAST]](s224), 0 ; CI-MESA-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[INSERT]](s256) ; VI-LABEL: name: test_load_global_s224_align4 ; VI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 @@ -1832,16 +1815,10 @@ body: | ; VI-NEXT: [[LOAD1:%[0-9]+]]:_(<3 x s32>) = G_LOAD [[PTR_ADD]](p1) :: (load (<3 x s32>) from unknown-address + 16, align 4, addrspace 1) ; VI-NEXT: [[UV:%[0-9]+]]:_(s32), 
[[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[LOAD]](<4 x s32>) ; VI-NEXT: [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32), [[UV6:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[LOAD1]](<3 x s32>) - ; VI-NEXT: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF - ; VI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[UV]](s32), [[UV1]](s32), [[UV2]](s32) - ; VI-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[UV3]](s32), [[UV4]](s32), [[UV5]](s32) - ; VI-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[UV6]](s32), [[DEF]](s32), [[DEF]](s32) - ; VI-NEXT: [[DEF1:%[0-9]+]]:_(<3 x s32>) = G_IMPLICIT_DEF - ; VI-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<21 x s32>) = G_CONCAT_VECTORS [[BUILD_VECTOR]](<3 x s32>), [[BUILD_VECTOR1]](<3 x s32>), [[BUILD_VECTOR2]](<3 x s32>), [[DEF1]](<3 x s32>), [[DEF1]](<3 x s32>), [[DEF1]](<3 x s32>), [[DEF1]](<3 x s32>) - ; VI-NEXT: [[UV7:%[0-9]+]]:_(<7 x s32>), [[UV8:%[0-9]+]]:_(<7 x s32>), [[UV9:%[0-9]+]]:_(<7 x s32>) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<21 x s32>) - ; VI-NEXT: [[BITCAST:%[0-9]+]]:_(s224) = G_BITCAST [[UV7]](<7 x s32>) - ; VI-NEXT: [[DEF2:%[0-9]+]]:_(s256) = G_IMPLICIT_DEF - ; VI-NEXT: [[INSERT:%[0-9]+]]:_(s256) = G_INSERT [[DEF2]], [[BITCAST]](s224), 0 + ; VI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<7 x s32>) = G_BUILD_VECTOR [[UV]](s32), [[UV1]](s32), [[UV2]](s32), [[UV3]](s32), [[UV4]](s32), [[UV5]](s32), [[UV6]](s32) + ; VI-NEXT: [[BITCAST:%[0-9]+]]:_(s224) = G_BITCAST [[BUILD_VECTOR]](<7 x s32>) + ; VI-NEXT: [[DEF:%[0-9]+]]:_(s256) = G_IMPLICIT_DEF + ; VI-NEXT: [[INSERT:%[0-9]+]]:_(s256) = G_INSERT [[DEF]], [[BITCAST]](s224), 0 ; VI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[INSERT]](s256) ; GFX9-HSA-LABEL: name: test_load_global_s224_align4 ; GFX9-HSA: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 @@ -1851,16 +1828,10 @@ body: | ; GFX9-HSA-NEXT: [[LOAD1:%[0-9]+]]:_(<3 x s32>) = G_LOAD [[PTR_ADD]](p1) :: (load (<3 x s32>) from unknown-address + 16, align 4, addrspace 1) ; GFX9-HSA-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[LOAD]](<4 x s32>) ; GFX9-HSA-NEXT: [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32), [[UV6:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[LOAD1]](<3 x s32>) - ; GFX9-HSA-NEXT: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF - ; GFX9-HSA-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[UV]](s32), [[UV1]](s32), [[UV2]](s32) - ; GFX9-HSA-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[UV3]](s32), [[UV4]](s32), [[UV5]](s32) - ; GFX9-HSA-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[UV6]](s32), [[DEF]](s32), [[DEF]](s32) - ; GFX9-HSA-NEXT: [[DEF1:%[0-9]+]]:_(<3 x s32>) = G_IMPLICIT_DEF - ; GFX9-HSA-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<21 x s32>) = G_CONCAT_VECTORS [[BUILD_VECTOR]](<3 x s32>), [[BUILD_VECTOR1]](<3 x s32>), [[BUILD_VECTOR2]](<3 x s32>), [[DEF1]](<3 x s32>), [[DEF1]](<3 x s32>), [[DEF1]](<3 x s32>), [[DEF1]](<3 x s32>) - ; GFX9-HSA-NEXT: [[UV7:%[0-9]+]]:_(<7 x s32>), [[UV8:%[0-9]+]]:_(<7 x s32>), [[UV9:%[0-9]+]]:_(<7 x s32>) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<21 x s32>) - ; GFX9-HSA-NEXT: [[BITCAST:%[0-9]+]]:_(s224) = G_BITCAST [[UV7]](<7 x s32>) - ; GFX9-HSA-NEXT: [[DEF2:%[0-9]+]]:_(s256) = G_IMPLICIT_DEF - ; GFX9-HSA-NEXT: [[INSERT:%[0-9]+]]:_(s256) = G_INSERT [[DEF2]], [[BITCAST]](s224), 0 + ; GFX9-HSA-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<7 x s32>) = G_BUILD_VECTOR [[UV]](s32), [[UV1]](s32), [[UV2]](s32), [[UV3]](s32), [[UV4]](s32), 
[[UV5]](s32), [[UV6]](s32) + ; GFX9-HSA-NEXT: [[BITCAST:%[0-9]+]]:_(s224) = G_BITCAST [[BUILD_VECTOR]](<7 x s32>) + ; GFX9-HSA-NEXT: [[DEF:%[0-9]+]]:_(s256) = G_IMPLICIT_DEF + ; GFX9-HSA-NEXT: [[INSERT:%[0-9]+]]:_(s256) = G_INSERT [[DEF]], [[BITCAST]](s224), 0 ; GFX9-HSA-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[INSERT]](s256) ; GFX9-MESA-LABEL: name: test_load_global_s224_align4 ; GFX9-MESA: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 @@ -1870,16 +1841,10 @@ body: | ; GFX9-MESA-NEXT: [[LOAD1:%[0-9]+]]:_(<3 x s32>) = G_LOAD [[PTR_ADD]](p1) :: (load (<3 x s32>) from unknown-address + 16, align 4, addrspace 1) ; GFX9-MESA-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[LOAD]](<4 x s32>) ; GFX9-MESA-NEXT: [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32), [[UV6:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[LOAD1]](<3 x s32>) - ; GFX9-MESA-NEXT: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF - ; GFX9-MESA-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[UV]](s32), [[UV1]](s32), [[UV2]](s32) - ; GFX9-MESA-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[UV3]](s32), [[UV4]](s32), [[UV5]](s32) - ; GFX9-MESA-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[UV6]](s32), [[DEF]](s32), [[DEF]](s32) - ; GFX9-MESA-NEXT: [[DEF1:%[0-9]+]]:_(<3 x s32>) = G_IMPLICIT_DEF - ; GFX9-MESA-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<21 x s32>) = G_CONCAT_VECTORS [[BUILD_VECTOR]](<3 x s32>), [[BUILD_VECTOR1]](<3 x s32>), [[BUILD_VECTOR2]](<3 x s32>), [[DEF1]](<3 x s32>), [[DEF1]](<3 x s32>), [[DEF1]](<3 x s32>), [[DEF1]](<3 x s32>) - ; GFX9-MESA-NEXT: [[UV7:%[0-9]+]]:_(<7 x s32>), [[UV8:%[0-9]+]]:_(<7 x s32>), [[UV9:%[0-9]+]]:_(<7 x s32>) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<21 x s32>) - ; GFX9-MESA-NEXT: [[BITCAST:%[0-9]+]]:_(s224) = G_BITCAST [[UV7]](<7 x s32>) - ; GFX9-MESA-NEXT: [[DEF2:%[0-9]+]]:_(s256) = G_IMPLICIT_DEF - ; GFX9-MESA-NEXT: [[INSERT:%[0-9]+]]:_(s256) = G_INSERT [[DEF2]], [[BITCAST]](s224), 0 + ; GFX9-MESA-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<7 x s32>) = G_BUILD_VECTOR [[UV]](s32), [[UV1]](s32), [[UV2]](s32), [[UV3]](s32), [[UV4]](s32), [[UV5]](s32), [[UV6]](s32) + ; GFX9-MESA-NEXT: [[BITCAST:%[0-9]+]]:_(s224) = G_BITCAST [[BUILD_VECTOR]](<7 x s32>) + ; GFX9-MESA-NEXT: [[DEF:%[0-9]+]]:_(s256) = G_IMPLICIT_DEF + ; GFX9-MESA-NEXT: [[INSERT:%[0-9]+]]:_(s256) = G_INSERT [[DEF]], [[BITCAST]](s224), 0 ; GFX9-MESA-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[INSERT]](s256) %0:_(p1) = COPY $vgpr0_vgpr1 %1:_(s224) = G_LOAD %0 :: (load (s224), align 4, addrspace 1) @@ -5837,12 +5802,12 @@ body: | ; SI-LABEL: name: test_load_global_v3s16_align8 ; SI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 ; SI-NEXT: [[LOAD:%[0-9]+]]:_(<4 x s16>) = G_LOAD [[COPY]](p1) :: (load (<4 x s16>), addrspace 1) - ; SI-NEXT: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF ; SI-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[LOAD]](<4 x s16>) ; SI-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>) ; SI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 ; SI-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) ; SI-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>) + ; SI-NEXT: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF ; SI-NEXT: [[UV2:%[0-9]+]]:_(<2 x s16>), [[UV3:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<4 x s16>) ; SI-NEXT: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[UV2]](<2 x s16>) ; SI-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = 
G_LSHR [[BITCAST2]], [[C]](s32) @@ -5868,12 +5833,12 @@ body: | ; CI-HSA-LABEL: name: test_load_global_v3s16_align8 ; CI-HSA: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 ; CI-HSA-NEXT: [[LOAD:%[0-9]+]]:_(<4 x s16>) = G_LOAD [[COPY]](p1) :: (load (<4 x s16>), addrspace 1) - ; CI-HSA-NEXT: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF ; CI-HSA-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[LOAD]](<4 x s16>) ; CI-HSA-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>) ; CI-HSA-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 ; CI-HSA-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) ; CI-HSA-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>) + ; CI-HSA-NEXT: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF ; CI-HSA-NEXT: [[UV2:%[0-9]+]]:_(<2 x s16>), [[UV3:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<4 x s16>) ; CI-HSA-NEXT: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[UV2]](<2 x s16>) ; CI-HSA-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C]](s32) @@ -5899,12 +5864,12 @@ body: | ; CI-MESA-LABEL: name: test_load_global_v3s16_align8 ; CI-MESA: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 ; CI-MESA-NEXT: [[LOAD:%[0-9]+]]:_(<4 x s16>) = G_LOAD [[COPY]](p1) :: (load (<4 x s16>), addrspace 1) - ; CI-MESA-NEXT: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF ; CI-MESA-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[LOAD]](<4 x s16>) ; CI-MESA-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>) ; CI-MESA-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 ; CI-MESA-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) ; CI-MESA-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>) + ; CI-MESA-NEXT: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF ; CI-MESA-NEXT: [[UV2:%[0-9]+]]:_(<2 x s16>), [[UV3:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<4 x s16>) ; CI-MESA-NEXT: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[UV2]](<2 x s16>) ; CI-MESA-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C]](s32) @@ -5930,12 +5895,12 @@ body: | ; VI-LABEL: name: test_load_global_v3s16_align8 ; VI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 ; VI-NEXT: [[LOAD:%[0-9]+]]:_(<4 x s16>) = G_LOAD [[COPY]](p1) :: (load (<4 x s16>), addrspace 1) - ; VI-NEXT: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF ; VI-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[LOAD]](<4 x s16>) ; VI-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>) ; VI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 ; VI-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) ; VI-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>) + ; VI-NEXT: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF ; VI-NEXT: [[UV2:%[0-9]+]]:_(<2 x s16>), [[UV3:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<4 x s16>) ; VI-NEXT: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[UV2]](<2 x s16>) ; VI-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C]](s32) @@ -5961,12 +5926,12 @@ body: | ; GFX9-HSA-LABEL: name: test_load_global_v3s16_align8 ; GFX9-HSA: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 ; GFX9-HSA-NEXT: [[LOAD:%[0-9]+]]:_(<4 x s16>) = G_LOAD [[COPY]](p1) :: (load (<4 x s16>), addrspace 1) - ; GFX9-HSA-NEXT: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF ; GFX9-HSA-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[LOAD]](<4 x s16>) ; GFX9-HSA-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>) ; GFX9-HSA-NEXT: 
[[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 ; GFX9-HSA-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) ; GFX9-HSA-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>) + ; GFX9-HSA-NEXT: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF ; GFX9-HSA-NEXT: [[UV2:%[0-9]+]]:_(<2 x s16>), [[UV3:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<4 x s16>) ; GFX9-HSA-NEXT: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[UV2]](<2 x s16>) ; GFX9-HSA-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C]](s32) @@ -5979,12 +5944,12 @@ body: | ; GFX9-MESA-LABEL: name: test_load_global_v3s16_align8 ; GFX9-MESA: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 ; GFX9-MESA-NEXT: [[LOAD:%[0-9]+]]:_(<4 x s16>) = G_LOAD [[COPY]](p1) :: (load (<4 x s16>), addrspace 1) - ; GFX9-MESA-NEXT: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF ; GFX9-MESA-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[LOAD]](<4 x s16>) ; GFX9-MESA-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>) ; GFX9-MESA-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 ; GFX9-MESA-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) ; GFX9-MESA-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>) + ; GFX9-MESA-NEXT: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF ; GFX9-MESA-NEXT: [[UV2:%[0-9]+]]:_(<2 x s16>), [[UV3:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<4 x s16>) ; GFX9-MESA-NEXT: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[UV2]](<2 x s16>) ; GFX9-MESA-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C]](s32) @@ -6996,7 +6961,6 @@ body: | ; SI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 ; SI-NEXT: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p1) :: (load (<4 x s32>), addrspace 1) ; SI-NEXT: [[BITCAST:%[0-9]+]]:_(<8 x s16>) = G_BITCAST [[LOAD]](<4 x s32>) - ; SI-NEXT: [[DEF:%[0-9]+]]:_(<6 x s16>) = G_IMPLICIT_DEF ; SI-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>), [[UV2:%[0-9]+]]:_(<2 x s16>), [[UV3:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[BITCAST]](<8 x s16>) ; SI-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>) ; SI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 @@ -7004,6 +6968,7 @@ body: | ; SI-NEXT: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>) ; SI-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C]](s32) ; SI-NEXT: [[BITCAST3:%[0-9]+]]:_(s32) = G_BITCAST [[UV2]](<2 x s16>) + ; SI-NEXT: [[DEF:%[0-9]+]]:_(<6 x s16>) = G_IMPLICIT_DEF ; SI-NEXT: [[UV4:%[0-9]+]]:_(<2 x s16>), [[UV5:%[0-9]+]]:_(<2 x s16>), [[UV6:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<6 x s16>) ; SI-NEXT: [[BITCAST4:%[0-9]+]]:_(s32) = G_BITCAST [[UV4]](<2 x s16>) ; SI-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 @@ -7029,7 +6994,6 @@ body: | ; CI-HSA: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 ; CI-HSA-NEXT: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p1) :: (load (<4 x s32>), addrspace 1) ; CI-HSA-NEXT: [[BITCAST:%[0-9]+]]:_(<8 x s16>) = G_BITCAST [[LOAD]](<4 x s32>) - ; CI-HSA-NEXT: [[DEF:%[0-9]+]]:_(<6 x s16>) = G_IMPLICIT_DEF ; CI-HSA-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>), [[UV2:%[0-9]+]]:_(<2 x s16>), [[UV3:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[BITCAST]](<8 x s16>) ; CI-HSA-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>) ; CI-HSA-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 @@ -7037,6 +7001,7 @@ body: | ; CI-HSA-NEXT: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>) ; CI-HSA-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C]](s32) ; 
CI-HSA-NEXT: [[BITCAST3:%[0-9]+]]:_(s32) = G_BITCAST [[UV2]](<2 x s16>) + ; CI-HSA-NEXT: [[DEF:%[0-9]+]]:_(<6 x s16>) = G_IMPLICIT_DEF ; CI-HSA-NEXT: [[UV4:%[0-9]+]]:_(<2 x s16>), [[UV5:%[0-9]+]]:_(<2 x s16>), [[UV6:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<6 x s16>) ; CI-HSA-NEXT: [[BITCAST4:%[0-9]+]]:_(s32) = G_BITCAST [[UV4]](<2 x s16>) ; CI-HSA-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 @@ -7062,7 +7027,6 @@ body: | ; CI-MESA: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 ; CI-MESA-NEXT: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p1) :: (load (<4 x s32>), addrspace 1) ; CI-MESA-NEXT: [[BITCAST:%[0-9]+]]:_(<8 x s16>) = G_BITCAST [[LOAD]](<4 x s32>) - ; CI-MESA-NEXT: [[DEF:%[0-9]+]]:_(<6 x s16>) = G_IMPLICIT_DEF ; CI-MESA-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>), [[UV2:%[0-9]+]]:_(<2 x s16>), [[UV3:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[BITCAST]](<8 x s16>) ; CI-MESA-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>) ; CI-MESA-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 @@ -7070,6 +7034,7 @@ body: | ; CI-MESA-NEXT: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>) ; CI-MESA-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C]](s32) ; CI-MESA-NEXT: [[BITCAST3:%[0-9]+]]:_(s32) = G_BITCAST [[UV2]](<2 x s16>) + ; CI-MESA-NEXT: [[DEF:%[0-9]+]]:_(<6 x s16>) = G_IMPLICIT_DEF ; CI-MESA-NEXT: [[UV4:%[0-9]+]]:_(<2 x s16>), [[UV5:%[0-9]+]]:_(<2 x s16>), [[UV6:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<6 x s16>) ; CI-MESA-NEXT: [[BITCAST4:%[0-9]+]]:_(s32) = G_BITCAST [[UV4]](<2 x s16>) ; CI-MESA-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 @@ -7095,7 +7060,6 @@ body: | ; VI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 ; VI-NEXT: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p1) :: (load (<4 x s32>), addrspace 1) ; VI-NEXT: [[BITCAST:%[0-9]+]]:_(<8 x s16>) = G_BITCAST [[LOAD]](<4 x s32>) - ; VI-NEXT: [[DEF:%[0-9]+]]:_(<6 x s16>) = G_IMPLICIT_DEF ; VI-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>), [[UV2:%[0-9]+]]:_(<2 x s16>), [[UV3:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[BITCAST]](<8 x s16>) ; VI-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>) ; VI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 @@ -7103,6 +7067,7 @@ body: | ; VI-NEXT: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>) ; VI-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C]](s32) ; VI-NEXT: [[BITCAST3:%[0-9]+]]:_(s32) = G_BITCAST [[UV2]](<2 x s16>) + ; VI-NEXT: [[DEF:%[0-9]+]]:_(<6 x s16>) = G_IMPLICIT_DEF ; VI-NEXT: [[UV4:%[0-9]+]]:_(<2 x s16>), [[UV5:%[0-9]+]]:_(<2 x s16>), [[UV6:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<6 x s16>) ; VI-NEXT: [[BITCAST4:%[0-9]+]]:_(s32) = G_BITCAST [[UV4]](<2 x s16>) ; VI-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 @@ -7128,7 +7093,6 @@ body: | ; GFX9-HSA: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 ; GFX9-HSA-NEXT: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p1) :: (load (<4 x s32>), addrspace 1) ; GFX9-HSA-NEXT: [[BITCAST:%[0-9]+]]:_(<8 x s16>) = G_BITCAST [[LOAD]](<4 x s32>) - ; GFX9-HSA-NEXT: [[DEF:%[0-9]+]]:_(<6 x s16>) = G_IMPLICIT_DEF ; GFX9-HSA-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>), [[UV2:%[0-9]+]]:_(<2 x s16>), [[UV3:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[BITCAST]](<8 x s16>) ; GFX9-HSA-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>) ; GFX9-HSA-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 @@ -7136,6 +7100,7 @@ body: | ; GFX9-HSA-NEXT: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST 
[[UV1]](<2 x s16>) ; GFX9-HSA-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C]](s32) ; GFX9-HSA-NEXT: [[BITCAST3:%[0-9]+]]:_(s32) = G_BITCAST [[UV2]](<2 x s16>) + ; GFX9-HSA-NEXT: [[DEF:%[0-9]+]]:_(<6 x s16>) = G_IMPLICIT_DEF ; GFX9-HSA-NEXT: [[UV4:%[0-9]+]]:_(<2 x s16>), [[UV5:%[0-9]+]]:_(<2 x s16>), [[UV6:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<6 x s16>) ; GFX9-HSA-NEXT: [[BITCAST4:%[0-9]+]]:_(s32) = G_BITCAST [[UV4]](<2 x s16>) ; GFX9-HSA-NEXT: [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[BITCAST1]](s32), [[LSHR]](s32) @@ -7148,7 +7113,6 @@ body: | ; GFX9-MESA: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 ; GFX9-MESA-NEXT: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p1) :: (load (<4 x s32>), addrspace 1) ; GFX9-MESA-NEXT: [[BITCAST:%[0-9]+]]:_(<8 x s16>) = G_BITCAST [[LOAD]](<4 x s32>) - ; GFX9-MESA-NEXT: [[DEF:%[0-9]+]]:_(<6 x s16>) = G_IMPLICIT_DEF ; GFX9-MESA-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>), [[UV2:%[0-9]+]]:_(<2 x s16>), [[UV3:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[BITCAST]](<8 x s16>) ; GFX9-MESA-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>) ; GFX9-MESA-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 @@ -7156,6 +7120,7 @@ body: | ; GFX9-MESA-NEXT: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>) ; GFX9-MESA-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C]](s32) ; GFX9-MESA-NEXT: [[BITCAST3:%[0-9]+]]:_(s32) = G_BITCAST [[UV2]](<2 x s16>) + ; GFX9-MESA-NEXT: [[DEF:%[0-9]+]]:_(<6 x s16>) = G_IMPLICIT_DEF ; GFX9-MESA-NEXT: [[UV4:%[0-9]+]]:_(<2 x s16>), [[UV5:%[0-9]+]]:_(<2 x s16>), [[UV6:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<6 x s16>) ; GFX9-MESA-NEXT: [[BITCAST4:%[0-9]+]]:_(s32) = G_BITCAST [[UV4]](<2 x s16>) ; GFX9-MESA-NEXT: [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[BITCAST1]](s32), [[LSHR]](s32) @@ -7233,21 +7198,21 @@ body: | ; CI-HSA-NEXT: [[DEF:%[0-9]+]]:_(<6 x s16>) = G_IMPLICIT_DEF ; CI-HSA-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>), [[UV2:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<6 x s16>) ; CI-HSA-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>) - ; CI-HSA-NEXT: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; CI-HSA-NEXT: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 - ; CI-HSA-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[LOAD]], [[C5]] - ; CI-HSA-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LOAD1]], [[C5]] - ; CI-HSA-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C4]](s32) + ; CI-HSA-NEXT: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 + ; CI-HSA-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[LOAD]], [[C4]] + ; CI-HSA-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LOAD1]], [[C4]] + ; CI-HSA-NEXT: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; CI-HSA-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C5]](s32) ; CI-HSA-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]] ; CI-HSA-NEXT: [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) - ; CI-HSA-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[LOAD2]], [[C5]] - ; CI-HSA-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[LOAD3]], [[C5]] - ; CI-HSA-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C4]](s32) + ; CI-HSA-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[LOAD2]], [[C4]] + ; CI-HSA-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[LOAD3]], [[C4]] + ; CI-HSA-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C5]](s32) ; CI-HSA-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL1]] ; CI-HSA-NEXT: [[BITCAST2:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32) - ; CI-HSA-NEXT: 
[[AND4:%[0-9]+]]:_(s32) = G_AND [[LOAD4]], [[C5]] - ; CI-HSA-NEXT: [[AND5:%[0-9]+]]:_(s32) = G_AND [[BITCAST]], [[C5]] - ; CI-HSA-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[C4]](s32) + ; CI-HSA-NEXT: [[AND4:%[0-9]+]]:_(s32) = G_AND [[LOAD4]], [[C4]] + ; CI-HSA-NEXT: [[AND5:%[0-9]+]]:_(s32) = G_AND [[BITCAST]], [[C4]] + ; CI-HSA-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[C5]](s32) ; CI-HSA-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[AND4]], [[SHL2]] ; CI-HSA-NEXT: [[BITCAST3:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR2]](s32) ; CI-HSA-NEXT: $vgpr0 = COPY [[BITCAST1]](<2 x s16>) @@ -7271,21 +7236,21 @@ body: | ; CI-MESA-NEXT: [[DEF:%[0-9]+]]:_(<6 x s16>) = G_IMPLICIT_DEF ; CI-MESA-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>), [[UV2:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<6 x s16>) ; CI-MESA-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>) - ; CI-MESA-NEXT: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; CI-MESA-NEXT: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 - ; CI-MESA-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[LOAD]], [[C5]] - ; CI-MESA-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LOAD1]], [[C5]] - ; CI-MESA-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C4]](s32) + ; CI-MESA-NEXT: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 + ; CI-MESA-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[LOAD]], [[C4]] + ; CI-MESA-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LOAD1]], [[C4]] + ; CI-MESA-NEXT: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; CI-MESA-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C5]](s32) ; CI-MESA-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]] ; CI-MESA-NEXT: [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) - ; CI-MESA-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[LOAD2]], [[C5]] - ; CI-MESA-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[LOAD3]], [[C5]] - ; CI-MESA-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C4]](s32) + ; CI-MESA-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[LOAD2]], [[C4]] + ; CI-MESA-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[LOAD3]], [[C4]] + ; CI-MESA-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C5]](s32) ; CI-MESA-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL1]] ; CI-MESA-NEXT: [[BITCAST2:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32) - ; CI-MESA-NEXT: [[AND4:%[0-9]+]]:_(s32) = G_AND [[LOAD4]], [[C5]] - ; CI-MESA-NEXT: [[AND5:%[0-9]+]]:_(s32) = G_AND [[BITCAST]], [[C5]] - ; CI-MESA-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[C4]](s32) + ; CI-MESA-NEXT: [[AND4:%[0-9]+]]:_(s32) = G_AND [[LOAD4]], [[C4]] + ; CI-MESA-NEXT: [[AND5:%[0-9]+]]:_(s32) = G_AND [[BITCAST]], [[C4]] + ; CI-MESA-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[C5]](s32) ; CI-MESA-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[AND4]], [[SHL2]] ; CI-MESA-NEXT: [[BITCAST3:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR2]](s32) ; CI-MESA-NEXT: $vgpr0 = COPY [[BITCAST1]](<2 x s16>) @@ -7309,21 +7274,21 @@ body: | ; VI-NEXT: [[DEF:%[0-9]+]]:_(<6 x s16>) = G_IMPLICIT_DEF ; VI-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>), [[UV2:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<6 x s16>) ; VI-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>) - ; VI-NEXT: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; VI-NEXT: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 - ; VI-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[LOAD]], [[C5]] - ; VI-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LOAD1]], [[C5]] - ; VI-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C4]](s32) + ; VI-NEXT: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 + ; VI-NEXT: [[AND:%[0-9]+]]:_(s32) = 
G_AND [[LOAD]], [[C4]] + ; VI-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LOAD1]], [[C4]] + ; VI-NEXT: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; VI-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C5]](s32) ; VI-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]] ; VI-NEXT: [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) - ; VI-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[LOAD2]], [[C5]] - ; VI-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[LOAD3]], [[C5]] - ; VI-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C4]](s32) + ; VI-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[LOAD2]], [[C4]] + ; VI-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[LOAD3]], [[C4]] + ; VI-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C5]](s32) ; VI-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL1]] ; VI-NEXT: [[BITCAST2:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32) - ; VI-NEXT: [[AND4:%[0-9]+]]:_(s32) = G_AND [[LOAD4]], [[C5]] - ; VI-NEXT: [[AND5:%[0-9]+]]:_(s32) = G_AND [[BITCAST]], [[C5]] - ; VI-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[C4]](s32) + ; VI-NEXT: [[AND4:%[0-9]+]]:_(s32) = G_AND [[LOAD4]], [[C4]] + ; VI-NEXT: [[AND5:%[0-9]+]]:_(s32) = G_AND [[BITCAST]], [[C4]] + ; VI-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[C5]](s32) ; VI-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[AND4]], [[SHL2]] ; VI-NEXT: [[BITCAST3:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR2]](s32) ; VI-NEXT: $vgpr0 = COPY [[BITCAST1]](<2 x s16>) @@ -7446,21 +7411,21 @@ body: | ; CI-HSA-NEXT: [[DEF:%[0-9]+]]:_(<6 x s16>) = G_IMPLICIT_DEF ; CI-HSA-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>), [[UV2:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<6 x s16>) ; CI-HSA-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>) - ; CI-HSA-NEXT: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; CI-HSA-NEXT: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 - ; CI-HSA-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[LOAD]], [[C5]] - ; CI-HSA-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LOAD1]], [[C5]] - ; CI-HSA-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C4]](s32) + ; CI-HSA-NEXT: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 + ; CI-HSA-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[LOAD]], [[C4]] + ; CI-HSA-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LOAD1]], [[C4]] + ; CI-HSA-NEXT: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; CI-HSA-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C5]](s32) ; CI-HSA-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]] ; CI-HSA-NEXT: [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) - ; CI-HSA-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[LOAD2]], [[C5]] - ; CI-HSA-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[LOAD3]], [[C5]] - ; CI-HSA-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C4]](s32) + ; CI-HSA-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[LOAD2]], [[C4]] + ; CI-HSA-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[LOAD3]], [[C4]] + ; CI-HSA-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C5]](s32) ; CI-HSA-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL1]] ; CI-HSA-NEXT: [[BITCAST2:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32) - ; CI-HSA-NEXT: [[AND4:%[0-9]+]]:_(s32) = G_AND [[LOAD4]], [[C5]] - ; CI-HSA-NEXT: [[AND5:%[0-9]+]]:_(s32) = G_AND [[BITCAST]], [[C5]] - ; CI-HSA-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[C4]](s32) + ; CI-HSA-NEXT: [[AND4:%[0-9]+]]:_(s32) = G_AND [[LOAD4]], [[C4]] + ; CI-HSA-NEXT: [[AND5:%[0-9]+]]:_(s32) = G_AND [[BITCAST]], [[C4]] + ; CI-HSA-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[C5]](s32) ; CI-HSA-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[AND4]], [[SHL2]] ; CI-HSA-NEXT: 
[[BITCAST3:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR2]](s32) ; CI-HSA-NEXT: $vgpr0 = COPY [[BITCAST1]](<2 x s16>) @@ -7484,21 +7449,21 @@ body: | ; CI-MESA-NEXT: [[DEF:%[0-9]+]]:_(<6 x s16>) = G_IMPLICIT_DEF ; CI-MESA-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>), [[UV2:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<6 x s16>) ; CI-MESA-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>) - ; CI-MESA-NEXT: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; CI-MESA-NEXT: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 - ; CI-MESA-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[LOAD]], [[C5]] - ; CI-MESA-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LOAD1]], [[C5]] - ; CI-MESA-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C4]](s32) + ; CI-MESA-NEXT: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 + ; CI-MESA-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[LOAD]], [[C4]] + ; CI-MESA-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LOAD1]], [[C4]] + ; CI-MESA-NEXT: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; CI-MESA-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C5]](s32) ; CI-MESA-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]] ; CI-MESA-NEXT: [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) - ; CI-MESA-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[LOAD2]], [[C5]] - ; CI-MESA-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[LOAD3]], [[C5]] - ; CI-MESA-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C4]](s32) + ; CI-MESA-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[LOAD2]], [[C4]] + ; CI-MESA-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[LOAD3]], [[C4]] + ; CI-MESA-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C5]](s32) ; CI-MESA-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL1]] ; CI-MESA-NEXT: [[BITCAST2:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32) - ; CI-MESA-NEXT: [[AND4:%[0-9]+]]:_(s32) = G_AND [[LOAD4]], [[C5]] - ; CI-MESA-NEXT: [[AND5:%[0-9]+]]:_(s32) = G_AND [[BITCAST]], [[C5]] - ; CI-MESA-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[C4]](s32) + ; CI-MESA-NEXT: [[AND4:%[0-9]+]]:_(s32) = G_AND [[LOAD4]], [[C4]] + ; CI-MESA-NEXT: [[AND5:%[0-9]+]]:_(s32) = G_AND [[BITCAST]], [[C4]] + ; CI-MESA-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[C5]](s32) ; CI-MESA-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[AND4]], [[SHL2]] ; CI-MESA-NEXT: [[BITCAST3:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR2]](s32) ; CI-MESA-NEXT: $vgpr0 = COPY [[BITCAST1]](<2 x s16>) @@ -7522,21 +7487,21 @@ body: | ; VI-NEXT: [[DEF:%[0-9]+]]:_(<6 x s16>) = G_IMPLICIT_DEF ; VI-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>), [[UV2:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<6 x s16>) ; VI-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>) - ; VI-NEXT: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; VI-NEXT: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 - ; VI-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[LOAD]], [[C5]] - ; VI-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LOAD1]], [[C5]] - ; VI-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C4]](s32) + ; VI-NEXT: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 + ; VI-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[LOAD]], [[C4]] + ; VI-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LOAD1]], [[C4]] + ; VI-NEXT: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; VI-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C5]](s32) ; VI-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]] ; VI-NEXT: [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) - ; VI-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[LOAD2]], [[C5]] - ; VI-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[LOAD3]], [[C5]] - ; VI-NEXT: 
[[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C4]](s32) + ; VI-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[LOAD2]], [[C4]] + ; VI-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[LOAD3]], [[C4]] + ; VI-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C5]](s32) ; VI-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL1]] ; VI-NEXT: [[BITCAST2:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32) - ; VI-NEXT: [[AND4:%[0-9]+]]:_(s32) = G_AND [[LOAD4]], [[C5]] - ; VI-NEXT: [[AND5:%[0-9]+]]:_(s32) = G_AND [[BITCAST]], [[C5]] - ; VI-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[C4]](s32) + ; VI-NEXT: [[AND4:%[0-9]+]]:_(s32) = G_AND [[LOAD4]], [[C4]] + ; VI-NEXT: [[AND5:%[0-9]+]]:_(s32) = G_AND [[BITCAST]], [[C4]] + ; VI-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[C5]](s32) ; VI-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[AND4]], [[SHL2]] ; VI-NEXT: [[BITCAST3:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR2]](s32) ; VI-NEXT: $vgpr0 = COPY [[BITCAST1]](<2 x s16>) @@ -7625,21 +7590,21 @@ body: | ; SI-NEXT: [[DEF:%[0-9]+]]:_(<6 x s16>) = G_IMPLICIT_DEF ; SI-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>), [[UV2:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<6 x s16>) ; SI-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>) - ; SI-NEXT: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; SI-NEXT: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 - ; SI-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[LOAD]], [[C5]] - ; SI-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LOAD1]], [[C5]] - ; SI-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C4]](s32) + ; SI-NEXT: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 + ; SI-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[LOAD]], [[C4]] + ; SI-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LOAD1]], [[C4]] + ; SI-NEXT: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; SI-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C5]](s32) ; SI-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]] ; SI-NEXT: [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) - ; SI-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[LOAD2]], [[C5]] - ; SI-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[LOAD3]], [[C5]] - ; SI-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C4]](s32) + ; SI-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[LOAD2]], [[C4]] + ; SI-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[LOAD3]], [[C4]] + ; SI-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C5]](s32) ; SI-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL1]] ; SI-NEXT: [[BITCAST2:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32) - ; SI-NEXT: [[AND4:%[0-9]+]]:_(s32) = G_AND [[LOAD4]], [[C5]] - ; SI-NEXT: [[AND5:%[0-9]+]]:_(s32) = G_AND [[BITCAST]], [[C5]] - ; SI-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[C4]](s32) + ; SI-NEXT: [[AND4:%[0-9]+]]:_(s32) = G_AND [[LOAD4]], [[C4]] + ; SI-NEXT: [[AND5:%[0-9]+]]:_(s32) = G_AND [[BITCAST]], [[C4]] + ; SI-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[C5]](s32) ; SI-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[AND4]], [[SHL2]] ; SI-NEXT: [[BITCAST3:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR2]](s32) ; SI-NEXT: $vgpr0 = COPY [[BITCAST1]](<2 x s16>) @@ -7663,21 +7628,21 @@ body: | ; CI-HSA-NEXT: [[DEF:%[0-9]+]]:_(<6 x s16>) = G_IMPLICIT_DEF ; CI-HSA-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>), [[UV2:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<6 x s16>) ; CI-HSA-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>) - ; CI-HSA-NEXT: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; CI-HSA-NEXT: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 - ; CI-HSA-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[LOAD]], [[C5]] - ; 
CI-HSA-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LOAD1]], [[C5]] - ; CI-HSA-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C4]](s32) + ; CI-HSA-NEXT: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 + ; CI-HSA-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[LOAD]], [[C4]] + ; CI-HSA-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LOAD1]], [[C4]] + ; CI-HSA-NEXT: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; CI-HSA-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C5]](s32) ; CI-HSA-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]] ; CI-HSA-NEXT: [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) - ; CI-HSA-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[LOAD2]], [[C5]] - ; CI-HSA-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[LOAD3]], [[C5]] - ; CI-HSA-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C4]](s32) + ; CI-HSA-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[LOAD2]], [[C4]] + ; CI-HSA-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[LOAD3]], [[C4]] + ; CI-HSA-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C5]](s32) ; CI-HSA-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL1]] ; CI-HSA-NEXT: [[BITCAST2:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32) - ; CI-HSA-NEXT: [[AND4:%[0-9]+]]:_(s32) = G_AND [[LOAD4]], [[C5]] - ; CI-HSA-NEXT: [[AND5:%[0-9]+]]:_(s32) = G_AND [[BITCAST]], [[C5]] - ; CI-HSA-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[C4]](s32) + ; CI-HSA-NEXT: [[AND4:%[0-9]+]]:_(s32) = G_AND [[LOAD4]], [[C4]] + ; CI-HSA-NEXT: [[AND5:%[0-9]+]]:_(s32) = G_AND [[BITCAST]], [[C4]] + ; CI-HSA-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[C5]](s32) ; CI-HSA-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[AND4]], [[SHL2]] ; CI-HSA-NEXT: [[BITCAST3:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR2]](s32) ; CI-HSA-NEXT: $vgpr0 = COPY [[BITCAST1]](<2 x s16>) @@ -7701,21 +7666,21 @@ body: | ; CI-MESA-NEXT: [[DEF:%[0-9]+]]:_(<6 x s16>) = G_IMPLICIT_DEF ; CI-MESA-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>), [[UV2:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<6 x s16>) ; CI-MESA-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>) - ; CI-MESA-NEXT: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; CI-MESA-NEXT: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 - ; CI-MESA-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[LOAD]], [[C5]] - ; CI-MESA-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LOAD1]], [[C5]] - ; CI-MESA-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C4]](s32) + ; CI-MESA-NEXT: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 + ; CI-MESA-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[LOAD]], [[C4]] + ; CI-MESA-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LOAD1]], [[C4]] + ; CI-MESA-NEXT: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; CI-MESA-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C5]](s32) ; CI-MESA-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]] ; CI-MESA-NEXT: [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) - ; CI-MESA-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[LOAD2]], [[C5]] - ; CI-MESA-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[LOAD3]], [[C5]] - ; CI-MESA-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C4]](s32) + ; CI-MESA-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[LOAD2]], [[C4]] + ; CI-MESA-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[LOAD3]], [[C4]] + ; CI-MESA-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C5]](s32) ; CI-MESA-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL1]] ; CI-MESA-NEXT: [[BITCAST2:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32) - ; CI-MESA-NEXT: [[AND4:%[0-9]+]]:_(s32) = G_AND [[LOAD4]], [[C5]] - ; CI-MESA-NEXT: [[AND5:%[0-9]+]]:_(s32) = G_AND [[BITCAST]], [[C5]] - ; 
CI-MESA-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[C4]](s32) + ; CI-MESA-NEXT: [[AND4:%[0-9]+]]:_(s32) = G_AND [[LOAD4]], [[C4]] + ; CI-MESA-NEXT: [[AND5:%[0-9]+]]:_(s32) = G_AND [[BITCAST]], [[C4]] + ; CI-MESA-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[C5]](s32) ; CI-MESA-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[AND4]], [[SHL2]] ; CI-MESA-NEXT: [[BITCAST3:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR2]](s32) ; CI-MESA-NEXT: $vgpr0 = COPY [[BITCAST1]](<2 x s16>) @@ -7739,21 +7704,21 @@ body: | ; VI-NEXT: [[DEF:%[0-9]+]]:_(<6 x s16>) = G_IMPLICIT_DEF ; VI-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>), [[UV2:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<6 x s16>) ; VI-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>) - ; VI-NEXT: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; VI-NEXT: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 - ; VI-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[LOAD]], [[C5]] - ; VI-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LOAD1]], [[C5]] - ; VI-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C4]](s32) + ; VI-NEXT: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 + ; VI-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[LOAD]], [[C4]] + ; VI-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LOAD1]], [[C4]] + ; VI-NEXT: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; VI-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C5]](s32) ; VI-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]] ; VI-NEXT: [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) - ; VI-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[LOAD2]], [[C5]] - ; VI-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[LOAD3]], [[C5]] - ; VI-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C4]](s32) + ; VI-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[LOAD2]], [[C4]] + ; VI-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[LOAD3]], [[C4]] + ; VI-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C5]](s32) ; VI-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL1]] ; VI-NEXT: [[BITCAST2:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32) - ; VI-NEXT: [[AND4:%[0-9]+]]:_(s32) = G_AND [[LOAD4]], [[C5]] - ; VI-NEXT: [[AND5:%[0-9]+]]:_(s32) = G_AND [[BITCAST]], [[C5]] - ; VI-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[C4]](s32) + ; VI-NEXT: [[AND4:%[0-9]+]]:_(s32) = G_AND [[LOAD4]], [[C4]] + ; VI-NEXT: [[AND5:%[0-9]+]]:_(s32) = G_AND [[BITCAST]], [[C4]] + ; VI-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[C5]](s32) ; VI-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[AND4]], [[SHL2]] ; VI-NEXT: [[BITCAST3:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR2]](s32) ; VI-NEXT: $vgpr0 = COPY [[BITCAST1]](<2 x s16>) @@ -7864,21 +7829,21 @@ body: | ; SI-NEXT: [[DEF:%[0-9]+]]:_(<6 x s16>) = G_IMPLICIT_DEF ; SI-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>), [[UV2:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<6 x s16>) ; SI-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>) - ; SI-NEXT: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; SI-NEXT: [[C7:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 - ; SI-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[OR]], [[C7]] - ; SI-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[OR1]], [[C7]] - ; SI-NEXT: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C6]](s32) + ; SI-NEXT: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 + ; SI-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[OR]], [[C6]] + ; SI-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[OR1]], [[C6]] + ; SI-NEXT: [[C7:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; SI-NEXT: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C7]](s32) ; SI-NEXT: [[OR5:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL5]] ; SI-NEXT: 
[[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR5]](s32) - ; SI-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[OR2]], [[C7]] - ; SI-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[OR3]], [[C7]] - ; SI-NEXT: [[SHL6:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C6]](s32) + ; SI-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[OR2]], [[C6]] + ; SI-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[OR3]], [[C6]] + ; SI-NEXT: [[SHL6:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C7]](s32) ; SI-NEXT: [[OR6:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL6]] ; SI-NEXT: [[BITCAST2:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR6]](s32) - ; SI-NEXT: [[AND4:%[0-9]+]]:_(s32) = G_AND [[OR4]], [[C7]] - ; SI-NEXT: [[AND5:%[0-9]+]]:_(s32) = G_AND [[BITCAST]], [[C7]] - ; SI-NEXT: [[SHL7:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[C6]](s32) + ; SI-NEXT: [[AND4:%[0-9]+]]:_(s32) = G_AND [[OR4]], [[C6]] + ; SI-NEXT: [[AND5:%[0-9]+]]:_(s32) = G_AND [[BITCAST]], [[C6]] + ; SI-NEXT: [[SHL7:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[C7]](s32) ; SI-NEXT: [[OR7:%[0-9]+]]:_(s32) = G_OR [[AND4]], [[SHL7]] ; SI-NEXT: [[BITCAST3:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR7]](s32) ; SI-NEXT: $vgpr0 = COPY [[BITCAST1]](<2 x s16>) @@ -7902,21 +7867,21 @@ body: | ; CI-HSA-NEXT: [[DEF:%[0-9]+]]:_(<6 x s16>) = G_IMPLICIT_DEF ; CI-HSA-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>), [[UV2:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<6 x s16>) ; CI-HSA-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>) - ; CI-HSA-NEXT: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; CI-HSA-NEXT: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 - ; CI-HSA-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[LOAD]], [[C5]] - ; CI-HSA-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LOAD1]], [[C5]] - ; CI-HSA-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C4]](s32) + ; CI-HSA-NEXT: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 + ; CI-HSA-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[LOAD]], [[C4]] + ; CI-HSA-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LOAD1]], [[C4]] + ; CI-HSA-NEXT: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; CI-HSA-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C5]](s32) ; CI-HSA-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]] ; CI-HSA-NEXT: [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) - ; CI-HSA-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[LOAD2]], [[C5]] - ; CI-HSA-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[LOAD3]], [[C5]] - ; CI-HSA-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C4]](s32) + ; CI-HSA-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[LOAD2]], [[C4]] + ; CI-HSA-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[LOAD3]], [[C4]] + ; CI-HSA-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C5]](s32) ; CI-HSA-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL1]] ; CI-HSA-NEXT: [[BITCAST2:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32) - ; CI-HSA-NEXT: [[AND4:%[0-9]+]]:_(s32) = G_AND [[LOAD4]], [[C5]] - ; CI-HSA-NEXT: [[AND5:%[0-9]+]]:_(s32) = G_AND [[BITCAST]], [[C5]] - ; CI-HSA-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[C4]](s32) + ; CI-HSA-NEXT: [[AND4:%[0-9]+]]:_(s32) = G_AND [[LOAD4]], [[C4]] + ; CI-HSA-NEXT: [[AND5:%[0-9]+]]:_(s32) = G_AND [[BITCAST]], [[C4]] + ; CI-HSA-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[C5]](s32) ; CI-HSA-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[AND4]], [[SHL2]] ; CI-HSA-NEXT: [[BITCAST3:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR2]](s32) ; CI-HSA-NEXT: $vgpr0 = COPY [[BITCAST1]](<2 x s16>) @@ -7962,21 +7927,21 @@ body: | ; CI-MESA-NEXT: [[DEF:%[0-9]+]]:_(<6 x s16>) = G_IMPLICIT_DEF ; CI-MESA-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>), 
[[UV2:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<6 x s16>) ; CI-MESA-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>) - ; CI-MESA-NEXT: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; CI-MESA-NEXT: [[C7:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 - ; CI-MESA-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[OR]], [[C7]] - ; CI-MESA-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[OR1]], [[C7]] - ; CI-MESA-NEXT: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C6]](s32) + ; CI-MESA-NEXT: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 + ; CI-MESA-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[OR]], [[C6]] + ; CI-MESA-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[OR1]], [[C6]] + ; CI-MESA-NEXT: [[C7:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; CI-MESA-NEXT: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C7]](s32) ; CI-MESA-NEXT: [[OR5:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL5]] ; CI-MESA-NEXT: [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR5]](s32) - ; CI-MESA-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[OR2]], [[C7]] - ; CI-MESA-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[OR3]], [[C7]] - ; CI-MESA-NEXT: [[SHL6:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C6]](s32) + ; CI-MESA-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[OR2]], [[C6]] + ; CI-MESA-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[OR3]], [[C6]] + ; CI-MESA-NEXT: [[SHL6:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C7]](s32) ; CI-MESA-NEXT: [[OR6:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL6]] ; CI-MESA-NEXT: [[BITCAST2:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR6]](s32) - ; CI-MESA-NEXT: [[AND4:%[0-9]+]]:_(s32) = G_AND [[OR4]], [[C7]] - ; CI-MESA-NEXT: [[AND5:%[0-9]+]]:_(s32) = G_AND [[BITCAST]], [[C7]] - ; CI-MESA-NEXT: [[SHL7:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[C6]](s32) + ; CI-MESA-NEXT: [[AND4:%[0-9]+]]:_(s32) = G_AND [[OR4]], [[C6]] + ; CI-MESA-NEXT: [[AND5:%[0-9]+]]:_(s32) = G_AND [[BITCAST]], [[C6]] + ; CI-MESA-NEXT: [[SHL7:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[C7]](s32) ; CI-MESA-NEXT: [[OR7:%[0-9]+]]:_(s32) = G_OR [[AND4]], [[SHL7]] ; CI-MESA-NEXT: [[BITCAST3:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR7]](s32) ; CI-MESA-NEXT: $vgpr0 = COPY [[BITCAST1]](<2 x s16>) @@ -8022,21 +7987,21 @@ body: | ; VI-NEXT: [[DEF:%[0-9]+]]:_(<6 x s16>) = G_IMPLICIT_DEF ; VI-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>), [[UV2:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<6 x s16>) ; VI-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>) - ; VI-NEXT: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; VI-NEXT: [[C7:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 - ; VI-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[OR]], [[C7]] - ; VI-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[OR1]], [[C7]] - ; VI-NEXT: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C6]](s32) + ; VI-NEXT: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 + ; VI-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[OR]], [[C6]] + ; VI-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[OR1]], [[C6]] + ; VI-NEXT: [[C7:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; VI-NEXT: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C7]](s32) ; VI-NEXT: [[OR5:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL5]] ; VI-NEXT: [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR5]](s32) - ; VI-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[OR2]], [[C7]] - ; VI-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[OR3]], [[C7]] - ; VI-NEXT: [[SHL6:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C6]](s32) + ; VI-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[OR2]], [[C6]] + ; VI-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[OR3]], [[C6]] + ; VI-NEXT: [[SHL6:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C7]](s32) ; VI-NEXT: [[OR6:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL6]] ; 
VI-NEXT: [[BITCAST2:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR6]](s32) - ; VI-NEXT: [[AND4:%[0-9]+]]:_(s32) = G_AND [[OR4]], [[C7]] - ; VI-NEXT: [[AND5:%[0-9]+]]:_(s32) = G_AND [[BITCAST]], [[C7]] - ; VI-NEXT: [[SHL7:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[C6]](s32) + ; VI-NEXT: [[AND4:%[0-9]+]]:_(s32) = G_AND [[OR4]], [[C6]] + ; VI-NEXT: [[AND5:%[0-9]+]]:_(s32) = G_AND [[BITCAST]], [[C6]] + ; VI-NEXT: [[SHL7:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[C7]](s32) ; VI-NEXT: [[OR7:%[0-9]+]]:_(s32) = G_OR [[AND4]], [[SHL7]] ; VI-NEXT: [[BITCAST3:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR7]](s32) ; VI-NEXT: $vgpr0 = COPY [[BITCAST1]](<2 x s16>) @@ -8132,8 +8097,9 @@ body: | ; SI-LABEL: name: test_load_global_v6s16_align16 ; SI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 ; SI-NEXT: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p1) :: (load (<4 x s32>), addrspace 1) - ; SI-NEXT: [[EXTRACT:%[0-9]+]]:_(<3 x s32>) = G_EXTRACT [[LOAD]](<4 x s32>), 0 - ; SI-NEXT: [[BITCAST:%[0-9]+]]:_(<6 x s16>) = G_BITCAST [[EXTRACT]](<3 x s32>) + ; SI-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[LOAD]](<4 x s32>) + ; SI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[UV]](s32), [[UV1]](s32), [[UV2]](s32) + ; SI-NEXT: [[BITCAST:%[0-9]+]]:_(<6 x s16>) = G_BITCAST [[BUILD_VECTOR]](<3 x s32>) ; SI-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[BITCAST]](<6 x s16>) ; CI-HSA-LABEL: name: test_load_global_v6s16_align16 ; CI-HSA: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 @@ -8621,7 +8587,6 @@ body: | ; SI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 ; SI-NEXT: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p1) :: (load (<4 x s32>), addrspace 1) ; SI-NEXT: [[BITCAST:%[0-9]+]]:_(<8 x s16>) = G_BITCAST [[LOAD]](<4 x s32>) - ; SI-NEXT: [[DEF:%[0-9]+]]:_(<8 x s16>) = G_IMPLICIT_DEF ; SI-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>), [[UV2:%[0-9]+]]:_(<2 x s16>), [[UV3:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[BITCAST]](<8 x s16>) ; SI-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>) ; SI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 @@ -8631,6 +8596,7 @@ body: | ; SI-NEXT: [[BITCAST3:%[0-9]+]]:_(s32) = G_BITCAST [[UV2]](<2 x s16>) ; SI-NEXT: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST3]], [[C]](s32) ; SI-NEXT: [[BITCAST4:%[0-9]+]]:_(s32) = G_BITCAST [[UV3]](<2 x s16>) + ; SI-NEXT: [[DEF:%[0-9]+]]:_(<8 x s16>) = G_IMPLICIT_DEF ; SI-NEXT: [[UV4:%[0-9]+]]:_(<2 x s16>), [[UV5:%[0-9]+]]:_(<2 x s16>), [[UV6:%[0-9]+]]:_(<2 x s16>), [[UV7:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<8 x s16>) ; SI-NEXT: [[BITCAST5:%[0-9]+]]:_(s32) = G_BITCAST [[UV4]](<2 x s16>) ; SI-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 @@ -8662,7 +8628,6 @@ body: | ; CI-HSA: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 ; CI-HSA-NEXT: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p1) :: (load (<4 x s32>), addrspace 1) ; CI-HSA-NEXT: [[BITCAST:%[0-9]+]]:_(<8 x s16>) = G_BITCAST [[LOAD]](<4 x s32>) - ; CI-HSA-NEXT: [[DEF:%[0-9]+]]:_(<8 x s16>) = G_IMPLICIT_DEF ; CI-HSA-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>), [[UV2:%[0-9]+]]:_(<2 x s16>), [[UV3:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[BITCAST]](<8 x s16>) ; CI-HSA-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>) ; CI-HSA-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 @@ -8672,6 +8637,7 @@ body: | ; CI-HSA-NEXT: [[BITCAST3:%[0-9]+]]:_(s32) = G_BITCAST [[UV2]](<2 x s16>) ; CI-HSA-NEXT: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST3]], [[C]](s32) ; CI-HSA-NEXT: 
[[BITCAST4:%[0-9]+]]:_(s32) = G_BITCAST [[UV3]](<2 x s16>) + ; CI-HSA-NEXT: [[DEF:%[0-9]+]]:_(<8 x s16>) = G_IMPLICIT_DEF ; CI-HSA-NEXT: [[UV4:%[0-9]+]]:_(<2 x s16>), [[UV5:%[0-9]+]]:_(<2 x s16>), [[UV6:%[0-9]+]]:_(<2 x s16>), [[UV7:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<8 x s16>) ; CI-HSA-NEXT: [[BITCAST5:%[0-9]+]]:_(s32) = G_BITCAST [[UV4]](<2 x s16>) ; CI-HSA-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 @@ -8703,7 +8669,6 @@ body: | ; CI-MESA: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 ; CI-MESA-NEXT: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p1) :: (load (<4 x s32>), addrspace 1) ; CI-MESA-NEXT: [[BITCAST:%[0-9]+]]:_(<8 x s16>) = G_BITCAST [[LOAD]](<4 x s32>) - ; CI-MESA-NEXT: [[DEF:%[0-9]+]]:_(<8 x s16>) = G_IMPLICIT_DEF ; CI-MESA-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>), [[UV2:%[0-9]+]]:_(<2 x s16>), [[UV3:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[BITCAST]](<8 x s16>) ; CI-MESA-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>) ; CI-MESA-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 @@ -8713,6 +8678,7 @@ body: | ; CI-MESA-NEXT: [[BITCAST3:%[0-9]+]]:_(s32) = G_BITCAST [[UV2]](<2 x s16>) ; CI-MESA-NEXT: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST3]], [[C]](s32) ; CI-MESA-NEXT: [[BITCAST4:%[0-9]+]]:_(s32) = G_BITCAST [[UV3]](<2 x s16>) + ; CI-MESA-NEXT: [[DEF:%[0-9]+]]:_(<8 x s16>) = G_IMPLICIT_DEF ; CI-MESA-NEXT: [[UV4:%[0-9]+]]:_(<2 x s16>), [[UV5:%[0-9]+]]:_(<2 x s16>), [[UV6:%[0-9]+]]:_(<2 x s16>), [[UV7:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<8 x s16>) ; CI-MESA-NEXT: [[BITCAST5:%[0-9]+]]:_(s32) = G_BITCAST [[UV4]](<2 x s16>) ; CI-MESA-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 @@ -8744,7 +8710,6 @@ body: | ; VI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 ; VI-NEXT: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p1) :: (load (<4 x s32>), addrspace 1) ; VI-NEXT: [[BITCAST:%[0-9]+]]:_(<8 x s16>) = G_BITCAST [[LOAD]](<4 x s32>) - ; VI-NEXT: [[DEF:%[0-9]+]]:_(<8 x s16>) = G_IMPLICIT_DEF ; VI-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>), [[UV2:%[0-9]+]]:_(<2 x s16>), [[UV3:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[BITCAST]](<8 x s16>) ; VI-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>) ; VI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 @@ -8754,6 +8719,7 @@ body: | ; VI-NEXT: [[BITCAST3:%[0-9]+]]:_(s32) = G_BITCAST [[UV2]](<2 x s16>) ; VI-NEXT: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST3]], [[C]](s32) ; VI-NEXT: [[BITCAST4:%[0-9]+]]:_(s32) = G_BITCAST [[UV3]](<2 x s16>) + ; VI-NEXT: [[DEF:%[0-9]+]]:_(<8 x s16>) = G_IMPLICIT_DEF ; VI-NEXT: [[UV4:%[0-9]+]]:_(<2 x s16>), [[UV5:%[0-9]+]]:_(<2 x s16>), [[UV6:%[0-9]+]]:_(<2 x s16>), [[UV7:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<8 x s16>) ; VI-NEXT: [[BITCAST5:%[0-9]+]]:_(s32) = G_BITCAST [[UV4]](<2 x s16>) ; VI-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 @@ -8785,7 +8751,6 @@ body: | ; GFX9-HSA: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 ; GFX9-HSA-NEXT: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p1) :: (load (<4 x s32>), addrspace 1) ; GFX9-HSA-NEXT: [[BITCAST:%[0-9]+]]:_(<8 x s16>) = G_BITCAST [[LOAD]](<4 x s32>) - ; GFX9-HSA-NEXT: [[DEF:%[0-9]+]]:_(<8 x s16>) = G_IMPLICIT_DEF ; GFX9-HSA-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>), [[UV2:%[0-9]+]]:_(<2 x s16>), [[UV3:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[BITCAST]](<8 x s16>) ; GFX9-HSA-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>) ; GFX9-HSA-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 @@ -8795,6 
+8760,7 @@ body: | ; GFX9-HSA-NEXT: [[BITCAST3:%[0-9]+]]:_(s32) = G_BITCAST [[UV2]](<2 x s16>) ; GFX9-HSA-NEXT: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST3]], [[C]](s32) ; GFX9-HSA-NEXT: [[BITCAST4:%[0-9]+]]:_(s32) = G_BITCAST [[UV3]](<2 x s16>) + ; GFX9-HSA-NEXT: [[DEF:%[0-9]+]]:_(<8 x s16>) = G_IMPLICIT_DEF ; GFX9-HSA-NEXT: [[UV4:%[0-9]+]]:_(<2 x s16>), [[UV5:%[0-9]+]]:_(<2 x s16>), [[UV6:%[0-9]+]]:_(<2 x s16>), [[UV7:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<8 x s16>) ; GFX9-HSA-NEXT: [[BITCAST5:%[0-9]+]]:_(s32) = G_BITCAST [[UV4]](<2 x s16>) ; GFX9-HSA-NEXT: [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[BITCAST1]](s32), [[LSHR]](s32) @@ -8809,7 +8775,6 @@ body: | ; GFX9-MESA: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 ; GFX9-MESA-NEXT: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p1) :: (load (<4 x s32>), addrspace 1) ; GFX9-MESA-NEXT: [[BITCAST:%[0-9]+]]:_(<8 x s16>) = G_BITCAST [[LOAD]](<4 x s32>) - ; GFX9-MESA-NEXT: [[DEF:%[0-9]+]]:_(<8 x s16>) = G_IMPLICIT_DEF ; GFX9-MESA-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>), [[UV2:%[0-9]+]]:_(<2 x s16>), [[UV3:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[BITCAST]](<8 x s16>) ; GFX9-MESA-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>) ; GFX9-MESA-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 @@ -8819,6 +8784,7 @@ body: | ; GFX9-MESA-NEXT: [[BITCAST3:%[0-9]+]]:_(s32) = G_BITCAST [[UV2]](<2 x s16>) ; GFX9-MESA-NEXT: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST3]], [[C]](s32) ; GFX9-MESA-NEXT: [[BITCAST4:%[0-9]+]]:_(s32) = G_BITCAST [[UV3]](<2 x s16>) + ; GFX9-MESA-NEXT: [[DEF:%[0-9]+]]:_(<8 x s16>) = G_IMPLICIT_DEF ; GFX9-MESA-NEXT: [[UV4:%[0-9]+]]:_(<2 x s16>), [[UV5:%[0-9]+]]:_(<2 x s16>), [[UV6:%[0-9]+]]:_(<2 x s16>), [[UV7:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<8 x s16>) ; GFX9-MESA-NEXT: [[BITCAST5:%[0-9]+]]:_(s32) = G_BITCAST [[UV4]](<2 x s16>) ; GFX9-MESA-NEXT: [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[BITCAST1]](s32), [[LSHR]](s32) @@ -8871,26 +8837,26 @@ body: | ; SI-NEXT: [[DEF:%[0-9]+]]:_(<8 x s16>) = G_IMPLICIT_DEF ; SI-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>), [[UV2:%[0-9]+]]:_(<2 x s16>), [[UV3:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<8 x s16>) ; SI-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>) - ; SI-NEXT: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; SI-NEXT: [[C7:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 - ; SI-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[LOAD]], [[C7]] - ; SI-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LOAD1]], [[C7]] - ; SI-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C6]](s32) + ; SI-NEXT: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 + ; SI-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[LOAD]], [[C6]] + ; SI-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LOAD1]], [[C6]] + ; SI-NEXT: [[C7:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; SI-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C7]](s32) ; SI-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]] ; SI-NEXT: [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) - ; SI-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[LOAD2]], [[C7]] - ; SI-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[LOAD3]], [[C7]] - ; SI-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C6]](s32) + ; SI-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[LOAD2]], [[C6]] + ; SI-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[LOAD3]], [[C6]] + ; SI-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C7]](s32) ; SI-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL1]] ; SI-NEXT: 
[[BITCAST2:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32) - ; SI-NEXT: [[AND4:%[0-9]+]]:_(s32) = G_AND [[LOAD4]], [[C7]] - ; SI-NEXT: [[AND5:%[0-9]+]]:_(s32) = G_AND [[LOAD5]], [[C7]] - ; SI-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[C6]](s32) + ; SI-NEXT: [[AND4:%[0-9]+]]:_(s32) = G_AND [[LOAD4]], [[C6]] + ; SI-NEXT: [[AND5:%[0-9]+]]:_(s32) = G_AND [[LOAD5]], [[C6]] + ; SI-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[C7]](s32) ; SI-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[AND4]], [[SHL2]] ; SI-NEXT: [[BITCAST3:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR2]](s32) - ; SI-NEXT: [[AND6:%[0-9]+]]:_(s32) = G_AND [[LOAD6]], [[C7]] - ; SI-NEXT: [[AND7:%[0-9]+]]:_(s32) = G_AND [[BITCAST]], [[C7]] - ; SI-NEXT: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[AND7]], [[C6]](s32) + ; SI-NEXT: [[AND6:%[0-9]+]]:_(s32) = G_AND [[LOAD6]], [[C6]] + ; SI-NEXT: [[AND7:%[0-9]+]]:_(s32) = G_AND [[BITCAST]], [[C6]] + ; SI-NEXT: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[AND7]], [[C7]](s32) ; SI-NEXT: [[OR3:%[0-9]+]]:_(s32) = G_OR [[AND6]], [[SHL3]] ; SI-NEXT: [[BITCAST4:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR3]](s32) ; SI-NEXT: $vgpr0 = COPY [[BITCAST1]](<2 x s16>) @@ -8921,26 +8887,26 @@ body: | ; CI-HSA-NEXT: [[DEF:%[0-9]+]]:_(<8 x s16>) = G_IMPLICIT_DEF ; CI-HSA-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>), [[UV2:%[0-9]+]]:_(<2 x s16>), [[UV3:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<8 x s16>) ; CI-HSA-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>) - ; CI-HSA-NEXT: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; CI-HSA-NEXT: [[C7:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 - ; CI-HSA-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[LOAD]], [[C7]] - ; CI-HSA-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LOAD1]], [[C7]] - ; CI-HSA-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C6]](s32) + ; CI-HSA-NEXT: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 + ; CI-HSA-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[LOAD]], [[C6]] + ; CI-HSA-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LOAD1]], [[C6]] + ; CI-HSA-NEXT: [[C7:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; CI-HSA-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C7]](s32) ; CI-HSA-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]] ; CI-HSA-NEXT: [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) - ; CI-HSA-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[LOAD2]], [[C7]] - ; CI-HSA-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[LOAD3]], [[C7]] - ; CI-HSA-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C6]](s32) + ; CI-HSA-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[LOAD2]], [[C6]] + ; CI-HSA-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[LOAD3]], [[C6]] + ; CI-HSA-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C7]](s32) ; CI-HSA-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL1]] ; CI-HSA-NEXT: [[BITCAST2:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32) - ; CI-HSA-NEXT: [[AND4:%[0-9]+]]:_(s32) = G_AND [[LOAD4]], [[C7]] - ; CI-HSA-NEXT: [[AND5:%[0-9]+]]:_(s32) = G_AND [[LOAD5]], [[C7]] - ; CI-HSA-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[C6]](s32) + ; CI-HSA-NEXT: [[AND4:%[0-9]+]]:_(s32) = G_AND [[LOAD4]], [[C6]] + ; CI-HSA-NEXT: [[AND5:%[0-9]+]]:_(s32) = G_AND [[LOAD5]], [[C6]] + ; CI-HSA-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[C7]](s32) ; CI-HSA-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[AND4]], [[SHL2]] ; CI-HSA-NEXT: [[BITCAST3:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR2]](s32) - ; CI-HSA-NEXT: [[AND6:%[0-9]+]]:_(s32) = G_AND [[LOAD6]], [[C7]] - ; CI-HSA-NEXT: [[AND7:%[0-9]+]]:_(s32) = G_AND [[BITCAST]], [[C7]] - ; CI-HSA-NEXT: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[AND7]], 
[[C6]](s32) + ; CI-HSA-NEXT: [[AND6:%[0-9]+]]:_(s32) = G_AND [[LOAD6]], [[C6]] + ; CI-HSA-NEXT: [[AND7:%[0-9]+]]:_(s32) = G_AND [[BITCAST]], [[C6]] + ; CI-HSA-NEXT: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[AND7]], [[C7]](s32) ; CI-HSA-NEXT: [[OR3:%[0-9]+]]:_(s32) = G_OR [[AND6]], [[SHL3]] ; CI-HSA-NEXT: [[BITCAST4:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR3]](s32) ; CI-HSA-NEXT: $vgpr0 = COPY [[BITCAST1]](<2 x s16>) @@ -8971,26 +8937,26 @@ body: | ; CI-MESA-NEXT: [[DEF:%[0-9]+]]:_(<8 x s16>) = G_IMPLICIT_DEF ; CI-MESA-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>), [[UV2:%[0-9]+]]:_(<2 x s16>), [[UV3:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<8 x s16>) ; CI-MESA-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>) - ; CI-MESA-NEXT: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; CI-MESA-NEXT: [[C7:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 - ; CI-MESA-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[LOAD]], [[C7]] - ; CI-MESA-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LOAD1]], [[C7]] - ; CI-MESA-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C6]](s32) + ; CI-MESA-NEXT: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 + ; CI-MESA-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[LOAD]], [[C6]] + ; CI-MESA-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LOAD1]], [[C6]] + ; CI-MESA-NEXT: [[C7:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; CI-MESA-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C7]](s32) ; CI-MESA-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]] ; CI-MESA-NEXT: [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) - ; CI-MESA-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[LOAD2]], [[C7]] - ; CI-MESA-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[LOAD3]], [[C7]] - ; CI-MESA-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C6]](s32) + ; CI-MESA-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[LOAD2]], [[C6]] + ; CI-MESA-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[LOAD3]], [[C6]] + ; CI-MESA-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C7]](s32) ; CI-MESA-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL1]] ; CI-MESA-NEXT: [[BITCAST2:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32) - ; CI-MESA-NEXT: [[AND4:%[0-9]+]]:_(s32) = G_AND [[LOAD4]], [[C7]] - ; CI-MESA-NEXT: [[AND5:%[0-9]+]]:_(s32) = G_AND [[LOAD5]], [[C7]] - ; CI-MESA-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[C6]](s32) + ; CI-MESA-NEXT: [[AND4:%[0-9]+]]:_(s32) = G_AND [[LOAD4]], [[C6]] + ; CI-MESA-NEXT: [[AND5:%[0-9]+]]:_(s32) = G_AND [[LOAD5]], [[C6]] + ; CI-MESA-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[C7]](s32) ; CI-MESA-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[AND4]], [[SHL2]] ; CI-MESA-NEXT: [[BITCAST3:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR2]](s32) - ; CI-MESA-NEXT: [[AND6:%[0-9]+]]:_(s32) = G_AND [[LOAD6]], [[C7]] - ; CI-MESA-NEXT: [[AND7:%[0-9]+]]:_(s32) = G_AND [[BITCAST]], [[C7]] - ; CI-MESA-NEXT: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[AND7]], [[C6]](s32) + ; CI-MESA-NEXT: [[AND6:%[0-9]+]]:_(s32) = G_AND [[LOAD6]], [[C6]] + ; CI-MESA-NEXT: [[AND7:%[0-9]+]]:_(s32) = G_AND [[BITCAST]], [[C6]] + ; CI-MESA-NEXT: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[AND7]], [[C7]](s32) ; CI-MESA-NEXT: [[OR3:%[0-9]+]]:_(s32) = G_OR [[AND6]], [[SHL3]] ; CI-MESA-NEXT: [[BITCAST4:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR3]](s32) ; CI-MESA-NEXT: $vgpr0 = COPY [[BITCAST1]](<2 x s16>) @@ -9021,26 +8987,26 @@ body: | ; VI-NEXT: [[DEF:%[0-9]+]]:_(<8 x s16>) = G_IMPLICIT_DEF ; VI-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>), [[UV2:%[0-9]+]]:_(<2 x s16>), [[UV3:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<8 x s16>) ; VI-NEXT: 
[[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>) - ; VI-NEXT: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; VI-NEXT: [[C7:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 - ; VI-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[LOAD]], [[C7]] - ; VI-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LOAD1]], [[C7]] - ; VI-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C6]](s32) + ; VI-NEXT: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 + ; VI-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[LOAD]], [[C6]] + ; VI-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LOAD1]], [[C6]] + ; VI-NEXT: [[C7:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; VI-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C7]](s32) ; VI-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]] ; VI-NEXT: [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) - ; VI-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[LOAD2]], [[C7]] - ; VI-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[LOAD3]], [[C7]] - ; VI-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C6]](s32) + ; VI-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[LOAD2]], [[C6]] + ; VI-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[LOAD3]], [[C6]] + ; VI-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C7]](s32) ; VI-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL1]] ; VI-NEXT: [[BITCAST2:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32) - ; VI-NEXT: [[AND4:%[0-9]+]]:_(s32) = G_AND [[LOAD4]], [[C7]] - ; VI-NEXT: [[AND5:%[0-9]+]]:_(s32) = G_AND [[LOAD5]], [[C7]] - ; VI-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[C6]](s32) + ; VI-NEXT: [[AND4:%[0-9]+]]:_(s32) = G_AND [[LOAD4]], [[C6]] + ; VI-NEXT: [[AND5:%[0-9]+]]:_(s32) = G_AND [[LOAD5]], [[C6]] + ; VI-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[C7]](s32) ; VI-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[AND4]], [[SHL2]] ; VI-NEXT: [[BITCAST3:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR2]](s32) - ; VI-NEXT: [[AND6:%[0-9]+]]:_(s32) = G_AND [[LOAD6]], [[C7]] - ; VI-NEXT: [[AND7:%[0-9]+]]:_(s32) = G_AND [[BITCAST]], [[C7]] - ; VI-NEXT: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[AND7]], [[C6]](s32) + ; VI-NEXT: [[AND6:%[0-9]+]]:_(s32) = G_AND [[LOAD6]], [[C6]] + ; VI-NEXT: [[AND7:%[0-9]+]]:_(s32) = G_AND [[BITCAST]], [[C6]] + ; VI-NEXT: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[AND7]], [[C7]](s32) ; VI-NEXT: [[OR3:%[0-9]+]]:_(s32) = G_OR [[AND6]], [[SHL3]] ; VI-NEXT: [[BITCAST4:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR3]](s32) ; VI-NEXT: $vgpr0 = COPY [[BITCAST1]](<2 x s16>) @@ -9153,26 +9119,26 @@ body: | ; SI-NEXT: [[DEF:%[0-9]+]]:_(<8 x s16>) = G_IMPLICIT_DEF ; SI-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>), [[UV2:%[0-9]+]]:_(<2 x s16>), [[UV3:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<8 x s16>) ; SI-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>) - ; SI-NEXT: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; SI-NEXT: [[C7:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 - ; SI-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[LOAD]], [[C7]] - ; SI-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LOAD1]], [[C7]] - ; SI-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C6]](s32) + ; SI-NEXT: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 + ; SI-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[LOAD]], [[C6]] + ; SI-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LOAD1]], [[C6]] + ; SI-NEXT: [[C7:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; SI-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C7]](s32) ; SI-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]] ; SI-NEXT: [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) - ; SI-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[LOAD2]], [[C7]] - ; SI-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND 
[[LOAD3]], [[C7]] - ; SI-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C6]](s32) + ; SI-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[LOAD2]], [[C6]] + ; SI-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[LOAD3]], [[C6]] + ; SI-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C7]](s32) ; SI-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL1]] ; SI-NEXT: [[BITCAST2:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32) - ; SI-NEXT: [[AND4:%[0-9]+]]:_(s32) = G_AND [[LOAD4]], [[C7]] - ; SI-NEXT: [[AND5:%[0-9]+]]:_(s32) = G_AND [[LOAD5]], [[C7]] - ; SI-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[C6]](s32) + ; SI-NEXT: [[AND4:%[0-9]+]]:_(s32) = G_AND [[LOAD4]], [[C6]] + ; SI-NEXT: [[AND5:%[0-9]+]]:_(s32) = G_AND [[LOAD5]], [[C6]] + ; SI-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[C7]](s32) ; SI-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[AND4]], [[SHL2]] ; SI-NEXT: [[BITCAST3:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR2]](s32) - ; SI-NEXT: [[AND6:%[0-9]+]]:_(s32) = G_AND [[LOAD6]], [[C7]] - ; SI-NEXT: [[AND7:%[0-9]+]]:_(s32) = G_AND [[BITCAST]], [[C7]] - ; SI-NEXT: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[AND7]], [[C6]](s32) + ; SI-NEXT: [[AND6:%[0-9]+]]:_(s32) = G_AND [[LOAD6]], [[C6]] + ; SI-NEXT: [[AND7:%[0-9]+]]:_(s32) = G_AND [[BITCAST]], [[C6]] + ; SI-NEXT: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[AND7]], [[C7]](s32) ; SI-NEXT: [[OR3:%[0-9]+]]:_(s32) = G_OR [[AND6]], [[SHL3]] ; SI-NEXT: [[BITCAST4:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR3]](s32) ; SI-NEXT: $vgpr0 = COPY [[BITCAST1]](<2 x s16>) @@ -9203,26 +9169,26 @@ body: | ; CI-HSA-NEXT: [[DEF:%[0-9]+]]:_(<8 x s16>) = G_IMPLICIT_DEF ; CI-HSA-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>), [[UV2:%[0-9]+]]:_(<2 x s16>), [[UV3:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<8 x s16>) ; CI-HSA-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>) - ; CI-HSA-NEXT: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; CI-HSA-NEXT: [[C7:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 - ; CI-HSA-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[LOAD]], [[C7]] - ; CI-HSA-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LOAD1]], [[C7]] - ; CI-HSA-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C6]](s32) + ; CI-HSA-NEXT: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 + ; CI-HSA-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[LOAD]], [[C6]] + ; CI-HSA-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LOAD1]], [[C6]] + ; CI-HSA-NEXT: [[C7:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; CI-HSA-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C7]](s32) ; CI-HSA-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]] ; CI-HSA-NEXT: [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) - ; CI-HSA-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[LOAD2]], [[C7]] - ; CI-HSA-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[LOAD3]], [[C7]] - ; CI-HSA-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C6]](s32) + ; CI-HSA-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[LOAD2]], [[C6]] + ; CI-HSA-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[LOAD3]], [[C6]] + ; CI-HSA-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C7]](s32) ; CI-HSA-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL1]] ; CI-HSA-NEXT: [[BITCAST2:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32) - ; CI-HSA-NEXT: [[AND4:%[0-9]+]]:_(s32) = G_AND [[LOAD4]], [[C7]] - ; CI-HSA-NEXT: [[AND5:%[0-9]+]]:_(s32) = G_AND [[LOAD5]], [[C7]] - ; CI-HSA-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[C6]](s32) + ; CI-HSA-NEXT: [[AND4:%[0-9]+]]:_(s32) = G_AND [[LOAD4]], [[C6]] + ; CI-HSA-NEXT: [[AND5:%[0-9]+]]:_(s32) = G_AND [[LOAD5]], [[C6]] + ; CI-HSA-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND5]], 
[[C7]](s32) ; CI-HSA-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[AND4]], [[SHL2]] ; CI-HSA-NEXT: [[BITCAST3:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR2]](s32) - ; CI-HSA-NEXT: [[AND6:%[0-9]+]]:_(s32) = G_AND [[LOAD6]], [[C7]] - ; CI-HSA-NEXT: [[AND7:%[0-9]+]]:_(s32) = G_AND [[BITCAST]], [[C7]] - ; CI-HSA-NEXT: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[AND7]], [[C6]](s32) + ; CI-HSA-NEXT: [[AND6:%[0-9]+]]:_(s32) = G_AND [[LOAD6]], [[C6]] + ; CI-HSA-NEXT: [[AND7:%[0-9]+]]:_(s32) = G_AND [[BITCAST]], [[C6]] + ; CI-HSA-NEXT: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[AND7]], [[C7]](s32) ; CI-HSA-NEXT: [[OR3:%[0-9]+]]:_(s32) = G_OR [[AND6]], [[SHL3]] ; CI-HSA-NEXT: [[BITCAST4:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR3]](s32) ; CI-HSA-NEXT: $vgpr0 = COPY [[BITCAST1]](<2 x s16>) @@ -9253,26 +9219,26 @@ body: | ; CI-MESA-NEXT: [[DEF:%[0-9]+]]:_(<8 x s16>) = G_IMPLICIT_DEF ; CI-MESA-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>), [[UV2:%[0-9]+]]:_(<2 x s16>), [[UV3:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<8 x s16>) ; CI-MESA-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>) - ; CI-MESA-NEXT: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; CI-MESA-NEXT: [[C7:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 - ; CI-MESA-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[LOAD]], [[C7]] - ; CI-MESA-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LOAD1]], [[C7]] - ; CI-MESA-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C6]](s32) + ; CI-MESA-NEXT: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 + ; CI-MESA-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[LOAD]], [[C6]] + ; CI-MESA-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LOAD1]], [[C6]] + ; CI-MESA-NEXT: [[C7:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; CI-MESA-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C7]](s32) ; CI-MESA-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]] ; CI-MESA-NEXT: [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) - ; CI-MESA-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[LOAD2]], [[C7]] - ; CI-MESA-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[LOAD3]], [[C7]] - ; CI-MESA-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C6]](s32) + ; CI-MESA-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[LOAD2]], [[C6]] + ; CI-MESA-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[LOAD3]], [[C6]] + ; CI-MESA-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C7]](s32) ; CI-MESA-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL1]] ; CI-MESA-NEXT: [[BITCAST2:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32) - ; CI-MESA-NEXT: [[AND4:%[0-9]+]]:_(s32) = G_AND [[LOAD4]], [[C7]] - ; CI-MESA-NEXT: [[AND5:%[0-9]+]]:_(s32) = G_AND [[LOAD5]], [[C7]] - ; CI-MESA-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[C6]](s32) + ; CI-MESA-NEXT: [[AND4:%[0-9]+]]:_(s32) = G_AND [[LOAD4]], [[C6]] + ; CI-MESA-NEXT: [[AND5:%[0-9]+]]:_(s32) = G_AND [[LOAD5]], [[C6]] + ; CI-MESA-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[C7]](s32) ; CI-MESA-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[AND4]], [[SHL2]] ; CI-MESA-NEXT: [[BITCAST3:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR2]](s32) - ; CI-MESA-NEXT: [[AND6:%[0-9]+]]:_(s32) = G_AND [[LOAD6]], [[C7]] - ; CI-MESA-NEXT: [[AND7:%[0-9]+]]:_(s32) = G_AND [[BITCAST]], [[C7]] - ; CI-MESA-NEXT: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[AND7]], [[C6]](s32) + ; CI-MESA-NEXT: [[AND6:%[0-9]+]]:_(s32) = G_AND [[LOAD6]], [[C6]] + ; CI-MESA-NEXT: [[AND7:%[0-9]+]]:_(s32) = G_AND [[BITCAST]], [[C6]] + ; CI-MESA-NEXT: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[AND7]], [[C7]](s32) ; CI-MESA-NEXT: [[OR3:%[0-9]+]]:_(s32) = G_OR [[AND6]], [[SHL3]] ; CI-MESA-NEXT: [[BITCAST4:%[0-9]+]]:_(<2 x s16>) = G_BITCAST 
[[OR3]](s32) ; CI-MESA-NEXT: $vgpr0 = COPY [[BITCAST1]](<2 x s16>) @@ -9303,26 +9269,26 @@ body: | ; VI-NEXT: [[DEF:%[0-9]+]]:_(<8 x s16>) = G_IMPLICIT_DEF ; VI-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>), [[UV2:%[0-9]+]]:_(<2 x s16>), [[UV3:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<8 x s16>) ; VI-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>) - ; VI-NEXT: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; VI-NEXT: [[C7:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 - ; VI-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[LOAD]], [[C7]] - ; VI-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LOAD1]], [[C7]] - ; VI-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C6]](s32) + ; VI-NEXT: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 + ; VI-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[LOAD]], [[C6]] + ; VI-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LOAD1]], [[C6]] + ; VI-NEXT: [[C7:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; VI-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C7]](s32) ; VI-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]] ; VI-NEXT: [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) - ; VI-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[LOAD2]], [[C7]] - ; VI-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[LOAD3]], [[C7]] - ; VI-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C6]](s32) + ; VI-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[LOAD2]], [[C6]] + ; VI-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[LOAD3]], [[C6]] + ; VI-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C7]](s32) ; VI-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL1]] ; VI-NEXT: [[BITCAST2:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32) - ; VI-NEXT: [[AND4:%[0-9]+]]:_(s32) = G_AND [[LOAD4]], [[C7]] - ; VI-NEXT: [[AND5:%[0-9]+]]:_(s32) = G_AND [[LOAD5]], [[C7]] - ; VI-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[C6]](s32) + ; VI-NEXT: [[AND4:%[0-9]+]]:_(s32) = G_AND [[LOAD4]], [[C6]] + ; VI-NEXT: [[AND5:%[0-9]+]]:_(s32) = G_AND [[LOAD5]], [[C6]] + ; VI-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[C7]](s32) ; VI-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[AND4]], [[SHL2]] ; VI-NEXT: [[BITCAST3:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR2]](s32) - ; VI-NEXT: [[AND6:%[0-9]+]]:_(s32) = G_AND [[LOAD6]], [[C7]] - ; VI-NEXT: [[AND7:%[0-9]+]]:_(s32) = G_AND [[BITCAST]], [[C7]] - ; VI-NEXT: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[AND7]], [[C6]](s32) + ; VI-NEXT: [[AND6:%[0-9]+]]:_(s32) = G_AND [[LOAD6]], [[C6]] + ; VI-NEXT: [[AND7:%[0-9]+]]:_(s32) = G_AND [[BITCAST]], [[C6]] + ; VI-NEXT: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[AND7]], [[C7]](s32) ; VI-NEXT: [[OR3:%[0-9]+]]:_(s32) = G_OR [[AND6]], [[SHL3]] ; VI-NEXT: [[BITCAST4:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR3]](s32) ; VI-NEXT: $vgpr0 = COPY [[BITCAST1]](<2 x s16>) @@ -9435,26 +9401,26 @@ body: | ; SI-NEXT: [[DEF:%[0-9]+]]:_(<8 x s16>) = G_IMPLICIT_DEF ; SI-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>), [[UV2:%[0-9]+]]:_(<2 x s16>), [[UV3:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<8 x s16>) ; SI-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>) - ; SI-NEXT: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; SI-NEXT: [[C7:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 - ; SI-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[LOAD]], [[C7]] - ; SI-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LOAD1]], [[C7]] - ; SI-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C6]](s32) + ; SI-NEXT: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 + ; SI-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[LOAD]], [[C6]] + ; SI-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LOAD1]], [[C6]] + ; SI-NEXT: 
[[C7:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; SI-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C7]](s32) ; SI-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]] ; SI-NEXT: [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) - ; SI-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[LOAD2]], [[C7]] - ; SI-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[LOAD3]], [[C7]] - ; SI-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C6]](s32) + ; SI-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[LOAD2]], [[C6]] + ; SI-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[LOAD3]], [[C6]] + ; SI-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C7]](s32) ; SI-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL1]] ; SI-NEXT: [[BITCAST2:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32) - ; SI-NEXT: [[AND4:%[0-9]+]]:_(s32) = G_AND [[LOAD4]], [[C7]] - ; SI-NEXT: [[AND5:%[0-9]+]]:_(s32) = G_AND [[LOAD5]], [[C7]] - ; SI-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[C6]](s32) + ; SI-NEXT: [[AND4:%[0-9]+]]:_(s32) = G_AND [[LOAD4]], [[C6]] + ; SI-NEXT: [[AND5:%[0-9]+]]:_(s32) = G_AND [[LOAD5]], [[C6]] + ; SI-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[C7]](s32) ; SI-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[AND4]], [[SHL2]] ; SI-NEXT: [[BITCAST3:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR2]](s32) - ; SI-NEXT: [[AND6:%[0-9]+]]:_(s32) = G_AND [[LOAD6]], [[C7]] - ; SI-NEXT: [[AND7:%[0-9]+]]:_(s32) = G_AND [[BITCAST]], [[C7]] - ; SI-NEXT: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[AND7]], [[C6]](s32) + ; SI-NEXT: [[AND6:%[0-9]+]]:_(s32) = G_AND [[LOAD6]], [[C6]] + ; SI-NEXT: [[AND7:%[0-9]+]]:_(s32) = G_AND [[BITCAST]], [[C6]] + ; SI-NEXT: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[AND7]], [[C7]](s32) ; SI-NEXT: [[OR3:%[0-9]+]]:_(s32) = G_OR [[AND6]], [[SHL3]] ; SI-NEXT: [[BITCAST4:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR3]](s32) ; SI-NEXT: $vgpr0 = COPY [[BITCAST1]](<2 x s16>) @@ -9485,26 +9451,26 @@ body: | ; CI-HSA-NEXT: [[DEF:%[0-9]+]]:_(<8 x s16>) = G_IMPLICIT_DEF ; CI-HSA-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>), [[UV2:%[0-9]+]]:_(<2 x s16>), [[UV3:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<8 x s16>) ; CI-HSA-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>) - ; CI-HSA-NEXT: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; CI-HSA-NEXT: [[C7:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 - ; CI-HSA-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[LOAD]], [[C7]] - ; CI-HSA-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LOAD1]], [[C7]] - ; CI-HSA-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C6]](s32) + ; CI-HSA-NEXT: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 + ; CI-HSA-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[LOAD]], [[C6]] + ; CI-HSA-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LOAD1]], [[C6]] + ; CI-HSA-NEXT: [[C7:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; CI-HSA-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C7]](s32) ; CI-HSA-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]] ; CI-HSA-NEXT: [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) - ; CI-HSA-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[LOAD2]], [[C7]] - ; CI-HSA-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[LOAD3]], [[C7]] - ; CI-HSA-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C6]](s32) + ; CI-HSA-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[LOAD2]], [[C6]] + ; CI-HSA-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[LOAD3]], [[C6]] + ; CI-HSA-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C7]](s32) ; CI-HSA-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL1]] ; CI-HSA-NEXT: [[BITCAST2:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32) - ; CI-HSA-NEXT: [[AND4:%[0-9]+]]:_(s32) = G_AND 
[[LOAD4]], [[C7]] - ; CI-HSA-NEXT: [[AND5:%[0-9]+]]:_(s32) = G_AND [[LOAD5]], [[C7]] - ; CI-HSA-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[C6]](s32) + ; CI-HSA-NEXT: [[AND4:%[0-9]+]]:_(s32) = G_AND [[LOAD4]], [[C6]] + ; CI-HSA-NEXT: [[AND5:%[0-9]+]]:_(s32) = G_AND [[LOAD5]], [[C6]] + ; CI-HSA-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[C7]](s32) ; CI-HSA-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[AND4]], [[SHL2]] ; CI-HSA-NEXT: [[BITCAST3:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR2]](s32) - ; CI-HSA-NEXT: [[AND6:%[0-9]+]]:_(s32) = G_AND [[LOAD6]], [[C7]] - ; CI-HSA-NEXT: [[AND7:%[0-9]+]]:_(s32) = G_AND [[BITCAST]], [[C7]] - ; CI-HSA-NEXT: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[AND7]], [[C6]](s32) + ; CI-HSA-NEXT: [[AND6:%[0-9]+]]:_(s32) = G_AND [[LOAD6]], [[C6]] + ; CI-HSA-NEXT: [[AND7:%[0-9]+]]:_(s32) = G_AND [[BITCAST]], [[C6]] + ; CI-HSA-NEXT: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[AND7]], [[C7]](s32) ; CI-HSA-NEXT: [[OR3:%[0-9]+]]:_(s32) = G_OR [[AND6]], [[SHL3]] ; CI-HSA-NEXT: [[BITCAST4:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR3]](s32) ; CI-HSA-NEXT: $vgpr0 = COPY [[BITCAST1]](<2 x s16>) @@ -9535,26 +9501,26 @@ body: | ; CI-MESA-NEXT: [[DEF:%[0-9]+]]:_(<8 x s16>) = G_IMPLICIT_DEF ; CI-MESA-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>), [[UV2:%[0-9]+]]:_(<2 x s16>), [[UV3:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<8 x s16>) ; CI-MESA-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>) - ; CI-MESA-NEXT: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; CI-MESA-NEXT: [[C7:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 - ; CI-MESA-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[LOAD]], [[C7]] - ; CI-MESA-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LOAD1]], [[C7]] - ; CI-MESA-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C6]](s32) + ; CI-MESA-NEXT: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 + ; CI-MESA-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[LOAD]], [[C6]] + ; CI-MESA-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LOAD1]], [[C6]] + ; CI-MESA-NEXT: [[C7:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; CI-MESA-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C7]](s32) ; CI-MESA-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]] ; CI-MESA-NEXT: [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) - ; CI-MESA-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[LOAD2]], [[C7]] - ; CI-MESA-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[LOAD3]], [[C7]] - ; CI-MESA-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C6]](s32) + ; CI-MESA-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[LOAD2]], [[C6]] + ; CI-MESA-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[LOAD3]], [[C6]] + ; CI-MESA-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C7]](s32) ; CI-MESA-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL1]] ; CI-MESA-NEXT: [[BITCAST2:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32) - ; CI-MESA-NEXT: [[AND4:%[0-9]+]]:_(s32) = G_AND [[LOAD4]], [[C7]] - ; CI-MESA-NEXT: [[AND5:%[0-9]+]]:_(s32) = G_AND [[LOAD5]], [[C7]] - ; CI-MESA-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[C6]](s32) + ; CI-MESA-NEXT: [[AND4:%[0-9]+]]:_(s32) = G_AND [[LOAD4]], [[C6]] + ; CI-MESA-NEXT: [[AND5:%[0-9]+]]:_(s32) = G_AND [[LOAD5]], [[C6]] + ; CI-MESA-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[C7]](s32) ; CI-MESA-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[AND4]], [[SHL2]] ; CI-MESA-NEXT: [[BITCAST3:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR2]](s32) - ; CI-MESA-NEXT: [[AND6:%[0-9]+]]:_(s32) = G_AND [[LOAD6]], [[C7]] - ; CI-MESA-NEXT: [[AND7:%[0-9]+]]:_(s32) = G_AND [[BITCAST]], [[C7]] - ; CI-MESA-NEXT: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[AND7]], 
[[C6]](s32) + ; CI-MESA-NEXT: [[AND6:%[0-9]+]]:_(s32) = G_AND [[LOAD6]], [[C6]] + ; CI-MESA-NEXT: [[AND7:%[0-9]+]]:_(s32) = G_AND [[BITCAST]], [[C6]] + ; CI-MESA-NEXT: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[AND7]], [[C7]](s32) ; CI-MESA-NEXT: [[OR3:%[0-9]+]]:_(s32) = G_OR [[AND6]], [[SHL3]] ; CI-MESA-NEXT: [[BITCAST4:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR3]](s32) ; CI-MESA-NEXT: $vgpr0 = COPY [[BITCAST1]](<2 x s16>) @@ -9585,26 +9551,26 @@ body: | ; VI-NEXT: [[DEF:%[0-9]+]]:_(<8 x s16>) = G_IMPLICIT_DEF ; VI-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>), [[UV2:%[0-9]+]]:_(<2 x s16>), [[UV3:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<8 x s16>) ; VI-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>) - ; VI-NEXT: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; VI-NEXT: [[C7:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 - ; VI-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[LOAD]], [[C7]] - ; VI-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LOAD1]], [[C7]] - ; VI-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C6]](s32) + ; VI-NEXT: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 + ; VI-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[LOAD]], [[C6]] + ; VI-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LOAD1]], [[C6]] + ; VI-NEXT: [[C7:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; VI-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C7]](s32) ; VI-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]] ; VI-NEXT: [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) - ; VI-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[LOAD2]], [[C7]] - ; VI-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[LOAD3]], [[C7]] - ; VI-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C6]](s32) + ; VI-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[LOAD2]], [[C6]] + ; VI-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[LOAD3]], [[C6]] + ; VI-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C7]](s32) ; VI-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL1]] ; VI-NEXT: [[BITCAST2:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32) - ; VI-NEXT: [[AND4:%[0-9]+]]:_(s32) = G_AND [[LOAD4]], [[C7]] - ; VI-NEXT: [[AND5:%[0-9]+]]:_(s32) = G_AND [[LOAD5]], [[C7]] - ; VI-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[C6]](s32) + ; VI-NEXT: [[AND4:%[0-9]+]]:_(s32) = G_AND [[LOAD4]], [[C6]] + ; VI-NEXT: [[AND5:%[0-9]+]]:_(s32) = G_AND [[LOAD5]], [[C6]] + ; VI-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[C7]](s32) ; VI-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[AND4]], [[SHL2]] ; VI-NEXT: [[BITCAST3:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR2]](s32) - ; VI-NEXT: [[AND6:%[0-9]+]]:_(s32) = G_AND [[LOAD6]], [[C7]] - ; VI-NEXT: [[AND7:%[0-9]+]]:_(s32) = G_AND [[BITCAST]], [[C7]] - ; VI-NEXT: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[AND7]], [[C6]](s32) + ; VI-NEXT: [[AND6:%[0-9]+]]:_(s32) = G_AND [[LOAD6]], [[C6]] + ; VI-NEXT: [[AND7:%[0-9]+]]:_(s32) = G_AND [[BITCAST]], [[C6]] + ; VI-NEXT: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[AND7]], [[C7]](s32) ; VI-NEXT: [[OR3:%[0-9]+]]:_(s32) = G_OR [[AND6]], [[SHL3]] ; VI-NEXT: [[BITCAST4:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR3]](s32) ; VI-NEXT: $vgpr0 = COPY [[BITCAST1]](<2 x s16>) @@ -9747,26 +9713,26 @@ body: | ; SI-NEXT: [[DEF:%[0-9]+]]:_(<8 x s16>) = G_IMPLICIT_DEF ; SI-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>), [[UV2:%[0-9]+]]:_(<2 x s16>), [[UV3:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<8 x s16>) ; SI-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>) - ; SI-NEXT: [[C8:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; SI-NEXT: [[C9:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 - ; SI-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND 
[[OR]], [[C9]] - ; SI-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[OR1]], [[C9]] - ; SI-NEXT: [[SHL7:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C8]](s32) + ; SI-NEXT: [[C8:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 + ; SI-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[OR]], [[C8]] + ; SI-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[OR1]], [[C8]] + ; SI-NEXT: [[C9:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; SI-NEXT: [[SHL7:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C9]](s32) ; SI-NEXT: [[OR7:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL7]] ; SI-NEXT: [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR7]](s32) - ; SI-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[OR2]], [[C9]] - ; SI-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[OR3]], [[C9]] - ; SI-NEXT: [[SHL8:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C8]](s32) + ; SI-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[OR2]], [[C8]] + ; SI-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[OR3]], [[C8]] + ; SI-NEXT: [[SHL8:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C9]](s32) ; SI-NEXT: [[OR8:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL8]] ; SI-NEXT: [[BITCAST2:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR8]](s32) - ; SI-NEXT: [[AND4:%[0-9]+]]:_(s32) = G_AND [[OR4]], [[C9]] - ; SI-NEXT: [[AND5:%[0-9]+]]:_(s32) = G_AND [[OR5]], [[C9]] - ; SI-NEXT: [[SHL9:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[C8]](s32) + ; SI-NEXT: [[AND4:%[0-9]+]]:_(s32) = G_AND [[OR4]], [[C8]] + ; SI-NEXT: [[AND5:%[0-9]+]]:_(s32) = G_AND [[OR5]], [[C8]] + ; SI-NEXT: [[SHL9:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[C9]](s32) ; SI-NEXT: [[OR9:%[0-9]+]]:_(s32) = G_OR [[AND4]], [[SHL9]] ; SI-NEXT: [[BITCAST3:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR9]](s32) - ; SI-NEXT: [[AND6:%[0-9]+]]:_(s32) = G_AND [[OR6]], [[C9]] - ; SI-NEXT: [[AND7:%[0-9]+]]:_(s32) = G_AND [[BITCAST]], [[C9]] - ; SI-NEXT: [[SHL10:%[0-9]+]]:_(s32) = G_SHL [[AND7]], [[C8]](s32) + ; SI-NEXT: [[AND6:%[0-9]+]]:_(s32) = G_AND [[OR6]], [[C8]] + ; SI-NEXT: [[AND7:%[0-9]+]]:_(s32) = G_AND [[BITCAST]], [[C8]] + ; SI-NEXT: [[SHL10:%[0-9]+]]:_(s32) = G_SHL [[AND7]], [[C9]](s32) ; SI-NEXT: [[OR10:%[0-9]+]]:_(s32) = G_OR [[AND6]], [[SHL10]] ; SI-NEXT: [[BITCAST4:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR10]](s32) ; SI-NEXT: $vgpr0 = COPY [[BITCAST1]](<2 x s16>) @@ -9797,26 +9763,26 @@ body: | ; CI-HSA-NEXT: [[DEF:%[0-9]+]]:_(<8 x s16>) = G_IMPLICIT_DEF ; CI-HSA-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>), [[UV2:%[0-9]+]]:_(<2 x s16>), [[UV3:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<8 x s16>) ; CI-HSA-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>) - ; CI-HSA-NEXT: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; CI-HSA-NEXT: [[C7:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 - ; CI-HSA-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[LOAD]], [[C7]] - ; CI-HSA-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LOAD1]], [[C7]] - ; CI-HSA-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C6]](s32) + ; CI-HSA-NEXT: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 + ; CI-HSA-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[LOAD]], [[C6]] + ; CI-HSA-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LOAD1]], [[C6]] + ; CI-HSA-NEXT: [[C7:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; CI-HSA-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C7]](s32) ; CI-HSA-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]] ; CI-HSA-NEXT: [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) - ; CI-HSA-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[LOAD2]], [[C7]] - ; CI-HSA-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[LOAD3]], [[C7]] - ; CI-HSA-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C6]](s32) + ; CI-HSA-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[LOAD2]], [[C6]] + ; 
CI-HSA-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[LOAD3]], [[C6]] + ; CI-HSA-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C7]](s32) ; CI-HSA-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL1]] ; CI-HSA-NEXT: [[BITCAST2:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32) - ; CI-HSA-NEXT: [[AND4:%[0-9]+]]:_(s32) = G_AND [[LOAD4]], [[C7]] - ; CI-HSA-NEXT: [[AND5:%[0-9]+]]:_(s32) = G_AND [[LOAD5]], [[C7]] - ; CI-HSA-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[C6]](s32) + ; CI-HSA-NEXT: [[AND4:%[0-9]+]]:_(s32) = G_AND [[LOAD4]], [[C6]] + ; CI-HSA-NEXT: [[AND5:%[0-9]+]]:_(s32) = G_AND [[LOAD5]], [[C6]] + ; CI-HSA-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[C7]](s32) ; CI-HSA-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[AND4]], [[SHL2]] ; CI-HSA-NEXT: [[BITCAST3:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR2]](s32) - ; CI-HSA-NEXT: [[AND6:%[0-9]+]]:_(s32) = G_AND [[LOAD6]], [[C7]] - ; CI-HSA-NEXT: [[AND7:%[0-9]+]]:_(s32) = G_AND [[BITCAST]], [[C7]] - ; CI-HSA-NEXT: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[AND7]], [[C6]](s32) + ; CI-HSA-NEXT: [[AND6:%[0-9]+]]:_(s32) = G_AND [[LOAD6]], [[C6]] + ; CI-HSA-NEXT: [[AND7:%[0-9]+]]:_(s32) = G_AND [[BITCAST]], [[C6]] + ; CI-HSA-NEXT: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[AND7]], [[C7]](s32) ; CI-HSA-NEXT: [[OR3:%[0-9]+]]:_(s32) = G_OR [[AND6]], [[SHL3]] ; CI-HSA-NEXT: [[BITCAST4:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR3]](s32) ; CI-HSA-NEXT: $vgpr0 = COPY [[BITCAST1]](<2 x s16>) @@ -9877,26 +9843,26 @@ body: | ; CI-MESA-NEXT: [[DEF:%[0-9]+]]:_(<8 x s16>) = G_IMPLICIT_DEF ; CI-MESA-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>), [[UV2:%[0-9]+]]:_(<2 x s16>), [[UV3:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<8 x s16>) ; CI-MESA-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>) - ; CI-MESA-NEXT: [[C8:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; CI-MESA-NEXT: [[C9:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 - ; CI-MESA-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[OR]], [[C9]] - ; CI-MESA-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[OR1]], [[C9]] - ; CI-MESA-NEXT: [[SHL7:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C8]](s32) + ; CI-MESA-NEXT: [[C8:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 + ; CI-MESA-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[OR]], [[C8]] + ; CI-MESA-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[OR1]], [[C8]] + ; CI-MESA-NEXT: [[C9:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; CI-MESA-NEXT: [[SHL7:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C9]](s32) ; CI-MESA-NEXT: [[OR7:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL7]] ; CI-MESA-NEXT: [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR7]](s32) - ; CI-MESA-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[OR2]], [[C9]] - ; CI-MESA-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[OR3]], [[C9]] - ; CI-MESA-NEXT: [[SHL8:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C8]](s32) + ; CI-MESA-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[OR2]], [[C8]] + ; CI-MESA-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[OR3]], [[C8]] + ; CI-MESA-NEXT: [[SHL8:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C9]](s32) ; CI-MESA-NEXT: [[OR8:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL8]] ; CI-MESA-NEXT: [[BITCAST2:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR8]](s32) - ; CI-MESA-NEXT: [[AND4:%[0-9]+]]:_(s32) = G_AND [[OR4]], [[C9]] - ; CI-MESA-NEXT: [[AND5:%[0-9]+]]:_(s32) = G_AND [[OR5]], [[C9]] - ; CI-MESA-NEXT: [[SHL9:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[C8]](s32) + ; CI-MESA-NEXT: [[AND4:%[0-9]+]]:_(s32) = G_AND [[OR4]], [[C8]] + ; CI-MESA-NEXT: [[AND5:%[0-9]+]]:_(s32) = G_AND [[OR5]], [[C8]] + ; CI-MESA-NEXT: [[SHL9:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[C9]](s32) ; CI-MESA-NEXT: [[OR9:%[0-9]+]]:_(s32) = G_OR 
[[AND4]], [[SHL9]] ; CI-MESA-NEXT: [[BITCAST3:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR9]](s32) - ; CI-MESA-NEXT: [[AND6:%[0-9]+]]:_(s32) = G_AND [[OR6]], [[C9]] - ; CI-MESA-NEXT: [[AND7:%[0-9]+]]:_(s32) = G_AND [[BITCAST]], [[C9]] - ; CI-MESA-NEXT: [[SHL10:%[0-9]+]]:_(s32) = G_SHL [[AND7]], [[C8]](s32) + ; CI-MESA-NEXT: [[AND6:%[0-9]+]]:_(s32) = G_AND [[OR6]], [[C8]] + ; CI-MESA-NEXT: [[AND7:%[0-9]+]]:_(s32) = G_AND [[BITCAST]], [[C8]] + ; CI-MESA-NEXT: [[SHL10:%[0-9]+]]:_(s32) = G_SHL [[AND7]], [[C9]](s32) ; CI-MESA-NEXT: [[OR10:%[0-9]+]]:_(s32) = G_OR [[AND6]], [[SHL10]] ; CI-MESA-NEXT: [[BITCAST4:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR10]](s32) ; CI-MESA-NEXT: $vgpr0 = COPY [[BITCAST1]](<2 x s16>) @@ -9957,26 +9923,26 @@ body: | ; VI-NEXT: [[DEF:%[0-9]+]]:_(<8 x s16>) = G_IMPLICIT_DEF ; VI-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>), [[UV2:%[0-9]+]]:_(<2 x s16>), [[UV3:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<8 x s16>) ; VI-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>) - ; VI-NEXT: [[C8:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; VI-NEXT: [[C9:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 - ; VI-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[OR]], [[C9]] - ; VI-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[OR1]], [[C9]] - ; VI-NEXT: [[SHL7:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C8]](s32) + ; VI-NEXT: [[C8:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 + ; VI-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[OR]], [[C8]] + ; VI-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[OR1]], [[C8]] + ; VI-NEXT: [[C9:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; VI-NEXT: [[SHL7:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C9]](s32) ; VI-NEXT: [[OR7:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL7]] ; VI-NEXT: [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR7]](s32) - ; VI-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[OR2]], [[C9]] - ; VI-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[OR3]], [[C9]] - ; VI-NEXT: [[SHL8:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C8]](s32) + ; VI-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[OR2]], [[C8]] + ; VI-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[OR3]], [[C8]] + ; VI-NEXT: [[SHL8:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C9]](s32) ; VI-NEXT: [[OR8:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL8]] ; VI-NEXT: [[BITCAST2:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR8]](s32) - ; VI-NEXT: [[AND4:%[0-9]+]]:_(s32) = G_AND [[OR4]], [[C9]] - ; VI-NEXT: [[AND5:%[0-9]+]]:_(s32) = G_AND [[OR5]], [[C9]] - ; VI-NEXT: [[SHL9:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[C8]](s32) + ; VI-NEXT: [[AND4:%[0-9]+]]:_(s32) = G_AND [[OR4]], [[C8]] + ; VI-NEXT: [[AND5:%[0-9]+]]:_(s32) = G_AND [[OR5]], [[C8]] + ; VI-NEXT: [[SHL9:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[C9]](s32) ; VI-NEXT: [[OR9:%[0-9]+]]:_(s32) = G_OR [[AND4]], [[SHL9]] ; VI-NEXT: [[BITCAST3:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR9]](s32) - ; VI-NEXT: [[AND6:%[0-9]+]]:_(s32) = G_AND [[OR6]], [[C9]] - ; VI-NEXT: [[AND7:%[0-9]+]]:_(s32) = G_AND [[BITCAST]], [[C9]] - ; VI-NEXT: [[SHL10:%[0-9]+]]:_(s32) = G_SHL [[AND7]], [[C8]](s32) + ; VI-NEXT: [[AND6:%[0-9]+]]:_(s32) = G_AND [[OR6]], [[C8]] + ; VI-NEXT: [[AND7:%[0-9]+]]:_(s32) = G_AND [[BITCAST]], [[C8]] + ; VI-NEXT: [[SHL10:%[0-9]+]]:_(s32) = G_SHL [[AND7]], [[C9]](s32) ; VI-NEXT: [[OR10:%[0-9]+]]:_(s32) = G_OR [[AND6]], [[SHL10]] ; VI-NEXT: [[BITCAST4:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR10]](s32) ; VI-NEXT: $vgpr0 = COPY [[BITCAST1]](<2 x s16>) @@ -10504,8 +10470,9 @@ body: | ; SI-LABEL: name: test_load_global_v3s32_align16 ; SI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 ; SI-NEXT: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p1) :: (load (<4 
x s32>), addrspace 1) - ; SI-NEXT: [[EXTRACT:%[0-9]+]]:_(<3 x s32>) = G_EXTRACT [[LOAD]](<4 x s32>), 0 - ; SI-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[EXTRACT]](<3 x s32>) + ; SI-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[LOAD]](<4 x s32>) + ; SI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[UV]](s32), [[UV1]](s32), [[UV2]](s32) + ; SI-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[BUILD_VECTOR]](<3 x s32>) ; CI-HSA-LABEL: name: test_load_global_v3s32_align16 ; CI-HSA: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 ; CI-HSA-NEXT: [[LOAD:%[0-9]+]]:_(<3 x s32>) = G_LOAD [[COPY]](p1) :: (load (<3 x s32>), align 16, addrspace 1) @@ -11453,45 +11420,51 @@ body: | ; SI-LABEL: name: test_load_global_v3s64_align32 ; SI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 ; SI-NEXT: [[LOAD:%[0-9]+]]:_(<4 x s64>) = G_LOAD [[COPY]](p1) :: (load (<4 x s64>), addrspace 1) - ; SI-NEXT: [[EXTRACT:%[0-9]+]]:_(<3 x s64>) = G_EXTRACT [[LOAD]](<4 x s64>), 0 + ; SI-NEXT: [[UV:%[0-9]+]]:_(s64), [[UV1:%[0-9]+]]:_(s64), [[UV2:%[0-9]+]]:_(s64), [[UV3:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[LOAD]](<4 x s64>) ; SI-NEXT: [[DEF:%[0-9]+]]:_(<4 x s64>) = G_IMPLICIT_DEF - ; SI-NEXT: [[INSERT:%[0-9]+]]:_(<4 x s64>) = G_INSERT [[DEF]], [[EXTRACT]](<3 x s64>), 0 - ; SI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[INSERT]](<4 x s64>) + ; SI-NEXT: [[UV4:%[0-9]+]]:_(s64), [[UV5:%[0-9]+]]:_(s64), [[UV6:%[0-9]+]]:_(s64), [[UV7:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[DEF]](<4 x s64>) + ; SI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s64>) = G_BUILD_VECTOR [[UV]](s64), [[UV1]](s64), [[UV2]](s64), [[UV7]](s64) + ; SI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[BUILD_VECTOR]](<4 x s64>) ; CI-HSA-LABEL: name: test_load_global_v3s64_align32 ; CI-HSA: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 ; CI-HSA-NEXT: [[LOAD:%[0-9]+]]:_(<4 x s64>) = G_LOAD [[COPY]](p1) :: (load (<4 x s64>), addrspace 1) - ; CI-HSA-NEXT: [[EXTRACT:%[0-9]+]]:_(<3 x s64>) = G_EXTRACT [[LOAD]](<4 x s64>), 0 + ; CI-HSA-NEXT: [[UV:%[0-9]+]]:_(s64), [[UV1:%[0-9]+]]:_(s64), [[UV2:%[0-9]+]]:_(s64), [[UV3:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[LOAD]](<4 x s64>) ; CI-HSA-NEXT: [[DEF:%[0-9]+]]:_(<4 x s64>) = G_IMPLICIT_DEF - ; CI-HSA-NEXT: [[INSERT:%[0-9]+]]:_(<4 x s64>) = G_INSERT [[DEF]], [[EXTRACT]](<3 x s64>), 0 - ; CI-HSA-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[INSERT]](<4 x s64>) + ; CI-HSA-NEXT: [[UV4:%[0-9]+]]:_(s64), [[UV5:%[0-9]+]]:_(s64), [[UV6:%[0-9]+]]:_(s64), [[UV7:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[DEF]](<4 x s64>) + ; CI-HSA-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s64>) = G_BUILD_VECTOR [[UV]](s64), [[UV1]](s64), [[UV2]](s64), [[UV7]](s64) + ; CI-HSA-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[BUILD_VECTOR]](<4 x s64>) ; CI-MESA-LABEL: name: test_load_global_v3s64_align32 ; CI-MESA: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 ; CI-MESA-NEXT: [[LOAD:%[0-9]+]]:_(<4 x s64>) = G_LOAD [[COPY]](p1) :: (load (<4 x s64>), addrspace 1) - ; CI-MESA-NEXT: [[EXTRACT:%[0-9]+]]:_(<3 x s64>) = G_EXTRACT [[LOAD]](<4 x s64>), 0 + ; CI-MESA-NEXT: [[UV:%[0-9]+]]:_(s64), [[UV1:%[0-9]+]]:_(s64), [[UV2:%[0-9]+]]:_(s64), [[UV3:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[LOAD]](<4 x s64>) ; CI-MESA-NEXT: [[DEF:%[0-9]+]]:_(<4 x s64>) = G_IMPLICIT_DEF - ; CI-MESA-NEXT: [[INSERT:%[0-9]+]]:_(<4 x s64>) = G_INSERT [[DEF]], [[EXTRACT]](<3 x s64>), 0 - ; CI-MESA-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[INSERT]](<4 x s64>) + ; 
CI-MESA-NEXT: [[UV4:%[0-9]+]]:_(s64), [[UV5:%[0-9]+]]:_(s64), [[UV6:%[0-9]+]]:_(s64), [[UV7:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[DEF]](<4 x s64>) + ; CI-MESA-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s64>) = G_BUILD_VECTOR [[UV]](s64), [[UV1]](s64), [[UV2]](s64), [[UV7]](s64) + ; CI-MESA-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[BUILD_VECTOR]](<4 x s64>) ; VI-LABEL: name: test_load_global_v3s64_align32 ; VI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 ; VI-NEXT: [[LOAD:%[0-9]+]]:_(<4 x s64>) = G_LOAD [[COPY]](p1) :: (load (<4 x s64>), addrspace 1) - ; VI-NEXT: [[EXTRACT:%[0-9]+]]:_(<3 x s64>) = G_EXTRACT [[LOAD]](<4 x s64>), 0 + ; VI-NEXT: [[UV:%[0-9]+]]:_(s64), [[UV1:%[0-9]+]]:_(s64), [[UV2:%[0-9]+]]:_(s64), [[UV3:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[LOAD]](<4 x s64>) ; VI-NEXT: [[DEF:%[0-9]+]]:_(<4 x s64>) = G_IMPLICIT_DEF - ; VI-NEXT: [[INSERT:%[0-9]+]]:_(<4 x s64>) = G_INSERT [[DEF]], [[EXTRACT]](<3 x s64>), 0 - ; VI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[INSERT]](<4 x s64>) + ; VI-NEXT: [[UV4:%[0-9]+]]:_(s64), [[UV5:%[0-9]+]]:_(s64), [[UV6:%[0-9]+]]:_(s64), [[UV7:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[DEF]](<4 x s64>) + ; VI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s64>) = G_BUILD_VECTOR [[UV]](s64), [[UV1]](s64), [[UV2]](s64), [[UV7]](s64) + ; VI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[BUILD_VECTOR]](<4 x s64>) ; GFX9-HSA-LABEL: name: test_load_global_v3s64_align32 ; GFX9-HSA: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 ; GFX9-HSA-NEXT: [[LOAD:%[0-9]+]]:_(<4 x s64>) = G_LOAD [[COPY]](p1) :: (load (<4 x s64>), addrspace 1) - ; GFX9-HSA-NEXT: [[EXTRACT:%[0-9]+]]:_(<3 x s64>) = G_EXTRACT [[LOAD]](<4 x s64>), 0 + ; GFX9-HSA-NEXT: [[UV:%[0-9]+]]:_(s64), [[UV1:%[0-9]+]]:_(s64), [[UV2:%[0-9]+]]:_(s64), [[UV3:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[LOAD]](<4 x s64>) ; GFX9-HSA-NEXT: [[DEF:%[0-9]+]]:_(<4 x s64>) = G_IMPLICIT_DEF - ; GFX9-HSA-NEXT: [[INSERT:%[0-9]+]]:_(<4 x s64>) = G_INSERT [[DEF]], [[EXTRACT]](<3 x s64>), 0 - ; GFX9-HSA-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[INSERT]](<4 x s64>) + ; GFX9-HSA-NEXT: [[UV4:%[0-9]+]]:_(s64), [[UV5:%[0-9]+]]:_(s64), [[UV6:%[0-9]+]]:_(s64), [[UV7:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[DEF]](<4 x s64>) + ; GFX9-HSA-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s64>) = G_BUILD_VECTOR [[UV]](s64), [[UV1]](s64), [[UV2]](s64), [[UV7]](s64) + ; GFX9-HSA-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[BUILD_VECTOR]](<4 x s64>) ; GFX9-MESA-LABEL: name: test_load_global_v3s64_align32 ; GFX9-MESA: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 ; GFX9-MESA-NEXT: [[LOAD:%[0-9]+]]:_(<4 x s64>) = G_LOAD [[COPY]](p1) :: (load (<4 x s64>), addrspace 1) - ; GFX9-MESA-NEXT: [[EXTRACT:%[0-9]+]]:_(<3 x s64>) = G_EXTRACT [[LOAD]](<4 x s64>), 0 + ; GFX9-MESA-NEXT: [[UV:%[0-9]+]]:_(s64), [[UV1:%[0-9]+]]:_(s64), [[UV2:%[0-9]+]]:_(s64), [[UV3:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[LOAD]](<4 x s64>) ; GFX9-MESA-NEXT: [[DEF:%[0-9]+]]:_(<4 x s64>) = G_IMPLICIT_DEF - ; GFX9-MESA-NEXT: [[INSERT:%[0-9]+]]:_(<4 x s64>) = G_INSERT [[DEF]], [[EXTRACT]](<3 x s64>), 0 - ; GFX9-MESA-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[INSERT]](<4 x s64>) + ; GFX9-MESA-NEXT: [[UV4:%[0-9]+]]:_(s64), [[UV5:%[0-9]+]]:_(s64), [[UV6:%[0-9]+]]:_(s64), [[UV7:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[DEF]](<4 x s64>) + ; GFX9-MESA-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s64>) = G_BUILD_VECTOR [[UV]](s64), [[UV1]](s64), [[UV2]](s64), [[UV7]](s64) + ; GFX9-MESA-NEXT: 
$vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[BUILD_VECTOR]](<4 x s64>) %0:_(p1) = COPY $vgpr0_vgpr1 %1:_(<3 x s64>) = G_LOAD %0 :: (load (<3 x s64>), align 32, addrspace 1) %2:_(<4 x s64>) = G_IMPLICIT_DEF @@ -11512,10 +11485,10 @@ body: | ; SI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64) ; SI-NEXT: [[LOAD1:%[0-9]+]]:_(s64) = G_LOAD [[PTR_ADD]](p1) :: (load (s64) from unknown-address + 16, addrspace 1) ; SI-NEXT: [[UV:%[0-9]+]]:_(s64), [[UV1:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[LOAD]](<2 x s64>) - ; SI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s64>) = G_BUILD_VECTOR [[UV]](s64), [[UV1]](s64), [[LOAD1]](s64) ; SI-NEXT: [[DEF:%[0-9]+]]:_(<4 x s64>) = G_IMPLICIT_DEF - ; SI-NEXT: [[INSERT:%[0-9]+]]:_(<4 x s64>) = G_INSERT [[DEF]], [[BUILD_VECTOR]](<3 x s64>), 0 - ; SI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[INSERT]](<4 x s64>) + ; SI-NEXT: [[UV2:%[0-9]+]]:_(s64), [[UV3:%[0-9]+]]:_(s64), [[UV4:%[0-9]+]]:_(s64), [[UV5:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[DEF]](<4 x s64>) + ; SI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s64>) = G_BUILD_VECTOR [[UV]](s64), [[UV1]](s64), [[LOAD1]](s64), [[UV5]](s64) + ; SI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[BUILD_VECTOR]](<4 x s64>) ; CI-HSA-LABEL: name: test_load_global_v3s64_align8 ; CI-HSA: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 ; CI-HSA-NEXT: [[LOAD:%[0-9]+]]:_(<2 x s64>) = G_LOAD [[COPY]](p1) :: (load (<2 x s64>), align 8, addrspace 1) @@ -11523,10 +11496,10 @@ body: | ; CI-HSA-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64) ; CI-HSA-NEXT: [[LOAD1:%[0-9]+]]:_(s64) = G_LOAD [[PTR_ADD]](p1) :: (load (s64) from unknown-address + 16, addrspace 1) ; CI-HSA-NEXT: [[UV:%[0-9]+]]:_(s64), [[UV1:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[LOAD]](<2 x s64>) - ; CI-HSA-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s64>) = G_BUILD_VECTOR [[UV]](s64), [[UV1]](s64), [[LOAD1]](s64) ; CI-HSA-NEXT: [[DEF:%[0-9]+]]:_(<4 x s64>) = G_IMPLICIT_DEF - ; CI-HSA-NEXT: [[INSERT:%[0-9]+]]:_(<4 x s64>) = G_INSERT [[DEF]], [[BUILD_VECTOR]](<3 x s64>), 0 - ; CI-HSA-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[INSERT]](<4 x s64>) + ; CI-HSA-NEXT: [[UV2:%[0-9]+]]:_(s64), [[UV3:%[0-9]+]]:_(s64), [[UV4:%[0-9]+]]:_(s64), [[UV5:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[DEF]](<4 x s64>) + ; CI-HSA-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s64>) = G_BUILD_VECTOR [[UV]](s64), [[UV1]](s64), [[LOAD1]](s64), [[UV5]](s64) + ; CI-HSA-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[BUILD_VECTOR]](<4 x s64>) ; CI-MESA-LABEL: name: test_load_global_v3s64_align8 ; CI-MESA: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 ; CI-MESA-NEXT: [[LOAD:%[0-9]+]]:_(<2 x s64>) = G_LOAD [[COPY]](p1) :: (load (<2 x s64>), align 8, addrspace 1) @@ -11534,10 +11507,10 @@ body: | ; CI-MESA-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64) ; CI-MESA-NEXT: [[LOAD1:%[0-9]+]]:_(s64) = G_LOAD [[PTR_ADD]](p1) :: (load (s64) from unknown-address + 16, addrspace 1) ; CI-MESA-NEXT: [[UV:%[0-9]+]]:_(s64), [[UV1:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[LOAD]](<2 x s64>) - ; CI-MESA-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s64>) = G_BUILD_VECTOR [[UV]](s64), [[UV1]](s64), [[LOAD1]](s64) ; CI-MESA-NEXT: [[DEF:%[0-9]+]]:_(<4 x s64>) = G_IMPLICIT_DEF - ; CI-MESA-NEXT: [[INSERT:%[0-9]+]]:_(<4 x s64>) = G_INSERT [[DEF]], [[BUILD_VECTOR]](<3 x s64>), 0 - ; CI-MESA-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[INSERT]](<4 x s64>) + ; CI-MESA-NEXT: [[UV2:%[0-9]+]]:_(s64), [[UV3:%[0-9]+]]:_(s64), 
[[UV4:%[0-9]+]]:_(s64), [[UV5:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[DEF]](<4 x s64>) + ; CI-MESA-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s64>) = G_BUILD_VECTOR [[UV]](s64), [[UV1]](s64), [[LOAD1]](s64), [[UV5]](s64) + ; CI-MESA-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[BUILD_VECTOR]](<4 x s64>) ; VI-LABEL: name: test_load_global_v3s64_align8 ; VI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 ; VI-NEXT: [[LOAD:%[0-9]+]]:_(<2 x s64>) = G_LOAD [[COPY]](p1) :: (load (<2 x s64>), align 8, addrspace 1) @@ -11545,10 +11518,10 @@ body: | ; VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64) ; VI-NEXT: [[LOAD1:%[0-9]+]]:_(s64) = G_LOAD [[PTR_ADD]](p1) :: (load (s64) from unknown-address + 16, addrspace 1) ; VI-NEXT: [[UV:%[0-9]+]]:_(s64), [[UV1:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[LOAD]](<2 x s64>) - ; VI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s64>) = G_BUILD_VECTOR [[UV]](s64), [[UV1]](s64), [[LOAD1]](s64) ; VI-NEXT: [[DEF:%[0-9]+]]:_(<4 x s64>) = G_IMPLICIT_DEF - ; VI-NEXT: [[INSERT:%[0-9]+]]:_(<4 x s64>) = G_INSERT [[DEF]], [[BUILD_VECTOR]](<3 x s64>), 0 - ; VI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[INSERT]](<4 x s64>) + ; VI-NEXT: [[UV2:%[0-9]+]]:_(s64), [[UV3:%[0-9]+]]:_(s64), [[UV4:%[0-9]+]]:_(s64), [[UV5:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[DEF]](<4 x s64>) + ; VI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s64>) = G_BUILD_VECTOR [[UV]](s64), [[UV1]](s64), [[LOAD1]](s64), [[UV5]](s64) + ; VI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[BUILD_VECTOR]](<4 x s64>) ; GFX9-HSA-LABEL: name: test_load_global_v3s64_align8 ; GFX9-HSA: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 ; GFX9-HSA-NEXT: [[LOAD:%[0-9]+]]:_(<2 x s64>) = G_LOAD [[COPY]](p1) :: (load (<2 x s64>), align 8, addrspace 1) @@ -11556,10 +11529,10 @@ body: | ; GFX9-HSA-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64) ; GFX9-HSA-NEXT: [[LOAD1:%[0-9]+]]:_(s64) = G_LOAD [[PTR_ADD]](p1) :: (load (s64) from unknown-address + 16, addrspace 1) ; GFX9-HSA-NEXT: [[UV:%[0-9]+]]:_(s64), [[UV1:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[LOAD]](<2 x s64>) - ; GFX9-HSA-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s64>) = G_BUILD_VECTOR [[UV]](s64), [[UV1]](s64), [[LOAD1]](s64) ; GFX9-HSA-NEXT: [[DEF:%[0-9]+]]:_(<4 x s64>) = G_IMPLICIT_DEF - ; GFX9-HSA-NEXT: [[INSERT:%[0-9]+]]:_(<4 x s64>) = G_INSERT [[DEF]], [[BUILD_VECTOR]](<3 x s64>), 0 - ; GFX9-HSA-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[INSERT]](<4 x s64>) + ; GFX9-HSA-NEXT: [[UV2:%[0-9]+]]:_(s64), [[UV3:%[0-9]+]]:_(s64), [[UV4:%[0-9]+]]:_(s64), [[UV5:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[DEF]](<4 x s64>) + ; GFX9-HSA-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s64>) = G_BUILD_VECTOR [[UV]](s64), [[UV1]](s64), [[LOAD1]](s64), [[UV5]](s64) + ; GFX9-HSA-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[BUILD_VECTOR]](<4 x s64>) ; GFX9-MESA-LABEL: name: test_load_global_v3s64_align8 ; GFX9-MESA: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 ; GFX9-MESA-NEXT: [[LOAD:%[0-9]+]]:_(<2 x s64>) = G_LOAD [[COPY]](p1) :: (load (<2 x s64>), align 8, addrspace 1) @@ -11567,10 +11540,10 @@ body: | ; GFX9-MESA-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64) ; GFX9-MESA-NEXT: [[LOAD1:%[0-9]+]]:_(s64) = G_LOAD [[PTR_ADD]](p1) :: (load (s64) from unknown-address + 16, addrspace 1) ; GFX9-MESA-NEXT: [[UV:%[0-9]+]]:_(s64), [[UV1:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[LOAD]](<2 x s64>) - ; GFX9-MESA-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s64>) = G_BUILD_VECTOR [[UV]](s64), [[UV1]](s64), [[LOAD1]](s64) 
; GFX9-MESA-NEXT: [[DEF:%[0-9]+]]:_(<4 x s64>) = G_IMPLICIT_DEF - ; GFX9-MESA-NEXT: [[INSERT:%[0-9]+]]:_(<4 x s64>) = G_INSERT [[DEF]], [[BUILD_VECTOR]](<3 x s64>), 0 - ; GFX9-MESA-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[INSERT]](<4 x s64>) + ; GFX9-MESA-NEXT: [[UV2:%[0-9]+]]:_(s64), [[UV3:%[0-9]+]]:_(s64), [[UV4:%[0-9]+]]:_(s64), [[UV5:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[DEF]](<4 x s64>) + ; GFX9-MESA-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s64>) = G_BUILD_VECTOR [[UV]](s64), [[UV1]](s64), [[LOAD1]](s64), [[UV5]](s64) + ; GFX9-MESA-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[BUILD_VECTOR]](<4 x s64>) %0:_(p1) = COPY $vgpr0_vgpr1 %1:_(<3 x s64>) = G_LOAD %0 :: (load (<3 x s64>), align 8, addrspace 1) %2:_(<4 x s64>) = G_IMPLICIT_DEF @@ -11691,10 +11664,10 @@ body: | ; SI-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY [[C5]](s32) ; SI-NEXT: [[SHL20:%[0-9]+]]:_(s64) = G_SHL [[ANYEXT2]], [[COPY2]](s32) ; SI-NEXT: [[OR20:%[0-9]+]]:_(s64) = G_OR [[SHL20]], [[ZEXT2]] - ; SI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s64>) = G_BUILD_VECTOR [[OR6]](s64), [[OR13]](s64), [[OR20]](s64) ; SI-NEXT: [[DEF:%[0-9]+]]:_(<4 x s64>) = G_IMPLICIT_DEF - ; SI-NEXT: [[INSERT:%[0-9]+]]:_(<4 x s64>) = G_INSERT [[DEF]], [[BUILD_VECTOR]](<3 x s64>), 0 - ; SI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[INSERT]](<4 x s64>) + ; SI-NEXT: [[UV:%[0-9]+]]:_(s64), [[UV1:%[0-9]+]]:_(s64), [[UV2:%[0-9]+]]:_(s64), [[UV3:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[DEF]](<4 x s64>) + ; SI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s64>) = G_BUILD_VECTOR [[OR6]](s64), [[OR13]](s64), [[OR20]](s64), [[UV3]](s64) + ; SI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[BUILD_VECTOR]](<4 x s64>) ; CI-HSA-LABEL: name: test_load_global_v3s64_align1 ; CI-HSA: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 ; CI-HSA-NEXT: [[LOAD:%[0-9]+]]:_(<2 x s64>) = G_LOAD [[COPY]](p1) :: (load (<2 x s64>), align 1, addrspace 1) @@ -11702,10 +11675,10 @@ body: | ; CI-HSA-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64) ; CI-HSA-NEXT: [[LOAD1:%[0-9]+]]:_(s64) = G_LOAD [[PTR_ADD]](p1) :: (load (s64) from unknown-address + 16, align 1, addrspace 1) ; CI-HSA-NEXT: [[UV:%[0-9]+]]:_(s64), [[UV1:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[LOAD]](<2 x s64>) - ; CI-HSA-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s64>) = G_BUILD_VECTOR [[UV]](s64), [[UV1]](s64), [[LOAD1]](s64) ; CI-HSA-NEXT: [[DEF:%[0-9]+]]:_(<4 x s64>) = G_IMPLICIT_DEF - ; CI-HSA-NEXT: [[INSERT:%[0-9]+]]:_(<4 x s64>) = G_INSERT [[DEF]], [[BUILD_VECTOR]](<3 x s64>), 0 - ; CI-HSA-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[INSERT]](<4 x s64>) + ; CI-HSA-NEXT: [[UV2:%[0-9]+]]:_(s64), [[UV3:%[0-9]+]]:_(s64), [[UV4:%[0-9]+]]:_(s64), [[UV5:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[DEF]](<4 x s64>) + ; CI-HSA-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s64>) = G_BUILD_VECTOR [[UV]](s64), [[UV1]](s64), [[LOAD1]](s64), [[UV5]](s64) + ; CI-HSA-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[BUILD_VECTOR]](<4 x s64>) ; CI-MESA-LABEL: name: test_load_global_v3s64_align1 ; CI-MESA: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 ; CI-MESA-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[COPY]](p1) :: (load (s8), addrspace 1) @@ -11813,10 +11786,10 @@ body: | ; CI-MESA-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY [[C5]](s32) ; CI-MESA-NEXT: [[SHL20:%[0-9]+]]:_(s64) = G_SHL [[ANYEXT2]], [[COPY2]](s32) ; CI-MESA-NEXT: [[OR20:%[0-9]+]]:_(s64) = G_OR [[SHL20]], [[ZEXT2]] - ; CI-MESA-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s64>) = G_BUILD_VECTOR 
[[OR6]](s64), [[OR13]](s64), [[OR20]](s64) ; CI-MESA-NEXT: [[DEF:%[0-9]+]]:_(<4 x s64>) = G_IMPLICIT_DEF - ; CI-MESA-NEXT: [[INSERT:%[0-9]+]]:_(<4 x s64>) = G_INSERT [[DEF]], [[BUILD_VECTOR]](<3 x s64>), 0 - ; CI-MESA-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[INSERT]](<4 x s64>) + ; CI-MESA-NEXT: [[UV:%[0-9]+]]:_(s64), [[UV1:%[0-9]+]]:_(s64), [[UV2:%[0-9]+]]:_(s64), [[UV3:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[DEF]](<4 x s64>) + ; CI-MESA-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s64>) = G_BUILD_VECTOR [[OR6]](s64), [[OR13]](s64), [[OR20]](s64), [[UV3]](s64) + ; CI-MESA-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[BUILD_VECTOR]](<4 x s64>) ; VI-LABEL: name: test_load_global_v3s64_align1 ; VI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 ; VI-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[COPY]](p1) :: (load (s8), addrspace 1) @@ -11924,10 +11897,10 @@ body: | ; VI-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY [[C5]](s32) ; VI-NEXT: [[SHL20:%[0-9]+]]:_(s64) = G_SHL [[ANYEXT2]], [[COPY2]](s32) ; VI-NEXT: [[OR20:%[0-9]+]]:_(s64) = G_OR [[SHL20]], [[ZEXT2]] - ; VI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s64>) = G_BUILD_VECTOR [[OR6]](s64), [[OR13]](s64), [[OR20]](s64) ; VI-NEXT: [[DEF:%[0-9]+]]:_(<4 x s64>) = G_IMPLICIT_DEF - ; VI-NEXT: [[INSERT:%[0-9]+]]:_(<4 x s64>) = G_INSERT [[DEF]], [[BUILD_VECTOR]](<3 x s64>), 0 - ; VI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[INSERT]](<4 x s64>) + ; VI-NEXT: [[UV:%[0-9]+]]:_(s64), [[UV1:%[0-9]+]]:_(s64), [[UV2:%[0-9]+]]:_(s64), [[UV3:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[DEF]](<4 x s64>) + ; VI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s64>) = G_BUILD_VECTOR [[OR6]](s64), [[OR13]](s64), [[OR20]](s64), [[UV3]](s64) + ; VI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[BUILD_VECTOR]](<4 x s64>) ; GFX9-HSA-LABEL: name: test_load_global_v3s64_align1 ; GFX9-HSA: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 ; GFX9-HSA-NEXT: [[LOAD:%[0-9]+]]:_(<2 x s64>) = G_LOAD [[COPY]](p1) :: (load (<2 x s64>), align 1, addrspace 1) @@ -11935,10 +11908,10 @@ body: | ; GFX9-HSA-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64) ; GFX9-HSA-NEXT: [[LOAD1:%[0-9]+]]:_(s64) = G_LOAD [[PTR_ADD]](p1) :: (load (s64) from unknown-address + 16, align 1, addrspace 1) ; GFX9-HSA-NEXT: [[UV:%[0-9]+]]:_(s64), [[UV1:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[LOAD]](<2 x s64>) - ; GFX9-HSA-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s64>) = G_BUILD_VECTOR [[UV]](s64), [[UV1]](s64), [[LOAD1]](s64) ; GFX9-HSA-NEXT: [[DEF:%[0-9]+]]:_(<4 x s64>) = G_IMPLICIT_DEF - ; GFX9-HSA-NEXT: [[INSERT:%[0-9]+]]:_(<4 x s64>) = G_INSERT [[DEF]], [[BUILD_VECTOR]](<3 x s64>), 0 - ; GFX9-HSA-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[INSERT]](<4 x s64>) + ; GFX9-HSA-NEXT: [[UV2:%[0-9]+]]:_(s64), [[UV3:%[0-9]+]]:_(s64), [[UV4:%[0-9]+]]:_(s64), [[UV5:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[DEF]](<4 x s64>) + ; GFX9-HSA-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s64>) = G_BUILD_VECTOR [[UV]](s64), [[UV1]](s64), [[LOAD1]](s64), [[UV5]](s64) + ; GFX9-HSA-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[BUILD_VECTOR]](<4 x s64>) ; GFX9-MESA-LABEL: name: test_load_global_v3s64_align1 ; GFX9-MESA: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 ; GFX9-MESA-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[COPY]](p1) :: (load (s8), addrspace 1) @@ -12046,10 +12019,10 @@ body: | ; GFX9-MESA-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY [[C5]](s32) ; GFX9-MESA-NEXT: [[SHL20:%[0-9]+]]:_(s64) = G_SHL [[ANYEXT2]], [[COPY2]](s32) ; GFX9-MESA-NEXT: 
[[OR20:%[0-9]+]]:_(s64) = G_OR [[SHL20]], [[ZEXT2]] - ; GFX9-MESA-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s64>) = G_BUILD_VECTOR [[OR6]](s64), [[OR13]](s64), [[OR20]](s64) ; GFX9-MESA-NEXT: [[DEF:%[0-9]+]]:_(<4 x s64>) = G_IMPLICIT_DEF - ; GFX9-MESA-NEXT: [[INSERT:%[0-9]+]]:_(<4 x s64>) = G_INSERT [[DEF]], [[BUILD_VECTOR]](<3 x s64>), 0 - ; GFX9-MESA-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[INSERT]](<4 x s64>) + ; GFX9-MESA-NEXT: [[UV:%[0-9]+]]:_(s64), [[UV1:%[0-9]+]]:_(s64), [[UV2:%[0-9]+]]:_(s64), [[UV3:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[DEF]](<4 x s64>) + ; GFX9-MESA-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s64>) = G_BUILD_VECTOR [[OR6]](s64), [[OR13]](s64), [[OR20]](s64), [[UV3]](s64) + ; GFX9-MESA-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[BUILD_VECTOR]](<4 x s64>) %0:_(p1) = COPY $vgpr0_vgpr1 %1:_(<3 x s64>) = G_LOAD %0 :: (load (<3 x s64>), align 1, addrspace 1) %2:_(<4 x s64>) = G_IMPLICIT_DEF @@ -14947,17 +14920,18 @@ body: | ; SI-LABEL: name: test_global_v2s96_align16 ; SI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 ; SI-NEXT: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p1) :: (load (<4 x s32>), addrspace 1) - ; SI-NEXT: [[EXTRACT:%[0-9]+]]:_(<3 x s32>) = G_EXTRACT [[LOAD]](<4 x s32>), 0 - ; SI-NEXT: [[BITCAST:%[0-9]+]]:_(s96) = G_BITCAST [[EXTRACT]](<3 x s32>) + ; SI-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[LOAD]](<4 x s32>) + ; SI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[UV]](s32), [[UV1]](s32), [[UV2]](s32) + ; SI-NEXT: [[BITCAST:%[0-9]+]]:_(s96) = G_BITCAST [[BUILD_VECTOR]](<3 x s32>) ; SI-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 12 ; SI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64) ; SI-NEXT: [[LOAD1:%[0-9]+]]:_(<2 x s32>) = G_LOAD [[PTR_ADD]](p1) :: (load (<2 x s32>) from unknown-address + 12, align 4, addrspace 1) ; SI-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 8 ; SI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD]], [[C1]](s64) ; SI-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p1) :: (load (s32) from unknown-address + 20, addrspace 1) - ; SI-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[LOAD1]](<2 x s32>) - ; SI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[UV]](s32), [[UV1]](s32), [[LOAD2]](s32) - ; SI-NEXT: [[BITCAST1:%[0-9]+]]:_(s96) = G_BITCAST [[BUILD_VECTOR]](<3 x s32>) + ; SI-NEXT: [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[LOAD1]](<2 x s32>) + ; SI-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[UV4]](s32), [[UV5]](s32), [[LOAD2]](s32) + ; SI-NEXT: [[BITCAST1:%[0-9]+]]:_(s96) = G_BITCAST [[BUILD_VECTOR1]](<3 x s32>) ; SI-NEXT: [[COPY1:%[0-9]+]]:_(s96) = COPY [[BITCAST]](s96) ; SI-NEXT: [[COPY2:%[0-9]+]]:_(s96) = COPY [[BITCAST1]](s96) ; SI-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[COPY1]](s96) diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-load-local.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-load-local.mir index 17f6b68307ea..46eabff12de2 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-load-local.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-load-local.mir @@ -7733,12 +7733,12 @@ body: | ; SI-LABEL: name: test_load_local_v3s16_align8 ; SI: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 ; SI-NEXT: [[LOAD:%[0-9]+]]:_(<4 x s16>) = G_LOAD [[COPY]](p3) :: (load (<4 x s16>), addrspace 3) - ; SI-NEXT: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF ; SI-NEXT: [[UV:%[0-9]+]]:_(<2 x 
s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[LOAD]](<4 x s16>) ; SI-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>) ; SI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 ; SI-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) ; SI-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>) + ; SI-NEXT: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF ; SI-NEXT: [[UV2:%[0-9]+]]:_(<2 x s16>), [[UV3:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<4 x s16>) ; SI-NEXT: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[UV2]](<2 x s16>) ; SI-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C]](s32) @@ -7764,12 +7764,12 @@ body: | ; CI-LABEL: name: test_load_local_v3s16_align8 ; CI: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 ; CI-NEXT: [[LOAD:%[0-9]+]]:_(<4 x s16>) = G_LOAD [[COPY]](p3) :: (load (<4 x s16>), addrspace 3) - ; CI-NEXT: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF ; CI-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[LOAD]](<4 x s16>) ; CI-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>) ; CI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 ; CI-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) ; CI-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>) + ; CI-NEXT: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF ; CI-NEXT: [[UV2:%[0-9]+]]:_(<2 x s16>), [[UV3:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<4 x s16>) ; CI-NEXT: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[UV2]](<2 x s16>) ; CI-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C]](s32) @@ -7795,12 +7795,12 @@ body: | ; CI-DS128-LABEL: name: test_load_local_v3s16_align8 ; CI-DS128: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 ; CI-DS128-NEXT: [[LOAD:%[0-9]+]]:_(<4 x s16>) = G_LOAD [[COPY]](p3) :: (load (<4 x s16>), addrspace 3) - ; CI-DS128-NEXT: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF ; CI-DS128-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[LOAD]](<4 x s16>) ; CI-DS128-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>) ; CI-DS128-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 ; CI-DS128-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) ; CI-DS128-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>) + ; CI-DS128-NEXT: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF ; CI-DS128-NEXT: [[UV2:%[0-9]+]]:_(<2 x s16>), [[UV3:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<4 x s16>) ; CI-DS128-NEXT: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[UV2]](<2 x s16>) ; CI-DS128-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C]](s32) @@ -7826,12 +7826,12 @@ body: | ; VI-LABEL: name: test_load_local_v3s16_align8 ; VI: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 ; VI-NEXT: [[LOAD:%[0-9]+]]:_(<4 x s16>) = G_LOAD [[COPY]](p3) :: (load (<4 x s16>), addrspace 3) - ; VI-NEXT: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF ; VI-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[LOAD]](<4 x s16>) ; VI-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>) ; VI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 ; VI-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) ; VI-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>) + ; VI-NEXT: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF ; VI-NEXT: [[UV2:%[0-9]+]]:_(<2 x s16>), [[UV3:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<4 x s16>) ; VI-NEXT: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[UV2]](<2 x s16>) ; VI-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR 
[[BITCAST2]], [[C]](s32) @@ -7857,12 +7857,12 @@ body: | ; GFX9-LABEL: name: test_load_local_v3s16_align8 ; GFX9: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 ; GFX9-NEXT: [[LOAD:%[0-9]+]]:_(<4 x s16>) = G_LOAD [[COPY]](p3) :: (load (<4 x s16>), addrspace 3) - ; GFX9-NEXT: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF ; GFX9-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[LOAD]](<4 x s16>) ; GFX9-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>) ; GFX9-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 ; GFX9-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) ; GFX9-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>) + ; GFX9-NEXT: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF ; GFX9-NEXT: [[UV2:%[0-9]+]]:_(<2 x s16>), [[UV3:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<4 x s16>) ; GFX9-NEXT: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[UV2]](<2 x s16>) ; GFX9-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C]](s32) @@ -7875,12 +7875,12 @@ body: | ; GFX9-UNALIGNED-LABEL: name: test_load_local_v3s16_align8 ; GFX9-UNALIGNED: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 ; GFX9-UNALIGNED-NEXT: [[LOAD:%[0-9]+]]:_(<4 x s16>) = G_LOAD [[COPY]](p3) :: (load (<4 x s16>), addrspace 3) - ; GFX9-UNALIGNED-NEXT: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF ; GFX9-UNALIGNED-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[LOAD]](<4 x s16>) ; GFX9-UNALIGNED-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>) ; GFX9-UNALIGNED-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 ; GFX9-UNALIGNED-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) ; GFX9-UNALIGNED-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>) + ; GFX9-UNALIGNED-NEXT: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF ; GFX9-UNALIGNED-NEXT: [[UV2:%[0-9]+]]:_(<2 x s16>), [[UV3:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<4 x s16>) ; GFX9-UNALIGNED-NEXT: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[UV2]](<2 x s16>) ; GFX9-UNALIGNED-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C]](s32) @@ -7893,12 +7893,12 @@ body: | ; GFX10-LABEL: name: test_load_local_v3s16_align8 ; GFX10: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 ; GFX10-NEXT: [[LOAD:%[0-9]+]]:_(<4 x s16>) = G_LOAD [[COPY]](p3) :: (load (<4 x s16>), addrspace 3) - ; GFX10-NEXT: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF ; GFX10-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[LOAD]](<4 x s16>) ; GFX10-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>) ; GFX10-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 ; GFX10-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) ; GFX10-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>) + ; GFX10-NEXT: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF ; GFX10-NEXT: [[UV2:%[0-9]+]]:_(<2 x s16>), [[UV3:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<4 x s16>) ; GFX10-NEXT: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[UV2]](<2 x s16>) ; GFX10-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C]](s32) @@ -7911,12 +7911,12 @@ body: | ; GFX10-UNALIGNED-LABEL: name: test_load_local_v3s16_align8 ; GFX10-UNALIGNED: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 ; GFX10-UNALIGNED-NEXT: [[LOAD:%[0-9]+]]:_(<4 x s16>) = G_LOAD [[COPY]](p3) :: (load (<4 x s16>), addrspace 3) - ; GFX10-UNALIGNED-NEXT: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF ; GFX10-UNALIGNED-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[LOAD]](<4 x s16>) 
; GFX10-UNALIGNED-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>) ; GFX10-UNALIGNED-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 ; GFX10-UNALIGNED-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) ; GFX10-UNALIGNED-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>) + ; GFX10-UNALIGNED-NEXT: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF ; GFX10-UNALIGNED-NEXT: [[UV2:%[0-9]+]]:_(<2 x s16>), [[UV3:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<4 x s16>) ; GFX10-UNALIGNED-NEXT: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[UV2]](<2 x s16>) ; GFX10-UNALIGNED-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C]](s32) @@ -11741,10 +11741,10 @@ body: | ; SI-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 ; SI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C1]](s32) ; SI-NEXT: [[LOAD2:%[0-9]+]]:_(s64) = G_LOAD [[PTR_ADD1]](p3) :: (load (s64) from unknown-address + 16, align 16, addrspace 3) - ; SI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s64>) = G_BUILD_VECTOR [[LOAD]](s64), [[LOAD1]](s64), [[LOAD2]](s64) ; SI-NEXT: [[DEF:%[0-9]+]]:_(<4 x s64>) = G_IMPLICIT_DEF - ; SI-NEXT: [[INSERT:%[0-9]+]]:_(<4 x s64>) = G_INSERT [[DEF]], [[BUILD_VECTOR]](<3 x s64>), 0 - ; SI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[INSERT]](<4 x s64>) + ; SI-NEXT: [[UV:%[0-9]+]]:_(s64), [[UV1:%[0-9]+]]:_(s64), [[UV2:%[0-9]+]]:_(s64), [[UV3:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[DEF]](<4 x s64>) + ; SI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s64>) = G_BUILD_VECTOR [[LOAD]](s64), [[LOAD1]](s64), [[LOAD2]](s64), [[UV3]](s64) + ; SI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[BUILD_VECTOR]](<4 x s64>) ; CI-LABEL: name: test_load_local_v3s64_align32 ; CI: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 ; CI-NEXT: [[LOAD:%[0-9]+]]:_(s64) = G_LOAD [[COPY]](p3) :: (load (s64), align 32, addrspace 3) @@ -11754,10 +11754,10 @@ body: | ; CI-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 ; CI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C1]](s32) ; CI-NEXT: [[LOAD2:%[0-9]+]]:_(s64) = G_LOAD [[PTR_ADD1]](p3) :: (load (s64) from unknown-address + 16, align 16, addrspace 3) - ; CI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s64>) = G_BUILD_VECTOR [[LOAD]](s64), [[LOAD1]](s64), [[LOAD2]](s64) ; CI-NEXT: [[DEF:%[0-9]+]]:_(<4 x s64>) = G_IMPLICIT_DEF - ; CI-NEXT: [[INSERT:%[0-9]+]]:_(<4 x s64>) = G_INSERT [[DEF]], [[BUILD_VECTOR]](<3 x s64>), 0 - ; CI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[INSERT]](<4 x s64>) + ; CI-NEXT: [[UV:%[0-9]+]]:_(s64), [[UV1:%[0-9]+]]:_(s64), [[UV2:%[0-9]+]]:_(s64), [[UV3:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[DEF]](<4 x s64>) + ; CI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s64>) = G_BUILD_VECTOR [[LOAD]](s64), [[LOAD1]](s64), [[LOAD2]](s64), [[UV3]](s64) + ; CI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[BUILD_VECTOR]](<4 x s64>) ; CI-DS128-LABEL: name: test_load_local_v3s64_align32 ; CI-DS128: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 ; CI-DS128-NEXT: [[LOAD:%[0-9]+]]:_(<2 x s64>) = G_LOAD [[COPY]](p3) :: (load (<2 x s64>), align 32, addrspace 3) @@ -11765,10 +11765,10 @@ body: | ; CI-DS128-NEXT: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](s32) ; CI-DS128-NEXT: [[LOAD1:%[0-9]+]]:_(s64) = G_LOAD [[PTR_ADD]](p3) :: (load (s64) from unknown-address + 16, align 16, addrspace 3) ; CI-DS128-NEXT: [[UV:%[0-9]+]]:_(s64), [[UV1:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[LOAD]](<2 x s64>) - ; CI-DS128-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s64>) = G_BUILD_VECTOR [[UV]](s64), [[UV1]](s64), 
[[LOAD1]](s64) ; CI-DS128-NEXT: [[DEF:%[0-9]+]]:_(<4 x s64>) = G_IMPLICIT_DEF - ; CI-DS128-NEXT: [[INSERT:%[0-9]+]]:_(<4 x s64>) = G_INSERT [[DEF]], [[BUILD_VECTOR]](<3 x s64>), 0 - ; CI-DS128-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[INSERT]](<4 x s64>) + ; CI-DS128-NEXT: [[UV2:%[0-9]+]]:_(s64), [[UV3:%[0-9]+]]:_(s64), [[UV4:%[0-9]+]]:_(s64), [[UV5:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[DEF]](<4 x s64>) + ; CI-DS128-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s64>) = G_BUILD_VECTOR [[UV]](s64), [[UV1]](s64), [[LOAD1]](s64), [[UV5]](s64) + ; CI-DS128-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[BUILD_VECTOR]](<4 x s64>) ; VI-LABEL: name: test_load_local_v3s64_align32 ; VI: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 ; VI-NEXT: [[LOAD:%[0-9]+]]:_(<2 x s64>) = G_LOAD [[COPY]](p3) :: (load (<2 x s64>), align 32, addrspace 3) @@ -11776,10 +11776,10 @@ body: | ; VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](s32) ; VI-NEXT: [[LOAD1:%[0-9]+]]:_(s64) = G_LOAD [[PTR_ADD]](p3) :: (load (s64) from unknown-address + 16, align 16, addrspace 3) ; VI-NEXT: [[UV:%[0-9]+]]:_(s64), [[UV1:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[LOAD]](<2 x s64>) - ; VI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s64>) = G_BUILD_VECTOR [[UV]](s64), [[UV1]](s64), [[LOAD1]](s64) ; VI-NEXT: [[DEF:%[0-9]+]]:_(<4 x s64>) = G_IMPLICIT_DEF - ; VI-NEXT: [[INSERT:%[0-9]+]]:_(<4 x s64>) = G_INSERT [[DEF]], [[BUILD_VECTOR]](<3 x s64>), 0 - ; VI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[INSERT]](<4 x s64>) + ; VI-NEXT: [[UV2:%[0-9]+]]:_(s64), [[UV3:%[0-9]+]]:_(s64), [[UV4:%[0-9]+]]:_(s64), [[UV5:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[DEF]](<4 x s64>) + ; VI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s64>) = G_BUILD_VECTOR [[UV]](s64), [[UV1]](s64), [[LOAD1]](s64), [[UV5]](s64) + ; VI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[BUILD_VECTOR]](<4 x s64>) ; GFX9-LABEL: name: test_load_local_v3s64_align32 ; GFX9: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 ; GFX9-NEXT: [[LOAD:%[0-9]+]]:_(<2 x s64>) = G_LOAD [[COPY]](p3) :: (load (<2 x s64>), align 32, addrspace 3) @@ -11787,10 +11787,10 @@ body: | ; GFX9-NEXT: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](s32) ; GFX9-NEXT: [[LOAD1:%[0-9]+]]:_(s64) = G_LOAD [[PTR_ADD]](p3) :: (load (s64) from unknown-address + 16, align 16, addrspace 3) ; GFX9-NEXT: [[UV:%[0-9]+]]:_(s64), [[UV1:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[LOAD]](<2 x s64>) - ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s64>) = G_BUILD_VECTOR [[UV]](s64), [[UV1]](s64), [[LOAD1]](s64) ; GFX9-NEXT: [[DEF:%[0-9]+]]:_(<4 x s64>) = G_IMPLICIT_DEF - ; GFX9-NEXT: [[INSERT:%[0-9]+]]:_(<4 x s64>) = G_INSERT [[DEF]], [[BUILD_VECTOR]](<3 x s64>), 0 - ; GFX9-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[INSERT]](<4 x s64>) + ; GFX9-NEXT: [[UV2:%[0-9]+]]:_(s64), [[UV3:%[0-9]+]]:_(s64), [[UV4:%[0-9]+]]:_(s64), [[UV5:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[DEF]](<4 x s64>) + ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s64>) = G_BUILD_VECTOR [[UV]](s64), [[UV1]](s64), [[LOAD1]](s64), [[UV5]](s64) + ; GFX9-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[BUILD_VECTOR]](<4 x s64>) ; GFX9-UNALIGNED-LABEL: name: test_load_local_v3s64_align32 ; GFX9-UNALIGNED: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 ; GFX9-UNALIGNED-NEXT: [[LOAD:%[0-9]+]]:_(<2 x s64>) = G_LOAD [[COPY]](p3) :: (load (<2 x s64>), align 32, addrspace 3) @@ -11798,10 +11798,10 @@ body: | ; GFX9-UNALIGNED-NEXT: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](s32) ; GFX9-UNALIGNED-NEXT: 
[[LOAD1:%[0-9]+]]:_(s64) = G_LOAD [[PTR_ADD]](p3) :: (load (s64) from unknown-address + 16, align 16, addrspace 3) ; GFX9-UNALIGNED-NEXT: [[UV:%[0-9]+]]:_(s64), [[UV1:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[LOAD]](<2 x s64>) - ; GFX9-UNALIGNED-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s64>) = G_BUILD_VECTOR [[UV]](s64), [[UV1]](s64), [[LOAD1]](s64) ; GFX9-UNALIGNED-NEXT: [[DEF:%[0-9]+]]:_(<4 x s64>) = G_IMPLICIT_DEF - ; GFX9-UNALIGNED-NEXT: [[INSERT:%[0-9]+]]:_(<4 x s64>) = G_INSERT [[DEF]], [[BUILD_VECTOR]](<3 x s64>), 0 - ; GFX9-UNALIGNED-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[INSERT]](<4 x s64>) + ; GFX9-UNALIGNED-NEXT: [[UV2:%[0-9]+]]:_(s64), [[UV3:%[0-9]+]]:_(s64), [[UV4:%[0-9]+]]:_(s64), [[UV5:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[DEF]](<4 x s64>) + ; GFX9-UNALIGNED-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s64>) = G_BUILD_VECTOR [[UV]](s64), [[UV1]](s64), [[LOAD1]](s64), [[UV5]](s64) + ; GFX9-UNALIGNED-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[BUILD_VECTOR]](<4 x s64>) ; GFX10-LABEL: name: test_load_local_v3s64_align32 ; GFX10: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 ; GFX10-NEXT: [[LOAD:%[0-9]+]]:_(<2 x s64>) = G_LOAD [[COPY]](p3) :: (load (<2 x s64>), align 32, addrspace 3) @@ -11809,10 +11809,10 @@ body: | ; GFX10-NEXT: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](s32) ; GFX10-NEXT: [[LOAD1:%[0-9]+]]:_(s64) = G_LOAD [[PTR_ADD]](p3) :: (load (s64) from unknown-address + 16, align 16, addrspace 3) ; GFX10-NEXT: [[UV:%[0-9]+]]:_(s64), [[UV1:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[LOAD]](<2 x s64>) - ; GFX10-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s64>) = G_BUILD_VECTOR [[UV]](s64), [[UV1]](s64), [[LOAD1]](s64) ; GFX10-NEXT: [[DEF:%[0-9]+]]:_(<4 x s64>) = G_IMPLICIT_DEF - ; GFX10-NEXT: [[INSERT:%[0-9]+]]:_(<4 x s64>) = G_INSERT [[DEF]], [[BUILD_VECTOR]](<3 x s64>), 0 - ; GFX10-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[INSERT]](<4 x s64>) + ; GFX10-NEXT: [[UV2:%[0-9]+]]:_(s64), [[UV3:%[0-9]+]]:_(s64), [[UV4:%[0-9]+]]:_(s64), [[UV5:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[DEF]](<4 x s64>) + ; GFX10-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s64>) = G_BUILD_VECTOR [[UV]](s64), [[UV1]](s64), [[LOAD1]](s64), [[UV5]](s64) + ; GFX10-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[BUILD_VECTOR]](<4 x s64>) ; GFX10-UNALIGNED-LABEL: name: test_load_local_v3s64_align32 ; GFX10-UNALIGNED: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 ; GFX10-UNALIGNED-NEXT: [[LOAD:%[0-9]+]]:_(<2 x s64>) = G_LOAD [[COPY]](p3) :: (load (<2 x s64>), align 32, addrspace 3) @@ -11820,10 +11820,10 @@ body: | ; GFX10-UNALIGNED-NEXT: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](s32) ; GFX10-UNALIGNED-NEXT: [[LOAD1:%[0-9]+]]:_(s64) = G_LOAD [[PTR_ADD]](p3) :: (load (s64) from unknown-address + 16, align 16, addrspace 3) ; GFX10-UNALIGNED-NEXT: [[UV:%[0-9]+]]:_(s64), [[UV1:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[LOAD]](<2 x s64>) - ; GFX10-UNALIGNED-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s64>) = G_BUILD_VECTOR [[UV]](s64), [[UV1]](s64), [[LOAD1]](s64) ; GFX10-UNALIGNED-NEXT: [[DEF:%[0-9]+]]:_(<4 x s64>) = G_IMPLICIT_DEF - ; GFX10-UNALIGNED-NEXT: [[INSERT:%[0-9]+]]:_(<4 x s64>) = G_INSERT [[DEF]], [[BUILD_VECTOR]](<3 x s64>), 0 - ; GFX10-UNALIGNED-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[INSERT]](<4 x s64>) + ; GFX10-UNALIGNED-NEXT: [[UV2:%[0-9]+]]:_(s64), [[UV3:%[0-9]+]]:_(s64), [[UV4:%[0-9]+]]:_(s64), [[UV5:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[DEF]](<4 x s64>) + ; GFX10-UNALIGNED-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s64>) = 
G_BUILD_VECTOR [[UV]](s64), [[UV1]](s64), [[LOAD1]](s64), [[UV5]](s64) + ; GFX10-UNALIGNED-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[BUILD_VECTOR]](<4 x s64>) %0:_(p3) = COPY $vgpr0 %1:_(<3 x s64>) = G_LOAD %0 :: (load (<3 x s64>), align 32, addrspace 3) %2:_(<4 x s64>) = G_IMPLICIT_DEF diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-load-private.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-load-private.mir index c13629c8dc03..cca3ae38e94e 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-load-private.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-load-private.mir @@ -7496,10 +7496,10 @@ body: | ; SI-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD3]], [[C]](s32) ; SI-NEXT: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p5) :: (load (s32) from unknown-address + 20, addrspace 5) ; SI-NEXT: [[MV2:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[LOAD4]](s32), [[LOAD5]](s32) - ; SI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s64>) = G_BUILD_VECTOR [[MV]](s64), [[MV1]](s64), [[MV2]](s64) ; SI-NEXT: [[DEF:%[0-9]+]]:_(<4 x s64>) = G_IMPLICIT_DEF - ; SI-NEXT: [[INSERT:%[0-9]+]]:_(<4 x s64>) = G_INSERT [[DEF]], [[BUILD_VECTOR]](<3 x s64>), 0 - ; SI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[INSERT]](<4 x s64>) + ; SI-NEXT: [[UV:%[0-9]+]]:_(s64), [[UV1:%[0-9]+]]:_(s64), [[UV2:%[0-9]+]]:_(s64), [[UV3:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[DEF]](<4 x s64>) + ; SI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s64>) = G_BUILD_VECTOR [[MV]](s64), [[MV1]](s64), [[MV2]](s64), [[UV3]](s64) + ; SI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[BUILD_VECTOR]](<4 x s64>) ; CI-LABEL: name: test_load_private_v3s64_align32 ; CI: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 ; CI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load (s32), align 32, addrspace 5) @@ -7519,10 +7519,10 @@ body: | ; CI-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD3]], [[C]](s32) ; CI-NEXT: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p5) :: (load (s32) from unknown-address + 20, addrspace 5) ; CI-NEXT: [[MV2:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[LOAD4]](s32), [[LOAD5]](s32) - ; CI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s64>) = G_BUILD_VECTOR [[MV]](s64), [[MV1]](s64), [[MV2]](s64) ; CI-NEXT: [[DEF:%[0-9]+]]:_(<4 x s64>) = G_IMPLICIT_DEF - ; CI-NEXT: [[INSERT:%[0-9]+]]:_(<4 x s64>) = G_INSERT [[DEF]], [[BUILD_VECTOR]](<3 x s64>), 0 - ; CI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[INSERT]](<4 x s64>) + ; CI-NEXT: [[UV:%[0-9]+]]:_(s64), [[UV1:%[0-9]+]]:_(s64), [[UV2:%[0-9]+]]:_(s64), [[UV3:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[DEF]](<4 x s64>) + ; CI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s64>) = G_BUILD_VECTOR [[MV]](s64), [[MV1]](s64), [[MV2]](s64), [[UV3]](s64) + ; CI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[BUILD_VECTOR]](<4 x s64>) ; VI-LABEL: name: test_load_private_v3s64_align32 ; VI: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 ; VI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load (s32), align 32, addrspace 5) @@ -7542,10 +7542,10 @@ body: | ; VI-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD3]], [[C]](s32) ; VI-NEXT: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p5) :: (load (s32) from unknown-address + 20, addrspace 5) ; VI-NEXT: [[MV2:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[LOAD4]](s32), [[LOAD5]](s32) - ; VI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s64>) = G_BUILD_VECTOR [[MV]](s64), [[MV1]](s64), [[MV2]](s64) ; VI-NEXT: [[DEF:%[0-9]+]]:_(<4 x s64>) = G_IMPLICIT_DEF - ; VI-NEXT: [[INSERT:%[0-9]+]]:_(<4 
x s64>) = G_INSERT [[DEF]], [[BUILD_VECTOR]](<3 x s64>), 0 - ; VI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[INSERT]](<4 x s64>) + ; VI-NEXT: [[UV:%[0-9]+]]:_(s64), [[UV1:%[0-9]+]]:_(s64), [[UV2:%[0-9]+]]:_(s64), [[UV3:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[DEF]](<4 x s64>) + ; VI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s64>) = G_BUILD_VECTOR [[MV]](s64), [[MV1]](s64), [[MV2]](s64), [[UV3]](s64) + ; VI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[BUILD_VECTOR]](<4 x s64>) ; GFX9-LABEL: name: test_load_private_v3s64_align32 ; GFX9: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 ; GFX9-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (load (s32), align 32, addrspace 5) @@ -7565,10 +7565,10 @@ body: | ; GFX9-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p5) = G_PTR_ADD [[PTR_ADD3]], [[C]](s32) ; GFX9-NEXT: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD4]](p5) :: (load (s32) from unknown-address + 20, addrspace 5) ; GFX9-NEXT: [[MV2:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[LOAD4]](s32), [[LOAD5]](s32) - ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s64>) = G_BUILD_VECTOR [[MV]](s64), [[MV1]](s64), [[MV2]](s64) ; GFX9-NEXT: [[DEF:%[0-9]+]]:_(<4 x s64>) = G_IMPLICIT_DEF - ; GFX9-NEXT: [[INSERT:%[0-9]+]]:_(<4 x s64>) = G_INSERT [[DEF]], [[BUILD_VECTOR]](<3 x s64>), 0 - ; GFX9-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[INSERT]](<4 x s64>) + ; GFX9-NEXT: [[UV:%[0-9]+]]:_(s64), [[UV1:%[0-9]+]]:_(s64), [[UV2:%[0-9]+]]:_(s64), [[UV3:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[DEF]](<4 x s64>) + ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s64>) = G_BUILD_VECTOR [[MV]](s64), [[MV1]](s64), [[MV2]](s64), [[UV3]](s64) + ; GFX9-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[BUILD_VECTOR]](<4 x s64>) %0:_(p5) = COPY $vgpr0 %1:_(<3 x s64>) = G_LOAD %0 :: (load (<3 x s64>), align 32, addrspace 5) %2:_(<4 x s64>) = G_IMPLICIT_DEF diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-lshr.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-lshr.mir index f61de3cc898c..cf4144166b51 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-lshr.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-lshr.mir @@ -1,8 +1,8 @@ # NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py -# RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=tahiti -O0 -run-pass=legalizer -global-isel-abort=0 -o - %s | FileCheck -check-prefix=SI %s -# RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=fiji -O0 -run-pass=legalizer -global-isel-abort=0 -o - %s | FileCheck -check-prefix=VI %s -# RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx900 -O0 -run-pass=legalizer -global-isel-abort=0 -o - %s | FileCheck -check-prefix=GFX9 %s -# RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx1010 -O0 -run-pass=legalizer -global-isel-abort=0 -o - %s | FileCheck -check-prefix=GFX9 %s +# RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=tahiti -O0 -run-pass=legalizer -o - %s | FileCheck -check-prefix=SI %s +# RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=fiji -O0 -run-pass=legalizer -o - %s | FileCheck -check-prefix=VI %s +# RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx900 -O0 -run-pass=legalizer -o - %s | FileCheck -check-prefix=GFX9 %s +# RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx1010 -O0 -run-pass=legalizer -o - %s | FileCheck -check-prefix=GFX9 %s --- name: test_lshr_s32_s32 @@ -414,43 +414,40 @@ body: | ; SI-LABEL: name: test_lshr_v3s64_v3s32 ; SI: [[COPY:%[0-9]+]]:_(<4 x s64>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 - ; SI-NEXT: [[EXTRACT:%[0-9]+]]:_(<3 x s64>) = G_EXTRACT [[COPY]](<4 x s64>), 0 
+ ; SI-NEXT: [[UV:%[0-9]+]]:_(s64), [[UV1:%[0-9]+]]:_(s64), [[UV2:%[0-9]+]]:_(s64), [[UV3:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[COPY]](<4 x s64>) ; SI-NEXT: [[COPY1:%[0-9]+]]:_(<3 x s32>) = COPY $vgpr8_vgpr9_vgpr10 - ; SI-NEXT: [[UV:%[0-9]+]]:_(s64), [[UV1:%[0-9]+]]:_(s64), [[UV2:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[EXTRACT]](<3 x s64>) - ; SI-NEXT: [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](<3 x s32>) - ; SI-NEXT: [[LSHR:%[0-9]+]]:_(s64) = G_LSHR [[UV]], [[UV3]](s32) - ; SI-NEXT: [[LSHR1:%[0-9]+]]:_(s64) = G_LSHR [[UV1]], [[UV4]](s32) - ; SI-NEXT: [[LSHR2:%[0-9]+]]:_(s64) = G_LSHR [[UV2]], [[UV5]](s32) - ; SI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s64>) = G_BUILD_VECTOR [[LSHR]](s64), [[LSHR1]](s64), [[LSHR2]](s64) + ; SI-NEXT: [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32), [[UV6:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](<3 x s32>) + ; SI-NEXT: [[LSHR:%[0-9]+]]:_(s64) = G_LSHR [[UV]], [[UV4]](s32) + ; SI-NEXT: [[LSHR1:%[0-9]+]]:_(s64) = G_LSHR [[UV1]], [[UV5]](s32) + ; SI-NEXT: [[LSHR2:%[0-9]+]]:_(s64) = G_LSHR [[UV2]], [[UV6]](s32) ; SI-NEXT: [[DEF:%[0-9]+]]:_(<4 x s64>) = G_IMPLICIT_DEF - ; SI-NEXT: [[INSERT:%[0-9]+]]:_(<4 x s64>) = G_INSERT [[DEF]], [[BUILD_VECTOR]](<3 x s64>), 0 - ; SI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[INSERT]](<4 x s64>) + ; SI-NEXT: [[UV7:%[0-9]+]]:_(s64), [[UV8:%[0-9]+]]:_(s64), [[UV9:%[0-9]+]]:_(s64), [[UV10:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[DEF]](<4 x s64>) + ; SI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s64>) = G_BUILD_VECTOR [[LSHR]](s64), [[LSHR1]](s64), [[LSHR2]](s64), [[UV10]](s64) + ; SI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[BUILD_VECTOR]](<4 x s64>) ; VI-LABEL: name: test_lshr_v3s64_v3s32 ; VI: [[COPY:%[0-9]+]]:_(<4 x s64>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 - ; VI-NEXT: [[EXTRACT:%[0-9]+]]:_(<3 x s64>) = G_EXTRACT [[COPY]](<4 x s64>), 0 + ; VI-NEXT: [[UV:%[0-9]+]]:_(s64), [[UV1:%[0-9]+]]:_(s64), [[UV2:%[0-9]+]]:_(s64), [[UV3:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[COPY]](<4 x s64>) ; VI-NEXT: [[COPY1:%[0-9]+]]:_(<3 x s32>) = COPY $vgpr8_vgpr9_vgpr10 - ; VI-NEXT: [[UV:%[0-9]+]]:_(s64), [[UV1:%[0-9]+]]:_(s64), [[UV2:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[EXTRACT]](<3 x s64>) - ; VI-NEXT: [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](<3 x s32>) - ; VI-NEXT: [[LSHR:%[0-9]+]]:_(s64) = G_LSHR [[UV]], [[UV3]](s32) - ; VI-NEXT: [[LSHR1:%[0-9]+]]:_(s64) = G_LSHR [[UV1]], [[UV4]](s32) - ; VI-NEXT: [[LSHR2:%[0-9]+]]:_(s64) = G_LSHR [[UV2]], [[UV5]](s32) - ; VI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s64>) = G_BUILD_VECTOR [[LSHR]](s64), [[LSHR1]](s64), [[LSHR2]](s64) + ; VI-NEXT: [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32), [[UV6:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](<3 x s32>) + ; VI-NEXT: [[LSHR:%[0-9]+]]:_(s64) = G_LSHR [[UV]], [[UV4]](s32) + ; VI-NEXT: [[LSHR1:%[0-9]+]]:_(s64) = G_LSHR [[UV1]], [[UV5]](s32) + ; VI-NEXT: [[LSHR2:%[0-9]+]]:_(s64) = G_LSHR [[UV2]], [[UV6]](s32) ; VI-NEXT: [[DEF:%[0-9]+]]:_(<4 x s64>) = G_IMPLICIT_DEF - ; VI-NEXT: [[INSERT:%[0-9]+]]:_(<4 x s64>) = G_INSERT [[DEF]], [[BUILD_VECTOR]](<3 x s64>), 0 - ; VI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[INSERT]](<4 x s64>) + ; VI-NEXT: [[UV7:%[0-9]+]]:_(s64), [[UV8:%[0-9]+]]:_(s64), [[UV9:%[0-9]+]]:_(s64), [[UV10:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[DEF]](<4 x s64>) + ; VI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s64>) = G_BUILD_VECTOR [[LSHR]](s64), [[LSHR1]](s64), 
[[LSHR2]](s64), [[UV10]](s64) + ; VI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[BUILD_VECTOR]](<4 x s64>) ; GFX9-LABEL: name: test_lshr_v3s64_v3s32 ; GFX9: [[COPY:%[0-9]+]]:_(<4 x s64>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 - ; GFX9-NEXT: [[EXTRACT:%[0-9]+]]:_(<3 x s64>) = G_EXTRACT [[COPY]](<4 x s64>), 0 + ; GFX9-NEXT: [[UV:%[0-9]+]]:_(s64), [[UV1:%[0-9]+]]:_(s64), [[UV2:%[0-9]+]]:_(s64), [[UV3:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[COPY]](<4 x s64>) ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(<3 x s32>) = COPY $vgpr8_vgpr9_vgpr10 - ; GFX9-NEXT: [[UV:%[0-9]+]]:_(s64), [[UV1:%[0-9]+]]:_(s64), [[UV2:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[EXTRACT]](<3 x s64>) - ; GFX9-NEXT: [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](<3 x s32>) - ; GFX9-NEXT: [[LSHR:%[0-9]+]]:_(s64) = G_LSHR [[UV]], [[UV3]](s32) - ; GFX9-NEXT: [[LSHR1:%[0-9]+]]:_(s64) = G_LSHR [[UV1]], [[UV4]](s32) - ; GFX9-NEXT: [[LSHR2:%[0-9]+]]:_(s64) = G_LSHR [[UV2]], [[UV5]](s32) - ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s64>) = G_BUILD_VECTOR [[LSHR]](s64), [[LSHR1]](s64), [[LSHR2]](s64) + ; GFX9-NEXT: [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32), [[UV6:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](<3 x s32>) + ; GFX9-NEXT: [[LSHR:%[0-9]+]]:_(s64) = G_LSHR [[UV]], [[UV4]](s32) + ; GFX9-NEXT: [[LSHR1:%[0-9]+]]:_(s64) = G_LSHR [[UV1]], [[UV5]](s32) + ; GFX9-NEXT: [[LSHR2:%[0-9]+]]:_(s64) = G_LSHR [[UV2]], [[UV6]](s32) ; GFX9-NEXT: [[DEF:%[0-9]+]]:_(<4 x s64>) = G_IMPLICIT_DEF - ; GFX9-NEXT: [[INSERT:%[0-9]+]]:_(<4 x s64>) = G_INSERT [[DEF]], [[BUILD_VECTOR]](<3 x s64>), 0 - ; GFX9-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[INSERT]](<4 x s64>) + ; GFX9-NEXT: [[UV7:%[0-9]+]]:_(s64), [[UV8:%[0-9]+]]:_(s64), [[UV9:%[0-9]+]]:_(s64), [[UV10:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[DEF]](<4 x s64>) + ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s64>) = G_BUILD_VECTOR [[LSHR]](s64), [[LSHR1]](s64), [[LSHR2]](s64), [[UV10]](s64) + ; GFX9-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[BUILD_VECTOR]](<4 x s64>) %0:_(<4 x s64>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 %1:_(<3 x s64>) = G_EXTRACT %0, 0 %2:_(<3 x s32>) = COPY $vgpr8_vgpr9_vgpr10 @@ -593,17 +590,12 @@ body: | ; SI-LABEL: name: test_lshr_v3s16_v3s16 ; SI: [[COPY:%[0-9]+]]:_(<4 x s16>) = COPY $vgpr0_vgpr1 ; SI-NEXT: [[COPY1:%[0-9]+]]:_(<4 x s16>) = COPY $vgpr2_vgpr3 - ; SI-NEXT: [[EXTRACT:%[0-9]+]]:_(<3 x s16>) = G_EXTRACT [[COPY]](<4 x s16>), 0 - ; SI-NEXT: [[EXTRACT1:%[0-9]+]]:_(<3 x s16>) = G_EXTRACT [[COPY1]](<4 x s16>), 0 - ; SI-NEXT: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF - ; SI-NEXT: [[INSERT:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF]], [[EXTRACT]](<3 x s16>), 0 - ; SI-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[INSERT]](<4 x s16>) + ; SI-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[COPY]](<4 x s16>) ; SI-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>) ; SI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 ; SI-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) ; SI-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>) - ; SI-NEXT: [[INSERT1:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF]], [[EXTRACT1]](<3 x s16>), 0 - ; SI-NEXT: [[UV2:%[0-9]+]]:_(<2 x s16>), [[UV3:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[INSERT1]](<4 x s16>) + ; SI-NEXT: [[UV2:%[0-9]+]]:_(<2 x s16>), [[UV3:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[COPY1]](<4 x 
s16>) ; SI-NEXT: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[UV2]](<2 x s16>) ; SI-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C]](s32) ; SI-NEXT: [[BITCAST3:%[0-9]+]]:_(s32) = G_BITCAST [[UV3]](<2 x s16>) @@ -617,29 +609,26 @@ body: | ; SI-NEXT: [[AND4:%[0-9]+]]:_(s32) = G_AND [[BITCAST3]], [[C1]] ; SI-NEXT: [[AND5:%[0-9]+]]:_(s32) = G_AND [[BITCAST1]], [[C1]] ; SI-NEXT: [[LSHR4:%[0-9]+]]:_(s32) = G_LSHR [[AND5]], [[AND4]](s32) + ; SI-NEXT: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF + ; SI-NEXT: [[UV4:%[0-9]+]]:_(<2 x s16>), [[UV5:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<4 x s16>) + ; SI-NEXT: [[BITCAST4:%[0-9]+]]:_(s32) = G_BITCAST [[UV5]](<2 x s16>) + ; SI-NEXT: [[LSHR5:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST4]], [[C]](s32) ; SI-NEXT: [[AND6:%[0-9]+]]:_(s32) = G_AND [[LSHR2]], [[C1]] ; SI-NEXT: [[AND7:%[0-9]+]]:_(s32) = G_AND [[LSHR3]], [[C1]] ; SI-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND7]], [[C]](s32) ; SI-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND6]], [[SHL]] - ; SI-NEXT: [[BITCAST4:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) + ; SI-NEXT: [[BITCAST5:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) ; SI-NEXT: [[AND8:%[0-9]+]]:_(s32) = G_AND [[LSHR4]], [[C1]] - ; SI-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 - ; SI-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[C2]], [[C]](s32) + ; SI-NEXT: [[AND9:%[0-9]+]]:_(s32) = G_AND [[LSHR5]], [[C1]] + ; SI-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND9]], [[C]](s32) ; SI-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND8]], [[SHL1]] - ; SI-NEXT: [[BITCAST5:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32) - ; SI-NEXT: [[DEF1:%[0-9]+]]:_(<2 x s16>) = G_IMPLICIT_DEF - ; SI-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BITCAST4]](<2 x s16>), [[BITCAST5]](<2 x s16>), [[DEF1]](<2 x s16>) - ; SI-NEXT: [[UV4:%[0-9]+]]:_(<3 x s16>), [[UV5:%[0-9]+]]:_(<3 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<6 x s16>) - ; SI-NEXT: [[INSERT2:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF]], [[UV4]](<3 x s16>), 0 - ; SI-NEXT: $vgpr0_vgpr1 = COPY [[INSERT2]](<4 x s16>) + ; SI-NEXT: [[BITCAST6:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32) + ; SI-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BITCAST5]](<2 x s16>), [[BITCAST6]](<2 x s16>) + ; SI-NEXT: $vgpr0_vgpr1 = COPY [[CONCAT_VECTORS]](<4 x s16>) ; VI-LABEL: name: test_lshr_v3s16_v3s16 ; VI: [[COPY:%[0-9]+]]:_(<4 x s16>) = COPY $vgpr0_vgpr1 ; VI-NEXT: [[COPY1:%[0-9]+]]:_(<4 x s16>) = COPY $vgpr2_vgpr3 - ; VI-NEXT: [[EXTRACT:%[0-9]+]]:_(<3 x s16>) = G_EXTRACT [[COPY]](<4 x s16>), 0 - ; VI-NEXT: [[EXTRACT1:%[0-9]+]]:_(<3 x s16>) = G_EXTRACT [[COPY1]](<4 x s16>), 0 - ; VI-NEXT: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF - ; VI-NEXT: [[INSERT:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF]], [[EXTRACT]](<3 x s16>), 0 - ; VI-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[INSERT]](<4 x s16>) + ; VI-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[COPY]](<4 x s16>) ; VI-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>) ; VI-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST]](s32) ; VI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 @@ -647,8 +636,7 @@ body: | ; VI-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) ; VI-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>) ; VI-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST1]](s32) - ; VI-NEXT: [[INSERT1:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF]], [[EXTRACT1]](<3 x s16>), 0 - ; VI-NEXT: [[UV2:%[0-9]+]]:_(<2 x s16>), 
[[UV3:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[INSERT1]](<4 x s16>) + ; VI-NEXT: [[UV2:%[0-9]+]]:_(<2 x s16>), [[UV3:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[COPY1]](<4 x s16>) ; VI-NEXT: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[UV2]](<2 x s16>) ; VI-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST2]](s32) ; VI-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C]](s32) @@ -658,49 +646,52 @@ body: | ; VI-NEXT: [[LSHR2:%[0-9]+]]:_(s16) = G_LSHR [[TRUNC]], [[TRUNC3]](s16) ; VI-NEXT: [[LSHR3:%[0-9]+]]:_(s16) = G_LSHR [[TRUNC1]], [[TRUNC4]](s16) ; VI-NEXT: [[LSHR4:%[0-9]+]]:_(s16) = G_LSHR [[TRUNC2]], [[TRUNC5]](s16) + ; VI-NEXT: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF + ; VI-NEXT: [[UV4:%[0-9]+]]:_(<2 x s16>), [[UV5:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<4 x s16>) + ; VI-NEXT: [[BITCAST4:%[0-9]+]]:_(s32) = G_BITCAST [[UV5]](<2 x s16>) + ; VI-NEXT: [[LSHR5:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST4]], [[C]](s32) ; VI-NEXT: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[LSHR2]](s16) ; VI-NEXT: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[LSHR3]](s16) ; VI-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C]](s32) ; VI-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL]] - ; VI-NEXT: [[BITCAST4:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) + ; VI-NEXT: [[BITCAST5:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) ; VI-NEXT: [[ZEXT2:%[0-9]+]]:_(s32) = G_ZEXT [[LSHR4]](s16) - ; VI-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 - ; VI-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[C1]], [[C]](s32) + ; VI-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 + ; VI-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[LSHR5]], [[C1]] + ; VI-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND]], [[C]](s32) ; VI-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[ZEXT2]], [[SHL1]] - ; VI-NEXT: [[BITCAST5:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32) - ; VI-NEXT: [[DEF1:%[0-9]+]]:_(<2 x s16>) = G_IMPLICIT_DEF - ; VI-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BITCAST4]](<2 x s16>), [[BITCAST5]](<2 x s16>), [[DEF1]](<2 x s16>) - ; VI-NEXT: [[UV4:%[0-9]+]]:_(<3 x s16>), [[UV5:%[0-9]+]]:_(<3 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<6 x s16>) - ; VI-NEXT: [[INSERT2:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF]], [[UV4]](<3 x s16>), 0 - ; VI-NEXT: $vgpr0_vgpr1 = COPY [[INSERT2]](<4 x s16>) + ; VI-NEXT: [[BITCAST6:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32) + ; VI-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BITCAST5]](<2 x s16>), [[BITCAST6]](<2 x s16>) + ; VI-NEXT: $vgpr0_vgpr1 = COPY [[CONCAT_VECTORS]](<4 x s16>) ; GFX9-LABEL: name: test_lshr_v3s16_v3s16 ; GFX9: [[COPY:%[0-9]+]]:_(<4 x s16>) = COPY $vgpr0_vgpr1 ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(<4 x s16>) = COPY $vgpr2_vgpr3 - ; GFX9-NEXT: [[EXTRACT:%[0-9]+]]:_(<3 x s16>) = G_EXTRACT [[COPY]](<4 x s16>), 0 - ; GFX9-NEXT: [[EXTRACT1:%[0-9]+]]:_(<3 x s16>) = G_EXTRACT [[COPY1]](<4 x s16>), 0 - ; GFX9-NEXT: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF - ; GFX9-NEXT: [[INSERT:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF]], [[EXTRACT]](<3 x s16>), 0 - ; GFX9-NEXT: [[EXTRACT2:%[0-9]+]]:_(<2 x s16>) = G_EXTRACT [[INSERT]](<4 x s16>), 0 - ; GFX9-NEXT: [[INSERT1:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF]], [[EXTRACT]](<3 x s16>), 0 - ; GFX9-NEXT: [[EXTRACT3:%[0-9]+]]:_(s16) = G_EXTRACT [[INSERT1]](<4 x s16>), 32 - ; GFX9-NEXT: [[INSERT2:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF]], [[EXTRACT1]](<3 x s16>), 0 - ; GFX9-NEXT: [[EXTRACT4:%[0-9]+]]:_(<2 x s16>) = G_EXTRACT [[INSERT2]](<4 x s16>), 0 - ; GFX9-NEXT: [[INSERT3:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF]], 
[[EXTRACT1]](<3 x s16>), 0 - ; GFX9-NEXT: [[EXTRACT5:%[0-9]+]]:_(s16) = G_EXTRACT [[INSERT3]](<4 x s16>), 32 - ; GFX9-NEXT: [[LSHR:%[0-9]+]]:_(<2 x s16>) = G_LSHR [[EXTRACT2]], [[EXTRACT4]](<2 x s16>) - ; GFX9-NEXT: [[LSHR1:%[0-9]+]]:_(s16) = G_LSHR [[EXTRACT3]], [[EXTRACT5]](s16) - ; GFX9-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[LSHR]](<2 x s16>) + ; GFX9-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[COPY]](<4 x s16>) + ; GFX9-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>) ; GFX9-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; GFX9-NEXT: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) - ; GFX9-NEXT: [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[BITCAST]](s32), [[LSHR2]](s32) - ; GFX9-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[LSHR1]](s16) - ; GFX9-NEXT: [[DEF1:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF - ; GFX9-NEXT: [[BUILD_VECTOR_TRUNC1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[ANYEXT]](s32), [[DEF1]](s32) - ; GFX9-NEXT: [[DEF2:%[0-9]+]]:_(<2 x s16>) = G_IMPLICIT_DEF - ; GFX9-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR_TRUNC]](<2 x s16>), [[BUILD_VECTOR_TRUNC1]](<2 x s16>), [[DEF2]](<2 x s16>) - ; GFX9-NEXT: [[UV:%[0-9]+]]:_(<3 x s16>), [[UV1:%[0-9]+]]:_(<3 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<6 x s16>) - ; GFX9-NEXT: [[INSERT4:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF]], [[UV]](<3 x s16>), 0 - ; GFX9-NEXT: $vgpr0_vgpr1 = COPY [[INSERT4]](<4 x s16>) + ; GFX9-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) + ; GFX9-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>) + ; GFX9-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST1]](s32) + ; GFX9-NEXT: [[UV2:%[0-9]+]]:_(<2 x s16>), [[UV3:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[COPY1]](<4 x s16>) + ; GFX9-NEXT: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[UV2]](<2 x s16>) + ; GFX9-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C]](s32) + ; GFX9-NEXT: [[BITCAST3:%[0-9]+]]:_(s32) = G_BITCAST [[UV3]](<2 x s16>) + ; GFX9-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST3]](s32) + ; GFX9-NEXT: [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[BITCAST]](s32), [[LSHR]](s32) + ; GFX9-NEXT: [[BUILD_VECTOR_TRUNC1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[BITCAST2]](s32), [[LSHR1]](s32) + ; GFX9-NEXT: [[LSHR2:%[0-9]+]]:_(<2 x s16>) = G_LSHR [[BUILD_VECTOR_TRUNC]], [[BUILD_VECTOR_TRUNC1]](<2 x s16>) + ; GFX9-NEXT: [[LSHR3:%[0-9]+]]:_(s16) = G_LSHR [[TRUNC]], [[TRUNC1]](s16) + ; GFX9-NEXT: [[BITCAST4:%[0-9]+]]:_(s32) = G_BITCAST [[LSHR2]](<2 x s16>) + ; GFX9-NEXT: [[LSHR4:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST4]], [[C]](s32) + ; GFX9-NEXT: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF + ; GFX9-NEXT: [[UV4:%[0-9]+]]:_(<2 x s16>), [[UV5:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<4 x s16>) + ; GFX9-NEXT: [[BITCAST5:%[0-9]+]]:_(s32) = G_BITCAST [[UV5]](<2 x s16>) + ; GFX9-NEXT: [[LSHR5:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST5]], [[C]](s32) + ; GFX9-NEXT: [[BUILD_VECTOR_TRUNC2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[BITCAST4]](s32), [[LSHR4]](s32) + ; GFX9-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[LSHR3]](s16) + ; GFX9-NEXT: [[BUILD_VECTOR_TRUNC3:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[ANYEXT]](s32), [[LSHR5]](s32) + ; GFX9-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR_TRUNC2]](<2 x s16>), [[BUILD_VECTOR_TRUNC3]](<2 x s16>) + ; GFX9-NEXT: $vgpr0_vgpr1 = COPY [[CONCAT_VECTORS]](<4 x s16>) %0:_(<4 x s16>) = 
COPY $vgpr0_vgpr1 %1:_(<4 x s16>) = COPY $vgpr2_vgpr3 %2:_(<3 x s16>) = G_EXTRACT %0, 0 @@ -807,32 +798,33 @@ body: | ; GFX9-LABEL: name: test_ashr_v3s16_v3s16 ; GFX9: [[COPY:%[0-9]+]]:_(<6 x s16>) = COPY $vgpr0_vgpr1_vgpr2 ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(<6 x s16>) = COPY $vgpr3_vgpr4_vgpr5 - ; GFX9-NEXT: [[UV:%[0-9]+]]:_(<3 x s16>), [[UV1:%[0-9]+]]:_(<3 x s16>) = G_UNMERGE_VALUES [[COPY]](<6 x s16>) - ; GFX9-NEXT: [[UV2:%[0-9]+]]:_(<3 x s16>), [[UV3:%[0-9]+]]:_(<3 x s16>) = G_UNMERGE_VALUES [[COPY1]](<6 x s16>) - ; GFX9-NEXT: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF - ; GFX9-NEXT: [[INSERT:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF]], [[UV]](<3 x s16>), 0 - ; GFX9-NEXT: [[EXTRACT:%[0-9]+]]:_(<2 x s16>) = G_EXTRACT [[INSERT]](<4 x s16>), 0 - ; GFX9-NEXT: [[INSERT1:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF]], [[UV]](<3 x s16>), 0 - ; GFX9-NEXT: [[EXTRACT1:%[0-9]+]]:_(s16) = G_EXTRACT [[INSERT1]](<4 x s16>), 32 - ; GFX9-NEXT: [[INSERT2:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF]], [[UV2]](<3 x s16>), 0 - ; GFX9-NEXT: [[EXTRACT2:%[0-9]+]]:_(<2 x s16>) = G_EXTRACT [[INSERT2]](<4 x s16>), 0 - ; GFX9-NEXT: [[INSERT3:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF]], [[UV2]](<3 x s16>), 0 - ; GFX9-NEXT: [[EXTRACT3:%[0-9]+]]:_(s16) = G_EXTRACT [[INSERT3]](<4 x s16>), 32 - ; GFX9-NEXT: [[LSHR:%[0-9]+]]:_(<2 x s16>) = G_LSHR [[EXTRACT]], [[EXTRACT2]](<2 x s16>) - ; GFX9-NEXT: [[LSHR1:%[0-9]+]]:_(s16) = G_LSHR [[EXTRACT1]], [[EXTRACT3]](s16) - ; GFX9-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[LSHR]](<2 x s16>) + ; GFX9-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>), [[UV2:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[COPY]](<6 x s16>) + ; GFX9-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>) ; GFX9-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; GFX9-NEXT: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) - ; GFX9-NEXT: [[DEF1:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF - ; GFX9-NEXT: [[UV4:%[0-9]+]]:_(<2 x s16>), [[UV5:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF1]](<4 x s16>) - ; GFX9-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV4]](<2 x s16>) - ; GFX9-NEXT: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C]](s32) - ; GFX9-NEXT: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[UV5]](<2 x s16>) - ; GFX9-NEXT: [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[BITCAST]](s32), [[LSHR2]](s32) - ; GFX9-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[LSHR1]](s16) - ; GFX9-NEXT: [[BUILD_VECTOR_TRUNC1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[ANYEXT]](s32), [[BITCAST1]](s32) - ; GFX9-NEXT: [[BUILD_VECTOR_TRUNC2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[LSHR3]](s32), [[BITCAST2]](s32) - ; GFX9-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR_TRUNC]](<2 x s16>), [[BUILD_VECTOR_TRUNC1]](<2 x s16>), [[BUILD_VECTOR_TRUNC2]](<2 x s16>) + ; GFX9-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) + ; GFX9-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>) + ; GFX9-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST1]](s32) + ; GFX9-NEXT: [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[BITCAST]](s32), [[LSHR]](s32) + ; GFX9-NEXT: [[UV3:%[0-9]+]]:_(<2 x s16>), [[UV4:%[0-9]+]]:_(<2 x s16>), [[UV5:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[COPY1]](<6 x s16>) + ; GFX9-NEXT: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[UV3]](<2 x s16>) + ; GFX9-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C]](s32) + ; GFX9-NEXT: [[BITCAST3:%[0-9]+]]:_(s32) = G_BITCAST [[UV4]](<2 x s16>) + ; 
GFX9-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST3]](s32) + ; GFX9-NEXT: [[BUILD_VECTOR_TRUNC1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[BITCAST2]](s32), [[LSHR1]](s32) + ; GFX9-NEXT: [[LSHR2:%[0-9]+]]:_(<2 x s16>) = G_LSHR [[BUILD_VECTOR_TRUNC]], [[BUILD_VECTOR_TRUNC1]](<2 x s16>) + ; GFX9-NEXT: [[LSHR3:%[0-9]+]]:_(s16) = G_LSHR [[TRUNC]], [[TRUNC1]](s16) + ; GFX9-NEXT: [[BITCAST4:%[0-9]+]]:_(s32) = G_BITCAST [[LSHR2]](<2 x s16>) + ; GFX9-NEXT: [[LSHR4:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST4]], [[C]](s32) + ; GFX9-NEXT: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF + ; GFX9-NEXT: [[UV6:%[0-9]+]]:_(<2 x s16>), [[UV7:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<4 x s16>) + ; GFX9-NEXT: [[BITCAST5:%[0-9]+]]:_(s32) = G_BITCAST [[UV6]](<2 x s16>) + ; GFX9-NEXT: [[LSHR5:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST5]], [[C]](s32) + ; GFX9-NEXT: [[BITCAST6:%[0-9]+]]:_(s32) = G_BITCAST [[UV7]](<2 x s16>) + ; GFX9-NEXT: [[BUILD_VECTOR_TRUNC2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[BITCAST4]](s32), [[LSHR4]](s32) + ; GFX9-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[LSHR3]](s16) + ; GFX9-NEXT: [[BUILD_VECTOR_TRUNC3:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[ANYEXT]](s32), [[BITCAST5]](s32) + ; GFX9-NEXT: [[BUILD_VECTOR_TRUNC4:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[LSHR5]](s32), [[BITCAST6]](s32) + ; GFX9-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR_TRUNC2]](<2 x s16>), [[BUILD_VECTOR_TRUNC3]](<2 x s16>), [[BUILD_VECTOR_TRUNC4]](<2 x s16>) ; GFX9-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[CONCAT_VECTORS]](<6 x s16>) %0:_(<6 x s16>) = COPY $vgpr0_vgpr1_vgpr2 %1:_(<6 x s16>) = COPY $vgpr3_vgpr4_vgpr5 diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-or.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-or.mir index b376bff0ca95..60cdfdd685b1 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-or.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-or.mir @@ -1,5 +1,5 @@ # NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py -# RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=fiji -O0 -run-pass=legalizer -allow-ginsert-as-artifact=0 -global-isel-abort=0 %s -o - | FileCheck %s +# RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=fiji -O0 -run-pass=legalizer %s -o - | FileCheck %s --- name: test_or_s32 @@ -305,18 +305,14 @@ body: | ; CHECK: [[COPY:%[0-9]+]]:_(<3 x s32>) = COPY $vgpr0_vgpr1_vgpr2 ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(<3 x s32>) = COPY $vgpr3_vgpr4_vgpr5 ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<3 x s32>) - ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[UV]](s32), [[UV1]](s32) - ; CHECK-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[UV2]](s32), [[DEF]](s32) - ; CHECK-NEXT: [[DEF1:%[0-9]+]]:_(<2 x s32>) = G_IMPLICIT_DEF ; CHECK-NEXT: [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](<3 x s32>) - ; CHECK-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[UV3]](s32), [[UV4]](s32) - ; CHECK-NEXT: [[BUILD_VECTOR3:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[UV5]](s32), [[DEF]](s32) - ; CHECK-NEXT: [[OR:%[0-9]+]]:_(<2 x s32>) = G_OR [[BUILD_VECTOR]], [[BUILD_VECTOR2]] - ; CHECK-NEXT: [[OR1:%[0-9]+]]:_(<2 x s32>) = G_OR [[BUILD_VECTOR1]], [[BUILD_VECTOR3]] - ; CHECK-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s32>) = G_CONCAT_VECTORS [[OR]](<2 x s32>), [[OR1]](<2 x s32>), [[DEF1]](<2 x s32>) - ; 
CHECK-NEXT: [[UV6:%[0-9]+]]:_(<3 x s32>), [[UV7:%[0-9]+]]:_(<3 x s32>) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<6 x s32>) - ; CHECK-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[UV6]](<3 x s32>) + ; CHECK-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[UV3]](s32), [[UV4]](s32) + ; CHECK-NEXT: [[OR:%[0-9]+]]:_(<2 x s32>) = G_OR [[BUILD_VECTOR]], [[BUILD_VECTOR1]] + ; CHECK-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[UV2]], [[UV5]] + ; CHECK-NEXT: [[UV6:%[0-9]+]]:_(s32), [[UV7:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[OR]](<2 x s32>) + ; CHECK-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[UV6]](s32), [[UV7]](s32), [[OR1]](s32) + ; CHECK-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[BUILD_VECTOR2]](<3 x s32>) %0:_(<3 x s32>) = COPY $vgpr0_vgpr1_vgpr2 %1:_(<3 x s32>) = COPY $vgpr3_vgpr4_vgpr5 %2:_(<3 x s32>) = G_OR %0, %1 @@ -353,23 +349,20 @@ body: | ; CHECK: [[DEF:%[0-9]+]]:_(<5 x s32>) = G_IMPLICIT_DEF ; CHECK-NEXT: [[DEF1:%[0-9]+]]:_(<5 x s32>) = G_IMPLICIT_DEF ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[DEF]](<5 x s32>) - ; CHECK-NEXT: [[DEF2:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[UV]](s32), [[UV1]](s32) ; CHECK-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[UV2]](s32), [[UV3]](s32) - ; CHECK-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[UV4]](s32), [[DEF2]](s32) - ; CHECK-NEXT: [[DEF3:%[0-9]+]]:_(<2 x s32>) = G_IMPLICIT_DEF ; CHECK-NEXT: [[UV5:%[0-9]+]]:_(s32), [[UV6:%[0-9]+]]:_(s32), [[UV7:%[0-9]+]]:_(s32), [[UV8:%[0-9]+]]:_(s32), [[UV9:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[DEF1]](<5 x s32>) - ; CHECK-NEXT: [[BUILD_VECTOR3:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[UV5]](s32), [[UV6]](s32) - ; CHECK-NEXT: [[BUILD_VECTOR4:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[UV7]](s32), [[UV8]](s32) - ; CHECK-NEXT: [[BUILD_VECTOR5:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[UV9]](s32), [[DEF2]](s32) - ; CHECK-NEXT: [[OR:%[0-9]+]]:_(<2 x s32>) = G_OR [[BUILD_VECTOR]], [[BUILD_VECTOR3]] - ; CHECK-NEXT: [[OR1:%[0-9]+]]:_(<2 x s32>) = G_OR [[BUILD_VECTOR1]], [[BUILD_VECTOR4]] - ; CHECK-NEXT: [[OR2:%[0-9]+]]:_(<2 x s32>) = G_OR [[BUILD_VECTOR2]], [[BUILD_VECTOR5]] - ; CHECK-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<10 x s32>) = G_CONCAT_VECTORS [[OR]](<2 x s32>), [[OR1]](<2 x s32>), [[OR2]](<2 x s32>), [[DEF3]](<2 x s32>), [[DEF3]](<2 x s32>) - ; CHECK-NEXT: [[UV10:%[0-9]+]]:_(<5 x s32>), [[UV11:%[0-9]+]]:_(<5 x s32>) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<10 x s32>) - ; CHECK-NEXT: [[DEF4:%[0-9]+]]:_(<8 x s32>) = G_IMPLICIT_DEF - ; CHECK-NEXT: [[INSERT:%[0-9]+]]:_(<8 x s32>) = G_INSERT [[DEF4]], [[UV10]](<5 x s32>), 0 - ; CHECK-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[INSERT]](<8 x s32>) + ; CHECK-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[UV5]](s32), [[UV6]](s32) + ; CHECK-NEXT: [[BUILD_VECTOR3:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[UV7]](s32), [[UV8]](s32) + ; CHECK-NEXT: [[OR:%[0-9]+]]:_(<2 x s32>) = G_OR [[BUILD_VECTOR]], [[BUILD_VECTOR2]] + ; CHECK-NEXT: [[OR1:%[0-9]+]]:_(<2 x s32>) = G_OR [[BUILD_VECTOR1]], [[BUILD_VECTOR3]] + ; CHECK-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[UV4]], [[UV9]] + ; CHECK-NEXT: [[UV10:%[0-9]+]]:_(s32), [[UV11:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[OR]](<2 x s32>) + ; CHECK-NEXT: [[UV12:%[0-9]+]]:_(s32), [[UV13:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[OR1]](<2 x s32>) + ; CHECK-NEXT: [[DEF2:%[0-9]+]]:_(<8 x s32>) = G_IMPLICIT_DEF + ; 
CHECK-NEXT: [[UV14:%[0-9]+]]:_(s32), [[UV15:%[0-9]+]]:_(s32), [[UV16:%[0-9]+]]:_(s32), [[UV17:%[0-9]+]]:_(s32), [[UV18:%[0-9]+]]:_(s32), [[UV19:%[0-9]+]]:_(s32), [[UV20:%[0-9]+]]:_(s32), [[UV21:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[DEF2]](<8 x s32>) + ; CHECK-NEXT: [[BUILD_VECTOR4:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[UV10]](s32), [[UV11]](s32), [[UV12]](s32), [[UV13]](s32), [[OR2]](s32), [[UV19]](s32), [[UV20]](s32), [[UV21]](s32) + ; CHECK-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[BUILD_VECTOR4]](<8 x s32>) %0:_(<5 x s32>) = G_IMPLICIT_DEF %1:_(<5 x s32>) = G_IMPLICIT_DEF %2:_(<5 x s32>) = G_OR %0, %1 @@ -424,40 +417,65 @@ body: | ; CHECK-LABEL: name: test_or_v3s16 ; CHECK: [[COPY:%[0-9]+]]:_(<6 x s16>) = COPY $vgpr0_vgpr1_vgpr2 ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(<6 x s16>) = COPY $vgpr3_vgpr4_vgpr5 - ; CHECK-NEXT: [[UV:%[0-9]+]]:_(<3 x s16>), [[UV1:%[0-9]+]]:_(<3 x s16>) = G_UNMERGE_VALUES [[COPY]](<6 x s16>) - ; CHECK-NEXT: [[UV2:%[0-9]+]]:_(<3 x s16>), [[UV3:%[0-9]+]]:_(<3 x s16>) = G_UNMERGE_VALUES [[COPY1]](<6 x s16>) - ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF - ; CHECK-NEXT: [[INSERT:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF]], [[UV]](<3 x s16>), 0 - ; CHECK-NEXT: [[INSERT1:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF]], [[UV2]](<3 x s16>), 0 - ; CHECK-NEXT: [[OR:%[0-9]+]]:_(<4 x s16>) = G_OR [[INSERT]], [[INSERT1]] - ; CHECK-NEXT: [[DEF1:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF - ; CHECK-NEXT: [[UV4:%[0-9]+]]:_(<2 x s16>), [[UV5:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[OR]](<4 x s16>) - ; CHECK-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV4]](<2 x s16>) + ; CHECK-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>), [[UV2:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[COPY]](<6 x s16>) + ; CHECK-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>) ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 ; CHECK-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) - ; CHECK-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV5]](<2 x s16>) - ; CHECK-NEXT: [[UV6:%[0-9]+]]:_(<2 x s16>), [[UV7:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF1]](<4 x s16>) - ; CHECK-NEXT: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[UV6]](<2 x s16>) - ; CHECK-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C]](s32) - ; CHECK-NEXT: [[BITCAST3:%[0-9]+]]:_(s32) = G_BITCAST [[UV7]](<2 x s16>) + ; CHECK-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>) ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 ; CHECK-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[BITCAST]], [[C1]] ; CHECK-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LSHR]], [[C1]] ; CHECK-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C]](s32) - ; CHECK-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]] - ; CHECK-NEXT: [[BITCAST4:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32) + ; CHECK-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]] + ; CHECK-NEXT: [[BITCAST2:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) ; CHECK-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[BITCAST1]], [[C1]] - ; CHECK-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[BITCAST2]], [[C1]] - ; CHECK-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C]](s32) - ; CHECK-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL1]] - ; CHECK-NEXT: [[BITCAST5:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR2]](s32) + ; CHECK-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY [[C2]](s32) + ; CHECK-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[COPY2]], [[C]](s32) + ; CHECK-NEXT: 
[[OR1:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL1]] + ; CHECK-NEXT: [[BITCAST3:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32) + ; CHECK-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BITCAST2]](<2 x s16>), [[BITCAST3]](<2 x s16>) + ; CHECK-NEXT: [[UV3:%[0-9]+]]:_(<2 x s16>), [[UV4:%[0-9]+]]:_(<2 x s16>), [[UV5:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[COPY1]](<6 x s16>) + ; CHECK-NEXT: [[BITCAST4:%[0-9]+]]:_(s32) = G_BITCAST [[UV3]](<2 x s16>) + ; CHECK-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST4]], [[C]](s32) + ; CHECK-NEXT: [[BITCAST5:%[0-9]+]]:_(s32) = G_BITCAST [[UV4]](<2 x s16>) + ; CHECK-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[BITCAST4]], [[C1]] ; CHECK-NEXT: [[AND4:%[0-9]+]]:_(s32) = G_AND [[LSHR1]], [[C1]] - ; CHECK-NEXT: [[AND5:%[0-9]+]]:_(s32) = G_AND [[BITCAST3]], [[C1]] - ; CHECK-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[C]](s32) - ; CHECK-NEXT: [[OR3:%[0-9]+]]:_(s32) = G_OR [[AND4]], [[SHL2]] - ; CHECK-NEXT: [[BITCAST6:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR3]](s32) - ; CHECK-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BITCAST4]](<2 x s16>), [[BITCAST5]](<2 x s16>), [[BITCAST6]](<2 x s16>) - ; CHECK-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[CONCAT_VECTORS]](<6 x s16>) + ; CHECK-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND4]], [[C]](s32) + ; CHECK-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[AND3]], [[SHL2]] + ; CHECK-NEXT: [[BITCAST6:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR2]](s32) + ; CHECK-NEXT: [[AND5:%[0-9]+]]:_(s32) = G_AND [[BITCAST5]], [[C1]] + ; CHECK-NEXT: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[C2]], [[C]](s32) + ; CHECK-NEXT: [[OR3:%[0-9]+]]:_(s32) = G_OR [[AND5]], [[SHL3]] + ; CHECK-NEXT: [[BITCAST7:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR3]](s32) + ; CHECK-NEXT: [[CONCAT_VECTORS1:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BITCAST6]](<2 x s16>), [[BITCAST7]](<2 x s16>) + ; CHECK-NEXT: [[OR4:%[0-9]+]]:_(<4 x s16>) = G_OR [[CONCAT_VECTORS]], [[CONCAT_VECTORS1]] + ; CHECK-NEXT: [[UV6:%[0-9]+]]:_(<2 x s16>), [[UV7:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[OR4]](<4 x s16>) + ; CHECK-NEXT: [[BITCAST8:%[0-9]+]]:_(s32) = G_BITCAST [[UV6]](<2 x s16>) + ; CHECK-NEXT: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST8]], [[C]](s32) + ; CHECK-NEXT: [[BITCAST9:%[0-9]+]]:_(s32) = G_BITCAST [[UV7]](<2 x s16>) + ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF + ; CHECK-NEXT: [[UV8:%[0-9]+]]:_(<2 x s16>), [[UV9:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<4 x s16>) + ; CHECK-NEXT: [[BITCAST10:%[0-9]+]]:_(s32) = G_BITCAST [[UV8]](<2 x s16>) + ; CHECK-NEXT: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST10]], [[C]](s32) + ; CHECK-NEXT: [[BITCAST11:%[0-9]+]]:_(s32) = G_BITCAST [[UV9]](<2 x s16>) + ; CHECK-NEXT: [[AND6:%[0-9]+]]:_(s32) = G_AND [[BITCAST8]], [[C1]] + ; CHECK-NEXT: [[AND7:%[0-9]+]]:_(s32) = G_AND [[LSHR2]], [[C1]] + ; CHECK-NEXT: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[AND7]], [[C]](s32) + ; CHECK-NEXT: [[OR5:%[0-9]+]]:_(s32) = G_OR [[AND6]], [[SHL4]] + ; CHECK-NEXT: [[BITCAST12:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR5]](s32) + ; CHECK-NEXT: [[AND8:%[0-9]+]]:_(s32) = G_AND [[BITCAST9]], [[C1]] + ; CHECK-NEXT: [[AND9:%[0-9]+]]:_(s32) = G_AND [[BITCAST10]], [[C1]] + ; CHECK-NEXT: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[AND9]], [[C]](s32) + ; CHECK-NEXT: [[OR6:%[0-9]+]]:_(s32) = G_OR [[AND8]], [[SHL5]] + ; CHECK-NEXT: [[BITCAST13:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR6]](s32) + ; CHECK-NEXT: [[AND10:%[0-9]+]]:_(s32) = G_AND [[LSHR3]], [[C1]] + ; CHECK-NEXT: [[AND11:%[0-9]+]]:_(s32) = G_AND [[BITCAST11]], [[C1]] + ; CHECK-NEXT: 
[[SHL6:%[0-9]+]]:_(s32) = G_SHL [[AND11]], [[C]](s32) + ; CHECK-NEXT: [[OR7:%[0-9]+]]:_(s32) = G_OR [[AND10]], [[SHL6]] + ; CHECK-NEXT: [[BITCAST14:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR7]](s32) + ; CHECK-NEXT: [[CONCAT_VECTORS2:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BITCAST12]](<2 x s16>), [[BITCAST13]](<2 x s16>), [[BITCAST14]](<2 x s16>) + ; CHECK-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[CONCAT_VECTORS2]](<6 x s16>) %0:_(<6 x s16>) = COPY $vgpr0_vgpr1_vgpr2 %1:_(<6 x s16>) = COPY $vgpr3_vgpr4_vgpr5 %2:_(<3 x s16>), %3:_(<3 x s16>) = G_UNMERGE_VALUES %0 @@ -493,54 +511,102 @@ body: | ; CHECK-LABEL: name: test_or_v5s16 ; CHECK: [[DEF:%[0-9]+]]:_(<6 x s16>) = G_IMPLICIT_DEF - ; CHECK-NEXT: [[DEF1:%[0-9]+]]:_(<6 x s16>) = G_IMPLICIT_DEF - ; CHECK-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<30 x s16>) = G_CONCAT_VECTORS [[DEF]](<6 x s16>), [[DEF1]](<6 x s16>), [[DEF1]](<6 x s16>), [[DEF1]](<6 x s16>), [[DEF1]](<6 x s16>) - ; CHECK-NEXT: [[UV:%[0-9]+]]:_(<5 x s16>), [[UV1:%[0-9]+]]:_(<5 x s16>), [[UV2:%[0-9]+]]:_(<5 x s16>), [[UV3:%[0-9]+]]:_(<5 x s16>), [[UV4:%[0-9]+]]:_(<5 x s16>), [[UV5:%[0-9]+]]:_(<5 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<30 x s16>) - ; CHECK-NEXT: [[DEF2:%[0-9]+]]:_(<6 x s16>) = G_IMPLICIT_DEF - ; CHECK-NEXT: [[CONCAT_VECTORS1:%[0-9]+]]:_(<30 x s16>) = G_CONCAT_VECTORS [[DEF2]](<6 x s16>), [[DEF1]](<6 x s16>), [[DEF1]](<6 x s16>), [[DEF1]](<6 x s16>), [[DEF1]](<6 x s16>) - ; CHECK-NEXT: [[UV6:%[0-9]+]]:_(<5 x s16>), [[UV7:%[0-9]+]]:_(<5 x s16>), [[UV8:%[0-9]+]]:_(<5 x s16>), [[UV9:%[0-9]+]]:_(<5 x s16>), [[UV10:%[0-9]+]]:_(<5 x s16>), [[UV11:%[0-9]+]]:_(<5 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS1]](<30 x s16>) - ; CHECK-NEXT: [[INSERT:%[0-9]+]]:_(<6 x s16>) = G_INSERT [[DEF1]], [[UV]](<5 x s16>), 0 - ; CHECK-NEXT: [[INSERT1:%[0-9]+]]:_(<6 x s16>) = G_INSERT [[DEF1]], [[UV6]](<5 x s16>), 0 - ; CHECK-NEXT: [[UV12:%[0-9]+]]:_(<3 x s16>), [[UV13:%[0-9]+]]:_(<3 x s16>) = G_UNMERGE_VALUES [[INSERT]](<6 x s16>) - ; CHECK-NEXT: [[UV14:%[0-9]+]]:_(<3 x s16>), [[UV15:%[0-9]+]]:_(<3 x s16>) = G_UNMERGE_VALUES [[INSERT1]](<6 x s16>) - ; CHECK-NEXT: [[DEF3:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF - ; CHECK-NEXT: [[INSERT2:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF3]], [[UV12]](<3 x s16>), 0 - ; CHECK-NEXT: [[INSERT3:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF3]], [[UV14]](<3 x s16>), 0 - ; CHECK-NEXT: [[OR:%[0-9]+]]:_(<4 x s16>) = G_OR [[INSERT2]], [[INSERT3]] - ; CHECK-NEXT: [[INSERT4:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF3]], [[UV13]](<3 x s16>), 0 - ; CHECK-NEXT: [[INSERT5:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF3]], [[UV15]](<3 x s16>), 0 - ; CHECK-NEXT: [[OR1:%[0-9]+]]:_(<4 x s16>) = G_OR [[INSERT4]], [[INSERT5]] - ; CHECK-NEXT: [[UV16:%[0-9]+]]:_(<2 x s16>), [[UV17:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[OR]](<4 x s16>) - ; CHECK-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV16]](<2 x s16>) + ; CHECK-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>), [[UV2:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<6 x s16>) + ; CHECK-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>) ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 ; CHECK-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) - ; CHECK-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV17]](<2 x s16>) - ; CHECK-NEXT: [[UV18:%[0-9]+]]:_(<2 x s16>), [[UV19:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[OR1]](<4 x s16>) - ; CHECK-NEXT: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[UV18]](<2 x s16>) - ; CHECK-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C]](s32) - ; CHECK-NEXT: 
[[BITCAST3:%[0-9]+]]:_(s32) = G_BITCAST [[UV19]](<2 x s16>) + ; CHECK-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>) + ; CHECK-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C]](s32) + ; CHECK-NEXT: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[UV2]](<2 x s16>) + ; CHECK-NEXT: [[DEF1:%[0-9]+]]:_(<6 x s16>) = G_IMPLICIT_DEF + ; CHECK-NEXT: [[UV3:%[0-9]+]]:_(<2 x s16>), [[UV4:%[0-9]+]]:_(<2 x s16>), [[UV5:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF1]](<6 x s16>) + ; CHECK-NEXT: [[BITCAST3:%[0-9]+]]:_(s32) = G_BITCAST [[UV3]](<2 x s16>) + ; CHECK-NEXT: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST3]], [[C]](s32) + ; CHECK-NEXT: [[BITCAST4:%[0-9]+]]:_(s32) = G_BITCAST [[UV4]](<2 x s16>) + ; CHECK-NEXT: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST4]], [[C]](s32) + ; CHECK-NEXT: [[BITCAST5:%[0-9]+]]:_(s32) = G_BITCAST [[UV5]](<2 x s16>) ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 ; CHECK-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[BITCAST]], [[C1]] ; CHECK-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LSHR]], [[C1]] ; CHECK-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C]](s32) - ; CHECK-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]] - ; CHECK-NEXT: [[BITCAST4:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR2]](s32) + ; CHECK-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]] + ; CHECK-NEXT: [[BITCAST6:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) ; CHECK-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[BITCAST1]], [[C1]] - ; CHECK-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[BITCAST2]], [[C1]] - ; CHECK-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C]](s32) - ; CHECK-NEXT: [[OR3:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL1]] - ; CHECK-NEXT: [[BITCAST5:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR3]](s32) - ; CHECK-NEXT: [[AND4:%[0-9]+]]:_(s32) = G_AND [[LSHR1]], [[C1]] - ; CHECK-NEXT: [[AND5:%[0-9]+]]:_(s32) = G_AND [[BITCAST3]], [[C1]] - ; CHECK-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[C]](s32) - ; CHECK-NEXT: [[OR4:%[0-9]+]]:_(s32) = G_OR [[AND4]], [[SHL2]] - ; CHECK-NEXT: [[BITCAST6:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR4]](s32) - ; CHECK-NEXT: [[CONCAT_VECTORS2:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BITCAST4]](<2 x s16>), [[BITCAST5]](<2 x s16>), [[BITCAST6]](<2 x s16>) - ; CHECK-NEXT: [[CONCAT_VECTORS3:%[0-9]+]]:_(<30 x s16>) = G_CONCAT_VECTORS [[CONCAT_VECTORS2]](<6 x s16>), [[DEF1]](<6 x s16>), [[DEF1]](<6 x s16>), [[DEF1]](<6 x s16>), [[DEF1]](<6 x s16>) - ; CHECK-NEXT: [[UV20:%[0-9]+]]:_(<5 x s16>), [[UV21:%[0-9]+]]:_(<5 x s16>), [[UV22:%[0-9]+]]:_(<5 x s16>), [[UV23:%[0-9]+]]:_(<5 x s16>), [[UV24:%[0-9]+]]:_(<5 x s16>), [[UV25:%[0-9]+]]:_(<5 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS3]](<30 x s16>) - ; CHECK-NEXT: [[DEF4:%[0-9]+]]:_(<8 x s16>) = G_IMPLICIT_DEF - ; CHECK-NEXT: [[INSERT6:%[0-9]+]]:_(<8 x s16>) = G_INSERT [[DEF4]], [[UV20]](<5 x s16>), 0 - ; CHECK-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[INSERT6]](<8 x s16>) + ; CHECK-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY [[C2]](s32) + ; CHECK-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[COPY]], [[C]](s32) + ; CHECK-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL1]] + ; CHECK-NEXT: [[BITCAST7:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32) + ; CHECK-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BITCAST6]](<2 x s16>), [[BITCAST7]](<2 x s16>) + ; CHECK-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[BITCAST3]], [[C1]] + ; CHECK-NEXT: [[AND4:%[0-9]+]]:_(s32) = G_AND [[LSHR2]], [[C1]] + ; CHECK-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND4]], 
[[C]](s32) + ; CHECK-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[AND3]], [[SHL2]] + ; CHECK-NEXT: [[BITCAST8:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR2]](s32) + ; CHECK-NEXT: [[AND5:%[0-9]+]]:_(s32) = G_AND [[BITCAST4]], [[C1]] + ; CHECK-NEXT: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[C2]], [[C]](s32) + ; CHECK-NEXT: [[OR3:%[0-9]+]]:_(s32) = G_OR [[AND5]], [[SHL3]] + ; CHECK-NEXT: [[BITCAST9:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR3]](s32) + ; CHECK-NEXT: [[CONCAT_VECTORS1:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BITCAST8]](<2 x s16>), [[BITCAST9]](<2 x s16>) + ; CHECK-NEXT: [[OR4:%[0-9]+]]:_(<4 x s16>) = G_OR [[CONCAT_VECTORS]], [[CONCAT_VECTORS1]] + ; CHECK-NEXT: [[UV6:%[0-9]+]]:_(<2 x s16>), [[UV7:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[OR4]](<4 x s16>) + ; CHECK-NEXT: [[BITCAST10:%[0-9]+]]:_(s32) = G_BITCAST [[UV6]](<2 x s16>) + ; CHECK-NEXT: [[LSHR4:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST10]], [[C]](s32) + ; CHECK-NEXT: [[BITCAST11:%[0-9]+]]:_(s32) = G_BITCAST [[UV7]](<2 x s16>) + ; CHECK-NEXT: [[AND6:%[0-9]+]]:_(s32) = G_AND [[LSHR1]], [[C1]] + ; CHECK-NEXT: [[AND7:%[0-9]+]]:_(s32) = G_AND [[BITCAST2]], [[C1]] + ; CHECK-NEXT: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[AND7]], [[C]](s32) + ; CHECK-NEXT: [[OR5:%[0-9]+]]:_(s32) = G_OR [[AND6]], [[SHL4]] + ; CHECK-NEXT: [[BITCAST12:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR5]](s32) + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY [[C2]](s32) + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY [[C2]](s32) + ; CHECK-NEXT: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[COPY2]], [[C]](s32) + ; CHECK-NEXT: [[OR6:%[0-9]+]]:_(s32) = G_OR [[COPY1]], [[SHL5]] + ; CHECK-NEXT: [[BITCAST13:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR6]](s32) + ; CHECK-NEXT: [[CONCAT_VECTORS2:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BITCAST12]](<2 x s16>), [[BITCAST13]](<2 x s16>) + ; CHECK-NEXT: [[AND8:%[0-9]+]]:_(s32) = G_AND [[LSHR3]], [[C1]] + ; CHECK-NEXT: [[AND9:%[0-9]+]]:_(s32) = G_AND [[BITCAST5]], [[C1]] + ; CHECK-NEXT: [[SHL6:%[0-9]+]]:_(s32) = G_SHL [[AND9]], [[C]](s32) + ; CHECK-NEXT: [[OR7:%[0-9]+]]:_(s32) = G_OR [[AND8]], [[SHL6]] + ; CHECK-NEXT: [[BITCAST14:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR7]](s32) + ; CHECK-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY [[C2]](s32) + ; CHECK-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY [[C2]](s32) + ; CHECK-NEXT: [[SHL7:%[0-9]+]]:_(s32) = G_SHL [[COPY4]], [[C]](s32) + ; CHECK-NEXT: [[OR8:%[0-9]+]]:_(s32) = G_OR [[COPY3]], [[SHL7]] + ; CHECK-NEXT: [[BITCAST15:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR8]](s32) + ; CHECK-NEXT: [[CONCAT_VECTORS3:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BITCAST14]](<2 x s16>), [[BITCAST15]](<2 x s16>) + ; CHECK-NEXT: [[OR9:%[0-9]+]]:_(<4 x s16>) = G_OR [[CONCAT_VECTORS2]], [[CONCAT_VECTORS3]] + ; CHECK-NEXT: [[UV8:%[0-9]+]]:_(<2 x s16>), [[UV9:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[OR9]](<4 x s16>) + ; CHECK-NEXT: [[BITCAST16:%[0-9]+]]:_(s32) = G_BITCAST [[UV8]](<2 x s16>) + ; CHECK-NEXT: [[LSHR5:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST16]], [[C]](s32) + ; CHECK-NEXT: [[DEF2:%[0-9]+]]:_(<8 x s16>) = G_IMPLICIT_DEF + ; CHECK-NEXT: [[UV10:%[0-9]+]]:_(<2 x s16>), [[UV11:%[0-9]+]]:_(<2 x s16>), [[UV12:%[0-9]+]]:_(<2 x s16>), [[UV13:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF2]](<8 x s16>) + ; CHECK-NEXT: [[BITCAST17:%[0-9]+]]:_(s32) = G_BITCAST [[UV12]](<2 x s16>) + ; CHECK-NEXT: [[LSHR6:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST17]], [[C]](s32) + ; CHECK-NEXT: [[BITCAST18:%[0-9]+]]:_(s32) = G_BITCAST [[UV13]](<2 x s16>) + ; CHECK-NEXT: [[LSHR7:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST18]], [[C]](s32) + ; CHECK-NEXT: [[AND10:%[0-9]+]]:_(s32) = G_AND 
[[BITCAST10]], [[C1]] + ; CHECK-NEXT: [[AND11:%[0-9]+]]:_(s32) = G_AND [[LSHR4]], [[C1]] + ; CHECK-NEXT: [[SHL8:%[0-9]+]]:_(s32) = G_SHL [[AND11]], [[C]](s32) + ; CHECK-NEXT: [[OR10:%[0-9]+]]:_(s32) = G_OR [[AND10]], [[SHL8]] + ; CHECK-NEXT: [[BITCAST19:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR10]](s32) + ; CHECK-NEXT: [[AND12:%[0-9]+]]:_(s32) = G_AND [[BITCAST11]], [[C1]] + ; CHECK-NEXT: [[AND13:%[0-9]+]]:_(s32) = G_AND [[BITCAST16]], [[C1]] + ; CHECK-NEXT: [[SHL9:%[0-9]+]]:_(s32) = G_SHL [[AND13]], [[C]](s32) + ; CHECK-NEXT: [[OR11:%[0-9]+]]:_(s32) = G_OR [[AND12]], [[SHL9]] + ; CHECK-NEXT: [[BITCAST20:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR11]](s32) + ; CHECK-NEXT: [[AND14:%[0-9]+]]:_(s32) = G_AND [[LSHR5]], [[C1]] + ; CHECK-NEXT: [[AND15:%[0-9]+]]:_(s32) = G_AND [[LSHR6]], [[C1]] + ; CHECK-NEXT: [[SHL10:%[0-9]+]]:_(s32) = G_SHL [[AND15]], [[C]](s32) + ; CHECK-NEXT: [[OR12:%[0-9]+]]:_(s32) = G_OR [[AND14]], [[SHL10]] + ; CHECK-NEXT: [[BITCAST21:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR12]](s32) + ; CHECK-NEXT: [[AND16:%[0-9]+]]:_(s32) = G_AND [[BITCAST18]], [[C1]] + ; CHECK-NEXT: [[AND17:%[0-9]+]]:_(s32) = G_AND [[LSHR7]], [[C1]] + ; CHECK-NEXT: [[SHL11:%[0-9]+]]:_(s32) = G_SHL [[AND17]], [[C]](s32) + ; CHECK-NEXT: [[OR13:%[0-9]+]]:_(s32) = G_OR [[AND16]], [[SHL11]] + ; CHECK-NEXT: [[BITCAST22:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR13]](s32) + ; CHECK-NEXT: [[CONCAT_VECTORS4:%[0-9]+]]:_(<8 x s16>) = G_CONCAT_VECTORS [[BITCAST19]](<2 x s16>), [[BITCAST20]](<2 x s16>), [[BITCAST21]](<2 x s16>), [[BITCAST22]](<2 x s16>) + ; CHECK-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[CONCAT_VECTORS4]](<8 x s16>) %0:_(<5 x s16>) = G_IMPLICIT_DEF %1:_(<5 x s16>) = G_IMPLICIT_DEF %2:_(<5 x s16>) = G_OR %0, %1 @@ -555,34 +621,15 @@ body: | bb.0: ; CHECK-LABEL: name: test_or_v3s8 - ; CHECK: [[DEF:%[0-9]+]]:_(<3 x s8>) = G_IMPLICIT_DEF - ; CHECK-NEXT: [[DEF1:%[0-9]+]]:_(<3 x s8>) = G_IMPLICIT_DEF - ; CHECK-NEXT: [[DEF2:%[0-9]+]]:_(<4 x s8>) = G_IMPLICIT_DEF - ; CHECK-NEXT: [[INSERT:%[0-9]+]]:_(<4 x s8>) = G_INSERT [[DEF2]], [[DEF]](<3 x s8>), 0 - ; CHECK-NEXT: [[INSERT1:%[0-9]+]]:_(<4 x s8>) = G_INSERT [[DEF2]], [[DEF1]](<3 x s8>), 0 - ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s8), [[UV1:%[0-9]+]]:_(s8), [[UV2:%[0-9]+]]:_(s8), [[UV3:%[0-9]+]]:_(s8) = G_UNMERGE_VALUES [[INSERT]](<4 x s8>) - ; CHECK-NEXT: [[UV4:%[0-9]+]]:_(s8), [[UV5:%[0-9]+]]:_(s8), [[UV6:%[0-9]+]]:_(s8), [[UV7:%[0-9]+]]:_(s8) = G_UNMERGE_VALUES [[INSERT1]](<4 x s8>) - ; CHECK-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[UV]](s8) - ; CHECK-NEXT: [[ANYEXT1:%[0-9]+]]:_(s32) = G_ANYEXT [[UV4]](s8) - ; CHECK-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[ANYEXT]], [[ANYEXT1]] - ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(s8) = G_TRUNC [[OR]](s32) - ; CHECK-NEXT: [[ANYEXT2:%[0-9]+]]:_(s32) = G_ANYEXT [[UV1]](s8) - ; CHECK-NEXT: [[ANYEXT3:%[0-9]+]]:_(s32) = G_ANYEXT [[UV5]](s8) - ; CHECK-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[ANYEXT2]], [[ANYEXT3]] - ; CHECK-NEXT: [[TRUNC1:%[0-9]+]]:_(s8) = G_TRUNC [[OR1]](s32) - ; CHECK-NEXT: [[ANYEXT4:%[0-9]+]]:_(s32) = G_ANYEXT [[UV2]](s8) - ; CHECK-NEXT: [[ANYEXT5:%[0-9]+]]:_(s32) = G_ANYEXT [[UV6]](s8) - ; CHECK-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[ANYEXT4]], [[ANYEXT5]] - ; CHECK-NEXT: [[TRUNC2:%[0-9]+]]:_(s8) = G_TRUNC [[OR2]](s32) - ; CHECK-NEXT: [[ANYEXT6:%[0-9]+]]:_(s32) = G_ANYEXT [[UV3]](s8) - ; CHECK-NEXT: [[ANYEXT7:%[0-9]+]]:_(s32) = G_ANYEXT [[UV7]](s8) - ; CHECK-NEXT: [[OR3:%[0-9]+]]:_(s32) = G_OR [[ANYEXT6]], [[ANYEXT7]] - ; CHECK-NEXT: [[TRUNC3:%[0-9]+]]:_(s8) = G_TRUNC [[OR3]](s32) - ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x 
s8>) = G_BUILD_VECTOR [[TRUNC]](s8), [[TRUNC1]](s8), [[TRUNC2]](s8), [[TRUNC3]](s8) - ; CHECK-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<12 x s8>) = G_CONCAT_VECTORS [[BUILD_VECTOR]](<4 x s8>), [[DEF2]](<4 x s8>), [[DEF2]](<4 x s8>) - ; CHECK-NEXT: [[UV8:%[0-9]+]]:_(<3 x s8>), [[UV9:%[0-9]+]]:_(<3 x s8>), [[UV10:%[0-9]+]]:_(<3 x s8>), [[UV11:%[0-9]+]]:_(<3 x s8>) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<12 x s8>) - ; CHECK-NEXT: [[ANYEXT8:%[0-9]+]]:_(<3 x s32>) = G_ANYEXT [[UV8]](<3 x s8>) - ; CHECK-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[ANYEXT8]](<3 x s32>) + ; CHECK: [[DEF:%[0-9]+]]:_(<4 x s32>) = G_IMPLICIT_DEF + ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[DEF]](<4 x s32>) + ; CHECK-NEXT: [[DEF1:%[0-9]+]]:_(<4 x s32>) = G_IMPLICIT_DEF + ; CHECK-NEXT: [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32), [[UV6:%[0-9]+]]:_(s32), [[UV7:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[DEF1]](<4 x s32>) + ; CHECK-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[UV]], [[UV4]] + ; CHECK-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[UV1]], [[UV5]] + ; CHECK-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[UV2]], [[UV6]] + ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[OR]](s32), [[OR1]](s32), [[OR2]](s32) + ; CHECK-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[BUILD_VECTOR]](<3 x s32>) %0:_(<3 x s8>) = G_IMPLICIT_DEF %1:_(<3 x s8>) = G_IMPLICIT_DEF %2:_(<3 x s8>) = G_OR %0, %1 diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-phi.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-phi.mir index 7b867c833e1f..5be7981d5c72 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-phi.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-phi.mir @@ -127,77 +127,77 @@ body: | ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr2 ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 ; CHECK-NEXT: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[COPY1]](s32), [[C]] - ; CHECK-NEXT: [[EXTRACT:%[0-9]+]]:_(<3 x s16>) = G_EXTRACT [[COPY]](<4 x s16>), 0 - ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF - ; CHECK-NEXT: [[INSERT:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF]], [[EXTRACT]](<3 x s16>), 0 - ; CHECK-NEXT: G_BRCOND [[ICMP]](s1), %bb.1 - ; CHECK-NEXT: G_BR %bb.2 - ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: bb.1: - ; CHECK-NEXT: successors: %bb.2(0x80000000) - ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[DEF1:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF - ; CHECK-NEXT: [[INSERT1:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF1]], [[EXTRACT]](<3 x s16>), 0 - ; CHECK-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[INSERT1]](<4 x s16>) + ; CHECK-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[COPY]](<4 x s16>) ; CHECK-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>) ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 ; CHECK-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C1]](s32) ; CHECK-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>) - ; CHECK-NEXT: [[INSERT2:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF1]], [[EXTRACT]](<3 x s16>), 0 - ; CHECK-NEXT: [[UV2:%[0-9]+]]:_(<2 x s16>), [[UV3:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[INSERT2]](<4 x s16>) - ; CHECK-NEXT: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[UV2]](<2 x s16>) - ; CHECK-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C1]](s32) - ; CHECK-NEXT: [[BITCAST3:%[0-9]+]]:_(s32) = G_BITCAST [[UV3]](<2 x s16>) - ; CHECK-NEXT: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[BITCAST]], [[BITCAST2]] - ; CHECK-NEXT: 
[[ADD1:%[0-9]+]]:_(s32) = G_ADD [[LSHR]], [[LSHR1]] - ; CHECK-NEXT: [[ADD2:%[0-9]+]]:_(s32) = G_ADD [[BITCAST1]], [[BITCAST3]] ; CHECK-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 - ; CHECK-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[ADD]], [[C2]] - ; CHECK-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[ADD1]], [[C2]] + ; CHECK-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[BITCAST]], [[C2]] + ; CHECK-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LSHR]], [[C2]] ; CHECK-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C1]](s32) ; CHECK-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]] - ; CHECK-NEXT: [[BITCAST4:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) - ; CHECK-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[ADD2]], [[C2]] - ; CHECK-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 - ; CHECK-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[C3]], [[C1]](s32) + ; CHECK-NEXT: [[BITCAST2:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) + ; CHECK-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[BITCAST1]], [[C2]] + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY [[C]](s32) + ; CHECK-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[COPY2]], [[C1]](s32) ; CHECK-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL1]] - ; CHECK-NEXT: [[BITCAST5:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32) - ; CHECK-NEXT: [[DEF2:%[0-9]+]]:_(<2 x s16>) = G_IMPLICIT_DEF - ; CHECK-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BITCAST4]](<2 x s16>), [[BITCAST5]](<2 x s16>), [[DEF2]](<2 x s16>) - ; CHECK-NEXT: [[UV4:%[0-9]+]]:_(<3 x s16>), [[UV5:%[0-9]+]]:_(<3 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<6 x s16>) - ; CHECK-NEXT: [[INSERT3:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF1]], [[UV4]](<3 x s16>), 0 + ; CHECK-NEXT: [[BITCAST3:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32) + ; CHECK-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BITCAST2]](<2 x s16>), [[BITCAST3]](<2 x s16>) + ; CHECK-NEXT: G_BRCOND [[ICMP]](s1), %bb.1 ; CHECK-NEXT: G_BR %bb.2 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: bb.2: - ; CHECK-NEXT: [[PHI:%[0-9]+]]:_(<4 x s16>) = G_PHI [[INSERT]](<4 x s16>), %bb.0, [[INSERT3]](<4 x s16>), %bb.1 - ; CHECK-NEXT: [[DEF3:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF - ; CHECK-NEXT: [[UV6:%[0-9]+]]:_(<2 x s16>), [[UV7:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[PHI]](<4 x s16>) - ; CHECK-NEXT: [[BITCAST6:%[0-9]+]]:_(s32) = G_BITCAST [[UV6]](<2 x s16>) + ; CHECK-NEXT: bb.1: + ; CHECK-NEXT: successors: %bb.2(0x80000000) + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[BITCAST]], [[BITCAST]] + ; CHECK-NEXT: [[ADD1:%[0-9]+]]:_(s32) = G_ADD [[LSHR]], [[LSHR]] + ; CHECK-NEXT: [[ADD2:%[0-9]+]]:_(s32) = G_ADD [[BITCAST1]], [[BITCAST1]] + ; CHECK-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 + ; CHECK-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[ADD]], [[C3]] + ; CHECK-NEXT: [[AND4:%[0-9]+]]:_(s32) = G_AND [[ADD1]], [[C3]] ; CHECK-NEXT: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; CHECK-NEXT: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST6]], [[C4]](s32) - ; CHECK-NEXT: [[BITCAST7:%[0-9]+]]:_(s32) = G_BITCAST [[UV7]](<2 x s16>) - ; CHECK-NEXT: [[UV8:%[0-9]+]]:_(<2 x s16>), [[UV9:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF3]](<4 x s16>) - ; CHECK-NEXT: [[BITCAST8:%[0-9]+]]:_(s32) = G_BITCAST [[UV8]](<2 x s16>) - ; CHECK-NEXT: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST8]], [[C4]](s32) - ; CHECK-NEXT: [[BITCAST9:%[0-9]+]]:_(s32) = G_BITCAST [[UV9]](<2 x s16>) - ; CHECK-NEXT: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 - ; CHECK-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[BITCAST6]], [[C5]] - ; CHECK-NEXT: 
[[AND4:%[0-9]+]]:_(s32) = G_AND [[LSHR2]], [[C5]] ; CHECK-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND4]], [[C4]](s32) ; CHECK-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[AND3]], [[SHL2]] - ; CHECK-NEXT: [[BITCAST10:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR2]](s32) - ; CHECK-NEXT: [[AND5:%[0-9]+]]:_(s32) = G_AND [[BITCAST7]], [[C5]] - ; CHECK-NEXT: [[AND6:%[0-9]+]]:_(s32) = G_AND [[BITCAST8]], [[C5]] - ; CHECK-NEXT: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[AND6]], [[C4]](s32) + ; CHECK-NEXT: [[BITCAST4:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR2]](s32) + ; CHECK-NEXT: [[AND5:%[0-9]+]]:_(s32) = G_AND [[ADD2]], [[C3]] + ; CHECK-NEXT: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 + ; CHECK-NEXT: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[C5]], [[C4]](s32) ; CHECK-NEXT: [[OR3:%[0-9]+]]:_(s32) = G_OR [[AND5]], [[SHL3]] - ; CHECK-NEXT: [[BITCAST11:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR3]](s32) - ; CHECK-NEXT: [[AND7:%[0-9]+]]:_(s32) = G_AND [[LSHR3]], [[C5]] - ; CHECK-NEXT: [[AND8:%[0-9]+]]:_(s32) = G_AND [[BITCAST9]], [[C5]] - ; CHECK-NEXT: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[AND8]], [[C4]](s32) - ; CHECK-NEXT: [[OR4:%[0-9]+]]:_(s32) = G_OR [[AND7]], [[SHL4]] - ; CHECK-NEXT: [[BITCAST12:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR4]](s32) - ; CHECK-NEXT: [[CONCAT_VECTORS1:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BITCAST10]](<2 x s16>), [[BITCAST11]](<2 x s16>), [[BITCAST12]](<2 x s16>) - ; CHECK-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[CONCAT_VECTORS1]](<6 x s16>) + ; CHECK-NEXT: [[BITCAST5:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR3]](s32) + ; CHECK-NEXT: [[CONCAT_VECTORS1:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BITCAST4]](<2 x s16>), [[BITCAST5]](<2 x s16>) + ; CHECK-NEXT: G_BR %bb.2 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: bb.2: + ; CHECK-NEXT: [[PHI:%[0-9]+]]:_(<4 x s16>) = G_PHI [[CONCAT_VECTORS]](<4 x s16>), %bb.0, [[CONCAT_VECTORS1]](<4 x s16>), %bb.1 + ; CHECK-NEXT: [[UV2:%[0-9]+]]:_(<2 x s16>), [[UV3:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[PHI]](<4 x s16>) + ; CHECK-NEXT: [[BITCAST6:%[0-9]+]]:_(s32) = G_BITCAST [[UV2]](<2 x s16>) + ; CHECK-NEXT: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; CHECK-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST6]], [[C6]](s32) + ; CHECK-NEXT: [[BITCAST7:%[0-9]+]]:_(s32) = G_BITCAST [[UV3]](<2 x s16>) + ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF + ; CHECK-NEXT: [[UV4:%[0-9]+]]:_(<2 x s16>), [[UV5:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<4 x s16>) + ; CHECK-NEXT: [[BITCAST8:%[0-9]+]]:_(s32) = G_BITCAST [[UV4]](<2 x s16>) + ; CHECK-NEXT: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST8]], [[C6]](s32) + ; CHECK-NEXT: [[BITCAST9:%[0-9]+]]:_(s32) = G_BITCAST [[UV5]](<2 x s16>) + ; CHECK-NEXT: [[C7:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 + ; CHECK-NEXT: [[AND6:%[0-9]+]]:_(s32) = G_AND [[BITCAST6]], [[C7]] + ; CHECK-NEXT: [[AND7:%[0-9]+]]:_(s32) = G_AND [[LSHR1]], [[C7]] + ; CHECK-NEXT: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[AND7]], [[C6]](s32) + ; CHECK-NEXT: [[OR4:%[0-9]+]]:_(s32) = G_OR [[AND6]], [[SHL4]] + ; CHECK-NEXT: [[BITCAST10:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR4]](s32) + ; CHECK-NEXT: [[AND8:%[0-9]+]]:_(s32) = G_AND [[BITCAST7]], [[C7]] + ; CHECK-NEXT: [[AND9:%[0-9]+]]:_(s32) = G_AND [[BITCAST8]], [[C7]] + ; CHECK-NEXT: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[AND9]], [[C6]](s32) + ; CHECK-NEXT: [[OR5:%[0-9]+]]:_(s32) = G_OR [[AND8]], [[SHL5]] + ; CHECK-NEXT: [[BITCAST11:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR5]](s32) + ; CHECK-NEXT: [[AND10:%[0-9]+]]:_(s32) = G_AND [[LSHR2]], [[C7]] + ; CHECK-NEXT: [[AND11:%[0-9]+]]:_(s32) = G_AND [[BITCAST9]], [[C7]] + ; CHECK-NEXT: 
[[SHL6:%[0-9]+]]:_(s32) = G_SHL [[AND11]], [[C6]](s32) + ; CHECK-NEXT: [[OR6:%[0-9]+]]:_(s32) = G_OR [[AND10]], [[SHL6]] + ; CHECK-NEXT: [[BITCAST12:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR6]](s32) + ; CHECK-NEXT: [[CONCAT_VECTORS2:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BITCAST10]](<2 x s16>), [[BITCAST11]](<2 x s16>), [[BITCAST12]](<2 x s16>) + ; CHECK-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[CONCAT_VECTORS2]](<6 x s16>) ; CHECK-NEXT: S_SETPC_B64 undef $sgpr30_sgpr31 bb.0: successors: %bb.1, %bb.2 @@ -252,10 +252,9 @@ body: | ; CHECK-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C1]](s32) ; CHECK-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>) ; CHECK-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C1]](s32) - ; CHECK-NEXT: [[UV2:%[0-9]+]]:_(<2 x s16>), [[UV3:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[COPY]](<4 x s16>) - ; CHECK-NEXT: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[UV2]](<2 x s16>) + ; CHECK-NEXT: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>) ; CHECK-NEXT: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C1]](s32) - ; CHECK-NEXT: [[BITCAST3:%[0-9]+]]:_(s32) = G_BITCAST [[UV3]](<2 x s16>) + ; CHECK-NEXT: [[BITCAST3:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>) ; CHECK-NEXT: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST3]], [[C1]](s32) ; CHECK-NEXT: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[BITCAST]], [[BITCAST2]] ; CHECK-NEXT: [[ADD1:%[0-9]+]]:_(s32) = G_ADD [[LSHR]], [[LSHR2]] @@ -935,38 +934,39 @@ body: | ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr8 ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 ; CHECK-NEXT: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[COPY1]](s32), [[C]] - ; CHECK-NEXT: [[EXTRACT:%[0-9]+]]:_(<3 x s64>) = G_EXTRACT [[COPY]](<4 x s64>), 0 + ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s64), [[UV1:%[0-9]+]]:_(s64), [[UV2:%[0-9]+]]:_(s64), [[UV3:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[COPY]](<4 x s64>) + ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s64>) = G_BUILD_VECTOR [[UV]](s64), [[UV1]](s64), [[UV2]](s64) ; CHECK-NEXT: G_BRCOND [[ICMP]](s1), %bb.1 ; CHECK-NEXT: G_BR %bb.2 ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: bb.1: ; CHECK-NEXT: successors: %bb.2(0x80000000) ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s64), [[UV1:%[0-9]+]]:_(s64), [[UV2:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[EXTRACT]](<3 x s64>) - ; CHECK-NEXT: [[UV3:%[0-9]+]]:_(s64), [[UV4:%[0-9]+]]:_(s64), [[UV5:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[EXTRACT]](<3 x s64>) + ; CHECK-NEXT: [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[UV]](s64) ; CHECK-NEXT: [[UV6:%[0-9]+]]:_(s32), [[UV7:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[UV]](s64) - ; CHECK-NEXT: [[UV8:%[0-9]+]]:_(s32), [[UV9:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[UV3]](s64) - ; CHECK-NEXT: [[UADDO:%[0-9]+]]:_(s32), [[UADDO1:%[0-9]+]]:_(s1) = G_UADDO [[UV6]], [[UV8]] - ; CHECK-NEXT: [[UADDE:%[0-9]+]]:_(s32), [[UADDE1:%[0-9]+]]:_(s1) = G_UADDE [[UV7]], [[UV9]], [[UADDO1]] + ; CHECK-NEXT: [[UADDO:%[0-9]+]]:_(s32), [[UADDO1:%[0-9]+]]:_(s1) = G_UADDO [[UV4]], [[UV6]] + ; CHECK-NEXT: [[UADDE:%[0-9]+]]:_(s32), [[UADDE1:%[0-9]+]]:_(s1) = G_UADDE [[UV5]], [[UV7]], [[UADDO1]] ; CHECK-NEXT: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UADDO]](s32), [[UADDE]](s32) + ; CHECK-NEXT: [[UV8:%[0-9]+]]:_(s32), [[UV9:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[UV1]](s64) ; CHECK-NEXT: [[UV10:%[0-9]+]]:_(s32), [[UV11:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[UV1]](s64) - ; CHECK-NEXT: [[UV12:%[0-9]+]]:_(s32), [[UV13:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[UV4]](s64) - ; CHECK-NEXT: [[UADDO2:%[0-9]+]]:_(s32), 
[[UADDO3:%[0-9]+]]:_(s1) = G_UADDO [[UV10]], [[UV12]] - ; CHECK-NEXT: [[UADDE2:%[0-9]+]]:_(s32), [[UADDE3:%[0-9]+]]:_(s1) = G_UADDE [[UV11]], [[UV13]], [[UADDO3]] + ; CHECK-NEXT: [[UADDO2:%[0-9]+]]:_(s32), [[UADDO3:%[0-9]+]]:_(s1) = G_UADDO [[UV8]], [[UV10]] + ; CHECK-NEXT: [[UADDE2:%[0-9]+]]:_(s32), [[UADDE3:%[0-9]+]]:_(s1) = G_UADDE [[UV9]], [[UV11]], [[UADDO3]] ; CHECK-NEXT: [[MV1:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UADDO2]](s32), [[UADDE2]](s32) + ; CHECK-NEXT: [[UV12:%[0-9]+]]:_(s32), [[UV13:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[UV2]](s64) ; CHECK-NEXT: [[UV14:%[0-9]+]]:_(s32), [[UV15:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[UV2]](s64) - ; CHECK-NEXT: [[UV16:%[0-9]+]]:_(s32), [[UV17:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[UV5]](s64) - ; CHECK-NEXT: [[UADDO4:%[0-9]+]]:_(s32), [[UADDO5:%[0-9]+]]:_(s1) = G_UADDO [[UV14]], [[UV16]] - ; CHECK-NEXT: [[UADDE4:%[0-9]+]]:_(s32), [[UADDE5:%[0-9]+]]:_(s1) = G_UADDE [[UV15]], [[UV17]], [[UADDO5]] + ; CHECK-NEXT: [[UADDO4:%[0-9]+]]:_(s32), [[UADDO5:%[0-9]+]]:_(s1) = G_UADDO [[UV12]], [[UV14]] + ; CHECK-NEXT: [[UADDE4:%[0-9]+]]:_(s32), [[UADDE5:%[0-9]+]]:_(s1) = G_UADDE [[UV13]], [[UV15]], [[UADDO5]] ; CHECK-NEXT: [[MV2:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UADDO4]](s32), [[UADDE4]](s32) - ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s64>) = G_BUILD_VECTOR [[MV]](s64), [[MV1]](s64), [[MV2]](s64) + ; CHECK-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<3 x s64>) = G_BUILD_VECTOR [[MV]](s64), [[MV1]](s64), [[MV2]](s64) ; CHECK-NEXT: G_BR %bb.2 ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: bb.2: - ; CHECK-NEXT: [[PHI:%[0-9]+]]:_(<3 x s64>) = G_PHI [[EXTRACT]](<3 x s64>), %bb.0, [[BUILD_VECTOR]](<3 x s64>), %bb.1 + ; CHECK-NEXT: [[PHI:%[0-9]+]]:_(<3 x s64>) = G_PHI [[BUILD_VECTOR]](<3 x s64>), %bb.0, [[BUILD_VECTOR1]](<3 x s64>), %bb.1 ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(<4 x s64>) = G_IMPLICIT_DEF - ; CHECK-NEXT: [[INSERT:%[0-9]+]]:_(<4 x s64>) = G_INSERT [[DEF]], [[PHI]](<3 x s64>), 0 - ; CHECK-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[INSERT]](<4 x s64>) + ; CHECK-NEXT: [[UV16:%[0-9]+]]:_(s64), [[UV17:%[0-9]+]]:_(s64), [[UV18:%[0-9]+]]:_(s64), [[UV19:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[DEF]](<4 x s64>) + ; CHECK-NEXT: [[UV20:%[0-9]+]]:_(s64), [[UV21:%[0-9]+]]:_(s64), [[UV22:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[PHI]](<3 x s64>) + ; CHECK-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<4 x s64>) = G_BUILD_VECTOR [[UV20]](s64), [[UV21]](s64), [[UV22]](s64), [[UV19]](s64) + ; CHECK-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[BUILD_VECTOR2]](<4 x s64>) ; CHECK-NEXT: S_SETPC_B64 undef $sgpr30_sgpr31 bb.0: successors: %bb.1, %bb.2 diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-saddsat.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-saddsat.mir index 7b0547e8b03b..e39849ad6dd7 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-saddsat.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-saddsat.mir @@ -424,11 +424,9 @@ body: | ; GFX6-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 ; GFX6-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) ; GFX6-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>) - ; GFX6-NEXT: [[UV3:%[0-9]+]]:_(<2 x s16>), [[UV4:%[0-9]+]]:_(<2 x s16>), [[UV5:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[COPY]](<6 x s16>) - ; GFX6-NEXT: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[UV4]](<2 x s16>) - ; GFX6-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C]](s32) - ; GFX6-NEXT: [[BITCAST3:%[0-9]+]]:_(s32) = G_BITCAST [[UV5]](<2 x s16>) - ; GFX6-NEXT: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR 
[[BITCAST3]], [[C]](s32) + ; GFX6-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C]](s32) + ; GFX6-NEXT: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[UV2]](<2 x s16>) + ; GFX6-NEXT: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C]](s32) ; GFX6-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[BITCAST]], [[C]](s32) ; GFX6-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[LSHR1]], [[C]](s32) ; GFX6-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 2147483647 @@ -443,7 +441,7 @@ body: | ; GFX6-NEXT: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[SHL]], [[SMIN1]] ; GFX6-NEXT: [[ASHR:%[0-9]+]]:_(s32) = G_ASHR [[ADD]], [[C]](s32) ; GFX6-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[LSHR]], [[C]](s32) - ; GFX6-NEXT: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[BITCAST3]], [[C]](s32) + ; GFX6-NEXT: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[BITCAST2]], [[C]](s32) ; GFX6-NEXT: [[SMAX2:%[0-9]+]]:_(s32) = G_SMAX [[SHL2]], [[C3]] ; GFX6-NEXT: [[SUB2:%[0-9]+]]:_(s32) = G_SUB [[C1]], [[SMAX2]] ; GFX6-NEXT: [[SMIN2:%[0-9]+]]:_(s32) = G_SMIN [[SHL2]], [[C3]] @@ -463,27 +461,27 @@ body: | ; GFX6-NEXT: [[ADD2:%[0-9]+]]:_(s32) = G_ADD [[SHL4]], [[SMIN5]] ; GFX6-NEXT: [[ASHR2:%[0-9]+]]:_(s32) = G_ASHR [[ADD2]], [[C]](s32) ; GFX6-NEXT: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF - ; GFX6-NEXT: [[UV6:%[0-9]+]]:_(<2 x s16>), [[UV7:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<4 x s16>) - ; GFX6-NEXT: [[BITCAST4:%[0-9]+]]:_(s32) = G_BITCAST [[UV6]](<2 x s16>) - ; GFX6-NEXT: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST4]], [[C]](s32) - ; GFX6-NEXT: [[BITCAST5:%[0-9]+]]:_(s32) = G_BITCAST [[UV7]](<2 x s16>) + ; GFX6-NEXT: [[UV3:%[0-9]+]]:_(<2 x s16>), [[UV4:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<4 x s16>) + ; GFX6-NEXT: [[BITCAST3:%[0-9]+]]:_(s32) = G_BITCAST [[UV3]](<2 x s16>) + ; GFX6-NEXT: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST3]], [[C]](s32) + ; GFX6-NEXT: [[BITCAST4:%[0-9]+]]:_(s32) = G_BITCAST [[UV4]](<2 x s16>) ; GFX6-NEXT: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 ; GFX6-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[ASHR]], [[C4]] ; GFX6-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[ASHR1]], [[C4]] ; GFX6-NEXT: [[SHL6:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C]](s32) ; GFX6-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL6]] - ; GFX6-NEXT: [[BITCAST6:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) + ; GFX6-NEXT: [[BITCAST5:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) ; GFX6-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[ASHR2]], [[C4]] - ; GFX6-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[BITCAST4]], [[C4]] + ; GFX6-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[BITCAST3]], [[C4]] ; GFX6-NEXT: [[SHL7:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C]](s32) ; GFX6-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL7]] - ; GFX6-NEXT: [[BITCAST7:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32) + ; GFX6-NEXT: [[BITCAST6:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32) ; GFX6-NEXT: [[AND4:%[0-9]+]]:_(s32) = G_AND [[LSHR3]], [[C4]] - ; GFX6-NEXT: [[AND5:%[0-9]+]]:_(s32) = G_AND [[BITCAST5]], [[C4]] + ; GFX6-NEXT: [[AND5:%[0-9]+]]:_(s32) = G_AND [[BITCAST4]], [[C4]] ; GFX6-NEXT: [[SHL8:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[C]](s32) ; GFX6-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[AND4]], [[SHL8]] - ; GFX6-NEXT: [[BITCAST8:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR2]](s32) - ; GFX6-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BITCAST6]](<2 x s16>), [[BITCAST7]](<2 x s16>), [[BITCAST8]](<2 x s16>) + ; GFX6-NEXT: [[BITCAST7:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR2]](s32) + ; GFX6-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BITCAST5]](<2 x s16>), 
[[BITCAST6]](<2 x s16>), [[BITCAST7]](<2 x s16>) ; GFX6-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[CONCAT_VECTORS]](<6 x s16>) ; GFX8-LABEL: name: saddsat_v3s16 ; GFX8: [[COPY:%[0-9]+]]:_(<6 x s16>) = COPY $vgpr0_vgpr1_vgpr2 @@ -495,13 +493,11 @@ body: | ; GFX8-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) ; GFX8-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>) ; GFX8-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST1]](s32) - ; GFX8-NEXT: [[UV3:%[0-9]+]]:_(<2 x s16>), [[UV4:%[0-9]+]]:_(<2 x s16>), [[UV5:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[COPY]](<6 x s16>) - ; GFX8-NEXT: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[UV4]](<2 x s16>) - ; GFX8-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C]](s32) + ; GFX8-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C]](s32) ; GFX8-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR1]](s32) - ; GFX8-NEXT: [[BITCAST3:%[0-9]+]]:_(s32) = G_BITCAST [[UV5]](<2 x s16>) - ; GFX8-NEXT: [[TRUNC4:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST3]](s32) - ; GFX8-NEXT: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST3]], [[C]](s32) + ; GFX8-NEXT: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[UV2]](<2 x s16>) + ; GFX8-NEXT: [[TRUNC4:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST2]](s32) + ; GFX8-NEXT: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C]](s32) ; GFX8-NEXT: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR2]](s32) ; GFX8-NEXT: [[C1:%[0-9]+]]:_(s16) = G_CONSTANT i16 32767 ; GFX8-NEXT: [[C2:%[0-9]+]]:_(s16) = G_CONSTANT i16 -32768 @@ -528,27 +524,27 @@ body: | ; GFX8-NEXT: [[SMIN5:%[0-9]+]]:_(s16) = G_SMIN [[SMAX5]], [[SUB4]] ; GFX8-NEXT: [[ADD2:%[0-9]+]]:_(s16) = G_ADD [[TRUNC2]], [[SMIN5]] ; GFX8-NEXT: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF - ; GFX8-NEXT: [[UV6:%[0-9]+]]:_(<2 x s16>), [[UV7:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<4 x s16>) - ; GFX8-NEXT: [[BITCAST4:%[0-9]+]]:_(s32) = G_BITCAST [[UV6]](<2 x s16>) - ; GFX8-NEXT: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST4]], [[C]](s32) - ; GFX8-NEXT: [[BITCAST5:%[0-9]+]]:_(s32) = G_BITCAST [[UV7]](<2 x s16>) + ; GFX8-NEXT: [[UV3:%[0-9]+]]:_(<2 x s16>), [[UV4:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<4 x s16>) + ; GFX8-NEXT: [[BITCAST3:%[0-9]+]]:_(s32) = G_BITCAST [[UV3]](<2 x s16>) + ; GFX8-NEXT: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST3]], [[C]](s32) + ; GFX8-NEXT: [[BITCAST4:%[0-9]+]]:_(s32) = G_BITCAST [[UV4]](<2 x s16>) ; GFX8-NEXT: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[ADD]](s16) ; GFX8-NEXT: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[ADD1]](s16) ; GFX8-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C]](s32) ; GFX8-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL]] - ; GFX8-NEXT: [[BITCAST6:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) + ; GFX8-NEXT: [[BITCAST5:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) ; GFX8-NEXT: [[ZEXT2:%[0-9]+]]:_(s32) = G_ZEXT [[ADD2]](s16) ; GFX8-NEXT: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 - ; GFX8-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[BITCAST4]], [[C4]] + ; GFX8-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[BITCAST3]], [[C4]] ; GFX8-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND]], [[C]](s32) ; GFX8-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[ZEXT2]], [[SHL1]] - ; GFX8-NEXT: [[BITCAST7:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32) + ; GFX8-NEXT: [[BITCAST6:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32) ; GFX8-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LSHR3]], [[C4]] - ; GFX8-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[BITCAST5]], [[C4]] + ; GFX8-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[BITCAST4]], [[C4]] ; GFX8-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL 
[[AND2]], [[C]](s32) ; GFX8-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[AND1]], [[SHL2]] - ; GFX8-NEXT: [[BITCAST8:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR2]](s32) - ; GFX8-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BITCAST6]](<2 x s16>), [[BITCAST7]](<2 x s16>), [[BITCAST8]](<2 x s16>) + ; GFX8-NEXT: [[BITCAST7:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR2]](s32) + ; GFX8-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BITCAST5]](<2 x s16>), [[BITCAST6]](<2 x s16>), [[BITCAST7]](<2 x s16>) ; GFX8-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[CONCAT_VECTORS]](<6 x s16>) ; GFX9-LABEL: name: saddsat_v3s16 ; GFX9: [[COPY:%[0-9]+]]:_(<6 x s16>) = COPY $vgpr0_vgpr1_vgpr2 @@ -557,29 +553,27 @@ body: | ; GFX9-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 ; GFX9-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) ; GFX9-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>) + ; GFX9-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C]](s32) + ; GFX9-NEXT: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[UV2]](<2 x s16>) + ; GFX9-NEXT: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C]](s32) ; GFX9-NEXT: [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[BITCAST]](s32), [[LSHR]](s32) ; GFX9-NEXT: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF ; GFX9-NEXT: [[BUILD_VECTOR_TRUNC1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[BITCAST1]](s32), [[DEF]](s32) - ; GFX9-NEXT: [[UV3:%[0-9]+]]:_(<2 x s16>), [[UV4:%[0-9]+]]:_(<2 x s16>), [[UV5:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[COPY]](<6 x s16>) - ; GFX9-NEXT: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[UV4]](<2 x s16>) - ; GFX9-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C]](s32) - ; GFX9-NEXT: [[BITCAST3:%[0-9]+]]:_(s32) = G_BITCAST [[UV5]](<2 x s16>) - ; GFX9-NEXT: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST3]], [[C]](s32) - ; GFX9-NEXT: [[BUILD_VECTOR_TRUNC2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[LSHR1]](s32), [[BITCAST3]](s32) + ; GFX9-NEXT: [[BUILD_VECTOR_TRUNC2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[LSHR1]](s32), [[BITCAST2]](s32) ; GFX9-NEXT: [[BUILD_VECTOR_TRUNC3:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[LSHR2]](s32), [[DEF]](s32) ; GFX9-NEXT: [[SADDSAT:%[0-9]+]]:_(<2 x s16>) = G_SADDSAT [[BUILD_VECTOR_TRUNC]], [[BUILD_VECTOR_TRUNC2]] ; GFX9-NEXT: [[SADDSAT1:%[0-9]+]]:_(<2 x s16>) = G_SADDSAT [[BUILD_VECTOR_TRUNC1]], [[BUILD_VECTOR_TRUNC3]] + ; GFX9-NEXT: [[BITCAST3:%[0-9]+]]:_(s32) = G_BITCAST [[SADDSAT]](<2 x s16>) + ; GFX9-NEXT: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST3]], [[C]](s32) + ; GFX9-NEXT: [[BITCAST4:%[0-9]+]]:_(s32) = G_BITCAST [[SADDSAT1]](<2 x s16>) ; GFX9-NEXT: [[DEF1:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF - ; GFX9-NEXT: [[BITCAST4:%[0-9]+]]:_(s32) = G_BITCAST [[SADDSAT]](<2 x s16>) - ; GFX9-NEXT: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST4]], [[C]](s32) - ; GFX9-NEXT: [[BITCAST5:%[0-9]+]]:_(s32) = G_BITCAST [[SADDSAT1]](<2 x s16>) - ; GFX9-NEXT: [[UV6:%[0-9]+]]:_(<2 x s16>), [[UV7:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF1]](<4 x s16>) - ; GFX9-NEXT: [[BITCAST6:%[0-9]+]]:_(s32) = G_BITCAST [[UV6]](<2 x s16>) - ; GFX9-NEXT: [[LSHR4:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST6]], [[C]](s32) - ; GFX9-NEXT: [[BITCAST7:%[0-9]+]]:_(s32) = G_BITCAST [[UV7]](<2 x s16>) - ; GFX9-NEXT: [[BUILD_VECTOR_TRUNC4:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[BITCAST4]](s32), [[LSHR3]](s32) - ; GFX9-NEXT: [[BUILD_VECTOR_TRUNC5:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[BITCAST5]](s32), [[BITCAST6]](s32) - ; GFX9-NEXT: 
[[BUILD_VECTOR_TRUNC6:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[LSHR4]](s32), [[BITCAST7]](s32) + ; GFX9-NEXT: [[UV3:%[0-9]+]]:_(<2 x s16>), [[UV4:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF1]](<4 x s16>) + ; GFX9-NEXT: [[BITCAST5:%[0-9]+]]:_(s32) = G_BITCAST [[UV3]](<2 x s16>) + ; GFX9-NEXT: [[LSHR4:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST5]], [[C]](s32) + ; GFX9-NEXT: [[BITCAST6:%[0-9]+]]:_(s32) = G_BITCAST [[UV4]](<2 x s16>) + ; GFX9-NEXT: [[BUILD_VECTOR_TRUNC4:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[BITCAST3]](s32), [[LSHR3]](s32) + ; GFX9-NEXT: [[BUILD_VECTOR_TRUNC5:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[BITCAST4]](s32), [[BITCAST5]](s32) + ; GFX9-NEXT: [[BUILD_VECTOR_TRUNC6:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[LSHR4]](s32), [[BITCAST6]](s32) ; GFX9-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR_TRUNC4]](<2 x s16>), [[BUILD_VECTOR_TRUNC5]](<2 x s16>), [[BUILD_VECTOR_TRUNC6]](<2 x s16>) ; GFX9-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[CONCAT_VECTORS]](<6 x s16>) %0:_(<6 x s16>) = COPY $vgpr0_vgpr1_vgpr2 diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-select.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-select.mir index cac4b9b9864e..fa1488e87ffa 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-select.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-select.mir @@ -260,35 +260,23 @@ body: | ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(<3 x s32>) = COPY $vgpr1_vgpr2_vgpr3 ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(<3 x s32>) = COPY $vgpr4_vgpr5_vgpr6 - ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(<3 x s8>) = G_TRUNC [[COPY1]](<3 x s32>) - ; CHECK-NEXT: [[TRUNC1:%[0-9]+]]:_(<3 x s8>) = G_TRUNC [[COPY2]](<3 x s32>) ; CHECK-NEXT: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[COPY]](s32), [[C]] - ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(<4 x s8>) = G_IMPLICIT_DEF - ; CHECK-NEXT: [[INSERT:%[0-9]+]]:_(<4 x s8>) = G_INSERT [[DEF]], [[TRUNC]](<3 x s8>), 0 - ; CHECK-NEXT: [[INSERT1:%[0-9]+]]:_(<4 x s8>) = G_INSERT [[DEF]], [[TRUNC1]](<3 x s8>), 0 - ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s8), [[UV1:%[0-9]+]]:_(s8), [[UV2:%[0-9]+]]:_(s8), [[UV3:%[0-9]+]]:_(s8) = G_UNMERGE_VALUES [[INSERT]](<4 x s8>) - ; CHECK-NEXT: [[UV4:%[0-9]+]]:_(s8), [[UV5:%[0-9]+]]:_(s8), [[UV6:%[0-9]+]]:_(s8), [[UV7:%[0-9]+]]:_(s8) = G_UNMERGE_VALUES [[INSERT1]](<4 x s8>) - ; CHECK-NEXT: [[ANYEXT:%[0-9]+]]:_(s16) = G_ANYEXT [[UV]](s8) - ; CHECK-NEXT: [[ANYEXT1:%[0-9]+]]:_(s16) = G_ANYEXT [[UV4]](s8) - ; CHECK-NEXT: [[SELECT:%[0-9]+]]:_(s16) = G_SELECT [[ICMP]](s1), [[ANYEXT]], [[ANYEXT1]] - ; CHECK-NEXT: [[TRUNC2:%[0-9]+]]:_(s8) = G_TRUNC [[SELECT]](s16) - ; CHECK-NEXT: [[ANYEXT2:%[0-9]+]]:_(s16) = G_ANYEXT [[UV1]](s8) - ; CHECK-NEXT: [[ANYEXT3:%[0-9]+]]:_(s16) = G_ANYEXT [[UV5]](s8) - ; CHECK-NEXT: [[SELECT1:%[0-9]+]]:_(s16) = G_SELECT [[ICMP]](s1), [[ANYEXT2]], [[ANYEXT3]] - ; CHECK-NEXT: [[TRUNC3:%[0-9]+]]:_(s8) = G_TRUNC [[SELECT1]](s16) - ; CHECK-NEXT: [[ANYEXT4:%[0-9]+]]:_(s16) = G_ANYEXT [[UV2]](s8) - ; CHECK-NEXT: [[ANYEXT5:%[0-9]+]]:_(s16) = G_ANYEXT [[UV6]](s8) - ; CHECK-NEXT: [[SELECT2:%[0-9]+]]:_(s16) = G_SELECT [[ICMP]](s1), [[ANYEXT4]], [[ANYEXT5]] - ; CHECK-NEXT: [[TRUNC4:%[0-9]+]]:_(s8) = G_TRUNC [[SELECT2]](s16) - ; CHECK-NEXT: [[ANYEXT6:%[0-9]+]]:_(s16) = G_ANYEXT [[UV3]](s8) - ; CHECK-NEXT: [[ANYEXT7:%[0-9]+]]:_(s16) = G_ANYEXT [[UV7]](s8) - ; CHECK-NEXT: [[SELECT3:%[0-9]+]]:_(s16) = G_SELECT [[ICMP]](s1), [[ANYEXT6]], [[ANYEXT7]] - ; CHECK-NEXT: [[TRUNC5:%[0-9]+]]:_(s8) = G_TRUNC [[SELECT3]](s16) - ; CHECK-NEXT: 
[[BUILD_VECTOR:%[0-9]+]]:_(<4 x s8>) = G_BUILD_VECTOR [[TRUNC2]](s8), [[TRUNC3]](s8), [[TRUNC4]](s8), [[TRUNC5]](s8) - ; CHECK-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<12 x s8>) = G_CONCAT_VECTORS [[BUILD_VECTOR]](<4 x s8>), [[DEF]](<4 x s8>), [[DEF]](<4 x s8>) - ; CHECK-NEXT: [[UV8:%[0-9]+]]:_(<3 x s8>), [[UV9:%[0-9]+]]:_(<3 x s8>), [[UV10:%[0-9]+]]:_(<3 x s8>), [[UV11:%[0-9]+]]:_(<3 x s8>) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<12 x s8>) - ; CHECK-NEXT: [[ANYEXT8:%[0-9]+]]:_(<3 x s32>) = G_ANYEXT [[UV8]](<3 x s8>) - ; CHECK-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[ANYEXT8]](<3 x s32>) + ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](<3 x s32>) + ; CHECK-NEXT: [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY2]](<3 x s32>) + ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[UV]](s32) + ; CHECK-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[UV3]](s32) + ; CHECK-NEXT: [[SELECT:%[0-9]+]]:_(s16) = G_SELECT [[ICMP]](s1), [[TRUNC]], [[TRUNC1]] + ; CHECK-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[UV1]](s32) + ; CHECK-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[UV4]](s32) + ; CHECK-NEXT: [[SELECT1:%[0-9]+]]:_(s16) = G_SELECT [[ICMP]](s1), [[TRUNC2]], [[TRUNC3]] + ; CHECK-NEXT: [[TRUNC4:%[0-9]+]]:_(s16) = G_TRUNC [[UV2]](s32) + ; CHECK-NEXT: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[UV5]](s32) + ; CHECK-NEXT: [[SELECT2:%[0-9]+]]:_(s16) = G_SELECT [[ICMP]](s1), [[TRUNC4]], [[TRUNC5]] + ; CHECK-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[SELECT]](s16) + ; CHECK-NEXT: [[ANYEXT1:%[0-9]+]]:_(s32) = G_ANYEXT [[SELECT1]](s16) + ; CHECK-NEXT: [[ANYEXT2:%[0-9]+]]:_(s32) = G_ANYEXT [[SELECT2]](s16) + ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[ANYEXT]](s32), [[ANYEXT1]](s32), [[ANYEXT2]](s32) + ; CHECK-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[BUILD_VECTOR]](<3 x s32>) %0:_(s32) = COPY $vgpr0 %1:_(s32) = G_CONSTANT i32 0 %2:_(<3 x s32>) = COPY $vgpr1_vgpr2_vgpr3 @@ -384,40 +372,65 @@ body: | ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr6 ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 ; CHECK-NEXT: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[COPY2]](s32), [[C]] - ; CHECK-NEXT: [[UV:%[0-9]+]]:_(<3 x s16>), [[UV1:%[0-9]+]]:_(<3 x s16>) = G_UNMERGE_VALUES [[COPY]](<6 x s16>) - ; CHECK-NEXT: [[UV2:%[0-9]+]]:_(<3 x s16>), [[UV3:%[0-9]+]]:_(<3 x s16>) = G_UNMERGE_VALUES [[COPY1]](<6 x s16>) - ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF - ; CHECK-NEXT: [[INSERT:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF]], [[UV]](<3 x s16>), 0 - ; CHECK-NEXT: [[INSERT1:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF]], [[UV2]](<3 x s16>), 0 - ; CHECK-NEXT: [[SELECT:%[0-9]+]]:_(<4 x s16>) = G_SELECT [[ICMP]](s1), [[INSERT]], [[INSERT1]] - ; CHECK-NEXT: [[DEF1:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF - ; CHECK-NEXT: [[UV4:%[0-9]+]]:_(<2 x s16>), [[UV5:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[SELECT]](<4 x s16>) - ; CHECK-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV4]](<2 x s16>) + ; CHECK-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>), [[UV2:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[COPY]](<6 x s16>) + ; CHECK-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>) ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 ; CHECK-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C1]](s32) - ; CHECK-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV5]](<2 x s16>) - ; CHECK-NEXT: [[UV6:%[0-9]+]]:_(<2 x s16>), [[UV7:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES 
[[DEF1]](<4 x s16>) - ; CHECK-NEXT: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[UV6]](<2 x s16>) - ; CHECK-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C1]](s32) - ; CHECK-NEXT: [[BITCAST3:%[0-9]+]]:_(s32) = G_BITCAST [[UV7]](<2 x s16>) + ; CHECK-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>) ; CHECK-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 ; CHECK-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[BITCAST]], [[C2]] ; CHECK-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LSHR]], [[C2]] ; CHECK-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C1]](s32) ; CHECK-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]] - ; CHECK-NEXT: [[BITCAST4:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) + ; CHECK-NEXT: [[BITCAST2:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) ; CHECK-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[BITCAST1]], [[C2]] - ; CHECK-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[BITCAST2]], [[C2]] - ; CHECK-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C1]](s32) + ; CHECK-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY [[C]](s32) + ; CHECK-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[COPY3]], [[C1]](s32) ; CHECK-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL1]] - ; CHECK-NEXT: [[BITCAST5:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32) + ; CHECK-NEXT: [[BITCAST3:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32) + ; CHECK-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BITCAST2]](<2 x s16>), [[BITCAST3]](<2 x s16>) + ; CHECK-NEXT: [[UV3:%[0-9]+]]:_(<2 x s16>), [[UV4:%[0-9]+]]:_(<2 x s16>), [[UV5:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[COPY1]](<6 x s16>) + ; CHECK-NEXT: [[BITCAST4:%[0-9]+]]:_(s32) = G_BITCAST [[UV3]](<2 x s16>) + ; CHECK-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST4]], [[C1]](s32) + ; CHECK-NEXT: [[BITCAST5:%[0-9]+]]:_(s32) = G_BITCAST [[UV4]](<2 x s16>) + ; CHECK-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[BITCAST4]], [[C2]] ; CHECK-NEXT: [[AND4:%[0-9]+]]:_(s32) = G_AND [[LSHR1]], [[C2]] - ; CHECK-NEXT: [[AND5:%[0-9]+]]:_(s32) = G_AND [[BITCAST3]], [[C2]] - ; CHECK-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[C1]](s32) - ; CHECK-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[AND4]], [[SHL2]] + ; CHECK-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND4]], [[C1]](s32) + ; CHECK-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[AND3]], [[SHL2]] ; CHECK-NEXT: [[BITCAST6:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR2]](s32) - ; CHECK-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BITCAST4]](<2 x s16>), [[BITCAST5]](<2 x s16>), [[BITCAST6]](<2 x s16>) - ; CHECK-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[CONCAT_VECTORS]](<6 x s16>) + ; CHECK-NEXT: [[AND5:%[0-9]+]]:_(s32) = G_AND [[BITCAST5]], [[C2]] + ; CHECK-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY [[C]](s32) + ; CHECK-NEXT: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[COPY4]], [[C1]](s32) + ; CHECK-NEXT: [[OR3:%[0-9]+]]:_(s32) = G_OR [[AND5]], [[SHL3]] + ; CHECK-NEXT: [[BITCAST7:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR3]](s32) + ; CHECK-NEXT: [[CONCAT_VECTORS1:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BITCAST6]](<2 x s16>), [[BITCAST7]](<2 x s16>) + ; CHECK-NEXT: [[SELECT:%[0-9]+]]:_(<4 x s16>) = G_SELECT [[ICMP]](s1), [[CONCAT_VECTORS]], [[CONCAT_VECTORS1]] + ; CHECK-NEXT: [[UV6:%[0-9]+]]:_(<2 x s16>), [[UV7:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[SELECT]](<4 x s16>) + ; CHECK-NEXT: [[BITCAST8:%[0-9]+]]:_(s32) = G_BITCAST [[UV6]](<2 x s16>) + ; CHECK-NEXT: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST8]], [[C1]](s32) + ; CHECK-NEXT: [[BITCAST9:%[0-9]+]]:_(s32) = G_BITCAST [[UV7]](<2 x s16>) + ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(<4 x 
s16>) = G_IMPLICIT_DEF + ; CHECK-NEXT: [[UV8:%[0-9]+]]:_(<2 x s16>), [[UV9:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<4 x s16>) + ; CHECK-NEXT: [[BITCAST10:%[0-9]+]]:_(s32) = G_BITCAST [[UV8]](<2 x s16>) + ; CHECK-NEXT: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST10]], [[C1]](s32) + ; CHECK-NEXT: [[BITCAST11:%[0-9]+]]:_(s32) = G_BITCAST [[UV9]](<2 x s16>) + ; CHECK-NEXT: [[AND6:%[0-9]+]]:_(s32) = G_AND [[BITCAST8]], [[C2]] + ; CHECK-NEXT: [[AND7:%[0-9]+]]:_(s32) = G_AND [[LSHR2]], [[C2]] + ; CHECK-NEXT: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[AND7]], [[C1]](s32) + ; CHECK-NEXT: [[OR4:%[0-9]+]]:_(s32) = G_OR [[AND6]], [[SHL4]] + ; CHECK-NEXT: [[BITCAST12:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR4]](s32) + ; CHECK-NEXT: [[AND8:%[0-9]+]]:_(s32) = G_AND [[BITCAST9]], [[C2]] + ; CHECK-NEXT: [[AND9:%[0-9]+]]:_(s32) = G_AND [[BITCAST10]], [[C2]] + ; CHECK-NEXT: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[AND9]], [[C1]](s32) + ; CHECK-NEXT: [[OR5:%[0-9]+]]:_(s32) = G_OR [[AND8]], [[SHL5]] + ; CHECK-NEXT: [[BITCAST13:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR5]](s32) + ; CHECK-NEXT: [[AND10:%[0-9]+]]:_(s32) = G_AND [[LSHR3]], [[C2]] + ; CHECK-NEXT: [[AND11:%[0-9]+]]:_(s32) = G_AND [[BITCAST11]], [[C2]] + ; CHECK-NEXT: [[SHL6:%[0-9]+]]:_(s32) = G_SHL [[AND11]], [[C1]](s32) + ; CHECK-NEXT: [[OR6:%[0-9]+]]:_(s32) = G_OR [[AND10]], [[SHL6]] + ; CHECK-NEXT: [[BITCAST14:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR6]](s32) + ; CHECK-NEXT: [[CONCAT_VECTORS2:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BITCAST12]](<2 x s16>), [[BITCAST13]](<2 x s16>), [[BITCAST14]](<2 x s16>) + ; CHECK-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[CONCAT_VECTORS2]](<6 x s16>) %0:_(<6 x s16>) = COPY $vgpr0_vgpr1_vgpr2 %1:_(<6 x s16>) = COPY $vgpr3_vgpr4_vgpr5 %2:_(s32) = COPY $vgpr6 diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-sext-inreg.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-sext-inreg.mir index 46a4616d1933..5529b114ac7b 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-sext-inreg.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-sext-inreg.mir @@ -705,10 +705,10 @@ body: | ; GFX9-NEXT: [[BUILD_VECTOR_TRUNC3:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY2]](s32), [[COPY3]](s32) ; GFX9-NEXT: [[SHL1:%[0-9]+]]:_(<2 x s16>) = G_SHL [[BUILD_VECTOR_TRUNC1]], [[BUILD_VECTOR_TRUNC3]](<2 x s16>) ; GFX9-NEXT: [[ASHR1:%[0-9]+]]:_(<2 x s16>) = G_ASHR [[SHL1]], [[BUILD_VECTOR_TRUNC3]](<2 x s16>) - ; GFX9-NEXT: [[DEF1:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF ; GFX9-NEXT: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[ASHR]](<2 x s16>) ; GFX9-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C]](s32) ; GFX9-NEXT: [[BITCAST3:%[0-9]+]]:_(s32) = G_BITCAST [[ASHR1]](<2 x s16>) + ; GFX9-NEXT: [[DEF1:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF ; GFX9-NEXT: [[UV3:%[0-9]+]]:_(<2 x s16>), [[UV4:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF1]](<4 x s16>) ; GFX9-NEXT: [[BITCAST4:%[0-9]+]]:_(s32) = G_BITCAST [[UV3]](<2 x s16>) ; GFX9-NEXT: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST4]], [[C]](s32) diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-sext.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-sext.mir index 09064e3dec7d..088d586cae03 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-sext.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-sext.mir @@ -109,10 +109,7 @@ body: | ; CHECK-LABEL: name: test_sext_v3s16_to_v3s32 ; CHECK: [[COPY:%[0-9]+]]:_(<4 x s16>) = COPY $vgpr0_vgpr1 - ; CHECK-NEXT: [[EXTRACT:%[0-9]+]]:_(<3 x s16>) = G_EXTRACT [[COPY]](<4 x s16>), 0 - ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(<4 x s16>) = 
G_IMPLICIT_DEF - ; CHECK-NEXT: [[INSERT:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF]], [[EXTRACT]](<3 x s16>), 0 - ; CHECK-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[INSERT]](<4 x s16>) + ; CHECK-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[COPY]](<4 x s16>) ; CHECK-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>) ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 ; CHECK-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-shl.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-shl.mir index b406ed427ab0..a1a14756cf6f 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-shl.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-shl.mir @@ -1,8 +1,8 @@ # NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py -# RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=tahiti -O0 -run-pass=legalizer -global-isel-abort=0 %s -o - | FileCheck -check-prefix=SI %s -# RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=fiji -O0 -run-pass=legalizer -global-isel-abort=0 %s -o - | FileCheck -check-prefix=VI %s -# RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx900 -O0 -run-pass=legalizer -global-isel-abort=0 %s -o - | FileCheck -check-prefix=GFX9 %s -# RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx1010 -O0 -run-pass=legalizer -global-isel-abort=0 %s -o - | FileCheck -check-prefix=GFX9 %s +# RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=tahiti -O0 -run-pass=legalizer %s -o - | FileCheck -check-prefix=SI %s +# RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=fiji -O0 -run-pass=legalizer %s -o - | FileCheck -check-prefix=VI %s +# RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx900 -O0 -run-pass=legalizer %s -o - | FileCheck -check-prefix=GFX9 %s +# RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx1010 -O0 -run-pass=legalizer %s -o - | FileCheck -check-prefix=GFX9 %s --- name: test_shl_s32_s32 @@ -405,43 +405,40 @@ body: | ; SI-LABEL: name: test_shl_v3s64_v3s32 ; SI: [[COPY:%[0-9]+]]:_(<4 x s64>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 - ; SI-NEXT: [[EXTRACT:%[0-9]+]]:_(<3 x s64>) = G_EXTRACT [[COPY]](<4 x s64>), 0 + ; SI-NEXT: [[UV:%[0-9]+]]:_(s64), [[UV1:%[0-9]+]]:_(s64), [[UV2:%[0-9]+]]:_(s64), [[UV3:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[COPY]](<4 x s64>) ; SI-NEXT: [[COPY1:%[0-9]+]]:_(<3 x s32>) = COPY $vgpr8_vgpr9_vgpr10 - ; SI-NEXT: [[UV:%[0-9]+]]:_(s64), [[UV1:%[0-9]+]]:_(s64), [[UV2:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[EXTRACT]](<3 x s64>) - ; SI-NEXT: [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](<3 x s32>) - ; SI-NEXT: [[SHL:%[0-9]+]]:_(s64) = G_SHL [[UV]], [[UV3]](s32) - ; SI-NEXT: [[SHL1:%[0-9]+]]:_(s64) = G_SHL [[UV1]], [[UV4]](s32) - ; SI-NEXT: [[SHL2:%[0-9]+]]:_(s64) = G_SHL [[UV2]], [[UV5]](s32) - ; SI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s64>) = G_BUILD_VECTOR [[SHL]](s64), [[SHL1]](s64), [[SHL2]](s64) + ; SI-NEXT: [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32), [[UV6:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](<3 x s32>) + ; SI-NEXT: [[SHL:%[0-9]+]]:_(s64) = G_SHL [[UV]], [[UV4]](s32) + ; SI-NEXT: [[SHL1:%[0-9]+]]:_(s64) = G_SHL [[UV1]], [[UV5]](s32) + ; SI-NEXT: [[SHL2:%[0-9]+]]:_(s64) = G_SHL [[UV2]], [[UV6]](s32) ; SI-NEXT: [[DEF:%[0-9]+]]:_(<4 x s64>) = G_IMPLICIT_DEF - ; SI-NEXT: [[INSERT:%[0-9]+]]:_(<4 x s64>) = G_INSERT [[DEF]], [[BUILD_VECTOR]](<3 x s64>), 0 - ; SI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[INSERT]](<4 x s64>) + ; 
SI-NEXT: [[UV7:%[0-9]+]]:_(s64), [[UV8:%[0-9]+]]:_(s64), [[UV9:%[0-9]+]]:_(s64), [[UV10:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[DEF]](<4 x s64>) + ; SI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s64>) = G_BUILD_VECTOR [[SHL]](s64), [[SHL1]](s64), [[SHL2]](s64), [[UV10]](s64) + ; SI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[BUILD_VECTOR]](<4 x s64>) ; VI-LABEL: name: test_shl_v3s64_v3s32 ; VI: [[COPY:%[0-9]+]]:_(<4 x s64>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 - ; VI-NEXT: [[EXTRACT:%[0-9]+]]:_(<3 x s64>) = G_EXTRACT [[COPY]](<4 x s64>), 0 + ; VI-NEXT: [[UV:%[0-9]+]]:_(s64), [[UV1:%[0-9]+]]:_(s64), [[UV2:%[0-9]+]]:_(s64), [[UV3:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[COPY]](<4 x s64>) ; VI-NEXT: [[COPY1:%[0-9]+]]:_(<3 x s32>) = COPY $vgpr8_vgpr9_vgpr10 - ; VI-NEXT: [[UV:%[0-9]+]]:_(s64), [[UV1:%[0-9]+]]:_(s64), [[UV2:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[EXTRACT]](<3 x s64>) - ; VI-NEXT: [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](<3 x s32>) - ; VI-NEXT: [[SHL:%[0-9]+]]:_(s64) = G_SHL [[UV]], [[UV3]](s32) - ; VI-NEXT: [[SHL1:%[0-9]+]]:_(s64) = G_SHL [[UV1]], [[UV4]](s32) - ; VI-NEXT: [[SHL2:%[0-9]+]]:_(s64) = G_SHL [[UV2]], [[UV5]](s32) - ; VI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s64>) = G_BUILD_VECTOR [[SHL]](s64), [[SHL1]](s64), [[SHL2]](s64) + ; VI-NEXT: [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32), [[UV6:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](<3 x s32>) + ; VI-NEXT: [[SHL:%[0-9]+]]:_(s64) = G_SHL [[UV]], [[UV4]](s32) + ; VI-NEXT: [[SHL1:%[0-9]+]]:_(s64) = G_SHL [[UV1]], [[UV5]](s32) + ; VI-NEXT: [[SHL2:%[0-9]+]]:_(s64) = G_SHL [[UV2]], [[UV6]](s32) ; VI-NEXT: [[DEF:%[0-9]+]]:_(<4 x s64>) = G_IMPLICIT_DEF - ; VI-NEXT: [[INSERT:%[0-9]+]]:_(<4 x s64>) = G_INSERT [[DEF]], [[BUILD_VECTOR]](<3 x s64>), 0 - ; VI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[INSERT]](<4 x s64>) + ; VI-NEXT: [[UV7:%[0-9]+]]:_(s64), [[UV8:%[0-9]+]]:_(s64), [[UV9:%[0-9]+]]:_(s64), [[UV10:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[DEF]](<4 x s64>) + ; VI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s64>) = G_BUILD_VECTOR [[SHL]](s64), [[SHL1]](s64), [[SHL2]](s64), [[UV10]](s64) + ; VI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[BUILD_VECTOR]](<4 x s64>) ; GFX9-LABEL: name: test_shl_v3s64_v3s32 ; GFX9: [[COPY:%[0-9]+]]:_(<4 x s64>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 - ; GFX9-NEXT: [[EXTRACT:%[0-9]+]]:_(<3 x s64>) = G_EXTRACT [[COPY]](<4 x s64>), 0 + ; GFX9-NEXT: [[UV:%[0-9]+]]:_(s64), [[UV1:%[0-9]+]]:_(s64), [[UV2:%[0-9]+]]:_(s64), [[UV3:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[COPY]](<4 x s64>) ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(<3 x s32>) = COPY $vgpr8_vgpr9_vgpr10 - ; GFX9-NEXT: [[UV:%[0-9]+]]:_(s64), [[UV1:%[0-9]+]]:_(s64), [[UV2:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[EXTRACT]](<3 x s64>) - ; GFX9-NEXT: [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](<3 x s32>) - ; GFX9-NEXT: [[SHL:%[0-9]+]]:_(s64) = G_SHL [[UV]], [[UV3]](s32) - ; GFX9-NEXT: [[SHL1:%[0-9]+]]:_(s64) = G_SHL [[UV1]], [[UV4]](s32) - ; GFX9-NEXT: [[SHL2:%[0-9]+]]:_(s64) = G_SHL [[UV2]], [[UV5]](s32) - ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s64>) = G_BUILD_VECTOR [[SHL]](s64), [[SHL1]](s64), [[SHL2]](s64) + ; GFX9-NEXT: [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32), [[UV6:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](<3 x s32>) + ; GFX9-NEXT: [[SHL:%[0-9]+]]:_(s64) = G_SHL [[UV]], [[UV4]](s32) + ; GFX9-NEXT: [[SHL1:%[0-9]+]]:_(s64) = G_SHL [[UV1]], 
[[UV5]](s32) + ; GFX9-NEXT: [[SHL2:%[0-9]+]]:_(s64) = G_SHL [[UV2]], [[UV6]](s32) ; GFX9-NEXT: [[DEF:%[0-9]+]]:_(<4 x s64>) = G_IMPLICIT_DEF - ; GFX9-NEXT: [[INSERT:%[0-9]+]]:_(<4 x s64>) = G_INSERT [[DEF]], [[BUILD_VECTOR]](<3 x s64>), 0 - ; GFX9-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[INSERT]](<4 x s64>) + ; GFX9-NEXT: [[UV7:%[0-9]+]]:_(s64), [[UV8:%[0-9]+]]:_(s64), [[UV9:%[0-9]+]]:_(s64), [[UV10:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[DEF]](<4 x s64>) + ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s64>) = G_BUILD_VECTOR [[SHL]](s64), [[SHL1]](s64), [[SHL2]](s64), [[UV10]](s64) + ; GFX9-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[BUILD_VECTOR]](<4 x s64>) %0:_(<4 x s64>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 %1:_(<3 x s64>) = G_EXTRACT %0, 0 %2:_(<3 x s32>) = COPY $vgpr8_vgpr9_vgpr10 @@ -664,32 +661,33 @@ body: | ; GFX9-LABEL: name: test_shl_v3s16_v3s16 ; GFX9: [[COPY:%[0-9]+]]:_(<6 x s16>) = COPY $vgpr0_vgpr1_vgpr2 ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(<6 x s16>) = COPY $vgpr3_vgpr4_vgpr5 - ; GFX9-NEXT: [[UV:%[0-9]+]]:_(<3 x s16>), [[UV1:%[0-9]+]]:_(<3 x s16>) = G_UNMERGE_VALUES [[COPY]](<6 x s16>) - ; GFX9-NEXT: [[UV2:%[0-9]+]]:_(<3 x s16>), [[UV3:%[0-9]+]]:_(<3 x s16>) = G_UNMERGE_VALUES [[COPY1]](<6 x s16>) - ; GFX9-NEXT: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF - ; GFX9-NEXT: [[INSERT:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF]], [[UV]](<3 x s16>), 0 - ; GFX9-NEXT: [[EXTRACT:%[0-9]+]]:_(<2 x s16>) = G_EXTRACT [[INSERT]](<4 x s16>), 0 - ; GFX9-NEXT: [[INSERT1:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF]], [[UV]](<3 x s16>), 0 - ; GFX9-NEXT: [[EXTRACT1:%[0-9]+]]:_(s16) = G_EXTRACT [[INSERT1]](<4 x s16>), 32 - ; GFX9-NEXT: [[INSERT2:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF]], [[UV2]](<3 x s16>), 0 - ; GFX9-NEXT: [[EXTRACT2:%[0-9]+]]:_(<2 x s16>) = G_EXTRACT [[INSERT2]](<4 x s16>), 0 - ; GFX9-NEXT: [[INSERT3:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF]], [[UV2]](<3 x s16>), 0 - ; GFX9-NEXT: [[EXTRACT3:%[0-9]+]]:_(s16) = G_EXTRACT [[INSERT3]](<4 x s16>), 32 - ; GFX9-NEXT: [[SHL:%[0-9]+]]:_(<2 x s16>) = G_SHL [[EXTRACT]], [[EXTRACT2]](<2 x s16>) - ; GFX9-NEXT: [[SHL1:%[0-9]+]]:_(s16) = G_SHL [[EXTRACT1]], [[EXTRACT3]](s16) - ; GFX9-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[SHL]](<2 x s16>) + ; GFX9-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>), [[UV2:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[COPY]](<6 x s16>) + ; GFX9-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>) ; GFX9-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 ; GFX9-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) - ; GFX9-NEXT: [[DEF1:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF - ; GFX9-NEXT: [[UV4:%[0-9]+]]:_(<2 x s16>), [[UV5:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF1]](<4 x s16>) - ; GFX9-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV4]](<2 x s16>) - ; GFX9-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C]](s32) - ; GFX9-NEXT: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[UV5]](<2 x s16>) + ; GFX9-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>) + ; GFX9-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST1]](s32) ; GFX9-NEXT: [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[BITCAST]](s32), [[LSHR]](s32) + ; GFX9-NEXT: [[UV3:%[0-9]+]]:_(<2 x s16>), [[UV4:%[0-9]+]]:_(<2 x s16>), [[UV5:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[COPY1]](<6 x s16>) + ; GFX9-NEXT: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[UV3]](<2 x s16>) + ; GFX9-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], 
[[C]](s32) + ; GFX9-NEXT: [[BITCAST3:%[0-9]+]]:_(s32) = G_BITCAST [[UV4]](<2 x s16>) + ; GFX9-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST3]](s32) + ; GFX9-NEXT: [[BUILD_VECTOR_TRUNC1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[BITCAST2]](s32), [[LSHR1]](s32) + ; GFX9-NEXT: [[SHL:%[0-9]+]]:_(<2 x s16>) = G_SHL [[BUILD_VECTOR_TRUNC]], [[BUILD_VECTOR_TRUNC1]](<2 x s16>) + ; GFX9-NEXT: [[SHL1:%[0-9]+]]:_(s16) = G_SHL [[TRUNC]], [[TRUNC1]](s16) + ; GFX9-NEXT: [[BITCAST4:%[0-9]+]]:_(s32) = G_BITCAST [[SHL]](<2 x s16>) + ; GFX9-NEXT: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST4]], [[C]](s32) + ; GFX9-NEXT: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF + ; GFX9-NEXT: [[UV6:%[0-9]+]]:_(<2 x s16>), [[UV7:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<4 x s16>) + ; GFX9-NEXT: [[BITCAST5:%[0-9]+]]:_(s32) = G_BITCAST [[UV6]](<2 x s16>) + ; GFX9-NEXT: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST5]], [[C]](s32) + ; GFX9-NEXT: [[BITCAST6:%[0-9]+]]:_(s32) = G_BITCAST [[UV7]](<2 x s16>) + ; GFX9-NEXT: [[BUILD_VECTOR_TRUNC2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[BITCAST4]](s32), [[LSHR2]](s32) ; GFX9-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[SHL1]](s16) - ; GFX9-NEXT: [[BUILD_VECTOR_TRUNC1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[ANYEXT]](s32), [[BITCAST1]](s32) - ; GFX9-NEXT: [[BUILD_VECTOR_TRUNC2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[LSHR1]](s32), [[BITCAST2]](s32) - ; GFX9-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR_TRUNC]](<2 x s16>), [[BUILD_VECTOR_TRUNC1]](<2 x s16>), [[BUILD_VECTOR_TRUNC2]](<2 x s16>) + ; GFX9-NEXT: [[BUILD_VECTOR_TRUNC3:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[ANYEXT]](s32), [[BITCAST5]](s32) + ; GFX9-NEXT: [[BUILD_VECTOR_TRUNC4:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[LSHR3]](s32), [[BITCAST6]](s32) + ; GFX9-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR_TRUNC2]](<2 x s16>), [[BUILD_VECTOR_TRUNC3]](<2 x s16>), [[BUILD_VECTOR_TRUNC4]](<2 x s16>) ; GFX9-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[CONCAT_VECTORS]](<6 x s16>) %0:_(<6 x s16>) = COPY $vgpr0_vgpr1_vgpr2 %1:_(<6 x s16>) = COPY $vgpr3_vgpr4_vgpr5 diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-shuffle-vector.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-shuffle-vector.mir index 8dc47c4e362d..8bbde77b4269 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-shuffle-vector.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-shuffle-vector.mir @@ -35,9 +35,11 @@ body: | ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr0_vgpr1 ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr2_vgpr3 - ; CHECK-NEXT: [[EXTRACT:%[0-9]+]]:_(s32) = G_EXTRACT [[COPY]](<2 x s32>), 0 - ; CHECK-NEXT: [[EXTRACT1:%[0-9]+]]:_(s32) = G_EXTRACT [[COPY]](<2 x s32>), 32 - ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[EXTRACT]](s32), [[EXTRACT1]](s32) + ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<2 x s32>) + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY [[UV]](s32) + ; CHECK-NEXT: [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<2 x s32>) + ; CHECK-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY [[UV3]](s32) + ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[COPY2]](s32), [[COPY3]](s32) ; CHECK-NEXT: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x s32>) %0:_(<2 x s32>) = COPY $vgpr0_vgpr1 %1:_(<2 x s32>) = COPY $vgpr2_vgpr3 @@ -59,9 +61,11 @@ body: | ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: 
[[COPY:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr0_vgpr1 ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr2_vgpr3 - ; CHECK-NEXT: [[EXTRACT:%[0-9]+]]:_(s32) = G_EXTRACT [[COPY]](<2 x s32>), 32 - ; CHECK-NEXT: [[EXTRACT1:%[0-9]+]]:_(s32) = G_EXTRACT [[COPY]](<2 x s32>), 0 - ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[EXTRACT]](s32), [[EXTRACT1]](s32) + ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<2 x s32>) + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY [[UV1]](s32) + ; CHECK-NEXT: [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<2 x s32>) + ; CHECK-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY [[UV2]](s32) + ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[COPY2]](s32), [[COPY3]](s32) ; CHECK-NEXT: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x s32>) %0:_(<2 x s32>) = COPY $vgpr0_vgpr1 %1:_(<2 x s32>) = COPY $vgpr2_vgpr3 @@ -83,9 +87,11 @@ body: | ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr0_vgpr1 ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr2_vgpr3 - ; CHECK-NEXT: [[EXTRACT:%[0-9]+]]:_(s32) = G_EXTRACT [[COPY]](<2 x s32>), 0 - ; CHECK-NEXT: [[EXTRACT1:%[0-9]+]]:_(s32) = G_EXTRACT [[COPY]](<2 x s32>), 0 - ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[EXTRACT]](s32), [[EXTRACT1]](s32) + ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<2 x s32>) + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY [[UV]](s32) + ; CHECK-NEXT: [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<2 x s32>) + ; CHECK-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY [[UV2]](s32) + ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[COPY2]](s32), [[COPY3]](s32) ; CHECK-NEXT: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x s32>) %0:_(<2 x s32>) = COPY $vgpr0_vgpr1 %1:_(<2 x s32>) = COPY $vgpr2_vgpr3 @@ -131,8 +137,9 @@ body: | ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr0_vgpr1 ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr2_vgpr3 ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF - ; CHECK-NEXT: [[EXTRACT:%[0-9]+]]:_(s32) = G_EXTRACT [[COPY]](<2 x s32>), 0 - ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[DEF]](s32), [[EXTRACT]](s32) + ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<2 x s32>) + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY [[UV]](s32) + ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[DEF]](s32), [[COPY2]](s32) ; CHECK-NEXT: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x s32>) %0:_(<2 x s32>) = COPY $vgpr0_vgpr1 %1:_(<2 x s32>) = COPY $vgpr2_vgpr3 @@ -154,9 +161,10 @@ body: | ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr0_vgpr1 ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr2_vgpr3 - ; CHECK-NEXT: [[EXTRACT:%[0-9]+]]:_(s32) = G_EXTRACT [[COPY]](<2 x s32>), 0 + ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<2 x s32>) + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY [[UV]](s32) ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF - ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[EXTRACT]](s32), [[DEF]](s32) + ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[COPY2]](s32), [[DEF]](s32) ; CHECK-NEXT: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x s32>) %0:_(<2 x s32>) = COPY $vgpr0_vgpr1 %1:_(<2 x s32>) = COPY $vgpr2_vgpr3 @@ -178,10 +186,13 
@@ body: | ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(<3 x s32>) = COPY $vgpr0_vgpr1_vgpr2 ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(<3 x s32>) = COPY $vgpr3_vgpr4_vgpr5 - ; CHECK-NEXT: [[EXTRACT:%[0-9]+]]:_(s32) = G_EXTRACT [[COPY1]](<3 x s32>), 0 - ; CHECK-NEXT: [[EXTRACT1:%[0-9]+]]:_(s32) = G_EXTRACT [[COPY]](<3 x s32>), 64 - ; CHECK-NEXT: [[EXTRACT2:%[0-9]+]]:_(s32) = G_EXTRACT [[COPY]](<3 x s32>), 32 - ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[EXTRACT]](s32), [[EXTRACT1]](s32), [[EXTRACT2]](s32) + ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](<3 x s32>) + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY [[UV]](s32) + ; CHECK-NEXT: [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<3 x s32>) + ; CHECK-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY [[UV5]](s32) + ; CHECK-NEXT: [[UV6:%[0-9]+]]:_(s32), [[UV7:%[0-9]+]]:_(s32), [[UV8:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<3 x s32>) + ; CHECK-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY [[UV7]](s32) + ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32) ; CHECK-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[BUILD_VECTOR]](<3 x s32>) %0:_(<3 x s32>) = COPY $vgpr0_vgpr1_vgpr2 %1:_(<3 x s32>) = COPY $vgpr3_vgpr4_vgpr5 @@ -203,9 +214,11 @@ body: | ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(<3 x s32>) = COPY $vgpr0_vgpr1_vgpr2 ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(<3 x s32>) = COPY $vgpr3_vgpr4_vgpr5 - ; CHECK-NEXT: [[EXTRACT:%[0-9]+]]:_(s32) = G_EXTRACT [[COPY]](<3 x s32>), 64 - ; CHECK-NEXT: [[EXTRACT1:%[0-9]+]]:_(s32) = G_EXTRACT [[COPY]](<3 x s32>), 32 - ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[EXTRACT]](s32), [[EXTRACT1]](s32) + ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<3 x s32>) + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY [[UV2]](s32) + ; CHECK-NEXT: [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<3 x s32>) + ; CHECK-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY [[UV4]](s32) + ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[COPY2]](s32), [[COPY3]](s32) ; CHECK-NEXT: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x s32>) %0:_(<3 x s32>) = COPY $vgpr0_vgpr1_vgpr2 %1:_(<3 x s32>) = COPY $vgpr3_vgpr4_vgpr5 @@ -293,50 +306,29 @@ body: | ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(<4 x s16>) = COPY $vgpr0_vgpr1 ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(<4 x s16>) = COPY $vgpr2_vgpr3 - ; CHECK-NEXT: [[EXTRACT:%[0-9]+]]:_(<3 x s16>) = G_EXTRACT [[COPY]](<4 x s16>), 0 - ; CHECK-NEXT: [[EXTRACT1:%[0-9]+]]:_(<3 x s16>) = G_EXTRACT [[COPY1]](<4 x s16>), 0 - ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF - ; CHECK-NEXT: [[INSERT:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF]], [[EXTRACT1]](<3 x s16>), 0 - ; CHECK-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[INSERT]](<4 x s16>) + ; CHECK-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[COPY]](<4 x s16>) ; CHECK-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>) ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 ; CHECK-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) - ; CHECK-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>) - ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[BITCAST]](s32), 
[[LSHR]](s32), [[BITCAST1]](s32) - ; CHECK-NEXT: [[EXTRACT2:%[0-9]+]]:_(s32) = G_EXTRACT [[BUILD_VECTOR]](<3 x s32>), 64 - ; CHECK-NEXT: [[INSERT1:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF]], [[EXTRACT]](<3 x s16>), 0 - ; CHECK-NEXT: [[UV2:%[0-9]+]]:_(<2 x s16>), [[UV3:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[INSERT1]](<4 x s16>) - ; CHECK-NEXT: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[UV2]](<2 x s16>) - ; CHECK-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C]](s32) - ; CHECK-NEXT: [[BITCAST3:%[0-9]+]]:_(s32) = G_BITCAST [[UV3]](<2 x s16>) - ; CHECK-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[BITCAST2]](s32), [[LSHR1]](s32), [[BITCAST3]](s32) - ; CHECK-NEXT: [[EXTRACT3:%[0-9]+]]:_(s32) = G_EXTRACT [[BUILD_VECTOR1]](<3 x s32>), 32 - ; CHECK-NEXT: [[INSERT2:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF]], [[EXTRACT1]](<3 x s16>), 0 - ; CHECK-NEXT: [[UV4:%[0-9]+]]:_(<2 x s16>), [[UV5:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[INSERT2]](<4 x s16>) - ; CHECK-NEXT: [[BITCAST4:%[0-9]+]]:_(s32) = G_BITCAST [[UV4]](<2 x s16>) - ; CHECK-NEXT: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST4]], [[C]](s32) - ; CHECK-NEXT: [[BITCAST5:%[0-9]+]]:_(s32) = G_BITCAST [[UV5]](<2 x s16>) - ; CHECK-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[BITCAST4]](s32), [[LSHR2]](s32), [[BITCAST5]](s32) - ; CHECK-NEXT: [[EXTRACT4:%[0-9]+]]:_(s32) = G_EXTRACT [[BUILD_VECTOR2]](<3 x s32>), 0 - ; CHECK-NEXT: [[INSERT3:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF]], [[EXTRACT]](<3 x s16>), 0 - ; CHECK-NEXT: [[UV6:%[0-9]+]]:_(<2 x s16>), [[UV7:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[INSERT3]](<4 x s16>) - ; CHECK-NEXT: [[BITCAST6:%[0-9]+]]:_(s32) = G_BITCAST [[UV6]](<2 x s16>) - ; CHECK-NEXT: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST6]], [[C]](s32) - ; CHECK-NEXT: [[BITCAST7:%[0-9]+]]:_(s32) = G_BITCAST [[UV7]](<2 x s16>) - ; CHECK-NEXT: [[BUILD_VECTOR3:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[BITCAST6]](s32), [[LSHR3]](s32), [[BITCAST7]](s32) - ; CHECK-NEXT: [[EXTRACT5:%[0-9]+]]:_(s32) = G_EXTRACT [[BUILD_VECTOR3]](<3 x s32>), 0 + ; CHECK-NEXT: [[UV2:%[0-9]+]]:_(<2 x s16>), [[UV3:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[COPY1]](<4 x s16>) + ; CHECK-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV2]](<2 x s16>) + ; CHECK-NEXT: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[UV3]](<2 x s16>) + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY [[BITCAST2]](s32) + ; CHECK-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY [[LSHR]](s32) + ; CHECK-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY [[BITCAST1]](s32) + ; CHECK-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY [[BITCAST]](s32) ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 - ; CHECK-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[EXTRACT2]], [[C1]] - ; CHECK-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[EXTRACT3]], [[C1]] + ; CHECK-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY2]], [[C1]] + ; CHECK-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[COPY3]], [[C1]] ; CHECK-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C]](s32) ; CHECK-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]] - ; CHECK-NEXT: [[BITCAST8:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) - ; CHECK-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[EXTRACT4]], [[C1]] - ; CHECK-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[EXTRACT5]], [[C1]] + ; CHECK-NEXT: [[BITCAST3:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) + ; CHECK-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[COPY4]], [[C1]] + ; CHECK-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[COPY5]], [[C1]] ; CHECK-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C]](s32) ; CHECK-NEXT: 
[[OR1:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL1]] - ; CHECK-NEXT: [[BITCAST9:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32) - ; CHECK-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BITCAST8]](<2 x s16>), [[BITCAST9]](<2 x s16>) + ; CHECK-NEXT: [[BITCAST4:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32) + ; CHECK-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BITCAST3]](<2 x s16>), [[BITCAST4]](<2 x s16>) ; CHECK-NEXT: $vgpr0_vgpr1 = COPY [[CONCAT_VECTORS]](<4 x s16>) %0:_(<4 x s16>) = COPY $vgpr0_vgpr1 %1:_(<4 x s16>) = COPY $vgpr2_vgpr3 diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-shuffle-vector.s16.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-shuffle-vector.s16.mir index e5a21d494c4c..faff7290f6d5 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-shuffle-vector.s16.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-shuffle-vector.s16.mir @@ -862,66 +862,48 @@ body: | ; GFX8-NEXT: {{ $}} ; GFX8-NEXT: [[COPY:%[0-9]+]]:_(<4 x s16>) = COPY $vgpr0_vgpr1 ; GFX8-NEXT: [[COPY1:%[0-9]+]]:_(<4 x s16>) = COPY $vgpr2_vgpr3 - ; GFX8-NEXT: [[EXTRACT:%[0-9]+]]:_(<3 x s16>) = G_EXTRACT [[COPY]](<4 x s16>), 0 - ; GFX8-NEXT: [[EXTRACT1:%[0-9]+]]:_(<3 x s16>) = G_EXTRACT [[COPY1]](<4 x s16>), 0 - ; GFX8-NEXT: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF - ; GFX8-NEXT: [[INSERT:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF]], [[EXTRACT1]](<3 x s16>), 0 - ; GFX8-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[INSERT]](<4 x s16>) + ; GFX8-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[COPY]](<4 x s16>) ; GFX8-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>) ; GFX8-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 ; GFX8-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) - ; GFX8-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>) - ; GFX8-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[BITCAST]](s32), [[LSHR]](s32), [[BITCAST1]](s32) - ; GFX8-NEXT: [[EXTRACT2:%[0-9]+]]:_(s32) = G_EXTRACT [[BUILD_VECTOR]](<3 x s32>), 64 - ; GFX8-NEXT: [[INSERT1:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF]], [[EXTRACT]](<3 x s16>), 0 - ; GFX8-NEXT: [[UV2:%[0-9]+]]:_(<2 x s16>), [[UV3:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[INSERT1]](<4 x s16>) - ; GFX8-NEXT: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[UV2]](<2 x s16>) - ; GFX8-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C]](s32) - ; GFX8-NEXT: [[BITCAST3:%[0-9]+]]:_(s32) = G_BITCAST [[UV3]](<2 x s16>) - ; GFX8-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[BITCAST2]](s32), [[LSHR1]](s32), [[BITCAST3]](s32) - ; GFX8-NEXT: [[EXTRACT3:%[0-9]+]]:_(s32) = G_EXTRACT [[BUILD_VECTOR1]](<3 x s32>), 32 - ; GFX8-NEXT: [[EXTRACT4:%[0-9]+]]:_(s32) = G_EXTRACT [[BUILD_VECTOR]](<3 x s32>), 0 - ; GFX8-NEXT: [[EXTRACT5:%[0-9]+]]:_(s32) = G_EXTRACT [[BUILD_VECTOR1]](<3 x s32>), 0 + ; GFX8-NEXT: [[UV2:%[0-9]+]]:_(<2 x s16>), [[UV3:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[COPY1]](<4 x s16>) + ; GFX8-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV2]](<2 x s16>) + ; GFX8-NEXT: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[UV3]](<2 x s16>) + ; GFX8-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY [[BITCAST2]](s32) + ; GFX8-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY [[LSHR]](s32) + ; GFX8-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY [[BITCAST1]](s32) + ; GFX8-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY [[BITCAST]](s32) ; GFX8-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 - ; GFX8-NEXT: [[AND:%[0-9]+]]:_(s32) 
= G_AND [[EXTRACT2]], [[C1]] - ; GFX8-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[EXTRACT3]], [[C1]] + ; GFX8-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY2]], [[C1]] + ; GFX8-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[COPY3]], [[C1]] ; GFX8-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C]](s32) ; GFX8-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]] - ; GFX8-NEXT: [[BITCAST4:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) - ; GFX8-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[EXTRACT4]], [[C1]] - ; GFX8-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[EXTRACT5]], [[C1]] + ; GFX8-NEXT: [[BITCAST3:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) + ; GFX8-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[COPY4]], [[C1]] + ; GFX8-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[COPY5]], [[C1]] ; GFX8-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C]](s32) ; GFX8-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL1]] - ; GFX8-NEXT: [[BITCAST5:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32) - ; GFX8-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BITCAST4]](<2 x s16>), [[BITCAST5]](<2 x s16>) + ; GFX8-NEXT: [[BITCAST4:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32) + ; GFX8-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BITCAST3]](<2 x s16>), [[BITCAST4]](<2 x s16>) ; GFX8-NEXT: $vgpr0_vgpr1 = COPY [[CONCAT_VECTORS]](<4 x s16>) ; GFX9-LABEL: name: shufflevector_v4s16_v3s16_2_0 ; GFX9: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3 ; GFX9-NEXT: {{ $}} ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(<4 x s16>) = COPY $vgpr0_vgpr1 ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(<4 x s16>) = COPY $vgpr2_vgpr3 - ; GFX9-NEXT: [[EXTRACT:%[0-9]+]]:_(<3 x s16>) = G_EXTRACT [[COPY]](<4 x s16>), 0 - ; GFX9-NEXT: [[EXTRACT1:%[0-9]+]]:_(<3 x s16>) = G_EXTRACT [[COPY1]](<4 x s16>), 0 - ; GFX9-NEXT: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF - ; GFX9-NEXT: [[INSERT:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF]], [[EXTRACT1]](<3 x s16>), 0 - ; GFX9-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[INSERT]](<4 x s16>) + ; GFX9-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[COPY]](<4 x s16>) ; GFX9-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>) ; GFX9-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 ; GFX9-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) - ; GFX9-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>) - ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[BITCAST]](s32), [[LSHR]](s32), [[BITCAST1]](s32) - ; GFX9-NEXT: [[EXTRACT2:%[0-9]+]]:_(s32) = G_EXTRACT [[BUILD_VECTOR]](<3 x s32>), 64 - ; GFX9-NEXT: [[INSERT1:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF]], [[EXTRACT]](<3 x s16>), 0 - ; GFX9-NEXT: [[UV2:%[0-9]+]]:_(<2 x s16>), [[UV3:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[INSERT1]](<4 x s16>) - ; GFX9-NEXT: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[UV2]](<2 x s16>) - ; GFX9-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C]](s32) - ; GFX9-NEXT: [[BITCAST3:%[0-9]+]]:_(s32) = G_BITCAST [[UV3]](<2 x s16>) - ; GFX9-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[BITCAST2]](s32), [[LSHR1]](s32), [[BITCAST3]](s32) - ; GFX9-NEXT: [[EXTRACT3:%[0-9]+]]:_(s32) = G_EXTRACT [[BUILD_VECTOR1]](<3 x s32>), 32 - ; GFX9-NEXT: [[EXTRACT4:%[0-9]+]]:_(s32) = G_EXTRACT [[BUILD_VECTOR]](<3 x s32>), 0 - ; GFX9-NEXT: [[EXTRACT5:%[0-9]+]]:_(s32) = G_EXTRACT [[BUILD_VECTOR1]](<3 x s32>), 0 - ; GFX9-NEXT: [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[EXTRACT2]](s32), [[EXTRACT3]](s32) - ; 
GFX9-NEXT: [[BUILD_VECTOR_TRUNC1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[EXTRACT4]](s32), [[EXTRACT5]](s32) + ; GFX9-NEXT: [[UV2:%[0-9]+]]:_(<2 x s16>), [[UV3:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[COPY1]](<4 x s16>) + ; GFX9-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV2]](<2 x s16>) + ; GFX9-NEXT: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[UV3]](<2 x s16>) + ; GFX9-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY [[BITCAST2]](s32) + ; GFX9-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY [[LSHR]](s32) + ; GFX9-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY [[BITCAST1]](s32) + ; GFX9-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY [[BITCAST]](s32) + ; GFX9-NEXT: [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY2]](s32), [[COPY3]](s32) + ; GFX9-NEXT: [[BUILD_VECTOR_TRUNC1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY4]](s32), [[COPY5]](s32) ; GFX9-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR_TRUNC]](<2 x s16>), [[BUILD_VECTOR_TRUNC1]](<2 x s16>) ; GFX9-NEXT: $vgpr0_vgpr1 = COPY [[CONCAT_VECTORS]](<4 x s16>) %0:_(<4 x s16>) = COPY $vgpr0_vgpr1 @@ -948,12 +930,14 @@ body: | ; GFX8-NEXT: [[COPY1:%[0-9]+]]:_(<4 x s16>) = COPY $vgpr2_vgpr3 ; GFX8-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x s32>) = G_BITCAST [[COPY]](<4 x s16>) ; GFX8-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 - ; GFX8-NEXT: [[EXTRACT:%[0-9]+]]:_(s32) = G_EXTRACT [[BITCAST]](<2 x s32>), 0 + ; GFX8-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[BITCAST]](<2 x s32>) + ; GFX8-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY [[UV]](s32) ; GFX8-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; GFX8-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[EXTRACT]], [[C1]](s32) + ; GFX8-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[COPY2]], [[C1]](s32) ; GFX8-NEXT: [[BITCAST1:%[0-9]+]]:_(<2 x s32>) = G_BITCAST [[COPY]](<4 x s16>) - ; GFX8-NEXT: [[EXTRACT1:%[0-9]+]]:_(s32) = G_EXTRACT [[BITCAST1]](<2 x s32>), 0 - ; GFX8-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[EXTRACT1]], [[C]](s32) + ; GFX8-NEXT: [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[BITCAST1]](<2 x s32>) + ; GFX8-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY [[UV2]](s32) + ; GFX8-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[COPY3]], [[C]](s32) ; GFX8-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 ; GFX8-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[LSHR]], [[C2]] ; GFX8-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LSHR1]], [[C2]] @@ -968,12 +952,14 @@ body: | ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(<4 x s16>) = COPY $vgpr2_vgpr3 ; GFX9-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x s32>) = G_BITCAST [[COPY]](<4 x s16>) ; GFX9-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 - ; GFX9-NEXT: [[EXTRACT:%[0-9]+]]:_(s32) = G_EXTRACT [[BITCAST]](<2 x s32>), 0 + ; GFX9-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[BITCAST]](<2 x s32>) + ; GFX9-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY [[UV]](s32) ; GFX9-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; GFX9-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[EXTRACT]], [[C1]](s32) + ; GFX9-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[COPY2]], [[C1]](s32) ; GFX9-NEXT: [[BITCAST1:%[0-9]+]]:_(<2 x s32>) = G_BITCAST [[COPY]](<4 x s16>) - ; GFX9-NEXT: [[EXTRACT1:%[0-9]+]]:_(s32) = G_EXTRACT [[BITCAST1]](<2 x s32>), 0 - ; GFX9-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[EXTRACT1]], [[C]](s32) + ; GFX9-NEXT: [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[BITCAST1]](<2 x s32>) + ; GFX9-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY [[UV2]](s32) + ; GFX9-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[COPY3]], 
[[C]](s32) ; GFX9-NEXT: [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[LSHR]](s32), [[LSHR1]](s32) ; GFX9-NEXT: $vgpr0 = COPY [[BUILD_VECTOR_TRUNC]](<2 x s16>) %0:_(<4 x s16>) = COPY $vgpr0_vgpr1 @@ -997,12 +983,14 @@ body: | ; GFX8-NEXT: [[COPY:%[0-9]+]]:_(<4 x s16>) = COPY $vgpr0_vgpr1 ; GFX8-NEXT: [[COPY1:%[0-9]+]]:_(<4 x s16>) = COPY $vgpr2_vgpr3 ; GFX8-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x s32>) = G_BITCAST [[COPY]](<4 x s16>) - ; GFX8-NEXT: [[EXTRACT:%[0-9]+]]:_(s32) = G_EXTRACT [[BITCAST]](<2 x s32>), 0 + ; GFX8-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[BITCAST]](<2 x s32>) + ; GFX8-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY [[UV]](s32) ; GFX8-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; GFX8-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[EXTRACT]], [[C]](s32) + ; GFX8-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[COPY2]], [[C]](s32) ; GFX8-NEXT: [[BITCAST1:%[0-9]+]]:_(<2 x s32>) = G_BITCAST [[COPY]](<4 x s16>) - ; GFX8-NEXT: [[EXTRACT1:%[0-9]+]]:_(s32) = G_EXTRACT [[BITCAST1]](<2 x s32>), 32 - ; GFX8-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[EXTRACT1]], [[C]](s32) + ; GFX8-NEXT: [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[BITCAST1]](<2 x s32>) + ; GFX8-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY [[UV3]](s32) + ; GFX8-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[COPY3]], [[C]](s32) ; GFX8-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 ; GFX8-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[LSHR]], [[C1]] ; GFX8-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LSHR1]], [[C1]] @@ -1016,12 +1004,14 @@ body: | ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(<4 x s16>) = COPY $vgpr0_vgpr1 ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(<4 x s16>) = COPY $vgpr2_vgpr3 ; GFX9-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x s32>) = G_BITCAST [[COPY]](<4 x s16>) - ; GFX9-NEXT: [[EXTRACT:%[0-9]+]]:_(s32) = G_EXTRACT [[BITCAST]](<2 x s32>), 0 + ; GFX9-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[BITCAST]](<2 x s32>) + ; GFX9-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY [[UV]](s32) ; GFX9-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; GFX9-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[EXTRACT]], [[C]](s32) + ; GFX9-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[COPY2]], [[C]](s32) ; GFX9-NEXT: [[BITCAST1:%[0-9]+]]:_(<2 x s32>) = G_BITCAST [[COPY]](<4 x s16>) - ; GFX9-NEXT: [[EXTRACT1:%[0-9]+]]:_(s32) = G_EXTRACT [[BITCAST1]](<2 x s32>), 32 - ; GFX9-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[EXTRACT1]], [[C]](s32) + ; GFX9-NEXT: [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[BITCAST1]](<2 x s32>) + ; GFX9-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY [[UV3]](s32) + ; GFX9-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[COPY3]], [[C]](s32) ; GFX9-NEXT: [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[LSHR]](s32), [[LSHR1]](s32) ; GFX9-NEXT: $vgpr0 = COPY [[BUILD_VECTOR_TRUNC]](<2 x s16>) %0:_(<4 x s16>) = COPY $vgpr0_vgpr1 diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-smax.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-smax.mir index f336e7b5aa8f..ce0afe0b50cb 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-smax.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-smax.mir @@ -1,8 +1,8 @@ # NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py -# RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=tahiti -run-pass=legalizer -global-isel-abort=0 %s -o - | FileCheck -check-prefix=SI %s -# RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=fiji -run-pass=legalizer -global-isel-abort=0 %s -o - | FileCheck -check-prefix=VI %s -# RUN: llc 
-mtriple=amdgcn-mesa-mesa3d -mcpu=gfx900 -run-pass=legalizer -global-isel-abort=0 %s -o - | FileCheck -check-prefix=GFX9 %s -# RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx1010 -run-pass=legalizer -global-isel-abort=0 %s -o - | FileCheck -check-prefix=GFX9 %s +# RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=tahiti -run-pass=legalizer %s -o - | FileCheck -check-prefix=SI %s +# RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=fiji -run-pass=legalizer %s -o - | FileCheck -check-prefix=VI %s +# RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx900 -run-pass=legalizer %s -o - | FileCheck -check-prefix=GFX9 %s +# RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx1010 -run-pass=legalizer %s -o - | FileCheck -check-prefix=GFX9 %s --- name: test_smax_s32 @@ -327,12 +327,12 @@ body: | ; SI-LABEL: name: test_smax_v3s16 ; SI: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF - ; SI-NEXT: [[DEF1:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF ; SI-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<4 x s16>) ; SI-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>) ; SI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 ; SI-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) ; SI-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>) + ; SI-NEXT: [[DEF1:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF ; SI-NEXT: [[UV2:%[0-9]+]]:_(<2 x s16>), [[UV3:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF1]](<4 x s16>) ; SI-NEXT: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[UV2]](<2 x s16>) ; SI-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C]](s32) @@ -350,7 +350,6 @@ body: | ; SI-NEXT: S_NOP 0, implicit [[BUILD_VECTOR]](<3 x s32>) ; VI-LABEL: name: test_smax_v3s16 ; VI: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF - ; VI-NEXT: [[DEF1:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF ; VI-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<4 x s16>) ; VI-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>) ; VI-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST]](s32) @@ -359,6 +358,7 @@ body: | ; VI-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) ; VI-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>) ; VI-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST1]](s32) + ; VI-NEXT: [[DEF1:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF ; VI-NEXT: [[UV2:%[0-9]+]]:_(<2 x s16>), [[UV3:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF1]](<4 x s16>) ; VI-NEXT: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[UV2]](<2 x s16>) ; VI-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST2]](s32) @@ -376,23 +376,27 @@ body: | ; VI-NEXT: S_NOP 0, implicit [[BUILD_VECTOR]](<3 x s32>) ; GFX9-LABEL: name: test_smax_v3s16 ; GFX9: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF - ; GFX9-NEXT: [[DEF1:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF - ; GFX9-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<12 x s16>) = G_CONCAT_VECTORS [[DEF]](<4 x s16>), [[DEF1]](<4 x s16>), [[DEF1]](<4 x s16>) - ; GFX9-NEXT: [[UV:%[0-9]+]]:_(<3 x s16>), [[UV1:%[0-9]+]]:_(<3 x s16>), [[UV2:%[0-9]+]]:_(<3 x s16>), [[UV3:%[0-9]+]]:_(<3 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<12 x s16>) - ; GFX9-NEXT: [[DEF2:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF - ; GFX9-NEXT: [[CONCAT_VECTORS1:%[0-9]+]]:_(<12 x s16>) = G_CONCAT_VECTORS [[DEF2]](<4 x s16>), [[DEF1]](<4 x s16>), [[DEF1]](<4 x s16>) - ; GFX9-NEXT: [[UV4:%[0-9]+]]:_(<3 x s16>), [[UV5:%[0-9]+]]:_(<3 x s16>), [[UV6:%[0-9]+]]:_(<3 x s16>), [[UV7:%[0-9]+]]:_(<3 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS1]](<12 x s16>) - ; GFX9-NEXT: 
[[INSERT:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF1]], [[UV]](<3 x s16>), 0 - ; GFX9-NEXT: [[INSERT1:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF1]], [[UV4]](<3 x s16>), 0 - ; GFX9-NEXT: [[UV8:%[0-9]+]]:_(<2 x s16>), [[UV9:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[INSERT]](<4 x s16>) - ; GFX9-NEXT: [[UV10:%[0-9]+]]:_(<2 x s16>), [[UV11:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[INSERT1]](<4 x s16>) - ; GFX9-NEXT: [[SMAX:%[0-9]+]]:_(<2 x s16>) = G_SMAX [[UV8]], [[UV10]] - ; GFX9-NEXT: [[SMAX1:%[0-9]+]]:_(<2 x s16>) = G_SMAX [[UV9]], [[UV11]] - ; GFX9-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[SMAX]](<2 x s16>) + ; GFX9-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<4 x s16>) + ; GFX9-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>) ; GFX9-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 ; GFX9-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) - ; GFX9-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[SMAX1]](<2 x s16>) - ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[BITCAST]](s32), [[LSHR]](s32), [[BITCAST1]](s32) + ; GFX9-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>) + ; GFX9-NEXT: [[DEF1:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF + ; GFX9-NEXT: [[UV2:%[0-9]+]]:_(<2 x s16>), [[UV3:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF1]](<4 x s16>) + ; GFX9-NEXT: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[UV2]](<2 x s16>) + ; GFX9-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C]](s32) + ; GFX9-NEXT: [[BITCAST3:%[0-9]+]]:_(s32) = G_BITCAST [[UV3]](<2 x s16>) + ; GFX9-NEXT: [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[BITCAST]](s32), [[LSHR]](s32) + ; GFX9-NEXT: [[DEF2:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF + ; GFX9-NEXT: [[BUILD_VECTOR_TRUNC1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[BITCAST1]](s32), [[DEF2]](s32) + ; GFX9-NEXT: [[BUILD_VECTOR_TRUNC2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[BITCAST2]](s32), [[LSHR1]](s32) + ; GFX9-NEXT: [[BUILD_VECTOR_TRUNC3:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[BITCAST3]](s32), [[DEF2]](s32) + ; GFX9-NEXT: [[SMAX:%[0-9]+]]:_(<2 x s16>) = G_SMAX [[BUILD_VECTOR_TRUNC]], [[BUILD_VECTOR_TRUNC2]] + ; GFX9-NEXT: [[SMAX1:%[0-9]+]]:_(<2 x s16>) = G_SMAX [[BUILD_VECTOR_TRUNC1]], [[BUILD_VECTOR_TRUNC3]] + ; GFX9-NEXT: [[BITCAST4:%[0-9]+]]:_(s32) = G_BITCAST [[SMAX]](<2 x s16>) + ; GFX9-NEXT: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST4]], [[C]](s32) + ; GFX9-NEXT: [[BITCAST5:%[0-9]+]]:_(s32) = G_BITCAST [[SMAX1]](<2 x s16>) + ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[BITCAST4]](s32), [[LSHR2]](s32), [[BITCAST5]](s32) ; GFX9-NEXT: S_NOP 0, implicit [[BUILD_VECTOR]](<3 x s32>) %0:_(<3 x s16>) = G_IMPLICIT_DEF %1:_(<3 x s16>) = G_IMPLICIT_DEF diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-smin.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-smin.mir index 60ae4743796c..08df7e169734 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-smin.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-smin.mir @@ -1,8 +1,8 @@ # NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py -# RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=tahiti -run-pass=legalizer -global-isel-abort=0 %s -o - | FileCheck -check-prefix=SI %s -# RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=fiji -run-pass=legalizer -global-isel-abort=0 %s -o - | FileCheck -check-prefix=VI %s -# RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx900 -run-pass=legalizer -global-isel-abort=0 %s -o - | FileCheck 
-check-prefix=GFX9 %s -# RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx1010 -run-pass=legalizer -global-isel-abort=0 %s -o - | FileCheck -check-prefix=GFX9 %s +# RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=tahiti -run-pass=legalizer %s -o - | FileCheck -check-prefix=SI %s +# RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=fiji -run-pass=legalizer %s -o - | FileCheck -check-prefix=VI %s +# RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx900 -run-pass=legalizer %s -o - | FileCheck -check-prefix=GFX9 %s +# RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx1010 -run-pass=legalizer %s -o - | FileCheck -check-prefix=GFX9 %s --- name: test_smin_s32 @@ -327,12 +327,12 @@ body: | ; SI-LABEL: name: test_smin_v3s16 ; SI: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF - ; SI-NEXT: [[DEF1:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF ; SI-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<4 x s16>) ; SI-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>) ; SI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 ; SI-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) ; SI-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>) + ; SI-NEXT: [[DEF1:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF ; SI-NEXT: [[UV2:%[0-9]+]]:_(<2 x s16>), [[UV3:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF1]](<4 x s16>) ; SI-NEXT: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[UV2]](<2 x s16>) ; SI-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C]](s32) @@ -350,7 +350,6 @@ body: | ; SI-NEXT: S_NOP 0, implicit [[BUILD_VECTOR]](<3 x s32>) ; VI-LABEL: name: test_smin_v3s16 ; VI: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF - ; VI-NEXT: [[DEF1:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF ; VI-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<4 x s16>) ; VI-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>) ; VI-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST]](s32) @@ -359,6 +358,7 @@ body: | ; VI-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) ; VI-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>) ; VI-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST1]](s32) + ; VI-NEXT: [[DEF1:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF ; VI-NEXT: [[UV2:%[0-9]+]]:_(<2 x s16>), [[UV3:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF1]](<4 x s16>) ; VI-NEXT: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[UV2]](<2 x s16>) ; VI-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST2]](s32) @@ -376,23 +376,27 @@ body: | ; VI-NEXT: S_NOP 0, implicit [[BUILD_VECTOR]](<3 x s32>) ; GFX9-LABEL: name: test_smin_v3s16 ; GFX9: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF - ; GFX9-NEXT: [[DEF1:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF - ; GFX9-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<12 x s16>) = G_CONCAT_VECTORS [[DEF]](<4 x s16>), [[DEF1]](<4 x s16>), [[DEF1]](<4 x s16>) - ; GFX9-NEXT: [[UV:%[0-9]+]]:_(<3 x s16>), [[UV1:%[0-9]+]]:_(<3 x s16>), [[UV2:%[0-9]+]]:_(<3 x s16>), [[UV3:%[0-9]+]]:_(<3 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<12 x s16>) - ; GFX9-NEXT: [[DEF2:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF - ; GFX9-NEXT: [[CONCAT_VECTORS1:%[0-9]+]]:_(<12 x s16>) = G_CONCAT_VECTORS [[DEF2]](<4 x s16>), [[DEF1]](<4 x s16>), [[DEF1]](<4 x s16>) - ; GFX9-NEXT: [[UV4:%[0-9]+]]:_(<3 x s16>), [[UV5:%[0-9]+]]:_(<3 x s16>), [[UV6:%[0-9]+]]:_(<3 x s16>), [[UV7:%[0-9]+]]:_(<3 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS1]](<12 x s16>) - ; GFX9-NEXT: [[INSERT:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF1]], [[UV]](<3 x s16>), 0 - ; GFX9-NEXT: 
[[INSERT1:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF1]], [[UV4]](<3 x s16>), 0 - ; GFX9-NEXT: [[UV8:%[0-9]+]]:_(<2 x s16>), [[UV9:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[INSERT]](<4 x s16>) - ; GFX9-NEXT: [[UV10:%[0-9]+]]:_(<2 x s16>), [[UV11:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[INSERT1]](<4 x s16>) - ; GFX9-NEXT: [[SMIN:%[0-9]+]]:_(<2 x s16>) = G_SMIN [[UV8]], [[UV10]] - ; GFX9-NEXT: [[SMIN1:%[0-9]+]]:_(<2 x s16>) = G_SMIN [[UV9]], [[UV11]] - ; GFX9-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[SMIN]](<2 x s16>) + ; GFX9-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<4 x s16>) + ; GFX9-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>) ; GFX9-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 ; GFX9-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) - ; GFX9-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[SMIN1]](<2 x s16>) - ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[BITCAST]](s32), [[LSHR]](s32), [[BITCAST1]](s32) + ; GFX9-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>) + ; GFX9-NEXT: [[DEF1:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF + ; GFX9-NEXT: [[UV2:%[0-9]+]]:_(<2 x s16>), [[UV3:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF1]](<4 x s16>) + ; GFX9-NEXT: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[UV2]](<2 x s16>) + ; GFX9-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C]](s32) + ; GFX9-NEXT: [[BITCAST3:%[0-9]+]]:_(s32) = G_BITCAST [[UV3]](<2 x s16>) + ; GFX9-NEXT: [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[BITCAST]](s32), [[LSHR]](s32) + ; GFX9-NEXT: [[DEF2:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF + ; GFX9-NEXT: [[BUILD_VECTOR_TRUNC1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[BITCAST1]](s32), [[DEF2]](s32) + ; GFX9-NEXT: [[BUILD_VECTOR_TRUNC2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[BITCAST2]](s32), [[LSHR1]](s32) + ; GFX9-NEXT: [[BUILD_VECTOR_TRUNC3:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[BITCAST3]](s32), [[DEF2]](s32) + ; GFX9-NEXT: [[SMIN:%[0-9]+]]:_(<2 x s16>) = G_SMIN [[BUILD_VECTOR_TRUNC]], [[BUILD_VECTOR_TRUNC2]] + ; GFX9-NEXT: [[SMIN1:%[0-9]+]]:_(<2 x s16>) = G_SMIN [[BUILD_VECTOR_TRUNC1]], [[BUILD_VECTOR_TRUNC3]] + ; GFX9-NEXT: [[BITCAST4:%[0-9]+]]:_(s32) = G_BITCAST [[SMIN]](<2 x s16>) + ; GFX9-NEXT: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST4]], [[C]](s32) + ; GFX9-NEXT: [[BITCAST5:%[0-9]+]]:_(s32) = G_BITCAST [[SMIN1]](<2 x s16>) + ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[BITCAST4]](s32), [[LSHR2]](s32), [[BITCAST5]](s32) ; GFX9-NEXT: S_NOP 0, implicit [[BUILD_VECTOR]](<3 x s32>) %0:_(<3 x s16>) = G_IMPLICIT_DEF %1:_(<3 x s16>) = G_IMPLICIT_DEF diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-sshlsat.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-sshlsat.mir index 21a553538466..ddfa0e0669c3 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-sshlsat.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-sshlsat.mir @@ -479,11 +479,9 @@ body: | ; GFX6-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 ; GFX6-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) ; GFX6-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>) - ; GFX6-NEXT: [[UV3:%[0-9]+]]:_(<2 x s16>), [[UV4:%[0-9]+]]:_(<2 x s16>), [[UV5:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[COPY]](<6 x s16>) - ; GFX6-NEXT: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[UV4]](<2 x s16>) - ; GFX6-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C]](s32) - ; GFX6-NEXT: [[BITCAST3:%[0-9]+]]:_(s32) 
= G_BITCAST [[UV5]](<2 x s16>) - ; GFX6-NEXT: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST3]], [[C]](s32) + ; GFX6-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C]](s32) + ; GFX6-NEXT: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[UV2]](<2 x s16>) + ; GFX6-NEXT: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C]](s32) ; GFX6-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 ; GFX6-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[LSHR1]], [[C1]] ; GFX6-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[BITCAST]], [[C]](s32) @@ -497,7 +495,7 @@ body: | ; GFX6-NEXT: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[SHL]](s32), [[ASHR]] ; GFX6-NEXT: [[SELECT1:%[0-9]+]]:_(s32) = G_SELECT [[ICMP1]](s1), [[SELECT]], [[SHL1]] ; GFX6-NEXT: [[ASHR1:%[0-9]+]]:_(s32) = G_ASHR [[SELECT1]], [[C]](s32) - ; GFX6-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[BITCAST3]], [[C1]] + ; GFX6-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[BITCAST2]], [[C1]] ; GFX6-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[LSHR]], [[C]](s32) ; GFX6-NEXT: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[SHL2]], [[AND1]](s32) ; GFX6-NEXT: [[ASHR2:%[0-9]+]]:_(s32) = G_ASHR [[SHL3]], [[AND1]](s32) @@ -516,26 +514,26 @@ body: | ; GFX6-NEXT: [[SELECT5:%[0-9]+]]:_(s32) = G_SELECT [[ICMP5]](s1), [[SELECT4]], [[SHL5]] ; GFX6-NEXT: [[ASHR5:%[0-9]+]]:_(s32) = G_ASHR [[SELECT5]], [[C]](s32) ; GFX6-NEXT: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF - ; GFX6-NEXT: [[UV6:%[0-9]+]]:_(<2 x s16>), [[UV7:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<4 x s16>) - ; GFX6-NEXT: [[BITCAST4:%[0-9]+]]:_(s32) = G_BITCAST [[UV6]](<2 x s16>) - ; GFX6-NEXT: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST4]], [[C]](s32) - ; GFX6-NEXT: [[BITCAST5:%[0-9]+]]:_(s32) = G_BITCAST [[UV7]](<2 x s16>) + ; GFX6-NEXT: [[UV3:%[0-9]+]]:_(<2 x s16>), [[UV4:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<4 x s16>) + ; GFX6-NEXT: [[BITCAST3:%[0-9]+]]:_(s32) = G_BITCAST [[UV3]](<2 x s16>) + ; GFX6-NEXT: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST3]], [[C]](s32) + ; GFX6-NEXT: [[BITCAST4:%[0-9]+]]:_(s32) = G_BITCAST [[UV4]](<2 x s16>) ; GFX6-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[ASHR1]], [[C1]] ; GFX6-NEXT: [[AND4:%[0-9]+]]:_(s32) = G_AND [[ASHR3]], [[C1]] ; GFX6-NEXT: [[SHL6:%[0-9]+]]:_(s32) = G_SHL [[AND4]], [[C]](s32) ; GFX6-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND3]], [[SHL6]] - ; GFX6-NEXT: [[BITCAST6:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) + ; GFX6-NEXT: [[BITCAST5:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) ; GFX6-NEXT: [[AND5:%[0-9]+]]:_(s32) = G_AND [[ASHR5]], [[C1]] - ; GFX6-NEXT: [[AND6:%[0-9]+]]:_(s32) = G_AND [[BITCAST4]], [[C1]] + ; GFX6-NEXT: [[AND6:%[0-9]+]]:_(s32) = G_AND [[BITCAST3]], [[C1]] ; GFX6-NEXT: [[SHL7:%[0-9]+]]:_(s32) = G_SHL [[AND6]], [[C]](s32) ; GFX6-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND5]], [[SHL7]] - ; GFX6-NEXT: [[BITCAST7:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32) + ; GFX6-NEXT: [[BITCAST6:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32) ; GFX6-NEXT: [[AND7:%[0-9]+]]:_(s32) = G_AND [[LSHR3]], [[C1]] - ; GFX6-NEXT: [[AND8:%[0-9]+]]:_(s32) = G_AND [[BITCAST5]], [[C1]] + ; GFX6-NEXT: [[AND8:%[0-9]+]]:_(s32) = G_AND [[BITCAST4]], [[C1]] ; GFX6-NEXT: [[SHL8:%[0-9]+]]:_(s32) = G_SHL [[AND8]], [[C]](s32) ; GFX6-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[AND7]], [[SHL8]] - ; GFX6-NEXT: [[BITCAST8:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR2]](s32) - ; GFX6-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BITCAST6]](<2 x s16>), [[BITCAST7]](<2 x s16>), [[BITCAST8]](<2 x s16>) + ; GFX6-NEXT: [[BITCAST7:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR2]](s32) + ; GFX6-NEXT: 
[[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BITCAST5]](<2 x s16>), [[BITCAST6]](<2 x s16>), [[BITCAST7]](<2 x s16>) ; GFX6-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[CONCAT_VECTORS]](<6 x s16>) ; GFX8-LABEL: name: sshlsat_v3s16 ; GFX8: [[COPY:%[0-9]+]]:_(<6 x s16>) = COPY $vgpr0_vgpr1_vgpr2 @@ -547,13 +545,11 @@ body: | ; GFX8-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) ; GFX8-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>) ; GFX8-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST1]](s32) - ; GFX8-NEXT: [[UV3:%[0-9]+]]:_(<2 x s16>), [[UV4:%[0-9]+]]:_(<2 x s16>), [[UV5:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[COPY]](<6 x s16>) - ; GFX8-NEXT: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[UV4]](<2 x s16>) - ; GFX8-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C]](s32) + ; GFX8-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C]](s32) ; GFX8-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR1]](s32) - ; GFX8-NEXT: [[BITCAST3:%[0-9]+]]:_(s32) = G_BITCAST [[UV5]](<2 x s16>) - ; GFX8-NEXT: [[TRUNC4:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST3]](s32) - ; GFX8-NEXT: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST3]], [[C]](s32) + ; GFX8-NEXT: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[UV2]](<2 x s16>) + ; GFX8-NEXT: [[TRUNC4:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST2]](s32) + ; GFX8-NEXT: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C]](s32) ; GFX8-NEXT: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR2]](s32) ; GFX8-NEXT: [[SHL:%[0-9]+]]:_(s16) = G_SHL [[TRUNC]], [[TRUNC3]](s16) ; GFX8-NEXT: [[ASHR:%[0-9]+]]:_(s16) = G_ASHR [[SHL]], [[TRUNC3]](s16) @@ -577,27 +573,27 @@ body: | ; GFX8-NEXT: [[ICMP5:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[TRUNC2]](s16), [[ASHR2]] ; GFX8-NEXT: [[SELECT5:%[0-9]+]]:_(s16) = G_SELECT [[ICMP5]](s1), [[SELECT4]], [[SHL2]] ; GFX8-NEXT: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF - ; GFX8-NEXT: [[UV6:%[0-9]+]]:_(<2 x s16>), [[UV7:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<4 x s16>) - ; GFX8-NEXT: [[BITCAST4:%[0-9]+]]:_(s32) = G_BITCAST [[UV6]](<2 x s16>) - ; GFX8-NEXT: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST4]], [[C]](s32) - ; GFX8-NEXT: [[BITCAST5:%[0-9]+]]:_(s32) = G_BITCAST [[UV7]](<2 x s16>) + ; GFX8-NEXT: [[UV3:%[0-9]+]]:_(<2 x s16>), [[UV4:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<4 x s16>) + ; GFX8-NEXT: [[BITCAST3:%[0-9]+]]:_(s32) = G_BITCAST [[UV3]](<2 x s16>) + ; GFX8-NEXT: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST3]], [[C]](s32) + ; GFX8-NEXT: [[BITCAST4:%[0-9]+]]:_(s32) = G_BITCAST [[UV4]](<2 x s16>) ; GFX8-NEXT: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[SELECT1]](s16) ; GFX8-NEXT: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[SELECT3]](s16) ; GFX8-NEXT: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C]](s32) ; GFX8-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL3]] - ; GFX8-NEXT: [[BITCAST6:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) + ; GFX8-NEXT: [[BITCAST5:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) ; GFX8-NEXT: [[ZEXT2:%[0-9]+]]:_(s32) = G_ZEXT [[SELECT5]](s16) ; GFX8-NEXT: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 - ; GFX8-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[BITCAST4]], [[C4]] + ; GFX8-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[BITCAST3]], [[C4]] ; GFX8-NEXT: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[AND]], [[C]](s32) ; GFX8-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[ZEXT2]], [[SHL4]] - ; GFX8-NEXT: [[BITCAST7:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32) + ; GFX8-NEXT: [[BITCAST6:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32) ; GFX8-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LSHR3]], [[C4]] - ; GFX8-NEXT: 
[[AND2:%[0-9]+]]:_(s32) = G_AND [[BITCAST5]], [[C4]] + ; GFX8-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[BITCAST4]], [[C4]] ; GFX8-NEXT: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[AND2]], [[C]](s32) ; GFX8-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[AND1]], [[SHL5]] - ; GFX8-NEXT: [[BITCAST8:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR2]](s32) - ; GFX8-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BITCAST6]](<2 x s16>), [[BITCAST7]](<2 x s16>), [[BITCAST8]](<2 x s16>) + ; GFX8-NEXT: [[BITCAST7:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR2]](s32) + ; GFX8-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BITCAST5]](<2 x s16>), [[BITCAST6]](<2 x s16>), [[BITCAST7]](<2 x s16>) ; GFX8-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[CONCAT_VECTORS]](<6 x s16>) ; GFX9-LABEL: name: sshlsat_v3s16 ; GFX9: [[COPY:%[0-9]+]]:_(<6 x s16>) = COPY $vgpr0_vgpr1_vgpr2 @@ -609,13 +605,11 @@ body: | ; GFX9-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) ; GFX9-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>) ; GFX9-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST1]](s32) - ; GFX9-NEXT: [[UV3:%[0-9]+]]:_(<2 x s16>), [[UV4:%[0-9]+]]:_(<2 x s16>), [[UV5:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[COPY]](<6 x s16>) - ; GFX9-NEXT: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[UV4]](<2 x s16>) - ; GFX9-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C]](s32) + ; GFX9-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C]](s32) ; GFX9-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR1]](s32) - ; GFX9-NEXT: [[BITCAST3:%[0-9]+]]:_(s32) = G_BITCAST [[UV5]](<2 x s16>) - ; GFX9-NEXT: [[TRUNC4:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST3]](s32) - ; GFX9-NEXT: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST3]], [[C]](s32) + ; GFX9-NEXT: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[UV2]](<2 x s16>) + ; GFX9-NEXT: [[TRUNC4:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST2]](s32) + ; GFX9-NEXT: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C]](s32) ; GFX9-NEXT: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR2]](s32) ; GFX9-NEXT: [[SHL:%[0-9]+]]:_(s16) = G_SHL [[TRUNC]], [[TRUNC3]](s16) ; GFX9-NEXT: [[ASHR:%[0-9]+]]:_(s16) = G_ASHR [[SHL]], [[TRUNC3]](s16) @@ -639,16 +633,16 @@ body: | ; GFX9-NEXT: [[ICMP5:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[TRUNC2]](s16), [[ASHR2]] ; GFX9-NEXT: [[SELECT5:%[0-9]+]]:_(s16) = G_SELECT [[ICMP5]](s1), [[SELECT4]], [[SHL2]] ; GFX9-NEXT: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF - ; GFX9-NEXT: [[UV6:%[0-9]+]]:_(<2 x s16>), [[UV7:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<4 x s16>) - ; GFX9-NEXT: [[BITCAST4:%[0-9]+]]:_(s32) = G_BITCAST [[UV6]](<2 x s16>) - ; GFX9-NEXT: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST4]], [[C]](s32) - ; GFX9-NEXT: [[BITCAST5:%[0-9]+]]:_(s32) = G_BITCAST [[UV7]](<2 x s16>) + ; GFX9-NEXT: [[UV3:%[0-9]+]]:_(<2 x s16>), [[UV4:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<4 x s16>) + ; GFX9-NEXT: [[BITCAST3:%[0-9]+]]:_(s32) = G_BITCAST [[UV3]](<2 x s16>) + ; GFX9-NEXT: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST3]], [[C]](s32) + ; GFX9-NEXT: [[BITCAST4:%[0-9]+]]:_(s32) = G_BITCAST [[UV4]](<2 x s16>) ; GFX9-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[SELECT1]](s16) ; GFX9-NEXT: [[ANYEXT1:%[0-9]+]]:_(s32) = G_ANYEXT [[SELECT3]](s16) ; GFX9-NEXT: [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[ANYEXT]](s32), [[ANYEXT1]](s32) ; GFX9-NEXT: [[ANYEXT2:%[0-9]+]]:_(s32) = G_ANYEXT [[SELECT5]](s16) - ; GFX9-NEXT: [[BUILD_VECTOR_TRUNC1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[ANYEXT2]](s32), [[BITCAST4]](s32) - ; GFX9-NEXT: 
[[BUILD_VECTOR_TRUNC2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[LSHR3]](s32), [[BITCAST5]](s32) + ; GFX9-NEXT: [[BUILD_VECTOR_TRUNC1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[ANYEXT2]](s32), [[BITCAST3]](s32) + ; GFX9-NEXT: [[BUILD_VECTOR_TRUNC2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[LSHR3]](s32), [[BITCAST4]](s32) ; GFX9-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR_TRUNC]](<2 x s16>), [[BUILD_VECTOR_TRUNC1]](<2 x s16>), [[BUILD_VECTOR_TRUNC2]](<2 x s16>) ; GFX9-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[CONCAT_VECTORS]](<6 x s16>) %0:_(<6 x s16>) = COPY $vgpr0_vgpr1_vgpr2 diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-ssubsat.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-ssubsat.mir index e2516e5d79f7..d6ae62a28a93 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-ssubsat.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-ssubsat.mir @@ -424,11 +424,9 @@ body: | ; GFX6-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 ; GFX6-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) ; GFX6-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>) - ; GFX6-NEXT: [[UV3:%[0-9]+]]:_(<2 x s16>), [[UV4:%[0-9]+]]:_(<2 x s16>), [[UV5:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[COPY]](<6 x s16>) - ; GFX6-NEXT: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[UV4]](<2 x s16>) - ; GFX6-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C]](s32) - ; GFX6-NEXT: [[BITCAST3:%[0-9]+]]:_(s32) = G_BITCAST [[UV5]](<2 x s16>) - ; GFX6-NEXT: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST3]], [[C]](s32) + ; GFX6-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C]](s32) + ; GFX6-NEXT: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[UV2]](<2 x s16>) + ; GFX6-NEXT: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C]](s32) ; GFX6-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[BITCAST]], [[C]](s32) ; GFX6-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[LSHR1]], [[C]](s32) ; GFX6-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 2147483647 @@ -443,7 +441,7 @@ body: | ; GFX6-NEXT: [[SUB2:%[0-9]+]]:_(s32) = G_SUB [[SHL]], [[SMIN1]] ; GFX6-NEXT: [[ASHR:%[0-9]+]]:_(s32) = G_ASHR [[SUB2]], [[C]](s32) ; GFX6-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[LSHR]], [[C]](s32) - ; GFX6-NEXT: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[BITCAST3]], [[C]](s32) + ; GFX6-NEXT: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[BITCAST2]], [[C]](s32) ; GFX6-NEXT: [[SMAX2:%[0-9]+]]:_(s32) = G_SMAX [[SHL2]], [[C3]] ; GFX6-NEXT: [[SUB3:%[0-9]+]]:_(s32) = G_SUB [[SMAX2]], [[C1]] ; GFX6-NEXT: [[SMIN2:%[0-9]+]]:_(s32) = G_SMIN [[SHL2]], [[C3]] @@ -463,27 +461,27 @@ body: | ; GFX6-NEXT: [[SUB8:%[0-9]+]]:_(s32) = G_SUB [[SHL4]], [[SMIN5]] ; GFX6-NEXT: [[ASHR2:%[0-9]+]]:_(s32) = G_ASHR [[SUB8]], [[C]](s32) ; GFX6-NEXT: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF - ; GFX6-NEXT: [[UV6:%[0-9]+]]:_(<2 x s16>), [[UV7:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<4 x s16>) - ; GFX6-NEXT: [[BITCAST4:%[0-9]+]]:_(s32) = G_BITCAST [[UV6]](<2 x s16>) - ; GFX6-NEXT: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST4]], [[C]](s32) - ; GFX6-NEXT: [[BITCAST5:%[0-9]+]]:_(s32) = G_BITCAST [[UV7]](<2 x s16>) + ; GFX6-NEXT: [[UV3:%[0-9]+]]:_(<2 x s16>), [[UV4:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<4 x s16>) + ; GFX6-NEXT: [[BITCAST3:%[0-9]+]]:_(s32) = G_BITCAST [[UV3]](<2 x s16>) + ; GFX6-NEXT: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST3]], [[C]](s32) + ; GFX6-NEXT: [[BITCAST4:%[0-9]+]]:_(s32) = G_BITCAST [[UV4]](<2 x s16>) ; GFX6-NEXT: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 ; GFX6-NEXT: [[AND:%[0-9]+]]:_(s32) = 
G_AND [[ASHR]], [[C4]] ; GFX6-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[ASHR1]], [[C4]] ; GFX6-NEXT: [[SHL6:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C]](s32) ; GFX6-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL6]] - ; GFX6-NEXT: [[BITCAST6:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) + ; GFX6-NEXT: [[BITCAST5:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) ; GFX6-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[ASHR2]], [[C4]] - ; GFX6-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[BITCAST4]], [[C4]] + ; GFX6-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[BITCAST3]], [[C4]] ; GFX6-NEXT: [[SHL7:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C]](s32) ; GFX6-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL7]] - ; GFX6-NEXT: [[BITCAST7:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32) + ; GFX6-NEXT: [[BITCAST6:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32) ; GFX6-NEXT: [[AND4:%[0-9]+]]:_(s32) = G_AND [[LSHR3]], [[C4]] - ; GFX6-NEXT: [[AND5:%[0-9]+]]:_(s32) = G_AND [[BITCAST5]], [[C4]] + ; GFX6-NEXT: [[AND5:%[0-9]+]]:_(s32) = G_AND [[BITCAST4]], [[C4]] ; GFX6-NEXT: [[SHL8:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[C]](s32) ; GFX6-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[AND4]], [[SHL8]] - ; GFX6-NEXT: [[BITCAST8:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR2]](s32) - ; GFX6-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BITCAST6]](<2 x s16>), [[BITCAST7]](<2 x s16>), [[BITCAST8]](<2 x s16>) + ; GFX6-NEXT: [[BITCAST7:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR2]](s32) + ; GFX6-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BITCAST5]](<2 x s16>), [[BITCAST6]](<2 x s16>), [[BITCAST7]](<2 x s16>) ; GFX6-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[CONCAT_VECTORS]](<6 x s16>) ; GFX8-LABEL: name: ssubsat_v3s16 ; GFX8: [[COPY:%[0-9]+]]:_(<6 x s16>) = COPY $vgpr0_vgpr1_vgpr2 @@ -495,13 +493,11 @@ body: | ; GFX8-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) ; GFX8-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>) ; GFX8-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST1]](s32) - ; GFX8-NEXT: [[UV3:%[0-9]+]]:_(<2 x s16>), [[UV4:%[0-9]+]]:_(<2 x s16>), [[UV5:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[COPY]](<6 x s16>) - ; GFX8-NEXT: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[UV4]](<2 x s16>) - ; GFX8-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C]](s32) + ; GFX8-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C]](s32) ; GFX8-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR1]](s32) - ; GFX8-NEXT: [[BITCAST3:%[0-9]+]]:_(s32) = G_BITCAST [[UV5]](<2 x s16>) - ; GFX8-NEXT: [[TRUNC4:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST3]](s32) - ; GFX8-NEXT: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST3]], [[C]](s32) + ; GFX8-NEXT: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[UV2]](<2 x s16>) + ; GFX8-NEXT: [[TRUNC4:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST2]](s32) + ; GFX8-NEXT: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C]](s32) ; GFX8-NEXT: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR2]](s32) ; GFX8-NEXT: [[C1:%[0-9]+]]:_(s16) = G_CONSTANT i16 32767 ; GFX8-NEXT: [[C2:%[0-9]+]]:_(s16) = G_CONSTANT i16 -32768 @@ -528,27 +524,27 @@ body: | ; GFX8-NEXT: [[SMIN5:%[0-9]+]]:_(s16) = G_SMIN [[SMAX5]], [[SUB7]] ; GFX8-NEXT: [[SUB8:%[0-9]+]]:_(s16) = G_SUB [[TRUNC2]], [[SMIN5]] ; GFX8-NEXT: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF - ; GFX8-NEXT: [[UV6:%[0-9]+]]:_(<2 x s16>), [[UV7:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<4 x s16>) - ; GFX8-NEXT: [[BITCAST4:%[0-9]+]]:_(s32) = G_BITCAST [[UV6]](<2 x s16>) - ; GFX8-NEXT: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST4]], [[C]](s32) - ; GFX8-NEXT: 
[[BITCAST5:%[0-9]+]]:_(s32) = G_BITCAST [[UV7]](<2 x s16>) + ; GFX8-NEXT: [[UV3:%[0-9]+]]:_(<2 x s16>), [[UV4:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<4 x s16>) + ; GFX8-NEXT: [[BITCAST3:%[0-9]+]]:_(s32) = G_BITCAST [[UV3]](<2 x s16>) + ; GFX8-NEXT: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST3]], [[C]](s32) + ; GFX8-NEXT: [[BITCAST4:%[0-9]+]]:_(s32) = G_BITCAST [[UV4]](<2 x s16>) ; GFX8-NEXT: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[SUB2]](s16) ; GFX8-NEXT: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[SUB5]](s16) ; GFX8-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C]](s32) ; GFX8-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL]] - ; GFX8-NEXT: [[BITCAST6:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) + ; GFX8-NEXT: [[BITCAST5:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) ; GFX8-NEXT: [[ZEXT2:%[0-9]+]]:_(s32) = G_ZEXT [[SUB8]](s16) ; GFX8-NEXT: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 - ; GFX8-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[BITCAST4]], [[C4]] + ; GFX8-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[BITCAST3]], [[C4]] ; GFX8-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND]], [[C]](s32) ; GFX8-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[ZEXT2]], [[SHL1]] - ; GFX8-NEXT: [[BITCAST7:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32) + ; GFX8-NEXT: [[BITCAST6:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32) ; GFX8-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LSHR3]], [[C4]] - ; GFX8-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[BITCAST5]], [[C4]] + ; GFX8-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[BITCAST4]], [[C4]] ; GFX8-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND2]], [[C]](s32) ; GFX8-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[AND1]], [[SHL2]] - ; GFX8-NEXT: [[BITCAST8:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR2]](s32) - ; GFX8-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BITCAST6]](<2 x s16>), [[BITCAST7]](<2 x s16>), [[BITCAST8]](<2 x s16>) + ; GFX8-NEXT: [[BITCAST7:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR2]](s32) + ; GFX8-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BITCAST5]](<2 x s16>), [[BITCAST6]](<2 x s16>), [[BITCAST7]](<2 x s16>) ; GFX8-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[CONCAT_VECTORS]](<6 x s16>) ; GFX9-LABEL: name: ssubsat_v3s16 ; GFX9: [[COPY:%[0-9]+]]:_(<6 x s16>) = COPY $vgpr0_vgpr1_vgpr2 @@ -557,29 +553,27 @@ body: | ; GFX9-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 ; GFX9-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) ; GFX9-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>) + ; GFX9-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C]](s32) + ; GFX9-NEXT: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[UV2]](<2 x s16>) + ; GFX9-NEXT: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C]](s32) ; GFX9-NEXT: [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[BITCAST]](s32), [[LSHR]](s32) ; GFX9-NEXT: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF ; GFX9-NEXT: [[BUILD_VECTOR_TRUNC1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[BITCAST1]](s32), [[DEF]](s32) - ; GFX9-NEXT: [[UV3:%[0-9]+]]:_(<2 x s16>), [[UV4:%[0-9]+]]:_(<2 x s16>), [[UV5:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[COPY]](<6 x s16>) - ; GFX9-NEXT: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[UV4]](<2 x s16>) - ; GFX9-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C]](s32) - ; GFX9-NEXT: [[BITCAST3:%[0-9]+]]:_(s32) = G_BITCAST [[UV5]](<2 x s16>) - ; GFX9-NEXT: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST3]], [[C]](s32) - ; GFX9-NEXT: [[BUILD_VECTOR_TRUNC2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[LSHR1]](s32), [[BITCAST3]](s32) + ; GFX9-NEXT: 
[[BUILD_VECTOR_TRUNC2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[LSHR1]](s32), [[BITCAST2]](s32) ; GFX9-NEXT: [[BUILD_VECTOR_TRUNC3:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[LSHR2]](s32), [[DEF]](s32) ; GFX9-NEXT: [[SSUBSAT:%[0-9]+]]:_(<2 x s16>) = G_SSUBSAT [[BUILD_VECTOR_TRUNC]], [[BUILD_VECTOR_TRUNC2]] ; GFX9-NEXT: [[SSUBSAT1:%[0-9]+]]:_(<2 x s16>) = G_SSUBSAT [[BUILD_VECTOR_TRUNC1]], [[BUILD_VECTOR_TRUNC3]] + ; GFX9-NEXT: [[BITCAST3:%[0-9]+]]:_(s32) = G_BITCAST [[SSUBSAT]](<2 x s16>) + ; GFX9-NEXT: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST3]], [[C]](s32) + ; GFX9-NEXT: [[BITCAST4:%[0-9]+]]:_(s32) = G_BITCAST [[SSUBSAT1]](<2 x s16>) ; GFX9-NEXT: [[DEF1:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF - ; GFX9-NEXT: [[BITCAST4:%[0-9]+]]:_(s32) = G_BITCAST [[SSUBSAT]](<2 x s16>) - ; GFX9-NEXT: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST4]], [[C]](s32) - ; GFX9-NEXT: [[BITCAST5:%[0-9]+]]:_(s32) = G_BITCAST [[SSUBSAT1]](<2 x s16>) - ; GFX9-NEXT: [[UV6:%[0-9]+]]:_(<2 x s16>), [[UV7:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF1]](<4 x s16>) - ; GFX9-NEXT: [[BITCAST6:%[0-9]+]]:_(s32) = G_BITCAST [[UV6]](<2 x s16>) - ; GFX9-NEXT: [[LSHR4:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST6]], [[C]](s32) - ; GFX9-NEXT: [[BITCAST7:%[0-9]+]]:_(s32) = G_BITCAST [[UV7]](<2 x s16>) - ; GFX9-NEXT: [[BUILD_VECTOR_TRUNC4:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[BITCAST4]](s32), [[LSHR3]](s32) - ; GFX9-NEXT: [[BUILD_VECTOR_TRUNC5:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[BITCAST5]](s32), [[BITCAST6]](s32) - ; GFX9-NEXT: [[BUILD_VECTOR_TRUNC6:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[LSHR4]](s32), [[BITCAST7]](s32) + ; GFX9-NEXT: [[UV3:%[0-9]+]]:_(<2 x s16>), [[UV4:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF1]](<4 x s16>) + ; GFX9-NEXT: [[BITCAST5:%[0-9]+]]:_(s32) = G_BITCAST [[UV3]](<2 x s16>) + ; GFX9-NEXT: [[LSHR4:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST5]], [[C]](s32) + ; GFX9-NEXT: [[BITCAST6:%[0-9]+]]:_(s32) = G_BITCAST [[UV4]](<2 x s16>) + ; GFX9-NEXT: [[BUILD_VECTOR_TRUNC4:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[BITCAST3]](s32), [[LSHR3]](s32) + ; GFX9-NEXT: [[BUILD_VECTOR_TRUNC5:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[BITCAST4]](s32), [[BITCAST5]](s32) + ; GFX9-NEXT: [[BUILD_VECTOR_TRUNC6:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[LSHR4]](s32), [[BITCAST6]](s32) ; GFX9-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR_TRUNC4]](<2 x s16>), [[BUILD_VECTOR_TRUNC5]](<2 x s16>), [[BUILD_VECTOR_TRUNC6]](<2 x s16>) ; GFX9-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[CONCAT_VECTORS]](<6 x s16>) %0:_(<6 x s16>) = COPY $vgpr0_vgpr1_vgpr2 diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-store-global.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-store-global.mir index c64f0951965d..d4f19a54f59c 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-store-global.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-store-global.mir @@ -1,16 +1,9 @@ # NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py -# RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=tahiti -O0 -run-pass=legalizer -global-isel-abort=0 -pass-remarks-missed='gisel.*' %s 2> %t.err -o - | FileCheck -check-prefix=SI %s -# RUN: FileCheck -check-prefix=ERR %s < %t.err -# RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=bonaire -O0 -run-pass=legalizer -global-isel-abort=0 %s -o - | FileCheck -check-prefix=CI %s -# RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=fiji -O0 -run-pass=legalizer -global-isel-abort=0 %s -o - | FileCheck -check-prefix=VI %s -# RUN: llc 
-mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 -O0 -run-pass=legalizer -global-isel-abort=0 -pass-remarks-missed='gisel.*' 2> %t.err %s -o - | FileCheck -check-prefix=GFX9 %s -# RUN: FileCheck -check-prefix=ERR %s < %t.err -# RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1010 -O0 -run-pass=legalizer -global-isel-abort=0 %s -o - | FileCheck -check-prefix=GFX9 %s - - -# ERR-NOT: remark -# ERR: remark: :0:0: unable to legalize instruction: %{{[0-9]+}}:_(<132 x s16>) = G_CONCAT_VECTORS {{.*}} (in function: test_store_global_v11s16_align4) -# ERR-NOT: remark +# RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=tahiti -O0 -run-pass=legalizer %s -o - | FileCheck -check-prefix=SI %s +# RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=bonaire -O0 -run-pass=legalizer %s -o - | FileCheck -check-prefix=CI %s +# RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=fiji -O0 -run-pass=legalizer %s -o - | FileCheck -check-prefix=VI %s +# RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 -O0 -run-pass=legalizer %s -o - | FileCheck -check-prefix=GFX9 %s +# RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1010 -O0 -run-pass=legalizer %s -o - | FileCheck -check-prefix=GFX9 %s --- name: test_store_global_s1_align1 @@ -2388,9 +2381,7 @@ body: | ; SI-LABEL: name: test_store_global_v3s32_align1 ; SI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 ; SI-NEXT: [[COPY1:%[0-9]+]]:_(<3 x s32>) = COPY $vgpr2_vgpr3_vgpr4 - ; SI-NEXT: [[EXTRACT:%[0-9]+]]:_(<2 x s32>) = G_EXTRACT [[COPY1]](<3 x s32>), 0 - ; SI-NEXT: [[EXTRACT1:%[0-9]+]]:_(s32) = G_EXTRACT [[COPY1]](<3 x s32>), 64 - ; SI-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[EXTRACT]](<2 x s32>) + ; SI-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](<3 x s32>) ; SI-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY [[UV]](s32) ; SI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 ; SI-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[COPY2]], [[C]](s32) @@ -2429,7 +2420,7 @@ body: | ; SI-NEXT: G_STORE [[LSHR5]](s32), [[PTR_ADD6]](p1) :: (store (s8) into unknown-address + 7, addrspace 1) ; SI-NEXT: [[C6:%[0-9]+]]:_(s64) = G_CONSTANT i64 8 ; SI-NEXT: [[PTR_ADD7:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C6]](s64) - ; SI-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY [[EXTRACT1]](s32) + ; SI-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY [[UV2]](s32) ; SI-NEXT: [[LSHR6:%[0-9]+]]:_(s32) = G_LSHR [[COPY7]], [[C]](s32) ; SI-NEXT: [[PTR_ADD8:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD7]], [[C1]](s64) ; SI-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY [[C2]](s32) @@ -2523,9 +2514,7 @@ body: | ; SI-LABEL: name: test_store_global_v3s32_align2 ; SI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 ; SI-NEXT: [[COPY1:%[0-9]+]]:_(<3 x s32>) = COPY $vgpr2_vgpr3_vgpr4 - ; SI-NEXT: [[EXTRACT:%[0-9]+]]:_(<2 x s32>) = G_EXTRACT [[COPY1]](<3 x s32>), 0 - ; SI-NEXT: [[EXTRACT1:%[0-9]+]]:_(s32) = G_EXTRACT [[COPY1]](<3 x s32>), 64 - ; SI-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[EXTRACT]](<2 x s32>) + ; SI-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](<3 x s32>) ; SI-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY [[UV]](s32) ; SI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 ; SI-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[COPY2]], [[C]](s32) @@ -2542,7 +2531,7 @@ body: | ; SI-NEXT: G_STORE [[LSHR1]](s32), [[PTR_ADD2]](p1) :: (store (s16) into unknown-address + 6, addrspace 1) ; SI-NEXT: [[C3:%[0-9]+]]:_(s64) = G_CONSTANT i64 8 ; SI-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C3]](s64) - ; SI-NEXT: 
[[COPY4:%[0-9]+]]:_(s32) = COPY [[EXTRACT1]](s32) + ; SI-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY [[UV2]](s32) ; SI-NEXT: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[COPY4]], [[C]](s32) ; SI-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD3]], [[C1]](s64) ; SI-NEXT: G_STORE [[COPY4]](s32), [[PTR_ADD3]](p1) :: (store (s16) into unknown-address + 8, addrspace 1) @@ -2594,12 +2583,12 @@ body: | ; SI-LABEL: name: test_store_global_v3s32_align4 ; SI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 ; SI-NEXT: [[COPY1:%[0-9]+]]:_(<3 x s32>) = COPY $vgpr2_vgpr3_vgpr4 - ; SI-NEXT: [[EXTRACT:%[0-9]+]]:_(<2 x s32>) = G_EXTRACT [[COPY1]](<3 x s32>), 0 - ; SI-NEXT: [[EXTRACT1:%[0-9]+]]:_(s32) = G_EXTRACT [[COPY1]](<3 x s32>), 64 - ; SI-NEXT: G_STORE [[EXTRACT]](<2 x s32>), [[COPY]](p1) :: (store (<2 x s32>), align 4, addrspace 1) + ; SI-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](<3 x s32>) + ; SI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[UV]](s32), [[UV1]](s32) + ; SI-NEXT: G_STORE [[BUILD_VECTOR]](<2 x s32>), [[COPY]](p1) :: (store (<2 x s32>), align 4, addrspace 1) ; SI-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 8 ; SI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64) - ; SI-NEXT: G_STORE [[EXTRACT1]](s32), [[PTR_ADD]](p1) :: (store (s32) into unknown-address + 8, addrspace 1) + ; SI-NEXT: G_STORE [[UV2]](s32), [[PTR_ADD]](p1) :: (store (s32) into unknown-address + 8, addrspace 1) ; CI-LABEL: name: test_store_global_v3s32_align4 ; CI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 ; CI-NEXT: [[COPY1:%[0-9]+]]:_(<3 x s32>) = COPY $vgpr2_vgpr3_vgpr4 @@ -2626,12 +2615,12 @@ body: | ; SI-LABEL: name: test_store_global_v3s32_align8 ; SI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 ; SI-NEXT: [[COPY1:%[0-9]+]]:_(<3 x s32>) = COPY $vgpr2_vgpr3_vgpr4 - ; SI-NEXT: [[EXTRACT:%[0-9]+]]:_(<2 x s32>) = G_EXTRACT [[COPY1]](<3 x s32>), 0 - ; SI-NEXT: [[EXTRACT1:%[0-9]+]]:_(s32) = G_EXTRACT [[COPY1]](<3 x s32>), 64 - ; SI-NEXT: G_STORE [[EXTRACT]](<2 x s32>), [[COPY]](p1) :: (store (<2 x s32>), addrspace 1) + ; SI-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](<3 x s32>) + ; SI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[UV]](s32), [[UV1]](s32) + ; SI-NEXT: G_STORE [[BUILD_VECTOR]](<2 x s32>), [[COPY]](p1) :: (store (<2 x s32>), addrspace 1) ; SI-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 8 ; SI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64) - ; SI-NEXT: G_STORE [[EXTRACT1]](s32), [[PTR_ADD]](p1) :: (store (s32) into unknown-address + 8, align 8, addrspace 1) + ; SI-NEXT: G_STORE [[UV2]](s32), [[PTR_ADD]](p1) :: (store (s32) into unknown-address + 8, align 8, addrspace 1) ; CI-LABEL: name: test_store_global_v3s32_align8 ; CI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 ; CI-NEXT: [[COPY1:%[0-9]+]]:_(<3 x s32>) = COPY $vgpr2_vgpr3_vgpr4 @@ -2658,12 +2647,12 @@ body: | ; SI-LABEL: name: test_store_global_v3s32_align16 ; SI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 ; SI-NEXT: [[COPY1:%[0-9]+]]:_(<3 x s32>) = COPY $vgpr2_vgpr3_vgpr4 - ; SI-NEXT: [[EXTRACT:%[0-9]+]]:_(<2 x s32>) = G_EXTRACT [[COPY1]](<3 x s32>), 0 - ; SI-NEXT: [[EXTRACT1:%[0-9]+]]:_(s32) = G_EXTRACT [[COPY1]](<3 x s32>), 64 - ; SI-NEXT: G_STORE [[EXTRACT]](<2 x s32>), [[COPY]](p1) :: (store (<2 x s32>), align 16, addrspace 1) + ; SI-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](<3 x s32>) + ; SI-NEXT: 
[[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[UV]](s32), [[UV1]](s32) + ; SI-NEXT: G_STORE [[BUILD_VECTOR]](<2 x s32>), [[COPY]](p1) :: (store (<2 x s32>), align 16, addrspace 1) ; SI-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 8 ; SI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64) - ; SI-NEXT: G_STORE [[EXTRACT1]](s32), [[PTR_ADD]](p1) :: (store (s32) into unknown-address + 8, align 8, addrspace 1) + ; SI-NEXT: G_STORE [[UV2]](s32), [[PTR_ADD]](p1) :: (store (s32) into unknown-address + 8, align 8, addrspace 1) ; CI-LABEL: name: test_store_global_v3s32_align16 ; CI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 ; CI-NEXT: [[COPY1:%[0-9]+]]:_(<3 x s32>) = COPY $vgpr2_vgpr3_vgpr4 @@ -4091,9 +4080,7 @@ body: | ; SI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 ; SI-NEXT: [[COPY1:%[0-9]+]]:_(s96) = COPY $vgpr2_vgpr3_vgpr4 ; SI-NEXT: [[BITCAST:%[0-9]+]]:_(<3 x s32>) = G_BITCAST [[COPY1]](s96) - ; SI-NEXT: [[EXTRACT:%[0-9]+]]:_(<2 x s32>) = G_EXTRACT [[BITCAST]](<3 x s32>), 0 - ; SI-NEXT: [[EXTRACT1:%[0-9]+]]:_(s32) = G_EXTRACT [[BITCAST]](<3 x s32>), 64 - ; SI-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[EXTRACT]](<2 x s32>) + ; SI-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[BITCAST]](<3 x s32>) ; SI-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY [[UV]](s32) ; SI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 ; SI-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[COPY2]], [[C]](s32) @@ -4132,7 +4119,7 @@ body: | ; SI-NEXT: G_STORE [[LSHR5]](s32), [[PTR_ADD6]](p1) :: (store (s8) into unknown-address + 7, addrspace 1) ; SI-NEXT: [[C6:%[0-9]+]]:_(s64) = G_CONSTANT i64 8 ; SI-NEXT: [[PTR_ADD7:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C6]](s64) - ; SI-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY [[EXTRACT1]](s32) + ; SI-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY [[UV2]](s32) ; SI-NEXT: [[LSHR6:%[0-9]+]]:_(s32) = G_LSHR [[COPY7]], [[C]](s32) ; SI-NEXT: [[PTR_ADD8:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD7]], [[C1]](s64) ; SI-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY [[C2]](s32) @@ -4230,9 +4217,7 @@ body: | ; SI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 ; SI-NEXT: [[COPY1:%[0-9]+]]:_(s96) = COPY $vgpr2_vgpr3_vgpr4 ; SI-NEXT: [[BITCAST:%[0-9]+]]:_(<3 x s32>) = G_BITCAST [[COPY1]](s96) - ; SI-NEXT: [[EXTRACT:%[0-9]+]]:_(<2 x s32>) = G_EXTRACT [[BITCAST]](<3 x s32>), 0 - ; SI-NEXT: [[EXTRACT1:%[0-9]+]]:_(s32) = G_EXTRACT [[BITCAST]](<3 x s32>), 64 - ; SI-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[EXTRACT]](<2 x s32>) + ; SI-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[BITCAST]](<3 x s32>) ; SI-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY [[UV]](s32) ; SI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 ; SI-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[COPY2]], [[C]](s32) @@ -4249,7 +4234,7 @@ body: | ; SI-NEXT: G_STORE [[LSHR1]](s32), [[PTR_ADD2]](p1) :: (store (s16) into unknown-address + 6, addrspace 1) ; SI-NEXT: [[C3:%[0-9]+]]:_(s64) = G_CONSTANT i64 8 ; SI-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C3]](s64) - ; SI-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY [[EXTRACT1]](s32) + ; SI-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY [[UV2]](s32) ; SI-NEXT: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[COPY4]], [[C]](s32) ; SI-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD3]], [[C1]](s64) ; SI-NEXT: G_STORE [[COPY4]](s32), [[PTR_ADD3]](p1) :: (store (s16) into unknown-address + 8, addrspace 1) @@ -4305,12 +4290,12 @@ body: | ; SI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 
; SI-NEXT: [[COPY1:%[0-9]+]]:_(s96) = COPY $vgpr2_vgpr3_vgpr4 ; SI-NEXT: [[BITCAST:%[0-9]+]]:_(<3 x s32>) = G_BITCAST [[COPY1]](s96) - ; SI-NEXT: [[EXTRACT:%[0-9]+]]:_(<2 x s32>) = G_EXTRACT [[BITCAST]](<3 x s32>), 0 - ; SI-NEXT: [[EXTRACT1:%[0-9]+]]:_(s32) = G_EXTRACT [[BITCAST]](<3 x s32>), 64 - ; SI-NEXT: G_STORE [[EXTRACT]](<2 x s32>), [[COPY]](p1) :: (store (<2 x s32>), align 4, addrspace 1) + ; SI-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[BITCAST]](<3 x s32>) + ; SI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[UV]](s32), [[UV1]](s32) + ; SI-NEXT: G_STORE [[BUILD_VECTOR]](<2 x s32>), [[COPY]](p1) :: (store (<2 x s32>), align 4, addrspace 1) ; SI-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 8 ; SI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64) - ; SI-NEXT: G_STORE [[EXTRACT1]](s32), [[PTR_ADD]](p1) :: (store (s32) into unknown-address + 8, addrspace 1) + ; SI-NEXT: G_STORE [[UV2]](s32), [[PTR_ADD]](p1) :: (store (s32) into unknown-address + 8, addrspace 1) ; CI-LABEL: name: test_store_global_s96_align4 ; CI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 ; CI-NEXT: [[COPY1:%[0-9]+]]:_(s96) = COPY $vgpr2_vgpr3_vgpr4 @@ -4341,12 +4326,12 @@ body: | ; SI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 ; SI-NEXT: [[COPY1:%[0-9]+]]:_(s96) = COPY $vgpr2_vgpr3_vgpr4 ; SI-NEXT: [[BITCAST:%[0-9]+]]:_(<3 x s32>) = G_BITCAST [[COPY1]](s96) - ; SI-NEXT: [[EXTRACT:%[0-9]+]]:_(<2 x s32>) = G_EXTRACT [[BITCAST]](<3 x s32>), 0 - ; SI-NEXT: [[EXTRACT1:%[0-9]+]]:_(s32) = G_EXTRACT [[BITCAST]](<3 x s32>), 64 - ; SI-NEXT: G_STORE [[EXTRACT]](<2 x s32>), [[COPY]](p1) :: (store (<2 x s32>), addrspace 1) + ; SI-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[BITCAST]](<3 x s32>) + ; SI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[UV]](s32), [[UV1]](s32) + ; SI-NEXT: G_STORE [[BUILD_VECTOR]](<2 x s32>), [[COPY]](p1) :: (store (<2 x s32>), addrspace 1) ; SI-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 8 ; SI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64) - ; SI-NEXT: G_STORE [[EXTRACT1]](s32), [[PTR_ADD]](p1) :: (store (s32) into unknown-address + 8, align 8, addrspace 1) + ; SI-NEXT: G_STORE [[UV2]](s32), [[PTR_ADD]](p1) :: (store (s32) into unknown-address + 8, align 8, addrspace 1) ; CI-LABEL: name: test_store_global_s96_align8 ; CI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 ; CI-NEXT: [[COPY1:%[0-9]+]]:_(s96) = COPY $vgpr2_vgpr3_vgpr4 @@ -4377,12 +4362,12 @@ body: | ; SI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 ; SI-NEXT: [[COPY1:%[0-9]+]]:_(s96) = COPY $vgpr2_vgpr3_vgpr4 ; SI-NEXT: [[BITCAST:%[0-9]+]]:_(<3 x s32>) = G_BITCAST [[COPY1]](s96) - ; SI-NEXT: [[EXTRACT:%[0-9]+]]:_(<2 x s32>) = G_EXTRACT [[BITCAST]](<3 x s32>), 0 - ; SI-NEXT: [[EXTRACT1:%[0-9]+]]:_(s32) = G_EXTRACT [[BITCAST]](<3 x s32>), 64 - ; SI-NEXT: G_STORE [[EXTRACT]](<2 x s32>), [[COPY]](p1) :: (store (<2 x s32>), align 16, addrspace 1) + ; SI-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[BITCAST]](<3 x s32>) + ; SI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[UV]](s32), [[UV1]](s32) + ; SI-NEXT: G_STORE [[BUILD_VECTOR]](<2 x s32>), [[COPY]](p1) :: (store (<2 x s32>), align 16, addrspace 1) ; SI-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 8 ; SI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64) - ; SI-NEXT: G_STORE [[EXTRACT1]](s32), [[PTR_ADD]](p1) :: (store (s32) into unknown-address + 
8, align 8, addrspace 1) + ; SI-NEXT: G_STORE [[UV2]](s32), [[PTR_ADD]](p1) :: (store (s32) into unknown-address + 8, align 8, addrspace 1) ; CI-LABEL: name: test_store_global_s96_align16 ; CI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 ; CI-NEXT: [[COPY1:%[0-9]+]]:_(s96) = COPY $vgpr2_vgpr3_vgpr4 @@ -4763,9 +4748,7 @@ body: | ; SI-LABEL: name: test_store_global_v5s32_align1 ; SI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 ; SI-NEXT: [[COPY1:%[0-9]+]]:_(<5 x s32>) = COPY $vgpr2_vgpr3_vgpr4_vgpr5_vgpr6 - ; SI-NEXT: [[EXTRACT:%[0-9]+]]:_(<4 x s32>) = G_EXTRACT [[COPY1]](<5 x s32>), 0 - ; SI-NEXT: [[EXTRACT1:%[0-9]+]]:_(s32) = G_EXTRACT [[COPY1]](<5 x s32>), 128 - ; SI-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[EXTRACT]](<4 x s32>) + ; SI-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](<5 x s32>) ; SI-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY [[UV]](s32) ; SI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 ; SI-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[COPY2]], [[C]](s32) @@ -4838,7 +4821,7 @@ body: | ; SI-NEXT: G_STORE [[LSHR11]](s32), [[PTR_ADD14]](p1) :: (store (s8) into unknown-address + 15, addrspace 1) ; SI-NEXT: [[C8:%[0-9]+]]:_(s64) = G_CONSTANT i64 16 ; SI-NEXT: [[PTR_ADD15:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C8]](s64) - ; SI-NEXT: [[COPY13:%[0-9]+]]:_(s32) = COPY [[EXTRACT1]](s32) + ; SI-NEXT: [[COPY13:%[0-9]+]]:_(s32) = COPY [[UV4]](s32) ; SI-NEXT: [[LSHR12:%[0-9]+]]:_(s32) = G_LSHR [[COPY13]], [[C]](s32) ; SI-NEXT: [[PTR_ADD16:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD15]], [[C1]](s64) ; SI-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY [[C2]](s32) @@ -4856,18 +4839,16 @@ body: | ; CI-LABEL: name: test_store_global_v5s32_align1 ; CI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 ; CI-NEXT: [[COPY1:%[0-9]+]]:_(<5 x s32>) = COPY $vgpr2_vgpr3_vgpr4_vgpr5_vgpr6 - ; CI-NEXT: [[EXTRACT:%[0-9]+]]:_(<4 x s32>) = G_EXTRACT [[COPY1]](<5 x s32>), 0 - ; CI-NEXT: [[EXTRACT1:%[0-9]+]]:_(s32) = G_EXTRACT [[COPY1]](<5 x s32>), 128 - ; CI-NEXT: G_STORE [[EXTRACT]](<4 x s32>), [[COPY]](p1) :: (store (<4 x s32>), align 1, addrspace 1) + ; CI-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](<5 x s32>) + ; CI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[UV]](s32), [[UV1]](s32), [[UV2]](s32), [[UV3]](s32) + ; CI-NEXT: G_STORE [[BUILD_VECTOR]](<4 x s32>), [[COPY]](p1) :: (store (<4 x s32>), align 1, addrspace 1) ; CI-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 16 ; CI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64) - ; CI-NEXT: G_STORE [[EXTRACT1]](s32), [[PTR_ADD]](p1) :: (store (s32) into unknown-address + 16, align 1, addrspace 1) + ; CI-NEXT: G_STORE [[UV4]](s32), [[PTR_ADD]](p1) :: (store (s32) into unknown-address + 16, align 1, addrspace 1) ; VI-LABEL: name: test_store_global_v5s32_align1 ; VI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 ; VI-NEXT: [[COPY1:%[0-9]+]]:_(<5 x s32>) = COPY $vgpr2_vgpr3_vgpr4_vgpr5_vgpr6 - ; VI-NEXT: [[EXTRACT:%[0-9]+]]:_(<4 x s32>) = G_EXTRACT [[COPY1]](<5 x s32>), 0 - ; VI-NEXT: [[EXTRACT1:%[0-9]+]]:_(s32) = G_EXTRACT [[COPY1]](<5 x s32>), 128 - ; VI-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[EXTRACT]](<4 x s32>) + ; VI-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), 
[[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](<5 x s32>) ; VI-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY [[UV]](s32) ; VI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 ; VI-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[COPY2]], [[C]](s32) @@ -4940,7 +4921,7 @@ body: | ; VI-NEXT: G_STORE [[ANYEXT7]](s32), [[PTR_ADD14]](p1) :: (store (s8) into unknown-address + 15, addrspace 1) ; VI-NEXT: [[C7:%[0-9]+]]:_(s64) = G_CONSTANT i64 16 ; VI-NEXT: [[PTR_ADD15:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C7]](s64) - ; VI-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY [[EXTRACT1]](s32) + ; VI-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY [[UV4]](s32) ; VI-NEXT: [[LSHR12:%[0-9]+]]:_(s32) = G_LSHR [[COPY6]], [[C]](s32) ; VI-NEXT: [[PTR_ADD16:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD15]], [[C1]](s64) ; VI-NEXT: [[TRUNC8:%[0-9]+]]:_(s16) = G_TRUNC [[COPY6]](s32) @@ -4958,12 +4939,12 @@ body: | ; GFX9-LABEL: name: test_store_global_v5s32_align1 ; GFX9: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(<5 x s32>) = COPY $vgpr2_vgpr3_vgpr4_vgpr5_vgpr6 - ; GFX9-NEXT: [[EXTRACT:%[0-9]+]]:_(<4 x s32>) = G_EXTRACT [[COPY1]](<5 x s32>), 0 - ; GFX9-NEXT: [[EXTRACT1:%[0-9]+]]:_(s32) = G_EXTRACT [[COPY1]](<5 x s32>), 128 - ; GFX9-NEXT: G_STORE [[EXTRACT]](<4 x s32>), [[COPY]](p1) :: (store (<4 x s32>), align 1, addrspace 1) + ; GFX9-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](<5 x s32>) + ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[UV]](s32), [[UV1]](s32), [[UV2]](s32), [[UV3]](s32) + ; GFX9-NEXT: G_STORE [[BUILD_VECTOR]](<4 x s32>), [[COPY]](p1) :: (store (<4 x s32>), align 1, addrspace 1) ; GFX9-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 16 ; GFX9-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64) - ; GFX9-NEXT: G_STORE [[EXTRACT1]](s32), [[PTR_ADD]](p1) :: (store (s32) into unknown-address + 16, align 1, addrspace 1) + ; GFX9-NEXT: G_STORE [[UV4]](s32), [[PTR_ADD]](p1) :: (store (s32) into unknown-address + 16, align 1, addrspace 1) %0:_(p1) = COPY $vgpr0_vgpr1 %1:_(<5 x s32>) = COPY $vgpr2_vgpr3_vgpr4_vgpr5_vgpr6 G_STORE %1, %0 :: (store (<5 x s32>), align 1, addrspace 1) @@ -4978,9 +4959,7 @@ body: | ; SI-LABEL: name: test_store_global_v5s32_align2 ; SI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 ; SI-NEXT: [[COPY1:%[0-9]+]]:_(<5 x s32>) = COPY $vgpr2_vgpr3_vgpr4_vgpr5_vgpr6 - ; SI-NEXT: [[EXTRACT:%[0-9]+]]:_(<4 x s32>) = G_EXTRACT [[COPY1]](<5 x s32>), 0 - ; SI-NEXT: [[EXTRACT1:%[0-9]+]]:_(s32) = G_EXTRACT [[COPY1]](<5 x s32>), 128 - ; SI-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[EXTRACT]](<4 x s32>) + ; SI-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](<5 x s32>) ; SI-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY [[UV]](s32) ; SI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 ; SI-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[COPY2]], [[C]](s32) @@ -5011,7 +4990,7 @@ body: | ; SI-NEXT: G_STORE [[LSHR3]](s32), [[PTR_ADD6]](p1) :: (store (s16) into unknown-address + 14, addrspace 1) ; SI-NEXT: [[C5:%[0-9]+]]:_(s64) = G_CONSTANT i64 16 ; SI-NEXT: [[PTR_ADD7:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C5]](s64) - ; SI-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY [[EXTRACT1]](s32) + ; SI-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY [[UV4]](s32) ; SI-NEXT: 
[[LSHR4:%[0-9]+]]:_(s32) = G_LSHR [[COPY6]], [[C]](s32) ; SI-NEXT: [[PTR_ADD8:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD7]], [[C1]](s64) ; SI-NEXT: G_STORE [[COPY6]](s32), [[PTR_ADD7]](p1) :: (store (s16) into unknown-address + 16, addrspace 1) @@ -5019,18 +4998,16 @@ body: | ; CI-LABEL: name: test_store_global_v5s32_align2 ; CI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 ; CI-NEXT: [[COPY1:%[0-9]+]]:_(<5 x s32>) = COPY $vgpr2_vgpr3_vgpr4_vgpr5_vgpr6 - ; CI-NEXT: [[EXTRACT:%[0-9]+]]:_(<4 x s32>) = G_EXTRACT [[COPY1]](<5 x s32>), 0 - ; CI-NEXT: [[EXTRACT1:%[0-9]+]]:_(s32) = G_EXTRACT [[COPY1]](<5 x s32>), 128 - ; CI-NEXT: G_STORE [[EXTRACT]](<4 x s32>), [[COPY]](p1) :: (store (<4 x s32>), align 2, addrspace 1) + ; CI-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](<5 x s32>) + ; CI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[UV]](s32), [[UV1]](s32), [[UV2]](s32), [[UV3]](s32) + ; CI-NEXT: G_STORE [[BUILD_VECTOR]](<4 x s32>), [[COPY]](p1) :: (store (<4 x s32>), align 2, addrspace 1) ; CI-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 16 ; CI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64) - ; CI-NEXT: G_STORE [[EXTRACT1]](s32), [[PTR_ADD]](p1) :: (store (s32) into unknown-address + 16, align 2, addrspace 1) + ; CI-NEXT: G_STORE [[UV4]](s32), [[PTR_ADD]](p1) :: (store (s32) into unknown-address + 16, align 2, addrspace 1) ; VI-LABEL: name: test_store_global_v5s32_align2 ; VI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 ; VI-NEXT: [[COPY1:%[0-9]+]]:_(<5 x s32>) = COPY $vgpr2_vgpr3_vgpr4_vgpr5_vgpr6 - ; VI-NEXT: [[EXTRACT:%[0-9]+]]:_(<4 x s32>) = G_EXTRACT [[COPY1]](<5 x s32>), 0 - ; VI-NEXT: [[EXTRACT1:%[0-9]+]]:_(s32) = G_EXTRACT [[COPY1]](<5 x s32>), 128 - ; VI-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[EXTRACT]](<4 x s32>) + ; VI-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](<5 x s32>) ; VI-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY [[UV]](s32) ; VI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 ; VI-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[COPY2]], [[C]](s32) @@ -5061,7 +5038,7 @@ body: | ; VI-NEXT: G_STORE [[LSHR3]](s32), [[PTR_ADD6]](p1) :: (store (s16) into unknown-address + 14, addrspace 1) ; VI-NEXT: [[C5:%[0-9]+]]:_(s64) = G_CONSTANT i64 16 ; VI-NEXT: [[PTR_ADD7:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C5]](s64) - ; VI-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY [[EXTRACT1]](s32) + ; VI-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY [[UV4]](s32) ; VI-NEXT: [[LSHR4:%[0-9]+]]:_(s32) = G_LSHR [[COPY6]], [[C]](s32) ; VI-NEXT: [[PTR_ADD8:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD7]], [[C1]](s64) ; VI-NEXT: G_STORE [[COPY6]](s32), [[PTR_ADD7]](p1) :: (store (s16) into unknown-address + 16, addrspace 1) @@ -5069,12 +5046,12 @@ body: | ; GFX9-LABEL: name: test_store_global_v5s32_align2 ; GFX9: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(<5 x s32>) = COPY $vgpr2_vgpr3_vgpr4_vgpr5_vgpr6 - ; GFX9-NEXT: [[EXTRACT:%[0-9]+]]:_(<4 x s32>) = G_EXTRACT [[COPY1]](<5 x s32>), 0 - ; GFX9-NEXT: [[EXTRACT1:%[0-9]+]]:_(s32) = G_EXTRACT [[COPY1]](<5 x s32>), 128 - ; GFX9-NEXT: G_STORE [[EXTRACT]](<4 x s32>), [[COPY]](p1) :: (store (<4 x s32>), align 2, addrspace 1) + ; GFX9-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32), 
[[UV4:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](<5 x s32>) + ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[UV]](s32), [[UV1]](s32), [[UV2]](s32), [[UV3]](s32) + ; GFX9-NEXT: G_STORE [[BUILD_VECTOR]](<4 x s32>), [[COPY]](p1) :: (store (<4 x s32>), align 2, addrspace 1) ; GFX9-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 16 ; GFX9-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64) - ; GFX9-NEXT: G_STORE [[EXTRACT1]](s32), [[PTR_ADD]](p1) :: (store (s32) into unknown-address + 16, align 2, addrspace 1) + ; GFX9-NEXT: G_STORE [[UV4]](s32), [[PTR_ADD]](p1) :: (store (s32) into unknown-address + 16, align 2, addrspace 1) %0:_(p1) = COPY $vgpr0_vgpr1 %1:_(<5 x s32>) = COPY $vgpr2_vgpr3_vgpr4_vgpr5_vgpr6 G_STORE %1, %0 :: (store (<5 x s32>), align 2, addrspace 1) @@ -5089,39 +5066,39 @@ body: | ; SI-LABEL: name: test_store_global_v5s32_align4 ; SI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 ; SI-NEXT: [[COPY1:%[0-9]+]]:_(<5 x s32>) = COPY $vgpr2_vgpr3_vgpr4_vgpr5_vgpr6 - ; SI-NEXT: [[EXTRACT:%[0-9]+]]:_(<4 x s32>) = G_EXTRACT [[COPY1]](<5 x s32>), 0 - ; SI-NEXT: [[EXTRACT1:%[0-9]+]]:_(s32) = G_EXTRACT [[COPY1]](<5 x s32>), 128 - ; SI-NEXT: G_STORE [[EXTRACT]](<4 x s32>), [[COPY]](p1) :: (store (<4 x s32>), align 4, addrspace 1) + ; SI-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](<5 x s32>) + ; SI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[UV]](s32), [[UV1]](s32), [[UV2]](s32), [[UV3]](s32) + ; SI-NEXT: G_STORE [[BUILD_VECTOR]](<4 x s32>), [[COPY]](p1) :: (store (<4 x s32>), align 4, addrspace 1) ; SI-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 16 ; SI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64) - ; SI-NEXT: G_STORE [[EXTRACT1]](s32), [[PTR_ADD]](p1) :: (store (s32) into unknown-address + 16, addrspace 1) + ; SI-NEXT: G_STORE [[UV4]](s32), [[PTR_ADD]](p1) :: (store (s32) into unknown-address + 16, addrspace 1) ; CI-LABEL: name: test_store_global_v5s32_align4 ; CI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 ; CI-NEXT: [[COPY1:%[0-9]+]]:_(<5 x s32>) = COPY $vgpr2_vgpr3_vgpr4_vgpr5_vgpr6 - ; CI-NEXT: [[EXTRACT:%[0-9]+]]:_(<4 x s32>) = G_EXTRACT [[COPY1]](<5 x s32>), 0 - ; CI-NEXT: [[EXTRACT1:%[0-9]+]]:_(s32) = G_EXTRACT [[COPY1]](<5 x s32>), 128 - ; CI-NEXT: G_STORE [[EXTRACT]](<4 x s32>), [[COPY]](p1) :: (store (<4 x s32>), align 4, addrspace 1) + ; CI-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](<5 x s32>) + ; CI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[UV]](s32), [[UV1]](s32), [[UV2]](s32), [[UV3]](s32) + ; CI-NEXT: G_STORE [[BUILD_VECTOR]](<4 x s32>), [[COPY]](p1) :: (store (<4 x s32>), align 4, addrspace 1) ; CI-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 16 ; CI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64) - ; CI-NEXT: G_STORE [[EXTRACT1]](s32), [[PTR_ADD]](p1) :: (store (s32) into unknown-address + 16, addrspace 1) + ; CI-NEXT: G_STORE [[UV4]](s32), [[PTR_ADD]](p1) :: (store (s32) into unknown-address + 16, addrspace 1) ; VI-LABEL: name: test_store_global_v5s32_align4 ; VI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 ; VI-NEXT: [[COPY1:%[0-9]+]]:_(<5 x s32>) = COPY $vgpr2_vgpr3_vgpr4_vgpr5_vgpr6 - ; VI-NEXT: [[EXTRACT:%[0-9]+]]:_(<4 x s32>) = G_EXTRACT [[COPY1]](<5 x s32>), 0 - ; VI-NEXT: [[EXTRACT1:%[0-9]+]]:_(s32) = G_EXTRACT [[COPY1]](<5 x s32>), 128 - ; 
VI-NEXT: G_STORE [[EXTRACT]](<4 x s32>), [[COPY]](p1) :: (store (<4 x s32>), align 4, addrspace 1) + ; VI-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](<5 x s32>) + ; VI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[UV]](s32), [[UV1]](s32), [[UV2]](s32), [[UV3]](s32) + ; VI-NEXT: G_STORE [[BUILD_VECTOR]](<4 x s32>), [[COPY]](p1) :: (store (<4 x s32>), align 4, addrspace 1) ; VI-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 16 ; VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64) - ; VI-NEXT: G_STORE [[EXTRACT1]](s32), [[PTR_ADD]](p1) :: (store (s32) into unknown-address + 16, addrspace 1) + ; VI-NEXT: G_STORE [[UV4]](s32), [[PTR_ADD]](p1) :: (store (s32) into unknown-address + 16, addrspace 1) ; GFX9-LABEL: name: test_store_global_v5s32_align4 ; GFX9: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(<5 x s32>) = COPY $vgpr2_vgpr3_vgpr4_vgpr5_vgpr6 - ; GFX9-NEXT: [[EXTRACT:%[0-9]+]]:_(<4 x s32>) = G_EXTRACT [[COPY1]](<5 x s32>), 0 - ; GFX9-NEXT: [[EXTRACT1:%[0-9]+]]:_(s32) = G_EXTRACT [[COPY1]](<5 x s32>), 128 - ; GFX9-NEXT: G_STORE [[EXTRACT]](<4 x s32>), [[COPY]](p1) :: (store (<4 x s32>), align 4, addrspace 1) + ; GFX9-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](<5 x s32>) + ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[UV]](s32), [[UV1]](s32), [[UV2]](s32), [[UV3]](s32) + ; GFX9-NEXT: G_STORE [[BUILD_VECTOR]](<4 x s32>), [[COPY]](p1) :: (store (<4 x s32>), align 4, addrspace 1) ; GFX9-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 16 ; GFX9-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64) - ; GFX9-NEXT: G_STORE [[EXTRACT1]](s32), [[PTR_ADD]](p1) :: (store (s32) into unknown-address + 16, addrspace 1) + ; GFX9-NEXT: G_STORE [[UV4]](s32), [[PTR_ADD]](p1) :: (store (s32) into unknown-address + 16, addrspace 1) %0:_(p1) = COPY $vgpr0_vgpr1 %1:_(<5 x s32>) = COPY $vgpr2_vgpr3_vgpr4_vgpr5_vgpr6 G_STORE %1, %0 :: (store (<5 x s32>), align 4, addrspace 1) @@ -5136,39 +5113,39 @@ body: | ; SI-LABEL: name: test_store_global_v5s32_align8 ; SI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 ; SI-NEXT: [[COPY1:%[0-9]+]]:_(<5 x s32>) = COPY $vgpr2_vgpr3_vgpr4_vgpr5_vgpr6 - ; SI-NEXT: [[EXTRACT:%[0-9]+]]:_(<4 x s32>) = G_EXTRACT [[COPY1]](<5 x s32>), 0 - ; SI-NEXT: [[EXTRACT1:%[0-9]+]]:_(s32) = G_EXTRACT [[COPY1]](<5 x s32>), 128 - ; SI-NEXT: G_STORE [[EXTRACT]](<4 x s32>), [[COPY]](p1) :: (store (<4 x s32>), align 8, addrspace 1) + ; SI-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](<5 x s32>) + ; SI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[UV]](s32), [[UV1]](s32), [[UV2]](s32), [[UV3]](s32) + ; SI-NEXT: G_STORE [[BUILD_VECTOR]](<4 x s32>), [[COPY]](p1) :: (store (<4 x s32>), align 8, addrspace 1) ; SI-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 16 ; SI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64) - ; SI-NEXT: G_STORE [[EXTRACT1]](s32), [[PTR_ADD]](p1) :: (store (s32) into unknown-address + 16, align 8, addrspace 1) + ; SI-NEXT: G_STORE [[UV4]](s32), [[PTR_ADD]](p1) :: (store (s32) into unknown-address + 16, align 8, addrspace 1) ; CI-LABEL: name: test_store_global_v5s32_align8 ; CI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 ; CI-NEXT: 
[[COPY1:%[0-9]+]]:_(<5 x s32>) = COPY $vgpr2_vgpr3_vgpr4_vgpr5_vgpr6 - ; CI-NEXT: [[EXTRACT:%[0-9]+]]:_(<4 x s32>) = G_EXTRACT [[COPY1]](<5 x s32>), 0 - ; CI-NEXT: [[EXTRACT1:%[0-9]+]]:_(s32) = G_EXTRACT [[COPY1]](<5 x s32>), 128 - ; CI-NEXT: G_STORE [[EXTRACT]](<4 x s32>), [[COPY]](p1) :: (store (<4 x s32>), align 8, addrspace 1) + ; CI-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](<5 x s32>) + ; CI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[UV]](s32), [[UV1]](s32), [[UV2]](s32), [[UV3]](s32) + ; CI-NEXT: G_STORE [[BUILD_VECTOR]](<4 x s32>), [[COPY]](p1) :: (store (<4 x s32>), align 8, addrspace 1) ; CI-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 16 ; CI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64) - ; CI-NEXT: G_STORE [[EXTRACT1]](s32), [[PTR_ADD]](p1) :: (store (s32) into unknown-address + 16, align 8, addrspace 1) + ; CI-NEXT: G_STORE [[UV4]](s32), [[PTR_ADD]](p1) :: (store (s32) into unknown-address + 16, align 8, addrspace 1) ; VI-LABEL: name: test_store_global_v5s32_align8 ; VI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 ; VI-NEXT: [[COPY1:%[0-9]+]]:_(<5 x s32>) = COPY $vgpr2_vgpr3_vgpr4_vgpr5_vgpr6 - ; VI-NEXT: [[EXTRACT:%[0-9]+]]:_(<4 x s32>) = G_EXTRACT [[COPY1]](<5 x s32>), 0 - ; VI-NEXT: [[EXTRACT1:%[0-9]+]]:_(s32) = G_EXTRACT [[COPY1]](<5 x s32>), 128 - ; VI-NEXT: G_STORE [[EXTRACT]](<4 x s32>), [[COPY]](p1) :: (store (<4 x s32>), align 8, addrspace 1) + ; VI-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](<5 x s32>) + ; VI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[UV]](s32), [[UV1]](s32), [[UV2]](s32), [[UV3]](s32) + ; VI-NEXT: G_STORE [[BUILD_VECTOR]](<4 x s32>), [[COPY]](p1) :: (store (<4 x s32>), align 8, addrspace 1) ; VI-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 16 ; VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64) - ; VI-NEXT: G_STORE [[EXTRACT1]](s32), [[PTR_ADD]](p1) :: (store (s32) into unknown-address + 16, align 8, addrspace 1) + ; VI-NEXT: G_STORE [[UV4]](s32), [[PTR_ADD]](p1) :: (store (s32) into unknown-address + 16, align 8, addrspace 1) ; GFX9-LABEL: name: test_store_global_v5s32_align8 ; GFX9: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(<5 x s32>) = COPY $vgpr2_vgpr3_vgpr4_vgpr5_vgpr6 - ; GFX9-NEXT: [[EXTRACT:%[0-9]+]]:_(<4 x s32>) = G_EXTRACT [[COPY1]](<5 x s32>), 0 - ; GFX9-NEXT: [[EXTRACT1:%[0-9]+]]:_(s32) = G_EXTRACT [[COPY1]](<5 x s32>), 128 - ; GFX9-NEXT: G_STORE [[EXTRACT]](<4 x s32>), [[COPY]](p1) :: (store (<4 x s32>), align 8, addrspace 1) + ; GFX9-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](<5 x s32>) + ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[UV]](s32), [[UV1]](s32), [[UV2]](s32), [[UV3]](s32) + ; GFX9-NEXT: G_STORE [[BUILD_VECTOR]](<4 x s32>), [[COPY]](p1) :: (store (<4 x s32>), align 8, addrspace 1) ; GFX9-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 16 ; GFX9-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64) - ; GFX9-NEXT: G_STORE [[EXTRACT1]](s32), [[PTR_ADD]](p1) :: (store (s32) into unknown-address + 16, align 8, addrspace 1) + ; GFX9-NEXT: G_STORE [[UV4]](s32), [[PTR_ADD]](p1) :: (store (s32) into unknown-address + 16, align 8, addrspace 1) %0:_(p1) = COPY $vgpr0_vgpr1 
%1:_(<5 x s32>) = COPY $vgpr2_vgpr3_vgpr4_vgpr5_vgpr6 G_STORE %1, %0 :: (store (<5 x s32>), align 8, addrspace 1) @@ -5183,39 +5160,39 @@ body: | ; SI-LABEL: name: test_store_global_v5s32_align16 ; SI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 ; SI-NEXT: [[COPY1:%[0-9]+]]:_(<5 x s32>) = COPY $vgpr2_vgpr3_vgpr4_vgpr5_vgpr6 - ; SI-NEXT: [[EXTRACT:%[0-9]+]]:_(<4 x s32>) = G_EXTRACT [[COPY1]](<5 x s32>), 0 - ; SI-NEXT: [[EXTRACT1:%[0-9]+]]:_(s32) = G_EXTRACT [[COPY1]](<5 x s32>), 128 - ; SI-NEXT: G_STORE [[EXTRACT]](<4 x s32>), [[COPY]](p1) :: (store (<4 x s32>), addrspace 1) + ; SI-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](<5 x s32>) + ; SI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[UV]](s32), [[UV1]](s32), [[UV2]](s32), [[UV3]](s32) + ; SI-NEXT: G_STORE [[BUILD_VECTOR]](<4 x s32>), [[COPY]](p1) :: (store (<4 x s32>), addrspace 1) ; SI-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 16 ; SI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64) - ; SI-NEXT: G_STORE [[EXTRACT1]](s32), [[PTR_ADD]](p1) :: (store (s32) into unknown-address + 16, align 16, addrspace 1) + ; SI-NEXT: G_STORE [[UV4]](s32), [[PTR_ADD]](p1) :: (store (s32) into unknown-address + 16, align 16, addrspace 1) ; CI-LABEL: name: test_store_global_v5s32_align16 ; CI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 ; CI-NEXT: [[COPY1:%[0-9]+]]:_(<5 x s32>) = COPY $vgpr2_vgpr3_vgpr4_vgpr5_vgpr6 - ; CI-NEXT: [[EXTRACT:%[0-9]+]]:_(<4 x s32>) = G_EXTRACT [[COPY1]](<5 x s32>), 0 - ; CI-NEXT: [[EXTRACT1:%[0-9]+]]:_(s32) = G_EXTRACT [[COPY1]](<5 x s32>), 128 - ; CI-NEXT: G_STORE [[EXTRACT]](<4 x s32>), [[COPY]](p1) :: (store (<4 x s32>), addrspace 1) + ; CI-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](<5 x s32>) + ; CI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[UV]](s32), [[UV1]](s32), [[UV2]](s32), [[UV3]](s32) + ; CI-NEXT: G_STORE [[BUILD_VECTOR]](<4 x s32>), [[COPY]](p1) :: (store (<4 x s32>), addrspace 1) ; CI-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 16 ; CI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64) - ; CI-NEXT: G_STORE [[EXTRACT1]](s32), [[PTR_ADD]](p1) :: (store (s32) into unknown-address + 16, align 16, addrspace 1) + ; CI-NEXT: G_STORE [[UV4]](s32), [[PTR_ADD]](p1) :: (store (s32) into unknown-address + 16, align 16, addrspace 1) ; VI-LABEL: name: test_store_global_v5s32_align16 ; VI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 ; VI-NEXT: [[COPY1:%[0-9]+]]:_(<5 x s32>) = COPY $vgpr2_vgpr3_vgpr4_vgpr5_vgpr6 - ; VI-NEXT: [[EXTRACT:%[0-9]+]]:_(<4 x s32>) = G_EXTRACT [[COPY1]](<5 x s32>), 0 - ; VI-NEXT: [[EXTRACT1:%[0-9]+]]:_(s32) = G_EXTRACT [[COPY1]](<5 x s32>), 128 - ; VI-NEXT: G_STORE [[EXTRACT]](<4 x s32>), [[COPY]](p1) :: (store (<4 x s32>), addrspace 1) + ; VI-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](<5 x s32>) + ; VI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[UV]](s32), [[UV1]](s32), [[UV2]](s32), [[UV3]](s32) + ; VI-NEXT: G_STORE [[BUILD_VECTOR]](<4 x s32>), [[COPY]](p1) :: (store (<4 x s32>), addrspace 1) ; VI-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 16 ; VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64) - ; VI-NEXT: G_STORE [[EXTRACT1]](s32), [[PTR_ADD]](p1) :: (store (s32) into 
unknown-address + 16, align 16, addrspace 1) + ; VI-NEXT: G_STORE [[UV4]](s32), [[PTR_ADD]](p1) :: (store (s32) into unknown-address + 16, align 16, addrspace 1) ; GFX9-LABEL: name: test_store_global_v5s32_align16 ; GFX9: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(<5 x s32>) = COPY $vgpr2_vgpr3_vgpr4_vgpr5_vgpr6 - ; GFX9-NEXT: [[EXTRACT:%[0-9]+]]:_(<4 x s32>) = G_EXTRACT [[COPY1]](<5 x s32>), 0 - ; GFX9-NEXT: [[EXTRACT1:%[0-9]+]]:_(s32) = G_EXTRACT [[COPY1]](<5 x s32>), 128 - ; GFX9-NEXT: G_STORE [[EXTRACT]](<4 x s32>), [[COPY]](p1) :: (store (<4 x s32>), addrspace 1) + ; GFX9-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](<5 x s32>) + ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[UV]](s32), [[UV1]](s32), [[UV2]](s32), [[UV3]](s32) + ; GFX9-NEXT: G_STORE [[BUILD_VECTOR]](<4 x s32>), [[COPY]](p1) :: (store (<4 x s32>), addrspace 1) ; GFX9-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 16 ; GFX9-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64) - ; GFX9-NEXT: G_STORE [[EXTRACT1]](s32), [[PTR_ADD]](p1) :: (store (s32) into unknown-address + 16, align 16, addrspace 1) + ; GFX9-NEXT: G_STORE [[UV4]](s32), [[PTR_ADD]](p1) :: (store (s32) into unknown-address + 16, align 16, addrspace 1) %0:_(p1) = COPY $vgpr0_vgpr1 %1:_(<5 x s32>) = COPY $vgpr2_vgpr3_vgpr4_vgpr5_vgpr6 G_STORE %1, %0 :: (store (<5 x s32>), align 16, addrspace 1) @@ -5230,9 +5207,7 @@ body: | ; SI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 ; SI-NEXT: [[COPY1:%[0-9]+]]:_(<5 x p3>) = COPY $vgpr2_vgpr3_vgpr4_vgpr5_vgpr6 ; SI-NEXT: [[BITCAST:%[0-9]+]]:_(<5 x s32>) = G_BITCAST [[COPY1]](<5 x p3>) - ; SI-NEXT: [[EXTRACT:%[0-9]+]]:_(<4 x s32>) = G_EXTRACT [[BITCAST]](<5 x s32>), 0 - ; SI-NEXT: [[EXTRACT1:%[0-9]+]]:_(s32) = G_EXTRACT [[BITCAST]](<5 x s32>), 128 - ; SI-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[EXTRACT]](<4 x s32>) + ; SI-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[BITCAST]](<5 x s32>) ; SI-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY [[UV]](s32) ; SI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 ; SI-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[COPY2]], [[C]](s32) @@ -5305,7 +5280,7 @@ body: | ; SI-NEXT: G_STORE [[LSHR11]](s32), [[PTR_ADD14]](p1) :: (store (s8) into unknown-address + 15, addrspace 1) ; SI-NEXT: [[C8:%[0-9]+]]:_(s64) = G_CONSTANT i64 16 ; SI-NEXT: [[PTR_ADD15:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C8]](s64) - ; SI-NEXT: [[COPY13:%[0-9]+]]:_(s32) = COPY [[EXTRACT1]](s32) + ; SI-NEXT: [[COPY13:%[0-9]+]]:_(s32) = COPY [[UV4]](s32) ; SI-NEXT: [[LSHR12:%[0-9]+]]:_(s32) = G_LSHR [[COPY13]], [[C]](s32) ; SI-NEXT: [[PTR_ADD16:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD15]], [[C1]](s64) ; SI-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY [[C2]](s32) @@ -5324,19 +5299,17 @@ body: | ; CI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 ; CI-NEXT: [[COPY1:%[0-9]+]]:_(<5 x p3>) = COPY $vgpr2_vgpr3_vgpr4_vgpr5_vgpr6 ; CI-NEXT: [[BITCAST:%[0-9]+]]:_(<5 x s32>) = G_BITCAST [[COPY1]](<5 x p3>) - ; CI-NEXT: [[EXTRACT:%[0-9]+]]:_(<4 x s32>) = G_EXTRACT [[BITCAST]](<5 x s32>), 0 - ; CI-NEXT: [[EXTRACT1:%[0-9]+]]:_(s32) = G_EXTRACT [[BITCAST]](<5 x s32>), 128 - ; CI-NEXT: G_STORE [[EXTRACT]](<4 x s32>), [[COPY]](p1) :: (store (<4 x s32>), align 1, addrspace 1) + ; CI-NEXT: [[UV:%[0-9]+]]:_(s32), 
[[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[BITCAST]](<5 x s32>) + ; CI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[UV]](s32), [[UV1]](s32), [[UV2]](s32), [[UV3]](s32) + ; CI-NEXT: G_STORE [[BUILD_VECTOR]](<4 x s32>), [[COPY]](p1) :: (store (<4 x s32>), align 1, addrspace 1) ; CI-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 16 ; CI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64) - ; CI-NEXT: G_STORE [[EXTRACT1]](s32), [[PTR_ADD]](p1) :: (store (s32) into unknown-address + 16, align 1, addrspace 1) + ; CI-NEXT: G_STORE [[UV4]](s32), [[PTR_ADD]](p1) :: (store (s32) into unknown-address + 16, align 1, addrspace 1) ; VI-LABEL: name: test_store_global_v5p3_align1 ; VI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 ; VI-NEXT: [[COPY1:%[0-9]+]]:_(<5 x p3>) = COPY $vgpr2_vgpr3_vgpr4_vgpr5_vgpr6 ; VI-NEXT: [[BITCAST:%[0-9]+]]:_(<5 x s32>) = G_BITCAST [[COPY1]](<5 x p3>) - ; VI-NEXT: [[EXTRACT:%[0-9]+]]:_(<4 x s32>) = G_EXTRACT [[BITCAST]](<5 x s32>), 0 - ; VI-NEXT: [[EXTRACT1:%[0-9]+]]:_(s32) = G_EXTRACT [[BITCAST]](<5 x s32>), 128 - ; VI-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[EXTRACT]](<4 x s32>) + ; VI-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[BITCAST]](<5 x s32>) ; VI-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY [[UV]](s32) ; VI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 ; VI-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[COPY2]], [[C]](s32) @@ -5409,7 +5382,7 @@ body: | ; VI-NEXT: G_STORE [[ANYEXT7]](s32), [[PTR_ADD14]](p1) :: (store (s8) into unknown-address + 15, addrspace 1) ; VI-NEXT: [[C7:%[0-9]+]]:_(s64) = G_CONSTANT i64 16 ; VI-NEXT: [[PTR_ADD15:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C7]](s64) - ; VI-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY [[EXTRACT1]](s32) + ; VI-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY [[UV4]](s32) ; VI-NEXT: [[LSHR12:%[0-9]+]]:_(s32) = G_LSHR [[COPY6]], [[C]](s32) ; VI-NEXT: [[PTR_ADD16:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD15]], [[C1]](s64) ; VI-NEXT: [[TRUNC8:%[0-9]+]]:_(s16) = G_TRUNC [[COPY6]](s32) @@ -5428,12 +5401,12 @@ body: | ; GFX9: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(<5 x p3>) = COPY $vgpr2_vgpr3_vgpr4_vgpr5_vgpr6 ; GFX9-NEXT: [[BITCAST:%[0-9]+]]:_(<5 x s32>) = G_BITCAST [[COPY1]](<5 x p3>) - ; GFX9-NEXT: [[EXTRACT:%[0-9]+]]:_(<4 x s32>) = G_EXTRACT [[BITCAST]](<5 x s32>), 0 - ; GFX9-NEXT: [[EXTRACT1:%[0-9]+]]:_(s32) = G_EXTRACT [[BITCAST]](<5 x s32>), 128 - ; GFX9-NEXT: G_STORE [[EXTRACT]](<4 x s32>), [[COPY]](p1) :: (store (<4 x s32>), align 1, addrspace 1) + ; GFX9-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[BITCAST]](<5 x s32>) + ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[UV]](s32), [[UV1]](s32), [[UV2]](s32), [[UV3]](s32) + ; GFX9-NEXT: G_STORE [[BUILD_VECTOR]](<4 x s32>), [[COPY]](p1) :: (store (<4 x s32>), align 1, addrspace 1) ; GFX9-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 16 ; GFX9-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64) - ; GFX9-NEXT: G_STORE [[EXTRACT1]](s32), [[PTR_ADD]](p1) :: (store (s32) into unknown-address + 16, align 1, addrspace 1) + ; GFX9-NEXT: G_STORE [[UV4]](s32), [[PTR_ADD]](p1) :: (store (s32) into unknown-address + 16, align 1, addrspace 1) %0:_(p1) = COPY $vgpr0_vgpr1 
%1:_(<5 x p3>) = COPY $vgpr2_vgpr3_vgpr4_vgpr5_vgpr6 G_STORE %1, %0 :: (store (<5 x p3>), align 1, addrspace 1) @@ -5449,9 +5422,7 @@ body: | ; SI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 ; SI-NEXT: [[COPY1:%[0-9]+]]:_(<5 x p3>) = COPY $vgpr2_vgpr3_vgpr4_vgpr5_vgpr6 ; SI-NEXT: [[BITCAST:%[0-9]+]]:_(<5 x s32>) = G_BITCAST [[COPY1]](<5 x p3>) - ; SI-NEXT: [[EXTRACT:%[0-9]+]]:_(<4 x s32>) = G_EXTRACT [[BITCAST]](<5 x s32>), 0 - ; SI-NEXT: [[EXTRACT1:%[0-9]+]]:_(s32) = G_EXTRACT [[BITCAST]](<5 x s32>), 128 - ; SI-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[EXTRACT]](<4 x s32>) + ; SI-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[BITCAST]](<5 x s32>) ; SI-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY [[UV]](s32) ; SI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 ; SI-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[COPY2]], [[C]](s32) @@ -5482,7 +5453,7 @@ body: | ; SI-NEXT: G_STORE [[LSHR3]](s32), [[PTR_ADD6]](p1) :: (store (s16) into unknown-address + 14, addrspace 1) ; SI-NEXT: [[C5:%[0-9]+]]:_(s64) = G_CONSTANT i64 16 ; SI-NEXT: [[PTR_ADD7:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C5]](s64) - ; SI-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY [[EXTRACT1]](s32) + ; SI-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY [[UV4]](s32) ; SI-NEXT: [[LSHR4:%[0-9]+]]:_(s32) = G_LSHR [[COPY6]], [[C]](s32) ; SI-NEXT: [[PTR_ADD8:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD7]], [[C1]](s64) ; SI-NEXT: G_STORE [[COPY6]](s32), [[PTR_ADD7]](p1) :: (store (s16) into unknown-address + 16, addrspace 1) @@ -5491,19 +5462,17 @@ body: | ; CI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 ; CI-NEXT: [[COPY1:%[0-9]+]]:_(<5 x p3>) = COPY $vgpr2_vgpr3_vgpr4_vgpr5_vgpr6 ; CI-NEXT: [[BITCAST:%[0-9]+]]:_(<5 x s32>) = G_BITCAST [[COPY1]](<5 x p3>) - ; CI-NEXT: [[EXTRACT:%[0-9]+]]:_(<4 x s32>) = G_EXTRACT [[BITCAST]](<5 x s32>), 0 - ; CI-NEXT: [[EXTRACT1:%[0-9]+]]:_(s32) = G_EXTRACT [[BITCAST]](<5 x s32>), 128 - ; CI-NEXT: G_STORE [[EXTRACT]](<4 x s32>), [[COPY]](p1) :: (store (<4 x s32>), align 2, addrspace 1) + ; CI-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[BITCAST]](<5 x s32>) + ; CI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[UV]](s32), [[UV1]](s32), [[UV2]](s32), [[UV3]](s32) + ; CI-NEXT: G_STORE [[BUILD_VECTOR]](<4 x s32>), [[COPY]](p1) :: (store (<4 x s32>), align 2, addrspace 1) ; CI-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 16 ; CI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64) - ; CI-NEXT: G_STORE [[EXTRACT1]](s32), [[PTR_ADD]](p1) :: (store (s32) into unknown-address + 16, align 2, addrspace 1) + ; CI-NEXT: G_STORE [[UV4]](s32), [[PTR_ADD]](p1) :: (store (s32) into unknown-address + 16, align 2, addrspace 1) ; VI-LABEL: name: test_store_global_v5p3_align2 ; VI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 ; VI-NEXT: [[COPY1:%[0-9]+]]:_(<5 x p3>) = COPY $vgpr2_vgpr3_vgpr4_vgpr5_vgpr6 ; VI-NEXT: [[BITCAST:%[0-9]+]]:_(<5 x s32>) = G_BITCAST [[COPY1]](<5 x p3>) - ; VI-NEXT: [[EXTRACT:%[0-9]+]]:_(<4 x s32>) = G_EXTRACT [[BITCAST]](<5 x s32>), 0 - ; VI-NEXT: [[EXTRACT1:%[0-9]+]]:_(s32) = G_EXTRACT [[BITCAST]](<5 x s32>), 128 - ; VI-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[EXTRACT]](<4 x s32>) + ; VI-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), 
[[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[BITCAST]](<5 x s32>) ; VI-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY [[UV]](s32) ; VI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 ; VI-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[COPY2]], [[C]](s32) @@ -5534,7 +5503,7 @@ body: | ; VI-NEXT: G_STORE [[LSHR3]](s32), [[PTR_ADD6]](p1) :: (store (s16) into unknown-address + 14, addrspace 1) ; VI-NEXT: [[C5:%[0-9]+]]:_(s64) = G_CONSTANT i64 16 ; VI-NEXT: [[PTR_ADD7:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C5]](s64) - ; VI-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY [[EXTRACT1]](s32) + ; VI-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY [[UV4]](s32) ; VI-NEXT: [[LSHR4:%[0-9]+]]:_(s32) = G_LSHR [[COPY6]], [[C]](s32) ; VI-NEXT: [[PTR_ADD8:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD7]], [[C1]](s64) ; VI-NEXT: G_STORE [[COPY6]](s32), [[PTR_ADD7]](p1) :: (store (s16) into unknown-address + 16, addrspace 1) @@ -5543,12 +5512,12 @@ body: | ; GFX9: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(<5 x p3>) = COPY $vgpr2_vgpr3_vgpr4_vgpr5_vgpr6 ; GFX9-NEXT: [[BITCAST:%[0-9]+]]:_(<5 x s32>) = G_BITCAST [[COPY1]](<5 x p3>) - ; GFX9-NEXT: [[EXTRACT:%[0-9]+]]:_(<4 x s32>) = G_EXTRACT [[BITCAST]](<5 x s32>), 0 - ; GFX9-NEXT: [[EXTRACT1:%[0-9]+]]:_(s32) = G_EXTRACT [[BITCAST]](<5 x s32>), 128 - ; GFX9-NEXT: G_STORE [[EXTRACT]](<4 x s32>), [[COPY]](p1) :: (store (<4 x s32>), align 2, addrspace 1) + ; GFX9-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[BITCAST]](<5 x s32>) + ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[UV]](s32), [[UV1]](s32), [[UV2]](s32), [[UV3]](s32) + ; GFX9-NEXT: G_STORE [[BUILD_VECTOR]](<4 x s32>), [[COPY]](p1) :: (store (<4 x s32>), align 2, addrspace 1) ; GFX9-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 16 ; GFX9-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64) - ; GFX9-NEXT: G_STORE [[EXTRACT1]](s32), [[PTR_ADD]](p1) :: (store (s32) into unknown-address + 16, align 2, addrspace 1) + ; GFX9-NEXT: G_STORE [[UV4]](s32), [[PTR_ADD]](p1) :: (store (s32) into unknown-address + 16, align 2, addrspace 1) %0:_(p1) = COPY $vgpr0_vgpr1 %1:_(<5 x p3>) = COPY $vgpr2_vgpr3_vgpr4_vgpr5_vgpr6 G_STORE %1, %0 :: (store (<5 x p3>), align 2, addrspace 1) @@ -5564,42 +5533,42 @@ body: | ; SI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 ; SI-NEXT: [[COPY1:%[0-9]+]]:_(<5 x p3>) = COPY $vgpr2_vgpr3_vgpr4_vgpr5_vgpr6 ; SI-NEXT: [[BITCAST:%[0-9]+]]:_(<5 x s32>) = G_BITCAST [[COPY1]](<5 x p3>) - ; SI-NEXT: [[EXTRACT:%[0-9]+]]:_(<4 x s32>) = G_EXTRACT [[BITCAST]](<5 x s32>), 0 - ; SI-NEXT: [[EXTRACT1:%[0-9]+]]:_(s32) = G_EXTRACT [[BITCAST]](<5 x s32>), 128 - ; SI-NEXT: G_STORE [[EXTRACT]](<4 x s32>), [[COPY]](p1) :: (store (<4 x s32>), align 4, addrspace 1) + ; SI-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[BITCAST]](<5 x s32>) + ; SI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[UV]](s32), [[UV1]](s32), [[UV2]](s32), [[UV3]](s32) + ; SI-NEXT: G_STORE [[BUILD_VECTOR]](<4 x s32>), [[COPY]](p1) :: (store (<4 x s32>), align 4, addrspace 1) ; SI-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 16 ; SI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64) - ; SI-NEXT: G_STORE [[EXTRACT1]](s32), [[PTR_ADD]](p1) :: (store (s32) into unknown-address + 16, addrspace 1) + ; SI-NEXT: G_STORE [[UV4]](s32), [[PTR_ADD]](p1) :: 
(store (s32) into unknown-address + 16, addrspace 1) ; CI-LABEL: name: test_store_global_v5p3_align4 ; CI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 ; CI-NEXT: [[COPY1:%[0-9]+]]:_(<5 x p3>) = COPY $vgpr2_vgpr3_vgpr4_vgpr5_vgpr6 ; CI-NEXT: [[BITCAST:%[0-9]+]]:_(<5 x s32>) = G_BITCAST [[COPY1]](<5 x p3>) - ; CI-NEXT: [[EXTRACT:%[0-9]+]]:_(<4 x s32>) = G_EXTRACT [[BITCAST]](<5 x s32>), 0 - ; CI-NEXT: [[EXTRACT1:%[0-9]+]]:_(s32) = G_EXTRACT [[BITCAST]](<5 x s32>), 128 - ; CI-NEXT: G_STORE [[EXTRACT]](<4 x s32>), [[COPY]](p1) :: (store (<4 x s32>), align 4, addrspace 1) + ; CI-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[BITCAST]](<5 x s32>) + ; CI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[UV]](s32), [[UV1]](s32), [[UV2]](s32), [[UV3]](s32) + ; CI-NEXT: G_STORE [[BUILD_VECTOR]](<4 x s32>), [[COPY]](p1) :: (store (<4 x s32>), align 4, addrspace 1) ; CI-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 16 ; CI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64) - ; CI-NEXT: G_STORE [[EXTRACT1]](s32), [[PTR_ADD]](p1) :: (store (s32) into unknown-address + 16, addrspace 1) + ; CI-NEXT: G_STORE [[UV4]](s32), [[PTR_ADD]](p1) :: (store (s32) into unknown-address + 16, addrspace 1) ; VI-LABEL: name: test_store_global_v5p3_align4 ; VI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 ; VI-NEXT: [[COPY1:%[0-9]+]]:_(<5 x p3>) = COPY $vgpr2_vgpr3_vgpr4_vgpr5_vgpr6 ; VI-NEXT: [[BITCAST:%[0-9]+]]:_(<5 x s32>) = G_BITCAST [[COPY1]](<5 x p3>) - ; VI-NEXT: [[EXTRACT:%[0-9]+]]:_(<4 x s32>) = G_EXTRACT [[BITCAST]](<5 x s32>), 0 - ; VI-NEXT: [[EXTRACT1:%[0-9]+]]:_(s32) = G_EXTRACT [[BITCAST]](<5 x s32>), 128 - ; VI-NEXT: G_STORE [[EXTRACT]](<4 x s32>), [[COPY]](p1) :: (store (<4 x s32>), align 4, addrspace 1) + ; VI-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[BITCAST]](<5 x s32>) + ; VI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[UV]](s32), [[UV1]](s32), [[UV2]](s32), [[UV3]](s32) + ; VI-NEXT: G_STORE [[BUILD_VECTOR]](<4 x s32>), [[COPY]](p1) :: (store (<4 x s32>), align 4, addrspace 1) ; VI-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 16 ; VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64) - ; VI-NEXT: G_STORE [[EXTRACT1]](s32), [[PTR_ADD]](p1) :: (store (s32) into unknown-address + 16, addrspace 1) + ; VI-NEXT: G_STORE [[UV4]](s32), [[PTR_ADD]](p1) :: (store (s32) into unknown-address + 16, addrspace 1) ; GFX9-LABEL: name: test_store_global_v5p3_align4 ; GFX9: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(<5 x p3>) = COPY $vgpr2_vgpr3_vgpr4_vgpr5_vgpr6 ; GFX9-NEXT: [[BITCAST:%[0-9]+]]:_(<5 x s32>) = G_BITCAST [[COPY1]](<5 x p3>) - ; GFX9-NEXT: [[EXTRACT:%[0-9]+]]:_(<4 x s32>) = G_EXTRACT [[BITCAST]](<5 x s32>), 0 - ; GFX9-NEXT: [[EXTRACT1:%[0-9]+]]:_(s32) = G_EXTRACT [[BITCAST]](<5 x s32>), 128 - ; GFX9-NEXT: G_STORE [[EXTRACT]](<4 x s32>), [[COPY]](p1) :: (store (<4 x s32>), align 4, addrspace 1) + ; GFX9-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[BITCAST]](<5 x s32>) + ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[UV]](s32), [[UV1]](s32), [[UV2]](s32), [[UV3]](s32) + ; GFX9-NEXT: G_STORE [[BUILD_VECTOR]](<4 x s32>), [[COPY]](p1) :: (store (<4 x s32>), align 4, addrspace 1) ; GFX9-NEXT: 
[[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 16 ; GFX9-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64) - ; GFX9-NEXT: G_STORE [[EXTRACT1]](s32), [[PTR_ADD]](p1) :: (store (s32) into unknown-address + 16, addrspace 1) + ; GFX9-NEXT: G_STORE [[UV4]](s32), [[PTR_ADD]](p1) :: (store (s32) into unknown-address + 16, addrspace 1) %0:_(p1) = COPY $vgpr0_vgpr1 %1:_(<5 x p3>) = COPY $vgpr2_vgpr3_vgpr4_vgpr5_vgpr6 G_STORE %1, %0 :: (store (<5 x p3>), align 4, addrspace 1) @@ -5615,42 +5584,42 @@ body: | ; SI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 ; SI-NEXT: [[COPY1:%[0-9]+]]:_(<5 x p3>) = COPY $vgpr2_vgpr3_vgpr4_vgpr5_vgpr6 ; SI-NEXT: [[BITCAST:%[0-9]+]]:_(<5 x s32>) = G_BITCAST [[COPY1]](<5 x p3>) - ; SI-NEXT: [[EXTRACT:%[0-9]+]]:_(<4 x s32>) = G_EXTRACT [[BITCAST]](<5 x s32>), 0 - ; SI-NEXT: [[EXTRACT1:%[0-9]+]]:_(s32) = G_EXTRACT [[BITCAST]](<5 x s32>), 128 - ; SI-NEXT: G_STORE [[EXTRACT]](<4 x s32>), [[COPY]](p1) :: (store (<4 x s32>), align 8, addrspace 1) + ; SI-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[BITCAST]](<5 x s32>) + ; SI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[UV]](s32), [[UV1]](s32), [[UV2]](s32), [[UV3]](s32) + ; SI-NEXT: G_STORE [[BUILD_VECTOR]](<4 x s32>), [[COPY]](p1) :: (store (<4 x s32>), align 8, addrspace 1) ; SI-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 16 ; SI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64) - ; SI-NEXT: G_STORE [[EXTRACT1]](s32), [[PTR_ADD]](p1) :: (store (s32) into unknown-address + 16, align 8, addrspace 1) + ; SI-NEXT: G_STORE [[UV4]](s32), [[PTR_ADD]](p1) :: (store (s32) into unknown-address + 16, align 8, addrspace 1) ; CI-LABEL: name: test_store_global_v5p3_align8 ; CI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 ; CI-NEXT: [[COPY1:%[0-9]+]]:_(<5 x p3>) = COPY $vgpr2_vgpr3_vgpr4_vgpr5_vgpr6 ; CI-NEXT: [[BITCAST:%[0-9]+]]:_(<5 x s32>) = G_BITCAST [[COPY1]](<5 x p3>) - ; CI-NEXT: [[EXTRACT:%[0-9]+]]:_(<4 x s32>) = G_EXTRACT [[BITCAST]](<5 x s32>), 0 - ; CI-NEXT: [[EXTRACT1:%[0-9]+]]:_(s32) = G_EXTRACT [[BITCAST]](<5 x s32>), 128 - ; CI-NEXT: G_STORE [[EXTRACT]](<4 x s32>), [[COPY]](p1) :: (store (<4 x s32>), align 8, addrspace 1) + ; CI-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[BITCAST]](<5 x s32>) + ; CI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[UV]](s32), [[UV1]](s32), [[UV2]](s32), [[UV3]](s32) + ; CI-NEXT: G_STORE [[BUILD_VECTOR]](<4 x s32>), [[COPY]](p1) :: (store (<4 x s32>), align 8, addrspace 1) ; CI-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 16 ; CI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64) - ; CI-NEXT: G_STORE [[EXTRACT1]](s32), [[PTR_ADD]](p1) :: (store (s32) into unknown-address + 16, align 8, addrspace 1) + ; CI-NEXT: G_STORE [[UV4]](s32), [[PTR_ADD]](p1) :: (store (s32) into unknown-address + 16, align 8, addrspace 1) ; VI-LABEL: name: test_store_global_v5p3_align8 ; VI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 ; VI-NEXT: [[COPY1:%[0-9]+]]:_(<5 x p3>) = COPY $vgpr2_vgpr3_vgpr4_vgpr5_vgpr6 ; VI-NEXT: [[BITCAST:%[0-9]+]]:_(<5 x s32>) = G_BITCAST [[COPY1]](<5 x p3>) - ; VI-NEXT: [[EXTRACT:%[0-9]+]]:_(<4 x s32>) = G_EXTRACT [[BITCAST]](<5 x s32>), 0 - ; VI-NEXT: [[EXTRACT1:%[0-9]+]]:_(s32) = G_EXTRACT [[BITCAST]](<5 x s32>), 128 - ; VI-NEXT: G_STORE [[EXTRACT]](<4 x s32>), [[COPY]](p1) :: (store (<4 x s32>), align 8, addrspace 1) + ; 
VI-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[BITCAST]](<5 x s32>) + ; VI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[UV]](s32), [[UV1]](s32), [[UV2]](s32), [[UV3]](s32) + ; VI-NEXT: G_STORE [[BUILD_VECTOR]](<4 x s32>), [[COPY]](p1) :: (store (<4 x s32>), align 8, addrspace 1) ; VI-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 16 ; VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64) - ; VI-NEXT: G_STORE [[EXTRACT1]](s32), [[PTR_ADD]](p1) :: (store (s32) into unknown-address + 16, align 8, addrspace 1) + ; VI-NEXT: G_STORE [[UV4]](s32), [[PTR_ADD]](p1) :: (store (s32) into unknown-address + 16, align 8, addrspace 1) ; GFX9-LABEL: name: test_store_global_v5p3_align8 ; GFX9: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(<5 x p3>) = COPY $vgpr2_vgpr3_vgpr4_vgpr5_vgpr6 ; GFX9-NEXT: [[BITCAST:%[0-9]+]]:_(<5 x s32>) = G_BITCAST [[COPY1]](<5 x p3>) - ; GFX9-NEXT: [[EXTRACT:%[0-9]+]]:_(<4 x s32>) = G_EXTRACT [[BITCAST]](<5 x s32>), 0 - ; GFX9-NEXT: [[EXTRACT1:%[0-9]+]]:_(s32) = G_EXTRACT [[BITCAST]](<5 x s32>), 128 - ; GFX9-NEXT: G_STORE [[EXTRACT]](<4 x s32>), [[COPY]](p1) :: (store (<4 x s32>), align 8, addrspace 1) + ; GFX9-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[BITCAST]](<5 x s32>) + ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[UV]](s32), [[UV1]](s32), [[UV2]](s32), [[UV3]](s32) + ; GFX9-NEXT: G_STORE [[BUILD_VECTOR]](<4 x s32>), [[COPY]](p1) :: (store (<4 x s32>), align 8, addrspace 1) ; GFX9-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 16 ; GFX9-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64) - ; GFX9-NEXT: G_STORE [[EXTRACT1]](s32), [[PTR_ADD]](p1) :: (store (s32) into unknown-address + 16, align 8, addrspace 1) + ; GFX9-NEXT: G_STORE [[UV4]](s32), [[PTR_ADD]](p1) :: (store (s32) into unknown-address + 16, align 8, addrspace 1) %0:_(p1) = COPY $vgpr0_vgpr1 %1:_(<5 x p3>) = COPY $vgpr2_vgpr3_vgpr4_vgpr5_vgpr6 G_STORE %1, %0 :: (store (<5 x p3>), align 8, addrspace 1) @@ -5666,42 +5635,42 @@ body: | ; SI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 ; SI-NEXT: [[COPY1:%[0-9]+]]:_(<5 x p3>) = COPY $vgpr2_vgpr3_vgpr4_vgpr5_vgpr6 ; SI-NEXT: [[BITCAST:%[0-9]+]]:_(<5 x s32>) = G_BITCAST [[COPY1]](<5 x p3>) - ; SI-NEXT: [[EXTRACT:%[0-9]+]]:_(<4 x s32>) = G_EXTRACT [[BITCAST]](<5 x s32>), 0 - ; SI-NEXT: [[EXTRACT1:%[0-9]+]]:_(s32) = G_EXTRACT [[BITCAST]](<5 x s32>), 128 - ; SI-NEXT: G_STORE [[EXTRACT]](<4 x s32>), [[COPY]](p1) :: (store (<4 x s32>), addrspace 1) + ; SI-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[BITCAST]](<5 x s32>) + ; SI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[UV]](s32), [[UV1]](s32), [[UV2]](s32), [[UV3]](s32) + ; SI-NEXT: G_STORE [[BUILD_VECTOR]](<4 x s32>), [[COPY]](p1) :: (store (<4 x s32>), addrspace 1) ; SI-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 16 ; SI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64) - ; SI-NEXT: G_STORE [[EXTRACT1]](s32), [[PTR_ADD]](p1) :: (store (s32) into unknown-address + 16, align 16, addrspace 1) + ; SI-NEXT: G_STORE [[UV4]](s32), [[PTR_ADD]](p1) :: (store (s32) into unknown-address + 16, align 16, addrspace 1) ; CI-LABEL: name: test_store_global_v5p3_align16 ; CI: [[COPY:%[0-9]+]]:_(p1) = COPY 
$vgpr0_vgpr1 ; CI-NEXT: [[COPY1:%[0-9]+]]:_(<5 x p3>) = COPY $vgpr2_vgpr3_vgpr4_vgpr5_vgpr6 ; CI-NEXT: [[BITCAST:%[0-9]+]]:_(<5 x s32>) = G_BITCAST [[COPY1]](<5 x p3>) - ; CI-NEXT: [[EXTRACT:%[0-9]+]]:_(<4 x s32>) = G_EXTRACT [[BITCAST]](<5 x s32>), 0 - ; CI-NEXT: [[EXTRACT1:%[0-9]+]]:_(s32) = G_EXTRACT [[BITCAST]](<5 x s32>), 128 - ; CI-NEXT: G_STORE [[EXTRACT]](<4 x s32>), [[COPY]](p1) :: (store (<4 x s32>), addrspace 1) + ; CI-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[BITCAST]](<5 x s32>) + ; CI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[UV]](s32), [[UV1]](s32), [[UV2]](s32), [[UV3]](s32) + ; CI-NEXT: G_STORE [[BUILD_VECTOR]](<4 x s32>), [[COPY]](p1) :: (store (<4 x s32>), addrspace 1) ; CI-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 16 ; CI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64) - ; CI-NEXT: G_STORE [[EXTRACT1]](s32), [[PTR_ADD]](p1) :: (store (s32) into unknown-address + 16, align 16, addrspace 1) + ; CI-NEXT: G_STORE [[UV4]](s32), [[PTR_ADD]](p1) :: (store (s32) into unknown-address + 16, align 16, addrspace 1) ; VI-LABEL: name: test_store_global_v5p3_align16 ; VI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 ; VI-NEXT: [[COPY1:%[0-9]+]]:_(<5 x p3>) = COPY $vgpr2_vgpr3_vgpr4_vgpr5_vgpr6 ; VI-NEXT: [[BITCAST:%[0-9]+]]:_(<5 x s32>) = G_BITCAST [[COPY1]](<5 x p3>) - ; VI-NEXT: [[EXTRACT:%[0-9]+]]:_(<4 x s32>) = G_EXTRACT [[BITCAST]](<5 x s32>), 0 - ; VI-NEXT: [[EXTRACT1:%[0-9]+]]:_(s32) = G_EXTRACT [[BITCAST]](<5 x s32>), 128 - ; VI-NEXT: G_STORE [[EXTRACT]](<4 x s32>), [[COPY]](p1) :: (store (<4 x s32>), addrspace 1) + ; VI-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[BITCAST]](<5 x s32>) + ; VI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[UV]](s32), [[UV1]](s32), [[UV2]](s32), [[UV3]](s32) + ; VI-NEXT: G_STORE [[BUILD_VECTOR]](<4 x s32>), [[COPY]](p1) :: (store (<4 x s32>), addrspace 1) ; VI-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 16 ; VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64) - ; VI-NEXT: G_STORE [[EXTRACT1]](s32), [[PTR_ADD]](p1) :: (store (s32) into unknown-address + 16, align 16, addrspace 1) + ; VI-NEXT: G_STORE [[UV4]](s32), [[PTR_ADD]](p1) :: (store (s32) into unknown-address + 16, align 16, addrspace 1) ; GFX9-LABEL: name: test_store_global_v5p3_align16 ; GFX9: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(<5 x p3>) = COPY $vgpr2_vgpr3_vgpr4_vgpr5_vgpr6 ; GFX9-NEXT: [[BITCAST:%[0-9]+]]:_(<5 x s32>) = G_BITCAST [[COPY1]](<5 x p3>) - ; GFX9-NEXT: [[EXTRACT:%[0-9]+]]:_(<4 x s32>) = G_EXTRACT [[BITCAST]](<5 x s32>), 0 - ; GFX9-NEXT: [[EXTRACT1:%[0-9]+]]:_(s32) = G_EXTRACT [[BITCAST]](<5 x s32>), 128 - ; GFX9-NEXT: G_STORE [[EXTRACT]](<4 x s32>), [[COPY]](p1) :: (store (<4 x s32>), addrspace 1) + ; GFX9-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[BITCAST]](<5 x s32>) + ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[UV]](s32), [[UV1]](s32), [[UV2]](s32), [[UV3]](s32) + ; GFX9-NEXT: G_STORE [[BUILD_VECTOR]](<4 x s32>), [[COPY]](p1) :: (store (<4 x s32>), addrspace 1) ; GFX9-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 16 ; GFX9-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64) - ; GFX9-NEXT: G_STORE [[EXTRACT1]](s32), 
[[PTR_ADD]](p1) :: (store (s32) into unknown-address + 16, align 16, addrspace 1) + ; GFX9-NEXT: G_STORE [[UV4]](s32), [[PTR_ADD]](p1) :: (store (s32) into unknown-address + 16, align 16, addrspace 1) %0:_(p1) = COPY $vgpr0_vgpr1 %1:_(<5 x p3>) = COPY $vgpr2_vgpr3_vgpr4_vgpr5_vgpr6 G_STORE %1, %0 :: (store (<5 x p3>), align 16, addrspace 1) @@ -5717,42 +5686,42 @@ body: | ; SI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 ; SI-NEXT: [[DEF:%[0-9]+]]:_(<10 x s16>) = G_IMPLICIT_DEF ; SI-NEXT: [[BITCAST:%[0-9]+]]:_(<5 x s32>) = G_BITCAST [[DEF]](<10 x s16>) - ; SI-NEXT: [[EXTRACT:%[0-9]+]]:_(<4 x s32>) = G_EXTRACT [[BITCAST]](<5 x s32>), 0 - ; SI-NEXT: [[EXTRACT1:%[0-9]+]]:_(s32) = G_EXTRACT [[BITCAST]](<5 x s32>), 128 - ; SI-NEXT: G_STORE [[EXTRACT]](<4 x s32>), [[COPY]](p1) :: (store (<4 x s32>), addrspace 1) + ; SI-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[BITCAST]](<5 x s32>) + ; SI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[UV]](s32), [[UV1]](s32), [[UV2]](s32), [[UV3]](s32) + ; SI-NEXT: G_STORE [[BUILD_VECTOR]](<4 x s32>), [[COPY]](p1) :: (store (<4 x s32>), addrspace 1) ; SI-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 16 ; SI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64) - ; SI-NEXT: G_STORE [[EXTRACT1]](s32), [[PTR_ADD]](p1) :: (store (s32) into unknown-address + 16, align 16, addrspace 1) + ; SI-NEXT: G_STORE [[UV4]](s32), [[PTR_ADD]](p1) :: (store (s32) into unknown-address + 16, align 16, addrspace 1) ; CI-LABEL: name: test_store_global_v10s16_align4 ; CI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 ; CI-NEXT: [[DEF:%[0-9]+]]:_(<10 x s16>) = G_IMPLICIT_DEF ; CI-NEXT: [[BITCAST:%[0-9]+]]:_(<5 x s32>) = G_BITCAST [[DEF]](<10 x s16>) - ; CI-NEXT: [[EXTRACT:%[0-9]+]]:_(<4 x s32>) = G_EXTRACT [[BITCAST]](<5 x s32>), 0 - ; CI-NEXT: [[EXTRACT1:%[0-9]+]]:_(s32) = G_EXTRACT [[BITCAST]](<5 x s32>), 128 - ; CI-NEXT: G_STORE [[EXTRACT]](<4 x s32>), [[COPY]](p1) :: (store (<4 x s32>), addrspace 1) + ; CI-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[BITCAST]](<5 x s32>) + ; CI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[UV]](s32), [[UV1]](s32), [[UV2]](s32), [[UV3]](s32) + ; CI-NEXT: G_STORE [[BUILD_VECTOR]](<4 x s32>), [[COPY]](p1) :: (store (<4 x s32>), addrspace 1) ; CI-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 16 ; CI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64) - ; CI-NEXT: G_STORE [[EXTRACT1]](s32), [[PTR_ADD]](p1) :: (store (s32) into unknown-address + 16, align 16, addrspace 1) + ; CI-NEXT: G_STORE [[UV4]](s32), [[PTR_ADD]](p1) :: (store (s32) into unknown-address + 16, align 16, addrspace 1) ; VI-LABEL: name: test_store_global_v10s16_align4 ; VI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 ; VI-NEXT: [[DEF:%[0-9]+]]:_(<10 x s16>) = G_IMPLICIT_DEF ; VI-NEXT: [[BITCAST:%[0-9]+]]:_(<5 x s32>) = G_BITCAST [[DEF]](<10 x s16>) - ; VI-NEXT: [[EXTRACT:%[0-9]+]]:_(<4 x s32>) = G_EXTRACT [[BITCAST]](<5 x s32>), 0 - ; VI-NEXT: [[EXTRACT1:%[0-9]+]]:_(s32) = G_EXTRACT [[BITCAST]](<5 x s32>), 128 - ; VI-NEXT: G_STORE [[EXTRACT]](<4 x s32>), [[COPY]](p1) :: (store (<4 x s32>), addrspace 1) + ; VI-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[BITCAST]](<5 x s32>) + ; VI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR 
[[UV]](s32), [[UV1]](s32), [[UV2]](s32), [[UV3]](s32) + ; VI-NEXT: G_STORE [[BUILD_VECTOR]](<4 x s32>), [[COPY]](p1) :: (store (<4 x s32>), addrspace 1) ; VI-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 16 ; VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64) - ; VI-NEXT: G_STORE [[EXTRACT1]](s32), [[PTR_ADD]](p1) :: (store (s32) into unknown-address + 16, align 16, addrspace 1) + ; VI-NEXT: G_STORE [[UV4]](s32), [[PTR_ADD]](p1) :: (store (s32) into unknown-address + 16, align 16, addrspace 1) ; GFX9-LABEL: name: test_store_global_v10s16_align4 ; GFX9: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 ; GFX9-NEXT: [[DEF:%[0-9]+]]:_(<10 x s16>) = G_IMPLICIT_DEF ; GFX9-NEXT: [[BITCAST:%[0-9]+]]:_(<5 x s32>) = G_BITCAST [[DEF]](<10 x s16>) - ; GFX9-NEXT: [[EXTRACT:%[0-9]+]]:_(<4 x s32>) = G_EXTRACT [[BITCAST]](<5 x s32>), 0 - ; GFX9-NEXT: [[EXTRACT1:%[0-9]+]]:_(s32) = G_EXTRACT [[BITCAST]](<5 x s32>), 128 - ; GFX9-NEXT: G_STORE [[EXTRACT]](<4 x s32>), [[COPY]](p1) :: (store (<4 x s32>), addrspace 1) + ; GFX9-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[BITCAST]](<5 x s32>) + ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[UV]](s32), [[UV1]](s32), [[UV2]](s32), [[UV3]](s32) + ; GFX9-NEXT: G_STORE [[BUILD_VECTOR]](<4 x s32>), [[COPY]](p1) :: (store (<4 x s32>), addrspace 1) ; GFX9-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 16 ; GFX9-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64) - ; GFX9-NEXT: G_STORE [[EXTRACT1]](s32), [[PTR_ADD]](p1) :: (store (s32) into unknown-address + 16, align 16, addrspace 1) + ; GFX9-NEXT: G_STORE [[UV4]](s32), [[PTR_ADD]](p1) :: (store (s32) into unknown-address + 16, align 16, addrspace 1) %0:_(p1) = COPY $vgpr0_vgpr1 %1:_(<10 x s16>) = G_IMPLICIT_DEF G_STORE %1, %0 :: (store (<10 x s16>), align 16, addrspace 1) @@ -5767,159 +5736,182 @@ body: | ; SI-LABEL: name: test_store_global_v11s16_align4 ; SI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 ; SI-NEXT: [[DEF:%[0-9]+]]:_(<12 x s16>) = G_IMPLICIT_DEF - ; SI-NEXT: [[DEF1:%[0-9]+]]:_(<12 x s16>) = G_IMPLICIT_DEF ; SI-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>), [[UV2:%[0-9]+]]:_(<2 x s16>), [[UV3:%[0-9]+]]:_(<2 x s16>), [[UV4:%[0-9]+]]:_(<2 x s16>), [[UV5:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<12 x s16>) - ; SI-NEXT: [[UV6:%[0-9]+]]:_(<2 x s16>), [[UV7:%[0-9]+]]:_(<2 x s16>), [[UV8:%[0-9]+]]:_(<2 x s16>), [[UV9:%[0-9]+]]:_(<2 x s16>), [[UV10:%[0-9]+]]:_(<2 x s16>), [[UV11:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF1]](<12 x s16>) - ; SI-NEXT: [[UV12:%[0-9]+]]:_(<2 x s16>), [[UV13:%[0-9]+]]:_(<2 x s16>), [[UV14:%[0-9]+]]:_(<2 x s16>), [[UV15:%[0-9]+]]:_(<2 x s16>), [[UV16:%[0-9]+]]:_(<2 x s16>), [[UV17:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF1]](<12 x s16>) - ; SI-NEXT: [[UV18:%[0-9]+]]:_(<2 x s16>), [[UV19:%[0-9]+]]:_(<2 x s16>), [[UV20:%[0-9]+]]:_(<2 x s16>), [[UV21:%[0-9]+]]:_(<2 x s16>), [[UV22:%[0-9]+]]:_(<2 x s16>), [[UV23:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF1]](<12 x s16>) - ; SI-NEXT: [[UV24:%[0-9]+]]:_(<2 x s16>), [[UV25:%[0-9]+]]:_(<2 x s16>), [[UV26:%[0-9]+]]:_(<2 x s16>), [[UV27:%[0-9]+]]:_(<2 x s16>), [[UV28:%[0-9]+]]:_(<2 x s16>), [[UV29:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF1]](<12 x s16>) - ; SI-NEXT: [[UV30:%[0-9]+]]:_(<2 x s16>), [[UV31:%[0-9]+]]:_(<2 x s16>), [[UV32:%[0-9]+]]:_(<2 x s16>), [[UV33:%[0-9]+]]:_(<2 x s16>), [[UV34:%[0-9]+]]:_(<2 x s16>), [[UV35:%[0-9]+]]:_(<2 x s16>) = 
G_UNMERGE_VALUES [[DEF1]](<12 x s16>) - ; SI-NEXT: [[UV36:%[0-9]+]]:_(<2 x s16>), [[UV37:%[0-9]+]]:_(<2 x s16>), [[UV38:%[0-9]+]]:_(<2 x s16>), [[UV39:%[0-9]+]]:_(<2 x s16>), [[UV40:%[0-9]+]]:_(<2 x s16>), [[UV41:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF1]](<12 x s16>) - ; SI-NEXT: [[UV42:%[0-9]+]]:_(<2 x s16>), [[UV43:%[0-9]+]]:_(<2 x s16>), [[UV44:%[0-9]+]]:_(<2 x s16>), [[UV45:%[0-9]+]]:_(<2 x s16>), [[UV46:%[0-9]+]]:_(<2 x s16>), [[UV47:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF1]](<12 x s16>) - ; SI-NEXT: [[UV48:%[0-9]+]]:_(<2 x s16>), [[UV49:%[0-9]+]]:_(<2 x s16>), [[UV50:%[0-9]+]]:_(<2 x s16>), [[UV51:%[0-9]+]]:_(<2 x s16>), [[UV52:%[0-9]+]]:_(<2 x s16>), [[UV53:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF1]](<12 x s16>) - ; SI-NEXT: [[UV54:%[0-9]+]]:_(<2 x s16>), [[UV55:%[0-9]+]]:_(<2 x s16>), [[UV56:%[0-9]+]]:_(<2 x s16>), [[UV57:%[0-9]+]]:_(<2 x s16>), [[UV58:%[0-9]+]]:_(<2 x s16>), [[UV59:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF1]](<12 x s16>) - ; SI-NEXT: [[UV60:%[0-9]+]]:_(<2 x s16>), [[UV61:%[0-9]+]]:_(<2 x s16>), [[UV62:%[0-9]+]]:_(<2 x s16>), [[UV63:%[0-9]+]]:_(<2 x s16>), [[UV64:%[0-9]+]]:_(<2 x s16>), [[UV65:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF1]](<12 x s16>) - ; SI-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<132 x s16>) = G_CONCAT_VECTORS [[UV]](<2 x s16>), [[UV1]](<2 x s16>), [[UV2]](<2 x s16>), [[UV3]](<2 x s16>), [[UV4]](<2 x s16>), [[UV5]](<2 x s16>), [[UV6]](<2 x s16>), [[UV7]](<2 x s16>), [[UV8]](<2 x s16>), [[UV9]](<2 x s16>), [[UV10]](<2 x s16>), [[UV11]](<2 x s16>), [[UV12]](<2 x s16>), [[UV13]](<2 x s16>), [[UV14]](<2 x s16>), [[UV15]](<2 x s16>), [[UV16]](<2 x s16>), [[UV17]](<2 x s16>), [[UV18]](<2 x s16>), [[UV19]](<2 x s16>), [[UV20]](<2 x s16>), [[UV21]](<2 x s16>), [[UV22]](<2 x s16>), [[UV23]](<2 x s16>), [[UV24]](<2 x s16>), [[UV25]](<2 x s16>), [[UV26]](<2 x s16>), [[UV27]](<2 x s16>), [[UV28]](<2 x s16>), [[UV29]](<2 x s16>), [[UV30]](<2 x s16>), [[UV31]](<2 x s16>), [[UV32]](<2 x s16>), [[UV33]](<2 x s16>), [[UV34]](<2 x s16>), [[UV35]](<2 x s16>), [[UV36]](<2 x s16>), [[UV37]](<2 x s16>), [[UV38]](<2 x s16>), [[UV39]](<2 x s16>), [[UV40]](<2 x s16>), [[UV41]](<2 x s16>), [[UV42]](<2 x s16>), [[UV43]](<2 x s16>), [[UV44]](<2 x s16>), [[UV45]](<2 x s16>), [[UV46]](<2 x s16>), [[UV47]](<2 x s16>), [[UV48]](<2 x s16>), [[UV49]](<2 x s16>), [[UV50]](<2 x s16>), [[UV51]](<2 x s16>), [[UV52]](<2 x s16>), [[UV53]](<2 x s16>), [[UV54]](<2 x s16>), [[UV55]](<2 x s16>), [[UV56]](<2 x s16>), [[UV57]](<2 x s16>), [[UV58]](<2 x s16>), [[UV59]](<2 x s16>), [[UV60]](<2 x s16>), [[UV61]](<2 x s16>), [[UV62]](<2 x s16>), [[UV63]](<2 x s16>), [[UV64]](<2 x s16>), [[UV65]](<2 x s16>) - ; SI-NEXT: [[UV66:%[0-9]+]]:_(<11 x s16>), [[UV67:%[0-9]+]]:_(<11 x s16>), [[UV68:%[0-9]+]]:_(<11 x s16>), [[UV69:%[0-9]+]]:_(<11 x s16>), [[UV70:%[0-9]+]]:_(<11 x s16>), [[UV71:%[0-9]+]]:_(<11 x s16>), [[UV72:%[0-9]+]]:_(<11 x s16>), [[UV73:%[0-9]+]]:_(<11 x s16>), [[UV74:%[0-9]+]]:_(<11 x s16>), [[UV75:%[0-9]+]]:_(<11 x s16>), [[UV76:%[0-9]+]]:_(<11 x s16>), [[UV77:%[0-9]+]]:_(<11 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<132 x s16>) - ; SI-NEXT: [[INSERT:%[0-9]+]]:_(<12 x s16>) = G_INSERT [[DEF1]], [[UV66]](<11 x s16>), 0 - ; SI-NEXT: [[EXTRACT:%[0-9]+]]:_(<8 x s16>) = G_EXTRACT [[INSERT]](<12 x s16>), 0 - ; SI-NEXT: [[INSERT1:%[0-9]+]]:_(<12 x s16>) = G_INSERT [[DEF1]], [[UV66]](<11 x s16>), 0 - ; SI-NEXT: [[EXTRACT1:%[0-9]+]]:_(<3 x s16>) = G_EXTRACT [[INSERT1]](<12 x s16>), 128 - ; SI-NEXT: [[BITCAST:%[0-9]+]]:_(<4 x s32>) = G_BITCAST [[EXTRACT]](<8 x 
s16>) - ; SI-NEXT: G_STORE [[BITCAST]](<4 x s32>), [[COPY]](p1) :: (store (<4 x s32>), addrspace 1) - ; SI-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 16 - ; SI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64) - ; SI-NEXT: [[DEF2:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF - ; SI-NEXT: [[INSERT2:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF2]], [[EXTRACT1]](<3 x s16>), 0 - ; SI-NEXT: [[UV78:%[0-9]+]]:_(<2 x s16>), [[UV79:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[INSERT2]](<4 x s16>) - ; SI-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV78]](<2 x s16>) - ; SI-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; SI-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C1]](s32) - ; SI-NEXT: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[UV79]](<2 x s16>) - ; SI-NEXT: G_STORE [[BITCAST1]](s32), [[PTR_ADD]](p1) :: (store (s16) into unknown-address + 16, align 16, addrspace 1) - ; SI-NEXT: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 2 - ; SI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD]], [[C2]](s64) - ; SI-NEXT: G_STORE [[LSHR]](s32), [[PTR_ADD1]](p1) :: (store (s16) into unknown-address + 18, addrspace 1) - ; SI-NEXT: [[C3:%[0-9]+]]:_(s64) = G_CONSTANT i64 4 - ; SI-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD]], [[C3]](s64) - ; SI-NEXT: G_STORE [[BITCAST2]](s32), [[PTR_ADD2]](p1) :: (store (s16) into unknown-address + 20, align 4, addrspace 1) + ; SI-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>) + ; SI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; SI-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) + ; SI-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>) + ; SI-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C]](s32) + ; SI-NEXT: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[UV2]](<2 x s16>) + ; SI-NEXT: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C]](s32) + ; SI-NEXT: [[BITCAST3:%[0-9]+]]:_(s32) = G_BITCAST [[UV3]](<2 x s16>) + ; SI-NEXT: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST3]], [[C]](s32) + ; SI-NEXT: [[BITCAST4:%[0-9]+]]:_(s32) = G_BITCAST [[UV4]](<2 x s16>) + ; SI-NEXT: [[LSHR4:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST4]], [[C]](s32) + ; SI-NEXT: [[BITCAST5:%[0-9]+]]:_(s32) = G_BITCAST [[UV5]](<2 x s16>) + ; SI-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 + ; SI-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[BITCAST]], [[C1]] + ; SI-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LSHR]], [[C1]] + ; SI-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C]](s32) + ; SI-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]] + ; SI-NEXT: [[BITCAST6:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) + ; SI-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[BITCAST1]], [[C1]] + ; SI-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[LSHR1]], [[C1]] + ; SI-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C]](s32) + ; SI-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL1]] + ; SI-NEXT: [[BITCAST7:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32) + ; SI-NEXT: [[AND4:%[0-9]+]]:_(s32) = G_AND [[BITCAST2]], [[C1]] + ; SI-NEXT: [[AND5:%[0-9]+]]:_(s32) = G_AND [[LSHR2]], [[C1]] + ; SI-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[C]](s32) + ; SI-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[AND4]], [[SHL2]] + ; SI-NEXT: [[BITCAST8:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR2]](s32) + ; SI-NEXT: [[AND6:%[0-9]+]]:_(s32) = G_AND [[BITCAST3]], [[C1]] + ; SI-NEXT: [[AND7:%[0-9]+]]:_(s32) = G_AND [[LSHR3]], [[C1]] + ; SI-NEXT: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[AND7]], [[C]](s32) + ; SI-NEXT: [[OR3:%[0-9]+]]:_(s32) = G_OR [[AND6]], [[SHL3]] + ; SI-NEXT: 
[[BITCAST9:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR3]](s32) + ; SI-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<8 x s16>) = G_CONCAT_VECTORS [[BITCAST6]](<2 x s16>), [[BITCAST7]](<2 x s16>), [[BITCAST8]](<2 x s16>), [[BITCAST9]](<2 x s16>) + ; SI-NEXT: [[BITCAST10:%[0-9]+]]:_(<4 x s32>) = G_BITCAST [[CONCAT_VECTORS]](<8 x s16>) + ; SI-NEXT: G_STORE [[BITCAST10]](<4 x s32>), [[COPY]](p1) :: (store (<4 x s32>), addrspace 1) + ; SI-NEXT: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 16 + ; SI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C2]](s64) + ; SI-NEXT: G_STORE [[BITCAST4]](s32), [[PTR_ADD]](p1) :: (store (s16) into unknown-address + 16, align 16, addrspace 1) + ; SI-NEXT: [[C3:%[0-9]+]]:_(s64) = G_CONSTANT i64 2 + ; SI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD]], [[C3]](s64) + ; SI-NEXT: G_STORE [[LSHR4]](s32), [[PTR_ADD1]](p1) :: (store (s16) into unknown-address + 18, addrspace 1) + ; SI-NEXT: [[C4:%[0-9]+]]:_(s64) = G_CONSTANT i64 4 + ; SI-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD]], [[C4]](s64) + ; SI-NEXT: G_STORE [[BITCAST5]](s32), [[PTR_ADD2]](p1) :: (store (s16) into unknown-address + 20, align 4, addrspace 1) ; CI-LABEL: name: test_store_global_v11s16_align4 ; CI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 ; CI-NEXT: [[DEF:%[0-9]+]]:_(<12 x s16>) = G_IMPLICIT_DEF - ; CI-NEXT: [[DEF1:%[0-9]+]]:_(<12 x s16>) = G_IMPLICIT_DEF ; CI-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>), [[UV2:%[0-9]+]]:_(<2 x s16>), [[UV3:%[0-9]+]]:_(<2 x s16>), [[UV4:%[0-9]+]]:_(<2 x s16>), [[UV5:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<12 x s16>) - ; CI-NEXT: [[UV6:%[0-9]+]]:_(<2 x s16>), [[UV7:%[0-9]+]]:_(<2 x s16>), [[UV8:%[0-9]+]]:_(<2 x s16>), [[UV9:%[0-9]+]]:_(<2 x s16>), [[UV10:%[0-9]+]]:_(<2 x s16>), [[UV11:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF1]](<12 x s16>) - ; CI-NEXT: [[UV12:%[0-9]+]]:_(<2 x s16>), [[UV13:%[0-9]+]]:_(<2 x s16>), [[UV14:%[0-9]+]]:_(<2 x s16>), [[UV15:%[0-9]+]]:_(<2 x s16>), [[UV16:%[0-9]+]]:_(<2 x s16>), [[UV17:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF1]](<12 x s16>) - ; CI-NEXT: [[UV18:%[0-9]+]]:_(<2 x s16>), [[UV19:%[0-9]+]]:_(<2 x s16>), [[UV20:%[0-9]+]]:_(<2 x s16>), [[UV21:%[0-9]+]]:_(<2 x s16>), [[UV22:%[0-9]+]]:_(<2 x s16>), [[UV23:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF1]](<12 x s16>) - ; CI-NEXT: [[UV24:%[0-9]+]]:_(<2 x s16>), [[UV25:%[0-9]+]]:_(<2 x s16>), [[UV26:%[0-9]+]]:_(<2 x s16>), [[UV27:%[0-9]+]]:_(<2 x s16>), [[UV28:%[0-9]+]]:_(<2 x s16>), [[UV29:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF1]](<12 x s16>) - ; CI-NEXT: [[UV30:%[0-9]+]]:_(<2 x s16>), [[UV31:%[0-9]+]]:_(<2 x s16>), [[UV32:%[0-9]+]]:_(<2 x s16>), [[UV33:%[0-9]+]]:_(<2 x s16>), [[UV34:%[0-9]+]]:_(<2 x s16>), [[UV35:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF1]](<12 x s16>) - ; CI-NEXT: [[UV36:%[0-9]+]]:_(<2 x s16>), [[UV37:%[0-9]+]]:_(<2 x s16>), [[UV38:%[0-9]+]]:_(<2 x s16>), [[UV39:%[0-9]+]]:_(<2 x s16>), [[UV40:%[0-9]+]]:_(<2 x s16>), [[UV41:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF1]](<12 x s16>) - ; CI-NEXT: [[UV42:%[0-9]+]]:_(<2 x s16>), [[UV43:%[0-9]+]]:_(<2 x s16>), [[UV44:%[0-9]+]]:_(<2 x s16>), [[UV45:%[0-9]+]]:_(<2 x s16>), [[UV46:%[0-9]+]]:_(<2 x s16>), [[UV47:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF1]](<12 x s16>) - ; CI-NEXT: [[UV48:%[0-9]+]]:_(<2 x s16>), [[UV49:%[0-9]+]]:_(<2 x s16>), [[UV50:%[0-9]+]]:_(<2 x s16>), [[UV51:%[0-9]+]]:_(<2 x s16>), [[UV52:%[0-9]+]]:_(<2 x s16>), [[UV53:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF1]](<12 x s16>) - ; CI-NEXT: [[UV54:%[0-9]+]]:_(<2 x s16>), 
[[UV55:%[0-9]+]]:_(<2 x s16>), [[UV56:%[0-9]+]]:_(<2 x s16>), [[UV57:%[0-9]+]]:_(<2 x s16>), [[UV58:%[0-9]+]]:_(<2 x s16>), [[UV59:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF1]](<12 x s16>) - ; CI-NEXT: [[UV60:%[0-9]+]]:_(<2 x s16>), [[UV61:%[0-9]+]]:_(<2 x s16>), [[UV62:%[0-9]+]]:_(<2 x s16>), [[UV63:%[0-9]+]]:_(<2 x s16>), [[UV64:%[0-9]+]]:_(<2 x s16>), [[UV65:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF1]](<12 x s16>) - ; CI-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<132 x s16>) = G_CONCAT_VECTORS [[UV]](<2 x s16>), [[UV1]](<2 x s16>), [[UV2]](<2 x s16>), [[UV3]](<2 x s16>), [[UV4]](<2 x s16>), [[UV5]](<2 x s16>), [[UV6]](<2 x s16>), [[UV7]](<2 x s16>), [[UV8]](<2 x s16>), [[UV9]](<2 x s16>), [[UV10]](<2 x s16>), [[UV11]](<2 x s16>), [[UV12]](<2 x s16>), [[UV13]](<2 x s16>), [[UV14]](<2 x s16>), [[UV15]](<2 x s16>), [[UV16]](<2 x s16>), [[UV17]](<2 x s16>), [[UV18]](<2 x s16>), [[UV19]](<2 x s16>), [[UV20]](<2 x s16>), [[UV21]](<2 x s16>), [[UV22]](<2 x s16>), [[UV23]](<2 x s16>), [[UV24]](<2 x s16>), [[UV25]](<2 x s16>), [[UV26]](<2 x s16>), [[UV27]](<2 x s16>), [[UV28]](<2 x s16>), [[UV29]](<2 x s16>), [[UV30]](<2 x s16>), [[UV31]](<2 x s16>), [[UV32]](<2 x s16>), [[UV33]](<2 x s16>), [[UV34]](<2 x s16>), [[UV35]](<2 x s16>), [[UV36]](<2 x s16>), [[UV37]](<2 x s16>), [[UV38]](<2 x s16>), [[UV39]](<2 x s16>), [[UV40]](<2 x s16>), [[UV41]](<2 x s16>), [[UV42]](<2 x s16>), [[UV43]](<2 x s16>), [[UV44]](<2 x s16>), [[UV45]](<2 x s16>), [[UV46]](<2 x s16>), [[UV47]](<2 x s16>), [[UV48]](<2 x s16>), [[UV49]](<2 x s16>), [[UV50]](<2 x s16>), [[UV51]](<2 x s16>), [[UV52]](<2 x s16>), [[UV53]](<2 x s16>), [[UV54]](<2 x s16>), [[UV55]](<2 x s16>), [[UV56]](<2 x s16>), [[UV57]](<2 x s16>), [[UV58]](<2 x s16>), [[UV59]](<2 x s16>), [[UV60]](<2 x s16>), [[UV61]](<2 x s16>), [[UV62]](<2 x s16>), [[UV63]](<2 x s16>), [[UV64]](<2 x s16>), [[UV65]](<2 x s16>) - ; CI-NEXT: [[UV66:%[0-9]+]]:_(<11 x s16>), [[UV67:%[0-9]+]]:_(<11 x s16>), [[UV68:%[0-9]+]]:_(<11 x s16>), [[UV69:%[0-9]+]]:_(<11 x s16>), [[UV70:%[0-9]+]]:_(<11 x s16>), [[UV71:%[0-9]+]]:_(<11 x s16>), [[UV72:%[0-9]+]]:_(<11 x s16>), [[UV73:%[0-9]+]]:_(<11 x s16>), [[UV74:%[0-9]+]]:_(<11 x s16>), [[UV75:%[0-9]+]]:_(<11 x s16>), [[UV76:%[0-9]+]]:_(<11 x s16>), [[UV77:%[0-9]+]]:_(<11 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<132 x s16>) - ; CI-NEXT: [[INSERT:%[0-9]+]]:_(<12 x s16>) = G_INSERT [[DEF1]], [[UV66]](<11 x s16>), 0 - ; CI-NEXT: [[EXTRACT:%[0-9]+]]:_(<8 x s16>) = G_EXTRACT [[INSERT]](<12 x s16>), 0 - ; CI-NEXT: [[INSERT1:%[0-9]+]]:_(<12 x s16>) = G_INSERT [[DEF1]], [[UV66]](<11 x s16>), 0 - ; CI-NEXT: [[EXTRACT1:%[0-9]+]]:_(<3 x s16>) = G_EXTRACT [[INSERT1]](<12 x s16>), 128 - ; CI-NEXT: [[BITCAST:%[0-9]+]]:_(<4 x s32>) = G_BITCAST [[EXTRACT]](<8 x s16>) - ; CI-NEXT: G_STORE [[BITCAST]](<4 x s32>), [[COPY]](p1) :: (store (<4 x s32>), addrspace 1) - ; CI-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 16 - ; CI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64) - ; CI-NEXT: [[DEF2:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF - ; CI-NEXT: [[INSERT2:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF2]], [[EXTRACT1]](<3 x s16>), 0 - ; CI-NEXT: [[UV78:%[0-9]+]]:_(<2 x s16>), [[UV79:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[INSERT2]](<4 x s16>) - ; CI-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV78]](<2 x s16>) - ; CI-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; CI-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C1]](s32) - ; CI-NEXT: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[UV79]](<2 x s16>) - ; CI-NEXT: G_STORE 
[[BITCAST1]](s32), [[PTR_ADD]](p1) :: (store (s16) into unknown-address + 16, align 16, addrspace 1) - ; CI-NEXT: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 2 - ; CI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD]], [[C2]](s64) - ; CI-NEXT: G_STORE [[LSHR]](s32), [[PTR_ADD1]](p1) :: (store (s16) into unknown-address + 18, addrspace 1) - ; CI-NEXT: [[C3:%[0-9]+]]:_(s64) = G_CONSTANT i64 4 - ; CI-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD]], [[C3]](s64) - ; CI-NEXT: G_STORE [[BITCAST2]](s32), [[PTR_ADD2]](p1) :: (store (s16) into unknown-address + 20, align 4, addrspace 1) + ; CI-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>) + ; CI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; CI-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) + ; CI-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>) + ; CI-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C]](s32) + ; CI-NEXT: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[UV2]](<2 x s16>) + ; CI-NEXT: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C]](s32) + ; CI-NEXT: [[BITCAST3:%[0-9]+]]:_(s32) = G_BITCAST [[UV3]](<2 x s16>) + ; CI-NEXT: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST3]], [[C]](s32) + ; CI-NEXT: [[BITCAST4:%[0-9]+]]:_(s32) = G_BITCAST [[UV4]](<2 x s16>) + ; CI-NEXT: [[LSHR4:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST4]], [[C]](s32) + ; CI-NEXT: [[BITCAST5:%[0-9]+]]:_(s32) = G_BITCAST [[UV5]](<2 x s16>) + ; CI-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 + ; CI-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[BITCAST]], [[C1]] + ; CI-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LSHR]], [[C1]] + ; CI-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C]](s32) + ; CI-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]] + ; CI-NEXT: [[BITCAST6:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) + ; CI-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[BITCAST1]], [[C1]] + ; CI-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[LSHR1]], [[C1]] + ; CI-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C]](s32) + ; CI-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL1]] + ; CI-NEXT: [[BITCAST7:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32) + ; CI-NEXT: [[AND4:%[0-9]+]]:_(s32) = G_AND [[BITCAST2]], [[C1]] + ; CI-NEXT: [[AND5:%[0-9]+]]:_(s32) = G_AND [[LSHR2]], [[C1]] + ; CI-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[C]](s32) + ; CI-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[AND4]], [[SHL2]] + ; CI-NEXT: [[BITCAST8:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR2]](s32) + ; CI-NEXT: [[AND6:%[0-9]+]]:_(s32) = G_AND [[BITCAST3]], [[C1]] + ; CI-NEXT: [[AND7:%[0-9]+]]:_(s32) = G_AND [[LSHR3]], [[C1]] + ; CI-NEXT: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[AND7]], [[C]](s32) + ; CI-NEXT: [[OR3:%[0-9]+]]:_(s32) = G_OR [[AND6]], [[SHL3]] + ; CI-NEXT: [[BITCAST9:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR3]](s32) + ; CI-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<8 x s16>) = G_CONCAT_VECTORS [[BITCAST6]](<2 x s16>), [[BITCAST7]](<2 x s16>), [[BITCAST8]](<2 x s16>), [[BITCAST9]](<2 x s16>) + ; CI-NEXT: [[BITCAST10:%[0-9]+]]:_(<4 x s32>) = G_BITCAST [[CONCAT_VECTORS]](<8 x s16>) + ; CI-NEXT: G_STORE [[BITCAST10]](<4 x s32>), [[COPY]](p1) :: (store (<4 x s32>), addrspace 1) + ; CI-NEXT: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 16 + ; CI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C2]](s64) + ; CI-NEXT: G_STORE [[BITCAST4]](s32), [[PTR_ADD]](p1) :: (store (s16) into unknown-address + 16, align 16, addrspace 1) + ; CI-NEXT: [[C3:%[0-9]+]]:_(s64) = G_CONSTANT i64 2 + ; CI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD]], [[C3]](s64) + ; CI-NEXT: 
G_STORE [[LSHR4]](s32), [[PTR_ADD1]](p1) :: (store (s16) into unknown-address + 18, addrspace 1) + ; CI-NEXT: [[C4:%[0-9]+]]:_(s64) = G_CONSTANT i64 4 + ; CI-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD]], [[C4]](s64) + ; CI-NEXT: G_STORE [[BITCAST5]](s32), [[PTR_ADD2]](p1) :: (store (s16) into unknown-address + 20, align 4, addrspace 1) ; VI-LABEL: name: test_store_global_v11s16_align4 ; VI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 ; VI-NEXT: [[DEF:%[0-9]+]]:_(<12 x s16>) = G_IMPLICIT_DEF - ; VI-NEXT: [[DEF1:%[0-9]+]]:_(<12 x s16>) = G_IMPLICIT_DEF ; VI-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>), [[UV2:%[0-9]+]]:_(<2 x s16>), [[UV3:%[0-9]+]]:_(<2 x s16>), [[UV4:%[0-9]+]]:_(<2 x s16>), [[UV5:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<12 x s16>) - ; VI-NEXT: [[UV6:%[0-9]+]]:_(<2 x s16>), [[UV7:%[0-9]+]]:_(<2 x s16>), [[UV8:%[0-9]+]]:_(<2 x s16>), [[UV9:%[0-9]+]]:_(<2 x s16>), [[UV10:%[0-9]+]]:_(<2 x s16>), [[UV11:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF1]](<12 x s16>) - ; VI-NEXT: [[UV12:%[0-9]+]]:_(<2 x s16>), [[UV13:%[0-9]+]]:_(<2 x s16>), [[UV14:%[0-9]+]]:_(<2 x s16>), [[UV15:%[0-9]+]]:_(<2 x s16>), [[UV16:%[0-9]+]]:_(<2 x s16>), [[UV17:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF1]](<12 x s16>) - ; VI-NEXT: [[UV18:%[0-9]+]]:_(<2 x s16>), [[UV19:%[0-9]+]]:_(<2 x s16>), [[UV20:%[0-9]+]]:_(<2 x s16>), [[UV21:%[0-9]+]]:_(<2 x s16>), [[UV22:%[0-9]+]]:_(<2 x s16>), [[UV23:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF1]](<12 x s16>) - ; VI-NEXT: [[UV24:%[0-9]+]]:_(<2 x s16>), [[UV25:%[0-9]+]]:_(<2 x s16>), [[UV26:%[0-9]+]]:_(<2 x s16>), [[UV27:%[0-9]+]]:_(<2 x s16>), [[UV28:%[0-9]+]]:_(<2 x s16>), [[UV29:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF1]](<12 x s16>) - ; VI-NEXT: [[UV30:%[0-9]+]]:_(<2 x s16>), [[UV31:%[0-9]+]]:_(<2 x s16>), [[UV32:%[0-9]+]]:_(<2 x s16>), [[UV33:%[0-9]+]]:_(<2 x s16>), [[UV34:%[0-9]+]]:_(<2 x s16>), [[UV35:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF1]](<12 x s16>) - ; VI-NEXT: [[UV36:%[0-9]+]]:_(<2 x s16>), [[UV37:%[0-9]+]]:_(<2 x s16>), [[UV38:%[0-9]+]]:_(<2 x s16>), [[UV39:%[0-9]+]]:_(<2 x s16>), [[UV40:%[0-9]+]]:_(<2 x s16>), [[UV41:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF1]](<12 x s16>) - ; VI-NEXT: [[UV42:%[0-9]+]]:_(<2 x s16>), [[UV43:%[0-9]+]]:_(<2 x s16>), [[UV44:%[0-9]+]]:_(<2 x s16>), [[UV45:%[0-9]+]]:_(<2 x s16>), [[UV46:%[0-9]+]]:_(<2 x s16>), [[UV47:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF1]](<12 x s16>) - ; VI-NEXT: [[UV48:%[0-9]+]]:_(<2 x s16>), [[UV49:%[0-9]+]]:_(<2 x s16>), [[UV50:%[0-9]+]]:_(<2 x s16>), [[UV51:%[0-9]+]]:_(<2 x s16>), [[UV52:%[0-9]+]]:_(<2 x s16>), [[UV53:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF1]](<12 x s16>) - ; VI-NEXT: [[UV54:%[0-9]+]]:_(<2 x s16>), [[UV55:%[0-9]+]]:_(<2 x s16>), [[UV56:%[0-9]+]]:_(<2 x s16>), [[UV57:%[0-9]+]]:_(<2 x s16>), [[UV58:%[0-9]+]]:_(<2 x s16>), [[UV59:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF1]](<12 x s16>) - ; VI-NEXT: [[UV60:%[0-9]+]]:_(<2 x s16>), [[UV61:%[0-9]+]]:_(<2 x s16>), [[UV62:%[0-9]+]]:_(<2 x s16>), [[UV63:%[0-9]+]]:_(<2 x s16>), [[UV64:%[0-9]+]]:_(<2 x s16>), [[UV65:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF1]](<12 x s16>) - ; VI-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<132 x s16>) = G_CONCAT_VECTORS [[UV]](<2 x s16>), [[UV1]](<2 x s16>), [[UV2]](<2 x s16>), [[UV3]](<2 x s16>), [[UV4]](<2 x s16>), [[UV5]](<2 x s16>), [[UV6]](<2 x s16>), [[UV7]](<2 x s16>), [[UV8]](<2 x s16>), [[UV9]](<2 x s16>), [[UV10]](<2 x s16>), [[UV11]](<2 x s16>), [[UV12]](<2 x s16>), [[UV13]](<2 x s16>), [[UV14]](<2 x s16>), 
[[UV15]](<2 x s16>), [[UV16]](<2 x s16>), [[UV17]](<2 x s16>), [[UV18]](<2 x s16>), [[UV19]](<2 x s16>), [[UV20]](<2 x s16>), [[UV21]](<2 x s16>), [[UV22]](<2 x s16>), [[UV23]](<2 x s16>), [[UV24]](<2 x s16>), [[UV25]](<2 x s16>), [[UV26]](<2 x s16>), [[UV27]](<2 x s16>), [[UV28]](<2 x s16>), [[UV29]](<2 x s16>), [[UV30]](<2 x s16>), [[UV31]](<2 x s16>), [[UV32]](<2 x s16>), [[UV33]](<2 x s16>), [[UV34]](<2 x s16>), [[UV35]](<2 x s16>), [[UV36]](<2 x s16>), [[UV37]](<2 x s16>), [[UV38]](<2 x s16>), [[UV39]](<2 x s16>), [[UV40]](<2 x s16>), [[UV41]](<2 x s16>), [[UV42]](<2 x s16>), [[UV43]](<2 x s16>), [[UV44]](<2 x s16>), [[UV45]](<2 x s16>), [[UV46]](<2 x s16>), [[UV47]](<2 x s16>), [[UV48]](<2 x s16>), [[UV49]](<2 x s16>), [[UV50]](<2 x s16>), [[UV51]](<2 x s16>), [[UV52]](<2 x s16>), [[UV53]](<2 x s16>), [[UV54]](<2 x s16>), [[UV55]](<2 x s16>), [[UV56]](<2 x s16>), [[UV57]](<2 x s16>), [[UV58]](<2 x s16>), [[UV59]](<2 x s16>), [[UV60]](<2 x s16>), [[UV61]](<2 x s16>), [[UV62]](<2 x s16>), [[UV63]](<2 x s16>), [[UV64]](<2 x s16>), [[UV65]](<2 x s16>) - ; VI-NEXT: [[UV66:%[0-9]+]]:_(<11 x s16>), [[UV67:%[0-9]+]]:_(<11 x s16>), [[UV68:%[0-9]+]]:_(<11 x s16>), [[UV69:%[0-9]+]]:_(<11 x s16>), [[UV70:%[0-9]+]]:_(<11 x s16>), [[UV71:%[0-9]+]]:_(<11 x s16>), [[UV72:%[0-9]+]]:_(<11 x s16>), [[UV73:%[0-9]+]]:_(<11 x s16>), [[UV74:%[0-9]+]]:_(<11 x s16>), [[UV75:%[0-9]+]]:_(<11 x s16>), [[UV76:%[0-9]+]]:_(<11 x s16>), [[UV77:%[0-9]+]]:_(<11 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<132 x s16>) - ; VI-NEXT: [[INSERT:%[0-9]+]]:_(<12 x s16>) = G_INSERT [[DEF1]], [[UV66]](<11 x s16>), 0 - ; VI-NEXT: [[EXTRACT:%[0-9]+]]:_(<8 x s16>) = G_EXTRACT [[INSERT]](<12 x s16>), 0 - ; VI-NEXT: [[INSERT1:%[0-9]+]]:_(<12 x s16>) = G_INSERT [[DEF1]], [[UV66]](<11 x s16>), 0 - ; VI-NEXT: [[EXTRACT1:%[0-9]+]]:_(<3 x s16>) = G_EXTRACT [[INSERT1]](<12 x s16>), 128 - ; VI-NEXT: [[BITCAST:%[0-9]+]]:_(<4 x s32>) = G_BITCAST [[EXTRACT]](<8 x s16>) - ; VI-NEXT: G_STORE [[BITCAST]](<4 x s32>), [[COPY]](p1) :: (store (<4 x s32>), addrspace 1) - ; VI-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 16 - ; VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64) - ; VI-NEXT: [[DEF2:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF - ; VI-NEXT: [[INSERT2:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF2]], [[EXTRACT1]](<3 x s16>), 0 - ; VI-NEXT: [[UV78:%[0-9]+]]:_(<2 x s16>), [[UV79:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[INSERT2]](<4 x s16>) - ; VI-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV78]](<2 x s16>) - ; VI-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; VI-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C1]](s32) - ; VI-NEXT: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[UV79]](<2 x s16>) - ; VI-NEXT: G_STORE [[BITCAST1]](s32), [[PTR_ADD]](p1) :: (store (s16) into unknown-address + 16, align 16, addrspace 1) - ; VI-NEXT: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 2 - ; VI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD]], [[C2]](s64) - ; VI-NEXT: G_STORE [[LSHR]](s32), [[PTR_ADD1]](p1) :: (store (s16) into unknown-address + 18, addrspace 1) - ; VI-NEXT: [[C3:%[0-9]+]]:_(s64) = G_CONSTANT i64 4 - ; VI-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD]], [[C3]](s64) - ; VI-NEXT: G_STORE [[BITCAST2]](s32), [[PTR_ADD2]](p1) :: (store (s16) into unknown-address + 20, align 4, addrspace 1) + ; VI-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>) + ; VI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; VI-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) + ; VI-NEXT: 
[[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>) + ; VI-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C]](s32) + ; VI-NEXT: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[UV2]](<2 x s16>) + ; VI-NEXT: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C]](s32) + ; VI-NEXT: [[BITCAST3:%[0-9]+]]:_(s32) = G_BITCAST [[UV3]](<2 x s16>) + ; VI-NEXT: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST3]], [[C]](s32) + ; VI-NEXT: [[BITCAST4:%[0-9]+]]:_(s32) = G_BITCAST [[UV4]](<2 x s16>) + ; VI-NEXT: [[LSHR4:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST4]], [[C]](s32) + ; VI-NEXT: [[BITCAST5:%[0-9]+]]:_(s32) = G_BITCAST [[UV5]](<2 x s16>) + ; VI-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 + ; VI-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[BITCAST]], [[C1]] + ; VI-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LSHR]], [[C1]] + ; VI-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C]](s32) + ; VI-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]] + ; VI-NEXT: [[BITCAST6:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) + ; VI-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[BITCAST1]], [[C1]] + ; VI-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[LSHR1]], [[C1]] + ; VI-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C]](s32) + ; VI-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL1]] + ; VI-NEXT: [[BITCAST7:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32) + ; VI-NEXT: [[AND4:%[0-9]+]]:_(s32) = G_AND [[BITCAST2]], [[C1]] + ; VI-NEXT: [[AND5:%[0-9]+]]:_(s32) = G_AND [[LSHR2]], [[C1]] + ; VI-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[C]](s32) + ; VI-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[AND4]], [[SHL2]] + ; VI-NEXT: [[BITCAST8:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR2]](s32) + ; VI-NEXT: [[AND6:%[0-9]+]]:_(s32) = G_AND [[BITCAST3]], [[C1]] + ; VI-NEXT: [[AND7:%[0-9]+]]:_(s32) = G_AND [[LSHR3]], [[C1]] + ; VI-NEXT: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[AND7]], [[C]](s32) + ; VI-NEXT: [[OR3:%[0-9]+]]:_(s32) = G_OR [[AND6]], [[SHL3]] + ; VI-NEXT: [[BITCAST9:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR3]](s32) + ; VI-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<8 x s16>) = G_CONCAT_VECTORS [[BITCAST6]](<2 x s16>), [[BITCAST7]](<2 x s16>), [[BITCAST8]](<2 x s16>), [[BITCAST9]](<2 x s16>) + ; VI-NEXT: [[BITCAST10:%[0-9]+]]:_(<4 x s32>) = G_BITCAST [[CONCAT_VECTORS]](<8 x s16>) + ; VI-NEXT: G_STORE [[BITCAST10]](<4 x s32>), [[COPY]](p1) :: (store (<4 x s32>), addrspace 1) + ; VI-NEXT: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 16 + ; VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C2]](s64) + ; VI-NEXT: G_STORE [[BITCAST4]](s32), [[PTR_ADD]](p1) :: (store (s16) into unknown-address + 16, align 16, addrspace 1) + ; VI-NEXT: [[C3:%[0-9]+]]:_(s64) = G_CONSTANT i64 2 + ; VI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD]], [[C3]](s64) + ; VI-NEXT: G_STORE [[LSHR4]](s32), [[PTR_ADD1]](p1) :: (store (s16) into unknown-address + 18, addrspace 1) + ; VI-NEXT: [[C4:%[0-9]+]]:_(s64) = G_CONSTANT i64 4 + ; VI-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD]], [[C4]](s64) + ; VI-NEXT: G_STORE [[BITCAST5]](s32), [[PTR_ADD2]](p1) :: (store (s16) into unknown-address + 20, align 4, addrspace 1) ; GFX9-LABEL: name: test_store_global_v11s16_align4 ; GFX9: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 ; GFX9-NEXT: [[DEF:%[0-9]+]]:_(<12 x s16>) = G_IMPLICIT_DEF - ; GFX9-NEXT: [[DEF1:%[0-9]+]]:_(<12 x s16>) = G_IMPLICIT_DEF ; GFX9-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>), [[UV2:%[0-9]+]]:_(<2 x s16>), [[UV3:%[0-9]+]]:_(<2 x s16>), [[UV4:%[0-9]+]]:_(<2 x s16>), [[UV5:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<12 x 
s16>) - ; GFX9-NEXT: [[UV6:%[0-9]+]]:_(<2 x s16>), [[UV7:%[0-9]+]]:_(<2 x s16>), [[UV8:%[0-9]+]]:_(<2 x s16>), [[UV9:%[0-9]+]]:_(<2 x s16>), [[UV10:%[0-9]+]]:_(<2 x s16>), [[UV11:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF1]](<12 x s16>) - ; GFX9-NEXT: [[UV12:%[0-9]+]]:_(<2 x s16>), [[UV13:%[0-9]+]]:_(<2 x s16>), [[UV14:%[0-9]+]]:_(<2 x s16>), [[UV15:%[0-9]+]]:_(<2 x s16>), [[UV16:%[0-9]+]]:_(<2 x s16>), [[UV17:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF1]](<12 x s16>) - ; GFX9-NEXT: [[UV18:%[0-9]+]]:_(<2 x s16>), [[UV19:%[0-9]+]]:_(<2 x s16>), [[UV20:%[0-9]+]]:_(<2 x s16>), [[UV21:%[0-9]+]]:_(<2 x s16>), [[UV22:%[0-9]+]]:_(<2 x s16>), [[UV23:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF1]](<12 x s16>) - ; GFX9-NEXT: [[UV24:%[0-9]+]]:_(<2 x s16>), [[UV25:%[0-9]+]]:_(<2 x s16>), [[UV26:%[0-9]+]]:_(<2 x s16>), [[UV27:%[0-9]+]]:_(<2 x s16>), [[UV28:%[0-9]+]]:_(<2 x s16>), [[UV29:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF1]](<12 x s16>) - ; GFX9-NEXT: [[UV30:%[0-9]+]]:_(<2 x s16>), [[UV31:%[0-9]+]]:_(<2 x s16>), [[UV32:%[0-9]+]]:_(<2 x s16>), [[UV33:%[0-9]+]]:_(<2 x s16>), [[UV34:%[0-9]+]]:_(<2 x s16>), [[UV35:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF1]](<12 x s16>) - ; GFX9-NEXT: [[UV36:%[0-9]+]]:_(<2 x s16>), [[UV37:%[0-9]+]]:_(<2 x s16>), [[UV38:%[0-9]+]]:_(<2 x s16>), [[UV39:%[0-9]+]]:_(<2 x s16>), [[UV40:%[0-9]+]]:_(<2 x s16>), [[UV41:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF1]](<12 x s16>) - ; GFX9-NEXT: [[UV42:%[0-9]+]]:_(<2 x s16>), [[UV43:%[0-9]+]]:_(<2 x s16>), [[UV44:%[0-9]+]]:_(<2 x s16>), [[UV45:%[0-9]+]]:_(<2 x s16>), [[UV46:%[0-9]+]]:_(<2 x s16>), [[UV47:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF1]](<12 x s16>) - ; GFX9-NEXT: [[UV48:%[0-9]+]]:_(<2 x s16>), [[UV49:%[0-9]+]]:_(<2 x s16>), [[UV50:%[0-9]+]]:_(<2 x s16>), [[UV51:%[0-9]+]]:_(<2 x s16>), [[UV52:%[0-9]+]]:_(<2 x s16>), [[UV53:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF1]](<12 x s16>) - ; GFX9-NEXT: [[UV54:%[0-9]+]]:_(<2 x s16>), [[UV55:%[0-9]+]]:_(<2 x s16>), [[UV56:%[0-9]+]]:_(<2 x s16>), [[UV57:%[0-9]+]]:_(<2 x s16>), [[UV58:%[0-9]+]]:_(<2 x s16>), [[UV59:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF1]](<12 x s16>) - ; GFX9-NEXT: [[UV60:%[0-9]+]]:_(<2 x s16>), [[UV61:%[0-9]+]]:_(<2 x s16>), [[UV62:%[0-9]+]]:_(<2 x s16>), [[UV63:%[0-9]+]]:_(<2 x s16>), [[UV64:%[0-9]+]]:_(<2 x s16>), [[UV65:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF1]](<12 x s16>) - ; GFX9-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<132 x s16>) = G_CONCAT_VECTORS [[UV]](<2 x s16>), [[UV1]](<2 x s16>), [[UV2]](<2 x s16>), [[UV3]](<2 x s16>), [[UV4]](<2 x s16>), [[UV5]](<2 x s16>), [[UV6]](<2 x s16>), [[UV7]](<2 x s16>), [[UV8]](<2 x s16>), [[UV9]](<2 x s16>), [[UV10]](<2 x s16>), [[UV11]](<2 x s16>), [[UV12]](<2 x s16>), [[UV13]](<2 x s16>), [[UV14]](<2 x s16>), [[UV15]](<2 x s16>), [[UV16]](<2 x s16>), [[UV17]](<2 x s16>), [[UV18]](<2 x s16>), [[UV19]](<2 x s16>), [[UV20]](<2 x s16>), [[UV21]](<2 x s16>), [[UV22]](<2 x s16>), [[UV23]](<2 x s16>), [[UV24]](<2 x s16>), [[UV25]](<2 x s16>), [[UV26]](<2 x s16>), [[UV27]](<2 x s16>), [[UV28]](<2 x s16>), [[UV29]](<2 x s16>), [[UV30]](<2 x s16>), [[UV31]](<2 x s16>), [[UV32]](<2 x s16>), [[UV33]](<2 x s16>), [[UV34]](<2 x s16>), [[UV35]](<2 x s16>), [[UV36]](<2 x s16>), [[UV37]](<2 x s16>), [[UV38]](<2 x s16>), [[UV39]](<2 x s16>), [[UV40]](<2 x s16>), [[UV41]](<2 x s16>), [[UV42]](<2 x s16>), [[UV43]](<2 x s16>), [[UV44]](<2 x s16>), [[UV45]](<2 x s16>), [[UV46]](<2 x s16>), [[UV47]](<2 x s16>), [[UV48]](<2 x s16>), [[UV49]](<2 x s16>), [[UV50]](<2 x s16>), [[UV51]](<2 x 
s16>), [[UV52]](<2 x s16>), [[UV53]](<2 x s16>), [[UV54]](<2 x s16>), [[UV55]](<2 x s16>), [[UV56]](<2 x s16>), [[UV57]](<2 x s16>), [[UV58]](<2 x s16>), [[UV59]](<2 x s16>), [[UV60]](<2 x s16>), [[UV61]](<2 x s16>), [[UV62]](<2 x s16>), [[UV63]](<2 x s16>), [[UV64]](<2 x s16>), [[UV65]](<2 x s16>) - ; GFX9-NEXT: [[UV66:%[0-9]+]]:_(<11 x s16>), [[UV67:%[0-9]+]]:_(<11 x s16>), [[UV68:%[0-9]+]]:_(<11 x s16>), [[UV69:%[0-9]+]]:_(<11 x s16>), [[UV70:%[0-9]+]]:_(<11 x s16>), [[UV71:%[0-9]+]]:_(<11 x s16>), [[UV72:%[0-9]+]]:_(<11 x s16>), [[UV73:%[0-9]+]]:_(<11 x s16>), [[UV74:%[0-9]+]]:_(<11 x s16>), [[UV75:%[0-9]+]]:_(<11 x s16>), [[UV76:%[0-9]+]]:_(<11 x s16>), [[UV77:%[0-9]+]]:_(<11 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<132 x s16>) - ; GFX9-NEXT: [[INSERT:%[0-9]+]]:_(<12 x s16>) = G_INSERT [[DEF1]], [[UV66]](<11 x s16>), 0 - ; GFX9-NEXT: [[EXTRACT:%[0-9]+]]:_(<8 x s16>) = G_EXTRACT [[INSERT]](<12 x s16>), 0 - ; GFX9-NEXT: [[INSERT1:%[0-9]+]]:_(<12 x s16>) = G_INSERT [[DEF1]], [[UV66]](<11 x s16>), 0 - ; GFX9-NEXT: [[EXTRACT1:%[0-9]+]]:_(<3 x s16>) = G_EXTRACT [[INSERT1]](<12 x s16>), 128 - ; GFX9-NEXT: [[BITCAST:%[0-9]+]]:_(<4 x s32>) = G_BITCAST [[EXTRACT]](<8 x s16>) - ; GFX9-NEXT: G_STORE [[BITCAST]](<4 x s32>), [[COPY]](p1) :: (store (<4 x s32>), addrspace 1) - ; GFX9-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 16 - ; GFX9-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64) - ; GFX9-NEXT: [[DEF2:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF - ; GFX9-NEXT: [[INSERT2:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF2]], [[EXTRACT1]](<3 x s16>), 0 - ; GFX9-NEXT: [[UV78:%[0-9]+]]:_(<2 x s16>), [[UV79:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[INSERT2]](<4 x s16>) - ; GFX9-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV78]](<2 x s16>) - ; GFX9-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; GFX9-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C1]](s32) - ; GFX9-NEXT: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[UV79]](<2 x s16>) - ; GFX9-NEXT: G_STORE [[BITCAST1]](s32), [[PTR_ADD]](p1) :: (store (s16) into unknown-address + 16, align 16, addrspace 1) + ; GFX9-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>) + ; GFX9-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; GFX9-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) + ; GFX9-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>) + ; GFX9-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C]](s32) + ; GFX9-NEXT: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[UV2]](<2 x s16>) + ; GFX9-NEXT: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C]](s32) + ; GFX9-NEXT: [[BITCAST3:%[0-9]+]]:_(s32) = G_BITCAST [[UV3]](<2 x s16>) + ; GFX9-NEXT: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST3]], [[C]](s32) + ; GFX9-NEXT: [[BITCAST4:%[0-9]+]]:_(s32) = G_BITCAST [[UV4]](<2 x s16>) + ; GFX9-NEXT: [[LSHR4:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST4]], [[C]](s32) + ; GFX9-NEXT: [[BITCAST5:%[0-9]+]]:_(s32) = G_BITCAST [[UV5]](<2 x s16>) + ; GFX9-NEXT: [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[BITCAST]](s32), [[LSHR]](s32) + ; GFX9-NEXT: [[BUILD_VECTOR_TRUNC1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[BITCAST1]](s32), [[LSHR1]](s32) + ; GFX9-NEXT: [[BUILD_VECTOR_TRUNC2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[BITCAST2]](s32), [[LSHR2]](s32) + ; GFX9-NEXT: [[BUILD_VECTOR_TRUNC3:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[BITCAST3]](s32), [[LSHR3]](s32) + ; GFX9-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<8 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR_TRUNC]](<2 x s16>), 
[[BUILD_VECTOR_TRUNC1]](<2 x s16>), [[BUILD_VECTOR_TRUNC2]](<2 x s16>), [[BUILD_VECTOR_TRUNC3]](<2 x s16>) + ; GFX9-NEXT: [[BITCAST6:%[0-9]+]]:_(<4 x s32>) = G_BITCAST [[CONCAT_VECTORS]](<8 x s16>) + ; GFX9-NEXT: G_STORE [[BITCAST6]](<4 x s32>), [[COPY]](p1) :: (store (<4 x s32>), addrspace 1) + ; GFX9-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 16 + ; GFX9-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C1]](s64) + ; GFX9-NEXT: G_STORE [[BITCAST4]](s32), [[PTR_ADD]](p1) :: (store (s16) into unknown-address + 16, align 16, addrspace 1) ; GFX9-NEXT: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 2 ; GFX9-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD]], [[C2]](s64) - ; GFX9-NEXT: G_STORE [[LSHR]](s32), [[PTR_ADD1]](p1) :: (store (s16) into unknown-address + 18, addrspace 1) + ; GFX9-NEXT: G_STORE [[LSHR4]](s32), [[PTR_ADD1]](p1) :: (store (s16) into unknown-address + 18, addrspace 1) ; GFX9-NEXT: [[C3:%[0-9]+]]:_(s64) = G_CONSTANT i64 4 ; GFX9-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD]], [[C3]](s64) - ; GFX9-NEXT: G_STORE [[BITCAST2]](s32), [[PTR_ADD2]](p1) :: (store (s16) into unknown-address + 20, align 4, addrspace 1) + ; GFX9-NEXT: G_STORE [[BITCAST5]](s32), [[PTR_ADD2]](p1) :: (store (s16) into unknown-address + 20, align 4, addrspace 1) %0:_(p1) = COPY $vgpr0_vgpr1 %1:_(<11 x s16>) = G_IMPLICIT_DEF G_STORE %1, %0 :: (store (<11 x s16>), align 16, addrspace 1) @@ -5935,42 +5927,46 @@ body: | ; SI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 ; SI-NEXT: [[DEF:%[0-9]+]]:_(<12 x s16>) = G_IMPLICIT_DEF ; SI-NEXT: [[BITCAST:%[0-9]+]]:_(<6 x s32>) = G_BITCAST [[DEF]](<12 x s16>) - ; SI-NEXT: [[EXTRACT:%[0-9]+]]:_(<4 x s32>) = G_EXTRACT [[BITCAST]](<6 x s32>), 0 - ; SI-NEXT: [[EXTRACT1:%[0-9]+]]:_(<2 x s32>) = G_EXTRACT [[BITCAST]](<6 x s32>), 128 - ; SI-NEXT: G_STORE [[EXTRACT]](<4 x s32>), [[COPY]](p1) :: (store (<4 x s32>), addrspace 1) + ; SI-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[BITCAST]](<6 x s32>) + ; SI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[UV]](s32), [[UV1]](s32), [[UV2]](s32), [[UV3]](s32) + ; SI-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[UV4]](s32), [[UV5]](s32) + ; SI-NEXT: G_STORE [[BUILD_VECTOR]](<4 x s32>), [[COPY]](p1) :: (store (<4 x s32>), addrspace 1) ; SI-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 16 ; SI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64) - ; SI-NEXT: G_STORE [[EXTRACT1]](<2 x s32>), [[PTR_ADD]](p1) :: (store (<2 x s32>) into unknown-address + 16, align 16, addrspace 1) + ; SI-NEXT: G_STORE [[BUILD_VECTOR1]](<2 x s32>), [[PTR_ADD]](p1) :: (store (<2 x s32>) into unknown-address + 16, align 16, addrspace 1) ; CI-LABEL: name: test_store_global_v12s16_align4 ; CI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 ; CI-NEXT: [[DEF:%[0-9]+]]:_(<12 x s16>) = G_IMPLICIT_DEF ; CI-NEXT: [[BITCAST:%[0-9]+]]:_(<6 x s32>) = G_BITCAST [[DEF]](<12 x s16>) - ; CI-NEXT: [[EXTRACT:%[0-9]+]]:_(<4 x s32>) = G_EXTRACT [[BITCAST]](<6 x s32>), 0 - ; CI-NEXT: [[EXTRACT1:%[0-9]+]]:_(<2 x s32>) = G_EXTRACT [[BITCAST]](<6 x s32>), 128 - ; CI-NEXT: G_STORE [[EXTRACT]](<4 x s32>), [[COPY]](p1) :: (store (<4 x s32>), addrspace 1) + ; CI-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[BITCAST]](<6 x s32>) + ; CI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = 
G_BUILD_VECTOR [[UV]](s32), [[UV1]](s32), [[UV2]](s32), [[UV3]](s32) + ; CI-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[UV4]](s32), [[UV5]](s32) + ; CI-NEXT: G_STORE [[BUILD_VECTOR]](<4 x s32>), [[COPY]](p1) :: (store (<4 x s32>), addrspace 1) ; CI-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 16 ; CI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64) - ; CI-NEXT: G_STORE [[EXTRACT1]](<2 x s32>), [[PTR_ADD]](p1) :: (store (<2 x s32>) into unknown-address + 16, align 16, addrspace 1) + ; CI-NEXT: G_STORE [[BUILD_VECTOR1]](<2 x s32>), [[PTR_ADD]](p1) :: (store (<2 x s32>) into unknown-address + 16, align 16, addrspace 1) ; VI-LABEL: name: test_store_global_v12s16_align4 ; VI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 ; VI-NEXT: [[DEF:%[0-9]+]]:_(<12 x s16>) = G_IMPLICIT_DEF ; VI-NEXT: [[BITCAST:%[0-9]+]]:_(<6 x s32>) = G_BITCAST [[DEF]](<12 x s16>) - ; VI-NEXT: [[EXTRACT:%[0-9]+]]:_(<4 x s32>) = G_EXTRACT [[BITCAST]](<6 x s32>), 0 - ; VI-NEXT: [[EXTRACT1:%[0-9]+]]:_(<2 x s32>) = G_EXTRACT [[BITCAST]](<6 x s32>), 128 - ; VI-NEXT: G_STORE [[EXTRACT]](<4 x s32>), [[COPY]](p1) :: (store (<4 x s32>), addrspace 1) + ; VI-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[BITCAST]](<6 x s32>) + ; VI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[UV]](s32), [[UV1]](s32), [[UV2]](s32), [[UV3]](s32) + ; VI-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[UV4]](s32), [[UV5]](s32) + ; VI-NEXT: G_STORE [[BUILD_VECTOR]](<4 x s32>), [[COPY]](p1) :: (store (<4 x s32>), addrspace 1) ; VI-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 16 ; VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64) - ; VI-NEXT: G_STORE [[EXTRACT1]](<2 x s32>), [[PTR_ADD]](p1) :: (store (<2 x s32>) into unknown-address + 16, align 16, addrspace 1) + ; VI-NEXT: G_STORE [[BUILD_VECTOR1]](<2 x s32>), [[PTR_ADD]](p1) :: (store (<2 x s32>) into unknown-address + 16, align 16, addrspace 1) ; GFX9-LABEL: name: test_store_global_v12s16_align4 ; GFX9: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 ; GFX9-NEXT: [[DEF:%[0-9]+]]:_(<12 x s16>) = G_IMPLICIT_DEF ; GFX9-NEXT: [[BITCAST:%[0-9]+]]:_(<6 x s32>) = G_BITCAST [[DEF]](<12 x s16>) - ; GFX9-NEXT: [[EXTRACT:%[0-9]+]]:_(<4 x s32>) = G_EXTRACT [[BITCAST]](<6 x s32>), 0 - ; GFX9-NEXT: [[EXTRACT1:%[0-9]+]]:_(<2 x s32>) = G_EXTRACT [[BITCAST]](<6 x s32>), 128 - ; GFX9-NEXT: G_STORE [[EXTRACT]](<4 x s32>), [[COPY]](p1) :: (store (<4 x s32>), addrspace 1) + ; GFX9-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[BITCAST]](<6 x s32>) + ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[UV]](s32), [[UV1]](s32), [[UV2]](s32), [[UV3]](s32) + ; GFX9-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[UV4]](s32), [[UV5]](s32) + ; GFX9-NEXT: G_STORE [[BUILD_VECTOR]](<4 x s32>), [[COPY]](p1) :: (store (<4 x s32>), addrspace 1) ; GFX9-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 16 ; GFX9-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64) - ; GFX9-NEXT: G_STORE [[EXTRACT1]](<2 x s32>), [[PTR_ADD]](p1) :: (store (<2 x s32>) into unknown-address + 16, align 16, addrspace 1) + ; GFX9-NEXT: G_STORE [[BUILD_VECTOR1]](<2 x s32>), [[PTR_ADD]](p1) :: (store (<2 x s32>) into unknown-address + 16, align 16, addrspace 1) %0:_(p1) = COPY $vgpr0_vgpr1 %1:_(<12 x s16>) = 
G_IMPLICIT_DEF G_STORE %1, %0 :: (store (<12 x s16>), align 16, addrspace 1) @@ -5986,9 +5982,7 @@ body: | ; SI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 ; SI-NEXT: [[COPY1:%[0-9]+]]:_(s160) = COPY $vgpr2_vgpr3_vgpr4_vgpr5_vgpr6 ; SI-NEXT: [[BITCAST:%[0-9]+]]:_(<5 x s32>) = G_BITCAST [[COPY1]](s160) - ; SI-NEXT: [[EXTRACT:%[0-9]+]]:_(<4 x s32>) = G_EXTRACT [[BITCAST]](<5 x s32>), 0 - ; SI-NEXT: [[EXTRACT1:%[0-9]+]]:_(s32) = G_EXTRACT [[BITCAST]](<5 x s32>), 128 - ; SI-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[EXTRACT]](<4 x s32>) + ; SI-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[BITCAST]](<5 x s32>) ; SI-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY [[UV]](s32) ; SI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 ; SI-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[COPY2]], [[C]](s32) @@ -6061,7 +6055,7 @@ body: | ; SI-NEXT: G_STORE [[LSHR11]](s32), [[PTR_ADD14]](p1) :: (store (s8) into unknown-address + 15, addrspace 1) ; SI-NEXT: [[C8:%[0-9]+]]:_(s64) = G_CONSTANT i64 16 ; SI-NEXT: [[PTR_ADD15:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C8]](s64) - ; SI-NEXT: [[COPY13:%[0-9]+]]:_(s32) = COPY [[EXTRACT1]](s32) + ; SI-NEXT: [[COPY13:%[0-9]+]]:_(s32) = COPY [[UV4]](s32) ; SI-NEXT: [[LSHR12:%[0-9]+]]:_(s32) = G_LSHR [[COPY13]], [[C]](s32) ; SI-NEXT: [[PTR_ADD16:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD15]], [[C1]](s64) ; SI-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY [[C2]](s32) @@ -6080,19 +6074,17 @@ body: | ; CI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 ; CI-NEXT: [[COPY1:%[0-9]+]]:_(s160) = COPY $vgpr2_vgpr3_vgpr4_vgpr5_vgpr6 ; CI-NEXT: [[BITCAST:%[0-9]+]]:_(<5 x s32>) = G_BITCAST [[COPY1]](s160) - ; CI-NEXT: [[EXTRACT:%[0-9]+]]:_(<4 x s32>) = G_EXTRACT [[BITCAST]](<5 x s32>), 0 - ; CI-NEXT: [[EXTRACT1:%[0-9]+]]:_(s32) = G_EXTRACT [[BITCAST]](<5 x s32>), 128 - ; CI-NEXT: G_STORE [[EXTRACT]](<4 x s32>), [[COPY]](p1) :: (store (<4 x s32>), align 1, addrspace 1) + ; CI-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[BITCAST]](<5 x s32>) + ; CI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[UV]](s32), [[UV1]](s32), [[UV2]](s32), [[UV3]](s32) + ; CI-NEXT: G_STORE [[BUILD_VECTOR]](<4 x s32>), [[COPY]](p1) :: (store (<4 x s32>), align 1, addrspace 1) ; CI-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 16 ; CI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64) - ; CI-NEXT: G_STORE [[EXTRACT1]](s32), [[PTR_ADD]](p1) :: (store (s32) into unknown-address + 16, align 1, addrspace 1) + ; CI-NEXT: G_STORE [[UV4]](s32), [[PTR_ADD]](p1) :: (store (s32) into unknown-address + 16, align 1, addrspace 1) ; VI-LABEL: name: test_store_global_s160_align1 ; VI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 ; VI-NEXT: [[COPY1:%[0-9]+]]:_(s160) = COPY $vgpr2_vgpr3_vgpr4_vgpr5_vgpr6 ; VI-NEXT: [[BITCAST:%[0-9]+]]:_(<5 x s32>) = G_BITCAST [[COPY1]](s160) - ; VI-NEXT: [[EXTRACT:%[0-9]+]]:_(<4 x s32>) = G_EXTRACT [[BITCAST]](<5 x s32>), 0 - ; VI-NEXT: [[EXTRACT1:%[0-9]+]]:_(s32) = G_EXTRACT [[BITCAST]](<5 x s32>), 128 - ; VI-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[EXTRACT]](<4 x s32>) + ; VI-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[BITCAST]](<5 x s32>) ; VI-NEXT: 
[[COPY2:%[0-9]+]]:_(s32) = COPY [[UV]](s32) ; VI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 ; VI-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[COPY2]], [[C]](s32) @@ -6165,7 +6157,7 @@ body: | ; VI-NEXT: G_STORE [[ANYEXT7]](s32), [[PTR_ADD14]](p1) :: (store (s8) into unknown-address + 15, addrspace 1) ; VI-NEXT: [[C7:%[0-9]+]]:_(s64) = G_CONSTANT i64 16 ; VI-NEXT: [[PTR_ADD15:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C7]](s64) - ; VI-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY [[EXTRACT1]](s32) + ; VI-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY [[UV4]](s32) ; VI-NEXT: [[LSHR12:%[0-9]+]]:_(s32) = G_LSHR [[COPY6]], [[C]](s32) ; VI-NEXT: [[PTR_ADD16:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD15]], [[C1]](s64) ; VI-NEXT: [[TRUNC8:%[0-9]+]]:_(s16) = G_TRUNC [[COPY6]](s32) @@ -6184,12 +6176,12 @@ body: | ; GFX9: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(s160) = COPY $vgpr2_vgpr3_vgpr4_vgpr5_vgpr6 ; GFX9-NEXT: [[BITCAST:%[0-9]+]]:_(<5 x s32>) = G_BITCAST [[COPY1]](s160) - ; GFX9-NEXT: [[EXTRACT:%[0-9]+]]:_(<4 x s32>) = G_EXTRACT [[BITCAST]](<5 x s32>), 0 - ; GFX9-NEXT: [[EXTRACT1:%[0-9]+]]:_(s32) = G_EXTRACT [[BITCAST]](<5 x s32>), 128 - ; GFX9-NEXT: G_STORE [[EXTRACT]](<4 x s32>), [[COPY]](p1) :: (store (<4 x s32>), align 1, addrspace 1) + ; GFX9-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[BITCAST]](<5 x s32>) + ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[UV]](s32), [[UV1]](s32), [[UV2]](s32), [[UV3]](s32) + ; GFX9-NEXT: G_STORE [[BUILD_VECTOR]](<4 x s32>), [[COPY]](p1) :: (store (<4 x s32>), align 1, addrspace 1) ; GFX9-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 16 ; GFX9-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64) - ; GFX9-NEXT: G_STORE [[EXTRACT1]](s32), [[PTR_ADD]](p1) :: (store (s32) into unknown-address + 16, align 1, addrspace 1) + ; GFX9-NEXT: G_STORE [[UV4]](s32), [[PTR_ADD]](p1) :: (store (s32) into unknown-address + 16, align 1, addrspace 1) %0:_(p1) = COPY $vgpr0_vgpr1 %1:_(s160) = COPY $vgpr2_vgpr3_vgpr4_vgpr5_vgpr6 G_STORE %1, %0 :: (store (s160), align 1, addrspace 1) @@ -6205,9 +6197,7 @@ body: | ; SI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 ; SI-NEXT: [[COPY1:%[0-9]+]]:_(s160) = COPY $vgpr2_vgpr3_vgpr4_vgpr5_vgpr6 ; SI-NEXT: [[BITCAST:%[0-9]+]]:_(<5 x s32>) = G_BITCAST [[COPY1]](s160) - ; SI-NEXT: [[EXTRACT:%[0-9]+]]:_(<4 x s32>) = G_EXTRACT [[BITCAST]](<5 x s32>), 0 - ; SI-NEXT: [[EXTRACT1:%[0-9]+]]:_(s32) = G_EXTRACT [[BITCAST]](<5 x s32>), 128 - ; SI-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[EXTRACT]](<4 x s32>) + ; SI-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[BITCAST]](<5 x s32>) ; SI-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY [[UV]](s32) ; SI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 ; SI-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[COPY2]], [[C]](s32) @@ -6238,7 +6228,7 @@ body: | ; SI-NEXT: G_STORE [[LSHR3]](s32), [[PTR_ADD6]](p1) :: (store (s16) into unknown-address + 14, addrspace 1) ; SI-NEXT: [[C5:%[0-9]+]]:_(s64) = G_CONSTANT i64 16 ; SI-NEXT: [[PTR_ADD7:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C5]](s64) - ; SI-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY [[EXTRACT1]](s32) + ; SI-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY [[UV4]](s32) ; SI-NEXT: [[LSHR4:%[0-9]+]]:_(s32) = G_LSHR [[COPY6]], [[C]](s32) ; SI-NEXT: [[PTR_ADD8:%[0-9]+]]:_(p1) = 
G_PTR_ADD [[PTR_ADD7]], [[C1]](s64) ; SI-NEXT: G_STORE [[COPY6]](s32), [[PTR_ADD7]](p1) :: (store (s16) into unknown-address + 16, addrspace 1) @@ -6247,19 +6237,17 @@ body: | ; CI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 ; CI-NEXT: [[COPY1:%[0-9]+]]:_(s160) = COPY $vgpr2_vgpr3_vgpr4_vgpr5_vgpr6 ; CI-NEXT: [[BITCAST:%[0-9]+]]:_(<5 x s32>) = G_BITCAST [[COPY1]](s160) - ; CI-NEXT: [[EXTRACT:%[0-9]+]]:_(<4 x s32>) = G_EXTRACT [[BITCAST]](<5 x s32>), 0 - ; CI-NEXT: [[EXTRACT1:%[0-9]+]]:_(s32) = G_EXTRACT [[BITCAST]](<5 x s32>), 128 - ; CI-NEXT: G_STORE [[EXTRACT]](<4 x s32>), [[COPY]](p1) :: (store (<4 x s32>), align 2, addrspace 1) + ; CI-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[BITCAST]](<5 x s32>) + ; CI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[UV]](s32), [[UV1]](s32), [[UV2]](s32), [[UV3]](s32) + ; CI-NEXT: G_STORE [[BUILD_VECTOR]](<4 x s32>), [[COPY]](p1) :: (store (<4 x s32>), align 2, addrspace 1) ; CI-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 16 ; CI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64) - ; CI-NEXT: G_STORE [[EXTRACT1]](s32), [[PTR_ADD]](p1) :: (store (s32) into unknown-address + 16, align 2, addrspace 1) + ; CI-NEXT: G_STORE [[UV4]](s32), [[PTR_ADD]](p1) :: (store (s32) into unknown-address + 16, align 2, addrspace 1) ; VI-LABEL: name: test_store_global_s160_align2 ; VI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 ; VI-NEXT: [[COPY1:%[0-9]+]]:_(s160) = COPY $vgpr2_vgpr3_vgpr4_vgpr5_vgpr6 ; VI-NEXT: [[BITCAST:%[0-9]+]]:_(<5 x s32>) = G_BITCAST [[COPY1]](s160) - ; VI-NEXT: [[EXTRACT:%[0-9]+]]:_(<4 x s32>) = G_EXTRACT [[BITCAST]](<5 x s32>), 0 - ; VI-NEXT: [[EXTRACT1:%[0-9]+]]:_(s32) = G_EXTRACT [[BITCAST]](<5 x s32>), 128 - ; VI-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[EXTRACT]](<4 x s32>) + ; VI-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[BITCAST]](<5 x s32>) ; VI-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY [[UV]](s32) ; VI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 ; VI-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[COPY2]], [[C]](s32) @@ -6290,7 +6278,7 @@ body: | ; VI-NEXT: G_STORE [[LSHR3]](s32), [[PTR_ADD6]](p1) :: (store (s16) into unknown-address + 14, addrspace 1) ; VI-NEXT: [[C5:%[0-9]+]]:_(s64) = G_CONSTANT i64 16 ; VI-NEXT: [[PTR_ADD7:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C5]](s64) - ; VI-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY [[EXTRACT1]](s32) + ; VI-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY [[UV4]](s32) ; VI-NEXT: [[LSHR4:%[0-9]+]]:_(s32) = G_LSHR [[COPY6]], [[C]](s32) ; VI-NEXT: [[PTR_ADD8:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD7]], [[C1]](s64) ; VI-NEXT: G_STORE [[COPY6]](s32), [[PTR_ADD7]](p1) :: (store (s16) into unknown-address + 16, addrspace 1) @@ -6299,12 +6287,12 @@ body: | ; GFX9: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(s160) = COPY $vgpr2_vgpr3_vgpr4_vgpr5_vgpr6 ; GFX9-NEXT: [[BITCAST:%[0-9]+]]:_(<5 x s32>) = G_BITCAST [[COPY1]](s160) - ; GFX9-NEXT: [[EXTRACT:%[0-9]+]]:_(<4 x s32>) = G_EXTRACT [[BITCAST]](<5 x s32>), 0 - ; GFX9-NEXT: [[EXTRACT1:%[0-9]+]]:_(s32) = G_EXTRACT [[BITCAST]](<5 x s32>), 128 - ; GFX9-NEXT: G_STORE [[EXTRACT]](<4 x s32>), [[COPY]](p1) :: (store (<4 x s32>), align 2, addrspace 1) + ; GFX9-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), 
[[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[BITCAST]](<5 x s32>) + ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[UV]](s32), [[UV1]](s32), [[UV2]](s32), [[UV3]](s32) + ; GFX9-NEXT: G_STORE [[BUILD_VECTOR]](<4 x s32>), [[COPY]](p1) :: (store (<4 x s32>), align 2, addrspace 1) ; GFX9-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 16 ; GFX9-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64) - ; GFX9-NEXT: G_STORE [[EXTRACT1]](s32), [[PTR_ADD]](p1) :: (store (s32) into unknown-address + 16, align 2, addrspace 1) + ; GFX9-NEXT: G_STORE [[UV4]](s32), [[PTR_ADD]](p1) :: (store (s32) into unknown-address + 16, align 2, addrspace 1) %0:_(p1) = COPY $vgpr0_vgpr1 %1:_(s160) = COPY $vgpr2_vgpr3_vgpr4_vgpr5_vgpr6 G_STORE %1, %0 :: (store (s160), align 2, addrspace 1) @@ -6320,42 +6308,42 @@ body: | ; SI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 ; SI-NEXT: [[COPY1:%[0-9]+]]:_(s160) = COPY $vgpr2_vgpr3_vgpr4_vgpr5_vgpr6 ; SI-NEXT: [[BITCAST:%[0-9]+]]:_(<5 x s32>) = G_BITCAST [[COPY1]](s160) - ; SI-NEXT: [[EXTRACT:%[0-9]+]]:_(<4 x s32>) = G_EXTRACT [[BITCAST]](<5 x s32>), 0 - ; SI-NEXT: [[EXTRACT1:%[0-9]+]]:_(s32) = G_EXTRACT [[BITCAST]](<5 x s32>), 128 - ; SI-NEXT: G_STORE [[EXTRACT]](<4 x s32>), [[COPY]](p1) :: (store (<4 x s32>), align 4, addrspace 1) + ; SI-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[BITCAST]](<5 x s32>) + ; SI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[UV]](s32), [[UV1]](s32), [[UV2]](s32), [[UV3]](s32) + ; SI-NEXT: G_STORE [[BUILD_VECTOR]](<4 x s32>), [[COPY]](p1) :: (store (<4 x s32>), align 4, addrspace 1) ; SI-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 16 ; SI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64) - ; SI-NEXT: G_STORE [[EXTRACT1]](s32), [[PTR_ADD]](p1) :: (store (s32) into unknown-address + 16, addrspace 1) + ; SI-NEXT: G_STORE [[UV4]](s32), [[PTR_ADD]](p1) :: (store (s32) into unknown-address + 16, addrspace 1) ; CI-LABEL: name: test_store_global_s160_align4 ; CI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 ; CI-NEXT: [[COPY1:%[0-9]+]]:_(s160) = COPY $vgpr2_vgpr3_vgpr4_vgpr5_vgpr6 ; CI-NEXT: [[BITCAST:%[0-9]+]]:_(<5 x s32>) = G_BITCAST [[COPY1]](s160) - ; CI-NEXT: [[EXTRACT:%[0-9]+]]:_(<4 x s32>) = G_EXTRACT [[BITCAST]](<5 x s32>), 0 - ; CI-NEXT: [[EXTRACT1:%[0-9]+]]:_(s32) = G_EXTRACT [[BITCAST]](<5 x s32>), 128 - ; CI-NEXT: G_STORE [[EXTRACT]](<4 x s32>), [[COPY]](p1) :: (store (<4 x s32>), align 4, addrspace 1) + ; CI-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[BITCAST]](<5 x s32>) + ; CI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[UV]](s32), [[UV1]](s32), [[UV2]](s32), [[UV3]](s32) + ; CI-NEXT: G_STORE [[BUILD_VECTOR]](<4 x s32>), [[COPY]](p1) :: (store (<4 x s32>), align 4, addrspace 1) ; CI-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 16 ; CI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64) - ; CI-NEXT: G_STORE [[EXTRACT1]](s32), [[PTR_ADD]](p1) :: (store (s32) into unknown-address + 16, addrspace 1) + ; CI-NEXT: G_STORE [[UV4]](s32), [[PTR_ADD]](p1) :: (store (s32) into unknown-address + 16, addrspace 1) ; VI-LABEL: name: test_store_global_s160_align4 ; VI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 ; VI-NEXT: [[COPY1:%[0-9]+]]:_(s160) = COPY $vgpr2_vgpr3_vgpr4_vgpr5_vgpr6 ; VI-NEXT: [[BITCAST:%[0-9]+]]:_(<5 x s32>) = G_BITCAST 
[[COPY1]](s160) - ; VI-NEXT: [[EXTRACT:%[0-9]+]]:_(<4 x s32>) = G_EXTRACT [[BITCAST]](<5 x s32>), 0 - ; VI-NEXT: [[EXTRACT1:%[0-9]+]]:_(s32) = G_EXTRACT [[BITCAST]](<5 x s32>), 128 - ; VI-NEXT: G_STORE [[EXTRACT]](<4 x s32>), [[COPY]](p1) :: (store (<4 x s32>), align 4, addrspace 1) + ; VI-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[BITCAST]](<5 x s32>) + ; VI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[UV]](s32), [[UV1]](s32), [[UV2]](s32), [[UV3]](s32) + ; VI-NEXT: G_STORE [[BUILD_VECTOR]](<4 x s32>), [[COPY]](p1) :: (store (<4 x s32>), align 4, addrspace 1) ; VI-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 16 ; VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64) - ; VI-NEXT: G_STORE [[EXTRACT1]](s32), [[PTR_ADD]](p1) :: (store (s32) into unknown-address + 16, addrspace 1) + ; VI-NEXT: G_STORE [[UV4]](s32), [[PTR_ADD]](p1) :: (store (s32) into unknown-address + 16, addrspace 1) ; GFX9-LABEL: name: test_store_global_s160_align4 ; GFX9: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(s160) = COPY $vgpr2_vgpr3_vgpr4_vgpr5_vgpr6 ; GFX9-NEXT: [[BITCAST:%[0-9]+]]:_(<5 x s32>) = G_BITCAST [[COPY1]](s160) - ; GFX9-NEXT: [[EXTRACT:%[0-9]+]]:_(<4 x s32>) = G_EXTRACT [[BITCAST]](<5 x s32>), 0 - ; GFX9-NEXT: [[EXTRACT1:%[0-9]+]]:_(s32) = G_EXTRACT [[BITCAST]](<5 x s32>), 128 - ; GFX9-NEXT: G_STORE [[EXTRACT]](<4 x s32>), [[COPY]](p1) :: (store (<4 x s32>), align 4, addrspace 1) + ; GFX9-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[BITCAST]](<5 x s32>) + ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[UV]](s32), [[UV1]](s32), [[UV2]](s32), [[UV3]](s32) + ; GFX9-NEXT: G_STORE [[BUILD_VECTOR]](<4 x s32>), [[COPY]](p1) :: (store (<4 x s32>), align 4, addrspace 1) ; GFX9-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 16 ; GFX9-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64) - ; GFX9-NEXT: G_STORE [[EXTRACT1]](s32), [[PTR_ADD]](p1) :: (store (s32) into unknown-address + 16, addrspace 1) + ; GFX9-NEXT: G_STORE [[UV4]](s32), [[PTR_ADD]](p1) :: (store (s32) into unknown-address + 16, addrspace 1) %0:_(p1) = COPY $vgpr0_vgpr1 %1:_(s160) = COPY $vgpr2_vgpr3_vgpr4_vgpr5_vgpr6 G_STORE %1, %0 :: (store (s160), align 4, addrspace 1) @@ -6371,42 +6359,42 @@ body: | ; SI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 ; SI-NEXT: [[COPY1:%[0-9]+]]:_(s160) = COPY $vgpr2_vgpr3_vgpr4_vgpr5_vgpr6 ; SI-NEXT: [[BITCAST:%[0-9]+]]:_(<5 x s32>) = G_BITCAST [[COPY1]](s160) - ; SI-NEXT: [[EXTRACT:%[0-9]+]]:_(<4 x s32>) = G_EXTRACT [[BITCAST]](<5 x s32>), 0 - ; SI-NEXT: [[EXTRACT1:%[0-9]+]]:_(s32) = G_EXTRACT [[BITCAST]](<5 x s32>), 128 - ; SI-NEXT: G_STORE [[EXTRACT]](<4 x s32>), [[COPY]](p1) :: (store (<4 x s32>), align 8, addrspace 1) + ; SI-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[BITCAST]](<5 x s32>) + ; SI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[UV]](s32), [[UV1]](s32), [[UV2]](s32), [[UV3]](s32) + ; SI-NEXT: G_STORE [[BUILD_VECTOR]](<4 x s32>), [[COPY]](p1) :: (store (<4 x s32>), align 8, addrspace 1) ; SI-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 16 ; SI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64) - ; SI-NEXT: G_STORE [[EXTRACT1]](s32), [[PTR_ADD]](p1) :: (store (s32) into 
unknown-address + 16, align 8, addrspace 1) + ; SI-NEXT: G_STORE [[UV4]](s32), [[PTR_ADD]](p1) :: (store (s32) into unknown-address + 16, align 8, addrspace 1) ; CI-LABEL: name: test_store_global_s160_align8 ; CI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 ; CI-NEXT: [[COPY1:%[0-9]+]]:_(s160) = COPY $vgpr2_vgpr3_vgpr4_vgpr5_vgpr6 ; CI-NEXT: [[BITCAST:%[0-9]+]]:_(<5 x s32>) = G_BITCAST [[COPY1]](s160) - ; CI-NEXT: [[EXTRACT:%[0-9]+]]:_(<4 x s32>) = G_EXTRACT [[BITCAST]](<5 x s32>), 0 - ; CI-NEXT: [[EXTRACT1:%[0-9]+]]:_(s32) = G_EXTRACT [[BITCAST]](<5 x s32>), 128 - ; CI-NEXT: G_STORE [[EXTRACT]](<4 x s32>), [[COPY]](p1) :: (store (<4 x s32>), align 8, addrspace 1) + ; CI-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[BITCAST]](<5 x s32>) + ; CI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[UV]](s32), [[UV1]](s32), [[UV2]](s32), [[UV3]](s32) + ; CI-NEXT: G_STORE [[BUILD_VECTOR]](<4 x s32>), [[COPY]](p1) :: (store (<4 x s32>), align 8, addrspace 1) ; CI-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 16 ; CI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64) - ; CI-NEXT: G_STORE [[EXTRACT1]](s32), [[PTR_ADD]](p1) :: (store (s32) into unknown-address + 16, align 8, addrspace 1) + ; CI-NEXT: G_STORE [[UV4]](s32), [[PTR_ADD]](p1) :: (store (s32) into unknown-address + 16, align 8, addrspace 1) ; VI-LABEL: name: test_store_global_s160_align8 ; VI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 ; VI-NEXT: [[COPY1:%[0-9]+]]:_(s160) = COPY $vgpr2_vgpr3_vgpr4_vgpr5_vgpr6 ; VI-NEXT: [[BITCAST:%[0-9]+]]:_(<5 x s32>) = G_BITCAST [[COPY1]](s160) - ; VI-NEXT: [[EXTRACT:%[0-9]+]]:_(<4 x s32>) = G_EXTRACT [[BITCAST]](<5 x s32>), 0 - ; VI-NEXT: [[EXTRACT1:%[0-9]+]]:_(s32) = G_EXTRACT [[BITCAST]](<5 x s32>), 128 - ; VI-NEXT: G_STORE [[EXTRACT]](<4 x s32>), [[COPY]](p1) :: (store (<4 x s32>), align 8, addrspace 1) + ; VI-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[BITCAST]](<5 x s32>) + ; VI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[UV]](s32), [[UV1]](s32), [[UV2]](s32), [[UV3]](s32) + ; VI-NEXT: G_STORE [[BUILD_VECTOR]](<4 x s32>), [[COPY]](p1) :: (store (<4 x s32>), align 8, addrspace 1) ; VI-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 16 ; VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64) - ; VI-NEXT: G_STORE [[EXTRACT1]](s32), [[PTR_ADD]](p1) :: (store (s32) into unknown-address + 16, align 8, addrspace 1) + ; VI-NEXT: G_STORE [[UV4]](s32), [[PTR_ADD]](p1) :: (store (s32) into unknown-address + 16, align 8, addrspace 1) ; GFX9-LABEL: name: test_store_global_s160_align8 ; GFX9: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(s160) = COPY $vgpr2_vgpr3_vgpr4_vgpr5_vgpr6 ; GFX9-NEXT: [[BITCAST:%[0-9]+]]:_(<5 x s32>) = G_BITCAST [[COPY1]](s160) - ; GFX9-NEXT: [[EXTRACT:%[0-9]+]]:_(<4 x s32>) = G_EXTRACT [[BITCAST]](<5 x s32>), 0 - ; GFX9-NEXT: [[EXTRACT1:%[0-9]+]]:_(s32) = G_EXTRACT [[BITCAST]](<5 x s32>), 128 - ; GFX9-NEXT: G_STORE [[EXTRACT]](<4 x s32>), [[COPY]](p1) :: (store (<4 x s32>), align 8, addrspace 1) + ; GFX9-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[BITCAST]](<5 x s32>) + ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[UV]](s32), [[UV1]](s32), [[UV2]](s32), [[UV3]](s32) + ; GFX9-NEXT: 
G_STORE [[BUILD_VECTOR]](<4 x s32>), [[COPY]](p1) :: (store (<4 x s32>), align 8, addrspace 1) ; GFX9-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 16 ; GFX9-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64) - ; GFX9-NEXT: G_STORE [[EXTRACT1]](s32), [[PTR_ADD]](p1) :: (store (s32) into unknown-address + 16, align 8, addrspace 1) + ; GFX9-NEXT: G_STORE [[UV4]](s32), [[PTR_ADD]](p1) :: (store (s32) into unknown-address + 16, align 8, addrspace 1) %0:_(p1) = COPY $vgpr0_vgpr1 %1:_(s160) = COPY $vgpr2_vgpr3_vgpr4_vgpr5_vgpr6 G_STORE %1, %0 :: (store (s160), align 8, addrspace 1) @@ -6422,42 +6410,42 @@ body: | ; SI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 ; SI-NEXT: [[COPY1:%[0-9]+]]:_(s160) = COPY $vgpr2_vgpr3_vgpr4_vgpr5_vgpr6 ; SI-NEXT: [[BITCAST:%[0-9]+]]:_(<5 x s32>) = G_BITCAST [[COPY1]](s160) - ; SI-NEXT: [[EXTRACT:%[0-9]+]]:_(<4 x s32>) = G_EXTRACT [[BITCAST]](<5 x s32>), 0 - ; SI-NEXT: [[EXTRACT1:%[0-9]+]]:_(s32) = G_EXTRACT [[BITCAST]](<5 x s32>), 128 - ; SI-NEXT: G_STORE [[EXTRACT]](<4 x s32>), [[COPY]](p1) :: (store (<4 x s32>), addrspace 1) + ; SI-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[BITCAST]](<5 x s32>) + ; SI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[UV]](s32), [[UV1]](s32), [[UV2]](s32), [[UV3]](s32) + ; SI-NEXT: G_STORE [[BUILD_VECTOR]](<4 x s32>), [[COPY]](p1) :: (store (<4 x s32>), addrspace 1) ; SI-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 16 ; SI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64) - ; SI-NEXT: G_STORE [[EXTRACT1]](s32), [[PTR_ADD]](p1) :: (store (s32) into unknown-address + 16, align 16, addrspace 1) + ; SI-NEXT: G_STORE [[UV4]](s32), [[PTR_ADD]](p1) :: (store (s32) into unknown-address + 16, align 16, addrspace 1) ; CI-LABEL: name: test_store_global_s160_align16 ; CI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 ; CI-NEXT: [[COPY1:%[0-9]+]]:_(s160) = COPY $vgpr2_vgpr3_vgpr4_vgpr5_vgpr6 ; CI-NEXT: [[BITCAST:%[0-9]+]]:_(<5 x s32>) = G_BITCAST [[COPY1]](s160) - ; CI-NEXT: [[EXTRACT:%[0-9]+]]:_(<4 x s32>) = G_EXTRACT [[BITCAST]](<5 x s32>), 0 - ; CI-NEXT: [[EXTRACT1:%[0-9]+]]:_(s32) = G_EXTRACT [[BITCAST]](<5 x s32>), 128 - ; CI-NEXT: G_STORE [[EXTRACT]](<4 x s32>), [[COPY]](p1) :: (store (<4 x s32>), addrspace 1) + ; CI-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[BITCAST]](<5 x s32>) + ; CI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[UV]](s32), [[UV1]](s32), [[UV2]](s32), [[UV3]](s32) + ; CI-NEXT: G_STORE [[BUILD_VECTOR]](<4 x s32>), [[COPY]](p1) :: (store (<4 x s32>), addrspace 1) ; CI-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 16 ; CI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64) - ; CI-NEXT: G_STORE [[EXTRACT1]](s32), [[PTR_ADD]](p1) :: (store (s32) into unknown-address + 16, align 16, addrspace 1) + ; CI-NEXT: G_STORE [[UV4]](s32), [[PTR_ADD]](p1) :: (store (s32) into unknown-address + 16, align 16, addrspace 1) ; VI-LABEL: name: test_store_global_s160_align16 ; VI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 ; VI-NEXT: [[COPY1:%[0-9]+]]:_(s160) = COPY $vgpr2_vgpr3_vgpr4_vgpr5_vgpr6 ; VI-NEXT: [[BITCAST:%[0-9]+]]:_(<5 x s32>) = G_BITCAST [[COPY1]](s160) - ; VI-NEXT: [[EXTRACT:%[0-9]+]]:_(<4 x s32>) = G_EXTRACT [[BITCAST]](<5 x s32>), 0 - ; VI-NEXT: [[EXTRACT1:%[0-9]+]]:_(s32) = G_EXTRACT [[BITCAST]](<5 x s32>), 128 - ; VI-NEXT: G_STORE [[EXTRACT]](<4 x s32>), 
[[COPY]](p1) :: (store (<4 x s32>), addrspace 1) + ; VI-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[BITCAST]](<5 x s32>) + ; VI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[UV]](s32), [[UV1]](s32), [[UV2]](s32), [[UV3]](s32) + ; VI-NEXT: G_STORE [[BUILD_VECTOR]](<4 x s32>), [[COPY]](p1) :: (store (<4 x s32>), addrspace 1) ; VI-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 16 ; VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64) - ; VI-NEXT: G_STORE [[EXTRACT1]](s32), [[PTR_ADD]](p1) :: (store (s32) into unknown-address + 16, align 16, addrspace 1) + ; VI-NEXT: G_STORE [[UV4]](s32), [[PTR_ADD]](p1) :: (store (s32) into unknown-address + 16, align 16, addrspace 1) ; GFX9-LABEL: name: test_store_global_s160_align16 ; GFX9: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(s160) = COPY $vgpr2_vgpr3_vgpr4_vgpr5_vgpr6 ; GFX9-NEXT: [[BITCAST:%[0-9]+]]:_(<5 x s32>) = G_BITCAST [[COPY1]](s160) - ; GFX9-NEXT: [[EXTRACT:%[0-9]+]]:_(<4 x s32>) = G_EXTRACT [[BITCAST]](<5 x s32>), 0 - ; GFX9-NEXT: [[EXTRACT1:%[0-9]+]]:_(s32) = G_EXTRACT [[BITCAST]](<5 x s32>), 128 - ; GFX9-NEXT: G_STORE [[EXTRACT]](<4 x s32>), [[COPY]](p1) :: (store (<4 x s32>), addrspace 1) + ; GFX9-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[BITCAST]](<5 x s32>) + ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[UV]](s32), [[UV1]](s32), [[UV2]](s32), [[UV3]](s32) + ; GFX9-NEXT: G_STORE [[BUILD_VECTOR]](<4 x s32>), [[COPY]](p1) :: (store (<4 x s32>), addrspace 1) ; GFX9-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 16 ; GFX9-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64) - ; GFX9-NEXT: G_STORE [[EXTRACT1]](s32), [[PTR_ADD]](p1) :: (store (s32) into unknown-address + 16, align 16, addrspace 1) + ; GFX9-NEXT: G_STORE [[UV4]](s32), [[PTR_ADD]](p1) :: (store (s32) into unknown-address + 16, align 16, addrspace 1) %0:_(p1) = COPY $vgpr0_vgpr1 %1:_(s160) = COPY $vgpr2_vgpr3_vgpr4_vgpr5_vgpr6 G_STORE %1, %0 :: (store (s160), align 16, addrspace 1) @@ -7797,11 +7785,9 @@ body: | ; SI-NEXT: [[COPY1:%[0-9]+]]:_(<3 x s32>) = COPY $vgpr2_vgpr3_vgpr4 ; SI-NEXT: [[COPY2:%[0-9]+]]:_(<3 x s32>) = COPY $vgpr5_vgpr6_vgpr7 ; SI-NEXT: [[COPY3:%[0-9]+]]:_(<3 x s32>) = COPY $vgpr7_vgpr8_vgpr9 - ; SI-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<9 x s32>) = G_CONCAT_VECTORS [[COPY1]](<3 x s32>), [[COPY2]](<3 x s32>), [[COPY3]](<3 x s32>) - ; SI-NEXT: [[EXTRACT:%[0-9]+]]:_(<4 x s32>) = G_EXTRACT [[CONCAT_VECTORS]](<9 x s32>), 0 - ; SI-NEXT: [[EXTRACT1:%[0-9]+]]:_(<4 x s32>) = G_EXTRACT [[CONCAT_VECTORS]](<9 x s32>), 128 - ; SI-NEXT: [[EXTRACT2:%[0-9]+]]:_(s32) = G_EXTRACT [[COPY3]](<3 x s32>), 64 - ; SI-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[EXTRACT]](<4 x s32>) + ; SI-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](<3 x s32>) + ; SI-NEXT: [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY2]](<3 x s32>) + ; SI-NEXT: [[UV6:%[0-9]+]]:_(s32), [[UV7:%[0-9]+]]:_(s32), [[UV8:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY3]](<3 x s32>) ; SI-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY [[UV]](s32) ; SI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 ; SI-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[COPY4]], [[C]](s32) 
@@ -7874,7 +7860,6 @@ body: | ; SI-NEXT: G_STORE [[LSHR11]](s32), [[PTR_ADD14]](p1) :: (store (s8) into unknown-address + 15, addrspace 1) ; SI-NEXT: [[C8:%[0-9]+]]:_(s64) = G_CONSTANT i64 16 ; SI-NEXT: [[PTR_ADD15:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C8]](s64) - ; SI-NEXT: [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32), [[UV6:%[0-9]+]]:_(s32), [[UV7:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[EXTRACT1]](<4 x s32>) ; SI-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY [[UV4]](s32) ; SI-NEXT: [[LSHR12:%[0-9]+]]:_(s32) = G_LSHR [[COPY15]], [[C]](s32) ; SI-NEXT: [[PTR_ADD16:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD15]], [[C1]](s64) @@ -7940,7 +7925,7 @@ body: | ; SI-NEXT: G_STORE [[LSHR23]](s32), [[PTR_ADD30]](p1) :: (store (s8) into unknown-address + 31, addrspace 1) ; SI-NEXT: [[C9:%[0-9]+]]:_(s64) = G_CONSTANT i64 32 ; SI-NEXT: [[PTR_ADD31:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C9]](s64) - ; SI-NEXT: [[COPY27:%[0-9]+]]:_(s32) = COPY [[EXTRACT2]](s32) + ; SI-NEXT: [[COPY27:%[0-9]+]]:_(s32) = COPY [[UV8]](s32) ; SI-NEXT: [[LSHR24:%[0-9]+]]:_(s32) = G_LSHR [[COPY27]], [[C]](s32) ; SI-NEXT: [[PTR_ADD32:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD31]], [[C1]](s64) ; SI-NEXT: [[COPY28:%[0-9]+]]:_(s32) = COPY [[C2]](s32) @@ -7960,27 +7945,26 @@ body: | ; CI-NEXT: [[COPY1:%[0-9]+]]:_(<3 x s32>) = COPY $vgpr2_vgpr3_vgpr4 ; CI-NEXT: [[COPY2:%[0-9]+]]:_(<3 x s32>) = COPY $vgpr5_vgpr6_vgpr7 ; CI-NEXT: [[COPY3:%[0-9]+]]:_(<3 x s32>) = COPY $vgpr7_vgpr8_vgpr9 - ; CI-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<9 x s32>) = G_CONCAT_VECTORS [[COPY1]](<3 x s32>), [[COPY2]](<3 x s32>), [[COPY3]](<3 x s32>) - ; CI-NEXT: [[EXTRACT:%[0-9]+]]:_(<4 x s32>) = G_EXTRACT [[CONCAT_VECTORS]](<9 x s32>), 0 - ; CI-NEXT: [[EXTRACT1:%[0-9]+]]:_(<4 x s32>) = G_EXTRACT [[CONCAT_VECTORS]](<9 x s32>), 128 - ; CI-NEXT: [[EXTRACT2:%[0-9]+]]:_(s32) = G_EXTRACT [[COPY3]](<3 x s32>), 64 - ; CI-NEXT: G_STORE [[EXTRACT]](<4 x s32>), [[COPY]](p1) :: (store (<4 x s32>), align 1, addrspace 1) + ; CI-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](<3 x s32>) + ; CI-NEXT: [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY2]](<3 x s32>) + ; CI-NEXT: [[UV6:%[0-9]+]]:_(s32), [[UV7:%[0-9]+]]:_(s32), [[UV8:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY3]](<3 x s32>) + ; CI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[UV]](s32), [[UV1]](s32), [[UV2]](s32), [[UV3]](s32) + ; CI-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[UV4]](s32), [[UV5]](s32), [[UV6]](s32), [[UV7]](s32) + ; CI-NEXT: G_STORE [[BUILD_VECTOR]](<4 x s32>), [[COPY]](p1) :: (store (<4 x s32>), align 1, addrspace 1) ; CI-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 16 ; CI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64) - ; CI-NEXT: G_STORE [[EXTRACT1]](<4 x s32>), [[PTR_ADD]](p1) :: (store (<4 x s32>) into unknown-address + 16, align 1, addrspace 1) + ; CI-NEXT: G_STORE [[BUILD_VECTOR1]](<4 x s32>), [[PTR_ADD]](p1) :: (store (<4 x s32>) into unknown-address + 16, align 1, addrspace 1) ; CI-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 32 ; CI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C1]](s64) - ; CI-NEXT: G_STORE [[EXTRACT2]](s32), [[PTR_ADD1]](p1) :: (store (s32) into unknown-address + 32, align 1, addrspace 1) + ; CI-NEXT: G_STORE [[UV8]](s32), [[PTR_ADD1]](p1) :: (store (s32) into unknown-address + 32, align 1, addrspace 1) ; VI-LABEL: name: test_store_global_v9s32_align1 ; VI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 ; VI-NEXT: 
[[COPY1:%[0-9]+]]:_(<3 x s32>) = COPY $vgpr2_vgpr3_vgpr4 ; VI-NEXT: [[COPY2:%[0-9]+]]:_(<3 x s32>) = COPY $vgpr5_vgpr6_vgpr7 ; VI-NEXT: [[COPY3:%[0-9]+]]:_(<3 x s32>) = COPY $vgpr7_vgpr8_vgpr9 - ; VI-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<9 x s32>) = G_CONCAT_VECTORS [[COPY1]](<3 x s32>), [[COPY2]](<3 x s32>), [[COPY3]](<3 x s32>) - ; VI-NEXT: [[EXTRACT:%[0-9]+]]:_(<4 x s32>) = G_EXTRACT [[CONCAT_VECTORS]](<9 x s32>), 0 - ; VI-NEXT: [[EXTRACT1:%[0-9]+]]:_(<4 x s32>) = G_EXTRACT [[CONCAT_VECTORS]](<9 x s32>), 128 - ; VI-NEXT: [[EXTRACT2:%[0-9]+]]:_(s32) = G_EXTRACT [[COPY3]](<3 x s32>), 64 - ; VI-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[EXTRACT]](<4 x s32>) + ; VI-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](<3 x s32>) + ; VI-NEXT: [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY2]](<3 x s32>) + ; VI-NEXT: [[UV6:%[0-9]+]]:_(s32), [[UV7:%[0-9]+]]:_(s32), [[UV8:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY3]](<3 x s32>) ; VI-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY [[UV]](s32) ; VI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 ; VI-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[COPY4]], [[C]](s32) @@ -8053,7 +8037,6 @@ body: | ; VI-NEXT: G_STORE [[ANYEXT7]](s32), [[PTR_ADD14]](p1) :: (store (s8) into unknown-address + 15, addrspace 1) ; VI-NEXT: [[C7:%[0-9]+]]:_(s64) = G_CONSTANT i64 16 ; VI-NEXT: [[PTR_ADD15:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C7]](s64) - ; VI-NEXT: [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32), [[UV6:%[0-9]+]]:_(s32), [[UV7:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[EXTRACT1]](<4 x s32>) ; VI-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY [[UV4]](s32) ; VI-NEXT: [[LSHR12:%[0-9]+]]:_(s32) = G_LSHR [[COPY8]], [[C]](s32) ; VI-NEXT: [[PTR_ADD16:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD15]], [[C1]](s64) @@ -8119,7 +8102,7 @@ body: | ; VI-NEXT: G_STORE [[ANYEXT15]](s32), [[PTR_ADD30]](p1) :: (store (s8) into unknown-address + 31, addrspace 1) ; VI-NEXT: [[C8:%[0-9]+]]:_(s64) = G_CONSTANT i64 32 ; VI-NEXT: [[PTR_ADD31:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C8]](s64) - ; VI-NEXT: [[COPY12:%[0-9]+]]:_(s32) = COPY [[EXTRACT2]](s32) + ; VI-NEXT: [[COPY12:%[0-9]+]]:_(s32) = COPY [[UV8]](s32) ; VI-NEXT: [[LSHR24:%[0-9]+]]:_(s32) = G_LSHR [[COPY12]], [[C]](s32) ; VI-NEXT: [[PTR_ADD32:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD31]], [[C1]](s64) ; VI-NEXT: [[TRUNC16:%[0-9]+]]:_(s16) = G_TRUNC [[COPY12]](s32) @@ -8139,17 +8122,18 @@ body: | ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(<3 x s32>) = COPY $vgpr2_vgpr3_vgpr4 ; GFX9-NEXT: [[COPY2:%[0-9]+]]:_(<3 x s32>) = COPY $vgpr5_vgpr6_vgpr7 ; GFX9-NEXT: [[COPY3:%[0-9]+]]:_(<3 x s32>) = COPY $vgpr7_vgpr8_vgpr9 - ; GFX9-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<9 x s32>) = G_CONCAT_VECTORS [[COPY1]](<3 x s32>), [[COPY2]](<3 x s32>), [[COPY3]](<3 x s32>) - ; GFX9-NEXT: [[EXTRACT:%[0-9]+]]:_(<4 x s32>) = G_EXTRACT [[CONCAT_VECTORS]](<9 x s32>), 0 - ; GFX9-NEXT: [[EXTRACT1:%[0-9]+]]:_(<4 x s32>) = G_EXTRACT [[CONCAT_VECTORS]](<9 x s32>), 128 - ; GFX9-NEXT: [[EXTRACT2:%[0-9]+]]:_(s32) = G_EXTRACT [[COPY3]](<3 x s32>), 64 - ; GFX9-NEXT: G_STORE [[EXTRACT]](<4 x s32>), [[COPY]](p1) :: (store (<4 x s32>), align 1, addrspace 1) + ; GFX9-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](<3 x s32>) + ; GFX9-NEXT: [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY2]](<3 x s32>) + ; GFX9-NEXT: [[UV6:%[0-9]+]]:_(s32), 
[[UV7:%[0-9]+]]:_(s32), [[UV8:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY3]](<3 x s32>) + ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[UV]](s32), [[UV1]](s32), [[UV2]](s32), [[UV3]](s32) + ; GFX9-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[UV4]](s32), [[UV5]](s32), [[UV6]](s32), [[UV7]](s32) + ; GFX9-NEXT: G_STORE [[BUILD_VECTOR]](<4 x s32>), [[COPY]](p1) :: (store (<4 x s32>), align 1, addrspace 1) ; GFX9-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 16 ; GFX9-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64) - ; GFX9-NEXT: G_STORE [[EXTRACT1]](<4 x s32>), [[PTR_ADD]](p1) :: (store (<4 x s32>) into unknown-address + 16, align 1, addrspace 1) + ; GFX9-NEXT: G_STORE [[BUILD_VECTOR1]](<4 x s32>), [[PTR_ADD]](p1) :: (store (<4 x s32>) into unknown-address + 16, align 1, addrspace 1) ; GFX9-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 32 ; GFX9-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C1]](s64) - ; GFX9-NEXT: G_STORE [[EXTRACT2]](s32), [[PTR_ADD1]](p1) :: (store (s32) into unknown-address + 32, align 1, addrspace 1) + ; GFX9-NEXT: G_STORE [[UV8]](s32), [[PTR_ADD1]](p1) :: (store (s32) into unknown-address + 32, align 1, addrspace 1) %0:_(p1) = COPY $vgpr0_vgpr1 %1:_(<3 x s32>) = COPY $vgpr2_vgpr3_vgpr4 %2:_(<3 x s32>) = COPY $vgpr5_vgpr6_vgpr7 @@ -8169,11 +8153,9 @@ body: | ; SI-NEXT: [[COPY1:%[0-9]+]]:_(<3 x s32>) = COPY $vgpr2_vgpr3_vgpr4 ; SI-NEXT: [[COPY2:%[0-9]+]]:_(<3 x s32>) = COPY $vgpr5_vgpr6_vgpr7 ; SI-NEXT: [[COPY3:%[0-9]+]]:_(<3 x s32>) = COPY $vgpr7_vgpr8_vgpr9 - ; SI-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<9 x s32>) = G_CONCAT_VECTORS [[COPY1]](<3 x s32>), [[COPY2]](<3 x s32>), [[COPY3]](<3 x s32>) - ; SI-NEXT: [[EXTRACT:%[0-9]+]]:_(<4 x s32>) = G_EXTRACT [[CONCAT_VECTORS]](<9 x s32>), 0 - ; SI-NEXT: [[EXTRACT1:%[0-9]+]]:_(<4 x s32>) = G_EXTRACT [[CONCAT_VECTORS]](<9 x s32>), 128 - ; SI-NEXT: [[EXTRACT2:%[0-9]+]]:_(s32) = G_EXTRACT [[COPY3]](<3 x s32>), 64 - ; SI-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[EXTRACT]](<4 x s32>) + ; SI-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](<3 x s32>) + ; SI-NEXT: [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY2]](<3 x s32>) + ; SI-NEXT: [[UV6:%[0-9]+]]:_(s32), [[UV7:%[0-9]+]]:_(s32), [[UV8:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY3]](<3 x s32>) ; SI-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY [[UV]](s32) ; SI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 ; SI-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[COPY4]], [[C]](s32) @@ -8204,7 +8186,6 @@ body: | ; SI-NEXT: G_STORE [[LSHR3]](s32), [[PTR_ADD6]](p1) :: (store (s16) into unknown-address + 14, addrspace 1) ; SI-NEXT: [[C5:%[0-9]+]]:_(s64) = G_CONSTANT i64 16 ; SI-NEXT: [[PTR_ADD7:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C5]](s64) - ; SI-NEXT: [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32), [[UV6:%[0-9]+]]:_(s32), [[UV7:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[EXTRACT1]](<4 x s32>) ; SI-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY [[UV4]](s32) ; SI-NEXT: [[LSHR4:%[0-9]+]]:_(s32) = G_LSHR [[COPY8]], [[C]](s32) ; SI-NEXT: [[PTR_ADD8:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD7]], [[C1]](s64) @@ -8230,7 +8211,7 @@ body: | ; SI-NEXT: G_STORE [[LSHR7]](s32), [[PTR_ADD14]](p1) :: (store (s16) into unknown-address + 30, addrspace 1) ; SI-NEXT: [[C6:%[0-9]+]]:_(s64) = G_CONSTANT i64 32 ; SI-NEXT: [[PTR_ADD15:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C6]](s64) - ; SI-NEXT: 
[[COPY12:%[0-9]+]]:_(s32) = COPY [[EXTRACT2]](s32) + ; SI-NEXT: [[COPY12:%[0-9]+]]:_(s32) = COPY [[UV8]](s32) ; SI-NEXT: [[LSHR8:%[0-9]+]]:_(s32) = G_LSHR [[COPY12]], [[C]](s32) ; SI-NEXT: [[PTR_ADD16:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD15]], [[C1]](s64) ; SI-NEXT: G_STORE [[COPY12]](s32), [[PTR_ADD15]](p1) :: (store (s16) into unknown-address + 32, addrspace 1) @@ -8240,27 +8221,26 @@ body: | ; CI-NEXT: [[COPY1:%[0-9]+]]:_(<3 x s32>) = COPY $vgpr2_vgpr3_vgpr4 ; CI-NEXT: [[COPY2:%[0-9]+]]:_(<3 x s32>) = COPY $vgpr5_vgpr6_vgpr7 ; CI-NEXT: [[COPY3:%[0-9]+]]:_(<3 x s32>) = COPY $vgpr7_vgpr8_vgpr9 - ; CI-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<9 x s32>) = G_CONCAT_VECTORS [[COPY1]](<3 x s32>), [[COPY2]](<3 x s32>), [[COPY3]](<3 x s32>) - ; CI-NEXT: [[EXTRACT:%[0-9]+]]:_(<4 x s32>) = G_EXTRACT [[CONCAT_VECTORS]](<9 x s32>), 0 - ; CI-NEXT: [[EXTRACT1:%[0-9]+]]:_(<4 x s32>) = G_EXTRACT [[CONCAT_VECTORS]](<9 x s32>), 128 - ; CI-NEXT: [[EXTRACT2:%[0-9]+]]:_(s32) = G_EXTRACT [[COPY3]](<3 x s32>), 64 - ; CI-NEXT: G_STORE [[EXTRACT]](<4 x s32>), [[COPY]](p1) :: (store (<4 x s32>), align 2, addrspace 1) + ; CI-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](<3 x s32>) + ; CI-NEXT: [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY2]](<3 x s32>) + ; CI-NEXT: [[UV6:%[0-9]+]]:_(s32), [[UV7:%[0-9]+]]:_(s32), [[UV8:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY3]](<3 x s32>) + ; CI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[UV]](s32), [[UV1]](s32), [[UV2]](s32), [[UV3]](s32) + ; CI-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[UV4]](s32), [[UV5]](s32), [[UV6]](s32), [[UV7]](s32) + ; CI-NEXT: G_STORE [[BUILD_VECTOR]](<4 x s32>), [[COPY]](p1) :: (store (<4 x s32>), align 2, addrspace 1) ; CI-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 16 ; CI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64) - ; CI-NEXT: G_STORE [[EXTRACT1]](<4 x s32>), [[PTR_ADD]](p1) :: (store (<4 x s32>) into unknown-address + 16, align 2, addrspace 1) + ; CI-NEXT: G_STORE [[BUILD_VECTOR1]](<4 x s32>), [[PTR_ADD]](p1) :: (store (<4 x s32>) into unknown-address + 16, align 2, addrspace 1) ; CI-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 32 ; CI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C1]](s64) - ; CI-NEXT: G_STORE [[EXTRACT2]](s32), [[PTR_ADD1]](p1) :: (store (s32) into unknown-address + 32, align 2, addrspace 1) + ; CI-NEXT: G_STORE [[UV8]](s32), [[PTR_ADD1]](p1) :: (store (s32) into unknown-address + 32, align 2, addrspace 1) ; VI-LABEL: name: test_store_global_v9s32_align2 ; VI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 ; VI-NEXT: [[COPY1:%[0-9]+]]:_(<3 x s32>) = COPY $vgpr2_vgpr3_vgpr4 ; VI-NEXT: [[COPY2:%[0-9]+]]:_(<3 x s32>) = COPY $vgpr5_vgpr6_vgpr7 ; VI-NEXT: [[COPY3:%[0-9]+]]:_(<3 x s32>) = COPY $vgpr7_vgpr8_vgpr9 - ; VI-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<9 x s32>) = G_CONCAT_VECTORS [[COPY1]](<3 x s32>), [[COPY2]](<3 x s32>), [[COPY3]](<3 x s32>) - ; VI-NEXT: [[EXTRACT:%[0-9]+]]:_(<4 x s32>) = G_EXTRACT [[CONCAT_VECTORS]](<9 x s32>), 0 - ; VI-NEXT: [[EXTRACT1:%[0-9]+]]:_(<4 x s32>) = G_EXTRACT [[CONCAT_VECTORS]](<9 x s32>), 128 - ; VI-NEXT: [[EXTRACT2:%[0-9]+]]:_(s32) = G_EXTRACT [[COPY3]](<3 x s32>), 64 - ; VI-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[EXTRACT]](<4 x s32>) + ; VI-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](<3 x 
s32>) + ; VI-NEXT: [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY2]](<3 x s32>) + ; VI-NEXT: [[UV6:%[0-9]+]]:_(s32), [[UV7:%[0-9]+]]:_(s32), [[UV8:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY3]](<3 x s32>) ; VI-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY [[UV]](s32) ; VI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 ; VI-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[COPY4]], [[C]](s32) @@ -8291,7 +8271,6 @@ body: | ; VI-NEXT: G_STORE [[LSHR3]](s32), [[PTR_ADD6]](p1) :: (store (s16) into unknown-address + 14, addrspace 1) ; VI-NEXT: [[C5:%[0-9]+]]:_(s64) = G_CONSTANT i64 16 ; VI-NEXT: [[PTR_ADD7:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C5]](s64) - ; VI-NEXT: [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32), [[UV6:%[0-9]+]]:_(s32), [[UV7:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[EXTRACT1]](<4 x s32>) ; VI-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY [[UV4]](s32) ; VI-NEXT: [[LSHR4:%[0-9]+]]:_(s32) = G_LSHR [[COPY8]], [[C]](s32) ; VI-NEXT: [[PTR_ADD8:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD7]], [[C1]](s64) @@ -8317,7 +8296,7 @@ body: | ; VI-NEXT: G_STORE [[LSHR7]](s32), [[PTR_ADD14]](p1) :: (store (s16) into unknown-address + 30, addrspace 1) ; VI-NEXT: [[C6:%[0-9]+]]:_(s64) = G_CONSTANT i64 32 ; VI-NEXT: [[PTR_ADD15:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C6]](s64) - ; VI-NEXT: [[COPY12:%[0-9]+]]:_(s32) = COPY [[EXTRACT2]](s32) + ; VI-NEXT: [[COPY12:%[0-9]+]]:_(s32) = COPY [[UV8]](s32) ; VI-NEXT: [[LSHR8:%[0-9]+]]:_(s32) = G_LSHR [[COPY12]], [[C]](s32) ; VI-NEXT: [[PTR_ADD16:%[0-9]+]]:_(p1) = G_PTR_ADD [[PTR_ADD15]], [[C1]](s64) ; VI-NEXT: G_STORE [[COPY12]](s32), [[PTR_ADD15]](p1) :: (store (s16) into unknown-address + 32, addrspace 1) @@ -8327,17 +8306,18 @@ body: | ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(<3 x s32>) = COPY $vgpr2_vgpr3_vgpr4 ; GFX9-NEXT: [[COPY2:%[0-9]+]]:_(<3 x s32>) = COPY $vgpr5_vgpr6_vgpr7 ; GFX9-NEXT: [[COPY3:%[0-9]+]]:_(<3 x s32>) = COPY $vgpr7_vgpr8_vgpr9 - ; GFX9-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<9 x s32>) = G_CONCAT_VECTORS [[COPY1]](<3 x s32>), [[COPY2]](<3 x s32>), [[COPY3]](<3 x s32>) - ; GFX9-NEXT: [[EXTRACT:%[0-9]+]]:_(<4 x s32>) = G_EXTRACT [[CONCAT_VECTORS]](<9 x s32>), 0 - ; GFX9-NEXT: [[EXTRACT1:%[0-9]+]]:_(<4 x s32>) = G_EXTRACT [[CONCAT_VECTORS]](<9 x s32>), 128 - ; GFX9-NEXT: [[EXTRACT2:%[0-9]+]]:_(s32) = G_EXTRACT [[COPY3]](<3 x s32>), 64 - ; GFX9-NEXT: G_STORE [[EXTRACT]](<4 x s32>), [[COPY]](p1) :: (store (<4 x s32>), align 2, addrspace 1) + ; GFX9-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](<3 x s32>) + ; GFX9-NEXT: [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY2]](<3 x s32>) + ; GFX9-NEXT: [[UV6:%[0-9]+]]:_(s32), [[UV7:%[0-9]+]]:_(s32), [[UV8:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY3]](<3 x s32>) + ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[UV]](s32), [[UV1]](s32), [[UV2]](s32), [[UV3]](s32) + ; GFX9-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[UV4]](s32), [[UV5]](s32), [[UV6]](s32), [[UV7]](s32) + ; GFX9-NEXT: G_STORE [[BUILD_VECTOR]](<4 x s32>), [[COPY]](p1) :: (store (<4 x s32>), align 2, addrspace 1) ; GFX9-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 16 ; GFX9-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64) - ; GFX9-NEXT: G_STORE [[EXTRACT1]](<4 x s32>), [[PTR_ADD]](p1) :: (store (<4 x s32>) into unknown-address + 16, align 2, addrspace 1) + ; GFX9-NEXT: G_STORE [[BUILD_VECTOR1]](<4 x s32>), [[PTR_ADD]](p1) :: (store (<4 x s32>) into unknown-address + 16, 
align 2, addrspace 1) ; GFX9-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 32 ; GFX9-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C1]](s64) - ; GFX9-NEXT: G_STORE [[EXTRACT2]](s32), [[PTR_ADD1]](p1) :: (store (s32) into unknown-address + 32, align 2, addrspace 1) + ; GFX9-NEXT: G_STORE [[UV8]](s32), [[PTR_ADD1]](p1) :: (store (s32) into unknown-address + 32, align 2, addrspace 1) %0:_(p1) = COPY $vgpr0_vgpr1 %1:_(<3 x s32>) = COPY $vgpr2_vgpr3_vgpr4 %2:_(<3 x s32>) = COPY $vgpr5_vgpr6_vgpr7 @@ -8357,65 +8337,69 @@ body: | ; SI-NEXT: [[COPY1:%[0-9]+]]:_(<3 x s32>) = COPY $vgpr2_vgpr3_vgpr4 ; SI-NEXT: [[COPY2:%[0-9]+]]:_(<3 x s32>) = COPY $vgpr5_vgpr6_vgpr7 ; SI-NEXT: [[COPY3:%[0-9]+]]:_(<3 x s32>) = COPY $vgpr7_vgpr8_vgpr9 - ; SI-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<9 x s32>) = G_CONCAT_VECTORS [[COPY1]](<3 x s32>), [[COPY2]](<3 x s32>), [[COPY3]](<3 x s32>) - ; SI-NEXT: [[EXTRACT:%[0-9]+]]:_(<4 x s32>) = G_EXTRACT [[CONCAT_VECTORS]](<9 x s32>), 0 - ; SI-NEXT: [[EXTRACT1:%[0-9]+]]:_(<4 x s32>) = G_EXTRACT [[CONCAT_VECTORS]](<9 x s32>), 128 - ; SI-NEXT: [[EXTRACT2:%[0-9]+]]:_(s32) = G_EXTRACT [[COPY3]](<3 x s32>), 64 - ; SI-NEXT: G_STORE [[EXTRACT]](<4 x s32>), [[COPY]](p1) :: (store (<4 x s32>), align 4, addrspace 1) + ; SI-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](<3 x s32>) + ; SI-NEXT: [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY2]](<3 x s32>) + ; SI-NEXT: [[UV6:%[0-9]+]]:_(s32), [[UV7:%[0-9]+]]:_(s32), [[UV8:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY3]](<3 x s32>) + ; SI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[UV]](s32), [[UV1]](s32), [[UV2]](s32), [[UV3]](s32) + ; SI-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[UV4]](s32), [[UV5]](s32), [[UV6]](s32), [[UV7]](s32) + ; SI-NEXT: G_STORE [[BUILD_VECTOR]](<4 x s32>), [[COPY]](p1) :: (store (<4 x s32>), align 4, addrspace 1) ; SI-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 16 ; SI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64) - ; SI-NEXT: G_STORE [[EXTRACT1]](<4 x s32>), [[PTR_ADD]](p1) :: (store (<4 x s32>) into unknown-address + 16, align 4, addrspace 1) + ; SI-NEXT: G_STORE [[BUILD_VECTOR1]](<4 x s32>), [[PTR_ADD]](p1) :: (store (<4 x s32>) into unknown-address + 16, align 4, addrspace 1) ; SI-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 32 ; SI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C1]](s64) - ; SI-NEXT: G_STORE [[EXTRACT2]](s32), [[PTR_ADD1]](p1) :: (store (s32) into unknown-address + 32, addrspace 1) + ; SI-NEXT: G_STORE [[UV8]](s32), [[PTR_ADD1]](p1) :: (store (s32) into unknown-address + 32, addrspace 1) ; CI-LABEL: name: test_store_global_v9s32_align4 ; CI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 ; CI-NEXT: [[COPY1:%[0-9]+]]:_(<3 x s32>) = COPY $vgpr2_vgpr3_vgpr4 ; CI-NEXT: [[COPY2:%[0-9]+]]:_(<3 x s32>) = COPY $vgpr5_vgpr6_vgpr7 ; CI-NEXT: [[COPY3:%[0-9]+]]:_(<3 x s32>) = COPY $vgpr7_vgpr8_vgpr9 - ; CI-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<9 x s32>) = G_CONCAT_VECTORS [[COPY1]](<3 x s32>), [[COPY2]](<3 x s32>), [[COPY3]](<3 x s32>) - ; CI-NEXT: [[EXTRACT:%[0-9]+]]:_(<4 x s32>) = G_EXTRACT [[CONCAT_VECTORS]](<9 x s32>), 0 - ; CI-NEXT: [[EXTRACT1:%[0-9]+]]:_(<4 x s32>) = G_EXTRACT [[CONCAT_VECTORS]](<9 x s32>), 128 - ; CI-NEXT: [[EXTRACT2:%[0-9]+]]:_(s32) = G_EXTRACT [[COPY3]](<3 x s32>), 64 - ; CI-NEXT: G_STORE [[EXTRACT]](<4 x s32>), [[COPY]](p1) :: (store (<4 x s32>), align 4, addrspace 1) + ; CI-NEXT: [[UV:%[0-9]+]]:_(s32), 
[[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](<3 x s32>) + ; CI-NEXT: [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY2]](<3 x s32>) + ; CI-NEXT: [[UV6:%[0-9]+]]:_(s32), [[UV7:%[0-9]+]]:_(s32), [[UV8:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY3]](<3 x s32>) + ; CI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[UV]](s32), [[UV1]](s32), [[UV2]](s32), [[UV3]](s32) + ; CI-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[UV4]](s32), [[UV5]](s32), [[UV6]](s32), [[UV7]](s32) + ; CI-NEXT: G_STORE [[BUILD_VECTOR]](<4 x s32>), [[COPY]](p1) :: (store (<4 x s32>), align 4, addrspace 1) ; CI-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 16 ; CI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64) - ; CI-NEXT: G_STORE [[EXTRACT1]](<4 x s32>), [[PTR_ADD]](p1) :: (store (<4 x s32>) into unknown-address + 16, align 4, addrspace 1) + ; CI-NEXT: G_STORE [[BUILD_VECTOR1]](<4 x s32>), [[PTR_ADD]](p1) :: (store (<4 x s32>) into unknown-address + 16, align 4, addrspace 1) ; CI-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 32 ; CI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C1]](s64) - ; CI-NEXT: G_STORE [[EXTRACT2]](s32), [[PTR_ADD1]](p1) :: (store (s32) into unknown-address + 32, addrspace 1) + ; CI-NEXT: G_STORE [[UV8]](s32), [[PTR_ADD1]](p1) :: (store (s32) into unknown-address + 32, addrspace 1) ; VI-LABEL: name: test_store_global_v9s32_align4 ; VI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 ; VI-NEXT: [[COPY1:%[0-9]+]]:_(<3 x s32>) = COPY $vgpr2_vgpr3_vgpr4 ; VI-NEXT: [[COPY2:%[0-9]+]]:_(<3 x s32>) = COPY $vgpr5_vgpr6_vgpr7 ; VI-NEXT: [[COPY3:%[0-9]+]]:_(<3 x s32>) = COPY $vgpr7_vgpr8_vgpr9 - ; VI-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<9 x s32>) = G_CONCAT_VECTORS [[COPY1]](<3 x s32>), [[COPY2]](<3 x s32>), [[COPY3]](<3 x s32>) - ; VI-NEXT: [[EXTRACT:%[0-9]+]]:_(<4 x s32>) = G_EXTRACT [[CONCAT_VECTORS]](<9 x s32>), 0 - ; VI-NEXT: [[EXTRACT1:%[0-9]+]]:_(<4 x s32>) = G_EXTRACT [[CONCAT_VECTORS]](<9 x s32>), 128 - ; VI-NEXT: [[EXTRACT2:%[0-9]+]]:_(s32) = G_EXTRACT [[COPY3]](<3 x s32>), 64 - ; VI-NEXT: G_STORE [[EXTRACT]](<4 x s32>), [[COPY]](p1) :: (store (<4 x s32>), align 4, addrspace 1) + ; VI-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](<3 x s32>) + ; VI-NEXT: [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY2]](<3 x s32>) + ; VI-NEXT: [[UV6:%[0-9]+]]:_(s32), [[UV7:%[0-9]+]]:_(s32), [[UV8:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY3]](<3 x s32>) + ; VI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[UV]](s32), [[UV1]](s32), [[UV2]](s32), [[UV3]](s32) + ; VI-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[UV4]](s32), [[UV5]](s32), [[UV6]](s32), [[UV7]](s32) + ; VI-NEXT: G_STORE [[BUILD_VECTOR]](<4 x s32>), [[COPY]](p1) :: (store (<4 x s32>), align 4, addrspace 1) ; VI-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 16 ; VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64) - ; VI-NEXT: G_STORE [[EXTRACT1]](<4 x s32>), [[PTR_ADD]](p1) :: (store (<4 x s32>) into unknown-address + 16, align 4, addrspace 1) + ; VI-NEXT: G_STORE [[BUILD_VECTOR1]](<4 x s32>), [[PTR_ADD]](p1) :: (store (<4 x s32>) into unknown-address + 16, align 4, addrspace 1) ; VI-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 32 ; VI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C1]](s64) - ; VI-NEXT: G_STORE [[EXTRACT2]](s32), [[PTR_ADD1]](p1) :: (store (s32) into 
unknown-address + 32, addrspace 1) + ; VI-NEXT: G_STORE [[UV8]](s32), [[PTR_ADD1]](p1) :: (store (s32) into unknown-address + 32, addrspace 1) ; GFX9-LABEL: name: test_store_global_v9s32_align4 ; GFX9: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(<3 x s32>) = COPY $vgpr2_vgpr3_vgpr4 ; GFX9-NEXT: [[COPY2:%[0-9]+]]:_(<3 x s32>) = COPY $vgpr5_vgpr6_vgpr7 ; GFX9-NEXT: [[COPY3:%[0-9]+]]:_(<3 x s32>) = COPY $vgpr7_vgpr8_vgpr9 - ; GFX9-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<9 x s32>) = G_CONCAT_VECTORS [[COPY1]](<3 x s32>), [[COPY2]](<3 x s32>), [[COPY3]](<3 x s32>) - ; GFX9-NEXT: [[EXTRACT:%[0-9]+]]:_(<4 x s32>) = G_EXTRACT [[CONCAT_VECTORS]](<9 x s32>), 0 - ; GFX9-NEXT: [[EXTRACT1:%[0-9]+]]:_(<4 x s32>) = G_EXTRACT [[CONCAT_VECTORS]](<9 x s32>), 128 - ; GFX9-NEXT: [[EXTRACT2:%[0-9]+]]:_(s32) = G_EXTRACT [[COPY3]](<3 x s32>), 64 - ; GFX9-NEXT: G_STORE [[EXTRACT]](<4 x s32>), [[COPY]](p1) :: (store (<4 x s32>), align 4, addrspace 1) + ; GFX9-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](<3 x s32>) + ; GFX9-NEXT: [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY2]](<3 x s32>) + ; GFX9-NEXT: [[UV6:%[0-9]+]]:_(s32), [[UV7:%[0-9]+]]:_(s32), [[UV8:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY3]](<3 x s32>) + ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[UV]](s32), [[UV1]](s32), [[UV2]](s32), [[UV3]](s32) + ; GFX9-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[UV4]](s32), [[UV5]](s32), [[UV6]](s32), [[UV7]](s32) + ; GFX9-NEXT: G_STORE [[BUILD_VECTOR]](<4 x s32>), [[COPY]](p1) :: (store (<4 x s32>), align 4, addrspace 1) ; GFX9-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 16 ; GFX9-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64) - ; GFX9-NEXT: G_STORE [[EXTRACT1]](<4 x s32>), [[PTR_ADD]](p1) :: (store (<4 x s32>) into unknown-address + 16, align 4, addrspace 1) + ; GFX9-NEXT: G_STORE [[BUILD_VECTOR1]](<4 x s32>), [[PTR_ADD]](p1) :: (store (<4 x s32>) into unknown-address + 16, align 4, addrspace 1) ; GFX9-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 32 ; GFX9-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C1]](s64) - ; GFX9-NEXT: G_STORE [[EXTRACT2]](s32), [[PTR_ADD1]](p1) :: (store (s32) into unknown-address + 32, addrspace 1) + ; GFX9-NEXT: G_STORE [[UV8]](s32), [[PTR_ADD1]](p1) :: (store (s32) into unknown-address + 32, addrspace 1) %0:_(p1) = COPY $vgpr0_vgpr1 %1:_(<3 x s32>) = COPY $vgpr2_vgpr3_vgpr4 %2:_(<3 x s32>) = COPY $vgpr5_vgpr6_vgpr7 @@ -8435,65 +8419,69 @@ body: | ; SI-NEXT: [[COPY1:%[0-9]+]]:_(<3 x s32>) = COPY $vgpr2_vgpr3_vgpr4 ; SI-NEXT: [[COPY2:%[0-9]+]]:_(<3 x s32>) = COPY $vgpr5_vgpr6_vgpr7 ; SI-NEXT: [[COPY3:%[0-9]+]]:_(<3 x s32>) = COPY $vgpr7_vgpr8_vgpr9 - ; SI-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<9 x s32>) = G_CONCAT_VECTORS [[COPY1]](<3 x s32>), [[COPY2]](<3 x s32>), [[COPY3]](<3 x s32>) - ; SI-NEXT: [[EXTRACT:%[0-9]+]]:_(<4 x s32>) = G_EXTRACT [[CONCAT_VECTORS]](<9 x s32>), 0 - ; SI-NEXT: [[EXTRACT1:%[0-9]+]]:_(<4 x s32>) = G_EXTRACT [[CONCAT_VECTORS]](<9 x s32>), 128 - ; SI-NEXT: [[EXTRACT2:%[0-9]+]]:_(s32) = G_EXTRACT [[COPY3]](<3 x s32>), 64 - ; SI-NEXT: G_STORE [[EXTRACT]](<4 x s32>), [[COPY]](p1) :: (store (<4 x s32>), align 8, addrspace 1) + ; SI-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](<3 x s32>) + ; SI-NEXT: [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY2]](<3 x 
s32>) + ; SI-NEXT: [[UV6:%[0-9]+]]:_(s32), [[UV7:%[0-9]+]]:_(s32), [[UV8:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY3]](<3 x s32>) + ; SI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[UV]](s32), [[UV1]](s32), [[UV2]](s32), [[UV3]](s32) + ; SI-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[UV4]](s32), [[UV5]](s32), [[UV6]](s32), [[UV7]](s32) + ; SI-NEXT: G_STORE [[BUILD_VECTOR]](<4 x s32>), [[COPY]](p1) :: (store (<4 x s32>), align 8, addrspace 1) ; SI-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 16 ; SI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64) - ; SI-NEXT: G_STORE [[EXTRACT1]](<4 x s32>), [[PTR_ADD]](p1) :: (store (<4 x s32>) into unknown-address + 16, align 8, addrspace 1) + ; SI-NEXT: G_STORE [[BUILD_VECTOR1]](<4 x s32>), [[PTR_ADD]](p1) :: (store (<4 x s32>) into unknown-address + 16, align 8, addrspace 1) ; SI-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 32 ; SI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C1]](s64) - ; SI-NEXT: G_STORE [[EXTRACT2]](s32), [[PTR_ADD1]](p1) :: (store (s32) into unknown-address + 32, align 8, addrspace 1) + ; SI-NEXT: G_STORE [[UV8]](s32), [[PTR_ADD1]](p1) :: (store (s32) into unknown-address + 32, align 8, addrspace 1) ; CI-LABEL: name: test_store_global_v9s32_align8 ; CI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 ; CI-NEXT: [[COPY1:%[0-9]+]]:_(<3 x s32>) = COPY $vgpr2_vgpr3_vgpr4 ; CI-NEXT: [[COPY2:%[0-9]+]]:_(<3 x s32>) = COPY $vgpr5_vgpr6_vgpr7 ; CI-NEXT: [[COPY3:%[0-9]+]]:_(<3 x s32>) = COPY $vgpr7_vgpr8_vgpr9 - ; CI-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<9 x s32>) = G_CONCAT_VECTORS [[COPY1]](<3 x s32>), [[COPY2]](<3 x s32>), [[COPY3]](<3 x s32>) - ; CI-NEXT: [[EXTRACT:%[0-9]+]]:_(<4 x s32>) = G_EXTRACT [[CONCAT_VECTORS]](<9 x s32>), 0 - ; CI-NEXT: [[EXTRACT1:%[0-9]+]]:_(<4 x s32>) = G_EXTRACT [[CONCAT_VECTORS]](<9 x s32>), 128 - ; CI-NEXT: [[EXTRACT2:%[0-9]+]]:_(s32) = G_EXTRACT [[COPY3]](<3 x s32>), 64 - ; CI-NEXT: G_STORE [[EXTRACT]](<4 x s32>), [[COPY]](p1) :: (store (<4 x s32>), align 8, addrspace 1) + ; CI-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](<3 x s32>) + ; CI-NEXT: [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY2]](<3 x s32>) + ; CI-NEXT: [[UV6:%[0-9]+]]:_(s32), [[UV7:%[0-9]+]]:_(s32), [[UV8:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY3]](<3 x s32>) + ; CI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[UV]](s32), [[UV1]](s32), [[UV2]](s32), [[UV3]](s32) + ; CI-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[UV4]](s32), [[UV5]](s32), [[UV6]](s32), [[UV7]](s32) + ; CI-NEXT: G_STORE [[BUILD_VECTOR]](<4 x s32>), [[COPY]](p1) :: (store (<4 x s32>), align 8, addrspace 1) ; CI-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 16 ; CI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64) - ; CI-NEXT: G_STORE [[EXTRACT1]](<4 x s32>), [[PTR_ADD]](p1) :: (store (<4 x s32>) into unknown-address + 16, align 8, addrspace 1) + ; CI-NEXT: G_STORE [[BUILD_VECTOR1]](<4 x s32>), [[PTR_ADD]](p1) :: (store (<4 x s32>) into unknown-address + 16, align 8, addrspace 1) ; CI-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 32 ; CI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C1]](s64) - ; CI-NEXT: G_STORE [[EXTRACT2]](s32), [[PTR_ADD1]](p1) :: (store (s32) into unknown-address + 32, align 8, addrspace 1) + ; CI-NEXT: G_STORE [[UV8]](s32), [[PTR_ADD1]](p1) :: (store (s32) into unknown-address + 32, align 8, addrspace 1) ; VI-LABEL: name: 
test_store_global_v9s32_align8 ; VI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 ; VI-NEXT: [[COPY1:%[0-9]+]]:_(<3 x s32>) = COPY $vgpr2_vgpr3_vgpr4 ; VI-NEXT: [[COPY2:%[0-9]+]]:_(<3 x s32>) = COPY $vgpr5_vgpr6_vgpr7 ; VI-NEXT: [[COPY3:%[0-9]+]]:_(<3 x s32>) = COPY $vgpr7_vgpr8_vgpr9 - ; VI-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<9 x s32>) = G_CONCAT_VECTORS [[COPY1]](<3 x s32>), [[COPY2]](<3 x s32>), [[COPY3]](<3 x s32>) - ; VI-NEXT: [[EXTRACT:%[0-9]+]]:_(<4 x s32>) = G_EXTRACT [[CONCAT_VECTORS]](<9 x s32>), 0 - ; VI-NEXT: [[EXTRACT1:%[0-9]+]]:_(<4 x s32>) = G_EXTRACT [[CONCAT_VECTORS]](<9 x s32>), 128 - ; VI-NEXT: [[EXTRACT2:%[0-9]+]]:_(s32) = G_EXTRACT [[COPY3]](<3 x s32>), 64 - ; VI-NEXT: G_STORE [[EXTRACT]](<4 x s32>), [[COPY]](p1) :: (store (<4 x s32>), align 8, addrspace 1) + ; VI-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](<3 x s32>) + ; VI-NEXT: [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY2]](<3 x s32>) + ; VI-NEXT: [[UV6:%[0-9]+]]:_(s32), [[UV7:%[0-9]+]]:_(s32), [[UV8:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY3]](<3 x s32>) + ; VI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[UV]](s32), [[UV1]](s32), [[UV2]](s32), [[UV3]](s32) + ; VI-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[UV4]](s32), [[UV5]](s32), [[UV6]](s32), [[UV7]](s32) + ; VI-NEXT: G_STORE [[BUILD_VECTOR]](<4 x s32>), [[COPY]](p1) :: (store (<4 x s32>), align 8, addrspace 1) ; VI-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 16 ; VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64) - ; VI-NEXT: G_STORE [[EXTRACT1]](<4 x s32>), [[PTR_ADD]](p1) :: (store (<4 x s32>) into unknown-address + 16, align 8, addrspace 1) + ; VI-NEXT: G_STORE [[BUILD_VECTOR1]](<4 x s32>), [[PTR_ADD]](p1) :: (store (<4 x s32>) into unknown-address + 16, align 8, addrspace 1) ; VI-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 32 ; VI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C1]](s64) - ; VI-NEXT: G_STORE [[EXTRACT2]](s32), [[PTR_ADD1]](p1) :: (store (s32) into unknown-address + 32, align 8, addrspace 1) + ; VI-NEXT: G_STORE [[UV8]](s32), [[PTR_ADD1]](p1) :: (store (s32) into unknown-address + 32, align 8, addrspace 1) ; GFX9-LABEL: name: test_store_global_v9s32_align8 ; GFX9: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(<3 x s32>) = COPY $vgpr2_vgpr3_vgpr4 ; GFX9-NEXT: [[COPY2:%[0-9]+]]:_(<3 x s32>) = COPY $vgpr5_vgpr6_vgpr7 ; GFX9-NEXT: [[COPY3:%[0-9]+]]:_(<3 x s32>) = COPY $vgpr7_vgpr8_vgpr9 - ; GFX9-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<9 x s32>) = G_CONCAT_VECTORS [[COPY1]](<3 x s32>), [[COPY2]](<3 x s32>), [[COPY3]](<3 x s32>) - ; GFX9-NEXT: [[EXTRACT:%[0-9]+]]:_(<4 x s32>) = G_EXTRACT [[CONCAT_VECTORS]](<9 x s32>), 0 - ; GFX9-NEXT: [[EXTRACT1:%[0-9]+]]:_(<4 x s32>) = G_EXTRACT [[CONCAT_VECTORS]](<9 x s32>), 128 - ; GFX9-NEXT: [[EXTRACT2:%[0-9]+]]:_(s32) = G_EXTRACT [[COPY3]](<3 x s32>), 64 - ; GFX9-NEXT: G_STORE [[EXTRACT]](<4 x s32>), [[COPY]](p1) :: (store (<4 x s32>), align 8, addrspace 1) + ; GFX9-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](<3 x s32>) + ; GFX9-NEXT: [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY2]](<3 x s32>) + ; GFX9-NEXT: [[UV6:%[0-9]+]]:_(s32), [[UV7:%[0-9]+]]:_(s32), [[UV8:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY3]](<3 x s32>) + ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[UV]](s32), 
[[UV1]](s32), [[UV2]](s32), [[UV3]](s32) + ; GFX9-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[UV4]](s32), [[UV5]](s32), [[UV6]](s32), [[UV7]](s32) + ; GFX9-NEXT: G_STORE [[BUILD_VECTOR]](<4 x s32>), [[COPY]](p1) :: (store (<4 x s32>), align 8, addrspace 1) ; GFX9-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 16 ; GFX9-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64) - ; GFX9-NEXT: G_STORE [[EXTRACT1]](<4 x s32>), [[PTR_ADD]](p1) :: (store (<4 x s32>) into unknown-address + 16, align 8, addrspace 1) + ; GFX9-NEXT: G_STORE [[BUILD_VECTOR1]](<4 x s32>), [[PTR_ADD]](p1) :: (store (<4 x s32>) into unknown-address + 16, align 8, addrspace 1) ; GFX9-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 32 ; GFX9-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C1]](s64) - ; GFX9-NEXT: G_STORE [[EXTRACT2]](s32), [[PTR_ADD1]](p1) :: (store (s32) into unknown-address + 32, align 8, addrspace 1) + ; GFX9-NEXT: G_STORE [[UV8]](s32), [[PTR_ADD1]](p1) :: (store (s32) into unknown-address + 32, align 8, addrspace 1) %0:_(p1) = COPY $vgpr0_vgpr1 %1:_(<3 x s32>) = COPY $vgpr2_vgpr3_vgpr4 %2:_(<3 x s32>) = COPY $vgpr5_vgpr6_vgpr7 @@ -8513,65 +8501,69 @@ body: | ; SI-NEXT: [[COPY1:%[0-9]+]]:_(<3 x s32>) = COPY $vgpr2_vgpr3_vgpr4 ; SI-NEXT: [[COPY2:%[0-9]+]]:_(<3 x s32>) = COPY $vgpr5_vgpr6_vgpr7 ; SI-NEXT: [[COPY3:%[0-9]+]]:_(<3 x s32>) = COPY $vgpr7_vgpr8_vgpr9 - ; SI-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<9 x s32>) = G_CONCAT_VECTORS [[COPY1]](<3 x s32>), [[COPY2]](<3 x s32>), [[COPY3]](<3 x s32>) - ; SI-NEXT: [[EXTRACT:%[0-9]+]]:_(<4 x s32>) = G_EXTRACT [[CONCAT_VECTORS]](<9 x s32>), 0 - ; SI-NEXT: [[EXTRACT1:%[0-9]+]]:_(<4 x s32>) = G_EXTRACT [[CONCAT_VECTORS]](<9 x s32>), 128 - ; SI-NEXT: [[EXTRACT2:%[0-9]+]]:_(s32) = G_EXTRACT [[COPY3]](<3 x s32>), 64 - ; SI-NEXT: G_STORE [[EXTRACT]](<4 x s32>), [[COPY]](p1) :: (store (<4 x s32>), addrspace 1) + ; SI-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](<3 x s32>) + ; SI-NEXT: [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY2]](<3 x s32>) + ; SI-NEXT: [[UV6:%[0-9]+]]:_(s32), [[UV7:%[0-9]+]]:_(s32), [[UV8:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY3]](<3 x s32>) + ; SI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[UV]](s32), [[UV1]](s32), [[UV2]](s32), [[UV3]](s32) + ; SI-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[UV4]](s32), [[UV5]](s32), [[UV6]](s32), [[UV7]](s32) + ; SI-NEXT: G_STORE [[BUILD_VECTOR]](<4 x s32>), [[COPY]](p1) :: (store (<4 x s32>), addrspace 1) ; SI-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 16 ; SI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64) - ; SI-NEXT: G_STORE [[EXTRACT1]](<4 x s32>), [[PTR_ADD]](p1) :: (store (<4 x s32>) into unknown-address + 16, addrspace 1) + ; SI-NEXT: G_STORE [[BUILD_VECTOR1]](<4 x s32>), [[PTR_ADD]](p1) :: (store (<4 x s32>) into unknown-address + 16, addrspace 1) ; SI-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 32 ; SI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C1]](s64) - ; SI-NEXT: G_STORE [[EXTRACT2]](s32), [[PTR_ADD1]](p1) :: (store (s32) into unknown-address + 32, align 16, addrspace 1) + ; SI-NEXT: G_STORE [[UV8]](s32), [[PTR_ADD1]](p1) :: (store (s32) into unknown-address + 32, align 16, addrspace 1) ; CI-LABEL: name: test_store_global_v9s32_align16 ; CI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 ; CI-NEXT: [[COPY1:%[0-9]+]]:_(<3 x s32>) = COPY $vgpr2_vgpr3_vgpr4 ; CI-NEXT: [[COPY2:%[0-9]+]]:_(<3 x 
s32>) = COPY $vgpr5_vgpr6_vgpr7 ; CI-NEXT: [[COPY3:%[0-9]+]]:_(<3 x s32>) = COPY $vgpr7_vgpr8_vgpr9 - ; CI-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<9 x s32>) = G_CONCAT_VECTORS [[COPY1]](<3 x s32>), [[COPY2]](<3 x s32>), [[COPY3]](<3 x s32>) - ; CI-NEXT: [[EXTRACT:%[0-9]+]]:_(<4 x s32>) = G_EXTRACT [[CONCAT_VECTORS]](<9 x s32>), 0 - ; CI-NEXT: [[EXTRACT1:%[0-9]+]]:_(<4 x s32>) = G_EXTRACT [[CONCAT_VECTORS]](<9 x s32>), 128 - ; CI-NEXT: [[EXTRACT2:%[0-9]+]]:_(s32) = G_EXTRACT [[COPY3]](<3 x s32>), 64 - ; CI-NEXT: G_STORE [[EXTRACT]](<4 x s32>), [[COPY]](p1) :: (store (<4 x s32>), addrspace 1) + ; CI-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](<3 x s32>) + ; CI-NEXT: [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY2]](<3 x s32>) + ; CI-NEXT: [[UV6:%[0-9]+]]:_(s32), [[UV7:%[0-9]+]]:_(s32), [[UV8:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY3]](<3 x s32>) + ; CI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[UV]](s32), [[UV1]](s32), [[UV2]](s32), [[UV3]](s32) + ; CI-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[UV4]](s32), [[UV5]](s32), [[UV6]](s32), [[UV7]](s32) + ; CI-NEXT: G_STORE [[BUILD_VECTOR]](<4 x s32>), [[COPY]](p1) :: (store (<4 x s32>), addrspace 1) ; CI-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 16 ; CI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64) - ; CI-NEXT: G_STORE [[EXTRACT1]](<4 x s32>), [[PTR_ADD]](p1) :: (store (<4 x s32>) into unknown-address + 16, addrspace 1) + ; CI-NEXT: G_STORE [[BUILD_VECTOR1]](<4 x s32>), [[PTR_ADD]](p1) :: (store (<4 x s32>) into unknown-address + 16, addrspace 1) ; CI-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 32 ; CI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C1]](s64) - ; CI-NEXT: G_STORE [[EXTRACT2]](s32), [[PTR_ADD1]](p1) :: (store (s32) into unknown-address + 32, align 16, addrspace 1) + ; CI-NEXT: G_STORE [[UV8]](s32), [[PTR_ADD1]](p1) :: (store (s32) into unknown-address + 32, align 16, addrspace 1) ; VI-LABEL: name: test_store_global_v9s32_align16 ; VI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 ; VI-NEXT: [[COPY1:%[0-9]+]]:_(<3 x s32>) = COPY $vgpr2_vgpr3_vgpr4 ; VI-NEXT: [[COPY2:%[0-9]+]]:_(<3 x s32>) = COPY $vgpr5_vgpr6_vgpr7 ; VI-NEXT: [[COPY3:%[0-9]+]]:_(<3 x s32>) = COPY $vgpr7_vgpr8_vgpr9 - ; VI-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<9 x s32>) = G_CONCAT_VECTORS [[COPY1]](<3 x s32>), [[COPY2]](<3 x s32>), [[COPY3]](<3 x s32>) - ; VI-NEXT: [[EXTRACT:%[0-9]+]]:_(<4 x s32>) = G_EXTRACT [[CONCAT_VECTORS]](<9 x s32>), 0 - ; VI-NEXT: [[EXTRACT1:%[0-9]+]]:_(<4 x s32>) = G_EXTRACT [[CONCAT_VECTORS]](<9 x s32>), 128 - ; VI-NEXT: [[EXTRACT2:%[0-9]+]]:_(s32) = G_EXTRACT [[COPY3]](<3 x s32>), 64 - ; VI-NEXT: G_STORE [[EXTRACT]](<4 x s32>), [[COPY]](p1) :: (store (<4 x s32>), addrspace 1) + ; VI-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](<3 x s32>) + ; VI-NEXT: [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY2]](<3 x s32>) + ; VI-NEXT: [[UV6:%[0-9]+]]:_(s32), [[UV7:%[0-9]+]]:_(s32), [[UV8:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY3]](<3 x s32>) + ; VI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[UV]](s32), [[UV1]](s32), [[UV2]](s32), [[UV3]](s32) + ; VI-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[UV4]](s32), [[UV5]](s32), [[UV6]](s32), [[UV7]](s32) + ; VI-NEXT: G_STORE [[BUILD_VECTOR]](<4 x s32>), [[COPY]](p1) :: (store (<4 x s32>), 
addrspace 1) ; VI-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 16 ; VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64) - ; VI-NEXT: G_STORE [[EXTRACT1]](<4 x s32>), [[PTR_ADD]](p1) :: (store (<4 x s32>) into unknown-address + 16, addrspace 1) + ; VI-NEXT: G_STORE [[BUILD_VECTOR1]](<4 x s32>), [[PTR_ADD]](p1) :: (store (<4 x s32>) into unknown-address + 16, addrspace 1) ; VI-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 32 ; VI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C1]](s64) - ; VI-NEXT: G_STORE [[EXTRACT2]](s32), [[PTR_ADD1]](p1) :: (store (s32) into unknown-address + 32, align 16, addrspace 1) + ; VI-NEXT: G_STORE [[UV8]](s32), [[PTR_ADD1]](p1) :: (store (s32) into unknown-address + 32, align 16, addrspace 1) ; GFX9-LABEL: name: test_store_global_v9s32_align16 ; GFX9: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(<3 x s32>) = COPY $vgpr2_vgpr3_vgpr4 ; GFX9-NEXT: [[COPY2:%[0-9]+]]:_(<3 x s32>) = COPY $vgpr5_vgpr6_vgpr7 ; GFX9-NEXT: [[COPY3:%[0-9]+]]:_(<3 x s32>) = COPY $vgpr7_vgpr8_vgpr9 - ; GFX9-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<9 x s32>) = G_CONCAT_VECTORS [[COPY1]](<3 x s32>), [[COPY2]](<3 x s32>), [[COPY3]](<3 x s32>) - ; GFX9-NEXT: [[EXTRACT:%[0-9]+]]:_(<4 x s32>) = G_EXTRACT [[CONCAT_VECTORS]](<9 x s32>), 0 - ; GFX9-NEXT: [[EXTRACT1:%[0-9]+]]:_(<4 x s32>) = G_EXTRACT [[CONCAT_VECTORS]](<9 x s32>), 128 - ; GFX9-NEXT: [[EXTRACT2:%[0-9]+]]:_(s32) = G_EXTRACT [[COPY3]](<3 x s32>), 64 - ; GFX9-NEXT: G_STORE [[EXTRACT]](<4 x s32>), [[COPY]](p1) :: (store (<4 x s32>), addrspace 1) + ; GFX9-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](<3 x s32>) + ; GFX9-NEXT: [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY2]](<3 x s32>) + ; GFX9-NEXT: [[UV6:%[0-9]+]]:_(s32), [[UV7:%[0-9]+]]:_(s32), [[UV8:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY3]](<3 x s32>) + ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[UV]](s32), [[UV1]](s32), [[UV2]](s32), [[UV3]](s32) + ; GFX9-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[UV4]](s32), [[UV5]](s32), [[UV6]](s32), [[UV7]](s32) + ; GFX9-NEXT: G_STORE [[BUILD_VECTOR]](<4 x s32>), [[COPY]](p1) :: (store (<4 x s32>), addrspace 1) ; GFX9-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 16 ; GFX9-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64) - ; GFX9-NEXT: G_STORE [[EXTRACT1]](<4 x s32>), [[PTR_ADD]](p1) :: (store (<4 x s32>) into unknown-address + 16, addrspace 1) + ; GFX9-NEXT: G_STORE [[BUILD_VECTOR1]](<4 x s32>), [[PTR_ADD]](p1) :: (store (<4 x s32>) into unknown-address + 16, addrspace 1) ; GFX9-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 32 ; GFX9-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C1]](s64) - ; GFX9-NEXT: G_STORE [[EXTRACT2]](s32), [[PTR_ADD1]](p1) :: (store (s32) into unknown-address + 32, align 16, addrspace 1) + ; GFX9-NEXT: G_STORE [[UV8]](s32), [[PTR_ADD1]](p1) :: (store (s32) into unknown-address + 32, align 16, addrspace 1) %0:_(p1) = COPY $vgpr0_vgpr1 %1:_(<3 x s32>) = COPY $vgpr2_vgpr3_vgpr4 %2:_(<3 x s32>) = COPY $vgpr5_vgpr6_vgpr7 diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-store.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-store.mir index 778d6932f8b0..2b562df4f134 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-store.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-store.mir @@ -152,12 +152,12 @@ body: | ; SI-LABEL: name: test_store_global_v3s32 ; SI: 
[[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 ; SI-NEXT: [[COPY1:%[0-9]+]]:_(<3 x s32>) = COPY $vgpr2_vgpr3_vgpr4 - ; SI-NEXT: [[EXTRACT:%[0-9]+]]:_(<2 x s32>) = G_EXTRACT [[COPY1]](<3 x s32>), 0 - ; SI-NEXT: [[EXTRACT1:%[0-9]+]]:_(s32) = G_EXTRACT [[COPY1]](<3 x s32>), 64 - ; SI-NEXT: G_STORE [[EXTRACT]](<2 x s32>), [[COPY]](p1) :: (store (<2 x s32>), align 4, addrspace 1) + ; SI-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](<3 x s32>) + ; SI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[UV]](s32), [[UV1]](s32) + ; SI-NEXT: G_STORE [[BUILD_VECTOR]](<2 x s32>), [[COPY]](p1) :: (store (<2 x s32>), align 4, addrspace 1) ; SI-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 8 ; SI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64) - ; SI-NEXT: G_STORE [[EXTRACT1]](s32), [[PTR_ADD]](p1) :: (store (s32) into unknown-address + 8, addrspace 1) + ; SI-NEXT: G_STORE [[UV2]](s32), [[PTR_ADD]](p1) :: (store (s32) into unknown-address + 8, addrspace 1) ; VI-LABEL: name: test_store_global_v3s32 ; VI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 ; VI-NEXT: [[COPY1:%[0-9]+]]:_(<3 x s32>) = COPY $vgpr2_vgpr3_vgpr4 @@ -473,12 +473,12 @@ body: | ; SI: [[COPY:%[0-9]+]]:_(s96) = COPY $vgpr0_vgpr1_vgpr2 ; SI-NEXT: [[COPY1:%[0-9]+]]:_(p1) = COPY $vgpr3_vgpr4 ; SI-NEXT: [[BITCAST:%[0-9]+]]:_(<3 x s32>) = G_BITCAST [[COPY]](s96) - ; SI-NEXT: [[EXTRACT:%[0-9]+]]:_(<2 x s32>) = G_EXTRACT [[BITCAST]](<3 x s32>), 0 - ; SI-NEXT: [[EXTRACT1:%[0-9]+]]:_(s32) = G_EXTRACT [[BITCAST]](<3 x s32>), 64 - ; SI-NEXT: G_STORE [[EXTRACT]](<2 x s32>), [[COPY1]](p1) :: (store (<2 x s32>), align 16, addrspace 1) + ; SI-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[BITCAST]](<3 x s32>) + ; SI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[UV]](s32), [[UV1]](s32) + ; SI-NEXT: G_STORE [[BUILD_VECTOR]](<2 x s32>), [[COPY1]](p1) :: (store (<2 x s32>), align 16, addrspace 1) ; SI-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 8 ; SI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY1]], [[C]](s64) - ; SI-NEXT: G_STORE [[EXTRACT1]](s32), [[PTR_ADD]](p1) :: (store (s32) into unknown-address + 8, align 8, addrspace 1) + ; SI-NEXT: G_STORE [[UV2]](s32), [[PTR_ADD]](p1) :: (store (s32) into unknown-address + 8, align 8, addrspace 1) ; VI-LABEL: name: test_store_global_96 ; VI: [[COPY:%[0-9]+]]:_(s96) = COPY $vgpr0_vgpr1_vgpr2 ; VI-NEXT: [[COPY1:%[0-9]+]]:_(p1) = COPY $vgpr3_vgpr4 diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-uaddsat.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-uaddsat.mir index 411bbc68b47c..9c210beb285a 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-uaddsat.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-uaddsat.mir @@ -329,11 +329,9 @@ body: | ; GFX6-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 ; GFX6-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) ; GFX6-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>) - ; GFX6-NEXT: [[UV3:%[0-9]+]]:_(<2 x s16>), [[UV4:%[0-9]+]]:_(<2 x s16>), [[UV5:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[COPY]](<6 x s16>) - ; GFX6-NEXT: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[UV4]](<2 x s16>) - ; GFX6-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C]](s32) - ; GFX6-NEXT: [[BITCAST3:%[0-9]+]]:_(s32) = G_BITCAST [[UV5]](<2 x s16>) - ; GFX6-NEXT: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST3]], [[C]](s32) + ; GFX6-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C]](s32) + ; 
GFX6-NEXT: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[UV2]](<2 x s16>) + ; GFX6-NEXT: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C]](s32) ; GFX6-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[BITCAST]], [[C]](s32) ; GFX6-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[LSHR1]], [[C]](s32) ; GFX6-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 -1 @@ -342,7 +340,7 @@ body: | ; GFX6-NEXT: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[SHL]], [[UMIN]] ; GFX6-NEXT: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[ADD]], [[C]](s32) ; GFX6-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[LSHR]], [[C]](s32) - ; GFX6-NEXT: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[BITCAST3]], [[C]](s32) + ; GFX6-NEXT: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[BITCAST2]], [[C]](s32) ; GFX6-NEXT: [[XOR1:%[0-9]+]]:_(s32) = G_XOR [[SHL2]], [[C1]] ; GFX6-NEXT: [[UMIN1:%[0-9]+]]:_(s32) = G_UMIN [[XOR1]], [[SHL3]] ; GFX6-NEXT: [[ADD1:%[0-9]+]]:_(s32) = G_ADD [[SHL2]], [[UMIN1]] @@ -354,27 +352,27 @@ body: | ; GFX6-NEXT: [[ADD2:%[0-9]+]]:_(s32) = G_ADD [[SHL4]], [[UMIN2]] ; GFX6-NEXT: [[LSHR5:%[0-9]+]]:_(s32) = G_LSHR [[ADD2]], [[C]](s32) ; GFX6-NEXT: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF - ; GFX6-NEXT: [[UV6:%[0-9]+]]:_(<2 x s16>), [[UV7:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<4 x s16>) - ; GFX6-NEXT: [[BITCAST4:%[0-9]+]]:_(s32) = G_BITCAST [[UV6]](<2 x s16>) - ; GFX6-NEXT: [[LSHR6:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST4]], [[C]](s32) - ; GFX6-NEXT: [[BITCAST5:%[0-9]+]]:_(s32) = G_BITCAST [[UV7]](<2 x s16>) + ; GFX6-NEXT: [[UV3:%[0-9]+]]:_(<2 x s16>), [[UV4:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<4 x s16>) + ; GFX6-NEXT: [[BITCAST3:%[0-9]+]]:_(s32) = G_BITCAST [[UV3]](<2 x s16>) + ; GFX6-NEXT: [[LSHR6:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST3]], [[C]](s32) + ; GFX6-NEXT: [[BITCAST4:%[0-9]+]]:_(s32) = G_BITCAST [[UV4]](<2 x s16>) ; GFX6-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 ; GFX6-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[LSHR3]], [[C2]] ; GFX6-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LSHR4]], [[C2]] ; GFX6-NEXT: [[SHL6:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C]](s32) ; GFX6-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL6]] - ; GFX6-NEXT: [[BITCAST6:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) + ; GFX6-NEXT: [[BITCAST5:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) ; GFX6-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[LSHR5]], [[C2]] - ; GFX6-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[BITCAST4]], [[C2]] + ; GFX6-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[BITCAST3]], [[C2]] ; GFX6-NEXT: [[SHL7:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C]](s32) ; GFX6-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL7]] - ; GFX6-NEXT: [[BITCAST7:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32) + ; GFX6-NEXT: [[BITCAST6:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32) ; GFX6-NEXT: [[AND4:%[0-9]+]]:_(s32) = G_AND [[LSHR6]], [[C2]] - ; GFX6-NEXT: [[AND5:%[0-9]+]]:_(s32) = G_AND [[BITCAST5]], [[C2]] + ; GFX6-NEXT: [[AND5:%[0-9]+]]:_(s32) = G_AND [[BITCAST4]], [[C2]] ; GFX6-NEXT: [[SHL8:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[C]](s32) ; GFX6-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[AND4]], [[SHL8]] - ; GFX6-NEXT: [[BITCAST8:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR2]](s32) - ; GFX6-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BITCAST6]](<2 x s16>), [[BITCAST7]](<2 x s16>), [[BITCAST8]](<2 x s16>) + ; GFX6-NEXT: [[BITCAST7:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR2]](s32) + ; GFX6-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BITCAST5]](<2 x s16>), [[BITCAST6]](<2 x s16>), [[BITCAST7]](<2 x s16>) ; GFX6-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[CONCAT_VECTORS]](<6 x s16>) 
; GFX8-LABEL: name: uaddsat_v3s16 ; GFX8: [[COPY:%[0-9]+]]:_(<6 x s16>) = COPY $vgpr0_vgpr1_vgpr2 @@ -386,39 +384,37 @@ body: | ; GFX8-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) ; GFX8-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>) ; GFX8-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST1]](s32) - ; GFX8-NEXT: [[UV3:%[0-9]+]]:_(<2 x s16>), [[UV4:%[0-9]+]]:_(<2 x s16>), [[UV5:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[COPY]](<6 x s16>) - ; GFX8-NEXT: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[UV4]](<2 x s16>) - ; GFX8-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C]](s32) + ; GFX8-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C]](s32) ; GFX8-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR1]](s32) - ; GFX8-NEXT: [[BITCAST3:%[0-9]+]]:_(s32) = G_BITCAST [[UV5]](<2 x s16>) - ; GFX8-NEXT: [[TRUNC4:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST3]](s32) - ; GFX8-NEXT: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST3]], [[C]](s32) + ; GFX8-NEXT: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[UV2]](<2 x s16>) + ; GFX8-NEXT: [[TRUNC4:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST2]](s32) + ; GFX8-NEXT: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C]](s32) ; GFX8-NEXT: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR2]](s32) ; GFX8-NEXT: [[UADDSAT:%[0-9]+]]:_(s16) = G_UADDSAT [[TRUNC]], [[TRUNC3]] ; GFX8-NEXT: [[UADDSAT1:%[0-9]+]]:_(s16) = G_UADDSAT [[TRUNC1]], [[TRUNC4]] ; GFX8-NEXT: [[UADDSAT2:%[0-9]+]]:_(s16) = G_UADDSAT [[TRUNC2]], [[TRUNC5]] ; GFX8-NEXT: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF - ; GFX8-NEXT: [[UV6:%[0-9]+]]:_(<2 x s16>), [[UV7:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<4 x s16>) - ; GFX8-NEXT: [[BITCAST4:%[0-9]+]]:_(s32) = G_BITCAST [[UV6]](<2 x s16>) - ; GFX8-NEXT: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST4]], [[C]](s32) - ; GFX8-NEXT: [[BITCAST5:%[0-9]+]]:_(s32) = G_BITCAST [[UV7]](<2 x s16>) + ; GFX8-NEXT: [[UV3:%[0-9]+]]:_(<2 x s16>), [[UV4:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<4 x s16>) + ; GFX8-NEXT: [[BITCAST3:%[0-9]+]]:_(s32) = G_BITCAST [[UV3]](<2 x s16>) + ; GFX8-NEXT: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST3]], [[C]](s32) + ; GFX8-NEXT: [[BITCAST4:%[0-9]+]]:_(s32) = G_BITCAST [[UV4]](<2 x s16>) ; GFX8-NEXT: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[UADDSAT]](s16) ; GFX8-NEXT: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[UADDSAT1]](s16) ; GFX8-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C]](s32) ; GFX8-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL]] - ; GFX8-NEXT: [[BITCAST6:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) + ; GFX8-NEXT: [[BITCAST5:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) ; GFX8-NEXT: [[ZEXT2:%[0-9]+]]:_(s32) = G_ZEXT [[UADDSAT2]](s16) ; GFX8-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 - ; GFX8-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[BITCAST4]], [[C1]] + ; GFX8-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[BITCAST3]], [[C1]] ; GFX8-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND]], [[C]](s32) ; GFX8-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[ZEXT2]], [[SHL1]] - ; GFX8-NEXT: [[BITCAST7:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32) + ; GFX8-NEXT: [[BITCAST6:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32) ; GFX8-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LSHR3]], [[C1]] - ; GFX8-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[BITCAST5]], [[C1]] + ; GFX8-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[BITCAST4]], [[C1]] ; GFX8-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND2]], [[C]](s32) ; GFX8-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[AND1]], [[SHL2]] - ; GFX8-NEXT: [[BITCAST8:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR2]](s32) - ; 
GFX8-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BITCAST6]](<2 x s16>), [[BITCAST7]](<2 x s16>), [[BITCAST8]](<2 x s16>) + ; GFX8-NEXT: [[BITCAST7:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR2]](s32) + ; GFX8-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BITCAST5]](<2 x s16>), [[BITCAST6]](<2 x s16>), [[BITCAST7]](<2 x s16>) ; GFX8-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[CONCAT_VECTORS]](<6 x s16>) ; GFX9-LABEL: name: uaddsat_v3s16 ; GFX9: [[COPY:%[0-9]+]]:_(<6 x s16>) = COPY $vgpr0_vgpr1_vgpr2 @@ -427,29 +423,27 @@ body: | ; GFX9-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 ; GFX9-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) ; GFX9-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>) + ; GFX9-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C]](s32) + ; GFX9-NEXT: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[UV2]](<2 x s16>) + ; GFX9-NEXT: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C]](s32) ; GFX9-NEXT: [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[BITCAST]](s32), [[LSHR]](s32) ; GFX9-NEXT: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF ; GFX9-NEXT: [[BUILD_VECTOR_TRUNC1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[BITCAST1]](s32), [[DEF]](s32) - ; GFX9-NEXT: [[UV3:%[0-9]+]]:_(<2 x s16>), [[UV4:%[0-9]+]]:_(<2 x s16>), [[UV5:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[COPY]](<6 x s16>) - ; GFX9-NEXT: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[UV4]](<2 x s16>) - ; GFX9-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C]](s32) - ; GFX9-NEXT: [[BITCAST3:%[0-9]+]]:_(s32) = G_BITCAST [[UV5]](<2 x s16>) - ; GFX9-NEXT: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST3]], [[C]](s32) - ; GFX9-NEXT: [[BUILD_VECTOR_TRUNC2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[LSHR1]](s32), [[BITCAST3]](s32) + ; GFX9-NEXT: [[BUILD_VECTOR_TRUNC2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[LSHR1]](s32), [[BITCAST2]](s32) ; GFX9-NEXT: [[BUILD_VECTOR_TRUNC3:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[LSHR2]](s32), [[DEF]](s32) ; GFX9-NEXT: [[UADDSAT:%[0-9]+]]:_(<2 x s16>) = G_UADDSAT [[BUILD_VECTOR_TRUNC]], [[BUILD_VECTOR_TRUNC2]] ; GFX9-NEXT: [[UADDSAT1:%[0-9]+]]:_(<2 x s16>) = G_UADDSAT [[BUILD_VECTOR_TRUNC1]], [[BUILD_VECTOR_TRUNC3]] + ; GFX9-NEXT: [[BITCAST3:%[0-9]+]]:_(s32) = G_BITCAST [[UADDSAT]](<2 x s16>) + ; GFX9-NEXT: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST3]], [[C]](s32) + ; GFX9-NEXT: [[BITCAST4:%[0-9]+]]:_(s32) = G_BITCAST [[UADDSAT1]](<2 x s16>) ; GFX9-NEXT: [[DEF1:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF - ; GFX9-NEXT: [[BITCAST4:%[0-9]+]]:_(s32) = G_BITCAST [[UADDSAT]](<2 x s16>) - ; GFX9-NEXT: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST4]], [[C]](s32) - ; GFX9-NEXT: [[BITCAST5:%[0-9]+]]:_(s32) = G_BITCAST [[UADDSAT1]](<2 x s16>) - ; GFX9-NEXT: [[UV6:%[0-9]+]]:_(<2 x s16>), [[UV7:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF1]](<4 x s16>) - ; GFX9-NEXT: [[BITCAST6:%[0-9]+]]:_(s32) = G_BITCAST [[UV6]](<2 x s16>) - ; GFX9-NEXT: [[LSHR4:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST6]], [[C]](s32) - ; GFX9-NEXT: [[BITCAST7:%[0-9]+]]:_(s32) = G_BITCAST [[UV7]](<2 x s16>) - ; GFX9-NEXT: [[BUILD_VECTOR_TRUNC4:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[BITCAST4]](s32), [[LSHR3]](s32) - ; GFX9-NEXT: [[BUILD_VECTOR_TRUNC5:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[BITCAST5]](s32), [[BITCAST6]](s32) - ; GFX9-NEXT: [[BUILD_VECTOR_TRUNC6:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[LSHR4]](s32), [[BITCAST7]](s32) + ; GFX9-NEXT: [[UV3:%[0-9]+]]:_(<2 x s16>), [[UV4:%[0-9]+]]:_(<2 x s16>) = 
G_UNMERGE_VALUES [[DEF1]](<4 x s16>) + ; GFX9-NEXT: [[BITCAST5:%[0-9]+]]:_(s32) = G_BITCAST [[UV3]](<2 x s16>) + ; GFX9-NEXT: [[LSHR4:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST5]], [[C]](s32) + ; GFX9-NEXT: [[BITCAST6:%[0-9]+]]:_(s32) = G_BITCAST [[UV4]](<2 x s16>) + ; GFX9-NEXT: [[BUILD_VECTOR_TRUNC4:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[BITCAST3]](s32), [[LSHR3]](s32) + ; GFX9-NEXT: [[BUILD_VECTOR_TRUNC5:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[BITCAST4]](s32), [[BITCAST5]](s32) + ; GFX9-NEXT: [[BUILD_VECTOR_TRUNC6:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[LSHR4]](s32), [[BITCAST6]](s32) ; GFX9-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR_TRUNC4]](<2 x s16>), [[BUILD_VECTOR_TRUNC5]](<2 x s16>), [[BUILD_VECTOR_TRUNC6]](<2 x s16>) ; GFX9-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[CONCAT_VECTORS]](<6 x s16>) %0:_(<6 x s16>) = COPY $vgpr0_vgpr1_vgpr2 diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-umax.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-umax.mir index d3d1c0b65d98..83976e15d820 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-umax.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-umax.mir @@ -1,8 +1,8 @@ # NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py -# RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=tahiti -run-pass=legalizer -global-isel-abort=0 %s -o - | FileCheck -check-prefix=SI %s -# RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=fiji -run-pass=legalizer -global-isel-abort=0 %s -o - | FileCheck -check-prefix=VI %s -# RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx900 -run-pass=legalizer -global-isel-abort=0 %s -o - | FileCheck -check-prefix=GFX9 %s -# RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx1010 -run-pass=legalizer -global-isel-abort=0 %s -o - | FileCheck -check-prefix=GFX9 %s +# RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=tahiti -run-pass=legalizer %s -o - | FileCheck -check-prefix=SI %s +# RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=fiji -run-pass=legalizer %s -o - | FileCheck -check-prefix=VI %s +# RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx900 -run-pass=legalizer %s -o - | FileCheck -check-prefix=GFX9 %s +# RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx1010 -run-pass=legalizer %s -o - | FileCheck -check-prefix=GFX9 %s --- name: test_umax_s32 @@ -331,12 +331,12 @@ body: | ; SI-LABEL: name: test_umax_v3s16 ; SI: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF - ; SI-NEXT: [[DEF1:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF ; SI-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<4 x s16>) ; SI-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>) ; SI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 ; SI-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) ; SI-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>) + ; SI-NEXT: [[DEF1:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF ; SI-NEXT: [[UV2:%[0-9]+]]:_(<2 x s16>), [[UV3:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF1]](<4 x s16>) ; SI-NEXT: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[UV2]](<2 x s16>) ; SI-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C]](s32) @@ -355,7 +355,6 @@ body: | ; SI-NEXT: S_NOP 0, implicit [[BUILD_VECTOR]](<3 x s32>) ; VI-LABEL: name: test_umax_v3s16 ; VI: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF - ; VI-NEXT: [[DEF1:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF ; VI-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<4 x s16>) ; VI-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>) 
; VI-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST]](s32) @@ -364,6 +363,7 @@ body: | ; VI-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) ; VI-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>) ; VI-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST1]](s32) + ; VI-NEXT: [[DEF1:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF ; VI-NEXT: [[UV2:%[0-9]+]]:_(<2 x s16>), [[UV3:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF1]](<4 x s16>) ; VI-NEXT: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[UV2]](<2 x s16>) ; VI-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST2]](s32) @@ -381,23 +381,27 @@ body: | ; VI-NEXT: S_NOP 0, implicit [[BUILD_VECTOR]](<3 x s32>) ; GFX9-LABEL: name: test_umax_v3s16 ; GFX9: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF - ; GFX9-NEXT: [[DEF1:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF - ; GFX9-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<12 x s16>) = G_CONCAT_VECTORS [[DEF]](<4 x s16>), [[DEF1]](<4 x s16>), [[DEF1]](<4 x s16>) - ; GFX9-NEXT: [[UV:%[0-9]+]]:_(<3 x s16>), [[UV1:%[0-9]+]]:_(<3 x s16>), [[UV2:%[0-9]+]]:_(<3 x s16>), [[UV3:%[0-9]+]]:_(<3 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<12 x s16>) - ; GFX9-NEXT: [[DEF2:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF - ; GFX9-NEXT: [[CONCAT_VECTORS1:%[0-9]+]]:_(<12 x s16>) = G_CONCAT_VECTORS [[DEF2]](<4 x s16>), [[DEF1]](<4 x s16>), [[DEF1]](<4 x s16>) - ; GFX9-NEXT: [[UV4:%[0-9]+]]:_(<3 x s16>), [[UV5:%[0-9]+]]:_(<3 x s16>), [[UV6:%[0-9]+]]:_(<3 x s16>), [[UV7:%[0-9]+]]:_(<3 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS1]](<12 x s16>) - ; GFX9-NEXT: [[INSERT:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF1]], [[UV]](<3 x s16>), 0 - ; GFX9-NEXT: [[INSERT1:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF1]], [[UV4]](<3 x s16>), 0 - ; GFX9-NEXT: [[UV8:%[0-9]+]]:_(<2 x s16>), [[UV9:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[INSERT]](<4 x s16>) - ; GFX9-NEXT: [[UV10:%[0-9]+]]:_(<2 x s16>), [[UV11:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[INSERT1]](<4 x s16>) - ; GFX9-NEXT: [[UMAX:%[0-9]+]]:_(<2 x s16>) = G_UMAX [[UV8]], [[UV10]] - ; GFX9-NEXT: [[UMAX1:%[0-9]+]]:_(<2 x s16>) = G_UMAX [[UV9]], [[UV11]] - ; GFX9-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UMAX]](<2 x s16>) + ; GFX9-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<4 x s16>) + ; GFX9-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>) ; GFX9-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 ; GFX9-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) - ; GFX9-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UMAX1]](<2 x s16>) - ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[BITCAST]](s32), [[LSHR]](s32), [[BITCAST1]](s32) + ; GFX9-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>) + ; GFX9-NEXT: [[DEF1:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF + ; GFX9-NEXT: [[UV2:%[0-9]+]]:_(<2 x s16>), [[UV3:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF1]](<4 x s16>) + ; GFX9-NEXT: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[UV2]](<2 x s16>) + ; GFX9-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C]](s32) + ; GFX9-NEXT: [[BITCAST3:%[0-9]+]]:_(s32) = G_BITCAST [[UV3]](<2 x s16>) + ; GFX9-NEXT: [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[BITCAST]](s32), [[LSHR]](s32) + ; GFX9-NEXT: [[DEF2:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF + ; GFX9-NEXT: [[BUILD_VECTOR_TRUNC1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[BITCAST1]](s32), [[DEF2]](s32) + ; GFX9-NEXT: [[BUILD_VECTOR_TRUNC2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[BITCAST2]](s32), [[LSHR1]](s32) + ; 
GFX9-NEXT: [[BUILD_VECTOR_TRUNC3:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[BITCAST3]](s32), [[DEF2]](s32) + ; GFX9-NEXT: [[UMAX:%[0-9]+]]:_(<2 x s16>) = G_UMAX [[BUILD_VECTOR_TRUNC]], [[BUILD_VECTOR_TRUNC2]] + ; GFX9-NEXT: [[UMAX1:%[0-9]+]]:_(<2 x s16>) = G_UMAX [[BUILD_VECTOR_TRUNC1]], [[BUILD_VECTOR_TRUNC3]] + ; GFX9-NEXT: [[BITCAST4:%[0-9]+]]:_(s32) = G_BITCAST [[UMAX]](<2 x s16>) + ; GFX9-NEXT: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST4]], [[C]](s32) + ; GFX9-NEXT: [[BITCAST5:%[0-9]+]]:_(s32) = G_BITCAST [[UMAX1]](<2 x s16>) + ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[BITCAST4]](s32), [[LSHR2]](s32), [[BITCAST5]](s32) ; GFX9-NEXT: S_NOP 0, implicit [[BUILD_VECTOR]](<3 x s32>) %0:_(<3 x s16>) = G_IMPLICIT_DEF %1:_(<3 x s16>) = G_IMPLICIT_DEF diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-umin.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-umin.mir index ec873d966ffe..d08f57f96123 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-umin.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-umin.mir @@ -1,8 +1,8 @@ # NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py -# RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=tahiti -run-pass=legalizer -global-isel-abort=0 %s -o - | FileCheck -check-prefix=SI %s -# RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=fiji -run-pass=legalizer -global-isel-abort=0 %s -o - | FileCheck -check-prefix=VI %s -# RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx900 -run-pass=legalizer -global-isel-abort=0 %s -o - | FileCheck -check-prefix=GFX9 %s -# RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx1010 -run-pass=legalizer -global-isel-abort=0 %s -o - | FileCheck -check-prefix=GFX9 %s +# RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=tahiti -run-pass=legalizer %s -o - | FileCheck -check-prefix=SI %s +# RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=fiji -run-pass=legalizer %s -o - | FileCheck -check-prefix=VI %s +# RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx900 -run-pass=legalizer %s -o - | FileCheck -check-prefix=GFX9 %s +# RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx1010 -run-pass=legalizer %s -o - | FileCheck -check-prefix=GFX9 %s --- name: test_umin_s32 @@ -331,12 +331,12 @@ body: | ; SI-LABEL: name: test_umin_v3s16 ; SI: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF - ; SI-NEXT: [[DEF1:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF ; SI-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<4 x s16>) ; SI-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>) ; SI-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 ; SI-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) ; SI-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>) + ; SI-NEXT: [[DEF1:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF ; SI-NEXT: [[UV2:%[0-9]+]]:_(<2 x s16>), [[UV3:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF1]](<4 x s16>) ; SI-NEXT: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[UV2]](<2 x s16>) ; SI-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C]](s32) @@ -355,7 +355,6 @@ body: | ; SI-NEXT: S_NOP 0, implicit [[BUILD_VECTOR]](<3 x s32>) ; VI-LABEL: name: test_umin_v3s16 ; VI: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF - ; VI-NEXT: [[DEF1:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF ; VI-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<4 x s16>) ; VI-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>) ; VI-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST]](s32) @@ -364,6 +363,7 @@ body: | ; VI-NEXT: 
[[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) ; VI-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>) ; VI-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST1]](s32) + ; VI-NEXT: [[DEF1:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF ; VI-NEXT: [[UV2:%[0-9]+]]:_(<2 x s16>), [[UV3:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF1]](<4 x s16>) ; VI-NEXT: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[UV2]](<2 x s16>) ; VI-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST2]](s32) @@ -381,23 +381,27 @@ body: | ; VI-NEXT: S_NOP 0, implicit [[BUILD_VECTOR]](<3 x s32>) ; GFX9-LABEL: name: test_umin_v3s16 ; GFX9: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF - ; GFX9-NEXT: [[DEF1:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF - ; GFX9-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<12 x s16>) = G_CONCAT_VECTORS [[DEF]](<4 x s16>), [[DEF1]](<4 x s16>), [[DEF1]](<4 x s16>) - ; GFX9-NEXT: [[UV:%[0-9]+]]:_(<3 x s16>), [[UV1:%[0-9]+]]:_(<3 x s16>), [[UV2:%[0-9]+]]:_(<3 x s16>), [[UV3:%[0-9]+]]:_(<3 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<12 x s16>) - ; GFX9-NEXT: [[DEF2:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF - ; GFX9-NEXT: [[CONCAT_VECTORS1:%[0-9]+]]:_(<12 x s16>) = G_CONCAT_VECTORS [[DEF2]](<4 x s16>), [[DEF1]](<4 x s16>), [[DEF1]](<4 x s16>) - ; GFX9-NEXT: [[UV4:%[0-9]+]]:_(<3 x s16>), [[UV5:%[0-9]+]]:_(<3 x s16>), [[UV6:%[0-9]+]]:_(<3 x s16>), [[UV7:%[0-9]+]]:_(<3 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS1]](<12 x s16>) - ; GFX9-NEXT: [[INSERT:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF1]], [[UV]](<3 x s16>), 0 - ; GFX9-NEXT: [[INSERT1:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF1]], [[UV4]](<3 x s16>), 0 - ; GFX9-NEXT: [[UV8:%[0-9]+]]:_(<2 x s16>), [[UV9:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[INSERT]](<4 x s16>) - ; GFX9-NEXT: [[UV10:%[0-9]+]]:_(<2 x s16>), [[UV11:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[INSERT1]](<4 x s16>) - ; GFX9-NEXT: [[UMIN:%[0-9]+]]:_(<2 x s16>) = G_UMIN [[UV8]], [[UV10]] - ; GFX9-NEXT: [[UMIN1:%[0-9]+]]:_(<2 x s16>) = G_UMIN [[UV9]], [[UV11]] - ; GFX9-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UMIN]](<2 x s16>) + ; GFX9-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<4 x s16>) + ; GFX9-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>) ; GFX9-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 ; GFX9-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) - ; GFX9-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UMIN1]](<2 x s16>) - ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[BITCAST]](s32), [[LSHR]](s32), [[BITCAST1]](s32) + ; GFX9-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>) + ; GFX9-NEXT: [[DEF1:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF + ; GFX9-NEXT: [[UV2:%[0-9]+]]:_(<2 x s16>), [[UV3:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF1]](<4 x s16>) + ; GFX9-NEXT: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[UV2]](<2 x s16>) + ; GFX9-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C]](s32) + ; GFX9-NEXT: [[BITCAST3:%[0-9]+]]:_(s32) = G_BITCAST [[UV3]](<2 x s16>) + ; GFX9-NEXT: [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[BITCAST]](s32), [[LSHR]](s32) + ; GFX9-NEXT: [[DEF2:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF + ; GFX9-NEXT: [[BUILD_VECTOR_TRUNC1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[BITCAST1]](s32), [[DEF2]](s32) + ; GFX9-NEXT: [[BUILD_VECTOR_TRUNC2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[BITCAST2]](s32), [[LSHR1]](s32) + ; GFX9-NEXT: [[BUILD_VECTOR_TRUNC3:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[BITCAST3]](s32), 
[[DEF2]](s32) + ; GFX9-NEXT: [[UMIN:%[0-9]+]]:_(<2 x s16>) = G_UMIN [[BUILD_VECTOR_TRUNC]], [[BUILD_VECTOR_TRUNC2]] + ; GFX9-NEXT: [[UMIN1:%[0-9]+]]:_(<2 x s16>) = G_UMIN [[BUILD_VECTOR_TRUNC1]], [[BUILD_VECTOR_TRUNC3]] + ; GFX9-NEXT: [[BITCAST4:%[0-9]+]]:_(s32) = G_BITCAST [[UMIN]](<2 x s16>) + ; GFX9-NEXT: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST4]], [[C]](s32) + ; GFX9-NEXT: [[BITCAST5:%[0-9]+]]:_(s32) = G_BITCAST [[UMIN1]](<2 x s16>) + ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[BITCAST4]](s32), [[LSHR2]](s32), [[BITCAST5]](s32) ; GFX9-NEXT: S_NOP 0, implicit [[BUILD_VECTOR]](<3 x s32>) %0:_(<3 x s16>) = G_IMPLICIT_DEF %1:_(<3 x s16>) = G_IMPLICIT_DEF diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-umulh.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-umulh.mir index 830c6459e428..eac097ae0a26 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-umulh.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-umulh.mir @@ -443,67 +443,51 @@ body: | ; GFX9-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $vgpr3 ; GFX9-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $vgpr4 ; GFX9-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $vgpr5 - ; GFX9-NEXT: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF ; GFX9-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY]](s32) ; GFX9-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY1]](s32) - ; GFX9-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[COPY2]](s32) - ; GFX9-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[DEF]](s32) - ; GFX9-NEXT: [[TRUNC4:%[0-9]+]]:_(s16) = G_TRUNC [[COPY3]](s32) - ; GFX9-NEXT: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[COPY4]](s32) - ; GFX9-NEXT: [[TRUNC6:%[0-9]+]]:_(s16) = G_TRUNC [[COPY5]](s32) - ; GFX9-NEXT: [[TRUNC7:%[0-9]+]]:_(s16) = G_TRUNC [[DEF]](s32) + ; GFX9-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[COPY3]](s32) + ; GFX9-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[COPY4]](s32) ; GFX9-NEXT: [[C:%[0-9]+]]:_(s16) = G_CONSTANT i16 255 ; GFX9-NEXT: [[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC]], [[C]] ; GFX9-NEXT: [[AND1:%[0-9]+]]:_(s16) = G_AND [[TRUNC1]], [[C]] ; GFX9-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[AND]](s16) ; GFX9-NEXT: [[ANYEXT1:%[0-9]+]]:_(s32) = G_ANYEXT [[AND1]](s16) ; GFX9-NEXT: [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[ANYEXT]](s32), [[ANYEXT1]](s32) - ; GFX9-NEXT: [[AND2:%[0-9]+]]:_(s16) = G_AND [[TRUNC4]], [[C]] - ; GFX9-NEXT: [[AND3:%[0-9]+]]:_(s16) = G_AND [[TRUNC5]], [[C]] + ; GFX9-NEXT: [[AND2:%[0-9]+]]:_(s16) = G_AND [[TRUNC2]], [[C]] + ; GFX9-NEXT: [[AND3:%[0-9]+]]:_(s16) = G_AND [[TRUNC3]], [[C]] ; GFX9-NEXT: [[ANYEXT2:%[0-9]+]]:_(s32) = G_ANYEXT [[AND2]](s16) ; GFX9-NEXT: [[ANYEXT3:%[0-9]+]]:_(s32) = G_ANYEXT [[AND3]](s16) ; GFX9-NEXT: [[BUILD_VECTOR_TRUNC1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[ANYEXT2]](s32), [[ANYEXT3]](s32) ; GFX9-NEXT: [[MUL:%[0-9]+]]:_(<2 x s16>) = G_MUL [[BUILD_VECTOR_TRUNC]], [[BUILD_VECTOR_TRUNC1]] - ; GFX9-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 - ; GFX9-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY [[C1]](s32) - ; GFX9-NEXT: [[BUILD_VECTOR_TRUNC2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY6]](s32), [[C1]](s32) + ; GFX9-NEXT: [[C1:%[0-9]+]]:_(s16) = G_CONSTANT i16 8 + ; GFX9-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 + ; GFX9-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY [[C2]](s32) + ; GFX9-NEXT: [[BUILD_VECTOR_TRUNC2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY6]](s32), [[C2]](s32) ; GFX9-NEXT: [[LSHR:%[0-9]+]]:_(<2 x s16>) = G_LSHR [[MUL]], [[BUILD_VECTOR_TRUNC2]](<2 x s16>) - ; GFX9-NEXT: [[AND4:%[0-9]+]]:_(s16) = G_AND 
[[TRUNC2]], [[C]] - ; GFX9-NEXT: [[AND5:%[0-9]+]]:_(s16) = G_AND [[TRUNC3]], [[C]] - ; GFX9-NEXT: [[ANYEXT4:%[0-9]+]]:_(s32) = G_ANYEXT [[AND4]](s16) - ; GFX9-NEXT: [[ANYEXT5:%[0-9]+]]:_(s32) = G_ANYEXT [[AND5]](s16) - ; GFX9-NEXT: [[BUILD_VECTOR_TRUNC3:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[ANYEXT4]](s32), [[ANYEXT5]](s32) + ; GFX9-NEXT: [[TRUNC4:%[0-9]+]]:_(s16) = G_TRUNC [[COPY2]](s32) + ; GFX9-NEXT: [[AND4:%[0-9]+]]:_(s16) = G_AND [[TRUNC4]], [[C]] + ; GFX9-NEXT: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[COPY5]](s32) + ; GFX9-NEXT: [[AND5:%[0-9]+]]:_(s16) = G_AND [[TRUNC5]], [[C]] + ; GFX9-NEXT: [[MUL1:%[0-9]+]]:_(s16) = G_MUL [[AND4]], [[AND5]] + ; GFX9-NEXT: [[LSHR1:%[0-9]+]]:_(s16) = G_LSHR [[MUL1]], [[C1]](s16) + ; GFX9-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[LSHR]](<2 x s16>) + ; GFX9-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; GFX9-NEXT: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C3]](s32) + ; GFX9-NEXT: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF + ; GFX9-NEXT: [[TRUNC6:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST]](s32) ; GFX9-NEXT: [[AND6:%[0-9]+]]:_(s16) = G_AND [[TRUNC6]], [[C]] + ; GFX9-NEXT: [[TRUNC7:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR2]](s32) ; GFX9-NEXT: [[AND7:%[0-9]+]]:_(s16) = G_AND [[TRUNC7]], [[C]] - ; GFX9-NEXT: [[ANYEXT6:%[0-9]+]]:_(s32) = G_ANYEXT [[AND6]](s16) - ; GFX9-NEXT: [[ANYEXT7:%[0-9]+]]:_(s32) = G_ANYEXT [[AND7]](s16) - ; GFX9-NEXT: [[BUILD_VECTOR_TRUNC4:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[ANYEXT6]](s32), [[ANYEXT7]](s32) - ; GFX9-NEXT: [[MUL1:%[0-9]+]]:_(<2 x s16>) = G_MUL [[BUILD_VECTOR_TRUNC3]], [[BUILD_VECTOR_TRUNC4]] - ; GFX9-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY [[C1]](s32) - ; GFX9-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY [[C1]](s32) - ; GFX9-NEXT: [[BUILD_VECTOR_TRUNC5:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY7]](s32), [[COPY8]](s32) - ; GFX9-NEXT: [[LSHR1:%[0-9]+]]:_(<2 x s16>) = G_LSHR [[MUL1]], [[BUILD_VECTOR_TRUNC5]](<2 x s16>) - ; GFX9-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[LSHR]](<2 x s16>) - ; GFX9-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; GFX9-NEXT: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C2]](s32) - ; GFX9-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[LSHR1]](<2 x s16>) - ; GFX9-NEXT: [[DEF1:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF - ; GFX9-NEXT: [[TRUNC8:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST]](s32) - ; GFX9-NEXT: [[AND8:%[0-9]+]]:_(s16) = G_AND [[TRUNC8]], [[C]] - ; GFX9-NEXT: [[TRUNC9:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR2]](s32) - ; GFX9-NEXT: [[AND9:%[0-9]+]]:_(s16) = G_AND [[TRUNC9]], [[C]] - ; GFX9-NEXT: [[C3:%[0-9]+]]:_(s16) = G_CONSTANT i16 8 - ; GFX9-NEXT: [[SHL:%[0-9]+]]:_(s16) = G_SHL [[AND9]], [[C3]](s16) - ; GFX9-NEXT: [[OR:%[0-9]+]]:_(s16) = G_OR [[AND8]], [[SHL]] - ; GFX9-NEXT: [[TRUNC10:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST1]](s32) - ; GFX9-NEXT: [[AND10:%[0-9]+]]:_(s16) = G_AND [[TRUNC10]], [[C]] - ; GFX9-NEXT: [[TRUNC11:%[0-9]+]]:_(s16) = G_TRUNC [[DEF1]](s32) - ; GFX9-NEXT: [[AND11:%[0-9]+]]:_(s16) = G_AND [[TRUNC11]], [[C]] - ; GFX9-NEXT: [[SHL1:%[0-9]+]]:_(s16) = G_SHL [[AND11]], [[C3]](s16) - ; GFX9-NEXT: [[OR1:%[0-9]+]]:_(s16) = G_OR [[AND10]], [[SHL1]] + ; GFX9-NEXT: [[SHL:%[0-9]+]]:_(s16) = G_SHL [[AND7]], [[C1]](s16) + ; GFX9-NEXT: [[OR:%[0-9]+]]:_(s16) = G_OR [[AND6]], [[SHL]] + ; GFX9-NEXT: [[AND8:%[0-9]+]]:_(s16) = G_AND [[LSHR1]], [[C]] + ; GFX9-NEXT: [[TRUNC8:%[0-9]+]]:_(s16) = G_TRUNC [[DEF]](s32) + ; GFX9-NEXT: [[AND9:%[0-9]+]]:_(s16) = G_AND [[TRUNC8]], [[C]] + ; GFX9-NEXT: [[SHL1:%[0-9]+]]:_(s16) = G_SHL [[AND9]], [[C1]](s16) + ; GFX9-NEXT: 
[[OR1:%[0-9]+]]:_(s16) = G_OR [[AND8]], [[SHL1]] ; GFX9-NEXT: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[OR]](s16) ; GFX9-NEXT: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[OR1]](s16) - ; GFX9-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C2]](s32) + ; GFX9-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C3]](s32) ; GFX9-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL2]] ; GFX9-NEXT: $vgpr0 = COPY [[OR2]](s32) %0:_(s32) = COPY $vgpr0 diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-ushlsat.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-ushlsat.mir index 7c93db9ba2af..84c4eec55f2d 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-ushlsat.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-ushlsat.mir @@ -407,11 +407,9 @@ body: | ; GFX6-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 ; GFX6-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) ; GFX6-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>) - ; GFX6-NEXT: [[UV3:%[0-9]+]]:_(<2 x s16>), [[UV4:%[0-9]+]]:_(<2 x s16>), [[UV5:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[COPY]](<6 x s16>) - ; GFX6-NEXT: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[UV4]](<2 x s16>) - ; GFX6-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C]](s32) - ; GFX6-NEXT: [[BITCAST3:%[0-9]+]]:_(s32) = G_BITCAST [[UV5]](<2 x s16>) - ; GFX6-NEXT: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST3]], [[C]](s32) + ; GFX6-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C]](s32) + ; GFX6-NEXT: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[UV2]](<2 x s16>) + ; GFX6-NEXT: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C]](s32) ; GFX6-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 ; GFX6-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[LSHR1]], [[C1]] ; GFX6-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[BITCAST]], [[C]](s32) @@ -421,7 +419,7 @@ body: | ; GFX6-NEXT: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[SHL]](s32), [[LSHR3]] ; GFX6-NEXT: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[ICMP]](s1), [[C2]], [[SHL1]] ; GFX6-NEXT: [[LSHR4:%[0-9]+]]:_(s32) = G_LSHR [[SELECT]], [[C]](s32) - ; GFX6-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[BITCAST3]], [[C1]] + ; GFX6-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[BITCAST2]], [[C1]] ; GFX6-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[LSHR]], [[C]](s32) ; GFX6-NEXT: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[SHL2]], [[AND1]](s32) ; GFX6-NEXT: [[LSHR5:%[0-9]+]]:_(s32) = G_LSHR [[SHL3]], [[AND1]](s32) @@ -436,26 +434,26 @@ body: | ; GFX6-NEXT: [[SELECT2:%[0-9]+]]:_(s32) = G_SELECT [[ICMP2]](s1), [[C2]], [[SHL5]] ; GFX6-NEXT: [[LSHR8:%[0-9]+]]:_(s32) = G_LSHR [[SELECT2]], [[C]](s32) ; GFX6-NEXT: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF - ; GFX6-NEXT: [[UV6:%[0-9]+]]:_(<2 x s16>), [[UV7:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<4 x s16>) - ; GFX6-NEXT: [[BITCAST4:%[0-9]+]]:_(s32) = G_BITCAST [[UV6]](<2 x s16>) - ; GFX6-NEXT: [[LSHR9:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST4]], [[C]](s32) - ; GFX6-NEXT: [[BITCAST5:%[0-9]+]]:_(s32) = G_BITCAST [[UV7]](<2 x s16>) + ; GFX6-NEXT: [[UV3:%[0-9]+]]:_(<2 x s16>), [[UV4:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<4 x s16>) + ; GFX6-NEXT: [[BITCAST3:%[0-9]+]]:_(s32) = G_BITCAST [[UV3]](<2 x s16>) + ; GFX6-NEXT: [[LSHR9:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST3]], [[C]](s32) + ; GFX6-NEXT: [[BITCAST4:%[0-9]+]]:_(s32) = G_BITCAST [[UV4]](<2 x s16>) ; GFX6-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[LSHR4]], [[C1]] ; GFX6-NEXT: [[AND4:%[0-9]+]]:_(s32) = G_AND [[LSHR6]], [[C1]] ; GFX6-NEXT: [[SHL6:%[0-9]+]]:_(s32) = G_SHL [[AND4]], [[C]](s32) ; GFX6-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR 
[[AND3]], [[SHL6]] - ; GFX6-NEXT: [[BITCAST6:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) + ; GFX6-NEXT: [[BITCAST5:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) ; GFX6-NEXT: [[AND5:%[0-9]+]]:_(s32) = G_AND [[LSHR8]], [[C1]] - ; GFX6-NEXT: [[AND6:%[0-9]+]]:_(s32) = G_AND [[BITCAST4]], [[C1]] + ; GFX6-NEXT: [[AND6:%[0-9]+]]:_(s32) = G_AND [[BITCAST3]], [[C1]] ; GFX6-NEXT: [[SHL7:%[0-9]+]]:_(s32) = G_SHL [[AND6]], [[C]](s32) ; GFX6-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND5]], [[SHL7]] - ; GFX6-NEXT: [[BITCAST7:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32) + ; GFX6-NEXT: [[BITCAST6:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32) ; GFX6-NEXT: [[AND7:%[0-9]+]]:_(s32) = G_AND [[LSHR9]], [[C1]] - ; GFX6-NEXT: [[AND8:%[0-9]+]]:_(s32) = G_AND [[BITCAST5]], [[C1]] + ; GFX6-NEXT: [[AND8:%[0-9]+]]:_(s32) = G_AND [[BITCAST4]], [[C1]] ; GFX6-NEXT: [[SHL8:%[0-9]+]]:_(s32) = G_SHL [[AND8]], [[C]](s32) ; GFX6-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[AND7]], [[SHL8]] - ; GFX6-NEXT: [[BITCAST8:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR2]](s32) - ; GFX6-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BITCAST6]](<2 x s16>), [[BITCAST7]](<2 x s16>), [[BITCAST8]](<2 x s16>) + ; GFX6-NEXT: [[BITCAST7:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR2]](s32) + ; GFX6-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BITCAST5]](<2 x s16>), [[BITCAST6]](<2 x s16>), [[BITCAST7]](<2 x s16>) ; GFX6-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[CONCAT_VECTORS]](<6 x s16>) ; GFX8-LABEL: name: ushlsat_v3s16 ; GFX8: [[COPY:%[0-9]+]]:_(<6 x s16>) = COPY $vgpr0_vgpr1_vgpr2 @@ -467,13 +465,11 @@ body: | ; GFX8-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) ; GFX8-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>) ; GFX8-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST1]](s32) - ; GFX8-NEXT: [[UV3:%[0-9]+]]:_(<2 x s16>), [[UV4:%[0-9]+]]:_(<2 x s16>), [[UV5:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[COPY]](<6 x s16>) - ; GFX8-NEXT: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[UV4]](<2 x s16>) - ; GFX8-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C]](s32) + ; GFX8-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C]](s32) ; GFX8-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR1]](s32) - ; GFX8-NEXT: [[BITCAST3:%[0-9]+]]:_(s32) = G_BITCAST [[UV5]](<2 x s16>) - ; GFX8-NEXT: [[TRUNC4:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST3]](s32) - ; GFX8-NEXT: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST3]], [[C]](s32) + ; GFX8-NEXT: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[UV2]](<2 x s16>) + ; GFX8-NEXT: [[TRUNC4:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST2]](s32) + ; GFX8-NEXT: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C]](s32) ; GFX8-NEXT: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR2]](s32) ; GFX8-NEXT: [[SHL:%[0-9]+]]:_(s16) = G_SHL [[TRUNC]], [[TRUNC3]](s16) ; GFX8-NEXT: [[LSHR3:%[0-9]+]]:_(s16) = G_LSHR [[SHL]], [[TRUNC3]](s16) @@ -489,27 +485,27 @@ body: | ; GFX8-NEXT: [[ICMP2:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[TRUNC2]](s16), [[LSHR5]] ; GFX8-NEXT: [[SELECT2:%[0-9]+]]:_(s16) = G_SELECT [[ICMP2]](s1), [[C1]], [[SHL2]] ; GFX8-NEXT: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF - ; GFX8-NEXT: [[UV6:%[0-9]+]]:_(<2 x s16>), [[UV7:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<4 x s16>) - ; GFX8-NEXT: [[BITCAST4:%[0-9]+]]:_(s32) = G_BITCAST [[UV6]](<2 x s16>) - ; GFX8-NEXT: [[LSHR6:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST4]], [[C]](s32) - ; GFX8-NEXT: [[BITCAST5:%[0-9]+]]:_(s32) = G_BITCAST [[UV7]](<2 x s16>) + ; GFX8-NEXT: [[UV3:%[0-9]+]]:_(<2 x s16>), [[UV4:%[0-9]+]]:_(<2 x s16>) = 
G_UNMERGE_VALUES [[DEF]](<4 x s16>) + ; GFX8-NEXT: [[BITCAST3:%[0-9]+]]:_(s32) = G_BITCAST [[UV3]](<2 x s16>) + ; GFX8-NEXT: [[LSHR6:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST3]], [[C]](s32) + ; GFX8-NEXT: [[BITCAST4:%[0-9]+]]:_(s32) = G_BITCAST [[UV4]](<2 x s16>) ; GFX8-NEXT: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[SELECT]](s16) ; GFX8-NEXT: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[SELECT1]](s16) ; GFX8-NEXT: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C]](s32) ; GFX8-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL3]] - ; GFX8-NEXT: [[BITCAST6:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) + ; GFX8-NEXT: [[BITCAST5:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) ; GFX8-NEXT: [[ZEXT2:%[0-9]+]]:_(s32) = G_ZEXT [[SELECT2]](s16) ; GFX8-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 - ; GFX8-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[BITCAST4]], [[C2]] + ; GFX8-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[BITCAST3]], [[C2]] ; GFX8-NEXT: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[AND]], [[C]](s32) ; GFX8-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[ZEXT2]], [[SHL4]] - ; GFX8-NEXT: [[BITCAST7:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32) + ; GFX8-NEXT: [[BITCAST6:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32) ; GFX8-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LSHR6]], [[C2]] - ; GFX8-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[BITCAST5]], [[C2]] + ; GFX8-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[BITCAST4]], [[C2]] ; GFX8-NEXT: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[AND2]], [[C]](s32) ; GFX8-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[AND1]], [[SHL5]] - ; GFX8-NEXT: [[BITCAST8:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR2]](s32) - ; GFX8-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BITCAST6]](<2 x s16>), [[BITCAST7]](<2 x s16>), [[BITCAST8]](<2 x s16>) + ; GFX8-NEXT: [[BITCAST7:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR2]](s32) + ; GFX8-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BITCAST5]](<2 x s16>), [[BITCAST6]](<2 x s16>), [[BITCAST7]](<2 x s16>) ; GFX8-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[CONCAT_VECTORS]](<6 x s16>) ; GFX9-LABEL: name: ushlsat_v3s16 ; GFX9: [[COPY:%[0-9]+]]:_(<6 x s16>) = COPY $vgpr0_vgpr1_vgpr2 @@ -521,13 +517,11 @@ body: | ; GFX9-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) ; GFX9-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>) ; GFX9-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST1]](s32) - ; GFX9-NEXT: [[UV3:%[0-9]+]]:_(<2 x s16>), [[UV4:%[0-9]+]]:_(<2 x s16>), [[UV5:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[COPY]](<6 x s16>) - ; GFX9-NEXT: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[UV4]](<2 x s16>) - ; GFX9-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C]](s32) + ; GFX9-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C]](s32) ; GFX9-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR1]](s32) - ; GFX9-NEXT: [[BITCAST3:%[0-9]+]]:_(s32) = G_BITCAST [[UV5]](<2 x s16>) - ; GFX9-NEXT: [[TRUNC4:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST3]](s32) - ; GFX9-NEXT: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST3]], [[C]](s32) + ; GFX9-NEXT: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[UV2]](<2 x s16>) + ; GFX9-NEXT: [[TRUNC4:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST2]](s32) + ; GFX9-NEXT: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C]](s32) ; GFX9-NEXT: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR2]](s32) ; GFX9-NEXT: [[SHL:%[0-9]+]]:_(s16) = G_SHL [[TRUNC]], [[TRUNC3]](s16) ; GFX9-NEXT: [[LSHR3:%[0-9]+]]:_(s16) = G_LSHR [[SHL]], [[TRUNC3]](s16) @@ -543,16 +537,16 @@ body: | ; GFX9-NEXT: [[ICMP2:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[TRUNC2]](s16), [[LSHR5]] ; 
GFX9-NEXT: [[SELECT2:%[0-9]+]]:_(s16) = G_SELECT [[ICMP2]](s1), [[C1]], [[SHL2]] ; GFX9-NEXT: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF - ; GFX9-NEXT: [[UV6:%[0-9]+]]:_(<2 x s16>), [[UV7:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<4 x s16>) - ; GFX9-NEXT: [[BITCAST4:%[0-9]+]]:_(s32) = G_BITCAST [[UV6]](<2 x s16>) - ; GFX9-NEXT: [[LSHR6:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST4]], [[C]](s32) - ; GFX9-NEXT: [[BITCAST5:%[0-9]+]]:_(s32) = G_BITCAST [[UV7]](<2 x s16>) + ; GFX9-NEXT: [[UV3:%[0-9]+]]:_(<2 x s16>), [[UV4:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<4 x s16>) + ; GFX9-NEXT: [[BITCAST3:%[0-9]+]]:_(s32) = G_BITCAST [[UV3]](<2 x s16>) + ; GFX9-NEXT: [[LSHR6:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST3]], [[C]](s32) + ; GFX9-NEXT: [[BITCAST4:%[0-9]+]]:_(s32) = G_BITCAST [[UV4]](<2 x s16>) ; GFX9-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[SELECT]](s16) ; GFX9-NEXT: [[ANYEXT1:%[0-9]+]]:_(s32) = G_ANYEXT [[SELECT1]](s16) ; GFX9-NEXT: [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[ANYEXT]](s32), [[ANYEXT1]](s32) ; GFX9-NEXT: [[ANYEXT2:%[0-9]+]]:_(s32) = G_ANYEXT [[SELECT2]](s16) - ; GFX9-NEXT: [[BUILD_VECTOR_TRUNC1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[ANYEXT2]](s32), [[BITCAST4]](s32) - ; GFX9-NEXT: [[BUILD_VECTOR_TRUNC2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[LSHR6]](s32), [[BITCAST5]](s32) + ; GFX9-NEXT: [[BUILD_VECTOR_TRUNC1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[ANYEXT2]](s32), [[BITCAST3]](s32) + ; GFX9-NEXT: [[BUILD_VECTOR_TRUNC2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[LSHR6]](s32), [[BITCAST4]](s32) ; GFX9-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR_TRUNC]](<2 x s16>), [[BUILD_VECTOR_TRUNC1]](<2 x s16>), [[BUILD_VECTOR_TRUNC2]](<2 x s16>) ; GFX9-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[CONCAT_VECTORS]](<6 x s16>) %0:_(<6 x s16>) = COPY $vgpr0_vgpr1_vgpr2 diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-usubsat.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-usubsat.mir index 6221a15da5d5..fe8966843e35 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-usubsat.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-usubsat.mir @@ -317,18 +317,16 @@ body: | ; GFX6-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 ; GFX6-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) ; GFX6-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>) - ; GFX6-NEXT: [[UV3:%[0-9]+]]:_(<2 x s16>), [[UV4:%[0-9]+]]:_(<2 x s16>), [[UV5:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[COPY]](<6 x s16>) - ; GFX6-NEXT: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[UV4]](<2 x s16>) - ; GFX6-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C]](s32) - ; GFX6-NEXT: [[BITCAST3:%[0-9]+]]:_(s32) = G_BITCAST [[UV5]](<2 x s16>) - ; GFX6-NEXT: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST3]], [[C]](s32) + ; GFX6-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C]](s32) + ; GFX6-NEXT: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[UV2]](<2 x s16>) + ; GFX6-NEXT: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C]](s32) ; GFX6-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[BITCAST]], [[C]](s32) ; GFX6-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[LSHR1]], [[C]](s32) ; GFX6-NEXT: [[UMIN:%[0-9]+]]:_(s32) = G_UMIN [[SHL]], [[SHL1]] ; GFX6-NEXT: [[SUB:%[0-9]+]]:_(s32) = G_SUB [[SHL]], [[UMIN]] ; GFX6-NEXT: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[SUB]], [[C]](s32) ; GFX6-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[LSHR]], [[C]](s32) - ; GFX6-NEXT: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[BITCAST3]], [[C]](s32) + ; GFX6-NEXT: 
[[SHL3:%[0-9]+]]:_(s32) = G_SHL [[BITCAST2]], [[C]](s32) ; GFX6-NEXT: [[UMIN1:%[0-9]+]]:_(s32) = G_UMIN [[SHL2]], [[SHL3]] ; GFX6-NEXT: [[SUB1:%[0-9]+]]:_(s32) = G_SUB [[SHL2]], [[UMIN1]] ; GFX6-NEXT: [[LSHR4:%[0-9]+]]:_(s32) = G_LSHR [[SUB1]], [[C]](s32) @@ -338,27 +336,27 @@ body: | ; GFX6-NEXT: [[SUB2:%[0-9]+]]:_(s32) = G_SUB [[SHL4]], [[UMIN2]] ; GFX6-NEXT: [[LSHR5:%[0-9]+]]:_(s32) = G_LSHR [[SUB2]], [[C]](s32) ; GFX6-NEXT: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF - ; GFX6-NEXT: [[UV6:%[0-9]+]]:_(<2 x s16>), [[UV7:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<4 x s16>) - ; GFX6-NEXT: [[BITCAST4:%[0-9]+]]:_(s32) = G_BITCAST [[UV6]](<2 x s16>) - ; GFX6-NEXT: [[LSHR6:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST4]], [[C]](s32) - ; GFX6-NEXT: [[BITCAST5:%[0-9]+]]:_(s32) = G_BITCAST [[UV7]](<2 x s16>) + ; GFX6-NEXT: [[UV3:%[0-9]+]]:_(<2 x s16>), [[UV4:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<4 x s16>) + ; GFX6-NEXT: [[BITCAST3:%[0-9]+]]:_(s32) = G_BITCAST [[UV3]](<2 x s16>) + ; GFX6-NEXT: [[LSHR6:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST3]], [[C]](s32) + ; GFX6-NEXT: [[BITCAST4:%[0-9]+]]:_(s32) = G_BITCAST [[UV4]](<2 x s16>) ; GFX6-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 ; GFX6-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[LSHR3]], [[C1]] ; GFX6-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LSHR4]], [[C1]] ; GFX6-NEXT: [[SHL6:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C]](s32) ; GFX6-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL6]] - ; GFX6-NEXT: [[BITCAST6:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) + ; GFX6-NEXT: [[BITCAST5:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) ; GFX6-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[LSHR5]], [[C1]] - ; GFX6-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[BITCAST4]], [[C1]] + ; GFX6-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[BITCAST3]], [[C1]] ; GFX6-NEXT: [[SHL7:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C]](s32) ; GFX6-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL7]] - ; GFX6-NEXT: [[BITCAST7:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32) + ; GFX6-NEXT: [[BITCAST6:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32) ; GFX6-NEXT: [[AND4:%[0-9]+]]:_(s32) = G_AND [[LSHR6]], [[C1]] - ; GFX6-NEXT: [[AND5:%[0-9]+]]:_(s32) = G_AND [[BITCAST5]], [[C1]] + ; GFX6-NEXT: [[AND5:%[0-9]+]]:_(s32) = G_AND [[BITCAST4]], [[C1]] ; GFX6-NEXT: [[SHL8:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[C]](s32) ; GFX6-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[AND4]], [[SHL8]] - ; GFX6-NEXT: [[BITCAST8:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR2]](s32) - ; GFX6-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BITCAST6]](<2 x s16>), [[BITCAST7]](<2 x s16>), [[BITCAST8]](<2 x s16>) + ; GFX6-NEXT: [[BITCAST7:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR2]](s32) + ; GFX6-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BITCAST5]](<2 x s16>), [[BITCAST6]](<2 x s16>), [[BITCAST7]](<2 x s16>) ; GFX6-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[CONCAT_VECTORS]](<6 x s16>) ; GFX8-LABEL: name: usubsat_v3s16 ; GFX8: [[COPY:%[0-9]+]]:_(<6 x s16>) = COPY $vgpr0_vgpr1_vgpr2 @@ -370,39 +368,37 @@ body: | ; GFX8-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) ; GFX8-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>) ; GFX8-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST1]](s32) - ; GFX8-NEXT: [[UV3:%[0-9]+]]:_(<2 x s16>), [[UV4:%[0-9]+]]:_(<2 x s16>), [[UV5:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[COPY]](<6 x s16>) - ; GFX8-NEXT: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[UV4]](<2 x s16>) - ; GFX8-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C]](s32) + ; GFX8-NEXT: 
[[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C]](s32) ; GFX8-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR1]](s32) - ; GFX8-NEXT: [[BITCAST3:%[0-9]+]]:_(s32) = G_BITCAST [[UV5]](<2 x s16>) - ; GFX8-NEXT: [[TRUNC4:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST3]](s32) - ; GFX8-NEXT: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST3]], [[C]](s32) + ; GFX8-NEXT: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[UV2]](<2 x s16>) + ; GFX8-NEXT: [[TRUNC4:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST2]](s32) + ; GFX8-NEXT: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C]](s32) ; GFX8-NEXT: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR2]](s32) ; GFX8-NEXT: [[USUBSAT:%[0-9]+]]:_(s16) = G_USUBSAT [[TRUNC]], [[TRUNC3]] ; GFX8-NEXT: [[USUBSAT1:%[0-9]+]]:_(s16) = G_USUBSAT [[TRUNC1]], [[TRUNC4]] ; GFX8-NEXT: [[USUBSAT2:%[0-9]+]]:_(s16) = G_USUBSAT [[TRUNC2]], [[TRUNC5]] ; GFX8-NEXT: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF - ; GFX8-NEXT: [[UV6:%[0-9]+]]:_(<2 x s16>), [[UV7:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<4 x s16>) - ; GFX8-NEXT: [[BITCAST4:%[0-9]+]]:_(s32) = G_BITCAST [[UV6]](<2 x s16>) - ; GFX8-NEXT: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST4]], [[C]](s32) - ; GFX8-NEXT: [[BITCAST5:%[0-9]+]]:_(s32) = G_BITCAST [[UV7]](<2 x s16>) + ; GFX8-NEXT: [[UV3:%[0-9]+]]:_(<2 x s16>), [[UV4:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<4 x s16>) + ; GFX8-NEXT: [[BITCAST3:%[0-9]+]]:_(s32) = G_BITCAST [[UV3]](<2 x s16>) + ; GFX8-NEXT: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST3]], [[C]](s32) + ; GFX8-NEXT: [[BITCAST4:%[0-9]+]]:_(s32) = G_BITCAST [[UV4]](<2 x s16>) ; GFX8-NEXT: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[USUBSAT]](s16) ; GFX8-NEXT: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[USUBSAT1]](s16) ; GFX8-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C]](s32) ; GFX8-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL]] - ; GFX8-NEXT: [[BITCAST6:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) + ; GFX8-NEXT: [[BITCAST5:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) ; GFX8-NEXT: [[ZEXT2:%[0-9]+]]:_(s32) = G_ZEXT [[USUBSAT2]](s16) ; GFX8-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 - ; GFX8-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[BITCAST4]], [[C1]] + ; GFX8-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[BITCAST3]], [[C1]] ; GFX8-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND]], [[C]](s32) ; GFX8-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[ZEXT2]], [[SHL1]] - ; GFX8-NEXT: [[BITCAST7:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32) + ; GFX8-NEXT: [[BITCAST6:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32) ; GFX8-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LSHR3]], [[C1]] - ; GFX8-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[BITCAST5]], [[C1]] + ; GFX8-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[BITCAST4]], [[C1]] ; GFX8-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND2]], [[C]](s32) ; GFX8-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[AND1]], [[SHL2]] - ; GFX8-NEXT: [[BITCAST8:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR2]](s32) - ; GFX8-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BITCAST6]](<2 x s16>), [[BITCAST7]](<2 x s16>), [[BITCAST8]](<2 x s16>) + ; GFX8-NEXT: [[BITCAST7:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR2]](s32) + ; GFX8-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BITCAST5]](<2 x s16>), [[BITCAST6]](<2 x s16>), [[BITCAST7]](<2 x s16>) ; GFX8-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[CONCAT_VECTORS]](<6 x s16>) ; GFX9-LABEL: name: usubsat_v3s16 ; GFX9: [[COPY:%[0-9]+]]:_(<6 x s16>) = COPY $vgpr0_vgpr1_vgpr2 @@ -411,29 +407,27 @@ body: | ; GFX9-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 ; GFX9-NEXT: 
[[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) ; GFX9-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>) + ; GFX9-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C]](s32) + ; GFX9-NEXT: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[UV2]](<2 x s16>) + ; GFX9-NEXT: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C]](s32) ; GFX9-NEXT: [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[BITCAST]](s32), [[LSHR]](s32) ; GFX9-NEXT: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF ; GFX9-NEXT: [[BUILD_VECTOR_TRUNC1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[BITCAST1]](s32), [[DEF]](s32) - ; GFX9-NEXT: [[UV3:%[0-9]+]]:_(<2 x s16>), [[UV4:%[0-9]+]]:_(<2 x s16>), [[UV5:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[COPY]](<6 x s16>) - ; GFX9-NEXT: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[UV4]](<2 x s16>) - ; GFX9-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C]](s32) - ; GFX9-NEXT: [[BITCAST3:%[0-9]+]]:_(s32) = G_BITCAST [[UV5]](<2 x s16>) - ; GFX9-NEXT: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST3]], [[C]](s32) - ; GFX9-NEXT: [[BUILD_VECTOR_TRUNC2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[LSHR1]](s32), [[BITCAST3]](s32) + ; GFX9-NEXT: [[BUILD_VECTOR_TRUNC2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[LSHR1]](s32), [[BITCAST2]](s32) ; GFX9-NEXT: [[BUILD_VECTOR_TRUNC3:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[LSHR2]](s32), [[DEF]](s32) ; GFX9-NEXT: [[USUBSAT:%[0-9]+]]:_(<2 x s16>) = G_USUBSAT [[BUILD_VECTOR_TRUNC]], [[BUILD_VECTOR_TRUNC2]] ; GFX9-NEXT: [[USUBSAT1:%[0-9]+]]:_(<2 x s16>) = G_USUBSAT [[BUILD_VECTOR_TRUNC1]], [[BUILD_VECTOR_TRUNC3]] + ; GFX9-NEXT: [[BITCAST3:%[0-9]+]]:_(s32) = G_BITCAST [[USUBSAT]](<2 x s16>) + ; GFX9-NEXT: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST3]], [[C]](s32) + ; GFX9-NEXT: [[BITCAST4:%[0-9]+]]:_(s32) = G_BITCAST [[USUBSAT1]](<2 x s16>) ; GFX9-NEXT: [[DEF1:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF - ; GFX9-NEXT: [[BITCAST4:%[0-9]+]]:_(s32) = G_BITCAST [[USUBSAT]](<2 x s16>) - ; GFX9-NEXT: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST4]], [[C]](s32) - ; GFX9-NEXT: [[BITCAST5:%[0-9]+]]:_(s32) = G_BITCAST [[USUBSAT1]](<2 x s16>) - ; GFX9-NEXT: [[UV6:%[0-9]+]]:_(<2 x s16>), [[UV7:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF1]](<4 x s16>) - ; GFX9-NEXT: [[BITCAST6:%[0-9]+]]:_(s32) = G_BITCAST [[UV6]](<2 x s16>) - ; GFX9-NEXT: [[LSHR4:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST6]], [[C]](s32) - ; GFX9-NEXT: [[BITCAST7:%[0-9]+]]:_(s32) = G_BITCAST [[UV7]](<2 x s16>) - ; GFX9-NEXT: [[BUILD_VECTOR_TRUNC4:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[BITCAST4]](s32), [[LSHR3]](s32) - ; GFX9-NEXT: [[BUILD_VECTOR_TRUNC5:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[BITCAST5]](s32), [[BITCAST6]](s32) - ; GFX9-NEXT: [[BUILD_VECTOR_TRUNC6:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[LSHR4]](s32), [[BITCAST7]](s32) + ; GFX9-NEXT: [[UV3:%[0-9]+]]:_(<2 x s16>), [[UV4:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF1]](<4 x s16>) + ; GFX9-NEXT: [[BITCAST5:%[0-9]+]]:_(s32) = G_BITCAST [[UV3]](<2 x s16>) + ; GFX9-NEXT: [[LSHR4:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST5]], [[C]](s32) + ; GFX9-NEXT: [[BITCAST6:%[0-9]+]]:_(s32) = G_BITCAST [[UV4]](<2 x s16>) + ; GFX9-NEXT: [[BUILD_VECTOR_TRUNC4:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[BITCAST3]](s32), [[LSHR3]](s32) + ; GFX9-NEXT: [[BUILD_VECTOR_TRUNC5:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[BITCAST4]](s32), [[BITCAST5]](s32) + ; GFX9-NEXT: [[BUILD_VECTOR_TRUNC6:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[LSHR4]](s32), [[BITCAST6]](s32) ; GFX9-NEXT: 
[[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR_TRUNC4]](<2 x s16>), [[BUILD_VECTOR_TRUNC5]](<2 x s16>), [[BUILD_VECTOR_TRUNC6]](<2 x s16>) ; GFX9-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[CONCAT_VECTORS]](<6 x s16>) %0:_(<6 x s16>) = COPY $vgpr0_vgpr1_vgpr2 diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-vector-args-gfx8-plus.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-vector-args-gfx8-plus.mir index 3d718081ba92..7e00c75a2a51 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-vector-args-gfx8-plus.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-vector-args-gfx8-plus.mir @@ -1,6 +1,6 @@ # NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py -# RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=fiji -run-pass=legalizer -global-isel-abort=0 -verify-machineinstrs -o - %s | FileCheck -check-prefix=GFX8 %s -# RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx900 -run-pass=legalizer -global-isel-abort=0 -verify-machineinstrs -o - %s | FileCheck -check-prefix=GFX9 %s +# RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=fiji -run-pass=legalizer -verify-machineinstrs -o - %s | FileCheck -check-prefix=GFX8 %s +# RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx900 -run-pass=legalizer -verify-machineinstrs -o - %s | FileCheck -check-prefix=GFX9 %s --- | @@ -73,100 +73,88 @@ body: | ; GFX8-LABEL: name: add_v3i16 ; GFX8: [[COPY:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0 ; GFX8-NEXT: [[COPY1:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr1 - ; GFX8-NEXT: [[DEF:%[0-9]+]]:_(<2 x s16>) = G_IMPLICIT_DEF - ; GFX8-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[COPY]](<2 x s16>), [[COPY1]](<2 x s16>), [[DEF]](<2 x s16>) - ; GFX8-NEXT: [[BITCAST:%[0-9]+]]:_(s96) = G_BITCAST [[CONCAT_VECTORS]](<6 x s16>) - ; GFX8-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[BITCAST]](s96) - ; GFX8-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[UV]](s32) + ; GFX8-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY]](<2 x s16>) + ; GFX8-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST]](s32) ; GFX8-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; GFX8-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[UV]], [[C]](s32) + ; GFX8-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) ; GFX8-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) - ; GFX8-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[UV1]](s32) + ; GFX8-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[COPY1]](<2 x s16>) + ; GFX8-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST1]](s32) ; GFX8-NEXT: [[COPY2:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr2 ; GFX8-NEXT: [[COPY3:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr3 - ; GFX8-NEXT: [[CONCAT_VECTORS1:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[COPY2]](<2 x s16>), [[COPY3]](<2 x s16>), [[DEF]](<2 x s16>) - ; GFX8-NEXT: [[BITCAST1:%[0-9]+]]:_(s96) = G_BITCAST [[CONCAT_VECTORS1]](<6 x s16>) - ; GFX8-NEXT: [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[BITCAST1]](s96) - ; GFX8-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[UV3]](s32) - ; GFX8-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[UV3]], [[C]](s32) + ; GFX8-NEXT: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[COPY2]](<2 x s16>) + ; GFX8-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST2]](s32) + ; GFX8-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C]](s32) ; GFX8-NEXT: [[TRUNC4:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR1]](s32) - ; GFX8-NEXT: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[UV4]](s32) + ; GFX8-NEXT: [[BITCAST3:%[0-9]+]]:_(s32) = G_BITCAST 
[[COPY3]](<2 x s16>) + ; GFX8-NEXT: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST3]](s32) ; GFX8-NEXT: [[COPY4:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 ; GFX8-NEXT: [[ADD:%[0-9]+]]:_(s16) = G_ADD [[TRUNC]], [[TRUNC3]] ; GFX8-NEXT: [[ADD1:%[0-9]+]]:_(s16) = G_ADD [[TRUNC1]], [[TRUNC4]] ; GFX8-NEXT: [[ADD2:%[0-9]+]]:_(s16) = G_ADD [[TRUNC2]], [[TRUNC5]] - ; GFX8-NEXT: [[DEF1:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF - ; GFX8-NEXT: [[UV6:%[0-9]+]]:_(<2 x s16>), [[UV7:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF1]](<4 x s16>) - ; GFX8-NEXT: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[UV6]](<2 x s16>) ; GFX8-NEXT: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[ADD]](s16) ; GFX8-NEXT: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[ADD1]](s16) ; GFX8-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C]](s32) ; GFX8-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL]] - ; GFX8-NEXT: [[BITCAST3:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) + ; GFX8-NEXT: [[BITCAST4:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) ; GFX8-NEXT: [[ZEXT2:%[0-9]+]]:_(s32) = G_ZEXT [[ADD2]](s16) - ; GFX8-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 - ; GFX8-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[BITCAST2]], [[C1]] - ; GFX8-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND]], [[C]](s32) + ; GFX8-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 + ; GFX8-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[C1]], [[C]](s32) ; GFX8-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[ZEXT2]], [[SHL1]] - ; GFX8-NEXT: [[BITCAST4:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32) - ; GFX8-NEXT: $vgpr0 = COPY [[BITCAST3]](<2 x s16>) - ; GFX8-NEXT: $vgpr1 = COPY [[BITCAST4]](<2 x s16>) + ; GFX8-NEXT: [[BITCAST5:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32) + ; GFX8-NEXT: $vgpr0 = COPY [[BITCAST4]](<2 x s16>) + ; GFX8-NEXT: $vgpr1 = COPY [[BITCAST5]](<2 x s16>) ; GFX8-NEXT: [[COPY5:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY4]] ; GFX8-NEXT: S_SETPC_B64_return [[COPY5]], implicit $vgpr0, implicit $vgpr1 ; GFX9-LABEL: name: add_v3i16 ; GFX9: [[COPY:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0 ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr1 - ; GFX9-NEXT: [[DEF:%[0-9]+]]:_(<2 x s16>) = G_IMPLICIT_DEF - ; GFX9-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[COPY]](<2 x s16>), [[COPY1]](<2 x s16>), [[DEF]](<2 x s16>) - ; GFX9-NEXT: [[BITCAST:%[0-9]+]]:_(s96) = G_BITCAST [[CONCAT_VECTORS]](<6 x s16>) - ; GFX9-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[BITCAST]](s96) + ; GFX9-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY]](<2 x s16>) ; GFX9-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; GFX9-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[UV]], [[C]](s32) + ; GFX9-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) + ; GFX9-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[COPY1]](<2 x s16>) ; GFX9-NEXT: [[COPY2:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr2 ; GFX9-NEXT: [[COPY3:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr3 - ; GFX9-NEXT: [[CONCAT_VECTORS1:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[COPY2]](<2 x s16>), [[COPY3]](<2 x s16>), [[DEF]](<2 x s16>) - ; GFX9-NEXT: [[BITCAST1:%[0-9]+]]:_(s96) = G_BITCAST [[CONCAT_VECTORS1]](<6 x s16>) - ; GFX9-NEXT: [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[BITCAST1]](s96) - ; GFX9-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[UV3]], [[C]](s32) + ; GFX9-NEXT: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[COPY2]](<2 x s16>) + ; GFX9-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C]](s32) + ; GFX9-NEXT: [[BITCAST3:%[0-9]+]]:_(s32) = G_BITCAST 
[[COPY3]](<2 x s16>) ; GFX9-NEXT: [[COPY4:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 - ; GFX9-NEXT: [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[UV]](s32), [[LSHR]](s32) - ; GFX9-NEXT: [[DEF1:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF - ; GFX9-NEXT: [[BUILD_VECTOR_TRUNC1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[UV1]](s32), [[DEF1]](s32) - ; GFX9-NEXT: [[BUILD_VECTOR_TRUNC2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[UV3]](s32), [[LSHR1]](s32) - ; GFX9-NEXT: [[BUILD_VECTOR_TRUNC3:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[UV4]](s32), [[DEF1]](s32) + ; GFX9-NEXT: [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[BITCAST]](s32), [[LSHR]](s32) + ; GFX9-NEXT: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF + ; GFX9-NEXT: [[BUILD_VECTOR_TRUNC1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[BITCAST1]](s32), [[DEF]](s32) + ; GFX9-NEXT: [[BUILD_VECTOR_TRUNC2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[BITCAST2]](s32), [[LSHR1]](s32) + ; GFX9-NEXT: [[BUILD_VECTOR_TRUNC3:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[BITCAST3]](s32), [[DEF]](s32) ; GFX9-NEXT: [[ADD:%[0-9]+]]:_(<2 x s16>) = G_ADD [[BUILD_VECTOR_TRUNC]], [[BUILD_VECTOR_TRUNC2]] ; GFX9-NEXT: [[ADD1:%[0-9]+]]:_(<2 x s16>) = G_ADD [[BUILD_VECTOR_TRUNC1]], [[BUILD_VECTOR_TRUNC3]] - ; GFX9-NEXT: [[DEF2:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF - ; GFX9-NEXT: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[ADD1]](<2 x s16>) - ; GFX9-NEXT: [[UV6:%[0-9]+]]:_(<2 x s16>), [[UV7:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF2]](<4 x s16>) - ; GFX9-NEXT: [[BITCAST3:%[0-9]+]]:_(s32) = G_BITCAST [[UV6]](<2 x s16>) - ; GFX9-NEXT: [[BUILD_VECTOR_TRUNC4:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[BITCAST2]](s32), [[BITCAST3]](s32) - ; GFX9-NEXT: $vgpr0 = COPY [[ADD]](<2 x s16>) - ; GFX9-NEXT: $vgpr1 = COPY [[BUILD_VECTOR_TRUNC4]](<2 x s16>) + ; GFX9-NEXT: [[BITCAST4:%[0-9]+]]:_(s32) = G_BITCAST [[ADD]](<2 x s16>) + ; GFX9-NEXT: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST4]], [[C]](s32) + ; GFX9-NEXT: [[BITCAST5:%[0-9]+]]:_(s32) = G_BITCAST [[ADD1]](<2 x s16>) + ; GFX9-NEXT: [[BUILD_VECTOR_TRUNC4:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[BITCAST4]](s32), [[LSHR2]](s32) + ; GFX9-NEXT: [[BUILD_VECTOR_TRUNC5:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[BITCAST5]](s32), [[DEF]](s32) + ; GFX9-NEXT: $vgpr0 = COPY [[BUILD_VECTOR_TRUNC4]](<2 x s16>) + ; GFX9-NEXT: $vgpr1 = COPY [[BUILD_VECTOR_TRUNC5]](<2 x s16>) ; GFX9-NEXT: [[COPY5:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY4]] ; GFX9-NEXT: S_SETPC_B64_return [[COPY5]], implicit $vgpr0, implicit $vgpr1 %3:_(<2 x s16>) = COPY $vgpr0 %4:_(<2 x s16>) = COPY $vgpr1 - %5:_(<2 x s16>) = G_IMPLICIT_DEF - %6:_(<6 x s16>) = G_CONCAT_VECTORS %3(<2 x s16>), %4(<2 x s16>), %5(<2 x s16>) - %19:_(s96) = G_BITCAST %6(<6 x s16>) - %20:_(s48) = G_TRUNC %19(s96) - %0:_(<3 x s16>) = G_BITCAST %20(s48) - %8:_(<2 x s16>) = COPY $vgpr2 - %9:_(<2 x s16>) = COPY $vgpr3 - %10:_(<6 x s16>) = G_CONCAT_VECTORS %8(<2 x s16>), %9(<2 x s16>), %5(<2 x s16>) - %21:_(s96) = G_BITCAST %10(<6 x s16>) - %22:_(s48) = G_TRUNC %21(s96) - %1:_(<3 x s16>) = G_BITCAST %22(s48) + %5:_(<4 x s16>) = G_CONCAT_VECTORS %3(<2 x s16>), %4(<2 x s16>) + %6:_(s16), %7:_(s16), %8:_(s16), %9:_(s16) = G_UNMERGE_VALUES %5(<4 x s16>) + %0:_(<3 x s16>) = G_BUILD_VECTOR %6(s16), %7(s16), %8(s16) + %10:_(<2 x s16>) = COPY $vgpr2 + %11:_(<2 x s16>) = COPY $vgpr3 + %12:_(<4 x s16>) = G_CONCAT_VECTORS %10(<2 x s16>), %11(<2 x s16>) + %13:_(s16), %14:_(s16), %15:_(s16), %16:_(s16) = G_UNMERGE_VALUES %12(<4 x s16>) + %1:_(<3 x s16>) 
= G_BUILD_VECTOR %13(s16), %14(s16), %15(s16) %2:sgpr_64 = COPY $sgpr30_sgpr31 - %12:_(<3 x s16>) = G_ADD %0, %1 - %16:_(<3 x s16>) = G_IMPLICIT_DEF - %17:_(<6 x s16>) = G_CONCAT_VECTORS %12(<3 x s16>), %16(<3 x s16>) - %14:_(<2 x s16>), %15:_(<2 x s16>), %18:_(<2 x s16>) = G_UNMERGE_VALUES %17(<6 x s16>) - $vgpr0 = COPY %14(<2 x s16>) - $vgpr1 = COPY %15(<2 x s16>) - %13:ccr_sgpr_64 = COPY %2 - S_SETPC_B64_return %13, implicit $vgpr0, implicit $vgpr1 + %17:_(<3 x s16>) = G_ADD %0, %1 + %21:_(s16), %22:_(s16), %23:_(s16) = G_UNMERGE_VALUES %17(<3 x s16>) + %24:_(s16) = G_IMPLICIT_DEF + %25:_(<4 x s16>) = G_BUILD_VECTOR %21(s16), %22(s16), %23(s16), %24(s16) + %19:_(<2 x s16>), %20:_(<2 x s16>) = G_UNMERGE_VALUES %25(<4 x s16>) + $vgpr0 = COPY %19(<2 x s16>) + $vgpr1 = COPY %20(<2 x s16>) + %18:ccr_sgpr_64 = COPY %2 + S_SETPC_B64_return %18, implicit $vgpr0, implicit $vgpr1 ... --- @@ -178,116 +166,88 @@ body: | ; GFX8-LABEL: name: shl_v3i16 ; GFX8: [[COPY:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0 ; GFX8-NEXT: [[COPY1:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr1 - ; GFX8-NEXT: [[DEF:%[0-9]+]]:_(<2 x s16>) = G_IMPLICIT_DEF - ; GFX8-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[COPY]](<2 x s16>), [[COPY1]](<2 x s16>), [[DEF]](<2 x s16>) - ; GFX8-NEXT: [[BITCAST:%[0-9]+]]:_(s96) = G_BITCAST [[CONCAT_VECTORS]](<6 x s16>) - ; GFX8-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[BITCAST]](s96) - ; GFX8-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[UV]](s32) + ; GFX8-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY]](<2 x s16>) + ; GFX8-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST]](s32) ; GFX8-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; GFX8-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[UV]], [[C]](s32) + ; GFX8-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) ; GFX8-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) - ; GFX8-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[UV1]](s32) + ; GFX8-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[COPY1]](<2 x s16>) + ; GFX8-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST1]](s32) ; GFX8-NEXT: [[COPY2:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr2 ; GFX8-NEXT: [[COPY3:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr3 - ; GFX8-NEXT: [[CONCAT_VECTORS1:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[COPY2]](<2 x s16>), [[COPY3]](<2 x s16>), [[DEF]](<2 x s16>) - ; GFX8-NEXT: [[BITCAST1:%[0-9]+]]:_(s96) = G_BITCAST [[CONCAT_VECTORS1]](<6 x s16>) - ; GFX8-NEXT: [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[BITCAST1]](s96) - ; GFX8-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[UV3]](s32) - ; GFX8-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[UV3]], [[C]](s32) + ; GFX8-NEXT: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[COPY2]](<2 x s16>) + ; GFX8-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST2]](s32) + ; GFX8-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C]](s32) ; GFX8-NEXT: [[TRUNC4:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR1]](s32) - ; GFX8-NEXT: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[UV4]](s32) + ; GFX8-NEXT: [[BITCAST3:%[0-9]+]]:_(s32) = G_BITCAST [[COPY3]](<2 x s16>) + ; GFX8-NEXT: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST3]](s32) ; GFX8-NEXT: [[COPY4:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 ; GFX8-NEXT: [[SHL:%[0-9]+]]:_(s16) = G_SHL [[TRUNC]], [[TRUNC3]](s16) ; GFX8-NEXT: [[SHL1:%[0-9]+]]:_(s16) = G_SHL [[TRUNC1]], [[TRUNC4]](s16) ; GFX8-NEXT: [[SHL2:%[0-9]+]]:_(s16) = G_SHL [[TRUNC2]], [[TRUNC5]](s16) - ; GFX8-NEXT: [[DEF1:%[0-9]+]]:_(<4 x s16>) = 
G_IMPLICIT_DEF - ; GFX8-NEXT: [[UV6:%[0-9]+]]:_(<2 x s16>), [[UV7:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF1]](<4 x s16>) - ; GFX8-NEXT: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[UV6]](<2 x s16>) ; GFX8-NEXT: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[SHL]](s16) ; GFX8-NEXT: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[SHL1]](s16) ; GFX8-NEXT: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C]](s32) ; GFX8-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL3]] - ; GFX8-NEXT: [[BITCAST3:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) + ; GFX8-NEXT: [[BITCAST4:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) ; GFX8-NEXT: [[ZEXT2:%[0-9]+]]:_(s32) = G_ZEXT [[SHL2]](s16) - ; GFX8-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 - ; GFX8-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[BITCAST2]], [[C1]] - ; GFX8-NEXT: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[AND]], [[C]](s32) + ; GFX8-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 + ; GFX8-NEXT: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[C1]], [[C]](s32) ; GFX8-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[ZEXT2]], [[SHL4]] - ; GFX8-NEXT: [[BITCAST4:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32) - ; GFX8-NEXT: $vgpr0 = COPY [[BITCAST3]](<2 x s16>) - ; GFX8-NEXT: $vgpr1 = COPY [[BITCAST4]](<2 x s16>) + ; GFX8-NEXT: [[BITCAST5:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32) + ; GFX8-NEXT: $vgpr0 = COPY [[BITCAST4]](<2 x s16>) + ; GFX8-NEXT: $vgpr1 = COPY [[BITCAST5]](<2 x s16>) ; GFX8-NEXT: [[COPY5:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY4]] ; GFX8-NEXT: S_SETPC_B64_return [[COPY5]], implicit $vgpr0, implicit $vgpr1 ; GFX9-LABEL: name: shl_v3i16 ; GFX9: [[COPY:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0 ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr1 - ; GFX9-NEXT: [[DEF:%[0-9]+]]:_(<2 x s16>) = G_IMPLICIT_DEF - ; GFX9-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[COPY]](<2 x s16>), [[COPY1]](<2 x s16>), [[DEF]](<2 x s16>) - ; GFX9-NEXT: [[BITCAST:%[0-9]+]]:_(s96) = G_BITCAST [[CONCAT_VECTORS]](<6 x s16>) - ; GFX9-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[BITCAST]](s96) + ; GFX9-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY]](<2 x s16>) ; GFX9-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; GFX9-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[UV]], [[C]](s32) - ; GFX9-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[UV1]](s32) - ; GFX9-NEXT: [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[UV]](s32), [[LSHR]](s32) - ; GFX9-NEXT: [[DEF1:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF - ; GFX9-NEXT: [[BUILD_VECTOR_TRUNC1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[UV1]](s32), [[DEF1]](s32) - ; GFX9-NEXT: [[CONCAT_VECTORS1:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR_TRUNC]](<2 x s16>), [[BUILD_VECTOR_TRUNC1]](<2 x s16>), [[DEF]](<2 x s16>) - ; GFX9-NEXT: [[UV3:%[0-9]+]]:_(<3 x s16>), [[UV4:%[0-9]+]]:_(<3 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS1]](<6 x s16>) + ; GFX9-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) + ; GFX9-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[COPY1]](<2 x s16>) + ; GFX9-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST1]](s32) ; GFX9-NEXT: [[COPY2:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr2 ; GFX9-NEXT: [[COPY3:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr3 - ; GFX9-NEXT: [[CONCAT_VECTORS2:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[COPY2]](<2 x s16>), [[COPY3]](<2 x s16>), [[DEF]](<2 x s16>) - ; GFX9-NEXT: [[BITCAST1:%[0-9]+]]:_(s96) = G_BITCAST [[CONCAT_VECTORS2]](<6 x s16>) - ; GFX9-NEXT: [[UV5:%[0-9]+]]:_(s32), [[UV6:%[0-9]+]]:_(s32), [[UV7:%[0-9]+]]:_(s32) = 
G_UNMERGE_VALUES [[BITCAST1]](s96) - ; GFX9-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[UV5]], [[C]](s32) - ; GFX9-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[UV6]](s32) - ; GFX9-NEXT: [[BUILD_VECTOR_TRUNC2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[UV5]](s32), [[LSHR1]](s32) - ; GFX9-NEXT: [[BUILD_VECTOR_TRUNC3:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[UV6]](s32), [[DEF1]](s32) - ; GFX9-NEXT: [[CONCAT_VECTORS3:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR_TRUNC2]](<2 x s16>), [[BUILD_VECTOR_TRUNC3]](<2 x s16>), [[DEF]](<2 x s16>) - ; GFX9-NEXT: [[UV8:%[0-9]+]]:_(<3 x s16>), [[UV9:%[0-9]+]]:_(<3 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS3]](<6 x s16>) + ; GFX9-NEXT: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[COPY2]](<2 x s16>) + ; GFX9-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C]](s32) + ; GFX9-NEXT: [[BITCAST3:%[0-9]+]]:_(s32) = G_BITCAST [[COPY3]](<2 x s16>) + ; GFX9-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST3]](s32) ; GFX9-NEXT: [[COPY4:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 - ; GFX9-NEXT: [[DEF2:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF - ; GFX9-NEXT: [[INSERT:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF2]], [[UV3]](<3 x s16>), 0 - ; GFX9-NEXT: [[EXTRACT:%[0-9]+]]:_(<2 x s16>) = G_EXTRACT [[INSERT]](<4 x s16>), 0 - ; GFX9-NEXT: [[COPY5:%[0-9]+]]:_(s16) = COPY [[TRUNC]](s16) - ; GFX9-NEXT: [[INSERT1:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF2]], [[UV8]](<3 x s16>), 0 - ; GFX9-NEXT: [[EXTRACT1:%[0-9]+]]:_(<2 x s16>) = G_EXTRACT [[INSERT1]](<4 x s16>), 0 - ; GFX9-NEXT: [[COPY6:%[0-9]+]]:_(s16) = COPY [[TRUNC1]](s16) - ; GFX9-NEXT: [[SHL:%[0-9]+]]:_(<2 x s16>) = G_SHL [[EXTRACT]], [[EXTRACT1]](<2 x s16>) - ; GFX9-NEXT: [[SHL1:%[0-9]+]]:_(s16) = G_SHL [[COPY5]], [[COPY6]](s16) - ; GFX9-NEXT: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[SHL]](<2 x s16>) - ; GFX9-NEXT: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C]](s32) - ; GFX9-NEXT: [[DEF3:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF - ; GFX9-NEXT: [[UV10:%[0-9]+]]:_(<2 x s16>), [[UV11:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF3]](<4 x s16>) - ; GFX9-NEXT: [[BITCAST3:%[0-9]+]]:_(s32) = G_BITCAST [[UV10]](<2 x s16>) - ; GFX9-NEXT: [[BUILD_VECTOR_TRUNC4:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[BITCAST2]](s32), [[LSHR2]](s32) + ; GFX9-NEXT: [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[BITCAST]](s32), [[LSHR]](s32) + ; GFX9-NEXT: [[BUILD_VECTOR_TRUNC1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[BITCAST2]](s32), [[LSHR1]](s32) + ; GFX9-NEXT: [[SHL:%[0-9]+]]:_(<2 x s16>) = G_SHL [[BUILD_VECTOR_TRUNC]], [[BUILD_VECTOR_TRUNC1]](<2 x s16>) + ; GFX9-NEXT: [[SHL1:%[0-9]+]]:_(s16) = G_SHL [[TRUNC]], [[TRUNC1]](s16) + ; GFX9-NEXT: [[BITCAST4:%[0-9]+]]:_(s32) = G_BITCAST [[SHL]](<2 x s16>) + ; GFX9-NEXT: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST4]], [[C]](s32) + ; GFX9-NEXT: [[BUILD_VECTOR_TRUNC2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[BITCAST4]](s32), [[LSHR2]](s32) ; GFX9-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[SHL1]](s16) - ; GFX9-NEXT: [[BUILD_VECTOR_TRUNC5:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[ANYEXT]](s32), [[BITCAST3]](s32) - ; GFX9-NEXT: $vgpr0 = COPY [[BUILD_VECTOR_TRUNC4]](<2 x s16>) - ; GFX9-NEXT: $vgpr1 = COPY [[BUILD_VECTOR_TRUNC5]](<2 x s16>) - ; GFX9-NEXT: [[COPY7:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY4]] - ; GFX9-NEXT: S_SETPC_B64_return [[COPY7]], implicit $vgpr0, implicit $vgpr1 + ; GFX9-NEXT: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF + ; GFX9-NEXT: [[BUILD_VECTOR_TRUNC3:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[ANYEXT]](s32), [[DEF]](s32) 
+ ; GFX9-NEXT: $vgpr0 = COPY [[BUILD_VECTOR_TRUNC2]](<2 x s16>) + ; GFX9-NEXT: $vgpr1 = COPY [[BUILD_VECTOR_TRUNC3]](<2 x s16>) + ; GFX9-NEXT: [[COPY5:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY4]] + ; GFX9-NEXT: S_SETPC_B64_return [[COPY5]], implicit $vgpr0, implicit $vgpr1 %3:_(<2 x s16>) = COPY $vgpr0 %4:_(<2 x s16>) = COPY $vgpr1 - %5:_(<2 x s16>) = G_IMPLICIT_DEF - %6:_(<6 x s16>) = G_CONCAT_VECTORS %3(<2 x s16>), %4(<2 x s16>), %5(<2 x s16>) - %19:_(s96) = G_BITCAST %6(<6 x s16>) - %20:_(s48) = G_TRUNC %19(s96) - %0:_(<3 x s16>) = G_BITCAST %20(s48) - %8:_(<2 x s16>) = COPY $vgpr2 - %9:_(<2 x s16>) = COPY $vgpr3 - %10:_(<6 x s16>) = G_CONCAT_VECTORS %8(<2 x s16>), %9(<2 x s16>), %5(<2 x s16>) - %21:_(s96) = G_BITCAST %10(<6 x s16>) - %22:_(s48) = G_TRUNC %21(s96) - %1:_(<3 x s16>) = G_BITCAST %22(s48) + %5:_(<4 x s16>) = G_CONCAT_VECTORS %3(<2 x s16>), %4(<2 x s16>) + %6:_(s16), %7:_(s16), %8:_(s16), %9:_(s16) = G_UNMERGE_VALUES %5(<4 x s16>) + %0:_(<3 x s16>) = G_BUILD_VECTOR %6(s16), %7(s16), %8(s16) + %10:_(<2 x s16>) = COPY $vgpr2 + %11:_(<2 x s16>) = COPY $vgpr3 + %12:_(<4 x s16>) = G_CONCAT_VECTORS %10(<2 x s16>), %11(<2 x s16>) + %13:_(s16), %14:_(s16), %15:_(s16), %16:_(s16) = G_UNMERGE_VALUES %12(<4 x s16>) + %1:_(<3 x s16>) = G_BUILD_VECTOR %13(s16), %14(s16), %15(s16) %2:sgpr_64 = COPY $sgpr30_sgpr31 - %12:_(<3 x s16>) = G_SHL %0, %1(<3 x s16>) - %16:_(<3 x s16>) = G_IMPLICIT_DEF - %17:_(<6 x s16>) = G_CONCAT_VECTORS %12(<3 x s16>), %16(<3 x s16>) - %14:_(<2 x s16>), %15:_(<2 x s16>), %18:_(<2 x s16>) = G_UNMERGE_VALUES %17(<6 x s16>) - $vgpr0 = COPY %14(<2 x s16>) - $vgpr1 = COPY %15(<2 x s16>) - %13:ccr_sgpr_64 = COPY %2 - S_SETPC_B64_return %13, implicit $vgpr0, implicit $vgpr1 + %17:_(<3 x s16>) = G_SHL %0, %1(<3 x s16>) + %21:_(s16), %22:_(s16), %23:_(s16) = G_UNMERGE_VALUES %17(<3 x s16>) + %24:_(s16) = G_IMPLICIT_DEF + %25:_(<4 x s16>) = G_BUILD_VECTOR %21(s16), %22(s16), %23(s16), %24(s16) + %19:_(<2 x s16>), %20:_(<2 x s16>) = G_UNMERGE_VALUES %25(<4 x s16>) + $vgpr0 = COPY %19(<2 x s16>) + $vgpr1 = COPY %20(<2 x s16>) + %18:ccr_sgpr_64 = COPY %2 + S_SETPC_B64_return %18, implicit $vgpr0, implicit $vgpr1 ... 
--- @@ -389,31 +349,30 @@ body: | ; GFX8: [[COPY:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0 ; GFX8-NEXT: [[COPY1:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr1 ; GFX8-NEXT: [[COPY2:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr2 - ; GFX8-NEXT: [[DEF:%[0-9]+]]:_(<2 x s16>) = G_IMPLICIT_DEF - ; GFX8-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<10 x s16>) = G_CONCAT_VECTORS [[COPY]](<2 x s16>), [[COPY1]](<2 x s16>), [[COPY2]](<2 x s16>), [[DEF]](<2 x s16>), [[DEF]](<2 x s16>) - ; GFX8-NEXT: [[BITCAST:%[0-9]+]]:_(s160) = G_BITCAST [[CONCAT_VECTORS]](<10 x s16>) - ; GFX8-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[BITCAST]](s160) - ; GFX8-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[UV]](s32) + ; GFX8-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY]](<2 x s16>) + ; GFX8-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST]](s32) ; GFX8-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; GFX8-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[UV]], [[C]](s32) + ; GFX8-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) ; GFX8-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) - ; GFX8-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[UV1]](s32) - ; GFX8-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[UV1]], [[C]](s32) + ; GFX8-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[COPY1]](<2 x s16>) + ; GFX8-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST1]](s32) + ; GFX8-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C]](s32) ; GFX8-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR1]](s32) - ; GFX8-NEXT: [[TRUNC4:%[0-9]+]]:_(s16) = G_TRUNC [[UV2]](s32) + ; GFX8-NEXT: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[COPY2]](<2 x s16>) + ; GFX8-NEXT: [[TRUNC4:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST2]](s32) ; GFX8-NEXT: [[COPY3:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr3 ; GFX8-NEXT: [[COPY4:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr4 ; GFX8-NEXT: [[COPY5:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr5 - ; GFX8-NEXT: [[CONCAT_VECTORS1:%[0-9]+]]:_(<10 x s16>) = G_CONCAT_VECTORS [[COPY3]](<2 x s16>), [[COPY4]](<2 x s16>), [[COPY5]](<2 x s16>), [[DEF]](<2 x s16>), [[DEF]](<2 x s16>) - ; GFX8-NEXT: [[BITCAST1:%[0-9]+]]:_(s160) = G_BITCAST [[CONCAT_VECTORS1]](<10 x s16>) - ; GFX8-NEXT: [[UV5:%[0-9]+]]:_(s32), [[UV6:%[0-9]+]]:_(s32), [[UV7:%[0-9]+]]:_(s32), [[UV8:%[0-9]+]]:_(s32), [[UV9:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[BITCAST1]](s160) - ; GFX8-NEXT: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[UV5]](s32) - ; GFX8-NEXT: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[UV5]], [[C]](s32) + ; GFX8-NEXT: [[BITCAST3:%[0-9]+]]:_(s32) = G_BITCAST [[COPY3]](<2 x s16>) + ; GFX8-NEXT: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST3]](s32) + ; GFX8-NEXT: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST3]], [[C]](s32) ; GFX8-NEXT: [[TRUNC6:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR2]](s32) - ; GFX8-NEXT: [[TRUNC7:%[0-9]+]]:_(s16) = G_TRUNC [[UV6]](s32) - ; GFX8-NEXT: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[UV6]], [[C]](s32) + ; GFX8-NEXT: [[BITCAST4:%[0-9]+]]:_(s32) = G_BITCAST [[COPY4]](<2 x s16>) + ; GFX8-NEXT: [[TRUNC7:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST4]](s32) + ; GFX8-NEXT: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST4]], [[C]](s32) ; GFX8-NEXT: [[TRUNC8:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR3]](s32) - ; GFX8-NEXT: [[TRUNC9:%[0-9]+]]:_(s16) = G_TRUNC [[UV7]](s32) + ; GFX8-NEXT: [[BITCAST5:%[0-9]+]]:_(s32) = G_BITCAST [[COPY5]](<2 x s16>) + ; GFX8-NEXT: [[TRUNC9:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST5]](s32) ; GFX8-NEXT: [[FCANONICALIZE:%[0-9]+]]:_(s16) = G_FCANONICALIZE [[TRUNC]] ; GFX8-NEXT: 
[[FCANONICALIZE1:%[0-9]+]]:_(s16) = G_FCANONICALIZE [[TRUNC5]] ; GFX8-NEXT: [[FMAXNUM_IEEE:%[0-9]+]]:_(s16) = G_FMAXNUM_IEEE [[FCANONICALIZE]], [[FCANONICALIZE1]] @@ -429,103 +388,90 @@ body: | ; GFX8-NEXT: [[FCANONICALIZE8:%[0-9]+]]:_(s16) = G_FCANONICALIZE [[TRUNC4]] ; GFX8-NEXT: [[FCANONICALIZE9:%[0-9]+]]:_(s16) = G_FCANONICALIZE [[TRUNC9]] ; GFX8-NEXT: [[FMAXNUM_IEEE4:%[0-9]+]]:_(s16) = G_FMAXNUM_IEEE [[FCANONICALIZE8]], [[FCANONICALIZE9]] - ; GFX8-NEXT: [[DEF1:%[0-9]+]]:_(<6 x s16>) = G_IMPLICIT_DEF - ; GFX8-NEXT: [[UV10:%[0-9]+]]:_(<2 x s16>), [[UV11:%[0-9]+]]:_(<2 x s16>), [[UV12:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF1]](<6 x s16>) - ; GFX8-NEXT: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[UV10]](<2 x s16>) ; GFX8-NEXT: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[FMAXNUM_IEEE]](s16) ; GFX8-NEXT: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[FMAXNUM_IEEE1]](s16) ; GFX8-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C]](s32) ; GFX8-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL]] - ; GFX8-NEXT: [[BITCAST3:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) + ; GFX8-NEXT: [[BITCAST6:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) ; GFX8-NEXT: [[ZEXT2:%[0-9]+]]:_(s32) = G_ZEXT [[FMAXNUM_IEEE2]](s16) ; GFX8-NEXT: [[ZEXT3:%[0-9]+]]:_(s32) = G_ZEXT [[FMAXNUM_IEEE3]](s16) ; GFX8-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[ZEXT3]], [[C]](s32) ; GFX8-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[ZEXT2]], [[SHL1]] - ; GFX8-NEXT: [[BITCAST4:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32) + ; GFX8-NEXT: [[BITCAST7:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32) ; GFX8-NEXT: [[ZEXT4:%[0-9]+]]:_(s32) = G_ZEXT [[FMAXNUM_IEEE4]](s16) - ; GFX8-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 - ; GFX8-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[BITCAST2]], [[C1]] - ; GFX8-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND]], [[C]](s32) + ; GFX8-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 + ; GFX8-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[C1]], [[C]](s32) ; GFX8-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[ZEXT4]], [[SHL2]] - ; GFX8-NEXT: [[BITCAST5:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR2]](s32) - ; GFX8-NEXT: $vgpr0 = COPY [[BITCAST3]](<2 x s16>) - ; GFX8-NEXT: $vgpr1 = COPY [[BITCAST4]](<2 x s16>) - ; GFX8-NEXT: $vgpr2 = COPY [[BITCAST5]](<2 x s16>) + ; GFX8-NEXT: [[BITCAST8:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR2]](s32) + ; GFX8-NEXT: $vgpr0 = COPY [[BITCAST6]](<2 x s16>) + ; GFX8-NEXT: $vgpr1 = COPY [[BITCAST7]](<2 x s16>) + ; GFX8-NEXT: $vgpr2 = COPY [[BITCAST8]](<2 x s16>) ; GFX8-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2 ; GFX9-LABEL: name: maxnum_v5i16 ; GFX9: [[COPY:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0 ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr1 ; GFX9-NEXT: [[COPY2:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr2 - ; GFX9-NEXT: [[DEF:%[0-9]+]]:_(<2 x s16>) = G_IMPLICIT_DEF - ; GFX9-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<10 x s16>) = G_CONCAT_VECTORS [[COPY]](<2 x s16>), [[COPY1]](<2 x s16>), [[COPY2]](<2 x s16>), [[DEF]](<2 x s16>), [[DEF]](<2 x s16>) - ; GFX9-NEXT: [[BITCAST:%[0-9]+]]:_(s160) = G_BITCAST [[CONCAT_VECTORS]](<10 x s16>) - ; GFX9-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[BITCAST]](s160) + ; GFX9-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY]](<2 x s16>) ; GFX9-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; GFX9-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[UV]], [[C]](s32) - ; GFX9-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[UV1]], [[C]](s32) - ; GFX9-NEXT: 
[[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[UV]](s32), [[LSHR]](s32) - ; GFX9-NEXT: [[BUILD_VECTOR_TRUNC1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[UV1]](s32), [[LSHR1]](s32) - ; GFX9-NEXT: [[DEF1:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF - ; GFX9-NEXT: [[BUILD_VECTOR_TRUNC2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[UV2]](s32), [[DEF1]](s32) - ; GFX9-NEXT: [[CONCAT_VECTORS1:%[0-9]+]]:_(<10 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR_TRUNC]](<2 x s16>), [[BUILD_VECTOR_TRUNC1]](<2 x s16>), [[BUILD_VECTOR_TRUNC2]](<2 x s16>), [[DEF]](<2 x s16>), [[DEF]](<2 x s16>) - ; GFX9-NEXT: [[UV5:%[0-9]+]]:_(<5 x s16>), [[UV6:%[0-9]+]]:_(<5 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS1]](<10 x s16>) + ; GFX9-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) + ; GFX9-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[COPY1]](<2 x s16>) + ; GFX9-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C]](s32) + ; GFX9-NEXT: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[COPY2]](<2 x s16>) ; GFX9-NEXT: [[COPY3:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr3 ; GFX9-NEXT: [[COPY4:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr4 ; GFX9-NEXT: [[COPY5:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr5 - ; GFX9-NEXT: [[CONCAT_VECTORS2:%[0-9]+]]:_(<10 x s16>) = G_CONCAT_VECTORS [[COPY3]](<2 x s16>), [[COPY4]](<2 x s16>), [[COPY5]](<2 x s16>), [[DEF]](<2 x s16>), [[DEF]](<2 x s16>) - ; GFX9-NEXT: [[BITCAST1:%[0-9]+]]:_(s160) = G_BITCAST [[CONCAT_VECTORS2]](<10 x s16>) - ; GFX9-NEXT: [[UV7:%[0-9]+]]:_(s32), [[UV8:%[0-9]+]]:_(s32), [[UV9:%[0-9]+]]:_(s32), [[UV10:%[0-9]+]]:_(s32), [[UV11:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[BITCAST1]](s160) - ; GFX9-NEXT: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[UV7]], [[C]](s32) - ; GFX9-NEXT: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[UV8]], [[C]](s32) - ; GFX9-NEXT: [[BUILD_VECTOR_TRUNC3:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[UV7]](s32), [[LSHR2]](s32) - ; GFX9-NEXT: [[BUILD_VECTOR_TRUNC4:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[UV8]](s32), [[LSHR3]](s32) - ; GFX9-NEXT: [[BUILD_VECTOR_TRUNC5:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[UV9]](s32), [[DEF1]](s32) - ; GFX9-NEXT: [[CONCAT_VECTORS3:%[0-9]+]]:_(<10 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR_TRUNC3]](<2 x s16>), [[BUILD_VECTOR_TRUNC4]](<2 x s16>), [[BUILD_VECTOR_TRUNC5]](<2 x s16>), [[DEF]](<2 x s16>), [[DEF]](<2 x s16>) - ; GFX9-NEXT: [[UV12:%[0-9]+]]:_(<5 x s16>), [[UV13:%[0-9]+]]:_(<5 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS3]](<10 x s16>) - ; GFX9-NEXT: [[DEF2:%[0-9]+]]:_(<6 x s16>) = G_IMPLICIT_DEF - ; GFX9-NEXT: [[INSERT:%[0-9]+]]:_(<6 x s16>) = G_INSERT [[DEF2]], [[UV5]](<5 x s16>), 0 - ; GFX9-NEXT: [[INSERT1:%[0-9]+]]:_(<6 x s16>) = G_INSERT [[DEF2]], [[UV12]](<5 x s16>), 0 - ; GFX9-NEXT: [[UV14:%[0-9]+]]:_(<2 x s16>), [[UV15:%[0-9]+]]:_(<2 x s16>), [[UV16:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[INSERT]](<6 x s16>) - ; GFX9-NEXT: [[UV17:%[0-9]+]]:_(<2 x s16>), [[UV18:%[0-9]+]]:_(<2 x s16>), [[UV19:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[INSERT1]](<6 x s16>) - ; GFX9-NEXT: [[FCANONICALIZE:%[0-9]+]]:_(<2 x s16>) = G_FCANONICALIZE [[UV14]] - ; GFX9-NEXT: [[FCANONICALIZE1:%[0-9]+]]:_(<2 x s16>) = G_FCANONICALIZE [[UV17]] + ; GFX9-NEXT: [[BITCAST3:%[0-9]+]]:_(s32) = G_BITCAST [[COPY3]](<2 x s16>) + ; GFX9-NEXT: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST3]], [[C]](s32) + ; GFX9-NEXT: [[BITCAST4:%[0-9]+]]:_(s32) = G_BITCAST [[COPY4]](<2 x s16>) + ; GFX9-NEXT: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST4]], [[C]](s32) + ; GFX9-NEXT: [[BITCAST5:%[0-9]+]]:_(s32) = G_BITCAST [[COPY5]](<2 x s16>) + ; GFX9-NEXT: 
[[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[BITCAST]](s32), [[LSHR]](s32) + ; GFX9-NEXT: [[BUILD_VECTOR_TRUNC1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[BITCAST1]](s32), [[LSHR1]](s32) + ; GFX9-NEXT: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF + ; GFX9-NEXT: [[BUILD_VECTOR_TRUNC2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[BITCAST2]](s32), [[DEF]](s32) + ; GFX9-NEXT: [[BUILD_VECTOR_TRUNC3:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[BITCAST3]](s32), [[LSHR2]](s32) + ; GFX9-NEXT: [[BUILD_VECTOR_TRUNC4:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[BITCAST4]](s32), [[LSHR3]](s32) + ; GFX9-NEXT: [[BUILD_VECTOR_TRUNC5:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[BITCAST5]](s32), [[DEF]](s32) + ; GFX9-NEXT: [[FCANONICALIZE:%[0-9]+]]:_(<2 x s16>) = G_FCANONICALIZE [[BUILD_VECTOR_TRUNC]] + ; GFX9-NEXT: [[FCANONICALIZE1:%[0-9]+]]:_(<2 x s16>) = G_FCANONICALIZE [[BUILD_VECTOR_TRUNC3]] ; GFX9-NEXT: [[FMAXNUM_IEEE:%[0-9]+]]:_(<2 x s16>) = G_FMAXNUM_IEEE [[FCANONICALIZE]], [[FCANONICALIZE1]] - ; GFX9-NEXT: [[FCANONICALIZE2:%[0-9]+]]:_(<2 x s16>) = G_FCANONICALIZE [[UV15]] - ; GFX9-NEXT: [[FCANONICALIZE3:%[0-9]+]]:_(<2 x s16>) = G_FCANONICALIZE [[UV18]] + ; GFX9-NEXT: [[FCANONICALIZE2:%[0-9]+]]:_(<2 x s16>) = G_FCANONICALIZE [[BUILD_VECTOR_TRUNC1]] + ; GFX9-NEXT: [[FCANONICALIZE3:%[0-9]+]]:_(<2 x s16>) = G_FCANONICALIZE [[BUILD_VECTOR_TRUNC4]] ; GFX9-NEXT: [[FMAXNUM_IEEE1:%[0-9]+]]:_(<2 x s16>) = G_FMAXNUM_IEEE [[FCANONICALIZE2]], [[FCANONICALIZE3]] - ; GFX9-NEXT: [[FCANONICALIZE4:%[0-9]+]]:_(<2 x s16>) = G_FCANONICALIZE [[UV16]] - ; GFX9-NEXT: [[FCANONICALIZE5:%[0-9]+]]:_(<2 x s16>) = G_FCANONICALIZE [[UV19]] + ; GFX9-NEXT: [[FCANONICALIZE4:%[0-9]+]]:_(<2 x s16>) = G_FCANONICALIZE [[BUILD_VECTOR_TRUNC2]] + ; GFX9-NEXT: [[FCANONICALIZE5:%[0-9]+]]:_(<2 x s16>) = G_FCANONICALIZE [[BUILD_VECTOR_TRUNC5]] ; GFX9-NEXT: [[FMAXNUM_IEEE2:%[0-9]+]]:_(<2 x s16>) = G_FMAXNUM_IEEE [[FCANONICALIZE4]], [[FCANONICALIZE5]] - ; GFX9-NEXT: [[DEF3:%[0-9]+]]:_(<6 x s16>) = G_IMPLICIT_DEF - ; GFX9-NEXT: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[FMAXNUM_IEEE2]](<2 x s16>) - ; GFX9-NEXT: [[UV20:%[0-9]+]]:_(<2 x s16>), [[UV21:%[0-9]+]]:_(<2 x s16>), [[UV22:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF3]](<6 x s16>) - ; GFX9-NEXT: [[BITCAST3:%[0-9]+]]:_(s32) = G_BITCAST [[UV20]](<2 x s16>) - ; GFX9-NEXT: [[BUILD_VECTOR_TRUNC6:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[BITCAST2]](s32), [[BITCAST3]](s32) - ; GFX9-NEXT: $vgpr0 = COPY [[FMAXNUM_IEEE]](<2 x s16>) - ; GFX9-NEXT: $vgpr1 = COPY [[FMAXNUM_IEEE1]](<2 x s16>) - ; GFX9-NEXT: $vgpr2 = COPY [[BUILD_VECTOR_TRUNC6]](<2 x s16>) + ; GFX9-NEXT: [[BITCAST6:%[0-9]+]]:_(s32) = G_BITCAST [[FMAXNUM_IEEE]](<2 x s16>) + ; GFX9-NEXT: [[LSHR4:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST6]], [[C]](s32) + ; GFX9-NEXT: [[BITCAST7:%[0-9]+]]:_(s32) = G_BITCAST [[FMAXNUM_IEEE1]](<2 x s16>) + ; GFX9-NEXT: [[LSHR5:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST7]], [[C]](s32) + ; GFX9-NEXT: [[BITCAST8:%[0-9]+]]:_(s32) = G_BITCAST [[FMAXNUM_IEEE2]](<2 x s16>) + ; GFX9-NEXT: [[BUILD_VECTOR_TRUNC6:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[BITCAST6]](s32), [[LSHR4]](s32) + ; GFX9-NEXT: [[BUILD_VECTOR_TRUNC7:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[BITCAST7]](s32), [[LSHR5]](s32) + ; GFX9-NEXT: [[BUILD_VECTOR_TRUNC8:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[BITCAST8]](s32), [[DEF]](s32) + ; GFX9-NEXT: $vgpr0 = COPY [[BUILD_VECTOR_TRUNC6]](<2 x s16>) + ; GFX9-NEXT: $vgpr1 = COPY [[BUILD_VECTOR_TRUNC7]](<2 x s16>) + ; GFX9-NEXT: $vgpr2 = COPY 
[[BUILD_VECTOR_TRUNC8]](<2 x s16>) ; GFX9-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2 %2:_(<2 x s16>) = COPY $vgpr0 %3:_(<2 x s16>) = COPY $vgpr1 %4:_(<2 x s16>) = COPY $vgpr2 - %5:_(<2 x s16>) = G_IMPLICIT_DEF - %6:_(<10 x s16>) = G_CONCAT_VECTORS %2(<2 x s16>), %3(<2 x s16>), %4(<2 x s16>), %5(<2 x s16>), %5(<2 x s16>) - %22:_(s160) = G_BITCAST %6(<10 x s16>) - %23:_(s80) = G_TRUNC %22(s160) - %0:_(<5 x s16>) = G_BITCAST %23(s80) - %8:_(<2 x s16>) = COPY $vgpr3 - %9:_(<2 x s16>) = COPY $vgpr4 - %10:_(<2 x s16>) = COPY $vgpr5 - %11:_(<10 x s16>) = G_CONCAT_VECTORS %8(<2 x s16>), %9(<2 x s16>), %10(<2 x s16>), %5(<2 x s16>), %5(<2 x s16>) - %24:_(s160) = G_BITCAST %11(<10 x s16>) - %25:_(s80) = G_TRUNC %24(s160) - %1:_(<5 x s16>) = G_BITCAST %25(s80) - %14:_(<5 x s16>) = G_FMAXNUM %0, %1 - %18:_(<5 x s16>) = G_IMPLICIT_DEF - %19:_(<10 x s16>) = G_CONCAT_VECTORS %14(<5 x s16>), %18(<5 x s16>) - %15:_(<2 x s16>), %16:_(<2 x s16>), %17:_(<2 x s16>), %20:_(<2 x s16>), %21:_(<2 x s16>) = G_UNMERGE_VALUES %19(<10 x s16>) - $vgpr0 = COPY %15(<2 x s16>) - $vgpr1 = COPY %16(<2 x s16>) - $vgpr2 = COPY %17(<2 x s16>) + %5:_(<6 x s16>) = G_CONCAT_VECTORS %2(<2 x s16>), %3(<2 x s16>), %4(<2 x s16>) + %6:_(s16), %7:_(s16), %8:_(s16), %9:_(s16), %10:_(s16), %11:_(s16) = G_UNMERGE_VALUES %5(<6 x s16>) + %0:_(<5 x s16>) = G_BUILD_VECTOR %6(s16), %7(s16), %8(s16), %9(s16), %10(s16) + %12:_(<2 x s16>) = COPY $vgpr3 + %13:_(<2 x s16>) = COPY $vgpr4 + %14:_(<2 x s16>) = COPY $vgpr5 + %15:_(<6 x s16>) = G_CONCAT_VECTORS %12(<2 x s16>), %13(<2 x s16>), %14(<2 x s16>) + %16:_(s16), %17:_(s16), %18:_(s16), %19:_(s16), %20:_(s16), %21:_(s16) = G_UNMERGE_VALUES %15(<6 x s16>) + %1:_(<5 x s16>) = G_BUILD_VECTOR %16(s16), %17(s16), %18(s16), %19(s16), %20(s16) + %23:_(<5 x s16>) = G_FMAXNUM %0, %1 + %27:_(s16), %28:_(s16), %29:_(s16), %30:_(s16), %31:_(s16) = G_UNMERGE_VALUES %23(<5 x s16>) + %32:_(s16) = G_IMPLICIT_DEF + %33:_(<6 x s16>) = G_BUILD_VECTOR %27(s16), %28(s16), %29(s16), %30(s16), %31(s16), %32(s16) + %24:_(<2 x s16>), %25:_(<2 x s16>), %26:_(<2 x s16>) = G_UNMERGE_VALUES %33(<6 x s16>) + $vgpr0 = COPY %24(<2 x s16>) + $vgpr1 = COPY %25(<2 x s16>) + $vgpr2 = COPY %26(<2 x s16>) SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2 ... 
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-xor.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-xor.mir index a0a2e3ee8303..e954104c2f70 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-xor.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-xor.mir @@ -1,5 +1,5 @@ # NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py -# RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=fiji -O0 -run-pass=legalizer -allow-ginsert-as-artifact=0 -global-isel-abort=0 %s -o - | FileCheck %s +# RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=fiji -O0 -run-pass=legalizer %s -o - | FileCheck %s --- name: test_xor_s32 @@ -305,18 +305,14 @@ body: | ; CHECK: [[COPY:%[0-9]+]]:_(<3 x s32>) = COPY $vgpr0_vgpr1_vgpr2 ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(<3 x s32>) = COPY $vgpr3_vgpr4_vgpr5 ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<3 x s32>) - ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[UV]](s32), [[UV1]](s32) - ; CHECK-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[UV2]](s32), [[DEF]](s32) - ; CHECK-NEXT: [[DEF1:%[0-9]+]]:_(<2 x s32>) = G_IMPLICIT_DEF ; CHECK-NEXT: [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](<3 x s32>) - ; CHECK-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[UV3]](s32), [[UV4]](s32) - ; CHECK-NEXT: [[BUILD_VECTOR3:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[UV5]](s32), [[DEF]](s32) - ; CHECK-NEXT: [[XOR:%[0-9]+]]:_(<2 x s32>) = G_XOR [[BUILD_VECTOR]], [[BUILD_VECTOR2]] - ; CHECK-NEXT: [[XOR1:%[0-9]+]]:_(<2 x s32>) = G_XOR [[BUILD_VECTOR1]], [[BUILD_VECTOR3]] - ; CHECK-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s32>) = G_CONCAT_VECTORS [[XOR]](<2 x s32>), [[XOR1]](<2 x s32>), [[DEF1]](<2 x s32>) - ; CHECK-NEXT: [[UV6:%[0-9]+]]:_(<3 x s32>), [[UV7:%[0-9]+]]:_(<3 x s32>) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<6 x s32>) - ; CHECK-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[UV6]](<3 x s32>) + ; CHECK-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[UV3]](s32), [[UV4]](s32) + ; CHECK-NEXT: [[XOR:%[0-9]+]]:_(<2 x s32>) = G_XOR [[BUILD_VECTOR]], [[BUILD_VECTOR1]] + ; CHECK-NEXT: [[XOR1:%[0-9]+]]:_(s32) = G_XOR [[UV2]], [[UV5]] + ; CHECK-NEXT: [[UV6:%[0-9]+]]:_(s32), [[UV7:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[XOR]](<2 x s32>) + ; CHECK-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[UV6]](s32), [[UV7]](s32), [[XOR1]](s32) + ; CHECK-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[BUILD_VECTOR2]](<3 x s32>) %0:_(<3 x s32>) = COPY $vgpr0_vgpr1_vgpr2 %1:_(<3 x s32>) = COPY $vgpr3_vgpr4_vgpr5 %2:_(<3 x s32>) = G_XOR %0, %1 @@ -353,23 +349,20 @@ body: | ; CHECK: [[DEF:%[0-9]+]]:_(<5 x s32>) = G_IMPLICIT_DEF ; CHECK-NEXT: [[DEF1:%[0-9]+]]:_(<5 x s32>) = G_IMPLICIT_DEF ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[DEF]](<5 x s32>) - ; CHECK-NEXT: [[DEF2:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[UV]](s32), [[UV1]](s32) ; CHECK-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[UV2]](s32), [[UV3]](s32) - ; CHECK-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[UV4]](s32), [[DEF2]](s32) - ; CHECK-NEXT: [[DEF3:%[0-9]+]]:_(<2 x s32>) = G_IMPLICIT_DEF ; CHECK-NEXT: [[UV5:%[0-9]+]]:_(s32), [[UV6:%[0-9]+]]:_(s32), [[UV7:%[0-9]+]]:_(s32), 
[[UV8:%[0-9]+]]:_(s32), [[UV9:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[DEF1]](<5 x s32>) - ; CHECK-NEXT: [[BUILD_VECTOR3:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[UV5]](s32), [[UV6]](s32) - ; CHECK-NEXT: [[BUILD_VECTOR4:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[UV7]](s32), [[UV8]](s32) - ; CHECK-NEXT: [[BUILD_VECTOR5:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[UV9]](s32), [[DEF2]](s32) - ; CHECK-NEXT: [[XOR:%[0-9]+]]:_(<2 x s32>) = G_XOR [[BUILD_VECTOR]], [[BUILD_VECTOR3]] - ; CHECK-NEXT: [[XOR1:%[0-9]+]]:_(<2 x s32>) = G_XOR [[BUILD_VECTOR1]], [[BUILD_VECTOR4]] - ; CHECK-NEXT: [[XOR2:%[0-9]+]]:_(<2 x s32>) = G_XOR [[BUILD_VECTOR2]], [[BUILD_VECTOR5]] - ; CHECK-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<10 x s32>) = G_CONCAT_VECTORS [[XOR]](<2 x s32>), [[XOR1]](<2 x s32>), [[XOR2]](<2 x s32>), [[DEF3]](<2 x s32>), [[DEF3]](<2 x s32>) - ; CHECK-NEXT: [[UV10:%[0-9]+]]:_(<5 x s32>), [[UV11:%[0-9]+]]:_(<5 x s32>) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<10 x s32>) - ; CHECK-NEXT: [[DEF4:%[0-9]+]]:_(<8 x s32>) = G_IMPLICIT_DEF - ; CHECK-NEXT: [[INSERT:%[0-9]+]]:_(<8 x s32>) = G_INSERT [[DEF4]], [[UV10]](<5 x s32>), 0 - ; CHECK-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[INSERT]](<8 x s32>) + ; CHECK-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[UV5]](s32), [[UV6]](s32) + ; CHECK-NEXT: [[BUILD_VECTOR3:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[UV7]](s32), [[UV8]](s32) + ; CHECK-NEXT: [[XOR:%[0-9]+]]:_(<2 x s32>) = G_XOR [[BUILD_VECTOR]], [[BUILD_VECTOR2]] + ; CHECK-NEXT: [[XOR1:%[0-9]+]]:_(<2 x s32>) = G_XOR [[BUILD_VECTOR1]], [[BUILD_VECTOR3]] + ; CHECK-NEXT: [[XOR2:%[0-9]+]]:_(s32) = G_XOR [[UV4]], [[UV9]] + ; CHECK-NEXT: [[UV10:%[0-9]+]]:_(s32), [[UV11:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[XOR]](<2 x s32>) + ; CHECK-NEXT: [[UV12:%[0-9]+]]:_(s32), [[UV13:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[XOR1]](<2 x s32>) + ; CHECK-NEXT: [[DEF2:%[0-9]+]]:_(<8 x s32>) = G_IMPLICIT_DEF + ; CHECK-NEXT: [[UV14:%[0-9]+]]:_(s32), [[UV15:%[0-9]+]]:_(s32), [[UV16:%[0-9]+]]:_(s32), [[UV17:%[0-9]+]]:_(s32), [[UV18:%[0-9]+]]:_(s32), [[UV19:%[0-9]+]]:_(s32), [[UV20:%[0-9]+]]:_(s32), [[UV21:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[DEF2]](<8 x s32>) + ; CHECK-NEXT: [[BUILD_VECTOR4:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[UV10]](s32), [[UV11]](s32), [[UV12]](s32), [[UV13]](s32), [[XOR2]](s32), [[UV19]](s32), [[UV20]](s32), [[UV21]](s32) + ; CHECK-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[BUILD_VECTOR4]](<8 x s32>) %0:_(<5 x s32>) = G_IMPLICIT_DEF %1:_(<5 x s32>) = G_IMPLICIT_DEF %2:_(<5 x s32>) = G_XOR %0, %1 @@ -424,40 +417,65 @@ body: | ; CHECK-LABEL: name: test_xor_v3s16 ; CHECK: [[COPY:%[0-9]+]]:_(<6 x s16>) = COPY $vgpr0_vgpr1_vgpr2 ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(<6 x s16>) = COPY $vgpr3_vgpr4_vgpr5 - ; CHECK-NEXT: [[UV:%[0-9]+]]:_(<3 x s16>), [[UV1:%[0-9]+]]:_(<3 x s16>) = G_UNMERGE_VALUES [[COPY]](<6 x s16>) - ; CHECK-NEXT: [[UV2:%[0-9]+]]:_(<3 x s16>), [[UV3:%[0-9]+]]:_(<3 x s16>) = G_UNMERGE_VALUES [[COPY1]](<6 x s16>) - ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF - ; CHECK-NEXT: [[INSERT:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF]], [[UV]](<3 x s16>), 0 - ; CHECK-NEXT: [[INSERT1:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF]], [[UV2]](<3 x s16>), 0 - ; CHECK-NEXT: [[XOR:%[0-9]+]]:_(<4 x s16>) = G_XOR [[INSERT]], [[INSERT1]] - ; CHECK-NEXT: [[DEF1:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF - ; CHECK-NEXT: [[UV4:%[0-9]+]]:_(<2 x s16>), [[UV5:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[XOR]](<4 x s16>) - ; CHECK-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST 
[[UV4]](<2 x s16>) + ; CHECK-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>), [[UV2:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[COPY]](<6 x s16>) + ; CHECK-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>) ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 ; CHECK-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) - ; CHECK-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV5]](<2 x s16>) - ; CHECK-NEXT: [[UV6:%[0-9]+]]:_(<2 x s16>), [[UV7:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF1]](<4 x s16>) - ; CHECK-NEXT: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[UV6]](<2 x s16>) - ; CHECK-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C]](s32) - ; CHECK-NEXT: [[BITCAST3:%[0-9]+]]:_(s32) = G_BITCAST [[UV7]](<2 x s16>) + ; CHECK-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>) ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 ; CHECK-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[BITCAST]], [[C1]] ; CHECK-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LSHR]], [[C1]] ; CHECK-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C]](s32) ; CHECK-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]] - ; CHECK-NEXT: [[BITCAST4:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) + ; CHECK-NEXT: [[BITCAST2:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) ; CHECK-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[BITCAST1]], [[C1]] - ; CHECK-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[BITCAST2]], [[C1]] - ; CHECK-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C]](s32) + ; CHECK-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY [[C2]](s32) + ; CHECK-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[COPY2]], [[C]](s32) ; CHECK-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL1]] - ; CHECK-NEXT: [[BITCAST5:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32) + ; CHECK-NEXT: [[BITCAST3:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32) + ; CHECK-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BITCAST2]](<2 x s16>), [[BITCAST3]](<2 x s16>) + ; CHECK-NEXT: [[UV3:%[0-9]+]]:_(<2 x s16>), [[UV4:%[0-9]+]]:_(<2 x s16>), [[UV5:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[COPY1]](<6 x s16>) + ; CHECK-NEXT: [[BITCAST4:%[0-9]+]]:_(s32) = G_BITCAST [[UV3]](<2 x s16>) + ; CHECK-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST4]], [[C]](s32) + ; CHECK-NEXT: [[BITCAST5:%[0-9]+]]:_(s32) = G_BITCAST [[UV4]](<2 x s16>) + ; CHECK-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[BITCAST4]], [[C1]] ; CHECK-NEXT: [[AND4:%[0-9]+]]:_(s32) = G_AND [[LSHR1]], [[C1]] - ; CHECK-NEXT: [[AND5:%[0-9]+]]:_(s32) = G_AND [[BITCAST3]], [[C1]] - ; CHECK-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[C]](s32) - ; CHECK-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[AND4]], [[SHL2]] + ; CHECK-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND4]], [[C]](s32) + ; CHECK-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[AND3]], [[SHL2]] ; CHECK-NEXT: [[BITCAST6:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR2]](s32) - ; CHECK-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BITCAST4]](<2 x s16>), [[BITCAST5]](<2 x s16>), [[BITCAST6]](<2 x s16>) - ; CHECK-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[CONCAT_VECTORS]](<6 x s16>) + ; CHECK-NEXT: [[AND5:%[0-9]+]]:_(s32) = G_AND [[BITCAST5]], [[C1]] + ; CHECK-NEXT: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[C2]], [[C]](s32) + ; CHECK-NEXT: [[OR3:%[0-9]+]]:_(s32) = G_OR [[AND5]], [[SHL3]] + ; CHECK-NEXT: [[BITCAST7:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR3]](s32) + ; CHECK-NEXT: [[CONCAT_VECTORS1:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BITCAST6]](<2 x s16>), 
[[BITCAST7]](<2 x s16>) + ; CHECK-NEXT: [[XOR:%[0-9]+]]:_(<4 x s16>) = G_XOR [[CONCAT_VECTORS]], [[CONCAT_VECTORS1]] + ; CHECK-NEXT: [[UV6:%[0-9]+]]:_(<2 x s16>), [[UV7:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[XOR]](<4 x s16>) + ; CHECK-NEXT: [[BITCAST8:%[0-9]+]]:_(s32) = G_BITCAST [[UV6]](<2 x s16>) + ; CHECK-NEXT: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST8]], [[C]](s32) + ; CHECK-NEXT: [[BITCAST9:%[0-9]+]]:_(s32) = G_BITCAST [[UV7]](<2 x s16>) + ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF + ; CHECK-NEXT: [[UV8:%[0-9]+]]:_(<2 x s16>), [[UV9:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<4 x s16>) + ; CHECK-NEXT: [[BITCAST10:%[0-9]+]]:_(s32) = G_BITCAST [[UV8]](<2 x s16>) + ; CHECK-NEXT: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST10]], [[C]](s32) + ; CHECK-NEXT: [[BITCAST11:%[0-9]+]]:_(s32) = G_BITCAST [[UV9]](<2 x s16>) + ; CHECK-NEXT: [[AND6:%[0-9]+]]:_(s32) = G_AND [[BITCAST8]], [[C1]] + ; CHECK-NEXT: [[AND7:%[0-9]+]]:_(s32) = G_AND [[LSHR2]], [[C1]] + ; CHECK-NEXT: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[AND7]], [[C]](s32) + ; CHECK-NEXT: [[OR4:%[0-9]+]]:_(s32) = G_OR [[AND6]], [[SHL4]] + ; CHECK-NEXT: [[BITCAST12:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR4]](s32) + ; CHECK-NEXT: [[AND8:%[0-9]+]]:_(s32) = G_AND [[BITCAST9]], [[C1]] + ; CHECK-NEXT: [[AND9:%[0-9]+]]:_(s32) = G_AND [[BITCAST10]], [[C1]] + ; CHECK-NEXT: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[AND9]], [[C]](s32) + ; CHECK-NEXT: [[OR5:%[0-9]+]]:_(s32) = G_OR [[AND8]], [[SHL5]] + ; CHECK-NEXT: [[BITCAST13:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR5]](s32) + ; CHECK-NEXT: [[AND10:%[0-9]+]]:_(s32) = G_AND [[LSHR3]], [[C1]] + ; CHECK-NEXT: [[AND11:%[0-9]+]]:_(s32) = G_AND [[BITCAST11]], [[C1]] + ; CHECK-NEXT: [[SHL6:%[0-9]+]]:_(s32) = G_SHL [[AND11]], [[C]](s32) + ; CHECK-NEXT: [[OR6:%[0-9]+]]:_(s32) = G_OR [[AND10]], [[SHL6]] + ; CHECK-NEXT: [[BITCAST14:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR6]](s32) + ; CHECK-NEXT: [[CONCAT_VECTORS2:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BITCAST12]](<2 x s16>), [[BITCAST13]](<2 x s16>), [[BITCAST14]](<2 x s16>) + ; CHECK-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[CONCAT_VECTORS2]](<6 x s16>) %0:_(<6 x s16>) = COPY $vgpr0_vgpr1_vgpr2 %1:_(<6 x s16>) = COPY $vgpr3_vgpr4_vgpr5 %2:_(<3 x s16>), %3:_(<3 x s16>) = G_UNMERGE_VALUES %0 @@ -492,54 +510,102 @@ body: | ; CHECK-LABEL: name: test_xor_v5s16 ; CHECK: [[DEF:%[0-9]+]]:_(<6 x s16>) = G_IMPLICIT_DEF - ; CHECK-NEXT: [[DEF1:%[0-9]+]]:_(<6 x s16>) = G_IMPLICIT_DEF - ; CHECK-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<30 x s16>) = G_CONCAT_VECTORS [[DEF]](<6 x s16>), [[DEF1]](<6 x s16>), [[DEF1]](<6 x s16>), [[DEF1]](<6 x s16>), [[DEF1]](<6 x s16>) - ; CHECK-NEXT: [[UV:%[0-9]+]]:_(<5 x s16>), [[UV1:%[0-9]+]]:_(<5 x s16>), [[UV2:%[0-9]+]]:_(<5 x s16>), [[UV3:%[0-9]+]]:_(<5 x s16>), [[UV4:%[0-9]+]]:_(<5 x s16>), [[UV5:%[0-9]+]]:_(<5 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<30 x s16>) - ; CHECK-NEXT: [[DEF2:%[0-9]+]]:_(<6 x s16>) = G_IMPLICIT_DEF - ; CHECK-NEXT: [[CONCAT_VECTORS1:%[0-9]+]]:_(<30 x s16>) = G_CONCAT_VECTORS [[DEF2]](<6 x s16>), [[DEF1]](<6 x s16>), [[DEF1]](<6 x s16>), [[DEF1]](<6 x s16>), [[DEF1]](<6 x s16>) - ; CHECK-NEXT: [[UV6:%[0-9]+]]:_(<5 x s16>), [[UV7:%[0-9]+]]:_(<5 x s16>), [[UV8:%[0-9]+]]:_(<5 x s16>), [[UV9:%[0-9]+]]:_(<5 x s16>), [[UV10:%[0-9]+]]:_(<5 x s16>), [[UV11:%[0-9]+]]:_(<5 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS1]](<30 x s16>) - ; CHECK-NEXT: [[INSERT:%[0-9]+]]:_(<6 x s16>) = G_INSERT [[DEF1]], [[UV]](<5 x s16>), 0 - ; CHECK-NEXT: [[INSERT1:%[0-9]+]]:_(<6 x s16>) = G_INSERT [[DEF1]], [[UV6]](<5 x s16>), 0 - ; 
CHECK-NEXT: [[UV12:%[0-9]+]]:_(<3 x s16>), [[UV13:%[0-9]+]]:_(<3 x s16>) = G_UNMERGE_VALUES [[INSERT]](<6 x s16>) - ; CHECK-NEXT: [[UV14:%[0-9]+]]:_(<3 x s16>), [[UV15:%[0-9]+]]:_(<3 x s16>) = G_UNMERGE_VALUES [[INSERT1]](<6 x s16>) - ; CHECK-NEXT: [[DEF3:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF - ; CHECK-NEXT: [[INSERT2:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF3]], [[UV12]](<3 x s16>), 0 - ; CHECK-NEXT: [[INSERT3:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF3]], [[UV14]](<3 x s16>), 0 - ; CHECK-NEXT: [[XOR:%[0-9]+]]:_(<4 x s16>) = G_XOR [[INSERT2]], [[INSERT3]] - ; CHECK-NEXT: [[INSERT4:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF3]], [[UV13]](<3 x s16>), 0 - ; CHECK-NEXT: [[INSERT5:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF3]], [[UV15]](<3 x s16>), 0 - ; CHECK-NEXT: [[XOR1:%[0-9]+]]:_(<4 x s16>) = G_XOR [[INSERT4]], [[INSERT5]] - ; CHECK-NEXT: [[UV16:%[0-9]+]]:_(<2 x s16>), [[UV17:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[XOR]](<4 x s16>) - ; CHECK-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV16]](<2 x s16>) + ; CHECK-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>), [[UV2:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<6 x s16>) + ; CHECK-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>) ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 ; CHECK-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) - ; CHECK-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV17]](<2 x s16>) - ; CHECK-NEXT: [[UV18:%[0-9]+]]:_(<2 x s16>), [[UV19:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[XOR1]](<4 x s16>) - ; CHECK-NEXT: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[UV18]](<2 x s16>) - ; CHECK-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C]](s32) - ; CHECK-NEXT: [[BITCAST3:%[0-9]+]]:_(s32) = G_BITCAST [[UV19]](<2 x s16>) + ; CHECK-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>) + ; CHECK-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C]](s32) + ; CHECK-NEXT: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[UV2]](<2 x s16>) + ; CHECK-NEXT: [[DEF1:%[0-9]+]]:_(<6 x s16>) = G_IMPLICIT_DEF + ; CHECK-NEXT: [[UV3:%[0-9]+]]:_(<2 x s16>), [[UV4:%[0-9]+]]:_(<2 x s16>), [[UV5:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF1]](<6 x s16>) + ; CHECK-NEXT: [[BITCAST3:%[0-9]+]]:_(s32) = G_BITCAST [[UV3]](<2 x s16>) + ; CHECK-NEXT: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST3]], [[C]](s32) + ; CHECK-NEXT: [[BITCAST4:%[0-9]+]]:_(s32) = G_BITCAST [[UV4]](<2 x s16>) + ; CHECK-NEXT: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST4]], [[C]](s32) + ; CHECK-NEXT: [[BITCAST5:%[0-9]+]]:_(s32) = G_BITCAST [[UV5]](<2 x s16>) ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 ; CHECK-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[BITCAST]], [[C1]] ; CHECK-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LSHR]], [[C1]] ; CHECK-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C]](s32) ; CHECK-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]] - ; CHECK-NEXT: [[BITCAST4:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) + ; CHECK-NEXT: [[BITCAST6:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) ; CHECK-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[BITCAST1]], [[C1]] - ; CHECK-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[BITCAST2]], [[C1]] - ; CHECK-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C]](s32) + ; CHECK-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY [[C2]](s32) + ; CHECK-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[COPY]], [[C]](s32) ; CHECK-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL1]] - ; CHECK-NEXT: [[BITCAST5:%[0-9]+]]:_(<2 x s16>) = 
G_BITCAST [[OR1]](s32) - ; CHECK-NEXT: [[AND4:%[0-9]+]]:_(s32) = G_AND [[LSHR1]], [[C1]] - ; CHECK-NEXT: [[AND5:%[0-9]+]]:_(s32) = G_AND [[BITCAST3]], [[C1]] - ; CHECK-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[C]](s32) - ; CHECK-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[AND4]], [[SHL2]] - ; CHECK-NEXT: [[BITCAST6:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR2]](s32) - ; CHECK-NEXT: [[CONCAT_VECTORS2:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BITCAST4]](<2 x s16>), [[BITCAST5]](<2 x s16>), [[BITCAST6]](<2 x s16>) - ; CHECK-NEXT: [[CONCAT_VECTORS3:%[0-9]+]]:_(<30 x s16>) = G_CONCAT_VECTORS [[CONCAT_VECTORS2]](<6 x s16>), [[DEF1]](<6 x s16>), [[DEF1]](<6 x s16>), [[DEF1]](<6 x s16>), [[DEF1]](<6 x s16>) - ; CHECK-NEXT: [[UV20:%[0-9]+]]:_(<5 x s16>), [[UV21:%[0-9]+]]:_(<5 x s16>), [[UV22:%[0-9]+]]:_(<5 x s16>), [[UV23:%[0-9]+]]:_(<5 x s16>), [[UV24:%[0-9]+]]:_(<5 x s16>), [[UV25:%[0-9]+]]:_(<5 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS3]](<30 x s16>) - ; CHECK-NEXT: [[DEF4:%[0-9]+]]:_(<8 x s16>) = G_IMPLICIT_DEF - ; CHECK-NEXT: [[INSERT6:%[0-9]+]]:_(<8 x s16>) = G_INSERT [[DEF4]], [[UV20]](<5 x s16>), 0 - ; CHECK-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[INSERT6]](<8 x s16>) + ; CHECK-NEXT: [[BITCAST7:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32) + ; CHECK-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BITCAST6]](<2 x s16>), [[BITCAST7]](<2 x s16>) + ; CHECK-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[BITCAST3]], [[C1]] + ; CHECK-NEXT: [[AND4:%[0-9]+]]:_(s32) = G_AND [[LSHR2]], [[C1]] + ; CHECK-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND4]], [[C]](s32) + ; CHECK-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[AND3]], [[SHL2]] + ; CHECK-NEXT: [[BITCAST8:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR2]](s32) + ; CHECK-NEXT: [[AND5:%[0-9]+]]:_(s32) = G_AND [[BITCAST4]], [[C1]] + ; CHECK-NEXT: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[C2]], [[C]](s32) + ; CHECK-NEXT: [[OR3:%[0-9]+]]:_(s32) = G_OR [[AND5]], [[SHL3]] + ; CHECK-NEXT: [[BITCAST9:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR3]](s32) + ; CHECK-NEXT: [[CONCAT_VECTORS1:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BITCAST8]](<2 x s16>), [[BITCAST9]](<2 x s16>) + ; CHECK-NEXT: [[XOR:%[0-9]+]]:_(<4 x s16>) = G_XOR [[CONCAT_VECTORS]], [[CONCAT_VECTORS1]] + ; CHECK-NEXT: [[UV6:%[0-9]+]]:_(<2 x s16>), [[UV7:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[XOR]](<4 x s16>) + ; CHECK-NEXT: [[BITCAST10:%[0-9]+]]:_(s32) = G_BITCAST [[UV6]](<2 x s16>) + ; CHECK-NEXT: [[LSHR4:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST10]], [[C]](s32) + ; CHECK-NEXT: [[BITCAST11:%[0-9]+]]:_(s32) = G_BITCAST [[UV7]](<2 x s16>) + ; CHECK-NEXT: [[AND6:%[0-9]+]]:_(s32) = G_AND [[LSHR1]], [[C1]] + ; CHECK-NEXT: [[AND7:%[0-9]+]]:_(s32) = G_AND [[BITCAST2]], [[C1]] + ; CHECK-NEXT: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[AND7]], [[C]](s32) + ; CHECK-NEXT: [[OR4:%[0-9]+]]:_(s32) = G_OR [[AND6]], [[SHL4]] + ; CHECK-NEXT: [[BITCAST12:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR4]](s32) + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY [[C2]](s32) + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY [[C2]](s32) + ; CHECK-NEXT: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[COPY2]], [[C]](s32) + ; CHECK-NEXT: [[OR5:%[0-9]+]]:_(s32) = G_OR [[COPY1]], [[SHL5]] + ; CHECK-NEXT: [[BITCAST13:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR5]](s32) + ; CHECK-NEXT: [[CONCAT_VECTORS2:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BITCAST12]](<2 x s16>), [[BITCAST13]](<2 x s16>) + ; CHECK-NEXT: [[AND8:%[0-9]+]]:_(s32) = G_AND [[LSHR3]], [[C1]] + ; CHECK-NEXT: [[AND9:%[0-9]+]]:_(s32) = G_AND [[BITCAST5]], [[C1]] + ; CHECK-NEXT: [[SHL6:%[0-9]+]]:_(s32) = G_SHL 
[[AND9]], [[C]](s32) + ; CHECK-NEXT: [[OR6:%[0-9]+]]:_(s32) = G_OR [[AND8]], [[SHL6]] + ; CHECK-NEXT: [[BITCAST14:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR6]](s32) + ; CHECK-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY [[C2]](s32) + ; CHECK-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY [[C2]](s32) + ; CHECK-NEXT: [[SHL7:%[0-9]+]]:_(s32) = G_SHL [[COPY4]], [[C]](s32) + ; CHECK-NEXT: [[OR7:%[0-9]+]]:_(s32) = G_OR [[COPY3]], [[SHL7]] + ; CHECK-NEXT: [[BITCAST15:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR7]](s32) + ; CHECK-NEXT: [[CONCAT_VECTORS3:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BITCAST14]](<2 x s16>), [[BITCAST15]](<2 x s16>) + ; CHECK-NEXT: [[XOR1:%[0-9]+]]:_(<4 x s16>) = G_XOR [[CONCAT_VECTORS2]], [[CONCAT_VECTORS3]] + ; CHECK-NEXT: [[UV8:%[0-9]+]]:_(<2 x s16>), [[UV9:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[XOR1]](<4 x s16>) + ; CHECK-NEXT: [[BITCAST16:%[0-9]+]]:_(s32) = G_BITCAST [[UV8]](<2 x s16>) + ; CHECK-NEXT: [[LSHR5:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST16]], [[C]](s32) + ; CHECK-NEXT: [[DEF2:%[0-9]+]]:_(<8 x s16>) = G_IMPLICIT_DEF + ; CHECK-NEXT: [[UV10:%[0-9]+]]:_(<2 x s16>), [[UV11:%[0-9]+]]:_(<2 x s16>), [[UV12:%[0-9]+]]:_(<2 x s16>), [[UV13:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF2]](<8 x s16>) + ; CHECK-NEXT: [[BITCAST17:%[0-9]+]]:_(s32) = G_BITCAST [[UV12]](<2 x s16>) + ; CHECK-NEXT: [[LSHR6:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST17]], [[C]](s32) + ; CHECK-NEXT: [[BITCAST18:%[0-9]+]]:_(s32) = G_BITCAST [[UV13]](<2 x s16>) + ; CHECK-NEXT: [[LSHR7:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST18]], [[C]](s32) + ; CHECK-NEXT: [[AND10:%[0-9]+]]:_(s32) = G_AND [[BITCAST10]], [[C1]] + ; CHECK-NEXT: [[AND11:%[0-9]+]]:_(s32) = G_AND [[LSHR4]], [[C1]] + ; CHECK-NEXT: [[SHL8:%[0-9]+]]:_(s32) = G_SHL [[AND11]], [[C]](s32) + ; CHECK-NEXT: [[OR8:%[0-9]+]]:_(s32) = G_OR [[AND10]], [[SHL8]] + ; CHECK-NEXT: [[BITCAST19:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR8]](s32) + ; CHECK-NEXT: [[AND12:%[0-9]+]]:_(s32) = G_AND [[BITCAST11]], [[C1]] + ; CHECK-NEXT: [[AND13:%[0-9]+]]:_(s32) = G_AND [[BITCAST16]], [[C1]] + ; CHECK-NEXT: [[SHL9:%[0-9]+]]:_(s32) = G_SHL [[AND13]], [[C]](s32) + ; CHECK-NEXT: [[OR9:%[0-9]+]]:_(s32) = G_OR [[AND12]], [[SHL9]] + ; CHECK-NEXT: [[BITCAST20:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR9]](s32) + ; CHECK-NEXT: [[AND14:%[0-9]+]]:_(s32) = G_AND [[LSHR5]], [[C1]] + ; CHECK-NEXT: [[AND15:%[0-9]+]]:_(s32) = G_AND [[LSHR6]], [[C1]] + ; CHECK-NEXT: [[SHL10:%[0-9]+]]:_(s32) = G_SHL [[AND15]], [[C]](s32) + ; CHECK-NEXT: [[OR10:%[0-9]+]]:_(s32) = G_OR [[AND14]], [[SHL10]] + ; CHECK-NEXT: [[BITCAST21:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR10]](s32) + ; CHECK-NEXT: [[AND16:%[0-9]+]]:_(s32) = G_AND [[BITCAST18]], [[C1]] + ; CHECK-NEXT: [[AND17:%[0-9]+]]:_(s32) = G_AND [[LSHR7]], [[C1]] + ; CHECK-NEXT: [[SHL11:%[0-9]+]]:_(s32) = G_SHL [[AND17]], [[C]](s32) + ; CHECK-NEXT: [[OR11:%[0-9]+]]:_(s32) = G_OR [[AND16]], [[SHL11]] + ; CHECK-NEXT: [[BITCAST22:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR11]](s32) + ; CHECK-NEXT: [[CONCAT_VECTORS4:%[0-9]+]]:_(<8 x s16>) = G_CONCAT_VECTORS [[BITCAST19]](<2 x s16>), [[BITCAST20]](<2 x s16>), [[BITCAST21]](<2 x s16>), [[BITCAST22]](<2 x s16>) + ; CHECK-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[CONCAT_VECTORS4]](<8 x s16>) %0:_(<5 x s16>) = G_IMPLICIT_DEF %1:_(<5 x s16>) = G_IMPLICIT_DEF %2:_(<5 x s16>) = G_XOR %0, %1 @@ -554,34 +620,15 @@ body: | bb.0: ; CHECK-LABEL: name: test_xor_v3s8 - ; CHECK: [[DEF:%[0-9]+]]:_(<3 x s8>) = G_IMPLICIT_DEF - ; CHECK-NEXT: [[DEF1:%[0-9]+]]:_(<3 x s8>) = G_IMPLICIT_DEF - ; CHECK-NEXT: [[DEF2:%[0-9]+]]:_(<4 x s8>) = G_IMPLICIT_DEF - ; 
CHECK-NEXT: [[INSERT:%[0-9]+]]:_(<4 x s8>) = G_INSERT [[DEF2]], [[DEF]](<3 x s8>), 0 - ; CHECK-NEXT: [[INSERT1:%[0-9]+]]:_(<4 x s8>) = G_INSERT [[DEF2]], [[DEF1]](<3 x s8>), 0 - ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s8), [[UV1:%[0-9]+]]:_(s8), [[UV2:%[0-9]+]]:_(s8), [[UV3:%[0-9]+]]:_(s8) = G_UNMERGE_VALUES [[INSERT]](<4 x s8>) - ; CHECK-NEXT: [[UV4:%[0-9]+]]:_(s8), [[UV5:%[0-9]+]]:_(s8), [[UV6:%[0-9]+]]:_(s8), [[UV7:%[0-9]+]]:_(s8) = G_UNMERGE_VALUES [[INSERT1]](<4 x s8>) - ; CHECK-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[UV]](s8) - ; CHECK-NEXT: [[ANYEXT1:%[0-9]+]]:_(s32) = G_ANYEXT [[UV4]](s8) - ; CHECK-NEXT: [[XOR:%[0-9]+]]:_(s32) = G_XOR [[ANYEXT]], [[ANYEXT1]] - ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(s8) = G_TRUNC [[XOR]](s32) - ; CHECK-NEXT: [[ANYEXT2:%[0-9]+]]:_(s32) = G_ANYEXT [[UV1]](s8) - ; CHECK-NEXT: [[ANYEXT3:%[0-9]+]]:_(s32) = G_ANYEXT [[UV5]](s8) - ; CHECK-NEXT: [[XOR1:%[0-9]+]]:_(s32) = G_XOR [[ANYEXT2]], [[ANYEXT3]] - ; CHECK-NEXT: [[TRUNC1:%[0-9]+]]:_(s8) = G_TRUNC [[XOR1]](s32) - ; CHECK-NEXT: [[ANYEXT4:%[0-9]+]]:_(s32) = G_ANYEXT [[UV2]](s8) - ; CHECK-NEXT: [[ANYEXT5:%[0-9]+]]:_(s32) = G_ANYEXT [[UV6]](s8) - ; CHECK-NEXT: [[XOR2:%[0-9]+]]:_(s32) = G_XOR [[ANYEXT4]], [[ANYEXT5]] - ; CHECK-NEXT: [[TRUNC2:%[0-9]+]]:_(s8) = G_TRUNC [[XOR2]](s32) - ; CHECK-NEXT: [[ANYEXT6:%[0-9]+]]:_(s32) = G_ANYEXT [[UV3]](s8) - ; CHECK-NEXT: [[ANYEXT7:%[0-9]+]]:_(s32) = G_ANYEXT [[UV7]](s8) - ; CHECK-NEXT: [[XOR3:%[0-9]+]]:_(s32) = G_XOR [[ANYEXT6]], [[ANYEXT7]] - ; CHECK-NEXT: [[TRUNC3:%[0-9]+]]:_(s8) = G_TRUNC [[XOR3]](s32) - ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s8>) = G_BUILD_VECTOR [[TRUNC]](s8), [[TRUNC1]](s8), [[TRUNC2]](s8), [[TRUNC3]](s8) - ; CHECK-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<12 x s8>) = G_CONCAT_VECTORS [[BUILD_VECTOR]](<4 x s8>), [[DEF2]](<4 x s8>), [[DEF2]](<4 x s8>) - ; CHECK-NEXT: [[UV8:%[0-9]+]]:_(<3 x s8>), [[UV9:%[0-9]+]]:_(<3 x s8>), [[UV10:%[0-9]+]]:_(<3 x s8>), [[UV11:%[0-9]+]]:_(<3 x s8>) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<12 x s8>) - ; CHECK-NEXT: [[ANYEXT8:%[0-9]+]]:_(<3 x s32>) = G_ANYEXT [[UV8]](<3 x s8>) - ; CHECK-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[ANYEXT8]](<3 x s32>) + ; CHECK: [[DEF:%[0-9]+]]:_(<4 x s32>) = G_IMPLICIT_DEF + ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[DEF]](<4 x s32>) + ; CHECK-NEXT: [[DEF1:%[0-9]+]]:_(<4 x s32>) = G_IMPLICIT_DEF + ; CHECK-NEXT: [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32), [[UV6:%[0-9]+]]:_(s32), [[UV7:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[DEF1]](<4 x s32>) + ; CHECK-NEXT: [[XOR:%[0-9]+]]:_(s32) = G_XOR [[UV]], [[UV4]] + ; CHECK-NEXT: [[XOR1:%[0-9]+]]:_(s32) = G_XOR [[UV1]], [[UV5]] + ; CHECK-NEXT: [[XOR2:%[0-9]+]]:_(s32) = G_XOR [[UV2]], [[UV6]] + ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[XOR]](s32), [[XOR1]](s32), [[XOR2]](s32) + ; CHECK-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[BUILD_VECTOR]](<3 x s32>) %0:_(<3 x s8>) = G_IMPLICIT_DEF %1:_(<3 x s8>) = G_IMPLICIT_DEF %2:_(<3 x s8>) = G_XOR %0, %1 diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-zext.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-zext.mir index 5cac99355652..b62013d28e81 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-zext.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-zext.mir @@ -142,10 +142,7 @@ body: | ; CHECK-LABEL: name: test_zext_v3s16_to_v3s32 ; CHECK: [[COPY:%[0-9]+]]:_(<4 x s16>) = COPY $vgpr0_vgpr1 - ; CHECK-NEXT: [[EXTRACT:%[0-9]+]]:_(<3 x s16>) = G_EXTRACT [[COPY]](<4 x s16>), 0 - ; CHECK-NEXT: 
[[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF - ; CHECK-NEXT: [[INSERT:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF]], [[EXTRACT]](<3 x s16>), 0 - ; CHECK-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[INSERT]](<4 x s16>) + ; CHECK-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[COPY]](<4 x s16>) ; CHECK-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>) ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 ; CHECK-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) @@ -707,12 +704,6 @@ body: | ; CHECK-NEXT: [[DEF1:%[0-9]+]]:_(s64) = G_IMPLICIT_DEF ; CHECK-NEXT: [[UV8:%[0-9]+]]:_(s32), [[UV9:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[DEF1]](s64) ; CHECK-NEXT: [[LSHR7:%[0-9]+]]:_(s32) = G_LSHR [[UV8]], [[C2]](s32) - ; CHECK-NEXT: [[UV10:%[0-9]+]]:_(s32), [[UV11:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[DEF1]](s64) - ; CHECK-NEXT: [[LSHR8:%[0-9]+]]:_(s32) = G_LSHR [[UV10]], [[C2]](s32) - ; CHECK-NEXT: [[UV12:%[0-9]+]]:_(s32), [[UV13:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[DEF1]](s64) - ; CHECK-NEXT: [[LSHR9:%[0-9]+]]:_(s32) = G_LSHR [[UV12]], [[C2]](s32) - ; CHECK-NEXT: [[UV14:%[0-9]+]]:_(s32), [[UV15:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[DEF1]](s64) - ; CHECK-NEXT: [[LSHR10:%[0-9]+]]:_(s32) = G_LSHR [[UV14]], [[C2]](s32) ; CHECK-NEXT: [[AND11:%[0-9]+]]:_(s32) = G_AND [[UV4]], [[C3]] ; CHECK-NEXT: [[AND12:%[0-9]+]]:_(s32) = G_AND [[LSHR4]], [[C3]] ; CHECK-NEXT: [[SHL6:%[0-9]+]]:_(s32) = G_SHL [[AND12]], [[C2]](s32) @@ -737,28 +728,16 @@ body: | ; CHECK-NEXT: [[SHL10:%[0-9]+]]:_(s32) = G_SHL [[AND20]], [[C2]](s32) ; CHECK-NEXT: [[OR11:%[0-9]+]]:_(s32) = G_OR [[AND19]], [[SHL10]] ; CHECK-NEXT: [[MV5:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[OR10]](s32), [[OR11]](s32) - ; CHECK-NEXT: [[AND21:%[0-9]+]]:_(s32) = G_AND [[UV10]], [[C3]] - ; CHECK-NEXT: [[AND22:%[0-9]+]]:_(s32) = G_AND [[LSHR8]], [[C3]] - ; CHECK-NEXT: [[SHL11:%[0-9]+]]:_(s32) = G_SHL [[AND22]], [[C2]](s32) - ; CHECK-NEXT: [[OR12:%[0-9]+]]:_(s32) = G_OR [[AND21]], [[SHL11]] - ; CHECK-NEXT: [[AND23:%[0-9]+]]:_(s32) = G_AND [[UV11]], [[C3]] - ; CHECK-NEXT: [[AND24:%[0-9]+]]:_(s32) = G_AND [[UV12]], [[C3]] - ; CHECK-NEXT: [[SHL12:%[0-9]+]]:_(s32) = G_SHL [[AND24]], [[C2]](s32) - ; CHECK-NEXT: [[OR13:%[0-9]+]]:_(s32) = G_OR [[AND23]], [[SHL12]] + ; CHECK-NEXT: [[SHL11:%[0-9]+]]:_(s32) = G_SHL [[AND19]], [[C2]](s32) + ; CHECK-NEXT: [[OR12:%[0-9]+]]:_(s32) = G_OR [[AND18]], [[SHL11]] + ; CHECK-NEXT: [[OR13:%[0-9]+]]:_(s32) = G_OR [[AND20]], [[SHL9]] ; CHECK-NEXT: [[MV6:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[OR12]](s32), [[OR13]](s32) - ; CHECK-NEXT: [[AND25:%[0-9]+]]:_(s32) = G_AND [[LSHR9]], [[C3]] - ; CHECK-NEXT: [[AND26:%[0-9]+]]:_(s32) = G_AND [[UV13]], [[C3]] - ; CHECK-NEXT: [[SHL13:%[0-9]+]]:_(s32) = G_SHL [[AND26]], [[C2]](s32) - ; CHECK-NEXT: [[OR14:%[0-9]+]]:_(s32) = G_OR [[AND25]], [[SHL13]] - ; CHECK-NEXT: [[AND27:%[0-9]+]]:_(s32) = G_AND [[UV14]], [[C3]] - ; CHECK-NEXT: [[AND28:%[0-9]+]]:_(s32) = G_AND [[LSHR10]], [[C3]] - ; CHECK-NEXT: [[SHL14:%[0-9]+]]:_(s32) = G_SHL [[AND28]], [[C2]](s32) - ; CHECK-NEXT: [[OR15:%[0-9]+]]:_(s32) = G_OR [[AND27]], [[SHL14]] - ; CHECK-NEXT: [[MV7:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[OR14]](s32), [[OR15]](s32) - ; CHECK-NEXT: [[AND29:%[0-9]+]]:_(s32) = G_AND [[UV15]], [[C3]] - ; CHECK-NEXT: [[OR16:%[0-9]+]]:_(s32) = G_OR [[AND29]], [[SHL3]] - ; CHECK-NEXT: [[OR17:%[0-9]+]]:_(s32) = G_OR [[COPY1]], [[SHL3]] - ; CHECK-NEXT: [[MV8:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[OR16]](s32), [[OR17]](s32) + ; CHECK-NEXT: 
[[COPY4:%[0-9]+]]:_(s32) = COPY [[OR11]](s32) + ; CHECK-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY [[OR12]](s32) + ; CHECK-NEXT: [[MV7:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY4]](s32), [[COPY5]](s32) + ; CHECK-NEXT: [[OR14:%[0-9]+]]:_(s32) = G_OR [[AND20]], [[SHL3]] + ; CHECK-NEXT: [[OR15:%[0-9]+]]:_(s32) = G_OR [[COPY1]], [[SHL3]] + ; CHECK-NEXT: [[MV8:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[OR14]](s32), [[OR15]](s32) ; CHECK-NEXT: [[MV9:%[0-9]+]]:_(s384) = G_MERGE_VALUES [[MV3]](s64), [[MV4]](s64), [[MV5]](s64), [[MV6]](s64), [[MV7]](s64), [[MV8]](s64) ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(s112) = G_TRUNC [[MV9]](s384) ; CHECK-NEXT: S_ENDPGM 0, implicit [[TRUNC]](s112) diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.image.load.1d.d16.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.image.load.1d.d16.ll index 47c4ac2aa042..88b3244df5fa 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.image.load.1d.d16.ll +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.image.load.1d.d16.ll @@ -509,13 +509,10 @@ define amdgpu_ps <3 x half> @load_1d_v3f16_xyz(<8 x i32> inreg %rsrc, i32 %s) { ; GFX8-UNPACKED-NEXT: s_mov_b32 s7, s9 ; GFX8-UNPACKED-NEXT: image_load v[0:2], v0, s[0:7] dmask:0x7 unorm d16 ; GFX8-UNPACKED-NEXT: s_mov_b32 s0, 0xffff -; GFX8-UNPACKED-NEXT: s_and_b32 s1, s0, s0 -; GFX8-UNPACKED-NEXT: s_lshl_b32 s1, s1, 16 -; GFX8-UNPACKED-NEXT: v_mov_b32_e32 v3, s1 ; GFX8-UNPACKED-NEXT: s_waitcnt vmcnt(0) -; GFX8-UNPACKED-NEXT: v_and_b32_e32 v4, s0, v1 -; GFX8-UNPACKED-NEXT: v_or_b32_sdwa v1, v2, v3 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD -; GFX8-UNPACKED-NEXT: v_lshlrev_b32_e32 v2, 16, v4 +; GFX8-UNPACKED-NEXT: v_and_b32_e32 v3, s0, v1 +; GFX8-UNPACKED-NEXT: v_and_b32_e32 v1, s0, v2 +; GFX8-UNPACKED-NEXT: v_lshlrev_b32_e32 v2, 16, v3 ; GFX8-UNPACKED-NEXT: v_or_b32_sdwa v0, v0, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD ; GFX8-UNPACKED-NEXT: ; return to shader part epilog ; @@ -530,12 +527,11 @@ define amdgpu_ps <3 x half> @load_1d_v3f16_xyz(<8 x i32> inreg %rsrc, i32 %s) { ; GFX8-PACKED-NEXT: s_mov_b32 s6, s8 ; GFX8-PACKED-NEXT: s_mov_b32 s7, s9 ; GFX8-PACKED-NEXT: image_load v[0:1], v0, s[0:7] dmask:0x7 unorm d16 -; GFX8-PACKED-NEXT: s_mov_b32 s0, 0xffff -; GFX8-PACKED-NEXT: s_and_b32 s0, s0, s0 -; GFX8-PACKED-NEXT: s_lshl_b32 s0, s0, 16 -; GFX8-PACKED-NEXT: v_mov_b32_e32 v2, s0 ; GFX8-PACKED-NEXT: s_waitcnt vmcnt(0) -; GFX8-PACKED-NEXT: v_or_b32_sdwa v1, v1, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD +; GFX8-PACKED-NEXT: v_lshrrev_b32_e32 v2, 16, v0 +; GFX8-PACKED-NEXT: v_lshlrev_b32_e32 v2, 16, v2 +; GFX8-PACKED-NEXT: v_and_b32_e32 v1, 0xffff, v1 +; GFX8-PACKED-NEXT: v_or_b32_sdwa v0, v0, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD ; GFX8-PACKED-NEXT: ; return to shader part epilog ; ; GFX9-LABEL: load_1d_v3f16_xyz: @@ -552,7 +548,10 @@ define amdgpu_ps <3 x half> @load_1d_v3f16_xyz(<8 x i32> inreg %rsrc, i32 %s) { ; GFX9-NEXT: v_mov_b32_e32 v2, 0xffff ; GFX9-NEXT: s_lshl_b32 s0, s0, 16 ; GFX9-NEXT: s_waitcnt vmcnt(0) +; GFX9-NEXT: v_lshrrev_b32_e32 v3, 16, v0 +; GFX9-NEXT: v_lshlrev_b32_e32 v3, 16, v3 ; GFX9-NEXT: v_and_or_b32 v1, v1, v2, s0 +; GFX9-NEXT: v_and_or_b32 v0, v0, v2, v3 ; GFX9-NEXT: ; return to shader part epilog ; ; GFX10-LABEL: load_1d_v3f16_xyz: @@ -565,11 +564,15 @@ define amdgpu_ps <3 x half> @load_1d_v3f16_xyz(<8 x i32> inreg %rsrc, i32 %s) { ; GFX10-NEXT: s_mov_b32 s5, s7 ; GFX10-NEXT: s_mov_b32 s6, s8 ; GFX10-NEXT: s_mov_b32 s7, s9 +; GFX10-NEXT: 
v_mov_b32_e32 v3, 0xffff ; GFX10-NEXT: image_load v[0:1], v0, s[0:7] dmask:0x7 dim:SQ_RSRC_IMG_1D unorm d16 ; GFX10-NEXT: s_waitcnt_depctr 0xffe3 ; GFX10-NEXT: s_lshl_b32 s0, s0, 16 ; GFX10-NEXT: s_waitcnt vmcnt(0) -; GFX10-NEXT: v_and_or_b32 v1, 0xffff, v1, s0 +; GFX10-NEXT: v_lshrrev_b32_e32 v2, 16, v0 +; GFX10-NEXT: v_and_or_b32 v1, v1, v3, s0 +; GFX10-NEXT: v_lshlrev_b32_e32 v2, 16, v2 +; GFX10-NEXT: v_and_or_b32 v0, v0, v3, v2 ; GFX10-NEXT: ; return to shader part epilog %v = call <3 x half> @llvm.amdgcn.image.load.1d.v3f16.i32(i32 7, i32 %s, <8 x i32> %rsrc, i32 0, i32 0) ret <3 x half> %v diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.s.buffer.load.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.s.buffer.load.ll index 36ae4e61ab3c..b93c0102f171 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.s.buffer.load.ll +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.s.buffer.load.ll @@ -9,46 +9,49 @@ define amdgpu_ps i32 @s_buffer_load_i32(<4 x i32> inreg %rsrc, i32 inreg %soffset) { ; GFX6-LABEL: name: s_buffer_load_i32 ; GFX6: bb.1 (%ir-block.0): - ; GFX6: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6 - ; GFX6: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2 - ; GFX6: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3 - ; GFX6: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4 - ; GFX6: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5 - ; GFX6: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 - ; GFX6: [[COPY4:%[0-9]+]]:sreg_32 = COPY $sgpr6 - ; GFX6: [[S_BUFFER_LOAD_DWORD_SGPR:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_SGPR [[REG_SEQUENCE]], [[COPY4]], 0 :: (dereferenceable invariant load (s32)) - ; GFX6: [[COPY5:%[0-9]+]]:vgpr_32 = COPY [[S_BUFFER_LOAD_DWORD_SGPR]] - ; GFX6: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY5]], implicit $exec - ; GFX6: $sgpr0 = COPY [[V_READFIRSTLANE_B32_]] - ; GFX6: SI_RETURN_TO_EPILOG implicit $sgpr0 + ; GFX6-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6 + ; GFX6-NEXT: {{ $}} + ; GFX6-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2 + ; GFX6-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3 + ; GFX6-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4 + ; GFX6-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5 + ; GFX6-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 + ; GFX6-NEXT: [[COPY4:%[0-9]+]]:sreg_32 = COPY $sgpr6 + ; GFX6-NEXT: [[S_BUFFER_LOAD_DWORD_SGPR:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_SGPR [[REG_SEQUENCE]], [[COPY4]], 0 :: (dereferenceable invariant load (s32)) + ; GFX6-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY [[S_BUFFER_LOAD_DWORD_SGPR]] + ; GFX6-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY5]], implicit $exec + ; GFX6-NEXT: $sgpr0 = COPY [[V_READFIRSTLANE_B32_]] + ; GFX6-NEXT: SI_RETURN_TO_EPILOG implicit $sgpr0 ; GFX7-LABEL: name: s_buffer_load_i32 ; GFX7: bb.1 (%ir-block.0): - ; GFX7: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6 - ; GFX7: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2 - ; GFX7: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3 - ; GFX7: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4 - ; GFX7: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5 - ; GFX7: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 - ; GFX7: [[COPY4:%[0-9]+]]:sreg_32 = COPY $sgpr6 - ; GFX7: 
[[S_BUFFER_LOAD_DWORD_SGPR:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_SGPR [[REG_SEQUENCE]], [[COPY4]], 0 :: (dereferenceable invariant load (s32)) - ; GFX7: [[COPY5:%[0-9]+]]:vgpr_32 = COPY [[S_BUFFER_LOAD_DWORD_SGPR]] - ; GFX7: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY5]], implicit $exec - ; GFX7: $sgpr0 = COPY [[V_READFIRSTLANE_B32_]] - ; GFX7: SI_RETURN_TO_EPILOG implicit $sgpr0 + ; GFX7-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6 + ; GFX7-NEXT: {{ $}} + ; GFX7-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2 + ; GFX7-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3 + ; GFX7-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4 + ; GFX7-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5 + ; GFX7-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 + ; GFX7-NEXT: [[COPY4:%[0-9]+]]:sreg_32 = COPY $sgpr6 + ; GFX7-NEXT: [[S_BUFFER_LOAD_DWORD_SGPR:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_SGPR [[REG_SEQUENCE]], [[COPY4]], 0 :: (dereferenceable invariant load (s32)) + ; GFX7-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY [[S_BUFFER_LOAD_DWORD_SGPR]] + ; GFX7-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY5]], implicit $exec + ; GFX7-NEXT: $sgpr0 = COPY [[V_READFIRSTLANE_B32_]] + ; GFX7-NEXT: SI_RETURN_TO_EPILOG implicit $sgpr0 ; GFX8-LABEL: name: s_buffer_load_i32 ; GFX8: bb.1 (%ir-block.0): - ; GFX8: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6 - ; GFX8: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2 - ; GFX8: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3 - ; GFX8: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4 - ; GFX8: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5 - ; GFX8: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 - ; GFX8: [[COPY4:%[0-9]+]]:sreg_32 = COPY $sgpr6 - ; GFX8: [[S_BUFFER_LOAD_DWORD_SGPR:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_SGPR [[REG_SEQUENCE]], [[COPY4]], 0 :: (dereferenceable invariant load (s32)) - ; GFX8: [[COPY5:%[0-9]+]]:vgpr_32 = COPY [[S_BUFFER_LOAD_DWORD_SGPR]] - ; GFX8: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY5]], implicit $exec - ; GFX8: $sgpr0 = COPY [[V_READFIRSTLANE_B32_]] - ; GFX8: SI_RETURN_TO_EPILOG implicit $sgpr0 + ; GFX8-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6 + ; GFX8-NEXT: {{ $}} + ; GFX8-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2 + ; GFX8-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3 + ; GFX8-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4 + ; GFX8-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5 + ; GFX8-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 + ; GFX8-NEXT: [[COPY4:%[0-9]+]]:sreg_32 = COPY $sgpr6 + ; GFX8-NEXT: [[S_BUFFER_LOAD_DWORD_SGPR:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_SGPR [[REG_SEQUENCE]], [[COPY4]], 0 :: (dereferenceable invariant load (s32)) + ; GFX8-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY [[S_BUFFER_LOAD_DWORD_SGPR]] + ; GFX8-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY5]], implicit $exec + ; GFX8-NEXT: $sgpr0 = COPY [[V_READFIRSTLANE_B32_]] + ; GFX8-NEXT: SI_RETURN_TO_EPILOG implicit $sgpr0 %val = call i32 @llvm.amdgcn.s.buffer.load.i32(<4 x i32> %rsrc, i32 %soffset, i32 0) ret i32 %val } @@ -56,46 +59,49 @@ define amdgpu_ps i32 @s_buffer_load_i32(<4 x i32> inreg %rsrc, i32 inreg %soffse define amdgpu_ps 
i32 @s_buffer_load_i32_glc(<4 x i32> inreg %rsrc, i32 inreg %soffset) { ; GFX6-LABEL: name: s_buffer_load_i32_glc ; GFX6: bb.1 (%ir-block.0): - ; GFX6: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6 - ; GFX6: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2 - ; GFX6: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3 - ; GFX6: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4 - ; GFX6: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5 - ; GFX6: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 - ; GFX6: [[COPY4:%[0-9]+]]:sreg_32 = COPY $sgpr6 - ; GFX6: [[S_BUFFER_LOAD_DWORD_SGPR:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_SGPR [[REG_SEQUENCE]], [[COPY4]], 1 :: (dereferenceable invariant load (s32)) - ; GFX6: [[COPY5:%[0-9]+]]:vgpr_32 = COPY [[S_BUFFER_LOAD_DWORD_SGPR]] - ; GFX6: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY5]], implicit $exec - ; GFX6: $sgpr0 = COPY [[V_READFIRSTLANE_B32_]] - ; GFX6: SI_RETURN_TO_EPILOG implicit $sgpr0 + ; GFX6-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6 + ; GFX6-NEXT: {{ $}} + ; GFX6-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2 + ; GFX6-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3 + ; GFX6-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4 + ; GFX6-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5 + ; GFX6-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 + ; GFX6-NEXT: [[COPY4:%[0-9]+]]:sreg_32 = COPY $sgpr6 + ; GFX6-NEXT: [[S_BUFFER_LOAD_DWORD_SGPR:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_SGPR [[REG_SEQUENCE]], [[COPY4]], 1 :: (dereferenceable invariant load (s32)) + ; GFX6-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY [[S_BUFFER_LOAD_DWORD_SGPR]] + ; GFX6-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY5]], implicit $exec + ; GFX6-NEXT: $sgpr0 = COPY [[V_READFIRSTLANE_B32_]] + ; GFX6-NEXT: SI_RETURN_TO_EPILOG implicit $sgpr0 ; GFX7-LABEL: name: s_buffer_load_i32_glc ; GFX7: bb.1 (%ir-block.0): - ; GFX7: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6 - ; GFX7: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2 - ; GFX7: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3 - ; GFX7: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4 - ; GFX7: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5 - ; GFX7: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 - ; GFX7: [[COPY4:%[0-9]+]]:sreg_32 = COPY $sgpr6 - ; GFX7: [[S_BUFFER_LOAD_DWORD_SGPR:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_SGPR [[REG_SEQUENCE]], [[COPY4]], 1 :: (dereferenceable invariant load (s32)) - ; GFX7: [[COPY5:%[0-9]+]]:vgpr_32 = COPY [[S_BUFFER_LOAD_DWORD_SGPR]] - ; GFX7: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY5]], implicit $exec - ; GFX7: $sgpr0 = COPY [[V_READFIRSTLANE_B32_]] - ; GFX7: SI_RETURN_TO_EPILOG implicit $sgpr0 + ; GFX7-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6 + ; GFX7-NEXT: {{ $}} + ; GFX7-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2 + ; GFX7-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3 + ; GFX7-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4 + ; GFX7-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5 + ; GFX7-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 + ; GFX7-NEXT: [[COPY4:%[0-9]+]]:sreg_32 = COPY $sgpr6 + ; GFX7-NEXT: 
[[S_BUFFER_LOAD_DWORD_SGPR:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_SGPR [[REG_SEQUENCE]], [[COPY4]], 1 :: (dereferenceable invariant load (s32)) + ; GFX7-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY [[S_BUFFER_LOAD_DWORD_SGPR]] + ; GFX7-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY5]], implicit $exec + ; GFX7-NEXT: $sgpr0 = COPY [[V_READFIRSTLANE_B32_]] + ; GFX7-NEXT: SI_RETURN_TO_EPILOG implicit $sgpr0 ; GFX8-LABEL: name: s_buffer_load_i32_glc ; GFX8: bb.1 (%ir-block.0): - ; GFX8: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6 - ; GFX8: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2 - ; GFX8: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3 - ; GFX8: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4 - ; GFX8: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5 - ; GFX8: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 - ; GFX8: [[COPY4:%[0-9]+]]:sreg_32 = COPY $sgpr6 - ; GFX8: [[S_BUFFER_LOAD_DWORD_SGPR:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_SGPR [[REG_SEQUENCE]], [[COPY4]], 1 :: (dereferenceable invariant load (s32)) - ; GFX8: [[COPY5:%[0-9]+]]:vgpr_32 = COPY [[S_BUFFER_LOAD_DWORD_SGPR]] - ; GFX8: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY5]], implicit $exec - ; GFX8: $sgpr0 = COPY [[V_READFIRSTLANE_B32_]] - ; GFX8: SI_RETURN_TO_EPILOG implicit $sgpr0 + ; GFX8-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6 + ; GFX8-NEXT: {{ $}} + ; GFX8-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2 + ; GFX8-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3 + ; GFX8-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4 + ; GFX8-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5 + ; GFX8-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 + ; GFX8-NEXT: [[COPY4:%[0-9]+]]:sreg_32 = COPY $sgpr6 + ; GFX8-NEXT: [[S_BUFFER_LOAD_DWORD_SGPR:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_SGPR [[REG_SEQUENCE]], [[COPY4]], 1 :: (dereferenceable invariant load (s32)) + ; GFX8-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY [[S_BUFFER_LOAD_DWORD_SGPR]] + ; GFX8-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY5]], implicit $exec + ; GFX8-NEXT: $sgpr0 = COPY [[V_READFIRSTLANE_B32_]] + ; GFX8-NEXT: SI_RETURN_TO_EPILOG implicit $sgpr0 %val = call i32 @llvm.amdgcn.s.buffer.load.i32(<4 x i32> %rsrc, i32 %soffset, i32 1) ret i32 %val } @@ -103,61 +109,64 @@ define amdgpu_ps i32 @s_buffer_load_i32_glc(<4 x i32> inreg %rsrc, i32 inreg %so define amdgpu_ps <2 x i32> @s_buffer_load_v2i32(<4 x i32> inreg %rsrc, i32 inreg %soffset) { ; GFX6-LABEL: name: s_buffer_load_v2i32 ; GFX6: bb.1 (%ir-block.0): - ; GFX6: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6 - ; GFX6: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2 - ; GFX6: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3 - ; GFX6: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4 - ; GFX6: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5 - ; GFX6: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 - ; GFX6: [[COPY4:%[0-9]+]]:sreg_32 = COPY $sgpr6 - ; GFX6: [[S_BUFFER_LOAD_DWORDX2_SGPR:%[0-9]+]]:sreg_64_xexec = S_BUFFER_LOAD_DWORDX2_SGPR [[REG_SEQUENCE]], [[COPY4]], 0 :: (dereferenceable invariant load (s64), align 4) - ; GFX6: [[COPY5:%[0-9]+]]:sreg_32 = COPY [[S_BUFFER_LOAD_DWORDX2_SGPR]].sub0 - ; GFX6: [[COPY6:%[0-9]+]]:sreg_32 = COPY [[S_BUFFER_LOAD_DWORDX2_SGPR]].sub1 - ; GFX6: 
[[COPY7:%[0-9]+]]:vgpr_32 = COPY [[COPY5]] - ; GFX6: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY7]], implicit $exec - ; GFX6: $sgpr0 = COPY [[V_READFIRSTLANE_B32_]] - ; GFX6: [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[COPY6]] - ; GFX6: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY8]], implicit $exec - ; GFX6: $sgpr1 = COPY [[V_READFIRSTLANE_B32_1]] - ; GFX6: SI_RETURN_TO_EPILOG implicit $sgpr0, implicit $sgpr1 + ; GFX6-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6 + ; GFX6-NEXT: {{ $}} + ; GFX6-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2 + ; GFX6-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3 + ; GFX6-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4 + ; GFX6-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5 + ; GFX6-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 + ; GFX6-NEXT: [[COPY4:%[0-9]+]]:sreg_32 = COPY $sgpr6 + ; GFX6-NEXT: [[S_BUFFER_LOAD_DWORDX2_SGPR:%[0-9]+]]:sreg_64_xexec = S_BUFFER_LOAD_DWORDX2_SGPR [[REG_SEQUENCE]], [[COPY4]], 0 :: (dereferenceable invariant load (s64), align 4) + ; GFX6-NEXT: [[COPY5:%[0-9]+]]:sreg_32 = COPY [[S_BUFFER_LOAD_DWORDX2_SGPR]].sub0 + ; GFX6-NEXT: [[COPY6:%[0-9]+]]:sreg_32 = COPY [[S_BUFFER_LOAD_DWORDX2_SGPR]].sub1 + ; GFX6-NEXT: [[COPY7:%[0-9]+]]:vgpr_32 = COPY [[COPY5]] + ; GFX6-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY7]], implicit $exec + ; GFX6-NEXT: $sgpr0 = COPY [[V_READFIRSTLANE_B32_]] + ; GFX6-NEXT: [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[COPY6]] + ; GFX6-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY8]], implicit $exec + ; GFX6-NEXT: $sgpr1 = COPY [[V_READFIRSTLANE_B32_1]] + ; GFX6-NEXT: SI_RETURN_TO_EPILOG implicit $sgpr0, implicit $sgpr1 ; GFX7-LABEL: name: s_buffer_load_v2i32 ; GFX7: bb.1 (%ir-block.0): - ; GFX7: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6 - ; GFX7: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2 - ; GFX7: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3 - ; GFX7: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4 - ; GFX7: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5 - ; GFX7: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 - ; GFX7: [[COPY4:%[0-9]+]]:sreg_32 = COPY $sgpr6 - ; GFX7: [[S_BUFFER_LOAD_DWORDX2_SGPR:%[0-9]+]]:sreg_64_xexec = S_BUFFER_LOAD_DWORDX2_SGPR [[REG_SEQUENCE]], [[COPY4]], 0 :: (dereferenceable invariant load (s64), align 4) - ; GFX7: [[COPY5:%[0-9]+]]:sreg_32 = COPY [[S_BUFFER_LOAD_DWORDX2_SGPR]].sub0 - ; GFX7: [[COPY6:%[0-9]+]]:sreg_32 = COPY [[S_BUFFER_LOAD_DWORDX2_SGPR]].sub1 - ; GFX7: [[COPY7:%[0-9]+]]:vgpr_32 = COPY [[COPY5]] - ; GFX7: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY7]], implicit $exec - ; GFX7: $sgpr0 = COPY [[V_READFIRSTLANE_B32_]] - ; GFX7: [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[COPY6]] - ; GFX7: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY8]], implicit $exec - ; GFX7: $sgpr1 = COPY [[V_READFIRSTLANE_B32_1]] - ; GFX7: SI_RETURN_TO_EPILOG implicit $sgpr0, implicit $sgpr1 + ; GFX7-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6 + ; GFX7-NEXT: {{ $}} + ; GFX7-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2 + ; GFX7-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3 + ; GFX7-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4 + ; GFX7-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5 + ; GFX7-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, 
[[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 + ; GFX7-NEXT: [[COPY4:%[0-9]+]]:sreg_32 = COPY $sgpr6 + ; GFX7-NEXT: [[S_BUFFER_LOAD_DWORDX2_SGPR:%[0-9]+]]:sreg_64_xexec = S_BUFFER_LOAD_DWORDX2_SGPR [[REG_SEQUENCE]], [[COPY4]], 0 :: (dereferenceable invariant load (s64), align 4) + ; GFX7-NEXT: [[COPY5:%[0-9]+]]:sreg_32 = COPY [[S_BUFFER_LOAD_DWORDX2_SGPR]].sub0 + ; GFX7-NEXT: [[COPY6:%[0-9]+]]:sreg_32 = COPY [[S_BUFFER_LOAD_DWORDX2_SGPR]].sub1 + ; GFX7-NEXT: [[COPY7:%[0-9]+]]:vgpr_32 = COPY [[COPY5]] + ; GFX7-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY7]], implicit $exec + ; GFX7-NEXT: $sgpr0 = COPY [[V_READFIRSTLANE_B32_]] + ; GFX7-NEXT: [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[COPY6]] + ; GFX7-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY8]], implicit $exec + ; GFX7-NEXT: $sgpr1 = COPY [[V_READFIRSTLANE_B32_1]] + ; GFX7-NEXT: SI_RETURN_TO_EPILOG implicit $sgpr0, implicit $sgpr1 ; GFX8-LABEL: name: s_buffer_load_v2i32 ; GFX8: bb.1 (%ir-block.0): - ; GFX8: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6 - ; GFX8: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2 - ; GFX8: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3 - ; GFX8: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4 - ; GFX8: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5 - ; GFX8: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 - ; GFX8: [[COPY4:%[0-9]+]]:sreg_32 = COPY $sgpr6 - ; GFX8: [[S_BUFFER_LOAD_DWORDX2_SGPR:%[0-9]+]]:sreg_64_xexec = S_BUFFER_LOAD_DWORDX2_SGPR [[REG_SEQUENCE]], [[COPY4]], 0 :: (dereferenceable invariant load (s64), align 4) - ; GFX8: [[COPY5:%[0-9]+]]:sreg_32 = COPY [[S_BUFFER_LOAD_DWORDX2_SGPR]].sub0 - ; GFX8: [[COPY6:%[0-9]+]]:sreg_32 = COPY [[S_BUFFER_LOAD_DWORDX2_SGPR]].sub1 - ; GFX8: [[COPY7:%[0-9]+]]:vgpr_32 = COPY [[COPY5]] - ; GFX8: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY7]], implicit $exec - ; GFX8: $sgpr0 = COPY [[V_READFIRSTLANE_B32_]] - ; GFX8: [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[COPY6]] - ; GFX8: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY8]], implicit $exec - ; GFX8: $sgpr1 = COPY [[V_READFIRSTLANE_B32_1]] - ; GFX8: SI_RETURN_TO_EPILOG implicit $sgpr0, implicit $sgpr1 + ; GFX8-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6 + ; GFX8-NEXT: {{ $}} + ; GFX8-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2 + ; GFX8-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3 + ; GFX8-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4 + ; GFX8-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5 + ; GFX8-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 + ; GFX8-NEXT: [[COPY4:%[0-9]+]]:sreg_32 = COPY $sgpr6 + ; GFX8-NEXT: [[S_BUFFER_LOAD_DWORDX2_SGPR:%[0-9]+]]:sreg_64_xexec = S_BUFFER_LOAD_DWORDX2_SGPR [[REG_SEQUENCE]], [[COPY4]], 0 :: (dereferenceable invariant load (s64), align 4) + ; GFX8-NEXT: [[COPY5:%[0-9]+]]:sreg_32 = COPY [[S_BUFFER_LOAD_DWORDX2_SGPR]].sub0 + ; GFX8-NEXT: [[COPY6:%[0-9]+]]:sreg_32 = COPY [[S_BUFFER_LOAD_DWORDX2_SGPR]].sub1 + ; GFX8-NEXT: [[COPY7:%[0-9]+]]:vgpr_32 = COPY [[COPY5]] + ; GFX8-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY7]], implicit $exec + ; GFX8-NEXT: $sgpr0 = COPY [[V_READFIRSTLANE_B32_]] + ; GFX8-NEXT: [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[COPY6]] + ; GFX8-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY8]], implicit 
$exec + ; GFX8-NEXT: $sgpr1 = COPY [[V_READFIRSTLANE_B32_1]] + ; GFX8-NEXT: SI_RETURN_TO_EPILOG implicit $sgpr0, implicit $sgpr1 %val = call <2 x i32> @llvm.amdgcn.s.buffer.load.v2i32(<4 x i32> %rsrc, i32 %soffset, i32 0) ret <2 x i32> %val } @@ -165,85 +174,79 @@ define amdgpu_ps <2 x i32> @s_buffer_load_v2i32(<4 x i32> inreg %rsrc, i32 inreg define amdgpu_ps <3 x i32> @s_buffer_load_v3i32(<4 x i32> inreg %rsrc, i32 inreg %soffset) { ; GFX6-LABEL: name: s_buffer_load_v3i32 ; GFX6: bb.1 (%ir-block.0): - ; GFX6: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6 - ; GFX6: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2 - ; GFX6: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3 - ; GFX6: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4 - ; GFX6: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5 - ; GFX6: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 - ; GFX6: [[COPY4:%[0-9]+]]:sreg_32 = COPY $sgpr6 - ; GFX6: [[S_BUFFER_LOAD_DWORDX4_SGPR:%[0-9]+]]:sgpr_128 = S_BUFFER_LOAD_DWORDX4_SGPR [[REG_SEQUENCE]], [[COPY4]], 0 :: (dereferenceable invariant load (s96), align 4) - ; GFX6: [[DEF:%[0-9]+]]:sgpr_128 = IMPLICIT_DEF - ; GFX6: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_512 = REG_SEQUENCE [[S_BUFFER_LOAD_DWORDX4_SGPR]], %subreg.sub0_sub1_sub2_sub3, [[DEF]], %subreg.sub4_sub5_sub6_sub7, [[DEF]], %subreg.sub8_sub9_sub10_sub11 - ; GFX6: [[COPY5:%[0-9]+]]:sgpr_512_with_sub0_sub1_sub2 = COPY [[REG_SEQUENCE1]] - ; GFX6: [[COPY6:%[0-9]+]]:sgpr_96 = COPY [[COPY5]].sub0_sub1_sub2 - ; GFX6: [[COPY7:%[0-9]+]]:sreg_32 = COPY [[COPY6]].sub0 - ; GFX6: [[COPY8:%[0-9]+]]:sreg_32 = COPY [[COPY6]].sub1 - ; GFX6: [[COPY9:%[0-9]+]]:sreg_32 = COPY [[COPY6]].sub2 - ; GFX6: [[COPY10:%[0-9]+]]:vgpr_32 = COPY [[COPY7]] - ; GFX6: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY10]], implicit $exec - ; GFX6: $sgpr0 = COPY [[V_READFIRSTLANE_B32_]] - ; GFX6: [[COPY11:%[0-9]+]]:vgpr_32 = COPY [[COPY8]] - ; GFX6: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY11]], implicit $exec - ; GFX6: $sgpr1 = COPY [[V_READFIRSTLANE_B32_1]] - ; GFX6: [[COPY12:%[0-9]+]]:vgpr_32 = COPY [[COPY9]] - ; GFX6: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY12]], implicit $exec - ; GFX6: $sgpr2 = COPY [[V_READFIRSTLANE_B32_2]] - ; GFX6: SI_RETURN_TO_EPILOG implicit $sgpr0, implicit $sgpr1, implicit $sgpr2 + ; GFX6-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6 + ; GFX6-NEXT: {{ $}} + ; GFX6-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2 + ; GFX6-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3 + ; GFX6-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4 + ; GFX6-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5 + ; GFX6-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 + ; GFX6-NEXT: [[COPY4:%[0-9]+]]:sreg_32 = COPY $sgpr6 + ; GFX6-NEXT: [[S_BUFFER_LOAD_DWORDX4_SGPR:%[0-9]+]]:sgpr_128 = S_BUFFER_LOAD_DWORDX4_SGPR [[REG_SEQUENCE]], [[COPY4]], 0 :: (dereferenceable invariant load (s96), align 4) + ; GFX6-NEXT: [[COPY5:%[0-9]+]]:sreg_32 = COPY [[S_BUFFER_LOAD_DWORDX4_SGPR]].sub0 + ; GFX6-NEXT: [[COPY6:%[0-9]+]]:sreg_32 = COPY [[S_BUFFER_LOAD_DWORDX4_SGPR]].sub1 + ; GFX6-NEXT: [[COPY7:%[0-9]+]]:sreg_32 = COPY [[S_BUFFER_LOAD_DWORDX4_SGPR]].sub2 + ; GFX6-NEXT: [[COPY8:%[0-9]+]]:sreg_32 = COPY [[S_BUFFER_LOAD_DWORDX4_SGPR]].sub3 + ; GFX6-NEXT: [[COPY9:%[0-9]+]]:vgpr_32 = COPY [[COPY5]] + ; GFX6-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = 
V_READFIRSTLANE_B32 [[COPY9]], implicit $exec + ; GFX6-NEXT: $sgpr0 = COPY [[V_READFIRSTLANE_B32_]] + ; GFX6-NEXT: [[COPY10:%[0-9]+]]:vgpr_32 = COPY [[COPY6]] + ; GFX6-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY10]], implicit $exec + ; GFX6-NEXT: $sgpr1 = COPY [[V_READFIRSTLANE_B32_1]] + ; GFX6-NEXT: [[COPY11:%[0-9]+]]:vgpr_32 = COPY [[COPY7]] + ; GFX6-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY11]], implicit $exec + ; GFX6-NEXT: $sgpr2 = COPY [[V_READFIRSTLANE_B32_2]] + ; GFX6-NEXT: SI_RETURN_TO_EPILOG implicit $sgpr0, implicit $sgpr1, implicit $sgpr2 ; GFX7-LABEL: name: s_buffer_load_v3i32 ; GFX7: bb.1 (%ir-block.0): - ; GFX7: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6 - ; GFX7: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2 - ; GFX7: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3 - ; GFX7: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4 - ; GFX7: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5 - ; GFX7: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 - ; GFX7: [[COPY4:%[0-9]+]]:sreg_32 = COPY $sgpr6 - ; GFX7: [[S_BUFFER_LOAD_DWORDX4_SGPR:%[0-9]+]]:sgpr_128 = S_BUFFER_LOAD_DWORDX4_SGPR [[REG_SEQUENCE]], [[COPY4]], 0 :: (dereferenceable invariant load (s96), align 4) - ; GFX7: [[DEF:%[0-9]+]]:sgpr_128 = IMPLICIT_DEF - ; GFX7: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_512 = REG_SEQUENCE [[S_BUFFER_LOAD_DWORDX4_SGPR]], %subreg.sub0_sub1_sub2_sub3, [[DEF]], %subreg.sub4_sub5_sub6_sub7, [[DEF]], %subreg.sub8_sub9_sub10_sub11 - ; GFX7: [[COPY5:%[0-9]+]]:sgpr_512_with_sub0_sub1_sub2 = COPY [[REG_SEQUENCE1]] - ; GFX7: [[COPY6:%[0-9]+]]:sgpr_96 = COPY [[COPY5]].sub0_sub1_sub2 - ; GFX7: [[COPY7:%[0-9]+]]:sreg_32 = COPY [[COPY6]].sub0 - ; GFX7: [[COPY8:%[0-9]+]]:sreg_32 = COPY [[COPY6]].sub1 - ; GFX7: [[COPY9:%[0-9]+]]:sreg_32 = COPY [[COPY6]].sub2 - ; GFX7: [[COPY10:%[0-9]+]]:vgpr_32 = COPY [[COPY7]] - ; GFX7: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY10]], implicit $exec - ; GFX7: $sgpr0 = COPY [[V_READFIRSTLANE_B32_]] - ; GFX7: [[COPY11:%[0-9]+]]:vgpr_32 = COPY [[COPY8]] - ; GFX7: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY11]], implicit $exec - ; GFX7: $sgpr1 = COPY [[V_READFIRSTLANE_B32_1]] - ; GFX7: [[COPY12:%[0-9]+]]:vgpr_32 = COPY [[COPY9]] - ; GFX7: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY12]], implicit $exec - ; GFX7: $sgpr2 = COPY [[V_READFIRSTLANE_B32_2]] - ; GFX7: SI_RETURN_TO_EPILOG implicit $sgpr0, implicit $sgpr1, implicit $sgpr2 + ; GFX7-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6 + ; GFX7-NEXT: {{ $}} + ; GFX7-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2 + ; GFX7-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3 + ; GFX7-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4 + ; GFX7-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5 + ; GFX7-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 + ; GFX7-NEXT: [[COPY4:%[0-9]+]]:sreg_32 = COPY $sgpr6 + ; GFX7-NEXT: [[S_BUFFER_LOAD_DWORDX4_SGPR:%[0-9]+]]:sgpr_128 = S_BUFFER_LOAD_DWORDX4_SGPR [[REG_SEQUENCE]], [[COPY4]], 0 :: (dereferenceable invariant load (s96), align 4) + ; GFX7-NEXT: [[COPY5:%[0-9]+]]:sreg_32 = COPY [[S_BUFFER_LOAD_DWORDX4_SGPR]].sub0 + ; GFX7-NEXT: [[COPY6:%[0-9]+]]:sreg_32 = COPY [[S_BUFFER_LOAD_DWORDX4_SGPR]].sub1 + ; GFX7-NEXT: [[COPY7:%[0-9]+]]:sreg_32 = COPY [[S_BUFFER_LOAD_DWORDX4_SGPR]].sub2 + ; GFX7-NEXT: 
[[COPY8:%[0-9]+]]:sreg_32 = COPY [[S_BUFFER_LOAD_DWORDX4_SGPR]].sub3 + ; GFX7-NEXT: [[COPY9:%[0-9]+]]:vgpr_32 = COPY [[COPY5]] + ; GFX7-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY9]], implicit $exec + ; GFX7-NEXT: $sgpr0 = COPY [[V_READFIRSTLANE_B32_]] + ; GFX7-NEXT: [[COPY10:%[0-9]+]]:vgpr_32 = COPY [[COPY6]] + ; GFX7-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY10]], implicit $exec + ; GFX7-NEXT: $sgpr1 = COPY [[V_READFIRSTLANE_B32_1]] + ; GFX7-NEXT: [[COPY11:%[0-9]+]]:vgpr_32 = COPY [[COPY7]] + ; GFX7-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY11]], implicit $exec + ; GFX7-NEXT: $sgpr2 = COPY [[V_READFIRSTLANE_B32_2]] + ; GFX7-NEXT: SI_RETURN_TO_EPILOG implicit $sgpr0, implicit $sgpr1, implicit $sgpr2 ; GFX8-LABEL: name: s_buffer_load_v3i32 ; GFX8: bb.1 (%ir-block.0): - ; GFX8: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6 - ; GFX8: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2 - ; GFX8: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3 - ; GFX8: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4 - ; GFX8: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5 - ; GFX8: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 - ; GFX8: [[COPY4:%[0-9]+]]:sreg_32 = COPY $sgpr6 - ; GFX8: [[S_BUFFER_LOAD_DWORDX4_SGPR:%[0-9]+]]:sgpr_128 = S_BUFFER_LOAD_DWORDX4_SGPR [[REG_SEQUENCE]], [[COPY4]], 0 :: (dereferenceable invariant load (s96), align 4) - ; GFX8: [[DEF:%[0-9]+]]:sgpr_128 = IMPLICIT_DEF - ; GFX8: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_512 = REG_SEQUENCE [[S_BUFFER_LOAD_DWORDX4_SGPR]], %subreg.sub0_sub1_sub2_sub3, [[DEF]], %subreg.sub4_sub5_sub6_sub7, [[DEF]], %subreg.sub8_sub9_sub10_sub11 - ; GFX8: [[COPY5:%[0-9]+]]:sgpr_512_with_sub0_sub1_sub2 = COPY [[REG_SEQUENCE1]] - ; GFX8: [[COPY6:%[0-9]+]]:sgpr_96 = COPY [[COPY5]].sub0_sub1_sub2 - ; GFX8: [[COPY7:%[0-9]+]]:sreg_32 = COPY [[COPY6]].sub0 - ; GFX8: [[COPY8:%[0-9]+]]:sreg_32 = COPY [[COPY6]].sub1 - ; GFX8: [[COPY9:%[0-9]+]]:sreg_32 = COPY [[COPY6]].sub2 - ; GFX8: [[COPY10:%[0-9]+]]:vgpr_32 = COPY [[COPY7]] - ; GFX8: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY10]], implicit $exec - ; GFX8: $sgpr0 = COPY [[V_READFIRSTLANE_B32_]] - ; GFX8: [[COPY11:%[0-9]+]]:vgpr_32 = COPY [[COPY8]] - ; GFX8: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY11]], implicit $exec - ; GFX8: $sgpr1 = COPY [[V_READFIRSTLANE_B32_1]] - ; GFX8: [[COPY12:%[0-9]+]]:vgpr_32 = COPY [[COPY9]] - ; GFX8: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY12]], implicit $exec - ; GFX8: $sgpr2 = COPY [[V_READFIRSTLANE_B32_2]] - ; GFX8: SI_RETURN_TO_EPILOG implicit $sgpr0, implicit $sgpr1, implicit $sgpr2 + ; GFX8-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6 + ; GFX8-NEXT: {{ $}} + ; GFX8-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2 + ; GFX8-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3 + ; GFX8-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4 + ; GFX8-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5 + ; GFX8-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 + ; GFX8-NEXT: [[COPY4:%[0-9]+]]:sreg_32 = COPY $sgpr6 + ; GFX8-NEXT: [[S_BUFFER_LOAD_DWORDX4_SGPR:%[0-9]+]]:sgpr_128 = S_BUFFER_LOAD_DWORDX4_SGPR [[REG_SEQUENCE]], [[COPY4]], 0 :: (dereferenceable invariant load (s96), align 4) + ; GFX8-NEXT: [[COPY5:%[0-9]+]]:sreg_32 = COPY [[S_BUFFER_LOAD_DWORDX4_SGPR]].sub0 
+ ; GFX8-NEXT: [[COPY6:%[0-9]+]]:sreg_32 = COPY [[S_BUFFER_LOAD_DWORDX4_SGPR]].sub1 + ; GFX8-NEXT: [[COPY7:%[0-9]+]]:sreg_32 = COPY [[S_BUFFER_LOAD_DWORDX4_SGPR]].sub2 + ; GFX8-NEXT: [[COPY8:%[0-9]+]]:sreg_32 = COPY [[S_BUFFER_LOAD_DWORDX4_SGPR]].sub3 + ; GFX8-NEXT: [[COPY9:%[0-9]+]]:vgpr_32 = COPY [[COPY5]] + ; GFX8-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY9]], implicit $exec + ; GFX8-NEXT: $sgpr0 = COPY [[V_READFIRSTLANE_B32_]] + ; GFX8-NEXT: [[COPY10:%[0-9]+]]:vgpr_32 = COPY [[COPY6]] + ; GFX8-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY10]], implicit $exec + ; GFX8-NEXT: $sgpr1 = COPY [[V_READFIRSTLANE_B32_1]] + ; GFX8-NEXT: [[COPY11:%[0-9]+]]:vgpr_32 = COPY [[COPY7]] + ; GFX8-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY11]], implicit $exec + ; GFX8-NEXT: $sgpr2 = COPY [[V_READFIRSTLANE_B32_2]] + ; GFX8-NEXT: SI_RETURN_TO_EPILOG implicit $sgpr0, implicit $sgpr1, implicit $sgpr2 %val = call <3 x i32> @llvm.amdgcn.s.buffer.load.v3i32(<4 x i32> %rsrc, i32 %soffset, i32 0) ret <3 x i32> %val } @@ -251,133 +254,136 @@ define amdgpu_ps <3 x i32> @s_buffer_load_v3i32(<4 x i32> inreg %rsrc, i32 inreg define amdgpu_ps <8 x i32> @s_buffer_load_v8i32(<4 x i32> inreg %rsrc, i32 inreg %soffset) { ; GFX6-LABEL: name: s_buffer_load_v8i32 ; GFX6: bb.1 (%ir-block.0): - ; GFX6: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6 - ; GFX6: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2 - ; GFX6: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3 - ; GFX6: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4 - ; GFX6: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5 - ; GFX6: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 - ; GFX6: [[COPY4:%[0-9]+]]:sreg_32 = COPY $sgpr6 - ; GFX6: [[S_BUFFER_LOAD_DWORDX8_SGPR:%[0-9]+]]:sgpr_256 = S_BUFFER_LOAD_DWORDX8_SGPR [[REG_SEQUENCE]], [[COPY4]], 0 :: (dereferenceable invariant load (s256), align 4) - ; GFX6: [[COPY5:%[0-9]+]]:sreg_32 = COPY [[S_BUFFER_LOAD_DWORDX8_SGPR]].sub0 - ; GFX6: [[COPY6:%[0-9]+]]:sreg_32 = COPY [[S_BUFFER_LOAD_DWORDX8_SGPR]].sub1 - ; GFX6: [[COPY7:%[0-9]+]]:sreg_32 = COPY [[S_BUFFER_LOAD_DWORDX8_SGPR]].sub2 - ; GFX6: [[COPY8:%[0-9]+]]:sreg_32 = COPY [[S_BUFFER_LOAD_DWORDX8_SGPR]].sub3 - ; GFX6: [[COPY9:%[0-9]+]]:sreg_32 = COPY [[S_BUFFER_LOAD_DWORDX8_SGPR]].sub4 - ; GFX6: [[COPY10:%[0-9]+]]:sreg_32 = COPY [[S_BUFFER_LOAD_DWORDX8_SGPR]].sub5 - ; GFX6: [[COPY11:%[0-9]+]]:sreg_32 = COPY [[S_BUFFER_LOAD_DWORDX8_SGPR]].sub6 - ; GFX6: [[COPY12:%[0-9]+]]:sreg_32 = COPY [[S_BUFFER_LOAD_DWORDX8_SGPR]].sub7 - ; GFX6: [[COPY13:%[0-9]+]]:vgpr_32 = COPY [[COPY5]] - ; GFX6: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY13]], implicit $exec - ; GFX6: $sgpr0 = COPY [[V_READFIRSTLANE_B32_]] - ; GFX6: [[COPY14:%[0-9]+]]:vgpr_32 = COPY [[COPY6]] - ; GFX6: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY14]], implicit $exec - ; GFX6: $sgpr1 = COPY [[V_READFIRSTLANE_B32_1]] - ; GFX6: [[COPY15:%[0-9]+]]:vgpr_32 = COPY [[COPY7]] - ; GFX6: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY15]], implicit $exec - ; GFX6: $sgpr2 = COPY [[V_READFIRSTLANE_B32_2]] - ; GFX6: [[COPY16:%[0-9]+]]:vgpr_32 = COPY [[COPY8]] - ; GFX6: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY16]], implicit $exec - ; GFX6: $sgpr3 = COPY [[V_READFIRSTLANE_B32_3]] - ; GFX6: [[COPY17:%[0-9]+]]:vgpr_32 = COPY [[COPY9]] - ; GFX6: 
[[V_READFIRSTLANE_B32_4:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY17]], implicit $exec - ; GFX6: $sgpr4 = COPY [[V_READFIRSTLANE_B32_4]] - ; GFX6: [[COPY18:%[0-9]+]]:vgpr_32 = COPY [[COPY10]] - ; GFX6: [[V_READFIRSTLANE_B32_5:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY18]], implicit $exec - ; GFX6: $sgpr5 = COPY [[V_READFIRSTLANE_B32_5]] - ; GFX6: [[COPY19:%[0-9]+]]:vgpr_32 = COPY [[COPY11]] - ; GFX6: [[V_READFIRSTLANE_B32_6:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY19]], implicit $exec - ; GFX6: $sgpr6 = COPY [[V_READFIRSTLANE_B32_6]] - ; GFX6: [[COPY20:%[0-9]+]]:vgpr_32 = COPY [[COPY12]] - ; GFX6: [[V_READFIRSTLANE_B32_7:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY20]], implicit $exec - ; GFX6: $sgpr7 = COPY [[V_READFIRSTLANE_B32_7]] - ; GFX6: SI_RETURN_TO_EPILOG implicit $sgpr0, implicit $sgpr1, implicit $sgpr2, implicit $sgpr3, implicit $sgpr4, implicit $sgpr5, implicit $sgpr6, implicit $sgpr7 + ; GFX6-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6 + ; GFX6-NEXT: {{ $}} + ; GFX6-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2 + ; GFX6-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3 + ; GFX6-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4 + ; GFX6-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5 + ; GFX6-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 + ; GFX6-NEXT: [[COPY4:%[0-9]+]]:sreg_32 = COPY $sgpr6 + ; GFX6-NEXT: [[S_BUFFER_LOAD_DWORDX8_SGPR:%[0-9]+]]:sgpr_256 = S_BUFFER_LOAD_DWORDX8_SGPR [[REG_SEQUENCE]], [[COPY4]], 0 :: (dereferenceable invariant load (s256), align 4) + ; GFX6-NEXT: [[COPY5:%[0-9]+]]:sreg_32 = COPY [[S_BUFFER_LOAD_DWORDX8_SGPR]].sub0 + ; GFX6-NEXT: [[COPY6:%[0-9]+]]:sreg_32 = COPY [[S_BUFFER_LOAD_DWORDX8_SGPR]].sub1 + ; GFX6-NEXT: [[COPY7:%[0-9]+]]:sreg_32 = COPY [[S_BUFFER_LOAD_DWORDX8_SGPR]].sub2 + ; GFX6-NEXT: [[COPY8:%[0-9]+]]:sreg_32 = COPY [[S_BUFFER_LOAD_DWORDX8_SGPR]].sub3 + ; GFX6-NEXT: [[COPY9:%[0-9]+]]:sreg_32 = COPY [[S_BUFFER_LOAD_DWORDX8_SGPR]].sub4 + ; GFX6-NEXT: [[COPY10:%[0-9]+]]:sreg_32 = COPY [[S_BUFFER_LOAD_DWORDX8_SGPR]].sub5 + ; GFX6-NEXT: [[COPY11:%[0-9]+]]:sreg_32 = COPY [[S_BUFFER_LOAD_DWORDX8_SGPR]].sub6 + ; GFX6-NEXT: [[COPY12:%[0-9]+]]:sreg_32 = COPY [[S_BUFFER_LOAD_DWORDX8_SGPR]].sub7 + ; GFX6-NEXT: [[COPY13:%[0-9]+]]:vgpr_32 = COPY [[COPY5]] + ; GFX6-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY13]], implicit $exec + ; GFX6-NEXT: $sgpr0 = COPY [[V_READFIRSTLANE_B32_]] + ; GFX6-NEXT: [[COPY14:%[0-9]+]]:vgpr_32 = COPY [[COPY6]] + ; GFX6-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY14]], implicit $exec + ; GFX6-NEXT: $sgpr1 = COPY [[V_READFIRSTLANE_B32_1]] + ; GFX6-NEXT: [[COPY15:%[0-9]+]]:vgpr_32 = COPY [[COPY7]] + ; GFX6-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY15]], implicit $exec + ; GFX6-NEXT: $sgpr2 = COPY [[V_READFIRSTLANE_B32_2]] + ; GFX6-NEXT: [[COPY16:%[0-9]+]]:vgpr_32 = COPY [[COPY8]] + ; GFX6-NEXT: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY16]], implicit $exec + ; GFX6-NEXT: $sgpr3 = COPY [[V_READFIRSTLANE_B32_3]] + ; GFX6-NEXT: [[COPY17:%[0-9]+]]:vgpr_32 = COPY [[COPY9]] + ; GFX6-NEXT: [[V_READFIRSTLANE_B32_4:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY17]], implicit $exec + ; GFX6-NEXT: $sgpr4 = COPY [[V_READFIRSTLANE_B32_4]] + ; GFX6-NEXT: [[COPY18:%[0-9]+]]:vgpr_32 = COPY [[COPY10]] + ; GFX6-NEXT: [[V_READFIRSTLANE_B32_5:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY18]], implicit 
$exec + ; GFX6-NEXT: $sgpr5 = COPY [[V_READFIRSTLANE_B32_5]] + ; GFX6-NEXT: [[COPY19:%[0-9]+]]:vgpr_32 = COPY [[COPY11]] + ; GFX6-NEXT: [[V_READFIRSTLANE_B32_6:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY19]], implicit $exec + ; GFX6-NEXT: $sgpr6 = COPY [[V_READFIRSTLANE_B32_6]] + ; GFX6-NEXT: [[COPY20:%[0-9]+]]:vgpr_32 = COPY [[COPY12]] + ; GFX6-NEXT: [[V_READFIRSTLANE_B32_7:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY20]], implicit $exec + ; GFX6-NEXT: $sgpr7 = COPY [[V_READFIRSTLANE_B32_7]] + ; GFX6-NEXT: SI_RETURN_TO_EPILOG implicit $sgpr0, implicit $sgpr1, implicit $sgpr2, implicit $sgpr3, implicit $sgpr4, implicit $sgpr5, implicit $sgpr6, implicit $sgpr7 ; GFX7-LABEL: name: s_buffer_load_v8i32 ; GFX7: bb.1 (%ir-block.0): - ; GFX7: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6 - ; GFX7: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2 - ; GFX7: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3 - ; GFX7: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4 - ; GFX7: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5 - ; GFX7: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 - ; GFX7: [[COPY4:%[0-9]+]]:sreg_32 = COPY $sgpr6 - ; GFX7: [[S_BUFFER_LOAD_DWORDX8_SGPR:%[0-9]+]]:sgpr_256 = S_BUFFER_LOAD_DWORDX8_SGPR [[REG_SEQUENCE]], [[COPY4]], 0 :: (dereferenceable invariant load (s256), align 4) - ; GFX7: [[COPY5:%[0-9]+]]:sreg_32 = COPY [[S_BUFFER_LOAD_DWORDX8_SGPR]].sub0 - ; GFX7: [[COPY6:%[0-9]+]]:sreg_32 = COPY [[S_BUFFER_LOAD_DWORDX8_SGPR]].sub1 - ; GFX7: [[COPY7:%[0-9]+]]:sreg_32 = COPY [[S_BUFFER_LOAD_DWORDX8_SGPR]].sub2 - ; GFX7: [[COPY8:%[0-9]+]]:sreg_32 = COPY [[S_BUFFER_LOAD_DWORDX8_SGPR]].sub3 - ; GFX7: [[COPY9:%[0-9]+]]:sreg_32 = COPY [[S_BUFFER_LOAD_DWORDX8_SGPR]].sub4 - ; GFX7: [[COPY10:%[0-9]+]]:sreg_32 = COPY [[S_BUFFER_LOAD_DWORDX8_SGPR]].sub5 - ; GFX7: [[COPY11:%[0-9]+]]:sreg_32 = COPY [[S_BUFFER_LOAD_DWORDX8_SGPR]].sub6 - ; GFX7: [[COPY12:%[0-9]+]]:sreg_32 = COPY [[S_BUFFER_LOAD_DWORDX8_SGPR]].sub7 - ; GFX7: [[COPY13:%[0-9]+]]:vgpr_32 = COPY [[COPY5]] - ; GFX7: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY13]], implicit $exec - ; GFX7: $sgpr0 = COPY [[V_READFIRSTLANE_B32_]] - ; GFX7: [[COPY14:%[0-9]+]]:vgpr_32 = COPY [[COPY6]] - ; GFX7: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY14]], implicit $exec - ; GFX7: $sgpr1 = COPY [[V_READFIRSTLANE_B32_1]] - ; GFX7: [[COPY15:%[0-9]+]]:vgpr_32 = COPY [[COPY7]] - ; GFX7: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY15]], implicit $exec - ; GFX7: $sgpr2 = COPY [[V_READFIRSTLANE_B32_2]] - ; GFX7: [[COPY16:%[0-9]+]]:vgpr_32 = COPY [[COPY8]] - ; GFX7: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY16]], implicit $exec - ; GFX7: $sgpr3 = COPY [[V_READFIRSTLANE_B32_3]] - ; GFX7: [[COPY17:%[0-9]+]]:vgpr_32 = COPY [[COPY9]] - ; GFX7: [[V_READFIRSTLANE_B32_4:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY17]], implicit $exec - ; GFX7: $sgpr4 = COPY [[V_READFIRSTLANE_B32_4]] - ; GFX7: [[COPY18:%[0-9]+]]:vgpr_32 = COPY [[COPY10]] - ; GFX7: [[V_READFIRSTLANE_B32_5:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY18]], implicit $exec - ; GFX7: $sgpr5 = COPY [[V_READFIRSTLANE_B32_5]] - ; GFX7: [[COPY19:%[0-9]+]]:vgpr_32 = COPY [[COPY11]] - ; GFX7: [[V_READFIRSTLANE_B32_6:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY19]], implicit $exec - ; GFX7: $sgpr6 = COPY [[V_READFIRSTLANE_B32_6]] - ; GFX7: [[COPY20:%[0-9]+]]:vgpr_32 = COPY [[COPY12]] - ; GFX7: [[V_READFIRSTLANE_B32_7:%[0-9]+]]:sreg_32 = 
V_READFIRSTLANE_B32 [[COPY20]], implicit $exec - ; GFX7: $sgpr7 = COPY [[V_READFIRSTLANE_B32_7]] - ; GFX7: SI_RETURN_TO_EPILOG implicit $sgpr0, implicit $sgpr1, implicit $sgpr2, implicit $sgpr3, implicit $sgpr4, implicit $sgpr5, implicit $sgpr6, implicit $sgpr7 + ; GFX7-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6 + ; GFX7-NEXT: {{ $}} + ; GFX7-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2 + ; GFX7-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3 + ; GFX7-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4 + ; GFX7-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5 + ; GFX7-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 + ; GFX7-NEXT: [[COPY4:%[0-9]+]]:sreg_32 = COPY $sgpr6 + ; GFX7-NEXT: [[S_BUFFER_LOAD_DWORDX8_SGPR:%[0-9]+]]:sgpr_256 = S_BUFFER_LOAD_DWORDX8_SGPR [[REG_SEQUENCE]], [[COPY4]], 0 :: (dereferenceable invariant load (s256), align 4) + ; GFX7-NEXT: [[COPY5:%[0-9]+]]:sreg_32 = COPY [[S_BUFFER_LOAD_DWORDX8_SGPR]].sub0 + ; GFX7-NEXT: [[COPY6:%[0-9]+]]:sreg_32 = COPY [[S_BUFFER_LOAD_DWORDX8_SGPR]].sub1 + ; GFX7-NEXT: [[COPY7:%[0-9]+]]:sreg_32 = COPY [[S_BUFFER_LOAD_DWORDX8_SGPR]].sub2 + ; GFX7-NEXT: [[COPY8:%[0-9]+]]:sreg_32 = COPY [[S_BUFFER_LOAD_DWORDX8_SGPR]].sub3 + ; GFX7-NEXT: [[COPY9:%[0-9]+]]:sreg_32 = COPY [[S_BUFFER_LOAD_DWORDX8_SGPR]].sub4 + ; GFX7-NEXT: [[COPY10:%[0-9]+]]:sreg_32 = COPY [[S_BUFFER_LOAD_DWORDX8_SGPR]].sub5 + ; GFX7-NEXT: [[COPY11:%[0-9]+]]:sreg_32 = COPY [[S_BUFFER_LOAD_DWORDX8_SGPR]].sub6 + ; GFX7-NEXT: [[COPY12:%[0-9]+]]:sreg_32 = COPY [[S_BUFFER_LOAD_DWORDX8_SGPR]].sub7 + ; GFX7-NEXT: [[COPY13:%[0-9]+]]:vgpr_32 = COPY [[COPY5]] + ; GFX7-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY13]], implicit $exec + ; GFX7-NEXT: $sgpr0 = COPY [[V_READFIRSTLANE_B32_]] + ; GFX7-NEXT: [[COPY14:%[0-9]+]]:vgpr_32 = COPY [[COPY6]] + ; GFX7-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY14]], implicit $exec + ; GFX7-NEXT: $sgpr1 = COPY [[V_READFIRSTLANE_B32_1]] + ; GFX7-NEXT: [[COPY15:%[0-9]+]]:vgpr_32 = COPY [[COPY7]] + ; GFX7-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY15]], implicit $exec + ; GFX7-NEXT: $sgpr2 = COPY [[V_READFIRSTLANE_B32_2]] + ; GFX7-NEXT: [[COPY16:%[0-9]+]]:vgpr_32 = COPY [[COPY8]] + ; GFX7-NEXT: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY16]], implicit $exec + ; GFX7-NEXT: $sgpr3 = COPY [[V_READFIRSTLANE_B32_3]] + ; GFX7-NEXT: [[COPY17:%[0-9]+]]:vgpr_32 = COPY [[COPY9]] + ; GFX7-NEXT: [[V_READFIRSTLANE_B32_4:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY17]], implicit $exec + ; GFX7-NEXT: $sgpr4 = COPY [[V_READFIRSTLANE_B32_4]] + ; GFX7-NEXT: [[COPY18:%[0-9]+]]:vgpr_32 = COPY [[COPY10]] + ; GFX7-NEXT: [[V_READFIRSTLANE_B32_5:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY18]], implicit $exec + ; GFX7-NEXT: $sgpr5 = COPY [[V_READFIRSTLANE_B32_5]] + ; GFX7-NEXT: [[COPY19:%[0-9]+]]:vgpr_32 = COPY [[COPY11]] + ; GFX7-NEXT: [[V_READFIRSTLANE_B32_6:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY19]], implicit $exec + ; GFX7-NEXT: $sgpr6 = COPY [[V_READFIRSTLANE_B32_6]] + ; GFX7-NEXT: [[COPY20:%[0-9]+]]:vgpr_32 = COPY [[COPY12]] + ; GFX7-NEXT: [[V_READFIRSTLANE_B32_7:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY20]], implicit $exec + ; GFX7-NEXT: $sgpr7 = COPY [[V_READFIRSTLANE_B32_7]] + ; GFX7-NEXT: SI_RETURN_TO_EPILOG implicit $sgpr0, implicit $sgpr1, implicit $sgpr2, implicit $sgpr3, implicit $sgpr4, implicit $sgpr5, implicit $sgpr6, 
implicit $sgpr7 ; GFX8-LABEL: name: s_buffer_load_v8i32 ; GFX8: bb.1 (%ir-block.0): - ; GFX8: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6 - ; GFX8: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2 - ; GFX8: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3 - ; GFX8: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4 - ; GFX8: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5 - ; GFX8: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 - ; GFX8: [[COPY4:%[0-9]+]]:sreg_32 = COPY $sgpr6 - ; GFX8: [[S_BUFFER_LOAD_DWORDX8_SGPR:%[0-9]+]]:sgpr_256 = S_BUFFER_LOAD_DWORDX8_SGPR [[REG_SEQUENCE]], [[COPY4]], 0 :: (dereferenceable invariant load (s256), align 4) - ; GFX8: [[COPY5:%[0-9]+]]:sreg_32 = COPY [[S_BUFFER_LOAD_DWORDX8_SGPR]].sub0 - ; GFX8: [[COPY6:%[0-9]+]]:sreg_32 = COPY [[S_BUFFER_LOAD_DWORDX8_SGPR]].sub1 - ; GFX8: [[COPY7:%[0-9]+]]:sreg_32 = COPY [[S_BUFFER_LOAD_DWORDX8_SGPR]].sub2 - ; GFX8: [[COPY8:%[0-9]+]]:sreg_32 = COPY [[S_BUFFER_LOAD_DWORDX8_SGPR]].sub3 - ; GFX8: [[COPY9:%[0-9]+]]:sreg_32 = COPY [[S_BUFFER_LOAD_DWORDX8_SGPR]].sub4 - ; GFX8: [[COPY10:%[0-9]+]]:sreg_32 = COPY [[S_BUFFER_LOAD_DWORDX8_SGPR]].sub5 - ; GFX8: [[COPY11:%[0-9]+]]:sreg_32 = COPY [[S_BUFFER_LOAD_DWORDX8_SGPR]].sub6 - ; GFX8: [[COPY12:%[0-9]+]]:sreg_32 = COPY [[S_BUFFER_LOAD_DWORDX8_SGPR]].sub7 - ; GFX8: [[COPY13:%[0-9]+]]:vgpr_32 = COPY [[COPY5]] - ; GFX8: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY13]], implicit $exec - ; GFX8: $sgpr0 = COPY [[V_READFIRSTLANE_B32_]] - ; GFX8: [[COPY14:%[0-9]+]]:vgpr_32 = COPY [[COPY6]] - ; GFX8: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY14]], implicit $exec - ; GFX8: $sgpr1 = COPY [[V_READFIRSTLANE_B32_1]] - ; GFX8: [[COPY15:%[0-9]+]]:vgpr_32 = COPY [[COPY7]] - ; GFX8: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY15]], implicit $exec - ; GFX8: $sgpr2 = COPY [[V_READFIRSTLANE_B32_2]] - ; GFX8: [[COPY16:%[0-9]+]]:vgpr_32 = COPY [[COPY8]] - ; GFX8: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY16]], implicit $exec - ; GFX8: $sgpr3 = COPY [[V_READFIRSTLANE_B32_3]] - ; GFX8: [[COPY17:%[0-9]+]]:vgpr_32 = COPY [[COPY9]] - ; GFX8: [[V_READFIRSTLANE_B32_4:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY17]], implicit $exec - ; GFX8: $sgpr4 = COPY [[V_READFIRSTLANE_B32_4]] - ; GFX8: [[COPY18:%[0-9]+]]:vgpr_32 = COPY [[COPY10]] - ; GFX8: [[V_READFIRSTLANE_B32_5:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY18]], implicit $exec - ; GFX8: $sgpr5 = COPY [[V_READFIRSTLANE_B32_5]] - ; GFX8: [[COPY19:%[0-9]+]]:vgpr_32 = COPY [[COPY11]] - ; GFX8: [[V_READFIRSTLANE_B32_6:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY19]], implicit $exec - ; GFX8: $sgpr6 = COPY [[V_READFIRSTLANE_B32_6]] - ; GFX8: [[COPY20:%[0-9]+]]:vgpr_32 = COPY [[COPY12]] - ; GFX8: [[V_READFIRSTLANE_B32_7:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY20]], implicit $exec - ; GFX8: $sgpr7 = COPY [[V_READFIRSTLANE_B32_7]] - ; GFX8: SI_RETURN_TO_EPILOG implicit $sgpr0, implicit $sgpr1, implicit $sgpr2, implicit $sgpr3, implicit $sgpr4, implicit $sgpr5, implicit $sgpr6, implicit $sgpr7 + ; GFX8-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6 + ; GFX8-NEXT: {{ $}} + ; GFX8-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2 + ; GFX8-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3 + ; GFX8-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4 + ; GFX8-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5 + ; GFX8-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, 
[[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 + ; GFX8-NEXT: [[COPY4:%[0-9]+]]:sreg_32 = COPY $sgpr6 + ; GFX8-NEXT: [[S_BUFFER_LOAD_DWORDX8_SGPR:%[0-9]+]]:sgpr_256 = S_BUFFER_LOAD_DWORDX8_SGPR [[REG_SEQUENCE]], [[COPY4]], 0 :: (dereferenceable invariant load (s256), align 4) + ; GFX8-NEXT: [[COPY5:%[0-9]+]]:sreg_32 = COPY [[S_BUFFER_LOAD_DWORDX8_SGPR]].sub0 + ; GFX8-NEXT: [[COPY6:%[0-9]+]]:sreg_32 = COPY [[S_BUFFER_LOAD_DWORDX8_SGPR]].sub1 + ; GFX8-NEXT: [[COPY7:%[0-9]+]]:sreg_32 = COPY [[S_BUFFER_LOAD_DWORDX8_SGPR]].sub2 + ; GFX8-NEXT: [[COPY8:%[0-9]+]]:sreg_32 = COPY [[S_BUFFER_LOAD_DWORDX8_SGPR]].sub3 + ; GFX8-NEXT: [[COPY9:%[0-9]+]]:sreg_32 = COPY [[S_BUFFER_LOAD_DWORDX8_SGPR]].sub4 + ; GFX8-NEXT: [[COPY10:%[0-9]+]]:sreg_32 = COPY [[S_BUFFER_LOAD_DWORDX8_SGPR]].sub5 + ; GFX8-NEXT: [[COPY11:%[0-9]+]]:sreg_32 = COPY [[S_BUFFER_LOAD_DWORDX8_SGPR]].sub6 + ; GFX8-NEXT: [[COPY12:%[0-9]+]]:sreg_32 = COPY [[S_BUFFER_LOAD_DWORDX8_SGPR]].sub7 + ; GFX8-NEXT: [[COPY13:%[0-9]+]]:vgpr_32 = COPY [[COPY5]] + ; GFX8-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY13]], implicit $exec + ; GFX8-NEXT: $sgpr0 = COPY [[V_READFIRSTLANE_B32_]] + ; GFX8-NEXT: [[COPY14:%[0-9]+]]:vgpr_32 = COPY [[COPY6]] + ; GFX8-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY14]], implicit $exec + ; GFX8-NEXT: $sgpr1 = COPY [[V_READFIRSTLANE_B32_1]] + ; GFX8-NEXT: [[COPY15:%[0-9]+]]:vgpr_32 = COPY [[COPY7]] + ; GFX8-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY15]], implicit $exec + ; GFX8-NEXT: $sgpr2 = COPY [[V_READFIRSTLANE_B32_2]] + ; GFX8-NEXT: [[COPY16:%[0-9]+]]:vgpr_32 = COPY [[COPY8]] + ; GFX8-NEXT: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY16]], implicit $exec + ; GFX8-NEXT: $sgpr3 = COPY [[V_READFIRSTLANE_B32_3]] + ; GFX8-NEXT: [[COPY17:%[0-9]+]]:vgpr_32 = COPY [[COPY9]] + ; GFX8-NEXT: [[V_READFIRSTLANE_B32_4:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY17]], implicit $exec + ; GFX8-NEXT: $sgpr4 = COPY [[V_READFIRSTLANE_B32_4]] + ; GFX8-NEXT: [[COPY18:%[0-9]+]]:vgpr_32 = COPY [[COPY10]] + ; GFX8-NEXT: [[V_READFIRSTLANE_B32_5:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY18]], implicit $exec + ; GFX8-NEXT: $sgpr5 = COPY [[V_READFIRSTLANE_B32_5]] + ; GFX8-NEXT: [[COPY19:%[0-9]+]]:vgpr_32 = COPY [[COPY11]] + ; GFX8-NEXT: [[V_READFIRSTLANE_B32_6:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY19]], implicit $exec + ; GFX8-NEXT: $sgpr6 = COPY [[V_READFIRSTLANE_B32_6]] + ; GFX8-NEXT: [[COPY20:%[0-9]+]]:vgpr_32 = COPY [[COPY12]] + ; GFX8-NEXT: [[V_READFIRSTLANE_B32_7:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY20]], implicit $exec + ; GFX8-NEXT: $sgpr7 = COPY [[V_READFIRSTLANE_B32_7]] + ; GFX8-NEXT: SI_RETURN_TO_EPILOG implicit $sgpr0, implicit $sgpr1, implicit $sgpr2, implicit $sgpr3, implicit $sgpr4, implicit $sgpr5, implicit $sgpr6, implicit $sgpr7 %val = call <8 x i32> @llvm.amdgcn.s.buffer.load.v8i32(<4 x i32> %rsrc, i32 %soffset, i32 0) ret <8 x i32> %val } @@ -385,229 +391,232 @@ define amdgpu_ps <8 x i32> @s_buffer_load_v8i32(<4 x i32> inreg %rsrc, i32 inreg define amdgpu_ps <16 x i32> @s_buffer_load_v16i32(<4 x i32> inreg %rsrc, i32 inreg %soffset) { ; GFX6-LABEL: name: s_buffer_load_v16i32 ; GFX6: bb.1 (%ir-block.0): - ; GFX6: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6 - ; GFX6: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2 - ; GFX6: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3 - ; GFX6: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4 - ; GFX6: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5 - ; 
GFX6: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 - ; GFX6: [[COPY4:%[0-9]+]]:sreg_32 = COPY $sgpr6 - ; GFX6: [[S_BUFFER_LOAD_DWORDX16_SGPR:%[0-9]+]]:sgpr_512 = S_BUFFER_LOAD_DWORDX16_SGPR [[REG_SEQUENCE]], [[COPY4]], 0 :: (dereferenceable invariant load (s512), align 4) - ; GFX6: [[COPY5:%[0-9]+]]:sreg_32 = COPY [[S_BUFFER_LOAD_DWORDX16_SGPR]].sub0 - ; GFX6: [[COPY6:%[0-9]+]]:sreg_32 = COPY [[S_BUFFER_LOAD_DWORDX16_SGPR]].sub1 - ; GFX6: [[COPY7:%[0-9]+]]:sreg_32 = COPY [[S_BUFFER_LOAD_DWORDX16_SGPR]].sub2 - ; GFX6: [[COPY8:%[0-9]+]]:sreg_32 = COPY [[S_BUFFER_LOAD_DWORDX16_SGPR]].sub3 - ; GFX6: [[COPY9:%[0-9]+]]:sreg_32 = COPY [[S_BUFFER_LOAD_DWORDX16_SGPR]].sub4 - ; GFX6: [[COPY10:%[0-9]+]]:sreg_32 = COPY [[S_BUFFER_LOAD_DWORDX16_SGPR]].sub5 - ; GFX6: [[COPY11:%[0-9]+]]:sreg_32 = COPY [[S_BUFFER_LOAD_DWORDX16_SGPR]].sub6 - ; GFX6: [[COPY12:%[0-9]+]]:sreg_32 = COPY [[S_BUFFER_LOAD_DWORDX16_SGPR]].sub7 - ; GFX6: [[COPY13:%[0-9]+]]:sreg_32 = COPY [[S_BUFFER_LOAD_DWORDX16_SGPR]].sub8 - ; GFX6: [[COPY14:%[0-9]+]]:sreg_32 = COPY [[S_BUFFER_LOAD_DWORDX16_SGPR]].sub9 - ; GFX6: [[COPY15:%[0-9]+]]:sreg_32 = COPY [[S_BUFFER_LOAD_DWORDX16_SGPR]].sub10 - ; GFX6: [[COPY16:%[0-9]+]]:sreg_32 = COPY [[S_BUFFER_LOAD_DWORDX16_SGPR]].sub11 - ; GFX6: [[COPY17:%[0-9]+]]:sreg_32 = COPY [[S_BUFFER_LOAD_DWORDX16_SGPR]].sub12 - ; GFX6: [[COPY18:%[0-9]+]]:sreg_32 = COPY [[S_BUFFER_LOAD_DWORDX16_SGPR]].sub13 - ; GFX6: [[COPY19:%[0-9]+]]:sreg_32 = COPY [[S_BUFFER_LOAD_DWORDX16_SGPR]].sub14 - ; GFX6: [[COPY20:%[0-9]+]]:sreg_32 = COPY [[S_BUFFER_LOAD_DWORDX16_SGPR]].sub15 - ; GFX6: [[COPY21:%[0-9]+]]:vgpr_32 = COPY [[COPY5]] - ; GFX6: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY21]], implicit $exec - ; GFX6: $sgpr0 = COPY [[V_READFIRSTLANE_B32_]] - ; GFX6: [[COPY22:%[0-9]+]]:vgpr_32 = COPY [[COPY6]] - ; GFX6: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY22]], implicit $exec - ; GFX6: $sgpr1 = COPY [[V_READFIRSTLANE_B32_1]] - ; GFX6: [[COPY23:%[0-9]+]]:vgpr_32 = COPY [[COPY7]] - ; GFX6: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY23]], implicit $exec - ; GFX6: $sgpr2 = COPY [[V_READFIRSTLANE_B32_2]] - ; GFX6: [[COPY24:%[0-9]+]]:vgpr_32 = COPY [[COPY8]] - ; GFX6: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY24]], implicit $exec - ; GFX6: $sgpr3 = COPY [[V_READFIRSTLANE_B32_3]] - ; GFX6: [[COPY25:%[0-9]+]]:vgpr_32 = COPY [[COPY9]] - ; GFX6: [[V_READFIRSTLANE_B32_4:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY25]], implicit $exec - ; GFX6: $sgpr4 = COPY [[V_READFIRSTLANE_B32_4]] - ; GFX6: [[COPY26:%[0-9]+]]:vgpr_32 = COPY [[COPY10]] - ; GFX6: [[V_READFIRSTLANE_B32_5:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY26]], implicit $exec - ; GFX6: $sgpr5 = COPY [[V_READFIRSTLANE_B32_5]] - ; GFX6: [[COPY27:%[0-9]+]]:vgpr_32 = COPY [[COPY11]] - ; GFX6: [[V_READFIRSTLANE_B32_6:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY27]], implicit $exec - ; GFX6: $sgpr6 = COPY [[V_READFIRSTLANE_B32_6]] - ; GFX6: [[COPY28:%[0-9]+]]:vgpr_32 = COPY [[COPY12]] - ; GFX6: [[V_READFIRSTLANE_B32_7:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY28]], implicit $exec - ; GFX6: $sgpr7 = COPY [[V_READFIRSTLANE_B32_7]] - ; GFX6: [[COPY29:%[0-9]+]]:vgpr_32 = COPY [[COPY13]] - ; GFX6: [[V_READFIRSTLANE_B32_8:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY29]], implicit $exec - ; GFX6: $sgpr8 = COPY [[V_READFIRSTLANE_B32_8]] - ; GFX6: 
[[COPY30:%[0-9]+]]:vgpr_32 = COPY [[COPY14]] - ; GFX6: [[V_READFIRSTLANE_B32_9:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY30]], implicit $exec - ; GFX6: $sgpr9 = COPY [[V_READFIRSTLANE_B32_9]] - ; GFX6: [[COPY31:%[0-9]+]]:vgpr_32 = COPY [[COPY15]] - ; GFX6: [[V_READFIRSTLANE_B32_10:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY31]], implicit $exec - ; GFX6: $sgpr10 = COPY [[V_READFIRSTLANE_B32_10]] - ; GFX6: [[COPY32:%[0-9]+]]:vgpr_32 = COPY [[COPY16]] - ; GFX6: [[V_READFIRSTLANE_B32_11:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY32]], implicit $exec - ; GFX6: $sgpr11 = COPY [[V_READFIRSTLANE_B32_11]] - ; GFX6: [[COPY33:%[0-9]+]]:vgpr_32 = COPY [[COPY17]] - ; GFX6: [[V_READFIRSTLANE_B32_12:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY33]], implicit $exec - ; GFX6: $sgpr12 = COPY [[V_READFIRSTLANE_B32_12]] - ; GFX6: [[COPY34:%[0-9]+]]:vgpr_32 = COPY [[COPY18]] - ; GFX6: [[V_READFIRSTLANE_B32_13:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY34]], implicit $exec - ; GFX6: $sgpr13 = COPY [[V_READFIRSTLANE_B32_13]] - ; GFX6: [[COPY35:%[0-9]+]]:vgpr_32 = COPY [[COPY19]] - ; GFX6: [[V_READFIRSTLANE_B32_14:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY35]], implicit $exec - ; GFX6: $sgpr14 = COPY [[V_READFIRSTLANE_B32_14]] - ; GFX6: [[COPY36:%[0-9]+]]:vgpr_32 = COPY [[COPY20]] - ; GFX6: [[V_READFIRSTLANE_B32_15:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY36]], implicit $exec - ; GFX6: $sgpr15 = COPY [[V_READFIRSTLANE_B32_15]] - ; GFX6: SI_RETURN_TO_EPILOG implicit $sgpr0, implicit $sgpr1, implicit $sgpr2, implicit $sgpr3, implicit $sgpr4, implicit $sgpr5, implicit $sgpr6, implicit $sgpr7, implicit $sgpr8, implicit $sgpr9, implicit $sgpr10, implicit $sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15 + ; GFX6-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6 + ; GFX6-NEXT: {{ $}} + ; GFX6-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2 + ; GFX6-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3 + ; GFX6-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4 + ; GFX6-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5 + ; GFX6-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 + ; GFX6-NEXT: [[COPY4:%[0-9]+]]:sreg_32 = COPY $sgpr6 + ; GFX6-NEXT: [[S_BUFFER_LOAD_DWORDX16_SGPR:%[0-9]+]]:sgpr_512 = S_BUFFER_LOAD_DWORDX16_SGPR [[REG_SEQUENCE]], [[COPY4]], 0 :: (dereferenceable invariant load (s512), align 4) + ; GFX6-NEXT: [[COPY5:%[0-9]+]]:sreg_32 = COPY [[S_BUFFER_LOAD_DWORDX16_SGPR]].sub0 + ; GFX6-NEXT: [[COPY6:%[0-9]+]]:sreg_32 = COPY [[S_BUFFER_LOAD_DWORDX16_SGPR]].sub1 + ; GFX6-NEXT: [[COPY7:%[0-9]+]]:sreg_32 = COPY [[S_BUFFER_LOAD_DWORDX16_SGPR]].sub2 + ; GFX6-NEXT: [[COPY8:%[0-9]+]]:sreg_32 = COPY [[S_BUFFER_LOAD_DWORDX16_SGPR]].sub3 + ; GFX6-NEXT: [[COPY9:%[0-9]+]]:sreg_32 = COPY [[S_BUFFER_LOAD_DWORDX16_SGPR]].sub4 + ; GFX6-NEXT: [[COPY10:%[0-9]+]]:sreg_32 = COPY [[S_BUFFER_LOAD_DWORDX16_SGPR]].sub5 + ; GFX6-NEXT: [[COPY11:%[0-9]+]]:sreg_32 = COPY [[S_BUFFER_LOAD_DWORDX16_SGPR]].sub6 + ; GFX6-NEXT: [[COPY12:%[0-9]+]]:sreg_32 = COPY [[S_BUFFER_LOAD_DWORDX16_SGPR]].sub7 + ; GFX6-NEXT: [[COPY13:%[0-9]+]]:sreg_32 = COPY [[S_BUFFER_LOAD_DWORDX16_SGPR]].sub8 + ; GFX6-NEXT: [[COPY14:%[0-9]+]]:sreg_32 = COPY [[S_BUFFER_LOAD_DWORDX16_SGPR]].sub9 + ; GFX6-NEXT: [[COPY15:%[0-9]+]]:sreg_32 = COPY [[S_BUFFER_LOAD_DWORDX16_SGPR]].sub10 + ; GFX6-NEXT: [[COPY16:%[0-9]+]]:sreg_32 = COPY [[S_BUFFER_LOAD_DWORDX16_SGPR]].sub11 + ; GFX6-NEXT: [[COPY17:%[0-9]+]]:sreg_32 = COPY 
[[S_BUFFER_LOAD_DWORDX16_SGPR]].sub12 + ; GFX6-NEXT: [[COPY18:%[0-9]+]]:sreg_32 = COPY [[S_BUFFER_LOAD_DWORDX16_SGPR]].sub13 + ; GFX6-NEXT: [[COPY19:%[0-9]+]]:sreg_32 = COPY [[S_BUFFER_LOAD_DWORDX16_SGPR]].sub14 + ; GFX6-NEXT: [[COPY20:%[0-9]+]]:sreg_32 = COPY [[S_BUFFER_LOAD_DWORDX16_SGPR]].sub15 + ; GFX6-NEXT: [[COPY21:%[0-9]+]]:vgpr_32 = COPY [[COPY5]] + ; GFX6-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY21]], implicit $exec + ; GFX6-NEXT: $sgpr0 = COPY [[V_READFIRSTLANE_B32_]] + ; GFX6-NEXT: [[COPY22:%[0-9]+]]:vgpr_32 = COPY [[COPY6]] + ; GFX6-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY22]], implicit $exec + ; GFX6-NEXT: $sgpr1 = COPY [[V_READFIRSTLANE_B32_1]] + ; GFX6-NEXT: [[COPY23:%[0-9]+]]:vgpr_32 = COPY [[COPY7]] + ; GFX6-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY23]], implicit $exec + ; GFX6-NEXT: $sgpr2 = COPY [[V_READFIRSTLANE_B32_2]] + ; GFX6-NEXT: [[COPY24:%[0-9]+]]:vgpr_32 = COPY [[COPY8]] + ; GFX6-NEXT: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY24]], implicit $exec + ; GFX6-NEXT: $sgpr3 = COPY [[V_READFIRSTLANE_B32_3]] + ; GFX6-NEXT: [[COPY25:%[0-9]+]]:vgpr_32 = COPY [[COPY9]] + ; GFX6-NEXT: [[V_READFIRSTLANE_B32_4:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY25]], implicit $exec + ; GFX6-NEXT: $sgpr4 = COPY [[V_READFIRSTLANE_B32_4]] + ; GFX6-NEXT: [[COPY26:%[0-9]+]]:vgpr_32 = COPY [[COPY10]] + ; GFX6-NEXT: [[V_READFIRSTLANE_B32_5:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY26]], implicit $exec + ; GFX6-NEXT: $sgpr5 = COPY [[V_READFIRSTLANE_B32_5]] + ; GFX6-NEXT: [[COPY27:%[0-9]+]]:vgpr_32 = COPY [[COPY11]] + ; GFX6-NEXT: [[V_READFIRSTLANE_B32_6:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY27]], implicit $exec + ; GFX6-NEXT: $sgpr6 = COPY [[V_READFIRSTLANE_B32_6]] + ; GFX6-NEXT: [[COPY28:%[0-9]+]]:vgpr_32 = COPY [[COPY12]] + ; GFX6-NEXT: [[V_READFIRSTLANE_B32_7:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY28]], implicit $exec + ; GFX6-NEXT: $sgpr7 = COPY [[V_READFIRSTLANE_B32_7]] + ; GFX6-NEXT: [[COPY29:%[0-9]+]]:vgpr_32 = COPY [[COPY13]] + ; GFX6-NEXT: [[V_READFIRSTLANE_B32_8:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY29]], implicit $exec + ; GFX6-NEXT: $sgpr8 = COPY [[V_READFIRSTLANE_B32_8]] + ; GFX6-NEXT: [[COPY30:%[0-9]+]]:vgpr_32 = COPY [[COPY14]] + ; GFX6-NEXT: [[V_READFIRSTLANE_B32_9:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY30]], implicit $exec + ; GFX6-NEXT: $sgpr9 = COPY [[V_READFIRSTLANE_B32_9]] + ; GFX6-NEXT: [[COPY31:%[0-9]+]]:vgpr_32 = COPY [[COPY15]] + ; GFX6-NEXT: [[V_READFIRSTLANE_B32_10:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY31]], implicit $exec + ; GFX6-NEXT: $sgpr10 = COPY [[V_READFIRSTLANE_B32_10]] + ; GFX6-NEXT: [[COPY32:%[0-9]+]]:vgpr_32 = COPY [[COPY16]] + ; GFX6-NEXT: [[V_READFIRSTLANE_B32_11:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY32]], implicit $exec + ; GFX6-NEXT: $sgpr11 = COPY [[V_READFIRSTLANE_B32_11]] + ; GFX6-NEXT: [[COPY33:%[0-9]+]]:vgpr_32 = COPY [[COPY17]] + ; GFX6-NEXT: [[V_READFIRSTLANE_B32_12:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY33]], implicit $exec + ; GFX6-NEXT: $sgpr12 = COPY [[V_READFIRSTLANE_B32_12]] + ; GFX6-NEXT: [[COPY34:%[0-9]+]]:vgpr_32 = COPY [[COPY18]] + ; GFX6-NEXT: [[V_READFIRSTLANE_B32_13:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY34]], implicit $exec + ; GFX6-NEXT: $sgpr13 = COPY [[V_READFIRSTLANE_B32_13]] + ; GFX6-NEXT: [[COPY35:%[0-9]+]]:vgpr_32 = COPY [[COPY19]] + ; GFX6-NEXT: [[V_READFIRSTLANE_B32_14:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY35]], 
implicit $exec + ; GFX6-NEXT: $sgpr14 = COPY [[V_READFIRSTLANE_B32_14]] + ; GFX6-NEXT: [[COPY36:%[0-9]+]]:vgpr_32 = COPY [[COPY20]] + ; GFX6-NEXT: [[V_READFIRSTLANE_B32_15:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY36]], implicit $exec + ; GFX6-NEXT: $sgpr15 = COPY [[V_READFIRSTLANE_B32_15]] + ; GFX6-NEXT: SI_RETURN_TO_EPILOG implicit $sgpr0, implicit $sgpr1, implicit $sgpr2, implicit $sgpr3, implicit $sgpr4, implicit $sgpr5, implicit $sgpr6, implicit $sgpr7, implicit $sgpr8, implicit $sgpr9, implicit $sgpr10, implicit $sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15 ; GFX7-LABEL: name: s_buffer_load_v16i32 ; GFX7: bb.1 (%ir-block.0): - ; GFX7: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6 - ; GFX7: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2 - ; GFX7: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3 - ; GFX7: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4 - ; GFX7: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5 - ; GFX7: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 - ; GFX7: [[COPY4:%[0-9]+]]:sreg_32 = COPY $sgpr6 - ; GFX7: [[S_BUFFER_LOAD_DWORDX16_SGPR:%[0-9]+]]:sgpr_512 = S_BUFFER_LOAD_DWORDX16_SGPR [[REG_SEQUENCE]], [[COPY4]], 0 :: (dereferenceable invariant load (s512), align 4) - ; GFX7: [[COPY5:%[0-9]+]]:sreg_32 = COPY [[S_BUFFER_LOAD_DWORDX16_SGPR]].sub0 - ; GFX7: [[COPY6:%[0-9]+]]:sreg_32 = COPY [[S_BUFFER_LOAD_DWORDX16_SGPR]].sub1 - ; GFX7: [[COPY7:%[0-9]+]]:sreg_32 = COPY [[S_BUFFER_LOAD_DWORDX16_SGPR]].sub2 - ; GFX7: [[COPY8:%[0-9]+]]:sreg_32 = COPY [[S_BUFFER_LOAD_DWORDX16_SGPR]].sub3 - ; GFX7: [[COPY9:%[0-9]+]]:sreg_32 = COPY [[S_BUFFER_LOAD_DWORDX16_SGPR]].sub4 - ; GFX7: [[COPY10:%[0-9]+]]:sreg_32 = COPY [[S_BUFFER_LOAD_DWORDX16_SGPR]].sub5 - ; GFX7: [[COPY11:%[0-9]+]]:sreg_32 = COPY [[S_BUFFER_LOAD_DWORDX16_SGPR]].sub6 - ; GFX7: [[COPY12:%[0-9]+]]:sreg_32 = COPY [[S_BUFFER_LOAD_DWORDX16_SGPR]].sub7 - ; GFX7: [[COPY13:%[0-9]+]]:sreg_32 = COPY [[S_BUFFER_LOAD_DWORDX16_SGPR]].sub8 - ; GFX7: [[COPY14:%[0-9]+]]:sreg_32 = COPY [[S_BUFFER_LOAD_DWORDX16_SGPR]].sub9 - ; GFX7: [[COPY15:%[0-9]+]]:sreg_32 = COPY [[S_BUFFER_LOAD_DWORDX16_SGPR]].sub10 - ; GFX7: [[COPY16:%[0-9]+]]:sreg_32 = COPY [[S_BUFFER_LOAD_DWORDX16_SGPR]].sub11 - ; GFX7: [[COPY17:%[0-9]+]]:sreg_32 = COPY [[S_BUFFER_LOAD_DWORDX16_SGPR]].sub12 - ; GFX7: [[COPY18:%[0-9]+]]:sreg_32 = COPY [[S_BUFFER_LOAD_DWORDX16_SGPR]].sub13 - ; GFX7: [[COPY19:%[0-9]+]]:sreg_32 = COPY [[S_BUFFER_LOAD_DWORDX16_SGPR]].sub14 - ; GFX7: [[COPY20:%[0-9]+]]:sreg_32 = COPY [[S_BUFFER_LOAD_DWORDX16_SGPR]].sub15 - ; GFX7: [[COPY21:%[0-9]+]]:vgpr_32 = COPY [[COPY5]] - ; GFX7: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY21]], implicit $exec - ; GFX7: $sgpr0 = COPY [[V_READFIRSTLANE_B32_]] - ; GFX7: [[COPY22:%[0-9]+]]:vgpr_32 = COPY [[COPY6]] - ; GFX7: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY22]], implicit $exec - ; GFX7: $sgpr1 = COPY [[V_READFIRSTLANE_B32_1]] - ; GFX7: [[COPY23:%[0-9]+]]:vgpr_32 = COPY [[COPY7]] - ; GFX7: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY23]], implicit $exec - ; GFX7: $sgpr2 = COPY [[V_READFIRSTLANE_B32_2]] - ; GFX7: [[COPY24:%[0-9]+]]:vgpr_32 = COPY [[COPY8]] - ; GFX7: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY24]], implicit $exec - ; GFX7: $sgpr3 = COPY [[V_READFIRSTLANE_B32_3]] - ; GFX7: [[COPY25:%[0-9]+]]:vgpr_32 = COPY [[COPY9]] - ; GFX7: [[V_READFIRSTLANE_B32_4:%[0-9]+]]:sreg_32 = 
V_READFIRSTLANE_B32 [[COPY25]], implicit $exec - ; GFX7: $sgpr4 = COPY [[V_READFIRSTLANE_B32_4]] - ; GFX7: [[COPY26:%[0-9]+]]:vgpr_32 = COPY [[COPY10]] - ; GFX7: [[V_READFIRSTLANE_B32_5:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY26]], implicit $exec - ; GFX7: $sgpr5 = COPY [[V_READFIRSTLANE_B32_5]] - ; GFX7: [[COPY27:%[0-9]+]]:vgpr_32 = COPY [[COPY11]] - ; GFX7: [[V_READFIRSTLANE_B32_6:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY27]], implicit $exec - ; GFX7: $sgpr6 = COPY [[V_READFIRSTLANE_B32_6]] - ; GFX7: [[COPY28:%[0-9]+]]:vgpr_32 = COPY [[COPY12]] - ; GFX7: [[V_READFIRSTLANE_B32_7:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY28]], implicit $exec - ; GFX7: $sgpr7 = COPY [[V_READFIRSTLANE_B32_7]] - ; GFX7: [[COPY29:%[0-9]+]]:vgpr_32 = COPY [[COPY13]] - ; GFX7: [[V_READFIRSTLANE_B32_8:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY29]], implicit $exec - ; GFX7: $sgpr8 = COPY [[V_READFIRSTLANE_B32_8]] - ; GFX7: [[COPY30:%[0-9]+]]:vgpr_32 = COPY [[COPY14]] - ; GFX7: [[V_READFIRSTLANE_B32_9:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY30]], implicit $exec - ; GFX7: $sgpr9 = COPY [[V_READFIRSTLANE_B32_9]] - ; GFX7: [[COPY31:%[0-9]+]]:vgpr_32 = COPY [[COPY15]] - ; GFX7: [[V_READFIRSTLANE_B32_10:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY31]], implicit $exec - ; GFX7: $sgpr10 = COPY [[V_READFIRSTLANE_B32_10]] - ; GFX7: [[COPY32:%[0-9]+]]:vgpr_32 = COPY [[COPY16]] - ; GFX7: [[V_READFIRSTLANE_B32_11:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY32]], implicit $exec - ; GFX7: $sgpr11 = COPY [[V_READFIRSTLANE_B32_11]] - ; GFX7: [[COPY33:%[0-9]+]]:vgpr_32 = COPY [[COPY17]] - ; GFX7: [[V_READFIRSTLANE_B32_12:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY33]], implicit $exec - ; GFX7: $sgpr12 = COPY [[V_READFIRSTLANE_B32_12]] - ; GFX7: [[COPY34:%[0-9]+]]:vgpr_32 = COPY [[COPY18]] - ; GFX7: [[V_READFIRSTLANE_B32_13:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY34]], implicit $exec - ; GFX7: $sgpr13 = COPY [[V_READFIRSTLANE_B32_13]] - ; GFX7: [[COPY35:%[0-9]+]]:vgpr_32 = COPY [[COPY19]] - ; GFX7: [[V_READFIRSTLANE_B32_14:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY35]], implicit $exec - ; GFX7: $sgpr14 = COPY [[V_READFIRSTLANE_B32_14]] - ; GFX7: [[COPY36:%[0-9]+]]:vgpr_32 = COPY [[COPY20]] - ; GFX7: [[V_READFIRSTLANE_B32_15:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY36]], implicit $exec - ; GFX7: $sgpr15 = COPY [[V_READFIRSTLANE_B32_15]] - ; GFX7: SI_RETURN_TO_EPILOG implicit $sgpr0, implicit $sgpr1, implicit $sgpr2, implicit $sgpr3, implicit $sgpr4, implicit $sgpr5, implicit $sgpr6, implicit $sgpr7, implicit $sgpr8, implicit $sgpr9, implicit $sgpr10, implicit $sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15 + ; GFX7-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6 + ; GFX7-NEXT: {{ $}} + ; GFX7-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2 + ; GFX7-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3 + ; GFX7-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4 + ; GFX7-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5 + ; GFX7-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 + ; GFX7-NEXT: [[COPY4:%[0-9]+]]:sreg_32 = COPY $sgpr6 + ; GFX7-NEXT: [[S_BUFFER_LOAD_DWORDX16_SGPR:%[0-9]+]]:sgpr_512 = S_BUFFER_LOAD_DWORDX16_SGPR [[REG_SEQUENCE]], [[COPY4]], 0 :: (dereferenceable invariant load (s512), align 4) + ; GFX7-NEXT: [[COPY5:%[0-9]+]]:sreg_32 = COPY [[S_BUFFER_LOAD_DWORDX16_SGPR]].sub0 + ; GFX7-NEXT: [[COPY6:%[0-9]+]]:sreg_32 = COPY 
[[S_BUFFER_LOAD_DWORDX16_SGPR]].sub1 + ; GFX7-NEXT: [[COPY7:%[0-9]+]]:sreg_32 = COPY [[S_BUFFER_LOAD_DWORDX16_SGPR]].sub2 + ; GFX7-NEXT: [[COPY8:%[0-9]+]]:sreg_32 = COPY [[S_BUFFER_LOAD_DWORDX16_SGPR]].sub3 + ; GFX7-NEXT: [[COPY9:%[0-9]+]]:sreg_32 = COPY [[S_BUFFER_LOAD_DWORDX16_SGPR]].sub4 + ; GFX7-NEXT: [[COPY10:%[0-9]+]]:sreg_32 = COPY [[S_BUFFER_LOAD_DWORDX16_SGPR]].sub5 + ; GFX7-NEXT: [[COPY11:%[0-9]+]]:sreg_32 = COPY [[S_BUFFER_LOAD_DWORDX16_SGPR]].sub6 + ; GFX7-NEXT: [[COPY12:%[0-9]+]]:sreg_32 = COPY [[S_BUFFER_LOAD_DWORDX16_SGPR]].sub7 + ; GFX7-NEXT: [[COPY13:%[0-9]+]]:sreg_32 = COPY [[S_BUFFER_LOAD_DWORDX16_SGPR]].sub8 + ; GFX7-NEXT: [[COPY14:%[0-9]+]]:sreg_32 = COPY [[S_BUFFER_LOAD_DWORDX16_SGPR]].sub9 + ; GFX7-NEXT: [[COPY15:%[0-9]+]]:sreg_32 = COPY [[S_BUFFER_LOAD_DWORDX16_SGPR]].sub10 + ; GFX7-NEXT: [[COPY16:%[0-9]+]]:sreg_32 = COPY [[S_BUFFER_LOAD_DWORDX16_SGPR]].sub11 + ; GFX7-NEXT: [[COPY17:%[0-9]+]]:sreg_32 = COPY [[S_BUFFER_LOAD_DWORDX16_SGPR]].sub12 + ; GFX7-NEXT: [[COPY18:%[0-9]+]]:sreg_32 = COPY [[S_BUFFER_LOAD_DWORDX16_SGPR]].sub13 + ; GFX7-NEXT: [[COPY19:%[0-9]+]]:sreg_32 = COPY [[S_BUFFER_LOAD_DWORDX16_SGPR]].sub14 + ; GFX7-NEXT: [[COPY20:%[0-9]+]]:sreg_32 = COPY [[S_BUFFER_LOAD_DWORDX16_SGPR]].sub15 + ; GFX7-NEXT: [[COPY21:%[0-9]+]]:vgpr_32 = COPY [[COPY5]] + ; GFX7-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY21]], implicit $exec + ; GFX7-NEXT: $sgpr0 = COPY [[V_READFIRSTLANE_B32_]] + ; GFX7-NEXT: [[COPY22:%[0-9]+]]:vgpr_32 = COPY [[COPY6]] + ; GFX7-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY22]], implicit $exec + ; GFX7-NEXT: $sgpr1 = COPY [[V_READFIRSTLANE_B32_1]] + ; GFX7-NEXT: [[COPY23:%[0-9]+]]:vgpr_32 = COPY [[COPY7]] + ; GFX7-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY23]], implicit $exec + ; GFX7-NEXT: $sgpr2 = COPY [[V_READFIRSTLANE_B32_2]] + ; GFX7-NEXT: [[COPY24:%[0-9]+]]:vgpr_32 = COPY [[COPY8]] + ; GFX7-NEXT: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY24]], implicit $exec + ; GFX7-NEXT: $sgpr3 = COPY [[V_READFIRSTLANE_B32_3]] + ; GFX7-NEXT: [[COPY25:%[0-9]+]]:vgpr_32 = COPY [[COPY9]] + ; GFX7-NEXT: [[V_READFIRSTLANE_B32_4:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY25]], implicit $exec + ; GFX7-NEXT: $sgpr4 = COPY [[V_READFIRSTLANE_B32_4]] + ; GFX7-NEXT: [[COPY26:%[0-9]+]]:vgpr_32 = COPY [[COPY10]] + ; GFX7-NEXT: [[V_READFIRSTLANE_B32_5:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY26]], implicit $exec + ; GFX7-NEXT: $sgpr5 = COPY [[V_READFIRSTLANE_B32_5]] + ; GFX7-NEXT: [[COPY27:%[0-9]+]]:vgpr_32 = COPY [[COPY11]] + ; GFX7-NEXT: [[V_READFIRSTLANE_B32_6:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY27]], implicit $exec + ; GFX7-NEXT: $sgpr6 = COPY [[V_READFIRSTLANE_B32_6]] + ; GFX7-NEXT: [[COPY28:%[0-9]+]]:vgpr_32 = COPY [[COPY12]] + ; GFX7-NEXT: [[V_READFIRSTLANE_B32_7:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY28]], implicit $exec + ; GFX7-NEXT: $sgpr7 = COPY [[V_READFIRSTLANE_B32_7]] + ; GFX7-NEXT: [[COPY29:%[0-9]+]]:vgpr_32 = COPY [[COPY13]] + ; GFX7-NEXT: [[V_READFIRSTLANE_B32_8:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY29]], implicit $exec + ; GFX7-NEXT: $sgpr8 = COPY [[V_READFIRSTLANE_B32_8]] + ; GFX7-NEXT: [[COPY30:%[0-9]+]]:vgpr_32 = COPY [[COPY14]] + ; GFX7-NEXT: [[V_READFIRSTLANE_B32_9:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY30]], implicit $exec + ; GFX7-NEXT: $sgpr9 = COPY [[V_READFIRSTLANE_B32_9]] + ; GFX7-NEXT: [[COPY31:%[0-9]+]]:vgpr_32 = COPY [[COPY15]] + ; GFX7-NEXT: 
[[V_READFIRSTLANE_B32_10:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY31]], implicit $exec + ; GFX7-NEXT: $sgpr10 = COPY [[V_READFIRSTLANE_B32_10]] + ; GFX7-NEXT: [[COPY32:%[0-9]+]]:vgpr_32 = COPY [[COPY16]] + ; GFX7-NEXT: [[V_READFIRSTLANE_B32_11:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY32]], implicit $exec + ; GFX7-NEXT: $sgpr11 = COPY [[V_READFIRSTLANE_B32_11]] + ; GFX7-NEXT: [[COPY33:%[0-9]+]]:vgpr_32 = COPY [[COPY17]] + ; GFX7-NEXT: [[V_READFIRSTLANE_B32_12:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY33]], implicit $exec + ; GFX7-NEXT: $sgpr12 = COPY [[V_READFIRSTLANE_B32_12]] + ; GFX7-NEXT: [[COPY34:%[0-9]+]]:vgpr_32 = COPY [[COPY18]] + ; GFX7-NEXT: [[V_READFIRSTLANE_B32_13:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY34]], implicit $exec + ; GFX7-NEXT: $sgpr13 = COPY [[V_READFIRSTLANE_B32_13]] + ; GFX7-NEXT: [[COPY35:%[0-9]+]]:vgpr_32 = COPY [[COPY19]] + ; GFX7-NEXT: [[V_READFIRSTLANE_B32_14:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY35]], implicit $exec + ; GFX7-NEXT: $sgpr14 = COPY [[V_READFIRSTLANE_B32_14]] + ; GFX7-NEXT: [[COPY36:%[0-9]+]]:vgpr_32 = COPY [[COPY20]] + ; GFX7-NEXT: [[V_READFIRSTLANE_B32_15:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY36]], implicit $exec + ; GFX7-NEXT: $sgpr15 = COPY [[V_READFIRSTLANE_B32_15]] + ; GFX7-NEXT: SI_RETURN_TO_EPILOG implicit $sgpr0, implicit $sgpr1, implicit $sgpr2, implicit $sgpr3, implicit $sgpr4, implicit $sgpr5, implicit $sgpr6, implicit $sgpr7, implicit $sgpr8, implicit $sgpr9, implicit $sgpr10, implicit $sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15 ; GFX8-LABEL: name: s_buffer_load_v16i32 ; GFX8: bb.1 (%ir-block.0): - ; GFX8: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6 - ; GFX8: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2 - ; GFX8: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3 - ; GFX8: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4 - ; GFX8: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5 - ; GFX8: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 - ; GFX8: [[COPY4:%[0-9]+]]:sreg_32 = COPY $sgpr6 - ; GFX8: [[S_BUFFER_LOAD_DWORDX16_SGPR:%[0-9]+]]:sgpr_512 = S_BUFFER_LOAD_DWORDX16_SGPR [[REG_SEQUENCE]], [[COPY4]], 0 :: (dereferenceable invariant load (s512), align 4) - ; GFX8: [[COPY5:%[0-9]+]]:sreg_32 = COPY [[S_BUFFER_LOAD_DWORDX16_SGPR]].sub0 - ; GFX8: [[COPY6:%[0-9]+]]:sreg_32 = COPY [[S_BUFFER_LOAD_DWORDX16_SGPR]].sub1 - ; GFX8: [[COPY7:%[0-9]+]]:sreg_32 = COPY [[S_BUFFER_LOAD_DWORDX16_SGPR]].sub2 - ; GFX8: [[COPY8:%[0-9]+]]:sreg_32 = COPY [[S_BUFFER_LOAD_DWORDX16_SGPR]].sub3 - ; GFX8: [[COPY9:%[0-9]+]]:sreg_32 = COPY [[S_BUFFER_LOAD_DWORDX16_SGPR]].sub4 - ; GFX8: [[COPY10:%[0-9]+]]:sreg_32 = COPY [[S_BUFFER_LOAD_DWORDX16_SGPR]].sub5 - ; GFX8: [[COPY11:%[0-9]+]]:sreg_32 = COPY [[S_BUFFER_LOAD_DWORDX16_SGPR]].sub6 - ; GFX8: [[COPY12:%[0-9]+]]:sreg_32 = COPY [[S_BUFFER_LOAD_DWORDX16_SGPR]].sub7 - ; GFX8: [[COPY13:%[0-9]+]]:sreg_32 = COPY [[S_BUFFER_LOAD_DWORDX16_SGPR]].sub8 - ; GFX8: [[COPY14:%[0-9]+]]:sreg_32 = COPY [[S_BUFFER_LOAD_DWORDX16_SGPR]].sub9 - ; GFX8: [[COPY15:%[0-9]+]]:sreg_32 = COPY [[S_BUFFER_LOAD_DWORDX16_SGPR]].sub10 - ; GFX8: [[COPY16:%[0-9]+]]:sreg_32 = COPY [[S_BUFFER_LOAD_DWORDX16_SGPR]].sub11 - ; GFX8: [[COPY17:%[0-9]+]]:sreg_32 = COPY [[S_BUFFER_LOAD_DWORDX16_SGPR]].sub12 - ; GFX8: [[COPY18:%[0-9]+]]:sreg_32 = COPY [[S_BUFFER_LOAD_DWORDX16_SGPR]].sub13 - ; GFX8: [[COPY19:%[0-9]+]]:sreg_32 = COPY [[S_BUFFER_LOAD_DWORDX16_SGPR]].sub14 - ; GFX8: [[COPY20:%[0-9]+]]:sreg_32 = COPY 
[[S_BUFFER_LOAD_DWORDX16_SGPR]].sub15 - ; GFX8: [[COPY21:%[0-9]+]]:vgpr_32 = COPY [[COPY5]] - ; GFX8: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY21]], implicit $exec - ; GFX8: $sgpr0 = COPY [[V_READFIRSTLANE_B32_]] - ; GFX8: [[COPY22:%[0-9]+]]:vgpr_32 = COPY [[COPY6]] - ; GFX8: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY22]], implicit $exec - ; GFX8: $sgpr1 = COPY [[V_READFIRSTLANE_B32_1]] - ; GFX8: [[COPY23:%[0-9]+]]:vgpr_32 = COPY [[COPY7]] - ; GFX8: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY23]], implicit $exec - ; GFX8: $sgpr2 = COPY [[V_READFIRSTLANE_B32_2]] - ; GFX8: [[COPY24:%[0-9]+]]:vgpr_32 = COPY [[COPY8]] - ; GFX8: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY24]], implicit $exec - ; GFX8: $sgpr3 = COPY [[V_READFIRSTLANE_B32_3]] - ; GFX8: [[COPY25:%[0-9]+]]:vgpr_32 = COPY [[COPY9]] - ; GFX8: [[V_READFIRSTLANE_B32_4:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY25]], implicit $exec - ; GFX8: $sgpr4 = COPY [[V_READFIRSTLANE_B32_4]] - ; GFX8: [[COPY26:%[0-9]+]]:vgpr_32 = COPY [[COPY10]] - ; GFX8: [[V_READFIRSTLANE_B32_5:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY26]], implicit $exec - ; GFX8: $sgpr5 = COPY [[V_READFIRSTLANE_B32_5]] - ; GFX8: [[COPY27:%[0-9]+]]:vgpr_32 = COPY [[COPY11]] - ; GFX8: [[V_READFIRSTLANE_B32_6:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY27]], implicit $exec - ; GFX8: $sgpr6 = COPY [[V_READFIRSTLANE_B32_6]] - ; GFX8: [[COPY28:%[0-9]+]]:vgpr_32 = COPY [[COPY12]] - ; GFX8: [[V_READFIRSTLANE_B32_7:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY28]], implicit $exec - ; GFX8: $sgpr7 = COPY [[V_READFIRSTLANE_B32_7]] - ; GFX8: [[COPY29:%[0-9]+]]:vgpr_32 = COPY [[COPY13]] - ; GFX8: [[V_READFIRSTLANE_B32_8:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY29]], implicit $exec - ; GFX8: $sgpr8 = COPY [[V_READFIRSTLANE_B32_8]] - ; GFX8: [[COPY30:%[0-9]+]]:vgpr_32 = COPY [[COPY14]] - ; GFX8: [[V_READFIRSTLANE_B32_9:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY30]], implicit $exec - ; GFX8: $sgpr9 = COPY [[V_READFIRSTLANE_B32_9]] - ; GFX8: [[COPY31:%[0-9]+]]:vgpr_32 = COPY [[COPY15]] - ; GFX8: [[V_READFIRSTLANE_B32_10:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY31]], implicit $exec - ; GFX8: $sgpr10 = COPY [[V_READFIRSTLANE_B32_10]] - ; GFX8: [[COPY32:%[0-9]+]]:vgpr_32 = COPY [[COPY16]] - ; GFX8: [[V_READFIRSTLANE_B32_11:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY32]], implicit $exec - ; GFX8: $sgpr11 = COPY [[V_READFIRSTLANE_B32_11]] - ; GFX8: [[COPY33:%[0-9]+]]:vgpr_32 = COPY [[COPY17]] - ; GFX8: [[V_READFIRSTLANE_B32_12:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY33]], implicit $exec - ; GFX8: $sgpr12 = COPY [[V_READFIRSTLANE_B32_12]] - ; GFX8: [[COPY34:%[0-9]+]]:vgpr_32 = COPY [[COPY18]] - ; GFX8: [[V_READFIRSTLANE_B32_13:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY34]], implicit $exec - ; GFX8: $sgpr13 = COPY [[V_READFIRSTLANE_B32_13]] - ; GFX8: [[COPY35:%[0-9]+]]:vgpr_32 = COPY [[COPY19]] - ; GFX8: [[V_READFIRSTLANE_B32_14:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY35]], implicit $exec - ; GFX8: $sgpr14 = COPY [[V_READFIRSTLANE_B32_14]] - ; GFX8: [[COPY36:%[0-9]+]]:vgpr_32 = COPY [[COPY20]] - ; GFX8: [[V_READFIRSTLANE_B32_15:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY36]], implicit $exec - ; GFX8: $sgpr15 = COPY [[V_READFIRSTLANE_B32_15]] - ; GFX8: SI_RETURN_TO_EPILOG implicit $sgpr0, implicit $sgpr1, implicit $sgpr2, implicit $sgpr3, implicit $sgpr4, implicit $sgpr5, implicit $sgpr6, implicit $sgpr7, implicit $sgpr8, implicit $sgpr9, implicit 
$sgpr10, implicit $sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15 + ; GFX8-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6 + ; GFX8-NEXT: {{ $}} + ; GFX8-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2 + ; GFX8-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3 + ; GFX8-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4 + ; GFX8-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5 + ; GFX8-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 + ; GFX8-NEXT: [[COPY4:%[0-9]+]]:sreg_32 = COPY $sgpr6 + ; GFX8-NEXT: [[S_BUFFER_LOAD_DWORDX16_SGPR:%[0-9]+]]:sgpr_512 = S_BUFFER_LOAD_DWORDX16_SGPR [[REG_SEQUENCE]], [[COPY4]], 0 :: (dereferenceable invariant load (s512), align 4) + ; GFX8-NEXT: [[COPY5:%[0-9]+]]:sreg_32 = COPY [[S_BUFFER_LOAD_DWORDX16_SGPR]].sub0 + ; GFX8-NEXT: [[COPY6:%[0-9]+]]:sreg_32 = COPY [[S_BUFFER_LOAD_DWORDX16_SGPR]].sub1 + ; GFX8-NEXT: [[COPY7:%[0-9]+]]:sreg_32 = COPY [[S_BUFFER_LOAD_DWORDX16_SGPR]].sub2 + ; GFX8-NEXT: [[COPY8:%[0-9]+]]:sreg_32 = COPY [[S_BUFFER_LOAD_DWORDX16_SGPR]].sub3 + ; GFX8-NEXT: [[COPY9:%[0-9]+]]:sreg_32 = COPY [[S_BUFFER_LOAD_DWORDX16_SGPR]].sub4 + ; GFX8-NEXT: [[COPY10:%[0-9]+]]:sreg_32 = COPY [[S_BUFFER_LOAD_DWORDX16_SGPR]].sub5 + ; GFX8-NEXT: [[COPY11:%[0-9]+]]:sreg_32 = COPY [[S_BUFFER_LOAD_DWORDX16_SGPR]].sub6 + ; GFX8-NEXT: [[COPY12:%[0-9]+]]:sreg_32 = COPY [[S_BUFFER_LOAD_DWORDX16_SGPR]].sub7 + ; GFX8-NEXT: [[COPY13:%[0-9]+]]:sreg_32 = COPY [[S_BUFFER_LOAD_DWORDX16_SGPR]].sub8 + ; GFX8-NEXT: [[COPY14:%[0-9]+]]:sreg_32 = COPY [[S_BUFFER_LOAD_DWORDX16_SGPR]].sub9 + ; GFX8-NEXT: [[COPY15:%[0-9]+]]:sreg_32 = COPY [[S_BUFFER_LOAD_DWORDX16_SGPR]].sub10 + ; GFX8-NEXT: [[COPY16:%[0-9]+]]:sreg_32 = COPY [[S_BUFFER_LOAD_DWORDX16_SGPR]].sub11 + ; GFX8-NEXT: [[COPY17:%[0-9]+]]:sreg_32 = COPY [[S_BUFFER_LOAD_DWORDX16_SGPR]].sub12 + ; GFX8-NEXT: [[COPY18:%[0-9]+]]:sreg_32 = COPY [[S_BUFFER_LOAD_DWORDX16_SGPR]].sub13 + ; GFX8-NEXT: [[COPY19:%[0-9]+]]:sreg_32 = COPY [[S_BUFFER_LOAD_DWORDX16_SGPR]].sub14 + ; GFX8-NEXT: [[COPY20:%[0-9]+]]:sreg_32 = COPY [[S_BUFFER_LOAD_DWORDX16_SGPR]].sub15 + ; GFX8-NEXT: [[COPY21:%[0-9]+]]:vgpr_32 = COPY [[COPY5]] + ; GFX8-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY21]], implicit $exec + ; GFX8-NEXT: $sgpr0 = COPY [[V_READFIRSTLANE_B32_]] + ; GFX8-NEXT: [[COPY22:%[0-9]+]]:vgpr_32 = COPY [[COPY6]] + ; GFX8-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY22]], implicit $exec + ; GFX8-NEXT: $sgpr1 = COPY [[V_READFIRSTLANE_B32_1]] + ; GFX8-NEXT: [[COPY23:%[0-9]+]]:vgpr_32 = COPY [[COPY7]] + ; GFX8-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY23]], implicit $exec + ; GFX8-NEXT: $sgpr2 = COPY [[V_READFIRSTLANE_B32_2]] + ; GFX8-NEXT: [[COPY24:%[0-9]+]]:vgpr_32 = COPY [[COPY8]] + ; GFX8-NEXT: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY24]], implicit $exec + ; GFX8-NEXT: $sgpr3 = COPY [[V_READFIRSTLANE_B32_3]] + ; GFX8-NEXT: [[COPY25:%[0-9]+]]:vgpr_32 = COPY [[COPY9]] + ; GFX8-NEXT: [[V_READFIRSTLANE_B32_4:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY25]], implicit $exec + ; GFX8-NEXT: $sgpr4 = COPY [[V_READFIRSTLANE_B32_4]] + ; GFX8-NEXT: [[COPY26:%[0-9]+]]:vgpr_32 = COPY [[COPY10]] + ; GFX8-NEXT: [[V_READFIRSTLANE_B32_5:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY26]], implicit $exec + ; GFX8-NEXT: $sgpr5 = COPY [[V_READFIRSTLANE_B32_5]] + ; GFX8-NEXT: [[COPY27:%[0-9]+]]:vgpr_32 = COPY [[COPY11]] + 
; GFX8-NEXT: [[V_READFIRSTLANE_B32_6:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY27]], implicit $exec + ; GFX8-NEXT: $sgpr6 = COPY [[V_READFIRSTLANE_B32_6]] + ; GFX8-NEXT: [[COPY28:%[0-9]+]]:vgpr_32 = COPY [[COPY12]] + ; GFX8-NEXT: [[V_READFIRSTLANE_B32_7:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY28]], implicit $exec + ; GFX8-NEXT: $sgpr7 = COPY [[V_READFIRSTLANE_B32_7]] + ; GFX8-NEXT: [[COPY29:%[0-9]+]]:vgpr_32 = COPY [[COPY13]] + ; GFX8-NEXT: [[V_READFIRSTLANE_B32_8:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY29]], implicit $exec + ; GFX8-NEXT: $sgpr8 = COPY [[V_READFIRSTLANE_B32_8]] + ; GFX8-NEXT: [[COPY30:%[0-9]+]]:vgpr_32 = COPY [[COPY14]] + ; GFX8-NEXT: [[V_READFIRSTLANE_B32_9:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY30]], implicit $exec + ; GFX8-NEXT: $sgpr9 = COPY [[V_READFIRSTLANE_B32_9]] + ; GFX8-NEXT: [[COPY31:%[0-9]+]]:vgpr_32 = COPY [[COPY15]] + ; GFX8-NEXT: [[V_READFIRSTLANE_B32_10:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY31]], implicit $exec + ; GFX8-NEXT: $sgpr10 = COPY [[V_READFIRSTLANE_B32_10]] + ; GFX8-NEXT: [[COPY32:%[0-9]+]]:vgpr_32 = COPY [[COPY16]] + ; GFX8-NEXT: [[V_READFIRSTLANE_B32_11:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY32]], implicit $exec + ; GFX8-NEXT: $sgpr11 = COPY [[V_READFIRSTLANE_B32_11]] + ; GFX8-NEXT: [[COPY33:%[0-9]+]]:vgpr_32 = COPY [[COPY17]] + ; GFX8-NEXT: [[V_READFIRSTLANE_B32_12:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY33]], implicit $exec + ; GFX8-NEXT: $sgpr12 = COPY [[V_READFIRSTLANE_B32_12]] + ; GFX8-NEXT: [[COPY34:%[0-9]+]]:vgpr_32 = COPY [[COPY18]] + ; GFX8-NEXT: [[V_READFIRSTLANE_B32_13:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY34]], implicit $exec + ; GFX8-NEXT: $sgpr13 = COPY [[V_READFIRSTLANE_B32_13]] + ; GFX8-NEXT: [[COPY35:%[0-9]+]]:vgpr_32 = COPY [[COPY19]] + ; GFX8-NEXT: [[V_READFIRSTLANE_B32_14:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY35]], implicit $exec + ; GFX8-NEXT: $sgpr14 = COPY [[V_READFIRSTLANE_B32_14]] + ; GFX8-NEXT: [[COPY36:%[0-9]+]]:vgpr_32 = COPY [[COPY20]] + ; GFX8-NEXT: [[V_READFIRSTLANE_B32_15:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY36]], implicit $exec + ; GFX8-NEXT: $sgpr15 = COPY [[V_READFIRSTLANE_B32_15]] + ; GFX8-NEXT: SI_RETURN_TO_EPILOG implicit $sgpr0, implicit $sgpr1, implicit $sgpr2, implicit $sgpr3, implicit $sgpr4, implicit $sgpr5, implicit $sgpr6, implicit $sgpr7, implicit $sgpr8, implicit $sgpr9, implicit $sgpr10, implicit $sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15 %val = call <16 x i32> @llvm.amdgcn.s.buffer.load.v16i32(<4 x i32> %rsrc, i32 %soffset, i32 0) ret <16 x i32> %val } @@ -615,45 +624,48 @@ define amdgpu_ps <16 x i32> @s_buffer_load_v16i32(<4 x i32> inreg %rsrc, i32 inr define amdgpu_ps i32 @s_buffer_load_i32_offset_1(<4 x i32> inreg %rsrc) { ; GFX6-LABEL: name: s_buffer_load_i32_offset_1 ; GFX6: bb.1 (%ir-block.0): - ; GFX6: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5 - ; GFX6: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2 - ; GFX6: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3 - ; GFX6: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4 - ; GFX6: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5 - ; GFX6: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 - ; GFX6: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 1 - ; GFX6: [[S_BUFFER_LOAD_DWORD_SGPR:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_SGPR [[REG_SEQUENCE]], [[S_MOV_B32_]], 0 :: (dereferenceable invariant load (s32)) - ; GFX6: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[S_BUFFER_LOAD_DWORD_SGPR]] - ; 
GFX6: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY4]], implicit $exec - ; GFX6: $sgpr0 = COPY [[V_READFIRSTLANE_B32_]] - ; GFX6: SI_RETURN_TO_EPILOG implicit $sgpr0 + ; GFX6-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5 + ; GFX6-NEXT: {{ $}} + ; GFX6-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2 + ; GFX6-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3 + ; GFX6-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4 + ; GFX6-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5 + ; GFX6-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 + ; GFX6-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 1 + ; GFX6-NEXT: [[S_BUFFER_LOAD_DWORD_SGPR:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_SGPR [[REG_SEQUENCE]], [[S_MOV_B32_]], 0 :: (dereferenceable invariant load (s32)) + ; GFX6-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[S_BUFFER_LOAD_DWORD_SGPR]] + ; GFX6-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY4]], implicit $exec + ; GFX6-NEXT: $sgpr0 = COPY [[V_READFIRSTLANE_B32_]] + ; GFX6-NEXT: SI_RETURN_TO_EPILOG implicit $sgpr0 ; GFX7-LABEL: name: s_buffer_load_i32_offset_1 ; GFX7: bb.1 (%ir-block.0): - ; GFX7: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5 - ; GFX7: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2 - ; GFX7: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3 - ; GFX7: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4 - ; GFX7: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5 - ; GFX7: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 - ; GFX7: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 1 - ; GFX7: [[S_BUFFER_LOAD_DWORD_SGPR:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_SGPR [[REG_SEQUENCE]], [[S_MOV_B32_]], 0 :: (dereferenceable invariant load (s32)) - ; GFX7: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[S_BUFFER_LOAD_DWORD_SGPR]] - ; GFX7: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY4]], implicit $exec - ; GFX7: $sgpr0 = COPY [[V_READFIRSTLANE_B32_]] - ; GFX7: SI_RETURN_TO_EPILOG implicit $sgpr0 + ; GFX7-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5 + ; GFX7-NEXT: {{ $}} + ; GFX7-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2 + ; GFX7-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3 + ; GFX7-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4 + ; GFX7-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5 + ; GFX7-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 + ; GFX7-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 1 + ; GFX7-NEXT: [[S_BUFFER_LOAD_DWORD_SGPR:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_SGPR [[REG_SEQUENCE]], [[S_MOV_B32_]], 0 :: (dereferenceable invariant load (s32)) + ; GFX7-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[S_BUFFER_LOAD_DWORD_SGPR]] + ; GFX7-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY4]], implicit $exec + ; GFX7-NEXT: $sgpr0 = COPY [[V_READFIRSTLANE_B32_]] + ; GFX7-NEXT: SI_RETURN_TO_EPILOG implicit $sgpr0 ; GFX8-LABEL: name: s_buffer_load_i32_offset_1 ; GFX8: bb.1 (%ir-block.0): - ; GFX8: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5 - ; GFX8: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2 - ; GFX8: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3 - ; GFX8: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4 - ; GFX8: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5 - ; GFX8: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, 
[[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 - ; GFX8: [[S_BUFFER_LOAD_DWORD_IMM:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_IMM [[REG_SEQUENCE]], 1, 0 :: (dereferenceable invariant load (s32)) - ; GFX8: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[S_BUFFER_LOAD_DWORD_IMM]] - ; GFX8: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY4]], implicit $exec - ; GFX8: $sgpr0 = COPY [[V_READFIRSTLANE_B32_]] - ; GFX8: SI_RETURN_TO_EPILOG implicit $sgpr0 + ; GFX8-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5 + ; GFX8-NEXT: {{ $}} + ; GFX8-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2 + ; GFX8-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3 + ; GFX8-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4 + ; GFX8-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5 + ; GFX8-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 + ; GFX8-NEXT: [[S_BUFFER_LOAD_DWORD_IMM:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_IMM [[REG_SEQUENCE]], 1, 0 :: (dereferenceable invariant load (s32)) + ; GFX8-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[S_BUFFER_LOAD_DWORD_IMM]] + ; GFX8-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY4]], implicit $exec + ; GFX8-NEXT: $sgpr0 = COPY [[V_READFIRSTLANE_B32_]] + ; GFX8-NEXT: SI_RETURN_TO_EPILOG implicit $sgpr0 %val = call i32 @llvm.amdgcn.s.buffer.load.i32(<4 x i32> %rsrc, i32 1, i32 0) ret i32 %val } @@ -661,43 +673,46 @@ define amdgpu_ps i32 @s_buffer_load_i32_offset_1(<4 x i32> inreg %rsrc) { define amdgpu_ps i32 @s_buffer_load_i32_offset_glc_4(<4 x i32> inreg %rsrc) { ; GFX6-LABEL: name: s_buffer_load_i32_offset_glc_4 ; GFX6: bb.1 (%ir-block.0): - ; GFX6: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5 - ; GFX6: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2 - ; GFX6: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3 - ; GFX6: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4 - ; GFX6: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5 - ; GFX6: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 - ; GFX6: [[S_BUFFER_LOAD_DWORD_IMM:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_IMM [[REG_SEQUENCE]], 1, 1 :: (dereferenceable invariant load (s32)) - ; GFX6: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[S_BUFFER_LOAD_DWORD_IMM]] - ; GFX6: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY4]], implicit $exec - ; GFX6: $sgpr0 = COPY [[V_READFIRSTLANE_B32_]] - ; GFX6: SI_RETURN_TO_EPILOG implicit $sgpr0 + ; GFX6-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5 + ; GFX6-NEXT: {{ $}} + ; GFX6-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2 + ; GFX6-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3 + ; GFX6-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4 + ; GFX6-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5 + ; GFX6-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 + ; GFX6-NEXT: [[S_BUFFER_LOAD_DWORD_IMM:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_IMM [[REG_SEQUENCE]], 1, 1 :: (dereferenceable invariant load (s32)) + ; GFX6-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[S_BUFFER_LOAD_DWORD_IMM]] + ; GFX6-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY4]], implicit $exec + ; GFX6-NEXT: $sgpr0 = COPY [[V_READFIRSTLANE_B32_]] + ; GFX6-NEXT: SI_RETURN_TO_EPILOG implicit $sgpr0 ; GFX7-LABEL: name: s_buffer_load_i32_offset_glc_4 ; GFX7: bb.1 (%ir-block.0): - ; GFX7: liveins: $sgpr2, 
$sgpr3, $sgpr4, $sgpr5 - ; GFX7: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2 - ; GFX7: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3 - ; GFX7: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4 - ; GFX7: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5 - ; GFX7: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 - ; GFX7: [[S_BUFFER_LOAD_DWORD_IMM:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_IMM [[REG_SEQUENCE]], 1, 1 :: (dereferenceable invariant load (s32)) - ; GFX7: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[S_BUFFER_LOAD_DWORD_IMM]] - ; GFX7: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY4]], implicit $exec - ; GFX7: $sgpr0 = COPY [[V_READFIRSTLANE_B32_]] - ; GFX7: SI_RETURN_TO_EPILOG implicit $sgpr0 + ; GFX7-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5 + ; GFX7-NEXT: {{ $}} + ; GFX7-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2 + ; GFX7-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3 + ; GFX7-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4 + ; GFX7-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5 + ; GFX7-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 + ; GFX7-NEXT: [[S_BUFFER_LOAD_DWORD_IMM:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_IMM [[REG_SEQUENCE]], 1, 1 :: (dereferenceable invariant load (s32)) + ; GFX7-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[S_BUFFER_LOAD_DWORD_IMM]] + ; GFX7-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY4]], implicit $exec + ; GFX7-NEXT: $sgpr0 = COPY [[V_READFIRSTLANE_B32_]] + ; GFX7-NEXT: SI_RETURN_TO_EPILOG implicit $sgpr0 ; GFX8-LABEL: name: s_buffer_load_i32_offset_glc_4 ; GFX8: bb.1 (%ir-block.0): - ; GFX8: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5 - ; GFX8: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2 - ; GFX8: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3 - ; GFX8: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4 - ; GFX8: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5 - ; GFX8: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 - ; GFX8: [[S_BUFFER_LOAD_DWORD_IMM:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_IMM [[REG_SEQUENCE]], 4, 1 :: (dereferenceable invariant load (s32)) - ; GFX8: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[S_BUFFER_LOAD_DWORD_IMM]] - ; GFX8: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY4]], implicit $exec - ; GFX8: $sgpr0 = COPY [[V_READFIRSTLANE_B32_]] - ; GFX8: SI_RETURN_TO_EPILOG implicit $sgpr0 + ; GFX8-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5 + ; GFX8-NEXT: {{ $}} + ; GFX8-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2 + ; GFX8-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3 + ; GFX8-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4 + ; GFX8-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5 + ; GFX8-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 + ; GFX8-NEXT: [[S_BUFFER_LOAD_DWORD_IMM:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_IMM [[REG_SEQUENCE]], 4, 1 :: (dereferenceable invariant load (s32)) + ; GFX8-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[S_BUFFER_LOAD_DWORD_IMM]] + ; GFX8-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY4]], implicit $exec + ; GFX8-NEXT: $sgpr0 = COPY [[V_READFIRSTLANE_B32_]] + ; GFX8-NEXT: SI_RETURN_TO_EPILOG implicit $sgpr0 %val = call i32 
@llvm.amdgcn.s.buffer.load.i32(<4 x i32> %rsrc, i32 4, i32 1) ret i32 %val } @@ -705,45 +720,48 @@ define amdgpu_ps i32 @s_buffer_load_i32_offset_glc_4(<4 x i32> inreg %rsrc) { define amdgpu_ps i32 @s_buffer_load_i32_offset_255(<4 x i32> inreg %rsrc) { ; GFX6-LABEL: name: s_buffer_load_i32_offset_255 ; GFX6: bb.1 (%ir-block.0): - ; GFX6: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5 - ; GFX6: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2 - ; GFX6: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3 - ; GFX6: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4 - ; GFX6: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5 - ; GFX6: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 - ; GFX6: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 255 - ; GFX6: [[S_BUFFER_LOAD_DWORD_SGPR:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_SGPR [[REG_SEQUENCE]], [[S_MOV_B32_]], 0 :: (dereferenceable invariant load (s32)) - ; GFX6: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[S_BUFFER_LOAD_DWORD_SGPR]] - ; GFX6: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY4]], implicit $exec - ; GFX6: $sgpr0 = COPY [[V_READFIRSTLANE_B32_]] - ; GFX6: SI_RETURN_TO_EPILOG implicit $sgpr0 + ; GFX6-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5 + ; GFX6-NEXT: {{ $}} + ; GFX6-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2 + ; GFX6-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3 + ; GFX6-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4 + ; GFX6-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5 + ; GFX6-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 + ; GFX6-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 255 + ; GFX6-NEXT: [[S_BUFFER_LOAD_DWORD_SGPR:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_SGPR [[REG_SEQUENCE]], [[S_MOV_B32_]], 0 :: (dereferenceable invariant load (s32)) + ; GFX6-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[S_BUFFER_LOAD_DWORD_SGPR]] + ; GFX6-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY4]], implicit $exec + ; GFX6-NEXT: $sgpr0 = COPY [[V_READFIRSTLANE_B32_]] + ; GFX6-NEXT: SI_RETURN_TO_EPILOG implicit $sgpr0 ; GFX7-LABEL: name: s_buffer_load_i32_offset_255 ; GFX7: bb.1 (%ir-block.0): - ; GFX7: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5 - ; GFX7: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2 - ; GFX7: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3 - ; GFX7: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4 - ; GFX7: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5 - ; GFX7: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 - ; GFX7: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 255 - ; GFX7: [[S_BUFFER_LOAD_DWORD_SGPR:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_SGPR [[REG_SEQUENCE]], [[S_MOV_B32_]], 0 :: (dereferenceable invariant load (s32)) - ; GFX7: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[S_BUFFER_LOAD_DWORD_SGPR]] - ; GFX7: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY4]], implicit $exec - ; GFX7: $sgpr0 = COPY [[V_READFIRSTLANE_B32_]] - ; GFX7: SI_RETURN_TO_EPILOG implicit $sgpr0 + ; GFX7-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5 + ; GFX7-NEXT: {{ $}} + ; GFX7-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2 + ; GFX7-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3 + ; GFX7-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4 + ; GFX7-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5 + ; GFX7-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE 
[[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 + ; GFX7-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 255 + ; GFX7-NEXT: [[S_BUFFER_LOAD_DWORD_SGPR:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_SGPR [[REG_SEQUENCE]], [[S_MOV_B32_]], 0 :: (dereferenceable invariant load (s32)) + ; GFX7-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[S_BUFFER_LOAD_DWORD_SGPR]] + ; GFX7-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY4]], implicit $exec + ; GFX7-NEXT: $sgpr0 = COPY [[V_READFIRSTLANE_B32_]] + ; GFX7-NEXT: SI_RETURN_TO_EPILOG implicit $sgpr0 ; GFX8-LABEL: name: s_buffer_load_i32_offset_255 ; GFX8: bb.1 (%ir-block.0): - ; GFX8: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5 - ; GFX8: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2 - ; GFX8: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3 - ; GFX8: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4 - ; GFX8: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5 - ; GFX8: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 - ; GFX8: [[S_BUFFER_LOAD_DWORD_IMM:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_IMM [[REG_SEQUENCE]], 255, 0 :: (dereferenceable invariant load (s32)) - ; GFX8: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[S_BUFFER_LOAD_DWORD_IMM]] - ; GFX8: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY4]], implicit $exec - ; GFX8: $sgpr0 = COPY [[V_READFIRSTLANE_B32_]] - ; GFX8: SI_RETURN_TO_EPILOG implicit $sgpr0 + ; GFX8-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5 + ; GFX8-NEXT: {{ $}} + ; GFX8-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2 + ; GFX8-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3 + ; GFX8-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4 + ; GFX8-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5 + ; GFX8-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 + ; GFX8-NEXT: [[S_BUFFER_LOAD_DWORD_IMM:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_IMM [[REG_SEQUENCE]], 255, 0 :: (dereferenceable invariant load (s32)) + ; GFX8-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[S_BUFFER_LOAD_DWORD_IMM]] + ; GFX8-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY4]], implicit $exec + ; GFX8-NEXT: $sgpr0 = COPY [[V_READFIRSTLANE_B32_]] + ; GFX8-NEXT: SI_RETURN_TO_EPILOG implicit $sgpr0 %val = call i32 @llvm.amdgcn.s.buffer.load.i32(<4 x i32> %rsrc, i32 255, i32 0) ret i32 %val } @@ -751,43 +769,46 @@ define amdgpu_ps i32 @s_buffer_load_i32_offset_255(<4 x i32> inreg %rsrc) { define amdgpu_ps i32 @s_buffer_load_i32_offset_256(<4 x i32> inreg %rsrc) { ; GFX6-LABEL: name: s_buffer_load_i32_offset_256 ; GFX6: bb.1 (%ir-block.0): - ; GFX6: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5 - ; GFX6: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2 - ; GFX6: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3 - ; GFX6: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4 - ; GFX6: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5 - ; GFX6: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 - ; GFX6: [[S_BUFFER_LOAD_DWORD_IMM:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_IMM [[REG_SEQUENCE]], 64, 0 :: (dereferenceable invariant load (s32)) - ; GFX6: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[S_BUFFER_LOAD_DWORD_IMM]] - ; GFX6: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY4]], implicit $exec - ; GFX6: $sgpr0 = COPY 
[[V_READFIRSTLANE_B32_]] - ; GFX6: SI_RETURN_TO_EPILOG implicit $sgpr0 + ; GFX6-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5 + ; GFX6-NEXT: {{ $}} + ; GFX6-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2 + ; GFX6-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3 + ; GFX6-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4 + ; GFX6-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5 + ; GFX6-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 + ; GFX6-NEXT: [[S_BUFFER_LOAD_DWORD_IMM:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_IMM [[REG_SEQUENCE]], 64, 0 :: (dereferenceable invariant load (s32)) + ; GFX6-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[S_BUFFER_LOAD_DWORD_IMM]] + ; GFX6-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY4]], implicit $exec + ; GFX6-NEXT: $sgpr0 = COPY [[V_READFIRSTLANE_B32_]] + ; GFX6-NEXT: SI_RETURN_TO_EPILOG implicit $sgpr0 ; GFX7-LABEL: name: s_buffer_load_i32_offset_256 ; GFX7: bb.1 (%ir-block.0): - ; GFX7: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5 - ; GFX7: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2 - ; GFX7: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3 - ; GFX7: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4 - ; GFX7: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5 - ; GFX7: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 - ; GFX7: [[S_BUFFER_LOAD_DWORD_IMM:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_IMM [[REG_SEQUENCE]], 64, 0 :: (dereferenceable invariant load (s32)) - ; GFX7: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[S_BUFFER_LOAD_DWORD_IMM]] - ; GFX7: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY4]], implicit $exec - ; GFX7: $sgpr0 = COPY [[V_READFIRSTLANE_B32_]] - ; GFX7: SI_RETURN_TO_EPILOG implicit $sgpr0 + ; GFX7-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5 + ; GFX7-NEXT: {{ $}} + ; GFX7-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2 + ; GFX7-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3 + ; GFX7-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4 + ; GFX7-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5 + ; GFX7-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 + ; GFX7-NEXT: [[S_BUFFER_LOAD_DWORD_IMM:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_IMM [[REG_SEQUENCE]], 64, 0 :: (dereferenceable invariant load (s32)) + ; GFX7-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[S_BUFFER_LOAD_DWORD_IMM]] + ; GFX7-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY4]], implicit $exec + ; GFX7-NEXT: $sgpr0 = COPY [[V_READFIRSTLANE_B32_]] + ; GFX7-NEXT: SI_RETURN_TO_EPILOG implicit $sgpr0 ; GFX8-LABEL: name: s_buffer_load_i32_offset_256 ; GFX8: bb.1 (%ir-block.0): - ; GFX8: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5 - ; GFX8: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2 - ; GFX8: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3 - ; GFX8: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4 - ; GFX8: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5 - ; GFX8: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 - ; GFX8: [[S_BUFFER_LOAD_DWORD_IMM:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_IMM [[REG_SEQUENCE]], 256, 0 :: (dereferenceable invariant load (s32)) - ; GFX8: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[S_BUFFER_LOAD_DWORD_IMM]] - ; GFX8: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = 
V_READFIRSTLANE_B32 [[COPY4]], implicit $exec - ; GFX8: $sgpr0 = COPY [[V_READFIRSTLANE_B32_]] - ; GFX8: SI_RETURN_TO_EPILOG implicit $sgpr0 + ; GFX8-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5 + ; GFX8-NEXT: {{ $}} + ; GFX8-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2 + ; GFX8-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3 + ; GFX8-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4 + ; GFX8-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5 + ; GFX8-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 + ; GFX8-NEXT: [[S_BUFFER_LOAD_DWORD_IMM:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_IMM [[REG_SEQUENCE]], 256, 0 :: (dereferenceable invariant load (s32)) + ; GFX8-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[S_BUFFER_LOAD_DWORD_IMM]] + ; GFX8-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY4]], implicit $exec + ; GFX8-NEXT: $sgpr0 = COPY [[V_READFIRSTLANE_B32_]] + ; GFX8-NEXT: SI_RETURN_TO_EPILOG implicit $sgpr0 %val = call i32 @llvm.amdgcn.s.buffer.load.i32(<4 x i32> %rsrc, i32 256, i32 0) ret i32 %val } @@ -795,43 +816,46 @@ define amdgpu_ps i32 @s_buffer_load_i32_offset_256(<4 x i32> inreg %rsrc) { define amdgpu_ps i32 @s_buffer_load_i32_offset_1020(<4 x i32> inreg %rsrc) { ; GFX6-LABEL: name: s_buffer_load_i32_offset_1020 ; GFX6: bb.1 (%ir-block.0): - ; GFX6: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5 - ; GFX6: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2 - ; GFX6: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3 - ; GFX6: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4 - ; GFX6: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5 - ; GFX6: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 - ; GFX6: [[S_BUFFER_LOAD_DWORD_IMM:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_IMM [[REG_SEQUENCE]], 255, 0 :: (dereferenceable invariant load (s32)) - ; GFX6: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[S_BUFFER_LOAD_DWORD_IMM]] - ; GFX6: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY4]], implicit $exec - ; GFX6: $sgpr0 = COPY [[V_READFIRSTLANE_B32_]] - ; GFX6: SI_RETURN_TO_EPILOG implicit $sgpr0 + ; GFX6-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5 + ; GFX6-NEXT: {{ $}} + ; GFX6-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2 + ; GFX6-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3 + ; GFX6-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4 + ; GFX6-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5 + ; GFX6-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 + ; GFX6-NEXT: [[S_BUFFER_LOAD_DWORD_IMM:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_IMM [[REG_SEQUENCE]], 255, 0 :: (dereferenceable invariant load (s32)) + ; GFX6-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[S_BUFFER_LOAD_DWORD_IMM]] + ; GFX6-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY4]], implicit $exec + ; GFX6-NEXT: $sgpr0 = COPY [[V_READFIRSTLANE_B32_]] + ; GFX6-NEXT: SI_RETURN_TO_EPILOG implicit $sgpr0 ; GFX7-LABEL: name: s_buffer_load_i32_offset_1020 ; GFX7: bb.1 (%ir-block.0): - ; GFX7: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5 - ; GFX7: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2 - ; GFX7: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3 - ; GFX7: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4 - ; GFX7: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5 - ; GFX7: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], 
%subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 - ; GFX7: [[S_BUFFER_LOAD_DWORD_IMM:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_IMM [[REG_SEQUENCE]], 255, 0 :: (dereferenceable invariant load (s32)) - ; GFX7: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[S_BUFFER_LOAD_DWORD_IMM]] - ; GFX7: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY4]], implicit $exec - ; GFX7: $sgpr0 = COPY [[V_READFIRSTLANE_B32_]] - ; GFX7: SI_RETURN_TO_EPILOG implicit $sgpr0 + ; GFX7-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5 + ; GFX7-NEXT: {{ $}} + ; GFX7-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2 + ; GFX7-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3 + ; GFX7-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4 + ; GFX7-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5 + ; GFX7-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 + ; GFX7-NEXT: [[S_BUFFER_LOAD_DWORD_IMM:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_IMM [[REG_SEQUENCE]], 255, 0 :: (dereferenceable invariant load (s32)) + ; GFX7-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[S_BUFFER_LOAD_DWORD_IMM]] + ; GFX7-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY4]], implicit $exec + ; GFX7-NEXT: $sgpr0 = COPY [[V_READFIRSTLANE_B32_]] + ; GFX7-NEXT: SI_RETURN_TO_EPILOG implicit $sgpr0 ; GFX8-LABEL: name: s_buffer_load_i32_offset_1020 ; GFX8: bb.1 (%ir-block.0): - ; GFX8: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5 - ; GFX8: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2 - ; GFX8: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3 - ; GFX8: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4 - ; GFX8: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5 - ; GFX8: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 - ; GFX8: [[S_BUFFER_LOAD_DWORD_IMM:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_IMM [[REG_SEQUENCE]], 1020, 0 :: (dereferenceable invariant load (s32)) - ; GFX8: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[S_BUFFER_LOAD_DWORD_IMM]] - ; GFX8: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY4]], implicit $exec - ; GFX8: $sgpr0 = COPY [[V_READFIRSTLANE_B32_]] - ; GFX8: SI_RETURN_TO_EPILOG implicit $sgpr0 + ; GFX8-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5 + ; GFX8-NEXT: {{ $}} + ; GFX8-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2 + ; GFX8-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3 + ; GFX8-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4 + ; GFX8-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5 + ; GFX8-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 + ; GFX8-NEXT: [[S_BUFFER_LOAD_DWORD_IMM:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_IMM [[REG_SEQUENCE]], 1020, 0 :: (dereferenceable invariant load (s32)) + ; GFX8-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[S_BUFFER_LOAD_DWORD_IMM]] + ; GFX8-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY4]], implicit $exec + ; GFX8-NEXT: $sgpr0 = COPY [[V_READFIRSTLANE_B32_]] + ; GFX8-NEXT: SI_RETURN_TO_EPILOG implicit $sgpr0 %val = call i32 @llvm.amdgcn.s.buffer.load.i32(<4 x i32> %rsrc, i32 1020, i32 0) ret i32 %val } @@ -839,45 +863,48 @@ define amdgpu_ps i32 @s_buffer_load_i32_offset_1020(<4 x i32> inreg %rsrc) { define amdgpu_ps i32 @s_buffer_load_i32_offset_1023(<4 x i32> inreg %rsrc) { ; GFX6-LABEL: name: s_buffer_load_i32_offset_1023 ; GFX6: bb.1 (%ir-block.0): - 
; GFX6: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5 - ; GFX6: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2 - ; GFX6: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3 - ; GFX6: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4 - ; GFX6: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5 - ; GFX6: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 - ; GFX6: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 1023 - ; GFX6: [[S_BUFFER_LOAD_DWORD_SGPR:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_SGPR [[REG_SEQUENCE]], [[S_MOV_B32_]], 0 :: (dereferenceable invariant load (s32)) - ; GFX6: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[S_BUFFER_LOAD_DWORD_SGPR]] - ; GFX6: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY4]], implicit $exec - ; GFX6: $sgpr0 = COPY [[V_READFIRSTLANE_B32_]] - ; GFX6: SI_RETURN_TO_EPILOG implicit $sgpr0 + ; GFX6-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5 + ; GFX6-NEXT: {{ $}} + ; GFX6-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2 + ; GFX6-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3 + ; GFX6-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4 + ; GFX6-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5 + ; GFX6-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 + ; GFX6-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 1023 + ; GFX6-NEXT: [[S_BUFFER_LOAD_DWORD_SGPR:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_SGPR [[REG_SEQUENCE]], [[S_MOV_B32_]], 0 :: (dereferenceable invariant load (s32)) + ; GFX6-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[S_BUFFER_LOAD_DWORD_SGPR]] + ; GFX6-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY4]], implicit $exec + ; GFX6-NEXT: $sgpr0 = COPY [[V_READFIRSTLANE_B32_]] + ; GFX6-NEXT: SI_RETURN_TO_EPILOG implicit $sgpr0 ; GFX7-LABEL: name: s_buffer_load_i32_offset_1023 ; GFX7: bb.1 (%ir-block.0): - ; GFX7: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5 - ; GFX7: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2 - ; GFX7: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3 - ; GFX7: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4 - ; GFX7: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5 - ; GFX7: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 - ; GFX7: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 1023 - ; GFX7: [[S_BUFFER_LOAD_DWORD_SGPR:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_SGPR [[REG_SEQUENCE]], [[S_MOV_B32_]], 0 :: (dereferenceable invariant load (s32)) - ; GFX7: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[S_BUFFER_LOAD_DWORD_SGPR]] - ; GFX7: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY4]], implicit $exec - ; GFX7: $sgpr0 = COPY [[V_READFIRSTLANE_B32_]] - ; GFX7: SI_RETURN_TO_EPILOG implicit $sgpr0 + ; GFX7-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5 + ; GFX7-NEXT: {{ $}} + ; GFX7-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2 + ; GFX7-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3 + ; GFX7-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4 + ; GFX7-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5 + ; GFX7-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 + ; GFX7-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 1023 + ; GFX7-NEXT: [[S_BUFFER_LOAD_DWORD_SGPR:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_SGPR [[REG_SEQUENCE]], [[S_MOV_B32_]], 0 :: (dereferenceable invariant load (s32)) 
+ ; GFX7-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[S_BUFFER_LOAD_DWORD_SGPR]] + ; GFX7-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY4]], implicit $exec + ; GFX7-NEXT: $sgpr0 = COPY [[V_READFIRSTLANE_B32_]] + ; GFX7-NEXT: SI_RETURN_TO_EPILOG implicit $sgpr0 ; GFX8-LABEL: name: s_buffer_load_i32_offset_1023 ; GFX8: bb.1 (%ir-block.0): - ; GFX8: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5 - ; GFX8: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2 - ; GFX8: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3 - ; GFX8: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4 - ; GFX8: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5 - ; GFX8: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 - ; GFX8: [[S_BUFFER_LOAD_DWORD_IMM:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_IMM [[REG_SEQUENCE]], 1023, 0 :: (dereferenceable invariant load (s32)) - ; GFX8: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[S_BUFFER_LOAD_DWORD_IMM]] - ; GFX8: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY4]], implicit $exec - ; GFX8: $sgpr0 = COPY [[V_READFIRSTLANE_B32_]] - ; GFX8: SI_RETURN_TO_EPILOG implicit $sgpr0 + ; GFX8-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5 + ; GFX8-NEXT: {{ $}} + ; GFX8-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2 + ; GFX8-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3 + ; GFX8-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4 + ; GFX8-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5 + ; GFX8-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 + ; GFX8-NEXT: [[S_BUFFER_LOAD_DWORD_IMM:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_IMM [[REG_SEQUENCE]], 1023, 0 :: (dereferenceable invariant load (s32)) + ; GFX8-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[S_BUFFER_LOAD_DWORD_IMM]] + ; GFX8-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY4]], implicit $exec + ; GFX8-NEXT: $sgpr0 = COPY [[V_READFIRSTLANE_B32_]] + ; GFX8-NEXT: SI_RETURN_TO_EPILOG implicit $sgpr0 %val = call i32 @llvm.amdgcn.s.buffer.load.i32(<4 x i32> %rsrc, i32 1023, i32 0) ret i32 %val } @@ -885,44 +912,47 @@ define amdgpu_ps i32 @s_buffer_load_i32_offset_1023(<4 x i32> inreg %rsrc) { define amdgpu_ps i32 @s_buffer_load_i32_offset_1024(<4 x i32> inreg %rsrc) { ; GFX6-LABEL: name: s_buffer_load_i32_offset_1024 ; GFX6: bb.1 (%ir-block.0): - ; GFX6: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5 - ; GFX6: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2 - ; GFX6: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3 - ; GFX6: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4 - ; GFX6: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5 - ; GFX6: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 - ; GFX6: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 1024 - ; GFX6: [[S_BUFFER_LOAD_DWORD_SGPR:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_SGPR [[REG_SEQUENCE]], [[S_MOV_B32_]], 0 :: (dereferenceable invariant load (s32)) - ; GFX6: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[S_BUFFER_LOAD_DWORD_SGPR]] - ; GFX6: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY4]], implicit $exec - ; GFX6: $sgpr0 = COPY [[V_READFIRSTLANE_B32_]] - ; GFX6: SI_RETURN_TO_EPILOG implicit $sgpr0 + ; GFX6-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5 + ; GFX6-NEXT: {{ $}} + ; GFX6-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2 + ; GFX6-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3 + ; GFX6-NEXT: 
[[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4 + ; GFX6-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5 + ; GFX6-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 + ; GFX6-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 1024 + ; GFX6-NEXT: [[S_BUFFER_LOAD_DWORD_SGPR:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_SGPR [[REG_SEQUENCE]], [[S_MOV_B32_]], 0 :: (dereferenceable invariant load (s32)) + ; GFX6-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[S_BUFFER_LOAD_DWORD_SGPR]] + ; GFX6-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY4]], implicit $exec + ; GFX6-NEXT: $sgpr0 = COPY [[V_READFIRSTLANE_B32_]] + ; GFX6-NEXT: SI_RETURN_TO_EPILOG implicit $sgpr0 ; GFX7-LABEL: name: s_buffer_load_i32_offset_1024 ; GFX7: bb.1 (%ir-block.0): - ; GFX7: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5 - ; GFX7: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2 - ; GFX7: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3 - ; GFX7: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4 - ; GFX7: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5 - ; GFX7: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 - ; GFX7: [[S_BUFFER_LOAD_DWORD_IMM_ci:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_IMM_ci [[REG_SEQUENCE]], 256, 0 :: (dereferenceable invariant load (s32)) - ; GFX7: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[S_BUFFER_LOAD_DWORD_IMM_ci]] - ; GFX7: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY4]], implicit $exec - ; GFX7: $sgpr0 = COPY [[V_READFIRSTLANE_B32_]] - ; GFX7: SI_RETURN_TO_EPILOG implicit $sgpr0 + ; GFX7-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5 + ; GFX7-NEXT: {{ $}} + ; GFX7-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2 + ; GFX7-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3 + ; GFX7-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4 + ; GFX7-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5 + ; GFX7-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 + ; GFX7-NEXT: [[S_BUFFER_LOAD_DWORD_IMM_ci:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_IMM_ci [[REG_SEQUENCE]], 256, 0 :: (dereferenceable invariant load (s32)) + ; GFX7-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[S_BUFFER_LOAD_DWORD_IMM_ci]] + ; GFX7-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY4]], implicit $exec + ; GFX7-NEXT: $sgpr0 = COPY [[V_READFIRSTLANE_B32_]] + ; GFX7-NEXT: SI_RETURN_TO_EPILOG implicit $sgpr0 ; GFX8-LABEL: name: s_buffer_load_i32_offset_1024 ; GFX8: bb.1 (%ir-block.0): - ; GFX8: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5 - ; GFX8: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2 - ; GFX8: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3 - ; GFX8: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4 - ; GFX8: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5 - ; GFX8: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 - ; GFX8: [[S_BUFFER_LOAD_DWORD_IMM:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_IMM [[REG_SEQUENCE]], 1024, 0 :: (dereferenceable invariant load (s32)) - ; GFX8: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[S_BUFFER_LOAD_DWORD_IMM]] - ; GFX8: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY4]], implicit $exec - ; GFX8: $sgpr0 = COPY [[V_READFIRSTLANE_B32_]] - ; GFX8: SI_RETURN_TO_EPILOG implicit $sgpr0 + ; GFX8-NEXT: liveins: $sgpr2, 
$sgpr3, $sgpr4, $sgpr5 + ; GFX8-NEXT: {{ $}} + ; GFX8-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2 + ; GFX8-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3 + ; GFX8-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4 + ; GFX8-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5 + ; GFX8-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 + ; GFX8-NEXT: [[S_BUFFER_LOAD_DWORD_IMM:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_IMM [[REG_SEQUENCE]], 1024, 0 :: (dereferenceable invariant load (s32)) + ; GFX8-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[S_BUFFER_LOAD_DWORD_IMM]] + ; GFX8-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY4]], implicit $exec + ; GFX8-NEXT: $sgpr0 = COPY [[V_READFIRSTLANE_B32_]] + ; GFX8-NEXT: SI_RETURN_TO_EPILOG implicit $sgpr0 %val = call i32 @llvm.amdgcn.s.buffer.load.i32(<4 x i32> %rsrc, i32 1024, i32 0) ret i32 %val } @@ -930,45 +960,48 @@ define amdgpu_ps i32 @s_buffer_load_i32_offset_1024(<4 x i32> inreg %rsrc) { define amdgpu_ps i32 @s_buffer_load_i32_offset_1025(<4 x i32> inreg %rsrc) { ; GFX6-LABEL: name: s_buffer_load_i32_offset_1025 ; GFX6: bb.1 (%ir-block.0): - ; GFX6: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5 - ; GFX6: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2 - ; GFX6: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3 - ; GFX6: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4 - ; GFX6: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5 - ; GFX6: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 - ; GFX6: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 1025 - ; GFX6: [[S_BUFFER_LOAD_DWORD_SGPR:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_SGPR [[REG_SEQUENCE]], [[S_MOV_B32_]], 0 :: (dereferenceable invariant load (s32)) - ; GFX6: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[S_BUFFER_LOAD_DWORD_SGPR]] - ; GFX6: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY4]], implicit $exec - ; GFX6: $sgpr0 = COPY [[V_READFIRSTLANE_B32_]] - ; GFX6: SI_RETURN_TO_EPILOG implicit $sgpr0 + ; GFX6-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5 + ; GFX6-NEXT: {{ $}} + ; GFX6-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2 + ; GFX6-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3 + ; GFX6-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4 + ; GFX6-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5 + ; GFX6-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 + ; GFX6-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 1025 + ; GFX6-NEXT: [[S_BUFFER_LOAD_DWORD_SGPR:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_SGPR [[REG_SEQUENCE]], [[S_MOV_B32_]], 0 :: (dereferenceable invariant load (s32)) + ; GFX6-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[S_BUFFER_LOAD_DWORD_SGPR]] + ; GFX6-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY4]], implicit $exec + ; GFX6-NEXT: $sgpr0 = COPY [[V_READFIRSTLANE_B32_]] + ; GFX6-NEXT: SI_RETURN_TO_EPILOG implicit $sgpr0 ; GFX7-LABEL: name: s_buffer_load_i32_offset_1025 ; GFX7: bb.1 (%ir-block.0): - ; GFX7: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5 - ; GFX7: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2 - ; GFX7: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3 - ; GFX7: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4 - ; GFX7: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5 - ; GFX7: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], 
%subreg.sub2, [[COPY3]], %subreg.sub3 - ; GFX7: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 1025 - ; GFX7: [[S_BUFFER_LOAD_DWORD_SGPR:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_SGPR [[REG_SEQUENCE]], [[S_MOV_B32_]], 0 :: (dereferenceable invariant load (s32)) - ; GFX7: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[S_BUFFER_LOAD_DWORD_SGPR]] - ; GFX7: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY4]], implicit $exec - ; GFX7: $sgpr0 = COPY [[V_READFIRSTLANE_B32_]] - ; GFX7: SI_RETURN_TO_EPILOG implicit $sgpr0 + ; GFX7-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5 + ; GFX7-NEXT: {{ $}} + ; GFX7-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2 + ; GFX7-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3 + ; GFX7-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4 + ; GFX7-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5 + ; GFX7-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 + ; GFX7-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 1025 + ; GFX7-NEXT: [[S_BUFFER_LOAD_DWORD_SGPR:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_SGPR [[REG_SEQUENCE]], [[S_MOV_B32_]], 0 :: (dereferenceable invariant load (s32)) + ; GFX7-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[S_BUFFER_LOAD_DWORD_SGPR]] + ; GFX7-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY4]], implicit $exec + ; GFX7-NEXT: $sgpr0 = COPY [[V_READFIRSTLANE_B32_]] + ; GFX7-NEXT: SI_RETURN_TO_EPILOG implicit $sgpr0 ; GFX8-LABEL: name: s_buffer_load_i32_offset_1025 ; GFX8: bb.1 (%ir-block.0): - ; GFX8: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5 - ; GFX8: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2 - ; GFX8: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3 - ; GFX8: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4 - ; GFX8: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5 - ; GFX8: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 - ; GFX8: [[S_BUFFER_LOAD_DWORD_IMM:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_IMM [[REG_SEQUENCE]], 1025, 0 :: (dereferenceable invariant load (s32)) - ; GFX8: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[S_BUFFER_LOAD_DWORD_IMM]] - ; GFX8: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY4]], implicit $exec - ; GFX8: $sgpr0 = COPY [[V_READFIRSTLANE_B32_]] - ; GFX8: SI_RETURN_TO_EPILOG implicit $sgpr0 + ; GFX8-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5 + ; GFX8-NEXT: {{ $}} + ; GFX8-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2 + ; GFX8-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3 + ; GFX8-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4 + ; GFX8-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5 + ; GFX8-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 + ; GFX8-NEXT: [[S_BUFFER_LOAD_DWORD_IMM:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_IMM [[REG_SEQUENCE]], 1025, 0 :: (dereferenceable invariant load (s32)) + ; GFX8-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[S_BUFFER_LOAD_DWORD_IMM]] + ; GFX8-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY4]], implicit $exec + ; GFX8-NEXT: $sgpr0 = COPY [[V_READFIRSTLANE_B32_]] + ; GFX8-NEXT: SI_RETURN_TO_EPILOG implicit $sgpr0 %val = call i32 @llvm.amdgcn.s.buffer.load.i32(<4 x i32> %rsrc, i32 1025, i32 0) ret i32 %val } @@ -976,46 +1009,49 @@ define amdgpu_ps i32 @s_buffer_load_i32_offset_1025(<4 x i32> inreg %rsrc) { define amdgpu_ps i32 
@s_buffer_load_i32_offset_neg1(<4 x i32> inreg %desc) { ; GFX6-LABEL: name: s_buffer_load_i32_offset_neg1 ; GFX6: bb.1 (%ir-block.0): - ; GFX6: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5 - ; GFX6: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2 - ; GFX6: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3 - ; GFX6: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4 - ; GFX6: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5 - ; GFX6: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 - ; GFX6: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 -1 - ; GFX6: [[S_BUFFER_LOAD_DWORD_SGPR:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_SGPR [[REG_SEQUENCE]], [[S_MOV_B32_]], 0 :: (dereferenceable invariant load (s32)) - ; GFX6: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[S_BUFFER_LOAD_DWORD_SGPR]] - ; GFX6: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY4]], implicit $exec - ; GFX6: $sgpr0 = COPY [[V_READFIRSTLANE_B32_]] - ; GFX6: SI_RETURN_TO_EPILOG implicit $sgpr0 + ; GFX6-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5 + ; GFX6-NEXT: {{ $}} + ; GFX6-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2 + ; GFX6-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3 + ; GFX6-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4 + ; GFX6-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5 + ; GFX6-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 + ; GFX6-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 -1 + ; GFX6-NEXT: [[S_BUFFER_LOAD_DWORD_SGPR:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_SGPR [[REG_SEQUENCE]], [[S_MOV_B32_]], 0 :: (dereferenceable invariant load (s32)) + ; GFX6-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[S_BUFFER_LOAD_DWORD_SGPR]] + ; GFX6-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY4]], implicit $exec + ; GFX6-NEXT: $sgpr0 = COPY [[V_READFIRSTLANE_B32_]] + ; GFX6-NEXT: SI_RETURN_TO_EPILOG implicit $sgpr0 ; GFX7-LABEL: name: s_buffer_load_i32_offset_neg1 ; GFX7: bb.1 (%ir-block.0): - ; GFX7: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5 - ; GFX7: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2 - ; GFX7: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3 - ; GFX7: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4 - ; GFX7: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5 - ; GFX7: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 - ; GFX7: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 -1 - ; GFX7: [[S_BUFFER_LOAD_DWORD_SGPR:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_SGPR [[REG_SEQUENCE]], [[S_MOV_B32_]], 0 :: (dereferenceable invariant load (s32)) - ; GFX7: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[S_BUFFER_LOAD_DWORD_SGPR]] - ; GFX7: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY4]], implicit $exec - ; GFX7: $sgpr0 = COPY [[V_READFIRSTLANE_B32_]] - ; GFX7: SI_RETURN_TO_EPILOG implicit $sgpr0 + ; GFX7-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5 + ; GFX7-NEXT: {{ $}} + ; GFX7-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2 + ; GFX7-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3 + ; GFX7-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4 + ; GFX7-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5 + ; GFX7-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 + ; GFX7-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 -1 + ; GFX7-NEXT: 
[[S_BUFFER_LOAD_DWORD_SGPR:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_SGPR [[REG_SEQUENCE]], [[S_MOV_B32_]], 0 :: (dereferenceable invariant load (s32)) + ; GFX7-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[S_BUFFER_LOAD_DWORD_SGPR]] + ; GFX7-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY4]], implicit $exec + ; GFX7-NEXT: $sgpr0 = COPY [[V_READFIRSTLANE_B32_]] + ; GFX7-NEXT: SI_RETURN_TO_EPILOG implicit $sgpr0 ; GFX8-LABEL: name: s_buffer_load_i32_offset_neg1 ; GFX8: bb.1 (%ir-block.0): - ; GFX8: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5 - ; GFX8: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2 - ; GFX8: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3 - ; GFX8: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4 - ; GFX8: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5 - ; GFX8: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 - ; GFX8: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 -1 - ; GFX8: [[S_BUFFER_LOAD_DWORD_SGPR:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_SGPR [[REG_SEQUENCE]], [[S_MOV_B32_]], 0 :: (dereferenceable invariant load (s32)) - ; GFX8: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[S_BUFFER_LOAD_DWORD_SGPR]] - ; GFX8: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY4]], implicit $exec - ; GFX8: $sgpr0 = COPY [[V_READFIRSTLANE_B32_]] - ; GFX8: SI_RETURN_TO_EPILOG implicit $sgpr0 + ; GFX8-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5 + ; GFX8-NEXT: {{ $}} + ; GFX8-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2 + ; GFX8-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3 + ; GFX8-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4 + ; GFX8-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5 + ; GFX8-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 + ; GFX8-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 -1 + ; GFX8-NEXT: [[S_BUFFER_LOAD_DWORD_SGPR:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_SGPR [[REG_SEQUENCE]], [[S_MOV_B32_]], 0 :: (dereferenceable invariant load (s32)) + ; GFX8-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[S_BUFFER_LOAD_DWORD_SGPR]] + ; GFX8-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY4]], implicit $exec + ; GFX8-NEXT: $sgpr0 = COPY [[V_READFIRSTLANE_B32_]] + ; GFX8-NEXT: SI_RETURN_TO_EPILOG implicit $sgpr0 %load = call i32 @llvm.amdgcn.s.buffer.load.i32(<4 x i32> %desc, i32 -1, i32 0) ret i32 %load } @@ -1023,45 +1059,48 @@ define amdgpu_ps i32 @s_buffer_load_i32_offset_neg1(<4 x i32> inreg %desc) { define amdgpu_ps i32 @s_buffer_load_i32_offset_neg4(<4 x i32> inreg %desc) { ; GFX6-LABEL: name: s_buffer_load_i32_offset_neg4 ; GFX6: bb.1 (%ir-block.0): - ; GFX6: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5 - ; GFX6: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2 - ; GFX6: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3 - ; GFX6: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4 - ; GFX6: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5 - ; GFX6: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 - ; GFX6: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 -4 - ; GFX6: [[S_BUFFER_LOAD_DWORD_SGPR:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_SGPR [[REG_SEQUENCE]], [[S_MOV_B32_]], 0 :: (dereferenceable invariant load (s32)) - ; GFX6: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[S_BUFFER_LOAD_DWORD_SGPR]] - ; GFX6: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY4]], implicit 
$exec - ; GFX6: $sgpr0 = COPY [[V_READFIRSTLANE_B32_]] - ; GFX6: SI_RETURN_TO_EPILOG implicit $sgpr0 + ; GFX6-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5 + ; GFX6-NEXT: {{ $}} + ; GFX6-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2 + ; GFX6-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3 + ; GFX6-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4 + ; GFX6-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5 + ; GFX6-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 + ; GFX6-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 -4 + ; GFX6-NEXT: [[S_BUFFER_LOAD_DWORD_SGPR:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_SGPR [[REG_SEQUENCE]], [[S_MOV_B32_]], 0 :: (dereferenceable invariant load (s32)) + ; GFX6-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[S_BUFFER_LOAD_DWORD_SGPR]] + ; GFX6-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY4]], implicit $exec + ; GFX6-NEXT: $sgpr0 = COPY [[V_READFIRSTLANE_B32_]] + ; GFX6-NEXT: SI_RETURN_TO_EPILOG implicit $sgpr0 ; GFX7-LABEL: name: s_buffer_load_i32_offset_neg4 ; GFX7: bb.1 (%ir-block.0): - ; GFX7: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5 - ; GFX7: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2 - ; GFX7: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3 - ; GFX7: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4 - ; GFX7: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5 - ; GFX7: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 - ; GFX7: [[S_BUFFER_LOAD_DWORD_IMM_ci:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_IMM_ci [[REG_SEQUENCE]], 1073741823, 0 :: (dereferenceable invariant load (s32)) - ; GFX7: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[S_BUFFER_LOAD_DWORD_IMM_ci]] - ; GFX7: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY4]], implicit $exec - ; GFX7: $sgpr0 = COPY [[V_READFIRSTLANE_B32_]] - ; GFX7: SI_RETURN_TO_EPILOG implicit $sgpr0 + ; GFX7-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5 + ; GFX7-NEXT: {{ $}} + ; GFX7-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2 + ; GFX7-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3 + ; GFX7-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4 + ; GFX7-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5 + ; GFX7-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 + ; GFX7-NEXT: [[S_BUFFER_LOAD_DWORD_IMM_ci:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_IMM_ci [[REG_SEQUENCE]], 1073741823, 0 :: (dereferenceable invariant load (s32)) + ; GFX7-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[S_BUFFER_LOAD_DWORD_IMM_ci]] + ; GFX7-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY4]], implicit $exec + ; GFX7-NEXT: $sgpr0 = COPY [[V_READFIRSTLANE_B32_]] + ; GFX7-NEXT: SI_RETURN_TO_EPILOG implicit $sgpr0 ; GFX8-LABEL: name: s_buffer_load_i32_offset_neg4 ; GFX8: bb.1 (%ir-block.0): - ; GFX8: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5 - ; GFX8: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2 - ; GFX8: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3 - ; GFX8: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4 - ; GFX8: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5 - ; GFX8: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 - ; GFX8: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 -4 - ; GFX8: [[S_BUFFER_LOAD_DWORD_SGPR:%[0-9]+]]:sreg_32_xm0_xexec = 
S_BUFFER_LOAD_DWORD_SGPR [[REG_SEQUENCE]], [[S_MOV_B32_]], 0 :: (dereferenceable invariant load (s32)) - ; GFX8: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[S_BUFFER_LOAD_DWORD_SGPR]] - ; GFX8: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY4]], implicit $exec - ; GFX8: $sgpr0 = COPY [[V_READFIRSTLANE_B32_]] - ; GFX8: SI_RETURN_TO_EPILOG implicit $sgpr0 + ; GFX8-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5 + ; GFX8-NEXT: {{ $}} + ; GFX8-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2 + ; GFX8-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3 + ; GFX8-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4 + ; GFX8-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5 + ; GFX8-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 + ; GFX8-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 -4 + ; GFX8-NEXT: [[S_BUFFER_LOAD_DWORD_SGPR:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_SGPR [[REG_SEQUENCE]], [[S_MOV_B32_]], 0 :: (dereferenceable invariant load (s32)) + ; GFX8-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[S_BUFFER_LOAD_DWORD_SGPR]] + ; GFX8-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY4]], implicit $exec + ; GFX8-NEXT: $sgpr0 = COPY [[V_READFIRSTLANE_B32_]] + ; GFX8-NEXT: SI_RETURN_TO_EPILOG implicit $sgpr0 %load = call i32 @llvm.amdgcn.s.buffer.load.i32(<4 x i32> %desc, i32 -4, i32 0) ret i32 %load } @@ -1069,45 +1108,48 @@ define amdgpu_ps i32 @s_buffer_load_i32_offset_neg4(<4 x i32> inreg %desc) { define amdgpu_ps i32 @s_buffer_load_i32_offset_neg8(<4 x i32> inreg %desc) { ; GFX6-LABEL: name: s_buffer_load_i32_offset_neg8 ; GFX6: bb.1 (%ir-block.0): - ; GFX6: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5 - ; GFX6: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2 - ; GFX6: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3 - ; GFX6: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4 - ; GFX6: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5 - ; GFX6: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 - ; GFX6: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 -8 - ; GFX6: [[S_BUFFER_LOAD_DWORD_SGPR:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_SGPR [[REG_SEQUENCE]], [[S_MOV_B32_]], 0 :: (dereferenceable invariant load (s32)) - ; GFX6: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[S_BUFFER_LOAD_DWORD_SGPR]] - ; GFX6: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY4]], implicit $exec - ; GFX6: $sgpr0 = COPY [[V_READFIRSTLANE_B32_]] - ; GFX6: SI_RETURN_TO_EPILOG implicit $sgpr0 + ; GFX6-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5 + ; GFX6-NEXT: {{ $}} + ; GFX6-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2 + ; GFX6-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3 + ; GFX6-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4 + ; GFX6-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5 + ; GFX6-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 + ; GFX6-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 -8 + ; GFX6-NEXT: [[S_BUFFER_LOAD_DWORD_SGPR:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_SGPR [[REG_SEQUENCE]], [[S_MOV_B32_]], 0 :: (dereferenceable invariant load (s32)) + ; GFX6-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[S_BUFFER_LOAD_DWORD_SGPR]] + ; GFX6-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY4]], implicit $exec + ; GFX6-NEXT: $sgpr0 = COPY [[V_READFIRSTLANE_B32_]] + ; GFX6-NEXT: 
SI_RETURN_TO_EPILOG implicit $sgpr0 ; GFX7-LABEL: name: s_buffer_load_i32_offset_neg8 ; GFX7: bb.1 (%ir-block.0): - ; GFX7: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5 - ; GFX7: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2 - ; GFX7: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3 - ; GFX7: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4 - ; GFX7: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5 - ; GFX7: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 - ; GFX7: [[S_BUFFER_LOAD_DWORD_IMM_ci:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_IMM_ci [[REG_SEQUENCE]], 1073741822, 0 :: (dereferenceable invariant load (s32)) - ; GFX7: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[S_BUFFER_LOAD_DWORD_IMM_ci]] - ; GFX7: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY4]], implicit $exec - ; GFX7: $sgpr0 = COPY [[V_READFIRSTLANE_B32_]] - ; GFX7: SI_RETURN_TO_EPILOG implicit $sgpr0 + ; GFX7-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5 + ; GFX7-NEXT: {{ $}} + ; GFX7-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2 + ; GFX7-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3 + ; GFX7-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4 + ; GFX7-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5 + ; GFX7-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 + ; GFX7-NEXT: [[S_BUFFER_LOAD_DWORD_IMM_ci:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_IMM_ci [[REG_SEQUENCE]], 1073741822, 0 :: (dereferenceable invariant load (s32)) + ; GFX7-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[S_BUFFER_LOAD_DWORD_IMM_ci]] + ; GFX7-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY4]], implicit $exec + ; GFX7-NEXT: $sgpr0 = COPY [[V_READFIRSTLANE_B32_]] + ; GFX7-NEXT: SI_RETURN_TO_EPILOG implicit $sgpr0 ; GFX8-LABEL: name: s_buffer_load_i32_offset_neg8 ; GFX8: bb.1 (%ir-block.0): - ; GFX8: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5 - ; GFX8: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2 - ; GFX8: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3 - ; GFX8: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4 - ; GFX8: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5 - ; GFX8: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 - ; GFX8: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 -8 - ; GFX8: [[S_BUFFER_LOAD_DWORD_SGPR:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_SGPR [[REG_SEQUENCE]], [[S_MOV_B32_]], 0 :: (dereferenceable invariant load (s32)) - ; GFX8: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[S_BUFFER_LOAD_DWORD_SGPR]] - ; GFX8: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY4]], implicit $exec - ; GFX8: $sgpr0 = COPY [[V_READFIRSTLANE_B32_]] - ; GFX8: SI_RETURN_TO_EPILOG implicit $sgpr0 + ; GFX8-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5 + ; GFX8-NEXT: {{ $}} + ; GFX8-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2 + ; GFX8-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3 + ; GFX8-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4 + ; GFX8-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5 + ; GFX8-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 + ; GFX8-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 -8 + ; GFX8-NEXT: [[S_BUFFER_LOAD_DWORD_SGPR:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_SGPR [[REG_SEQUENCE]], [[S_MOV_B32_]], 0 :: (dereferenceable invariant load (s32)) + ; 
GFX8-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[S_BUFFER_LOAD_DWORD_SGPR]] + ; GFX8-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY4]], implicit $exec + ; GFX8-NEXT: $sgpr0 = COPY [[V_READFIRSTLANE_B32_]] + ; GFX8-NEXT: SI_RETURN_TO_EPILOG implicit $sgpr0 %load = call i32 @llvm.amdgcn.s.buffer.load.i32(<4 x i32> %desc, i32 -8, i32 0) ret i32 %load } @@ -1115,45 +1157,48 @@ define amdgpu_ps i32 @s_buffer_load_i32_offset_neg8(<4 x i32> inreg %desc) { define amdgpu_ps i32 @s_buffer_load_i32_offset_bit31(<4 x i32> inreg %desc) { ; GFX6-LABEL: name: s_buffer_load_i32_offset_bit31 ; GFX6: bb.1 (%ir-block.0): - ; GFX6: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5 - ; GFX6: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2 - ; GFX6: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3 - ; GFX6: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4 - ; GFX6: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5 - ; GFX6: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 - ; GFX6: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 -2147483648 - ; GFX6: [[S_BUFFER_LOAD_DWORD_SGPR:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_SGPR [[REG_SEQUENCE]], [[S_MOV_B32_]], 0 :: (dereferenceable invariant load (s32)) - ; GFX6: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[S_BUFFER_LOAD_DWORD_SGPR]] - ; GFX6: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY4]], implicit $exec - ; GFX6: $sgpr0 = COPY [[V_READFIRSTLANE_B32_]] - ; GFX6: SI_RETURN_TO_EPILOG implicit $sgpr0 + ; GFX6-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5 + ; GFX6-NEXT: {{ $}} + ; GFX6-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2 + ; GFX6-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3 + ; GFX6-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4 + ; GFX6-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5 + ; GFX6-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 + ; GFX6-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 -2147483648 + ; GFX6-NEXT: [[S_BUFFER_LOAD_DWORD_SGPR:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_SGPR [[REG_SEQUENCE]], [[S_MOV_B32_]], 0 :: (dereferenceable invariant load (s32)) + ; GFX6-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[S_BUFFER_LOAD_DWORD_SGPR]] + ; GFX6-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY4]], implicit $exec + ; GFX6-NEXT: $sgpr0 = COPY [[V_READFIRSTLANE_B32_]] + ; GFX6-NEXT: SI_RETURN_TO_EPILOG implicit $sgpr0 ; GFX7-LABEL: name: s_buffer_load_i32_offset_bit31 ; GFX7: bb.1 (%ir-block.0): - ; GFX7: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5 - ; GFX7: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2 - ; GFX7: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3 - ; GFX7: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4 - ; GFX7: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5 - ; GFX7: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 - ; GFX7: [[S_BUFFER_LOAD_DWORD_IMM_ci:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_IMM_ci [[REG_SEQUENCE]], 536870912, 0 :: (dereferenceable invariant load (s32)) - ; GFX7: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[S_BUFFER_LOAD_DWORD_IMM_ci]] - ; GFX7: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY4]], implicit $exec - ; GFX7: $sgpr0 = COPY [[V_READFIRSTLANE_B32_]] - ; GFX7: SI_RETURN_TO_EPILOG implicit $sgpr0 + ; GFX7-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5 + ; GFX7-NEXT: {{ $}} + ; GFX7-NEXT: 
[[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2 + ; GFX7-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3 + ; GFX7-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4 + ; GFX7-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5 + ; GFX7-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 + ; GFX7-NEXT: [[S_BUFFER_LOAD_DWORD_IMM_ci:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_IMM_ci [[REG_SEQUENCE]], 536870912, 0 :: (dereferenceable invariant load (s32)) + ; GFX7-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[S_BUFFER_LOAD_DWORD_IMM_ci]] + ; GFX7-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY4]], implicit $exec + ; GFX7-NEXT: $sgpr0 = COPY [[V_READFIRSTLANE_B32_]] + ; GFX7-NEXT: SI_RETURN_TO_EPILOG implicit $sgpr0 ; GFX8-LABEL: name: s_buffer_load_i32_offset_bit31 ; GFX8: bb.1 (%ir-block.0): - ; GFX8: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5 - ; GFX8: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2 - ; GFX8: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3 - ; GFX8: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4 - ; GFX8: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5 - ; GFX8: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 - ; GFX8: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 -2147483648 - ; GFX8: [[S_BUFFER_LOAD_DWORD_SGPR:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_SGPR [[REG_SEQUENCE]], [[S_MOV_B32_]], 0 :: (dereferenceable invariant load (s32)) - ; GFX8: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[S_BUFFER_LOAD_DWORD_SGPR]] - ; GFX8: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY4]], implicit $exec - ; GFX8: $sgpr0 = COPY [[V_READFIRSTLANE_B32_]] - ; GFX8: SI_RETURN_TO_EPILOG implicit $sgpr0 + ; GFX8-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5 + ; GFX8-NEXT: {{ $}} + ; GFX8-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2 + ; GFX8-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3 + ; GFX8-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4 + ; GFX8-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5 + ; GFX8-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 + ; GFX8-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 -2147483648 + ; GFX8-NEXT: [[S_BUFFER_LOAD_DWORD_SGPR:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_SGPR [[REG_SEQUENCE]], [[S_MOV_B32_]], 0 :: (dereferenceable invariant load (s32)) + ; GFX8-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[S_BUFFER_LOAD_DWORD_SGPR]] + ; GFX8-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY4]], implicit $exec + ; GFX8-NEXT: $sgpr0 = COPY [[V_READFIRSTLANE_B32_]] + ; GFX8-NEXT: SI_RETURN_TO_EPILOG implicit $sgpr0 %load = call i32 @llvm.amdgcn.s.buffer.load.i32(<4 x i32> %desc, i32 -2147483648, i32 0) ret i32 %load } @@ -1161,45 +1206,48 @@ define amdgpu_ps i32 @s_buffer_load_i32_offset_bit31(<4 x i32> inreg %desc) { define amdgpu_ps i32 @s_buffer_load_i32_offset_glc_bit30(<4 x i32> inreg %desc) { ; GFX6-LABEL: name: s_buffer_load_i32_offset_glc_bit30 ; GFX6: bb.1 (%ir-block.0): - ; GFX6: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5 - ; GFX6: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2 - ; GFX6: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3 - ; GFX6: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4 - ; GFX6: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5 - ; GFX6: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], 
%subreg.sub2, [[COPY3]], %subreg.sub3 - ; GFX6: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 1073741824 - ; GFX6: [[S_BUFFER_LOAD_DWORD_SGPR:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_SGPR [[REG_SEQUENCE]], [[S_MOV_B32_]], 1 :: (dereferenceable invariant load (s32)) - ; GFX6: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[S_BUFFER_LOAD_DWORD_SGPR]] - ; GFX6: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY4]], implicit $exec - ; GFX6: $sgpr0 = COPY [[V_READFIRSTLANE_B32_]] - ; GFX6: SI_RETURN_TO_EPILOG implicit $sgpr0 + ; GFX6-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5 + ; GFX6-NEXT: {{ $}} + ; GFX6-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2 + ; GFX6-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3 + ; GFX6-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4 + ; GFX6-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5 + ; GFX6-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 + ; GFX6-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 1073741824 + ; GFX6-NEXT: [[S_BUFFER_LOAD_DWORD_SGPR:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_SGPR [[REG_SEQUENCE]], [[S_MOV_B32_]], 1 :: (dereferenceable invariant load (s32)) + ; GFX6-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[S_BUFFER_LOAD_DWORD_SGPR]] + ; GFX6-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY4]], implicit $exec + ; GFX6-NEXT: $sgpr0 = COPY [[V_READFIRSTLANE_B32_]] + ; GFX6-NEXT: SI_RETURN_TO_EPILOG implicit $sgpr0 ; GFX7-LABEL: name: s_buffer_load_i32_offset_glc_bit30 ; GFX7: bb.1 (%ir-block.0): - ; GFX7: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5 - ; GFX7: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2 - ; GFX7: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3 - ; GFX7: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4 - ; GFX7: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5 - ; GFX7: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 - ; GFX7: [[S_BUFFER_LOAD_DWORD_IMM_ci:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_IMM_ci [[REG_SEQUENCE]], 268435456, 1 :: (dereferenceable invariant load (s32)) - ; GFX7: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[S_BUFFER_LOAD_DWORD_IMM_ci]] - ; GFX7: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY4]], implicit $exec - ; GFX7: $sgpr0 = COPY [[V_READFIRSTLANE_B32_]] - ; GFX7: SI_RETURN_TO_EPILOG implicit $sgpr0 + ; GFX7-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5 + ; GFX7-NEXT: {{ $}} + ; GFX7-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2 + ; GFX7-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3 + ; GFX7-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4 + ; GFX7-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5 + ; GFX7-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 + ; GFX7-NEXT: [[S_BUFFER_LOAD_DWORD_IMM_ci:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_IMM_ci [[REG_SEQUENCE]], 268435456, 1 :: (dereferenceable invariant load (s32)) + ; GFX7-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[S_BUFFER_LOAD_DWORD_IMM_ci]] + ; GFX7-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY4]], implicit $exec + ; GFX7-NEXT: $sgpr0 = COPY [[V_READFIRSTLANE_B32_]] + ; GFX7-NEXT: SI_RETURN_TO_EPILOG implicit $sgpr0 ; GFX8-LABEL: name: s_buffer_load_i32_offset_glc_bit30 ; GFX8: bb.1 (%ir-block.0): - ; GFX8: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5 - ; GFX8: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2 
- ; GFX8: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3 - ; GFX8: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4 - ; GFX8: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5 - ; GFX8: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 - ; GFX8: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 1073741824 - ; GFX8: [[S_BUFFER_LOAD_DWORD_SGPR:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_SGPR [[REG_SEQUENCE]], [[S_MOV_B32_]], 1 :: (dereferenceable invariant load (s32)) - ; GFX8: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[S_BUFFER_LOAD_DWORD_SGPR]] - ; GFX8: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY4]], implicit $exec - ; GFX8: $sgpr0 = COPY [[V_READFIRSTLANE_B32_]] - ; GFX8: SI_RETURN_TO_EPILOG implicit $sgpr0 + ; GFX8-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5 + ; GFX8-NEXT: {{ $}} + ; GFX8-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2 + ; GFX8-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3 + ; GFX8-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4 + ; GFX8-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5 + ; GFX8-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 + ; GFX8-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 1073741824 + ; GFX8-NEXT: [[S_BUFFER_LOAD_DWORD_SGPR:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_SGPR [[REG_SEQUENCE]], [[S_MOV_B32_]], 1 :: (dereferenceable invariant load (s32)) + ; GFX8-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[S_BUFFER_LOAD_DWORD_SGPR]] + ; GFX8-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY4]], implicit $exec + ; GFX8-NEXT: $sgpr0 = COPY [[V_READFIRSTLANE_B32_]] + ; GFX8-NEXT: SI_RETURN_TO_EPILOG implicit $sgpr0 %load = call i32 @llvm.amdgcn.s.buffer.load.i32(<4 x i32> %desc, i32 1073741824, i32 1) ret i32 %load } @@ -1207,45 +1255,48 @@ define amdgpu_ps i32 @s_buffer_load_i32_offset_glc_bit30(<4 x i32> inreg %desc) define amdgpu_ps i32 @s_buffer_load_i32_offset_bit29(<4 x i32> inreg %desc) { ; GFX6-LABEL: name: s_buffer_load_i32_offset_bit29 ; GFX6: bb.1 (%ir-block.0): - ; GFX6: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5 - ; GFX6: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2 - ; GFX6: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3 - ; GFX6: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4 - ; GFX6: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5 - ; GFX6: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 - ; GFX6: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 536870912 - ; GFX6: [[S_BUFFER_LOAD_DWORD_SGPR:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_SGPR [[REG_SEQUENCE]], [[S_MOV_B32_]], 0 :: (dereferenceable invariant load (s32)) - ; GFX6: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[S_BUFFER_LOAD_DWORD_SGPR]] - ; GFX6: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY4]], implicit $exec - ; GFX6: $sgpr0 = COPY [[V_READFIRSTLANE_B32_]] - ; GFX6: SI_RETURN_TO_EPILOG implicit $sgpr0 + ; GFX6-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5 + ; GFX6-NEXT: {{ $}} + ; GFX6-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2 + ; GFX6-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3 + ; GFX6-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4 + ; GFX6-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5 + ; GFX6-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 + ; GFX6-NEXT: 
[[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 536870912 + ; GFX6-NEXT: [[S_BUFFER_LOAD_DWORD_SGPR:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_SGPR [[REG_SEQUENCE]], [[S_MOV_B32_]], 0 :: (dereferenceable invariant load (s32)) + ; GFX6-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[S_BUFFER_LOAD_DWORD_SGPR]] + ; GFX6-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY4]], implicit $exec + ; GFX6-NEXT: $sgpr0 = COPY [[V_READFIRSTLANE_B32_]] + ; GFX6-NEXT: SI_RETURN_TO_EPILOG implicit $sgpr0 ; GFX7-LABEL: name: s_buffer_load_i32_offset_bit29 ; GFX7: bb.1 (%ir-block.0): - ; GFX7: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5 - ; GFX7: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2 - ; GFX7: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3 - ; GFX7: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4 - ; GFX7: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5 - ; GFX7: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 - ; GFX7: [[S_BUFFER_LOAD_DWORD_IMM_ci:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_IMM_ci [[REG_SEQUENCE]], 134217728, 0 :: (dereferenceable invariant load (s32)) - ; GFX7: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[S_BUFFER_LOAD_DWORD_IMM_ci]] - ; GFX7: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY4]], implicit $exec - ; GFX7: $sgpr0 = COPY [[V_READFIRSTLANE_B32_]] - ; GFX7: SI_RETURN_TO_EPILOG implicit $sgpr0 + ; GFX7-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5 + ; GFX7-NEXT: {{ $}} + ; GFX7-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2 + ; GFX7-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3 + ; GFX7-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4 + ; GFX7-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5 + ; GFX7-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 + ; GFX7-NEXT: [[S_BUFFER_LOAD_DWORD_IMM_ci:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_IMM_ci [[REG_SEQUENCE]], 134217728, 0 :: (dereferenceable invariant load (s32)) + ; GFX7-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[S_BUFFER_LOAD_DWORD_IMM_ci]] + ; GFX7-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY4]], implicit $exec + ; GFX7-NEXT: $sgpr0 = COPY [[V_READFIRSTLANE_B32_]] + ; GFX7-NEXT: SI_RETURN_TO_EPILOG implicit $sgpr0 ; GFX8-LABEL: name: s_buffer_load_i32_offset_bit29 ; GFX8: bb.1 (%ir-block.0): - ; GFX8: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5 - ; GFX8: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2 - ; GFX8: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3 - ; GFX8: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4 - ; GFX8: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5 - ; GFX8: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 - ; GFX8: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 536870912 - ; GFX8: [[S_BUFFER_LOAD_DWORD_SGPR:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_SGPR [[REG_SEQUENCE]], [[S_MOV_B32_]], 0 :: (dereferenceable invariant load (s32)) - ; GFX8: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[S_BUFFER_LOAD_DWORD_SGPR]] - ; GFX8: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY4]], implicit $exec - ; GFX8: $sgpr0 = COPY [[V_READFIRSTLANE_B32_]] - ; GFX8: SI_RETURN_TO_EPILOG implicit $sgpr0 + ; GFX8-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5 + ; GFX8-NEXT: {{ $}} + ; GFX8-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2 + ; GFX8-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3 + ; GFX8-NEXT: 
[[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4 + ; GFX8-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5 + ; GFX8-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 + ; GFX8-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 536870912 + ; GFX8-NEXT: [[S_BUFFER_LOAD_DWORD_SGPR:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_SGPR [[REG_SEQUENCE]], [[S_MOV_B32_]], 0 :: (dereferenceable invariant load (s32)) + ; GFX8-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[S_BUFFER_LOAD_DWORD_SGPR]] + ; GFX8-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY4]], implicit $exec + ; GFX8-NEXT: $sgpr0 = COPY [[V_READFIRSTLANE_B32_]] + ; GFX8-NEXT: SI_RETURN_TO_EPILOG implicit $sgpr0 %load = call i32 @llvm.amdgcn.s.buffer.load.i32(<4 x i32> %desc, i32 536870912, i32 0) ret i32 %load } @@ -1253,45 +1304,48 @@ define amdgpu_ps i32 @s_buffer_load_i32_offset_bit29(<4 x i32> inreg %desc) { define amdgpu_ps i32 @s_buffer_load_i32_offset_bit21(<4 x i32> inreg %desc) { ; GFX6-LABEL: name: s_buffer_load_i32_offset_bit21 ; GFX6: bb.1 (%ir-block.0): - ; GFX6: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5 - ; GFX6: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2 - ; GFX6: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3 - ; GFX6: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4 - ; GFX6: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5 - ; GFX6: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 - ; GFX6: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 2097152 - ; GFX6: [[S_BUFFER_LOAD_DWORD_SGPR:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_SGPR [[REG_SEQUENCE]], [[S_MOV_B32_]], 0 :: (dereferenceable invariant load (s32)) - ; GFX6: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[S_BUFFER_LOAD_DWORD_SGPR]] - ; GFX6: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY4]], implicit $exec - ; GFX6: $sgpr0 = COPY [[V_READFIRSTLANE_B32_]] - ; GFX6: SI_RETURN_TO_EPILOG implicit $sgpr0 + ; GFX6-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5 + ; GFX6-NEXT: {{ $}} + ; GFX6-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2 + ; GFX6-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3 + ; GFX6-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4 + ; GFX6-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5 + ; GFX6-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 + ; GFX6-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 2097152 + ; GFX6-NEXT: [[S_BUFFER_LOAD_DWORD_SGPR:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_SGPR [[REG_SEQUENCE]], [[S_MOV_B32_]], 0 :: (dereferenceable invariant load (s32)) + ; GFX6-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[S_BUFFER_LOAD_DWORD_SGPR]] + ; GFX6-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY4]], implicit $exec + ; GFX6-NEXT: $sgpr0 = COPY [[V_READFIRSTLANE_B32_]] + ; GFX6-NEXT: SI_RETURN_TO_EPILOG implicit $sgpr0 ; GFX7-LABEL: name: s_buffer_load_i32_offset_bit21 ; GFX7: bb.1 (%ir-block.0): - ; GFX7: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5 - ; GFX7: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2 - ; GFX7: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3 - ; GFX7: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4 - ; GFX7: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5 - ; GFX7: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 - ; GFX7: 
[[S_BUFFER_LOAD_DWORD_IMM_ci:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_IMM_ci [[REG_SEQUENCE]], 524288, 0 :: (dereferenceable invariant load (s32)) - ; GFX7: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[S_BUFFER_LOAD_DWORD_IMM_ci]] - ; GFX7: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY4]], implicit $exec - ; GFX7: $sgpr0 = COPY [[V_READFIRSTLANE_B32_]] - ; GFX7: SI_RETURN_TO_EPILOG implicit $sgpr0 + ; GFX7-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5 + ; GFX7-NEXT: {{ $}} + ; GFX7-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2 + ; GFX7-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3 + ; GFX7-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4 + ; GFX7-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5 + ; GFX7-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 + ; GFX7-NEXT: [[S_BUFFER_LOAD_DWORD_IMM_ci:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_IMM_ci [[REG_SEQUENCE]], 524288, 0 :: (dereferenceable invariant load (s32)) + ; GFX7-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[S_BUFFER_LOAD_DWORD_IMM_ci]] + ; GFX7-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY4]], implicit $exec + ; GFX7-NEXT: $sgpr0 = COPY [[V_READFIRSTLANE_B32_]] + ; GFX7-NEXT: SI_RETURN_TO_EPILOG implicit $sgpr0 ; GFX8-LABEL: name: s_buffer_load_i32_offset_bit21 ; GFX8: bb.1 (%ir-block.0): - ; GFX8: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5 - ; GFX8: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2 - ; GFX8: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3 - ; GFX8: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4 - ; GFX8: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5 - ; GFX8: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 - ; GFX8: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 2097152 - ; GFX8: [[S_BUFFER_LOAD_DWORD_SGPR:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_SGPR [[REG_SEQUENCE]], [[S_MOV_B32_]], 0 :: (dereferenceable invariant load (s32)) - ; GFX8: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[S_BUFFER_LOAD_DWORD_SGPR]] - ; GFX8: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY4]], implicit $exec - ; GFX8: $sgpr0 = COPY [[V_READFIRSTLANE_B32_]] - ; GFX8: SI_RETURN_TO_EPILOG implicit $sgpr0 + ; GFX8-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5 + ; GFX8-NEXT: {{ $}} + ; GFX8-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2 + ; GFX8-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3 + ; GFX8-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4 + ; GFX8-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5 + ; GFX8-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 + ; GFX8-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 2097152 + ; GFX8-NEXT: [[S_BUFFER_LOAD_DWORD_SGPR:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_SGPR [[REG_SEQUENCE]], [[S_MOV_B32_]], 0 :: (dereferenceable invariant load (s32)) + ; GFX8-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[S_BUFFER_LOAD_DWORD_SGPR]] + ; GFX8-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY4]], implicit $exec + ; GFX8-NEXT: $sgpr0 = COPY [[V_READFIRSTLANE_B32_]] + ; GFX8-NEXT: SI_RETURN_TO_EPILOG implicit $sgpr0 %load = call i32 @llvm.amdgcn.s.buffer.load.i32(<4 x i32> %desc, i32 2097152, i32 0) ret i32 %load } @@ -1299,45 +1353,48 @@ define amdgpu_ps i32 @s_buffer_load_i32_offset_bit21(<4 x i32> inreg %desc) { define amdgpu_ps i32 
@s_buffer_load_i32_offset_bit20(<4 x i32> inreg %desc) { ; GFX6-LABEL: name: s_buffer_load_i32_offset_bit20 ; GFX6: bb.1 (%ir-block.0): - ; GFX6: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5 - ; GFX6: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2 - ; GFX6: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3 - ; GFX6: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4 - ; GFX6: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5 - ; GFX6: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 - ; GFX6: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 1048576 - ; GFX6: [[S_BUFFER_LOAD_DWORD_SGPR:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_SGPR [[REG_SEQUENCE]], [[S_MOV_B32_]], 0 :: (dereferenceable invariant load (s32)) - ; GFX6: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[S_BUFFER_LOAD_DWORD_SGPR]] - ; GFX6: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY4]], implicit $exec - ; GFX6: $sgpr0 = COPY [[V_READFIRSTLANE_B32_]] - ; GFX6: SI_RETURN_TO_EPILOG implicit $sgpr0 + ; GFX6-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5 + ; GFX6-NEXT: {{ $}} + ; GFX6-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2 + ; GFX6-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3 + ; GFX6-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4 + ; GFX6-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5 + ; GFX6-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 + ; GFX6-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 1048576 + ; GFX6-NEXT: [[S_BUFFER_LOAD_DWORD_SGPR:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_SGPR [[REG_SEQUENCE]], [[S_MOV_B32_]], 0 :: (dereferenceable invariant load (s32)) + ; GFX6-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[S_BUFFER_LOAD_DWORD_SGPR]] + ; GFX6-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY4]], implicit $exec + ; GFX6-NEXT: $sgpr0 = COPY [[V_READFIRSTLANE_B32_]] + ; GFX6-NEXT: SI_RETURN_TO_EPILOG implicit $sgpr0 ; GFX7-LABEL: name: s_buffer_load_i32_offset_bit20 ; GFX7: bb.1 (%ir-block.0): - ; GFX7: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5 - ; GFX7: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2 - ; GFX7: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3 - ; GFX7: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4 - ; GFX7: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5 - ; GFX7: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 - ; GFX7: [[S_BUFFER_LOAD_DWORD_IMM_ci:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_IMM_ci [[REG_SEQUENCE]], 262144, 0 :: (dereferenceable invariant load (s32)) - ; GFX7: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[S_BUFFER_LOAD_DWORD_IMM_ci]] - ; GFX7: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY4]], implicit $exec - ; GFX7: $sgpr0 = COPY [[V_READFIRSTLANE_B32_]] - ; GFX7: SI_RETURN_TO_EPILOG implicit $sgpr0 + ; GFX7-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5 + ; GFX7-NEXT: {{ $}} + ; GFX7-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2 + ; GFX7-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3 + ; GFX7-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4 + ; GFX7-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5 + ; GFX7-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 + ; GFX7-NEXT: [[S_BUFFER_LOAD_DWORD_IMM_ci:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_IMM_ci [[REG_SEQUENCE]], 262144, 0 :: (dereferenceable 
invariant load (s32)) + ; GFX7-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[S_BUFFER_LOAD_DWORD_IMM_ci]] + ; GFX7-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY4]], implicit $exec + ; GFX7-NEXT: $sgpr0 = COPY [[V_READFIRSTLANE_B32_]] + ; GFX7-NEXT: SI_RETURN_TO_EPILOG implicit $sgpr0 ; GFX8-LABEL: name: s_buffer_load_i32_offset_bit20 ; GFX8: bb.1 (%ir-block.0): - ; GFX8: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5 - ; GFX8: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2 - ; GFX8: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3 - ; GFX8: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4 - ; GFX8: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5 - ; GFX8: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 - ; GFX8: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 1048576 - ; GFX8: [[S_BUFFER_LOAD_DWORD_SGPR:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_SGPR [[REG_SEQUENCE]], [[S_MOV_B32_]], 0 :: (dereferenceable invariant load (s32)) - ; GFX8: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[S_BUFFER_LOAD_DWORD_SGPR]] - ; GFX8: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY4]], implicit $exec - ; GFX8: $sgpr0 = COPY [[V_READFIRSTLANE_B32_]] - ; GFX8: SI_RETURN_TO_EPILOG implicit $sgpr0 + ; GFX8-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5 + ; GFX8-NEXT: {{ $}} + ; GFX8-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2 + ; GFX8-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3 + ; GFX8-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4 + ; GFX8-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5 + ; GFX8-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 + ; GFX8-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 1048576 + ; GFX8-NEXT: [[S_BUFFER_LOAD_DWORD_SGPR:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_SGPR [[REG_SEQUENCE]], [[S_MOV_B32_]], 0 :: (dereferenceable invariant load (s32)) + ; GFX8-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[S_BUFFER_LOAD_DWORD_SGPR]] + ; GFX8-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY4]], implicit $exec + ; GFX8-NEXT: $sgpr0 = COPY [[V_READFIRSTLANE_B32_]] + ; GFX8-NEXT: SI_RETURN_TO_EPILOG implicit $sgpr0 %load = call i32 @llvm.amdgcn.s.buffer.load.i32(<4 x i32> %desc, i32 1048576, i32 0) ret i32 %load } @@ -1345,45 +1402,48 @@ define amdgpu_ps i32 @s_buffer_load_i32_offset_bit20(<4 x i32> inreg %desc) { define amdgpu_ps i32 @s_buffer_load_i32_offset_neg_bit20(<4 x i32> inreg %desc) { ; GFX6-LABEL: name: s_buffer_load_i32_offset_neg_bit20 ; GFX6: bb.1 (%ir-block.0): - ; GFX6: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5 - ; GFX6: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2 - ; GFX6: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3 - ; GFX6: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4 - ; GFX6: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5 - ; GFX6: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 - ; GFX6: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 -1048576 - ; GFX6: [[S_BUFFER_LOAD_DWORD_SGPR:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_SGPR [[REG_SEQUENCE]], [[S_MOV_B32_]], 0 :: (dereferenceable invariant load (s32)) - ; GFX6: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[S_BUFFER_LOAD_DWORD_SGPR]] - ; GFX6: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY4]], implicit $exec - ; GFX6: $sgpr0 = COPY [[V_READFIRSTLANE_B32_]] - ; GFX6: SI_RETURN_TO_EPILOG implicit $sgpr0 + ; 
GFX6-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5 + ; GFX6-NEXT: {{ $}} + ; GFX6-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2 + ; GFX6-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3 + ; GFX6-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4 + ; GFX6-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5 + ; GFX6-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 + ; GFX6-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 -1048576 + ; GFX6-NEXT: [[S_BUFFER_LOAD_DWORD_SGPR:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_SGPR [[REG_SEQUENCE]], [[S_MOV_B32_]], 0 :: (dereferenceable invariant load (s32)) + ; GFX6-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[S_BUFFER_LOAD_DWORD_SGPR]] + ; GFX6-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY4]], implicit $exec + ; GFX6-NEXT: $sgpr0 = COPY [[V_READFIRSTLANE_B32_]] + ; GFX6-NEXT: SI_RETURN_TO_EPILOG implicit $sgpr0 ; GFX7-LABEL: name: s_buffer_load_i32_offset_neg_bit20 ; GFX7: bb.1 (%ir-block.0): - ; GFX7: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5 - ; GFX7: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2 - ; GFX7: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3 - ; GFX7: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4 - ; GFX7: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5 - ; GFX7: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 - ; GFX7: [[S_BUFFER_LOAD_DWORD_IMM_ci:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_IMM_ci [[REG_SEQUENCE]], 1073479680, 0 :: (dereferenceable invariant load (s32)) - ; GFX7: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[S_BUFFER_LOAD_DWORD_IMM_ci]] - ; GFX7: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY4]], implicit $exec - ; GFX7: $sgpr0 = COPY [[V_READFIRSTLANE_B32_]] - ; GFX7: SI_RETURN_TO_EPILOG implicit $sgpr0 + ; GFX7-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5 + ; GFX7-NEXT: {{ $}} + ; GFX7-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2 + ; GFX7-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3 + ; GFX7-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4 + ; GFX7-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5 + ; GFX7-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 + ; GFX7-NEXT: [[S_BUFFER_LOAD_DWORD_IMM_ci:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_IMM_ci [[REG_SEQUENCE]], 1073479680, 0 :: (dereferenceable invariant load (s32)) + ; GFX7-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[S_BUFFER_LOAD_DWORD_IMM_ci]] + ; GFX7-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY4]], implicit $exec + ; GFX7-NEXT: $sgpr0 = COPY [[V_READFIRSTLANE_B32_]] + ; GFX7-NEXT: SI_RETURN_TO_EPILOG implicit $sgpr0 ; GFX8-LABEL: name: s_buffer_load_i32_offset_neg_bit20 ; GFX8: bb.1 (%ir-block.0): - ; GFX8: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5 - ; GFX8: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2 - ; GFX8: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3 - ; GFX8: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4 - ; GFX8: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5 - ; GFX8: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 - ; GFX8: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 -1048576 - ; GFX8: [[S_BUFFER_LOAD_DWORD_SGPR:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_SGPR [[REG_SEQUENCE]], [[S_MOV_B32_]], 0 :: (dereferenceable invariant load (s32)) - ; 
GFX8: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[S_BUFFER_LOAD_DWORD_SGPR]] - ; GFX8: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY4]], implicit $exec - ; GFX8: $sgpr0 = COPY [[V_READFIRSTLANE_B32_]] - ; GFX8: SI_RETURN_TO_EPILOG implicit $sgpr0 + ; GFX8-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5 + ; GFX8-NEXT: {{ $}} + ; GFX8-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2 + ; GFX8-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3 + ; GFX8-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4 + ; GFX8-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5 + ; GFX8-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 + ; GFX8-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 -1048576 + ; GFX8-NEXT: [[S_BUFFER_LOAD_DWORD_SGPR:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_SGPR [[REG_SEQUENCE]], [[S_MOV_B32_]], 0 :: (dereferenceable invariant load (s32)) + ; GFX8-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[S_BUFFER_LOAD_DWORD_SGPR]] + ; GFX8-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY4]], implicit $exec + ; GFX8-NEXT: $sgpr0 = COPY [[V_READFIRSTLANE_B32_]] + ; GFX8-NEXT: SI_RETURN_TO_EPILOG implicit $sgpr0 %load = call i32 @llvm.amdgcn.s.buffer.load.i32(<4 x i32> %desc, i32 -1048576, i32 0) ret i32 %load } @@ -1391,44 +1451,47 @@ define amdgpu_ps i32 @s_buffer_load_i32_offset_neg_bit20(<4 x i32> inreg %desc) define amdgpu_ps i32 @s_buffer_load_i32_offset_bit19(<4 x i32> inreg %desc) { ; GFX6-LABEL: name: s_buffer_load_i32_offset_bit19 ; GFX6: bb.1 (%ir-block.0): - ; GFX6: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5 - ; GFX6: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2 - ; GFX6: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3 - ; GFX6: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4 - ; GFX6: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5 - ; GFX6: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 - ; GFX6: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 524288 - ; GFX6: [[S_BUFFER_LOAD_DWORD_SGPR:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_SGPR [[REG_SEQUENCE]], [[S_MOV_B32_]], 0 :: (dereferenceable invariant load (s32)) - ; GFX6: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[S_BUFFER_LOAD_DWORD_SGPR]] - ; GFX6: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY4]], implicit $exec - ; GFX6: $sgpr0 = COPY [[V_READFIRSTLANE_B32_]] - ; GFX6: SI_RETURN_TO_EPILOG implicit $sgpr0 + ; GFX6-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5 + ; GFX6-NEXT: {{ $}} + ; GFX6-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2 + ; GFX6-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3 + ; GFX6-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4 + ; GFX6-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5 + ; GFX6-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 + ; GFX6-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 524288 + ; GFX6-NEXT: [[S_BUFFER_LOAD_DWORD_SGPR:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_SGPR [[REG_SEQUENCE]], [[S_MOV_B32_]], 0 :: (dereferenceable invariant load (s32)) + ; GFX6-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[S_BUFFER_LOAD_DWORD_SGPR]] + ; GFX6-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY4]], implicit $exec + ; GFX6-NEXT: $sgpr0 = COPY [[V_READFIRSTLANE_B32_]] + ; GFX6-NEXT: SI_RETURN_TO_EPILOG implicit $sgpr0 ; GFX7-LABEL: name: s_buffer_load_i32_offset_bit19 ; 
GFX7: bb.1 (%ir-block.0): - ; GFX7: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5 - ; GFX7: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2 - ; GFX7: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3 - ; GFX7: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4 - ; GFX7: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5 - ; GFX7: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 - ; GFX7: [[S_BUFFER_LOAD_DWORD_IMM_ci:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_IMM_ci [[REG_SEQUENCE]], 131072, 0 :: (dereferenceable invariant load (s32)) - ; GFX7: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[S_BUFFER_LOAD_DWORD_IMM_ci]] - ; GFX7: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY4]], implicit $exec - ; GFX7: $sgpr0 = COPY [[V_READFIRSTLANE_B32_]] - ; GFX7: SI_RETURN_TO_EPILOG implicit $sgpr0 + ; GFX7-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5 + ; GFX7-NEXT: {{ $}} + ; GFX7-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2 + ; GFX7-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3 + ; GFX7-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4 + ; GFX7-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5 + ; GFX7-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 + ; GFX7-NEXT: [[S_BUFFER_LOAD_DWORD_IMM_ci:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_IMM_ci [[REG_SEQUENCE]], 131072, 0 :: (dereferenceable invariant load (s32)) + ; GFX7-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[S_BUFFER_LOAD_DWORD_IMM_ci]] + ; GFX7-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY4]], implicit $exec + ; GFX7-NEXT: $sgpr0 = COPY [[V_READFIRSTLANE_B32_]] + ; GFX7-NEXT: SI_RETURN_TO_EPILOG implicit $sgpr0 ; GFX8-LABEL: name: s_buffer_load_i32_offset_bit19 ; GFX8: bb.1 (%ir-block.0): - ; GFX8: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5 - ; GFX8: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2 - ; GFX8: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3 - ; GFX8: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4 - ; GFX8: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5 - ; GFX8: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 - ; GFX8: [[S_BUFFER_LOAD_DWORD_IMM:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_IMM [[REG_SEQUENCE]], 524288, 0 :: (dereferenceable invariant load (s32)) - ; GFX8: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[S_BUFFER_LOAD_DWORD_IMM]] - ; GFX8: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY4]], implicit $exec - ; GFX8: $sgpr0 = COPY [[V_READFIRSTLANE_B32_]] - ; GFX8: SI_RETURN_TO_EPILOG implicit $sgpr0 + ; GFX8-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5 + ; GFX8-NEXT: {{ $}} + ; GFX8-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2 + ; GFX8-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3 + ; GFX8-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4 + ; GFX8-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5 + ; GFX8-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 + ; GFX8-NEXT: [[S_BUFFER_LOAD_DWORD_IMM:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_IMM [[REG_SEQUENCE]], 524288, 0 :: (dereferenceable invariant load (s32)) + ; GFX8-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[S_BUFFER_LOAD_DWORD_IMM]] + ; GFX8-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY4]], implicit $exec + ; GFX8-NEXT: $sgpr0 = COPY [[V_READFIRSTLANE_B32_]] + ; 
GFX8-NEXT: SI_RETURN_TO_EPILOG implicit $sgpr0 %load = call i32 @llvm.amdgcn.s.buffer.load.i32(<4 x i32> %desc, i32 524288, i32 0) ret i32 %load } @@ -1436,45 +1499,48 @@ define amdgpu_ps i32 @s_buffer_load_i32_offset_bit19(<4 x i32> inreg %desc) { define amdgpu_ps i32 @s_buffer_load_i32_offset_neg_bit19(<4 x i32> inreg %desc) { ; GFX6-LABEL: name: s_buffer_load_i32_offset_neg_bit19 ; GFX6: bb.1 (%ir-block.0): - ; GFX6: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5 - ; GFX6: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2 - ; GFX6: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3 - ; GFX6: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4 - ; GFX6: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5 - ; GFX6: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 - ; GFX6: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 -524288 - ; GFX6: [[S_BUFFER_LOAD_DWORD_SGPR:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_SGPR [[REG_SEQUENCE]], [[S_MOV_B32_]], 0 :: (dereferenceable invariant load (s32)) - ; GFX6: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[S_BUFFER_LOAD_DWORD_SGPR]] - ; GFX6: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY4]], implicit $exec - ; GFX6: $sgpr0 = COPY [[V_READFIRSTLANE_B32_]] - ; GFX6: SI_RETURN_TO_EPILOG implicit $sgpr0 + ; GFX6-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5 + ; GFX6-NEXT: {{ $}} + ; GFX6-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2 + ; GFX6-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3 + ; GFX6-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4 + ; GFX6-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5 + ; GFX6-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 + ; GFX6-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 -524288 + ; GFX6-NEXT: [[S_BUFFER_LOAD_DWORD_SGPR:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_SGPR [[REG_SEQUENCE]], [[S_MOV_B32_]], 0 :: (dereferenceable invariant load (s32)) + ; GFX6-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[S_BUFFER_LOAD_DWORD_SGPR]] + ; GFX6-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY4]], implicit $exec + ; GFX6-NEXT: $sgpr0 = COPY [[V_READFIRSTLANE_B32_]] + ; GFX6-NEXT: SI_RETURN_TO_EPILOG implicit $sgpr0 ; GFX7-LABEL: name: s_buffer_load_i32_offset_neg_bit19 ; GFX7: bb.1 (%ir-block.0): - ; GFX7: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5 - ; GFX7: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2 - ; GFX7: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3 - ; GFX7: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4 - ; GFX7: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5 - ; GFX7: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 - ; GFX7: [[S_BUFFER_LOAD_DWORD_IMM_ci:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_IMM_ci [[REG_SEQUENCE]], 1073610752, 0 :: (dereferenceable invariant load (s32)) - ; GFX7: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[S_BUFFER_LOAD_DWORD_IMM_ci]] - ; GFX7: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY4]], implicit $exec - ; GFX7: $sgpr0 = COPY [[V_READFIRSTLANE_B32_]] - ; GFX7: SI_RETURN_TO_EPILOG implicit $sgpr0 + ; GFX7-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5 + ; GFX7-NEXT: {{ $}} + ; GFX7-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2 + ; GFX7-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3 + ; GFX7-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4 + ; GFX7-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5 + ; GFX7-NEXT: 
[[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 + ; GFX7-NEXT: [[S_BUFFER_LOAD_DWORD_IMM_ci:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_IMM_ci [[REG_SEQUENCE]], 1073610752, 0 :: (dereferenceable invariant load (s32)) + ; GFX7-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[S_BUFFER_LOAD_DWORD_IMM_ci]] + ; GFX7-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY4]], implicit $exec + ; GFX7-NEXT: $sgpr0 = COPY [[V_READFIRSTLANE_B32_]] + ; GFX7-NEXT: SI_RETURN_TO_EPILOG implicit $sgpr0 ; GFX8-LABEL: name: s_buffer_load_i32_offset_neg_bit19 ; GFX8: bb.1 (%ir-block.0): - ; GFX8: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5 - ; GFX8: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2 - ; GFX8: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3 - ; GFX8: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4 - ; GFX8: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5 - ; GFX8: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 - ; GFX8: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 -524288 - ; GFX8: [[S_BUFFER_LOAD_DWORD_SGPR:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_SGPR [[REG_SEQUENCE]], [[S_MOV_B32_]], 0 :: (dereferenceable invariant load (s32)) - ; GFX8: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[S_BUFFER_LOAD_DWORD_SGPR]] - ; GFX8: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY4]], implicit $exec - ; GFX8: $sgpr0 = COPY [[V_READFIRSTLANE_B32_]] - ; GFX8: SI_RETURN_TO_EPILOG implicit $sgpr0 + ; GFX8-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5 + ; GFX8-NEXT: {{ $}} + ; GFX8-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2 + ; GFX8-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3 + ; GFX8-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4 + ; GFX8-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5 + ; GFX8-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 + ; GFX8-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 -524288 + ; GFX8-NEXT: [[S_BUFFER_LOAD_DWORD_SGPR:%[0-9]+]]:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_SGPR [[REG_SEQUENCE]], [[S_MOV_B32_]], 0 :: (dereferenceable invariant load (s32)) + ; GFX8-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[S_BUFFER_LOAD_DWORD_SGPR]] + ; GFX8-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY4]], implicit $exec + ; GFX8-NEXT: $sgpr0 = COPY [[V_READFIRSTLANE_B32_]] + ; GFX8-NEXT: SI_RETURN_TO_EPILOG implicit $sgpr0 %load = call i32 @llvm.amdgcn.s.buffer.load.i32(<4 x i32> %desc, i32 -524288, i32 0) ret i32 %load } @@ -1483,43 +1549,46 @@ define amdgpu_ps i32 @s_buffer_load_i32_offset_neg_bit19(<4 x i32> inreg %desc) define amdgpu_ps float @s_buffer_load_f32_vgpr_offset(<4 x i32> inreg %rsrc, i32 %soffset) { ; GFX6-LABEL: name: s_buffer_load_f32_vgpr_offset ; GFX6: bb.1 (%ir-block.0): - ; GFX6: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $vgpr0 - ; GFX6: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2 - ; GFX6: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3 - ; GFX6: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4 - ; GFX6: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5 - ; GFX6: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 - ; GFX6: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX6: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0 - ; GFX6: [[BUFFER_LOAD_DWORD_OFFEN:%[0-9]+]]:vgpr_32 = 
BUFFER_LOAD_DWORD_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s32)) - ; GFX6: $vgpr0 = COPY [[BUFFER_LOAD_DWORD_OFFEN]] - ; GFX6: SI_RETURN_TO_EPILOG implicit $vgpr0 + ; GFX6-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $vgpr0 + ; GFX6-NEXT: {{ $}} + ; GFX6-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2 + ; GFX6-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3 + ; GFX6-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4 + ; GFX6-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5 + ; GFX6-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 + ; GFX6-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0 + ; GFX6-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0 + ; GFX6-NEXT: [[BUFFER_LOAD_DWORD_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s32)) + ; GFX6-NEXT: $vgpr0 = COPY [[BUFFER_LOAD_DWORD_OFFEN]] + ; GFX6-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0 ; GFX7-LABEL: name: s_buffer_load_f32_vgpr_offset ; GFX7: bb.1 (%ir-block.0): - ; GFX7: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $vgpr0 - ; GFX7: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2 - ; GFX7: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3 - ; GFX7: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4 - ; GFX7: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5 - ; GFX7: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 - ; GFX7: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX7: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0 - ; GFX7: [[BUFFER_LOAD_DWORD_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s32)) - ; GFX7: $vgpr0 = COPY [[BUFFER_LOAD_DWORD_OFFEN]] - ; GFX7: SI_RETURN_TO_EPILOG implicit $vgpr0 + ; GFX7-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $vgpr0 + ; GFX7-NEXT: {{ $}} + ; GFX7-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2 + ; GFX7-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3 + ; GFX7-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4 + ; GFX7-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5 + ; GFX7-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 + ; GFX7-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0 + ; GFX7-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0 + ; GFX7-NEXT: [[BUFFER_LOAD_DWORD_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s32)) + ; GFX7-NEXT: $vgpr0 = COPY [[BUFFER_LOAD_DWORD_OFFEN]] + ; GFX7-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0 ; GFX8-LABEL: name: s_buffer_load_f32_vgpr_offset ; GFX8: bb.1 (%ir-block.0): - ; GFX8: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $vgpr0 - ; GFX8: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2 - ; GFX8: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3 - ; GFX8: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4 - ; GFX8: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5 - ; GFX8: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 - ; GFX8: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX8: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0 - ; GFX8: [[BUFFER_LOAD_DWORD_OFFEN:%[0-9]+]]:vgpr_32 = 
BUFFER_LOAD_DWORD_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s32)) - ; GFX8: $vgpr0 = COPY [[BUFFER_LOAD_DWORD_OFFEN]] - ; GFX8: SI_RETURN_TO_EPILOG implicit $vgpr0 + ; GFX8-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $vgpr0 + ; GFX8-NEXT: {{ $}} + ; GFX8-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2 + ; GFX8-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3 + ; GFX8-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4 + ; GFX8-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5 + ; GFX8-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 + ; GFX8-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0 + ; GFX8-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0 + ; GFX8-NEXT: [[BUFFER_LOAD_DWORD_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s32)) + ; GFX8-NEXT: $vgpr0 = COPY [[BUFFER_LOAD_DWORD_OFFEN]] + ; GFX8-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0 %val = call float @llvm.amdgcn.s.buffer.load.f32(<4 x i32> %rsrc, i32 %soffset, i32 0) ret float %val } @@ -1527,52 +1596,55 @@ define amdgpu_ps float @s_buffer_load_f32_vgpr_offset(<4 x i32> inreg %rsrc, i32 define amdgpu_ps <2 x float> @s_buffer_load_v2f32_vgpr_offset(<4 x i32> inreg %rsrc, i32 %soffset) { ; GFX6-LABEL: name: s_buffer_load_v2f32_vgpr_offset ; GFX6: bb.1 (%ir-block.0): - ; GFX6: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $vgpr0 - ; GFX6: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2 - ; GFX6: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3 - ; GFX6: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4 - ; GFX6: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5 - ; GFX6: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 - ; GFX6: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX6: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0 - ; GFX6: [[BUFFER_LOAD_DWORDX2_OFFEN:%[0-9]+]]:vreg_64 = BUFFER_LOAD_DWORDX2_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s64), align 4) - ; GFX6: [[COPY5:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_DWORDX2_OFFEN]].sub0 - ; GFX6: [[COPY6:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_DWORDX2_OFFEN]].sub1 - ; GFX6: $vgpr0 = COPY [[COPY5]] - ; GFX6: $vgpr1 = COPY [[COPY6]] - ; GFX6: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1 + ; GFX6-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $vgpr0 + ; GFX6-NEXT: {{ $}} + ; GFX6-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2 + ; GFX6-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3 + ; GFX6-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4 + ; GFX6-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5 + ; GFX6-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 + ; GFX6-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0 + ; GFX6-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0 + ; GFX6-NEXT: [[BUFFER_LOAD_DWORDX2_OFFEN:%[0-9]+]]:vreg_64 = BUFFER_LOAD_DWORDX2_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s64), align 4) + ; GFX6-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_DWORDX2_OFFEN]].sub0 + ; GFX6-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_DWORDX2_OFFEN]].sub1 + ; GFX6-NEXT: $vgpr0 = COPY [[COPY5]] + ; GFX6-NEXT: $vgpr1 = COPY 
[[COPY6]] + ; GFX6-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1 ; GFX7-LABEL: name: s_buffer_load_v2f32_vgpr_offset ; GFX7: bb.1 (%ir-block.0): - ; GFX7: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $vgpr0 - ; GFX7: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2 - ; GFX7: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3 - ; GFX7: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4 - ; GFX7: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5 - ; GFX7: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 - ; GFX7: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX7: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0 - ; GFX7: [[BUFFER_LOAD_DWORDX2_OFFEN:%[0-9]+]]:vreg_64 = BUFFER_LOAD_DWORDX2_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s64), align 4) - ; GFX7: [[COPY5:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_DWORDX2_OFFEN]].sub0 - ; GFX7: [[COPY6:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_DWORDX2_OFFEN]].sub1 - ; GFX7: $vgpr0 = COPY [[COPY5]] - ; GFX7: $vgpr1 = COPY [[COPY6]] - ; GFX7: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1 + ; GFX7-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $vgpr0 + ; GFX7-NEXT: {{ $}} + ; GFX7-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2 + ; GFX7-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3 + ; GFX7-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4 + ; GFX7-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5 + ; GFX7-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 + ; GFX7-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0 + ; GFX7-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0 + ; GFX7-NEXT: [[BUFFER_LOAD_DWORDX2_OFFEN:%[0-9]+]]:vreg_64 = BUFFER_LOAD_DWORDX2_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s64), align 4) + ; GFX7-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_DWORDX2_OFFEN]].sub0 + ; GFX7-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_DWORDX2_OFFEN]].sub1 + ; GFX7-NEXT: $vgpr0 = COPY [[COPY5]] + ; GFX7-NEXT: $vgpr1 = COPY [[COPY6]] + ; GFX7-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1 ; GFX8-LABEL: name: s_buffer_load_v2f32_vgpr_offset ; GFX8: bb.1 (%ir-block.0): - ; GFX8: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $vgpr0 - ; GFX8: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2 - ; GFX8: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3 - ; GFX8: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4 - ; GFX8: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5 - ; GFX8: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 - ; GFX8: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX8: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0 - ; GFX8: [[BUFFER_LOAD_DWORDX2_OFFEN:%[0-9]+]]:vreg_64 = BUFFER_LOAD_DWORDX2_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s64), align 4) - ; GFX8: [[COPY5:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_DWORDX2_OFFEN]].sub0 - ; GFX8: [[COPY6:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_DWORDX2_OFFEN]].sub1 - ; GFX8: $vgpr0 = COPY [[COPY5]] - ; GFX8: $vgpr1 = COPY [[COPY6]] - ; GFX8: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1 + ; GFX8-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $vgpr0 + ; GFX8-NEXT: {{ $}} + ; GFX8-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2 + ; GFX8-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = 
COPY $sgpr3 + ; GFX8-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4 + ; GFX8-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5 + ; GFX8-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 + ; GFX8-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0 + ; GFX8-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0 + ; GFX8-NEXT: [[BUFFER_LOAD_DWORDX2_OFFEN:%[0-9]+]]:vreg_64 = BUFFER_LOAD_DWORDX2_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s64), align 4) + ; GFX8-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_DWORDX2_OFFEN]].sub0 + ; GFX8-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_DWORDX2_OFFEN]].sub1 + ; GFX8-NEXT: $vgpr0 = COPY [[COPY5]] + ; GFX8-NEXT: $vgpr1 = COPY [[COPY6]] + ; GFX8-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1 %val = call <2 x float> @llvm.amdgcn.s.buffer.load.v2f32(<4 x i32> %rsrc, i32 %soffset, i32 0) ret <2 x float> %val } @@ -1580,73 +1652,64 @@ define amdgpu_ps <2 x float> @s_buffer_load_v2f32_vgpr_offset(<4 x i32> inreg %r define amdgpu_ps <3 x float> @s_buffer_load_v3f32_vgpr_offset(<4 x i32> inreg %rsrc, i32 %soffset) { ; GFX6-LABEL: name: s_buffer_load_v3f32_vgpr_offset ; GFX6: bb.1 (%ir-block.0): - ; GFX6: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $vgpr0 - ; GFX6: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2 - ; GFX6: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3 - ; GFX6: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4 - ; GFX6: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5 - ; GFX6: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 - ; GFX6: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX6: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0 - ; GFX6: [[BUFFER_LOAD_DWORDX4_OFFEN:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128), align 4) - ; GFX6: [[DEF:%[0-9]+]]:sgpr_128 = IMPLICIT_DEF - ; GFX6: [[COPY5:%[0-9]+]]:vreg_128 = COPY [[DEF]] - ; GFX6: [[COPY6:%[0-9]+]]:vreg_128 = COPY [[DEF]] - ; GFX6: [[REG_SEQUENCE1:%[0-9]+]]:vreg_512 = REG_SEQUENCE [[BUFFER_LOAD_DWORDX4_OFFEN]], %subreg.sub0_sub1_sub2_sub3, [[COPY5]], %subreg.sub4_sub5_sub6_sub7, [[COPY6]], %subreg.sub8_sub9_sub10_sub11 - ; GFX6: [[COPY7:%[0-9]+]]:vreg_96 = COPY [[REG_SEQUENCE1]].sub0_sub1_sub2 - ; GFX6: [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[COPY7]].sub0 - ; GFX6: [[COPY9:%[0-9]+]]:vgpr_32 = COPY [[COPY7]].sub1 - ; GFX6: [[COPY10:%[0-9]+]]:vgpr_32 = COPY [[COPY7]].sub2 - ; GFX6: $vgpr0 = COPY [[COPY8]] - ; GFX6: $vgpr1 = COPY [[COPY9]] - ; GFX6: $vgpr2 = COPY [[COPY10]] - ; GFX6: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2 + ; GFX6-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $vgpr0 + ; GFX6-NEXT: {{ $}} + ; GFX6-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2 + ; GFX6-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3 + ; GFX6-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4 + ; GFX6-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5 + ; GFX6-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 + ; GFX6-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0 + ; GFX6-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0 + ; GFX6-NEXT: [[BUFFER_LOAD_DWORDX4_OFFEN:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY4]], [[REG_SEQUENCE]], 
[[S_MOV_B32_]], 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128), align 4) + ; GFX6-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_DWORDX4_OFFEN]].sub0 + ; GFX6-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_DWORDX4_OFFEN]].sub1 + ; GFX6-NEXT: [[COPY7:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_DWORDX4_OFFEN]].sub2 + ; GFX6-NEXT: [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_DWORDX4_OFFEN]].sub3 + ; GFX6-NEXT: $vgpr0 = COPY [[COPY5]] + ; GFX6-NEXT: $vgpr1 = COPY [[COPY6]] + ; GFX6-NEXT: $vgpr2 = COPY [[COPY7]] + ; GFX6-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2 ; GFX7-LABEL: name: s_buffer_load_v3f32_vgpr_offset ; GFX7: bb.1 (%ir-block.0): - ; GFX7: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $vgpr0 - ; GFX7: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2 - ; GFX7: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3 - ; GFX7: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4 - ; GFX7: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5 - ; GFX7: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 - ; GFX7: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX7: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0 - ; GFX7: [[BUFFER_LOAD_DWORDX4_OFFEN:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128), align 4) - ; GFX7: [[DEF:%[0-9]+]]:sgpr_128 = IMPLICIT_DEF - ; GFX7: [[COPY5:%[0-9]+]]:vreg_128 = COPY [[DEF]] - ; GFX7: [[COPY6:%[0-9]+]]:vreg_128 = COPY [[DEF]] - ; GFX7: [[REG_SEQUENCE1:%[0-9]+]]:vreg_512 = REG_SEQUENCE [[BUFFER_LOAD_DWORDX4_OFFEN]], %subreg.sub0_sub1_sub2_sub3, [[COPY5]], %subreg.sub4_sub5_sub6_sub7, [[COPY6]], %subreg.sub8_sub9_sub10_sub11 - ; GFX7: [[COPY7:%[0-9]+]]:vreg_96 = COPY [[REG_SEQUENCE1]].sub0_sub1_sub2 - ; GFX7: [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[COPY7]].sub0 - ; GFX7: [[COPY9:%[0-9]+]]:vgpr_32 = COPY [[COPY7]].sub1 - ; GFX7: [[COPY10:%[0-9]+]]:vgpr_32 = COPY [[COPY7]].sub2 - ; GFX7: $vgpr0 = COPY [[COPY8]] - ; GFX7: $vgpr1 = COPY [[COPY9]] - ; GFX7: $vgpr2 = COPY [[COPY10]] - ; GFX7: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2 + ; GFX7-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $vgpr0 + ; GFX7-NEXT: {{ $}} + ; GFX7-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2 + ; GFX7-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3 + ; GFX7-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4 + ; GFX7-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5 + ; GFX7-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 + ; GFX7-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0 + ; GFX7-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0 + ; GFX7-NEXT: [[BUFFER_LOAD_DWORDX4_OFFEN:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128), align 4) + ; GFX7-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_DWORDX4_OFFEN]].sub0 + ; GFX7-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_DWORDX4_OFFEN]].sub1 + ; GFX7-NEXT: [[COPY7:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_DWORDX4_OFFEN]].sub2 + ; GFX7-NEXT: [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_DWORDX4_OFFEN]].sub3 + ; GFX7-NEXT: $vgpr0 = COPY [[COPY5]] + ; GFX7-NEXT: $vgpr1 = COPY [[COPY6]] + ; GFX7-NEXT: $vgpr2 = COPY [[COPY7]] + ; GFX7-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2 
; GFX8-LABEL: name: s_buffer_load_v3f32_vgpr_offset ; GFX8: bb.1 (%ir-block.0): - ; GFX8: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $vgpr0 - ; GFX8: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2 - ; GFX8: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3 - ; GFX8: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4 - ; GFX8: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5 - ; GFX8: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 - ; GFX8: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX8: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0 - ; GFX8: [[BUFFER_LOAD_DWORDX4_OFFEN:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128), align 4) - ; GFX8: [[DEF:%[0-9]+]]:sgpr_128 = IMPLICIT_DEF - ; GFX8: [[COPY5:%[0-9]+]]:vreg_128 = COPY [[DEF]] - ; GFX8: [[COPY6:%[0-9]+]]:vreg_128 = COPY [[DEF]] - ; GFX8: [[REG_SEQUENCE1:%[0-9]+]]:vreg_512 = REG_SEQUENCE [[BUFFER_LOAD_DWORDX4_OFFEN]], %subreg.sub0_sub1_sub2_sub3, [[COPY5]], %subreg.sub4_sub5_sub6_sub7, [[COPY6]], %subreg.sub8_sub9_sub10_sub11 - ; GFX8: [[COPY7:%[0-9]+]]:vreg_96 = COPY [[REG_SEQUENCE1]].sub0_sub1_sub2 - ; GFX8: [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[COPY7]].sub0 - ; GFX8: [[COPY9:%[0-9]+]]:vgpr_32 = COPY [[COPY7]].sub1 - ; GFX8: [[COPY10:%[0-9]+]]:vgpr_32 = COPY [[COPY7]].sub2 - ; GFX8: $vgpr0 = COPY [[COPY8]] - ; GFX8: $vgpr1 = COPY [[COPY9]] - ; GFX8: $vgpr2 = COPY [[COPY10]] - ; GFX8: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2 + ; GFX8-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $vgpr0 + ; GFX8-NEXT: {{ $}} + ; GFX8-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2 + ; GFX8-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3 + ; GFX8-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4 + ; GFX8-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5 + ; GFX8-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 + ; GFX8-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0 + ; GFX8-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0 + ; GFX8-NEXT: [[BUFFER_LOAD_DWORDX4_OFFEN:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128), align 4) + ; GFX8-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_DWORDX4_OFFEN]].sub0 + ; GFX8-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_DWORDX4_OFFEN]].sub1 + ; GFX8-NEXT: [[COPY7:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_DWORDX4_OFFEN]].sub2 + ; GFX8-NEXT: [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_DWORDX4_OFFEN]].sub3 + ; GFX8-NEXT: $vgpr0 = COPY [[COPY5]] + ; GFX8-NEXT: $vgpr1 = COPY [[COPY6]] + ; GFX8-NEXT: $vgpr2 = COPY [[COPY7]] + ; GFX8-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2 %val = call <3 x float> @llvm.amdgcn.s.buffer.load.v3f32(<4 x i32> %rsrc, i32 %soffset, i32 0) ret <3 x float> %val } @@ -1654,64 +1717,67 @@ define amdgpu_ps <3 x float> @s_buffer_load_v3f32_vgpr_offset(<4 x i32> inreg %r define amdgpu_ps <4 x float> @s_buffer_load_v4f32_vgpr_offset(<4 x i32> inreg %rsrc, i32 %soffset) { ; GFX6-LABEL: name: s_buffer_load_v4f32_vgpr_offset ; GFX6: bb.1 (%ir-block.0): - ; GFX6: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $vgpr0 - ; GFX6: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2 - ; GFX6: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3 - ; GFX6: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4 - ; GFX6: 
[[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5 - ; GFX6: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 - ; GFX6: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX6: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0 - ; GFX6: [[BUFFER_LOAD_DWORDX4_OFFEN:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128), align 4) - ; GFX6: [[COPY5:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_DWORDX4_OFFEN]].sub0 - ; GFX6: [[COPY6:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_DWORDX4_OFFEN]].sub1 - ; GFX6: [[COPY7:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_DWORDX4_OFFEN]].sub2 - ; GFX6: [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_DWORDX4_OFFEN]].sub3 - ; GFX6: $vgpr0 = COPY [[COPY5]] - ; GFX6: $vgpr1 = COPY [[COPY6]] - ; GFX6: $vgpr2 = COPY [[COPY7]] - ; GFX6: $vgpr3 = COPY [[COPY8]] - ; GFX6: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3 + ; GFX6-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $vgpr0 + ; GFX6-NEXT: {{ $}} + ; GFX6-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2 + ; GFX6-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3 + ; GFX6-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4 + ; GFX6-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5 + ; GFX6-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 + ; GFX6-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0 + ; GFX6-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0 + ; GFX6-NEXT: [[BUFFER_LOAD_DWORDX4_OFFEN:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128), align 4) + ; GFX6-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_DWORDX4_OFFEN]].sub0 + ; GFX6-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_DWORDX4_OFFEN]].sub1 + ; GFX6-NEXT: [[COPY7:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_DWORDX4_OFFEN]].sub2 + ; GFX6-NEXT: [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_DWORDX4_OFFEN]].sub3 + ; GFX6-NEXT: $vgpr0 = COPY [[COPY5]] + ; GFX6-NEXT: $vgpr1 = COPY [[COPY6]] + ; GFX6-NEXT: $vgpr2 = COPY [[COPY7]] + ; GFX6-NEXT: $vgpr3 = COPY [[COPY8]] + ; GFX6-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3 ; GFX7-LABEL: name: s_buffer_load_v4f32_vgpr_offset ; GFX7: bb.1 (%ir-block.0): - ; GFX7: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $vgpr0 - ; GFX7: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2 - ; GFX7: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3 - ; GFX7: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4 - ; GFX7: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5 - ; GFX7: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 - ; GFX7: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX7: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0 - ; GFX7: [[BUFFER_LOAD_DWORDX4_OFFEN:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128), align 4) - ; GFX7: [[COPY5:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_DWORDX4_OFFEN]].sub0 - ; GFX7: [[COPY6:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_DWORDX4_OFFEN]].sub1 - ; GFX7: [[COPY7:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_DWORDX4_OFFEN]].sub2 - ; GFX7: [[COPY8:%[0-9]+]]:vgpr_32 = COPY 
[[BUFFER_LOAD_DWORDX4_OFFEN]].sub3 - ; GFX7: $vgpr0 = COPY [[COPY5]] - ; GFX7: $vgpr1 = COPY [[COPY6]] - ; GFX7: $vgpr2 = COPY [[COPY7]] - ; GFX7: $vgpr3 = COPY [[COPY8]] - ; GFX7: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3 + ; GFX7-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $vgpr0 + ; GFX7-NEXT: {{ $}} + ; GFX7-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2 + ; GFX7-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3 + ; GFX7-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4 + ; GFX7-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5 + ; GFX7-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 + ; GFX7-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0 + ; GFX7-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0 + ; GFX7-NEXT: [[BUFFER_LOAD_DWORDX4_OFFEN:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128), align 4) + ; GFX7-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_DWORDX4_OFFEN]].sub0 + ; GFX7-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_DWORDX4_OFFEN]].sub1 + ; GFX7-NEXT: [[COPY7:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_DWORDX4_OFFEN]].sub2 + ; GFX7-NEXT: [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_DWORDX4_OFFEN]].sub3 + ; GFX7-NEXT: $vgpr0 = COPY [[COPY5]] + ; GFX7-NEXT: $vgpr1 = COPY [[COPY6]] + ; GFX7-NEXT: $vgpr2 = COPY [[COPY7]] + ; GFX7-NEXT: $vgpr3 = COPY [[COPY8]] + ; GFX7-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3 ; GFX8-LABEL: name: s_buffer_load_v4f32_vgpr_offset ; GFX8: bb.1 (%ir-block.0): - ; GFX8: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $vgpr0 - ; GFX8: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2 - ; GFX8: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3 - ; GFX8: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4 - ; GFX8: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5 - ; GFX8: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 - ; GFX8: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX8: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0 - ; GFX8: [[BUFFER_LOAD_DWORDX4_OFFEN:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128), align 4) - ; GFX8: [[COPY5:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_DWORDX4_OFFEN]].sub0 - ; GFX8: [[COPY6:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_DWORDX4_OFFEN]].sub1 - ; GFX8: [[COPY7:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_DWORDX4_OFFEN]].sub2 - ; GFX8: [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_DWORDX4_OFFEN]].sub3 - ; GFX8: $vgpr0 = COPY [[COPY5]] - ; GFX8: $vgpr1 = COPY [[COPY6]] - ; GFX8: $vgpr2 = COPY [[COPY7]] - ; GFX8: $vgpr3 = COPY [[COPY8]] - ; GFX8: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3 + ; GFX8-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $vgpr0 + ; GFX8-NEXT: {{ $}} + ; GFX8-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2 + ; GFX8-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3 + ; GFX8-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4 + ; GFX8-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5 + ; GFX8-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 + ; GFX8-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0 + ; GFX8-NEXT: 
[[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0 + ; GFX8-NEXT: [[BUFFER_LOAD_DWORDX4_OFFEN:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128), align 4) + ; GFX8-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_DWORDX4_OFFEN]].sub0 + ; GFX8-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_DWORDX4_OFFEN]].sub1 + ; GFX8-NEXT: [[COPY7:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_DWORDX4_OFFEN]].sub2 + ; GFX8-NEXT: [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_DWORDX4_OFFEN]].sub3 + ; GFX8-NEXT: $vgpr0 = COPY [[COPY5]] + ; GFX8-NEXT: $vgpr1 = COPY [[COPY6]] + ; GFX8-NEXT: $vgpr2 = COPY [[COPY7]] + ; GFX8-NEXT: $vgpr3 = COPY [[COPY8]] + ; GFX8-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3 %val = call <4 x float> @llvm.amdgcn.s.buffer.load.v4f32(<4 x i32> %rsrc, i32 %soffset, i32 0) ret <4 x float> %val } @@ -1719,94 +1785,97 @@ define amdgpu_ps <4 x float> @s_buffer_load_v4f32_vgpr_offset(<4 x i32> inreg %r define amdgpu_ps <8 x float> @s_buffer_load_v8f32_vgpr_offset(<4 x i32> inreg %rsrc, i32 %soffset) { ; GFX6-LABEL: name: s_buffer_load_v8f32_vgpr_offset ; GFX6: bb.1 (%ir-block.0): - ; GFX6: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $vgpr0 - ; GFX6: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2 - ; GFX6: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3 - ; GFX6: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4 - ; GFX6: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5 - ; GFX6: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 - ; GFX6: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX6: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0 - ; GFX6: [[BUFFER_LOAD_DWORDX4_OFFEN:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128), align 4) - ; GFX6: [[BUFFER_LOAD_DWORDX4_OFFEN1:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 16, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128), align 4) - ; GFX6: [[REG_SEQUENCE1:%[0-9]+]]:vreg_256 = REG_SEQUENCE [[BUFFER_LOAD_DWORDX4_OFFEN]], %subreg.sub0_sub1_sub2_sub3, [[BUFFER_LOAD_DWORDX4_OFFEN1]], %subreg.sub4_sub5_sub6_sub7 - ; GFX6: [[COPY5:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub0 - ; GFX6: [[COPY6:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub1 - ; GFX6: [[COPY7:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub2 - ; GFX6: [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub3 - ; GFX6: [[COPY9:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub4 - ; GFX6: [[COPY10:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub5 - ; GFX6: [[COPY11:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub6 - ; GFX6: [[COPY12:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub7 - ; GFX6: $vgpr0 = COPY [[COPY5]] - ; GFX6: $vgpr1 = COPY [[COPY6]] - ; GFX6: $vgpr2 = COPY [[COPY7]] - ; GFX6: $vgpr3 = COPY [[COPY8]] - ; GFX6: $vgpr4 = COPY [[COPY9]] - ; GFX6: $vgpr5 = COPY [[COPY10]] - ; GFX6: $vgpr6 = COPY [[COPY11]] - ; GFX6: $vgpr7 = COPY [[COPY12]] - ; GFX6: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7 + ; GFX6-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $vgpr0 + ; GFX6-NEXT: {{ $}} + ; GFX6-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2 + ; GFX6-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3 + ; GFX6-NEXT: 
[[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4 + ; GFX6-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5 + ; GFX6-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 + ; GFX6-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0 + ; GFX6-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0 + ; GFX6-NEXT: [[BUFFER_LOAD_DWORDX4_OFFEN:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128), align 4) + ; GFX6-NEXT: [[BUFFER_LOAD_DWORDX4_OFFEN1:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 16, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128), align 4) + ; GFX6-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_256 = REG_SEQUENCE [[BUFFER_LOAD_DWORDX4_OFFEN]], %subreg.sub0_sub1_sub2_sub3, [[BUFFER_LOAD_DWORDX4_OFFEN1]], %subreg.sub4_sub5_sub6_sub7 + ; GFX6-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub0 + ; GFX6-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub1 + ; GFX6-NEXT: [[COPY7:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub2 + ; GFX6-NEXT: [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub3 + ; GFX6-NEXT: [[COPY9:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub4 + ; GFX6-NEXT: [[COPY10:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub5 + ; GFX6-NEXT: [[COPY11:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub6 + ; GFX6-NEXT: [[COPY12:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub7 + ; GFX6-NEXT: $vgpr0 = COPY [[COPY5]] + ; GFX6-NEXT: $vgpr1 = COPY [[COPY6]] + ; GFX6-NEXT: $vgpr2 = COPY [[COPY7]] + ; GFX6-NEXT: $vgpr3 = COPY [[COPY8]] + ; GFX6-NEXT: $vgpr4 = COPY [[COPY9]] + ; GFX6-NEXT: $vgpr5 = COPY [[COPY10]] + ; GFX6-NEXT: $vgpr6 = COPY [[COPY11]] + ; GFX6-NEXT: $vgpr7 = COPY [[COPY12]] + ; GFX6-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7 ; GFX7-LABEL: name: s_buffer_load_v8f32_vgpr_offset ; GFX7: bb.1 (%ir-block.0): - ; GFX7: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $vgpr0 - ; GFX7: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2 - ; GFX7: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3 - ; GFX7: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4 - ; GFX7: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5 - ; GFX7: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 - ; GFX7: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX7: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0 - ; GFX7: [[BUFFER_LOAD_DWORDX4_OFFEN:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128), align 4) - ; GFX7: [[BUFFER_LOAD_DWORDX4_OFFEN1:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 16, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128), align 4) - ; GFX7: [[REG_SEQUENCE1:%[0-9]+]]:vreg_256 = REG_SEQUENCE [[BUFFER_LOAD_DWORDX4_OFFEN]], %subreg.sub0_sub1_sub2_sub3, [[BUFFER_LOAD_DWORDX4_OFFEN1]], %subreg.sub4_sub5_sub6_sub7 - ; GFX7: [[COPY5:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub0 - ; GFX7: [[COPY6:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub1 - ; GFX7: [[COPY7:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub2 - ; GFX7: [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub3 - ; GFX7: [[COPY9:%[0-9]+]]:vgpr_32 = COPY 
[[REG_SEQUENCE1]].sub4 - ; GFX7: [[COPY10:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub5 - ; GFX7: [[COPY11:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub6 - ; GFX7: [[COPY12:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub7 - ; GFX7: $vgpr0 = COPY [[COPY5]] - ; GFX7: $vgpr1 = COPY [[COPY6]] - ; GFX7: $vgpr2 = COPY [[COPY7]] - ; GFX7: $vgpr3 = COPY [[COPY8]] - ; GFX7: $vgpr4 = COPY [[COPY9]] - ; GFX7: $vgpr5 = COPY [[COPY10]] - ; GFX7: $vgpr6 = COPY [[COPY11]] - ; GFX7: $vgpr7 = COPY [[COPY12]] - ; GFX7: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7 + ; GFX7-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $vgpr0 + ; GFX7-NEXT: {{ $}} + ; GFX7-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2 + ; GFX7-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3 + ; GFX7-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4 + ; GFX7-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5 + ; GFX7-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 + ; GFX7-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0 + ; GFX7-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0 + ; GFX7-NEXT: [[BUFFER_LOAD_DWORDX4_OFFEN:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128), align 4) + ; GFX7-NEXT: [[BUFFER_LOAD_DWORDX4_OFFEN1:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 16, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128), align 4) + ; GFX7-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_256 = REG_SEQUENCE [[BUFFER_LOAD_DWORDX4_OFFEN]], %subreg.sub0_sub1_sub2_sub3, [[BUFFER_LOAD_DWORDX4_OFFEN1]], %subreg.sub4_sub5_sub6_sub7 + ; GFX7-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub0 + ; GFX7-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub1 + ; GFX7-NEXT: [[COPY7:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub2 + ; GFX7-NEXT: [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub3 + ; GFX7-NEXT: [[COPY9:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub4 + ; GFX7-NEXT: [[COPY10:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub5 + ; GFX7-NEXT: [[COPY11:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub6 + ; GFX7-NEXT: [[COPY12:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub7 + ; GFX7-NEXT: $vgpr0 = COPY [[COPY5]] + ; GFX7-NEXT: $vgpr1 = COPY [[COPY6]] + ; GFX7-NEXT: $vgpr2 = COPY [[COPY7]] + ; GFX7-NEXT: $vgpr3 = COPY [[COPY8]] + ; GFX7-NEXT: $vgpr4 = COPY [[COPY9]] + ; GFX7-NEXT: $vgpr5 = COPY [[COPY10]] + ; GFX7-NEXT: $vgpr6 = COPY [[COPY11]] + ; GFX7-NEXT: $vgpr7 = COPY [[COPY12]] + ; GFX7-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7 ; GFX8-LABEL: name: s_buffer_load_v8f32_vgpr_offset ; GFX8: bb.1 (%ir-block.0): - ; GFX8: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $vgpr0 - ; GFX8: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2 - ; GFX8: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3 - ; GFX8: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4 - ; GFX8: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5 - ; GFX8: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 - ; GFX8: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX8: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0 - ; GFX8: 
[[BUFFER_LOAD_DWORDX4_OFFEN:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128), align 4) - ; GFX8: [[BUFFER_LOAD_DWORDX4_OFFEN1:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 16, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128), align 4) - ; GFX8: [[REG_SEQUENCE1:%[0-9]+]]:vreg_256 = REG_SEQUENCE [[BUFFER_LOAD_DWORDX4_OFFEN]], %subreg.sub0_sub1_sub2_sub3, [[BUFFER_LOAD_DWORDX4_OFFEN1]], %subreg.sub4_sub5_sub6_sub7 - ; GFX8: [[COPY5:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub0 - ; GFX8: [[COPY6:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub1 - ; GFX8: [[COPY7:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub2 - ; GFX8: [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub3 - ; GFX8: [[COPY9:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub4 - ; GFX8: [[COPY10:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub5 - ; GFX8: [[COPY11:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub6 - ; GFX8: [[COPY12:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub7 - ; GFX8: $vgpr0 = COPY [[COPY5]] - ; GFX8: $vgpr1 = COPY [[COPY6]] - ; GFX8: $vgpr2 = COPY [[COPY7]] - ; GFX8: $vgpr3 = COPY [[COPY8]] - ; GFX8: $vgpr4 = COPY [[COPY9]] - ; GFX8: $vgpr5 = COPY [[COPY10]] - ; GFX8: $vgpr6 = COPY [[COPY11]] - ; GFX8: $vgpr7 = COPY [[COPY12]] - ; GFX8: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7 + ; GFX8-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $vgpr0 + ; GFX8-NEXT: {{ $}} + ; GFX8-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2 + ; GFX8-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3 + ; GFX8-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4 + ; GFX8-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5 + ; GFX8-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 + ; GFX8-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0 + ; GFX8-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0 + ; GFX8-NEXT: [[BUFFER_LOAD_DWORDX4_OFFEN:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128), align 4) + ; GFX8-NEXT: [[BUFFER_LOAD_DWORDX4_OFFEN1:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 16, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128), align 4) + ; GFX8-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_256 = REG_SEQUENCE [[BUFFER_LOAD_DWORDX4_OFFEN]], %subreg.sub0_sub1_sub2_sub3, [[BUFFER_LOAD_DWORDX4_OFFEN1]], %subreg.sub4_sub5_sub6_sub7 + ; GFX8-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub0 + ; GFX8-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub1 + ; GFX8-NEXT: [[COPY7:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub2 + ; GFX8-NEXT: [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub3 + ; GFX8-NEXT: [[COPY9:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub4 + ; GFX8-NEXT: [[COPY10:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub5 + ; GFX8-NEXT: [[COPY11:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub6 + ; GFX8-NEXT: [[COPY12:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub7 + ; GFX8-NEXT: $vgpr0 = COPY [[COPY5]] + ; GFX8-NEXT: $vgpr1 = COPY [[COPY6]] + ; GFX8-NEXT: $vgpr2 = COPY [[COPY7]] + ; GFX8-NEXT: $vgpr3 = COPY [[COPY8]] + ; GFX8-NEXT: $vgpr4 = COPY [[COPY9]] + ; GFX8-NEXT: $vgpr5 = COPY 
[[COPY10]] + ; GFX8-NEXT: $vgpr6 = COPY [[COPY11]] + ; GFX8-NEXT: $vgpr7 = COPY [[COPY12]] + ; GFX8-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7 %val = call <8 x float> @llvm.amdgcn.s.buffer.load.v8f32(<4 x i32> %rsrc, i32 %soffset, i32 0) ret <8 x float> %val } @@ -1814,148 +1883,151 @@ define amdgpu_ps <8 x float> @s_buffer_load_v8f32_vgpr_offset(<4 x i32> inreg %r define amdgpu_ps <16 x float> @s_buffer_load_v16f32_vgpr_offset(<4 x i32> inreg %rsrc, i32 %soffset) { ; GFX6-LABEL: name: s_buffer_load_v16f32_vgpr_offset ; GFX6: bb.1 (%ir-block.0): - ; GFX6: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $vgpr0 - ; GFX6: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2 - ; GFX6: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3 - ; GFX6: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4 - ; GFX6: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5 - ; GFX6: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 - ; GFX6: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX6: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0 - ; GFX6: [[BUFFER_LOAD_DWORDX4_OFFEN:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128), align 4) - ; GFX6: [[BUFFER_LOAD_DWORDX4_OFFEN1:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 16, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128), align 4) - ; GFX6: [[BUFFER_LOAD_DWORDX4_OFFEN2:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 32, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128) from unknown-address + 16, align 4) - ; GFX6: [[BUFFER_LOAD_DWORDX4_OFFEN3:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 48, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128) from unknown-address + 48, align 4) - ; GFX6: [[REG_SEQUENCE1:%[0-9]+]]:vreg_512 = REG_SEQUENCE [[BUFFER_LOAD_DWORDX4_OFFEN]], %subreg.sub0_sub1_sub2_sub3, [[BUFFER_LOAD_DWORDX4_OFFEN1]], %subreg.sub4_sub5_sub6_sub7, [[BUFFER_LOAD_DWORDX4_OFFEN2]], %subreg.sub8_sub9_sub10_sub11, [[BUFFER_LOAD_DWORDX4_OFFEN3]], %subreg.sub12_sub13_sub14_sub15 - ; GFX6: [[COPY5:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub0 - ; GFX6: [[COPY6:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub1 - ; GFX6: [[COPY7:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub2 - ; GFX6: [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub3 - ; GFX6: [[COPY9:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub4 - ; GFX6: [[COPY10:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub5 - ; GFX6: [[COPY11:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub6 - ; GFX6: [[COPY12:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub7 - ; GFX6: [[COPY13:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub8 - ; GFX6: [[COPY14:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub9 - ; GFX6: [[COPY15:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub10 - ; GFX6: [[COPY16:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub11 - ; GFX6: [[COPY17:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub12 - ; GFX6: [[COPY18:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub13 - ; GFX6: [[COPY19:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub14 - ; GFX6: [[COPY20:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub15 - ; GFX6: $vgpr0 = COPY [[COPY5]] - ; GFX6: $vgpr1 = COPY [[COPY6]] - ; GFX6: $vgpr2 = 
COPY [[COPY7]] - ; GFX6: $vgpr3 = COPY [[COPY8]] - ; GFX6: $vgpr4 = COPY [[COPY9]] - ; GFX6: $vgpr5 = COPY [[COPY10]] - ; GFX6: $vgpr6 = COPY [[COPY11]] - ; GFX6: $vgpr7 = COPY [[COPY12]] - ; GFX6: $vgpr8 = COPY [[COPY13]] - ; GFX6: $vgpr9 = COPY [[COPY14]] - ; GFX6: $vgpr10 = COPY [[COPY15]] - ; GFX6: $vgpr11 = COPY [[COPY16]] - ; GFX6: $vgpr12 = COPY [[COPY17]] - ; GFX6: $vgpr13 = COPY [[COPY18]] - ; GFX6: $vgpr14 = COPY [[COPY19]] - ; GFX6: $vgpr15 = COPY [[COPY20]] - ; GFX6: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7, implicit $vgpr8, implicit $vgpr9, implicit $vgpr10, implicit $vgpr11, implicit $vgpr12, implicit $vgpr13, implicit $vgpr14, implicit $vgpr15 + ; GFX6-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $vgpr0 + ; GFX6-NEXT: {{ $}} + ; GFX6-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2 + ; GFX6-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3 + ; GFX6-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4 + ; GFX6-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5 + ; GFX6-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 + ; GFX6-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0 + ; GFX6-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0 + ; GFX6-NEXT: [[BUFFER_LOAD_DWORDX4_OFFEN:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128), align 4) + ; GFX6-NEXT: [[BUFFER_LOAD_DWORDX4_OFFEN1:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 16, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128), align 4) + ; GFX6-NEXT: [[BUFFER_LOAD_DWORDX4_OFFEN2:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 32, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128) from unknown-address + 16, align 4) + ; GFX6-NEXT: [[BUFFER_LOAD_DWORDX4_OFFEN3:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 48, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128) from unknown-address + 48, align 4) + ; GFX6-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_512 = REG_SEQUENCE [[BUFFER_LOAD_DWORDX4_OFFEN]], %subreg.sub0_sub1_sub2_sub3, [[BUFFER_LOAD_DWORDX4_OFFEN1]], %subreg.sub4_sub5_sub6_sub7, [[BUFFER_LOAD_DWORDX4_OFFEN2]], %subreg.sub8_sub9_sub10_sub11, [[BUFFER_LOAD_DWORDX4_OFFEN3]], %subreg.sub12_sub13_sub14_sub15 + ; GFX6-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub0 + ; GFX6-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub1 + ; GFX6-NEXT: [[COPY7:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub2 + ; GFX6-NEXT: [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub3 + ; GFX6-NEXT: [[COPY9:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub4 + ; GFX6-NEXT: [[COPY10:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub5 + ; GFX6-NEXT: [[COPY11:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub6 + ; GFX6-NEXT: [[COPY12:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub7 + ; GFX6-NEXT: [[COPY13:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub8 + ; GFX6-NEXT: [[COPY14:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub9 + ; GFX6-NEXT: [[COPY15:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub10 + ; GFX6-NEXT: [[COPY16:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub11 + ; GFX6-NEXT: [[COPY17:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub12 + ; GFX6-NEXT: 
[[COPY18:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub13 + ; GFX6-NEXT: [[COPY19:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub14 + ; GFX6-NEXT: [[COPY20:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub15 + ; GFX6-NEXT: $vgpr0 = COPY [[COPY5]] + ; GFX6-NEXT: $vgpr1 = COPY [[COPY6]] + ; GFX6-NEXT: $vgpr2 = COPY [[COPY7]] + ; GFX6-NEXT: $vgpr3 = COPY [[COPY8]] + ; GFX6-NEXT: $vgpr4 = COPY [[COPY9]] + ; GFX6-NEXT: $vgpr5 = COPY [[COPY10]] + ; GFX6-NEXT: $vgpr6 = COPY [[COPY11]] + ; GFX6-NEXT: $vgpr7 = COPY [[COPY12]] + ; GFX6-NEXT: $vgpr8 = COPY [[COPY13]] + ; GFX6-NEXT: $vgpr9 = COPY [[COPY14]] + ; GFX6-NEXT: $vgpr10 = COPY [[COPY15]] + ; GFX6-NEXT: $vgpr11 = COPY [[COPY16]] + ; GFX6-NEXT: $vgpr12 = COPY [[COPY17]] + ; GFX6-NEXT: $vgpr13 = COPY [[COPY18]] + ; GFX6-NEXT: $vgpr14 = COPY [[COPY19]] + ; GFX6-NEXT: $vgpr15 = COPY [[COPY20]] + ; GFX6-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7, implicit $vgpr8, implicit $vgpr9, implicit $vgpr10, implicit $vgpr11, implicit $vgpr12, implicit $vgpr13, implicit $vgpr14, implicit $vgpr15 ; GFX7-LABEL: name: s_buffer_load_v16f32_vgpr_offset ; GFX7: bb.1 (%ir-block.0): - ; GFX7: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $vgpr0 - ; GFX7: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2 - ; GFX7: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3 - ; GFX7: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4 - ; GFX7: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5 - ; GFX7: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 - ; GFX7: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX7: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0 - ; GFX7: [[BUFFER_LOAD_DWORDX4_OFFEN:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128), align 4) - ; GFX7: [[BUFFER_LOAD_DWORDX4_OFFEN1:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 16, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128), align 4) - ; GFX7: [[BUFFER_LOAD_DWORDX4_OFFEN2:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 32, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128) from unknown-address + 16, align 4) - ; GFX7: [[BUFFER_LOAD_DWORDX4_OFFEN3:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 48, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128) from unknown-address + 48, align 4) - ; GFX7: [[REG_SEQUENCE1:%[0-9]+]]:vreg_512 = REG_SEQUENCE [[BUFFER_LOAD_DWORDX4_OFFEN]], %subreg.sub0_sub1_sub2_sub3, [[BUFFER_LOAD_DWORDX4_OFFEN1]], %subreg.sub4_sub5_sub6_sub7, [[BUFFER_LOAD_DWORDX4_OFFEN2]], %subreg.sub8_sub9_sub10_sub11, [[BUFFER_LOAD_DWORDX4_OFFEN3]], %subreg.sub12_sub13_sub14_sub15 - ; GFX7: [[COPY5:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub0 - ; GFX7: [[COPY6:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub1 - ; GFX7: [[COPY7:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub2 - ; GFX7: [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub3 - ; GFX7: [[COPY9:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub4 - ; GFX7: [[COPY10:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub5 - ; GFX7: [[COPY11:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub6 - ; GFX7: [[COPY12:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub7 - ; GFX7: [[COPY13:%[0-9]+]]:vgpr_32 = COPY 
[[REG_SEQUENCE1]].sub8 - ; GFX7: [[COPY14:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub9 - ; GFX7: [[COPY15:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub10 - ; GFX7: [[COPY16:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub11 - ; GFX7: [[COPY17:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub12 - ; GFX7: [[COPY18:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub13 - ; GFX7: [[COPY19:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub14 - ; GFX7: [[COPY20:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub15 - ; GFX7: $vgpr0 = COPY [[COPY5]] - ; GFX7: $vgpr1 = COPY [[COPY6]] - ; GFX7: $vgpr2 = COPY [[COPY7]] - ; GFX7: $vgpr3 = COPY [[COPY8]] - ; GFX7: $vgpr4 = COPY [[COPY9]] - ; GFX7: $vgpr5 = COPY [[COPY10]] - ; GFX7: $vgpr6 = COPY [[COPY11]] - ; GFX7: $vgpr7 = COPY [[COPY12]] - ; GFX7: $vgpr8 = COPY [[COPY13]] - ; GFX7: $vgpr9 = COPY [[COPY14]] - ; GFX7: $vgpr10 = COPY [[COPY15]] - ; GFX7: $vgpr11 = COPY [[COPY16]] - ; GFX7: $vgpr12 = COPY [[COPY17]] - ; GFX7: $vgpr13 = COPY [[COPY18]] - ; GFX7: $vgpr14 = COPY [[COPY19]] - ; GFX7: $vgpr15 = COPY [[COPY20]] - ; GFX7: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7, implicit $vgpr8, implicit $vgpr9, implicit $vgpr10, implicit $vgpr11, implicit $vgpr12, implicit $vgpr13, implicit $vgpr14, implicit $vgpr15 + ; GFX7-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $vgpr0 + ; GFX7-NEXT: {{ $}} + ; GFX7-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2 + ; GFX7-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3 + ; GFX7-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4 + ; GFX7-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5 + ; GFX7-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 + ; GFX7-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0 + ; GFX7-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0 + ; GFX7-NEXT: [[BUFFER_LOAD_DWORDX4_OFFEN:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128), align 4) + ; GFX7-NEXT: [[BUFFER_LOAD_DWORDX4_OFFEN1:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 16, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128), align 4) + ; GFX7-NEXT: [[BUFFER_LOAD_DWORDX4_OFFEN2:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 32, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128) from unknown-address + 16, align 4) + ; GFX7-NEXT: [[BUFFER_LOAD_DWORDX4_OFFEN3:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 48, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128) from unknown-address + 48, align 4) + ; GFX7-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_512 = REG_SEQUENCE [[BUFFER_LOAD_DWORDX4_OFFEN]], %subreg.sub0_sub1_sub2_sub3, [[BUFFER_LOAD_DWORDX4_OFFEN1]], %subreg.sub4_sub5_sub6_sub7, [[BUFFER_LOAD_DWORDX4_OFFEN2]], %subreg.sub8_sub9_sub10_sub11, [[BUFFER_LOAD_DWORDX4_OFFEN3]], %subreg.sub12_sub13_sub14_sub15 + ; GFX7-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub0 + ; GFX7-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub1 + ; GFX7-NEXT: [[COPY7:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub2 + ; GFX7-NEXT: [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub3 + ; GFX7-NEXT: [[COPY9:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub4 + ; GFX7-NEXT: 
[[COPY10:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub5 + ; GFX7-NEXT: [[COPY11:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub6 + ; GFX7-NEXT: [[COPY12:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub7 + ; GFX7-NEXT: [[COPY13:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub8 + ; GFX7-NEXT: [[COPY14:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub9 + ; GFX7-NEXT: [[COPY15:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub10 + ; GFX7-NEXT: [[COPY16:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub11 + ; GFX7-NEXT: [[COPY17:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub12 + ; GFX7-NEXT: [[COPY18:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub13 + ; GFX7-NEXT: [[COPY19:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub14 + ; GFX7-NEXT: [[COPY20:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub15 + ; GFX7-NEXT: $vgpr0 = COPY [[COPY5]] + ; GFX7-NEXT: $vgpr1 = COPY [[COPY6]] + ; GFX7-NEXT: $vgpr2 = COPY [[COPY7]] + ; GFX7-NEXT: $vgpr3 = COPY [[COPY8]] + ; GFX7-NEXT: $vgpr4 = COPY [[COPY9]] + ; GFX7-NEXT: $vgpr5 = COPY [[COPY10]] + ; GFX7-NEXT: $vgpr6 = COPY [[COPY11]] + ; GFX7-NEXT: $vgpr7 = COPY [[COPY12]] + ; GFX7-NEXT: $vgpr8 = COPY [[COPY13]] + ; GFX7-NEXT: $vgpr9 = COPY [[COPY14]] + ; GFX7-NEXT: $vgpr10 = COPY [[COPY15]] + ; GFX7-NEXT: $vgpr11 = COPY [[COPY16]] + ; GFX7-NEXT: $vgpr12 = COPY [[COPY17]] + ; GFX7-NEXT: $vgpr13 = COPY [[COPY18]] + ; GFX7-NEXT: $vgpr14 = COPY [[COPY19]] + ; GFX7-NEXT: $vgpr15 = COPY [[COPY20]] + ; GFX7-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7, implicit $vgpr8, implicit $vgpr9, implicit $vgpr10, implicit $vgpr11, implicit $vgpr12, implicit $vgpr13, implicit $vgpr14, implicit $vgpr15 ; GFX8-LABEL: name: s_buffer_load_v16f32_vgpr_offset ; GFX8: bb.1 (%ir-block.0): - ; GFX8: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $vgpr0 - ; GFX8: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2 - ; GFX8: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3 - ; GFX8: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4 - ; GFX8: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5 - ; GFX8: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 - ; GFX8: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX8: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0 - ; GFX8: [[BUFFER_LOAD_DWORDX4_OFFEN:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128), align 4) - ; GFX8: [[BUFFER_LOAD_DWORDX4_OFFEN1:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 16, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128), align 4) - ; GFX8: [[BUFFER_LOAD_DWORDX4_OFFEN2:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 32, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128) from unknown-address + 16, align 4) - ; GFX8: [[BUFFER_LOAD_DWORDX4_OFFEN3:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 48, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128) from unknown-address + 48, align 4) - ; GFX8: [[REG_SEQUENCE1:%[0-9]+]]:vreg_512 = REG_SEQUENCE [[BUFFER_LOAD_DWORDX4_OFFEN]], %subreg.sub0_sub1_sub2_sub3, [[BUFFER_LOAD_DWORDX4_OFFEN1]], %subreg.sub4_sub5_sub6_sub7, [[BUFFER_LOAD_DWORDX4_OFFEN2]], %subreg.sub8_sub9_sub10_sub11, [[BUFFER_LOAD_DWORDX4_OFFEN3]], %subreg.sub12_sub13_sub14_sub15 - ; GFX8: 
[[COPY5:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub0 - ; GFX8: [[COPY6:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub1 - ; GFX8: [[COPY7:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub2 - ; GFX8: [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub3 - ; GFX8: [[COPY9:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub4 - ; GFX8: [[COPY10:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub5 - ; GFX8: [[COPY11:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub6 - ; GFX8: [[COPY12:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub7 - ; GFX8: [[COPY13:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub8 - ; GFX8: [[COPY14:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub9 - ; GFX8: [[COPY15:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub10 - ; GFX8: [[COPY16:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub11 - ; GFX8: [[COPY17:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub12 - ; GFX8: [[COPY18:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub13 - ; GFX8: [[COPY19:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub14 - ; GFX8: [[COPY20:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub15 - ; GFX8: $vgpr0 = COPY [[COPY5]] - ; GFX8: $vgpr1 = COPY [[COPY6]] - ; GFX8: $vgpr2 = COPY [[COPY7]] - ; GFX8: $vgpr3 = COPY [[COPY8]] - ; GFX8: $vgpr4 = COPY [[COPY9]] - ; GFX8: $vgpr5 = COPY [[COPY10]] - ; GFX8: $vgpr6 = COPY [[COPY11]] - ; GFX8: $vgpr7 = COPY [[COPY12]] - ; GFX8: $vgpr8 = COPY [[COPY13]] - ; GFX8: $vgpr9 = COPY [[COPY14]] - ; GFX8: $vgpr10 = COPY [[COPY15]] - ; GFX8: $vgpr11 = COPY [[COPY16]] - ; GFX8: $vgpr12 = COPY [[COPY17]] - ; GFX8: $vgpr13 = COPY [[COPY18]] - ; GFX8: $vgpr14 = COPY [[COPY19]] - ; GFX8: $vgpr15 = COPY [[COPY20]] - ; GFX8: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7, implicit $vgpr8, implicit $vgpr9, implicit $vgpr10, implicit $vgpr11, implicit $vgpr12, implicit $vgpr13, implicit $vgpr14, implicit $vgpr15 + ; GFX8-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $vgpr0 + ; GFX8-NEXT: {{ $}} + ; GFX8-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2 + ; GFX8-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3 + ; GFX8-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4 + ; GFX8-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5 + ; GFX8-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 + ; GFX8-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0 + ; GFX8-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0 + ; GFX8-NEXT: [[BUFFER_LOAD_DWORDX4_OFFEN:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128), align 4) + ; GFX8-NEXT: [[BUFFER_LOAD_DWORDX4_OFFEN1:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 16, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128), align 4) + ; GFX8-NEXT: [[BUFFER_LOAD_DWORDX4_OFFEN2:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 32, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128) from unknown-address + 16, align 4) + ; GFX8-NEXT: [[BUFFER_LOAD_DWORDX4_OFFEN3:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 48, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128) from unknown-address + 48, align 4) + ; GFX8-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_512 = REG_SEQUENCE [[BUFFER_LOAD_DWORDX4_OFFEN]], %subreg.sub0_sub1_sub2_sub3, 
[[BUFFER_LOAD_DWORDX4_OFFEN1]], %subreg.sub4_sub5_sub6_sub7, [[BUFFER_LOAD_DWORDX4_OFFEN2]], %subreg.sub8_sub9_sub10_sub11, [[BUFFER_LOAD_DWORDX4_OFFEN3]], %subreg.sub12_sub13_sub14_sub15 + ; GFX8-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub0 + ; GFX8-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub1 + ; GFX8-NEXT: [[COPY7:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub2 + ; GFX8-NEXT: [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub3 + ; GFX8-NEXT: [[COPY9:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub4 + ; GFX8-NEXT: [[COPY10:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub5 + ; GFX8-NEXT: [[COPY11:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub6 + ; GFX8-NEXT: [[COPY12:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub7 + ; GFX8-NEXT: [[COPY13:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub8 + ; GFX8-NEXT: [[COPY14:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub9 + ; GFX8-NEXT: [[COPY15:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub10 + ; GFX8-NEXT: [[COPY16:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub11 + ; GFX8-NEXT: [[COPY17:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub12 + ; GFX8-NEXT: [[COPY18:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub13 + ; GFX8-NEXT: [[COPY19:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub14 + ; GFX8-NEXT: [[COPY20:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub15 + ; GFX8-NEXT: $vgpr0 = COPY [[COPY5]] + ; GFX8-NEXT: $vgpr1 = COPY [[COPY6]] + ; GFX8-NEXT: $vgpr2 = COPY [[COPY7]] + ; GFX8-NEXT: $vgpr3 = COPY [[COPY8]] + ; GFX8-NEXT: $vgpr4 = COPY [[COPY9]] + ; GFX8-NEXT: $vgpr5 = COPY [[COPY10]] + ; GFX8-NEXT: $vgpr6 = COPY [[COPY11]] + ; GFX8-NEXT: $vgpr7 = COPY [[COPY12]] + ; GFX8-NEXT: $vgpr8 = COPY [[COPY13]] + ; GFX8-NEXT: $vgpr9 = COPY [[COPY14]] + ; GFX8-NEXT: $vgpr10 = COPY [[COPY15]] + ; GFX8-NEXT: $vgpr11 = COPY [[COPY16]] + ; GFX8-NEXT: $vgpr12 = COPY [[COPY17]] + ; GFX8-NEXT: $vgpr13 = COPY [[COPY18]] + ; GFX8-NEXT: $vgpr14 = COPY [[COPY19]] + ; GFX8-NEXT: $vgpr15 = COPY [[COPY20]] + ; GFX8-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7, implicit $vgpr8, implicit $vgpr9, implicit $vgpr10, implicit $vgpr11, implicit $vgpr12, implicit $vgpr13, implicit $vgpr14, implicit $vgpr15 %val = call <16 x float> @llvm.amdgcn.s.buffer.load.v16f32(<4 x i32> %rsrc, i32 %soffset, i32 0) ret <16 x float> %val } @@ -1963,43 +2035,46 @@ define amdgpu_ps <16 x float> @s_buffer_load_v16f32_vgpr_offset(<4 x i32> inreg define amdgpu_ps float @s_buffer_load_f32_vgpr_offset_add_4092(<4 x i32> inreg %rsrc, i32 %soffset.base) { ; GFX6-LABEL: name: s_buffer_load_f32_vgpr_offset_add_4092 ; GFX6: bb.1 (%ir-block.0): - ; GFX6: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $vgpr0 - ; GFX6: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2 - ; GFX6: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3 - ; GFX6: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4 - ; GFX6: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5 - ; GFX6: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 - ; GFX6: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX6: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0 - ; GFX6: [[BUFFER_LOAD_DWORD_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 4092, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s32)) - ; GFX6: $vgpr0 = COPY [[BUFFER_LOAD_DWORD_OFFEN]] - ; GFX6: SI_RETURN_TO_EPILOG implicit $vgpr0 + ; GFX6-NEXT: liveins: 
$sgpr2, $sgpr3, $sgpr4, $sgpr5, $vgpr0 + ; GFX6-NEXT: {{ $}} + ; GFX6-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2 + ; GFX6-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3 + ; GFX6-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4 + ; GFX6-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5 + ; GFX6-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 + ; GFX6-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0 + ; GFX6-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0 + ; GFX6-NEXT: [[BUFFER_LOAD_DWORD_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 4092, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s32)) + ; GFX6-NEXT: $vgpr0 = COPY [[BUFFER_LOAD_DWORD_OFFEN]] + ; GFX6-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0 ; GFX7-LABEL: name: s_buffer_load_f32_vgpr_offset_add_4092 ; GFX7: bb.1 (%ir-block.0): - ; GFX7: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $vgpr0 - ; GFX7: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2 - ; GFX7: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3 - ; GFX7: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4 - ; GFX7: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5 - ; GFX7: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 - ; GFX7: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX7: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0 - ; GFX7: [[BUFFER_LOAD_DWORD_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 4092, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s32)) - ; GFX7: $vgpr0 = COPY [[BUFFER_LOAD_DWORD_OFFEN]] - ; GFX7: SI_RETURN_TO_EPILOG implicit $vgpr0 + ; GFX7-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $vgpr0 + ; GFX7-NEXT: {{ $}} + ; GFX7-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2 + ; GFX7-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3 + ; GFX7-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4 + ; GFX7-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5 + ; GFX7-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 + ; GFX7-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0 + ; GFX7-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0 + ; GFX7-NEXT: [[BUFFER_LOAD_DWORD_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 4092, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s32)) + ; GFX7-NEXT: $vgpr0 = COPY [[BUFFER_LOAD_DWORD_OFFEN]] + ; GFX7-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0 ; GFX8-LABEL: name: s_buffer_load_f32_vgpr_offset_add_4092 ; GFX8: bb.1 (%ir-block.0): - ; GFX8: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $vgpr0 - ; GFX8: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2 - ; GFX8: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3 - ; GFX8: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4 - ; GFX8: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5 - ; GFX8: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 - ; GFX8: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX8: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0 - ; GFX8: [[BUFFER_LOAD_DWORD_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 4092, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s32)) - ; GFX8: $vgpr0 = COPY [[BUFFER_LOAD_DWORD_OFFEN]] - ; GFX8: SI_RETURN_TO_EPILOG implicit 
$vgpr0 + ; GFX8-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $vgpr0 + ; GFX8-NEXT: {{ $}} + ; GFX8-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2 + ; GFX8-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3 + ; GFX8-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4 + ; GFX8-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5 + ; GFX8-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 + ; GFX8-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0 + ; GFX8-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0 + ; GFX8-NEXT: [[BUFFER_LOAD_DWORD_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 4092, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s32)) + ; GFX8-NEXT: $vgpr0 = COPY [[BUFFER_LOAD_DWORD_OFFEN]] + ; GFX8-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0 %soffset = add i32 %soffset.base, 4092 %val = call float @llvm.amdgcn.s.buffer.load.f32(<4 x i32> %rsrc, i32 %soffset, i32 0) ret float %val @@ -2008,43 +2083,46 @@ define amdgpu_ps float @s_buffer_load_f32_vgpr_offset_add_4092(<4 x i32> inreg % define amdgpu_ps float @s_buffer_load_f32_vgpr_offset_add_4095(<4 x i32> inreg %rsrc, i32 %soffset.base) { ; GFX6-LABEL: name: s_buffer_load_f32_vgpr_offset_add_4095 ; GFX6: bb.1 (%ir-block.0): - ; GFX6: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $vgpr0 - ; GFX6: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2 - ; GFX6: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3 - ; GFX6: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4 - ; GFX6: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5 - ; GFX6: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 - ; GFX6: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX6: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0 - ; GFX6: [[BUFFER_LOAD_DWORD_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 4095, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s32)) - ; GFX6: $vgpr0 = COPY [[BUFFER_LOAD_DWORD_OFFEN]] - ; GFX6: SI_RETURN_TO_EPILOG implicit $vgpr0 + ; GFX6-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $vgpr0 + ; GFX6-NEXT: {{ $}} + ; GFX6-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2 + ; GFX6-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3 + ; GFX6-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4 + ; GFX6-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5 + ; GFX6-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 + ; GFX6-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0 + ; GFX6-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0 + ; GFX6-NEXT: [[BUFFER_LOAD_DWORD_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 4095, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s32)) + ; GFX6-NEXT: $vgpr0 = COPY [[BUFFER_LOAD_DWORD_OFFEN]] + ; GFX6-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0 ; GFX7-LABEL: name: s_buffer_load_f32_vgpr_offset_add_4095 ; GFX7: bb.1 (%ir-block.0): - ; GFX7: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $vgpr0 - ; GFX7: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2 - ; GFX7: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3 - ; GFX7: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4 - ; GFX7: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5 - ; GFX7: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 - ; GFX7: 
[[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX7: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0 - ; GFX7: [[BUFFER_LOAD_DWORD_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 4095, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s32)) - ; GFX7: $vgpr0 = COPY [[BUFFER_LOAD_DWORD_OFFEN]] - ; GFX7: SI_RETURN_TO_EPILOG implicit $vgpr0 + ; GFX7-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $vgpr0 + ; GFX7-NEXT: {{ $}} + ; GFX7-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2 + ; GFX7-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3 + ; GFX7-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4 + ; GFX7-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5 + ; GFX7-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 + ; GFX7-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0 + ; GFX7-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0 + ; GFX7-NEXT: [[BUFFER_LOAD_DWORD_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 4095, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s32)) + ; GFX7-NEXT: $vgpr0 = COPY [[BUFFER_LOAD_DWORD_OFFEN]] + ; GFX7-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0 ; GFX8-LABEL: name: s_buffer_load_f32_vgpr_offset_add_4095 ; GFX8: bb.1 (%ir-block.0): - ; GFX8: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $vgpr0 - ; GFX8: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2 - ; GFX8: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3 - ; GFX8: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4 - ; GFX8: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5 - ; GFX8: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 - ; GFX8: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX8: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0 - ; GFX8: [[BUFFER_LOAD_DWORD_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 4095, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s32)) - ; GFX8: $vgpr0 = COPY [[BUFFER_LOAD_DWORD_OFFEN]] - ; GFX8: SI_RETURN_TO_EPILOG implicit $vgpr0 + ; GFX8-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $vgpr0 + ; GFX8-NEXT: {{ $}} + ; GFX8-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2 + ; GFX8-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3 + ; GFX8-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4 + ; GFX8-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5 + ; GFX8-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 + ; GFX8-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0 + ; GFX8-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0 + ; GFX8-NEXT: [[BUFFER_LOAD_DWORD_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 4095, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s32)) + ; GFX8-NEXT: $vgpr0 = COPY [[BUFFER_LOAD_DWORD_OFFEN]] + ; GFX8-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0 %soffset = add i32 %soffset.base, 4095 %val = call float @llvm.amdgcn.s.buffer.load.f32(<4 x i32> %rsrc, i32 %soffset, i32 0) ret float %val @@ -2053,43 +2131,46 @@ define amdgpu_ps float @s_buffer_load_f32_vgpr_offset_add_4095(<4 x i32> inreg % define amdgpu_ps float @s_buffer_load_f32_vgpr_offset_add_4096(<4 x i32> inreg %rsrc, i32 %soffset.base) { ; GFX6-LABEL: name: s_buffer_load_f32_vgpr_offset_add_4096 ; GFX6: bb.1 (%ir-block.0): - ; GFX6: liveins: $sgpr2, $sgpr3, 
$sgpr4, $sgpr5, $vgpr0 - ; GFX6: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2 - ; GFX6: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3 - ; GFX6: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4 - ; GFX6: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5 - ; GFX6: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 - ; GFX6: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX6: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 4096 - ; GFX6: [[BUFFER_LOAD_DWORD_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s32)) - ; GFX6: $vgpr0 = COPY [[BUFFER_LOAD_DWORD_OFFEN]] - ; GFX6: SI_RETURN_TO_EPILOG implicit $vgpr0 + ; GFX6-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $vgpr0 + ; GFX6-NEXT: {{ $}} + ; GFX6-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2 + ; GFX6-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3 + ; GFX6-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4 + ; GFX6-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5 + ; GFX6-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 + ; GFX6-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0 + ; GFX6-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 4096 + ; GFX6-NEXT: [[BUFFER_LOAD_DWORD_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s32)) + ; GFX6-NEXT: $vgpr0 = COPY [[BUFFER_LOAD_DWORD_OFFEN]] + ; GFX6-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0 ; GFX7-LABEL: name: s_buffer_load_f32_vgpr_offset_add_4096 ; GFX7: bb.1 (%ir-block.0): - ; GFX7: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $vgpr0 - ; GFX7: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2 - ; GFX7: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3 - ; GFX7: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4 - ; GFX7: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5 - ; GFX7: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 - ; GFX7: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX7: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 4096 - ; GFX7: [[BUFFER_LOAD_DWORD_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s32)) - ; GFX7: $vgpr0 = COPY [[BUFFER_LOAD_DWORD_OFFEN]] - ; GFX7: SI_RETURN_TO_EPILOG implicit $vgpr0 + ; GFX7-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $vgpr0 + ; GFX7-NEXT: {{ $}} + ; GFX7-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2 + ; GFX7-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3 + ; GFX7-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4 + ; GFX7-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5 + ; GFX7-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 + ; GFX7-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0 + ; GFX7-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 4096 + ; GFX7-NEXT: [[BUFFER_LOAD_DWORD_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s32)) + ; GFX7-NEXT: $vgpr0 = COPY [[BUFFER_LOAD_DWORD_OFFEN]] + ; GFX7-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0 ; GFX8-LABEL: name: s_buffer_load_f32_vgpr_offset_add_4096 ; GFX8: bb.1 (%ir-block.0): - ; 
GFX8: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $vgpr0 - ; GFX8: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2 - ; GFX8: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3 - ; GFX8: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4 - ; GFX8: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5 - ; GFX8: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 - ; GFX8: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX8: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 1 - ; GFX8: [[BUFFER_LOAD_DWORD_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 4095, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s32)) - ; GFX8: $vgpr0 = COPY [[BUFFER_LOAD_DWORD_OFFEN]] - ; GFX8: SI_RETURN_TO_EPILOG implicit $vgpr0 + ; GFX8-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $vgpr0 + ; GFX8-NEXT: {{ $}} + ; GFX8-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2 + ; GFX8-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3 + ; GFX8-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4 + ; GFX8-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5 + ; GFX8-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 + ; GFX8-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0 + ; GFX8-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 1 + ; GFX8-NEXT: [[BUFFER_LOAD_DWORD_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 4095, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s32)) + ; GFX8-NEXT: $vgpr0 = COPY [[BUFFER_LOAD_DWORD_OFFEN]] + ; GFX8-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0 %soffset = add i32 %soffset.base, 4096 %val = call float @llvm.amdgcn.s.buffer.load.f32(<4 x i32> %rsrc, i32 %soffset, i32 0) ret float %val @@ -2099,94 +2180,97 @@ define amdgpu_ps float @s_buffer_load_f32_vgpr_offset_add_4096(<4 x i32> inreg % define amdgpu_ps <8 x float> @s_buffer_load_v8f32_vgpr_offset_add_4064(<4 x i32> inreg %rsrc, i32 %soffset.base) { ; GFX6-LABEL: name: s_buffer_load_v8f32_vgpr_offset_add_4064 ; GFX6: bb.1 (%ir-block.0): - ; GFX6: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $vgpr0 - ; GFX6: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2 - ; GFX6: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3 - ; GFX6: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4 - ; GFX6: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5 - ; GFX6: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 - ; GFX6: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX6: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0 - ; GFX6: [[BUFFER_LOAD_DWORDX4_OFFEN:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 4064, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128), align 4) - ; GFX6: [[BUFFER_LOAD_DWORDX4_OFFEN1:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 4080, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128), align 4) - ; GFX6: [[REG_SEQUENCE1:%[0-9]+]]:vreg_256 = REG_SEQUENCE [[BUFFER_LOAD_DWORDX4_OFFEN]], %subreg.sub0_sub1_sub2_sub3, [[BUFFER_LOAD_DWORDX4_OFFEN1]], %subreg.sub4_sub5_sub6_sub7 - ; GFX6: [[COPY5:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub0 - ; GFX6: [[COPY6:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub1 - ; GFX6: [[COPY7:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub2 - ; GFX6: [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub3 - ; GFX6: 
[[COPY9:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub4 - ; GFX6: [[COPY10:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub5 - ; GFX6: [[COPY11:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub6 - ; GFX6: [[COPY12:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub7 - ; GFX6: $vgpr0 = COPY [[COPY5]] - ; GFX6: $vgpr1 = COPY [[COPY6]] - ; GFX6: $vgpr2 = COPY [[COPY7]] - ; GFX6: $vgpr3 = COPY [[COPY8]] - ; GFX6: $vgpr4 = COPY [[COPY9]] - ; GFX6: $vgpr5 = COPY [[COPY10]] - ; GFX6: $vgpr6 = COPY [[COPY11]] - ; GFX6: $vgpr7 = COPY [[COPY12]] - ; GFX6: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7 + ; GFX6-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $vgpr0 + ; GFX6-NEXT: {{ $}} + ; GFX6-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2 + ; GFX6-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3 + ; GFX6-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4 + ; GFX6-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5 + ; GFX6-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 + ; GFX6-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0 + ; GFX6-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0 + ; GFX6-NEXT: [[BUFFER_LOAD_DWORDX4_OFFEN:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 4064, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128), align 4) + ; GFX6-NEXT: [[BUFFER_LOAD_DWORDX4_OFFEN1:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 4080, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128), align 4) + ; GFX6-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_256 = REG_SEQUENCE [[BUFFER_LOAD_DWORDX4_OFFEN]], %subreg.sub0_sub1_sub2_sub3, [[BUFFER_LOAD_DWORDX4_OFFEN1]], %subreg.sub4_sub5_sub6_sub7 + ; GFX6-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub0 + ; GFX6-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub1 + ; GFX6-NEXT: [[COPY7:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub2 + ; GFX6-NEXT: [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub3 + ; GFX6-NEXT: [[COPY9:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub4 + ; GFX6-NEXT: [[COPY10:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub5 + ; GFX6-NEXT: [[COPY11:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub6 + ; GFX6-NEXT: [[COPY12:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub7 + ; GFX6-NEXT: $vgpr0 = COPY [[COPY5]] + ; GFX6-NEXT: $vgpr1 = COPY [[COPY6]] + ; GFX6-NEXT: $vgpr2 = COPY [[COPY7]] + ; GFX6-NEXT: $vgpr3 = COPY [[COPY8]] + ; GFX6-NEXT: $vgpr4 = COPY [[COPY9]] + ; GFX6-NEXT: $vgpr5 = COPY [[COPY10]] + ; GFX6-NEXT: $vgpr6 = COPY [[COPY11]] + ; GFX6-NEXT: $vgpr7 = COPY [[COPY12]] + ; GFX6-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7 ; GFX7-LABEL: name: s_buffer_load_v8f32_vgpr_offset_add_4064 ; GFX7: bb.1 (%ir-block.0): - ; GFX7: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $vgpr0 - ; GFX7: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2 - ; GFX7: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3 - ; GFX7: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4 - ; GFX7: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5 - ; GFX7: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 - ; GFX7: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX7: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = 
S_MOV_B32 0 - ; GFX7: [[BUFFER_LOAD_DWORDX4_OFFEN:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 4064, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128), align 4) - ; GFX7: [[BUFFER_LOAD_DWORDX4_OFFEN1:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 4080, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128), align 4) - ; GFX7: [[REG_SEQUENCE1:%[0-9]+]]:vreg_256 = REG_SEQUENCE [[BUFFER_LOAD_DWORDX4_OFFEN]], %subreg.sub0_sub1_sub2_sub3, [[BUFFER_LOAD_DWORDX4_OFFEN1]], %subreg.sub4_sub5_sub6_sub7 - ; GFX7: [[COPY5:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub0 - ; GFX7: [[COPY6:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub1 - ; GFX7: [[COPY7:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub2 - ; GFX7: [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub3 - ; GFX7: [[COPY9:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub4 - ; GFX7: [[COPY10:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub5 - ; GFX7: [[COPY11:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub6 - ; GFX7: [[COPY12:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub7 - ; GFX7: $vgpr0 = COPY [[COPY5]] - ; GFX7: $vgpr1 = COPY [[COPY6]] - ; GFX7: $vgpr2 = COPY [[COPY7]] - ; GFX7: $vgpr3 = COPY [[COPY8]] - ; GFX7: $vgpr4 = COPY [[COPY9]] - ; GFX7: $vgpr5 = COPY [[COPY10]] - ; GFX7: $vgpr6 = COPY [[COPY11]] - ; GFX7: $vgpr7 = COPY [[COPY12]] - ; GFX7: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7 + ; GFX7-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $vgpr0 + ; GFX7-NEXT: {{ $}} + ; GFX7-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2 + ; GFX7-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3 + ; GFX7-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4 + ; GFX7-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5 + ; GFX7-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 + ; GFX7-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0 + ; GFX7-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0 + ; GFX7-NEXT: [[BUFFER_LOAD_DWORDX4_OFFEN:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 4064, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128), align 4) + ; GFX7-NEXT: [[BUFFER_LOAD_DWORDX4_OFFEN1:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 4080, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128), align 4) + ; GFX7-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_256 = REG_SEQUENCE [[BUFFER_LOAD_DWORDX4_OFFEN]], %subreg.sub0_sub1_sub2_sub3, [[BUFFER_LOAD_DWORDX4_OFFEN1]], %subreg.sub4_sub5_sub6_sub7 + ; GFX7-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub0 + ; GFX7-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub1 + ; GFX7-NEXT: [[COPY7:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub2 + ; GFX7-NEXT: [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub3 + ; GFX7-NEXT: [[COPY9:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub4 + ; GFX7-NEXT: [[COPY10:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub5 + ; GFX7-NEXT: [[COPY11:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub6 + ; GFX7-NEXT: [[COPY12:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub7 + ; GFX7-NEXT: $vgpr0 = COPY [[COPY5]] + ; GFX7-NEXT: $vgpr1 = COPY [[COPY6]] + ; GFX7-NEXT: $vgpr2 = COPY [[COPY7]] + ; GFX7-NEXT: $vgpr3 = COPY [[COPY8]] + ; GFX7-NEXT: $vgpr4 = COPY [[COPY9]] 
+ ; GFX7-NEXT: $vgpr5 = COPY [[COPY10]] + ; GFX7-NEXT: $vgpr6 = COPY [[COPY11]] + ; GFX7-NEXT: $vgpr7 = COPY [[COPY12]] + ; GFX7-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7 ; GFX8-LABEL: name: s_buffer_load_v8f32_vgpr_offset_add_4064 ; GFX8: bb.1 (%ir-block.0): - ; GFX8: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $vgpr0 - ; GFX8: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2 - ; GFX8: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3 - ; GFX8: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4 - ; GFX8: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5 - ; GFX8: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 - ; GFX8: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX8: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0 - ; GFX8: [[BUFFER_LOAD_DWORDX4_OFFEN:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 4064, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128), align 4) - ; GFX8: [[BUFFER_LOAD_DWORDX4_OFFEN1:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 4080, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128), align 4) - ; GFX8: [[REG_SEQUENCE1:%[0-9]+]]:vreg_256 = REG_SEQUENCE [[BUFFER_LOAD_DWORDX4_OFFEN]], %subreg.sub0_sub1_sub2_sub3, [[BUFFER_LOAD_DWORDX4_OFFEN1]], %subreg.sub4_sub5_sub6_sub7 - ; GFX8: [[COPY5:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub0 - ; GFX8: [[COPY6:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub1 - ; GFX8: [[COPY7:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub2 - ; GFX8: [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub3 - ; GFX8: [[COPY9:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub4 - ; GFX8: [[COPY10:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub5 - ; GFX8: [[COPY11:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub6 - ; GFX8: [[COPY12:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub7 - ; GFX8: $vgpr0 = COPY [[COPY5]] - ; GFX8: $vgpr1 = COPY [[COPY6]] - ; GFX8: $vgpr2 = COPY [[COPY7]] - ; GFX8: $vgpr3 = COPY [[COPY8]] - ; GFX8: $vgpr4 = COPY [[COPY9]] - ; GFX8: $vgpr5 = COPY [[COPY10]] - ; GFX8: $vgpr6 = COPY [[COPY11]] - ; GFX8: $vgpr7 = COPY [[COPY12]] - ; GFX8: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7 + ; GFX8-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $vgpr0 + ; GFX8-NEXT: {{ $}} + ; GFX8-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2 + ; GFX8-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3 + ; GFX8-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4 + ; GFX8-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5 + ; GFX8-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 + ; GFX8-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0 + ; GFX8-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0 + ; GFX8-NEXT: [[BUFFER_LOAD_DWORDX4_OFFEN:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 4064, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128), align 4) + ; GFX8-NEXT: [[BUFFER_LOAD_DWORDX4_OFFEN1:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 4080, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128), align 4) + ; GFX8-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_256 = REG_SEQUENCE 
[[BUFFER_LOAD_DWORDX4_OFFEN]], %subreg.sub0_sub1_sub2_sub3, [[BUFFER_LOAD_DWORDX4_OFFEN1]], %subreg.sub4_sub5_sub6_sub7 + ; GFX8-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub0 + ; GFX8-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub1 + ; GFX8-NEXT: [[COPY7:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub2 + ; GFX8-NEXT: [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub3 + ; GFX8-NEXT: [[COPY9:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub4 + ; GFX8-NEXT: [[COPY10:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub5 + ; GFX8-NEXT: [[COPY11:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub6 + ; GFX8-NEXT: [[COPY12:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub7 + ; GFX8-NEXT: $vgpr0 = COPY [[COPY5]] + ; GFX8-NEXT: $vgpr1 = COPY [[COPY6]] + ; GFX8-NEXT: $vgpr2 = COPY [[COPY7]] + ; GFX8-NEXT: $vgpr3 = COPY [[COPY8]] + ; GFX8-NEXT: $vgpr4 = COPY [[COPY9]] + ; GFX8-NEXT: $vgpr5 = COPY [[COPY10]] + ; GFX8-NEXT: $vgpr6 = COPY [[COPY11]] + ; GFX8-NEXT: $vgpr7 = COPY [[COPY12]] + ; GFX8-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7 %soffset = add i32 %soffset.base, 4064 %val = call <8 x float> @llvm.amdgcn.s.buffer.load.v8f32(<4 x i32> %rsrc, i32 %soffset, i32 0) ret <8 x float> %val @@ -2196,94 +2280,97 @@ define amdgpu_ps <8 x float> @s_buffer_load_v8f32_vgpr_offset_add_4064(<4 x i32> define amdgpu_ps <8 x float> @s_buffer_load_v8f32_vgpr_offset_add_4068(<4 x i32> inreg %rsrc, i32 %soffset.base) { ; GFX6-LABEL: name: s_buffer_load_v8f32_vgpr_offset_add_4068 ; GFX6: bb.1 (%ir-block.0): - ; GFX6: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $vgpr0 - ; GFX6: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2 - ; GFX6: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3 - ; GFX6: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4 - ; GFX6: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5 - ; GFX6: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 - ; GFX6: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX6: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 4068 - ; GFX6: [[BUFFER_LOAD_DWORDX4_OFFEN:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128), align 4) - ; GFX6: [[BUFFER_LOAD_DWORDX4_OFFEN1:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 16, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128), align 4) - ; GFX6: [[REG_SEQUENCE1:%[0-9]+]]:vreg_256 = REG_SEQUENCE [[BUFFER_LOAD_DWORDX4_OFFEN]], %subreg.sub0_sub1_sub2_sub3, [[BUFFER_LOAD_DWORDX4_OFFEN1]], %subreg.sub4_sub5_sub6_sub7 - ; GFX6: [[COPY5:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub0 - ; GFX6: [[COPY6:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub1 - ; GFX6: [[COPY7:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub2 - ; GFX6: [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub3 - ; GFX6: [[COPY9:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub4 - ; GFX6: [[COPY10:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub5 - ; GFX6: [[COPY11:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub6 - ; GFX6: [[COPY12:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub7 - ; GFX6: $vgpr0 = COPY [[COPY5]] - ; GFX6: $vgpr1 = COPY [[COPY6]] - ; GFX6: $vgpr2 = COPY [[COPY7]] - ; GFX6: $vgpr3 = COPY [[COPY8]] - ; GFX6: $vgpr4 = COPY [[COPY9]] - ; GFX6: $vgpr5 = COPY [[COPY10]] - ; GFX6: $vgpr6 = COPY [[COPY11]] - ; GFX6: $vgpr7 = 
COPY [[COPY12]] - ; GFX6: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7 + ; GFX6-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $vgpr0 + ; GFX6-NEXT: {{ $}} + ; GFX6-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2 + ; GFX6-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3 + ; GFX6-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4 + ; GFX6-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5 + ; GFX6-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 + ; GFX6-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0 + ; GFX6-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 4068 + ; GFX6-NEXT: [[BUFFER_LOAD_DWORDX4_OFFEN:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128), align 4) + ; GFX6-NEXT: [[BUFFER_LOAD_DWORDX4_OFFEN1:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 16, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128), align 4) + ; GFX6-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_256 = REG_SEQUENCE [[BUFFER_LOAD_DWORDX4_OFFEN]], %subreg.sub0_sub1_sub2_sub3, [[BUFFER_LOAD_DWORDX4_OFFEN1]], %subreg.sub4_sub5_sub6_sub7 + ; GFX6-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub0 + ; GFX6-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub1 + ; GFX6-NEXT: [[COPY7:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub2 + ; GFX6-NEXT: [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub3 + ; GFX6-NEXT: [[COPY9:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub4 + ; GFX6-NEXT: [[COPY10:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub5 + ; GFX6-NEXT: [[COPY11:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub6 + ; GFX6-NEXT: [[COPY12:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub7 + ; GFX6-NEXT: $vgpr0 = COPY [[COPY5]] + ; GFX6-NEXT: $vgpr1 = COPY [[COPY6]] + ; GFX6-NEXT: $vgpr2 = COPY [[COPY7]] + ; GFX6-NEXT: $vgpr3 = COPY [[COPY8]] + ; GFX6-NEXT: $vgpr4 = COPY [[COPY9]] + ; GFX6-NEXT: $vgpr5 = COPY [[COPY10]] + ; GFX6-NEXT: $vgpr6 = COPY [[COPY11]] + ; GFX6-NEXT: $vgpr7 = COPY [[COPY12]] + ; GFX6-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7 ; GFX7-LABEL: name: s_buffer_load_v8f32_vgpr_offset_add_4068 ; GFX7: bb.1 (%ir-block.0): - ; GFX7: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $vgpr0 - ; GFX7: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2 - ; GFX7: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3 - ; GFX7: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4 - ; GFX7: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5 - ; GFX7: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 - ; GFX7: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX7: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 4068 - ; GFX7: [[BUFFER_LOAD_DWORDX4_OFFEN:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128), align 4) - ; GFX7: [[BUFFER_LOAD_DWORDX4_OFFEN1:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 16, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128), align 4) - ; GFX7: [[REG_SEQUENCE1:%[0-9]+]]:vreg_256 = REG_SEQUENCE 
[[BUFFER_LOAD_DWORDX4_OFFEN]], %subreg.sub0_sub1_sub2_sub3, [[BUFFER_LOAD_DWORDX4_OFFEN1]], %subreg.sub4_sub5_sub6_sub7 - ; GFX7: [[COPY5:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub0 - ; GFX7: [[COPY6:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub1 - ; GFX7: [[COPY7:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub2 - ; GFX7: [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub3 - ; GFX7: [[COPY9:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub4 - ; GFX7: [[COPY10:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub5 - ; GFX7: [[COPY11:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub6 - ; GFX7: [[COPY12:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub7 - ; GFX7: $vgpr0 = COPY [[COPY5]] - ; GFX7: $vgpr1 = COPY [[COPY6]] - ; GFX7: $vgpr2 = COPY [[COPY7]] - ; GFX7: $vgpr3 = COPY [[COPY8]] - ; GFX7: $vgpr4 = COPY [[COPY9]] - ; GFX7: $vgpr5 = COPY [[COPY10]] - ; GFX7: $vgpr6 = COPY [[COPY11]] - ; GFX7: $vgpr7 = COPY [[COPY12]] - ; GFX7: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7 + ; GFX7-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $vgpr0 + ; GFX7-NEXT: {{ $}} + ; GFX7-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2 + ; GFX7-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3 + ; GFX7-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4 + ; GFX7-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5 + ; GFX7-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 + ; GFX7-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0 + ; GFX7-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 4068 + ; GFX7-NEXT: [[BUFFER_LOAD_DWORDX4_OFFEN:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128), align 4) + ; GFX7-NEXT: [[BUFFER_LOAD_DWORDX4_OFFEN1:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 16, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128), align 4) + ; GFX7-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_256 = REG_SEQUENCE [[BUFFER_LOAD_DWORDX4_OFFEN]], %subreg.sub0_sub1_sub2_sub3, [[BUFFER_LOAD_DWORDX4_OFFEN1]], %subreg.sub4_sub5_sub6_sub7 + ; GFX7-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub0 + ; GFX7-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub1 + ; GFX7-NEXT: [[COPY7:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub2 + ; GFX7-NEXT: [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub3 + ; GFX7-NEXT: [[COPY9:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub4 + ; GFX7-NEXT: [[COPY10:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub5 + ; GFX7-NEXT: [[COPY11:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub6 + ; GFX7-NEXT: [[COPY12:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub7 + ; GFX7-NEXT: $vgpr0 = COPY [[COPY5]] + ; GFX7-NEXT: $vgpr1 = COPY [[COPY6]] + ; GFX7-NEXT: $vgpr2 = COPY [[COPY7]] + ; GFX7-NEXT: $vgpr3 = COPY [[COPY8]] + ; GFX7-NEXT: $vgpr4 = COPY [[COPY9]] + ; GFX7-NEXT: $vgpr5 = COPY [[COPY10]] + ; GFX7-NEXT: $vgpr6 = COPY [[COPY11]] + ; GFX7-NEXT: $vgpr7 = COPY [[COPY12]] + ; GFX7-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7 ; GFX8-LABEL: name: s_buffer_load_v8f32_vgpr_offset_add_4068 ; GFX8: bb.1 (%ir-block.0): - ; GFX8: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $vgpr0 - ; GFX8: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2 - ; GFX8: 
[[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3 - ; GFX8: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4 - ; GFX8: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5 - ; GFX8: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 - ; GFX8: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX8: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 4 - ; GFX8: [[BUFFER_LOAD_DWORDX4_OFFEN:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 4064, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128), align 4) - ; GFX8: [[BUFFER_LOAD_DWORDX4_OFFEN1:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 4080, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128), align 4) - ; GFX8: [[REG_SEQUENCE1:%[0-9]+]]:vreg_256 = REG_SEQUENCE [[BUFFER_LOAD_DWORDX4_OFFEN]], %subreg.sub0_sub1_sub2_sub3, [[BUFFER_LOAD_DWORDX4_OFFEN1]], %subreg.sub4_sub5_sub6_sub7 - ; GFX8: [[COPY5:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub0 - ; GFX8: [[COPY6:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub1 - ; GFX8: [[COPY7:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub2 - ; GFX8: [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub3 - ; GFX8: [[COPY9:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub4 - ; GFX8: [[COPY10:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub5 - ; GFX8: [[COPY11:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub6 - ; GFX8: [[COPY12:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub7 - ; GFX8: $vgpr0 = COPY [[COPY5]] - ; GFX8: $vgpr1 = COPY [[COPY6]] - ; GFX8: $vgpr2 = COPY [[COPY7]] - ; GFX8: $vgpr3 = COPY [[COPY8]] - ; GFX8: $vgpr4 = COPY [[COPY9]] - ; GFX8: $vgpr5 = COPY [[COPY10]] - ; GFX8: $vgpr6 = COPY [[COPY11]] - ; GFX8: $vgpr7 = COPY [[COPY12]] - ; GFX8: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7 + ; GFX8-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $vgpr0 + ; GFX8-NEXT: {{ $}} + ; GFX8-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2 + ; GFX8-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3 + ; GFX8-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4 + ; GFX8-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5 + ; GFX8-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 + ; GFX8-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0 + ; GFX8-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 4 + ; GFX8-NEXT: [[BUFFER_LOAD_DWORDX4_OFFEN:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 4064, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128), align 4) + ; GFX8-NEXT: [[BUFFER_LOAD_DWORDX4_OFFEN1:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 4080, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128), align 4) + ; GFX8-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_256 = REG_SEQUENCE [[BUFFER_LOAD_DWORDX4_OFFEN]], %subreg.sub0_sub1_sub2_sub3, [[BUFFER_LOAD_DWORDX4_OFFEN1]], %subreg.sub4_sub5_sub6_sub7 + ; GFX8-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub0 + ; GFX8-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub1 + ; GFX8-NEXT: [[COPY7:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub2 + ; GFX8-NEXT: [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub3 + ; GFX8-NEXT: [[COPY9:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub4 + ; GFX8-NEXT: 
[[COPY10:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub5 + ; GFX8-NEXT: [[COPY11:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub6 + ; GFX8-NEXT: [[COPY12:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub7 + ; GFX8-NEXT: $vgpr0 = COPY [[COPY5]] + ; GFX8-NEXT: $vgpr1 = COPY [[COPY6]] + ; GFX8-NEXT: $vgpr2 = COPY [[COPY7]] + ; GFX8-NEXT: $vgpr3 = COPY [[COPY8]] + ; GFX8-NEXT: $vgpr4 = COPY [[COPY9]] + ; GFX8-NEXT: $vgpr5 = COPY [[COPY10]] + ; GFX8-NEXT: $vgpr6 = COPY [[COPY11]] + ; GFX8-NEXT: $vgpr7 = COPY [[COPY12]] + ; GFX8-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7 %soffset = add i32 %soffset.base, 4068 %val = call <8 x float> @llvm.amdgcn.s.buffer.load.v8f32(<4 x i32> %rsrc, i32 %soffset, i32 0) ret <8 x float> %val @@ -2292,148 +2379,151 @@ define amdgpu_ps <8 x float> @s_buffer_load_v8f32_vgpr_offset_add_4068(<4 x i32> define amdgpu_ps <16 x float> @s_buffer_load_v16f32_vgpr_offset_add_4032(<4 x i32> inreg %rsrc, i32 %soffset.base) { ; GFX6-LABEL: name: s_buffer_load_v16f32_vgpr_offset_add_4032 ; GFX6: bb.1 (%ir-block.0): - ; GFX6: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $vgpr0 - ; GFX6: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2 - ; GFX6: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3 - ; GFX6: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4 - ; GFX6: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5 - ; GFX6: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 - ; GFX6: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX6: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0 - ; GFX6: [[BUFFER_LOAD_DWORDX4_OFFEN:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 4032, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128), align 4) - ; GFX6: [[BUFFER_LOAD_DWORDX4_OFFEN1:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 4048, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128), align 4) - ; GFX6: [[BUFFER_LOAD_DWORDX4_OFFEN2:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 4064, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128) from unknown-address + 16, align 4) - ; GFX6: [[BUFFER_LOAD_DWORDX4_OFFEN3:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 4080, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128) from unknown-address + 48, align 4) - ; GFX6: [[REG_SEQUENCE1:%[0-9]+]]:vreg_512 = REG_SEQUENCE [[BUFFER_LOAD_DWORDX4_OFFEN]], %subreg.sub0_sub1_sub2_sub3, [[BUFFER_LOAD_DWORDX4_OFFEN1]], %subreg.sub4_sub5_sub6_sub7, [[BUFFER_LOAD_DWORDX4_OFFEN2]], %subreg.sub8_sub9_sub10_sub11, [[BUFFER_LOAD_DWORDX4_OFFEN3]], %subreg.sub12_sub13_sub14_sub15 - ; GFX6: [[COPY5:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub0 - ; GFX6: [[COPY6:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub1 - ; GFX6: [[COPY7:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub2 - ; GFX6: [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub3 - ; GFX6: [[COPY9:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub4 - ; GFX6: [[COPY10:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub5 - ; GFX6: [[COPY11:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub6 - ; GFX6: [[COPY12:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub7 - ; GFX6: [[COPY13:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub8 - ; GFX6: [[COPY14:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub9 - ; 
GFX6: [[COPY15:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub10 - ; GFX6: [[COPY16:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub11 - ; GFX6: [[COPY17:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub12 - ; GFX6: [[COPY18:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub13 - ; GFX6: [[COPY19:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub14 - ; GFX6: [[COPY20:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub15 - ; GFX6: $vgpr0 = COPY [[COPY5]] - ; GFX6: $vgpr1 = COPY [[COPY6]] - ; GFX6: $vgpr2 = COPY [[COPY7]] - ; GFX6: $vgpr3 = COPY [[COPY8]] - ; GFX6: $vgpr4 = COPY [[COPY9]] - ; GFX6: $vgpr5 = COPY [[COPY10]] - ; GFX6: $vgpr6 = COPY [[COPY11]] - ; GFX6: $vgpr7 = COPY [[COPY12]] - ; GFX6: $vgpr8 = COPY [[COPY13]] - ; GFX6: $vgpr9 = COPY [[COPY14]] - ; GFX6: $vgpr10 = COPY [[COPY15]] - ; GFX6: $vgpr11 = COPY [[COPY16]] - ; GFX6: $vgpr12 = COPY [[COPY17]] - ; GFX6: $vgpr13 = COPY [[COPY18]] - ; GFX6: $vgpr14 = COPY [[COPY19]] - ; GFX6: $vgpr15 = COPY [[COPY20]] - ; GFX6: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7, implicit $vgpr8, implicit $vgpr9, implicit $vgpr10, implicit $vgpr11, implicit $vgpr12, implicit $vgpr13, implicit $vgpr14, implicit $vgpr15 + ; GFX6-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $vgpr0 + ; GFX6-NEXT: {{ $}} + ; GFX6-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2 + ; GFX6-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3 + ; GFX6-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4 + ; GFX6-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5 + ; GFX6-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 + ; GFX6-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0 + ; GFX6-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0 + ; GFX6-NEXT: [[BUFFER_LOAD_DWORDX4_OFFEN:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 4032, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128), align 4) + ; GFX6-NEXT: [[BUFFER_LOAD_DWORDX4_OFFEN1:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 4048, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128), align 4) + ; GFX6-NEXT: [[BUFFER_LOAD_DWORDX4_OFFEN2:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 4064, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128) from unknown-address + 16, align 4) + ; GFX6-NEXT: [[BUFFER_LOAD_DWORDX4_OFFEN3:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 4080, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128) from unknown-address + 48, align 4) + ; GFX6-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_512 = REG_SEQUENCE [[BUFFER_LOAD_DWORDX4_OFFEN]], %subreg.sub0_sub1_sub2_sub3, [[BUFFER_LOAD_DWORDX4_OFFEN1]], %subreg.sub4_sub5_sub6_sub7, [[BUFFER_LOAD_DWORDX4_OFFEN2]], %subreg.sub8_sub9_sub10_sub11, [[BUFFER_LOAD_DWORDX4_OFFEN3]], %subreg.sub12_sub13_sub14_sub15 + ; GFX6-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub0 + ; GFX6-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub1 + ; GFX6-NEXT: [[COPY7:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub2 + ; GFX6-NEXT: [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub3 + ; GFX6-NEXT: [[COPY9:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub4 + ; GFX6-NEXT: [[COPY10:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub5 + ; GFX6-NEXT: 
[[COPY11:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub6 + ; GFX6-NEXT: [[COPY12:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub7 + ; GFX6-NEXT: [[COPY13:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub8 + ; GFX6-NEXT: [[COPY14:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub9 + ; GFX6-NEXT: [[COPY15:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub10 + ; GFX6-NEXT: [[COPY16:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub11 + ; GFX6-NEXT: [[COPY17:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub12 + ; GFX6-NEXT: [[COPY18:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub13 + ; GFX6-NEXT: [[COPY19:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub14 + ; GFX6-NEXT: [[COPY20:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub15 + ; GFX6-NEXT: $vgpr0 = COPY [[COPY5]] + ; GFX6-NEXT: $vgpr1 = COPY [[COPY6]] + ; GFX6-NEXT: $vgpr2 = COPY [[COPY7]] + ; GFX6-NEXT: $vgpr3 = COPY [[COPY8]] + ; GFX6-NEXT: $vgpr4 = COPY [[COPY9]] + ; GFX6-NEXT: $vgpr5 = COPY [[COPY10]] + ; GFX6-NEXT: $vgpr6 = COPY [[COPY11]] + ; GFX6-NEXT: $vgpr7 = COPY [[COPY12]] + ; GFX6-NEXT: $vgpr8 = COPY [[COPY13]] + ; GFX6-NEXT: $vgpr9 = COPY [[COPY14]] + ; GFX6-NEXT: $vgpr10 = COPY [[COPY15]] + ; GFX6-NEXT: $vgpr11 = COPY [[COPY16]] + ; GFX6-NEXT: $vgpr12 = COPY [[COPY17]] + ; GFX6-NEXT: $vgpr13 = COPY [[COPY18]] + ; GFX6-NEXT: $vgpr14 = COPY [[COPY19]] + ; GFX6-NEXT: $vgpr15 = COPY [[COPY20]] + ; GFX6-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7, implicit $vgpr8, implicit $vgpr9, implicit $vgpr10, implicit $vgpr11, implicit $vgpr12, implicit $vgpr13, implicit $vgpr14, implicit $vgpr15 ; GFX7-LABEL: name: s_buffer_load_v16f32_vgpr_offset_add_4032 ; GFX7: bb.1 (%ir-block.0): - ; GFX7: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $vgpr0 - ; GFX7: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2 - ; GFX7: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3 - ; GFX7: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4 - ; GFX7: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5 - ; GFX7: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 - ; GFX7: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX7: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0 - ; GFX7: [[BUFFER_LOAD_DWORDX4_OFFEN:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 4032, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128), align 4) - ; GFX7: [[BUFFER_LOAD_DWORDX4_OFFEN1:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 4048, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128), align 4) - ; GFX7: [[BUFFER_LOAD_DWORDX4_OFFEN2:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 4064, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128) from unknown-address + 16, align 4) - ; GFX7: [[BUFFER_LOAD_DWORDX4_OFFEN3:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 4080, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128) from unknown-address + 48, align 4) - ; GFX7: [[REG_SEQUENCE1:%[0-9]+]]:vreg_512 = REG_SEQUENCE [[BUFFER_LOAD_DWORDX4_OFFEN]], %subreg.sub0_sub1_sub2_sub3, [[BUFFER_LOAD_DWORDX4_OFFEN1]], %subreg.sub4_sub5_sub6_sub7, [[BUFFER_LOAD_DWORDX4_OFFEN2]], %subreg.sub8_sub9_sub10_sub11, [[BUFFER_LOAD_DWORDX4_OFFEN3]], %subreg.sub12_sub13_sub14_sub15 - ; GFX7: [[COPY5:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub0 - ; 
GFX7: [[COPY6:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub1 - ; GFX7: [[COPY7:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub2 - ; GFX7: [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub3 - ; GFX7: [[COPY9:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub4 - ; GFX7: [[COPY10:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub5 - ; GFX7: [[COPY11:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub6 - ; GFX7: [[COPY12:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub7 - ; GFX7: [[COPY13:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub8 - ; GFX7: [[COPY14:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub9 - ; GFX7: [[COPY15:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub10 - ; GFX7: [[COPY16:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub11 - ; GFX7: [[COPY17:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub12 - ; GFX7: [[COPY18:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub13 - ; GFX7: [[COPY19:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub14 - ; GFX7: [[COPY20:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub15 - ; GFX7: $vgpr0 = COPY [[COPY5]] - ; GFX7: $vgpr1 = COPY [[COPY6]] - ; GFX7: $vgpr2 = COPY [[COPY7]] - ; GFX7: $vgpr3 = COPY [[COPY8]] - ; GFX7: $vgpr4 = COPY [[COPY9]] - ; GFX7: $vgpr5 = COPY [[COPY10]] - ; GFX7: $vgpr6 = COPY [[COPY11]] - ; GFX7: $vgpr7 = COPY [[COPY12]] - ; GFX7: $vgpr8 = COPY [[COPY13]] - ; GFX7: $vgpr9 = COPY [[COPY14]] - ; GFX7: $vgpr10 = COPY [[COPY15]] - ; GFX7: $vgpr11 = COPY [[COPY16]] - ; GFX7: $vgpr12 = COPY [[COPY17]] - ; GFX7: $vgpr13 = COPY [[COPY18]] - ; GFX7: $vgpr14 = COPY [[COPY19]] - ; GFX7: $vgpr15 = COPY [[COPY20]] - ; GFX7: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7, implicit $vgpr8, implicit $vgpr9, implicit $vgpr10, implicit $vgpr11, implicit $vgpr12, implicit $vgpr13, implicit $vgpr14, implicit $vgpr15 + ; GFX7-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $vgpr0 + ; GFX7-NEXT: {{ $}} + ; GFX7-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2 + ; GFX7-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3 + ; GFX7-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4 + ; GFX7-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5 + ; GFX7-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 + ; GFX7-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0 + ; GFX7-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0 + ; GFX7-NEXT: [[BUFFER_LOAD_DWORDX4_OFFEN:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 4032, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128), align 4) + ; GFX7-NEXT: [[BUFFER_LOAD_DWORDX4_OFFEN1:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 4048, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128), align 4) + ; GFX7-NEXT: [[BUFFER_LOAD_DWORDX4_OFFEN2:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 4064, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128) from unknown-address + 16, align 4) + ; GFX7-NEXT: [[BUFFER_LOAD_DWORDX4_OFFEN3:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 4080, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128) from unknown-address + 48, align 4) + ; GFX7-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_512 = REG_SEQUENCE [[BUFFER_LOAD_DWORDX4_OFFEN]], %subreg.sub0_sub1_sub2_sub3, [[BUFFER_LOAD_DWORDX4_OFFEN1]], 
%subreg.sub4_sub5_sub6_sub7, [[BUFFER_LOAD_DWORDX4_OFFEN2]], %subreg.sub8_sub9_sub10_sub11, [[BUFFER_LOAD_DWORDX4_OFFEN3]], %subreg.sub12_sub13_sub14_sub15 + ; GFX7-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub0 + ; GFX7-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub1 + ; GFX7-NEXT: [[COPY7:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub2 + ; GFX7-NEXT: [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub3 + ; GFX7-NEXT: [[COPY9:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub4 + ; GFX7-NEXT: [[COPY10:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub5 + ; GFX7-NEXT: [[COPY11:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub6 + ; GFX7-NEXT: [[COPY12:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub7 + ; GFX7-NEXT: [[COPY13:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub8 + ; GFX7-NEXT: [[COPY14:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub9 + ; GFX7-NEXT: [[COPY15:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub10 + ; GFX7-NEXT: [[COPY16:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub11 + ; GFX7-NEXT: [[COPY17:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub12 + ; GFX7-NEXT: [[COPY18:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub13 + ; GFX7-NEXT: [[COPY19:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub14 + ; GFX7-NEXT: [[COPY20:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub15 + ; GFX7-NEXT: $vgpr0 = COPY [[COPY5]] + ; GFX7-NEXT: $vgpr1 = COPY [[COPY6]] + ; GFX7-NEXT: $vgpr2 = COPY [[COPY7]] + ; GFX7-NEXT: $vgpr3 = COPY [[COPY8]] + ; GFX7-NEXT: $vgpr4 = COPY [[COPY9]] + ; GFX7-NEXT: $vgpr5 = COPY [[COPY10]] + ; GFX7-NEXT: $vgpr6 = COPY [[COPY11]] + ; GFX7-NEXT: $vgpr7 = COPY [[COPY12]] + ; GFX7-NEXT: $vgpr8 = COPY [[COPY13]] + ; GFX7-NEXT: $vgpr9 = COPY [[COPY14]] + ; GFX7-NEXT: $vgpr10 = COPY [[COPY15]] + ; GFX7-NEXT: $vgpr11 = COPY [[COPY16]] + ; GFX7-NEXT: $vgpr12 = COPY [[COPY17]] + ; GFX7-NEXT: $vgpr13 = COPY [[COPY18]] + ; GFX7-NEXT: $vgpr14 = COPY [[COPY19]] + ; GFX7-NEXT: $vgpr15 = COPY [[COPY20]] + ; GFX7-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7, implicit $vgpr8, implicit $vgpr9, implicit $vgpr10, implicit $vgpr11, implicit $vgpr12, implicit $vgpr13, implicit $vgpr14, implicit $vgpr15 ; GFX8-LABEL: name: s_buffer_load_v16f32_vgpr_offset_add_4032 ; GFX8: bb.1 (%ir-block.0): - ; GFX8: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $vgpr0 - ; GFX8: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2 - ; GFX8: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3 - ; GFX8: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4 - ; GFX8: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5 - ; GFX8: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 - ; GFX8: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX8: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0 - ; GFX8: [[BUFFER_LOAD_DWORDX4_OFFEN:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 4032, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128), align 4) - ; GFX8: [[BUFFER_LOAD_DWORDX4_OFFEN1:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 4048, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128), align 4) - ; GFX8: [[BUFFER_LOAD_DWORDX4_OFFEN2:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 4064, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128) from unknown-address + 16, align 4) - ; GFX8: 
[[BUFFER_LOAD_DWORDX4_OFFEN3:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 4080, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128) from unknown-address + 48, align 4) - ; GFX8: [[REG_SEQUENCE1:%[0-9]+]]:vreg_512 = REG_SEQUENCE [[BUFFER_LOAD_DWORDX4_OFFEN]], %subreg.sub0_sub1_sub2_sub3, [[BUFFER_LOAD_DWORDX4_OFFEN1]], %subreg.sub4_sub5_sub6_sub7, [[BUFFER_LOAD_DWORDX4_OFFEN2]], %subreg.sub8_sub9_sub10_sub11, [[BUFFER_LOAD_DWORDX4_OFFEN3]], %subreg.sub12_sub13_sub14_sub15 - ; GFX8: [[COPY5:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub0 - ; GFX8: [[COPY6:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub1 - ; GFX8: [[COPY7:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub2 - ; GFX8: [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub3 - ; GFX8: [[COPY9:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub4 - ; GFX8: [[COPY10:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub5 - ; GFX8: [[COPY11:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub6 - ; GFX8: [[COPY12:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub7 - ; GFX8: [[COPY13:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub8 - ; GFX8: [[COPY14:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub9 - ; GFX8: [[COPY15:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub10 - ; GFX8: [[COPY16:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub11 - ; GFX8: [[COPY17:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub12 - ; GFX8: [[COPY18:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub13 - ; GFX8: [[COPY19:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub14 - ; GFX8: [[COPY20:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub15 - ; GFX8: $vgpr0 = COPY [[COPY5]] - ; GFX8: $vgpr1 = COPY [[COPY6]] - ; GFX8: $vgpr2 = COPY [[COPY7]] - ; GFX8: $vgpr3 = COPY [[COPY8]] - ; GFX8: $vgpr4 = COPY [[COPY9]] - ; GFX8: $vgpr5 = COPY [[COPY10]] - ; GFX8: $vgpr6 = COPY [[COPY11]] - ; GFX8: $vgpr7 = COPY [[COPY12]] - ; GFX8: $vgpr8 = COPY [[COPY13]] - ; GFX8: $vgpr9 = COPY [[COPY14]] - ; GFX8: $vgpr10 = COPY [[COPY15]] - ; GFX8: $vgpr11 = COPY [[COPY16]] - ; GFX8: $vgpr12 = COPY [[COPY17]] - ; GFX8: $vgpr13 = COPY [[COPY18]] - ; GFX8: $vgpr14 = COPY [[COPY19]] - ; GFX8: $vgpr15 = COPY [[COPY20]] - ; GFX8: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7, implicit $vgpr8, implicit $vgpr9, implicit $vgpr10, implicit $vgpr11, implicit $vgpr12, implicit $vgpr13, implicit $vgpr14, implicit $vgpr15 + ; GFX8-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $vgpr0 + ; GFX8-NEXT: {{ $}} + ; GFX8-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2 + ; GFX8-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3 + ; GFX8-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4 + ; GFX8-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5 + ; GFX8-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 + ; GFX8-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0 + ; GFX8-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0 + ; GFX8-NEXT: [[BUFFER_LOAD_DWORDX4_OFFEN:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 4032, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128), align 4) + ; GFX8-NEXT: [[BUFFER_LOAD_DWORDX4_OFFEN1:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 4048, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128), align 4) + ; GFX8-NEXT: 
[[BUFFER_LOAD_DWORDX4_OFFEN2:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 4064, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128) from unknown-address + 16, align 4) + ; GFX8-NEXT: [[BUFFER_LOAD_DWORDX4_OFFEN3:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 4080, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128) from unknown-address + 48, align 4) + ; GFX8-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_512 = REG_SEQUENCE [[BUFFER_LOAD_DWORDX4_OFFEN]], %subreg.sub0_sub1_sub2_sub3, [[BUFFER_LOAD_DWORDX4_OFFEN1]], %subreg.sub4_sub5_sub6_sub7, [[BUFFER_LOAD_DWORDX4_OFFEN2]], %subreg.sub8_sub9_sub10_sub11, [[BUFFER_LOAD_DWORDX4_OFFEN3]], %subreg.sub12_sub13_sub14_sub15 + ; GFX8-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub0 + ; GFX8-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub1 + ; GFX8-NEXT: [[COPY7:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub2 + ; GFX8-NEXT: [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub3 + ; GFX8-NEXT: [[COPY9:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub4 + ; GFX8-NEXT: [[COPY10:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub5 + ; GFX8-NEXT: [[COPY11:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub6 + ; GFX8-NEXT: [[COPY12:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub7 + ; GFX8-NEXT: [[COPY13:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub8 + ; GFX8-NEXT: [[COPY14:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub9 + ; GFX8-NEXT: [[COPY15:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub10 + ; GFX8-NEXT: [[COPY16:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub11 + ; GFX8-NEXT: [[COPY17:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub12 + ; GFX8-NEXT: [[COPY18:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub13 + ; GFX8-NEXT: [[COPY19:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub14 + ; GFX8-NEXT: [[COPY20:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub15 + ; GFX8-NEXT: $vgpr0 = COPY [[COPY5]] + ; GFX8-NEXT: $vgpr1 = COPY [[COPY6]] + ; GFX8-NEXT: $vgpr2 = COPY [[COPY7]] + ; GFX8-NEXT: $vgpr3 = COPY [[COPY8]] + ; GFX8-NEXT: $vgpr4 = COPY [[COPY9]] + ; GFX8-NEXT: $vgpr5 = COPY [[COPY10]] + ; GFX8-NEXT: $vgpr6 = COPY [[COPY11]] + ; GFX8-NEXT: $vgpr7 = COPY [[COPY12]] + ; GFX8-NEXT: $vgpr8 = COPY [[COPY13]] + ; GFX8-NEXT: $vgpr9 = COPY [[COPY14]] + ; GFX8-NEXT: $vgpr10 = COPY [[COPY15]] + ; GFX8-NEXT: $vgpr11 = COPY [[COPY16]] + ; GFX8-NEXT: $vgpr12 = COPY [[COPY17]] + ; GFX8-NEXT: $vgpr13 = COPY [[COPY18]] + ; GFX8-NEXT: $vgpr14 = COPY [[COPY19]] + ; GFX8-NEXT: $vgpr15 = COPY [[COPY20]] + ; GFX8-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7, implicit $vgpr8, implicit $vgpr9, implicit $vgpr10, implicit $vgpr11, implicit $vgpr12, implicit $vgpr13, implicit $vgpr14, implicit $vgpr15 %soffset = add i32 %soffset.base, 4032 %val = call <16 x float> @llvm.amdgcn.s.buffer.load.v16f32(<4 x i32> %rsrc, i32 %soffset, i32 0) ret <16 x float> %val @@ -2442,148 +2532,151 @@ define amdgpu_ps <16 x float> @s_buffer_load_v16f32_vgpr_offset_add_4032(<4 x i3 define amdgpu_ps <16 x float> @s_buffer_load_v16f32_vgpr_offset_add_4036(<4 x i32> inreg %rsrc, i32 %soffset.base) { ; GFX6-LABEL: name: s_buffer_load_v16f32_vgpr_offset_add_4036 ; GFX6: bb.1 (%ir-block.0): - ; GFX6: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $vgpr0 - ; GFX6: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2 - ; GFX6: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3 - ; GFX6: 
[[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4 - ; GFX6: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5 - ; GFX6: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 - ; GFX6: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX6: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 4036 - ; GFX6: [[BUFFER_LOAD_DWORDX4_OFFEN:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128), align 4) - ; GFX6: [[BUFFER_LOAD_DWORDX4_OFFEN1:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 16, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128), align 4) - ; GFX6: [[BUFFER_LOAD_DWORDX4_OFFEN2:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 32, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128) from unknown-address + 16, align 4) - ; GFX6: [[BUFFER_LOAD_DWORDX4_OFFEN3:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 48, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128) from unknown-address + 48, align 4) - ; GFX6: [[REG_SEQUENCE1:%[0-9]+]]:vreg_512 = REG_SEQUENCE [[BUFFER_LOAD_DWORDX4_OFFEN]], %subreg.sub0_sub1_sub2_sub3, [[BUFFER_LOAD_DWORDX4_OFFEN1]], %subreg.sub4_sub5_sub6_sub7, [[BUFFER_LOAD_DWORDX4_OFFEN2]], %subreg.sub8_sub9_sub10_sub11, [[BUFFER_LOAD_DWORDX4_OFFEN3]], %subreg.sub12_sub13_sub14_sub15 - ; GFX6: [[COPY5:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub0 - ; GFX6: [[COPY6:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub1 - ; GFX6: [[COPY7:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub2 - ; GFX6: [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub3 - ; GFX6: [[COPY9:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub4 - ; GFX6: [[COPY10:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub5 - ; GFX6: [[COPY11:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub6 - ; GFX6: [[COPY12:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub7 - ; GFX6: [[COPY13:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub8 - ; GFX6: [[COPY14:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub9 - ; GFX6: [[COPY15:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub10 - ; GFX6: [[COPY16:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub11 - ; GFX6: [[COPY17:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub12 - ; GFX6: [[COPY18:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub13 - ; GFX6: [[COPY19:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub14 - ; GFX6: [[COPY20:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub15 - ; GFX6: $vgpr0 = COPY [[COPY5]] - ; GFX6: $vgpr1 = COPY [[COPY6]] - ; GFX6: $vgpr2 = COPY [[COPY7]] - ; GFX6: $vgpr3 = COPY [[COPY8]] - ; GFX6: $vgpr4 = COPY [[COPY9]] - ; GFX6: $vgpr5 = COPY [[COPY10]] - ; GFX6: $vgpr6 = COPY [[COPY11]] - ; GFX6: $vgpr7 = COPY [[COPY12]] - ; GFX6: $vgpr8 = COPY [[COPY13]] - ; GFX6: $vgpr9 = COPY [[COPY14]] - ; GFX6: $vgpr10 = COPY [[COPY15]] - ; GFX6: $vgpr11 = COPY [[COPY16]] - ; GFX6: $vgpr12 = COPY [[COPY17]] - ; GFX6: $vgpr13 = COPY [[COPY18]] - ; GFX6: $vgpr14 = COPY [[COPY19]] - ; GFX6: $vgpr15 = COPY [[COPY20]] - ; GFX6: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7, implicit $vgpr8, implicit $vgpr9, implicit $vgpr10, implicit $vgpr11, implicit $vgpr12, implicit $vgpr13, implicit $vgpr14, implicit $vgpr15 + ; GFX6-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, 
$vgpr0 + ; GFX6-NEXT: {{ $}} + ; GFX6-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2 + ; GFX6-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3 + ; GFX6-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4 + ; GFX6-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5 + ; GFX6-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 + ; GFX6-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0 + ; GFX6-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 4036 + ; GFX6-NEXT: [[BUFFER_LOAD_DWORDX4_OFFEN:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128), align 4) + ; GFX6-NEXT: [[BUFFER_LOAD_DWORDX4_OFFEN1:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 16, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128), align 4) + ; GFX6-NEXT: [[BUFFER_LOAD_DWORDX4_OFFEN2:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 32, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128) from unknown-address + 16, align 4) + ; GFX6-NEXT: [[BUFFER_LOAD_DWORDX4_OFFEN3:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 48, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128) from unknown-address + 48, align 4) + ; GFX6-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_512 = REG_SEQUENCE [[BUFFER_LOAD_DWORDX4_OFFEN]], %subreg.sub0_sub1_sub2_sub3, [[BUFFER_LOAD_DWORDX4_OFFEN1]], %subreg.sub4_sub5_sub6_sub7, [[BUFFER_LOAD_DWORDX4_OFFEN2]], %subreg.sub8_sub9_sub10_sub11, [[BUFFER_LOAD_DWORDX4_OFFEN3]], %subreg.sub12_sub13_sub14_sub15 + ; GFX6-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub0 + ; GFX6-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub1 + ; GFX6-NEXT: [[COPY7:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub2 + ; GFX6-NEXT: [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub3 + ; GFX6-NEXT: [[COPY9:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub4 + ; GFX6-NEXT: [[COPY10:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub5 + ; GFX6-NEXT: [[COPY11:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub6 + ; GFX6-NEXT: [[COPY12:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub7 + ; GFX6-NEXT: [[COPY13:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub8 + ; GFX6-NEXT: [[COPY14:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub9 + ; GFX6-NEXT: [[COPY15:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub10 + ; GFX6-NEXT: [[COPY16:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub11 + ; GFX6-NEXT: [[COPY17:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub12 + ; GFX6-NEXT: [[COPY18:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub13 + ; GFX6-NEXT: [[COPY19:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub14 + ; GFX6-NEXT: [[COPY20:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub15 + ; GFX6-NEXT: $vgpr0 = COPY [[COPY5]] + ; GFX6-NEXT: $vgpr1 = COPY [[COPY6]] + ; GFX6-NEXT: $vgpr2 = COPY [[COPY7]] + ; GFX6-NEXT: $vgpr3 = COPY [[COPY8]] + ; GFX6-NEXT: $vgpr4 = COPY [[COPY9]] + ; GFX6-NEXT: $vgpr5 = COPY [[COPY10]] + ; GFX6-NEXT: $vgpr6 = COPY [[COPY11]] + ; GFX6-NEXT: $vgpr7 = COPY [[COPY12]] + ; GFX6-NEXT: $vgpr8 = COPY [[COPY13]] + ; GFX6-NEXT: $vgpr9 = COPY [[COPY14]] + ; GFX6-NEXT: $vgpr10 = COPY [[COPY15]] + ; GFX6-NEXT: $vgpr11 = COPY [[COPY16]] + ; GFX6-NEXT: $vgpr12 = COPY [[COPY17]] + ; GFX6-NEXT: $vgpr13 = COPY [[COPY18]] + ; GFX6-NEXT: $vgpr14 = COPY [[COPY19]] + ; GFX6-NEXT: $vgpr15 = COPY [[COPY20]] + ; 
GFX6-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7, implicit $vgpr8, implicit $vgpr9, implicit $vgpr10, implicit $vgpr11, implicit $vgpr12, implicit $vgpr13, implicit $vgpr14, implicit $vgpr15 ; GFX7-LABEL: name: s_buffer_load_v16f32_vgpr_offset_add_4036 ; GFX7: bb.1 (%ir-block.0): - ; GFX7: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $vgpr0 - ; GFX7: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2 - ; GFX7: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3 - ; GFX7: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4 - ; GFX7: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5 - ; GFX7: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 - ; GFX7: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX7: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 4036 - ; GFX7: [[BUFFER_LOAD_DWORDX4_OFFEN:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128), align 4) - ; GFX7: [[BUFFER_LOAD_DWORDX4_OFFEN1:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 16, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128), align 4) - ; GFX7: [[BUFFER_LOAD_DWORDX4_OFFEN2:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 32, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128) from unknown-address + 16, align 4) - ; GFX7: [[BUFFER_LOAD_DWORDX4_OFFEN3:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 48, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128) from unknown-address + 48, align 4) - ; GFX7: [[REG_SEQUENCE1:%[0-9]+]]:vreg_512 = REG_SEQUENCE [[BUFFER_LOAD_DWORDX4_OFFEN]], %subreg.sub0_sub1_sub2_sub3, [[BUFFER_LOAD_DWORDX4_OFFEN1]], %subreg.sub4_sub5_sub6_sub7, [[BUFFER_LOAD_DWORDX4_OFFEN2]], %subreg.sub8_sub9_sub10_sub11, [[BUFFER_LOAD_DWORDX4_OFFEN3]], %subreg.sub12_sub13_sub14_sub15 - ; GFX7: [[COPY5:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub0 - ; GFX7: [[COPY6:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub1 - ; GFX7: [[COPY7:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub2 - ; GFX7: [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub3 - ; GFX7: [[COPY9:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub4 - ; GFX7: [[COPY10:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub5 - ; GFX7: [[COPY11:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub6 - ; GFX7: [[COPY12:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub7 - ; GFX7: [[COPY13:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub8 - ; GFX7: [[COPY14:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub9 - ; GFX7: [[COPY15:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub10 - ; GFX7: [[COPY16:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub11 - ; GFX7: [[COPY17:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub12 - ; GFX7: [[COPY18:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub13 - ; GFX7: [[COPY19:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub14 - ; GFX7: [[COPY20:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub15 - ; GFX7: $vgpr0 = COPY [[COPY5]] - ; GFX7: $vgpr1 = COPY [[COPY6]] - ; GFX7: $vgpr2 = COPY [[COPY7]] - ; GFX7: $vgpr3 = COPY [[COPY8]] - ; GFX7: $vgpr4 = COPY [[COPY9]] - ; GFX7: $vgpr5 = COPY [[COPY10]] - ; GFX7: $vgpr6 = COPY [[COPY11]] - ; GFX7: $vgpr7 = COPY [[COPY12]] - ; GFX7: $vgpr8 = COPY [[COPY13]] - ; GFX7: $vgpr9 = COPY [[COPY14]] - ; GFX7: 
$vgpr10 = COPY [[COPY15]] - ; GFX7: $vgpr11 = COPY [[COPY16]] - ; GFX7: $vgpr12 = COPY [[COPY17]] - ; GFX7: $vgpr13 = COPY [[COPY18]] - ; GFX7: $vgpr14 = COPY [[COPY19]] - ; GFX7: $vgpr15 = COPY [[COPY20]] - ; GFX7: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7, implicit $vgpr8, implicit $vgpr9, implicit $vgpr10, implicit $vgpr11, implicit $vgpr12, implicit $vgpr13, implicit $vgpr14, implicit $vgpr15 + ; GFX7-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $vgpr0 + ; GFX7-NEXT: {{ $}} + ; GFX7-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2 + ; GFX7-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3 + ; GFX7-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4 + ; GFX7-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5 + ; GFX7-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 + ; GFX7-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0 + ; GFX7-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 4036 + ; GFX7-NEXT: [[BUFFER_LOAD_DWORDX4_OFFEN:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128), align 4) + ; GFX7-NEXT: [[BUFFER_LOAD_DWORDX4_OFFEN1:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 16, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128), align 4) + ; GFX7-NEXT: [[BUFFER_LOAD_DWORDX4_OFFEN2:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 32, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128) from unknown-address + 16, align 4) + ; GFX7-NEXT: [[BUFFER_LOAD_DWORDX4_OFFEN3:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 48, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128) from unknown-address + 48, align 4) + ; GFX7-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_512 = REG_SEQUENCE [[BUFFER_LOAD_DWORDX4_OFFEN]], %subreg.sub0_sub1_sub2_sub3, [[BUFFER_LOAD_DWORDX4_OFFEN1]], %subreg.sub4_sub5_sub6_sub7, [[BUFFER_LOAD_DWORDX4_OFFEN2]], %subreg.sub8_sub9_sub10_sub11, [[BUFFER_LOAD_DWORDX4_OFFEN3]], %subreg.sub12_sub13_sub14_sub15 + ; GFX7-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub0 + ; GFX7-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub1 + ; GFX7-NEXT: [[COPY7:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub2 + ; GFX7-NEXT: [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub3 + ; GFX7-NEXT: [[COPY9:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub4 + ; GFX7-NEXT: [[COPY10:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub5 + ; GFX7-NEXT: [[COPY11:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub6 + ; GFX7-NEXT: [[COPY12:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub7 + ; GFX7-NEXT: [[COPY13:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub8 + ; GFX7-NEXT: [[COPY14:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub9 + ; GFX7-NEXT: [[COPY15:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub10 + ; GFX7-NEXT: [[COPY16:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub11 + ; GFX7-NEXT: [[COPY17:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub12 + ; GFX7-NEXT: [[COPY18:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub13 + ; GFX7-NEXT: [[COPY19:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub14 + ; GFX7-NEXT: [[COPY20:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub15 + ; GFX7-NEXT: $vgpr0 = COPY [[COPY5]] + ; GFX7-NEXT: $vgpr1 = COPY [[COPY6]] 
+ ; GFX7-NEXT: $vgpr2 = COPY [[COPY7]] + ; GFX7-NEXT: $vgpr3 = COPY [[COPY8]] + ; GFX7-NEXT: $vgpr4 = COPY [[COPY9]] + ; GFX7-NEXT: $vgpr5 = COPY [[COPY10]] + ; GFX7-NEXT: $vgpr6 = COPY [[COPY11]] + ; GFX7-NEXT: $vgpr7 = COPY [[COPY12]] + ; GFX7-NEXT: $vgpr8 = COPY [[COPY13]] + ; GFX7-NEXT: $vgpr9 = COPY [[COPY14]] + ; GFX7-NEXT: $vgpr10 = COPY [[COPY15]] + ; GFX7-NEXT: $vgpr11 = COPY [[COPY16]] + ; GFX7-NEXT: $vgpr12 = COPY [[COPY17]] + ; GFX7-NEXT: $vgpr13 = COPY [[COPY18]] + ; GFX7-NEXT: $vgpr14 = COPY [[COPY19]] + ; GFX7-NEXT: $vgpr15 = COPY [[COPY20]] + ; GFX7-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7, implicit $vgpr8, implicit $vgpr9, implicit $vgpr10, implicit $vgpr11, implicit $vgpr12, implicit $vgpr13, implicit $vgpr14, implicit $vgpr15 ; GFX8-LABEL: name: s_buffer_load_v16f32_vgpr_offset_add_4036 ; GFX8: bb.1 (%ir-block.0): - ; GFX8: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $vgpr0 - ; GFX8: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2 - ; GFX8: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3 - ; GFX8: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4 - ; GFX8: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5 - ; GFX8: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 - ; GFX8: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX8: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 4 - ; GFX8: [[BUFFER_LOAD_DWORDX4_OFFEN:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 4032, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128), align 4) - ; GFX8: [[BUFFER_LOAD_DWORDX4_OFFEN1:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 4048, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128), align 4) - ; GFX8: [[BUFFER_LOAD_DWORDX4_OFFEN2:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 4064, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128) from unknown-address + 16, align 4) - ; GFX8: [[BUFFER_LOAD_DWORDX4_OFFEN3:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 4080, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128) from unknown-address + 48, align 4) - ; GFX8: [[REG_SEQUENCE1:%[0-9]+]]:vreg_512 = REG_SEQUENCE [[BUFFER_LOAD_DWORDX4_OFFEN]], %subreg.sub0_sub1_sub2_sub3, [[BUFFER_LOAD_DWORDX4_OFFEN1]], %subreg.sub4_sub5_sub6_sub7, [[BUFFER_LOAD_DWORDX4_OFFEN2]], %subreg.sub8_sub9_sub10_sub11, [[BUFFER_LOAD_DWORDX4_OFFEN3]], %subreg.sub12_sub13_sub14_sub15 - ; GFX8: [[COPY5:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub0 - ; GFX8: [[COPY6:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub1 - ; GFX8: [[COPY7:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub2 - ; GFX8: [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub3 - ; GFX8: [[COPY9:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub4 - ; GFX8: [[COPY10:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub5 - ; GFX8: [[COPY11:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub6 - ; GFX8: [[COPY12:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub7 - ; GFX8: [[COPY13:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub8 - ; GFX8: [[COPY14:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub9 - ; GFX8: [[COPY15:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub10 - ; GFX8: [[COPY16:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub11 - ; GFX8: [[COPY17:%[0-9]+]]:vgpr_32 = COPY 
[[REG_SEQUENCE1]].sub12 - ; GFX8: [[COPY18:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub13 - ; GFX8: [[COPY19:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub14 - ; GFX8: [[COPY20:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub15 - ; GFX8: $vgpr0 = COPY [[COPY5]] - ; GFX8: $vgpr1 = COPY [[COPY6]] - ; GFX8: $vgpr2 = COPY [[COPY7]] - ; GFX8: $vgpr3 = COPY [[COPY8]] - ; GFX8: $vgpr4 = COPY [[COPY9]] - ; GFX8: $vgpr5 = COPY [[COPY10]] - ; GFX8: $vgpr6 = COPY [[COPY11]] - ; GFX8: $vgpr7 = COPY [[COPY12]] - ; GFX8: $vgpr8 = COPY [[COPY13]] - ; GFX8: $vgpr9 = COPY [[COPY14]] - ; GFX8: $vgpr10 = COPY [[COPY15]] - ; GFX8: $vgpr11 = COPY [[COPY16]] - ; GFX8: $vgpr12 = COPY [[COPY17]] - ; GFX8: $vgpr13 = COPY [[COPY18]] - ; GFX8: $vgpr14 = COPY [[COPY19]] - ; GFX8: $vgpr15 = COPY [[COPY20]] - ; GFX8: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7, implicit $vgpr8, implicit $vgpr9, implicit $vgpr10, implicit $vgpr11, implicit $vgpr12, implicit $vgpr13, implicit $vgpr14, implicit $vgpr15 + ; GFX8-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $vgpr0 + ; GFX8-NEXT: {{ $}} + ; GFX8-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2 + ; GFX8-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3 + ; GFX8-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4 + ; GFX8-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5 + ; GFX8-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 + ; GFX8-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0 + ; GFX8-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 4 + ; GFX8-NEXT: [[BUFFER_LOAD_DWORDX4_OFFEN:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 4032, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128), align 4) + ; GFX8-NEXT: [[BUFFER_LOAD_DWORDX4_OFFEN1:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 4048, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128), align 4) + ; GFX8-NEXT: [[BUFFER_LOAD_DWORDX4_OFFEN2:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 4064, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128) from unknown-address + 16, align 4) + ; GFX8-NEXT: [[BUFFER_LOAD_DWORDX4_OFFEN3:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 4080, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128) from unknown-address + 48, align 4) + ; GFX8-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_512 = REG_SEQUENCE [[BUFFER_LOAD_DWORDX4_OFFEN]], %subreg.sub0_sub1_sub2_sub3, [[BUFFER_LOAD_DWORDX4_OFFEN1]], %subreg.sub4_sub5_sub6_sub7, [[BUFFER_LOAD_DWORDX4_OFFEN2]], %subreg.sub8_sub9_sub10_sub11, [[BUFFER_LOAD_DWORDX4_OFFEN3]], %subreg.sub12_sub13_sub14_sub15 + ; GFX8-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub0 + ; GFX8-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub1 + ; GFX8-NEXT: [[COPY7:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub2 + ; GFX8-NEXT: [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub3 + ; GFX8-NEXT: [[COPY9:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub4 + ; GFX8-NEXT: [[COPY10:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub5 + ; GFX8-NEXT: [[COPY11:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub6 + ; GFX8-NEXT: [[COPY12:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub7 + ; GFX8-NEXT: [[COPY13:%[0-9]+]]:vgpr_32 = COPY 
[[REG_SEQUENCE1]].sub8 + ; GFX8-NEXT: [[COPY14:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub9 + ; GFX8-NEXT: [[COPY15:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub10 + ; GFX8-NEXT: [[COPY16:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub11 + ; GFX8-NEXT: [[COPY17:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub12 + ; GFX8-NEXT: [[COPY18:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub13 + ; GFX8-NEXT: [[COPY19:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub14 + ; GFX8-NEXT: [[COPY20:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE1]].sub15 + ; GFX8-NEXT: $vgpr0 = COPY [[COPY5]] + ; GFX8-NEXT: $vgpr1 = COPY [[COPY6]] + ; GFX8-NEXT: $vgpr2 = COPY [[COPY7]] + ; GFX8-NEXT: $vgpr3 = COPY [[COPY8]] + ; GFX8-NEXT: $vgpr4 = COPY [[COPY9]] + ; GFX8-NEXT: $vgpr5 = COPY [[COPY10]] + ; GFX8-NEXT: $vgpr6 = COPY [[COPY11]] + ; GFX8-NEXT: $vgpr7 = COPY [[COPY12]] + ; GFX8-NEXT: $vgpr8 = COPY [[COPY13]] + ; GFX8-NEXT: $vgpr9 = COPY [[COPY14]] + ; GFX8-NEXT: $vgpr10 = COPY [[COPY15]] + ; GFX8-NEXT: $vgpr11 = COPY [[COPY16]] + ; GFX8-NEXT: $vgpr12 = COPY [[COPY17]] + ; GFX8-NEXT: $vgpr13 = COPY [[COPY18]] + ; GFX8-NEXT: $vgpr14 = COPY [[COPY19]] + ; GFX8-NEXT: $vgpr15 = COPY [[COPY20]] + ; GFX8-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7, implicit $vgpr8, implicit $vgpr9, implicit $vgpr10, implicit $vgpr11, implicit $vgpr12, implicit $vgpr13, implicit $vgpr14, implicit $vgpr15 %soffset = add i32 %soffset.base, 4036 %val = call <16 x float> @llvm.amdgcn.s.buffer.load.v16f32(<4 x i32> %rsrc, i32 %soffset, i32 0) ret <16 x float> %val @@ -2593,109 +2686,124 @@ define amdgpu_ps <16 x float> @s_buffer_load_v16f32_vgpr_offset_add_4036(<4 x i3 define amdgpu_ps float @s_buffer_load_f32_vgpr_rsrc(<4 x i32> %rsrc, i32 inreg %soffset) { ; GFX6-LABEL: name: s_buffer_load_f32_vgpr_rsrc ; GFX6: bb.1 (%ir-block.0): - ; GFX6: liveins: $sgpr2, $vgpr0, $vgpr1, $vgpr2, $vgpr3 - ; GFX6: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX6: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; GFX6: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2 - ; GFX6: [[COPY3:%[0-9]+]]:vgpr_32 = COPY $vgpr3 - ; GFX6: [[REG_SEQUENCE:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 - ; GFX6: [[COPY4:%[0-9]+]]:sreg_32 = COPY $sgpr2 - ; GFX6: [[COPY5:%[0-9]+]]:vgpr_32 = COPY [[COPY4]] - ; GFX6: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0 - ; GFX6: [[COPY6:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub0_sub1 - ; GFX6: [[COPY7:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub2_sub3 - ; GFX6: [[S_MOV_B64_term:%[0-9]+]]:sreg_64_xexec = S_MOV_B64_term $exec - ; GFX6: bb.2: - ; GFX6: successors: %bb.3, %bb.2 - ; GFX6: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY6]].sub0, implicit $exec - ; GFX6: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY6]].sub1, implicit $exec - ; GFX6: [[REG_SEQUENCE1:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1 - ; GFX6: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE1]], [[COPY6]], implicit $exec - ; GFX6: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY7]].sub0, implicit $exec - ; GFX6: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY7]].sub1, implicit $exec - ; GFX6: [[REG_SEQUENCE2:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[V_READFIRSTLANE_B32_2]], 
%subreg.sub0, [[V_READFIRSTLANE_B32_3]], %subreg.sub1 - ; GFX6: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE2]], [[COPY7]], implicit $exec - ; GFX6: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_1]], [[V_CMP_EQ_U64_e64_]], implicit-def $scc - ; GFX6: [[REG_SEQUENCE3:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1, [[V_READFIRSTLANE_B32_2]], %subreg.sub2, [[V_READFIRSTLANE_B32_3]], %subreg.sub3 - ; GFX6: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_]], implicit-def $exec, implicit-def $scc, implicit $exec - ; GFX6: [[BUFFER_LOAD_DWORD_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFEN [[COPY5]], [[REG_SEQUENCE3]], [[S_MOV_B32_]], 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s32)) - ; GFX6: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc - ; GFX6: SI_WATERFALL_LOOP %bb.2, implicit $exec - ; GFX6: bb.3: - ; GFX6: $exec = S_MOV_B64_term [[S_MOV_B64_term]] - ; GFX6: bb.4: - ; GFX6: $vgpr0 = COPY [[BUFFER_LOAD_DWORD_OFFEN]] - ; GFX6: SI_RETURN_TO_EPILOG implicit $vgpr0 + ; GFX6-NEXT: liveins: $sgpr2, $vgpr0, $vgpr1, $vgpr2, $vgpr3 + ; GFX6-NEXT: {{ $}} + ; GFX6-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 + ; GFX6-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 + ; GFX6-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2 + ; GFX6-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY $vgpr3 + ; GFX6-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 + ; GFX6-NEXT: [[COPY4:%[0-9]+]]:sreg_32 = COPY $sgpr2 + ; GFX6-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY [[COPY4]] + ; GFX6-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0 + ; GFX6-NEXT: [[COPY6:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub0_sub1 + ; GFX6-NEXT: [[COPY7:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub2_sub3 + ; GFX6-NEXT: [[S_MOV_B64_term:%[0-9]+]]:sreg_64_xexec = S_MOV_B64_term $exec + ; GFX6-NEXT: {{ $}} + ; GFX6-NEXT: bb.2: + ; GFX6-NEXT: successors: %bb.3, %bb.2 + ; GFX6-NEXT: {{ $}} + ; GFX6-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY6]].sub0, implicit $exec + ; GFX6-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY6]].sub1, implicit $exec + ; GFX6-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1 + ; GFX6-NEXT: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE1]], [[COPY6]], implicit $exec + ; GFX6-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY7]].sub0, implicit $exec + ; GFX6-NEXT: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY7]].sub1, implicit $exec + ; GFX6-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[V_READFIRSTLANE_B32_2]], %subreg.sub0, [[V_READFIRSTLANE_B32_3]], %subreg.sub1 + ; GFX6-NEXT: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE2]], [[COPY7]], implicit $exec + ; GFX6-NEXT: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_1]], [[V_CMP_EQ_U64_e64_]], implicit-def $scc + ; GFX6-NEXT: [[REG_SEQUENCE3:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1, [[V_READFIRSTLANE_B32_2]], %subreg.sub2, [[V_READFIRSTLANE_B32_3]], %subreg.sub3 + ; GFX6-NEXT: 
[[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_]], implicit-def $exec, implicit-def $scc, implicit $exec + ; GFX6-NEXT: [[BUFFER_LOAD_DWORD_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFEN [[COPY5]], [[REG_SEQUENCE3]], [[S_MOV_B32_]], 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s32)) + ; GFX6-NEXT: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc + ; GFX6-NEXT: SI_WATERFALL_LOOP %bb.2, implicit $exec + ; GFX6-NEXT: {{ $}} + ; GFX6-NEXT: bb.3: + ; GFX6-NEXT: $exec = S_MOV_B64_term [[S_MOV_B64_term]] + ; GFX6-NEXT: {{ $}} + ; GFX6-NEXT: bb.4: + ; GFX6-NEXT: $vgpr0 = COPY [[BUFFER_LOAD_DWORD_OFFEN]] + ; GFX6-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0 ; GFX7-LABEL: name: s_buffer_load_f32_vgpr_rsrc ; GFX7: bb.1 (%ir-block.0): - ; GFX7: liveins: $sgpr2, $vgpr0, $vgpr1, $vgpr2, $vgpr3 - ; GFX7: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX7: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; GFX7: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2 - ; GFX7: [[COPY3:%[0-9]+]]:vgpr_32 = COPY $vgpr3 - ; GFX7: [[REG_SEQUENCE:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 - ; GFX7: [[COPY4:%[0-9]+]]:sreg_32 = COPY $sgpr2 - ; GFX7: [[COPY5:%[0-9]+]]:vgpr_32 = COPY [[COPY4]] - ; GFX7: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0 - ; GFX7: [[COPY6:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub0_sub1 - ; GFX7: [[COPY7:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub2_sub3 - ; GFX7: [[S_MOV_B64_term:%[0-9]+]]:sreg_64_xexec = S_MOV_B64_term $exec - ; GFX7: bb.2: - ; GFX7: successors: %bb.3, %bb.2 - ; GFX7: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY6]].sub0, implicit $exec - ; GFX7: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY6]].sub1, implicit $exec - ; GFX7: [[REG_SEQUENCE1:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1 - ; GFX7: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE1]], [[COPY6]], implicit $exec - ; GFX7: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY7]].sub0, implicit $exec - ; GFX7: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY7]].sub1, implicit $exec - ; GFX7: [[REG_SEQUENCE2:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[V_READFIRSTLANE_B32_2]], %subreg.sub0, [[V_READFIRSTLANE_B32_3]], %subreg.sub1 - ; GFX7: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE2]], [[COPY7]], implicit $exec - ; GFX7: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_1]], [[V_CMP_EQ_U64_e64_]], implicit-def $scc - ; GFX7: [[REG_SEQUENCE3:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1, [[V_READFIRSTLANE_B32_2]], %subreg.sub2, [[V_READFIRSTLANE_B32_3]], %subreg.sub3 - ; GFX7: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_]], implicit-def $exec, implicit-def $scc, implicit $exec - ; GFX7: [[BUFFER_LOAD_DWORD_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFEN [[COPY5]], [[REG_SEQUENCE3]], [[S_MOV_B32_]], 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s32)) - ; GFX7: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc - ; GFX7: SI_WATERFALL_LOOP %bb.2, implicit $exec - ; GFX7: bb.3: - ; GFX7: $exec = S_MOV_B64_term [[S_MOV_B64_term]] - ; GFX7: bb.4: - ; GFX7: $vgpr0 = COPY 
[[BUFFER_LOAD_DWORD_OFFEN]] - ; GFX7: SI_RETURN_TO_EPILOG implicit $vgpr0 + ; GFX7-NEXT: liveins: $sgpr2, $vgpr0, $vgpr1, $vgpr2, $vgpr3 + ; GFX7-NEXT: {{ $}} + ; GFX7-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 + ; GFX7-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 + ; GFX7-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2 + ; GFX7-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY $vgpr3 + ; GFX7-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 + ; GFX7-NEXT: [[COPY4:%[0-9]+]]:sreg_32 = COPY $sgpr2 + ; GFX7-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY [[COPY4]] + ; GFX7-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0 + ; GFX7-NEXT: [[COPY6:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub0_sub1 + ; GFX7-NEXT: [[COPY7:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub2_sub3 + ; GFX7-NEXT: [[S_MOV_B64_term:%[0-9]+]]:sreg_64_xexec = S_MOV_B64_term $exec + ; GFX7-NEXT: {{ $}} + ; GFX7-NEXT: bb.2: + ; GFX7-NEXT: successors: %bb.3, %bb.2 + ; GFX7-NEXT: {{ $}} + ; GFX7-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY6]].sub0, implicit $exec + ; GFX7-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY6]].sub1, implicit $exec + ; GFX7-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1 + ; GFX7-NEXT: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE1]], [[COPY6]], implicit $exec + ; GFX7-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY7]].sub0, implicit $exec + ; GFX7-NEXT: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY7]].sub1, implicit $exec + ; GFX7-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[V_READFIRSTLANE_B32_2]], %subreg.sub0, [[V_READFIRSTLANE_B32_3]], %subreg.sub1 + ; GFX7-NEXT: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE2]], [[COPY7]], implicit $exec + ; GFX7-NEXT: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_1]], [[V_CMP_EQ_U64_e64_]], implicit-def $scc + ; GFX7-NEXT: [[REG_SEQUENCE3:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1, [[V_READFIRSTLANE_B32_2]], %subreg.sub2, [[V_READFIRSTLANE_B32_3]], %subreg.sub3 + ; GFX7-NEXT: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_]], implicit-def $exec, implicit-def $scc, implicit $exec + ; GFX7-NEXT: [[BUFFER_LOAD_DWORD_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFEN [[COPY5]], [[REG_SEQUENCE3]], [[S_MOV_B32_]], 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s32)) + ; GFX7-NEXT: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc + ; GFX7-NEXT: SI_WATERFALL_LOOP %bb.2, implicit $exec + ; GFX7-NEXT: {{ $}} + ; GFX7-NEXT: bb.3: + ; GFX7-NEXT: $exec = S_MOV_B64_term [[S_MOV_B64_term]] + ; GFX7-NEXT: {{ $}} + ; GFX7-NEXT: bb.4: + ; GFX7-NEXT: $vgpr0 = COPY [[BUFFER_LOAD_DWORD_OFFEN]] + ; GFX7-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0 ; GFX8-LABEL: name: s_buffer_load_f32_vgpr_rsrc ; GFX8: bb.1 (%ir-block.0): - ; GFX8: liveins: $sgpr2, $vgpr0, $vgpr1, $vgpr2, $vgpr3 - ; GFX8: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX8: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; GFX8: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2 - ; GFX8: [[COPY3:%[0-9]+]]:vgpr_32 = COPY $vgpr3 - ; GFX8: [[REG_SEQUENCE:%[0-9]+]]:vreg_128 = REG_SEQUENCE 
[[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 - ; GFX8: [[COPY4:%[0-9]+]]:sreg_32 = COPY $sgpr2 - ; GFX8: [[COPY5:%[0-9]+]]:vgpr_32 = COPY [[COPY4]] - ; GFX8: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0 - ; GFX8: [[COPY6:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub0_sub1 - ; GFX8: [[COPY7:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub2_sub3 - ; GFX8: [[S_MOV_B64_term:%[0-9]+]]:sreg_64_xexec = S_MOV_B64_term $exec - ; GFX8: bb.2: - ; GFX8: successors: %bb.3, %bb.2 - ; GFX8: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY6]].sub0, implicit $exec - ; GFX8: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY6]].sub1, implicit $exec - ; GFX8: [[REG_SEQUENCE1:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1 - ; GFX8: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE1]], [[COPY6]], implicit $exec - ; GFX8: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY7]].sub0, implicit $exec - ; GFX8: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY7]].sub1, implicit $exec - ; GFX8: [[REG_SEQUENCE2:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[V_READFIRSTLANE_B32_2]], %subreg.sub0, [[V_READFIRSTLANE_B32_3]], %subreg.sub1 - ; GFX8: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE2]], [[COPY7]], implicit $exec - ; GFX8: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_1]], [[V_CMP_EQ_U64_e64_]], implicit-def $scc - ; GFX8: [[REG_SEQUENCE3:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1, [[V_READFIRSTLANE_B32_2]], %subreg.sub2, [[V_READFIRSTLANE_B32_3]], %subreg.sub3 - ; GFX8: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_]], implicit-def $exec, implicit-def $scc, implicit $exec - ; GFX8: [[BUFFER_LOAD_DWORD_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFEN [[COPY5]], [[REG_SEQUENCE3]], [[S_MOV_B32_]], 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s32)) - ; GFX8: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc - ; GFX8: SI_WATERFALL_LOOP %bb.2, implicit $exec - ; GFX8: bb.3: - ; GFX8: $exec = S_MOV_B64_term [[S_MOV_B64_term]] - ; GFX8: bb.4: - ; GFX8: $vgpr0 = COPY [[BUFFER_LOAD_DWORD_OFFEN]] - ; GFX8: SI_RETURN_TO_EPILOG implicit $vgpr0 + ; GFX8-NEXT: liveins: $sgpr2, $vgpr0, $vgpr1, $vgpr2, $vgpr3 + ; GFX8-NEXT: {{ $}} + ; GFX8-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 + ; GFX8-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 + ; GFX8-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2 + ; GFX8-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY $vgpr3 + ; GFX8-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 + ; GFX8-NEXT: [[COPY4:%[0-9]+]]:sreg_32 = COPY $sgpr2 + ; GFX8-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY [[COPY4]] + ; GFX8-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0 + ; GFX8-NEXT: [[COPY6:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub0_sub1 + ; GFX8-NEXT: [[COPY7:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub2_sub3 + ; GFX8-NEXT: [[S_MOV_B64_term:%[0-9]+]]:sreg_64_xexec = S_MOV_B64_term $exec + ; GFX8-NEXT: {{ $}} + ; GFX8-NEXT: bb.2: + ; GFX8-NEXT: successors: %bb.3, %bb.2 + ; GFX8-NEXT: {{ $}} + ; GFX8-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 
[[COPY6]].sub0, implicit $exec + ; GFX8-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY6]].sub1, implicit $exec + ; GFX8-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1 + ; GFX8-NEXT: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE1]], [[COPY6]], implicit $exec + ; GFX8-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY7]].sub0, implicit $exec + ; GFX8-NEXT: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY7]].sub1, implicit $exec + ; GFX8-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[V_READFIRSTLANE_B32_2]], %subreg.sub0, [[V_READFIRSTLANE_B32_3]], %subreg.sub1 + ; GFX8-NEXT: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE2]], [[COPY7]], implicit $exec + ; GFX8-NEXT: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_1]], [[V_CMP_EQ_U64_e64_]], implicit-def $scc + ; GFX8-NEXT: [[REG_SEQUENCE3:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1, [[V_READFIRSTLANE_B32_2]], %subreg.sub2, [[V_READFIRSTLANE_B32_3]], %subreg.sub3 + ; GFX8-NEXT: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_]], implicit-def $exec, implicit-def $scc, implicit $exec + ; GFX8-NEXT: [[BUFFER_LOAD_DWORD_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFEN [[COPY5]], [[REG_SEQUENCE3]], [[S_MOV_B32_]], 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s32)) + ; GFX8-NEXT: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc + ; GFX8-NEXT: SI_WATERFALL_LOOP %bb.2, implicit $exec + ; GFX8-NEXT: {{ $}} + ; GFX8-NEXT: bb.3: + ; GFX8-NEXT: $exec = S_MOV_B64_term [[S_MOV_B64_term]] + ; GFX8-NEXT: {{ $}} + ; GFX8-NEXT: bb.4: + ; GFX8-NEXT: $vgpr0 = COPY [[BUFFER_LOAD_DWORD_OFFEN]] + ; GFX8-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0 %val = call float @llvm.amdgcn.s.buffer.load.f32(<4 x i32> %rsrc, i32 %soffset, i32 0) ret float %val } @@ -2704,103 +2812,118 @@ define amdgpu_ps float @s_buffer_load_f32_vgpr_rsrc(<4 x i32> %rsrc, i32 inreg % define amdgpu_ps float @s_buffer_load_f32_vgpr_rsrc_soffset_add_4092(<4 x i32> %rsrc, i32 inreg %soffset.base) { ; GFX6-LABEL: name: s_buffer_load_f32_vgpr_rsrc_soffset_add_4092 ; GFX6: bb.1 (%ir-block.0): - ; GFX6: liveins: $sgpr2, $vgpr0, $vgpr1, $vgpr2, $vgpr3 - ; GFX6: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX6: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; GFX6: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2 - ; GFX6: [[COPY3:%[0-9]+]]:vgpr_32 = COPY $vgpr3 - ; GFX6: [[REG_SEQUENCE:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 - ; GFX6: [[COPY4:%[0-9]+]]:sreg_32 = COPY $sgpr2 - ; GFX6: [[COPY5:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub0_sub1 - ; GFX6: [[COPY6:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub2_sub3 - ; GFX6: [[S_MOV_B64_term:%[0-9]+]]:sreg_64_xexec = S_MOV_B64_term $exec - ; GFX6: bb.2: - ; GFX6: successors: %bb.3, %bb.2 - ; GFX6: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY5]].sub0, implicit $exec - ; GFX6: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY5]].sub1, implicit $exec - ; GFX6: [[REG_SEQUENCE1:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1 - ; GFX6: 
[[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE1]], [[COPY5]], implicit $exec - ; GFX6: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY6]].sub0, implicit $exec - ; GFX6: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY6]].sub1, implicit $exec - ; GFX6: [[REG_SEQUENCE2:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[V_READFIRSTLANE_B32_2]], %subreg.sub0, [[V_READFIRSTLANE_B32_3]], %subreg.sub1 - ; GFX6: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE2]], [[COPY6]], implicit $exec - ; GFX6: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_1]], [[V_CMP_EQ_U64_e64_]], implicit-def $scc - ; GFX6: [[REG_SEQUENCE3:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1, [[V_READFIRSTLANE_B32_2]], %subreg.sub2, [[V_READFIRSTLANE_B32_3]], %subreg.sub3 - ; GFX6: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_]], implicit-def $exec, implicit-def $scc, implicit $exec - ; GFX6: [[BUFFER_LOAD_DWORD_OFFSET:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFSET [[REG_SEQUENCE3]], [[COPY4]], 4092, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s32)) - ; GFX6: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc - ; GFX6: SI_WATERFALL_LOOP %bb.2, implicit $exec - ; GFX6: bb.3: - ; GFX6: $exec = S_MOV_B64_term [[S_MOV_B64_term]] - ; GFX6: bb.4: - ; GFX6: $vgpr0 = COPY [[BUFFER_LOAD_DWORD_OFFSET]] - ; GFX6: SI_RETURN_TO_EPILOG implicit $vgpr0 + ; GFX6-NEXT: liveins: $sgpr2, $vgpr0, $vgpr1, $vgpr2, $vgpr3 + ; GFX6-NEXT: {{ $}} + ; GFX6-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 + ; GFX6-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 + ; GFX6-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2 + ; GFX6-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY $vgpr3 + ; GFX6-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 + ; GFX6-NEXT: [[COPY4:%[0-9]+]]:sreg_32 = COPY $sgpr2 + ; GFX6-NEXT: [[COPY5:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub0_sub1 + ; GFX6-NEXT: [[COPY6:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub2_sub3 + ; GFX6-NEXT: [[S_MOV_B64_term:%[0-9]+]]:sreg_64_xexec = S_MOV_B64_term $exec + ; GFX6-NEXT: {{ $}} + ; GFX6-NEXT: bb.2: + ; GFX6-NEXT: successors: %bb.3, %bb.2 + ; GFX6-NEXT: {{ $}} + ; GFX6-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY5]].sub0, implicit $exec + ; GFX6-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY5]].sub1, implicit $exec + ; GFX6-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1 + ; GFX6-NEXT: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE1]], [[COPY5]], implicit $exec + ; GFX6-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY6]].sub0, implicit $exec + ; GFX6-NEXT: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY6]].sub1, implicit $exec + ; GFX6-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[V_READFIRSTLANE_B32_2]], %subreg.sub0, [[V_READFIRSTLANE_B32_3]], %subreg.sub1 + ; GFX6-NEXT: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE2]], [[COPY6]], implicit $exec + ; GFX6-NEXT: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_1]], 
[[V_CMP_EQ_U64_e64_]], implicit-def $scc + ; GFX6-NEXT: [[REG_SEQUENCE3:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1, [[V_READFIRSTLANE_B32_2]], %subreg.sub2, [[V_READFIRSTLANE_B32_3]], %subreg.sub3 + ; GFX6-NEXT: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_]], implicit-def $exec, implicit-def $scc, implicit $exec + ; GFX6-NEXT: [[BUFFER_LOAD_DWORD_OFFSET:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFSET [[REG_SEQUENCE3]], [[COPY4]], 4092, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s32)) + ; GFX6-NEXT: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc + ; GFX6-NEXT: SI_WATERFALL_LOOP %bb.2, implicit $exec + ; GFX6-NEXT: {{ $}} + ; GFX6-NEXT: bb.3: + ; GFX6-NEXT: $exec = S_MOV_B64_term [[S_MOV_B64_term]] + ; GFX6-NEXT: {{ $}} + ; GFX6-NEXT: bb.4: + ; GFX6-NEXT: $vgpr0 = COPY [[BUFFER_LOAD_DWORD_OFFSET]] + ; GFX6-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0 ; GFX7-LABEL: name: s_buffer_load_f32_vgpr_rsrc_soffset_add_4092 ; GFX7: bb.1 (%ir-block.0): - ; GFX7: liveins: $sgpr2, $vgpr0, $vgpr1, $vgpr2, $vgpr3 - ; GFX7: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX7: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; GFX7: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2 - ; GFX7: [[COPY3:%[0-9]+]]:vgpr_32 = COPY $vgpr3 - ; GFX7: [[REG_SEQUENCE:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 - ; GFX7: [[COPY4:%[0-9]+]]:sreg_32 = COPY $sgpr2 - ; GFX7: [[COPY5:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub0_sub1 - ; GFX7: [[COPY6:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub2_sub3 - ; GFX7: [[S_MOV_B64_term:%[0-9]+]]:sreg_64_xexec = S_MOV_B64_term $exec - ; GFX7: bb.2: - ; GFX7: successors: %bb.3, %bb.2 - ; GFX7: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY5]].sub0, implicit $exec - ; GFX7: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY5]].sub1, implicit $exec - ; GFX7: [[REG_SEQUENCE1:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1 - ; GFX7: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE1]], [[COPY5]], implicit $exec - ; GFX7: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY6]].sub0, implicit $exec - ; GFX7: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY6]].sub1, implicit $exec - ; GFX7: [[REG_SEQUENCE2:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[V_READFIRSTLANE_B32_2]], %subreg.sub0, [[V_READFIRSTLANE_B32_3]], %subreg.sub1 - ; GFX7: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE2]], [[COPY6]], implicit $exec - ; GFX7: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_1]], [[V_CMP_EQ_U64_e64_]], implicit-def $scc - ; GFX7: [[REG_SEQUENCE3:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1, [[V_READFIRSTLANE_B32_2]], %subreg.sub2, [[V_READFIRSTLANE_B32_3]], %subreg.sub3 - ; GFX7: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_]], implicit-def $exec, implicit-def $scc, implicit $exec - ; GFX7: [[BUFFER_LOAD_DWORD_OFFSET:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFSET [[REG_SEQUENCE3]], [[COPY4]], 4092, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s32)) - ; GFX7: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], 
implicit-def $scc - ; GFX7: SI_WATERFALL_LOOP %bb.2, implicit $exec - ; GFX7: bb.3: - ; GFX7: $exec = S_MOV_B64_term [[S_MOV_B64_term]] - ; GFX7: bb.4: - ; GFX7: $vgpr0 = COPY [[BUFFER_LOAD_DWORD_OFFSET]] - ; GFX7: SI_RETURN_TO_EPILOG implicit $vgpr0 + ; GFX7-NEXT: liveins: $sgpr2, $vgpr0, $vgpr1, $vgpr2, $vgpr3 + ; GFX7-NEXT: {{ $}} + ; GFX7-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 + ; GFX7-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 + ; GFX7-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2 + ; GFX7-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY $vgpr3 + ; GFX7-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 + ; GFX7-NEXT: [[COPY4:%[0-9]+]]:sreg_32 = COPY $sgpr2 + ; GFX7-NEXT: [[COPY5:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub0_sub1 + ; GFX7-NEXT: [[COPY6:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub2_sub3 + ; GFX7-NEXT: [[S_MOV_B64_term:%[0-9]+]]:sreg_64_xexec = S_MOV_B64_term $exec + ; GFX7-NEXT: {{ $}} + ; GFX7-NEXT: bb.2: + ; GFX7-NEXT: successors: %bb.3, %bb.2 + ; GFX7-NEXT: {{ $}} + ; GFX7-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY5]].sub0, implicit $exec + ; GFX7-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY5]].sub1, implicit $exec + ; GFX7-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1 + ; GFX7-NEXT: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE1]], [[COPY5]], implicit $exec + ; GFX7-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY6]].sub0, implicit $exec + ; GFX7-NEXT: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY6]].sub1, implicit $exec + ; GFX7-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[V_READFIRSTLANE_B32_2]], %subreg.sub0, [[V_READFIRSTLANE_B32_3]], %subreg.sub1 + ; GFX7-NEXT: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE2]], [[COPY6]], implicit $exec + ; GFX7-NEXT: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_1]], [[V_CMP_EQ_U64_e64_]], implicit-def $scc + ; GFX7-NEXT: [[REG_SEQUENCE3:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1, [[V_READFIRSTLANE_B32_2]], %subreg.sub2, [[V_READFIRSTLANE_B32_3]], %subreg.sub3 + ; GFX7-NEXT: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_]], implicit-def $exec, implicit-def $scc, implicit $exec + ; GFX7-NEXT: [[BUFFER_LOAD_DWORD_OFFSET:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFSET [[REG_SEQUENCE3]], [[COPY4]], 4092, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s32)) + ; GFX7-NEXT: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc + ; GFX7-NEXT: SI_WATERFALL_LOOP %bb.2, implicit $exec + ; GFX7-NEXT: {{ $}} + ; GFX7-NEXT: bb.3: + ; GFX7-NEXT: $exec = S_MOV_B64_term [[S_MOV_B64_term]] + ; GFX7-NEXT: {{ $}} + ; GFX7-NEXT: bb.4: + ; GFX7-NEXT: $vgpr0 = COPY [[BUFFER_LOAD_DWORD_OFFSET]] + ; GFX7-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0 ; GFX8-LABEL: name: s_buffer_load_f32_vgpr_rsrc_soffset_add_4092 ; GFX8: bb.1 (%ir-block.0): - ; GFX8: liveins: $sgpr2, $vgpr0, $vgpr1, $vgpr2, $vgpr3 - ; GFX8: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX8: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; GFX8: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2 - ; GFX8: [[COPY3:%[0-9]+]]:vgpr_32 = COPY 
$vgpr3 - ; GFX8: [[REG_SEQUENCE:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 - ; GFX8: [[COPY4:%[0-9]+]]:sreg_32 = COPY $sgpr2 - ; GFX8: [[COPY5:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub0_sub1 - ; GFX8: [[COPY6:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub2_sub3 - ; GFX8: [[S_MOV_B64_term:%[0-9]+]]:sreg_64_xexec = S_MOV_B64_term $exec - ; GFX8: bb.2: - ; GFX8: successors: %bb.3, %bb.2 - ; GFX8: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY5]].sub0, implicit $exec - ; GFX8: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY5]].sub1, implicit $exec - ; GFX8: [[REG_SEQUENCE1:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1 - ; GFX8: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE1]], [[COPY5]], implicit $exec - ; GFX8: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY6]].sub0, implicit $exec - ; GFX8: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY6]].sub1, implicit $exec - ; GFX8: [[REG_SEQUENCE2:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[V_READFIRSTLANE_B32_2]], %subreg.sub0, [[V_READFIRSTLANE_B32_3]], %subreg.sub1 - ; GFX8: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE2]], [[COPY6]], implicit $exec - ; GFX8: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_1]], [[V_CMP_EQ_U64_e64_]], implicit-def $scc - ; GFX8: [[REG_SEQUENCE3:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1, [[V_READFIRSTLANE_B32_2]], %subreg.sub2, [[V_READFIRSTLANE_B32_3]], %subreg.sub3 - ; GFX8: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_]], implicit-def $exec, implicit-def $scc, implicit $exec - ; GFX8: [[BUFFER_LOAD_DWORD_OFFSET:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFSET [[REG_SEQUENCE3]], [[COPY4]], 4092, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s32)) - ; GFX8: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc - ; GFX8: SI_WATERFALL_LOOP %bb.2, implicit $exec - ; GFX8: bb.3: - ; GFX8: $exec = S_MOV_B64_term [[S_MOV_B64_term]] - ; GFX8: bb.4: - ; GFX8: $vgpr0 = COPY [[BUFFER_LOAD_DWORD_OFFSET]] - ; GFX8: SI_RETURN_TO_EPILOG implicit $vgpr0 + ; GFX8-NEXT: liveins: $sgpr2, $vgpr0, $vgpr1, $vgpr2, $vgpr3 + ; GFX8-NEXT: {{ $}} + ; GFX8-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 + ; GFX8-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 + ; GFX8-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2 + ; GFX8-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY $vgpr3 + ; GFX8-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 + ; GFX8-NEXT: [[COPY4:%[0-9]+]]:sreg_32 = COPY $sgpr2 + ; GFX8-NEXT: [[COPY5:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub0_sub1 + ; GFX8-NEXT: [[COPY6:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub2_sub3 + ; GFX8-NEXT: [[S_MOV_B64_term:%[0-9]+]]:sreg_64_xexec = S_MOV_B64_term $exec + ; GFX8-NEXT: {{ $}} + ; GFX8-NEXT: bb.2: + ; GFX8-NEXT: successors: %bb.3, %bb.2 + ; GFX8-NEXT: {{ $}} + ; GFX8-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY5]].sub0, implicit $exec + ; GFX8-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY5]].sub1, implicit $exec + ; GFX8-NEXT: 
[[REG_SEQUENCE1:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1 + ; GFX8-NEXT: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE1]], [[COPY5]], implicit $exec + ; GFX8-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY6]].sub0, implicit $exec + ; GFX8-NEXT: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY6]].sub1, implicit $exec + ; GFX8-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[V_READFIRSTLANE_B32_2]], %subreg.sub0, [[V_READFIRSTLANE_B32_3]], %subreg.sub1 + ; GFX8-NEXT: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE2]], [[COPY6]], implicit $exec + ; GFX8-NEXT: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_1]], [[V_CMP_EQ_U64_e64_]], implicit-def $scc + ; GFX8-NEXT: [[REG_SEQUENCE3:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1, [[V_READFIRSTLANE_B32_2]], %subreg.sub2, [[V_READFIRSTLANE_B32_3]], %subreg.sub3 + ; GFX8-NEXT: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_]], implicit-def $exec, implicit-def $scc, implicit $exec + ; GFX8-NEXT: [[BUFFER_LOAD_DWORD_OFFSET:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFSET [[REG_SEQUENCE3]], [[COPY4]], 4092, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s32)) + ; GFX8-NEXT: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc + ; GFX8-NEXT: SI_WATERFALL_LOOP %bb.2, implicit $exec + ; GFX8-NEXT: {{ $}} + ; GFX8-NEXT: bb.3: + ; GFX8-NEXT: $exec = S_MOV_B64_term [[S_MOV_B64_term]] + ; GFX8-NEXT: {{ $}} + ; GFX8-NEXT: bb.4: + ; GFX8-NEXT: $vgpr0 = COPY [[BUFFER_LOAD_DWORD_OFFSET]] + ; GFX8-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0 %soffset = add i32 %soffset.base, 4092 %val = call float @llvm.amdgcn.s.buffer.load.f32(<4 x i32> %rsrc, i32 %soffset, i32 0) ret float %val @@ -2810,115 +2933,130 @@ define amdgpu_ps float @s_buffer_load_f32_vgpr_rsrc_soffset_add_4092(<4 x i32> % define amdgpu_ps float @s_buffer_load_f32_vgpr_rsrc_soffset_add_4096(<4 x i32> %rsrc, i32 inreg %soffset.base) { ; GFX6-LABEL: name: s_buffer_load_f32_vgpr_rsrc_soffset_add_4096 ; GFX6: bb.1 (%ir-block.0): - ; GFX6: liveins: $sgpr2, $vgpr0, $vgpr1, $vgpr2, $vgpr3 - ; GFX6: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX6: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; GFX6: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2 - ; GFX6: [[COPY3:%[0-9]+]]:vgpr_32 = COPY $vgpr3 - ; GFX6: [[REG_SEQUENCE:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 - ; GFX6: [[COPY4:%[0-9]+]]:sreg_32 = COPY $sgpr2 - ; GFX6: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 4096 - ; GFX6: [[S_ADD_I32_:%[0-9]+]]:sreg_32 = S_ADD_I32 [[COPY4]], [[S_MOV_B32_]], implicit-def $scc - ; GFX6: [[COPY5:%[0-9]+]]:vgpr_32 = COPY [[S_ADD_I32_]] - ; GFX6: [[S_MOV_B32_1:%[0-9]+]]:sreg_32 = S_MOV_B32 0 - ; GFX6: [[COPY6:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub0_sub1 - ; GFX6: [[COPY7:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub2_sub3 - ; GFX6: [[S_MOV_B64_term:%[0-9]+]]:sreg_64_xexec = S_MOV_B64_term $exec - ; GFX6: bb.2: - ; GFX6: successors: %bb.3, %bb.2 - ; GFX6: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY6]].sub0, implicit $exec - ; GFX6: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY6]].sub1, implicit $exec - ; GFX6: 
[[REG_SEQUENCE1:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1 - ; GFX6: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE1]], [[COPY6]], implicit $exec - ; GFX6: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY7]].sub0, implicit $exec - ; GFX6: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY7]].sub1, implicit $exec - ; GFX6: [[REG_SEQUENCE2:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[V_READFIRSTLANE_B32_2]], %subreg.sub0, [[V_READFIRSTLANE_B32_3]], %subreg.sub1 - ; GFX6: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE2]], [[COPY7]], implicit $exec - ; GFX6: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_1]], [[V_CMP_EQ_U64_e64_]], implicit-def $scc - ; GFX6: [[REG_SEQUENCE3:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1, [[V_READFIRSTLANE_B32_2]], %subreg.sub2, [[V_READFIRSTLANE_B32_3]], %subreg.sub3 - ; GFX6: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_]], implicit-def $exec, implicit-def $scc, implicit $exec - ; GFX6: [[BUFFER_LOAD_DWORD_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFEN [[COPY5]], [[REG_SEQUENCE3]], [[S_MOV_B32_1]], 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s32)) - ; GFX6: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc - ; GFX6: SI_WATERFALL_LOOP %bb.2, implicit $exec - ; GFX6: bb.3: - ; GFX6: $exec = S_MOV_B64_term [[S_MOV_B64_term]] - ; GFX6: bb.4: - ; GFX6: $vgpr0 = COPY [[BUFFER_LOAD_DWORD_OFFEN]] - ; GFX6: SI_RETURN_TO_EPILOG implicit $vgpr0 + ; GFX6-NEXT: liveins: $sgpr2, $vgpr0, $vgpr1, $vgpr2, $vgpr3 + ; GFX6-NEXT: {{ $}} + ; GFX6-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 + ; GFX6-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 + ; GFX6-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2 + ; GFX6-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY $vgpr3 + ; GFX6-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 + ; GFX6-NEXT: [[COPY4:%[0-9]+]]:sreg_32 = COPY $sgpr2 + ; GFX6-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 4096 + ; GFX6-NEXT: [[S_ADD_I32_:%[0-9]+]]:sreg_32 = S_ADD_I32 [[COPY4]], [[S_MOV_B32_]], implicit-def $scc + ; GFX6-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY [[S_ADD_I32_]] + ; GFX6-NEXT: [[S_MOV_B32_1:%[0-9]+]]:sreg_32 = S_MOV_B32 0 + ; GFX6-NEXT: [[COPY6:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub0_sub1 + ; GFX6-NEXT: [[COPY7:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub2_sub3 + ; GFX6-NEXT: [[S_MOV_B64_term:%[0-9]+]]:sreg_64_xexec = S_MOV_B64_term $exec + ; GFX6-NEXT: {{ $}} + ; GFX6-NEXT: bb.2: + ; GFX6-NEXT: successors: %bb.3, %bb.2 + ; GFX6-NEXT: {{ $}} + ; GFX6-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY6]].sub0, implicit $exec + ; GFX6-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY6]].sub1, implicit $exec + ; GFX6-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1 + ; GFX6-NEXT: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE1]], [[COPY6]], implicit $exec + ; GFX6-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY7]].sub0, implicit $exec + ; GFX6-NEXT: 
[[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY7]].sub1, implicit $exec + ; GFX6-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[V_READFIRSTLANE_B32_2]], %subreg.sub0, [[V_READFIRSTLANE_B32_3]], %subreg.sub1 + ; GFX6-NEXT: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE2]], [[COPY7]], implicit $exec + ; GFX6-NEXT: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_1]], [[V_CMP_EQ_U64_e64_]], implicit-def $scc + ; GFX6-NEXT: [[REG_SEQUENCE3:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1, [[V_READFIRSTLANE_B32_2]], %subreg.sub2, [[V_READFIRSTLANE_B32_3]], %subreg.sub3 + ; GFX6-NEXT: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_]], implicit-def $exec, implicit-def $scc, implicit $exec + ; GFX6-NEXT: [[BUFFER_LOAD_DWORD_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFEN [[COPY5]], [[REG_SEQUENCE3]], [[S_MOV_B32_1]], 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s32)) + ; GFX6-NEXT: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc + ; GFX6-NEXT: SI_WATERFALL_LOOP %bb.2, implicit $exec + ; GFX6-NEXT: {{ $}} + ; GFX6-NEXT: bb.3: + ; GFX6-NEXT: $exec = S_MOV_B64_term [[S_MOV_B64_term]] + ; GFX6-NEXT: {{ $}} + ; GFX6-NEXT: bb.4: + ; GFX6-NEXT: $vgpr0 = COPY [[BUFFER_LOAD_DWORD_OFFEN]] + ; GFX6-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0 ; GFX7-LABEL: name: s_buffer_load_f32_vgpr_rsrc_soffset_add_4096 ; GFX7: bb.1 (%ir-block.0): - ; GFX7: liveins: $sgpr2, $vgpr0, $vgpr1, $vgpr2, $vgpr3 - ; GFX7: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX7: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; GFX7: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2 - ; GFX7: [[COPY3:%[0-9]+]]:vgpr_32 = COPY $vgpr3 - ; GFX7: [[REG_SEQUENCE:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 - ; GFX7: [[COPY4:%[0-9]+]]:sreg_32 = COPY $sgpr2 - ; GFX7: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 4096 - ; GFX7: [[S_ADD_I32_:%[0-9]+]]:sreg_32 = S_ADD_I32 [[COPY4]], [[S_MOV_B32_]], implicit-def $scc - ; GFX7: [[COPY5:%[0-9]+]]:vgpr_32 = COPY [[S_ADD_I32_]] - ; GFX7: [[S_MOV_B32_1:%[0-9]+]]:sreg_32 = S_MOV_B32 0 - ; GFX7: [[COPY6:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub0_sub1 - ; GFX7: [[COPY7:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub2_sub3 - ; GFX7: [[S_MOV_B64_term:%[0-9]+]]:sreg_64_xexec = S_MOV_B64_term $exec - ; GFX7: bb.2: - ; GFX7: successors: %bb.3, %bb.2 - ; GFX7: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY6]].sub0, implicit $exec - ; GFX7: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY6]].sub1, implicit $exec - ; GFX7: [[REG_SEQUENCE1:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1 - ; GFX7: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE1]], [[COPY6]], implicit $exec - ; GFX7: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY7]].sub0, implicit $exec - ; GFX7: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY7]].sub1, implicit $exec - ; GFX7: [[REG_SEQUENCE2:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[V_READFIRSTLANE_B32_2]], %subreg.sub0, [[V_READFIRSTLANE_B32_3]], %subreg.sub1 - ; GFX7: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE2]], [[COPY7]], implicit $exec - ; GFX7: 
[[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_1]], [[V_CMP_EQ_U64_e64_]], implicit-def $scc - ; GFX7: [[REG_SEQUENCE3:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1, [[V_READFIRSTLANE_B32_2]], %subreg.sub2, [[V_READFIRSTLANE_B32_3]], %subreg.sub3 - ; GFX7: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_]], implicit-def $exec, implicit-def $scc, implicit $exec - ; GFX7: [[BUFFER_LOAD_DWORD_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFEN [[COPY5]], [[REG_SEQUENCE3]], [[S_MOV_B32_1]], 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s32)) - ; GFX7: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc - ; GFX7: SI_WATERFALL_LOOP %bb.2, implicit $exec - ; GFX7: bb.3: - ; GFX7: $exec = S_MOV_B64_term [[S_MOV_B64_term]] - ; GFX7: bb.4: - ; GFX7: $vgpr0 = COPY [[BUFFER_LOAD_DWORD_OFFEN]] - ; GFX7: SI_RETURN_TO_EPILOG implicit $vgpr0 + ; GFX7-NEXT: liveins: $sgpr2, $vgpr0, $vgpr1, $vgpr2, $vgpr3 + ; GFX7-NEXT: {{ $}} + ; GFX7-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 + ; GFX7-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 + ; GFX7-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2 + ; GFX7-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY $vgpr3 + ; GFX7-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 + ; GFX7-NEXT: [[COPY4:%[0-9]+]]:sreg_32 = COPY $sgpr2 + ; GFX7-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 4096 + ; GFX7-NEXT: [[S_ADD_I32_:%[0-9]+]]:sreg_32 = S_ADD_I32 [[COPY4]], [[S_MOV_B32_]], implicit-def $scc + ; GFX7-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY [[S_ADD_I32_]] + ; GFX7-NEXT: [[S_MOV_B32_1:%[0-9]+]]:sreg_32 = S_MOV_B32 0 + ; GFX7-NEXT: [[COPY6:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub0_sub1 + ; GFX7-NEXT: [[COPY7:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub2_sub3 + ; GFX7-NEXT: [[S_MOV_B64_term:%[0-9]+]]:sreg_64_xexec = S_MOV_B64_term $exec + ; GFX7-NEXT: {{ $}} + ; GFX7-NEXT: bb.2: + ; GFX7-NEXT: successors: %bb.3, %bb.2 + ; GFX7-NEXT: {{ $}} + ; GFX7-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY6]].sub0, implicit $exec + ; GFX7-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY6]].sub1, implicit $exec + ; GFX7-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1 + ; GFX7-NEXT: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE1]], [[COPY6]], implicit $exec + ; GFX7-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY7]].sub0, implicit $exec + ; GFX7-NEXT: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY7]].sub1, implicit $exec + ; GFX7-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[V_READFIRSTLANE_B32_2]], %subreg.sub0, [[V_READFIRSTLANE_B32_3]], %subreg.sub1 + ; GFX7-NEXT: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE2]], [[COPY7]], implicit $exec + ; GFX7-NEXT: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_1]], [[V_CMP_EQ_U64_e64_]], implicit-def $scc + ; GFX7-NEXT: [[REG_SEQUENCE3:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1, [[V_READFIRSTLANE_B32_2]], %subreg.sub2, [[V_READFIRSTLANE_B32_3]], %subreg.sub3 + ; GFX7-NEXT: 
[[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_]], implicit-def $exec, implicit-def $scc, implicit $exec + ; GFX7-NEXT: [[BUFFER_LOAD_DWORD_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFEN [[COPY5]], [[REG_SEQUENCE3]], [[S_MOV_B32_1]], 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s32)) + ; GFX7-NEXT: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc + ; GFX7-NEXT: SI_WATERFALL_LOOP %bb.2, implicit $exec + ; GFX7-NEXT: {{ $}} + ; GFX7-NEXT: bb.3: + ; GFX7-NEXT: $exec = S_MOV_B64_term [[S_MOV_B64_term]] + ; GFX7-NEXT: {{ $}} + ; GFX7-NEXT: bb.4: + ; GFX7-NEXT: $vgpr0 = COPY [[BUFFER_LOAD_DWORD_OFFEN]] + ; GFX7-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0 ; GFX8-LABEL: name: s_buffer_load_f32_vgpr_rsrc_soffset_add_4096 ; GFX8: bb.1 (%ir-block.0): - ; GFX8: liveins: $sgpr2, $vgpr0, $vgpr1, $vgpr2, $vgpr3 - ; GFX8: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX8: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; GFX8: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2 - ; GFX8: [[COPY3:%[0-9]+]]:vgpr_32 = COPY $vgpr3 - ; GFX8: [[REG_SEQUENCE:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 - ; GFX8: [[COPY4:%[0-9]+]]:sreg_32 = COPY $sgpr2 - ; GFX8: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 4096 - ; GFX8: [[S_ADD_I32_:%[0-9]+]]:sreg_32 = S_ADD_I32 [[COPY4]], [[S_MOV_B32_]], implicit-def $scc - ; GFX8: [[COPY5:%[0-9]+]]:vgpr_32 = COPY [[S_ADD_I32_]] - ; GFX8: [[S_MOV_B32_1:%[0-9]+]]:sreg_32 = S_MOV_B32 0 - ; GFX8: [[COPY6:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub0_sub1 - ; GFX8: [[COPY7:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub2_sub3 - ; GFX8: [[S_MOV_B64_term:%[0-9]+]]:sreg_64_xexec = S_MOV_B64_term $exec - ; GFX8: bb.2: - ; GFX8: successors: %bb.3, %bb.2 - ; GFX8: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY6]].sub0, implicit $exec - ; GFX8: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY6]].sub1, implicit $exec - ; GFX8: [[REG_SEQUENCE1:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1 - ; GFX8: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE1]], [[COPY6]], implicit $exec - ; GFX8: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY7]].sub0, implicit $exec - ; GFX8: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY7]].sub1, implicit $exec - ; GFX8: [[REG_SEQUENCE2:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[V_READFIRSTLANE_B32_2]], %subreg.sub0, [[V_READFIRSTLANE_B32_3]], %subreg.sub1 - ; GFX8: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE2]], [[COPY7]], implicit $exec - ; GFX8: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_1]], [[V_CMP_EQ_U64_e64_]], implicit-def $scc - ; GFX8: [[REG_SEQUENCE3:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1, [[V_READFIRSTLANE_B32_2]], %subreg.sub2, [[V_READFIRSTLANE_B32_3]], %subreg.sub3 - ; GFX8: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_]], implicit-def $exec, implicit-def $scc, implicit $exec - ; GFX8: [[BUFFER_LOAD_DWORD_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFEN [[COPY5]], [[REG_SEQUENCE3]], [[S_MOV_B32_1]], 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s32)) - ; GFX8: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], 
implicit-def $scc - ; GFX8: SI_WATERFALL_LOOP %bb.2, implicit $exec - ; GFX8: bb.3: - ; GFX8: $exec = S_MOV_B64_term [[S_MOV_B64_term]] - ; GFX8: bb.4: - ; GFX8: $vgpr0 = COPY [[BUFFER_LOAD_DWORD_OFFEN]] - ; GFX8: SI_RETURN_TO_EPILOG implicit $vgpr0 + ; GFX8-NEXT: liveins: $sgpr2, $vgpr0, $vgpr1, $vgpr2, $vgpr3 + ; GFX8-NEXT: {{ $}} + ; GFX8-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 + ; GFX8-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 + ; GFX8-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2 + ; GFX8-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY $vgpr3 + ; GFX8-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 + ; GFX8-NEXT: [[COPY4:%[0-9]+]]:sreg_32 = COPY $sgpr2 + ; GFX8-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 4096 + ; GFX8-NEXT: [[S_ADD_I32_:%[0-9]+]]:sreg_32 = S_ADD_I32 [[COPY4]], [[S_MOV_B32_]], implicit-def $scc + ; GFX8-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY [[S_ADD_I32_]] + ; GFX8-NEXT: [[S_MOV_B32_1:%[0-9]+]]:sreg_32 = S_MOV_B32 0 + ; GFX8-NEXT: [[COPY6:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub0_sub1 + ; GFX8-NEXT: [[COPY7:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub2_sub3 + ; GFX8-NEXT: [[S_MOV_B64_term:%[0-9]+]]:sreg_64_xexec = S_MOV_B64_term $exec + ; GFX8-NEXT: {{ $}} + ; GFX8-NEXT: bb.2: + ; GFX8-NEXT: successors: %bb.3, %bb.2 + ; GFX8-NEXT: {{ $}} + ; GFX8-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY6]].sub0, implicit $exec + ; GFX8-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY6]].sub1, implicit $exec + ; GFX8-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1 + ; GFX8-NEXT: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE1]], [[COPY6]], implicit $exec + ; GFX8-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY7]].sub0, implicit $exec + ; GFX8-NEXT: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY7]].sub1, implicit $exec + ; GFX8-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[V_READFIRSTLANE_B32_2]], %subreg.sub0, [[V_READFIRSTLANE_B32_3]], %subreg.sub1 + ; GFX8-NEXT: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE2]], [[COPY7]], implicit $exec + ; GFX8-NEXT: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_1]], [[V_CMP_EQ_U64_e64_]], implicit-def $scc + ; GFX8-NEXT: [[REG_SEQUENCE3:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1, [[V_READFIRSTLANE_B32_2]], %subreg.sub2, [[V_READFIRSTLANE_B32_3]], %subreg.sub3 + ; GFX8-NEXT: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_]], implicit-def $exec, implicit-def $scc, implicit $exec + ; GFX8-NEXT: [[BUFFER_LOAD_DWORD_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFEN [[COPY5]], [[REG_SEQUENCE3]], [[S_MOV_B32_1]], 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s32)) + ; GFX8-NEXT: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc + ; GFX8-NEXT: SI_WATERFALL_LOOP %bb.2, implicit $exec + ; GFX8-NEXT: {{ $}} + ; GFX8-NEXT: bb.3: + ; GFX8-NEXT: $exec = S_MOV_B64_term [[S_MOV_B64_term]] + ; GFX8-NEXT: {{ $}} + ; GFX8-NEXT: bb.4: + ; GFX8-NEXT: $vgpr0 = COPY [[BUFFER_LOAD_DWORD_OFFEN]] + ; GFX8-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0 %soffset = add i32 %soffset.base, 4096 %val = 
call float @llvm.amdgcn.s.buffer.load.f32(<4 x i32> %rsrc, i32 %soffset, i32 0) ret float %val @@ -2928,103 +3066,118 @@ define amdgpu_ps float @s_buffer_load_f32_vgpr_rsrc_soffset_add_4096(<4 x i32> % define amdgpu_ps float @s_buffer_load_f32_vgpr_rsrc_offset_4095(<4 x i32> %rsrc) { ; GFX6-LABEL: name: s_buffer_load_f32_vgpr_rsrc_offset_4095 ; GFX6: bb.1 (%ir-block.0): - ; GFX6: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3 - ; GFX6: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX6: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; GFX6: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2 - ; GFX6: [[COPY3:%[0-9]+]]:vgpr_32 = COPY $vgpr3 - ; GFX6: [[REG_SEQUENCE:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 - ; GFX6: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0 - ; GFX6: [[COPY4:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub0_sub1 - ; GFX6: [[COPY5:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub2_sub3 - ; GFX6: [[S_MOV_B64_term:%[0-9]+]]:sreg_64_xexec = S_MOV_B64_term $exec - ; GFX6: bb.2: - ; GFX6: successors: %bb.3, %bb.2 - ; GFX6: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY4]].sub0, implicit $exec - ; GFX6: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY4]].sub1, implicit $exec - ; GFX6: [[REG_SEQUENCE1:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1 - ; GFX6: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE1]], [[COPY4]], implicit $exec - ; GFX6: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY5]].sub0, implicit $exec - ; GFX6: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY5]].sub1, implicit $exec - ; GFX6: [[REG_SEQUENCE2:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[V_READFIRSTLANE_B32_2]], %subreg.sub0, [[V_READFIRSTLANE_B32_3]], %subreg.sub1 - ; GFX6: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE2]], [[COPY5]], implicit $exec - ; GFX6: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_1]], [[V_CMP_EQ_U64_e64_]], implicit-def $scc - ; GFX6: [[REG_SEQUENCE3:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1, [[V_READFIRSTLANE_B32_2]], %subreg.sub2, [[V_READFIRSTLANE_B32_3]], %subreg.sub3 - ; GFX6: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_]], implicit-def $exec, implicit-def $scc, implicit $exec - ; GFX6: [[BUFFER_LOAD_DWORD_OFFSET:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFSET [[REG_SEQUENCE3]], [[S_MOV_B32_]], 4095, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s32) from unknown-address + 4095, align 1) - ; GFX6: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc - ; GFX6: SI_WATERFALL_LOOP %bb.2, implicit $exec - ; GFX6: bb.3: - ; GFX6: $exec = S_MOV_B64_term [[S_MOV_B64_term]] - ; GFX6: bb.4: - ; GFX6: $vgpr0 = COPY [[BUFFER_LOAD_DWORD_OFFSET]] - ; GFX6: SI_RETURN_TO_EPILOG implicit $vgpr0 + ; GFX6-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3 + ; GFX6-NEXT: {{ $}} + ; GFX6-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 + ; GFX6-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 + ; GFX6-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2 + ; GFX6-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY $vgpr3 + ; GFX6-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], 
%subreg.sub3 + ; GFX6-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0 + ; GFX6-NEXT: [[COPY4:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub0_sub1 + ; GFX6-NEXT: [[COPY5:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub2_sub3 + ; GFX6-NEXT: [[S_MOV_B64_term:%[0-9]+]]:sreg_64_xexec = S_MOV_B64_term $exec + ; GFX6-NEXT: {{ $}} + ; GFX6-NEXT: bb.2: + ; GFX6-NEXT: successors: %bb.3, %bb.2 + ; GFX6-NEXT: {{ $}} + ; GFX6-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY4]].sub0, implicit $exec + ; GFX6-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY4]].sub1, implicit $exec + ; GFX6-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1 + ; GFX6-NEXT: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE1]], [[COPY4]], implicit $exec + ; GFX6-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY5]].sub0, implicit $exec + ; GFX6-NEXT: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY5]].sub1, implicit $exec + ; GFX6-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[V_READFIRSTLANE_B32_2]], %subreg.sub0, [[V_READFIRSTLANE_B32_3]], %subreg.sub1 + ; GFX6-NEXT: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE2]], [[COPY5]], implicit $exec + ; GFX6-NEXT: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_1]], [[V_CMP_EQ_U64_e64_]], implicit-def $scc + ; GFX6-NEXT: [[REG_SEQUENCE3:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1, [[V_READFIRSTLANE_B32_2]], %subreg.sub2, [[V_READFIRSTLANE_B32_3]], %subreg.sub3 + ; GFX6-NEXT: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_]], implicit-def $exec, implicit-def $scc, implicit $exec + ; GFX6-NEXT: [[BUFFER_LOAD_DWORD_OFFSET:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFSET [[REG_SEQUENCE3]], [[S_MOV_B32_]], 4095, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s32) from unknown-address + 4095, align 1) + ; GFX6-NEXT: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc + ; GFX6-NEXT: SI_WATERFALL_LOOP %bb.2, implicit $exec + ; GFX6-NEXT: {{ $}} + ; GFX6-NEXT: bb.3: + ; GFX6-NEXT: $exec = S_MOV_B64_term [[S_MOV_B64_term]] + ; GFX6-NEXT: {{ $}} + ; GFX6-NEXT: bb.4: + ; GFX6-NEXT: $vgpr0 = COPY [[BUFFER_LOAD_DWORD_OFFSET]] + ; GFX6-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0 ; GFX7-LABEL: name: s_buffer_load_f32_vgpr_rsrc_offset_4095 ; GFX7: bb.1 (%ir-block.0): - ; GFX7: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3 - ; GFX7: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX7: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; GFX7: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2 - ; GFX7: [[COPY3:%[0-9]+]]:vgpr_32 = COPY $vgpr3 - ; GFX7: [[REG_SEQUENCE:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 - ; GFX7: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0 - ; GFX7: [[COPY4:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub0_sub1 - ; GFX7: [[COPY5:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub2_sub3 - ; GFX7: [[S_MOV_B64_term:%[0-9]+]]:sreg_64_xexec = S_MOV_B64_term $exec - ; GFX7: bb.2: - ; GFX7: successors: %bb.3, %bb.2 - ; GFX7: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY4]].sub0, implicit $exec - ; GFX7: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 
[[COPY4]].sub1, implicit $exec - ; GFX7: [[REG_SEQUENCE1:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1 - ; GFX7: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE1]], [[COPY4]], implicit $exec - ; GFX7: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY5]].sub0, implicit $exec - ; GFX7: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY5]].sub1, implicit $exec - ; GFX7: [[REG_SEQUENCE2:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[V_READFIRSTLANE_B32_2]], %subreg.sub0, [[V_READFIRSTLANE_B32_3]], %subreg.sub1 - ; GFX7: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE2]], [[COPY5]], implicit $exec - ; GFX7: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_1]], [[V_CMP_EQ_U64_e64_]], implicit-def $scc - ; GFX7: [[REG_SEQUENCE3:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1, [[V_READFIRSTLANE_B32_2]], %subreg.sub2, [[V_READFIRSTLANE_B32_3]], %subreg.sub3 - ; GFX7: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_]], implicit-def $exec, implicit-def $scc, implicit $exec - ; GFX7: [[BUFFER_LOAD_DWORD_OFFSET:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFSET [[REG_SEQUENCE3]], [[S_MOV_B32_]], 4095, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s32) from unknown-address + 4095, align 1) - ; GFX7: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc - ; GFX7: SI_WATERFALL_LOOP %bb.2, implicit $exec - ; GFX7: bb.3: - ; GFX7: $exec = S_MOV_B64_term [[S_MOV_B64_term]] - ; GFX7: bb.4: - ; GFX7: $vgpr0 = COPY [[BUFFER_LOAD_DWORD_OFFSET]] - ; GFX7: SI_RETURN_TO_EPILOG implicit $vgpr0 + ; GFX7-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3 + ; GFX7-NEXT: {{ $}} + ; GFX7-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 + ; GFX7-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 + ; GFX7-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2 + ; GFX7-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY $vgpr3 + ; GFX7-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 + ; GFX7-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0 + ; GFX7-NEXT: [[COPY4:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub0_sub1 + ; GFX7-NEXT: [[COPY5:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub2_sub3 + ; GFX7-NEXT: [[S_MOV_B64_term:%[0-9]+]]:sreg_64_xexec = S_MOV_B64_term $exec + ; GFX7-NEXT: {{ $}} + ; GFX7-NEXT: bb.2: + ; GFX7-NEXT: successors: %bb.3, %bb.2 + ; GFX7-NEXT: {{ $}} + ; GFX7-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY4]].sub0, implicit $exec + ; GFX7-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY4]].sub1, implicit $exec + ; GFX7-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1 + ; GFX7-NEXT: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE1]], [[COPY4]], implicit $exec + ; GFX7-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY5]].sub0, implicit $exec + ; GFX7-NEXT: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY5]].sub1, implicit $exec + ; GFX7-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[V_READFIRSTLANE_B32_2]], %subreg.sub0, [[V_READFIRSTLANE_B32_3]], %subreg.sub1 + ; 
GFX7-NEXT: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE2]], [[COPY5]], implicit $exec + ; GFX7-NEXT: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_1]], [[V_CMP_EQ_U64_e64_]], implicit-def $scc + ; GFX7-NEXT: [[REG_SEQUENCE3:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1, [[V_READFIRSTLANE_B32_2]], %subreg.sub2, [[V_READFIRSTLANE_B32_3]], %subreg.sub3 + ; GFX7-NEXT: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_]], implicit-def $exec, implicit-def $scc, implicit $exec + ; GFX7-NEXT: [[BUFFER_LOAD_DWORD_OFFSET:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFSET [[REG_SEQUENCE3]], [[S_MOV_B32_]], 4095, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s32) from unknown-address + 4095, align 1) + ; GFX7-NEXT: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc + ; GFX7-NEXT: SI_WATERFALL_LOOP %bb.2, implicit $exec + ; GFX7-NEXT: {{ $}} + ; GFX7-NEXT: bb.3: + ; GFX7-NEXT: $exec = S_MOV_B64_term [[S_MOV_B64_term]] + ; GFX7-NEXT: {{ $}} + ; GFX7-NEXT: bb.4: + ; GFX7-NEXT: $vgpr0 = COPY [[BUFFER_LOAD_DWORD_OFFSET]] + ; GFX7-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0 ; GFX8-LABEL: name: s_buffer_load_f32_vgpr_rsrc_offset_4095 ; GFX8: bb.1 (%ir-block.0): - ; GFX8: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3 - ; GFX8: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX8: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; GFX8: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2 - ; GFX8: [[COPY3:%[0-9]+]]:vgpr_32 = COPY $vgpr3 - ; GFX8: [[REG_SEQUENCE:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 - ; GFX8: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0 - ; GFX8: [[COPY4:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub0_sub1 - ; GFX8: [[COPY5:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub2_sub3 - ; GFX8: [[S_MOV_B64_term:%[0-9]+]]:sreg_64_xexec = S_MOV_B64_term $exec - ; GFX8: bb.2: - ; GFX8: successors: %bb.3, %bb.2 - ; GFX8: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY4]].sub0, implicit $exec - ; GFX8: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY4]].sub1, implicit $exec - ; GFX8: [[REG_SEQUENCE1:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1 - ; GFX8: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE1]], [[COPY4]], implicit $exec - ; GFX8: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY5]].sub0, implicit $exec - ; GFX8: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY5]].sub1, implicit $exec - ; GFX8: [[REG_SEQUENCE2:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[V_READFIRSTLANE_B32_2]], %subreg.sub0, [[V_READFIRSTLANE_B32_3]], %subreg.sub1 - ; GFX8: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE2]], [[COPY5]], implicit $exec - ; GFX8: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_1]], [[V_CMP_EQ_U64_e64_]], implicit-def $scc - ; GFX8: [[REG_SEQUENCE3:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1, [[V_READFIRSTLANE_B32_2]], %subreg.sub2, [[V_READFIRSTLANE_B32_3]], %subreg.sub3 - ; GFX8: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_]], implicit-def $exec, implicit-def $scc, implicit $exec - ; GFX8: 
[[BUFFER_LOAD_DWORD_OFFSET:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFSET [[REG_SEQUENCE3]], [[S_MOV_B32_]], 4095, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s32) from unknown-address + 4095, align 1) - ; GFX8: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc - ; GFX8: SI_WATERFALL_LOOP %bb.2, implicit $exec - ; GFX8: bb.3: - ; GFX8: $exec = S_MOV_B64_term [[S_MOV_B64_term]] - ; GFX8: bb.4: - ; GFX8: $vgpr0 = COPY [[BUFFER_LOAD_DWORD_OFFSET]] - ; GFX8: SI_RETURN_TO_EPILOG implicit $vgpr0 + ; GFX8-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3 + ; GFX8-NEXT: {{ $}} + ; GFX8-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 + ; GFX8-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 + ; GFX8-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2 + ; GFX8-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY $vgpr3 + ; GFX8-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 + ; GFX8-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0 + ; GFX8-NEXT: [[COPY4:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub0_sub1 + ; GFX8-NEXT: [[COPY5:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub2_sub3 + ; GFX8-NEXT: [[S_MOV_B64_term:%[0-9]+]]:sreg_64_xexec = S_MOV_B64_term $exec + ; GFX8-NEXT: {{ $}} + ; GFX8-NEXT: bb.2: + ; GFX8-NEXT: successors: %bb.3, %bb.2 + ; GFX8-NEXT: {{ $}} + ; GFX8-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY4]].sub0, implicit $exec + ; GFX8-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY4]].sub1, implicit $exec + ; GFX8-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1 + ; GFX8-NEXT: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE1]], [[COPY4]], implicit $exec + ; GFX8-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY5]].sub0, implicit $exec + ; GFX8-NEXT: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY5]].sub1, implicit $exec + ; GFX8-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[V_READFIRSTLANE_B32_2]], %subreg.sub0, [[V_READFIRSTLANE_B32_3]], %subreg.sub1 + ; GFX8-NEXT: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE2]], [[COPY5]], implicit $exec + ; GFX8-NEXT: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_1]], [[V_CMP_EQ_U64_e64_]], implicit-def $scc + ; GFX8-NEXT: [[REG_SEQUENCE3:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1, [[V_READFIRSTLANE_B32_2]], %subreg.sub2, [[V_READFIRSTLANE_B32_3]], %subreg.sub3 + ; GFX8-NEXT: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_]], implicit-def $exec, implicit-def $scc, implicit $exec + ; GFX8-NEXT: [[BUFFER_LOAD_DWORD_OFFSET:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFSET [[REG_SEQUENCE3]], [[S_MOV_B32_]], 4095, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s32) from unknown-address + 4095, align 1) + ; GFX8-NEXT: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc + ; GFX8-NEXT: SI_WATERFALL_LOOP %bb.2, implicit $exec + ; GFX8-NEXT: {{ $}} + ; GFX8-NEXT: bb.3: + ; GFX8-NEXT: $exec = S_MOV_B64_term [[S_MOV_B64_term]] + ; GFX8-NEXT: {{ $}} + ; GFX8-NEXT: bb.4: + ; GFX8-NEXT: $vgpr0 = COPY [[BUFFER_LOAD_DWORD_OFFSET]] + ; GFX8-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0 %val = call float 
@llvm.amdgcn.s.buffer.load.f32(<4 x i32> %rsrc, i32 4095, i32 0) ret float %val } @@ -3033,107 +3186,122 @@ define amdgpu_ps float @s_buffer_load_f32_vgpr_rsrc_offset_4095(<4 x i32> %rsrc) define amdgpu_ps float @s_buffer_load_f32_vgpr_rsrc_offset_4096(<4 x i32> %rsrc) { ; GFX6-LABEL: name: s_buffer_load_f32_vgpr_rsrc_offset_4096 ; GFX6: bb.1 (%ir-block.0): - ; GFX6: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3 - ; GFX6: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX6: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; GFX6: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2 - ; GFX6: [[COPY3:%[0-9]+]]:vgpr_32 = COPY $vgpr3 - ; GFX6: [[REG_SEQUENCE:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 - ; GFX6: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 4096 - ; GFX6: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[S_MOV_B32_]] - ; GFX6: [[S_MOV_B32_1:%[0-9]+]]:sreg_32 = S_MOV_B32 0 - ; GFX6: [[COPY5:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub0_sub1 - ; GFX6: [[COPY6:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub2_sub3 - ; GFX6: [[S_MOV_B64_term:%[0-9]+]]:sreg_64_xexec = S_MOV_B64_term $exec - ; GFX6: bb.2: - ; GFX6: successors: %bb.3, %bb.2 - ; GFX6: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY5]].sub0, implicit $exec - ; GFX6: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY5]].sub1, implicit $exec - ; GFX6: [[REG_SEQUENCE1:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1 - ; GFX6: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE1]], [[COPY5]], implicit $exec - ; GFX6: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY6]].sub0, implicit $exec - ; GFX6: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY6]].sub1, implicit $exec - ; GFX6: [[REG_SEQUENCE2:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[V_READFIRSTLANE_B32_2]], %subreg.sub0, [[V_READFIRSTLANE_B32_3]], %subreg.sub1 - ; GFX6: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE2]], [[COPY6]], implicit $exec - ; GFX6: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_1]], [[V_CMP_EQ_U64_e64_]], implicit-def $scc - ; GFX6: [[REG_SEQUENCE3:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1, [[V_READFIRSTLANE_B32_2]], %subreg.sub2, [[V_READFIRSTLANE_B32_3]], %subreg.sub3 - ; GFX6: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_]], implicit-def $exec, implicit-def $scc, implicit $exec - ; GFX6: [[BUFFER_LOAD_DWORD_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFEN [[COPY4]], [[REG_SEQUENCE3]], [[S_MOV_B32_1]], 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s32)) - ; GFX6: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc - ; GFX6: SI_WATERFALL_LOOP %bb.2, implicit $exec - ; GFX6: bb.3: - ; GFX6: $exec = S_MOV_B64_term [[S_MOV_B64_term]] - ; GFX6: bb.4: - ; GFX6: $vgpr0 = COPY [[BUFFER_LOAD_DWORD_OFFEN]] - ; GFX6: SI_RETURN_TO_EPILOG implicit $vgpr0 + ; GFX6-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3 + ; GFX6-NEXT: {{ $}} + ; GFX6-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 + ; GFX6-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 + ; GFX6-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2 + ; GFX6-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY $vgpr3 + ; GFX6-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[COPY]], 
%subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 + ; GFX6-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 4096 + ; GFX6-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[S_MOV_B32_]] + ; GFX6-NEXT: [[S_MOV_B32_1:%[0-9]+]]:sreg_32 = S_MOV_B32 0 + ; GFX6-NEXT: [[COPY5:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub0_sub1 + ; GFX6-NEXT: [[COPY6:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub2_sub3 + ; GFX6-NEXT: [[S_MOV_B64_term:%[0-9]+]]:sreg_64_xexec = S_MOV_B64_term $exec + ; GFX6-NEXT: {{ $}} + ; GFX6-NEXT: bb.2: + ; GFX6-NEXT: successors: %bb.3, %bb.2 + ; GFX6-NEXT: {{ $}} + ; GFX6-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY5]].sub0, implicit $exec + ; GFX6-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY5]].sub1, implicit $exec + ; GFX6-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1 + ; GFX6-NEXT: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE1]], [[COPY5]], implicit $exec + ; GFX6-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY6]].sub0, implicit $exec + ; GFX6-NEXT: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY6]].sub1, implicit $exec + ; GFX6-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[V_READFIRSTLANE_B32_2]], %subreg.sub0, [[V_READFIRSTLANE_B32_3]], %subreg.sub1 + ; GFX6-NEXT: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE2]], [[COPY6]], implicit $exec + ; GFX6-NEXT: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_1]], [[V_CMP_EQ_U64_e64_]], implicit-def $scc + ; GFX6-NEXT: [[REG_SEQUENCE3:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1, [[V_READFIRSTLANE_B32_2]], %subreg.sub2, [[V_READFIRSTLANE_B32_3]], %subreg.sub3 + ; GFX6-NEXT: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_]], implicit-def $exec, implicit-def $scc, implicit $exec + ; GFX6-NEXT: [[BUFFER_LOAD_DWORD_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFEN [[COPY4]], [[REG_SEQUENCE3]], [[S_MOV_B32_1]], 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s32)) + ; GFX6-NEXT: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc + ; GFX6-NEXT: SI_WATERFALL_LOOP %bb.2, implicit $exec + ; GFX6-NEXT: {{ $}} + ; GFX6-NEXT: bb.3: + ; GFX6-NEXT: $exec = S_MOV_B64_term [[S_MOV_B64_term]] + ; GFX6-NEXT: {{ $}} + ; GFX6-NEXT: bb.4: + ; GFX6-NEXT: $vgpr0 = COPY [[BUFFER_LOAD_DWORD_OFFEN]] + ; GFX6-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0 ; GFX7-LABEL: name: s_buffer_load_f32_vgpr_rsrc_offset_4096 ; GFX7: bb.1 (%ir-block.0): - ; GFX7: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3 - ; GFX7: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX7: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; GFX7: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2 - ; GFX7: [[COPY3:%[0-9]+]]:vgpr_32 = COPY $vgpr3 - ; GFX7: [[REG_SEQUENCE:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 - ; GFX7: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 4096 - ; GFX7: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[S_MOV_B32_]] - ; GFX7: [[S_MOV_B32_1:%[0-9]+]]:sreg_32 = S_MOV_B32 0 - ; GFX7: [[COPY5:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub0_sub1 - ; GFX7: [[COPY6:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub2_sub3 - ; GFX7: 
[[S_MOV_B64_term:%[0-9]+]]:sreg_64_xexec = S_MOV_B64_term $exec - ; GFX7: bb.2: - ; GFX7: successors: %bb.3, %bb.2 - ; GFX7: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY5]].sub0, implicit $exec - ; GFX7: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY5]].sub1, implicit $exec - ; GFX7: [[REG_SEQUENCE1:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1 - ; GFX7: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE1]], [[COPY5]], implicit $exec - ; GFX7: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY6]].sub0, implicit $exec - ; GFX7: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY6]].sub1, implicit $exec - ; GFX7: [[REG_SEQUENCE2:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[V_READFIRSTLANE_B32_2]], %subreg.sub0, [[V_READFIRSTLANE_B32_3]], %subreg.sub1 - ; GFX7: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE2]], [[COPY6]], implicit $exec - ; GFX7: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_1]], [[V_CMP_EQ_U64_e64_]], implicit-def $scc - ; GFX7: [[REG_SEQUENCE3:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1, [[V_READFIRSTLANE_B32_2]], %subreg.sub2, [[V_READFIRSTLANE_B32_3]], %subreg.sub3 - ; GFX7: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_]], implicit-def $exec, implicit-def $scc, implicit $exec - ; GFX7: [[BUFFER_LOAD_DWORD_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFEN [[COPY4]], [[REG_SEQUENCE3]], [[S_MOV_B32_1]], 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s32)) - ; GFX7: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc - ; GFX7: SI_WATERFALL_LOOP %bb.2, implicit $exec - ; GFX7: bb.3: - ; GFX7: $exec = S_MOV_B64_term [[S_MOV_B64_term]] - ; GFX7: bb.4: - ; GFX7: $vgpr0 = COPY [[BUFFER_LOAD_DWORD_OFFEN]] - ; GFX7: SI_RETURN_TO_EPILOG implicit $vgpr0 + ; GFX7-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3 + ; GFX7-NEXT: {{ $}} + ; GFX7-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 + ; GFX7-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 + ; GFX7-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2 + ; GFX7-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY $vgpr3 + ; GFX7-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 + ; GFX7-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 4096 + ; GFX7-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[S_MOV_B32_]] + ; GFX7-NEXT: [[S_MOV_B32_1:%[0-9]+]]:sreg_32 = S_MOV_B32 0 + ; GFX7-NEXT: [[COPY5:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub0_sub1 + ; GFX7-NEXT: [[COPY6:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub2_sub3 + ; GFX7-NEXT: [[S_MOV_B64_term:%[0-9]+]]:sreg_64_xexec = S_MOV_B64_term $exec + ; GFX7-NEXT: {{ $}} + ; GFX7-NEXT: bb.2: + ; GFX7-NEXT: successors: %bb.3, %bb.2 + ; GFX7-NEXT: {{ $}} + ; GFX7-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY5]].sub0, implicit $exec + ; GFX7-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY5]].sub1, implicit $exec + ; GFX7-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1 + ; GFX7-NEXT: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE1]], [[COPY5]], 
implicit $exec + ; GFX7-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY6]].sub0, implicit $exec + ; GFX7-NEXT: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY6]].sub1, implicit $exec + ; GFX7-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[V_READFIRSTLANE_B32_2]], %subreg.sub0, [[V_READFIRSTLANE_B32_3]], %subreg.sub1 + ; GFX7-NEXT: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE2]], [[COPY6]], implicit $exec + ; GFX7-NEXT: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_1]], [[V_CMP_EQ_U64_e64_]], implicit-def $scc + ; GFX7-NEXT: [[REG_SEQUENCE3:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1, [[V_READFIRSTLANE_B32_2]], %subreg.sub2, [[V_READFIRSTLANE_B32_3]], %subreg.sub3 + ; GFX7-NEXT: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_]], implicit-def $exec, implicit-def $scc, implicit $exec + ; GFX7-NEXT: [[BUFFER_LOAD_DWORD_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFEN [[COPY4]], [[REG_SEQUENCE3]], [[S_MOV_B32_1]], 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s32)) + ; GFX7-NEXT: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc + ; GFX7-NEXT: SI_WATERFALL_LOOP %bb.2, implicit $exec + ; GFX7-NEXT: {{ $}} + ; GFX7-NEXT: bb.3: + ; GFX7-NEXT: $exec = S_MOV_B64_term [[S_MOV_B64_term]] + ; GFX7-NEXT: {{ $}} + ; GFX7-NEXT: bb.4: + ; GFX7-NEXT: $vgpr0 = COPY [[BUFFER_LOAD_DWORD_OFFEN]] + ; GFX7-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0 ; GFX8-LABEL: name: s_buffer_load_f32_vgpr_rsrc_offset_4096 ; GFX8: bb.1 (%ir-block.0): - ; GFX8: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3 - ; GFX8: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX8: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; GFX8: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2 - ; GFX8: [[COPY3:%[0-9]+]]:vgpr_32 = COPY $vgpr3 - ; GFX8: [[REG_SEQUENCE:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 - ; GFX8: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 1 - ; GFX8: [[COPY4:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub0_sub1 - ; GFX8: [[COPY5:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub2_sub3 - ; GFX8: [[S_MOV_B64_term:%[0-9]+]]:sreg_64_xexec = S_MOV_B64_term $exec - ; GFX8: bb.2: - ; GFX8: successors: %bb.3, %bb.2 - ; GFX8: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY4]].sub0, implicit $exec - ; GFX8: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY4]].sub1, implicit $exec - ; GFX8: [[REG_SEQUENCE1:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1 - ; GFX8: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE1]], [[COPY4]], implicit $exec - ; GFX8: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY5]].sub0, implicit $exec - ; GFX8: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY5]].sub1, implicit $exec - ; GFX8: [[REG_SEQUENCE2:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[V_READFIRSTLANE_B32_2]], %subreg.sub0, [[V_READFIRSTLANE_B32_3]], %subreg.sub1 - ; GFX8: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE2]], [[COPY5]], implicit $exec - ; GFX8: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_1]], [[V_CMP_EQ_U64_e64_]], implicit-def $scc - ; GFX8: 
[[REG_SEQUENCE3:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1, [[V_READFIRSTLANE_B32_2]], %subreg.sub2, [[V_READFIRSTLANE_B32_3]], %subreg.sub3 - ; GFX8: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_]], implicit-def $exec, implicit-def $scc, implicit $exec - ; GFX8: [[BUFFER_LOAD_DWORD_OFFSET:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFSET [[REG_SEQUENCE3]], [[S_MOV_B32_]], 4095, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s32) from unknown-address + 4096) - ; GFX8: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc - ; GFX8: SI_WATERFALL_LOOP %bb.2, implicit $exec - ; GFX8: bb.3: - ; GFX8: $exec = S_MOV_B64_term [[S_MOV_B64_term]] - ; GFX8: bb.4: - ; GFX8: $vgpr0 = COPY [[BUFFER_LOAD_DWORD_OFFSET]] - ; GFX8: SI_RETURN_TO_EPILOG implicit $vgpr0 + ; GFX8-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3 + ; GFX8-NEXT: {{ $}} + ; GFX8-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 + ; GFX8-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 + ; GFX8-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2 + ; GFX8-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY $vgpr3 + ; GFX8-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 + ; GFX8-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 1 + ; GFX8-NEXT: [[COPY4:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub0_sub1 + ; GFX8-NEXT: [[COPY5:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub2_sub3 + ; GFX8-NEXT: [[S_MOV_B64_term:%[0-9]+]]:sreg_64_xexec = S_MOV_B64_term $exec + ; GFX8-NEXT: {{ $}} + ; GFX8-NEXT: bb.2: + ; GFX8-NEXT: successors: %bb.3, %bb.2 + ; GFX8-NEXT: {{ $}} + ; GFX8-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY4]].sub0, implicit $exec + ; GFX8-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY4]].sub1, implicit $exec + ; GFX8-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1 + ; GFX8-NEXT: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE1]], [[COPY4]], implicit $exec + ; GFX8-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY5]].sub0, implicit $exec + ; GFX8-NEXT: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY5]].sub1, implicit $exec + ; GFX8-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[V_READFIRSTLANE_B32_2]], %subreg.sub0, [[V_READFIRSTLANE_B32_3]], %subreg.sub1 + ; GFX8-NEXT: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE2]], [[COPY5]], implicit $exec + ; GFX8-NEXT: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_1]], [[V_CMP_EQ_U64_e64_]], implicit-def $scc + ; GFX8-NEXT: [[REG_SEQUENCE3:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1, [[V_READFIRSTLANE_B32_2]], %subreg.sub2, [[V_READFIRSTLANE_B32_3]], %subreg.sub3 + ; GFX8-NEXT: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_]], implicit-def $exec, implicit-def $scc, implicit $exec + ; GFX8-NEXT: [[BUFFER_LOAD_DWORD_OFFSET:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFSET [[REG_SEQUENCE3]], [[S_MOV_B32_]], 4095, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s32) from unknown-address + 4096) + ; GFX8-NEXT: $exec = S_XOR_B64_term $exec, 
[[S_AND_SAVEEXEC_B64_]], implicit-def $scc + ; GFX8-NEXT: SI_WATERFALL_LOOP %bb.2, implicit $exec + ; GFX8-NEXT: {{ $}} + ; GFX8-NEXT: bb.3: + ; GFX8-NEXT: $exec = S_MOV_B64_term [[S_MOV_B64_term]] + ; GFX8-NEXT: {{ $}} + ; GFX8-NEXT: bb.4: + ; GFX8-NEXT: $vgpr0 = COPY [[BUFFER_LOAD_DWORD_OFFSET]] + ; GFX8-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0 %val = call float @llvm.amdgcn.s.buffer.load.f32(<4 x i32> %rsrc, i32 4096, i32 0) ret float %val } @@ -3143,154 +3311,169 @@ define amdgpu_ps float @s_buffer_load_f32_vgpr_rsrc_offset_4096(<4 x i32> %rsrc) define amdgpu_ps <8 x float> @s_buffer_load_v8f32_vgpr_rsrc_add_4064(<4 x i32> %rsrc, i32 inreg %soffset.base) { ; GFX6-LABEL: name: s_buffer_load_v8f32_vgpr_rsrc_add_4064 ; GFX6: bb.1 (%ir-block.0): - ; GFX6: liveins: $sgpr2, $vgpr0, $vgpr1, $vgpr2, $vgpr3 - ; GFX6: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX6: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; GFX6: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2 - ; GFX6: [[COPY3:%[0-9]+]]:vgpr_32 = COPY $vgpr3 - ; GFX6: [[REG_SEQUENCE:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 - ; GFX6: [[COPY4:%[0-9]+]]:sreg_32 = COPY $sgpr2 - ; GFX6: [[COPY5:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub0_sub1 - ; GFX6: [[COPY6:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub2_sub3 - ; GFX6: [[S_MOV_B64_term:%[0-9]+]]:sreg_64_xexec = S_MOV_B64_term $exec - ; GFX6: bb.2: - ; GFX6: successors: %bb.3, %bb.2 - ; GFX6: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY5]].sub0, implicit $exec - ; GFX6: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY5]].sub1, implicit $exec - ; GFX6: [[REG_SEQUENCE1:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1 - ; GFX6: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE1]], [[COPY5]], implicit $exec - ; GFX6: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY6]].sub0, implicit $exec - ; GFX6: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY6]].sub1, implicit $exec - ; GFX6: [[REG_SEQUENCE2:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[V_READFIRSTLANE_B32_2]], %subreg.sub0, [[V_READFIRSTLANE_B32_3]], %subreg.sub1 - ; GFX6: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE2]], [[COPY6]], implicit $exec - ; GFX6: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_1]], [[V_CMP_EQ_U64_e64_]], implicit-def $scc - ; GFX6: [[REG_SEQUENCE3:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1, [[V_READFIRSTLANE_B32_2]], %subreg.sub2, [[V_READFIRSTLANE_B32_3]], %subreg.sub3 - ; GFX6: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_]], implicit-def $exec, implicit-def $scc, implicit $exec - ; GFX6: [[BUFFER_LOAD_DWORDX4_OFFSET:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFSET [[REG_SEQUENCE3]], [[COPY4]], 4064, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128), align 4) - ; GFX6: [[BUFFER_LOAD_DWORDX4_OFFSET1:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFSET [[REG_SEQUENCE3]], [[COPY4]], 4080, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128), align 4) - ; GFX6: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc - ; GFX6: SI_WATERFALL_LOOP %bb.2, implicit $exec - ; GFX6: bb.3: - ; GFX6: $exec = S_MOV_B64_term [[S_MOV_B64_term]] - ; 
GFX6: bb.4: - ; GFX6: [[REG_SEQUENCE4:%[0-9]+]]:vreg_256 = REG_SEQUENCE [[BUFFER_LOAD_DWORDX4_OFFSET]], %subreg.sub0_sub1_sub2_sub3, [[BUFFER_LOAD_DWORDX4_OFFSET1]], %subreg.sub4_sub5_sub6_sub7 - ; GFX6: [[COPY7:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub0 - ; GFX6: [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub1 - ; GFX6: [[COPY9:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub2 - ; GFX6: [[COPY10:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub3 - ; GFX6: [[COPY11:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub4 - ; GFX6: [[COPY12:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub5 - ; GFX6: [[COPY13:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub6 - ; GFX6: [[COPY14:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub7 - ; GFX6: $vgpr0 = COPY [[COPY7]] - ; GFX6: $vgpr1 = COPY [[COPY8]] - ; GFX6: $vgpr2 = COPY [[COPY9]] - ; GFX6: $vgpr3 = COPY [[COPY10]] - ; GFX6: $vgpr4 = COPY [[COPY11]] - ; GFX6: $vgpr5 = COPY [[COPY12]] - ; GFX6: $vgpr6 = COPY [[COPY13]] - ; GFX6: $vgpr7 = COPY [[COPY14]] - ; GFX6: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7 + ; GFX6-NEXT: liveins: $sgpr2, $vgpr0, $vgpr1, $vgpr2, $vgpr3 + ; GFX6-NEXT: {{ $}} + ; GFX6-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 + ; GFX6-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 + ; GFX6-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2 + ; GFX6-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY $vgpr3 + ; GFX6-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 + ; GFX6-NEXT: [[COPY4:%[0-9]+]]:sreg_32 = COPY $sgpr2 + ; GFX6-NEXT: [[COPY5:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub0_sub1 + ; GFX6-NEXT: [[COPY6:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub2_sub3 + ; GFX6-NEXT: [[S_MOV_B64_term:%[0-9]+]]:sreg_64_xexec = S_MOV_B64_term $exec + ; GFX6-NEXT: {{ $}} + ; GFX6-NEXT: bb.2: + ; GFX6-NEXT: successors: %bb.3, %bb.2 + ; GFX6-NEXT: {{ $}} + ; GFX6-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY5]].sub0, implicit $exec + ; GFX6-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY5]].sub1, implicit $exec + ; GFX6-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1 + ; GFX6-NEXT: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE1]], [[COPY5]], implicit $exec + ; GFX6-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY6]].sub0, implicit $exec + ; GFX6-NEXT: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY6]].sub1, implicit $exec + ; GFX6-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[V_READFIRSTLANE_B32_2]], %subreg.sub0, [[V_READFIRSTLANE_B32_3]], %subreg.sub1 + ; GFX6-NEXT: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE2]], [[COPY6]], implicit $exec + ; GFX6-NEXT: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_1]], [[V_CMP_EQ_U64_e64_]], implicit-def $scc + ; GFX6-NEXT: [[REG_SEQUENCE3:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1, [[V_READFIRSTLANE_B32_2]], %subreg.sub2, [[V_READFIRSTLANE_B32_3]], %subreg.sub3 + ; GFX6-NEXT: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_]], implicit-def $exec, implicit-def $scc, implicit $exec + ; 
GFX6-NEXT: [[BUFFER_LOAD_DWORDX4_OFFSET:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFSET [[REG_SEQUENCE3]], [[COPY4]], 4064, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128), align 4) + ; GFX6-NEXT: [[BUFFER_LOAD_DWORDX4_OFFSET1:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFSET [[REG_SEQUENCE3]], [[COPY4]], 4080, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128), align 4) + ; GFX6-NEXT: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc + ; GFX6-NEXT: SI_WATERFALL_LOOP %bb.2, implicit $exec + ; GFX6-NEXT: {{ $}} + ; GFX6-NEXT: bb.3: + ; GFX6-NEXT: $exec = S_MOV_B64_term [[S_MOV_B64_term]] + ; GFX6-NEXT: {{ $}} + ; GFX6-NEXT: bb.4: + ; GFX6-NEXT: [[REG_SEQUENCE4:%[0-9]+]]:vreg_256 = REG_SEQUENCE [[BUFFER_LOAD_DWORDX4_OFFSET]], %subreg.sub0_sub1_sub2_sub3, [[BUFFER_LOAD_DWORDX4_OFFSET1]], %subreg.sub4_sub5_sub6_sub7 + ; GFX6-NEXT: [[COPY7:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub0 + ; GFX6-NEXT: [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub1 + ; GFX6-NEXT: [[COPY9:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub2 + ; GFX6-NEXT: [[COPY10:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub3 + ; GFX6-NEXT: [[COPY11:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub4 + ; GFX6-NEXT: [[COPY12:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub5 + ; GFX6-NEXT: [[COPY13:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub6 + ; GFX6-NEXT: [[COPY14:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub7 + ; GFX6-NEXT: $vgpr0 = COPY [[COPY7]] + ; GFX6-NEXT: $vgpr1 = COPY [[COPY8]] + ; GFX6-NEXT: $vgpr2 = COPY [[COPY9]] + ; GFX6-NEXT: $vgpr3 = COPY [[COPY10]] + ; GFX6-NEXT: $vgpr4 = COPY [[COPY11]] + ; GFX6-NEXT: $vgpr5 = COPY [[COPY12]] + ; GFX6-NEXT: $vgpr6 = COPY [[COPY13]] + ; GFX6-NEXT: $vgpr7 = COPY [[COPY14]] + ; GFX6-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7 ; GFX7-LABEL: name: s_buffer_load_v8f32_vgpr_rsrc_add_4064 ; GFX7: bb.1 (%ir-block.0): - ; GFX7: liveins: $sgpr2, $vgpr0, $vgpr1, $vgpr2, $vgpr3 - ; GFX7: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX7: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; GFX7: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2 - ; GFX7: [[COPY3:%[0-9]+]]:vgpr_32 = COPY $vgpr3 - ; GFX7: [[REG_SEQUENCE:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 - ; GFX7: [[COPY4:%[0-9]+]]:sreg_32 = COPY $sgpr2 - ; GFX7: [[COPY5:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub0_sub1 - ; GFX7: [[COPY6:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub2_sub3 - ; GFX7: [[S_MOV_B64_term:%[0-9]+]]:sreg_64_xexec = S_MOV_B64_term $exec - ; GFX7: bb.2: - ; GFX7: successors: %bb.3, %bb.2 - ; GFX7: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY5]].sub0, implicit $exec - ; GFX7: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY5]].sub1, implicit $exec - ; GFX7: [[REG_SEQUENCE1:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1 - ; GFX7: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE1]], [[COPY5]], implicit $exec - ; GFX7: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY6]].sub0, implicit $exec - ; GFX7: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY6]].sub1, implicit $exec - ; GFX7: [[REG_SEQUENCE2:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[V_READFIRSTLANE_B32_2]], 
%subreg.sub0, [[V_READFIRSTLANE_B32_3]], %subreg.sub1 - ; GFX7: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE2]], [[COPY6]], implicit $exec - ; GFX7: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_1]], [[V_CMP_EQ_U64_e64_]], implicit-def $scc - ; GFX7: [[REG_SEQUENCE3:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1, [[V_READFIRSTLANE_B32_2]], %subreg.sub2, [[V_READFIRSTLANE_B32_3]], %subreg.sub3 - ; GFX7: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_]], implicit-def $exec, implicit-def $scc, implicit $exec - ; GFX7: [[BUFFER_LOAD_DWORDX4_OFFSET:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFSET [[REG_SEQUENCE3]], [[COPY4]], 4064, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128), align 4) - ; GFX7: [[BUFFER_LOAD_DWORDX4_OFFSET1:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFSET [[REG_SEQUENCE3]], [[COPY4]], 4080, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128), align 4) - ; GFX7: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc - ; GFX7: SI_WATERFALL_LOOP %bb.2, implicit $exec - ; GFX7: bb.3: - ; GFX7: $exec = S_MOV_B64_term [[S_MOV_B64_term]] - ; GFX7: bb.4: - ; GFX7: [[REG_SEQUENCE4:%[0-9]+]]:vreg_256 = REG_SEQUENCE [[BUFFER_LOAD_DWORDX4_OFFSET]], %subreg.sub0_sub1_sub2_sub3, [[BUFFER_LOAD_DWORDX4_OFFSET1]], %subreg.sub4_sub5_sub6_sub7 - ; GFX7: [[COPY7:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub0 - ; GFX7: [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub1 - ; GFX7: [[COPY9:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub2 - ; GFX7: [[COPY10:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub3 - ; GFX7: [[COPY11:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub4 - ; GFX7: [[COPY12:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub5 - ; GFX7: [[COPY13:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub6 - ; GFX7: [[COPY14:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub7 - ; GFX7: $vgpr0 = COPY [[COPY7]] - ; GFX7: $vgpr1 = COPY [[COPY8]] - ; GFX7: $vgpr2 = COPY [[COPY9]] - ; GFX7: $vgpr3 = COPY [[COPY10]] - ; GFX7: $vgpr4 = COPY [[COPY11]] - ; GFX7: $vgpr5 = COPY [[COPY12]] - ; GFX7: $vgpr6 = COPY [[COPY13]] - ; GFX7: $vgpr7 = COPY [[COPY14]] - ; GFX7: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7 + ; GFX7-NEXT: liveins: $sgpr2, $vgpr0, $vgpr1, $vgpr2, $vgpr3 + ; GFX7-NEXT: {{ $}} + ; GFX7-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 + ; GFX7-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 + ; GFX7-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2 + ; GFX7-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY $vgpr3 + ; GFX7-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 + ; GFX7-NEXT: [[COPY4:%[0-9]+]]:sreg_32 = COPY $sgpr2 + ; GFX7-NEXT: [[COPY5:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub0_sub1 + ; GFX7-NEXT: [[COPY6:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub2_sub3 + ; GFX7-NEXT: [[S_MOV_B64_term:%[0-9]+]]:sreg_64_xexec = S_MOV_B64_term $exec + ; GFX7-NEXT: {{ $}} + ; GFX7-NEXT: bb.2: + ; GFX7-NEXT: successors: %bb.3, %bb.2 + ; GFX7-NEXT: {{ $}} + ; GFX7-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY5]].sub0, implicit $exec + ; GFX7-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY5]].sub1, implicit $exec + ; GFX7-NEXT: 
[[REG_SEQUENCE1:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1 + ; GFX7-NEXT: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE1]], [[COPY5]], implicit $exec + ; GFX7-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY6]].sub0, implicit $exec + ; GFX7-NEXT: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY6]].sub1, implicit $exec + ; GFX7-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[V_READFIRSTLANE_B32_2]], %subreg.sub0, [[V_READFIRSTLANE_B32_3]], %subreg.sub1 + ; GFX7-NEXT: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE2]], [[COPY6]], implicit $exec + ; GFX7-NEXT: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_1]], [[V_CMP_EQ_U64_e64_]], implicit-def $scc + ; GFX7-NEXT: [[REG_SEQUENCE3:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1, [[V_READFIRSTLANE_B32_2]], %subreg.sub2, [[V_READFIRSTLANE_B32_3]], %subreg.sub3 + ; GFX7-NEXT: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_]], implicit-def $exec, implicit-def $scc, implicit $exec + ; GFX7-NEXT: [[BUFFER_LOAD_DWORDX4_OFFSET:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFSET [[REG_SEQUENCE3]], [[COPY4]], 4064, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128), align 4) + ; GFX7-NEXT: [[BUFFER_LOAD_DWORDX4_OFFSET1:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFSET [[REG_SEQUENCE3]], [[COPY4]], 4080, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128), align 4) + ; GFX7-NEXT: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc + ; GFX7-NEXT: SI_WATERFALL_LOOP %bb.2, implicit $exec + ; GFX7-NEXT: {{ $}} + ; GFX7-NEXT: bb.3: + ; GFX7-NEXT: $exec = S_MOV_B64_term [[S_MOV_B64_term]] + ; GFX7-NEXT: {{ $}} + ; GFX7-NEXT: bb.4: + ; GFX7-NEXT: [[REG_SEQUENCE4:%[0-9]+]]:vreg_256 = REG_SEQUENCE [[BUFFER_LOAD_DWORDX4_OFFSET]], %subreg.sub0_sub1_sub2_sub3, [[BUFFER_LOAD_DWORDX4_OFFSET1]], %subreg.sub4_sub5_sub6_sub7 + ; GFX7-NEXT: [[COPY7:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub0 + ; GFX7-NEXT: [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub1 + ; GFX7-NEXT: [[COPY9:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub2 + ; GFX7-NEXT: [[COPY10:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub3 + ; GFX7-NEXT: [[COPY11:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub4 + ; GFX7-NEXT: [[COPY12:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub5 + ; GFX7-NEXT: [[COPY13:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub6 + ; GFX7-NEXT: [[COPY14:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub7 + ; GFX7-NEXT: $vgpr0 = COPY [[COPY7]] + ; GFX7-NEXT: $vgpr1 = COPY [[COPY8]] + ; GFX7-NEXT: $vgpr2 = COPY [[COPY9]] + ; GFX7-NEXT: $vgpr3 = COPY [[COPY10]] + ; GFX7-NEXT: $vgpr4 = COPY [[COPY11]] + ; GFX7-NEXT: $vgpr5 = COPY [[COPY12]] + ; GFX7-NEXT: $vgpr6 = COPY [[COPY13]] + ; GFX7-NEXT: $vgpr7 = COPY [[COPY14]] + ; GFX7-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7 ; GFX8-LABEL: name: s_buffer_load_v8f32_vgpr_rsrc_add_4064 ; GFX8: bb.1 (%ir-block.0): - ; GFX8: liveins: $sgpr2, $vgpr0, $vgpr1, $vgpr2, $vgpr3 - ; GFX8: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX8: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; GFX8: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2 - ; GFX8: [[COPY3:%[0-9]+]]:vgpr_32 = COPY 
$vgpr3 - ; GFX8: [[REG_SEQUENCE:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 - ; GFX8: [[COPY4:%[0-9]+]]:sreg_32 = COPY $sgpr2 - ; GFX8: [[COPY5:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub0_sub1 - ; GFX8: [[COPY6:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub2_sub3 - ; GFX8: [[S_MOV_B64_term:%[0-9]+]]:sreg_64_xexec = S_MOV_B64_term $exec - ; GFX8: bb.2: - ; GFX8: successors: %bb.3, %bb.2 - ; GFX8: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY5]].sub0, implicit $exec - ; GFX8: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY5]].sub1, implicit $exec - ; GFX8: [[REG_SEQUENCE1:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1 - ; GFX8: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE1]], [[COPY5]], implicit $exec - ; GFX8: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY6]].sub0, implicit $exec - ; GFX8: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY6]].sub1, implicit $exec - ; GFX8: [[REG_SEQUENCE2:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[V_READFIRSTLANE_B32_2]], %subreg.sub0, [[V_READFIRSTLANE_B32_3]], %subreg.sub1 - ; GFX8: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE2]], [[COPY6]], implicit $exec - ; GFX8: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_1]], [[V_CMP_EQ_U64_e64_]], implicit-def $scc - ; GFX8: [[REG_SEQUENCE3:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1, [[V_READFIRSTLANE_B32_2]], %subreg.sub2, [[V_READFIRSTLANE_B32_3]], %subreg.sub3 - ; GFX8: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_]], implicit-def $exec, implicit-def $scc, implicit $exec - ; GFX8: [[BUFFER_LOAD_DWORDX4_OFFSET:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFSET [[REG_SEQUENCE3]], [[COPY4]], 4064, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128), align 4) - ; GFX8: [[BUFFER_LOAD_DWORDX4_OFFSET1:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFSET [[REG_SEQUENCE3]], [[COPY4]], 4080, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128), align 4) - ; GFX8: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc - ; GFX8: SI_WATERFALL_LOOP %bb.2, implicit $exec - ; GFX8: bb.3: - ; GFX8: $exec = S_MOV_B64_term [[S_MOV_B64_term]] - ; GFX8: bb.4: - ; GFX8: [[REG_SEQUENCE4:%[0-9]+]]:vreg_256 = REG_SEQUENCE [[BUFFER_LOAD_DWORDX4_OFFSET]], %subreg.sub0_sub1_sub2_sub3, [[BUFFER_LOAD_DWORDX4_OFFSET1]], %subreg.sub4_sub5_sub6_sub7 - ; GFX8: [[COPY7:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub0 - ; GFX8: [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub1 - ; GFX8: [[COPY9:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub2 - ; GFX8: [[COPY10:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub3 - ; GFX8: [[COPY11:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub4 - ; GFX8: [[COPY12:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub5 - ; GFX8: [[COPY13:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub6 - ; GFX8: [[COPY14:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub7 - ; GFX8: $vgpr0 = COPY [[COPY7]] - ; GFX8: $vgpr1 = COPY [[COPY8]] - ; GFX8: $vgpr2 = COPY [[COPY9]] - ; GFX8: $vgpr3 = COPY [[COPY10]] - ; GFX8: $vgpr4 = COPY [[COPY11]] - ; GFX8: $vgpr5 = COPY [[COPY12]] - ; GFX8: $vgpr6 = COPY [[COPY13]] - ; GFX8: $vgpr7 = COPY [[COPY14]] - ; 
GFX8: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7 + ; GFX8-NEXT: liveins: $sgpr2, $vgpr0, $vgpr1, $vgpr2, $vgpr3 + ; GFX8-NEXT: {{ $}} + ; GFX8-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 + ; GFX8-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 + ; GFX8-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2 + ; GFX8-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY $vgpr3 + ; GFX8-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 + ; GFX8-NEXT: [[COPY4:%[0-9]+]]:sreg_32 = COPY $sgpr2 + ; GFX8-NEXT: [[COPY5:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub0_sub1 + ; GFX8-NEXT: [[COPY6:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub2_sub3 + ; GFX8-NEXT: [[S_MOV_B64_term:%[0-9]+]]:sreg_64_xexec = S_MOV_B64_term $exec + ; GFX8-NEXT: {{ $}} + ; GFX8-NEXT: bb.2: + ; GFX8-NEXT: successors: %bb.3, %bb.2 + ; GFX8-NEXT: {{ $}} + ; GFX8-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY5]].sub0, implicit $exec + ; GFX8-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY5]].sub1, implicit $exec + ; GFX8-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1 + ; GFX8-NEXT: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE1]], [[COPY5]], implicit $exec + ; GFX8-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY6]].sub0, implicit $exec + ; GFX8-NEXT: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY6]].sub1, implicit $exec + ; GFX8-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[V_READFIRSTLANE_B32_2]], %subreg.sub0, [[V_READFIRSTLANE_B32_3]], %subreg.sub1 + ; GFX8-NEXT: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE2]], [[COPY6]], implicit $exec + ; GFX8-NEXT: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_1]], [[V_CMP_EQ_U64_e64_]], implicit-def $scc + ; GFX8-NEXT: [[REG_SEQUENCE3:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1, [[V_READFIRSTLANE_B32_2]], %subreg.sub2, [[V_READFIRSTLANE_B32_3]], %subreg.sub3 + ; GFX8-NEXT: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_]], implicit-def $exec, implicit-def $scc, implicit $exec + ; GFX8-NEXT: [[BUFFER_LOAD_DWORDX4_OFFSET:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFSET [[REG_SEQUENCE3]], [[COPY4]], 4064, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128), align 4) + ; GFX8-NEXT: [[BUFFER_LOAD_DWORDX4_OFFSET1:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFSET [[REG_SEQUENCE3]], [[COPY4]], 4080, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128), align 4) + ; GFX8-NEXT: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc + ; GFX8-NEXT: SI_WATERFALL_LOOP %bb.2, implicit $exec + ; GFX8-NEXT: {{ $}} + ; GFX8-NEXT: bb.3: + ; GFX8-NEXT: $exec = S_MOV_B64_term [[S_MOV_B64_term]] + ; GFX8-NEXT: {{ $}} + ; GFX8-NEXT: bb.4: + ; GFX8-NEXT: [[REG_SEQUENCE4:%[0-9]+]]:vreg_256 = REG_SEQUENCE [[BUFFER_LOAD_DWORDX4_OFFSET]], %subreg.sub0_sub1_sub2_sub3, [[BUFFER_LOAD_DWORDX4_OFFSET1]], %subreg.sub4_sub5_sub6_sub7 + ; GFX8-NEXT: [[COPY7:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub0 + ; GFX8-NEXT: [[COPY8:%[0-9]+]]:vgpr_32 = COPY 
[[REG_SEQUENCE4]].sub1 + ; GFX8-NEXT: [[COPY9:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub2 + ; GFX8-NEXT: [[COPY10:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub3 + ; GFX8-NEXT: [[COPY11:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub4 + ; GFX8-NEXT: [[COPY12:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub5 + ; GFX8-NEXT: [[COPY13:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub6 + ; GFX8-NEXT: [[COPY14:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub7 + ; GFX8-NEXT: $vgpr0 = COPY [[COPY7]] + ; GFX8-NEXT: $vgpr1 = COPY [[COPY8]] + ; GFX8-NEXT: $vgpr2 = COPY [[COPY9]] + ; GFX8-NEXT: $vgpr3 = COPY [[COPY10]] + ; GFX8-NEXT: $vgpr4 = COPY [[COPY11]] + ; GFX8-NEXT: $vgpr5 = COPY [[COPY12]] + ; GFX8-NEXT: $vgpr6 = COPY [[COPY13]] + ; GFX8-NEXT: $vgpr7 = COPY [[COPY14]] + ; GFX8-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7 %soffset = add i32 %soffset.base, 4064 %val = call <8 x float> @llvm.amdgcn.s.buffer.load.v8f32(<4 x i32> %rsrc, i32 %soffset, i32 0) ret <8 x float> %val @@ -3301,166 +3484,181 @@ define amdgpu_ps <8 x float> @s_buffer_load_v8f32_vgpr_rsrc_add_4064(<4 x i32> % define amdgpu_ps <8 x float> @s_buffer_load_v8f32_vgpr_rsrc_add_4068(<4 x i32> %rsrc, i32 inreg %soffset.base) { ; GFX6-LABEL: name: s_buffer_load_v8f32_vgpr_rsrc_add_4068 ; GFX6: bb.1 (%ir-block.0): - ; GFX6: liveins: $sgpr2, $vgpr0, $vgpr1, $vgpr2, $vgpr3 - ; GFX6: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX6: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; GFX6: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2 - ; GFX6: [[COPY3:%[0-9]+]]:vgpr_32 = COPY $vgpr3 - ; GFX6: [[REG_SEQUENCE:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 - ; GFX6: [[COPY4:%[0-9]+]]:sreg_32 = COPY $sgpr2 - ; GFX6: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 4068 - ; GFX6: [[S_ADD_I32_:%[0-9]+]]:sreg_32 = S_ADD_I32 [[COPY4]], [[S_MOV_B32_]], implicit-def $scc - ; GFX6: [[COPY5:%[0-9]+]]:vgpr_32 = COPY [[S_ADD_I32_]] - ; GFX6: [[S_MOV_B32_1:%[0-9]+]]:sreg_32 = S_MOV_B32 0 - ; GFX6: [[COPY6:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub0_sub1 - ; GFX6: [[COPY7:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub2_sub3 - ; GFX6: [[S_MOV_B64_term:%[0-9]+]]:sreg_64_xexec = S_MOV_B64_term $exec - ; GFX6: bb.2: - ; GFX6: successors: %bb.3, %bb.2 - ; GFX6: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY6]].sub0, implicit $exec - ; GFX6: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY6]].sub1, implicit $exec - ; GFX6: [[REG_SEQUENCE1:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1 - ; GFX6: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE1]], [[COPY6]], implicit $exec - ; GFX6: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY7]].sub0, implicit $exec - ; GFX6: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY7]].sub1, implicit $exec - ; GFX6: [[REG_SEQUENCE2:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[V_READFIRSTLANE_B32_2]], %subreg.sub0, [[V_READFIRSTLANE_B32_3]], %subreg.sub1 - ; GFX6: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE2]], [[COPY7]], implicit $exec - ; GFX6: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_1]], [[V_CMP_EQ_U64_e64_]], implicit-def $scc - ; GFX6: [[REG_SEQUENCE3:%[0-9]+]]:sgpr_128 = REG_SEQUENCE 
[[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1, [[V_READFIRSTLANE_B32_2]], %subreg.sub2, [[V_READFIRSTLANE_B32_3]], %subreg.sub3 - ; GFX6: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_]], implicit-def $exec, implicit-def $scc, implicit $exec - ; GFX6: [[BUFFER_LOAD_DWORDX4_OFFEN:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY5]], [[REG_SEQUENCE3]], [[S_MOV_B32_1]], 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128), align 4) - ; GFX6: [[BUFFER_LOAD_DWORDX4_OFFEN1:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY5]], [[REG_SEQUENCE3]], [[S_MOV_B32_1]], 16, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128), align 4) - ; GFX6: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc - ; GFX6: SI_WATERFALL_LOOP %bb.2, implicit $exec - ; GFX6: bb.3: - ; GFX6: $exec = S_MOV_B64_term [[S_MOV_B64_term]] - ; GFX6: bb.4: - ; GFX6: [[REG_SEQUENCE4:%[0-9]+]]:vreg_256 = REG_SEQUENCE [[BUFFER_LOAD_DWORDX4_OFFEN]], %subreg.sub0_sub1_sub2_sub3, [[BUFFER_LOAD_DWORDX4_OFFEN1]], %subreg.sub4_sub5_sub6_sub7 - ; GFX6: [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub0 - ; GFX6: [[COPY9:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub1 - ; GFX6: [[COPY10:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub2 - ; GFX6: [[COPY11:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub3 - ; GFX6: [[COPY12:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub4 - ; GFX6: [[COPY13:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub5 - ; GFX6: [[COPY14:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub6 - ; GFX6: [[COPY15:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub7 - ; GFX6: $vgpr0 = COPY [[COPY8]] - ; GFX6: $vgpr1 = COPY [[COPY9]] - ; GFX6: $vgpr2 = COPY [[COPY10]] - ; GFX6: $vgpr3 = COPY [[COPY11]] - ; GFX6: $vgpr4 = COPY [[COPY12]] - ; GFX6: $vgpr5 = COPY [[COPY13]] - ; GFX6: $vgpr6 = COPY [[COPY14]] - ; GFX6: $vgpr7 = COPY [[COPY15]] - ; GFX6: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7 + ; GFX6-NEXT: liveins: $sgpr2, $vgpr0, $vgpr1, $vgpr2, $vgpr3 + ; GFX6-NEXT: {{ $}} + ; GFX6-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 + ; GFX6-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 + ; GFX6-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2 + ; GFX6-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY $vgpr3 + ; GFX6-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 + ; GFX6-NEXT: [[COPY4:%[0-9]+]]:sreg_32 = COPY $sgpr2 + ; GFX6-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 4068 + ; GFX6-NEXT: [[S_ADD_I32_:%[0-9]+]]:sreg_32 = S_ADD_I32 [[COPY4]], [[S_MOV_B32_]], implicit-def $scc + ; GFX6-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY [[S_ADD_I32_]] + ; GFX6-NEXT: [[S_MOV_B32_1:%[0-9]+]]:sreg_32 = S_MOV_B32 0 + ; GFX6-NEXT: [[COPY6:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub0_sub1 + ; GFX6-NEXT: [[COPY7:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub2_sub3 + ; GFX6-NEXT: [[S_MOV_B64_term:%[0-9]+]]:sreg_64_xexec = S_MOV_B64_term $exec + ; GFX6-NEXT: {{ $}} + ; GFX6-NEXT: bb.2: + ; GFX6-NEXT: successors: %bb.3, %bb.2 + ; GFX6-NEXT: {{ $}} + ; GFX6-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY6]].sub0, implicit $exec + ; GFX6-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY6]].sub1, implicit $exec + ; GFX6-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sreg_64_xexec = 
REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1 + ; GFX6-NEXT: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE1]], [[COPY6]], implicit $exec + ; GFX6-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY7]].sub0, implicit $exec + ; GFX6-NEXT: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY7]].sub1, implicit $exec + ; GFX6-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[V_READFIRSTLANE_B32_2]], %subreg.sub0, [[V_READFIRSTLANE_B32_3]], %subreg.sub1 + ; GFX6-NEXT: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE2]], [[COPY7]], implicit $exec + ; GFX6-NEXT: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_1]], [[V_CMP_EQ_U64_e64_]], implicit-def $scc + ; GFX6-NEXT: [[REG_SEQUENCE3:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1, [[V_READFIRSTLANE_B32_2]], %subreg.sub2, [[V_READFIRSTLANE_B32_3]], %subreg.sub3 + ; GFX6-NEXT: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_]], implicit-def $exec, implicit-def $scc, implicit $exec + ; GFX6-NEXT: [[BUFFER_LOAD_DWORDX4_OFFEN:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY5]], [[REG_SEQUENCE3]], [[S_MOV_B32_1]], 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128), align 4) + ; GFX6-NEXT: [[BUFFER_LOAD_DWORDX4_OFFEN1:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY5]], [[REG_SEQUENCE3]], [[S_MOV_B32_1]], 16, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128), align 4) + ; GFX6-NEXT: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc + ; GFX6-NEXT: SI_WATERFALL_LOOP %bb.2, implicit $exec + ; GFX6-NEXT: {{ $}} + ; GFX6-NEXT: bb.3: + ; GFX6-NEXT: $exec = S_MOV_B64_term [[S_MOV_B64_term]] + ; GFX6-NEXT: {{ $}} + ; GFX6-NEXT: bb.4: + ; GFX6-NEXT: [[REG_SEQUENCE4:%[0-9]+]]:vreg_256 = REG_SEQUENCE [[BUFFER_LOAD_DWORDX4_OFFEN]], %subreg.sub0_sub1_sub2_sub3, [[BUFFER_LOAD_DWORDX4_OFFEN1]], %subreg.sub4_sub5_sub6_sub7 + ; GFX6-NEXT: [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub0 + ; GFX6-NEXT: [[COPY9:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub1 + ; GFX6-NEXT: [[COPY10:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub2 + ; GFX6-NEXT: [[COPY11:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub3 + ; GFX6-NEXT: [[COPY12:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub4 + ; GFX6-NEXT: [[COPY13:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub5 + ; GFX6-NEXT: [[COPY14:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub6 + ; GFX6-NEXT: [[COPY15:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub7 + ; GFX6-NEXT: $vgpr0 = COPY [[COPY8]] + ; GFX6-NEXT: $vgpr1 = COPY [[COPY9]] + ; GFX6-NEXT: $vgpr2 = COPY [[COPY10]] + ; GFX6-NEXT: $vgpr3 = COPY [[COPY11]] + ; GFX6-NEXT: $vgpr4 = COPY [[COPY12]] + ; GFX6-NEXT: $vgpr5 = COPY [[COPY13]] + ; GFX6-NEXT: $vgpr6 = COPY [[COPY14]] + ; GFX6-NEXT: $vgpr7 = COPY [[COPY15]] + ; GFX6-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7 ; GFX7-LABEL: name: s_buffer_load_v8f32_vgpr_rsrc_add_4068 ; GFX7: bb.1 (%ir-block.0): - ; GFX7: liveins: $sgpr2, $vgpr0, $vgpr1, $vgpr2, $vgpr3 - ; GFX7: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX7: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; GFX7: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2 - ; GFX7: [[COPY3:%[0-9]+]]:vgpr_32 = COPY $vgpr3 - ; GFX7: 
[[REG_SEQUENCE:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 - ; GFX7: [[COPY4:%[0-9]+]]:sreg_32 = COPY $sgpr2 - ; GFX7: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 4068 - ; GFX7: [[S_ADD_I32_:%[0-9]+]]:sreg_32 = S_ADD_I32 [[COPY4]], [[S_MOV_B32_]], implicit-def $scc - ; GFX7: [[COPY5:%[0-9]+]]:vgpr_32 = COPY [[S_ADD_I32_]] - ; GFX7: [[S_MOV_B32_1:%[0-9]+]]:sreg_32 = S_MOV_B32 0 - ; GFX7: [[COPY6:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub0_sub1 - ; GFX7: [[COPY7:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub2_sub3 - ; GFX7: [[S_MOV_B64_term:%[0-9]+]]:sreg_64_xexec = S_MOV_B64_term $exec - ; GFX7: bb.2: - ; GFX7: successors: %bb.3, %bb.2 - ; GFX7: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY6]].sub0, implicit $exec - ; GFX7: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY6]].sub1, implicit $exec - ; GFX7: [[REG_SEQUENCE1:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1 - ; GFX7: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE1]], [[COPY6]], implicit $exec - ; GFX7: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY7]].sub0, implicit $exec - ; GFX7: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY7]].sub1, implicit $exec - ; GFX7: [[REG_SEQUENCE2:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[V_READFIRSTLANE_B32_2]], %subreg.sub0, [[V_READFIRSTLANE_B32_3]], %subreg.sub1 - ; GFX7: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE2]], [[COPY7]], implicit $exec - ; GFX7: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_1]], [[V_CMP_EQ_U64_e64_]], implicit-def $scc - ; GFX7: [[REG_SEQUENCE3:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1, [[V_READFIRSTLANE_B32_2]], %subreg.sub2, [[V_READFIRSTLANE_B32_3]], %subreg.sub3 - ; GFX7: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_]], implicit-def $exec, implicit-def $scc, implicit $exec - ; GFX7: [[BUFFER_LOAD_DWORDX4_OFFEN:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY5]], [[REG_SEQUENCE3]], [[S_MOV_B32_1]], 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128), align 4) - ; GFX7: [[BUFFER_LOAD_DWORDX4_OFFEN1:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY5]], [[REG_SEQUENCE3]], [[S_MOV_B32_1]], 16, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128), align 4) - ; GFX7: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc - ; GFX7: SI_WATERFALL_LOOP %bb.2, implicit $exec - ; GFX7: bb.3: - ; GFX7: $exec = S_MOV_B64_term [[S_MOV_B64_term]] - ; GFX7: bb.4: - ; GFX7: [[REG_SEQUENCE4:%[0-9]+]]:vreg_256 = REG_SEQUENCE [[BUFFER_LOAD_DWORDX4_OFFEN]], %subreg.sub0_sub1_sub2_sub3, [[BUFFER_LOAD_DWORDX4_OFFEN1]], %subreg.sub4_sub5_sub6_sub7 - ; GFX7: [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub0 - ; GFX7: [[COPY9:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub1 - ; GFX7: [[COPY10:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub2 - ; GFX7: [[COPY11:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub3 - ; GFX7: [[COPY12:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub4 - ; GFX7: [[COPY13:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub5 - ; GFX7: [[COPY14:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub6 - ; GFX7: [[COPY15:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub7 - ; 
GFX7: $vgpr0 = COPY [[COPY8]] - ; GFX7: $vgpr1 = COPY [[COPY9]] - ; GFX7: $vgpr2 = COPY [[COPY10]] - ; GFX7: $vgpr3 = COPY [[COPY11]] - ; GFX7: $vgpr4 = COPY [[COPY12]] - ; GFX7: $vgpr5 = COPY [[COPY13]] - ; GFX7: $vgpr6 = COPY [[COPY14]] - ; GFX7: $vgpr7 = COPY [[COPY15]] - ; GFX7: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7 + ; GFX7-NEXT: liveins: $sgpr2, $vgpr0, $vgpr1, $vgpr2, $vgpr3 + ; GFX7-NEXT: {{ $}} + ; GFX7-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 + ; GFX7-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 + ; GFX7-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2 + ; GFX7-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY $vgpr3 + ; GFX7-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 + ; GFX7-NEXT: [[COPY4:%[0-9]+]]:sreg_32 = COPY $sgpr2 + ; GFX7-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 4068 + ; GFX7-NEXT: [[S_ADD_I32_:%[0-9]+]]:sreg_32 = S_ADD_I32 [[COPY4]], [[S_MOV_B32_]], implicit-def $scc + ; GFX7-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY [[S_ADD_I32_]] + ; GFX7-NEXT: [[S_MOV_B32_1:%[0-9]+]]:sreg_32 = S_MOV_B32 0 + ; GFX7-NEXT: [[COPY6:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub0_sub1 + ; GFX7-NEXT: [[COPY7:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub2_sub3 + ; GFX7-NEXT: [[S_MOV_B64_term:%[0-9]+]]:sreg_64_xexec = S_MOV_B64_term $exec + ; GFX7-NEXT: {{ $}} + ; GFX7-NEXT: bb.2: + ; GFX7-NEXT: successors: %bb.3, %bb.2 + ; GFX7-NEXT: {{ $}} + ; GFX7-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY6]].sub0, implicit $exec + ; GFX7-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY6]].sub1, implicit $exec + ; GFX7-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1 + ; GFX7-NEXT: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE1]], [[COPY6]], implicit $exec + ; GFX7-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY7]].sub0, implicit $exec + ; GFX7-NEXT: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY7]].sub1, implicit $exec + ; GFX7-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[V_READFIRSTLANE_B32_2]], %subreg.sub0, [[V_READFIRSTLANE_B32_3]], %subreg.sub1 + ; GFX7-NEXT: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE2]], [[COPY7]], implicit $exec + ; GFX7-NEXT: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_1]], [[V_CMP_EQ_U64_e64_]], implicit-def $scc + ; GFX7-NEXT: [[REG_SEQUENCE3:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1, [[V_READFIRSTLANE_B32_2]], %subreg.sub2, [[V_READFIRSTLANE_B32_3]], %subreg.sub3 + ; GFX7-NEXT: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_]], implicit-def $exec, implicit-def $scc, implicit $exec + ; GFX7-NEXT: [[BUFFER_LOAD_DWORDX4_OFFEN:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY5]], [[REG_SEQUENCE3]], [[S_MOV_B32_1]], 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128), align 4) + ; GFX7-NEXT: [[BUFFER_LOAD_DWORDX4_OFFEN1:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY5]], [[REG_SEQUENCE3]], [[S_MOV_B32_1]], 16, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128), align 4) + ; GFX7-NEXT: 
$exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc + ; GFX7-NEXT: SI_WATERFALL_LOOP %bb.2, implicit $exec + ; GFX7-NEXT: {{ $}} + ; GFX7-NEXT: bb.3: + ; GFX7-NEXT: $exec = S_MOV_B64_term [[S_MOV_B64_term]] + ; GFX7-NEXT: {{ $}} + ; GFX7-NEXT: bb.4: + ; GFX7-NEXT: [[REG_SEQUENCE4:%[0-9]+]]:vreg_256 = REG_SEQUENCE [[BUFFER_LOAD_DWORDX4_OFFEN]], %subreg.sub0_sub1_sub2_sub3, [[BUFFER_LOAD_DWORDX4_OFFEN1]], %subreg.sub4_sub5_sub6_sub7 + ; GFX7-NEXT: [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub0 + ; GFX7-NEXT: [[COPY9:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub1 + ; GFX7-NEXT: [[COPY10:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub2 + ; GFX7-NEXT: [[COPY11:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub3 + ; GFX7-NEXT: [[COPY12:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub4 + ; GFX7-NEXT: [[COPY13:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub5 + ; GFX7-NEXT: [[COPY14:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub6 + ; GFX7-NEXT: [[COPY15:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub7 + ; GFX7-NEXT: $vgpr0 = COPY [[COPY8]] + ; GFX7-NEXT: $vgpr1 = COPY [[COPY9]] + ; GFX7-NEXT: $vgpr2 = COPY [[COPY10]] + ; GFX7-NEXT: $vgpr3 = COPY [[COPY11]] + ; GFX7-NEXT: $vgpr4 = COPY [[COPY12]] + ; GFX7-NEXT: $vgpr5 = COPY [[COPY13]] + ; GFX7-NEXT: $vgpr6 = COPY [[COPY14]] + ; GFX7-NEXT: $vgpr7 = COPY [[COPY15]] + ; GFX7-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7 ; GFX8-LABEL: name: s_buffer_load_v8f32_vgpr_rsrc_add_4068 ; GFX8: bb.1 (%ir-block.0): - ; GFX8: liveins: $sgpr2, $vgpr0, $vgpr1, $vgpr2, $vgpr3 - ; GFX8: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX8: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; GFX8: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2 - ; GFX8: [[COPY3:%[0-9]+]]:vgpr_32 = COPY $vgpr3 - ; GFX8: [[REG_SEQUENCE:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 - ; GFX8: [[COPY4:%[0-9]+]]:sreg_32 = COPY $sgpr2 - ; GFX8: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 4068 - ; GFX8: [[S_ADD_I32_:%[0-9]+]]:sreg_32 = S_ADD_I32 [[COPY4]], [[S_MOV_B32_]], implicit-def $scc - ; GFX8: [[COPY5:%[0-9]+]]:vgpr_32 = COPY [[S_ADD_I32_]] - ; GFX8: [[S_MOV_B32_1:%[0-9]+]]:sreg_32 = S_MOV_B32 0 - ; GFX8: [[COPY6:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub0_sub1 - ; GFX8: [[COPY7:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub2_sub3 - ; GFX8: [[S_MOV_B64_term:%[0-9]+]]:sreg_64_xexec = S_MOV_B64_term $exec - ; GFX8: bb.2: - ; GFX8: successors: %bb.3, %bb.2 - ; GFX8: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY6]].sub0, implicit $exec - ; GFX8: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY6]].sub1, implicit $exec - ; GFX8: [[REG_SEQUENCE1:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1 - ; GFX8: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE1]], [[COPY6]], implicit $exec - ; GFX8: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY7]].sub0, implicit $exec - ; GFX8: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY7]].sub1, implicit $exec - ; GFX8: [[REG_SEQUENCE2:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[V_READFIRSTLANE_B32_2]], %subreg.sub0, [[V_READFIRSTLANE_B32_3]], %subreg.sub1 - ; GFX8: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE2]], [[COPY7]], 
implicit $exec - ; GFX8: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_1]], [[V_CMP_EQ_U64_e64_]], implicit-def $scc - ; GFX8: [[REG_SEQUENCE3:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1, [[V_READFIRSTLANE_B32_2]], %subreg.sub2, [[V_READFIRSTLANE_B32_3]], %subreg.sub3 - ; GFX8: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_]], implicit-def $exec, implicit-def $scc, implicit $exec - ; GFX8: [[BUFFER_LOAD_DWORDX4_OFFEN:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY5]], [[REG_SEQUENCE3]], [[S_MOV_B32_1]], 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128), align 4) - ; GFX8: [[BUFFER_LOAD_DWORDX4_OFFEN1:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY5]], [[REG_SEQUENCE3]], [[S_MOV_B32_1]], 16, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128), align 4) - ; GFX8: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc - ; GFX8: SI_WATERFALL_LOOP %bb.2, implicit $exec - ; GFX8: bb.3: - ; GFX8: $exec = S_MOV_B64_term [[S_MOV_B64_term]] - ; GFX8: bb.4: - ; GFX8: [[REG_SEQUENCE4:%[0-9]+]]:vreg_256 = REG_SEQUENCE [[BUFFER_LOAD_DWORDX4_OFFEN]], %subreg.sub0_sub1_sub2_sub3, [[BUFFER_LOAD_DWORDX4_OFFEN1]], %subreg.sub4_sub5_sub6_sub7 - ; GFX8: [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub0 - ; GFX8: [[COPY9:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub1 - ; GFX8: [[COPY10:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub2 - ; GFX8: [[COPY11:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub3 - ; GFX8: [[COPY12:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub4 - ; GFX8: [[COPY13:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub5 - ; GFX8: [[COPY14:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub6 - ; GFX8: [[COPY15:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub7 - ; GFX8: $vgpr0 = COPY [[COPY8]] - ; GFX8: $vgpr1 = COPY [[COPY9]] - ; GFX8: $vgpr2 = COPY [[COPY10]] - ; GFX8: $vgpr3 = COPY [[COPY11]] - ; GFX8: $vgpr4 = COPY [[COPY12]] - ; GFX8: $vgpr5 = COPY [[COPY13]] - ; GFX8: $vgpr6 = COPY [[COPY14]] - ; GFX8: $vgpr7 = COPY [[COPY15]] - ; GFX8: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7 + ; GFX8-NEXT: liveins: $sgpr2, $vgpr0, $vgpr1, $vgpr2, $vgpr3 + ; GFX8-NEXT: {{ $}} + ; GFX8-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 + ; GFX8-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 + ; GFX8-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2 + ; GFX8-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY $vgpr3 + ; GFX8-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 + ; GFX8-NEXT: [[COPY4:%[0-9]+]]:sreg_32 = COPY $sgpr2 + ; GFX8-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 4068 + ; GFX8-NEXT: [[S_ADD_I32_:%[0-9]+]]:sreg_32 = S_ADD_I32 [[COPY4]], [[S_MOV_B32_]], implicit-def $scc + ; GFX8-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY [[S_ADD_I32_]] + ; GFX8-NEXT: [[S_MOV_B32_1:%[0-9]+]]:sreg_32 = S_MOV_B32 0 + ; GFX8-NEXT: [[COPY6:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub0_sub1 + ; GFX8-NEXT: [[COPY7:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub2_sub3 + ; GFX8-NEXT: [[S_MOV_B64_term:%[0-9]+]]:sreg_64_xexec = S_MOV_B64_term $exec + ; GFX8-NEXT: {{ $}} + ; GFX8-NEXT: bb.2: + ; GFX8-NEXT: successors: %bb.3, %bb.2 + ; GFX8-NEXT: {{ $}} + ; GFX8-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 
[[COPY6]].sub0, implicit $exec + ; GFX8-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY6]].sub1, implicit $exec + ; GFX8-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1 + ; GFX8-NEXT: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE1]], [[COPY6]], implicit $exec + ; GFX8-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY7]].sub0, implicit $exec + ; GFX8-NEXT: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY7]].sub1, implicit $exec + ; GFX8-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[V_READFIRSTLANE_B32_2]], %subreg.sub0, [[V_READFIRSTLANE_B32_3]], %subreg.sub1 + ; GFX8-NEXT: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE2]], [[COPY7]], implicit $exec + ; GFX8-NEXT: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_1]], [[V_CMP_EQ_U64_e64_]], implicit-def $scc + ; GFX8-NEXT: [[REG_SEQUENCE3:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1, [[V_READFIRSTLANE_B32_2]], %subreg.sub2, [[V_READFIRSTLANE_B32_3]], %subreg.sub3 + ; GFX8-NEXT: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_]], implicit-def $exec, implicit-def $scc, implicit $exec + ; GFX8-NEXT: [[BUFFER_LOAD_DWORDX4_OFFEN:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY5]], [[REG_SEQUENCE3]], [[S_MOV_B32_1]], 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128), align 4) + ; GFX8-NEXT: [[BUFFER_LOAD_DWORDX4_OFFEN1:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY5]], [[REG_SEQUENCE3]], [[S_MOV_B32_1]], 16, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128), align 4) + ; GFX8-NEXT: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc + ; GFX8-NEXT: SI_WATERFALL_LOOP %bb.2, implicit $exec + ; GFX8-NEXT: {{ $}} + ; GFX8-NEXT: bb.3: + ; GFX8-NEXT: $exec = S_MOV_B64_term [[S_MOV_B64_term]] + ; GFX8-NEXT: {{ $}} + ; GFX8-NEXT: bb.4: + ; GFX8-NEXT: [[REG_SEQUENCE4:%[0-9]+]]:vreg_256 = REG_SEQUENCE [[BUFFER_LOAD_DWORDX4_OFFEN]], %subreg.sub0_sub1_sub2_sub3, [[BUFFER_LOAD_DWORDX4_OFFEN1]], %subreg.sub4_sub5_sub6_sub7 + ; GFX8-NEXT: [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub0 + ; GFX8-NEXT: [[COPY9:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub1 + ; GFX8-NEXT: [[COPY10:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub2 + ; GFX8-NEXT: [[COPY11:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub3 + ; GFX8-NEXT: [[COPY12:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub4 + ; GFX8-NEXT: [[COPY13:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub5 + ; GFX8-NEXT: [[COPY14:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub6 + ; GFX8-NEXT: [[COPY15:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub7 + ; GFX8-NEXT: $vgpr0 = COPY [[COPY8]] + ; GFX8-NEXT: $vgpr1 = COPY [[COPY9]] + ; GFX8-NEXT: $vgpr2 = COPY [[COPY10]] + ; GFX8-NEXT: $vgpr3 = COPY [[COPY11]] + ; GFX8-NEXT: $vgpr4 = COPY [[COPY12]] + ; GFX8-NEXT: $vgpr5 = COPY [[COPY13]] + ; GFX8-NEXT: $vgpr6 = COPY [[COPY14]] + ; GFX8-NEXT: $vgpr7 = COPY [[COPY15]] + ; GFX8-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7 %soffset = add i32 %soffset.base, 4068 %val = call <8 x float> @llvm.amdgcn.s.buffer.load.v8f32(<4 x i32> %rsrc, i32 %soffset, i32 0) ret <8 x float> 
%val @@ -3469,166 +3667,181 @@ define amdgpu_ps <8 x float> @s_buffer_load_v8f32_vgpr_rsrc_add_4068(<4 x i32> % define amdgpu_ps <8 x float> @s_buffer_load_v8f32_vgpr_rsrc_add_4096(<4 x i32> %rsrc, i32 inreg %soffset.base) { ; GFX6-LABEL: name: s_buffer_load_v8f32_vgpr_rsrc_add_4096 ; GFX6: bb.1 (%ir-block.0): - ; GFX6: liveins: $sgpr2, $vgpr0, $vgpr1, $vgpr2, $vgpr3 - ; GFX6: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX6: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; GFX6: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2 - ; GFX6: [[COPY3:%[0-9]+]]:vgpr_32 = COPY $vgpr3 - ; GFX6: [[REG_SEQUENCE:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 - ; GFX6: [[COPY4:%[0-9]+]]:sreg_32 = COPY $sgpr2 - ; GFX6: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 4096 - ; GFX6: [[S_ADD_I32_:%[0-9]+]]:sreg_32 = S_ADD_I32 [[COPY4]], [[S_MOV_B32_]], implicit-def $scc - ; GFX6: [[COPY5:%[0-9]+]]:vgpr_32 = COPY [[S_ADD_I32_]] - ; GFX6: [[S_MOV_B32_1:%[0-9]+]]:sreg_32 = S_MOV_B32 0 - ; GFX6: [[COPY6:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub0_sub1 - ; GFX6: [[COPY7:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub2_sub3 - ; GFX6: [[S_MOV_B64_term:%[0-9]+]]:sreg_64_xexec = S_MOV_B64_term $exec - ; GFX6: bb.2: - ; GFX6: successors: %bb.3, %bb.2 - ; GFX6: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY6]].sub0, implicit $exec - ; GFX6: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY6]].sub1, implicit $exec - ; GFX6: [[REG_SEQUENCE1:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1 - ; GFX6: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE1]], [[COPY6]], implicit $exec - ; GFX6: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY7]].sub0, implicit $exec - ; GFX6: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY7]].sub1, implicit $exec - ; GFX6: [[REG_SEQUENCE2:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[V_READFIRSTLANE_B32_2]], %subreg.sub0, [[V_READFIRSTLANE_B32_3]], %subreg.sub1 - ; GFX6: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE2]], [[COPY7]], implicit $exec - ; GFX6: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_1]], [[V_CMP_EQ_U64_e64_]], implicit-def $scc - ; GFX6: [[REG_SEQUENCE3:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1, [[V_READFIRSTLANE_B32_2]], %subreg.sub2, [[V_READFIRSTLANE_B32_3]], %subreg.sub3 - ; GFX6: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_]], implicit-def $exec, implicit-def $scc, implicit $exec - ; GFX6: [[BUFFER_LOAD_DWORDX4_OFFEN:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY5]], [[REG_SEQUENCE3]], [[S_MOV_B32_1]], 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128), align 4) - ; GFX6: [[BUFFER_LOAD_DWORDX4_OFFEN1:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY5]], [[REG_SEQUENCE3]], [[S_MOV_B32_1]], 16, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128), align 4) - ; GFX6: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc - ; GFX6: SI_WATERFALL_LOOP %bb.2, implicit $exec - ; GFX6: bb.3: - ; GFX6: $exec = S_MOV_B64_term [[S_MOV_B64_term]] - ; GFX6: bb.4: - ; GFX6: [[REG_SEQUENCE4:%[0-9]+]]:vreg_256 = REG_SEQUENCE [[BUFFER_LOAD_DWORDX4_OFFEN]], %subreg.sub0_sub1_sub2_sub3, 
[[BUFFER_LOAD_DWORDX4_OFFEN1]], %subreg.sub4_sub5_sub6_sub7 - ; GFX6: [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub0 - ; GFX6: [[COPY9:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub1 - ; GFX6: [[COPY10:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub2 - ; GFX6: [[COPY11:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub3 - ; GFX6: [[COPY12:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub4 - ; GFX6: [[COPY13:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub5 - ; GFX6: [[COPY14:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub6 - ; GFX6: [[COPY15:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub7 - ; GFX6: $vgpr0 = COPY [[COPY8]] - ; GFX6: $vgpr1 = COPY [[COPY9]] - ; GFX6: $vgpr2 = COPY [[COPY10]] - ; GFX6: $vgpr3 = COPY [[COPY11]] - ; GFX6: $vgpr4 = COPY [[COPY12]] - ; GFX6: $vgpr5 = COPY [[COPY13]] - ; GFX6: $vgpr6 = COPY [[COPY14]] - ; GFX6: $vgpr7 = COPY [[COPY15]] - ; GFX6: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7 + ; GFX6-NEXT: liveins: $sgpr2, $vgpr0, $vgpr1, $vgpr2, $vgpr3 + ; GFX6-NEXT: {{ $}} + ; GFX6-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 + ; GFX6-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 + ; GFX6-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2 + ; GFX6-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY $vgpr3 + ; GFX6-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 + ; GFX6-NEXT: [[COPY4:%[0-9]+]]:sreg_32 = COPY $sgpr2 + ; GFX6-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 4096 + ; GFX6-NEXT: [[S_ADD_I32_:%[0-9]+]]:sreg_32 = S_ADD_I32 [[COPY4]], [[S_MOV_B32_]], implicit-def $scc + ; GFX6-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY [[S_ADD_I32_]] + ; GFX6-NEXT: [[S_MOV_B32_1:%[0-9]+]]:sreg_32 = S_MOV_B32 0 + ; GFX6-NEXT: [[COPY6:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub0_sub1 + ; GFX6-NEXT: [[COPY7:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub2_sub3 + ; GFX6-NEXT: [[S_MOV_B64_term:%[0-9]+]]:sreg_64_xexec = S_MOV_B64_term $exec + ; GFX6-NEXT: {{ $}} + ; GFX6-NEXT: bb.2: + ; GFX6-NEXT: successors: %bb.3, %bb.2 + ; GFX6-NEXT: {{ $}} + ; GFX6-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY6]].sub0, implicit $exec + ; GFX6-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY6]].sub1, implicit $exec + ; GFX6-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1 + ; GFX6-NEXT: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE1]], [[COPY6]], implicit $exec + ; GFX6-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY7]].sub0, implicit $exec + ; GFX6-NEXT: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY7]].sub1, implicit $exec + ; GFX6-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[V_READFIRSTLANE_B32_2]], %subreg.sub0, [[V_READFIRSTLANE_B32_3]], %subreg.sub1 + ; GFX6-NEXT: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE2]], [[COPY7]], implicit $exec + ; GFX6-NEXT: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_1]], [[V_CMP_EQ_U64_e64_]], implicit-def $scc + ; GFX6-NEXT: [[REG_SEQUENCE3:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1, [[V_READFIRSTLANE_B32_2]], %subreg.sub2, [[V_READFIRSTLANE_B32_3]], %subreg.sub3 + ; 
GFX6-NEXT: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_]], implicit-def $exec, implicit-def $scc, implicit $exec + ; GFX6-NEXT: [[BUFFER_LOAD_DWORDX4_OFFEN:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY5]], [[REG_SEQUENCE3]], [[S_MOV_B32_1]], 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128), align 4) + ; GFX6-NEXT: [[BUFFER_LOAD_DWORDX4_OFFEN1:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY5]], [[REG_SEQUENCE3]], [[S_MOV_B32_1]], 16, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128), align 4) + ; GFX6-NEXT: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc + ; GFX6-NEXT: SI_WATERFALL_LOOP %bb.2, implicit $exec + ; GFX6-NEXT: {{ $}} + ; GFX6-NEXT: bb.3: + ; GFX6-NEXT: $exec = S_MOV_B64_term [[S_MOV_B64_term]] + ; GFX6-NEXT: {{ $}} + ; GFX6-NEXT: bb.4: + ; GFX6-NEXT: [[REG_SEQUENCE4:%[0-9]+]]:vreg_256 = REG_SEQUENCE [[BUFFER_LOAD_DWORDX4_OFFEN]], %subreg.sub0_sub1_sub2_sub3, [[BUFFER_LOAD_DWORDX4_OFFEN1]], %subreg.sub4_sub5_sub6_sub7 + ; GFX6-NEXT: [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub0 + ; GFX6-NEXT: [[COPY9:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub1 + ; GFX6-NEXT: [[COPY10:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub2 + ; GFX6-NEXT: [[COPY11:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub3 + ; GFX6-NEXT: [[COPY12:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub4 + ; GFX6-NEXT: [[COPY13:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub5 + ; GFX6-NEXT: [[COPY14:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub6 + ; GFX6-NEXT: [[COPY15:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub7 + ; GFX6-NEXT: $vgpr0 = COPY [[COPY8]] + ; GFX6-NEXT: $vgpr1 = COPY [[COPY9]] + ; GFX6-NEXT: $vgpr2 = COPY [[COPY10]] + ; GFX6-NEXT: $vgpr3 = COPY [[COPY11]] + ; GFX6-NEXT: $vgpr4 = COPY [[COPY12]] + ; GFX6-NEXT: $vgpr5 = COPY [[COPY13]] + ; GFX6-NEXT: $vgpr6 = COPY [[COPY14]] + ; GFX6-NEXT: $vgpr7 = COPY [[COPY15]] + ; GFX6-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7 ; GFX7-LABEL: name: s_buffer_load_v8f32_vgpr_rsrc_add_4096 ; GFX7: bb.1 (%ir-block.0): - ; GFX7: liveins: $sgpr2, $vgpr0, $vgpr1, $vgpr2, $vgpr3 - ; GFX7: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX7: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; GFX7: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2 - ; GFX7: [[COPY3:%[0-9]+]]:vgpr_32 = COPY $vgpr3 - ; GFX7: [[REG_SEQUENCE:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 - ; GFX7: [[COPY4:%[0-9]+]]:sreg_32 = COPY $sgpr2 - ; GFX7: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 4096 - ; GFX7: [[S_ADD_I32_:%[0-9]+]]:sreg_32 = S_ADD_I32 [[COPY4]], [[S_MOV_B32_]], implicit-def $scc - ; GFX7: [[COPY5:%[0-9]+]]:vgpr_32 = COPY [[S_ADD_I32_]] - ; GFX7: [[S_MOV_B32_1:%[0-9]+]]:sreg_32 = S_MOV_B32 0 - ; GFX7: [[COPY6:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub0_sub1 - ; GFX7: [[COPY7:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub2_sub3 - ; GFX7: [[S_MOV_B64_term:%[0-9]+]]:sreg_64_xexec = S_MOV_B64_term $exec - ; GFX7: bb.2: - ; GFX7: successors: %bb.3, %bb.2 - ; GFX7: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY6]].sub0, implicit $exec - ; GFX7: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY6]].sub1, implicit $exec - ; GFX7: [[REG_SEQUENCE1:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, 
[[V_READFIRSTLANE_B32_1]], %subreg.sub1 - ; GFX7: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE1]], [[COPY6]], implicit $exec - ; GFX7: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY7]].sub0, implicit $exec - ; GFX7: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY7]].sub1, implicit $exec - ; GFX7: [[REG_SEQUENCE2:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[V_READFIRSTLANE_B32_2]], %subreg.sub0, [[V_READFIRSTLANE_B32_3]], %subreg.sub1 - ; GFX7: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE2]], [[COPY7]], implicit $exec - ; GFX7: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_1]], [[V_CMP_EQ_U64_e64_]], implicit-def $scc - ; GFX7: [[REG_SEQUENCE3:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1, [[V_READFIRSTLANE_B32_2]], %subreg.sub2, [[V_READFIRSTLANE_B32_3]], %subreg.sub3 - ; GFX7: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_]], implicit-def $exec, implicit-def $scc, implicit $exec - ; GFX7: [[BUFFER_LOAD_DWORDX4_OFFEN:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY5]], [[REG_SEQUENCE3]], [[S_MOV_B32_1]], 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128), align 4) - ; GFX7: [[BUFFER_LOAD_DWORDX4_OFFEN1:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY5]], [[REG_SEQUENCE3]], [[S_MOV_B32_1]], 16, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128), align 4) - ; GFX7: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc - ; GFX7: SI_WATERFALL_LOOP %bb.2, implicit $exec - ; GFX7: bb.3: - ; GFX7: $exec = S_MOV_B64_term [[S_MOV_B64_term]] - ; GFX7: bb.4: - ; GFX7: [[REG_SEQUENCE4:%[0-9]+]]:vreg_256 = REG_SEQUENCE [[BUFFER_LOAD_DWORDX4_OFFEN]], %subreg.sub0_sub1_sub2_sub3, [[BUFFER_LOAD_DWORDX4_OFFEN1]], %subreg.sub4_sub5_sub6_sub7 - ; GFX7: [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub0 - ; GFX7: [[COPY9:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub1 - ; GFX7: [[COPY10:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub2 - ; GFX7: [[COPY11:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub3 - ; GFX7: [[COPY12:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub4 - ; GFX7: [[COPY13:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub5 - ; GFX7: [[COPY14:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub6 - ; GFX7: [[COPY15:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub7 - ; GFX7: $vgpr0 = COPY [[COPY8]] - ; GFX7: $vgpr1 = COPY [[COPY9]] - ; GFX7: $vgpr2 = COPY [[COPY10]] - ; GFX7: $vgpr3 = COPY [[COPY11]] - ; GFX7: $vgpr4 = COPY [[COPY12]] - ; GFX7: $vgpr5 = COPY [[COPY13]] - ; GFX7: $vgpr6 = COPY [[COPY14]] - ; GFX7: $vgpr7 = COPY [[COPY15]] - ; GFX7: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7 + ; GFX7-NEXT: liveins: $sgpr2, $vgpr0, $vgpr1, $vgpr2, $vgpr3 + ; GFX7-NEXT: {{ $}} + ; GFX7-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 + ; GFX7-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 + ; GFX7-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2 + ; GFX7-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY $vgpr3 + ; GFX7-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 + ; GFX7-NEXT: [[COPY4:%[0-9]+]]:sreg_32 = COPY $sgpr2 + ; GFX7-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 4096 + ; GFX7-NEXT: 
[[S_ADD_I32_:%[0-9]+]]:sreg_32 = S_ADD_I32 [[COPY4]], [[S_MOV_B32_]], implicit-def $scc + ; GFX7-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY [[S_ADD_I32_]] + ; GFX7-NEXT: [[S_MOV_B32_1:%[0-9]+]]:sreg_32 = S_MOV_B32 0 + ; GFX7-NEXT: [[COPY6:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub0_sub1 + ; GFX7-NEXT: [[COPY7:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub2_sub3 + ; GFX7-NEXT: [[S_MOV_B64_term:%[0-9]+]]:sreg_64_xexec = S_MOV_B64_term $exec + ; GFX7-NEXT: {{ $}} + ; GFX7-NEXT: bb.2: + ; GFX7-NEXT: successors: %bb.3, %bb.2 + ; GFX7-NEXT: {{ $}} + ; GFX7-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY6]].sub0, implicit $exec + ; GFX7-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY6]].sub1, implicit $exec + ; GFX7-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1 + ; GFX7-NEXT: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE1]], [[COPY6]], implicit $exec + ; GFX7-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY7]].sub0, implicit $exec + ; GFX7-NEXT: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY7]].sub1, implicit $exec + ; GFX7-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[V_READFIRSTLANE_B32_2]], %subreg.sub0, [[V_READFIRSTLANE_B32_3]], %subreg.sub1 + ; GFX7-NEXT: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE2]], [[COPY7]], implicit $exec + ; GFX7-NEXT: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_1]], [[V_CMP_EQ_U64_e64_]], implicit-def $scc + ; GFX7-NEXT: [[REG_SEQUENCE3:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1, [[V_READFIRSTLANE_B32_2]], %subreg.sub2, [[V_READFIRSTLANE_B32_3]], %subreg.sub3 + ; GFX7-NEXT: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_]], implicit-def $exec, implicit-def $scc, implicit $exec + ; GFX7-NEXT: [[BUFFER_LOAD_DWORDX4_OFFEN:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY5]], [[REG_SEQUENCE3]], [[S_MOV_B32_1]], 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128), align 4) + ; GFX7-NEXT: [[BUFFER_LOAD_DWORDX4_OFFEN1:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY5]], [[REG_SEQUENCE3]], [[S_MOV_B32_1]], 16, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128), align 4) + ; GFX7-NEXT: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc + ; GFX7-NEXT: SI_WATERFALL_LOOP %bb.2, implicit $exec + ; GFX7-NEXT: {{ $}} + ; GFX7-NEXT: bb.3: + ; GFX7-NEXT: $exec = S_MOV_B64_term [[S_MOV_B64_term]] + ; GFX7-NEXT: {{ $}} + ; GFX7-NEXT: bb.4: + ; GFX7-NEXT: [[REG_SEQUENCE4:%[0-9]+]]:vreg_256 = REG_SEQUENCE [[BUFFER_LOAD_DWORDX4_OFFEN]], %subreg.sub0_sub1_sub2_sub3, [[BUFFER_LOAD_DWORDX4_OFFEN1]], %subreg.sub4_sub5_sub6_sub7 + ; GFX7-NEXT: [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub0 + ; GFX7-NEXT: [[COPY9:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub1 + ; GFX7-NEXT: [[COPY10:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub2 + ; GFX7-NEXT: [[COPY11:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub3 + ; GFX7-NEXT: [[COPY12:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub4 + ; GFX7-NEXT: [[COPY13:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub5 + ; GFX7-NEXT: [[COPY14:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub6 + ; GFX7-NEXT: [[COPY15:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub7 + ; 
GFX7-NEXT: $vgpr0 = COPY [[COPY8]] + ; GFX7-NEXT: $vgpr1 = COPY [[COPY9]] + ; GFX7-NEXT: $vgpr2 = COPY [[COPY10]] + ; GFX7-NEXT: $vgpr3 = COPY [[COPY11]] + ; GFX7-NEXT: $vgpr4 = COPY [[COPY12]] + ; GFX7-NEXT: $vgpr5 = COPY [[COPY13]] + ; GFX7-NEXT: $vgpr6 = COPY [[COPY14]] + ; GFX7-NEXT: $vgpr7 = COPY [[COPY15]] + ; GFX7-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7 ; GFX8-LABEL: name: s_buffer_load_v8f32_vgpr_rsrc_add_4096 ; GFX8: bb.1 (%ir-block.0): - ; GFX8: liveins: $sgpr2, $vgpr0, $vgpr1, $vgpr2, $vgpr3 - ; GFX8: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX8: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; GFX8: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2 - ; GFX8: [[COPY3:%[0-9]+]]:vgpr_32 = COPY $vgpr3 - ; GFX8: [[REG_SEQUENCE:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 - ; GFX8: [[COPY4:%[0-9]+]]:sreg_32 = COPY $sgpr2 - ; GFX8: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 4096 - ; GFX8: [[S_ADD_I32_:%[0-9]+]]:sreg_32 = S_ADD_I32 [[COPY4]], [[S_MOV_B32_]], implicit-def $scc - ; GFX8: [[COPY5:%[0-9]+]]:vgpr_32 = COPY [[S_ADD_I32_]] - ; GFX8: [[S_MOV_B32_1:%[0-9]+]]:sreg_32 = S_MOV_B32 0 - ; GFX8: [[COPY6:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub0_sub1 - ; GFX8: [[COPY7:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub2_sub3 - ; GFX8: [[S_MOV_B64_term:%[0-9]+]]:sreg_64_xexec = S_MOV_B64_term $exec - ; GFX8: bb.2: - ; GFX8: successors: %bb.3, %bb.2 - ; GFX8: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY6]].sub0, implicit $exec - ; GFX8: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY6]].sub1, implicit $exec - ; GFX8: [[REG_SEQUENCE1:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1 - ; GFX8: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE1]], [[COPY6]], implicit $exec - ; GFX8: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY7]].sub0, implicit $exec - ; GFX8: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY7]].sub1, implicit $exec - ; GFX8: [[REG_SEQUENCE2:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[V_READFIRSTLANE_B32_2]], %subreg.sub0, [[V_READFIRSTLANE_B32_3]], %subreg.sub1 - ; GFX8: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE2]], [[COPY7]], implicit $exec - ; GFX8: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_1]], [[V_CMP_EQ_U64_e64_]], implicit-def $scc - ; GFX8: [[REG_SEQUENCE3:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1, [[V_READFIRSTLANE_B32_2]], %subreg.sub2, [[V_READFIRSTLANE_B32_3]], %subreg.sub3 - ; GFX8: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_]], implicit-def $exec, implicit-def $scc, implicit $exec - ; GFX8: [[BUFFER_LOAD_DWORDX4_OFFEN:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY5]], [[REG_SEQUENCE3]], [[S_MOV_B32_1]], 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128), align 4) - ; GFX8: [[BUFFER_LOAD_DWORDX4_OFFEN1:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY5]], [[REG_SEQUENCE3]], [[S_MOV_B32_1]], 16, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128), align 4) - ; GFX8: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc - ; GFX8: 
SI_WATERFALL_LOOP %bb.2, implicit $exec - ; GFX8: bb.3: - ; GFX8: $exec = S_MOV_B64_term [[S_MOV_B64_term]] - ; GFX8: bb.4: - ; GFX8: [[REG_SEQUENCE4:%[0-9]+]]:vreg_256 = REG_SEQUENCE [[BUFFER_LOAD_DWORDX4_OFFEN]], %subreg.sub0_sub1_sub2_sub3, [[BUFFER_LOAD_DWORDX4_OFFEN1]], %subreg.sub4_sub5_sub6_sub7 - ; GFX8: [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub0 - ; GFX8: [[COPY9:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub1 - ; GFX8: [[COPY10:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub2 - ; GFX8: [[COPY11:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub3 - ; GFX8: [[COPY12:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub4 - ; GFX8: [[COPY13:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub5 - ; GFX8: [[COPY14:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub6 - ; GFX8: [[COPY15:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub7 - ; GFX8: $vgpr0 = COPY [[COPY8]] - ; GFX8: $vgpr1 = COPY [[COPY9]] - ; GFX8: $vgpr2 = COPY [[COPY10]] - ; GFX8: $vgpr3 = COPY [[COPY11]] - ; GFX8: $vgpr4 = COPY [[COPY12]] - ; GFX8: $vgpr5 = COPY [[COPY13]] - ; GFX8: $vgpr6 = COPY [[COPY14]] - ; GFX8: $vgpr7 = COPY [[COPY15]] - ; GFX8: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7 + ; GFX8-NEXT: liveins: $sgpr2, $vgpr0, $vgpr1, $vgpr2, $vgpr3 + ; GFX8-NEXT: {{ $}} + ; GFX8-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 + ; GFX8-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 + ; GFX8-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2 + ; GFX8-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY $vgpr3 + ; GFX8-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 + ; GFX8-NEXT: [[COPY4:%[0-9]+]]:sreg_32 = COPY $sgpr2 + ; GFX8-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 4096 + ; GFX8-NEXT: [[S_ADD_I32_:%[0-9]+]]:sreg_32 = S_ADD_I32 [[COPY4]], [[S_MOV_B32_]], implicit-def $scc + ; GFX8-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY [[S_ADD_I32_]] + ; GFX8-NEXT: [[S_MOV_B32_1:%[0-9]+]]:sreg_32 = S_MOV_B32 0 + ; GFX8-NEXT: [[COPY6:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub0_sub1 + ; GFX8-NEXT: [[COPY7:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub2_sub3 + ; GFX8-NEXT: [[S_MOV_B64_term:%[0-9]+]]:sreg_64_xexec = S_MOV_B64_term $exec + ; GFX8-NEXT: {{ $}} + ; GFX8-NEXT: bb.2: + ; GFX8-NEXT: successors: %bb.3, %bb.2 + ; GFX8-NEXT: {{ $}} + ; GFX8-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY6]].sub0, implicit $exec + ; GFX8-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY6]].sub1, implicit $exec + ; GFX8-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1 + ; GFX8-NEXT: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE1]], [[COPY6]], implicit $exec + ; GFX8-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY7]].sub0, implicit $exec + ; GFX8-NEXT: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY7]].sub1, implicit $exec + ; GFX8-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[V_READFIRSTLANE_B32_2]], %subreg.sub0, [[V_READFIRSTLANE_B32_3]], %subreg.sub1 + ; GFX8-NEXT: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE2]], [[COPY7]], implicit $exec + ; GFX8-NEXT: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_1]], [[V_CMP_EQ_U64_e64_]], implicit-def 
$scc + ; GFX8-NEXT: [[REG_SEQUENCE3:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1, [[V_READFIRSTLANE_B32_2]], %subreg.sub2, [[V_READFIRSTLANE_B32_3]], %subreg.sub3 + ; GFX8-NEXT: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_]], implicit-def $exec, implicit-def $scc, implicit $exec + ; GFX8-NEXT: [[BUFFER_LOAD_DWORDX4_OFFEN:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY5]], [[REG_SEQUENCE3]], [[S_MOV_B32_1]], 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128), align 4) + ; GFX8-NEXT: [[BUFFER_LOAD_DWORDX4_OFFEN1:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY5]], [[REG_SEQUENCE3]], [[S_MOV_B32_1]], 16, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128), align 4) + ; GFX8-NEXT: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc + ; GFX8-NEXT: SI_WATERFALL_LOOP %bb.2, implicit $exec + ; GFX8-NEXT: {{ $}} + ; GFX8-NEXT: bb.3: + ; GFX8-NEXT: $exec = S_MOV_B64_term [[S_MOV_B64_term]] + ; GFX8-NEXT: {{ $}} + ; GFX8-NEXT: bb.4: + ; GFX8-NEXT: [[REG_SEQUENCE4:%[0-9]+]]:vreg_256 = REG_SEQUENCE [[BUFFER_LOAD_DWORDX4_OFFEN]], %subreg.sub0_sub1_sub2_sub3, [[BUFFER_LOAD_DWORDX4_OFFEN1]], %subreg.sub4_sub5_sub6_sub7 + ; GFX8-NEXT: [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub0 + ; GFX8-NEXT: [[COPY9:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub1 + ; GFX8-NEXT: [[COPY10:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub2 + ; GFX8-NEXT: [[COPY11:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub3 + ; GFX8-NEXT: [[COPY12:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub4 + ; GFX8-NEXT: [[COPY13:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub5 + ; GFX8-NEXT: [[COPY14:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub6 + ; GFX8-NEXT: [[COPY15:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub7 + ; GFX8-NEXT: $vgpr0 = COPY [[COPY8]] + ; GFX8-NEXT: $vgpr1 = COPY [[COPY9]] + ; GFX8-NEXT: $vgpr2 = COPY [[COPY10]] + ; GFX8-NEXT: $vgpr3 = COPY [[COPY11]] + ; GFX8-NEXT: $vgpr4 = COPY [[COPY12]] + ; GFX8-NEXT: $vgpr5 = COPY [[COPY13]] + ; GFX8-NEXT: $vgpr6 = COPY [[COPY14]] + ; GFX8-NEXT: $vgpr7 = COPY [[COPY15]] + ; GFX8-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7 %soffset = add i32 %soffset.base, 4096 %val = call <8 x float> @llvm.amdgcn.s.buffer.load.v8f32(<4 x i32> %rsrc, i32 %soffset, i32 0) ret <8 x float> %val @@ -3637,157 +3850,172 @@ define amdgpu_ps <8 x float> @s_buffer_load_v8f32_vgpr_rsrc_add_4096(<4 x i32> % define amdgpu_ps <8 x float> @s_buffer_load_v8f32_vgpr_offset_vgpr_rsrc_add_5000(<4 x i32> %rsrc, i32 %offset.base) { ; GFX6-LABEL: name: s_buffer_load_v8f32_vgpr_offset_vgpr_rsrc_add_5000 ; GFX6: bb.1 (%ir-block.0): - ; GFX6: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4 - ; GFX6: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX6: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; GFX6: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2 - ; GFX6: [[COPY3:%[0-9]+]]:vgpr_32 = COPY $vgpr3 - ; GFX6: [[REG_SEQUENCE:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 - ; GFX6: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr4 - ; GFX6: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 5000 - ; GFX6: [[COPY5:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub0_sub1 - ; GFX6: [[COPY6:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub2_sub3 - ; GFX6: [[S_MOV_B64_term:%[0-9]+]]:sreg_64_xexec = 
S_MOV_B64_term $exec - ; GFX6: bb.2: - ; GFX6: successors: %bb.3, %bb.2 - ; GFX6: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY5]].sub0, implicit $exec - ; GFX6: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY5]].sub1, implicit $exec - ; GFX6: [[REG_SEQUENCE1:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1 - ; GFX6: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE1]], [[COPY5]], implicit $exec - ; GFX6: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY6]].sub0, implicit $exec - ; GFX6: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY6]].sub1, implicit $exec - ; GFX6: [[REG_SEQUENCE2:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[V_READFIRSTLANE_B32_2]], %subreg.sub0, [[V_READFIRSTLANE_B32_3]], %subreg.sub1 - ; GFX6: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE2]], [[COPY6]], implicit $exec - ; GFX6: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_1]], [[V_CMP_EQ_U64_e64_]], implicit-def $scc - ; GFX6: [[REG_SEQUENCE3:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1, [[V_READFIRSTLANE_B32_2]], %subreg.sub2, [[V_READFIRSTLANE_B32_3]], %subreg.sub3 - ; GFX6: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_]], implicit-def $exec, implicit-def $scc, implicit $exec - ; GFX6: [[BUFFER_LOAD_DWORDX4_OFFEN:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY4]], [[REG_SEQUENCE3]], [[S_MOV_B32_]], 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128), align 4) - ; GFX6: [[BUFFER_LOAD_DWORDX4_OFFEN1:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY4]], [[REG_SEQUENCE3]], [[S_MOV_B32_]], 16, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128), align 4) - ; GFX6: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc - ; GFX6: SI_WATERFALL_LOOP %bb.2, implicit $exec - ; GFX6: bb.3: - ; GFX6: $exec = S_MOV_B64_term [[S_MOV_B64_term]] - ; GFX6: bb.4: - ; GFX6: [[REG_SEQUENCE4:%[0-9]+]]:vreg_256 = REG_SEQUENCE [[BUFFER_LOAD_DWORDX4_OFFEN]], %subreg.sub0_sub1_sub2_sub3, [[BUFFER_LOAD_DWORDX4_OFFEN1]], %subreg.sub4_sub5_sub6_sub7 - ; GFX6: [[COPY7:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub0 - ; GFX6: [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub1 - ; GFX6: [[COPY9:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub2 - ; GFX6: [[COPY10:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub3 - ; GFX6: [[COPY11:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub4 - ; GFX6: [[COPY12:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub5 - ; GFX6: [[COPY13:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub6 - ; GFX6: [[COPY14:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub7 - ; GFX6: $vgpr0 = COPY [[COPY7]] - ; GFX6: $vgpr1 = COPY [[COPY8]] - ; GFX6: $vgpr2 = COPY [[COPY9]] - ; GFX6: $vgpr3 = COPY [[COPY10]] - ; GFX6: $vgpr4 = COPY [[COPY11]] - ; GFX6: $vgpr5 = COPY [[COPY12]] - ; GFX6: $vgpr6 = COPY [[COPY13]] - ; GFX6: $vgpr7 = COPY [[COPY14]] - ; GFX6: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7 + ; GFX6-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4 + ; GFX6-NEXT: {{ $}} + ; GFX6-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 + ; GFX6-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 + ; GFX6-NEXT: 
[[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2 + ; GFX6-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY $vgpr3 + ; GFX6-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 + ; GFX6-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr4 + ; GFX6-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 5000 + ; GFX6-NEXT: [[COPY5:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub0_sub1 + ; GFX6-NEXT: [[COPY6:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub2_sub3 + ; GFX6-NEXT: [[S_MOV_B64_term:%[0-9]+]]:sreg_64_xexec = S_MOV_B64_term $exec + ; GFX6-NEXT: {{ $}} + ; GFX6-NEXT: bb.2: + ; GFX6-NEXT: successors: %bb.3, %bb.2 + ; GFX6-NEXT: {{ $}} + ; GFX6-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY5]].sub0, implicit $exec + ; GFX6-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY5]].sub1, implicit $exec + ; GFX6-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1 + ; GFX6-NEXT: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE1]], [[COPY5]], implicit $exec + ; GFX6-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY6]].sub0, implicit $exec + ; GFX6-NEXT: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY6]].sub1, implicit $exec + ; GFX6-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[V_READFIRSTLANE_B32_2]], %subreg.sub0, [[V_READFIRSTLANE_B32_3]], %subreg.sub1 + ; GFX6-NEXT: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE2]], [[COPY6]], implicit $exec + ; GFX6-NEXT: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_1]], [[V_CMP_EQ_U64_e64_]], implicit-def $scc + ; GFX6-NEXT: [[REG_SEQUENCE3:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1, [[V_READFIRSTLANE_B32_2]], %subreg.sub2, [[V_READFIRSTLANE_B32_3]], %subreg.sub3 + ; GFX6-NEXT: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_]], implicit-def $exec, implicit-def $scc, implicit $exec + ; GFX6-NEXT: [[BUFFER_LOAD_DWORDX4_OFFEN:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY4]], [[REG_SEQUENCE3]], [[S_MOV_B32_]], 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128), align 4) + ; GFX6-NEXT: [[BUFFER_LOAD_DWORDX4_OFFEN1:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY4]], [[REG_SEQUENCE3]], [[S_MOV_B32_]], 16, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128), align 4) + ; GFX6-NEXT: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc + ; GFX6-NEXT: SI_WATERFALL_LOOP %bb.2, implicit $exec + ; GFX6-NEXT: {{ $}} + ; GFX6-NEXT: bb.3: + ; GFX6-NEXT: $exec = S_MOV_B64_term [[S_MOV_B64_term]] + ; GFX6-NEXT: {{ $}} + ; GFX6-NEXT: bb.4: + ; GFX6-NEXT: [[REG_SEQUENCE4:%[0-9]+]]:vreg_256 = REG_SEQUENCE [[BUFFER_LOAD_DWORDX4_OFFEN]], %subreg.sub0_sub1_sub2_sub3, [[BUFFER_LOAD_DWORDX4_OFFEN1]], %subreg.sub4_sub5_sub6_sub7 + ; GFX6-NEXT: [[COPY7:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub0 + ; GFX6-NEXT: [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub1 + ; GFX6-NEXT: [[COPY9:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub2 + ; GFX6-NEXT: [[COPY10:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub3 + ; GFX6-NEXT: [[COPY11:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub4 + ; GFX6-NEXT: [[COPY12:%[0-9]+]]:vgpr_32 = COPY 
[[REG_SEQUENCE4]].sub5 + ; GFX6-NEXT: [[COPY13:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub6 + ; GFX6-NEXT: [[COPY14:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub7 + ; GFX6-NEXT: $vgpr0 = COPY [[COPY7]] + ; GFX6-NEXT: $vgpr1 = COPY [[COPY8]] + ; GFX6-NEXT: $vgpr2 = COPY [[COPY9]] + ; GFX6-NEXT: $vgpr3 = COPY [[COPY10]] + ; GFX6-NEXT: $vgpr4 = COPY [[COPY11]] + ; GFX6-NEXT: $vgpr5 = COPY [[COPY12]] + ; GFX6-NEXT: $vgpr6 = COPY [[COPY13]] + ; GFX6-NEXT: $vgpr7 = COPY [[COPY14]] + ; GFX6-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7 ; GFX7-LABEL: name: s_buffer_load_v8f32_vgpr_offset_vgpr_rsrc_add_5000 ; GFX7: bb.1 (%ir-block.0): - ; GFX7: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4 - ; GFX7: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX7: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; GFX7: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2 - ; GFX7: [[COPY3:%[0-9]+]]:vgpr_32 = COPY $vgpr3 - ; GFX7: [[REG_SEQUENCE:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 - ; GFX7: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr4 - ; GFX7: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 5000 - ; GFX7: [[COPY5:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub0_sub1 - ; GFX7: [[COPY6:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub2_sub3 - ; GFX7: [[S_MOV_B64_term:%[0-9]+]]:sreg_64_xexec = S_MOV_B64_term $exec - ; GFX7: bb.2: - ; GFX7: successors: %bb.3, %bb.2 - ; GFX7: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY5]].sub0, implicit $exec - ; GFX7: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY5]].sub1, implicit $exec - ; GFX7: [[REG_SEQUENCE1:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1 - ; GFX7: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE1]], [[COPY5]], implicit $exec - ; GFX7: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY6]].sub0, implicit $exec - ; GFX7: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY6]].sub1, implicit $exec - ; GFX7: [[REG_SEQUENCE2:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[V_READFIRSTLANE_B32_2]], %subreg.sub0, [[V_READFIRSTLANE_B32_3]], %subreg.sub1 - ; GFX7: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE2]], [[COPY6]], implicit $exec - ; GFX7: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_1]], [[V_CMP_EQ_U64_e64_]], implicit-def $scc - ; GFX7: [[REG_SEQUENCE3:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1, [[V_READFIRSTLANE_B32_2]], %subreg.sub2, [[V_READFIRSTLANE_B32_3]], %subreg.sub3 - ; GFX7: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_]], implicit-def $exec, implicit-def $scc, implicit $exec - ; GFX7: [[BUFFER_LOAD_DWORDX4_OFFEN:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY4]], [[REG_SEQUENCE3]], [[S_MOV_B32_]], 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128), align 4) - ; GFX7: [[BUFFER_LOAD_DWORDX4_OFFEN1:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY4]], [[REG_SEQUENCE3]], [[S_MOV_B32_]], 16, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128), align 4) - ; GFX7: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc - ; GFX7: SI_WATERFALL_LOOP %bb.2, implicit 
$exec - ; GFX7: bb.3: - ; GFX7: $exec = S_MOV_B64_term [[S_MOV_B64_term]] - ; GFX7: bb.4: - ; GFX7: [[REG_SEQUENCE4:%[0-9]+]]:vreg_256 = REG_SEQUENCE [[BUFFER_LOAD_DWORDX4_OFFEN]], %subreg.sub0_sub1_sub2_sub3, [[BUFFER_LOAD_DWORDX4_OFFEN1]], %subreg.sub4_sub5_sub6_sub7 - ; GFX7: [[COPY7:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub0 - ; GFX7: [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub1 - ; GFX7: [[COPY9:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub2 - ; GFX7: [[COPY10:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub3 - ; GFX7: [[COPY11:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub4 - ; GFX7: [[COPY12:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub5 - ; GFX7: [[COPY13:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub6 - ; GFX7: [[COPY14:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub7 - ; GFX7: $vgpr0 = COPY [[COPY7]] - ; GFX7: $vgpr1 = COPY [[COPY8]] - ; GFX7: $vgpr2 = COPY [[COPY9]] - ; GFX7: $vgpr3 = COPY [[COPY10]] - ; GFX7: $vgpr4 = COPY [[COPY11]] - ; GFX7: $vgpr5 = COPY [[COPY12]] - ; GFX7: $vgpr6 = COPY [[COPY13]] - ; GFX7: $vgpr7 = COPY [[COPY14]] - ; GFX7: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7 + ; GFX7-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4 + ; GFX7-NEXT: {{ $}} + ; GFX7-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 + ; GFX7-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 + ; GFX7-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2 + ; GFX7-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY $vgpr3 + ; GFX7-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 + ; GFX7-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr4 + ; GFX7-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 5000 + ; GFX7-NEXT: [[COPY5:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub0_sub1 + ; GFX7-NEXT: [[COPY6:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub2_sub3 + ; GFX7-NEXT: [[S_MOV_B64_term:%[0-9]+]]:sreg_64_xexec = S_MOV_B64_term $exec + ; GFX7-NEXT: {{ $}} + ; GFX7-NEXT: bb.2: + ; GFX7-NEXT: successors: %bb.3, %bb.2 + ; GFX7-NEXT: {{ $}} + ; GFX7-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY5]].sub0, implicit $exec + ; GFX7-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY5]].sub1, implicit $exec + ; GFX7-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1 + ; GFX7-NEXT: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE1]], [[COPY5]], implicit $exec + ; GFX7-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY6]].sub0, implicit $exec + ; GFX7-NEXT: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY6]].sub1, implicit $exec + ; GFX7-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[V_READFIRSTLANE_B32_2]], %subreg.sub0, [[V_READFIRSTLANE_B32_3]], %subreg.sub1 + ; GFX7-NEXT: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE2]], [[COPY6]], implicit $exec + ; GFX7-NEXT: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_1]], [[V_CMP_EQ_U64_e64_]], implicit-def $scc + ; GFX7-NEXT: [[REG_SEQUENCE3:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1, [[V_READFIRSTLANE_B32_2]], %subreg.sub2, [[V_READFIRSTLANE_B32_3]], %subreg.sub3 + ; GFX7-NEXT: 
[[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_]], implicit-def $exec, implicit-def $scc, implicit $exec + ; GFX7-NEXT: [[BUFFER_LOAD_DWORDX4_OFFEN:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY4]], [[REG_SEQUENCE3]], [[S_MOV_B32_]], 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128), align 4) + ; GFX7-NEXT: [[BUFFER_LOAD_DWORDX4_OFFEN1:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY4]], [[REG_SEQUENCE3]], [[S_MOV_B32_]], 16, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128), align 4) + ; GFX7-NEXT: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc + ; GFX7-NEXT: SI_WATERFALL_LOOP %bb.2, implicit $exec + ; GFX7-NEXT: {{ $}} + ; GFX7-NEXT: bb.3: + ; GFX7-NEXT: $exec = S_MOV_B64_term [[S_MOV_B64_term]] + ; GFX7-NEXT: {{ $}} + ; GFX7-NEXT: bb.4: + ; GFX7-NEXT: [[REG_SEQUENCE4:%[0-9]+]]:vreg_256 = REG_SEQUENCE [[BUFFER_LOAD_DWORDX4_OFFEN]], %subreg.sub0_sub1_sub2_sub3, [[BUFFER_LOAD_DWORDX4_OFFEN1]], %subreg.sub4_sub5_sub6_sub7 + ; GFX7-NEXT: [[COPY7:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub0 + ; GFX7-NEXT: [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub1 + ; GFX7-NEXT: [[COPY9:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub2 + ; GFX7-NEXT: [[COPY10:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub3 + ; GFX7-NEXT: [[COPY11:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub4 + ; GFX7-NEXT: [[COPY12:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub5 + ; GFX7-NEXT: [[COPY13:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub6 + ; GFX7-NEXT: [[COPY14:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub7 + ; GFX7-NEXT: $vgpr0 = COPY [[COPY7]] + ; GFX7-NEXT: $vgpr1 = COPY [[COPY8]] + ; GFX7-NEXT: $vgpr2 = COPY [[COPY9]] + ; GFX7-NEXT: $vgpr3 = COPY [[COPY10]] + ; GFX7-NEXT: $vgpr4 = COPY [[COPY11]] + ; GFX7-NEXT: $vgpr5 = COPY [[COPY12]] + ; GFX7-NEXT: $vgpr6 = COPY [[COPY13]] + ; GFX7-NEXT: $vgpr7 = COPY [[COPY14]] + ; GFX7-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7 ; GFX8-LABEL: name: s_buffer_load_v8f32_vgpr_offset_vgpr_rsrc_add_5000 ; GFX8: bb.1 (%ir-block.0): - ; GFX8: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4 - ; GFX8: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX8: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; GFX8: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2 - ; GFX8: [[COPY3:%[0-9]+]]:vgpr_32 = COPY $vgpr3 - ; GFX8: [[REG_SEQUENCE:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 - ; GFX8: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr4 - ; GFX8: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 4064 - ; GFX8: [[COPY5:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub0_sub1 - ; GFX8: [[COPY6:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub2_sub3 - ; GFX8: [[S_MOV_B64_term:%[0-9]+]]:sreg_64_xexec = S_MOV_B64_term $exec - ; GFX8: bb.2: - ; GFX8: successors: %bb.3, %bb.2 - ; GFX8: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY5]].sub0, implicit $exec - ; GFX8: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY5]].sub1, implicit $exec - ; GFX8: [[REG_SEQUENCE1:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1 - ; GFX8: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE1]], [[COPY5]], implicit $exec - ; GFX8: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 
[[COPY6]].sub0, implicit $exec - ; GFX8: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY6]].sub1, implicit $exec - ; GFX8: [[REG_SEQUENCE2:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[V_READFIRSTLANE_B32_2]], %subreg.sub0, [[V_READFIRSTLANE_B32_3]], %subreg.sub1 - ; GFX8: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE2]], [[COPY6]], implicit $exec - ; GFX8: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_1]], [[V_CMP_EQ_U64_e64_]], implicit-def $scc - ; GFX8: [[REG_SEQUENCE3:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1, [[V_READFIRSTLANE_B32_2]], %subreg.sub2, [[V_READFIRSTLANE_B32_3]], %subreg.sub3 - ; GFX8: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_]], implicit-def $exec, implicit-def $scc, implicit $exec - ; GFX8: [[BUFFER_LOAD_DWORDX4_OFFEN:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY4]], [[REG_SEQUENCE3]], [[S_MOV_B32_]], 936, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128), align 4) - ; GFX8: [[BUFFER_LOAD_DWORDX4_OFFEN1:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY4]], [[REG_SEQUENCE3]], [[S_MOV_B32_]], 952, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128), align 4) - ; GFX8: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc - ; GFX8: SI_WATERFALL_LOOP %bb.2, implicit $exec - ; GFX8: bb.3: - ; GFX8: $exec = S_MOV_B64_term [[S_MOV_B64_term]] - ; GFX8: bb.4: - ; GFX8: [[REG_SEQUENCE4:%[0-9]+]]:vreg_256 = REG_SEQUENCE [[BUFFER_LOAD_DWORDX4_OFFEN]], %subreg.sub0_sub1_sub2_sub3, [[BUFFER_LOAD_DWORDX4_OFFEN1]], %subreg.sub4_sub5_sub6_sub7 - ; GFX8: [[COPY7:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub0 - ; GFX8: [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub1 - ; GFX8: [[COPY9:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub2 - ; GFX8: [[COPY10:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub3 - ; GFX8: [[COPY11:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub4 - ; GFX8: [[COPY12:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub5 - ; GFX8: [[COPY13:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub6 - ; GFX8: [[COPY14:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub7 - ; GFX8: $vgpr0 = COPY [[COPY7]] - ; GFX8: $vgpr1 = COPY [[COPY8]] - ; GFX8: $vgpr2 = COPY [[COPY9]] - ; GFX8: $vgpr3 = COPY [[COPY10]] - ; GFX8: $vgpr4 = COPY [[COPY11]] - ; GFX8: $vgpr5 = COPY [[COPY12]] - ; GFX8: $vgpr6 = COPY [[COPY13]] - ; GFX8: $vgpr7 = COPY [[COPY14]] - ; GFX8: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7 + ; GFX8-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4 + ; GFX8-NEXT: {{ $}} + ; GFX8-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 + ; GFX8-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 + ; GFX8-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2 + ; GFX8-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY $vgpr3 + ; GFX8-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 + ; GFX8-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr4 + ; GFX8-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 4064 + ; GFX8-NEXT: [[COPY5:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub0_sub1 + ; GFX8-NEXT: [[COPY6:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub2_sub3 + ; GFX8-NEXT: [[S_MOV_B64_term:%[0-9]+]]:sreg_64_xexec = S_MOV_B64_term $exec + ; GFX8-NEXT: {{ $}} + ; GFX8-NEXT: 
bb.2: + ; GFX8-NEXT: successors: %bb.3, %bb.2 + ; GFX8-NEXT: {{ $}} + ; GFX8-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY5]].sub0, implicit $exec + ; GFX8-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY5]].sub1, implicit $exec + ; GFX8-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1 + ; GFX8-NEXT: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE1]], [[COPY5]], implicit $exec + ; GFX8-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY6]].sub0, implicit $exec + ; GFX8-NEXT: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY6]].sub1, implicit $exec + ; GFX8-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[V_READFIRSTLANE_B32_2]], %subreg.sub0, [[V_READFIRSTLANE_B32_3]], %subreg.sub1 + ; GFX8-NEXT: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE2]], [[COPY6]], implicit $exec + ; GFX8-NEXT: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_1]], [[V_CMP_EQ_U64_e64_]], implicit-def $scc + ; GFX8-NEXT: [[REG_SEQUENCE3:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1, [[V_READFIRSTLANE_B32_2]], %subreg.sub2, [[V_READFIRSTLANE_B32_3]], %subreg.sub3 + ; GFX8-NEXT: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_]], implicit-def $exec, implicit-def $scc, implicit $exec + ; GFX8-NEXT: [[BUFFER_LOAD_DWORDX4_OFFEN:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY4]], [[REG_SEQUENCE3]], [[S_MOV_B32_]], 936, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128), align 4) + ; GFX8-NEXT: [[BUFFER_LOAD_DWORDX4_OFFEN1:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY4]], [[REG_SEQUENCE3]], [[S_MOV_B32_]], 952, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128), align 4) + ; GFX8-NEXT: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc + ; GFX8-NEXT: SI_WATERFALL_LOOP %bb.2, implicit $exec + ; GFX8-NEXT: {{ $}} + ; GFX8-NEXT: bb.3: + ; GFX8-NEXT: $exec = S_MOV_B64_term [[S_MOV_B64_term]] + ; GFX8-NEXT: {{ $}} + ; GFX8-NEXT: bb.4: + ; GFX8-NEXT: [[REG_SEQUENCE4:%[0-9]+]]:vreg_256 = REG_SEQUENCE [[BUFFER_LOAD_DWORDX4_OFFEN]], %subreg.sub0_sub1_sub2_sub3, [[BUFFER_LOAD_DWORDX4_OFFEN1]], %subreg.sub4_sub5_sub6_sub7 + ; GFX8-NEXT: [[COPY7:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub0 + ; GFX8-NEXT: [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub1 + ; GFX8-NEXT: [[COPY9:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub2 + ; GFX8-NEXT: [[COPY10:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub3 + ; GFX8-NEXT: [[COPY11:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub4 + ; GFX8-NEXT: [[COPY12:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub5 + ; GFX8-NEXT: [[COPY13:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub6 + ; GFX8-NEXT: [[COPY14:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub7 + ; GFX8-NEXT: $vgpr0 = COPY [[COPY7]] + ; GFX8-NEXT: $vgpr1 = COPY [[COPY8]] + ; GFX8-NEXT: $vgpr2 = COPY [[COPY9]] + ; GFX8-NEXT: $vgpr3 = COPY [[COPY10]] + ; GFX8-NEXT: $vgpr4 = COPY [[COPY11]] + ; GFX8-NEXT: $vgpr5 = COPY [[COPY12]] + ; GFX8-NEXT: $vgpr6 = COPY [[COPY13]] + ; GFX8-NEXT: $vgpr7 = COPY [[COPY14]] + ; GFX8-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7 
%soffset = add i32 %offset.base, 5000 %val = call <8 x float> @llvm.amdgcn.s.buffer.load.v8f32(<4 x i32> %rsrc, i32 %soffset, i32 0) ret <8 x float> %val @@ -3796,157 +4024,172 @@ define amdgpu_ps <8 x float> @s_buffer_load_v8f32_vgpr_offset_vgpr_rsrc_add_5000 define amdgpu_ps <8 x float> @s_buffer_load_v8f32_vgpr_offset_vgpr_rsrc_add_4076(<4 x i32> %rsrc, i32 %offset.base) { ; GFX6-LABEL: name: s_buffer_load_v8f32_vgpr_offset_vgpr_rsrc_add_4076 ; GFX6: bb.1 (%ir-block.0): - ; GFX6: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4 - ; GFX6: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX6: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; GFX6: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2 - ; GFX6: [[COPY3:%[0-9]+]]:vgpr_32 = COPY $vgpr3 - ; GFX6: [[REG_SEQUENCE:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 - ; GFX6: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr4 - ; GFX6: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 4076 - ; GFX6: [[COPY5:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub0_sub1 - ; GFX6: [[COPY6:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub2_sub3 - ; GFX6: [[S_MOV_B64_term:%[0-9]+]]:sreg_64_xexec = S_MOV_B64_term $exec - ; GFX6: bb.2: - ; GFX6: successors: %bb.3, %bb.2 - ; GFX6: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY5]].sub0, implicit $exec - ; GFX6: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY5]].sub1, implicit $exec - ; GFX6: [[REG_SEQUENCE1:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1 - ; GFX6: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE1]], [[COPY5]], implicit $exec - ; GFX6: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY6]].sub0, implicit $exec - ; GFX6: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY6]].sub1, implicit $exec - ; GFX6: [[REG_SEQUENCE2:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[V_READFIRSTLANE_B32_2]], %subreg.sub0, [[V_READFIRSTLANE_B32_3]], %subreg.sub1 - ; GFX6: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE2]], [[COPY6]], implicit $exec - ; GFX6: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_1]], [[V_CMP_EQ_U64_e64_]], implicit-def $scc - ; GFX6: [[REG_SEQUENCE3:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1, [[V_READFIRSTLANE_B32_2]], %subreg.sub2, [[V_READFIRSTLANE_B32_3]], %subreg.sub3 - ; GFX6: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_]], implicit-def $exec, implicit-def $scc, implicit $exec - ; GFX6: [[BUFFER_LOAD_DWORDX4_OFFEN:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY4]], [[REG_SEQUENCE3]], [[S_MOV_B32_]], 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128), align 4) - ; GFX6: [[BUFFER_LOAD_DWORDX4_OFFEN1:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY4]], [[REG_SEQUENCE3]], [[S_MOV_B32_]], 16, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128), align 4) - ; GFX6: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc - ; GFX6: SI_WATERFALL_LOOP %bb.2, implicit $exec - ; GFX6: bb.3: - ; GFX6: $exec = S_MOV_B64_term [[S_MOV_B64_term]] - ; GFX6: bb.4: - ; GFX6: [[REG_SEQUENCE4:%[0-9]+]]:vreg_256 = REG_SEQUENCE [[BUFFER_LOAD_DWORDX4_OFFEN]], %subreg.sub0_sub1_sub2_sub3, [[BUFFER_LOAD_DWORDX4_OFFEN1]], %subreg.sub4_sub5_sub6_sub7 - ; 
GFX6: [[COPY7:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub0 - ; GFX6: [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub1 - ; GFX6: [[COPY9:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub2 - ; GFX6: [[COPY10:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub3 - ; GFX6: [[COPY11:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub4 - ; GFX6: [[COPY12:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub5 - ; GFX6: [[COPY13:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub6 - ; GFX6: [[COPY14:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub7 - ; GFX6: $vgpr0 = COPY [[COPY7]] - ; GFX6: $vgpr1 = COPY [[COPY8]] - ; GFX6: $vgpr2 = COPY [[COPY9]] - ; GFX6: $vgpr3 = COPY [[COPY10]] - ; GFX6: $vgpr4 = COPY [[COPY11]] - ; GFX6: $vgpr5 = COPY [[COPY12]] - ; GFX6: $vgpr6 = COPY [[COPY13]] - ; GFX6: $vgpr7 = COPY [[COPY14]] - ; GFX6: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7 + ; GFX6-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4 + ; GFX6-NEXT: {{ $}} + ; GFX6-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 + ; GFX6-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 + ; GFX6-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2 + ; GFX6-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY $vgpr3 + ; GFX6-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 + ; GFX6-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr4 + ; GFX6-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 4076 + ; GFX6-NEXT: [[COPY5:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub0_sub1 + ; GFX6-NEXT: [[COPY6:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub2_sub3 + ; GFX6-NEXT: [[S_MOV_B64_term:%[0-9]+]]:sreg_64_xexec = S_MOV_B64_term $exec + ; GFX6-NEXT: {{ $}} + ; GFX6-NEXT: bb.2: + ; GFX6-NEXT: successors: %bb.3, %bb.2 + ; GFX6-NEXT: {{ $}} + ; GFX6-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY5]].sub0, implicit $exec + ; GFX6-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY5]].sub1, implicit $exec + ; GFX6-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1 + ; GFX6-NEXT: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE1]], [[COPY5]], implicit $exec + ; GFX6-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY6]].sub0, implicit $exec + ; GFX6-NEXT: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY6]].sub1, implicit $exec + ; GFX6-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[V_READFIRSTLANE_B32_2]], %subreg.sub0, [[V_READFIRSTLANE_B32_3]], %subreg.sub1 + ; GFX6-NEXT: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE2]], [[COPY6]], implicit $exec + ; GFX6-NEXT: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_1]], [[V_CMP_EQ_U64_e64_]], implicit-def $scc + ; GFX6-NEXT: [[REG_SEQUENCE3:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1, [[V_READFIRSTLANE_B32_2]], %subreg.sub2, [[V_READFIRSTLANE_B32_3]], %subreg.sub3 + ; GFX6-NEXT: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_]], implicit-def $exec, implicit-def $scc, implicit $exec + ; GFX6-NEXT: [[BUFFER_LOAD_DWORDX4_OFFEN:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY4]], [[REG_SEQUENCE3]], [[S_MOV_B32_]], 0, 0, 0, 0, 
implicit $exec :: (dereferenceable invariant load (s128), align 4) + ; GFX6-NEXT: [[BUFFER_LOAD_DWORDX4_OFFEN1:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY4]], [[REG_SEQUENCE3]], [[S_MOV_B32_]], 16, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128), align 4) + ; GFX6-NEXT: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc + ; GFX6-NEXT: SI_WATERFALL_LOOP %bb.2, implicit $exec + ; GFX6-NEXT: {{ $}} + ; GFX6-NEXT: bb.3: + ; GFX6-NEXT: $exec = S_MOV_B64_term [[S_MOV_B64_term]] + ; GFX6-NEXT: {{ $}} + ; GFX6-NEXT: bb.4: + ; GFX6-NEXT: [[REG_SEQUENCE4:%[0-9]+]]:vreg_256 = REG_SEQUENCE [[BUFFER_LOAD_DWORDX4_OFFEN]], %subreg.sub0_sub1_sub2_sub3, [[BUFFER_LOAD_DWORDX4_OFFEN1]], %subreg.sub4_sub5_sub6_sub7 + ; GFX6-NEXT: [[COPY7:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub0 + ; GFX6-NEXT: [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub1 + ; GFX6-NEXT: [[COPY9:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub2 + ; GFX6-NEXT: [[COPY10:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub3 + ; GFX6-NEXT: [[COPY11:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub4 + ; GFX6-NEXT: [[COPY12:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub5 + ; GFX6-NEXT: [[COPY13:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub6 + ; GFX6-NEXT: [[COPY14:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub7 + ; GFX6-NEXT: $vgpr0 = COPY [[COPY7]] + ; GFX6-NEXT: $vgpr1 = COPY [[COPY8]] + ; GFX6-NEXT: $vgpr2 = COPY [[COPY9]] + ; GFX6-NEXT: $vgpr3 = COPY [[COPY10]] + ; GFX6-NEXT: $vgpr4 = COPY [[COPY11]] + ; GFX6-NEXT: $vgpr5 = COPY [[COPY12]] + ; GFX6-NEXT: $vgpr6 = COPY [[COPY13]] + ; GFX6-NEXT: $vgpr7 = COPY [[COPY14]] + ; GFX6-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7 ; GFX7-LABEL: name: s_buffer_load_v8f32_vgpr_offset_vgpr_rsrc_add_4076 ; GFX7: bb.1 (%ir-block.0): - ; GFX7: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4 - ; GFX7: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX7: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; GFX7: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2 - ; GFX7: [[COPY3:%[0-9]+]]:vgpr_32 = COPY $vgpr3 - ; GFX7: [[REG_SEQUENCE:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 - ; GFX7: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr4 - ; GFX7: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 4076 - ; GFX7: [[COPY5:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub0_sub1 - ; GFX7: [[COPY6:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub2_sub3 - ; GFX7: [[S_MOV_B64_term:%[0-9]+]]:sreg_64_xexec = S_MOV_B64_term $exec - ; GFX7: bb.2: - ; GFX7: successors: %bb.3, %bb.2 - ; GFX7: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY5]].sub0, implicit $exec - ; GFX7: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY5]].sub1, implicit $exec - ; GFX7: [[REG_SEQUENCE1:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1 - ; GFX7: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE1]], [[COPY5]], implicit $exec - ; GFX7: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY6]].sub0, implicit $exec - ; GFX7: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY6]].sub1, implicit $exec - ; GFX7: [[REG_SEQUENCE2:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[V_READFIRSTLANE_B32_2]], %subreg.sub0, [[V_READFIRSTLANE_B32_3]], %subreg.sub1 - ; GFX7: 
[[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE2]], [[COPY6]], implicit $exec - ; GFX7: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_1]], [[V_CMP_EQ_U64_e64_]], implicit-def $scc - ; GFX7: [[REG_SEQUENCE3:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1, [[V_READFIRSTLANE_B32_2]], %subreg.sub2, [[V_READFIRSTLANE_B32_3]], %subreg.sub3 - ; GFX7: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_]], implicit-def $exec, implicit-def $scc, implicit $exec - ; GFX7: [[BUFFER_LOAD_DWORDX4_OFFEN:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY4]], [[REG_SEQUENCE3]], [[S_MOV_B32_]], 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128), align 4) - ; GFX7: [[BUFFER_LOAD_DWORDX4_OFFEN1:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY4]], [[REG_SEQUENCE3]], [[S_MOV_B32_]], 16, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128), align 4) - ; GFX7: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc - ; GFX7: SI_WATERFALL_LOOP %bb.2, implicit $exec - ; GFX7: bb.3: - ; GFX7: $exec = S_MOV_B64_term [[S_MOV_B64_term]] - ; GFX7: bb.4: - ; GFX7: [[REG_SEQUENCE4:%[0-9]+]]:vreg_256 = REG_SEQUENCE [[BUFFER_LOAD_DWORDX4_OFFEN]], %subreg.sub0_sub1_sub2_sub3, [[BUFFER_LOAD_DWORDX4_OFFEN1]], %subreg.sub4_sub5_sub6_sub7 - ; GFX7: [[COPY7:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub0 - ; GFX7: [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub1 - ; GFX7: [[COPY9:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub2 - ; GFX7: [[COPY10:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub3 - ; GFX7: [[COPY11:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub4 - ; GFX7: [[COPY12:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub5 - ; GFX7: [[COPY13:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub6 - ; GFX7: [[COPY14:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub7 - ; GFX7: $vgpr0 = COPY [[COPY7]] - ; GFX7: $vgpr1 = COPY [[COPY8]] - ; GFX7: $vgpr2 = COPY [[COPY9]] - ; GFX7: $vgpr3 = COPY [[COPY10]] - ; GFX7: $vgpr4 = COPY [[COPY11]] - ; GFX7: $vgpr5 = COPY [[COPY12]] - ; GFX7: $vgpr6 = COPY [[COPY13]] - ; GFX7: $vgpr7 = COPY [[COPY14]] - ; GFX7: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7 + ; GFX7-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4 + ; GFX7-NEXT: {{ $}} + ; GFX7-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 + ; GFX7-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 + ; GFX7-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2 + ; GFX7-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY $vgpr3 + ; GFX7-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 + ; GFX7-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr4 + ; GFX7-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 4076 + ; GFX7-NEXT: [[COPY5:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub0_sub1 + ; GFX7-NEXT: [[COPY6:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub2_sub3 + ; GFX7-NEXT: [[S_MOV_B64_term:%[0-9]+]]:sreg_64_xexec = S_MOV_B64_term $exec + ; GFX7-NEXT: {{ $}} + ; GFX7-NEXT: bb.2: + ; GFX7-NEXT: successors: %bb.3, %bb.2 + ; GFX7-NEXT: {{ $}} + ; GFX7-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY5]].sub0, implicit $exec + ; GFX7-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY5]].sub1, implicit $exec + ; 
GFX7-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1 + ; GFX7-NEXT: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE1]], [[COPY5]], implicit $exec + ; GFX7-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY6]].sub0, implicit $exec + ; GFX7-NEXT: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY6]].sub1, implicit $exec + ; GFX7-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[V_READFIRSTLANE_B32_2]], %subreg.sub0, [[V_READFIRSTLANE_B32_3]], %subreg.sub1 + ; GFX7-NEXT: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE2]], [[COPY6]], implicit $exec + ; GFX7-NEXT: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_1]], [[V_CMP_EQ_U64_e64_]], implicit-def $scc + ; GFX7-NEXT: [[REG_SEQUENCE3:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1, [[V_READFIRSTLANE_B32_2]], %subreg.sub2, [[V_READFIRSTLANE_B32_3]], %subreg.sub3 + ; GFX7-NEXT: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_]], implicit-def $exec, implicit-def $scc, implicit $exec + ; GFX7-NEXT: [[BUFFER_LOAD_DWORDX4_OFFEN:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY4]], [[REG_SEQUENCE3]], [[S_MOV_B32_]], 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128), align 4) + ; GFX7-NEXT: [[BUFFER_LOAD_DWORDX4_OFFEN1:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY4]], [[REG_SEQUENCE3]], [[S_MOV_B32_]], 16, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128), align 4) + ; GFX7-NEXT: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc + ; GFX7-NEXT: SI_WATERFALL_LOOP %bb.2, implicit $exec + ; GFX7-NEXT: {{ $}} + ; GFX7-NEXT: bb.3: + ; GFX7-NEXT: $exec = S_MOV_B64_term [[S_MOV_B64_term]] + ; GFX7-NEXT: {{ $}} + ; GFX7-NEXT: bb.4: + ; GFX7-NEXT: [[REG_SEQUENCE4:%[0-9]+]]:vreg_256 = REG_SEQUENCE [[BUFFER_LOAD_DWORDX4_OFFEN]], %subreg.sub0_sub1_sub2_sub3, [[BUFFER_LOAD_DWORDX4_OFFEN1]], %subreg.sub4_sub5_sub6_sub7 + ; GFX7-NEXT: [[COPY7:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub0 + ; GFX7-NEXT: [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub1 + ; GFX7-NEXT: [[COPY9:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub2 + ; GFX7-NEXT: [[COPY10:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub3 + ; GFX7-NEXT: [[COPY11:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub4 + ; GFX7-NEXT: [[COPY12:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub5 + ; GFX7-NEXT: [[COPY13:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub6 + ; GFX7-NEXT: [[COPY14:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub7 + ; GFX7-NEXT: $vgpr0 = COPY [[COPY7]] + ; GFX7-NEXT: $vgpr1 = COPY [[COPY8]] + ; GFX7-NEXT: $vgpr2 = COPY [[COPY9]] + ; GFX7-NEXT: $vgpr3 = COPY [[COPY10]] + ; GFX7-NEXT: $vgpr4 = COPY [[COPY11]] + ; GFX7-NEXT: $vgpr5 = COPY [[COPY12]] + ; GFX7-NEXT: $vgpr6 = COPY [[COPY13]] + ; GFX7-NEXT: $vgpr7 = COPY [[COPY14]] + ; GFX7-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7 ; GFX8-LABEL: name: s_buffer_load_v8f32_vgpr_offset_vgpr_rsrc_add_4076 ; GFX8: bb.1 (%ir-block.0): - ; GFX8: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4 - ; GFX8: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX8: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; GFX8: [[COPY2:%[0-9]+]]:vgpr_32 = COPY 
$vgpr2 - ; GFX8: [[COPY3:%[0-9]+]]:vgpr_32 = COPY $vgpr3 - ; GFX8: [[REG_SEQUENCE:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 - ; GFX8: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr4 - ; GFX8: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 12 - ; GFX8: [[COPY5:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub0_sub1 - ; GFX8: [[COPY6:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub2_sub3 - ; GFX8: [[S_MOV_B64_term:%[0-9]+]]:sreg_64_xexec = S_MOV_B64_term $exec - ; GFX8: bb.2: - ; GFX8: successors: %bb.3, %bb.2 - ; GFX8: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY5]].sub0, implicit $exec - ; GFX8: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY5]].sub1, implicit $exec - ; GFX8: [[REG_SEQUENCE1:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1 - ; GFX8: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE1]], [[COPY5]], implicit $exec - ; GFX8: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY6]].sub0, implicit $exec - ; GFX8: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY6]].sub1, implicit $exec - ; GFX8: [[REG_SEQUENCE2:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[V_READFIRSTLANE_B32_2]], %subreg.sub0, [[V_READFIRSTLANE_B32_3]], %subreg.sub1 - ; GFX8: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE2]], [[COPY6]], implicit $exec - ; GFX8: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_1]], [[V_CMP_EQ_U64_e64_]], implicit-def $scc - ; GFX8: [[REG_SEQUENCE3:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1, [[V_READFIRSTLANE_B32_2]], %subreg.sub2, [[V_READFIRSTLANE_B32_3]], %subreg.sub3 - ; GFX8: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_]], implicit-def $exec, implicit-def $scc, implicit $exec - ; GFX8: [[BUFFER_LOAD_DWORDX4_OFFEN:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY4]], [[REG_SEQUENCE3]], [[S_MOV_B32_]], 4064, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128), align 4) - ; GFX8: [[BUFFER_LOAD_DWORDX4_OFFEN1:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY4]], [[REG_SEQUENCE3]], [[S_MOV_B32_]], 4080, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128), align 4) - ; GFX8: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc - ; GFX8: SI_WATERFALL_LOOP %bb.2, implicit $exec - ; GFX8: bb.3: - ; GFX8: $exec = S_MOV_B64_term [[S_MOV_B64_term]] - ; GFX8: bb.4: - ; GFX8: [[REG_SEQUENCE4:%[0-9]+]]:vreg_256 = REG_SEQUENCE [[BUFFER_LOAD_DWORDX4_OFFEN]], %subreg.sub0_sub1_sub2_sub3, [[BUFFER_LOAD_DWORDX4_OFFEN1]], %subreg.sub4_sub5_sub6_sub7 - ; GFX8: [[COPY7:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub0 - ; GFX8: [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub1 - ; GFX8: [[COPY9:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub2 - ; GFX8: [[COPY10:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub3 - ; GFX8: [[COPY11:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub4 - ; GFX8: [[COPY12:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub5 - ; GFX8: [[COPY13:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub6 - ; GFX8: [[COPY14:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub7 - ; GFX8: $vgpr0 = COPY [[COPY7]] - ; GFX8: $vgpr1 = COPY [[COPY8]] - ; GFX8: $vgpr2 = COPY [[COPY9]] - ; GFX8: $vgpr3 = COPY [[COPY10]] - ; GFX8: 
$vgpr4 = COPY [[COPY11]] - ; GFX8: $vgpr5 = COPY [[COPY12]] - ; GFX8: $vgpr6 = COPY [[COPY13]] - ; GFX8: $vgpr7 = COPY [[COPY14]] - ; GFX8: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7 + ; GFX8-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4 + ; GFX8-NEXT: {{ $}} + ; GFX8-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 + ; GFX8-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 + ; GFX8-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2 + ; GFX8-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY $vgpr3 + ; GFX8-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 + ; GFX8-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr4 + ; GFX8-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 12 + ; GFX8-NEXT: [[COPY5:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub0_sub1 + ; GFX8-NEXT: [[COPY6:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub2_sub3 + ; GFX8-NEXT: [[S_MOV_B64_term:%[0-9]+]]:sreg_64_xexec = S_MOV_B64_term $exec + ; GFX8-NEXT: {{ $}} + ; GFX8-NEXT: bb.2: + ; GFX8-NEXT: successors: %bb.3, %bb.2 + ; GFX8-NEXT: {{ $}} + ; GFX8-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY5]].sub0, implicit $exec + ; GFX8-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY5]].sub1, implicit $exec + ; GFX8-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1 + ; GFX8-NEXT: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE1]], [[COPY5]], implicit $exec + ; GFX8-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY6]].sub0, implicit $exec + ; GFX8-NEXT: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY6]].sub1, implicit $exec + ; GFX8-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[V_READFIRSTLANE_B32_2]], %subreg.sub0, [[V_READFIRSTLANE_B32_3]], %subreg.sub1 + ; GFX8-NEXT: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE2]], [[COPY6]], implicit $exec + ; GFX8-NEXT: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_1]], [[V_CMP_EQ_U64_e64_]], implicit-def $scc + ; GFX8-NEXT: [[REG_SEQUENCE3:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1, [[V_READFIRSTLANE_B32_2]], %subreg.sub2, [[V_READFIRSTLANE_B32_3]], %subreg.sub3 + ; GFX8-NEXT: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_]], implicit-def $exec, implicit-def $scc, implicit $exec + ; GFX8-NEXT: [[BUFFER_LOAD_DWORDX4_OFFEN:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY4]], [[REG_SEQUENCE3]], [[S_MOV_B32_]], 4064, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128), align 4) + ; GFX8-NEXT: [[BUFFER_LOAD_DWORDX4_OFFEN1:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY4]], [[REG_SEQUENCE3]], [[S_MOV_B32_]], 4080, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128), align 4) + ; GFX8-NEXT: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc + ; GFX8-NEXT: SI_WATERFALL_LOOP %bb.2, implicit $exec + ; GFX8-NEXT: {{ $}} + ; GFX8-NEXT: bb.3: + ; GFX8-NEXT: $exec = S_MOV_B64_term [[S_MOV_B64_term]] + ; GFX8-NEXT: {{ $}} + ; GFX8-NEXT: bb.4: + ; GFX8-NEXT: [[REG_SEQUENCE4:%[0-9]+]]:vreg_256 = REG_SEQUENCE [[BUFFER_LOAD_DWORDX4_OFFEN]], 
%subreg.sub0_sub1_sub2_sub3, [[BUFFER_LOAD_DWORDX4_OFFEN1]], %subreg.sub4_sub5_sub6_sub7 + ; GFX8-NEXT: [[COPY7:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub0 + ; GFX8-NEXT: [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub1 + ; GFX8-NEXT: [[COPY9:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub2 + ; GFX8-NEXT: [[COPY10:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub3 + ; GFX8-NEXT: [[COPY11:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub4 + ; GFX8-NEXT: [[COPY12:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub5 + ; GFX8-NEXT: [[COPY13:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub6 + ; GFX8-NEXT: [[COPY14:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub7 + ; GFX8-NEXT: $vgpr0 = COPY [[COPY7]] + ; GFX8-NEXT: $vgpr1 = COPY [[COPY8]] + ; GFX8-NEXT: $vgpr2 = COPY [[COPY9]] + ; GFX8-NEXT: $vgpr3 = COPY [[COPY10]] + ; GFX8-NEXT: $vgpr4 = COPY [[COPY11]] + ; GFX8-NEXT: $vgpr5 = COPY [[COPY12]] + ; GFX8-NEXT: $vgpr6 = COPY [[COPY13]] + ; GFX8-NEXT: $vgpr7 = COPY [[COPY14]] + ; GFX8-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7 %soffset = add i32 %offset.base, 4076 %val = call <8 x float> @llvm.amdgcn.s.buffer.load.v8f32(<4 x i32> %rsrc, i32 %soffset, i32 0) ret <8 x float> %val @@ -3955,157 +4198,172 @@ define amdgpu_ps <8 x float> @s_buffer_load_v8f32_vgpr_offset_vgpr_rsrc_add_4076 define amdgpu_ps <8 x float> @s_buffer_load_v8f32_vgpr_offset_vgpr_rsrc_add_4080(<4 x i32> %rsrc, i32 %offset.base) { ; GFX6-LABEL: name: s_buffer_load_v8f32_vgpr_offset_vgpr_rsrc_add_4080 ; GFX6: bb.1 (%ir-block.0): - ; GFX6: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4 - ; GFX6: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX6: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; GFX6: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2 - ; GFX6: [[COPY3:%[0-9]+]]:vgpr_32 = COPY $vgpr3 - ; GFX6: [[REG_SEQUENCE:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 - ; GFX6: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr4 - ; GFX6: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 4080 - ; GFX6: [[COPY5:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub0_sub1 - ; GFX6: [[COPY6:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub2_sub3 - ; GFX6: [[S_MOV_B64_term:%[0-9]+]]:sreg_64_xexec = S_MOV_B64_term $exec - ; GFX6: bb.2: - ; GFX6: successors: %bb.3, %bb.2 - ; GFX6: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY5]].sub0, implicit $exec - ; GFX6: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY5]].sub1, implicit $exec - ; GFX6: [[REG_SEQUENCE1:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1 - ; GFX6: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE1]], [[COPY5]], implicit $exec - ; GFX6: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY6]].sub0, implicit $exec - ; GFX6: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY6]].sub1, implicit $exec - ; GFX6: [[REG_SEQUENCE2:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[V_READFIRSTLANE_B32_2]], %subreg.sub0, [[V_READFIRSTLANE_B32_3]], %subreg.sub1 - ; GFX6: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE2]], [[COPY6]], implicit $exec - ; GFX6: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_1]], [[V_CMP_EQ_U64_e64_]], implicit-def $scc - ; GFX6: [[REG_SEQUENCE3:%[0-9]+]]:sgpr_128 = 
REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1, [[V_READFIRSTLANE_B32_2]], %subreg.sub2, [[V_READFIRSTLANE_B32_3]], %subreg.sub3 - ; GFX6: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_]], implicit-def $exec, implicit-def $scc, implicit $exec - ; GFX6: [[BUFFER_LOAD_DWORDX4_OFFEN:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY4]], [[REG_SEQUENCE3]], [[S_MOV_B32_]], 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128), align 4) - ; GFX6: [[BUFFER_LOAD_DWORDX4_OFFEN1:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY4]], [[REG_SEQUENCE3]], [[S_MOV_B32_]], 16, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128), align 4) - ; GFX6: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc - ; GFX6: SI_WATERFALL_LOOP %bb.2, implicit $exec - ; GFX6: bb.3: - ; GFX6: $exec = S_MOV_B64_term [[S_MOV_B64_term]] - ; GFX6: bb.4: - ; GFX6: [[REG_SEQUENCE4:%[0-9]+]]:vreg_256 = REG_SEQUENCE [[BUFFER_LOAD_DWORDX4_OFFEN]], %subreg.sub0_sub1_sub2_sub3, [[BUFFER_LOAD_DWORDX4_OFFEN1]], %subreg.sub4_sub5_sub6_sub7 - ; GFX6: [[COPY7:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub0 - ; GFX6: [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub1 - ; GFX6: [[COPY9:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub2 - ; GFX6: [[COPY10:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub3 - ; GFX6: [[COPY11:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub4 - ; GFX6: [[COPY12:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub5 - ; GFX6: [[COPY13:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub6 - ; GFX6: [[COPY14:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub7 - ; GFX6: $vgpr0 = COPY [[COPY7]] - ; GFX6: $vgpr1 = COPY [[COPY8]] - ; GFX6: $vgpr2 = COPY [[COPY9]] - ; GFX6: $vgpr3 = COPY [[COPY10]] - ; GFX6: $vgpr4 = COPY [[COPY11]] - ; GFX6: $vgpr5 = COPY [[COPY12]] - ; GFX6: $vgpr6 = COPY [[COPY13]] - ; GFX6: $vgpr7 = COPY [[COPY14]] - ; GFX6: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7 + ; GFX6-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4 + ; GFX6-NEXT: {{ $}} + ; GFX6-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 + ; GFX6-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 + ; GFX6-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2 + ; GFX6-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY $vgpr3 + ; GFX6-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 + ; GFX6-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr4 + ; GFX6-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 4080 + ; GFX6-NEXT: [[COPY5:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub0_sub1 + ; GFX6-NEXT: [[COPY6:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub2_sub3 + ; GFX6-NEXT: [[S_MOV_B64_term:%[0-9]+]]:sreg_64_xexec = S_MOV_B64_term $exec + ; GFX6-NEXT: {{ $}} + ; GFX6-NEXT: bb.2: + ; GFX6-NEXT: successors: %bb.3, %bb.2 + ; GFX6-NEXT: {{ $}} + ; GFX6-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY5]].sub0, implicit $exec + ; GFX6-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY5]].sub1, implicit $exec + ; GFX6-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1 + ; GFX6-NEXT: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE1]], [[COPY5]], implicit $exec + ; 
GFX6-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY6]].sub0, implicit $exec + ; GFX6-NEXT: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY6]].sub1, implicit $exec + ; GFX6-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[V_READFIRSTLANE_B32_2]], %subreg.sub0, [[V_READFIRSTLANE_B32_3]], %subreg.sub1 + ; GFX6-NEXT: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE2]], [[COPY6]], implicit $exec + ; GFX6-NEXT: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_1]], [[V_CMP_EQ_U64_e64_]], implicit-def $scc + ; GFX6-NEXT: [[REG_SEQUENCE3:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1, [[V_READFIRSTLANE_B32_2]], %subreg.sub2, [[V_READFIRSTLANE_B32_3]], %subreg.sub3 + ; GFX6-NEXT: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_]], implicit-def $exec, implicit-def $scc, implicit $exec + ; GFX6-NEXT: [[BUFFER_LOAD_DWORDX4_OFFEN:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY4]], [[REG_SEQUENCE3]], [[S_MOV_B32_]], 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128), align 4) + ; GFX6-NEXT: [[BUFFER_LOAD_DWORDX4_OFFEN1:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY4]], [[REG_SEQUENCE3]], [[S_MOV_B32_]], 16, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128), align 4) + ; GFX6-NEXT: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc + ; GFX6-NEXT: SI_WATERFALL_LOOP %bb.2, implicit $exec + ; GFX6-NEXT: {{ $}} + ; GFX6-NEXT: bb.3: + ; GFX6-NEXT: $exec = S_MOV_B64_term [[S_MOV_B64_term]] + ; GFX6-NEXT: {{ $}} + ; GFX6-NEXT: bb.4: + ; GFX6-NEXT: [[REG_SEQUENCE4:%[0-9]+]]:vreg_256 = REG_SEQUENCE [[BUFFER_LOAD_DWORDX4_OFFEN]], %subreg.sub0_sub1_sub2_sub3, [[BUFFER_LOAD_DWORDX4_OFFEN1]], %subreg.sub4_sub5_sub6_sub7 + ; GFX6-NEXT: [[COPY7:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub0 + ; GFX6-NEXT: [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub1 + ; GFX6-NEXT: [[COPY9:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub2 + ; GFX6-NEXT: [[COPY10:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub3 + ; GFX6-NEXT: [[COPY11:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub4 + ; GFX6-NEXT: [[COPY12:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub5 + ; GFX6-NEXT: [[COPY13:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub6 + ; GFX6-NEXT: [[COPY14:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub7 + ; GFX6-NEXT: $vgpr0 = COPY [[COPY7]] + ; GFX6-NEXT: $vgpr1 = COPY [[COPY8]] + ; GFX6-NEXT: $vgpr2 = COPY [[COPY9]] + ; GFX6-NEXT: $vgpr3 = COPY [[COPY10]] + ; GFX6-NEXT: $vgpr4 = COPY [[COPY11]] + ; GFX6-NEXT: $vgpr5 = COPY [[COPY12]] + ; GFX6-NEXT: $vgpr6 = COPY [[COPY13]] + ; GFX6-NEXT: $vgpr7 = COPY [[COPY14]] + ; GFX6-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7 ; GFX7-LABEL: name: s_buffer_load_v8f32_vgpr_offset_vgpr_rsrc_add_4080 ; GFX7: bb.1 (%ir-block.0): - ; GFX7: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4 - ; GFX7: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX7: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; GFX7: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2 - ; GFX7: [[COPY3:%[0-9]+]]:vgpr_32 = COPY $vgpr3 - ; GFX7: [[REG_SEQUENCE:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 - ; GFX7: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr4 - ; GFX7: 
[[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 4080 - ; GFX7: [[COPY5:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub0_sub1 - ; GFX7: [[COPY6:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub2_sub3 - ; GFX7: [[S_MOV_B64_term:%[0-9]+]]:sreg_64_xexec = S_MOV_B64_term $exec - ; GFX7: bb.2: - ; GFX7: successors: %bb.3, %bb.2 - ; GFX7: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY5]].sub0, implicit $exec - ; GFX7: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY5]].sub1, implicit $exec - ; GFX7: [[REG_SEQUENCE1:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1 - ; GFX7: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE1]], [[COPY5]], implicit $exec - ; GFX7: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY6]].sub0, implicit $exec - ; GFX7: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY6]].sub1, implicit $exec - ; GFX7: [[REG_SEQUENCE2:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[V_READFIRSTLANE_B32_2]], %subreg.sub0, [[V_READFIRSTLANE_B32_3]], %subreg.sub1 - ; GFX7: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE2]], [[COPY6]], implicit $exec - ; GFX7: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_1]], [[V_CMP_EQ_U64_e64_]], implicit-def $scc - ; GFX7: [[REG_SEQUENCE3:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1, [[V_READFIRSTLANE_B32_2]], %subreg.sub2, [[V_READFIRSTLANE_B32_3]], %subreg.sub3 - ; GFX7: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_]], implicit-def $exec, implicit-def $scc, implicit $exec - ; GFX7: [[BUFFER_LOAD_DWORDX4_OFFEN:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY4]], [[REG_SEQUENCE3]], [[S_MOV_B32_]], 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128), align 4) - ; GFX7: [[BUFFER_LOAD_DWORDX4_OFFEN1:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY4]], [[REG_SEQUENCE3]], [[S_MOV_B32_]], 16, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128), align 4) - ; GFX7: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc - ; GFX7: SI_WATERFALL_LOOP %bb.2, implicit $exec - ; GFX7: bb.3: - ; GFX7: $exec = S_MOV_B64_term [[S_MOV_B64_term]] - ; GFX7: bb.4: - ; GFX7: [[REG_SEQUENCE4:%[0-9]+]]:vreg_256 = REG_SEQUENCE [[BUFFER_LOAD_DWORDX4_OFFEN]], %subreg.sub0_sub1_sub2_sub3, [[BUFFER_LOAD_DWORDX4_OFFEN1]], %subreg.sub4_sub5_sub6_sub7 - ; GFX7: [[COPY7:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub0 - ; GFX7: [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub1 - ; GFX7: [[COPY9:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub2 - ; GFX7: [[COPY10:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub3 - ; GFX7: [[COPY11:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub4 - ; GFX7: [[COPY12:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub5 - ; GFX7: [[COPY13:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub6 - ; GFX7: [[COPY14:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub7 - ; GFX7: $vgpr0 = COPY [[COPY7]] - ; GFX7: $vgpr1 = COPY [[COPY8]] - ; GFX7: $vgpr2 = COPY [[COPY9]] - ; GFX7: $vgpr3 = COPY [[COPY10]] - ; GFX7: $vgpr4 = COPY [[COPY11]] - ; GFX7: $vgpr5 = COPY [[COPY12]] - ; GFX7: $vgpr6 = COPY [[COPY13]] - ; GFX7: $vgpr7 = COPY [[COPY14]] - ; GFX7: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, 
implicit $vgpr7 + ; GFX7-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4 + ; GFX7-NEXT: {{ $}} + ; GFX7-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 + ; GFX7-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 + ; GFX7-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2 + ; GFX7-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY $vgpr3 + ; GFX7-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 + ; GFX7-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr4 + ; GFX7-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 4080 + ; GFX7-NEXT: [[COPY5:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub0_sub1 + ; GFX7-NEXT: [[COPY6:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub2_sub3 + ; GFX7-NEXT: [[S_MOV_B64_term:%[0-9]+]]:sreg_64_xexec = S_MOV_B64_term $exec + ; GFX7-NEXT: {{ $}} + ; GFX7-NEXT: bb.2: + ; GFX7-NEXT: successors: %bb.3, %bb.2 + ; GFX7-NEXT: {{ $}} + ; GFX7-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY5]].sub0, implicit $exec + ; GFX7-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY5]].sub1, implicit $exec + ; GFX7-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1 + ; GFX7-NEXT: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE1]], [[COPY5]], implicit $exec + ; GFX7-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY6]].sub0, implicit $exec + ; GFX7-NEXT: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY6]].sub1, implicit $exec + ; GFX7-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[V_READFIRSTLANE_B32_2]], %subreg.sub0, [[V_READFIRSTLANE_B32_3]], %subreg.sub1 + ; GFX7-NEXT: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE2]], [[COPY6]], implicit $exec + ; GFX7-NEXT: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_1]], [[V_CMP_EQ_U64_e64_]], implicit-def $scc + ; GFX7-NEXT: [[REG_SEQUENCE3:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1, [[V_READFIRSTLANE_B32_2]], %subreg.sub2, [[V_READFIRSTLANE_B32_3]], %subreg.sub3 + ; GFX7-NEXT: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_]], implicit-def $exec, implicit-def $scc, implicit $exec + ; GFX7-NEXT: [[BUFFER_LOAD_DWORDX4_OFFEN:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY4]], [[REG_SEQUENCE3]], [[S_MOV_B32_]], 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128), align 4) + ; GFX7-NEXT: [[BUFFER_LOAD_DWORDX4_OFFEN1:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY4]], [[REG_SEQUENCE3]], [[S_MOV_B32_]], 16, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128), align 4) + ; GFX7-NEXT: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc + ; GFX7-NEXT: SI_WATERFALL_LOOP %bb.2, implicit $exec + ; GFX7-NEXT: {{ $}} + ; GFX7-NEXT: bb.3: + ; GFX7-NEXT: $exec = S_MOV_B64_term [[S_MOV_B64_term]] + ; GFX7-NEXT: {{ $}} + ; GFX7-NEXT: bb.4: + ; GFX7-NEXT: [[REG_SEQUENCE4:%[0-9]+]]:vreg_256 = REG_SEQUENCE [[BUFFER_LOAD_DWORDX4_OFFEN]], %subreg.sub0_sub1_sub2_sub3, [[BUFFER_LOAD_DWORDX4_OFFEN1]], %subreg.sub4_sub5_sub6_sub7 + ; GFX7-NEXT: [[COPY7:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub0 + ; GFX7-NEXT: [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub1 + ; GFX7-NEXT: [[COPY9:%[0-9]+]]:vgpr_32 = COPY 
[[REG_SEQUENCE4]].sub2 + ; GFX7-NEXT: [[COPY10:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub3 + ; GFX7-NEXT: [[COPY11:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub4 + ; GFX7-NEXT: [[COPY12:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub5 + ; GFX7-NEXT: [[COPY13:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub6 + ; GFX7-NEXT: [[COPY14:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub7 + ; GFX7-NEXT: $vgpr0 = COPY [[COPY7]] + ; GFX7-NEXT: $vgpr1 = COPY [[COPY8]] + ; GFX7-NEXT: $vgpr2 = COPY [[COPY9]] + ; GFX7-NEXT: $vgpr3 = COPY [[COPY10]] + ; GFX7-NEXT: $vgpr4 = COPY [[COPY11]] + ; GFX7-NEXT: $vgpr5 = COPY [[COPY12]] + ; GFX7-NEXT: $vgpr6 = COPY [[COPY13]] + ; GFX7-NEXT: $vgpr7 = COPY [[COPY14]] + ; GFX7-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7 ; GFX8-LABEL: name: s_buffer_load_v8f32_vgpr_offset_vgpr_rsrc_add_4080 ; GFX8: bb.1 (%ir-block.0): - ; GFX8: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4 - ; GFX8: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX8: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; GFX8: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2 - ; GFX8: [[COPY3:%[0-9]+]]:vgpr_32 = COPY $vgpr3 - ; GFX8: [[REG_SEQUENCE:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 - ; GFX8: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr4 - ; GFX8: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 16 - ; GFX8: [[COPY5:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub0_sub1 - ; GFX8: [[COPY6:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub2_sub3 - ; GFX8: [[S_MOV_B64_term:%[0-9]+]]:sreg_64_xexec = S_MOV_B64_term $exec - ; GFX8: bb.2: - ; GFX8: successors: %bb.3, %bb.2 - ; GFX8: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY5]].sub0, implicit $exec - ; GFX8: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY5]].sub1, implicit $exec - ; GFX8: [[REG_SEQUENCE1:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1 - ; GFX8: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE1]], [[COPY5]], implicit $exec - ; GFX8: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY6]].sub0, implicit $exec - ; GFX8: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY6]].sub1, implicit $exec - ; GFX8: [[REG_SEQUENCE2:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[V_READFIRSTLANE_B32_2]], %subreg.sub0, [[V_READFIRSTLANE_B32_3]], %subreg.sub1 - ; GFX8: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE2]], [[COPY6]], implicit $exec - ; GFX8: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_1]], [[V_CMP_EQ_U64_e64_]], implicit-def $scc - ; GFX8: [[REG_SEQUENCE3:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1, [[V_READFIRSTLANE_B32_2]], %subreg.sub2, [[V_READFIRSTLANE_B32_3]], %subreg.sub3 - ; GFX8: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_]], implicit-def $exec, implicit-def $scc, implicit $exec - ; GFX8: [[BUFFER_LOAD_DWORDX4_OFFEN:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY4]], [[REG_SEQUENCE3]], [[S_MOV_B32_]], 4064, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128), align 4) - ; GFX8: [[BUFFER_LOAD_DWORDX4_OFFEN1:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY4]], [[REG_SEQUENCE3]], 
[[S_MOV_B32_]], 4080, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128), align 4) - ; GFX8: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc - ; GFX8: SI_WATERFALL_LOOP %bb.2, implicit $exec - ; GFX8: bb.3: - ; GFX8: $exec = S_MOV_B64_term [[S_MOV_B64_term]] - ; GFX8: bb.4: - ; GFX8: [[REG_SEQUENCE4:%[0-9]+]]:vreg_256 = REG_SEQUENCE [[BUFFER_LOAD_DWORDX4_OFFEN]], %subreg.sub0_sub1_sub2_sub3, [[BUFFER_LOAD_DWORDX4_OFFEN1]], %subreg.sub4_sub5_sub6_sub7 - ; GFX8: [[COPY7:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub0 - ; GFX8: [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub1 - ; GFX8: [[COPY9:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub2 - ; GFX8: [[COPY10:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub3 - ; GFX8: [[COPY11:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub4 - ; GFX8: [[COPY12:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub5 - ; GFX8: [[COPY13:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub6 - ; GFX8: [[COPY14:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub7 - ; GFX8: $vgpr0 = COPY [[COPY7]] - ; GFX8: $vgpr1 = COPY [[COPY8]] - ; GFX8: $vgpr2 = COPY [[COPY9]] - ; GFX8: $vgpr3 = COPY [[COPY10]] - ; GFX8: $vgpr4 = COPY [[COPY11]] - ; GFX8: $vgpr5 = COPY [[COPY12]] - ; GFX8: $vgpr6 = COPY [[COPY13]] - ; GFX8: $vgpr7 = COPY [[COPY14]] - ; GFX8: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7 + ; GFX8-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4 + ; GFX8-NEXT: {{ $}} + ; GFX8-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 + ; GFX8-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 + ; GFX8-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2 + ; GFX8-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY $vgpr3 + ; GFX8-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 + ; GFX8-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr4 + ; GFX8-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 16 + ; GFX8-NEXT: [[COPY5:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub0_sub1 + ; GFX8-NEXT: [[COPY6:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub2_sub3 + ; GFX8-NEXT: [[S_MOV_B64_term:%[0-9]+]]:sreg_64_xexec = S_MOV_B64_term $exec + ; GFX8-NEXT: {{ $}} + ; GFX8-NEXT: bb.2: + ; GFX8-NEXT: successors: %bb.3, %bb.2 + ; GFX8-NEXT: {{ $}} + ; GFX8-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY5]].sub0, implicit $exec + ; GFX8-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY5]].sub1, implicit $exec + ; GFX8-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1 + ; GFX8-NEXT: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE1]], [[COPY5]], implicit $exec + ; GFX8-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY6]].sub0, implicit $exec + ; GFX8-NEXT: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY6]].sub1, implicit $exec + ; GFX8-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[V_READFIRSTLANE_B32_2]], %subreg.sub0, [[V_READFIRSTLANE_B32_3]], %subreg.sub1 + ; GFX8-NEXT: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE2]], [[COPY6]], implicit $exec + ; GFX8-NEXT: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_1]], [[V_CMP_EQ_U64_e64_]], implicit-def $scc + ; GFX8-NEXT: 
[[REG_SEQUENCE3:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1, [[V_READFIRSTLANE_B32_2]], %subreg.sub2, [[V_READFIRSTLANE_B32_3]], %subreg.sub3 + ; GFX8-NEXT: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_]], implicit-def $exec, implicit-def $scc, implicit $exec + ; GFX8-NEXT: [[BUFFER_LOAD_DWORDX4_OFFEN:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY4]], [[REG_SEQUENCE3]], [[S_MOV_B32_]], 4064, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128), align 4) + ; GFX8-NEXT: [[BUFFER_LOAD_DWORDX4_OFFEN1:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY4]], [[REG_SEQUENCE3]], [[S_MOV_B32_]], 4080, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128), align 4) + ; GFX8-NEXT: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc + ; GFX8-NEXT: SI_WATERFALL_LOOP %bb.2, implicit $exec + ; GFX8-NEXT: {{ $}} + ; GFX8-NEXT: bb.3: + ; GFX8-NEXT: $exec = S_MOV_B64_term [[S_MOV_B64_term]] + ; GFX8-NEXT: {{ $}} + ; GFX8-NEXT: bb.4: + ; GFX8-NEXT: [[REG_SEQUENCE4:%[0-9]+]]:vreg_256 = REG_SEQUENCE [[BUFFER_LOAD_DWORDX4_OFFEN]], %subreg.sub0_sub1_sub2_sub3, [[BUFFER_LOAD_DWORDX4_OFFEN1]], %subreg.sub4_sub5_sub6_sub7 + ; GFX8-NEXT: [[COPY7:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub0 + ; GFX8-NEXT: [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub1 + ; GFX8-NEXT: [[COPY9:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub2 + ; GFX8-NEXT: [[COPY10:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub3 + ; GFX8-NEXT: [[COPY11:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub4 + ; GFX8-NEXT: [[COPY12:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub5 + ; GFX8-NEXT: [[COPY13:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub6 + ; GFX8-NEXT: [[COPY14:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub7 + ; GFX8-NEXT: $vgpr0 = COPY [[COPY7]] + ; GFX8-NEXT: $vgpr1 = COPY [[COPY8]] + ; GFX8-NEXT: $vgpr2 = COPY [[COPY9]] + ; GFX8-NEXT: $vgpr3 = COPY [[COPY10]] + ; GFX8-NEXT: $vgpr4 = COPY [[COPY11]] + ; GFX8-NEXT: $vgpr5 = COPY [[COPY12]] + ; GFX8-NEXT: $vgpr6 = COPY [[COPY13]] + ; GFX8-NEXT: $vgpr7 = COPY [[COPY14]] + ; GFX8-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7 %soffset = add i32 %offset.base, 4080 %val = call <8 x float> @llvm.amdgcn.s.buffer.load.v8f32(<4 x i32> %rsrc, i32 %soffset, i32 0) ret <8 x float> %val @@ -4114,154 +4372,169 @@ define amdgpu_ps <8 x float> @s_buffer_load_v8f32_vgpr_offset_vgpr_rsrc_add_4080 define amdgpu_ps <8 x float> @s_buffer_load_v8f32_vgpr_offset_vgpr_rsrc_offset_4064(<4 x i32> %rsrc, i32 %offset.base) { ; GFX6-LABEL: name: s_buffer_load_v8f32_vgpr_offset_vgpr_rsrc_offset_4064 ; GFX6: bb.1 (%ir-block.0): - ; GFX6: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3 - ; GFX6: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX6: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; GFX6: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2 - ; GFX6: [[COPY3:%[0-9]+]]:vgpr_32 = COPY $vgpr3 - ; GFX6: [[REG_SEQUENCE:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 - ; GFX6: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0 - ; GFX6: [[COPY4:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub0_sub1 - ; GFX6: [[COPY5:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub2_sub3 - ; GFX6: [[S_MOV_B64_term:%[0-9]+]]:sreg_64_xexec = S_MOV_B64_term $exec - ; GFX6: bb.2: - ; GFX6: successors: %bb.3, %bb.2 - ; 
GFX6: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY4]].sub0, implicit $exec - ; GFX6: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY4]].sub1, implicit $exec - ; GFX6: [[REG_SEQUENCE1:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1 - ; GFX6: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE1]], [[COPY4]], implicit $exec - ; GFX6: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY5]].sub0, implicit $exec - ; GFX6: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY5]].sub1, implicit $exec - ; GFX6: [[REG_SEQUENCE2:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[V_READFIRSTLANE_B32_2]], %subreg.sub0, [[V_READFIRSTLANE_B32_3]], %subreg.sub1 - ; GFX6: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE2]], [[COPY5]], implicit $exec - ; GFX6: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_1]], [[V_CMP_EQ_U64_e64_]], implicit-def $scc - ; GFX6: [[REG_SEQUENCE3:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1, [[V_READFIRSTLANE_B32_2]], %subreg.sub2, [[V_READFIRSTLANE_B32_3]], %subreg.sub3 - ; GFX6: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_]], implicit-def $exec, implicit-def $scc, implicit $exec - ; GFX6: [[BUFFER_LOAD_DWORDX4_OFFSET:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFSET [[REG_SEQUENCE3]], [[S_MOV_B32_]], 4064, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128) from unknown-address + 4064, align 4) - ; GFX6: [[BUFFER_LOAD_DWORDX4_OFFSET1:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFSET [[REG_SEQUENCE3]], [[S_MOV_B32_]], 4080, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128) from unknown-address + 4064, align 4) - ; GFX6: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc - ; GFX6: SI_WATERFALL_LOOP %bb.2, implicit $exec - ; GFX6: bb.3: - ; GFX6: $exec = S_MOV_B64_term [[S_MOV_B64_term]] - ; GFX6: bb.4: - ; GFX6: [[REG_SEQUENCE4:%[0-9]+]]:vreg_256 = REG_SEQUENCE [[BUFFER_LOAD_DWORDX4_OFFSET]], %subreg.sub0_sub1_sub2_sub3, [[BUFFER_LOAD_DWORDX4_OFFSET1]], %subreg.sub4_sub5_sub6_sub7 - ; GFX6: [[COPY6:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub0 - ; GFX6: [[COPY7:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub1 - ; GFX6: [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub2 - ; GFX6: [[COPY9:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub3 - ; GFX6: [[COPY10:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub4 - ; GFX6: [[COPY11:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub5 - ; GFX6: [[COPY12:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub6 - ; GFX6: [[COPY13:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub7 - ; GFX6: $vgpr0 = COPY [[COPY6]] - ; GFX6: $vgpr1 = COPY [[COPY7]] - ; GFX6: $vgpr2 = COPY [[COPY8]] - ; GFX6: $vgpr3 = COPY [[COPY9]] - ; GFX6: $vgpr4 = COPY [[COPY10]] - ; GFX6: $vgpr5 = COPY [[COPY11]] - ; GFX6: $vgpr6 = COPY [[COPY12]] - ; GFX6: $vgpr7 = COPY [[COPY13]] - ; GFX6: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7 + ; GFX6-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3 + ; GFX6-NEXT: {{ $}} + ; GFX6-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 + ; GFX6-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 + ; GFX6-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2 + ; GFX6-NEXT: 
[[COPY3:%[0-9]+]]:vgpr_32 = COPY $vgpr3 + ; GFX6-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 + ; GFX6-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0 + ; GFX6-NEXT: [[COPY4:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub0_sub1 + ; GFX6-NEXT: [[COPY5:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub2_sub3 + ; GFX6-NEXT: [[S_MOV_B64_term:%[0-9]+]]:sreg_64_xexec = S_MOV_B64_term $exec + ; GFX6-NEXT: {{ $}} + ; GFX6-NEXT: bb.2: + ; GFX6-NEXT: successors: %bb.3, %bb.2 + ; GFX6-NEXT: {{ $}} + ; GFX6-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY4]].sub0, implicit $exec + ; GFX6-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY4]].sub1, implicit $exec + ; GFX6-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1 + ; GFX6-NEXT: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE1]], [[COPY4]], implicit $exec + ; GFX6-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY5]].sub0, implicit $exec + ; GFX6-NEXT: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY5]].sub1, implicit $exec + ; GFX6-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[V_READFIRSTLANE_B32_2]], %subreg.sub0, [[V_READFIRSTLANE_B32_3]], %subreg.sub1 + ; GFX6-NEXT: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE2]], [[COPY5]], implicit $exec + ; GFX6-NEXT: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_1]], [[V_CMP_EQ_U64_e64_]], implicit-def $scc + ; GFX6-NEXT: [[REG_SEQUENCE3:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1, [[V_READFIRSTLANE_B32_2]], %subreg.sub2, [[V_READFIRSTLANE_B32_3]], %subreg.sub3 + ; GFX6-NEXT: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_]], implicit-def $exec, implicit-def $scc, implicit $exec + ; GFX6-NEXT: [[BUFFER_LOAD_DWORDX4_OFFSET:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFSET [[REG_SEQUENCE3]], [[S_MOV_B32_]], 4064, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128) from unknown-address + 4064, align 4) + ; GFX6-NEXT: [[BUFFER_LOAD_DWORDX4_OFFSET1:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFSET [[REG_SEQUENCE3]], [[S_MOV_B32_]], 4080, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128) from unknown-address + 4064, align 4) + ; GFX6-NEXT: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc + ; GFX6-NEXT: SI_WATERFALL_LOOP %bb.2, implicit $exec + ; GFX6-NEXT: {{ $}} + ; GFX6-NEXT: bb.3: + ; GFX6-NEXT: $exec = S_MOV_B64_term [[S_MOV_B64_term]] + ; GFX6-NEXT: {{ $}} + ; GFX6-NEXT: bb.4: + ; GFX6-NEXT: [[REG_SEQUENCE4:%[0-9]+]]:vreg_256 = REG_SEQUENCE [[BUFFER_LOAD_DWORDX4_OFFSET]], %subreg.sub0_sub1_sub2_sub3, [[BUFFER_LOAD_DWORDX4_OFFSET1]], %subreg.sub4_sub5_sub6_sub7 + ; GFX6-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub0 + ; GFX6-NEXT: [[COPY7:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub1 + ; GFX6-NEXT: [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub2 + ; GFX6-NEXT: [[COPY9:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub3 + ; GFX6-NEXT: [[COPY10:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub4 + ; GFX6-NEXT: [[COPY11:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub5 + ; GFX6-NEXT: [[COPY12:%[0-9]+]]:vgpr_32 = COPY 
[[REG_SEQUENCE4]].sub6 + ; GFX6-NEXT: [[COPY13:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub7 + ; GFX6-NEXT: $vgpr0 = COPY [[COPY6]] + ; GFX6-NEXT: $vgpr1 = COPY [[COPY7]] + ; GFX6-NEXT: $vgpr2 = COPY [[COPY8]] + ; GFX6-NEXT: $vgpr3 = COPY [[COPY9]] + ; GFX6-NEXT: $vgpr4 = COPY [[COPY10]] + ; GFX6-NEXT: $vgpr5 = COPY [[COPY11]] + ; GFX6-NEXT: $vgpr6 = COPY [[COPY12]] + ; GFX6-NEXT: $vgpr7 = COPY [[COPY13]] + ; GFX6-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7 ; GFX7-LABEL: name: s_buffer_load_v8f32_vgpr_offset_vgpr_rsrc_offset_4064 ; GFX7: bb.1 (%ir-block.0): - ; GFX7: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3 - ; GFX7: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX7: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; GFX7: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2 - ; GFX7: [[COPY3:%[0-9]+]]:vgpr_32 = COPY $vgpr3 - ; GFX7: [[REG_SEQUENCE:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 - ; GFX7: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0 - ; GFX7: [[COPY4:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub0_sub1 - ; GFX7: [[COPY5:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub2_sub3 - ; GFX7: [[S_MOV_B64_term:%[0-9]+]]:sreg_64_xexec = S_MOV_B64_term $exec - ; GFX7: bb.2: - ; GFX7: successors: %bb.3, %bb.2 - ; GFX7: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY4]].sub0, implicit $exec - ; GFX7: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY4]].sub1, implicit $exec - ; GFX7: [[REG_SEQUENCE1:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1 - ; GFX7: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE1]], [[COPY4]], implicit $exec - ; GFX7: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY5]].sub0, implicit $exec - ; GFX7: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY5]].sub1, implicit $exec - ; GFX7: [[REG_SEQUENCE2:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[V_READFIRSTLANE_B32_2]], %subreg.sub0, [[V_READFIRSTLANE_B32_3]], %subreg.sub1 - ; GFX7: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE2]], [[COPY5]], implicit $exec - ; GFX7: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_1]], [[V_CMP_EQ_U64_e64_]], implicit-def $scc - ; GFX7: [[REG_SEQUENCE3:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1, [[V_READFIRSTLANE_B32_2]], %subreg.sub2, [[V_READFIRSTLANE_B32_3]], %subreg.sub3 - ; GFX7: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_]], implicit-def $exec, implicit-def $scc, implicit $exec - ; GFX7: [[BUFFER_LOAD_DWORDX4_OFFSET:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFSET [[REG_SEQUENCE3]], [[S_MOV_B32_]], 4064, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128) from unknown-address + 4064, align 4) - ; GFX7: [[BUFFER_LOAD_DWORDX4_OFFSET1:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFSET [[REG_SEQUENCE3]], [[S_MOV_B32_]], 4080, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128) from unknown-address + 4064, align 4) - ; GFX7: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc - ; GFX7: SI_WATERFALL_LOOP %bb.2, implicit $exec - ; GFX7: bb.3: - ; GFX7: $exec = S_MOV_B64_term [[S_MOV_B64_term]] - ; GFX7: bb.4: 
- ; GFX7: [[REG_SEQUENCE4:%[0-9]+]]:vreg_256 = REG_SEQUENCE [[BUFFER_LOAD_DWORDX4_OFFSET]], %subreg.sub0_sub1_sub2_sub3, [[BUFFER_LOAD_DWORDX4_OFFSET1]], %subreg.sub4_sub5_sub6_sub7 - ; GFX7: [[COPY6:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub0 - ; GFX7: [[COPY7:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub1 - ; GFX7: [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub2 - ; GFX7: [[COPY9:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub3 - ; GFX7: [[COPY10:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub4 - ; GFX7: [[COPY11:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub5 - ; GFX7: [[COPY12:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub6 - ; GFX7: [[COPY13:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub7 - ; GFX7: $vgpr0 = COPY [[COPY6]] - ; GFX7: $vgpr1 = COPY [[COPY7]] - ; GFX7: $vgpr2 = COPY [[COPY8]] - ; GFX7: $vgpr3 = COPY [[COPY9]] - ; GFX7: $vgpr4 = COPY [[COPY10]] - ; GFX7: $vgpr5 = COPY [[COPY11]] - ; GFX7: $vgpr6 = COPY [[COPY12]] - ; GFX7: $vgpr7 = COPY [[COPY13]] - ; GFX7: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7 + ; GFX7-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3 + ; GFX7-NEXT: {{ $}} + ; GFX7-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 + ; GFX7-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 + ; GFX7-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2 + ; GFX7-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY $vgpr3 + ; GFX7-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 + ; GFX7-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0 + ; GFX7-NEXT: [[COPY4:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub0_sub1 + ; GFX7-NEXT: [[COPY5:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub2_sub3 + ; GFX7-NEXT: [[S_MOV_B64_term:%[0-9]+]]:sreg_64_xexec = S_MOV_B64_term $exec + ; GFX7-NEXT: {{ $}} + ; GFX7-NEXT: bb.2: + ; GFX7-NEXT: successors: %bb.3, %bb.2 + ; GFX7-NEXT: {{ $}} + ; GFX7-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY4]].sub0, implicit $exec + ; GFX7-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY4]].sub1, implicit $exec + ; GFX7-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1 + ; GFX7-NEXT: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE1]], [[COPY4]], implicit $exec + ; GFX7-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY5]].sub0, implicit $exec + ; GFX7-NEXT: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY5]].sub1, implicit $exec + ; GFX7-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[V_READFIRSTLANE_B32_2]], %subreg.sub0, [[V_READFIRSTLANE_B32_3]], %subreg.sub1 + ; GFX7-NEXT: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE2]], [[COPY5]], implicit $exec + ; GFX7-NEXT: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_1]], [[V_CMP_EQ_U64_e64_]], implicit-def $scc + ; GFX7-NEXT: [[REG_SEQUENCE3:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1, [[V_READFIRSTLANE_B32_2]], %subreg.sub2, [[V_READFIRSTLANE_B32_3]], %subreg.sub3 + ; GFX7-NEXT: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_]], implicit-def $exec, implicit-def $scc, implicit $exec + ; GFX7-NEXT: 
[[BUFFER_LOAD_DWORDX4_OFFSET:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFSET [[REG_SEQUENCE3]], [[S_MOV_B32_]], 4064, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128) from unknown-address + 4064, align 4) + ; GFX7-NEXT: [[BUFFER_LOAD_DWORDX4_OFFSET1:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFSET [[REG_SEQUENCE3]], [[S_MOV_B32_]], 4080, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128) from unknown-address + 4064, align 4) + ; GFX7-NEXT: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc + ; GFX7-NEXT: SI_WATERFALL_LOOP %bb.2, implicit $exec + ; GFX7-NEXT: {{ $}} + ; GFX7-NEXT: bb.3: + ; GFX7-NEXT: $exec = S_MOV_B64_term [[S_MOV_B64_term]] + ; GFX7-NEXT: {{ $}} + ; GFX7-NEXT: bb.4: + ; GFX7-NEXT: [[REG_SEQUENCE4:%[0-9]+]]:vreg_256 = REG_SEQUENCE [[BUFFER_LOAD_DWORDX4_OFFSET]], %subreg.sub0_sub1_sub2_sub3, [[BUFFER_LOAD_DWORDX4_OFFSET1]], %subreg.sub4_sub5_sub6_sub7 + ; GFX7-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub0 + ; GFX7-NEXT: [[COPY7:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub1 + ; GFX7-NEXT: [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub2 + ; GFX7-NEXT: [[COPY9:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub3 + ; GFX7-NEXT: [[COPY10:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub4 + ; GFX7-NEXT: [[COPY11:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub5 + ; GFX7-NEXT: [[COPY12:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub6 + ; GFX7-NEXT: [[COPY13:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub7 + ; GFX7-NEXT: $vgpr0 = COPY [[COPY6]] + ; GFX7-NEXT: $vgpr1 = COPY [[COPY7]] + ; GFX7-NEXT: $vgpr2 = COPY [[COPY8]] + ; GFX7-NEXT: $vgpr3 = COPY [[COPY9]] + ; GFX7-NEXT: $vgpr4 = COPY [[COPY10]] + ; GFX7-NEXT: $vgpr5 = COPY [[COPY11]] + ; GFX7-NEXT: $vgpr6 = COPY [[COPY12]] + ; GFX7-NEXT: $vgpr7 = COPY [[COPY13]] + ; GFX7-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7 ; GFX8-LABEL: name: s_buffer_load_v8f32_vgpr_offset_vgpr_rsrc_offset_4064 ; GFX8: bb.1 (%ir-block.0): - ; GFX8: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3 - ; GFX8: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX8: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; GFX8: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2 - ; GFX8: [[COPY3:%[0-9]+]]:vgpr_32 = COPY $vgpr3 - ; GFX8: [[REG_SEQUENCE:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 - ; GFX8: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0 - ; GFX8: [[COPY4:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub0_sub1 - ; GFX8: [[COPY5:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub2_sub3 - ; GFX8: [[S_MOV_B64_term:%[0-9]+]]:sreg_64_xexec = S_MOV_B64_term $exec - ; GFX8: bb.2: - ; GFX8: successors: %bb.3, %bb.2 - ; GFX8: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY4]].sub0, implicit $exec - ; GFX8: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY4]].sub1, implicit $exec - ; GFX8: [[REG_SEQUENCE1:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1 - ; GFX8: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE1]], [[COPY4]], implicit $exec - ; GFX8: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY5]].sub0, implicit $exec - ; GFX8: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY5]].sub1, implicit $exec - ; GFX8: 
[[REG_SEQUENCE2:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[V_READFIRSTLANE_B32_2]], %subreg.sub0, [[V_READFIRSTLANE_B32_3]], %subreg.sub1 - ; GFX8: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE2]], [[COPY5]], implicit $exec - ; GFX8: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_1]], [[V_CMP_EQ_U64_e64_]], implicit-def $scc - ; GFX8: [[REG_SEQUENCE3:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1, [[V_READFIRSTLANE_B32_2]], %subreg.sub2, [[V_READFIRSTLANE_B32_3]], %subreg.sub3 - ; GFX8: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_]], implicit-def $exec, implicit-def $scc, implicit $exec - ; GFX8: [[BUFFER_LOAD_DWORDX4_OFFSET:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFSET [[REG_SEQUENCE3]], [[S_MOV_B32_]], 4064, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128) from unknown-address + 4064, align 4) - ; GFX8: [[BUFFER_LOAD_DWORDX4_OFFSET1:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFSET [[REG_SEQUENCE3]], [[S_MOV_B32_]], 4080, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128) from unknown-address + 4064, align 4) - ; GFX8: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc - ; GFX8: SI_WATERFALL_LOOP %bb.2, implicit $exec - ; GFX8: bb.3: - ; GFX8: $exec = S_MOV_B64_term [[S_MOV_B64_term]] - ; GFX8: bb.4: - ; GFX8: [[REG_SEQUENCE4:%[0-9]+]]:vreg_256 = REG_SEQUENCE [[BUFFER_LOAD_DWORDX4_OFFSET]], %subreg.sub0_sub1_sub2_sub3, [[BUFFER_LOAD_DWORDX4_OFFSET1]], %subreg.sub4_sub5_sub6_sub7 - ; GFX8: [[COPY6:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub0 - ; GFX8: [[COPY7:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub1 - ; GFX8: [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub2 - ; GFX8: [[COPY9:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub3 - ; GFX8: [[COPY10:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub4 - ; GFX8: [[COPY11:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub5 - ; GFX8: [[COPY12:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub6 - ; GFX8: [[COPY13:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub7 - ; GFX8: $vgpr0 = COPY [[COPY6]] - ; GFX8: $vgpr1 = COPY [[COPY7]] - ; GFX8: $vgpr2 = COPY [[COPY8]] - ; GFX8: $vgpr3 = COPY [[COPY9]] - ; GFX8: $vgpr4 = COPY [[COPY10]] - ; GFX8: $vgpr5 = COPY [[COPY11]] - ; GFX8: $vgpr6 = COPY [[COPY12]] - ; GFX8: $vgpr7 = COPY [[COPY13]] - ; GFX8: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7 + ; GFX8-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3 + ; GFX8-NEXT: {{ $}} + ; GFX8-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 + ; GFX8-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 + ; GFX8-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2 + ; GFX8-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY $vgpr3 + ; GFX8-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 + ; GFX8-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0 + ; GFX8-NEXT: [[COPY4:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub0_sub1 + ; GFX8-NEXT: [[COPY5:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub2_sub3 + ; GFX8-NEXT: [[S_MOV_B64_term:%[0-9]+]]:sreg_64_xexec = S_MOV_B64_term $exec + ; GFX8-NEXT: {{ $}} + ; GFX8-NEXT: bb.2: + ; GFX8-NEXT: successors: %bb.3, %bb.2 + ; GFX8-NEXT: {{ $}} + ; GFX8-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY4]].sub0, implicit 
$exec + ; GFX8-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY4]].sub1, implicit $exec + ; GFX8-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1 + ; GFX8-NEXT: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE1]], [[COPY4]], implicit $exec + ; GFX8-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY5]].sub0, implicit $exec + ; GFX8-NEXT: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY5]].sub1, implicit $exec + ; GFX8-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[V_READFIRSTLANE_B32_2]], %subreg.sub0, [[V_READFIRSTLANE_B32_3]], %subreg.sub1 + ; GFX8-NEXT: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE2]], [[COPY5]], implicit $exec + ; GFX8-NEXT: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_1]], [[V_CMP_EQ_U64_e64_]], implicit-def $scc + ; GFX8-NEXT: [[REG_SEQUENCE3:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1, [[V_READFIRSTLANE_B32_2]], %subreg.sub2, [[V_READFIRSTLANE_B32_3]], %subreg.sub3 + ; GFX8-NEXT: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_]], implicit-def $exec, implicit-def $scc, implicit $exec + ; GFX8-NEXT: [[BUFFER_LOAD_DWORDX4_OFFSET:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFSET [[REG_SEQUENCE3]], [[S_MOV_B32_]], 4064, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128) from unknown-address + 4064, align 4) + ; GFX8-NEXT: [[BUFFER_LOAD_DWORDX4_OFFSET1:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFSET [[REG_SEQUENCE3]], [[S_MOV_B32_]], 4080, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s128) from unknown-address + 4064, align 4) + ; GFX8-NEXT: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc + ; GFX8-NEXT: SI_WATERFALL_LOOP %bb.2, implicit $exec + ; GFX8-NEXT: {{ $}} + ; GFX8-NEXT: bb.3: + ; GFX8-NEXT: $exec = S_MOV_B64_term [[S_MOV_B64_term]] + ; GFX8-NEXT: {{ $}} + ; GFX8-NEXT: bb.4: + ; GFX8-NEXT: [[REG_SEQUENCE4:%[0-9]+]]:vreg_256 = REG_SEQUENCE [[BUFFER_LOAD_DWORDX4_OFFSET]], %subreg.sub0_sub1_sub2_sub3, [[BUFFER_LOAD_DWORDX4_OFFSET1]], %subreg.sub4_sub5_sub6_sub7 + ; GFX8-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub0 + ; GFX8-NEXT: [[COPY7:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub1 + ; GFX8-NEXT: [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub2 + ; GFX8-NEXT: [[COPY9:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub3 + ; GFX8-NEXT: [[COPY10:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub4 + ; GFX8-NEXT: [[COPY11:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub5 + ; GFX8-NEXT: [[COPY12:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub6 + ; GFX8-NEXT: [[COPY13:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE4]].sub7 + ; GFX8-NEXT: $vgpr0 = COPY [[COPY6]] + ; GFX8-NEXT: $vgpr1 = COPY [[COPY7]] + ; GFX8-NEXT: $vgpr2 = COPY [[COPY8]] + ; GFX8-NEXT: $vgpr3 = COPY [[COPY9]] + ; GFX8-NEXT: $vgpr4 = COPY [[COPY10]] + ; GFX8-NEXT: $vgpr5 = COPY [[COPY11]] + ; GFX8-NEXT: $vgpr6 = COPY [[COPY12]] + ; GFX8-NEXT: $vgpr7 = COPY [[COPY13]] + ; GFX8-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7 %val = call <8 x float> @llvm.amdgcn.s.buffer.load.v8f32(<4 x i32> %rsrc, i32 4064, i32 0) ret <8 x float> %val } @@ -4269,43 +4542,46 @@ 
define amdgpu_ps <8 x float> @s_buffer_load_v8f32_vgpr_offset_vgpr_rsrc_offset_4 define amdgpu_ps float @s_buffer_load_f32_offset_add_vgpr_sgpr(<4 x i32> inreg %rsrc, i32 %offset.v, i32 inreg %offset.s) { ; GFX6-LABEL: name: s_buffer_load_f32_offset_add_vgpr_sgpr ; GFX6: bb.1 (%ir-block.0): - ; GFX6: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $vgpr0 - ; GFX6: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2 - ; GFX6: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3 - ; GFX6: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4 - ; GFX6: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5 - ; GFX6: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 - ; GFX6: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX6: [[COPY5:%[0-9]+]]:sreg_32 = COPY $sgpr6 - ; GFX6: [[BUFFER_LOAD_DWORD_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[COPY5]], 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s32)) - ; GFX6: $vgpr0 = COPY [[BUFFER_LOAD_DWORD_OFFEN]] - ; GFX6: SI_RETURN_TO_EPILOG implicit $vgpr0 + ; GFX6-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $vgpr0 + ; GFX6-NEXT: {{ $}} + ; GFX6-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2 + ; GFX6-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3 + ; GFX6-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4 + ; GFX6-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5 + ; GFX6-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 + ; GFX6-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0 + ; GFX6-NEXT: [[COPY5:%[0-9]+]]:sreg_32 = COPY $sgpr6 + ; GFX6-NEXT: [[BUFFER_LOAD_DWORD_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[COPY5]], 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s32)) + ; GFX6-NEXT: $vgpr0 = COPY [[BUFFER_LOAD_DWORD_OFFEN]] + ; GFX6-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0 ; GFX7-LABEL: name: s_buffer_load_f32_offset_add_vgpr_sgpr ; GFX7: bb.1 (%ir-block.0): - ; GFX7: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $vgpr0 - ; GFX7: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2 - ; GFX7: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3 - ; GFX7: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4 - ; GFX7: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5 - ; GFX7: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 - ; GFX7: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX7: [[COPY5:%[0-9]+]]:sreg_32 = COPY $sgpr6 - ; GFX7: [[BUFFER_LOAD_DWORD_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[COPY5]], 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s32)) - ; GFX7: $vgpr0 = COPY [[BUFFER_LOAD_DWORD_OFFEN]] - ; GFX7: SI_RETURN_TO_EPILOG implicit $vgpr0 + ; GFX7-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $vgpr0 + ; GFX7-NEXT: {{ $}} + ; GFX7-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2 + ; GFX7-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3 + ; GFX7-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4 + ; GFX7-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5 + ; GFX7-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 + ; GFX7-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0 + ; GFX7-NEXT: [[COPY5:%[0-9]+]]:sreg_32 = COPY $sgpr6 + ; GFX7-NEXT: [[BUFFER_LOAD_DWORD_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFEN 
[[COPY4]], [[REG_SEQUENCE]], [[COPY5]], 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s32)) + ; GFX7-NEXT: $vgpr0 = COPY [[BUFFER_LOAD_DWORD_OFFEN]] + ; GFX7-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0 ; GFX8-LABEL: name: s_buffer_load_f32_offset_add_vgpr_sgpr ; GFX8: bb.1 (%ir-block.0): - ; GFX8: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $vgpr0 - ; GFX8: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2 - ; GFX8: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3 - ; GFX8: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4 - ; GFX8: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5 - ; GFX8: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 - ; GFX8: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX8: [[COPY5:%[0-9]+]]:sreg_32 = COPY $sgpr6 - ; GFX8: [[BUFFER_LOAD_DWORD_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[COPY5]], 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s32)) - ; GFX8: $vgpr0 = COPY [[BUFFER_LOAD_DWORD_OFFEN]] - ; GFX8: SI_RETURN_TO_EPILOG implicit $vgpr0 + ; GFX8-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $vgpr0 + ; GFX8-NEXT: {{ $}} + ; GFX8-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2 + ; GFX8-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3 + ; GFX8-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4 + ; GFX8-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5 + ; GFX8-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 + ; GFX8-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0 + ; GFX8-NEXT: [[COPY5:%[0-9]+]]:sreg_32 = COPY $sgpr6 + ; GFX8-NEXT: [[BUFFER_LOAD_DWORD_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[COPY5]], 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s32)) + ; GFX8-NEXT: $vgpr0 = COPY [[BUFFER_LOAD_DWORD_OFFEN]] + ; GFX8-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0 %offset = add i32 %offset.v, %offset.s %val = call float @llvm.amdgcn.s.buffer.load.f32(<4 x i32> %rsrc, i32 %offset, i32 0) ret float %val @@ -4314,43 +4590,46 @@ define amdgpu_ps float @s_buffer_load_f32_offset_add_vgpr_sgpr(<4 x i32> inreg % define amdgpu_ps float @s_buffer_load_f32_offset_add_sgpr_vgpr(<4 x i32> inreg %rsrc, i32 %offset.v, i32 inreg %offset.s) { ; GFX6-LABEL: name: s_buffer_load_f32_offset_add_sgpr_vgpr ; GFX6: bb.1 (%ir-block.0): - ; GFX6: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $vgpr0 - ; GFX6: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2 - ; GFX6: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3 - ; GFX6: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4 - ; GFX6: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5 - ; GFX6: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 - ; GFX6: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX6: [[COPY5:%[0-9]+]]:sreg_32 = COPY $sgpr6 - ; GFX6: [[BUFFER_LOAD_DWORD_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[COPY5]], 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s32)) - ; GFX6: $vgpr0 = COPY [[BUFFER_LOAD_DWORD_OFFEN]] - ; GFX6: SI_RETURN_TO_EPILOG implicit $vgpr0 + ; GFX6-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $vgpr0 + ; GFX6-NEXT: {{ $}} + ; GFX6-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2 + ; GFX6-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3 + ; GFX6-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4 + ; GFX6-NEXT: 
[[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5 + ; GFX6-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 + ; GFX6-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0 + ; GFX6-NEXT: [[COPY5:%[0-9]+]]:sreg_32 = COPY $sgpr6 + ; GFX6-NEXT: [[BUFFER_LOAD_DWORD_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[COPY5]], 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s32)) + ; GFX6-NEXT: $vgpr0 = COPY [[BUFFER_LOAD_DWORD_OFFEN]] + ; GFX6-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0 ; GFX7-LABEL: name: s_buffer_load_f32_offset_add_sgpr_vgpr ; GFX7: bb.1 (%ir-block.0): - ; GFX7: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $vgpr0 - ; GFX7: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2 - ; GFX7: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3 - ; GFX7: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4 - ; GFX7: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5 - ; GFX7: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 - ; GFX7: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX7: [[COPY5:%[0-9]+]]:sreg_32 = COPY $sgpr6 - ; GFX7: [[BUFFER_LOAD_DWORD_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[COPY5]], 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s32)) - ; GFX7: $vgpr0 = COPY [[BUFFER_LOAD_DWORD_OFFEN]] - ; GFX7: SI_RETURN_TO_EPILOG implicit $vgpr0 + ; GFX7-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $vgpr0 + ; GFX7-NEXT: {{ $}} + ; GFX7-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2 + ; GFX7-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3 + ; GFX7-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4 + ; GFX7-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5 + ; GFX7-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 + ; GFX7-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0 + ; GFX7-NEXT: [[COPY5:%[0-9]+]]:sreg_32 = COPY $sgpr6 + ; GFX7-NEXT: [[BUFFER_LOAD_DWORD_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[COPY5]], 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s32)) + ; GFX7-NEXT: $vgpr0 = COPY [[BUFFER_LOAD_DWORD_OFFEN]] + ; GFX7-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0 ; GFX8-LABEL: name: s_buffer_load_f32_offset_add_sgpr_vgpr ; GFX8: bb.1 (%ir-block.0): - ; GFX8: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $vgpr0 - ; GFX8: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2 - ; GFX8: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3 - ; GFX8: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4 - ; GFX8: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5 - ; GFX8: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 - ; GFX8: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX8: [[COPY5:%[0-9]+]]:sreg_32 = COPY $sgpr6 - ; GFX8: [[BUFFER_LOAD_DWORD_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[COPY5]], 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s32)) - ; GFX8: $vgpr0 = COPY [[BUFFER_LOAD_DWORD_OFFEN]] - ; GFX8: SI_RETURN_TO_EPILOG implicit $vgpr0 + ; GFX8-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $vgpr0 + ; GFX8-NEXT: {{ $}} + ; GFX8-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2 + ; GFX8-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3 + ; GFX8-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4 + ; 
GFX8-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5 + ; GFX8-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 + ; GFX8-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0 + ; GFX8-NEXT: [[COPY5:%[0-9]+]]:sreg_32 = COPY $sgpr6 + ; GFX8-NEXT: [[BUFFER_LOAD_DWORD_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[COPY5]], 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s32)) + ; GFX8-NEXT: $vgpr0 = COPY [[BUFFER_LOAD_DWORD_OFFEN]] + ; GFX8-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0 %offset = add i32 %offset.s, %offset.v %val = call float @llvm.amdgcn.s.buffer.load.f32(<4 x i32> %rsrc, i32 %offset, i32 0) ret float %val @@ -4359,52 +4638,55 @@ define amdgpu_ps float @s_buffer_load_f32_offset_add_sgpr_vgpr(<4 x i32> inreg % define amdgpu_ps float @s_buffer_load_f32_offset_add_vgpr_sgpr_imm(<4 x i32> inreg %rsrc, i32 %offset.v, i32 inreg %offset.s) { ; GFX6-LABEL: name: s_buffer_load_f32_offset_add_vgpr_sgpr_imm ; GFX6: bb.1 (%ir-block.0): - ; GFX6: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $vgpr0 - ; GFX6: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2 - ; GFX6: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3 - ; GFX6: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4 - ; GFX6: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5 - ; GFX6: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 - ; GFX6: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX6: [[COPY5:%[0-9]+]]:sreg_32 = COPY $sgpr6 - ; GFX6: [[COPY6:%[0-9]+]]:vgpr_32 = COPY [[COPY5]] - ; GFX6: %9:vgpr_32, dead %17:sreg_64_xexec = V_ADD_CO_U32_e64 [[COPY4]], [[COPY6]], 0, implicit $exec - ; GFX6: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0 - ; GFX6: [[BUFFER_LOAD_DWORD_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFEN %9, [[REG_SEQUENCE]], [[S_MOV_B32_]], 1024, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s32)) - ; GFX6: $vgpr0 = COPY [[BUFFER_LOAD_DWORD_OFFEN]] - ; GFX6: SI_RETURN_TO_EPILOG implicit $vgpr0 + ; GFX6-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $vgpr0 + ; GFX6-NEXT: {{ $}} + ; GFX6-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2 + ; GFX6-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3 + ; GFX6-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4 + ; GFX6-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5 + ; GFX6-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 + ; GFX6-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0 + ; GFX6-NEXT: [[COPY5:%[0-9]+]]:sreg_32 = COPY $sgpr6 + ; GFX6-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY [[COPY5]] + ; GFX6-NEXT: %9:vgpr_32, dead %17:sreg_64_xexec = V_ADD_CO_U32_e64 [[COPY4]], [[COPY6]], 0, implicit $exec + ; GFX6-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0 + ; GFX6-NEXT: [[BUFFER_LOAD_DWORD_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFEN %9, [[REG_SEQUENCE]], [[S_MOV_B32_]], 1024, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s32)) + ; GFX6-NEXT: $vgpr0 = COPY [[BUFFER_LOAD_DWORD_OFFEN]] + ; GFX6-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0 ; GFX7-LABEL: name: s_buffer_load_f32_offset_add_vgpr_sgpr_imm ; GFX7: bb.1 (%ir-block.0): - ; GFX7: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $vgpr0 - ; GFX7: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2 - ; GFX7: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3 - ; GFX7: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4 - ; GFX7: 
[[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5 - ; GFX7: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 - ; GFX7: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX7: [[COPY5:%[0-9]+]]:sreg_32 = COPY $sgpr6 - ; GFX7: [[COPY6:%[0-9]+]]:vgpr_32 = COPY [[COPY5]] - ; GFX7: %9:vgpr_32, dead %17:sreg_64_xexec = V_ADD_CO_U32_e64 [[COPY4]], [[COPY6]], 0, implicit $exec - ; GFX7: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0 - ; GFX7: [[BUFFER_LOAD_DWORD_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFEN %9, [[REG_SEQUENCE]], [[S_MOV_B32_]], 1024, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s32)) - ; GFX7: $vgpr0 = COPY [[BUFFER_LOAD_DWORD_OFFEN]] - ; GFX7: SI_RETURN_TO_EPILOG implicit $vgpr0 + ; GFX7-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $vgpr0 + ; GFX7-NEXT: {{ $}} + ; GFX7-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2 + ; GFX7-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3 + ; GFX7-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4 + ; GFX7-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5 + ; GFX7-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 + ; GFX7-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0 + ; GFX7-NEXT: [[COPY5:%[0-9]+]]:sreg_32 = COPY $sgpr6 + ; GFX7-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY [[COPY5]] + ; GFX7-NEXT: %9:vgpr_32, dead %17:sreg_64_xexec = V_ADD_CO_U32_e64 [[COPY4]], [[COPY6]], 0, implicit $exec + ; GFX7-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0 + ; GFX7-NEXT: [[BUFFER_LOAD_DWORD_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFEN %9, [[REG_SEQUENCE]], [[S_MOV_B32_]], 1024, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s32)) + ; GFX7-NEXT: $vgpr0 = COPY [[BUFFER_LOAD_DWORD_OFFEN]] + ; GFX7-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0 ; GFX8-LABEL: name: s_buffer_load_f32_offset_add_vgpr_sgpr_imm ; GFX8: bb.1 (%ir-block.0): - ; GFX8: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $vgpr0 - ; GFX8: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2 - ; GFX8: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3 - ; GFX8: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4 - ; GFX8: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5 - ; GFX8: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 - ; GFX8: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX8: [[COPY5:%[0-9]+]]:sreg_32 = COPY $sgpr6 - ; GFX8: [[COPY6:%[0-9]+]]:vgpr_32 = COPY [[COPY5]] - ; GFX8: %9:vgpr_32, dead %17:sreg_64_xexec = V_ADD_CO_U32_e64 [[COPY4]], [[COPY6]], 0, implicit $exec - ; GFX8: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0 - ; GFX8: [[BUFFER_LOAD_DWORD_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFEN %9, [[REG_SEQUENCE]], [[S_MOV_B32_]], 1024, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s32)) - ; GFX8: $vgpr0 = COPY [[BUFFER_LOAD_DWORD_OFFEN]] - ; GFX8: SI_RETURN_TO_EPILOG implicit $vgpr0 + ; GFX8-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $vgpr0 + ; GFX8-NEXT: {{ $}} + ; GFX8-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2 + ; GFX8-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3 + ; GFX8-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4 + ; GFX8-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5 + ; GFX8-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 + ; GFX8-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0 + 
; GFX8-NEXT: [[COPY5:%[0-9]+]]:sreg_32 = COPY $sgpr6 + ; GFX8-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY [[COPY5]] + ; GFX8-NEXT: %9:vgpr_32, dead %17:sreg_64_xexec = V_ADD_CO_U32_e64 [[COPY4]], [[COPY6]], 0, implicit $exec + ; GFX8-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0 + ; GFX8-NEXT: [[BUFFER_LOAD_DWORD_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFEN %9, [[REG_SEQUENCE]], [[S_MOV_B32_]], 1024, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s32)) + ; GFX8-NEXT: $vgpr0 = COPY [[BUFFER_LOAD_DWORD_OFFEN]] + ; GFX8-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0 %offset.base = add i32 %offset.v, %offset.s %offset = add i32 %offset.base, 1024 %val = call float @llvm.amdgcn.s.buffer.load.f32(<4 x i32> %rsrc, i32 %offset, i32 0) @@ -4414,52 +4696,55 @@ define amdgpu_ps float @s_buffer_load_f32_offset_add_vgpr_sgpr_imm(<4 x i32> inr define amdgpu_ps float @s_buffer_load_f32_offset_add_sgpr_vgpr_imm(<4 x i32> inreg %rsrc, i32 %offset.v, i32 inreg %offset.s) { ; GFX6-LABEL: name: s_buffer_load_f32_offset_add_sgpr_vgpr_imm ; GFX6: bb.1 (%ir-block.0): - ; GFX6: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $vgpr0 - ; GFX6: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2 - ; GFX6: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3 - ; GFX6: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4 - ; GFX6: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5 - ; GFX6: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 - ; GFX6: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX6: [[COPY5:%[0-9]+]]:sreg_32 = COPY $sgpr6 - ; GFX6: [[COPY6:%[0-9]+]]:vgpr_32 = COPY [[COPY5]] - ; GFX6: %9:vgpr_32, dead %17:sreg_64_xexec = V_ADD_CO_U32_e64 [[COPY6]], [[COPY4]], 0, implicit $exec - ; GFX6: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0 - ; GFX6: [[BUFFER_LOAD_DWORD_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFEN %9, [[REG_SEQUENCE]], [[S_MOV_B32_]], 1024, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s32)) - ; GFX6: $vgpr0 = COPY [[BUFFER_LOAD_DWORD_OFFEN]] - ; GFX6: SI_RETURN_TO_EPILOG implicit $vgpr0 + ; GFX6-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $vgpr0 + ; GFX6-NEXT: {{ $}} + ; GFX6-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2 + ; GFX6-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3 + ; GFX6-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4 + ; GFX6-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5 + ; GFX6-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 + ; GFX6-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0 + ; GFX6-NEXT: [[COPY5:%[0-9]+]]:sreg_32 = COPY $sgpr6 + ; GFX6-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY [[COPY5]] + ; GFX6-NEXT: %9:vgpr_32, dead %17:sreg_64_xexec = V_ADD_CO_U32_e64 [[COPY6]], [[COPY4]], 0, implicit $exec + ; GFX6-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0 + ; GFX6-NEXT: [[BUFFER_LOAD_DWORD_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFEN %9, [[REG_SEQUENCE]], [[S_MOV_B32_]], 1024, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s32)) + ; GFX6-NEXT: $vgpr0 = COPY [[BUFFER_LOAD_DWORD_OFFEN]] + ; GFX6-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0 ; GFX7-LABEL: name: s_buffer_load_f32_offset_add_sgpr_vgpr_imm ; GFX7: bb.1 (%ir-block.0): - ; GFX7: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $vgpr0 - ; GFX7: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2 - ; GFX7: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3 - ; GFX7: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4 - ; GFX7: [[COPY3:%[0-9]+]]:sreg_32 = 
COPY $sgpr5 - ; GFX7: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 - ; GFX7: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX7: [[COPY5:%[0-9]+]]:sreg_32 = COPY $sgpr6 - ; GFX7: [[COPY6:%[0-9]+]]:vgpr_32 = COPY [[COPY5]] - ; GFX7: %9:vgpr_32, dead %17:sreg_64_xexec = V_ADD_CO_U32_e64 [[COPY6]], [[COPY4]], 0, implicit $exec - ; GFX7: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0 - ; GFX7: [[BUFFER_LOAD_DWORD_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFEN %9, [[REG_SEQUENCE]], [[S_MOV_B32_]], 1024, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s32)) - ; GFX7: $vgpr0 = COPY [[BUFFER_LOAD_DWORD_OFFEN]] - ; GFX7: SI_RETURN_TO_EPILOG implicit $vgpr0 + ; GFX7-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $vgpr0 + ; GFX7-NEXT: {{ $}} + ; GFX7-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2 + ; GFX7-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3 + ; GFX7-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4 + ; GFX7-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5 + ; GFX7-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 + ; GFX7-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0 + ; GFX7-NEXT: [[COPY5:%[0-9]+]]:sreg_32 = COPY $sgpr6 + ; GFX7-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY [[COPY5]] + ; GFX7-NEXT: %9:vgpr_32, dead %17:sreg_64_xexec = V_ADD_CO_U32_e64 [[COPY6]], [[COPY4]], 0, implicit $exec + ; GFX7-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0 + ; GFX7-NEXT: [[BUFFER_LOAD_DWORD_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFEN %9, [[REG_SEQUENCE]], [[S_MOV_B32_]], 1024, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s32)) + ; GFX7-NEXT: $vgpr0 = COPY [[BUFFER_LOAD_DWORD_OFFEN]] + ; GFX7-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0 ; GFX8-LABEL: name: s_buffer_load_f32_offset_add_sgpr_vgpr_imm ; GFX8: bb.1 (%ir-block.0): - ; GFX8: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $vgpr0 - ; GFX8: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2 - ; GFX8: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3 - ; GFX8: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4 - ; GFX8: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5 - ; GFX8: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 - ; GFX8: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX8: [[COPY5:%[0-9]+]]:sreg_32 = COPY $sgpr6 - ; GFX8: [[COPY6:%[0-9]+]]:vgpr_32 = COPY [[COPY5]] - ; GFX8: %9:vgpr_32, dead %17:sreg_64_xexec = V_ADD_CO_U32_e64 [[COPY6]], [[COPY4]], 0, implicit $exec - ; GFX8: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0 - ; GFX8: [[BUFFER_LOAD_DWORD_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFEN %9, [[REG_SEQUENCE]], [[S_MOV_B32_]], 1024, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s32)) - ; GFX8: $vgpr0 = COPY [[BUFFER_LOAD_DWORD_OFFEN]] - ; GFX8: SI_RETURN_TO_EPILOG implicit $vgpr0 + ; GFX8-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $vgpr0 + ; GFX8-NEXT: {{ $}} + ; GFX8-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2 + ; GFX8-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3 + ; GFX8-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4 + ; GFX8-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5 + ; GFX8-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 + ; GFX8-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0 + ; GFX8-NEXT: 
[[COPY5:%[0-9]+]]:sreg_32 = COPY $sgpr6 + ; GFX8-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY [[COPY5]] + ; GFX8-NEXT: %9:vgpr_32, dead %17:sreg_64_xexec = V_ADD_CO_U32_e64 [[COPY6]], [[COPY4]], 0, implicit $exec + ; GFX8-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0 + ; GFX8-NEXT: [[BUFFER_LOAD_DWORD_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFEN %9, [[REG_SEQUENCE]], [[S_MOV_B32_]], 1024, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s32)) + ; GFX8-NEXT: $vgpr0 = COPY [[BUFFER_LOAD_DWORD_OFFEN]] + ; GFX8-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0 %offset.base = add i32 %offset.s, %offset.v %offset = add i32 %offset.base, 1024 %val = call float @llvm.amdgcn.s.buffer.load.f32(<4 x i32> %rsrc, i32 %offset, i32 0) @@ -4470,49 +4755,52 @@ define amdgpu_ps float @s_buffer_load_f32_offset_add_sgpr_vgpr_imm(<4 x i32> inr define amdgpu_ps float @s_buffer_load_f32_offset_add_imm_sgpr_vgpr(<4 x i32> inreg %rsrc, i32 %offset.v, i32 inreg %offset.s) { ; GFX6-LABEL: name: s_buffer_load_f32_offset_add_imm_sgpr_vgpr ; GFX6: bb.1 (%ir-block.0): - ; GFX6: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $vgpr0 - ; GFX6: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2 - ; GFX6: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3 - ; GFX6: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4 - ; GFX6: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5 - ; GFX6: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 - ; GFX6: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX6: [[COPY5:%[0-9]+]]:sreg_32 = COPY $sgpr6 - ; GFX6: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 1024 - ; GFX6: [[S_ADD_I32_:%[0-9]+]]:sreg_32 = S_ADD_I32 [[COPY5]], [[S_MOV_B32_]], implicit-def $scc - ; GFX6: [[BUFFER_LOAD_DWORD_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[S_ADD_I32_]], 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s32)) - ; GFX6: $vgpr0 = COPY [[BUFFER_LOAD_DWORD_OFFEN]] - ; GFX6: SI_RETURN_TO_EPILOG implicit $vgpr0 + ; GFX6-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $vgpr0 + ; GFX6-NEXT: {{ $}} + ; GFX6-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2 + ; GFX6-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3 + ; GFX6-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4 + ; GFX6-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5 + ; GFX6-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 + ; GFX6-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0 + ; GFX6-NEXT: [[COPY5:%[0-9]+]]:sreg_32 = COPY $sgpr6 + ; GFX6-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 1024 + ; GFX6-NEXT: [[S_ADD_I32_:%[0-9]+]]:sreg_32 = S_ADD_I32 [[COPY5]], [[S_MOV_B32_]], implicit-def $scc + ; GFX6-NEXT: [[BUFFER_LOAD_DWORD_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[S_ADD_I32_]], 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s32)) + ; GFX6-NEXT: $vgpr0 = COPY [[BUFFER_LOAD_DWORD_OFFEN]] + ; GFX6-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0 ; GFX7-LABEL: name: s_buffer_load_f32_offset_add_imm_sgpr_vgpr ; GFX7: bb.1 (%ir-block.0): - ; GFX7: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $vgpr0 - ; GFX7: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2 - ; GFX7: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3 - ; GFX7: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4 - ; GFX7: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5 - ; GFX7: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, 
[[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 - ; GFX7: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX7: [[COPY5:%[0-9]+]]:sreg_32 = COPY $sgpr6 - ; GFX7: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 1024 - ; GFX7: [[S_ADD_I32_:%[0-9]+]]:sreg_32 = S_ADD_I32 [[COPY5]], [[S_MOV_B32_]], implicit-def $scc - ; GFX7: [[BUFFER_LOAD_DWORD_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[S_ADD_I32_]], 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s32)) - ; GFX7: $vgpr0 = COPY [[BUFFER_LOAD_DWORD_OFFEN]] - ; GFX7: SI_RETURN_TO_EPILOG implicit $vgpr0 + ; GFX7-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $vgpr0 + ; GFX7-NEXT: {{ $}} + ; GFX7-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2 + ; GFX7-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3 + ; GFX7-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4 + ; GFX7-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5 + ; GFX7-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 + ; GFX7-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0 + ; GFX7-NEXT: [[COPY5:%[0-9]+]]:sreg_32 = COPY $sgpr6 + ; GFX7-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 1024 + ; GFX7-NEXT: [[S_ADD_I32_:%[0-9]+]]:sreg_32 = S_ADD_I32 [[COPY5]], [[S_MOV_B32_]], implicit-def $scc + ; GFX7-NEXT: [[BUFFER_LOAD_DWORD_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[S_ADD_I32_]], 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s32)) + ; GFX7-NEXT: $vgpr0 = COPY [[BUFFER_LOAD_DWORD_OFFEN]] + ; GFX7-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0 ; GFX8-LABEL: name: s_buffer_load_f32_offset_add_imm_sgpr_vgpr ; GFX8: bb.1 (%ir-block.0): - ; GFX8: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $vgpr0 - ; GFX8: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2 - ; GFX8: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3 - ; GFX8: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4 - ; GFX8: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5 - ; GFX8: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 - ; GFX8: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX8: [[COPY5:%[0-9]+]]:sreg_32 = COPY $sgpr6 - ; GFX8: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 1024 - ; GFX8: [[S_ADD_I32_:%[0-9]+]]:sreg_32 = S_ADD_I32 [[COPY5]], [[S_MOV_B32_]], implicit-def $scc - ; GFX8: [[BUFFER_LOAD_DWORD_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[S_ADD_I32_]], 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s32)) - ; GFX8: $vgpr0 = COPY [[BUFFER_LOAD_DWORD_OFFEN]] - ; GFX8: SI_RETURN_TO_EPILOG implicit $vgpr0 + ; GFX8-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $vgpr0 + ; GFX8-NEXT: {{ $}} + ; GFX8-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2 + ; GFX8-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3 + ; GFX8-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4 + ; GFX8-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5 + ; GFX8-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 + ; GFX8-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0 + ; GFX8-NEXT: [[COPY5:%[0-9]+]]:sreg_32 = COPY $sgpr6 + ; GFX8-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 1024 + ; GFX8-NEXT: [[S_ADD_I32_:%[0-9]+]]:sreg_32 = S_ADD_I32 [[COPY5]], [[S_MOV_B32_]], implicit-def $scc + ; GFX8-NEXT: [[BUFFER_LOAD_DWORD_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFEN 
[[COPY4]], [[REG_SEQUENCE]], [[S_ADD_I32_]], 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s32)) + ; GFX8-NEXT: $vgpr0 = COPY [[BUFFER_LOAD_DWORD_OFFEN]] + ; GFX8-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0 %offset.base = add i32 %offset.s, 1024 %offset = add i32 %offset.base, %offset.v %val = call float @llvm.amdgcn.s.buffer.load.f32(<4 x i32> %rsrc, i32 %offset, i32 0) @@ -4522,52 +4810,55 @@ define amdgpu_ps float @s_buffer_load_f32_offset_add_imm_sgpr_vgpr(<4 x i32> inr define amdgpu_ps float @s_buffer_load_f32_offset_add_imm_vgpr_sgpr(<4 x i32> inreg %rsrc, i32 %offset.v, i32 inreg %offset.s) { ; GFX6-LABEL: name: s_buffer_load_f32_offset_add_imm_vgpr_sgpr ; GFX6: bb.1 (%ir-block.0): - ; GFX6: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $vgpr0 - ; GFX6: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2 - ; GFX6: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3 - ; GFX6: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4 - ; GFX6: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5 - ; GFX6: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 - ; GFX6: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX6: [[COPY5:%[0-9]+]]:sreg_32 = COPY $sgpr6 - ; GFX6: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 1024 - ; GFX6: [[COPY6:%[0-9]+]]:vgpr_32 = COPY [[S_MOV_B32_]] - ; GFX6: %10:vgpr_32, dead %16:sreg_64_xexec = V_ADD_CO_U32_e64 [[COPY4]], [[COPY6]], 0, implicit $exec - ; GFX6: [[BUFFER_LOAD_DWORD_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFEN %10, [[REG_SEQUENCE]], [[COPY5]], 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s32)) - ; GFX6: $vgpr0 = COPY [[BUFFER_LOAD_DWORD_OFFEN]] - ; GFX6: SI_RETURN_TO_EPILOG implicit $vgpr0 + ; GFX6-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $vgpr0 + ; GFX6-NEXT: {{ $}} + ; GFX6-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2 + ; GFX6-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3 + ; GFX6-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4 + ; GFX6-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5 + ; GFX6-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 + ; GFX6-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0 + ; GFX6-NEXT: [[COPY5:%[0-9]+]]:sreg_32 = COPY $sgpr6 + ; GFX6-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 1024 + ; GFX6-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY [[S_MOV_B32_]] + ; GFX6-NEXT: %10:vgpr_32, dead %16:sreg_64_xexec = V_ADD_CO_U32_e64 [[COPY4]], [[COPY6]], 0, implicit $exec + ; GFX6-NEXT: [[BUFFER_LOAD_DWORD_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFEN %10, [[REG_SEQUENCE]], [[COPY5]], 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s32)) + ; GFX6-NEXT: $vgpr0 = COPY [[BUFFER_LOAD_DWORD_OFFEN]] + ; GFX6-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0 ; GFX7-LABEL: name: s_buffer_load_f32_offset_add_imm_vgpr_sgpr ; GFX7: bb.1 (%ir-block.0): - ; GFX7: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $vgpr0 - ; GFX7: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2 - ; GFX7: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3 - ; GFX7: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4 - ; GFX7: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5 - ; GFX7: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 - ; GFX7: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX7: [[COPY5:%[0-9]+]]:sreg_32 = COPY $sgpr6 - ; GFX7: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 1024 - ; GFX7: 
[[COPY6:%[0-9]+]]:vgpr_32 = COPY [[S_MOV_B32_]] - ; GFX7: %10:vgpr_32, dead %16:sreg_64_xexec = V_ADD_CO_U32_e64 [[COPY4]], [[COPY6]], 0, implicit $exec - ; GFX7: [[BUFFER_LOAD_DWORD_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFEN %10, [[REG_SEQUENCE]], [[COPY5]], 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s32)) - ; GFX7: $vgpr0 = COPY [[BUFFER_LOAD_DWORD_OFFEN]] - ; GFX7: SI_RETURN_TO_EPILOG implicit $vgpr0 + ; GFX7-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $vgpr0 + ; GFX7-NEXT: {{ $}} + ; GFX7-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2 + ; GFX7-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3 + ; GFX7-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4 + ; GFX7-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5 + ; GFX7-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 + ; GFX7-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0 + ; GFX7-NEXT: [[COPY5:%[0-9]+]]:sreg_32 = COPY $sgpr6 + ; GFX7-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 1024 + ; GFX7-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY [[S_MOV_B32_]] + ; GFX7-NEXT: %10:vgpr_32, dead %16:sreg_64_xexec = V_ADD_CO_U32_e64 [[COPY4]], [[COPY6]], 0, implicit $exec + ; GFX7-NEXT: [[BUFFER_LOAD_DWORD_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFEN %10, [[REG_SEQUENCE]], [[COPY5]], 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s32)) + ; GFX7-NEXT: $vgpr0 = COPY [[BUFFER_LOAD_DWORD_OFFEN]] + ; GFX7-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0 ; GFX8-LABEL: name: s_buffer_load_f32_offset_add_imm_vgpr_sgpr ; GFX8: bb.1 (%ir-block.0): - ; GFX8: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $vgpr0 - ; GFX8: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2 - ; GFX8: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3 - ; GFX8: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4 - ; GFX8: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5 - ; GFX8: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 - ; GFX8: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX8: [[COPY5:%[0-9]+]]:sreg_32 = COPY $sgpr6 - ; GFX8: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 1024 - ; GFX8: [[COPY6:%[0-9]+]]:vgpr_32 = COPY [[S_MOV_B32_]] - ; GFX8: %10:vgpr_32, dead %16:sreg_64_xexec = V_ADD_CO_U32_e64 [[COPY4]], [[COPY6]], 0, implicit $exec - ; GFX8: [[BUFFER_LOAD_DWORD_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFEN %10, [[REG_SEQUENCE]], [[COPY5]], 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s32)) - ; GFX8: $vgpr0 = COPY [[BUFFER_LOAD_DWORD_OFFEN]] - ; GFX8: SI_RETURN_TO_EPILOG implicit $vgpr0 + ; GFX8-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $vgpr0 + ; GFX8-NEXT: {{ $}} + ; GFX8-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2 + ; GFX8-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3 + ; GFX8-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4 + ; GFX8-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5 + ; GFX8-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3 + ; GFX8-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0 + ; GFX8-NEXT: [[COPY5:%[0-9]+]]:sreg_32 = COPY $sgpr6 + ; GFX8-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 1024 + ; GFX8-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY [[S_MOV_B32_]] + ; GFX8-NEXT: %10:vgpr_32, dead %16:sreg_64_xexec = V_ADD_CO_U32_e64 [[COPY4]], [[COPY6]], 0, implicit $exec + ; GFX8-NEXT: [[BUFFER_LOAD_DWORD_OFFEN:%[0-9]+]]:vgpr_32 = 
BUFFER_LOAD_DWORD_OFFEN %10, [[REG_SEQUENCE]], [[COPY5]], 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s32)) + ; GFX8-NEXT: $vgpr0 = COPY [[BUFFER_LOAD_DWORD_OFFEN]] + ; GFX8-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0 %offset.base = add i32 %offset.v, 1024 %offset = add i32 %offset.base, %offset.s %val = call float @llvm.amdgcn.s.buffer.load.f32(<4 x i32> %rsrc, i32 %offset, i32 0) diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/load-constant.96.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/load-constant.96.ll index c99da17fe964..98bc9815f001 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/load-constant.96.ll +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/load-constant.96.ll @@ -530,20 +530,20 @@ define amdgpu_ps <3 x i32> @s_load_constant_v3i32_align2(<3 x i32> addrspace(4)* define amdgpu_ps <3 x i32> @s_load_constant_v3i32_align4(<3 x i32> addrspace(4)* inreg %ptr) { ; GFX9-LABEL: s_load_constant_v3i32_align4: ; GFX9: ; %bb.0: -; GFX9-NEXT: s_mov_b32 s4, s0 -; GFX9-NEXT: s_mov_b32 s5, s1 -; GFX9-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 -; GFX9-NEXT: s_load_dword s2, s[4:5], 0x8 +; GFX9-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x0 +; GFX9-NEXT: s_load_dword s2, s[0:1], 0x8 ; GFX9-NEXT: s_waitcnt lgkmcnt(0) +; GFX9-NEXT: s_mov_b32 s0, s4 +; GFX9-NEXT: s_mov_b32 s1, s5 ; GFX9-NEXT: ; return to shader part epilog ; ; GFX7-LABEL: s_load_constant_v3i32_align4: ; GFX7: ; %bb.0: -; GFX7-NEXT: s_mov_b32 s2, s0 -; GFX7-NEXT: s_mov_b32 s3, s1 -; GFX7-NEXT: s_load_dwordx2 s[0:1], s[2:3], 0x0 -; GFX7-NEXT: s_load_dword s2, s[2:3], 0x2 +; GFX7-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x0 +; GFX7-NEXT: s_load_dword s2, s[0:1], 0x2 ; GFX7-NEXT: s_waitcnt lgkmcnt(0) +; GFX7-NEXT: s_mov_b32 s0, s4 +; GFX7-NEXT: s_mov_b32 s1, s5 ; GFX7-NEXT: ; return to shader part epilog %load = load <3 x i32>, <3 x i32> addrspace(4)* %ptr, align 4 ret <3 x i32> %load @@ -552,20 +552,20 @@ define amdgpu_ps <3 x i32> @s_load_constant_v3i32_align4(<3 x i32> addrspace(4)* define amdgpu_ps i96 @s_load_constant_i96_align8(i96 addrspace(4)* inreg %ptr) { ; GFX9-LABEL: s_load_constant_i96_align8: ; GFX9: ; %bb.0: -; GFX9-NEXT: s_mov_b32 s4, s0 -; GFX9-NEXT: s_mov_b32 s5, s1 -; GFX9-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 -; GFX9-NEXT: s_load_dword s2, s[4:5], 0x8 +; GFX9-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x0 +; GFX9-NEXT: s_load_dword s2, s[0:1], 0x8 ; GFX9-NEXT: s_waitcnt lgkmcnt(0) +; GFX9-NEXT: s_mov_b32 s0, s4 +; GFX9-NEXT: s_mov_b32 s1, s5 ; GFX9-NEXT: ; return to shader part epilog ; ; GFX7-LABEL: s_load_constant_i96_align8: ; GFX7: ; %bb.0: -; GFX7-NEXT: s_mov_b32 s2, s0 -; GFX7-NEXT: s_mov_b32 s3, s1 -; GFX7-NEXT: s_load_dwordx2 s[0:1], s[2:3], 0x0 -; GFX7-NEXT: s_load_dword s2, s[2:3], 0x2 +; GFX7-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x0 +; GFX7-NEXT: s_load_dword s2, s[0:1], 0x2 ; GFX7-NEXT: s_waitcnt lgkmcnt(0) +; GFX7-NEXT: s_mov_b32 s0, s4 +; GFX7-NEXT: s_mov_b32 s1, s5 ; GFX7-NEXT: ; return to shader part epilog %load = load i96, i96 addrspace(4)* %ptr, align 8 ret i96 %load @@ -574,20 +574,20 @@ define amdgpu_ps i96 @s_load_constant_i96_align8(i96 addrspace(4)* inreg %ptr) { define amdgpu_ps <3 x i32> @s_load_constant_v3i32_align8(<3 x i32> addrspace(4)* inreg %ptr) { ; GFX9-LABEL: s_load_constant_v3i32_align8: ; GFX9: ; %bb.0: -; GFX9-NEXT: s_mov_b32 s4, s0 -; GFX9-NEXT: s_mov_b32 s5, s1 -; GFX9-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 -; GFX9-NEXT: s_load_dword s2, s[4:5], 0x8 +; GFX9-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x0 +; GFX9-NEXT: s_load_dword s2, s[0:1], 0x8 ; GFX9-NEXT: s_waitcnt lgkmcnt(0) +; 
GFX9-NEXT: s_mov_b32 s0, s4 +; GFX9-NEXT: s_mov_b32 s1, s5 ; GFX9-NEXT: ; return to shader part epilog ; ; GFX7-LABEL: s_load_constant_v3i32_align8: ; GFX7: ; %bb.0: -; GFX7-NEXT: s_mov_b32 s2, s0 -; GFX7-NEXT: s_mov_b32 s3, s1 -; GFX7-NEXT: s_load_dwordx2 s[0:1], s[2:3], 0x0 -; GFX7-NEXT: s_load_dword s2, s[2:3], 0x2 +; GFX7-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x0 +; GFX7-NEXT: s_load_dword s2, s[0:1], 0x2 ; GFX7-NEXT: s_waitcnt lgkmcnt(0) +; GFX7-NEXT: s_mov_b32 s0, s4 +; GFX7-NEXT: s_mov_b32 s1, s5 ; GFX7-NEXT: ; return to shader part epilog %load = load <3 x i32>, <3 x i32> addrspace(4)* %ptr, align 8 ret <3 x i32> %load @@ -596,20 +596,20 @@ define amdgpu_ps <3 x i32> @s_load_constant_v3i32_align8(<3 x i32> addrspace(4)* define amdgpu_ps <3 x i32> @s_load_constant_v6i16_align8(<6 x i16> addrspace(4)* inreg %ptr) { ; GFX9-LABEL: s_load_constant_v6i16_align8: ; GFX9: ; %bb.0: -; GFX9-NEXT: s_mov_b32 s4, s0 -; GFX9-NEXT: s_mov_b32 s5, s1 -; GFX9-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 -; GFX9-NEXT: s_load_dword s2, s[4:5], 0x8 +; GFX9-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x0 +; GFX9-NEXT: s_load_dword s2, s[0:1], 0x8 ; GFX9-NEXT: s_waitcnt lgkmcnt(0) +; GFX9-NEXT: s_mov_b32 s0, s4 +; GFX9-NEXT: s_mov_b32 s1, s5 ; GFX9-NEXT: ; return to shader part epilog ; ; GFX7-LABEL: s_load_constant_v6i16_align8: ; GFX7: ; %bb.0: -; GFX7-NEXT: s_mov_b32 s2, s0 -; GFX7-NEXT: s_mov_b32 s3, s1 -; GFX7-NEXT: s_load_dwordx2 s[0:1], s[2:3], 0x0 -; GFX7-NEXT: s_load_dword s2, s[2:3], 0x2 +; GFX7-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x0 +; GFX7-NEXT: s_load_dword s2, s[0:1], 0x2 ; GFX7-NEXT: s_waitcnt lgkmcnt(0) +; GFX7-NEXT: s_mov_b32 s0, s4 +; GFX7-NEXT: s_mov_b32 s1, s5 ; GFX7-NEXT: ; return to shader part epilog %load = load <6 x i16>, <6 x i16> addrspace(4)* %ptr, align 8 %cast = bitcast <6 x i16> %load to <3 x i32> diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-amdgcn.s.buffer.load.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-amdgcn.s.buffer.load.ll index 169507619a12..c11a19481949 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-amdgcn.s.buffer.load.ll +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-amdgcn.s.buffer.load.ll @@ -93,12 +93,7 @@ define amdgpu_ps <3 x i32> @s_buffer_load_v3i32(<4 x i32> inreg %rsrc, i32 inreg ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:sgpr(<4 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32) ; CHECK-NEXT: [[COPY4:%[0-9]+]]:sgpr(s32) = COPY $sgpr6 ; CHECK-NEXT: [[AMDGPU_S_BUFFER_LOAD:%[0-9]+]]:sgpr(<4 x s32>) = G_AMDGPU_S_BUFFER_LOAD [[BUILD_VECTOR]](<4 x s32>), [[COPY4]](s32), 0 :: (dereferenceable invariant load (s96), align 4) - ; CHECK-NEXT: [[DEF:%[0-9]+]]:sgpr(<4 x s32>) = G_IMPLICIT_DEF - ; CHECK-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:sgpr(<12 x s32>) = G_CONCAT_VECTORS [[AMDGPU_S_BUFFER_LOAD]](<4 x s32>), [[DEF]](<4 x s32>), [[DEF]](<4 x s32>) - ; CHECK-NEXT: [[BITCAST:%[0-9]+]]:sgpr(s384) = G_BITCAST [[CONCAT_VECTORS]](<12 x s32>) - ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:sgpr(s96) = G_TRUNC [[BITCAST]](s384) - ; CHECK-NEXT: [[BITCAST1:%[0-9]+]]:sgpr(<3 x s32>) = G_BITCAST [[TRUNC]](s96) - ; CHECK-NEXT: [[UV:%[0-9]+]]:sgpr(s32), [[UV1:%[0-9]+]]:sgpr(s32), [[UV2:%[0-9]+]]:sgpr(s32) = G_UNMERGE_VALUES [[BITCAST1]](<3 x s32>) + ; CHECK-NEXT: [[UV:%[0-9]+]]:sgpr(s32), [[UV1:%[0-9]+]]:sgpr(s32), [[UV2:%[0-9]+]]:sgpr(s32), [[UV3:%[0-9]+]]:sgpr(s32) = G_UNMERGE_VALUES [[AMDGPU_S_BUFFER_LOAD]](<4 x s32>) ; CHECK-NEXT: [[COPY5:%[0-9]+]]:vgpr(s32) = COPY [[UV]](s32) ; CHECK-NEXT: [[INT:%[0-9]+]]:sgpr(s32) = 
G_INTRINSIC intrinsic(@llvm.amdgcn.readfirstlane), [[COPY5]](s32) ; CHECK-NEXT: $sgpr0 = COPY [[INT]](s32) @@ -120,12 +115,7 @@ define amdgpu_ps <3 x i32> @s_buffer_load_v3i32(<4 x i32> inreg %rsrc, i32 inreg ; GREEDY-NEXT: [[BUILD_VECTOR:%[0-9]+]]:sgpr(<4 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32) ; GREEDY-NEXT: [[COPY4:%[0-9]+]]:sgpr(s32) = COPY $sgpr6 ; GREEDY-NEXT: [[AMDGPU_S_BUFFER_LOAD:%[0-9]+]]:sgpr(<4 x s32>) = G_AMDGPU_S_BUFFER_LOAD [[BUILD_VECTOR]](<4 x s32>), [[COPY4]](s32), 0 :: (dereferenceable invariant load (s96), align 4) - ; GREEDY-NEXT: [[DEF:%[0-9]+]]:sgpr(<4 x s32>) = G_IMPLICIT_DEF - ; GREEDY-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:sgpr(<12 x s32>) = G_CONCAT_VECTORS [[AMDGPU_S_BUFFER_LOAD]](<4 x s32>), [[DEF]](<4 x s32>), [[DEF]](<4 x s32>) - ; GREEDY-NEXT: [[BITCAST:%[0-9]+]]:sgpr(s384) = G_BITCAST [[CONCAT_VECTORS]](<12 x s32>) - ; GREEDY-NEXT: [[TRUNC:%[0-9]+]]:sgpr(s96) = G_TRUNC [[BITCAST]](s384) - ; GREEDY-NEXT: [[BITCAST1:%[0-9]+]]:sgpr(<3 x s32>) = G_BITCAST [[TRUNC]](s96) - ; GREEDY-NEXT: [[UV:%[0-9]+]]:sgpr(s32), [[UV1:%[0-9]+]]:sgpr(s32), [[UV2:%[0-9]+]]:sgpr(s32) = G_UNMERGE_VALUES [[BITCAST1]](<3 x s32>) + ; GREEDY-NEXT: [[UV:%[0-9]+]]:sgpr(s32), [[UV1:%[0-9]+]]:sgpr(s32), [[UV2:%[0-9]+]]:sgpr(s32), [[UV3:%[0-9]+]]:sgpr(s32) = G_UNMERGE_VALUES [[AMDGPU_S_BUFFER_LOAD]](<4 x s32>) ; GREEDY-NEXT: [[COPY5:%[0-9]+]]:vgpr(s32) = COPY [[UV]](s32) ; GREEDY-NEXT: [[INT:%[0-9]+]]:sgpr(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.readfirstlane), [[COPY5]](s32) ; GREEDY-NEXT: $sgpr0 = COPY [[INT]](s32) @@ -435,14 +425,7 @@ define amdgpu_ps <3 x float> @s_buffer_load_v3f32_vgpr_offset(<4 x i32> inreg %r ; CHECK-NEXT: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 0 ; CHECK-NEXT: [[C1:%[0-9]+]]:vgpr(s32) = G_CONSTANT i32 0 ; CHECK-NEXT: [[AMDGPU_BUFFER_LOAD:%[0-9]+]]:vgpr(<4 x s32>) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR]](<4 x s32>), [[C1]](s32), [[COPY4]], [[C]], 0, 0, 0 :: (dereferenceable invariant load (s128), align 4) - ; CHECK-NEXT: [[DEF:%[0-9]+]]:sgpr(<4 x s32>) = G_IMPLICIT_DEF - ; CHECK-NEXT: [[COPY5:%[0-9]+]]:vgpr(<4 x s32>) = COPY [[DEF]](<4 x s32>) - ; CHECK-NEXT: [[COPY6:%[0-9]+]]:vgpr(<4 x s32>) = COPY [[DEF]](<4 x s32>) - ; CHECK-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:vgpr(<12 x s32>) = G_CONCAT_VECTORS [[AMDGPU_BUFFER_LOAD]](<4 x s32>), [[COPY5]](<4 x s32>), [[COPY6]](<4 x s32>) - ; CHECK-NEXT: [[BITCAST:%[0-9]+]]:vgpr(s384) = G_BITCAST [[CONCAT_VECTORS]](<12 x s32>) - ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:vgpr(s96) = G_TRUNC [[BITCAST]](s384) - ; CHECK-NEXT: [[BITCAST1:%[0-9]+]]:vgpr(<3 x s32>) = G_BITCAST [[TRUNC]](s96) - ; CHECK-NEXT: [[UV:%[0-9]+]]:vgpr(s32), [[UV1:%[0-9]+]]:vgpr(s32), [[UV2:%[0-9]+]]:vgpr(s32) = G_UNMERGE_VALUES [[BITCAST1]](<3 x s32>) + ; CHECK-NEXT: [[UV:%[0-9]+]]:vgpr(s32), [[UV1:%[0-9]+]]:vgpr(s32), [[UV2:%[0-9]+]]:vgpr(s32), [[UV3:%[0-9]+]]:vgpr(s32) = G_UNMERGE_VALUES [[AMDGPU_BUFFER_LOAD]](<4 x s32>) ; CHECK-NEXT: $vgpr0 = COPY [[UV]](s32) ; CHECK-NEXT: $vgpr1 = COPY [[UV1]](s32) ; CHECK-NEXT: $vgpr2 = COPY [[UV2]](s32) @@ -460,14 +443,7 @@ define amdgpu_ps <3 x float> @s_buffer_load_v3f32_vgpr_offset(<4 x i32> inreg %r ; GREEDY-NEXT: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 0 ; GREEDY-NEXT: [[C1:%[0-9]+]]:vgpr(s32) = G_CONSTANT i32 0 ; GREEDY-NEXT: [[AMDGPU_BUFFER_LOAD:%[0-9]+]]:vgpr(<4 x s32>) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR]](<4 x s32>), [[C1]](s32), [[COPY4]], [[C]], 0, 0, 0 :: (dereferenceable invariant load (s128), align 4) - ; GREEDY-NEXT: [[DEF:%[0-9]+]]:sgpr(<4 x s32>) = G_IMPLICIT_DEF - ; 
GREEDY-NEXT: [[COPY5:%[0-9]+]]:vgpr(<4 x s32>) = COPY [[DEF]](<4 x s32>) - ; GREEDY-NEXT: [[COPY6:%[0-9]+]]:vgpr(<4 x s32>) = COPY [[DEF]](<4 x s32>) - ; GREEDY-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:vgpr(<12 x s32>) = G_CONCAT_VECTORS [[AMDGPU_BUFFER_LOAD]](<4 x s32>), [[COPY5]](<4 x s32>), [[COPY6]](<4 x s32>) - ; GREEDY-NEXT: [[BITCAST:%[0-9]+]]:vgpr(s384) = G_BITCAST [[CONCAT_VECTORS]](<12 x s32>) - ; GREEDY-NEXT: [[TRUNC:%[0-9]+]]:vgpr(s96) = G_TRUNC [[BITCAST]](s384) - ; GREEDY-NEXT: [[BITCAST1:%[0-9]+]]:vgpr(<3 x s32>) = G_BITCAST [[TRUNC]](s96) - ; GREEDY-NEXT: [[UV:%[0-9]+]]:vgpr(s32), [[UV1:%[0-9]+]]:vgpr(s32), [[UV2:%[0-9]+]]:vgpr(s32) = G_UNMERGE_VALUES [[BITCAST1]](<3 x s32>) + ; GREEDY-NEXT: [[UV:%[0-9]+]]:vgpr(s32), [[UV1:%[0-9]+]]:vgpr(s32), [[UV2:%[0-9]+]]:vgpr(s32), [[UV3:%[0-9]+]]:vgpr(s32) = G_UNMERGE_VALUES [[AMDGPU_BUFFER_LOAD]](<4 x s32>) ; GREEDY-NEXT: $vgpr0 = COPY [[UV]](s32) ; GREEDY-NEXT: $vgpr1 = COPY [[UV1]](s32) ; GREEDY-NEXT: $vgpr2 = COPY [[UV2]](s32) diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-load.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-load.mir index 3036df730a46..d8a19824fde9 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-load.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-load.mir @@ -680,10 +680,9 @@ body: | ; CHECK-NEXT: [[C:%[0-9]+]]:sgpr(s64) = G_CONSTANT i64 8 ; CHECK-NEXT: [[PTR_ADD:%[0-9]+]]:sgpr(p4) = G_PTR_ADD [[COPY]], [[C]](s64) ; CHECK-NEXT: [[LOAD1:%[0-9]+]]:sgpr(s32) = G_LOAD [[PTR_ADD]](p4) :: (invariant load (s32) from unknown-address + 8, addrspace 4) - ; CHECK-NEXT: [[DEF:%[0-9]+]]:sgpr(<3 x s32>) = G_IMPLICIT_DEF - ; CHECK-NEXT: [[INSERT:%[0-9]+]]:sgpr(<3 x s32>) = G_INSERT [[DEF]], [[LOAD]](<2 x s32>), 0 - ; CHECK-NEXT: [[INSERT1:%[0-9]+]]:sgpr(<3 x s32>) = G_INSERT [[INSERT]], [[LOAD1]](s32), 64 - ; CHECK-NEXT: S_ENDPGM 0, implicit [[INSERT1]](<3 x s32>) + ; CHECK-NEXT: [[UV:%[0-9]+]]:sgpr(s32), [[UV1:%[0-9]+]]:sgpr(s32) = G_UNMERGE_VALUES [[LOAD]](<2 x s32>) + ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:sgpr(<3 x s32>) = G_BUILD_VECTOR [[UV]](s32), [[UV1]](s32), [[LOAD1]](s32) + ; CHECK-NEXT: S_ENDPGM 0, implicit [[BUILD_VECTOR]](<3 x s32>) %0:_(p4) = COPY $sgpr0_sgpr1 %1:_(<3 x s32>) = G_LOAD %0 :: (invariant load (<3 x s32>), addrspace 4, align 4) S_ENDPGM 0, implicit %1 @@ -702,10 +701,9 @@ body: | ; CHECK-NEXT: [[C:%[0-9]+]]:sgpr(s64) = G_CONSTANT i64 8 ; CHECK-NEXT: [[PTR_ADD:%[0-9]+]]:sgpr(p4) = G_PTR_ADD [[COPY]], [[C]](s64) ; CHECK-NEXT: [[LOAD1:%[0-9]+]]:sgpr(s32) = G_LOAD [[PTR_ADD]](p4) :: (invariant load (s32) from unknown-address + 8, align 8, addrspace 4) - ; CHECK-NEXT: [[DEF:%[0-9]+]]:sgpr(<3 x s32>) = G_IMPLICIT_DEF - ; CHECK-NEXT: [[INSERT:%[0-9]+]]:sgpr(<3 x s32>) = G_INSERT [[DEF]], [[LOAD]](<2 x s32>), 0 - ; CHECK-NEXT: [[INSERT1:%[0-9]+]]:sgpr(<3 x s32>) = G_INSERT [[INSERT]], [[LOAD1]](s32), 64 - ; CHECK-NEXT: S_ENDPGM 0, implicit [[INSERT1]](<3 x s32>) + ; CHECK-NEXT: [[UV:%[0-9]+]]:sgpr(s32), [[UV1:%[0-9]+]]:sgpr(s32) = G_UNMERGE_VALUES [[LOAD]](<2 x s32>) + ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:sgpr(<3 x s32>) = G_BUILD_VECTOR [[UV]](s32), [[UV1]](s32), [[LOAD1]](s32) + ; CHECK-NEXT: S_ENDPGM 0, implicit [[BUILD_VECTOR]](<3 x s32>) %0:_(p4) = COPY $sgpr0_sgpr1 %1:_(<3 x s32>) = G_LOAD %0 :: (invariant load (<3 x s32>), addrspace 4, align 8) S_ENDPGM 0, implicit %1 @@ -721,8 +719,9 @@ body: | ; CHECK-LABEL: name: load_constant_v3i32_align16 ; CHECK: [[COPY:%[0-9]+]]:sgpr(p4) = COPY $sgpr0_sgpr1 ; CHECK-NEXT: [[LOAD:%[0-9]+]]:sgpr(<4 x s32>) = G_LOAD 
[[COPY]](p4) :: (invariant load (<4 x s32>), addrspace 4) - ; CHECK-NEXT: [[EXTRACT:%[0-9]+]]:sgpr(<3 x s32>) = G_EXTRACT [[LOAD]](<4 x s32>), 0 - ; CHECK-NEXT: S_ENDPGM 0, implicit [[EXTRACT]](<3 x s32>) + ; CHECK-NEXT: [[UV:%[0-9]+]]:sgpr(s32), [[UV1:%[0-9]+]]:sgpr(s32), [[UV2:%[0-9]+]]:sgpr(s32), [[UV3:%[0-9]+]]:sgpr(s32) = G_UNMERGE_VALUES [[LOAD]](<4 x s32>) + ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:sgpr(<3 x s32>) = G_BUILD_VECTOR [[UV]](s32), [[UV1]](s32), [[UV2]](s32) + ; CHECK-NEXT: S_ENDPGM 0, implicit [[BUILD_VECTOR]](<3 x s32>) %0:_(p4) = COPY $sgpr0_sgpr1 %1:_(<3 x s32>) = G_LOAD %0 :: (invariant load (<3 x s32>), addrspace 4, align 16) S_ENDPGM 0, implicit %1 @@ -741,10 +740,10 @@ body: | ; CHECK-NEXT: [[C:%[0-9]+]]:sgpr(s64) = G_CONSTANT i64 8 ; CHECK-NEXT: [[PTR_ADD:%[0-9]+]]:sgpr(p4) = G_PTR_ADD [[COPY]], [[C]](s64) ; CHECK-NEXT: [[LOAD1:%[0-9]+]]:sgpr(<2 x s16>) = G_LOAD [[PTR_ADD]](p4) :: (invariant load (<2 x s16>) from unknown-address + 8, addrspace 4) - ; CHECK-NEXT: [[DEF:%[0-9]+]]:sgpr(<6 x s16>) = G_IMPLICIT_DEF - ; CHECK-NEXT: [[INSERT:%[0-9]+]]:sgpr(<6 x s16>) = G_INSERT [[DEF]], [[LOAD]](<4 x s16>), 0 - ; CHECK-NEXT: [[INSERT1:%[0-9]+]]:sgpr(<6 x s16>) = G_INSERT [[INSERT]], [[LOAD1]](<2 x s16>), 64 - ; CHECK-NEXT: S_ENDPGM 0, implicit [[INSERT1]](<6 x s16>) + ; CHECK-NEXT: [[UV:%[0-9]+]]:sgpr(s16), [[UV1:%[0-9]+]]:sgpr(s16), [[UV2:%[0-9]+]]:sgpr(s16), [[UV3:%[0-9]+]]:sgpr(s16) = G_UNMERGE_VALUES [[LOAD]](<4 x s16>) + ; CHECK-NEXT: [[UV4:%[0-9]+]]:sgpr(s16), [[UV5:%[0-9]+]]:sgpr(s16) = G_UNMERGE_VALUES [[LOAD1]](<2 x s16>) + ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:sgpr(<6 x s16>) = G_BUILD_VECTOR [[UV]](s16), [[UV1]](s16), [[UV2]](s16), [[UV3]](s16), [[UV4]](s16), [[UV5]](s16) + ; CHECK-NEXT: S_ENDPGM 0, implicit [[BUILD_VECTOR]](<6 x s16>) %0:_(p4) = COPY $sgpr0_sgpr1 %1:_(<6 x s16>) = G_LOAD %0 :: (invariant load (<6 x s16>), addrspace 4, align 4) S_ENDPGM 0, implicit %1 @@ -763,10 +762,10 @@ body: | ; CHECK-NEXT: [[C:%[0-9]+]]:sgpr(s64) = G_CONSTANT i64 8 ; CHECK-NEXT: [[PTR_ADD:%[0-9]+]]:sgpr(p4) = G_PTR_ADD [[COPY]], [[C]](s64) ; CHECK-NEXT: [[LOAD1:%[0-9]+]]:sgpr(<2 x s16>) = G_LOAD [[PTR_ADD]](p4) :: (invariant load (<2 x s16>) from unknown-address + 8, align 8, addrspace 4) - ; CHECK-NEXT: [[DEF:%[0-9]+]]:sgpr(<6 x s16>) = G_IMPLICIT_DEF - ; CHECK-NEXT: [[INSERT:%[0-9]+]]:sgpr(<6 x s16>) = G_INSERT [[DEF]], [[LOAD]](<4 x s16>), 0 - ; CHECK-NEXT: [[INSERT1:%[0-9]+]]:sgpr(<6 x s16>) = G_INSERT [[INSERT]], [[LOAD1]](<2 x s16>), 64 - ; CHECK-NEXT: S_ENDPGM 0, implicit [[INSERT1]](<6 x s16>) + ; CHECK-NEXT: [[UV:%[0-9]+]]:sgpr(s16), [[UV1:%[0-9]+]]:sgpr(s16), [[UV2:%[0-9]+]]:sgpr(s16), [[UV3:%[0-9]+]]:sgpr(s16) = G_UNMERGE_VALUES [[LOAD]](<4 x s16>) + ; CHECK-NEXT: [[UV4:%[0-9]+]]:sgpr(s16), [[UV5:%[0-9]+]]:sgpr(s16) = G_UNMERGE_VALUES [[LOAD1]](<2 x s16>) + ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:sgpr(<6 x s16>) = G_BUILD_VECTOR [[UV]](s16), [[UV1]](s16), [[UV2]](s16), [[UV3]](s16), [[UV4]](s16), [[UV5]](s16) + ; CHECK-NEXT: S_ENDPGM 0, implicit [[BUILD_VECTOR]](<6 x s16>) %0:_(p4) = COPY $sgpr0_sgpr1 %1:_(<6 x s16>) = G_LOAD %0 :: (invariant load (<6 x s16>), addrspace 4, align 8) S_ENDPGM 0, implicit %1 @@ -782,8 +781,9 @@ body: | ; CHECK-LABEL: name: load_constant_v6i16_align16 ; CHECK: [[COPY:%[0-9]+]]:sgpr(p4) = COPY $sgpr0_sgpr1 ; CHECK-NEXT: [[LOAD:%[0-9]+]]:sgpr(<8 x s16>) = G_LOAD [[COPY]](p4) :: (invariant load (<8 x s16>), addrspace 4) - ; CHECK-NEXT: [[EXTRACT:%[0-9]+]]:sgpr(<6 x s16>) = G_EXTRACT [[LOAD]](<8 x s16>), 0 - ; CHECK-NEXT: S_ENDPGM 0, 
implicit [[EXTRACT]](<6 x s16>) + ; CHECK-NEXT: [[UV:%[0-9]+]]:sgpr(s16), [[UV1:%[0-9]+]]:sgpr(s16), [[UV2:%[0-9]+]]:sgpr(s16), [[UV3:%[0-9]+]]:sgpr(s16), [[UV4:%[0-9]+]]:sgpr(s16), [[UV5:%[0-9]+]]:sgpr(s16), [[UV6:%[0-9]+]]:sgpr(s16), [[UV7:%[0-9]+]]:sgpr(s16) = G_UNMERGE_VALUES [[LOAD]](<8 x s16>) + ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:sgpr(<6 x s16>) = G_BUILD_VECTOR [[UV]](s16), [[UV1]](s16), [[UV2]](s16), [[UV3]](s16), [[UV4]](s16), [[UV5]](s16) + ; CHECK-NEXT: S_ENDPGM 0, implicit [[BUILD_VECTOR]](<6 x s16>) %0:_(p4) = COPY $sgpr0_sgpr1 %1:_(<6 x s16>) = G_LOAD %0 :: (invariant load (<6 x s16>), addrspace 4, align 16) S_ENDPGM 0, implicit %1 @@ -802,10 +802,9 @@ body: | ; CHECK-NEXT: [[C:%[0-9]+]]:sgpr(s64) = G_CONSTANT i64 8 ; CHECK-NEXT: [[PTR_ADD:%[0-9]+]]:sgpr(p4) = G_PTR_ADD [[COPY]], [[C]](s64) ; CHECK-NEXT: [[LOAD1:%[0-9]+]]:sgpr(s32) = G_LOAD [[PTR_ADD]](p4) :: (invariant load (s32) from unknown-address + 8, addrspace 4) - ; CHECK-NEXT: [[DEF:%[0-9]+]]:sgpr(s96) = G_IMPLICIT_DEF - ; CHECK-NEXT: [[INSERT:%[0-9]+]]:sgpr(s96) = G_INSERT [[DEF]], [[LOAD]](s64), 0 - ; CHECK-NEXT: [[INSERT1:%[0-9]+]]:sgpr(s96) = G_INSERT [[INSERT]], [[LOAD1]](s32), 64 - ; CHECK-NEXT: S_ENDPGM 0, implicit [[INSERT1]](s96) + ; CHECK-NEXT: [[UV:%[0-9]+]]:sgpr(s32), [[UV1:%[0-9]+]]:sgpr(s32) = G_UNMERGE_VALUES [[LOAD]](s64) + ; CHECK-NEXT: [[MV:%[0-9]+]]:sgpr(s96) = G_MERGE_VALUES [[UV]](s32), [[UV1]](s32), [[LOAD1]](s32) + ; CHECK-NEXT: S_ENDPGM 0, implicit [[MV]](s96) %0:_(p4) = COPY $sgpr0_sgpr1 %1:_(s96) = G_LOAD %0 :: (invariant load (s96), addrspace 4, align 4) S_ENDPGM 0, implicit %1 @@ -824,10 +823,9 @@ body: | ; CHECK-NEXT: [[C:%[0-9]+]]:sgpr(s64) = G_CONSTANT i64 8 ; CHECK-NEXT: [[PTR_ADD:%[0-9]+]]:sgpr(p4) = G_PTR_ADD [[COPY]], [[C]](s64) ; CHECK-NEXT: [[LOAD1:%[0-9]+]]:sgpr(s32) = G_LOAD [[PTR_ADD]](p4) :: (invariant load (s32) from unknown-address + 8, align 8, addrspace 4) - ; CHECK-NEXT: [[DEF:%[0-9]+]]:sgpr(s96) = G_IMPLICIT_DEF - ; CHECK-NEXT: [[INSERT:%[0-9]+]]:sgpr(s96) = G_INSERT [[DEF]], [[LOAD]](s64), 0 - ; CHECK-NEXT: [[INSERT1:%[0-9]+]]:sgpr(s96) = G_INSERT [[INSERT]], [[LOAD1]](s32), 64 - ; CHECK-NEXT: S_ENDPGM 0, implicit [[INSERT1]](s96) + ; CHECK-NEXT: [[UV:%[0-9]+]]:sgpr(s32), [[UV1:%[0-9]+]]:sgpr(s32) = G_UNMERGE_VALUES [[LOAD]](s64) + ; CHECK-NEXT: [[MV:%[0-9]+]]:sgpr(s96) = G_MERGE_VALUES [[UV]](s32), [[UV1]](s32), [[LOAD1]](s32) + ; CHECK-NEXT: S_ENDPGM 0, implicit [[MV]](s96) %0:_(p4) = COPY $sgpr0_sgpr1 %1:_(s96) = G_LOAD %0 :: (invariant load (s96), addrspace 4, align 8) S_ENDPGM 0, implicit %1 @@ -843,8 +841,8 @@ body: | ; CHECK-LABEL: name: load_constant_i96_align16 ; CHECK: [[COPY:%[0-9]+]]:sgpr(p4) = COPY $sgpr0_sgpr1 ; CHECK-NEXT: [[LOAD:%[0-9]+]]:sgpr(s128) = G_LOAD [[COPY]](p4) :: (invariant load (s128), addrspace 4) - ; CHECK-NEXT: [[EXTRACT:%[0-9]+]]:sgpr(s96) = G_EXTRACT [[LOAD]](s128), 0 - ; CHECK-NEXT: S_ENDPGM 0, implicit [[EXTRACT]](s96) + ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:sgpr(s96) = G_TRUNC [[LOAD]](s128) + ; CHECK-NEXT: S_ENDPGM 0, implicit [[TRUNC]](s96) %0:_(p4) = COPY $sgpr0_sgpr1 %1:_(s96) = G_LOAD %0 :: (invariant load (s96), addrspace 4, align 16) S_ENDPGM 0, implicit %1 diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-split-scalar-load-metadata.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-split-scalar-load-metadata.mir index ea5a841e32d9..bf150a54f434 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-split-scalar-load-metadata.mir +++ 
b/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-split-scalar-load-metadata.mir @@ -34,10 +34,9 @@ body: | ; SI-NEXT: [[C:%[0-9]+]]:sgpr(s64) = G_CONSTANT i64 8 ; SI-NEXT: [[PTR_ADD:%[0-9]+]]:sgpr(p4) = G_PTR_ADD [[COPY]], [[C]](s64) ; SI-NEXT: [[LOAD1:%[0-9]+]]:sgpr(s32) = G_LOAD [[PTR_ADD]](p4) :: (load (s32) from unknown-address + 8, align 8, addrspace 4) - ; SI-NEXT: [[DEF:%[0-9]+]]:sgpr(<3 x s32>) = G_IMPLICIT_DEF - ; SI-NEXT: [[INSERT:%[0-9]+]]:sgpr(<3 x s32>) = G_INSERT [[DEF]], [[LOAD]](<2 x s32>), 0 - ; SI-NEXT: [[INSERT1:%[0-9]+]]:sgpr(<3 x s32>) = G_INSERT [[INSERT]], [[LOAD1]](s32), 64 - ; SI-NEXT: $sgpr0_sgpr1_sgpr2 = COPY [[INSERT1]](<3 x s32>) + ; SI-NEXT: [[UV:%[0-9]+]]:sgpr(s32), [[UV1:%[0-9]+]]:sgpr(s32) = G_UNMERGE_VALUES [[LOAD]](<2 x s32>) + ; SI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:sgpr(<3 x s32>) = G_BUILD_VECTOR [[UV]](s32), [[UV1]](s32), [[LOAD1]](s32) + ; SI-NEXT: $sgpr0_sgpr1_sgpr2 = COPY [[BUILD_VECTOR]](<3 x s32>) %0:_(p4) = COPY $sgpr0_sgpr1 %1:_(<3 x s32>) = G_LOAD %0 :: (load (<3 x s32>), align 8, addrspace 4, !range !0) $sgpr0_sgpr1_sgpr2 = COPY %1 @@ -57,10 +56,9 @@ body: | ; SI-NEXT: [[C:%[0-9]+]]:sgpr(s64) = G_CONSTANT i64 8 ; SI-NEXT: [[PTR_ADD:%[0-9]+]]:sgpr(p4) = G_PTR_ADD [[COPY]], [[C]](s64) ; SI-NEXT: [[LOAD1:%[0-9]+]]:sgpr(s32) = G_LOAD [[PTR_ADD]](p4) :: (load (s32) from unknown-address + 8, align 8, !tbaa !2, addrspace 4) - ; SI-NEXT: [[DEF:%[0-9]+]]:sgpr(<3 x s32>) = G_IMPLICIT_DEF - ; SI-NEXT: [[INSERT:%[0-9]+]]:sgpr(<3 x s32>) = G_INSERT [[DEF]], [[LOAD]](<2 x s32>), 0 - ; SI-NEXT: [[INSERT1:%[0-9]+]]:sgpr(<3 x s32>) = G_INSERT [[INSERT]], [[LOAD1]](s32), 64 - ; SI-NEXT: $sgpr0_sgpr1_sgpr2 = COPY [[INSERT1]](<3 x s32>) + ; SI-NEXT: [[UV:%[0-9]+]]:sgpr(s32), [[UV1:%[0-9]+]]:sgpr(s32) = G_UNMERGE_VALUES [[LOAD]](<2 x s32>) + ; SI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:sgpr(<3 x s32>) = G_BUILD_VECTOR [[UV]](s32), [[UV1]](s32), [[LOAD1]](s32) + ; SI-NEXT: $sgpr0_sgpr1_sgpr2 = COPY [[BUILD_VECTOR]](<3 x s32>) %0:_(p4) = COPY $sgpr0_sgpr1 %1:_(<3 x s32>) = G_LOAD %0 :: (load (<3 x s32>), align 8, addrspace 4, !tbaa !1) $sgpr0_sgpr1_sgpr2 = COPY %1 diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/sdiv.i64.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/sdiv.i64.ll index 740fc68c4b3d..65c62c47a823 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/sdiv.i64.ll +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/sdiv.i64.ll @@ -1051,10 +1051,11 @@ define i64 @v_sdiv_i64_pow2k_denom(i64 %num) { ; CHECK-LABEL: v_sdiv_i64_pow2k_denom: ; CHECK: ; %bb.0: ; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; CHECK-NEXT: v_cvt_f32_u32_e32 v2, 0x1000 +; CHECK-NEXT: s_movk_i32 s4, 0x1000 +; CHECK-NEXT: v_cvt_f32_u32_e32 v2, s4 ; CHECK-NEXT: v_cvt_f32_ubyte0_e32 v3, 0 -; CHECK-NEXT: s_movk_i32 s4, 0xf000 -; CHECK-NEXT: s_movk_i32 s6, 0x1000 +; CHECK-NEXT: s_movk_i32 s5, 0xf000 +; CHECK-NEXT: s_bfe_i32 s6, -1, 0x10000 ; CHECK-NEXT: v_mac_f32_e32 v2, 0x4f800000, v3 ; CHECK-NEXT: v_rcp_iflag_f32_e32 v2, v2 ; CHECK-NEXT: v_ashrrev_i32_e32 v3, 31, v1 @@ -1066,10 +1067,10 @@ define i64 @v_sdiv_i64_pow2k_denom(i64 %num) { ; CHECK-NEXT: v_cvt_u32_f32_e32 v4, v4 ; CHECK-NEXT: v_cvt_u32_f32_e32 v2, v2 ; CHECK-NEXT: v_addc_u32_e32 v1, vcc, v1, v3, vcc -; CHECK-NEXT: v_mul_lo_u32 v6, s4, v4 +; CHECK-NEXT: v_mul_lo_u32 v6, s5, v4 ; CHECK-NEXT: v_mul_lo_u32 v5, -1, v2 -; CHECK-NEXT: v_mul_hi_u32 v8, s4, v2 -; CHECK-NEXT: v_mul_lo_u32 v7, s4, v2 +; CHECK-NEXT: v_mul_hi_u32 v8, s5, v2 +; CHECK-NEXT: v_mul_lo_u32 v7, s5, v2 ; CHECK-NEXT: v_xor_b32_e32 v0, v0, v3 ; CHECK-NEXT: v_add_i32_e32 v5, vcc, v5, 
v6 ; CHECK-NEXT: v_add_i32_e32 v5, vcc, v5, v8 @@ -1098,10 +1099,9 @@ define i64 @v_sdiv_i64_pow2k_denom(i64 %num) { ; CHECK-NEXT: v_add_i32_e32 v2, vcc, v2, v6 ; CHECK-NEXT: v_addc_u32_e32 v4, vcc, v4, v5, vcc ; CHECK-NEXT: v_mul_lo_u32 v5, -1, v2 -; CHECK-NEXT: v_mul_lo_u32 v6, s4, v4 -; CHECK-NEXT: v_mul_hi_u32 v8, s4, v2 -; CHECK-NEXT: v_mul_lo_u32 v7, s4, v2 -; CHECK-NEXT: s_bfe_i32 s7, -1, 0x10000 +; CHECK-NEXT: v_mul_lo_u32 v6, s5, v4 +; CHECK-NEXT: v_mul_hi_u32 v8, s5, v2 +; CHECK-NEXT: v_mul_lo_u32 v7, s5, v2 ; CHECK-NEXT: v_add_i32_e32 v5, vcc, v5, v6 ; CHECK-NEXT: v_add_i32_e32 v5, vcc, v5, v8 ; CHECK-NEXT: v_mul_lo_u32 v6, v4, v7 @@ -1129,55 +1129,56 @@ define i64 @v_sdiv_i64_pow2k_denom(i64 %num) { ; CHECK-NEXT: v_addc_u32_e32 v4, vcc, v4, v5, vcc ; CHECK-NEXT: v_mul_lo_u32 v5, v1, v2 ; CHECK-NEXT: v_mul_lo_u32 v6, v0, v4 -; CHECK-NEXT: v_mul_hi_u32 v7, v0, v2 +; CHECK-NEXT: v_mul_hi_u32 v8, v0, v2 ; CHECK-NEXT: v_mul_hi_u32 v2, v1, v2 +; CHECK-NEXT: v_mov_b32_e32 v7, 0x1000 ; CHECK-NEXT: v_add_i32_e32 v5, vcc, v5, v6 ; CHECK-NEXT: v_cndmask_b32_e64 v6, 0, 1, vcc -; CHECK-NEXT: v_add_i32_e32 v5, vcc, v5, v7 +; CHECK-NEXT: v_add_i32_e32 v5, vcc, v5, v8 ; CHECK-NEXT: v_cndmask_b32_e64 v5, 0, 1, vcc -; CHECK-NEXT: v_mul_lo_u32 v7, v1, v4 +; CHECK-NEXT: v_mul_lo_u32 v8, v1, v4 ; CHECK-NEXT: v_add_i32_e32 v5, vcc, v6, v5 ; CHECK-NEXT: v_mul_hi_u32 v6, v0, v4 -; CHECK-NEXT: v_add_i32_e32 v2, vcc, v7, v2 -; CHECK-NEXT: v_cndmask_b32_e64 v7, 0, 1, vcc +; CHECK-NEXT: v_add_i32_e32 v2, vcc, v8, v2 +; CHECK-NEXT: v_cndmask_b32_e64 v8, 0, 1, vcc ; CHECK-NEXT: v_add_i32_e32 v2, vcc, v2, v6 ; CHECK-NEXT: v_cndmask_b32_e64 v6, 0, 1, vcc -; CHECK-NEXT: v_add_i32_e32 v6, vcc, v7, v6 +; CHECK-NEXT: v_add_i32_e32 v6, vcc, v8, v6 ; CHECK-NEXT: v_mul_hi_u32 v4, v1, v4 ; CHECK-NEXT: v_add_i32_e32 v2, vcc, v2, v5 ; CHECK-NEXT: v_cndmask_b32_e64 v5, 0, 1, vcc ; CHECK-NEXT: v_add_i32_e32 v5, vcc, v6, v5 ; CHECK-NEXT: v_add_i32_e32 v4, vcc, v4, v5 ; CHECK-NEXT: v_mul_lo_u32 v5, 0, v2 -; CHECK-NEXT: v_mul_lo_u32 v6, s6, v4 -; CHECK-NEXT: v_mul_hi_u32 v8, s6, v2 -; CHECK-NEXT: v_mul_lo_u32 v7, s6, v2 +; CHECK-NEXT: v_mul_lo_u32 v6, s4, v4 +; CHECK-NEXT: v_mul_hi_u32 v9, s4, v2 +; CHECK-NEXT: v_mul_lo_u32 v8, s4, v2 ; CHECK-NEXT: v_add_i32_e32 v5, vcc, v5, v6 -; CHECK-NEXT: v_add_i32_e32 v5, vcc, v5, v8 -; CHECK-NEXT: v_sub_i32_e32 v0, vcc, v0, v7 +; CHECK-NEXT: v_add_i32_e32 v5, vcc, v5, v9 +; CHECK-NEXT: v_sub_i32_e32 v0, vcc, v0, v8 ; CHECK-NEXT: v_subb_u32_e64 v6, s[4:5], v1, v5, vcc ; CHECK-NEXT: v_sub_i32_e64 v1, s[4:5], v1, v5 ; CHECK-NEXT: v_subbrev_u32_e32 v1, vcc, 0, v1, vcc -; CHECK-NEXT: v_cmp_le_u32_e64 s[4:5], s6, v0 -; CHECK-NEXT: v_subrev_i32_e32 v0, vcc, s6, v0 +; CHECK-NEXT: v_cmp_ge_u32_e64 s[4:5], v0, v7 +; CHECK-NEXT: v_sub_i32_e32 v0, vcc, v0, v7 ; CHECK-NEXT: v_subbrev_u32_e32 v1, vcc, 0, v1, vcc ; CHECK-NEXT: v_cndmask_b32_e64 v5, 0, -1, s[4:5] -; CHECK-NEXT: v_mov_b32_e32 v7, s7 +; CHECK-NEXT: v_mov_b32_e32 v8, s6 ; CHECK-NEXT: v_cmp_eq_u32_e64 s[4:5], 0, v6 ; CHECK-NEXT: v_add_i32_e32 v6, vcc, 1, v2 -; CHECK-NEXT: v_cndmask_b32_e64 v5, v7, v5, s[4:5] -; CHECK-NEXT: v_addc_u32_e32 v7, vcc, 0, v4, vcc +; CHECK-NEXT: v_cndmask_b32_e64 v5, v8, v5, s[4:5] +; CHECK-NEXT: v_addc_u32_e32 v8, vcc, 0, v4, vcc ; CHECK-NEXT: s_bfe_i32 s4, -1, 0x10000 -; CHECK-NEXT: v_cmp_le_u32_e32 vcc, s6, v0 +; CHECK-NEXT: v_cmp_ge_u32_e32 vcc, v0, v7 ; CHECK-NEXT: v_cndmask_b32_e64 v0, 0, -1, vcc -; CHECK-NEXT: v_mov_b32_e32 v8, s4 +; CHECK-NEXT: v_mov_b32_e32 v7, s4 ; CHECK-NEXT: v_cmp_eq_u32_e32 vcc, 0, 
v1 -; CHECK-NEXT: v_cndmask_b32_e32 v0, v8, v0, vcc +; CHECK-NEXT: v_cndmask_b32_e32 v0, v7, v0, vcc ; CHECK-NEXT: v_add_i32_e32 v1, vcc, 1, v6 -; CHECK-NEXT: v_addc_u32_e32 v8, vcc, 0, v7, vcc +; CHECK-NEXT: v_addc_u32_e32 v7, vcc, 0, v8, vcc ; CHECK-NEXT: v_cmp_ne_u32_e32 vcc, 0, v0 ; CHECK-NEXT: v_cndmask_b32_e32 v0, v6, v1, vcc -; CHECK-NEXT: v_cndmask_b32_e32 v1, v7, v8, vcc +; CHECK-NEXT: v_cndmask_b32_e32 v1, v8, v7, vcc ; CHECK-NEXT: v_cmp_ne_u32_e32 vcc, 0, v5 ; CHECK-NEXT: v_cndmask_b32_e32 v0, v2, v0, vcc ; CHECK-NEXT: v_cndmask_b32_e32 v1, v4, v1, vcc @@ -1490,41 +1491,169 @@ define <2 x i64> @v_sdiv_v2i64_pow2k_denom(<2 x i64> %num) { ; CGP-LABEL: v_sdiv_v2i64_pow2k_denom: ; CGP: ; %bb.0: ; CGP-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; CGP-NEXT: v_cvt_f32_u32_e32 v5, 0x1000 -; CGP-NEXT: v_cvt_f32_ubyte0_e32 v6, 0 -; CGP-NEXT: s_movk_i32 s6, 0xf000 -; CGP-NEXT: s_movk_i32 s7, 0x1000 -; CGP-NEXT: v_mov_b32_e32 v4, v5 -; CGP-NEXT: v_mac_f32_e32 v4, 0x4f800000, v6 -; CGP-NEXT: v_rcp_iflag_f32_e32 v7, v4 +; CGP-NEXT: s_movk_i32 s6, 0x1000 +; CGP-NEXT: v_cvt_f32_u32_e32 v4, s6 +; CGP-NEXT: v_cvt_f32_ubyte0_e32 v5, 0 +; CGP-NEXT: s_movk_i32 s7, 0xf000 +; CGP-NEXT: s_bfe_i32 s8, -1, 0x10000 +; CGP-NEXT: v_mac_f32_e32 v4, 0x4f800000, v5 +; CGP-NEXT: v_rcp_iflag_f32_e32 v5, v4 ; CGP-NEXT: v_ashrrev_i32_e32 v4, 31, v1 ; CGP-NEXT: v_add_i32_e32 v0, vcc, v0, v4 -; CGP-NEXT: v_mul_f32_e32 v7, 0x5f7ffffc, v7 -; CGP-NEXT: v_mul_f32_e32 v8, 0x2f800000, v7 +; CGP-NEXT: v_mul_f32_e32 v5, 0x5f7ffffc, v5 +; CGP-NEXT: v_mul_f32_e32 v6, 0x2f800000, v5 +; CGP-NEXT: v_trunc_f32_e32 v6, v6 +; CGP-NEXT: v_mac_f32_e32 v5, 0xcf800000, v6 +; CGP-NEXT: v_cvt_u32_f32_e32 v5, v5 +; CGP-NEXT: v_cvt_u32_f32_e32 v6, v6 +; CGP-NEXT: v_addc_u32_e32 v1, vcc, v1, v4, vcc +; CGP-NEXT: v_mul_lo_u32 v7, -1, v5 +; CGP-NEXT: v_mul_lo_u32 v8, s7, v6 +; CGP-NEXT: v_mul_hi_u32 v10, s7, v5 +; CGP-NEXT: v_mul_lo_u32 v9, s7, v5 +; CGP-NEXT: v_xor_b32_e32 v0, v0, v4 +; CGP-NEXT: v_add_i32_e32 v7, vcc, v7, v8 +; CGP-NEXT: v_add_i32_e32 v7, vcc, v7, v10 +; CGP-NEXT: v_mul_lo_u32 v8, v6, v9 +; CGP-NEXT: v_mul_lo_u32 v10, v5, v7 +; CGP-NEXT: v_mul_hi_u32 v11, v5, v9 +; CGP-NEXT: v_mul_hi_u32 v9, v6, v9 +; CGP-NEXT: v_xor_b32_e32 v1, v1, v4 +; CGP-NEXT: v_add_i32_e32 v8, vcc, v8, v10 +; CGP-NEXT: v_cndmask_b32_e64 v10, 0, 1, vcc +; CGP-NEXT: v_add_i32_e32 v8, vcc, v8, v11 +; CGP-NEXT: v_cndmask_b32_e64 v8, 0, 1, vcc +; CGP-NEXT: v_mul_lo_u32 v11, v6, v7 +; CGP-NEXT: v_add_i32_e32 v8, vcc, v10, v8 +; CGP-NEXT: v_mul_hi_u32 v10, v5, v7 +; CGP-NEXT: v_add_i32_e32 v9, vcc, v11, v9 +; CGP-NEXT: v_cndmask_b32_e64 v11, 0, 1, vcc +; CGP-NEXT: v_add_i32_e32 v9, vcc, v9, v10 +; CGP-NEXT: v_cndmask_b32_e64 v10, 0, 1, vcc +; CGP-NEXT: v_add_i32_e32 v10, vcc, v11, v10 +; CGP-NEXT: v_mul_hi_u32 v7, v6, v7 +; CGP-NEXT: v_add_i32_e32 v8, vcc, v9, v8 +; CGP-NEXT: v_cndmask_b32_e64 v9, 0, 1, vcc +; CGP-NEXT: v_add_i32_e32 v9, vcc, v10, v9 +; CGP-NEXT: v_add_i32_e32 v7, vcc, v7, v9 +; CGP-NEXT: v_add_i32_e32 v5, vcc, v5, v8 +; CGP-NEXT: v_addc_u32_e32 v6, vcc, v6, v7, vcc +; CGP-NEXT: v_mul_lo_u32 v7, -1, v5 +; CGP-NEXT: v_mul_lo_u32 v8, s7, v6 +; CGP-NEXT: v_mul_hi_u32 v10, s7, v5 +; CGP-NEXT: v_mul_lo_u32 v9, s7, v5 +; CGP-NEXT: v_add_i32_e32 v7, vcc, v7, v8 +; CGP-NEXT: v_add_i32_e32 v7, vcc, v7, v10 +; CGP-NEXT: v_mul_lo_u32 v8, v6, v9 +; CGP-NEXT: v_mul_lo_u32 v10, v5, v7 +; CGP-NEXT: v_mul_hi_u32 v11, v5, v9 +; CGP-NEXT: v_mul_hi_u32 v9, v6, v9 +; CGP-NEXT: v_add_i32_e32 v8, vcc, v8, v10 +; CGP-NEXT: v_cndmask_b32_e64 v10, 0, 1, vcc +; 
CGP-NEXT: v_add_i32_e32 v8, vcc, v8, v11 +; CGP-NEXT: v_cndmask_b32_e64 v8, 0, 1, vcc +; CGP-NEXT: v_mul_lo_u32 v11, v6, v7 +; CGP-NEXT: v_add_i32_e32 v8, vcc, v10, v8 +; CGP-NEXT: v_mul_hi_u32 v10, v5, v7 +; CGP-NEXT: v_add_i32_e32 v9, vcc, v11, v9 +; CGP-NEXT: v_cndmask_b32_e64 v11, 0, 1, vcc +; CGP-NEXT: v_add_i32_e32 v9, vcc, v9, v10 +; CGP-NEXT: v_cndmask_b32_e64 v10, 0, 1, vcc +; CGP-NEXT: v_add_i32_e32 v10, vcc, v11, v10 +; CGP-NEXT: v_mul_hi_u32 v7, v6, v7 +; CGP-NEXT: v_add_i32_e32 v8, vcc, v9, v8 +; CGP-NEXT: v_cndmask_b32_e64 v9, 0, 1, vcc +; CGP-NEXT: v_add_i32_e32 v9, vcc, v10, v9 +; CGP-NEXT: v_add_i32_e32 v7, vcc, v7, v9 +; CGP-NEXT: v_add_i32_e32 v8, vcc, v5, v8 +; CGP-NEXT: v_addc_u32_e32 v6, vcc, v6, v7, vcc +; CGP-NEXT: v_mul_lo_u32 v7, v1, v8 +; CGP-NEXT: v_mul_lo_u32 v9, v0, v6 +; CGP-NEXT: v_mul_hi_u32 v10, v0, v8 +; CGP-NEXT: v_mul_hi_u32 v8, v1, v8 +; CGP-NEXT: v_mov_b32_e32 v5, 0x1000 +; CGP-NEXT: v_add_i32_e32 v7, vcc, v7, v9 +; CGP-NEXT: v_cndmask_b32_e64 v9, 0, 1, vcc +; CGP-NEXT: v_add_i32_e32 v7, vcc, v7, v10 +; CGP-NEXT: v_cndmask_b32_e64 v7, 0, 1, vcc +; CGP-NEXT: v_mul_lo_u32 v10, v1, v6 +; CGP-NEXT: v_add_i32_e32 v7, vcc, v9, v7 +; CGP-NEXT: v_mul_hi_u32 v9, v0, v6 +; CGP-NEXT: v_add_i32_e32 v8, vcc, v10, v8 +; CGP-NEXT: v_cndmask_b32_e64 v10, 0, 1, vcc +; CGP-NEXT: v_add_i32_e32 v8, vcc, v8, v9 +; CGP-NEXT: v_cndmask_b32_e64 v9, 0, 1, vcc +; CGP-NEXT: v_add_i32_e32 v9, vcc, v10, v9 +; CGP-NEXT: v_mul_hi_u32 v6, v1, v6 +; CGP-NEXT: v_add_i32_e32 v7, vcc, v8, v7 +; CGP-NEXT: v_cndmask_b32_e64 v8, 0, 1, vcc +; CGP-NEXT: v_add_i32_e32 v8, vcc, v9, v8 +; CGP-NEXT: v_add_i32_e32 v6, vcc, v6, v8 +; CGP-NEXT: v_mul_lo_u32 v8, 0, v7 +; CGP-NEXT: v_mul_lo_u32 v9, s6, v6 +; CGP-NEXT: v_mul_hi_u32 v11, s6, v7 +; CGP-NEXT: v_mul_lo_u32 v10, s6, v7 +; CGP-NEXT: v_add_i32_e32 v8, vcc, v8, v9 +; CGP-NEXT: v_add_i32_e32 v8, vcc, v8, v11 +; CGP-NEXT: v_sub_i32_e32 v0, vcc, v0, v10 +; CGP-NEXT: v_subb_u32_e64 v9, s[4:5], v1, v8, vcc +; CGP-NEXT: v_sub_i32_e64 v1, s[4:5], v1, v8 +; CGP-NEXT: v_subbrev_u32_e32 v1, vcc, 0, v1, vcc +; CGP-NEXT: v_cmp_ge_u32_e64 s[4:5], v0, v5 +; CGP-NEXT: v_sub_i32_e32 v0, vcc, v0, v5 +; CGP-NEXT: v_subbrev_u32_e32 v1, vcc, 0, v1, vcc +; CGP-NEXT: v_cndmask_b32_e64 v8, 0, -1, s[4:5] +; CGP-NEXT: v_mov_b32_e32 v10, s8 +; CGP-NEXT: v_cmp_eq_u32_e64 s[4:5], 0, v9 +; CGP-NEXT: v_add_i32_e32 v9, vcc, 1, v7 +; CGP-NEXT: v_cndmask_b32_e64 v8, v10, v8, s[4:5] +; CGP-NEXT: v_addc_u32_e32 v10, vcc, 0, v6, vcc +; CGP-NEXT: s_bfe_i32 s4, -1, 0x10000 +; CGP-NEXT: v_cmp_ge_u32_e32 vcc, v0, v5 +; CGP-NEXT: v_cndmask_b32_e64 v0, 0, -1, vcc +; CGP-NEXT: v_mov_b32_e32 v11, s4 +; CGP-NEXT: v_cmp_eq_u32_e32 vcc, 0, v1 +; CGP-NEXT: v_cndmask_b32_e32 v0, v11, v0, vcc +; CGP-NEXT: v_add_i32_e32 v1, vcc, 1, v9 +; CGP-NEXT: v_addc_u32_e32 v11, vcc, 0, v10, vcc +; CGP-NEXT: v_cmp_ne_u32_e32 vcc, 0, v0 +; CGP-NEXT: v_cndmask_b32_e32 v0, v9, v1, vcc +; CGP-NEXT: v_cvt_f32_u32_e32 v9, v5 +; CGP-NEXT: v_cndmask_b32_e32 v1, v10, v11, vcc +; CGP-NEXT: v_cmp_ne_u32_e32 vcc, 0, v8 +; CGP-NEXT: v_cndmask_b32_e32 v1, v6, v1, vcc +; CGP-NEXT: v_cvt_f32_ubyte0_e32 v6, 0 +; CGP-NEXT: v_mac_f32_e32 v9, 0x4f800000, v6 +; CGP-NEXT: v_rcp_iflag_f32_e32 v6, v9 +; CGP-NEXT: v_cndmask_b32_e32 v0, v7, v0, vcc +; CGP-NEXT: v_ashrrev_i32_e32 v7, 31, v3 +; CGP-NEXT: v_add_i32_e32 v2, vcc, v2, v7 +; CGP-NEXT: v_mul_f32_e32 v6, 0x5f7ffffc, v6 +; CGP-NEXT: v_mul_f32_e32 v8, 0x2f800000, v6 ; CGP-NEXT: v_trunc_f32_e32 v8, v8 -; CGP-NEXT: v_mac_f32_e32 v7, 0xcf800000, v8 -; CGP-NEXT: 
v_cvt_u32_f32_e32 v7, v7 +; CGP-NEXT: v_mac_f32_e32 v6, 0xcf800000, v8 +; CGP-NEXT: v_cvt_u32_f32_e32 v6, v6 ; CGP-NEXT: v_cvt_u32_f32_e32 v8, v8 -; CGP-NEXT: v_addc_u32_e32 v1, vcc, v1, v4, vcc -; CGP-NEXT: v_mul_lo_u32 v9, -1, v7 -; CGP-NEXT: v_mul_lo_u32 v10, s6, v8 -; CGP-NEXT: v_mul_hi_u32 v12, s6, v7 -; CGP-NEXT: v_mul_lo_u32 v11, s6, v7 +; CGP-NEXT: v_addc_u32_e32 v3, vcc, v3, v7, vcc +; CGP-NEXT: v_mul_lo_u32 v9, -1, v6 +; CGP-NEXT: v_mul_lo_u32 v10, s7, v8 +; CGP-NEXT: v_mul_hi_u32 v12, s7, v6 +; CGP-NEXT: v_mul_lo_u32 v11, s7, v6 ; CGP-NEXT: v_xor_b32_e32 v0, v0, v4 ; CGP-NEXT: v_add_i32_e32 v9, vcc, v9, v10 ; CGP-NEXT: v_add_i32_e32 v9, vcc, v9, v12 ; CGP-NEXT: v_mul_lo_u32 v10, v8, v11 -; CGP-NEXT: v_mul_lo_u32 v12, v7, v9 -; CGP-NEXT: v_mul_hi_u32 v13, v7, v11 +; CGP-NEXT: v_mul_lo_u32 v12, v6, v9 +; CGP-NEXT: v_mul_hi_u32 v13, v6, v11 ; CGP-NEXT: v_mul_hi_u32 v11, v8, v11 -; CGP-NEXT: v_xor_b32_e32 v1, v1, v4 +; CGP-NEXT: v_xor_b32_e32 v2, v2, v7 ; CGP-NEXT: v_add_i32_e32 v10, vcc, v10, v12 ; CGP-NEXT: v_cndmask_b32_e64 v12, 0, 1, vcc ; CGP-NEXT: v_add_i32_e32 v10, vcc, v10, v13 ; CGP-NEXT: v_cndmask_b32_e64 v10, 0, 1, vcc ; CGP-NEXT: v_mul_lo_u32 v13, v8, v9 ; CGP-NEXT: v_add_i32_e32 v10, vcc, v12, v10 -; CGP-NEXT: v_mul_hi_u32 v12, v7, v9 +; CGP-NEXT: v_mul_hi_u32 v12, v6, v9 ; CGP-NEXT: v_add_i32_e32 v11, vcc, v13, v11 ; CGP-NEXT: v_cndmask_b32_e64 v13, 0, 1, vcc ; CGP-NEXT: v_add_i32_e32 v11, vcc, v11, v12 @@ -1535,27 +1664,27 @@ define <2 x i64> @v_sdiv_v2i64_pow2k_denom(<2 x i64> %num) { ; CGP-NEXT: v_cndmask_b32_e64 v11, 0, 1, vcc ; CGP-NEXT: v_add_i32_e32 v11, vcc, v12, v11 ; CGP-NEXT: v_add_i32_e32 v9, vcc, v9, v11 -; CGP-NEXT: v_add_i32_e32 v7, vcc, v7, v10 +; CGP-NEXT: v_add_i32_e32 v6, vcc, v6, v10 ; CGP-NEXT: v_addc_u32_e32 v8, vcc, v8, v9, vcc -; CGP-NEXT: v_mul_lo_u32 v9, -1, v7 -; CGP-NEXT: v_mul_lo_u32 v10, s6, v8 -; CGP-NEXT: v_mul_hi_u32 v12, s6, v7 -; CGP-NEXT: v_mul_lo_u32 v11, s6, v7 -; CGP-NEXT: s_bfe_i32 s8, -1, 0x10000 +; CGP-NEXT: v_mul_lo_u32 v9, -1, v6 +; CGP-NEXT: v_mul_lo_u32 v10, s7, v8 +; CGP-NEXT: v_mul_hi_u32 v12, s7, v6 +; CGP-NEXT: v_mul_lo_u32 v11, s7, v6 +; CGP-NEXT: v_xor_b32_e32 v3, v3, v7 ; CGP-NEXT: v_add_i32_e32 v9, vcc, v9, v10 ; CGP-NEXT: v_add_i32_e32 v9, vcc, v9, v12 ; CGP-NEXT: v_mul_lo_u32 v10, v8, v11 -; CGP-NEXT: v_mul_lo_u32 v12, v7, v9 -; CGP-NEXT: v_mul_hi_u32 v13, v7, v11 +; CGP-NEXT: v_mul_lo_u32 v12, v6, v9 +; CGP-NEXT: v_mul_hi_u32 v13, v6, v11 ; CGP-NEXT: v_mul_hi_u32 v11, v8, v11 -; CGP-NEXT: v_mac_f32_e32 v5, 0x4f800000, v6 +; CGP-NEXT: v_xor_b32_e32 v1, v1, v4 ; CGP-NEXT: v_add_i32_e32 v10, vcc, v10, v12 ; CGP-NEXT: v_cndmask_b32_e64 v12, 0, 1, vcc ; CGP-NEXT: v_add_i32_e32 v10, vcc, v10, v13 ; CGP-NEXT: v_cndmask_b32_e64 v10, 0, 1, vcc ; CGP-NEXT: v_mul_lo_u32 v13, v8, v9 ; CGP-NEXT: v_add_i32_e32 v10, vcc, v12, v10 -; CGP-NEXT: v_mul_hi_u32 v12, v7, v9 +; CGP-NEXT: v_mul_hi_u32 v12, v6, v9 ; CGP-NEXT: v_add_i32_e32 v11, vcc, v13, v11 ; CGP-NEXT: v_cndmask_b32_e64 v13, 0, 1, vcc ; CGP-NEXT: v_add_i32_e32 v11, vcc, v11, v12 @@ -1566,195 +1695,69 @@ define <2 x i64> @v_sdiv_v2i64_pow2k_denom(<2 x i64> %num) { ; CGP-NEXT: v_cndmask_b32_e64 v11, 0, 1, vcc ; CGP-NEXT: v_add_i32_e32 v11, vcc, v12, v11 ; CGP-NEXT: v_add_i32_e32 v9, vcc, v9, v11 -; CGP-NEXT: v_add_i32_e32 v7, vcc, v7, v10 +; CGP-NEXT: v_add_i32_e32 v6, vcc, v6, v10 ; CGP-NEXT: v_addc_u32_e32 v8, vcc, v8, v9, vcc -; CGP-NEXT: v_mul_lo_u32 v9, v1, v7 -; CGP-NEXT: v_mul_lo_u32 v10, v0, v8 -; CGP-NEXT: v_mul_hi_u32 v11, v0, v7 -; CGP-NEXT: v_mul_hi_u32 v7, 
v1, v7 -; CGP-NEXT: v_rcp_iflag_f32_e32 v5, v5 +; CGP-NEXT: v_mul_lo_u32 v9, v3, v6 +; CGP-NEXT: v_mul_lo_u32 v10, v2, v8 +; CGP-NEXT: v_sub_i32_e32 v0, vcc, v0, v4 +; CGP-NEXT: v_subb_u32_e32 v1, vcc, v1, v4, vcc +; CGP-NEXT: v_mul_hi_u32 v4, v2, v6 ; CGP-NEXT: v_add_i32_e32 v9, vcc, v9, v10 ; CGP-NEXT: v_cndmask_b32_e64 v10, 0, 1, vcc -; CGP-NEXT: v_add_i32_e32 v9, vcc, v9, v11 +; CGP-NEXT: v_add_i32_e32 v4, vcc, v9, v4 +; CGP-NEXT: v_cndmask_b32_e64 v4, 0, 1, vcc +; CGP-NEXT: v_mul_lo_u32 v9, v3, v8 +; CGP-NEXT: v_mul_hi_u32 v6, v3, v6 +; CGP-NEXT: v_add_i32_e32 v4, vcc, v10, v4 +; CGP-NEXT: v_mul_hi_u32 v10, v2, v8 +; CGP-NEXT: v_add_i32_e32 v6, vcc, v9, v6 ; CGP-NEXT: v_cndmask_b32_e64 v9, 0, 1, vcc -; CGP-NEXT: v_mul_lo_u32 v11, v1, v8 -; CGP-NEXT: v_add_i32_e32 v9, vcc, v10, v9 -; CGP-NEXT: v_mul_hi_u32 v10, v0, v8 -; CGP-NEXT: v_add_i32_e32 v7, vcc, v11, v7 -; CGP-NEXT: v_cndmask_b32_e64 v11, 0, 1, vcc -; CGP-NEXT: v_add_i32_e32 v7, vcc, v7, v10 +; CGP-NEXT: v_add_i32_e32 v6, vcc, v6, v10 ; CGP-NEXT: v_cndmask_b32_e64 v10, 0, 1, vcc -; CGP-NEXT: v_add_i32_e32 v10, vcc, v11, v10 -; CGP-NEXT: v_mul_hi_u32 v8, v1, v8 -; CGP-NEXT: v_add_i32_e32 v7, vcc, v7, v9 -; CGP-NEXT: v_cndmask_b32_e64 v9, 0, 1, vcc -; CGP-NEXT: v_add_i32_e32 v9, vcc, v10, v9 -; CGP-NEXT: v_add_i32_e32 v8, vcc, v8, v9 -; CGP-NEXT: v_mul_lo_u32 v9, 0, v7 -; CGP-NEXT: v_mul_lo_u32 v10, s7, v8 -; CGP-NEXT: v_mul_hi_u32 v12, s7, v7 -; CGP-NEXT: v_mul_lo_u32 v11, s7, v7 -; CGP-NEXT: v_mul_f32_e32 v5, 0x5f7ffffc, v5 ; CGP-NEXT: v_add_i32_e32 v9, vcc, v9, v10 -; CGP-NEXT: v_add_i32_e32 v9, vcc, v9, v12 -; CGP-NEXT: v_sub_i32_e32 v0, vcc, v0, v11 -; CGP-NEXT: v_subb_u32_e64 v10, s[4:5], v1, v9, vcc -; CGP-NEXT: v_sub_i32_e64 v1, s[4:5], v1, v9 -; CGP-NEXT: v_subbrev_u32_e32 v1, vcc, 0, v1, vcc -; CGP-NEXT: v_cmp_le_u32_e64 s[4:5], s7, v0 -; CGP-NEXT: v_subrev_i32_e32 v0, vcc, s7, v0 -; CGP-NEXT: v_subbrev_u32_e32 v1, vcc, 0, v1, vcc -; CGP-NEXT: v_cndmask_b32_e64 v9, 0, -1, s[4:5] -; CGP-NEXT: v_mov_b32_e32 v11, s8 -; CGP-NEXT: v_cmp_eq_u32_e64 s[4:5], 0, v10 -; CGP-NEXT: v_add_i32_e32 v10, vcc, 1, v7 -; CGP-NEXT: v_cndmask_b32_e64 v9, v11, v9, s[4:5] -; CGP-NEXT: v_addc_u32_e32 v11, vcc, 0, v8, vcc -; CGP-NEXT: s_bfe_i32 s4, -1, 0x10000 -; CGP-NEXT: v_cmp_le_u32_e32 vcc, s7, v0 -; CGP-NEXT: v_cndmask_b32_e64 v0, 0, -1, vcc -; CGP-NEXT: v_mov_b32_e32 v12, s4 -; CGP-NEXT: v_cmp_eq_u32_e32 vcc, 0, v1 -; CGP-NEXT: v_cndmask_b32_e32 v0, v12, v0, vcc -; CGP-NEXT: v_add_i32_e32 v1, vcc, 1, v10 -; CGP-NEXT: v_addc_u32_e32 v12, vcc, 0, v11, vcc -; CGP-NEXT: v_cmp_ne_u32_e32 vcc, 0, v0 -; CGP-NEXT: v_cndmask_b32_e32 v0, v10, v1, vcc -; CGP-NEXT: v_cndmask_b32_e32 v1, v11, v12, vcc -; CGP-NEXT: v_cmp_ne_u32_e32 vcc, 0, v9 -; CGP-NEXT: v_cndmask_b32_e32 v0, v7, v0, vcc -; CGP-NEXT: v_mul_f32_e32 v7, 0x2f800000, v5 -; CGP-NEXT: v_trunc_f32_e32 v7, v7 -; CGP-NEXT: v_mac_f32_e32 v5, 0xcf800000, v7 -; CGP-NEXT: v_cvt_u32_f32_e32 v5, v5 -; CGP-NEXT: v_cvt_u32_f32_e32 v7, v7 -; CGP-NEXT: v_cndmask_b32_e32 v1, v8, v1, vcc -; CGP-NEXT: v_ashrrev_i32_e32 v6, 31, v3 -; CGP-NEXT: v_mul_lo_u32 v8, -1, v5 -; CGP-NEXT: v_mul_lo_u32 v9, s6, v7 -; CGP-NEXT: v_mul_hi_u32 v11, s6, v5 -; CGP-NEXT: v_mul_lo_u32 v10, s6, v5 -; CGP-NEXT: v_add_i32_e32 v2, vcc, v2, v6 -; CGP-NEXT: v_addc_u32_e32 v3, vcc, v3, v6, vcc -; CGP-NEXT: v_add_i32_e32 v8, vcc, v8, v9 -; CGP-NEXT: v_add_i32_e32 v8, vcc, v8, v11 -; CGP-NEXT: v_mul_lo_u32 v9, v7, v10 -; CGP-NEXT: v_mul_lo_u32 v11, v5, v8 -; CGP-NEXT: v_mul_hi_u32 v12, v5, v10 -; CGP-NEXT: v_mul_hi_u32 v10, v7, v10 -; 
CGP-NEXT: v_xor_b32_e32 v0, v0, v4 -; CGP-NEXT: v_add_i32_e32 v9, vcc, v9, v11 -; CGP-NEXT: v_cndmask_b32_e64 v11, 0, 1, vcc -; CGP-NEXT: v_add_i32_e32 v9, vcc, v9, v12 -; CGP-NEXT: v_cndmask_b32_e64 v9, 0, 1, vcc -; CGP-NEXT: v_mul_lo_u32 v12, v7, v8 -; CGP-NEXT: v_add_i32_e32 v9, vcc, v11, v9 -; CGP-NEXT: v_mul_hi_u32 v11, v5, v8 -; CGP-NEXT: v_add_i32_e32 v10, vcc, v12, v10 -; CGP-NEXT: v_cndmask_b32_e64 v12, 0, 1, vcc -; CGP-NEXT: v_add_i32_e32 v10, vcc, v10, v11 -; CGP-NEXT: v_cndmask_b32_e64 v11, 0, 1, vcc -; CGP-NEXT: v_add_i32_e32 v11, vcc, v12, v11 -; CGP-NEXT: v_mul_hi_u32 v8, v7, v8 -; CGP-NEXT: v_add_i32_e32 v9, vcc, v10, v9 -; CGP-NEXT: v_cndmask_b32_e64 v10, 0, 1, vcc -; CGP-NEXT: v_add_i32_e32 v10, vcc, v11, v10 -; CGP-NEXT: v_add_i32_e32 v8, vcc, v8, v10 -; CGP-NEXT: v_add_i32_e32 v5, vcc, v5, v9 -; CGP-NEXT: v_addc_u32_e32 v7, vcc, v7, v8, vcc -; CGP-NEXT: v_mul_lo_u32 v8, -1, v5 -; CGP-NEXT: v_mul_lo_u32 v9, s6, v7 -; CGP-NEXT: v_mul_hi_u32 v11, s6, v5 -; CGP-NEXT: v_mul_lo_u32 v10, s6, v5 -; CGP-NEXT: v_xor_b32_e32 v2, v2, v6 +; CGP-NEXT: v_mul_hi_u32 v8, v3, v8 +; CGP-NEXT: v_add_i32_e32 v4, vcc, v6, v4 +; CGP-NEXT: v_cndmask_b32_e64 v6, 0, 1, vcc +; CGP-NEXT: v_add_i32_e32 v6, vcc, v9, v6 +; CGP-NEXT: v_add_i32_e32 v6, vcc, v8, v6 +; CGP-NEXT: v_mul_lo_u32 v8, 0, v4 +; CGP-NEXT: v_mul_lo_u32 v9, s6, v6 +; CGP-NEXT: v_mul_hi_u32 v11, s6, v4 +; CGP-NEXT: v_mul_lo_u32 v10, s6, v4 +; CGP-NEXT: s_bfe_i32 s6, -1, 0x10000 ; CGP-NEXT: v_add_i32_e32 v8, vcc, v8, v9 ; CGP-NEXT: v_add_i32_e32 v8, vcc, v8, v11 -; CGP-NEXT: v_mul_lo_u32 v9, v7, v10 -; CGP-NEXT: v_mul_lo_u32 v11, v5, v8 -; CGP-NEXT: v_mul_hi_u32 v12, v5, v10 -; CGP-NEXT: v_mul_hi_u32 v10, v7, v10 -; CGP-NEXT: v_xor_b32_e32 v3, v3, v6 -; CGP-NEXT: v_add_i32_e32 v9, vcc, v9, v11 -; CGP-NEXT: v_cndmask_b32_e64 v11, 0, 1, vcc -; CGP-NEXT: v_add_i32_e32 v9, vcc, v9, v12 -; CGP-NEXT: v_cndmask_b32_e64 v9, 0, 1, vcc -; CGP-NEXT: v_mul_lo_u32 v12, v7, v8 -; CGP-NEXT: v_add_i32_e32 v9, vcc, v11, v9 -; CGP-NEXT: v_mul_hi_u32 v11, v5, v8 -; CGP-NEXT: v_add_i32_e32 v10, vcc, v12, v10 -; CGP-NEXT: v_cndmask_b32_e64 v12, 0, 1, vcc -; CGP-NEXT: v_add_i32_e32 v10, vcc, v10, v11 -; CGP-NEXT: v_cndmask_b32_e64 v11, 0, 1, vcc -; CGP-NEXT: v_add_i32_e32 v11, vcc, v12, v11 -; CGP-NEXT: v_mul_hi_u32 v8, v7, v8 -; CGP-NEXT: v_add_i32_e32 v9, vcc, v10, v9 -; CGP-NEXT: v_cndmask_b32_e64 v10, 0, 1, vcc -; CGP-NEXT: v_add_i32_e32 v10, vcc, v11, v10 -; CGP-NEXT: v_add_i32_e32 v8, vcc, v8, v10 -; CGP-NEXT: v_add_i32_e32 v5, vcc, v5, v9 -; CGP-NEXT: v_addc_u32_e32 v7, vcc, v7, v8, vcc -; CGP-NEXT: v_xor_b32_e32 v1, v1, v4 -; CGP-NEXT: v_mul_lo_u32 v8, v3, v5 -; CGP-NEXT: v_mul_lo_u32 v9, v2, v7 -; CGP-NEXT: v_sub_i32_e32 v0, vcc, v0, v4 -; CGP-NEXT: v_subb_u32_e32 v1, vcc, v1, v4, vcc -; CGP-NEXT: v_mul_hi_u32 v4, v2, v5 -; CGP-NEXT: v_add_i32_e32 v8, vcc, v8, v9 -; CGP-NEXT: v_cndmask_b32_e64 v9, 0, 1, vcc -; CGP-NEXT: v_add_i32_e32 v4, vcc, v8, v4 -; CGP-NEXT: v_cndmask_b32_e64 v4, 0, 1, vcc -; CGP-NEXT: v_mul_lo_u32 v8, v3, v7 -; CGP-NEXT: v_mul_hi_u32 v5, v3, v5 -; CGP-NEXT: v_add_i32_e32 v4, vcc, v9, v4 -; CGP-NEXT: v_mul_hi_u32 v9, v2, v7 -; CGP-NEXT: v_add_i32_e32 v5, vcc, v8, v5 -; CGP-NEXT: v_cndmask_b32_e64 v8, 0, 1, vcc -; CGP-NEXT: v_add_i32_e32 v5, vcc, v5, v9 -; CGP-NEXT: v_cndmask_b32_e64 v9, 0, 1, vcc -; CGP-NEXT: v_add_i32_e32 v8, vcc, v8, v9 -; CGP-NEXT: v_mul_hi_u32 v7, v3, v7 -; CGP-NEXT: v_add_i32_e32 v4, vcc, v5, v4 -; CGP-NEXT: v_cndmask_b32_e64 v5, 0, 1, vcc -; CGP-NEXT: v_add_i32_e32 v5, vcc, v8, v5 -; CGP-NEXT: 
v_add_i32_e32 v5, vcc, v7, v5 -; CGP-NEXT: v_mul_lo_u32 v7, 0, v4 -; CGP-NEXT: v_mul_lo_u32 v8, s7, v5 -; CGP-NEXT: v_mul_hi_u32 v10, s7, v4 -; CGP-NEXT: v_mul_lo_u32 v9, s7, v4 -; CGP-NEXT: s_bfe_i32 s6, -1, 0x10000 -; CGP-NEXT: v_add_i32_e32 v7, vcc, v7, v8 -; CGP-NEXT: v_add_i32_e32 v7, vcc, v7, v10 -; CGP-NEXT: v_sub_i32_e32 v2, vcc, v2, v9 -; CGP-NEXT: v_subb_u32_e64 v8, s[4:5], v3, v7, vcc -; CGP-NEXT: v_sub_i32_e64 v3, s[4:5], v3, v7 +; CGP-NEXT: v_sub_i32_e32 v2, vcc, v2, v10 +; CGP-NEXT: v_subb_u32_e64 v9, s[4:5], v3, v8, vcc +; CGP-NEXT: v_sub_i32_e64 v3, s[4:5], v3, v8 ; CGP-NEXT: v_subbrev_u32_e32 v3, vcc, 0, v3, vcc -; CGP-NEXT: v_cmp_le_u32_e64 s[4:5], s7, v2 -; CGP-NEXT: v_subrev_i32_e32 v2, vcc, s7, v2 +; CGP-NEXT: v_cmp_ge_u32_e64 s[4:5], v2, v5 +; CGP-NEXT: v_sub_i32_e32 v2, vcc, v2, v5 ; CGP-NEXT: v_subbrev_u32_e32 v3, vcc, 0, v3, vcc -; CGP-NEXT: v_cndmask_b32_e64 v7, 0, -1, s[4:5] -; CGP-NEXT: v_mov_b32_e32 v9, s6 -; CGP-NEXT: v_cmp_eq_u32_e64 s[4:5], 0, v8 -; CGP-NEXT: v_add_i32_e32 v8, vcc, 1, v4 -; CGP-NEXT: v_cndmask_b32_e64 v7, v9, v7, s[4:5] -; CGP-NEXT: v_addc_u32_e32 v9, vcc, 0, v5, vcc +; CGP-NEXT: v_cndmask_b32_e64 v8, 0, -1, s[4:5] +; CGP-NEXT: v_mov_b32_e32 v10, s6 +; CGP-NEXT: v_cmp_eq_u32_e64 s[4:5], 0, v9 +; CGP-NEXT: v_add_i32_e32 v9, vcc, 1, v4 +; CGP-NEXT: v_cndmask_b32_e64 v8, v10, v8, s[4:5] +; CGP-NEXT: v_addc_u32_e32 v10, vcc, 0, v6, vcc ; CGP-NEXT: s_bfe_i32 s4, -1, 0x10000 -; CGP-NEXT: v_cmp_le_u32_e32 vcc, s7, v2 +; CGP-NEXT: v_cmp_ge_u32_e32 vcc, v2, v5 ; CGP-NEXT: v_cndmask_b32_e64 v2, 0, -1, vcc -; CGP-NEXT: v_mov_b32_e32 v10, s4 +; CGP-NEXT: v_mov_b32_e32 v5, s4 ; CGP-NEXT: v_cmp_eq_u32_e32 vcc, 0, v3 -; CGP-NEXT: v_cndmask_b32_e32 v2, v10, v2, vcc -; CGP-NEXT: v_add_i32_e32 v3, vcc, 1, v8 -; CGP-NEXT: v_addc_u32_e32 v10, vcc, 0, v9, vcc +; CGP-NEXT: v_cndmask_b32_e32 v2, v5, v2, vcc +; CGP-NEXT: v_add_i32_e32 v3, vcc, 1, v9 +; CGP-NEXT: v_addc_u32_e32 v5, vcc, 0, v10, vcc ; CGP-NEXT: v_cmp_ne_u32_e32 vcc, 0, v2 -; CGP-NEXT: v_cndmask_b32_e32 v2, v8, v3, vcc -; CGP-NEXT: v_cndmask_b32_e32 v3, v9, v10, vcc -; CGP-NEXT: v_cmp_ne_u32_e32 vcc, 0, v7 +; CGP-NEXT: v_cndmask_b32_e32 v2, v9, v3, vcc +; CGP-NEXT: v_cndmask_b32_e32 v3, v10, v5, vcc +; CGP-NEXT: v_cmp_ne_u32_e32 vcc, 0, v8 ; CGP-NEXT: v_cndmask_b32_e32 v2, v4, v2, vcc -; CGP-NEXT: v_cndmask_b32_e32 v3, v5, v3, vcc -; CGP-NEXT: v_xor_b32_e32 v2, v2, v6 -; CGP-NEXT: v_xor_b32_e32 v3, v3, v6 -; CGP-NEXT: v_sub_i32_e32 v2, vcc, v2, v6 -; CGP-NEXT: v_subb_u32_e32 v3, vcc, v3, v6, vcc +; CGP-NEXT: v_cndmask_b32_e32 v3, v6, v3, vcc +; CGP-NEXT: v_xor_b32_e32 v2, v2, v7 +; CGP-NEXT: v_xor_b32_e32 v3, v3, v7 +; CGP-NEXT: v_sub_i32_e32 v2, vcc, v2, v7 +; CGP-NEXT: v_subb_u32_e32 v3, vcc, v3, v7, vcc ; CGP-NEXT: s_setpc_b64 s[30:31] %result = sdiv <2 x i64> %num, ret <2 x i64> %result @@ -1764,10 +1767,11 @@ define i64 @v_sdiv_i64_oddk_denom(i64 %num) { ; CHECK-LABEL: v_sdiv_i64_oddk_denom: ; CHECK: ; %bb.0: ; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; CHECK-NEXT: v_cvt_f32_u32_e32 v2, 0x12d8fb +; CHECK-NEXT: s_mov_b32 s4, 0x12d8fb +; CHECK-NEXT: v_cvt_f32_u32_e32 v2, s4 ; CHECK-NEXT: v_cvt_f32_ubyte0_e32 v3, 0 -; CHECK-NEXT: s_mov_b32 s4, 0xffed2705 -; CHECK-NEXT: s_mov_b32 s6, 0x12d8fb +; CHECK-NEXT: s_mov_b32 s5, 0xffed2705 +; CHECK-NEXT: s_bfe_i32 s6, -1, 0x10000 ; CHECK-NEXT: v_mac_f32_e32 v2, 0x4f800000, v3 ; CHECK-NEXT: v_rcp_iflag_f32_e32 v2, v2 ; CHECK-NEXT: v_ashrrev_i32_e32 v3, 31, v1 @@ -1779,10 +1783,10 @@ define i64 @v_sdiv_i64_oddk_denom(i64 %num) { ; CHECK-NEXT: 
v_cvt_u32_f32_e32 v4, v4 ; CHECK-NEXT: v_cvt_u32_f32_e32 v2, v2 ; CHECK-NEXT: v_addc_u32_e32 v1, vcc, v1, v3, vcc -; CHECK-NEXT: v_mul_lo_u32 v6, s4, v4 +; CHECK-NEXT: v_mul_lo_u32 v6, s5, v4 ; CHECK-NEXT: v_mul_lo_u32 v5, -1, v2 -; CHECK-NEXT: v_mul_hi_u32 v8, s4, v2 -; CHECK-NEXT: v_mul_lo_u32 v7, s4, v2 +; CHECK-NEXT: v_mul_hi_u32 v8, s5, v2 +; CHECK-NEXT: v_mul_lo_u32 v7, s5, v2 ; CHECK-NEXT: v_xor_b32_e32 v0, v0, v3 ; CHECK-NEXT: v_add_i32_e32 v5, vcc, v5, v6 ; CHECK-NEXT: v_add_i32_e32 v5, vcc, v5, v8 @@ -1811,10 +1815,9 @@ define i64 @v_sdiv_i64_oddk_denom(i64 %num) { ; CHECK-NEXT: v_add_i32_e32 v2, vcc, v2, v6 ; CHECK-NEXT: v_addc_u32_e32 v4, vcc, v4, v5, vcc ; CHECK-NEXT: v_mul_lo_u32 v5, -1, v2 -; CHECK-NEXT: v_mul_lo_u32 v6, s4, v4 -; CHECK-NEXT: v_mul_hi_u32 v8, s4, v2 -; CHECK-NEXT: v_mul_lo_u32 v7, s4, v2 -; CHECK-NEXT: s_bfe_i32 s7, -1, 0x10000 +; CHECK-NEXT: v_mul_lo_u32 v6, s5, v4 +; CHECK-NEXT: v_mul_hi_u32 v8, s5, v2 +; CHECK-NEXT: v_mul_lo_u32 v7, s5, v2 ; CHECK-NEXT: v_add_i32_e32 v5, vcc, v5, v6 ; CHECK-NEXT: v_add_i32_e32 v5, vcc, v5, v8 ; CHECK-NEXT: v_mul_lo_u32 v6, v4, v7 @@ -1842,55 +1845,56 @@ define i64 @v_sdiv_i64_oddk_denom(i64 %num) { ; CHECK-NEXT: v_addc_u32_e32 v4, vcc, v4, v5, vcc ; CHECK-NEXT: v_mul_lo_u32 v5, v1, v2 ; CHECK-NEXT: v_mul_lo_u32 v6, v0, v4 -; CHECK-NEXT: v_mul_hi_u32 v7, v0, v2 +; CHECK-NEXT: v_mul_hi_u32 v8, v0, v2 ; CHECK-NEXT: v_mul_hi_u32 v2, v1, v2 +; CHECK-NEXT: v_mov_b32_e32 v7, 0x12d8fb ; CHECK-NEXT: v_add_i32_e32 v5, vcc, v5, v6 ; CHECK-NEXT: v_cndmask_b32_e64 v6, 0, 1, vcc -; CHECK-NEXT: v_add_i32_e32 v5, vcc, v5, v7 +; CHECK-NEXT: v_add_i32_e32 v5, vcc, v5, v8 ; CHECK-NEXT: v_cndmask_b32_e64 v5, 0, 1, vcc -; CHECK-NEXT: v_mul_lo_u32 v7, v1, v4 +; CHECK-NEXT: v_mul_lo_u32 v8, v1, v4 ; CHECK-NEXT: v_add_i32_e32 v5, vcc, v6, v5 ; CHECK-NEXT: v_mul_hi_u32 v6, v0, v4 -; CHECK-NEXT: v_add_i32_e32 v2, vcc, v7, v2 -; CHECK-NEXT: v_cndmask_b32_e64 v7, 0, 1, vcc +; CHECK-NEXT: v_add_i32_e32 v2, vcc, v8, v2 +; CHECK-NEXT: v_cndmask_b32_e64 v8, 0, 1, vcc ; CHECK-NEXT: v_add_i32_e32 v2, vcc, v2, v6 ; CHECK-NEXT: v_cndmask_b32_e64 v6, 0, 1, vcc -; CHECK-NEXT: v_add_i32_e32 v6, vcc, v7, v6 +; CHECK-NEXT: v_add_i32_e32 v6, vcc, v8, v6 ; CHECK-NEXT: v_mul_hi_u32 v4, v1, v4 ; CHECK-NEXT: v_add_i32_e32 v2, vcc, v2, v5 ; CHECK-NEXT: v_cndmask_b32_e64 v5, 0, 1, vcc ; CHECK-NEXT: v_add_i32_e32 v5, vcc, v6, v5 ; CHECK-NEXT: v_add_i32_e32 v4, vcc, v4, v5 ; CHECK-NEXT: v_mul_lo_u32 v5, 0, v2 -; CHECK-NEXT: v_mul_lo_u32 v6, s6, v4 -; CHECK-NEXT: v_mul_hi_u32 v8, s6, v2 -; CHECK-NEXT: v_mul_lo_u32 v7, s6, v2 +; CHECK-NEXT: v_mul_lo_u32 v6, s4, v4 +; CHECK-NEXT: v_mul_hi_u32 v9, s4, v2 +; CHECK-NEXT: v_mul_lo_u32 v8, s4, v2 ; CHECK-NEXT: v_add_i32_e32 v5, vcc, v5, v6 -; CHECK-NEXT: v_add_i32_e32 v5, vcc, v5, v8 -; CHECK-NEXT: v_sub_i32_e32 v0, vcc, v0, v7 +; CHECK-NEXT: v_add_i32_e32 v5, vcc, v5, v9 +; CHECK-NEXT: v_sub_i32_e32 v0, vcc, v0, v8 ; CHECK-NEXT: v_subb_u32_e64 v6, s[4:5], v1, v5, vcc ; CHECK-NEXT: v_sub_i32_e64 v1, s[4:5], v1, v5 ; CHECK-NEXT: v_subbrev_u32_e32 v1, vcc, 0, v1, vcc -; CHECK-NEXT: v_cmp_le_u32_e64 s[4:5], s6, v0 -; CHECK-NEXT: v_subrev_i32_e32 v0, vcc, s6, v0 +; CHECK-NEXT: v_cmp_ge_u32_e64 s[4:5], v0, v7 +; CHECK-NEXT: v_sub_i32_e32 v0, vcc, v0, v7 ; CHECK-NEXT: v_subbrev_u32_e32 v1, vcc, 0, v1, vcc ; CHECK-NEXT: v_cndmask_b32_e64 v5, 0, -1, s[4:5] -; CHECK-NEXT: v_mov_b32_e32 v7, s7 +; CHECK-NEXT: v_mov_b32_e32 v8, s6 ; CHECK-NEXT: v_cmp_eq_u32_e64 s[4:5], 0, v6 ; CHECK-NEXT: v_add_i32_e32 v6, vcc, 1, v2 -; CHECK-NEXT: 
v_cndmask_b32_e64 v5, v7, v5, s[4:5] -; CHECK-NEXT: v_addc_u32_e32 v7, vcc, 0, v4, vcc +; CHECK-NEXT: v_cndmask_b32_e64 v5, v8, v5, s[4:5] +; CHECK-NEXT: v_addc_u32_e32 v8, vcc, 0, v4, vcc ; CHECK-NEXT: s_bfe_i32 s4, -1, 0x10000 -; CHECK-NEXT: v_cmp_le_u32_e32 vcc, s6, v0 +; CHECK-NEXT: v_cmp_ge_u32_e32 vcc, v0, v7 ; CHECK-NEXT: v_cndmask_b32_e64 v0, 0, -1, vcc -; CHECK-NEXT: v_mov_b32_e32 v8, s4 +; CHECK-NEXT: v_mov_b32_e32 v7, s4 ; CHECK-NEXT: v_cmp_eq_u32_e32 vcc, 0, v1 -; CHECK-NEXT: v_cndmask_b32_e32 v0, v8, v0, vcc +; CHECK-NEXT: v_cndmask_b32_e32 v0, v7, v0, vcc ; CHECK-NEXT: v_add_i32_e32 v1, vcc, 1, v6 -; CHECK-NEXT: v_addc_u32_e32 v8, vcc, 0, v7, vcc +; CHECK-NEXT: v_addc_u32_e32 v7, vcc, 0, v8, vcc ; CHECK-NEXT: v_cmp_ne_u32_e32 vcc, 0, v0 ; CHECK-NEXT: v_cndmask_b32_e32 v0, v6, v1, vcc -; CHECK-NEXT: v_cndmask_b32_e32 v1, v7, v8, vcc +; CHECK-NEXT: v_cndmask_b32_e32 v1, v8, v7, vcc ; CHECK-NEXT: v_cmp_ne_u32_e32 vcc, 0, v5 ; CHECK-NEXT: v_cndmask_b32_e32 v0, v2, v0, vcc ; CHECK-NEXT: v_cndmask_b32_e32 v1, v4, v1, vcc @@ -2203,41 +2207,169 @@ define <2 x i64> @v_sdiv_v2i64_oddk_denom(<2 x i64> %num) { ; CGP-LABEL: v_sdiv_v2i64_oddk_denom: ; CGP: ; %bb.0: ; CGP-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; CGP-NEXT: v_cvt_f32_u32_e32 v5, 0x12d8fb -; CGP-NEXT: v_cvt_f32_ubyte0_e32 v6, 0 -; CGP-NEXT: s_mov_b32 s6, 0xffed2705 -; CGP-NEXT: s_mov_b32 s7, 0x12d8fb -; CGP-NEXT: v_mov_b32_e32 v4, v5 -; CGP-NEXT: v_mac_f32_e32 v4, 0x4f800000, v6 -; CGP-NEXT: v_rcp_iflag_f32_e32 v7, v4 +; CGP-NEXT: s_mov_b32 s6, 0x12d8fb +; CGP-NEXT: v_cvt_f32_u32_e32 v4, s6 +; CGP-NEXT: v_cvt_f32_ubyte0_e32 v5, 0 +; CGP-NEXT: s_mov_b32 s7, 0xffed2705 +; CGP-NEXT: s_bfe_i32 s8, -1, 0x10000 +; CGP-NEXT: v_mac_f32_e32 v4, 0x4f800000, v5 +; CGP-NEXT: v_rcp_iflag_f32_e32 v5, v4 ; CGP-NEXT: v_ashrrev_i32_e32 v4, 31, v1 ; CGP-NEXT: v_add_i32_e32 v0, vcc, v0, v4 -; CGP-NEXT: v_mul_f32_e32 v7, 0x5f7ffffc, v7 -; CGP-NEXT: v_mul_f32_e32 v8, 0x2f800000, v7 +; CGP-NEXT: v_mul_f32_e32 v5, 0x5f7ffffc, v5 +; CGP-NEXT: v_mul_f32_e32 v6, 0x2f800000, v5 +; CGP-NEXT: v_trunc_f32_e32 v6, v6 +; CGP-NEXT: v_mac_f32_e32 v5, 0xcf800000, v6 +; CGP-NEXT: v_cvt_u32_f32_e32 v5, v5 +; CGP-NEXT: v_cvt_u32_f32_e32 v6, v6 +; CGP-NEXT: v_addc_u32_e32 v1, vcc, v1, v4, vcc +; CGP-NEXT: v_mul_lo_u32 v7, -1, v5 +; CGP-NEXT: v_mul_lo_u32 v8, s7, v6 +; CGP-NEXT: v_mul_hi_u32 v10, s7, v5 +; CGP-NEXT: v_mul_lo_u32 v9, s7, v5 +; CGP-NEXT: v_xor_b32_e32 v0, v0, v4 +; CGP-NEXT: v_add_i32_e32 v7, vcc, v7, v8 +; CGP-NEXT: v_add_i32_e32 v7, vcc, v7, v10 +; CGP-NEXT: v_mul_lo_u32 v8, v6, v9 +; CGP-NEXT: v_mul_lo_u32 v10, v5, v7 +; CGP-NEXT: v_mul_hi_u32 v11, v5, v9 +; CGP-NEXT: v_mul_hi_u32 v9, v6, v9 +; CGP-NEXT: v_xor_b32_e32 v1, v1, v4 +; CGP-NEXT: v_add_i32_e32 v8, vcc, v8, v10 +; CGP-NEXT: v_cndmask_b32_e64 v10, 0, 1, vcc +; CGP-NEXT: v_add_i32_e32 v8, vcc, v8, v11 +; CGP-NEXT: v_cndmask_b32_e64 v8, 0, 1, vcc +; CGP-NEXT: v_mul_lo_u32 v11, v6, v7 +; CGP-NEXT: v_add_i32_e32 v8, vcc, v10, v8 +; CGP-NEXT: v_mul_hi_u32 v10, v5, v7 +; CGP-NEXT: v_add_i32_e32 v9, vcc, v11, v9 +; CGP-NEXT: v_cndmask_b32_e64 v11, 0, 1, vcc +; CGP-NEXT: v_add_i32_e32 v9, vcc, v9, v10 +; CGP-NEXT: v_cndmask_b32_e64 v10, 0, 1, vcc +; CGP-NEXT: v_add_i32_e32 v10, vcc, v11, v10 +; CGP-NEXT: v_mul_hi_u32 v7, v6, v7 +; CGP-NEXT: v_add_i32_e32 v8, vcc, v9, v8 +; CGP-NEXT: v_cndmask_b32_e64 v9, 0, 1, vcc +; CGP-NEXT: v_add_i32_e32 v9, vcc, v10, v9 +; CGP-NEXT: v_add_i32_e32 v7, vcc, v7, v9 +; CGP-NEXT: v_add_i32_e32 v5, vcc, v5, v8 +; CGP-NEXT: v_addc_u32_e32 v6, vcc, v6, v7, 
vcc +; CGP-NEXT: v_mul_lo_u32 v7, -1, v5 +; CGP-NEXT: v_mul_lo_u32 v8, s7, v6 +; CGP-NEXT: v_mul_hi_u32 v10, s7, v5 +; CGP-NEXT: v_mul_lo_u32 v9, s7, v5 +; CGP-NEXT: v_add_i32_e32 v7, vcc, v7, v8 +; CGP-NEXT: v_add_i32_e32 v7, vcc, v7, v10 +; CGP-NEXT: v_mul_lo_u32 v8, v6, v9 +; CGP-NEXT: v_mul_lo_u32 v10, v5, v7 +; CGP-NEXT: v_mul_hi_u32 v11, v5, v9 +; CGP-NEXT: v_mul_hi_u32 v9, v6, v9 +; CGP-NEXT: v_add_i32_e32 v8, vcc, v8, v10 +; CGP-NEXT: v_cndmask_b32_e64 v10, 0, 1, vcc +; CGP-NEXT: v_add_i32_e32 v8, vcc, v8, v11 +; CGP-NEXT: v_cndmask_b32_e64 v8, 0, 1, vcc +; CGP-NEXT: v_mul_lo_u32 v11, v6, v7 +; CGP-NEXT: v_add_i32_e32 v8, vcc, v10, v8 +; CGP-NEXT: v_mul_hi_u32 v10, v5, v7 +; CGP-NEXT: v_add_i32_e32 v9, vcc, v11, v9 +; CGP-NEXT: v_cndmask_b32_e64 v11, 0, 1, vcc +; CGP-NEXT: v_add_i32_e32 v9, vcc, v9, v10 +; CGP-NEXT: v_cndmask_b32_e64 v10, 0, 1, vcc +; CGP-NEXT: v_add_i32_e32 v10, vcc, v11, v10 +; CGP-NEXT: v_mul_hi_u32 v7, v6, v7 +; CGP-NEXT: v_add_i32_e32 v8, vcc, v9, v8 +; CGP-NEXT: v_cndmask_b32_e64 v9, 0, 1, vcc +; CGP-NEXT: v_add_i32_e32 v9, vcc, v10, v9 +; CGP-NEXT: v_add_i32_e32 v7, vcc, v7, v9 +; CGP-NEXT: v_add_i32_e32 v8, vcc, v5, v8 +; CGP-NEXT: v_addc_u32_e32 v6, vcc, v6, v7, vcc +; CGP-NEXT: v_mul_lo_u32 v7, v1, v8 +; CGP-NEXT: v_mul_lo_u32 v9, v0, v6 +; CGP-NEXT: v_mul_hi_u32 v10, v0, v8 +; CGP-NEXT: v_mul_hi_u32 v8, v1, v8 +; CGP-NEXT: v_mov_b32_e32 v5, 0x12d8fb +; CGP-NEXT: v_add_i32_e32 v7, vcc, v7, v9 +; CGP-NEXT: v_cndmask_b32_e64 v9, 0, 1, vcc +; CGP-NEXT: v_add_i32_e32 v7, vcc, v7, v10 +; CGP-NEXT: v_cndmask_b32_e64 v7, 0, 1, vcc +; CGP-NEXT: v_mul_lo_u32 v10, v1, v6 +; CGP-NEXT: v_add_i32_e32 v7, vcc, v9, v7 +; CGP-NEXT: v_mul_hi_u32 v9, v0, v6 +; CGP-NEXT: v_add_i32_e32 v8, vcc, v10, v8 +; CGP-NEXT: v_cndmask_b32_e64 v10, 0, 1, vcc +; CGP-NEXT: v_add_i32_e32 v8, vcc, v8, v9 +; CGP-NEXT: v_cndmask_b32_e64 v9, 0, 1, vcc +; CGP-NEXT: v_add_i32_e32 v9, vcc, v10, v9 +; CGP-NEXT: v_mul_hi_u32 v6, v1, v6 +; CGP-NEXT: v_add_i32_e32 v7, vcc, v8, v7 +; CGP-NEXT: v_cndmask_b32_e64 v8, 0, 1, vcc +; CGP-NEXT: v_add_i32_e32 v8, vcc, v9, v8 +; CGP-NEXT: v_add_i32_e32 v6, vcc, v6, v8 +; CGP-NEXT: v_mul_lo_u32 v8, 0, v7 +; CGP-NEXT: v_mul_lo_u32 v9, s6, v6 +; CGP-NEXT: v_mul_hi_u32 v11, s6, v7 +; CGP-NEXT: v_mul_lo_u32 v10, s6, v7 +; CGP-NEXT: v_add_i32_e32 v8, vcc, v8, v9 +; CGP-NEXT: v_add_i32_e32 v8, vcc, v8, v11 +; CGP-NEXT: v_sub_i32_e32 v0, vcc, v0, v10 +; CGP-NEXT: v_subb_u32_e64 v9, s[4:5], v1, v8, vcc +; CGP-NEXT: v_sub_i32_e64 v1, s[4:5], v1, v8 +; CGP-NEXT: v_subbrev_u32_e32 v1, vcc, 0, v1, vcc +; CGP-NEXT: v_cmp_ge_u32_e64 s[4:5], v0, v5 +; CGP-NEXT: v_sub_i32_e32 v0, vcc, v0, v5 +; CGP-NEXT: v_subbrev_u32_e32 v1, vcc, 0, v1, vcc +; CGP-NEXT: v_cndmask_b32_e64 v8, 0, -1, s[4:5] +; CGP-NEXT: v_mov_b32_e32 v10, s8 +; CGP-NEXT: v_cmp_eq_u32_e64 s[4:5], 0, v9 +; CGP-NEXT: v_add_i32_e32 v9, vcc, 1, v7 +; CGP-NEXT: v_cndmask_b32_e64 v8, v10, v8, s[4:5] +; CGP-NEXT: v_addc_u32_e32 v10, vcc, 0, v6, vcc +; CGP-NEXT: s_bfe_i32 s4, -1, 0x10000 +; CGP-NEXT: v_cmp_ge_u32_e32 vcc, v0, v5 +; CGP-NEXT: v_cndmask_b32_e64 v0, 0, -1, vcc +; CGP-NEXT: v_mov_b32_e32 v11, s4 +; CGP-NEXT: v_cmp_eq_u32_e32 vcc, 0, v1 +; CGP-NEXT: v_cndmask_b32_e32 v0, v11, v0, vcc +; CGP-NEXT: v_add_i32_e32 v1, vcc, 1, v9 +; CGP-NEXT: v_addc_u32_e32 v11, vcc, 0, v10, vcc +; CGP-NEXT: v_cmp_ne_u32_e32 vcc, 0, v0 +; CGP-NEXT: v_cndmask_b32_e32 v0, v9, v1, vcc +; CGP-NEXT: v_cvt_f32_u32_e32 v9, v5 +; CGP-NEXT: v_cndmask_b32_e32 v1, v10, v11, vcc +; CGP-NEXT: v_cmp_ne_u32_e32 vcc, 0, v8 +; CGP-NEXT: 
v_cndmask_b32_e32 v1, v6, v1, vcc +; CGP-NEXT: v_cvt_f32_ubyte0_e32 v6, 0 +; CGP-NEXT: v_mac_f32_e32 v9, 0x4f800000, v6 +; CGP-NEXT: v_rcp_iflag_f32_e32 v6, v9 +; CGP-NEXT: v_cndmask_b32_e32 v0, v7, v0, vcc +; CGP-NEXT: v_ashrrev_i32_e32 v7, 31, v3 +; CGP-NEXT: v_add_i32_e32 v2, vcc, v2, v7 +; CGP-NEXT: v_mul_f32_e32 v6, 0x5f7ffffc, v6 +; CGP-NEXT: v_mul_f32_e32 v8, 0x2f800000, v6 ; CGP-NEXT: v_trunc_f32_e32 v8, v8 -; CGP-NEXT: v_mac_f32_e32 v7, 0xcf800000, v8 -; CGP-NEXT: v_cvt_u32_f32_e32 v7, v7 +; CGP-NEXT: v_mac_f32_e32 v6, 0xcf800000, v8 +; CGP-NEXT: v_cvt_u32_f32_e32 v6, v6 ; CGP-NEXT: v_cvt_u32_f32_e32 v8, v8 -; CGP-NEXT: v_addc_u32_e32 v1, vcc, v1, v4, vcc -; CGP-NEXT: v_mul_lo_u32 v9, -1, v7 -; CGP-NEXT: v_mul_lo_u32 v10, s6, v8 -; CGP-NEXT: v_mul_hi_u32 v12, s6, v7 -; CGP-NEXT: v_mul_lo_u32 v11, s6, v7 +; CGP-NEXT: v_addc_u32_e32 v3, vcc, v3, v7, vcc +; CGP-NEXT: v_mul_lo_u32 v9, -1, v6 +; CGP-NEXT: v_mul_lo_u32 v10, s7, v8 +; CGP-NEXT: v_mul_hi_u32 v12, s7, v6 +; CGP-NEXT: v_mul_lo_u32 v11, s7, v6 ; CGP-NEXT: v_xor_b32_e32 v0, v0, v4 ; CGP-NEXT: v_add_i32_e32 v9, vcc, v9, v10 ; CGP-NEXT: v_add_i32_e32 v9, vcc, v9, v12 ; CGP-NEXT: v_mul_lo_u32 v10, v8, v11 -; CGP-NEXT: v_mul_lo_u32 v12, v7, v9 -; CGP-NEXT: v_mul_hi_u32 v13, v7, v11 +; CGP-NEXT: v_mul_lo_u32 v12, v6, v9 +; CGP-NEXT: v_mul_hi_u32 v13, v6, v11 ; CGP-NEXT: v_mul_hi_u32 v11, v8, v11 -; CGP-NEXT: v_xor_b32_e32 v1, v1, v4 +; CGP-NEXT: v_xor_b32_e32 v2, v2, v7 ; CGP-NEXT: v_add_i32_e32 v10, vcc, v10, v12 ; CGP-NEXT: v_cndmask_b32_e64 v12, 0, 1, vcc ; CGP-NEXT: v_add_i32_e32 v10, vcc, v10, v13 ; CGP-NEXT: v_cndmask_b32_e64 v10, 0, 1, vcc ; CGP-NEXT: v_mul_lo_u32 v13, v8, v9 ; CGP-NEXT: v_add_i32_e32 v10, vcc, v12, v10 -; CGP-NEXT: v_mul_hi_u32 v12, v7, v9 +; CGP-NEXT: v_mul_hi_u32 v12, v6, v9 ; CGP-NEXT: v_add_i32_e32 v11, vcc, v13, v11 ; CGP-NEXT: v_cndmask_b32_e64 v13, 0, 1, vcc ; CGP-NEXT: v_add_i32_e32 v11, vcc, v11, v12 @@ -2248,27 +2380,27 @@ define <2 x i64> @v_sdiv_v2i64_oddk_denom(<2 x i64> %num) { ; CGP-NEXT: v_cndmask_b32_e64 v11, 0, 1, vcc ; CGP-NEXT: v_add_i32_e32 v11, vcc, v12, v11 ; CGP-NEXT: v_add_i32_e32 v9, vcc, v9, v11 -; CGP-NEXT: v_add_i32_e32 v7, vcc, v7, v10 +; CGP-NEXT: v_add_i32_e32 v6, vcc, v6, v10 ; CGP-NEXT: v_addc_u32_e32 v8, vcc, v8, v9, vcc -; CGP-NEXT: v_mul_lo_u32 v9, -1, v7 -; CGP-NEXT: v_mul_lo_u32 v10, s6, v8 -; CGP-NEXT: v_mul_hi_u32 v12, s6, v7 -; CGP-NEXT: v_mul_lo_u32 v11, s6, v7 -; CGP-NEXT: s_bfe_i32 s8, -1, 0x10000 +; CGP-NEXT: v_mul_lo_u32 v9, -1, v6 +; CGP-NEXT: v_mul_lo_u32 v10, s7, v8 +; CGP-NEXT: v_mul_hi_u32 v12, s7, v6 +; CGP-NEXT: v_mul_lo_u32 v11, s7, v6 +; CGP-NEXT: v_xor_b32_e32 v3, v3, v7 ; CGP-NEXT: v_add_i32_e32 v9, vcc, v9, v10 ; CGP-NEXT: v_add_i32_e32 v9, vcc, v9, v12 ; CGP-NEXT: v_mul_lo_u32 v10, v8, v11 -; CGP-NEXT: v_mul_lo_u32 v12, v7, v9 -; CGP-NEXT: v_mul_hi_u32 v13, v7, v11 +; CGP-NEXT: v_mul_lo_u32 v12, v6, v9 +; CGP-NEXT: v_mul_hi_u32 v13, v6, v11 ; CGP-NEXT: v_mul_hi_u32 v11, v8, v11 -; CGP-NEXT: v_mac_f32_e32 v5, 0x4f800000, v6 +; CGP-NEXT: v_xor_b32_e32 v1, v1, v4 ; CGP-NEXT: v_add_i32_e32 v10, vcc, v10, v12 ; CGP-NEXT: v_cndmask_b32_e64 v12, 0, 1, vcc ; CGP-NEXT: v_add_i32_e32 v10, vcc, v10, v13 ; CGP-NEXT: v_cndmask_b32_e64 v10, 0, 1, vcc ; CGP-NEXT: v_mul_lo_u32 v13, v8, v9 ; CGP-NEXT: v_add_i32_e32 v10, vcc, v12, v10 -; CGP-NEXT: v_mul_hi_u32 v12, v7, v9 +; CGP-NEXT: v_mul_hi_u32 v12, v6, v9 ; CGP-NEXT: v_add_i32_e32 v11, vcc, v13, v11 ; CGP-NEXT: v_cndmask_b32_e64 v13, 0, 1, vcc ; CGP-NEXT: v_add_i32_e32 v11, vcc, v11, v12 @@ -2279,195 +2411,69 
@@ define <2 x i64> @v_sdiv_v2i64_oddk_denom(<2 x i64> %num) { ; CGP-NEXT: v_cndmask_b32_e64 v11, 0, 1, vcc ; CGP-NEXT: v_add_i32_e32 v11, vcc, v12, v11 ; CGP-NEXT: v_add_i32_e32 v9, vcc, v9, v11 -; CGP-NEXT: v_add_i32_e32 v7, vcc, v7, v10 +; CGP-NEXT: v_add_i32_e32 v6, vcc, v6, v10 ; CGP-NEXT: v_addc_u32_e32 v8, vcc, v8, v9, vcc -; CGP-NEXT: v_mul_lo_u32 v9, v1, v7 -; CGP-NEXT: v_mul_lo_u32 v10, v0, v8 -; CGP-NEXT: v_mul_hi_u32 v11, v0, v7 -; CGP-NEXT: v_mul_hi_u32 v7, v1, v7 -; CGP-NEXT: v_rcp_iflag_f32_e32 v5, v5 +; CGP-NEXT: v_mul_lo_u32 v9, v3, v6 +; CGP-NEXT: v_mul_lo_u32 v10, v2, v8 +; CGP-NEXT: v_sub_i32_e32 v0, vcc, v0, v4 +; CGP-NEXT: v_subb_u32_e32 v1, vcc, v1, v4, vcc +; CGP-NEXT: v_mul_hi_u32 v4, v2, v6 ; CGP-NEXT: v_add_i32_e32 v9, vcc, v9, v10 ; CGP-NEXT: v_cndmask_b32_e64 v10, 0, 1, vcc -; CGP-NEXT: v_add_i32_e32 v9, vcc, v9, v11 +; CGP-NEXT: v_add_i32_e32 v4, vcc, v9, v4 +; CGP-NEXT: v_cndmask_b32_e64 v4, 0, 1, vcc +; CGP-NEXT: v_mul_lo_u32 v9, v3, v8 +; CGP-NEXT: v_mul_hi_u32 v6, v3, v6 +; CGP-NEXT: v_add_i32_e32 v4, vcc, v10, v4 +; CGP-NEXT: v_mul_hi_u32 v10, v2, v8 +; CGP-NEXT: v_add_i32_e32 v6, vcc, v9, v6 ; CGP-NEXT: v_cndmask_b32_e64 v9, 0, 1, vcc -; CGP-NEXT: v_mul_lo_u32 v11, v1, v8 -; CGP-NEXT: v_add_i32_e32 v9, vcc, v10, v9 -; CGP-NEXT: v_mul_hi_u32 v10, v0, v8 -; CGP-NEXT: v_add_i32_e32 v7, vcc, v11, v7 -; CGP-NEXT: v_cndmask_b32_e64 v11, 0, 1, vcc -; CGP-NEXT: v_add_i32_e32 v7, vcc, v7, v10 +; CGP-NEXT: v_add_i32_e32 v6, vcc, v6, v10 ; CGP-NEXT: v_cndmask_b32_e64 v10, 0, 1, vcc -; CGP-NEXT: v_add_i32_e32 v10, vcc, v11, v10 -; CGP-NEXT: v_mul_hi_u32 v8, v1, v8 -; CGP-NEXT: v_add_i32_e32 v7, vcc, v7, v9 -; CGP-NEXT: v_cndmask_b32_e64 v9, 0, 1, vcc -; CGP-NEXT: v_add_i32_e32 v9, vcc, v10, v9 -; CGP-NEXT: v_add_i32_e32 v8, vcc, v8, v9 -; CGP-NEXT: v_mul_lo_u32 v9, 0, v7 -; CGP-NEXT: v_mul_lo_u32 v10, s7, v8 -; CGP-NEXT: v_mul_hi_u32 v12, s7, v7 -; CGP-NEXT: v_mul_lo_u32 v11, s7, v7 -; CGP-NEXT: v_mul_f32_e32 v5, 0x5f7ffffc, v5 ; CGP-NEXT: v_add_i32_e32 v9, vcc, v9, v10 -; CGP-NEXT: v_add_i32_e32 v9, vcc, v9, v12 -; CGP-NEXT: v_sub_i32_e32 v0, vcc, v0, v11 -; CGP-NEXT: v_subb_u32_e64 v10, s[4:5], v1, v9, vcc -; CGP-NEXT: v_sub_i32_e64 v1, s[4:5], v1, v9 -; CGP-NEXT: v_subbrev_u32_e32 v1, vcc, 0, v1, vcc -; CGP-NEXT: v_cmp_le_u32_e64 s[4:5], s7, v0 -; CGP-NEXT: v_subrev_i32_e32 v0, vcc, s7, v0 -; CGP-NEXT: v_subbrev_u32_e32 v1, vcc, 0, v1, vcc -; CGP-NEXT: v_cndmask_b32_e64 v9, 0, -1, s[4:5] -; CGP-NEXT: v_mov_b32_e32 v11, s8 -; CGP-NEXT: v_cmp_eq_u32_e64 s[4:5], 0, v10 -; CGP-NEXT: v_add_i32_e32 v10, vcc, 1, v7 -; CGP-NEXT: v_cndmask_b32_e64 v9, v11, v9, s[4:5] -; CGP-NEXT: v_addc_u32_e32 v11, vcc, 0, v8, vcc -; CGP-NEXT: s_bfe_i32 s4, -1, 0x10000 -; CGP-NEXT: v_cmp_le_u32_e32 vcc, s7, v0 -; CGP-NEXT: v_cndmask_b32_e64 v0, 0, -1, vcc -; CGP-NEXT: v_mov_b32_e32 v12, s4 -; CGP-NEXT: v_cmp_eq_u32_e32 vcc, 0, v1 -; CGP-NEXT: v_cndmask_b32_e32 v0, v12, v0, vcc -; CGP-NEXT: v_add_i32_e32 v1, vcc, 1, v10 -; CGP-NEXT: v_addc_u32_e32 v12, vcc, 0, v11, vcc -; CGP-NEXT: v_cmp_ne_u32_e32 vcc, 0, v0 -; CGP-NEXT: v_cndmask_b32_e32 v0, v10, v1, vcc -; CGP-NEXT: v_cndmask_b32_e32 v1, v11, v12, vcc -; CGP-NEXT: v_cmp_ne_u32_e32 vcc, 0, v9 -; CGP-NEXT: v_cndmask_b32_e32 v0, v7, v0, vcc -; CGP-NEXT: v_mul_f32_e32 v7, 0x2f800000, v5 -; CGP-NEXT: v_trunc_f32_e32 v7, v7 -; CGP-NEXT: v_mac_f32_e32 v5, 0xcf800000, v7 -; CGP-NEXT: v_cvt_u32_f32_e32 v5, v5 -; CGP-NEXT: v_cvt_u32_f32_e32 v7, v7 -; CGP-NEXT: v_cndmask_b32_e32 v1, v8, v1, vcc -; CGP-NEXT: v_ashrrev_i32_e32 v6, 31, v3 -; 
CGP-NEXT: v_mul_lo_u32 v8, -1, v5 -; CGP-NEXT: v_mul_lo_u32 v9, s6, v7 -; CGP-NEXT: v_mul_hi_u32 v11, s6, v5 -; CGP-NEXT: v_mul_lo_u32 v10, s6, v5 -; CGP-NEXT: v_add_i32_e32 v2, vcc, v2, v6 -; CGP-NEXT: v_addc_u32_e32 v3, vcc, v3, v6, vcc -; CGP-NEXT: v_add_i32_e32 v8, vcc, v8, v9 -; CGP-NEXT: v_add_i32_e32 v8, vcc, v8, v11 -; CGP-NEXT: v_mul_lo_u32 v9, v7, v10 -; CGP-NEXT: v_mul_lo_u32 v11, v5, v8 -; CGP-NEXT: v_mul_hi_u32 v12, v5, v10 -; CGP-NEXT: v_mul_hi_u32 v10, v7, v10 -; CGP-NEXT: v_xor_b32_e32 v0, v0, v4 -; CGP-NEXT: v_add_i32_e32 v9, vcc, v9, v11 -; CGP-NEXT: v_cndmask_b32_e64 v11, 0, 1, vcc -; CGP-NEXT: v_add_i32_e32 v9, vcc, v9, v12 -; CGP-NEXT: v_cndmask_b32_e64 v9, 0, 1, vcc -; CGP-NEXT: v_mul_lo_u32 v12, v7, v8 -; CGP-NEXT: v_add_i32_e32 v9, vcc, v11, v9 -; CGP-NEXT: v_mul_hi_u32 v11, v5, v8 -; CGP-NEXT: v_add_i32_e32 v10, vcc, v12, v10 -; CGP-NEXT: v_cndmask_b32_e64 v12, 0, 1, vcc -; CGP-NEXT: v_add_i32_e32 v10, vcc, v10, v11 -; CGP-NEXT: v_cndmask_b32_e64 v11, 0, 1, vcc -; CGP-NEXT: v_add_i32_e32 v11, vcc, v12, v11 -; CGP-NEXT: v_mul_hi_u32 v8, v7, v8 -; CGP-NEXT: v_add_i32_e32 v9, vcc, v10, v9 -; CGP-NEXT: v_cndmask_b32_e64 v10, 0, 1, vcc -; CGP-NEXT: v_add_i32_e32 v10, vcc, v11, v10 -; CGP-NEXT: v_add_i32_e32 v8, vcc, v8, v10 -; CGP-NEXT: v_add_i32_e32 v5, vcc, v5, v9 -; CGP-NEXT: v_addc_u32_e32 v7, vcc, v7, v8, vcc -; CGP-NEXT: v_mul_lo_u32 v8, -1, v5 -; CGP-NEXT: v_mul_lo_u32 v9, s6, v7 -; CGP-NEXT: v_mul_hi_u32 v11, s6, v5 -; CGP-NEXT: v_mul_lo_u32 v10, s6, v5 -; CGP-NEXT: v_xor_b32_e32 v2, v2, v6 +; CGP-NEXT: v_mul_hi_u32 v8, v3, v8 +; CGP-NEXT: v_add_i32_e32 v4, vcc, v6, v4 +; CGP-NEXT: v_cndmask_b32_e64 v6, 0, 1, vcc +; CGP-NEXT: v_add_i32_e32 v6, vcc, v9, v6 +; CGP-NEXT: v_add_i32_e32 v6, vcc, v8, v6 +; CGP-NEXT: v_mul_lo_u32 v8, 0, v4 +; CGP-NEXT: v_mul_lo_u32 v9, s6, v6 +; CGP-NEXT: v_mul_hi_u32 v11, s6, v4 +; CGP-NEXT: v_mul_lo_u32 v10, s6, v4 +; CGP-NEXT: s_bfe_i32 s6, -1, 0x10000 ; CGP-NEXT: v_add_i32_e32 v8, vcc, v8, v9 ; CGP-NEXT: v_add_i32_e32 v8, vcc, v8, v11 -; CGP-NEXT: v_mul_lo_u32 v9, v7, v10 -; CGP-NEXT: v_mul_lo_u32 v11, v5, v8 -; CGP-NEXT: v_mul_hi_u32 v12, v5, v10 -; CGP-NEXT: v_mul_hi_u32 v10, v7, v10 -; CGP-NEXT: v_xor_b32_e32 v3, v3, v6 -; CGP-NEXT: v_add_i32_e32 v9, vcc, v9, v11 -; CGP-NEXT: v_cndmask_b32_e64 v11, 0, 1, vcc -; CGP-NEXT: v_add_i32_e32 v9, vcc, v9, v12 -; CGP-NEXT: v_cndmask_b32_e64 v9, 0, 1, vcc -; CGP-NEXT: v_mul_lo_u32 v12, v7, v8 -; CGP-NEXT: v_add_i32_e32 v9, vcc, v11, v9 -; CGP-NEXT: v_mul_hi_u32 v11, v5, v8 -; CGP-NEXT: v_add_i32_e32 v10, vcc, v12, v10 -; CGP-NEXT: v_cndmask_b32_e64 v12, 0, 1, vcc -; CGP-NEXT: v_add_i32_e32 v10, vcc, v10, v11 -; CGP-NEXT: v_cndmask_b32_e64 v11, 0, 1, vcc -; CGP-NEXT: v_add_i32_e32 v11, vcc, v12, v11 -; CGP-NEXT: v_mul_hi_u32 v8, v7, v8 -; CGP-NEXT: v_add_i32_e32 v9, vcc, v10, v9 -; CGP-NEXT: v_cndmask_b32_e64 v10, 0, 1, vcc -; CGP-NEXT: v_add_i32_e32 v10, vcc, v11, v10 -; CGP-NEXT: v_add_i32_e32 v8, vcc, v8, v10 -; CGP-NEXT: v_add_i32_e32 v5, vcc, v5, v9 -; CGP-NEXT: v_addc_u32_e32 v7, vcc, v7, v8, vcc -; CGP-NEXT: v_xor_b32_e32 v1, v1, v4 -; CGP-NEXT: v_mul_lo_u32 v8, v3, v5 -; CGP-NEXT: v_mul_lo_u32 v9, v2, v7 -; CGP-NEXT: v_sub_i32_e32 v0, vcc, v0, v4 -; CGP-NEXT: v_subb_u32_e32 v1, vcc, v1, v4, vcc -; CGP-NEXT: v_mul_hi_u32 v4, v2, v5 -; CGP-NEXT: v_add_i32_e32 v8, vcc, v8, v9 -; CGP-NEXT: v_cndmask_b32_e64 v9, 0, 1, vcc -; CGP-NEXT: v_add_i32_e32 v4, vcc, v8, v4 -; CGP-NEXT: v_cndmask_b32_e64 v4, 0, 1, vcc -; CGP-NEXT: v_mul_lo_u32 v8, v3, v7 -; CGP-NEXT: v_mul_hi_u32 v5, v3, v5 -; 
CGP-NEXT: v_add_i32_e32 v4, vcc, v9, v4 -; CGP-NEXT: v_mul_hi_u32 v9, v2, v7 -; CGP-NEXT: v_add_i32_e32 v5, vcc, v8, v5 -; CGP-NEXT: v_cndmask_b32_e64 v8, 0, 1, vcc -; CGP-NEXT: v_add_i32_e32 v5, vcc, v5, v9 -; CGP-NEXT: v_cndmask_b32_e64 v9, 0, 1, vcc -; CGP-NEXT: v_add_i32_e32 v8, vcc, v8, v9 -; CGP-NEXT: v_mul_hi_u32 v7, v3, v7 -; CGP-NEXT: v_add_i32_e32 v4, vcc, v5, v4 -; CGP-NEXT: v_cndmask_b32_e64 v5, 0, 1, vcc -; CGP-NEXT: v_add_i32_e32 v5, vcc, v8, v5 -; CGP-NEXT: v_add_i32_e32 v5, vcc, v7, v5 -; CGP-NEXT: v_mul_lo_u32 v7, 0, v4 -; CGP-NEXT: v_mul_lo_u32 v8, s7, v5 -; CGP-NEXT: v_mul_hi_u32 v10, s7, v4 -; CGP-NEXT: v_mul_lo_u32 v9, s7, v4 -; CGP-NEXT: s_bfe_i32 s6, -1, 0x10000 -; CGP-NEXT: v_add_i32_e32 v7, vcc, v7, v8 -; CGP-NEXT: v_add_i32_e32 v7, vcc, v7, v10 -; CGP-NEXT: v_sub_i32_e32 v2, vcc, v2, v9 -; CGP-NEXT: v_subb_u32_e64 v8, s[4:5], v3, v7, vcc -; CGP-NEXT: v_sub_i32_e64 v3, s[4:5], v3, v7 +; CGP-NEXT: v_sub_i32_e32 v2, vcc, v2, v10 +; CGP-NEXT: v_subb_u32_e64 v9, s[4:5], v3, v8, vcc +; CGP-NEXT: v_sub_i32_e64 v3, s[4:5], v3, v8 ; CGP-NEXT: v_subbrev_u32_e32 v3, vcc, 0, v3, vcc -; CGP-NEXT: v_cmp_le_u32_e64 s[4:5], s7, v2 -; CGP-NEXT: v_subrev_i32_e32 v2, vcc, s7, v2 +; CGP-NEXT: v_cmp_ge_u32_e64 s[4:5], v2, v5 +; CGP-NEXT: v_sub_i32_e32 v2, vcc, v2, v5 ; CGP-NEXT: v_subbrev_u32_e32 v3, vcc, 0, v3, vcc -; CGP-NEXT: v_cndmask_b32_e64 v7, 0, -1, s[4:5] -; CGP-NEXT: v_mov_b32_e32 v9, s6 -; CGP-NEXT: v_cmp_eq_u32_e64 s[4:5], 0, v8 -; CGP-NEXT: v_add_i32_e32 v8, vcc, 1, v4 -; CGP-NEXT: v_cndmask_b32_e64 v7, v9, v7, s[4:5] -; CGP-NEXT: v_addc_u32_e32 v9, vcc, 0, v5, vcc +; CGP-NEXT: v_cndmask_b32_e64 v8, 0, -1, s[4:5] +; CGP-NEXT: v_mov_b32_e32 v10, s6 +; CGP-NEXT: v_cmp_eq_u32_e64 s[4:5], 0, v9 +; CGP-NEXT: v_add_i32_e32 v9, vcc, 1, v4 +; CGP-NEXT: v_cndmask_b32_e64 v8, v10, v8, s[4:5] +; CGP-NEXT: v_addc_u32_e32 v10, vcc, 0, v6, vcc ; CGP-NEXT: s_bfe_i32 s4, -1, 0x10000 -; CGP-NEXT: v_cmp_le_u32_e32 vcc, s7, v2 +; CGP-NEXT: v_cmp_ge_u32_e32 vcc, v2, v5 ; CGP-NEXT: v_cndmask_b32_e64 v2, 0, -1, vcc -; CGP-NEXT: v_mov_b32_e32 v10, s4 +; CGP-NEXT: v_mov_b32_e32 v5, s4 ; CGP-NEXT: v_cmp_eq_u32_e32 vcc, 0, v3 -; CGP-NEXT: v_cndmask_b32_e32 v2, v10, v2, vcc -; CGP-NEXT: v_add_i32_e32 v3, vcc, 1, v8 -; CGP-NEXT: v_addc_u32_e32 v10, vcc, 0, v9, vcc +; CGP-NEXT: v_cndmask_b32_e32 v2, v5, v2, vcc +; CGP-NEXT: v_add_i32_e32 v3, vcc, 1, v9 +; CGP-NEXT: v_addc_u32_e32 v5, vcc, 0, v10, vcc ; CGP-NEXT: v_cmp_ne_u32_e32 vcc, 0, v2 -; CGP-NEXT: v_cndmask_b32_e32 v2, v8, v3, vcc -; CGP-NEXT: v_cndmask_b32_e32 v3, v9, v10, vcc -; CGP-NEXT: v_cmp_ne_u32_e32 vcc, 0, v7 +; CGP-NEXT: v_cndmask_b32_e32 v2, v9, v3, vcc +; CGP-NEXT: v_cndmask_b32_e32 v3, v10, v5, vcc +; CGP-NEXT: v_cmp_ne_u32_e32 vcc, 0, v8 ; CGP-NEXT: v_cndmask_b32_e32 v2, v4, v2, vcc -; CGP-NEXT: v_cndmask_b32_e32 v3, v5, v3, vcc -; CGP-NEXT: v_xor_b32_e32 v2, v2, v6 -; CGP-NEXT: v_xor_b32_e32 v3, v3, v6 -; CGP-NEXT: v_sub_i32_e32 v2, vcc, v2, v6 -; CGP-NEXT: v_subb_u32_e32 v3, vcc, v3, v6, vcc +; CGP-NEXT: v_cndmask_b32_e32 v3, v6, v3, vcc +; CGP-NEXT: v_xor_b32_e32 v2, v2, v7 +; CGP-NEXT: v_xor_b32_e32 v3, v3, v7 +; CGP-NEXT: v_sub_i32_e32 v2, vcc, v2, v7 +; CGP-NEXT: v_subb_u32_e32 v3, vcc, v3, v7, vcc ; CGP-NEXT: s_setpc_b64 s[30:31] %result = sdiv <2 x i64> %num, ret <2 x i64> %result diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/srem.i64.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/srem.i64.ll index 5323967b4fc4..e75db654647b 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/srem.i64.ll +++ 
b/llvm/test/CodeGen/AMDGPU/GlobalISel/srem.i64.ll @@ -1031,10 +1031,11 @@ define i64 @v_srem_i64_pow2k_denom(i64 %num) { ; CHECK-LABEL: v_srem_i64_pow2k_denom: ; CHECK: ; %bb.0: ; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; CHECK-NEXT: v_cvt_f32_u32_e32 v2, 0x1000 +; CHECK-NEXT: s_movk_i32 s4, 0x1000 +; CHECK-NEXT: v_cvt_f32_u32_e32 v2, s4 ; CHECK-NEXT: v_cvt_f32_ubyte0_e32 v3, 0 -; CHECK-NEXT: s_movk_i32 s4, 0xf000 -; CHECK-NEXT: s_movk_i32 s6, 0x1000 +; CHECK-NEXT: s_movk_i32 s5, 0xf000 +; CHECK-NEXT: s_bfe_i32 s6, -1, 0x10000 ; CHECK-NEXT: v_mac_f32_e32 v2, 0x4f800000, v3 ; CHECK-NEXT: v_rcp_iflag_f32_e32 v2, v2 ; CHECK-NEXT: v_ashrrev_i32_e32 v3, 31, v1 @@ -1046,10 +1047,10 @@ define i64 @v_srem_i64_pow2k_denom(i64 %num) { ; CHECK-NEXT: v_cvt_u32_f32_e32 v4, v4 ; CHECK-NEXT: v_cvt_u32_f32_e32 v2, v2 ; CHECK-NEXT: v_addc_u32_e32 v1, vcc, v1, v3, vcc -; CHECK-NEXT: v_mul_lo_u32 v6, s4, v4 +; CHECK-NEXT: v_mul_lo_u32 v6, s5, v4 ; CHECK-NEXT: v_mul_lo_u32 v5, -1, v2 -; CHECK-NEXT: v_mul_hi_u32 v8, s4, v2 -; CHECK-NEXT: v_mul_lo_u32 v7, s4, v2 +; CHECK-NEXT: v_mul_hi_u32 v8, s5, v2 +; CHECK-NEXT: v_mul_lo_u32 v7, s5, v2 ; CHECK-NEXT: v_xor_b32_e32 v0, v0, v3 ; CHECK-NEXT: v_add_i32_e32 v5, vcc, v5, v6 ; CHECK-NEXT: v_add_i32_e32 v5, vcc, v5, v8 @@ -1078,10 +1079,9 @@ define i64 @v_srem_i64_pow2k_denom(i64 %num) { ; CHECK-NEXT: v_add_i32_e32 v2, vcc, v2, v6 ; CHECK-NEXT: v_addc_u32_e32 v4, vcc, v4, v5, vcc ; CHECK-NEXT: v_mul_lo_u32 v5, -1, v2 -; CHECK-NEXT: v_mul_lo_u32 v6, s4, v4 -; CHECK-NEXT: v_mul_hi_u32 v8, s4, v2 -; CHECK-NEXT: v_mul_lo_u32 v7, s4, v2 -; CHECK-NEXT: s_bfe_i32 s7, -1, 0x10000 +; CHECK-NEXT: v_mul_lo_u32 v6, s5, v4 +; CHECK-NEXT: v_mul_hi_u32 v8, s5, v2 +; CHECK-NEXT: v_mul_lo_u32 v7, s5, v2 ; CHECK-NEXT: v_add_i32_e32 v5, vcc, v5, v6 ; CHECK-NEXT: v_add_i32_e32 v5, vcc, v5, v8 ; CHECK-NEXT: v_mul_lo_u32 v6, v4, v7 @@ -1109,49 +1109,50 @@ define i64 @v_srem_i64_pow2k_denom(i64 %num) { ; CHECK-NEXT: v_addc_u32_e32 v4, vcc, v4, v5, vcc ; CHECK-NEXT: v_mul_lo_u32 v5, v1, v2 ; CHECK-NEXT: v_mul_lo_u32 v6, v0, v4 -; CHECK-NEXT: v_mul_hi_u32 v7, v0, v2 +; CHECK-NEXT: v_mul_hi_u32 v8, v0, v2 ; CHECK-NEXT: v_mul_hi_u32 v2, v1, v2 +; CHECK-NEXT: v_mov_b32_e32 v7, 0x1000 ; CHECK-NEXT: v_add_i32_e32 v5, vcc, v5, v6 ; CHECK-NEXT: v_cndmask_b32_e64 v6, 0, 1, vcc -; CHECK-NEXT: v_add_i32_e32 v5, vcc, v5, v7 +; CHECK-NEXT: v_add_i32_e32 v5, vcc, v5, v8 ; CHECK-NEXT: v_cndmask_b32_e64 v5, 0, 1, vcc -; CHECK-NEXT: v_mul_lo_u32 v7, v1, v4 +; CHECK-NEXT: v_mul_lo_u32 v8, v1, v4 ; CHECK-NEXT: v_add_i32_e32 v5, vcc, v6, v5 ; CHECK-NEXT: v_mul_hi_u32 v6, v0, v4 -; CHECK-NEXT: v_add_i32_e32 v2, vcc, v7, v2 -; CHECK-NEXT: v_cndmask_b32_e64 v7, 0, 1, vcc +; CHECK-NEXT: v_add_i32_e32 v2, vcc, v8, v2 +; CHECK-NEXT: v_cndmask_b32_e64 v8, 0, 1, vcc ; CHECK-NEXT: v_add_i32_e32 v2, vcc, v2, v6 ; CHECK-NEXT: v_cndmask_b32_e64 v6, 0, 1, vcc -; CHECK-NEXT: v_add_i32_e32 v6, vcc, v7, v6 +; CHECK-NEXT: v_add_i32_e32 v6, vcc, v8, v6 ; CHECK-NEXT: v_mul_hi_u32 v4, v1, v4 ; CHECK-NEXT: v_add_i32_e32 v2, vcc, v2, v5 ; CHECK-NEXT: v_cndmask_b32_e64 v5, 0, 1, vcc ; CHECK-NEXT: v_add_i32_e32 v5, vcc, v6, v5 ; CHECK-NEXT: v_add_i32_e32 v4, vcc, v4, v5 ; CHECK-NEXT: v_mul_lo_u32 v5, 0, v2 -; CHECK-NEXT: v_mul_lo_u32 v4, s6, v4 -; CHECK-NEXT: v_mul_lo_u32 v6, s6, v2 -; CHECK-NEXT: v_mul_hi_u32 v2, s6, v2 +; CHECK-NEXT: v_mul_lo_u32 v4, s4, v4 +; CHECK-NEXT: v_mul_lo_u32 v6, s4, v2 +; CHECK-NEXT: v_mul_hi_u32 v2, s4, v2 ; CHECK-NEXT: v_add_i32_e32 v4, vcc, v5, v4 ; CHECK-NEXT: v_add_i32_e32 v2, vcc, v4, 
v2 ; CHECK-NEXT: v_sub_i32_e32 v0, vcc, v0, v6 ; CHECK-NEXT: v_subb_u32_e64 v4, s[4:5], v1, v2, vcc ; CHECK-NEXT: v_sub_i32_e64 v1, s[4:5], v1, v2 -; CHECK-NEXT: v_cmp_le_u32_e64 s[4:5], s6, v0 +; CHECK-NEXT: v_cmp_ge_u32_e64 s[4:5], v0, v7 ; CHECK-NEXT: v_cndmask_b32_e64 v2, 0, -1, s[4:5] -; CHECK-NEXT: v_mov_b32_e32 v5, s7 +; CHECK-NEXT: v_mov_b32_e32 v5, s6 ; CHECK-NEXT: v_cmp_eq_u32_e64 s[4:5], 0, v4 ; CHECK-NEXT: v_subbrev_u32_e32 v1, vcc, 0, v1, vcc ; CHECK-NEXT: v_cndmask_b32_e64 v2, v5, v2, s[4:5] -; CHECK-NEXT: v_subrev_i32_e32 v5, vcc, s6, v0 +; CHECK-NEXT: v_sub_i32_e32 v5, vcc, v0, v7 ; CHECK-NEXT: v_subbrev_u32_e32 v1, vcc, 0, v1, vcc ; CHECK-NEXT: s_bfe_i32 s4, -1, 0x10000 -; CHECK-NEXT: v_cmp_le_u32_e32 vcc, s6, v5 +; CHECK-NEXT: v_cmp_ge_u32_e32 vcc, v5, v7 ; CHECK-NEXT: v_cndmask_b32_e64 v6, 0, -1, vcc -; CHECK-NEXT: v_mov_b32_e32 v7, s4 +; CHECK-NEXT: v_mov_b32_e32 v8, s4 ; CHECK-NEXT: v_cmp_eq_u32_e32 vcc, 0, v1 -; CHECK-NEXT: v_cndmask_b32_e32 v6, v7, v6, vcc -; CHECK-NEXT: v_subrev_i32_e32 v7, vcc, s6, v5 +; CHECK-NEXT: v_cndmask_b32_e32 v6, v8, v6, vcc +; CHECK-NEXT: v_sub_i32_e32 v7, vcc, v5, v7 ; CHECK-NEXT: v_subbrev_u32_e32 v8, vcc, 0, v1, vcc ; CHECK-NEXT: v_cmp_ne_u32_e32 vcc, 0, v6 ; CHECK-NEXT: v_cndmask_b32_e32 v5, v5, v7, vcc @@ -1466,41 +1467,166 @@ define <2 x i64> @v_srem_v2i64_pow2k_denom(<2 x i64> %num) { ; CGP-LABEL: v_srem_v2i64_pow2k_denom: ; CGP: ; %bb.0: ; CGP-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; CGP-NEXT: v_cvt_f32_u32_e32 v5, 0x1000 -; CGP-NEXT: v_cvt_f32_ubyte0_e32 v6, 0 -; CGP-NEXT: s_movk_i32 s6, 0xf000 -; CGP-NEXT: s_movk_i32 s7, 0x1000 -; CGP-NEXT: v_mov_b32_e32 v4, v5 -; CGP-NEXT: v_mac_f32_e32 v4, 0x4f800000, v6 -; CGP-NEXT: v_rcp_iflag_f32_e32 v7, v4 +; CGP-NEXT: s_movk_i32 s6, 0x1000 +; CGP-NEXT: v_cvt_f32_u32_e32 v4, s6 +; CGP-NEXT: v_cvt_f32_ubyte0_e32 v5, 0 +; CGP-NEXT: s_movk_i32 s7, 0xf000 +; CGP-NEXT: s_bfe_i32 s8, -1, 0x10000 +; CGP-NEXT: v_mac_f32_e32 v4, 0x4f800000, v5 +; CGP-NEXT: v_rcp_iflag_f32_e32 v5, v4 ; CGP-NEXT: v_ashrrev_i32_e32 v4, 31, v1 ; CGP-NEXT: v_add_i32_e32 v0, vcc, v0, v4 -; CGP-NEXT: v_mul_f32_e32 v7, 0x5f7ffffc, v7 -; CGP-NEXT: v_mul_f32_e32 v8, 0x2f800000, v7 -; CGP-NEXT: v_trunc_f32_e32 v8, v8 -; CGP-NEXT: v_mac_f32_e32 v7, 0xcf800000, v8 -; CGP-NEXT: v_cvt_u32_f32_e32 v7, v7 -; CGP-NEXT: v_cvt_u32_f32_e32 v8, v8 +; CGP-NEXT: v_mul_f32_e32 v5, 0x5f7ffffc, v5 +; CGP-NEXT: v_mul_f32_e32 v6, 0x2f800000, v5 +; CGP-NEXT: v_trunc_f32_e32 v6, v6 +; CGP-NEXT: v_mac_f32_e32 v5, 0xcf800000, v6 +; CGP-NEXT: v_cvt_u32_f32_e32 v5, v5 +; CGP-NEXT: v_cvt_u32_f32_e32 v6, v6 ; CGP-NEXT: v_addc_u32_e32 v1, vcc, v1, v4, vcc -; CGP-NEXT: v_mul_lo_u32 v9, -1, v7 -; CGP-NEXT: v_mul_lo_u32 v10, s6, v8 -; CGP-NEXT: v_mul_hi_u32 v12, s6, v7 -; CGP-NEXT: v_mul_lo_u32 v11, s6, v7 +; CGP-NEXT: v_mul_lo_u32 v7, -1, v5 +; CGP-NEXT: v_mul_lo_u32 v8, s7, v6 +; CGP-NEXT: v_mul_hi_u32 v10, s7, v5 +; CGP-NEXT: v_mul_lo_u32 v9, s7, v5 ; CGP-NEXT: v_xor_b32_e32 v0, v0, v4 +; CGP-NEXT: v_add_i32_e32 v7, vcc, v7, v8 +; CGP-NEXT: v_add_i32_e32 v7, vcc, v7, v10 +; CGP-NEXT: v_mul_lo_u32 v8, v6, v9 +; CGP-NEXT: v_mul_lo_u32 v10, v5, v7 +; CGP-NEXT: v_mul_hi_u32 v11, v5, v9 +; CGP-NEXT: v_mul_hi_u32 v9, v6, v9 +; CGP-NEXT: v_xor_b32_e32 v1, v1, v4 +; CGP-NEXT: v_add_i32_e32 v8, vcc, v8, v10 +; CGP-NEXT: v_cndmask_b32_e64 v10, 0, 1, vcc +; CGP-NEXT: v_add_i32_e32 v8, vcc, v8, v11 +; CGP-NEXT: v_cndmask_b32_e64 v8, 0, 1, vcc +; CGP-NEXT: v_mul_lo_u32 v11, v6, v7 +; CGP-NEXT: v_add_i32_e32 v8, vcc, v10, v8 +; CGP-NEXT: v_mul_hi_u32 v10, v5, 
v7 +; CGP-NEXT: v_add_i32_e32 v9, vcc, v11, v9 +; CGP-NEXT: v_cndmask_b32_e64 v11, 0, 1, vcc +; CGP-NEXT: v_add_i32_e32 v9, vcc, v9, v10 +; CGP-NEXT: v_cndmask_b32_e64 v10, 0, 1, vcc +; CGP-NEXT: v_add_i32_e32 v10, vcc, v11, v10 +; CGP-NEXT: v_mul_hi_u32 v7, v6, v7 +; CGP-NEXT: v_add_i32_e32 v8, vcc, v9, v8 +; CGP-NEXT: v_cndmask_b32_e64 v9, 0, 1, vcc +; CGP-NEXT: v_add_i32_e32 v9, vcc, v10, v9 +; CGP-NEXT: v_add_i32_e32 v7, vcc, v7, v9 +; CGP-NEXT: v_add_i32_e32 v5, vcc, v5, v8 +; CGP-NEXT: v_addc_u32_e32 v6, vcc, v6, v7, vcc +; CGP-NEXT: v_mul_lo_u32 v7, -1, v5 +; CGP-NEXT: v_mul_lo_u32 v8, s7, v6 +; CGP-NEXT: v_mul_hi_u32 v10, s7, v5 +; CGP-NEXT: v_mul_lo_u32 v9, s7, v5 +; CGP-NEXT: v_add_i32_e32 v7, vcc, v7, v8 +; CGP-NEXT: v_add_i32_e32 v7, vcc, v7, v10 +; CGP-NEXT: v_mul_lo_u32 v8, v6, v9 +; CGP-NEXT: v_mul_lo_u32 v10, v5, v7 +; CGP-NEXT: v_mul_hi_u32 v11, v5, v9 +; CGP-NEXT: v_mul_hi_u32 v9, v6, v9 +; CGP-NEXT: v_add_i32_e32 v8, vcc, v8, v10 +; CGP-NEXT: v_cndmask_b32_e64 v10, 0, 1, vcc +; CGP-NEXT: v_add_i32_e32 v8, vcc, v8, v11 +; CGP-NEXT: v_cndmask_b32_e64 v8, 0, 1, vcc +; CGP-NEXT: v_mul_lo_u32 v11, v6, v7 +; CGP-NEXT: v_add_i32_e32 v8, vcc, v10, v8 +; CGP-NEXT: v_mul_hi_u32 v10, v5, v7 +; CGP-NEXT: v_add_i32_e32 v9, vcc, v11, v9 +; CGP-NEXT: v_cndmask_b32_e64 v11, 0, 1, vcc +; CGP-NEXT: v_add_i32_e32 v9, vcc, v9, v10 +; CGP-NEXT: v_cndmask_b32_e64 v10, 0, 1, vcc +; CGP-NEXT: v_add_i32_e32 v10, vcc, v11, v10 +; CGP-NEXT: v_mul_hi_u32 v7, v6, v7 +; CGP-NEXT: v_add_i32_e32 v8, vcc, v9, v8 +; CGP-NEXT: v_cndmask_b32_e64 v9, 0, 1, vcc +; CGP-NEXT: v_add_i32_e32 v9, vcc, v10, v9 +; CGP-NEXT: v_add_i32_e32 v7, vcc, v7, v9 +; CGP-NEXT: v_add_i32_e32 v8, vcc, v5, v8 +; CGP-NEXT: v_addc_u32_e32 v6, vcc, v6, v7, vcc +; CGP-NEXT: v_mul_lo_u32 v7, v1, v8 +; CGP-NEXT: v_mul_lo_u32 v9, v0, v6 +; CGP-NEXT: v_mul_hi_u32 v10, v0, v8 +; CGP-NEXT: v_mul_hi_u32 v8, v1, v8 +; CGP-NEXT: v_mov_b32_e32 v5, 0x1000 +; CGP-NEXT: v_add_i32_e32 v7, vcc, v7, v9 +; CGP-NEXT: v_cndmask_b32_e64 v9, 0, 1, vcc +; CGP-NEXT: v_add_i32_e32 v7, vcc, v7, v10 +; CGP-NEXT: v_cndmask_b32_e64 v7, 0, 1, vcc +; CGP-NEXT: v_mul_lo_u32 v10, v1, v6 +; CGP-NEXT: v_add_i32_e32 v7, vcc, v9, v7 +; CGP-NEXT: v_mul_hi_u32 v9, v0, v6 +; CGP-NEXT: v_add_i32_e32 v8, vcc, v10, v8 +; CGP-NEXT: v_cndmask_b32_e64 v10, 0, 1, vcc +; CGP-NEXT: v_add_i32_e32 v8, vcc, v8, v9 +; CGP-NEXT: v_cndmask_b32_e64 v9, 0, 1, vcc +; CGP-NEXT: v_add_i32_e32 v9, vcc, v10, v9 +; CGP-NEXT: v_mul_hi_u32 v6, v1, v6 +; CGP-NEXT: v_add_i32_e32 v7, vcc, v8, v7 +; CGP-NEXT: v_cndmask_b32_e64 v8, 0, 1, vcc +; CGP-NEXT: v_add_i32_e32 v8, vcc, v9, v8 +; CGP-NEXT: v_add_i32_e32 v6, vcc, v6, v8 +; CGP-NEXT: v_mul_lo_u32 v8, 0, v7 +; CGP-NEXT: v_mul_lo_u32 v6, s6, v6 +; CGP-NEXT: v_mul_lo_u32 v9, s6, v7 +; CGP-NEXT: v_mul_hi_u32 v7, s6, v7 +; CGP-NEXT: v_add_i32_e32 v6, vcc, v8, v6 +; CGP-NEXT: v_add_i32_e32 v6, vcc, v6, v7 +; CGP-NEXT: v_sub_i32_e32 v0, vcc, v0, v9 +; CGP-NEXT: v_subb_u32_e64 v7, s[4:5], v1, v6, vcc +; CGP-NEXT: v_sub_i32_e64 v1, s[4:5], v1, v6 +; CGP-NEXT: v_cmp_ge_u32_e64 s[4:5], v0, v5 +; CGP-NEXT: v_cndmask_b32_e64 v6, 0, -1, s[4:5] +; CGP-NEXT: v_mov_b32_e32 v8, s8 +; CGP-NEXT: v_cmp_eq_u32_e64 s[4:5], 0, v7 +; CGP-NEXT: v_subbrev_u32_e32 v1, vcc, 0, v1, vcc +; CGP-NEXT: v_cndmask_b32_e64 v6, v8, v6, s[4:5] +; CGP-NEXT: v_sub_i32_e32 v8, vcc, v0, v5 +; CGP-NEXT: v_subbrev_u32_e32 v1, vcc, 0, v1, vcc +; CGP-NEXT: s_bfe_i32 s4, -1, 0x10000 +; CGP-NEXT: v_cmp_ge_u32_e32 vcc, v8, v5 +; CGP-NEXT: v_cndmask_b32_e64 v9, 0, -1, vcc +; CGP-NEXT: 
v_mov_b32_e32 v10, s4 +; CGP-NEXT: v_cmp_eq_u32_e32 vcc, 0, v1 +; CGP-NEXT: v_cndmask_b32_e32 v9, v10, v9, vcc +; CGP-NEXT: v_sub_i32_e32 v10, vcc, v8, v5 +; CGP-NEXT: v_subbrev_u32_e32 v11, vcc, 0, v1, vcc +; CGP-NEXT: v_cmp_ne_u32_e32 vcc, 0, v9 +; CGP-NEXT: v_cvt_f32_u32_e32 v9, v5 +; CGP-NEXT: v_cndmask_b32_e32 v8, v8, v10, vcc +; CGP-NEXT: v_cndmask_b32_e32 v1, v1, v11, vcc +; CGP-NEXT: v_cmp_ne_u32_e32 vcc, 0, v6 +; CGP-NEXT: v_cvt_f32_ubyte0_e32 v6, 0 +; CGP-NEXT: v_mac_f32_e32 v9, 0x4f800000, v6 +; CGP-NEXT: v_rcp_iflag_f32_e32 v6, v9 +; CGP-NEXT: v_cndmask_b32_e32 v0, v0, v8, vcc +; CGP-NEXT: v_cndmask_b32_e32 v1, v7, v1, vcc +; CGP-NEXT: v_ashrrev_i32_e32 v7, 31, v3 +; CGP-NEXT: v_mul_f32_e32 v6, 0x5f7ffffc, v6 +; CGP-NEXT: v_mul_f32_e32 v8, 0x2f800000, v6 +; CGP-NEXT: v_trunc_f32_e32 v8, v8 +; CGP-NEXT: v_mac_f32_e32 v6, 0xcf800000, v8 +; CGP-NEXT: v_cvt_u32_f32_e32 v6, v6 +; CGP-NEXT: v_cvt_u32_f32_e32 v8, v8 +; CGP-NEXT: v_add_i32_e32 v2, vcc, v2, v7 +; CGP-NEXT: v_mul_lo_u32 v9, -1, v6 +; CGP-NEXT: v_mul_lo_u32 v10, s7, v8 +; CGP-NEXT: v_mul_hi_u32 v12, s7, v6 +; CGP-NEXT: v_mul_lo_u32 v11, s7, v6 +; CGP-NEXT: v_addc_u32_e32 v3, vcc, v3, v7, vcc ; CGP-NEXT: v_add_i32_e32 v9, vcc, v9, v10 ; CGP-NEXT: v_add_i32_e32 v9, vcc, v9, v12 ; CGP-NEXT: v_mul_lo_u32 v10, v8, v11 -; CGP-NEXT: v_mul_lo_u32 v12, v7, v9 -; CGP-NEXT: v_mul_hi_u32 v13, v7, v11 +; CGP-NEXT: v_mul_lo_u32 v12, v6, v9 +; CGP-NEXT: v_mul_hi_u32 v13, v6, v11 ; CGP-NEXT: v_mul_hi_u32 v11, v8, v11 -; CGP-NEXT: v_xor_b32_e32 v1, v1, v4 +; CGP-NEXT: v_xor_b32_e32 v0, v0, v4 ; CGP-NEXT: v_add_i32_e32 v10, vcc, v10, v12 ; CGP-NEXT: v_cndmask_b32_e64 v12, 0, 1, vcc ; CGP-NEXT: v_add_i32_e32 v10, vcc, v10, v13 ; CGP-NEXT: v_cndmask_b32_e64 v10, 0, 1, vcc ; CGP-NEXT: v_mul_lo_u32 v13, v8, v9 ; CGP-NEXT: v_add_i32_e32 v10, vcc, v12, v10 -; CGP-NEXT: v_mul_hi_u32 v12, v7, v9 +; CGP-NEXT: v_mul_hi_u32 v12, v6, v9 ; CGP-NEXT: v_add_i32_e32 v11, vcc, v13, v11 ; CGP-NEXT: v_cndmask_b32_e64 v13, 0, 1, vcc ; CGP-NEXT: v_add_i32_e32 v11, vcc, v11, v12 @@ -1511,27 +1637,27 @@ define <2 x i64> @v_srem_v2i64_pow2k_denom(<2 x i64> %num) { ; CGP-NEXT: v_cndmask_b32_e64 v11, 0, 1, vcc ; CGP-NEXT: v_add_i32_e32 v11, vcc, v12, v11 ; CGP-NEXT: v_add_i32_e32 v9, vcc, v9, v11 -; CGP-NEXT: v_add_i32_e32 v7, vcc, v7, v10 +; CGP-NEXT: v_add_i32_e32 v6, vcc, v6, v10 ; CGP-NEXT: v_addc_u32_e32 v8, vcc, v8, v9, vcc -; CGP-NEXT: v_mul_lo_u32 v9, -1, v7 -; CGP-NEXT: v_mul_lo_u32 v10, s6, v8 -; CGP-NEXT: v_mul_hi_u32 v12, s6, v7 -; CGP-NEXT: v_mul_lo_u32 v11, s6, v7 -; CGP-NEXT: s_bfe_i32 s8, -1, 0x10000 +; CGP-NEXT: v_mul_lo_u32 v9, -1, v6 +; CGP-NEXT: v_mul_lo_u32 v10, s7, v8 +; CGP-NEXT: v_mul_hi_u32 v12, s7, v6 +; CGP-NEXT: v_mul_lo_u32 v11, s7, v6 +; CGP-NEXT: v_xor_b32_e32 v2, v2, v7 ; CGP-NEXT: v_add_i32_e32 v9, vcc, v9, v10 ; CGP-NEXT: v_add_i32_e32 v9, vcc, v9, v12 ; CGP-NEXT: v_mul_lo_u32 v10, v8, v11 -; CGP-NEXT: v_mul_lo_u32 v12, v7, v9 -; CGP-NEXT: v_mul_hi_u32 v13, v7, v11 +; CGP-NEXT: v_mul_lo_u32 v12, v6, v9 +; CGP-NEXT: v_mul_hi_u32 v13, v6, v11 ; CGP-NEXT: v_mul_hi_u32 v11, v8, v11 -; CGP-NEXT: v_mac_f32_e32 v5, 0x4f800000, v6 +; CGP-NEXT: v_xor_b32_e32 v3, v3, v7 ; CGP-NEXT: v_add_i32_e32 v10, vcc, v10, v12 ; CGP-NEXT: v_cndmask_b32_e64 v12, 0, 1, vcc ; CGP-NEXT: v_add_i32_e32 v10, vcc, v10, v13 ; CGP-NEXT: v_cndmask_b32_e64 v10, 0, 1, vcc ; CGP-NEXT: v_mul_lo_u32 v13, v8, v9 ; CGP-NEXT: v_add_i32_e32 v10, vcc, v12, v10 -; CGP-NEXT: v_mul_hi_u32 v12, v7, v9 +; CGP-NEXT: v_mul_hi_u32 v12, v6, v9 ; CGP-NEXT: v_add_i32_e32 v11, vcc, v13, 
v11 ; CGP-NEXT: v_cndmask_b32_e64 v13, 0, 1, vcc ; CGP-NEXT: v_add_i32_e32 v11, vcc, v11, v12 @@ -1542,191 +1668,68 @@ define <2 x i64> @v_srem_v2i64_pow2k_denom(<2 x i64> %num) { ; CGP-NEXT: v_cndmask_b32_e64 v11, 0, 1, vcc ; CGP-NEXT: v_add_i32_e32 v11, vcc, v12, v11 ; CGP-NEXT: v_add_i32_e32 v9, vcc, v9, v11 -; CGP-NEXT: v_add_i32_e32 v7, vcc, v7, v10 +; CGP-NEXT: v_add_i32_e32 v6, vcc, v6, v10 ; CGP-NEXT: v_addc_u32_e32 v8, vcc, v8, v9, vcc -; CGP-NEXT: v_mul_lo_u32 v9, v1, v7 -; CGP-NEXT: v_mul_lo_u32 v10, v0, v8 -; CGP-NEXT: v_mul_hi_u32 v11, v0, v7 -; CGP-NEXT: v_mul_hi_u32 v7, v1, v7 -; CGP-NEXT: v_rcp_iflag_f32_e32 v5, v5 -; CGP-NEXT: v_add_i32_e32 v9, vcc, v9, v10 -; CGP-NEXT: v_cndmask_b32_e64 v10, 0, 1, vcc -; CGP-NEXT: v_add_i32_e32 v9, vcc, v9, v11 -; CGP-NEXT: v_cndmask_b32_e64 v9, 0, 1, vcc -; CGP-NEXT: v_mul_lo_u32 v11, v1, v8 -; CGP-NEXT: v_add_i32_e32 v9, vcc, v10, v9 -; CGP-NEXT: v_mul_hi_u32 v10, v0, v8 -; CGP-NEXT: v_add_i32_e32 v7, vcc, v11, v7 -; CGP-NEXT: v_cndmask_b32_e64 v11, 0, 1, vcc -; CGP-NEXT: v_add_i32_e32 v7, vcc, v7, v10 -; CGP-NEXT: v_cndmask_b32_e64 v10, 0, 1, vcc -; CGP-NEXT: v_add_i32_e32 v10, vcc, v11, v10 -; CGP-NEXT: v_mul_hi_u32 v8, v1, v8 -; CGP-NEXT: v_add_i32_e32 v7, vcc, v7, v9 -; CGP-NEXT: v_cndmask_b32_e64 v9, 0, 1, vcc -; CGP-NEXT: v_add_i32_e32 v9, vcc, v10, v9 -; CGP-NEXT: v_add_i32_e32 v8, vcc, v8, v9 -; CGP-NEXT: v_mul_lo_u32 v9, 0, v7 -; CGP-NEXT: v_mul_lo_u32 v8, s7, v8 -; CGP-NEXT: v_mul_lo_u32 v10, s7, v7 -; CGP-NEXT: v_mul_hi_u32 v7, s7, v7 -; CGP-NEXT: v_mul_f32_e32 v5, 0x5f7ffffc, v5 -; CGP-NEXT: v_add_i32_e32 v8, vcc, v9, v8 -; CGP-NEXT: v_add_i32_e32 v7, vcc, v8, v7 -; CGP-NEXT: v_sub_i32_e32 v0, vcc, v0, v10 -; CGP-NEXT: v_subb_u32_e64 v8, s[4:5], v1, v7, vcc -; CGP-NEXT: v_sub_i32_e64 v1, s[4:5], v1, v7 -; CGP-NEXT: v_cmp_le_u32_e64 s[4:5], s7, v0 -; CGP-NEXT: v_cndmask_b32_e64 v7, 0, -1, s[4:5] -; CGP-NEXT: v_mov_b32_e32 v9, s8 -; CGP-NEXT: v_cmp_eq_u32_e64 s[4:5], 0, v8 -; CGP-NEXT: v_subbrev_u32_e32 v1, vcc, 0, v1, vcc -; CGP-NEXT: v_cndmask_b32_e64 v7, v9, v7, s[4:5] -; CGP-NEXT: v_subrev_i32_e32 v9, vcc, s7, v0 -; CGP-NEXT: v_subbrev_u32_e32 v1, vcc, 0, v1, vcc -; CGP-NEXT: s_bfe_i32 s4, -1, 0x10000 -; CGP-NEXT: v_cmp_le_u32_e32 vcc, s7, v9 -; CGP-NEXT: v_cndmask_b32_e64 v10, 0, -1, vcc -; CGP-NEXT: v_mov_b32_e32 v11, s4 -; CGP-NEXT: v_cmp_eq_u32_e32 vcc, 0, v1 -; CGP-NEXT: v_cndmask_b32_e32 v10, v11, v10, vcc -; CGP-NEXT: v_subrev_i32_e32 v11, vcc, s7, v9 -; CGP-NEXT: v_subbrev_u32_e32 v12, vcc, 0, v1, vcc -; CGP-NEXT: v_cmp_ne_u32_e32 vcc, 0, v10 -; CGP-NEXT: v_cndmask_b32_e32 v9, v9, v11, vcc -; CGP-NEXT: v_cndmask_b32_e32 v1, v1, v12, vcc -; CGP-NEXT: v_cmp_ne_u32_e32 vcc, 0, v7 -; CGP-NEXT: v_mul_f32_e32 v7, 0x2f800000, v5 -; CGP-NEXT: v_trunc_f32_e32 v7, v7 -; CGP-NEXT: v_mac_f32_e32 v5, 0xcf800000, v7 -; CGP-NEXT: v_cvt_u32_f32_e32 v5, v5 -; CGP-NEXT: v_cvt_u32_f32_e32 v7, v7 -; CGP-NEXT: v_cndmask_b32_e32 v0, v0, v9, vcc -; CGP-NEXT: v_cndmask_b32_e32 v1, v8, v1, vcc -; CGP-NEXT: v_mul_lo_u32 v8, -1, v5 -; CGP-NEXT: v_mul_lo_u32 v9, s6, v7 -; CGP-NEXT: v_mul_hi_u32 v11, s6, v5 -; CGP-NEXT: v_ashrrev_i32_e32 v6, 31, v3 -; CGP-NEXT: v_mul_lo_u32 v10, s6, v5 -; CGP-NEXT: v_add_i32_e32 v2, vcc, v2, v6 -; CGP-NEXT: v_addc_u32_e32 v3, vcc, v3, v6, vcc -; CGP-NEXT: v_add_i32_e32 v8, vcc, v8, v9 -; CGP-NEXT: v_add_i32_e32 v8, vcc, v8, v11 -; CGP-NEXT: v_mul_lo_u32 v9, v7, v10 -; CGP-NEXT: v_mul_lo_u32 v11, v5, v8 -; CGP-NEXT: v_mul_hi_u32 v12, v5, v10 -; CGP-NEXT: v_mul_hi_u32 v10, v7, v10 -; CGP-NEXT: v_xor_b32_e32 v0, 
v0, v4 -; CGP-NEXT: v_add_i32_e32 v9, vcc, v9, v11 -; CGP-NEXT: v_cndmask_b32_e64 v11, 0, 1, vcc -; CGP-NEXT: v_add_i32_e32 v9, vcc, v9, v12 -; CGP-NEXT: v_cndmask_b32_e64 v9, 0, 1, vcc -; CGP-NEXT: v_mul_lo_u32 v12, v7, v8 -; CGP-NEXT: v_add_i32_e32 v9, vcc, v11, v9 -; CGP-NEXT: v_mul_hi_u32 v11, v5, v8 -; CGP-NEXT: v_add_i32_e32 v10, vcc, v12, v10 -; CGP-NEXT: v_cndmask_b32_e64 v12, 0, 1, vcc -; CGP-NEXT: v_add_i32_e32 v10, vcc, v10, v11 -; CGP-NEXT: v_cndmask_b32_e64 v11, 0, 1, vcc -; CGP-NEXT: v_add_i32_e32 v11, vcc, v12, v11 -; CGP-NEXT: v_mul_hi_u32 v8, v7, v8 -; CGP-NEXT: v_add_i32_e32 v9, vcc, v10, v9 -; CGP-NEXT: v_cndmask_b32_e64 v10, 0, 1, vcc -; CGP-NEXT: v_add_i32_e32 v10, vcc, v11, v10 -; CGP-NEXT: v_add_i32_e32 v8, vcc, v8, v10 -; CGP-NEXT: v_add_i32_e32 v5, vcc, v5, v9 -; CGP-NEXT: v_addc_u32_e32 v7, vcc, v7, v8, vcc -; CGP-NEXT: v_mul_lo_u32 v8, -1, v5 -; CGP-NEXT: v_mul_lo_u32 v9, s6, v7 -; CGP-NEXT: v_mul_hi_u32 v11, s6, v5 -; CGP-NEXT: v_mul_lo_u32 v10, s6, v5 -; CGP-NEXT: v_xor_b32_e32 v2, v2, v6 -; CGP-NEXT: v_add_i32_e32 v8, vcc, v8, v9 -; CGP-NEXT: v_add_i32_e32 v8, vcc, v8, v11 -; CGP-NEXT: v_mul_lo_u32 v9, v7, v10 -; CGP-NEXT: v_mul_lo_u32 v11, v5, v8 -; CGP-NEXT: v_mul_hi_u32 v12, v5, v10 -; CGP-NEXT: v_mul_hi_u32 v10, v7, v10 -; CGP-NEXT: v_xor_b32_e32 v3, v3, v6 -; CGP-NEXT: v_add_i32_e32 v9, vcc, v9, v11 -; CGP-NEXT: v_cndmask_b32_e64 v11, 0, 1, vcc -; CGP-NEXT: v_add_i32_e32 v9, vcc, v9, v12 -; CGP-NEXT: v_cndmask_b32_e64 v9, 0, 1, vcc -; CGP-NEXT: v_mul_lo_u32 v12, v7, v8 -; CGP-NEXT: v_add_i32_e32 v9, vcc, v11, v9 -; CGP-NEXT: v_mul_hi_u32 v11, v5, v8 -; CGP-NEXT: v_add_i32_e32 v10, vcc, v12, v10 -; CGP-NEXT: v_cndmask_b32_e64 v12, 0, 1, vcc -; CGP-NEXT: v_add_i32_e32 v10, vcc, v10, v11 -; CGP-NEXT: v_cndmask_b32_e64 v11, 0, 1, vcc -; CGP-NEXT: v_add_i32_e32 v11, vcc, v12, v11 -; CGP-NEXT: v_mul_hi_u32 v8, v7, v8 -; CGP-NEXT: v_add_i32_e32 v9, vcc, v10, v9 -; CGP-NEXT: v_cndmask_b32_e64 v10, 0, 1, vcc -; CGP-NEXT: v_add_i32_e32 v10, vcc, v11, v10 -; CGP-NEXT: v_add_i32_e32 v8, vcc, v8, v10 -; CGP-NEXT: v_add_i32_e32 v5, vcc, v5, v9 -; CGP-NEXT: v_addc_u32_e32 v7, vcc, v7, v8, vcc ; CGP-NEXT: v_xor_b32_e32 v1, v1, v4 -; CGP-NEXT: v_mul_lo_u32 v8, v3, v5 -; CGP-NEXT: v_mul_lo_u32 v9, v2, v7 +; CGP-NEXT: v_mul_lo_u32 v9, v3, v6 +; CGP-NEXT: v_mul_lo_u32 v10, v2, v8 ; CGP-NEXT: v_sub_i32_e32 v0, vcc, v0, v4 ; CGP-NEXT: v_subb_u32_e32 v1, vcc, v1, v4, vcc -; CGP-NEXT: v_mul_hi_u32 v4, v2, v5 -; CGP-NEXT: v_add_i32_e32 v8, vcc, v8, v9 -; CGP-NEXT: v_cndmask_b32_e64 v9, 0, 1, vcc -; CGP-NEXT: v_add_i32_e32 v4, vcc, v8, v4 -; CGP-NEXT: v_cndmask_b32_e64 v4, 0, 1, vcc -; CGP-NEXT: v_mul_lo_u32 v8, v3, v7 -; CGP-NEXT: v_mul_hi_u32 v5, v3, v5 +; CGP-NEXT: v_mul_hi_u32 v4, v2, v6 +; CGP-NEXT: v_add_i32_e32 v9, vcc, v9, v10 +; CGP-NEXT: v_cndmask_b32_e64 v10, 0, 1, vcc ; CGP-NEXT: v_add_i32_e32 v4, vcc, v9, v4 -; CGP-NEXT: v_mul_hi_u32 v9, v2, v7 -; CGP-NEXT: v_add_i32_e32 v5, vcc, v8, v5 -; CGP-NEXT: v_cndmask_b32_e64 v8, 0, 1, vcc -; CGP-NEXT: v_add_i32_e32 v5, vcc, v5, v9 +; CGP-NEXT: v_cndmask_b32_e64 v4, 0, 1, vcc +; CGP-NEXT: v_mul_lo_u32 v9, v3, v8 +; CGP-NEXT: v_mul_hi_u32 v6, v3, v6 +; CGP-NEXT: v_add_i32_e32 v4, vcc, v10, v4 +; CGP-NEXT: v_mul_hi_u32 v10, v2, v8 +; CGP-NEXT: v_add_i32_e32 v6, vcc, v9, v6 ; CGP-NEXT: v_cndmask_b32_e64 v9, 0, 1, vcc -; CGP-NEXT: v_add_i32_e32 v8, vcc, v8, v9 -; CGP-NEXT: v_mul_hi_u32 v7, v3, v7 -; CGP-NEXT: v_add_i32_e32 v4, vcc, v5, v4 -; CGP-NEXT: v_cndmask_b32_e64 v5, 0, 1, vcc -; CGP-NEXT: v_add_i32_e32 v5, vcc, v8, v5 -; 
CGP-NEXT: v_add_i32_e32 v5, vcc, v7, v5 -; CGP-NEXT: v_mul_lo_u32 v7, 0, v4 -; CGP-NEXT: v_mul_lo_u32 v5, s7, v5 -; CGP-NEXT: v_mul_lo_u32 v8, s7, v4 -; CGP-NEXT: v_mul_hi_u32 v4, s7, v4 +; CGP-NEXT: v_add_i32_e32 v6, vcc, v6, v10 +; CGP-NEXT: v_cndmask_b32_e64 v10, 0, 1, vcc +; CGP-NEXT: v_add_i32_e32 v9, vcc, v9, v10 +; CGP-NEXT: v_mul_hi_u32 v8, v3, v8 +; CGP-NEXT: v_add_i32_e32 v4, vcc, v6, v4 +; CGP-NEXT: v_cndmask_b32_e64 v6, 0, 1, vcc +; CGP-NEXT: v_add_i32_e32 v6, vcc, v9, v6 +; CGP-NEXT: v_add_i32_e32 v6, vcc, v8, v6 +; CGP-NEXT: v_mul_lo_u32 v8, 0, v4 +; CGP-NEXT: v_mul_lo_u32 v6, s6, v6 +; CGP-NEXT: v_mul_lo_u32 v9, s6, v4 +; CGP-NEXT: v_mul_hi_u32 v4, s6, v4 ; CGP-NEXT: s_bfe_i32 s6, -1, 0x10000 -; CGP-NEXT: v_add_i32_e32 v5, vcc, v7, v5 -; CGP-NEXT: v_add_i32_e32 v4, vcc, v5, v4 -; CGP-NEXT: v_sub_i32_e32 v2, vcc, v2, v8 -; CGP-NEXT: v_subb_u32_e64 v5, s[4:5], v3, v4, vcc +; CGP-NEXT: v_add_i32_e32 v6, vcc, v8, v6 +; CGP-NEXT: v_add_i32_e32 v4, vcc, v6, v4 +; CGP-NEXT: v_sub_i32_e32 v2, vcc, v2, v9 +; CGP-NEXT: v_subb_u32_e64 v6, s[4:5], v3, v4, vcc ; CGP-NEXT: v_sub_i32_e64 v3, s[4:5], v3, v4 -; CGP-NEXT: v_cmp_le_u32_e64 s[4:5], s7, v2 +; CGP-NEXT: v_cmp_ge_u32_e64 s[4:5], v2, v5 ; CGP-NEXT: v_cndmask_b32_e64 v4, 0, -1, s[4:5] -; CGP-NEXT: v_mov_b32_e32 v7, s6 -; CGP-NEXT: v_cmp_eq_u32_e64 s[4:5], 0, v5 +; CGP-NEXT: v_mov_b32_e32 v8, s6 +; CGP-NEXT: v_cmp_eq_u32_e64 s[4:5], 0, v6 ; CGP-NEXT: v_subbrev_u32_e32 v3, vcc, 0, v3, vcc -; CGP-NEXT: v_cndmask_b32_e64 v4, v7, v4, s[4:5] -; CGP-NEXT: v_subrev_i32_e32 v7, vcc, s7, v2 +; CGP-NEXT: v_cndmask_b32_e64 v4, v8, v4, s[4:5] +; CGP-NEXT: v_sub_i32_e32 v8, vcc, v2, v5 ; CGP-NEXT: v_subbrev_u32_e32 v3, vcc, 0, v3, vcc ; CGP-NEXT: s_bfe_i32 s4, -1, 0x10000 -; CGP-NEXT: v_cmp_le_u32_e32 vcc, s7, v7 -; CGP-NEXT: v_cndmask_b32_e64 v8, 0, -1, vcc -; CGP-NEXT: v_mov_b32_e32 v9, s4 +; CGP-NEXT: v_cmp_ge_u32_e32 vcc, v8, v5 +; CGP-NEXT: v_cndmask_b32_e64 v9, 0, -1, vcc +; CGP-NEXT: v_mov_b32_e32 v10, s4 ; CGP-NEXT: v_cmp_eq_u32_e32 vcc, 0, v3 -; CGP-NEXT: v_cndmask_b32_e32 v8, v9, v8, vcc -; CGP-NEXT: v_subrev_i32_e32 v9, vcc, s7, v7 +; CGP-NEXT: v_cndmask_b32_e32 v9, v10, v9, vcc +; CGP-NEXT: v_sub_i32_e32 v5, vcc, v8, v5 ; CGP-NEXT: v_subbrev_u32_e32 v10, vcc, 0, v3, vcc -; CGP-NEXT: v_cmp_ne_u32_e32 vcc, 0, v8 -; CGP-NEXT: v_cndmask_b32_e32 v7, v7, v9, vcc +; CGP-NEXT: v_cmp_ne_u32_e32 vcc, 0, v9 +; CGP-NEXT: v_cndmask_b32_e32 v5, v8, v5, vcc ; CGP-NEXT: v_cndmask_b32_e32 v3, v3, v10, vcc ; CGP-NEXT: v_cmp_ne_u32_e32 vcc, 0, v4 -; CGP-NEXT: v_cndmask_b32_e32 v2, v2, v7, vcc -; CGP-NEXT: v_cndmask_b32_e32 v3, v5, v3, vcc -; CGP-NEXT: v_xor_b32_e32 v2, v2, v6 -; CGP-NEXT: v_xor_b32_e32 v3, v3, v6 -; CGP-NEXT: v_sub_i32_e32 v2, vcc, v2, v6 -; CGP-NEXT: v_subb_u32_e32 v3, vcc, v3, v6, vcc +; CGP-NEXT: v_cndmask_b32_e32 v2, v2, v5, vcc +; CGP-NEXT: v_cndmask_b32_e32 v3, v6, v3, vcc +; CGP-NEXT: v_xor_b32_e32 v2, v2, v7 +; CGP-NEXT: v_xor_b32_e32 v3, v3, v7 +; CGP-NEXT: v_sub_i32_e32 v2, vcc, v2, v7 +; CGP-NEXT: v_subb_u32_e32 v3, vcc, v3, v7, vcc ; CGP-NEXT: s_setpc_b64 s[30:31] %result = srem <2 x i64> %num, ret <2 x i64> %result @@ -1736,10 +1739,11 @@ define i64 @v_srem_i64_oddk_denom(i64 %num) { ; CHECK-LABEL: v_srem_i64_oddk_denom: ; CHECK: ; %bb.0: ; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; CHECK-NEXT: v_cvt_f32_u32_e32 v2, 0x12d8fb +; CHECK-NEXT: s_mov_b32 s4, 0x12d8fb +; CHECK-NEXT: v_cvt_f32_u32_e32 v2, s4 ; CHECK-NEXT: v_cvt_f32_ubyte0_e32 v3, 0 -; CHECK-NEXT: s_mov_b32 s4, 0xffed2705 -; CHECK-NEXT: s_mov_b32 s6, 
0x12d8fb +; CHECK-NEXT: s_mov_b32 s5, 0xffed2705 +; CHECK-NEXT: s_bfe_i32 s6, -1, 0x10000 ; CHECK-NEXT: v_mac_f32_e32 v2, 0x4f800000, v3 ; CHECK-NEXT: v_rcp_iflag_f32_e32 v2, v2 ; CHECK-NEXT: v_ashrrev_i32_e32 v3, 31, v1 @@ -1751,10 +1755,10 @@ define i64 @v_srem_i64_oddk_denom(i64 %num) { ; CHECK-NEXT: v_cvt_u32_f32_e32 v4, v4 ; CHECK-NEXT: v_cvt_u32_f32_e32 v2, v2 ; CHECK-NEXT: v_addc_u32_e32 v1, vcc, v1, v3, vcc -; CHECK-NEXT: v_mul_lo_u32 v6, s4, v4 +; CHECK-NEXT: v_mul_lo_u32 v6, s5, v4 ; CHECK-NEXT: v_mul_lo_u32 v5, -1, v2 -; CHECK-NEXT: v_mul_hi_u32 v8, s4, v2 -; CHECK-NEXT: v_mul_lo_u32 v7, s4, v2 +; CHECK-NEXT: v_mul_hi_u32 v8, s5, v2 +; CHECK-NEXT: v_mul_lo_u32 v7, s5, v2 ; CHECK-NEXT: v_xor_b32_e32 v0, v0, v3 ; CHECK-NEXT: v_add_i32_e32 v5, vcc, v5, v6 ; CHECK-NEXT: v_add_i32_e32 v5, vcc, v5, v8 @@ -1783,10 +1787,9 @@ define i64 @v_srem_i64_oddk_denom(i64 %num) { ; CHECK-NEXT: v_add_i32_e32 v2, vcc, v2, v6 ; CHECK-NEXT: v_addc_u32_e32 v4, vcc, v4, v5, vcc ; CHECK-NEXT: v_mul_lo_u32 v5, -1, v2 -; CHECK-NEXT: v_mul_lo_u32 v6, s4, v4 -; CHECK-NEXT: v_mul_hi_u32 v8, s4, v2 -; CHECK-NEXT: v_mul_lo_u32 v7, s4, v2 -; CHECK-NEXT: s_bfe_i32 s7, -1, 0x10000 +; CHECK-NEXT: v_mul_lo_u32 v6, s5, v4 +; CHECK-NEXT: v_mul_hi_u32 v8, s5, v2 +; CHECK-NEXT: v_mul_lo_u32 v7, s5, v2 ; CHECK-NEXT: v_add_i32_e32 v5, vcc, v5, v6 ; CHECK-NEXT: v_add_i32_e32 v5, vcc, v5, v8 ; CHECK-NEXT: v_mul_lo_u32 v6, v4, v7 @@ -1814,49 +1817,50 @@ define i64 @v_srem_i64_oddk_denom(i64 %num) { ; CHECK-NEXT: v_addc_u32_e32 v4, vcc, v4, v5, vcc ; CHECK-NEXT: v_mul_lo_u32 v5, v1, v2 ; CHECK-NEXT: v_mul_lo_u32 v6, v0, v4 -; CHECK-NEXT: v_mul_hi_u32 v7, v0, v2 +; CHECK-NEXT: v_mul_hi_u32 v8, v0, v2 ; CHECK-NEXT: v_mul_hi_u32 v2, v1, v2 +; CHECK-NEXT: v_mov_b32_e32 v7, 0x12d8fb ; CHECK-NEXT: v_add_i32_e32 v5, vcc, v5, v6 ; CHECK-NEXT: v_cndmask_b32_e64 v6, 0, 1, vcc -; CHECK-NEXT: v_add_i32_e32 v5, vcc, v5, v7 +; CHECK-NEXT: v_add_i32_e32 v5, vcc, v5, v8 ; CHECK-NEXT: v_cndmask_b32_e64 v5, 0, 1, vcc -; CHECK-NEXT: v_mul_lo_u32 v7, v1, v4 +; CHECK-NEXT: v_mul_lo_u32 v8, v1, v4 ; CHECK-NEXT: v_add_i32_e32 v5, vcc, v6, v5 ; CHECK-NEXT: v_mul_hi_u32 v6, v0, v4 -; CHECK-NEXT: v_add_i32_e32 v2, vcc, v7, v2 -; CHECK-NEXT: v_cndmask_b32_e64 v7, 0, 1, vcc +; CHECK-NEXT: v_add_i32_e32 v2, vcc, v8, v2 +; CHECK-NEXT: v_cndmask_b32_e64 v8, 0, 1, vcc ; CHECK-NEXT: v_add_i32_e32 v2, vcc, v2, v6 ; CHECK-NEXT: v_cndmask_b32_e64 v6, 0, 1, vcc -; CHECK-NEXT: v_add_i32_e32 v6, vcc, v7, v6 +; CHECK-NEXT: v_add_i32_e32 v6, vcc, v8, v6 ; CHECK-NEXT: v_mul_hi_u32 v4, v1, v4 ; CHECK-NEXT: v_add_i32_e32 v2, vcc, v2, v5 ; CHECK-NEXT: v_cndmask_b32_e64 v5, 0, 1, vcc ; CHECK-NEXT: v_add_i32_e32 v5, vcc, v6, v5 ; CHECK-NEXT: v_add_i32_e32 v4, vcc, v4, v5 ; CHECK-NEXT: v_mul_lo_u32 v5, 0, v2 -; CHECK-NEXT: v_mul_lo_u32 v4, s6, v4 -; CHECK-NEXT: v_mul_lo_u32 v6, s6, v2 -; CHECK-NEXT: v_mul_hi_u32 v2, s6, v2 +; CHECK-NEXT: v_mul_lo_u32 v4, s4, v4 +; CHECK-NEXT: v_mul_lo_u32 v6, s4, v2 +; CHECK-NEXT: v_mul_hi_u32 v2, s4, v2 ; CHECK-NEXT: v_add_i32_e32 v4, vcc, v5, v4 ; CHECK-NEXT: v_add_i32_e32 v2, vcc, v4, v2 ; CHECK-NEXT: v_sub_i32_e32 v0, vcc, v0, v6 ; CHECK-NEXT: v_subb_u32_e64 v4, s[4:5], v1, v2, vcc ; CHECK-NEXT: v_sub_i32_e64 v1, s[4:5], v1, v2 -; CHECK-NEXT: v_cmp_le_u32_e64 s[4:5], s6, v0 +; CHECK-NEXT: v_cmp_ge_u32_e64 s[4:5], v0, v7 ; CHECK-NEXT: v_cndmask_b32_e64 v2, 0, -1, s[4:5] -; CHECK-NEXT: v_mov_b32_e32 v5, s7 +; CHECK-NEXT: v_mov_b32_e32 v5, s6 ; CHECK-NEXT: v_cmp_eq_u32_e64 s[4:5], 0, v4 ; CHECK-NEXT: v_subbrev_u32_e32 v1, vcc, 0, v1, 
vcc ; CHECK-NEXT: v_cndmask_b32_e64 v2, v5, v2, s[4:5] -; CHECK-NEXT: v_subrev_i32_e32 v5, vcc, s6, v0 +; CHECK-NEXT: v_sub_i32_e32 v5, vcc, v0, v7 ; CHECK-NEXT: v_subbrev_u32_e32 v1, vcc, 0, v1, vcc ; CHECK-NEXT: s_bfe_i32 s4, -1, 0x10000 -; CHECK-NEXT: v_cmp_le_u32_e32 vcc, s6, v5 +; CHECK-NEXT: v_cmp_ge_u32_e32 vcc, v5, v7 ; CHECK-NEXT: v_cndmask_b32_e64 v6, 0, -1, vcc -; CHECK-NEXT: v_mov_b32_e32 v7, s4 +; CHECK-NEXT: v_mov_b32_e32 v8, s4 ; CHECK-NEXT: v_cmp_eq_u32_e32 vcc, 0, v1 -; CHECK-NEXT: v_cndmask_b32_e32 v6, v7, v6, vcc -; CHECK-NEXT: v_subrev_i32_e32 v7, vcc, s6, v5 +; CHECK-NEXT: v_cndmask_b32_e32 v6, v8, v6, vcc +; CHECK-NEXT: v_sub_i32_e32 v7, vcc, v5, v7 ; CHECK-NEXT: v_subbrev_u32_e32 v8, vcc, 0, v1, vcc ; CHECK-NEXT: v_cmp_ne_u32_e32 vcc, 0, v6 ; CHECK-NEXT: v_cndmask_b32_e32 v5, v5, v7, vcc @@ -2171,41 +2175,166 @@ define <2 x i64> @v_srem_v2i64_oddk_denom(<2 x i64> %num) { ; CGP-LABEL: v_srem_v2i64_oddk_denom: ; CGP: ; %bb.0: ; CGP-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; CGP-NEXT: v_cvt_f32_u32_e32 v5, 0x12d8fb -; CGP-NEXT: v_cvt_f32_ubyte0_e32 v6, 0 -; CGP-NEXT: s_mov_b32 s6, 0xffed2705 -; CGP-NEXT: s_mov_b32 s7, 0x12d8fb -; CGP-NEXT: v_mov_b32_e32 v4, v5 -; CGP-NEXT: v_mac_f32_e32 v4, 0x4f800000, v6 -; CGP-NEXT: v_rcp_iflag_f32_e32 v7, v4 +; CGP-NEXT: s_mov_b32 s6, 0x12d8fb +; CGP-NEXT: v_cvt_f32_u32_e32 v4, s6 +; CGP-NEXT: v_cvt_f32_ubyte0_e32 v5, 0 +; CGP-NEXT: s_mov_b32 s7, 0xffed2705 +; CGP-NEXT: s_bfe_i32 s8, -1, 0x10000 +; CGP-NEXT: v_mac_f32_e32 v4, 0x4f800000, v5 +; CGP-NEXT: v_rcp_iflag_f32_e32 v5, v4 ; CGP-NEXT: v_ashrrev_i32_e32 v4, 31, v1 ; CGP-NEXT: v_add_i32_e32 v0, vcc, v0, v4 -; CGP-NEXT: v_mul_f32_e32 v7, 0x5f7ffffc, v7 -; CGP-NEXT: v_mul_f32_e32 v8, 0x2f800000, v7 -; CGP-NEXT: v_trunc_f32_e32 v8, v8 -; CGP-NEXT: v_mac_f32_e32 v7, 0xcf800000, v8 -; CGP-NEXT: v_cvt_u32_f32_e32 v7, v7 -; CGP-NEXT: v_cvt_u32_f32_e32 v8, v8 +; CGP-NEXT: v_mul_f32_e32 v5, 0x5f7ffffc, v5 +; CGP-NEXT: v_mul_f32_e32 v6, 0x2f800000, v5 +; CGP-NEXT: v_trunc_f32_e32 v6, v6 +; CGP-NEXT: v_mac_f32_e32 v5, 0xcf800000, v6 +; CGP-NEXT: v_cvt_u32_f32_e32 v5, v5 +; CGP-NEXT: v_cvt_u32_f32_e32 v6, v6 ; CGP-NEXT: v_addc_u32_e32 v1, vcc, v1, v4, vcc -; CGP-NEXT: v_mul_lo_u32 v9, -1, v7 -; CGP-NEXT: v_mul_lo_u32 v10, s6, v8 -; CGP-NEXT: v_mul_hi_u32 v12, s6, v7 -; CGP-NEXT: v_mul_lo_u32 v11, s6, v7 +; CGP-NEXT: v_mul_lo_u32 v7, -1, v5 +; CGP-NEXT: v_mul_lo_u32 v8, s7, v6 +; CGP-NEXT: v_mul_hi_u32 v10, s7, v5 +; CGP-NEXT: v_mul_lo_u32 v9, s7, v5 ; CGP-NEXT: v_xor_b32_e32 v0, v0, v4 +; CGP-NEXT: v_add_i32_e32 v7, vcc, v7, v8 +; CGP-NEXT: v_add_i32_e32 v7, vcc, v7, v10 +; CGP-NEXT: v_mul_lo_u32 v8, v6, v9 +; CGP-NEXT: v_mul_lo_u32 v10, v5, v7 +; CGP-NEXT: v_mul_hi_u32 v11, v5, v9 +; CGP-NEXT: v_mul_hi_u32 v9, v6, v9 +; CGP-NEXT: v_xor_b32_e32 v1, v1, v4 +; CGP-NEXT: v_add_i32_e32 v8, vcc, v8, v10 +; CGP-NEXT: v_cndmask_b32_e64 v10, 0, 1, vcc +; CGP-NEXT: v_add_i32_e32 v8, vcc, v8, v11 +; CGP-NEXT: v_cndmask_b32_e64 v8, 0, 1, vcc +; CGP-NEXT: v_mul_lo_u32 v11, v6, v7 +; CGP-NEXT: v_add_i32_e32 v8, vcc, v10, v8 +; CGP-NEXT: v_mul_hi_u32 v10, v5, v7 +; CGP-NEXT: v_add_i32_e32 v9, vcc, v11, v9 +; CGP-NEXT: v_cndmask_b32_e64 v11, 0, 1, vcc +; CGP-NEXT: v_add_i32_e32 v9, vcc, v9, v10 +; CGP-NEXT: v_cndmask_b32_e64 v10, 0, 1, vcc +; CGP-NEXT: v_add_i32_e32 v10, vcc, v11, v10 +; CGP-NEXT: v_mul_hi_u32 v7, v6, v7 +; CGP-NEXT: v_add_i32_e32 v8, vcc, v9, v8 +; CGP-NEXT: v_cndmask_b32_e64 v9, 0, 1, vcc +; CGP-NEXT: v_add_i32_e32 v9, vcc, v10, v9 +; CGP-NEXT: v_add_i32_e32 v7, vcc, v7, v9 +; 
CGP-NEXT: v_add_i32_e32 v5, vcc, v5, v8 +; CGP-NEXT: v_addc_u32_e32 v6, vcc, v6, v7, vcc +; CGP-NEXT: v_mul_lo_u32 v7, -1, v5 +; CGP-NEXT: v_mul_lo_u32 v8, s7, v6 +; CGP-NEXT: v_mul_hi_u32 v10, s7, v5 +; CGP-NEXT: v_mul_lo_u32 v9, s7, v5 +; CGP-NEXT: v_add_i32_e32 v7, vcc, v7, v8 +; CGP-NEXT: v_add_i32_e32 v7, vcc, v7, v10 +; CGP-NEXT: v_mul_lo_u32 v8, v6, v9 +; CGP-NEXT: v_mul_lo_u32 v10, v5, v7 +; CGP-NEXT: v_mul_hi_u32 v11, v5, v9 +; CGP-NEXT: v_mul_hi_u32 v9, v6, v9 +; CGP-NEXT: v_add_i32_e32 v8, vcc, v8, v10 +; CGP-NEXT: v_cndmask_b32_e64 v10, 0, 1, vcc +; CGP-NEXT: v_add_i32_e32 v8, vcc, v8, v11 +; CGP-NEXT: v_cndmask_b32_e64 v8, 0, 1, vcc +; CGP-NEXT: v_mul_lo_u32 v11, v6, v7 +; CGP-NEXT: v_add_i32_e32 v8, vcc, v10, v8 +; CGP-NEXT: v_mul_hi_u32 v10, v5, v7 +; CGP-NEXT: v_add_i32_e32 v9, vcc, v11, v9 +; CGP-NEXT: v_cndmask_b32_e64 v11, 0, 1, vcc +; CGP-NEXT: v_add_i32_e32 v9, vcc, v9, v10 +; CGP-NEXT: v_cndmask_b32_e64 v10, 0, 1, vcc +; CGP-NEXT: v_add_i32_e32 v10, vcc, v11, v10 +; CGP-NEXT: v_mul_hi_u32 v7, v6, v7 +; CGP-NEXT: v_add_i32_e32 v8, vcc, v9, v8 +; CGP-NEXT: v_cndmask_b32_e64 v9, 0, 1, vcc +; CGP-NEXT: v_add_i32_e32 v9, vcc, v10, v9 +; CGP-NEXT: v_add_i32_e32 v7, vcc, v7, v9 +; CGP-NEXT: v_add_i32_e32 v8, vcc, v5, v8 +; CGP-NEXT: v_addc_u32_e32 v6, vcc, v6, v7, vcc +; CGP-NEXT: v_mul_lo_u32 v7, v1, v8 +; CGP-NEXT: v_mul_lo_u32 v9, v0, v6 +; CGP-NEXT: v_mul_hi_u32 v10, v0, v8 +; CGP-NEXT: v_mul_hi_u32 v8, v1, v8 +; CGP-NEXT: v_mov_b32_e32 v5, 0x12d8fb +; CGP-NEXT: v_add_i32_e32 v7, vcc, v7, v9 +; CGP-NEXT: v_cndmask_b32_e64 v9, 0, 1, vcc +; CGP-NEXT: v_add_i32_e32 v7, vcc, v7, v10 +; CGP-NEXT: v_cndmask_b32_e64 v7, 0, 1, vcc +; CGP-NEXT: v_mul_lo_u32 v10, v1, v6 +; CGP-NEXT: v_add_i32_e32 v7, vcc, v9, v7 +; CGP-NEXT: v_mul_hi_u32 v9, v0, v6 +; CGP-NEXT: v_add_i32_e32 v8, vcc, v10, v8 +; CGP-NEXT: v_cndmask_b32_e64 v10, 0, 1, vcc +; CGP-NEXT: v_add_i32_e32 v8, vcc, v8, v9 +; CGP-NEXT: v_cndmask_b32_e64 v9, 0, 1, vcc +; CGP-NEXT: v_add_i32_e32 v9, vcc, v10, v9 +; CGP-NEXT: v_mul_hi_u32 v6, v1, v6 +; CGP-NEXT: v_add_i32_e32 v7, vcc, v8, v7 +; CGP-NEXT: v_cndmask_b32_e64 v8, 0, 1, vcc +; CGP-NEXT: v_add_i32_e32 v8, vcc, v9, v8 +; CGP-NEXT: v_add_i32_e32 v6, vcc, v6, v8 +; CGP-NEXT: v_mul_lo_u32 v8, 0, v7 +; CGP-NEXT: v_mul_lo_u32 v6, s6, v6 +; CGP-NEXT: v_mul_lo_u32 v9, s6, v7 +; CGP-NEXT: v_mul_hi_u32 v7, s6, v7 +; CGP-NEXT: v_add_i32_e32 v6, vcc, v8, v6 +; CGP-NEXT: v_add_i32_e32 v6, vcc, v6, v7 +; CGP-NEXT: v_sub_i32_e32 v0, vcc, v0, v9 +; CGP-NEXT: v_subb_u32_e64 v7, s[4:5], v1, v6, vcc +; CGP-NEXT: v_sub_i32_e64 v1, s[4:5], v1, v6 +; CGP-NEXT: v_cmp_ge_u32_e64 s[4:5], v0, v5 +; CGP-NEXT: v_cndmask_b32_e64 v6, 0, -1, s[4:5] +; CGP-NEXT: v_mov_b32_e32 v8, s8 +; CGP-NEXT: v_cmp_eq_u32_e64 s[4:5], 0, v7 +; CGP-NEXT: v_subbrev_u32_e32 v1, vcc, 0, v1, vcc +; CGP-NEXT: v_cndmask_b32_e64 v6, v8, v6, s[4:5] +; CGP-NEXT: v_sub_i32_e32 v8, vcc, v0, v5 +; CGP-NEXT: v_subbrev_u32_e32 v1, vcc, 0, v1, vcc +; CGP-NEXT: s_bfe_i32 s4, -1, 0x10000 +; CGP-NEXT: v_cmp_ge_u32_e32 vcc, v8, v5 +; CGP-NEXT: v_cndmask_b32_e64 v9, 0, -1, vcc +; CGP-NEXT: v_mov_b32_e32 v10, s4 +; CGP-NEXT: v_cmp_eq_u32_e32 vcc, 0, v1 +; CGP-NEXT: v_cndmask_b32_e32 v9, v10, v9, vcc +; CGP-NEXT: v_sub_i32_e32 v10, vcc, v8, v5 +; CGP-NEXT: v_subbrev_u32_e32 v11, vcc, 0, v1, vcc +; CGP-NEXT: v_cmp_ne_u32_e32 vcc, 0, v9 +; CGP-NEXT: v_cvt_f32_u32_e32 v9, v5 +; CGP-NEXT: v_cndmask_b32_e32 v8, v8, v10, vcc +; CGP-NEXT: v_cndmask_b32_e32 v1, v1, v11, vcc +; CGP-NEXT: v_cmp_ne_u32_e32 vcc, 0, v6 +; CGP-NEXT: 
v_cvt_f32_ubyte0_e32 v6, 0 +; CGP-NEXT: v_mac_f32_e32 v9, 0x4f800000, v6 +; CGP-NEXT: v_rcp_iflag_f32_e32 v6, v9 +; CGP-NEXT: v_cndmask_b32_e32 v0, v0, v8, vcc +; CGP-NEXT: v_cndmask_b32_e32 v1, v7, v1, vcc +; CGP-NEXT: v_ashrrev_i32_e32 v7, 31, v3 +; CGP-NEXT: v_mul_f32_e32 v6, 0x5f7ffffc, v6 +; CGP-NEXT: v_mul_f32_e32 v8, 0x2f800000, v6 +; CGP-NEXT: v_trunc_f32_e32 v8, v8 +; CGP-NEXT: v_mac_f32_e32 v6, 0xcf800000, v8 +; CGP-NEXT: v_cvt_u32_f32_e32 v6, v6 +; CGP-NEXT: v_cvt_u32_f32_e32 v8, v8 +; CGP-NEXT: v_add_i32_e32 v2, vcc, v2, v7 +; CGP-NEXT: v_mul_lo_u32 v9, -1, v6 +; CGP-NEXT: v_mul_lo_u32 v10, s7, v8 +; CGP-NEXT: v_mul_hi_u32 v12, s7, v6 +; CGP-NEXT: v_mul_lo_u32 v11, s7, v6 +; CGP-NEXT: v_addc_u32_e32 v3, vcc, v3, v7, vcc ; CGP-NEXT: v_add_i32_e32 v9, vcc, v9, v10 ; CGP-NEXT: v_add_i32_e32 v9, vcc, v9, v12 ; CGP-NEXT: v_mul_lo_u32 v10, v8, v11 -; CGP-NEXT: v_mul_lo_u32 v12, v7, v9 -; CGP-NEXT: v_mul_hi_u32 v13, v7, v11 +; CGP-NEXT: v_mul_lo_u32 v12, v6, v9 +; CGP-NEXT: v_mul_hi_u32 v13, v6, v11 ; CGP-NEXT: v_mul_hi_u32 v11, v8, v11 -; CGP-NEXT: v_xor_b32_e32 v1, v1, v4 +; CGP-NEXT: v_xor_b32_e32 v0, v0, v4 ; CGP-NEXT: v_add_i32_e32 v10, vcc, v10, v12 ; CGP-NEXT: v_cndmask_b32_e64 v12, 0, 1, vcc ; CGP-NEXT: v_add_i32_e32 v10, vcc, v10, v13 ; CGP-NEXT: v_cndmask_b32_e64 v10, 0, 1, vcc ; CGP-NEXT: v_mul_lo_u32 v13, v8, v9 ; CGP-NEXT: v_add_i32_e32 v10, vcc, v12, v10 -; CGP-NEXT: v_mul_hi_u32 v12, v7, v9 +; CGP-NEXT: v_mul_hi_u32 v12, v6, v9 ; CGP-NEXT: v_add_i32_e32 v11, vcc, v13, v11 ; CGP-NEXT: v_cndmask_b32_e64 v13, 0, 1, vcc ; CGP-NEXT: v_add_i32_e32 v11, vcc, v11, v12 @@ -2216,27 +2345,27 @@ define <2 x i64> @v_srem_v2i64_oddk_denom(<2 x i64> %num) { ; CGP-NEXT: v_cndmask_b32_e64 v11, 0, 1, vcc ; CGP-NEXT: v_add_i32_e32 v11, vcc, v12, v11 ; CGP-NEXT: v_add_i32_e32 v9, vcc, v9, v11 -; CGP-NEXT: v_add_i32_e32 v7, vcc, v7, v10 +; CGP-NEXT: v_add_i32_e32 v6, vcc, v6, v10 ; CGP-NEXT: v_addc_u32_e32 v8, vcc, v8, v9, vcc -; CGP-NEXT: v_mul_lo_u32 v9, -1, v7 -; CGP-NEXT: v_mul_lo_u32 v10, s6, v8 -; CGP-NEXT: v_mul_hi_u32 v12, s6, v7 -; CGP-NEXT: v_mul_lo_u32 v11, s6, v7 -; CGP-NEXT: s_bfe_i32 s8, -1, 0x10000 +; CGP-NEXT: v_mul_lo_u32 v9, -1, v6 +; CGP-NEXT: v_mul_lo_u32 v10, s7, v8 +; CGP-NEXT: v_mul_hi_u32 v12, s7, v6 +; CGP-NEXT: v_mul_lo_u32 v11, s7, v6 +; CGP-NEXT: v_xor_b32_e32 v2, v2, v7 ; CGP-NEXT: v_add_i32_e32 v9, vcc, v9, v10 ; CGP-NEXT: v_add_i32_e32 v9, vcc, v9, v12 ; CGP-NEXT: v_mul_lo_u32 v10, v8, v11 -; CGP-NEXT: v_mul_lo_u32 v12, v7, v9 -; CGP-NEXT: v_mul_hi_u32 v13, v7, v11 +; CGP-NEXT: v_mul_lo_u32 v12, v6, v9 +; CGP-NEXT: v_mul_hi_u32 v13, v6, v11 ; CGP-NEXT: v_mul_hi_u32 v11, v8, v11 -; CGP-NEXT: v_mac_f32_e32 v5, 0x4f800000, v6 +; CGP-NEXT: v_xor_b32_e32 v3, v3, v7 ; CGP-NEXT: v_add_i32_e32 v10, vcc, v10, v12 ; CGP-NEXT: v_cndmask_b32_e64 v12, 0, 1, vcc ; CGP-NEXT: v_add_i32_e32 v10, vcc, v10, v13 ; CGP-NEXT: v_cndmask_b32_e64 v10, 0, 1, vcc ; CGP-NEXT: v_mul_lo_u32 v13, v8, v9 ; CGP-NEXT: v_add_i32_e32 v10, vcc, v12, v10 -; CGP-NEXT: v_mul_hi_u32 v12, v7, v9 +; CGP-NEXT: v_mul_hi_u32 v12, v6, v9 ; CGP-NEXT: v_add_i32_e32 v11, vcc, v13, v11 ; CGP-NEXT: v_cndmask_b32_e64 v13, 0, 1, vcc ; CGP-NEXT: v_add_i32_e32 v11, vcc, v11, v12 @@ -2247,191 +2376,68 @@ define <2 x i64> @v_srem_v2i64_oddk_denom(<2 x i64> %num) { ; CGP-NEXT: v_cndmask_b32_e64 v11, 0, 1, vcc ; CGP-NEXT: v_add_i32_e32 v11, vcc, v12, v11 ; CGP-NEXT: v_add_i32_e32 v9, vcc, v9, v11 -; CGP-NEXT: v_add_i32_e32 v7, vcc, v7, v10 +; CGP-NEXT: v_add_i32_e32 v6, vcc, v6, v10 ; CGP-NEXT: v_addc_u32_e32 v8, vcc, 
v8, v9, vcc -; CGP-NEXT: v_mul_lo_u32 v9, v1, v7 -; CGP-NEXT: v_mul_lo_u32 v10, v0, v8 -; CGP-NEXT: v_mul_hi_u32 v11, v0, v7 -; CGP-NEXT: v_mul_hi_u32 v7, v1, v7 -; CGP-NEXT: v_rcp_iflag_f32_e32 v5, v5 -; CGP-NEXT: v_add_i32_e32 v9, vcc, v9, v10 -; CGP-NEXT: v_cndmask_b32_e64 v10, 0, 1, vcc -; CGP-NEXT: v_add_i32_e32 v9, vcc, v9, v11 -; CGP-NEXT: v_cndmask_b32_e64 v9, 0, 1, vcc -; CGP-NEXT: v_mul_lo_u32 v11, v1, v8 -; CGP-NEXT: v_add_i32_e32 v9, vcc, v10, v9 -; CGP-NEXT: v_mul_hi_u32 v10, v0, v8 -; CGP-NEXT: v_add_i32_e32 v7, vcc, v11, v7 -; CGP-NEXT: v_cndmask_b32_e64 v11, 0, 1, vcc -; CGP-NEXT: v_add_i32_e32 v7, vcc, v7, v10 -; CGP-NEXT: v_cndmask_b32_e64 v10, 0, 1, vcc -; CGP-NEXT: v_add_i32_e32 v10, vcc, v11, v10 -; CGP-NEXT: v_mul_hi_u32 v8, v1, v8 -; CGP-NEXT: v_add_i32_e32 v7, vcc, v7, v9 -; CGP-NEXT: v_cndmask_b32_e64 v9, 0, 1, vcc -; CGP-NEXT: v_add_i32_e32 v9, vcc, v10, v9 -; CGP-NEXT: v_add_i32_e32 v8, vcc, v8, v9 -; CGP-NEXT: v_mul_lo_u32 v9, 0, v7 -; CGP-NEXT: v_mul_lo_u32 v8, s7, v8 -; CGP-NEXT: v_mul_lo_u32 v10, s7, v7 -; CGP-NEXT: v_mul_hi_u32 v7, s7, v7 -; CGP-NEXT: v_mul_f32_e32 v5, 0x5f7ffffc, v5 -; CGP-NEXT: v_add_i32_e32 v8, vcc, v9, v8 -; CGP-NEXT: v_add_i32_e32 v7, vcc, v8, v7 -; CGP-NEXT: v_sub_i32_e32 v0, vcc, v0, v10 -; CGP-NEXT: v_subb_u32_e64 v8, s[4:5], v1, v7, vcc -; CGP-NEXT: v_sub_i32_e64 v1, s[4:5], v1, v7 -; CGP-NEXT: v_cmp_le_u32_e64 s[4:5], s7, v0 -; CGP-NEXT: v_cndmask_b32_e64 v7, 0, -1, s[4:5] -; CGP-NEXT: v_mov_b32_e32 v9, s8 -; CGP-NEXT: v_cmp_eq_u32_e64 s[4:5], 0, v8 -; CGP-NEXT: v_subbrev_u32_e32 v1, vcc, 0, v1, vcc -; CGP-NEXT: v_cndmask_b32_e64 v7, v9, v7, s[4:5] -; CGP-NEXT: v_subrev_i32_e32 v9, vcc, s7, v0 -; CGP-NEXT: v_subbrev_u32_e32 v1, vcc, 0, v1, vcc -; CGP-NEXT: s_bfe_i32 s4, -1, 0x10000 -; CGP-NEXT: v_cmp_le_u32_e32 vcc, s7, v9 -; CGP-NEXT: v_cndmask_b32_e64 v10, 0, -1, vcc -; CGP-NEXT: v_mov_b32_e32 v11, s4 -; CGP-NEXT: v_cmp_eq_u32_e32 vcc, 0, v1 -; CGP-NEXT: v_cndmask_b32_e32 v10, v11, v10, vcc -; CGP-NEXT: v_subrev_i32_e32 v11, vcc, s7, v9 -; CGP-NEXT: v_subbrev_u32_e32 v12, vcc, 0, v1, vcc -; CGP-NEXT: v_cmp_ne_u32_e32 vcc, 0, v10 -; CGP-NEXT: v_cndmask_b32_e32 v9, v9, v11, vcc -; CGP-NEXT: v_cndmask_b32_e32 v1, v1, v12, vcc -; CGP-NEXT: v_cmp_ne_u32_e32 vcc, 0, v7 -; CGP-NEXT: v_mul_f32_e32 v7, 0x2f800000, v5 -; CGP-NEXT: v_trunc_f32_e32 v7, v7 -; CGP-NEXT: v_mac_f32_e32 v5, 0xcf800000, v7 -; CGP-NEXT: v_cvt_u32_f32_e32 v5, v5 -; CGP-NEXT: v_cvt_u32_f32_e32 v7, v7 -; CGP-NEXT: v_cndmask_b32_e32 v0, v0, v9, vcc -; CGP-NEXT: v_cndmask_b32_e32 v1, v8, v1, vcc -; CGP-NEXT: v_mul_lo_u32 v8, -1, v5 -; CGP-NEXT: v_mul_lo_u32 v9, s6, v7 -; CGP-NEXT: v_mul_hi_u32 v11, s6, v5 -; CGP-NEXT: v_ashrrev_i32_e32 v6, 31, v3 -; CGP-NEXT: v_mul_lo_u32 v10, s6, v5 -; CGP-NEXT: v_add_i32_e32 v2, vcc, v2, v6 -; CGP-NEXT: v_addc_u32_e32 v3, vcc, v3, v6, vcc -; CGP-NEXT: v_add_i32_e32 v8, vcc, v8, v9 -; CGP-NEXT: v_add_i32_e32 v8, vcc, v8, v11 -; CGP-NEXT: v_mul_lo_u32 v9, v7, v10 -; CGP-NEXT: v_mul_lo_u32 v11, v5, v8 -; CGP-NEXT: v_mul_hi_u32 v12, v5, v10 -; CGP-NEXT: v_mul_hi_u32 v10, v7, v10 -; CGP-NEXT: v_xor_b32_e32 v0, v0, v4 -; CGP-NEXT: v_add_i32_e32 v9, vcc, v9, v11 -; CGP-NEXT: v_cndmask_b32_e64 v11, 0, 1, vcc -; CGP-NEXT: v_add_i32_e32 v9, vcc, v9, v12 -; CGP-NEXT: v_cndmask_b32_e64 v9, 0, 1, vcc -; CGP-NEXT: v_mul_lo_u32 v12, v7, v8 -; CGP-NEXT: v_add_i32_e32 v9, vcc, v11, v9 -; CGP-NEXT: v_mul_hi_u32 v11, v5, v8 -; CGP-NEXT: v_add_i32_e32 v10, vcc, v12, v10 -; CGP-NEXT: v_cndmask_b32_e64 v12, 0, 1, vcc -; CGP-NEXT: v_add_i32_e32 v10, vcc, 
v10, v11 -; CGP-NEXT: v_cndmask_b32_e64 v11, 0, 1, vcc -; CGP-NEXT: v_add_i32_e32 v11, vcc, v12, v11 -; CGP-NEXT: v_mul_hi_u32 v8, v7, v8 -; CGP-NEXT: v_add_i32_e32 v9, vcc, v10, v9 -; CGP-NEXT: v_cndmask_b32_e64 v10, 0, 1, vcc -; CGP-NEXT: v_add_i32_e32 v10, vcc, v11, v10 -; CGP-NEXT: v_add_i32_e32 v8, vcc, v8, v10 -; CGP-NEXT: v_add_i32_e32 v5, vcc, v5, v9 -; CGP-NEXT: v_addc_u32_e32 v7, vcc, v7, v8, vcc -; CGP-NEXT: v_mul_lo_u32 v8, -1, v5 -; CGP-NEXT: v_mul_lo_u32 v9, s6, v7 -; CGP-NEXT: v_mul_hi_u32 v11, s6, v5 -; CGP-NEXT: v_mul_lo_u32 v10, s6, v5 -; CGP-NEXT: v_xor_b32_e32 v2, v2, v6 -; CGP-NEXT: v_add_i32_e32 v8, vcc, v8, v9 -; CGP-NEXT: v_add_i32_e32 v8, vcc, v8, v11 -; CGP-NEXT: v_mul_lo_u32 v9, v7, v10 -; CGP-NEXT: v_mul_lo_u32 v11, v5, v8 -; CGP-NEXT: v_mul_hi_u32 v12, v5, v10 -; CGP-NEXT: v_mul_hi_u32 v10, v7, v10 -; CGP-NEXT: v_xor_b32_e32 v3, v3, v6 -; CGP-NEXT: v_add_i32_e32 v9, vcc, v9, v11 -; CGP-NEXT: v_cndmask_b32_e64 v11, 0, 1, vcc -; CGP-NEXT: v_add_i32_e32 v9, vcc, v9, v12 -; CGP-NEXT: v_cndmask_b32_e64 v9, 0, 1, vcc -; CGP-NEXT: v_mul_lo_u32 v12, v7, v8 -; CGP-NEXT: v_add_i32_e32 v9, vcc, v11, v9 -; CGP-NEXT: v_mul_hi_u32 v11, v5, v8 -; CGP-NEXT: v_add_i32_e32 v10, vcc, v12, v10 -; CGP-NEXT: v_cndmask_b32_e64 v12, 0, 1, vcc -; CGP-NEXT: v_add_i32_e32 v10, vcc, v10, v11 -; CGP-NEXT: v_cndmask_b32_e64 v11, 0, 1, vcc -; CGP-NEXT: v_add_i32_e32 v11, vcc, v12, v11 -; CGP-NEXT: v_mul_hi_u32 v8, v7, v8 -; CGP-NEXT: v_add_i32_e32 v9, vcc, v10, v9 -; CGP-NEXT: v_cndmask_b32_e64 v10, 0, 1, vcc -; CGP-NEXT: v_add_i32_e32 v10, vcc, v11, v10 -; CGP-NEXT: v_add_i32_e32 v8, vcc, v8, v10 -; CGP-NEXT: v_add_i32_e32 v5, vcc, v5, v9 -; CGP-NEXT: v_addc_u32_e32 v7, vcc, v7, v8, vcc ; CGP-NEXT: v_xor_b32_e32 v1, v1, v4 -; CGP-NEXT: v_mul_lo_u32 v8, v3, v5 -; CGP-NEXT: v_mul_lo_u32 v9, v2, v7 +; CGP-NEXT: v_mul_lo_u32 v9, v3, v6 +; CGP-NEXT: v_mul_lo_u32 v10, v2, v8 ; CGP-NEXT: v_sub_i32_e32 v0, vcc, v0, v4 ; CGP-NEXT: v_subb_u32_e32 v1, vcc, v1, v4, vcc -; CGP-NEXT: v_mul_hi_u32 v4, v2, v5 -; CGP-NEXT: v_add_i32_e32 v8, vcc, v8, v9 -; CGP-NEXT: v_cndmask_b32_e64 v9, 0, 1, vcc -; CGP-NEXT: v_add_i32_e32 v4, vcc, v8, v4 -; CGP-NEXT: v_cndmask_b32_e64 v4, 0, 1, vcc -; CGP-NEXT: v_mul_lo_u32 v8, v3, v7 -; CGP-NEXT: v_mul_hi_u32 v5, v3, v5 +; CGP-NEXT: v_mul_hi_u32 v4, v2, v6 +; CGP-NEXT: v_add_i32_e32 v9, vcc, v9, v10 +; CGP-NEXT: v_cndmask_b32_e64 v10, 0, 1, vcc ; CGP-NEXT: v_add_i32_e32 v4, vcc, v9, v4 -; CGP-NEXT: v_mul_hi_u32 v9, v2, v7 -; CGP-NEXT: v_add_i32_e32 v5, vcc, v8, v5 -; CGP-NEXT: v_cndmask_b32_e64 v8, 0, 1, vcc -; CGP-NEXT: v_add_i32_e32 v5, vcc, v5, v9 +; CGP-NEXT: v_cndmask_b32_e64 v4, 0, 1, vcc +; CGP-NEXT: v_mul_lo_u32 v9, v3, v8 +; CGP-NEXT: v_mul_hi_u32 v6, v3, v6 +; CGP-NEXT: v_add_i32_e32 v4, vcc, v10, v4 +; CGP-NEXT: v_mul_hi_u32 v10, v2, v8 +; CGP-NEXT: v_add_i32_e32 v6, vcc, v9, v6 ; CGP-NEXT: v_cndmask_b32_e64 v9, 0, 1, vcc -; CGP-NEXT: v_add_i32_e32 v8, vcc, v8, v9 -; CGP-NEXT: v_mul_hi_u32 v7, v3, v7 -; CGP-NEXT: v_add_i32_e32 v4, vcc, v5, v4 -; CGP-NEXT: v_cndmask_b32_e64 v5, 0, 1, vcc -; CGP-NEXT: v_add_i32_e32 v5, vcc, v8, v5 -; CGP-NEXT: v_add_i32_e32 v5, vcc, v7, v5 -; CGP-NEXT: v_mul_lo_u32 v7, 0, v4 -; CGP-NEXT: v_mul_lo_u32 v5, s7, v5 -; CGP-NEXT: v_mul_lo_u32 v8, s7, v4 -; CGP-NEXT: v_mul_hi_u32 v4, s7, v4 +; CGP-NEXT: v_add_i32_e32 v6, vcc, v6, v10 +; CGP-NEXT: v_cndmask_b32_e64 v10, 0, 1, vcc +; CGP-NEXT: v_add_i32_e32 v9, vcc, v9, v10 +; CGP-NEXT: v_mul_hi_u32 v8, v3, v8 +; CGP-NEXT: v_add_i32_e32 v4, vcc, v6, v4 +; CGP-NEXT: v_cndmask_b32_e64 v6, 
0, 1, vcc +; CGP-NEXT: v_add_i32_e32 v6, vcc, v9, v6 +; CGP-NEXT: v_add_i32_e32 v6, vcc, v8, v6 +; CGP-NEXT: v_mul_lo_u32 v8, 0, v4 +; CGP-NEXT: v_mul_lo_u32 v6, s6, v6 +; CGP-NEXT: v_mul_lo_u32 v9, s6, v4 +; CGP-NEXT: v_mul_hi_u32 v4, s6, v4 ; CGP-NEXT: s_bfe_i32 s6, -1, 0x10000 -; CGP-NEXT: v_add_i32_e32 v5, vcc, v7, v5 -; CGP-NEXT: v_add_i32_e32 v4, vcc, v5, v4 -; CGP-NEXT: v_sub_i32_e32 v2, vcc, v2, v8 -; CGP-NEXT: v_subb_u32_e64 v5, s[4:5], v3, v4, vcc +; CGP-NEXT: v_add_i32_e32 v6, vcc, v8, v6 +; CGP-NEXT: v_add_i32_e32 v4, vcc, v6, v4 +; CGP-NEXT: v_sub_i32_e32 v2, vcc, v2, v9 +; CGP-NEXT: v_subb_u32_e64 v6, s[4:5], v3, v4, vcc ; CGP-NEXT: v_sub_i32_e64 v3, s[4:5], v3, v4 -; CGP-NEXT: v_cmp_le_u32_e64 s[4:5], s7, v2 +; CGP-NEXT: v_cmp_ge_u32_e64 s[4:5], v2, v5 ; CGP-NEXT: v_cndmask_b32_e64 v4, 0, -1, s[4:5] -; CGP-NEXT: v_mov_b32_e32 v7, s6 -; CGP-NEXT: v_cmp_eq_u32_e64 s[4:5], 0, v5 +; CGP-NEXT: v_mov_b32_e32 v8, s6 +; CGP-NEXT: v_cmp_eq_u32_e64 s[4:5], 0, v6 ; CGP-NEXT: v_subbrev_u32_e32 v3, vcc, 0, v3, vcc -; CGP-NEXT: v_cndmask_b32_e64 v4, v7, v4, s[4:5] -; CGP-NEXT: v_subrev_i32_e32 v7, vcc, s7, v2 +; CGP-NEXT: v_cndmask_b32_e64 v4, v8, v4, s[4:5] +; CGP-NEXT: v_sub_i32_e32 v8, vcc, v2, v5 ; CGP-NEXT: v_subbrev_u32_e32 v3, vcc, 0, v3, vcc ; CGP-NEXT: s_bfe_i32 s4, -1, 0x10000 -; CGP-NEXT: v_cmp_le_u32_e32 vcc, s7, v7 -; CGP-NEXT: v_cndmask_b32_e64 v8, 0, -1, vcc -; CGP-NEXT: v_mov_b32_e32 v9, s4 +; CGP-NEXT: v_cmp_ge_u32_e32 vcc, v8, v5 +; CGP-NEXT: v_cndmask_b32_e64 v9, 0, -1, vcc +; CGP-NEXT: v_mov_b32_e32 v10, s4 ; CGP-NEXT: v_cmp_eq_u32_e32 vcc, 0, v3 -; CGP-NEXT: v_cndmask_b32_e32 v8, v9, v8, vcc -; CGP-NEXT: v_subrev_i32_e32 v9, vcc, s7, v7 +; CGP-NEXT: v_cndmask_b32_e32 v9, v10, v9, vcc +; CGP-NEXT: v_sub_i32_e32 v5, vcc, v8, v5 ; CGP-NEXT: v_subbrev_u32_e32 v10, vcc, 0, v3, vcc -; CGP-NEXT: v_cmp_ne_u32_e32 vcc, 0, v8 -; CGP-NEXT: v_cndmask_b32_e32 v7, v7, v9, vcc +; CGP-NEXT: v_cmp_ne_u32_e32 vcc, 0, v9 +; CGP-NEXT: v_cndmask_b32_e32 v5, v8, v5, vcc ; CGP-NEXT: v_cndmask_b32_e32 v3, v3, v10, vcc ; CGP-NEXT: v_cmp_ne_u32_e32 vcc, 0, v4 -; CGP-NEXT: v_cndmask_b32_e32 v2, v2, v7, vcc -; CGP-NEXT: v_cndmask_b32_e32 v3, v5, v3, vcc -; CGP-NEXT: v_xor_b32_e32 v2, v2, v6 -; CGP-NEXT: v_xor_b32_e32 v3, v3, v6 -; CGP-NEXT: v_sub_i32_e32 v2, vcc, v2, v6 -; CGP-NEXT: v_subb_u32_e32 v3, vcc, v3, v6, vcc +; CGP-NEXT: v_cndmask_b32_e32 v2, v2, v5, vcc +; CGP-NEXT: v_cndmask_b32_e32 v3, v6, v3, vcc +; CGP-NEXT: v_xor_b32_e32 v2, v2, v7 +; CGP-NEXT: v_xor_b32_e32 v3, v3, v7 +; CGP-NEXT: v_sub_i32_e32 v2, vcc, v2, v7 +; CGP-NEXT: v_subb_u32_e32 v3, vcc, v3, v7, vcc ; CGP-NEXT: s_setpc_b64 s[30:31] %result = srem <2 x i64> %num, ret <2 x i64> %result diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/urem.i64.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/urem.i64.ll index ce21d2546b8a..fdf5ba53330c 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/urem.i64.ll +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/urem.i64.ll @@ -968,129 +968,130 @@ define i64 @v_urem_i64_oddk_denom(i64 %num) { ; CHECK-LABEL: v_urem_i64_oddk_denom: ; CHECK: ; %bb.0: ; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; CHECK-NEXT: v_cvt_f32_u32_e32 v2, 0x12d8fb +; CHECK-NEXT: s_mov_b32 s4, 0x12d8fb +; CHECK-NEXT: v_mov_b32_e32 v2, 0x12d8fb ; CHECK-NEXT: v_cvt_f32_ubyte0_e32 v3, 0 -; CHECK-NEXT: s_mov_b32 s4, 0xffed2705 -; CHECK-NEXT: s_mov_b32 s6, 0x12d8fb -; CHECK-NEXT: s_bfe_i32 s5, -1, 0x10000 +; CHECK-NEXT: s_mov_b32 s5, 0xffed2705 +; CHECK-NEXT: s_bfe_i32 s6, -1, 0x10000 ; CHECK-NEXT: s_bfe_i32 s7, -1, 0x10000 -; 
CHECK-NEXT: v_mac_f32_e32 v2, 0x4f800000, v3 -; CHECK-NEXT: v_mov_b32_e32 v3, s5 -; CHECK-NEXT: v_mov_b32_e32 v4, s7 -; CHECK-NEXT: v_rcp_iflag_f32_e32 v2, v2 -; CHECK-NEXT: v_mul_f32_e32 v2, 0x5f7ffffc, v2 -; CHECK-NEXT: v_mul_f32_e32 v5, 0x2f800000, v2 -; CHECK-NEXT: v_trunc_f32_e32 v5, v5 -; CHECK-NEXT: v_mac_f32_e32 v2, 0xcf800000, v5 -; CHECK-NEXT: v_cvt_u32_f32_e32 v5, v5 -; CHECK-NEXT: v_cvt_u32_f32_e32 v2, v2 -; CHECK-NEXT: v_mul_lo_u32 v6, s4, v5 -; CHECK-NEXT: v_mul_lo_u32 v7, s4, v2 -; CHECK-NEXT: v_mul_lo_u32 v8, -1, v2 -; CHECK-NEXT: v_mul_hi_u32 v9, s4, v2 -; CHECK-NEXT: v_add_i32_e32 v6, vcc, v8, v6 -; CHECK-NEXT: v_mul_lo_u32 v8, v5, v7 -; CHECK-NEXT: v_mul_hi_u32 v10, v2, v7 -; CHECK-NEXT: v_mul_hi_u32 v7, v5, v7 -; CHECK-NEXT: v_add_i32_e32 v6, vcc, v6, v9 -; CHECK-NEXT: v_mul_lo_u32 v9, v2, v6 -; CHECK-NEXT: v_mul_lo_u32 v11, v5, v6 -; CHECK-NEXT: v_mul_hi_u32 v12, v2, v6 -; CHECK-NEXT: v_mul_hi_u32 v6, v5, v6 -; CHECK-NEXT: v_add_i32_e32 v8, vcc, v8, v9 +; CHECK-NEXT: v_cvt_f32_u32_e32 v4, s4 +; CHECK-NEXT: v_mov_b32_e32 v5, s6 +; CHECK-NEXT: v_mov_b32_e32 v6, s7 +; CHECK-NEXT: v_mac_f32_e32 v4, 0x4f800000, v3 +; CHECK-NEXT: v_rcp_iflag_f32_e32 v3, v4 +; CHECK-NEXT: v_mul_f32_e32 v3, 0x5f7ffffc, v3 +; CHECK-NEXT: v_mul_f32_e32 v4, 0x2f800000, v3 +; CHECK-NEXT: v_trunc_f32_e32 v4, v4 +; CHECK-NEXT: v_mac_f32_e32 v3, 0xcf800000, v4 +; CHECK-NEXT: v_cvt_u32_f32_e32 v4, v4 +; CHECK-NEXT: v_cvt_u32_f32_e32 v3, v3 +; CHECK-NEXT: v_mul_lo_u32 v7, s5, v4 +; CHECK-NEXT: v_mul_lo_u32 v8, s5, v3 +; CHECK-NEXT: v_mul_lo_u32 v9, -1, v3 +; CHECK-NEXT: v_mul_hi_u32 v10, s5, v3 +; CHECK-NEXT: v_add_i32_e32 v7, vcc, v9, v7 +; CHECK-NEXT: v_mul_lo_u32 v9, v4, v8 +; CHECK-NEXT: v_mul_hi_u32 v11, v3, v8 +; CHECK-NEXT: v_mul_hi_u32 v8, v4, v8 +; CHECK-NEXT: v_add_i32_e32 v7, vcc, v7, v10 +; CHECK-NEXT: v_mul_lo_u32 v10, v3, v7 +; CHECK-NEXT: v_mul_lo_u32 v12, v4, v7 +; CHECK-NEXT: v_mul_hi_u32 v13, v3, v7 +; CHECK-NEXT: v_mul_hi_u32 v7, v4, v7 +; CHECK-NEXT: v_add_i32_e32 v9, vcc, v9, v10 +; CHECK-NEXT: v_cndmask_b32_e64 v10, 0, 1, vcc +; CHECK-NEXT: v_add_i32_e32 v8, vcc, v12, v8 +; CHECK-NEXT: v_cndmask_b32_e64 v12, 0, 1, vcc +; CHECK-NEXT: v_add_i32_e32 v9, vcc, v9, v11 ; CHECK-NEXT: v_cndmask_b32_e64 v9, 0, 1, vcc -; CHECK-NEXT: v_add_i32_e32 v7, vcc, v11, v7 +; CHECK-NEXT: v_add_i32_e32 v8, vcc, v8, v13 ; CHECK-NEXT: v_cndmask_b32_e64 v11, 0, 1, vcc +; CHECK-NEXT: v_add_i32_e32 v9, vcc, v10, v9 +; CHECK-NEXT: v_add_i32_e32 v10, vcc, v12, v11 +; CHECK-NEXT: v_add_i32_e32 v8, vcc, v8, v9 +; CHECK-NEXT: v_cndmask_b32_e64 v9, 0, 1, vcc +; CHECK-NEXT: v_add_i32_e32 v9, vcc, v10, v9 +; CHECK-NEXT: v_add_i32_e32 v7, vcc, v7, v9 +; CHECK-NEXT: v_add_i32_e32 v3, vcc, v3, v8 +; CHECK-NEXT: v_addc_u32_e32 v4, vcc, v4, v7, vcc +; CHECK-NEXT: v_mul_lo_u32 v7, s5, v3 +; CHECK-NEXT: v_mul_lo_u32 v8, -1, v3 +; CHECK-NEXT: v_mul_hi_u32 v9, s5, v3 +; CHECK-NEXT: v_mul_lo_u32 v10, s5, v4 +; CHECK-NEXT: v_mul_lo_u32 v11, v4, v7 +; CHECK-NEXT: v_mul_hi_u32 v12, v3, v7 +; CHECK-NEXT: v_mul_hi_u32 v7, v4, v7 ; CHECK-NEXT: v_add_i32_e32 v8, vcc, v8, v10 -; CHECK-NEXT: v_cndmask_b32_e64 v8, 0, 1, vcc -; CHECK-NEXT: v_add_i32_e32 v7, vcc, v7, v12 +; CHECK-NEXT: v_add_i32_e32 v8, vcc, v8, v9 +; CHECK-NEXT: v_mul_lo_u32 v9, v3, v8 +; CHECK-NEXT: v_mul_lo_u32 v10, v4, v8 +; CHECK-NEXT: v_mul_hi_u32 v13, v3, v8 +; CHECK-NEXT: v_mul_hi_u32 v8, v4, v8 +; CHECK-NEXT: v_add_i32_e32 v9, vcc, v11, v9 +; CHECK-NEXT: v_cndmask_b32_e64 v11, 0, 1, vcc +; CHECK-NEXT: v_add_i32_e32 v7, vcc, v10, v7 ; CHECK-NEXT: 
v_cndmask_b32_e64 v10, 0, 1, vcc -; CHECK-NEXT: v_add_i32_e32 v8, vcc, v9, v8 -; CHECK-NEXT: v_add_i32_e32 v9, vcc, v11, v10 -; CHECK-NEXT: v_add_i32_e32 v7, vcc, v7, v8 -; CHECK-NEXT: v_cndmask_b32_e64 v8, 0, 1, vcc -; CHECK-NEXT: v_add_i32_e32 v8, vcc, v9, v8 -; CHECK-NEXT: v_add_i32_e32 v6, vcc, v6, v8 -; CHECK-NEXT: v_add_i32_e32 v2, vcc, v2, v7 -; CHECK-NEXT: v_addc_u32_e32 v5, vcc, v5, v6, vcc -; CHECK-NEXT: v_mul_lo_u32 v6, s4, v2 -; CHECK-NEXT: v_mul_lo_u32 v7, -1, v2 -; CHECK-NEXT: v_mul_hi_u32 v8, s4, v2 -; CHECK-NEXT: v_mul_lo_u32 v9, s4, v5 -; CHECK-NEXT: v_mul_lo_u32 v10, v5, v6 -; CHECK-NEXT: v_mul_hi_u32 v11, v2, v6 -; CHECK-NEXT: v_mul_hi_u32 v6, v5, v6 +; CHECK-NEXT: v_add_i32_e32 v9, vcc, v9, v12 +; CHECK-NEXT: v_cndmask_b32_e64 v9, 0, 1, vcc +; CHECK-NEXT: v_add_i32_e32 v7, vcc, v7, v13 +; CHECK-NEXT: v_cndmask_b32_e64 v12, 0, 1, vcc +; CHECK-NEXT: v_add_i32_e32 v9, vcc, v11, v9 +; CHECK-NEXT: v_add_i32_e32 v10, vcc, v10, v12 ; CHECK-NEXT: v_add_i32_e32 v7, vcc, v7, v9 -; CHECK-NEXT: v_add_i32_e32 v7, vcc, v7, v8 -; CHECK-NEXT: v_mul_lo_u32 v8, v2, v7 -; CHECK-NEXT: v_mul_lo_u32 v9, v5, v7 -; CHECK-NEXT: v_mul_hi_u32 v12, v2, v7 -; CHECK-NEXT: v_mul_hi_u32 v7, v5, v7 -; CHECK-NEXT: v_add_i32_e32 v8, vcc, v10, v8 -; CHECK-NEXT: v_cndmask_b32_e64 v10, 0, 1, vcc -; CHECK-NEXT: v_add_i32_e32 v6, vcc, v9, v6 ; CHECK-NEXT: v_cndmask_b32_e64 v9, 0, 1, vcc -; CHECK-NEXT: v_add_i32_e32 v8, vcc, v8, v11 -; CHECK-NEXT: v_cndmask_b32_e64 v8, 0, 1, vcc -; CHECK-NEXT: v_add_i32_e32 v6, vcc, v6, v12 -; CHECK-NEXT: v_cndmask_b32_e64 v11, 0, 1, vcc -; CHECK-NEXT: v_add_i32_e32 v8, vcc, v10, v8 -; CHECK-NEXT: v_add_i32_e32 v9, vcc, v9, v11 -; CHECK-NEXT: v_add_i32_e32 v6, vcc, v6, v8 -; CHECK-NEXT: v_cndmask_b32_e64 v8, 0, 1, vcc -; CHECK-NEXT: v_add_i32_e32 v8, vcc, v9, v8 -; CHECK-NEXT: v_add_i32_e32 v7, vcc, v7, v8 -; CHECK-NEXT: v_add_i32_e32 v2, vcc, v2, v6 -; CHECK-NEXT: v_addc_u32_e32 v5, vcc, v5, v7, vcc -; CHECK-NEXT: v_mul_lo_u32 v6, v1, v2 -; CHECK-NEXT: v_mul_hi_u32 v7, v0, v2 -; CHECK-NEXT: v_mul_hi_u32 v2, v1, v2 -; CHECK-NEXT: v_mul_lo_u32 v8, v0, v5 -; CHECK-NEXT: v_mul_lo_u32 v9, v1, v5 -; CHECK-NEXT: v_mul_hi_u32 v10, v0, v5 -; CHECK-NEXT: v_mul_hi_u32 v5, v1, v5 -; CHECK-NEXT: v_add_i32_e32 v6, vcc, v6, v8 -; CHECK-NEXT: v_cndmask_b32_e64 v8, 0, 1, vcc -; CHECK-NEXT: v_add_i32_e32 v2, vcc, v9, v2 +; CHECK-NEXT: v_add_i32_e32 v9, vcc, v10, v9 +; CHECK-NEXT: v_add_i32_e32 v8, vcc, v8, v9 +; CHECK-NEXT: v_add_i32_e32 v3, vcc, v3, v7 +; CHECK-NEXT: v_addc_u32_e32 v4, vcc, v4, v8, vcc +; CHECK-NEXT: v_mul_lo_u32 v7, v1, v3 +; CHECK-NEXT: v_mul_hi_u32 v8, v0, v3 +; CHECK-NEXT: v_mul_hi_u32 v3, v1, v3 +; CHECK-NEXT: v_mul_lo_u32 v9, v0, v4 +; CHECK-NEXT: v_mul_lo_u32 v10, v1, v4 +; CHECK-NEXT: v_mul_hi_u32 v11, v0, v4 +; CHECK-NEXT: v_mul_hi_u32 v4, v1, v4 +; CHECK-NEXT: v_add_i32_e32 v7, vcc, v7, v9 ; CHECK-NEXT: v_cndmask_b32_e64 v9, 0, 1, vcc -; CHECK-NEXT: v_add_i32_e32 v6, vcc, v6, v7 -; CHECK-NEXT: v_cndmask_b32_e64 v6, 0, 1, vcc -; CHECK-NEXT: v_add_i32_e32 v2, vcc, v2, v10 +; CHECK-NEXT: v_add_i32_e32 v3, vcc, v10, v3 +; CHECK-NEXT: v_cndmask_b32_e64 v10, 0, 1, vcc +; CHECK-NEXT: v_add_i32_e32 v7, vcc, v7, v8 ; CHECK-NEXT: v_cndmask_b32_e64 v7, 0, 1, vcc -; CHECK-NEXT: v_add_i32_e32 v6, vcc, v8, v6 +; CHECK-NEXT: v_add_i32_e32 v3, vcc, v3, v11 +; CHECK-NEXT: v_cndmask_b32_e64 v8, 0, 1, vcc ; CHECK-NEXT: v_add_i32_e32 v7, vcc, v9, v7 -; CHECK-NEXT: v_add_i32_e32 v2, vcc, v2, v6 -; CHECK-NEXT: v_cndmask_b32_e64 v6, 0, 1, vcc -; CHECK-NEXT: v_add_i32_e32 v6, vcc, v7, v6 
-; CHECK-NEXT: v_mul_lo_u32 v7, s6, v2 -; CHECK-NEXT: v_mul_lo_u32 v8, 0, v2 -; CHECK-NEXT: v_mul_hi_u32 v2, s6, v2 -; CHECK-NEXT: v_add_i32_e32 v5, vcc, v5, v6 -; CHECK-NEXT: v_mul_lo_u32 v5, s6, v5 -; CHECK-NEXT: v_add_i32_e32 v5, vcc, v8, v5 -; CHECK-NEXT: v_add_i32_e32 v2, vcc, v5, v2 -; CHECK-NEXT: v_sub_i32_e32 v0, vcc, v0, v7 -; CHECK-NEXT: v_subb_u32_e64 v5, s[4:5], v1, v2, vcc -; CHECK-NEXT: v_sub_i32_e64 v1, s[4:5], v1, v2 -; CHECK-NEXT: v_cmp_le_u32_e64 s[4:5], s6, v0 -; CHECK-NEXT: v_cndmask_b32_e64 v2, 0, -1, s[4:5] -; CHECK-NEXT: v_cmp_eq_u32_e64 s[4:5], 0, v5 -; CHECK-NEXT: v_cndmask_b32_e64 v2, v3, v2, s[4:5] +; CHECK-NEXT: v_add_i32_e32 v8, vcc, v10, v8 +; CHECK-NEXT: v_add_i32_e32 v3, vcc, v3, v7 +; CHECK-NEXT: v_cndmask_b32_e64 v7, 0, 1, vcc +; CHECK-NEXT: v_add_i32_e32 v7, vcc, v8, v7 +; CHECK-NEXT: v_mul_lo_u32 v8, s4, v3 +; CHECK-NEXT: v_mul_lo_u32 v9, 0, v3 +; CHECK-NEXT: v_mul_hi_u32 v3, s4, v3 +; CHECK-NEXT: v_add_i32_e32 v4, vcc, v4, v7 +; CHECK-NEXT: v_mul_lo_u32 v4, s4, v4 +; CHECK-NEXT: v_add_i32_e32 v4, vcc, v9, v4 +; CHECK-NEXT: v_add_i32_e32 v3, vcc, v4, v3 +; CHECK-NEXT: v_sub_i32_e32 v0, vcc, v0, v8 +; CHECK-NEXT: v_subb_u32_e64 v4, s[4:5], v1, v3, vcc +; CHECK-NEXT: v_sub_i32_e64 v1, s[4:5], v1, v3 +; CHECK-NEXT: v_cmp_ge_u32_e64 s[4:5], v0, v2 +; CHECK-NEXT: v_cndmask_b32_e64 v3, 0, -1, s[4:5] +; CHECK-NEXT: v_cmp_eq_u32_e64 s[4:5], 0, v4 +; CHECK-NEXT: v_cndmask_b32_e64 v3, v5, v3, s[4:5] ; CHECK-NEXT: v_subbrev_u32_e32 v1, vcc, 0, v1, vcc -; CHECK-NEXT: v_subrev_i32_e32 v3, vcc, s6, v0 +; CHECK-NEXT: v_sub_i32_e32 v5, vcc, v0, v2 ; CHECK-NEXT: v_subbrev_u32_e32 v1, vcc, 0, v1, vcc -; CHECK-NEXT: v_cmp_le_u32_e32 vcc, s6, v3 -; CHECK-NEXT: v_cndmask_b32_e64 v6, 0, -1, vcc +; CHECK-NEXT: v_cmp_ge_u32_e32 vcc, v5, v2 +; CHECK-NEXT: v_cndmask_b32_e64 v7, 0, -1, vcc ; CHECK-NEXT: v_cmp_eq_u32_e32 vcc, 0, v1 -; CHECK-NEXT: v_cndmask_b32_e32 v4, v4, v6, vcc -; CHECK-NEXT: v_subrev_i32_e32 v6, vcc, s6, v3 +; CHECK-NEXT: v_cndmask_b32_e32 v6, v6, v7, vcc +; CHECK-NEXT: v_sub_i32_e32 v2, vcc, v5, v2 ; CHECK-NEXT: v_subbrev_u32_e32 v7, vcc, 0, v1, vcc -; CHECK-NEXT: v_cmp_ne_u32_e32 vcc, 0, v4 -; CHECK-NEXT: v_cndmask_b32_e32 v3, v3, v6, vcc +; CHECK-NEXT: v_cmp_ne_u32_e32 vcc, 0, v6 +; CHECK-NEXT: v_cndmask_b32_e32 v2, v5, v2, vcc ; CHECK-NEXT: v_cndmask_b32_e32 v1, v1, v7, vcc -; CHECK-NEXT: v_cmp_ne_u32_e32 vcc, 0, v2 -; CHECK-NEXT: v_cndmask_b32_e32 v0, v0, v3, vcc -; CHECK-NEXT: v_cndmask_b32_e32 v1, v5, v1, vcc +; CHECK-NEXT: v_cmp_ne_u32_e32 vcc, 0, v3 +; CHECK-NEXT: v_cndmask_b32_e32 v0, v0, v2, vcc +; CHECK-NEXT: v_cndmask_b32_e32 v1, v4, v1, vcc ; CHECK-NEXT: s_setpc_b64 s[30:31] %result = urem i64 %num, 1235195 ret i64 %result @@ -1357,63 +1358,64 @@ define <2 x i64> @v_urem_v2i64_oddk_denom(<2 x i64> %num) { ; CGP-LABEL: v_urem_v2i64_oddk_denom: ; CGP: ; %bb.0: ; CGP-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; CGP-NEXT: v_cvt_f32_u32_e32 v4, 0x12d8fb +; CGP-NEXT: s_mov_b32 s8, 0x12d8fb +; CGP-NEXT: v_mov_b32_e32 v4, 0x12d8fb ; CGP-NEXT: v_cvt_f32_ubyte0_e32 v5, 0 ; CGP-NEXT: s_mov_b32 s6, 0xffed2705 -; CGP-NEXT: s_mov_b32 s8, 0x12d8fb ; CGP-NEXT: s_bfe_i32 s4, -1, 0x10000 ; CGP-NEXT: s_bfe_i32 s5, -1, 0x10000 +; CGP-NEXT: v_cvt_f32_ubyte0_e32 v6, 0 ; CGP-NEXT: s_bfe_i32 s7, -1, 0x10000 ; CGP-NEXT: s_bfe_i32 s9, -1, 0x10000 -; CGP-NEXT: v_mov_b32_e32 v6, v4 -; CGP-NEXT: v_mov_b32_e32 v7, s4 -; CGP-NEXT: v_mov_b32_e32 v8, s5 -; CGP-NEXT: v_mac_f32_e32 v4, 0x4f800000, v5 -; CGP-NEXT: v_mov_b32_e32 v9, s7 -; CGP-NEXT: v_mac_f32_e32 v6, 0x4f800000, v5 
-; CGP-NEXT: v_rcp_iflag_f32_e32 v4, v4 -; CGP-NEXT: v_rcp_iflag_f32_e32 v5, v6 -; CGP-NEXT: v_mul_f32_e32 v4, 0x5f7ffffc, v4 +; CGP-NEXT: v_cvt_f32_u32_e32 v7, s8 +; CGP-NEXT: v_mov_b32_e32 v8, s4 +; CGP-NEXT: v_mov_b32_e32 v9, s5 +; CGP-NEXT: v_cvt_f32_u32_e32 v10, v4 +; CGP-NEXT: v_mac_f32_e32 v7, 0x4f800000, v5 +; CGP-NEXT: v_mac_f32_e32 v10, 0x4f800000, v6 +; CGP-NEXT: v_rcp_iflag_f32_e32 v5, v7 +; CGP-NEXT: v_rcp_iflag_f32_e32 v6, v10 ; CGP-NEXT: v_mul_f32_e32 v5, 0x5f7ffffc, v5 -; CGP-NEXT: v_mul_f32_e32 v6, 0x2f800000, v4 -; CGP-NEXT: v_mul_f32_e32 v10, 0x2f800000, v5 -; CGP-NEXT: v_trunc_f32_e32 v6, v6 +; CGP-NEXT: v_mul_f32_e32 v6, 0x5f7ffffc, v6 +; CGP-NEXT: v_mul_f32_e32 v7, 0x2f800000, v5 +; CGP-NEXT: v_mul_f32_e32 v10, 0x2f800000, v6 +; CGP-NEXT: v_trunc_f32_e32 v7, v7 ; CGP-NEXT: v_trunc_f32_e32 v10, v10 -; CGP-NEXT: v_mac_f32_e32 v4, 0xcf800000, v6 -; CGP-NEXT: v_cvt_u32_f32_e32 v6, v6 -; CGP-NEXT: v_mac_f32_e32 v5, 0xcf800000, v10 +; CGP-NEXT: v_mac_f32_e32 v5, 0xcf800000, v7 +; CGP-NEXT: v_cvt_u32_f32_e32 v7, v7 +; CGP-NEXT: v_mac_f32_e32 v6, 0xcf800000, v10 ; CGP-NEXT: v_cvt_u32_f32_e32 v10, v10 -; CGP-NEXT: v_cvt_u32_f32_e32 v4, v4 -; CGP-NEXT: v_mul_lo_u32 v11, s6, v6 ; CGP-NEXT: v_cvt_u32_f32_e32 v5, v5 +; CGP-NEXT: v_mul_lo_u32 v11, s6, v7 +; CGP-NEXT: v_cvt_u32_f32_e32 v6, v6 ; CGP-NEXT: v_mul_lo_u32 v12, s6, v10 -; CGP-NEXT: v_mul_lo_u32 v13, s6, v4 -; CGP-NEXT: v_mul_lo_u32 v14, -1, v4 -; CGP-NEXT: v_mul_hi_u32 v15, s6, v4 -; CGP-NEXT: v_mul_lo_u32 v16, s6, v5 -; CGP-NEXT: v_mul_lo_u32 v17, -1, v5 -; CGP-NEXT: v_mul_hi_u32 v18, s6, v5 +; CGP-NEXT: v_mul_lo_u32 v13, s6, v5 +; CGP-NEXT: v_mul_lo_u32 v14, -1, v5 +; CGP-NEXT: v_mul_hi_u32 v15, s6, v5 +; CGP-NEXT: v_mul_lo_u32 v16, s6, v6 +; CGP-NEXT: v_mul_lo_u32 v17, -1, v6 +; CGP-NEXT: v_mul_hi_u32 v18, s6, v6 ; CGP-NEXT: v_add_i32_e32 v11, vcc, v14, v11 -; CGP-NEXT: v_mul_lo_u32 v14, v6, v13 -; CGP-NEXT: v_mul_hi_u32 v19, v4, v13 -; CGP-NEXT: v_mul_hi_u32 v13, v6, v13 +; CGP-NEXT: v_mul_lo_u32 v14, v7, v13 +; CGP-NEXT: v_mul_hi_u32 v19, v5, v13 +; CGP-NEXT: v_mul_hi_u32 v13, v7, v13 ; CGP-NEXT: v_add_i32_e32 v12, vcc, v17, v12 ; CGP-NEXT: v_mul_lo_u32 v17, v10, v16 ; CGP-NEXT: v_add_i32_e32 v11, vcc, v11, v15 -; CGP-NEXT: v_mul_hi_u32 v15, v5, v16 +; CGP-NEXT: v_mul_hi_u32 v15, v6, v16 ; CGP-NEXT: v_mul_hi_u32 v16, v10, v16 ; CGP-NEXT: v_add_i32_e32 v12, vcc, v12, v18 -; CGP-NEXT: v_mul_lo_u32 v18, v5, v12 +; CGP-NEXT: v_mul_lo_u32 v18, v6, v12 ; CGP-NEXT: v_add_i32_e32 v17, vcc, v17, v18 ; CGP-NEXT: v_cndmask_b32_e64 v18, 0, 1, vcc ; CGP-NEXT: v_add_i32_e32 v15, vcc, v17, v15 -; CGP-NEXT: v_mul_lo_u32 v15, v4, v11 -; CGP-NEXT: v_mul_lo_u32 v17, v6, v11 +; CGP-NEXT: v_mul_lo_u32 v15, v5, v11 +; CGP-NEXT: v_mul_lo_u32 v17, v7, v11 ; CGP-NEXT: v_add_i32_e64 v14, s[4:5], v14, v15 ; CGP-NEXT: v_cndmask_b32_e64 v15, 0, 1, s[4:5] ; CGP-NEXT: v_add_i32_e64 v14, s[4:5], v14, v19 -; CGP-NEXT: v_mul_hi_u32 v14, v4, v11 -; CGP-NEXT: v_mul_hi_u32 v11, v6, v11 +; CGP-NEXT: v_mul_hi_u32 v14, v5, v11 +; CGP-NEXT: v_mul_hi_u32 v11, v7, v11 ; CGP-NEXT: v_cndmask_b32_e64 v19, 0, 1, s[4:5] ; CGP-NEXT: v_add_i32_e64 v15, s[4:5], v15, v19 ; CGP-NEXT: v_mul_lo_u32 v19, v10, v12 @@ -1424,7 +1426,7 @@ define <2 x i64> @v_urem_v2i64_oddk_denom(<2 x i64> %num) { ; CGP-NEXT: v_add_i32_e64 v14, s[4:5], v17, v14 ; CGP-NEXT: v_cndmask_b32_e64 v17, 0, 1, vcc ; CGP-NEXT: v_add_i32_e32 v17, vcc, v18, v17 -; CGP-NEXT: v_mul_hi_u32 v18, v5, v12 +; CGP-NEXT: v_mul_hi_u32 v18, v6, v12 ; CGP-NEXT: v_mul_hi_u32 v12, v10, v12 ; CGP-NEXT: v_add_i32_e32 
v16, vcc, v19, v16 ; CGP-NEXT: v_cndmask_b32_e64 v19, 0, 1, vcc @@ -1439,38 +1441,38 @@ define <2 x i64> @v_urem_v2i64_oddk_denom(<2 x i64> %num) { ; CGP-NEXT: v_add_i32_e32 v15, vcc, v18, v17 ; CGP-NEXT: v_add_i32_e32 v11, vcc, v11, v14 ; CGP-NEXT: v_add_i32_e32 v12, vcc, v12, v15 -; CGP-NEXT: v_add_i32_e32 v4, vcc, v4, v13 -; CGP-NEXT: v_addc_u32_e32 v6, vcc, v6, v11, vcc -; CGP-NEXT: v_mul_lo_u32 v11, s6, v4 -; CGP-NEXT: v_mul_lo_u32 v13, -1, v4 -; CGP-NEXT: v_mul_hi_u32 v14, s6, v4 -; CGP-NEXT: v_add_i32_e32 v5, vcc, v5, v16 +; CGP-NEXT: v_add_i32_e32 v5, vcc, v5, v13 +; CGP-NEXT: v_addc_u32_e32 v7, vcc, v7, v11, vcc +; CGP-NEXT: v_mul_lo_u32 v11, s6, v5 +; CGP-NEXT: v_mul_lo_u32 v13, -1, v5 +; CGP-NEXT: v_mul_hi_u32 v14, s6, v5 +; CGP-NEXT: v_add_i32_e32 v6, vcc, v6, v16 ; CGP-NEXT: v_addc_u32_e32 v10, vcc, v10, v12, vcc -; CGP-NEXT: v_mul_lo_u32 v12, s6, v5 -; CGP-NEXT: v_mul_lo_u32 v15, -1, v5 -; CGP-NEXT: v_mul_hi_u32 v16, s6, v5 -; CGP-NEXT: v_mul_lo_u32 v17, s6, v6 -; CGP-NEXT: v_mul_lo_u32 v18, v6, v11 -; CGP-NEXT: v_mul_hi_u32 v19, v4, v11 -; CGP-NEXT: v_mul_hi_u32 v11, v6, v11 +; CGP-NEXT: v_mul_lo_u32 v12, s6, v6 +; CGP-NEXT: v_mul_lo_u32 v15, -1, v6 +; CGP-NEXT: v_mul_hi_u32 v16, s6, v6 +; CGP-NEXT: v_mul_lo_u32 v17, s6, v7 +; CGP-NEXT: v_mul_lo_u32 v18, v7, v11 +; CGP-NEXT: v_mul_hi_u32 v19, v5, v11 +; CGP-NEXT: v_mul_hi_u32 v11, v7, v11 ; CGP-NEXT: v_add_i32_e32 v13, vcc, v13, v17 ; CGP-NEXT: v_mul_lo_u32 v17, s6, v10 ; CGP-NEXT: v_add_i32_e32 v15, vcc, v15, v17 ; CGP-NEXT: v_mul_lo_u32 v17, v10, v12 ; CGP-NEXT: v_add_i32_e32 v13, vcc, v13, v14 -; CGP-NEXT: v_mul_hi_u32 v14, v5, v12 +; CGP-NEXT: v_mul_hi_u32 v14, v6, v12 ; CGP-NEXT: v_mul_hi_u32 v12, v10, v12 ; CGP-NEXT: v_add_i32_e32 v15, vcc, v15, v16 -; CGP-NEXT: v_mul_lo_u32 v16, v5, v15 +; CGP-NEXT: v_mul_lo_u32 v16, v6, v15 ; CGP-NEXT: v_add_i32_e32 v16, vcc, v17, v16 ; CGP-NEXT: v_cndmask_b32_e64 v17, 0, 1, vcc ; CGP-NEXT: v_add_i32_e32 v14, vcc, v16, v14 -; CGP-NEXT: v_mul_lo_u32 v14, v4, v13 -; CGP-NEXT: v_mul_lo_u32 v16, v6, v13 +; CGP-NEXT: v_mul_lo_u32 v14, v5, v13 +; CGP-NEXT: v_mul_lo_u32 v16, v7, v13 ; CGP-NEXT: v_add_i32_e64 v14, s[4:5], v18, v14 ; CGP-NEXT: v_cndmask_b32_e64 v18, 0, 1, s[4:5] ; CGP-NEXT: v_add_i32_e64 v14, s[4:5], v14, v19 -; CGP-NEXT: v_mul_hi_u32 v14, v4, v13 +; CGP-NEXT: v_mul_hi_u32 v14, v5, v13 ; CGP-NEXT: v_cndmask_b32_e64 v19, 0, 1, s[4:5] ; CGP-NEXT: v_add_i32_e64 v18, s[4:5], v18, v19 ; CGP-NEXT: v_mul_lo_u32 v19, v10, v15 @@ -1481,125 +1483,126 @@ define <2 x i64> @v_urem_v2i64_oddk_denom(<2 x i64> %num) { ; CGP-NEXT: v_add_i32_e64 v14, s[4:5], v16, v14 ; CGP-NEXT: v_cndmask_b32_e64 v16, 0, 1, vcc ; CGP-NEXT: v_add_i32_e32 v16, vcc, v17, v16 -; CGP-NEXT: v_mul_hi_u32 v17, v5, v15 +; CGP-NEXT: v_mul_hi_u32 v17, v6, v15 ; CGP-NEXT: v_add_i32_e32 v12, vcc, v19, v12 ; CGP-NEXT: v_cndmask_b32_e64 v19, 0, 1, vcc ; CGP-NEXT: v_add_i32_e32 v12, vcc, v12, v17 ; CGP-NEXT: v_cndmask_b32_e64 v17, 0, 1, vcc ; CGP-NEXT: v_add_i32_e32 v17, vcc, v19, v17 -; CGP-NEXT: v_mov_b32_e32 v19, s9 -; CGP-NEXT: v_mul_hi_u32 v13, v6, v13 -; CGP-NEXT: v_mul_hi_u32 v15, v10, v15 +; CGP-NEXT: v_mov_b32_e32 v19, s7 ; CGP-NEXT: v_add_i32_e32 v11, vcc, v11, v18 ; CGP-NEXT: v_cndmask_b32_e64 v18, 0, 1, vcc +; CGP-NEXT: v_add_i32_e32 v14, vcc, v14, v18 +; CGP-NEXT: v_mov_b32_e32 v18, s9 +; CGP-NEXT: v_mul_hi_u32 v13, v7, v13 +; CGP-NEXT: v_mul_hi_u32 v15, v10, v15 ; CGP-NEXT: v_add_i32_e32 v12, vcc, v12, v16 ; CGP-NEXT: v_cndmask_b32_e64 v16, 0, 1, vcc -; CGP-NEXT: v_add_i32_e32 v14, vcc, v14, v18 ; CGP-NEXT: 
v_add_i32_e32 v16, vcc, v17, v16 ; CGP-NEXT: v_add_i32_e32 v13, vcc, v13, v14 ; CGP-NEXT: v_add_i32_e32 v14, vcc, v15, v16 -; CGP-NEXT: v_add_i32_e32 v4, vcc, v4, v11 -; CGP-NEXT: v_addc_u32_e32 v6, vcc, v6, v13, vcc -; CGP-NEXT: v_mul_lo_u32 v11, v3, v4 -; CGP-NEXT: v_mul_hi_u32 v13, v2, v4 -; CGP-NEXT: v_mul_hi_u32 v4, v3, v4 -; CGP-NEXT: v_add_i32_e32 v5, vcc, v5, v12 -; CGP-NEXT: v_addc_u32_e32 v10, vcc, v10, v14, vcc -; CGP-NEXT: v_mul_lo_u32 v12, v1, v5 -; CGP-NEXT: v_mul_hi_u32 v14, v0, v5 +; CGP-NEXT: v_add_i32_e32 v5, vcc, v5, v11 +; CGP-NEXT: v_addc_u32_e32 v7, vcc, v7, v13, vcc +; CGP-NEXT: v_mul_lo_u32 v11, v1, v5 +; CGP-NEXT: v_mul_hi_u32 v13, v0, v5 ; CGP-NEXT: v_mul_hi_u32 v5, v1, v5 -; CGP-NEXT: v_mul_lo_u32 v15, v2, v6 -; CGP-NEXT: v_mul_lo_u32 v16, v3, v6 -; CGP-NEXT: v_mul_hi_u32 v17, v2, v6 +; CGP-NEXT: v_add_i32_e32 v6, vcc, v6, v12 +; CGP-NEXT: v_addc_u32_e32 v10, vcc, v10, v14, vcc +; CGP-NEXT: v_mul_lo_u32 v12, v3, v6 +; CGP-NEXT: v_mul_hi_u32 v14, v2, v6 ; CGP-NEXT: v_mul_hi_u32 v6, v3, v6 -; CGP-NEXT: v_mul_lo_u32 v18, v0, v10 -; CGP-NEXT: v_add_i32_e32 v12, vcc, v12, v18 -; CGP-NEXT: v_cndmask_b32_e64 v18, 0, 1, vcc -; CGP-NEXT: v_add_i32_e32 v12, vcc, v12, v14 -; CGP-NEXT: v_mul_lo_u32 v12, v1, v10 -; CGP-NEXT: v_mul_hi_u32 v14, v0, v10 -; CGP-NEXT: v_mul_hi_u32 v10, v1, v10 -; CGP-NEXT: v_add_i32_e64 v11, s[4:5], v11, v15 -; CGP-NEXT: v_cndmask_b32_e64 v15, 0, 1, s[4:5] -; CGP-NEXT: v_add_i32_e64 v4, s[4:5], v16, v4 -; CGP-NEXT: v_cndmask_b32_e64 v16, 0, 1, s[4:5] -; CGP-NEXT: v_add_i32_e64 v5, s[4:5], v12, v5 +; CGP-NEXT: v_mul_lo_u32 v15, v0, v7 +; CGP-NEXT: v_mul_lo_u32 v16, v1, v7 +; CGP-NEXT: v_mul_hi_u32 v17, v0, v7 +; CGP-NEXT: v_mul_hi_u32 v7, v1, v7 +; CGP-NEXT: v_add_i32_e32 v11, vcc, v11, v15 +; CGP-NEXT: v_cndmask_b32_e64 v15, 0, 1, vcc +; CGP-NEXT: v_add_i32_e32 v11, vcc, v11, v13 +; CGP-NEXT: v_mul_lo_u32 v11, v2, v10 +; CGP-NEXT: v_mul_lo_u32 v13, v3, v10 +; CGP-NEXT: v_add_i32_e64 v11, s[4:5], v12, v11 ; CGP-NEXT: v_cndmask_b32_e64 v12, 0, 1, s[4:5] -; CGP-NEXT: v_add_i32_e64 v11, s[4:5], v11, v13 -; CGP-NEXT: v_cndmask_b32_e64 v11, 0, 1, s[4:5] -; CGP-NEXT: v_add_i32_e64 v4, s[4:5], v4, v17 -; CGP-NEXT: v_cndmask_b32_e64 v13, 0, 1, s[4:5] +; CGP-NEXT: v_add_i32_e64 v11, s[4:5], v11, v14 +; CGP-NEXT: v_mul_hi_u32 v11, v2, v10 +; CGP-NEXT: v_mul_hi_u32 v10, v3, v10 +; CGP-NEXT: v_add_i32_e64 v5, s[6:7], v16, v5 +; CGP-NEXT: v_cndmask_b32_e64 v14, 0, 1, s[6:7] +; CGP-NEXT: v_add_i32_e64 v6, s[6:7], v13, v6 +; CGP-NEXT: v_cndmask_b32_e64 v13, 0, 1, s[6:7] +; CGP-NEXT: v_cndmask_b32_e64 v16, 0, 1, vcc +; CGP-NEXT: v_add_i32_e32 v5, vcc, v5, v17 ; CGP-NEXT: v_cndmask_b32_e64 v17, 0, 1, vcc -; CGP-NEXT: v_add_i32_e32 v5, vcc, v5, v14 -; CGP-NEXT: v_cndmask_b32_e64 v14, 0, 1, vcc -; CGP-NEXT: v_add_i32_e32 v11, vcc, v15, v11 -; CGP-NEXT: v_add_i32_e32 v13, vcc, v16, v13 -; CGP-NEXT: v_add_i32_e32 v15, vcc, v18, v17 -; CGP-NEXT: v_add_i32_e32 v12, vcc, v12, v14 -; CGP-NEXT: v_add_i32_e32 v4, vcc, v4, v11 +; CGP-NEXT: v_add_i32_e32 v15, vcc, v15, v16 +; CGP-NEXT: v_cndmask_b32_e64 v16, 0, 1, s[4:5] +; CGP-NEXT: v_add_i32_e32 v6, vcc, v6, v11 ; CGP-NEXT: v_cndmask_b32_e64 v11, 0, 1, vcc -; CGP-NEXT: v_add_i32_e32 v5, vcc, v5, v15 -; CGP-NEXT: v_cndmask_b32_e64 v14, 0, 1, vcc +; CGP-NEXT: v_add_i32_e32 v14, vcc, v14, v17 +; CGP-NEXT: v_add_i32_e32 v12, vcc, v12, v16 ; CGP-NEXT: v_add_i32_e32 v11, vcc, v13, v11 -; CGP-NEXT: v_mul_lo_u32 v13, s8, v4 -; CGP-NEXT: v_mul_lo_u32 v15, 0, v4 -; CGP-NEXT: v_mul_hi_u32 v4, s8, v4 -; CGP-NEXT: v_add_i32_e32 v12, vcc, 
v12, v14 +; CGP-NEXT: v_add_i32_e32 v5, vcc, v5, v15 +; CGP-NEXT: v_cndmask_b32_e64 v13, 0, 1, vcc +; CGP-NEXT: v_add_i32_e32 v6, vcc, v6, v12 +; CGP-NEXT: v_cndmask_b32_e64 v12, 0, 1, vcc +; CGP-NEXT: v_add_i32_e32 v13, vcc, v14, v13 ; CGP-NEXT: v_mul_lo_u32 v14, s8, v5 -; CGP-NEXT: v_mul_lo_u32 v16, 0, v5 +; CGP-NEXT: v_mul_lo_u32 v15, 0, v5 ; CGP-NEXT: v_mul_hi_u32 v5, s8, v5 -; CGP-NEXT: v_add_i32_e32 v6, vcc, v6, v11 -; CGP-NEXT: v_add_i32_e32 v10, vcc, v10, v12 -; CGP-NEXT: v_mul_lo_u32 v6, s8, v6 +; CGP-NEXT: v_add_i32_e32 v11, vcc, v11, v12 +; CGP-NEXT: v_mul_lo_u32 v12, s8, v6 +; CGP-NEXT: v_mul_lo_u32 v16, 0, v6 +; CGP-NEXT: v_mul_hi_u32 v6, s8, v6 +; CGP-NEXT: v_add_i32_e32 v7, vcc, v7, v13 +; CGP-NEXT: v_add_i32_e32 v10, vcc, v10, v11 +; CGP-NEXT: v_mul_lo_u32 v7, s8, v7 ; CGP-NEXT: v_mul_lo_u32 v10, s8, v10 -; CGP-NEXT: v_add_i32_e32 v6, vcc, v15, v6 +; CGP-NEXT: v_add_i32_e32 v7, vcc, v15, v7 ; CGP-NEXT: v_add_i32_e32 v10, vcc, v16, v10 -; CGP-NEXT: v_add_i32_e32 v4, vcc, v6, v4 -; CGP-NEXT: v_add_i32_e32 v5, vcc, v10, v5 -; CGP-NEXT: v_sub_i32_e32 v2, vcc, v2, v13 -; CGP-NEXT: v_subb_u32_e64 v6, s[4:5], v3, v4, vcc -; CGP-NEXT: v_sub_i32_e64 v3, s[4:5], v3, v4 -; CGP-NEXT: v_cmp_le_u32_e64 s[4:5], s8, v2 -; CGP-NEXT: v_cndmask_b32_e64 v4, 0, -1, s[4:5] -; CGP-NEXT: v_sub_i32_e64 v0, s[4:5], v0, v14 -; CGP-NEXT: v_subb_u32_e64 v10, s[6:7], v1, v5, s[4:5] -; CGP-NEXT: v_sub_i32_e64 v1, s[6:7], v1, v5 -; CGP-NEXT: v_cmp_le_u32_e64 s[6:7], s8, v0 -; CGP-NEXT: v_cndmask_b32_e64 v5, 0, -1, s[6:7] -; CGP-NEXT: v_cmp_eq_u32_e64 s[6:7], 0, v6 -; CGP-NEXT: v_cndmask_b32_e64 v4, v9, v4, s[6:7] -; CGP-NEXT: v_subbrev_u32_e32 v3, vcc, 0, v3, vcc +; CGP-NEXT: v_add_i32_e32 v5, vcc, v7, v5 +; CGP-NEXT: v_add_i32_e32 v6, vcc, v10, v6 +; CGP-NEXT: v_sub_i32_e32 v0, vcc, v0, v14 +; CGP-NEXT: v_subb_u32_e64 v7, s[4:5], v1, v5, vcc +; CGP-NEXT: v_sub_i32_e64 v1, s[4:5], v1, v5 +; CGP-NEXT: v_cmp_ge_u32_e64 s[4:5], v0, v4 +; CGP-NEXT: v_cndmask_b32_e64 v5, 0, -1, s[4:5] +; CGP-NEXT: v_sub_i32_e64 v2, s[4:5], v2, v12 +; CGP-NEXT: v_subb_u32_e64 v10, s[6:7], v3, v6, s[4:5] +; CGP-NEXT: v_sub_i32_e64 v3, s[6:7], v3, v6 +; CGP-NEXT: v_cmp_ge_u32_e64 s[6:7], v2, v4 +; CGP-NEXT: v_cndmask_b32_e64 v6, 0, -1, s[6:7] +; CGP-NEXT: v_cmp_eq_u32_e64 s[6:7], 0, v7 +; CGP-NEXT: v_cndmask_b32_e64 v5, v8, v5, s[6:7] +; CGP-NEXT: v_subbrev_u32_e32 v1, vcc, 0, v1, vcc ; CGP-NEXT: v_cmp_eq_u32_e32 vcc, 0, v10 -; CGP-NEXT: v_cndmask_b32_e32 v5, v7, v5, vcc -; CGP-NEXT: v_subbrev_u32_e64 v1, vcc, 0, v1, s[4:5] -; CGP-NEXT: v_subrev_i32_e32 v7, vcc, s8, v2 -; CGP-NEXT: v_subbrev_u32_e32 v3, vcc, 0, v3, vcc -; CGP-NEXT: v_cmp_le_u32_e32 vcc, s8, v7 -; CGP-NEXT: v_cndmask_b32_e64 v9, 0, -1, vcc -; CGP-NEXT: v_subrev_i32_e32 v11, vcc, s8, v0 +; CGP-NEXT: v_cndmask_b32_e32 v6, v19, v6, vcc +; CGP-NEXT: v_subbrev_u32_e64 v3, vcc, 0, v3, s[4:5] +; CGP-NEXT: v_sub_i32_e32 v8, vcc, v0, v4 ; CGP-NEXT: v_subbrev_u32_e32 v1, vcc, 0, v1, vcc -; CGP-NEXT: v_cmp_le_u32_e32 vcc, s8, v11 -; CGP-NEXT: v_cndmask_b32_e64 v12, 0, -1, vcc -; CGP-NEXT: v_cmp_eq_u32_e32 vcc, 0, v3 -; CGP-NEXT: v_cndmask_b32_e32 v9, v19, v9, vcc -; CGP-NEXT: v_subrev_i32_e32 v13, vcc, s8, v7 -; CGP-NEXT: v_subbrev_u32_e32 v14, vcc, 0, v3, vcc +; CGP-NEXT: v_cmp_ge_u32_e32 vcc, v8, v4 +; CGP-NEXT: v_cndmask_b32_e64 v11, 0, -1, vcc +; CGP-NEXT: v_sub_i32_e32 v12, vcc, v2, v4 +; CGP-NEXT: v_subbrev_u32_e32 v3, vcc, 0, v3, vcc +; CGP-NEXT: v_cmp_ge_u32_e32 vcc, v12, v4 +; CGP-NEXT: v_cndmask_b32_e64 v13, 0, -1, vcc ; CGP-NEXT: v_cmp_eq_u32_e32 vcc, 0, v1 -; 
CGP-NEXT: v_cndmask_b32_e32 v8, v8, v12, vcc -; CGP-NEXT: v_subrev_i32_e32 v12, vcc, s8, v11 -; CGP-NEXT: v_subbrev_u32_e32 v15, vcc, 0, v1, vcc +; CGP-NEXT: v_cndmask_b32_e32 v9, v9, v11, vcc +; CGP-NEXT: v_sub_i32_e32 v11, vcc, v8, v4 +; CGP-NEXT: v_subbrev_u32_e32 v14, vcc, 0, v1, vcc +; CGP-NEXT: v_cmp_eq_u32_e32 vcc, 0, v3 +; CGP-NEXT: v_cndmask_b32_e32 v13, v18, v13, vcc +; CGP-NEXT: v_sub_i32_e32 v4, vcc, v12, v4 +; CGP-NEXT: v_subbrev_u32_e32 v15, vcc, 0, v3, vcc ; CGP-NEXT: v_cmp_ne_u32_e32 vcc, 0, v9 -; CGP-NEXT: v_cndmask_b32_e32 v7, v7, v13, vcc -; CGP-NEXT: v_cmp_ne_u32_e64 s[4:5], 0, v8 -; CGP-NEXT: v_cndmask_b32_e64 v8, v11, v12, s[4:5] -; CGP-NEXT: v_cndmask_b32_e32 v3, v3, v14, vcc -; CGP-NEXT: v_cmp_ne_u32_e32 vcc, 0, v4 -; CGP-NEXT: v_cndmask_b32_e32 v2, v2, v7, vcc -; CGP-NEXT: v_cndmask_b32_e64 v1, v1, v15, s[4:5] -; CGP-NEXT: v_cmp_ne_u32_e64 s[4:5], 0, v5 -; CGP-NEXT: v_cndmask_b32_e64 v0, v0, v8, s[4:5] -; CGP-NEXT: v_cndmask_b32_e64 v1, v10, v1, s[4:5] -; CGP-NEXT: v_cndmask_b32_e32 v3, v6, v3, vcc +; CGP-NEXT: v_cndmask_b32_e32 v8, v8, v11, vcc +; CGP-NEXT: v_cmp_ne_u32_e64 s[4:5], 0, v13 +; CGP-NEXT: v_cndmask_b32_e64 v4, v12, v4, s[4:5] +; CGP-NEXT: v_cndmask_b32_e32 v1, v1, v14, vcc +; CGP-NEXT: v_cmp_ne_u32_e32 vcc, 0, v5 +; CGP-NEXT: v_cndmask_b32_e32 v0, v0, v8, vcc +; CGP-NEXT: v_cndmask_b32_e64 v3, v3, v15, s[4:5] +; CGP-NEXT: v_cmp_ne_u32_e64 s[4:5], 0, v6 +; CGP-NEXT: v_cndmask_b32_e64 v2, v2, v4, s[4:5] +; CGP-NEXT: v_cndmask_b32_e32 v1, v7, v1, vcc +; CGP-NEXT: v_cndmask_b32_e64 v3, v10, v3, s[4:5] ; CGP-NEXT: s_setpc_b64 s[30:31] %result = urem <2 x i64> %num, ret <2 x i64> %result diff --git a/llvm/test/CodeGen/AMDGPU/fp-min-max-atomics.ll b/llvm/test/CodeGen/AMDGPU/fp-min-max-atomics.ll index e04eec3e3ab7..b27943195857 100644 --- a/llvm/test/CodeGen/AMDGPU/fp-min-max-atomics.ll +++ b/llvm/test/CodeGen/AMDGPU/fp-min-max-atomics.ll @@ -428,30 +428,31 @@ define amdgpu_kernel void @raw_buffer_atomic_min_rtn_f32_off4_slc(<4 x i32> inre ; ; G_GFX7-LABEL: raw_buffer_atomic_min_rtn_f32_off4_slc: ; G_GFX7: ; %bb.0: ; %main_body -; G_GFX7-NEXT: s_load_dwordx2 s[12:13], s[0:1], 0xd +; G_GFX7-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0xd ; G_GFX7-NEXT: s_load_dwordx4 s[4:7], s[0:1], 0x9 -; G_GFX7-NEXT: s_load_dword s2, s[0:1], 0xf +; G_GFX7-NEXT: s_load_dword s0, s[0:1], 0xf ; G_GFX7-NEXT: s_mov_b32 m0, -1 ; G_GFX7-NEXT: s_waitcnt lgkmcnt(0) -; G_GFX7-NEXT: v_mov_b32_e32 v0, s12 -; G_GFX7-NEXT: v_mov_b32_e32 v1, s13 +; G_GFX7-NEXT: v_mov_b32_e32 v0, s2 +; G_GFX7-NEXT: v_mov_b32_e32 v1, s3 ; G_GFX7-NEXT: buffer_atomic_fmin v0, v1, s[4:7], 4 offen glc slc -; G_GFX7-NEXT: v_mov_b32_e32 v1, s2 +; G_GFX7-NEXT: v_mov_b32_e32 v1, s0 ; G_GFX7-NEXT: s_waitcnt vmcnt(0) ; G_GFX7-NEXT: ds_write_b32 v1, v0 ; G_GFX7-NEXT: s_endpgm ; ; G_GFX10-LABEL: raw_buffer_atomic_min_rtn_f32_off4_slc: ; G_GFX10: ; %bb.0: ; %main_body -; G_GFX10-NEXT: s_clause 0x2 -; G_GFX10-NEXT: s_load_dwordx2 s[12:13], s[0:1], 0x34 +; G_GFX10-NEXT: s_clause 0x1 +; G_GFX10-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x34 ; G_GFX10-NEXT: s_load_dwordx4 s[4:7], s[0:1], 0x24 -; G_GFX10-NEXT: s_load_dword s2, s[0:1], 0x3c ; G_GFX10-NEXT: s_waitcnt lgkmcnt(0) -; G_GFX10-NEXT: v_mov_b32_e32 v0, s12 -; G_GFX10-NEXT: v_mov_b32_e32 v1, s13 +; G_GFX10-NEXT: v_mov_b32_e32 v0, s2 +; G_GFX10-NEXT: v_mov_b32_e32 v1, s3 +; G_GFX10-NEXT: s_load_dword s0, s[0:1], 0x3c ; G_GFX10-NEXT: buffer_atomic_fmin v0, v1, s[4:7], 4 offen glc slc -; G_GFX10-NEXT: v_mov_b32_e32 v1, s2 +; G_GFX10-NEXT: s_waitcnt lgkmcnt(0) +; G_GFX10-NEXT: 
v_mov_b32_e32 v1, s0 ; G_GFX10-NEXT: s_waitcnt vmcnt(0) ; G_GFX10-NEXT: ds_write_b32 v1, v0 ; G_GFX10-NEXT: s_endpgm @@ -459,14 +460,14 @@ define amdgpu_kernel void @raw_buffer_atomic_min_rtn_f32_off4_slc(<4 x i32> inre ; G_GFX1030-LABEL: raw_buffer_atomic_min_rtn_f32_off4_slc: ; G_GFX1030: ; %bb.0: ; %main_body ; G_GFX1030-NEXT: s_clause 0x2 -; G_GFX1030-NEXT: s_load_dwordx2 s[12:13], s[0:1], 0x34 +; G_GFX1030-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x34 ; G_GFX1030-NEXT: s_load_dwordx4 s[4:7], s[0:1], 0x24 -; G_GFX1030-NEXT: s_load_dword s2, s[0:1], 0x3c +; G_GFX1030-NEXT: s_load_dword s0, s[0:1], 0x3c ; G_GFX1030-NEXT: s_waitcnt lgkmcnt(0) -; G_GFX1030-NEXT: v_mov_b32_e32 v0, s12 -; G_GFX1030-NEXT: v_mov_b32_e32 v1, s13 +; G_GFX1030-NEXT: v_mov_b32_e32 v0, s2 +; G_GFX1030-NEXT: v_mov_b32_e32 v1, s3 ; G_GFX1030-NEXT: buffer_atomic_fmin v0, v1, s[4:7], 4 offen glc slc -; G_GFX1030-NEXT: v_mov_b32_e32 v1, s2 +; G_GFX1030-NEXT: v_mov_b32_e32 v1, s0 ; G_GFX1030-NEXT: s_waitcnt vmcnt(0) ; G_GFX1030-NEXT: ds_write_b32 v1, v0 ; G_GFX1030-NEXT: s_endpgm diff --git a/llvm/test/CodeGen/AMDGPU/lds-atomic-fmin-fmax.ll b/llvm/test/CodeGen/AMDGPU/lds-atomic-fmin-fmax.ll index e879fce3f4af..91a403ff983a 100644 --- a/llvm/test/CodeGen/AMDGPU/lds-atomic-fmin-fmax.ll +++ b/llvm/test/CodeGen/AMDGPU/lds-atomic-fmin-fmax.ll @@ -244,25 +244,25 @@ define amdgpu_kernel void @lds_ds_fmin(float addrspace(5)* %out, float addrspace ; G_GFX9: ; %bb.0: ; G_GFX9-NEXT: s_mov_b32 s8, SCRATCH_RSRC_DWORD0 ; G_GFX9-NEXT: s_mov_b32 s9, SCRATCH_RSRC_DWORD1 -; G_GFX9-NEXT: s_load_dword s2, s[0:1], 0x2c -; G_GFX9-NEXT: s_load_dwordx2 s[6:7], s[0:1], 0x24 ; G_GFX9-NEXT: s_mov_b32 s10, -1 ; G_GFX9-NEXT: s_mov_b32 s11, 0xe00000 ; G_GFX9-NEXT: s_add_u32 s8, s8, s3 +; G_GFX9-NEXT: s_load_dword s4, s[0:1], 0x2c +; G_GFX9-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x24 ; G_GFX9-NEXT: s_addc_u32 s9, s9, 0 -; G_GFX9-NEXT: s_waitcnt lgkmcnt(0) -; G_GFX9-NEXT: s_add_i32 s0, s2, 4 -; G_GFX9-NEXT: s_lshl_b32 s1, s0, 3 -; G_GFX9-NEXT: v_mov_b32_e32 v0, s1 ; G_GFX9-NEXT: v_mov_b32_e32 v1, 0x42280000 +; G_GFX9-NEXT: s_waitcnt lgkmcnt(0) +; G_GFX9-NEXT: s_add_i32 s4, s4, 4 +; G_GFX9-NEXT: s_lshl_b32 s0, s4, 3 +; G_GFX9-NEXT: v_mov_b32_e32 v0, s0 ; G_GFX9-NEXT: ds_min_rtn_f32 v0, v0, v1 -; G_GFX9-NEXT: s_lshl_b32 s0, s0, 4 +; G_GFX9-NEXT: s_lshl_b32 s0, s4, 4 ; G_GFX9-NEXT: v_mov_b32_e32 v2, s0 ; G_GFX9-NEXT: ds_min_rtn_f32 v1, v2, v1 ; G_GFX9-NEXT: s_waitcnt lgkmcnt(0) -; G_GFX9-NEXT: v_mov_b32_e32 v1, s7 +; G_GFX9-NEXT: v_mov_b32_e32 v1, s3 ; G_GFX9-NEXT: ds_min_rtn_f32 v0, v1, v0 -; G_GFX9-NEXT: v_mov_b32_e32 v1, s6 +; G_GFX9-NEXT: v_mov_b32_e32 v1, s2 ; G_GFX9-NEXT: s_waitcnt lgkmcnt(0) ; G_GFX9-NEXT: buffer_store_dword v0, v1, s[8:11], 0 offen ; G_GFX9-NEXT: s_endpgm @@ -533,25 +533,25 @@ define amdgpu_kernel void @lds_ds_fmax(float addrspace(5)* %out, float addrspace ; G_GFX9: ; %bb.0: ; G_GFX9-NEXT: s_mov_b32 s8, SCRATCH_RSRC_DWORD0 ; G_GFX9-NEXT: s_mov_b32 s9, SCRATCH_RSRC_DWORD1 -; G_GFX9-NEXT: s_load_dword s2, s[0:1], 0x2c -; G_GFX9-NEXT: s_load_dwordx2 s[6:7], s[0:1], 0x24 ; G_GFX9-NEXT: s_mov_b32 s10, -1 ; G_GFX9-NEXT: s_mov_b32 s11, 0xe00000 ; G_GFX9-NEXT: s_add_u32 s8, s8, s3 +; G_GFX9-NEXT: s_load_dword s4, s[0:1], 0x2c +; G_GFX9-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x24 ; G_GFX9-NEXT: s_addc_u32 s9, s9, 0 -; G_GFX9-NEXT: s_waitcnt lgkmcnt(0) -; G_GFX9-NEXT: s_add_i32 s0, s2, 4 -; G_GFX9-NEXT: s_lshl_b32 s1, s0, 3 -; G_GFX9-NEXT: v_mov_b32_e32 v0, s1 ; G_GFX9-NEXT: v_mov_b32_e32 v1, 0x42280000 +; G_GFX9-NEXT: s_waitcnt lgkmcnt(0) +; 
G_GFX9-NEXT: s_add_i32 s4, s4, 4 +; G_GFX9-NEXT: s_lshl_b32 s0, s4, 3 +; G_GFX9-NEXT: v_mov_b32_e32 v0, s0 ; G_GFX9-NEXT: ds_max_rtn_f32 v0, v0, v1 -; G_GFX9-NEXT: s_lshl_b32 s0, s0, 4 +; G_GFX9-NEXT: s_lshl_b32 s0, s4, 4 ; G_GFX9-NEXT: v_mov_b32_e32 v2, s0 ; G_GFX9-NEXT: ds_max_rtn_f32 v1, v2, v1 ; G_GFX9-NEXT: s_waitcnt lgkmcnt(0) -; G_GFX9-NEXT: v_mov_b32_e32 v1, s7 +; G_GFX9-NEXT: v_mov_b32_e32 v1, s3 ; G_GFX9-NEXT: ds_max_rtn_f32 v0, v1, v0 -; G_GFX9-NEXT: v_mov_b32_e32 v1, s6 +; G_GFX9-NEXT: v_mov_b32_e32 v1, s2 ; G_GFX9-NEXT: s_waitcnt lgkmcnt(0) ; G_GFX9-NEXT: buffer_store_dword v0, v1, s[8:11], 0 offen ; G_GFX9-NEXT: s_endpgm @@ -800,23 +800,23 @@ define amdgpu_kernel void @lds_ds_fmin_f64(double addrspace(5)* %out, double add ; G_GFX7: ; %bb.0: ; G_GFX7-NEXT: s_mov_b32 s8, SCRATCH_RSRC_DWORD0 ; G_GFX7-NEXT: s_mov_b32 s9, SCRATCH_RSRC_DWORD1 -; G_GFX7-NEXT: s_load_dword s2, s[0:1], 0xb +; G_GFX7-NEXT: s_load_dword s4, s[0:1], 0xb ; G_GFX7-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9 ; G_GFX7-NEXT: s_mov_b32 s10, -1 ; G_GFX7-NEXT: s_mov_b32 s11, 0xe8f000 ; G_GFX7-NEXT: s_add_u32 s8, s8, s3 +; G_GFX7-NEXT: s_mov_b32 s2, 0 ; G_GFX7-NEXT: s_addc_u32 s9, s9, 0 -; G_GFX7-NEXT: s_mov_b32 s4, 0 +; G_GFX7-NEXT: s_mov_b32 s3, 0x40450000 +; G_GFX7-NEXT: v_mov_b32_e32 v0, s2 ; G_GFX7-NEXT: s_waitcnt lgkmcnt(0) -; G_GFX7-NEXT: s_add_i32 s2, s2, 4 -; G_GFX7-NEXT: s_mov_b32 s5, 0x40450000 -; G_GFX7-NEXT: v_mov_b32_e32 v0, s4 -; G_GFX7-NEXT: s_lshl_b32 s3, s2, 3 -; G_GFX7-NEXT: v_mov_b32_e32 v1, s5 -; G_GFX7-NEXT: v_mov_b32_e32 v2, s3 +; G_GFX7-NEXT: s_add_i32 s4, s4, 4 +; G_GFX7-NEXT: v_mov_b32_e32 v1, s3 +; G_GFX7-NEXT: s_lshl_b32 s2, s4, 3 +; G_GFX7-NEXT: v_mov_b32_e32 v2, s2 ; G_GFX7-NEXT: s_mov_b32 m0, -1 ; G_GFX7-NEXT: ds_min_rtn_f64 v[2:3], v2, v[0:1] -; G_GFX7-NEXT: s_lshl_b32 s2, s2, 4 +; G_GFX7-NEXT: s_lshl_b32 s2, s4, 4 ; G_GFX7-NEXT: v_mov_b32_e32 v4, s2 ; G_GFX7-NEXT: ds_min_rtn_f64 v[0:1], v4, v[0:1] ; G_GFX7-NEXT: s_waitcnt lgkmcnt(0) @@ -834,23 +834,23 @@ define amdgpu_kernel void @lds_ds_fmin_f64(double addrspace(5)* %out, double add ; G_VI: ; %bb.0: ; G_VI-NEXT: s_mov_b32 s88, SCRATCH_RSRC_DWORD0 ; G_VI-NEXT: s_mov_b32 s89, SCRATCH_RSRC_DWORD1 -; G_VI-NEXT: s_load_dword s2, s[0:1], 0x2c +; G_VI-NEXT: s_load_dword s4, s[0:1], 0x2c ; G_VI-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24 ; G_VI-NEXT: s_mov_b32 s90, -1 ; G_VI-NEXT: s_mov_b32 s91, 0xe80000 ; G_VI-NEXT: s_add_u32 s88, s88, s3 +; G_VI-NEXT: s_mov_b32 s2, 0 ; G_VI-NEXT: s_addc_u32 s89, s89, 0 -; G_VI-NEXT: s_mov_b32 s4, 0 +; G_VI-NEXT: s_mov_b32 s3, 0x40450000 +; G_VI-NEXT: v_mov_b32_e32 v0, s2 ; G_VI-NEXT: s_waitcnt lgkmcnt(0) -; G_VI-NEXT: s_add_i32 s2, s2, 4 -; G_VI-NEXT: s_mov_b32 s5, 0x40450000 -; G_VI-NEXT: v_mov_b32_e32 v0, s4 -; G_VI-NEXT: s_lshl_b32 s3, s2, 3 -; G_VI-NEXT: v_mov_b32_e32 v1, s5 -; G_VI-NEXT: v_mov_b32_e32 v2, s3 +; G_VI-NEXT: s_add_i32 s4, s4, 4 +; G_VI-NEXT: v_mov_b32_e32 v1, s3 +; G_VI-NEXT: s_lshl_b32 s2, s4, 3 +; G_VI-NEXT: v_mov_b32_e32 v2, s2 ; G_VI-NEXT: s_mov_b32 m0, -1 ; G_VI-NEXT: ds_min_rtn_f64 v[2:3], v2, v[0:1] -; G_VI-NEXT: s_lshl_b32 s2, s2, 4 +; G_VI-NEXT: s_lshl_b32 s2, s4, 4 ; G_VI-NEXT: v_mov_b32_e32 v4, s2 ; G_VI-NEXT: ds_min_rtn_f64 v[0:1], v4, v[0:1] ; G_VI-NEXT: s_waitcnt lgkmcnt(0) @@ -868,28 +868,28 @@ define amdgpu_kernel void @lds_ds_fmin_f64(double addrspace(5)* %out, double add ; G_GFX9: ; %bb.0: ; G_GFX9-NEXT: s_mov_b32 s8, SCRATCH_RSRC_DWORD0 ; G_GFX9-NEXT: s_mov_b32 s9, SCRATCH_RSRC_DWORD1 -; G_GFX9-NEXT: s_load_dword s2, s[0:1], 0x2c -; G_GFX9-NEXT: s_load_dwordx2 s[6:7], 
s[0:1], 0x24 ; G_GFX9-NEXT: s_mov_b32 s10, -1 ; G_GFX9-NEXT: s_mov_b32 s11, 0xe00000 -; G_GFX9-NEXT: s_mov_b32 s0, 0 ; G_GFX9-NEXT: s_add_u32 s8, s8, s3 +; G_GFX9-NEXT: s_load_dword s4, s[0:1], 0x2c +; G_GFX9-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x24 +; G_GFX9-NEXT: s_mov_b32 s0, 0 +; G_GFX9-NEXT: s_addc_u32 s9, s9, 0 ; G_GFX9-NEXT: s_mov_b32 s1, 0x40450000 ; G_GFX9-NEXT: v_mov_b32_e32 v0, s0 -; G_GFX9-NEXT: s_addc_u32 s9, s9, 0 -; G_GFX9-NEXT: v_mov_b32_e32 v1, s1 ; G_GFX9-NEXT: s_waitcnt lgkmcnt(0) -; G_GFX9-NEXT: s_add_i32 s0, s2, 4 -; G_GFX9-NEXT: s_lshl_b32 s1, s0, 3 -; G_GFX9-NEXT: v_mov_b32_e32 v2, s1 +; G_GFX9-NEXT: s_add_i32 s4, s4, 4 +; G_GFX9-NEXT: v_mov_b32_e32 v1, s1 +; G_GFX9-NEXT: s_lshl_b32 s0, s4, 3 +; G_GFX9-NEXT: v_mov_b32_e32 v2, s0 ; G_GFX9-NEXT: ds_min_rtn_f64 v[2:3], v2, v[0:1] -; G_GFX9-NEXT: s_lshl_b32 s0, s0, 4 +; G_GFX9-NEXT: s_lshl_b32 s0, s4, 4 ; G_GFX9-NEXT: v_mov_b32_e32 v5, s0 -; G_GFX9-NEXT: v_mov_b32_e32 v4, s7 +; G_GFX9-NEXT: v_mov_b32_e32 v4, s3 ; G_GFX9-NEXT: ds_min_rtn_f64 v[0:1], v5, v[0:1] ; G_GFX9-NEXT: s_waitcnt lgkmcnt(0) ; G_GFX9-NEXT: ds_min_rtn_f64 v[0:1], v4, v[2:3] -; G_GFX9-NEXT: v_mov_b32_e32 v2, s6 +; G_GFX9-NEXT: v_mov_b32_e32 v2, s2 ; G_GFX9-NEXT: s_waitcnt lgkmcnt(0) ; G_GFX9-NEXT: buffer_store_dword v0, v2, s[8:11], 0 offen ; G_GFX9-NEXT: buffer_store_dword v1, v2, s[8:11], 0 offen offset:4 @@ -897,31 +897,31 @@ define amdgpu_kernel void @lds_ds_fmin_f64(double addrspace(5)* %out, double add ; ; G_GFX10-LABEL: lds_ds_fmin_f64: ; G_GFX10: ; %bb.0: -; G_GFX10-NEXT: s_clause 0x1 -; G_GFX10-NEXT: s_load_dword s2, s[0:1], 0x2c -; G_GFX10-NEXT: s_load_dwordx2 s[6:7], s[0:1], 0x24 ; G_GFX10-NEXT: s_mov_b32 s8, SCRATCH_RSRC_DWORD0 ; G_GFX10-NEXT: s_mov_b32 s9, SCRATCH_RSRC_DWORD1 ; G_GFX10-NEXT: s_mov_b32 s10, -1 ; G_GFX10-NEXT: s_mov_b32 s11, 0x31c16000 ; G_GFX10-NEXT: s_add_u32 s8, s8, s3 +; G_GFX10-NEXT: s_clause 0x1 +; G_GFX10-NEXT: s_load_dword s4, s[0:1], 0x2c +; G_GFX10-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x24 ; G_GFX10-NEXT: s_addc_u32 s9, s9, 0 ; G_GFX10-NEXT: s_mov_b32 s0, 0 ; G_GFX10-NEXT: s_mov_b32 s1, 0x40450000 ; G_GFX10-NEXT: v_mov_b32_e32 v0, s0 ; G_GFX10-NEXT: v_mov_b32_e32 v1, s1 ; G_GFX10-NEXT: s_waitcnt lgkmcnt(0) -; G_GFX10-NEXT: s_add_i32 s2, s2, 4 -; G_GFX10-NEXT: v_mov_b32_e32 v5, s7 -; G_GFX10-NEXT: s_lshl_b32 s3, s2, 3 -; G_GFX10-NEXT: s_lshl_b32 s0, s2, 4 -; G_GFX10-NEXT: v_mov_b32_e32 v2, s3 +; G_GFX10-NEXT: s_add_i32 s4, s4, 4 +; G_GFX10-NEXT: v_mov_b32_e32 v5, s3 +; G_GFX10-NEXT: s_lshl_b32 s5, s4, 3 +; G_GFX10-NEXT: s_lshl_b32 s0, s4, 4 +; G_GFX10-NEXT: v_mov_b32_e32 v2, s5 ; G_GFX10-NEXT: v_mov_b32_e32 v4, s0 ; G_GFX10-NEXT: ds_min_rtn_f64 v[2:3], v2, v[0:1] ; G_GFX10-NEXT: ds_min_rtn_f64 v[0:1], v4, v[0:1] ; G_GFX10-NEXT: s_waitcnt lgkmcnt(0) ; G_GFX10-NEXT: ds_min_rtn_f64 v[0:1], v5, v[2:3] -; G_GFX10-NEXT: v_mov_b32_e32 v2, s6 +; G_GFX10-NEXT: v_mov_b32_e32 v2, s2 ; G_GFX10-NEXT: s_waitcnt lgkmcnt(0) ; G_GFX10-NEXT: buffer_store_dword v0, v2, s[8:11], 0 offen ; G_GFX10-NEXT: buffer_store_dword v1, v2, s[8:11], 0 offen offset:4 @@ -1143,23 +1143,23 @@ define amdgpu_kernel void @lds_ds_fmax_f64(double addrspace(5)* %out, double add ; G_GFX7: ; %bb.0: ; G_GFX7-NEXT: s_mov_b32 s8, SCRATCH_RSRC_DWORD0 ; G_GFX7-NEXT: s_mov_b32 s9, SCRATCH_RSRC_DWORD1 -; G_GFX7-NEXT: s_load_dword s2, s[0:1], 0xb +; G_GFX7-NEXT: s_load_dword s4, s[0:1], 0xb ; G_GFX7-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9 ; G_GFX7-NEXT: s_mov_b32 s10, -1 ; G_GFX7-NEXT: s_mov_b32 s11, 0xe8f000 ; G_GFX7-NEXT: s_add_u32 s8, s8, s3 +; G_GFX7-NEXT: 
s_mov_b32 s2, 0 ; G_GFX7-NEXT: s_addc_u32 s9, s9, 0 -; G_GFX7-NEXT: s_mov_b32 s4, 0 +; G_GFX7-NEXT: s_mov_b32 s3, 0x40450000 +; G_GFX7-NEXT: v_mov_b32_e32 v0, s2 ; G_GFX7-NEXT: s_waitcnt lgkmcnt(0) -; G_GFX7-NEXT: s_add_i32 s2, s2, 4 -; G_GFX7-NEXT: s_mov_b32 s5, 0x40450000 -; G_GFX7-NEXT: v_mov_b32_e32 v0, s4 -; G_GFX7-NEXT: s_lshl_b32 s3, s2, 3 -; G_GFX7-NEXT: v_mov_b32_e32 v1, s5 -; G_GFX7-NEXT: v_mov_b32_e32 v2, s3 +; G_GFX7-NEXT: s_add_i32 s4, s4, 4 +; G_GFX7-NEXT: v_mov_b32_e32 v1, s3 +; G_GFX7-NEXT: s_lshl_b32 s2, s4, 3 +; G_GFX7-NEXT: v_mov_b32_e32 v2, s2 ; G_GFX7-NEXT: s_mov_b32 m0, -1 ; G_GFX7-NEXT: ds_max_rtn_f64 v[2:3], v2, v[0:1] -; G_GFX7-NEXT: s_lshl_b32 s2, s2, 4 +; G_GFX7-NEXT: s_lshl_b32 s2, s4, 4 ; G_GFX7-NEXT: v_mov_b32_e32 v4, s2 ; G_GFX7-NEXT: ds_max_rtn_f64 v[0:1], v4, v[0:1] ; G_GFX7-NEXT: s_waitcnt lgkmcnt(0) @@ -1177,23 +1177,23 @@ define amdgpu_kernel void @lds_ds_fmax_f64(double addrspace(5)* %out, double add ; G_VI: ; %bb.0: ; G_VI-NEXT: s_mov_b32 s88, SCRATCH_RSRC_DWORD0 ; G_VI-NEXT: s_mov_b32 s89, SCRATCH_RSRC_DWORD1 -; G_VI-NEXT: s_load_dword s2, s[0:1], 0x2c +; G_VI-NEXT: s_load_dword s4, s[0:1], 0x2c ; G_VI-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24 ; G_VI-NEXT: s_mov_b32 s90, -1 ; G_VI-NEXT: s_mov_b32 s91, 0xe80000 ; G_VI-NEXT: s_add_u32 s88, s88, s3 +; G_VI-NEXT: s_mov_b32 s2, 0 ; G_VI-NEXT: s_addc_u32 s89, s89, 0 -; G_VI-NEXT: s_mov_b32 s4, 0 +; G_VI-NEXT: s_mov_b32 s3, 0x40450000 +; G_VI-NEXT: v_mov_b32_e32 v0, s2 ; G_VI-NEXT: s_waitcnt lgkmcnt(0) -; G_VI-NEXT: s_add_i32 s2, s2, 4 -; G_VI-NEXT: s_mov_b32 s5, 0x40450000 -; G_VI-NEXT: v_mov_b32_e32 v0, s4 -; G_VI-NEXT: s_lshl_b32 s3, s2, 3 -; G_VI-NEXT: v_mov_b32_e32 v1, s5 -; G_VI-NEXT: v_mov_b32_e32 v2, s3 +; G_VI-NEXT: s_add_i32 s4, s4, 4 +; G_VI-NEXT: v_mov_b32_e32 v1, s3 +; G_VI-NEXT: s_lshl_b32 s2, s4, 3 +; G_VI-NEXT: v_mov_b32_e32 v2, s2 ; G_VI-NEXT: s_mov_b32 m0, -1 ; G_VI-NEXT: ds_max_rtn_f64 v[2:3], v2, v[0:1] -; G_VI-NEXT: s_lshl_b32 s2, s2, 4 +; G_VI-NEXT: s_lshl_b32 s2, s4, 4 ; G_VI-NEXT: v_mov_b32_e32 v4, s2 ; G_VI-NEXT: ds_max_rtn_f64 v[0:1], v4, v[0:1] ; G_VI-NEXT: s_waitcnt lgkmcnt(0) @@ -1211,28 +1211,28 @@ define amdgpu_kernel void @lds_ds_fmax_f64(double addrspace(5)* %out, double add ; G_GFX9: ; %bb.0: ; G_GFX9-NEXT: s_mov_b32 s8, SCRATCH_RSRC_DWORD0 ; G_GFX9-NEXT: s_mov_b32 s9, SCRATCH_RSRC_DWORD1 -; G_GFX9-NEXT: s_load_dword s2, s[0:1], 0x2c -; G_GFX9-NEXT: s_load_dwordx2 s[6:7], s[0:1], 0x24 ; G_GFX9-NEXT: s_mov_b32 s10, -1 ; G_GFX9-NEXT: s_mov_b32 s11, 0xe00000 -; G_GFX9-NEXT: s_mov_b32 s0, 0 ; G_GFX9-NEXT: s_add_u32 s8, s8, s3 +; G_GFX9-NEXT: s_load_dword s4, s[0:1], 0x2c +; G_GFX9-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x24 +; G_GFX9-NEXT: s_mov_b32 s0, 0 +; G_GFX9-NEXT: s_addc_u32 s9, s9, 0 ; G_GFX9-NEXT: s_mov_b32 s1, 0x40450000 ; G_GFX9-NEXT: v_mov_b32_e32 v0, s0 -; G_GFX9-NEXT: s_addc_u32 s9, s9, 0 -; G_GFX9-NEXT: v_mov_b32_e32 v1, s1 ; G_GFX9-NEXT: s_waitcnt lgkmcnt(0) -; G_GFX9-NEXT: s_add_i32 s0, s2, 4 -; G_GFX9-NEXT: s_lshl_b32 s1, s0, 3 -; G_GFX9-NEXT: v_mov_b32_e32 v2, s1 +; G_GFX9-NEXT: s_add_i32 s4, s4, 4 +; G_GFX9-NEXT: v_mov_b32_e32 v1, s1 +; G_GFX9-NEXT: s_lshl_b32 s0, s4, 3 +; G_GFX9-NEXT: v_mov_b32_e32 v2, s0 ; G_GFX9-NEXT: ds_max_rtn_f64 v[2:3], v2, v[0:1] -; G_GFX9-NEXT: s_lshl_b32 s0, s0, 4 +; G_GFX9-NEXT: s_lshl_b32 s0, s4, 4 ; G_GFX9-NEXT: v_mov_b32_e32 v5, s0 -; G_GFX9-NEXT: v_mov_b32_e32 v4, s7 +; G_GFX9-NEXT: v_mov_b32_e32 v4, s3 ; G_GFX9-NEXT: ds_max_rtn_f64 v[0:1], v5, v[0:1] ; G_GFX9-NEXT: s_waitcnt lgkmcnt(0) ; G_GFX9-NEXT: ds_max_rtn_f64 v[0:1], v4, 
v[2:3] -; G_GFX9-NEXT: v_mov_b32_e32 v2, s6 +; G_GFX9-NEXT: v_mov_b32_e32 v2, s2 ; G_GFX9-NEXT: s_waitcnt lgkmcnt(0) ; G_GFX9-NEXT: buffer_store_dword v0, v2, s[8:11], 0 offen ; G_GFX9-NEXT: buffer_store_dword v1, v2, s[8:11], 0 offen offset:4 @@ -1240,31 +1240,31 @@ define amdgpu_kernel void @lds_ds_fmax_f64(double addrspace(5)* %out, double add ; ; G_GFX10-LABEL: lds_ds_fmax_f64: ; G_GFX10: ; %bb.0: -; G_GFX10-NEXT: s_clause 0x1 -; G_GFX10-NEXT: s_load_dword s2, s[0:1], 0x2c -; G_GFX10-NEXT: s_load_dwordx2 s[6:7], s[0:1], 0x24 ; G_GFX10-NEXT: s_mov_b32 s8, SCRATCH_RSRC_DWORD0 ; G_GFX10-NEXT: s_mov_b32 s9, SCRATCH_RSRC_DWORD1 ; G_GFX10-NEXT: s_mov_b32 s10, -1 ; G_GFX10-NEXT: s_mov_b32 s11, 0x31c16000 ; G_GFX10-NEXT: s_add_u32 s8, s8, s3 +; G_GFX10-NEXT: s_clause 0x1 +; G_GFX10-NEXT: s_load_dword s4, s[0:1], 0x2c +; G_GFX10-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x24 ; G_GFX10-NEXT: s_addc_u32 s9, s9, 0 ; G_GFX10-NEXT: s_mov_b32 s0, 0 ; G_GFX10-NEXT: s_mov_b32 s1, 0x40450000 ; G_GFX10-NEXT: v_mov_b32_e32 v0, s0 ; G_GFX10-NEXT: v_mov_b32_e32 v1, s1 ; G_GFX10-NEXT: s_waitcnt lgkmcnt(0) -; G_GFX10-NEXT: s_add_i32 s2, s2, 4 -; G_GFX10-NEXT: v_mov_b32_e32 v5, s7 -; G_GFX10-NEXT: s_lshl_b32 s3, s2, 3 -; G_GFX10-NEXT: s_lshl_b32 s0, s2, 4 -; G_GFX10-NEXT: v_mov_b32_e32 v2, s3 +; G_GFX10-NEXT: s_add_i32 s4, s4, 4 +; G_GFX10-NEXT: v_mov_b32_e32 v5, s3 +; G_GFX10-NEXT: s_lshl_b32 s5, s4, 3 +; G_GFX10-NEXT: s_lshl_b32 s0, s4, 4 +; G_GFX10-NEXT: v_mov_b32_e32 v2, s5 ; G_GFX10-NEXT: v_mov_b32_e32 v4, s0 ; G_GFX10-NEXT: ds_max_rtn_f64 v[2:3], v2, v[0:1] ; G_GFX10-NEXT: ds_max_rtn_f64 v[0:1], v4, v[0:1] ; G_GFX10-NEXT: s_waitcnt lgkmcnt(0) ; G_GFX10-NEXT: ds_max_rtn_f64 v[0:1], v5, v[2:3] -; G_GFX10-NEXT: v_mov_b32_e32 v2, s6 +; G_GFX10-NEXT: v_mov_b32_e32 v2, s2 ; G_GFX10-NEXT: s_waitcnt lgkmcnt(0) ; G_GFX10-NEXT: buffer_store_dword v0, v2, s[8:11], 0 offen ; G_GFX10-NEXT: buffer_store_dword v1, v2, s[8:11], 0 offen offset:4 diff --git a/llvm/test/CodeGen/AMDGPU/twoaddr-constrain.ll b/llvm/test/CodeGen/AMDGPU/twoaddr-constrain.ll index ae0c8b57b064..4c55eddcb4e7 100644 --- a/llvm/test/CodeGen/AMDGPU/twoaddr-constrain.ll +++ b/llvm/test/CodeGen/AMDGPU/twoaddr-constrain.ll @@ -13,15 +13,12 @@ define amdgpu_ps <3 x i32> @s_load_constant_v3i32_align4(<3 x i32> addrspace(4)* ; CHECK-NEXT: %0.sub1:sreg_64 = COPY killed [[COPY1]] ; CHECK-NEXT: [[S_LOAD_DWORDX2_IMM:%[0-9]+]]:sreg_64_xexec = S_LOAD_DWORDX2_IMM %0, 0, 0 :: (load (<2 x s32>) from %ir.ptr, align 4, addrspace 4) ; CHECK-NEXT: [[S_LOAD_DWORD_IMM:%[0-9]+]]:sreg_32_xm0_xexec = S_LOAD_DWORD_IMM killed %0, 8, 0 :: (load (s32) from %ir.ptr + 8, addrspace 4) - ; CHECK-NEXT: undef %16.sub0_sub1:sgpr_96_with_sub0_sub1 = COPY killed [[S_LOAD_DWORDX2_IMM]] - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:sgpr_96_with_sub0_sub1 = COPY killed %16 - ; CHECK-NEXT: [[COPY2]].sub2:sgpr_96_with_sub0_sub1 = COPY undef [[S_LOAD_DWORD_IMM]] - ; CHECK-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY [[COPY2]].sub0 - ; CHECK-NEXT: $sgpr0 = COPY killed [[COPY3]] - ; CHECK-NEXT: [[COPY4:%[0-9]+]]:sreg_32 = COPY killed [[COPY2]].sub1 - ; CHECK-NEXT: $sgpr1 = COPY killed [[COPY4]] - ; CHECK-NEXT: [[COPY5:%[0-9]+]]:sreg_32 = COPY killed [[S_LOAD_DWORD_IMM]] - ; CHECK-NEXT: $sgpr2 = COPY killed [[COPY5]] + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY [[S_LOAD_DWORDX2_IMM]].sub0 + ; CHECK-NEXT: $sgpr0 = COPY killed [[COPY2]] + ; CHECK-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY killed [[S_LOAD_DWORDX2_IMM]].sub1 + ; CHECK-NEXT: $sgpr1 = COPY killed [[COPY3]] + ; CHECK-NEXT: [[COPY4:%[0-9]+]]:sreg_32 = COPY 
killed [[S_LOAD_DWORD_IMM]] + ; CHECK-NEXT: $sgpr2 = COPY killed [[COPY4]] ; CHECK-NEXT: SI_RETURN_TO_EPILOG implicit killed $sgpr0, implicit killed $sgpr1, implicit killed $sgpr2 %load = load <3 x i32>, <3 x i32> addrspace(4)* %ptr, align 4 ret <3 x i32> %load diff --git a/llvm/unittests/CodeGen/GlobalISel/LegalizerHelperTest.cpp b/llvm/unittests/CodeGen/GlobalISel/LegalizerHelperTest.cpp index 2270e4fb406b..49d2c8329b30 100644 --- a/llvm/unittests/CodeGen/GlobalISel/LegalizerHelperTest.cpp +++ b/llvm/unittests/CodeGen/GlobalISel/LegalizerHelperTest.cpp @@ -1370,26 +1370,17 @@ TEST_F(AArch64GISelMITest, FewerElementsAnd) { CHECK: [[IMP_DEF0:%[0-9]+]]:_(<5 x s32>) = G_IMPLICIT_DEF CHECK: [[IMP_DEF1:%[0-9]+]]:_(<5 x s32>) = G_IMPLICIT_DEF CHECK: [[VALUE0:%[0-9]+]]:_(s32), [[VALUE1:%[0-9]+]]:_(s32), [[VALUE2:%[0-9]+]]:_(s32), [[VALUE3:%[0-9]+]]:_(s32), [[VALUE4:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[IMP_DEF0]]:_(<5 x s32>) - CHECK: [[IMP_DEF2:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF CHECK: [[VECTOR0:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[VALUE0]]:_(s32), [[VALUE1]]:_(s32) CHECK: [[VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[VALUE2]]:_(s32), [[VALUE3]]:_(s32) - CHECK: [[VECTOR2:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[VALUE4]]:_(s32), [[IMP_DEF2]]:_(s32) - CHECK: [[IMP_DEF3:%[0-9]+]]:_(<2 x s32>) = G_IMPLICIT_DEF CHECK: [[VALUE5:%[0-9]+]]:_(s32), [[VALUE6:%[0-9]+]]:_(s32), [[VALUE7:%[0-9]+]]:_(s32), [[VALUE8:%[0-9]+]]:_(s32), [[VALUE9:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[IMP_DEF1]]:_(<5 x s32>) - CHECK: [[IMP_DEF4:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF - CHECK: [[VECTOR3:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[VALUE5]]:_(s32), [[VALUE6]]:_(s32) - CHECK: [[VECTOR4:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[VALUE7]]:_(s32), [[VALUE8]]:_(s32) - CHECK: [[VECTOR5:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[VALUE9]]:_(s32), [[IMP_DEF4]]:_(s32) - CHECK: [[IMP_DEF5:%[0-9]+]]:_(<2 x s32>) = G_IMPLICIT_DEF - - CHECK: [[AND0:%[0-9]+]]:_(<2 x s32>) = G_AND [[VECTOR0]]:_, [[VECTOR3]]:_ - CHECK: [[AND1:%[0-9]+]]:_(<2 x s32>) = G_AND [[VECTOR1]]:_, [[VECTOR4]]:_ - CHECK: [[AND2:%[0-9]+]]:_(<2 x s32>) = G_AND [[VECTOR2]]:_, [[VECTOR5]]:_ - CHECK: [[IMP_DEF6:%[0-9]+]]:_(<2 x s32>) = G_IMPLICIT_DEF - - CHECK: [[VECTOR6:%[0-9]+]]:_(<10 x s32>) = G_CONCAT_VECTORS [[AND0]]:_(<2 x s32>), [[AND1]]:_(<2 x s32>), [[AND2]]:_(<2 x s32>), [[IMP_DEF6]]:_(<2 x s32>), [[IMP_DEF6]]:_(<2 x s32>) - CHECK: [[VECTOR7:%[0-9]+]]:_(<10 x s32>) = G_CONCAT_VECTORS [[AND0]]:_(<2 x s32>), [[AND1]]:_(<2 x s32>), [[AND2]]:_(<2 x s32>), [[IMP_DEF6]]:_(<2 x s32>), [[IMP_DEF6]]:_(<2 x s32>) - CHECK: [[VECTOR8:%[0-9]+]]:_(<5 x s32>), [[VECTOR9:%[0-9]+]]:_(<5 x s32>) = G_UNMERGE_VALUES [[VECTOR7]]:_(<10 x s32>) + CHECK: [[VECTOR2:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[VALUE5]]:_(s32), [[VALUE6]]:_(s32) + CHECK: [[VECTOR3:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[VALUE7]]:_(s32), [[VALUE8]]:_(s32) + CHECK: [[AND0:%[0-9]+]]:_(<2 x s32>) = G_AND [[VECTOR0]]:_, [[VECTOR2]]:_ + CHECK: [[AND1:%[0-9]+]]:_(<2 x s32>) = G_AND [[VECTOR1]]:_, [[VECTOR3]]:_ + CHECK: [[AND2:%[0-9]+]]:_(s32) = G_AND [[VALUE4]]:_, [[VALUE9]]:_ + CHECK: [[AND0_E0:%[0-9]+]]:_(s32), [[AND0_E1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AND0]]:_(<2 x s32>) + CHECK: [[AND1_E0:%[0-9]+]]:_(s32), [[AND1_E1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AND1]]:_(<2 x s32>) + CHECK: [[RESULT:%[0-9]+]]:_(<5 x s32>) = G_BUILD_VECTOR [[AND0_E0]]:_(s32), [[AND0_E1]]:_(s32), [[AND1_E0]]:_(s32), [[AND1_E1]]:_(s32), [[AND2]]:_(s32) )"; // Check @@ -1428,12 +1419,19 @@ TEST_F(AArch64GISelMITest, 
MoreElementsAnd) { auto CheckStr = R"( CHECK: [[BITCAST0:%[0-9]+]]:_(<2 x s32>) = G_BITCAST CHECK: [[BITCAST1:%[0-9]+]]:_(<2 x s32>) = G_BITCAST - CHECK: [[IMP_DEF0:%[0-9]+]]:_(<2 x s32>) = G_IMPLICIT_DEF - CHECK: [[CONCAT0:%[0-9]+]]:_(<6 x s32>) = G_CONCAT_VECTORS [[BITCAST0]]:_(<2 x s32>), [[IMP_DEF0]]:_(<2 x s32>), [[IMP_DEF0]]:_(<2 x s32>) - CHECK: [[IMP_DEF1:%[0-9]+]]:_(<2 x s32>) = G_IMPLICIT_DEF - CHECK: [[CONCAT1:%[0-9]+]]:_(<6 x s32>) = G_CONCAT_VECTORS [[BITCAST1]]:_(<2 x s32>), [[IMP_DEF1]]:_(<2 x s32>), [[IMP_DEF1]]:_(<2 x s32>) - CHECK: [[AND:%[0-9]+]]:_(<6 x s32>) = G_AND [[CONCAT0]]:_, [[CONCAT1]]:_ - CHECK: (<2 x s32>) = G_UNMERGE_VALUES [[AND]]:_(<6 x s32>) + + CHECK: [[BITCAST0_E0:%[0-9]+]]:_(s32), [[BITCAST0_E1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[BITCAST0]]:_(<2 x s32>) + CHECK: [[IMP_DEF0:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF + CHECK: [[BITCAST0_LARGE:%[0-9]+]]:_(<6 x s32>) = G_BUILD_VECTOR [[BITCAST0_E0]]:_(s32), [[BITCAST0_E1]]:_(s32), [[IMP_DEF0]]:_(s32), [[IMP_DEF0]]:_(s32), [[IMP_DEF0]]:_(s32), [[IMP_DEF0]]:_(s32) + + CHECK: [[BITCAST1_E0:%[0-9]+]]:_(s32), [[BITCAST1_E1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[BITCAST1]]:_(<2 x s32>) + CHECK: [[IMP_DEF1:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF + CHECK: [[BITCAST1_LARGE:%[0-9]+]]:_(<6 x s32>) = G_BUILD_VECTOR [[BITCAST1_E0]]:_(s32), [[BITCAST1_E1]]:_(s32), [[IMP_DEF1]]:_(s32), [[IMP_DEF1]]:_(s32), [[IMP_DEF1]]:_(s32), [[IMP_DEF1]]:_(s32) + + CHECK: [[AND:%[0-9]+]]:_(<6 x s32>) = G_AND [[BITCAST0_LARGE]]:_, [[BITCAST1_LARGE]]:_ + + CHECK: [[AND_E0:%[0-9]+]]:_(s32), [[AND_E1:%[0-9]+]]:_(s32), [[AND_E2:%[0-9]+]]:_(s32), [[AND_E3:%[0-9]+]]:_(s32), [[AND_E4:%[0-9]+]]:_(s32), [[AND_E5:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AND]]:_(<6 x s32>) + CHECK: (<2 x s32>) = G_BUILD_VECTOR [[AND_E0]]:_(s32), [[AND_E1]]:_(s32) )"; EXPECT_TRUE(CheckMachineFunction(*MF, CheckStr)) << *MF; @@ -1509,26 +1507,26 @@ TEST_F(AArch64GISelMITest, FewerElementsPhi) { auto CheckStr = R"( CHECK: [[INITVAL:%[0-9]+]]:_(<5 x s32>) = G_IMPLICIT_DEF - CHECK: [[EXTRACT0:%[0-9]+]]:_(<2 x s32>) = G_EXTRACT [[INITVAL]]:_(<5 x s32>), 0 - CHECK: [[EXTRACT1:%[0-9]+]]:_(<2 x s32>) = G_EXTRACT [[INITVAL]]:_(<5 x s32>), 64 - CHECK: [[EXTRACT2:%[0-9]+]]:_(s32) = G_EXTRACT [[INITVAL]]:_(<5 x s32>), 128 + CHECK: [[INITVAL_E0:%[0-9]+]]:_(s32), [[INITVAL_E1:%[0-9]+]]:_(s32), [[INITVAL_E2:%[0-9]+]]:_(s32), [[INITVAL_E3:%[0-9]+]]:_(s32), [[INITVAL_E4:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[INITVAL]]:_(<5 x s32>) + CHECK: [[INITVAL_E01:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[INITVAL_E0]]:_(s32), [[INITVAL_E1]]:_(s32) + CHECK: [[INITVAL_E23:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[INITVAL_E2]]:_(s32), [[INITVAL_E3]]:_(s32) CHECK: G_BRCOND CHECK: [[MIDVAL:%[0-9]+]]:_(<5 x s32>) = G_IMPLICIT_DEF - CHECK: [[EXTRACT3:%[0-9]+]]:_(<2 x s32>) = G_EXTRACT [[MIDVAL]]:_(<5 x s32>), 0 - CHECK: [[EXTRACT4:%[0-9]+]]:_(<2 x s32>) = G_EXTRACT [[MIDVAL]]:_(<5 x s32>), 64 - CHECK: [[EXTRACT5:%[0-9]+]]:_(s32) = G_EXTRACT [[MIDVAL]]:_(<5 x s32>), 128 + CHECK: [[MIDVAL_E0:%[0-9]+]]:_(s32), [[MIDVAL_E1:%[0-9]+]]:_(s32), [[MIDVAL_E2:%[0-9]+]]:_(s32), [[MIDVAL_E3:%[0-9]+]]:_(s32), [[MIDVAL_E4:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[MIDVAL]]:_(<5 x s32>) + CHECK: [[MIDVAL_E01:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[MIDVAL_E0]]:_(s32), [[MIDVAL_E1]]:_(s32) + CHECK: [[MIDVAL_E23:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[MIDVAL_E2]]:_(s32), [[MIDVAL_E3]]:_(s32) CHECK: G_BR - CHECK: [[PHI0:%[0-9]+]]:_(<2 x s32>) = G_PHI [[EXTRACT0]]:_(<2 x s32>), %bb.0, [[EXTRACT3]]:_(<2 x s32>), %bb.1 - CHECK: 
[[PHI1:%[0-9]+]]:_(<2 x s32>) = G_PHI [[EXTRACT1]]:_(<2 x s32>), %bb.0, [[EXTRACT4]]:_(<2 x s32>), %bb.1 - CHECK: [[PHI2:%[0-9]+]]:_(s32) = G_PHI [[EXTRACT2]]:_(s32), %bb.0, [[EXTRACT5]]:_(s32), %bb.1 - - CHECK: [[OTHER_PHI:%[0-9]+]]:_(s64) = G_PHI - + CHECK: [[PHI0:%[0-9]+]]:_(<2 x s32>) = G_PHI [[INITVAL_E01]]:_(<2 x s32>), %bb.0, [[MIDVAL_E01]]:_(<2 x s32>), %bb.1 + CHECK: [[PHI1:%[0-9]+]]:_(<2 x s32>) = G_PHI [[INITVAL_E23]]:_(<2 x s32>), %bb.0, [[MIDVAL_E23]]:_(<2 x s32>), %bb.1 + CHECK: [[PHI2:%[0-9]+]]:_(s32) = G_PHI [[INITVAL_E4]]:_(s32), %bb.0, [[MIDVAL_E4]]:_(s32), %bb.1 CHECK: [[UNMERGE0:%[0-9]+]]:_(s32), [[UNMERGE1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[PHI0]]:_(<2 x s32>) CHECK: [[UNMERGE2:%[0-9]+]]:_(s32), [[UNMERGE3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[PHI1]]:_(<2 x s32>) CHECK: [[BV:%[0-9]+]]:_(<5 x s32>) = G_BUILD_VECTOR [[UNMERGE0]]:_(s32), [[UNMERGE1]]:_(s32), [[UNMERGE2]]:_(s32), [[UNMERGE3]]:_(s32), [[PHI2]]:_(s32) + + CHECK: [[OTHER_PHI:%[0-9]+]]:_(s64) = G_PHI + CHECK: [[USE_OP:%[0-9]+]]:_(<5 x s32>) = G_AND [[BV]]:_, [[BV]]:_ )"; @@ -3196,14 +3194,8 @@ TEST_F(AArch64GISelMITest, LowerInsert) { CHECK: [[OR:%[0-9]+]]:_(s64) = G_OR [[AND]]:_, [[SHL]]:_ CHECK: [[INTTOPTR:%[0-9]+]]:_(p0) = G_INTTOPTR [[OR]] - CHECK: [[BITCAST:%[0-9]+]]:_(s64) = G_BITCAST [[V2S32]] - CHECK: [[ZEXT:%[0-9]+]]:_(s64) = G_ZEXT [[S32]] - CHECK: [[C:%[0-9]+]]:_(s64) = G_CONSTANT - CHECK: [[SHL:%[0-9]+]]:_(s64) = G_SHL [[ZEXT]]:_, [[C]]:_(s64) - CHECK: [[C:%[0-9]+]]:_(s64) = G_CONSTANT - CHECK: [[AND:%[0-9]+]]:_(s64) = G_AND [[BITCAST]]:_, [[C]]:_ - CHECK: [[OR:%[0-9]+]]:_(s64) = G_OR [[AND]]:_, [[SHL]]:_ - CHECK: [[BITCAST:%[0-9]+]]:_(<2 x s32>) = G_BITCAST [[OR]] + CHECK: [[V2S32_E0:%[0-9]+]]:_(s32), [[V2S32_E1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[V2S32]] + CHECK: [[BV:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[V2S32_E0]]:_(s32), [[S32]]:_(s32) )"; // Check @@ -3711,20 +3703,22 @@ TEST_F(AArch64GISelMITest, NarrowFreeze) { DefineLegalizerInfo(A, {}); // Make sure that G_FREEZE is narrowed using unmerge/extract - LLT S16{LLT::scalar(16)}; LLT S32{LLT::scalar(32)}; LLT S33{LLT::scalar(33)}; + LLT S48{LLT::scalar(48)}; LLT S64{LLT::scalar(64)}; LLT V2S16{LLT::fixed_vector(2, 16)}; - LLT V2S32{LLT::fixed_vector(2, 32)}; + LLT V3S16{LLT::fixed_vector(3, 16)}; + LLT V4S16{LLT::fixed_vector(4, 16)}; auto Trunc = B.buildTrunc(S33, {Copies[0]}); - auto Vector = B.buildBitcast(V2S32, Copies[0]); + auto Trunc1 = B.buildTrunc(S48, {Copies[0]}); + auto Vector = B.buildBitcast(V3S16, Trunc1); auto FreezeScalar = B.buildInstr(TargetOpcode::G_FREEZE, {S64}, {Copies[0]}); auto FreezeOdd = B.buildInstr(TargetOpcode::G_FREEZE, {S33}, {Trunc}); - auto FreezeVector = B.buildInstr(TargetOpcode::G_FREEZE, {V2S32}, {Vector}); - auto FreezeVector1 = B.buildInstr(TargetOpcode::G_FREEZE, {V2S32}, {Vector}); + auto FreezeVector = B.buildInstr(TargetOpcode::G_FREEZE, {V3S16}, {Vector}); + auto FreezeVector1 = B.buildInstr(TargetOpcode::G_FREEZE, {V3S16}, {Vector}); AInfo Info(MF->getSubtarget()); DummyGISelObserver Observer; @@ -3736,54 +3730,48 @@ TEST_F(AArch64GISelMITest, NarrowFreeze) { EXPECT_EQ(LegalizerHelper::LegalizeResult::Legalized, Helper.narrowScalar(*FreezeScalar, 0, S32)); + // This should be followed by narrowScalar to S32. 
B.setInsertPt(*EntryMBB, FreezeOdd->getIterator()); EXPECT_EQ(LegalizerHelper::LegalizeResult::Legalized, - Helper.narrowScalar(*FreezeOdd, 0, S32)); + Helper.widenScalar(*FreezeOdd, 0, S64)); B.setInsertPt(*EntryMBB, FreezeVector->getIterator()); EXPECT_EQ(LegalizerHelper::LegalizeResult::Legalized, - Helper.narrowScalar(*FreezeVector, 0, V2S16)); + Helper.fewerElementsVector(*FreezeVector, 0, V2S16)); + // This should be followed by fewerElements to V2S16. B.setInsertPt(*EntryMBB, FreezeVector1->getIterator()); EXPECT_EQ(LegalizerHelper::LegalizeResult::Legalized, - Helper.narrowScalar(*FreezeVector1, 0, S16)); + Helper.moreElementsVector(*FreezeVector1, 0, V4S16)); const auto *CheckStr = R"( CHECK: [[COPY:%[0-9]+]]:_(s64) = COPY CHECK: [[TRUNC:%[0-9]+]]:_(s33) = G_TRUNC [[COPY]] - CHECK: [[BITCAST:%[0-9]+]]:_(<2 x s32>) = G_BITCAST [[COPY]] + CHECK: [[TRUNC1:%[0-9]+]]:_(s48) = G_TRUNC [[COPY]] + CHECK: [[BITCAST:%[0-9]+]]:_(<3 x s16>) = G_BITCAST [[TRUNC1]] CHECK: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]] CHECK: [[FREEZE:%[0-9]+]]:_(s32) = G_FREEZE [[UV]] CHECK: [[FREEZE1:%[0-9]+]]:_(s32) = G_FREEZE [[UV1]] CHECK: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[FREEZE]]:_(s32), [[FREEZE1]] - CHECK: (s1) = G_UNMERGE_VALUES [[TRUNC]]:_(s33) - CHECK: [[UNDEF:%[0-9]+]]:_(s1) = G_IMPLICIT_DEF - CHECK: [[MV1:%[0-9]+]]:_(s32) = G_MERGE_VALUES - CHECK: [[MV2:%[0-9]+]]:_(s32) = G_MERGE_VALUES - CHECK: [[UNDEF1:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF - CHECK: [[FREEZE2:%[0-9]+]]:_(s32) = G_FREEZE [[MV1]] - CHECK: [[FREEZE3:%[0-9]+]]:_(s32) = G_FREEZE [[MV2]] - CHECK: [[UNDEF2:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF - CHECK: [[MV3:%[0-9]+]]:_(s1056) = G_MERGE_VALUES [[FREEZE2]]:_(s32), [[FREEZE3]]:_(s32), [[UNDEF2]] - CHECK: [[TRUNC1:%[0-9]+]]:_(s33) = G_TRUNC [[MV3]] - - CHECK: [[BITCAST1:%[0-9]+]]:_(s64) = G_BITCAST [[BITCAST]] - CHECK: [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[BITCAST1]] - CHECK: [[FREEZE4:%[0-9]+]]:_(s32) = G_FREEZE [[UV2]] - CHECK: [[FREEZE5:%[0-9]+]]:_(s32) = G_FREEZE [[UV3]] - CHECK: [[MV4:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[FREEZE4]]:_(s32), [[FREEZE5]]:_(s32) - CHECK: [[BITCAST2:%[0-9]+]]:_(<2 x s32>) = G_BITCAST [[MV4]] - - CHECK: [[BITCAST3:%[0-9]+]]:_(s64) = G_BITCAST [[BITCAST]] - CHECK: [[UV4:%[0-9]+]]:_(s16), [[UV5:%[0-9]+]]:_(s16), [[UV6:%[0-9]+]]:_(s16), [[UV7:%[0-9]+]]:_(s16) = G_UNMERGE_VALUES [[BITCAST3]] - CHECK: [[FREEZE6:%[0-9]+]]:_(s16) = G_FREEZE [[UV4]] - CHECK: [[FREEZE7:%[0-9]+]]:_(s16) = G_FREEZE [[UV5]] - CHECK: [[FREEZE8:%[0-9]+]]:_(s16) = G_FREEZE [[UV6]] - CHECK: [[FREEZE9:%[0-9]+]]:_(s16) = G_FREEZE [[UV7]] - CHECK: [[MV5:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[FREEZE6]]:_(s16), [[FREEZE7]]:_(s16), [[FREEZE8]]:_(s16), [[FREEZE9]] - CHECK: [[BITCAST3:%[0-9]+]]:_(<2 x s32>) = G_BITCAST [[MV5]] + CHECK: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[TRUNC]] + CHECK: [[FREEZE2:%[0-9]+]]:_(s64) = G_FREEZE [[ANYEXT]] + CHECK: [[TRUNC1:%[0-9]+]]:_(s33) = G_TRUNC [[FREEZE2]] + + CHECK: [[UV2:%[0-9]+]]:_(s16), [[UV3:%[0-9]+]]:_(s16), [[UV4:%[0-9]+]]:_(s16) = G_UNMERGE_VALUES [[BITCAST]] + CHECK: [[BV:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[UV2]]:_(s16), [[UV3]]:_(s16) + CHECK: [[FREEZE3:%[0-9]+]]:_(<2 x s16>) = G_FREEZE [[BV]] + CHECK: [[FREEZE4:%[0-9]+]]:_(s16) = G_FREEZE [[UV4]] + CHECK: [[FREEZE3_E0:%[0-9]+]]:_(s16), [[FREEZE3_E1:%[0-9]+]]:_(s16) = G_UNMERGE_VALUES [[FREEZE3]] + CHECK: [[BV1:%[0-9]+]]:_(<3 x s16>) = G_BUILD_VECTOR [[FREEZE3_E0]]:_(s16), [[FREEZE3_E1]]:_(s16), [[FREEZE4]]:_(s16) + + CHECK: 
[[UV5:%[0-9]+]]:_(s16), [[UV6:%[0-9]+]]:_(s16), [[UV7:%[0-9]+]]:_(s16) = G_UNMERGE_VALUES [[BITCAST]] + CHECK: [[IMP_DEF:%[0-9]+]]:_(s16) = G_IMPLICIT_DEF + CHECK: [[BV1:%[0-9]+]]:_(<4 x s16>) = G_BUILD_VECTOR [[UV5]]:_(s16), [[UV6]]:_(s16), [[UV7]]:_(s16), [[IMP_DEF]]:_(s16) + CHECK: [[FREEZE5:%[0-9]+]]:_(<4 x s16>) = G_FREEZE [[BV1]] + CHECK: [[FREEZE5_E0:%[0-9]+]]:_(s16), [[FREEZE5_E1:%[0-9]+]]:_(s16), [[FREEZE5_E2:%[0-9]+]]:_(s16), [[FREEZE5_E3:%[0-9]+]]:_(s16) = G_UNMERGE_VALUES [[FREEZE5]] + CHECK: [[BV2:%[0-9]+]]:_(<3 x s16>) = G_BUILD_VECTOR [[FREEZE5_E0]]:_(s16), [[FREEZE5_E1]]:_(s16), [[FREEZE5_E2]]:_(s16) )"; // Check @@ -3869,10 +3857,12 @@ TEST_F(AArch64GISelMITest, MoreElementsFreeze) { const auto *CheckStr = R"( CHECK: [[COPY:%[0-9]+]]:_(s64) = COPY CHECK: [[BITCAST:%[0-9]+]]:_(<2 x s32>) = G_BITCAST [[COPY]] - CHECK: [[UNDEF:%[0-9]+]]:_(<2 x s32>) = G_IMPLICIT_DEF - CHECK: [[CV:%[0-9]+]]:_(<4 x s32>) = G_CONCAT_VECTORS [[BITCAST]]:_(<2 x s32>), [[UNDEF]] - CHECK: [[FREEZE:%[0-9]+]]:_(<4 x s32>) = G_FREEZE [[CV]] - CHECK: [[EXTR0:%[0-9]+]]:_(<2 x s32>), [[EXTR1:%[0-9]+]]:_(<2 x s32>) = G_UNMERGE_VALUES [[FREEZE]]:_(<4 x s32>) + CHECK: [[BITCAST_E0:%[0-9]+]]:_(s32), [[BITCAST_E1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[BITCAST]]:_(<2 x s32>) + CHECK: [[IMP_DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF + CHECK: [[BITCAST_LARGE:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[BITCAST_E0]]:_(s32), [[BITCAST_E1]]:_(s32), [[IMP_DEF]]:_(s32), [[IMP_DEF]]:_(s32) + CHECK: [[FREEZE:%[0-9]+]]:_(<4 x s32>) = G_FREEZE [[BITCAST_LARGE]] + CHECK: [[FREEZE_E0:%[0-9]+]]:_(s32), [[FREEZE_E1:%[0-9]+]]:_(s32), [[FREEZE_E2:%[0-9]+]]:_(s32), [[FREEZE_E3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[FREEZE]]:_(<4 x s32>) + CHECK: (<2 x s32>) = G_BUILD_VECTOR [[FREEZE_E0]]:_(s32), [[FREEZE_E1]]:_(s32) )"; // Check @@ -4040,14 +4030,19 @@ TEST_F(AArch64GISelMITest, moreElementsShuffle) { CHECK: [[COPY2:%[0-9]+]]:_(s64) = COPY CHECK: [[BV1:%[0-9]+]]:_(<6 x s64>) = G_BUILD_VECTOR CHECK: [[BV2:%[0-9]+]]:_(<6 x s64>) = G_BUILD_VECTOR - CHECK: [[IMPDEF1:%[0-9]+]]:_(<8 x s64>) = G_IMPLICIT_DEF - CHECK: [[INSERT1:%[0-9]+]]:_(<8 x s64>) = G_INSERT [[IMPDEF1]]:_, [[BV1]]:_(<6 x s64>), 0 - CHECK: [[IMPDEF2:%[0-9]+]]:_(<8 x s64>) = G_IMPLICIT_DEF - CHECK: [[INSERT2:%[0-9]+]]:_(<8 x s64>) = G_INSERT [[IMPDEF2]]:_, [[BV2]]:_(<6 x s64>), 0 - CHECK: [[SHUF:%[0-9]+]]:_(<8 x s64>) = G_SHUFFLE_VECTOR [[INSERT1]]:_(<8 x s64>), [[INSERT2]]:_, shufflemask(3, 4, 9, 0, 1, 13, undef, undef) - CHECK: [[IMPDEF3:%[0-9]+]]:_(<8 x s64>) = G_IMPLICIT_DEF - CHECK: [[CONCAT:%[0-9]+]]:_(<24 x s64>) = G_CONCAT_VECTORS [[SHUF]]:_(<8 x s64>), [[IMPDEF3]]:_(<8 x s64>), [[IMPDEF3]]:_(<8 x s64>) - CHECK: [[UNMERGE:%[0-9]+]]:_(<6 x s64>), [[UNMERGE2:%[0-9]+]]:_(<6 x s64>), [[UNMERGE3:%[0-9]+]]:_(<6 x s64>), [[UNMERGE4:%[0-9]+]]:_(<6 x s64>) = G_UNMERGE_VALUES [[CONCAT]]:_(<24 x s64>) + + CHECK: [[BV1_E0:%[0-9]+]]:_(s64), [[BV1_E1:%[0-9]+]]:_(s64), [[BV1_E2:%[0-9]+]]:_(s64), [[BV1_E3:%[0-9]+]]:_(s64), [[BV1_E4:%[0-9]+]]:_(s64), [[BV1_E5:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[BV1]]:_(<6 x s64>) + CHECK: [[IMP_DEF0:%[0-9]+]]:_(s64) = G_IMPLICIT_DEF + CHECK: [[BV1_LARGE:%[0-9]+]]:_(<8 x s64>) = G_BUILD_VECTOR [[BV1_E0]]:_(s64), [[BV1_E1]]:_(s64), [[BV1_E2]]:_(s64), [[BV1_E3]]:_(s64), [[BV1_E4]]:_(s64), [[BV1_E5]]:_(s64), [[IMP_DEF0]]:_(s64), [[IMP_DEF0]]:_(s64) + + CHECK: [[BV2_E0:%[0-9]+]]:_(s64), [[BV2_E1:%[0-9]+]]:_(s64), [[BV2_E2:%[0-9]+]]:_(s64), [[BV2_E3:%[0-9]+]]:_(s64), [[BV2_E4:%[0-9]+]]:_(s64), [[BV2_E5:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[BV2]]:_(<6 x s64>) 
+ CHECK: [[IMP_DEF1:%[0-9]+]]:_(s64) = G_IMPLICIT_DEF + CHECK: [[BV2_LARGE:%[0-9]+]]:_(<8 x s64>) = G_BUILD_VECTOR [[BV2_E0]]:_(s64), [[BV2_E1]]:_(s64), [[BV2_E2]]:_(s64), [[BV2_E3]]:_(s64), [[BV2_E4]]:_(s64), [[BV2_E5]]:_(s64), [[IMP_DEF1]]:_(s64), [[IMP_DEF1]]:_(s64) + + CHECK: [[SHUF:%[0-9]+]]:_(<8 x s64>) = G_SHUFFLE_VECTOR [[BV1_LARGE]]:_(<8 x s64>), [[BV2_LARGE]]:_, shufflemask(3, 4, 9, 0, 1, 13, undef, undef) + + CHECK: [[SHUF_E0:%[0-9]+]]:_(s64), [[SHUF_E1:%[0-9]+]]:_(s64), [[SHUF_E2:%[0-9]+]]:_(s64), [[SHUF_E3:%[0-9]+]]:_(s64), [[SHUF_E4:%[0-9]+]]:_(s64), [[SHUF_E5:%[0-9]+]]:_(s64), [[SHUF_E6:%[0-9]+]]:_(s64), [[SHUF_E7:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[SHUF]]:_(<8 x s64>) + CHECK: (<6 x s64>) = G_BUILD_VECTOR [[SHUF_E0]]:_(s64), [[SHUF_E1]]:_(s64), [[SHUF_E2]]:_(s64), [[SHUF_E3]]:_(s64), [[SHUF_E4]]:_(s64), [[SHUF_E5]]:_(s64) )"; // Check From 2c7dc13146ba0c2991e4950ce159276ad5d9aece Mon Sep 17 00:00:00 2001 From: Nikita Popov Date: Thu, 23 Dec 2021 14:41:03 +0100 Subject: [PATCH 144/293] [CGBuilder] Add CreateGEP() overload that accepts an Address Add an overload for an Address and a single non-constant offset. This makes it easier to preserve the element type and adjust the alignment appropriately. --- clang/lib/CodeGen/CGBuilder.h | 15 +++++++++ clang/lib/CodeGen/CGOpenMPRuntime.cpp | 39 ++++++++---------------- clang/lib/CodeGen/CGOpenMPRuntimeGPU.cpp | 14 +++------ 3 files changed, 31 insertions(+), 37 deletions(-) diff --git a/clang/lib/CodeGen/CGBuilder.h b/clang/lib/CodeGen/CGBuilder.h index 5639abf56982..7c9f41e84eaf 100644 --- a/clang/lib/CodeGen/CGBuilder.h +++ b/clang/lib/CodeGen/CGBuilder.h @@ -258,6 +258,21 @@ class CGBuilderTy : public CGBuilderBaseTy { Addr.getAlignment().alignmentAtOffset(Index * EltSize)); } + /// Create GEP with single dynamic index. The address alignment is reduced + /// according to the element size. + using CGBuilderBaseTy::CreateGEP; + Address CreateGEP(Address Addr, llvm::Value *Index, + const llvm::Twine &Name = "") { + const llvm::DataLayout &DL = BB->getParent()->getParent()->getDataLayout(); + CharUnits EltSize = + CharUnits::fromQuantity(DL.getTypeAllocSize(Addr.getElementType())); + + return Address(CreateGEP(Addr.getElementType(), Addr.getPointer(), Index, + Name), + Addr.getElementType(), + Addr.getAlignment().alignmentOfArrayElement(EltSize)); + } + /// Given a pointer to i8, adjust it by a given constant offset. 
Address CreateConstInBoundsByteGEP(Address Addr, CharUnits Offset, const llvm::Twine &Name = "") { diff --git a/clang/lib/CodeGen/CGOpenMPRuntime.cpp b/clang/lib/CodeGen/CGOpenMPRuntime.cpp index 8a75f06882bc..fe4cca16eb53 100644 --- a/clang/lib/CodeGen/CGOpenMPRuntime.cpp +++ b/clang/lib/CodeGen/CGOpenMPRuntime.cpp @@ -4501,10 +4501,7 @@ CGOpenMPRuntime::emitTaskInit(CodeGenFunction &CGF, SourceLocation Loc, std::tie(Addr, Size) = getPointerAndSize(CGF, E); llvm::Value *Idx = CGF.EmitLoadOfScalar(PosLVal, E->getExprLoc()); LValue Base = CGF.MakeAddrLValue( - Address(CGF.Builder.CreateGEP(AffinitiesArray.getElementType(), - AffinitiesArray.getPointer(), Idx), - AffinitiesArray.getAlignment()), - KmpTaskAffinityInfoTy); + CGF.Builder.CreateGEP(AffinitiesArray, Idx), KmpTaskAffinityInfoTy); // affs[i].base_addr = &; LValue BaseAddrLVal = CGF.EmitLValueForField( Base, *std::next(KmpAffinityInfoRD->field_begin(), BaseAddr)); @@ -4668,12 +4665,10 @@ CGOpenMPRuntime::getDepobjElements(CodeGenFunction &CGF, LValue DepobjLVal, Base.getAddress(CGF), CGF.ConvertTypeForMem(KmpDependInfoPtrTy)); Base = CGF.MakeAddrLValue(Addr, KmpDependInfoTy, Base.getBaseInfo(), Base.getTBAAInfo()); - llvm::Value *DepObjAddr = CGF.Builder.CreateGEP( - Addr.getElementType(), Addr.getPointer(), - llvm::ConstantInt::get(CGF.IntPtrTy, -1, /*isSigned=*/true)); + Address DepObjAddr = CGF.Builder.CreateGEP( + Addr, llvm::ConstantInt::get(CGF.IntPtrTy, -1, /*isSigned=*/true)); LValue NumDepsBase = CGF.MakeAddrLValue( - Address(DepObjAddr, Addr.getAlignment()), KmpDependInfoTy, - Base.getBaseInfo(), Base.getTBAAInfo()); + DepObjAddr, KmpDependInfoTy, Base.getBaseInfo(), Base.getTBAAInfo()); // NumDeps = deps[i].base_addr; LValue BaseAddrLVal = CGF.EmitLValueForField( NumDepsBase, *std::next(KmpDependInfoRD->field_begin(), BaseAddr)); @@ -4709,10 +4704,7 @@ static void emitDependData(CodeGenFunction &CGF, QualType &KmpDependInfoTy, LValue &PosLVal = *Pos.get(); llvm::Value *Idx = CGF.EmitLoadOfScalar(PosLVal, E->getExprLoc()); Base = CGF.MakeAddrLValue( - Address(CGF.Builder.CreateGEP(DependenciesArray.getElementType(), - DependenciesArray.getPointer(), Idx), - DependenciesArray.getAlignment()), - KmpDependInfoTy); + CGF.Builder.CreateGEP(DependenciesArray, Idx), KmpDependInfoTy); } // deps[i].base_addr = &; LValue BaseAddrLVal = CGF.EmitLValueForField( @@ -4769,12 +4761,10 @@ emitDepobjElementsSizes(CodeGenFunction &CGF, QualType &KmpDependInfoTy, Base.getAddress(CGF), KmpDependInfoPtrT); Base = CGF.MakeAddrLValue(Addr, KmpDependInfoTy, Base.getBaseInfo(), Base.getTBAAInfo()); - llvm::Value *DepObjAddr = CGF.Builder.CreateGEP( - Addr.getElementType(), Addr.getPointer(), - llvm::ConstantInt::get(CGF.IntPtrTy, -1, /*isSigned=*/true)); + Address DepObjAddr = CGF.Builder.CreateGEP( + Addr, llvm::ConstantInt::get(CGF.IntPtrTy, -1, /*isSigned=*/true)); LValue NumDepsBase = CGF.MakeAddrLValue( - Address(DepObjAddr, Addr.getAlignment()), KmpDependInfoTy, - Base.getBaseInfo(), Base.getTBAAInfo()); + DepObjAddr, KmpDependInfoTy, Base.getBaseInfo(), Base.getTBAAInfo()); // NumDeps = deps[i].base_addr; LValue BaseAddrLVal = CGF.EmitLValueForField( NumDepsBase, *std::next(KmpDependInfoRD->field_begin(), BaseAddr)); @@ -4830,12 +4820,10 @@ static void emitDepobjElements(CodeGenFunction &CGF, QualType &KmpDependInfoTy, Base.getTBAAInfo()); // Get number of elements in a single depobj. 
- llvm::Value *DepObjAddr = CGF.Builder.CreateGEP( - Addr.getElementType(), Addr.getPointer(), - llvm::ConstantInt::get(CGF.IntPtrTy, -1, /*isSigned=*/true)); + Address DepObjAddr = CGF.Builder.CreateGEP( + Addr, llvm::ConstantInt::get(CGF.IntPtrTy, -1, /*isSigned=*/true)); LValue NumDepsBase = CGF.MakeAddrLValue( - Address(DepObjAddr, Addr.getAlignment()), KmpDependInfoTy, - Base.getBaseInfo(), Base.getTBAAInfo()); + DepObjAddr, KmpDependInfoTy, Base.getBaseInfo(), Base.getTBAAInfo()); // NumDeps = deps[i].base_addr; LValue BaseAddrLVal = CGF.EmitLValueForField( NumDepsBase, *std::next(KmpDependInfoRD->field_begin(), BaseAddr)); @@ -4847,10 +4835,7 @@ static void emitDepobjElements(CodeGenFunction &CGF, QualType &KmpDependInfoTy, ElSize, CGF.Builder.CreateIntCast(NumDeps, CGF.SizeTy, /*isSigned=*/false)); llvm::Value *Pos = CGF.EmitLoadOfScalar(PosLVal, E->getExprLoc()); - Address DepAddr = - Address(CGF.Builder.CreateGEP(DependenciesArray.getElementType(), - DependenciesArray.getPointer(), Pos), - DependenciesArray.getAlignment()); + Address DepAddr = CGF.Builder.CreateGEP(DependenciesArray, Pos); CGF.Builder.CreateMemCpy(DepAddr, Base.getAddress(CGF), Size); // Increase pos. diff --git a/clang/lib/CodeGen/CGOpenMPRuntimeGPU.cpp b/clang/lib/CodeGen/CGOpenMPRuntimeGPU.cpp index 6893ab6e2aa0..866454ddeaed 100644 --- a/clang/lib/CodeGen/CGOpenMPRuntimeGPU.cpp +++ b/clang/lib/CodeGen/CGOpenMPRuntimeGPU.cpp @@ -2190,11 +2190,8 @@ static llvm::Value *emitInterWarpCopyFunction(CodeGenModule &CGM, // elemptr = ((CopyType*)(elemptrptr)) + I Address ElemPtr = Address(ElemPtrPtr, Align); ElemPtr = Bld.CreateElementBitCast(ElemPtr, CopyType); - if (NumIters > 1) { - ElemPtr = Address(Bld.CreateGEP(ElemPtr.getElementType(), - ElemPtr.getPointer(), Cnt), - ElemPtr.getAlignment()); - } + if (NumIters > 1) + ElemPtr = Bld.CreateGEP(ElemPtr, Cnt); // Get pointer to location in transfer medium. // MediumPtr = &medium[warp_id] @@ -2260,11 +2257,8 @@ static llvm::Value *emitInterWarpCopyFunction(CodeGenModule &CGM, TargetElemPtrPtr, /*Volatile=*/false, C.VoidPtrTy, Loc); Address TargetElemPtr = Address(TargetElemPtrVal, Align); TargetElemPtr = Bld.CreateElementBitCast(TargetElemPtr, CopyType); - if (NumIters > 1) { - TargetElemPtr = Address(Bld.CreateGEP(TargetElemPtr.getElementType(), - TargetElemPtr.getPointer(), Cnt), - TargetElemPtr.getAlignment()); - } + if (NumIters > 1) + TargetElemPtr = Bld.CreateGEP(TargetElemPtr, Cnt); // *TargetElemPtr = SrcMediumVal; llvm::Value *SrcMediumValue = From bf2b5551f9192106b1997f7c9c3bdc36be2dc381 Mon Sep 17 00:00:00 2001 From: Nikita Popov Date: Thu, 23 Dec 2021 14:58:47 +0100 Subject: [PATCH 145/293] [CodeGen] Use CreateConstInBoundsGEP() in one more place This does exactly what this code manually implemented. 
--- clang/lib/CodeGen/CGExprCXX.cpp | 9 ++------- 1 file changed, 2 insertions(+), 7 deletions(-) diff --git a/clang/lib/CodeGen/CGExprCXX.cpp b/clang/lib/CodeGen/CGExprCXX.cpp index ca4450a8cf1c..0571c498c377 100644 --- a/clang/lib/CodeGen/CGExprCXX.cpp +++ b/clang/lib/CodeGen/CGExprCXX.cpp @@ -1052,13 +1052,8 @@ void CodeGenFunction::EmitNewArrayInitializer( InitListElements = cast(ILE->getType()->getAsArrayTypeUnsafe()) ->getSize().getZExtValue(); - CurPtr = - Address(Builder.CreateInBoundsGEP(CurPtr.getElementType(), - CurPtr.getPointer(), - Builder.getSize(InitListElements), - "string.init.end"), - CurPtr.getAlignment().alignmentAtOffset(InitListElements * - ElementSize)); + CurPtr = Builder.CreateConstInBoundsGEP( + CurPtr, InitListElements, "string.init.end"); // Zero out the rest, if any remain. llvm::ConstantInt *ConstNum = dyn_cast(NumElements); From 7977fd7cfc5b60742af062ae4aef46fe400cca9d Mon Sep 17 00:00:00 2001 From: Nikita Popov Date: Thu, 23 Dec 2021 15:15:26 +0100 Subject: [PATCH 146/293] [OpenMP] Remove no-op cast (NFC) This was casting the address to its own element type, which is a no-op. --- clang/lib/CodeGen/CGOpenMPRuntime.cpp | 2 -- 1 file changed, 2 deletions(-) diff --git a/clang/lib/CodeGen/CGOpenMPRuntime.cpp b/clang/lib/CodeGen/CGOpenMPRuntime.cpp index fe4cca16eb53..03aa84aecef4 100644 --- a/clang/lib/CodeGen/CGOpenMPRuntime.cpp +++ b/clang/lib/CodeGen/CGOpenMPRuntime.cpp @@ -688,8 +688,6 @@ static void EmitOMPAggregateInit(CodeGenFunction &CGF, Address DestAddr, // Drill down to the base element type on both arrays. const ArrayType *ArrayTy = Type->getAsArrayTypeUnsafe(); llvm::Value *NumElements = CGF.emitArrayLength(ArrayTy, ElementTy, DestAddr); - DestAddr = - CGF.Builder.CreateElementBitCast(DestAddr, DestAddr.getElementType()); if (DRD) SrcAddr = CGF.Builder.CreateElementBitCast(SrcAddr, DestAddr.getElementType()); From a3f50fb06dd467e54a62b371117eeecf88c78480 Mon Sep 17 00:00:00 2001 From: Simon Pilgrim Date: Thu, 23 Dec 2021 14:09:23 +0000 Subject: [PATCH 147/293] [X86] isVectorShiftByScalarCheap - vXi8 select(shift(x,splat0),shift(x,splat1)) is better than shift(x,select(splat0,splat1)) Even though we don't have vXi8 vector shifts (apart from XOP), it is still better to prefer shift (or funnel-shift/rotate) by scalar where possible. https://llvm.godbolt.org/z/6ss6ffTxv Differential Revision: https://reviews.llvm.org/D116191 --- llvm/lib/Target/X86/X86ISelLowering.cpp | 5 -- .../X86/vec-shift-inseltpoison.ll | 54 +++++++++++++------ .../CodeGenPrepare/X86/vec-shift.ll | 49 +++++++++++++---- .../X86/x86-shuffle-sink-inseltpoison.ll | 36 ++++++++++--- .../CodeGenPrepare/X86/x86-shuffle-sink.ll | 36 ++++++++++--- 5 files changed, 132 insertions(+), 48 deletions(-) diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp index 3d186857e00a..ec152d02fd1f 100644 --- a/llvm/lib/Target/X86/X86ISelLowering.cpp +++ b/llvm/lib/Target/X86/X86ISelLowering.cpp @@ -32994,11 +32994,6 @@ bool X86TargetLowering::isLegalAddressingMode(const DataLayout &DL, bool X86TargetLowering::isVectorShiftByScalarCheap(Type *Ty) const { unsigned Bits = Ty->getScalarSizeInBits(); - // 8-bit shifts are always expensive, but versions with a scalar amount aren't - // particularly cheaper than those without. - if (Bits == 8) - return false; - // XOP has v16i8/v8i16/v4i32/v2i64 variable vector shifts. // Splitting for v32i8/v16i16 on XOP+AVX2 targets is still preferred. 
if (Subtarget.hasXOP() && diff --git a/llvm/test/Transforms/CodeGenPrepare/X86/vec-shift-inseltpoison.ll b/llvm/test/Transforms/CodeGenPrepare/X86/vec-shift-inseltpoison.ll index 1c9617ab0d44..576c945eec3c 100644 --- a/llvm/test/Transforms/CodeGenPrepare/X86/vec-shift-inseltpoison.ll +++ b/llvm/test/Transforms/CodeGenPrepare/X86/vec-shift-inseltpoison.ll @@ -1,9 +1,9 @@ ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py -; RUN: opt -codegenprepare -mtriple=x86_64-- -mattr=+avx -S < %s | FileCheck %s --check-prefixes=ALL,AVX1 -; RUN: opt -codegenprepare -mtriple=x86_64-- -mattr=+avx2 -S < %s | FileCheck %s --check-prefixes=ALL,AVX2 -; RUN: opt -codegenprepare -mtriple=x86_64-- -mattr=+avx512bw -S < %s | FileCheck %s --check-prefixes=ALL,AVX512BW -; RUN: opt -codegenprepare -mtriple=x86_64-- -mattr=+avx,+xop -S < %s | FileCheck %s --check-prefixes=ALL,XOP -; RUN: opt -codegenprepare -mtriple=x86_64-- -mattr=+avx2,+xop -S < %s | FileCheck %s --check-prefixes=ALL,XOP +; RUN: opt -codegenprepare -mtriple=x86_64-- -mattr=+avx -S < %s | FileCheck %s --check-prefixes=AVX1 +; RUN: opt -codegenprepare -mtriple=x86_64-- -mattr=+avx2 -S < %s | FileCheck %s --check-prefixes=AVX2 +; RUN: opt -codegenprepare -mtriple=x86_64-- -mattr=+avx512bw -S < %s | FileCheck %s --check-prefixes=AVX512BW +; RUN: opt -codegenprepare -mtriple=x86_64-- -mattr=+avx,+xop -S < %s | FileCheck %s --check-prefixes=XOP +; RUN: opt -codegenprepare -mtriple=x86_64-- -mattr=+avx2,+xop -S < %s | FileCheck %s --check-prefixes=XOP ; RUN: opt -codegenprepare -mtriple=x86_64-- -mattr=+avx -S -enable-debugify < %s 2>&1 | FileCheck %s -check-prefix=DEBUG define <4 x i32> @vector_variable_shift_right_v4i32(<4 x i1> %cond, <4 x i32> %x, <4 x i32> %y, <4 x i32> %z) { @@ -37,7 +37,6 @@ define <4 x i32> @vector_variable_shift_right_v4i32(<4 x i1> %cond, <4 x i32> %x ; XOP-NEXT: [[SH:%.*]] = lshr <4 x i32> [[Z:%.*]], [[SEL]] ; XOP-NEXT: ret <4 x i32> [[SH]] ; - %splat1 = shufflevector <4 x i32> %x, <4 x i32> poison, <4 x i32> zeroinitializer %splat2 = shufflevector <4 x i32> %y, <4 x i32> poison, <4 x i32> zeroinitializer %sel = select <4 x i1> %cond, <4 x i32> %splat1, <4 x i32> %splat2 @@ -78,7 +77,6 @@ define <16 x i16> @vector_variable_shift_right_v16i16(<16 x i1> %cond, <16 x i16 ; XOP-NEXT: [[SH:%.*]] = lshr <16 x i16> [[Z:%.*]], [[SEL]] ; XOP-NEXT: ret <16 x i16> [[SH]] ; - %splat1 = shufflevector <16 x i16> %x, <16 x i16> poison, <16 x i32> zeroinitializer %splat2 = shufflevector <16 x i16> %y, <16 x i16> poison, <16 x i32> zeroinitializer %sel = select <16 x i1> %cond, <16 x i16> %splat1, <16 x i16> %splat2 @@ -87,14 +85,40 @@ define <16 x i16> @vector_variable_shift_right_v16i16(<16 x i1> %cond, <16 x i16 } define <32 x i8> @vector_variable_shift_right_v32i8(<32 x i1> %cond, <32 x i8> %x, <32 x i8> %y, <32 x i8> %z) { -; ALL-LABEL: @vector_variable_shift_right_v32i8( -; ALL-NEXT: [[SPLAT1:%.*]] = shufflevector <32 x i8> [[X:%.*]], <32 x i8> poison, <32 x i32> zeroinitializer -; ALL-NEXT: [[SPLAT2:%.*]] = shufflevector <32 x i8> [[Y:%.*]], <32 x i8> poison, <32 x i32> zeroinitializer -; ALL-NEXT: [[SEL:%.*]] = select <32 x i1> [[COND:%.*]], <32 x i8> [[SPLAT1]], <32 x i8> [[SPLAT2]] -; ALL-NEXT: [[SH:%.*]] = lshr <32 x i8> [[Z:%.*]], [[SEL]] -; ALL-NEXT: ret <32 x i8> [[SH]] +; AVX1-LABEL: @vector_variable_shift_right_v32i8( +; AVX1-NEXT: [[SPLAT1:%.*]] = shufflevector <32 x i8> [[X:%.*]], <32 x i8> poison, <32 x i32> zeroinitializer +; AVX1-NEXT: [[SPLAT2:%.*]] = shufflevector <32 x i8> [[Y:%.*]], <32 x i8> 
poison, <32 x i32> zeroinitializer +; AVX1-NEXT: [[SEL:%.*]] = select <32 x i1> [[COND:%.*]], <32 x i8> [[SPLAT1]], <32 x i8> [[SPLAT2]] +; AVX1-NEXT: [[TMP1:%.*]] = lshr <32 x i8> [[Z:%.*]], [[SPLAT1]] +; AVX1-NEXT: [[TMP2:%.*]] = lshr <32 x i8> [[Z]], [[SPLAT2]] +; AVX1-NEXT: [[TMP3:%.*]] = select <32 x i1> [[COND]], <32 x i8> [[TMP1]], <32 x i8> [[TMP2]] +; AVX1-NEXT: ret <32 x i8> [[TMP3]] +; +; AVX2-LABEL: @vector_variable_shift_right_v32i8( +; AVX2-NEXT: [[SPLAT1:%.*]] = shufflevector <32 x i8> [[X:%.*]], <32 x i8> poison, <32 x i32> zeroinitializer +; AVX2-NEXT: [[SPLAT2:%.*]] = shufflevector <32 x i8> [[Y:%.*]], <32 x i8> poison, <32 x i32> zeroinitializer +; AVX2-NEXT: [[SEL:%.*]] = select <32 x i1> [[COND:%.*]], <32 x i8> [[SPLAT1]], <32 x i8> [[SPLAT2]] +; AVX2-NEXT: [[TMP1:%.*]] = lshr <32 x i8> [[Z:%.*]], [[SPLAT1]] +; AVX2-NEXT: [[TMP2:%.*]] = lshr <32 x i8> [[Z]], [[SPLAT2]] +; AVX2-NEXT: [[TMP3:%.*]] = select <32 x i1> [[COND]], <32 x i8> [[TMP1]], <32 x i8> [[TMP2]] +; AVX2-NEXT: ret <32 x i8> [[TMP3]] +; +; AVX512BW-LABEL: @vector_variable_shift_right_v32i8( +; AVX512BW-NEXT: [[SPLAT1:%.*]] = shufflevector <32 x i8> [[X:%.*]], <32 x i8> poison, <32 x i32> zeroinitializer +; AVX512BW-NEXT: [[SPLAT2:%.*]] = shufflevector <32 x i8> [[Y:%.*]], <32 x i8> poison, <32 x i32> zeroinitializer +; AVX512BW-NEXT: [[SEL:%.*]] = select <32 x i1> [[COND:%.*]], <32 x i8> [[SPLAT1]], <32 x i8> [[SPLAT2]] +; AVX512BW-NEXT: [[TMP1:%.*]] = lshr <32 x i8> [[Z:%.*]], [[SPLAT1]] +; AVX512BW-NEXT: [[TMP2:%.*]] = lshr <32 x i8> [[Z]], [[SPLAT2]] +; AVX512BW-NEXT: [[TMP3:%.*]] = select <32 x i1> [[COND]], <32 x i8> [[TMP1]], <32 x i8> [[TMP2]] +; AVX512BW-NEXT: ret <32 x i8> [[TMP3]] +; +; XOP-LABEL: @vector_variable_shift_right_v32i8( +; XOP-NEXT: [[SPLAT1:%.*]] = shufflevector <32 x i8> [[X:%.*]], <32 x i8> poison, <32 x i32> zeroinitializer +; XOP-NEXT: [[SPLAT2:%.*]] = shufflevector <32 x i8> [[Y:%.*]], <32 x i8> poison, <32 x i32> zeroinitializer +; XOP-NEXT: [[SEL:%.*]] = select <32 x i1> [[COND:%.*]], <32 x i8> [[SPLAT1]], <32 x i8> [[SPLAT2]] +; XOP-NEXT: [[SH:%.*]] = lshr <32 x i8> [[Z:%.*]], [[SEL]] +; XOP-NEXT: ret <32 x i8> [[SH]] ; - %splat1 = shufflevector <32 x i8> %x, <32 x i8> poison, <32 x i32> zeroinitializer %splat2 = shufflevector <32 x i8> %y, <32 x i8> poison, <32 x i32> zeroinitializer %sel = select <32 x i1> %cond, <32 x i8> %splat1, <32 x i8> %splat2 @@ -233,7 +257,6 @@ define void @vector_variable_shift_left_loop(i32* nocapture %arr, i8* nocapture ; XOP: exit: ; XOP-NEXT: ret void ; - entry: %cmp16 = icmp sgt i32 %count, 0 %wide.trip.count = zext i32 %count to i64 @@ -377,7 +400,6 @@ define void @fancierRotate2(i32* %arr, i8* %control, i32 %rot0, i32 %rot1) { ; XOP: exit: ; XOP-NEXT: ret void ; - entry: %i0 = insertelement <8 x i32> poison, i32 %rot0, i32 0 %s0 = shufflevector <8 x i32> %i0, <8 x i32> poison, <8 x i32> zeroinitializer diff --git a/llvm/test/Transforms/CodeGenPrepare/X86/vec-shift.ll b/llvm/test/Transforms/CodeGenPrepare/X86/vec-shift.ll index 7d685b78f310..d0ceec32b90c 100644 --- a/llvm/test/Transforms/CodeGenPrepare/X86/vec-shift.ll +++ b/llvm/test/Transforms/CodeGenPrepare/X86/vec-shift.ll @@ -1,9 +1,9 @@ ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py -; RUN: opt -codegenprepare -mtriple=x86_64-- -mattr=+avx -S < %s | FileCheck %s --check-prefixes=ALL,AVX1 -; RUN: opt -codegenprepare -mtriple=x86_64-- -mattr=+avx2 -S < %s | FileCheck %s --check-prefixes=ALL,AVX2 -; RUN: opt -codegenprepare -mtriple=x86_64-- -mattr=+avx512bw 
-S < %s | FileCheck %s --check-prefixes=ALL,AVX512BW -; RUN: opt -codegenprepare -mtriple=x86_64-- -mattr=+avx,+xop -S < %s | FileCheck %s --check-prefixes=ALL,XOP -; RUN: opt -codegenprepare -mtriple=x86_64-- -mattr=+avx2,+xop -S < %s | FileCheck %s --check-prefixes=ALL,XOP +; RUN: opt -codegenprepare -mtriple=x86_64-- -mattr=+avx -S < %s | FileCheck %s --check-prefixes=AVX1 +; RUN: opt -codegenprepare -mtriple=x86_64-- -mattr=+avx2 -S < %s | FileCheck %s --check-prefixes=AVX2 +; RUN: opt -codegenprepare -mtriple=x86_64-- -mattr=+avx512bw -S < %s | FileCheck %s --check-prefixes=AVX512BW +; RUN: opt -codegenprepare -mtriple=x86_64-- -mattr=+avx,+xop -S < %s | FileCheck %s --check-prefixes=XOP +; RUN: opt -codegenprepare -mtriple=x86_64-- -mattr=+avx2,+xop -S < %s | FileCheck %s --check-prefixes=XOP ; RUN: opt -codegenprepare -mtriple=x86_64-- -mattr=+avx -S -enable-debugify < %s 2>&1 | FileCheck %s -check-prefix=DEBUG define <4 x i32> @vector_variable_shift_right_v4i32(<4 x i1> %cond, <4 x i32> %x, <4 x i32> %y, <4 x i32> %z) { @@ -85,12 +85,39 @@ define <16 x i16> @vector_variable_shift_right_v16i16(<16 x i1> %cond, <16 x i16 } define <32 x i8> @vector_variable_shift_right_v32i8(<32 x i1> %cond, <32 x i8> %x, <32 x i8> %y, <32 x i8> %z) { -; ALL-LABEL: @vector_variable_shift_right_v32i8( -; ALL-NEXT: [[SPLAT1:%.*]] = shufflevector <32 x i8> [[X:%.*]], <32 x i8> undef, <32 x i32> zeroinitializer -; ALL-NEXT: [[SPLAT2:%.*]] = shufflevector <32 x i8> [[Y:%.*]], <32 x i8> undef, <32 x i32> zeroinitializer -; ALL-NEXT: [[SEL:%.*]] = select <32 x i1> [[COND:%.*]], <32 x i8> [[SPLAT1]], <32 x i8> [[SPLAT2]] -; ALL-NEXT: [[SH:%.*]] = lshr <32 x i8> [[Z:%.*]], [[SEL]] -; ALL-NEXT: ret <32 x i8> [[SH]] +; AVX1-LABEL: @vector_variable_shift_right_v32i8( +; AVX1-NEXT: [[SPLAT1:%.*]] = shufflevector <32 x i8> [[X:%.*]], <32 x i8> undef, <32 x i32> zeroinitializer +; AVX1-NEXT: [[SPLAT2:%.*]] = shufflevector <32 x i8> [[Y:%.*]], <32 x i8> undef, <32 x i32> zeroinitializer +; AVX1-NEXT: [[SEL:%.*]] = select <32 x i1> [[COND:%.*]], <32 x i8> [[SPLAT1]], <32 x i8> [[SPLAT2]] +; AVX1-NEXT: [[TMP1:%.*]] = lshr <32 x i8> [[Z:%.*]], [[SPLAT1]] +; AVX1-NEXT: [[TMP2:%.*]] = lshr <32 x i8> [[Z]], [[SPLAT2]] +; AVX1-NEXT: [[TMP3:%.*]] = select <32 x i1> [[COND]], <32 x i8> [[TMP1]], <32 x i8> [[TMP2]] +; AVX1-NEXT: ret <32 x i8> [[TMP3]] +; +; AVX2-LABEL: @vector_variable_shift_right_v32i8( +; AVX2-NEXT: [[SPLAT1:%.*]] = shufflevector <32 x i8> [[X:%.*]], <32 x i8> undef, <32 x i32> zeroinitializer +; AVX2-NEXT: [[SPLAT2:%.*]] = shufflevector <32 x i8> [[Y:%.*]], <32 x i8> undef, <32 x i32> zeroinitializer +; AVX2-NEXT: [[SEL:%.*]] = select <32 x i1> [[COND:%.*]], <32 x i8> [[SPLAT1]], <32 x i8> [[SPLAT2]] +; AVX2-NEXT: [[TMP1:%.*]] = lshr <32 x i8> [[Z:%.*]], [[SPLAT1]] +; AVX2-NEXT: [[TMP2:%.*]] = lshr <32 x i8> [[Z]], [[SPLAT2]] +; AVX2-NEXT: [[TMP3:%.*]] = select <32 x i1> [[COND]], <32 x i8> [[TMP1]], <32 x i8> [[TMP2]] +; AVX2-NEXT: ret <32 x i8> [[TMP3]] +; +; AVX512BW-LABEL: @vector_variable_shift_right_v32i8( +; AVX512BW-NEXT: [[SPLAT1:%.*]] = shufflevector <32 x i8> [[X:%.*]], <32 x i8> undef, <32 x i32> zeroinitializer +; AVX512BW-NEXT: [[SPLAT2:%.*]] = shufflevector <32 x i8> [[Y:%.*]], <32 x i8> undef, <32 x i32> zeroinitializer +; AVX512BW-NEXT: [[SEL:%.*]] = select <32 x i1> [[COND:%.*]], <32 x i8> [[SPLAT1]], <32 x i8> [[SPLAT2]] +; AVX512BW-NEXT: [[TMP1:%.*]] = lshr <32 x i8> [[Z:%.*]], [[SPLAT1]] +; AVX512BW-NEXT: [[TMP2:%.*]] = lshr <32 x i8> [[Z]], [[SPLAT2]] +; AVX512BW-NEXT: [[TMP3:%.*]] = 
select <32 x i1> [[COND]], <32 x i8> [[TMP1]], <32 x i8> [[TMP2]] +; AVX512BW-NEXT: ret <32 x i8> [[TMP3]] +; +; XOP-LABEL: @vector_variable_shift_right_v32i8( +; XOP-NEXT: [[SPLAT1:%.*]] = shufflevector <32 x i8> [[X:%.*]], <32 x i8> undef, <32 x i32> zeroinitializer +; XOP-NEXT: [[SPLAT2:%.*]] = shufflevector <32 x i8> [[Y:%.*]], <32 x i8> undef, <32 x i32> zeroinitializer +; XOP-NEXT: [[SEL:%.*]] = select <32 x i1> [[COND:%.*]], <32 x i8> [[SPLAT1]], <32 x i8> [[SPLAT2]] +; XOP-NEXT: [[SH:%.*]] = lshr <32 x i8> [[Z:%.*]], [[SEL]] +; XOP-NEXT: ret <32 x i8> [[SH]] ; %splat1 = shufflevector <32 x i8> %x, <32 x i8> undef, <32 x i32> zeroinitializer %splat2 = shufflevector <32 x i8> %y, <32 x i8> undef, <32 x i32> zeroinitializer diff --git a/llvm/test/Transforms/CodeGenPrepare/X86/x86-shuffle-sink-inseltpoison.ll b/llvm/test/Transforms/CodeGenPrepare/X86/x86-shuffle-sink-inseltpoison.ll index be3723b7d554..7fd50c4d80d6 100644 --- a/llvm/test/Transforms/CodeGenPrepare/X86/x86-shuffle-sink-inseltpoison.ll +++ b/llvm/test/Transforms/CodeGenPrepare/X86/x86-shuffle-sink-inseltpoison.ll @@ -8,14 +8,34 @@ target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f3 target triple = "x86_64-apple-darwin10.9.0" define <16 x i8> @test_8bit(<16 x i8> %lhs, <16 x i8> %tmp, i1 %tst) { -; CHECK-LABEL: @test_8bit( -; CHECK-NEXT: [[MASK:%.*]] = shufflevector <16 x i8> [[TMP:%.*]], <16 x i8> poison, <16 x i32> zeroinitializer -; CHECK-NEXT: br i1 [[TST:%.*]], label [[IF_TRUE:%.*]], label [[IF_FALSE:%.*]] -; CHECK: if_true: -; CHECK-NEXT: ret <16 x i8> [[MASK]] -; CHECK: if_false: -; CHECK-NEXT: [[RES:%.*]] = shl <16 x i8> [[LHS:%.*]], [[MASK]] -; CHECK-NEXT: ret <16 x i8> [[RES]] +; CHECK-SSE2-LABEL: @test_8bit( +; CHECK-SSE2-NEXT: [[MASK:%.*]] = shufflevector <16 x i8> [[TMP:%.*]], <16 x i8> poison, <16 x i32> zeroinitializer +; CHECK-SSE2-NEXT: br i1 [[TST:%.*]], label [[IF_TRUE:%.*]], label [[IF_FALSE:%.*]] +; CHECK-SSE2: if_true: +; CHECK-SSE2-NEXT: ret <16 x i8> [[MASK]] +; CHECK-SSE2: if_false: +; CHECK-SSE2-NEXT: [[TMP1:%.*]] = shufflevector <16 x i8> [[TMP]], <16 x i8> poison, <16 x i32> zeroinitializer +; CHECK-SSE2-NEXT: [[RES:%.*]] = shl <16 x i8> [[LHS:%.*]], [[TMP1]] +; CHECK-SSE2-NEXT: ret <16 x i8> [[RES]] +; +; CHECK-XOP-LABEL: @test_8bit( +; CHECK-XOP-NEXT: [[MASK:%.*]] = shufflevector <16 x i8> [[TMP:%.*]], <16 x i8> poison, <16 x i32> zeroinitializer +; CHECK-XOP-NEXT: br i1 [[TST:%.*]], label [[IF_TRUE:%.*]], label [[IF_FALSE:%.*]] +; CHECK-XOP: if_true: +; CHECK-XOP-NEXT: ret <16 x i8> [[MASK]] +; CHECK-XOP: if_false: +; CHECK-XOP-NEXT: [[RES:%.*]] = shl <16 x i8> [[LHS:%.*]], [[MASK]] +; CHECK-XOP-NEXT: ret <16 x i8> [[RES]] +; +; CHECK-AVX-LABEL: @test_8bit( +; CHECK-AVX-NEXT: [[MASK:%.*]] = shufflevector <16 x i8> [[TMP:%.*]], <16 x i8> poison, <16 x i32> zeroinitializer +; CHECK-AVX-NEXT: br i1 [[TST:%.*]], label [[IF_TRUE:%.*]], label [[IF_FALSE:%.*]] +; CHECK-AVX: if_true: +; CHECK-AVX-NEXT: ret <16 x i8> [[MASK]] +; CHECK-AVX: if_false: +; CHECK-AVX-NEXT: [[TMP1:%.*]] = shufflevector <16 x i8> [[TMP]], <16 x i8> poison, <16 x i32> zeroinitializer +; CHECK-AVX-NEXT: [[RES:%.*]] = shl <16 x i8> [[LHS:%.*]], [[TMP1]] +; CHECK-AVX-NEXT: ret <16 x i8> [[RES]] ; %mask = shufflevector <16 x i8> %tmp, <16 x i8> poison, <16 x i32> zeroinitializer br i1 %tst, label %if_true, label %if_false diff --git a/llvm/test/Transforms/CodeGenPrepare/X86/x86-shuffle-sink.ll b/llvm/test/Transforms/CodeGenPrepare/X86/x86-shuffle-sink.ll index a443e7100062..97e00023aa02 100644 --- 
a/llvm/test/Transforms/CodeGenPrepare/X86/x86-shuffle-sink.ll +++ b/llvm/test/Transforms/CodeGenPrepare/X86/x86-shuffle-sink.ll @@ -8,14 +8,34 @@ target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f3 target triple = "x86_64-apple-darwin10.9.0" define <16 x i8> @test_8bit(<16 x i8> %lhs, <16 x i8> %tmp, i1 %tst) { -; CHECK-LABEL: @test_8bit( -; CHECK-NEXT: [[MASK:%.*]] = shufflevector <16 x i8> [[TMP:%.*]], <16 x i8> undef, <16 x i32> zeroinitializer -; CHECK-NEXT: br i1 [[TST:%.*]], label [[IF_TRUE:%.*]], label [[IF_FALSE:%.*]] -; CHECK: if_true: -; CHECK-NEXT: ret <16 x i8> [[MASK]] -; CHECK: if_false: -; CHECK-NEXT: [[RES:%.*]] = shl <16 x i8> [[LHS:%.*]], [[MASK]] -; CHECK-NEXT: ret <16 x i8> [[RES]] +; CHECK-SSE2-LABEL: @test_8bit( +; CHECK-SSE2-NEXT: [[MASK:%.*]] = shufflevector <16 x i8> [[TMP:%.*]], <16 x i8> undef, <16 x i32> zeroinitializer +; CHECK-SSE2-NEXT: br i1 [[TST:%.*]], label [[IF_TRUE:%.*]], label [[IF_FALSE:%.*]] +; CHECK-SSE2: if_true: +; CHECK-SSE2-NEXT: ret <16 x i8> [[MASK]] +; CHECK-SSE2: if_false: +; CHECK-SSE2-NEXT: [[TMP1:%.*]] = shufflevector <16 x i8> [[TMP]], <16 x i8> undef, <16 x i32> zeroinitializer +; CHECK-SSE2-NEXT: [[RES:%.*]] = shl <16 x i8> [[LHS:%.*]], [[TMP1]] +; CHECK-SSE2-NEXT: ret <16 x i8> [[RES]] +; +; CHECK-XOP-LABEL: @test_8bit( +; CHECK-XOP-NEXT: [[MASK:%.*]] = shufflevector <16 x i8> [[TMP:%.*]], <16 x i8> undef, <16 x i32> zeroinitializer +; CHECK-XOP-NEXT: br i1 [[TST:%.*]], label [[IF_TRUE:%.*]], label [[IF_FALSE:%.*]] +; CHECK-XOP: if_true: +; CHECK-XOP-NEXT: ret <16 x i8> [[MASK]] +; CHECK-XOP: if_false: +; CHECK-XOP-NEXT: [[RES:%.*]] = shl <16 x i8> [[LHS:%.*]], [[MASK]] +; CHECK-XOP-NEXT: ret <16 x i8> [[RES]] +; +; CHECK-AVX-LABEL: @test_8bit( +; CHECK-AVX-NEXT: [[MASK:%.*]] = shufflevector <16 x i8> [[TMP:%.*]], <16 x i8> undef, <16 x i32> zeroinitializer +; CHECK-AVX-NEXT: br i1 [[TST:%.*]], label [[IF_TRUE:%.*]], label [[IF_FALSE:%.*]] +; CHECK-AVX: if_true: +; CHECK-AVX-NEXT: ret <16 x i8> [[MASK]] +; CHECK-AVX: if_false: +; CHECK-AVX-NEXT: [[TMP1:%.*]] = shufflevector <16 x i8> [[TMP]], <16 x i8> undef, <16 x i32> zeroinitializer +; CHECK-AVX-NEXT: [[RES:%.*]] = shl <16 x i8> [[LHS:%.*]], [[TMP1]] +; CHECK-AVX-NEXT: ret <16 x i8> [[RES]] ; %mask = shufflevector <16 x i8> %tmp, <16 x i8> undef, <16 x i32> zeroinitializer br i1 %tst, label %if_true, label %if_false From a9486a40f7d18115682737b912f550ceef9b7d8d Mon Sep 17 00:00:00 2001 From: Craig Topper Date: Thu, 23 Dec 2021 08:11:09 -0600 Subject: [PATCH 148/293] [RISCV] Disable interleaving scalar loops in the loop vectorizer. The loop vectorizer can interleave scalar loops even if it doesn't vectorize them. I don't believe we intended to enable this when we enabled interleaving for vector instructions. Disable interleaving for VF=1 like X86 and AMDGPU already do. Test lifted from AMDGPU. 
Differential Revision: https://reviews.llvm.org/D115975 --- .../Target/RISCV/RISCVTargetTransformInfo.h | 4 +- .../RISCV/unroll-in-loop-vectorizer.ll | 43 +++++++++++++++++++ 2 files changed, 46 insertions(+), 1 deletion(-) create mode 100644 llvm/test/Transforms/LoopVectorize/RISCV/unroll-in-loop-vectorizer.ll diff --git a/llvm/lib/Target/RISCV/RISCVTargetTransformInfo.h b/llvm/lib/Target/RISCV/RISCVTargetTransformInfo.h index 016db9d4c26d..7353496f4684 100644 --- a/llvm/lib/Target/RISCV/RISCVTargetTransformInfo.h +++ b/llvm/lib/Target/RISCV/RISCVTargetTransformInfo.h @@ -185,7 +185,9 @@ class RISCVTTIImpl : public BasicTTIImplBase { } unsigned getMaxInterleaveFactor(unsigned VF) { - return ST->getMaxInterleaveFactor(); + // If the loop will not be vectorized, don't interleave the loop. + // Let regular unroll to unroll the loop. + return VF == 1 ? 1 : ST->getMaxInterleaveFactor(); } }; diff --git a/llvm/test/Transforms/LoopVectorize/RISCV/unroll-in-loop-vectorizer.ll b/llvm/test/Transforms/LoopVectorize/RISCV/unroll-in-loop-vectorizer.ll new file mode 100644 index 000000000000..3349d4554491 --- /dev/null +++ b/llvm/test/Transforms/LoopVectorize/RISCV/unroll-in-loop-vectorizer.ll @@ -0,0 +1,43 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py +; RUN: opt -S -mtriple=riscv64 -mattr=+experimental-v -loop-vectorize < %s | FileCheck %s + +; Make sure we don't unroll scalar loops in the loop vectorizer. +; +define void @small_loop(i32* nocapture %inArray, i32 %size) nounwind { +; CHECK-LABEL: @small_loop( +; CHECK-NEXT: entry: +; CHECK-NEXT: [[TMP0:%.*]] = icmp sgt i32 [[SIZE:%.*]], 0 +; CHECK-NEXT: br i1 [[TMP0]], label [[LOOP_PREHEADER:%.*]], label [[EXIT:%.*]] +; CHECK: loop.preheader: +; CHECK-NEXT: br label [[LOOP:%.*]] +; CHECK: loop: +; CHECK-NEXT: [[IV:%.*]] = phi i32 [ [[IV1:%.*]], [[LOOP]] ], [ 0, [[LOOP_PREHEADER]] ] +; CHECK-NEXT: [[TMP1:%.*]] = getelementptr inbounds i32, i32* [[INARRAY:%.*]], i32 [[IV]] +; CHECK-NEXT: [[TMP2:%.*]] = load i32, i32* [[TMP1]], align 4 +; CHECK-NEXT: [[TMP3:%.*]] = add nsw i32 [[TMP2]], 6 +; CHECK-NEXT: store i32 [[TMP3]], i32* [[TMP1]], align 4 +; CHECK-NEXT: [[IV1]] = add i32 [[IV]], 1 +; CHECK-NEXT: [[COND:%.*]] = icmp eq i32 [[IV1]], [[SIZE]] +; CHECK-NEXT: br i1 [[COND]], label [[EXIT_LOOPEXIT:%.*]], label [[LOOP]] +; CHECK: exit.loopexit: +; CHECK-NEXT: br label [[EXIT]] +; CHECK: exit: +; CHECK-NEXT: ret void +; +entry: + %0 = icmp sgt i32 %size, 0 + br i1 %0, label %loop, label %exit + +loop: ; preds = %entry, %loop + %iv = phi i32 [ %iv1, %loop ], [ 0, %entry ] + %1 = getelementptr inbounds i32, i32* %inArray, i32 %iv + %2 = load i32, i32* %1, align 4 + %3 = add nsw i32 %2, 6 + store i32 %3, i32* %1, align 4 + %iv1 = add i32 %iv, 1 + %cond = icmp eq i32 %iv1, %size + br i1 %cond, label %exit, label %loop + +exit: ; preds = %loop, %entry + ret void +} From 0ff20f2f4448e24304b25c28bcc889806874488d Mon Sep 17 00:00:00 2001 From: Mehrnoosh Heidarpour Date: Tue, 14 Dec 2021 16:05:52 -0500 Subject: [PATCH 149/293] [InstSimplify] Fold logic AND to zero Adding following fold opportunity: ((A | B) ^ A) & ((A | B) ^ B) --> 0 Reviewed By: spatel, rampitec Differential Revision: https://reviews.llvm.org/D115755 --- llvm/lib/Analysis/InstructionSimplify.cpp | 9 ++ llvm/test/Transforms/InstSimplify/and.ll | 115 +++++++++++++++++----- 2 files changed, 100 insertions(+), 24 deletions(-) diff --git a/llvm/lib/Analysis/InstructionSimplify.cpp b/llvm/lib/Analysis/InstructionSimplify.cpp index 029859f197e0..4831b22b1d46 100644 --- 
a/llvm/lib/Analysis/InstructionSimplify.cpp +++ b/llvm/lib/Analysis/InstructionSimplify.cpp @@ -2173,6 +2173,15 @@ static Value *SimplifyAndInst(Value *Op0, Value *Op1, const SimplifyQuery &Q, } } + // ((X | Y) ^ X ) & ((X | Y) ^ Y) --> 0 + // ((X | Y) ^ Y ) & ((X | Y) ^ X) --> 0 + BinaryOperator *Or; + if (match(Op0, m_c_Xor(m_Value(X), + m_CombineAnd(m_BinOp(Or), + m_c_Or(m_Deferred(X), m_Value(Y))))) && + match(Op1, m_c_Xor(m_Specific(Or), m_Specific(Y)))) + return Constant::getNullValue(Op0->getType()); + return nullptr; } diff --git a/llvm/test/Transforms/InstSimplify/and.ll b/llvm/test/Transforms/InstSimplify/and.ll index 73b439c381d3..4900d48c5ffe 100644 --- a/llvm/test/Transforms/InstSimplify/and.ll +++ b/llvm/test/Transforms/InstSimplify/and.ll @@ -132,15 +132,11 @@ define i8 @or_or_not_no_common_op(i8 %x, i8 %y, i8 %z) { ret i8 %and } -; ((A | B) ^ A ) & ((A | B) ^ B) +; ((X | Y) ^ X ) & ((X | Y) ^ Y) --> 0 define i8 @or_xor(i8 %x, i8 %y) { ; CHECK-LABEL: @or_xor( -; CHECK-NEXT: [[OR:%.*]] = or i8 [[X:%.*]], [[Y:%.*]] -; CHECK-NEXT: [[XOR1:%.*]] = xor i8 [[OR]], [[X]] -; CHECK-NEXT: [[XOR2:%.*]] = xor i8 [[OR]], [[Y]] -; CHECK-NEXT: [[AND:%.*]] = and i8 [[XOR1]], [[XOR2]] -; CHECK-NEXT: ret i8 [[AND]] +; CHECK-NEXT: ret i8 0 ; %or = or i8 %x, %y %xor1 = xor i8 %or, %x @@ -149,15 +145,11 @@ define i8 @or_xor(i8 %x, i8 %y) { ret i8 %and } -; ((A | B) ^ B ) & ((A | B) ^ A) +; ((X | Y) ^ Y ) & ((X | Y) ^ X) --> 0 define i8 @or_xor_commute1(i8 %x, i8 %y) { ; CHECK-LABEL: @or_xor_commute1( -; CHECK-NEXT: [[OR:%.*]] = or i8 [[X:%.*]], [[Y:%.*]] -; CHECK-NEXT: [[XOR1:%.*]] = xor i8 [[OR]], [[X]] -; CHECK-NEXT: [[XOR2:%.*]] = xor i8 [[OR]], [[Y]] -; CHECK-NEXT: [[AND:%.*]] = and i8 [[XOR2]], [[XOR1]] -; CHECK-NEXT: ret i8 [[AND]] +; CHECK-NEXT: ret i8 0 ; %or = or i8 %x, %y %xor1 = xor i8 %or, %x @@ -166,15 +158,11 @@ define i8 @or_xor_commute1(i8 %x, i8 %y) { ret i8 %and } -; (A ^ (A | B) ) & (B ^ (A | B)) +; (X ^ (X | Y) ) & (Y ^ (X | Y)) --> 0 define i71 @or_xor_commute2(i71 %x, i71 %y) { ; CHECK-LABEL: @or_xor_commute2( -; CHECK-NEXT: [[OR:%.*]] = or i71 [[X:%.*]], [[Y:%.*]] -; CHECK-NEXT: [[XOR1:%.*]] = xor i71 [[X]], [[OR]] -; CHECK-NEXT: [[XOR2:%.*]] = xor i71 [[Y]], [[OR]] -; CHECK-NEXT: [[AND:%.*]] = and i71 [[XOR1]], [[XOR2]] -; CHECK-NEXT: ret i71 [[AND]] +; CHECK-NEXT: ret i71 0 ; %or = or i71 %x, %y %xor1 = xor i71 %x, %or @@ -183,15 +171,11 @@ define i71 @or_xor_commute2(i71 %x, i71 %y) { ret i71 %and } -; (B ^ (A | B) ) & (A ^ (A | B)) +; (Y ^ (X | Y) ) & (X ^ (X | Y)) --> 0 define <2 x i64> @or_xor_commute3(<2 x i64> %x, <2 x i64> %y) { ; CHECK-LABEL: @or_xor_commute3( -; CHECK-NEXT: [[OR:%.*]] = or <2 x i64> [[X:%.*]], [[Y:%.*]] -; CHECK-NEXT: [[XOR1:%.*]] = xor <2 x i64> [[Y]], [[OR]] -; CHECK-NEXT: [[XOR2:%.*]] = xor <2 x i64> [[X]], [[OR]] -; CHECK-NEXT: [[AND:%.*]] = and <2 x i64> [[XOR1]], [[XOR2]] -; CHECK-NEXT: ret <2 x i64> [[AND]] +; CHECK-NEXT: ret <2 x i64> zeroinitializer ; %or = or <2 x i64> %x, %y %xor1 = xor <2 x i64> %y, %or @@ -199,3 +183,86 @@ define <2 x i64> @or_xor_commute3(<2 x i64> %x, <2 x i64> %y) { %and = and <2 x i64> %xor1, %xor2 ret <2 x i64> %and } + +; ((X | Y) ^ X ) & (Y ^ (X | Y)) --> 0 + +define i32 @or_xor_commute4(i32 %x, i32 %y) { +; CHECK-LABEL: @or_xor_commute4( +; CHECK-NEXT: ret i32 0 +; + %or = or i32 %x, %y + %xor1 = xor i32 %or, %x + %xor2 = xor i32 %y, %or + %and = and i32 %xor1, %xor2 + ret i32 %and +} + +; ((X | Y) ^ Y ) & (X ^ (X | Y)) --> 0 + +define i32 @or_xor_commute5(i32 %x, i32 %y) { +; CHECK-LABEL: @or_xor_commute5( 
+; CHECK-NEXT: ret i32 0 +; + %or = or i32 %x, %y + %xor1 = xor i32 %or, %y + %xor2 = xor i32 %x, %or + %and = and i32 %xor1, %xor2 + ret i32 %and +} + +; (X ^ (X | Y) ) & ((X | Y) ^ Y) --> 0 + +define i32 @or_xor_commute6(i32 %x, i32 %y) { +; CHECK-LABEL: @or_xor_commute6( +; CHECK-NEXT: ret i32 0 +; + %or = or i32 %x, %y + %xor1 = xor i32 %x, %or + %xor2 = xor i32 %or, %y + %and = and i32 %xor1, %xor2 + ret i32 %and +} + +; (Y ^ (X | Y) ) & ((X | Y) ^ X) --> 0 + +define i32 @or_xor_commute7(i32 %x, i32 %y) { +; CHECK-LABEL: @or_xor_commute7( +; CHECK-NEXT: ret i32 0 +; + %or = or i32 %x, %y + %xor1 = xor i32 %y, %or + %xor2 = xor i32 %or, %x + %and = and i32 %xor1, %xor2 + ret i32 %and +} + +; (Y ^ (X | Y) ) & ((X | Y) ^ X) --> 0 + +define i32 @or_xor_complex_op(i32 %x, i32 %in) { +; CHECK-LABEL: @or_xor_complex_op( +; CHECK-NEXT: ret i32 0 +; + %y = or i32 %in, 1 + %or = or i32 %x, %y + %xor1 = xor i32 %y, %or + %xor2 = xor i32 %or, %x + %and = and i32 %xor1, %xor2 + ret i32 %and +} + +define i32 @or_xor_limitation(i32 %x, i32 %y) { +; CHECK-LABEL: @or_xor_limitation( +; CHECK-NEXT: [[OR1:%.*]] = or i32 [[Y:%.*]], [[X:%.*]] +; CHECK-NEXT: [[OR2:%.*]] = or i32 [[X]], [[Y]] +; CHECK-NEXT: [[XOR1:%.*]] = xor i32 [[Y]], [[OR1]] +; CHECK-NEXT: [[XOR2:%.*]] = xor i32 [[OR2]], [[X]] +; CHECK-NEXT: [[AND:%.*]] = and i32 [[XOR1]], [[XOR2]] +; CHECK-NEXT: ret i32 [[AND]] +; + %or1 = or i32 %y, %x + %or2 = or i32 %x, %y + %xor1 = xor i32 %y, %or1 + %xor2 = xor i32 %or2, %x + %and = and i32 %xor1, %xor2 + ret i32 %and +} From 71fc4bbdd2009019acca0f66a7574f97966620fc Mon Sep 17 00:00:00 2001 From: Simon Pilgrim Date: Thu, 23 Dec 2021 14:52:23 +0000 Subject: [PATCH 150/293] [X86][SSE] Add ISD::ROTR support Fix issue in TargetLowering::expandROT where we only attempt to flip a rotation if the other direction has better support - this matches TargetLowering::expandFunnelShift This allows us to enable ISD::ROTR lowering on SSE targets, which particularly simplifies/improves codegen for splat amount and AVX2 per-element shifts. --- .../CodeGen/SelectionDAG/TargetLowering.cpp | 5 +- llvm/lib/Target/X86/X86ISelLowering.cpp | 93 +++++++------ llvm/test/CodeGen/X86/funnel-shift-rot.ll | 6 +- llvm/test/CodeGen/X86/vector-fshr-rot-128.ll | 130 +++++++----------- llvm/test/CodeGen/X86/vector-fshr-rot-256.ll | 93 +++++-------- llvm/test/CodeGen/X86/vector-fshr-rot-512.ll | 98 ++++++------- .../CodeGen/X86/vector-fshr-rot-sub128.ll | 6 +- 7 files changed, 191 insertions(+), 240 deletions(-) diff --git a/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp b/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp index 4a0b23f191dc..e6b06ab93d6b 100644 --- a/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp @@ -6657,9 +6657,10 @@ SDValue TargetLowering::expandROT(SDNode *Node, bool AllowVectorOps, EVT ShVT = Op1.getValueType(); SDValue Zero = DAG.getConstant(0, DL, ShVT); - // If a rotate in the other direction is supported, use it. + // If a rotate in the other direction is more supported, use it. unsigned RevRot = IsLeft ? 
ISD::ROTR : ISD::ROTL; - if (isOperationLegalOrCustom(RevRot, VT) && isPowerOf2_32(EltSizeInBits)) { + if (!isOperationLegalOrCustom(Node->getOpcode(), VT) && + isOperationLegalOrCustom(RevRot, VT) && isPowerOf2_32(EltSizeInBits)) { SDValue Sub = DAG.getNode(ISD::SUB, DL, ShVT, Zero, Op1); return DAG.getNode(RevRot, DL, VT, Op0, Sub); } diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp index ec152d02fd1f..34ea8901fb3a 100644 --- a/llvm/lib/Target/X86/X86ISelLowering.cpp +++ b/llvm/lib/Target/X86/X86ISelLowering.cpp @@ -1091,12 +1091,11 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM, setOperationAction(ISD::SRL, VT, Custom); setOperationAction(ISD::SHL, VT, Custom); setOperationAction(ISD::SRA, VT, Custom); + if (VT == MVT::v2i64) continue; + setOperationAction(ISD::ROTL, VT, Custom); + setOperationAction(ISD::ROTR, VT, Custom); } - setOperationAction(ISD::ROTL, MVT::v4i32, Custom); - setOperationAction(ISD::ROTL, MVT::v8i16, Custom); - setOperationAction(ISD::ROTL, MVT::v16i8, Custom); - setOperationAction(ISD::STRICT_FSQRT, MVT::v2f64, Legal); setOperationAction(ISD::STRICT_FADD, MVT::v2f64, Legal); setOperationAction(ISD::STRICT_FSUB, MVT::v2f64, Legal); @@ -1194,8 +1193,10 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM, if (!Subtarget.useSoftFloat() && Subtarget.hasXOP()) { for (auto VT : { MVT::v16i8, MVT::v8i16, MVT::v4i32, MVT::v2i64, - MVT::v32i8, MVT::v16i16, MVT::v8i32, MVT::v4i64 }) + MVT::v32i8, MVT::v16i16, MVT::v8i32, MVT::v4i64 }) { setOperationAction(ISD::ROTL, VT, Custom); + setOperationAction(ISD::ROTR, VT, Custom); + } // XOP can efficiently perform BITREVERSE with VPPERM. for (auto VT : { MVT::i8, MVT::i16, MVT::i32, MVT::i64 }) @@ -1278,6 +1279,9 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM, setOperationAction(ISD::SRL, VT, Custom); setOperationAction(ISD::SHL, VT, Custom); setOperationAction(ISD::SRA, VT, Custom); + if (VT == MVT::v4i64) continue; + setOperationAction(ISD::ROTL, VT, Custom); + setOperationAction(ISD::ROTR, VT, Custom); } // These types need custom splitting if their input is a 128-bit vector. @@ -1286,10 +1290,6 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM, setOperationAction(ISD::ZERO_EXTEND, MVT::v8i64, Custom); setOperationAction(ISD::ZERO_EXTEND, MVT::v16i32, Custom); - setOperationAction(ISD::ROTL, MVT::v8i32, Custom); - setOperationAction(ISD::ROTL, MVT::v16i16, Custom); - setOperationAction(ISD::ROTL, MVT::v32i8, Custom); - setOperationAction(ISD::SELECT, MVT::v4f64, Custom); setOperationAction(ISD::SELECT, MVT::v4i64, Custom); setOperationAction(ISD::SELECT, MVT::v8i32, Custom); @@ -1675,10 +1675,13 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM, } // With BWI, expanding (and promoting the shifts) is the better. - if (!Subtarget.useBWIRegs()) + if (!Subtarget.useBWIRegs()) { setOperationAction(ISD::ROTL, MVT::v32i16, Custom); + setOperationAction(ISD::ROTR, MVT::v32i16, Custom); + } setOperationAction(ISD::ROTL, MVT::v64i8, Custom); + setOperationAction(ISD::ROTR, MVT::v64i8, Custom); for (auto VT : { MVT::v64i8, MVT::v32i16 }) { setOperationAction(ISD::ABS, VT, HasBWI ? 
Legal : Custom); @@ -29847,7 +29850,19 @@ static SDValue LowerRotate(SDValue Op, const X86Subtarget &Subtarget, return DAG.getNode(FunnelOpc, DL, VT, R, R, Amt); } - assert(IsROTL && "Only ROTL supported"); + SDValue Z = DAG.getConstant(0, DL, VT); + + if (!IsROTL) { + // If the ISD::ROTR amount is constant, we're always better converting to + // ISD::ROTL. + if (SDValue NegAmt = DAG.FoldConstantArithmetic(ISD::SUB, DL, VT, {Z, Amt})) + return DAG.getNode(ISD::ROTL, DL, VT, R, NegAmt); + + // XOP targets always prefers ISD::ROTL. + if (Subtarget.hasXOP()) + return DAG.getNode(ISD::ROTL, DL, VT, R, + DAG.getNode(ISD::SUB, DL, VT, Z, Amt)); + } // Split 256-bit integers on XOP/pre-AVX2 targets. if (VT.is256BitVector() && (Subtarget.hasXOP() || !Subtarget.hasAVX2())) @@ -29857,6 +29872,7 @@ static SDValue LowerRotate(SDValue Op, const X86Subtarget &Subtarget, // +ve/-ve Amt = rotate left/right - just need to handle ISD::ROTL. // XOP implicitly uses modulo rotation amounts. if (Subtarget.hasXOP()) { + assert(IsROTL && "Only ROTL expected"); assert(VT.is128BitVector() && "Only rotate 128-bit vectors!"); // Attempt to rotate by immediate. @@ -29885,20 +29901,11 @@ static SDValue LowerRotate(SDValue Op, const X86Subtarget &Subtarget, (VT == MVT::v64i8 && Subtarget.useBWIRegs())) && "Only vXi32/vXi16/vXi8 vector rotates supported"); - // Check for a hidden ISD::ROTR, splat + vXi8 lowering can handle both, but we - // currently hit infinite loops in legalization if we allow ISD::ROTR. - // FIXME: Infinite ROTL<->ROTR legalization in TargetLowering::expandROT. - SDValue HiddenROTRAmt; - if (Amt.getOpcode() == ISD::SUB && - ISD::isBuildVectorAllZeros(Amt.getOperand(0).getNode())) - HiddenROTRAmt = Amt.getOperand(1); - MVT ExtSVT = MVT::getIntegerVT(2 * EltSizeInBits); MVT ExtVT = MVT::getVectorVT(ExtSVT, NumElts / 2); SDValue AmtMask = DAG.getConstant(EltSizeInBits - 1, DL, VT); - SDValue AmtMod = DAG.getNode(ISD::AND, DL, VT, - HiddenROTRAmt ? HiddenROTRAmt : Amt, AmtMask); + SDValue AmtMod = DAG.getNode(ISD::AND, DL, VT, Amt, AmtMask); // Attempt to fold as unpack(x,x) << zext(splat(y)): // rotl(x,y) -> (unpack(x,x) << (y & (bw-1))) >> bw. @@ -29906,7 +29913,7 @@ static SDValue LowerRotate(SDValue Op, const X86Subtarget &Subtarget, // TODO: Handle vXi16 cases. if (EltSizeInBits == 8 || EltSizeInBits == 32) { if (SDValue BaseRotAmt = DAG.getSplatValue(AmtMod)) { - unsigned ShiftX86Opc = HiddenROTRAmt ? X86ISD::VSRLI : X86ISD::VSHLI; + unsigned ShiftX86Opc = IsROTL ? X86ISD::VSHLI : X86ISD::VSRLI; SDValue Lo = DAG.getBitcast(ExtVT, getUnpackl(DAG, DL, VT, R, R)); SDValue Hi = DAG.getBitcast(ExtVT, getUnpackh(DAG, DL, VT, R, R)); BaseRotAmt = DAG.getZExtOrTrunc(BaseRotAmt, DL, MVT::i32); @@ -29914,7 +29921,7 @@ static SDValue LowerRotate(SDValue Op, const X86Subtarget &Subtarget, Subtarget, DAG); Hi = getTargetVShiftNode(ShiftX86Opc, DL, ExtVT, Hi, BaseRotAmt, Subtarget, DAG); - return getPack(DAG, Subtarget, DL, VT, Lo, Hi, !HiddenROTRAmt); + return getPack(DAG, Subtarget, DL, VT, Lo, Hi, IsROTL); } } @@ -29925,7 +29932,7 @@ static SDValue LowerRotate(SDValue Op, const X86Subtarget &Subtarget, bool IsConstAmt = ISD::isBuildVectorOfConstantSDNodes(Amt.getNode()); MVT WideVT = MVT::getVectorVT(Subtarget.hasBWI() ? MVT::i16 : MVT::i32, NumElts); - unsigned ShiftOpc = HiddenROTRAmt ? ISD::SRL : ISD::SHL; + unsigned ShiftOpc = IsROTL ? ISD::SHL : ISD::SRL; // Attempt to fold as: // rotl(x,y) -> (((aext(x) << bw) | zext(x)) << (y & (bw-1))) >> bw. 
@@ -29942,7 +29949,7 @@ static SDValue LowerRotate(SDValue Op, const X86Subtarget &Subtarget, getTargetVShiftByConstNode(X86ISD::VSHLI, DL, WideVT, R, 8, DAG)); Amt = DAG.getNode(ISD::ZERO_EXTEND, DL, WideVT, AmtMod); R = DAG.getNode(ShiftOpc, DL, WideVT, R, Amt); - if (!HiddenROTRAmt) + if (IsROTL) R = getTargetVShiftByConstNode(X86ISD::VSRLI, DL, WideVT, R, 8, DAG); return DAG.getNode(ISD::TRUNCATE, DL, VT, R); } @@ -29952,14 +29959,13 @@ static SDValue LowerRotate(SDValue Op, const X86Subtarget &Subtarget, // rotr(x,y) -> (unpack(x,x) >> (y & (bw-1))). if (IsConstAmt || supportedVectorVarShift(ExtVT, Subtarget, ShiftOpc)) { // See if we can perform this by unpacking to lo/hi vXi16. - SDValue Z = DAG.getConstant(0, DL, VT); SDValue RLo = DAG.getBitcast(ExtVT, getUnpackl(DAG, DL, VT, R, R)); SDValue RHi = DAG.getBitcast(ExtVT, getUnpackh(DAG, DL, VT, R, R)); SDValue ALo = DAG.getBitcast(ExtVT, getUnpackl(DAG, DL, VT, AmtMod, Z)); SDValue AHi = DAG.getBitcast(ExtVT, getUnpackh(DAG, DL, VT, AmtMod, Z)); SDValue Lo = DAG.getNode(ShiftOpc, DL, ExtVT, RLo, ALo); SDValue Hi = DAG.getNode(ShiftOpc, DL, ExtVT, RHi, AHi); - return getPack(DAG, Subtarget, DL, VT, Lo, Hi, !HiddenROTRAmt); + return getPack(DAG, Subtarget, DL, VT, Lo, Hi, IsROTL); } assert((VT == MVT::v16i8 || VT == MVT::v32i8) && "Unsupported vXi8 type"); @@ -29982,15 +29988,15 @@ static SDValue LowerRotate(SDValue Op, const X86Subtarget &Subtarget, return DAG.getSelect(DL, SelVT, C, V0, V1); }; - // 'Hidden' ROTR is currently only profitable on AVX512 targets where we - // have VPTERNLOG. - unsigned ShiftLHS = ISD::SHL; - unsigned ShiftRHS = ISD::SRL; - if (HiddenROTRAmt && useVPTERNLOG(Subtarget, VT)) { - std::swap(ShiftLHS, ShiftRHS); - Amt = HiddenROTRAmt; + // ISD::ROTR is currently only profitable on AVX512 targets with VPTERNLOG. + if (!IsROTL && !useVPTERNLOG(Subtarget, VT)) { + Amt = DAG.getNode(ISD::SUB, DL, VT, Z, Amt); + IsROTL = true; } + unsigned ShiftLHS = IsROTL ? ISD::SHL : ISD::SRL; + unsigned ShiftRHS = IsROTL ? ISD::SRL : ISD::SHL; + // Turn 'a' into a mask suitable for VSELECT: a = a << 5; // We can safely do this using i16 shifts as we're only interested in // the 3 lower bits of each byte. @@ -30027,9 +30033,6 @@ static SDValue LowerRotate(SDValue Op, const X86Subtarget &Subtarget, return SignBitSelect(VT, Amt, M, R); } - // ISD::ROT* uses modulo rotate amounts. - Amt = DAG.getNode(ISD::AND, DL, VT, Amt, AmtMask); - bool IsSplatAmt = DAG.isSplatValue(Amt); bool ConstantAmt = ISD::isBuildVectorOfConstantSDNodes(Amt.getNode()); bool LegalVarShifts = supportedVectorVarShift(VT, Subtarget, ISD::SHL) && @@ -30038,13 +30041,25 @@ static SDValue LowerRotate(SDValue Op, const X86Subtarget &Subtarget, // Fallback for splats + all supported variable shifts. // Fallback for non-constants AVX2 vXi16 as well. if (IsSplatAmt || LegalVarShifts || (Subtarget.hasAVX2() && !ConstantAmt)) { + Amt = DAG.getNode(ISD::AND, DL, VT, Amt, AmtMask); SDValue AmtR = DAG.getConstant(EltSizeInBits, DL, VT); AmtR = DAG.getNode(ISD::SUB, DL, VT, AmtR, Amt); - SDValue SHL = DAG.getNode(ISD::SHL, DL, VT, R, Amt); - SDValue SRL = DAG.getNode(ISD::SRL, DL, VT, R, AmtR); + SDValue SHL = DAG.getNode(IsROTL ? ISD::SHL : ISD::SRL, DL, VT, R, Amt); + SDValue SRL = DAG.getNode(IsROTL ? ISD::SRL : ISD::SHL, DL, VT, R, AmtR); return DAG.getNode(ISD::OR, DL, VT, SHL, SRL); } + // Everything below assumes ISD::ROTL. + if (!IsROTL) { + Amt = DAG.getNode(ISD::SUB, DL, VT, Z, Amt); + IsROTL = true; + } + + // ISD::ROT* uses modulo rotate amounts. 
+ Amt = DAG.getNode(ISD::AND, DL, VT, Amt, AmtMask); + + assert(IsROTL && "Only ROTL supported"); + // As with shifts, attempt to convert the rotation amount to a multiplication // factor, fallback to general expansion. SDValue Scale = convertShiftLeftToScale(Amt, DL, Subtarget, DAG); diff --git a/llvm/test/CodeGen/X86/funnel-shift-rot.ll b/llvm/test/CodeGen/X86/funnel-shift-rot.ll index c95df7bcd67b..ef287b959427 100644 --- a/llvm/test/CodeGen/X86/funnel-shift-rot.ll +++ b/llvm/test/CodeGen/X86/funnel-shift-rot.ll @@ -328,14 +328,12 @@ define <4 x i32> @rotr_v4i32(<4 x i32> %x, <4 x i32> %z) nounwind { ; ; X64-AVX2-LABEL: rotr_v4i32: ; X64-AVX2: # %bb.0: -; X64-AVX2-NEXT: vpxor %xmm2, %xmm2, %xmm2 -; X64-AVX2-NEXT: vpsubd %xmm1, %xmm2, %xmm1 ; X64-AVX2-NEXT: vpbroadcastd {{.*#+}} xmm2 = [31,31,31,31] ; X64-AVX2-NEXT: vpand %xmm2, %xmm1, %xmm1 -; X64-AVX2-NEXT: vpsllvd %xmm1, %xmm0, %xmm2 +; X64-AVX2-NEXT: vpsrlvd %xmm1, %xmm0, %xmm2 ; X64-AVX2-NEXT: vpbroadcastd {{.*#+}} xmm3 = [32,32,32,32] ; X64-AVX2-NEXT: vpsubd %xmm1, %xmm3, %xmm1 -; X64-AVX2-NEXT: vpsrlvd %xmm1, %xmm0, %xmm0 +; X64-AVX2-NEXT: vpsllvd %xmm1, %xmm0, %xmm0 ; X64-AVX2-NEXT: vpor %xmm0, %xmm2, %xmm0 ; X64-AVX2-NEXT: retq %f = call <4 x i32> @llvm.fshr.v4i32(<4 x i32> %x, <4 x i32> %x, <4 x i32> %z) diff --git a/llvm/test/CodeGen/X86/vector-fshr-rot-128.ll b/llvm/test/CodeGen/X86/vector-fshr-rot-128.ll index 477f25902189..612c8103d4b1 100644 --- a/llvm/test/CodeGen/X86/vector-fshr-rot-128.ll +++ b/llvm/test/CodeGen/X86/vector-fshr-rot-128.ll @@ -232,14 +232,12 @@ define <4 x i32> @var_funnnel_v4i32(<4 x i32> %x, <4 x i32> %amt) nounwind { ; ; AVX2-LABEL: var_funnnel_v4i32: ; AVX2: # %bb.0: -; AVX2-NEXT: vpxor %xmm2, %xmm2, %xmm2 -; AVX2-NEXT: vpsubd %xmm1, %xmm2, %xmm1 ; AVX2-NEXT: vpbroadcastd {{.*#+}} xmm2 = [31,31,31,31] ; AVX2-NEXT: vpand %xmm2, %xmm1, %xmm1 -; AVX2-NEXT: vpsllvd %xmm1, %xmm0, %xmm2 +; AVX2-NEXT: vpsrlvd %xmm1, %xmm0, %xmm2 ; AVX2-NEXT: vpbroadcastd {{.*#+}} xmm3 = [32,32,32,32] ; AVX2-NEXT: vpsubd %xmm1, %xmm3, %xmm1 -; AVX2-NEXT: vpsrlvd %xmm1, %xmm0, %xmm0 +; AVX2-NEXT: vpsllvd %xmm1, %xmm0, %xmm0 ; AVX2-NEXT: vpor %xmm0, %xmm2, %xmm0 ; AVX2-NEXT: retq ; @@ -386,36 +384,32 @@ define <8 x i16> @var_funnnel_v8i16(<8 x i16> %x, <8 x i16> %amt) nounwind { ; ; AVX2-LABEL: var_funnnel_v8i16: ; AVX2: # %bb.0: -; AVX2-NEXT: vpxor %xmm2, %xmm2, %xmm2 -; AVX2-NEXT: vpsubw %xmm1, %xmm2, %xmm1 -; AVX2-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1 -; AVX2-NEXT: vmovdqa {{.*#+}} xmm2 = [16,16,16,16,16,16,16,16] -; AVX2-NEXT: vpsubw %xmm1, %xmm2, %xmm2 -; AVX2-NEXT: vpmovzxwd {{.*#+}} ymm2 = xmm2[0],zero,xmm2[1],zero,xmm2[2],zero,xmm2[3],zero,xmm2[4],zero,xmm2[5],zero,xmm2[6],zero,xmm2[7],zero ; AVX2-NEXT: vpmovzxwd {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero +; AVX2-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1 +; AVX2-NEXT: vpmovzxwd {{.*#+}} ymm2 = xmm1[0],zero,xmm1[1],zero,xmm1[2],zero,xmm1[3],zero,xmm1[4],zero,xmm1[5],zero,xmm1[6],zero,xmm1[7],zero ; AVX2-NEXT: vpsrlvd %ymm2, %ymm0, %ymm2 ; AVX2-NEXT: vextracti128 $1, %ymm2, %xmm3 ; AVX2-NEXT: vpackusdw %xmm3, %xmm2, %xmm2 +; AVX2-NEXT: vmovdqa {{.*#+}} xmm3 = [16,16,16,16,16,16,16,16] +; AVX2-NEXT: vpsubw %xmm1, %xmm3, %xmm1 ; AVX2-NEXT: vpmovzxwd {{.*#+}} ymm1 = xmm1[0],zero,xmm1[1],zero,xmm1[2],zero,xmm1[3],zero,xmm1[4],zero,xmm1[5],zero,xmm1[6],zero,xmm1[7],zero ; AVX2-NEXT: vpsllvd %ymm1, %ymm0, %ymm0 ; AVX2-NEXT: vpshufb {{.*#+}} ymm0 = 
ymm0[0,1,4,5,8,9,12,13,u,u,u,u,u,u,u,u,16,17,20,21,24,25,28,29,u,u,u,u,u,u,u,u] ; AVX2-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,2,2,3] -; AVX2-NEXT: vpor %xmm2, %xmm0, %xmm0 +; AVX2-NEXT: vpor %xmm0, %xmm2, %xmm0 ; AVX2-NEXT: vzeroupper ; AVX2-NEXT: retq ; ; AVX512F-LABEL: var_funnnel_v8i16: ; AVX512F: # %bb.0: -; AVX512F-NEXT: vpxor %xmm2, %xmm2, %xmm2 -; AVX512F-NEXT: vpsubw %xmm1, %xmm2, %xmm1 +; AVX512F-NEXT: vpmovzxwd {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero ; AVX512F-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1 ; AVX512F-NEXT: vpmovzxwd {{.*#+}} ymm2 = xmm1[0],zero,xmm1[1],zero,xmm1[2],zero,xmm1[3],zero,xmm1[4],zero,xmm1[5],zero,xmm1[6],zero,xmm1[7],zero -; AVX512F-NEXT: vpmovzxwd {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero -; AVX512F-NEXT: vpsllvd %ymm2, %ymm0, %ymm2 +; AVX512F-NEXT: vpsrlvd %ymm2, %ymm0, %ymm2 ; AVX512F-NEXT: vmovdqa {{.*#+}} xmm3 = [16,16,16,16,16,16,16,16] ; AVX512F-NEXT: vpsubw %xmm1, %xmm3, %xmm1 ; AVX512F-NEXT: vpmovzxwd {{.*#+}} ymm1 = xmm1[0],zero,xmm1[1],zero,xmm1[2],zero,xmm1[3],zero,xmm1[4],zero,xmm1[5],zero,xmm1[6],zero,xmm1[7],zero -; AVX512F-NEXT: vpsrlvd %ymm1, %ymm0, %ymm0 +; AVX512F-NEXT: vpsllvd %ymm1, %ymm0, %ymm0 ; AVX512F-NEXT: vpor %ymm0, %ymm2, %ymm0 ; AVX512F-NEXT: vpmovdw %zmm0, %ymm0 ; AVX512F-NEXT: # kill: def $xmm0 killed $xmm0 killed $ymm0 @@ -424,16 +418,14 @@ define <8 x i16> @var_funnnel_v8i16(<8 x i16> %x, <8 x i16> %amt) nounwind { ; ; AVX512VL-LABEL: var_funnnel_v8i16: ; AVX512VL: # %bb.0: -; AVX512VL-NEXT: vpxor %xmm2, %xmm2, %xmm2 -; AVX512VL-NEXT: vpsubw %xmm1, %xmm2, %xmm1 +; AVX512VL-NEXT: vpmovzxwd {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero ; AVX512VL-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1 ; AVX512VL-NEXT: vpmovzxwd {{.*#+}} ymm2 = xmm1[0],zero,xmm1[1],zero,xmm1[2],zero,xmm1[3],zero,xmm1[4],zero,xmm1[5],zero,xmm1[6],zero,xmm1[7],zero -; AVX512VL-NEXT: vpmovzxwd {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero -; AVX512VL-NEXT: vpsllvd %ymm2, %ymm0, %ymm2 +; AVX512VL-NEXT: vpsrlvd %ymm2, %ymm0, %ymm2 ; AVX512VL-NEXT: vmovdqa {{.*#+}} xmm3 = [16,16,16,16,16,16,16,16] ; AVX512VL-NEXT: vpsubw %xmm1, %xmm3, %xmm1 ; AVX512VL-NEXT: vpmovzxwd {{.*#+}} ymm1 = xmm1[0],zero,xmm1[1],zero,xmm1[2],zero,xmm1[3],zero,xmm1[4],zero,xmm1[5],zero,xmm1[6],zero,xmm1[7],zero -; AVX512VL-NEXT: vpsrlvd %ymm1, %ymm0, %ymm0 +; AVX512VL-NEXT: vpsllvd %ymm1, %ymm0, %ymm0 ; AVX512VL-NEXT: vpor %ymm0, %ymm2, %ymm0 ; AVX512VL-NEXT: vpmovdw %ymm0, %xmm0 ; AVX512VL-NEXT: vzeroupper @@ -442,26 +434,22 @@ define <8 x i16> @var_funnnel_v8i16(<8 x i16> %x, <8 x i16> %amt) nounwind { ; AVX512BW-LABEL: var_funnnel_v8i16: ; AVX512BW: # %bb.0: ; AVX512BW-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 -; AVX512BW-NEXT: vpxor %xmm2, %xmm2, %xmm2 -; AVX512BW-NEXT: vpsubw %xmm1, %xmm2, %xmm1 ; AVX512BW-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1 -; AVX512BW-NEXT: vpsllvw %zmm1, %zmm0, %zmm2 +; AVX512BW-NEXT: vpsrlvw %zmm1, %zmm0, %zmm2 ; AVX512BW-NEXT: vmovdqa {{.*#+}} xmm3 = [16,16,16,16,16,16,16,16] ; AVX512BW-NEXT: vpsubw %xmm1, %xmm3, %xmm1 -; AVX512BW-NEXT: vpsrlvw %zmm1, %zmm0, %zmm0 +; AVX512BW-NEXT: vpsllvw %zmm1, %zmm0, %zmm0 ; AVX512BW-NEXT: vpor %xmm0, %xmm2, %xmm0 ; AVX512BW-NEXT: vzeroupper ; AVX512BW-NEXT: retq ; ; 
AVX512VLBW-LABEL: var_funnnel_v8i16: ; AVX512VLBW: # %bb.0: -; AVX512VLBW-NEXT: vpxor %xmm2, %xmm2, %xmm2 -; AVX512VLBW-NEXT: vpsubw %xmm1, %xmm2, %xmm1 ; AVX512VLBW-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1 -; AVX512VLBW-NEXT: vpsllvw %xmm1, %xmm0, %xmm2 +; AVX512VLBW-NEXT: vpsrlvw %xmm1, %xmm0, %xmm2 ; AVX512VLBW-NEXT: vmovdqa {{.*#+}} xmm3 = [16,16,16,16,16,16,16,16] ; AVX512VLBW-NEXT: vpsubw %xmm1, %xmm3, %xmm1 -; AVX512VLBW-NEXT: vpsrlvw %xmm1, %xmm0, %xmm0 +; AVX512VLBW-NEXT: vpsllvw %xmm1, %xmm0, %xmm0 ; AVX512VLBW-NEXT: vpor %xmm0, %xmm2, %xmm0 ; AVX512VLBW-NEXT: retq ; @@ -975,103 +963,89 @@ define <4 x i32> @splatvar_funnnel_v4i32(<4 x i32> %x, <4 x i32> %amt) nounwind define <8 x i16> @splatvar_funnnel_v8i16(<8 x i16> %x, <8 x i16> %amt) nounwind { ; SSE2-LABEL: splatvar_funnnel_v8i16: ; SSE2: # %bb.0: -; SSE2-NEXT: pxor %xmm2, %xmm2 -; SSE2-NEXT: psubw %xmm1, %xmm2 -; SSE2-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm2 -; SSE2-NEXT: movdqa {{.*#+}} xmm1 = [65535,0,0,0] -; SSE2-NEXT: pand %xmm2, %xmm1 +; SSE2-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1 +; SSE2-NEXT: movdqa {{.*#+}} xmm2 = [65535,0,0,0] +; SSE2-NEXT: pand %xmm1, %xmm2 ; SSE2-NEXT: movdqa %xmm0, %xmm3 -; SSE2-NEXT: psllw %xmm1, %xmm3 -; SSE2-NEXT: movdqa {{.*#+}} xmm1 = [16,16,16,16,16,16,16,16] -; SSE2-NEXT: psubw %xmm2, %xmm1 -; SSE2-NEXT: pslldq {{.*#+}} xmm1 = zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,xmm1[0,1] -; SSE2-NEXT: psrldq {{.*#+}} xmm1 = xmm1[14,15],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero -; SSE2-NEXT: psrlw %xmm1, %xmm0 +; SSE2-NEXT: psrlw %xmm2, %xmm3 +; SSE2-NEXT: movdqa {{.*#+}} xmm2 = [16,16,16,16,16,16,16,16] +; SSE2-NEXT: psubw %xmm1, %xmm2 +; SSE2-NEXT: pslldq {{.*#+}} xmm2 = zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,xmm2[0,1] +; SSE2-NEXT: psrldq {{.*#+}} xmm2 = xmm2[14,15],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero +; SSE2-NEXT: psllw %xmm2, %xmm0 ; SSE2-NEXT: por %xmm3, %xmm0 ; SSE2-NEXT: retq ; ; SSE41-LABEL: splatvar_funnnel_v8i16: ; SSE41: # %bb.0: -; SSE41-NEXT: pxor %xmm2, %xmm2 +; SSE41-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1 +; SSE41-NEXT: pmovzxwq {{.*#+}} xmm2 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero +; SSE41-NEXT: movdqa %xmm0, %xmm3 +; SSE41-NEXT: psrlw %xmm2, %xmm3 +; SSE41-NEXT: movdqa {{.*#+}} xmm2 = [16,16,16,16,16,16,16,16] ; SSE41-NEXT: psubw %xmm1, %xmm2 -; SSE41-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm2 ; SSE41-NEXT: pmovzxwq {{.*#+}} xmm1 = xmm2[0],zero,zero,zero,xmm2[1],zero,zero,zero -; SSE41-NEXT: movdqa %xmm0, %xmm3 -; SSE41-NEXT: psllw %xmm1, %xmm3 -; SSE41-NEXT: movdqa {{.*#+}} xmm1 = [16,16,16,16,16,16,16,16] -; SSE41-NEXT: psubw %xmm2, %xmm1 -; SSE41-NEXT: pmovzxwq {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero -; SSE41-NEXT: psrlw %xmm1, %xmm0 +; SSE41-NEXT: psllw %xmm1, %xmm0 ; SSE41-NEXT: por %xmm3, %xmm0 ; SSE41-NEXT: retq ; ; AVX-LABEL: splatvar_funnnel_v8i16: ; AVX: # %bb.0: -; AVX-NEXT: vpxor %xmm2, %xmm2, %xmm2 -; AVX-NEXT: vpsubw %xmm1, %xmm2, %xmm1 ; AVX-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1 ; AVX-NEXT: vpmovzxwq {{.*#+}} xmm2 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero -; AVX-NEXT: vpsllw %xmm2, %xmm0, %xmm2 +; AVX-NEXT: vpsrlw %xmm2, %xmm0, %xmm2 ; AVX-NEXT: vmovdqa {{.*#+}} xmm3 = [16,16,16,16,16,16,16,16] ; AVX-NEXT: vpsubw %xmm1, %xmm3, %xmm1 ; AVX-NEXT: vpmovzxwq {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero -; AVX-NEXT: vpsrlw %xmm1, 
%xmm0, %xmm0 +; AVX-NEXT: vpsllw %xmm1, %xmm0, %xmm0 ; AVX-NEXT: vpor %xmm0, %xmm2, %xmm0 ; AVX-NEXT: retq ; ; AVX512F-LABEL: splatvar_funnnel_v8i16: ; AVX512F: # %bb.0: -; AVX512F-NEXT: vpxor %xmm2, %xmm2, %xmm2 -; AVX512F-NEXT: vpsubw %xmm1, %xmm2, %xmm1 ; AVX512F-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1 ; AVX512F-NEXT: vpmovzxwq {{.*#+}} xmm2 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero -; AVX512F-NEXT: vpsllw %xmm2, %xmm0, %xmm2 +; AVX512F-NEXT: vpsrlw %xmm2, %xmm0, %xmm2 ; AVX512F-NEXT: vmovdqa {{.*#+}} xmm3 = [16,16,16,16,16,16,16,16] ; AVX512F-NEXT: vpsubw %xmm1, %xmm3, %xmm1 ; AVX512F-NEXT: vpmovzxwq {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero -; AVX512F-NEXT: vpsrlw %xmm1, %xmm0, %xmm0 +; AVX512F-NEXT: vpsllw %xmm1, %xmm0, %xmm0 ; AVX512F-NEXT: vpor %xmm0, %xmm2, %xmm0 ; AVX512F-NEXT: retq ; ; AVX512VL-LABEL: splatvar_funnnel_v8i16: ; AVX512VL: # %bb.0: -; AVX512VL-NEXT: vpxor %xmm2, %xmm2, %xmm2 -; AVX512VL-NEXT: vpsubw %xmm1, %xmm2, %xmm1 ; AVX512VL-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1 ; AVX512VL-NEXT: vpmovzxwq {{.*#+}} xmm2 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero -; AVX512VL-NEXT: vpsllw %xmm2, %xmm0, %xmm2 +; AVX512VL-NEXT: vpsrlw %xmm2, %xmm0, %xmm2 ; AVX512VL-NEXT: vmovdqa {{.*#+}} xmm3 = [16,16,16,16,16,16,16,16] ; AVX512VL-NEXT: vpsubw %xmm1, %xmm3, %xmm1 ; AVX512VL-NEXT: vpmovzxwq {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero -; AVX512VL-NEXT: vpsrlw %xmm1, %xmm0, %xmm0 +; AVX512VL-NEXT: vpsllw %xmm1, %xmm0, %xmm0 ; AVX512VL-NEXT: vpor %xmm0, %xmm2, %xmm0 ; AVX512VL-NEXT: retq ; ; AVX512BW-LABEL: splatvar_funnnel_v8i16: ; AVX512BW: # %bb.0: -; AVX512BW-NEXT: vpxor %xmm2, %xmm2, %xmm2 -; AVX512BW-NEXT: vpsubw %xmm1, %xmm2, %xmm1 ; AVX512BW-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1 ; AVX512BW-NEXT: vpmovzxwq {{.*#+}} xmm2 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero -; AVX512BW-NEXT: vpsllw %xmm2, %xmm0, %xmm2 +; AVX512BW-NEXT: vpsrlw %xmm2, %xmm0, %xmm2 ; AVX512BW-NEXT: vmovdqa {{.*#+}} xmm3 = [16,16,16,16,16,16,16,16] ; AVX512BW-NEXT: vpsubw %xmm1, %xmm3, %xmm1 ; AVX512BW-NEXT: vpmovzxwq {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero -; AVX512BW-NEXT: vpsrlw %xmm1, %xmm0, %xmm0 +; AVX512BW-NEXT: vpsllw %xmm1, %xmm0, %xmm0 ; AVX512BW-NEXT: vpor %xmm0, %xmm2, %xmm0 ; AVX512BW-NEXT: retq ; ; AVX512VLBW-LABEL: splatvar_funnnel_v8i16: ; AVX512VLBW: # %bb.0: -; AVX512VLBW-NEXT: vpxor %xmm2, %xmm2, %xmm2 -; AVX512VLBW-NEXT: vpsubw %xmm1, %xmm2, %xmm1 ; AVX512VLBW-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1 ; AVX512VLBW-NEXT: vpmovzxwq {{.*#+}} xmm2 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero -; AVX512VLBW-NEXT: vpsllw %xmm2, %xmm0, %xmm2 +; AVX512VLBW-NEXT: vpsrlw %xmm2, %xmm0, %xmm2 ; AVX512VLBW-NEXT: vmovdqa {{.*#+}} xmm3 = [16,16,16,16,16,16,16,16] ; AVX512VLBW-NEXT: vpsubw %xmm1, %xmm3, %xmm1 ; AVX512VLBW-NEXT: vpmovzxwq {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero -; AVX512VLBW-NEXT: vpsrlw %xmm1, %xmm0, %xmm0 +; AVX512VLBW-NEXT: vpsllw %xmm1, %xmm0, %xmm0 ; AVX512VLBW-NEXT: vpor %xmm0, %xmm2, %xmm0 ; AVX512VLBW-NEXT: retq ; @@ -1109,18 +1083,16 @@ define <8 x i16> @splatvar_funnnel_v8i16(<8 x i16> %x, <8 x i16> %amt) nounwind ; ; X86-SSE2-LABEL: splatvar_funnnel_v8i16: ; X86-SSE2: # %bb.0: -; X86-SSE2-NEXT: pxor %xmm2, %xmm2 -; X86-SSE2-NEXT: psubw %xmm1, %xmm2 -; X86-SSE2-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}, %xmm2 -; X86-SSE2-NEXT: movdqa {{.*#+}} xmm1 = [65535,0,0,0] -; X86-SSE2-NEXT: pand %xmm2, %xmm1 +; X86-SSE2-NEXT: pand 
{{\.?LCPI[0-9]+_[0-9]+}}, %xmm1 +; X86-SSE2-NEXT: movdqa {{.*#+}} xmm2 = [65535,0,0,0] +; X86-SSE2-NEXT: pand %xmm1, %xmm2 ; X86-SSE2-NEXT: movdqa %xmm0, %xmm3 -; X86-SSE2-NEXT: psllw %xmm1, %xmm3 -; X86-SSE2-NEXT: movdqa {{.*#+}} xmm1 = [16,16,16,16,16,16,16,16] -; X86-SSE2-NEXT: psubw %xmm2, %xmm1 -; X86-SSE2-NEXT: pslldq {{.*#+}} xmm1 = zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,xmm1[0,1] -; X86-SSE2-NEXT: psrldq {{.*#+}} xmm1 = xmm1[14,15],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero -; X86-SSE2-NEXT: psrlw %xmm1, %xmm0 +; X86-SSE2-NEXT: psrlw %xmm2, %xmm3 +; X86-SSE2-NEXT: movdqa {{.*#+}} xmm2 = [16,16,16,16,16,16,16,16] +; X86-SSE2-NEXT: psubw %xmm1, %xmm2 +; X86-SSE2-NEXT: pslldq {{.*#+}} xmm2 = zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,xmm2[0,1] +; X86-SSE2-NEXT: psrldq {{.*#+}} xmm2 = xmm2[14,15],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero +; X86-SSE2-NEXT: psllw %xmm2, %xmm0 ; X86-SSE2-NEXT: por %xmm3, %xmm0 ; X86-SSE2-NEXT: retl %splat = shufflevector <8 x i16> %amt, <8 x i16> undef, <8 x i32> zeroinitializer diff --git a/llvm/test/CodeGen/X86/vector-fshr-rot-256.ll b/llvm/test/CodeGen/X86/vector-fshr-rot-256.ll index c760469b20b4..2093a2870cd1 100644 --- a/llvm/test/CodeGen/X86/vector-fshr-rot-256.ll +++ b/llvm/test/CodeGen/X86/vector-fshr-rot-256.ll @@ -171,14 +171,12 @@ define <8 x i32> @var_funnnel_v8i32(<8 x i32> %x, <8 x i32> %amt) nounwind { ; ; AVX2-LABEL: var_funnnel_v8i32: ; AVX2: # %bb.0: -; AVX2-NEXT: vpxor %xmm2, %xmm2, %xmm2 -; AVX2-NEXT: vpsubd %ymm1, %ymm2, %ymm1 ; AVX2-NEXT: vpbroadcastd {{.*#+}} ymm2 = [31,31,31,31,31,31,31,31] ; AVX2-NEXT: vpand %ymm2, %ymm1, %ymm1 -; AVX2-NEXT: vpsllvd %ymm1, %ymm0, %ymm2 +; AVX2-NEXT: vpsrlvd %ymm1, %ymm0, %ymm2 ; AVX2-NEXT: vpbroadcastd {{.*#+}} ymm3 = [32,32,32,32,32,32,32,32] ; AVX2-NEXT: vpsubd %ymm1, %ymm3, %ymm1 -; AVX2-NEXT: vpsrlvd %ymm1, %ymm0, %ymm0 +; AVX2-NEXT: vpsllvd %ymm1, %ymm0, %ymm0 ; AVX2-NEXT: vpor %ymm0, %ymm2, %ymm0 ; AVX2-NEXT: retq ; @@ -290,23 +288,22 @@ define <16 x i16> @var_funnnel_v16i16(<16 x i16> %x, <16 x i16> %amt) nounwind { ; AVX2: # %bb.0: ; AVX2-NEXT: vpxor %xmm2, %xmm2, %xmm2 ; AVX2-NEXT: vpunpckhwd {{.*#+}} ymm3 = ymm2[4],ymm0[4],ymm2[5],ymm0[5],ymm2[6],ymm0[6],ymm2[7],ymm0[7],ymm2[12],ymm0[12],ymm2[13],ymm0[13],ymm2[14],ymm0[14],ymm2[15],ymm0[15] -; AVX2-NEXT: vpsubw %ymm1, %ymm2, %ymm1 ; AVX2-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm1, %ymm1 ; AVX2-NEXT: vpunpckhwd {{.*#+}} ymm4 = ymm1[4],ymm2[4],ymm1[5],ymm2[5],ymm1[6],ymm2[6],ymm1[7],ymm2[7],ymm1[12],ymm2[12],ymm1[13],ymm2[13],ymm1[14],ymm2[14],ymm1[15],ymm2[15] -; AVX2-NEXT: vpsllvd %ymm4, %ymm3, %ymm4 +; AVX2-NEXT: vpsrlvd %ymm4, %ymm3, %ymm4 ; AVX2-NEXT: vpsrld $16, %ymm4, %ymm4 ; AVX2-NEXT: vpunpcklwd {{.*#+}} ymm0 = ymm2[0],ymm0[0],ymm2[1],ymm0[1],ymm2[2],ymm0[2],ymm2[3],ymm0[3],ymm2[8],ymm0[8],ymm2[9],ymm0[9],ymm2[10],ymm0[10],ymm2[11],ymm0[11] ; AVX2-NEXT: vpunpcklwd {{.*#+}} ymm5 = ymm1[0],ymm2[0],ymm1[1],ymm2[1],ymm1[2],ymm2[2],ymm1[3],ymm2[3],ymm1[8],ymm2[8],ymm1[9],ymm2[9],ymm1[10],ymm2[10],ymm1[11],ymm2[11] -; AVX2-NEXT: vpsllvd %ymm5, %ymm0, %ymm5 +; AVX2-NEXT: vpsrlvd %ymm5, %ymm0, %ymm5 ; AVX2-NEXT: vpsrld $16, %ymm5, %ymm5 ; AVX2-NEXT: vpackusdw %ymm4, %ymm5, %ymm4 ; AVX2-NEXT: vmovdqa {{.*#+}} ymm5 = [16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16] ; AVX2-NEXT: vpsubw %ymm1, %ymm5, %ymm1 ; AVX2-NEXT: vpunpckhwd {{.*#+}} ymm5 = 
ymm1[4],ymm2[4],ymm1[5],ymm2[5],ymm1[6],ymm2[6],ymm1[7],ymm2[7],ymm1[12],ymm2[12],ymm1[13],ymm2[13],ymm1[14],ymm2[14],ymm1[15],ymm2[15] -; AVX2-NEXT: vpsrlvd %ymm5, %ymm3, %ymm3 +; AVX2-NEXT: vpsllvd %ymm5, %ymm3, %ymm3 ; AVX2-NEXT: vpsrld $16, %ymm3, %ymm3 ; AVX2-NEXT: vpunpcklwd {{.*#+}} ymm1 = ymm1[0],ymm2[0],ymm1[1],ymm2[1],ymm1[2],ymm2[2],ymm1[3],ymm2[3],ymm1[8],ymm2[8],ymm1[9],ymm2[9],ymm1[10],ymm2[10],ymm1[11],ymm2[11] -; AVX2-NEXT: vpsrlvd %ymm1, %ymm0, %ymm0 +; AVX2-NEXT: vpsllvd %ymm1, %ymm0, %ymm0 ; AVX2-NEXT: vpsrld $16, %ymm0, %ymm0 ; AVX2-NEXT: vpackusdw %ymm3, %ymm0, %ymm0 ; AVX2-NEXT: vpor %ymm0, %ymm4, %ymm0 @@ -314,32 +311,28 @@ define <16 x i16> @var_funnnel_v16i16(<16 x i16> %x, <16 x i16> %amt) nounwind { ; ; AVX512F-LABEL: var_funnnel_v16i16: ; AVX512F: # %bb.0: -; AVX512F-NEXT: vpxor %xmm2, %xmm2, %xmm2 -; AVX512F-NEXT: vpsubw %ymm1, %ymm2, %ymm1 +; AVX512F-NEXT: vpmovzxwd {{.*#+}} zmm0 = ymm0[0],zero,ymm0[1],zero,ymm0[2],zero,ymm0[3],zero,ymm0[4],zero,ymm0[5],zero,ymm0[6],zero,ymm0[7],zero,ymm0[8],zero,ymm0[9],zero,ymm0[10],zero,ymm0[11],zero,ymm0[12],zero,ymm0[13],zero,ymm0[14],zero,ymm0[15],zero ; AVX512F-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm1, %ymm1 ; AVX512F-NEXT: vpmovzxwd {{.*#+}} zmm2 = ymm1[0],zero,ymm1[1],zero,ymm1[2],zero,ymm1[3],zero,ymm1[4],zero,ymm1[5],zero,ymm1[6],zero,ymm1[7],zero,ymm1[8],zero,ymm1[9],zero,ymm1[10],zero,ymm1[11],zero,ymm1[12],zero,ymm1[13],zero,ymm1[14],zero,ymm1[15],zero -; AVX512F-NEXT: vpmovzxwd {{.*#+}} zmm0 = ymm0[0],zero,ymm0[1],zero,ymm0[2],zero,ymm0[3],zero,ymm0[4],zero,ymm0[5],zero,ymm0[6],zero,ymm0[7],zero,ymm0[8],zero,ymm0[9],zero,ymm0[10],zero,ymm0[11],zero,ymm0[12],zero,ymm0[13],zero,ymm0[14],zero,ymm0[15],zero -; AVX512F-NEXT: vpsllvd %zmm2, %zmm0, %zmm2 +; AVX512F-NEXT: vpsrlvd %zmm2, %zmm0, %zmm2 ; AVX512F-NEXT: vmovdqa {{.*#+}} ymm3 = [16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16] ; AVX512F-NEXT: vpsubw %ymm1, %ymm3, %ymm1 ; AVX512F-NEXT: vpmovzxwd {{.*#+}} zmm1 = ymm1[0],zero,ymm1[1],zero,ymm1[2],zero,ymm1[3],zero,ymm1[4],zero,ymm1[5],zero,ymm1[6],zero,ymm1[7],zero,ymm1[8],zero,ymm1[9],zero,ymm1[10],zero,ymm1[11],zero,ymm1[12],zero,ymm1[13],zero,ymm1[14],zero,ymm1[15],zero -; AVX512F-NEXT: vpsrlvd %zmm1, %zmm0, %zmm0 +; AVX512F-NEXT: vpsllvd %zmm1, %zmm0, %zmm0 ; AVX512F-NEXT: vpord %zmm0, %zmm2, %zmm0 ; AVX512F-NEXT: vpmovdw %zmm0, %ymm0 ; AVX512F-NEXT: retq ; ; AVX512VL-LABEL: var_funnnel_v16i16: ; AVX512VL: # %bb.0: -; AVX512VL-NEXT: vpxor %xmm2, %xmm2, %xmm2 -; AVX512VL-NEXT: vpsubw %ymm1, %ymm2, %ymm1 +; AVX512VL-NEXT: vpmovzxwd {{.*#+}} zmm0 = ymm0[0],zero,ymm0[1],zero,ymm0[2],zero,ymm0[3],zero,ymm0[4],zero,ymm0[5],zero,ymm0[6],zero,ymm0[7],zero,ymm0[8],zero,ymm0[9],zero,ymm0[10],zero,ymm0[11],zero,ymm0[12],zero,ymm0[13],zero,ymm0[14],zero,ymm0[15],zero ; AVX512VL-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm1, %ymm1 ; AVX512VL-NEXT: vpmovzxwd {{.*#+}} zmm2 = ymm1[0],zero,ymm1[1],zero,ymm1[2],zero,ymm1[3],zero,ymm1[4],zero,ymm1[5],zero,ymm1[6],zero,ymm1[7],zero,ymm1[8],zero,ymm1[9],zero,ymm1[10],zero,ymm1[11],zero,ymm1[12],zero,ymm1[13],zero,ymm1[14],zero,ymm1[15],zero -; AVX512VL-NEXT: vpmovzxwd {{.*#+}} zmm0 = ymm0[0],zero,ymm0[1],zero,ymm0[2],zero,ymm0[3],zero,ymm0[4],zero,ymm0[5],zero,ymm0[6],zero,ymm0[7],zero,ymm0[8],zero,ymm0[9],zero,ymm0[10],zero,ymm0[11],zero,ymm0[12],zero,ymm0[13],zero,ymm0[14],zero,ymm0[15],zero -; AVX512VL-NEXT: vpsllvd %zmm2, %zmm0, %zmm2 +; AVX512VL-NEXT: vpsrlvd %zmm2, %zmm0, %zmm2 ; AVX512VL-NEXT: vmovdqa {{.*#+}} ymm3 = 
[16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16] ; AVX512VL-NEXT: vpsubw %ymm1, %ymm3, %ymm1 ; AVX512VL-NEXT: vpmovzxwd {{.*#+}} zmm1 = ymm1[0],zero,ymm1[1],zero,ymm1[2],zero,ymm1[3],zero,ymm1[4],zero,ymm1[5],zero,ymm1[6],zero,ymm1[7],zero,ymm1[8],zero,ymm1[9],zero,ymm1[10],zero,ymm1[11],zero,ymm1[12],zero,ymm1[13],zero,ymm1[14],zero,ymm1[15],zero -; AVX512VL-NEXT: vpsrlvd %zmm1, %zmm0, %zmm0 +; AVX512VL-NEXT: vpsllvd %zmm1, %zmm0, %zmm0 ; AVX512VL-NEXT: vpord %zmm0, %zmm2, %zmm0 ; AVX512VL-NEXT: vpmovdw %zmm0, %ymm0 ; AVX512VL-NEXT: retq @@ -347,25 +340,21 @@ define <16 x i16> @var_funnnel_v16i16(<16 x i16> %x, <16 x i16> %amt) nounwind { ; AVX512BW-LABEL: var_funnnel_v16i16: ; AVX512BW: # %bb.0: ; AVX512BW-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0 -; AVX512BW-NEXT: vpxor %xmm2, %xmm2, %xmm2 -; AVX512BW-NEXT: vpsubw %ymm1, %ymm2, %ymm1 ; AVX512BW-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm1, %ymm1 -; AVX512BW-NEXT: vpsllvw %zmm1, %zmm0, %zmm2 +; AVX512BW-NEXT: vpsrlvw %zmm1, %zmm0, %zmm2 ; AVX512BW-NEXT: vmovdqa {{.*#+}} ymm3 = [16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16] ; AVX512BW-NEXT: vpsubw %ymm1, %ymm3, %ymm1 -; AVX512BW-NEXT: vpsrlvw %zmm1, %zmm0, %zmm0 +; AVX512BW-NEXT: vpsllvw %zmm1, %zmm0, %zmm0 ; AVX512BW-NEXT: vpor %ymm0, %ymm2, %ymm0 ; AVX512BW-NEXT: retq ; ; AVX512VLBW-LABEL: var_funnnel_v16i16: ; AVX512VLBW: # %bb.0: -; AVX512VLBW-NEXT: vpxor %xmm2, %xmm2, %xmm2 -; AVX512VLBW-NEXT: vpsubw %ymm1, %ymm2, %ymm1 ; AVX512VLBW-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm1, %ymm1 -; AVX512VLBW-NEXT: vpsllvw %ymm1, %ymm0, %ymm2 +; AVX512VLBW-NEXT: vpsrlvw %ymm1, %ymm0, %ymm2 ; AVX512VLBW-NEXT: vmovdqa {{.*#+}} ymm3 = [16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16] ; AVX512VLBW-NEXT: vpsubw %ymm1, %ymm3, %ymm1 -; AVX512VLBW-NEXT: vpsrlvw %ymm1, %ymm0, %ymm0 +; AVX512VLBW-NEXT: vpsllvw %ymm1, %ymm0, %ymm0 ; AVX512VLBW-NEXT: vpor %ymm0, %ymm2, %ymm0 ; AVX512VLBW-NEXT: retq ; @@ -801,90 +790,78 @@ define <8 x i32> @splatvar_funnnel_v8i32(<8 x i32> %x, <8 x i32> %amt) nounwind define <16 x i16> @splatvar_funnnel_v16i16(<16 x i16> %x, <16 x i16> %amt) nounwind { ; AVX1-LABEL: splatvar_funnnel_v16i16: ; AVX1: # %bb.0: -; AVX1-NEXT: vpxor %xmm2, %xmm2, %xmm2 -; AVX1-NEXT: vpsubw %xmm1, %xmm2, %xmm1 +; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm2 ; AVX1-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1 -; AVX1-NEXT: vpmovzxwq {{.*#+}} xmm2 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero -; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm3 -; AVX1-NEXT: vpsllw %xmm2, %xmm3, %xmm4 +; AVX1-NEXT: vpmovzxwq {{.*#+}} xmm3 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero +; AVX1-NEXT: vpsrlw %xmm3, %xmm2, %xmm4 ; AVX1-NEXT: vmovdqa {{.*#+}} xmm5 = [16,16,16,16,16,16,16,16] ; AVX1-NEXT: vpsubw %xmm1, %xmm5, %xmm1 ; AVX1-NEXT: vpmovzxwq {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero -; AVX1-NEXT: vpsrlw %xmm1, %xmm3, %xmm3 -; AVX1-NEXT: vpor %xmm3, %xmm4, %xmm3 -; AVX1-NEXT: vpsllw %xmm2, %xmm0, %xmm2 -; AVX1-NEXT: vpsrlw %xmm1, %xmm0, %xmm0 -; AVX1-NEXT: vpor %xmm0, %xmm2, %xmm0 -; AVX1-NEXT: vinsertf128 $1, %xmm3, %ymm0, %ymm0 +; AVX1-NEXT: vpsllw %xmm1, %xmm2, %xmm2 +; AVX1-NEXT: vpor %xmm2, %xmm4, %xmm2 +; AVX1-NEXT: vpsrlw %xmm3, %xmm0, %xmm3 +; AVX1-NEXT: vpsllw %xmm1, %xmm0, %xmm0 +; AVX1-NEXT: vpor %xmm0, %xmm3, %xmm0 +; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0 ; AVX1-NEXT: retq ; ; AVX2-LABEL: splatvar_funnnel_v16i16: ; AVX2: # %bb.0: -; AVX2-NEXT: vpxor %xmm2, %xmm2, %xmm2 -; AVX2-NEXT: vpsubw %xmm1, %xmm2, %xmm1 ; AVX2-NEXT: vpand 
{{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1 ; AVX2-NEXT: vpmovzxwq {{.*#+}} xmm2 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero -; AVX2-NEXT: vpsllw %xmm2, %ymm0, %ymm2 +; AVX2-NEXT: vpsrlw %xmm2, %ymm0, %ymm2 ; AVX2-NEXT: vmovdqa {{.*#+}} xmm3 = [16,16,16,16,16,16,16,16] ; AVX2-NEXT: vpsubw %xmm1, %xmm3, %xmm1 ; AVX2-NEXT: vpmovzxwq {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero -; AVX2-NEXT: vpsrlw %xmm1, %ymm0, %ymm0 +; AVX2-NEXT: vpsllw %xmm1, %ymm0, %ymm0 ; AVX2-NEXT: vpor %ymm0, %ymm2, %ymm0 ; AVX2-NEXT: retq ; ; AVX512F-LABEL: splatvar_funnnel_v16i16: ; AVX512F: # %bb.0: -; AVX512F-NEXT: vpxor %xmm2, %xmm2, %xmm2 -; AVX512F-NEXT: vpsubw %xmm1, %xmm2, %xmm1 ; AVX512F-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1 ; AVX512F-NEXT: vpmovzxwq {{.*#+}} xmm2 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero -; AVX512F-NEXT: vpsllw %xmm2, %ymm0, %ymm2 +; AVX512F-NEXT: vpsrlw %xmm2, %ymm0, %ymm2 ; AVX512F-NEXT: vmovdqa {{.*#+}} xmm3 = [16,16,16,16,16,16,16,16] ; AVX512F-NEXT: vpsubw %xmm1, %xmm3, %xmm1 ; AVX512F-NEXT: vpmovzxwq {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero -; AVX512F-NEXT: vpsrlw %xmm1, %ymm0, %ymm0 +; AVX512F-NEXT: vpsllw %xmm1, %ymm0, %ymm0 ; AVX512F-NEXT: vpor %ymm0, %ymm2, %ymm0 ; AVX512F-NEXT: retq ; ; AVX512VL-LABEL: splatvar_funnnel_v16i16: ; AVX512VL: # %bb.0: -; AVX512VL-NEXT: vpxor %xmm2, %xmm2, %xmm2 -; AVX512VL-NEXT: vpsubw %xmm1, %xmm2, %xmm1 ; AVX512VL-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1 ; AVX512VL-NEXT: vpmovzxwq {{.*#+}} xmm2 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero -; AVX512VL-NEXT: vpsllw %xmm2, %ymm0, %ymm2 +; AVX512VL-NEXT: vpsrlw %xmm2, %ymm0, %ymm2 ; AVX512VL-NEXT: vmovdqa {{.*#+}} xmm3 = [16,16,16,16,16,16,16,16] ; AVX512VL-NEXT: vpsubw %xmm1, %xmm3, %xmm1 ; AVX512VL-NEXT: vpmovzxwq {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero -; AVX512VL-NEXT: vpsrlw %xmm1, %ymm0, %ymm0 +; AVX512VL-NEXT: vpsllw %xmm1, %ymm0, %ymm0 ; AVX512VL-NEXT: vpor %ymm0, %ymm2, %ymm0 ; AVX512VL-NEXT: retq ; ; AVX512BW-LABEL: splatvar_funnnel_v16i16: ; AVX512BW: # %bb.0: -; AVX512BW-NEXT: vpxor %xmm2, %xmm2, %xmm2 -; AVX512BW-NEXT: vpsubw %xmm1, %xmm2, %xmm1 ; AVX512BW-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1 ; AVX512BW-NEXT: vpmovzxwq {{.*#+}} xmm2 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero -; AVX512BW-NEXT: vpsllw %xmm2, %ymm0, %ymm2 +; AVX512BW-NEXT: vpsrlw %xmm2, %ymm0, %ymm2 ; AVX512BW-NEXT: vmovdqa {{.*#+}} xmm3 = [16,16,16,16,16,16,16,16] ; AVX512BW-NEXT: vpsubw %xmm1, %xmm3, %xmm1 ; AVX512BW-NEXT: vpmovzxwq {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero -; AVX512BW-NEXT: vpsrlw %xmm1, %ymm0, %ymm0 +; AVX512BW-NEXT: vpsllw %xmm1, %ymm0, %ymm0 ; AVX512BW-NEXT: vpor %ymm0, %ymm2, %ymm0 ; AVX512BW-NEXT: retq ; ; AVX512VLBW-LABEL: splatvar_funnnel_v16i16: ; AVX512VLBW: # %bb.0: -; AVX512VLBW-NEXT: vpxor %xmm2, %xmm2, %xmm2 -; AVX512VLBW-NEXT: vpsubw %xmm1, %xmm2, %xmm1 ; AVX512VLBW-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1 ; AVX512VLBW-NEXT: vpmovzxwq {{.*#+}} xmm2 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero -; AVX512VLBW-NEXT: vpsllw %xmm2, %ymm0, %ymm2 +; AVX512VLBW-NEXT: vpsrlw %xmm2, %ymm0, %ymm2 ; AVX512VLBW-NEXT: vmovdqa {{.*#+}} xmm3 = [16,16,16,16,16,16,16,16] ; AVX512VLBW-NEXT: vpsubw %xmm1, %xmm3, %xmm1 ; AVX512VLBW-NEXT: vpmovzxwq {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero -; AVX512VLBW-NEXT: vpsrlw %xmm1, %ymm0, %ymm0 +; AVX512VLBW-NEXT: vpsllw %xmm1, %ymm0, %ymm0 ; AVX512VLBW-NEXT: vpor %ymm0, %ymm2, %ymm0 ; 
AVX512VLBW-NEXT: retq ; diff --git a/llvm/test/CodeGen/X86/vector-fshr-rot-512.ll b/llvm/test/CodeGen/X86/vector-fshr-rot-512.ll index ce177f1a8a81..bd458426f114 100644 --- a/llvm/test/CodeGen/X86/vector-fshr-rot-512.ll +++ b/llvm/test/CodeGen/X86/vector-fshr-rot-512.ll @@ -36,29 +36,26 @@ define <16 x i32> @var_funnnel_v16i32(<16 x i32> %x, <16 x i32> %amt) nounwind { define <32 x i16> @var_funnnel_v32i16(<32 x i16> %x, <32 x i16> %amt) nounwind { ; AVX512F-LABEL: var_funnnel_v32i16: ; AVX512F: # %bb.0: -; AVX512F-NEXT: vpxor %xmm2, %xmm2, %xmm2 -; AVX512F-NEXT: vpsubw %ymm1, %ymm2, %ymm3 -; AVX512F-NEXT: vmovdqa {{.*#+}} ymm4 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] -; AVX512F-NEXT: vpand %ymm4, %ymm3, %ymm3 -; AVX512F-NEXT: vpmovzxwd {{.*#+}} zmm5 = ymm3[0],zero,ymm3[1],zero,ymm3[2],zero,ymm3[3],zero,ymm3[4],zero,ymm3[5],zero,ymm3[6],zero,ymm3[7],zero,ymm3[8],zero,ymm3[9],zero,ymm3[10],zero,ymm3[11],zero,ymm3[12],zero,ymm3[13],zero,ymm3[14],zero,ymm3[15],zero -; AVX512F-NEXT: vpmovzxwd {{.*#+}} zmm6 = ymm0[0],zero,ymm0[1],zero,ymm0[2],zero,ymm0[3],zero,ymm0[4],zero,ymm0[5],zero,ymm0[6],zero,ymm0[7],zero,ymm0[8],zero,ymm0[9],zero,ymm0[10],zero,ymm0[11],zero,ymm0[12],zero,ymm0[13],zero,ymm0[14],zero,ymm0[15],zero -; AVX512F-NEXT: vpsllvd %zmm5, %zmm6, %zmm5 -; AVX512F-NEXT: vmovdqa {{.*#+}} ymm7 = [16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16] -; AVX512F-NEXT: vpsubw %ymm3, %ymm7, %ymm3 +; AVX512F-NEXT: vmovdqa {{.*#+}} ymm2 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] +; AVX512F-NEXT: vpand %ymm2, %ymm1, %ymm3 +; AVX512F-NEXT: vpmovzxwd {{.*#+}} zmm4 = ymm3[0],zero,ymm3[1],zero,ymm3[2],zero,ymm3[3],zero,ymm3[4],zero,ymm3[5],zero,ymm3[6],zero,ymm3[7],zero,ymm3[8],zero,ymm3[9],zero,ymm3[10],zero,ymm3[11],zero,ymm3[12],zero,ymm3[13],zero,ymm3[14],zero,ymm3[15],zero +; AVX512F-NEXT: vpmovzxwd {{.*#+}} zmm5 = ymm0[0],zero,ymm0[1],zero,ymm0[2],zero,ymm0[3],zero,ymm0[4],zero,ymm0[5],zero,ymm0[6],zero,ymm0[7],zero,ymm0[8],zero,ymm0[9],zero,ymm0[10],zero,ymm0[11],zero,ymm0[12],zero,ymm0[13],zero,ymm0[14],zero,ymm0[15],zero +; AVX512F-NEXT: vpsrlvd %zmm4, %zmm5, %zmm4 +; AVX512F-NEXT: vmovdqa {{.*#+}} ymm6 = [16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16] +; AVX512F-NEXT: vpsubw %ymm3, %ymm6, %ymm3 ; AVX512F-NEXT: vpmovzxwd {{.*#+}} zmm3 = ymm3[0],zero,ymm3[1],zero,ymm3[2],zero,ymm3[3],zero,ymm3[4],zero,ymm3[5],zero,ymm3[6],zero,ymm3[7],zero,ymm3[8],zero,ymm3[9],zero,ymm3[10],zero,ymm3[11],zero,ymm3[12],zero,ymm3[13],zero,ymm3[14],zero,ymm3[15],zero -; AVX512F-NEXT: vpsrlvd %zmm3, %zmm6, %zmm3 -; AVX512F-NEXT: vpord %zmm3, %zmm5, %zmm3 +; AVX512F-NEXT: vpsllvd %zmm3, %zmm5, %zmm3 +; AVX512F-NEXT: vpord %zmm3, %zmm4, %zmm3 ; AVX512F-NEXT: vpmovdw %zmm3, %ymm3 ; AVX512F-NEXT: vextracti64x4 $1, %zmm1, %ymm1 -; AVX512F-NEXT: vpsubw %ymm1, %ymm2, %ymm1 -; AVX512F-NEXT: vpand %ymm4, %ymm1, %ymm1 +; AVX512F-NEXT: vpand %ymm2, %ymm1, %ymm1 ; AVX512F-NEXT: vpmovzxwd {{.*#+}} zmm2 = ymm1[0],zero,ymm1[1],zero,ymm1[2],zero,ymm1[3],zero,ymm1[4],zero,ymm1[5],zero,ymm1[6],zero,ymm1[7],zero,ymm1[8],zero,ymm1[9],zero,ymm1[10],zero,ymm1[11],zero,ymm1[12],zero,ymm1[13],zero,ymm1[14],zero,ymm1[15],zero ; AVX512F-NEXT: vextracti64x4 $1, %zmm0, %ymm0 ; AVX512F-NEXT: vpmovzxwd {{.*#+}} zmm0 = ymm0[0],zero,ymm0[1],zero,ymm0[2],zero,ymm0[3],zero,ymm0[4],zero,ymm0[5],zero,ymm0[6],zero,ymm0[7],zero,ymm0[8],zero,ymm0[9],zero,ymm0[10],zero,ymm0[11],zero,ymm0[12],zero,ymm0[13],zero,ymm0[14],zero,ymm0[15],zero -; AVX512F-NEXT: vpsllvd %zmm2, %zmm0, %zmm2 -; AVX512F-NEXT: vpsubw %ymm1, %ymm7, %ymm1 +; 
AVX512F-NEXT: vpsrlvd %zmm2, %zmm0, %zmm2 +; AVX512F-NEXT: vpsubw %ymm1, %ymm6, %ymm1 ; AVX512F-NEXT: vpmovzxwd {{.*#+}} zmm1 = ymm1[0],zero,ymm1[1],zero,ymm1[2],zero,ymm1[3],zero,ymm1[4],zero,ymm1[5],zero,ymm1[6],zero,ymm1[7],zero,ymm1[8],zero,ymm1[9],zero,ymm1[10],zero,ymm1[11],zero,ymm1[12],zero,ymm1[13],zero,ymm1[14],zero,ymm1[15],zero -; AVX512F-NEXT: vpsrlvd %zmm1, %zmm0, %zmm0 +; AVX512F-NEXT: vpsllvd %zmm1, %zmm0, %zmm0 ; AVX512F-NEXT: vpord %zmm0, %zmm2, %zmm0 ; AVX512F-NEXT: vpmovdw %zmm0, %ymm0 ; AVX512F-NEXT: vinserti64x4 $1, %ymm0, %zmm3, %zmm0 @@ -66,29 +63,26 @@ define <32 x i16> @var_funnnel_v32i16(<32 x i16> %x, <32 x i16> %amt) nounwind { ; ; AVX512VL-LABEL: var_funnnel_v32i16: ; AVX512VL: # %bb.0: -; AVX512VL-NEXT: vpxor %xmm2, %xmm2, %xmm2 -; AVX512VL-NEXT: vpsubw %ymm1, %ymm2, %ymm3 -; AVX512VL-NEXT: vmovdqa {{.*#+}} ymm4 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] -; AVX512VL-NEXT: vpand %ymm4, %ymm3, %ymm3 -; AVX512VL-NEXT: vpmovzxwd {{.*#+}} zmm5 = ymm3[0],zero,ymm3[1],zero,ymm3[2],zero,ymm3[3],zero,ymm3[4],zero,ymm3[5],zero,ymm3[6],zero,ymm3[7],zero,ymm3[8],zero,ymm3[9],zero,ymm3[10],zero,ymm3[11],zero,ymm3[12],zero,ymm3[13],zero,ymm3[14],zero,ymm3[15],zero -; AVX512VL-NEXT: vpmovzxwd {{.*#+}} zmm6 = ymm0[0],zero,ymm0[1],zero,ymm0[2],zero,ymm0[3],zero,ymm0[4],zero,ymm0[5],zero,ymm0[6],zero,ymm0[7],zero,ymm0[8],zero,ymm0[9],zero,ymm0[10],zero,ymm0[11],zero,ymm0[12],zero,ymm0[13],zero,ymm0[14],zero,ymm0[15],zero -; AVX512VL-NEXT: vpsllvd %zmm5, %zmm6, %zmm5 -; AVX512VL-NEXT: vmovdqa {{.*#+}} ymm7 = [16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16] -; AVX512VL-NEXT: vpsubw %ymm3, %ymm7, %ymm3 +; AVX512VL-NEXT: vmovdqa {{.*#+}} ymm2 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] +; AVX512VL-NEXT: vpand %ymm2, %ymm1, %ymm3 +; AVX512VL-NEXT: vpmovzxwd {{.*#+}} zmm4 = ymm3[0],zero,ymm3[1],zero,ymm3[2],zero,ymm3[3],zero,ymm3[4],zero,ymm3[5],zero,ymm3[6],zero,ymm3[7],zero,ymm3[8],zero,ymm3[9],zero,ymm3[10],zero,ymm3[11],zero,ymm3[12],zero,ymm3[13],zero,ymm3[14],zero,ymm3[15],zero +; AVX512VL-NEXT: vpmovzxwd {{.*#+}} zmm5 = ymm0[0],zero,ymm0[1],zero,ymm0[2],zero,ymm0[3],zero,ymm0[4],zero,ymm0[5],zero,ymm0[6],zero,ymm0[7],zero,ymm0[8],zero,ymm0[9],zero,ymm0[10],zero,ymm0[11],zero,ymm0[12],zero,ymm0[13],zero,ymm0[14],zero,ymm0[15],zero +; AVX512VL-NEXT: vpsrlvd %zmm4, %zmm5, %zmm4 +; AVX512VL-NEXT: vmovdqa {{.*#+}} ymm6 = [16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16] +; AVX512VL-NEXT: vpsubw %ymm3, %ymm6, %ymm3 ; AVX512VL-NEXT: vpmovzxwd {{.*#+}} zmm3 = ymm3[0],zero,ymm3[1],zero,ymm3[2],zero,ymm3[3],zero,ymm3[4],zero,ymm3[5],zero,ymm3[6],zero,ymm3[7],zero,ymm3[8],zero,ymm3[9],zero,ymm3[10],zero,ymm3[11],zero,ymm3[12],zero,ymm3[13],zero,ymm3[14],zero,ymm3[15],zero -; AVX512VL-NEXT: vpsrlvd %zmm3, %zmm6, %zmm3 -; AVX512VL-NEXT: vpord %zmm3, %zmm5, %zmm3 +; AVX512VL-NEXT: vpsllvd %zmm3, %zmm5, %zmm3 +; AVX512VL-NEXT: vpord %zmm3, %zmm4, %zmm3 ; AVX512VL-NEXT: vpmovdw %zmm3, %ymm3 ; AVX512VL-NEXT: vextracti64x4 $1, %zmm1, %ymm1 -; AVX512VL-NEXT: vpsubw %ymm1, %ymm2, %ymm1 -; AVX512VL-NEXT: vpand %ymm4, %ymm1, %ymm1 +; AVX512VL-NEXT: vpand %ymm2, %ymm1, %ymm1 ; AVX512VL-NEXT: vpmovzxwd {{.*#+}} zmm2 = ymm1[0],zero,ymm1[1],zero,ymm1[2],zero,ymm1[3],zero,ymm1[4],zero,ymm1[5],zero,ymm1[6],zero,ymm1[7],zero,ymm1[8],zero,ymm1[9],zero,ymm1[10],zero,ymm1[11],zero,ymm1[12],zero,ymm1[13],zero,ymm1[14],zero,ymm1[15],zero ; AVX512VL-NEXT: vextracti64x4 $1, %zmm0, %ymm0 ; AVX512VL-NEXT: vpmovzxwd {{.*#+}} zmm0 = 
ymm0[0],zero,ymm0[1],zero,ymm0[2],zero,ymm0[3],zero,ymm0[4],zero,ymm0[5],zero,ymm0[6],zero,ymm0[7],zero,ymm0[8],zero,ymm0[9],zero,ymm0[10],zero,ymm0[11],zero,ymm0[12],zero,ymm0[13],zero,ymm0[14],zero,ymm0[15],zero -; AVX512VL-NEXT: vpsllvd %zmm2, %zmm0, %zmm2 -; AVX512VL-NEXT: vpsubw %ymm1, %ymm7, %ymm1 +; AVX512VL-NEXT: vpsrlvd %zmm2, %zmm0, %zmm2 +; AVX512VL-NEXT: vpsubw %ymm1, %ymm6, %ymm1 ; AVX512VL-NEXT: vpmovzxwd {{.*#+}} zmm1 = ymm1[0],zero,ymm1[1],zero,ymm1[2],zero,ymm1[3],zero,ymm1[4],zero,ymm1[5],zero,ymm1[6],zero,ymm1[7],zero,ymm1[8],zero,ymm1[9],zero,ymm1[10],zero,ymm1[11],zero,ymm1[12],zero,ymm1[13],zero,ymm1[14],zero,ymm1[15],zero -; AVX512VL-NEXT: vpsrlvd %zmm1, %zmm0, %zmm0 +; AVX512VL-NEXT: vpsllvd %zmm1, %zmm0, %zmm0 ; AVX512VL-NEXT: vpord %zmm0, %zmm2, %zmm0 ; AVX512VL-NEXT: vpmovdw %zmm0, %ymm0 ; AVX512VL-NEXT: vinserti64x4 $1, %ymm0, %zmm3, %zmm0 @@ -308,40 +302,36 @@ define <16 x i32> @splatvar_funnnel_v16i32(<16 x i32> %x, <16 x i32> %amt) nounw define <32 x i16> @splatvar_funnnel_v32i16(<32 x i16> %x, <32 x i16> %amt) nounwind { ; AVX512F-LABEL: splatvar_funnnel_v32i16: ; AVX512F: # %bb.0: -; AVX512F-NEXT: vpxor %xmm2, %xmm2, %xmm2 -; AVX512F-NEXT: vpsubw %xmm1, %xmm2, %xmm1 +; AVX512F-NEXT: vextracti64x4 $1, %zmm0, %ymm2 ; AVX512F-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1 -; AVX512F-NEXT: vpmovzxwq {{.*#+}} xmm2 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero -; AVX512F-NEXT: vextracti64x4 $1, %zmm0, %ymm3 -; AVX512F-NEXT: vpsllw %xmm2, %ymm3, %ymm4 -; AVX512F-NEXT: vpsllw %xmm2, %ymm0, %ymm2 -; AVX512F-NEXT: vinserti64x4 $1, %ymm4, %zmm2, %zmm2 +; AVX512F-NEXT: vpmovzxwq {{.*#+}} xmm3 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero +; AVX512F-NEXT: vpsrlw %xmm3, %ymm2, %ymm4 +; AVX512F-NEXT: vpsrlw %xmm3, %ymm0, %ymm3 +; AVX512F-NEXT: vinserti64x4 $1, %ymm4, %zmm3, %zmm3 ; AVX512F-NEXT: vmovdqa {{.*#+}} xmm4 = [16,16,16,16,16,16,16,16] ; AVX512F-NEXT: vpsubw %xmm1, %xmm4, %xmm1 ; AVX512F-NEXT: vpmovzxwq {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero -; AVX512F-NEXT: vpsrlw %xmm1, %ymm3, %ymm3 -; AVX512F-NEXT: vpsrlw %xmm1, %ymm0, %ymm0 -; AVX512F-NEXT: vinserti64x4 $1, %ymm3, %zmm0, %zmm0 -; AVX512F-NEXT: vporq %zmm0, %zmm2, %zmm0 +; AVX512F-NEXT: vpsllw %xmm1, %ymm2, %ymm2 +; AVX512F-NEXT: vpsllw %xmm1, %ymm0, %ymm0 +; AVX512F-NEXT: vinserti64x4 $1, %ymm2, %zmm0, %zmm0 +; AVX512F-NEXT: vporq %zmm0, %zmm3, %zmm0 ; AVX512F-NEXT: retq ; ; AVX512VL-LABEL: splatvar_funnnel_v32i16: ; AVX512VL: # %bb.0: -; AVX512VL-NEXT: vpxor %xmm2, %xmm2, %xmm2 -; AVX512VL-NEXT: vpsubw %xmm1, %xmm2, %xmm1 +; AVX512VL-NEXT: vextracti64x4 $1, %zmm0, %ymm2 ; AVX512VL-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1 -; AVX512VL-NEXT: vpmovzxwq {{.*#+}} xmm2 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero -; AVX512VL-NEXT: vextracti64x4 $1, %zmm0, %ymm3 -; AVX512VL-NEXT: vpsllw %xmm2, %ymm3, %ymm4 -; AVX512VL-NEXT: vpsllw %xmm2, %ymm0, %ymm2 -; AVX512VL-NEXT: vinserti64x4 $1, %ymm4, %zmm2, %zmm2 +; AVX512VL-NEXT: vpmovzxwq {{.*#+}} xmm3 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero +; AVX512VL-NEXT: vpsrlw %xmm3, %ymm2, %ymm4 +; AVX512VL-NEXT: vpsrlw %xmm3, %ymm0, %ymm3 +; AVX512VL-NEXT: vinserti64x4 $1, %ymm4, %zmm3, %zmm3 ; AVX512VL-NEXT: vmovdqa {{.*#+}} xmm4 = [16,16,16,16,16,16,16,16] ; AVX512VL-NEXT: vpsubw %xmm1, %xmm4, %xmm1 ; AVX512VL-NEXT: vpmovzxwq {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero -; AVX512VL-NEXT: vpsrlw %xmm1, %ymm3, %ymm3 -; AVX512VL-NEXT: vpsrlw %xmm1, %ymm0, %ymm0 -; AVX512VL-NEXT: vinserti64x4 $1, %ymm3, 
%zmm0, %zmm0 -; AVX512VL-NEXT: vporq %zmm0, %zmm2, %zmm0 +; AVX512VL-NEXT: vpsllw %xmm1, %ymm2, %ymm2 +; AVX512VL-NEXT: vpsllw %xmm1, %ymm0, %ymm0 +; AVX512VL-NEXT: vinserti64x4 $1, %ymm2, %zmm0, %zmm0 +; AVX512VL-NEXT: vporq %zmm0, %zmm3, %zmm0 ; AVX512VL-NEXT: retq ; ; AVX512BW-LABEL: splatvar_funnnel_v32i16: diff --git a/llvm/test/CodeGen/X86/vector-fshr-rot-sub128.ll b/llvm/test/CodeGen/X86/vector-fshr-rot-sub128.ll index 88975d76af98..23e6f2f8c77b 100644 --- a/llvm/test/CodeGen/X86/vector-fshr-rot-sub128.ll +++ b/llvm/test/CodeGen/X86/vector-fshr-rot-sub128.ll @@ -83,14 +83,12 @@ define <2 x i32> @var_funnnel_v2i32(<2 x i32> %x, <2 x i32> %amt) nounwind { ; ; AVX2-LABEL: var_funnnel_v2i32: ; AVX2: # %bb.0: -; AVX2-NEXT: vpxor %xmm2, %xmm2, %xmm2 -; AVX2-NEXT: vpsubd %xmm1, %xmm2, %xmm1 ; AVX2-NEXT: vpbroadcastd {{.*#+}} xmm2 = [31,31,31,31] ; AVX2-NEXT: vpand %xmm2, %xmm1, %xmm1 -; AVX2-NEXT: vpsllvd %xmm1, %xmm0, %xmm2 +; AVX2-NEXT: vpsrlvd %xmm1, %xmm0, %xmm2 ; AVX2-NEXT: vpbroadcastd {{.*#+}} xmm3 = [32,32,32,32] ; AVX2-NEXT: vpsubd %xmm1, %xmm3, %xmm1 -; AVX2-NEXT: vpsrlvd %xmm1, %xmm0, %xmm0 +; AVX2-NEXT: vpsllvd %xmm1, %xmm0, %xmm0 ; AVX2-NEXT: vpor %xmm0, %xmm2, %xmm0 ; AVX2-NEXT: retq ; From 0045d01af96ff56c5b62d133be076020a33867ff Mon Sep 17 00:00:00 2001 From: Anastasia Stulova Date: Thu, 23 Dec 2021 14:57:34 +0000 Subject: [PATCH 151/293] [SPIR-V] Add a toolchain for SPIR-V in clang MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This patch adds a toolchain (TC) for SPIR-V along with the following changes in Driver and base ToolChain and Tool. This is required to provide a mechanism in clang to bypass SPIR-V backend in LLVM for SPIR-V until it lands in LLVM and matures. The SPIR-V code is generated by the SPIRV-LLVM translator tool named 'llvm-spirv' that is sought in 'PATH'. The compilation phases/actions should be bound for SPIR-V in the meantime as following: compile -> tools::Clang backend -> tools::SPIRV::Translator assemble -> tools::SPIRV::Translator However, Driver’s ToolSelector collapses compile-backend-assemble and compile-backend sequences to tools::Clang. To prevent this, added new {use,has}IntegratedBackend properties in ToolChain and Tool to which the ToolSelector reacts on, and which SPIR-V TC overrides. Linking of multiple input files is currently not supported but can be added separately. Differential Revision: https://reviews.llvm.org/D112410 Co-authored-by: Henry Linjamäki --- clang/docs/UsersManual.rst | 54 +++++++++++++++ .../clang/Basic/DiagnosticDriverKinds.td | 4 ++ clang/include/clang/Driver/Tool.h | 1 + clang/include/clang/Driver/ToolChain.h | 3 + clang/lib/Driver/Driver.cpp | 36 +++++++++- clang/lib/Driver/ToolChain.cpp | 2 +- clang/lib/Driver/ToolChains/Clang.cpp | 4 +- clang/lib/Driver/ToolChains/Clang.h | 7 +- clang/lib/Driver/ToolChains/SPIRV.cpp | 25 ++++++- clang/lib/Driver/ToolChains/SPIRV.h | 33 ++++++++++ clang/test/Driver/spirv-toolchain.cl | 65 +++++++++++++++++++ 11 files changed, 227 insertions(+), 7 deletions(-) create mode 100644 clang/test/Driver/spirv-toolchain.cl diff --git a/clang/docs/UsersManual.rst b/clang/docs/UsersManual.rst index ce66ef58fca6..3f9947afc29b 100644 --- a/clang/docs/UsersManual.rst +++ b/clang/docs/UsersManual.rst @@ -3093,6 +3093,15 @@ There is a set of concrete HW architectures that OpenCL can be compiled for. Generic Targets ^^^^^^^^^^^^^^^ +- A SPIR-V binary can be produced for 32 or 64 bit targets. + + .. 
code-block:: console + + $ clang -target spirv32 test.cl + $ clang -target spirv64 test.cl + + More details can be found in :ref:`the SPIR-V support section `. + - SPIR is available as a generic target to allow portable bitcode to be produced that can be used across GPU toolchains. The implementation follows `the SPIR specification `_. There are two flavors @@ -3510,6 +3519,51 @@ Clang expects the GCC executable "gcc.exe" compiled for `Some tests might fail `_ on ``x86_64-w64-mingw32``. +.. _spir-v: + +SPIR-V support +-------------- + +Clang supports generation of SPIR-V conformant to `the OpenCL Environment +Specification +`_. + +To generate SPIR-V binaries, Clang uses the external ``llvm-spirv`` tool from the +`SPIRV-LLVM-Translator repo +`_. + +Prior to the generation of a SPIR-V binary with Clang, ``llvm-spirv`` +should be built or installed. Please refer to `the following instructions +`_ +for more details. Clang will expect the ``llvm-spirv`` executable to +be present in the ``PATH`` environment variable. Clang uses ``llvm-spirv`` +with `the conformant assembly syntax package +`_. + +`The versioning +`_ of +``llvm-spirv`` is aligned with Clang major releases. The same applies to the +main development branch. It is therefore important to ensure the ``llvm-spirv`` +version is in alignment with the Clang version. For troubleshooting purposes +``llvm-spirv`` can be `tested in isolation +`_. + +Example usage for OpenCL kernel compilation: + + .. code-block:: console + + $ clang -target spirv32 test.cl + $ clang -target spirv64 test.cl + +Both invocations of Clang will result in the generation of a SPIR-V binary file +`test.o` for 32 bit and 64 bit respectively. This file can be imported +by an OpenCL driver that supports SPIR-V consumption or it can be compiled +further by offline SPIR-V consumer tools. + +Converting to SPIR-V produced with optimization levels other than `-O0` is +currently available as an experimental feature and it is not guaranteed to work +in all cases. + ..
_clang-cl: clang-cl diff --git a/clang/include/clang/Basic/DiagnosticDriverKinds.td b/clang/include/clang/Basic/DiagnosticDriverKinds.td index 2aeb987725e1..9776cd7b7b25 100644 --- a/clang/include/clang/Basic/DiagnosticDriverKinds.td +++ b/clang/include/clang/Basic/DiagnosticDriverKinds.td @@ -116,6 +116,10 @@ def warn_drv_unsupported_option_for_target : Warning< "ignoring '%0' option as it is not currently supported for target '%1'">, InGroup; +def warn_drv_spirv_linking_multiple_inputs_unsupported: Warning< + "Linking multiple input files is not supported for SPIR-V yet">, + InGroup; + def err_drv_invalid_thread_model_for_target : Error< "invalid thread model '%0' in '%1' for this target">; def err_drv_invalid_linker_name : Error< diff --git a/clang/include/clang/Driver/Tool.h b/clang/include/clang/Driver/Tool.h index cc0a09fb2747..42cf99a4a970 100644 --- a/clang/include/clang/Driver/Tool.h +++ b/clang/include/clang/Driver/Tool.h @@ -52,6 +52,7 @@ class Tool { const ToolChain &getToolChain() const { return TheToolChain; } virtual bool hasIntegratedAssembler() const { return false; } + virtual bool hasIntegratedBackend() const { return true; } virtual bool canEmitIR() const { return false; } virtual bool hasIntegratedCPP() const = 0; virtual bool isLinkJob() const { return false; } diff --git a/clang/include/clang/Driver/ToolChain.h b/clang/include/clang/Driver/ToolChain.h index e7b13eec3fc1..4afc9bf36b5f 100644 --- a/clang/include/clang/Driver/ToolChain.h +++ b/clang/include/clang/Driver/ToolChain.h @@ -384,6 +384,9 @@ class ToolChain { /// Check if the toolchain should use the integrated assembler. virtual bool useIntegratedAs() const; + /// Check if the toolchain should use the integrated backend. + virtual bool useIntegratedBackend() const { return true; } + /// Check if the toolchain should use AsmParser to parse inlineAsm when /// integrated assembler is not default. virtual bool parseInlineAsmUsingAsmParser() const { return false; } diff --git a/clang/lib/Driver/Driver.cpp b/clang/lib/Driver/Driver.cpp index 45bfcefd49f4..3b551ea94cc2 100644 --- a/clang/lib/Driver/Driver.cpp +++ b/clang/lib/Driver/Driver.cpp @@ -43,6 +43,7 @@ #include "ToolChains/PPCLinux.h" #include "ToolChains/PS4CPU.h" #include "ToolChains/RISCVToolchain.h" +#include "ToolChains/SPIRV.h" #include "ToolChains/Solaris.h" #include "ToolChains/TCE.h" #include "ToolChains/VEToolchain.h" @@ -3774,6 +3775,14 @@ void Driver::BuildActions(Compilation &C, DerivedArgList &Args, } } + // FIXME: Linking separate translation units for SPIR-V is not supported yet. + // It can be done either by LLVM IR linking before conversion of the final + // linked module to SPIR-V or external SPIR-V linkers can be used e.g. + // spirv-link. + if (C.getDefaultToolChain().getTriple().isSPIRV() && Inputs.size() > 1) { + Diag(clang::diag::warn_drv_spirv_linking_multiple_inputs_unsupported); + } + handleArguments(C, Args, Inputs, Actions); // Builder to be used to build offloading actions. @@ -3813,8 +3822,15 @@ void Driver::BuildActions(Compilation &C, DerivedArgList &Args, // Queue linker inputs. if (Phase == phases::Link) { assert(Phase == PL.back() && "linking must be final compilation step."); - LinkerInputs.push_back(Current); - Current = nullptr; + // Compilation phases are setup per language, however for SPIR-V the + // final linking phase is meaningless since the compilation phase + // produces the final binary. 
+ // FIXME: OpenCL - we could strip linking phase out from OpenCL + // compilation phases if we could verify it is not needed by any target. + if (!C.getDefaultToolChain().getTriple().isSPIRV()) { + LinkerInputs.push_back(Current); + Current = nullptr; + } break; } @@ -4387,6 +4403,12 @@ class ToolSelector final { if (!T) return nullptr; + // Can't collapse if we don't have codegen support unless we are + // emitting LLVM IR. + bool OutputIsLLVM = types::isLLVMIR(ActionInfo[0].JA->getType()); + if (!T->hasIntegratedBackend() && !(OutputIsLLVM && T->canEmitIR())) + return nullptr; + // When using -fembed-bitcode, it is required to have the same tool (clang) // for both CompilerJA and BackendJA. Otherwise, combine two stages. if (EmbedBitcode) { @@ -4456,6 +4478,12 @@ class ToolSelector final { if (!T) return nullptr; + // Can't collapse if we don't have codegen support unless we are + // emitting LLVM IR. + bool OutputIsLLVM = types::isLLVMIR(ActionInfo[0].JA->getType()); + if (!T->hasIntegratedBackend() && !(OutputIsLLVM && T->canEmitIR())) + return nullptr; + if (T->canEmitIR() && ((SaveTemps && !InputIsBitcode) || EmbedBitcode)) return nullptr; @@ -5479,6 +5507,10 @@ const ToolChain &Driver::getToolChain(const ArgList &Args, case llvm::Triple::ve: TC = std::make_unique(*this, Target, Args); break; + case llvm::Triple::spirv32: + case llvm::Triple::spirv64: + TC = std::make_unique(*this, Target, Args); + break; default: if (Target.getVendor() == llvm::Triple::Myriad) TC = std::make_unique(*this, Target, diff --git a/clang/lib/Driver/ToolChain.cpp b/clang/lib/Driver/ToolChain.cpp index ac033dd427c2..50c89aaadc18 100644 --- a/clang/lib/Driver/ToolChain.cpp +++ b/clang/lib/Driver/ToolChain.cpp @@ -260,7 +260,7 @@ bool ToolChain::IsUnwindTablesDefault(const ArgList &Args) const { Tool *ToolChain::getClang() const { if (!Clang) - Clang.reset(new tools::Clang(*this)); + Clang.reset(new tools::Clang(*this, useIntegratedBackend())); return Clang.get(); } diff --git a/clang/lib/Driver/ToolChains/Clang.cpp b/clang/lib/Driver/ToolChains/Clang.cpp index 2a3723975568..ca62229b8153 100644 --- a/clang/lib/Driver/ToolChains/Clang.cpp +++ b/clang/lib/Driver/ToolChains/Clang.cpp @@ -7126,11 +7126,11 @@ void Clang::ConstructJob(Compilation &C, const JobAction &JA, Args.ClaimAllArgs(options::OPT_emit_llvm); } -Clang::Clang(const ToolChain &TC) +Clang::Clang(const ToolChain &TC, bool HasIntegratedBackend) // CAUTION! The first constructor argument ("clang") is not arbitrary, // as it is for other tools. Some operations on a Tool actually test // whether that tool is Clang based on the Tool's Name as a string. - : Tool("clang", "clang frontend", TC) {} + : Tool("clang", "clang frontend", TC), HasBackend(HasIntegratedBackend) {} Clang::~Clang() {} diff --git a/clang/lib/Driver/ToolChains/Clang.h b/clang/lib/Driver/ToolChains/Clang.h index d4b4988b4a8c..00e0490e069b 100644 --- a/clang/lib/Driver/ToolChains/Clang.h +++ b/clang/lib/Driver/ToolChains/Clang.h @@ -26,6 +26,10 @@ namespace tools { /// Clang compiler tool. class LLVM_LIBRARY_VISIBILITY Clang : public Tool { + // Indicates whether this instance has integrated backend using + // internal LLVM infrastructure. 
+ bool HasBackend; + public: static const char *getBaseInputName(const llvm::opt::ArgList &Args, const InputInfo &Input); @@ -99,11 +103,12 @@ class LLVM_LIBRARY_VISIBILITY Clang : public Tool { const InputInfo &Input, const llvm::opt::ArgList &Args) const; public: - Clang(const ToolChain &TC); + Clang(const ToolChain &TC, bool HasIntegratedBackend = true); ~Clang() override; bool hasGoodDiagnostics() const override { return true; } bool hasIntegratedAssembler() const override { return true; } + bool hasIntegratedBackend() const override { return HasBackend; } bool hasIntegratedCPP() const override { return true; } bool canEmitIR() const override { return true; } diff --git a/clang/lib/Driver/ToolChains/SPIRV.cpp b/clang/lib/Driver/ToolChains/SPIRV.cpp index 16e72d3c733f..50d03e79bbb0 100644 --- a/clang/lib/Driver/ToolChains/SPIRV.cpp +++ b/clang/lib/Driver/ToolChains/SPIRV.cpp @@ -13,6 +13,7 @@ #include "clang/Driver/Options.h" using namespace clang::driver; +using namespace clang::driver::toolchains; using namespace clang::driver::tools; using namespace llvm::opt; @@ -27,7 +28,7 @@ void SPIRV::constructTranslateCommand(Compilation &C, const Tool &T, if (Input.getType() == types::TY_PP_Asm) CmdArgs.push_back("-to-binary"); if (Output.getType() == types::TY_PP_Asm) - CmdArgs.push_back("-spirv-text"); + CmdArgs.push_back("--spirv-tools-dis"); CmdArgs.append({"-o", Output.getFilename()}); @@ -47,3 +48,25 @@ void SPIRV::Translator::ConstructJob(Compilation &C, const JobAction &JA, llvm_unreachable("Invalid number of input files."); constructTranslateCommand(C, *this, JA, Output, Inputs[0], {}); } + +clang::driver::Tool *SPIRVToolChain::getTranslator() const { + if (!Translator) + Translator = std::make_unique(*this); + return Translator.get(); +} + +clang::driver::Tool *SPIRVToolChain::SelectTool(const JobAction &JA) const { + Action::ActionClass AC = JA.getKind(); + return SPIRVToolChain::getTool(AC); +} + +clang::driver::Tool *SPIRVToolChain::getTool(Action::ActionClass AC) const { + switch (AC) { + default: + break; + case Action::BackendJobClass: + case Action::AssembleJobClass: + return SPIRVToolChain::getTranslator(); + } + return ToolChain::getTool(AC); +} diff --git a/clang/lib/Driver/ToolChains/SPIRV.h b/clang/lib/Driver/ToolChains/SPIRV.h index 35d0446bd8b8..229f7018e3b5 100644 --- a/clang/lib/Driver/ToolChains/SPIRV.h +++ b/clang/lib/Driver/ToolChains/SPIRV.h @@ -41,6 +41,39 @@ class LLVM_LIBRARY_VISIBILITY Translator : public Tool { } // namespace SPIRV } // namespace tools + +namespace toolchains { + +class LLVM_LIBRARY_VISIBILITY SPIRVToolChain final : public ToolChain { + mutable std::unique_ptr Translator; + +public: + SPIRVToolChain(const Driver &D, const llvm::Triple &Triple, + const llvm::opt::ArgList &Args) + : ToolChain(D, Triple, Args) {} + + bool useIntegratedAs() const override { return true; } + bool useIntegratedBackend() const override { return false; } + + bool IsMathErrnoDefault() const override { return false; } + bool isCrossCompiling() const override { return true; } + bool isPICDefault() const override { return false; } + bool isPIEDefault(const llvm::opt::ArgList &Args) const override { + return false; + } + bool isPICDefaultForced() const override { return false; } + bool SupportsProfiling() const override { return false; } + + clang::driver::Tool *SelectTool(const JobAction &JA) const override; + +protected: + clang::driver::Tool *getTool(Action::ActionClass AC) const override; + +private: + clang::driver::Tool *getTranslator() const; +}; + +} // namespace 
toolchains } // namespace driver } // namespace clang #endif diff --git a/clang/test/Driver/spirv-toolchain.cl b/clang/test/Driver/spirv-toolchain.cl new file mode 100644 index 000000000000..d2562c0b667d --- /dev/null +++ b/clang/test/Driver/spirv-toolchain.cl @@ -0,0 +1,65 @@ +// Check object emission. +// RUN: %clang -### -no-canonical-prefixes -target spirv64 -x cl -c %s 2>&1 | FileCheck --check-prefix=SPV64 %s +// RUN: %clang -### -target spirv64 %s 2>&1 | FileCheck --check-prefix=SPV64 %s +// RUN: %clang -### -no-canonical-prefixes -target spirv64 -x ir -c %s 2>&1 | FileCheck --check-prefix=SPV64 %s +// RUN: %clang -### -no-canonical-prefixes -target spirv64 -x clcpp -c %s 2>&1 | FileCheck --check-prefix=SPV64 %s +// RUN: %clang -### -no-canonical-prefixes -target spirv64 -x c -c %s 2>&1 | FileCheck --check-prefix=SPV64 %s + +// SPV64: clang{{.*}} "-cc1" "-triple" "spirv64" +// SPV64-SAME: "-o" [[BC:".*bc"]] +// SPV64: {{llvm-spirv.*"}} [[BC]] "-o" {{".*o"}} + +// RUN: %clang -### -no-canonical-prefixes -target spirv32 -x cl -c %s 2>&1 | FileCheck --check-prefix=SPV32 %s +// RUN: %clang -### -target spirv32 %s 2>&1 | FileCheck --check-prefix=SPV32 %s +// RUN: %clang -### -no-canonical-prefixes -target spirv32 -x ir -c %s 2>&1 | FileCheck --check-prefix=SPV32 %s +// RUN: %clang -### -no-canonical-prefixes -target spirv32 -x clcpp -c %s 2>&1 | FileCheck --check-prefix=SPV32 %s +// RUN: %clang -### -no-canonical-prefixes -target spirv32 -x c -c %s 2>&1 | FileCheck --check-prefix=SPV32 %s + +// SPV32: clang{{.*}} "-cc1" "-triple" "spirv32" +// SPV32-SAME: "-o" [[BC:".*bc"]] +// SPV32: {{llvm-spirv.*"}} [[BC]] "-o" {{".*o"}} + +//----------------------------------------------------------------------------- +// Check Assembly emission. +// RUN: %clang -### -no-canonical-prefixes -target spirv64 -x cl -S %s 2>&1 | FileCheck --check-prefix=SPT64 %s +// RUN: %clang -### -no-canonical-prefixes -target spirv64 -x ir -S %s 2>&1 | FileCheck --check-prefix=SPT64 %s +// RUN: %clang -### -no-canonical-prefixes -target spirv64 -x clcpp -c %s 2>&1 | FileCheck --check-prefix=SPV64 %s +// RUN: %clang -### -no-canonical-prefixes -target spirv64 -x c -S %s 2>&1 | FileCheck --check-prefix=SPT64 %s + +// SPT64: clang{{.*}} "-cc1" "-triple" "spirv64" +// SPT64-SAME: "-o" [[BC:".*bc"]] +// SPT64: {{llvm-spirv.*"}} [[BC]] "--spirv-tools-dis" "-o" {{".*s"}} + +// RUN: %clang -### -no-canonical-prefixes -target spirv32 -x cl -S %s 2>&1 | FileCheck --check-prefix=SPT32 %s +// RUN: %clang -### -no-canonical-prefixes -target spirv32 -x ir -S %s 2>&1 | FileCheck --check-prefix=SPT32 %s +// RUN: %clang -### -no-canonical-prefixes -target spirv32 -x clcpp -c %s 2>&1 | FileCheck --check-prefix=SPV32 %s +// RUN: %clang -### -no-canonical-prefixes -target spirv32 -x c -S %s 2>&1 | FileCheck --check-prefix=SPT32 %s + +// SPT32: clang{{.*}} "-cc1" "-triple" "spirv32" +// SPT32-SAME: "-o" [[BC:".*bc"]] +// SPT32: {{llvm-spirv.*"}} [[BC]] "--spirv-tools-dis" "-o" {{".*s"}} + +//----------------------------------------------------------------------------- +// Check assembly input -> object output +// RUN: %clang -### -no-canonical-prefixes -target spirv64 -x assembler -c %s 2>&1 | FileCheck --check-prefix=ASM %s +// RUN: %clang -### -no-canonical-prefixes -target spirv32 -x assembler -c %s 2>&1 | FileCheck --check-prefix=ASM %s +// ASM: {{llvm-spirv.*"}} {{".*"}} "-to-binary" "-o" {{".*o"}} + +//----------------------------------------------------------------------------- +// Check --save-temps. 
+// RUN: %clang -### -no-canonical-prefixes -target spirv64 -x cl -c %s --save-temps 2>&1 | FileCheck --check-prefix=TMP %s + +// TMP: clang{{.*}} "-cc1" "-triple" "spirv64" +// TMP-SAME: "-E" +// TMP-SAME: "-o" [[I:".*i"]] +// TMP: clang{{.*}} "-cc1" "-triple" "spirv64" +// TMP-SAME: "-o" [[BC:".*bc"]] +// TMP-SAME: [[I]] +// TMP: {{llvm-spirv.*"}} [[BC]] "--spirv-tools-dis" "-o" [[S:".*s"]] +// TMP: {{llvm-spirv.*"}} [[S]] "-to-binary" "-o" {{".*o"}} + +//----------------------------------------------------------------------------- +// Check that warning occurs if multiple input files are passed. +// RUN: %clang -### -target spirv64 %s %s 2>&1 | FileCheck --check-prefix=WARN %s + +// WARN: warning: Linking multiple input files is not supported for SPIR-V yet From fd3cde600ba6d3e9900b74d7022d1215571a6ee2 Mon Sep 17 00:00:00 2001 From: Petar Avramovic Date: Thu, 23 Dec 2021 16:13:20 +0100 Subject: [PATCH 152/293] AMDGPU/GlobalISel: Fix attempt to select non-legal instr in mir test Delete inst-select-insert.xfail.mir. G_INSERT instructions in inst-select-insert.xfail.mir are no longer legal after D114198. This breaks build bots, since builds with LLVM_ENABLE_ASSERTIONS=Off don't check for legality and report cannot select while build with LLVM_ENABLE_ASSERTIONS=On reports instruction is not legal. --- .../GlobalISel/inst-select-insert.xfail.mir | 39 ------------------- 1 file changed, 39 deletions(-) delete mode 100644 llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-insert.xfail.mir diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-insert.xfail.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-insert.xfail.mir deleted file mode 100644 index 1720519b89e2..000000000000 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-insert.xfail.mir +++ /dev/null @@ -1,39 +0,0 @@ -# RUN: llc -march=amdgcn -mcpu=gfx900 -run-pass=instruction-select -global-isel-abort=2 -pass-remarks-missed='gisel*' -verify-machineinstrs -o /dev/null %s 2>&1 | FileCheck -check-prefix=ERR %s - -# ERR: remark: :0:0: instruction is not legal: %3:sgpr(<4 x s16>) = G_INSERT %0:sgpr, %2:sgpr(s16), 0 (in function: insert_sgpr_s16_to_v4s16_offset0) -# ERR-NEXT: :0:0: instruction is not legal: %2:sgpr(<16 x s32>) = G_INSERT %0:sgpr, %1:sgpr(<8 x s32>), 0 (in function: insert_sgpr_v8s32_to_v16s32_offset0) -# ERR-NOT: remark - -# FIXME: This 16-bit insert source should not be legal and this test -# should be deleted ---- -name: insert_sgpr_s16_to_v4s16_offset0 -legalized: true -regBankSelected: true - -body: | - bb.0: - liveins: $sgpr0_sgpr1, $sgpr2 - %0:sgpr(<4 x s16>) = COPY $sgpr0_sgpr1 - %1:sgpr(s32) = COPY $sgpr2 - %2:sgpr(s16) = G_TRUNC %1 - %3:sgpr(<4 x s16>) = G_INSERT %0, %2, 0 - S_ENDPGM 0, implicit %3 - -... - -# getSubRegFromChannel current does not handle cases > 128-bits ---- -name: insert_sgpr_v8s32_to_v16s32_offset0 -legalized: true -regBankSelected: true - -body: | - bb.0: - liveins: $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15, $sgpr16_sgpr17_sgpr18_sgpr19_sgpr20_sgpr21_sgpr22_sgpr23 - %0:sgpr(<16 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15 - %1:sgpr(<8 x s32>) = COPY $sgpr16_sgpr17_sgpr18_sgpr19_sgpr20_sgpr21_sgpr22_sgpr23 - %2:sgpr(<16 x s32>) = G_INSERT %0, %1, 0 - S_ENDPGM 0, implicit %2 - -... 
From e70ef6d9245e0f5497f34d9f10958a15be0cceed Mon Sep 17 00:00:00 2001 From: Alexandros Lamprineas Date: Thu, 23 Dec 2021 15:14:40 +0000 Subject: [PATCH 153/293] [AArch64] Add a tablegen pattern for SQXTN2. Converts concat_vectors(Vd, trunc(smin(smax Vm, -2^n), 2^n-1) to sqxtn2(Vd, Vm). Deliberately not handling v2i64 ~> v2i32 as the min/max nodes are not legal (same thing we did for the SQXTN patterns in https://reviews.llvm.org/D103263). Differential Revision: https://reviews.llvm.org/D116105 --- llvm/lib/Target/AArch64/AArch64InstrInfo.td | 26 +++++++++++++ .../CodeGen/AArch64/arm64-sqxtn2-combine.ll | 37 +++++++++++++++++++ llvm/test/CodeGen/AArch64/fpclamptosat_vec.ll | 34 ++++++----------- 3 files changed, 75 insertions(+), 22 deletions(-) create mode 100644 llvm/test/CodeGen/AArch64/arm64-sqxtn2-combine.ll diff --git a/llvm/lib/Target/AArch64/AArch64InstrInfo.td b/llvm/lib/Target/AArch64/AArch64InstrInfo.td index 727e16069fef..27013b51d966 100644 --- a/llvm/lib/Target/AArch64/AArch64InstrInfo.td +++ b/llvm/lib/Target/AArch64/AArch64InstrInfo.td @@ -4378,6 +4378,32 @@ def : Pat<(v4i16 (trunc (smax (smin (v4i32 V128:$Vn), (v4i32 VImm7FFF)), (v4i32 VImm8000)))), (SQXTNv4i16 V128:$Vn)>; +// concat_vectors(Vd, trunc(smin(smax Vm, -128), 127) ~> SQXTN2(Vd, Vn) +// with reversed min/max +def : Pat<(v16i8 (concat_vectors + (v8i8 V64:$Vd), + (v8i8 (trunc (smin (smax (v8i16 V128:$Vn), (v8i16 VImm80)), + (v8i16 VImm7F)))))), + (SQXTNv16i8 (INSERT_SUBREG (IMPLICIT_DEF), V64:$Vd, dsub), V128:$Vn)>; +def : Pat<(v16i8 (concat_vectors + (v8i8 V64:$Vd), + (v8i8 (trunc (smax (smin (v8i16 V128:$Vn), (v8i16 VImm7F)), + (v8i16 VImm80)))))), + (SQXTNv16i8 (INSERT_SUBREG (IMPLICIT_DEF), V64:$Vd, dsub), V128:$Vn)>; + +// concat_vectors(Vd, trunc(smin(smax Vm, -32768), 32767) ~> SQXTN2(Vd, Vn) +// with reversed min/max +def : Pat<(v8i16 (concat_vectors + (v4i16 V64:$Vd), + (v4i16 (trunc (smin (smax (v4i32 V128:$Vn), (v4i32 VImm8000)), + (v4i32 VImm7FFF)))))), + (SQXTNv8i16 (INSERT_SUBREG (IMPLICIT_DEF), V64:$Vd, dsub), V128:$Vn)>; +def : Pat<(v8i16 (concat_vectors + (v4i16 V64:$Vd), + (v4i16 (trunc (smax (smin (v4i32 V128:$Vn), (v4i32 VImm7FFF)), + (v4i32 VImm8000)))))), + (SQXTNv8i16 (INSERT_SUBREG (IMPLICIT_DEF), V64:$Vd, dsub), V128:$Vn)>; + //===----------------------------------------------------------------------===// // Advanced SIMD three vector instructions. //===----------------------------------------------------------------------===// diff --git a/llvm/test/CodeGen/AArch64/arm64-sqxtn2-combine.ll b/llvm/test/CodeGen/AArch64/arm64-sqxtn2-combine.ll new file mode 100644 index 000000000000..f09109c282a2 --- /dev/null +++ b/llvm/test/CodeGen/AArch64/arm64-sqxtn2-combine.ll @@ -0,0 +1,37 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc < %s -mtriple aarch64-none-linux-gnu | FileCheck %s + +; Test the (concat_vectors (X), (trunc(smin(smax(Y, -2^n), 2^n-1))) pattern. 
+ +define <16 x i8> @test_combine_v8i16_to_v16i8(<8 x i8> %x, <8 x i16> %y) { +; CHECK-LABEL: test_combine_v8i16_to_v16i8: +; CHECK: // %bb.0: // %entry +; CHECK-NEXT: // kill: def $d0 killed $d0 def $q0 +; CHECK-NEXT: sqxtn2 v0.16b, v1.8h +; CHECK-NEXT: ret +entry: + %min = call <8 x i16> @llvm.smin.v8i16(<8 x i16> %y, <8 x i16> ) + %max = call <8 x i16> @llvm.smax.v8i16(<8 x i16> %min, <8 x i16> ) + %trunc = trunc <8 x i16> %max to <8 x i8> + %shuffle = shufflevector <8 x i8> %x, <8 x i8> %trunc, <16 x i32> + ret <16 x i8> %shuffle +} + +define <8 x i16> @test_combine_v4i32_to_v8i16(<4 x i16> %x, <4 x i32> %y) { +; CHECK-LABEL: test_combine_v4i32_to_v8i16: +; CHECK: // %bb.0: // %entry +; CHECK-NEXT: // kill: def $d0 killed $d0 def $q0 +; CHECK-NEXT: sqxtn2 v0.8h, v1.4s +; CHECK-NEXT: ret +entry: + %max = call <4 x i32> @llvm.smax.v4i32(<4 x i32> %y, <4 x i32> ) + %min = call <4 x i32> @llvm.smin.v4i32(<4 x i32> %max, <4 x i32> ) + %trunc = trunc <4 x i32> %min to <4 x i16> + %shuffle = shufflevector <4 x i16> %x, <4 x i16> %trunc, <8 x i32> + ret <8 x i16> %shuffle +} + +declare <8 x i16> @llvm.smin.v8i16(<8 x i16>, <8 x i16>) +declare <8 x i16> @llvm.smax.v8i16(<8 x i16>, <8 x i16>) +declare <4 x i32> @llvm.smin.v4i32(<4 x i32>, <4 x i32>) +declare <4 x i32> @llvm.smax.v4i32(<4 x i32>, <4 x i32>) diff --git a/llvm/test/CodeGen/AArch64/fpclamptosat_vec.ll b/llvm/test/CodeGen/AArch64/fpclamptosat_vec.ll index 0030e2fa94b0..b66ba95daf80 100644 --- a/llvm/test/CodeGen/AArch64/fpclamptosat_vec.ll +++ b/llvm/test/CodeGen/AArch64/fpclamptosat_vec.ll @@ -315,17 +315,12 @@ entry: define <8 x i16> @stest_f16i16(<8 x half> %x) { ; CHECK-CVT-LABEL: stest_f16i16: ; CHECK-CVT: // %bb.0: // %entry -; CHECK-CVT-NEXT: fcvtl2 v2.4s, v0.8h -; CHECK-CVT-NEXT: fcvtl v0.4s, v0.4h -; CHECK-CVT-NEXT: movi v1.4s, #127, msl #8 -; CHECK-CVT-NEXT: mvni v3.4s, #127, msl #8 -; CHECK-CVT-NEXT: fcvtzs v2.4s, v2.4s -; CHECK-CVT-NEXT: fcvtzs v0.4s, v0.4s -; CHECK-CVT-NEXT: smin v2.4s, v2.4s, v1.4s -; CHECK-CVT-NEXT: smin v0.4s, v0.4s, v1.4s -; CHECK-CVT-NEXT: smax v1.4s, v2.4s, v3.4s -; CHECK-CVT-NEXT: smax v0.4s, v0.4s, v3.4s -; CHECK-CVT-NEXT: uzp1 v0.8h, v0.8h, v1.8h +; CHECK-CVT-NEXT: fcvtl v1.4s, v0.4h +; CHECK-CVT-NEXT: fcvtl2 v0.4s, v0.8h +; CHECK-CVT-NEXT: fcvtzs v1.4s, v1.4s +; CHECK-CVT-NEXT: fcvtzs v2.4s, v0.4s +; CHECK-CVT-NEXT: sqxtn v0.4h, v1.4s +; CHECK-CVT-NEXT: sqxtn2 v0.8h, v2.4s ; CHECK-CVT-NEXT: ret ; ; CHECK-FP16-LABEL: stest_f16i16: @@ -1028,17 +1023,12 @@ entry: define <8 x i16> @stest_f16i16_mm(<8 x half> %x) { ; CHECK-CVT-LABEL: stest_f16i16_mm: ; CHECK-CVT: // %bb.0: // %entry -; CHECK-CVT-NEXT: fcvtl2 v2.4s, v0.8h -; CHECK-CVT-NEXT: fcvtl v0.4s, v0.4h -; CHECK-CVT-NEXT: movi v1.4s, #127, msl #8 -; CHECK-CVT-NEXT: mvni v3.4s, #127, msl #8 -; CHECK-CVT-NEXT: fcvtzs v2.4s, v2.4s -; CHECK-CVT-NEXT: fcvtzs v0.4s, v0.4s -; CHECK-CVT-NEXT: smin v2.4s, v2.4s, v1.4s -; CHECK-CVT-NEXT: smin v0.4s, v0.4s, v1.4s -; CHECK-CVT-NEXT: smax v1.4s, v2.4s, v3.4s -; CHECK-CVT-NEXT: smax v0.4s, v0.4s, v3.4s -; CHECK-CVT-NEXT: uzp1 v0.8h, v0.8h, v1.8h +; CHECK-CVT-NEXT: fcvtl v1.4s, v0.4h +; CHECK-CVT-NEXT: fcvtl2 v0.4s, v0.8h +; CHECK-CVT-NEXT: fcvtzs v1.4s, v1.4s +; CHECK-CVT-NEXT: fcvtzs v2.4s, v0.4s +; CHECK-CVT-NEXT: sqxtn v0.4h, v1.4s +; CHECK-CVT-NEXT: sqxtn2 v0.8h, v2.4s ; CHECK-CVT-NEXT: ret ; ; CHECK-FP16-LABEL: stest_f16i16_mm: From b7b260e19a7463464da64501a0991ea17e766a4b Mon Sep 17 00:00:00 2001 From: Craig Topper Date: Thu, 23 Dec 2021 08:57:51 -0600 Subject: [PATCH 154/293] [RISCV] Support strict FP 
conversion operations.

This adds support for strict conversions between fp types and between
integer and fp.

NOTE: RISCV has static rounding mode instructions, but the constrained
intrinsic metadata is not used to select static rounding modes. Dynamic
rounding mode is always used.

Differential Revision: https://reviews.llvm.org/D115997
---
 llvm/lib/Target/RISCV/RISCVISelLowering.cpp   |  47 +-
 llvm/lib/Target/RISCV/RISCVISelLowering.h     |   5 +
 llvm/lib/Target/RISCV/RISCVInstrInfoD.td      |  28 +-
 llvm/lib/Target/RISCV/RISCVInstrInfoF.td      |  50 +-
 llvm/lib/Target/RISCV/RISCVInstrInfoZfh.td    |  32 +-
 .../CodeGen/RISCV/double-convert-strict.ll    | 850 ++++++++++++++++++
 .../CodeGen/RISCV/float-convert-strict.ll     | 719 +++++++++++++++
 .../test/CodeGen/RISCV/half-convert-strict.ll | 712 +++++++++++++++
 .../RISCV/rv64d-double-convert-strict.ll      | 134 +++
 .../RISCV/rv64f-float-convert-strict.ll       | 134 +++
 .../RISCV/rv64f-half-convert-strict.ll        | 123 +++
 11 files changed, 2778 insertions(+), 56 deletions(-)
 create mode 100644 llvm/test/CodeGen/RISCV/double-convert-strict.ll
 create mode 100644 llvm/test/CodeGen/RISCV/float-convert-strict.ll
 create mode 100644 llvm/test/CodeGen/RISCV/half-convert-strict.ll
 create mode 100644 llvm/test/CodeGen/RISCV/rv64d-double-convert-strict.ll
 create mode 100644 llvm/test/CodeGen/RISCV/rv64f-float-convert-strict.ll
 create mode 100644 llvm/test/CodeGen/RISCV/rv64f-half-convert-strict.ll

diff --git a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
index 399be7abc9e6..3919aae25630 100644
--- a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
+++ b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
@@ -335,6 +335,8 @@ RISCVTargetLowering::RISCVTargetLowering(const TargetMachine &TM,
     setOperationAction(ISD::STRICT_FSUB, MVT::f16, Legal);
     setOperationAction(ISD::STRICT_FMUL, MVT::f16, Legal);
     setOperationAction(ISD::STRICT_FDIV, MVT::f16, Legal);
+    setOperationAction(ISD::STRICT_FP_ROUND, MVT::f16, Legal);
+    setOperationAction(ISD::STRICT_FP_EXTEND, MVT::f32, Legal);
     setOperationAction(ISD::STRICT_FSQRT, MVT::f16, Legal);
     for (auto CC : FPCCToExpand)
       setCondCodeAction(CC, MVT::f16, Expand);
@@ -405,6 +407,8 @@ RISCVTargetLowering::RISCVTargetLowering(const TargetMachine &TM,
     setOperationAction(ISD::STRICT_FSUB, MVT::f64, Legal);
     setOperationAction(ISD::STRICT_FMUL, MVT::f64, Legal);
     setOperationAction(ISD::STRICT_FDIV, MVT::f64, Legal);
+    setOperationAction(ISD::STRICT_FP_ROUND, MVT::f32, Legal);
+    setOperationAction(ISD::STRICT_FP_EXTEND, MVT::f64, Legal);
     setOperationAction(ISD::STRICT_FSQRT, MVT::f64, Legal);
     for (auto CC : FPCCToExpand)
       setCondCodeAction(CC, MVT::f64, Expand);
@@ -430,6 +434,11 @@ RISCVTargetLowering::RISCVTargetLowering(const TargetMachine &TM,
     setOperationAction(ISD::FP_TO_UINT_SAT, XLenVT, Custom);
     setOperationAction(ISD::FP_TO_SINT_SAT, XLenVT, Custom);
 
+    setOperationAction(ISD::STRICT_FP_TO_UINT, XLenVT, Legal);
+    setOperationAction(ISD::STRICT_FP_TO_SINT, XLenVT, Legal);
+    setOperationAction(ISD::STRICT_UINT_TO_FP, XLenVT, Legal);
+    setOperationAction(ISD::STRICT_SINT_TO_FP, XLenVT, Legal);
+
     setOperationAction(ISD::FLT_ROUNDS_, XLenVT, Custom);
     setOperationAction(ISD::SET_ROUNDING, MVT::Other, Custom);
   }
@@ -5727,11 +5736,17 @@ void RISCVTargetLowering::ReplaceNodeResults(SDNode *N,
     SDValue Op0 = IsStrict ? N->getOperand(1) : N->getOperand(0);
     if (getTypeAction(*DAG.getContext(), Op0.getValueType()) !=
         TargetLowering::TypeSoftenFloat) {
-      // FIXME: Support strict FP.
- if (IsStrict) - return; if (!isTypeLegal(Op0.getValueType())) return; + if (IsStrict) { + unsigned Opc = IsSigned ? RISCVISD::STRICT_FCVT_W_RTZ_RV64 + : RISCVISD::STRICT_FCVT_WU_RTZ_RV64; + SDVTList VTs = DAG.getVTList(MVT::i64, MVT::Other); + SDValue Res = DAG.getNode(Opc, DL, VTs, N->getOperand(0), Op0); + Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, Res)); + Results.push_back(Res.getValue(1)); + return; + } unsigned Opc = IsSigned ? RISCVISD::FCVT_W_RTZ_RV64 : RISCVISD::FCVT_WU_RTZ_RV64; SDValue Res = DAG.getNode(Opc, DL, MVT::i64, Op0); @@ -7152,11 +7167,23 @@ SDValue RISCVTargetLowering::PerformDAGCombine(SDNode *N, // Fold (zero_extend (fp_to_uint X)) to prevent forming fcvt+zexti32 during // type legalization. This is safe because fp_to_uint produces poison if // it overflows. - if (N->getValueType(0) == MVT::i64 && Subtarget.is64Bit() && - N->getOperand(0).getOpcode() == ISD::FP_TO_UINT && - isTypeLegal(N->getOperand(0).getOperand(0).getValueType())) - return DAG.getNode(ISD::FP_TO_UINT, SDLoc(N), MVT::i64, - N->getOperand(0).getOperand(0)); + if (N->getValueType(0) == MVT::i64 && Subtarget.is64Bit()) { + SDValue Src = N->getOperand(0); + if (Src.getOpcode() == ISD::FP_TO_UINT && + isTypeLegal(Src.getOperand(0).getValueType())) + return DAG.getNode(ISD::FP_TO_UINT, SDLoc(N), MVT::i64, + Src.getOperand(0)); + if (Src.getOpcode() == ISD::STRICT_FP_TO_UINT && Src.hasOneUse() && + isTypeLegal(Src.getOperand(1).getValueType())) { + SDVTList VTs = DAG.getVTList(MVT::i64, MVT::Other); + SDValue Res = DAG.getNode(ISD::STRICT_FP_TO_UINT, SDLoc(N), VTs, + Src.getOperand(0), Src.getOperand(1)); + DCI.CombineTo(N, Res); + DAG.ReplaceAllUsesOfValueWith(Src.getValue(1), Res.getValue(1)); + DCI.recursivelyDeleteUnusedNodes(Src.getNode()); + return SDValue(N, 0); // Return N so it doesn't get rechecked. + } + } return SDValue(); case RISCVISD::SELECT_CC: { // Transform @@ -7717,6 +7744,8 @@ unsigned RISCVTargetLowering::ComputeNumSignBitsForTargetNode( case RISCVISD::BDECOMPRESSW: case RISCVISD::FCVT_W_RTZ_RV64: case RISCVISD::FCVT_WU_RTZ_RV64: + case RISCVISD::STRICT_FCVT_W_RTZ_RV64: + case RISCVISD::STRICT_FCVT_WU_RTZ_RV64: // TODO: As the result is sign-extended, this is conservatively correct. A // more precise answer could be calculated for SRAW depending on known // bits in the shift amount. @@ -9448,6 +9477,8 @@ const char *RISCVTargetLowering::getTargetNodeName(unsigned Opcode) const { NODE_NAME_CASE(FCVT_XU_RTZ) NODE_NAME_CASE(FCVT_W_RTZ_RV64) NODE_NAME_CASE(FCVT_WU_RTZ_RV64) + NODE_NAME_CASE(STRICT_FCVT_W_RTZ_RV64) + NODE_NAME_CASE(STRICT_FCVT_WU_RTZ_RV64) NODE_NAME_CASE(READ_CYCLE_WIDE) NODE_NAME_CASE(GREV) NODE_NAME_CASE(GREVW) diff --git a/llvm/lib/Target/RISCV/RISCVISelLowering.h b/llvm/lib/Target/RISCV/RISCVISelLowering.h index a2d3c74ed5fd..794ec98bd38d 100644 --- a/llvm/lib/Target/RISCV/RISCVISelLowering.h +++ b/llvm/lib/Target/RISCV/RISCVISelLowering.h @@ -282,6 +282,11 @@ enum NodeType : unsigned { // the value read before the modification and the new chain pointer. SWAP_CSR, + // FP to 32 bit int conversions for RV64. These are used to keep track of the + // result being sign extended to 64 bit. These saturate out of range inputs. + STRICT_FCVT_W_RTZ_RV64 = ISD::FIRST_TARGET_STRICTFP_OPCODE, + STRICT_FCVT_WU_RTZ_RV64, + // Memory opcodes start here. 
VLE_VL = ISD::FIRST_TARGET_MEMORY_OPCODE, VSE_VL, diff --git a/llvm/lib/Target/RISCV/RISCVInstrInfoD.td b/llvm/lib/Target/RISCV/RISCVInstrInfoD.td index f328972e75c5..d6c31c4804db 100644 --- a/llvm/lib/Target/RISCV/RISCVInstrInfoD.td +++ b/llvm/lib/Target/RISCV/RISCVInstrInfoD.td @@ -184,8 +184,8 @@ let Predicates = [HasStdExtD] in { /// Float conversion operations // f64 -> f32, f32 -> f64 -def : Pat<(fpround FPR64:$rs1), (FCVT_S_D FPR64:$rs1, 0b111)>; -def : Pat<(fpextend FPR32:$rs1), (FCVT_D_S FPR32:$rs1)>; +def : Pat<(any_fpround FPR64:$rs1), (FCVT_S_D FPR64:$rs1, 0b111)>; +def : Pat<(any_fpextend FPR32:$rs1), (FCVT_D_S FPR32:$rs1)>; // [u]int<->double conversion patterns must be gated on IsRV32 or IsRV64, so // are defined later. @@ -271,8 +271,8 @@ let Predicates = [HasStdExtD, IsRV32] in { def : Pat<(f64 (fpimm0)), (FCVT_D_W (i32 X0))>; // double->[u]int. Round-to-zero must be used. -def : Pat<(i32 (fp_to_sint FPR64:$rs1)), (FCVT_W_D FPR64:$rs1, 0b001)>; -def : Pat<(i32 (fp_to_uint FPR64:$rs1)), (FCVT_WU_D FPR64:$rs1, 0b001)>; +def : Pat<(i32 (any_fp_to_sint FPR64:$rs1)), (FCVT_W_D FPR64:$rs1, 0b001)>; +def : Pat<(i32 (any_fp_to_uint FPR64:$rs1)), (FCVT_WU_D FPR64:$rs1, 0b001)>; // Saturating double->[u]int32. def : Pat<(i32 (riscv_fcvt_x_rtz FPR64:$rs1)), (FCVT_W_D $rs1, 0b001)>; @@ -285,8 +285,8 @@ def : Pat<(i32 (lrint FPR64:$rs1)), (FCVT_W_D $rs1, 0b111)>; def : Pat<(i32 (lround FPR64:$rs1)), (FCVT_W_D $rs1, 0b100)>; // [u]int->double. -def : Pat<(sint_to_fp (i32 GPR:$rs1)), (FCVT_D_W GPR:$rs1)>; -def : Pat<(uint_to_fp (i32 GPR:$rs1)), (FCVT_D_WU GPR:$rs1)>; +def : Pat<(any_sint_to_fp (i32 GPR:$rs1)), (FCVT_D_W GPR:$rs1)>; +def : Pat<(any_uint_to_fp (i32 GPR:$rs1)), (FCVT_D_WU GPR:$rs1)>; } // Predicates = [HasStdExtD, IsRV32] let Predicates = [HasStdExtD, IsRV64] in { @@ -301,20 +301,20 @@ def : Pat<(i64 (bitconvert FPR64:$rs1)), (FMV_X_D FPR64:$rs1)>; // Use target specific isd nodes to help us remember the result is sign // extended. Matching sext_inreg+fptoui/fptosi may cause the conversion to be // duplicated if it has another user that didn't need the sign_extend. -def : Pat<(riscv_fcvt_w_rtz_rv64 FPR64:$rs1), (FCVT_W_D $rs1, 0b001)>; -def : Pat<(riscv_fcvt_wu_rtz_rv64 FPR64:$rs1), (FCVT_WU_D $rs1, 0b001)>; +def : Pat<(riscv_any_fcvt_w_rtz_rv64 FPR64:$rs1), (FCVT_W_D $rs1, 0b001)>; +def : Pat<(riscv_any_fcvt_wu_rtz_rv64 FPR64:$rs1), (FCVT_WU_D $rs1, 0b001)>; // [u]int32->fp -def : Pat<(sint_to_fp (i64 (sexti32 (i64 GPR:$rs1)))), (FCVT_D_W $rs1)>; -def : Pat<(uint_to_fp (i64 (zexti32 (i64 GPR:$rs1)))), (FCVT_D_WU $rs1)>; +def : Pat<(any_sint_to_fp (i64 (sexti32 (i64 GPR:$rs1)))), (FCVT_D_W $rs1)>; +def : Pat<(any_uint_to_fp (i64 (zexti32 (i64 GPR:$rs1)))), (FCVT_D_WU $rs1)>; // Saturating double->[u]int64. def : Pat<(i64 (riscv_fcvt_x_rtz FPR64:$rs1)), (FCVT_L_D $rs1, 0b001)>; def : Pat<(i64 (riscv_fcvt_xu_rtz FPR64:$rs1)), (FCVT_LU_D $rs1, 0b001)>; // double->[u]int64. Round-to-zero must be used. -def : Pat<(i64 (fp_to_sint FPR64:$rs1)), (FCVT_L_D FPR64:$rs1, 0b001)>; -def : Pat<(i64 (fp_to_uint FPR64:$rs1)), (FCVT_LU_D FPR64:$rs1, 0b001)>; +def : Pat<(i64 (any_fp_to_sint FPR64:$rs1)), (FCVT_L_D FPR64:$rs1, 0b001)>; +def : Pat<(i64 (any_fp_to_uint FPR64:$rs1)), (FCVT_LU_D FPR64:$rs1, 0b001)>; // double->int64 with current rounding mode. def : Pat<(i64 (lrint FPR64:$rs1)), (FCVT_L_D $rs1, 0b111)>; @@ -325,6 +325,6 @@ def : Pat<(i64 (lround FPR64:$rs1)), (FCVT_L_D $rs1, 0b100)>; def : Pat<(i64 (llround FPR64:$rs1)), (FCVT_L_D $rs1, 0b100)>; // [u]int64->fp. 
Match GCC and default to using dynamic rounding mode. -def : Pat<(sint_to_fp (i64 GPR:$rs1)), (FCVT_D_L GPR:$rs1, 0b111)>; -def : Pat<(uint_to_fp (i64 GPR:$rs1)), (FCVT_D_LU GPR:$rs1, 0b111)>; +def : Pat<(any_sint_to_fp (i64 GPR:$rs1)), (FCVT_D_L GPR:$rs1, 0b111)>; +def : Pat<(any_uint_to_fp (i64 GPR:$rs1)), (FCVT_D_LU GPR:$rs1, 0b111)>; } // Predicates = [HasStdExtD, IsRV64] diff --git a/llvm/lib/Target/RISCV/RISCVInstrInfoF.td b/llvm/lib/Target/RISCV/RISCVInstrInfoF.td index b8f71d753614..bb45ed859442 100644 --- a/llvm/lib/Target/RISCV/RISCVInstrInfoF.td +++ b/llvm/lib/Target/RISCV/RISCVInstrInfoF.td @@ -19,9 +19,9 @@ def SDT_RISCVFMV_W_X_RV64 : SDTypeProfile<1, 1, [SDTCisVT<0, f32>, SDTCisVT<1, i64>]>; def SDT_RISCVFMV_X_ANYEXTW_RV64 : SDTypeProfile<1, 1, [SDTCisVT<0, i64>, SDTCisVT<1, f32>]>; -def STD_RISCVFCVT_W_RV64 +def SDT_RISCVFCVT_W_RV64 : SDTypeProfile<1, 1, [SDTCisVT<0, i64>, SDTCisFP<1>]>; -def STD_RISCVFCVT_X +def SDT_RISCVFCVT_X : SDTypeProfile<1, 1, [SDTCisVT<0, XLenVT>, SDTCisFP<1>]>; def riscv_fmv_w_x_rv64 @@ -29,13 +29,27 @@ def riscv_fmv_w_x_rv64 def riscv_fmv_x_anyextw_rv64 : SDNode<"RISCVISD::FMV_X_ANYEXTW_RV64", SDT_RISCVFMV_X_ANYEXTW_RV64>; def riscv_fcvt_w_rtz_rv64 - : SDNode<"RISCVISD::FCVT_W_RTZ_RV64", STD_RISCVFCVT_W_RV64>; + : SDNode<"RISCVISD::FCVT_W_RTZ_RV64", SDT_RISCVFCVT_W_RV64>; def riscv_fcvt_wu_rtz_rv64 - : SDNode<"RISCVISD::FCVT_WU_RTZ_RV64", STD_RISCVFCVT_W_RV64>; + : SDNode<"RISCVISD::FCVT_WU_RTZ_RV64", SDT_RISCVFCVT_W_RV64>; def riscv_fcvt_x_rtz - : SDNode<"RISCVISD::FCVT_X_RTZ", STD_RISCVFCVT_X>; + : SDNode<"RISCVISD::FCVT_X_RTZ", SDT_RISCVFCVT_X>; def riscv_fcvt_xu_rtz - : SDNode<"RISCVISD::FCVT_XU_RTZ", STD_RISCVFCVT_X>; + : SDNode<"RISCVISD::FCVT_XU_RTZ", SDT_RISCVFCVT_X>; + +def riscv_strict_fcvt_w_rtz_rv64 + : SDNode<"RISCVISD::STRICT_FCVT_W_RTZ_RV64", SDT_RISCVFCVT_W_RV64, + [SDNPHasChain]>; +def riscv_strict_fcvt_wu_rtz_rv64 + : SDNode<"RISCVISD::STRICT_FCVT_WU_RTZ_RV64", SDT_RISCVFCVT_W_RV64, + [SDNPHasChain]>; + +def riscv_any_fcvt_w_rtz_rv64 : PatFrags<(ops node:$src), + [(riscv_strict_fcvt_w_rtz_rv64 node:$src), + (riscv_fcvt_w_rtz_rv64 node:$src)]>; +def riscv_any_fcvt_wu_rtz_rv64 : PatFrags<(ops node:$src), + [(riscv_strict_fcvt_wu_rtz_rv64 node:$src), + (riscv_fcvt_wu_rtz_rv64 node:$src)]>; //===----------------------------------------------------------------------===// // Operand and SDNode transformation definitions. @@ -375,8 +389,8 @@ def : Pat<(bitconvert (i32 GPR:$rs1)), (FMV_W_X GPR:$rs1)>; def : Pat<(i32 (bitconvert FPR32:$rs1)), (FMV_X_W FPR32:$rs1)>; // float->[u]int. Round-to-zero must be used. -def : Pat<(i32 (fp_to_sint FPR32:$rs1)), (FCVT_W_S $rs1, 0b001)>; -def : Pat<(i32 (fp_to_uint FPR32:$rs1)), (FCVT_WU_S $rs1, 0b001)>; +def : Pat<(i32 (any_fp_to_sint FPR32:$rs1)), (FCVT_W_S $rs1, 0b001)>; +def : Pat<(i32 (any_fp_to_uint FPR32:$rs1)), (FCVT_WU_S $rs1, 0b001)>; // Saturating float->[u]int32. def : Pat<(i32 (riscv_fcvt_x_rtz FPR32:$rs1)), (FCVT_W_S $rs1, 0b001)>; @@ -389,8 +403,8 @@ def : Pat<(i32 (lrint FPR32:$rs1)), (FCVT_W_S $rs1, 0b111)>; def : Pat<(i32 (lround FPR32:$rs1)), (FCVT_W_S $rs1, 0b100)>; // [u]int->float. Match GCC and default to using dynamic rounding mode. 
-def : Pat<(sint_to_fp (i32 GPR:$rs1)), (FCVT_S_W $rs1, 0b111)>; -def : Pat<(uint_to_fp (i32 GPR:$rs1)), (FCVT_S_WU $rs1, 0b111)>; +def : Pat<(any_sint_to_fp (i32 GPR:$rs1)), (FCVT_S_W $rs1, 0b111)>; +def : Pat<(any_uint_to_fp (i32 GPR:$rs1)), (FCVT_S_WU $rs1, 0b111)>; } // Predicates = [HasStdExtF, IsRV32] let Predicates = [HasStdExtF, IsRV64] in { @@ -403,12 +417,12 @@ def : Pat<(sext_inreg (riscv_fmv_x_anyextw_rv64 FPR32:$src), i32), // Use target specific isd nodes to help us remember the result is sign // extended. Matching sext_inreg+fptoui/fptosi may cause the conversion to be // duplicated if it has another user that didn't need the sign_extend. -def : Pat<(riscv_fcvt_w_rtz_rv64 FPR32:$rs1), (FCVT_W_S $rs1, 0b001)>; -def : Pat<(riscv_fcvt_wu_rtz_rv64 FPR32:$rs1), (FCVT_WU_S $rs1, 0b001)>; +def : Pat<(riscv_any_fcvt_w_rtz_rv64 FPR32:$rs1), (FCVT_W_S $rs1, 0b001)>; +def : Pat<(riscv_any_fcvt_wu_rtz_rv64 FPR32:$rs1), (FCVT_WU_S $rs1, 0b001)>; // float->[u]int64. Round-to-zero must be used. -def : Pat<(i64 (fp_to_sint FPR32:$rs1)), (FCVT_L_S $rs1, 0b001)>; -def : Pat<(i64 (fp_to_uint FPR32:$rs1)), (FCVT_LU_S $rs1, 0b001)>; +def : Pat<(i64 (any_fp_to_sint FPR32:$rs1)), (FCVT_L_S $rs1, 0b001)>; +def : Pat<(i64 (any_fp_to_uint FPR32:$rs1)), (FCVT_LU_S $rs1, 0b001)>; // Saturating float->[u]int64. def : Pat<(i64 (riscv_fcvt_x_rtz FPR32:$rs1)), (FCVT_L_S $rs1, 0b001)>; @@ -423,8 +437,8 @@ def : Pat<(i64 (lround FPR32:$rs1)), (FCVT_L_S $rs1, 0b100)>; def : Pat<(i64 (llround FPR32:$rs1)), (FCVT_L_S $rs1, 0b100)>; // [u]int->fp. Match GCC and default to using dynamic rounding mode. -def : Pat<(sint_to_fp (i64 (sexti32 (i64 GPR:$rs1)))), (FCVT_S_W $rs1, 0b111)>; -def : Pat<(uint_to_fp (i64 (zexti32 (i64 GPR:$rs1)))), (FCVT_S_WU $rs1, 0b111)>; -def : Pat<(sint_to_fp (i64 GPR:$rs1)), (FCVT_S_L $rs1, 0b111)>; -def : Pat<(uint_to_fp (i64 GPR:$rs1)), (FCVT_S_LU $rs1, 0b111)>; +def : Pat<(any_sint_to_fp (i64 (sexti32 (i64 GPR:$rs1)))), (FCVT_S_W $rs1, 0b111)>; +def : Pat<(any_uint_to_fp (i64 (zexti32 (i64 GPR:$rs1)))), (FCVT_S_WU $rs1, 0b111)>; +def : Pat<(any_sint_to_fp (i64 GPR:$rs1)), (FCVT_S_L $rs1, 0b111)>; +def : Pat<(any_uint_to_fp (i64 GPR:$rs1)), (FCVT_S_LU $rs1, 0b111)>; } // Predicates = [HasStdExtF, IsRV64] diff --git a/llvm/lib/Target/RISCV/RISCVInstrInfoZfh.td b/llvm/lib/Target/RISCV/RISCVInstrInfoZfh.td index 76e0ea6f093b..663e44813899 100644 --- a/llvm/lib/Target/RISCV/RISCVInstrInfoZfh.td +++ b/llvm/lib/Target/RISCV/RISCVInstrInfoZfh.td @@ -277,8 +277,8 @@ defm : StPat; /// Float conversion operations // f32 -> f16, f16 -> f32 -def : Pat<(fpround FPR32:$rs1), (FCVT_H_S FPR32:$rs1, 0b111)>; -def : Pat<(fpextend FPR16:$rs1), (FCVT_S_H FPR16:$rs1)>; +def : Pat<(any_fpround FPR32:$rs1), (FCVT_H_S FPR32:$rs1, 0b111)>; +def : Pat<(any_fpextend FPR16:$rs1), (FCVT_S_H FPR16:$rs1)>; // Moves (no conversion) def : Pat<(riscv_fmv_h_x GPR:$src), (FMV_H_X GPR:$src)>; @@ -287,8 +287,8 @@ def : Pat<(riscv_fmv_x_anyexth FPR16:$src), (FMV_X_H FPR16:$src)>; let Predicates = [HasStdExtZfh, IsRV32] in { // half->[u]int. Round-to-zero must be used. -def : Pat<(i32 (fp_to_sint FPR16:$rs1)), (FCVT_W_H $rs1, 0b001)>; -def : Pat<(i32 (fp_to_uint FPR16:$rs1)), (FCVT_WU_H $rs1, 0b001)>; +def : Pat<(i32 (any_fp_to_sint FPR16:$rs1)), (FCVT_W_H $rs1, 0b001)>; +def : Pat<(i32 (any_fp_to_uint FPR16:$rs1)), (FCVT_WU_H $rs1, 0b001)>; // Saturating float->[u]int32. 
def : Pat<(i32 (riscv_fcvt_x_rtz FPR16:$rs1)), (FCVT_W_H $rs1, 0b001)>; @@ -301,20 +301,20 @@ def : Pat<(i32 (lrint FPR16:$rs1)), (FCVT_W_H $rs1, 0b111)>; def : Pat<(i32 (lround FPR16:$rs1)), (FCVT_W_H $rs1, 0b100)>; // [u]int->half. Match GCC and default to using dynamic rounding mode. -def : Pat<(sint_to_fp (i32 GPR:$rs1)), (FCVT_H_W $rs1, 0b111)>; -def : Pat<(uint_to_fp (i32 GPR:$rs1)), (FCVT_H_WU $rs1, 0b111)>; +def : Pat<(any_sint_to_fp (i32 GPR:$rs1)), (FCVT_H_W $rs1, 0b111)>; +def : Pat<(any_uint_to_fp (i32 GPR:$rs1)), (FCVT_H_WU $rs1, 0b111)>; } // Predicates = [HasStdExtZfh, IsRV32] let Predicates = [HasStdExtZfh, IsRV64] in { // Use target specific isd nodes to help us remember the result is sign // extended. Matching sext_inreg+fptoui/fptosi may cause the conversion to be // duplicated if it has another user that didn't need the sign_extend. -def : Pat<(riscv_fcvt_w_rtz_rv64 FPR16:$rs1), (FCVT_W_H $rs1, 0b001)>; -def : Pat<(riscv_fcvt_wu_rtz_rv64 FPR16:$rs1), (FCVT_WU_H $rs1, 0b001)>; +def : Pat<(riscv_any_fcvt_w_rtz_rv64 FPR16:$rs1), (FCVT_W_H $rs1, 0b001)>; +def : Pat<(riscv_any_fcvt_wu_rtz_rv64 FPR16:$rs1), (FCVT_WU_H $rs1, 0b001)>; // half->[u]int64. Round-to-zero must be used. -def : Pat<(i64 (fp_to_sint FPR16:$rs1)), (FCVT_L_H $rs1, 0b001)>; -def : Pat<(i64 (fp_to_uint FPR16:$rs1)), (FCVT_LU_H $rs1, 0b001)>; +def : Pat<(i64 (any_fp_to_sint FPR16:$rs1)), (FCVT_L_H $rs1, 0b001)>; +def : Pat<(i64 (any_fp_to_uint FPR16:$rs1)), (FCVT_LU_H $rs1, 0b001)>; // Saturating float->[u]int64. def : Pat<(i64 (riscv_fcvt_x_rtz FPR16:$rs1)), (FCVT_L_H $rs1, 0b001)>; @@ -329,17 +329,17 @@ def : Pat<(i64 (lround FPR16:$rs1)), (FCVT_L_H $rs1, 0b100)>; def : Pat<(i64 (llround FPR16:$rs1)), (FCVT_L_H $rs1, 0b100)>; // [u]int->fp. Match GCC and default to using dynamic rounding mode. 
-def : Pat<(sint_to_fp (i64 (sexti32 (i64 GPR:$rs1)))), (FCVT_H_W $rs1, 0b111)>; -def : Pat<(uint_to_fp (i64 (zexti32 (i64 GPR:$rs1)))), (FCVT_H_WU $rs1, 0b111)>; -def : Pat<(sint_to_fp (i64 GPR:$rs1)), (FCVT_H_L $rs1, 0b111)>; -def : Pat<(uint_to_fp (i64 GPR:$rs1)), (FCVT_H_LU $rs1, 0b111)>; +def : Pat<(any_sint_to_fp (i64 (sexti32 (i64 GPR:$rs1)))), (FCVT_H_W $rs1, 0b111)>; +def : Pat<(any_uint_to_fp (i64 (zexti32 (i64 GPR:$rs1)))), (FCVT_H_WU $rs1, 0b111)>; +def : Pat<(any_sint_to_fp (i64 GPR:$rs1)), (FCVT_H_L $rs1, 0b111)>; +def : Pat<(any_uint_to_fp (i64 GPR:$rs1)), (FCVT_H_LU $rs1, 0b111)>; } // Predicates = [HasStdExtZfh, IsRV64] let Predicates = [HasStdExtZfhmin, HasStdExtD] in { /// Float conversion operations // f64 -> f16, f16 -> f64 -def : Pat<(fpround FPR64:$rs1), (FCVT_H_D FPR64:$rs1, 0b111)>; -def : Pat<(fpextend FPR16:$rs1), (FCVT_D_H FPR16:$rs1)>; +def : Pat<(any_fpround FPR64:$rs1), (FCVT_H_D FPR64:$rs1, 0b111)>; +def : Pat<(any_fpextend FPR16:$rs1), (FCVT_D_H FPR16:$rs1)>; /// Float arithmetic operations def : Pat<(fcopysign FPR16:$rs1, FPR64:$rs2), diff --git a/llvm/test/CodeGen/RISCV/double-convert-strict.ll b/llvm/test/CodeGen/RISCV/double-convert-strict.ll new file mode 100644 index 000000000000..1d300697ca5c --- /dev/null +++ b/llvm/test/CodeGen/RISCV/double-convert-strict.ll @@ -0,0 +1,850 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc -mtriple=riscv32 -mattr=+d -verify-machineinstrs < %s \ +; RUN: -disable-strictnode-mutation | FileCheck -check-prefix=RV32IFD %s +; RUN: llc -mtriple=riscv64 -mattr=+d -verify-machineinstrs < %s \ +; RUN: -disable-strictnode-mutation | FileCheck -check-prefix=RV64IFD %s +; RUN: llc -mtriple=riscv32 -verify-machineinstrs < %s \ +; RUN: -disable-strictnode-mutation | FileCheck -check-prefix=RV32I %s +; RUN: llc -mtriple=riscv64 -verify-machineinstrs < %s \ +; RUN: -disable-strictnode-mutation | FileCheck -check-prefix=RV64I %s + +; NOTE: The rounding mode metadata does not effect which instruction is +; selected. Dynamic rounding mode is always used for operations that +; support rounding mode. 
+ +define float @fcvt_s_d(double %a) nounwind strictfp { +; RV32IFD-LABEL: fcvt_s_d: +; RV32IFD: # %bb.0: +; RV32IFD-NEXT: addi sp, sp, -16 +; RV32IFD-NEXT: sw a0, 8(sp) +; RV32IFD-NEXT: sw a1, 12(sp) +; RV32IFD-NEXT: fld ft0, 8(sp) +; RV32IFD-NEXT: fcvt.s.d ft0, ft0 +; RV32IFD-NEXT: fmv.x.w a0, ft0 +; RV32IFD-NEXT: addi sp, sp, 16 +; RV32IFD-NEXT: ret +; +; RV64IFD-LABEL: fcvt_s_d: +; RV64IFD: # %bb.0: +; RV64IFD-NEXT: fmv.d.x ft0, a0 +; RV64IFD-NEXT: fcvt.s.d ft0, ft0 +; RV64IFD-NEXT: fmv.x.w a0, ft0 +; RV64IFD-NEXT: ret +; +; RV32I-LABEL: fcvt_s_d: +; RV32I: # %bb.0: +; RV32I-NEXT: addi sp, sp, -16 +; RV32I-NEXT: sw ra, 12(sp) # 4-byte Folded Spill +; RV32I-NEXT: call __truncdfsf2@plt +; RV32I-NEXT: lw ra, 12(sp) # 4-byte Folded Reload +; RV32I-NEXT: addi sp, sp, 16 +; RV32I-NEXT: ret +; +; RV64I-LABEL: fcvt_s_d: +; RV64I: # %bb.0: +; RV64I-NEXT: addi sp, sp, -16 +; RV64I-NEXT: sd ra, 8(sp) # 8-byte Folded Spill +; RV64I-NEXT: call __truncdfsf2@plt +; RV64I-NEXT: ld ra, 8(sp) # 8-byte Folded Reload +; RV64I-NEXT: addi sp, sp, 16 +; RV64I-NEXT: ret + %1 = call float @llvm.experimental.constrained.fptrunc.f32.f64(double %a, metadata !"round.dynamic", metadata !"fpexcept.strict") + ret float %1 +} +declare float @llvm.experimental.constrained.fptrunc.f32.f64(double, metadata, metadata) + +define double @fcvt_d_s(float %a) nounwind strictfp { +; RV32IFD-LABEL: fcvt_d_s: +; RV32IFD: # %bb.0: +; RV32IFD-NEXT: addi sp, sp, -16 +; RV32IFD-NEXT: fmv.w.x ft0, a0 +; RV32IFD-NEXT: fcvt.d.s ft0, ft0 +; RV32IFD-NEXT: fsd ft0, 8(sp) +; RV32IFD-NEXT: lw a0, 8(sp) +; RV32IFD-NEXT: lw a1, 12(sp) +; RV32IFD-NEXT: addi sp, sp, 16 +; RV32IFD-NEXT: ret +; +; RV64IFD-LABEL: fcvt_d_s: +; RV64IFD: # %bb.0: +; RV64IFD-NEXT: fmv.w.x ft0, a0 +; RV64IFD-NEXT: fcvt.d.s ft0, ft0 +; RV64IFD-NEXT: fmv.x.d a0, ft0 +; RV64IFD-NEXT: ret +; +; RV32I-LABEL: fcvt_d_s: +; RV32I: # %bb.0: +; RV32I-NEXT: addi sp, sp, -16 +; RV32I-NEXT: sw ra, 12(sp) # 4-byte Folded Spill +; RV32I-NEXT: call __extendsfdf2@plt +; RV32I-NEXT: lw ra, 12(sp) # 4-byte Folded Reload +; RV32I-NEXT: addi sp, sp, 16 +; RV32I-NEXT: ret +; +; RV64I-LABEL: fcvt_d_s: +; RV64I: # %bb.0: +; RV64I-NEXT: addi sp, sp, -16 +; RV64I-NEXT: sd ra, 8(sp) # 8-byte Folded Spill +; RV64I-NEXT: call __extendsfdf2@plt +; RV64I-NEXT: ld ra, 8(sp) # 8-byte Folded Reload +; RV64I-NEXT: addi sp, sp, 16 +; RV64I-NEXT: ret + %1 = call double @llvm.experimental.constrained.fpext.f64.f32(float %a, metadata !"fpexcept.strict") + ret double %1 +} +declare double @llvm.experimental.constrained.fpext.f64.f32(float, metadata) + +define i32 @fcvt_w_d(double %a) nounwind strictfp { +; RV32IFD-LABEL: fcvt_w_d: +; RV32IFD: # %bb.0: +; RV32IFD-NEXT: addi sp, sp, -16 +; RV32IFD-NEXT: sw a0, 8(sp) +; RV32IFD-NEXT: sw a1, 12(sp) +; RV32IFD-NEXT: fld ft0, 8(sp) +; RV32IFD-NEXT: fcvt.w.d a0, ft0, rtz +; RV32IFD-NEXT: addi sp, sp, 16 +; RV32IFD-NEXT: ret +; +; RV64IFD-LABEL: fcvt_w_d: +; RV64IFD: # %bb.0: +; RV64IFD-NEXT: fmv.d.x ft0, a0 +; RV64IFD-NEXT: fcvt.w.d a0, ft0, rtz +; RV64IFD-NEXT: ret +; +; RV32I-LABEL: fcvt_w_d: +; RV32I: # %bb.0: +; RV32I-NEXT: addi sp, sp, -16 +; RV32I-NEXT: sw ra, 12(sp) # 4-byte Folded Spill +; RV32I-NEXT: call __fixdfsi@plt +; RV32I-NEXT: lw ra, 12(sp) # 4-byte Folded Reload +; RV32I-NEXT: addi sp, sp, 16 +; RV32I-NEXT: ret +; +; RV64I-LABEL: fcvt_w_d: +; RV64I: # %bb.0: +; RV64I-NEXT: addi sp, sp, -16 +; RV64I-NEXT: sd ra, 8(sp) # 8-byte Folded Spill +; RV64I-NEXT: call __fixdfsi@plt +; RV64I-NEXT: ld ra, 8(sp) # 8-byte Folded Reload +; RV64I-NEXT: addi sp, sp, 
16 +; RV64I-NEXT: ret + %1 = call i32 @llvm.experimental.constrained.fptosi.i32.f64(double %a, metadata !"fpexcept.strict") strictfp + ret i32 %1 +} +declare i32 @llvm.experimental.constrained.fptosi.i32.f64(double, metadata) + +; For RV64D, fcvt.lu.d is semantically equivalent to fcvt.wu.d in this case +; because fptosi will produce poison if the result doesn't fit into an i32. +define i32 @fcvt_wu_d(double %a) nounwind strictfp { +; RV32IFD-LABEL: fcvt_wu_d: +; RV32IFD: # %bb.0: +; RV32IFD-NEXT: addi sp, sp, -16 +; RV32IFD-NEXT: sw a0, 8(sp) +; RV32IFD-NEXT: sw a1, 12(sp) +; RV32IFD-NEXT: fld ft0, 8(sp) +; RV32IFD-NEXT: fcvt.wu.d a0, ft0, rtz +; RV32IFD-NEXT: addi sp, sp, 16 +; RV32IFD-NEXT: ret +; +; RV64IFD-LABEL: fcvt_wu_d: +; RV64IFD: # %bb.0: +; RV64IFD-NEXT: fmv.d.x ft0, a0 +; RV64IFD-NEXT: fcvt.wu.d a0, ft0, rtz +; RV64IFD-NEXT: ret +; +; RV32I-LABEL: fcvt_wu_d: +; RV32I: # %bb.0: +; RV32I-NEXT: addi sp, sp, -16 +; RV32I-NEXT: sw ra, 12(sp) # 4-byte Folded Spill +; RV32I-NEXT: call __fixunsdfsi@plt +; RV32I-NEXT: lw ra, 12(sp) # 4-byte Folded Reload +; RV32I-NEXT: addi sp, sp, 16 +; RV32I-NEXT: ret +; +; RV64I-LABEL: fcvt_wu_d: +; RV64I: # %bb.0: +; RV64I-NEXT: addi sp, sp, -16 +; RV64I-NEXT: sd ra, 8(sp) # 8-byte Folded Spill +; RV64I-NEXT: call __fixunsdfsi@plt +; RV64I-NEXT: ld ra, 8(sp) # 8-byte Folded Reload +; RV64I-NEXT: addi sp, sp, 16 +; RV64I-NEXT: ret + %1 = call i32 @llvm.experimental.constrained.fptoui.i32.f64(double %a, metadata !"fpexcept.strict") strictfp + ret i32 %1 +} +declare i32 @llvm.experimental.constrained.fptoui.i32.f64(double, metadata) + +; Test where the fptoui has multiple uses, one of which causes a sext to be +; inserted on RV64. +define i32 @fcvt_wu_d_multiple_use(double %x, i32* %y) { +; RV32IFD-LABEL: fcvt_wu_d_multiple_use: +; RV32IFD: # %bb.0: +; RV32IFD-NEXT: addi sp, sp, -16 +; RV32IFD-NEXT: .cfi_def_cfa_offset 16 +; RV32IFD-NEXT: sw a0, 8(sp) +; RV32IFD-NEXT: sw a1, 12(sp) +; RV32IFD-NEXT: fld ft0, 8(sp) +; RV32IFD-NEXT: fcvt.wu.d a1, ft0, rtz +; RV32IFD-NEXT: li a0, 1 +; RV32IFD-NEXT: beqz a1, .LBB4_2 +; RV32IFD-NEXT: # %bb.1: +; RV32IFD-NEXT: mv a0, a1 +; RV32IFD-NEXT: .LBB4_2: +; RV32IFD-NEXT: addi sp, sp, 16 +; RV32IFD-NEXT: ret +; +; RV64IFD-LABEL: fcvt_wu_d_multiple_use: +; RV64IFD: # %bb.0: +; RV64IFD-NEXT: fmv.d.x ft0, a0 +; RV64IFD-NEXT: fcvt.wu.d a1, ft0, rtz +; RV64IFD-NEXT: li a0, 1 +; RV64IFD-NEXT: beqz a1, .LBB4_2 +; RV64IFD-NEXT: # %bb.1: +; RV64IFD-NEXT: mv a0, a1 +; RV64IFD-NEXT: .LBB4_2: +; RV64IFD-NEXT: ret +; +; RV32I-LABEL: fcvt_wu_d_multiple_use: +; RV32I: # %bb.0: +; RV32I-NEXT: addi sp, sp, -16 +; RV32I-NEXT: .cfi_def_cfa_offset 16 +; RV32I-NEXT: sw ra, 12(sp) # 4-byte Folded Spill +; RV32I-NEXT: .cfi_offset ra, -4 +; RV32I-NEXT: call __fixunsdfsi@plt +; RV32I-NEXT: mv a1, a0 +; RV32I-NEXT: li a0, 1 +; RV32I-NEXT: beqz a1, .LBB4_2 +; RV32I-NEXT: # %bb.1: +; RV32I-NEXT: mv a0, a1 +; RV32I-NEXT: .LBB4_2: +; RV32I-NEXT: lw ra, 12(sp) # 4-byte Folded Reload +; RV32I-NEXT: addi sp, sp, 16 +; RV32I-NEXT: ret +; +; RV64I-LABEL: fcvt_wu_d_multiple_use: +; RV64I: # %bb.0: +; RV64I-NEXT: addi sp, sp, -16 +; RV64I-NEXT: .cfi_def_cfa_offset 16 +; RV64I-NEXT: sd ra, 8(sp) # 8-byte Folded Spill +; RV64I-NEXT: .cfi_offset ra, -8 +; RV64I-NEXT: call __fixunsdfsi@plt +; RV64I-NEXT: mv a1, a0 +; RV64I-NEXT: li a0, 1 +; RV64I-NEXT: beqz a1, .LBB4_2 +; RV64I-NEXT: # %bb.1: +; RV64I-NEXT: mv a0, a1 +; RV64I-NEXT: .LBB4_2: +; RV64I-NEXT: ld ra, 8(sp) # 8-byte Folded Reload +; RV64I-NEXT: addi sp, sp, 16 +; RV64I-NEXT: ret + %a = call i32 
@llvm.experimental.constrained.fptoui.i32.f64(double %x, metadata !"fpexcept.strict") strictfp + %b = icmp eq i32 %a, 0 + %c = select i1 %b, i32 1, i32 %a + ret i32 %c +} + +define double @fcvt_d_w(i32 %a) nounwind strictfp { +; RV32IFD-LABEL: fcvt_d_w: +; RV32IFD: # %bb.0: +; RV32IFD-NEXT: addi sp, sp, -16 +; RV32IFD-NEXT: fcvt.d.w ft0, a0 +; RV32IFD-NEXT: fsd ft0, 8(sp) +; RV32IFD-NEXT: lw a0, 8(sp) +; RV32IFD-NEXT: lw a1, 12(sp) +; RV32IFD-NEXT: addi sp, sp, 16 +; RV32IFD-NEXT: ret +; +; RV64IFD-LABEL: fcvt_d_w: +; RV64IFD: # %bb.0: +; RV64IFD-NEXT: fcvt.d.w ft0, a0 +; RV64IFD-NEXT: fmv.x.d a0, ft0 +; RV64IFD-NEXT: ret +; +; RV32I-LABEL: fcvt_d_w: +; RV32I: # %bb.0: +; RV32I-NEXT: addi sp, sp, -16 +; RV32I-NEXT: sw ra, 12(sp) # 4-byte Folded Spill +; RV32I-NEXT: call __floatsidf@plt +; RV32I-NEXT: lw ra, 12(sp) # 4-byte Folded Reload +; RV32I-NEXT: addi sp, sp, 16 +; RV32I-NEXT: ret +; +; RV64I-LABEL: fcvt_d_w: +; RV64I: # %bb.0: +; RV64I-NEXT: addi sp, sp, -16 +; RV64I-NEXT: sd ra, 8(sp) # 8-byte Folded Spill +; RV64I-NEXT: sext.w a0, a0 +; RV64I-NEXT: call __floatsidf@plt +; RV64I-NEXT: ld ra, 8(sp) # 8-byte Folded Reload +; RV64I-NEXT: addi sp, sp, 16 +; RV64I-NEXT: ret + %1 = call double @llvm.experimental.constrained.sitofp.f64.i32(i32 %a, metadata !"round.dynamic", metadata !"fpexcept.strict") strictfp + ret double %1 +} +declare double @llvm.experimental.constrained.sitofp.f64.i32(i32, metadata, metadata) + +define double @fcvt_d_w_load(i32* %p) nounwind strictfp { +; RV32IFD-LABEL: fcvt_d_w_load: +; RV32IFD: # %bb.0: +; RV32IFD-NEXT: addi sp, sp, -16 +; RV32IFD-NEXT: lw a0, 0(a0) +; RV32IFD-NEXT: fcvt.d.w ft0, a0 +; RV32IFD-NEXT: fsd ft0, 8(sp) +; RV32IFD-NEXT: lw a0, 8(sp) +; RV32IFD-NEXT: lw a1, 12(sp) +; RV32IFD-NEXT: addi sp, sp, 16 +; RV32IFD-NEXT: ret +; +; RV64IFD-LABEL: fcvt_d_w_load: +; RV64IFD: # %bb.0: +; RV64IFD-NEXT: lw a0, 0(a0) +; RV64IFD-NEXT: fcvt.d.w ft0, a0 +; RV64IFD-NEXT: fmv.x.d a0, ft0 +; RV64IFD-NEXT: ret +; +; RV32I-LABEL: fcvt_d_w_load: +; RV32I: # %bb.0: +; RV32I-NEXT: addi sp, sp, -16 +; RV32I-NEXT: sw ra, 12(sp) # 4-byte Folded Spill +; RV32I-NEXT: lw a0, 0(a0) +; RV32I-NEXT: call __floatsidf@plt +; RV32I-NEXT: lw ra, 12(sp) # 4-byte Folded Reload +; RV32I-NEXT: addi sp, sp, 16 +; RV32I-NEXT: ret +; +; RV64I-LABEL: fcvt_d_w_load: +; RV64I: # %bb.0: +; RV64I-NEXT: addi sp, sp, -16 +; RV64I-NEXT: sd ra, 8(sp) # 8-byte Folded Spill +; RV64I-NEXT: lw a0, 0(a0) +; RV64I-NEXT: call __floatsidf@plt +; RV64I-NEXT: ld ra, 8(sp) # 8-byte Folded Reload +; RV64I-NEXT: addi sp, sp, 16 +; RV64I-NEXT: ret + %a = load i32, i32* %p + %1 = call double @llvm.experimental.constrained.sitofp.f64.i32(i32 %a, metadata !"round.dynamic", metadata !"fpexcept.strict") strictfp + ret double %1 +} + +define double @fcvt_d_wu(i32 %a) nounwind strictfp { +; RV32IFD-LABEL: fcvt_d_wu: +; RV32IFD: # %bb.0: +; RV32IFD-NEXT: addi sp, sp, -16 +; RV32IFD-NEXT: fcvt.d.wu ft0, a0 +; RV32IFD-NEXT: fsd ft0, 8(sp) +; RV32IFD-NEXT: lw a0, 8(sp) +; RV32IFD-NEXT: lw a1, 12(sp) +; RV32IFD-NEXT: addi sp, sp, 16 +; RV32IFD-NEXT: ret +; +; RV64IFD-LABEL: fcvt_d_wu: +; RV64IFD: # %bb.0: +; RV64IFD-NEXT: fcvt.d.wu ft0, a0 +; RV64IFD-NEXT: fmv.x.d a0, ft0 +; RV64IFD-NEXT: ret +; +; RV32I-LABEL: fcvt_d_wu: +; RV32I: # %bb.0: +; RV32I-NEXT: addi sp, sp, -16 +; RV32I-NEXT: sw ra, 12(sp) # 4-byte Folded Spill +; RV32I-NEXT: call __floatunsidf@plt +; RV32I-NEXT: lw ra, 12(sp) # 4-byte Folded Reload +; RV32I-NEXT: addi sp, sp, 16 +; RV32I-NEXT: ret +; +; RV64I-LABEL: fcvt_d_wu: +; RV64I: # %bb.0: +; 
RV64I-NEXT: addi sp, sp, -16 +; RV64I-NEXT: sd ra, 8(sp) # 8-byte Folded Spill +; RV64I-NEXT: sext.w a0, a0 +; RV64I-NEXT: call __floatunsidf@plt +; RV64I-NEXT: ld ra, 8(sp) # 8-byte Folded Reload +; RV64I-NEXT: addi sp, sp, 16 +; RV64I-NEXT: ret + %1 = call double @llvm.experimental.constrained.uitofp.f64.i32(i32 %a, metadata !"round.dynamic", metadata !"fpexcept.strict") strictfp + ret double %1 +} +declare double @llvm.experimental.constrained.uitofp.f64.i32(i32, metadata, metadata) + +define double @fcvt_d_wu_load(i32* %p) nounwind strictfp { +; RV32IFD-LABEL: fcvt_d_wu_load: +; RV32IFD: # %bb.0: +; RV32IFD-NEXT: addi sp, sp, -16 +; RV32IFD-NEXT: lw a0, 0(a0) +; RV32IFD-NEXT: fcvt.d.wu ft0, a0 +; RV32IFD-NEXT: fsd ft0, 8(sp) +; RV32IFD-NEXT: lw a0, 8(sp) +; RV32IFD-NEXT: lw a1, 12(sp) +; RV32IFD-NEXT: addi sp, sp, 16 +; RV32IFD-NEXT: ret +; +; RV64IFD-LABEL: fcvt_d_wu_load: +; RV64IFD: # %bb.0: +; RV64IFD-NEXT: lwu a0, 0(a0) +; RV64IFD-NEXT: fcvt.d.wu ft0, a0 +; RV64IFD-NEXT: fmv.x.d a0, ft0 +; RV64IFD-NEXT: ret +; +; RV32I-LABEL: fcvt_d_wu_load: +; RV32I: # %bb.0: +; RV32I-NEXT: addi sp, sp, -16 +; RV32I-NEXT: sw ra, 12(sp) # 4-byte Folded Spill +; RV32I-NEXT: lw a0, 0(a0) +; RV32I-NEXT: call __floatunsidf@plt +; RV32I-NEXT: lw ra, 12(sp) # 4-byte Folded Reload +; RV32I-NEXT: addi sp, sp, 16 +; RV32I-NEXT: ret +; +; RV64I-LABEL: fcvt_d_wu_load: +; RV64I: # %bb.0: +; RV64I-NEXT: addi sp, sp, -16 +; RV64I-NEXT: sd ra, 8(sp) # 8-byte Folded Spill +; RV64I-NEXT: lw a0, 0(a0) +; RV64I-NEXT: call __floatunsidf@plt +; RV64I-NEXT: ld ra, 8(sp) # 8-byte Folded Reload +; RV64I-NEXT: addi sp, sp, 16 +; RV64I-NEXT: ret + %a = load i32, i32* %p + %1 = call double @llvm.experimental.constrained.uitofp.f64.i32(i32 %a, metadata !"round.dynamic", metadata !"fpexcept.strict") strictfp + ret double %1 +} + +define i64 @fcvt_l_d(double %a) nounwind strictfp { +; RV32IFD-LABEL: fcvt_l_d: +; RV32IFD: # %bb.0: +; RV32IFD-NEXT: addi sp, sp, -16 +; RV32IFD-NEXT: sw ra, 12(sp) # 4-byte Folded Spill +; RV32IFD-NEXT: call __fixdfdi@plt +; RV32IFD-NEXT: lw ra, 12(sp) # 4-byte Folded Reload +; RV32IFD-NEXT: addi sp, sp, 16 +; RV32IFD-NEXT: ret +; +; RV64IFD-LABEL: fcvt_l_d: +; RV64IFD: # %bb.0: +; RV64IFD-NEXT: fmv.d.x ft0, a0 +; RV64IFD-NEXT: fcvt.l.d a0, ft0, rtz +; RV64IFD-NEXT: ret +; +; RV32I-LABEL: fcvt_l_d: +; RV32I: # %bb.0: +; RV32I-NEXT: addi sp, sp, -16 +; RV32I-NEXT: sw ra, 12(sp) # 4-byte Folded Spill +; RV32I-NEXT: call __fixdfdi@plt +; RV32I-NEXT: lw ra, 12(sp) # 4-byte Folded Reload +; RV32I-NEXT: addi sp, sp, 16 +; RV32I-NEXT: ret +; +; RV64I-LABEL: fcvt_l_d: +; RV64I: # %bb.0: +; RV64I-NEXT: addi sp, sp, -16 +; RV64I-NEXT: sd ra, 8(sp) # 8-byte Folded Spill +; RV64I-NEXT: call __fixdfdi@plt +; RV64I-NEXT: ld ra, 8(sp) # 8-byte Folded Reload +; RV64I-NEXT: addi sp, sp, 16 +; RV64I-NEXT: ret + %1 = call i64 @llvm.experimental.constrained.fptosi.i64.f64(double %a, metadata !"fpexcept.strict") strictfp + ret i64 %1 +} +declare i64 @llvm.experimental.constrained.fptosi.i64.f64(double, metadata) + +define i64 @fcvt_lu_d(double %a) nounwind strictfp { +; RV32IFD-LABEL: fcvt_lu_d: +; RV32IFD: # %bb.0: +; RV32IFD-NEXT: addi sp, sp, -16 +; RV32IFD-NEXT: sw ra, 12(sp) # 4-byte Folded Spill +; RV32IFD-NEXT: call __fixunsdfdi@plt +; RV32IFD-NEXT: lw ra, 12(sp) # 4-byte Folded Reload +; RV32IFD-NEXT: addi sp, sp, 16 +; RV32IFD-NEXT: ret +; +; RV64IFD-LABEL: fcvt_lu_d: +; RV64IFD: # %bb.0: +; RV64IFD-NEXT: fmv.d.x ft0, a0 +; RV64IFD-NEXT: fcvt.lu.d a0, ft0, rtz +; RV64IFD-NEXT: ret +; +; RV32I-LABEL: fcvt_lu_d: 
+; RV32I: # %bb.0: +; RV32I-NEXT: addi sp, sp, -16 +; RV32I-NEXT: sw ra, 12(sp) # 4-byte Folded Spill +; RV32I-NEXT: call __fixunsdfdi@plt +; RV32I-NEXT: lw ra, 12(sp) # 4-byte Folded Reload +; RV32I-NEXT: addi sp, sp, 16 +; RV32I-NEXT: ret +; +; RV64I-LABEL: fcvt_lu_d: +; RV64I: # %bb.0: +; RV64I-NEXT: addi sp, sp, -16 +; RV64I-NEXT: sd ra, 8(sp) # 8-byte Folded Spill +; RV64I-NEXT: call __fixunsdfdi@plt +; RV64I-NEXT: ld ra, 8(sp) # 8-byte Folded Reload +; RV64I-NEXT: addi sp, sp, 16 +; RV64I-NEXT: ret + %1 = call i64 @llvm.experimental.constrained.fptoui.i64.f64(double %a, metadata !"fpexcept.strict") strictfp + ret i64 %1 +} +declare i64 @llvm.experimental.constrained.fptoui.i64.f64(double, metadata) + +define double @fcvt_d_l(i64 %a) nounwind strictfp { +; RV32IFD-LABEL: fcvt_d_l: +; RV32IFD: # %bb.0: +; RV32IFD-NEXT: addi sp, sp, -16 +; RV32IFD-NEXT: sw ra, 12(sp) # 4-byte Folded Spill +; RV32IFD-NEXT: call __floatdidf@plt +; RV32IFD-NEXT: lw ra, 12(sp) # 4-byte Folded Reload +; RV32IFD-NEXT: addi sp, sp, 16 +; RV32IFD-NEXT: ret +; +; RV64IFD-LABEL: fcvt_d_l: +; RV64IFD: # %bb.0: +; RV64IFD-NEXT: fcvt.d.l ft0, a0 +; RV64IFD-NEXT: fmv.x.d a0, ft0 +; RV64IFD-NEXT: ret +; +; RV32I-LABEL: fcvt_d_l: +; RV32I: # %bb.0: +; RV32I-NEXT: addi sp, sp, -16 +; RV32I-NEXT: sw ra, 12(sp) # 4-byte Folded Spill +; RV32I-NEXT: call __floatdidf@plt +; RV32I-NEXT: lw ra, 12(sp) # 4-byte Folded Reload +; RV32I-NEXT: addi sp, sp, 16 +; RV32I-NEXT: ret +; +; RV64I-LABEL: fcvt_d_l: +; RV64I: # %bb.0: +; RV64I-NEXT: addi sp, sp, -16 +; RV64I-NEXT: sd ra, 8(sp) # 8-byte Folded Spill +; RV64I-NEXT: call __floatdidf@plt +; RV64I-NEXT: ld ra, 8(sp) # 8-byte Folded Reload +; RV64I-NEXT: addi sp, sp, 16 +; RV64I-NEXT: ret + %1 = call double @llvm.experimental.constrained.sitofp.f64.i64(i64 %a, metadata !"round.dynamic", metadata !"fpexcept.strict") strictfp + ret double %1 +} +declare double @llvm.experimental.constrained.sitofp.f64.i64(i64, metadata, metadata) + +define double @fcvt_d_lu(i64 %a) nounwind strictfp { +; RV32IFD-LABEL: fcvt_d_lu: +; RV32IFD: # %bb.0: +; RV32IFD-NEXT: addi sp, sp, -16 +; RV32IFD-NEXT: sw ra, 12(sp) # 4-byte Folded Spill +; RV32IFD-NEXT: call __floatundidf@plt +; RV32IFD-NEXT: lw ra, 12(sp) # 4-byte Folded Reload +; RV32IFD-NEXT: addi sp, sp, 16 +; RV32IFD-NEXT: ret +; +; RV64IFD-LABEL: fcvt_d_lu: +; RV64IFD: # %bb.0: +; RV64IFD-NEXT: fcvt.d.lu ft0, a0 +; RV64IFD-NEXT: fmv.x.d a0, ft0 +; RV64IFD-NEXT: ret +; +; RV32I-LABEL: fcvt_d_lu: +; RV32I: # %bb.0: +; RV32I-NEXT: addi sp, sp, -16 +; RV32I-NEXT: sw ra, 12(sp) # 4-byte Folded Spill +; RV32I-NEXT: call __floatundidf@plt +; RV32I-NEXT: lw ra, 12(sp) # 4-byte Folded Reload +; RV32I-NEXT: addi sp, sp, 16 +; RV32I-NEXT: ret +; +; RV64I-LABEL: fcvt_d_lu: +; RV64I: # %bb.0: +; RV64I-NEXT: addi sp, sp, -16 +; RV64I-NEXT: sd ra, 8(sp) # 8-byte Folded Spill +; RV64I-NEXT: call __floatundidf@plt +; RV64I-NEXT: ld ra, 8(sp) # 8-byte Folded Reload +; RV64I-NEXT: addi sp, sp, 16 +; RV64I-NEXT: ret + %1 = call double @llvm.experimental.constrained.uitofp.f64.i64(i64 %a, metadata !"round.dynamic", metadata !"fpexcept.strict") strictfp + ret double %1 +} +declare double @llvm.experimental.constrained.uitofp.f64.i64(i64, metadata, metadata) + +define double @fcvt_d_w_i8(i8 signext %a) nounwind strictfp { +; RV32IFD-LABEL: fcvt_d_w_i8: +; RV32IFD: # %bb.0: +; RV32IFD-NEXT: addi sp, sp, -16 +; RV32IFD-NEXT: fcvt.d.w ft0, a0 +; RV32IFD-NEXT: fsd ft0, 8(sp) +; RV32IFD-NEXT: lw a0, 8(sp) +; RV32IFD-NEXT: lw a1, 12(sp) +; RV32IFD-NEXT: addi sp, sp, 16 +; 
RV32IFD-NEXT: ret +; +; RV64IFD-LABEL: fcvt_d_w_i8: +; RV64IFD: # %bb.0: +; RV64IFD-NEXT: fcvt.d.w ft0, a0 +; RV64IFD-NEXT: fmv.x.d a0, ft0 +; RV64IFD-NEXT: ret +; +; RV32I-LABEL: fcvt_d_w_i8: +; RV32I: # %bb.0: +; RV32I-NEXT: addi sp, sp, -16 +; RV32I-NEXT: sw ra, 12(sp) # 4-byte Folded Spill +; RV32I-NEXT: call __floatsidf@plt +; RV32I-NEXT: lw ra, 12(sp) # 4-byte Folded Reload +; RV32I-NEXT: addi sp, sp, 16 +; RV32I-NEXT: ret +; +; RV64I-LABEL: fcvt_d_w_i8: +; RV64I: # %bb.0: +; RV64I-NEXT: addi sp, sp, -16 +; RV64I-NEXT: sd ra, 8(sp) # 8-byte Folded Spill +; RV64I-NEXT: call __floatsidf@plt +; RV64I-NEXT: ld ra, 8(sp) # 8-byte Folded Reload +; RV64I-NEXT: addi sp, sp, 16 +; RV64I-NEXT: ret + %1 = call double @llvm.experimental.constrained.sitofp.f64.i8(i8 %a, metadata !"round.dynamic", metadata !"fpexcept.strict") strictfp + ret double %1 +} +declare double @llvm.experimental.constrained.sitofp.f64.i8(i8, metadata, metadata) + +define double @fcvt_d_wu_i8(i8 zeroext %a) nounwind strictfp { +; RV32IFD-LABEL: fcvt_d_wu_i8: +; RV32IFD: # %bb.0: +; RV32IFD-NEXT: addi sp, sp, -16 +; RV32IFD-NEXT: fcvt.d.wu ft0, a0 +; RV32IFD-NEXT: fsd ft0, 8(sp) +; RV32IFD-NEXT: lw a0, 8(sp) +; RV32IFD-NEXT: lw a1, 12(sp) +; RV32IFD-NEXT: addi sp, sp, 16 +; RV32IFD-NEXT: ret +; +; RV64IFD-LABEL: fcvt_d_wu_i8: +; RV64IFD: # %bb.0: +; RV64IFD-NEXT: fcvt.d.wu ft0, a0 +; RV64IFD-NEXT: fmv.x.d a0, ft0 +; RV64IFD-NEXT: ret +; +; RV32I-LABEL: fcvt_d_wu_i8: +; RV32I: # %bb.0: +; RV32I-NEXT: addi sp, sp, -16 +; RV32I-NEXT: sw ra, 12(sp) # 4-byte Folded Spill +; RV32I-NEXT: call __floatunsidf@plt +; RV32I-NEXT: lw ra, 12(sp) # 4-byte Folded Reload +; RV32I-NEXT: addi sp, sp, 16 +; RV32I-NEXT: ret +; +; RV64I-LABEL: fcvt_d_wu_i8: +; RV64I: # %bb.0: +; RV64I-NEXT: addi sp, sp, -16 +; RV64I-NEXT: sd ra, 8(sp) # 8-byte Folded Spill +; RV64I-NEXT: call __floatunsidf@plt +; RV64I-NEXT: ld ra, 8(sp) # 8-byte Folded Reload +; RV64I-NEXT: addi sp, sp, 16 +; RV64I-NEXT: ret + %1 = call double @llvm.experimental.constrained.uitofp.f64.i8(i8 %a, metadata !"round.dynamic", metadata !"fpexcept.strict") strictfp + ret double %1 +} +declare double @llvm.experimental.constrained.uitofp.f64.i8(i8, metadata, metadata) + +define double @fcvt_d_w_i16(i16 signext %a) nounwind strictfp { +; RV32IFD-LABEL: fcvt_d_w_i16: +; RV32IFD: # %bb.0: +; RV32IFD-NEXT: addi sp, sp, -16 +; RV32IFD-NEXT: fcvt.d.w ft0, a0 +; RV32IFD-NEXT: fsd ft0, 8(sp) +; RV32IFD-NEXT: lw a0, 8(sp) +; RV32IFD-NEXT: lw a1, 12(sp) +; RV32IFD-NEXT: addi sp, sp, 16 +; RV32IFD-NEXT: ret +; +; RV64IFD-LABEL: fcvt_d_w_i16: +; RV64IFD: # %bb.0: +; RV64IFD-NEXT: fcvt.d.w ft0, a0 +; RV64IFD-NEXT: fmv.x.d a0, ft0 +; RV64IFD-NEXT: ret +; +; RV32I-LABEL: fcvt_d_w_i16: +; RV32I: # %bb.0: +; RV32I-NEXT: addi sp, sp, -16 +; RV32I-NEXT: sw ra, 12(sp) # 4-byte Folded Spill +; RV32I-NEXT: call __floatsidf@plt +; RV32I-NEXT: lw ra, 12(sp) # 4-byte Folded Reload +; RV32I-NEXT: addi sp, sp, 16 +; RV32I-NEXT: ret +; +; RV64I-LABEL: fcvt_d_w_i16: +; RV64I: # %bb.0: +; RV64I-NEXT: addi sp, sp, -16 +; RV64I-NEXT: sd ra, 8(sp) # 8-byte Folded Spill +; RV64I-NEXT: call __floatsidf@plt +; RV64I-NEXT: ld ra, 8(sp) # 8-byte Folded Reload +; RV64I-NEXT: addi sp, sp, 16 +; RV64I-NEXT: ret + %1 = call double @llvm.experimental.constrained.sitofp.f64.i16(i16 %a, metadata !"round.dynamic", metadata !"fpexcept.strict") strictfp + ret double %1 +} +declare double @llvm.experimental.constrained.sitofp.f64.i16(i16, metadata, metadata) + +define double @fcvt_d_wu_i16(i16 zeroext %a) nounwind strictfp { +; 
RV32IFD-LABEL: fcvt_d_wu_i16: +; RV32IFD: # %bb.0: +; RV32IFD-NEXT: addi sp, sp, -16 +; RV32IFD-NEXT: fcvt.d.wu ft0, a0 +; RV32IFD-NEXT: fsd ft0, 8(sp) +; RV32IFD-NEXT: lw a0, 8(sp) +; RV32IFD-NEXT: lw a1, 12(sp) +; RV32IFD-NEXT: addi sp, sp, 16 +; RV32IFD-NEXT: ret +; +; RV64IFD-LABEL: fcvt_d_wu_i16: +; RV64IFD: # %bb.0: +; RV64IFD-NEXT: fcvt.d.wu ft0, a0 +; RV64IFD-NEXT: fmv.x.d a0, ft0 +; RV64IFD-NEXT: ret +; +; RV32I-LABEL: fcvt_d_wu_i16: +; RV32I: # %bb.0: +; RV32I-NEXT: addi sp, sp, -16 +; RV32I-NEXT: sw ra, 12(sp) # 4-byte Folded Spill +; RV32I-NEXT: call __floatunsidf@plt +; RV32I-NEXT: lw ra, 12(sp) # 4-byte Folded Reload +; RV32I-NEXT: addi sp, sp, 16 +; RV32I-NEXT: ret +; +; RV64I-LABEL: fcvt_d_wu_i16: +; RV64I: # %bb.0: +; RV64I-NEXT: addi sp, sp, -16 +; RV64I-NEXT: sd ra, 8(sp) # 8-byte Folded Spill +; RV64I-NEXT: call __floatunsidf@plt +; RV64I-NEXT: ld ra, 8(sp) # 8-byte Folded Reload +; RV64I-NEXT: addi sp, sp, 16 +; RV64I-NEXT: ret + %1 = call double @llvm.experimental.constrained.uitofp.f64.i16(i16 %a, metadata !"round.dynamic", metadata !"fpexcept.strict") strictfp + ret double %1 +} +declare double @llvm.experimental.constrained.uitofp.f64.i16(i16, metadata, metadata) + +; Make sure we select W version of addi on RV64. +define signext i32 @fcvt_d_w_demanded_bits(i32 signext %0, double* %1) { +; RV32IFD-LABEL: fcvt_d_w_demanded_bits: +; RV32IFD: # %bb.0: +; RV32IFD-NEXT: addi a0, a0, 1 +; RV32IFD-NEXT: fcvt.d.w ft0, a0 +; RV32IFD-NEXT: fsd ft0, 0(a1) +; RV32IFD-NEXT: ret +; +; RV64IFD-LABEL: fcvt_d_w_demanded_bits: +; RV64IFD: # %bb.0: +; RV64IFD-NEXT: addiw a0, a0, 1 +; RV64IFD-NEXT: fcvt.d.w ft0, a0 +; RV64IFD-NEXT: fsd ft0, 0(a1) +; RV64IFD-NEXT: ret +; +; RV32I-LABEL: fcvt_d_w_demanded_bits: +; RV32I: # %bb.0: +; RV32I-NEXT: addi sp, sp, -16 +; RV32I-NEXT: .cfi_def_cfa_offset 16 +; RV32I-NEXT: sw ra, 12(sp) # 4-byte Folded Spill +; RV32I-NEXT: sw s0, 8(sp) # 4-byte Folded Spill +; RV32I-NEXT: sw s1, 4(sp) # 4-byte Folded Spill +; RV32I-NEXT: .cfi_offset ra, -4 +; RV32I-NEXT: .cfi_offset s0, -8 +; RV32I-NEXT: .cfi_offset s1, -12 +; RV32I-NEXT: mv s0, a1 +; RV32I-NEXT: addi s1, a0, 1 +; RV32I-NEXT: mv a0, s1 +; RV32I-NEXT: call __floatsidf@plt +; RV32I-NEXT: sw a1, 4(s0) +; RV32I-NEXT: sw a0, 0(s0) +; RV32I-NEXT: mv a0, s1 +; RV32I-NEXT: lw ra, 12(sp) # 4-byte Folded Reload +; RV32I-NEXT: lw s0, 8(sp) # 4-byte Folded Reload +; RV32I-NEXT: lw s1, 4(sp) # 4-byte Folded Reload +; RV32I-NEXT: addi sp, sp, 16 +; RV32I-NEXT: ret +; +; RV64I-LABEL: fcvt_d_w_demanded_bits: +; RV64I: # %bb.0: +; RV64I-NEXT: addi sp, sp, -32 +; RV64I-NEXT: .cfi_def_cfa_offset 32 +; RV64I-NEXT: sd ra, 24(sp) # 8-byte Folded Spill +; RV64I-NEXT: sd s0, 16(sp) # 8-byte Folded Spill +; RV64I-NEXT: sd s1, 8(sp) # 8-byte Folded Spill +; RV64I-NEXT: .cfi_offset ra, -8 +; RV64I-NEXT: .cfi_offset s0, -16 +; RV64I-NEXT: .cfi_offset s1, -24 +; RV64I-NEXT: mv s0, a1 +; RV64I-NEXT: addiw s1, a0, 1 +; RV64I-NEXT: mv a0, s1 +; RV64I-NEXT: call __floatsidf@plt +; RV64I-NEXT: sd a0, 0(s0) +; RV64I-NEXT: mv a0, s1 +; RV64I-NEXT: ld ra, 24(sp) # 8-byte Folded Reload +; RV64I-NEXT: ld s0, 16(sp) # 8-byte Folded Reload +; RV64I-NEXT: ld s1, 8(sp) # 8-byte Folded Reload +; RV64I-NEXT: addi sp, sp, 32 +; RV64I-NEXT: ret + %3 = add i32 %0, 1 + %4 = call double @llvm.experimental.constrained.sitofp.f64.i32(i32 %3, metadata !"round.dynamic", metadata !"fpexcept.strict") strictfp + store double %4, double* %1, align 8 + ret i32 %3 +} + +; Make sure we select W version of addi on RV64. 
+define signext i32 @fcvt_d_wu_demanded_bits(i32 signext %0, double* %1) { +; RV32IFD-LABEL: fcvt_d_wu_demanded_bits: +; RV32IFD: # %bb.0: +; RV32IFD-NEXT: addi a0, a0, 1 +; RV32IFD-NEXT: fcvt.d.wu ft0, a0 +; RV32IFD-NEXT: fsd ft0, 0(a1) +; RV32IFD-NEXT: ret +; +; RV64IFD-LABEL: fcvt_d_wu_demanded_bits: +; RV64IFD: # %bb.0: +; RV64IFD-NEXT: addiw a0, a0, 1 +; RV64IFD-NEXT: fcvt.d.wu ft0, a0 +; RV64IFD-NEXT: fsd ft0, 0(a1) +; RV64IFD-NEXT: ret +; +; RV32I-LABEL: fcvt_d_wu_demanded_bits: +; RV32I: # %bb.0: +; RV32I-NEXT: addi sp, sp, -16 +; RV32I-NEXT: .cfi_def_cfa_offset 16 +; RV32I-NEXT: sw ra, 12(sp) # 4-byte Folded Spill +; RV32I-NEXT: sw s0, 8(sp) # 4-byte Folded Spill +; RV32I-NEXT: sw s1, 4(sp) # 4-byte Folded Spill +; RV32I-NEXT: .cfi_offset ra, -4 +; RV32I-NEXT: .cfi_offset s0, -8 +; RV32I-NEXT: .cfi_offset s1, -12 +; RV32I-NEXT: mv s0, a1 +; RV32I-NEXT: addi s1, a0, 1 +; RV32I-NEXT: mv a0, s1 +; RV32I-NEXT: call __floatunsidf@plt +; RV32I-NEXT: sw a1, 4(s0) +; RV32I-NEXT: sw a0, 0(s0) +; RV32I-NEXT: mv a0, s1 +; RV32I-NEXT: lw ra, 12(sp) # 4-byte Folded Reload +; RV32I-NEXT: lw s0, 8(sp) # 4-byte Folded Reload +; RV32I-NEXT: lw s1, 4(sp) # 4-byte Folded Reload +; RV32I-NEXT: addi sp, sp, 16 +; RV32I-NEXT: ret +; +; RV64I-LABEL: fcvt_d_wu_demanded_bits: +; RV64I: # %bb.0: +; RV64I-NEXT: addi sp, sp, -32 +; RV64I-NEXT: .cfi_def_cfa_offset 32 +; RV64I-NEXT: sd ra, 24(sp) # 8-byte Folded Spill +; RV64I-NEXT: sd s0, 16(sp) # 8-byte Folded Spill +; RV64I-NEXT: sd s1, 8(sp) # 8-byte Folded Spill +; RV64I-NEXT: .cfi_offset ra, -8 +; RV64I-NEXT: .cfi_offset s0, -16 +; RV64I-NEXT: .cfi_offset s1, -24 +; RV64I-NEXT: mv s0, a1 +; RV64I-NEXT: addiw s1, a0, 1 +; RV64I-NEXT: mv a0, s1 +; RV64I-NEXT: call __floatunsidf@plt +; RV64I-NEXT: sd a0, 0(s0) +; RV64I-NEXT: mv a0, s1 +; RV64I-NEXT: ld ra, 24(sp) # 8-byte Folded Reload +; RV64I-NEXT: ld s0, 16(sp) # 8-byte Folded Reload +; RV64I-NEXT: ld s1, 8(sp) # 8-byte Folded Reload +; RV64I-NEXT: addi sp, sp, 32 +; RV64I-NEXT: ret + %3 = add i32 %0, 1 + %4 = call double @llvm.experimental.constrained.uitofp.f64.i32(i32 %3, metadata !"round.dynamic", metadata !"fpexcept.strict") strictfp + store double %4, double* %1, align 8 + ret i32 %3 +} diff --git a/llvm/test/CodeGen/RISCV/float-convert-strict.ll b/llvm/test/CodeGen/RISCV/float-convert-strict.ll new file mode 100644 index 000000000000..bd0ab2249fb8 --- /dev/null +++ b/llvm/test/CodeGen/RISCV/float-convert-strict.ll @@ -0,0 +1,719 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc -mtriple=riscv32 -mattr=+f -verify-machineinstrs < %s \ +; RUN: -disable-strictnode-mutation | FileCheck -check-prefix=RV32IF %s +; RUN: llc -mtriple=riscv64 -mattr=+f -verify-machineinstrs < %s \ +; RUN: -disable-strictnode-mutation | FileCheck -check-prefix=RV64IF %s +; RUN: llc -mtriple=riscv32 -verify-machineinstrs < %s \ +; RUN: -disable-strictnode-mutation | FileCheck -check-prefix=RV32I %s +; RUN: llc -mtriple=riscv64 -verify-machineinstrs < %s \ +; RUN: -disable-strictnode-mutation | FileCheck -check-prefix=RV64I %s + +; NOTE: The rounding mode metadata does not effect which instruction is +; selected. Dynamic rounding mode is always used for operations that +; support rounding mode. 
+ +define i32 @fcvt_w_s(float %a) nounwind strictfp { +; RV32IF-LABEL: fcvt_w_s: +; RV32IF: # %bb.0: +; RV32IF-NEXT: fmv.w.x ft0, a0 +; RV32IF-NEXT: fcvt.w.s a0, ft0, rtz +; RV32IF-NEXT: ret +; +; RV64IF-LABEL: fcvt_w_s: +; RV64IF: # %bb.0: +; RV64IF-NEXT: fmv.w.x ft0, a0 +; RV64IF-NEXT: fcvt.w.s a0, ft0, rtz +; RV64IF-NEXT: ret +; +; RV32I-LABEL: fcvt_w_s: +; RV32I: # %bb.0: +; RV32I-NEXT: addi sp, sp, -16 +; RV32I-NEXT: sw ra, 12(sp) # 4-byte Folded Spill +; RV32I-NEXT: call __fixsfsi@plt +; RV32I-NEXT: lw ra, 12(sp) # 4-byte Folded Reload +; RV32I-NEXT: addi sp, sp, 16 +; RV32I-NEXT: ret +; +; RV64I-LABEL: fcvt_w_s: +; RV64I: # %bb.0: +; RV64I-NEXT: addi sp, sp, -16 +; RV64I-NEXT: sd ra, 8(sp) # 8-byte Folded Spill +; RV64I-NEXT: call __fixsfsi@plt +; RV64I-NEXT: ld ra, 8(sp) # 8-byte Folded Reload +; RV64I-NEXT: addi sp, sp, 16 +; RV64I-NEXT: ret + %1 = call i32 @llvm.experimental.constrained.fptosi.i32.f32(float %a, metadata !"fpexcept.strict") strictfp + ret i32 %1 +} +declare i32 @llvm.experimental.constrained.fptosi.i32.f32(float, metadata) + +define i32 @fcvt_wu_s(float %a) nounwind strictfp { +; RV32IF-LABEL: fcvt_wu_s: +; RV32IF: # %bb.0: +; RV32IF-NEXT: fmv.w.x ft0, a0 +; RV32IF-NEXT: fcvt.wu.s a0, ft0, rtz +; RV32IF-NEXT: ret +; +; RV64IF-LABEL: fcvt_wu_s: +; RV64IF: # %bb.0: +; RV64IF-NEXT: fmv.w.x ft0, a0 +; RV64IF-NEXT: fcvt.wu.s a0, ft0, rtz +; RV64IF-NEXT: ret +; +; RV32I-LABEL: fcvt_wu_s: +; RV32I: # %bb.0: +; RV32I-NEXT: addi sp, sp, -16 +; RV32I-NEXT: sw ra, 12(sp) # 4-byte Folded Spill +; RV32I-NEXT: call __fixunssfsi@plt +; RV32I-NEXT: lw ra, 12(sp) # 4-byte Folded Reload +; RV32I-NEXT: addi sp, sp, 16 +; RV32I-NEXT: ret +; +; RV64I-LABEL: fcvt_wu_s: +; RV64I: # %bb.0: +; RV64I-NEXT: addi sp, sp, -16 +; RV64I-NEXT: sd ra, 8(sp) # 8-byte Folded Spill +; RV64I-NEXT: call __fixunssfsi@plt +; RV64I-NEXT: ld ra, 8(sp) # 8-byte Folded Reload +; RV64I-NEXT: addi sp, sp, 16 +; RV64I-NEXT: ret + %1 = call i32 @llvm.experimental.constrained.fptoui.i32.f32(float %a, metadata !"fpexcept.strict") strictfp + ret i32 %1 +} +declare i32 @llvm.experimental.constrained.fptoui.i32.f32(float, metadata) + +; Test where the fptoui has multiple uses, one of which causes a sext to be +; inserted on RV64. 
+define i32 @fcvt_wu_s_multiple_use(float %x, i32* %y) { +; RV32IF-LABEL: fcvt_wu_s_multiple_use: +; RV32IF: # %bb.0: +; RV32IF-NEXT: fmv.w.x ft0, a0 +; RV32IF-NEXT: fcvt.wu.s a1, ft0, rtz +; RV32IF-NEXT: li a0, 1 +; RV32IF-NEXT: beqz a1, .LBB2_2 +; RV32IF-NEXT: # %bb.1: +; RV32IF-NEXT: mv a0, a1 +; RV32IF-NEXT: .LBB2_2: +; RV32IF-NEXT: ret +; +; RV64IF-LABEL: fcvt_wu_s_multiple_use: +; RV64IF: # %bb.0: +; RV64IF-NEXT: fmv.w.x ft0, a0 +; RV64IF-NEXT: fcvt.wu.s a1, ft0, rtz +; RV64IF-NEXT: li a0, 1 +; RV64IF-NEXT: beqz a1, .LBB2_2 +; RV64IF-NEXT: # %bb.1: +; RV64IF-NEXT: mv a0, a1 +; RV64IF-NEXT: .LBB2_2: +; RV64IF-NEXT: ret +; +; RV32I-LABEL: fcvt_wu_s_multiple_use: +; RV32I: # %bb.0: +; RV32I-NEXT: addi sp, sp, -16 +; RV32I-NEXT: .cfi_def_cfa_offset 16 +; RV32I-NEXT: sw ra, 12(sp) # 4-byte Folded Spill +; RV32I-NEXT: .cfi_offset ra, -4 +; RV32I-NEXT: call __fixunssfsi@plt +; RV32I-NEXT: mv a1, a0 +; RV32I-NEXT: li a0, 1 +; RV32I-NEXT: beqz a1, .LBB2_2 +; RV32I-NEXT: # %bb.1: +; RV32I-NEXT: mv a0, a1 +; RV32I-NEXT: .LBB2_2: +; RV32I-NEXT: lw ra, 12(sp) # 4-byte Folded Reload +; RV32I-NEXT: addi sp, sp, 16 +; RV32I-NEXT: ret +; +; RV64I-LABEL: fcvt_wu_s_multiple_use: +; RV64I: # %bb.0: +; RV64I-NEXT: addi sp, sp, -16 +; RV64I-NEXT: .cfi_def_cfa_offset 16 +; RV64I-NEXT: sd ra, 8(sp) # 8-byte Folded Spill +; RV64I-NEXT: .cfi_offset ra, -8 +; RV64I-NEXT: call __fixunssfsi@plt +; RV64I-NEXT: mv a1, a0 +; RV64I-NEXT: li a0, 1 +; RV64I-NEXT: beqz a1, .LBB2_2 +; RV64I-NEXT: # %bb.1: +; RV64I-NEXT: mv a0, a1 +; RV64I-NEXT: .LBB2_2: +; RV64I-NEXT: ld ra, 8(sp) # 8-byte Folded Reload +; RV64I-NEXT: addi sp, sp, 16 +; RV64I-NEXT: ret + %a = call i32 @llvm.experimental.constrained.fptoui.i32.f32(float %x, metadata !"fpexcept.strict") strictfp + %b = icmp eq i32 %a, 0 + %c = select i1 %b, i32 1, i32 %a + ret i32 %c +} + +define float @fcvt_s_w(i32 %a) nounwind strictfp { +; RV32IF-LABEL: fcvt_s_w: +; RV32IF: # %bb.0: +; RV32IF-NEXT: fcvt.s.w ft0, a0 +; RV32IF-NEXT: fmv.x.w a0, ft0 +; RV32IF-NEXT: ret +; +; RV64IF-LABEL: fcvt_s_w: +; RV64IF: # %bb.0: +; RV64IF-NEXT: fcvt.s.w ft0, a0 +; RV64IF-NEXT: fmv.x.w a0, ft0 +; RV64IF-NEXT: ret +; +; RV32I-LABEL: fcvt_s_w: +; RV32I: # %bb.0: +; RV32I-NEXT: addi sp, sp, -16 +; RV32I-NEXT: sw ra, 12(sp) # 4-byte Folded Spill +; RV32I-NEXT: call __floatsisf@plt +; RV32I-NEXT: lw ra, 12(sp) # 4-byte Folded Reload +; RV32I-NEXT: addi sp, sp, 16 +; RV32I-NEXT: ret +; +; RV64I-LABEL: fcvt_s_w: +; RV64I: # %bb.0: +; RV64I-NEXT: addi sp, sp, -16 +; RV64I-NEXT: sd ra, 8(sp) # 8-byte Folded Spill +; RV64I-NEXT: sext.w a0, a0 +; RV64I-NEXT: call __floatsisf@plt +; RV64I-NEXT: ld ra, 8(sp) # 8-byte Folded Reload +; RV64I-NEXT: addi sp, sp, 16 +; RV64I-NEXT: ret + %1 = call float @llvm.experimental.constrained.sitofp.f32.i32(i32 %a, metadata !"round.dynamic", metadata !"fpexcept.strict") strictfp + ret float %1 +} +declare float @llvm.experimental.constrained.sitofp.f32.i32(i32, metadata, metadata) + +define float @fcvt_s_w_load(i32* %p) nounwind strictfp { +; RV32IF-LABEL: fcvt_s_w_load: +; RV32IF: # %bb.0: +; RV32IF-NEXT: lw a0, 0(a0) +; RV32IF-NEXT: fcvt.s.w ft0, a0 +; RV32IF-NEXT: fmv.x.w a0, ft0 +; RV32IF-NEXT: ret +; +; RV64IF-LABEL: fcvt_s_w_load: +; RV64IF: # %bb.0: +; RV64IF-NEXT: lw a0, 0(a0) +; RV64IF-NEXT: fcvt.s.w ft0, a0 +; RV64IF-NEXT: fmv.x.w a0, ft0 +; RV64IF-NEXT: ret +; +; RV32I-LABEL: fcvt_s_w_load: +; RV32I: # %bb.0: +; RV32I-NEXT: addi sp, sp, -16 +; RV32I-NEXT: sw ra, 12(sp) # 4-byte Folded Spill +; RV32I-NEXT: lw a0, 0(a0) +; RV32I-NEXT: call 
__floatsisf@plt +; RV32I-NEXT: lw ra, 12(sp) # 4-byte Folded Reload +; RV32I-NEXT: addi sp, sp, 16 +; RV32I-NEXT: ret +; +; RV64I-LABEL: fcvt_s_w_load: +; RV64I: # %bb.0: +; RV64I-NEXT: addi sp, sp, -16 +; RV64I-NEXT: sd ra, 8(sp) # 8-byte Folded Spill +; RV64I-NEXT: lw a0, 0(a0) +; RV64I-NEXT: call __floatsisf@plt +; RV64I-NEXT: ld ra, 8(sp) # 8-byte Folded Reload +; RV64I-NEXT: addi sp, sp, 16 +; RV64I-NEXT: ret + %a = load i32, i32* %p + %1 = call float @llvm.experimental.constrained.sitofp.f32.i32(i32 %a, metadata !"round.dynamic", metadata !"fpexcept.strict") strictfp + ret float %1 +} + +define float @fcvt_s_wu(i32 %a) nounwind strictfp { +; RV32IF-LABEL: fcvt_s_wu: +; RV32IF: # %bb.0: +; RV32IF-NEXT: fcvt.s.wu ft0, a0 +; RV32IF-NEXT: fmv.x.w a0, ft0 +; RV32IF-NEXT: ret +; +; RV64IF-LABEL: fcvt_s_wu: +; RV64IF: # %bb.0: +; RV64IF-NEXT: fcvt.s.wu ft0, a0 +; RV64IF-NEXT: fmv.x.w a0, ft0 +; RV64IF-NEXT: ret +; +; RV32I-LABEL: fcvt_s_wu: +; RV32I: # %bb.0: +; RV32I-NEXT: addi sp, sp, -16 +; RV32I-NEXT: sw ra, 12(sp) # 4-byte Folded Spill +; RV32I-NEXT: call __floatunsisf@plt +; RV32I-NEXT: lw ra, 12(sp) # 4-byte Folded Reload +; RV32I-NEXT: addi sp, sp, 16 +; RV32I-NEXT: ret +; +; RV64I-LABEL: fcvt_s_wu: +; RV64I: # %bb.0: +; RV64I-NEXT: addi sp, sp, -16 +; RV64I-NEXT: sd ra, 8(sp) # 8-byte Folded Spill +; RV64I-NEXT: sext.w a0, a0 +; RV64I-NEXT: call __floatunsisf@plt +; RV64I-NEXT: ld ra, 8(sp) # 8-byte Folded Reload +; RV64I-NEXT: addi sp, sp, 16 +; RV64I-NEXT: ret + %1 = call float @llvm.experimental.constrained.uitofp.f32.i32(i32 %a, metadata !"round.dynamic", metadata !"fpexcept.strict") strictfp + ret float %1 +} +declare float @llvm.experimental.constrained.uitofp.f32.i32(i32 %a, metadata, metadata) + +define float @fcvt_s_wu_load(i32* %p) nounwind strictfp { +; RV32IF-LABEL: fcvt_s_wu_load: +; RV32IF: # %bb.0: +; RV32IF-NEXT: lw a0, 0(a0) +; RV32IF-NEXT: fcvt.s.wu ft0, a0 +; RV32IF-NEXT: fmv.x.w a0, ft0 +; RV32IF-NEXT: ret +; +; RV64IF-LABEL: fcvt_s_wu_load: +; RV64IF: # %bb.0: +; RV64IF-NEXT: lwu a0, 0(a0) +; RV64IF-NEXT: fcvt.s.wu ft0, a0 +; RV64IF-NEXT: fmv.x.w a0, ft0 +; RV64IF-NEXT: ret +; +; RV32I-LABEL: fcvt_s_wu_load: +; RV32I: # %bb.0: +; RV32I-NEXT: addi sp, sp, -16 +; RV32I-NEXT: sw ra, 12(sp) # 4-byte Folded Spill +; RV32I-NEXT: lw a0, 0(a0) +; RV32I-NEXT: call __floatunsisf@plt +; RV32I-NEXT: lw ra, 12(sp) # 4-byte Folded Reload +; RV32I-NEXT: addi sp, sp, 16 +; RV32I-NEXT: ret +; +; RV64I-LABEL: fcvt_s_wu_load: +; RV64I: # %bb.0: +; RV64I-NEXT: addi sp, sp, -16 +; RV64I-NEXT: sd ra, 8(sp) # 8-byte Folded Spill +; RV64I-NEXT: lw a0, 0(a0) +; RV64I-NEXT: call __floatunsisf@plt +; RV64I-NEXT: ld ra, 8(sp) # 8-byte Folded Reload +; RV64I-NEXT: addi sp, sp, 16 +; RV64I-NEXT: ret + %a = load i32, i32* %p + %1 = call float @llvm.experimental.constrained.uitofp.f32.i32(i32 %a, metadata !"round.dynamic", metadata !"fpexcept.strict") strictfp + ret float %1 +} + +define i64 @fcvt_l_s(float %a) nounwind strictfp { +; RV32IF-LABEL: fcvt_l_s: +; RV32IF: # %bb.0: +; RV32IF-NEXT: addi sp, sp, -16 +; RV32IF-NEXT: sw ra, 12(sp) # 4-byte Folded Spill +; RV32IF-NEXT: call __fixsfdi@plt +; RV32IF-NEXT: lw ra, 12(sp) # 4-byte Folded Reload +; RV32IF-NEXT: addi sp, sp, 16 +; RV32IF-NEXT: ret +; +; RV64IF-LABEL: fcvt_l_s: +; RV64IF: # %bb.0: +; RV64IF-NEXT: fmv.w.x ft0, a0 +; RV64IF-NEXT: fcvt.l.s a0, ft0, rtz +; RV64IF-NEXT: ret +; +; RV32I-LABEL: fcvt_l_s: +; RV32I: # %bb.0: +; RV32I-NEXT: addi sp, sp, -16 +; RV32I-NEXT: sw ra, 12(sp) # 4-byte Folded Spill +; RV32I-NEXT: call 
__fixsfdi@plt +; RV32I-NEXT: lw ra, 12(sp) # 4-byte Folded Reload +; RV32I-NEXT: addi sp, sp, 16 +; RV32I-NEXT: ret +; +; RV64I-LABEL: fcvt_l_s: +; RV64I: # %bb.0: +; RV64I-NEXT: addi sp, sp, -16 +; RV64I-NEXT: sd ra, 8(sp) # 8-byte Folded Spill +; RV64I-NEXT: call __fixsfdi@plt +; RV64I-NEXT: ld ra, 8(sp) # 8-byte Folded Reload +; RV64I-NEXT: addi sp, sp, 16 +; RV64I-NEXT: ret + %1 = call i64 @llvm.experimental.constrained.fptosi.i64.f32(float %a, metadata !"fpexcept.strict") strictfp + ret i64 %1 +} +declare i64 @llvm.experimental.constrained.fptosi.i64.f32(float, metadata) + +define i64 @fcvt_lu_s(float %a) nounwind strictfp { +; RV32IF-LABEL: fcvt_lu_s: +; RV32IF: # %bb.0: +; RV32IF-NEXT: addi sp, sp, -16 +; RV32IF-NEXT: sw ra, 12(sp) # 4-byte Folded Spill +; RV32IF-NEXT: call __fixunssfdi@plt +; RV32IF-NEXT: lw ra, 12(sp) # 4-byte Folded Reload +; RV32IF-NEXT: addi sp, sp, 16 +; RV32IF-NEXT: ret +; +; RV64IF-LABEL: fcvt_lu_s: +; RV64IF: # %bb.0: +; RV64IF-NEXT: fmv.w.x ft0, a0 +; RV64IF-NEXT: fcvt.lu.s a0, ft0, rtz +; RV64IF-NEXT: ret +; +; RV32I-LABEL: fcvt_lu_s: +; RV32I: # %bb.0: +; RV32I-NEXT: addi sp, sp, -16 +; RV32I-NEXT: sw ra, 12(sp) # 4-byte Folded Spill +; RV32I-NEXT: call __fixunssfdi@plt +; RV32I-NEXT: lw ra, 12(sp) # 4-byte Folded Reload +; RV32I-NEXT: addi sp, sp, 16 +; RV32I-NEXT: ret +; +; RV64I-LABEL: fcvt_lu_s: +; RV64I: # %bb.0: +; RV64I-NEXT: addi sp, sp, -16 +; RV64I-NEXT: sd ra, 8(sp) # 8-byte Folded Spill +; RV64I-NEXT: call __fixunssfdi@plt +; RV64I-NEXT: ld ra, 8(sp) # 8-byte Folded Reload +; RV64I-NEXT: addi sp, sp, 16 +; RV64I-NEXT: ret + %1 = call i64 @llvm.experimental.constrained.fptoui.i64.f32(float %a, metadata !"fpexcept.strict") strictfp + ret i64 %1 +} +declare i64 @llvm.experimental.constrained.fptoui.i64.f32(float, metadata) + +define float @fcvt_s_l(i64 %a) nounwind strictfp { +; RV32IF-LABEL: fcvt_s_l: +; RV32IF: # %bb.0: +; RV32IF-NEXT: addi sp, sp, -16 +; RV32IF-NEXT: sw ra, 12(sp) # 4-byte Folded Spill +; RV32IF-NEXT: call __floatdisf@plt +; RV32IF-NEXT: lw ra, 12(sp) # 4-byte Folded Reload +; RV32IF-NEXT: addi sp, sp, 16 +; RV32IF-NEXT: ret +; +; RV64IF-LABEL: fcvt_s_l: +; RV64IF: # %bb.0: +; RV64IF-NEXT: fcvt.s.l ft0, a0 +; RV64IF-NEXT: fmv.x.w a0, ft0 +; RV64IF-NEXT: ret +; +; RV32I-LABEL: fcvt_s_l: +; RV32I: # %bb.0: +; RV32I-NEXT: addi sp, sp, -16 +; RV32I-NEXT: sw ra, 12(sp) # 4-byte Folded Spill +; RV32I-NEXT: call __floatdisf@plt +; RV32I-NEXT: lw ra, 12(sp) # 4-byte Folded Reload +; RV32I-NEXT: addi sp, sp, 16 +; RV32I-NEXT: ret +; +; RV64I-LABEL: fcvt_s_l: +; RV64I: # %bb.0: +; RV64I-NEXT: addi sp, sp, -16 +; RV64I-NEXT: sd ra, 8(sp) # 8-byte Folded Spill +; RV64I-NEXT: call __floatdisf@plt +; RV64I-NEXT: ld ra, 8(sp) # 8-byte Folded Reload +; RV64I-NEXT: addi sp, sp, 16 +; RV64I-NEXT: ret + %1 = call float @llvm.experimental.constrained.sitofp.f32.i64(i64 %a, metadata !"round.dynamic", metadata !"fpexcept.strict") strictfp + ret float %1 +} +declare float @llvm.experimental.constrained.sitofp.f32.i64(i64, metadata, metadata) + +define float @fcvt_s_lu(i64 %a) nounwind strictfp { +; RV32IF-LABEL: fcvt_s_lu: +; RV32IF: # %bb.0: +; RV32IF-NEXT: addi sp, sp, -16 +; RV32IF-NEXT: sw ra, 12(sp) # 4-byte Folded Spill +; RV32IF-NEXT: call __floatundisf@plt +; RV32IF-NEXT: lw ra, 12(sp) # 4-byte Folded Reload +; RV32IF-NEXT: addi sp, sp, 16 +; RV32IF-NEXT: ret +; +; RV64IF-LABEL: fcvt_s_lu: +; RV64IF: # %bb.0: +; RV64IF-NEXT: fcvt.s.lu ft0, a0 +; RV64IF-NEXT: fmv.x.w a0, ft0 +; RV64IF-NEXT: ret +; +; RV32I-LABEL: fcvt_s_lu: +; RV32I: # 
%bb.0: +; RV32I-NEXT: addi sp, sp, -16 +; RV32I-NEXT: sw ra, 12(sp) # 4-byte Folded Spill +; RV32I-NEXT: call __floatundisf@plt +; RV32I-NEXT: lw ra, 12(sp) # 4-byte Folded Reload +; RV32I-NEXT: addi sp, sp, 16 +; RV32I-NEXT: ret +; +; RV64I-LABEL: fcvt_s_lu: +; RV64I: # %bb.0: +; RV64I-NEXT: addi sp, sp, -16 +; RV64I-NEXT: sd ra, 8(sp) # 8-byte Folded Spill +; RV64I-NEXT: call __floatundisf@plt +; RV64I-NEXT: ld ra, 8(sp) # 8-byte Folded Reload +; RV64I-NEXT: addi sp, sp, 16 +; RV64I-NEXT: ret + %1 = call float @llvm.experimental.constrained.uitofp.f32.i64(i64 %a, metadata !"round.dynamic", metadata !"fpexcept.strict") strictfp + ret float %1 +} +declare float @llvm.experimental.constrained.uitofp.f32.i64(i64, metadata, metadata) + +define float @fcvt_s_w_i8(i8 signext %a) nounwind strictfp { +; RV32IF-LABEL: fcvt_s_w_i8: +; RV32IF: # %bb.0: +; RV32IF-NEXT: fcvt.s.w ft0, a0 +; RV32IF-NEXT: fmv.x.w a0, ft0 +; RV32IF-NEXT: ret +; +; RV64IF-LABEL: fcvt_s_w_i8: +; RV64IF: # %bb.0: +; RV64IF-NEXT: fcvt.s.w ft0, a0 +; RV64IF-NEXT: fmv.x.w a0, ft0 +; RV64IF-NEXT: ret +; +; RV32I-LABEL: fcvt_s_w_i8: +; RV32I: # %bb.0: +; RV32I-NEXT: addi sp, sp, -16 +; RV32I-NEXT: sw ra, 12(sp) # 4-byte Folded Spill +; RV32I-NEXT: call __floatsisf@plt +; RV32I-NEXT: lw ra, 12(sp) # 4-byte Folded Reload +; RV32I-NEXT: addi sp, sp, 16 +; RV32I-NEXT: ret +; +; RV64I-LABEL: fcvt_s_w_i8: +; RV64I: # %bb.0: +; RV64I-NEXT: addi sp, sp, -16 +; RV64I-NEXT: sd ra, 8(sp) # 8-byte Folded Spill +; RV64I-NEXT: call __floatsisf@plt +; RV64I-NEXT: ld ra, 8(sp) # 8-byte Folded Reload +; RV64I-NEXT: addi sp, sp, 16 +; RV64I-NEXT: ret + %1 = call float @llvm.experimental.constrained.sitofp.f32.i8(i8 %a, metadata !"round.dynamic", metadata !"fpexcept.strict") strictfp + ret float %1 +} +declare float @llvm.experimental.constrained.sitofp.f32.i8(i8, metadata, metadata) + +define float @fcvt_s_wu_i8(i8 zeroext %a) nounwind strictfp { +; RV32IF-LABEL: fcvt_s_wu_i8: +; RV32IF: # %bb.0: +; RV32IF-NEXT: fcvt.s.wu ft0, a0 +; RV32IF-NEXT: fmv.x.w a0, ft0 +; RV32IF-NEXT: ret +; +; RV64IF-LABEL: fcvt_s_wu_i8: +; RV64IF: # %bb.0: +; RV64IF-NEXT: fcvt.s.wu ft0, a0 +; RV64IF-NEXT: fmv.x.w a0, ft0 +; RV64IF-NEXT: ret +; +; RV32I-LABEL: fcvt_s_wu_i8: +; RV32I: # %bb.0: +; RV32I-NEXT: addi sp, sp, -16 +; RV32I-NEXT: sw ra, 12(sp) # 4-byte Folded Spill +; RV32I-NEXT: call __floatunsisf@plt +; RV32I-NEXT: lw ra, 12(sp) # 4-byte Folded Reload +; RV32I-NEXT: addi sp, sp, 16 +; RV32I-NEXT: ret +; +; RV64I-LABEL: fcvt_s_wu_i8: +; RV64I: # %bb.0: +; RV64I-NEXT: addi sp, sp, -16 +; RV64I-NEXT: sd ra, 8(sp) # 8-byte Folded Spill +; RV64I-NEXT: call __floatunsisf@plt +; RV64I-NEXT: ld ra, 8(sp) # 8-byte Folded Reload +; RV64I-NEXT: addi sp, sp, 16 +; RV64I-NEXT: ret + %1 = call float @llvm.experimental.constrained.uitofp.f32.i8(i8 %a, metadata !"round.dynamic", metadata !"fpexcept.strict") + ret float %1 +} +declare float @llvm.experimental.constrained.uitofp.f32.i8(i8, metadata, metadata) + +define float @fcvt_s_w_i16(i16 signext %a) nounwind strictfp { +; RV32IF-LABEL: fcvt_s_w_i16: +; RV32IF: # %bb.0: +; RV32IF-NEXT: fcvt.s.w ft0, a0 +; RV32IF-NEXT: fmv.x.w a0, ft0 +; RV32IF-NEXT: ret +; +; RV64IF-LABEL: fcvt_s_w_i16: +; RV64IF: # %bb.0: +; RV64IF-NEXT: fcvt.s.w ft0, a0 +; RV64IF-NEXT: fmv.x.w a0, ft0 +; RV64IF-NEXT: ret +; +; RV32I-LABEL: fcvt_s_w_i16: +; RV32I: # %bb.0: +; RV32I-NEXT: addi sp, sp, -16 +; RV32I-NEXT: sw ra, 12(sp) # 4-byte Folded Spill +; RV32I-NEXT: call __floatsisf@plt +; RV32I-NEXT: lw ra, 12(sp) # 4-byte Folded Reload +; RV32I-NEXT: 
addi sp, sp, 16 +; RV32I-NEXT: ret +; +; RV64I-LABEL: fcvt_s_w_i16: +; RV64I: # %bb.0: +; RV64I-NEXT: addi sp, sp, -16 +; RV64I-NEXT: sd ra, 8(sp) # 8-byte Folded Spill +; RV64I-NEXT: call __floatsisf@plt +; RV64I-NEXT: ld ra, 8(sp) # 8-byte Folded Reload +; RV64I-NEXT: addi sp, sp, 16 +; RV64I-NEXT: ret + %1 = call float @llvm.experimental.constrained.sitofp.f32.i16(i16 %a, metadata !"round.dynamic", metadata !"fpexcept.strict") strictfp + ret float %1 +} +declare float @llvm.experimental.constrained.sitofp.f32.i16(i16, metadata, metadata) + +define float @fcvt_s_wu_i16(i16 zeroext %a) nounwind strictfp { +; RV32IF-LABEL: fcvt_s_wu_i16: +; RV32IF: # %bb.0: +; RV32IF-NEXT: fcvt.s.wu ft0, a0 +; RV32IF-NEXT: fmv.x.w a0, ft0 +; RV32IF-NEXT: ret +; +; RV64IF-LABEL: fcvt_s_wu_i16: +; RV64IF: # %bb.0: +; RV64IF-NEXT: fcvt.s.wu ft0, a0 +; RV64IF-NEXT: fmv.x.w a0, ft0 +; RV64IF-NEXT: ret +; +; RV32I-LABEL: fcvt_s_wu_i16: +; RV32I: # %bb.0: +; RV32I-NEXT: addi sp, sp, -16 +; RV32I-NEXT: sw ra, 12(sp) # 4-byte Folded Spill +; RV32I-NEXT: call __floatunsisf@plt +; RV32I-NEXT: lw ra, 12(sp) # 4-byte Folded Reload +; RV32I-NEXT: addi sp, sp, 16 +; RV32I-NEXT: ret +; +; RV64I-LABEL: fcvt_s_wu_i16: +; RV64I: # %bb.0: +; RV64I-NEXT: addi sp, sp, -16 +; RV64I-NEXT: sd ra, 8(sp) # 8-byte Folded Spill +; RV64I-NEXT: call __floatunsisf@plt +; RV64I-NEXT: ld ra, 8(sp) # 8-byte Folded Reload +; RV64I-NEXT: addi sp, sp, 16 +; RV64I-NEXT: ret + %1 = call float @llvm.experimental.constrained.uitofp.f32.i16(i16 %a, metadata !"round.dynamic", metadata !"fpexcept.strict") strictfp + ret float %1 +} +declare float @llvm.experimental.constrained.uitofp.f32.i16(i16, metadata, metadata) + +; Make sure we select W version of addi on RV64. +define signext i32 @fcvt_s_w_demanded_bits(i32 signext %0, float* %1) { +; RV32IF-LABEL: fcvt_s_w_demanded_bits: +; RV32IF: # %bb.0: +; RV32IF-NEXT: addi a0, a0, 1 +; RV32IF-NEXT: fcvt.s.w ft0, a0 +; RV32IF-NEXT: fsw ft0, 0(a1) +; RV32IF-NEXT: ret +; +; RV64IF-LABEL: fcvt_s_w_demanded_bits: +; RV64IF: # %bb.0: +; RV64IF-NEXT: addiw a0, a0, 1 +; RV64IF-NEXT: fcvt.s.w ft0, a0 +; RV64IF-NEXT: fsw ft0, 0(a1) +; RV64IF-NEXT: ret +; +; RV32I-LABEL: fcvt_s_w_demanded_bits: +; RV32I: # %bb.0: +; RV32I-NEXT: addi sp, sp, -16 +; RV32I-NEXT: .cfi_def_cfa_offset 16 +; RV32I-NEXT: sw ra, 12(sp) # 4-byte Folded Spill +; RV32I-NEXT: sw s0, 8(sp) # 4-byte Folded Spill +; RV32I-NEXT: sw s1, 4(sp) # 4-byte Folded Spill +; RV32I-NEXT: .cfi_offset ra, -4 +; RV32I-NEXT: .cfi_offset s0, -8 +; RV32I-NEXT: .cfi_offset s1, -12 +; RV32I-NEXT: mv s0, a1 +; RV32I-NEXT: addi s1, a0, 1 +; RV32I-NEXT: mv a0, s1 +; RV32I-NEXT: call __floatsisf@plt +; RV32I-NEXT: sw a0, 0(s0) +; RV32I-NEXT: mv a0, s1 +; RV32I-NEXT: lw ra, 12(sp) # 4-byte Folded Reload +; RV32I-NEXT: lw s0, 8(sp) # 4-byte Folded Reload +; RV32I-NEXT: lw s1, 4(sp) # 4-byte Folded Reload +; RV32I-NEXT: addi sp, sp, 16 +; RV32I-NEXT: ret +; +; RV64I-LABEL: fcvt_s_w_demanded_bits: +; RV64I: # %bb.0: +; RV64I-NEXT: addi sp, sp, -32 +; RV64I-NEXT: .cfi_def_cfa_offset 32 +; RV64I-NEXT: sd ra, 24(sp) # 8-byte Folded Spill +; RV64I-NEXT: sd s0, 16(sp) # 8-byte Folded Spill +; RV64I-NEXT: sd s1, 8(sp) # 8-byte Folded Spill +; RV64I-NEXT: .cfi_offset ra, -8 +; RV64I-NEXT: .cfi_offset s0, -16 +; RV64I-NEXT: .cfi_offset s1, -24 +; RV64I-NEXT: mv s0, a1 +; RV64I-NEXT: addiw s1, a0, 1 +; RV64I-NEXT: mv a0, s1 +; RV64I-NEXT: call __floatsisf@plt +; RV64I-NEXT: sw a0, 0(s0) +; RV64I-NEXT: mv a0, s1 +; RV64I-NEXT: ld ra, 24(sp) # 8-byte Folded Reload +; RV64I-NEXT: ld 
s0, 16(sp) # 8-byte Folded Reload +; RV64I-NEXT: ld s1, 8(sp) # 8-byte Folded Reload +; RV64I-NEXT: addi sp, sp, 32 +; RV64I-NEXT: ret + %3 = add i32 %0, 1 + %4 = call float @llvm.experimental.constrained.sitofp.f32.i32(i32 %3, metadata !"round.dynamic", metadata !"fpexcept.strict") strictfp + store float %4, float* %1, align 4 + ret i32 %3 +} + +; Make sure we select W version of addi on RV64. +define signext i32 @fcvt_s_wu_demanded_bits(i32 signext %0, float* %1) { +; RV32IF-LABEL: fcvt_s_wu_demanded_bits: +; RV32IF: # %bb.0: +; RV32IF-NEXT: addi a0, a0, 1 +; RV32IF-NEXT: fcvt.s.wu ft0, a0 +; RV32IF-NEXT: fsw ft0, 0(a1) +; RV32IF-NEXT: ret +; +; RV64IF-LABEL: fcvt_s_wu_demanded_bits: +; RV64IF: # %bb.0: +; RV64IF-NEXT: addiw a0, a0, 1 +; RV64IF-NEXT: fcvt.s.wu ft0, a0 +; RV64IF-NEXT: fsw ft0, 0(a1) +; RV64IF-NEXT: ret +; +; RV32I-LABEL: fcvt_s_wu_demanded_bits: +; RV32I: # %bb.0: +; RV32I-NEXT: addi sp, sp, -16 +; RV32I-NEXT: .cfi_def_cfa_offset 16 +; RV32I-NEXT: sw ra, 12(sp) # 4-byte Folded Spill +; RV32I-NEXT: sw s0, 8(sp) # 4-byte Folded Spill +; RV32I-NEXT: sw s1, 4(sp) # 4-byte Folded Spill +; RV32I-NEXT: .cfi_offset ra, -4 +; RV32I-NEXT: .cfi_offset s0, -8 +; RV32I-NEXT: .cfi_offset s1, -12 +; RV32I-NEXT: mv s0, a1 +; RV32I-NEXT: addi s1, a0, 1 +; RV32I-NEXT: mv a0, s1 +; RV32I-NEXT: call __floatunsisf@plt +; RV32I-NEXT: sw a0, 0(s0) +; RV32I-NEXT: mv a0, s1 +; RV32I-NEXT: lw ra, 12(sp) # 4-byte Folded Reload +; RV32I-NEXT: lw s0, 8(sp) # 4-byte Folded Reload +; RV32I-NEXT: lw s1, 4(sp) # 4-byte Folded Reload +; RV32I-NEXT: addi sp, sp, 16 +; RV32I-NEXT: ret +; +; RV64I-LABEL: fcvt_s_wu_demanded_bits: +; RV64I: # %bb.0: +; RV64I-NEXT: addi sp, sp, -32 +; RV64I-NEXT: .cfi_def_cfa_offset 32 +; RV64I-NEXT: sd ra, 24(sp) # 8-byte Folded Spill +; RV64I-NEXT: sd s0, 16(sp) # 8-byte Folded Spill +; RV64I-NEXT: sd s1, 8(sp) # 8-byte Folded Spill +; RV64I-NEXT: .cfi_offset ra, -8 +; RV64I-NEXT: .cfi_offset s0, -16 +; RV64I-NEXT: .cfi_offset s1, -24 +; RV64I-NEXT: mv s0, a1 +; RV64I-NEXT: addiw s1, a0, 1 +; RV64I-NEXT: mv a0, s1 +; RV64I-NEXT: call __floatunsisf@plt +; RV64I-NEXT: sw a0, 0(s0) +; RV64I-NEXT: mv a0, s1 +; RV64I-NEXT: ld ra, 24(sp) # 8-byte Folded Reload +; RV64I-NEXT: ld s0, 16(sp) # 8-byte Folded Reload +; RV64I-NEXT: ld s1, 8(sp) # 8-byte Folded Reload +; RV64I-NEXT: addi sp, sp, 32 +; RV64I-NEXT: ret + %3 = add i32 %0, 1 + %4 = call float @llvm.experimental.constrained.uitofp.f32.i32(i32 %3, metadata !"round.dynamic", metadata !"fpexcept.strict") strictfp + store float %4, float* %1, align 4 + ret i32 %3 +} diff --git a/llvm/test/CodeGen/RISCV/half-convert-strict.ll b/llvm/test/CodeGen/RISCV/half-convert-strict.ll new file mode 100644 index 000000000000..bb8771ea2d07 --- /dev/null +++ b/llvm/test/CodeGen/RISCV/half-convert-strict.ll @@ -0,0 +1,712 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc -mtriple=riscv32 -mattr=+experimental-zfh -verify-machineinstrs \ +; RUN: -target-abi ilp32f -disable-strictnode-mutation < %s \ +; RUN: | FileCheck -check-prefix=RV32IZFH %s +; RUN: llc -mtriple=riscv64 -mattr=+experimental-zfh -verify-machineinstrs \ +; RUN: -target-abi lp64f -disable-strictnode-mutation < %s \ +; RUN: | FileCheck -check-prefix=RV64IZFH %s +; RUN: llc -mtriple=riscv32 -mattr=+d,+experimental-zfh -verify-machineinstrs \ +; RUN: -target-abi ilp32d -disable-strictnode-mutation < %s \ +; RUN: | FileCheck -check-prefix=RV32IDZFH %s +; RUN: llc -mtriple=riscv64 -mattr=+d,+experimental-zfh -verify-machineinstrs \ +; RUN: 
-target-abi lp64d -disable-strictnode-mutation < %s \ +; RUN: | FileCheck -check-prefix=RV64IDZFH %s + +; NOTE: The rounding mode metadata does not effect which instruction is +; selected. Dynamic rounding mode is always used for operations that +; support rounding mode. + +define i16 @fcvt_si_h(half %a) nounwind strictfp { +; RV32IZFH-LABEL: fcvt_si_h: +; RV32IZFH: # %bb.0: +; RV32IZFH-NEXT: fcvt.w.h a0, fa0, rtz +; RV32IZFH-NEXT: ret +; +; RV64IZFH-LABEL: fcvt_si_h: +; RV64IZFH: # %bb.0: +; RV64IZFH-NEXT: fcvt.l.h a0, fa0, rtz +; RV64IZFH-NEXT: ret +; +; RV32IDZFH-LABEL: fcvt_si_h: +; RV32IDZFH: # %bb.0: +; RV32IDZFH-NEXT: fcvt.w.h a0, fa0, rtz +; RV32IDZFH-NEXT: ret +; +; RV64IDZFH-LABEL: fcvt_si_h: +; RV64IDZFH: # %bb.0: +; RV64IDZFH-NEXT: fcvt.l.h a0, fa0, rtz +; RV64IDZFH-NEXT: ret + %1 = call i16 @llvm.experimental.constrained.fptosi.i16.f16(half %a, metadata !"fpexcept.strict") strictfp + ret i16 %1 +} +declare i16 @llvm.experimental.constrained.fptosi.i16.f16(half, metadata) + +define i16 @fcvt_ui_h(half %a) nounwind strictfp { +; RV32IZFH-LABEL: fcvt_ui_h: +; RV32IZFH: # %bb.0: +; RV32IZFH-NEXT: fcvt.wu.h a0, fa0, rtz +; RV32IZFH-NEXT: ret +; +; RV64IZFH-LABEL: fcvt_ui_h: +; RV64IZFH: # %bb.0: +; RV64IZFH-NEXT: fcvt.lu.h a0, fa0, rtz +; RV64IZFH-NEXT: ret +; +; RV32IDZFH-LABEL: fcvt_ui_h: +; RV32IDZFH: # %bb.0: +; RV32IDZFH-NEXT: fcvt.wu.h a0, fa0, rtz +; RV32IDZFH-NEXT: ret +; +; RV64IDZFH-LABEL: fcvt_ui_h: +; RV64IDZFH: # %bb.0: +; RV64IDZFH-NEXT: fcvt.lu.h a0, fa0, rtz +; RV64IDZFH-NEXT: ret + %1 = call i16 @llvm.experimental.constrained.fptoui.i16.f16(half %a, metadata !"fpexcept.strict") strictfp + ret i16 %1 +} +declare i16 @llvm.experimental.constrained.fptoui.i16.f16(half, metadata) + +define i32 @fcvt_w_h(half %a) nounwind strictfp { +; RV32IZFH-LABEL: fcvt_w_h: +; RV32IZFH: # %bb.0: +; RV32IZFH-NEXT: fcvt.w.h a0, fa0, rtz +; RV32IZFH-NEXT: ret +; +; RV64IZFH-LABEL: fcvt_w_h: +; RV64IZFH: # %bb.0: +; RV64IZFH-NEXT: fcvt.w.h a0, fa0, rtz +; RV64IZFH-NEXT: ret +; +; RV32IDZFH-LABEL: fcvt_w_h: +; RV32IDZFH: # %bb.0: +; RV32IDZFH-NEXT: fcvt.w.h a0, fa0, rtz +; RV32IDZFH-NEXT: ret +; +; RV64IDZFH-LABEL: fcvt_w_h: +; RV64IDZFH: # %bb.0: +; RV64IDZFH-NEXT: fcvt.w.h a0, fa0, rtz +; RV64IDZFH-NEXT: ret + %1 = call i32 @llvm.experimental.constrained.fptosi.i32.f16(half %a, metadata !"fpexcept.strict") strictfp + ret i32 %1 +} +declare i32 @llvm.experimental.constrained.fptosi.i32.f16(half, metadata) + +define i32 @fcvt_wu_h(half %a) nounwind strictfp { +; RV32IZFH-LABEL: fcvt_wu_h: +; RV32IZFH: # %bb.0: +; RV32IZFH-NEXT: fcvt.wu.h a0, fa0, rtz +; RV32IZFH-NEXT: ret +; +; RV64IZFH-LABEL: fcvt_wu_h: +; RV64IZFH: # %bb.0: +; RV64IZFH-NEXT: fcvt.wu.h a0, fa0, rtz +; RV64IZFH-NEXT: ret +; +; RV32IDZFH-LABEL: fcvt_wu_h: +; RV32IDZFH: # %bb.0: +; RV32IDZFH-NEXT: fcvt.wu.h a0, fa0, rtz +; RV32IDZFH-NEXT: ret +; +; RV64IDZFH-LABEL: fcvt_wu_h: +; RV64IDZFH: # %bb.0: +; RV64IDZFH-NEXT: fcvt.wu.h a0, fa0, rtz +; RV64IDZFH-NEXT: ret + %1 = call i32 @llvm.experimental.constrained.fptoui.i32.f16(half %a, metadata !"fpexcept.strict") strictfp + ret i32 %1 +} +declare i32 @llvm.experimental.constrained.fptoui.i32.f16(half, metadata) + +; Test where the fptoui has multiple uses, one of which causes a sext to be +; inserted on RV64. +; FIXME: We should not have an fcvt.wu.h and an fcvt.lu.h. 
+define i32 @fcvt_wu_h_multiple_use(half %x, i32* %y) { +; RV32IZFH-LABEL: fcvt_wu_h_multiple_use: +; RV32IZFH: # %bb.0: +; RV32IZFH-NEXT: fcvt.wu.h a1, fa0, rtz +; RV32IZFH-NEXT: li a0, 1 +; RV32IZFH-NEXT: beqz a1, .LBB4_2 +; RV32IZFH-NEXT: # %bb.1: +; RV32IZFH-NEXT: mv a0, a1 +; RV32IZFH-NEXT: .LBB4_2: +; RV32IZFH-NEXT: ret +; +; RV64IZFH-LABEL: fcvt_wu_h_multiple_use: +; RV64IZFH: # %bb.0: +; RV64IZFH-NEXT: fcvt.wu.h a1, fa0, rtz +; RV64IZFH-NEXT: li a0, 1 +; RV64IZFH-NEXT: beqz a1, .LBB4_2 +; RV64IZFH-NEXT: # %bb.1: +; RV64IZFH-NEXT: mv a0, a1 +; RV64IZFH-NEXT: .LBB4_2: +; RV64IZFH-NEXT: ret +; +; RV32IDZFH-LABEL: fcvt_wu_h_multiple_use: +; RV32IDZFH: # %bb.0: +; RV32IDZFH-NEXT: fcvt.wu.h a1, fa0, rtz +; RV32IDZFH-NEXT: li a0, 1 +; RV32IDZFH-NEXT: beqz a1, .LBB4_2 +; RV32IDZFH-NEXT: # %bb.1: +; RV32IDZFH-NEXT: mv a0, a1 +; RV32IDZFH-NEXT: .LBB4_2: +; RV32IDZFH-NEXT: ret +; +; RV64IDZFH-LABEL: fcvt_wu_h_multiple_use: +; RV64IDZFH: # %bb.0: +; RV64IDZFH-NEXT: fcvt.wu.h a1, fa0, rtz +; RV64IDZFH-NEXT: li a0, 1 +; RV64IDZFH-NEXT: beqz a1, .LBB4_2 +; RV64IDZFH-NEXT: # %bb.1: +; RV64IDZFH-NEXT: mv a0, a1 +; RV64IDZFH-NEXT: .LBB4_2: +; RV64IDZFH-NEXT: ret + %a = call i32 @llvm.experimental.constrained.fptoui.i32.f16(half %x, metadata !"fpexcept.strict") strictfp + %b = icmp eq i32 %a, 0 + %c = select i1 %b, i32 1, i32 %a + ret i32 %c +} + +define i64 @fcvt_l_h(half %a) nounwind strictfp { +; RV32IZFH-LABEL: fcvt_l_h: +; RV32IZFH: # %bb.0: +; RV32IZFH-NEXT: addi sp, sp, -16 +; RV32IZFH-NEXT: sw ra, 12(sp) # 4-byte Folded Spill +; RV32IZFH-NEXT: call __fixhfdi@plt +; RV32IZFH-NEXT: lw ra, 12(sp) # 4-byte Folded Reload +; RV32IZFH-NEXT: addi sp, sp, 16 +; RV32IZFH-NEXT: ret +; +; RV64IZFH-LABEL: fcvt_l_h: +; RV64IZFH: # %bb.0: +; RV64IZFH-NEXT: fcvt.l.h a0, fa0, rtz +; RV64IZFH-NEXT: ret +; +; RV32IDZFH-LABEL: fcvt_l_h: +; RV32IDZFH: # %bb.0: +; RV32IDZFH-NEXT: addi sp, sp, -16 +; RV32IDZFH-NEXT: sw ra, 12(sp) # 4-byte Folded Spill +; RV32IDZFH-NEXT: call __fixhfdi@plt +; RV32IDZFH-NEXT: lw ra, 12(sp) # 4-byte Folded Reload +; RV32IDZFH-NEXT: addi sp, sp, 16 +; RV32IDZFH-NEXT: ret +; +; RV64IDZFH-LABEL: fcvt_l_h: +; RV64IDZFH: # %bb.0: +; RV64IDZFH-NEXT: fcvt.l.h a0, fa0, rtz +; RV64IDZFH-NEXT: ret + %1 = call i64 @llvm.experimental.constrained.fptosi.i64.f16(half %a, metadata !"fpexcept.strict") strictfp + ret i64 %1 +} +declare i64 @llvm.experimental.constrained.fptosi.i64.f16(half, metadata) + +define i64 @fcvt_lu_h(half %a) nounwind strictfp { +; RV32IZFH-LABEL: fcvt_lu_h: +; RV32IZFH: # %bb.0: +; RV32IZFH-NEXT: addi sp, sp, -16 +; RV32IZFH-NEXT: sw ra, 12(sp) # 4-byte Folded Spill +; RV32IZFH-NEXT: call __fixunshfdi@plt +; RV32IZFH-NEXT: lw ra, 12(sp) # 4-byte Folded Reload +; RV32IZFH-NEXT: addi sp, sp, 16 +; RV32IZFH-NEXT: ret +; +; RV64IZFH-LABEL: fcvt_lu_h: +; RV64IZFH: # %bb.0: +; RV64IZFH-NEXT: fcvt.lu.h a0, fa0, rtz +; RV64IZFH-NEXT: ret +; +; RV32IDZFH-LABEL: fcvt_lu_h: +; RV32IDZFH: # %bb.0: +; RV32IDZFH-NEXT: addi sp, sp, -16 +; RV32IDZFH-NEXT: sw ra, 12(sp) # 4-byte Folded Spill +; RV32IDZFH-NEXT: call __fixunshfdi@plt +; RV32IDZFH-NEXT: lw ra, 12(sp) # 4-byte Folded Reload +; RV32IDZFH-NEXT: addi sp, sp, 16 +; RV32IDZFH-NEXT: ret +; +; RV64IDZFH-LABEL: fcvt_lu_h: +; RV64IDZFH: # %bb.0: +; RV64IDZFH-NEXT: fcvt.lu.h a0, fa0, rtz +; RV64IDZFH-NEXT: ret + %1 = call i64 @llvm.experimental.constrained.fptoui.i64.f16(half %a, metadata !"fpexcept.strict") strictfp + ret i64 %1 +} +declare i64 @llvm.experimental.constrained.fptoui.i64.f16(half, metadata) + +define half @fcvt_h_si(i16 %a) 
nounwind strictfp { +; RV32IZFH-LABEL: fcvt_h_si: +; RV32IZFH: # %bb.0: +; RV32IZFH-NEXT: slli a0, a0, 16 +; RV32IZFH-NEXT: srai a0, a0, 16 +; RV32IZFH-NEXT: fcvt.h.w fa0, a0 +; RV32IZFH-NEXT: ret +; +; RV64IZFH-LABEL: fcvt_h_si: +; RV64IZFH: # %bb.0: +; RV64IZFH-NEXT: slli a0, a0, 48 +; RV64IZFH-NEXT: srai a0, a0, 48 +; RV64IZFH-NEXT: fcvt.h.w fa0, a0 +; RV64IZFH-NEXT: ret +; +; RV32IDZFH-LABEL: fcvt_h_si: +; RV32IDZFH: # %bb.0: +; RV32IDZFH-NEXT: slli a0, a0, 16 +; RV32IDZFH-NEXT: srai a0, a0, 16 +; RV32IDZFH-NEXT: fcvt.h.w fa0, a0 +; RV32IDZFH-NEXT: ret +; +; RV64IDZFH-LABEL: fcvt_h_si: +; RV64IDZFH: # %bb.0: +; RV64IDZFH-NEXT: slli a0, a0, 48 +; RV64IDZFH-NEXT: srai a0, a0, 48 +; RV64IDZFH-NEXT: fcvt.h.w fa0, a0 +; RV64IDZFH-NEXT: ret + %1 = call half @llvm.experimental.constrained.sitofp.f16.i16(i16 %a, metadata !"round.dynamic", metadata !"fpexcept.strict") strictfp + ret half %1 +} +declare half @llvm.experimental.constrained.sitofp.f16.i16(i16, metadata, metadata) + +define half @fcvt_h_si_signext(i16 signext %a) nounwind strictfp { +; RV32IZFH-LABEL: fcvt_h_si_signext: +; RV32IZFH: # %bb.0: +; RV32IZFH-NEXT: fcvt.h.w fa0, a0 +; RV32IZFH-NEXT: ret +; +; RV64IZFH-LABEL: fcvt_h_si_signext: +; RV64IZFH: # %bb.0: +; RV64IZFH-NEXT: fcvt.h.w fa0, a0 +; RV64IZFH-NEXT: ret +; +; RV32IDZFH-LABEL: fcvt_h_si_signext: +; RV32IDZFH: # %bb.0: +; RV32IDZFH-NEXT: fcvt.h.w fa0, a0 +; RV32IDZFH-NEXT: ret +; +; RV64IDZFH-LABEL: fcvt_h_si_signext: +; RV64IDZFH: # %bb.0: +; RV64IDZFH-NEXT: fcvt.h.w fa0, a0 +; RV64IDZFH-NEXT: ret + %1 = call half @llvm.experimental.constrained.sitofp.f16.i16(i16 %a, metadata !"round.dynamic", metadata !"fpexcept.strict") strictfp + ret half %1 +} + +define half @fcvt_h_ui(i16 %a) nounwind strictfp { +; RV32IZFH-LABEL: fcvt_h_ui: +; RV32IZFH: # %bb.0: +; RV32IZFH-NEXT: lui a1, 16 +; RV32IZFH-NEXT: addi a1, a1, -1 +; RV32IZFH-NEXT: and a0, a0, a1 +; RV32IZFH-NEXT: fcvt.h.wu fa0, a0 +; RV32IZFH-NEXT: ret +; +; RV64IZFH-LABEL: fcvt_h_ui: +; RV64IZFH: # %bb.0: +; RV64IZFH-NEXT: lui a1, 16 +; RV64IZFH-NEXT: addiw a1, a1, -1 +; RV64IZFH-NEXT: and a0, a0, a1 +; RV64IZFH-NEXT: fcvt.h.wu fa0, a0 +; RV64IZFH-NEXT: ret +; +; RV32IDZFH-LABEL: fcvt_h_ui: +; RV32IDZFH: # %bb.0: +; RV32IDZFH-NEXT: lui a1, 16 +; RV32IDZFH-NEXT: addi a1, a1, -1 +; RV32IDZFH-NEXT: and a0, a0, a1 +; RV32IDZFH-NEXT: fcvt.h.wu fa0, a0 +; RV32IDZFH-NEXT: ret +; +; RV64IDZFH-LABEL: fcvt_h_ui: +; RV64IDZFH: # %bb.0: +; RV64IDZFH-NEXT: lui a1, 16 +; RV64IDZFH-NEXT: addiw a1, a1, -1 +; RV64IDZFH-NEXT: and a0, a0, a1 +; RV64IDZFH-NEXT: fcvt.h.wu fa0, a0 +; RV64IDZFH-NEXT: ret + %1 = call half @llvm.experimental.constrained.uitofp.f16.i16(i16 %a, metadata !"round.dynamic", metadata !"fpexcept.strict") strictfp + ret half %1 +} +declare half @llvm.experimental.constrained.uitofp.f16.i16(i16, metadata, metadata) + +define half @fcvt_h_ui_zeroext(i16 zeroext %a) nounwind strictfp { +; RV32IZFH-LABEL: fcvt_h_ui_zeroext: +; RV32IZFH: # %bb.0: +; RV32IZFH-NEXT: fcvt.h.wu fa0, a0 +; RV32IZFH-NEXT: ret +; +; RV64IZFH-LABEL: fcvt_h_ui_zeroext: +; RV64IZFH: # %bb.0: +; RV64IZFH-NEXT: fcvt.h.wu fa0, a0 +; RV64IZFH-NEXT: ret +; +; RV32IDZFH-LABEL: fcvt_h_ui_zeroext: +; RV32IDZFH: # %bb.0: +; RV32IDZFH-NEXT: fcvt.h.wu fa0, a0 +; RV32IDZFH-NEXT: ret +; +; RV64IDZFH-LABEL: fcvt_h_ui_zeroext: +; RV64IDZFH: # %bb.0: +; RV64IDZFH-NEXT: fcvt.h.wu fa0, a0 +; RV64IDZFH-NEXT: ret + %1 = call half @llvm.experimental.constrained.uitofp.f16.i16(i16 %a, metadata !"round.dynamic", metadata !"fpexcept.strict") strictfp + ret half %1 +} + 
+define half @fcvt_h_w(i32 %a) nounwind strictfp { +; RV32IZFH-LABEL: fcvt_h_w: +; RV32IZFH: # %bb.0: +; RV32IZFH-NEXT: fcvt.h.w fa0, a0 +; RV32IZFH-NEXT: ret +; +; RV64IZFH-LABEL: fcvt_h_w: +; RV64IZFH: # %bb.0: +; RV64IZFH-NEXT: fcvt.h.w fa0, a0 +; RV64IZFH-NEXT: ret +; +; RV32IDZFH-LABEL: fcvt_h_w: +; RV32IDZFH: # %bb.0: +; RV32IDZFH-NEXT: fcvt.h.w fa0, a0 +; RV32IDZFH-NEXT: ret +; +; RV64IDZFH-LABEL: fcvt_h_w: +; RV64IDZFH: # %bb.0: +; RV64IDZFH-NEXT: fcvt.h.w fa0, a0 +; RV64IDZFH-NEXT: ret + %1 = call half @llvm.experimental.constrained.sitofp.f16.i32(i32 %a, metadata !"round.dynamic", metadata !"fpexcept.strict") strictfp + ret half %1 +} +declare half @llvm.experimental.constrained.sitofp.f16.i32(i32, metadata, metadata) + +define half @fcvt_h_w_load(i32* %p) nounwind strictfp { +; RV32IZFH-LABEL: fcvt_h_w_load: +; RV32IZFH: # %bb.0: +; RV32IZFH-NEXT: lw a0, 0(a0) +; RV32IZFH-NEXT: fcvt.h.w fa0, a0 +; RV32IZFH-NEXT: ret +; +; RV64IZFH-LABEL: fcvt_h_w_load: +; RV64IZFH: # %bb.0: +; RV64IZFH-NEXT: lw a0, 0(a0) +; RV64IZFH-NEXT: fcvt.h.w fa0, a0 +; RV64IZFH-NEXT: ret +; +; RV32IDZFH-LABEL: fcvt_h_w_load: +; RV32IDZFH: # %bb.0: +; RV32IDZFH-NEXT: lw a0, 0(a0) +; RV32IDZFH-NEXT: fcvt.h.w fa0, a0 +; RV32IDZFH-NEXT: ret +; +; RV64IDZFH-LABEL: fcvt_h_w_load: +; RV64IDZFH: # %bb.0: +; RV64IDZFH-NEXT: lw a0, 0(a0) +; RV64IDZFH-NEXT: fcvt.h.w fa0, a0 +; RV64IDZFH-NEXT: ret + %a = load i32, i32* %p + %1 = call half @llvm.experimental.constrained.sitofp.f16.i32(i32 %a, metadata !"round.dynamic", metadata !"fpexcept.strict") strictfp + ret half %1 +} + +define half @fcvt_h_wu(i32 %a) nounwind strictfp { +; RV32IZFH-LABEL: fcvt_h_wu: +; RV32IZFH: # %bb.0: +; RV32IZFH-NEXT: fcvt.h.wu fa0, a0 +; RV32IZFH-NEXT: ret +; +; RV64IZFH-LABEL: fcvt_h_wu: +; RV64IZFH: # %bb.0: +; RV64IZFH-NEXT: fcvt.h.wu fa0, a0 +; RV64IZFH-NEXT: ret +; +; RV32IDZFH-LABEL: fcvt_h_wu: +; RV32IDZFH: # %bb.0: +; RV32IDZFH-NEXT: fcvt.h.wu fa0, a0 +; RV32IDZFH-NEXT: ret +; +; RV64IDZFH-LABEL: fcvt_h_wu: +; RV64IDZFH: # %bb.0: +; RV64IDZFH-NEXT: fcvt.h.wu fa0, a0 +; RV64IDZFH-NEXT: ret + %1 = call half @llvm.experimental.constrained.uitofp.f16.i32(i32 %a, metadata !"round.dynamic", metadata !"fpexcept.strict") strictfp + ret half %1 +} +declare half @llvm.experimental.constrained.uitofp.f16.i32(i32, metadata, metadata) + +define half @fcvt_h_wu_load(i32* %p) nounwind strictfp { +; RV32IZFH-LABEL: fcvt_h_wu_load: +; RV32IZFH: # %bb.0: +; RV32IZFH-NEXT: lw a0, 0(a0) +; RV32IZFH-NEXT: fcvt.h.wu fa0, a0 +; RV32IZFH-NEXT: ret +; +; RV64IZFH-LABEL: fcvt_h_wu_load: +; RV64IZFH: # %bb.0: +; RV64IZFH-NEXT: lwu a0, 0(a0) +; RV64IZFH-NEXT: fcvt.h.wu fa0, a0 +; RV64IZFH-NEXT: ret +; +; RV32IDZFH-LABEL: fcvt_h_wu_load: +; RV32IDZFH: # %bb.0: +; RV32IDZFH-NEXT: lw a0, 0(a0) +; RV32IDZFH-NEXT: fcvt.h.wu fa0, a0 +; RV32IDZFH-NEXT: ret +; +; RV64IDZFH-LABEL: fcvt_h_wu_load: +; RV64IDZFH: # %bb.0: +; RV64IDZFH-NEXT: lwu a0, 0(a0) +; RV64IDZFH-NEXT: fcvt.h.wu fa0, a0 +; RV64IDZFH-NEXT: ret + %a = load i32, i32* %p + %1 = call half @llvm.experimental.constrained.uitofp.f16.i32(i32 %a, metadata !"round.dynamic", metadata !"fpexcept.strict") strictfp + ret half %1 +} + +define half @fcvt_h_l(i64 %a) nounwind strictfp { +; RV32IZFH-LABEL: fcvt_h_l: +; RV32IZFH: # %bb.0: +; RV32IZFH-NEXT: addi sp, sp, -16 +; RV32IZFH-NEXT: sw ra, 12(sp) # 4-byte Folded Spill +; RV32IZFH-NEXT: call __floatdihf@plt +; RV32IZFH-NEXT: lw ra, 12(sp) # 4-byte Folded Reload +; RV32IZFH-NEXT: addi sp, sp, 16 +; RV32IZFH-NEXT: ret +; +; RV64IZFH-LABEL: fcvt_h_l: +; RV64IZFH: # 
%bb.0: +; RV64IZFH-NEXT: fcvt.h.l fa0, a0 +; RV64IZFH-NEXT: ret +; +; RV32IDZFH-LABEL: fcvt_h_l: +; RV32IDZFH: # %bb.0: +; RV32IDZFH-NEXT: addi sp, sp, -16 +; RV32IDZFH-NEXT: sw ra, 12(sp) # 4-byte Folded Spill +; RV32IDZFH-NEXT: call __floatdihf@plt +; RV32IDZFH-NEXT: lw ra, 12(sp) # 4-byte Folded Reload +; RV32IDZFH-NEXT: addi sp, sp, 16 +; RV32IDZFH-NEXT: ret +; +; RV64IDZFH-LABEL: fcvt_h_l: +; RV64IDZFH: # %bb.0: +; RV64IDZFH-NEXT: fcvt.h.l fa0, a0 +; RV64IDZFH-NEXT: ret + %1 = call half @llvm.experimental.constrained.sitofp.f16.i64(i64 %a, metadata !"round.dynamic", metadata !"fpexcept.strict") strictfp + ret half %1 +} +declare half @llvm.experimental.constrained.sitofp.f16.i64(i64, metadata, metadata) + +define half @fcvt_h_lu(i64 %a) nounwind strictfp { +; RV32IZFH-LABEL: fcvt_h_lu: +; RV32IZFH: # %bb.0: +; RV32IZFH-NEXT: addi sp, sp, -16 +; RV32IZFH-NEXT: sw ra, 12(sp) # 4-byte Folded Spill +; RV32IZFH-NEXT: call __floatundihf@plt +; RV32IZFH-NEXT: lw ra, 12(sp) # 4-byte Folded Reload +; RV32IZFH-NEXT: addi sp, sp, 16 +; RV32IZFH-NEXT: ret +; +; RV64IZFH-LABEL: fcvt_h_lu: +; RV64IZFH: # %bb.0: +; RV64IZFH-NEXT: fcvt.h.lu fa0, a0 +; RV64IZFH-NEXT: ret +; +; RV32IDZFH-LABEL: fcvt_h_lu: +; RV32IDZFH: # %bb.0: +; RV32IDZFH-NEXT: addi sp, sp, -16 +; RV32IDZFH-NEXT: sw ra, 12(sp) # 4-byte Folded Spill +; RV32IDZFH-NEXT: call __floatundihf@plt +; RV32IDZFH-NEXT: lw ra, 12(sp) # 4-byte Folded Reload +; RV32IDZFH-NEXT: addi sp, sp, 16 +; RV32IDZFH-NEXT: ret +; +; RV64IDZFH-LABEL: fcvt_h_lu: +; RV64IDZFH: # %bb.0: +; RV64IDZFH-NEXT: fcvt.h.lu fa0, a0 +; RV64IDZFH-NEXT: ret + %1 = call half @llvm.experimental.constrained.uitofp.f16.i64(i64 %a, metadata !"round.dynamic", metadata !"fpexcept.strict") strictfp + ret half %1 +} +declare half @llvm.experimental.constrained.uitofp.f16.i64(i64, metadata, metadata) + +define half @fcvt_h_s(float %a) nounwind strictfp { +; RV32IZFH-LABEL: fcvt_h_s: +; RV32IZFH: # %bb.0: +; RV32IZFH-NEXT: fcvt.h.s fa0, fa0 +; RV32IZFH-NEXT: ret +; +; RV64IZFH-LABEL: fcvt_h_s: +; RV64IZFH: # %bb.0: +; RV64IZFH-NEXT: fcvt.h.s fa0, fa0 +; RV64IZFH-NEXT: ret +; +; RV32IDZFH-LABEL: fcvt_h_s: +; RV32IDZFH: # %bb.0: +; RV32IDZFH-NEXT: fcvt.h.s fa0, fa0 +; RV32IDZFH-NEXT: ret +; +; RV64IDZFH-LABEL: fcvt_h_s: +; RV64IDZFH: # %bb.0: +; RV64IDZFH-NEXT: fcvt.h.s fa0, fa0 +; RV64IDZFH-NEXT: ret + %1 = call half @llvm.experimental.constrained.fptrunc.f16.f32(float %a, metadata !"round.dynamic", metadata !"fpexcept.strict") + ret half %1 +} +declare half @llvm.experimental.constrained.fptrunc.f16.f32(float, metadata, metadata) + +define float @fcvt_s_h(half %a) nounwind strictfp { +; RV32IZFH-LABEL: fcvt_s_h: +; RV32IZFH: # %bb.0: +; RV32IZFH-NEXT: fcvt.s.h fa0, fa0 +; RV32IZFH-NEXT: ret +; +; RV64IZFH-LABEL: fcvt_s_h: +; RV64IZFH: # %bb.0: +; RV64IZFH-NEXT: fcvt.s.h fa0, fa0 +; RV64IZFH-NEXT: ret +; +; RV32IDZFH-LABEL: fcvt_s_h: +; RV32IDZFH: # %bb.0: +; RV32IDZFH-NEXT: fcvt.s.h fa0, fa0 +; RV32IDZFH-NEXT: ret +; +; RV64IDZFH-LABEL: fcvt_s_h: +; RV64IDZFH: # %bb.0: +; RV64IDZFH-NEXT: fcvt.s.h fa0, fa0 +; RV64IDZFH-NEXT: ret + %1 = call float @llvm.experimental.constrained.fpext.f32.f16(half %a, metadata !"fpexcept.strict") + ret float %1 +} +declare float @llvm.experimental.constrained.fpext.f32.f16(half, metadata) + +define half @fcvt_h_d(double %a) nounwind strictfp { +; RV32IZFH-LABEL: fcvt_h_d: +; RV32IZFH: # %bb.0: +; RV32IZFH-NEXT: addi sp, sp, -16 +; RV32IZFH-NEXT: sw ra, 12(sp) # 4-byte Folded Spill +; RV32IZFH-NEXT: call __truncdfhf2@plt +; RV32IZFH-NEXT: lw ra, 12(sp) # 
4-byte Folded Reload +; RV32IZFH-NEXT: addi sp, sp, 16 +; RV32IZFH-NEXT: ret +; +; RV64IZFH-LABEL: fcvt_h_d: +; RV64IZFH: # %bb.0: +; RV64IZFH-NEXT: addi sp, sp, -16 +; RV64IZFH-NEXT: sd ra, 8(sp) # 8-byte Folded Spill +; RV64IZFH-NEXT: call __truncdfhf2@plt +; RV64IZFH-NEXT: ld ra, 8(sp) # 8-byte Folded Reload +; RV64IZFH-NEXT: addi sp, sp, 16 +; RV64IZFH-NEXT: ret +; +; RV32IDZFH-LABEL: fcvt_h_d: +; RV32IDZFH: # %bb.0: +; RV32IDZFH-NEXT: fcvt.h.d fa0, fa0 +; RV32IDZFH-NEXT: ret +; +; RV64IDZFH-LABEL: fcvt_h_d: +; RV64IDZFH: # %bb.0: +; RV64IDZFH-NEXT: fcvt.h.d fa0, fa0 +; RV64IDZFH-NEXT: ret + %1 = call half @llvm.experimental.constrained.fptrunc.f16.f64(double %a, metadata !"round.dynamic", metadata !"fpexcept.strict") + ret half %1 +} +declare half @llvm.experimental.constrained.fptrunc.f16.f64(double, metadata, metadata) + +define double @fcvt_d_h(half %a) nounwind strictfp { +; RV32IZFH-LABEL: fcvt_d_h: +; RV32IZFH: # %bb.0: +; RV32IZFH-NEXT: addi sp, sp, -16 +; RV32IZFH-NEXT: sw ra, 12(sp) # 4-byte Folded Spill +; RV32IZFH-NEXT: fcvt.s.h fa0, fa0 +; RV32IZFH-NEXT: call __extendsfdf2@plt +; RV32IZFH-NEXT: lw ra, 12(sp) # 4-byte Folded Reload +; RV32IZFH-NEXT: addi sp, sp, 16 +; RV32IZFH-NEXT: ret +; +; RV64IZFH-LABEL: fcvt_d_h: +; RV64IZFH: # %bb.0: +; RV64IZFH-NEXT: addi sp, sp, -16 +; RV64IZFH-NEXT: sd ra, 8(sp) # 8-byte Folded Spill +; RV64IZFH-NEXT: fcvt.s.h fa0, fa0 +; RV64IZFH-NEXT: call __extendsfdf2@plt +; RV64IZFH-NEXT: ld ra, 8(sp) # 8-byte Folded Reload +; RV64IZFH-NEXT: addi sp, sp, 16 +; RV64IZFH-NEXT: ret +; +; RV32IDZFH-LABEL: fcvt_d_h: +; RV32IDZFH: # %bb.0: +; RV32IDZFH-NEXT: fcvt.d.h fa0, fa0 +; RV32IDZFH-NEXT: ret +; +; RV64IDZFH-LABEL: fcvt_d_h: +; RV64IDZFH: # %bb.0: +; RV64IDZFH-NEXT: fcvt.d.h fa0, fa0 +; RV64IDZFH-NEXT: ret + %1 = call double @llvm.experimental.constrained.fpext.f64.f16(half %a, metadata !"fpexcept.strict") + ret double %1 +} +declare double @llvm.experimental.constrained.fpext.f64.f16(half, metadata) + +; Make sure we select W version of addi on RV64. +define signext i32 @fcvt_h_w_demanded_bits(i32 signext %0, half* %1) { +; RV32IZFH-LABEL: fcvt_h_w_demanded_bits: +; RV32IZFH: # %bb.0: +; RV32IZFH-NEXT: addi a0, a0, 1 +; RV32IZFH-NEXT: fcvt.h.w ft0, a0 +; RV32IZFH-NEXT: fsh ft0, 0(a1) +; RV32IZFH-NEXT: ret +; +; RV64IZFH-LABEL: fcvt_h_w_demanded_bits: +; RV64IZFH: # %bb.0: +; RV64IZFH-NEXT: addiw a0, a0, 1 +; RV64IZFH-NEXT: fcvt.h.w ft0, a0 +; RV64IZFH-NEXT: fsh ft0, 0(a1) +; RV64IZFH-NEXT: ret +; +; RV32IDZFH-LABEL: fcvt_h_w_demanded_bits: +; RV32IDZFH: # %bb.0: +; RV32IDZFH-NEXT: addi a0, a0, 1 +; RV32IDZFH-NEXT: fcvt.h.w ft0, a0 +; RV32IDZFH-NEXT: fsh ft0, 0(a1) +; RV32IDZFH-NEXT: ret +; +; RV64IDZFH-LABEL: fcvt_h_w_demanded_bits: +; RV64IDZFH: # %bb.0: +; RV64IDZFH-NEXT: addiw a0, a0, 1 +; RV64IDZFH-NEXT: fcvt.h.w ft0, a0 +; RV64IDZFH-NEXT: fsh ft0, 0(a1) +; RV64IDZFH-NEXT: ret + %3 = add i32 %0, 1 + %4 = call half @llvm.experimental.constrained.sitofp.f16.i32(i32 %3, metadata !"round.dynamic", metadata !"fpexcept.strict") strictfp + store half %4, half* %1, align 2 + ret i32 %3 +} + +; Make sure we select W version of addi on RV64. 
+define signext i32 @fcvt_h_wu_demanded_bits(i32 signext %0, half* %1) { +; RV32IZFH-LABEL: fcvt_h_wu_demanded_bits: +; RV32IZFH: # %bb.0: +; RV32IZFH-NEXT: addi a0, a0, 1 +; RV32IZFH-NEXT: fcvt.h.wu ft0, a0 +; RV32IZFH-NEXT: fsh ft0, 0(a1) +; RV32IZFH-NEXT: ret +; +; RV64IZFH-LABEL: fcvt_h_wu_demanded_bits: +; RV64IZFH: # %bb.0: +; RV64IZFH-NEXT: addiw a0, a0, 1 +; RV64IZFH-NEXT: fcvt.h.wu ft0, a0 +; RV64IZFH-NEXT: fsh ft0, 0(a1) +; RV64IZFH-NEXT: ret +; +; RV32IDZFH-LABEL: fcvt_h_wu_demanded_bits: +; RV32IDZFH: # %bb.0: +; RV32IDZFH-NEXT: addi a0, a0, 1 +; RV32IDZFH-NEXT: fcvt.h.wu ft0, a0 +; RV32IDZFH-NEXT: fsh ft0, 0(a1) +; RV32IDZFH-NEXT: ret +; +; RV64IDZFH-LABEL: fcvt_h_wu_demanded_bits: +; RV64IDZFH: # %bb.0: +; RV64IDZFH-NEXT: addiw a0, a0, 1 +; RV64IDZFH-NEXT: fcvt.h.wu ft0, a0 +; RV64IDZFH-NEXT: fsh ft0, 0(a1) +; RV64IDZFH-NEXT: ret + %3 = add i32 %0, 1 + %4 = call half @llvm.experimental.constrained.uitofp.f16.i32(i32 %3, metadata !"round.dynamic", metadata !"fpexcept.strict") strictfp + store half %4, half* %1, align 2 + ret i32 %3 +} diff --git a/llvm/test/CodeGen/RISCV/rv64d-double-convert-strict.ll b/llvm/test/CodeGen/RISCV/rv64d-double-convert-strict.ll new file mode 100644 index 000000000000..cc8c1cfdceb3 --- /dev/null +++ b/llvm/test/CodeGen/RISCV/rv64d-double-convert-strict.ll @@ -0,0 +1,134 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc -mtriple=riscv64 -mattr=+d -verify-machineinstrs < %s \ +; RUN: -disable-strictnode-mutation | FileCheck %s -check-prefix=RV64ID + +; This file exhaustively checks double<->i32 conversions. In general, +; fcvt.l[u].d can be selected instead of fcvt.w[u].d because poison is +; generated for an fpto[s|u]i conversion if the result doesn't fit in the +; target type. 
+ +define i32 @aext_fptosi(double %a) nounwind strictfp { +; RV64ID-LABEL: aext_fptosi: +; RV64ID: # %bb.0: +; RV64ID-NEXT: fmv.d.x ft0, a0 +; RV64ID-NEXT: fcvt.w.d a0, ft0, rtz +; RV64ID-NEXT: ret + %1 = call i32 @llvm.experimental.constrained.fptosi.i32.f64(double %a, metadata !"fpexcept.strict") strictfp + ret i32 %1 +} +declare i32 @llvm.experimental.constrained.fptosi.i32.f64(double, metadata) + +define signext i32 @sext_fptosi(double %a) nounwind strictfp { +; RV64ID-LABEL: sext_fptosi: +; RV64ID: # %bb.0: +; RV64ID-NEXT: fmv.d.x ft0, a0 +; RV64ID-NEXT: fcvt.w.d a0, ft0, rtz +; RV64ID-NEXT: ret + %1 = call i32 @llvm.experimental.constrained.fptosi.i32.f64(double %a, metadata !"fpexcept.strict") strictfp + ret i32 %1 +} + +define zeroext i32 @zext_fptosi(double %a) nounwind strictfp { +; RV64ID-LABEL: zext_fptosi: +; RV64ID: # %bb.0: +; RV64ID-NEXT: fmv.d.x ft0, a0 +; RV64ID-NEXT: fcvt.w.d a0, ft0, rtz +; RV64ID-NEXT: slli a0, a0, 32 +; RV64ID-NEXT: srli a0, a0, 32 +; RV64ID-NEXT: ret + %1 = call i32 @llvm.experimental.constrained.fptosi.i32.f64(double %a, metadata !"fpexcept.strict") strictfp + ret i32 %1 +} + +define i32 @aext_fptoui(double %a) nounwind strictfp { +; RV64ID-LABEL: aext_fptoui: +; RV64ID: # %bb.0: +; RV64ID-NEXT: fmv.d.x ft0, a0 +; RV64ID-NEXT: fcvt.wu.d a0, ft0, rtz +; RV64ID-NEXT: ret + %1 = call i32 @llvm.experimental.constrained.fptoui.i32.f64(double %a, metadata !"fpexcept.strict") strictfp + ret i32 %1 +} +declare i32 @llvm.experimental.constrained.fptoui.i32.f64(double, metadata) + +define signext i32 @sext_fptoui(double %a) nounwind strictfp { +; RV64ID-LABEL: sext_fptoui: +; RV64ID: # %bb.0: +; RV64ID-NEXT: fmv.d.x ft0, a0 +; RV64ID-NEXT: fcvt.wu.d a0, ft0, rtz +; RV64ID-NEXT: ret + %1 = call i32 @llvm.experimental.constrained.fptoui.i32.f64(double %a, metadata !"fpexcept.strict") strictfp + ret i32 %1 +} + +define zeroext i32 @zext_fptoui(double %a) nounwind strictfp { +; RV64ID-LABEL: zext_fptoui: +; RV64ID: # %bb.0: +; RV64ID-NEXT: fmv.d.x ft0, a0 +; RV64ID-NEXT: fcvt.lu.d a0, ft0, rtz +; RV64ID-NEXT: ret + %1 = call i32 @llvm.experimental.constrained.fptoui.i32.f64(double %a, metadata !"fpexcept.strict") strictfp + ret i32 %1 +} + +define double @uitofp_aext_i32_to_f64(i32 %a) nounwind strictfp { +; RV64ID-LABEL: uitofp_aext_i32_to_f64: +; RV64ID: # %bb.0: +; RV64ID-NEXT: fcvt.d.wu ft0, a0 +; RV64ID-NEXT: fmv.x.d a0, ft0 +; RV64ID-NEXT: ret + %1 = call double @llvm.experimental.constrained.uitofp.f64.i32(i32 %a, metadata !"round.dynamic", metadata !"fpexcept.strict") strictfp + ret double %1 +} +declare double @llvm.experimental.constrained.uitofp.f64.i32(i32 %a, metadata, metadata) + +define double @uitofp_sext_i32_to_f64(i32 signext %a) nounwind strictfp { +; RV64ID-LABEL: uitofp_sext_i32_to_f64: +; RV64ID: # %bb.0: +; RV64ID-NEXT: fcvt.d.wu ft0, a0 +; RV64ID-NEXT: fmv.x.d a0, ft0 +; RV64ID-NEXT: ret + %1 = call double @llvm.experimental.constrained.uitofp.f64.i32(i32 %a, metadata !"round.dynamic", metadata !"fpexcept.strict") strictfp + ret double %1 +} + +define double @uitofp_zext_i32_to_f64(i32 zeroext %a) nounwind strictfp { +; RV64ID-LABEL: uitofp_zext_i32_to_f64: +; RV64ID: # %bb.0: +; RV64ID-NEXT: fcvt.d.wu ft0, a0 +; RV64ID-NEXT: fmv.x.d a0, ft0 +; RV64ID-NEXT: ret + %1 = call double @llvm.experimental.constrained.uitofp.f64.i32(i32 %a, metadata !"round.dynamic", metadata !"fpexcept.strict") strictfp + ret double %1 +} + +define double @sitofp_aext_i32_to_f64(i32 %a) nounwind strictfp { +; RV64ID-LABEL: sitofp_aext_i32_to_f64: +; RV64ID: # 
%bb.0: +; RV64ID-NEXT: fcvt.d.w ft0, a0 +; RV64ID-NEXT: fmv.x.d a0, ft0 +; RV64ID-NEXT: ret + %1 = call double @llvm.experimental.constrained.sitofp.f64.i32(i32 %a, metadata !"round.dynamic", metadata !"fpexcept.strict") strictfp + ret double %1 +} +declare double @llvm.experimental.constrained.sitofp.f64.i32(i32 %a, metadata, metadata) + +define double @sitofp_sext_i32_to_f64(i32 signext %a) nounwind strictfp { +; RV64ID-LABEL: sitofp_sext_i32_to_f64: +; RV64ID: # %bb.0: +; RV64ID-NEXT: fcvt.d.w ft0, a0 +; RV64ID-NEXT: fmv.x.d a0, ft0 +; RV64ID-NEXT: ret + %1 = call double @llvm.experimental.constrained.sitofp.f64.i32(i32 %a, metadata !"round.dynamic", metadata !"fpexcept.strict") strictfp + ret double %1 +} + +define double @sitofp_zext_i32_to_f64(i32 zeroext %a) nounwind strictfp { +; RV64ID-LABEL: sitofp_zext_i32_to_f64: +; RV64ID: # %bb.0: +; RV64ID-NEXT: fcvt.d.w ft0, a0 +; RV64ID-NEXT: fmv.x.d a0, ft0 +; RV64ID-NEXT: ret + %1 = call double @llvm.experimental.constrained.sitofp.f64.i32(i32 %a, metadata !"round.dynamic", metadata !"fpexcept.strict") strictfp + ret double %1 +} diff --git a/llvm/test/CodeGen/RISCV/rv64f-float-convert-strict.ll b/llvm/test/CodeGen/RISCV/rv64f-float-convert-strict.ll new file mode 100644 index 000000000000..466c15cbdf38 --- /dev/null +++ b/llvm/test/CodeGen/RISCV/rv64f-float-convert-strict.ll @@ -0,0 +1,134 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc -mtriple=riscv64 -mattr=+f -verify-machineinstrs < %s \ +; RUN: -disable-strictnode-mutation | FileCheck %s -check-prefix=RV64IF + +; This file exhaustively checks float<->i32 conversions. In general, +; fcvt.l[u].s can be selected instead of fcvt.w[u].s because poison is +; generated for an fpto[s|u]i conversion if the result doesn't fit in the +; target type. 
+ +define i32 @aext_fptosi(float %a) nounwind strictfp { +; RV64IF-LABEL: aext_fptosi: +; RV64IF: # %bb.0: +; RV64IF-NEXT: fmv.w.x ft0, a0 +; RV64IF-NEXT: fcvt.w.s a0, ft0, rtz +; RV64IF-NEXT: ret + %1 = call i32 @llvm.experimental.constrained.fptosi.i32.f32(float %a, metadata !"fpexcept.strict") strictfp + ret i32 %1 +} +declare i32 @llvm.experimental.constrained.fptosi.i32.f32(float, metadata) + +define signext i32 @sext_fptosi(float %a) nounwind strictfp { +; RV64IF-LABEL: sext_fptosi: +; RV64IF: # %bb.0: +; RV64IF-NEXT: fmv.w.x ft0, a0 +; RV64IF-NEXT: fcvt.w.s a0, ft0, rtz +; RV64IF-NEXT: ret + %1 = call i32 @llvm.experimental.constrained.fptosi.i32.f32(float %a, metadata !"fpexcept.strict") strictfp + ret i32 %1 +} + +define zeroext i32 @zext_fptosi(float %a) nounwind strictfp { +; RV64IF-LABEL: zext_fptosi: +; RV64IF: # %bb.0: +; RV64IF-NEXT: fmv.w.x ft0, a0 +; RV64IF-NEXT: fcvt.w.s a0, ft0, rtz +; RV64IF-NEXT: slli a0, a0, 32 +; RV64IF-NEXT: srli a0, a0, 32 +; RV64IF-NEXT: ret + %1 = call i32 @llvm.experimental.constrained.fptosi.i32.f32(float %a, metadata !"fpexcept.strict") strictfp + ret i32 %1 +} + +define i32 @aext_fptoui(float %a) nounwind strictfp { +; RV64IF-LABEL: aext_fptoui: +; RV64IF: # %bb.0: +; RV64IF-NEXT: fmv.w.x ft0, a0 +; RV64IF-NEXT: fcvt.wu.s a0, ft0, rtz +; RV64IF-NEXT: ret + %1 = call i32 @llvm.experimental.constrained.fptoui.i32.f32(float %a, metadata !"fpexcept.strict") strictfp + ret i32 %1 +} +declare i32 @llvm.experimental.constrained.fptoui.i32.f32(float, metadata) + +define signext i32 @sext_fptoui(float %a) nounwind strictfp { +; RV64IF-LABEL: sext_fptoui: +; RV64IF: # %bb.0: +; RV64IF-NEXT: fmv.w.x ft0, a0 +; RV64IF-NEXT: fcvt.wu.s a0, ft0, rtz +; RV64IF-NEXT: ret + %1 = call i32 @llvm.experimental.constrained.fptoui.i32.f32(float %a, metadata !"fpexcept.strict") strictfp + ret i32 %1 +} + +define zeroext i32 @zext_fptoui(float %a) nounwind strictfp { +; RV64IF-LABEL: zext_fptoui: +; RV64IF: # %bb.0: +; RV64IF-NEXT: fmv.w.x ft0, a0 +; RV64IF-NEXT: fcvt.lu.s a0, ft0, rtz +; RV64IF-NEXT: ret + %1 = call i32 @llvm.experimental.constrained.fptoui.i32.f32(float %a, metadata !"fpexcept.strict") strictfp + ret i32 %1 +} + +define float @uitofp_aext_i32_to_f32(i32 %a) nounwind strictfp { +; RV64IF-LABEL: uitofp_aext_i32_to_f32: +; RV64IF: # %bb.0: +; RV64IF-NEXT: fcvt.s.wu ft0, a0 +; RV64IF-NEXT: fmv.x.w a0, ft0 +; RV64IF-NEXT: ret + %1 = call float @llvm.experimental.constrained.uitofp.f32.i32(i32 %a, metadata !"round.dynamic", metadata !"fpexcept.strict") strictfp + ret float %1 +} +declare float @llvm.experimental.constrained.uitofp.f32.i32(i32 %a, metadata, metadata) + +define float @uitofp_sext_i32_to_f32(i32 signext %a) nounwind strictfp { +; RV64IF-LABEL: uitofp_sext_i32_to_f32: +; RV64IF: # %bb.0: +; RV64IF-NEXT: fcvt.s.wu ft0, a0 +; RV64IF-NEXT: fmv.x.w a0, ft0 +; RV64IF-NEXT: ret + %1 = call float @llvm.experimental.constrained.uitofp.f32.i32(i32 %a, metadata !"round.dynamic", metadata !"fpexcept.strict") strictfp + ret float %1 +} + +define float @uitofp_zext_i32_to_f32(i32 zeroext %a) nounwind strictfp { +; RV64IF-LABEL: uitofp_zext_i32_to_f32: +; RV64IF: # %bb.0: +; RV64IF-NEXT: fcvt.s.wu ft0, a0 +; RV64IF-NEXT: fmv.x.w a0, ft0 +; RV64IF-NEXT: ret + %1 = call float @llvm.experimental.constrained.uitofp.f32.i32(i32 %a, metadata !"round.dynamic", metadata !"fpexcept.strict") strictfp + ret float %1 +} + +define float @sitofp_aext_i32_to_f32(i32 %a) nounwind strictfp { +; RV64IF-LABEL: sitofp_aext_i32_to_f32: +; RV64IF: # %bb.0: +; RV64IF-NEXT: 
fcvt.s.w ft0, a0 +; RV64IF-NEXT: fmv.x.w a0, ft0 +; RV64IF-NEXT: ret + %1 = call float @llvm.experimental.constrained.sitofp.f32.i32(i32 %a, metadata !"round.dynamic", metadata !"fpexcept.strict") strictfp + ret float %1 +} +declare float @llvm.experimental.constrained.sitofp.f32.i32(i32, metadata, metadata) + +define float @sitofp_sext_i32_to_f32(i32 signext %a) nounwind strictfp { +; RV64IF-LABEL: sitofp_sext_i32_to_f32: +; RV64IF: # %bb.0: +; RV64IF-NEXT: fcvt.s.w ft0, a0 +; RV64IF-NEXT: fmv.x.w a0, ft0 +; RV64IF-NEXT: ret + %1 = call float @llvm.experimental.constrained.sitofp.f32.i32(i32 %a, metadata !"round.dynamic", metadata !"fpexcept.strict") strictfp + ret float %1 +} + +define float @sitofp_zext_i32_to_f32(i32 zeroext %a) nounwind strictfp { +; RV64IF-LABEL: sitofp_zext_i32_to_f32: +; RV64IF: # %bb.0: +; RV64IF-NEXT: fcvt.s.w ft0, a0 +; RV64IF-NEXT: fmv.x.w a0, ft0 +; RV64IF-NEXT: ret + %1 = call float @llvm.experimental.constrained.sitofp.f32.i32(i32 %a, metadata !"round.dynamic", metadata !"fpexcept.strict") strictfp + ret float %1 +} diff --git a/llvm/test/CodeGen/RISCV/rv64f-half-convert-strict.ll b/llvm/test/CodeGen/RISCV/rv64f-half-convert-strict.ll new file mode 100644 index 000000000000..cd9eed25a85a --- /dev/null +++ b/llvm/test/CodeGen/RISCV/rv64f-half-convert-strict.ll @@ -0,0 +1,123 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc -mtriple=riscv64 -mattr=+experimental-zfh -verify-machineinstrs \ +; RUN: -target-abi lp64f -disable-strictnode-mutation < %s | \ +; RUN: FileCheck %s -check-prefix=RV64IZFH + +; This file exhaustively checks half<->i32 conversions. In general, +; fcvt.l[u].h can be selected instead of fcvt.w[u].h because poison is +; generated for an fpto[s|u]i conversion if the result doesn't fit in the +; target type. 
+ +define i32 @aext_fptosi(half %a) nounwind { +; RV64IZFH-LABEL: aext_fptosi: +; RV64IZFH: # %bb.0: +; RV64IZFH-NEXT: fcvt.w.h a0, fa0, rtz +; RV64IZFH-NEXT: ret + %1 = call i32 @llvm.experimental.constrained.fptosi.i32.f16(half %a, metadata !"fpexcept.strict") strictfp + ret i32 %1 +} +declare i32 @llvm.experimental.constrained.fptosi.i32.f16(half, metadata) + +define signext i32 @sext_fptosi(half %a) nounwind { +; RV64IZFH-LABEL: sext_fptosi: +; RV64IZFH: # %bb.0: +; RV64IZFH-NEXT: fcvt.w.h a0, fa0, rtz +; RV64IZFH-NEXT: ret + %1 = call i32 @llvm.experimental.constrained.fptosi.i32.f16(half %a, metadata !"fpexcept.strict") strictfp + ret i32 %1 +} + +define zeroext i32 @zext_fptosi(half %a) nounwind { +; RV64IZFH-LABEL: zext_fptosi: +; RV64IZFH: # %bb.0: +; RV64IZFH-NEXT: fcvt.w.h a0, fa0, rtz +; RV64IZFH-NEXT: slli a0, a0, 32 +; RV64IZFH-NEXT: srli a0, a0, 32 +; RV64IZFH-NEXT: ret + %1 = call i32 @llvm.experimental.constrained.fptosi.i32.f16(half %a, metadata !"fpexcept.strict") strictfp + ret i32 %1 +} + +define i32 @aext_fptoui(half %a) nounwind { +; RV64IZFH-LABEL: aext_fptoui: +; RV64IZFH: # %bb.0: +; RV64IZFH-NEXT: fcvt.wu.h a0, fa0, rtz +; RV64IZFH-NEXT: ret + %1 = call i32 @llvm.experimental.constrained.fptoui.i32.f16(half %a, metadata !"fpexcept.strict") strictfp + ret i32 %1 +} +declare i32 @llvm.experimental.constrained.fptoui.i32.f16(half, metadata) + +define signext i32 @sext_fptoui(half %a) nounwind { +; RV64IZFH-LABEL: sext_fptoui: +; RV64IZFH: # %bb.0: +; RV64IZFH-NEXT: fcvt.wu.h a0, fa0, rtz +; RV64IZFH-NEXT: ret + %1 = call i32 @llvm.experimental.constrained.fptoui.i32.f16(half %a, metadata !"fpexcept.strict") strictfp + ret i32 %1 +} + +define zeroext i32 @zext_fptoui(half %a) nounwind { +; RV64IZFH-LABEL: zext_fptoui: +; RV64IZFH: # %bb.0: +; RV64IZFH-NEXT: fcvt.lu.h a0, fa0, rtz +; RV64IZFH-NEXT: ret + %1 = call i32 @llvm.experimental.constrained.fptoui.i32.f16(half %a, metadata !"fpexcept.strict") strictfp + ret i32 %1 +} + +define half @uitofp_aext_i32_to_f16(i32 %a) nounwind { +; RV64IZFH-LABEL: uitofp_aext_i32_to_f16: +; RV64IZFH: # %bb.0: +; RV64IZFH-NEXT: fcvt.h.wu fa0, a0 +; RV64IZFH-NEXT: ret + %1 = call half @llvm.experimental.constrained.uitofp.f16.i32(i32 %a, metadata !"round.dynamic", metadata !"fpexcept.strict") strictfp + ret half %1 +} +declare half @llvm.experimental.constrained.uitofp.f16.i32(i32 %a, metadata, metadata) + +define half @uitofp_sext_i32_to_f16(i32 signext %a) nounwind { +; RV64IZFH-LABEL: uitofp_sext_i32_to_f16: +; RV64IZFH: # %bb.0: +; RV64IZFH-NEXT: fcvt.h.wu fa0, a0 +; RV64IZFH-NEXT: ret + %1 = call half @llvm.experimental.constrained.uitofp.f16.i32(i32 %a, metadata !"round.dynamic", metadata !"fpexcept.strict") strictfp + ret half %1 +} + +define half @uitofp_zext_i32_to_f16(i32 zeroext %a) nounwind { +; RV64IZFH-LABEL: uitofp_zext_i32_to_f16: +; RV64IZFH: # %bb.0: +; RV64IZFH-NEXT: fcvt.h.wu fa0, a0 +; RV64IZFH-NEXT: ret + %1 = call half @llvm.experimental.constrained.uitofp.f16.i32(i32 %a, metadata !"round.dynamic", metadata !"fpexcept.strict") strictfp + ret half %1 +} + +define half @sitofp_aext_i32_to_f16(i32 %a) nounwind { +; RV64IZFH-LABEL: sitofp_aext_i32_to_f16: +; RV64IZFH: # %bb.0: +; RV64IZFH-NEXT: fcvt.h.w fa0, a0 +; RV64IZFH-NEXT: ret + %1 = call half @llvm.experimental.constrained.sitofp.f16.i32(i32 %a, metadata !"round.dynamic", metadata !"fpexcept.strict") strictfp + ret half %1 +} +declare half @llvm.experimental.constrained.sitofp.f16.i32(i32 %a, metadata, metadata) + +define half @sitofp_sext_i32_to_f16(i32 
signext %a) nounwind { +; RV64IZFH-LABEL: sitofp_sext_i32_to_f16: +; RV64IZFH: # %bb.0: +; RV64IZFH-NEXT: fcvt.h.w fa0, a0 +; RV64IZFH-NEXT: ret + %1 = call half @llvm.experimental.constrained.sitofp.f16.i32(i32 %a, metadata !"round.dynamic", metadata !"fpexcept.strict") strictfp + ret half %1 +} + +define half @sitofp_zext_i32_to_f16(i32 zeroext %a) nounwind { +; RV64IZFH-LABEL: sitofp_zext_i32_to_f16: +; RV64IZFH: # %bb.0: +; RV64IZFH-NEXT: fcvt.h.w fa0, a0 +; RV64IZFH-NEXT: ret + %1 = call half @llvm.experimental.constrained.sitofp.f16.i32(i32 %a, metadata !"round.dynamic", metadata !"fpexcept.strict") strictfp + ret half %1 +} From dfb20d4d19ec80e2e3e0985e08642567019b2068 Mon Sep 17 00:00:00 2001 From: Mark de Wever Date: Tue, 21 Dec 2021 19:12:54 +0100 Subject: [PATCH 155/293] [libc++][format] Improve ABI stability. During the review of D115991 @vitaut pointed out the enum shouldn't depend on whether or not _LIBCPP_HAS_NO_INT128 is defined. The current implementation lets the enum's ABI depend on this configuration option without a good cause. Reviewed By: ldionne, #libc Differential Revision: https://reviews.llvm.org/D116120 --- libcxx/include/__format/format_arg.h | 17 +++++---- .../format.arg/arg_t.compile.pass.cpp | 38 +++++++++++++++++++ 2 files changed, 48 insertions(+), 7 deletions(-) create mode 100644 libcxx/test/libcxx/utilities/format/format.arguments/format.arg/arg_t.compile.pass.cpp diff --git a/libcxx/include/__format/format_arg.h b/libcxx/include/__format/format_arg.h index a9a8c1f0da03..59429c13d415 100644 --- a/libcxx/include/__format/format_arg.h +++ b/libcxx/include/__format/format_arg.h @@ -37,21 +37,20 @@ _LIBCPP_BEGIN_NAMESPACE_STD #if !defined(_LIBCPP_HAS_NO_CONCEPTS) namespace __format { -/** The type stored in @ref basic_format_arg. */ +/// The type stored in @ref basic_format_arg. +/// +/// @note The 128-bit types are unconditionally in the list to avoid the values +/// of the enums to depend on the availability of 128-bit integers. 
enum class _LIBCPP_ENUM_VIS __arg_t : uint8_t { __none, __boolean, __char_type, __int, __long_long, -#ifndef _LIBCPP_HAS_NO_INT128 __i128, -#endif __unsigned, __unsigned_long_long, -#ifndef _LIBCPP_HAS_NO_INT128 __u128, -#endif __float, __double, __long_double, @@ -75,18 +74,22 @@ visit_format_arg(_Visitor&& __vis, basic_format_arg<_Context> __arg) { return _VSTD::invoke(_VSTD::forward<_Visitor>(__vis), __arg.__int); case __format::__arg_t::__long_long: return _VSTD::invoke(_VSTD::forward<_Visitor>(__vis), __arg.__long_long); -#ifndef _LIBCPP_HAS_NO_INT128 case __format::__arg_t::__i128: +#ifndef _LIBCPP_HAS_NO_INT128 return _VSTD::invoke(_VSTD::forward<_Visitor>(__vis), __arg.__i128); +#else + _LIBCPP_UNREACHABLE(); #endif case __format::__arg_t::__unsigned: return _VSTD::invoke(_VSTD::forward<_Visitor>(__vis), __arg.__unsigned); case __format::__arg_t::__unsigned_long_long: return _VSTD::invoke(_VSTD::forward<_Visitor>(__vis), __arg.__unsigned_long_long); -#ifndef _LIBCPP_HAS_NO_INT128 case __format::__arg_t::__u128: +#ifndef _LIBCPP_HAS_NO_INT128 return _VSTD::invoke(_VSTD::forward<_Visitor>(__vis), __arg.__u128); +#else + _LIBCPP_UNREACHABLE(); #endif case __format::__arg_t::__float: return _VSTD::invoke(_VSTD::forward<_Visitor>(__vis), __arg.__float); diff --git a/libcxx/test/libcxx/utilities/format/format.arguments/format.arg/arg_t.compile.pass.cpp b/libcxx/test/libcxx/utilities/format/format.arguments/format.arg/arg_t.compile.pass.cpp new file mode 100644 index 000000000000..4e17a61821dc --- /dev/null +++ b/libcxx/test/libcxx/utilities/format/format.arguments/format.arg/arg_t.compile.pass.cpp @@ -0,0 +1,38 @@ +//===----------------------------------------------------------------------===// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +// UNSUPPORTED: c++03, c++11, c++14, c++17 +// UNSUPPORTED: libcpp-no-concepts +// UNSUPPORTED: libcpp-has-no-incomplete-format + +// + +// namespace __format { enum class __arg_t : uint8_t{...}; } + +#include + +#include + +#include "test_macros.h" + +static_assert(std::is_same_v, uint8_t>); + +static_assert(uint8_t(std::__format::__arg_t::__none) == 0); +static_assert(uint8_t(std::__format::__arg_t::__boolean) == 1); +static_assert(uint8_t(std::__format::__arg_t::__char_type) == 2); +static_assert(uint8_t(std::__format::__arg_t::__int) == 3); +static_assert(uint8_t(std::__format::__arg_t::__long_long) == 4); +static_assert(uint8_t(std::__format::__arg_t::__i128) == 5); +static_assert(uint8_t(std::__format::__arg_t::__unsigned) == 6); +static_assert(uint8_t(std::__format::__arg_t::__unsigned_long_long) == 7); +static_assert(uint8_t(std::__format::__arg_t::__u128) == 8); +static_assert(uint8_t(std::__format::__arg_t::__float) == 9); +static_assert(uint8_t(std::__format::__arg_t::__double) == 10); +static_assert(uint8_t(std::__format::__arg_t::__long_double) == 11); +static_assert(uint8_t(std::__format::__arg_t::__const_char_type_ptr) == 12); +static_assert(uint8_t(std::__format::__arg_t::__string_view) == 13); +static_assert(uint8_t(std::__format::__arg_t::__ptr) == 14); From 7704c503ecb8b20574fa8a8dc45dd8545216c450 Mon Sep 17 00:00:00 2001 From: Craig Topper Date: Thu, 23 Dec 2021 10:24:37 -0600 Subject: [PATCH 156/293] [RISCV] Use positive 0.0 for the neutral element in fadd reductions if nsz is present. 
-0.0 requires a constant pool. +0.0 can be made with vmv.v.x x0. Not doing this in getNeutralElement for fear of changing other targets. Differential Revision: https://reviews.llvm.org/D115978 --- llvm/lib/Target/RISCV/RISCVISelLowering.cpp | 9 ++++++--- .../RISCV/rvv/fixed-vectors-reduction-fp.ll | 17 +++++++++++++++++ .../CodeGen/RISCV/rvv/vreductions-fp-sdnode.ll | 14 ++++++++++++++ 3 files changed, 37 insertions(+), 3 deletions(-) diff --git a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp index 3919aae25630..4322f9bc1ce7 100644 --- a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp +++ b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp @@ -4538,9 +4538,12 @@ getRVVFPReductionOpAndOperands(SDValue Op, SelectionDAG &DAG, EVT EltVT) { switch (Opcode) { default: llvm_unreachable("Unhandled reduction"); - case ISD::VECREDUCE_FADD: - return std::make_tuple(RISCVISD::VECREDUCE_FADD_VL, Op.getOperand(0), - DAG.getNeutralElement(BaseOpcode, DL, EltVT, Flags)); + case ISD::VECREDUCE_FADD: { + // Use positive zero if we can. It is cheaper to materialize. + SDValue Zero = + DAG.getConstantFP(Flags.hasNoSignedZeros() ? 0.0 : -0.0, DL, EltVT); + return std::make_tuple(RISCVISD::VECREDUCE_FADD_VL, Op.getOperand(0), Zero); + } case ISD::VECREDUCE_SEQ_FADD: return std::make_tuple(RISCVISD::VECREDUCE_SEQ_FADD_VL, Op.getOperand(1), Op.getOperand(0)); diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-reduction-fp.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-reduction-fp.ll index 2d3c3a2e18e4..9217101c946c 100644 --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-reduction-fp.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-reduction-fp.ll @@ -1408,3 +1408,20 @@ define double @vreduce_fmax_v32f64(<32 x double>* %x) { %red = call double @llvm.vector.reduce.fmax.v32f64(<32 x double> %v) ret double %red } + +define float @vreduce_nsz_fadd_v4f32(<4 x float>* %x, float %s) { +; CHECK-LABEL: vreduce_nsz_fadd_v4f32: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, mu +; CHECK-NEXT: vle32.v v8, (a0) +; CHECK-NEXT: vsetivli zero, 1, e32, m1, ta, mu +; CHECK-NEXT: vmv.v.i v9, 0 +; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, mu +; CHECK-NEXT: vfredusum.vs v8, v8, v9 +; CHECK-NEXT: vfmv.f.s ft0, v8 +; CHECK-NEXT: fadd.s fa0, fa0, ft0 +; CHECK-NEXT: ret + %v = load <4 x float>, <4 x float>* %x + %red = call reassoc nsz float @llvm.vector.reduce.fadd.v4f32(float %s, <4 x float> %v) + ret float %red +} diff --git a/llvm/test/CodeGen/RISCV/rvv/vreductions-fp-sdnode.ll b/llvm/test/CodeGen/RISCV/rvv/vreductions-fp-sdnode.ll index ce802a00163a..8404398fdc85 100644 --- a/llvm/test/CodeGen/RISCV/rvv/vreductions-fp-sdnode.ll +++ b/llvm/test/CodeGen/RISCV/rvv/vreductions-fp-sdnode.ll @@ -882,3 +882,17 @@ define double @vreduce_fmax_nxv16f64( %v) { %red = call double @llvm.vector.reduce.fmax.nxv16f64( %v) ret double %red } + +define float @vreduce_nsz_fadd_nxv1f32( %v, float %s) { +; CHECK-LABEL: vreduce_nsz_fadd_nxv1f32: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetivli zero, 1, e32, m1, ta, mu +; CHECK-NEXT: vmv.v.i v9, 0 +; CHECK-NEXT: vsetvli a0, zero, e32, mf2, ta, mu +; CHECK-NEXT: vfredusum.vs v8, v8, v9 +; CHECK-NEXT: vfmv.f.s ft0, v8 +; CHECK-NEXT: fadd.s fa0, fa0, ft0 +; CHECK-NEXT: ret + %red = call reassoc nsz float @llvm.vector.reduce.fadd.nxv1f32(float %s, %v) + ret float %red +} From e8b24ee1153f7d57792a601705ca6951f41e70e1 Mon Sep 17 00:00:00 2001 From: Mark de Wever Date: Wed, 22 Dec 2021 18:46:23 +0100 Subject: [PATCH 157/293] [libc++][format][NFC] Remove some 
unneeded headers. Reviewed By: #libc, Quuxplusone, Mordante Differential Revision: https://reviews.llvm.org/D116175 --- libcxx/include/__format/format_context.h | 3 +-- libcxx/include/__format/formatter_string.h | 1 - 2 files changed, 1 insertion(+), 3 deletions(-) diff --git a/libcxx/include/__format/format_context.h b/libcxx/include/__format/format_context.h index b4fe5cc7b12c..f8ec7c8eb001 100644 --- a/libcxx/include/__format/format_context.h +++ b/libcxx/include/__format/format_context.h @@ -14,10 +14,9 @@ #include <__config> #include <__format/format_args.h> #include <__format/format_fwd.h> +#include <__iterator/back_insert_iterator.h> #include <__iterator/concepts.h> #include -#include -#include #ifndef _LIBCPP_HAS_NO_LOCALIZATION #include diff --git a/libcxx/include/__format/formatter_string.h b/libcxx/include/__format/formatter_string.h index 2be36a1ba947..75a81f5184a0 100644 --- a/libcxx/include/__format/formatter_string.h +++ b/libcxx/include/__format/formatter_string.h @@ -16,7 +16,6 @@ #include <__format/format_string.h> #include <__format/formatter.h> #include <__format/parser_std_format_spec.h> -#include #include #if !defined(_LIBCPP_HAS_NO_PRAGMA_SYSTEM_HEADER) From ab3640aa0e8361921a5d0cdc393a5b75e78ec22b Mon Sep 17 00:00:00 2001 From: Kirill Stoimenov Date: Wed, 8 Dec 2021 18:42:29 +0000 Subject: [PATCH 158/293] [ASan] Moved optimized callbacks into a separate library. This will allow linking in the callbacks directly instead of using PLT. Reviewed By: vitalybuka Differential Revision: https://reviews.llvm.org/D116182 --- clang/lib/Driver/ToolChains/CommonArgs.cpp | 5 ++++ clang/test/Driver/sanitizer-ld.c | 2 +- compiler-rt/lib/asan/CMakeLists.txt | 29 +++++++++++++++++++++- compiler-rt/lib/asan/tests/CMakeLists.txt | 2 ++ 4 files changed, 36 insertions(+), 2 deletions(-) diff --git a/clang/lib/Driver/ToolChains/CommonArgs.cpp b/clang/lib/Driver/ToolChains/CommonArgs.cpp index 407f81a2ae09..267625c70b25 100644 --- a/clang/lib/Driver/ToolChains/CommonArgs.cpp +++ b/clang/lib/Driver/ToolChains/CommonArgs.cpp @@ -826,6 +826,11 @@ collectSanitizerRuntimes(const ToolChain &TC, const ArgList &Args, if (SanArgs.needsStatsRt() && SanArgs.linkRuntimes()) StaticRuntimes.push_back("stats_client"); + // Always link the static runtime regardless of DSO or executable. + if (SanArgs.needsAsanRt()) { + HelperStaticRuntimes.push_back("asan_static"); + } + // Collect static runtimes. if (Args.hasArg(options::OPT_shared)) { // Don't link static runtimes into DSOs. 
diff --git a/clang/test/Driver/sanitizer-ld.c b/clang/test/Driver/sanitizer-ld.c index d62e19fd4021..ea8c49f2384a 100644 --- a/clang/test/Driver/sanitizer-ld.c +++ b/clang/test/Driver/sanitizer-ld.c @@ -22,7 +22,7 @@ // RUN: --sysroot=%S/Inputs/basic_linux_tree \ // RUN: | FileCheck --check-prefix=CHECK-ASAN-NO-LINK-RUNTIME-LINUX %s // -// CHECK-ASAN-NO-LINK-RUNTIME-LINUX-NOT: libclang_rt.asan +// CHECK-ASAN-NO-LINK-RUNTIME-LINUX-NOT: libclang_rt.asan-x86_64 // RUN: %clang -no-canonical-prefixes %s -### -o %t.o 2>&1 \ // RUN: -target i386-unknown-linux -fuse-ld=ld -fsanitize=address -shared-libsan \ diff --git a/compiler-rt/lib/asan/CMakeLists.txt b/compiler-rt/lib/asan/CMakeLists.txt index 2e63c8d05168..2ca59c94cc75 100644 --- a/compiler-rt/lib/asan/CMakeLists.txt +++ b/compiler-rt/lib/asan/CMakeLists.txt @@ -34,7 +34,6 @@ set(ASAN_SOURCES if (NOT WIN32 AND NOT APPLE) list(APPEND ASAN_SOURCES - asan_rtl_x86_64.S asan_interceptors_vfork.S ) endif() @@ -43,6 +42,12 @@ set(ASAN_CXX_SOURCES asan_new_delete.cpp ) +if (NOT WIN32 AND NOT APPLE) + set(ASAN_STATIC_SOURCES + asan_rtl_x86_64.S + ) +endif() + set(ASAN_PREINIT_SOURCES asan_preinit.cpp ) @@ -135,6 +140,12 @@ if(NOT APPLE) ADDITIONAL_HEADERS ${ASAN_HEADERS} CFLAGS ${ASAN_CFLAGS} DEFS ${ASAN_COMMON_DEFINITIONS}) + add_compiler_rt_object_libraries(RTAsan_static + ARCHS ${ASAN_SUPPORTED_ARCH} + SOURCES ${ASAN_STATIC_SOURCES} + ADDITIONAL_HEADERS ${ASAN_HEADERS} + CFLAGS ${ASAN_CFLAGS} + DEFS ${ASAN_COMMON_DEFINITIONS}) add_compiler_rt_object_libraries(RTAsan_preinit ARCHS ${ASAN_SUPPORTED_ARCH} SOURCES ${ASAN_PREINIT_SOURCES} @@ -176,6 +187,14 @@ if(APPLE) LINK_FLAGS ${WEAK_SYMBOL_LINK_FLAGS} DEFS ${ASAN_DYNAMIC_DEFINITIONS} PARENT_TARGET asan) + + add_compiler_rt_runtime(clang_rt.asan_static + STATIC + ARCHS ${ASAN_SUPPORTED_ARCH} + OBJECT_LIBS RTAsan_static + CFLAGS ${ASAN_CFLAGS} + DEFS ${ASAN_COMMON_DEFINITIONS} + PARENT_TARGET asan) else() # Build separate libraries for each target. @@ -207,6 +226,14 @@ else() DEFS ${ASAN_COMMON_DEFINITIONS} PARENT_TARGET asan) + add_compiler_rt_runtime(clang_rt.asan_static + STATIC + ARCHS ${ASAN_SUPPORTED_ARCH} + OBJECT_LIBS RTAsan_static + CFLAGS ${ASAN_CFLAGS} + DEFS ${ASAN_COMMON_DEFINITIONS} + PARENT_TARGET asan) + add_compiler_rt_runtime(clang_rt.asan-preinit STATIC ARCHS ${ASAN_SUPPORTED_ARCH} diff --git a/compiler-rt/lib/asan/tests/CMakeLists.txt b/compiler-rt/lib/asan/tests/CMakeLists.txt index cc375acf2dfb..95a324766ae7 100644 --- a/compiler-rt/lib/asan/tests/CMakeLists.txt +++ b/compiler-rt/lib/asan/tests/CMakeLists.txt @@ -261,6 +261,7 @@ if(COMPILER_RT_CAN_EXECUTE_TESTS AND NOT ANDROID) set(ASAN_TEST_RUNTIME_OBJECTS $ $ + $ $ $ $ @@ -286,6 +287,7 @@ if(ANDROID) # Test w/o ASan instrumentation. Link it with ASan statically. add_executable(AsanNoinstTest # FIXME: .arch? 
$ + $ $ $ $ From f766bc093c8cb8df7a8f2e95ee6ddf974f73f146 Mon Sep 17 00:00:00 2001 From: Krzysztof Parzyszek Date: Wed, 22 Dec 2021 07:46:14 -0800 Subject: [PATCH 159/293] [Hexagon] Introduce Hexagon v69 ISA --- llvm/include/llvm/IR/IntrinsicsHexagonDep.td | 538 ++++++- llvm/lib/Target/Hexagon/Hexagon.td | 55 +- llvm/lib/Target/Hexagon/HexagonDepArch.h | 34 +- llvm/lib/Target/Hexagon/HexagonDepArch.td | 2 + .../lib/Target/Hexagon/HexagonDepDecoders.inc | 1 + llvm/lib/Target/Hexagon/HexagonDepIICHVX.td | 1020 +++++++++++++- .../lib/Target/Hexagon/HexagonDepIICScalar.td | 768 ++++++++++ .../Target/Hexagon/HexagonDepInstrFormats.td | 14 + .../lib/Target/Hexagon/HexagonDepInstrInfo.td | 1253 +++++++++++++++-- .../Target/Hexagon/HexagonDepMapAsm2Intrin.td | 293 +++- llvm/lib/Target/Hexagon/HexagonDepMappings.td | 1 - .../lib/Target/Hexagon/HexagonInstrFormats.td | 7 +- llvm/lib/Target/Hexagon/HexagonPseudo.td | 11 + llvm/lib/Target/Hexagon/HexagonSchedule.td | 1 + llvm/lib/Target/Hexagon/HexagonScheduleV69.td | 40 + llvm/lib/Target/Hexagon/HexagonSubtarget.cpp | 72 +- llvm/lib/Target/Hexagon/HexagonSubtarget.h | 25 +- .../Hexagon/MCTargetDesc/HexagonBaseInfo.h | 5 +- .../Hexagon/MCTargetDesc/HexagonMCChecker.cpp | 38 +- .../Hexagon/MCTargetDesc/HexagonMCChecker.h | 1 + .../MCTargetDesc/HexagonMCInstrInfo.cpp | 18 +- .../Hexagon/MCTargetDesc/HexagonMCInstrInfo.h | 4 +- .../MCTargetDesc/HexagonMCTargetDesc.cpp | 32 +- 23 files changed, 4018 insertions(+), 215 deletions(-) create mode 100644 llvm/lib/Target/Hexagon/HexagonScheduleV69.td diff --git a/llvm/include/llvm/IR/IntrinsicsHexagonDep.td b/llvm/include/llvm/IR/IntrinsicsHexagonDep.td index 8f0f743d1dec..177114636a50 100644 --- a/llvm/include/llvm/IR/IntrinsicsHexagonDep.td +++ b/llvm/include/llvm/IR/IntrinsicsHexagonDep.td @@ -316,7 +316,7 @@ class Hexagon_v32i32_v64i32_Intrinsic; -// tag : V6_lvsplatb +// tag : V6_lvsplatw class Hexagon_v16i32_i32_Intrinsic intr_properties = [IntrNoMem]> : Hexagon_Intrinsic; -// tag : V6_vabsb +// tag : V6_vabs_hf class Hexagon_v16i32_v16i32_Intrinsic intr_properties = [IntrNoMem]> : Hexagon_Intrinsic; -// tag : V6_vabsb +// tag : V6_vabs_hf class Hexagon_v32i32_v32i32_Intrinsic intr_properties = [IntrNoMem]> : Hexagon_Intrinsic; +// tag : V6_vadd_sf_hf +class Hexagon_v32i32_v16i32v16i32_Intrinsic intr_properties = [IntrNoMem]> + : Hexagon_Intrinsic; + +// tag : V6_vadd_sf_hf +class Hexagon_v64i32_v32i32v32i32_Intrinsic intr_properties = [IntrNoMem]> + : Hexagon_Intrinsic; + // tag : V6_vaddb_dv class Hexagon_v64i32_v64i32v64i32_Intrinsic intr_properties = [IntrNoMem]> @@ -491,20 +505,6 @@ class Hexagon_v32i32_v32i32v32i32v128i1_Intrinsic; -// tag : V6_vaddhw -class Hexagon_v32i32_v16i32v16i32_Intrinsic intr_properties = [IntrNoMem]> - : Hexagon_Intrinsic; - -// tag : V6_vaddhw -class Hexagon_v64i32_v32i32v32i32_Intrinsic intr_properties = [IntrNoMem]> - : Hexagon_Intrinsic; - // tag : V6_vaddhw_acc class Hexagon_v32i32_v32i32v16i32v16i32_Intrinsic intr_properties = [IntrNoMem]> @@ -610,6 +610,20 @@ class Hexagon_v32i32_v32i32i32_Intrinsic; +// tag : V6_vasrvuhubrndsat +class Hexagon_v16i32_v32i32v16i32_Intrinsic intr_properties = [IntrNoMem]> + : Hexagon_Intrinsic; + +// tag : V6_vasrvuhubrndsat +class Hexagon_v32i32_v64i32v32i32_Intrinsic intr_properties = [IntrNoMem]> + : Hexagon_Intrinsic; + // tag : V6_vassignp class Hexagon_v64i32_v64i32_Intrinsic intr_properties = [IntrNoMem]> @@ -617,6 +631,20 @@ class Hexagon_v64i32_v64i32_Intrinsic; +// tag : V6_vcvt_hf_b +class Hexagon_v32i32_v16i32_Intrinsic 
intr_properties = [IntrNoMem]> + : Hexagon_Intrinsic; + +// tag : V6_vcvt_hf_b +class Hexagon_v64i32_v32i32_Intrinsic intr_properties = [IntrNoMem]> + : Hexagon_Intrinsic; + // tag : V6_vd0 class Hexagon_v16i32__Intrinsic intr_properties = [IntrNoMem]> @@ -652,6 +680,20 @@ class Hexagon_v64i32_v32i32v32i32i32_Intrinsic; +// tag : V6_vdmpy_sf_hf_acc +class Hexagon_v16i32_v16i32v16i32v16i32_Intrinsic intr_properties = [IntrNoMem]> + : Hexagon_Intrinsic; + +// tag : V6_vdmpy_sf_hf_acc +class Hexagon_v32i32_v32i32v32i32v32i32_Intrinsic intr_properties = [IntrNoMem]> + : Hexagon_Intrinsic; + // tag : V6_vdmpybus_dv class Hexagon_v64i32_v64i32i32_Intrinsic intr_properties = [IntrNoMem]> @@ -687,20 +729,6 @@ class Hexagon_v32i32_v32i32v64i32i32_Intrinsic; -// tag : V6_vdmpyhvsat_acc -class Hexagon_v16i32_v16i32v16i32v16i32_Intrinsic intr_properties = [IntrNoMem]> - : Hexagon_Intrinsic; - -// tag : V6_vdmpyhvsat_acc -class Hexagon_v32i32_v32i32v32i32v32i32_Intrinsic intr_properties = [IntrNoMem]> - : Hexagon_Intrinsic; - // tag : V6_veqb class Hexagon_v64i1_v16i32v16i32_Intrinsic intr_properties = [IntrNoMem]> @@ -897,20 +925,6 @@ class Hexagon_v64i32_v64i32v64i32i32i32_Intrinsic; -// tag : V6_vsb -class Hexagon_v32i32_v16i32_Intrinsic intr_properties = [IntrNoMem]> - : Hexagon_Intrinsic; - -// tag : V6_vsb -class Hexagon_v64i32_v32i32_Intrinsic intr_properties = [IntrNoMem]> - : Hexagon_Intrinsic; - // tag : V6_vscattermh class Hexagon__i32i32v16i32v16i32_Intrinsic intr_properties = [IntrNoMem]> @@ -6139,3 +6153,437 @@ Hexagon_v32i32_v32i32v32i32v32i32i32_Intrinsic<"HEXAGON_V6_v6mpyvubs10_vxx", [In def int_hexagon_V6_v6mpyvubs10_vxx_128B : Hexagon_v64i32_v64i32v64i32v64i32i32_Intrinsic<"HEXAGON_V6_v6mpyvubs10_vxx_128B", [IntrNoMem, ImmArg>]>; +def int_hexagon_V6_vabs_hf : +Hexagon_v16i32_v16i32_Intrinsic<"HEXAGON_V6_vabs_hf">; + +def int_hexagon_V6_vabs_hf_128B : +Hexagon_v32i32_v32i32_Intrinsic<"HEXAGON_V6_vabs_hf_128B">; + +def int_hexagon_V6_vabs_sf : +Hexagon_v16i32_v16i32_Intrinsic<"HEXAGON_V6_vabs_sf">; + +def int_hexagon_V6_vabs_sf_128B : +Hexagon_v32i32_v32i32_Intrinsic<"HEXAGON_V6_vabs_sf_128B">; + +def int_hexagon_V6_vadd_hf : +Hexagon_v16i32_v16i32v16i32_Intrinsic<"HEXAGON_V6_vadd_hf">; + +def int_hexagon_V6_vadd_hf_128B : +Hexagon_v32i32_v32i32v32i32_Intrinsic<"HEXAGON_V6_vadd_hf_128B">; + +def int_hexagon_V6_vadd_hf_hf : +Hexagon_v16i32_v16i32v16i32_Intrinsic<"HEXAGON_V6_vadd_hf_hf">; + +def int_hexagon_V6_vadd_hf_hf_128B : +Hexagon_v32i32_v32i32v32i32_Intrinsic<"HEXAGON_V6_vadd_hf_hf_128B">; + +def int_hexagon_V6_vadd_qf16 : +Hexagon_v16i32_v16i32v16i32_Intrinsic<"HEXAGON_V6_vadd_qf16">; + +def int_hexagon_V6_vadd_qf16_128B : +Hexagon_v32i32_v32i32v32i32_Intrinsic<"HEXAGON_V6_vadd_qf16_128B">; + +def int_hexagon_V6_vadd_qf16_mix : +Hexagon_v16i32_v16i32v16i32_Intrinsic<"HEXAGON_V6_vadd_qf16_mix">; + +def int_hexagon_V6_vadd_qf16_mix_128B : +Hexagon_v32i32_v32i32v32i32_Intrinsic<"HEXAGON_V6_vadd_qf16_mix_128B">; + +def int_hexagon_V6_vadd_qf32 : +Hexagon_v16i32_v16i32v16i32_Intrinsic<"HEXAGON_V6_vadd_qf32">; + +def int_hexagon_V6_vadd_qf32_128B : +Hexagon_v32i32_v32i32v32i32_Intrinsic<"HEXAGON_V6_vadd_qf32_128B">; + +def int_hexagon_V6_vadd_qf32_mix : +Hexagon_v16i32_v16i32v16i32_Intrinsic<"HEXAGON_V6_vadd_qf32_mix">; + +def int_hexagon_V6_vadd_qf32_mix_128B : +Hexagon_v32i32_v32i32v32i32_Intrinsic<"HEXAGON_V6_vadd_qf32_mix_128B">; + +def int_hexagon_V6_vadd_sf : +Hexagon_v16i32_v16i32v16i32_Intrinsic<"HEXAGON_V6_vadd_sf">; + +def int_hexagon_V6_vadd_sf_128B : 
+Hexagon_v32i32_v32i32v32i32_Intrinsic<"HEXAGON_V6_vadd_sf_128B">; + +def int_hexagon_V6_vadd_sf_hf : +Hexagon_v32i32_v16i32v16i32_Intrinsic<"HEXAGON_V6_vadd_sf_hf">; + +def int_hexagon_V6_vadd_sf_hf_128B : +Hexagon_v64i32_v32i32v32i32_Intrinsic<"HEXAGON_V6_vadd_sf_hf_128B">; + +def int_hexagon_V6_vadd_sf_sf : +Hexagon_v16i32_v16i32v16i32_Intrinsic<"HEXAGON_V6_vadd_sf_sf">; + +def int_hexagon_V6_vadd_sf_sf_128B : +Hexagon_v32i32_v32i32v32i32_Intrinsic<"HEXAGON_V6_vadd_sf_sf_128B">; + +def int_hexagon_V6_vassign_fp : +Hexagon_v16i32_v16i32_Intrinsic<"HEXAGON_V6_vassign_fp">; + +def int_hexagon_V6_vassign_fp_128B : +Hexagon_v32i32_v32i32_Intrinsic<"HEXAGON_V6_vassign_fp_128B">; + +def int_hexagon_V6_vconv_hf_qf16 : +Hexagon_v16i32_v16i32_Intrinsic<"HEXAGON_V6_vconv_hf_qf16">; + +def int_hexagon_V6_vconv_hf_qf16_128B : +Hexagon_v32i32_v32i32_Intrinsic<"HEXAGON_V6_vconv_hf_qf16_128B">; + +def int_hexagon_V6_vconv_hf_qf32 : +Hexagon_v16i32_v32i32_Intrinsic<"HEXAGON_V6_vconv_hf_qf32">; + +def int_hexagon_V6_vconv_hf_qf32_128B : +Hexagon_v32i32_v64i32_Intrinsic<"HEXAGON_V6_vconv_hf_qf32_128B">; + +def int_hexagon_V6_vconv_sf_qf32 : +Hexagon_v16i32_v16i32_Intrinsic<"HEXAGON_V6_vconv_sf_qf32">; + +def int_hexagon_V6_vconv_sf_qf32_128B : +Hexagon_v32i32_v32i32_Intrinsic<"HEXAGON_V6_vconv_sf_qf32_128B">; + +def int_hexagon_V6_vcvt_b_hf : +Hexagon_v16i32_v16i32v16i32_Intrinsic<"HEXAGON_V6_vcvt_b_hf">; + +def int_hexagon_V6_vcvt_b_hf_128B : +Hexagon_v32i32_v32i32v32i32_Intrinsic<"HEXAGON_V6_vcvt_b_hf_128B">; + +def int_hexagon_V6_vcvt_h_hf : +Hexagon_v16i32_v16i32_Intrinsic<"HEXAGON_V6_vcvt_h_hf">; + +def int_hexagon_V6_vcvt_h_hf_128B : +Hexagon_v32i32_v32i32_Intrinsic<"HEXAGON_V6_vcvt_h_hf_128B">; + +def int_hexagon_V6_vcvt_hf_b : +Hexagon_v32i32_v16i32_Intrinsic<"HEXAGON_V6_vcvt_hf_b">; + +def int_hexagon_V6_vcvt_hf_b_128B : +Hexagon_v64i32_v32i32_Intrinsic<"HEXAGON_V6_vcvt_hf_b_128B">; + +def int_hexagon_V6_vcvt_hf_h : +Hexagon_v16i32_v16i32_Intrinsic<"HEXAGON_V6_vcvt_hf_h">; + +def int_hexagon_V6_vcvt_hf_h_128B : +Hexagon_v32i32_v32i32_Intrinsic<"HEXAGON_V6_vcvt_hf_h_128B">; + +def int_hexagon_V6_vcvt_hf_sf : +Hexagon_v16i32_v16i32v16i32_Intrinsic<"HEXAGON_V6_vcvt_hf_sf">; + +def int_hexagon_V6_vcvt_hf_sf_128B : +Hexagon_v32i32_v32i32v32i32_Intrinsic<"HEXAGON_V6_vcvt_hf_sf_128B">; + +def int_hexagon_V6_vcvt_hf_ub : +Hexagon_v32i32_v16i32_Intrinsic<"HEXAGON_V6_vcvt_hf_ub">; + +def int_hexagon_V6_vcvt_hf_ub_128B : +Hexagon_v64i32_v32i32_Intrinsic<"HEXAGON_V6_vcvt_hf_ub_128B">; + +def int_hexagon_V6_vcvt_hf_uh : +Hexagon_v16i32_v16i32_Intrinsic<"HEXAGON_V6_vcvt_hf_uh">; + +def int_hexagon_V6_vcvt_hf_uh_128B : +Hexagon_v32i32_v32i32_Intrinsic<"HEXAGON_V6_vcvt_hf_uh_128B">; + +def int_hexagon_V6_vcvt_sf_hf : +Hexagon_v32i32_v16i32_Intrinsic<"HEXAGON_V6_vcvt_sf_hf">; + +def int_hexagon_V6_vcvt_sf_hf_128B : +Hexagon_v64i32_v32i32_Intrinsic<"HEXAGON_V6_vcvt_sf_hf_128B">; + +def int_hexagon_V6_vcvt_ub_hf : +Hexagon_v16i32_v16i32v16i32_Intrinsic<"HEXAGON_V6_vcvt_ub_hf">; + +def int_hexagon_V6_vcvt_ub_hf_128B : +Hexagon_v32i32_v32i32v32i32_Intrinsic<"HEXAGON_V6_vcvt_ub_hf_128B">; + +def int_hexagon_V6_vcvt_uh_hf : +Hexagon_v16i32_v16i32_Intrinsic<"HEXAGON_V6_vcvt_uh_hf">; + +def int_hexagon_V6_vcvt_uh_hf_128B : +Hexagon_v32i32_v32i32_Intrinsic<"HEXAGON_V6_vcvt_uh_hf_128B">; + +def int_hexagon_V6_vdmpy_sf_hf : +Hexagon_v16i32_v16i32v16i32_Intrinsic<"HEXAGON_V6_vdmpy_sf_hf">; + +def int_hexagon_V6_vdmpy_sf_hf_128B : +Hexagon_v32i32_v32i32v32i32_Intrinsic<"HEXAGON_V6_vdmpy_sf_hf_128B">; + +def 
int_hexagon_V6_vdmpy_sf_hf_acc : +Hexagon_v16i32_v16i32v16i32v16i32_Intrinsic<"HEXAGON_V6_vdmpy_sf_hf_acc">; + +def int_hexagon_V6_vdmpy_sf_hf_acc_128B : +Hexagon_v32i32_v32i32v32i32v32i32_Intrinsic<"HEXAGON_V6_vdmpy_sf_hf_acc_128B">; + +def int_hexagon_V6_vfmax_hf : +Hexagon_v16i32_v16i32v16i32_Intrinsic<"HEXAGON_V6_vfmax_hf">; + +def int_hexagon_V6_vfmax_hf_128B : +Hexagon_v32i32_v32i32v32i32_Intrinsic<"HEXAGON_V6_vfmax_hf_128B">; + +def int_hexagon_V6_vfmax_sf : +Hexagon_v16i32_v16i32v16i32_Intrinsic<"HEXAGON_V6_vfmax_sf">; + +def int_hexagon_V6_vfmax_sf_128B : +Hexagon_v32i32_v32i32v32i32_Intrinsic<"HEXAGON_V6_vfmax_sf_128B">; + +def int_hexagon_V6_vfmin_hf : +Hexagon_v16i32_v16i32v16i32_Intrinsic<"HEXAGON_V6_vfmin_hf">; + +def int_hexagon_V6_vfmin_hf_128B : +Hexagon_v32i32_v32i32v32i32_Intrinsic<"HEXAGON_V6_vfmin_hf_128B">; + +def int_hexagon_V6_vfmin_sf : +Hexagon_v16i32_v16i32v16i32_Intrinsic<"HEXAGON_V6_vfmin_sf">; + +def int_hexagon_V6_vfmin_sf_128B : +Hexagon_v32i32_v32i32v32i32_Intrinsic<"HEXAGON_V6_vfmin_sf_128B">; + +def int_hexagon_V6_vfneg_hf : +Hexagon_v16i32_v16i32_Intrinsic<"HEXAGON_V6_vfneg_hf">; + +def int_hexagon_V6_vfneg_hf_128B : +Hexagon_v32i32_v32i32_Intrinsic<"HEXAGON_V6_vfneg_hf_128B">; + +def int_hexagon_V6_vfneg_sf : +Hexagon_v16i32_v16i32_Intrinsic<"HEXAGON_V6_vfneg_sf">; + +def int_hexagon_V6_vfneg_sf_128B : +Hexagon_v32i32_v32i32_Intrinsic<"HEXAGON_V6_vfneg_sf_128B">; + +def int_hexagon_V6_vgthf : +Hexagon_v64i1_v16i32v16i32_Intrinsic<"HEXAGON_V6_vgthf">; + +def int_hexagon_V6_vgthf_128B : +Hexagon_v128i1_v32i32v32i32_Intrinsic<"HEXAGON_V6_vgthf_128B">; + +def int_hexagon_V6_vgthf_and : +Hexagon_v64i1_v64i1v16i32v16i32_Intrinsic<"HEXAGON_V6_vgthf_and">; + +def int_hexagon_V6_vgthf_and_128B : +Hexagon_v128i1_v128i1v32i32v32i32_Intrinsic<"HEXAGON_V6_vgthf_and_128B">; + +def int_hexagon_V6_vgthf_or : +Hexagon_v64i1_v64i1v16i32v16i32_Intrinsic<"HEXAGON_V6_vgthf_or">; + +def int_hexagon_V6_vgthf_or_128B : +Hexagon_v128i1_v128i1v32i32v32i32_Intrinsic<"HEXAGON_V6_vgthf_or_128B">; + +def int_hexagon_V6_vgthf_xor : +Hexagon_v64i1_v64i1v16i32v16i32_Intrinsic<"HEXAGON_V6_vgthf_xor">; + +def int_hexagon_V6_vgthf_xor_128B : +Hexagon_v128i1_v128i1v32i32v32i32_Intrinsic<"HEXAGON_V6_vgthf_xor_128B">; + +def int_hexagon_V6_vgtsf : +Hexagon_v64i1_v16i32v16i32_Intrinsic<"HEXAGON_V6_vgtsf">; + +def int_hexagon_V6_vgtsf_128B : +Hexagon_v128i1_v32i32v32i32_Intrinsic<"HEXAGON_V6_vgtsf_128B">; + +def int_hexagon_V6_vgtsf_and : +Hexagon_v64i1_v64i1v16i32v16i32_Intrinsic<"HEXAGON_V6_vgtsf_and">; + +def int_hexagon_V6_vgtsf_and_128B : +Hexagon_v128i1_v128i1v32i32v32i32_Intrinsic<"HEXAGON_V6_vgtsf_and_128B">; + +def int_hexagon_V6_vgtsf_or : +Hexagon_v64i1_v64i1v16i32v16i32_Intrinsic<"HEXAGON_V6_vgtsf_or">; + +def int_hexagon_V6_vgtsf_or_128B : +Hexagon_v128i1_v128i1v32i32v32i32_Intrinsic<"HEXAGON_V6_vgtsf_or_128B">; + +def int_hexagon_V6_vgtsf_xor : +Hexagon_v64i1_v64i1v16i32v16i32_Intrinsic<"HEXAGON_V6_vgtsf_xor">; + +def int_hexagon_V6_vgtsf_xor_128B : +Hexagon_v128i1_v128i1v32i32v32i32_Intrinsic<"HEXAGON_V6_vgtsf_xor_128B">; + +def int_hexagon_V6_vmax_hf : +Hexagon_v16i32_v16i32v16i32_Intrinsic<"HEXAGON_V6_vmax_hf">; + +def int_hexagon_V6_vmax_hf_128B : +Hexagon_v32i32_v32i32v32i32_Intrinsic<"HEXAGON_V6_vmax_hf_128B">; + +def int_hexagon_V6_vmax_sf : +Hexagon_v16i32_v16i32v16i32_Intrinsic<"HEXAGON_V6_vmax_sf">; + +def int_hexagon_V6_vmax_sf_128B : +Hexagon_v32i32_v32i32v32i32_Intrinsic<"HEXAGON_V6_vmax_sf_128B">; + +def int_hexagon_V6_vmin_hf : 
+Hexagon_v16i32_v16i32v16i32_Intrinsic<"HEXAGON_V6_vmin_hf">; + +def int_hexagon_V6_vmin_hf_128B : +Hexagon_v32i32_v32i32v32i32_Intrinsic<"HEXAGON_V6_vmin_hf_128B">; + +def int_hexagon_V6_vmin_sf : +Hexagon_v16i32_v16i32v16i32_Intrinsic<"HEXAGON_V6_vmin_sf">; + +def int_hexagon_V6_vmin_sf_128B : +Hexagon_v32i32_v32i32v32i32_Intrinsic<"HEXAGON_V6_vmin_sf_128B">; + +def int_hexagon_V6_vmpy_hf_hf : +Hexagon_v16i32_v16i32v16i32_Intrinsic<"HEXAGON_V6_vmpy_hf_hf">; + +def int_hexagon_V6_vmpy_hf_hf_128B : +Hexagon_v32i32_v32i32v32i32_Intrinsic<"HEXAGON_V6_vmpy_hf_hf_128B">; + +def int_hexagon_V6_vmpy_hf_hf_acc : +Hexagon_v16i32_v16i32v16i32v16i32_Intrinsic<"HEXAGON_V6_vmpy_hf_hf_acc">; + +def int_hexagon_V6_vmpy_hf_hf_acc_128B : +Hexagon_v32i32_v32i32v32i32v32i32_Intrinsic<"HEXAGON_V6_vmpy_hf_hf_acc_128B">; + +def int_hexagon_V6_vmpy_qf16 : +Hexagon_v16i32_v16i32v16i32_Intrinsic<"HEXAGON_V6_vmpy_qf16">; + +def int_hexagon_V6_vmpy_qf16_128B : +Hexagon_v32i32_v32i32v32i32_Intrinsic<"HEXAGON_V6_vmpy_qf16_128B">; + +def int_hexagon_V6_vmpy_qf16_hf : +Hexagon_v16i32_v16i32v16i32_Intrinsic<"HEXAGON_V6_vmpy_qf16_hf">; + +def int_hexagon_V6_vmpy_qf16_hf_128B : +Hexagon_v32i32_v32i32v32i32_Intrinsic<"HEXAGON_V6_vmpy_qf16_hf_128B">; + +def int_hexagon_V6_vmpy_qf16_mix_hf : +Hexagon_v16i32_v16i32v16i32_Intrinsic<"HEXAGON_V6_vmpy_qf16_mix_hf">; + +def int_hexagon_V6_vmpy_qf16_mix_hf_128B : +Hexagon_v32i32_v32i32v32i32_Intrinsic<"HEXAGON_V6_vmpy_qf16_mix_hf_128B">; + +def int_hexagon_V6_vmpy_qf32 : +Hexagon_v16i32_v16i32v16i32_Intrinsic<"HEXAGON_V6_vmpy_qf32">; + +def int_hexagon_V6_vmpy_qf32_128B : +Hexagon_v32i32_v32i32v32i32_Intrinsic<"HEXAGON_V6_vmpy_qf32_128B">; + +def int_hexagon_V6_vmpy_qf32_hf : +Hexagon_v32i32_v16i32v16i32_Intrinsic<"HEXAGON_V6_vmpy_qf32_hf">; + +def int_hexagon_V6_vmpy_qf32_hf_128B : +Hexagon_v64i32_v32i32v32i32_Intrinsic<"HEXAGON_V6_vmpy_qf32_hf_128B">; + +def int_hexagon_V6_vmpy_qf32_mix_hf : +Hexagon_v32i32_v16i32v16i32_Intrinsic<"HEXAGON_V6_vmpy_qf32_mix_hf">; + +def int_hexagon_V6_vmpy_qf32_mix_hf_128B : +Hexagon_v64i32_v32i32v32i32_Intrinsic<"HEXAGON_V6_vmpy_qf32_mix_hf_128B">; + +def int_hexagon_V6_vmpy_qf32_qf16 : +Hexagon_v32i32_v16i32v16i32_Intrinsic<"HEXAGON_V6_vmpy_qf32_qf16">; + +def int_hexagon_V6_vmpy_qf32_qf16_128B : +Hexagon_v64i32_v32i32v32i32_Intrinsic<"HEXAGON_V6_vmpy_qf32_qf16_128B">; + +def int_hexagon_V6_vmpy_qf32_sf : +Hexagon_v16i32_v16i32v16i32_Intrinsic<"HEXAGON_V6_vmpy_qf32_sf">; + +def int_hexagon_V6_vmpy_qf32_sf_128B : +Hexagon_v32i32_v32i32v32i32_Intrinsic<"HEXAGON_V6_vmpy_qf32_sf_128B">; + +def int_hexagon_V6_vmpy_sf_hf : +Hexagon_v32i32_v16i32v16i32_Intrinsic<"HEXAGON_V6_vmpy_sf_hf">; + +def int_hexagon_V6_vmpy_sf_hf_128B : +Hexagon_v64i32_v32i32v32i32_Intrinsic<"HEXAGON_V6_vmpy_sf_hf_128B">; + +def int_hexagon_V6_vmpy_sf_hf_acc : +Hexagon_v32i32_v32i32v16i32v16i32_Intrinsic<"HEXAGON_V6_vmpy_sf_hf_acc">; + +def int_hexagon_V6_vmpy_sf_hf_acc_128B : +Hexagon_v64i32_v64i32v32i32v32i32_Intrinsic<"HEXAGON_V6_vmpy_sf_hf_acc_128B">; + +def int_hexagon_V6_vmpy_sf_sf : +Hexagon_v16i32_v16i32v16i32_Intrinsic<"HEXAGON_V6_vmpy_sf_sf">; + +def int_hexagon_V6_vmpy_sf_sf_128B : +Hexagon_v32i32_v32i32v32i32_Intrinsic<"HEXAGON_V6_vmpy_sf_sf_128B">; + +def int_hexagon_V6_vsub_hf : +Hexagon_v16i32_v16i32v16i32_Intrinsic<"HEXAGON_V6_vsub_hf">; + +def int_hexagon_V6_vsub_hf_128B : +Hexagon_v32i32_v32i32v32i32_Intrinsic<"HEXAGON_V6_vsub_hf_128B">; + +def int_hexagon_V6_vsub_hf_hf : +Hexagon_v16i32_v16i32v16i32_Intrinsic<"HEXAGON_V6_vsub_hf_hf">; + +def 
int_hexagon_V6_vsub_hf_hf_128B : +Hexagon_v32i32_v32i32v32i32_Intrinsic<"HEXAGON_V6_vsub_hf_hf_128B">; + +def int_hexagon_V6_vsub_qf16 : +Hexagon_v16i32_v16i32v16i32_Intrinsic<"HEXAGON_V6_vsub_qf16">; + +def int_hexagon_V6_vsub_qf16_128B : +Hexagon_v32i32_v32i32v32i32_Intrinsic<"HEXAGON_V6_vsub_qf16_128B">; + +def int_hexagon_V6_vsub_qf16_mix : +Hexagon_v16i32_v16i32v16i32_Intrinsic<"HEXAGON_V6_vsub_qf16_mix">; + +def int_hexagon_V6_vsub_qf16_mix_128B : +Hexagon_v32i32_v32i32v32i32_Intrinsic<"HEXAGON_V6_vsub_qf16_mix_128B">; + +def int_hexagon_V6_vsub_qf32 : +Hexagon_v16i32_v16i32v16i32_Intrinsic<"HEXAGON_V6_vsub_qf32">; + +def int_hexagon_V6_vsub_qf32_128B : +Hexagon_v32i32_v32i32v32i32_Intrinsic<"HEXAGON_V6_vsub_qf32_128B">; + +def int_hexagon_V6_vsub_qf32_mix : +Hexagon_v16i32_v16i32v16i32_Intrinsic<"HEXAGON_V6_vsub_qf32_mix">; + +def int_hexagon_V6_vsub_qf32_mix_128B : +Hexagon_v32i32_v32i32v32i32_Intrinsic<"HEXAGON_V6_vsub_qf32_mix_128B">; + +def int_hexagon_V6_vsub_sf : +Hexagon_v16i32_v16i32v16i32_Intrinsic<"HEXAGON_V6_vsub_sf">; + +def int_hexagon_V6_vsub_sf_128B : +Hexagon_v32i32_v32i32v32i32_Intrinsic<"HEXAGON_V6_vsub_sf_128B">; + +def int_hexagon_V6_vsub_sf_hf : +Hexagon_v32i32_v16i32v16i32_Intrinsic<"HEXAGON_V6_vsub_sf_hf">; + +def int_hexagon_V6_vsub_sf_hf_128B : +Hexagon_v64i32_v32i32v32i32_Intrinsic<"HEXAGON_V6_vsub_sf_hf_128B">; + +def int_hexagon_V6_vsub_sf_sf : +Hexagon_v16i32_v16i32v16i32_Intrinsic<"HEXAGON_V6_vsub_sf_sf">; + +def int_hexagon_V6_vsub_sf_sf_128B : +Hexagon_v32i32_v32i32v32i32_Intrinsic<"HEXAGON_V6_vsub_sf_sf_128B">; + +// V69 HVX Instructions. + +def int_hexagon_V6_vasrvuhubrndsat : +Hexagon_v16i32_v32i32v16i32_Intrinsic<"HEXAGON_V6_vasrvuhubrndsat">; + +def int_hexagon_V6_vasrvuhubrndsat_128B : +Hexagon_v32i32_v64i32v32i32_Intrinsic<"HEXAGON_V6_vasrvuhubrndsat_128B">; + +def int_hexagon_V6_vasrvuhubsat : +Hexagon_v16i32_v32i32v16i32_Intrinsic<"HEXAGON_V6_vasrvuhubsat">; + +def int_hexagon_V6_vasrvuhubsat_128B : +Hexagon_v32i32_v64i32v32i32_Intrinsic<"HEXAGON_V6_vasrvuhubsat_128B">; + +def int_hexagon_V6_vasrvwuhrndsat : +Hexagon_v16i32_v32i32v16i32_Intrinsic<"HEXAGON_V6_vasrvwuhrndsat">; + +def int_hexagon_V6_vasrvwuhrndsat_128B : +Hexagon_v32i32_v64i32v32i32_Intrinsic<"HEXAGON_V6_vasrvwuhrndsat_128B">; + +def int_hexagon_V6_vasrvwuhsat : +Hexagon_v16i32_v32i32v16i32_Intrinsic<"HEXAGON_V6_vasrvwuhsat">; + +def int_hexagon_V6_vasrvwuhsat_128B : +Hexagon_v32i32_v64i32v32i32_Intrinsic<"HEXAGON_V6_vasrvwuhsat_128B">; + +def int_hexagon_V6_vmpyuhvs : +Hexagon_v16i32_v16i32v16i32_Intrinsic<"HEXAGON_V6_vmpyuhvs">; + +def int_hexagon_V6_vmpyuhvs_128B : +Hexagon_v32i32_v32i32v32i32_Intrinsic<"HEXAGON_V6_vmpyuhvs_128B">; + diff --git a/llvm/lib/Target/Hexagon/Hexagon.td b/llvm/lib/Target/Hexagon/Hexagon.td index 7518fd774a48..ae811b30434d 100644 --- a/llvm/lib/Target/Hexagon/Hexagon.td +++ b/llvm/lib/Target/Hexagon/Hexagon.td @@ -29,6 +29,8 @@ def ProcTinyCore: SubtargetFeature<"tinycore", "HexagonProcFamily", // Hexagon ISA Extensions def ExtensionZReg: SubtargetFeature<"zreg", "UseZRegOps", "true", "Hexagon ZReg extension instructions">; +def ExtensionHVXQFloat: SubtargetFeature<"hvx-qfloat", "UseHVXQFloatOps", + "true", "Hexagon HVX QFloating point instructions">; def ExtensionHVX: SubtargetFeature<"hvx", "HexagonHVXVersion", "Hexagon::ArchEnum::V60", "Hexagon HVX instructions">; @@ -52,6 +54,10 @@ def ExtensionHVXV68: SubtargetFeature<"hvxv68", "HexagonHVXVersion", "Hexagon::ArchEnum::V68", "Hexagon HVX instructions", [ExtensionHVXV60, ExtensionHVXV62, 
ExtensionHVXV65, ExtensionHVXV66, ExtensionHVXV67]>; +def ExtensionHVXV69: SubtargetFeature<"hvxv69", "HexagonHVXVersion", + "Hexagon::ArchEnum::V69", "Hexagon HVX instructions", + [ExtensionHVXV60, ExtensionHVXV62, ExtensionHVXV65, ExtensionHVXV66, + ExtensionHVXV67, ExtensionHVXV68]>; def ExtensionHVX64B: SubtargetFeature<"hvx-length64b", "UseHVX64BOps", "true", "Hexagon HVX 64B instructions", [ExtensionHVX]>; @@ -61,6 +67,9 @@ def ExtensionHVX128B: SubtargetFeature<"hvx-length128b", "UseHVX128BOps", def ExtensionAudio: SubtargetFeature<"audio", "UseAudioOps", "true", "Hexagon Audio extension instructions">; +def ExtensionHVXIEEEFP: SubtargetFeature<"hvx-ieee-fp", "UseHVXIEEEFPOps", + "true", "Hexagon HVX IEEE floating point instructions">; + def FeatureCompound: SubtargetFeature<"compound", "UseCompound", "true", "Use compound instructions">; def FeaturePackets: SubtargetFeature<"packets", "UsePackets", "true", @@ -88,6 +97,8 @@ def FeatureReservedR19: SubtargetFeature<"reserved-r19", "ReservedR19", def FeatureNoreturnStackElim: SubtargetFeature<"noreturn-stack-elim", "NoreturnStackElim", "true", "Eliminate stack allocation in a noreturn function when possible">; +def FeatureCabac: SubtargetFeature<"cabac", "UseCabac", "false", + "Emit the CABAC instruction">; //===----------------------------------------------------------------------===// // Hexagon Instruction Predicate Definitions. @@ -112,6 +123,8 @@ def UseHVXV67 : Predicate<"HST->useHVXV67Ops()">, AssemblerPredicate<(all_of ExtensionHVXV67)>; def UseHVXV68 : Predicate<"HST->useHVXV68Ops()">, AssemblerPredicate<(all_of ExtensionHVXV68)>; +def UseHVXV69 : Predicate<"HST->useHVXV69Ops()">, + AssemblerPredicate<(all_of ExtensionHVXV69)>; def UseAudio : Predicate<"HST->useAudioOps()">, AssemblerPredicate<(all_of ExtensionAudio)>; def UseZReg : Predicate<"HST->useZRegOps()">, @@ -119,6 +132,11 @@ def UseZReg : Predicate<"HST->useZRegOps()">, def UseCompound : Predicate<"HST->useCompound()">; def HasPreV65 : Predicate<"HST->hasPreV65()">, AssemblerPredicate<(all_of FeaturePreV65)>; +def UseHVXIEEEFP : Predicate<"HST->useHVXIEEEFPOps()">, + AssemblerPredicate<(all_of ExtensionHVXIEEEFP)>; +def UseHVXQFloat : Predicate<"HST->useHVXQFloatOps()">, + AssemblerPredicate<(all_of ExtensionHVXQFloat)>; +def UseHVXFloatingPoint: Predicate<"HST->useHVXFloatingPoint()">; def HasMemNoShuf : Predicate<"HST->hasMemNoShuf()">, AssemblerPredicate<(all_of FeatureMemNoShuf)>; def UseUnsafeMath : Predicate<"HST->useUnsafeMath()">; @@ -127,6 +145,8 @@ def NotOptTinyCore : Predicate<"!HST->isTinyCore() ||" let RecomputePerFunction = 1; } def UseSmallData : Predicate<"HST->useSmallData()">; +def UseCabac : Predicate<"HST->useCabac()">, + AssemblerPredicate<(any_of FeatureCabac)>; def Hvx64: HwMode<"+hvx-length64b">; def Hvx128: HwMode<"+hvx-length128b">; @@ -299,7 +319,7 @@ def changeAddrMode_rr_ur: InstrMapping { let ValueCols = [["BaseLongOffset"]]; } -def changeAddrMode_ur_rr : InstrMapping { +def changeAddrMode_ur_rr: InstrMapping { let FilterClass = "ImmRegShl"; let RowFields = ["CextOpcode", "PredSense", "PNewValue", "isNVStore"]; let ColFields = ["addrMode"]; @@ -370,40 +390,55 @@ class Proc; + FeatureNVJ, FeatureNVS, FeaturePackets, FeatureSmallData, + FeatureCabac]>; def : Proc<"hexagonv5", HexagonModelV5, [ArchV5, FeatureCompound, FeatureDuplex, FeaturePreV65, FeatureMemops, - FeatureNVJ, FeatureNVS, FeaturePackets, FeatureSmallData]>; + FeatureNVJ, FeatureNVS, FeaturePackets, FeatureSmallData, + FeatureCabac]>; def : Proc<"hexagonv55", 
HexagonModelV55, [ArchV5, ArchV55, FeatureCompound, FeatureDuplex, FeaturePreV65, FeatureMemops, - FeatureNVJ, FeatureNVS, FeaturePackets, FeatureSmallData]>; + FeatureNVJ, FeatureNVS, FeaturePackets, FeatureSmallData, + FeatureCabac]>; def : Proc<"hexagonv60", HexagonModelV60, [ArchV5, ArchV55, ArchV60, FeatureCompound, FeatureDuplex, FeaturePreV65, FeatureMemops, - FeatureNVJ, FeatureNVS, FeaturePackets, FeatureSmallData]>; + FeatureNVJ, FeatureNVS, FeaturePackets, FeatureSmallData, + FeatureCabac]>; def : Proc<"hexagonv62", HexagonModelV62, [ArchV5, ArchV55, ArchV60, ArchV62, FeatureCompound, FeatureDuplex, FeaturePreV65, FeatureMemops, - FeatureNVJ, FeatureNVS, FeaturePackets, FeatureSmallData]>; + FeatureNVJ, FeatureNVS, FeaturePackets, FeatureSmallData, + FeatureCabac]>; def : Proc<"hexagonv65", HexagonModelV65, [ArchV5, ArchV55, ArchV60, ArchV62, ArchV65, FeatureCompound, FeatureDuplex, FeatureMemNoShuf, FeatureMemops, - FeatureNVJ, FeatureNVS, FeaturePackets, FeatureSmallData]>; + FeatureNVJ, FeatureNVS, FeaturePackets, FeatureSmallData, + FeatureCabac]>; def : Proc<"hexagonv66", HexagonModelV66, [ArchV5, ArchV55, ArchV60, ArchV62, ArchV65, ArchV66, FeatureCompound, FeatureDuplex, FeatureMemNoShuf, FeatureMemops, - FeatureNVJ, FeatureNVS, FeaturePackets, FeatureSmallData]>; + FeatureNVJ, FeatureNVS, FeaturePackets, FeatureSmallData, + FeatureCabac]>; def : Proc<"hexagonv67", HexagonModelV67, [ArchV5, ArchV55, ArchV60, ArchV62, ArchV65, ArchV66, ArchV67, FeatureCompound, FeatureDuplex, FeatureMemNoShuf, FeatureMemops, - FeatureNVJ, FeatureNVS, FeaturePackets, FeatureSmallData]>; + FeatureNVJ, FeatureNVS, FeaturePackets, FeatureSmallData, + FeatureCabac]>; def : Proc<"hexagonv68", HexagonModelV68, [ArchV5, ArchV55, ArchV60, ArchV62, ArchV65, ArchV66, ArchV67, ArchV68, FeatureCompound, FeatureDuplex, FeatureMemNoShuf, FeatureMemops, - FeatureNVJ, FeatureNVS, FeaturePackets, FeatureSmallData]>; + FeatureNVJ, FeatureNVS, FeaturePackets, FeatureSmallData, + FeatureCabac]>; +def : Proc<"hexagonv69", HexagonModelV69, + [ArchV5, ArchV55, ArchV60, ArchV62, ArchV65, ArchV66, ArchV67, + ArchV68, ArchV69, + FeatureCompound, FeatureDuplex, FeatureMemNoShuf, FeatureMemops, + FeatureNVJ, FeatureNVS, FeaturePackets, FeatureSmallData, + FeatureCabac]>; // Need to update the correct features for tiny core. // Disable NewValueJumps since the packetizer is unable to handle a packet with // a new value jump and another SLOT0 instruction. 
diff --git a/llvm/lib/Target/Hexagon/HexagonDepArch.h b/llvm/lib/Target/Hexagon/HexagonDepArch.h index 7a43a4440b2d..56174dc7e136 100644 --- a/llvm/lib/Target/Hexagon/HexagonDepArch.h +++ b/llvm/lib/Target/Hexagon/HexagonDepArch.h @@ -21,31 +21,32 @@ namespace llvm { namespace Hexagon { -enum class ArchEnum { NoArch, Generic, V5, V55, V60, V62, V65, V66, V67, V68 }; +enum class ArchEnum { NoArch, Generic, V5, V55, V60, V62, V65, V66, V67, V68, V69 }; -static constexpr unsigned ArchValsNumArray[] = {5, 55, 60, 62, 65, 66, 67, 68}; +static constexpr unsigned ArchValsNumArray[] = {5, 55, 60, 62, 65, 66, 67, 68, 69}; static constexpr ArrayRef ArchValsNum(ArchValsNumArray); -static constexpr StringLiteral ArchValsTextArray[] = { "v5", "v55", "v60", "v62", "v65", "v66", "v67", "v68" }; +static constexpr StringLiteral ArchValsTextArray[] = { "v5", "v55", "v60", "v62", "v65", "v66", "v67", "v68", "v69" }; static constexpr ArrayRef ArchValsText(ArchValsTextArray); -static constexpr StringLiteral CpuValsTextArray[] = { "hexagonv5", "hexagonv55", "hexagonv60", "hexagonv62", "hexagonv65", "hexagonv66", "hexagonv67", "hexagonv67t", "hexagonv68" }; +static constexpr StringLiteral CpuValsTextArray[] = { "hexagonv5", "hexagonv55", "hexagonv60", "hexagonv62", "hexagonv65", "hexagonv66", "hexagonv67", "hexagonv67t", "hexagonv68", "hexagonv69" }; static constexpr ArrayRef CpuValsText(CpuValsTextArray); -static constexpr StringLiteral CpuNickTextArray[] = { "v5", "v55", "v60", "v62", "v65", "v66", "v67", "v67t", "v68" }; +static constexpr StringLiteral CpuNickTextArray[] = { "v5", "v55", "v60", "v62", "v65", "v66", "v67", "v67t", "v68", "v69" }; static constexpr ArrayRef CpuNickText(CpuNickTextArray); static const std::map CpuTable{ - {"generic", Hexagon::ArchEnum::V5}, - {"hexagonv5", Hexagon::ArchEnum::V5}, - {"hexagonv55", Hexagon::ArchEnum::V55}, - {"hexagonv60", Hexagon::ArchEnum::V60}, - {"hexagonv62", Hexagon::ArchEnum::V62}, - {"hexagonv65", Hexagon::ArchEnum::V65}, - {"hexagonv66", Hexagon::ArchEnum::V66}, - {"hexagonv67", Hexagon::ArchEnum::V67}, - {"hexagonv67t", Hexagon::ArchEnum::V67}, - {"hexagonv68", Hexagon::ArchEnum::V68}, + {"generic", Hexagon::ArchEnum::V5}, + {"hexagonv5", Hexagon::ArchEnum::V5}, + {"hexagonv55", Hexagon::ArchEnum::V55}, + {"hexagonv60", Hexagon::ArchEnum::V60}, + {"hexagonv62", Hexagon::ArchEnum::V62}, + {"hexagonv65", Hexagon::ArchEnum::V65}, + {"hexagonv66", Hexagon::ArchEnum::V66}, + {"hexagonv67", Hexagon::ArchEnum::V67}, + {"hexagonv67t", Hexagon::ArchEnum::V67}, + {"hexagonv68", Hexagon::ArchEnum::V68}, + {"hexagonv69", Hexagon::ArchEnum::V69}, }; static const std::map ElfFlagsByCpuStr = { @@ -59,6 +60,7 @@ static const std::map ElfFlagsByCpuStr = { {"hexagonv67", llvm::ELF::EF_HEXAGON_MACH_V67}, {"hexagonv67t", llvm::ELF::EF_HEXAGON_MACH_V67T}, {"hexagonv68", llvm::ELF::EF_HEXAGON_MACH_V68}, + {"hexagonv69", llvm::ELF::EF_HEXAGON_MACH_V69}, }; static const std::map ElfArchByMachFlags = { {llvm::ELF::EF_HEXAGON_MACH_V5, "V5"}, @@ -70,6 +72,7 @@ static const std::map ElfArchByMachFlags = { {llvm::ELF::EF_HEXAGON_MACH_V67, "V67"}, {llvm::ELF::EF_HEXAGON_MACH_V67T, "V67T"}, {llvm::ELF::EF_HEXAGON_MACH_V68, "V68"}, + {llvm::ELF::EF_HEXAGON_MACH_V69, "V69"}, }; static const std::map ElfCpuByMachFlags = { {llvm::ELF::EF_HEXAGON_MACH_V5, "hexagonv5"}, @@ -81,6 +84,7 @@ static const std::map ElfCpuByMachFlags = { {llvm::ELF::EF_HEXAGON_MACH_V67, "hexagonv67"}, {llvm::ELF::EF_HEXAGON_MACH_V67T, "hexagonv67t"}, {llvm::ELF::EF_HEXAGON_MACH_V68, "hexagonv68"}, + 
{llvm::ELF::EF_HEXAGON_MACH_V69, "hexagonv69"}, }; } // namespace Hexagon diff --git a/llvm/lib/Target/Hexagon/HexagonDepArch.td b/llvm/lib/Target/Hexagon/HexagonDepArch.td index e743a291f1e5..e4f24e3c2e66 100644 --- a/llvm/lib/Target/Hexagon/HexagonDepArch.td +++ b/llvm/lib/Target/Hexagon/HexagonDepArch.td @@ -24,3 +24,5 @@ def ArchV67: SubtargetFeature<"v67", "HexagonArchVersion", "Hexagon::ArchEnum::V def HasV67 : Predicate<"HST->hasV67Ops()">, AssemblerPredicate<(all_of ArchV67)>; def ArchV68: SubtargetFeature<"v68", "HexagonArchVersion", "Hexagon::ArchEnum::V68", "Enable Hexagon V68 architecture">; def HasV68 : Predicate<"HST->hasV68Ops()">, AssemblerPredicate<(all_of ArchV68)>; +def ArchV69: SubtargetFeature<"v69", "HexagonArchVersion", "Hexagon::ArchEnum::V69", "Enable Hexagon V69 architecture">; +def HasV69 : Predicate<"HST->hasV69Ops()">, AssemblerPredicate<(all_of ArchV69)>; diff --git a/llvm/lib/Target/Hexagon/HexagonDepDecoders.inc b/llvm/lib/Target/Hexagon/HexagonDepDecoders.inc index 40f6e14aed13..7164af3ad5c6 100644 --- a/llvm/lib/Target/Hexagon/HexagonDepDecoders.inc +++ b/llvm/lib/Target/Hexagon/HexagonDepDecoders.inc @@ -8,6 +8,7 @@ // Automatically generated file, do not edit! //===----------------------------------------------------------------------===// + #if defined(__clang__) #pragma clang diagnostic push #pragma clang diagnostic ignored "-Wunused-function" diff --git a/llvm/lib/Target/Hexagon/HexagonDepIICHVX.td b/llvm/lib/Target/Hexagon/HexagonDepIICHVX.td index a1db3ae7239d..d195df918293 100644 --- a/llvm/lib/Target/Hexagon/HexagonDepIICHVX.td +++ b/llvm/lib/Target/Hexagon/HexagonDepIICHVX.td @@ -11,6 +11,7 @@ def tc_04da405a : InstrItinClass; def tc_05ca8cfd : InstrItinClass; def tc_08a4f1b6 : InstrItinClass; +def tc_0afc8be9 : InstrItinClass; def tc_0b04c6c7 : InstrItinClass; def tc_0ec46cf9 : InstrItinClass; def tc_131f1c81 : InstrItinClass; @@ -21,6 +22,7 @@ def tc_191381c1 : InstrItinClass; def tc_1ad8a370 : InstrItinClass; def tc_1ba8a0cd : InstrItinClass; def tc_20a4bbec : InstrItinClass; +def tc_2120355e : InstrItinClass; def tc_257f6f7c : InstrItinClass; def tc_26a377fe : InstrItinClass; def tc_2b4c548e : InstrItinClass; @@ -28,15 +30,18 @@ def tc_2c745bb8 : InstrItinClass; def tc_2d4051cd : InstrItinClass; def tc_2e8f5f6e : InstrItinClass; def tc_309dbb4f : InstrItinClass; +def tc_37820f4c : InstrItinClass; def tc_3904b926 : InstrItinClass; def tc_3aacf4a8 : InstrItinClass; def tc_3ad719fb : InstrItinClass; def tc_3c56e5ce : InstrItinClass; +def tc_3c8c15d0 : InstrItinClass; def tc_3ce09744 : InstrItinClass; def tc_3e2aaafc : InstrItinClass; def tc_447d9895 : InstrItinClass; def tc_453fe68d : InstrItinClass; def tc_46d6c3e0 : InstrItinClass; +def tc_4942646a : InstrItinClass; def tc_51d0ecc3 : InstrItinClass; def tc_52447ecc : InstrItinClass; def tc_540c3da3 : InstrItinClass; @@ -46,6 +51,7 @@ def tc_56c4f9fe : InstrItinClass; def tc_56e64202 : InstrItinClass; def tc_58d21193 : InstrItinClass; def tc_5bf8afbb : InstrItinClass; +def tc_5cdf8c84 : InstrItinClass; def tc_61bf7c03 : InstrItinClass; def tc_649072c2 : InstrItinClass; def tc_660769f1 : InstrItinClass; @@ -57,6 +63,8 @@ def tc_71646d06 : InstrItinClass; def tc_7177e272 : InstrItinClass; def tc_718b5c53 : InstrItinClass; def tc_7273323b : InstrItinClass; +def tc_72e2b393 : InstrItinClass; +def tc_73efe966 : InstrItinClass; def tc_7417e785 : InstrItinClass; def tc_767c4e9d : InstrItinClass; def tc_7d68d5c2 : InstrItinClass; @@ -71,9 +79,11 @@ def tc_9d1dc972 : InstrItinClass; def tc_9f363d21 : 
InstrItinClass; def tc_a02a10a8 : InstrItinClass; def tc_a0dbea28 : InstrItinClass; +def tc_a19b9305 : InstrItinClass; def tc_a28f32b5 : InstrItinClass; def tc_a69eeee1 : InstrItinClass; def tc_a7e6707d : InstrItinClass; +def tc_aa047364 : InstrItinClass; def tc_ab23f776 : InstrItinClass; def tc_abe8c3b2 : InstrItinClass; def tc_ac4046bc : InstrItinClass; @@ -89,8 +99,10 @@ def tc_c4edf264 : InstrItinClass; def tc_c5dba46e : InstrItinClass; def tc_c7039829 : InstrItinClass; def tc_cd94bfe0 : InstrItinClass; +def tc_cda936da : InstrItinClass; def tc_d8287c14 : InstrItinClass; def tc_db5555f3 : InstrItinClass; +def tc_dcca380f : InstrItinClass; def tc_dd5b0695 : InstrItinClass; def tc_df80eeb0 : InstrItinClass; def tc_e2d2e9e5 : InstrItinClass; @@ -99,6 +111,7 @@ def tc_e3f68a46 : InstrItinClass; def tc_e675c45a : InstrItinClass; def tc_e699ae41 : InstrItinClass; def tc_e99d4c2e : InstrItinClass; +def tc_f175e046 : InstrItinClass; def tc_f1de44ef : InstrItinClass; def tc_f21e8abb : InstrItinClass; @@ -119,6 +132,11 @@ class DepHVXItinV55 { InstrStage<1, [CVI_MPY01]>], [9, 7, 5, 5], [HVX_FWD, HVX_FWD, HVX_FWD, HVX_FWD]>, + InstrItinData , + InstrStage<1, [CVI_MPY01]>], [9, 5], + [HVX_FWD, HVX_FWD]>, + InstrItinData , InstrStage<1, [CVI_MPY01]>], [9, 5, 2], @@ -174,6 +192,10 @@ class DepHVXItinV55 { InstrStage<1, [CVI_ST]>], [3, 1, 2], [Hex_FWD, Hex_FWD, Hex_FWD]>, + InstrItinData ], [9, 7], + [HVX_FWD, HVX_FWD]>, + InstrItinData , InstrStage<1, [CVI_MPY0, CVI_MPY1, CVI_SHIFT, CVI_XLANE]>], [9, 7, 7, 7], @@ -209,6 +231,11 @@ class DepHVXItinV55 { InstrStage<1, [CVI_SHIFT]>], [9, 7, 5, 2], [HVX_FWD, HVX_FWD, HVX_FWD, Hex_FWD]>, + InstrItinData , + InstrStage<1, [CVI_MPY01]>], [9, 7, 5, 5], + [HVX_FWD, HVX_FWD, HVX_FWD, HVX_FWD]>, + InstrItinData , InstrStage<1, [CVI_LD]>], [9, 2, 1, 2], @@ -231,6 +258,11 @@ class DepHVXItinV55 { InstrStage<1, [CVI_XLANE]>], [9, 3, 1, 2], [HVX_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>, + InstrItinData , + InstrStage<1, [CVI_MPY0, CVI_MPY1]>], [9, 5], + [HVX_FWD, HVX_FWD]>, + InstrItinData , InstrStage<1, [CVI_ST]>], [1, 2], @@ -259,6 +291,11 @@ class DepHVXItinV55 { InstrStage<1, [CVI_XLANE]>], [9, 5, 5], [HVX_FWD, HVX_FWD, HVX_FWD]>, + InstrItinData , + InstrStage<1, [CVI_MPY0, CVI_MPY1]>], [9, 7, 5, 5, 2], + [HVX_FWD, HVX_FWD, HVX_FWD, HVX_FWD, Hex_FWD]>, + InstrItinData , InstrStage<1, [CVI_SHIFT]>], [9, 5], @@ -306,6 +343,11 @@ class DepHVXItinV55 { InstrStage<1, [CVI_XLANE]>], [9, 2], [HVX_FWD, Hex_FWD]>, + InstrItinData , + InstrStage<1, [CVI_MPY0, CVI_MPY1]>], [9, 7], + [HVX_FWD, HVX_FWD]>, + InstrItinData , InstrStage<1, [CVI_ALL_NOMEM]>], [9, 5, 2], @@ -363,6 +405,16 @@ class DepHVXItinV55 { InstrStage<1, [CVI_MPY01, CVI_XLSHF]>], [1, 2, 7, 7], [Hex_FWD, Hex_FWD, HVX_FWD, HVX_FWD]>, + InstrItinData , + InstrStage<1, [CVI_MPY01]>], [9, 7, 5, 2], + [HVX_FWD, HVX_FWD, HVX_FWD, Hex_FWD]>, + + InstrItinData , + InstrStage<1, [CVI_MPY01]>], [9, 5, 5], + [HVX_FWD, HVX_FWD, HVX_FWD]>, + InstrItinData , InstrStage<1, [CVI_SHIFT]>], [9, 5, 2], @@ -437,6 +489,11 @@ class DepHVXItinV55 { InstrStage<1, [CVI_ZW]>], [3, 1, 2], [Hex_FWD, Hex_FWD, Hex_FWD]>, + InstrItinData , + InstrStage<1, [CVI_MPY0, CVI_MPY1]>], [9, 7, 5, 5], + [HVX_FWD, HVX_FWD, HVX_FWD, HVX_FWD]>, + InstrItinData , InstrStage<1, [CVI_LD], 0>, @@ -456,6 +513,10 @@ class DepHVXItinV55 { InstrStage<1, [CVI_XLANE]>], [9, 1, 2], [HVX_FWD, Hex_FWD, Hex_FWD]>, + InstrItinData ], [9, 7, 7], + [HVX_FWD, HVX_FWD, HVX_FWD]>, + InstrItinData , InstrStage<1, [CVI_ST]>], [1, 2, 5], @@ -537,6 +598,11 @@ class 
DepHVXItinV55 { InstrStage<1, [CVI_SHIFT, CVI_XLANE]>], [9, 5, 2], [HVX_FWD, HVX_FWD, Hex_FWD]>, + InstrItinData , + InstrStage<1, [CVI_MPY0, CVI_MPY1]>], [9, 7, 7], + [HVX_FWD, HVX_FWD, HVX_FWD]>, + InstrItinData , InstrStage<1, [CVI_MPY01]>], [9, 5, 5], @@ -547,6 +613,11 @@ class DepHVXItinV55 { InstrStage<1, [CVI_MPY01, CVI_XLSHF]>], [9, 7, 7], [HVX_FWD, HVX_FWD, HVX_FWD]>, + InstrItinData , + InstrStage<1, [CVI_MPY01]>], [9, 5, 2], + [HVX_FWD, HVX_FWD, Hex_FWD]>, + InstrItinData , InstrStage<1, [CVI_ZW]>], [2, 1, 2], @@ -589,6 +660,11 @@ class DepHVXItinV55 { InstrStage<1, [CVI_ST]>], [3, 2, 1, 2, 5], [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD, HVX_FWD]>, + InstrItinData , + InstrStage<1, [CVI_MPY0, CVI_MPY1]>], [9, 5, 5, 2], + [HVX_FWD, HVX_FWD, HVX_FWD, Hex_FWD]>, + InstrItinData , InstrStage<1, [CVI_MPY01]>], [9, 5, 2], @@ -620,6 +696,11 @@ class DepHVXItinV60 { InstrStage<1, [CVI_MPY01]>], [9, 7, 5, 5], [HVX_FWD, HVX_FWD, HVX_FWD, HVX_FWD]>, + InstrItinData , + InstrStage<1, [CVI_MPY01]>], [9, 5], + [HVX_FWD, HVX_FWD]>, + InstrItinData , InstrStage<1, [CVI_MPY01]>], [9, 5, 2], @@ -675,6 +756,10 @@ class DepHVXItinV60 { InstrStage<1, [CVI_ST]>], [3, 1, 2], [Hex_FWD, Hex_FWD, Hex_FWD]>, + InstrItinData ], [9, 7], + [HVX_FWD, HVX_FWD]>, + InstrItinData , InstrStage<1, [CVI_MPY0, CVI_MPY1, CVI_SHIFT, CVI_XLANE]>], [9, 7, 7, 7], @@ -710,6 +795,11 @@ class DepHVXItinV60 { InstrStage<1, [CVI_SHIFT]>], [9, 7, 5, 2], [HVX_FWD, HVX_FWD, HVX_FWD, Hex_FWD]>, + InstrItinData , + InstrStage<1, [CVI_MPY01]>], [9, 7, 5, 5], + [HVX_FWD, HVX_FWD, HVX_FWD, HVX_FWD]>, + InstrItinData , InstrStage<1, [CVI_LD]>], [9, 2, 1, 2], @@ -732,6 +822,11 @@ class DepHVXItinV60 { InstrStage<1, [CVI_XLANE]>], [9, 3, 1, 2], [HVX_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>, + InstrItinData , + InstrStage<1, [CVI_MPY0, CVI_MPY1]>], [9, 5], + [HVX_FWD, HVX_FWD]>, + InstrItinData , InstrStage<1, [CVI_ST]>], [1, 2], @@ -760,6 +855,11 @@ class DepHVXItinV60 { InstrStage<1, [CVI_XLANE]>], [9, 5, 5], [HVX_FWD, HVX_FWD, HVX_FWD]>, + InstrItinData , + InstrStage<1, [CVI_MPY0, CVI_MPY1]>], [9, 7, 5, 5, 2], + [HVX_FWD, HVX_FWD, HVX_FWD, HVX_FWD, Hex_FWD]>, + InstrItinData , InstrStage<1, [CVI_SHIFT]>], [9, 5], @@ -807,6 +907,11 @@ class DepHVXItinV60 { InstrStage<1, [CVI_XLANE]>], [9, 2], [HVX_FWD, Hex_FWD]>, + InstrItinData , + InstrStage<1, [CVI_MPY0, CVI_MPY1]>], [9, 7], + [HVX_FWD, HVX_FWD]>, + InstrItinData , InstrStage<1, [CVI_ALL_NOMEM]>], [9, 5, 2], @@ -864,6 +969,16 @@ class DepHVXItinV60 { InstrStage<1, [CVI_MPY01, CVI_XLSHF]>], [1, 2, 7, 7], [Hex_FWD, Hex_FWD, HVX_FWD, HVX_FWD]>, + InstrItinData , + InstrStage<1, [CVI_MPY01]>], [9, 7, 5, 2], + [HVX_FWD, HVX_FWD, HVX_FWD, Hex_FWD]>, + + InstrItinData , + InstrStage<1, [CVI_MPY01]>], [9, 5, 5], + [HVX_FWD, HVX_FWD, HVX_FWD]>, + InstrItinData , InstrStage<1, [CVI_SHIFT]>], [9, 5, 2], @@ -938,6 +1053,11 @@ class DepHVXItinV60 { InstrStage<1, [CVI_ZW]>], [3, 1, 2], [Hex_FWD, Hex_FWD, Hex_FWD]>, + InstrItinData , + InstrStage<1, [CVI_MPY0, CVI_MPY1]>], [9, 7, 5, 5], + [HVX_FWD, HVX_FWD, HVX_FWD, HVX_FWD]>, + InstrItinData , InstrStage<1, [CVI_LD], 0>, @@ -957,6 +1077,10 @@ class DepHVXItinV60 { InstrStage<1, [CVI_XLANE]>], [9, 1, 2], [HVX_FWD, Hex_FWD, Hex_FWD]>, + InstrItinData ], [9, 7, 7], + [HVX_FWD, HVX_FWD, HVX_FWD]>, + InstrItinData , InstrStage<1, [CVI_ST]>], [1, 2, 5], @@ -1038,6 +1162,11 @@ class DepHVXItinV60 { InstrStage<1, [CVI_SHIFT, CVI_XLANE]>], [9, 5, 2], [HVX_FWD, HVX_FWD, Hex_FWD]>, + InstrItinData , + InstrStage<1, [CVI_MPY0, CVI_MPY1]>], [9, 7, 7], + [HVX_FWD, HVX_FWD, 
HVX_FWD]>, + InstrItinData , InstrStage<1, [CVI_MPY01]>], [9, 5, 5], @@ -1048,6 +1177,11 @@ class DepHVXItinV60 { InstrStage<1, [CVI_MPY01, CVI_XLSHF]>], [9, 7, 7], [HVX_FWD, HVX_FWD, HVX_FWD]>, + InstrItinData , + InstrStage<1, [CVI_MPY01]>], [9, 5, 2], + [HVX_FWD, HVX_FWD, Hex_FWD]>, + InstrItinData , InstrStage<1, [CVI_ZW]>], [2, 1, 2], @@ -1090,6 +1224,11 @@ class DepHVXItinV60 { InstrStage<1, [CVI_ST]>], [3, 2, 1, 2, 5], [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD, HVX_FWD]>, + InstrItinData , + InstrStage<1, [CVI_MPY0, CVI_MPY1]>], [9, 5, 5, 2], + [HVX_FWD, HVX_FWD, HVX_FWD, Hex_FWD]>, + InstrItinData , InstrStage<1, [CVI_MPY01]>], [9, 5, 2], @@ -1121,6 +1260,11 @@ class DepHVXItinV62 { InstrStage<1, [CVI_MPY01]>], [9, 7, 5, 5], [HVX_FWD, HVX_FWD, HVX_FWD, HVX_FWD]>, + InstrItinData , + InstrStage<1, [CVI_MPY01]>], [9, 5], + [HVX_FWD, HVX_FWD]>, + InstrItinData , InstrStage<1, [CVI_MPY01]>], [9, 5, 2], @@ -1176,6 +1320,10 @@ class DepHVXItinV62 { InstrStage<1, [CVI_ST]>], [3, 1, 2], [Hex_FWD, Hex_FWD, Hex_FWD]>, + InstrItinData ], [9, 7], + [HVX_FWD, HVX_FWD]>, + InstrItinData , InstrStage<1, [CVI_MPY0, CVI_MPY1, CVI_SHIFT, CVI_XLANE]>], [9, 7, 7, 7], @@ -1211,6 +1359,11 @@ class DepHVXItinV62 { InstrStage<1, [CVI_SHIFT]>], [9, 7, 5, 2], [HVX_FWD, HVX_FWD, HVX_FWD, Hex_FWD]>, + InstrItinData , + InstrStage<1, [CVI_MPY01]>], [9, 7, 5, 5], + [HVX_FWD, HVX_FWD, HVX_FWD, HVX_FWD]>, + InstrItinData , InstrStage<1, [CVI_LD]>], [9, 2, 1, 2], @@ -1233,6 +1386,11 @@ class DepHVXItinV62 { InstrStage<1, [CVI_XLANE]>], [9, 3, 1, 2], [HVX_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>, + InstrItinData , + InstrStage<1, [CVI_MPY0, CVI_MPY1]>], [9, 5], + [HVX_FWD, HVX_FWD]>, + InstrItinData , InstrStage<1, [CVI_ST]>], [1, 2], @@ -1261,6 +1419,11 @@ class DepHVXItinV62 { InstrStage<1, [CVI_XLANE]>], [9, 5, 5], [HVX_FWD, HVX_FWD, HVX_FWD]>, + InstrItinData , + InstrStage<1, [CVI_MPY0, CVI_MPY1]>], [9, 7, 5, 5, 2], + [HVX_FWD, HVX_FWD, HVX_FWD, HVX_FWD, Hex_FWD]>, + InstrItinData , InstrStage<1, [CVI_SHIFT]>], [9, 5], @@ -1308,6 +1471,11 @@ class DepHVXItinV62 { InstrStage<1, [CVI_XLANE]>], [9, 2], [HVX_FWD, Hex_FWD]>, + InstrItinData , + InstrStage<1, [CVI_MPY0, CVI_MPY1]>], [9, 7], + [HVX_FWD, HVX_FWD]>, + InstrItinData , InstrStage<1, [CVI_ALL_NOMEM]>], [9, 5, 2], @@ -1365,6 +1533,16 @@ class DepHVXItinV62 { InstrStage<1, [CVI_MPY01, CVI_XLSHF]>], [1, 2, 7, 7], [Hex_FWD, Hex_FWD, HVX_FWD, HVX_FWD]>, + InstrItinData , + InstrStage<1, [CVI_MPY01]>], [9, 7, 5, 2], + [HVX_FWD, HVX_FWD, HVX_FWD, Hex_FWD]>, + + InstrItinData , + InstrStage<1, [CVI_MPY01]>], [9, 5, 5], + [HVX_FWD, HVX_FWD, HVX_FWD]>, + InstrItinData , InstrStage<1, [CVI_SHIFT]>], [9, 5, 2], @@ -1439,6 +1617,11 @@ class DepHVXItinV62 { InstrStage<1, [CVI_ZW]>], [3, 1, 2], [Hex_FWD, Hex_FWD, Hex_FWD]>, + InstrItinData , + InstrStage<1, [CVI_MPY0, CVI_MPY1]>], [9, 7, 5, 5], + [HVX_FWD, HVX_FWD, HVX_FWD, HVX_FWD]>, + InstrItinData , InstrStage<1, [CVI_LD], 0>, @@ -1458,6 +1641,10 @@ class DepHVXItinV62 { InstrStage<1, [CVI_XLANE]>], [9, 1, 2], [HVX_FWD, Hex_FWD, Hex_FWD]>, + InstrItinData ], [9, 7, 7], + [HVX_FWD, HVX_FWD, HVX_FWD]>, + InstrItinData , InstrStage<1, [CVI_ST]>], [1, 2, 5], @@ -1539,6 +1726,11 @@ class DepHVXItinV62 { InstrStage<1, [CVI_SHIFT, CVI_XLANE]>], [9, 5, 2], [HVX_FWD, HVX_FWD, Hex_FWD]>, + InstrItinData , + InstrStage<1, [CVI_MPY0, CVI_MPY1]>], [9, 7, 7], + [HVX_FWD, HVX_FWD, HVX_FWD]>, + InstrItinData , InstrStage<1, [CVI_MPY01]>], [9, 5, 5], @@ -1549,6 +1741,11 @@ class DepHVXItinV62 { InstrStage<1, [CVI_MPY01, CVI_XLSHF]>], [9, 7, 7], 
[HVX_FWD, HVX_FWD, HVX_FWD]>, + InstrItinData , + InstrStage<1, [CVI_MPY01]>], [9, 5, 2], + [HVX_FWD, HVX_FWD, Hex_FWD]>, + InstrItinData , InstrStage<1, [CVI_ZW]>], [2, 1, 2], @@ -1591,6 +1788,11 @@ class DepHVXItinV62 { InstrStage<1, [CVI_ST]>], [3, 2, 1, 2, 5], [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD, HVX_FWD]>, + InstrItinData , + InstrStage<1, [CVI_MPY0, CVI_MPY1]>], [9, 5, 5, 2], + [HVX_FWD, HVX_FWD, HVX_FWD, Hex_FWD]>, + InstrItinData , InstrStage<1, [CVI_MPY01]>], [9, 5, 2], @@ -1622,6 +1824,11 @@ class DepHVXItinV65 { InstrStage<1, [CVI_MPY01]>], [9, 7, 5, 5], [HVX_FWD, HVX_FWD, HVX_FWD, HVX_FWD]>, + InstrItinData , + InstrStage<1, [CVI_MPY01]>], [9, 5], + [HVX_FWD, HVX_FWD]>, + InstrItinData , InstrStage<1, [CVI_MPY01]>], [9, 5, 2], @@ -1677,6 +1884,10 @@ class DepHVXItinV65 { InstrStage<1, [CVI_ST]>], [3, 1, 2], [Hex_FWD, Hex_FWD, Hex_FWD]>, + InstrItinData ], [9, 7], + [HVX_FWD, HVX_FWD]>, + InstrItinData , InstrStage<1, [CVI_MPY0, CVI_MPY1, CVI_SHIFT, CVI_XLANE]>], [9, 7, 7, 7], @@ -1712,6 +1923,11 @@ class DepHVXItinV65 { InstrStage<1, [CVI_SHIFT]>], [9, 7, 5, 2], [HVX_FWD, HVX_FWD, HVX_FWD, Hex_FWD]>, + InstrItinData , + InstrStage<1, [CVI_MPY01]>], [9, 7, 5, 5], + [HVX_FWD, HVX_FWD, HVX_FWD, HVX_FWD]>, + InstrItinData , InstrStage<1, [CVI_LD]>], [9, 2, 1, 2], @@ -1734,6 +1950,11 @@ class DepHVXItinV65 { InstrStage<1, [CVI_XLANE]>], [9, 3, 1, 2], [HVX_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>, + InstrItinData , + InstrStage<1, [CVI_MPY0, CVI_MPY1]>], [9, 5], + [HVX_FWD, HVX_FWD]>, + InstrItinData , InstrStage<1, [CVI_ST]>], [1, 2], @@ -1762,6 +1983,11 @@ class DepHVXItinV65 { InstrStage<1, [CVI_XLANE]>], [9, 5, 5], [HVX_FWD, HVX_FWD, HVX_FWD]>, + InstrItinData , + InstrStage<1, [CVI_MPY0, CVI_MPY1]>], [9, 7, 5, 5, 2], + [HVX_FWD, HVX_FWD, HVX_FWD, HVX_FWD, Hex_FWD]>, + InstrItinData , InstrStage<1, [CVI_SHIFT]>], [9, 5], @@ -1809,6 +2035,11 @@ class DepHVXItinV65 { InstrStage<1, [CVI_XLANE]>], [9, 2], [HVX_FWD, Hex_FWD]>, + InstrItinData , + InstrStage<1, [CVI_MPY0, CVI_MPY1]>], [9, 7], + [HVX_FWD, HVX_FWD]>, + InstrItinData , InstrStage<1, [CVI_ALL_NOMEM]>], [9, 5, 2], @@ -1866,6 +2097,16 @@ class DepHVXItinV65 { InstrStage<1, [CVI_MPY01, CVI_XLSHF]>], [1, 2, 7, 7], [Hex_FWD, Hex_FWD, HVX_FWD, HVX_FWD]>, + InstrItinData , + InstrStage<1, [CVI_MPY01]>], [9, 7, 5, 2], + [HVX_FWD, HVX_FWD, HVX_FWD, Hex_FWD]>, + + InstrItinData , + InstrStage<1, [CVI_MPY01]>], [9, 5, 5], + [HVX_FWD, HVX_FWD, HVX_FWD]>, + InstrItinData , InstrStage<1, [CVI_SHIFT]>], [9, 5, 2], @@ -1940,6 +2181,11 @@ class DepHVXItinV65 { InstrStage<1, [CVI_ZW]>], [3, 1, 2], [Hex_FWD, Hex_FWD, Hex_FWD]>, + InstrItinData , + InstrStage<1, [CVI_MPY0, CVI_MPY1]>], [9, 7, 5, 5], + [HVX_FWD, HVX_FWD, HVX_FWD, HVX_FWD]>, + InstrItinData , InstrStage<1, [CVI_LD], 0>, @@ -1959,6 +2205,10 @@ class DepHVXItinV65 { InstrStage<1, [CVI_XLANE]>], [9, 1, 2], [HVX_FWD, Hex_FWD, Hex_FWD]>, + InstrItinData ], [9, 7, 7], + [HVX_FWD, HVX_FWD, HVX_FWD]>, + InstrItinData , InstrStage<1, [CVI_ST]>], [1, 2, 5], @@ -2040,6 +2290,11 @@ class DepHVXItinV65 { InstrStage<1, [CVI_SHIFT, CVI_XLANE]>], [9, 5, 2], [HVX_FWD, HVX_FWD, Hex_FWD]>, + InstrItinData , + InstrStage<1, [CVI_MPY0, CVI_MPY1]>], [9, 7, 7], + [HVX_FWD, HVX_FWD, HVX_FWD]>, + InstrItinData , InstrStage<1, [CVI_MPY01]>], [9, 5, 5], @@ -2050,6 +2305,11 @@ class DepHVXItinV65 { InstrStage<1, [CVI_MPY01, CVI_XLSHF]>], [9, 7, 7], [HVX_FWD, HVX_FWD, HVX_FWD]>, + InstrItinData , + InstrStage<1, [CVI_MPY01]>], [9, 5, 2], + [HVX_FWD, HVX_FWD, Hex_FWD]>, + InstrItinData , InstrStage<1, [CVI_ZW]>], 
[2, 1, 2], @@ -2092,6 +2352,11 @@ class DepHVXItinV65 { InstrStage<1, [CVI_ST]>], [3, 2, 1, 2, 5], [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD, HVX_FWD]>, + InstrItinData , + InstrStage<1, [CVI_MPY0, CVI_MPY1]>], [9, 5, 5, 2], + [HVX_FWD, HVX_FWD, HVX_FWD, Hex_FWD]>, + InstrItinData , InstrStage<1, [CVI_MPY01]>], [9, 5, 2], @@ -2123,6 +2388,11 @@ class DepHVXItinV66 { InstrStage<1, [CVI_MPY01]>], [9, 7, 5, 5], [HVX_FWD, HVX_FWD, HVX_FWD, HVX_FWD]>, + InstrItinData , + InstrStage<1, [CVI_MPY01]>], [9, 5], + [HVX_FWD, HVX_FWD]>, + InstrItinData , InstrStage<1, [CVI_MPY01]>], [9, 5, 2], @@ -2178,6 +2448,10 @@ class DepHVXItinV66 { InstrStage<1, [CVI_ST]>], [3, 1, 2], [Hex_FWD, Hex_FWD, Hex_FWD]>, + InstrItinData ], [9, 7], + [HVX_FWD, HVX_FWD]>, + InstrItinData , InstrStage<1, [CVI_MPY0, CVI_MPY1, CVI_SHIFT, CVI_XLANE]>], [9, 7, 7, 7], @@ -2213,6 +2487,11 @@ class DepHVXItinV66 { InstrStage<1, [CVI_SHIFT]>], [9, 7, 5, 2], [HVX_FWD, HVX_FWD, HVX_FWD, Hex_FWD]>, + InstrItinData , + InstrStage<1, [CVI_MPY01]>], [9, 7, 5, 5], + [HVX_FWD, HVX_FWD, HVX_FWD, HVX_FWD]>, + InstrItinData , InstrStage<1, [CVI_LD]>], [9, 2, 1, 2], @@ -2235,6 +2514,11 @@ class DepHVXItinV66 { InstrStage<1, [CVI_XLANE]>], [9, 3, 1, 2], [HVX_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>, + InstrItinData , + InstrStage<1, [CVI_MPY0, CVI_MPY1]>], [9, 5], + [HVX_FWD, HVX_FWD]>, + InstrItinData , InstrStage<1, [CVI_ST]>], [1, 2], @@ -2263,6 +2547,11 @@ class DepHVXItinV66 { InstrStage<1, [CVI_XLANE]>], [9, 5, 5], [HVX_FWD, HVX_FWD, HVX_FWD]>, + InstrItinData , + InstrStage<1, [CVI_MPY0, CVI_MPY1]>], [9, 7, 5, 5, 2], + [HVX_FWD, HVX_FWD, HVX_FWD, HVX_FWD, Hex_FWD]>, + InstrItinData , InstrStage<1, [CVI_SHIFT]>], [9, 5], @@ -2310,6 +2599,11 @@ class DepHVXItinV66 { InstrStage<1, [CVI_XLANE]>], [9, 2], [HVX_FWD, Hex_FWD]>, + InstrItinData , + InstrStage<1, [CVI_MPY0, CVI_MPY1]>], [9, 7], + [HVX_FWD, HVX_FWD]>, + InstrItinData , InstrStage<1, [CVI_ALL_NOMEM]>], [9, 5, 2], @@ -2367,6 +2661,16 @@ class DepHVXItinV66 { InstrStage<1, [CVI_MPY01, CVI_XLSHF]>], [1, 2, 7, 7], [Hex_FWD, Hex_FWD, HVX_FWD, HVX_FWD]>, + InstrItinData , + InstrStage<1, [CVI_MPY01]>], [9, 7, 5, 2], + [HVX_FWD, HVX_FWD, HVX_FWD, Hex_FWD]>, + + InstrItinData , + InstrStage<1, [CVI_MPY01]>], [9, 5, 5], + [HVX_FWD, HVX_FWD, HVX_FWD]>, + InstrItinData , InstrStage<1, [CVI_SHIFT]>], [9, 5, 2], @@ -2441,6 +2745,11 @@ class DepHVXItinV66 { InstrStage<1, [CVI_ZW]>], [3, 1, 2], [Hex_FWD, Hex_FWD, Hex_FWD]>, + InstrItinData , + InstrStage<1, [CVI_MPY0, CVI_MPY1]>], [9, 7, 5, 5], + [HVX_FWD, HVX_FWD, HVX_FWD, HVX_FWD]>, + InstrItinData , InstrStage<1, [CVI_LD], 0>, @@ -2460,6 +2769,10 @@ class DepHVXItinV66 { InstrStage<1, [CVI_XLANE]>], [9, 1, 2], [HVX_FWD, Hex_FWD, Hex_FWD]>, + InstrItinData ], [9, 7, 7], + [HVX_FWD, HVX_FWD, HVX_FWD]>, + InstrItinData , InstrStage<1, [CVI_ST]>], [1, 2, 5], @@ -2541,6 +2854,11 @@ class DepHVXItinV66 { InstrStage<1, [CVI_SHIFT, CVI_XLANE]>], [9, 5, 2], [HVX_FWD, HVX_FWD, Hex_FWD]>, + InstrItinData , + InstrStage<1, [CVI_MPY0, CVI_MPY1]>], [9, 7, 7], + [HVX_FWD, HVX_FWD, HVX_FWD]>, + InstrItinData , InstrStage<1, [CVI_MPY01]>], [9, 5, 5], @@ -2551,6 +2869,11 @@ class DepHVXItinV66 { InstrStage<1, [CVI_MPY01, CVI_XLSHF]>], [9, 7, 7], [HVX_FWD, HVX_FWD, HVX_FWD]>, + InstrItinData , + InstrStage<1, [CVI_MPY01]>], [9, 5, 2], + [HVX_FWD, HVX_FWD, Hex_FWD]>, + InstrItinData , InstrStage<1, [CVI_ZW]>], [2, 1, 2], @@ -2593,6 +2916,11 @@ class DepHVXItinV66 { InstrStage<1, [CVI_ST]>], [3, 2, 1, 2, 5], [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD, HVX_FWD]>, + InstrItinData , + 
InstrStage<1, [CVI_MPY0, CVI_MPY1]>], [9, 5, 5, 2], + [HVX_FWD, HVX_FWD, HVX_FWD, Hex_FWD]>, + InstrItinData , InstrStage<1, [CVI_MPY01]>], [9, 5, 2], @@ -2624,6 +2952,11 @@ class DepHVXItinV67 { InstrStage<1, [CVI_MPY01]>], [9, 7, 5, 5], [HVX_FWD, HVX_FWD, HVX_FWD, HVX_FWD]>, + InstrItinData , + InstrStage<1, [CVI_MPY01]>], [9, 5], + [HVX_FWD, HVX_FWD]>, + InstrItinData , InstrStage<1, [CVI_MPY01]>], [9, 5, 2], @@ -2679,6 +3012,10 @@ class DepHVXItinV67 { InstrStage<1, [CVI_ST]>], [3, 1, 2], [Hex_FWD, Hex_FWD, Hex_FWD]>, + InstrItinData ], [9, 7], + [HVX_FWD, HVX_FWD]>, + InstrItinData , InstrStage<1, [CVI_MPY0, CVI_MPY1, CVI_SHIFT, CVI_XLANE]>], [9, 7, 7, 7], @@ -2714,6 +3051,11 @@ class DepHVXItinV67 { InstrStage<1, [CVI_SHIFT]>], [9, 7, 5, 2], [HVX_FWD, HVX_FWD, HVX_FWD, Hex_FWD]>, + InstrItinData , + InstrStage<1, [CVI_MPY01]>], [9, 7, 5, 5], + [HVX_FWD, HVX_FWD, HVX_FWD, HVX_FWD]>, + InstrItinData , InstrStage<1, [CVI_LD]>], [9, 2, 1, 2], @@ -2736,6 +3078,11 @@ class DepHVXItinV67 { InstrStage<1, [CVI_XLANE]>], [9, 3, 1, 2], [HVX_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>, + InstrItinData , + InstrStage<1, [CVI_MPY0, CVI_MPY1]>], [9, 5], + [HVX_FWD, HVX_FWD]>, + InstrItinData , InstrStage<1, [CVI_ST]>], [1, 2], @@ -2764,6 +3111,11 @@ class DepHVXItinV67 { InstrStage<1, [CVI_XLANE]>], [9, 5, 5], [HVX_FWD, HVX_FWD, HVX_FWD]>, + InstrItinData , + InstrStage<1, [CVI_MPY0, CVI_MPY1]>], [9, 7, 5, 5, 2], + [HVX_FWD, HVX_FWD, HVX_FWD, HVX_FWD, Hex_FWD]>, + InstrItinData , InstrStage<1, [CVI_SHIFT]>], [9, 5], @@ -2811,6 +3163,11 @@ class DepHVXItinV67 { InstrStage<1, [CVI_XLANE]>], [9, 2], [HVX_FWD, Hex_FWD]>, + InstrItinData , + InstrStage<1, [CVI_MPY0, CVI_MPY1]>], [9, 7], + [HVX_FWD, HVX_FWD]>, + InstrItinData , InstrStage<1, [CVI_ALL_NOMEM]>], [9, 5, 2], @@ -2868,6 +3225,16 @@ class DepHVXItinV67 { InstrStage<1, [CVI_MPY01, CVI_XLSHF]>], [1, 2, 7, 7], [Hex_FWD, Hex_FWD, HVX_FWD, HVX_FWD]>, + InstrItinData , + InstrStage<1, [CVI_MPY01]>], [9, 7, 5, 2], + [HVX_FWD, HVX_FWD, HVX_FWD, Hex_FWD]>, + + InstrItinData , + InstrStage<1, [CVI_MPY01]>], [9, 5, 5], + [HVX_FWD, HVX_FWD, HVX_FWD]>, + InstrItinData , InstrStage<1, [CVI_SHIFT]>], [9, 5, 2], @@ -2942,6 +3309,11 @@ class DepHVXItinV67 { InstrStage<1, [CVI_ZW]>], [3, 1, 2], [Hex_FWD, Hex_FWD, Hex_FWD]>, + InstrItinData , + InstrStage<1, [CVI_MPY0, CVI_MPY1]>], [9, 7, 5, 5], + [HVX_FWD, HVX_FWD, HVX_FWD, HVX_FWD]>, + InstrItinData , InstrStage<1, [CVI_LD], 0>, @@ -2961,6 +3333,10 @@ class DepHVXItinV67 { InstrStage<1, [CVI_XLANE]>], [9, 1, 2], [HVX_FWD, Hex_FWD, Hex_FWD]>, + InstrItinData ], [9, 7, 7], + [HVX_FWD, HVX_FWD, HVX_FWD]>, + InstrItinData , InstrStage<1, [CVI_ST]>], [1, 2, 5], @@ -3042,6 +3418,11 @@ class DepHVXItinV67 { InstrStage<1, [CVI_SHIFT, CVI_XLANE]>], [9, 5, 2], [HVX_FWD, HVX_FWD, Hex_FWD]>, + InstrItinData , + InstrStage<1, [CVI_MPY0, CVI_MPY1]>], [9, 7, 7], + [HVX_FWD, HVX_FWD, HVX_FWD]>, + InstrItinData , InstrStage<1, [CVI_MPY01]>], [9, 5, 5], @@ -3052,6 +3433,11 @@ class DepHVXItinV67 { InstrStage<1, [CVI_MPY01, CVI_XLSHF]>], [9, 7, 7], [HVX_FWD, HVX_FWD, HVX_FWD]>, + InstrItinData , + InstrStage<1, [CVI_MPY01]>], [9, 5, 2], + [HVX_FWD, HVX_FWD, Hex_FWD]>, + InstrItinData , InstrStage<1, [CVI_ZW]>], [2, 1, 2], @@ -3094,6 +3480,11 @@ class DepHVXItinV67 { InstrStage<1, [CVI_ST]>], [3, 2, 1, 2, 5], [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD, HVX_FWD]>, + InstrItinData , + InstrStage<1, [CVI_MPY0, CVI_MPY1]>], [9, 5, 5, 2], + [HVX_FWD, HVX_FWD, HVX_FWD, Hex_FWD]>, + InstrItinData , InstrStage<1, [CVI_MPY01]>], [9, 5, 2], @@ -3125,6 
+3516,11 @@ class DepHVXItinV68 { InstrStage<1, [CVI_MPY01]>], [9, 7, 5, 5], [HVX_FWD, HVX_FWD, HVX_FWD, HVX_FWD]>, + InstrItinData , + InstrStage<1, [CVI_MPY01]>], [9, 5], + [HVX_FWD, HVX_FWD]>, + InstrItinData , InstrStage<1, [CVI_MPY01]>], [9, 5, 2], @@ -3180,6 +3576,10 @@ class DepHVXItinV68 { InstrStage<1, [CVI_ST]>], [3, 1, 2], [Hex_FWD, Hex_FWD, Hex_FWD]>, + InstrItinData ], [9, 7], + [HVX_FWD, HVX_FWD]>, + InstrItinData , InstrStage<1, [CVI_MPY0, CVI_MPY1, CVI_SHIFT, CVI_XLANE]>], [9, 7, 7, 7], @@ -3215,6 +3615,11 @@ class DepHVXItinV68 { InstrStage<1, [CVI_SHIFT]>], [9, 7, 5, 2], [HVX_FWD, HVX_FWD, HVX_FWD, Hex_FWD]>, + InstrItinData , + InstrStage<1, [CVI_MPY01]>], [9, 7, 5, 5], + [HVX_FWD, HVX_FWD, HVX_FWD, HVX_FWD]>, + InstrItinData , InstrStage<1, [CVI_LD]>], [9, 2, 1, 2], @@ -3237,7 +3642,576 @@ class DepHVXItinV68 { InstrStage<1, [CVI_XLANE]>], [9, 3, 1, 2], [HVX_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>, - InstrItinData , + InstrStage<1, [CVI_MPY0, CVI_MPY1]>], [9, 5], + [HVX_FWD, HVX_FWD]>, + + InstrItinData , + InstrStage<1, [CVI_ST]>], [1, 2], + [Hex_FWD, Hex_FWD]>, + + InstrItinData , + InstrStage<1, [CVI_ST], 0>, + InstrStage<1, [CVI_MPY0, CVI_MPY1, CVI_SHIFT, CVI_XLANE]>], [3, 1, 2, 7], + [Hex_FWD, Hex_FWD, Hex_FWD, HVX_FWD]>, + + InstrItinData , + InstrStage<1, [CVI_ST], 0>, + InstrStage<1, [CVI_MPY0, CVI_MPY1, CVI_SHIFT, CVI_XLANE]>], [7, 1, 2, 7], + [HVX_FWD, Hex_FWD, Hex_FWD, HVX_FWD]>, + + InstrItinData , + InstrStage<1, [CVI_LD], 0>, + InstrStage<1, [CVI_MPY0, CVI_MPY1, CVI_SHIFT, CVI_XLANE]>], [9, 3, 2, 1, 2], + [HVX_FWD, Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>, + + InstrItinData , + InstrStage<1, [CVI_XLANE]>], [9, 5, 5], + [HVX_FWD, HVX_FWD, HVX_FWD]>, + + InstrItinData , + InstrStage<1, [CVI_MPY0, CVI_MPY1]>], [9, 7, 5, 5, 2], + [HVX_FWD, HVX_FWD, HVX_FWD, HVX_FWD, Hex_FWD]>, + + InstrItinData , + InstrStage<1, [CVI_SHIFT]>], [9, 5], + [HVX_FWD, HVX_FWD]>, + + InstrItinData , + InstrStage<1, [CVI_LD]>], [9, 1, 2], + [HVX_FWD, Hex_FWD, Hex_FWD]>, + + InstrItinData , + InstrStage<1, [CVI_MPY0, CVI_MPY1, CVI_SHIFT, CVI_XLANE]>], [4, 7, 1], + [Hex_FWD, HVX_FWD, Hex_FWD]>, + + InstrItinData , + InstrStage<1, [CVI_ST], 0>, + InstrStage<1, [CVI_MPY0, CVI_MPY1, CVI_SHIFT, CVI_XLANE]>], [3, 2, 1, 2, 7], + [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD, HVX_FWD]>, + + InstrItinData , + InstrStage<1, [CVI_XLSHF]>], [9, 9, 5, 5, 2], + [HVX_FWD, HVX_FWD, HVX_FWD, HVX_FWD, Hex_FWD]>, + + InstrItinData , + InstrStage<1, [CVI_MPY0, CVI_MPY1, CVI_SHIFT, CVI_XLANE]>], [9, 7, 7], + [HVX_FWD, HVX_FWD, HVX_FWD]>, + + InstrItinData , + InstrStage<1, [CVI_XLANE]>], [9, 5, 5, 2], + [HVX_FWD, HVX_FWD, HVX_FWD, Hex_FWD]>, + + InstrItinData , + InstrStage<1, [CVI_ST], 0>, + InstrStage<1, [CVI_MPY01, CVI_XLSHF]>], [7, 1, 2, 7, 7], + [HVX_FWD, Hex_FWD, Hex_FWD, HVX_FWD, HVX_FWD]>, + + InstrItinData , + InstrStage<1, [CVI_XLANE]>], [9, 2], + [HVX_FWD, Hex_FWD]>, + + InstrItinData , + InstrStage<1, [CVI_MPY0, CVI_MPY1]>], [9, 7], + [HVX_FWD, HVX_FWD]>, + + InstrItinData , + InstrStage<1, [CVI_ALL_NOMEM]>], [9, 5, 2], + [HVX_FWD, HVX_FWD, Hex_FWD]>, + + InstrItinData , + InstrStage<1, [CVI_MPY0, CVI_MPY1]>], [9, 5, 2], + [HVX_FWD, HVX_FWD, Hex_FWD]>, + + InstrItinData , + InstrStage<1, [CVI_MPY01]>], [9, 7, 5, 2], + [HVX_FWD, HVX_FWD, HVX_FWD, Hex_FWD]>, + + InstrItinData , + InstrStage<1, [CVI_LD]>], [9, 3, 1, 2], + [HVX_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>, + + InstrItinData , + InstrStage<1, [CVI_ST]>], [3, 1, 2, 5], + [Hex_FWD, Hex_FWD, Hex_FWD, HVX_FWD]>, + + InstrItinData , + InstrStage<1, [CVI_XLANE]>], [9, 
5, 2], + [HVX_FWD, HVX_FWD, Hex_FWD]>, + + InstrItinData , + InstrStage<1, [CVI_LD], 0>, + InstrStage<1, [CVI_MPY01, CVI_XLSHF]>], [1, 2, 7], + [Hex_FWD, Hex_FWD, HVX_FWD]>, + + InstrItinData , + InstrStage<1, [CVI_MPY01, CVI_XLSHF]>], [9, 7, 7, 7], + [HVX_FWD, HVX_FWD, HVX_FWD, HVX_FWD]>, + + InstrItinData , + InstrStage<1, [CVI_ST]>], [2, 1, 2, 5], + [Hex_FWD, Hex_FWD, Hex_FWD, HVX_FWD]>, + + InstrItinData , + InstrStage<1, [CVI_MPY01, CVI_XLSHF]>], [9], + [HVX_FWD]>, + + InstrItinData , + InstrStage<1, [CVI_ST], 0>, + InstrStage<1, [CVI_MPY01, CVI_XLSHF]>], [1, 2, 7, 7], + [Hex_FWD, Hex_FWD, HVX_FWD, HVX_FWD]>, + + InstrItinData , + InstrStage<1, [CVI_MPY01]>], [9, 7, 5, 2], + [HVX_FWD, HVX_FWD, HVX_FWD, Hex_FWD]>, + + InstrItinData , + InstrStage<1, [CVI_MPY01]>], [9, 5, 5], + [HVX_FWD, HVX_FWD, HVX_FWD]>, + + InstrItinData , + InstrStage<1, [CVI_SHIFT]>], [9, 5, 2], + [HVX_FWD, HVX_FWD, Hex_FWD]>, + + InstrItinData , + InstrStage<1, [CVI_ALL]>], [3, 2], + [HVX_FWD, Hex_FWD]>, + + InstrItinData , + InstrStage<1, [CVI_LD], 0>, + InstrStage<1, [CVI_MPY0, CVI_MPY1, CVI_SHIFT, CVI_XLANE]>], [7, 1, 2, 7], + [HVX_FWD, Hex_FWD, Hex_FWD, HVX_FWD]>, + + InstrItinData , + InstrStage<1, [CVI_MPY0, CVI_MPY1, CVI_SHIFT, CVI_XLANE]>], [9, 9, 7, 7, 7], + [HVX_FWD, HVX_FWD, HVX_FWD, HVX_FWD, HVX_FWD]>, + + InstrItinData , + InstrStage<1, [CVI_MPY0, CVI_MPY1, CVI_SHIFT, CVI_XLANE]>], [9, 7, 7], + [HVX_FWD, HVX_FWD, HVX_FWD]>, + + InstrItinData , + InstrStage<1, [CVI_XLSHF]>], [9, 5, 5, 2], + [HVX_FWD, HVX_FWD, HVX_FWD, Hex_FWD]>, + + InstrItinData , + InstrStage<1, [CVI_ST], 0>, + InstrStage<1, [CVI_MPY0, CVI_MPY1, CVI_SHIFT, CVI_XLANE]>], [7, 1, 2, 7, 7], + [HVX_FWD, Hex_FWD, Hex_FWD, HVX_FWD, HVX_FWD]>, + + InstrItinData , + InstrStage<1, [CVI_MPY01]>], [9, 5, 5, 2], + [HVX_FWD, HVX_FWD, HVX_FWD, Hex_FWD]>, + + InstrItinData , + InstrStage<1, [CVI_ALL_NOMEM]>], [9, 7, 5, 2], + [HVX_FWD, HVX_FWD, HVX_FWD, Hex_FWD]>, + + InstrItinData , + InstrStage<1, [CVI_XLANE]>], [9, 5], + [HVX_FWD, HVX_FWD]>, + + InstrItinData , + InstrStage<1, [CVI_XLSHF]>], [9, 7, 5, 5, 2], + [HVX_FWD, HVX_FWD, HVX_FWD, HVX_FWD, Hex_FWD]>, + + InstrItinData , + InstrStage<1, [CVI_ST], 0>, + InstrStage<1, [CVI_MPY0, CVI_MPY1, CVI_SHIFT, CVI_XLANE]>], [1, 2, 7, 7], + [Hex_FWD, Hex_FWD, HVX_FWD, HVX_FWD]>, + + InstrItinData , + InstrStage<1, [CVI_ST], 0>, + InstrStage<1, [CVI_MPY0, CVI_MPY1, CVI_SHIFT, CVI_XLANE]>], [2, 1, 2, 7], + [Hex_FWD, Hex_FWD, Hex_FWD, HVX_FWD]>, + + InstrItinData , + InstrStage<1, [CVI_ZW]>], [3, 1, 2], + [Hex_FWD, Hex_FWD, Hex_FWD]>, + + InstrItinData , + InstrStage<1, [CVI_MPY0, CVI_MPY1]>], [9, 7, 5, 5], + [HVX_FWD, HVX_FWD, HVX_FWD, HVX_FWD]>, + + InstrItinData , + InstrStage<1, [CVI_LD], 0>, + InstrStage<1, [CVI_MPY0, CVI_MPY1, CVI_SHIFT, CVI_XLANE]>], [1, 2, 7], + [Hex_FWD, Hex_FWD, HVX_FWD]>, + + InstrItinData , + InstrStage<1, [CVI_LD], 0>, + InstrStage<1, [CVI_MPY01, CVI_XLSHF]>], [7, 1, 2, 7], + [HVX_FWD, Hex_FWD, Hex_FWD, HVX_FWD]>, + + InstrItinData , + InstrStage<1, [SLOT1], 0>, + InstrStage<1, [CVI_LD], 0>, + InstrStage<1, [CVI_XLANE]>], [9, 1, 2], + [HVX_FWD, Hex_FWD, Hex_FWD]>, + + InstrItinData ], [9, 7, 7], + [HVX_FWD, HVX_FWD, HVX_FWD]>, + + InstrItinData , + InstrStage<1, [CVI_ST]>], [1, 2, 5], + [Hex_FWD, Hex_FWD, HVX_FWD]>, + + InstrItinData , + InstrStage<1, [CVI_LD], 0>, + InstrStage<1, [CVI_MPY0, CVI_MPY1, CVI_SHIFT, CVI_XLANE]>], [9, 2, 1, 2], + [HVX_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>, + + InstrItinData , + InstrStage<1, [CVI_MPY0, CVI_MPY1]>], [9, 7, 2], + [HVX_FWD, HVX_FWD, 
Hex_FWD]>, + + InstrItinData , + InstrStage<1, [CVI_MPY01, CVI_XLSHF]>], [9, 2, 7, 7], + [HVX_FWD, Hex_FWD, HVX_FWD, HVX_FWD]>, + + InstrItinData , + InstrStage<1, [CVI_MPY0, CVI_MPY1]>], [9, 7, 5, 2], + [HVX_FWD, HVX_FWD, HVX_FWD, Hex_FWD]>, + + InstrItinData , + InstrStage<1, [CVI_ALL]>], [2], + [Hex_FWD]>, + + InstrItinData , + InstrStage<1, [CVI_MPY01, CVI_XLSHF]>], [9, 7], + [HVX_FWD, HVX_FWD]>, + + InstrItinData , + InstrStage<1, [CVI_LD]>], [9, 3, 2, 1, 2], + [HVX_FWD, Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>, + + InstrItinData , + InstrStage<1, [CVI_MPY01]>], [9, 7, 5, 5, 2], + [HVX_FWD, HVX_FWD, HVX_FWD, HVX_FWD, Hex_FWD]>, + + InstrItinData , + InstrStage<1, [CVI_LD], 0>, + InstrStage<1, [CVI_MPY0, CVI_MPY1, CVI_SHIFT, CVI_XLANE]>], [9, 1, 2], + [HVX_FWD, Hex_FWD, Hex_FWD]>, + + InstrItinData , + InstrStage<1, [CVI_MPY0, CVI_MPY1]>], [9, 5, 5], + [HVX_FWD, HVX_FWD, HVX_FWD]>, + + InstrItinData , + InstrStage<1, [CVI_MPY0, CVI_MPY1]>], [9, 2], + [HVX_FWD, Hex_FWD]>, + + InstrItinData , + InstrStage<1, [CVI_ST], 0>, + InstrStage<1, [CVI_MPY0, CVI_MPY1, CVI_SHIFT, CVI_XLANE]>], [1, 2, 7], + [Hex_FWD, Hex_FWD, HVX_FWD]>, + + InstrItinData , + InstrStage<1, [SLOT1], 0>, + InstrStage<1, [CVI_ST], 0>, + InstrStage<1, [CVI_XLANE]>], [3, 2, 1, 2, 5], + [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD, HVX_FWD]>, + + InstrItinData , + InstrStage<1, [CVI_MPY0, CVI_MPY1], 0>, + InstrStage<1, [CVI_SHIFT, CVI_XLANE]>], [9, 5, 2], + [HVX_FWD, HVX_FWD, Hex_FWD]>, + + InstrItinData , + InstrStage<1, [CVI_MPY0, CVI_MPY1]>], [9, 7, 7], + [HVX_FWD, HVX_FWD, HVX_FWD]>, + + InstrItinData , + InstrStage<1, [CVI_MPY01]>], [9, 5, 5], + [HVX_FWD, HVX_FWD, HVX_FWD]>, + + InstrItinData , + InstrStage<1, [CVI_MPY01, CVI_XLSHF]>], [9, 7, 7], + [HVX_FWD, HVX_FWD, HVX_FWD]>, + + InstrItinData , + InstrStage<1, [CVI_MPY01]>], [9, 5, 2], + [HVX_FWD, HVX_FWD, Hex_FWD]>, + + InstrItinData , + InstrStage<1, [CVI_ZW]>], [2, 1, 2], + [Hex_FWD, Hex_FWD, Hex_FWD]>, + + InstrItinData , + InstrStage<1, [CVI_XLSHF]>], [9, 7, 5, 5], + [HVX_FWD, HVX_FWD, HVX_FWD, HVX_FWD]>, + + InstrItinData , + InstrStage<1, [SLOT1], 0>, + InstrStage<1, [CVI_ST], 0>, + InstrStage<1, [CVI_XLANE]>], [3, 1, 2, 5], + [Hex_FWD, Hex_FWD, Hex_FWD, HVX_FWD]>, + + InstrItinData , + InstrStage<1, [CVI_MPY0, CVI_MPY1, CVI_SHIFT, CVI_XLANE]>], [9, 9, 7, 7], + [HVX_FWD, HVX_FWD, HVX_FWD, HVX_FWD]>, + + InstrItinData , + InstrStage<1, [CVI_ALL]>], [3], + [HVX_FWD]>, + + InstrItinData , + InstrStage<1, [CVI_MPY01]>], [9, 7, 5, 2, 2], + [HVX_FWD, HVX_FWD, HVX_FWD, Hex_FWD, Hex_FWD]>, + + InstrItinData , + InstrStage<1, [CVI_ZW]>], [1, 2], + [Hex_FWD, Hex_FWD]>, + + InstrItinData , + InstrStage<1, [CVI_ST]>], [3, 2, 1, 2, 5], + [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD, HVX_FWD]>, + + InstrItinData , + InstrStage<1, [CVI_MPY0, CVI_MPY1]>], [9, 5, 5, 2], + [HVX_FWD, HVX_FWD, HVX_FWD, Hex_FWD]>, + + InstrItinData , + InstrStage<1, [CVI_MPY01]>], [9, 5, 2], + [HVX_FWD, HVX_FWD, Hex_FWD]>, + + InstrItinData , + InstrStage<1, [SLOT1], 0>, + InstrStage<1, [CVI_ST], 0>, + InstrStage<1, [CVI_XLANE]>], [1, 2, 5], + [Hex_FWD, Hex_FWD, HVX_FWD]> + ]; +} + +class DepHVXItinV69 { + list DepHVXItinV69_list = [ + InstrItinData , + InstrStage<1, [CVI_XLSHF]>], [9, 5], + [HVX_FWD, HVX_FWD]>, + + InstrItinData , + InstrStage<1, [CVI_SHIFT]>], [9, 5, 5], + [HVX_FWD, HVX_FWD, HVX_FWD]>, + + InstrItinData , + InstrStage<1, [CVI_MPY01]>], [9, 7, 5, 5], + [HVX_FWD, HVX_FWD, HVX_FWD, HVX_FWD]>, + + InstrItinData , + InstrStage<1, [CVI_MPY01]>], [9, 5], + [HVX_FWD, HVX_FWD]>, + + InstrItinData , + 
InstrStage<1, [CVI_MPY01]>], [9, 5, 2], + [HVX_FWD, HVX_FWD, Hex_FWD]>, + + InstrItinData , + InstrStage<1, [CVI_MPY0, CVI_MPY1, CVI_SHIFT, CVI_XLANE]>], [9, 7], + [HVX_FWD, HVX_FWD]>, + + InstrItinData , + InstrStage<1, [SLOT1], 0>, + InstrStage<1, [CVI_ST], 0>, + InstrStage<1, [CVI_XLANE]>], [2, 1, 2, 5], + [Hex_FWD, Hex_FWD, Hex_FWD, HVX_FWD]>, + + InstrItinData , + InstrStage<1, [CVI_ALL]>], [], + []>, + + InstrItinData , + InstrStage<1, [CVI_MPY0, CVI_MPY1], 0>, + InstrStage<1, [CVI_SHIFT, CVI_XLANE]>], [9, 7, 5, 2], + [HVX_FWD, HVX_FWD, HVX_FWD, Hex_FWD]>, + + InstrItinData , + InstrStage<1, [CVI_SHIFT]>], [9, 5, 5, 2], + [HVX_FWD, HVX_FWD, HVX_FWD, Hex_FWD]>, + + InstrItinData , + InstrStage<1, [CVI_ST], 0>, + InstrStage<1, [CVI_MPY0, CVI_MPY1, CVI_SHIFT, CVI_XLANE]>], [3, 7, 1, 2, 7], + [Hex_FWD, HVX_FWD, Hex_FWD, Hex_FWD, HVX_FWD]>, + + InstrItinData , + InstrStage<1, [CVI_MPY01]>], [9, 5, 2, 2], + [HVX_FWD, HVX_FWD, Hex_FWD, Hex_FWD]>, + + InstrItinData , + InstrStage<1, [CVI_LD], 0>, + InstrStage<1, [CVI_MPY0, CVI_MPY1, CVI_SHIFT, CVI_XLANE]>], [9, 3, 1, 2], + [HVX_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>, + + InstrItinData , + InstrStage<1, [CVI_ST]>], [3, 1, 2], + [Hex_FWD, Hex_FWD, Hex_FWD]>, + + InstrItinData ], [9, 7], + [HVX_FWD, HVX_FWD]>, + + InstrItinData , + InstrStage<1, [CVI_MPY0, CVI_MPY1, CVI_SHIFT, CVI_XLANE]>], [9, 7, 7, 7], + [HVX_FWD, HVX_FWD, HVX_FWD, HVX_FWD]>, + + InstrItinData , + InstrStage<1, [CVI_ALL_NOMEM]>], [9, 3, 5, 2], + [HVX_FWD, Hex_FWD, HVX_FWD, Hex_FWD]>, + + InstrItinData , + InstrStage<1, [CVI_MPY01]>], [9, 5, 5, 2], + [HVX_FWD, HVX_FWD, HVX_FWD, Hex_FWD]>, + + InstrItinData , + InstrStage<1, [CVI_XLSHF]>], [9, 7, 5], + [HVX_FWD, HVX_FWD, HVX_FWD]>, + + InstrItinData , + InstrStage<1, [CVI_ALL_NOMEM]>], [9, 3, 7, 5, 2], + [HVX_FWD, Hex_FWD, HVX_FWD, HVX_FWD, Hex_FWD]>, + + InstrItinData , + InstrStage<1, [CVI_MPY0, CVI_MPY1]>], [9, 7, 7, 2], + [HVX_FWD, HVX_FWD, HVX_FWD, Hex_FWD]>, + + InstrItinData , + InstrStage<1, [CVI_SHIFT]>], [9, 7, 5, 2], + [HVX_FWD, HVX_FWD, HVX_FWD, Hex_FWD]>, + + InstrItinData , + InstrStage<1, [CVI_MPY0, CVI_MPY1]>], [9, 7, 5, 5], + [HVX_FWD, HVX_FWD, HVX_FWD, HVX_FWD]>, + + InstrItinData , + InstrStage<1, [CVI_LD]>], [9, 2, 1, 2], + [HVX_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>, + + InstrItinData , + InstrStage<1, [CVI_MPY0, CVI_MPY1, CVI_SHIFT, CVI_XLANE]>], [9, 2, 7], + [HVX_FWD, Hex_FWD, HVX_FWD]>, + + InstrItinData , + InstrStage<1, [CVI_ZW]>], [3, 2, 1, 2], + [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>, + + InstrItinData , + InstrStage<1, [SLOT1], 0>, + InstrStage<1, [CVI_LD], 0>, + InstrStage<1, [CVI_XLANE]>], [9, 3, 1, 2], + [HVX_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>, + + InstrItinData , + InstrStage<1, [CVI_MPY0, CVI_MPY1]>], [9, 5], + [HVX_FWD, HVX_FWD]>, + + InstrItinData , InstrStage<1, [CVI_ST]>], [1, 2], [Hex_FWD, Hex_FWD]>, @@ -3265,6 +4239,11 @@ class DepHVXItinV68 { InstrStage<1, [CVI_XLANE]>], [9, 5, 5], [HVX_FWD, HVX_FWD, HVX_FWD]>, + InstrItinData , + InstrStage<1, [CVI_MPY0, CVI_MPY1]>], [9, 7, 5, 5, 2], + [HVX_FWD, HVX_FWD, HVX_FWD, HVX_FWD, Hex_FWD]>, + InstrItinData , InstrStage<1, [CVI_SHIFT]>], [9, 5], @@ -3312,6 +4291,11 @@ class DepHVXItinV68 { InstrStage<1, [CVI_XLANE]>], [9, 2], [HVX_FWD, Hex_FWD]>, + InstrItinData , + InstrStage<1, [CVI_MPY0, CVI_MPY1]>], [9, 7], + [HVX_FWD, HVX_FWD]>, + InstrItinData , InstrStage<1, [CVI_ALL_NOMEM]>], [9, 5, 2], @@ -3369,6 +4353,16 @@ class DepHVXItinV68 { InstrStage<1, [CVI_MPY01, CVI_XLSHF]>], [1, 2, 7, 7], [Hex_FWD, Hex_FWD, HVX_FWD, HVX_FWD]>, + InstrItinData , + 
InstrStage<1, [CVI_MPY0, CVI_MPY1]>], [9, 7, 5, 2], + [HVX_FWD, HVX_FWD, HVX_FWD, Hex_FWD]>, + + InstrItinData , + InstrStage<1, [CVI_MPY0, CVI_MPY1]>], [9, 5, 5], + [HVX_FWD, HVX_FWD, HVX_FWD]>, + InstrItinData , InstrStage<1, [CVI_SHIFT]>], [9, 5, 2], @@ -3443,6 +4437,11 @@ class DepHVXItinV68 { InstrStage<1, [CVI_ZW]>], [3, 1, 2], [Hex_FWD, Hex_FWD, Hex_FWD]>, + InstrItinData , + InstrStage<1, [CVI_MPY0, CVI_MPY1]>], [9, 7, 5, 5], + [HVX_FWD, HVX_FWD, HVX_FWD, HVX_FWD]>, + InstrItinData , InstrStage<1, [CVI_LD], 0>, @@ -3462,6 +4461,10 @@ class DepHVXItinV68 { InstrStage<1, [CVI_XLANE]>], [9, 1, 2], [HVX_FWD, Hex_FWD, Hex_FWD]>, + InstrItinData ], [9, 7, 7], + [HVX_FWD, HVX_FWD, HVX_FWD]>, + InstrItinData , InstrStage<1, [CVI_ST]>], [1, 2, 5], @@ -3543,6 +4546,11 @@ class DepHVXItinV68 { InstrStage<1, [CVI_SHIFT, CVI_XLANE]>], [9, 5, 2], [HVX_FWD, HVX_FWD, Hex_FWD]>, + InstrItinData , + InstrStage<1, [CVI_MPY0, CVI_MPY1]>], [9, 7, 7], + [HVX_FWD, HVX_FWD, HVX_FWD]>, + InstrItinData , InstrStage<1, [CVI_MPY01]>], [9, 5, 5], @@ -3553,6 +4561,11 @@ class DepHVXItinV68 { InstrStage<1, [CVI_MPY01, CVI_XLSHF]>], [9, 7, 7], [HVX_FWD, HVX_FWD, HVX_FWD]>, + InstrItinData , + InstrStage<1, [CVI_MPY0, CVI_MPY1]>], [9, 5, 2], + [HVX_FWD, HVX_FWD, Hex_FWD]>, + InstrItinData , InstrStage<1, [CVI_ZW]>], [2, 1, 2], @@ -3595,6 +4608,11 @@ class DepHVXItinV68 { InstrStage<1, [CVI_ST]>], [3, 2, 1, 2, 5], [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD, HVX_FWD]>, + InstrItinData , + InstrStage<1, [CVI_MPY0, CVI_MPY1]>], [9, 5, 5, 2], + [HVX_FWD, HVX_FWD, HVX_FWD, Hex_FWD]>, + InstrItinData , InstrStage<1, [CVI_MPY01]>], [9, 5, 2], diff --git a/llvm/lib/Target/Hexagon/HexagonDepIICScalar.td b/llvm/lib/Target/Hexagon/HexagonDepIICScalar.td index a3766652794b..a979bafe8e33 100644 --- a/llvm/lib/Target/Hexagon/HexagonDepIICScalar.td +++ b/llvm/lib/Target/Hexagon/HexagonDepIICScalar.td @@ -7338,3 +7338,771 @@ class DepScalarItinV68 { [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]> ]; } + +class DepScalarItinV69 { + list DepScalarItinV69_list = [ + InstrItinData ], [2, 1, 2, 3], + [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>, + + InstrItinData ], [4, 2, 2], + [Hex_FWD, Hex_FWD, Hex_FWD]>, + + InstrItinData ], [4, 2, 1, 1], + [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>, + + InstrItinData ], [5, 1, 1], + [Hex_FWD, Hex_FWD, Hex_FWD]>, + + InstrItinData ], [2, 3], + [Hex_FWD, Hex_FWD]>, + + InstrItinData ], [4, 3, 1, 2], + [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>, + + InstrItinData ], [5, 2, 1, 1], + [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>, + + InstrItinData ], [2, 1, 1, 2, 3], + [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>, + + InstrItinData ], [1], + [Hex_FWD]>, + + InstrItinData ], [4, 2, 2], + [Hex_FWD, Hex_FWD, Hex_FWD]>, + + InstrItinData ], [3, 2, 3], + [Hex_FWD, Hex_FWD, Hex_FWD]>, + + InstrItinData ], [2], + [Hex_FWD]>, + + InstrItinData ], [2], + [Hex_FWD]>, + + InstrItinData ], [2, 2], + [Hex_FWD, Hex_FWD]>, + + InstrItinData ], [3, 3, 1], + [Hex_FWD, Hex_FWD, Hex_FWD]>, + + InstrItinData ], [3, 2], + [Hex_FWD, Hex_FWD]>, + + InstrItinData ], [1, 2, 2], + [Hex_FWD, Hex_FWD, Hex_FWD]>, + + InstrItinData ], [4, 2, 1, 1], + [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>, + + InstrItinData ], [3], + [Hex_FWD]>, + + InstrItinData ], [3, 2, 2, 2], + [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>, + + InstrItinData ], [4, 3, 2, 1, 2], + [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>, + + InstrItinData ], [3, 4, 2, 2, 2], + [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>, + + InstrItinData ], [4, 2, 2], + [Hex_FWD, Hex_FWD, Hex_FWD]>, + + InstrItinData ], [2, 1, 1, 2, 
3], + [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>, + + InstrItinData ], [4, 2, 2], + [Hex_FWD, Hex_FWD, Hex_FWD]>, + + InstrItinData ], [1, 2], + [Hex_FWD, Hex_FWD]>, + + InstrItinData ], [], + []>, + + InstrItinData ], [4, 1], + [Hex_FWD, Hex_FWD]>, + + InstrItinData ], [3, 3, 2], + [Hex_FWD, Hex_FWD, Hex_FWD]>, + + InstrItinData ], [2, 2, 2], + [Hex_FWD, Hex_FWD, Hex_FWD]>, + + InstrItinData ], [1, 1, 2, 3], + [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>, + + InstrItinData ], [1, 1], + [Hex_FWD, Hex_FWD]>, + + InstrItinData ], [4, 2, 2, 2], + [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>, + + InstrItinData ], [1], + [Hex_FWD]>, + + InstrItinData ], [2, 2], + [Hex_FWD, Hex_FWD]>, + + InstrItinData ], [1], + [Hex_FWD]>, + + InstrItinData ], [4, 1, 2], + [Hex_FWD, Hex_FWD, Hex_FWD]>, + + InstrItinData ], [3, 2, 2], + [Hex_FWD, Hex_FWD, Hex_FWD]>, + + InstrItinData ], [4, 4, 2, 1, 1], + [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>, + + InstrItinData ], [3, 2], + [Hex_FWD, Hex_FWD]>, + + InstrItinData ], [4, 2], + [Hex_FWD, Hex_FWD]>, + + InstrItinData ], [3], + [Hex_FWD]>, + + InstrItinData ], [3, 2, 2], + [Hex_FWD, Hex_FWD, Hex_FWD]>, + + InstrItinData ], [3, 1], + [Hex_FWD, Hex_FWD]>, + + InstrItinData ], [4, 1, 2], + [Hex_FWD, Hex_FWD, Hex_FWD]>, + + InstrItinData ], [2, 2], + [Hex_FWD, Hex_FWD]>, + + InstrItinData ], [4, 3, 2, 2], + [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>, + + InstrItinData ], [4, 3, 1, 2, 1], + [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>, + + InstrItinData ], [1, 2], + [Hex_FWD, Hex_FWD]>, + + InstrItinData ], [2], + [Hex_FWD]>, + + InstrItinData ], [4, 2, 1, 1, 2], + [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>, + + InstrItinData ], [2], + [Hex_FWD]>, + + InstrItinData ], [4, 1], + [Hex_FWD, Hex_FWD]>, + + InstrItinData ], [3, 2, 2], + [Hex_FWD, Hex_FWD, Hex_FWD]>, + + InstrItinData ], [2, 2], + [Hex_FWD, Hex_FWD]>, + + InstrItinData ], [4, 3, 2], + [Hex_FWD, Hex_FWD, Hex_FWD]>, + + InstrItinData ], [3], + [Hex_FWD]>, + + InstrItinData ], [4, 2, 2, 1], + [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>, + + InstrItinData ], [4, 1, 2], + [Hex_FWD, Hex_FWD, Hex_FWD]>, + + InstrItinData ], [3, 2, 2], + [Hex_FWD, Hex_FWD, Hex_FWD]>, + + InstrItinData ], [], + []>, + + InstrItinData ], [4, 1, 1], + [Hex_FWD, Hex_FWD, Hex_FWD]>, + + InstrItinData ], [1, 2, 2, 3], + [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>, + + InstrItinData ], [3, 2], + [Hex_FWD, Hex_FWD]>, + + InstrItinData ], [2, 2], + [Hex_FWD, Hex_FWD]>, + + InstrItinData ], [2, 2], + [Hex_FWD, Hex_FWD]>, + + InstrItinData ], [1, 1, 2], + [Hex_FWD, Hex_FWD, Hex_FWD]>, + + InstrItinData ], [2, 2], + [Hex_FWD, Hex_FWD]>, + + InstrItinData ], [4, 1, 1], + [Hex_FWD, Hex_FWD, Hex_FWD]>, + + InstrItinData ], [3, 2, 2], + [Hex_FWD, Hex_FWD, Hex_FWD]>, + + InstrItinData ], [4, 3, 1, 2, 2], + [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>, + + InstrItinData ], [3, 2, 2], + [Hex_FWD, Hex_FWD, Hex_FWD]>, + + InstrItinData ], [1, 2, 3], + [Hex_FWD, Hex_FWD, Hex_FWD]>, + + InstrItinData ], [4, 2, 2, 2], + [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>, + + InstrItinData ], [4, 4, 3, 1, 2], + [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>, + + InstrItinData ], [2], + [Hex_FWD]>, + + InstrItinData ], [4], + [Hex_FWD]>, + + InstrItinData ], [4, 1], + [Hex_FWD, Hex_FWD]>, + + InstrItinData ], [3, 2, 2], + [Hex_FWD, Hex_FWD, Hex_FWD]>, + + InstrItinData ], [4, 2], + [Hex_FWD, Hex_FWD]>, + + InstrItinData ], [3, 1, 2, 2], + [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>, + + InstrItinData ], [2, 2], + [Hex_FWD, Hex_FWD]>, + + InstrItinData ], [4, 1], + [Hex_FWD, Hex_FWD]>, + + 
InstrItinData ], [2, 1], + [Hex_FWD, Hex_FWD]>, + + InstrItinData ], [2, 3], + [Hex_FWD, Hex_FWD]>, + + InstrItinData ], [4, 1, 1], + [Hex_FWD, Hex_FWD, Hex_FWD]>, + + InstrItinData ], [3, 2, 2, 2], + [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>, + + InstrItinData ], [2, 2], + [Hex_FWD, Hex_FWD]>, + + InstrItinData ], [3, 2, 2], + [Hex_FWD, Hex_FWD, Hex_FWD]>, + + InstrItinData ], [4, 4, 2, 2], + [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>, + + InstrItinData ], [4, 2], + [Hex_FWD, Hex_FWD]>, + + InstrItinData ], [3, 1, 2], + [Hex_FWD, Hex_FWD, Hex_FWD]>, + + InstrItinData ], [4, 3, 2, 1, 2, 2], + [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>, + + InstrItinData ], [], + []>, + + InstrItinData ], [4, 1, 1, 2], + [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>, + + InstrItinData ], [1, 3], + [Hex_FWD, Hex_FWD]>, + + InstrItinData ], [3, 2], + [Hex_FWD, Hex_FWD]>, + + InstrItinData ], [1, 2, 2], + [Hex_FWD, Hex_FWD, Hex_FWD]>, + + InstrItinData ], [4, 2, 2], + [Hex_FWD, Hex_FWD, Hex_FWD]>, + + InstrItinData ], [5, 5, 1], + [Hex_FWD, Hex_FWD, Hex_FWD]>, + + InstrItinData ], [4, 2, 1, 1], + [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>, + + InstrItinData ], [2, 1, 2, 3], + [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>, + + InstrItinData ], [4, 3, 2], + [Hex_FWD, Hex_FWD, Hex_FWD]>, + + InstrItinData ], [3, 1, 1, 2, 3], + [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>, + + InstrItinData ], [1, 2, 2], + [Hex_FWD, Hex_FWD, Hex_FWD]>, + + InstrItinData ], [4, 2, 2], + [Hex_FWD, Hex_FWD, Hex_FWD]>, + + InstrItinData ], [4, 3, 2], + [Hex_FWD, Hex_FWD, Hex_FWD]>, + + InstrItinData ], [1, 2, 2, 3], + [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>, + + InstrItinData ], [4, 2], + [Hex_FWD, Hex_FWD]>, + + InstrItinData ], [4, 2, 2], + [Hex_FWD, Hex_FWD, Hex_FWD]>, + + InstrItinData ], [4, 2, 2], + [Hex_FWD, Hex_FWD, Hex_FWD]>, + + InstrItinData ], [3, 1, 1, 2, 3], + [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>, + + InstrItinData ], [3, 2], + [Hex_FWD, Hex_FWD]>, + + InstrItinData ], [3, 1, 2, 3], + [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>, + + InstrItinData ], [3, 1, 2], + [Hex_FWD, Hex_FWD, Hex_FWD]>, + + InstrItinData ], [], + []>, + + InstrItinData ], [2, 1], + [Hex_FWD, Hex_FWD]>, + + InstrItinData ], [4, 2, 2], + [Hex_FWD, Hex_FWD, Hex_FWD]>, + + InstrItinData ], [1, 1, 2, 3], + [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>, + + InstrItinData ], [3, 3, 2], + [Hex_FWD, Hex_FWD, Hex_FWD]>, + + InstrItinData ], [5, 1], + [Hex_FWD, Hex_FWD]>, + + InstrItinData ], [], + []>, + + InstrItinData ], [4, 2, 2], + [Hex_FWD, Hex_FWD, Hex_FWD]>, + + InstrItinData ], [1, 2, 3], + [Hex_FWD, Hex_FWD, Hex_FWD]>, + + InstrItinData ], [3, 2, 2], + [Hex_FWD, Hex_FWD, Hex_FWD]>, + + InstrItinData ], [2, 2, 2], + [Hex_FWD, Hex_FWD, Hex_FWD]>, + + InstrItinData ], [5, 2, 1, 1], + [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>, + + InstrItinData ], [5, 2, 1, 1, 2], + [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>, + + InstrItinData ], [3, 2, 1, 2, 3], + [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>, + + InstrItinData ], [3, 2], + [Hex_FWD, Hex_FWD]>, + + InstrItinData ], [4, 2, 2], + [Hex_FWD, Hex_FWD, Hex_FWD]>, + + InstrItinData ], [3, 2, 2], + [Hex_FWD, Hex_FWD, Hex_FWD]>, + + InstrItinData ], [4, 2, 1, 2], + [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>, + + InstrItinData ], [3, 1, 2, 3], + [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>, + + InstrItinData ], [1, 1], + [Hex_FWD, Hex_FWD]>, + + InstrItinData ], [4, 2, 1, 2, 2], + [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>, + + InstrItinData ], [4, 2, 1, 1, 2], + [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>, + + InstrItinData ], [4, 2, 2, 
2], + [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>, + + InstrItinData ], [], + []>, + + InstrItinData ], [3, 2, 2, 2], + [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>, + + InstrItinData ], [4, 2], + [Hex_FWD, Hex_FWD]>, + + InstrItinData ], [1, 2, 3], + [Hex_FWD, Hex_FWD, Hex_FWD]>, + + InstrItinData ], [4, 1, 2, 2], + [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>, + + InstrItinData ], [4, 3, 2, 2], + [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>, + + InstrItinData ], [3, 1, 2, 2, 3], + [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>, + + InstrItinData ], [1, 2, 3], + [Hex_FWD, Hex_FWD, Hex_FWD]>, + + InstrItinData ], [4, 1], + [Hex_FWD, Hex_FWD]>, + + InstrItinData ], [1], + [Hex_FWD]>, + + InstrItinData ], [3, 1, 2, 2, 3], + [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>, + + InstrItinData ], [4, 3, 1, 1, 2], + [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>, + + InstrItinData ], [], + []>, + + InstrItinData ], [2, 2, 3], + [Hex_FWD, Hex_FWD, Hex_FWD]>, + + InstrItinData ], [3, 2, 3], + [Hex_FWD, Hex_FWD, Hex_FWD]>, + + InstrItinData ], [4, 2, 2, 2, 2], + [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>, + + InstrItinData ], [4, 1, 1, 2], + [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>, + + InstrItinData ], [4, 2, 2], + [Hex_FWD, Hex_FWD, Hex_FWD]>, + + InstrItinData ], [4, 1, 1], + [Hex_FWD, Hex_FWD, Hex_FWD]>, + + InstrItinData ], [3, 2], + [Hex_FWD, Hex_FWD]>, + + InstrItinData ], [], + []>, + + InstrItinData ], [3, 2, 1, 2, 3], + [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>, + + InstrItinData ], [2, 2, 3], + [Hex_FWD, Hex_FWD, Hex_FWD]>, + + InstrItinData ], [2, 1, 2, 2], + [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>, + + InstrItinData ], [3, 2, 2], + [Hex_FWD, Hex_FWD, Hex_FWD]>, + + InstrItinData ], [4, 2], + [Hex_FWD, Hex_FWD]>, + + InstrItinData ], [1], + [Hex_FWD]>, + + InstrItinData ], [2], + [Hex_FWD]>, + + InstrItinData ], [4, 2], + [Hex_FWD, Hex_FWD]>, + + InstrItinData ], [4, 1, 1], + [Hex_FWD, Hex_FWD, Hex_FWD]>, + + InstrItinData ], [1], + [Hex_FWD]>, + + InstrItinData ], [4, 1, 1], + [Hex_FWD, Hex_FWD, Hex_FWD]>, + + InstrItinData ], [1], + [Hex_FWD]>, + + InstrItinData ], [2, 1], + [Hex_FWD, Hex_FWD]>, + + InstrItinData ], [2], + [Hex_FWD]>, + + InstrItinData ], [3, 2, 1], + [Hex_FWD, Hex_FWD, Hex_FWD]>, + + InstrItinData ], [4, 2], + [Hex_FWD, Hex_FWD]>, + + InstrItinData ], [4, 1], + [Hex_FWD, Hex_FWD]>, + + InstrItinData ], [3, 2], + [Hex_FWD, Hex_FWD]>, + + InstrItinData ], [4, 1, 2], + [Hex_FWD, Hex_FWD, Hex_FWD]>, + + InstrItinData ], [3, 2], + [Hex_FWD, Hex_FWD]>, + + InstrItinData ], [3, 2], + [Hex_FWD, Hex_FWD]>, + + InstrItinData ], [4, 2, 2], + [Hex_FWD, Hex_FWD, Hex_FWD]>, + + InstrItinData ], [4, 1, 1, 2], + [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>, + + InstrItinData ], [5, 1, 1], + [Hex_FWD, Hex_FWD, Hex_FWD]>, + + InstrItinData ], [4, 2, 2], + [Hex_FWD, Hex_FWD, Hex_FWD]>, + + InstrItinData ], [2], + [Hex_FWD]>, + + InstrItinData ], [4, 3, 1, 2, 3], + [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>, + + InstrItinData ], [3, 2, 2], + [Hex_FWD, Hex_FWD, Hex_FWD]>, + + InstrItinData ], [5, 5, 1, 1], + [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]>, + + InstrItinData ], [2, 2], + [Hex_FWD, Hex_FWD]>, + + InstrItinData ], [4, 2], + [Hex_FWD, Hex_FWD]>, + + InstrItinData ], [4, 2, 1, 2], + [Hex_FWD, Hex_FWD, Hex_FWD, Hex_FWD]> + ]; +} diff --git a/llvm/lib/Target/Hexagon/HexagonDepInstrFormats.td b/llvm/lib/Target/Hexagon/HexagonDepInstrFormats.td index b3f1b6638193..65d36924ba48 100644 --- a/llvm/lib/Target/Hexagon/HexagonDepInstrFormats.td +++ b/llvm/lib/Target/Hexagon/HexagonDepInstrFormats.td @@ -2288,6 +2288,12 @@ 
class Enc_a30110 : OpcodeHexagon { bits <5> Vd32; let Inst{4-0} = Vd32{4-0}; } +class Enc_a33d04 : OpcodeHexagon { + bits <5> Vuu32; + let Inst{12-8} = Vuu32{4-0}; + bits <5> Vd32; + let Inst{4-0} = Vd32{4-0}; +} class Enc_a42857 : OpcodeHexagon { bits <11> Ii; let Inst{21-20} = Ii{10-9}; @@ -3109,6 +3115,14 @@ class Enc_de0214 : OpcodeHexagon { bits <5> Rd32; let Inst{4-0} = Rd32{4-0}; } +class Enc_de5ea0 : OpcodeHexagon { + bits <5> Vuu32; + let Inst{12-8} = Vuu32{4-0}; + bits <5> Vv32; + let Inst{20-16} = Vv32{4-0}; + bits <5> Vd32; + let Inst{4-0} = Vd32{4-0}; +} class Enc_e07374 : OpcodeHexagon { bits <5> Rs32; let Inst{20-16} = Rs32{4-0}; diff --git a/llvm/lib/Target/Hexagon/HexagonDepInstrInfo.td b/llvm/lib/Target/Hexagon/HexagonDepInstrInfo.td index 4f00409c336c..c02988266584 100644 --- a/llvm/lib/Target/Hexagon/HexagonDepInstrInfo.td +++ b/llvm/lib/Target/Hexagon/HexagonDepInstrInfo.td @@ -5824,8 +5824,8 @@ let Inst{31-21} = 0b01010100100; let hasNewValue = 1; let opNewValue = 0; let isSolo = 1; -let Uses = [GOSP]; -let Defs = [GOSP, PC]; +let Uses = [CCR, GOSP]; +let Defs = [CCR, GOSP, PC]; let hasSideEffects = 1; let Constraints = "$Rx32 = $Rx32in"; } @@ -8500,6 +8500,8 @@ let Inst{31-21} = 0b01010010101; let isTerminator = 1; let isIndirectBranch = 1; let isBranch = 1; +let cofRelax1 = 1; +let cofRelax2 = 1; let cofMax1 = 1; } def J4_jumpseti : HInst< @@ -18210,16 +18212,6 @@ let opExtentBits = 18; let opExtentAlign = 2; let opNewValue = 1; } -def PS_trap1 : HInst< -(outs), -(ins u8_0Imm:$Ii), -"trap1(#$Ii)", -tc_53c851ab, TypeJ>, Enc_a51a9a, Requires<[HasPreV65]> { -let Inst{1-0} = 0b00; -let Inst{7-5} = 0b000; -let Inst{13-13} = 0b0; -let Inst{31-16} = 0b0101010010000000; -} def R6_release_at_vi : HInst< (outs), (ins IntRegs:$Rs32), @@ -18964,7 +18956,7 @@ def S2_cabacdecbin : HInst< (outs DoubleRegs:$Rdd32), (ins DoubleRegs:$Rss32, DoubleRegs:$Rtt32), "$Rdd32 = decbin($Rss32,$Rtt32)", -tc_db596beb, TypeS_3op>, Enc_a56825 { +tc_db596beb, TypeS_3op>, Enc_a56825, Requires<[UseCabac]> { let Inst{7-5} = 0b110; let Inst{13-13} = 0b0; let Inst{31-21} = 0b11000001110; @@ -26883,17 +26875,6 @@ let isPseudo = 1; let isCodeGenOnly = 1; let DecoderNamespace = "EXT_mmvec"; } -def V6_ldntnt0 : HInst< -(outs HvxVR:$Vd32), -(ins IntRegs:$Rt32), -"$Vd32 = vmem($Rt32):nt", -PSEUDO, TypeMAPPING>, Requires<[HasV62]> { -let hasNewValue = 1; -let opNewValue = 0; -let isPseudo = 1; -let isCodeGenOnly = 1; -let DecoderNamespace = "EXT_mmvec"; -} def V6_ldp0 : HInst< (outs HvxVR:$Vd32), (ins PredRegs:$Pv4, IntRegs:$Rt32), @@ -27312,6 +27293,30 @@ let isPseudo = 1; let isCodeGenOnly = 1; let DecoderNamespace = "EXT_mmvec"; } +def V6_v10mpyubs10 : HInst< +(outs HvxWR:$Vdd32), +(ins HvxWR:$Vuu32, HvxWR:$Vvv32, u1_0Imm:$Ii), +"$Vdd32.w = v10mpy($Vuu32.ub,$Vvv32.b,#$Ii)", +tc_f175e046, TypeCVI_VX>, Requires<[UseHVXV69]> { +let hasNewValue = 1; +let opNewValue = 0; +let isCVI = 1; +let isPseudo = 1; +let DecoderNamespace = "EXT_mmvec"; +} +def V6_v10mpyubs10_vxx : HInst< +(outs HvxWR:$Vxx32), +(ins HvxWR:$Vxx32in, HvxWR:$Vuu32, HvxWR:$Vvv32, u1_0Imm:$Ii), +"$Vxx32.w += v10mpy($Vuu32.ub,$Vvv32.b,#$Ii)", +tc_4942646a, TypeCVI_VX>, Requires<[UseHVXV69]> { +let hasNewValue = 1; +let opNewValue = 0; +let isAccumulator = 1; +let isCVI = 1; +let isPseudo = 1; +let DecoderNamespace = "EXT_mmvec"; +let Constraints = "$Vxx32 = $Vxx32in"; +} def V6_v6mpyhubs10 : HInst< (outs HvxWR:$Vdd32), (ins HvxWR:$Vuu32, HvxWR:$Vvv32, u2_0Imm:$Ii), @@ -27396,7 +27401,7 @@ def V6_vL32Ub_ai : HInst< (outs HvxVR:$Vd32), (ins 
IntRegs:$Rt32, s4_0Imm:$Ii), "$Vd32 = vmemu($Rt32+#$Ii)", -tc_a7e6707d, TypeCVI_VM_VP_LDU>, Enc_f3f408, Requires<[UseHVXV60]> { +tc_a7e6707d, TypeCVI_VM_VP_LDU>, Enc_f3f408, Requires<[UseHVXV60]>, PostInc_BaseImm { let Inst{7-5} = 0b111; let Inst{12-11} = 0b00; let Inst{31-21} = 0b00101000000; @@ -27408,13 +27413,15 @@ let isCVLoad = 1; let isCVI = 1; let mayLoad = 1; let isRestrictNoSlot1Store = 1; +let BaseOpcode = "V6_vL32Ub_ai"; +let CextOpcode = "V6_vL32Ub"; let DecoderNamespace = "EXT_mmvec"; } def V6_vL32Ub_pi : HInst< (outs HvxVR:$Vd32, IntRegs:$Rx32), (ins IntRegs:$Rx32in, s3_0Imm:$Ii), "$Vd32 = vmemu($Rx32++#$Ii)", -tc_3c56e5ce, TypeCVI_VM_VP_LDU>, Enc_a255dc, Requires<[UseHVXV60]> { +tc_3c56e5ce, TypeCVI_VM_VP_LDU>, Enc_a255dc, Requires<[UseHVXV60]>, PostInc_BaseImm { let Inst{7-5} = 0b111; let Inst{13-11} = 0b000; let Inst{31-21} = 0b00101001000; @@ -27427,6 +27434,7 @@ let isCVI = 1; let mayLoad = 1; let isRestrictNoSlot1Store = 1; let BaseOpcode = "V6_vL32b_pi"; +let CextOpcode = "V6_vL32Ub"; let DecoderNamespace = "EXT_mmvec"; let Constraints = "$Rx32 = $Rx32in"; } @@ -27452,7 +27460,7 @@ def V6_vL32b_ai : HInst< (outs HvxVR:$Vd32), (ins IntRegs:$Rt32, s4_0Imm:$Ii), "$Vd32 = vmem($Rt32+#$Ii)", -tc_c0749f3c, TypeCVI_VM_LD>, Enc_f3f408, Requires<[UseHVXV60]>, PredRel { +tc_c0749f3c, TypeCVI_VM_LD>, Enc_f3f408, Requires<[UseHVXV60]>, PredRel, PostInc_BaseImm { let Inst{7-5} = 0b000; let Inst{12-11} = 0b00; let Inst{31-21} = 0b00101000000; @@ -27465,6 +27473,7 @@ let isCVI = 1; let mayLoad = 1; let isRestrictNoSlot1Store = 1; let BaseOpcode = "V6_vL32b_ai"; +let CextOpcode = "V6_vL32b"; let isCVLoadable = 1; let isPredicable = 1; let DecoderNamespace = "EXT_mmvec"; @@ -27473,7 +27482,7 @@ def V6_vL32b_cur_ai : HInst< (outs HvxVR:$Vd32), (ins IntRegs:$Rt32, s4_0Imm:$Ii), "$Vd32.cur = vmem($Rt32+#$Ii)", -tc_c0749f3c, TypeCVI_VM_LD>, Enc_f3f408, Requires<[UseHVXV60]>, PredRel { +tc_c0749f3c, TypeCVI_VM_LD>, Enc_f3f408, Requires<[UseHVXV60]>, PredRel, PostInc_BaseImm { let Inst{7-5} = 0b001; let Inst{12-11} = 0b00; let Inst{31-21} = 0b00101000000; @@ -27487,6 +27496,7 @@ let CVINew = 1; let mayLoad = 1; let isRestrictNoSlot1Store = 1; let BaseOpcode = "V6_vL32b_cur_ai"; +let CextOpcode = "V6_vL32b_cur"; let isPredicable = 1; let DecoderNamespace = "EXT_mmvec"; } @@ -27560,7 +27570,7 @@ def V6_vL32b_cur_pi : HInst< (outs HvxVR:$Vd32, IntRegs:$Rx32), (ins IntRegs:$Rx32in, s3_0Imm:$Ii), "$Vd32.cur = vmem($Rx32++#$Ii)", -tc_1ba8a0cd, TypeCVI_VM_LD>, Enc_a255dc, Requires<[UseHVXV60]>, PredRel { +tc_1ba8a0cd, TypeCVI_VM_LD>, Enc_a255dc, Requires<[UseHVXV60]>, PredRel, PostInc_BaseImm { let Inst{7-5} = 0b001; let Inst{13-11} = 0b000; let Inst{31-21} = 0b00101001000; @@ -27574,6 +27584,7 @@ let CVINew = 1; let mayLoad = 1; let isRestrictNoSlot1Store = 1; let BaseOpcode = "V6_vL32b_cur_pi"; +let CextOpcode = "V6_vL32b_cur"; let isPredicable = 1; let DecoderNamespace = "EXT_mmvec"; let Constraints = "$Rx32 = $Rx32in"; @@ -27729,7 +27740,7 @@ def V6_vL32b_nt_ai : HInst< (outs HvxVR:$Vd32), (ins IntRegs:$Rt32, s4_0Imm:$Ii), "$Vd32 = vmem($Rt32+#$Ii):nt", -tc_c0749f3c, TypeCVI_VM_LD>, Enc_f3f408, Requires<[UseHVXV60]>, PredRel { +tc_c0749f3c, TypeCVI_VM_LD>, Enc_f3f408, Requires<[UseHVXV60]>, PredRel, PostInc_BaseImm { let Inst{7-5} = 0b000; let Inst{12-11} = 0b00; let Inst{31-21} = 0b00101000010; @@ -27743,6 +27754,7 @@ let mayLoad = 1; let isNonTemporal = 1; let isRestrictNoSlot1Store = 1; let BaseOpcode = "V6_vL32b_nt_ai"; +let CextOpcode = "V6_vL32b_nt"; let isCVLoadable = 1; let 
isPredicable = 1; let DecoderNamespace = "EXT_mmvec"; @@ -27751,7 +27763,7 @@ def V6_vL32b_nt_cur_ai : HInst< (outs HvxVR:$Vd32), (ins IntRegs:$Rt32, s4_0Imm:$Ii), "$Vd32.cur = vmem($Rt32+#$Ii):nt", -tc_c0749f3c, TypeCVI_VM_LD>, Enc_f3f408, Requires<[UseHVXV60]>, PredRel { +tc_c0749f3c, TypeCVI_VM_LD>, Enc_f3f408, Requires<[UseHVXV60]>, PredRel, PostInc_BaseImm { let Inst{7-5} = 0b001; let Inst{12-11} = 0b00; let Inst{31-21} = 0b00101000010; @@ -27766,6 +27778,7 @@ let mayLoad = 1; let isNonTemporal = 1; let isRestrictNoSlot1Store = 1; let BaseOpcode = "V6_vL32b_nt_cur_ai"; +let CextOpcode = "V6_vL32b_nt_cur"; let isPredicable = 1; let DecoderNamespace = "EXT_mmvec"; } @@ -27842,7 +27855,7 @@ def V6_vL32b_nt_cur_pi : HInst< (outs HvxVR:$Vd32, IntRegs:$Rx32), (ins IntRegs:$Rx32in, s3_0Imm:$Ii), "$Vd32.cur = vmem($Rx32++#$Ii):nt", -tc_1ba8a0cd, TypeCVI_VM_LD>, Enc_a255dc, Requires<[UseHVXV60]>, PredRel { +tc_1ba8a0cd, TypeCVI_VM_LD>, Enc_a255dc, Requires<[UseHVXV60]>, PredRel, PostInc_BaseImm { let Inst{7-5} = 0b001; let Inst{13-11} = 0b000; let Inst{31-21} = 0b00101001010; @@ -27857,6 +27870,7 @@ let mayLoad = 1; let isNonTemporal = 1; let isRestrictNoSlot1Store = 1; let BaseOpcode = "V6_vL32b_nt_cur_pi"; +let CextOpcode = "V6_vL32b_nt_cur"; let isPredicable = 1; let DecoderNamespace = "EXT_mmvec"; let Constraints = "$Rx32 = $Rx32in"; @@ -28019,7 +28033,7 @@ def V6_vL32b_nt_pi : HInst< (outs HvxVR:$Vd32, IntRegs:$Rx32), (ins IntRegs:$Rx32in, s3_0Imm:$Ii), "$Vd32 = vmem($Rx32++#$Ii):nt", -tc_1ba8a0cd, TypeCVI_VM_LD>, Enc_a255dc, Requires<[UseHVXV60]>, PredRel { +tc_1ba8a0cd, TypeCVI_VM_LD>, Enc_a255dc, Requires<[UseHVXV60]>, PredRel, PostInc_BaseImm { let Inst{7-5} = 0b000; let Inst{13-11} = 0b000; let Inst{31-21} = 0b00101001010; @@ -28033,6 +28047,7 @@ let mayLoad = 1; let isNonTemporal = 1; let isRestrictNoSlot1Store = 1; let BaseOpcode = "V6_vL32b_nt_pi"; +let CextOpcode = "V6_vL32b_nt"; let isCVLoadable = 1; let isPredicable = 1; let DecoderNamespace = "EXT_mmvec"; @@ -28127,7 +28142,7 @@ def V6_vL32b_nt_tmp_ai : HInst< (outs HvxVR:$Vd32), (ins IntRegs:$Rt32, s4_0Imm:$Ii), "$Vd32.tmp = vmem($Rt32+#$Ii):nt", -tc_52447ecc, TypeCVI_VM_TMP_LD>, Enc_f3f408, Requires<[UseHVXV60]>, PredRel { +tc_52447ecc, TypeCVI_VM_TMP_LD>, Enc_f3f408, Requires<[UseHVXV60]>, PredRel, PostInc_BaseImm { let Inst{7-5} = 0b010; let Inst{12-11} = 0b00; let Inst{31-21} = 0b00101000010; @@ -28137,11 +28152,12 @@ let addrMode = BaseImmOffset; let accessSize = HVXVectorAccess; let isCVLoad = 1; let isCVI = 1; -let hasTmpDst = 1; +let hasHvxTmp = 1; let mayLoad = 1; let isNonTemporal = 1; let isRestrictNoSlot1Store = 1; let BaseOpcode = "V6_vL32b_nt_tmp_ai"; +let CextOpcode = "V6_vL32b_nt_tmp"; let isPredicable = 1; let DecoderNamespace = "EXT_mmvec"; } @@ -28160,7 +28176,7 @@ let addrMode = BaseImmOffset; let accessSize = HVXVectorAccess; let isCVLoad = 1; let isCVI = 1; -let hasTmpDst = 1; +let hasHvxTmp = 1; let mayLoad = 1; let isNonTemporal = 1; let isRestrictNoSlot1Store = 1; @@ -28183,7 +28199,7 @@ let addrMode = PostInc; let accessSize = HVXVectorAccess; let isCVLoad = 1; let isCVI = 1; -let hasTmpDst = 1; +let hasHvxTmp = 1; let mayLoad = 1; let isNonTemporal = 1; let isRestrictNoSlot1Store = 1; @@ -28206,7 +28222,7 @@ let addrMode = PostInc; let accessSize = HVXVectorAccess; let isCVLoad = 1; let isCVI = 1; -let hasTmpDst = 1; +let hasHvxTmp = 1; let mayLoad = 1; let isNonTemporal = 1; let isRestrictNoSlot1Store = 1; @@ -28218,7 +28234,7 @@ def V6_vL32b_nt_tmp_pi : HInst< (outs HvxVR:$Vd32, IntRegs:$Rx32), 
(ins IntRegs:$Rx32in, s3_0Imm:$Ii), "$Vd32.tmp = vmem($Rx32++#$Ii):nt", -tc_663c80a7, TypeCVI_VM_TMP_LD>, Enc_a255dc, Requires<[UseHVXV60]>, PredRel { +tc_663c80a7, TypeCVI_VM_TMP_LD>, Enc_a255dc, Requires<[UseHVXV60]>, PredRel, PostInc_BaseImm { let Inst{7-5} = 0b010; let Inst{13-11} = 0b000; let Inst{31-21} = 0b00101001010; @@ -28228,11 +28244,12 @@ let addrMode = PostInc; let accessSize = HVXVectorAccess; let isCVLoad = 1; let isCVI = 1; -let hasTmpDst = 1; +let hasHvxTmp = 1; let mayLoad = 1; let isNonTemporal = 1; let isRestrictNoSlot1Store = 1; let BaseOpcode = "V6_vL32b_nt_tmp_pi"; +let CextOpcode = "V6_vL32b_nt_tmp"; let isPredicable = 1; let DecoderNamespace = "EXT_mmvec"; let Constraints = "$Rx32 = $Rx32in"; @@ -28250,7 +28267,7 @@ let addrMode = PostInc; let accessSize = HVXVectorAccess; let isCVLoad = 1; let isCVI = 1; -let hasTmpDst = 1; +let hasHvxTmp = 1; let mayLoad = 1; let isNonTemporal = 1; let isRestrictNoSlot1Store = 1; @@ -28273,7 +28290,7 @@ let addrMode = BaseImmOffset; let accessSize = HVXVectorAccess; let isCVLoad = 1; let isCVI = 1; -let hasTmpDst = 1; +let hasHvxTmp = 1; let mayLoad = 1; let isNonTemporal = 1; let isRestrictNoSlot1Store = 1; @@ -28295,7 +28312,7 @@ let addrMode = PostInc; let accessSize = HVXVectorAccess; let isCVLoad = 1; let isCVI = 1; -let hasTmpDst = 1; +let hasHvxTmp = 1; let mayLoad = 1; let isNonTemporal = 1; let isRestrictNoSlot1Store = 1; @@ -28317,7 +28334,7 @@ let addrMode = PostInc; let accessSize = HVXVectorAccess; let isCVLoad = 1; let isCVI = 1; -let hasTmpDst = 1; +let hasHvxTmp = 1; let mayLoad = 1; let isNonTemporal = 1; let isRestrictNoSlot1Store = 1; @@ -28329,7 +28346,7 @@ def V6_vL32b_pi : HInst< (outs HvxVR:$Vd32, IntRegs:$Rx32), (ins IntRegs:$Rx32in, s3_0Imm:$Ii), "$Vd32 = vmem($Rx32++#$Ii)", -tc_1ba8a0cd, TypeCVI_VM_LD>, Enc_a255dc, Requires<[UseHVXV60]>, PredRel { +tc_1ba8a0cd, TypeCVI_VM_LD>, Enc_a255dc, Requires<[UseHVXV60]>, PredRel, PostInc_BaseImm { let Inst{7-5} = 0b000; let Inst{13-11} = 0b000; let Inst{31-21} = 0b00101001000; @@ -28342,6 +28359,7 @@ let isCVI = 1; let mayLoad = 1; let isRestrictNoSlot1Store = 1; let BaseOpcode = "V6_vL32b_pi"; +let CextOpcode = "V6_vL32b"; let isCVLoadable = 1; let isPredicable = 1; let DecoderNamespace = "EXT_mmvec"; @@ -28432,7 +28450,7 @@ def V6_vL32b_tmp_ai : HInst< (outs HvxVR:$Vd32), (ins IntRegs:$Rt32, s4_0Imm:$Ii), "$Vd32.tmp = vmem($Rt32+#$Ii)", -tc_52447ecc, TypeCVI_VM_TMP_LD>, Enc_f3f408, Requires<[UseHVXV60]>, PredRel { +tc_52447ecc, TypeCVI_VM_TMP_LD>, Enc_f3f408, Requires<[UseHVXV60]>, PredRel, PostInc_BaseImm { let Inst{7-5} = 0b010; let Inst{12-11} = 0b00; let Inst{31-21} = 0b00101000000; @@ -28442,10 +28460,11 @@ let addrMode = BaseImmOffset; let accessSize = HVXVectorAccess; let isCVLoad = 1; let isCVI = 1; -let hasTmpDst = 1; +let hasHvxTmp = 1; let mayLoad = 1; let isRestrictNoSlot1Store = 1; let BaseOpcode = "V6_vL32b_tmp_ai"; +let CextOpcode = "V6_vL32b_tmp"; let isPredicable = 1; let DecoderNamespace = "EXT_mmvec"; } @@ -28464,7 +28483,7 @@ let addrMode = BaseImmOffset; let accessSize = HVXVectorAccess; let isCVLoad = 1; let isCVI = 1; -let hasTmpDst = 1; +let hasHvxTmp = 1; let mayLoad = 1; let isRestrictNoSlot1Store = 1; let BaseOpcode = "V6_vL32b_tmp_ai"; @@ -28486,7 +28505,7 @@ let addrMode = PostInc; let accessSize = HVXVectorAccess; let isCVLoad = 1; let isCVI = 1; -let hasTmpDst = 1; +let hasHvxTmp = 1; let mayLoad = 1; let isRestrictNoSlot1Store = 1; let BaseOpcode = "V6_vL32b_tmp_pi"; @@ -28508,7 +28527,7 @@ let addrMode = PostInc; let accessSize 
= HVXVectorAccess; let isCVLoad = 1; let isCVI = 1; -let hasTmpDst = 1; +let hasHvxTmp = 1; let mayLoad = 1; let isRestrictNoSlot1Store = 1; let BaseOpcode = "V6_vL32b_tmp_ppu"; @@ -28519,7 +28538,7 @@ def V6_vL32b_tmp_pi : HInst< (outs HvxVR:$Vd32, IntRegs:$Rx32), (ins IntRegs:$Rx32in, s3_0Imm:$Ii), "$Vd32.tmp = vmem($Rx32++#$Ii)", -tc_663c80a7, TypeCVI_VM_TMP_LD>, Enc_a255dc, Requires<[UseHVXV60]>, PredRel { +tc_663c80a7, TypeCVI_VM_TMP_LD>, Enc_a255dc, Requires<[UseHVXV60]>, PredRel, PostInc_BaseImm { let Inst{7-5} = 0b010; let Inst{13-11} = 0b000; let Inst{31-21} = 0b00101001000; @@ -28529,10 +28548,11 @@ let addrMode = PostInc; let accessSize = HVXVectorAccess; let isCVLoad = 1; let isCVI = 1; -let hasTmpDst = 1; +let hasHvxTmp = 1; let mayLoad = 1; let isRestrictNoSlot1Store = 1; let BaseOpcode = "V6_vL32b_tmp_pi"; +let CextOpcode = "V6_vL32b_tmp"; let isPredicable = 1; let DecoderNamespace = "EXT_mmvec"; let Constraints = "$Rx32 = $Rx32in"; @@ -28550,7 +28570,7 @@ let addrMode = PostInc; let accessSize = HVXVectorAccess; let isCVLoad = 1; let isCVI = 1; -let hasTmpDst = 1; +let hasHvxTmp = 1; let mayLoad = 1; let isRestrictNoSlot1Store = 1; let BaseOpcode = "V6_vL32b_tmp_ppu"; @@ -28572,7 +28592,7 @@ let addrMode = BaseImmOffset; let accessSize = HVXVectorAccess; let isCVLoad = 1; let isCVI = 1; -let hasTmpDst = 1; +let hasHvxTmp = 1; let mayLoad = 1; let isRestrictNoSlot1Store = 1; let BaseOpcode = "V6_vL32b_tmp_ai"; @@ -28593,7 +28613,7 @@ let addrMode = PostInc; let accessSize = HVXVectorAccess; let isCVLoad = 1; let isCVI = 1; -let hasTmpDst = 1; +let hasHvxTmp = 1; let mayLoad = 1; let isRestrictNoSlot1Store = 1; let BaseOpcode = "V6_vL32b_tmp_pi"; @@ -28614,7 +28634,7 @@ let addrMode = PostInc; let accessSize = HVXVectorAccess; let isCVLoad = 1; let isCVI = 1; -let hasTmpDst = 1; +let hasHvxTmp = 1; let mayLoad = 1; let isRestrictNoSlot1Store = 1; let BaseOpcode = "V6_vL32b_tmp_ppu"; @@ -28625,7 +28645,7 @@ def V6_vS32Ub_ai : HInst< (outs), (ins IntRegs:$Rt32, s4_0Imm:$Ii, HvxVR:$Vs32), "vmemu($Rt32+#$Ii) = $Vs32", -tc_f21e8abb, TypeCVI_VM_STU>, Enc_c9e3bc, Requires<[UseHVXV60]>, NewValueRel { +tc_f21e8abb, TypeCVI_VM_STU>, Enc_c9e3bc, Requires<[UseHVXV60]>, NewValueRel, PostInc_BaseImm { let Inst{7-5} = 0b111; let Inst{12-11} = 0b00; let Inst{31-21} = 0b00101000001; @@ -28634,6 +28654,7 @@ let accessSize = HVXVectorAccess; let isCVI = 1; let mayStore = 1; let BaseOpcode = "V6_vS32Ub_ai"; +let CextOpcode = "V6_vS32Ub"; let isPredicable = 1; let DecoderNamespace = "EXT_mmvec"; } @@ -28692,7 +28713,7 @@ def V6_vS32Ub_pi : HInst< (outs IntRegs:$Rx32), (ins IntRegs:$Rx32in, s3_0Imm:$Ii, HvxVR:$Vs32), "vmemu($Rx32++#$Ii) = $Vs32", -tc_e2d2e9e5, TypeCVI_VM_STU>, Enc_b62ef7, Requires<[UseHVXV60]>, NewValueRel { +tc_e2d2e9e5, TypeCVI_VM_STU>, Enc_b62ef7, Requires<[UseHVXV60]>, NewValueRel, PostInc_BaseImm { let Inst{7-5} = 0b111; let Inst{13-11} = 0b000; let Inst{31-21} = 0b00101001001; @@ -28701,6 +28722,7 @@ let accessSize = HVXVectorAccess; let isCVI = 1; let mayStore = 1; let BaseOpcode = "V6_vS32Ub_pi"; +let CextOpcode = "V6_vS32Ub"; let isPredicable = 1; let DecoderNamespace = "EXT_mmvec"; let Constraints = "$Rx32 = $Rx32in"; @@ -28773,7 +28795,7 @@ def V6_vS32b_ai : HInst< (outs), (ins IntRegs:$Rt32, s4_0Imm:$Ii, HvxVR:$Vs32), "vmem($Rt32+#$Ii) = $Vs32", -tc_c5dba46e, TypeCVI_VM_ST>, Enc_c9e3bc, Requires<[UseHVXV60]>, NewValueRel { +tc_c5dba46e, TypeCVI_VM_ST>, Enc_c9e3bc, Requires<[UseHVXV60]>, NewValueRel, PostInc_BaseImm { let Inst{7-5} = 0b000; let Inst{12-11} = 0b00; let 
Inst{31-21} = 0b00101000001; @@ -28782,6 +28804,7 @@ let accessSize = HVXVectorAccess; let isCVI = 1; let mayStore = 1; let BaseOpcode = "V6_vS32b_ai"; +let CextOpcode = "V6_vS32b"; let isNVStorable = 1; let isPredicable = 1; let DecoderNamespace = "EXT_mmvec"; @@ -28790,7 +28813,7 @@ def V6_vS32b_new_ai : HInst< (outs), (ins IntRegs:$Rt32, s4_0Imm:$Ii, HvxVR:$Os8), "vmem($Rt32+#$Ii) = $Os8.new", -tc_ab23f776, TypeCVI_VM_NEW_ST>, Enc_f77fbc, Requires<[UseHVXV60]>, NewValueRel { +tc_ab23f776, TypeCVI_VM_NEW_ST>, Enc_f77fbc, Requires<[UseHVXV60]>, NewValueRel, PostInc_BaseImm { let Inst{7-3} = 0b00100; let Inst{12-11} = 0b00; let Inst{31-21} = 0b00101000001; @@ -28802,6 +28825,7 @@ let CVINew = 1; let isNewValue = 1; let mayStore = 1; let BaseOpcode = "V6_vS32b_ai"; +let CextOpcode = "V6_vS32b_new"; let isPredicable = 1; let DecoderNamespace = "EXT_mmvec"; let opNewValue = 2; @@ -28873,7 +28897,7 @@ def V6_vS32b_new_pi : HInst< (outs IntRegs:$Rx32), (ins IntRegs:$Rx32in, s3_0Imm:$Ii, HvxVR:$Os8), "vmem($Rx32++#$Ii) = $Os8.new", -tc_6942b6e0, TypeCVI_VM_NEW_ST>, Enc_1aaec1, Requires<[UseHVXV60]>, NewValueRel { +tc_6942b6e0, TypeCVI_VM_NEW_ST>, Enc_1aaec1, Requires<[UseHVXV60]>, NewValueRel, PostInc_BaseImm { let Inst{7-3} = 0b00100; let Inst{13-11} = 0b000; let Inst{31-21} = 0b00101001001; @@ -28885,6 +28909,7 @@ let CVINew = 1; let isNewValue = 1; let mayStore = 1; let BaseOpcode = "V6_vS32b_pi"; +let CextOpcode = "V6_vS32b_new"; let isPredicable = 1; let DecoderNamespace = "EXT_mmvec"; let opNewValue = 3; @@ -29070,7 +29095,7 @@ def V6_vS32b_nt_ai : HInst< (outs), (ins IntRegs:$Rt32, s4_0Imm:$Ii, HvxVR:$Vs32), "vmem($Rt32+#$Ii):nt = $Vs32", -tc_c5dba46e, TypeCVI_VM_ST>, Enc_c9e3bc, Requires<[UseHVXV60]>, NewValueRel { +tc_c5dba46e, TypeCVI_VM_ST>, Enc_c9e3bc, Requires<[UseHVXV60]>, NewValueRel, PostInc_BaseImm { let Inst{7-5} = 0b000; let Inst{12-11} = 0b00; let Inst{31-21} = 0b00101000011; @@ -29080,6 +29105,7 @@ let isCVI = 1; let isNonTemporal = 1; let mayStore = 1; let BaseOpcode = "V6_vS32b_ai"; +let CextOpcode = "V6_vS32b_nt"; let isNVStorable = 1; let isPredicable = 1; let DecoderNamespace = "EXT_mmvec"; @@ -29088,7 +29114,7 @@ def V6_vS32b_nt_new_ai : HInst< (outs), (ins IntRegs:$Rt32, s4_0Imm:$Ii, HvxVR:$Os8), "vmem($Rt32+#$Ii):nt = $Os8.new", -tc_ab23f776, TypeCVI_VM_NEW_ST>, Enc_f77fbc, Requires<[UseHVXV60]>, NewValueRel { +tc_ab23f776, TypeCVI_VM_NEW_ST>, Enc_f77fbc, Requires<[UseHVXV60]>, NewValueRel, PostInc_BaseImm { let Inst{7-3} = 0b00100; let Inst{12-11} = 0b00; let Inst{31-21} = 0b00101000011; @@ -29101,6 +29127,7 @@ let isNewValue = 1; let isNonTemporal = 1; let mayStore = 1; let BaseOpcode = "V6_vS32b_ai"; +let CextOpcode = "V6_vS32b_nt_new"; let isPredicable = 1; let DecoderNamespace = "EXT_mmvec"; let opNewValue = 2; @@ -29175,7 +29202,7 @@ def V6_vS32b_nt_new_pi : HInst< (outs IntRegs:$Rx32), (ins IntRegs:$Rx32in, s3_0Imm:$Ii, HvxVR:$Os8), "vmem($Rx32++#$Ii):nt = $Os8.new", -tc_6942b6e0, TypeCVI_VM_NEW_ST>, Enc_1aaec1, Requires<[UseHVXV60]>, NewValueRel { +tc_6942b6e0, TypeCVI_VM_NEW_ST>, Enc_1aaec1, Requires<[UseHVXV60]>, NewValueRel, PostInc_BaseImm { let Inst{7-3} = 0b00100; let Inst{13-11} = 0b000; let Inst{31-21} = 0b00101001011; @@ -29188,6 +29215,7 @@ let isNewValue = 1; let isNonTemporal = 1; let mayStore = 1; let BaseOpcode = "V6_vS32b_pi"; +let CextOpcode = "V6_vS32b_nt_new"; let isPredicable = 1; let DecoderNamespace = "EXT_mmvec"; let opNewValue = 3; @@ -29383,7 +29411,7 @@ def V6_vS32b_nt_pi : HInst< (outs IntRegs:$Rx32), (ins IntRegs:$Rx32in, 
s3_0Imm:$Ii, HvxVR:$Vs32), "vmem($Rx32++#$Ii):nt = $Vs32", -tc_3e2aaafc, TypeCVI_VM_ST>, Enc_b62ef7, Requires<[UseHVXV60]>, NewValueRel { +tc_3e2aaafc, TypeCVI_VM_ST>, Enc_b62ef7, Requires<[UseHVXV60]>, NewValueRel, PostInc_BaseImm { let Inst{7-5} = 0b000; let Inst{13-11} = 0b000; let Inst{31-21} = 0b00101001011; @@ -29393,6 +29421,7 @@ let isCVI = 1; let isNonTemporal = 1; let mayStore = 1; let BaseOpcode = "V6_vS32b_pi"; +let CextOpcode = "V6_vS32b_nt"; let isNVStorable = 1; let isPredicable = 1; let DecoderNamespace = "EXT_mmvec"; @@ -29519,7 +29548,7 @@ def V6_vS32b_pi : HInst< (outs IntRegs:$Rx32), (ins IntRegs:$Rx32in, s3_0Imm:$Ii, HvxVR:$Vs32), "vmem($Rx32++#$Ii) = $Vs32", -tc_3e2aaafc, TypeCVI_VM_ST>, Enc_b62ef7, Requires<[UseHVXV60]>, NewValueRel { +tc_3e2aaafc, TypeCVI_VM_ST>, Enc_b62ef7, Requires<[UseHVXV60]>, NewValueRel, PostInc_BaseImm { let Inst{7-5} = 0b000; let Inst{13-11} = 0b000; let Inst{31-21} = 0b00101001001; @@ -29528,6 +29557,7 @@ let accessSize = HVXVectorAccess; let isCVI = 1; let mayStore = 1; let BaseOpcode = "V6_vS32b_pi"; +let CextOpcode = "V6_vS32b"; let isNVStorable = 1; let isPredicable = 1; let DecoderNamespace = "EXT_mmvec"; @@ -29689,6 +29719,32 @@ let mayStore = 1; let DecoderNamespace = "EXT_mmvec"; let Constraints = "$Rx32 = $Rx32in"; } +def V6_vabs_hf : HInst< +(outs HvxVR:$Vd32), +(ins HvxVR:$Vu32), +"$Vd32.hf = vabs($Vu32.hf)", +tc_5cdf8c84, TypeCVI_VX_LATE>, Enc_e7581c, Requires<[UseHVXV68,UseHVXIEEEFP]> { +let Inst{7-5} = 0b100; +let Inst{13-13} = 0b1; +let Inst{31-16} = 0b0001111000000110; +let hasNewValue = 1; +let opNewValue = 0; +let isCVI = 1; +let DecoderNamespace = "EXT_mmvec"; +} +def V6_vabs_sf : HInst< +(outs HvxVR:$Vd32), +(ins HvxVR:$Vu32), +"$Vd32.sf = vabs($Vu32.sf)", +tc_5cdf8c84, TypeCVI_VX_LATE>, Enc_e7581c, Requires<[UseHVXV68,UseHVXIEEEFP]> { +let Inst{7-5} = 0b101; +let Inst{13-13} = 0b1; +let Inst{31-16} = 0b0001111000000110; +let hasNewValue = 1; +let opNewValue = 0; +let isCVI = 1; +let DecoderNamespace = "EXT_mmvec"; +} def V6_vabsb : HInst< (outs HvxVR:$Vd32), (ins HvxVR:$Vu32), @@ -29975,6 +30031,123 @@ let isPseudo = 1; let isCodeGenOnly = 1; let DecoderNamespace = "EXT_mmvec"; } +def V6_vadd_hf : HInst< +(outs HvxVR:$Vd32), +(ins HvxVR:$Vu32, HvxVR:$Vv32), +"$Vd32.qf16 = vadd($Vu32.hf,$Vv32.hf)", +tc_05ca8cfd, TypeCVI_VS>, Enc_45364e, Requires<[UseHVXV68,UseHVXQFloat]> { +let Inst{7-5} = 0b011; +let Inst{13-13} = 0b1; +let Inst{31-21} = 0b00011111011; +let hasNewValue = 1; +let opNewValue = 0; +let isCVI = 1; +let DecoderNamespace = "EXT_mmvec"; +} +def V6_vadd_hf_hf : HInst< +(outs HvxVR:$Vd32), +(ins HvxVR:$Vu32, HvxVR:$Vv32), +"$Vd32.hf = vadd($Vu32.hf,$Vv32.hf)", +tc_c127de3a, TypeCVI_VX>, Enc_45364e, Requires<[UseHVXV68,UseHVXIEEEFP]> { +let Inst{7-5} = 0b111; +let Inst{13-13} = 0b1; +let Inst{31-21} = 0b00011111101; +let hasNewValue = 1; +let opNewValue = 0; +let isCVI = 1; +let DecoderNamespace = "EXT_mmvec"; +} +def V6_vadd_qf16 : HInst< +(outs HvxVR:$Vd32), +(ins HvxVR:$Vu32, HvxVR:$Vv32), +"$Vd32.qf16 = vadd($Vu32.qf16,$Vv32.qf16)", +tc_05ca8cfd, TypeCVI_VS>, Enc_45364e, Requires<[UseHVXV68,UseHVXQFloat]> { +let Inst{7-5} = 0b010; +let Inst{13-13} = 0b1; +let Inst{31-21} = 0b00011111011; +let hasNewValue = 1; +let opNewValue = 0; +let isCVI = 1; +let DecoderNamespace = "EXT_mmvec"; +} +def V6_vadd_qf16_mix : HInst< +(outs HvxVR:$Vd32), +(ins HvxVR:$Vu32, HvxVR:$Vv32), +"$Vd32.qf16 = vadd($Vu32.qf16,$Vv32.hf)", +tc_05ca8cfd, TypeCVI_VS>, Enc_45364e, Requires<[UseHVXV68,UseHVXQFloat]> { +let Inst{7-5} = 0b100; 
+let Inst{13-13} = 0b1; +let Inst{31-21} = 0b00011111011; +let hasNewValue = 1; +let opNewValue = 0; +let isCVI = 1; +let DecoderNamespace = "EXT_mmvec"; +} +def V6_vadd_qf32 : HInst< +(outs HvxVR:$Vd32), +(ins HvxVR:$Vu32, HvxVR:$Vv32), +"$Vd32.qf32 = vadd($Vu32.qf32,$Vv32.qf32)", +tc_05ca8cfd, TypeCVI_VS>, Enc_45364e, Requires<[UseHVXV68,UseHVXQFloat]> { +let Inst{7-5} = 0b000; +let Inst{13-13} = 0b1; +let Inst{31-21} = 0b00011111101; +let hasNewValue = 1; +let opNewValue = 0; +let isCVI = 1; +let DecoderNamespace = "EXT_mmvec"; +} +def V6_vadd_qf32_mix : HInst< +(outs HvxVR:$Vd32), +(ins HvxVR:$Vu32, HvxVR:$Vv32), +"$Vd32.qf32 = vadd($Vu32.qf32,$Vv32.sf)", +tc_05ca8cfd, TypeCVI_VS>, Enc_45364e, Requires<[UseHVXV68,UseHVXQFloat]> { +let Inst{7-5} = 0b010; +let Inst{13-13} = 0b1; +let Inst{31-21} = 0b00011111101; +let hasNewValue = 1; +let opNewValue = 0; +let isCVI = 1; +let DecoderNamespace = "EXT_mmvec"; +} +def V6_vadd_sf : HInst< +(outs HvxVR:$Vd32), +(ins HvxVR:$Vu32, HvxVR:$Vv32), +"$Vd32.qf32 = vadd($Vu32.sf,$Vv32.sf)", +tc_05ca8cfd, TypeCVI_VS>, Enc_45364e, Requires<[UseHVXV68,UseHVXQFloat]> { +let Inst{7-5} = 0b001; +let Inst{13-13} = 0b1; +let Inst{31-21} = 0b00011111101; +let hasNewValue = 1; +let opNewValue = 0; +let isCVI = 1; +let DecoderNamespace = "EXT_mmvec"; +} +def V6_vadd_sf_hf : HInst< +(outs HvxWR:$Vdd32), +(ins HvxVR:$Vu32, HvxVR:$Vv32), +"$Vdd32.sf = vadd($Vu32.hf,$Vv32.hf)", +tc_d8287c14, TypeCVI_VX_DV>, Enc_71bb9b, Requires<[UseHVXV68,UseHVXIEEEFP]> { +let Inst{7-5} = 0b100; +let Inst{13-13} = 0b1; +let Inst{31-21} = 0b00011111100; +let hasNewValue = 1; +let opNewValue = 0; +let isCVI = 1; +let DecoderNamespace = "EXT_mmvec"; +} +def V6_vadd_sf_sf : HInst< +(outs HvxVR:$Vd32), +(ins HvxVR:$Vu32, HvxVR:$Vv32), +"$Vd32.sf = vadd($Vu32.sf,$Vv32.sf)", +tc_c127de3a, TypeCVI_VX>, Enc_45364e, Requires<[UseHVXV68,UseHVXIEEEFP]> { +let Inst{7-5} = 0b110; +let Inst{13-13} = 0b1; +let Inst{31-21} = 0b00011111100; +let hasNewValue = 1; +let opNewValue = 0; +let isCVI = 1; +let DecoderNamespace = "EXT_mmvec"; +} def V6_vaddb : HInst< (outs HvxVR:$Vd32), (ins HvxVR:$Vu32, HvxVR:$Vv32), @@ -31440,6 +31613,58 @@ let opNewValue = 0; let isCVI = 1; let DecoderNamespace = "EXT_mmvec"; } +def V6_vasrvuhubrndsat : HInst< +(outs HvxVR:$Vd32), +(ins HvxWR:$Vuu32, HvxVR:$Vv32), +"$Vd32.ub = vasr($Vuu32.uh,$Vv32.ub):rnd:sat", +tc_05ca8cfd, TypeCVI_VS>, Enc_de5ea0, Requires<[UseHVXV69]> { +let Inst{7-5} = 0b011; +let Inst{13-13} = 0b0; +let Inst{31-21} = 0b00011101000; +let hasNewValue = 1; +let opNewValue = 0; +let isCVI = 1; +let DecoderNamespace = "EXT_mmvec"; +} +def V6_vasrvuhubsat : HInst< +(outs HvxVR:$Vd32), +(ins HvxWR:$Vuu32, HvxVR:$Vv32), +"$Vd32.ub = vasr($Vuu32.uh,$Vv32.ub):sat", +tc_05ca8cfd, TypeCVI_VS>, Enc_de5ea0, Requires<[UseHVXV69]> { +let Inst{7-5} = 0b010; +let Inst{13-13} = 0b0; +let Inst{31-21} = 0b00011101000; +let hasNewValue = 1; +let opNewValue = 0; +let isCVI = 1; +let DecoderNamespace = "EXT_mmvec"; +} +def V6_vasrvwuhrndsat : HInst< +(outs HvxVR:$Vd32), +(ins HvxWR:$Vuu32, HvxVR:$Vv32), +"$Vd32.uh = vasr($Vuu32.w,$Vv32.uh):rnd:sat", +tc_05ca8cfd, TypeCVI_VS>, Enc_de5ea0, Requires<[UseHVXV69]> { +let Inst{7-5} = 0b001; +let Inst{13-13} = 0b0; +let Inst{31-21} = 0b00011101000; +let hasNewValue = 1; +let opNewValue = 0; +let isCVI = 1; +let DecoderNamespace = "EXT_mmvec"; +} +def V6_vasrvwuhsat : HInst< +(outs HvxVR:$Vd32), +(ins HvxWR:$Vuu32, HvxVR:$Vv32), +"$Vd32.uh = vasr($Vuu32.w,$Vv32.uh):sat", +tc_05ca8cfd, TypeCVI_VS>, Enc_de5ea0, Requires<[UseHVXV69]> 
{ +let Inst{7-5} = 0b000; +let Inst{13-13} = 0b0; +let Inst{31-21} = 0b00011101000; +let hasNewValue = 1; +let opNewValue = 0; +let isCVI = 1; +let DecoderNamespace = "EXT_mmvec"; +} def V6_vasrw : HInst< (outs HvxVR:$Vd32), (ins HvxVR:$Vu32, IntRegs:$Rt32), @@ -31597,6 +31822,33 @@ let opNewValue = 0; let isCVI = 1; let DecoderNamespace = "EXT_mmvec"; } +def V6_vassign_fp : HInst< +(outs HvxVR:$Vd32), +(ins HvxVR:$Vu32), +"$Vd32.w = vfmv($Vu32.w)", +tc_5cdf8c84, TypeCVI_VX_LATE>, Enc_e7581c, Requires<[UseHVXV68,UseHVXIEEEFP]> { +let Inst{7-5} = 0b001; +let Inst{13-13} = 0b1; +let Inst{31-16} = 0b0001111000000110; +let hasNewValue = 1; +let opNewValue = 0; +let isCVI = 1; +let DecoderNamespace = "EXT_mmvec"; +} +def V6_vassign_tmp : HInst< +(outs HvxVR:$Vd32), +(ins HvxVR:$Vu32), +"$Vd32.tmp = $Vu32", +tc_2120355e, TypeCVI_VX>, Enc_e7581c, Requires<[UseHVXV69]> { +let Inst{7-5} = 0b110; +let Inst{13-13} = 0b0; +let Inst{31-16} = 0b0001111000000001; +let hasNewValue = 1; +let opNewValue = 0; +let isCVI = 1; +let hasHvxTmp = 1; +let DecoderNamespace = "EXT_mmvec"; +} def V6_vassignp : HInst< (outs HvxWR:$Vdd32), (ins HvxWR:$Vuu32), @@ -32000,6 +32252,189 @@ let isCVI = 1; let isRegSequence = 1; let DecoderNamespace = "EXT_mmvec"; } +def V6_vcombine_tmp : HInst< +(outs HvxWR:$Vdd32), +(ins HvxVR:$Vu32, HvxVR:$Vv32), +"$Vdd32.tmp = vcombine($Vu32,$Vv32)", +tc_aa047364, TypeCVI_VX>, Enc_71bb9b, Requires<[UseHVXV69]> { +let Inst{7-5} = 0b111; +let Inst{13-13} = 0b0; +let Inst{31-21} = 0b00011110101; +let hasNewValue = 1; +let opNewValue = 0; +let isCVI = 1; +let hasHvxTmp = 1; +let DecoderNamespace = "EXT_mmvec"; +} +def V6_vconv_hf_qf16 : HInst< +(outs HvxVR:$Vd32), +(ins HvxVR:$Vu32), +"$Vd32.hf = $Vu32.qf16", +tc_51d0ecc3, TypeCVI_VS>, Enc_e7581c, Requires<[UseHVXV68,UseHVXQFloat]> { +let Inst{7-5} = 0b011; +let Inst{13-13} = 0b1; +let Inst{31-16} = 0b0001111000000100; +let hasNewValue = 1; +let opNewValue = 0; +let isCVI = 1; +let DecoderNamespace = "EXT_mmvec"; +} +def V6_vconv_hf_qf32 : HInst< +(outs HvxVR:$Vd32), +(ins HvxWR:$Vuu32), +"$Vd32.hf = $Vuu32.qf32", +tc_51d0ecc3, TypeCVI_VS>, Enc_a33d04, Requires<[UseHVXV68,UseHVXQFloat]> { +let Inst{7-5} = 0b110; +let Inst{13-13} = 0b1; +let Inst{31-16} = 0b0001111000000100; +let hasNewValue = 1; +let opNewValue = 0; +let isCVI = 1; +let DecoderNamespace = "EXT_mmvec"; +} +def V6_vconv_sf_qf32 : HInst< +(outs HvxVR:$Vd32), +(ins HvxVR:$Vu32), +"$Vd32.sf = $Vu32.qf32", +tc_51d0ecc3, TypeCVI_VS>, Enc_e7581c, Requires<[UseHVXV68,UseHVXQFloat]> { +let Inst{7-5} = 0b000; +let Inst{13-13} = 0b1; +let Inst{31-16} = 0b0001111000000100; +let hasNewValue = 1; +let opNewValue = 0; +let isCVI = 1; +let DecoderNamespace = "EXT_mmvec"; +} +def V6_vcvt_b_hf : HInst< +(outs HvxVR:$Vd32), +(ins HvxVR:$Vu32, HvxVR:$Vv32), +"$Vd32.b = vcvt($Vu32.hf,$Vv32.hf)", +tc_c127de3a, TypeCVI_VX>, Enc_45364e, Requires<[UseHVXV68,UseHVXIEEEFP]> { +let Inst{7-5} = 0b110; +let Inst{13-13} = 0b1; +let Inst{31-21} = 0b00011111110; +let hasNewValue = 1; +let opNewValue = 0; +let isCVI = 1; +let DecoderNamespace = "EXT_mmvec"; +} +def V6_vcvt_h_hf : HInst< +(outs HvxVR:$Vd32), +(ins HvxVR:$Vu32), +"$Vd32.h = vcvt($Vu32.hf)", +tc_3c8c15d0, TypeCVI_VX>, Enc_e7581c, Requires<[UseHVXV68,UseHVXIEEEFP]> { +let Inst{7-5} = 0b000; +let Inst{13-13} = 0b1; +let Inst{31-16} = 0b0001111000000110; +let hasNewValue = 1; +let opNewValue = 0; +let isCVI = 1; +let DecoderNamespace = "EXT_mmvec"; +} +def V6_vcvt_hf_b : HInst< +(outs HvxWR:$Vdd32), +(ins HvxVR:$Vu32), +"$Vdd32.hf = vcvt($Vu32.b)", 
+tc_0afc8be9, TypeCVI_VX_DV>, Enc_dd766a, Requires<[UseHVXV68,UseHVXIEEEFP]> { +let Inst{7-5} = 0b010; +let Inst{13-13} = 0b1; +let Inst{31-16} = 0b0001111000000100; +let hasNewValue = 1; +let opNewValue = 0; +let isCVI = 1; +let DecoderNamespace = "EXT_mmvec"; +} +def V6_vcvt_hf_h : HInst< +(outs HvxVR:$Vd32), +(ins HvxVR:$Vu32), +"$Vd32.hf = vcvt($Vu32.h)", +tc_3c8c15d0, TypeCVI_VX>, Enc_e7581c, Requires<[UseHVXV68,UseHVXIEEEFP]> { +let Inst{7-5} = 0b111; +let Inst{13-13} = 0b1; +let Inst{31-16} = 0b0001111000000100; +let hasNewValue = 1; +let opNewValue = 0; +let isCVI = 1; +let DecoderNamespace = "EXT_mmvec"; +} +def V6_vcvt_hf_sf : HInst< +(outs HvxVR:$Vd32), +(ins HvxVR:$Vu32, HvxVR:$Vv32), +"$Vd32.hf = vcvt($Vu32.sf,$Vv32.sf)", +tc_c127de3a, TypeCVI_VX>, Enc_45364e, Requires<[UseHVXV68,UseHVXIEEEFP]> { +let Inst{7-5} = 0b001; +let Inst{13-13} = 0b1; +let Inst{31-21} = 0b00011111011; +let hasNewValue = 1; +let opNewValue = 0; +let isCVI = 1; +let DecoderNamespace = "EXT_mmvec"; +} +def V6_vcvt_hf_ub : HInst< +(outs HvxWR:$Vdd32), +(ins HvxVR:$Vu32), +"$Vdd32.hf = vcvt($Vu32.ub)", +tc_0afc8be9, TypeCVI_VX_DV>, Enc_dd766a, Requires<[UseHVXV68,UseHVXIEEEFP]> { +let Inst{7-5} = 0b001; +let Inst{13-13} = 0b1; +let Inst{31-16} = 0b0001111000000100; +let hasNewValue = 1; +let opNewValue = 0; +let isCVI = 1; +let DecoderNamespace = "EXT_mmvec"; +} +def V6_vcvt_hf_uh : HInst< +(outs HvxVR:$Vd32), +(ins HvxVR:$Vu32), +"$Vd32.hf = vcvt($Vu32.uh)", +tc_3c8c15d0, TypeCVI_VX>, Enc_e7581c, Requires<[UseHVXV68,UseHVXIEEEFP]> { +let Inst{7-5} = 0b101; +let Inst{13-13} = 0b1; +let Inst{31-16} = 0b0001111000000100; +let hasNewValue = 1; +let opNewValue = 0; +let isCVI = 1; +let DecoderNamespace = "EXT_mmvec"; +} +def V6_vcvt_sf_hf : HInst< +(outs HvxWR:$Vdd32), +(ins HvxVR:$Vu32), +"$Vdd32.sf = vcvt($Vu32.hf)", +tc_0afc8be9, TypeCVI_VX_DV>, Enc_dd766a, Requires<[UseHVXV68,UseHVXIEEEFP]> { +let Inst{7-5} = 0b100; +let Inst{13-13} = 0b1; +let Inst{31-16} = 0b0001111000000100; +let hasNewValue = 1; +let opNewValue = 0; +let isCVI = 1; +let DecoderNamespace = "EXT_mmvec"; +} +def V6_vcvt_ub_hf : HInst< +(outs HvxVR:$Vd32), +(ins HvxVR:$Vu32, HvxVR:$Vv32), +"$Vd32.ub = vcvt($Vu32.hf,$Vv32.hf)", +tc_c127de3a, TypeCVI_VX>, Enc_45364e, Requires<[UseHVXV68,UseHVXIEEEFP]> { +let Inst{7-5} = 0b101; +let Inst{13-13} = 0b1; +let Inst{31-21} = 0b00011111110; +let hasNewValue = 1; +let opNewValue = 0; +let isCVI = 1; +let DecoderNamespace = "EXT_mmvec"; +} +def V6_vcvt_uh_hf : HInst< +(outs HvxVR:$Vd32), +(ins HvxVR:$Vu32), +"$Vd32.uh = vcvt($Vu32.hf)", +tc_3c8c15d0, TypeCVI_VX>, Enc_e7581c, Requires<[UseHVXV68,UseHVXIEEEFP]> { +let Inst{7-5} = 0b000; +let Inst{13-13} = 0b1; +let Inst{31-16} = 0b0001111000000101; +let hasNewValue = 1; +let opNewValue = 0; +let isCVI = 1; +let DecoderNamespace = "EXT_mmvec"; +} def V6_vd0 : HInst< (outs HvxVR:$Vd32), (ins), @@ -32141,6 +32576,34 @@ let opNewValue = 0; let isCVI = 1; let DecoderNamespace = "EXT_mmvec"; } +def V6_vdmpy_sf_hf : HInst< +(outs HvxVR:$Vd32), +(ins HvxVR:$Vu32, HvxVR:$Vv32), +"$Vd32.sf = vdmpy($Vu32.hf,$Vv32.hf)", +tc_c127de3a, TypeCVI_VX>, Enc_45364e, Requires<[UseHVXV68,UseHVXIEEEFP]> { +let Inst{7-5} = 0b110; +let Inst{13-13} = 0b1; +let Inst{31-21} = 0b00011111101; +let hasNewValue = 1; +let opNewValue = 0; +let isCVI = 1; +let DecoderNamespace = "EXT_mmvec"; +} +def V6_vdmpy_sf_hf_acc : HInst< +(outs HvxVR:$Vx32), +(ins HvxVR:$Vx32in, HvxVR:$Vu32, HvxVR:$Vv32), +"$Vx32.sf += vdmpy($Vu32.hf,$Vv32.hf)", +tc_a19b9305, TypeCVI_VX>, Enc_a7341a, 
Requires<[UseHVXV68,UseHVXIEEEFP]> { +let Inst{7-5} = 0b011; +let Inst{13-13} = 0b1; +let Inst{31-21} = 0b00011100010; +let hasNewValue = 1; +let opNewValue = 0; +let isAccumulator = 1; +let isCVI = 1; +let DecoderNamespace = "EXT_mmvec"; +let Constraints = "$Vx32 = $Vx32in"; +} def V6_vdmpybus : HInst< (outs HvxVR:$Vd32), (ins HvxVR:$Vu32, IntRegs:$Rt32), @@ -32415,7 +32878,7 @@ def V6_vdmpyhsat : HInst< (outs HvxVR:$Vd32), (ins HvxVR:$Vu32, IntRegs:$Rt32), "$Vd32.w = vdmpy($Vu32.h,$Rt32.h):sat", -tc_0b04c6c7, TypeCVI_VX_DV>, Enc_b087ac, Requires<[UseHVXV60]> { +tc_dcca380f, TypeCVI_VX>, Enc_b087ac, Requires<[UseHVXV60]> { let Inst{7-5} = 0b010; let Inst{13-13} = 0b0; let Inst{31-21} = 0b00011001001; @@ -32428,7 +32891,7 @@ def V6_vdmpyhsat_acc : HInst< (outs HvxVR:$Vx32), (ins HvxVR:$Vx32in, HvxVR:$Vu32, IntRegs:$Rt32), "$Vx32.w += vdmpy($Vu32.h,$Rt32.h):sat", -tc_660769f1, TypeCVI_VX_DV>, Enc_5138b3, Requires<[UseHVXV60]> { +tc_72e2b393, TypeCVI_VX>, Enc_5138b3, Requires<[UseHVXV60]> { let Inst{7-5} = 0b011; let Inst{13-13} = 0b1; let Inst{31-21} = 0b00011001001; @@ -32523,7 +32986,7 @@ def V6_vdmpyhsusat : HInst< (outs HvxVR:$Vd32), (ins HvxVR:$Vu32, IntRegs:$Rt32), "$Vd32.w = vdmpy($Vu32.h,$Rt32.uh):sat", -tc_0b04c6c7, TypeCVI_VX_DV>, Enc_b087ac, Requires<[UseHVXV60]> { +tc_dcca380f, TypeCVI_VX>, Enc_b087ac, Requires<[UseHVXV60]> { let Inst{7-5} = 0b000; let Inst{13-13} = 0b0; let Inst{31-21} = 0b00011001001; @@ -32536,7 +32999,7 @@ def V6_vdmpyhsusat_acc : HInst< (outs HvxVR:$Vx32), (ins HvxVR:$Vx32in, HvxVR:$Vu32, IntRegs:$Rt32), "$Vx32.w += vdmpy($Vu32.h,$Rt32.uh):sat", -tc_660769f1, TypeCVI_VX_DV>, Enc_5138b3, Requires<[UseHVXV60]> { +tc_72e2b393, TypeCVI_VX>, Enc_5138b3, Requires<[UseHVXV60]> { let Inst{7-5} = 0b000; let Inst{13-13} = 0b1; let Inst{31-21} = 0b00011001001; @@ -32577,7 +33040,7 @@ def V6_vdmpyhvsat : HInst< (outs HvxVR:$Vd32), (ins HvxVR:$Vu32, HvxVR:$Vv32), "$Vd32.w = vdmpy($Vu32.h,$Vv32.h):sat", -tc_d8287c14, TypeCVI_VX_DV>, Enc_45364e, Requires<[UseHVXV60]> { +tc_73efe966, TypeCVI_VX>, Enc_45364e, Requires<[UseHVXV60]> { let Inst{7-5} = 0b011; let Inst{13-13} = 0b0; let Inst{31-21} = 0b00011100000; @@ -32831,6 +33294,84 @@ let isCVI = 1; let DecoderNamespace = "EXT_mmvec"; let Constraints = "$Qx4 = $Qx4in"; } +def V6_vfmax_hf : HInst< +(outs HvxVR:$Vd32), +(ins HvxVR:$Vu32, HvxVR:$Vv32), +"$Vd32.hf = vfmax($Vu32.hf,$Vv32.hf)", +tc_cda936da, TypeCVI_VX_LATE>, Enc_45364e, Requires<[UseHVXV68,UseHVXIEEEFP]> { +let Inst{7-5} = 0b010; +let Inst{13-13} = 0b1; +let Inst{31-21} = 0b00011100011; +let hasNewValue = 1; +let opNewValue = 0; +let isCVI = 1; +let DecoderNamespace = "EXT_mmvec"; +} +def V6_vfmax_sf : HInst< +(outs HvxVR:$Vd32), +(ins HvxVR:$Vu32, HvxVR:$Vv32), +"$Vd32.sf = vfmax($Vu32.sf,$Vv32.sf)", +tc_cda936da, TypeCVI_VX_LATE>, Enc_45364e, Requires<[UseHVXV68,UseHVXIEEEFP]> { +let Inst{7-5} = 0b011; +let Inst{13-13} = 0b1; +let Inst{31-21} = 0b00011100011; +let hasNewValue = 1; +let opNewValue = 0; +let isCVI = 1; +let DecoderNamespace = "EXT_mmvec"; +} +def V6_vfmin_hf : HInst< +(outs HvxVR:$Vd32), +(ins HvxVR:$Vu32, HvxVR:$Vv32), +"$Vd32.hf = vfmin($Vu32.hf,$Vv32.hf)", +tc_cda936da, TypeCVI_VX_LATE>, Enc_45364e, Requires<[UseHVXV68,UseHVXIEEEFP]> { +let Inst{7-5} = 0b000; +let Inst{13-13} = 0b1; +let Inst{31-21} = 0b00011100011; +let hasNewValue = 1; +let opNewValue = 0; +let isCVI = 1; +let DecoderNamespace = "EXT_mmvec"; +} +def V6_vfmin_sf : HInst< +(outs HvxVR:$Vd32), +(ins HvxVR:$Vu32, HvxVR:$Vv32), +"$Vd32.sf = vfmin($Vu32.sf,$Vv32.sf)", 
+tc_cda936da, TypeCVI_VX_LATE>, Enc_45364e, Requires<[UseHVXV68,UseHVXIEEEFP]> { +let Inst{7-5} = 0b001; +let Inst{13-13} = 0b1; +let Inst{31-21} = 0b00011100011; +let hasNewValue = 1; +let opNewValue = 0; +let isCVI = 1; +let DecoderNamespace = "EXT_mmvec"; +} +def V6_vfneg_hf : HInst< +(outs HvxVR:$Vd32), +(ins HvxVR:$Vu32), +"$Vd32.hf = vfneg($Vu32.hf)", +tc_5cdf8c84, TypeCVI_VX_LATE>, Enc_e7581c, Requires<[UseHVXV68,UseHVXIEEEFP]> { +let Inst{7-5} = 0b010; +let Inst{13-13} = 0b1; +let Inst{31-16} = 0b0001111000000110; +let hasNewValue = 1; +let opNewValue = 0; +let isCVI = 1; +let DecoderNamespace = "EXT_mmvec"; +} +def V6_vfneg_sf : HInst< +(outs HvxVR:$Vd32), +(ins HvxVR:$Vu32), +"$Vd32.sf = vfneg($Vu32.sf)", +tc_5cdf8c84, TypeCVI_VX_LATE>, Enc_e7581c, Requires<[UseHVXV68,UseHVXIEEEFP]> { +let Inst{7-5} = 0b011; +let Inst{13-13} = 0b1; +let Inst{31-16} = 0b0001111000000110; +let hasNewValue = 1; +let opNewValue = 0; +let isCVI = 1; +let DecoderNamespace = "EXT_mmvec"; +} def V6_vgathermh : HInst< (outs), (ins IntRegs:$Rt32, ModRegs:$Mu2, HvxVR:$Vv32), @@ -32843,7 +33384,6 @@ let opNewValue = 0; let accessSize = HalfWordAccess; let isCVLoad = 1; let isCVI = 1; -let hasTmpDst = 1; let mayLoad = 1; let Defs = [VTMP]; let DecoderNamespace = "EXT_mmvec"; @@ -32860,7 +33400,6 @@ let opNewValue = 0; let accessSize = HalfWordAccess; let isCVLoad = 1; let isCVI = 1; -let hasTmpDst = 1; let mayLoad = 1; let Defs = [VTMP]; let DecoderNamespace = "EXT_mmvec"; @@ -32877,7 +33416,6 @@ let opNewValue = 0; let accessSize = HalfWordAccess; let isCVLoad = 1; let isCVI = 1; -let hasTmpDst = 1; let mayLoad = 1; let Defs = [VTMP]; let DecoderNamespace = "EXT_mmvec"; @@ -32894,7 +33432,6 @@ let opNewValue = 0; let accessSize = HalfWordAccess; let isCVLoad = 1; let isCVI = 1; -let hasTmpDst = 1; let mayLoad = 1; let Defs = [VTMP]; let DecoderNamespace = "EXT_mmvec"; @@ -32911,7 +33448,6 @@ let opNewValue = 0; let accessSize = WordAccess; let isCVLoad = 1; let isCVI = 1; -let hasTmpDst = 1; let mayLoad = 1; let Defs = [VTMP]; let DecoderNamespace = "EXT_mmvec"; @@ -32928,7 +33464,6 @@ let opNewValue = 0; let accessSize = WordAccess; let isCVLoad = 1; let isCVI = 1; -let hasTmpDst = 1; let mayLoad = 1; let Defs = [VTMP]; let DecoderNamespace = "EXT_mmvec"; @@ -33033,6 +33568,106 @@ let isCVI = 1; let DecoderNamespace = "EXT_mmvec"; let Constraints = "$Qx4 = $Qx4in"; } +def V6_vgthf : HInst< +(outs HvxQR:$Qd4), +(ins HvxVR:$Vu32, HvxVR:$Vv32), +"$Qd4 = vcmp.gt($Vu32.hf,$Vv32.hf)", +tc_56c4f9fe, TypeCVI_VA>, Enc_95441f, Requires<[UseHVXV68,UseHVXFloatingPoint]> { +let Inst{7-2} = 0b011101; +let Inst{13-13} = 0b1; +let Inst{31-21} = 0b00011100100; +let hasNewValue = 1; +let opNewValue = 0; +let isCVI = 1; +let DecoderNamespace = "EXT_mmvec"; +} +def V6_vgthf_and : HInst< +(outs HvxQR:$Qx4), +(ins HvxQR:$Qx4in, HvxVR:$Vu32, HvxVR:$Vv32), +"$Qx4 &= vcmp.gt($Vu32.hf,$Vv32.hf)", +tc_257f6f7c, TypeCVI_VA>, Enc_eaa9f8, Requires<[UseHVXV68,UseHVXFloatingPoint]> { +let Inst{7-2} = 0b110011; +let Inst{13-13} = 0b1; +let Inst{31-21} = 0b00011100100; +let isCVI = 1; +let DecoderNamespace = "EXT_mmvec"; +let Constraints = "$Qx4 = $Qx4in"; +} +def V6_vgthf_or : HInst< +(outs HvxQR:$Qx4), +(ins HvxQR:$Qx4in, HvxVR:$Vu32, HvxVR:$Vv32), +"$Qx4 |= vcmp.gt($Vu32.hf,$Vv32.hf)", +tc_257f6f7c, TypeCVI_VA>, Enc_eaa9f8, Requires<[UseHVXV68,UseHVXFloatingPoint]> { +let Inst{7-2} = 0b001101; +let Inst{13-13} = 0b1; +let Inst{31-21} = 0b00011100100; +let isAccumulator = 1; +let isCVI = 1; +let DecoderNamespace = "EXT_mmvec"; +let 
Constraints = "$Qx4 = $Qx4in"; +} +def V6_vgthf_xor : HInst< +(outs HvxQR:$Qx4), +(ins HvxQR:$Qx4in, HvxVR:$Vu32, HvxVR:$Vv32), +"$Qx4 ^= vcmp.gt($Vu32.hf,$Vv32.hf)", +tc_257f6f7c, TypeCVI_VA>, Enc_eaa9f8, Requires<[UseHVXV68,UseHVXFloatingPoint]> { +let Inst{7-2} = 0b111011; +let Inst{13-13} = 0b1; +let Inst{31-21} = 0b00011100100; +let isCVI = 1; +let DecoderNamespace = "EXT_mmvec"; +let Constraints = "$Qx4 = $Qx4in"; +} +def V6_vgtsf : HInst< +(outs HvxQR:$Qd4), +(ins HvxVR:$Vu32, HvxVR:$Vv32), +"$Qd4 = vcmp.gt($Vu32.sf,$Vv32.sf)", +tc_56c4f9fe, TypeCVI_VA>, Enc_95441f, Requires<[UseHVXV68,UseHVXFloatingPoint]> { +let Inst{7-2} = 0b011100; +let Inst{13-13} = 0b1; +let Inst{31-21} = 0b00011100100; +let hasNewValue = 1; +let opNewValue = 0; +let isCVI = 1; +let DecoderNamespace = "EXT_mmvec"; +} +def V6_vgtsf_and : HInst< +(outs HvxQR:$Qx4), +(ins HvxQR:$Qx4in, HvxVR:$Vu32, HvxVR:$Vv32), +"$Qx4 &= vcmp.gt($Vu32.sf,$Vv32.sf)", +tc_257f6f7c, TypeCVI_VA>, Enc_eaa9f8, Requires<[UseHVXV68,UseHVXFloatingPoint]> { +let Inst{7-2} = 0b110010; +let Inst{13-13} = 0b1; +let Inst{31-21} = 0b00011100100; +let isCVI = 1; +let DecoderNamespace = "EXT_mmvec"; +let Constraints = "$Qx4 = $Qx4in"; +} +def V6_vgtsf_or : HInst< +(outs HvxQR:$Qx4), +(ins HvxQR:$Qx4in, HvxVR:$Vu32, HvxVR:$Vv32), +"$Qx4 |= vcmp.gt($Vu32.sf,$Vv32.sf)", +tc_257f6f7c, TypeCVI_VA>, Enc_eaa9f8, Requires<[UseHVXV68,UseHVXFloatingPoint]> { +let Inst{7-2} = 0b001100; +let Inst{13-13} = 0b1; +let Inst{31-21} = 0b00011100100; +let isAccumulator = 1; +let isCVI = 1; +let DecoderNamespace = "EXT_mmvec"; +let Constraints = "$Qx4 = $Qx4in"; +} +def V6_vgtsf_xor : HInst< +(outs HvxQR:$Qx4), +(ins HvxQR:$Qx4in, HvxVR:$Vu32, HvxVR:$Vv32), +"$Qx4 ^= vcmp.gt($Vu32.sf,$Vv32.sf)", +tc_257f6f7c, TypeCVI_VA>, Enc_eaa9f8, Requires<[UseHVXV68,UseHVXFloatingPoint]> { +let Inst{7-2} = 0b111010; +let Inst{13-13} = 0b1; +let Inst{31-21} = 0b00011100100; +let isCVI = 1; +let DecoderNamespace = "EXT_mmvec"; +let Constraints = "$Qx4 = $Qx4in"; +} def V6_vgtub : HInst< (outs HvxQR:$Qd4), (ins HvxVR:$Vu32, HvxVR:$Vv32), @@ -33552,6 +34187,32 @@ let opNewValue = 0; let isCVI = 1; let DecoderNamespace = "EXT_mmvec"; } +def V6_vmax_hf : HInst< +(outs HvxVR:$Vd32), +(ins HvxVR:$Vu32, HvxVR:$Vv32), +"$Vd32.hf = vmax($Vu32.hf,$Vv32.hf)", +tc_56c4f9fe, TypeCVI_VA>, Enc_45364e, Requires<[UseHVXV68,UseHVXQFloat]> { +let Inst{7-5} = 0b011; +let Inst{13-13} = 0b1; +let Inst{31-21} = 0b00011111110; +let hasNewValue = 1; +let opNewValue = 0; +let isCVI = 1; +let DecoderNamespace = "EXT_mmvec"; +} +def V6_vmax_sf : HInst< +(outs HvxVR:$Vd32), +(ins HvxVR:$Vu32, HvxVR:$Vv32), +"$Vd32.sf = vmax($Vu32.sf,$Vv32.sf)", +tc_56c4f9fe, TypeCVI_VA>, Enc_45364e, Requires<[UseHVXV68,UseHVXQFloat]> { +let Inst{7-5} = 0b001; +let Inst{13-13} = 0b1; +let Inst{31-21} = 0b00011111110; +let hasNewValue = 1; +let opNewValue = 0; +let isCVI = 1; +let DecoderNamespace = "EXT_mmvec"; +} def V6_vmaxb : HInst< (outs HvxVR:$Vd32), (ins HvxVR:$Vu32, HvxVR:$Vv32), @@ -33677,6 +34338,32 @@ let isPseudo = 1; let isCodeGenOnly = 1; let DecoderNamespace = "EXT_mmvec"; } +def V6_vmin_hf : HInst< +(outs HvxVR:$Vd32), +(ins HvxVR:$Vu32, HvxVR:$Vv32), +"$Vd32.hf = vmin($Vu32.hf,$Vv32.hf)", +tc_56c4f9fe, TypeCVI_VA>, Enc_45364e, Requires<[UseHVXV68,UseHVXQFloat]> { +let Inst{7-5} = 0b100; +let Inst{13-13} = 0b1; +let Inst{31-21} = 0b00011111110; +let hasNewValue = 1; +let opNewValue = 0; +let isCVI = 1; +let DecoderNamespace = "EXT_mmvec"; +} +def V6_vmin_sf : HInst< +(outs HvxVR:$Vd32), +(ins HvxVR:$Vu32, 
HvxVR:$Vv32), +"$Vd32.sf = vmin($Vu32.sf,$Vv32.sf)", +tc_56c4f9fe, TypeCVI_VA>, Enc_45364e, Requires<[UseHVXV68,UseHVXQFloat]> { +let Inst{7-5} = 0b010; +let Inst{13-13} = 0b1; +let Inst{31-21} = 0b00011111110; +let hasNewValue = 1; +let opNewValue = 0; +let isCVI = 1; +let DecoderNamespace = "EXT_mmvec"; +} def V6_vminb : HInst< (outs HvxVR:$Vd32), (ins HvxVR:$Vu32, HvxVR:$Vv32), @@ -34110,6 +34797,179 @@ let isCVI = 1; let DecoderNamespace = "EXT_mmvec"; let Constraints = "$Vx32 = $Vx32in"; } +def V6_vmpy_hf_hf : HInst< +(outs HvxVR:$Vd32), +(ins HvxVR:$Vu32, HvxVR:$Vv32), +"$Vd32.hf = vmpy($Vu32.hf,$Vv32.hf)", +tc_c127de3a, TypeCVI_VX>, Enc_45364e, Requires<[UseHVXV68,UseHVXIEEEFP]> { +let Inst{7-5} = 0b011; +let Inst{13-13} = 0b1; +let Inst{31-21} = 0b00011111100; +let hasNewValue = 1; +let opNewValue = 0; +let isCVI = 1; +let DecoderNamespace = "EXT_mmvec"; +} +def V6_vmpy_hf_hf_acc : HInst< +(outs HvxVR:$Vx32), +(ins HvxVR:$Vx32in, HvxVR:$Vu32, HvxVR:$Vv32), +"$Vx32.hf += vmpy($Vu32.hf,$Vv32.hf)", +tc_a19b9305, TypeCVI_VX>, Enc_a7341a, Requires<[UseHVXV68,UseHVXIEEEFP]> { +let Inst{7-5} = 0b010; +let Inst{13-13} = 0b1; +let Inst{31-21} = 0b00011100010; +let hasNewValue = 1; +let opNewValue = 0; +let isAccumulator = 1; +let isCVI = 1; +let DecoderNamespace = "EXT_mmvec"; +let Constraints = "$Vx32 = $Vx32in"; +} +def V6_vmpy_qf16 : HInst< +(outs HvxVR:$Vd32), +(ins HvxVR:$Vu32, HvxVR:$Vv32), +"$Vd32.qf16 = vmpy($Vu32.qf16,$Vv32.qf16)", +tc_d8287c14, TypeCVI_VX_DV>, Enc_45364e, Requires<[UseHVXV68,UseHVXQFloat]> { +let Inst{7-5} = 0b011; +let Inst{13-13} = 0b1; +let Inst{31-21} = 0b00011111111; +let hasNewValue = 1; +let opNewValue = 0; +let isCVI = 1; +let DecoderNamespace = "EXT_mmvec"; +} +def V6_vmpy_qf16_hf : HInst< +(outs HvxVR:$Vd32), +(ins HvxVR:$Vu32, HvxVR:$Vv32), +"$Vd32.qf16 = vmpy($Vu32.hf,$Vv32.hf)", +tc_d8287c14, TypeCVI_VX_DV>, Enc_45364e, Requires<[UseHVXV68,UseHVXQFloat]> { +let Inst{7-5} = 0b100; +let Inst{13-13} = 0b1; +let Inst{31-21} = 0b00011111111; +let hasNewValue = 1; +let opNewValue = 0; +let isCVI = 1; +let DecoderNamespace = "EXT_mmvec"; +} +def V6_vmpy_qf16_mix_hf : HInst< +(outs HvxVR:$Vd32), +(ins HvxVR:$Vu32, HvxVR:$Vv32), +"$Vd32.qf16 = vmpy($Vu32.qf16,$Vv32.hf)", +tc_d8287c14, TypeCVI_VX_DV>, Enc_45364e, Requires<[UseHVXV68,UseHVXQFloat]> { +let Inst{7-5} = 0b101; +let Inst{13-13} = 0b1; +let Inst{31-21} = 0b00011111111; +let hasNewValue = 1; +let opNewValue = 0; +let isCVI = 1; +let DecoderNamespace = "EXT_mmvec"; +} +def V6_vmpy_qf32 : HInst< +(outs HvxVR:$Vd32), +(ins HvxVR:$Vu32, HvxVR:$Vv32), +"$Vd32.qf32 = vmpy($Vu32.qf32,$Vv32.qf32)", +tc_d8287c14, TypeCVI_VX_DV>, Enc_45364e, Requires<[UseHVXV68,UseHVXQFloat]> { +let Inst{7-5} = 0b000; +let Inst{13-13} = 0b1; +let Inst{31-21} = 0b00011111111; +let hasNewValue = 1; +let opNewValue = 0; +let isCVI = 1; +let DecoderNamespace = "EXT_mmvec"; +} +def V6_vmpy_qf32_hf : HInst< +(outs HvxWR:$Vdd32), +(ins HvxVR:$Vu32, HvxVR:$Vv32), +"$Vdd32.qf32 = vmpy($Vu32.hf,$Vv32.hf)", +tc_d8287c14, TypeCVI_VX_DV>, Enc_71bb9b, Requires<[UseHVXV68,UseHVXQFloat]> { +let Inst{7-5} = 0b111; +let Inst{13-13} = 0b1; +let Inst{31-21} = 0b00011111111; +let hasNewValue = 1; +let opNewValue = 0; +let isCVI = 1; +let DecoderNamespace = "EXT_mmvec"; +} +def V6_vmpy_qf32_mix_hf : HInst< +(outs HvxWR:$Vdd32), +(ins HvxVR:$Vu32, HvxVR:$Vv32), +"$Vdd32.qf32 = vmpy($Vu32.qf16,$Vv32.hf)", +tc_d8287c14, TypeCVI_VX_DV>, Enc_71bb9b, Requires<[UseHVXV68,UseHVXQFloat]> { +let Inst{7-5} = 0b000; +let Inst{13-13} = 0b1; +let Inst{31-21} = 
0b00011111100; +let hasNewValue = 1; +let opNewValue = 0; +let isCVI = 1; +let DecoderNamespace = "EXT_mmvec"; +} +def V6_vmpy_qf32_qf16 : HInst< +(outs HvxWR:$Vdd32), +(ins HvxVR:$Vu32, HvxVR:$Vv32), +"$Vdd32.qf32 = vmpy($Vu32.qf16,$Vv32.qf16)", +tc_d8287c14, TypeCVI_VX_DV>, Enc_71bb9b, Requires<[UseHVXV68,UseHVXQFloat]> { +let Inst{7-5} = 0b110; +let Inst{13-13} = 0b1; +let Inst{31-21} = 0b00011111111; +let hasNewValue = 1; +let opNewValue = 0; +let isCVI = 1; +let DecoderNamespace = "EXT_mmvec"; +} +def V6_vmpy_qf32_sf : HInst< +(outs HvxVR:$Vd32), +(ins HvxVR:$Vu32, HvxVR:$Vv32), +"$Vd32.qf32 = vmpy($Vu32.sf,$Vv32.sf)", +tc_d8287c14, TypeCVI_VX_DV>, Enc_45364e, Requires<[UseHVXV68,UseHVXQFloat]> { +let Inst{7-5} = 0b001; +let Inst{13-13} = 0b1; +let Inst{31-21} = 0b00011111111; +let hasNewValue = 1; +let opNewValue = 0; +let isCVI = 1; +let DecoderNamespace = "EXT_mmvec"; +} +def V6_vmpy_sf_hf : HInst< +(outs HvxWR:$Vdd32), +(ins HvxVR:$Vu32, HvxVR:$Vv32), +"$Vdd32.sf = vmpy($Vu32.hf,$Vv32.hf)", +tc_d8287c14, TypeCVI_VX_DV>, Enc_71bb9b, Requires<[UseHVXV68,UseHVXIEEEFP]> { +let Inst{7-5} = 0b010; +let Inst{13-13} = 0b1; +let Inst{31-21} = 0b00011111100; +let hasNewValue = 1; +let opNewValue = 0; +let isCVI = 1; +let DecoderNamespace = "EXT_mmvec"; +} +def V6_vmpy_sf_hf_acc : HInst< +(outs HvxWR:$Vxx32), +(ins HvxWR:$Vxx32in, HvxVR:$Vu32, HvxVR:$Vv32), +"$Vxx32.sf += vmpy($Vu32.hf,$Vv32.hf)", +tc_08a4f1b6, TypeCVI_VX_DV>, Enc_3fc427, Requires<[UseHVXV68,UseHVXIEEEFP]> { +let Inst{7-5} = 0b001; +let Inst{13-13} = 0b1; +let Inst{31-21} = 0b00011100010; +let hasNewValue = 1; +let opNewValue = 0; +let isAccumulator = 1; +let isCVI = 1; +let DecoderNamespace = "EXT_mmvec"; +let Constraints = "$Vxx32 = $Vxx32in"; +} +def V6_vmpy_sf_sf : HInst< +(outs HvxVR:$Vd32), +(ins HvxVR:$Vu32, HvxVR:$Vv32), +"$Vd32.sf = vmpy($Vu32.sf,$Vv32.sf)", +tc_d8287c14, TypeCVI_VX_DV>, Enc_45364e, Requires<[UseHVXV68,UseHVXIEEEFP]> { +let Inst{7-5} = 0b001; +let Inst{13-13} = 0b1; +let Inst{31-21} = 0b00011111100; +let hasNewValue = 1; +let opNewValue = 0; +let isCVI = 1; +let DecoderNamespace = "EXT_mmvec"; +} def V6_vmpybus : HInst< (outs HvxWR:$Vdd32), (ins HvxVR:$Vu32, IntRegs:$Rt32), @@ -34397,7 +35257,7 @@ def V6_vmpyhsrs : HInst< (outs HvxVR:$Vd32), (ins HvxVR:$Vu32, IntRegs:$Rt32), "$Vd32.h = vmpy($Vu32.h,$Rt32.h):<<1:rnd:sat", -tc_0b04c6c7, TypeCVI_VX_DV>, Enc_b087ac, Requires<[UseHVXV60]> { +tc_dcca380f, TypeCVI_VX>, Enc_b087ac, Requires<[UseHVXV60]> { let Inst{7-5} = 0b010; let Inst{13-13} = 0b0; let Inst{31-21} = 0b00011001010; @@ -34422,7 +35282,7 @@ def V6_vmpyhss : HInst< (outs HvxVR:$Vd32), (ins HvxVR:$Vu32, IntRegs:$Rt32), "$Vd32.h = vmpy($Vu32.h,$Rt32.h):<<1:sat", -tc_0b04c6c7, TypeCVI_VX_DV>, Enc_b087ac, Requires<[UseHVXV60]> { +tc_dcca380f, TypeCVI_VX>, Enc_b087ac, Requires<[UseHVXV60]> { let Inst{7-5} = 0b001; let Inst{13-13} = 0b0; let Inst{31-21} = 0b00011001010; @@ -34555,7 +35415,7 @@ def V6_vmpyhvsrs : HInst< (outs HvxVR:$Vd32), (ins HvxVR:$Vu32, HvxVR:$Vv32), "$Vd32.h = vmpy($Vu32.h,$Vv32.h):<<1:rnd:sat", -tc_d8287c14, TypeCVI_VX_DV>, Enc_45364e, Requires<[UseHVXV60]> { +tc_73efe966, TypeCVI_VX>, Enc_45364e, Requires<[UseHVXV60]> { let Inst{7-5} = 0b001; let Inst{13-13} = 0b0; let Inst{31-21} = 0b00011100001; @@ -35332,6 +36192,19 @@ let isPseudo = 1; let isCodeGenOnly = 1; let DecoderNamespace = "EXT_mmvec"; } +def V6_vmpyuhvs : HInst< +(outs HvxVR:$Vd32), +(ins HvxVR:$Vu32, HvxVR:$Vv32), +"$Vd32.uh = vmpy($Vu32.uh,$Vv32.uh):>>16", +tc_c127de3a, TypeCVI_VX>, Enc_45364e, 
Requires<[UseHVXV69]> { +let Inst{7-5} = 0b111; +let Inst{13-13} = 0b1; +let Inst{31-21} = 0b00011111110; +let hasNewValue = 1; +let opNewValue = 0; +let isCVI = 1; +let DecoderNamespace = "EXT_mmvec"; +} def V6_vmux : HInst< (outs HvxVR:$Vd32), (ins HvxQR:$Qt4, HvxVR:$Vu32, HvxVR:$Vv32), @@ -36007,7 +36880,7 @@ def V6_vrmpybusv_acc : HInst< (outs HvxVR:$Vx32), (ins HvxVR:$Vx32in, HvxVR:$Vu32, HvxVR:$Vv32), "$Vx32.w += vrmpy($Vu32.ub,$Vv32.b)", -tc_08a4f1b6, TypeCVI_VX_DV>, Enc_a7341a, Requires<[UseHVXV60]> { +tc_37820f4c, TypeCVI_VX>, Enc_a7341a, Requires<[UseHVXV60]> { let Inst{7-5} = 0b010; let Inst{13-13} = 0b1; let Inst{31-21} = 0b00011100000; @@ -36061,7 +36934,7 @@ def V6_vrmpybv_acc : HInst< (outs HvxVR:$Vx32), (ins HvxVR:$Vx32in, HvxVR:$Vu32, HvxVR:$Vv32), "$Vx32.w += vrmpy($Vu32.b,$Vv32.b)", -tc_08a4f1b6, TypeCVI_VX_DV>, Enc_a7341a, Requires<[UseHVXV60]> { +tc_37820f4c, TypeCVI_VX>, Enc_a7341a, Requires<[UseHVXV60]> { let Inst{7-5} = 0b001; let Inst{13-13} = 0b1; let Inst{31-21} = 0b00011100000; @@ -36277,7 +37150,7 @@ def V6_vrmpyubv_acc : HInst< (outs HvxVR:$Vx32), (ins HvxVR:$Vx32in, HvxVR:$Vu32, HvxVR:$Vv32), "$Vx32.uw += vrmpy($Vu32.ub,$Vv32.ub)", -tc_08a4f1b6, TypeCVI_VX_DV>, Enc_a7341a, Requires<[UseHVXV60]> { +tc_37820f4c, TypeCVI_VX>, Enc_a7341a, Requires<[UseHVXV60]> { let Inst{7-5} = 0b000; let Inst{13-13} = 0b1; let Inst{31-21} = 0b00011100000; @@ -37412,6 +38285,123 @@ let isPseudo = 1; let isCodeGenOnly = 1; let DecoderNamespace = "EXT_mmvec"; } +def V6_vsub_hf : HInst< +(outs HvxVR:$Vd32), +(ins HvxVR:$Vu32, HvxVR:$Vv32), +"$Vd32.qf16 = vsub($Vu32.hf,$Vv32.hf)", +tc_05ca8cfd, TypeCVI_VS>, Enc_45364e, Requires<[UseHVXV68,UseHVXQFloat]> { +let Inst{7-5} = 0b110; +let Inst{13-13} = 0b1; +let Inst{31-21} = 0b00011111011; +let hasNewValue = 1; +let opNewValue = 0; +let isCVI = 1; +let DecoderNamespace = "EXT_mmvec"; +} +def V6_vsub_hf_hf : HInst< +(outs HvxVR:$Vd32), +(ins HvxVR:$Vu32, HvxVR:$Vv32), +"$Vd32.hf = vsub($Vu32.hf,$Vv32.hf)", +tc_c127de3a, TypeCVI_VX>, Enc_45364e, Requires<[UseHVXV68,UseHVXIEEEFP]> { +let Inst{7-5} = 0b000; +let Inst{13-13} = 0b1; +let Inst{31-21} = 0b00011111011; +let hasNewValue = 1; +let opNewValue = 0; +let isCVI = 1; +let DecoderNamespace = "EXT_mmvec"; +} +def V6_vsub_qf16 : HInst< +(outs HvxVR:$Vd32), +(ins HvxVR:$Vu32, HvxVR:$Vv32), +"$Vd32.qf16 = vsub($Vu32.qf16,$Vv32.qf16)", +tc_05ca8cfd, TypeCVI_VS>, Enc_45364e, Requires<[UseHVXV68,UseHVXQFloat]> { +let Inst{7-5} = 0b101; +let Inst{13-13} = 0b1; +let Inst{31-21} = 0b00011111011; +let hasNewValue = 1; +let opNewValue = 0; +let isCVI = 1; +let DecoderNamespace = "EXT_mmvec"; +} +def V6_vsub_qf16_mix : HInst< +(outs HvxVR:$Vd32), +(ins HvxVR:$Vu32, HvxVR:$Vv32), +"$Vd32.qf16 = vsub($Vu32.qf16,$Vv32.hf)", +tc_05ca8cfd, TypeCVI_VS>, Enc_45364e, Requires<[UseHVXV68,UseHVXQFloat]> { +let Inst{7-5} = 0b111; +let Inst{13-13} = 0b1; +let Inst{31-21} = 0b00011111011; +let hasNewValue = 1; +let opNewValue = 0; +let isCVI = 1; +let DecoderNamespace = "EXT_mmvec"; +} +def V6_vsub_qf32 : HInst< +(outs HvxVR:$Vd32), +(ins HvxVR:$Vu32, HvxVR:$Vv32), +"$Vd32.qf32 = vsub($Vu32.qf32,$Vv32.qf32)", +tc_05ca8cfd, TypeCVI_VS>, Enc_45364e, Requires<[UseHVXV68,UseHVXQFloat]> { +let Inst{7-5} = 0b011; +let Inst{13-13} = 0b1; +let Inst{31-21} = 0b00011111101; +let hasNewValue = 1; +let opNewValue = 0; +let isCVI = 1; +let DecoderNamespace = "EXT_mmvec"; +} +def V6_vsub_qf32_mix : HInst< +(outs HvxVR:$Vd32), +(ins HvxVR:$Vu32, HvxVR:$Vv32), +"$Vd32.qf32 = vsub($Vu32.qf32,$Vv32.sf)", +tc_05ca8cfd, TypeCVI_VS>, 
Enc_45364e, Requires<[UseHVXV68,UseHVXQFloat]> { +let Inst{7-5} = 0b101; +let Inst{13-13} = 0b1; +let Inst{31-21} = 0b00011111101; +let hasNewValue = 1; +let opNewValue = 0; +let isCVI = 1; +let DecoderNamespace = "EXT_mmvec"; +} +def V6_vsub_sf : HInst< +(outs HvxVR:$Vd32), +(ins HvxVR:$Vu32, HvxVR:$Vv32), +"$Vd32.qf32 = vsub($Vu32.sf,$Vv32.sf)", +tc_05ca8cfd, TypeCVI_VS>, Enc_45364e, Requires<[UseHVXV68,UseHVXQFloat]> { +let Inst{7-5} = 0b100; +let Inst{13-13} = 0b1; +let Inst{31-21} = 0b00011111101; +let hasNewValue = 1; +let opNewValue = 0; +let isCVI = 1; +let DecoderNamespace = "EXT_mmvec"; +} +def V6_vsub_sf_hf : HInst< +(outs HvxWR:$Vdd32), +(ins HvxVR:$Vu32, HvxVR:$Vv32), +"$Vdd32.sf = vsub($Vu32.hf,$Vv32.hf)", +tc_d8287c14, TypeCVI_VX_DV>, Enc_71bb9b, Requires<[UseHVXV68,UseHVXIEEEFP]> { +let Inst{7-5} = 0b101; +let Inst{13-13} = 0b1; +let Inst{31-21} = 0b00011111100; +let hasNewValue = 1; +let opNewValue = 0; +let isCVI = 1; +let DecoderNamespace = "EXT_mmvec"; +} +def V6_vsub_sf_sf : HInst< +(outs HvxVR:$Vd32), +(ins HvxVR:$Vu32, HvxVR:$Vv32), +"$Vd32.sf = vsub($Vu32.sf,$Vv32.sf)", +tc_c127de3a, TypeCVI_VX>, Enc_45364e, Requires<[UseHVXV68,UseHVXIEEEFP]> { +let Inst{7-5} = 0b111; +let Inst{13-13} = 0b1; +let Inst{31-21} = 0b00011111100; +let hasNewValue = 1; +let opNewValue = 0; +let isCVI = 1; +let DecoderNamespace = "EXT_mmvec"; +} def V6_vsubb : HInst< (outs HvxVR:$Vd32), (ins HvxVR:$Vu32, HvxVR:$Vv32), @@ -38647,7 +39637,7 @@ def V6_zLd_ai : HInst< (outs), (ins IntRegs:$Rt32, s4_0Imm:$Ii), "z = vmem($Rt32+#$Ii)", -tc_e699ae41, TypeCVI_ZW>, Enc_ff3442, Requires<[UseHVXV66,UseZReg]> { +tc_e699ae41, TypeCVI_ZW>, Enc_ff3442, Requires<[UseHVXV66,UseZReg]>, PostInc_BaseImm { let Inst{7-0} = 0b00000000; let Inst{12-11} = 0b00; let Inst{31-21} = 0b00101100000; @@ -38655,13 +39645,14 @@ let addrMode = BaseImmOffset; let isCVI = 1; let mayLoad = 1; let isRestrictNoSlot1Store = 1; +let CextOpcode = "V6_zLd"; let DecoderNamespace = "EXT_mmvec"; } def V6_zLd_pi : HInst< (outs IntRegs:$Rx32), (ins IntRegs:$Rx32in, s3_0Imm:$Ii), "z = vmem($Rx32++#$Ii)", -tc_a0dbea28, TypeCVI_ZW>, Enc_6c9ee0, Requires<[UseHVXV66,UseZReg]> { +tc_a0dbea28, TypeCVI_ZW>, Enc_6c9ee0, Requires<[UseHVXV66,UseZReg]>, PostInc_BaseImm { let Inst{7-0} = 0b00000000; let Inst{13-11} = 0b000; let Inst{31-21} = 0b00101101000; @@ -38669,6 +39660,7 @@ let addrMode = PostInc; let isCVI = 1; let mayLoad = 1; let isRestrictNoSlot1Store = 1; +let CextOpcode = "V6_zLd"; let DecoderNamespace = "EXT_mmvec"; let Constraints = "$Rx32 = $Rx32in"; } @@ -38782,6 +39774,17 @@ let Inst{13-0} = 0b00000000000000; let Inst{31-16} = 0b0110110000100000; let isSolo = 1; } +def Y2_crswap_old : HInst< +(outs IntRegs:$Rx32), +(ins IntRegs:$Rx32in), +"crswap($Rx32,sgp)", +PSEUDO, TypeMAPPING> { +let hasNewValue = 1; +let opNewValue = 0; +let isPseudo = 1; +let isCodeGenOnly = 1; +let Constraints = "$Rx32 = $Rx32in"; +} def Y2_dccleana : HInst< (outs), (ins IntRegs:$Rs32), @@ -38861,6 +39864,22 @@ let Inst{13-0} = 0b00000000000010; let Inst{31-16} = 0b0101011111000000; let isSolo = 1; } +def Y2_k1lock_map : HInst< +(outs), +(ins), +"k1lock", +PSEUDO, TypeMAPPING>, Requires<[HasV65]> { +let isPseudo = 1; +let isCodeGenOnly = 1; +} +def Y2_k1unlock_map : HInst< +(outs), +(ins), +"k1unlock", +PSEUDO, TypeMAPPING>, Requires<[HasV65]> { +let isPseudo = 1; +let isCodeGenOnly = 1; +} def Y2_syncht : HInst< (outs), (ins), @@ -39083,7 +40102,7 @@ def dup_A2_add : HInst< (outs IntRegs:$Rd32), (ins IntRegs:$Rs32, IntRegs:$Rt32), "$Rd32 = add($Rs32,$Rt32)", 
-tc_388f9897, TypeALU32_3op>, Requires<[HasV68]> { +tc_388f9897, TypeALU32_3op>, Requires<[HasV69]> { let hasNewValue = 1; let opNewValue = 0; let AsmVariantName = "NonParsable"; @@ -39093,7 +40112,7 @@ def dup_A2_addi : HInst< (outs IntRegs:$Rd32), (ins IntRegs:$Rs32, s32_0Imm:$Ii), "$Rd32 = add($Rs32,#$Ii)", -tc_388f9897, TypeALU32_ADDI>, Requires<[HasV68]> { +tc_388f9897, TypeALU32_ADDI>, Requires<[HasV69]> { let hasNewValue = 1; let opNewValue = 0; let AsmVariantName = "NonParsable"; @@ -39108,7 +40127,7 @@ def dup_A2_andir : HInst< (outs IntRegs:$Rd32), (ins IntRegs:$Rs32, s32_0Imm:$Ii), "$Rd32 = and($Rs32,#$Ii)", -tc_388f9897, TypeALU32_2op>, Requires<[HasV68]> { +tc_388f9897, TypeALU32_2op>, Requires<[HasV69]> { let hasNewValue = 1; let opNewValue = 0; let AsmVariantName = "NonParsable"; @@ -39123,7 +40142,7 @@ def dup_A2_combineii : HInst< (outs DoubleRegs:$Rdd32), (ins s32_0Imm:$Ii, s8_0Imm:$II), "$Rdd32 = combine(#$Ii,#$II)", -tc_388f9897, TypeALU32_2op>, Requires<[HasV68]> { +tc_388f9897, TypeALU32_2op>, Requires<[HasV69]> { let AsmVariantName = "NonParsable"; let isPseudo = 1; let isExtendable = 1; @@ -39136,7 +40155,7 @@ def dup_A2_sxtb : HInst< (outs IntRegs:$Rd32), (ins IntRegs:$Rs32), "$Rd32 = sxtb($Rs32)", -tc_9124c04f, TypeALU32_2op>, Requires<[HasV68]> { +tc_9124c04f, TypeALU32_2op>, Requires<[HasV69]> { let hasNewValue = 1; let opNewValue = 0; let AsmVariantName = "NonParsable"; @@ -39146,7 +40165,7 @@ def dup_A2_sxth : HInst< (outs IntRegs:$Rd32), (ins IntRegs:$Rs32), "$Rd32 = sxth($Rs32)", -tc_9124c04f, TypeALU32_2op>, Requires<[HasV68]> { +tc_9124c04f, TypeALU32_2op>, Requires<[HasV69]> { let hasNewValue = 1; let opNewValue = 0; let AsmVariantName = "NonParsable"; @@ -39156,7 +40175,7 @@ def dup_A2_tfr : HInst< (outs IntRegs:$Rd32), (ins IntRegs:$Rs32), "$Rd32 = $Rs32", -tc_9124c04f, TypeALU32_2op>, Requires<[HasV68]> { +tc_9124c04f, TypeALU32_2op>, Requires<[HasV69]> { let hasNewValue = 1; let opNewValue = 0; let AsmVariantName = "NonParsable"; @@ -39166,7 +40185,7 @@ def dup_A2_tfrsi : HInst< (outs IntRegs:$Rd32), (ins s32_0Imm:$Ii), "$Rd32 = #$Ii", -tc_9124c04f, TypeALU32_2op>, Requires<[HasV68]> { +tc_9124c04f, TypeALU32_2op>, Requires<[HasV69]> { let hasNewValue = 1; let opNewValue = 0; let AsmVariantName = "NonParsable"; @@ -39181,7 +40200,7 @@ def dup_A2_zxtb : HInst< (outs IntRegs:$Rd32), (ins IntRegs:$Rs32), "$Rd32 = zxtb($Rs32)", -PSEUDO, TypeMAPPING>, Requires<[HasV68]> { +PSEUDO, TypeMAPPING>, Requires<[HasV69]> { let hasNewValue = 1; let opNewValue = 0; let AsmVariantName = "NonParsable"; @@ -39191,7 +40210,7 @@ def dup_A2_zxth : HInst< (outs IntRegs:$Rd32), (ins IntRegs:$Rs32), "$Rd32 = zxth($Rs32)", -tc_9124c04f, TypeALU32_2op>, Requires<[HasV68]> { +tc_9124c04f, TypeALU32_2op>, Requires<[HasV69]> { let hasNewValue = 1; let opNewValue = 0; let AsmVariantName = "NonParsable"; @@ -39201,7 +40220,7 @@ def dup_A4_combineii : HInst< (outs DoubleRegs:$Rdd32), (ins s8_0Imm:$Ii, u32_0Imm:$II), "$Rdd32 = combine(#$Ii,#$II)", -tc_388f9897, TypeALU32_2op>, Requires<[HasV68]> { +tc_388f9897, TypeALU32_2op>, Requires<[HasV69]> { let AsmVariantName = "NonParsable"; let isPseudo = 1; let isExtendable = 1; @@ -39214,7 +40233,7 @@ def dup_A4_combineir : HInst< (outs DoubleRegs:$Rdd32), (ins s32_0Imm:$Ii, IntRegs:$Rs32), "$Rdd32 = combine(#$Ii,$Rs32)", -tc_388f9897, TypeALU32_2op>, Requires<[HasV68]> { +tc_388f9897, TypeALU32_2op>, Requires<[HasV69]> { let AsmVariantName = "NonParsable"; let isPseudo = 1; let isExtendable = 1; @@ -39227,7 +40246,7 @@ def 
dup_A4_combineri : HInst< (outs DoubleRegs:$Rdd32), (ins IntRegs:$Rs32, s32_0Imm:$Ii), "$Rdd32 = combine($Rs32,#$Ii)", -tc_388f9897, TypeALU32_2op>, Requires<[HasV68]> { +tc_388f9897, TypeALU32_2op>, Requires<[HasV69]> { let AsmVariantName = "NonParsable"; let isPseudo = 1; let isExtendable = 1; @@ -39240,7 +40259,7 @@ def dup_C2_cmoveif : HInst< (outs IntRegs:$Rd32), (ins PredRegs:$Pu4, s32_0Imm:$Ii), "if (!$Pu4) $Rd32 = #$Ii", -tc_388f9897, TypeALU32_2op>, Requires<[HasV68]> { +tc_388f9897, TypeALU32_2op>, Requires<[HasV69]> { let isPredicated = 1; let isPredicatedFalse = 1; let hasNewValue = 1; @@ -39257,7 +40276,7 @@ def dup_C2_cmoveit : HInst< (outs IntRegs:$Rd32), (ins PredRegs:$Pu4, s32_0Imm:$Ii), "if ($Pu4) $Rd32 = #$Ii", -tc_388f9897, TypeALU32_2op>, Requires<[HasV68]> { +tc_388f9897, TypeALU32_2op>, Requires<[HasV69]> { let isPredicated = 1; let hasNewValue = 1; let opNewValue = 0; @@ -39273,7 +40292,7 @@ def dup_C2_cmovenewif : HInst< (outs IntRegs:$Rd32), (ins PredRegs:$Pu4, s32_0Imm:$Ii), "if (!$Pu4.new) $Rd32 = #$Ii", -tc_4ac61d92, TypeALU32_2op>, Requires<[HasV68]> { +tc_4ac61d92, TypeALU32_2op>, Requires<[HasV69]> { let isPredicated = 1; let isPredicatedFalse = 1; let hasNewValue = 1; @@ -39291,7 +40310,7 @@ def dup_C2_cmovenewit : HInst< (outs IntRegs:$Rd32), (ins PredRegs:$Pu4, s32_0Imm:$Ii), "if ($Pu4.new) $Rd32 = #$Ii", -tc_4ac61d92, TypeALU32_2op>, Requires<[HasV68]> { +tc_4ac61d92, TypeALU32_2op>, Requires<[HasV69]> { let isPredicated = 1; let hasNewValue = 1; let opNewValue = 0; @@ -39308,7 +40327,7 @@ def dup_C2_cmpeqi : HInst< (outs PredRegs:$Pd4), (ins IntRegs:$Rs32, s32_0Imm:$Ii), "$Pd4 = cmp.eq($Rs32,#$Ii)", -tc_388f9897, TypeALU32_2op>, Requires<[HasV68]> { +tc_388f9897, TypeALU32_2op>, Requires<[HasV69]> { let AsmVariantName = "NonParsable"; let isPseudo = 1; let isExtendable = 1; @@ -39321,7 +40340,7 @@ def dup_L2_deallocframe : HInst< (outs DoubleRegs:$Rdd32), (ins IntRegs:$Rs32), "$Rdd32 = deallocframe($Rs32):raw", -tc_aee6250c, TypeLD>, Requires<[HasV68]> { +tc_aee6250c, TypeLD>, Requires<[HasV69]> { let accessSize = DoubleWordAccess; let AsmVariantName = "NonParsable"; let mayLoad = 1; @@ -39333,7 +40352,7 @@ def dup_L2_loadrb_io : HInst< (outs IntRegs:$Rd32), (ins IntRegs:$Rs32, s32_0Imm:$Ii), "$Rd32 = memb($Rs32+#$Ii)", -tc_eed07714, TypeLD>, Requires<[HasV68]> { +tc_eed07714, TypeLD>, Requires<[HasV69]> { let hasNewValue = 1; let opNewValue = 0; let addrMode = BaseImmOffset; @@ -39351,7 +40370,7 @@ def dup_L2_loadrd_io : HInst< (outs DoubleRegs:$Rdd32), (ins IntRegs:$Rs32, s29_3Imm:$Ii), "$Rdd32 = memd($Rs32+#$Ii)", -tc_eed07714, TypeLD>, Requires<[HasV68]> { +tc_eed07714, TypeLD>, Requires<[HasV69]> { let addrMode = BaseImmOffset; let accessSize = DoubleWordAccess; let AsmVariantName = "NonParsable"; @@ -39367,7 +40386,7 @@ def dup_L2_loadrh_io : HInst< (outs IntRegs:$Rd32), (ins IntRegs:$Rs32, s31_1Imm:$Ii), "$Rd32 = memh($Rs32+#$Ii)", -tc_eed07714, TypeLD>, Requires<[HasV68]> { +tc_eed07714, TypeLD>, Requires<[HasV69]> { let hasNewValue = 1; let opNewValue = 0; let addrMode = BaseImmOffset; @@ -39385,7 +40404,7 @@ def dup_L2_loadri_io : HInst< (outs IntRegs:$Rd32), (ins IntRegs:$Rs32, s30_2Imm:$Ii), "$Rd32 = memw($Rs32+#$Ii)", -tc_eed07714, TypeLD>, Requires<[HasV68]> { +tc_eed07714, TypeLD>, Requires<[HasV69]> { let hasNewValue = 1; let opNewValue = 0; let addrMode = BaseImmOffset; @@ -39403,7 +40422,7 @@ def dup_L2_loadrub_io : HInst< (outs IntRegs:$Rd32), (ins IntRegs:$Rs32, s32_0Imm:$Ii), "$Rd32 = memub($Rs32+#$Ii)", -tc_eed07714, TypeLD>, 
Requires<[HasV68]> { +tc_eed07714, TypeLD>, Requires<[HasV69]> { let hasNewValue = 1; let opNewValue = 0; let addrMode = BaseImmOffset; @@ -39421,7 +40440,7 @@ def dup_L2_loadruh_io : HInst< (outs IntRegs:$Rd32), (ins IntRegs:$Rs32, s31_1Imm:$Ii), "$Rd32 = memuh($Rs32+#$Ii)", -tc_eed07714, TypeLD>, Requires<[HasV68]> { +tc_eed07714, TypeLD>, Requires<[HasV69]> { let hasNewValue = 1; let opNewValue = 0; let addrMode = BaseImmOffset; @@ -39439,7 +40458,7 @@ def dup_S2_allocframe : HInst< (outs IntRegs:$Rx32), (ins IntRegs:$Rx32in, u11_3Imm:$Ii), "allocframe($Rx32,#$Ii):raw", -tc_74a42bda, TypeST>, Requires<[HasV68]> { +tc_74a42bda, TypeST>, Requires<[HasV69]> { let hasNewValue = 1; let opNewValue = 0; let addrMode = BaseImmOffset; @@ -39455,7 +40474,7 @@ def dup_S2_storerb_io : HInst< (outs), (ins IntRegs:$Rs32, s32_0Imm:$Ii, IntRegs:$Rt32), "memb($Rs32+#$Ii) = $Rt32", -tc_a9edeffa, TypeST>, Requires<[HasV68]> { +tc_a9edeffa, TypeST>, Requires<[HasV69]> { let addrMode = BaseImmOffset; let accessSize = ByteAccess; let AsmVariantName = "NonParsable"; @@ -39471,7 +40490,7 @@ def dup_S2_storerd_io : HInst< (outs), (ins IntRegs:$Rs32, s29_3Imm:$Ii, DoubleRegs:$Rtt32), "memd($Rs32+#$Ii) = $Rtt32", -tc_a9edeffa, TypeST>, Requires<[HasV68]> { +tc_a9edeffa, TypeST>, Requires<[HasV69]> { let addrMode = BaseImmOffset; let accessSize = DoubleWordAccess; let AsmVariantName = "NonParsable"; @@ -39487,7 +40506,7 @@ def dup_S2_storerh_io : HInst< (outs), (ins IntRegs:$Rs32, s31_1Imm:$Ii, IntRegs:$Rt32), "memh($Rs32+#$Ii) = $Rt32", -tc_a9edeffa, TypeST>, Requires<[HasV68]> { +tc_a9edeffa, TypeST>, Requires<[HasV69]> { let addrMode = BaseImmOffset; let accessSize = HalfWordAccess; let AsmVariantName = "NonParsable"; @@ -39503,7 +40522,7 @@ def dup_S2_storeri_io : HInst< (outs), (ins IntRegs:$Rs32, s30_2Imm:$Ii, IntRegs:$Rt32), "memw($Rs32+#$Ii) = $Rt32", -tc_a9edeffa, TypeST>, Requires<[HasV68]> { +tc_a9edeffa, TypeST>, Requires<[HasV69]> { let addrMode = BaseImmOffset; let accessSize = WordAccess; let AsmVariantName = "NonParsable"; @@ -39519,7 +40538,7 @@ def dup_S4_storeirb_io : HInst< (outs), (ins IntRegs:$Rs32, u6_0Imm:$Ii, s32_0Imm:$II), "memb($Rs32+#$Ii) = #$II", -tc_838c4d7a, TypeV4LDST>, Requires<[HasV68]> { +tc_838c4d7a, TypeV4LDST>, Requires<[HasV69]> { let addrMode = BaseImmOffset; let accessSize = ByteAccess; let AsmVariantName = "NonParsable"; @@ -39535,7 +40554,7 @@ def dup_S4_storeiri_io : HInst< (outs), (ins IntRegs:$Rs32, u6_2Imm:$Ii, s32_0Imm:$II), "memw($Rs32+#$Ii) = #$II", -tc_838c4d7a, TypeV4LDST>, Requires<[HasV68]> { +tc_838c4d7a, TypeV4LDST>, Requires<[HasV69]> { let addrMode = BaseImmOffset; let accessSize = WordAccess; let AsmVariantName = "NonParsable"; diff --git a/llvm/lib/Target/Hexagon/HexagonDepMapAsm2Intrin.td b/llvm/lib/Target/Hexagon/HexagonDepMapAsm2Intrin.td index e5c78d122c9e..64bc5091d1d1 100644 --- a/llvm/lib/Target/Hexagon/HexagonDepMapAsm2Intrin.td +++ b/llvm/lib/Target/Hexagon/HexagonDepMapAsm2Intrin.td @@ -1661,8 +1661,6 @@ def: Pat<(int_hexagon_Y2_dccleana IntRegs:$src1), (Y2_dccleana IntRegs:$src1)>, Requires<[HasV5]>; def: Pat<(int_hexagon_Y2_dccleaninva IntRegs:$src1), (Y2_dccleaninva IntRegs:$src1)>, Requires<[HasV5]>; -def: Pat<(int_hexagon_Y2_dcfetch IntRegs:$src1), - (Y2_dcfetch IntRegs:$src1)>, Requires<[HasV5]>; def: Pat<(int_hexagon_Y2_dcinva IntRegs:$src1), (Y2_dcinva IntRegs:$src1)>, Requires<[HasV5]>; def: Pat<(int_hexagon_Y2_dczeroa IntRegs:$src1), @@ -3380,3 +3378,294 @@ def: Pat<(int_hexagon_V6_v6mpyvubs10_vxx HvxWR:$src1, HvxWR:$src2, HvxWR:$src3, 
(V6_v6mpyvubs10_vxx HvxWR:$src1, HvxWR:$src2, HvxWR:$src3, u2_0ImmPred_timm:$src4)>, Requires<[HasV68, UseHVX64B]>; def: Pat<(int_hexagon_V6_v6mpyvubs10_vxx_128B HvxWR:$src1, HvxWR:$src2, HvxWR:$src3, u2_0ImmPred_timm:$src4), (V6_v6mpyvubs10_vxx HvxWR:$src1, HvxWR:$src2, HvxWR:$src3, u2_0ImmPred_timm:$src4)>, Requires<[HasV68, UseHVX128B]>; +def: Pat<(int_hexagon_V6_vabs_hf HvxVR:$src1), + (V6_vabs_hf HvxVR:$src1)>, Requires<[HasV68, UseHVX64B]>; +def: Pat<(int_hexagon_V6_vabs_hf_128B HvxVR:$src1), + (V6_vabs_hf HvxVR:$src1)>, Requires<[HasV68, UseHVX128B]>; +def: Pat<(int_hexagon_V6_vabs_sf HvxVR:$src1), + (V6_vabs_sf HvxVR:$src1)>, Requires<[HasV68, UseHVX64B]>; +def: Pat<(int_hexagon_V6_vabs_sf_128B HvxVR:$src1), + (V6_vabs_sf HvxVR:$src1)>, Requires<[HasV68, UseHVX128B]>; +def: Pat<(int_hexagon_V6_vadd_hf HvxVR:$src1, HvxVR:$src2), + (V6_vadd_hf HvxVR:$src1, HvxVR:$src2)>, Requires<[HasV68, UseHVX64B, UseHVXQFloat]>; +def: Pat<(int_hexagon_V6_vadd_hf_128B HvxVR:$src1, HvxVR:$src2), + (V6_vadd_hf HvxVR:$src1, HvxVR:$src2)>, Requires<[HasV68, UseHVX128B, UseHVXQFloat]>; +def: Pat<(int_hexagon_V6_vadd_hf_hf HvxVR:$src1, HvxVR:$src2), + (V6_vadd_hf_hf HvxVR:$src1, HvxVR:$src2)>, Requires<[HasV68, UseHVX64B]>; +def: Pat<(int_hexagon_V6_vadd_hf_hf_128B HvxVR:$src1, HvxVR:$src2), + (V6_vadd_hf_hf HvxVR:$src1, HvxVR:$src2)>, Requires<[HasV68, UseHVX128B]>; +def: Pat<(int_hexagon_V6_vadd_qf16 HvxVR:$src1, HvxVR:$src2), + (V6_vadd_qf16 HvxVR:$src1, HvxVR:$src2)>, Requires<[HasV68, UseHVX64B, UseHVXQFloat]>; +def: Pat<(int_hexagon_V6_vadd_qf16_128B HvxVR:$src1, HvxVR:$src2), + (V6_vadd_qf16 HvxVR:$src1, HvxVR:$src2)>, Requires<[HasV68, UseHVX128B, UseHVXQFloat]>; +def: Pat<(int_hexagon_V6_vadd_qf16_mix HvxVR:$src1, HvxVR:$src2), + (V6_vadd_qf16_mix HvxVR:$src1, HvxVR:$src2)>, Requires<[HasV68, UseHVX64B, UseHVXQFloat]>; +def: Pat<(int_hexagon_V6_vadd_qf16_mix_128B HvxVR:$src1, HvxVR:$src2), + (V6_vadd_qf16_mix HvxVR:$src1, HvxVR:$src2)>, Requires<[HasV68, UseHVX128B, UseHVXQFloat]>; +def: Pat<(int_hexagon_V6_vadd_qf32 HvxVR:$src1, HvxVR:$src2), + (V6_vadd_qf32 HvxVR:$src1, HvxVR:$src2)>, Requires<[HasV68, UseHVX64B, UseHVXQFloat]>; +def: Pat<(int_hexagon_V6_vadd_qf32_128B HvxVR:$src1, HvxVR:$src2), + (V6_vadd_qf32 HvxVR:$src1, HvxVR:$src2)>, Requires<[HasV68, UseHVX128B, UseHVXQFloat]>; +def: Pat<(int_hexagon_V6_vadd_qf32_mix HvxVR:$src1, HvxVR:$src2), + (V6_vadd_qf32_mix HvxVR:$src1, HvxVR:$src2)>, Requires<[HasV68, UseHVX64B, UseHVXQFloat]>; +def: Pat<(int_hexagon_V6_vadd_qf32_mix_128B HvxVR:$src1, HvxVR:$src2), + (V6_vadd_qf32_mix HvxVR:$src1, HvxVR:$src2)>, Requires<[HasV68, UseHVX128B, UseHVXQFloat]>; +def: Pat<(int_hexagon_V6_vadd_sf HvxVR:$src1, HvxVR:$src2), + (V6_vadd_sf HvxVR:$src1, HvxVR:$src2)>, Requires<[HasV68, UseHVX64B, UseHVXQFloat]>; +def: Pat<(int_hexagon_V6_vadd_sf_128B HvxVR:$src1, HvxVR:$src2), + (V6_vadd_sf HvxVR:$src1, HvxVR:$src2)>, Requires<[HasV68, UseHVX128B, UseHVXQFloat]>; +def: Pat<(int_hexagon_V6_vadd_sf_hf HvxVR:$src1, HvxVR:$src2), + (V6_vadd_sf_hf HvxVR:$src1, HvxVR:$src2)>, Requires<[HasV68, UseHVX64B]>; +def: Pat<(int_hexagon_V6_vadd_sf_hf_128B HvxVR:$src1, HvxVR:$src2), + (V6_vadd_sf_hf HvxVR:$src1, HvxVR:$src2)>, Requires<[HasV68, UseHVX128B]>; +def: Pat<(int_hexagon_V6_vadd_sf_sf HvxVR:$src1, HvxVR:$src2), + (V6_vadd_sf_sf HvxVR:$src1, HvxVR:$src2)>, Requires<[HasV68, UseHVX64B]>; +def: Pat<(int_hexagon_V6_vadd_sf_sf_128B HvxVR:$src1, HvxVR:$src2), + (V6_vadd_sf_sf HvxVR:$src1, HvxVR:$src2)>, Requires<[HasV68, UseHVX128B]>; +def: 
Pat<(int_hexagon_V6_vassign_fp HvxVR:$src1), + (V6_vassign_fp HvxVR:$src1)>, Requires<[HasV68, UseHVX64B]>; +def: Pat<(int_hexagon_V6_vassign_fp_128B HvxVR:$src1), + (V6_vassign_fp HvxVR:$src1)>, Requires<[HasV68, UseHVX128B]>; +def: Pat<(int_hexagon_V6_vconv_hf_qf16 HvxVR:$src1), + (V6_vconv_hf_qf16 HvxVR:$src1)>, Requires<[HasV68, UseHVX64B, UseHVXQFloat]>; +def: Pat<(int_hexagon_V6_vconv_hf_qf16_128B HvxVR:$src1), + (V6_vconv_hf_qf16 HvxVR:$src1)>, Requires<[HasV68, UseHVX128B, UseHVXQFloat]>; +def: Pat<(int_hexagon_V6_vconv_hf_qf32 HvxWR:$src1), + (V6_vconv_hf_qf32 HvxWR:$src1)>, Requires<[HasV68, UseHVX64B, UseHVXQFloat]>; +def: Pat<(int_hexagon_V6_vconv_hf_qf32_128B HvxWR:$src1), + (V6_vconv_hf_qf32 HvxWR:$src1)>, Requires<[HasV68, UseHVX128B, UseHVXQFloat]>; +def: Pat<(int_hexagon_V6_vconv_sf_qf32 HvxVR:$src1), + (V6_vconv_sf_qf32 HvxVR:$src1)>, Requires<[HasV68, UseHVX64B, UseHVXQFloat]>; +def: Pat<(int_hexagon_V6_vconv_sf_qf32_128B HvxVR:$src1), + (V6_vconv_sf_qf32 HvxVR:$src1)>, Requires<[HasV68, UseHVX128B, UseHVXQFloat]>; +def: Pat<(int_hexagon_V6_vcvt_b_hf HvxVR:$src1, HvxVR:$src2), + (V6_vcvt_b_hf HvxVR:$src1, HvxVR:$src2)>, Requires<[HasV68, UseHVX64B]>; +def: Pat<(int_hexagon_V6_vcvt_b_hf_128B HvxVR:$src1, HvxVR:$src2), + (V6_vcvt_b_hf HvxVR:$src1, HvxVR:$src2)>, Requires<[HasV68, UseHVX128B]>; +def: Pat<(int_hexagon_V6_vcvt_h_hf HvxVR:$src1), + (V6_vcvt_h_hf HvxVR:$src1)>, Requires<[HasV68, UseHVX64B]>; +def: Pat<(int_hexagon_V6_vcvt_h_hf_128B HvxVR:$src1), + (V6_vcvt_h_hf HvxVR:$src1)>, Requires<[HasV68, UseHVX128B]>; +def: Pat<(int_hexagon_V6_vcvt_hf_b HvxVR:$src1), + (V6_vcvt_hf_b HvxVR:$src1)>, Requires<[HasV68, UseHVX64B]>; +def: Pat<(int_hexagon_V6_vcvt_hf_b_128B HvxVR:$src1), + (V6_vcvt_hf_b HvxVR:$src1)>, Requires<[HasV68, UseHVX128B]>; +def: Pat<(int_hexagon_V6_vcvt_hf_h HvxVR:$src1), + (V6_vcvt_hf_h HvxVR:$src1)>, Requires<[HasV68, UseHVX64B]>; +def: Pat<(int_hexagon_V6_vcvt_hf_h_128B HvxVR:$src1), + (V6_vcvt_hf_h HvxVR:$src1)>, Requires<[HasV68, UseHVX128B]>; +def: Pat<(int_hexagon_V6_vcvt_hf_sf HvxVR:$src1, HvxVR:$src2), + (V6_vcvt_hf_sf HvxVR:$src1, HvxVR:$src2)>, Requires<[HasV68, UseHVX64B]>; +def: Pat<(int_hexagon_V6_vcvt_hf_sf_128B HvxVR:$src1, HvxVR:$src2), + (V6_vcvt_hf_sf HvxVR:$src1, HvxVR:$src2)>, Requires<[HasV68, UseHVX128B]>; +def: Pat<(int_hexagon_V6_vcvt_hf_ub HvxVR:$src1), + (V6_vcvt_hf_ub HvxVR:$src1)>, Requires<[HasV68, UseHVX64B]>; +def: Pat<(int_hexagon_V6_vcvt_hf_ub_128B HvxVR:$src1), + (V6_vcvt_hf_ub HvxVR:$src1)>, Requires<[HasV68, UseHVX128B]>; +def: Pat<(int_hexagon_V6_vcvt_hf_uh HvxVR:$src1), + (V6_vcvt_hf_uh HvxVR:$src1)>, Requires<[HasV68, UseHVX64B]>; +def: Pat<(int_hexagon_V6_vcvt_hf_uh_128B HvxVR:$src1), + (V6_vcvt_hf_uh HvxVR:$src1)>, Requires<[HasV68, UseHVX128B]>; +def: Pat<(int_hexagon_V6_vcvt_sf_hf HvxVR:$src1), + (V6_vcvt_sf_hf HvxVR:$src1)>, Requires<[HasV68, UseHVX64B]>; +def: Pat<(int_hexagon_V6_vcvt_sf_hf_128B HvxVR:$src1), + (V6_vcvt_sf_hf HvxVR:$src1)>, Requires<[HasV68, UseHVX128B]>; +def: Pat<(int_hexagon_V6_vcvt_ub_hf HvxVR:$src1, HvxVR:$src2), + (V6_vcvt_ub_hf HvxVR:$src1, HvxVR:$src2)>, Requires<[HasV68, UseHVX64B]>; +def: Pat<(int_hexagon_V6_vcvt_ub_hf_128B HvxVR:$src1, HvxVR:$src2), + (V6_vcvt_ub_hf HvxVR:$src1, HvxVR:$src2)>, Requires<[HasV68, UseHVX128B]>; +def: Pat<(int_hexagon_V6_vcvt_uh_hf HvxVR:$src1), + (V6_vcvt_uh_hf HvxVR:$src1)>, Requires<[HasV68, UseHVX64B]>; +def: Pat<(int_hexagon_V6_vcvt_uh_hf_128B HvxVR:$src1), + (V6_vcvt_uh_hf HvxVR:$src1)>, Requires<[HasV68, UseHVX128B]>; +def: 
Pat<(int_hexagon_V6_vdmpy_sf_hf HvxVR:$src1, HvxVR:$src2), + (V6_vdmpy_sf_hf HvxVR:$src1, HvxVR:$src2)>, Requires<[HasV68, UseHVX64B]>; +def: Pat<(int_hexagon_V6_vdmpy_sf_hf_128B HvxVR:$src1, HvxVR:$src2), + (V6_vdmpy_sf_hf HvxVR:$src1, HvxVR:$src2)>, Requires<[HasV68, UseHVX128B]>; +def: Pat<(int_hexagon_V6_vdmpy_sf_hf_acc HvxVR:$src1, HvxVR:$src2, HvxVR:$src3), + (V6_vdmpy_sf_hf_acc HvxVR:$src1, HvxVR:$src2, HvxVR:$src3)>, Requires<[HasV68, UseHVX64B]>; +def: Pat<(int_hexagon_V6_vdmpy_sf_hf_acc_128B HvxVR:$src1, HvxVR:$src2, HvxVR:$src3), + (V6_vdmpy_sf_hf_acc HvxVR:$src1, HvxVR:$src2, HvxVR:$src3)>, Requires<[HasV68, UseHVX128B]>; +def: Pat<(int_hexagon_V6_vfmax_hf HvxVR:$src1, HvxVR:$src2), + (V6_vfmax_hf HvxVR:$src1, HvxVR:$src2)>, Requires<[HasV68, UseHVX64B]>; +def: Pat<(int_hexagon_V6_vfmax_hf_128B HvxVR:$src1, HvxVR:$src2), + (V6_vfmax_hf HvxVR:$src1, HvxVR:$src2)>, Requires<[HasV68, UseHVX128B]>; +def: Pat<(int_hexagon_V6_vfmax_sf HvxVR:$src1, HvxVR:$src2), + (V6_vfmax_sf HvxVR:$src1, HvxVR:$src2)>, Requires<[HasV68, UseHVX64B]>; +def: Pat<(int_hexagon_V6_vfmax_sf_128B HvxVR:$src1, HvxVR:$src2), + (V6_vfmax_sf HvxVR:$src1, HvxVR:$src2)>, Requires<[HasV68, UseHVX128B]>; +def: Pat<(int_hexagon_V6_vfmin_hf HvxVR:$src1, HvxVR:$src2), + (V6_vfmin_hf HvxVR:$src1, HvxVR:$src2)>, Requires<[HasV68, UseHVX64B]>; +def: Pat<(int_hexagon_V6_vfmin_hf_128B HvxVR:$src1, HvxVR:$src2), + (V6_vfmin_hf HvxVR:$src1, HvxVR:$src2)>, Requires<[HasV68, UseHVX128B]>; +def: Pat<(int_hexagon_V6_vfmin_sf HvxVR:$src1, HvxVR:$src2), + (V6_vfmin_sf HvxVR:$src1, HvxVR:$src2)>, Requires<[HasV68, UseHVX64B]>; +def: Pat<(int_hexagon_V6_vfmin_sf_128B HvxVR:$src1, HvxVR:$src2), + (V6_vfmin_sf HvxVR:$src1, HvxVR:$src2)>, Requires<[HasV68, UseHVX128B]>; +def: Pat<(int_hexagon_V6_vfneg_hf HvxVR:$src1), + (V6_vfneg_hf HvxVR:$src1)>, Requires<[HasV68, UseHVX64B]>; +def: Pat<(int_hexagon_V6_vfneg_hf_128B HvxVR:$src1), + (V6_vfneg_hf HvxVR:$src1)>, Requires<[HasV68, UseHVX128B]>; +def: Pat<(int_hexagon_V6_vfneg_sf HvxVR:$src1), + (V6_vfneg_sf HvxVR:$src1)>, Requires<[HasV68, UseHVX64B]>; +def: Pat<(int_hexagon_V6_vfneg_sf_128B HvxVR:$src1), + (V6_vfneg_sf HvxVR:$src1)>, Requires<[HasV68, UseHVX128B]>; +def: Pat<(int_hexagon_V6_vgthf HvxVR:$src1, HvxVR:$src2), + (V6_vgthf HvxVR:$src1, HvxVR:$src2)>, Requires<[HasV68, UseHVX64B, UseHVXQFloat]>; +def: Pat<(int_hexagon_V6_vgthf_128B HvxVR:$src1, HvxVR:$src2), + (V6_vgthf HvxVR:$src1, HvxVR:$src2)>, Requires<[HasV68, UseHVX128B, UseHVXQFloat]>; +def: Pat<(int_hexagon_V6_vgthf_and HvxQR:$src1, HvxVR:$src2, HvxVR:$src3), + (V6_vgthf_and HvxQR:$src1, HvxVR:$src2, HvxVR:$src3)>, Requires<[HasV68, UseHVX64B, UseHVXQFloat]>; +def: Pat<(int_hexagon_V6_vgthf_and_128B HvxQR:$src1, HvxVR:$src2, HvxVR:$src3), + (V6_vgthf_and HvxQR:$src1, HvxVR:$src2, HvxVR:$src3)>, Requires<[HasV68, UseHVX128B, UseHVXQFloat]>; +def: Pat<(int_hexagon_V6_vgthf_or HvxQR:$src1, HvxVR:$src2, HvxVR:$src3), + (V6_vgthf_or HvxQR:$src1, HvxVR:$src2, HvxVR:$src3)>, Requires<[HasV68, UseHVX64B, UseHVXQFloat]>; +def: Pat<(int_hexagon_V6_vgthf_or_128B HvxQR:$src1, HvxVR:$src2, HvxVR:$src3), + (V6_vgthf_or HvxQR:$src1, HvxVR:$src2, HvxVR:$src3)>, Requires<[HasV68, UseHVX128B, UseHVXQFloat]>; +def: Pat<(int_hexagon_V6_vgthf_xor HvxQR:$src1, HvxVR:$src2, HvxVR:$src3), + (V6_vgthf_xor HvxQR:$src1, HvxVR:$src2, HvxVR:$src3)>, Requires<[HasV68, UseHVX64B, UseHVXQFloat]>; +def: Pat<(int_hexagon_V6_vgthf_xor_128B HvxQR:$src1, HvxVR:$src2, HvxVR:$src3), + (V6_vgthf_xor HvxQR:$src1, HvxVR:$src2, HvxVR:$src3)>, Requires<[HasV68, 
UseHVX128B, UseHVXQFloat]>; +def: Pat<(int_hexagon_V6_vgtsf HvxVR:$src1, HvxVR:$src2), + (V6_vgtsf HvxVR:$src1, HvxVR:$src2)>, Requires<[HasV68, UseHVX64B, UseHVXQFloat]>; +def: Pat<(int_hexagon_V6_vgtsf_128B HvxVR:$src1, HvxVR:$src2), + (V6_vgtsf HvxVR:$src1, HvxVR:$src2)>, Requires<[HasV68, UseHVX128B, UseHVXQFloat]>; +def: Pat<(int_hexagon_V6_vgtsf_and HvxQR:$src1, HvxVR:$src2, HvxVR:$src3), + (V6_vgtsf_and HvxQR:$src1, HvxVR:$src2, HvxVR:$src3)>, Requires<[HasV68, UseHVX64B, UseHVXQFloat]>; +def: Pat<(int_hexagon_V6_vgtsf_and_128B HvxQR:$src1, HvxVR:$src2, HvxVR:$src3), + (V6_vgtsf_and HvxQR:$src1, HvxVR:$src2, HvxVR:$src3)>, Requires<[HasV68, UseHVX128B, UseHVXQFloat]>; +def: Pat<(int_hexagon_V6_vgtsf_or HvxQR:$src1, HvxVR:$src2, HvxVR:$src3), + (V6_vgtsf_or HvxQR:$src1, HvxVR:$src2, HvxVR:$src3)>, Requires<[HasV68, UseHVX64B, UseHVXQFloat]>; +def: Pat<(int_hexagon_V6_vgtsf_or_128B HvxQR:$src1, HvxVR:$src2, HvxVR:$src3), + (V6_vgtsf_or HvxQR:$src1, HvxVR:$src2, HvxVR:$src3)>, Requires<[HasV68, UseHVX128B, UseHVXQFloat]>; +def: Pat<(int_hexagon_V6_vgtsf_xor HvxQR:$src1, HvxVR:$src2, HvxVR:$src3), + (V6_vgtsf_xor HvxQR:$src1, HvxVR:$src2, HvxVR:$src3)>, Requires<[HasV68, UseHVX64B, UseHVXQFloat]>; +def: Pat<(int_hexagon_V6_vgtsf_xor_128B HvxQR:$src1, HvxVR:$src2, HvxVR:$src3), + (V6_vgtsf_xor HvxQR:$src1, HvxVR:$src2, HvxVR:$src3)>, Requires<[HasV68, UseHVX128B, UseHVXQFloat]>; +def: Pat<(int_hexagon_V6_vmax_hf HvxVR:$src1, HvxVR:$src2), + (V6_vmax_hf HvxVR:$src1, HvxVR:$src2)>, Requires<[HasV68, UseHVX64B, UseHVXQFloat]>; +def: Pat<(int_hexagon_V6_vmax_hf_128B HvxVR:$src1, HvxVR:$src2), + (V6_vmax_hf HvxVR:$src1, HvxVR:$src2)>, Requires<[HasV68, UseHVX128B, UseHVXQFloat]>; +def: Pat<(int_hexagon_V6_vmax_sf HvxVR:$src1, HvxVR:$src2), + (V6_vmax_sf HvxVR:$src1, HvxVR:$src2)>, Requires<[HasV68, UseHVX64B, UseHVXQFloat]>; +def: Pat<(int_hexagon_V6_vmax_sf_128B HvxVR:$src1, HvxVR:$src2), + (V6_vmax_sf HvxVR:$src1, HvxVR:$src2)>, Requires<[HasV68, UseHVX128B, UseHVXQFloat]>; +def: Pat<(int_hexagon_V6_vmin_hf HvxVR:$src1, HvxVR:$src2), + (V6_vmin_hf HvxVR:$src1, HvxVR:$src2)>, Requires<[HasV68, UseHVX64B, UseHVXQFloat]>; +def: Pat<(int_hexagon_V6_vmin_hf_128B HvxVR:$src1, HvxVR:$src2), + (V6_vmin_hf HvxVR:$src1, HvxVR:$src2)>, Requires<[HasV68, UseHVX128B, UseHVXQFloat]>; +def: Pat<(int_hexagon_V6_vmin_sf HvxVR:$src1, HvxVR:$src2), + (V6_vmin_sf HvxVR:$src1, HvxVR:$src2)>, Requires<[HasV68, UseHVX64B, UseHVXQFloat]>; +def: Pat<(int_hexagon_V6_vmin_sf_128B HvxVR:$src1, HvxVR:$src2), + (V6_vmin_sf HvxVR:$src1, HvxVR:$src2)>, Requires<[HasV68, UseHVX128B, UseHVXQFloat]>; +def: Pat<(int_hexagon_V6_vmpy_hf_hf HvxVR:$src1, HvxVR:$src2), + (V6_vmpy_hf_hf HvxVR:$src1, HvxVR:$src2)>, Requires<[HasV68, UseHVX64B]>; +def: Pat<(int_hexagon_V6_vmpy_hf_hf_128B HvxVR:$src1, HvxVR:$src2), + (V6_vmpy_hf_hf HvxVR:$src1, HvxVR:$src2)>, Requires<[HasV68, UseHVX128B]>; +def: Pat<(int_hexagon_V6_vmpy_hf_hf_acc HvxVR:$src1, HvxVR:$src2, HvxVR:$src3), + (V6_vmpy_hf_hf_acc HvxVR:$src1, HvxVR:$src2, HvxVR:$src3)>, Requires<[HasV68, UseHVX64B]>; +def: Pat<(int_hexagon_V6_vmpy_hf_hf_acc_128B HvxVR:$src1, HvxVR:$src2, HvxVR:$src3), + (V6_vmpy_hf_hf_acc HvxVR:$src1, HvxVR:$src2, HvxVR:$src3)>, Requires<[HasV68, UseHVX128B]>; +def: Pat<(int_hexagon_V6_vmpy_qf16 HvxVR:$src1, HvxVR:$src2), + (V6_vmpy_qf16 HvxVR:$src1, HvxVR:$src2)>, Requires<[HasV68, UseHVX64B, UseHVXQFloat]>; +def: Pat<(int_hexagon_V6_vmpy_qf16_128B HvxVR:$src1, HvxVR:$src2), + (V6_vmpy_qf16 HvxVR:$src1, HvxVR:$src2)>, Requires<[HasV68, UseHVX128B, 
UseHVXQFloat]>; +def: Pat<(int_hexagon_V6_vmpy_qf16_hf HvxVR:$src1, HvxVR:$src2), + (V6_vmpy_qf16_hf HvxVR:$src1, HvxVR:$src2)>, Requires<[HasV68, UseHVX64B, UseHVXQFloat]>; +def: Pat<(int_hexagon_V6_vmpy_qf16_hf_128B HvxVR:$src1, HvxVR:$src2), + (V6_vmpy_qf16_hf HvxVR:$src1, HvxVR:$src2)>, Requires<[HasV68, UseHVX128B, UseHVXQFloat]>; +def: Pat<(int_hexagon_V6_vmpy_qf16_mix_hf HvxVR:$src1, HvxVR:$src2), + (V6_vmpy_qf16_mix_hf HvxVR:$src1, HvxVR:$src2)>, Requires<[HasV68, UseHVX64B, UseHVXQFloat]>; +def: Pat<(int_hexagon_V6_vmpy_qf16_mix_hf_128B HvxVR:$src1, HvxVR:$src2), + (V6_vmpy_qf16_mix_hf HvxVR:$src1, HvxVR:$src2)>, Requires<[HasV68, UseHVX128B, UseHVXQFloat]>; +def: Pat<(int_hexagon_V6_vmpy_qf32 HvxVR:$src1, HvxVR:$src2), + (V6_vmpy_qf32 HvxVR:$src1, HvxVR:$src2)>, Requires<[HasV68, UseHVX64B, UseHVXQFloat]>; +def: Pat<(int_hexagon_V6_vmpy_qf32_128B HvxVR:$src1, HvxVR:$src2), + (V6_vmpy_qf32 HvxVR:$src1, HvxVR:$src2)>, Requires<[HasV68, UseHVX128B, UseHVXQFloat]>; +def: Pat<(int_hexagon_V6_vmpy_qf32_hf HvxVR:$src1, HvxVR:$src2), + (V6_vmpy_qf32_hf HvxVR:$src1, HvxVR:$src2)>, Requires<[HasV68, UseHVX64B, UseHVXQFloat]>; +def: Pat<(int_hexagon_V6_vmpy_qf32_hf_128B HvxVR:$src1, HvxVR:$src2), + (V6_vmpy_qf32_hf HvxVR:$src1, HvxVR:$src2)>, Requires<[HasV68, UseHVX128B, UseHVXQFloat]>; +def: Pat<(int_hexagon_V6_vmpy_qf32_mix_hf HvxVR:$src1, HvxVR:$src2), + (V6_vmpy_qf32_mix_hf HvxVR:$src1, HvxVR:$src2)>, Requires<[HasV68, UseHVX64B, UseHVXQFloat]>; +def: Pat<(int_hexagon_V6_vmpy_qf32_mix_hf_128B HvxVR:$src1, HvxVR:$src2), + (V6_vmpy_qf32_mix_hf HvxVR:$src1, HvxVR:$src2)>, Requires<[HasV68, UseHVX128B, UseHVXQFloat]>; +def: Pat<(int_hexagon_V6_vmpy_qf32_qf16 HvxVR:$src1, HvxVR:$src2), + (V6_vmpy_qf32_qf16 HvxVR:$src1, HvxVR:$src2)>, Requires<[HasV68, UseHVX64B, UseHVXQFloat]>; +def: Pat<(int_hexagon_V6_vmpy_qf32_qf16_128B HvxVR:$src1, HvxVR:$src2), + (V6_vmpy_qf32_qf16 HvxVR:$src1, HvxVR:$src2)>, Requires<[HasV68, UseHVX128B, UseHVXQFloat]>; +def: Pat<(int_hexagon_V6_vmpy_qf32_sf HvxVR:$src1, HvxVR:$src2), + (V6_vmpy_qf32_sf HvxVR:$src1, HvxVR:$src2)>, Requires<[HasV68, UseHVX64B, UseHVXQFloat]>; +def: Pat<(int_hexagon_V6_vmpy_qf32_sf_128B HvxVR:$src1, HvxVR:$src2), + (V6_vmpy_qf32_sf HvxVR:$src1, HvxVR:$src2)>, Requires<[HasV68, UseHVX128B, UseHVXQFloat]>; +def: Pat<(int_hexagon_V6_vmpy_sf_hf HvxVR:$src1, HvxVR:$src2), + (V6_vmpy_sf_hf HvxVR:$src1, HvxVR:$src2)>, Requires<[HasV68, UseHVX64B]>; +def: Pat<(int_hexagon_V6_vmpy_sf_hf_128B HvxVR:$src1, HvxVR:$src2), + (V6_vmpy_sf_hf HvxVR:$src1, HvxVR:$src2)>, Requires<[HasV68, UseHVX128B]>; +def: Pat<(int_hexagon_V6_vmpy_sf_hf_acc HvxWR:$src1, HvxVR:$src2, HvxVR:$src3), + (V6_vmpy_sf_hf_acc HvxWR:$src1, HvxVR:$src2, HvxVR:$src3)>, Requires<[HasV68, UseHVX64B]>; +def: Pat<(int_hexagon_V6_vmpy_sf_hf_acc_128B HvxWR:$src1, HvxVR:$src2, HvxVR:$src3), + (V6_vmpy_sf_hf_acc HvxWR:$src1, HvxVR:$src2, HvxVR:$src3)>, Requires<[HasV68, UseHVX128B]>; +def: Pat<(int_hexagon_V6_vmpy_sf_sf HvxVR:$src1, HvxVR:$src2), + (V6_vmpy_sf_sf HvxVR:$src1, HvxVR:$src2)>, Requires<[HasV68, UseHVX64B]>; +def: Pat<(int_hexagon_V6_vmpy_sf_sf_128B HvxVR:$src1, HvxVR:$src2), + (V6_vmpy_sf_sf HvxVR:$src1, HvxVR:$src2)>, Requires<[HasV68, UseHVX128B]>; +def: Pat<(int_hexagon_V6_vsub_hf HvxVR:$src1, HvxVR:$src2), + (V6_vsub_hf HvxVR:$src1, HvxVR:$src2)>, Requires<[HasV68, UseHVX64B, UseHVXQFloat]>; +def: Pat<(int_hexagon_V6_vsub_hf_128B HvxVR:$src1, HvxVR:$src2), + (V6_vsub_hf HvxVR:$src1, HvxVR:$src2)>, Requires<[HasV68, UseHVX128B, UseHVXQFloat]>; +def: 
Pat<(int_hexagon_V6_vsub_hf_hf HvxVR:$src1, HvxVR:$src2), + (V6_vsub_hf_hf HvxVR:$src1, HvxVR:$src2)>, Requires<[HasV68, UseHVX64B]>; +def: Pat<(int_hexagon_V6_vsub_hf_hf_128B HvxVR:$src1, HvxVR:$src2), + (V6_vsub_hf_hf HvxVR:$src1, HvxVR:$src2)>, Requires<[HasV68, UseHVX128B]>; +def: Pat<(int_hexagon_V6_vsub_qf16 HvxVR:$src1, HvxVR:$src2), + (V6_vsub_qf16 HvxVR:$src1, HvxVR:$src2)>, Requires<[HasV68, UseHVX64B, UseHVXQFloat]>; +def: Pat<(int_hexagon_V6_vsub_qf16_128B HvxVR:$src1, HvxVR:$src2), + (V6_vsub_qf16 HvxVR:$src1, HvxVR:$src2)>, Requires<[HasV68, UseHVX128B, UseHVXQFloat]>; +def: Pat<(int_hexagon_V6_vsub_qf16_mix HvxVR:$src1, HvxVR:$src2), + (V6_vsub_qf16_mix HvxVR:$src1, HvxVR:$src2)>, Requires<[HasV68, UseHVX64B, UseHVXQFloat]>; +def: Pat<(int_hexagon_V6_vsub_qf16_mix_128B HvxVR:$src1, HvxVR:$src2), + (V6_vsub_qf16_mix HvxVR:$src1, HvxVR:$src2)>, Requires<[HasV68, UseHVX128B, UseHVXQFloat]>; +def: Pat<(int_hexagon_V6_vsub_qf32 HvxVR:$src1, HvxVR:$src2), + (V6_vsub_qf32 HvxVR:$src1, HvxVR:$src2)>, Requires<[HasV68, UseHVX64B, UseHVXQFloat]>; +def: Pat<(int_hexagon_V6_vsub_qf32_128B HvxVR:$src1, HvxVR:$src2), + (V6_vsub_qf32 HvxVR:$src1, HvxVR:$src2)>, Requires<[HasV68, UseHVX128B, UseHVXQFloat]>; +def: Pat<(int_hexagon_V6_vsub_qf32_mix HvxVR:$src1, HvxVR:$src2), + (V6_vsub_qf32_mix HvxVR:$src1, HvxVR:$src2)>, Requires<[HasV68, UseHVX64B, UseHVXQFloat]>; +def: Pat<(int_hexagon_V6_vsub_qf32_mix_128B HvxVR:$src1, HvxVR:$src2), + (V6_vsub_qf32_mix HvxVR:$src1, HvxVR:$src2)>, Requires<[HasV68, UseHVX128B, UseHVXQFloat]>; +def: Pat<(int_hexagon_V6_vsub_sf HvxVR:$src1, HvxVR:$src2), + (V6_vsub_sf HvxVR:$src1, HvxVR:$src2)>, Requires<[HasV68, UseHVX64B, UseHVXQFloat]>; +def: Pat<(int_hexagon_V6_vsub_sf_128B HvxVR:$src1, HvxVR:$src2), + (V6_vsub_sf HvxVR:$src1, HvxVR:$src2)>, Requires<[HasV68, UseHVX128B, UseHVXQFloat]>; +def: Pat<(int_hexagon_V6_vsub_sf_hf HvxVR:$src1, HvxVR:$src2), + (V6_vsub_sf_hf HvxVR:$src1, HvxVR:$src2)>, Requires<[HasV68, UseHVX64B]>; +def: Pat<(int_hexagon_V6_vsub_sf_hf_128B HvxVR:$src1, HvxVR:$src2), + (V6_vsub_sf_hf HvxVR:$src1, HvxVR:$src2)>, Requires<[HasV68, UseHVX128B]>; +def: Pat<(int_hexagon_V6_vsub_sf_sf HvxVR:$src1, HvxVR:$src2), + (V6_vsub_sf_sf HvxVR:$src1, HvxVR:$src2)>, Requires<[HasV68, UseHVX64B]>; +def: Pat<(int_hexagon_V6_vsub_sf_sf_128B HvxVR:$src1, HvxVR:$src2), + (V6_vsub_sf_sf HvxVR:$src1, HvxVR:$src2)>, Requires<[HasV68, UseHVX128B]>; + +// V69 HVX Instructions. 
+ +def: Pat<(int_hexagon_V6_vasrvuhubrndsat HvxWR:$src1, HvxVR:$src2), + (V6_vasrvuhubrndsat HvxWR:$src1, HvxVR:$src2)>, Requires<[HasV69, UseHVX64B]>; +def: Pat<(int_hexagon_V6_vasrvuhubrndsat_128B HvxWR:$src1, HvxVR:$src2), + (V6_vasrvuhubrndsat HvxWR:$src1, HvxVR:$src2)>, Requires<[HasV69, UseHVX128B]>; +def: Pat<(int_hexagon_V6_vasrvuhubsat HvxWR:$src1, HvxVR:$src2), + (V6_vasrvuhubsat HvxWR:$src1, HvxVR:$src2)>, Requires<[HasV69, UseHVX64B]>; +def: Pat<(int_hexagon_V6_vasrvuhubsat_128B HvxWR:$src1, HvxVR:$src2), + (V6_vasrvuhubsat HvxWR:$src1, HvxVR:$src2)>, Requires<[HasV69, UseHVX128B]>; +def: Pat<(int_hexagon_V6_vasrvwuhrndsat HvxWR:$src1, HvxVR:$src2), + (V6_vasrvwuhrndsat HvxWR:$src1, HvxVR:$src2)>, Requires<[HasV69, UseHVX64B]>; +def: Pat<(int_hexagon_V6_vasrvwuhrndsat_128B HvxWR:$src1, HvxVR:$src2), + (V6_vasrvwuhrndsat HvxWR:$src1, HvxVR:$src2)>, Requires<[HasV69, UseHVX128B]>; +def: Pat<(int_hexagon_V6_vasrvwuhsat HvxWR:$src1, HvxVR:$src2), + (V6_vasrvwuhsat HvxWR:$src1, HvxVR:$src2)>, Requires<[HasV69, UseHVX64B]>; +def: Pat<(int_hexagon_V6_vasrvwuhsat_128B HvxWR:$src1, HvxVR:$src2), + (V6_vasrvwuhsat HvxWR:$src1, HvxVR:$src2)>, Requires<[HasV69, UseHVX128B]>; +def: Pat<(int_hexagon_V6_vmpyuhvs HvxVR:$src1, HvxVR:$src2), + (V6_vmpyuhvs HvxVR:$src1, HvxVR:$src2)>, Requires<[HasV69, UseHVX64B]>; +def: Pat<(int_hexagon_V6_vmpyuhvs_128B HvxVR:$src1, HvxVR:$src2), + (V6_vmpyuhvs HvxVR:$src1, HvxVR:$src2)>, Requires<[HasV69, UseHVX128B]>; diff --git a/llvm/lib/Target/Hexagon/HexagonDepMappings.td b/llvm/lib/Target/Hexagon/HexagonDepMappings.td index 919cb996ad15..2f7b76b893a9 100644 --- a/llvm/lib/Target/Hexagon/HexagonDepMappings.td +++ b/llvm/lib/Target/Hexagon/HexagonDepMappings.td @@ -174,7 +174,6 @@ def V6_ldcpnt0Alias : InstAlias<"if ($Pv4) $Vd32.cur = vmem($Rt32):nt", (V6_vL32 def V6_ldnp0Alias : InstAlias<"if (!$Pv4) $Vd32 = vmem($Rt32)", (V6_vL32b_npred_pi HvxVR:$Vd32, IntRegs:$Rt32, PredRegs:$Pv4, 0)>, Requires<[UseHVX]>; def V6_ldnpnt0Alias : InstAlias<"if (!$Pv4) $Vd32 = vmem($Rt32):nt", (V6_vL32b_nt_npred_pi HvxVR:$Vd32, IntRegs:$Rt32, PredRegs:$Pv4, 0)>, Requires<[UseHVX]>; def V6_ldnt0Alias : InstAlias<"$Vd32 = vmem($Rt32):nt", (V6_vL32b_nt_ai HvxVR:$Vd32, IntRegs:$Rt32, 0)>, Requires<[UseHVX]>; -def V6_ldntnt0Alias : InstAlias<"$Vd32 = vmem($Rt32):nt", (V6_vL32b_nt_ai HvxVR:$Vd32, IntRegs:$Rt32, 0)>; def V6_ldp0Alias : InstAlias<"if ($Pv4) $Vd32 = vmem($Rt32)", (V6_vL32b_pred_ai HvxVR:$Vd32, PredRegs:$Pv4, IntRegs:$Rt32, 0)>, Requires<[UseHVX]>; def V6_ldpnt0Alias : InstAlias<"if ($Pv4) $Vd32 = vmem($Rt32):nt", (V6_vL32b_nt_pred_ai HvxVR:$Vd32, PredRegs:$Pv4, IntRegs:$Rt32, 0)>, Requires<[UseHVX]>; def V6_ldtnp0Alias : InstAlias<"if (!$Pv4) $Vd32.tmp = vmem($Rt32)", (V6_vL32b_npred_ai HvxVR:$Vd32, PredRegs:$Pv4, IntRegs:$Rt32, 0)>, Requires<[UseHVX]>; diff --git a/llvm/lib/Target/Hexagon/HexagonInstrFormats.td b/llvm/lib/Target/Hexagon/HexagonInstrFormats.td index 45adaf50774f..898ef51bd48f 100644 --- a/llvm/lib/Target/Hexagon/HexagonInstrFormats.td +++ b/llvm/lib/Target/Hexagon/HexagonInstrFormats.td @@ -146,9 +146,6 @@ class InstHexagon pattern, bits<1> isFP = 0; let TSFlags {50} = isFP; // Floating-point. - bits<1> isSomeOK = 0; - let TSFlags {51} = isSomeOK; // Relax some grouping constraints. - bits<1> hasNewValue2 = 0; let TSFlags{52} = hasNewValue2; // Second New-value producer insn. 
bits<3> opNewValue2 = 0; @@ -160,8 +157,8 @@ class InstHexagon pattern, bits<1> prefersSlot3 = 0; let TSFlags{57} = prefersSlot3; // Complex XU - bits<1> hasTmpDst = 0; - let TSFlags{60} = hasTmpDst; // v65 : 'fake" register VTMP is set + bits<1> hasHvxTmp = 0; + let TSFlags{60} = hasHvxTmp; // vector register vX.tmp false-write bit CVINew = 0; let TSFlags{62} = CVINew; diff --git a/llvm/lib/Target/Hexagon/HexagonPseudo.td b/llvm/lib/Target/Hexagon/HexagonPseudo.td index 11f8af7c41a0..afd63d6d4aa7 100644 --- a/llvm/lib/Target/Hexagon/HexagonPseudo.td +++ b/llvm/lib/Target/Hexagon/HexagonPseudo.td @@ -572,3 +572,14 @@ defm PS_storerd : NewCircularStore; // __builtin_trap. let hasSideEffects = 1, isPseudo = 1, isCodeGenOnly = 1, isSolo = 1 in def PS_crash: InstHexagon<(outs), (ins), "", [], "", PSEUDO, TypePSEUDO>; + +// This is actual trap1 instruction from before v65. It's here since it is +// no longer included in DepInstrInfo.td. +def PS_trap1 : HInst<(outs), (ins u8_0Imm:$Ii), "trap1(#$Ii)", tc_53c851ab, + TypeJ>, Enc_a51a9a, Requires<[HasPreV65]> { + let Inst{1-0} = 0b00; + let Inst{7-5} = 0b000; + let Inst{13-13} = 0b0; + let Inst{31-16} = 0b0101010010000000; +} + diff --git a/llvm/lib/Target/Hexagon/HexagonSchedule.td b/llvm/lib/Target/Hexagon/HexagonSchedule.td index 88d775f16a7f..931578c9e78d 100644 --- a/llvm/lib/Target/Hexagon/HexagonSchedule.td +++ b/llvm/lib/Target/Hexagon/HexagonSchedule.td @@ -69,3 +69,4 @@ include "HexagonScheduleV66.td" include "HexagonScheduleV67.td" include "HexagonScheduleV67T.td" include "HexagonScheduleV68.td" +include "HexagonScheduleV69.td" diff --git a/llvm/lib/Target/Hexagon/HexagonScheduleV69.td b/llvm/lib/Target/Hexagon/HexagonScheduleV69.td new file mode 100644 index 000000000000..ddd246866e20 --- /dev/null +++ b/llvm/lib/Target/Hexagon/HexagonScheduleV69.td @@ -0,0 +1,40 @@ +//=-HexagonScheduleV69.td - HexagonV69 Scheduling Definitions *- tablegen -*-=// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// + +// +// ScalarItin and HVXItin contain some old itineraries +// still used by a handful of instructions. Hopefully, we will be able +// to get rid of them soon. +def HexagonV69ItinList : DepScalarItinV69, ScalarItin, + DepHVXItinV69, HVXItin, PseudoItin { + list ItinList = + !listconcat(DepScalarItinV69_list, ScalarItin_list, + DepHVXItinV69_list, HVXItin_list, PseudoItin_list); +} + +def HexagonItinerariesV69 : + ProcessorItineraries<[SLOT0, SLOT1, SLOT2, SLOT3, SLOT_ENDLOOP, + CVI_ST, CVI_XLANE, CVI_SHIFT, CVI_MPY0, CVI_MPY1, + CVI_LD, CVI_XLSHF, CVI_MPY01, CVI_ALL, + CVI_ALL_NOMEM, CVI_ZW], + [Hex_FWD, HVX_FWD], + HexagonV69ItinList.ItinList>; + +def HexagonModelV69 : SchedMachineModel { + // Max issue per cycle == bundle width. 
+ let IssueWidth = 4; + let Itineraries = HexagonItinerariesV69; + let LoadLatency = 1; + let CompleteModel = 0; +} + +//===----------------------------------------------------------------------===// +// Hexagon V69 Resource Definitions - +//===----------------------------------------------------------------------===// + diff --git a/llvm/lib/Target/Hexagon/HexagonSubtarget.cpp b/llvm/lib/Target/Hexagon/HexagonSubtarget.cpp index ecb2f88d8096..08bb4580b585 100644 --- a/llvm/lib/Target/Hexagon/HexagonSubtarget.cpp +++ b/llvm/lib/Target/Hexagon/HexagonSubtarget.cpp @@ -75,6 +75,10 @@ static cl::opt EnableCheckBankConflict("hexagon-check-bank-conflict", cl::Hidden, cl::ZeroOrMore, cl::init(true), cl::desc("Enable checking for cache bank conflicts")); +static cl::opt EnableV68FloatCodeGen( + "force-hvx-float", cl::Hidden, cl::ZeroOrMore, cl::init(false), + cl::desc("Enable the code-generation for vector float instructions on v68.")); + HexagonSubtarget::HexagonSubtarget(const Triple &TT, StringRef CPU, StringRef FS, const TargetMachine &TM) : HexagonGenSubtargetInfo(TT, CPU, /*TuneCPU*/ CPU, FS), @@ -103,13 +107,71 @@ HexagonSubtarget::initializeSubtargetDependencies(StringRef CPU, StringRef FS) { UseAudioOps = false; UseLongCalls = false; - UseBSBScheduling = hasV60Ops() && EnableBSBSched; + SubtargetFeatures Features(FS); - ParseSubtargetFeatures(CPUString, /*TuneCPU*/ CPUString, FS); + // Turn on QFloat if the HVX version is v68+. + // The function ParseSubtargetFeatures will set feature bits and initialize + // subtarget's variables all in one, so there isn't a good way to preprocess + // the feature string, other than by tinkering with it directly. + auto IsQFloatFS = [](StringRef F) { + return F == "+hvx-qfloat" || F == "-hvx-qfloat"; + }; + if (!llvm::count_if(Features.getFeatures(), IsQFloatFS)) { + auto getHvxVersion = [&Features](StringRef FS) -> StringRef { + for (StringRef F : llvm::reverse(Features.getFeatures())) { + if (F.startswith("+hvxv")) + return F; + } + for (StringRef F : llvm::reverse(Features.getFeatures())) { + if (F == "-hvx") + return StringRef(); + if (F.startswith("+hvx") || F == "-hvx") + return F.take_front(4); // Return "+hvx" or "-hvx". + } + return StringRef(); + }; + + bool AddQFloat = false; + StringRef HvxVer = getHvxVersion(FS); + if (HvxVer.startswith("+hvxv")) { + int Ver = 0; + if (!HvxVer.drop_front(5).consumeInteger(10, Ver) && Ver >= 68) + AddQFloat = true; + } else if (HvxVer == "+hvx") { + if (hasV68Ops()) + AddQFloat = true; + } + + if (AddQFloat) + Features.AddFeature("+hvx-qfloat"); + } + + std::string FeatureString = Features.getString(); + ParseSubtargetFeatures(CPUString, /*TuneCPU*/ CPUString, FeatureString); + + // Enable float code generation only if the flag(s) are set and + // the feature is enabled. v68 is guarded by additional flags. + bool GreaterThanV68 = false; + if (useHVXV69Ops()) + GreaterThanV68 = true; + + // Support for deprecated qfloat/ieee codegen flags + if (!GreaterThanV68) { + if (EnableV68FloatCodeGen) + UseHVXFloatingPoint = true; + } else { + UseHVXFloatingPoint = true; + } + + if (UseHVXQFloatOps && UseHVXIEEEFPOps && UseHVXFloatingPoint) + LLVM_DEBUG( + dbgs() << "Behavior is undefined for simultaneous qfloat and ieee hvx codegen..."); if (OverrideLongCalls.getPosition()) UseLongCalls = OverrideLongCalls; + UseBSBScheduling = hasV60Ops() && EnableBSBSched; + if (isTinyCore()) { // Tiny core has a single thread, so back-to-back scheduling is enabled by // default. 
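(Illustration, not part of the patch.) The feature-string handling added above makes QFloat the default whenever the HVX version is v68 or later and the feature string says nothing about it, while an explicit +hvx-qfloat or -hvx-qfloat is passed through untouched; per the comment in the change, v68 float code generation is additionally guarded by the new hidden -force-hvx-float option, and v69 or later sets UseHVXFloatingPoint unconditionally. A rough sketch of the resulting behavior, assuming the usual llc -mcpu/-mattr interface (the exact invocations below are illustrative, not taken from this patch or its tests):

    llc -march=hexagon -mcpu=hexagonv69 -mattr=+hvxv69,+hvx-length128b foo.ll
        no qfloat preference given and HVX >= v68, so "+hvx-qfloat" is appended
        before ParseSubtargetFeatures runs

    llc -march=hexagon -mcpu=hexagonv68 -mattr=+hvxv68,-hvx-qfloat foo.ll
        an explicit preference is present, so the feature string is left as written

    llc -march=hexagon -mcpu=hexagonv68 -mattr=+hvxv68,+hvx-ieee-fp -force-hvx-float foo.ll
        per the comment above, v68 HVX float code generation is additionally
        gated on the hidden -force-hvx-float flag; v69+ does not need it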
@@ -117,10 +179,10 @@ HexagonSubtarget::initializeSubtargetDependencies(StringRef CPU, StringRef FS) { UseBSBScheduling = false; } - FeatureBitset Features = getFeatureBits(); + FeatureBitset FeatureBits = getFeatureBits(); if (HexagonDisableDuplex) - setFeatureBits(Features.reset(Hexagon::FeatureDuplex)); - setFeatureBits(Hexagon_MC::completeHVXFeatures(Features)); + setFeatureBits(FeatureBits.reset(Hexagon::FeatureDuplex)); + setFeatureBits(Hexagon_MC::completeHVXFeatures(FeatureBits)); return *this; } diff --git a/llvm/lib/Target/Hexagon/HexagonSubtarget.h b/llvm/lib/Target/Hexagon/HexagonSubtarget.h index 8ca6018bd81c..e4f375440be1 100644 --- a/llvm/lib/Target/Hexagon/HexagonSubtarget.h +++ b/llvm/lib/Target/Hexagon/HexagonSubtarget.h @@ -56,6 +56,10 @@ class HexagonSubtarget : public HexagonGenSubtargetInfo { bool UseSmallData = false; bool UseUnsafeMath = false; bool UseZRegOps = false; + bool UseHVXIEEEFPOps = false; + bool UseHVXQFloatOps = false; + bool UseHVXFloatingPoint = false; + bool UseCabac = false; bool HasPreV65 = false; bool HasMemNoShuf = false; @@ -188,6 +192,12 @@ class HexagonSubtarget : public HexagonGenSubtargetInfo { bool hasV68OpsOnly() const { return getHexagonArchVersion() == Hexagon::ArchEnum::V68; } + bool hasV69Ops() const { + return getHexagonArchVersion() >= Hexagon::ArchEnum::V69; + } + bool hasV69OpsOnly() const { + return getHexagonArchVersion() == Hexagon::ArchEnum::V69; + } bool useAudioOps() const { return UseAudioOps; } bool useCompound() const { return UseCompound; } @@ -199,10 +209,16 @@ class HexagonSubtarget : public HexagonGenSubtargetInfo { bool useSmallData() const { return UseSmallData; } bool useUnsafeMath() const { return UseUnsafeMath; } bool useZRegOps() const { return UseZRegOps; } + bool useCabac() const { return UseCabac; } bool isTinyCore() const { return HexagonProcFamily == TinyCore; } bool isTinyCoreWithDuplex() const { return isTinyCore() && EnableDuplex; } + bool useHVXIEEEFPOps() const { return UseHVXIEEEFPOps && useHVXOps(); } + bool useHVXQFloatOps() const { + return UseHVXQFloatOps && HexagonHVXVersion >= Hexagon::ArchEnum::V68; + } + bool useHVXFloatingPoint() const { return UseHVXFloatingPoint; } bool useHVXOps() const { return HexagonHVXVersion > Hexagon::ArchEnum::NoArch; } @@ -224,6 +240,9 @@ class HexagonSubtarget : public HexagonGenSubtargetInfo { bool useHVXV68Ops() const { return HexagonHVXVersion >= Hexagon::ArchEnum::V68; } + bool useHVXV69Ops() const { + return HexagonHVXVersion >= Hexagon::ArchEnum::V69; + } bool useHVX128BOps() const { return useHVXOps() && UseHVX128BOps; } bool useHVX64BOps() const { return useHVXOps() && UseHVX64BOps; } @@ -283,7 +302,11 @@ class HexagonSubtarget : public HexagonGenSubtargetInfo { } ArrayRef getHVXElementTypes() const { - static MVT Types[] = { MVT::i8, MVT::i16, MVT::i32 }; + static MVT Types[] = {MVT::i8, MVT::i16, MVT::i32}; + static MVT TypesV68[] = {MVT::i8, MVT::i16, MVT::i32, MVT::f16, MVT::f32}; + + if (useHVXV68Ops() && useHVXFloatingPoint()) + return makeArrayRef(TypesV68); return makeArrayRef(Types); } diff --git a/llvm/lib/Target/Hexagon/MCTargetDesc/HexagonBaseInfo.h b/llvm/lib/Target/Hexagon/MCTargetDesc/HexagonBaseInfo.h index 4125566bc58a..c9a1781a4543 100644 --- a/llvm/lib/Target/Hexagon/MCTargetDesc/HexagonBaseInfo.h +++ b/llvm/lib/Target/Hexagon/MCTargetDesc/HexagonBaseInfo.h @@ -154,9 +154,8 @@ namespace HexagonII { PrefersSlot3Pos = 57, PrefersSlot3Mask = 0x1, - // v65 - HasTmpDstPos = 60, - HasTmpDstMask = 0x1, + HasHvxTmpPos = 60, + HasHvxTmpMask = 0x1, 
CVINewPos = 62, CVINewMask = 0x1, diff --git a/llvm/lib/Target/Hexagon/MCTargetDesc/HexagonMCChecker.cpp b/llvm/lib/Target/Hexagon/MCTargetDesc/HexagonMCChecker.cpp index fee1acdbbe8a..96c2965296ca 100644 --- a/llvm/lib/Target/Hexagon/MCTargetDesc/HexagonMCChecker.cpp +++ b/llvm/lib/Target/Hexagon/MCTargetDesc/HexagonMCChecker.cpp @@ -98,6 +98,10 @@ void HexagonMCChecker::init(MCInst const &MCI) { for (unsigned i = 0; i < MCID.getNumImplicitUses(); ++i) initReg(MCI, MCID.getImplicitUses()[i], PredReg, isTrue); + const bool IgnoreTmpDst = (HexagonMCInstrInfo::hasTmpDst(MCII, MCI) || + HexagonMCInstrInfo::hasHvxTmp(MCII, MCI)) && + STI.getFeatureBits()[Hexagon::ArchV69]; + // Get implicit register definitions. if (const MCPhysReg *ImpDef = MCID.getImplicitDefs()) for (; *ImpDef; ++ImpDef) { @@ -123,7 +127,7 @@ void HexagonMCChecker::init(MCInst const &MCI) { HexagonMCInstrInfo::isPredicateLate(MCII, MCI)) // Include implicit late predicates. LatePreds.insert(R); - else + else if (!IgnoreTmpDst) Defs[R].insert(PredSense(PredReg, isTrue)); } @@ -178,7 +182,7 @@ void HexagonMCChecker::init(MCInst const &MCI) { // vshuff(Vx, Vy, Rx) <- Vx(0) and Vy(1) are both source and // destination registers with this instruction. same for vdeal(Vx,Vy,Rx) Uses.insert(*SRI); - else + else if (!IgnoreTmpDst) Defs[*SRI].insert(PredSense(PredReg, isTrue)); } } @@ -227,9 +231,11 @@ bool HexagonMCChecker::check(bool FullCheck) { bool chkAXOK = checkAXOK(); bool chkCofMax1 = checkCOFMax1(); bool chkHWLoop = checkHWLoop(); + bool chkValidTmpDst = FullCheck ? checkValidTmpDst() : true; bool chkLegalVecRegPair = checkLegalVecRegPair(); bool chk = chkP && chkNV && chkR && chkRRO && chkS && chkSh && chkSl && - chkAXOK && chkCofMax1 && chkHWLoop && chkLegalVecRegPair; + chkAXOK && chkCofMax1 && chkHWLoop && chkValidTmpDst && + chkLegalVecRegPair; return chk; } @@ -676,6 +682,32 @@ bool HexagonMCChecker::checkShuffle() { return MCSDX.check(); } +bool HexagonMCChecker::checkValidTmpDst() { + if (!STI.getFeatureBits()[Hexagon::ArchV69]) { + return true; + } + auto HasTmp = [&](MCInst const &I) { + return HexagonMCInstrInfo::hasTmpDst(MCII, I) || + HexagonMCInstrInfo::hasHvxTmp(MCII, I); + }; + unsigned HasTmpCount = + llvm::count_if(HexagonMCInstrInfo::bundleInstructions(MCII, MCB), HasTmp); + + if (HasTmpCount > 1) { + reportError( + MCB.getLoc(), + "this packet has more than one HVX vtmp/.tmp destination instruction"); + + for (auto const &I : HexagonMCInstrInfo::bundleInstructions(MCII, MCB)) + if (HasTmp(I)) + reportNote(I.getLoc(), + "this is an HVX vtmp/.tmp destination instruction"); + + return false; + } + return true; +} + void HexagonMCChecker::compoundRegisterMap(unsigned &Register) { switch (Register) { default: diff --git a/llvm/lib/Target/Hexagon/MCTargetDesc/HexagonMCChecker.h b/llvm/lib/Target/Hexagon/MCTargetDesc/HexagonMCChecker.h index 00afdb664ba5..dbd3d8ae45e6 100644 --- a/llvm/lib/Target/Hexagon/MCTargetDesc/HexagonMCChecker.h +++ b/llvm/lib/Target/Hexagon/MCTargetDesc/HexagonMCChecker.h @@ -99,6 +99,7 @@ class HexagonMCChecker { bool checkHWLoop(); bool checkCOFMax1(); bool checkLegalVecRegPair(); + bool checkValidTmpDst(); static void compoundRegisterMap(unsigned &); diff --git a/llvm/lib/Target/Hexagon/MCTargetDesc/HexagonMCInstrInfo.cpp b/llvm/lib/Target/Hexagon/MCTargetDesc/HexagonMCInstrInfo.cpp index fa12fe1da448..68ccb20f4f15 100644 --- a/llvm/lib/Target/Hexagon/MCTargetDesc/HexagonMCInstrInfo.cpp +++ b/llvm/lib/Target/Hexagon/MCTargetDesc/HexagonMCInstrInfo.cpp @@ -939,10 +939,24 @@ bool 
HexagonMCInstrInfo::prefersSlot3(MCInstrInfo const &MCII, return (F >> HexagonII::PrefersSlot3Pos) & HexagonII::PrefersSlot3Mask; } -/// return true if instruction has hasTmpDst attribute. bool HexagonMCInstrInfo::hasTmpDst(MCInstrInfo const &MCII, MCInst const &MCI) { + switch (MCI.getOpcode()) { + default: + return false; + case Hexagon::V6_vgathermh: + case Hexagon::V6_vgathermhq: + case Hexagon::V6_vgathermhw: + case Hexagon::V6_vgathermhwq: + case Hexagon::V6_vgathermw: + case Hexagon::V6_vgathermwq: + return true; + } + return false; +} + +bool HexagonMCInstrInfo::hasHvxTmp(MCInstrInfo const &MCII, MCInst const &MCI) { const uint64_t F = HexagonMCInstrInfo::getDesc(MCII, MCI).TSFlags; - return (F >> HexagonII::HasTmpDstPos) & HexagonII::HasTmpDstMask; + return (F >> HexagonII::HasHvxTmpPos) & HexagonII::HasHvxTmpMask; } bool HexagonMCInstrInfo::requiresSlot(MCSubtargetInfo const &STI, diff --git a/llvm/lib/Target/Hexagon/MCTargetDesc/HexagonMCInstrInfo.h b/llvm/lib/Target/Hexagon/MCTargetDesc/HexagonMCInstrInfo.h index 7b3c079880f8..5c56db14798f 100644 --- a/llvm/lib/Target/Hexagon/MCTargetDesc/HexagonMCInstrInfo.h +++ b/llvm/lib/Target/Hexagon/MCTargetDesc/HexagonMCInstrInfo.h @@ -41,7 +41,8 @@ class DuplexCandidate { namespace Hexagon { -class PacketIterator { +class PacketIterator : public std::iterator { MCInstrInfo const &MCII; MCInst::const_iterator BundleCurrent; MCInst::const_iterator BundleEnd; @@ -188,6 +189,7 @@ bool hasImmExt(MCInst const &MCI); bool hasNewValue(MCInstrInfo const &MCII, MCInst const &MCI); bool hasNewValue2(MCInstrInfo const &MCII, MCInst const &MCI); bool hasTmpDst(MCInstrInfo const &MCII, MCInst const &MCI); +bool hasHvxTmp(MCInstrInfo const &MCII, MCInst const &MCI); unsigned iClassOfDuplexPair(unsigned Ga, unsigned Gb); int64_t minConstant(MCInst const &MCI, size_t Index); diff --git a/llvm/lib/Target/Hexagon/MCTargetDesc/HexagonMCTargetDesc.cpp b/llvm/lib/Target/Hexagon/MCTargetDesc/HexagonMCTargetDesc.cpp index d832a756cb92..dfdddb50657c 100644 --- a/llvm/lib/Target/Hexagon/MCTargetDesc/HexagonMCTargetDesc.cpp +++ b/llvm/lib/Target/Hexagon/MCTargetDesc/HexagonMCTargetDesc.cpp @@ -80,6 +80,8 @@ cl::opt MV67T("mv67t", cl::Hidden, cl::desc("Build for Hexagon V67T"), cl::init(false)); cl::opt MV68("mv68", cl::Hidden, cl::desc("Build for Hexagon V68"), cl::init(false)); +cl::opt MV69("mv69", cl::Hidden, cl::desc("Build for Hexagon V69"), + cl::init(false)); cl::opt EnableHVX("mhvx", @@ -91,6 +93,7 @@ cl::opt clEnumValN(Hexagon::ArchEnum::V66, "v66", "Build for HVX v66"), clEnumValN(Hexagon::ArchEnum::V67, "v67", "Build for HVX v67"), clEnumValN(Hexagon::ArchEnum::V68, "v68", "Build for HVX v68"), + clEnumValN(Hexagon::ArchEnum::V69, "v69", "Build for HVX v69"), // Sentinel for no value specified. clEnumValN(Hexagon::ArchEnum::Generic, "", "")), // Sentinel for flag not present. 
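Tying the HexagonMCChecker and HexagonMCInstrInfo changes above together: hasTmpDst() is now a fixed list of the six vgather opcodes whose real destination is vtmp, hasHvxTmp() reads the renamed TSFlags bit (bit 60) for .tmp producers, and the new checkValidTmpDst() rejects a packet containing more than one such destination once the ArchV69 feature is in effect. An illustrative packet of the shape that now draws the diagnostic (hand-written for this note, not taken from the patch or its tests; the gather syntax is assumed):

    {
      v0.tmp = vmem(r0+#0)              // .tmp destination
      vtmp.h = vgather(r1,m0,v2.h).h    // vtmp destination
    }

    error: this packet has more than one HVX vtmp/.tmp destination instruction
    note: this is an HVX vtmp/.tmp destination instruction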
@@ -101,6 +104,11 @@ static cl::opt DisableHVX("mno-hvx", cl::Hidden, cl::desc("Disable Hexagon Vector eXtensions")); +static cl::opt + EnableHvxIeeeFp("mhvx-ieee-fp", cl::Hidden, + cl::desc("Enable HVX IEEE floating point extensions")); +static cl::opt EnableHexagonCabac + ("mcabac", cl::desc("tbd"), cl::init(false)); static StringRef DefaultArch = "hexagonv60"; @@ -123,6 +131,8 @@ static StringRef HexagonGetArchVariant() { return "hexagonv67t"; if (MV68) return "hexagonv68"; + if (MV69) + return "hexagonv69"; return ""; } @@ -371,6 +381,9 @@ std::string selectHexagonFS(StringRef CPU, StringRef FS) { case Hexagon::ArchEnum::V68: Result.push_back("+hvxv68"); break; + case Hexagon::ArchEnum::V69: + Result.push_back("+hvxv69"); + break; case Hexagon::ArchEnum::Generic:{ Result.push_back(StringSwitch(CPU) .Case("hexagonv60", "+hvxv60") @@ -379,13 +392,19 @@ std::string selectHexagonFS(StringRef CPU, StringRef FS) { .Case("hexagonv66", "+hvxv66") .Case("hexagonv67", "+hvxv67") .Case("hexagonv67t", "+hvxv67") - .Case("hexagonv68", "+hvxv68")); + .Case("hexagonv68", "+hvxv68") + .Case("hexagonv69", "+hvxv69")); break; } case Hexagon::ArchEnum::NoArch: // Sentinel if -mhvx isn't specified break; } + if (EnableHvxIeeeFp) + Result.push_back("+hvx-ieee-fp"); + if (EnableHexagonCabac) + Result.push_back("+cabac"); + return join(Result.begin(), Result.end(), ","); } } @@ -422,8 +441,8 @@ FeatureBitset Hexagon_MC::completeHVXFeatures(const FeatureBitset &S) { // turns on hvxvNN, corresponding to the existing ArchVNN. FeatureBitset FB = S; unsigned CpuArch = ArchV5; - for (unsigned F : {ArchV68, ArchV67, ArchV66, ArchV65, ArchV62, ArchV60, - ArchV55, ArchV5}) { + for (unsigned F : {ArchV69, ArchV68, ArchV67, ArchV66, ArchV65, ArchV62, + ArchV60, ArchV55, ArchV5}) { if (!FB.test(F)) continue; CpuArch = F; @@ -438,7 +457,8 @@ FeatureBitset Hexagon_MC::completeHVXFeatures(const FeatureBitset &S) { } bool HasHvxVer = false; for (unsigned F : {ExtensionHVXV60, ExtensionHVXV62, ExtensionHVXV65, - ExtensionHVXV66, ExtensionHVXV67, ExtensionHVXV68}) { + ExtensionHVXV66, ExtensionHVXV67, ExtensionHVXV68, + ExtensionHVXV69}) { if (!FB.test(F)) continue; HasHvxVer = true; @@ -451,6 +471,9 @@ FeatureBitset Hexagon_MC::completeHVXFeatures(const FeatureBitset &S) { // HasHvxVer is false, and UseHvx is true. 
switch (CpuArch) { + case ArchV69: + FB.set(ExtensionHVXV69); + LLVM_FALLTHROUGH; case ArchV68: FB.set(ExtensionHVXV68); LLVM_FALLTHROUGH; @@ -538,6 +561,7 @@ unsigned Hexagon_MC::GetELFFlags(const MCSubtargetInfo &STI) { {"hexagonv67", ELF::EF_HEXAGON_MACH_V67}, {"hexagonv67t", ELF::EF_HEXAGON_MACH_V67T}, {"hexagonv68", ELF::EF_HEXAGON_MACH_V68}, + {"hexagonv69", ELF::EF_HEXAGON_MACH_V69}, }; auto F = ElfFlags.find(STI.getCPU()); From 4c8becbeee18a4ef83a6d8005b0e0fe19a5077ed Mon Sep 17 00:00:00 2001 From: Krzysztof Parzyszek Date: Wed, 22 Dec 2021 07:46:14 -0800 Subject: [PATCH 160/293] [Hexagon] Add Hexagon v69 builtins to clang --- clang/include/clang/Basic/BuiltinsHexagon.def | 10 +- .../clang/Basic/BuiltinsHexagonDep.def | 147 ++++ clang/lib/Headers/hexagon_protos.h | 11 - clang/lib/Headers/hvx_hexagon_protos.h | 814 +++++++++++++++++- 4 files changed, 958 insertions(+), 24 deletions(-) diff --git a/clang/include/clang/Basic/BuiltinsHexagon.def b/clang/include/clang/Basic/BuiltinsHexagon.def index 0001bd556117..0f62c235bb62 100644 --- a/clang/include/clang/Basic/BuiltinsHexagon.def +++ b/clang/include/clang/Basic/BuiltinsHexagon.def @@ -17,8 +17,10 @@ # define TARGET_BUILTIN(ID, TYPE, ATTRS, FEATURE) BUILTIN(ID, TYPE, ATTRS) #endif +#pragma push_macro("V69") +#define V69 "v69" #pragma push_macro("V68") -#define V68 "v68" +#define V68 "v68|" V69 #pragma push_macro("V67") #define V67 "v67|" V68 #pragma push_macro("V66") @@ -34,8 +36,10 @@ #pragma push_macro("V5") #define V5 "v5|" V55 +#pragma push_macro("HVXV69") +#define HVXV69 "hvxv69" #pragma push_macro("HVXV68") -#define HVXV68 "hvxv68" +#define HVXV68 "hvxv68|" HVXV69 #pragma push_macro("HVXV67") #define HVXV67 "hvxv67|" HVXV68 #pragma push_macro("HVXV66") @@ -128,6 +132,7 @@ TARGET_BUILTIN(__builtin_HEXAGON_V6_vrmpyub_rtt_acc_128B,"V64iV64iV32iLLi","", " #pragma pop_macro("HVXV66") #pragma pop_macro("HVXV67") #pragma pop_macro("HVXV68") +#pragma pop_macro("HVXV69") #pragma pop_macro("V5") #pragma pop_macro("V55") @@ -137,6 +142,7 @@ TARGET_BUILTIN(__builtin_HEXAGON_V6_vrmpyub_rtt_acc_128B,"V64iV64iV32iLLi","", " #pragma pop_macro("V66") #pragma pop_macro("V67") #pragma pop_macro("V68") +#pragma pop_macro("V69") #undef BUILTIN #undef TARGET_BUILTIN diff --git a/clang/include/clang/Basic/BuiltinsHexagonDep.def b/clang/include/clang/Basic/BuiltinsHexagonDep.def index 152c9c4dd8ad..2eb4ca69c7bd 100644 --- a/clang/include/clang/Basic/BuiltinsHexagonDep.def +++ b/clang/include/clang/Basic/BuiltinsHexagonDep.def @@ -1739,3 +1739,150 @@ TARGET_BUILTIN(__builtin_HEXAGON_V6_v6mpyvubs10, "V32iV32iV32iUIi", "", HVXV68) TARGET_BUILTIN(__builtin_HEXAGON_V6_v6mpyvubs10_128B, "V64iV64iV64iUIi", "", HVXV68) TARGET_BUILTIN(__builtin_HEXAGON_V6_v6mpyvubs10_vxx, "V32iV32iV32iV32iUIi", "", HVXV68) TARGET_BUILTIN(__builtin_HEXAGON_V6_v6mpyvubs10_vxx_128B, "V64iV64iV64iV64iUIi", "", HVXV68) +TARGET_BUILTIN(__builtin_HEXAGON_V6_vabs_hf, "V16iV16i", "", HVXV68) +TARGET_BUILTIN(__builtin_HEXAGON_V6_vabs_hf_128B, "V32iV32i", "", HVXV68) +TARGET_BUILTIN(__builtin_HEXAGON_V6_vabs_sf, "V16iV16i", "", HVXV68) +TARGET_BUILTIN(__builtin_HEXAGON_V6_vabs_sf_128B, "V32iV32i", "", HVXV68) +TARGET_BUILTIN(__builtin_HEXAGON_V6_vadd_hf, "V16iV16iV16i", "", HVXV68) +TARGET_BUILTIN(__builtin_HEXAGON_V6_vadd_hf_128B, "V32iV32iV32i", "", HVXV68) +TARGET_BUILTIN(__builtin_HEXAGON_V6_vadd_hf_hf, "V16iV16iV16i", "", HVXV68) +TARGET_BUILTIN(__builtin_HEXAGON_V6_vadd_hf_hf_128B, "V32iV32iV32i", "", HVXV68) +TARGET_BUILTIN(__builtin_HEXAGON_V6_vadd_qf16, "V16iV16iV16i", "", 
HVXV68) +TARGET_BUILTIN(__builtin_HEXAGON_V6_vadd_qf16_128B, "V32iV32iV32i", "", HVXV68) +TARGET_BUILTIN(__builtin_HEXAGON_V6_vadd_qf16_mix, "V16iV16iV16i", "", HVXV68) +TARGET_BUILTIN(__builtin_HEXAGON_V6_vadd_qf16_mix_128B, "V32iV32iV32i", "", HVXV68) +TARGET_BUILTIN(__builtin_HEXAGON_V6_vadd_qf32, "V16iV16iV16i", "", HVXV68) +TARGET_BUILTIN(__builtin_HEXAGON_V6_vadd_qf32_128B, "V32iV32iV32i", "", HVXV68) +TARGET_BUILTIN(__builtin_HEXAGON_V6_vadd_qf32_mix, "V16iV16iV16i", "", HVXV68) +TARGET_BUILTIN(__builtin_HEXAGON_V6_vadd_qf32_mix_128B, "V32iV32iV32i", "", HVXV68) +TARGET_BUILTIN(__builtin_HEXAGON_V6_vadd_sf, "V16iV16iV16i", "", HVXV68) +TARGET_BUILTIN(__builtin_HEXAGON_V6_vadd_sf_128B, "V32iV32iV32i", "", HVXV68) +TARGET_BUILTIN(__builtin_HEXAGON_V6_vadd_sf_hf, "V32iV16iV16i", "", HVXV68) +TARGET_BUILTIN(__builtin_HEXAGON_V6_vadd_sf_hf_128B, "V64iV32iV32i", "", HVXV68) +TARGET_BUILTIN(__builtin_HEXAGON_V6_vadd_sf_sf, "V16iV16iV16i", "", HVXV68) +TARGET_BUILTIN(__builtin_HEXAGON_V6_vadd_sf_sf_128B, "V32iV32iV32i", "", HVXV68) +TARGET_BUILTIN(__builtin_HEXAGON_V6_vassign_fp, "V16iV16i", "", HVXV68) +TARGET_BUILTIN(__builtin_HEXAGON_V6_vassign_fp_128B, "V32iV32i", "", HVXV68) +TARGET_BUILTIN(__builtin_HEXAGON_V6_vconv_hf_qf16, "V16iV16i", "", HVXV68) +TARGET_BUILTIN(__builtin_HEXAGON_V6_vconv_hf_qf16_128B, "V32iV32i", "", HVXV68) +TARGET_BUILTIN(__builtin_HEXAGON_V6_vconv_hf_qf32, "V16iV32i", "", HVXV68) +TARGET_BUILTIN(__builtin_HEXAGON_V6_vconv_hf_qf32_128B, "V32iV64i", "", HVXV68) +TARGET_BUILTIN(__builtin_HEXAGON_V6_vconv_sf_qf32, "V16iV16i", "", HVXV68) +TARGET_BUILTIN(__builtin_HEXAGON_V6_vconv_sf_qf32_128B, "V32iV32i", "", HVXV68) +TARGET_BUILTIN(__builtin_HEXAGON_V6_vcvt_b_hf, "V16iV16iV16i", "", HVXV68) +TARGET_BUILTIN(__builtin_HEXAGON_V6_vcvt_b_hf_128B, "V32iV32iV32i", "", HVXV68) +TARGET_BUILTIN(__builtin_HEXAGON_V6_vcvt_h_hf, "V16iV16i", "", HVXV68) +TARGET_BUILTIN(__builtin_HEXAGON_V6_vcvt_h_hf_128B, "V32iV32i", "", HVXV68) +TARGET_BUILTIN(__builtin_HEXAGON_V6_vcvt_hf_b, "V32iV16i", "", HVXV68) +TARGET_BUILTIN(__builtin_HEXAGON_V6_vcvt_hf_b_128B, "V64iV32i", "", HVXV68) +TARGET_BUILTIN(__builtin_HEXAGON_V6_vcvt_hf_h, "V16iV16i", "", HVXV68) +TARGET_BUILTIN(__builtin_HEXAGON_V6_vcvt_hf_h_128B, "V32iV32i", "", HVXV68) +TARGET_BUILTIN(__builtin_HEXAGON_V6_vcvt_hf_sf, "V16iV16iV16i", "", HVXV68) +TARGET_BUILTIN(__builtin_HEXAGON_V6_vcvt_hf_sf_128B, "V32iV32iV32i", "", HVXV68) +TARGET_BUILTIN(__builtin_HEXAGON_V6_vcvt_hf_ub, "V32iV16i", "", HVXV68) +TARGET_BUILTIN(__builtin_HEXAGON_V6_vcvt_hf_ub_128B, "V64iV32i", "", HVXV68) +TARGET_BUILTIN(__builtin_HEXAGON_V6_vcvt_hf_uh, "V16iV16i", "", HVXV68) +TARGET_BUILTIN(__builtin_HEXAGON_V6_vcvt_hf_uh_128B, "V32iV32i", "", HVXV68) +TARGET_BUILTIN(__builtin_HEXAGON_V6_vcvt_sf_hf, "V32iV16i", "", HVXV68) +TARGET_BUILTIN(__builtin_HEXAGON_V6_vcvt_sf_hf_128B, "V64iV32i", "", HVXV68) +TARGET_BUILTIN(__builtin_HEXAGON_V6_vcvt_ub_hf, "V16iV16iV16i", "", HVXV68) +TARGET_BUILTIN(__builtin_HEXAGON_V6_vcvt_ub_hf_128B, "V32iV32iV32i", "", HVXV68) +TARGET_BUILTIN(__builtin_HEXAGON_V6_vcvt_uh_hf, "V16iV16i", "", HVXV68) +TARGET_BUILTIN(__builtin_HEXAGON_V6_vcvt_uh_hf_128B, "V32iV32i", "", HVXV68) +TARGET_BUILTIN(__builtin_HEXAGON_V6_vdmpy_sf_hf, "V16iV16iV16i", "", HVXV68) +TARGET_BUILTIN(__builtin_HEXAGON_V6_vdmpy_sf_hf_128B, "V32iV32iV32i", "", HVXV68) +TARGET_BUILTIN(__builtin_HEXAGON_V6_vdmpy_sf_hf_acc, "V16iV16iV16iV16i", "", HVXV68) +TARGET_BUILTIN(__builtin_HEXAGON_V6_vdmpy_sf_hf_acc_128B, "V32iV32iV32iV32i", "", HVXV68) 
+TARGET_BUILTIN(__builtin_HEXAGON_V6_vfmax_hf, "V16iV16iV16i", "", HVXV68) +TARGET_BUILTIN(__builtin_HEXAGON_V6_vfmax_hf_128B, "V32iV32iV32i", "", HVXV68) +TARGET_BUILTIN(__builtin_HEXAGON_V6_vfmax_sf, "V16iV16iV16i", "", HVXV68) +TARGET_BUILTIN(__builtin_HEXAGON_V6_vfmax_sf_128B, "V32iV32iV32i", "", HVXV68) +TARGET_BUILTIN(__builtin_HEXAGON_V6_vfmin_hf, "V16iV16iV16i", "", HVXV68) +TARGET_BUILTIN(__builtin_HEXAGON_V6_vfmin_hf_128B, "V32iV32iV32i", "", HVXV68) +TARGET_BUILTIN(__builtin_HEXAGON_V6_vfmin_sf, "V16iV16iV16i", "", HVXV68) +TARGET_BUILTIN(__builtin_HEXAGON_V6_vfmin_sf_128B, "V32iV32iV32i", "", HVXV68) +TARGET_BUILTIN(__builtin_HEXAGON_V6_vfneg_hf, "V16iV16i", "", HVXV68) +TARGET_BUILTIN(__builtin_HEXAGON_V6_vfneg_hf_128B, "V32iV32i", "", HVXV68) +TARGET_BUILTIN(__builtin_HEXAGON_V6_vfneg_sf, "V16iV16i", "", HVXV68) +TARGET_BUILTIN(__builtin_HEXAGON_V6_vfneg_sf_128B, "V32iV32i", "", HVXV68) +TARGET_BUILTIN(__builtin_HEXAGON_V6_vgthf, "V64bV16iV16i", "", HVXV68) +TARGET_BUILTIN(__builtin_HEXAGON_V6_vgthf_128B, "V128bV32iV32i", "", HVXV68) +TARGET_BUILTIN(__builtin_HEXAGON_V6_vgthf_and, "V64bV64bV16iV16i", "", HVXV68) +TARGET_BUILTIN(__builtin_HEXAGON_V6_vgthf_and_128B, "V128bV128bV32iV32i", "", HVXV68) +TARGET_BUILTIN(__builtin_HEXAGON_V6_vgthf_or, "V64bV64bV16iV16i", "", HVXV68) +TARGET_BUILTIN(__builtin_HEXAGON_V6_vgthf_or_128B, "V128bV128bV32iV32i", "", HVXV68) +TARGET_BUILTIN(__builtin_HEXAGON_V6_vgthf_xor, "V64bV64bV16iV16i", "", HVXV68) +TARGET_BUILTIN(__builtin_HEXAGON_V6_vgthf_xor_128B, "V128bV128bV32iV32i", "", HVXV68) +TARGET_BUILTIN(__builtin_HEXAGON_V6_vgtsf, "V64bV16iV16i", "", HVXV68) +TARGET_BUILTIN(__builtin_HEXAGON_V6_vgtsf_128B, "V128bV32iV32i", "", HVXV68) +TARGET_BUILTIN(__builtin_HEXAGON_V6_vgtsf_and, "V64bV64bV16iV16i", "", HVXV68) +TARGET_BUILTIN(__builtin_HEXAGON_V6_vgtsf_and_128B, "V128bV128bV32iV32i", "", HVXV68) +TARGET_BUILTIN(__builtin_HEXAGON_V6_vgtsf_or, "V64bV64bV16iV16i", "", HVXV68) +TARGET_BUILTIN(__builtin_HEXAGON_V6_vgtsf_or_128B, "V128bV128bV32iV32i", "", HVXV68) +TARGET_BUILTIN(__builtin_HEXAGON_V6_vgtsf_xor, "V64bV64bV16iV16i", "", HVXV68) +TARGET_BUILTIN(__builtin_HEXAGON_V6_vgtsf_xor_128B, "V128bV128bV32iV32i", "", HVXV68) +TARGET_BUILTIN(__builtin_HEXAGON_V6_vmax_hf, "V16iV16iV16i", "", HVXV68) +TARGET_BUILTIN(__builtin_HEXAGON_V6_vmax_hf_128B, "V32iV32iV32i", "", HVXV68) +TARGET_BUILTIN(__builtin_HEXAGON_V6_vmax_sf, "V16iV16iV16i", "", HVXV68) +TARGET_BUILTIN(__builtin_HEXAGON_V6_vmax_sf_128B, "V32iV32iV32i", "", HVXV68) +TARGET_BUILTIN(__builtin_HEXAGON_V6_vmin_hf, "V16iV16iV16i", "", HVXV68) +TARGET_BUILTIN(__builtin_HEXAGON_V6_vmin_hf_128B, "V32iV32iV32i", "", HVXV68) +TARGET_BUILTIN(__builtin_HEXAGON_V6_vmin_sf, "V16iV16iV16i", "", HVXV68) +TARGET_BUILTIN(__builtin_HEXAGON_V6_vmin_sf_128B, "V32iV32iV32i", "", HVXV68) +TARGET_BUILTIN(__builtin_HEXAGON_V6_vmpy_hf_hf, "V16iV16iV16i", "", HVXV68) +TARGET_BUILTIN(__builtin_HEXAGON_V6_vmpy_hf_hf_128B, "V32iV32iV32i", "", HVXV68) +TARGET_BUILTIN(__builtin_HEXAGON_V6_vmpy_hf_hf_acc, "V16iV16iV16iV16i", "", HVXV68) +TARGET_BUILTIN(__builtin_HEXAGON_V6_vmpy_hf_hf_acc_128B, "V32iV32iV32iV32i", "", HVXV68) +TARGET_BUILTIN(__builtin_HEXAGON_V6_vmpy_qf16, "V16iV16iV16i", "", HVXV68) +TARGET_BUILTIN(__builtin_HEXAGON_V6_vmpy_qf16_128B, "V32iV32iV32i", "", HVXV68) +TARGET_BUILTIN(__builtin_HEXAGON_V6_vmpy_qf16_hf, "V16iV16iV16i", "", HVXV68) +TARGET_BUILTIN(__builtin_HEXAGON_V6_vmpy_qf16_hf_128B, "V32iV32iV32i", "", HVXV68) +TARGET_BUILTIN(__builtin_HEXAGON_V6_vmpy_qf16_mix_hf, "V16iV16iV16i", "", 
HVXV68) +TARGET_BUILTIN(__builtin_HEXAGON_V6_vmpy_qf16_mix_hf_128B, "V32iV32iV32i", "", HVXV68) +TARGET_BUILTIN(__builtin_HEXAGON_V6_vmpy_qf32, "V16iV16iV16i", "", HVXV68) +TARGET_BUILTIN(__builtin_HEXAGON_V6_vmpy_qf32_128B, "V32iV32iV32i", "", HVXV68) +TARGET_BUILTIN(__builtin_HEXAGON_V6_vmpy_qf32_hf, "V32iV16iV16i", "", HVXV68) +TARGET_BUILTIN(__builtin_HEXAGON_V6_vmpy_qf32_hf_128B, "V64iV32iV32i", "", HVXV68) +TARGET_BUILTIN(__builtin_HEXAGON_V6_vmpy_qf32_mix_hf, "V32iV16iV16i", "", HVXV68) +TARGET_BUILTIN(__builtin_HEXAGON_V6_vmpy_qf32_mix_hf_128B, "V64iV32iV32i", "", HVXV68) +TARGET_BUILTIN(__builtin_HEXAGON_V6_vmpy_qf32_qf16, "V32iV16iV16i", "", HVXV68) +TARGET_BUILTIN(__builtin_HEXAGON_V6_vmpy_qf32_qf16_128B, "V64iV32iV32i", "", HVXV68) +TARGET_BUILTIN(__builtin_HEXAGON_V6_vmpy_qf32_sf, "V16iV16iV16i", "", HVXV68) +TARGET_BUILTIN(__builtin_HEXAGON_V6_vmpy_qf32_sf_128B, "V32iV32iV32i", "", HVXV68) +TARGET_BUILTIN(__builtin_HEXAGON_V6_vmpy_sf_hf, "V32iV16iV16i", "", HVXV68) +TARGET_BUILTIN(__builtin_HEXAGON_V6_vmpy_sf_hf_128B, "V64iV32iV32i", "", HVXV68) +TARGET_BUILTIN(__builtin_HEXAGON_V6_vmpy_sf_hf_acc, "V32iV32iV16iV16i", "", HVXV68) +TARGET_BUILTIN(__builtin_HEXAGON_V6_vmpy_sf_hf_acc_128B, "V64iV64iV32iV32i", "", HVXV68) +TARGET_BUILTIN(__builtin_HEXAGON_V6_vmpy_sf_sf, "V16iV16iV16i", "", HVXV68) +TARGET_BUILTIN(__builtin_HEXAGON_V6_vmpy_sf_sf_128B, "V32iV32iV32i", "", HVXV68) +TARGET_BUILTIN(__builtin_HEXAGON_V6_vsub_hf, "V16iV16iV16i", "", HVXV68) +TARGET_BUILTIN(__builtin_HEXAGON_V6_vsub_hf_128B, "V32iV32iV32i", "", HVXV68) +TARGET_BUILTIN(__builtin_HEXAGON_V6_vsub_hf_hf, "V16iV16iV16i", "", HVXV68) +TARGET_BUILTIN(__builtin_HEXAGON_V6_vsub_hf_hf_128B, "V32iV32iV32i", "", HVXV68) +TARGET_BUILTIN(__builtin_HEXAGON_V6_vsub_qf16, "V16iV16iV16i", "", HVXV68) +TARGET_BUILTIN(__builtin_HEXAGON_V6_vsub_qf16_128B, "V32iV32iV32i", "", HVXV68) +TARGET_BUILTIN(__builtin_HEXAGON_V6_vsub_qf16_mix, "V16iV16iV16i", "", HVXV68) +TARGET_BUILTIN(__builtin_HEXAGON_V6_vsub_qf16_mix_128B, "V32iV32iV32i", "", HVXV68) +TARGET_BUILTIN(__builtin_HEXAGON_V6_vsub_qf32, "V16iV16iV16i", "", HVXV68) +TARGET_BUILTIN(__builtin_HEXAGON_V6_vsub_qf32_128B, "V32iV32iV32i", "", HVXV68) +TARGET_BUILTIN(__builtin_HEXAGON_V6_vsub_qf32_mix, "V16iV16iV16i", "", HVXV68) +TARGET_BUILTIN(__builtin_HEXAGON_V6_vsub_qf32_mix_128B, "V32iV32iV32i", "", HVXV68) +TARGET_BUILTIN(__builtin_HEXAGON_V6_vsub_sf, "V16iV16iV16i", "", HVXV68) +TARGET_BUILTIN(__builtin_HEXAGON_V6_vsub_sf_128B, "V32iV32iV32i", "", HVXV68) +TARGET_BUILTIN(__builtin_HEXAGON_V6_vsub_sf_hf, "V32iV16iV16i", "", HVXV68) +TARGET_BUILTIN(__builtin_HEXAGON_V6_vsub_sf_hf_128B, "V64iV32iV32i", "", HVXV68) +TARGET_BUILTIN(__builtin_HEXAGON_V6_vsub_sf_sf, "V16iV16iV16i", "", HVXV68) +TARGET_BUILTIN(__builtin_HEXAGON_V6_vsub_sf_sf_128B, "V32iV32iV32i", "", HVXV68) + +// V69 HVX Instructions. 
+ +TARGET_BUILTIN(__builtin_HEXAGON_V6_vasrvuhubrndsat, "V16iV32iV16i", "", HVXV69) +TARGET_BUILTIN(__builtin_HEXAGON_V6_vasrvuhubrndsat_128B, "V32iV64iV32i", "", HVXV69) +TARGET_BUILTIN(__builtin_HEXAGON_V6_vasrvuhubsat, "V16iV32iV16i", "", HVXV69) +TARGET_BUILTIN(__builtin_HEXAGON_V6_vasrvuhubsat_128B, "V32iV64iV32i", "", HVXV69) +TARGET_BUILTIN(__builtin_HEXAGON_V6_vasrvwuhrndsat, "V16iV32iV16i", "", HVXV69) +TARGET_BUILTIN(__builtin_HEXAGON_V6_vasrvwuhrndsat_128B, "V32iV64iV32i", "", HVXV69) +TARGET_BUILTIN(__builtin_HEXAGON_V6_vasrvwuhsat, "V16iV32iV16i", "", HVXV69) +TARGET_BUILTIN(__builtin_HEXAGON_V6_vasrvwuhsat_128B, "V32iV64iV32i", "", HVXV69) +TARGET_BUILTIN(__builtin_HEXAGON_V6_vmpyuhvs, "V16iV16iV16i", "", HVXV69) +TARGET_BUILTIN(__builtin_HEXAGON_V6_vmpyuhvs_128B, "V32iV32iV32i", "", HVXV69) diff --git a/clang/lib/Headers/hexagon_protos.h b/clang/lib/Headers/hexagon_protos.h index cdffd93bb859..2642f3c8428d 100644 --- a/clang/lib/Headers/hexagon_protos.h +++ b/clang/lib/Headers/hexagon_protos.h @@ -8003,17 +8003,6 @@ #define Q6_P_vtrunohb_PP __builtin_HEXAGON_S6_vtrunohb_ppp #endif /* __HEXAGON_ARCH___ >= 62 */ -#if __HEXAGON_ARCH__ >= 62 -/* ========================================================================== - Assembly Syntax: Vd32=vmem(Rt32):nt - C Intrinsic Prototype: HVX_Vector Q6_V_vmem_R_nt(Word32 Rt) - Instruction Type: MAPPING - Execution Slots: SLOT0123 - ========================================================================== */ - -#define Q6_V_vmem_R_nt __builtin_HEXAGON_V6_ldntnt0 -#endif /* __HEXAGON_ARCH___ >= 62 */ - #if __HEXAGON_ARCH__ >= 65 /* ========================================================================== Assembly Syntax: Pd4=!any8(vcmpb.eq(Rss32,Rtt32)) diff --git a/clang/lib/Headers/hvx_hexagon_protos.h b/clang/lib/Headers/hvx_hexagon_protos.h index dab19949b889..7e3679a38b2c 100644 --- a/clang/lib/Headers/hvx_hexagon_protos.h +++ b/clang/lib/Headers/hvx_hexagon_protos.h @@ -1112,7 +1112,7 @@ /* ========================================================================== Assembly Syntax: Vd32.w=vdmpy(Vu32.h,Rt32.h):sat C Intrinsic Prototype: HVX_Vector Q6_Vw_vdmpy_VhRh_sat(HVX_Vector Vu, Word32 Rt) - Instruction Type: CVI_VX_DV + Instruction Type: CVI_VX Execution Slots: SLOT23 ========================================================================== */ @@ -1123,7 +1123,7 @@ /* ========================================================================== Assembly Syntax: Vx32.w+=vdmpy(Vu32.h,Rt32.h):sat C Intrinsic Prototype: HVX_Vector Q6_Vw_vdmpyacc_VwVhRh_sat(HVX_Vector Vx, HVX_Vector Vu, Word32 Rt) - Instruction Type: CVI_VX_DV + Instruction Type: CVI_VX Execution Slots: SLOT23 ========================================================================== */ @@ -1156,7 +1156,7 @@ /* ========================================================================== Assembly Syntax: Vd32.w=vdmpy(Vu32.h,Rt32.uh):sat C Intrinsic Prototype: HVX_Vector Q6_Vw_vdmpy_VhRuh_sat(HVX_Vector Vu, Word32 Rt) - Instruction Type: CVI_VX_DV + Instruction Type: CVI_VX Execution Slots: SLOT23 ========================================================================== */ @@ -1167,7 +1167,7 @@ /* ========================================================================== Assembly Syntax: Vx32.w+=vdmpy(Vu32.h,Rt32.uh):sat C Intrinsic Prototype: HVX_Vector Q6_Vw_vdmpyacc_VwVhRuh_sat(HVX_Vector Vx, HVX_Vector Vu, Word32 Rt) - Instruction Type: CVI_VX_DV + Instruction Type: CVI_VX Execution Slots: SLOT23 
========================================================================== */ @@ -1178,7 +1178,7 @@ /* ========================================================================== Assembly Syntax: Vd32.w=vdmpy(Vu32.h,Vv32.h):sat C Intrinsic Prototype: HVX_Vector Q6_Vw_vdmpy_VhVh_sat(HVX_Vector Vu, HVX_Vector Vv) - Instruction Type: CVI_VX_DV + Instruction Type: CVI_VX Execution Slots: SLOT23 ========================================================================== */ @@ -1992,7 +1992,7 @@ /* ========================================================================== Assembly Syntax: Vd32.h=vmpy(Vu32.h,Rt32.h):<<1:rnd:sat C Intrinsic Prototype: HVX_Vector Q6_Vh_vmpy_VhRh_s1_rnd_sat(HVX_Vector Vu, Word32 Rt) - Instruction Type: CVI_VX_DV + Instruction Type: CVI_VX Execution Slots: SLOT23 ========================================================================== */ @@ -2003,7 +2003,7 @@ /* ========================================================================== Assembly Syntax: Vd32.h=vmpy(Vu32.h,Rt32.h):<<1:sat C Intrinsic Prototype: HVX_Vector Q6_Vh_vmpy_VhRh_s1_sat(HVX_Vector Vu, Word32 Rt) - Instruction Type: CVI_VX_DV + Instruction Type: CVI_VX Execution Slots: SLOT23 ========================================================================== */ @@ -2058,7 +2058,7 @@ /* ========================================================================== Assembly Syntax: Vd32.h=vmpy(Vu32.h,Vv32.h):<<1:rnd:sat C Intrinsic Prototype: HVX_Vector Q6_Vh_vmpy_VhVh_s1_rnd_sat(HVX_Vector Vu, HVX_Vector Vv) - Instruction Type: CVI_VX_DV + Instruction Type: CVI_VX Execution Slots: SLOT23 ========================================================================== */ @@ -2597,7 +2597,7 @@ /* ========================================================================== Assembly Syntax: Vx32.w+=vrmpy(Vu32.ub,Vv32.b) C Intrinsic Prototype: HVX_Vector Q6_Vw_vrmpyacc_VwVubVb(HVX_Vector Vx, HVX_Vector Vu, HVX_Vector Vv) - Instruction Type: CVI_VX_DV + Instruction Type: CVI_VX Execution Slots: SLOT23 ========================================================================== */ @@ -2619,7 +2619,7 @@ /* ========================================================================== Assembly Syntax: Vx32.w+=vrmpy(Vu32.b,Vv32.b) C Intrinsic Prototype: HVX_Vector Q6_Vw_vrmpyacc_VwVbVb(HVX_Vector Vx, HVX_Vector Vu, HVX_Vector Vv) - Instruction Type: CVI_VX_DV + Instruction Type: CVI_VX Execution Slots: SLOT23 ========================================================================== */ @@ -2685,7 +2685,7 @@ /* ========================================================================== Assembly Syntax: Vx32.uw+=vrmpy(Vu32.ub,Vv32.ub) C Intrinsic Prototype: HVX_Vector Q6_Vuw_vrmpyacc_VuwVubVub(HVX_Vector Vx, HVX_Vector Vu, HVX_Vector Vv) - Instruction Type: CVI_VX_DV + Instruction Type: CVI_VX Execution Slots: SLOT23 ========================================================================== */ @@ -4386,6 +4386,798 @@ #define Q6_Ww_v6mpyacc_WwWubWbI_v(Vxx,Vuu,Vvv,Iu2) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_v6mpyvubs10_vxx)(Vxx,Vuu,Vvv,Iu2) #endif /* __HEXAGON_ARCH___ >= 68 */ +#if __HVX_ARCH__ >= 68 +/* ========================================================================== + Assembly Syntax: Vd32.hf=vabs(Vu32.hf) + C Intrinsic Prototype: HVX_Vector Q6_Vhf_vabs_Vhf(HVX_Vector Vu) + Instruction Type: CVI_VX_LATE + Execution Slots: SLOT23 + ========================================================================== */ + +#define Q6_Vhf_vabs_Vhf(Vu) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vabs_hf)(Vu) +#endif /* __HEXAGON_ARCH___ 
>= 68 */ + +#if __HVX_ARCH__ >= 68 +/* ========================================================================== + Assembly Syntax: Vd32.sf=vabs(Vu32.sf) + C Intrinsic Prototype: HVX_Vector Q6_Vsf_vabs_Vsf(HVX_Vector Vu) + Instruction Type: CVI_VX_LATE + Execution Slots: SLOT23 + ========================================================================== */ + +#define Q6_Vsf_vabs_Vsf(Vu) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vabs_sf)(Vu) +#endif /* __HEXAGON_ARCH___ >= 68 */ + +#if __HVX_ARCH__ >= 68 +/* ========================================================================== + Assembly Syntax: Vd32.qf16=vadd(Vu32.hf,Vv32.hf) + C Intrinsic Prototype: HVX_Vector Q6_Vqf16_vadd_VhfVhf(HVX_Vector Vu, HVX_Vector Vv) + Instruction Type: CVI_VS + Execution Slots: SLOT0123 + ========================================================================== */ + +#define Q6_Vqf16_vadd_VhfVhf(Vu,Vv) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vadd_hf)(Vu,Vv) +#endif /* __HEXAGON_ARCH___ >= 68 */ + +#if __HVX_ARCH__ >= 68 +/* ========================================================================== + Assembly Syntax: Vd32.hf=vadd(Vu32.hf,Vv32.hf) + C Intrinsic Prototype: HVX_Vector Q6_Vhf_vadd_VhfVhf(HVX_Vector Vu, HVX_Vector Vv) + Instruction Type: CVI_VX + Execution Slots: SLOT23 + ========================================================================== */ + +#define Q6_Vhf_vadd_VhfVhf(Vu,Vv) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vadd_hf_hf)(Vu,Vv) +#endif /* __HEXAGON_ARCH___ >= 68 */ + +#if __HVX_ARCH__ >= 68 +/* ========================================================================== + Assembly Syntax: Vd32.qf16=vadd(Vu32.qf16,Vv32.qf16) + C Intrinsic Prototype: HVX_Vector Q6_Vqf16_vadd_Vqf16Vqf16(HVX_Vector Vu, HVX_Vector Vv) + Instruction Type: CVI_VS + Execution Slots: SLOT0123 + ========================================================================== */ + +#define Q6_Vqf16_vadd_Vqf16Vqf16(Vu,Vv) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vadd_qf16)(Vu,Vv) +#endif /* __HEXAGON_ARCH___ >= 68 */ + +#if __HVX_ARCH__ >= 68 +/* ========================================================================== + Assembly Syntax: Vd32.qf16=vadd(Vu32.qf16,Vv32.hf) + C Intrinsic Prototype: HVX_Vector Q6_Vqf16_vadd_Vqf16Vhf(HVX_Vector Vu, HVX_Vector Vv) + Instruction Type: CVI_VS + Execution Slots: SLOT0123 + ========================================================================== */ + +#define Q6_Vqf16_vadd_Vqf16Vhf(Vu,Vv) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vadd_qf16_mix)(Vu,Vv) +#endif /* __HEXAGON_ARCH___ >= 68 */ + +#if __HVX_ARCH__ >= 68 +/* ========================================================================== + Assembly Syntax: Vd32.qf32=vadd(Vu32.qf32,Vv32.qf32) + C Intrinsic Prototype: HVX_Vector Q6_Vqf32_vadd_Vqf32Vqf32(HVX_Vector Vu, HVX_Vector Vv) + Instruction Type: CVI_VS + Execution Slots: SLOT0123 + ========================================================================== */ + +#define Q6_Vqf32_vadd_Vqf32Vqf32(Vu,Vv) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vadd_qf32)(Vu,Vv) +#endif /* __HEXAGON_ARCH___ >= 68 */ + +#if __HVX_ARCH__ >= 68 +/* ========================================================================== + Assembly Syntax: Vd32.qf32=vadd(Vu32.qf32,Vv32.sf) + C Intrinsic Prototype: HVX_Vector Q6_Vqf32_vadd_Vqf32Vsf(HVX_Vector Vu, HVX_Vector Vv) + Instruction Type: CVI_VS + Execution Slots: SLOT0123 + ========================================================================== */ + +#define Q6_Vqf32_vadd_Vqf32Vsf(Vu,Vv) 
__BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vadd_qf32_mix)(Vu,Vv) +#endif /* __HEXAGON_ARCH___ >= 68 */ + +#if __HVX_ARCH__ >= 68 +/* ========================================================================== + Assembly Syntax: Vd32.qf32=vadd(Vu32.sf,Vv32.sf) + C Intrinsic Prototype: HVX_Vector Q6_Vqf32_vadd_VsfVsf(HVX_Vector Vu, HVX_Vector Vv) + Instruction Type: CVI_VS + Execution Slots: SLOT0123 + ========================================================================== */ + +#define Q6_Vqf32_vadd_VsfVsf(Vu,Vv) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vadd_sf)(Vu,Vv) +#endif /* __HEXAGON_ARCH___ >= 68 */ + +#if __HVX_ARCH__ >= 68 +/* ========================================================================== + Assembly Syntax: Vdd32.sf=vadd(Vu32.hf,Vv32.hf) + C Intrinsic Prototype: HVX_VectorPair Q6_Wsf_vadd_VhfVhf(HVX_Vector Vu, HVX_Vector Vv) + Instruction Type: CVI_VX_DV + Execution Slots: SLOT23 + ========================================================================== */ + +#define Q6_Wsf_vadd_VhfVhf(Vu,Vv) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vadd_sf_hf)(Vu,Vv) +#endif /* __HEXAGON_ARCH___ >= 68 */ + +#if __HVX_ARCH__ >= 68 +/* ========================================================================== + Assembly Syntax: Vd32.sf=vadd(Vu32.sf,Vv32.sf) + C Intrinsic Prototype: HVX_Vector Q6_Vsf_vadd_VsfVsf(HVX_Vector Vu, HVX_Vector Vv) + Instruction Type: CVI_VX + Execution Slots: SLOT23 + ========================================================================== */ + +#define Q6_Vsf_vadd_VsfVsf(Vu,Vv) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vadd_sf_sf)(Vu,Vv) +#endif /* __HEXAGON_ARCH___ >= 68 */ + +#if __HVX_ARCH__ >= 68 +/* ========================================================================== + Assembly Syntax: Vd32.w=vfmv(Vu32.w) + C Intrinsic Prototype: HVX_Vector Q6_Vw_vfmv_Vw(HVX_Vector Vu) + Instruction Type: CVI_VX_LATE + Execution Slots: SLOT23 + ========================================================================== */ + +#define Q6_Vw_vfmv_Vw(Vu) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vassign_fp)(Vu) +#endif /* __HEXAGON_ARCH___ >= 68 */ + +#if __HVX_ARCH__ >= 68 +/* ========================================================================== + Assembly Syntax: Vd32.hf=Vu32.qf16 + C Intrinsic Prototype: HVX_Vector Q6_Vhf_equals_Vqf16(HVX_Vector Vu) + Instruction Type: CVI_VS + Execution Slots: SLOT0123 + ========================================================================== */ + +#define Q6_Vhf_equals_Vqf16(Vu) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vconv_hf_qf16)(Vu) +#endif /* __HEXAGON_ARCH___ >= 68 */ + +#if __HVX_ARCH__ >= 68 +/* ========================================================================== + Assembly Syntax: Vd32.hf=Vuu32.qf32 + C Intrinsic Prototype: HVX_Vector Q6_Vhf_equals_Wqf32(HVX_VectorPair Vuu) + Instruction Type: CVI_VS + Execution Slots: SLOT0123 + ========================================================================== */ + +#define Q6_Vhf_equals_Wqf32(Vuu) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vconv_hf_qf32)(Vuu) +#endif /* __HEXAGON_ARCH___ >= 68 */ + +#if __HVX_ARCH__ >= 68 +/* ========================================================================== + Assembly Syntax: Vd32.sf=Vu32.qf32 + C Intrinsic Prototype: HVX_Vector Q6_Vsf_equals_Vqf32(HVX_Vector Vu) + Instruction Type: CVI_VS + Execution Slots: SLOT0123 + ========================================================================== */ + +#define Q6_Vsf_equals_Vqf32(Vu) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vconv_sf_qf32)(Vu) +#endif /* 
__HEXAGON_ARCH___ >= 68 */ + +#if __HVX_ARCH__ >= 68 +/* ========================================================================== + Assembly Syntax: Vd32.b=vcvt(Vu32.hf,Vv32.hf) + C Intrinsic Prototype: HVX_Vector Q6_Vb_vcvt_VhfVhf(HVX_Vector Vu, HVX_Vector Vv) + Instruction Type: CVI_VX + Execution Slots: SLOT23 + ========================================================================== */ + +#define Q6_Vb_vcvt_VhfVhf(Vu,Vv) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vcvt_b_hf)(Vu,Vv) +#endif /* __HEXAGON_ARCH___ >= 68 */ + +#if __HVX_ARCH__ >= 68 +/* ========================================================================== + Assembly Syntax: Vd32.h=vcvt(Vu32.hf) + C Intrinsic Prototype: HVX_Vector Q6_Vh_vcvt_Vhf(HVX_Vector Vu) + Instruction Type: CVI_VX + Execution Slots: SLOT23 + ========================================================================== */ + +#define Q6_Vh_vcvt_Vhf(Vu) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vcvt_h_hf)(Vu) +#endif /* __HEXAGON_ARCH___ >= 68 */ + +#if __HVX_ARCH__ >= 68 +/* ========================================================================== + Assembly Syntax: Vdd32.hf=vcvt(Vu32.b) + C Intrinsic Prototype: HVX_VectorPair Q6_Whf_vcvt_Vb(HVX_Vector Vu) + Instruction Type: CVI_VX_DV + Execution Slots: SLOT23 + ========================================================================== */ + +#define Q6_Whf_vcvt_Vb(Vu) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vcvt_hf_b)(Vu) +#endif /* __HEXAGON_ARCH___ >= 68 */ + +#if __HVX_ARCH__ >= 68 +/* ========================================================================== + Assembly Syntax: Vd32.hf=vcvt(Vu32.h) + C Intrinsic Prototype: HVX_Vector Q6_Vhf_vcvt_Vh(HVX_Vector Vu) + Instruction Type: CVI_VX + Execution Slots: SLOT23 + ========================================================================== */ + +#define Q6_Vhf_vcvt_Vh(Vu) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vcvt_hf_h)(Vu) +#endif /* __HEXAGON_ARCH___ >= 68 */ + +#if __HVX_ARCH__ >= 68 +/* ========================================================================== + Assembly Syntax: Vd32.hf=vcvt(Vu32.sf,Vv32.sf) + C Intrinsic Prototype: HVX_Vector Q6_Vhf_vcvt_VsfVsf(HVX_Vector Vu, HVX_Vector Vv) + Instruction Type: CVI_VX + Execution Slots: SLOT23 + ========================================================================== */ + +#define Q6_Vhf_vcvt_VsfVsf(Vu,Vv) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vcvt_hf_sf)(Vu,Vv) +#endif /* __HEXAGON_ARCH___ >= 68 */ + +#if __HVX_ARCH__ >= 68 +/* ========================================================================== + Assembly Syntax: Vdd32.hf=vcvt(Vu32.ub) + C Intrinsic Prototype: HVX_VectorPair Q6_Whf_vcvt_Vub(HVX_Vector Vu) + Instruction Type: CVI_VX_DV + Execution Slots: SLOT23 + ========================================================================== */ + +#define Q6_Whf_vcvt_Vub(Vu) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vcvt_hf_ub)(Vu) +#endif /* __HEXAGON_ARCH___ >= 68 */ + +#if __HVX_ARCH__ >= 68 +/* ========================================================================== + Assembly Syntax: Vd32.hf=vcvt(Vu32.uh) + C Intrinsic Prototype: HVX_Vector Q6_Vhf_vcvt_Vuh(HVX_Vector Vu) + Instruction Type: CVI_VX + Execution Slots: SLOT23 + ========================================================================== */ + +#define Q6_Vhf_vcvt_Vuh(Vu) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vcvt_hf_uh)(Vu) +#endif /* __HEXAGON_ARCH___ >= 68 */ + +#if __HVX_ARCH__ >= 68 +/* ========================================================================== + Assembly Syntax: 
Vdd32.sf=vcvt(Vu32.hf) + C Intrinsic Prototype: HVX_VectorPair Q6_Wsf_vcvt_Vhf(HVX_Vector Vu) + Instruction Type: CVI_VX_DV + Execution Slots: SLOT23 + ========================================================================== */ + +#define Q6_Wsf_vcvt_Vhf(Vu) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vcvt_sf_hf)(Vu) +#endif /* __HEXAGON_ARCH___ >= 68 */ + +#if __HVX_ARCH__ >= 68 +/* ========================================================================== + Assembly Syntax: Vd32.ub=vcvt(Vu32.hf,Vv32.hf) + C Intrinsic Prototype: HVX_Vector Q6_Vub_vcvt_VhfVhf(HVX_Vector Vu, HVX_Vector Vv) + Instruction Type: CVI_VX + Execution Slots: SLOT23 + ========================================================================== */ + +#define Q6_Vub_vcvt_VhfVhf(Vu,Vv) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vcvt_ub_hf)(Vu,Vv) +#endif /* __HEXAGON_ARCH___ >= 68 */ + +#if __HVX_ARCH__ >= 68 +/* ========================================================================== + Assembly Syntax: Vd32.uh=vcvt(Vu32.hf) + C Intrinsic Prototype: HVX_Vector Q6_Vuh_vcvt_Vhf(HVX_Vector Vu) + Instruction Type: CVI_VX + Execution Slots: SLOT23 + ========================================================================== */ + +#define Q6_Vuh_vcvt_Vhf(Vu) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vcvt_uh_hf)(Vu) +#endif /* __HEXAGON_ARCH___ >= 68 */ + +#if __HVX_ARCH__ >= 68 +/* ========================================================================== + Assembly Syntax: Vd32.sf=vdmpy(Vu32.hf,Vv32.hf) + C Intrinsic Prototype: HVX_Vector Q6_Vsf_vdmpy_VhfVhf(HVX_Vector Vu, HVX_Vector Vv) + Instruction Type: CVI_VX + Execution Slots: SLOT23 + ========================================================================== */ + +#define Q6_Vsf_vdmpy_VhfVhf(Vu,Vv) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vdmpy_sf_hf)(Vu,Vv) +#endif /* __HEXAGON_ARCH___ >= 68 */ + +#if __HVX_ARCH__ >= 68 +/* ========================================================================== + Assembly Syntax: Vx32.sf+=vdmpy(Vu32.hf,Vv32.hf) + C Intrinsic Prototype: HVX_Vector Q6_Vsf_vdmpyacc_VsfVhfVhf(HVX_Vector Vx, HVX_Vector Vu, HVX_Vector Vv) + Instruction Type: CVI_VX + Execution Slots: SLOT23 + ========================================================================== */ + +#define Q6_Vsf_vdmpyacc_VsfVhfVhf(Vx,Vu,Vv) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vdmpy_sf_hf_acc)(Vx,Vu,Vv) +#endif /* __HEXAGON_ARCH___ >= 68 */ + +#if __HVX_ARCH__ >= 68 +/* ========================================================================== + Assembly Syntax: Vd32.hf=vfmax(Vu32.hf,Vv32.hf) + C Intrinsic Prototype: HVX_Vector Q6_Vhf_vfmax_VhfVhf(HVX_Vector Vu, HVX_Vector Vv) + Instruction Type: CVI_VX_LATE + Execution Slots: SLOT23 + ========================================================================== */ + +#define Q6_Vhf_vfmax_VhfVhf(Vu,Vv) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vfmax_hf)(Vu,Vv) +#endif /* __HEXAGON_ARCH___ >= 68 */ + +#if __HVX_ARCH__ >= 68 +/* ========================================================================== + Assembly Syntax: Vd32.sf=vfmax(Vu32.sf,Vv32.sf) + C Intrinsic Prototype: HVX_Vector Q6_Vsf_vfmax_VsfVsf(HVX_Vector Vu, HVX_Vector Vv) + Instruction Type: CVI_VX_LATE + Execution Slots: SLOT23 + ========================================================================== */ + +#define Q6_Vsf_vfmax_VsfVsf(Vu,Vv) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vfmax_sf)(Vu,Vv) +#endif /* __HEXAGON_ARCH___ >= 68 */ + +#if __HVX_ARCH__ >= 68 +/* ========================================================================== + Assembly 
Syntax: Vd32.hf=vfmin(Vu32.hf,Vv32.hf) + C Intrinsic Prototype: HVX_Vector Q6_Vhf_vfmin_VhfVhf(HVX_Vector Vu, HVX_Vector Vv) + Instruction Type: CVI_VX_LATE + Execution Slots: SLOT23 + ========================================================================== */ + +#define Q6_Vhf_vfmin_VhfVhf(Vu,Vv) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vfmin_hf)(Vu,Vv) +#endif /* __HEXAGON_ARCH___ >= 68 */ + +#if __HVX_ARCH__ >= 68 +/* ========================================================================== + Assembly Syntax: Vd32.sf=vfmin(Vu32.sf,Vv32.sf) + C Intrinsic Prototype: HVX_Vector Q6_Vsf_vfmin_VsfVsf(HVX_Vector Vu, HVX_Vector Vv) + Instruction Type: CVI_VX_LATE + Execution Slots: SLOT23 + ========================================================================== */ + +#define Q6_Vsf_vfmin_VsfVsf(Vu,Vv) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vfmin_sf)(Vu,Vv) +#endif /* __HEXAGON_ARCH___ >= 68 */ + +#if __HVX_ARCH__ >= 68 +/* ========================================================================== + Assembly Syntax: Vd32.hf=vfneg(Vu32.hf) + C Intrinsic Prototype: HVX_Vector Q6_Vhf_vfneg_Vhf(HVX_Vector Vu) + Instruction Type: CVI_VX_LATE + Execution Slots: SLOT23 + ========================================================================== */ + +#define Q6_Vhf_vfneg_Vhf(Vu) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vfneg_hf)(Vu) +#endif /* __HEXAGON_ARCH___ >= 68 */ + +#if __HVX_ARCH__ >= 68 +/* ========================================================================== + Assembly Syntax: Vd32.sf=vfneg(Vu32.sf) + C Intrinsic Prototype: HVX_Vector Q6_Vsf_vfneg_Vsf(HVX_Vector Vu) + Instruction Type: CVI_VX_LATE + Execution Slots: SLOT23 + ========================================================================== */ + +#define Q6_Vsf_vfneg_Vsf(Vu) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vfneg_sf)(Vu) +#endif /* __HEXAGON_ARCH___ >= 68 */ + +#if __HVX_ARCH__ >= 68 +/* ========================================================================== + Assembly Syntax: Qd4=vcmp.gt(Vu32.hf,Vv32.hf) + C Intrinsic Prototype: HVX_VectorPred Q6_Q_vcmp_gt_VhfVhf(HVX_Vector Vu, HVX_Vector Vv) + Instruction Type: CVI_VA + Execution Slots: SLOT0123 + ========================================================================== */ + +#define Q6_Q_vcmp_gt_VhfVhf(Vu,Vv) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vandqrt)((__BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vgthf)(Vu,Vv)),-1) +#endif /* __HEXAGON_ARCH___ >= 68 */ + +#if __HVX_ARCH__ >= 68 +/* ========================================================================== + Assembly Syntax: Qx4&=vcmp.gt(Vu32.hf,Vv32.hf) + C Intrinsic Prototype: HVX_VectorPred Q6_Q_vcmp_gtand_QVhfVhf(HVX_VectorPred Qx, HVX_Vector Vu, HVX_Vector Vv) + Instruction Type: CVI_VA + Execution Slots: SLOT0123 + ========================================================================== */ + +#define Q6_Q_vcmp_gtand_QVhfVhf(Qx,Vu,Vv) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vandqrt)((__BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vgthf_and)(__BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vandvrt)((Qx),-1),Vu,Vv)),-1) +#endif /* __HEXAGON_ARCH___ >= 68 */ + +#if __HVX_ARCH__ >= 68 +/* ========================================================================== + Assembly Syntax: Qx4|=vcmp.gt(Vu32.hf,Vv32.hf) + C Intrinsic Prototype: HVX_VectorPred Q6_Q_vcmp_gtor_QVhfVhf(HVX_VectorPred Qx, HVX_Vector Vu, HVX_Vector Vv) + Instruction Type: CVI_VA + Execution Slots: SLOT0123 + ========================================================================== */ + +#define Q6_Q_vcmp_gtor_QVhfVhf(Qx,Vu,Vv) 
__BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vandqrt)((__BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vgthf_or)(__BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vandvrt)((Qx),-1),Vu,Vv)),-1) +#endif /* __HEXAGON_ARCH___ >= 68 */ + +#if __HVX_ARCH__ >= 68 +/* ========================================================================== + Assembly Syntax: Qx4^=vcmp.gt(Vu32.hf,Vv32.hf) + C Intrinsic Prototype: HVX_VectorPred Q6_Q_vcmp_gtxacc_QVhfVhf(HVX_VectorPred Qx, HVX_Vector Vu, HVX_Vector Vv) + Instruction Type: CVI_VA + Execution Slots: SLOT0123 + ========================================================================== */ + +#define Q6_Q_vcmp_gtxacc_QVhfVhf(Qx,Vu,Vv) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vandqrt)((__BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vgthf_xor)(__BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vandvrt)((Qx),-1),Vu,Vv)),-1) +#endif /* __HEXAGON_ARCH___ >= 68 */ + +#if __HVX_ARCH__ >= 68 +/* ========================================================================== + Assembly Syntax: Qd4=vcmp.gt(Vu32.sf,Vv32.sf) + C Intrinsic Prototype: HVX_VectorPred Q6_Q_vcmp_gt_VsfVsf(HVX_Vector Vu, HVX_Vector Vv) + Instruction Type: CVI_VA + Execution Slots: SLOT0123 + ========================================================================== */ + +#define Q6_Q_vcmp_gt_VsfVsf(Vu,Vv) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vandqrt)((__BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vgtsf)(Vu,Vv)),-1) +#endif /* __HEXAGON_ARCH___ >= 68 */ + +#if __HVX_ARCH__ >= 68 +/* ========================================================================== + Assembly Syntax: Qx4&=vcmp.gt(Vu32.sf,Vv32.sf) + C Intrinsic Prototype: HVX_VectorPred Q6_Q_vcmp_gtand_QVsfVsf(HVX_VectorPred Qx, HVX_Vector Vu, HVX_Vector Vv) + Instruction Type: CVI_VA + Execution Slots: SLOT0123 + ========================================================================== */ + +#define Q6_Q_vcmp_gtand_QVsfVsf(Qx,Vu,Vv) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vandqrt)((__BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vgtsf_and)(__BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vandvrt)((Qx),-1),Vu,Vv)),-1) +#endif /* __HEXAGON_ARCH___ >= 68 */ + +#if __HVX_ARCH__ >= 68 +/* ========================================================================== + Assembly Syntax: Qx4|=vcmp.gt(Vu32.sf,Vv32.sf) + C Intrinsic Prototype: HVX_VectorPred Q6_Q_vcmp_gtor_QVsfVsf(HVX_VectorPred Qx, HVX_Vector Vu, HVX_Vector Vv) + Instruction Type: CVI_VA + Execution Slots: SLOT0123 + ========================================================================== */ + +#define Q6_Q_vcmp_gtor_QVsfVsf(Qx,Vu,Vv) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vandqrt)((__BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vgtsf_or)(__BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vandvrt)((Qx),-1),Vu,Vv)),-1) +#endif /* __HEXAGON_ARCH___ >= 68 */ + +#if __HVX_ARCH__ >= 68 +/* ========================================================================== + Assembly Syntax: Qx4^=vcmp.gt(Vu32.sf,Vv32.sf) + C Intrinsic Prototype: HVX_VectorPred Q6_Q_vcmp_gtxacc_QVsfVsf(HVX_VectorPred Qx, HVX_Vector Vu, HVX_Vector Vv) + Instruction Type: CVI_VA + Execution Slots: SLOT0123 + ========================================================================== */ + +#define Q6_Q_vcmp_gtxacc_QVsfVsf(Qx,Vu,Vv) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vandqrt)((__BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vgtsf_xor)(__BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vandvrt)((Qx),-1),Vu,Vv)),-1) +#endif /* __HEXAGON_ARCH___ >= 68 */ + +#if __HVX_ARCH__ >= 68 +/* ========================================================================== + 
Assembly Syntax: Vd32.hf=vmax(Vu32.hf,Vv32.hf) + C Intrinsic Prototype: HVX_Vector Q6_Vhf_vmax_VhfVhf(HVX_Vector Vu, HVX_Vector Vv) + Instruction Type: CVI_VA + Execution Slots: SLOT0123 + ========================================================================== */ + +#define Q6_Vhf_vmax_VhfVhf(Vu,Vv) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vmax_hf)(Vu,Vv) +#endif /* __HEXAGON_ARCH___ >= 68 */ + +#if __HVX_ARCH__ >= 68 +/* ========================================================================== + Assembly Syntax: Vd32.sf=vmax(Vu32.sf,Vv32.sf) + C Intrinsic Prototype: HVX_Vector Q6_Vsf_vmax_VsfVsf(HVX_Vector Vu, HVX_Vector Vv) + Instruction Type: CVI_VA + Execution Slots: SLOT0123 + ========================================================================== */ + +#define Q6_Vsf_vmax_VsfVsf(Vu,Vv) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vmax_sf)(Vu,Vv) +#endif /* __HEXAGON_ARCH___ >= 68 */ + +#if __HVX_ARCH__ >= 68 +/* ========================================================================== + Assembly Syntax: Vd32.hf=vmin(Vu32.hf,Vv32.hf) + C Intrinsic Prototype: HVX_Vector Q6_Vhf_vmin_VhfVhf(HVX_Vector Vu, HVX_Vector Vv) + Instruction Type: CVI_VA + Execution Slots: SLOT0123 + ========================================================================== */ + +#define Q6_Vhf_vmin_VhfVhf(Vu,Vv) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vmin_hf)(Vu,Vv) +#endif /* __HEXAGON_ARCH___ >= 68 */ + +#if __HVX_ARCH__ >= 68 +/* ========================================================================== + Assembly Syntax: Vd32.sf=vmin(Vu32.sf,Vv32.sf) + C Intrinsic Prototype: HVX_Vector Q6_Vsf_vmin_VsfVsf(HVX_Vector Vu, HVX_Vector Vv) + Instruction Type: CVI_VA + Execution Slots: SLOT0123 + ========================================================================== */ + +#define Q6_Vsf_vmin_VsfVsf(Vu,Vv) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vmin_sf)(Vu,Vv) +#endif /* __HEXAGON_ARCH___ >= 68 */ + +#if __HVX_ARCH__ >= 68 +/* ========================================================================== + Assembly Syntax: Vd32.hf=vmpy(Vu32.hf,Vv32.hf) + C Intrinsic Prototype: HVX_Vector Q6_Vhf_vmpy_VhfVhf(HVX_Vector Vu, HVX_Vector Vv) + Instruction Type: CVI_VX + Execution Slots: SLOT23 + ========================================================================== */ + +#define Q6_Vhf_vmpy_VhfVhf(Vu,Vv) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vmpy_hf_hf)(Vu,Vv) +#endif /* __HEXAGON_ARCH___ >= 68 */ + +#if __HVX_ARCH__ >= 68 +/* ========================================================================== + Assembly Syntax: Vx32.hf+=vmpy(Vu32.hf,Vv32.hf) + C Intrinsic Prototype: HVX_Vector Q6_Vhf_vmpyacc_VhfVhfVhf(HVX_Vector Vx, HVX_Vector Vu, HVX_Vector Vv) + Instruction Type: CVI_VX + Execution Slots: SLOT23 + ========================================================================== */ + +#define Q6_Vhf_vmpyacc_VhfVhfVhf(Vx,Vu,Vv) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vmpy_hf_hf_acc)(Vx,Vu,Vv) +#endif /* __HEXAGON_ARCH___ >= 68 */ + +#if __HVX_ARCH__ >= 68 +/* ========================================================================== + Assembly Syntax: Vd32.qf16=vmpy(Vu32.qf16,Vv32.qf16) + C Intrinsic Prototype: HVX_Vector Q6_Vqf16_vmpy_Vqf16Vqf16(HVX_Vector Vu, HVX_Vector Vv) + Instruction Type: CVI_VX_DV + Execution Slots: SLOT23 + ========================================================================== */ + +#define Q6_Vqf16_vmpy_Vqf16Vqf16(Vu,Vv) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vmpy_qf16)(Vu,Vv) +#endif /* __HEXAGON_ARCH___ >= 68 */ + +#if __HVX_ARCH__ >= 68 +/* 
========================================================================== + Assembly Syntax: Vd32.qf16=vmpy(Vu32.hf,Vv32.hf) + C Intrinsic Prototype: HVX_Vector Q6_Vqf16_vmpy_VhfVhf(HVX_Vector Vu, HVX_Vector Vv) + Instruction Type: CVI_VX_DV + Execution Slots: SLOT23 + ========================================================================== */ + +#define Q6_Vqf16_vmpy_VhfVhf(Vu,Vv) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vmpy_qf16_hf)(Vu,Vv) +#endif /* __HEXAGON_ARCH___ >= 68 */ + +#if __HVX_ARCH__ >= 68 +/* ========================================================================== + Assembly Syntax: Vd32.qf16=vmpy(Vu32.qf16,Vv32.hf) + C Intrinsic Prototype: HVX_Vector Q6_Vqf16_vmpy_Vqf16Vhf(HVX_Vector Vu, HVX_Vector Vv) + Instruction Type: CVI_VX_DV + Execution Slots: SLOT23 + ========================================================================== */ + +#define Q6_Vqf16_vmpy_Vqf16Vhf(Vu,Vv) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vmpy_qf16_mix_hf)(Vu,Vv) +#endif /* __HEXAGON_ARCH___ >= 68 */ + +#if __HVX_ARCH__ >= 68 +/* ========================================================================== + Assembly Syntax: Vd32.qf32=vmpy(Vu32.qf32,Vv32.qf32) + C Intrinsic Prototype: HVX_Vector Q6_Vqf32_vmpy_Vqf32Vqf32(HVX_Vector Vu, HVX_Vector Vv) + Instruction Type: CVI_VX_DV + Execution Slots: SLOT23 + ========================================================================== */ + +#define Q6_Vqf32_vmpy_Vqf32Vqf32(Vu,Vv) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vmpy_qf32)(Vu,Vv) +#endif /* __HEXAGON_ARCH___ >= 68 */ + +#if __HVX_ARCH__ >= 68 +/* ========================================================================== + Assembly Syntax: Vdd32.qf32=vmpy(Vu32.hf,Vv32.hf) + C Intrinsic Prototype: HVX_VectorPair Q6_Wqf32_vmpy_VhfVhf(HVX_Vector Vu, HVX_Vector Vv) + Instruction Type: CVI_VX_DV + Execution Slots: SLOT23 + ========================================================================== */ + +#define Q6_Wqf32_vmpy_VhfVhf(Vu,Vv) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vmpy_qf32_hf)(Vu,Vv) +#endif /* __HEXAGON_ARCH___ >= 68 */ + +#if __HVX_ARCH__ >= 68 +/* ========================================================================== + Assembly Syntax: Vdd32.qf32=vmpy(Vu32.qf16,Vv32.hf) + C Intrinsic Prototype: HVX_VectorPair Q6_Wqf32_vmpy_Vqf16Vhf(HVX_Vector Vu, HVX_Vector Vv) + Instruction Type: CVI_VX_DV + Execution Slots: SLOT23 + ========================================================================== */ + +#define Q6_Wqf32_vmpy_Vqf16Vhf(Vu,Vv) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vmpy_qf32_mix_hf)(Vu,Vv) +#endif /* __HEXAGON_ARCH___ >= 68 */ + +#if __HVX_ARCH__ >= 68 +/* ========================================================================== + Assembly Syntax: Vdd32.qf32=vmpy(Vu32.qf16,Vv32.qf16) + C Intrinsic Prototype: HVX_VectorPair Q6_Wqf32_vmpy_Vqf16Vqf16(HVX_Vector Vu, HVX_Vector Vv) + Instruction Type: CVI_VX_DV + Execution Slots: SLOT23 + ========================================================================== */ + +#define Q6_Wqf32_vmpy_Vqf16Vqf16(Vu,Vv) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vmpy_qf32_qf16)(Vu,Vv) +#endif /* __HEXAGON_ARCH___ >= 68 */ + +#if __HVX_ARCH__ >= 68 +/* ========================================================================== + Assembly Syntax: Vd32.qf32=vmpy(Vu32.sf,Vv32.sf) + C Intrinsic Prototype: HVX_Vector Q6_Vqf32_vmpy_VsfVsf(HVX_Vector Vu, HVX_Vector Vv) + Instruction Type: CVI_VX_DV + Execution Slots: SLOT23 + ========================================================================== */ + +#define 
Q6_Vqf32_vmpy_VsfVsf(Vu,Vv) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vmpy_qf32_sf)(Vu,Vv) +#endif /* __HEXAGON_ARCH___ >= 68 */ + +#if __HVX_ARCH__ >= 68 +/* ========================================================================== + Assembly Syntax: Vdd32.sf=vmpy(Vu32.hf,Vv32.hf) + C Intrinsic Prototype: HVX_VectorPair Q6_Wsf_vmpy_VhfVhf(HVX_Vector Vu, HVX_Vector Vv) + Instruction Type: CVI_VX_DV + Execution Slots: SLOT23 + ========================================================================== */ + +#define Q6_Wsf_vmpy_VhfVhf(Vu,Vv) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vmpy_sf_hf)(Vu,Vv) +#endif /* __HEXAGON_ARCH___ >= 68 */ + +#if __HVX_ARCH__ >= 68 +/* ========================================================================== + Assembly Syntax: Vxx32.sf+=vmpy(Vu32.hf,Vv32.hf) + C Intrinsic Prototype: HVX_VectorPair Q6_Wsf_vmpyacc_WsfVhfVhf(HVX_VectorPair Vxx, HVX_Vector Vu, HVX_Vector Vv) + Instruction Type: CVI_VX_DV + Execution Slots: SLOT23 + ========================================================================== */ + +#define Q6_Wsf_vmpyacc_WsfVhfVhf(Vxx,Vu,Vv) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vmpy_sf_hf_acc)(Vxx,Vu,Vv) +#endif /* __HEXAGON_ARCH___ >= 68 */ + +#if __HVX_ARCH__ >= 68 +/* ========================================================================== + Assembly Syntax: Vd32.sf=vmpy(Vu32.sf,Vv32.sf) + C Intrinsic Prototype: HVX_Vector Q6_Vsf_vmpy_VsfVsf(HVX_Vector Vu, HVX_Vector Vv) + Instruction Type: CVI_VX_DV + Execution Slots: SLOT23 + ========================================================================== */ + +#define Q6_Vsf_vmpy_VsfVsf(Vu,Vv) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vmpy_sf_sf)(Vu,Vv) +#endif /* __HEXAGON_ARCH___ >= 68 */ + +#if __HVX_ARCH__ >= 68 +/* ========================================================================== + Assembly Syntax: Vd32.qf16=vsub(Vu32.hf,Vv32.hf) + C Intrinsic Prototype: HVX_Vector Q6_Vqf16_vsub_VhfVhf(HVX_Vector Vu, HVX_Vector Vv) + Instruction Type: CVI_VS + Execution Slots: SLOT0123 + ========================================================================== */ + +#define Q6_Vqf16_vsub_VhfVhf(Vu,Vv) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vsub_hf)(Vu,Vv) +#endif /* __HEXAGON_ARCH___ >= 68 */ + +#if __HVX_ARCH__ >= 68 +/* ========================================================================== + Assembly Syntax: Vd32.hf=vsub(Vu32.hf,Vv32.hf) + C Intrinsic Prototype: HVX_Vector Q6_Vhf_vsub_VhfVhf(HVX_Vector Vu, HVX_Vector Vv) + Instruction Type: CVI_VX + Execution Slots: SLOT23 + ========================================================================== */ + +#define Q6_Vhf_vsub_VhfVhf(Vu,Vv) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vsub_hf_hf)(Vu,Vv) +#endif /* __HEXAGON_ARCH___ >= 68 */ + +#if __HVX_ARCH__ >= 68 +/* ========================================================================== + Assembly Syntax: Vd32.qf16=vsub(Vu32.qf16,Vv32.qf16) + C Intrinsic Prototype: HVX_Vector Q6_Vqf16_vsub_Vqf16Vqf16(HVX_Vector Vu, HVX_Vector Vv) + Instruction Type: CVI_VS + Execution Slots: SLOT0123 + ========================================================================== */ + +#define Q6_Vqf16_vsub_Vqf16Vqf16(Vu,Vv) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vsub_qf16)(Vu,Vv) +#endif /* __HEXAGON_ARCH___ >= 68 */ + +#if __HVX_ARCH__ >= 68 +/* ========================================================================== + Assembly Syntax: Vd32.qf16=vsub(Vu32.qf16,Vv32.hf) + C Intrinsic Prototype: HVX_Vector Q6_Vqf16_vsub_Vqf16Vhf(HVX_Vector Vu, HVX_Vector Vv) + Instruction Type: CVI_VS + 
Execution Slots: SLOT0123 + ========================================================================== */ + +#define Q6_Vqf16_vsub_Vqf16Vhf(Vu,Vv) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vsub_qf16_mix)(Vu,Vv) +#endif /* __HEXAGON_ARCH___ >= 68 */ + +#if __HVX_ARCH__ >= 68 +/* ========================================================================== + Assembly Syntax: Vd32.qf32=vsub(Vu32.qf32,Vv32.qf32) + C Intrinsic Prototype: HVX_Vector Q6_Vqf32_vsub_Vqf32Vqf32(HVX_Vector Vu, HVX_Vector Vv) + Instruction Type: CVI_VS + Execution Slots: SLOT0123 + ========================================================================== */ + +#define Q6_Vqf32_vsub_Vqf32Vqf32(Vu,Vv) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vsub_qf32)(Vu,Vv) +#endif /* __HEXAGON_ARCH___ >= 68 */ + +#if __HVX_ARCH__ >= 68 +/* ========================================================================== + Assembly Syntax: Vd32.qf32=vsub(Vu32.qf32,Vv32.sf) + C Intrinsic Prototype: HVX_Vector Q6_Vqf32_vsub_Vqf32Vsf(HVX_Vector Vu, HVX_Vector Vv) + Instruction Type: CVI_VS + Execution Slots: SLOT0123 + ========================================================================== */ + +#define Q6_Vqf32_vsub_Vqf32Vsf(Vu,Vv) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vsub_qf32_mix)(Vu,Vv) +#endif /* __HEXAGON_ARCH___ >= 68 */ + +#if __HVX_ARCH__ >= 68 +/* ========================================================================== + Assembly Syntax: Vd32.qf32=vsub(Vu32.sf,Vv32.sf) + C Intrinsic Prototype: HVX_Vector Q6_Vqf32_vsub_VsfVsf(HVX_Vector Vu, HVX_Vector Vv) + Instruction Type: CVI_VS + Execution Slots: SLOT0123 + ========================================================================== */ + +#define Q6_Vqf32_vsub_VsfVsf(Vu,Vv) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vsub_sf)(Vu,Vv) +#endif /* __HEXAGON_ARCH___ >= 68 */ + +#if __HVX_ARCH__ >= 68 +/* ========================================================================== + Assembly Syntax: Vdd32.sf=vsub(Vu32.hf,Vv32.hf) + C Intrinsic Prototype: HVX_VectorPair Q6_Wsf_vsub_VhfVhf(HVX_Vector Vu, HVX_Vector Vv) + Instruction Type: CVI_VX_DV + Execution Slots: SLOT23 + ========================================================================== */ + +#define Q6_Wsf_vsub_VhfVhf(Vu,Vv) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vsub_sf_hf)(Vu,Vv) +#endif /* __HEXAGON_ARCH___ >= 68 */ + +#if __HVX_ARCH__ >= 68 +/* ========================================================================== + Assembly Syntax: Vd32.sf=vsub(Vu32.sf,Vv32.sf) + C Intrinsic Prototype: HVX_Vector Q6_Vsf_vsub_VsfVsf(HVX_Vector Vu, HVX_Vector Vv) + Instruction Type: CVI_VX + Execution Slots: SLOT23 + ========================================================================== */ + +#define Q6_Vsf_vsub_VsfVsf(Vu,Vv) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vsub_sf_sf)(Vu,Vv) +#endif /* __HEXAGON_ARCH___ >= 68 */ + +#if __HVX_ARCH__ >= 69 +/* ========================================================================== + Assembly Syntax: Vd32.ub=vasr(Vuu32.uh,Vv32.ub):rnd:sat + C Intrinsic Prototype: HVX_Vector Q6_Vub_vasr_WuhVub_rnd_sat(HVX_VectorPair Vuu, HVX_Vector Vv) + Instruction Type: CVI_VS + Execution Slots: SLOT0123 + ========================================================================== */ + +#define Q6_Vub_vasr_WuhVub_rnd_sat(Vuu,Vv) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vasrvuhubrndsat)(Vuu,Vv) +#endif /* __HEXAGON_ARCH___ >= 69 */ + +#if __HVX_ARCH__ >= 69 +/* ========================================================================== + Assembly Syntax: Vd32.ub=vasr(Vuu32.uh,Vv32.ub):sat + C 
Intrinsic Prototype: HVX_Vector Q6_Vub_vasr_WuhVub_sat(HVX_VectorPair Vuu, HVX_Vector Vv) + Instruction Type: CVI_VS + Execution Slots: SLOT0123 + ========================================================================== */ + +#define Q6_Vub_vasr_WuhVub_sat(Vuu,Vv) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vasrvuhubsat)(Vuu,Vv) +#endif /* __HEXAGON_ARCH___ >= 69 */ + +#if __HVX_ARCH__ >= 69 +/* ========================================================================== + Assembly Syntax: Vd32.uh=vasr(Vuu32.w,Vv32.uh):rnd:sat + C Intrinsic Prototype: HVX_Vector Q6_Vuh_vasr_WwVuh_rnd_sat(HVX_VectorPair Vuu, HVX_Vector Vv) + Instruction Type: CVI_VS + Execution Slots: SLOT0123 + ========================================================================== */ + +#define Q6_Vuh_vasr_WwVuh_rnd_sat(Vuu,Vv) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vasrvwuhrndsat)(Vuu,Vv) +#endif /* __HEXAGON_ARCH___ >= 69 */ + +#if __HVX_ARCH__ >= 69 +/* ========================================================================== + Assembly Syntax: Vd32.uh=vasr(Vuu32.w,Vv32.uh):sat + C Intrinsic Prototype: HVX_Vector Q6_Vuh_vasr_WwVuh_sat(HVX_VectorPair Vuu, HVX_Vector Vv) + Instruction Type: CVI_VS + Execution Slots: SLOT0123 + ========================================================================== */ + +#define Q6_Vuh_vasr_WwVuh_sat(Vuu,Vv) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vasrvwuhsat)(Vuu,Vv) +#endif /* __HEXAGON_ARCH___ >= 69 */ + +#if __HVX_ARCH__ >= 69 +/* ========================================================================== + Assembly Syntax: Vd32.uh=vmpy(Vu32.uh,Vv32.uh):>>16 + C Intrinsic Prototype: HVX_Vector Q6_Vuh_vmpy_VuhVuh_rs16(HVX_Vector Vu, HVX_Vector Vv) + Instruction Type: CVI_VX + Execution Slots: SLOT23 + ========================================================================== */ + +#define Q6_Vuh_vmpy_VuhVuh_rs16(Vu,Vv) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vmpyuhvs)(Vu,Vv) +#endif /* __HEXAGON_ARCH___ >= 69 */ + #endif /* __HVX__ */ #endif From d45a2479989985937b016a4d66ffa1ed6c885613 Mon Sep 17 00:00:00 2001 From: Brendon Cahoon Date: Sat, 18 Dec 2021 15:01:50 -0600 Subject: [PATCH 161/293] [AMDGPU] Don't remove VGPR to AGPR dead spills from frame info Removing dead frame indices for VGPR to AGPR spills is incorrect when the frame index is shared by multiple objects, which may occur due to stack slot coloring. The problem is that subsequent code that processes the other object will assert because the stack frame index is marked dead. Removing dead frame indices is needed prior to stack slot coloring, which is what happens with SGPR to VGPR spills. These spills are lowered prior to stack slot coloring, but the VGPR to AGPR spills are processed afterwards during the Prolog/Epilog Inserter pass. This patch marks the VGPR to AGPR spill slot as dead if the slot is not used by another object. 
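In other words, a VGPR-to-AGPR spill slot may only be dropped when no other load or store in the function still refers to the same frame index. A minimal sketch of that rule, assuming hypothetical set names rather than the BitVector bookkeeping the patch below adds to processFunctionBeforeFrameFinalized:

#include <cstdio>
#include <set>
#include <vector>

// VGPRSpillFIs:  frame indices written by VGPR->AGPR spills.
// OtherStackFIs: frame indices touched by any other stack access, e.g. an
//                SGPR spill that shares the slot after stack slot coloring.
static std::vector<int> deadVGPRToAGPRSlots(const std::set<int> &VGPRSpillFIs,
                                            const std::set<int> &OtherStackFIs) {
  std::vector<int> Dead;
  for (int FI : VGPRSpillFIs)
    if (!OtherStackFIs.count(FI)) // no other user of this slot -> dead
      Dead.push_back(FI);
  return Dead;
}

int main() {
  std::set<int> VGPRSpills = {0, 1}; // both slots receive VGPR->AGPR spills
  std::set<int> OtherUses = {0};     // slot 0 is also used by an SGPR spill
  for (int FI : deadVGPRToAGPRSlots(VGPRSpills, OtherUses))
    std::printf("frame index %d can be removed\n", FI); // prints only index 1
  return 0;
}

This matches the intent of the new MIR tests below: a slot shared with an SGPR spill keeps its stack object, while a slot used only by the VGPR-to-AGPR spill can be dropped.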
Differential Revision: https://reviews.llvm.org/D115996 --- llvm/lib/Target/AMDGPU/SIFrameLowering.cpp | 12 ++- .../Target/AMDGPU/SIMachineFunctionInfo.cpp | 2 +- .../lib/Target/AMDGPU/SIMachineFunctionInfo.h | 7 ++ .../CodeGen/AMDGPU/same-slot-agpr-sgpr.mir | 88 +++++++++++++++++++ 4 files changed, 107 insertions(+), 2 deletions(-) create mode 100644 llvm/test/CodeGen/AMDGPU/same-slot-agpr-sgpr.mir diff --git a/llvm/lib/Target/AMDGPU/SIFrameLowering.cpp b/llvm/lib/Target/AMDGPU/SIFrameLowering.cpp index 4706c74be721..d4fe74ecb96e 100644 --- a/llvm/lib/Target/AMDGPU/SIFrameLowering.cpp +++ b/llvm/lib/Target/AMDGPU/SIFrameLowering.cpp @@ -1167,11 +1167,13 @@ void SIFrameLowering::processFunctionBeforeFrameFinalized( if (SpillVGPRToAGPR) { // To track the spill frame indices handled in this pass. BitVector SpillFIs(MFI.getObjectIndexEnd(), false); + BitVector NonVGPRSpillFIs(MFI.getObjectIndexEnd(), false); bool SeenDbgInstr = false; for (MachineBasicBlock &MBB : MF) { for (MachineInstr &MI : llvm::make_early_inc_range(MBB)) { + int FrameIndex; if (MI.isDebugInstr()) SeenDbgInstr = true; @@ -1191,10 +1193,18 @@ void SIFrameLowering::processFunctionBeforeFrameFinalized( SpillFIs.set(FI); continue; } - } + } else if (TII->isStoreToStackSlot(MI, FrameIndex) || + TII->isLoadFromStackSlot(MI, FrameIndex)) + NonVGPRSpillFIs.set(FrameIndex); } } + // Stack slot coloring may assign different objets to the same stack slot. + // If not, then the VGPR to AGPR spill slot is dead. + for (unsigned FI : SpillFIs.set_bits()) + if (!NonVGPRSpillFIs.test(FI)) + FuncInfo->setVGPRToAGPRSpillDead(FI); + for (MachineBasicBlock &MBB : MF) { for (MCPhysReg Reg : FuncInfo->getVGPRSpillAGPRs()) MBB.addLiveIn(Reg); diff --git a/llvm/lib/Target/AMDGPU/SIMachineFunctionInfo.cpp b/llvm/lib/Target/AMDGPU/SIMachineFunctionInfo.cpp index 6f92ac6dac45..3ce368ef4db9 100644 --- a/llvm/lib/Target/AMDGPU/SIMachineFunctionInfo.cpp +++ b/llvm/lib/Target/AMDGPU/SIMachineFunctionInfo.cpp @@ -444,7 +444,7 @@ void SIMachineFunctionInfo::removeDeadFrameIndices(MachineFrameInfo &MFI) { MFI.setStackID(i, TargetStackID::Default); for (auto &R : VGPRToAGPRSpills) { - if (R.second.FullyAllocated) + if (R.second.IsDead) MFI.RemoveStackObject(R.first); } } diff --git a/llvm/lib/Target/AMDGPU/SIMachineFunctionInfo.h b/llvm/lib/Target/AMDGPU/SIMachineFunctionInfo.h index c305bc20e40d..8accbf611c5f 100644 --- a/llvm/lib/Target/AMDGPU/SIMachineFunctionInfo.h +++ b/llvm/lib/Target/AMDGPU/SIMachineFunctionInfo.h @@ -465,6 +465,7 @@ class SIMachineFunctionInfo final : public AMDGPUMachineFunction { struct VGPRSpillToAGPR { SmallVector Lanes; bool FullyAllocated = false; + bool IsDead = false; }; // Map WWM VGPR to a stack slot that is used to save/restore it in the @@ -546,6 +547,12 @@ class SIMachineFunctionInfo final : public AMDGPUMachineFunction { : I->second.Lanes[Lane]; } + void setVGPRToAGPRSpillDead(int FrameIndex) { + auto I = VGPRToAGPRSpills.find(FrameIndex); + if (I != VGPRToAGPRSpills.end()) + I->second.IsDead = true; + } + bool haveFreeLanesForSGPRSpill(const MachineFunction &MF, unsigned NumLane) const; bool allocateSGPRSpillToVGPR(MachineFunction &MF, int FI); diff --git a/llvm/test/CodeGen/AMDGPU/same-slot-agpr-sgpr.mir b/llvm/test/CodeGen/AMDGPU/same-slot-agpr-sgpr.mir new file mode 100644 index 000000000000..9cbdafe7f97c --- /dev/null +++ b/llvm/test/CodeGen/AMDGPU/same-slot-agpr-sgpr.mir @@ -0,0 +1,88 @@ +# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py +# RUN: llc -mtriple=amdgcn-amd-amdhsa 
-mcpu=gfx90a -verify-machineinstrs -run-pass=prologepilog -o - %s | FileCheck %s + +--- +name: same_slot_agpr_sgpr +tracksRegLiveness: true +stack: + - { id: 0, name: '', type: spill-slot, offset: 0, size: 8, alignment: 4 } +machineFunctionInfo: + hasSpilledVGPRs: true + scratchRSrcReg: $sgpr0_sgpr1_sgpr2_sgpr3 + stackPtrOffsetReg: $sgpr32 +body: | + bb.0: + ; CHECK-LABEL: name: same_slot_agpr_sgpr + ; CHECK: liveins: $agpr0, $agpr1 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: $vgpr0 = IMPLICIT_DEF + ; CHECK-NEXT: $agpr1 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr0, implicit $exec + ; CHECK-NEXT: $sgpr4_sgpr5 = IMPLICIT_DEF + ; CHECK-NEXT: $sgpr6_sgpr7 = S_MOV_B64 $exec + ; CHECK-NEXT: $exec = S_MOV_B64 3, implicit-def $vgpr0 + ; CHECK-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 8, 0, 0, 0, implicit $exec :: (store (s32) into %stack.1, addrspace 5) + ; CHECK-NEXT: $vgpr0 = V_WRITELANE_B32 $sgpr4, 0, undef $vgpr0, implicit $sgpr4_sgpr5 + ; CHECK-NEXT: $vgpr0 = V_WRITELANE_B32 $sgpr5, 1, $vgpr0, implicit killed $sgpr4_sgpr5 + ; CHECK-NEXT: $agpr1 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr0, implicit $exec + ; CHECK-NEXT: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 8, 0, 0, 0, implicit $exec :: (load (s32) from %stack.1, addrspace 5) + ; CHECK-NEXT: $exec = S_MOV_B64 killed $sgpr6_sgpr7, implicit killed $vgpr0 + ; CHECK-NEXT: S_ENDPGM 0 + $vgpr0 = IMPLICIT_DEF + SI_SPILL_AV32_SAVE killed $vgpr0, %stack.0, $sgpr32, 0, implicit $exec :: (store (s32) into %stack.0, addrspace 5) + $sgpr4_sgpr5 = IMPLICIT_DEF + SI_SPILL_S64_SAVE killed renamable $sgpr4_sgpr5, %stack.0, implicit $exec, implicit $sgpr32 :: (store (s64) into %stack.0, align 4, addrspace 5) + S_ENDPGM 0 +... +--- +name: diff_slot_agpr_sgpr +tracksRegLiveness: true +stack: + - { id: 0, name: '', type: spill-slot, offset: 0, size: 4, alignment: 4 } + - { id: 1, name: '', type: spill-slot, offset: 0, size: 8, alignment: 4 } +machineFunctionInfo: + hasSpilledVGPRs: true + scratchRSrcReg: $sgpr0_sgpr1_sgpr2_sgpr3 + stackPtrOffsetReg: $sgpr32 +body: | + bb.0: + ; CHECK-LABEL: name: diff_slot_agpr_sgpr + ; CHECK: liveins: $agpr0 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: $vgpr0 = IMPLICIT_DEF + ; CHECK-NEXT: $agpr0 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr0, implicit $exec + ; CHECK-NEXT: $sgpr4_sgpr5 = IMPLICIT_DEF + ; CHECK-NEXT: $sgpr6_sgpr7 = S_MOV_B64 $exec + ; CHECK-NEXT: $exec = S_MOV_B64 3, implicit-def $vgpr0 + ; CHECK-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 8, 0, 0, 0, implicit $exec :: (store (s32) into %stack.2, addrspace 5) + ; CHECK-NEXT: $vgpr0 = V_WRITELANE_B32 $sgpr4, 0, undef $vgpr0, implicit $sgpr4_sgpr5 + ; CHECK-NEXT: $vgpr0 = V_WRITELANE_B32 $sgpr5, 1, $vgpr0, implicit killed $sgpr4_sgpr5 + ; CHECK-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, 0, implicit $exec :: (store (s32) into %stack.1, addrspace 5) + ; CHECK-NEXT: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 8, 0, 0, 0, implicit $exec :: (load (s32) from %stack.2, addrspace 5) + ; CHECK-NEXT: $exec = S_MOV_B64 killed $sgpr6_sgpr7, implicit killed $vgpr0 + ; CHECK-NEXT: S_ENDPGM 0 + $vgpr0 = IMPLICIT_DEF + SI_SPILL_AV32_SAVE killed $vgpr0, %stack.0, $sgpr32, 0, implicit $exec :: (store (s32) into %stack.0, addrspace 5) + $sgpr4_sgpr5 = IMPLICIT_DEF + SI_SPILL_S64_SAVE killed renamable $sgpr4_sgpr5, %stack.1, implicit $exec, implicit $sgpr32 :: (store (s64) into %stack.0, align 4, addrspace 5) + S_ENDPGM 0 +... 
+--- +name: dead_vgpr_slot +tracksRegLiveness: true +stack: + - { id: 0, name: '', type: spill-slot, offset: 0, size: 4, alignment: 4 } +machineFunctionInfo: + hasSpilledVGPRs: true + stackPtrOffsetReg: $sgpr32 +body: | + bb.0: + ; CHECK-LABEL: name: dead_vgpr_slot + ; CHECK: liveins: $agpr0 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: $vgpr0 = IMPLICIT_DEF + ; CHECK-NEXT: $agpr0 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr0, implicit $exec + ; CHECK-NEXT: S_ENDPGM 0 + $vgpr0 = IMPLICIT_DEF + SI_SPILL_AV32_SAVE killed $vgpr0, %stack.0, $sgpr32, 0, implicit $exec :: (store (s32) into %stack.0, addrspace 5) + S_ENDPGM 0 +... From 4bf31659fac73ed6ebeb4b4d95dc41aac5dcd666 Mon Sep 17 00:00:00 2001 From: Kirill Stoimenov Date: Thu, 23 Dec 2021 17:11:25 +0000 Subject: [PATCH 162/293] Revert "[ASan] Moved optimized callbacks into a separate library." This reverts commit ab3640aa0e8361921a5d0cdc393a5b75e78ec22b. Reviewed By: kstoimenov Differential Revision: https://reviews.llvm.org/D116223 --- clang/lib/Driver/ToolChains/CommonArgs.cpp | 5 ---- clang/test/Driver/sanitizer-ld.c | 2 +- compiler-rt/lib/asan/CMakeLists.txt | 29 +--------------------- compiler-rt/lib/asan/tests/CMakeLists.txt | 2 -- 4 files changed, 2 insertions(+), 36 deletions(-) diff --git a/clang/lib/Driver/ToolChains/CommonArgs.cpp b/clang/lib/Driver/ToolChains/CommonArgs.cpp index 267625c70b25..407f81a2ae09 100644 --- a/clang/lib/Driver/ToolChains/CommonArgs.cpp +++ b/clang/lib/Driver/ToolChains/CommonArgs.cpp @@ -826,11 +826,6 @@ collectSanitizerRuntimes(const ToolChain &TC, const ArgList &Args, if (SanArgs.needsStatsRt() && SanArgs.linkRuntimes()) StaticRuntimes.push_back("stats_client"); - // Always link the static runtime regardless of DSO or executable. - if (SanArgs.needsAsanRt()) { - HelperStaticRuntimes.push_back("asan_static"); - } - // Collect static runtimes. if (Args.hasArg(options::OPT_shared)) { // Don't link static runtimes into DSOs. 
diff --git a/clang/test/Driver/sanitizer-ld.c b/clang/test/Driver/sanitizer-ld.c index ea8c49f2384a..d62e19fd4021 100644 --- a/clang/test/Driver/sanitizer-ld.c +++ b/clang/test/Driver/sanitizer-ld.c @@ -22,7 +22,7 @@ // RUN: --sysroot=%S/Inputs/basic_linux_tree \ // RUN: | FileCheck --check-prefix=CHECK-ASAN-NO-LINK-RUNTIME-LINUX %s // -// CHECK-ASAN-NO-LINK-RUNTIME-LINUX-NOT: libclang_rt.asan-x86_64 +// CHECK-ASAN-NO-LINK-RUNTIME-LINUX-NOT: libclang_rt.asan // RUN: %clang -no-canonical-prefixes %s -### -o %t.o 2>&1 \ // RUN: -target i386-unknown-linux -fuse-ld=ld -fsanitize=address -shared-libsan \ diff --git a/compiler-rt/lib/asan/CMakeLists.txt b/compiler-rt/lib/asan/CMakeLists.txt index 2ca59c94cc75..2e63c8d05168 100644 --- a/compiler-rt/lib/asan/CMakeLists.txt +++ b/compiler-rt/lib/asan/CMakeLists.txt @@ -34,6 +34,7 @@ set(ASAN_SOURCES if (NOT WIN32 AND NOT APPLE) list(APPEND ASAN_SOURCES + asan_rtl_x86_64.S asan_interceptors_vfork.S ) endif() @@ -42,12 +43,6 @@ set(ASAN_CXX_SOURCES asan_new_delete.cpp ) -if (NOT WIN32 AND NOT APPLE) - set(ASAN_STATIC_SOURCES - asan_rtl_x86_64.S - ) -endif() - set(ASAN_PREINIT_SOURCES asan_preinit.cpp ) @@ -140,12 +135,6 @@ if(NOT APPLE) ADDITIONAL_HEADERS ${ASAN_HEADERS} CFLAGS ${ASAN_CFLAGS} DEFS ${ASAN_COMMON_DEFINITIONS}) - add_compiler_rt_object_libraries(RTAsan_static - ARCHS ${ASAN_SUPPORTED_ARCH} - SOURCES ${ASAN_STATIC_SOURCES} - ADDITIONAL_HEADERS ${ASAN_HEADERS} - CFLAGS ${ASAN_CFLAGS} - DEFS ${ASAN_COMMON_DEFINITIONS}) add_compiler_rt_object_libraries(RTAsan_preinit ARCHS ${ASAN_SUPPORTED_ARCH} SOURCES ${ASAN_PREINIT_SOURCES} @@ -187,14 +176,6 @@ if(APPLE) LINK_FLAGS ${WEAK_SYMBOL_LINK_FLAGS} DEFS ${ASAN_DYNAMIC_DEFINITIONS} PARENT_TARGET asan) - - add_compiler_rt_runtime(clang_rt.asan_static - STATIC - ARCHS ${ASAN_SUPPORTED_ARCH} - OBJECT_LIBS RTAsan_static - CFLAGS ${ASAN_CFLAGS} - DEFS ${ASAN_COMMON_DEFINITIONS} - PARENT_TARGET asan) else() # Build separate libraries for each target. @@ -226,14 +207,6 @@ else() DEFS ${ASAN_COMMON_DEFINITIONS} PARENT_TARGET asan) - add_compiler_rt_runtime(clang_rt.asan_static - STATIC - ARCHS ${ASAN_SUPPORTED_ARCH} - OBJECT_LIBS RTAsan_static - CFLAGS ${ASAN_CFLAGS} - DEFS ${ASAN_COMMON_DEFINITIONS} - PARENT_TARGET asan) - add_compiler_rt_runtime(clang_rt.asan-preinit STATIC ARCHS ${ASAN_SUPPORTED_ARCH} diff --git a/compiler-rt/lib/asan/tests/CMakeLists.txt b/compiler-rt/lib/asan/tests/CMakeLists.txt index 95a324766ae7..cc375acf2dfb 100644 --- a/compiler-rt/lib/asan/tests/CMakeLists.txt +++ b/compiler-rt/lib/asan/tests/CMakeLists.txt @@ -261,7 +261,6 @@ if(COMPILER_RT_CAN_EXECUTE_TESTS AND NOT ANDROID) set(ASAN_TEST_RUNTIME_OBJECTS $ $ - $ $ $ $ @@ -287,7 +286,6 @@ if(ANDROID) # Test w/o ASan instrumentation. Link it with ASan statically. add_executable(AsanNoinstTest # FIXME: .arch? $ - $ $ $ $ From 4374824ccf6e7ae68264d996a9ae5bb5e3be7fc5 Mon Sep 17 00:00:00 2001 From: Fangrui Song Date: Thu, 23 Dec 2021 09:53:08 -0800 Subject: [PATCH 163/293] [ELF] --gc-sections: combine two iterations over inputSections There is a slight speed-up. 
--- lld/ELF/MarkLive.cpp | 47 ++++++++++++++++++++++++++++++++------------ 1 file changed, 34 insertions(+), 13 deletions(-) diff --git a/lld/ELF/MarkLive.cpp b/lld/ELF/MarkLive.cpp index 1a17f3bbfdc5..2c712ea964b1 100644 --- a/lld/ELF/MarkLive.cpp +++ b/lld/ELF/MarkLive.cpp @@ -244,8 +244,6 @@ template void MarkLive::run() { for (StringRef s : script->referencedSymbols) markSymbol(symtab->find(s)); - // Preserve special sections and those which are specified in linker - // script KEEP command. for (InputSectionBase *sec : inputSections) { // Mark .eh_frame sections as live because there are usually no relocations // that point to .eh_frames. Otherwise, the garbage collector would drop @@ -259,6 +257,7 @@ template void MarkLive::run() { scanEhFrameSection(*eh, rels.rels); else if (rels.relas.size()) scanEhFrameSection(*eh, rels.relas); + continue; } if (sec->flags & SHF_GNU_RETAIN) { @@ -268,6 +267,39 @@ template void MarkLive::run() { if (sec->flags & SHF_LINK_ORDER) continue; + // Usually, non-SHF_ALLOC sections are not removed even if they are + // unreachable through relocations because reachability is not a good signal + // whether they are garbage or not (e.g. there is usually no section + // referring to a .comment section, but we want to keep it.) When a + // non-SHF_ALLOC section is retained, we also retain sections dependent on + // it. + // + // Note on SHF_LINK_ORDER: Such sections contain metadata and they + // have a reverse dependency on the InputSection they are linked with. + // We are able to garbage collect them. + // + // Note on SHF_REL{,A}: Such sections reach here only when -r + // or --emit-reloc were given. And they are subject of garbage + // collection because, if we remove a text section, we also + // remove its relocation section. + // + // Note on nextInSectionGroup: The ELF spec says that group sections are + // included or omitted as a unit. We take the interpretation that: + // + // - Group members (nextInSectionGroup != nullptr) are subject to garbage + // collection. + // - Groups members are retained or discarded as a unit. + if (!(sec->flags & SHF_ALLOC)) { + bool isRel = sec->type == SHT_REL || sec->type == SHT_RELA; + if (!isRel && !sec->nextInSectionGroup) { + sec->markLive(); + for (InputSection *isec : sec->dependentSections) + isec->markLive(); + } + } + + // Preserve special sections and those which are specified in linker + // script KEEP command. if (isReserved(sec) || script->shouldKeep(sec)) { enqueue(sec, 0); } else if ((!config->zStartStopGC || sec->name.startswith("__libc_")) && @@ -377,17 +409,6 @@ template void elf::markLive() { // - Group members (nextInSectionGroup != nullptr) are subject to garbage // collection. // - Groups members are retained or discarded as a unit. - for (InputSectionBase *sec : inputSections) { - bool isAlloc = (sec->flags & SHF_ALLOC); - bool isLinkOrder = (sec->flags & SHF_LINK_ORDER); - bool isRel = (sec->type == SHT_REL || sec->type == SHT_RELA); - - if (!isAlloc && !isLinkOrder && !isRel && !sec->nextInSectionGroup) { - sec->markLive(); - for (InputSection *isec : sec->dependentSections) - isec->markLive(); - } - } // Follow the graph to mark all live sections. 
for (unsigned curPart = 1; curPart <= partitions.size(); ++curPart) From 5410152827cc143b99514df543a5db9949c3c7d0 Mon Sep 17 00:00:00 2001 From: Jakub Kuderski Date: Thu, 23 Dec 2021 13:13:25 -0500 Subject: [PATCH 164/293] [Bazel] Add target for llvm-tli-checker Reviewed By: vettoreldaniele, GMNGeoffrey Differential Revision: https://reviews.llvm.org/D116222 --- .../llvm-project-overlay/llvm/BUILD.bazel | 39 +++++++++++++++++++ 1 file changed, 39 insertions(+) diff --git a/utils/bazel/llvm-project-overlay/llvm/BUILD.bazel b/utils/bazel/llvm-project-overlay/llvm/BUILD.bazel index 7c31359fb4ed..85d79a29b571 100644 --- a/utils/bazel/llvm-project-overlay/llvm/BUILD.bazel +++ b/utils/bazel/llvm-project-overlay/llvm/BUILD.bazel @@ -4207,6 +4207,45 @@ cc_binary( ], ) +gentbl( + name = "TLICheckerOptsTableGen", + strip_include_prefix = "tools/llvm-tli-checker", + tbl_outs = [( + "-gen-opt-parser-defs", + "tools/llvm-tli-checker/Opts.inc", + )], + tblgen = ":llvm-tblgen", + td_file = "tools/llvm-tli-checker/Opts.td", + td_srcs = ["include/llvm/Option/OptParser.td"], +) + +cc_binary( + name = "llvm-tli-checker", + testonly = True, + srcs = glob([ + "tools/llvm-tli-checker/*.cpp", + "tools/llvm-tli-checker/*.h", + ]), + copts = llvm_copts, + stamp = 0, + deps = [ + ":Analysis", + ":BinaryFormat", + ":BitReader", + ":BitstreamReader", + ":Core", + ":Demangle", + ":MC", + ":MCParser", + ":Object", + ":Option", + ":Remarks", + ":Support", + ":TextAPI", + ":TLICheckerOptsTableGen", + ], +) + cc_binary( name = "obj2yaml", testonly = True, From 1d1b5efdef49fa814a7e4feadd175a3dfc6460a0 Mon Sep 17 00:00:00 2001 From: Krzysztof Parzyszek Date: Thu, 23 Dec 2021 10:02:24 -0800 Subject: [PATCH 165/293] [Hexagon] Driver/preprocessor options for Hexagon v69 --- .../clang/Basic/DiagnosticDriverKinds.td | 5 +++ clang/include/clang/Driver/Options.td | 14 ++++++++ clang/lib/Basic/Targets/Hexagon.cpp | 9 +++++- clang/lib/Driver/ToolChains/Hexagon.cpp | 27 ++++++++++++++++ clang/lib/Headers/hexagon_types.h | 32 ------------------- clang/test/Driver/hexagon-hvx-ieee-fp.c | 14 ++++++++ clang/test/Driver/hexagon-hvx-qfloat.c | 25 +++++++++++++++ clang/test/Driver/hexagon-hvx.c | 31 +++++++++++++++--- clang/test/Driver/hexagon-toolchain-elf.c | 16 ++++++++++ clang/test/Misc/target-invalid-cpu-note.c | 2 +- clang/test/Preprocessor/hexagon-predefines.c | 11 +++++++ 11 files changed, 148 insertions(+), 38 deletions(-) create mode 100644 clang/test/Driver/hexagon-hvx-ieee-fp.c create mode 100644 clang/test/Driver/hexagon-hvx-qfloat.c diff --git a/clang/include/clang/Basic/DiagnosticDriverKinds.td b/clang/include/clang/Basic/DiagnosticDriverKinds.td index 9776cd7b7b25..3025e6fe3c02 100644 --- a/clang/include/clang/Basic/DiagnosticDriverKinds.td +++ b/clang/include/clang/Basic/DiagnosticDriverKinds.td @@ -449,6 +449,11 @@ def warn_drv_vectorize_needs_hvx : Warning< "auto-vectorization requires HVX, use -mhvx to enable it">, InGroup; +def err_drv_invalid_hvx_qfloat : Error< + "-mhvx-qfloat is not supported without a -mhvx/-mhvx= flag.">; +def err_drv_invalid_arch_hvx_qfloat : Error< + "-mhvx-qfloat is not supported on HVX %0.">; + def err_drv_module_header_wrong_kind : Error< "header file '%0' input type '%1' does not match type of prior input " "in module compilation; use '-x %2' to override">; diff --git a/clang/include/clang/Driver/Options.td b/clang/include/clang/Driver/Options.td index 08e9e1a3432a..dc8bd831f2a2 100644 --- a/clang/include/clang/Driver/Options.td +++ b/clang/include/clang/Driver/Options.td @@ -4160,6 +4160,8 
@@ def mv67t : Flag<["-"], "mv67t">, Group, Alias, AliasArgs<["hexagonv67t"]>; def mv68 : Flag<["-"], "mv68">, Group, Alias, AliasArgs<["hexagonv68"]>; +def mv69 : Flag<["-"], "mv69">, Group, + Alias, AliasArgs<["hexagonv69"]>; def mhexagon_hvx : Flag<["-"], "mhvx">, Group, HelpText<"Enable Hexagon Vector eXtensions">; def mhexagon_hvx_EQ : Joined<["-"], "mhvx=">, @@ -4171,6 +4173,18 @@ def mno_hexagon_hvx : Flag<["-"], "mno-hvx">, def mhexagon_hvx_length_EQ : Joined<["-"], "mhvx-length=">, Group, HelpText<"Set Hexagon Vector Length">, Values<"64B,128B">; +def mhexagon_hvx_qfloat : Flag<["-"], "mhvx-qfloat">, + Group, + HelpText<"Enable Hexagon HVX QFloat instructions">; +def mno_hexagon_hvx_qfloat : Flag<["-"], "mno-hvx-qfloat">, + Group, + HelpText<"Disable Hexagon HVX QFloat instructions">; +def mhexagon_hvx_ieee_fp : Flag<["-"], "mhvx-ieee-fp">, + Group, + HelpText<"Enable Hexagon HVX IEEE floating-point">; +def mno_hexagon_hvx_ieee_fp : Flag<["-"], "mno-hvx-ieee-fp">, + Group, + HelpText<"Disable Hexagon HVX IEEE floating-point">; def ffixed_r19: Flag<["-"], "ffixed-r19">, HelpText<"Reserve register r19 (Hexagon only)">; def mmemops : Flag<["-"], "mmemops">, Group, diff --git a/clang/lib/Basic/Targets/Hexagon.cpp b/clang/lib/Basic/Targets/Hexagon.cpp index 9c37dee7e89a..161369242926 100644 --- a/clang/lib/Basic/Targets/Hexagon.cpp +++ b/clang/lib/Basic/Targets/Hexagon.cpp @@ -68,6 +68,9 @@ void HexagonTargetInfo::getTargetDefines(const LangOptions &Opts, } else if (CPU == "hexagonv68") { Builder.defineMacro("__HEXAGON_V68__"); Builder.defineMacro("__HEXAGON_ARCH__", "68"); + } else if (CPU == "hexagonv69") { + Builder.defineMacro("__HEXAGON_V69__"); + Builder.defineMacro("__HEXAGON_ARCH__", "69"); } if (hasFeature("hvx-length64b")) { @@ -128,6 +131,10 @@ bool HexagonTargetInfo::handleTargetFeatures(std::vector &Features, else if (F == "+audio") HasAudio = true; } + if (CPU.compare("hexagonv68") >= 0) { + HasLegalHalfType = true; + HasFloat16 = true; + } return true; } @@ -214,7 +221,7 @@ static constexpr CPUSuffix Suffixes[] = { {{"hexagonv60"}, {"60"}}, {{"hexagonv62"}, {"62"}}, {{"hexagonv65"}, {"65"}}, {{"hexagonv66"}, {"66"}}, {{"hexagonv67"}, {"67"}}, {{"hexagonv67t"}, {"67t"}}, - {{"hexagonv68"}, {"68"}}, + {{"hexagonv68"}, {"68"}}, {{"hexagonv69"}, {"69"}}, }; const char *HexagonTargetInfo::getHexagonCPUSuffix(StringRef Name) { diff --git a/clang/lib/Driver/ToolChains/Hexagon.cpp b/clang/lib/Driver/ToolChains/Hexagon.cpp index 2ce7904ecc40..f9785e42025c 100644 --- a/clang/lib/Driver/ToolChains/Hexagon.cpp +++ b/clang/lib/Driver/ToolChains/Hexagon.cpp @@ -88,6 +88,27 @@ static void handleHVXTargetFeatures(const Driver &D, const ArgList &Args, Args.MakeArgString(llvm::Twine("+hvx-length") + HVXLength.lower()); Features.push_back(HVXFeature); } + + // Handle -mhvx-qfloat. + // QFloat is valid only on HVX v68/v68+ as of now. 
+ unsigned short CpuVer; + Cpu.drop_front(1).getAsInteger(10, CpuVer); + if (Arg *A = Args.getLastArg(options::OPT_mhexagon_hvx_qfloat, + options::OPT_mno_hexagon_hvx_qfloat)) { + if (A->getOption().matches(options::OPT_mno_hexagon_hvx_qfloat)) { + StringRef OptName = A->getOption().getName().substr(4); + Features.push_back(Args.MakeArgString("-" + OptName)); + return; + } + StringRef OptName = A->getOption().getName().substr(1); + if (HasHVX) { + if (CpuVer >= 68) + Features.push_back(Args.MakeArgString("+" + OptName)); + else + D.Diag(diag::err_drv_invalid_arch_hvx_qfloat) << Cpu; + } else + D.Diag(diag::err_drv_invalid_hvx_qfloat); + } } // Hexagon target features. @@ -156,6 +177,12 @@ void hexagon::Assembler::ConstructJob(Compilation &C, const JobAction &JA, CmdArgs.push_back("-fsyntax-only"); } + if (Arg *A = Args.getLastArg(options::OPT_mhexagon_hvx_ieee_fp, + options::OPT_mno_hexagon_hvx_ieee_fp)) { + if (A->getOption().matches(options::OPT_mhexagon_hvx_ieee_fp)) + CmdArgs.push_back("-mhvx-ieee-fp"); + } + if (auto G = toolchains::HexagonToolChain::getSmallDataThreshold(Args)) { CmdArgs.push_back(Args.MakeArgString("-gpsize=" + Twine(G.getValue()))); } diff --git a/clang/lib/Headers/hexagon_types.h b/clang/lib/Headers/hexagon_types.h index 6958809418d8..029727cc4817 100644 --- a/clang/lib/Headers/hexagon_types.h +++ b/clang/lib/Headers/hexagon_types.h @@ -1177,37 +1177,6 @@ class HEXAGON_Vect32C { #endif /* __cplusplus */ -// V65 Silver types -#if __Q6S_ARCH__ >= 65 - // Silver vector types are 128 bytes, and pairs are 256. The vector predicate - // types are 16 bytes and 32 bytes for pairs. - typedef long HEXAGON_VecPred128 __attribute__((__vector_size__(16))) - __attribute__((aligned(128))); - - typedef long HEXAGON_VecPred256 __attribute__((__vector_size__(32))) - __attribute__((aligned(128))); - - typedef long HEXAGON_Vect1024 __attribute__((__vector_size__(128))) - __attribute__((aligned(128))); - - typedef long HEXAGON_Vect2048 __attribute__((__vector_size__(256))) - __attribute__((aligned(256))); - - typedef long HEXAGON_UVect1024 __attribute__((__vector_size__(128))) - __attribute__((aligned(4))); - - typedef long HEXAGON_UVect2048 __attribute__((__vector_size__(256))) - __attribute__((aligned(4))); - - #define Q6S_VectorPredPair HEXAGON_VecPred256 - #define Q6S_VectorPred HEXAGON_VecPred128 - #define Q6S_Vector HEXAGON_Vect1024 - #define Q6S_VectorPair HEXAGON_Vect2048 - #define Q6S_UVector HEXAGON_UVect1024 - #define Q6S_UVectorPair HEXAGON_UVect2048 - -#else /* __Q6S_ARCH__ >= 65 */ - // V65 Vector types #if __HVX_ARCH__ >= 65 #if defined __HVX__ && (__HVX_LENGTH__ == 128) @@ -1256,7 +1225,6 @@ class HEXAGON_Vect32C { #endif /* defined __HVX__ && (__HVX_LENGTH__ == 64) */ #endif /* defined __HVX__ && (__HVX_LENGTH__ == 128) */ #endif /* __HVX_ARCH__ >= 65 */ -#endif /* __Q6S_ARCH__ >= 65 */ /* Predicates */ diff --git a/clang/test/Driver/hexagon-hvx-ieee-fp.c b/clang/test/Driver/hexagon-hvx-ieee-fp.c new file mode 100644 index 000000000000..acc9a910f0f1 --- /dev/null +++ b/clang/test/Driver/hexagon-hvx-ieee-fp.c @@ -0,0 +1,14 @@ +// ----------------------------------------------------------------------------- +// Tests for the hvx ieee fp feature and errors. 
+// ----------------------------------------------------------------------------- + +// RUN: %clang -c %s -### -target hexagon-unknown-elf -mv68 -mhvx -mhvx-ieee-fp \ +// RUN: 2>&1 | FileCheck -check-prefix=CHECK-IEEEFP %s +// RUN: %clang -c %s -### -target hexagon-unknown-elf -mv66 -mhvx=v68 -mhvx-ieee-fp \ +// RUN: 2>&1 | FileCheck -check-prefix=CHECK-IEEEFP %s +// CHECK-IEEEFP: "-target-feature" "+hvx-ieee-fp" + +// RUN: %clang -c %s -### -target hexagon-unknown-elf -mv68 -mhvx -mhvx-ieee-fp \ +// RUN: -mno-hvx-ieee-fp 2>&1 | FileCheck -check-prefix=CHECK-NO-IEEEFP %s +// CHECK-NO-IEEEFP: "-target-feature" "-hvx-ieee-fp" + diff --git a/clang/test/Driver/hexagon-hvx-qfloat.c b/clang/test/Driver/hexagon-hvx-qfloat.c new file mode 100644 index 000000000000..698451aae58f --- /dev/null +++ b/clang/test/Driver/hexagon-hvx-qfloat.c @@ -0,0 +1,25 @@ +// ----------------------------------------------------------------------------- +// Tests for the hvx qfloat feature and errors. +// ----------------------------------------------------------------------------- + +// RUN: %clang -c %s -### -target hexagon-unknown-elf -mv68 -mhvx -mhvx-qfloat \ +// RUN: 2>&1 | FileCheck -check-prefix=CHECK-QFLOAT %s +// RUN: %clang -c %s -### -target hexagon-unknown-elf -mv66 -mhvx=v68 -mhvx-qfloat \ +// RUN: 2>&1 | FileCheck -check-prefix=CHECK-QFLOAT %s +// CHECK-QFLOAT: "-target-feature" "+hvx-qfloat" + +// RUN: %clang -c %s -### -target hexagon-unknown-elf -mv68 -mhvx -mhvx-qfloat \ +// RUN: -mno-hvx-qfloat 2>&1 | FileCheck -check-prefix=CHECK-NO-QFLOAT %s +// CHECK-NO-QFLOAT: "-target-feature" "-hvx-qfloat" + +// QFloat is valid only on hvxv68 and hvxv68+. +// RUN: %clang -c %s -### -target hexagon-unknown-elf -mv68 -mhvx=v66 \ +// RUN: -mhvx-qfloat 2>&1 | FileCheck -check-prefix=CHECK-ERROR1 %s +// RUN: %clang -c %s -### -target hexagon-unknown-elf -mv66 -mhvx -mhvx-qfloat \ +// RUN: 2>&1 | FileCheck -check-prefix=CHECK-ERROR1 %s +// CHECK-ERROR1: error: -mhvx-qfloat is not supported on HVX v66. + +// QFloat is valid only if HVX is enabled. +// RUN: %clang -c %s -### -target hexagon-unknown-elf -mv68 -mhvx-qfloat \ +// RUN: 2>&1 | FileCheck -check-prefix=CHECK-ERROR2 %s +// CHECK-ERROR2: error: -mhvx-qfloat is not supported without a -mhvx/-mhvx= flag. diff --git a/clang/test/Driver/hexagon-hvx.c b/clang/test/Driver/hexagon-hvx.c index 4642aef81a31..aab7e329471b 100644 --- a/clang/test/Driver/hexagon-hvx.c +++ b/clang/test/Driver/hexagon-hvx.c @@ -2,10 +2,6 @@ // Tests for the hvx features and warnings. 
// ----------------------------------------------------------------------------- -// RUN: %clang -c %s -### -target hexagon-unknown-elf -mv66 -mhvx \ -// RUN: 2>&1 | FileCheck -check-prefix=CHECKHVX166 %s -// CHECKHVX166: "-target-feature" "+hvxv66" - // RUN: %clang -c %s -### -target hexagon-unknown-elf -mv65 -mhvx \ // RUN: 2>&1 | FileCheck -check-prefix=CHECKHVX165 %s // CHECKHVX165: "-target-feature" "+hvxv65" @@ -14,6 +10,10 @@ // RUN: 2>&1 | FileCheck -check-prefix=CHECKHVX162 %s // CHECKHVX162: "-target-feature" "+hvxv62" +// RUN: %clang -c %s -### -target hexagon-unknown-elf -mv66 -mhvx \ +// RUN: 2>&1 | FileCheck -check-prefix=CHECKHVX166 %s +// CHECKHVX166: "-target-feature" "+hvxv66" + // RUN: %clang -c %s -### -target hexagon-unknown-elf -mv65 -mhvx \ // RUN: -mhvx-length=128B 2>&1 | FileCheck -check-prefix=CHECKHVX2 %s @@ -39,6 +39,17 @@ // RUN: 2>&1 | FileCheck -check-prefix=CHECK-NOHVX %s // CHECK-NOHVX-NOT: "-target-feature" "+hvx +// No hvx-ieee-fp target feature must be added if -mno-hvx-ieee-fp occurs last +// RUN: %clang -c %s -### -target hexagon-unknown-elf -mv69 -mhvx-ieee-fp -mno-hvx \ +// RUN: -mno-hvx-ieee-fp 2>&1 | FileCheck -check-prefix=CHECK-NOHVX-IEEE-FP %s +// RUN: %clang -c %s -### -target hexagon-unknown-elf -mv69 -mhvx-ieee-fp -mhvx \ +// RUN: -mno-hvx-ieee-fp 2>&1 | FileCheck -check-prefix=CHECK-HVX-NOHVX-IEEE-FP %s +// +// CHECK-NOHVX-IEEE-FP-NOT: "-target-feature" "+hvx-ieee-fp" +// CHECK-HVX-NOHVX-IEEE-FP-NOT: "-target-feature" "+hvx-ieee-fp" +// CHECK-HVX-NOHVX-IEEE-FP: "-target-feature" "+hvx +// CHECK-HVX-NOHVX-IEEE-FP-NOT: "-target-feature" "+hvx-ieee-fp" + // Hvx target feature should be added if -mno-hvx doesn't occur last // RUN: %clang -c %s -### -target hexagon-unknown-elf -mv62 -mno-hvx -mhvx\ // RUN: 2>&1 | FileCheck -check-prefix=CHECK-HVXFEAT %s @@ -93,3 +104,15 @@ // RUN: %clang -c %s -### -target hexagon-unknown-elf -mhvx -mhvx-length=128 \ // RUN: 2>&1 | FileCheck -check-prefix=CHECK-HVXLENGTH-VALUE-ERROR %s // CHECK-HVXLENGTH-VALUE-ERROR: error: unsupported argument '{{.*}}' to option 'mhvx-length=' + +// Test -mhvx-ieee-fp flag +// RUN: %clang -c %s -### -target hexagon-unknown-elf -mv69 -mhvx-ieee-fp \ +// RUN: 2>&1 | FileCheck -check-prefix=CHECK-HVXIEEEFP-LONE %s +// RUN: %clang -c %s -### -target hexagon-unknown-elf -mv69 -mhvx -mhvx-ieee-fp \ +// RUN: 2>&1 | FileCheck -check-prefix=CHECK-HVXIEEEFP %s +// RUN: %clang -c %s -### -target hexagon-unknown-elf -mv69 -mno-hvx -mhvx-ieee-fp \ +// RUN: 2>&1 | FileCheck -check-prefix=CHECK-HVXIEEEFP %s +// CHECK-HVXIEEEFP-LONE-NOT: "-target-feature" "+hvx" +// CHECK-HVXIEEEFP-LONE: "-target-feature" "+hvx-ieee-fp" +// CHECK-HVXIEEEFP: "-target-feature" "+hvx-ieee-fp" +// CHECK-HVXIEEEFP-LONE-NOT: "-target-feature" "+hvx" diff --git a/clang/test/Driver/hexagon-toolchain-elf.c b/clang/test/Driver/hexagon-toolchain-elf.c index cc11f9fcba9e..d7c8015547e4 100644 --- a/clang/test/Driver/hexagon-toolchain-elf.c +++ b/clang/test/Driver/hexagon-toolchain-elf.c @@ -151,6 +151,22 @@ // CHECK02B: "-cc1" {{.*}} "-target-cpu" "hexagonv67t" // CHECK02B: hexagon-link{{.*}}/Inputs/hexagon_tree/Tools/bin/../target/hexagon/lib/v67t/crt0 +// RUN: %clang -### -target hexagon-unknown-elf \ +// RUN: -ccc-install-dir %S/Inputs/hexagon_tree/Tools/bin \ +// RUN: -mcpu=hexagonv68 -fuse-ld=hexagon-link\ +// RUN: %s 2>&1 \ +// RUN: | FileCheck -check-prefix=CHECK02C %s +// CHECK02C: "-cc1" {{.*}} "-target-cpu" "hexagonv68" +// CHECK02C: hexagon-link{{.*}}/Inputs/hexagon_tree/Tools/bin/../target/hexagon/lib/v68/crt0 + 
+// RUN: %clang -### -target hexagon-unknown-elf \ +// RUN: -ccc-install-dir %S/Inputs/hexagon_tree/Tools/bin \ +// RUN: -mcpu=hexagonv69 -fuse-ld=hexagon-link\ +// RUN: %s 2>&1 \ +// RUN: | FileCheck -check-prefix=CHECK02D %s +// CHECK02D: "-cc1" {{.*}} "-target-cpu" "hexagonv69" +// CHECK02D: hexagon-link{{.*}}/Inputs/hexagon_tree/Tools/bin/../target/hexagon/lib/v69/crt0 + // ----------------------------------------------------------------------------- // Test Linker related args // ----------------------------------------------------------------------------- diff --git a/clang/test/Misc/target-invalid-cpu-note.c b/clang/test/Misc/target-invalid-cpu-note.c index 1b436d8d904f..04f5a7532b42 100644 --- a/clang/test/Misc/target-invalid-cpu-note.c +++ b/clang/test/Misc/target-invalid-cpu-note.c @@ -69,7 +69,7 @@ // RUN: not %clang_cc1 -triple hexagon--- -target-cpu not-a-cpu -fsyntax-only %s 2>&1 | FileCheck %s --check-prefix HEXAGON // HEXAGON: error: unknown target CPU 'not-a-cpu' -// HEXAGON-NEXT: note: valid target CPU values are: hexagonv5, hexagonv55, hexagonv60, hexagonv62, hexagonv65, hexagonv66, hexagonv67, hexagonv67t, hexagonv68{{$}} +// HEXAGON-NEXT: note: valid target CPU values are: hexagonv5, hexagonv55, hexagonv60, hexagonv62, hexagonv65, hexagonv66, hexagonv67, hexagonv67t, hexagonv68, hexagonv69{{$}} // RUN: not %clang_cc1 -triple bpf--- -target-cpu not-a-cpu -fsyntax-only %s 2>&1 | FileCheck %s --check-prefix BPF // BPF: error: unknown target CPU 'not-a-cpu' diff --git a/clang/test/Preprocessor/hexagon-predefines.c b/clang/test/Preprocessor/hexagon-predefines.c index e948d7024850..576dc4b6cb38 100644 --- a/clang/test/Preprocessor/hexagon-predefines.c +++ b/clang/test/Preprocessor/hexagon-predefines.c @@ -107,6 +107,17 @@ // CHECK-V68HVX-128B: #define __HVX__ 1 // CHECK-V68HVX-128B: #define __hexagon__ 1 +// RUN: %clang_cc1 -E -dM -triple hexagon-unknown-elf -target-cpu hexagonv69 \ +// RUN: -target-feature +hvxv69 -target-feature +hvx-length128b %s | FileCheck \ +// RUN: %s -check-prefix CHECK-V69HVX-128B +// CHECK-V69HVX-128B: #define __HEXAGON_ARCH__ 69 +// CHECK-V69HVX-128B: #define __HEXAGON_V69__ 1 +// CHECK-V69HVX-128B: #define __HVX_ARCH__ 69 +// CHECK-V69HVX-128B: #define __HVX_LENGTH__ 128 +// CHECK-V69HVX-128B: #define __HVX__ 1 +// CHECK-V69HVX-128B: #define __hexagon__ 1 + + // RUN: %clang_cc1 -E -dM -triple hexagon-unknown-elf -target-cpu hexagonv67 \ // RUN: -target-feature +hvxv67 -target-feature +hvx-length128b %s | FileCheck \ // RUN: %s -check-prefix CHECK-ELF From 3e52096809eb6d8689f6a8130cd0ee7f80ac4acf Mon Sep 17 00:00:00 2001 From: Michael Jones Date: Thu, 23 Dec 2021 10:40:10 -0800 Subject: [PATCH 166/293] [libc][NFC] fix variable name A variable was named in a way that doesn't match the format. This patch renames it to match the format. 
Differential Revision: https://reviews.llvm.org/D116228 --- libc/src/math/generic/logf.cpp | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/libc/src/math/generic/logf.cpp b/libc/src/math/generic/logf.cpp index 523045ffa684..e85c030a02ec 100644 --- a/libc/src/math/generic/logf.cpp +++ b/libc/src/math/generic/logf.cpp @@ -167,20 +167,20 @@ LLVM_LIBC_FUNCTION(float, logf, (float x)) { m += xbits.get_exponent(); // Set bits to 1.m xbits.set_unbiased_exponent(0x7F); - int fIndex = xbits.get_mantissa() >> 16; + int f_index = xbits.get_mantissa() >> 16; FPBits f(xbits.val); f.bits &= ~0x0000'FFFF; double d = static_cast(xbits) - static_cast(f); - d *= ONE_OVER_F[fIndex]; + d *= ONE_OVER_F[f_index]; double r = __llvm_libc::fputil::polyeval( d, 0x1.0000000008169p+0, -0x1.0000004f78405p-1, 0x1.555654d2bc769p-2, -0x1.00a570d090322p-2, 0x1.e158d823f89cap-3); double extra_factor = - __llvm_libc::fputil::fma(static_cast(m), LOG_2, LOG_F[fIndex]); + __llvm_libc::fputil::fma(static_cast(m), LOG_2, LOG_F[f_index]); switch (FPBits(x).uintval()) { case 0x3f80d19f: return 0x1.a1e82cp-8f; From 9fe0d67c9a72488a8d51bdbff52916a91bc06914 Mon Sep 17 00:00:00 2001 From: Vitaly Buka Date: Thu, 23 Dec 2021 11:02:56 -0800 Subject: [PATCH 167/293] [sanitizer] Check kArgVMax in SymbolizerProcess::GetArgV --- .../lib/sanitizer_common/sanitizer_symbolizer_libcdep.cpp | 1 + compiler-rt/lib/sanitizer_common/sanitizer_symbolizer_mac.cpp | 1 + .../lib/sanitizer_common/sanitizer_symbolizer_posix_libcdep.cpp | 1 + 3 files changed, 3 insertions(+) diff --git a/compiler-rt/lib/sanitizer_common/sanitizer_symbolizer_libcdep.cpp b/compiler-rt/lib/sanitizer_common/sanitizer_symbolizer_libcdep.cpp index 79687ca5f72d..e9829326bcd6 100644 --- a/compiler-rt/lib/sanitizer_common/sanitizer_symbolizer_libcdep.cpp +++ b/compiler-rt/lib/sanitizer_common/sanitizer_symbolizer_libcdep.cpp @@ -281,6 +281,7 @@ class LLVMSymbolizerProcess final : public SymbolizerProcess { argv[i++] = inline_flag; argv[i++] = kSymbolizerArch; argv[i++] = nullptr; + CHECK_LE(i, kArgVMax); } }; diff --git a/compiler-rt/lib/sanitizer_common/sanitizer_symbolizer_mac.cpp b/compiler-rt/lib/sanitizer_common/sanitizer_symbolizer_mac.cpp index a916a03234fe..ac811c8a9136 100644 --- a/compiler-rt/lib/sanitizer_common/sanitizer_symbolizer_mac.cpp +++ b/compiler-rt/lib/sanitizer_common/sanitizer_symbolizer_mac.cpp @@ -91,6 +91,7 @@ class AtosSymbolizerProcess final : public SymbolizerProcess { argv[i++] = "-d"; } argv[i++] = nullptr; + CHECK_LE(i, kArgVMax); } char pid_str_[16]; diff --git a/compiler-rt/lib/sanitizer_common/sanitizer_symbolizer_posix_libcdep.cpp b/compiler-rt/lib/sanitizer_common/sanitizer_symbolizer_posix_libcdep.cpp index 888ee7fe89bc..8167db42a7ab 100644 --- a/compiler-rt/lib/sanitizer_common/sanitizer_symbolizer_posix_libcdep.cpp +++ b/compiler-rt/lib/sanitizer_common/sanitizer_symbolizer_posix_libcdep.cpp @@ -218,6 +218,7 @@ class Addr2LineProcess final : public SymbolizerProcess { argv[i++] = "-Cfe"; argv[i++] = module_name_; argv[i++] = nullptr; + CHECK_LE(i, kArgVMax); } bool ReachedEndOfOutput(const char *buffer, uptr length) const override; From a7ebf2010ebb76c32f59b95ecd7a403c382add92 Mon Sep 17 00:00:00 2001 From: Vitaly Buka Date: Thu, 23 Dec 2021 11:11:47 -0800 Subject: [PATCH 168/293] [NFC][sanitizer] Increase kArgVMax to 16 D116202 uses all available slots.
--- .../lib/sanitizer_common/sanitizer_symbolizer_internal.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/compiler-rt/lib/sanitizer_common/sanitizer_symbolizer_internal.h b/compiler-rt/lib/sanitizer_common/sanitizer_symbolizer_internal.h index 1eda31d0a817..df122ed3425c 100644 --- a/compiler-rt/lib/sanitizer_common/sanitizer_symbolizer_internal.h +++ b/compiler-rt/lib/sanitizer_common/sanitizer_symbolizer_internal.h @@ -86,7 +86,7 @@ class SymbolizerProcess { ~SymbolizerProcess() {} /// The maximum number of arguments required to invoke a tool process. - static const unsigned kArgVMax = 6; + static const unsigned kArgVMax = 16; // Customizable by subclasses. virtual bool StartSymbolizerSubprocess(); From eafc64ed6325eba962096d4a947d7e45e909bfde Mon Sep 17 00:00:00 2001 From: Fangrui Song Date: Thu, 23 Dec 2021 11:25:13 -0800 Subject: [PATCH 169/293] [Driver][test] Remove unneeded -no-canonical-prefixes and use preferred --target= -no-canonical-prefixes is not needed if we omit "clang" from CHECK lines. "-cc1" is sufficient to anchor the line we want to test. --target= is preferred over Separate form -target. --- clang/test/Driver/spirv-toolchain.cl | 56 ++++++++++++++-------------- 1 file changed, 28 insertions(+), 28 deletions(-) diff --git a/clang/test/Driver/spirv-toolchain.cl b/clang/test/Driver/spirv-toolchain.cl index d2562c0b667d..4be08f127290 100644 --- a/clang/test/Driver/spirv-toolchain.cl +++ b/clang/test/Driver/spirv-toolchain.cl @@ -1,58 +1,58 @@ // Check object emission. -// RUN: %clang -### -no-canonical-prefixes -target spirv64 -x cl -c %s 2>&1 | FileCheck --check-prefix=SPV64 %s -// RUN: %clang -### -target spirv64 %s 2>&1 | FileCheck --check-prefix=SPV64 %s -// RUN: %clang -### -no-canonical-prefixes -target spirv64 -x ir -c %s 2>&1 | FileCheck --check-prefix=SPV64 %s -// RUN: %clang -### -no-canonical-prefixes -target spirv64 -x clcpp -c %s 2>&1 | FileCheck --check-prefix=SPV64 %s -// RUN: %clang -### -no-canonical-prefixes -target spirv64 -x c -c %s 2>&1 | FileCheck --check-prefix=SPV64 %s +// RUN: %clang -### --target=spirv64 -x cl -c %s 2>&1 | FileCheck --check-prefix=SPV64 %s +// RUN: %clang -### --target=spirv64 %s 2>&1 | FileCheck --check-prefix=SPV64 %s +// RUN: %clang -### --target=spirv64 -x ir -c %s 2>&1 | FileCheck --check-prefix=SPV64 %s +// RUN: %clang -### --target=spirv64 -x clcpp -c %s 2>&1 | FileCheck --check-prefix=SPV64 %s +// RUN: %clang -### --target=spirv64 -x c -c %s 2>&1 | FileCheck --check-prefix=SPV64 %s -// SPV64: clang{{.*}} "-cc1" "-triple" "spirv64" +// SPV64: "-cc1" "-triple" "spirv64" // SPV64-SAME: "-o" [[BC:".*bc"]] // SPV64: {{llvm-spirv.*"}} [[BC]] "-o" {{".*o"}} -// RUN: %clang -### -no-canonical-prefixes -target spirv32 -x cl -c %s 2>&1 | FileCheck --check-prefix=SPV32 %s -// RUN: %clang -### -target spirv32 %s 2>&1 | FileCheck --check-prefix=SPV32 %s -// RUN: %clang -### -no-canonical-prefixes -target spirv32 -x ir -c %s 2>&1 | FileCheck --check-prefix=SPV32 %s -// RUN: %clang -### -no-canonical-prefixes -target spirv32 -x clcpp -c %s 2>&1 | FileCheck --check-prefix=SPV32 %s -// RUN: %clang -### -no-canonical-prefixes -target spirv32 -x c -c %s 2>&1 | FileCheck --check-prefix=SPV32 %s +// RUN: %clang -### --target=spirv32 -x cl -c %s 2>&1 | FileCheck --check-prefix=SPV32 %s +// RUN: %clang -### --target=spirv32 %s 2>&1 | FileCheck --check-prefix=SPV32 %s +// RUN: %clang -### --target=spirv32 -x ir -c %s 2>&1 | FileCheck --check-prefix=SPV32 %s +// RUN: %clang -### --target=spirv32 -x clcpp -c %s 2>&1 | FileCheck 
--check-prefix=SPV32 %s +// RUN: %clang -### --target=spirv32 -x c -c %s 2>&1 | FileCheck --check-prefix=SPV32 %s -// SPV32: clang{{.*}} "-cc1" "-triple" "spirv32" +// SPV32: "-cc1" "-triple" "spirv32" // SPV32-SAME: "-o" [[BC:".*bc"]] // SPV32: {{llvm-spirv.*"}} [[BC]] "-o" {{".*o"}} //----------------------------------------------------------------------------- // Check Assembly emission. -// RUN: %clang -### -no-canonical-prefixes -target spirv64 -x cl -S %s 2>&1 | FileCheck --check-prefix=SPT64 %s -// RUN: %clang -### -no-canonical-prefixes -target spirv64 -x ir -S %s 2>&1 | FileCheck --check-prefix=SPT64 %s -// RUN: %clang -### -no-canonical-prefixes -target spirv64 -x clcpp -c %s 2>&1 | FileCheck --check-prefix=SPV64 %s -// RUN: %clang -### -no-canonical-prefixes -target spirv64 -x c -S %s 2>&1 | FileCheck --check-prefix=SPT64 %s +// RUN: %clang -### --target=spirv64 -x cl -S %s 2>&1 | FileCheck --check-prefix=SPT64 %s +// RUN: %clang -### --target=spirv64 -x ir -S %s 2>&1 | FileCheck --check-prefix=SPT64 %s +// RUN: %clang -### --target=spirv64 -x clcpp -c %s 2>&1 | FileCheck --check-prefix=SPV64 %s +// RUN: %clang -### --target=spirv64 -x c -S %s 2>&1 | FileCheck --check-prefix=SPT64 %s -// SPT64: clang{{.*}} "-cc1" "-triple" "spirv64" +// SPT64: "-cc1" "-triple" "spirv64" // SPT64-SAME: "-o" [[BC:".*bc"]] // SPT64: {{llvm-spirv.*"}} [[BC]] "--spirv-tools-dis" "-o" {{".*s"}} -// RUN: %clang -### -no-canonical-prefixes -target spirv32 -x cl -S %s 2>&1 | FileCheck --check-prefix=SPT32 %s -// RUN: %clang -### -no-canonical-prefixes -target spirv32 -x ir -S %s 2>&1 | FileCheck --check-prefix=SPT32 %s -// RUN: %clang -### -no-canonical-prefixes -target spirv32 -x clcpp -c %s 2>&1 | FileCheck --check-prefix=SPV32 %s -// RUN: %clang -### -no-canonical-prefixes -target spirv32 -x c -S %s 2>&1 | FileCheck --check-prefix=SPT32 %s +// RUN: %clang -### --target=spirv32 -x cl -S %s 2>&1 | FileCheck --check-prefix=SPT32 %s +// RUN: %clang -### --target=spirv32 -x ir -S %s 2>&1 | FileCheck --check-prefix=SPT32 %s +// RUN: %clang -### --target=spirv32 -x clcpp -c %s 2>&1 | FileCheck --check-prefix=SPV32 %s +// RUN: %clang -### --target=spirv32 -x c -S %s 2>&1 | FileCheck --check-prefix=SPT32 %s -// SPT32: clang{{.*}} "-cc1" "-triple" "spirv32" +// SPT32: "-cc1" "-triple" "spirv32" // SPT32-SAME: "-o" [[BC:".*bc"]] // SPT32: {{llvm-spirv.*"}} [[BC]] "--spirv-tools-dis" "-o" {{".*s"}} //----------------------------------------------------------------------------- // Check assembly input -> object output -// RUN: %clang -### -no-canonical-prefixes -target spirv64 -x assembler -c %s 2>&1 | FileCheck --check-prefix=ASM %s -// RUN: %clang -### -no-canonical-prefixes -target spirv32 -x assembler -c %s 2>&1 | FileCheck --check-prefix=ASM %s +// RUN: %clang -### --target=spirv64 -x assembler -c %s 2>&1 | FileCheck --check-prefix=ASM %s +// RUN: %clang -### --target=spirv32 -x assembler -c %s 2>&1 | FileCheck --check-prefix=ASM %s // ASM: {{llvm-spirv.*"}} {{".*"}} "-to-binary" "-o" {{".*o"}} //----------------------------------------------------------------------------- // Check --save-temps. 
-// RUN: %clang -### -no-canonical-prefixes -target spirv64 -x cl -c %s --save-temps 2>&1 | FileCheck --check-prefix=TMP %s +// RUN: %clang -### --target=spirv64 -x cl -c %s --save-temps 2>&1 | FileCheck --check-prefix=TMP %s -// TMP: clang{{.*}} "-cc1" "-triple" "spirv64" +// TMP: "-cc1" "-triple" "spirv64" // TMP-SAME: "-E" // TMP-SAME: "-o" [[I:".*i"]] -// TMP: clang{{.*}} "-cc1" "-triple" "spirv64" +// TMP: "-cc1" "-triple" "spirv64" // TMP-SAME: "-o" [[BC:".*bc"]] // TMP-SAME: [[I]] // TMP: {{llvm-spirv.*"}} [[BC]] "--spirv-tools-dis" "-o" [[S:".*s"]] @@ -60,6 +60,6 @@ //----------------------------------------------------------------------------- // Check that warning occurs if multiple input files are passed. -// RUN: %clang -### -target spirv64 %s %s 2>&1 | FileCheck --check-prefix=WARN %s +// RUN: %clang -### --target=spirv64 %s %s 2>&1 | FileCheck --check-prefix=WARN %s // WARN: warning: Linking multiple input files is not supported for SPIR-V yet From f103ee2e9e3fc530fbada7cf3db6ca6913a502c1 Mon Sep 17 00:00:00 2001 From: Vitaly Buka Date: Wed, 22 Dec 2021 15:25:16 -0800 Subject: [PATCH 170/293] [sanitizer] Move internal_symbolizer "inline" option This option is per process anyway. I'd like to add more options, but having them as parameters of __sanitizer_symbolize_code looks inconvenient. Reviewed By: browneee Differential Revision: https://reviews.llvm.org/D116201 --- .../sanitizer_common_interface_posix.inc | 1 + .../sanitizer_symbolizer_posix_libcdep.cpp | 11 +++++--- .../symbolizer/sanitizer_symbolize.cpp | 26 ++++++++++++++----- .../symbolizer/scripts/build_symbolizer.sh | 1 + .../symbolizer/scripts/global_symbols.txt | 1 + .../lib/sanitizer_common/weak_symbols.txt | 1 + 6 files changed, 30 insertions(+), 11 deletions(-) diff --git a/compiler-rt/lib/sanitizer_common/sanitizer_common_interface_posix.inc b/compiler-rt/lib/sanitizer_common/sanitizer_common_interface_posix.inc index 38f9531148d4..11b894132994 100644 --- a/compiler-rt/lib/sanitizer_common/sanitizer_common_interface_posix.inc +++ b/compiler-rt/lib/sanitizer_common/sanitizer_common_interface_posix.inc @@ -11,3 +11,4 @@ INTERFACE_WEAK_FUNCTION(__sanitizer_symbolize_code) INTERFACE_WEAK_FUNCTION(__sanitizer_symbolize_data) INTERFACE_WEAK_FUNCTION(__sanitizer_symbolize_demangle) INTERFACE_WEAK_FUNCTION(__sanitizer_symbolize_flush) +INTERFACE_WEAK_FUNCTION(__sanitizer_symbolize_set_inline_frames) diff --git a/compiler-rt/lib/sanitizer_common/sanitizer_symbolizer_posix_libcdep.cpp b/compiler-rt/lib/sanitizer_common/sanitizer_symbolizer_posix_libcdep.cpp index 8167db42a7ab..3166c0013f46 100644 --- a/compiler-rt/lib/sanitizer_common/sanitizer_symbolizer_posix_libcdep.cpp +++ b/compiler-rt/lib/sanitizer_common/sanitizer_symbolizer_posix_libcdep.cpp @@ -319,8 +319,7 @@ class Addr2LinePool final : public SymbolizerTool { extern "C" { SANITIZER_INTERFACE_ATTRIBUTE SANITIZER_WEAK_ATTRIBUTE bool __sanitizer_symbolize_code(const char *ModuleName, u64 ModuleOffset, - char *Buffer, int MaxLength, - bool SymbolizeInlineFrames); + char *Buffer, int MaxLength); SANITIZER_INTERFACE_ATTRIBUTE SANITIZER_WEAK_ATTRIBUTE bool __sanitizer_symbolize_data(const char *ModuleName, u64 ModuleOffset, char *Buffer, int MaxLength); @@ -328,11 +327,16 @@ SANITIZER_INTERFACE_ATTRIBUTE SANITIZER_WEAK_ATTRIBUTE void __sanitizer_symbolize_flush(); SANITIZER_INTERFACE_ATTRIBUTE SANITIZER_WEAK_ATTRIBUTE int __sanitizer_symbolize_demangle(const char *Name, char *Buffer, int MaxLength); +SANITIZER_INTERFACE_ATTRIBUTE SANITIZER_WEAK_ATTRIBUTE bool 
+__sanitizer_symbolize_set_inline_frames(bool InlineFrames); } // extern "C" class InternalSymbolizer final : public SymbolizerTool { public: static InternalSymbolizer *get(LowLevelAllocator *alloc) { + if (__sanitizer_symbolize_set_inline_frames) + CHECK(__sanitizer_symbolize_set_inline_frames( + common_flags()->symbolize_inline_frames)); if (__sanitizer_symbolize_code && __sanitizer_symbolize_data) return new (*alloc) InternalSymbolizer(); return 0; @@ -340,8 +344,7 @@ class InternalSymbolizer final : public SymbolizerTool { bool SymbolizePC(uptr addr, SymbolizedStack *stack) override { bool result = __sanitizer_symbolize_code( - stack->info.module, stack->info.module_offset, buffer_, kBufferSize, - common_flags()->symbolize_inline_frames); + stack->info.module, stack->info.module_offset, buffer_, kBufferSize); if (result) ParseSymbolizePCOutput(buffer_, stack); return result; diff --git a/compiler-rt/lib/sanitizer_common/symbolizer/sanitizer_symbolize.cpp b/compiler-rt/lib/sanitizer_common/symbolizer/sanitizer_symbolize.cpp index 3809880d50b4..9b1d81765087 100644 --- a/compiler-rt/lib/sanitizer_common/symbolizer/sanitizer_symbolize.cpp +++ b/compiler-rt/lib/sanitizer_common/symbolizer/sanitizer_symbolize.cpp @@ -17,10 +17,15 @@ #include "llvm/DebugInfo/Symbolize/DIPrinter.h" #include "llvm/DebugInfo/Symbolize/Symbolize.h" +static llvm::symbolize::LLVMSymbolizer *Symbolizer = nullptr; +static bool InlineFrames = true; + static llvm::symbolize::LLVMSymbolizer *getDefaultSymbolizer() { - static llvm::symbolize::LLVMSymbolizer *DefaultSymbolizer = - new llvm::symbolize::LLVMSymbolizer(); - return DefaultSymbolizer; + if (Symbolizer) + return Symbolizer; + llvm::symbolize::LLVMSymbolizer::Options Opts; + Symbolizer = new llvm::symbolize::LLVMSymbolizer(Opts); + return Symbolizer; } static llvm::symbolize::PrinterConfig getDefaultPrinterConfig() { @@ -43,8 +48,7 @@ extern "C" { typedef uint64_t u64; bool __sanitizer_symbolize_code(const char *ModuleName, uint64_t ModuleOffset, - char *Buffer, int MaxLength, - bool SymbolizeInlineFrames) { + char *Buffer, int MaxLength) { std::string Result; { llvm::raw_string_ostream OS(Result); @@ -55,7 +59,7 @@ bool __sanitizer_symbolize_code(const char *ModuleName, uint64_t ModuleOffset, // TODO: it is neccessary to set proper SectionIndex here. // object::SectionedAddress::UndefSection works for only absolute addresses. - if (SymbolizeInlineFrames) { + if (InlineFrames) { auto ResOrErr = getDefaultSymbolizer()->symbolizeInlinedCode( ModuleName, {ModuleOffset, llvm::object::SectionedAddress::UndefSection}); @@ -93,7 +97,10 @@ bool __sanitizer_symbolize_data(const char *ModuleName, uint64_t ModuleOffset, Result.c_str()) < MaxLength; } -void __sanitizer_symbolize_flush() { getDefaultSymbolizer()->flush(); } +void __sanitizer_symbolize_flush() { + if (Symbolizer) + Symbolizer->flush(); +} int __sanitizer_symbolize_demangle(const char *Name, char *Buffer, int MaxLength) { @@ -105,6 +112,11 @@ int __sanitizer_symbolize_demangle(const char *Name, char *Buffer, : 0; } +bool __sanitizer_symbolize_set_inline_frames(bool Value) { + InlineFrames = Value; + return true; +} + // Override __cxa_atexit and ignore callbacks. // This prevents crashes in a configuration when the symbolizer // is built into sanitizer runtime and consequently into the test process. 
diff --git a/compiler-rt/lib/sanitizer_common/symbolizer/scripts/build_symbolizer.sh b/compiler-rt/lib/sanitizer_common/symbolizer/scripts/build_symbolizer.sh index 29925ccf30a1..7f301709c62b 100755 --- a/compiler-rt/lib/sanitizer_common/symbolizer/scripts/build_symbolizer.sh +++ b/compiler-rt/lib/sanitizer_common/symbolizer/scripts/build_symbolizer.sh @@ -159,6 +159,7 @@ SYMBOLIZER_API_LIST=__sanitizer_symbolize_code SYMBOLIZER_API_LIST+=,__sanitizer_symbolize_data SYMBOLIZER_API_LIST+=,__sanitizer_symbolize_flush SYMBOLIZER_API_LIST+=,__sanitizer_symbolize_demangle +SYMBOLIZER_API_LIST+=,__sanitizer_symbolize_set_inline_frames LIBCXX_ARCHIVE_DIR=$(dirname $(find $LIBCXX_BUILD -name libc++.a | head -n1)) diff --git a/compiler-rt/lib/sanitizer_common/symbolizer/scripts/global_symbols.txt b/compiler-rt/lib/sanitizer_common/symbolizer/scripts/global_symbols.txt index cb4494f10b85..18f74e7741ef 100644 --- a/compiler-rt/lib/sanitizer_common/symbolizer/scripts/global_symbols.txt +++ b/compiler-rt/lib/sanitizer_common/symbolizer/scripts/global_symbols.txt @@ -38,6 +38,7 @@ __sanitizer_symbolize_code T __sanitizer_symbolize_data T __sanitizer_symbolize_demangle T __sanitizer_symbolize_flush T +__sanitizer_symbolize_set_inline_frames T __strdup U __udivdi3 U __umoddi3 U diff --git a/compiler-rt/lib/sanitizer_common/weak_symbols.txt b/compiler-rt/lib/sanitizer_common/weak_symbols.txt index 5a2b275184f4..8575bf610102 100644 --- a/compiler-rt/lib/sanitizer_common/weak_symbols.txt +++ b/compiler-rt/lib/sanitizer_common/weak_symbols.txt @@ -6,3 +6,4 @@ ___sanitizer_symbolize_code ___sanitizer_symbolize_data ___sanitizer_symbolize_demangle ___sanitizer_symbolize_flush +___sanitizer_symbolize_set_inline_frames From bf45624ba07f8bd65f38db872c89d8c0e4ffd5fe Mon Sep 17 00:00:00 2001 From: Fangrui Song Date: Thu, 23 Dec 2021 11:32:44 -0800 Subject: [PATCH 171/293] [ELF][PPC32] Support .got2 in an output section description I added `PPC32Got2Section` D62464 to support .got2 but did not implement .got2 in another output section. PR52799 has a linker script placing .got2 in .rodata, which causes a null pointer dereference because a MergeSyntheticSection's file is nullptr. Add the support. --- lld/ELF/InputFiles.h | 9 ++-- lld/ELF/InputSection.cpp | 6 +-- lld/ELF/SyntheticSections.cpp | 8 ++-- lld/ELF/Thunks.cpp | 5 ++- lld/test/ELF/linkerscript/ppc32-got2.s | 59 ++++++++++++++++++++++++++ lld/test/ELF/ppc32-relocatable-got2.s | 10 ++++- 6 files changed, 81 insertions(+), 16 deletions(-) create mode 100644 lld/test/ELF/linkerscript/ppc32-got2.s diff --git a/lld/ELF/InputFiles.h b/lld/ELF/InputFiles.h index 1fee39371cca..a031ef94c14d 100644 --- a/lld/ELF/InputFiles.h +++ b/lld/ELF/InputFiles.h @@ -41,6 +41,7 @@ namespace elf { using llvm::object::Archive; +class InputSection; class Symbol; // If --reproduce is specified, all input files are written to this tar archive. @@ -114,13 +115,13 @@ class InputFile { SmallVector symbols; + // .got2 in the current file. This is used by PPC32 -fPIC/-fPIE to compute + // offsets in PLT call stubs. + InputSection *ppc32Got2 = nullptr; + // Index of MIPS GOT built for this file. uint32_t mipsGotIndex = -1; - // outSecOff of .got2 in the current file. This is used by PPC32 -fPIC/-fPIE - // to compute offsets in PLT call stubs. - uint32_t ppc32Got2OutSecOff = 0; - // groupId is used for --warn-backrefs which is an optional error // checking feature. All files within the same --{start,end}-group or // --{start,end}-lib get the same group ID. 
Otherwise, each file gets a new diff --git a/lld/ELF/InputSection.cpp b/lld/ELF/InputSection.cpp index 56a3510e41bb..df8cc5f221a0 100644 --- a/lld/ELF/InputSection.cpp +++ b/lld/ELF/InputSection.cpp @@ -474,13 +474,13 @@ void InputSection::copyRelocations(uint8_t *buf, ArrayRef rels) { else if (config->relocatable && type != target->noneRel) sec->relocations.push_back({R_ABS, type, rel.r_offset, addend, &sym}); } else if (config->emachine == EM_PPC && type == R_PPC_PLTREL24 && - p->r_addend >= 0x8000) { + p->r_addend >= 0x8000 && sec->file->ppc32Got2) { // Similar to R_MIPS_GPREL{16,32}. If the addend of R_PPC_PLTREL24 // indicates that r30 is relative to the input section .got2 // (r_addend>=0x8000), after linking, r30 should be relative to the output // section .got2 . To compensate for the shift, adjust r_addend by - // ppc32Got2OutSecOff. - p->r_addend += sec->file->ppc32Got2OutSecOff; + // ppc32Got->outSecOff. + p->r_addend += sec->file->ppc32Got2->outSecOff; } } } diff --git a/lld/ELF/SyntheticSections.cpp b/lld/ELF/SyntheticSections.cpp index 9bf6118ebc3f..872dd0e612af 100644 --- a/lld/ELF/SyntheticSections.cpp +++ b/lld/ELF/SyntheticSections.cpp @@ -3620,14 +3620,12 @@ void PPC32Got2Section::finalizeContents() { // .got2 . This function computes outSecOff of each .got2 to be used in // PPC32PltCallStub::writeTo(). The purpose of this empty synthetic section is // to collect input sections named ".got2". - uint32_t offset = 0; for (SectionCommand *cmd : getParent()->commands) if (auto *isd = dyn_cast(cmd)) { for (InputSection *isec : isd->sections) { - if (isec == this) - continue; - isec->file->ppc32Got2OutSecOff = offset; - offset += (uint32_t)isec->getSize(); + // isec->file may be nullptr for MergeSyntheticSection. + if (isec != this && isec->file) + isec->file->ppc32Got2 = isec; } } } diff --git a/lld/ELF/Thunks.cpp b/lld/ELF/Thunks.cpp index 85a0a67d613c..38de4db191f4 100644 --- a/lld/ELF/Thunks.cpp +++ b/lld/ELF/Thunks.cpp @@ -806,8 +806,9 @@ void elf::writePPC32PltCallStub(uint8_t *buf, uint64_t gotPltVA, // The stub loads an address relative to r30 (.got2+Addend). Addend is // almost always 0x8000. The address of .got2 is different in another object // file, so a stub cannot be shared. - offset = gotPltVA - (in.ppc32Got2->getParent()->getVA() + - file->ppc32Got2OutSecOff + addend); + offset = gotPltVA - + (in.ppc32Got2->getParent()->getVA() + + (file->ppc32Got2 ? file->ppc32Got2->outSecOff : 0) + addend); } else { // The stub loads an address relative to _GLOBAL_OFFSET_TABLE_ (which is // currently the address of .got). diff --git a/lld/test/ELF/linkerscript/ppc32-got2.s b/lld/test/ELF/linkerscript/ppc32-got2.s new file mode 100644 index 000000000000..1e232fa2c3c4 --- /dev/null +++ b/lld/test/ELF/linkerscript/ppc32-got2.s @@ -0,0 +1,59 @@ +# REQUIRES: ppc +## Test .got2 placed in a different output section. 
+ +# RUN: rm -rf %t && split-file %s %t +# RUN: llvm-mc -filetype=obj -triple=powerpc %t/a.s -o %t/a.o +# RUN: llvm-mc -filetype=obj -triple=powerpc %t/b.s -o %t/b.o +# RUN: ld.lld -shared -T %t/t %t/a.o %t/b.o -o %t/a.so +# RUN: llvm-readobj -r %t/a.so | FileCheck --check-prefix=RELOC %s +# RUN: llvm-readelf -S %t/a.so | FileCheck --check-prefix=SEC %s + +# RELOC: .rela.plt { +# RELOC-NEXT: 0x1A4 R_PPC_JMP_SLOT f 0x0 +# RELOC-NEXT: } + +# SEC: .got PROGBITS 0000018c +# SEC-NEXT: .rodata PROGBITS 00000198 + +## .got2+0x8000-0xb0 = .rodata+4+0x8000-0xb0 = 0x198+4+0x8000-0xb0 = 65536*1-32532 +# CHECK: <_start>: +# CHECK-NEXT: bcl 20, 31, 0x +# CHECK-NEXT: b0: mflr 30 +# CHECK-NEXT: addis 30, 30, 1 +# CHECK-NEXT: addi 30, 30, -32532 +# CHECK-NEXT: bl {{.*}} <00008000.got2.plt_pic32.f> + +## &.got[2] - (.got2+0x8000) = &.got[2] - (.rodata+4+0x8000) = 0x1A4 - (0x198+4+0x8000) = -32760 +# CHECK: <00008000.got2.plt_pic32.f>: +# CHECK-NEXT: lwz 11, -32760(30) +# CHECK-NEXT: mtctr 11 +# CHECK-NEXT: bctr +# CHECK-NEXT: nop + +#--- a.s +.section .rodata.cst4,"aM",@progbits,4 +.long 1 + +.section .got2,"aw" +.long f + +.text +.globl _start, f, g +_start: + bcl 20,31,.L +.L: + mflr 30 + addis 30, 30, .got2+0x8000-.L@ha + addi 30, 30, .got2+0x8000-.L@l + bl f+0x8000@plt + +#--- b.s +.section .got2,"aw" +.globl f +f: + bl f+0x8000@plt + +#--- t +SECTIONS { + .rodata : { *(.rodata .rodata.*) *(.got2) } +} diff --git a/lld/test/ELF/ppc32-relocatable-got2.s b/lld/test/ELF/ppc32-relocatable-got2.s index 06ccb39b6a4a..85a0764ffe5a 100644 --- a/lld/test/ELF/ppc32-relocatable-got2.s +++ b/lld/test/ELF/ppc32-relocatable-got2.s @@ -3,10 +3,11 @@ ## If r_addend indicates .got2, adjust it by the local .got2's output section offset. # RUN: llvm-mc -filetype=obj -triple=powerpc %s -o %t.o -# RUN: ld.lld -r %t.o %t.o -o %t +# RUN: echo 'bl f+0x8000@plt' | llvm-mc -filetype=obj -triple=powerpc - -o %t2.o +# RUN: ld.lld -r %t.o %t.o %t2.o -o %t # RUN: llvm-readobj -r %t | FileCheck %s -# RUN: ld.lld -shared --emit-relocs %t.o %t.o -o %t.so +# RUN: ld.lld -shared --emit-relocs %t.o %t.o %t2.o -o %t.so # RUN: llvm-readobj -r %t.so | FileCheck %s # CHECK: .rela.adjust { @@ -23,6 +24,11 @@ # CHECK-NEXT: R_PPC_PLTREL24 foo 0x0 # CHECK-NEXT: R_PPC_PLTREL24 foo 0x0 # CHECK-NEXT: } +## %t2.o has an invalid relocation with r_addend=0x8000. Test we don't crash. +## The addend doesn't matter. +# CHECK-NEXT: .rela.text { +# CHECK-NEXT: R_PPC_PLTREL24 f 0x8000 +# CHECK-NEXT: } .section .got2,"aw" .long 0 From be8180af5854806a343c3dd334d97ba2c4bfadfa Mon Sep 17 00:00:00 2001 From: Nathan Chancellor Date: Thu, 23 Dec 2021 11:43:26 -0700 Subject: [PATCH 172/293] [clang][driver] Warn when '-mno-outline-atomics' is used with a non-AArch64 triple MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The Linux kernel has a make macro called cc-option that invokes the compiler with an option in isolation to see if it is supported before adding it to CFLAGS. The exit code of the compiler is used to determine if the flag is supported and should be added to the compiler invocation. A call to cc-option with '-mno-outline-atomics' was added to prevent linking errors with newer GCC versions but this call succeeds with a non-AArch64 target because there is no warning from clang with '-mno-outline-atomics', just '-moutline-atomics'. 
Because the call succeeds and adds '-mno-outline-atomics' to the compiler invocation, there is a warning from LLVM because the 'outline-atomics' target feature is only supported by the AArch64 backend. $ echo | clang -target x86_64 -moutline-atomics -Werror -x c -c -o /dev/null - clang-14: error: The 'x86_64' architecture does not support -moutline-atomics; flag ignored [-Werror,-Woption-ignored] $ echo $? 1 $ echo | clang -target x86_64 -mno-outline-atomics -Werror -x c -c -o /dev/null - '-outline-atomics' is not a recognized feature for this target (ignoring feature) $ echo $? 0 This does not match GCC's behavior, which errors when the flag is added to a non-AArch64 target. $ echo | gcc -moutline-atomics -x c -c -o /dev/null - gcc: error: unrecognized command-line option ‘-moutline-atomics’; did you mean ‘-finline-atomics’? $ echo | gcc -mno-outline-atomics -x c -c -o /dev/null - gcc: error: unrecognized command-line option ‘-mno-outline-atomics’; did you mean ‘-fno-inline-atomics’? $ echo | aarch64-linux-gnu-gcc -moutline-atomics -x c -c -o /dev/null - $ echo | aarch64-linux-gnu-gcc -mno-outline-atomics -x c -c -o /dev/null - To get closer to GCC's behavior, issue a warning when '-mno-outline-atomics' is used without an AArch64 triple and do not add '{-,+}outline-atomics' to the list of target features in these cases. Link: https://github.com/ClangBuiltLinux/linux/issues/1552 Reviewed By: melver, nickdesaulniers Differential Revision: https://reviews.llvm.org/D116128 --- .../clang/Basic/DiagnosticDriverKinds.td | 2 +- clang/lib/Driver/ToolChains/Clang.cpp | 18 +++++++++--------- .../test/Driver/unsupported-outline-atomics.c | 15 +++++++++++++++ 3 files changed, 25 insertions(+), 10 deletions(-) create mode 100644 clang/test/Driver/unsupported-outline-atomics.c diff --git a/clang/include/clang/Basic/DiagnosticDriverKinds.td b/clang/include/clang/Basic/DiagnosticDriverKinds.td index 3025e6fe3c02..c623aeff11f0 100644 --- a/clang/include/clang/Basic/DiagnosticDriverKinds.td +++ b/clang/include/clang/Basic/DiagnosticDriverKinds.td @@ -568,7 +568,7 @@ def warn_drv_moutline_unsupported_opt : Warning< InGroup; def warn_drv_moutline_atomics_unsupported_opt : Warning< - "'%0' does not support '-moutline-atomics'; flag ignored">, + "'%0' does not support '-%1'; flag ignored">, InGroup; def warn_drv_darwin_sdk_invalid_settings : Warning< diff --git a/clang/lib/Driver/ToolChains/Clang.cpp b/clang/lib/Driver/ToolChains/Clang.cpp index ca62229b8153..65347a38490e 100644 --- a/clang/lib/Driver/ToolChains/Clang.cpp +++ b/clang/lib/Driver/ToolChains/Clang.cpp @@ -7007,18 +7007,18 @@ void Clang::ConstructJob(Compilation &C, const JobAction &JA, if (Arg *A = Args.getLastArg(options::OPT_moutline_atomics, options::OPT_mno_outline_atomics)) { - if (A->getOption().matches(options::OPT_moutline_atomics)) { - // Option -moutline-atomics supported for AArch64 target only.
+ if (!Triple.isAArch64()) { + D.Diag(diag::warn_drv_moutline_atomics_unsupported_opt) + << Triple.getArchName() << A->getOption().getName(); + } else { + if (A->getOption().matches(options::OPT_moutline_atomics)) { CmdArgs.push_back("-target-feature"); CmdArgs.push_back("+outline-atomics"); + } else { + CmdArgs.push_back("-target-feature"); + CmdArgs.push_back("-outline-atomics"); } - } else { - CmdArgs.push_back("-target-feature"); - CmdArgs.push_back("-outline-atomics"); } } else if (Triple.isAArch64() && getToolChain().IsAArch64OutlineAtomicsDefault(Args)) { diff --git a/clang/test/Driver/unsupported-outline-atomics.c b/clang/test/Driver/unsupported-outline-atomics.c new file mode 100644 index 000000000000..2e88528c2ec6 --- /dev/null +++ b/clang/test/Driver/unsupported-outline-atomics.c @@ -0,0 +1,15 @@ +// RUN: %clang -target x86_64 -moutline-atomics -S %s -### 2>&1 | FileCheck %s -check-prefix=CHECK-OUTLINE-ATOMICS-X86 +// CHECK-OUTLINE-ATOMICS-X86: warning: 'x86_64' does not support '-moutline-atomics'; flag ignored [-Woption-ignored] +// CHECK-OUTLINE-ATOMICS-X86-NOT: "-target-feature" "+outline-atomics" + +// RUN: %clang -target x86_64 -mno-outline-atomics -S %s -### 2>&1 | FileCheck %s -check-prefix=CHECK-NO-OUTLINE-ATOMICS-X86 +// CHECK-NO-OUTLINE-ATOMICS-X86: warning: 'x86_64' does not support '-mno-outline-atomics'; flag ignored [-Woption-ignored] +// CHECK-NO-OUTLINE-ATOMICS-X86-NOT: "-target-feature" "-outline-atomics" + +// RUN: %clang -target riscv64 -moutline-atomics -S %s -### 2>&1 | FileCheck %s -check-prefix=CHECK-OUTLINE-ATOMICS-RISCV +// CHECK-OUTLINE-ATOMICS-RISCV: warning: 'riscv64' does not support '-moutline-atomics'; flag ignored [-Woption-ignored] +// CHECK-OUTLINE-ATOMICS-RISCV-NOT: "-target-feature" "+outline-atomics" + +// RUN: %clang -target riscv64 -mno-outline-atomics -S %s -### 2>&1 | FileCheck %s -check-prefix=CHECK-NO-OUTLINE-ATOMICS-RISCV +// CHECK-NO-OUTLINE-ATOMICS-RISCV: warning: 'riscv64' does not support '-mno-outline-atomics'; flag ignored [-Woption-ignored] +// CHECK-NO-OUTLINE-ATOMICS-RISCV-NOT: "-target-feature" "-outline-atomics" From 8f8c89f3cd74fd7f165f0c43b515519243d6cc95 Mon Sep 17 00:00:00 2001 From: Mogball Date: Thu, 23 Dec 2021 11:55:29 -0800 Subject: [PATCH 173/293] [mlir] Remove spurious debug guard --- mlir/lib/Transforms/Utils/GreedyPatternRewriteDriver.cpp | 5 ----- 1 file changed, 5 deletions(-) diff --git a/mlir/lib/Transforms/Utils/GreedyPatternRewriteDriver.cpp b/mlir/lib/Transforms/Utils/GreedyPatternRewriteDriver.cpp index e9ea661cb68e..7fd46c711db0 100644 --- a/mlir/lib/Transforms/Utils/GreedyPatternRewriteDriver.cpp +++ b/mlir/lib/Transforms/Utils/GreedyPatternRewriteDriver.cpp @@ -255,11 +255,6 @@ bool GreedyPatternRewriteDriver::simplify(MutableArrayRef regions) { #else LogicalResult matchResult = matcher.matchAndRewrite(op, *this); #endif - - -#ifndef NDEBUG -#endif - changed |= succeeded(matchResult); } From 6842f52a0bbf851b43b51fa8dc7dd8dfb533d0e7 Mon Sep 17 00:00:00 2001 From: Arthur O'Dwyer Date: Wed, 22 Dec 2021 20:25:17 -0500 Subject: [PATCH 174/293] [libc++] [test] Flatten the directory structure a bit. NFC. 
Differential Revision: https://reviews.llvm.org/D116198 --- .../ranges/range.access/{range.access.begin => }/begin.pass.cpp | 0 .../test/std/ranges/range.access/{range.prim => }/data.pass.cpp | 0 .../test/std/ranges/range.access/{range.prim => }/empty.pass.cpp | 0 .../std/ranges/range.access/{range.access.end => }/end.pass.cpp | 0 .../range.access/range.access.cbegin/cbegin.compile.pass.cpp | 1 - .../ranges/range.access/range.access.cend/cend.compile.pass.cpp | 1 - .../test/std/ranges/range.access/{range.prim => }/size.pass.cpp | 0 .../test/std/ranges/range.access/{range.prim => }/ssize.pass.cpp | 0 8 files changed, 2 deletions(-) rename libcxx/test/std/ranges/range.access/{range.access.begin => }/begin.pass.cpp (100%) rename libcxx/test/std/ranges/range.access/{range.prim => }/data.pass.cpp (100%) rename libcxx/test/std/ranges/range.access/{range.prim => }/empty.pass.cpp (100%) rename libcxx/test/std/ranges/range.access/{range.access.end => }/end.pass.cpp (100%) delete mode 100644 libcxx/test/std/ranges/range.access/range.access.cbegin/cbegin.compile.pass.cpp delete mode 100644 libcxx/test/std/ranges/range.access/range.access.cend/cend.compile.pass.cpp rename libcxx/test/std/ranges/range.access/{range.prim => }/size.pass.cpp (100%) rename libcxx/test/std/ranges/range.access/{range.prim => }/ssize.pass.cpp (100%) diff --git a/libcxx/test/std/ranges/range.access/range.access.begin/begin.pass.cpp b/libcxx/test/std/ranges/range.access/begin.pass.cpp similarity index 100% rename from libcxx/test/std/ranges/range.access/range.access.begin/begin.pass.cpp rename to libcxx/test/std/ranges/range.access/begin.pass.cpp diff --git a/libcxx/test/std/ranges/range.access/range.prim/data.pass.cpp b/libcxx/test/std/ranges/range.access/data.pass.cpp similarity index 100% rename from libcxx/test/std/ranges/range.access/range.prim/data.pass.cpp rename to libcxx/test/std/ranges/range.access/data.pass.cpp diff --git a/libcxx/test/std/ranges/range.access/range.prim/empty.pass.cpp b/libcxx/test/std/ranges/range.access/empty.pass.cpp similarity index 100% rename from libcxx/test/std/ranges/range.access/range.prim/empty.pass.cpp rename to libcxx/test/std/ranges/range.access/empty.pass.cpp diff --git a/libcxx/test/std/ranges/range.access/range.access.end/end.pass.cpp b/libcxx/test/std/ranges/range.access/end.pass.cpp similarity index 100% rename from libcxx/test/std/ranges/range.access/range.access.end/end.pass.cpp rename to libcxx/test/std/ranges/range.access/end.pass.cpp diff --git a/libcxx/test/std/ranges/range.access/range.access.cbegin/cbegin.compile.pass.cpp b/libcxx/test/std/ranges/range.access/range.access.cbegin/cbegin.compile.pass.cpp deleted file mode 100644 index 92e92f232f7c..000000000000 --- a/libcxx/test/std/ranges/range.access/range.access.cbegin/cbegin.compile.pass.cpp +++ /dev/null @@ -1 +0,0 @@ -// Tested in begin.pass.cpp. diff --git a/libcxx/test/std/ranges/range.access/range.access.cend/cend.compile.pass.cpp b/libcxx/test/std/ranges/range.access/range.access.cend/cend.compile.pass.cpp deleted file mode 100644 index 924fea790e63..000000000000 --- a/libcxx/test/std/ranges/range.access/range.access.cend/cend.compile.pass.cpp +++ /dev/null @@ -1 +0,0 @@ -// Tested in end.pass.cpp. 
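For context, the placeholder files deleted above only said "Tested in begin.pass.cpp" / "Tested in end.pass.cpp": cbegin and cend are thin wrappers that dispatch to ranges::begin / ranges::end on a const-qualified argument, so the begin/end tests already cover them. A simplified sketch of that delegation for an lvalue range (illustrative only, not the libc++ implementation; the real CPO appears in the __ranges/access.h hunks of a later patch in this series):

    #include <ranges>
    #include <utility>

    // Approximately what ranges::cbegin does for an lvalue range:
    // const-qualify the argument, then dispatch to ranges::begin.
    template <class T>
    constexpr auto cbegin_like(T& t)
        noexcept(noexcept(std::ranges::begin(std::as_const(t)))) {
      return std::ranges::begin(std::as_const(t));
    }
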
diff --git a/libcxx/test/std/ranges/range.access/range.prim/size.pass.cpp b/libcxx/test/std/ranges/range.access/size.pass.cpp similarity index 100% rename from libcxx/test/std/ranges/range.access/range.prim/size.pass.cpp rename to libcxx/test/std/ranges/range.access/size.pass.cpp diff --git a/libcxx/test/std/ranges/range.access/range.prim/ssize.pass.cpp b/libcxx/test/std/ranges/range.access/ssize.pass.cpp similarity index 100% rename from libcxx/test/std/ranges/range.access/range.prim/ssize.pass.cpp rename to libcxx/test/std/ranges/range.access/ssize.pass.cpp From a2a9a5c7d3d9e6860444d8b4fd9f1fd665801586 Mon Sep 17 00:00:00 2001 From: Arthur O'Dwyer Date: Wed, 22 Dec 2021 18:06:48 -0500 Subject: [PATCH 175/293] [libc++] [ranges] Fix bugs in ranges::empty(). It was missing the cast to `bool` in `bool(__t.empty())`. It was wrongly using `std::forward` in some places. Differential Revision: https://reviews.llvm.org/D115312 --- libcxx/include/__ranges/empty.h | 11 +++++------ .../std/ranges/range.access/empty.pass.cpp | 18 ++++++++++-------- 2 files changed, 15 insertions(+), 14 deletions(-) diff --git a/libcxx/include/__ranges/empty.h b/libcxx/include/__ranges/empty.h index bf6772c5673b..4c6a3a0d24ff 100644 --- a/libcxx/include/__ranges/empty.h +++ b/libcxx/include/__ranges/empty.h @@ -13,7 +13,6 @@ #include <__iterator/concepts.h> #include <__ranges/access.h> #include <__ranges/size.h> -#include <__utility/forward.h> #include #if !defined(_LIBCPP_HAS_NO_PRAGMA_SYSTEM_HEADER) @@ -29,13 +28,13 @@ namespace ranges { namespace __empty { template concept __member_empty = requires(_Tp&& __t) { - bool(_VSTD::forward<_Tp>(__t).empty()); + bool(__t.empty()); }; template concept __can_invoke_size = !__member_empty<_Tp> && - requires(_Tp&& __t) { ranges::size(_VSTD::forward<_Tp>(__t)); }; + requires(_Tp&& __t) { ranges::size(__t); }; template concept __can_compare_begin_end = @@ -50,13 +49,13 @@ namespace __empty { template <__member_empty _Tp> [[nodiscard]] _LIBCPP_HIDE_FROM_ABI constexpr bool operator()(_Tp&& __t) const noexcept(noexcept(bool(__t.empty()))) { - return __t.empty(); + return bool(__t.empty()); } template <__can_invoke_size _Tp> [[nodiscard]] _LIBCPP_HIDE_FROM_ABI constexpr bool operator()(_Tp&& __t) const - noexcept(noexcept(ranges::size(_VSTD::forward<_Tp>(__t)))) { - return ranges::size(_VSTD::forward<_Tp>(__t)) == 0; + noexcept(noexcept(ranges::size(__t))) { + return ranges::size(__t) == 0; } template<__can_compare_begin_end _Tp> diff --git a/libcxx/test/std/ranges/range.access/empty.pass.cpp b/libcxx/test/std/ranges/range.access/empty.pass.cpp index 9be523eb42b7..5fb9965df755 100644 --- a/libcxx/test/std/ranges/range.access/empty.pass.cpp +++ b/libcxx/test/std/ranges/range.access/empty.pass.cpp @@ -15,11 +15,11 @@ #include #include +#include #include "test_macros.h" #include "test_iterators.h" using RangeEmptyT = decltype(std::ranges::empty); -using RangeSizeT = decltype(std::ranges::size); static_assert(!std::is_invocable_v); static_assert(!std::is_invocable_v); @@ -41,12 +41,16 @@ static_assert(!std::ranges::empty(std::move(array_of_incomplete))); static_assert(!std::ranges::empty(std::as_const(array_of_incomplete))); static_assert(!std::ranges::empty(static_cast(array_of_incomplete))); -struct NonConstSizeAndEmpty { - int size(); +struct InputRangeWithoutSize { + cpp17_input_iterator begin() const; + cpp17_input_iterator end() const; +}; +static_assert(!std::is_invocable_v); + +struct NonConstEmpty { bool empty(); }; -static_assert(!std::is_invocable_v); 
-static_assert(!std::is_invocable_v); +static_assert(!std::is_invocable_v); struct HasMemberAndFunction { constexpr bool empty() const { return true; } @@ -60,7 +64,7 @@ struct BadReturnType { static_assert(!std::is_invocable_v); struct BoolConvertible { - constexpr /*TODO: explicit*/ operator bool() noexcept(false) { return true; } + constexpr explicit operator bool() noexcept(false) { return true; } }; struct BoolConvertibleReturnType { constexpr BoolConvertible empty() noexcept { return {}; } @@ -159,9 +163,7 @@ constexpr bool testBeginEqualsEnd() { assert(std::ranges::empty(e) == false); // e.empty() assert(std::ranges::empty(std::as_const(e)) == true); // e.begin() == e.end() -#if 0 // TODO FIXME assert(std::ranges::empty(EvilBeginEnd())); -#endif return true; } From 4191a93ea448426d30de220ed8605329942d9872 Mon Sep 17 00:00:00 2001 From: Arthur O'Dwyer Date: Thu, 23 Dec 2021 14:25:18 -0500 Subject: [PATCH 176/293] [libc++] [test] Eliminate `== true` and `== false`. NFC. As suggested in D115312. --- .../std/ranges/range.access/empty.pass.cpp | 22 +++++++++---------- 1 file changed, 11 insertions(+), 11 deletions(-) diff --git a/libcxx/test/std/ranges/range.access/empty.pass.cpp b/libcxx/test/std/ranges/range.access/empty.pass.cpp index 5fb9965df755..18cdce02b573 100644 --- a/libcxx/test/std/ranges/range.access/empty.pass.cpp +++ b/libcxx/test/std/ranges/range.access/empty.pass.cpp @@ -80,10 +80,10 @@ static_assert(!std::is_invocable_v); constexpr bool testEmptyMember() { HasMemberAndFunction a; - assert(std::ranges::empty(a) == true); + assert(std::ranges::empty(a)); BoolConvertibleReturnType b; - assert(std::ranges::empty(b) == true); + assert(std::ranges::empty(b)); return true; } @@ -107,17 +107,17 @@ static_assert(std::ranges::sized_range); constexpr bool testUsingRangesSize() { SizeMember a{1}; - assert(std::ranges::empty(a) == false); + assert(!std::ranges::empty(a)); SizeMember b{0}; - assert(std::ranges::empty(b) == true); + assert(std::ranges::empty(b)); SizeFunction c{1}; - assert(std::ranges::empty(c) == false); + assert(!std::ranges::empty(c)); SizeFunction d{0}; - assert(std::ranges::empty(d) == true); + assert(std::ranges::empty(d)); BeginEndSizedSentinel e; - assert(std::ranges::empty(e) == true); + assert(std::ranges::empty(e)); return true; } @@ -154,14 +154,14 @@ struct EvilBeginEnd { constexpr bool testBeginEqualsEnd() { BeginEndNotSizedSentinel a; - assert(std::ranges::empty(a) == true); + assert(std::ranges::empty(a)); DisabledSizeRangeWithBeginEnd d; - assert(std::ranges::empty(d) == true); + assert(std::ranges::empty(d)); BeginEndAndEmpty e; - assert(std::ranges::empty(e) == false); // e.empty() - assert(std::ranges::empty(std::as_const(e)) == true); // e.begin() == e.end() + assert(!std::ranges::empty(e)); // e.empty() + assert(std::ranges::empty(std::as_const(e))); // e.begin() == e.end() assert(std::ranges::empty(EvilBeginEnd())); From 3042091168e99323217223a959df8675bdfa9b3c Mon Sep 17 00:00:00 2001 From: Arthur O'Dwyer Date: Thu, 23 Dec 2021 14:51:24 -0500 Subject: [PATCH 177/293] [libc++] [ranges] Whitespace and namespace-style adjustments. NFC. Largely split out of D116199 to keep that PR smaller. 
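For reviewers skimming the mostly mechanical diff, the change swaps the compact nested-namespace spelling for the expanded one; both declare exactly the same entities. A minimal sketch with invented names (__demo / demo, not code from the patch); the two blocks are alternative spellings and are not meant to coexist in one translation unit:

    // Before: C++20 compact nested-namespace definitions.
    namespace ranges::__demo {
      struct __fn { /* call operators of the CPO */ };
    }
    namespace ranges::inline __cpo {
      inline constexpr auto demo = __demo::__fn{};
    }

    // After: the expanded form this patch standardizes on.
    namespace ranges {
    namespace __demo {
      struct __fn { /* call operators of the CPO */ };
    }
    inline namespace __cpo {
      inline constexpr auto demo = __demo::__fn{};
    } // namespace __cpo
    } // namespace ranges
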
--- libcxx/include/__ranges/access.h | 94 +++++++++++++++++++------------- libcxx/include/__ranges/data.h | 5 +- libcxx/include/__ranges/empty.h | 3 +- libcxx/include/__ranges/size.h | 16 ++++-- 4 files changed, 72 insertions(+), 46 deletions(-) diff --git a/libcxx/include/__ranges/access.h b/libcxx/include/__ranges/access.h index c58849f7cf33..91dc3055c86d 100644 --- a/libcxx/include/__ranges/access.h +++ b/libcxx/include/__ranges/access.h @@ -29,11 +29,13 @@ _LIBCPP_BEGIN_NAMESPACE_STD namespace ranges { template concept __can_borrow = - is_lvalue_reference_v<_Tp> || enable_borrowed_range >; + is_lvalue_reference_v<_Tp> || enable_borrowed_range>; } // namespace ranges // [range.access.begin] -namespace ranges::__begin { + +namespace ranges { +namespace __begin { template concept __member_begin = __can_borrow<_Tp> && @@ -48,7 +50,7 @@ namespace ranges::__begin { concept __unqualified_begin = !__member_begin<_Tp> && __can_borrow<_Tp> && - __class_or_enum > && + __class_or_enum> && requires(_Tp && __t) { { _LIBCPP_AUTO_CAST(begin(__t)) } -> input_or_output_iterator; }; @@ -62,42 +64,47 @@ namespace ranges::__begin { } template - requires __member_begin<_Tp> + requires __member_begin<_Tp> [[nodiscard]] _LIBCPP_HIDE_FROM_ABI constexpr auto operator()(_Tp&& __t) const - noexcept(noexcept(_LIBCPP_AUTO_CAST(__t.begin()))) + noexcept(noexcept(_LIBCPP_AUTO_CAST(__t.begin()))) { return _LIBCPP_AUTO_CAST(__t.begin()); } template - requires __unqualified_begin<_Tp> + requires __unqualified_begin<_Tp> [[nodiscard]] _LIBCPP_HIDE_FROM_ABI constexpr auto operator()(_Tp&& __t) const - noexcept(noexcept(_LIBCPP_AUTO_CAST(begin(__t)))) + noexcept(noexcept(_LIBCPP_AUTO_CAST(begin(__t)))) { return _LIBCPP_AUTO_CAST(begin(__t)); } void operator()(auto&&) const = delete; }; -} // namespace ranges::__begin +} -namespace ranges { - inline namespace __cpo { - inline constexpr auto begin = __begin::__fn{}; - } // namespace __cpo +inline namespace __cpo { + inline constexpr auto begin = __begin::__fn{}; +} // namespace __cpo +} // namespace ranges +// [range.range] + +namespace ranges { template using iterator_t = decltype(ranges::begin(declval<_Tp&>())); } // namespace ranges // [range.access.end] -namespace ranges::__end { + +namespace ranges { +namespace __end { template concept __member_end = __can_borrow<_Tp> && requires(_Tp&& __t) { typename iterator_t<_Tp>; - { _LIBCPP_AUTO_CAST(__t.end()) } -> sentinel_for >; + { _LIBCPP_AUTO_CAST(__t.end()) } -> sentinel_for>; }; void end(auto&) = delete; @@ -107,10 +114,10 @@ namespace ranges::__end { concept __unqualified_end = !__member_end<_Tp> && __can_borrow<_Tp> && - __class_or_enum > && + __class_or_enum> && requires(_Tp && __t) { typename iterator_t<_Tp>; - { _LIBCPP_AUTO_CAST(end(__t)) } -> sentinel_for >; + { _LIBCPP_AUTO_CAST(end(__t)) } -> sentinel_for>; }; class __fn { @@ -123,76 +130,85 @@ namespace ranges::__end { } template - requires __member_end<_Tp> + requires __member_end<_Tp> [[nodiscard]] _LIBCPP_HIDE_FROM_ABI constexpr auto operator()(_Tp&& __t) const - noexcept(noexcept(_LIBCPP_AUTO_CAST(__t.end()))) + noexcept(noexcept(_LIBCPP_AUTO_CAST(__t.end()))) { return _LIBCPP_AUTO_CAST(__t.end()); } template - requires __unqualified_end<_Tp> + requires __unqualified_end<_Tp> [[nodiscard]] _LIBCPP_HIDE_FROM_ABI constexpr auto operator()(_Tp&& __t) const - noexcept(noexcept(_LIBCPP_AUTO_CAST(end(__t)))) + noexcept(noexcept(_LIBCPP_AUTO_CAST(end(__t)))) { return _LIBCPP_AUTO_CAST(end(__t)); } void operator()(auto&&) const = delete; }; -} // namespace 
ranges::__end +} -namespace ranges::inline __cpo { +inline namespace __cpo { inline constexpr auto end = __end::__fn{}; -} // namespace ranges::__cpo +} // namespace __cpo +} // namespace ranges + +// [range.access.cbegin] -namespace ranges::__cbegin { +namespace ranges { +namespace __cbegin { struct __fn { template - requires invocable + requires invocable [[nodiscard]] _LIBCPP_HIDE_FROM_ABI constexpr auto operator()(_Tp& __t) const - noexcept(noexcept(ranges::begin(_VSTD::as_const(__t)))) + noexcept(noexcept(ranges::begin(_VSTD::as_const(__t)))) { return ranges::begin(_VSTD::as_const(__t)); } template - requires is_rvalue_reference_v<_Tp> && invocable + requires is_rvalue_reference_v<_Tp> && invocable [[nodiscard]] _LIBCPP_HIDE_FROM_ABI constexpr auto operator()(_Tp&& __t) const - noexcept(noexcept(ranges::begin(static_cast<_Tp const&&>(__t)))) + noexcept(noexcept(ranges::begin(static_cast<_Tp const&&>(__t)))) { return ranges::begin(static_cast<_Tp const&&>(__t)); } }; -} // namespace ranges::__cbegin +} -namespace ranges::inline __cpo { +inline namespace __cpo { inline constexpr auto cbegin = __cbegin::__fn{}; -} // namespace ranges::__cpo +} // namespace __cpo +} // namespace ranges -namespace ranges::__cend { +// [range.access.cend] + +namespace ranges { +namespace __cend { struct __fn { template - requires invocable + requires invocable [[nodiscard]] _LIBCPP_HIDE_FROM_ABI constexpr auto operator()(_Tp& __t) const - noexcept(noexcept(ranges::end(_VSTD::as_const(__t)))) + noexcept(noexcept(ranges::end(_VSTD::as_const(__t)))) { return ranges::end(_VSTD::as_const(__t)); } template - requires is_rvalue_reference_v<_Tp> && invocable + requires is_rvalue_reference_v<_Tp> && invocable [[nodiscard]] _LIBCPP_HIDE_FROM_ABI constexpr auto operator()(_Tp&& __t) const - noexcept(noexcept(ranges::end(static_cast<_Tp const&&>(__t)))) + noexcept(noexcept(ranges::end(static_cast<_Tp const&&>(__t)))) { return ranges::end(static_cast<_Tp const&&>(__t)); } }; -} // namespace ranges::__cend +} -namespace ranges::inline __cpo { +inline namespace __cpo { inline constexpr auto cend = __cend::__fn{}; -} // namespace ranges::__cpo +} // namespace __cpo +} // namespace ranges #endif // !defined(_LIBCPP_HAS_NO_RANGES) diff --git a/libcxx/include/__ranges/data.h b/libcxx/include/__ranges/data.h index a33da0e5a1c2..cc151c59f3d7 100644 --- a/libcxx/include/__ranges/data.h +++ b/libcxx/include/__ranges/data.h @@ -26,8 +26,9 @@ _LIBCPP_BEGIN_NAMESPACE_STD #if !defined(_LIBCPP_HAS_NO_RANGES) -namespace ranges { // [range.prim.data] + +namespace ranges { namespace __data { template concept __ptr_to_object = is_pointer_v<_Tp> && is_object_v>; @@ -64,7 +65,7 @@ namespace __data { return _VSTD::to_address(ranges::begin(_VSTD::forward<_Tp>(__t))); } }; -} // end namespace __data +} inline namespace __cpo { inline constexpr auto data = __data::__fn{}; diff --git a/libcxx/include/__ranges/empty.h b/libcxx/include/__ranges/empty.h index 4c6a3a0d24ff..e8a8aabf4aed 100644 --- a/libcxx/include/__ranges/empty.h +++ b/libcxx/include/__ranges/empty.h @@ -23,8 +23,9 @@ _LIBCPP_BEGIN_NAMESPACE_STD #if !defined(_LIBCPP_HAS_NO_RANGES) -namespace ranges { // [range.prim.empty] + +namespace ranges { namespace __empty { template concept __member_empty = requires(_Tp&& __t) { diff --git a/libcxx/include/__ranges/size.h b/libcxx/include/__ranges/size.h index c37467eaf747..fc6641cf4887 100644 --- a/libcxx/include/__ranges/size.h +++ b/libcxx/include/__ranges/size.h @@ -26,10 +26,13 @@ _LIBCPP_BEGIN_NAMESPACE_STD #if 
!defined(_LIBCPP_HAS_NO_RANGES) namespace ranges { -template -inline constexpr bool disable_sized_range = false; + template + inline constexpr bool disable_sized_range = false; +} // [range.prim.size] + +namespace ranges { namespace __size { void size(auto&) = delete; void size(const auto&) = delete; @@ -90,18 +93,23 @@ namespace __size { return _VSTD::__to_unsigned_like(ranges::end(__t) - ranges::begin(__t)); } }; -} // end namespace __size +} inline namespace __cpo { inline constexpr auto size = __size::__fn{}; } // namespace __cpo +} // namespace ranges + +// [range.prim.ssize] +namespace ranges { namespace __ssize { struct __fn { template requires requires (_Tp&& __t) { ranges::size(__t); } [[nodiscard]] _LIBCPP_HIDE_FROM_ABI constexpr integral auto operator()(_Tp&& __t) const - noexcept(noexcept(ranges::size(__t))) { + noexcept(noexcept(ranges::size(__t))) + { using _Signed = make_signed_t; if constexpr (sizeof(ptrdiff_t) > sizeof(_Signed)) return static_cast(ranges::size(__t)); From 937b00ab2cf05d0eba7a96fe7b14d277996e663e Mon Sep 17 00:00:00 2001 From: Michael Kruse Date: Thu, 23 Dec 2021 13:45:42 -0600 Subject: [PATCH 178/293] [Polly][SchedOpt] Account for prevectorization of multiple statements. A prevectorized loop may contain multiple statements, in which case isl_schedule_node_band_sink will sink the vector band to multiple leaves. Instead of statically assuming a specific tree structure after sinking, add a SIMD marker to all inner bands. Fixes llvm.org/PR52637 --- polly/include/polly/ScheduleTreeTransform.h | 33 ++++++++++++++ polly/lib/Transform/ScheduleOptimizer.cpp | 34 ++++++++++---- polly/lib/Transform/ScheduleTreeTransform.cpp | 29 ------------ .../focaltech_test_detail_threshold-7bc17e.ll | 12 ++--- ...vivid_vbi_gen_sliced-before-llvmreduced.ll | 45 +++++++++++++++++++ 5 files changed, 110 insertions(+), 43 deletions(-) create mode 100644 polly/test/ScheduleOptimizer/vivid-vbi-gen-vivid_vbi_gen_sliced-before-llvmreduced.ll diff --git a/polly/include/polly/ScheduleTreeTransform.h b/polly/include/polly/ScheduleTreeTransform.h index 5ed0c64ad3f8..b35da47f59cb 100644 --- a/polly/include/polly/ScheduleTreeTransform.h +++ b/polly/include/polly/ScheduleTreeTransform.h @@ -154,6 +154,39 @@ struct RecursiveScheduleTreeVisitor } }; +/// Recursively visit all nodes of a schedule tree while allowing changes. +/// +/// The visit methods return an isl::schedule_node that is used to continue +/// visiting the tree. Structural changes such as returning a different node +/// will confuse the visitor. +template +struct ScheduleNodeRewriter + : public RecursiveScheduleTreeVisitor { + Derived &getDerived() { return *static_cast(this); } + const Derived &getDerived() const { + return *static_cast(this); + } + + isl::schedule_node visitNode(isl::schedule_node Node, Args... args) { + return getDerived().visitChildren(Node); + } + + isl::schedule_node visitChildren(isl::schedule_node Node, Args... args) { + if (!Node.has_children()) + return Node; + + isl::schedule_node It = Node.first_child(); + while (true) { + It = getDerived().visit(It, std::forward(args)...); + if (!It.has_next_sibling()) + break; + It = It.next_sibling(); + } + return It.parent(); + } +}; + /// Is this node the marker for its parent band? 
bool isBandMark(const isl::schedule_node &Node); diff --git a/polly/lib/Transform/ScheduleOptimizer.cpp b/polly/lib/Transform/ScheduleOptimizer.cpp index 03878d5c8e4b..0a6461139542 100644 --- a/polly/lib/Transform/ScheduleOptimizer.cpp +++ b/polly/lib/Transform/ScheduleOptimizer.cpp @@ -384,6 +384,19 @@ ScheduleTreeOptimizer::isolateFullPartialTiles(isl::schedule_node Node, return Result; } +struct InsertSimdMarkers : public ScheduleNodeRewriter { + isl::schedule_node visitBand(isl::schedule_node_band Band) { + isl::schedule_node Node = visitChildren(Band); + + // Only add SIMD markers to innermost bands. + if (!Node.first_child().isa()) + return Node; + + isl::id LoopMarker = isl::id::alloc(Band.ctx(), "SIMD", nullptr); + return Band.insert_mark(LoopMarker); + } +}; + isl::schedule_node ScheduleTreeOptimizer::prevectSchedBand( isl::schedule_node Node, unsigned DimToVectorize, int VectorWidth) { assert(isl_schedule_node_get_type(Node.get()) == isl_schedule_node_band); @@ -408,16 +421,19 @@ isl::schedule_node ScheduleTreeOptimizer::prevectSchedBand( Node = Node.child(0); // Make sure the "trivially vectorizable loop" is not unrolled. Otherwise, // we will have troubles to match it in the backend. - isl::schedule_node_band NodeBand = - Node.as().set_ast_build_options( - isl::union_set(Node.ctx(), "{ unroll[x]: 1 = 0 }")); - Node = isl::manage(isl_schedule_node_band_sink(NodeBand.release())); - Node = Node.child(0); - if (isl_schedule_node_get_type(Node.get()) == isl_schedule_node_leaf) - Node = Node.parent(); - auto LoopMarker = isl::id::alloc(Node.ctx(), "SIMD", nullptr); + Node = Node.as().set_ast_build_options( + isl::union_set(Node.ctx(), "{ unroll[x]: 1 = 0 }")); + + // Sink the inner loop into the smallest possible statements to make them + // represent a single vector instruction if possible. + Node = isl::manage(isl_schedule_node_band_sink(Node.release())); + + // Add SIMD markers to those vector statements. + InsertSimdMarkers SimdMarkerInserter; + Node = SimdMarkerInserter.visit(Node); + PrevectOpts++; - return Node.insert_mark(LoopMarker); + return Node.parent(); } static bool isSimpleInnermostBand(const isl::schedule_node &Node) { diff --git a/polly/lib/Transform/ScheduleTreeTransform.cpp b/polly/lib/Transform/ScheduleTreeTransform.cpp index a2cb538021fb..01f18eadb4d9 100644 --- a/polly/lib/Transform/ScheduleTreeTransform.cpp +++ b/polly/lib/Transform/ScheduleTreeTransform.cpp @@ -118,35 +118,6 @@ static isl::schedule rebuildBand(isl::schedule_node_band OldBand, return NewBand.get_schedule(); } -/// Recursively visit all nodes of a schedule tree while allowing changes. -/// -/// The visit methods return an isl::schedule_node that is used to continue -/// visiting the tree. Structural changes such as returning a different node -/// will confuse the visitor. -template -struct ScheduleNodeRewriter - : public RecursiveScheduleTreeVisitor { - Derived &getDerived() { return *static_cast(this); } - const Derived &getDerived() const { - return *static_cast(this); - } - - isl::schedule_node visitNode(const isl::schedule_node &Node, Args... args) { - if (!Node.has_children()) - return Node; - - isl::schedule_node It = Node.first_child(); - while (true) { - It = getDerived().visit(It, std::forward(args)...); - if (!It.has_next_sibling()) - break; - It = It.next_sibling(); - } - return It.parent(); - } -}; - /// Rewrite a schedule tree by reconstructing it bottom-up. /// /// By default, the original schedule tree is reconstructed. 
To build a diff --git a/polly/test/ScheduleOptimizer/focaltech_test_detail_threshold-7bc17e.ll b/polly/test/ScheduleOptimizer/focaltech_test_detail_threshold-7bc17e.ll index 0c0bc12eb4cd..9d0c6b5b9479 100644 --- a/polly/test/ScheduleOptimizer/focaltech_test_detail_threshold-7bc17e.ll +++ b/polly/test/ScheduleOptimizer/focaltech_test_detail_threshold-7bc17e.ll @@ -80,15 +80,17 @@ cleanup: ; preds = %for.cond, %entry ; CHECK: schedule: "[call15] -> [{ Stmt_for_body30[i0, i1] -> [((i0) mod 32)]; Stmt_for_body23[i0, i1] -> [((i0) mod 32)] }]" ; CHECK: permutable: 1 ; CHECK: child: -; CHECK: mark: "SIMD" -; CHECK: child: -; CHECK: sequence: -; CHECK: - filter: "[call15] -> { Stmt_for_body23[i0, i1] }" +; CHECK: sequence: +; CHECK: - filter: "[call15] -> { Stmt_for_body23[i0, i1] }" +; CHECK: child: +; CHECK: mark: "SIMD" ; CHECK: child: ; CHECK: schedule: "[call15] -> [{ Stmt_for_body30[i0, i1] -> [((i1) mod 4)]; Stmt_for_body23[i0, i1] -> [((i1) mod 4)] }]" ; CHECK: permutable: 1 ; CHECK: coincident: [ 1 ] -; CHECK: - filter: "[call15] -> { Stmt_for_body30[i0, i1] }" +; CHECK: - filter: "[call15] -> { Stmt_for_body30[i0, i1] }" +; CHECK: child: +; CHECK: mark: "SIMD" ; CHECK: child: ; CHECK: schedule: "[call15] -> [{ Stmt_for_body30[i0, i1] -> [((i1) mod 4)]; Stmt_for_body23[i0, i1] -> [((i1) mod 4)] }]" ; CHECK: permutable: 1 diff --git a/polly/test/ScheduleOptimizer/vivid-vbi-gen-vivid_vbi_gen_sliced-before-llvmreduced.ll b/polly/test/ScheduleOptimizer/vivid-vbi-gen-vivid_vbi_gen_sliced-before-llvmreduced.ll new file mode 100644 index 000000000000..3bd1f9838500 --- /dev/null +++ b/polly/test/ScheduleOptimizer/vivid-vbi-gen-vivid_vbi_gen_sliced-before-llvmreduced.ll @@ -0,0 +1,45 @@ +; RUN: opt %loadPolly -polly-vectorizer=stripmine -polly-isl-arg=--no-schedule-serialize-sccs -polly-tiling=0 -polly-opt-isl -analyze - < %s | FileCheck %s + +; isl_schedule_node_band_sink may sink into multiple children. 
+; https://llvm.org/PR52637 + +%struct.v4l2_sliced_vbi_data = type { [48 x i8] } + +define void @vivid_vbi_gen_sliced() { +entry: + br label %for.body + +for.body: ; preds = %vivid_vbi_gen_teletext.exit, %entry + %i.015 = phi i32 [ 0, %entry ], [ %inc, %vivid_vbi_gen_teletext.exit ] + %data0.014 = phi %struct.v4l2_sliced_vbi_data* [ null, %entry ], [ %incdec.ptr, %vivid_vbi_gen_teletext.exit ] + %arraydecay = getelementptr inbounds %struct.v4l2_sliced_vbi_data, %struct.v4l2_sliced_vbi_data* %data0.014, i32 0, i32 0, i32 0 + %arrayidx.i = getelementptr inbounds %struct.v4l2_sliced_vbi_data, %struct.v4l2_sliced_vbi_data* %data0.014, i32 0, i32 0, i32 6 + %0 = load i8, i8* %arrayidx.i, align 1 + store i8 %0, i8* %arraydecay, align 1 + br label %for.body.for.body_crit_edge.i + +for.body.for.body_crit_edge.i: ; preds = %for.body.for.body_crit_edge.i, %for.body + %inc10.i13 = phi i32 [ 1, %for.body ], [ %inc10.i, %for.body.for.body_crit_edge.i ] + %arrayidx2.phi.trans.insert.i = getelementptr inbounds %struct.v4l2_sliced_vbi_data, %struct.v4l2_sliced_vbi_data* %data0.014, i32 0, i32 0, i32 %inc10.i13 + store i8 0, i8* %arrayidx2.phi.trans.insert.i, align 1 + %inc10.i = add nuw nsw i32 %inc10.i13, 1 + %exitcond.not.i = icmp eq i32 %inc10.i13, 42 + br i1 %exitcond.not.i, label %vivid_vbi_gen_teletext.exit, label %for.body.for.body_crit_edge.i + +vivid_vbi_gen_teletext.exit: ; preds = %for.body.for.body_crit_edge.i + %incdec.ptr = getelementptr inbounds %struct.v4l2_sliced_vbi_data, %struct.v4l2_sliced_vbi_data* %data0.014, i32 1 + %inc = add nuw nsw i32 %i.015, 1 + %exitcond.not = icmp eq i32 %i.015, 1 + br i1 %exitcond.not, label %for.end, label %for.body + +for.end: ; preds = %vivid_vbi_gen_teletext.exit + ret void +} + + +; CHECK: schedule: +; CHECK: schedule: +; CHECK: mark: "SIMD" +; CHECK: schedule: +; CHECK: mark: "SIMD" +; CHECK: schedule: From 313de31fbb757643db13bcb47f8fe515039e298a Mon Sep 17 00:00:00 2001 From: Rob Suderman Date: Thu, 23 Dec 2021 12:09:20 -0800 Subject: [PATCH 179/293] [mlir][tosa] Split tosa-to-linalg named ops out of pass Linalg named ops lowering are moved to a separate pass. This allows TOSA canonicalizers to run between named-ops lowerings and the general TOSA lowerings. This allows the TOSA canonicalizers to run between lowerings. 
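For illustration, one way a client pipeline can take advantage of the split: run the named-op lowering, canonicalize, then run the remaining TOSA-to-Linalg patterns. This is a sketch only; the pass creation functions come from this patch, but the exact wiring of the default pipeline lives in TosaToLinalgPass.cpp, and placing the generic canonicalizer between the two lowerings is an assumption about client code, not something the patch mandates.

    #include "mlir/Conversion/TosaToLinalg/TosaToLinalg.h"
    #include "mlir/IR/BuiltinOps.h"
    #include "mlir/Pass/PassManager.h"
    #include "mlir/Transforms/Passes.h"

    static void buildTosaLoweringPipeline(mlir::OpPassManager &pm) {
      // 1. Lower the structured/named TOSA ops (conv2d, matmul, pooling, ...)
      //    to linalg named ops first.
      pm.addNestedPass<mlir::FuncOp>(mlir::tosa::createTosaToLinalgNamed());
      // 2. TOSA canonicalization patterns can now clean up what is left
      //    before the generic elementwise lowering runs.
      pm.addNestedPass<mlir::FuncOp>(mlir::createCanonicalizerPass());
      // 3. Lower the remaining TOSA ops to linalg.generic and friends.
      pm.addNestedPass<mlir::FuncOp>(mlir::tosa::createTosaToLinalg());
    }
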
Reviewed By: NatashaKnk Differential Revision: https://reviews.llvm.org/D116057 --- mlir/include/mlir/Conversion/Passes.td | 14 + .../Conversion/TosaToLinalg/TosaToLinalg.h | 4 + .../Conversion/TosaToLinalg/CMakeLists.txt | 2 + .../Conversion/TosaToLinalg/TosaToLinalg.cpp | 814 +--------------- .../TosaToLinalg/TosaToLinalgNamed.cpp | 885 ++++++++++++++++++ .../TosaToLinalg/TosaToLinalgNamedPass.cpp | 68 ++ .../TosaToLinalg/TosaToLinalgPass.cpp | 4 + .../TosaToLinalg/tosa-to-linalg-named.mlir | 448 +++++++++ .../TosaToLinalg/tosa-to-linalg.mlir | 460 --------- 9 files changed, 1426 insertions(+), 1273 deletions(-) create mode 100644 mlir/lib/Conversion/TosaToLinalg/TosaToLinalgNamed.cpp create mode 100644 mlir/lib/Conversion/TosaToLinalg/TosaToLinalgNamedPass.cpp create mode 100644 mlir/test/Conversion/TosaToLinalg/tosa-to-linalg-named.mlir diff --git a/mlir/include/mlir/Conversion/Passes.td b/mlir/include/mlir/Conversion/Passes.td index f6e49cc889d1..4d1f383c0229 100644 --- a/mlir/include/mlir/Conversion/Passes.td +++ b/mlir/include/mlir/Conversion/Passes.td @@ -645,6 +645,20 @@ def TosaToLinalg : FunctionPass<"tosa-to-linalg"> { let constructor = "tosa::createTosaToLinalg()"; } +//===----------------------------------------------------------------------===// +// TosaToLinalgNamed +//===----------------------------------------------------------------------===// + +def TosaToLinalgNamed : FunctionPass<"tosa-to-linalg-named"> { + let summary = "Lower TOSA to LinAlg named operations"; + let description = [{ + Pass that converts TOSA operations to the equivalent operations using the + Linalg named operations. + }]; + + let constructor = "tosa::createTosaToLinalgNamed()"; +} + //===----------------------------------------------------------------------===// // TosaToSCF //===----------------------------------------------------------------------===// diff --git a/mlir/include/mlir/Conversion/TosaToLinalg/TosaToLinalg.h b/mlir/include/mlir/Conversion/TosaToLinalg/TosaToLinalg.h index b542833fa0e9..ec44d01065a7 100644 --- a/mlir/include/mlir/Conversion/TosaToLinalg/TosaToLinalg.h +++ b/mlir/include/mlir/Conversion/TosaToLinalg/TosaToLinalg.h @@ -20,6 +20,7 @@ namespace mlir { namespace tosa { std::unique_ptr createTosaToLinalg(); +std::unique_ptr createTosaToLinalgNamed(); /// Populates passes to convert from TOSA to Linalg on buffers. At the end of /// the pass, the function will only contain linalg ops or standard ops if the @@ -29,6 +30,9 @@ void addTosaToLinalgPasses(OpPassManager &pm); /// Populates conversion passes from TOSA dialect to Linalg dialect. void populateTosaToLinalgConversionPatterns(RewritePatternSet *patterns); +/// Populates conversion passes from TOSA dialect to Linalg named operations. 
+void populateTosaToLinalgNamedConversionPatterns(RewritePatternSet *patterns); + } // namespace tosa } // namespace mlir diff --git a/mlir/lib/Conversion/TosaToLinalg/CMakeLists.txt b/mlir/lib/Conversion/TosaToLinalg/CMakeLists.txt index 5617dd3e0ce0..b98afb2aaadc 100644 --- a/mlir/lib/Conversion/TosaToLinalg/CMakeLists.txt +++ b/mlir/lib/Conversion/TosaToLinalg/CMakeLists.txt @@ -1,5 +1,7 @@ add_mlir_conversion_library(MLIRTosaToLinalg TosaToLinalg.cpp + TosaToLinalgNamed.cpp + TosaToLinalgNamedPass.cpp TosaToLinalgPass.cpp ADDITIONAL_HEADER_DIRS diff --git a/mlir/lib/Conversion/TosaToLinalg/TosaToLinalg.cpp b/mlir/lib/Conversion/TosaToLinalg/TosaToLinalg.cpp index abff8b57ccdc..04262234ceaa 100644 --- a/mlir/lib/Conversion/TosaToLinalg/TosaToLinalg.cpp +++ b/mlir/lib/Conversion/TosaToLinalg/TosaToLinalg.cpp @@ -61,37 +61,6 @@ static mlir::SelectOp clampHelper(Location loc, Value arg, return rewriter.create(loc, largerThanMax, max, minOrArg); } -static mlir::Value applyPad(Location loc, Value input, ArrayRef pad, - Attribute padAttr, OpBuilder &rewriter) { - // Input should be padded if necessary. - if (llvm::all_of(pad, [](int64_t p) { return p == 0; })) - return input; - - ShapedType inputTy = input.getType().cast(); - Type inputETy = inputTy.getElementType(); - auto inputShape = inputTy.getShape(); - - assert((inputShape.size() * 2) == pad.size()); - - SmallVector paddedShape; - SmallVector lowIndices; - SmallVector highIndices; - for (int i = 0, s = inputShape.size(); i < s; i++) { - auto lowPad = pad[i * 2]; - auto highPad = pad[i * 2 + 1]; - paddedShape.push_back(inputShape[i] + highPad + lowPad); - lowIndices.push_back(rewriter.getIndexAttr(lowPad)); - highIndices.push_back(rewriter.getIndexAttr(highPad)); - } - - Value padValue = rewriter.create(loc, padAttr); - - return linalg::PadTensorOp::createPadScalarOp( - RankedTensorType::get(paddedShape, inputETy), input, padValue, - lowIndices, highIndices, /*nofold=*/false, loc, rewriter) - .result(); -} - static SmallVector filterDynamicDims(SmallVector dynDims) { SmallVector filteredDims; for (auto dim : dynDims) @@ -1065,510 +1034,6 @@ class PointwiseConverter : public OpRewritePattern { } }; -class ConvConverter : public OpConversionPattern { -public: - using OpConversionPattern::OpConversionPattern; - LogicalResult - matchAndRewrite(tosa::Conv2DOp op, OpAdaptor adaptor, - ConversionPatternRewriter &rewriter) const final { - Location loc = op->getLoc(); - Value input = op->getOperand(0); - Value weight = op->getOperand(1); - Value bias = op->getOperand(2); - - ShapedType inputTy = input.getType().cast(); - ShapedType weightTy = weight.getType().cast(); - ShapedType biasTy = bias.getType().cast(); - ShapedType resultTy = op->getResult(0).getType().cast(); - - Type inputETy = inputTy.getElementType(); - Type resultETy = resultTy.getElementType(); - - auto padAttr = op->getAttr("pad").cast(); - auto strideTosaAttr = op->getAttr("stride").cast(); - auto dilationTosaAttr = op->getAttr("dilation").cast(); - bool isQuantized = op->hasAttr("quantization_info"); - - if (!inputTy.hasStaticShape() || !weightTy.hasStaticShape() || - !biasTy.hasStaticShape() || !resultTy.hasStaticShape()) - return rewriter.notifyMatchFailure(op, - "tosa.conv ops require static shapes"); - - if (inputETy.isUnsignedInteger()) - return rewriter.notifyMatchFailure( - op, "tosa.conv ops does not support unsigned integer input"); - - auto weightShape = weightTy.getShape(); - - // Apply padding as necessary. 
- Attribute zeroAttr = rewriter.getZeroAttr(inputETy); - if (isQuantized) { - auto quantizationInfo = - op->getAttr("quantization_info").cast(); - auto iZp = quantizationInfo.input_zp().getValue().getSExtValue(); - - int64_t intMin = - APInt::getSignedMinValue(inputETy.getIntOrFloatBitWidth()) - .getSExtValue(); - int64_t intMax = - APInt::getSignedMaxValue(inputETy.getIntOrFloatBitWidth()) - .getSExtValue(); - - if (iZp < intMin || iZp > intMax) - return rewriter.notifyMatchFailure( - op, "tosa.conv op quantization has zp outside of input range"); - - zeroAttr = rewriter.getIntegerAttr(inputETy, iZp); - } - - llvm::SmallVector pad; - pad.resize(2, 0); - getValuesFromIntArrayAttribute(padAttr, pad); - pad.resize(pad.size() + 2, 0); - input = applyPad(loc, input, pad, zeroAttr, rewriter); - - // Transpose the kernel to match dimension ordering of the linalg - // convolution operation. - // TODO(suderman): See if this can be efficiently folded - check whether - // the input is used anywhere else, if not fold the constant. - SmallVector weightPerm{1, 2, 3, 0}; - SmallVector newWeightShape{weightShape[1], weightShape[2], - weightShape[3], weightShape[0]}; - auto weightPermAttr = DenseIntElementsAttr::get( - RankedTensorType::get({4}, rewriter.getI64Type()), weightPerm); - Value weightPermValue = - rewriter.create(loc, weightPermAttr); - Type newWeightTy = - RankedTensorType::get(newWeightShape, weightTy.getElementType()); - weight = rewriter.create(loc, newWeightTy, weight, - weightPermValue); - - Attribute resultZeroAttr = rewriter.getZeroAttr(resultETy); - Value initTensor = rewriter.create( - loc, resultTy.getShape(), resultETy); - Value zero = rewriter.create(loc, resultZeroAttr); - Value zeroTensor = - rewriter.create(loc, zero, initTensor).getResult(0); - - // Extract the attributes for convolution. - llvm::SmallVector stride, dilation; - getValuesFromIntArrayAttribute(strideTosaAttr, stride); - getValuesFromIntArrayAttribute(dilationTosaAttr, dilation); - - // Create the convolution op. 
- auto strideAttr = DenseIntElementsAttr::get( - RankedTensorType::get({2}, rewriter.getI64Type()), stride); - auto dilationAttr = DenseIntElementsAttr::get( - RankedTensorType::get({2}, rewriter.getI64Type()), dilation); - - // Create maps for the bias broadcasting - SmallVector indexingMaps; - indexingMaps.push_back(AffineMap::get( - /*dimCount=*/resultTy.getRank(), /*symbolCount=*/0, - {rewriter.getAffineDimExpr(3)}, rewriter.getContext())); - indexingMaps.push_back(rewriter.getMultiDimIdentityMap(resultTy.getRank())); - indexingMaps.push_back(rewriter.getMultiDimIdentityMap(resultTy.getRank())); - - Value biasInitTensor = rewriter.create( - loc, resultTy.getShape(), resultETy); - - if (isQuantized) { - auto quantizationInfo = - op->getAttr("quantization_info").cast(); - auto iZp = rewriter.getI32IntegerAttr( - quantizationInfo.input_zp().getValue().getSExtValue()); - auto kZp = rewriter.getI32IntegerAttr( - quantizationInfo.weight_zp().getValue().getSExtValue()); - - auto iZpVal = rewriter.create(loc, iZp); - auto kZpVal = rewriter.create(loc, kZp); - Value conv = - rewriter - .create( - loc, resultTy, ValueRange{input, weight, iZpVal, kZpVal}, - ValueRange{zeroTensor}, strideAttr, dilationAttr) - ->getResult(0); - - Value result = - rewriter - .create( - loc, resultTy, ValueRange({bias, conv}), biasInitTensor, - indexingMaps, getNParallelLoopsAttrs(resultTy.getRank()), - [&](OpBuilder &nestedBuilder, Location nestedLoc, - ValueRange args) { - Value added = nestedBuilder.create( - loc, args[0], args[1]); - nestedBuilder.create(nestedLoc, added); - }) - .getResult(0); - rewriter.replaceOp(op, result); - return success(); - } - - Value conv = rewriter - .create( - loc, resultTy, ValueRange{input, weight}, - ValueRange{zeroTensor}, strideAttr, dilationAttr) - ->getResult(0); - - Value result = - rewriter - .create( - loc, resultTy, ValueRange({bias, conv}), biasInitTensor, - indexingMaps, getNParallelLoopsAttrs(resultTy.getRank()), - [&](OpBuilder &nestedBuilder, Location nestedLoc, - ValueRange args) { - Value added = nestedBuilder.create( - loc, args[0], args[1]); - nestedBuilder.create(nestedLoc, added); - }) - .getResult(0); - - rewriter.replaceOp(op, result); - return success(); - } -}; - -class DepthwiseConvConverter - : public OpConversionPattern { -public: - using OpConversionPattern::OpConversionPattern; - LogicalResult - matchAndRewrite(tosa::DepthwiseConv2DOp op, OpAdaptor adaptor, - ConversionPatternRewriter &rewriter) const final { - Location loc = op->getLoc(); - Value input = op->getOperand(0); - Value weight = op->getOperand(1); - Value bias = op->getOperand(2); - - ShapedType inputTy = input.getType().cast(); - ShapedType weightTy = weight.getType().cast(); - ShapedType biasTy = bias.getType().cast(); - ShapedType resultTy = op->getResult(0).getType().cast(); - - Type inputETy = inputTy.getElementType(); - Type resultETy = resultTy.getElementType(); - - auto padAttr = op->getAttr("pad").cast(); - auto strideTosaAttr = op->getAttr("stride").cast(); - auto dilationTosaAttr = op->getAttr("dilation").cast(); - - bool isQuantized = op->hasAttr("quantization_info"); - IntegerAttr iZp; - IntegerAttr kZp; - if (isQuantized) { - auto quantizationInfo = - op->getAttr("quantization_info").cast(); - iZp = rewriter.getI32IntegerAttr( - quantizationInfo.input_zp().getValue().getSExtValue()); - kZp = rewriter.getI32IntegerAttr( - quantizationInfo.weight_zp().getValue().getSExtValue()); - } - - if (!inputTy.hasStaticShape() || !weightTy.hasStaticShape() || - !biasTy.hasStaticShape() || 
!resultTy.hasStaticShape()) - return rewriter.notifyMatchFailure(op, - "tosa.conv ops require static shapes"); - - auto weightShape = weightTy.getShape(); - auto resultShape = resultTy.getShape(); - - // Apply padding as necessary. - Attribute zeroAttr = rewriter.getZeroAttr(inputETy); - if (isQuantized) { - auto quantizationInfo = - op->getAttr("quantization_info").cast(); - auto iZp = quantizationInfo.input_zp().getValue().getSExtValue(); - - int64_t intMin = - APInt::getSignedMinValue(inputETy.getIntOrFloatBitWidth()) - .getSExtValue(); - int64_t intMax = - APInt::getSignedMaxValue(inputETy.getIntOrFloatBitWidth()) - .getSExtValue(); - - if (iZp < intMin || iZp > intMax) - return rewriter.notifyMatchFailure( - op, "tosa.depthwise_conv op quantization has zp outside of input " - "range"); - - zeroAttr = rewriter.getIntegerAttr(inputETy, iZp); - } - - llvm::SmallVector pad; - pad.resize(2, 0); - getValuesFromIntArrayAttribute(padAttr, pad); - pad.resize(pad.size() + 2, 0); - - input = applyPad(loc, input, pad, zeroAttr, rewriter); - - // Extract the attributes for convolution. - llvm::SmallVector stride, dilation; - getValuesFromIntArrayAttribute(strideTosaAttr, stride); - getValuesFromIntArrayAttribute(dilationTosaAttr, dilation); - - // Create the convolution op. - auto strideAttr = DenseIntElementsAttr::get( - RankedTensorType::get({2}, rewriter.getI64Type()), stride); - auto dilationAttr = DenseIntElementsAttr::get( - RankedTensorType::get({2}, rewriter.getI64Type()), dilation); - ShapedType linalgConvTy = - RankedTensorType::get({resultShape[0], resultShape[1], resultShape[2], - weightShape[2], weightShape[3]}, - resultETy); - - // Broadcast the initial value to the output tensor before convolving. - SmallVector indexingMaps; - indexingMaps.push_back(AffineMap::get( - /*dimCount=*/resultTy.getRank(), /*symbolCount=*/0, - {rewriter.getAffineDimExpr(3)}, rewriter.getContext())); - indexingMaps.push_back(rewriter.getMultiDimIdentityMap(resultTy.getRank())); - indexingMaps.push_back(rewriter.getMultiDimIdentityMap(resultTy.getRank())); - - Attribute resultZeroAttr = rewriter.getZeroAttr(resultETy); - Value initTensor = rewriter.create( - loc, linalgConvTy.getShape(), resultETy); - Value zero = rewriter.create(loc, resultZeroAttr); - Value zeroTensor = - rewriter.create(loc, zero, initTensor).getResult(0); - - Value biasInitTensor = rewriter.create( - loc, resultTy.getShape(), resultETy); - if (!isQuantized) { - Value conv = rewriter - .create( - loc, linalgConvTy, ValueRange{input, weight}, - ValueRange{zeroTensor}, strideAttr, dilationAttr) - .getResult(0); - Value convReshape = rewriter.create(loc, resultTy, conv); - Value result = - rewriter - .create( - loc, resultTy, ValueRange({bias, convReshape}), - biasInitTensor, indexingMaps, - getNParallelLoopsAttrs(resultTy.getRank()), - [&](OpBuilder &nestedBuilder, Location nestedLoc, - ValueRange args) { - Value added = nestedBuilder.create( - loc, args[0], args[1]); - nestedBuilder.create(nestedLoc, added); - }) - .getResult(0); - rewriter.replaceOp(op, result); - } else { - auto iZpVal = rewriter.create(loc, iZp); - auto kZpVal = rewriter.create(loc, kZp); - Value conv = - rewriter - .create( - loc, linalgConvTy, ValueRange{input, weight, iZpVal, kZpVal}, - ValueRange{zeroTensor}, strideAttr, dilationAttr) - .getResult(0); - Value convReshape = rewriter.create(loc, resultTy, conv); - Value result = - rewriter - .create( - loc, resultTy, ValueRange({bias, convReshape}), - biasInitTensor, indexingMaps, - 
getNParallelLoopsAttrs(resultTy.getRank()), - [&](OpBuilder &nestedBuilder, Location nestedLoc, - ValueRange args) { - Value added = nestedBuilder.create( - loc, args[0], args[1]); - nestedBuilder.create(nestedLoc, added); - }) - .getResult(0); - rewriter.replaceOp(op, result); - } - return success(); - } -}; - -class MatMulConverter : public OpConversionPattern { -public: - using OpConversionPattern::OpConversionPattern; - LogicalResult - matchAndRewrite(tosa::MatMulOp op, OpAdaptor adaptor, - ConversionPatternRewriter &rewriter) const final { - Location loc = op.getLoc(); - - auto outputTy = op.getType().cast(); - auto outputElementTy = outputTy.getElementType(); - - auto firstOperandTy = op->getOperand(0).getType().cast(); - auto secondOperandTy = op->getOperand(1).getType().cast(); - - SmallVector dynDims; - dynDims.resize(op->getResult(0).getType().cast().getRank()); - - if (!firstOperandTy.hasRank() || firstOperandTy.isDynamicDim(0)) { - dynDims[0] = rewriter.create(loc, op->getOperand(0), 0); - } - - if (!firstOperandTy.hasRank() || firstOperandTy.isDynamicDim(1)) { - dynDims[1] = rewriter.create(loc, op->getOperand(0), 1); - } - - if (!secondOperandTy.hasRank() || secondOperandTy.isDynamicDim(2)) { - dynDims[2] = rewriter.create(loc, op->getOperand(1), 2); - } - - SmallVector filteredDims = filterDynamicDims(dynDims); - - auto zeroAttr = rewriter.getZeroAttr(outputElementTy); - Value zero = rewriter.create(loc, zeroAttr); - auto initTensor = rewriter.create( - loc, filteredDims, outputTy.getShape(), outputTy.getElementType()); - Value zeroTensor = - rewriter.create(loc, zero, initTensor).getResult(0); - if (!op.quantization_info()) { - rewriter.replaceOpWithNewOp( - op, TypeRange{op.getType()}, ValueRange{adaptor.a(), adaptor.b()}, - ValueRange{zeroTensor}); - return success(); - } - - auto quantizationInfo = op.quantization_info().getValue(); - auto aZp = rewriter.create( - loc, rewriter.getI32IntegerAttr( - quantizationInfo.a_zp().getValue().getSExtValue())); - auto bZp = rewriter.create( - loc, rewriter.getI32IntegerAttr( - quantizationInfo.b_zp().getValue().getSExtValue())); - rewriter.replaceOpWithNewOp( - op, TypeRange{op.getType()}, - ValueRange{adaptor.a(), adaptor.b(), aZp, bZp}, zeroTensor); - - return success(); - } -}; - -class FullyConnectedConverter - : public OpConversionPattern { -public: - using OpConversionPattern::OpConversionPattern; - LogicalResult - matchAndRewrite(tosa::FullyConnectedOp op, OpAdaptor adaptor, - ConversionPatternRewriter &rewriter) const final { - Location loc = op.getLoc(); - auto outputTy = op.getType().cast(); - auto input = op.input(); - auto inputTy = input.getType().cast(); - - auto bias = op.bias(); - - auto weight = op.weight(); - auto weightTy = weight.getType().cast(); - auto weightShape = weightTy.getShape(); - - auto outputETy = outputTy.getElementType(); - - SmallVector dynDims; - dynDims.resize(op->getResult(0).getType().cast().getRank()); - - if (!inputTy.hasRank() || inputTy.isDynamicDim(0)) { - dynDims[0] = rewriter.create(loc, input, 0); - } - - if (!weightTy.hasRank() || weightTy.isDynamicDim(0)) { - dynDims[1] = rewriter.create(loc, weight, 0); - } - - SmallVector filteredDims = filterDynamicDims(dynDims); - - // Creating maps for the output of MatMul and the bias - SmallVector indexingMaps; - - // Broadcast the bias. 
- indexingMaps.push_back(AffineMap::get(/*dimCount=*/2, /*symbolCount=*/0, - {rewriter.getAffineDimExpr(1)}, - rewriter.getContext())); - - indexingMaps.push_back(rewriter.getMultiDimIdentityMap(outputTy.getRank())); - indexingMaps.push_back(rewriter.getMultiDimIdentityMap(outputTy.getRank())); - - auto initTensor = rewriter.create( - loc, filteredDims, outputTy.getShape(), outputTy.getElementType()); - - // When quantized, the input elemeny type is not the same as the output - Attribute resultZeroAttr = rewriter.getZeroAttr(outputETy); - Value zero = rewriter.create(loc, resultZeroAttr); - Value zeroTensor = - rewriter.create(loc, zero, initTensor).getResult(0); - - SmallVector permutation{1, 0}; - auto permutationAttr = DenseIntElementsAttr::get( - RankedTensorType::get({2}, rewriter.getI64Type()), permutation); - Value permutationValue = - rewriter.create(loc, permutationAttr); - - SmallVector newWeightShape{weightShape[1], weightShape[0]}; - Type newWeightTy = - RankedTensorType::get(newWeightShape, weightTy.getElementType()); - - Value transposedWeight = rewriter.create( - loc, newWeightTy, weight, permutationValue); - - auto biasInitTensor = - rewriter - .create(loc, filteredDims, - outputTy.getShape(), outputETy) - ->getResults(); - - if (!op.quantization_info()) { - Value matmul = rewriter - .create( - loc, TypeRange{op.getType()}, - ValueRange{input, transposedWeight}, zeroTensor) - ->getResult(0); - - Value result = - rewriter - .create( - loc, outputTy, ValueRange({bias, matmul}), biasInitTensor, - indexingMaps, getNParallelLoopsAttrs(outputTy.getRank()), - [&](OpBuilder &nestedBuilder, Location nestedLoc, - ValueRange args) { - Value added = nestedBuilder.create( - loc, args[0], args[1]); - nestedBuilder.create(nestedLoc, added); - }) - .getResult(0); - rewriter.replaceOp(op, result); - return success(); - } - - auto quantizationInfo = op.quantization_info().getValue(); - auto inputZp = rewriter.create( - loc, rewriter.getI32IntegerAttr( - quantizationInfo.input_zp().getValue().getSExtValue())); - auto outputZp = rewriter.create( - loc, rewriter.getI32IntegerAttr( - quantizationInfo.weight_zp().getValue().getSExtValue())); - Value matmul = - rewriter - .create( - loc, TypeRange{op.getType()}, - ValueRange{input, transposedWeight, inputZp, outputZp}, - zeroTensor) - ->getResult(0); - Value result = - rewriter - .create( - loc, outputTy, ValueRange({bias, matmul}), biasInitTensor, - indexingMaps, getNParallelLoopsAttrs(outputTy.getRank()), - [&](OpBuilder &nestedBuilder, Location nestedLoc, - ValueRange args) { - Value added = nestedBuilder.create( - loc, args[0], args[1]); - nestedBuilder.create(nestedLoc, added); - }) - .getResult(0); - rewriter.replaceOp(op, result); - return success(); - } -}; - class ReshapeConverterCollapse : public OpConversionPattern { public: using OpConversionPattern::OpConversionPattern; @@ -2810,277 +2275,6 @@ class TableConverter : public OpRewritePattern { } }; -class MaxPool2dConverter : public OpRewritePattern { -public: - using OpRewritePattern::OpRewritePattern; - - LogicalResult matchAndRewrite(tosa::MaxPool2dOp op, - PatternRewriter &rewriter) const final { - Location loc = op.getLoc(); - Value input = op.input(); - ShapedType inputTy = input.getType().cast(); - - ShapedType resultTy = op.getType().template cast(); - Type resultETy = inputTy.getElementType(); - - if (!inputTy.hasStaticShape()) - return failure(); - - // Determine what the initial value needs to be for the max pool op. 
- Attribute initialAttr; - if (resultETy.isF32()) - initialAttr = rewriter.getFloatAttr( - resultETy, - APFloat::getLargest(resultETy.cast().getFloatSemantics(), - true)); - - if (resultETy.isa()) - initialAttr = rewriter.getIntegerAttr( - resultETy, - APInt::getSignedMinValue(resultETy.getIntOrFloatBitWidth())); - - if (!initialAttr) - return rewriter.notifyMatchFailure( - op, "Unsupported initial value for tosa.maxpool_2d op"); - - // Apply padding as necessary. - llvm::SmallVector pad; - pad.resize(2, 0); - getValuesFromIntArrayAttribute(op.pad(), pad); - pad.resize(pad.size() + 2, 0); - Value paddedInput = applyPad(loc, input, pad, initialAttr, rewriter); - - Value initialValue = rewriter.create(loc, initialAttr); - - SmallVector kernel, stride; - getValuesFromIntArrayAttribute(op.kernel(), kernel); - getValuesFromIntArrayAttribute(op.stride(), stride); - - Attribute strideAttr = rewriter.getI64VectorAttr(stride); - Attribute dilationAttr = rewriter.getI64VectorAttr({1, 1}); - - // Create the linalg op that performs pooling. - Value initTensor = rewriter.create( - loc, resultTy.getShape(), resultTy.getElementType()); - - Value filledInitTensor = - rewriter.create(loc, initialValue, initTensor).result(); - - Value fakeWindowDims = - rewriter.create(loc, kernel, resultETy); - - rewriter.replaceOpWithNewOp( - op, ArrayRef{resultTy}, ValueRange{paddedInput, fakeWindowDims}, - filledInitTensor, strideAttr, dilationAttr); - return success(); - } -}; - -class AvgPool2dConverter : public OpRewritePattern { -public: - using OpRewritePattern::OpRewritePattern; - - LogicalResult matchAndRewrite(tosa::AvgPool2dOp op, - PatternRewriter &rewriter) const final { - Location loc = op.getLoc(); - Value input = op.input(); - ShapedType inputTy = input.getType().cast(); - Type inElementTy = inputTy.getElementType(); - - ShapedType resultTy = op.getType().template cast(); - Type resultETy = op.getType().cast().getElementType(); - - Type accETy = - inElementTy.isa() ? rewriter.getI32Type() : inElementTy; - ShapedType accTy = resultTy.clone(accETy); - - if (!inputTy.hasStaticShape()) - return failure(); - - // Apply padding as necessary. - llvm::SmallVector pad; - pad.resize(2, 0); - getValuesFromIntArrayAttribute(op.pad(), pad); - pad.resize(pad.size() + 2, 0); - Attribute padAttr = rewriter.getZeroAttr(inElementTy); - Value paddedInput = applyPad(loc, input, pad, padAttr, rewriter); - - Attribute initialAttr = rewriter.getZeroAttr(accETy); - Value initialValue = rewriter.create(loc, initialAttr); - - SmallVector kernel, stride; - getValuesFromIntArrayAttribute(op.kernel(), kernel); - getValuesFromIntArrayAttribute(op.stride(), stride); - - Attribute strideAttr = rewriter.getI64VectorAttr(stride); - Attribute dilationAttr = rewriter.getI64VectorAttr({1, 1}); - - // Create the linalg op that performs pooling. - Value poolInitTensor = - rewriter.create(loc, accTy.getShape(), accETy); - - Value filledInitTensor = - rewriter.create(loc, initialValue, poolInitTensor) - .result(); - - Value fakeWindowDims = - rewriter.create(loc, kernel, accETy); - - // Sum across the pooled region. - Value poolingOp = rewriter - .create( - loc, ArrayRef{accTy}, - ValueRange{paddedInput, fakeWindowDims}, - filledInitTensor, strideAttr, dilationAttr) - .getResult(0); - - // Normalize the summed value by the number of elements grouped in each - // pool. 
- auto poolingOpTy = poolingOp.getType().cast(); - auto affineMap = rewriter.getMultiDimIdentityMap(resultTy.getRank()); - - Value genericInitTensor = rewriter.create( - loc, resultTy.getShape(), resultETy); - - auto genericOp = rewriter.create( - loc, ArrayRef({resultTy}), ValueRange{poolingOp}, - ValueRange{genericInitTensor}, - ArrayRef({affineMap, affineMap}), - getNParallelLoopsAttrs(resultTy.getRank()), - [&](OpBuilder &b, Location loc, ValueRange args) { - auto zero = rewriter.create(loc, 0); - auto one = rewriter.create(loc, 1); - auto iH = rewriter.create( - loc, poolingOpTy.getDimSize(1) - 1); - auto iW = rewriter.create( - loc, poolingOpTy.getDimSize(2) - 1); - - // Compute the indices from either end. - auto y0 = rewriter.create(loc, 1); - auto x0 = rewriter.create(loc, 2); - auto y1 = rewriter.create(loc, iH, y0); - auto x1 = rewriter.create(loc, iW, x0); - - // Determines what the portion of valid input is covered by the - // kernel. - auto padFn = [&](Value v, Value x, int64_t pad) -> Value { - if (pad == 0) - return v; - - auto padVal = rewriter.create(loc, pad); - Value dx = rewriter.create(loc, x, padVal); - - Value cmp = rewriter.create( - loc, arith::CmpIPredicate::slt, dx, zero); - Value offset = rewriter.create(loc, cmp, dx, zero); - return rewriter.create(loc, v, offset)->getResult(0); - }; - - // Compute the vertical component of coverage. - auto kH0 = rewriter.create(loc, kernel[0]); - auto kH1 = padFn(kH0, y0, pad[2]); - auto kH2 = padFn(kH1, y1, pad[3]); - auto kHCmp = rewriter.create( - loc, arith::CmpIPredicate::slt, kH2, one); - auto kH3 = rewriter.create(loc, kHCmp, one, kH2); - - // compute the horizontal component of coverage. - auto kW0 = rewriter.create(loc, kernel[1]); - auto kW1 = padFn(kW0, x0, pad[4]); - auto kW2 = padFn(kW1, x1, pad[5]); - auto kWCmp = rewriter.create( - loc, arith::CmpIPredicate::slt, kW2, one); - auto kW3 = rewriter.create(loc, kWCmp, one, kW2); - - // Compute the total number of elements and normalize. - Value count = rewriter.create(loc, kH3, kW3); - auto countI = rewriter.create( - loc, rewriter.getI32Type(), count); - - // Divide by the number of summed values. For floats this is just - // a div however for quantized values input normalization had - // to be applied. - Value poolVal = args[0]; - if (accETy.isa()) { - auto countF = rewriter.create(loc, accETy, countI); - poolVal = rewriter.create(loc, poolVal, countF) - ->getResult(0); - } else { - - // If we have quantization information we need to apply an offset - // for the input zp value. - if (op.quantization_info()) { - auto quantizationInfo = op.quantization_info().getValue(); - auto inputZp = rewriter.create( - loc, quantizationInfo.input_zp()); - Value offset = - rewriter.create(loc, accETy, countI, inputZp); - poolVal = - rewriter.create(loc, accETy, poolVal, offset); - } - - // Compute the multiplier and shift values for the quantization - // normalization. Preferably we would want to compute more bits - // however 32-bits should be enough for compute. Honestly we - // should probably straight divide. 
- int64_t numerator = ((1 << 30) + 1); - int64_t shift = 30; - - Value numeratorVal = rewriter.create( - loc, rewriter.getI32IntegerAttr(numerator)); - Value multiplierVal = - rewriter - .create(loc, rewriter.getI32Type(), - numeratorVal, countI) - .getResult(); - Value shiftVal = rewriter.create( - loc, rewriter.getI8IntegerAttr(shift)); - - auto scaled = - rewriter - .create( - loc, rewriter.getI32Type(), poolVal, multiplierVal, - shiftVal, rewriter.getBoolAttr(false)) - .getResult(); - - // If we have quantization information we need to apply output - // zeropoint. - if (op.quantization_info()) { - auto quantizationInfo = op.quantization_info().getValue(); - auto outputZp = rewriter.create( - loc, quantizationInfo.output_zp()); - scaled = rewriter.create(loc, scaled, outputZp) - .getResult(); - } - - // Apply Clip. - int64_t outBitwidth = resultETy.getIntOrFloatBitWidth(); - - auto min = rewriter.create( - loc, APInt::getSignedMinValue(outBitwidth).getSExtValue(), - accETy); - auto max = rewriter.create( - loc, APInt::getSignedMaxValue(outBitwidth).getSExtValue(), - accETy); - auto clamp = clampHelper( - loc, scaled, min, max, arith::CmpIPredicate::slt, rewriter); - - poolVal = clamp; - // Convert type. - if (resultETy != clamp.getType()) { - poolVal = - rewriter.create(loc, resultETy, poolVal); - } - } - - rewriter.create(loc, poolVal); - }); - - rewriter.replaceOp(op, genericOp.getResult(0)); - return success(); - } -}; - } // namespace void mlir::tosa::populateTosaToLinalgConversionPatterns( @@ -3132,8 +2326,6 @@ void mlir::tosa::populateTosaToLinalgConversionPatterns( ReduceConverter, ArgMaxConverter, ConcatConverter, - ConvConverter, - DepthwiseConvConverter, GatherConverter, PadConverter, ReshapeConverterCollapse, @@ -3144,10 +2336,6 @@ void mlir::tosa::populateTosaToLinalgConversionPatterns( ReverseConverter, TableConverter, TileConverter, - TransposeConverter, - MatMulConverter, - MaxPool2dConverter, - AvgPool2dConverter, - FullyConnectedConverter>(patterns->getContext()); + TransposeConverter>(patterns->getContext()); // clang-format on } diff --git a/mlir/lib/Conversion/TosaToLinalg/TosaToLinalgNamed.cpp b/mlir/lib/Conversion/TosaToLinalg/TosaToLinalgNamed.cpp new file mode 100644 index 000000000000..90220ef44e97 --- /dev/null +++ b/mlir/lib/Conversion/TosaToLinalg/TosaToLinalgNamed.cpp @@ -0,0 +1,885 @@ +//===- TosaToLinalgNamed.cpp - Lowering Tosa to Linalg Named Ops ----------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// These rewriters lower from the Tosa to the Linalg named ops. 
+// +//===----------------------------------------------------------------------===// + +#include "mlir/Conversion/TosaToLinalg/TosaToLinalg.h" +#include "mlir/Dialect/Arithmetic/IR/Arithmetic.h" +#include "mlir/Dialect/Linalg/IR/Linalg.h" +#include "mlir/Dialect/Math/IR/Math.h" +#include "mlir/Dialect/SCF/SCF.h" +#include "mlir/Dialect/StandardOps/IR/Ops.h" +#include "mlir/Dialect/Tensor/IR/Tensor.h" +#include "mlir/Dialect/Tosa/IR/TosaOps.h" +#include "mlir/Dialect/Utils/ReshapeOpsUtils.h" +#include "mlir/IR/Matchers.h" +#include "mlir/IR/PatternMatch.h" +#include "mlir/Transforms/DialectConversion.h" +#include "mlir/Transforms/GreedyPatternRewriteDriver.h" + +#include + +using namespace mlir; + +static SmallVector getNParallelLoopsAttrs(unsigned nParallelLoops) { + return SmallVector(nParallelLoops, getParallelIteratorTypeName()); +} + +template +static void getValuesFromIntArrayAttribute(ArrayAttr attr, + SmallVector &arrayValues) { + for (Attribute val : attr.getValue()) { + arrayValues.push_back(val.cast().getValue().getSExtValue()); + } +} + +template +static mlir::SelectOp clampHelper(Location loc, Value arg, + arith::ConstantOp min, arith::ConstantOp max, + P pred, OpBuilder &rewriter) { + auto smallerThanMin = rewriter.create(loc, pred, arg, min); + auto minOrArg = + rewriter.create(loc, smallerThanMin, min, arg); + auto largerThanMax = rewriter.create(loc, pred, max, arg); + return rewriter.create(loc, largerThanMax, max, minOrArg); +} + +static mlir::Value applyPad(Location loc, Value input, ArrayRef pad, + Attribute padAttr, OpBuilder &rewriter) { + // Input should be padded if necessary. + if (llvm::all_of(pad, [](int64_t p) { return p == 0; })) + return input; + + ShapedType inputTy = input.getType().cast(); + Type inputETy = inputTy.getElementType(); + auto inputShape = inputTy.getShape(); + + assert((inputShape.size() * 2) == pad.size()); + + SmallVector paddedShape; + SmallVector lowIndices; + SmallVector highIndices; + for (int i = 0, s = inputShape.size(); i < s; i++) { + auto lowPad = pad[i * 2]; + auto highPad = pad[i * 2 + 1]; + paddedShape.push_back(inputShape[i] + highPad + lowPad); + lowIndices.push_back(rewriter.getIndexAttr(lowPad)); + highIndices.push_back(rewriter.getIndexAttr(highPad)); + } + + Value padValue = rewriter.create(loc, padAttr); + + return linalg::PadTensorOp::createPadScalarOp( + RankedTensorType::get(paddedShape, inputETy), input, padValue, + lowIndices, highIndices, /*nofold=*/false, loc, rewriter) + .result(); +} + +static SmallVector filterDynamicDims(SmallVector dynDims) { + SmallVector filteredDims; + for (auto dim : dynDims) + if (dim) + filteredDims.push_back(dim); + return filteredDims; +} + +namespace { + +class ConvConverter : public OpConversionPattern { +public: + using OpConversionPattern::OpConversionPattern; + LogicalResult + matchAndRewrite(tosa::Conv2DOp op, OpAdaptor adaptor, + ConversionPatternRewriter &rewriter) const final { + Location loc = op->getLoc(); + Value input = op->getOperand(0); + Value weight = op->getOperand(1); + Value bias = op->getOperand(2); + + ShapedType inputTy = input.getType().cast(); + ShapedType weightTy = weight.getType().cast(); + ShapedType biasTy = bias.getType().cast(); + ShapedType resultTy = op->getResult(0).getType().cast(); + + Type inputETy = inputTy.getElementType(); + Type resultETy = resultTy.getElementType(); + + auto padAttr = op->getAttr("pad").cast(); + auto strideTosaAttr = op->getAttr("stride").cast(); + auto dilationTosaAttr = op->getAttr("dilation").cast(); + bool isQuantized = 
op->hasAttr("quantization_info"); + + if (!inputTy.hasStaticShape() || !weightTy.hasStaticShape() || + !biasTy.hasStaticShape() || !resultTy.hasStaticShape()) + return rewriter.notifyMatchFailure(op, + "tosa.conv ops require static shapes"); + + if (inputETy.isUnsignedInteger()) + return rewriter.notifyMatchFailure( + op, "tosa.conv ops does not support unsigned integer input"); + + auto weightShape = weightTy.getShape(); + + // Apply padding as necessary. + Attribute zeroAttr = rewriter.getZeroAttr(inputETy); + if (isQuantized) { + auto quantizationInfo = + op->getAttr("quantization_info").cast(); + auto iZp = quantizationInfo.input_zp().getValue().getSExtValue(); + + int64_t intMin = + APInt::getSignedMinValue(inputETy.getIntOrFloatBitWidth()) + .getSExtValue(); + int64_t intMax = + APInt::getSignedMaxValue(inputETy.getIntOrFloatBitWidth()) + .getSExtValue(); + + if (iZp < intMin || iZp > intMax) + return rewriter.notifyMatchFailure( + op, "tosa.conv op quantization has zp outside of input range"); + + zeroAttr = rewriter.getIntegerAttr(inputETy, iZp); + } + + llvm::SmallVector pad; + pad.resize(2, 0); + getValuesFromIntArrayAttribute(padAttr, pad); + pad.resize(pad.size() + 2, 0); + input = applyPad(loc, input, pad, zeroAttr, rewriter); + + // Transpose the kernel to match dimension ordering of the linalg + // convolution operation. + // TODO(suderman): See if this can be efficiently folded - check whether + // the input is used anywhere else, if not fold the constant. + SmallVector weightPerm{1, 2, 3, 0}; + SmallVector newWeightShape{weightShape[1], weightShape[2], + weightShape[3], weightShape[0]}; + auto weightPermAttr = DenseIntElementsAttr::get( + RankedTensorType::get({4}, rewriter.getI64Type()), weightPerm); + Value weightPermValue = + rewriter.create(loc, weightPermAttr); + Type newWeightTy = + RankedTensorType::get(newWeightShape, weightTy.getElementType()); + weight = rewriter.create(loc, newWeightTy, weight, + weightPermValue); + + Attribute resultZeroAttr = rewriter.getZeroAttr(resultETy); + Value initTensor = rewriter.create( + loc, resultTy.getShape(), resultETy); + Value zero = rewriter.create(loc, resultZeroAttr); + Value zeroTensor = + rewriter.create(loc, zero, initTensor).getResult(0); + + // Extract the attributes for convolution. + llvm::SmallVector stride, dilation; + getValuesFromIntArrayAttribute(strideTosaAttr, stride); + getValuesFromIntArrayAttribute(dilationTosaAttr, dilation); + + // Create the convolution op. 
+ auto strideAttr = DenseIntElementsAttr::get( + RankedTensorType::get({2}, rewriter.getI64Type()), stride); + auto dilationAttr = DenseIntElementsAttr::get( + RankedTensorType::get({2}, rewriter.getI64Type()), dilation); + + // Create maps for the bias broadcasting + SmallVector indexingMaps; + indexingMaps.push_back(AffineMap::get( + /*dimCount=*/resultTy.getRank(), /*symbolCount=*/0, + {rewriter.getAffineDimExpr(3)}, rewriter.getContext())); + indexingMaps.push_back(rewriter.getMultiDimIdentityMap(resultTy.getRank())); + indexingMaps.push_back(rewriter.getMultiDimIdentityMap(resultTy.getRank())); + + Value biasInitTensor = rewriter.create( + loc, resultTy.getShape(), resultETy); + + if (isQuantized) { + auto quantizationInfo = + op->getAttr("quantization_info").cast(); + auto iZp = rewriter.getI32IntegerAttr( + quantizationInfo.input_zp().getValue().getSExtValue()); + auto kZp = rewriter.getI32IntegerAttr( + quantizationInfo.weight_zp().getValue().getSExtValue()); + + auto iZpVal = rewriter.create(loc, iZp); + auto kZpVal = rewriter.create(loc, kZp); + Value conv = + rewriter + .create( + loc, resultTy, ValueRange{input, weight, iZpVal, kZpVal}, + ValueRange{zeroTensor}, strideAttr, dilationAttr) + ->getResult(0); + + Value result = + rewriter + .create( + loc, resultTy, ValueRange({bias, conv}), biasInitTensor, + indexingMaps, getNParallelLoopsAttrs(resultTy.getRank()), + [&](OpBuilder &nestedBuilder, Location nestedLoc, + ValueRange args) { + Value added = nestedBuilder.create( + loc, args[0], args[1]); + nestedBuilder.create(nestedLoc, added); + }) + .getResult(0); + rewriter.replaceOp(op, result); + return success(); + } + + Value conv = rewriter + .create( + loc, resultTy, ValueRange{input, weight}, + ValueRange{zeroTensor}, strideAttr, dilationAttr) + ->getResult(0); + + Value result = + rewriter + .create( + loc, resultTy, ValueRange({bias, conv}), biasInitTensor, + indexingMaps, getNParallelLoopsAttrs(resultTy.getRank()), + [&](OpBuilder &nestedBuilder, Location nestedLoc, + ValueRange args) { + Value added = nestedBuilder.create( + loc, args[0], args[1]); + nestedBuilder.create(nestedLoc, added); + }) + .getResult(0); + + rewriter.replaceOp(op, result); + return success(); + } +}; + +class DepthwiseConvConverter + : public OpConversionPattern { +public: + using OpConversionPattern::OpConversionPattern; + LogicalResult + matchAndRewrite(tosa::DepthwiseConv2DOp op, OpAdaptor adaptor, + ConversionPatternRewriter &rewriter) const final { + Location loc = op->getLoc(); + Value input = op->getOperand(0); + Value weight = op->getOperand(1); + Value bias = op->getOperand(2); + + ShapedType inputTy = input.getType().cast(); + ShapedType weightTy = weight.getType().cast(); + ShapedType biasTy = bias.getType().cast(); + ShapedType resultTy = op->getResult(0).getType().cast(); + + Type inputETy = inputTy.getElementType(); + Type resultETy = resultTy.getElementType(); + + auto padAttr = op->getAttr("pad").cast(); + auto strideTosaAttr = op->getAttr("stride").cast(); + auto dilationTosaAttr = op->getAttr("dilation").cast(); + + bool isQuantized = op->hasAttr("quantization_info"); + IntegerAttr iZp; + IntegerAttr kZp; + if (isQuantized) { + auto quantizationInfo = + op->getAttr("quantization_info").cast(); + iZp = rewriter.getI32IntegerAttr( + quantizationInfo.input_zp().getValue().getSExtValue()); + kZp = rewriter.getI32IntegerAttr( + quantizationInfo.weight_zp().getValue().getSExtValue()); + } + + if (!inputTy.hasStaticShape() || !weightTy.hasStaticShape() || + !biasTy.hasStaticShape() || 
!resultTy.hasStaticShape()) + return rewriter.notifyMatchFailure(op, + "tosa.conv ops require static shapes"); + + auto weightShape = weightTy.getShape(); + auto resultShape = resultTy.getShape(); + + // Apply padding as necessary. + Attribute zeroAttr = rewriter.getZeroAttr(inputETy); + if (isQuantized) { + auto quantizationInfo = + op->getAttr("quantization_info").cast(); + auto iZp = quantizationInfo.input_zp().getValue().getSExtValue(); + + int64_t intMin = + APInt::getSignedMinValue(inputETy.getIntOrFloatBitWidth()) + .getSExtValue(); + int64_t intMax = + APInt::getSignedMaxValue(inputETy.getIntOrFloatBitWidth()) + .getSExtValue(); + + if (iZp < intMin || iZp > intMax) + return rewriter.notifyMatchFailure( + op, "tosa.depthwise_conv op quantization has zp outside of input " + "range"); + + zeroAttr = rewriter.getIntegerAttr(inputETy, iZp); + } + + llvm::SmallVector pad; + pad.resize(2, 0); + getValuesFromIntArrayAttribute(padAttr, pad); + pad.resize(pad.size() + 2, 0); + + input = applyPad(loc, input, pad, zeroAttr, rewriter); + + // Extract the attributes for convolution. + llvm::SmallVector stride, dilation; + getValuesFromIntArrayAttribute(strideTosaAttr, stride); + getValuesFromIntArrayAttribute(dilationTosaAttr, dilation); + + // Create the convolution op. + auto strideAttr = DenseIntElementsAttr::get( + RankedTensorType::get({2}, rewriter.getI64Type()), stride); + auto dilationAttr = DenseIntElementsAttr::get( + RankedTensorType::get({2}, rewriter.getI64Type()), dilation); + ShapedType linalgConvTy = + RankedTensorType::get({resultShape[0], resultShape[1], resultShape[2], + weightShape[2], weightShape[3]}, + resultETy); + + // Broadcast the initial value to the output tensor before convolving. + SmallVector indexingMaps; + indexingMaps.push_back(AffineMap::get( + /*dimCount=*/resultTy.getRank(), /*symbolCount=*/0, + {rewriter.getAffineDimExpr(3)}, rewriter.getContext())); + indexingMaps.push_back(rewriter.getMultiDimIdentityMap(resultTy.getRank())); + indexingMaps.push_back(rewriter.getMultiDimIdentityMap(resultTy.getRank())); + + Attribute resultZeroAttr = rewriter.getZeroAttr(resultETy); + Value initTensor = rewriter.create( + loc, linalgConvTy.getShape(), resultETy); + Value zero = rewriter.create(loc, resultZeroAttr); + Value zeroTensor = + rewriter.create(loc, zero, initTensor).getResult(0); + + Value biasInitTensor = rewriter.create( + loc, resultTy.getShape(), resultETy); + if (!isQuantized) { + Value conv = rewriter + .create( + loc, linalgConvTy, ValueRange{input, weight}, + ValueRange{zeroTensor}, strideAttr, dilationAttr) + .getResult(0); + Value convReshape = rewriter.create( + loc, resultTy, conv, rewriter.getI64ArrayAttr(resultTy.getShape())); + Value result = + rewriter + .create( + loc, resultTy, ValueRange({bias, convReshape}), + biasInitTensor, indexingMaps, + getNParallelLoopsAttrs(resultTy.getRank()), + [&](OpBuilder &nestedBuilder, Location nestedLoc, + ValueRange args) { + Value added = nestedBuilder.create( + loc, args[0], args[1]); + nestedBuilder.create(nestedLoc, added); + }) + .getResult(0); + rewriter.replaceOp(op, result); + } else { + auto iZpVal = rewriter.create(loc, iZp); + auto kZpVal = rewriter.create(loc, kZp); + Value conv = + rewriter + .create( + loc, linalgConvTy, ValueRange{input, weight, iZpVal, kZpVal}, + ValueRange{zeroTensor}, strideAttr, dilationAttr) + .getResult(0); + Value convReshape = rewriter.create( + loc, resultTy, conv, rewriter.getI64ArrayAttr(resultTy.getShape())); + Value result = + rewriter + .create( + loc, resultTy, 
ValueRange({bias, convReshape}), + biasInitTensor, indexingMaps, + getNParallelLoopsAttrs(resultTy.getRank()), + [&](OpBuilder &nestedBuilder, Location nestedLoc, + ValueRange args) { + Value added = nestedBuilder.create( + loc, args[0], args[1]); + nestedBuilder.create(nestedLoc, added); + }) + .getResult(0); + rewriter.replaceOp(op, result); + } + return success(); + } +}; + +class MatMulConverter : public OpConversionPattern { +public: + using OpConversionPattern::OpConversionPattern; + LogicalResult + matchAndRewrite(tosa::MatMulOp op, OpAdaptor adaptor, + ConversionPatternRewriter &rewriter) const final { + Location loc = op.getLoc(); + + auto outputTy = op.getType().cast(); + auto outputElementTy = outputTy.getElementType(); + + auto firstOperandTy = op->getOperand(0).getType().cast(); + auto secondOperandTy = op->getOperand(1).getType().cast(); + + SmallVector dynDims; + dynDims.resize(op->getResult(0).getType().cast().getRank()); + + if (!firstOperandTy.hasRank() || firstOperandTy.isDynamicDim(0)) { + dynDims[0] = rewriter.create(loc, op->getOperand(0), 0); + } + + if (!firstOperandTy.hasRank() || firstOperandTy.isDynamicDim(1)) { + dynDims[1] = rewriter.create(loc, op->getOperand(0), 1); + } + + if (!secondOperandTy.hasRank() || secondOperandTy.isDynamicDim(2)) { + dynDims[2] = rewriter.create(loc, op->getOperand(1), 2); + } + + SmallVector filteredDims = filterDynamicDims(dynDims); + + auto zeroAttr = rewriter.getZeroAttr(outputElementTy); + Value zero = rewriter.create(loc, zeroAttr); + auto initTensor = rewriter.create( + loc, filteredDims, outputTy.getShape(), outputTy.getElementType()); + Value zeroTensor = + rewriter.create(loc, zero, initTensor).getResult(0); + if (!op.quantization_info()) { + rewriter.replaceOpWithNewOp( + op, TypeRange{op.getType()}, ValueRange{adaptor.a(), adaptor.b()}, + ValueRange{zeroTensor}); + return success(); + } + + auto quantizationInfo = op.quantization_info().getValue(); + auto aZp = rewriter.create( + loc, rewriter.getI32IntegerAttr( + quantizationInfo.a_zp().getValue().getSExtValue())); + auto bZp = rewriter.create( + loc, rewriter.getI32IntegerAttr( + quantizationInfo.b_zp().getValue().getSExtValue())); + rewriter.replaceOpWithNewOp( + op, TypeRange{op.getType()}, + ValueRange{adaptor.a(), adaptor.b(), aZp, bZp}, zeroTensor); + + return success(); + } +}; + +class FullyConnectedConverter + : public OpConversionPattern { +public: + using OpConversionPattern::OpConversionPattern; + LogicalResult + matchAndRewrite(tosa::FullyConnectedOp op, OpAdaptor adaptor, + ConversionPatternRewriter &rewriter) const final { + Location loc = op.getLoc(); + auto outputTy = op.getType().cast(); + auto input = op.input(); + auto inputTy = input.getType().cast(); + + auto bias = op.bias(); + + auto weight = op.weight(); + auto weightTy = weight.getType().cast(); + auto weightShape = weightTy.getShape(); + + auto outputETy = outputTy.getElementType(); + + SmallVector dynDims; + dynDims.resize(op->getResult(0).getType().cast().getRank()); + + if (!inputTy.hasRank() || inputTy.isDynamicDim(0)) { + dynDims[0] = rewriter.create(loc, input, 0); + } + + if (!weightTy.hasRank() || weightTy.isDynamicDim(0)) { + dynDims[1] = rewriter.create(loc, weight, 0); + } + + SmallVector filteredDims = filterDynamicDims(dynDims); + + // Creating maps for the output of MatMul and the bias + SmallVector indexingMaps; + + // Broadcast the bias. 
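+    // The bias map (d0, d1) -> (d1) replicates the 1-D bias across every
+    // row of the matmul result; the two identity maps below index the
+    // matmul output and the result tensor elementwise (see the
+    // @fully_connected test).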
+    indexingMaps.push_back(AffineMap::get(/*dimCount=*/2, /*symbolCount=*/0,
+                                          {rewriter.getAffineDimExpr(1)},
+                                          rewriter.getContext()));
+
+    indexingMaps.push_back(rewriter.getMultiDimIdentityMap(outputTy.getRank()));
+    indexingMaps.push_back(rewriter.getMultiDimIdentityMap(outputTy.getRank()));
+
+    auto initTensor = rewriter.create(
+        loc, filteredDims, outputTy.getShape(), outputTy.getElementType());
+
+    // When quantized, the input element type is not the same as the output.
+    Attribute resultZeroAttr = rewriter.getZeroAttr(outputETy);
+    Value zero = rewriter.create(loc, resultZeroAttr);
+    Value zeroTensor =
+        rewriter.create(loc, zero, initTensor).getResult(0);
+
+    SmallVector permutation{1, 0};
+    auto permutationAttr = DenseIntElementsAttr::get(
+        RankedTensorType::get({2}, rewriter.getI64Type()), permutation);
+    Value permutationValue =
+        rewriter.create(loc, permutationAttr);
+
+    SmallVector newWeightShape{weightShape[1], weightShape[0]};
+    Type newWeightTy =
+        RankedTensorType::get(newWeightShape, weightTy.getElementType());
+
+    Value transposedWeight = rewriter.create(
+        loc, newWeightTy, weight, permutationValue);
+
+    auto biasInitTensor =
+        rewriter
+            .create(loc, filteredDims,
+                    outputTy.getShape(), outputETy)
+            ->getResults();
+
+    if (!op.quantization_info()) {
+      Value matmul = rewriter
+                         .create(
+                             loc, TypeRange{op.getType()},
+                             ValueRange{input, transposedWeight}, zeroTensor)
+                         ->getResult(0);
+
+      Value result =
+          rewriter
+              .create(
+                  loc, outputTy, ValueRange({bias, matmul}), biasInitTensor,
+                  indexingMaps, getNParallelLoopsAttrs(outputTy.getRank()),
+                  [&](OpBuilder &nestedBuilder, Location nestedLoc,
+                      ValueRange args) {
+                    Value added = nestedBuilder.create(
+                        loc, args[0], args[1]);
+                    nestedBuilder.create(nestedLoc, added);
+                  })
+              .getResult(0);
+      rewriter.replaceOp(op, result);
+      return success();
+    }
+
+    auto quantizationInfo = op.quantization_info().getValue();
+    auto inputZp = rewriter.create(
+        loc, rewriter.getI32IntegerAttr(
+                 quantizationInfo.input_zp().getValue().getSExtValue()));
+    auto outputZp = rewriter.create(
+        loc, rewriter.getI32IntegerAttr(
+                 quantizationInfo.weight_zp().getValue().getSExtValue()));
+    Value matmul =
+        rewriter
+            .create(
+                loc, TypeRange{op.getType()},
+                ValueRange{input, transposedWeight, inputZp, outputZp},
+                zeroTensor)
+            ->getResult(0);
+    Value result =
+        rewriter
+            .create(
+                loc, outputTy, ValueRange({bias, matmul}), biasInitTensor,
+                indexingMaps, getNParallelLoopsAttrs(outputTy.getRank()),
+                [&](OpBuilder &nestedBuilder, Location nestedLoc,
+                    ValueRange args) {
+                  Value added = nestedBuilder.create(
+                      loc, args[0], args[1]);
+                  nestedBuilder.create(nestedLoc, added);
+                })
+            .getResult(0);
+    rewriter.replaceOp(op, result);
+    return success();
+  }
+};
+
+class MaxPool2dConverter : public OpRewritePattern {
+public:
+  using OpRewritePattern::OpRewritePattern;
+
+  LogicalResult matchAndRewrite(tosa::MaxPool2dOp op,
+                                PatternRewriter &rewriter) const final {
+    Location loc = op.getLoc();
+    Value input = op.input();
+    ShapedType inputTy = input.getType().cast();
+
+    ShapedType resultTy = op.getType().template cast();
+    Type resultETy = inputTy.getElementType();
+
+    if (!inputTy.hasStaticShape())
+      return failure();
+
+    // Determine what the initial value needs to be for the max pool op.
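+    // The fill value is the identity of the max reduction, so padded
+    // positions can never win: the most negative finite value for floats
+    // (-3.40282347E+38 in the f32 test below) and the signed minimum for
+    // integer types (e.g. -128 for i8, -32768 for i16).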
+ Attribute initialAttr; + if (resultETy.isF32()) + initialAttr = rewriter.getFloatAttr( + resultETy, + APFloat::getLargest(resultETy.cast().getFloatSemantics(), + true)); + + if (resultETy.isa()) + initialAttr = rewriter.getIntegerAttr( + resultETy, + APInt::getSignedMinValue(resultETy.getIntOrFloatBitWidth())); + + if (!initialAttr) + return rewriter.notifyMatchFailure( + op, "Unsupported initial value for tosa.maxpool_2d op"); + + // Apply padding as necessary. + llvm::SmallVector pad; + pad.resize(2, 0); + getValuesFromIntArrayAttribute(op.pad(), pad); + pad.resize(pad.size() + 2, 0); + Value paddedInput = applyPad(loc, input, pad, initialAttr, rewriter); + + Value initialValue = rewriter.create(loc, initialAttr); + + SmallVector kernel, stride; + getValuesFromIntArrayAttribute(op.kernel(), kernel); + getValuesFromIntArrayAttribute(op.stride(), stride); + + Attribute strideAttr = rewriter.getI64VectorAttr(stride); + Attribute dilationAttr = rewriter.getI64VectorAttr({1, 1}); + + // Create the linalg op that performs pooling. + Value initTensor = rewriter.create( + loc, resultTy.getShape(), resultTy.getElementType()); + + Value filledInitTensor = + rewriter.create(loc, initialValue, initTensor).result(); + + Value fakeWindowDims = + rewriter.create(loc, kernel, resultETy); + + rewriter.replaceOpWithNewOp( + op, ArrayRef{resultTy}, ValueRange{paddedInput, fakeWindowDims}, + filledInitTensor, strideAttr, dilationAttr); + return success(); + } +}; + +class AvgPool2dConverter : public OpRewritePattern { +public: + using OpRewritePattern::OpRewritePattern; + + LogicalResult matchAndRewrite(tosa::AvgPool2dOp op, + PatternRewriter &rewriter) const final { + Location loc = op.getLoc(); + Value input = op.input(); + ShapedType inputTy = input.getType().cast(); + Type inElementTy = inputTy.getElementType(); + + ShapedType resultTy = op.getType().template cast(); + Type resultETy = op.getType().cast().getElementType(); + + Type accETy = + inElementTy.isa() ? rewriter.getI32Type() : inElementTy; + ShapedType accTy = resultTy.clone(accETy); + + if (!inputTy.hasStaticShape()) + return failure(); + + // Apply padding as necessary. + llvm::SmallVector pad; + pad.resize(2, 0); + getValuesFromIntArrayAttribute(op.pad(), pad); + pad.resize(pad.size() + 2, 0); + Attribute padAttr = rewriter.getZeroAttr(inElementTy); + Value paddedInput = applyPad(loc, input, pad, padAttr, rewriter); + + Attribute initialAttr = rewriter.getZeroAttr(accETy); + Value initialValue = rewriter.create(loc, initialAttr); + + SmallVector kernel, stride; + getValuesFromIntArrayAttribute(op.kernel(), kernel); + getValuesFromIntArrayAttribute(op.stride(), stride); + + Attribute strideAttr = rewriter.getI64VectorAttr(stride); + Attribute dilationAttr = rewriter.getI64VectorAttr({1, 1}); + + // Create the linalg op that performs pooling. + Value poolInitTensor = + rewriter.create(loc, accTy.getShape(), accETy); + + Value filledInitTensor = + rewriter.create(loc, initialValue, poolInitTensor) + .result(); + + Value fakeWindowDims = + rewriter.create(loc, kernel, accETy); + + // Sum across the pooled region. + Value poolingOp = rewriter + .create( + loc, ArrayRef{accTy}, + ValueRange{paddedInput, fakeWindowDims}, + filledInitTensor, strideAttr, dilationAttr) + .getResult(0); + + // Normalize the summed value by the number of elements grouped in each + // pool. 
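+    // The divisor is not a constant: near the borders only the part of the
+    // window that overlaps the un-padded input counts, so the generic op
+    // below recomputes the valid kernel coverage per output element from
+    // linalg.index values before dividing (floats) or scaling (quantized).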
+ auto poolingOpTy = poolingOp.getType().cast(); + auto affineMap = rewriter.getMultiDimIdentityMap(resultTy.getRank()); + + Value genericInitTensor = rewriter.create( + loc, resultTy.getShape(), resultETy); + + auto genericOp = rewriter.create( + loc, ArrayRef({resultTy}), ValueRange{poolingOp}, + ValueRange{genericInitTensor}, + ArrayRef({affineMap, affineMap}), + getNParallelLoopsAttrs(resultTy.getRank()), + [&](OpBuilder &b, Location loc, ValueRange args) { + auto zero = rewriter.create(loc, 0); + auto one = rewriter.create(loc, 1); + auto iH = rewriter.create( + loc, poolingOpTy.getDimSize(1) - 1); + auto iW = rewriter.create( + loc, poolingOpTy.getDimSize(2) - 1); + + // Compute the indices from either end. + auto y0 = rewriter.create(loc, 1); + auto x0 = rewriter.create(loc, 2); + auto y1 = rewriter.create(loc, iH, y0); + auto x1 = rewriter.create(loc, iW, x0); + + // Determines what the portion of valid input is covered by the + // kernel. + auto padFn = [&](Value v, Value x, int64_t pad) -> Value { + if (pad == 0) + return v; + + auto padVal = rewriter.create(loc, pad); + Value dx = rewriter.create(loc, x, padVal); + + Value cmp = rewriter.create( + loc, arith::CmpIPredicate::slt, dx, zero); + Value offset = rewriter.create(loc, cmp, dx, zero); + return rewriter.create(loc, v, offset)->getResult(0); + }; + + // Compute the vertical component of coverage. + auto kH0 = rewriter.create(loc, kernel[0]); + auto kH1 = padFn(kH0, y0, pad[2]); + auto kH2 = padFn(kH1, y1, pad[3]); + auto kHCmp = rewriter.create( + loc, arith::CmpIPredicate::slt, kH2, one); + auto kH3 = rewriter.create(loc, kHCmp, one, kH2); + + // compute the horizontal component of coverage. + auto kW0 = rewriter.create(loc, kernel[1]); + auto kW1 = padFn(kW0, x0, pad[4]); + auto kW2 = padFn(kW1, x1, pad[5]); + auto kWCmp = rewriter.create( + loc, arith::CmpIPredicate::slt, kW2, one); + auto kW3 = rewriter.create(loc, kWCmp, one, kW2); + + // Compute the total number of elements and normalize. + Value count = rewriter.create(loc, kH3, kW3); + auto countI = rewriter.create( + loc, rewriter.getI32Type(), count); + + // Divide by the number of summed values. For floats this is just + // a div however for quantized values input normalization had + // to be applied. + Value poolVal = args[0]; + if (accETy.isa()) { + auto countF = rewriter.create(loc, accETy, countI); + poolVal = rewriter.create(loc, poolVal, countF) + ->getResult(0); + } else { + + // If we have quantization information we need to apply an offset + // for the input zp value. + if (op.quantization_info()) { + auto quantizationInfo = op.quantization_info().getValue(); + auto inputZp = rewriter.create( + loc, quantizationInfo.input_zp()); + Value offset = + rewriter.create(loc, accETy, countI, inputZp); + poolVal = + rewriter.create(loc, accETy, poolVal, offset); + } + + // Compute the multiplier and shift values for the quantization + // normalization. Preferably we would want to compute more bits + // however 32-bits should be enough for compute. Honestly we + // should probably straight divide. 
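+            // Concretely (a sketch of the arithmetic): 1/count is
+            // approximated by multiplier = ((1 << 30) + 1) / count with a
+            // right shift of 30, applied via tosa.apply_scale, i.e. roughly
+            // poolVal * multiplier >> 30 (see the numerator 1073741825 and
+            // shift 30 in the @avg_pool_i8 test below).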
+            int64_t numerator = ((1 << 30) + 1);
+            int64_t shift = 30;
+
+            Value numeratorVal = rewriter.create(
+                loc, rewriter.getI32IntegerAttr(numerator));
+            Value multiplierVal =
+                rewriter
+                    .create(loc, rewriter.getI32Type(),
+                            numeratorVal, countI)
+                    .getResult();
+            Value shiftVal = rewriter.create(
+                loc, rewriter.getI8IntegerAttr(shift));
+
+            auto scaled =
+                rewriter
+                    .create(
+                        loc, rewriter.getI32Type(), poolVal, multiplierVal,
+                        shiftVal, rewriter.getBoolAttr(false))
+                    .getResult();
+
+            // If we have quantization information we need to apply output
+            // zero point.
+            if (op.quantization_info()) {
+              auto quantizationInfo = op.quantization_info().getValue();
+              auto outputZp = rewriter.create(
+                  loc, quantizationInfo.output_zp());
+              scaled = rewriter.create(loc, scaled, outputZp)
+                           .getResult();
+            }
+
+            // Apply Clip.
+            int64_t outBitwidth = resultETy.getIntOrFloatBitWidth();
+
+            auto min = rewriter.create(
+                loc, APInt::getSignedMinValue(outBitwidth).getSExtValue(),
+                accETy);
+            auto max = rewriter.create(
+                loc, APInt::getSignedMaxValue(outBitwidth).getSExtValue(),
+                accETy);
+            auto clamp = clampHelper(
+                loc, scaled, min, max, arith::CmpIPredicate::slt, rewriter);
+
+            poolVal = clamp;
+            // Convert type.
+            if (resultETy != clamp.getType()) {
+              poolVal =
+                  rewriter.create(loc, resultETy, poolVal);
+            }
+          }
+
+          rewriter.create(loc, poolVal);
+        });
+
+    rewriter.replaceOp(op, genericOp.getResult(0));
+    return success();
+  }
+};
+
+} // namespace
+
+void mlir::tosa::populateTosaToLinalgNamedConversionPatterns(
+    RewritePatternSet *patterns) {
+  patterns->add<
+      // clang-format off
+      ConvConverter,
+      DepthwiseConvConverter,
+      MatMulConverter,
+      MaxPool2dConverter,
+      AvgPool2dConverter,
+      FullyConnectedConverter>(patterns->getContext());
+  // clang-format on
+}
diff --git a/mlir/lib/Conversion/TosaToLinalg/TosaToLinalgNamedPass.cpp b/mlir/lib/Conversion/TosaToLinalg/TosaToLinalgNamedPass.cpp
new file mode 100644
index 000000000000..f5f6ac1a5469
--- /dev/null
+++ b/mlir/lib/Conversion/TosaToLinalg/TosaToLinalgNamedPass.cpp
@@ -0,0 +1,68 @@
+//===- TosaToLinalgNamedPass.cpp - Lowering Tosa to Linalg Named Ops -----===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This transformation pass legalizes Tosa operations to the Linalg named ops.
+// +//===----------------------------------------------------------------------===// + +#include "../PassDetail.h" +#include "mlir/Conversion/TosaToLinalg/TosaToLinalg.h" +#include "mlir/Dialect/Arithmetic/IR/Arithmetic.h" +#include "mlir/Dialect/Linalg/IR/Linalg.h" +#include "mlir/Dialect/Math/IR/Math.h" +#include "mlir/Dialect/SCF/SCF.h" +#include "mlir/Dialect/StandardOps/IR/Ops.h" +#include "mlir/Dialect/Tensor/IR/Tensor.h" +#include "mlir/Dialect/Tosa/IR/TosaOps.h" +#include "mlir/Dialect/Tosa/Transforms/PassDetail.h" +#include "mlir/Dialect/Tosa/Transforms/Passes.h" +#include "mlir/Dialect/Tosa/Utils/QuantUtils.h" +#include "mlir/IR/PatternMatch.h" +#include "mlir/Pass/PassManager.h" +#include "mlir/Transforms/DialectConversion.h" +#include "mlir/Transforms/GreedyPatternRewriteDriver.h" + +using namespace mlir; + +namespace { +struct TosaToLinalgNamed : public TosaToLinalgNamedBase { +public: + void getDependentDialects(DialectRegistry ®istry) const override { + registry.insert(); + } + + void runOnFunction() override { + RewritePatternSet patterns(&getContext()); + ConversionTarget target(getContext()); + target.addLegalDialect(); + + // Not every TOSA op can be legalized to linalg. + target.addIllegalOp(); + target.addIllegalOp(); + target.addIllegalOp(); + target.addIllegalOp(); + target.addIllegalOp(); + target.addIllegalOp(); + + target.markUnknownOpDynamicallyLegal([](Operation *) { return true; }); + + FuncOp func = getFunction(); + mlir::tosa::populateTosaToLinalgNamedConversionPatterns(&patterns); + if (failed(applyFullConversion(func, target, std::move(patterns)))) + signalPassFailure(); + } +}; +} // namespace + +std::unique_ptr mlir::tosa::createTosaToLinalgNamed() { + return std::make_unique(); +} diff --git a/mlir/lib/Conversion/TosaToLinalg/TosaToLinalgPass.cpp b/mlir/lib/Conversion/TosaToLinalg/TosaToLinalgPass.cpp index 8f4f872c8860..3813ba345137 100644 --- a/mlir/lib/Conversion/TosaToLinalg/TosaToLinalgPass.cpp +++ b/mlir/lib/Conversion/TosaToLinalg/TosaToLinalgPass.cpp @@ -26,6 +26,7 @@ #include "mlir/Pass/PassManager.h" #include "mlir/Transforms/DialectConversion.h" #include "mlir/Transforms/GreedyPatternRewriteDriver.h" +#include "mlir/Transforms/Passes.h" using namespace mlir; @@ -67,6 +68,9 @@ std::unique_ptr mlir::tosa::createTosaToLinalg() { } void mlir::tosa::addTosaToLinalgPasses(OpPassManager &pm) { + pm.addNestedPass(createTosaMakeBroadcastablePass()); + pm.addNestedPass(createTosaToLinalgNamed()); + pm.addNestedPass(mlir::createCanonicalizerPass()); pm.addNestedPass(createTosaMakeBroadcastablePass()); pm.addNestedPass(createTosaToLinalg()); } diff --git a/mlir/test/Conversion/TosaToLinalg/tosa-to-linalg-named.mlir b/mlir/test/Conversion/TosaToLinalg/tosa-to-linalg-named.mlir new file mode 100644 index 000000000000..f5814883cc49 --- /dev/null +++ b/mlir/test/Conversion/TosaToLinalg/tosa-to-linalg-named.mlir @@ -0,0 +1,448 @@ +// RUN: mlir-opt --split-input-file --tosa-to-linalg-named %s -verify-diagnostics -o -| FileCheck %s + +// CHECK-LABEL: @matmul +func @matmul(%arg0: tensor<1x5x3xf32>, %arg1: tensor<1x3x6xf32>) -> (tensor<1x5x6xf32>) { + // CHECK: [[C0:%.+]] = arith.constant 0 + // CHECK: [[INIT:%.+]] = linalg.init_tensor [1, 5, 6] + // CHECK: [[FILLED:%.+]] = linalg.fill([[C0]], [[INIT]]) : f32, tensor<1x5x6xf32> -> tensor<1x5x6xf32> + // CHECK: linalg.batch_matmul ins(%arg0, %arg1 : tensor<1x5x3xf32>, tensor<1x3x6xf32>) outs([[FILLED]] : tensor<1x5x6xf32>) -> tensor<1x5x6xf32> + %0 = "tosa.matmul"(%arg0, %arg1) : (tensor<1x5x3xf32>, 
tensor<1x3x6xf32>) -> (tensor<1x5x6xf32>) + return %0 : tensor<1x5x6xf32> +} + +// ----- + + +// CHECK-LABEL: @matmul_quantized +func @matmul_quantized(%arg0: tensor<1x5x3xi8>, %arg1: tensor<1x3x6xi8>) -> (tensor<1x5x6xi32>) { + // CHECK: [[C0:%.+]] = arith.constant 0 + // CHECK: [[INIT:%.+]] = linalg.init_tensor [1, 5, 6] + // CHECK: [[FILLED:%.+]] = linalg.fill([[C0]], [[INIT]]) : i32, tensor<1x5x6xi32> -> tensor<1x5x6xi32> + // CHECK: [[ONE:%.+]] = arith.constant 1 + // CHECK: [[TWO:%.+]] = arith.constant 2 + // CHECK: linalg.quantized_batch_matmul ins(%arg0, %arg1, [[ONE]], [[TWO]] : tensor<1x5x3xi8>, tensor<1x3x6xi8>, i32, i32) outs([[FILLED]] : tensor<1x5x6xi32>) -> tensor<1x5x6xi32> + %0 = "tosa.matmul"(%arg0, %arg1) {quantization_info = {a_zp = 1 : i32, b_zp = 2 : i32}} : (tensor<1x5x3xi8>, tensor<1x3x6xi8>) -> (tensor<1x5x6xi32>) + return %0 : tensor<1x5x6xi32> +} + +// ----- + +// CHECK-LABEL: @matmul_dyn_batch +func @matmul_dyn_batch(%arg0: tensor, %arg1: tensor) -> (tensor) { + // CHECK: %[[C0:.+]] = arith.constant 0 + // CHECK: %[[DIM:.+]] = tensor.dim %arg0, %[[C0]] + // CHECK: %[[C0_0:.+]] = arith.constant 0 + // CHECK: %[[INIT:.+]] = linalg.init_tensor [%[[DIM]], 5, 6] + // CHECK: %[[FILLED:.+]] = linalg.fill(%[[C0_0]], %[[INIT]]) : f32, tensor -> tensor + // CHECK: linalg.batch_matmul ins(%arg0, %arg1 : tensor, tensor) outs(%[[FILLED]] : tensor) -> tensor + %0 = "tosa.matmul"(%arg0, %arg1) : (tensor, tensor) -> (tensor) + return %0 : tensor +} + +// ----- + +// CHECK-LABEL: @matmul_dyn_independent_dim +func @matmul_dyn_independent_dim(%arg0: tensor<1x5x3xf32>, %arg1: tensor<1x3x?xf32>) -> (tensor<1x5x?xf32>) { + // CHECK: %[[C2:.+]] = arith.constant 2 + // CHECK: %[[DIM:.+]] = tensor.dim %arg1, %[[C2]] + // CHECK: %[[C0:.+]] = arith.constant 0 + // CHECK: %[[INIT:.+]] = linalg.init_tensor [1, 5, %[[DIM]]] + // CHECK: %[[FILLED:.+]] = linalg.fill(%[[C0]], %[[INIT]]) : f32, tensor<1x5x?xf32> -> tensor<1x5x?xf32> + // CHECK: linalg.batch_matmul ins(%arg0, %arg1 : tensor<1x5x3xf32>, tensor<1x3x?xf32>) outs(%[[FILLED]] : tensor<1x5x?xf32>) -> tensor<1x5x?xf32> + %0 = "tosa.matmul"(%arg0, %arg1) : (tensor<1x5x3xf32>, tensor<1x3x?xf32>) -> (tensor<1x5x?xf32>) + return %0 : tensor<1x5x?xf32> +} + +// ----- + +// CHECK-LABEL: @matmul_dyn_independent_dim +func @matmul_dyn_independent_dim(%arg0: tensor<1x5x?xf32>, %arg1: tensor<1x?x6xf32>) -> (tensor<1x5x6xf32>) { + // CHECK: %[[C0:.+]] = arith.constant 0 + // CHECK: %[[INIT:.+]] = linalg.init_tensor [1, 5, 6] + // CHECK: %[[FILLED:.+]] = linalg.fill(%[[C0]], %[[INIT]]) : f32, tensor<1x5x6xf32> -> tensor<1x5x6xf32> + // CHECK: linalg.batch_matmul ins(%arg0, %arg1 : tensor<1x5x?xf32>, tensor<1x?x6xf32>) outs(%[[FILLED]] : tensor<1x5x6xf32>) -> tensor<1x5x6xf32> + %0 = "tosa.matmul"(%arg0, %arg1) : (tensor<1x5x?xf32>, tensor<1x?x6xf32>) -> (tensor<1x5x6xf32>) + return %0 : tensor<1x5x6xf32> +} + +// ----- + +// CHECK: #[[$MAP1:.*]] = affine_map<(d0, d1) -> (d1)> +// CHECK: #[[$MAP2:.*]] = affine_map<(d0, d1) -> (d0, d1)> + +// CHECK-LABEL: @fully_connected +func @fully_connected(%arg0: tensor<5x3xf32>, %arg1: tensor<6x3xf32>, %arg2: tensor<6xf32>) -> (tensor<5x6xf32>) { + // CHECK: [[INITT:%.+]] = linalg.init_tensor [5, 6] + // CHECK: [[ZERO:%.+]] = arith.constant 0 + // CHECK: [[FILL:%.+]] = linalg.fill([[ZERO]], [[INITT]]) + // CHECK: [[PERM:%.+]] = arith.constant dense<[1, 0]> + // CHECK: [[TRANSPOSE:%.+]] = "tosa.transpose"(%arg1, [[PERM]]) + // CHECK: [[INITB:%.+]] = linalg.init_tensor [5, 6] + // CHECK: [[MATMUL:%.+]] = 
linalg.matmul ins(%arg0, [[TRANSPOSE]] : tensor<5x3xf32>, tensor<3x6xf32>) outs([[FILL]] : tensor<5x6xf32>) -> tensor<5x6xf32> + // CHECK: [[ADDED:%.+]] = linalg.generic {indexing_maps = [#[[$MAP1]], #[[$MAP2]], #[[$MAP2]]], iterator_types = ["parallel", "parallel"]} ins(%arg2, [[MATMUL]] : tensor<6xf32>, tensor<5x6xf32>) outs([[INITB]] : tensor<5x6xf32>) { + // CHECK: ^bb0(%arg3: f32, %arg4: f32, %arg5: f32): + // CHECK: [[ADD:%.+]] = arith.addf %arg3, %arg4 : f32 + // CHECK: linalg.yield [[ADD]] : f32 + + %0 = "tosa.fully_connected"(%arg0, %arg1, %arg2) : (tensor<5x3xf32>, tensor<6x3xf32>, tensor<6xf32>) -> (tensor<5x6xf32>) + return %0 : tensor<5x6xf32> +} + +// ----- + +// CHECK: #[[$MAP1:.*]] = affine_map<(d0, d1) -> (d1)> +// CHECK: #[[$MAP2:.*]] = affine_map<(d0, d1) -> (d0, d1)> + +// CHECK-LABEL: @quantized_fully_connected +func @quantized_fully_connected(%arg0: tensor<5x3xi8>, %arg1: tensor<6x3xi8>, %arg2: tensor<6xi32>) -> (tensor<5x6xi32>) { + // CHECK: [[INITT:%.+]] = linalg.init_tensor [5, 6] + // CHECK: [[ZERO:%.+]] = arith.constant 0 + // CHECK: [[FILL:%.+]] = linalg.fill([[ZERO]], [[INITT]]) + // CHECK: [[PERM:%.+]] = arith.constant dense<[1, 0]> + // CHECK: [[TRANSPOSE:%.+]] = "tosa.transpose"(%arg1, [[PERM]]) + // CHECK: [[INITB:%.+]] = linalg.init_tensor [5, 6] + // CHECK: [[ONE:%.+]] = arith.constant 1 + // CHECK: [[TWO:%.+]] = arith.constant 2 + // CHECK: [[MATMUL:%.+]] = linalg.quantized_matmul ins(%arg0, [[TRANSPOSE]], [[ONE]], [[TWO]] : tensor<5x3xi8>, tensor<3x6xi8>, i32, i32) outs([[FILL]] : tensor<5x6xi32>) -> tensor<5x6xi32> + // CHECK: [[ADDED:%.+]] = linalg.generic {indexing_maps = [#[[$MAP1]], #[[$MAP2]], #[[$MAP2]]], iterator_types = ["parallel", "parallel"]} ins(%arg2, [[MATMUL]] : tensor<6xi32>, tensor<5x6xi32>) outs([[INITB]] + // CHECK: ^bb0([[IN1:%.+]]: i32, [[IN2:%.+]]: i32, [[UNUSED:%.+]]: i32): + // CHECK: [[ADD:%.+]] = arith.addi + // CHECK: linalg.yield [[ADD]] : i32 + %0 = "tosa.fully_connected"(%arg0, %arg1, %arg2) {quantization_info = {input_zp = 1:i32, weight_zp = 2:i32}} : (tensor<5x3xi8>, tensor<6x3xi8>, tensor<6xi32>) -> (tensor<5x6xi32>) + return %0 : tensor<5x6xi32> +} + +// ----- + +// CHECK: #[[$MAP1:.*]] = affine_map<(d0, d1) -> (d1)> +// CHECK: #[[$MAP2:.*]] = affine_map<(d0, d1) -> (d0, d1)> + +// CHECK-LABEL: @fully_connected_dyn +func @fully_connected_dyn(%arg0: tensor, %arg1: tensor<6x3xf32>, %arg2: tensor<6xf32>) -> (tensor) { + // CHECK: %[[C0:.+]] = arith.constant 0 + // CHECK: %[[DIM:.+]] = tensor.dim %arg0, %[[C0]] + // CHECK: %[[INITT:.+]] = linalg.init_tensor [%[[DIM]], 6] + // CHECK: %[[ZERO:.+]] = arith.constant 0 + // CHECK: %[[FILL:.+]] = linalg.fill(%[[ZERO]], %[[INITT]]) + // CHECK: %[[PERM:.+]] = arith.constant dense<[1, 0]> + // CHECK: %[[TRANSPOSE:.+]] = "tosa.transpose"(%arg1, %[[PERM]]) + // CHECK: %[[INITB:.+]] = linalg.init_tensor [%[[DIM]], 6] + // CHECK: %[[MATMUL:.+]] = linalg.matmul ins(%arg0, %[[TRANSPOSE]] : tensor, tensor<3x6xf32>) outs(%[[FILL]] : tensor) -> tensor + // CHECK: %[[ADDED:.+]] = linalg.generic {indexing_maps = [#[[$MAP1]], #[[$MAP2]], #[[$MAP2]]], iterator_types = ["parallel", "parallel"]} ins(%arg2, %[[MATMUL]] : tensor<6xf32>, tensor) outs(%[[INITB]] : tensor) { + // CHECK: ^bb0(%arg3: f32, %arg4: f32, %arg5: f32): + // CHECK: %[[ADD:.+]] = arith.addf %arg3, %arg4 : f32 + // CHECK: linalg.yield %[[ADD]] : f32 + + %0 = "tosa.fully_connected"(%arg0, %arg1, %arg2) : (tensor, tensor<6x3xf32>, tensor<6xf32>) -> (tensor) + return %0 : tensor +} + +// ----- + +// CHECK-LABEL: @max_pool +func 
@max_pool(%arg0: tensor<1x6x34x62xf32>) -> () { + // CHECK-DAG: [[CONST:%.+]] = arith.constant -3.40282347E+38 + // CHECK-DAG: [[INIT:%.+]] = linalg.init_tensor [1, 4, 32, 62] + // CHECK-DAG: [[FILL:%.+]] = linalg.fill([[CONST]], [[INIT]]) + // CHECK-DAG: [[KERNEL:%.+]] = linalg.init_tensor [3, 3] + // CHECK: linalg.pooling_nhwc_max {dilations = dense<1> : vector<2xi64>, strides = dense<1> : vector<2xi64>} ins(%arg0, [[KERNEL]] : tensor<1x6x34x62xf32>, tensor<3x3xf32>) outs([[FILL]] : tensor<1x4x32x62xf32>) + %0 = "tosa.max_pool2d"(%arg0) {pad = [0, 0, 0, 0], kernel = [3, 3], stride = [1, 1]} : (tensor<1x6x34x62xf32>) -> (tensor<1x4x32x62xf32>) + return +} + +// CHECK-LABEL: @max_pool_padded +func @max_pool_padded(%arg0: tensor<1x6x34x62xf32>) -> () { + // CHECK-DAG: [[CONST:%.+]] = arith.constant -3.40282347E+38 : f32 + // CHECK-DAG: [[PAD:%.+]] = linalg.pad_tensor %arg0 low[0, 0, 0, 0] high[0, 0, 1, 0] + // CHECK-DAG: linalg.yield [[CONST]] + // CHECK-DAG: [[INITVAL:%.+]] = arith.constant -3.40282347E+38 : f32 + // CHECK-DAG: [[INIT:%.+]] = linalg.init_tensor [1, 4, 33, 62] + // CHECK-DAG: [[FILL:%.+]] = linalg.fill([[INITVAL]], [[INIT]]) + // CHECK-DAG: [[KERNEL:%.+]] = linalg.init_tensor [3, 3] + // CHECK: linalg.pooling_nhwc_max {dilations = dense<1> : vector<2xi64>, strides = dense<1> : vector<2xi64>} ins([[PAD]], [[KERNEL]] : tensor<1x6x35x62xf32>, tensor<3x3xf32>) outs([[FILL]] : tensor<1x4x33x62xf32>) + %0 = "tosa.max_pool2d"(%arg0) {pad = [0, 0, 0, 1], kernel = [3, 3], stride = [1, 1]} : (tensor<1x6x34x62xf32>) -> (tensor<1x4x33x62xf32>) + return +} + +// CHECK-LABEL: @max_pool_i8 +func @max_pool_i8(%arg0: tensor<1x6x34x62xi8>) -> () { + // CHECK: arith.constant -128 + // CHECK: linalg.pooling_nhwc_max + %0 = "tosa.max_pool2d"(%arg0) {pad = [0, 0, 0, 0], kernel = [3, 3], stride = [1, 1]} : (tensor<1x6x34x62xi8>) -> (tensor<1x4x32x62xi8>) + return +} + +// CHECK-LABEL: @max_pool_i16 +func @max_pool_i16(%arg0: tensor<1x6x34x62xi16>) -> () { + // CHECK: arith.constant -32768 + // CHECK: linalg.pooling_nhwc_max + %0 = "tosa.max_pool2d"(%arg0) {pad = [0, 0, 0, 0], kernel = [3, 3], stride = [1, 1]} : (tensor<1x6x34x62xi16>) -> (tensor<1x4x32x62xi16>) + return +} + +// CHECK-LABEL: @max_pool_i32 +func @max_pool_i32(%arg0: tensor<1x6x34x62xi32>) -> () { + // CHECK: arith.constant -2147483648 + // CHECK: linalg.pooling_nhwc_max + %0 = "tosa.max_pool2d"(%arg0) {pad = [0, 0, 0, 0], kernel = [3, 3], stride = [1, 1]} : (tensor<1x6x34x62xi32>) -> (tensor<1x4x32x62xi32>) + return +} +// ----- + +// CHECK-LABEL: @avg_pool +func @avg_pool(%arg0: tensor<1x6x34x62xf32>) -> (tensor<1x5x33x62xf32>) { + // Initial piece computes the sum of the pooling region, with appropriate padding. 
+ // CHECK: [[CONST:%.+]] = arith.constant 0 + // CHECK: [[PAD:%.+]] = linalg.pad_tensor %arg0 low[0, 1, 1, 0] high[0, 1, 1, 0] + // CHECK: [[CONST:%.+]] = arith.constant 0 + // CHECK: [[POOLINIT:%.+]] = linalg.init_tensor [1, 5, 33, 62] + // CHECK: [[FILL:%.+]] = linalg.fill([[CONST]], [[POOLINIT]]) + // CHECK: [[KERNEL:%.+]] = linalg.init_tensor [4, 4] + // CHECK: [[POOL:%.+]] = linalg.pooling_nhwc_sum {dilations = dense<1> : vector<2xi64>, strides = dense<1> : vector<2xi64>} ins([[PAD]], [[KERNEL]] : tensor<1x8x36x62xf32>, tensor<4x4xf32>) outs([[FILL]] : tensor<1x5x33x62xf32>) + // CHECK: [[INIT:%.+]] = linalg.init_tensor [1, 5, 33, 62] + // CHECK: [[GENERIC:%.+]] = linalg.generic {indexing_maps = [#map, #map], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins([[POOL]] : tensor<1x5x33x62xf32>) outs([[INIT]] : tensor<1x5x33x62xf32>) + // CHECK: [[ZERO:%.0]] = arith.constant 0 + // CHECK: [[ONE:%.+]] = arith.constant 1 + // CHECK: [[HEIGHT:%.+]] = arith.constant 4 + // CHECK: [[WIDTH:%.+]] = arith.constant 32 + // CHECK: [[IDX1:%.+]] = linalg.index 1 + // CHECK: [[IDX2:%.+]] = linalg.index 2 + + // The large block below computes what portion of the kernel is within non-padded input. + // CHECK: [[NY:%.+]] = arith.subi [[HEIGHT]], [[IDX1]] + // CHECK: [[NX:%.+]] = arith.subi [[WIDTH]], [[IDX2]] + // CHECK: [[KH:%.+]] = arith.constant 4 + // CHECK: [[PAD0:%.+]] = arith.constant 1 + // CHECK: [[SUBP0:%.+]] = arith.subi [[IDX1]], [[PAD0]] + // CHECK: [[P0CMP:%.+]] = arith.cmpi slt, [[SUBP0]], [[ZERO]] + // CHECK: [[SELP0:%.+]] = select [[P0CMP]], [[SUBP0]], [[ZERO]] + // CHECK: [[ADDP0:%.+]] = arith.addi [[KH]], [[SELP0]] + // CHECK: [[PAD1:%.+]] = arith.constant 1 + // CHECK: [[SUBP1:%.+]] = arith.subi [[NY]], [[PAD1]] + // CHECK: [[P1CMP:%.+]] = arith.cmpi slt, [[SUBP1]], [[ZERO]] + // CHECK: [[SELP1:%.+]] = select [[P1CMP]], [[SUBP1]], [[ZERO]] + // CHECK: [[ADDP1:%.+]] = arith.addi [[ADDP0]], [[SELP1]] + // CHECK: [[YCMP:%.+]] = arith.cmpi slt, [[ADDP1]], [[ONE]] + // CHECK: [[YSEL:%.+]] = select [[YCMP]], [[ONE]], [[ADDP1]] + // CHECK: [[KW:%.+]] = arith.constant 4 : index + // CHECK: [[PAD2:%.+]] = arith.constant 1 : index + // CHECK: [[SUBP2:%.+]] = arith.subi [[IDX2]], [[PAD2]] + // CHECK: [[P2CMP:%.+]] = arith.cmpi slt, [[SUBP2]], [[ZERO]] + // CHECK: [[SELP2:%.+]] = select [[P2CMP]], [[SUBP2]], [[ZERO]] + // CHECK: [[ADDP2:%.+]] = arith.addi [[KW]], [[SELP2]] + // CHECK: [[PAD3:%.+]] = arith.constant 1 : index + // CHECK: [[SUBP3:%.+]] = arith.subi [[NX]], [[PAD3]] + // CHECK: [[P3CMP:%.+]] = arith.cmpi slt, [[SUBP3]], [[ZERO]] + // CHECK: [[SELP3:%.+]] = select [[P3CMP]], [[SUBP3]], [[ZERO]] + // CHECK: [[ADDP3:%.+]] = arith.addi [[ADDP2]], [[SELP3]] + // CHECK: [[XCMP:%.+]] = arith.cmpi slt, [[ADDP3]], [[ONE]] + // CHECK: [[XSEL:%.+]] = select [[XCMP]], [[ONE]], [[ADDP3]] + + // Given the valid coverage of the pooling region, normalize the summation. 
+ // CHECK: [[C:%.+]] = arith.muli [[YSEL]], [[XSEL]] + // CHECK: [[CI:%.+]] = arith.index_cast [[C]] + // CHECK: [[CF:%.+]] = arith.sitofp [[CI]] + // CHECK: [[RESULT:%.+]] = arith.divf %arg1, [[CF]] + // CHECK: linalg.yield [[RESULT]] + %0 = "tosa.avg_pool2d"(%arg0) {pad = [1, 1, 1, 1], kernel = [4, 4], stride = [1, 1]} : (tensor<1x6x34x62xf32>) -> (tensor<1x5x33x62xf32>) + return %0 : tensor<1x5x33x62xf32> +} + +// ----- + +// CHECK-LABEL: @avg_pool_i8 +func @avg_pool_i8(%arg0 : tensor<1x128x128x2xi8>) -> () { + + // CHECK: linalg.pooling_nhwc_sum + // CHECK: linalg.generic + + // CHECK: %[[INZP:.+]] = arith.constant -128 + // CHECK: %[[INZP_OFF:.+]] = arith.muli %{{.+}}, %[[INZP]] + // CHECK: %[[OFFSETED:.+]] = arith.subi %arg1, %[[INZP_OFF]] + // CHECK: %[[NUMERATOR:.+]] = arith.constant 1073741825 + // CHECK: %[[MULTIPLIER:.+]] = arith.divui %[[NUMERATOR]], %{{.+}} + // CHECK: %[[SHIFT:.+]] = arith.constant 30 + // CHECK: %[[SCALE:.+]] = "tosa.apply_scale"(%{{.+}}, %[[MULTIPLIER]], %[[SHIFT]]) {double_round = false} + // CHECK: %[[OUTZP:.+]] = arith.constant -128 + // CHECK: %[[OUT:.+]] = arith.addi %[[SCALE]], %[[OUTZP]] + // CHECK: %[[MIN:.+]] = arith.constant -128 + // CHECK: %[[MAX:.+]] = arith.constant 127 + // CHECK: %[[CMP_MIN:.+]] = arith.cmpi slt, %[[OUT]], %[[MIN]] + // CHECK: %[[CLMP_MIN:.+]] = select %[[CMP_MIN]], %[[MIN]], %[[OUT]] + // CHECK: %[[CMP_MAX:.+]] = arith.cmpi slt, %[[MAX]], %[[OUT]] + // CHECK: %[[CLMP_MAX:.+]] = select %[[CMP_MAX]], %[[MAX]], %[[CLMP_MIN]] + // CHECK: %[[TRUNC:.+]] = arith.trunci %[[CLMP_MAX]] + // CHECK: linalg.yield %[[TRUNC]] + %0 = "tosa.avg_pool2d"(%arg0) {kernel = [4, 4], pad = [0, 0, 0, 0], quantization_info = {input_zp = -128 : i32, output_zp = -128 : i32}, stride = [4, 4]} : (tensor<1x128x128x2xi8>) -> tensor<1x32x32x2xi8> + return +} + +// ----- + +// CHECK-LABEL: @avg_pool_i16 +func @avg_pool_i16(%arg0 : tensor<1x128x128x2xi16>) -> () { + + // CHECK: linalg.pooling_nhwc_sum + // CHECK: linalg.generic + + // CHECK: %[[INZP:.+]] = arith.constant -128 + // CHECK: %[[INZP_OFF:.+]] = arith.muli %{{.+}}, %[[INZP]] + // CHECK: %[[OFFSETED:.+]] = arith.subi %arg1, %[[INZP_OFF]] + // CHECK: %[[NUMERATOR:.+]] = arith.constant 1073741825 + // CHECK: %[[MULTIPLIER:.+]] = arith.divui %[[NUMERATOR]], %{{.+}} + // CHECK: %[[SHIFT:.+]] = arith.constant 30 + // CHECK: %[[SCALE:.+]] = "tosa.apply_scale"(%{{.+}}, %[[MULTIPLIER]], %[[SHIFT]]) {double_round = false} + // CHECK: %[[OUTZP:.+]] = arith.constant -128 + // CHECK: %[[OUT:.+]] = arith.addi %[[SCALE]], %[[OUTZP]] + // CHECK: %[[MIN:.+]] = arith.constant -32768 + // CHECK: %[[MAX:.+]] = arith.constant 32767 + // CHECK: %[[CMP_MIN:.+]] = arith.cmpi slt, %[[OUT]], %[[MIN]] + // CHECK: %[[CLMP_MIN:.+]] = select %[[CMP_MIN]], %[[MIN]], %[[OUT]] + // CHECK: %[[CMP_MAX:.+]] = arith.cmpi slt, %[[MAX]], %[[OUT]] + // CHECK: %[[CLMP_MAX:.+]] = select %[[CMP_MAX]], %[[MAX]], %[[CLMP_MIN]] + // CHECK: %[[TRUNC:.+]] = arith.trunci %[[CLMP_MAX]] + // CHECK: linalg.yield %[[TRUNC]] + %0 = "tosa.avg_pool2d"(%arg0) {kernel = [4, 4], pad = [0, 0, 0, 0], quantization_info = {input_zp = -128 : i32, output_zp = -128 : i32}, stride = [4, 4]} : (tensor<1x128x128x2xi16>) -> tensor<1x32x32x2xi16> + return +} + +// ----- + +// CHECK: #[[$MAP1:.+]] = affine_map<(d0, d1, d2, d3) -> (d3)> +// CHECK: #[[$MAP2:.+]] = affine_map<(d0, d1, d2, d3) -> (d0, d1, d2, d3)> + +// CHECK-LABEL: @conv2d_f32 +func @conv2d_f32(%input: tensor<1x49x42x27xf32>, %weights: tensor<28x3x3x27xf32>, %bias: tensor<28xf32>) -> () { + // CHECK: 
%[[PERM:.+]] = arith.constant dense<[1, 2, 3, 0]> + // CHECK: %[[W:.+]] = "tosa.transpose"(%arg1, %[[PERM]]) + // CHECK: %[[M_IN:.+]] = linalg.init_tensor [1, 45, 40, 28] + // CHECK: %[[CST:.+]] = arith.constant 0 + // CHECK: %[[FILL:.+]] = linalg.fill + // CHECK: %[[B_IN:.+]] = linalg.init_tensor [1, 45, 40, 28] + // CHECK: %[[CONV:.+]] = linalg.conv_2d_nhwc_hwcf {dilations = dense<[2, 1]> : tensor<2xi64>, strides = dense<1> : tensor<2xi64>} ins(%arg0, %[[W]] : tensor<1x49x42x27xf32>, tensor<3x3x27x28xf32>) outs(%[[FILL]] : tensor<1x45x40x28xf32>) + // CHECK: %[[B:.+]] = linalg.generic {indexing_maps = [#[[$MAP1]], #[[$MAP2]], #[[$MAP2]]], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%arg2, %[[CONV]] : tensor<28xf32>, tensor<1x45x40x28xf32>) outs(%[[B_IN]] : tensor<1x45x40x28xf32>) + // CHECK: arith.addf + // CHECK: linalg.yield + %0 = "tosa.conv2d"(%input, %weights, %bias) {pad = [0, 0, 0, 0], stride = [1, 1], dilation = [2, 1]} : (tensor<1x49x42x27xf32>, tensor<28x3x3x27xf32>, tensor<28xf32>) -> (tensor<1x45x40x28xf32>) + return +} + +// ----- + +// CHECK-LABEL: @conv2d_padded_f32 +func @conv2d_padded_f32(%input: tensor<1x47x40x28xf32>, %weights: tensor<28x3x3x28xf32>, %bias: tensor<28xf32>) -> () { + // CHECK: %[[C0:.+]] = arith.constant 0 + // CHECK: linalg.pad_tensor %arg0 low[0, 1, 1, 0] high[0, 1, 1, 0] + // CHECK: linalg.yield %[[C0]] + // CHECK: linalg.conv_2d_nhwc_hwcf + %0 = "tosa.conv2d"(%input, %weights, %bias) {pad = [1, 1, 1, 1], stride = [1, 1], dilation = [2, 1]} : (tensor<1x47x40x28xf32>, tensor<28x3x3x28xf32>, tensor<28xf32>) -> (tensor<1x45x40x28xf32>) + return +} + +// ----- + +// CHECK-LABEL: @conv2d_quant +func @conv2d_quant(%arg0 : tensor<1x12x12x1xi8>, %arg1 : tensor<1024x3x3x1xi8>, %arg2 : tensor<1024xi32>) -> () { + // CHECK: %[[C22:.+]] = arith.constant -22 + // CHECK: linalg.pad_tensor %arg0 low[0, 1, 1, 0] high[0, 1, 1, 0] + // CHECK: linalg.yield %[[C22]] + // CHECK: linalg.conv_2d_nhwc_hwcf_q + %0 = "tosa.conv2d"(%arg0, %arg1, %arg2) {dilation = [1, 1], pad = [1, 1, 1, 1], quantization_info = {input_zp = -22 : i32, weight_zp = 42 : i32}, stride = [1, 1]} : (tensor<1x12x12x1xi8>, tensor<1024x3x3x1xi8>, tensor<1024xi32>) -> tensor<1x12x12x1024xi32> + return +} + +// ----- + +// CHECK: #[[$MAP0:.*]] = affine_map<(d0, d1, d2, d3) -> (d3)> +// CHECK: #[[$MAP1:.*]] = affine_map<(d0, d1, d2, d3) -> (d0, d1, d2, d3)> + +// CHECK-LABEL: @depthwise_conv +func @depthwise_conv(%arg0 : tensor<1x7x5x3xf32>, %arg1 : tensor<3x1x3x11xf32>, %arg2 : tensor<33xf32>) -> () { + // CHECK: [[INIT:%.+]] = linalg.init_tensor [1, 5, 5, 3, 11] + // CHECK: [[CST0:%.+]] = arith.constant 0 + // CHECK: [[FILL:%.+]] = linalg.fill([[CST0]], [[INIT]]) + // CHECK: [[OUT:%.+]] = linalg.init_tensor [1, 5, 5, 33] + // CHECK: [[DEPTH:%.+]] = linalg.depthwise_conv_2d_nhwc_hwcm {dilations = dense<1> : tensor<2xi64>, strides = dense<1> : tensor<2xi64>} ins(%arg0, %arg1 : tensor<1x7x5x3xf32>, tensor<3x1x3x11xf32>) outs([[FILL]] : tensor<1x5x5x3x11xf32>) + // CHECK: [[COLLAPSED:%.+]] = "tosa.reshape"([[DEPTH]]) {new_shape = [1, 5, 5, 33]} + // CHECK: [[BIAS:%.+]] = linalg.generic {indexing_maps = [#[[$MAP0]], #[[$MAP1]], #[[$MAP1]]], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%arg2, [[COLLAPSED]] : tensor<33xf32>, tensor<1x5x5x33xf32>) outs([[OUT]] : tensor<1x5x5x33xf32>) { + // CHECK: ^bb0(%arg3: f32, %arg4: f32, %arg5: f32): // no predecessors + // CHECK: [[ADD:%.+]] = arith.addf %arg3, %arg4 : f32 + // CHECK: linalg.yield [[ADD]] : f32 + // 
CHECK: } -> tensor<1x5x5x33xf32> + %2 = "tosa.depthwise_conv2d"(%arg0, %arg1, %arg2) { pad = [0, 0, 0, 0], stride = [1, 1], dilation = [1, 1] } : (tensor<1x7x5x3xf32>, tensor<3x1x3x11xf32>, tensor<33xf32>) -> (tensor<1x5x5x33xf32>) + return +} + +// ----- + +// CHECK: #[[$MAP0:.*]] = affine_map<(d0, d1, d2, d3) -> (d3)> +// CHECK: #[[$MAP1:.*]] = affine_map<(d0, d1, d2, d3) -> (d0, d1, d2, d3)> + +// CHECK-LABEL: @depthwise_conv_strides +func @depthwise_conv_strides(%arg0 : tensor<1x11x9x3xf32>, %arg1 : tensor<3x1x3x11xf32>, %arg2 : tensor<33xf32>) -> () { + // CHECK: [[INIT:%.+]] = linalg.init_tensor [1, 5, 5, 3, 11] + // CHECK: [[CST0:%.+]] = arith.constant 0 + // CHECK: [[FILL:%.+]] = linalg.fill([[CST0]], [[INIT]]) + // CHECK: [[OUT:%.+]] = linalg.init_tensor [1, 5, 5, 33] + // CHECK: [[DEPTH:%.+]] = linalg.depthwise_conv_2d_nhwc_hwcm {dilations = dense<1> : tensor<2xi64>, strides = dense<2> : tensor<2xi64>} ins(%arg0, %arg1 : tensor<1x11x9x3xf32>, tensor<3x1x3x11xf32>) outs([[FILL]] : tensor<1x5x5x3x11xf32>) + // CHECK: [[COLLAPSED:%.+]] = "tosa.reshape"([[DEPTH]]) {new_shape = [1, 5, 5, 33]} + // CHECK: [[BIAS:%.+]] = linalg.generic {indexing_maps = [#[[$MAP0]], #[[$MAP1]], #[[$MAP1]]], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%arg2, [[COLLAPSED]] : tensor<33xf32>, tensor<1x5x5x33xf32>) outs([[OUT]] : tensor<1x5x5x33xf32>) { + // CHECK: ^bb0(%arg3: f32, %arg4: f32, %arg5: f32): // no predecessors + // CHECK: [[ADD:%.+]] = arith.addf %arg3, %arg4 : f32 + // CHECK: linalg.yield [[ADD]] : f32 + // CHECK: } -> tensor<1x5x5x33xf32> + %2 = "tosa.depthwise_conv2d"(%arg0, %arg1, %arg2) { pad = [0, 0, 0, 0], stride = [2, 2], dilation = [1, 1] } : (tensor<1x11x9x3xf32>, tensor<3x1x3x11xf32>, tensor<33xf32>) -> (tensor<1x5x5x33xf32>) + return +} + +// ----- + +// CHECK: #[[$MAP0:.*]] = affine_map<(d0, d1, d2, d3) -> (d3)> +// CHECK: #[[$MAP1:.*]] = affine_map<(d0, d1, d2, d3) -> (d0, d1, d2, d3)> + +// CHECK-LABEL: @depthwise_conv_quant +func @depthwise_conv_quant(%arg0 : tensor<1x12x12x4xi8>, %arg1 : tensor<3x3x4x128xi8>, %arg2 : tensor<512xi32>) -> () { + // CHECK: [[PADV:%.+]] = arith.constant -128 + // CHECK: [[PAD:%.+]] = linalg.pad_tensor %arg0 low[0, 1, 1, 0] high[0, 1, 1, 0] + // CHECK: linalg.yield [[PADV]] + + // CHECK: [[INIT:%.+]] = linalg.init_tensor [1, 12, 12, 4, 128] + // CHECK: [[CST0:%.+]] = arith.constant 0 + // CHECK: [[FILL:%.+]] = linalg.fill([[CST0]], [[INIT]]) + // CHECK: [[OUT:%.+]] = linalg.init_tensor [1, 12, 12, 512] + // CHECK: [[C128:%.+]] = arith.constant -128 + // CHECK: [[C42:%.+]] = arith.constant 42 + // CHECK: [[DEPTH:%.+]] = linalg.depthwise_conv_2d_nhwc_hwcm_q {dilations = dense<1> : tensor<2xi64>, strides = dense<1> : tensor<2xi64>} ins([[PAD]], %arg1, [[C128]], [[C42]] : tensor<1x14x14x4xi8>, tensor<3x3x4x128xi8>, i32, i32) outs([[FILL]] : tensor<1x12x12x4x128xi32>) + // CHECK: [[COLLAPSED:%.+]] = "tosa.reshape"([[DEPTH]]) {new_shape = [1, 12, 12, 512]} + // CHECK: [[BIAS:%.+]] = linalg.generic {indexing_maps = [#[[$MAP0]], #[[$MAP1]], #[[$MAP1]]], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%arg2, [[COLLAPSED]] : tensor<512xi32>, tensor<1x12x12x512xi32>) outs([[OUT]] : tensor<1x12x12x512xi32>) { + // CHECK: ^bb0(%arg3: i32, %arg4: i32, %arg5: i32): // no predecessors + // CHECK: [[ADD:%.+]] = arith.addi %arg3, %arg4 : i32 + // CHECK: linalg.yield [[ADD]] : i32 + // CHECK: } -> tensor<1x12x12x512xi32> + %0 = "tosa.depthwise_conv2d"(%arg0, %arg1, %arg2) {pad = [1, 1, 1, 1], quantization_info = 
{input_zp = -128 : i32, weight_zp = 42 : i32}, stride = [1, 1], dilation = [1, 1] } : (tensor<1x12x12x4xi8>, tensor<3x3x4x128xi8>, tensor<512xi32>) -> tensor<1x12x12x512xi32> + return +} + +// ----- + +// CHECK: #[[$MAP0:.*]] = affine_map<(d0, d1, d2, d3) -> (d3)> +// CHECK: #[[$MAP1:.*]] = affine_map<(d0, d1, d2, d3) -> (d0, d1, d2, d3)> + +// CHECK-LABEL: @depthwise_conv_quant_dilations +func @depthwise_conv_quant_dilations(%arg0 : tensor<1x14x14x4xi8>, %arg1 : tensor<3x3x4x128xi8>, %arg2 : tensor<512xi32>) -> () { + // CHECK: [[INIT:%.+]] = linalg.init_tensor [1, 10, 10, 4, 128] + // CHECK: [[CST0:%.+]] = arith.constant 0 + // CHECK: [[FILL:%.+]] = linalg.fill([[CST0]], [[INIT]]) + // CHECK: [[OUT:%.+]] = linalg.init_tensor [1, 10, 10, 512] + // CHECK: [[C128:%.+]] = arith.constant -128 + // CHECK: [[C42:%.+]] = arith.constant 42 + // CHECK: [[DEPTH:%.+]] = linalg.depthwise_conv_2d_nhwc_hwcm_q {dilations = dense<2> : tensor<2xi64>, strides = dense<1> : tensor<2xi64>} ins(%arg0, %arg1, [[C128]], [[C42]] : tensor<1x14x14x4xi8>, tensor<3x3x4x128xi8>, i32, i32) outs([[FILL]] : tensor<1x10x10x4x128xi32>) + // CHECK: [[COLLAPSED:%.+]] = "tosa.reshape"([[DEPTH]]) {new_shape = [1, 10, 10, 512]} + // CHECK: [[BIAS:%.+]] = linalg.generic {indexing_maps = [#[[$MAP0]], #[[$MAP1]], #[[$MAP1]]], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%arg2, [[COLLAPSED]] : tensor<512xi32>, tensor<1x10x10x512xi32>) outs([[OUT]] : tensor<1x10x10x512xi32>) { + // CHECK: ^bb0(%arg3: i32, %arg4: i32, %arg5: i32): // no predecessors + // CHECK: [[ADD:%.+]] = arith.addi %arg3, %arg4 : i32 + // CHECK: linalg.yield [[ADD]] : i32 + // CHECK: } -> tensor<1x10x10x512xi32> + %0 = "tosa.depthwise_conv2d"(%arg0, %arg1, %arg2) {pad = [0, 0, 0, 0], quantization_info = {input_zp = -128 : i32, weight_zp = 42 : i32}, stride = [1, 1], dilation = [2, 2] } : (tensor<1x14x14x4xi8>, tensor<3x3x4x128xi8>, tensor<512xi32>) -> tensor<1x10x10x512xi32> + return +} diff --git a/mlir/test/Conversion/TosaToLinalg/tosa-to-linalg.mlir b/mlir/test/Conversion/TosaToLinalg/tosa-to-linalg.mlir index f8dc2e0bbd08..e68e76c67ef9 100644 --- a/mlir/test/Conversion/TosaToLinalg/tosa-to-linalg.mlir +++ b/mlir/test/Conversion/TosaToLinalg/tosa-to-linalg.mlir @@ -1064,154 +1064,6 @@ func @tile(%arg0 : tensor<2x3xi8>) -> () { // ----- - -// CHECK-LABEL: @matmul -func @matmul(%arg0: tensor<1x5x3xf32>, %arg1: tensor<1x3x6xf32>) -> (tensor<1x5x6xf32>) { - // CHECK: [[C0:%.+]] = arith.constant 0 - // CHECK: [[INIT:%.+]] = linalg.init_tensor [1, 5, 6] - // CHECK: [[FILLED:%.+]] = linalg.fill([[C0]], [[INIT]]) : f32, tensor<1x5x6xf32> -> tensor<1x5x6xf32> - // CHECK: linalg.batch_matmul ins(%arg0, %arg1 : tensor<1x5x3xf32>, tensor<1x3x6xf32>) outs([[FILLED]] : tensor<1x5x6xf32>) -> tensor<1x5x6xf32> - %0 = "tosa.matmul"(%arg0, %arg1) : (tensor<1x5x3xf32>, tensor<1x3x6xf32>) -> (tensor<1x5x6xf32>) - return %0 : tensor<1x5x6xf32> -} - -// ----- - - -// CHECK-LABEL: @matmul_quantized -func @matmul_quantized(%arg0: tensor<1x5x3xi8>, %arg1: tensor<1x3x6xi8>) -> (tensor<1x5x6xi32>) { - // CHECK: [[C0:%.+]] = arith.constant 0 - // CHECK: [[INIT:%.+]] = linalg.init_tensor [1, 5, 6] - // CHECK: [[FILLED:%.+]] = linalg.fill([[C0]], [[INIT]]) : i32, tensor<1x5x6xi32> -> tensor<1x5x6xi32> - // CHECK: [[ONE:%.+]] = arith.constant 1 - // CHECK: [[TWO:%.+]] = arith.constant 2 - // CHECK: linalg.quantized_batch_matmul ins(%arg0, %arg1, [[ONE]], [[TWO]] : tensor<1x5x3xi8>, tensor<1x3x6xi8>, i32, i32) outs([[FILLED]] : tensor<1x5x6xi32>) -> 
tensor<1x5x6xi32> - %0 = "tosa.matmul"(%arg0, %arg1) {quantization_info = {a_zp = 1 : i32, b_zp = 2 : i32}} : (tensor<1x5x3xi8>, tensor<1x3x6xi8>) -> (tensor<1x5x6xi32>) - return %0 : tensor<1x5x6xi32> -} - -// ----- - -// CHECK-LABEL: @matmul_dyn_batch -func @matmul_dyn_batch(%arg0: tensor, %arg1: tensor) -> (tensor) { - // CHECK: %[[C0:.+]] = arith.constant 0 - // CHECK: %[[DIM:.+]] = tensor.dim %arg0, %[[C0]] - // CHECK: %[[C0_0:.+]] = arith.constant 0 - // CHECK: %[[INIT:.+]] = linalg.init_tensor [%[[DIM]], 5, 6] - // CHECK: %[[FILLED:.+]] = linalg.fill(%[[C0_0]], %[[INIT]]) : f32, tensor -> tensor - // CHECK: linalg.batch_matmul ins(%arg0, %arg1 : tensor, tensor) outs(%[[FILLED]] : tensor) -> tensor - %0 = "tosa.matmul"(%arg0, %arg1) : (tensor, tensor) -> (tensor) - return %0 : tensor -} - -// ----- - -// CHECK-LABEL: @matmul_dyn_independent_dim -func @matmul_dyn_independent_dim(%arg0: tensor<1x5x3xf32>, %arg1: tensor<1x3x?xf32>) -> (tensor<1x5x?xf32>) { - // CHECK: %[[C2:.+]] = arith.constant 2 - // CHECK: %[[DIM:.+]] = tensor.dim %arg1, %[[C2]] - // CHECK: %[[C0:.+]] = arith.constant 0 - // CHECK: %[[INIT:.+]] = linalg.init_tensor [1, 5, %[[DIM]]] - // CHECK: %[[FILLED:.+]] = linalg.fill(%[[C0]], %[[INIT]]) : f32, tensor<1x5x?xf32> -> tensor<1x5x?xf32> - // CHECK: linalg.batch_matmul ins(%arg0, %arg1 : tensor<1x5x3xf32>, tensor<1x3x?xf32>) outs(%[[FILLED]] : tensor<1x5x?xf32>) -> tensor<1x5x?xf32> - %0 = "tosa.matmul"(%arg0, %arg1) : (tensor<1x5x3xf32>, tensor<1x3x?xf32>) -> (tensor<1x5x?xf32>) - return %0 : tensor<1x5x?xf32> -} - -// ----- - -// CHECK-LABEL: @matmul_dyn_independent_dim -func @matmul_dyn_independent_dim(%arg0: tensor<1x5x?xf32>, %arg1: tensor<1x?x6xf32>) -> (tensor<1x5x6xf32>) { - // CHECK: %[[C0:.+]] = arith.constant 0 - // CHECK: %[[INIT:.+]] = linalg.init_tensor [1, 5, 6] - // CHECK: %[[FILLED:.+]] = linalg.fill(%[[C0]], %[[INIT]]) : f32, tensor<1x5x6xf32> -> tensor<1x5x6xf32> - // CHECK: linalg.batch_matmul ins(%arg0, %arg1 : tensor<1x5x?xf32>, tensor<1x?x6xf32>) outs(%[[FILLED]] : tensor<1x5x6xf32>) -> tensor<1x5x6xf32> - %0 = "tosa.matmul"(%arg0, %arg1) : (tensor<1x5x?xf32>, tensor<1x?x6xf32>) -> (tensor<1x5x6xf32>) - return %0 : tensor<1x5x6xf32> -} - -// ----- - -// CHECK: #[[$MAP0:.*]] = affine_map<(d0, d1) -> (d1, d0)> -// CHECK: #[[$MAP1:.*]] = affine_map<(d0, d1) -> (d0, d1)> -// CHECK: #[[$MAP2:.*]] = affine_map<(d0, d1) -> (d1)> - -// CHECK-LABEL: @fully_connected -func @fully_connected(%arg0: tensor<5x3xf32>, %arg1: tensor<6x3xf32>, %arg2: tensor<6xf32>) -> (tensor<5x6xf32>) { - // CHECK: [[INITT:%.+]] = linalg.init_tensor [5, 6] - // CHECK: [[ZERO:%.+]] = arith.constant 0 - // CHECK: [[FILL:%.+]] = linalg.fill([[ZERO]], [[INITT]]) - // CHECK: [[PERM:%.+]] = arith.constant dense<[1, 0]> - // CHECK: [[INITT:%.+]] = linalg.init_tensor [3, 6] - // CHECK: [[TRANSPOSE:%.+]] = linalg.generic {indexing_maps = [#[[$MAP0]], #[[$MAP1]]], iterator_types = ["parallel", "parallel"]} ins(%arg1 : tensor<6x3xf32>) outs([[INITT]] : tensor<3x6xf32>) { - // CHECK: ^bb0([[IN:%.+]]: f32, [[UNUSED:%.+]]: f32): - // CHECK: linalg.yield [[IN]] : f32 - // CHECK: [[INITB:%.+]] = linalg.init_tensor [5, 6] - // CHECK: [[MATMUL:%.+]] = linalg.matmul ins(%arg0, [[TRANSPOSE]] : tensor<5x3xf32>, tensor<3x6xf32>) outs([[FILL]] : tensor<5x6xf32>) -> tensor<5x6xf32> - // CHECK: [[ADDED:%.+]] = linalg.generic {indexing_maps = [#[[$MAP2]], #[[$MAP1]], #[[$MAP1]]], iterator_types = ["parallel", "parallel"]} ins(%arg2, [[MATMUL]] : tensor<6xf32>, tensor<5x6xf32>) outs([[INITB]] : 
tensor<5x6xf32>) { - // CHECK: ^bb0(%arg3: f32, %arg4: f32, %arg5: f32): - // CHECK: [[ADD:%.+]] = arith.addf %arg3, %arg4 : f32 - // CHECK: linalg.yield [[ADD]] : f32 - - %0 = "tosa.fully_connected"(%arg0, %arg1, %arg2) : (tensor<5x3xf32>, tensor<6x3xf32>, tensor<6xf32>) -> (tensor<5x6xf32>) - return %0 : tensor<5x6xf32> -} - -// ----- - -// CHECK: #[[$MAP0:.*]] = affine_map<(d0, d1) -> (d1, d0)> -// CHECK: #[[$MAP1:.*]] = affine_map<(d0, d1) -> (d0, d1)> -// CHECK: #[[$MAP2:.*]] = affine_map<(d0, d1) -> (d1)> - -// CHECK-LABEL: @quantized_fully_connected -func @quantized_fully_connected(%arg0: tensor<5x3xi8>, %arg1: tensor<6x3xi8>, %arg2: tensor<6xi32>) -> (tensor<5x6xi32>) { - // CHECK: [[INITT:%.+]] = linalg.init_tensor [5, 6] - // CHECK: [[ZERO:%.+]] = arith.constant 0 - // CHECK: [[FILL:%.+]] = linalg.fill([[ZERO]], [[INITT]]) - // CHECK: [[PERM:%.+]] = arith.constant dense<[1, 0]> - // CHECK: [[INITT:%.+]] = linalg.init_tensor [3, 6] - // CHECK: [[TRANSPOSE:%.+]] = linalg.generic {indexing_maps = [#[[$MAP0]], #[[$MAP1]]], iterator_types = ["parallel", "parallel"]} ins(%arg1 : tensor<6x3xi8>) outs([[INITT]] : tensor<3x6xi8>) { - // CHECK: ^bb0([[IN:%.+]]: i8, [[UNUSED:%.+]]: i8): - // CHECK: linalg.yield [[IN]] : i8 - // CHECK: [[INITB:%.+]] = linalg.init_tensor [5, 6] - // CHECK: [[ONE:%.+]] = arith.constant 1 - // CHECK: [[TWO:%.+]] = arith.constant 2 - // CHECK: [[MATMUL:%.+]] = linalg.quantized_matmul ins(%arg0, [[TRANSPOSE]], [[ONE]], [[TWO]] : tensor<5x3xi8>, tensor<3x6xi8>, i32, i32) outs([[FILL]] : tensor<5x6xi32>) -> tensor<5x6xi32> - // CHECK: [[ADDED:%.+]] = linalg.generic {indexing_maps = [#[[$MAP2]], #[[$MAP1]], #[[$MAP1]]], iterator_types = ["parallel", "parallel"]} ins(%arg2, [[MATMUL]] : tensor<6xi32>, tensor<5x6xi32>) outs([[INITB]] - // CHECK: ^bb0([[IN1:%.+]]: i32, [[IN2:%.+]]: i32, [[UNUSED:%.+]]: i32): - // CHECK: [[ADD:%.+]] = arith.addi - // CHECK: linalg.yield [[ADD]] : i32 - %0 = "tosa.fully_connected"(%arg0, %arg1, %arg2) {quantization_info = {input_zp = 1:i32, weight_zp = 2:i32}} : (tensor<5x3xi8>, tensor<6x3xi8>, tensor<6xi32>) -> (tensor<5x6xi32>) - return %0 : tensor<5x6xi32> -} - -// ----- - -// CHECK-LABEL: @fully_connected_dyn -func @fully_connected_dyn(%arg0: tensor, %arg1: tensor<6x3xf32>, %arg2: tensor<6xf32>) -> (tensor) { - // CHECK: %[[C0:.+]] = arith.constant 0 - // CHECK: %[[DIM:.+]] = tensor.dim %arg0, %[[C0]] - // CHECK: %[[INITT:.+]] = linalg.init_tensor [%[[DIM]], 6] - // CHECK: %[[ZERO:.+]] = arith.constant 0 - // CHECK: %[[FILL:.+]] = linalg.fill(%[[ZERO]], %[[INITT]]) - // CHECK: %[[PERM:.+]] = arith.constant dense<[1, 0]> - // CHECK: %[[INITT:.+]] = linalg.init_tensor [3, 6] - // CHECK: %[[TRANSPOSE:.+]] = linalg.generic {indexing_maps = [#[[$MAP0]], #[[$MAP1]]], iterator_types = ["parallel", "parallel"]} ins(%arg1 : tensor<6x3xf32>) outs(%[[INITT]] : tensor<3x6xf32>) { - // CHECK: ^bb0(%[[IN:.+]]: f32, %[[UNUSED:.+]]: f32): - // CHECK: linalg.yield %[[IN]] : f32 - // CHECK: %[[INITB:.+]] = linalg.init_tensor [%[[DIM]], 6] - // CHECK: %[[MATMUL:.+]] = linalg.matmul ins(%arg0, %[[TRANSPOSE]] : tensor, tensor<3x6xf32>) outs(%[[FILL]] : tensor) -> tensor - // CHECK: %[[ADDED:.+]] = linalg.generic {indexing_maps = [#[[$MAP2]], #[[$MAP1]], #[[$MAP1]]], iterator_types = ["parallel", "parallel"]} ins(%arg2, %[[MATMUL]] : tensor<6xf32>, tensor) outs(%[[INITB]] : tensor) { - // CHECK: ^bb0(%arg3: f32, %arg4: f32, %arg5: f32): - // CHECK: %[[ADD:.+]] = arith.addf %arg3, %arg4 : f32 - // CHECK: linalg.yield %[[ADD]] : f32 - - %0 = 
"tosa.fully_connected"(%arg0, %arg1, %arg2) : (tensor, tensor<6x3xf32>, tensor<6xf32>) -> (tensor) - return %0 : tensor -} - -// ----- - func @pad_float(%arg0 : tensor<1x2xf32>) -> (tensor<4x9xf32>) { %0 = arith.constant dense<[[1, 2], [3, 4]]> : tensor<2x2xi32> // TODO: Output contains multiple "arith.constant 1 : index". @@ -1395,318 +1247,6 @@ func @table16(%arg0: tensor<6xi16>, %arg1: tensor<513xi16>) -> () { // ----- -// CHECK-LABEL: @max_pool -func @max_pool(%arg0: tensor<1x6x34x62xf32>) -> () { - // CHECK-DAG: [[CONST:%.+]] = arith.constant -3.40282347E+38 - // CHECK-DAG: [[INIT:%.+]] = linalg.init_tensor [1, 4, 32, 62] - // CHECK-DAG: [[FILL:%.+]] = linalg.fill([[CONST]], [[INIT]]) - // CHECK-DAG: [[KERNEL:%.+]] = linalg.init_tensor [3, 3] - // CHECK: linalg.pooling_nhwc_max {dilations = dense<1> : vector<2xi64>, strides = dense<1> : vector<2xi64>} ins(%arg0, [[KERNEL]] : tensor<1x6x34x62xf32>, tensor<3x3xf32>) outs([[FILL]] : tensor<1x4x32x62xf32>) - %0 = "tosa.max_pool2d"(%arg0) {pad = [0, 0, 0, 0], kernel = [3, 3], stride = [1, 1]} : (tensor<1x6x34x62xf32>) -> (tensor<1x4x32x62xf32>) - return -} - -// CHECK-LABEL: @max_pool_padded -func @max_pool_padded(%arg0: tensor<1x6x34x62xf32>) -> () { - // CHECK-DAG: [[CONST:%.+]] = arith.constant -3.40282347E+38 : f32 - // CHECK-DAG: [[PAD:%.+]] = linalg.pad_tensor %arg0 low[0, 0, 0, 0] high[0, 0, 1, 0] - // CHECK-DAG: linalg.yield [[CONST]] - // CHECK-DAG: [[INITVAL:%.+]] = arith.constant -3.40282347E+38 : f32 - // CHECK-DAG: [[INIT:%.+]] = linalg.init_tensor [1, 4, 33, 62] - // CHECK-DAG: [[FILL:%.+]] = linalg.fill([[INITVAL]], [[INIT]]) - // CHECK-DAG: [[KERNEL:%.+]] = linalg.init_tensor [3, 3] - // CHECK: linalg.pooling_nhwc_max {dilations = dense<1> : vector<2xi64>, strides = dense<1> : vector<2xi64>} ins([[PAD]], [[KERNEL]] : tensor<1x6x35x62xf32>, tensor<3x3xf32>) outs([[FILL]] : tensor<1x4x33x62xf32>) - %0 = "tosa.max_pool2d"(%arg0) {pad = [0, 0, 0, 1], kernel = [3, 3], stride = [1, 1]} : (tensor<1x6x34x62xf32>) -> (tensor<1x4x33x62xf32>) - return -} - -// CHECK-LABEL: @max_pool_i8 -func @max_pool_i8(%arg0: tensor<1x6x34x62xi8>) -> () { - // CHECK: arith.constant -128 - // CHECK: linalg.pooling_nhwc_max - %0 = "tosa.max_pool2d"(%arg0) {pad = [0, 0, 0, 0], kernel = [3, 3], stride = [1, 1]} : (tensor<1x6x34x62xi8>) -> (tensor<1x4x32x62xi8>) - return -} - -// CHECK-LABEL: @max_pool_i16 -func @max_pool_i16(%arg0: tensor<1x6x34x62xi16>) -> () { - // CHECK: arith.constant -32768 - // CHECK: linalg.pooling_nhwc_max - %0 = "tosa.max_pool2d"(%arg0) {pad = [0, 0, 0, 0], kernel = [3, 3], stride = [1, 1]} : (tensor<1x6x34x62xi16>) -> (tensor<1x4x32x62xi16>) - return -} - -// CHECK-LABEL: @max_pool_i32 -func @max_pool_i32(%arg0: tensor<1x6x34x62xi32>) -> () { - // CHECK: arith.constant -2147483648 - // CHECK: linalg.pooling_nhwc_max - %0 = "tosa.max_pool2d"(%arg0) {pad = [0, 0, 0, 0], kernel = [3, 3], stride = [1, 1]} : (tensor<1x6x34x62xi32>) -> (tensor<1x4x32x62xi32>) - return -} -// ----- - -// CHECK-LABEL: @avg_pool -func @avg_pool(%arg0: tensor<1x6x34x62xf32>) -> (tensor<1x5x33x62xf32>) { - // Initial piece computes the sum of the pooling region, with appropriate padding. 
- // CHECK: [[CONST:%.+]] = arith.constant 0 - // CHECK: [[PAD:%.+]] = linalg.pad_tensor %arg0 low[0, 1, 1, 0] high[0, 1, 1, 0] - // CHECK: [[CONST:%.+]] = arith.constant 0 - // CHECK: [[POOLINIT:%.+]] = linalg.init_tensor [1, 5, 33, 62] - // CHECK: [[FILL:%.+]] = linalg.fill([[CONST]], [[POOLINIT]]) - // CHECK: [[KERNEL:%.+]] = linalg.init_tensor [4, 4] - // CHECK: [[POOL:%.+]] = linalg.pooling_nhwc_sum {dilations = dense<1> : vector<2xi64>, strides = dense<1> : vector<2xi64>} ins([[PAD]], [[KERNEL]] : tensor<1x8x36x62xf32>, tensor<4x4xf32>) outs([[FILL]] : tensor<1x5x33x62xf32>) - // CHECK: [[INIT:%.+]] = linalg.init_tensor [1, 5, 33, 62] - // CHECK: [[GENERIC:%.+]] = linalg.generic {indexing_maps = [#map, #map], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins([[POOL]] : tensor<1x5x33x62xf32>) outs([[INIT]] : tensor<1x5x33x62xf32>) - // CHECK: [[ZERO:%.0]] = arith.constant 0 - // CHECK: [[ONE:%.+]] = arith.constant 1 - // CHECK: [[HEIGHT:%.+]] = arith.constant 4 - // CHECK: [[WIDTH:%.+]] = arith.constant 32 - // CHECK: [[IDX1:%.+]] = linalg.index 1 - // CHECK: [[IDX2:%.+]] = linalg.index 2 - - // The large block below computes what portion of the kernel is within non-padded input. - // CHECK: [[NY:%.+]] = arith.subi [[HEIGHT]], [[IDX1]] - // CHECK: [[NX:%.+]] = arith.subi [[WIDTH]], [[IDX2]] - // CHECK: [[KH:%.+]] = arith.constant 4 - // CHECK: [[PAD0:%.+]] = arith.constant 1 - // CHECK: [[SUBP0:%.+]] = arith.subi [[IDX1]], [[PAD0]] - // CHECK: [[P0CMP:%.+]] = arith.cmpi slt, [[SUBP0]], [[ZERO]] - // CHECK: [[SELP0:%.+]] = select [[P0CMP]], [[SUBP0]], [[ZERO]] - // CHECK: [[ADDP0:%.+]] = arith.addi [[KH]], [[SELP0]] - // CHECK: [[PAD1:%.+]] = arith.constant 1 - // CHECK: [[SUBP1:%.+]] = arith.subi [[NY]], [[PAD1]] - // CHECK: [[P1CMP:%.+]] = arith.cmpi slt, [[SUBP1]], [[ZERO]] - // CHECK: [[SELP1:%.+]] = select [[P1CMP]], [[SUBP1]], [[ZERO]] - // CHECK: [[ADDP1:%.+]] = arith.addi [[ADDP0]], [[SELP1]] - // CHECK: [[YCMP:%.+]] = arith.cmpi slt, [[ADDP1]], [[ONE]] - // CHECK: [[YSEL:%.+]] = select [[YCMP]], [[ONE]], [[ADDP1]] - // CHECK: [[KW:%.+]] = arith.constant 4 : index - // CHECK: [[PAD2:%.+]] = arith.constant 1 : index - // CHECK: [[SUBP2:%.+]] = arith.subi [[IDX2]], [[PAD2]] - // CHECK: [[P2CMP:%.+]] = arith.cmpi slt, [[SUBP2]], [[ZERO]] - // CHECK: [[SELP2:%.+]] = select [[P2CMP]], [[SUBP2]], [[ZERO]] - // CHECK: [[ADDP2:%.+]] = arith.addi [[KW]], [[SELP2]] - // CHECK: [[PAD3:%.+]] = arith.constant 1 : index - // CHECK: [[SUBP3:%.+]] = arith.subi [[NX]], [[PAD3]] - // CHECK: [[P3CMP:%.+]] = arith.cmpi slt, [[SUBP3]], [[ZERO]] - // CHECK: [[SELP3:%.+]] = select [[P3CMP]], [[SUBP3]], [[ZERO]] - // CHECK: [[ADDP3:%.+]] = arith.addi [[ADDP2]], [[SELP3]] - // CHECK: [[XCMP:%.+]] = arith.cmpi slt, [[ADDP3]], [[ONE]] - // CHECK: [[XSEL:%.+]] = select [[XCMP]], [[ONE]], [[ADDP3]] - - // Given the valid coverage of the pooling region, normalize the summation. 
- // CHECK: [[C:%.+]] = arith.muli [[YSEL]], [[XSEL]] - // CHECK: [[CI:%.+]] = arith.index_cast [[C]] - // CHECK: [[CF:%.+]] = arith.sitofp [[CI]] - // CHECK: [[RESULT:%.+]] = arith.divf %arg1, [[CF]] - // CHECK: linalg.yield [[RESULT]] - %0 = "tosa.avg_pool2d"(%arg0) {pad = [1, 1, 1, 1], kernel = [4, 4], stride = [1, 1]} : (tensor<1x6x34x62xf32>) -> (tensor<1x5x33x62xf32>) - return %0 : tensor<1x5x33x62xf32> -} - -// ----- - -// CHECK-LABEL: @avg_pool_i8 -func @avg_pool_i8(%arg0 : tensor<1x128x128x2xi8>) -> () { - - // CHECK: linalg.pooling_nhwc_sum - // CHECK: linalg.generic - - // CHECK: %[[INZP:.+]] = arith.constant -128 - // CHECK: %[[INZP_OFF:.+]] = arith.muli %{{.+}}, %[[INZP]] - // CHECK: %[[OFFSETED:.+]] = arith.subi %arg1, %[[INZP_OFF]] - // CHECK: %[[NUMERATOR:.+]] = arith.constant 1073741825 - // CHECK: %[[MULTIPLIER:.+]] = arith.divui %[[NUMERATOR]], %{{.+}} - // CHECK: %[[SHIFT:.+]] = arith.constant 30 - // CHECK: %[[SCALE:.+]] = "tosa.apply_scale"(%{{.+}}, %[[MULTIPLIER]], %[[SHIFT]]) {double_round = false} - // CHECK: %[[OUTZP:.+]] = arith.constant -128 - // CHECK: %[[OUT:.+]] = arith.addi %[[SCALE]], %[[OUTZP]] - // CHECK: %[[MIN:.+]] = arith.constant -128 - // CHECK: %[[MAX:.+]] = arith.constant 127 - // CHECK: %[[CMP_MIN:.+]] = arith.cmpi slt, %[[OUT]], %[[MIN]] - // CHECK: %[[CLMP_MIN:.+]] = select %[[CMP_MIN]], %[[MIN]], %[[OUT]] - // CHECK: %[[CMP_MAX:.+]] = arith.cmpi slt, %[[MAX]], %[[OUT]] - // CHECK: %[[CLMP_MAX:.+]] = select %[[CMP_MAX]], %[[MAX]], %[[CLMP_MIN]] - // CHECK: %[[TRUNC:.+]] = arith.trunci %[[CLMP_MAX]] - // CHECK: linalg.yield %[[TRUNC]] - %0 = "tosa.avg_pool2d"(%arg0) {kernel = [4, 4], pad = [0, 0, 0, 0], quantization_info = {input_zp = -128 : i32, output_zp = -128 : i32}, stride = [4, 4]} : (tensor<1x128x128x2xi8>) -> tensor<1x32x32x2xi8> - return -} - -// ----- - -// CHECK-LABEL: @avg_pool_i16 -func @avg_pool_i16(%arg0 : tensor<1x128x128x2xi16>) -> () { - - // CHECK: linalg.pooling_nhwc_sum - // CHECK: linalg.generic - - // CHECK: %[[INZP:.+]] = arith.constant -128 - // CHECK: %[[INZP_OFF:.+]] = arith.muli %{{.+}}, %[[INZP]] - // CHECK: %[[OFFSETED:.+]] = arith.subi %arg1, %[[INZP_OFF]] - // CHECK: %[[NUMERATOR:.+]] = arith.constant 1073741825 - // CHECK: %[[MULTIPLIER:.+]] = arith.divui %[[NUMERATOR]], %{{.+}} - // CHECK: %[[SHIFT:.+]] = arith.constant 30 - // CHECK: %[[SCALE:.+]] = "tosa.apply_scale"(%{{.+}}, %[[MULTIPLIER]], %[[SHIFT]]) {double_round = false} - // CHECK: %[[OUTZP:.+]] = arith.constant -128 - // CHECK: %[[OUT:.+]] = arith.addi %[[SCALE]], %[[OUTZP]] - // CHECK: %[[MIN:.+]] = arith.constant -32768 - // CHECK: %[[MAX:.+]] = arith.constant 32767 - // CHECK: %[[CMP_MIN:.+]] = arith.cmpi slt, %[[OUT]], %[[MIN]] - // CHECK: %[[CLMP_MIN:.+]] = select %[[CMP_MIN]], %[[MIN]], %[[OUT]] - // CHECK: %[[CMP_MAX:.+]] = arith.cmpi slt, %[[MAX]], %[[OUT]] - // CHECK: %[[CLMP_MAX:.+]] = select %[[CMP_MAX]], %[[MAX]], %[[CLMP_MIN]] - // CHECK: %[[TRUNC:.+]] = arith.trunci %[[CLMP_MAX]] - // CHECK: linalg.yield %[[TRUNC]] - %0 = "tosa.avg_pool2d"(%arg0) {kernel = [4, 4], pad = [0, 0, 0, 0], quantization_info = {input_zp = -128 : i32, output_zp = -128 : i32}, stride = [4, 4]} : (tensor<1x128x128x2xi16>) -> tensor<1x32x32x2xi16> - return -} - -// ----- - -// CHECK: #[[$MAP0:.+]] = affine_map<(d0, d1, d2, d3) -> (d3, d0, d1, d2)> -// CHECK: #[[$MAP1:.+]] = affine_map<(d0, d1, d2, d3) -> (d0, d1, d2, d3)> -// CHECK: #[[$MAP2:.+]] = affine_map<(d0, d1, d2, d3) -> (d3)> - -// CHECK-LABEL: @conv2d_f32 -func @conv2d_f32(%input: tensor<1x49x42x27xf32>, 
%weights: tensor<28x3x3x27xf32>, %bias: tensor<28xf32>) -> () { - // CHECK: %[[W_IN:.+]] = linalg.init_tensor [3, 3, 27, 28] - // CHECK: %[[W:.+]] = linalg.generic {indexing_maps = [#[[$MAP0]], #[[$MAP1]]], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%arg1 : tensor<28x3x3x27xf32>) outs(%[[W_IN]] : tensor<3x3x27x28xf32>) - // CHECK: linalg.yield %arg3 : f32 - // CHECK: %[[M_IN:.+]] = linalg.init_tensor [1, 45, 40, 28] - // CHECK: %[[CST:.+]] = arith.constant 0 - // CHECK: %[[FILL:.+]] = linalg.fill - // CHECK: %[[B_IN:.+]] = linalg.init_tensor [1, 45, 40, 28] - // CHECK: %[[CONV:.+]] = linalg.conv_2d_nhwc_hwcf {dilations = dense<[2, 1]> : tensor<2xi64>, strides = dense<1> : tensor<2xi64>} ins(%arg0, %[[W]] : tensor<1x49x42x27xf32>, tensor<3x3x27x28xf32>) outs(%[[FILL]] : tensor<1x45x40x28xf32>) - // CHECK: %[[B:.+]] = linalg.generic {indexing_maps = [#[[$MAP2]], #[[$MAP1]], #[[$MAP1]]], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%arg2, %[[CONV]] : tensor<28xf32>, tensor<1x45x40x28xf32>) outs(%[[B_IN]] : tensor<1x45x40x28xf32>) - // CHECK: arith.addf - // CHECK: linalg.yield %7 : f32 - %0 = "tosa.conv2d"(%input, %weights, %bias) {pad = [0, 0, 0, 0], stride = [1, 1], dilation = [2, 1]} : (tensor<1x49x42x27xf32>, tensor<28x3x3x27xf32>, tensor<28xf32>) -> (tensor<1x45x40x28xf32>) - return -} - -// ----- - -// CHECK-LABEL: @conv2d_padded_f32 -func @conv2d_padded_f32(%input: tensor<1x47x40x28xf32>, %weights: tensor<28x3x3x28xf32>, %bias: tensor<28xf32>) -> () { - // CHECK: %[[C0:.+]] = arith.constant 0 - // CHECK: linalg.pad_tensor %arg0 low[0, 1, 1, 0] high[0, 1, 1, 0] - // CHECK: linalg.yield %[[C0]] - // CHECK: linalg.conv_2d_nhwc_hwcf - %0 = "tosa.conv2d"(%input, %weights, %bias) {pad = [1, 1, 1, 1], stride = [1, 1], dilation = [2, 1]} : (tensor<1x47x40x28xf32>, tensor<28x3x3x28xf32>, tensor<28xf32>) -> (tensor<1x45x40x28xf32>) - return -} - -// ----- - -// CHECK-LABEL: @conv2d_quant -func @conv2d_quant(%arg0 : tensor<1x12x12x1xi8>, %arg1 : tensor<1024x3x3x1xi8>, %arg2 : tensor<1024xi32>) -> () { - // CHECK: %[[C22:.+]] = arith.constant -22 - // CHECK: linalg.pad_tensor %arg0 low[0, 1, 1, 0] high[0, 1, 1, 0] - // CHECK: linalg.yield %[[C22]] - // CHECK: linalg.conv_2d_nhwc_hwcf_q - %0 = "tosa.conv2d"(%arg0, %arg1, %arg2) {dilation = [1, 1], pad = [1, 1, 1, 1], quantization_info = {input_zp = -22 : i32, weight_zp = 42 : i32}, stride = [1, 1]} : (tensor<1x12x12x1xi8>, tensor<1024x3x3x1xi8>, tensor<1024xi32>) -> tensor<1x12x12x1024xi32> - return -} - -// ----- - -// CHECK: #[[$MAP0:.*]] = affine_map<(d0, d1, d2, d3) -> (d3)> -// CHECK: #[[$MAP1:.*]] = affine_map<(d0, d1, d2, d3) -> (d0, d1, d2, d3)> - -// CHECK-LABEL: @depthwise_conv -func @depthwise_conv(%arg0 : tensor<1x7x5x3xf32>, %arg1 : tensor<3x1x3x11xf32>, %arg2 : tensor<33xf32>) -> () { - // CHECK: [[INIT:%.+]] = linalg.init_tensor [1, 5, 5, 3, 11] - // CHECK: [[CST0:%.+]] = arith.constant 0 - // CHECK: [[FILL:%.+]] = linalg.fill([[CST0]], [[INIT]]) - // CHECK: [[OUT:%.+]] = linalg.init_tensor [1, 5, 5, 33] - // CHECK: [[DEPTH:%.+]] = linalg.depthwise_conv_2d_nhwc_hwcm {dilations = dense<1> : tensor<2xi64>, strides = dense<1> : tensor<2xi64>} ins(%arg0, %arg1 : tensor<1x7x5x3xf32>, tensor<3x1x3x11xf32>) outs([[FILL]] : tensor<1x5x5x3x11xf32>) - // CHECK: [[COLLAPSED:%.+]] = tensor.collapse_shape [[DEPTH]] {{\[}}[0], [1], [2], [3, 4]] - // CHECK: [[BIAS:%.+]] = linalg.generic {indexing_maps = [#[[$MAP0]], #[[$MAP1]], #[[$MAP1]]], iterator_types = ["parallel", "parallel", "parallel", 
"parallel"]} ins(%arg2, [[COLLAPSED]] : tensor<33xf32>, tensor<1x5x5x33xf32>) outs([[OUT]] : tensor<1x5x5x33xf32>) { - // CHECK: ^bb0(%arg3: f32, %arg4: f32, %arg5: f32): // no predecessors - // CHECK: [[ADD:%.+]] = arith.addf %arg3, %arg4 : f32 - // CHECK: linalg.yield [[ADD]] : f32 - // CHECK: } -> tensor<1x5x5x33xf32> - %2 = "tosa.depthwise_conv2d"(%arg0, %arg1, %arg2) { pad = [0, 0, 0, 0], stride = [1, 1], dilation = [1, 1] } : (tensor<1x7x5x3xf32>, tensor<3x1x3x11xf32>, tensor<33xf32>) -> (tensor<1x5x5x33xf32>) - return -} - -// ----- - -// CHECK: #[[$MAP0:.*]] = affine_map<(d0, d1, d2, d3) -> (d3)> -// CHECK: #[[$MAP1:.*]] = affine_map<(d0, d1, d2, d3) -> (d0, d1, d2, d3)> - -// CHECK-LABEL: @depthwise_conv_strides -func @depthwise_conv_strides(%arg0 : tensor<1x11x9x3xf32>, %arg1 : tensor<3x1x3x11xf32>, %arg2 : tensor<33xf32>) -> () { - // CHECK: [[INIT:%.+]] = linalg.init_tensor [1, 5, 5, 3, 11] - // CHECK: [[CST0:%.+]] = arith.constant 0 - // CHECK: [[FILL:%.+]] = linalg.fill([[CST0]], [[INIT]]) - // CHECK: [[OUT:%.+]] = linalg.init_tensor [1, 5, 5, 33] - // CHECK: [[DEPTH:%.+]] = linalg.depthwise_conv_2d_nhwc_hwcm {dilations = dense<1> : tensor<2xi64>, strides = dense<2> : tensor<2xi64>} ins(%arg0, %arg1 : tensor<1x11x9x3xf32>, tensor<3x1x3x11xf32>) outs([[FILL]] : tensor<1x5x5x3x11xf32>) - // CHECK: [[COLLAPSED:%.+]] = tensor.collapse_shape [[DEPTH]] {{\[}}[0], [1], [2], [3, 4]] - // CHECK: [[BIAS:%.+]] = linalg.generic {indexing_maps = [#[[$MAP0]], #[[$MAP1]], #[[$MAP1]]], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%arg2, [[COLLAPSED]] : tensor<33xf32>, tensor<1x5x5x33xf32>) outs([[OUT]] : tensor<1x5x5x33xf32>) { - // CHECK: ^bb0(%arg3: f32, %arg4: f32, %arg5: f32): // no predecessors - // CHECK: [[ADD:%.+]] = arith.addf %arg3, %arg4 : f32 - // CHECK: linalg.yield [[ADD]] : f32 - // CHECK: } -> tensor<1x5x5x33xf32> - %2 = "tosa.depthwise_conv2d"(%arg0, %arg1, %arg2) { pad = [0, 0, 0, 0], stride = [2, 2], dilation = [1, 1] } : (tensor<1x11x9x3xf32>, tensor<3x1x3x11xf32>, tensor<33xf32>) -> (tensor<1x5x5x33xf32>) - return -} - -// ----- - -// CHECK: #[[$MAP0:.*]] = affine_map<(d0, d1, d2, d3) -> (d3)> -// CHECK: #[[$MAP1:.*]] = affine_map<(d0, d1, d2, d3) -> (d0, d1, d2, d3)> - -// CHECK-LABEL: @depthwise_conv_quant -func @depthwise_conv_quant(%arg0 : tensor<1x12x12x4xi8>, %arg1 : tensor<3x3x4x128xi8>, %arg2 : tensor<512xi32>) -> () { - // CHECK: [[PADV:%.+]] = arith.constant -128 - // CHECK: [[PAD:%.+]] = linalg.pad_tensor %arg0 low[0, 1, 1, 0] high[0, 1, 1, 0] - // CHECK: linalg.yield [[PADV]] - - // CHECK: [[INIT:%.+]] = linalg.init_tensor [1, 12, 12, 4, 128] - // CHECK: [[CST0:%.+]] = arith.constant 0 - // CHECK: [[FILL:%.+]] = linalg.fill([[CST0]], [[INIT]]) - // CHECK: [[OUT:%.+]] = linalg.init_tensor [1, 12, 12, 512] - // CHECK: [[C128:%.+]] = arith.constant -128 - // CHECK: [[C42:%.+]] = arith.constant 42 - // CHECK: [[DEPTH:%.+]] = linalg.depthwise_conv_2d_nhwc_hwcm_q {dilations = dense<1> : tensor<2xi64>, strides = dense<1> : tensor<2xi64>} ins([[PAD]], %arg1, [[C128]], [[C42]] : tensor<1x14x14x4xi8>, tensor<3x3x4x128xi8>, i32, i32) outs([[FILL]] : tensor<1x12x12x4x128xi32>) - // CHECK: [[COLLAPSED:%.+]] = tensor.collapse_shape [[DEPTH]] {{\[}}[0], [1], [2], [3, 4]] - // CHECK: [[BIAS:%.+]] = linalg.generic {indexing_maps = [#[[$MAP0]], #[[$MAP1]], #[[$MAP1]]], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%arg2, [[COLLAPSED]] : tensor<512xi32>, tensor<1x12x12x512xi32>) outs([[OUT]] : tensor<1x12x12x512xi32>) { - // 
CHECK: ^bb0(%arg3: i32, %arg4: i32, %arg5: i32): // no predecessors - // CHECK: [[ADD:%.+]] = arith.addi %arg3, %arg4 : i32 - // CHECK: linalg.yield [[ADD]] : i32 - // CHECK: } -> tensor<1x12x12x512xi32> - %0 = "tosa.depthwise_conv2d"(%arg0, %arg1, %arg2) {pad = [1, 1, 1, 1], quantization_info = {input_zp = -128 : i32, weight_zp = 42 : i32}, stride = [1, 1], dilation = [1, 1] } : (tensor<1x12x12x4xi8>, tensor<3x3x4x128xi8>, tensor<512xi32>) -> tensor<1x12x12x512xi32> - return -} - -// ----- - -// CHECK: #[[$MAP0:.*]] = affine_map<(d0, d1, d2, d3) -> (d3)> -// CHECK: #[[$MAP1:.*]] = affine_map<(d0, d1, d2, d3) -> (d0, d1, d2, d3)> - -// CHECK-LABEL: @depthwise_conv_quant_dilations -func @depthwise_conv_quant_dilations(%arg0 : tensor<1x14x14x4xi8>, %arg1 : tensor<3x3x4x128xi8>, %arg2 : tensor<512xi32>) -> () { - // CHECK: [[INIT:%.+]] = linalg.init_tensor [1, 10, 10, 4, 128] - // CHECK: [[CST0:%.+]] = arith.constant 0 - // CHECK: [[FILL:%.+]] = linalg.fill([[CST0]], [[INIT]]) - // CHECK: [[OUT:%.+]] = linalg.init_tensor [1, 10, 10, 512] - // CHECK: [[C128:%.+]] = arith.constant -128 - // CHECK: [[C42:%.+]] = arith.constant 42 - // CHECK: [[DEPTH:%.+]] = linalg.depthwise_conv_2d_nhwc_hwcm_q {dilations = dense<2> : tensor<2xi64>, strides = dense<1> : tensor<2xi64>} ins(%arg0, %arg1, [[C128]], [[C42]] : tensor<1x14x14x4xi8>, tensor<3x3x4x128xi8>, i32, i32) outs([[FILL]] : tensor<1x10x10x4x128xi32>) - // CHECK: [[COLLAPSED:%.+]] = tensor.collapse_shape [[DEPTH]] {{\[}}[0], [1], [2], [3, 4]] - // CHECK: [[BIAS:%.+]] = linalg.generic {indexing_maps = [#[[$MAP0]], #[[$MAP1]], #[[$MAP1]]], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%arg2, [[COLLAPSED]] : tensor<512xi32>, tensor<1x10x10x512xi32>) outs([[OUT]] : tensor<1x10x10x512xi32>) { - // CHECK: ^bb0(%arg3: i32, %arg4: i32, %arg5: i32): // no predecessors - // CHECK: [[ADD:%.+]] = arith.addi %arg3, %arg4 : i32 - // CHECK: linalg.yield [[ADD]] : i32 - // CHECK: } -> tensor<1x10x10x512xi32> - %0 = "tosa.depthwise_conv2d"(%arg0, %arg1, %arg2) {pad = [0, 0, 0, 0], quantization_info = {input_zp = -128 : i32, weight_zp = 42 : i32}, stride = [1, 1], dilation = [2, 2] } : (tensor<1x14x14x4xi8>, tensor<3x3x4x128xi8>, tensor<512xi32>) -> tensor<1x10x10x512xi32> - return -} - -// ----- - // CHECK-LABEL: @resize_nearest func @resize_nearest(%input: tensor<1x2x2x1xf32>) -> () { // CHECK: %[[INIT:.+]] = linalg.init_tensor [1, 4, 4, 1] From 10b3675aa95094bedb5c534e2017652a10f49dfd Mon Sep 17 00:00:00 2001 From: Victor Perez Date: Thu, 23 Dec 2021 14:33:47 -0600 Subject: [PATCH 180/293] [RISCV][VP] Lower mask vector VP AND/OR/XOR to RVV instructions For fixed and scalable vectors, each intrinsic x is lowered to vmx.mm, dropping the mask, which is safe to do as masked-off elements are undef anyway. 
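For illustration (taken directly from the tests added below), a mask-vector VP intrinsic such as

  %v = call <4 x i1> @llvm.vp.and.v4i1(<4 x i1> %b, <4 x i1> %c, <4 x i1> %a, i32 %evl)

now selects to a plain mask-register instruction, dropping the mask operand %a and keeping only the EVL:

  vsetvli zero, a0, e8, mf4, ta, mu
  vmand.mm v0, v0, v8
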
Differential Revision: https://reviews.llvm.org/D115339 --- llvm/lib/Target/RISCV/RISCVISelLowering.cpp | 40 +- llvm/lib/Target/RISCV/RISCVISelLowering.h | 2 + .../RISCV/rvv/fixed-vectors-marith-vp.ll | 269 +++++++++++ llvm/test/CodeGen/RISCV/rvv/marith-vp.ll | 437 ++++++++++++++++++ 4 files changed, 745 insertions(+), 3 deletions(-) create mode 100644 llvm/test/CodeGen/RISCV/rvv/fixed-vectors-marith-vp.ll create mode 100644 llvm/test/CodeGen/RISCV/rvv/marith-vp.ll diff --git a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp index 4322f9bc1ce7..b455fee62f54 100644 --- a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp +++ b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp @@ -547,6 +547,10 @@ RISCVTargetLowering::RISCVTargetLowering(const TargetMachine &TM, setOperationAction(ISD::SELECT_CC, VT, Expand); setOperationAction(ISD::VSELECT, VT, Expand); + setOperationAction(ISD::VP_AND, VT, Custom); + setOperationAction(ISD::VP_OR, VT, Custom); + setOperationAction(ISD::VP_XOR, VT, Custom); + setOperationAction(ISD::VECREDUCE_AND, VT, Custom); setOperationAction(ISD::VECREDUCE_OR, VT, Custom); setOperationAction(ISD::VECREDUCE_XOR, VT, Custom); @@ -831,6 +835,9 @@ RISCVTargetLowering::RISCVTargetLowering(const TargetMachine &TM, // Operations below are different for between masks and other vectors. if (VT.getVectorElementType() == MVT::i1) { + setOperationAction(ISD::VP_AND, VT, Custom); + setOperationAction(ISD::VP_OR, VT, Custom); + setOperationAction(ISD::VP_XOR, VT, Custom); setOperationAction(ISD::AND, VT, Custom); setOperationAction(ISD::OR, VT, Custom); setOperationAction(ISD::XOR, VT, Custom); @@ -3189,11 +3196,11 @@ SDValue RISCVTargetLowering::LowerOperation(SDValue Op, case ISD::VP_UREM: return lowerVPOp(Op, DAG, RISCVISD::UREM_VL); case ISD::VP_AND: - return lowerVPOp(Op, DAG, RISCVISD::AND_VL); + return lowerLogicVPOp(Op, DAG, RISCVISD::VMAND_VL, RISCVISD::AND_VL); case ISD::VP_OR: - return lowerVPOp(Op, DAG, RISCVISD::OR_VL); + return lowerLogicVPOp(Op, DAG, RISCVISD::VMOR_VL, RISCVISD::OR_VL); case ISD::VP_XOR: - return lowerVPOp(Op, DAG, RISCVISD::XOR_VL); + return lowerLogicVPOp(Op, DAG, RISCVISD::VMXOR_VL, RISCVISD::XOR_VL); case ISD::VP_ASHR: return lowerVPOp(Op, DAG, RISCVISD::SRA_VL); case ISD::VP_LSHR: @@ -5409,6 +5416,33 @@ SDValue RISCVTargetLowering::lowerVPOp(SDValue Op, SelectionDAG &DAG, return convertFromScalableVector(VT, VPOp, DAG, Subtarget); } +SDValue RISCVTargetLowering::lowerLogicVPOp(SDValue Op, SelectionDAG &DAG, + unsigned MaskOpc, + unsigned VecOpc) const { + MVT VT = Op.getSimpleValueType(); + if (VT.getVectorElementType() != MVT::i1) + return lowerVPOp(Op, DAG, VecOpc); + + // It is safe to drop mask parameter as masked-off elements are undef. + SDValue Op1 = Op->getOperand(0); + SDValue Op2 = Op->getOperand(1); + SDValue VL = Op->getOperand(3); + + MVT ContainerVT = VT; + const bool IsFixed = VT.isFixedLengthVector(); + if (IsFixed) { + ContainerVT = getContainerForFixedLengthVector(VT); + Op1 = convertToScalableVector(ContainerVT, Op1, DAG, Subtarget); + Op2 = convertToScalableVector(ContainerVT, Op2, DAG, Subtarget); + } + + SDLoc DL(Op); + SDValue Val = DAG.getNode(MaskOpc, DL, ContainerVT, Op1, Op2, VL); + if (!IsFixed) + return Val; + return convertFromScalableVector(VT, Val, DAG, Subtarget); +} + // Custom lower MGATHER/VP_GATHER to a legalized form for RVV. It will then be // matched to a RVV indexed load. 
The RVV indexed load instructions only // support the "unsigned unscaled" addressing mode; indices are implicitly diff --git a/llvm/lib/Target/RISCV/RISCVISelLowering.h b/llvm/lib/Target/RISCV/RISCVISelLowering.h index 794ec98bd38d..48c5ce730933 100644 --- a/llvm/lib/Target/RISCV/RISCVISelLowering.h +++ b/llvm/lib/Target/RISCV/RISCVISelLowering.h @@ -601,6 +601,8 @@ class RISCVTargetLowering : public TargetLowering { SDValue lowerToScalableOp(SDValue Op, SelectionDAG &DAG, unsigned NewOpc, bool HasMask = true) const; SDValue lowerVPOp(SDValue Op, SelectionDAG &DAG, unsigned RISCVISDOpc) const; + SDValue lowerLogicVPOp(SDValue Op, SelectionDAG &DAG, unsigned MaskOpc, + unsigned VecOpc) const; SDValue lowerFixedLengthVectorExtendToRVV(SDValue Op, SelectionDAG &DAG, unsigned ExtendOpc) const; SDValue lowerGET_ROUNDING(SDValue Op, SelectionDAG &DAG) const; diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-marith-vp.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-marith-vp.ll new file mode 100644 index 000000000000..632fe1929512 --- /dev/null +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-marith-vp.ll @@ -0,0 +1,269 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc -mtriple=riscv32 -mattr=+experimental-v -riscv-v-vector-bits-min=128 -verify-machineinstrs < %s \ +; RUN: | FileCheck %s +; RUN: llc -mtriple=riscv64 -mattr=+experimental-v -riscv-v-vector-bits-min=128 -verify-machineinstrs < %s \ +; RUN: | FileCheck %s + +declare <1 x i1> @llvm.vp.and.v1i1(<1 x i1>, <1 x i1>, <1 x i1>, i32) + +define <1 x i1> @and_v1i1(<1 x i1> %b, <1 x i1> %c, <1 x i1> %a, i32 zeroext %evl) { +; CHECK-LABEL: and_v1i1: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e8, mf8, ta, mu +; CHECK-NEXT: vmand.mm v0, v0, v8 +; CHECK-NEXT: ret + %v = call <1 x i1> @llvm.vp.and.v1i1(<1 x i1> %b, <1 x i1> %c, <1 x i1> %a, i32 %evl) + ret <1 x i1> %v +} + +declare <2 x i1> @llvm.vp.and.v2i1(<2 x i1>, <2 x i1>, <2 x i1>, i32) + +define <2 x i1> @and_v2i1(<2 x i1> %b, <2 x i1> %c, <2 x i1> %a, i32 zeroext %evl) { +; CHECK-LABEL: and_v2i1: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e8, mf8, ta, mu +; CHECK-NEXT: vmand.mm v0, v0, v8 +; CHECK-NEXT: ret + %v = call <2 x i1> @llvm.vp.and.v2i1(<2 x i1> %b, <2 x i1> %c, <2 x i1> %a, i32 %evl) + ret <2 x i1> %v +} + +declare <4 x i1> @llvm.vp.and.v4i1(<4 x i1>, <4 x i1>, <4 x i1>, i32) + +define <4 x i1> @and_v4i1(<4 x i1> %b, <4 x i1> %c, <4 x i1> %a, i32 zeroext %evl) { +; CHECK-LABEL: and_v4i1: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e8, mf4, ta, mu +; CHECK-NEXT: vmand.mm v0, v0, v8 +; CHECK-NEXT: ret + %v = call <4 x i1> @llvm.vp.and.v4i1(<4 x i1> %b, <4 x i1> %c, <4 x i1> %a, i32 %evl) + ret <4 x i1> %v +} + +declare <8 x i1> @llvm.vp.and.v8i1(<8 x i1>, <8 x i1>, <8 x i1>, i32) + +define <8 x i1> @and_v8i1(<8 x i1> %b, <8 x i1> %c, <8 x i1> %a, i32 zeroext %evl) { +; CHECK-LABEL: and_v8i1: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e8, mf2, ta, mu +; CHECK-NEXT: vmand.mm v0, v0, v8 +; CHECK-NEXT: ret + %v = call <8 x i1> @llvm.vp.and.v8i1(<8 x i1> %b, <8 x i1> %c, <8 x i1> %a, i32 %evl) + ret <8 x i1> %v +} + +declare <16 x i1> @llvm.vp.and.v16i1(<16 x i1>, <16 x i1>, <16 x i1>, i32) + +define <16 x i1> @and_v16i1(<16 x i1> %b, <16 x i1> %c, <16 x i1> %a, i32 zeroext %evl) { +; CHECK-LABEL: and_v16i1: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e8, m1, ta, mu +; CHECK-NEXT: vmand.mm v0, v0, v8 +; CHECK-NEXT: ret + %v = call <16 x i1> @llvm.vp.and.v16i1(<16 x i1> %b, <16 x i1> %c, <16 x i1> 
%a, i32 %evl) + ret <16 x i1> %v +} + +declare <1 x i1> @llvm.vp.or.v1i1(<1 x i1>, <1 x i1>, <1 x i1>, i32) + +define <1 x i1> @or_v1i1(<1 x i1> %b, <1 x i1> %c, <1 x i1> %a, i32 zeroext %evl) { +; CHECK-LABEL: or_v1i1: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e8, mf8, ta, mu +; CHECK-NEXT: vmor.mm v0, v0, v8 +; CHECK-NEXT: ret + %v = call <1 x i1> @llvm.vp.or.v1i1(<1 x i1> %b, <1 x i1> %c, <1 x i1> %a, i32 %evl) + ret <1 x i1> %v +} + +declare <2 x i1> @llvm.vp.or.v2i1(<2 x i1>, <2 x i1>, <2 x i1>, i32) + +define <2 x i1> @or_v2i1(<2 x i1> %b, <2 x i1> %c, <2 x i1> %a, i32 zeroext %evl) { +; CHECK-LABEL: or_v2i1: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e8, mf8, ta, mu +; CHECK-NEXT: vmor.mm v0, v0, v8 +; CHECK-NEXT: ret + %v = call <2 x i1> @llvm.vp.or.v2i1(<2 x i1> %b, <2 x i1> %c, <2 x i1> %a, i32 %evl) + ret <2 x i1> %v +} + +declare <4 x i1> @llvm.vp.or.v4i1(<4 x i1>, <4 x i1>, <4 x i1>, i32) + +define <4 x i1> @or_v4i1(<4 x i1> %b, <4 x i1> %c, <4 x i1> %a, i32 zeroext %evl) { +; CHECK-LABEL: or_v4i1: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e8, mf4, ta, mu +; CHECK-NEXT: vmor.mm v0, v0, v8 +; CHECK-NEXT: ret + %v = call <4 x i1> @llvm.vp.or.v4i1(<4 x i1> %b, <4 x i1> %c, <4 x i1> %a, i32 %evl) + ret <4 x i1> %v +} + +declare <8 x i1> @llvm.vp.or.v8i1(<8 x i1>, <8 x i1>, <8 x i1>, i32) + +define <8 x i1> @or_v8i1(<8 x i1> %b, <8 x i1> %c, <8 x i1> %a, i32 zeroext %evl) { +; CHECK-LABEL: or_v8i1: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e8, mf2, ta, mu +; CHECK-NEXT: vmor.mm v0, v0, v8 +; CHECK-NEXT: ret + %v = call <8 x i1> @llvm.vp.or.v8i1(<8 x i1> %b, <8 x i1> %c, <8 x i1> %a, i32 %evl) + ret <8 x i1> %v +} + +declare <16 x i1> @llvm.vp.or.v16i1(<16 x i1>, <16 x i1>, <16 x i1>, i32) + +define <16 x i1> @or_v16i1(<16 x i1> %b, <16 x i1> %c, <16 x i1> %a, i32 zeroext %evl) { +; CHECK-LABEL: or_v16i1: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e8, m1, ta, mu +; CHECK-NEXT: vmor.mm v0, v0, v8 +; CHECK-NEXT: ret + %v = call <16 x i1> @llvm.vp.or.v16i1(<16 x i1> %b, <16 x i1> %c, <16 x i1> %a, i32 %evl) + ret <16 x i1> %v +} + +declare <1 x i1> @llvm.vp.xor.v1i1(<1 x i1>, <1 x i1>, <1 x i1>, i32) + +define <1 x i1> @xor_v1i1(<1 x i1> %b, <1 x i1> %c, <1 x i1> %a, i32 zeroext %evl) { +; CHECK-LABEL: xor_v1i1: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e8, mf8, ta, mu +; CHECK-NEXT: vmxor.mm v0, v0, v8 +; CHECK-NEXT: ret + %v = call <1 x i1> @llvm.vp.xor.v1i1(<1 x i1> %b, <1 x i1> %c, <1 x i1> %a, i32 %evl) + ret <1 x i1> %v +} + +declare <2 x i1> @llvm.vp.xor.v2i1(<2 x i1>, <2 x i1>, <2 x i1>, i32) + +define <2 x i1> @xor_v2i1(<2 x i1> %b, <2 x i1> %c, <2 x i1> %a, i32 zeroext %evl) { +; CHECK-LABEL: xor_v2i1: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e8, mf8, ta, mu +; CHECK-NEXT: vmxor.mm v0, v0, v8 +; CHECK-NEXT: ret + %v = call <2 x i1> @llvm.vp.xor.v2i1(<2 x i1> %b, <2 x i1> %c, <2 x i1> %a, i32 %evl) + ret <2 x i1> %v +} + +declare <4 x i1> @llvm.vp.xor.v4i1(<4 x i1>, <4 x i1>, <4 x i1>, i32) + +define <4 x i1> @xor_v4i1(<4 x i1> %b, <4 x i1> %c, <4 x i1> %a, i32 zeroext %evl) { +; CHECK-LABEL: xor_v4i1: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e8, mf4, ta, mu +; CHECK-NEXT: vmxor.mm v0, v0, v8 +; CHECK-NEXT: ret + %v = call <4 x i1> @llvm.vp.xor.v4i1(<4 x i1> %b, <4 x i1> %c, <4 x i1> %a, i32 %evl) + ret <4 x i1> %v +} + +declare <8 x i1> @llvm.vp.xor.v8i1(<8 x i1>, <8 x i1>, <8 x i1>, i32) + +define <8 x i1> @xor_v8i1(<8 x i1> %b, <8 x i1> %c, <8 x i1> %a, i32 zeroext %evl) { +; CHECK-LABEL: 
xor_v8i1: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e8, mf2, ta, mu +; CHECK-NEXT: vmxor.mm v0, v0, v8 +; CHECK-NEXT: ret + %v = call <8 x i1> @llvm.vp.xor.v8i1(<8 x i1> %b, <8 x i1> %c, <8 x i1> %a, i32 %evl) + ret <8 x i1> %v +} + +declare <16 x i1> @llvm.vp.xor.v16i1(<16 x i1>, <16 x i1>, <16 x i1>, i32) + +define <16 x i1> @xor_v16i1(<16 x i1> %b, <16 x i1> %c, <16 x i1> %a, i32 zeroext %evl) { +; CHECK-LABEL: xor_v16i1: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e8, m1, ta, mu +; CHECK-NEXT: vmxor.mm v0, v0, v8 +; CHECK-NEXT: ret + %v = call <16 x i1> @llvm.vp.xor.v16i1(<16 x i1> %b, <16 x i1> %c, <16 x i1> %a, i32 %evl) + ret <16 x i1> %v +} + +declare @llvm.vp.xor.nxv1i1(, , , i32) + +define @xor_nxv1i1( %b, %c, %a, i32 zeroext %evl) { +; CHECK-LABEL: xor_nxv1i1: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e8, mf8, ta, mu +; CHECK-NEXT: vmxor.mm v0, v0, v8 +; CHECK-NEXT: ret + %v = call @llvm.vp.xor.nxv1i1( %b, %c, %a, i32 %evl) + ret %v +} + +declare @llvm.vp.xor.nxv2i1(, , , i32) + +define @xor_nxv2i1( %b, %c, %a, i32 zeroext %evl) { +; CHECK-LABEL: xor_nxv2i1: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e8, mf4, ta, mu +; CHECK-NEXT: vmxor.mm v0, v0, v8 +; CHECK-NEXT: ret + %v = call @llvm.vp.xor.nxv2i1( %b, %c, %a, i32 %evl) + ret %v +} + +declare @llvm.vp.xor.nxv4i1(, , , i32) + +define @xor_nxv4i1( %b, %c, %a, i32 zeroext %evl) { +; CHECK-LABEL: xor_nxv4i1: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e8, mf2, ta, mu +; CHECK-NEXT: vmxor.mm v0, v0, v8 +; CHECK-NEXT: ret + %v = call @llvm.vp.xor.nxv4i1( %b, %c, %a, i32 %evl) + ret %v +} + +declare @llvm.vp.xor.nxv8i1(, , , i32) + +define @xor_nxv8i1( %b, %c, %a, i32 zeroext %evl) { +; CHECK-LABEL: xor_nxv8i1: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e8, m1, ta, mu +; CHECK-NEXT: vmxor.mm v0, v0, v8 +; CHECK-NEXT: ret + %v = call @llvm.vp.xor.nxv8i1( %b, %c, %a, i32 %evl) + ret %v +} + +declare @llvm.vp.xor.nxv16i1(, , , i32) + +define @xor_nxv16i1( %b, %c, %a, i32 zeroext %evl) { +; CHECK-LABEL: xor_nxv16i1: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e8, m2, ta, mu +; CHECK-NEXT: vmxor.mm v0, v0, v8 +; CHECK-NEXT: ret + %v = call @llvm.vp.xor.nxv16i1( %b, %c, %a, i32 %evl) + ret %v +} + +declare @llvm.vp.xor.nxv32i1(, , , i32) + +define @xor_nxv32i1( %b, %c, %a, i32 zeroext %evl) { +; CHECK-LABEL: xor_nxv32i1: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e8, m4, ta, mu +; CHECK-NEXT: vmxor.mm v0, v0, v8 +; CHECK-NEXT: ret + %v = call @llvm.vp.xor.nxv32i1( %b, %c, %a, i32 %evl) + ret %v +} + +declare @llvm.vp.xor.nxv64i1(, , , i32) + +define @xor_nxv64i1( %b, %c, %a, i32 zeroext %evl) { +; CHECK-LABEL: xor_nxv64i1: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e8, m8, ta, mu +; CHECK-NEXT: vmxor.mm v0, v0, v8 +; CHECK-NEXT: ret + %v = call @llvm.vp.xor.nxv64i1( %b, %c, %a, i32 %evl) + ret %v +} diff --git a/llvm/test/CodeGen/RISCV/rvv/marith-vp.ll b/llvm/test/CodeGen/RISCV/rvv/marith-vp.ll new file mode 100644 index 000000000000..afd62aebf0a4 --- /dev/null +++ b/llvm/test/CodeGen/RISCV/rvv/marith-vp.ll @@ -0,0 +1,437 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc -mtriple=riscv32 -mattr=+experimental-v -riscv-v-vector-bits-min=128 -verify-machineinstrs < %s \ +; RUN: | FileCheck %s +; RUN: llc -mtriple=riscv64 -mattr=+experimental-v -riscv-v-vector-bits-min=128 -verify-machineinstrs < %s \ +; RUN: | FileCheck %s + +declare <1 x i1> @llvm.vp.and.v1i1(<1 x i1>, <1 x i1>, <1 x i1>, i32) + +define <1 x i1> 
@and_v1i1(<1 x i1> %b, <1 x i1> %c, <1 x i1> %a, i32 zeroext %evl) { +; CHECK-LABEL: and_v1i1: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e8, mf8, ta, mu +; CHECK-NEXT: vmand.mm v0, v0, v8 +; CHECK-NEXT: ret + %v = call <1 x i1> @llvm.vp.and.v1i1(<1 x i1> %b, <1 x i1> %c, <1 x i1> %a, i32 %evl) + ret <1 x i1> %v +} + +declare <2 x i1> @llvm.vp.and.v2i1(<2 x i1>, <2 x i1>, <2 x i1>, i32) + +define <2 x i1> @and_v2i1(<2 x i1> %b, <2 x i1> %c, <2 x i1> %a, i32 zeroext %evl) { +; CHECK-LABEL: and_v2i1: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e8, mf8, ta, mu +; CHECK-NEXT: vmand.mm v0, v0, v8 +; CHECK-NEXT: ret + %v = call <2 x i1> @llvm.vp.and.v2i1(<2 x i1> %b, <2 x i1> %c, <2 x i1> %a, i32 %evl) + ret <2 x i1> %v +} + +declare <4 x i1> @llvm.vp.and.v4i1(<4 x i1>, <4 x i1>, <4 x i1>, i32) + +define <4 x i1> @and_v4i1(<4 x i1> %b, <4 x i1> %c, <4 x i1> %a, i32 zeroext %evl) { +; CHECK-LABEL: and_v4i1: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e8, mf4, ta, mu +; CHECK-NEXT: vmand.mm v0, v0, v8 +; CHECK-NEXT: ret + %v = call <4 x i1> @llvm.vp.and.v4i1(<4 x i1> %b, <4 x i1> %c, <4 x i1> %a, i32 %evl) + ret <4 x i1> %v +} + +declare <8 x i1> @llvm.vp.and.v8i1(<8 x i1>, <8 x i1>, <8 x i1>, i32) + +define <8 x i1> @and_v8i1(<8 x i1> %b, <8 x i1> %c, <8 x i1> %a, i32 zeroext %evl) { +; CHECK-LABEL: and_v8i1: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e8, mf2, ta, mu +; CHECK-NEXT: vmand.mm v0, v0, v8 +; CHECK-NEXT: ret + %v = call <8 x i1> @llvm.vp.and.v8i1(<8 x i1> %b, <8 x i1> %c, <8 x i1> %a, i32 %evl) + ret <8 x i1> %v +} + +declare <16 x i1> @llvm.vp.and.v16i1(<16 x i1>, <16 x i1>, <16 x i1>, i32) + +define <16 x i1> @and_v16i1(<16 x i1> %b, <16 x i1> %c, <16 x i1> %a, i32 zeroext %evl) { +; CHECK-LABEL: and_v16i1: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e8, m1, ta, mu +; CHECK-NEXT: vmand.mm v0, v0, v8 +; CHECK-NEXT: ret + %v = call <16 x i1> @llvm.vp.and.v16i1(<16 x i1> %b, <16 x i1> %c, <16 x i1> %a, i32 %evl) + ret <16 x i1> %v +} + +declare @llvm.vp.and.nxv1i1(, , , i32) + +define @and_nxv1i1( %b, %c, %a, i32 zeroext %evl) { +; CHECK-LABEL: and_nxv1i1: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e8, mf8, ta, mu +; CHECK-NEXT: vmand.mm v0, v0, v8 +; CHECK-NEXT: ret + %v = call @llvm.vp.and.nxv1i1( %b, %c, %a, i32 %evl) + ret %v +} + +declare @llvm.vp.and.nxv2i1(, , , i32) + +define @and_nxv2i1( %b, %c, %a, i32 zeroext %evl) { +; CHECK-LABEL: and_nxv2i1: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e8, mf4, ta, mu +; CHECK-NEXT: vmand.mm v0, v0, v8 +; CHECK-NEXT: ret + %v = call @llvm.vp.and.nxv2i1( %b, %c, %a, i32 %evl) + ret %v +} + +declare @llvm.vp.and.nxv4i1(, , , i32) + +define @and_nxv4i1( %b, %c, %a, i32 zeroext %evl) { +; CHECK-LABEL: and_nxv4i1: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e8, mf2, ta, mu +; CHECK-NEXT: vmand.mm v0, v0, v8 +; CHECK-NEXT: ret + %v = call @llvm.vp.and.nxv4i1( %b, %c, %a, i32 %evl) + ret %v +} + +declare @llvm.vp.and.nxv8i1(, , , i32) + +define @and_nxv8i1( %b, %c, %a, i32 zeroext %evl) { +; CHECK-LABEL: and_nxv8i1: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e8, m1, ta, mu +; CHECK-NEXT: vmand.mm v0, v0, v8 +; CHECK-NEXT: ret + %v = call @llvm.vp.and.nxv8i1( %b, %c, %a, i32 %evl) + ret %v +} + +declare @llvm.vp.and.nxv16i1(, , , i32) + +define @and_nxv16i1( %b, %c, %a, i32 zeroext %evl) { +; CHECK-LABEL: and_nxv16i1: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e8, m2, ta, mu +; CHECK-NEXT: vmand.mm v0, v0, v8 +; CHECK-NEXT: ret + %v = call @llvm.vp.and.nxv16i1( 
%b, %c, %a, i32 %evl) + ret %v +} + +declare @llvm.vp.and.nxv32i1(, , , i32) + +define @and_nxv32i1( %b, %c, %a, i32 zeroext %evl) { +; CHECK-LABEL: and_nxv32i1: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e8, m4, ta, mu +; CHECK-NEXT: vmand.mm v0, v0, v8 +; CHECK-NEXT: ret + %v = call @llvm.vp.and.nxv32i1( %b, %c, %a, i32 %evl) + ret %v +} + +declare @llvm.vp.and.nxv64i1(, , , i32) + +define @and_nxv64i1( %b, %c, %a, i32 zeroext %evl) { +; CHECK-LABEL: and_nxv64i1: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e8, m8, ta, mu +; CHECK-NEXT: vmand.mm v0, v0, v8 +; CHECK-NEXT: ret + %v = call @llvm.vp.and.nxv64i1( %b, %c, %a, i32 %evl) + ret %v +} + +declare <1 x i1> @llvm.vp.or.v1i1(<1 x i1>, <1 x i1>, <1 x i1>, i32) + +define <1 x i1> @or_v1i1(<1 x i1> %b, <1 x i1> %c, <1 x i1> %a, i32 zeroext %evl) { +; CHECK-LABEL: or_v1i1: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e8, mf8, ta, mu +; CHECK-NEXT: vmor.mm v0, v0, v8 +; CHECK-NEXT: ret + %v = call <1 x i1> @llvm.vp.or.v1i1(<1 x i1> %b, <1 x i1> %c, <1 x i1> %a, i32 %evl) + ret <1 x i1> %v +} + +declare <2 x i1> @llvm.vp.or.v2i1(<2 x i1>, <2 x i1>, <2 x i1>, i32) + +define <2 x i1> @or_v2i1(<2 x i1> %b, <2 x i1> %c, <2 x i1> %a, i32 zeroext %evl) { +; CHECK-LABEL: or_v2i1: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e8, mf8, ta, mu +; CHECK-NEXT: vmor.mm v0, v0, v8 +; CHECK-NEXT: ret + %v = call <2 x i1> @llvm.vp.or.v2i1(<2 x i1> %b, <2 x i1> %c, <2 x i1> %a, i32 %evl) + ret <2 x i1> %v +} + +declare <4 x i1> @llvm.vp.or.v4i1(<4 x i1>, <4 x i1>, <4 x i1>, i32) + +define <4 x i1> @or_v4i1(<4 x i1> %b, <4 x i1> %c, <4 x i1> %a, i32 zeroext %evl) { +; CHECK-LABEL: or_v4i1: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e8, mf4, ta, mu +; CHECK-NEXT: vmor.mm v0, v0, v8 +; CHECK-NEXT: ret + %v = call <4 x i1> @llvm.vp.or.v4i1(<4 x i1> %b, <4 x i1> %c, <4 x i1> %a, i32 %evl) + ret <4 x i1> %v +} + +declare <8 x i1> @llvm.vp.or.v8i1(<8 x i1>, <8 x i1>, <8 x i1>, i32) + +define <8 x i1> @or_v8i1(<8 x i1> %b, <8 x i1> %c, <8 x i1> %a, i32 zeroext %evl) { +; CHECK-LABEL: or_v8i1: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e8, mf2, ta, mu +; CHECK-NEXT: vmor.mm v0, v0, v8 +; CHECK-NEXT: ret + %v = call <8 x i1> @llvm.vp.or.v8i1(<8 x i1> %b, <8 x i1> %c, <8 x i1> %a, i32 %evl) + ret <8 x i1> %v +} + +declare <16 x i1> @llvm.vp.or.v16i1(<16 x i1>, <16 x i1>, <16 x i1>, i32) + +define <16 x i1> @or_v16i1(<16 x i1> %b, <16 x i1> %c, <16 x i1> %a, i32 zeroext %evl) { +; CHECK-LABEL: or_v16i1: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e8, m1, ta, mu +; CHECK-NEXT: vmor.mm v0, v0, v8 +; CHECK-NEXT: ret + %v = call <16 x i1> @llvm.vp.or.v16i1(<16 x i1> %b, <16 x i1> %c, <16 x i1> %a, i32 %evl) + ret <16 x i1> %v +} + +declare @llvm.vp.or.nxv1i1(, , , i32) + +define @or_nxv1i1( %b, %c, %a, i32 zeroext %evl) { +; CHECK-LABEL: or_nxv1i1: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e8, mf8, ta, mu +; CHECK-NEXT: vmor.mm v0, v0, v8 +; CHECK-NEXT: ret + %v = call @llvm.vp.or.nxv1i1( %b, %c, %a, i32 %evl) + ret %v +} + +declare @llvm.vp.or.nxv2i1(, , , i32) + +define @or_nxv2i1( %b, %c, %a, i32 zeroext %evl) { +; CHECK-LABEL: or_nxv2i1: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e8, mf4, ta, mu +; CHECK-NEXT: vmor.mm v0, v0, v8 +; CHECK-NEXT: ret + %v = call @llvm.vp.or.nxv2i1( %b, %c, %a, i32 %evl) + ret %v +} + +declare @llvm.vp.or.nxv4i1(, , , i32) + +define @or_nxv4i1( %b, %c, %a, i32 zeroext %evl) { +; CHECK-LABEL: or_nxv4i1: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e8, mf2, ta, 
mu +; CHECK-NEXT: vmor.mm v0, v0, v8 +; CHECK-NEXT: ret + %v = call @llvm.vp.or.nxv4i1( %b, %c, %a, i32 %evl) + ret %v +} + +declare @llvm.vp.or.nxv8i1(, , , i32) + +define @or_nxv8i1( %b, %c, %a, i32 zeroext %evl) { +; CHECK-LABEL: or_nxv8i1: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e8, m1, ta, mu +; CHECK-NEXT: vmor.mm v0, v0, v8 +; CHECK-NEXT: ret + %v = call @llvm.vp.or.nxv8i1( %b, %c, %a, i32 %evl) + ret %v +} + +declare @llvm.vp.or.nxv16i1(, , , i32) + +define @or_nxv16i1( %b, %c, %a, i32 zeroext %evl) { +; CHECK-LABEL: or_nxv16i1: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e8, m2, ta, mu +; CHECK-NEXT: vmor.mm v0, v0, v8 +; CHECK-NEXT: ret + %v = call @llvm.vp.or.nxv16i1( %b, %c, %a, i32 %evl) + ret %v +} + +declare @llvm.vp.or.nxv32i1(, , , i32) + +define @or_nxv32i1( %b, %c, %a, i32 zeroext %evl) { +; CHECK-LABEL: or_nxv32i1: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e8, m4, ta, mu +; CHECK-NEXT: vmor.mm v0, v0, v8 +; CHECK-NEXT: ret + %v = call @llvm.vp.or.nxv32i1( %b, %c, %a, i32 %evl) + ret %v +} + +declare @llvm.vp.or.nxv64i1(, , , i32) + +define @or_nxv64i1( %b, %c, %a, i32 zeroext %evl) { +; CHECK-LABEL: or_nxv64i1: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e8, m8, ta, mu +; CHECK-NEXT: vmor.mm v0, v0, v8 +; CHECK-NEXT: ret + %v = call @llvm.vp.or.nxv64i1( %b, %c, %a, i32 %evl) + ret %v +} + +declare <1 x i1> @llvm.vp.xor.v1i1(<1 x i1>, <1 x i1>, <1 x i1>, i32) + +define <1 x i1> @xor_v1i1(<1 x i1> %b, <1 x i1> %c, <1 x i1> %a, i32 zeroext %evl) { +; CHECK-LABEL: xor_v1i1: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e8, mf8, ta, mu +; CHECK-NEXT: vmxor.mm v0, v0, v8 +; CHECK-NEXT: ret + %v = call <1 x i1> @llvm.vp.xor.v1i1(<1 x i1> %b, <1 x i1> %c, <1 x i1> %a, i32 %evl) + ret <1 x i1> %v +} + +declare <2 x i1> @llvm.vp.xor.v2i1(<2 x i1>, <2 x i1>, <2 x i1>, i32) + +define <2 x i1> @xor_v2i1(<2 x i1> %b, <2 x i1> %c, <2 x i1> %a, i32 zeroext %evl) { +; CHECK-LABEL: xor_v2i1: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e8, mf8, ta, mu +; CHECK-NEXT: vmxor.mm v0, v0, v8 +; CHECK-NEXT: ret + %v = call <2 x i1> @llvm.vp.xor.v2i1(<2 x i1> %b, <2 x i1> %c, <2 x i1> %a, i32 %evl) + ret <2 x i1> %v +} + +declare <4 x i1> @llvm.vp.xor.v4i1(<4 x i1>, <4 x i1>, <4 x i1>, i32) + +define <4 x i1> @xor_v4i1(<4 x i1> %b, <4 x i1> %c, <4 x i1> %a, i32 zeroext %evl) { +; CHECK-LABEL: xor_v4i1: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e8, mf4, ta, mu +; CHECK-NEXT: vmxor.mm v0, v0, v8 +; CHECK-NEXT: ret + %v = call <4 x i1> @llvm.vp.xor.v4i1(<4 x i1> %b, <4 x i1> %c, <4 x i1> %a, i32 %evl) + ret <4 x i1> %v +} + +declare <8 x i1> @llvm.vp.xor.v8i1(<8 x i1>, <8 x i1>, <8 x i1>, i32) + +define <8 x i1> @xor_v8i1(<8 x i1> %b, <8 x i1> %c, <8 x i1> %a, i32 zeroext %evl) { +; CHECK-LABEL: xor_v8i1: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e8, mf2, ta, mu +; CHECK-NEXT: vmxor.mm v0, v0, v8 +; CHECK-NEXT: ret + %v = call <8 x i1> @llvm.vp.xor.v8i1(<8 x i1> %b, <8 x i1> %c, <8 x i1> %a, i32 %evl) + ret <8 x i1> %v +} + +declare <16 x i1> @llvm.vp.xor.v16i1(<16 x i1>, <16 x i1>, <16 x i1>, i32) + +define <16 x i1> @xor_v16i1(<16 x i1> %b, <16 x i1> %c, <16 x i1> %a, i32 zeroext %evl) { +; CHECK-LABEL: xor_v16i1: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e8, m1, ta, mu +; CHECK-NEXT: vmxor.mm v0, v0, v8 +; CHECK-NEXT: ret + %v = call <16 x i1> @llvm.vp.xor.v16i1(<16 x i1> %b, <16 x i1> %c, <16 x i1> %a, i32 %evl) + ret <16 x i1> %v +} + +declare @llvm.vp.xor.nxv1i1(, , , i32) + +define @xor_nxv1i1( %b, %c, %a, i32 
zeroext %evl) { +; CHECK-LABEL: xor_nxv1i1: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e8, mf8, ta, mu +; CHECK-NEXT: vmxor.mm v0, v0, v8 +; CHECK-NEXT: ret + %v = call @llvm.vp.xor.nxv1i1( %b, %c, %a, i32 %evl) + ret %v +} + +declare @llvm.vp.xor.nxv2i1(, , , i32) + +define @xor_nxv2i1( %b, %c, %a, i32 zeroext %evl) { +; CHECK-LABEL: xor_nxv2i1: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e8, mf4, ta, mu +; CHECK-NEXT: vmxor.mm v0, v0, v8 +; CHECK-NEXT: ret + %v = call @llvm.vp.xor.nxv2i1( %b, %c, %a, i32 %evl) + ret %v +} + +declare @llvm.vp.xor.nxv4i1(, , , i32) + +define @xor_nxv4i1( %b, %c, %a, i32 zeroext %evl) { +; CHECK-LABEL: xor_nxv4i1: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e8, mf2, ta, mu +; CHECK-NEXT: vmxor.mm v0, v0, v8 +; CHECK-NEXT: ret + %v = call @llvm.vp.xor.nxv4i1( %b, %c, %a, i32 %evl) + ret %v +} + +declare @llvm.vp.xor.nxv8i1(, , , i32) + +define @xor_nxv8i1( %b, %c, %a, i32 zeroext %evl) { +; CHECK-LABEL: xor_nxv8i1: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e8, m1, ta, mu +; CHECK-NEXT: vmxor.mm v0, v0, v8 +; CHECK-NEXT: ret + %v = call @llvm.vp.xor.nxv8i1( %b, %c, %a, i32 %evl) + ret %v +} + +declare @llvm.vp.xor.nxv16i1(, , , i32) + +define @xor_nxv16i1( %b, %c, %a, i32 zeroext %evl) { +; CHECK-LABEL: xor_nxv16i1: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e8, m2, ta, mu +; CHECK-NEXT: vmxor.mm v0, v0, v8 +; CHECK-NEXT: ret + %v = call @llvm.vp.xor.nxv16i1( %b, %c, %a, i32 %evl) + ret %v +} + +declare @llvm.vp.xor.nxv32i1(, , , i32) + +define @xor_nxv32i1( %b, %c, %a, i32 zeroext %evl) { +; CHECK-LABEL: xor_nxv32i1: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e8, m4, ta, mu +; CHECK-NEXT: vmxor.mm v0, v0, v8 +; CHECK-NEXT: ret + %v = call @llvm.vp.xor.nxv32i1( %b, %c, %a, i32 %evl) + ret %v +} + +declare @llvm.vp.xor.nxv64i1(, , , i32) + +define @xor_nxv64i1( %b, %c, %a, i32 zeroext %evl) { +; CHECK-LABEL: xor_nxv64i1: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e8, m8, ta, mu +; CHECK-NEXT: vmxor.mm v0, v0, v8 +; CHECK-NEXT: ret + %v = call @llvm.vp.xor.nxv64i1( %b, %c, %a, i32 %evl) + ret %v +} From 5fd328c45f6dc61e0786c53863611291be139933 Mon Sep 17 00:00:00 2001 From: Nico Weber Date: Thu, 23 Dec 2021 16:40:07 -0500 Subject: [PATCH 181/293] [gn build] (manually) port f103ee2e9e3fc (sanitizer_symbolize_set_inline_frames) --- llvm/utils/gn/secondary/compiler-rt/lib/asan/BUILD.gn | 1 + llvm/utils/gn/secondary/compiler-rt/lib/tsan/rtl/BUILD.gn | 1 + 2 files changed, 2 insertions(+) diff --git a/llvm/utils/gn/secondary/compiler-rt/lib/asan/BUILD.gn b/llvm/utils/gn/secondary/compiler-rt/lib/asan/BUILD.gn index 4882e080d21c..82f7a6aec927 100644 --- a/llvm/utils/gn/secondary/compiler-rt/lib/asan/BUILD.gn +++ b/llvm/utils/gn/secondary/compiler-rt/lib/asan/BUILD.gn @@ -163,6 +163,7 @@ target(asan_target_type, "asan") { "-Wl,-U,___sanitizer_symbolize_data", "-Wl,-U,___sanitizer_symbolize_demangle", "-Wl,-U,___sanitizer_symbolize_flush", + "-Wl,-U,___sanitizer_symbolize_set_inline_frames", # xray "-Wl,-U,___start_xray_fn_idx", diff --git a/llvm/utils/gn/secondary/compiler-rt/lib/tsan/rtl/BUILD.gn b/llvm/utils/gn/secondary/compiler-rt/lib/tsan/rtl/BUILD.gn index 68f356318aed..788a5621d393 100644 --- a/llvm/utils/gn/secondary/compiler-rt/lib/tsan/rtl/BUILD.gn +++ b/llvm/utils/gn/secondary/compiler-rt/lib/tsan/rtl/BUILD.gn @@ -157,6 +157,7 @@ target(tsan_target_type, "rtl") { "-Wl,-U,___sanitizer_symbolize_data", "-Wl,-U,___sanitizer_symbolize_demangle", "-Wl,-U,___sanitizer_symbolize_flush", + 
"-Wl,-U,___sanitizer_symbolize_set_inline_frames", # FIXME: better "-Wl,-install_name,@rpath/libclang_rt.tsan_osx_dynamic.dylib", From 5d68dc184ef6cdb581bfa7bb353ee138c7690edd Mon Sep 17 00:00:00 2001 From: Florian Hahn Date: Thu, 23 Dec 2021 23:20:11 +0100 Subject: [PATCH 182/293] [Verifier] Iteratively traverse all indirect users. The recursive implementation can run into stack overflows, e.g. like in PR52844. The order the users are visited changes, but for the current use case this only impacts the order error messages are emitted. --- llvm/lib/IR/Verifier.cpp | 17 +++++++++++------ llvm/unittests/IR/VerifierTest.cpp | 14 +++++++------- 2 files changed, 18 insertions(+), 13 deletions(-) diff --git a/llvm/lib/IR/Verifier.cpp b/llvm/lib/IR/Verifier.cpp index 50605f50769d..fb7c423e54e2 100644 --- a/llvm/lib/IR/Verifier.cpp +++ b/llvm/lib/IR/Verifier.cpp @@ -602,17 +602,22 @@ void Verifier::visit(Instruction &I) { InstVisitor::visit(I); } -// Helper to recursively iterate over indirect users. By -// returning false, the callback can ask to stop recursing -// further. +// Helper to iterate over indirect users. By returning false, the callback can ask to stop traversing further. static void forEachUser(const Value *User, SmallPtrSet &Visited, llvm::function_ref Callback) { if (!Visited.insert(User).second) return; - for (const Value *TheNextUser : User->materialized_users()) - if (Callback(TheNextUser)) - forEachUser(TheNextUser, Visited, Callback); + + SmallVector WorkList; + append_range(WorkList, User->materialized_users()); + while (!WorkList.empty()) { + const Value *Cur = WorkList.pop_back_val(); + if (!Visited.insert(Cur).second) + continue; + if (Callback(Cur)) + append_range(WorkList, Cur->materialized_users()); + } } void Verifier::visitGlobalValue(const GlobalValue &GV) { diff --git a/llvm/unittests/IR/VerifierTest.cpp b/llvm/unittests/IR/VerifierTest.cpp index 6031c808f584..8747e171cfba 100644 --- a/llvm/unittests/IR/VerifierTest.cpp +++ b/llvm/unittests/IR/VerifierTest.cpp @@ -136,17 +136,17 @@ TEST(VerifierTest, CrossModuleRef) { raw_string_ostream ErrorOS(Error); EXPECT_TRUE(verifyModule(M2, &ErrorOS)); EXPECT_TRUE(StringRef(ErrorOS.str()) - .equals("Global is used by function in a different module\n" - "i32 ()* @foo2\n" - "; ModuleID = 'M2'\n" - "i32 ()* @foo3\n" - "; ModuleID = 'M3'\n" - "Global is referenced in a different module!\n" + .equals("Global is referenced in a different module!\n" "i32 ()* @foo2\n" "; ModuleID = 'M2'\n" " %call = call i32 @foo2()\n" "i32 ()* @foo1\n" - "; ModuleID = 'M1'\n")); + "; ModuleID = 'M1'\n" + "Global is used by function in a different module\n" + "i32 ()* @foo2\n" + "; ModuleID = 'M2'\n" + "i32 ()* @foo3\n" + "; ModuleID = 'M3'\n")); Error.clear(); EXPECT_TRUE(verifyModule(M1, &ErrorOS)); From 7cd7b4d2f8072495df715132f688c8e1b7f80f9a Mon Sep 17 00:00:00 2001 From: Vitaly Buka Date: Wed, 22 Dec 2021 19:04:13 -0800 Subject: [PATCH 183/293] [sanitizer] Add common "demangle" flag Reviewed By: browneee Differential Revision: https://reviews.llvm.org/D116202 --- .../sanitizer_common_interface_posix.inc | 1 + .../lib/sanitizer_common/sanitizer_flags.inc | 1 + .../sanitizer_symbolizer_libcdep.cpp | 3 ++ .../sanitizer_symbolizer_posix_libcdep.cpp | 8 ++++- .../symbolizer/sanitizer_symbolize.cpp | 10 +++++++ .../symbolizer/scripts/build_symbolizer.sh | 1 + .../symbolizer/scripts/global_symbols.txt | 1 + .../lib/sanitizer_common/weak_symbols.txt | 1 + .../TestCases/symbolize_pc_demangle.cpp | 30 +++++++++++++++++++ 9 files changed, 55 
insertions(+), 1 deletion(-) create mode 100644 compiler-rt/test/sanitizer_common/TestCases/symbolize_pc_demangle.cpp diff --git a/compiler-rt/lib/sanitizer_common/sanitizer_common_interface_posix.inc b/compiler-rt/lib/sanitizer_common/sanitizer_common_interface_posix.inc index 11b894132994..a5259be9335a 100644 --- a/compiler-rt/lib/sanitizer_common/sanitizer_common_interface_posix.inc +++ b/compiler-rt/lib/sanitizer_common/sanitizer_common_interface_posix.inc @@ -11,4 +11,5 @@ INTERFACE_WEAK_FUNCTION(__sanitizer_symbolize_code) INTERFACE_WEAK_FUNCTION(__sanitizer_symbolize_data) INTERFACE_WEAK_FUNCTION(__sanitizer_symbolize_demangle) INTERFACE_WEAK_FUNCTION(__sanitizer_symbolize_flush) +INTERFACE_WEAK_FUNCTION(__sanitizer_symbolize_set_demangle) INTERFACE_WEAK_FUNCTION(__sanitizer_symbolize_set_inline_frames) diff --git a/compiler-rt/lib/sanitizer_common/sanitizer_flags.inc b/compiler-rt/lib/sanitizer_common/sanitizer_flags.inc index 8295537d6f7a..0ca91aff8dd4 100644 --- a/compiler-rt/lib/sanitizer_common/sanitizer_flags.inc +++ b/compiler-rt/lib/sanitizer_common/sanitizer_flags.inc @@ -179,6 +179,7 @@ COMMON_FLAG(bool, use_madv_dontdump, true, "in core file.") COMMON_FLAG(bool, symbolize_inline_frames, true, "Print inlined frames in stacktraces. Defaults to true.") +COMMON_FLAG(bool, demangle, true, "Print demangled symbols.") COMMON_FLAG(bool, symbolize_vs_style, false, "Print file locations in Visual Studio style (e.g: " " file(10,42): ...") diff --git a/compiler-rt/lib/sanitizer_common/sanitizer_symbolizer_libcdep.cpp b/compiler-rt/lib/sanitizer_common/sanitizer_symbolizer_libcdep.cpp index e9829326bcd6..8bbd4af0c7c2 100644 --- a/compiler-rt/lib/sanitizer_common/sanitizer_symbolizer_libcdep.cpp +++ b/compiler-rt/lib/sanitizer_common/sanitizer_symbolizer_libcdep.cpp @@ -274,10 +274,13 @@ class LLVMSymbolizerProcess final : public SymbolizerProcess { const char* const kSymbolizerArch = "--default-arch=unknown"; #endif + const char *const demangle_flag = + common_flags()->demangle ? "--demangle" : "--no-demangle"; const char *const inline_flag = common_flags()->symbolize_inline_frames ? 
"--inlines" : "--no-inlines"; int i = 0; argv[i++] = path_to_binary; + argv[i++] = demangle_flag; argv[i++] = inline_flag; argv[i++] = kSymbolizerArch; argv[i++] = nullptr; diff --git a/compiler-rt/lib/sanitizer_common/sanitizer_symbolizer_posix_libcdep.cpp b/compiler-rt/lib/sanitizer_common/sanitizer_symbolizer_posix_libcdep.cpp index 3166c0013f46..5f6e4cc3180e 100644 --- a/compiler-rt/lib/sanitizer_common/sanitizer_symbolizer_posix_libcdep.cpp +++ b/compiler-rt/lib/sanitizer_common/sanitizer_symbolizer_posix_libcdep.cpp @@ -213,9 +213,11 @@ class Addr2LineProcess final : public SymbolizerProcess { const char *(&argv)[kArgVMax]) const override { int i = 0; argv[i++] = path_to_binary; + if (common_flags()->demangle) + argv[i++] = "-C"; if (common_flags()->symbolize_inline_frames) argv[i++] = "-i"; - argv[i++] = "-Cfe"; + argv[i++] = "-fe"; argv[i++] = module_name_; argv[i++] = nullptr; CHECK_LE(i, kArgVMax); @@ -328,12 +330,16 @@ __sanitizer_symbolize_flush(); SANITIZER_INTERFACE_ATTRIBUTE SANITIZER_WEAK_ATTRIBUTE int __sanitizer_symbolize_demangle(const char *Name, char *Buffer, int MaxLength); SANITIZER_INTERFACE_ATTRIBUTE SANITIZER_WEAK_ATTRIBUTE bool +__sanitizer_symbolize_set_demangle(bool Demangle); +SANITIZER_INTERFACE_ATTRIBUTE SANITIZER_WEAK_ATTRIBUTE bool __sanitizer_symbolize_set_inline_frames(bool InlineFrames); } // extern "C" class InternalSymbolizer final : public SymbolizerTool { public: static InternalSymbolizer *get(LowLevelAllocator *alloc) { + if (__sanitizer_symbolize_set_demangle) + CHECK(__sanitizer_symbolize_set_demangle(common_flags()->demangle)); if (__sanitizer_symbolize_set_inline_frames) CHECK(__sanitizer_symbolize_set_inline_frames( common_flags()->symbolize_inline_frames)); diff --git a/compiler-rt/lib/sanitizer_common/symbolizer/sanitizer_symbolize.cpp b/compiler-rt/lib/sanitizer_common/symbolizer/sanitizer_symbolize.cpp index 9b1d81765087..80cab36426c5 100644 --- a/compiler-rt/lib/sanitizer_common/symbolizer/sanitizer_symbolize.cpp +++ b/compiler-rt/lib/sanitizer_common/symbolizer/sanitizer_symbolize.cpp @@ -18,12 +18,14 @@ #include "llvm/DebugInfo/Symbolize/Symbolize.h" static llvm::symbolize::LLVMSymbolizer *Symbolizer = nullptr; +static bool Demangle = true; static bool InlineFrames = true; static llvm::symbolize::LLVMSymbolizer *getDefaultSymbolizer() { if (Symbolizer) return Symbolizer; llvm::symbolize::LLVMSymbolizer::Options Opts; + Opts.Demangle = Demangle; Symbolizer = new llvm::symbolize::LLVMSymbolizer(Opts); return Symbolizer; } @@ -112,6 +114,14 @@ int __sanitizer_symbolize_demangle(const char *Name, char *Buffer, : 0; } +bool __sanitizer_symbolize_set_demangle(bool Value) { + // Must be called before LLVMSymbolizer created. 
+ if (Symbolizer) + return false; + Demangle = Value; + return true; +} + bool __sanitizer_symbolize_set_inline_frames(bool Value) { InlineFrames = Value; return true; diff --git a/compiler-rt/lib/sanitizer_common/symbolizer/scripts/build_symbolizer.sh b/compiler-rt/lib/sanitizer_common/symbolizer/scripts/build_symbolizer.sh index 7f301709c62b..599f063b45c9 100755 --- a/compiler-rt/lib/sanitizer_common/symbolizer/scripts/build_symbolizer.sh +++ b/compiler-rt/lib/sanitizer_common/symbolizer/scripts/build_symbolizer.sh @@ -159,6 +159,7 @@ SYMBOLIZER_API_LIST=__sanitizer_symbolize_code SYMBOLIZER_API_LIST+=,__sanitizer_symbolize_data SYMBOLIZER_API_LIST+=,__sanitizer_symbolize_flush SYMBOLIZER_API_LIST+=,__sanitizer_symbolize_demangle +SYMBOLIZER_API_LIST+=,__sanitizer_symbolize_set_demangle SYMBOLIZER_API_LIST+=,__sanitizer_symbolize_set_inline_frames LIBCXX_ARCHIVE_DIR=$(dirname $(find $LIBCXX_BUILD -name libc++.a | head -n1)) diff --git a/compiler-rt/lib/sanitizer_common/symbolizer/scripts/global_symbols.txt b/compiler-rt/lib/sanitizer_common/symbolizer/scripts/global_symbols.txt index 18f74e7741ef..0bb38ba951a8 100644 --- a/compiler-rt/lib/sanitizer_common/symbolizer/scripts/global_symbols.txt +++ b/compiler-rt/lib/sanitizer_common/symbolizer/scripts/global_symbols.txt @@ -38,6 +38,7 @@ __sanitizer_symbolize_code T __sanitizer_symbolize_data T __sanitizer_symbolize_demangle T __sanitizer_symbolize_flush T +__sanitizer_symbolize_set_demangle T __sanitizer_symbolize_set_inline_frames T __strdup U __udivdi3 U diff --git a/compiler-rt/lib/sanitizer_common/weak_symbols.txt b/compiler-rt/lib/sanitizer_common/weak_symbols.txt index 8575bf610102..d07f81bc8c12 100644 --- a/compiler-rt/lib/sanitizer_common/weak_symbols.txt +++ b/compiler-rt/lib/sanitizer_common/weak_symbols.txt @@ -6,4 +6,5 @@ ___sanitizer_symbolize_code ___sanitizer_symbolize_data ___sanitizer_symbolize_demangle ___sanitizer_symbolize_flush +___sanitizer_symbolize_set_demangle ___sanitizer_symbolize_set_inline_frames diff --git a/compiler-rt/test/sanitizer_common/TestCases/symbolize_pc_demangle.cpp b/compiler-rt/test/sanitizer_common/TestCases/symbolize_pc_demangle.cpp new file mode 100644 index 000000000000..62ac5d0b2337 --- /dev/null +++ b/compiler-rt/test/sanitizer_common/TestCases/symbolize_pc_demangle.cpp @@ -0,0 +1,30 @@ +// RUN: %clangxx -O1 -fno-omit-frame-pointer %s -o %t +// RUN: %env_tool_opts=strip_path_prefix=/TestCases/ %run %t 2>&1 | FileCheck %s +// RUN: %env_tool_opts=strip_path_prefix=/TestCases/:demangle=0 %run %t 2>&1 | FileCheck %s --check-prefixes=NODEMANGLE +// RUN: %env_tool_opts=strip_path_prefix=/TestCases/:demangle=1 %run %t 2>&1 | FileCheck %s + +// XFAIL: darwin + +#include +#include +#include + +char buffer[10000]; + +__attribute__((noinline)) static void Symbolize() { + __sanitizer_symbolize_pc(__builtin_return_address(0), "%p %F %L", buffer, + sizeof(buffer)); + for (char *p = buffer; strlen(p); p += strlen(p) + 1) + printf("%s\n", p); +} + +struct Symbolizer { + __attribute__((noinline)) ~Symbolizer() { Symbolize(); } +}; + +// NODEMANGLE: in _ZN10SymbolizerD2Ev +// CHECK: in Symbolizer::~Symbolizer +int main() { + Symbolizer(); + return 0; +} From 81378f7e56d960eec20909f1cac6e71bb5553e04 Mon Sep 17 00:00:00 2001 From: Kristina Bessonova Date: Thu, 23 Dec 2021 18:10:52 +0200 Subject: [PATCH 184/293] Revert "[DwarfDebug] Support emitting function-local declaration for a lexical block" & dependent patches Try to revert D113741 once again. 
This also reverts 0ac75e82fff93a80ca401d3db3541e8d1d9098f9 (D114705) as it causes LLDB's lldb-api.lang/cpp/nsimport.TestCppNsImport.py test failure w/o D113741. This reverts commit f9607d45f399e2afc39ec16222ea68b4e0831564. Differential Revision: https://reviews.llvm.org/D116225 --- lld/test/wasm/debuginfo.test | 32 +- .../CodeGen/AsmPrinter/DwarfCompileUnit.cpp | 132 +- .../lib/CodeGen/AsmPrinter/DwarfCompileUnit.h | 41 +- llvm/lib/CodeGen/AsmPrinter/DwarfDebug.cpp | 264 +- llvm/lib/CodeGen/AsmPrinter/DwarfDebug.h | 12 +- llvm/lib/CodeGen/AsmPrinter/DwarfFile.h | 7 +- llvm/lib/CodeGen/AsmPrinter/DwarfUnit.cpp | 2 +- llvm/lib/CodeGen/AsmPrinter/DwarfUnit.h | 2 +- .../CodeGen/ARM/2011-01-19-MergedGlobalDbg.ll | 4 +- .../CodeGen/ARM/2011-08-02-MergedGlobalDbg.ll | 4 +- .../DebugInfo/AMDGPU/variable-locations.ll | 31 +- llvm/test/DebugInfo/BPF/extern-void.ll | 3 +- .../Generic/cross-cu-linkonce-distinct.ll | 10 +- .../Generic/debug-info-qualifiers.ll | 4 - .../Generic/debug-names-linkage-name.ll | 2 +- llvm/test/DebugInfo/Generic/enum-types.ll | 6 +- .../Generic/import-inlined-declaration.ll | 72 - .../Generic/imported-name-inlined.ll | 10 +- .../Generic/incorrect-variable-debugloc.ll | 3 - .../DebugInfo/Generic/inlined-local-type.ll | 125 - .../DebugInfo/Generic/inlined-static-var.ll | 92 - .../DebugInfo/Generic/lexical_block_static.ll | 141 - .../DebugInfo/Generic/lexical_block_types.ll | 421 - llvm/test/DebugInfo/Generic/namespace.ll | 51 +- llvm/test/DebugInfo/Generic/varargs.ll | 12 +- .../MIR/AArch64/implicit-def-dead-scope.mir | 2 +- llvm/test/DebugInfo/NVPTX/debug-addr-class.ll | 154 +- llvm/test/DebugInfo/NVPTX/debug-info.ll | 12876 ++++++++-------- llvm/test/DebugInfo/NVPTX/debug-loc-offset.ll | 154 +- llvm/test/DebugInfo/PowerPC/strict-dwarf.ll | 4 +- .../X86/2011-09-26-GlobalVarContext.ll | 12 +- .../DebugInfo/X86/DW_AT_calling-convention.ll | 8 +- llvm/test/DebugInfo/X86/align_cpp11.ll | 30 +- llvm/test/DebugInfo/X86/align_objc.ll | 13 +- llvm/test/DebugInfo/X86/arange-and-stub.ll | 2 +- .../X86/containing-type-extension-rust.ll | 1 - llvm/test/DebugInfo/X86/debug-info-access.ll | 8 +- llvm/test/DebugInfo/X86/debug-info-blocks.ll | 8 +- .../DebugInfo/X86/debug-info-static-member.ll | 4 - llvm/test/DebugInfo/X86/debug-loc-offset.mir | 31 +- llvm/test/DebugInfo/X86/dwarf-aranges.ll | 12 +- .../test/DebugInfo/X86/dwarf-linkage-names.ll | 4 +- .../dwarfdump-DIImportedEntity_elements.ll | 14 +- llvm/test/DebugInfo/X86/generate-odr-hash.ll | 18 +- llvm/test/DebugInfo/X86/gnu-public-names.ll | 43 +- .../X86/lexical-block-file-inline.ll | 5 + llvm/test/DebugInfo/X86/linkage-name.ll | 2 +- llvm/test/DebugInfo/X86/namelist1.ll | 5 +- llvm/test/DebugInfo/X86/sret.ll | 4 +- .../DebugInfo/X86/subprogram-across-cus.ll | 6 +- llvm/test/DebugInfo/X86/template.ll | 33 +- llvm/test/DebugInfo/X86/tls.ll | 37 +- .../X86/type-units-maybe-unused-types.ll | 9 +- llvm/test/DebugInfo/X86/vla-global.ll | 3 - llvm/test/DebugInfo/attr-btf_tag.ll | 71 +- llvm/test/MC/WebAssembly/debug-info.ll | 22 +- llvm/test/MC/WebAssembly/debug-info64.ll | 22 +- llvm/test/MC/WebAssembly/dwarfdump.ll | 86 +- llvm/test/MC/WebAssembly/dwarfdump64.ll | 40 +- 59 files changed, 7138 insertions(+), 8088 deletions(-) delete mode 100644 llvm/test/DebugInfo/Generic/import-inlined-declaration.ll delete mode 100644 llvm/test/DebugInfo/Generic/inlined-local-type.ll delete mode 100644 llvm/test/DebugInfo/Generic/inlined-static-var.ll delete mode 100644 llvm/test/DebugInfo/Generic/lexical_block_static.ll delete mode 100644 
llvm/test/DebugInfo/Generic/lexical_block_types.ll diff --git a/lld/test/wasm/debuginfo.test b/lld/test/wasm/debuginfo.test index 35de693e9ea7..9cb1cc31e515 100644 --- a/lld/test/wasm/debuginfo.test +++ b/lld/test/wasm/debuginfo.test @@ -44,23 +44,9 @@ CHECK-NEXT: DW_AT_producer ("clang version 7.0.0 (trunk {{.*}})") CHECK-NEXT: DW_AT_language (DW_LANG_C99) CHECK-NEXT: DW_AT_name ("hi_foo.c") -CHECK: DW_TAG_subprogram -CHECK-NEXT: DW_AT_low_pc -CHECK-NEXT: DW_AT_high_pc -CHECK-NEXT: DW_AT_frame_base -CHECK-NEXT: DW_AT_name ("foo") -CHECK-NEXT: DW_AT_decl_file ("{{.*}}hi_foo.c") -CHECK-NEXT: DW_AT_decl_line (3) - -CHECK: DW_TAG_formal_parameter -CHECK-NEXT: DW_AT_location (DW_OP_WASM_location 0x0 0x0, DW_OP_stack_value) -CHECK-NEXT: DW_AT_name ("p") -CHECK-NEXT: DW_AT_decl_file ("{{.*}}hi_foo.c") -CHECK-NEXT: DW_AT_decl_line (3) - CHECK: DW_TAG_variable CHECK-NEXT: DW_AT_name ("y") -CHECK-NEXT: DW_AT_type (0x000000d4 "int[2]") +CHECK-NEXT: DW_AT_type (0x000000ac "int[2]") CHECK-NEXT: DW_AT_external (true) CHECK-NEXT: DW_AT_decl_file ("{{.*}}hi_foo.c") CHECK-NEXT: DW_AT_decl_line (1) @@ -82,9 +68,23 @@ CHECK-NEXT: DW_AT_encoding (DW_ATE_unsigned) CHECK: DW_TAG_variable CHECK-NEXT: DW_AT_name ("z") -CHECK-NEXT: DW_AT_type (0x000000d4 "int[2]") +CHECK-NEXT: DW_AT_type (0x000000ac "int[2]") CHECK-NEXT: DW_AT_external (true) CHECK-NEXT: DW_AT_decl_file ("{{.*}}hi_foo.c") CHECK-NEXT: DW_AT_decl_line (8) CHECK-NEXT: DW_AT_location (DW_OP_addr 0xffffffff) +CHECK: DW_TAG_subprogram +CHECK-NEXT: DW_AT_low_pc +CHECK-NEXT: DW_AT_high_pc +CHECK-NEXT: DW_AT_frame_base +CHECK-NEXT: DW_AT_name ("foo") +CHECK-NEXT: DW_AT_decl_file ("{{.*}}hi_foo.c") +CHECK-NEXT: DW_AT_decl_line (3) + +CHECK: DW_TAG_formal_parameter +CHECK-NEXT: DW_AT_location (DW_OP_WASM_location 0x0 0x0, DW_OP_stack_value) +CHECK-NEXT: DW_AT_name ("p") +CHECK-NEXT: DW_AT_decl_file ("{{.*}}hi_foo.c") +CHECK-NEXT: DW_AT_decl_line (3) + diff --git a/llvm/lib/CodeGen/AsmPrinter/DwarfCompileUnit.cpp b/llvm/lib/CodeGen/AsmPrinter/DwarfCompileUnit.cpp index cf8c0c03772e..9b73f0ab2f05 100644 --- a/llvm/lib/CodeGen/AsmPrinter/DwarfCompileUnit.cpp +++ b/llvm/lib/CodeGen/AsmPrinter/DwarfCompileUnit.cpp @@ -551,18 +551,8 @@ void DwarfCompileUnit::constructScopeDIE(LexicalScope *Scope, // Emit lexical blocks. DIE *ScopeDIE = constructLexicalScopeDIE(Scope); assert(ScopeDIE && "Scope DIE should not be null."); - ParentScopeDIE.addChild(ScopeDIE); - - // Track abstract and concrete lexical block scopes. - if (Scope->isAbstractScope()) { - assert(!getAbstractScopeDIEs().count(DS) && - "Abstract DIE for this scope exists!"); - getAbstractScopeDIEs()[DS] = ScopeDIE; - } else if (!Scope->getInlinedAt()) { - assert(!LocalScopeDIEs.count(DS) && "Concrete DIE for this scope exists!"); - LocalScopeDIEs[DS] = ScopeDIE; - } + ParentScopeDIE.addChild(ScopeDIE); createAndAddScopeChildren(Scope, *ScopeDIE); } @@ -656,7 +646,7 @@ DIE *DwarfCompileUnit::constructInlinedScopeDIE(LexicalScope *Scope) { auto *InlinedSP = getDISubprogram(DS); // Find the subprogram's DwarfCompileUnit in the SPMap in case the subprogram // was inlined from another compile unit. 
- DIE *OriginDIE = getAbstractScopeDIEs()[InlinedSP]; + DIE *OriginDIE = getAbstractSPDies()[InlinedSP]; assert(OriginDIE && "Unable to find original DIE for an inlined subprogram."); auto ScopeDIE = DIE::get(DIEValueAllocator, dwarf::DW_TAG_inlined_subroutine); @@ -1020,12 +1010,6 @@ DIE &DwarfCompileUnit::constructSubprogramScopeDIE(const DISubprogram *Sub, if (Scope) { assert(!Scope->getInlinedAt()); assert(!Scope->isAbstractScope()); - - // Remember the subrogram before creating child entities. - assert(!LocalScopeDIEs.count(Sub) && - "Concrete DIE for the subprogram exists!"); - LocalScopeDIEs[Sub] = &ScopeDIE; - // Collect lexical scope children first. // ObjectPointer might be a local (non-argument) local variable if it's a // block's synthetic this pointer. @@ -1061,18 +1045,27 @@ DIE *DwarfCompileUnit::createAndAddScopeChildren(LexicalScope *Scope, for (DbgVariable *DV : Locals) ScopeDIE.addChild(constructVariableDIE(*DV, *Scope, ObjectPointer)); + // Emit imported entities (skipped in gmlt-like data). + if (!includeMinimalInlineScopes()) { + for (const auto *IE : ImportedEntities[Scope->getScopeNode()]) + ScopeDIE.addChild(constructImportedEntityDIE(cast(IE))); + } + // Emit labels. for (DbgLabel *DL : DU->getScopeLabels().lookup(Scope)) ScopeDIE.addChild(constructLabelDIE(*DL, *Scope)); // Emit inner lexical scopes. - auto needToEmitLexicalScope = [this](LexicalScope *LS) -> bool { + auto needToEmitLexicalScope = [this](LexicalScope *LS) { if (isa(LS->getScopeNode())) return true; auto Vars = DU->getScopeVariables().lookup(LS); if (!Vars.Args.empty() || !Vars.Locals.empty()) return true; - return LocalScopesWithLocalDecls.count(LS->getScopeNode()); + if (!includeMinimalInlineScopes() && + !ImportedEntities[LS->getScopeNode()].empty()) + return true; + return false; }; for (LexicalScope *LS : Scope->getChildren()) { // If the lexical block doesn't have non-scope children, skip @@ -1088,10 +1081,11 @@ DIE *DwarfCompileUnit::createAndAddScopeChildren(LexicalScope *Scope, void DwarfCompileUnit::constructAbstractSubprogramScopeDIE( LexicalScope *Scope) { + DIE *&AbsDef = getAbstractSPDies()[Scope->getScopeNode()]; + if (AbsDef) + return; auto *SP = cast(Scope->getScopeNode()); - if (getAbstractScopeDIEs().count(SP)) - return; DIE *ContextDIE; DwarfCompileUnit *ContextCU = this; @@ -1115,19 +1109,14 @@ void DwarfCompileUnit::constructAbstractSubprogramScopeDIE( // Passing null as the associated node because the abstract definition // shouldn't be found by lookup. - DIE &AbsDef = ContextCU->createAndAddDIE(dwarf::DW_TAG_subprogram, - *ContextDIE, nullptr); - - // Store the DIE before creating children. - getAbstractScopeDIEs()[SP] = &AbsDef; - - ContextCU->applySubprogramAttributesToDefinition(SP, AbsDef); - ContextCU->addSInt(AbsDef, dwarf::DW_AT_inline, + AbsDef = &ContextCU->createAndAddDIE(dwarf::DW_TAG_subprogram, *ContextDIE, nullptr); + ContextCU->applySubprogramAttributesToDefinition(SP, *AbsDef); + ContextCU->addSInt(*AbsDef, dwarf::DW_AT_inline, DD->getDwarfVersion() <= 4 ? 
Optional() : dwarf::DW_FORM_implicit_const, dwarf::DW_INL_inlined); - if (DIE *ObjectPointer = ContextCU->createAndAddScopeChildren(Scope, AbsDef)) - ContextCU->addDIEEntry(AbsDef, dwarf::DW_AT_object_pointer, *ObjectPointer); + if (DIE *ObjectPointer = ContextCU->createAndAddScopeChildren(Scope, *AbsDef)) + ContextCU->addDIEEntry(*AbsDef, dwarf::DW_AT_object_pointer, *ObjectPointer); } bool DwarfCompileUnit::useGNUAnalogForDwarf5Feature() const { @@ -1261,61 +1250,47 @@ void DwarfCompileUnit::constructCallSiteParmEntryDIEs( } } -DIE *DwarfCompileUnit::createImportedEntityDIE(const DIImportedEntity *IE) { - DIE *IMDie = DIE::get(DIEValueAllocator, (dwarf::Tag)IE->getTag()); - insertDIE(IE, IMDie); - +DIE *DwarfCompileUnit::constructImportedEntityDIE( + const DIImportedEntity *Module) { + DIE *IMDie = DIE::get(DIEValueAllocator, (dwarf::Tag)Module->getTag()); + insertDIE(Module, IMDie); DIE *EntityDie; - auto *Entity = IE->getEntity(); + auto *Entity = Module->getEntity(); if (auto *NS = dyn_cast(Entity)) EntityDie = getOrCreateNameSpace(NS); else if (auto *M = dyn_cast(Entity)) EntityDie = getOrCreateModule(M); - else if (auto *SP = dyn_cast(Entity)) { - // If we have abstract subprogram created, refer it. - if (auto *AbsSPDie = getAbstractScopeDIEs().lookup(SP)) - EntityDie = AbsSPDie; - else - EntityDie = getOrCreateSubprogramDIE(SP); - } else if (auto *T = dyn_cast(Entity)) + else if (auto *SP = dyn_cast(Entity)) + EntityDie = getOrCreateSubprogramDIE(SP); + else if (auto *T = dyn_cast(Entity)) EntityDie = getOrCreateTypeDIE(T); else if (auto *GV = dyn_cast(Entity)) EntityDie = getOrCreateGlobalVariableDIE(GV, {}); else EntityDie = getDIE(Entity); assert(EntityDie); - - addSourceLine(*IMDie, IE->getLine(), IE->getFile()); + addSourceLine(*IMDie, Module->getLine(), Module->getFile()); addDIEEntry(*IMDie, dwarf::DW_AT_import, *EntityDie); - StringRef Name = IE->getName(); + StringRef Name = Module->getName(); if (!Name.empty()) addString(*IMDie, dwarf::DW_AT_name, Name); // This is for imported module with renamed entities (such as variables and // subprograms). 
- DINodeArray Elements = IE->getElements(); + DINodeArray Elements = Module->getElements(); for (const auto *Element : Elements) { if (!Element) continue; - IMDie->addChild(createImportedEntityDIE(cast(Element))); + IMDie->addChild( + constructImportedEntityDIE(cast(Element))); } - return IMDie; -} -void DwarfCompileUnit::createAndAddImportedEntityDIE( - const DIImportedEntity *IE) { - DIE *ContextDIE = getOrCreateContextDIE(IE->getScope()); - assert(ContextDIE && - "Could not get or create scope for the imported entity!"); - if (!ContextDIE) - return; - - ContextDIE->addChild(createImportedEntityDIE(IE)); + return IMDie; } void DwarfCompileUnit::finishSubprogramDefinition(const DISubprogram *SP) { DIE *D = getDIE(SP); - if (DIE *AbsSPDIE = getAbstractScopeDIEs().lookup(SP)) { + if (DIE *AbsSPDIE = getAbstractSPDies().lookup(SP)) { if (D) // If this subprogram has an abstract definition, reference that addDIEEntry(*D, dwarf::DW_AT_abstract_origin, *AbsSPDIE); @@ -1605,38 +1580,3 @@ void DwarfCompileUnit::createBaseTypeDIEs() { Btr.Die = &Die; } } - -static DIE * -findLocalScopeDIE(const DILocalScope *LS, - DenseMap &ScopeDIEs) { - DIE *ScopeDIE = ScopeDIEs.lookup(LS); - if (isa(LS) && !ScopeDIE) - return nullptr; - if (!ScopeDIE) - return findLocalScopeDIE(cast(LS->getScope()), ScopeDIEs); - return ScopeDIE; -} - -DIE *DwarfCompileUnit::findLocalScopeDIE(const DIScope *S) { - auto *LScope = dyn_cast_or_null(S); - if (!LScope) - return nullptr; - - // Check if we have an abstract tree. - if (getAbstractScopeDIEs().count(LScope->getSubprogram())) - return ::findLocalScopeDIE(LScope, getAbstractScopeDIEs()); - - return ::findLocalScopeDIE(LScope, LocalScopeDIEs); -} - -DIE *DwarfCompileUnit::getOrCreateContextDIE(const DIScope *Context) { - if (auto *LScope = dyn_cast_or_null(Context)) { - if (DIE *ScopeDIE = findLocalScopeDIE(LScope)) - return ScopeDIE; - - // If nothing was found, fall back to DISubprogram and let - // DwarfUnit::getOrCreateContextDIE() create a new DIE for it. - Context = LScope->getSubprogram(); - } - return DwarfUnit::getOrCreateContextDIE(Context); -} diff --git a/llvm/lib/CodeGen/AsmPrinter/DwarfCompileUnit.h b/llvm/lib/CodeGen/AsmPrinter/DwarfCompileUnit.h index 251bc4bea68b..fb03982b5e4a 100644 --- a/llvm/lib/CodeGen/AsmPrinter/DwarfCompileUnit.h +++ b/llvm/lib/CodeGen/AsmPrinter/DwarfCompileUnit.h @@ -62,6 +62,11 @@ class DwarfCompileUnit final : public DwarfUnit { /// The start of the unit macro info within macro section. MCSymbol *MacroLabelBegin; + using ImportedEntityList = SmallVector; + using ImportedEntityMap = DenseMap; + + ImportedEntityMap ImportedEntities; + /// GlobalNames - A map of globally visible named entities for this unit. StringMap GlobalNames; @@ -75,14 +80,9 @@ class DwarfCompileUnit final : public DwarfUnit { // ranges/locs. const MCSymbol *BaseAddress = nullptr; - DenseMap LocalScopeDIEs; - DenseMap AbstractLocalScopeDIEs; + DenseMap AbstractSPDies; DenseMap> AbstractEntities; - /// LocalScopesWithLocalDecls - A list of non-empty local scopes - /// (with declaraions of static locals, function-local types, or imports). - SmallPtrSet LocalScopesWithLocalDecls; - /// DWO ID for correlating skeleton and split units. 
uint64_t DWOId = 0; @@ -92,10 +92,10 @@ class DwarfCompileUnit final : public DwarfUnit { bool isDwoUnit() const override; - DenseMap &getAbstractScopeDIEs() { + DenseMap &getAbstractSPDies() { if (isDwoUnit() && !DD->shareAcrossDWOCUs()) - return AbstractLocalScopeDIEs; - return DU->getAbstractScopeDIEs(); + return AbstractSPDies; + return DU->getAbstractSPDies(); } DenseMap> &getAbstractEntities() { @@ -169,6 +169,17 @@ class DwarfCompileUnit final : public DwarfUnit { unsigned getOrCreateSourceID(const DIFile *File) override; + void addImportedEntity(const DIImportedEntity* IE) { + DIScope *Scope = IE->getScope(); + assert(Scope && "Invalid Scope encoding!"); + if (!isa(Scope)) + // No need to add imported enities that are not local declaration. + return; + + auto *LocalScope = cast(Scope)->getNonLexicalBlockFileScope(); + ImportedEntities[LocalScope].push_back(IE); + } + /// addRange - Add an address range to the list of ranges for this unit. void addRange(RangeSpan Range); @@ -210,9 +221,6 @@ class DwarfCompileUnit final : public DwarfUnit { void createBaseTypeDIEs(); - DIE *findLocalScopeDIE(const DIScope *S); - DIE *getOrCreateContextDIE(const DIScope *Ty) override; - /// Construct a DIE for this subprogram scope. DIE &constructSubprogramScopeDIE(const DISubprogram *Sub, LexicalScope *Scope); @@ -251,9 +259,8 @@ class DwarfCompileUnit final : public DwarfUnit { void constructCallSiteParmEntryDIEs(DIE &CallSiteDIE, SmallVector &Params); - /// Construct DIE for an imported entity. - DIE *createImportedEntityDIE(const DIImportedEntity *IE); - void createAndAddImportedEntityDIE(const DIImportedEntity *IE); + /// Construct import_module DIE. + DIE *constructImportedEntityDIE(const DIImportedEntity *Module); void finishSubprogramDefinition(const DISubprogram *SP); void finishEntityDefinition(const DbgEntity *Entity); @@ -350,10 +357,6 @@ class DwarfCompileUnit final : public DwarfUnit { bool hasDwarfPubSections() const; void addBaseTypeRef(DIEValueList &Die, int64_t Idx); - - void recordLocalScopeWithDecls(const DILocalScope *S) { - LocalScopesWithLocalDecls.insert(S); - } }; } // end namespace llvm diff --git a/llvm/lib/CodeGen/AsmPrinter/DwarfDebug.cpp b/llvm/lib/CodeGen/AsmPrinter/DwarfDebug.cpp index 8f5bc3a07c54..b1c78f0faac4 100644 --- a/llvm/lib/CodeGen/AsmPrinter/DwarfDebug.cpp +++ b/llvm/lib/CodeGen/AsmPrinter/DwarfDebug.cpp @@ -516,7 +516,7 @@ void DwarfDebug::addSubprogramNames(const DICompileUnit &CU, // well into the name table. Only do that if we are going to actually emit // that name. if (SP->getLinkageName() != "" && SP->getName() != SP->getLinkageName() && - (useAllLinkageNames() || InfoHolder.getAbstractScopeDIEs().lookup(SP))) + (useAllLinkageNames() || InfoHolder.getAbstractSPDies().lookup(SP))) addAccelName(CU, SP->getLinkageName(), Die); // If this is an Objective-C selector name add it to the ObjC accelerator @@ -1067,45 +1067,6 @@ void DwarfDebug::finishUnitAttributes(const DICompileUnit *DIUnit, } } } - -// Collect local scopes that contain any local declarations -// (excluding local variables) to be sure they will be emitted -// (see DwarfCompileUnit::createAndAddScopeChildren() for details). 
-static void collectLocalScopesWithDeclsFromCU(const DICompileUnit *CUNode, - DwarfCompileUnit &CU) { - auto getLocalScope = [](const DIScope *S) -> const DILocalScope * { - if (!S) - return nullptr; - if (isa(S)) - S = S->getScope(); - if (const auto *LScope = dyn_cast_or_null(S)) - return LScope->getNonLexicalBlockFileScope(); - return nullptr; - }; - - for (auto *GVE : CUNode->getGlobalVariables()) - if (auto *LScope = getLocalScope(GVE->getVariable()->getScope())) - CU.recordLocalScopeWithDecls(LScope); - - for (auto *Ty : CUNode->getEnumTypes()) - if (auto *LScope = getLocalScope(Ty->getScope())) - CU.recordLocalScopeWithDecls(LScope); - - for (auto *Ty : CUNode->getRetainedTypes()) - if (DIType *RT = dyn_cast(Ty)) - if (auto *LScope = getLocalScope(RT->getScope())) - CU.recordLocalScopeWithDecls(LScope); - - for (auto *IE : CUNode->getImportedEntities()) - if (auto *LScope = getLocalScope(IE->getScope())) - CU.recordLocalScopeWithDecls(LScope); - - // FIXME: We know nothing about local records and typedefs here. - // since nothing but local variables (and members of local records) - // references them. So that they will be emitted in a first available - // parent scope DIE. -} - // Create new DwarfCompileUnit for the given metadata node with tag // DW_TAG_compile_unit. DwarfCompileUnit & @@ -1120,6 +1081,9 @@ DwarfDebug::getOrCreateDwarfCompileUnit(const DICompileUnit *DIUnit) { DwarfCompileUnit &NewCU = *OwnedUnit; InfoHolder.addUnit(std::move(OwnedUnit)); + for (auto *IE : DIUnit->getImportedEntities()) + NewCU.addImportedEntity(IE); + // LTO with assembly output shares a single line table amongst multiple CUs. // To avoid the compilation directory being ambiguous, let the line table // explicitly describe the directory of all files, never relying on the @@ -1139,12 +1103,57 @@ DwarfDebug::getOrCreateDwarfCompileUnit(const DICompileUnit *DIUnit) { CUMap.insert({DIUnit, &NewCU}); CUDieMap.insert({&NewCU.getUnitDie(), &NewCU}); + return NewCU; +} - // Record local scopes, that have some globals (static locals), - // imports or types declared within. - collectLocalScopesWithDeclsFromCU(DIUnit, NewCU); +void DwarfDebug::constructAndAddImportedEntityDIE(DwarfCompileUnit &TheCU, + const DIImportedEntity *N) { + if (isa(N->getScope())) + return; + if (DIE *D = TheCU.getOrCreateContextDIE(N->getScope())) + D->addChild(TheCU.constructImportedEntityDIE(N)); +} - return NewCU; +/// Sort and unique GVEs by comparing their fragment offset. +static SmallVectorImpl & +sortGlobalExprs(SmallVectorImpl &GVEs) { + llvm::sort( + GVEs, [](DwarfCompileUnit::GlobalExpr A, DwarfCompileUnit::GlobalExpr B) { + // Sort order: first null exprs, then exprs without fragment + // info, then sort by fragment offset in bits. + // FIXME: Come up with a more comprehensive comparator so + // the sorting isn't non-deterministic, and so the following + // std::unique call works correctly. + if (!A.Expr || !B.Expr) + return !!B.Expr; + auto FragmentA = A.Expr->getFragmentInfo(); + auto FragmentB = B.Expr->getFragmentInfo(); + if (!FragmentA || !FragmentB) + return !!FragmentB; + return FragmentA->OffsetInBits < FragmentB->OffsetInBits; + }); + GVEs.erase(std::unique(GVEs.begin(), GVEs.end(), + [](DwarfCompileUnit::GlobalExpr A, + DwarfCompileUnit::GlobalExpr B) { + return A.Expr == B.Expr; + }), + GVEs.end()); + return GVEs; +} + +/// Create a DIE for \p Ty if it doesn't already exist. If type units are +/// enabled, try to emit a type unit without a CU skeleton DIE. 
+static void createMaybeUnusedType(DwarfDebug &DD, DwarfCompileUnit &CU, + DIType &Ty) { + // Try to generate a type unit without creating a skeleton DIE in this CU. + if (DICompositeType const *CTy = dyn_cast(&Ty)) { + MDString const *TypeId = CTy->getRawIdentifier(); + if (DD.generateTypeUnits() && TypeId && !Ty.isForwardDecl()) + if (DD.getOrCreateDwarfTypeUnit(CU, TypeId->getString(), CTy)) + return; + } + // We couldn't or shouldn't add a type unit so create the DIE normally. + CU.getOrCreateTypeDIE(&Ty); } // Emit all Dwarf sections that should come prior to the content. Create @@ -1162,6 +1171,14 @@ void DwarfDebug::beginModule(Module *M) { assert(MMI->hasDebugInfo() && "DebugInfoAvailabilty unexpectedly not initialized"); SingleCU = NumDebugCUs == 1; + DenseMap> + GVMap; + for (const GlobalVariable &Global : M->globals()) { + SmallVector GVs; + Global.getDebugInfo(GVs); + for (auto *GVE : GVs) + GVMap[GVE->getVariable()].push_back({&Global, GVE->getExpression()}); + } // Create the symbol that designates the start of the unit's contribution // to the string offsets table. In a split DWARF scenario, only the skeleton @@ -1187,6 +1204,58 @@ void DwarfDebug::beginModule(Module *M) { // address table (.debug_addr) header. AddrPool.setLabel(Asm->createTempSymbol("addr_table_base")); DebugLocs.setSym(Asm->createTempSymbol("loclists_table_base")); + + for (DICompileUnit *CUNode : M->debug_compile_units()) { + // FIXME: Move local imported entities into a list attached to the + // subprogram, then this search won't be needed and a + // getImportedEntities().empty() test should go below with the rest. + bool HasNonLocalImportedEntities = llvm::any_of( + CUNode->getImportedEntities(), [](const DIImportedEntity *IE) { + return !isa(IE->getScope()); + }); + + if (!HasNonLocalImportedEntities && CUNode->getEnumTypes().empty() && + CUNode->getRetainedTypes().empty() && + CUNode->getGlobalVariables().empty() && CUNode->getMacros().empty()) + continue; + + DwarfCompileUnit &CU = getOrCreateDwarfCompileUnit(CUNode); + + // Global Variables. + for (auto *GVE : CUNode->getGlobalVariables()) { + // Don't bother adding DIGlobalVariableExpressions listed in the CU if we + // already know about the variable and it isn't adding a constant + // expression. + auto &GVMapEntry = GVMap[GVE->getVariable()]; + auto *Expr = GVE->getExpression(); + if (!GVMapEntry.size() || (Expr && Expr->isConstant())) + GVMapEntry.push_back({nullptr, Expr}); + } + + DenseSet Processed; + for (auto *GVE : CUNode->getGlobalVariables()) { + DIGlobalVariable *GV = GVE->getVariable(); + if (Processed.insert(GV).second) + CU.getOrCreateGlobalVariableDIE(GV, sortGlobalExprs(GVMap[GV])); + } + + for (auto *Ty : CUNode->getEnumTypes()) { + // The enum types array by design contains pointers to + // MDNodes rather than DIRefs. Unique them here. + createMaybeUnusedType(*this, CU, *Ty); + } + for (auto *Ty : CUNode->getRetainedTypes()) { + // The retained types array by design contains pointers to + // MDNodes rather than DIRefs. Unique them here. + if (DIType *RT = dyn_cast(Ty)) + // There is no point in force-emitting a forward declaration. + createMaybeUnusedType(*this, CU, *RT); + } + // Emit imported_modules last so that the relevant context is already + // available. 
+ for (auto *IE : CUNode->getImportedEntities()) + constructAndAddImportedEntityDIE(CU, IE); + } } void DwarfDebug::finishEntityDefinitions() { @@ -1351,47 +1420,6 @@ void DwarfDebug::finalizeModuleInfo() { SkeletonHolder.computeSizeAndOffsets(); } -/// Sort and unique GVEs by comparing their fragment offset. -static SmallVectorImpl & -sortGlobalExprs(SmallVectorImpl &GVEs) { - llvm::sort( - GVEs, [](DwarfCompileUnit::GlobalExpr A, DwarfCompileUnit::GlobalExpr B) { - // Sort order: first null exprs, then exprs without fragment - // info, then sort by fragment offset in bits. - // FIXME: Come up with a more comprehensive comparator so - // the sorting isn't non-deterministic, and so the following - // std::unique call works correctly. - if (!A.Expr || !B.Expr) - return !!B.Expr; - auto FragmentA = A.Expr->getFragmentInfo(); - auto FragmentB = B.Expr->getFragmentInfo(); - if (!FragmentA || !FragmentB) - return !!FragmentB; - return FragmentA->OffsetInBits < FragmentB->OffsetInBits; - }); - GVEs.erase(std::unique(GVEs.begin(), GVEs.end(), - [](DwarfCompileUnit::GlobalExpr A, - DwarfCompileUnit::GlobalExpr B) { - return A.Expr == B.Expr; - }), - GVEs.end()); - return GVEs; -} - -/// Create a DIE for \p Ty if it doesn't already exist. If type units are -/// enabled, try to emit a type unit without a CU skeleton DIE. -static void createMaybeUnusedType(DwarfDebug &DD, DwarfCompileUnit &CU, - DIType &Ty) { - // Try to generate a type unit without creating a skeleton DIE in this CU. - if (DICompositeType const *CTy = dyn_cast(&Ty)) { - MDString const *TypeId = CTy->getRawIdentifier(); - if (DD.generateTypeUnits() && TypeId && !Ty.isForwardDecl()) - if (DD.getOrCreateDwarfTypeUnit(CU, TypeId->getString(), CTy)) - return; - } - // We couldn't or shouldn't add a type unit so create the DIE normally. - CU.getOrCreateTypeDIE(&Ty); -} // Emit all Dwarf sections that should come after the content. void DwarfDebug::endModule() { @@ -1402,69 +1430,9 @@ void DwarfDebug::endModule() { assert(CurFn == nullptr); assert(CurMI == nullptr); - // Collect global variables info. - DenseMap> - GVMap; - for (const GlobalVariable &Global : MMI->getModule()->globals()) { - SmallVector GVs; - Global.getDebugInfo(GVs); - for (auto *GVE : GVs) - GVMap[GVE->getVariable()].push_back({&Global, GVE->getExpression()}); - } - - for (DICompileUnit *CUNode : MMI->getModule()->debug_compile_units()) { - auto *CU = CUMap.lookup(CUNode); - - // If this CU hasn't been emitted yet, create it here unless it is empty. - if (!CU) { - // FIXME: Move local imported entities into a list attached to the - // subprogram, then this search won't be needed and a - // getImportedEntities().empty() test should go below with the rest. - bool HasNonLocalImportedEntities = llvm::any_of( - CUNode->getImportedEntities(), [](const DIImportedEntity *IE) { - return !isa(IE->getScope()); - }); - - if (!HasNonLocalImportedEntities && CUNode->getEnumTypes().empty() && - CUNode->getRetainedTypes().empty() && - CUNode->getGlobalVariables().empty() && CUNode->getMacros().empty()) - continue; - - CU = &getOrCreateDwarfCompileUnit(CUNode); - } - - // Global Variables. - for (auto *GVE : CUNode->getGlobalVariables()) { - // Don't bother adding DIGlobalVariableExpressions listed in the CU if we - // already know about the variable and it isn't adding a constant - // expression. 
- auto &GVMapEntry = GVMap[GVE->getVariable()]; - auto *Expr = GVE->getExpression(); - if (!GVMapEntry.size() || (Expr && Expr->isConstant())) - GVMapEntry.push_back({nullptr, Expr}); - } - - DenseSet Processed; - for (auto *GVE : CUNode->getGlobalVariables()) { - DIGlobalVariable *GV = GVE->getVariable(); - if (Processed.insert(GV).second) - CU->getOrCreateGlobalVariableDIE(GV, sortGlobalExprs(GVMap[GV])); - } - - for (auto *Ty : CUNode->getEnumTypes()) - createMaybeUnusedType(*this, *CU, *Ty); - - for (auto *Ty : CUNode->getRetainedTypes()) { - if (DIType *RT = dyn_cast(Ty)) - createMaybeUnusedType(*this, *CU, *RT); - } - - // Emit imported entities last so that the relevant context - // is already available. - for (auto *IE : CUNode->getImportedEntities()) - CU->createAndAddImportedEntityDIE(IE); - - CU->createBaseTypeDIEs(); + for (const auto &P : CUMap) { + auto &CU = *P.second; + CU.createBaseTypeDIEs(); } // If we aren't actually generating debug info (check beginModule - diff --git a/llvm/lib/CodeGen/AsmPrinter/DwarfDebug.h b/llvm/lib/CodeGen/AsmPrinter/DwarfDebug.h index 79f8423af602..ff2589b3c953 100644 --- a/llvm/lib/CodeGen/AsmPrinter/DwarfDebug.h +++ b/llvm/lib/CodeGen/AsmPrinter/DwarfDebug.h @@ -316,13 +316,11 @@ class DwarfDebug : public DebugHandlerBase { /// can refer to them in spite of insertions into this list. DebugLocStream DebugLocs; - using SubprogramSetVector = - SetVector, - SmallPtrSet>; - /// This is a collection of subprogram MDNodes that are processed to /// create DIEs. - SubprogramSetVector ProcessedSPNodes; + SetVector, + SmallPtrSet> + ProcessedSPNodes; /// If nonnull, stores the current machine function we're processing. const MachineFunction *CurFn = nullptr; @@ -600,6 +598,10 @@ class DwarfDebug : public DebugHandlerBase { void finishUnitAttributes(const DICompileUnit *DIUnit, DwarfCompileUnit &NewCU); + /// Construct imported_module or imported_declaration DIE. + void constructAndAddImportedEntityDIE(DwarfCompileUnit &TheCU, + const DIImportedEntity *N); + /// Register a source line with debug info. Returns the unique /// label that was emitted and which provides correspondence to the /// source line list. diff --git a/llvm/lib/CodeGen/AsmPrinter/DwarfFile.h b/llvm/lib/CodeGen/AsmPrinter/DwarfFile.h index 464f4f048016..79a6ce7801b7 100644 --- a/llvm/lib/CodeGen/AsmPrinter/DwarfFile.h +++ b/llvm/lib/CodeGen/AsmPrinter/DwarfFile.h @@ -26,7 +26,6 @@ class DbgEntity; class DbgVariable; class DbgLabel; class DINode; -class DILocalScope; class DwarfCompileUnit; class DwarfUnit; class LexicalScope; @@ -88,7 +87,7 @@ class DwarfFile { DenseMap ScopeLabels; // Collection of abstract subprogram DIEs. - DenseMap AbstractLocalScopeDIEs; + DenseMap AbstractSPDies; DenseMap> AbstractEntities; /// Maps MDNodes for type system with the corresponding DIEs. These DIEs can @@ -163,8 +162,8 @@ class DwarfFile { return ScopeLabels; } - DenseMap &getAbstractScopeDIEs() { - return AbstractLocalScopeDIEs; + DenseMap &getAbstractSPDies() { + return AbstractSPDies; } DenseMap> &getAbstractEntities() { diff --git a/llvm/lib/CodeGen/AsmPrinter/DwarfUnit.cpp b/llvm/lib/CodeGen/AsmPrinter/DwarfUnit.cpp index 7edf9be92eac..6b6d63f14f87 100644 --- a/llvm/lib/CodeGen/AsmPrinter/DwarfUnit.cpp +++ b/llvm/lib/CodeGen/AsmPrinter/DwarfUnit.cpp @@ -1219,7 +1219,7 @@ bool DwarfUnit::applySubprogramDefinitionAttributes(const DISubprogram *SP, "decl has a linkage name and it is different"); if (DeclLinkageName.empty() && // Always emit it for abstract subprograms. 
- (DD->useAllLinkageNames() || DU->getAbstractScopeDIEs().lookup(SP))) + (DD->useAllLinkageNames() || DU->getAbstractSPDies().lookup(SP))) addLinkageName(SPDie, LinkageName); if (!DeclDie) diff --git a/llvm/lib/CodeGen/AsmPrinter/DwarfUnit.h b/llvm/lib/CodeGen/AsmPrinter/DwarfUnit.h index 1643edf05b7a..54b0079dd7ce 100644 --- a/llvm/lib/CodeGen/AsmPrinter/DwarfUnit.h +++ b/llvm/lib/CodeGen/AsmPrinter/DwarfUnit.h @@ -250,7 +250,7 @@ class DwarfUnit : public DIEUnit { DIE *getOrCreateTypeDIE(const MDNode *TyNode); /// Get context owner's DIE. - virtual DIE *getOrCreateContextDIE(const DIScope *Context); + DIE *getOrCreateContextDIE(const DIScope *Context); /// Construct DIEs for types that contain vtables. void constructContainingTypeDIEs(); diff --git a/llvm/test/CodeGen/ARM/2011-01-19-MergedGlobalDbg.ll b/llvm/test/CodeGen/ARM/2011-01-19-MergedGlobalDbg.ll index 92dccdb77194..31f3da743054 100644 --- a/llvm/test/CodeGen/ARM/2011-01-19-MergedGlobalDbg.ll +++ b/llvm/test/CodeGen/ARM/2011-01-19-MergedGlobalDbg.ll @@ -10,12 +10,12 @@ target triple = "thumbv7-apple-darwin10" @x4 = internal global i8 1, align 1, !dbg !8 @x5 = global i8 1, align 1, !dbg !10 -; CHECK: DW_TAG_variable [6] +; CHECK: DW_TAG_variable ; CHECK-NOT: DW_TAG ; CHECK: DW_AT_name {{.*}} "x1" ; CHECK-NOT: {{DW_TAG|NULL}} ; CHECK: DW_AT_location [DW_FORM_exprloc] (DW_OP_addr [[ADDR:0x[0-9a-fA-F]+]]) -; CHECK: DW_TAG_variable [6] +; CHECK: DW_TAG_variable ; CHECK-NOT: DW_TAG ; CHECK: DW_AT_name {{.*}} "x2" ; CHECK-NOT: {{DW_TAG|NULL}} diff --git a/llvm/test/CodeGen/ARM/2011-08-02-MergedGlobalDbg.ll b/llvm/test/CodeGen/ARM/2011-08-02-MergedGlobalDbg.ll index d913a16eb93a..ac229419a10f 100644 --- a/llvm/test/CodeGen/ARM/2011-08-02-MergedGlobalDbg.ll +++ b/llvm/test/CodeGen/ARM/2011-08-02-MergedGlobalDbg.ll @@ -1,11 +1,11 @@ ; RUN: llc -arm-global-merge -global-merge-group-by-use=false -filetype=obj < %s | llvm-dwarfdump -debug-info -v - | FileCheck %s -; CHECK: DW_TAG_variable [6] +; CHECK: DW_TAG_variable ; CHECK-NOT: DW_TAG ; CHECK: DW_AT_name {{.*}} "x1" ; CHECK-NOT: {{DW_TAG|NULL}} ; CHECK: DW_AT_location [DW_FORM_exprloc] (DW_OP_addr [[ADDR:0x[0-9a-fA-F]+]]) -; CHECK: DW_TAG_variable [6] +; CHECK: DW_TAG_variable ; CHECK-NOT: DW_TAG ; CHECK: DW_AT_name {{.*}} "x2" ; CHECK-NOT: {{DW_TAG|NULL}} diff --git a/llvm/test/DebugInfo/AMDGPU/variable-locations.ll b/llvm/test/DebugInfo/AMDGPU/variable-locations.ll index b2fabbd3c381..365dd9dfea78 100644 --- a/llvm/test/DebugInfo/AMDGPU/variable-locations.ll +++ b/llvm/test/DebugInfo/AMDGPU/variable-locations.ll @@ -13,7 +13,22 @@ declare void @llvm.dbg.declare(metadata, metadata, metadata) +; CHECK: {{.*}}DW_TAG_variable +; CHECK-NEXT: DW_AT_name {{.*}}"GlobA" +; CHECK-NEXT: DW_AT_type +; CHECK-NEXT: DW_AT_external +; CHECK-NEXT: DW_AT_decl_file +; CHECK-NEXT: DW_AT_decl_line +; CHECK-NEXT: DW_AT_location [DW_FORM_block1] (DW_OP_addr 0x0) @GlobA = common addrspace(1) global i32 0, align 4, !dbg !0 + +; CHECK: {{.*}}DW_TAG_variable +; CHECK-NEXT: DW_AT_name {{.*}}"GlobB" +; CHECK-NEXT: DW_AT_type +; CHECK-NEXT: DW_AT_external +; CHECK-NEXT: DW_AT_decl_file +; CHECK-NEXT: DW_AT_decl_line +; CHECK-NEXT: DW_AT_location [DW_FORM_block1] (DW_OP_addr 0x0) @GlobB = common addrspace(1) global i32 0, align 4, !dbg !6 ; CHECK: {{.*}}DW_TAG_subprogram @@ -63,28 +78,12 @@ entry: !llvm.ident = !{!12} !0 = !DIGlobalVariableExpression(var: !1, expr: !DIExpression()) - -; CHECK: {{.*}}DW_TAG_variable -; CHECK-NEXT: DW_AT_name {{.*}}"GlobA" -; CHECK-NEXT: DW_AT_type -; CHECK-NEXT: DW_AT_external -; 
CHECK-NEXT: DW_AT_decl_file -; CHECK-NEXT: DW_AT_decl_line -; CHECK-NEXT: DW_AT_location [DW_FORM_block1] (DW_OP_addr 0x0) !1 = distinct !DIGlobalVariable(name: "GlobA", scope: !2, file: !3, line: 1, type: !8, isLocal: false, isDefinition: true) !2 = distinct !DICompileUnit(language: DW_LANG_C99, file: !3, producer: "clang version 5.0.0", isOptimized: false, runtimeVersion: 0, emissionKind: FullDebug, enums: !4, globals: !5) !3 = !DIFile(filename: "variable-locations.cl", directory: "/some/random/directory") !4 = !{} !5 = !{!0, !6} !6 = !DIGlobalVariableExpression(var: !7, expr: !DIExpression()) - -; CHECK: {{.*}}DW_TAG_variable -; CHECK-NEXT: DW_AT_name {{.*}}"GlobB" -; CHECK-NEXT: DW_AT_type -; CHECK-NEXT: DW_AT_external -; CHECK-NEXT: DW_AT_decl_file -; CHECK-NEXT: DW_AT_decl_line -; CHECK-NEXT: DW_AT_location [DW_FORM_block1] (DW_OP_addr 0x0) !7 = distinct !DIGlobalVariable(name: "GlobB", scope: !2, file: !3, line: 2, type: !8, isLocal: false, isDefinition: true) !8 = !DIBasicType(name: "int", size: 32, encoding: DW_ATE_signed) !9 = !{i32 2, i32 0} diff --git a/llvm/test/DebugInfo/BPF/extern-void.ll b/llvm/test/DebugInfo/BPF/extern-void.ll index 283dd5411007..3bf8e27623e7 100644 --- a/llvm/test/DebugInfo/BPF/extern-void.ll +++ b/llvm/test/DebugInfo/BPF/extern-void.ll @@ -36,8 +36,9 @@ entry: ; CHECK: .quad bla1 ; CHECK-NEXT: DW_TAG_variable ; -; CHECK: .quad bla2 +; CHECK: .quad bla2 ; CHECK-NEXT: DW_TAG_const_type +; CHECK-NEXT: DW_TAG_subprogram ; Function Attrs: nounwind readnone speculatable willreturn declare void @llvm.dbg.value(metadata, metadata, metadata) #1 diff --git a/llvm/test/DebugInfo/Generic/cross-cu-linkonce-distinct.ll b/llvm/test/DebugInfo/Generic/cross-cu-linkonce-distinct.ll index 6e36aded9e19..2b6369e00521 100644 --- a/llvm/test/DebugInfo/Generic/cross-cu-linkonce-distinct.ll +++ b/llvm/test/DebugInfo/Generic/cross-cu-linkonce-distinct.ll @@ -27,15 +27,13 @@ ; The DISubprogram should show up in compile unit a. 
; CHECK: DW_TAG_compile_unit ; CHECK-NOT: DW_TAG -; CHECK: DW_AT_name ("a.cpp") -; CHECK: DW_TAG_subprogram -; CHECK-NOT: DW_TAG -; CHECK: DW_AT_name ("func") +; CHECK: DW_AT_name ("b.cpp") +; CHECK-NOT: DW_TAG_subprogram ; CHECK: DW_TAG_compile_unit ; CHECK-NOT: DW_TAG -; CHECK: DW_AT_name ("b.cpp") -; CHECK-NOT: DW_TAG_subprogram +; CHECK: DW_AT_name ("a.cpp") +; CHECK: DW_AT_name ("func") source_filename = "test/DebugInfo/Generic/cross-cu-linkonce-distinct.ll" diff --git a/llvm/test/DebugInfo/Generic/debug-info-qualifiers.ll b/llvm/test/DebugInfo/Generic/debug-info-qualifiers.ll index 6b9759284b64..07e420ce47d5 100644 --- a/llvm/test/DebugInfo/Generic/debug-info-qualifiers.ll +++ b/llvm/test/DebugInfo/Generic/debug-info-qualifiers.ll @@ -18,10 +18,6 @@ ; CHECK-NEXT: DW_AT_reference DW_FORM_flag_present ; CHECK: DW_TAG_subroutine_type DW_CHILDREN_yes ; CHECK-NEXT: DW_AT_rvalue_reference DW_FORM_flag_present - -; CHECK: DW_TAG_subprogram -; CHECK-NOT: DW_TAG_subprogram -; CHECK: DW_AT_name {{.*}}"g" ; ; CHECK: DW_TAG_subprogram ; CHECK-NOT: DW_TAG_subprogram diff --git a/llvm/test/DebugInfo/Generic/debug-names-linkage-name.ll b/llvm/test/DebugInfo/Generic/debug-names-linkage-name.ll index 9ea8c724bfc0..7b749f8c7f4d 100644 --- a/llvm/test/DebugInfo/Generic/debug-names-linkage-name.ll +++ b/llvm/test/DebugInfo/Generic/debug-names-linkage-name.ll @@ -11,9 +11,9 @@ ; We should have all three linkage names in the .debug_info and .debug_names ; ALL: .debug_info contents: +; ALL: DW_AT_linkage_name ("_ZN1n1vE") ; ALL: DW_AT_linkage_name ("_Z1fi") ; ALL: DW_AT_linkage_name ("_Z1gi") -; ALL: DW_AT_linkage_name ("_ZN1n1vE") ; ALL: .debug_names contents: ; ALL: String: {{.*}} "_Z1fi" ; ALL: String: {{.*}} "_Z1gi" diff --git a/llvm/test/DebugInfo/Generic/enum-types.ll b/llvm/test/DebugInfo/Generic/enum-types.ll index d40866c091a3..786ee28f54ac 100644 --- a/llvm/test/DebugInfo/Generic/enum-types.ll +++ b/llvm/test/DebugInfo/Generic/enum-types.ll @@ -6,12 +6,12 @@ ; rdar://17628609 ; CHECK: DW_TAG_compile_unit +; CHECK: 0x[[ENUM:.*]]: DW_TAG_enumeration_type +; CHECK-NEXT: DW_AT_name {{.*}}"EA" ; CHECK: DW_TAG_subprogram ; CHECK: DW_AT_MIPS_linkage_name {{.*}}"_Z4topA2EA" ; CHECK: DW_TAG_formal_parameter -; CHECK: DW_AT_type [DW_FORM_ref4] (cu + 0x{{.*}} => {0x[[ENUM:.*]]} -; CHECK: 0x[[ENUM]]: DW_TAG_enumeration_type -; CHECK-NEXT: DW_AT_name {{.*}}"EA" +; CHECK: DW_AT_type [DW_FORM_ref4] (cu + 0x{{.*}} => {0x[[ENUM]]} ; CHECK: DW_TAG_compile_unit ; CHECK: DW_TAG_subprogram diff --git a/llvm/test/DebugInfo/Generic/import-inlined-declaration.ll b/llvm/test/DebugInfo/Generic/import-inlined-declaration.ll deleted file mode 100644 index 471526b1395c..000000000000 --- a/llvm/test/DebugInfo/Generic/import-inlined-declaration.ll +++ /dev/null @@ -1,72 +0,0 @@ -; RUN: %llc_dwarf -O0 -filetype=obj < %s | llvm-dwarfdump - | FileCheck --implicit-check-not "{{DW_TAG|NULL}}" %s - -; namespace ns { -; inline __attribute__((always_inline)) -; void foo() { int a = 4; } -; } -; -; void goo() { -; using ns::foo; -; foo(); -; } - -; Ensure that imported declarations reference the correct subprograms even if -; those subprograms are inlined. 
- -; CHECK: DW_TAG_compile_unit -; CHECK: DW_TAG_namespace -; CHECK: DW_AT_name ("ns") -; CHECK: [[FOO:0x.*]]: DW_TAG_subprogram -; CHECK: DW_AT_name ("foo") -; CHECK: DW_TAG_variable -; CHECK: NULL -; CHECK: NULL -; CHECK: DW_TAG_base_type -; CHECK: DW_TAG_subprogram -; CHECK: DW_AT_name ("goo") -; CHECK: DW_TAG_inlined_subroutine -; CHECK: DW_AT_abstract_origin ([[FOO]] -; CHECK: DW_TAG_variable -; CHECK: NULL -; CHECK: DW_TAG_imported_declaration -; CHECK: DW_AT_import ([[FOO]]) -; CHECK: NULL -; CHECK: NULL - -; Function Attrs: mustprogress noinline optnone uwtable -define dso_local void @_Z3goov() !dbg !4 { -entry: - %a.i = alloca i32, align 4 - call void @llvm.dbg.declare(metadata i32* %a.i, metadata !16, metadata !DIExpression()), !dbg !18 - store i32 4, i32* %a.i, align 4, !dbg !18 - ret void, !dbg !20 -} - -; Function Attrs: nofree nosync nounwind readnone speculatable willreturn -declare void @llvm.dbg.declare(metadata, metadata, metadata) - -!llvm.dbg.cu = !{!0} -!llvm.module.flags = !{!10, !11, !12, !13, !14} -!llvm.ident = !{!15} - -!0 = distinct !DICompileUnit(language: DW_LANG_C_plus_plus_14, file: !1, producer: "clang version 14.0.0", isOptimized: false, runtimeVersion: 0, emissionKind: FullDebug, imports: !2, splitDebugInlining: false, nameTableKind: None) -!1 = !DIFile(filename: "imported-inlined-declaration.cpp", directory: "") -!2 = !{!3} -!3 = !DIImportedEntity(tag: DW_TAG_imported_declaration, scope: !4, entity: !8, file: !1, line: 7) -!4 = distinct !DISubprogram(name: "goo", linkageName: "_Z3goov", scope: !1, file: !1, line: 6, type: !5, scopeLine: 6, flags: DIFlagPrototyped, spFlags: DISPFlagDefinition, unit: !0, retainedNodes: !7) -!5 = !DISubroutineType(types: !6) -!6 = !{null} -!7 = !{} -!8 = distinct !DISubprogram(name: "foo", linkageName: "_ZN2ns3fooEv", scope: !9, file: !1, line: 3, type: !5, scopeLine: 3, flags: DIFlagPrototyped, spFlags: DISPFlagDefinition, unit: !0, retainedNodes: !7) -!9 = !DINamespace(name: "ns", scope: null) -!10 = !{i32 7, !"Dwarf Version", i32 4} -!11 = !{i32 2, !"Debug Info Version", i32 3} -!12 = !{i32 1, !"wchar_size", i32 4} -!13 = !{i32 7, !"uwtable", i32 1} -!14 = !{i32 7, !"frame-pointer", i32 2} -!15 = !{!"clang version 14.0.0"} -!16 = !DILocalVariable(name: "a", scope: !8, file: !1, line: 3, type: !17) -!17 = !DIBasicType(name: "int", size: 32, encoding: DW_ATE_signed) -!18 = !DILocation(line: 3, column: 18, scope: !8, inlinedAt: !19) -!19 = distinct !DILocation(line: 8, column: 2, scope: !4) -!20 = !DILocation(line: 9, column: 1, scope: !4) diff --git a/llvm/test/DebugInfo/Generic/imported-name-inlined.ll b/llvm/test/DebugInfo/Generic/imported-name-inlined.ll index a26b687b1f67..a3aaaff66f85 100644 --- a/llvm/test/DebugInfo/Generic/imported-name-inlined.ll +++ b/llvm/test/DebugInfo/Generic/imported-name-inlined.ll @@ -13,17 +13,21 @@ ; Ensure that top level imported declarations don't produce an extra degenerate ; concrete subprogram definition. 
+; FIXME: imported entities should only be emitted to the abstract origin if one is present + ; CHECK: DW_TAG_compile_unit ; CHECK: DW_TAG_subprogram ; CHECK: DW_AT_name ("f1") ; CHECK: DW_TAG_imported_declaration ; CHECK: NULL +; CHECK: DW_TAG_namespace +; CHECK: DW_TAG_subprogram +; CHECK: NULL ; CHECK: DW_TAG_subprogram ; CHECK: DW_AT_name ("f2") ; CHECK: DW_TAG_inlined_subroutine -; CHECK: NULL -; CHECK: DW_TAG_namespace -; CHECK: DW_TAG_subprogram +; CHECK: DW_TAG_imported_declaration +; CHECK: NULL ; CHECK: NULL ; CHECK: NULL diff --git a/llvm/test/DebugInfo/Generic/incorrect-variable-debugloc.ll b/llvm/test/DebugInfo/Generic/incorrect-variable-debugloc.ll index 2d708b5cb21a..df78a937ed12 100644 --- a/llvm/test/DebugInfo/Generic/incorrect-variable-debugloc.ll +++ b/llvm/test/DebugInfo/Generic/incorrect-variable-debugloc.ll @@ -34,9 +34,6 @@ ; b.m_fn2(); ; } -; CHECK: DW_TAG_structure_type -; CHECK-NEXT: DW_AT_name ("B") - ; CHECK: DW_TAG_structure_type ; CHECK-NEXT: DW_AT_name ("C") ; CHECK: [[M_FN3_DECL:.*]]: DW_TAG_subprogram diff --git a/llvm/test/DebugInfo/Generic/inlined-local-type.ll b/llvm/test/DebugInfo/Generic/inlined-local-type.ll deleted file mode 100644 index b3f3f80c07d2..000000000000 --- a/llvm/test/DebugInfo/Generic/inlined-local-type.ll +++ /dev/null @@ -1,125 +0,0 @@ -; RUN: %llc_dwarf -O0 -filetype=obj < %s | llvm-dwarfdump -debug-info - | FileCheck --implicit-check-not "{{DW_TAG|NULL}}" %s - -; inline __attribute__((always_inline)) -; int removed() { struct A {int i;}; struct A a; return a.i++; } -; -; __attribute__((always_inline)) -; int not_removed() { struct B {int i;}; struct B b; return b.i++; } -; -; int foo() { return removed() + not_removed(); }} - -; Ensure that function-local types have the correct subprogram parent even if -; those subprograms are inlined. 
- -; CHECK: DW_TAG_compile_unit -; CHECK: DW_TAG_subprogram -; CHECK: DW_AT_abstract_origin ({{0x.*}} "not_removed") -; CHECK: DW_TAG_variable -; CHECK: NULL -; CHECK: DW_TAG_subprogram -; CHECK: DW_AT_name ("removed") -; CHECK: [[A:0x.*]]: DW_TAG_structure_type -; CHECK: DW_AT_name ("A") -; CHECK: DW_TAG_member -; CHECK: NULL -; CHECK: DW_TAG_variable -; CHECK: DW_AT_type ([[A]] "A") -; CHECK: NULL -; CHECK: DW_TAG_base_type -; CHECK: DW_TAG_subprogram -; CHECK: DW_AT_name ("not_removed") -; CHECK: [[B:0x.*]]: DW_TAG_structure_type -; CHECK: DW_AT_name ("B") -; CHECK: DW_TAG_member -; CHECK: NULL -; CHECK: DW_TAG_variable -; CHECK: DW_AT_type ([[B]] "B") -; CHECK: NULL -; CHECK: DW_TAG_subprogram -; CHECK: DW_TAG_inlined_subroutine -; CHECK: DW_TAG_variable -; CHECK: NULL -; CHECK: DW_TAG_inlined_subroutine -; CHECK: DW_TAG_variable -; CHECK: NULL -; CHECK: NULL -; CHECK: NULL - -%struct.B = type { i32 } -%struct.A = type { i32 } - -define dso_local i32 @not_removed() !dbg !12 { - %1 = alloca %struct.B, align 4 - call void @llvm.dbg.declare(metadata %struct.B* %1, metadata !18, metadata !DIExpression()), !dbg !22 - %2 = getelementptr inbounds %struct.B, %struct.B* %1, i32 0, i32 0, !dbg !23 - %3 = load i32, i32* %2, align 4, !dbg !24 - %4 = add nsw i32 %3, 1, !dbg !24 - store i32 %4, i32* %2, align 4, !dbg !24 - ret i32 %3, !dbg !25 -} - -declare void @llvm.dbg.declare(metadata, metadata, metadata) - -define dso_local i32 @foo() !dbg !26 { - %1 = alloca %struct.A, align 4 - %2 = alloca %struct.B, align 4 - call void @llvm.dbg.declare(metadata %struct.A* %1, metadata !27, metadata !DIExpression()), !dbg !32 - %3 = getelementptr inbounds %struct.A, %struct.A* %1, i32 0, i32 0, !dbg !34 - %4 = load i32, i32* %3, align 4, !dbg !35 - %5 = add nsw i32 %4, 1, !dbg !35 - store i32 %5, i32* %3, align 4, !dbg !35 - call void @llvm.dbg.declare(metadata %struct.B* %2, metadata !18, metadata !DIExpression()), !dbg !36 - %6 = getelementptr inbounds %struct.B, %struct.B* %2, i32 0, i32 0, !dbg !38 - %7 = load i32, i32* %6, align 4, !dbg !39 - %8 = add nsw i32 %7, 1, !dbg !39 - store i32 %8, i32* %6, align 4, !dbg !39 - %9 = add nsw i32 %4, %7, !dbg !40 - ret i32 %9, !dbg !41 -} - -!llvm.dbg.cu = !{!0} -!llvm.module.flags = !{!2, !3, !4, !5, !6, !7, !8, !9, !10} -!llvm.ident = !{!11} - -!0 = distinct !DICompileUnit(language: DW_LANG_C99, file: !1, producer: "clang version 14.0.0", isOptimized: false, runtimeVersion: 0, emissionKind: FullDebug, splitDebugInlining: false, nameTableKind: None) -!1 = !DIFile(filename: "inlined-local-type.cpp", directory: "") -!2 = !{i32 7, !"Dwarf Version", i32 4} -!3 = !{i32 2, !"Debug Info Version", i32 3} -!4 = !{i32 1, !"wchar_size", i32 4} -!5 = !{i32 1, !"branch-target-enforcement", i32 0} -!6 = !{i32 1, !"sign-return-address", i32 0} -!7 = !{i32 1, !"sign-return-address-all", i32 0} -!8 = !{i32 1, !"sign-return-address-with-bkey", i32 0} -!9 = !{i32 7, !"uwtable", i32 1} -!10 = !{i32 7, !"frame-pointer", i32 1} -!11 = !{!"clang version 14.0.0"} -!12 = distinct !DISubprogram(name: "not_removed", scope: !13, file: !13, line: 5, type: !14, scopeLine: 5, spFlags: DISPFlagDefinition, unit: !0, retainedNodes: !17) -!13 = !DIFile(filename: "inlined-local-type.cpp", directory: "") -!14 = !DISubroutineType(types: !15) -!15 = !{!16} -!16 = !DIBasicType(name: "int", size: 32, encoding: DW_ATE_signed) -!17 = !{} -!18 = !DILocalVariable(name: "b", scope: !12, file: !13, line: 5, type: !19) -!19 = distinct !DICompositeType(tag: DW_TAG_structure_type, name: "B", scope: !12, file: 
!13, line: 5, size: 32, elements: !20) -!20 = !{!21} -!21 = !DIDerivedType(tag: DW_TAG_member, name: "i", scope: !19, file: !13, line: 5, baseType: !16, size: 32) -!22 = !DILocation(line: 5, column: 49, scope: !12) -!23 = !DILocation(line: 5, column: 61, scope: !12) -!24 = !DILocation(line: 5, column: 62, scope: !12) -!25 = !DILocation(line: 5, column: 52, scope: !12) -!26 = distinct !DISubprogram(name: "foo", scope: !13, file: !13, line: 7, type: !14, scopeLine: 7, spFlags: DISPFlagDefinition, unit: !0, retainedNodes: !17) -!27 = !DILocalVariable(name: "a", scope: !28, file: !13, line: 2, type: !29) -!28 = distinct !DISubprogram(name: "removed", scope: !13, file: !13, line: 2, type: !14, scopeLine: 2, spFlags: DISPFlagDefinition, unit: !0, retainedNodes: !17) -!29 = distinct !DICompositeType(tag: DW_TAG_structure_type, name: "A", scope: !28, file: !13, line: 2, size: 32, elements: !30) -!30 = !{!31} -!31 = !DIDerivedType(tag: DW_TAG_member, name: "i", scope: !29, file: !13, line: 2, baseType: !16, size: 32) -!32 = !DILocation(line: 2, column: 45, scope: !28, inlinedAt: !33) -!33 = distinct !DILocation(line: 7, column: 20, scope: !26) -!34 = !DILocation(line: 2, column: 57, scope: !28, inlinedAt: !33) -!35 = !DILocation(line: 2, column: 58, scope: !28, inlinedAt: !33) -!36 = !DILocation(line: 5, column: 49, scope: !12, inlinedAt: !37) -!37 = distinct !DILocation(line: 7, column: 32, scope: !26) -!38 = !DILocation(line: 5, column: 61, scope: !12, inlinedAt: !37) -!39 = !DILocation(line: 5, column: 62, scope: !12, inlinedAt: !37) -!40 = !DILocation(line: 7, column: 30, scope: !26) -!41 = !DILocation(line: 7, column: 13, scope: !26) diff --git a/llvm/test/DebugInfo/Generic/inlined-static-var.ll b/llvm/test/DebugInfo/Generic/inlined-static-var.ll deleted file mode 100644 index 60c986fc2b64..000000000000 --- a/llvm/test/DebugInfo/Generic/inlined-static-var.ll +++ /dev/null @@ -1,92 +0,0 @@ -; RUN: %llc_dwarf -O0 -filetype=obj < %s | llvm-dwarfdump -debug-info - | FileCheck --implicit-check-not "{{DW_TAG|NULL}}" %s - -; inline __attribute__((always_inline)) -; int removed() { static int A; return A++; } -; -; __attribute__((always_inline)) -; int not_removed() { static int B; return B++; } -; -; int foo() { return removed() + not_removed(); } - -; Ensure that global variables belong to the correct subprograms even if those -; subprograms are inlined. 
- -; CHECK: DW_TAG_compile_unit -; CHECK: DW_TAG_subprogram -; CHECK: DW_AT_abstract_origin {{.*}} "_Z11not_removedv" -; CHECK: DW_TAG_subprogram -; CHECK: DW_AT_name ("removed") -; CHECK: DW_TAG_variable -; CHECK: DW_AT_name ("A") -; CHECK: NULL -; CHECK: DW_TAG_base_type -; CHECK: DW_TAG_subprogram -; CHECK: DW_AT_name ("not_removed") -; CHECK: DW_TAG_variable -; CHECK: DW_AT_name ("B") -; CHECK: NULL -; CHECK: DW_TAG_subprogram -; CHECK: DW_AT_name ("foo") -; CHECK: DW_TAG_inlined_subroutine -; CHECK: DW_TAG_inlined_subroutine -; CHECK: NULL -; CHECK: NULL - -@_ZZ11not_removedvE1A = internal global i32 0, align 4, !dbg !0 -@_ZZ7removedvE1A = linkonce_odr dso_local global i32 0, align 4, !dbg !10 - -define dso_local i32 @_Z11not_removedv() !dbg !2 { - %1 = load i32, i32* @_ZZ11not_removedvE1A, align 4, !dbg !24 - %2 = add nsw i32 %1, 1, !dbg !24 - store i32 %2, i32* @_ZZ11not_removedvE1A, align 4, !dbg !24 - ret i32 %1, !dbg !25 -} - -define dso_local i32 @_Z3foov() !dbg !26 { - %1 = load i32, i32* @_ZZ7removedvE1A, align 4, !dbg !27 - %2 = add nsw i32 %1, 1, !dbg !27 - store i32 %2, i32* @_ZZ7removedvE1A, align 4, !dbg !27 - %3 = load i32, i32* @_ZZ11not_removedvE1A, align 4, !dbg !29 - %4 = add nsw i32 %3, 1, !dbg !29 - store i32 %4, i32* @_ZZ11not_removedvE1A, align 4, !dbg !29 - %5 = add nsw i32 %1, %3, !dbg !31 - ret i32 %5, !dbg !32 -} - -!llvm.dbg.cu = !{!7} -!llvm.module.flags = !{!14, !15, !16, !17, !18, !19, !20, !21, !22} -!llvm.ident = !{!23} - -!0 = !DIGlobalVariableExpression(var: !1, expr: !DIExpression()) -!1 = distinct !DIGlobalVariable(name: "B", scope: !2, file: !3, line: 5, type: !6, isLocal: true, isDefinition: true) -!2 = distinct !DISubprogram(name: "not_removed", linkageName: "_Z11not_removedv", scope: !3, file: !3, line: 5, type: !4, scopeLine: 5, flags: DIFlagPrototyped, spFlags: DISPFlagDefinition, unit: !7, retainedNodes: !13) -!3 = !DIFile(filename: "example.cpp", directory: "") -!4 = !DISubroutineType(types: !5) -!5 = !{!6} -!6 = !DIBasicType(name: "int", size: 32, encoding: DW_ATE_signed) -!7 = distinct !DICompileUnit(language: DW_LANG_C_plus_plus_14, file: !8, producer: "clang version 14.0.0", isOptimized: false, runtimeVersion: 0, emissionKind: FullDebug, globals: !9, splitDebugInlining: false, nameTableKind: None) -!8 = !DIFile(filename: "example.cpp", directory: "") -!9 = !{!0, !10} -!10 = !DIGlobalVariableExpression(var: !11, expr: !DIExpression()) -!11 = distinct !DIGlobalVariable(name: "A", scope: !12, file: !3, line: 2, type: !6, isLocal: false, isDefinition: true) -!12 = distinct !DISubprogram(name: "removed", linkageName: "_Z7removedv", scope: !3, file: !3, line: 2, type: !4, scopeLine: 2, flags: DIFlagPrototyped, spFlags: DISPFlagDefinition, unit: !7, retainedNodes: !13) -!13 = !{} -!14 = !{i32 7, !"Dwarf Version", i32 4} -!15 = !{i32 2, !"Debug Info Version", i32 3} -!16 = !{i32 1, !"wchar_size", i32 4} -!17 = !{i32 1, !"branch-target-enforcement", i32 0} -!18 = !{i32 1, !"sign-return-address", i32 0} -!19 = !{i32 1, !"sign-return-address-all", i32 0} -!20 = !{i32 1, !"sign-return-address-with-bkey", i32 0} -!21 = !{i32 7, !"uwtable", i32 1} -!22 = !{i32 7, !"frame-pointer", i32 1} -!23 = !{!"clang version 14.0.0"} -!24 = !DILocation(line: 5, column: 43, scope: !2) -!25 = !DILocation(line: 5, column: 35, scope: !2) -!26 = distinct !DISubprogram(name: "foo", linkageName: "_Z3foov", scope: !3, file: !3, line: 7, type: !4, scopeLine: 7, flags: DIFlagPrototyped, spFlags: DISPFlagDefinition, unit: !7, retainedNodes: !13) -!27 = !DILocation(line: 2, 
column: 39, scope: !12, inlinedAt: !28) -!28 = distinct !DILocation(line: 7, column: 20, scope: !26) -!29 = !DILocation(line: 5, column: 43, scope: !2, inlinedAt: !30) -!30 = distinct !DILocation(line: 7, column: 32, scope: !26) -!31 = !DILocation(line: 7, column: 30, scope: !26) -!32 = !DILocation(line: 7, column: 13, scope: !26) diff --git a/llvm/test/DebugInfo/Generic/lexical_block_static.ll b/llvm/test/DebugInfo/Generic/lexical_block_static.ll deleted file mode 100644 index 56327b27cdb1..000000000000 --- a/llvm/test/DebugInfo/Generic/lexical_block_static.ll +++ /dev/null @@ -1,141 +0,0 @@ -; RUN: %llc_dwarf -O0 -filetype=obj < %s | llvm-dwarfdump -debug-info - | FileCheck --implicit-check-not "{{DW_TAG|NULL}}" %s - -; inline __attribute__((always_inline)) -; int removed() { -; { -; static int A; -; return A++; -; } -; } -; -; __attribute__((always_inline)) -; int not_removed() { -; { -; static int B; -; return B++; -; } -; } -; -; int foo() { -; { -; static int C; -; return ++C + removed() + not_removed(); -; } -; } - -; CHECK: DW_TAG_compile_unit - -; Out-of-line definition of `not_removed()`. -; The empty lexical block is created to match abstract origin. -; CHECK: DW_TAG_subprogram -; CHECK: DW_AT_abstract_origin {{.*}} "_Z11not_removedv" -; CHECK: DW_TAG_lexical_block -; CHECK: NULL - -; Abstract definition of `removed()` -; CHECK: DW_TAG_subprogram -; CHECK: DW_AT_name ("removed") -; CHECK: DW_AT_inline (DW_INL_inlined) -; CHECK: DW_TAG_lexical_block -; CHECK: DW_TAG_variable -; CHECK: DW_AT_name ("A") -; CHECK: DW_AT_location -; CHECK: NULL -; CHECK: NULL -; CHECK: DW_TAG_base_type - -; Abstract definition of `not_removed()` -; CHECK: DW_TAG_subprogram -; CHECK: DW_AT_name ("not_removed") -; CHECK: DW_AT_inline (DW_INL_inlined) -; CHECK: DW_TAG_lexical_block -; CHECK: DW_TAG_variable -; CHECK: DW_AT_name ("B") -; CHECK: DW_AT_location -; CHECK: NULL -; CHECK: NULL - -; Definition of foo(). 
-; CHECK: DW_TAG_subprogram -; CHECK: DW_AT_name ("foo") -; CHECK: DW_TAG_lexical_block -; CHECK: DW_TAG_inlined_subroutine -; CHECK: DW_TAG_lexical_block -; CHECK: NULL -; CHECK: DW_TAG_inlined_subroutine -; CHECK: DW_TAG_lexical_block -; CHECK: NULL -; CHECK: DW_TAG_variable -; CHECK: DW_AT_name ("C") -; CHECK: DW_AT_location -; CHECK: NULL -; CHECK: NULL -; CHECK: NULL - -@_ZZ11not_removedvE1B = internal global i32 0, align 4, !dbg !0 -@_ZZ3foovE1C = internal global i32 0, align 4, !dbg !10 -@_ZZ7removedvE1A = linkonce_odr dso_local global i32 0, align 4, !dbg !15 - -define dso_local i32 @_Z11not_removedv() !dbg !4 { -entry: - %0 = load i32, i32* @_ZZ11not_removedvE1B, align 4, !dbg !25 - %inc = add nsw i32 %0, 1, !dbg !25 - store i32 %inc, i32* @_ZZ11not_removedvE1B, align 4, !dbg !25 - ret i32 %0, !dbg !26 -} - -define dso_local i32 @_Z3foov() !dbg !13 { -entry: - %0 = load i32, i32* @_ZZ3foovE1C, align 4, !dbg !27 - %inc = add nsw i32 %0, 1, !dbg !27 - store i32 %inc, i32* @_ZZ3foovE1C, align 4, !dbg !27 - %1 = load i32, i32* @_ZZ7removedvE1A, align 4, !dbg !28 - %inc.i3 = add nsw i32 %1, 1, !dbg !28 - store i32 %inc.i3, i32* @_ZZ7removedvE1A, align 4, !dbg !28 - %add = add nsw i32 %inc, %1, !dbg !30 - %2 = load i32, i32* @_ZZ11not_removedvE1B, align 4, !dbg !31 - %inc.i = add nsw i32 %2, 1, !dbg !31 - store i32 %inc.i, i32* @_ZZ11not_removedvE1B, align 4, !dbg !31 - %add2 = add nsw i32 %add, %2, !dbg !33 - ret i32 %add2, !dbg !34 -} - -!llvm.dbg.cu = !{!8} -!llvm.module.flags = !{!19, !20, !21, !22, !23} -!llvm.ident = !{!24} - -!0 = !DIGlobalVariableExpression(var: !1, expr: !DIExpression()) -!1 = distinct !DIGlobalVariable(name: "B", scope: !2, file: !3, line: 13, type: !7, isLocal: true, isDefinition: true) -!2 = distinct !DILexicalBlock(scope: !4, file: !3, line: 12, column: 3) -!3 = !DIFile(filename: "test_static.cpp", directory: "/") -!4 = distinct !DISubprogram(name: "not_removed", linkageName: "_Z11not_removedv", scope: !3, file: !3, line: 11, type: !5, scopeLine: 11, flags: DIFlagPrototyped, spFlags: DISPFlagDefinition, unit: !8, retainedNodes: !14) -!5 = !DISubroutineType(types: !6) -!6 = !{!7} -!7 = !DIBasicType(name: "int", size: 32, encoding: DW_ATE_signed) -!8 = distinct !DICompileUnit(language: DW_LANG_C_plus_plus_14, file: !3, producer: "clang version 14.0.0 (git@github.com:llvm/llvm-project.git e1d09ac2d118825452bfc26e44565f7f4122fd6d)", isOptimized: false, runtimeVersion: 0, emissionKind: FullDebug, globals: !9, splitDebugInlining: false, nameTableKind: None) -!9 = !{!0, !10, !15} -!10 = !DIGlobalVariableExpression(var: !11, expr: !DIExpression()) -!11 = distinct !DIGlobalVariable(name: "C", scope: !12, file: !3, line: 20, type: !7, isLocal: true, isDefinition: true) -!12 = distinct !DILexicalBlock(scope: !13, file: !3, line: 19, column: 3) -!13 = distinct !DISubprogram(name: "foo", linkageName: "_Z3foov", scope: !3, file: !3, line: 18, type: !5, scopeLine: 18, flags: DIFlagPrototyped, spFlags: DISPFlagDefinition, unit: !8, retainedNodes: !14) -!14 = !{} -!15 = !DIGlobalVariableExpression(var: !16, expr: !DIExpression()) -!16 = distinct !DIGlobalVariable(name: "A", scope: !17, file: !3, line: 5, type: !7, isLocal: false, isDefinition: true) -!17 = distinct !DILexicalBlock(scope: !18, file: !3, line: 4, column: 3) -!18 = distinct !DISubprogram(name: "removed", linkageName: "_Z7removedv", scope: !3, file: !3, line: 3, type: !5, scopeLine: 3, flags: DIFlagPrototyped, spFlags: DISPFlagDefinition, unit: !8, retainedNodes: !14) -!19 = !{i32 7, !"Dwarf Version", i32 4} 
-!20 = !{i32 2, !"Debug Info Version", i32 3} -!21 = !{i32 1, !"wchar_size", i32 4} -!22 = !{i32 7, !"uwtable", i32 1} -!23 = !{i32 7, !"frame-pointer", i32 2} -!24 = !{!"clang version 14.0.0 (git@github.com:llvm/llvm-project.git)"} -!25 = !DILocation(line: 14, column: 13, scope: !2) -!26 = !DILocation(line: 14, column: 5, scope: !2) -!27 = !DILocation(line: 21, column: 12, scope: !12) -!28 = !DILocation(line: 6, column: 13, scope: !17, inlinedAt: !29) -!29 = distinct !DILocation(line: 21, column: 18, scope: !12) -!30 = !DILocation(line: 21, column: 16, scope: !12) -!31 = !DILocation(line: 14, column: 13, scope: !2, inlinedAt: !32) -!32 = distinct !DILocation(line: 21, column: 30, scope: !12) -!33 = !DILocation(line: 21, column: 28, scope: !12) -!34 = !DILocation(line: 21, column: 5, scope: !12) diff --git a/llvm/test/DebugInfo/Generic/lexical_block_types.ll b/llvm/test/DebugInfo/Generic/lexical_block_types.ll deleted file mode 100644 index 1d4e9e7eb3e7..000000000000 --- a/llvm/test/DebugInfo/Generic/lexical_block_types.ll +++ /dev/null @@ -1,421 +0,0 @@ -; RUN: %llc_dwarf -O0 -filetype=obj < %s | llvm-dwarfdump -debug-info - | FileCheck --implicit-check-not "{{DW_TAG|NULL}}" %s - -; inline __attribute__((always_inline)) -; void removed() { -; struct A1 { int i; }; -; typedef int Int1; -; { -; struct I1 { Int1 j; }; -; struct C1 { typedef char Char1; Char1 c; }; -; A1 a1; a1.i++; -; { -; I1 i1; i1.j++; -; C1 c1; c1.c++; -; } -; } -; } -; -; __attribute__((always_inline)) -; void not_removed() { -; struct A2 { int i; }; -; typedef int Int2; -; { -; struct I2 { Int2 j; }; -; struct C2 { typedef char Char2; Char2 c; }; -; A2 a2; a2.i++; -; { -; I2 i2; i2.j++; -; C2 c2; c2.c++; -; } -; } -; } -; -; void foo() { -; struct A3 { int i; }; -; typedef int Int3; -; { -; struct I3 { Int3 j; }; -; { -; struct C3 { typedef char Char3; Char3 c; }; -; A3 a3; a3.i++; -; { -; I3 i3; i3.j++; -; C3 c3; c3.c++; -; } -; } -; } -; removed(); -; not_removed(); -; } -; -; CHECK: DW_TAG_compile_unit - -; Out-of-line definition of `not_removed()` shouldn't contain any debug info for types. -; CHECK: DW_TAG_subprogram -; CHECK: DW_AT_abstract_origin {{.*}} "_Z11not_removedv" -; CHECK: DW_TAG_lexical_block -; CHECK: DW_TAG_variable -; CHECK: DW_AT_abstract_origin {{.*}} "a2" -; CHECK: DW_TAG_lexical_block -; CHECK: DW_TAG_variable -; CHECK: DW_AT_abstract_origin {{.*}} "i2" -; CHECK: DW_TAG_variable -; CHECK: DW_AT_abstract_origin {{.*}} "c2" -; CHECK: NULL -; CHECK: NULL -; CHECK: NULL - -; Abstract definition of `removed()`. -; CHECK: DW_TAG_subprogram -; CHECK: DW_AT_name ("removed") -; CHECK: DW_AT_inline (DW_INL_inlined) - -; I1 and C1 defined in the first lexical block, typedef Char1 is a child of C1. -; CHECK: DW_TAG_lexical_block -; CHECK: DW_TAG_variable -; CHECK: DW_AT_name ("a1") -; CHECK: DW_AT_type {{.*}} "A1" -; CHECK: DW_TAG_lexical_block -; CHECK: DW_TAG_variable -; CHECK: DW_AT_type {{.*}} "I1" -; CHECK: DW_TAG_variable -; CHECK: DW_AT_type {{.*}} "C1" -; CHECK: NULL -; CHECK: DW_TAG_structure_type -; CHECK: DW_AT_name ("I1") -; CHECK: DW_TAG_member -; CHECK: DW_AT_type {{.*}} "Int1" -; CHECK: NULL -; CHECK: DW_TAG_structure_type -; CHECK: DW_AT_name ("C1") -; CHECK: DW_TAG_member -; CHECK: DW_AT_type {{.*}} "C1::Char1" -; CHECK: DW_TAG_typedef -; CHECK: DW_AT_name ("Char1") -; CHECK: NULL -; CHECK: NULL - -; A1 and typedef Int1 defined in subprogram scope. 
-; CHECK: DW_TAG_structure_type -; CHECK: DW_AT_name ("A1") -; CHECK: DW_TAG_member -; CHECK: NULL -; CHECK: DW_TAG_typedef -; CHECK: DW_AT_name ("Int1") -; CHECK: NULL - -; CHECK: DW_TAG_base_type -; CHECK: DW_TAG_base_type - -; Abstract definition of `not_removed()`. -; CHECK: DW_TAG_subprogram -; CHECK: DW_AT_name ("not_removed") -; CHECK: DW_AT_inline (DW_INL_inlined) - -; I2 and C2 defined in the first lexical block, typedef Char2 is a child of C2. -; CHECK: DW_TAG_lexical_block -; CHECK: DW_TAG_variable -; CHECK: DW_AT_name ("a2") -; CHECK: DW_AT_type {{.*}} "A2" -; CHECK: DW_TAG_lexical_block -; CHECK: DW_TAG_variable -; CHECK: DW_AT_name ("i2") -; CHECK: DW_AT_type {{.*}} "I2" -; CHECK: DW_TAG_variable -; CHECK: DW_AT_name ("c2") -; CHECK: DW_AT_type {{.*}} "C2" -; CHECK: NULL -; CHECK: DW_TAG_structure_type -; CHECK: DW_AT_name ("I2") -; CHECK: DW_TAG_member -; CHECK: DW_AT_type {{.*}} "Int2" -; CHECK: NULL -; CHECK: DW_TAG_structure_type -; CHECK: DW_AT_name ("C2") -; CHECK: DW_TAG_member -; CHECK: DW_AT_type {{.*}} "C2::Char2" -; CHECK: DW_TAG_typedef -; CHECK: DW_AT_name ("Char2") -; CHECK: NULL -; CHECK: NULL - -; A2 and typedef Int2 defined in subprogram scope. -; CHECK: DW_TAG_structure_type -; CHECK: DW_AT_name ("A2") -; CHECK: DW_TAG_member -; CHECK: NULL -; CHECK: DW_TAG_typedef -; CHECK: DW_AT_name ("Int2") -; CHECK: NULL - -; Definition of `foo()`. -; CHECK: DW_TAG_subprogram -; CHECK: DW_AT_name ("foo") - -; CHECK: DW_TAG_lexical_block -; CHECK: DW_TAG_variable -; CHECK: DW_AT_name ("a3") -; CHECK: DW_AT_type {{.*}} "A3" -; CHECK: DW_TAG_lexical_block -; CHECK: DW_TAG_variable -; CHECK: DW_AT_name ("i3") -; CHECK: DW_AT_type {{.*}} "I3" -; CHECK: DW_TAG_variable -; CHECK: DW_AT_name ("c3") -; CHECK: DW_AT_type {{.*}} "C3" -; CHECK: NULL - -; C3 has the inner lexical block scope, typedef Char3 is a child of C3. -; CHECK: DW_TAG_structure_type -; CHECK: DW_AT_name ("C3") -; CHECK: DW_TAG_member -; CHECK: DW_AT_type {{.*}} "C3::Char3" -; CHECK: DW_TAG_typedef -; CHECK: DW_AT_name ("Char3") -; CHECK: NULL -; CHECK: NULL - -; CHECK: DW_TAG_inlined_subroutine -; CHECK: DW_AT_abstract_origin {{.*}} "_Z7removedv" -; CHECK: DW_TAG_lexical_block -; CHECK: DW_TAG_variable -; CHECK: DW_TAG_lexical_block -; CHECK: DW_TAG_variable -; CHECK: DW_TAG_variable -; CHECK: NULL -; CHECK: NULL -; CHECK: NULL - -; CHECK: DW_TAG_inlined_subroutine -; CHECK: DW_AT_abstract_origin {{.*}} "_Z11not_removedv" -; CHECK: DW_TAG_lexical_block -; CHECK: DW_TAG_variable -; CHECK: DW_TAG_lexical_block -; CHECK: DW_TAG_variable -; CHECK: DW_TAG_variable -; CHECK: NULL -; CHECK: NULL -; CHECK: NULL - -; A3 and typedef Int3 defined in the subprogram scope. -; FIXME: I3 has subprogram scope here, but should be in the outer lexical block -; (which is ommitted). It wasn't placed correctly, because it's the only non-scope -; entity in the block and it isn't listed in retainedTypes; we simply wasn't aware -; about it while deciding whether to create a lexical block or not. 
-; CHECK: DW_TAG_structure_type -; CHECK: DW_AT_name ("A3") -; CHECK: DW_TAG_member -; CHECK: NULL -; CHECK: DW_TAG_structure_type -; CHECK: DW_AT_name ("I3") -; CHECK: DW_TAG_member -; CHECK: DW_AT_type {{.*}} "Int3" -; CHECK: NULL -; CHECK: DW_TAG_typedef -; CHECK: DW_AT_name ("Int3") -; CHECK: NULL -; CHECK: NULL - -%struct.A2 = type { i32 } -%struct.I2 = type { i32 } -%struct.C2 = type { i8 } -%struct.A1 = type { i32 } -%struct.I1 = type { i32 } -%struct.C1 = type { i8 } -%struct.A3 = type { i32 } -%struct.I3 = type { i32 } -%struct.C3 = type { i8 } - -define dso_local void @_Z11not_removedv() !dbg !8 { -entry: - %a2 = alloca %struct.A2, align 4 - %i2 = alloca %struct.I2, align 4 - %c2 = alloca %struct.C2, align 1 - call void @llvm.dbg.declare(metadata %struct.A2* %a2, metadata !12, metadata !DIExpression()), !dbg !18 - %i = getelementptr inbounds %struct.A2, %struct.A2* %a2, i32 0, i32 0, !dbg !19 - %0 = load i32, i32* %i, align 4, !dbg !20 - %inc = add nsw i32 %0, 1, !dbg !20 - store i32 %inc, i32* %i, align 4, !dbg !20 - call void @llvm.dbg.declare(metadata %struct.I2* %i2, metadata !21, metadata !DIExpression()), !dbg !27 - %j = getelementptr inbounds %struct.I2, %struct.I2* %i2, i32 0, i32 0, !dbg !28 - %1 = load i32, i32* %j, align 4, !dbg !29 - %inc1 = add nsw i32 %1, 1, !dbg !29 - store i32 %inc1, i32* %j, align 4, !dbg !29 - call void @llvm.dbg.declare(metadata %struct.C2* %c2, metadata !30, metadata !DIExpression()), !dbg !36 - %c = getelementptr inbounds %struct.C2, %struct.C2* %c2, i32 0, i32 0, !dbg !37 - %2 = load i8, i8* %c, align 1, !dbg !38 - %inc2 = add i8 %2, 1, !dbg !38 - store i8 %inc2, i8* %c, align 1, !dbg !38 - ret void, !dbg !39 -} - -declare void @llvm.dbg.declare(metadata, metadata, metadata) - -define dso_local void @_Z3foov() !dbg !40 { -entry: - %a1.i = alloca %struct.A1, align 4 - %i1.i = alloca %struct.I1, align 4 - %c1.i = alloca %struct.C1, align 1 - %a2.i = alloca %struct.A2, align 4 - %i2.i = alloca %struct.I2, align 4 - %c2.i = alloca %struct.C2, align 1 - %a3 = alloca %struct.A3, align 4 - %i3 = alloca %struct.I3, align 4 - %c3 = alloca %struct.C3, align 1 - call void @llvm.dbg.declare(metadata %struct.A3* %a3, metadata !41, metadata !DIExpression()), !dbg !47 - %i = getelementptr inbounds %struct.A3, %struct.A3* %a3, i32 0, i32 0, !dbg !48 - %0 = load i32, i32* %i, align 4, !dbg !49 - %inc = add nsw i32 %0, 1, !dbg !49 - store i32 %inc, i32* %i, align 4, !dbg !49 - call void @llvm.dbg.declare(metadata %struct.I3* %i3, metadata !50, metadata !DIExpression()), !dbg !56 - %j = getelementptr inbounds %struct.I3, %struct.I3* %i3, i32 0, i32 0, !dbg !57 - %1 = load i32, i32* %j, align 4, !dbg !58 - %inc1 = add nsw i32 %1, 1, !dbg !58 - store i32 %inc1, i32* %j, align 4, !dbg !58 - call void @llvm.dbg.declare(metadata %struct.C3* %c3, metadata !59, metadata !DIExpression()), !dbg !64 - %c = getelementptr inbounds %struct.C3, %struct.C3* %c3, i32 0, i32 0, !dbg !65 - %2 = load i8, i8* %c, align 1, !dbg !66 - %inc2 = add i8 %2, 1, !dbg !66 - store i8 %inc2, i8* %c, align 1, !dbg !66 - call void @llvm.dbg.declare(metadata %struct.A1* %a1.i, metadata !67, metadata !DIExpression()), !dbg !73 - %i.i3 = getelementptr inbounds %struct.A1, %struct.A1* %a1.i, i32 0, i32 0, !dbg !75 - %3 = load i32, i32* %i.i3, align 4, !dbg !76 - %inc.i4 = add nsw i32 %3, 1, !dbg !76 - store i32 %inc.i4, i32* %i.i3, align 4, !dbg !76 - call void @llvm.dbg.declare(metadata %struct.I1* %i1.i, metadata !77, metadata !DIExpression()), !dbg !83 - %j.i5 = getelementptr inbounds 
%struct.I1, %struct.I1* %i1.i, i32 0, i32 0, !dbg !84 - %4 = load i32, i32* %j.i5, align 4, !dbg !85 - %inc1.i6 = add nsw i32 %4, 1, !dbg !85 - store i32 %inc1.i6, i32* %j.i5, align 4, !dbg !85 - call void @llvm.dbg.declare(metadata %struct.C1* %c1.i, metadata !86, metadata !DIExpression()), !dbg !91 - %c.i7 = getelementptr inbounds %struct.C1, %struct.C1* %c1.i, i32 0, i32 0, !dbg !92 - %5 = load i8, i8* %c.i7, align 1, !dbg !93 - %inc2.i8 = add i8 %5, 1, !dbg !93 - store i8 %inc2.i8, i8* %c.i7, align 1, !dbg !93 - call void @llvm.dbg.declare(metadata %struct.A2* %a2.i, metadata !12, metadata !DIExpression()), !dbg !94 - %i.i = getelementptr inbounds %struct.A2, %struct.A2* %a2.i, i32 0, i32 0, !dbg !96 - %6 = load i32, i32* %i.i, align 4, !dbg !97 - %inc.i = add nsw i32 %6, 1, !dbg !97 - store i32 %inc.i, i32* %i.i, align 4, !dbg !97 - call void @llvm.dbg.declare(metadata %struct.I2* %i2.i, metadata !21, metadata !DIExpression()), !dbg !98 - %j.i = getelementptr inbounds %struct.I2, %struct.I2* %i2.i, i32 0, i32 0, !dbg !99 - %7 = load i32, i32* %j.i, align 4, !dbg !100 - %inc1.i = add nsw i32 %7, 1, !dbg !100 - store i32 %inc1.i, i32* %j.i, align 4, !dbg !100 - call void @llvm.dbg.declare(metadata %struct.C2* %c2.i, metadata !30, metadata !DIExpression()), !dbg !101 - %c.i = getelementptr inbounds %struct.C2, %struct.C2* %c2.i, i32 0, i32 0, !dbg !102 - %8 = load i8, i8* %c.i, align 1, !dbg !103 - %inc2.i = add i8 %8, 1, !dbg !103 - store i8 %inc2.i, i8* %c.i, align 1, !dbg !103 - ret void, !dbg !104 -} - -!llvm.dbg.cu = !{!0} -!llvm.module.flags = !{!2, !3, !4, !5, !6} -!llvm.ident = !{!7} - -!0 = distinct !DICompileUnit(language: DW_LANG_C_plus_plus_14, file: !1, producer: "clang version 14.0.0", isOptimized: false, runtimeVersion: 0, emissionKind: FullDebug, splitDebugInlining: false, nameTableKind: None) -!1 = !DIFile(filename: "test.cpp", directory: "/") -!2 = !{i32 7, !"Dwarf Version", i32 4} -!3 = !{i32 2, !"Debug Info Version", i32 3} -!4 = !{i32 1, !"wchar_size", i32 4} -!5 = !{i32 7, !"uwtable", i32 1} -!6 = !{i32 7, !"frame-pointer", i32 2} -!7 = !{!"clang version 14.0.0"} -!8 = distinct !DISubprogram(name: "not_removed", linkageName: "_Z11not_removedv", scope: !1, file: !1, line: 17, type: !9, scopeLine: 17, flags: DIFlagPrototyped, spFlags: DISPFlagDefinition, unit: !0, retainedNodes: !11) -!9 = !DISubroutineType(types: !10) -!10 = !{null} -!11 = !{} -!12 = !DILocalVariable(name: "a2", scope: !13, file: !1, line: 23, type: !14) -!13 = distinct !DILexicalBlock(scope: !8, file: !1, line: 20, column: 3) -!14 = distinct !DICompositeType(tag: DW_TAG_structure_type, name: "A2", scope: !8, file: !1, line: 18, size: 32, flags: DIFlagTypePassByValue, elements: !15) -!15 = !{!16} -!16 = !DIDerivedType(tag: DW_TAG_member, name: "i", scope: !14, file: !1, line: 18, baseType: !17, size: 32) -!17 = !DIBasicType(name: "int", size: 32, encoding: DW_ATE_signed) -!18 = !DILocation(line: 23, column: 8, scope: !13) -!19 = !DILocation(line: 23, column: 15, scope: !13) -!20 = !DILocation(line: 23, column: 16, scope: !13) -!21 = !DILocalVariable(name: "i2", scope: !22, file: !1, line: 25, type: !23) -!22 = distinct !DILexicalBlock(scope: !13, file: !1, line: 24, column: 5) -!23 = distinct !DICompositeType(tag: DW_TAG_structure_type, name: "I2", scope: !13, file: !1, line: 21, size: 32, flags: DIFlagTypePassByValue, elements: !24) -!24 = !{!25} -!25 = !DIDerivedType(tag: DW_TAG_member, name: "j", scope: !23, file: !1, line: 21, baseType: !26, size: 32) -!26 = !DIDerivedType(tag: DW_TAG_typedef, 
name: "Int2", scope: !8, file: !1, line: 19, baseType: !17) -!27 = !DILocation(line: 25, column: 10, scope: !22) -!28 = !DILocation(line: 25, column: 17, scope: !22) -!29 = !DILocation(line: 25, column: 18, scope: !22) -!30 = !DILocalVariable(name: "c2", scope: !22, file: !1, line: 26, type: !31) -!31 = distinct !DICompositeType(tag: DW_TAG_structure_type, name: "C2", scope: !13, file: !1, line: 22, size: 8, flags: DIFlagTypePassByValue, elements: !32) -!32 = !{!33} -!33 = !DIDerivedType(tag: DW_TAG_member, name: "c", scope: !31, file: !1, line: 22, baseType: !34, size: 8) -!34 = !DIDerivedType(tag: DW_TAG_typedef, name: "Char2", scope: !31, file: !1, line: 22, baseType: !35) -!35 = !DIBasicType(name: "char", size: 8, encoding: DW_ATE_signed_char) -!36 = !DILocation(line: 26, column: 10, scope: !22) -!37 = !DILocation(line: 26, column: 17, scope: !22) -!38 = !DILocation(line: 26, column: 18, scope: !22) -!39 = !DILocation(line: 29, column: 1, scope: !8) -!40 = distinct !DISubprogram(name: "foo", linkageName: "_Z3foov", scope: !1, file: !1, line: 31, type: !9, scopeLine: 31, flags: DIFlagPrototyped, spFlags: DISPFlagDefinition, unit: !0, retainedNodes: !11) -!41 = !DILocalVariable(name: "a3", scope: !42, file: !1, line: 38, type: !44) -!42 = distinct !DILexicalBlock(scope: !43, file: !1, line: 36, column: 5) -!43 = distinct !DILexicalBlock(scope: !40, file: !1, line: 34, column: 3) -!44 = distinct !DICompositeType(tag: DW_TAG_structure_type, name: "A3", scope: !40, file: !1, line: 32, size: 32, flags: DIFlagTypePassByValue, elements: !45) -!45 = !{!46} -!46 = !DIDerivedType(tag: DW_TAG_member, name: "i", scope: !44, file: !1, line: 32, baseType: !17, size: 32) -!47 = !DILocation(line: 38, column: 10, scope: !42) -!48 = !DILocation(line: 38, column: 17, scope: !42) -!49 = !DILocation(line: 38, column: 18, scope: !42) -!50 = !DILocalVariable(name: "i3", scope: !51, file: !1, line: 40, type: !52) -!51 = distinct !DILexicalBlock(scope: !42, file: !1, line: 39, column: 7) -!52 = distinct !DICompositeType(tag: DW_TAG_structure_type, name: "I3", scope: !43, file: !1, line: 35, size: 32, flags: DIFlagTypePassByValue, elements: !53) -!53 = !{!54} -!54 = !DIDerivedType(tag: DW_TAG_member, name: "j", scope: !52, file: !1, line: 35, baseType: !55, size: 32) -!55 = !DIDerivedType(tag: DW_TAG_typedef, name: "Int3", scope: !40, file: !1, line: 33, baseType: !17) -!56 = !DILocation(line: 40, column: 12, scope: !51) -!57 = !DILocation(line: 40, column: 19, scope: !51) -!58 = !DILocation(line: 40, column: 20, scope: !51) -!59 = !DILocalVariable(name: "c3", scope: !51, file: !1, line: 41, type: !60) -!60 = distinct !DICompositeType(tag: DW_TAG_structure_type, name: "C3", scope: !42, file: !1, line: 37, size: 8, flags: DIFlagTypePassByValue, elements: !61) -!61 = !{!62} -!62 = !DIDerivedType(tag: DW_TAG_member, name: "c", scope: !60, file: !1, line: 37, baseType: !63, size: 8) -!63 = !DIDerivedType(tag: DW_TAG_typedef, name: "Char3", scope: !60, file: !1, line: 37, baseType: !35) -!64 = !DILocation(line: 41, column: 12, scope: !51) -!65 = !DILocation(line: 41, column: 19, scope: !51) -!66 = !DILocation(line: 41, column: 20, scope: !51) -!67 = !DILocalVariable(name: "a1", scope: !68, file: !1, line: 8, type: !70) -!68 = distinct !DILexicalBlock(scope: !69, file: !1, line: 5, column: 3) -!69 = distinct !DISubprogram(name: "removed", linkageName: "_Z7removedv", scope: !1, file: !1, line: 2, type: !9, scopeLine: 2, flags: DIFlagPrototyped, spFlags: DISPFlagDefinition, unit: !0, retainedNodes: !11) -!70 = distinct 
!DICompositeType(tag: DW_TAG_structure_type, name: "A1", scope: !69, file: !1, line: 3, size: 32, flags: DIFlagTypePassByValue, elements: !71, identifier: "_ZTSZ7removedvE2A1") -!71 = !{!72} -!72 = !DIDerivedType(tag: DW_TAG_member, name: "i", scope: !70, file: !1, line: 3, baseType: !17, size: 32) -!73 = !DILocation(line: 8, column: 8, scope: !68, inlinedAt: !74) -!74 = distinct !DILocation(line: 45, column: 3, scope: !40) -!75 = !DILocation(line: 8, column: 15, scope: !68, inlinedAt: !74) -!76 = !DILocation(line: 8, column: 16, scope: !68, inlinedAt: !74) -!77 = !DILocalVariable(name: "i1", scope: !78, file: !1, line: 10, type: !79) -!78 = distinct !DILexicalBlock(scope: !68, file: !1, line: 9, column: 5) -!79 = distinct !DICompositeType(tag: DW_TAG_structure_type, name: "I1", scope: !68, file: !1, line: 6, size: 32, flags: DIFlagTypePassByValue, elements: !80, identifier: "_ZTSZ7removedvE2I1") -!80 = !{!81} -!81 = !DIDerivedType(tag: DW_TAG_member, name: "j", scope: !79, file: !1, line: 6, baseType: !82, size: 32) -!82 = !DIDerivedType(tag: DW_TAG_typedef, name: "Int1", scope: !69, file: !1, line: 4, baseType: !17) -!83 = !DILocation(line: 10, column: 10, scope: !78, inlinedAt: !74) -!84 = !DILocation(line: 10, column: 17, scope: !78, inlinedAt: !74) -!85 = !DILocation(line: 10, column: 18, scope: !78, inlinedAt: !74) -!86 = !DILocalVariable(name: "c1", scope: !78, file: !1, line: 11, type: !87) -!87 = distinct !DICompositeType(tag: DW_TAG_structure_type, name: "C1", scope: !68, file: !1, line: 7, size: 8, flags: DIFlagTypePassByValue, elements: !88, identifier: "_ZTSZ7removedvE2C1") -!88 = !{!89} -!89 = !DIDerivedType(tag: DW_TAG_member, name: "c", scope: !87, file: !1, line: 7, baseType: !90, size: 8) -!90 = !DIDerivedType(tag: DW_TAG_typedef, name: "Char1", scope: !87, file: !1, line: 7, baseType: !35) -!91 = !DILocation(line: 11, column: 10, scope: !78, inlinedAt: !74) -!92 = !DILocation(line: 11, column: 17, scope: !78, inlinedAt: !74) -!93 = !DILocation(line: 11, column: 18, scope: !78, inlinedAt: !74) -!94 = !DILocation(line: 23, column: 8, scope: !13, inlinedAt: !95) -!95 = distinct !DILocation(line: 46, column: 3, scope: !40) -!96 = !DILocation(line: 23, column: 15, scope: !13, inlinedAt: !95) -!97 = !DILocation(line: 23, column: 16, scope: !13, inlinedAt: !95) -!98 = !DILocation(line: 25, column: 10, scope: !22, inlinedAt: !95) -!99 = !DILocation(line: 25, column: 17, scope: !22, inlinedAt: !95) -!100 = !DILocation(line: 25, column: 18, scope: !22, inlinedAt: !95) -!101 = !DILocation(line: 26, column: 10, scope: !22, inlinedAt: !95) -!102 = !DILocation(line: 26, column: 17, scope: !22, inlinedAt: !95) -!103 = !DILocation(line: 26, column: 18, scope: !22, inlinedAt: !95) -!104 = !DILocation(line: 47, column: 1, scope: !40) diff --git a/llvm/test/DebugInfo/Generic/namespace.ll b/llvm/test/DebugInfo/Generic/namespace.ll index 7b9bf9b531b8..2c635ee5731a 100644 --- a/llvm/test/DebugInfo/Generic/namespace.ll +++ b/llvm/test/DebugInfo/Generic/namespace.ll @@ -13,19 +13,6 @@ ; CHECK-NOT: DW_AT_decl_file ; CHECK-NOT: DW_AT_decl_line -; CHECK: DW_TAG_subprogram -; CHECK: DW_AT_MIPS_linkage_name -; CHECK: DW_AT_name ("f1") -; CHECK: [[FUNC1:.*]]: DW_TAG_subprogram -; CHECK: DW_AT_MIPS_linkage_name -; CHECK: DW_AT_name ("f1") -; CHECK: DW_TAG_formal_parameter -; CHECK: NULL - -; CHECK: [[FUNC_FWD:0x[0-9a-f]*]]:{{.*}}DW_TAG_subprogram -; CHECK: DW_AT_name ("func_fwd") -; CHECK-NOT: DW_AT_declaration - ; CHECK: [[I:0x[0-9a-f]*]]:{{ *}}DW_TAG_variable ; CHECK: DW_AT_name ("i") ; CHECK: 
[[VAR_FWD:0x[0-9a-f]*]]:{{ *}}DW_TAG_variable @@ -37,6 +24,15 @@ ; CHECK: [[BAR:0x[0-9a-f]*]]:{{ *}}DW_TAG_structure_type ; CHECK: DW_AT_name ("bar") +; CHECK: DW_TAG_subprogram +; CHECK: DW_AT_MIPS_linkage_name +; CHECK: DW_AT_name ("f1") +; CHECK: [[FUNC1:.*]]: DW_TAG_subprogram +; CHECK: DW_AT_MIPS_linkage_name +; CHECK: DW_AT_name ("f1") +; CHECK: DW_TAG_formal_parameter +; CHECK: NULL + ; CHECK: [[BAZ:0x[0-9a-f]*]]:{{.*}}DW_TAG_typedef ; CHECK: DW_AT_name ("baz") @@ -47,6 +43,10 @@ ; CHECK: [[FUNC_DECL:0x[0-9a-f]*]]:{{.*}}DW_TAG_subprogram ; CHECK: DW_AT_name ("func_decl") ; CHECK: DW_AT_declaration + +; CHECK: [[FUNC_FWD:0x[0-9a-f]*]]:{{.*}}DW_TAG_subprogram +; CHECK: DW_AT_name ("func_fwd") +; CHECK-NOT: DW_AT_declaration ; CHECK: NULL ; CHECK: DW_TAG_imported_module @@ -56,17 +56,18 @@ ; CHECK: DW_TAG_imported_declaration ; CHECK: NULL +; CHECK: DW_TAG_base_type +; CHECK: DW_TAG_imported_module +; CHECK: DW_AT_decl_file ([[F2:.*]]) +; CHECK: DW_AT_decl_line (18) +; CHECK: DW_AT_import ([[NS1]]) +; CHECK: DW_TAG_imported_declaration + ; CHECK: DW_TAG_subprogram ; CHECK: DW_TAG_subprogram ; CHECK: DW_AT_MIPS_linkage_name ; CHECK: DW_AT_name ("func") ; CHECK: DW_TAG_formal_parameter -; CHECK: DW_TAG_lexical_block -; CHECK: DW_TAG_imported_module -; CHECK: DW_AT_decl_file ([[F2]]) -; CHECK: DW_AT_decl_line (23) -; CHECK: DW_AT_import {{.*}} -; CHECK: NULL ; CHECK: DW_TAG_imported_module ; CHECK: DW_AT_decl_file ([[F2:.*]]) ; CHECK: DW_AT_decl_line (26) @@ -117,16 +118,16 @@ ; CHECK: DW_AT_decl_file ([[F2]]) ; CHECK: DW_AT_decl_line (37) ; CHECK: DW_AT_import ([[FUNC_FWD]]) +; CHECK: DW_TAG_lexical_block +; CHECK: DW_TAG_imported_module +; CHECK: DW_AT_decl_file ([[F2]]) +; CHECK: DW_AT_decl_line (23) +; CHECK: DW_AT_import {{.*}} +; CHECK: NULL ; CHECK: NULL ; CHECK: DW_TAG_subprogram ; CHECK: DW_TAG_base_type -; CHECK: DW_TAG_imported_module -; CHECK: DW_AT_decl_file ([[F2:.*]]) -; CHECK: DW_AT_decl_line (18) -; CHECK: DW_AT_import ([[NS1]]) -; CHECK: DW_TAG_imported_declaration -; CHECK: DW_TAG_base_type ; CHECK: NULL ; IR generated from clang/test/CodeGenCXX/debug-info-namespace.cpp, file paths diff --git a/llvm/test/DebugInfo/Generic/varargs.ll b/llvm/test/DebugInfo/Generic/varargs.ll index 99acaff77671..81b5091afd42 100644 --- a/llvm/test/DebugInfo/Generic/varargs.ll +++ b/llvm/test/DebugInfo/Generic/varargs.ll @@ -12,23 +12,23 @@ ; ; CHECK: DW_TAG_subprogram ; CHECK-NOT: DW_TAG -; CHECK: DW_AT_name ("b") +; CHECK: DW_AT_name ("a") ; CHECK-NOT: DW_TAG ; CHECK: DW_TAG_formal_parameter ; CHECK-NOT: DW_TAG -; CHECK: DW_TAG_variable -; CHECK-NOT: DW_TAG -; CHECK: DW_TAG_variable +; CHECK: DW_TAG_formal_parameter ; CHECK-NOT: DW_TAG ; CHECK: DW_TAG_unspecified_parameters ; ; CHECK: DW_TAG_subprogram ; CHECK-NOT: DW_TAG -; CHECK: DW_AT_name ("a") +; CHECK: DW_AT_name ("b") ; CHECK-NOT: DW_TAG ; CHECK: DW_TAG_formal_parameter ; CHECK-NOT: DW_TAG -; CHECK: DW_TAG_formal_parameter +; CHECK: DW_TAG_variable +; CHECK-NOT: DW_TAG +; CHECK: DW_TAG_variable ; CHECK-NOT: DW_TAG ; CHECK: DW_TAG_unspecified_parameters ; diff --git a/llvm/test/DebugInfo/MIR/AArch64/implicit-def-dead-scope.mir b/llvm/test/DebugInfo/MIR/AArch64/implicit-def-dead-scope.mir index 261da42a88d0..98ec9dc8fa41 100644 --- a/llvm/test/DebugInfo/MIR/AArch64/implicit-def-dead-scope.mir +++ b/llvm/test/DebugInfo/MIR/AArch64/implicit-def-dead-scope.mir @@ -5,7 +5,7 @@ # encountering an IMPLICIT_DEF in its own lexical scope. 
# CHECK: .debug_info contents: -# CHECK: DW_TAG_formal_parameter [13] +# CHECK: DW_TAG_formal_parameter [14] # CHECK-NEXT: DW_AT_const_value [DW_FORM_udata] (0) # CHECK-NEXT: DW_AT_abstract_origin {{.*}} "name" --- | diff --git a/llvm/test/DebugInfo/NVPTX/debug-addr-class.ll b/llvm/test/DebugInfo/NVPTX/debug-addr-class.ll index d2b7a31ec9f6..73ec34787ad6 100644 --- a/llvm/test/DebugInfo/NVPTX/debug-addr-class.ll +++ b/llvm/test/DebugInfo/NVPTX/debug-addr-class.ll @@ -105,68 +105,68 @@ declare void @llvm.dbg.declare(metadata, metadata, metadata) ; CHECK-NEXT:.b8 0 // EOM(1) ; CHECK-NEXT:.b8 0 // EOM(2) ; CHECK-NEXT:.b8 2 // Abbreviation Code -; CHECK-NEXT:.b8 46 // DW_TAG_subprogram -; CHECK-NEXT:.b8 1 // DW_CHILDREN_yes -; CHECK-NEXT:.b8 17 // DW_AT_low_pc -; CHECK-NEXT:.b8 1 // DW_FORM_addr -; CHECK-NEXT:.b8 18 // DW_AT_high_pc -; CHECK-NEXT:.b8 1 // DW_FORM_addr -; CHECK-NEXT:.b8 64 // DW_AT_frame_base -; CHECK-NEXT:.b8 10 // DW_FORM_block1 -; CHECK-NEXT:.b8 135 // DW_AT_MIPS_linkage_name -; CHECK-NEXT:.b8 64 -; CHECK-NEXT:.b8 8 // DW_FORM_string +; CHECK-NEXT:.b8 52 // DW_TAG_variable +; CHECK-NEXT:.b8 0 // DW_CHILDREN_no ; CHECK-NEXT:.b8 3 // DW_AT_name ; CHECK-NEXT:.b8 8 // DW_FORM_string +; CHECK-NEXT:.b8 73 // DW_AT_type +; CHECK-NEXT:.b8 19 // DW_FORM_ref4 +; CHECK-NEXT:.b8 63 // DW_AT_external +; CHECK-NEXT:.b8 12 // DW_FORM_flag ; CHECK-NEXT:.b8 58 // DW_AT_decl_file ; CHECK-NEXT:.b8 11 // DW_FORM_data1 ; CHECK-NEXT:.b8 59 // DW_AT_decl_line ; CHECK-NEXT:.b8 11 // DW_FORM_data1 -; CHECK-NEXT:.b8 63 // DW_AT_external -; CHECK-NEXT:.b8 12 // DW_FORM_flag +; CHECK-NEXT:.b8 51 // DW_AT_address_class +; CHECK-NEXT:.b8 11 // DW_FORM_data1 +; CHECK-NEXT:.b8 2 // DW_AT_location +; CHECK-NEXT:.b8 10 // DW_FORM_block1 ; CHECK-NEXT:.b8 0 // EOM(1) ; CHECK-NEXT:.b8 0 // EOM(2) ; CHECK-NEXT:.b8 3 // Abbreviation Code -; CHECK-NEXT:.b8 5 // DW_TAG_formal_parameter +; CHECK-NEXT:.b8 36 // DW_TAG_base_type ; CHECK-NEXT:.b8 0 // DW_CHILDREN_no ; CHECK-NEXT:.b8 3 // DW_AT_name ; CHECK-NEXT:.b8 8 // DW_FORM_string -; CHECK-NEXT:.b8 58 // DW_AT_decl_file +; CHECK-NEXT:.b8 62 // DW_AT_encoding ; CHECK-NEXT:.b8 11 // DW_FORM_data1 -; CHECK-NEXT:.b8 59 // DW_AT_decl_line +; CHECK-NEXT:.b8 11 // DW_AT_byte_size ; CHECK-NEXT:.b8 11 // DW_FORM_data1 -; CHECK-NEXT:.b8 73 // DW_AT_type -; CHECK-NEXT:.b8 19 // DW_FORM_ref4 ; CHECK-NEXT:.b8 0 // EOM(1) ; CHECK-NEXT:.b8 0 // EOM(2) ; CHECK-NEXT:.b8 4 // Abbreviation Code -; CHECK-NEXT:.b8 52 // DW_TAG_variable -; CHECK-NEXT:.b8 0 // DW_CHILDREN_no +; CHECK-NEXT:.b8 46 // DW_TAG_subprogram +; CHECK-NEXT:.b8 1 // DW_CHILDREN_yes +; CHECK-NEXT:.b8 17 // DW_AT_low_pc +; CHECK-NEXT:.b8 1 // DW_FORM_addr +; CHECK-NEXT:.b8 18 // DW_AT_high_pc +; CHECK-NEXT:.b8 1 // DW_FORM_addr +; CHECK-NEXT:.b8 64 // DW_AT_frame_base +; CHECK-NEXT:.b8 10 // DW_FORM_block1 +; CHECK-NEXT:.b8 135 // DW_AT_MIPS_linkage_name +; CHECK-NEXT:.b8 64 +; CHECK-NEXT:.b8 8 // DW_FORM_string ; CHECK-NEXT:.b8 3 // DW_AT_name ; CHECK-NEXT:.b8 8 // DW_FORM_string -; CHECK-NEXT:.b8 73 // DW_AT_type -; CHECK-NEXT:.b8 19 // DW_FORM_ref4 -; CHECK-NEXT:.b8 63 // DW_AT_external -; CHECK-NEXT:.b8 12 // DW_FORM_flag ; CHECK-NEXT:.b8 58 // DW_AT_decl_file ; CHECK-NEXT:.b8 11 // DW_FORM_data1 ; CHECK-NEXT:.b8 59 // DW_AT_decl_line ; CHECK-NEXT:.b8 11 // DW_FORM_data1 -; CHECK-NEXT:.b8 51 // DW_AT_address_class -; CHECK-NEXT:.b8 11 // DW_FORM_data1 -; CHECK-NEXT:.b8 2 // DW_AT_location -; CHECK-NEXT:.b8 10 // DW_FORM_block1 +; CHECK-NEXT:.b8 63 // DW_AT_external +; CHECK-NEXT:.b8 12 // DW_FORM_flag ; 
CHECK-NEXT:.b8 0 // EOM(1) ; CHECK-NEXT:.b8 0 // EOM(2) ; CHECK-NEXT:.b8 5 // Abbreviation Code -; CHECK-NEXT:.b8 36 // DW_TAG_base_type +; CHECK-NEXT:.b8 5 // DW_TAG_formal_parameter ; CHECK-NEXT:.b8 0 // DW_CHILDREN_no ; CHECK-NEXT:.b8 3 // DW_AT_name ; CHECK-NEXT:.b8 8 // DW_FORM_string -; CHECK-NEXT:.b8 62 // DW_AT_encoding +; CHECK-NEXT:.b8 58 // DW_AT_decl_file ; CHECK-NEXT:.b8 11 // DW_FORM_data1 -; CHECK-NEXT:.b8 11 // DW_AT_byte_size +; CHECK-NEXT:.b8 59 // DW_AT_decl_line ; CHECK-NEXT:.b8 11 // DW_FORM_data1 +; CHECK-NEXT:.b8 73 // DW_AT_type +; CHECK-NEXT:.b8 19 // DW_FORM_ref4 ; CHECK-NEXT:.b8 0 // EOM(1) ; CHECK-NEXT:.b8 0 // EOM(2) ; CHECK-NEXT:.b8 6 // Abbreviation Code @@ -258,7 +258,46 @@ declare void @llvm.dbg.declare(metadata, metadata, metadata) ; CHECK-NEXT:.b8 0 ; CHECK-NEXT:.b64 Lfunc_begin0 // DW_AT_low_pc ; CHECK-NEXT:.b64 Lfunc_end0 // DW_AT_high_pc -; CHECK-NEXT:.b8 2 // Abbrev [2] 0x65:0x45 DW_TAG_subprogram +; CHECK-NEXT:.b8 2 // Abbrev [2] 0x65:0x1a DW_TAG_variable +; CHECK-NEXT:.b8 71 // DW_AT_name +; CHECK-NEXT:.b8 76 +; CHECK-NEXT:.b8 79 +; CHECK-NEXT:.b8 66 +; CHECK-NEXT:.b8 65 +; CHECK-NEXT:.b8 76 +; CHECK-NEXT:.b8 0 +; CHECK-NEXT:.b32 127 // DW_AT_type +; CHECK-NEXT:.b8 1 // DW_AT_external +; CHECK-NEXT:.b8 1 // DW_AT_decl_file +; CHECK-NEXT:.b8 3 // DW_AT_decl_line +; CHECK-NEXT:.b8 5 // DW_AT_address_class +; CHECK-NEXT:.b8 9 // DW_AT_location +; CHECK-NEXT:.b8 3 +; CHECK-NEXT:.b64 GLOBAL +; CHECK-NEXT:.b8 3 // Abbrev [3] 0x7f:0x7 DW_TAG_base_type +; CHECK-NEXT:.b8 105 // DW_AT_name +; CHECK-NEXT:.b8 110 +; CHECK-NEXT:.b8 116 +; CHECK-NEXT:.b8 0 +; CHECK-NEXT:.b8 5 // DW_AT_encoding +; CHECK-NEXT:.b8 4 // DW_AT_byte_size +; CHECK-NEXT:.b8 2 // Abbrev [2] 0x86:0x1a DW_TAG_variable +; CHECK-NEXT:.b8 83 // DW_AT_name +; CHECK-NEXT:.b8 72 +; CHECK-NEXT:.b8 65 +; CHECK-NEXT:.b8 82 +; CHECK-NEXT:.b8 69 +; CHECK-NEXT:.b8 68 +; CHECK-NEXT:.b8 0 +; CHECK-NEXT:.b32 127 // DW_AT_type +; CHECK-NEXT:.b8 1 // DW_AT_external +; CHECK-NEXT:.b8 1 // DW_AT_decl_file +; CHECK-NEXT:.b8 4 // DW_AT_decl_line +; CHECK-NEXT:.b8 8 // DW_AT_address_class +; CHECK-NEXT:.b8 9 // DW_AT_location +; CHECK-NEXT:.b8 3 +; CHECK-NEXT:.b64 SHARED +; CHECK-NEXT:.b8 4 // Abbrev [4] 0xa0:0x45 DW_TAG_subprogram ; CHECK-NEXT:.b64 Lfunc_begin0 // DW_AT_low_pc ; CHECK-NEXT:.b64 Lfunc_end0 // DW_AT_high_pc ; CHECK-NEXT:.b8 1 // DW_AT_frame_base @@ -276,71 +315,32 @@ declare void @llvm.dbg.declare(metadata, metadata, metadata) ; CHECK-NEXT:.b8 1 // DW_AT_decl_file ; CHECK-NEXT:.b8 6 // DW_AT_decl_line ; CHECK-NEXT:.b8 1 // DW_AT_external -; CHECK-NEXT:.b8 3 // Abbrev [3] 0x85:0x9 DW_TAG_formal_parameter +; CHECK-NEXT:.b8 5 // Abbrev [5] 0xc0:0x9 DW_TAG_formal_parameter ; CHECK-NEXT:.b8 97 // DW_AT_name ; CHECK-NEXT:.b8 0 ; CHECK-NEXT:.b8 1 // DW_AT_decl_file ; CHECK-NEXT:.b8 6 // DW_AT_decl_line ; CHECK-NEXT:.b32 229 // DW_AT_type -; CHECK-NEXT:.b8 3 // Abbrev [3] 0x8e:0x9 DW_TAG_formal_parameter +; CHECK-NEXT:.b8 5 // Abbrev [5] 0xc9:0x9 DW_TAG_formal_parameter ; CHECK-NEXT:.b8 120 // DW_AT_name ; CHECK-NEXT:.b8 0 ; CHECK-NEXT:.b8 1 // DW_AT_decl_file ; CHECK-NEXT:.b8 6 // DW_AT_decl_line ; CHECK-NEXT:.b32 238 // DW_AT_type -; CHECK-NEXT:.b8 3 // Abbrev [3] 0x97:0x9 DW_TAG_formal_parameter +; CHECK-NEXT:.b8 5 // Abbrev [5] 0xd2:0x9 DW_TAG_formal_parameter ; CHECK-NEXT:.b8 121 // DW_AT_name ; CHECK-NEXT:.b8 0 ; CHECK-NEXT:.b8 1 // DW_AT_decl_file ; CHECK-NEXT:.b8 6 // DW_AT_decl_line ; CHECK-NEXT:.b32 238 // DW_AT_type -; CHECK-NEXT:.b8 3 // Abbrev [3] 0xa0:0x9 DW_TAG_formal_parameter +; 
CHECK-NEXT:.b8 5 // Abbrev [5] 0xdb:0x9 DW_TAG_formal_parameter ; CHECK-NEXT:.b8 105 // DW_AT_name ; CHECK-NEXT:.b8 0 ; CHECK-NEXT:.b8 1 // DW_AT_decl_file ; CHECK-NEXT:.b8 6 // DW_AT_decl_line -; CHECK-NEXT:.b32 196 // DW_AT_type +; CHECK-NEXT:.b32 127 // DW_AT_type ; CHECK-NEXT:.b8 0 // End Of Children Mark -; CHECK-NEXT:.b8 4 // Abbrev [4] 0xaa:0x1a DW_TAG_variable -; CHECK-NEXT:.b8 71 // DW_AT_name -; CHECK-NEXT:.b8 76 -; CHECK-NEXT:.b8 79 -; CHECK-NEXT:.b8 66 -; CHECK-NEXT:.b8 65 -; CHECK-NEXT:.b8 76 -; CHECK-NEXT:.b8 0 -; CHECK-NEXT:.b32 196 // DW_AT_type -; CHECK-NEXT:.b8 1 // DW_AT_external -; CHECK-NEXT:.b8 1 // DW_AT_decl_file -; CHECK-NEXT:.b8 3 // DW_AT_decl_line -; CHECK-NEXT:.b8 5 // DW_AT_address_class -; CHECK-NEXT:.b8 9 // DW_AT_location -; CHECK-NEXT:.b8 3 -; CHECK-NEXT:.b64 GLOBAL -; CHECK-NEXT:.b8 5 // Abbrev [5] 0xc4:0x7 DW_TAG_base_type -; CHECK-NEXT:.b8 105 // DW_AT_name -; CHECK-NEXT:.b8 110 -; CHECK-NEXT:.b8 116 -; CHECK-NEXT:.b8 0 -; CHECK-NEXT:.b8 5 // DW_AT_encoding -; CHECK-NEXT:.b8 4 // DW_AT_byte_size -; CHECK-NEXT:.b8 4 // Abbrev [4] 0xcb:0x1a DW_TAG_variable -; CHECK-NEXT:.b8 83 // DW_AT_name -; CHECK-NEXT:.b8 72 -; CHECK-NEXT:.b8 65 -; CHECK-NEXT:.b8 82 -; CHECK-NEXT:.b8 69 -; CHECK-NEXT:.b8 68 -; CHECK-NEXT:.b8 0 -; CHECK-NEXT:.b32 196 // DW_AT_type -; CHECK-NEXT:.b8 1 // DW_AT_external -; CHECK-NEXT:.b8 1 // DW_AT_decl_file -; CHECK-NEXT:.b8 4 // DW_AT_decl_line -; CHECK-NEXT:.b8 8 // DW_AT_address_class -; CHECK-NEXT:.b8 9 // DW_AT_location -; CHECK-NEXT:.b8 3 -; CHECK-NEXT:.b64 SHARED -; CHECK-NEXT:.b8 5 // Abbrev [5] 0xe5:0x9 DW_TAG_base_type +; CHECK-NEXT:.b8 3 // Abbrev [3] 0xe5:0x9 DW_TAG_base_type ; CHECK-NEXT:.b8 102 // DW_AT_name ; CHECK-NEXT:.b8 108 ; CHECK-NEXT:.b8 111 diff --git a/llvm/test/DebugInfo/NVPTX/debug-info.ll b/llvm/test/DebugInfo/NVPTX/debug-info.ll index 7329e92337ec..2edd807a8d82 100644 --- a/llvm/test/DebugInfo/NVPTX/debug-info.ll +++ b/llvm/test/DebugInfo/NVPTX/debug-info.ll @@ -127,39 +127,35 @@ if.end: ; preds = %if.then, %entry ; CHECK-NEXT:.b8 0 // EOM(1) ; CHECK-NEXT:.b8 0 // EOM(2) ; CHECK-NEXT:.b8 2 // Abbreviation Code -; CHECK-NEXT:.b8 19 // DW_TAG_structure_type +; CHECK-NEXT:.b8 57 // DW_TAG_namespace ; CHECK-NEXT:.b8 1 // DW_CHILDREN_yes ; CHECK-NEXT:.b8 3 // DW_AT_name ; CHECK-NEXT:.b8 8 // DW_FORM_string -; CHECK-NEXT:.b8 11 // DW_AT_byte_size -; CHECK-NEXT:.b8 11 // DW_FORM_data1 +; CHECK-NEXT:.b8 0 // EOM(1) +; CHECK-NEXT:.b8 0 // EOM(2) +; CHECK-NEXT:.b8 3 // Abbreviation Code +; CHECK-NEXT:.b8 8 // DW_TAG_imported_declaration +; CHECK-NEXT:.b8 0 // DW_CHILDREN_no ; CHECK-NEXT:.b8 58 // DW_AT_decl_file ; CHECK-NEXT:.b8 11 // DW_FORM_data1 ; CHECK-NEXT:.b8 59 // DW_AT_decl_line ; CHECK-NEXT:.b8 11 // DW_FORM_data1 +; CHECK-NEXT:.b8 24 // DW_AT_import +; CHECK-NEXT:.b8 19 // DW_FORM_ref4 ; CHECK-NEXT:.b8 0 // EOM(1) ; CHECK-NEXT:.b8 0 // EOM(2) -; CHECK-NEXT:.b8 3 // Abbreviation Code -; CHECK-NEXT:.b8 46 // DW_TAG_subprogram +; CHECK-NEXT:.b8 4 // Abbreviation Code +; CHECK-NEXT:.b8 8 // DW_TAG_imported_declaration ; CHECK-NEXT:.b8 0 // DW_CHILDREN_no -; CHECK-NEXT:.b8 135 // DW_AT_MIPS_linkage_name -; CHECK-NEXT:.b8 64 -; CHECK-NEXT:.b8 8 // DW_FORM_string -; CHECK-NEXT:.b8 3 // DW_AT_name -; CHECK-NEXT:.b8 8 // DW_FORM_string ; CHECK-NEXT:.b8 58 // DW_AT_decl_file ; CHECK-NEXT:.b8 11 // DW_FORM_data1 ; CHECK-NEXT:.b8 59 // DW_AT_decl_line -; CHECK-NEXT:.b8 11 // DW_FORM_data1 -; CHECK-NEXT:.b8 73 // DW_AT_type +; CHECK-NEXT:.b8 5 // DW_FORM_data2 +; CHECK-NEXT:.b8 24 // DW_AT_import ; CHECK-NEXT:.b8 19 // 
DW_FORM_ref4 -; CHECK-NEXT:.b8 60 // DW_AT_declaration -; CHECK-NEXT:.b8 12 // DW_FORM_flag -; CHECK-NEXT:.b8 63 // DW_AT_external -; CHECK-NEXT:.b8 12 // DW_FORM_flag ; CHECK-NEXT:.b8 0 // EOM(1) ; CHECK-NEXT:.b8 0 // EOM(2) -; CHECK-NEXT:.b8 4 // Abbreviation Code +; CHECK-NEXT:.b8 5 // Abbreviation Code ; CHECK-NEXT:.b8 46 // DW_TAG_subprogram ; CHECK-NEXT:.b8 1 // DW_CHILDREN_yes ; CHECK-NEXT:.b8 135 // DW_AT_MIPS_linkage_name @@ -175,69 +171,43 @@ if.end: ; preds = %if.then, %entry ; CHECK-NEXT:.b8 19 // DW_FORM_ref4 ; CHECK-NEXT:.b8 60 // DW_AT_declaration ; CHECK-NEXT:.b8 12 // DW_FORM_flag -; CHECK-NEXT:.b8 63 // DW_AT_external -; CHECK-NEXT:.b8 12 // DW_FORM_flag ; CHECK-NEXT:.b8 0 // EOM(1) ; CHECK-NEXT:.b8 0 // EOM(2) -; CHECK-NEXT:.b8 5 // Abbreviation Code +; CHECK-NEXT:.b8 6 // Abbreviation Code ; CHECK-NEXT:.b8 5 // DW_TAG_formal_parameter ; CHECK-NEXT:.b8 0 // DW_CHILDREN_no ; CHECK-NEXT:.b8 73 // DW_AT_type ; CHECK-NEXT:.b8 19 // DW_FORM_ref4 -; CHECK-NEXT:.b8 52 // DW_AT_artificial -; CHECK-NEXT:.b8 12 // DW_FORM_flag ; CHECK-NEXT:.b8 0 // EOM(1) ; CHECK-NEXT:.b8 0 // EOM(2) -; CHECK-NEXT:.b8 6 // Abbreviation Code -; CHECK-NEXT:.b8 46 // DW_TAG_subprogram -; CHECK-NEXT:.b8 1 // DW_CHILDREN_yes +; CHECK-NEXT:.b8 7 // Abbreviation Code +; CHECK-NEXT:.b8 36 // DW_TAG_base_type +; CHECK-NEXT:.b8 0 // DW_CHILDREN_no ; CHECK-NEXT:.b8 3 // DW_AT_name ; CHECK-NEXT:.b8 8 // DW_FORM_string -; CHECK-NEXT:.b8 58 // DW_AT_decl_file -; CHECK-NEXT:.b8 11 // DW_FORM_data1 -; CHECK-NEXT:.b8 59 // DW_AT_decl_line +; CHECK-NEXT:.b8 62 // DW_AT_encoding ; CHECK-NEXT:.b8 11 // DW_FORM_data1 -; CHECK-NEXT:.b8 60 // DW_AT_declaration -; CHECK-NEXT:.b8 12 // DW_FORM_flag -; CHECK-NEXT:.b8 63 // DW_AT_external -; CHECK-NEXT:.b8 12 // DW_FORM_flag -; CHECK-NEXT:.b8 50 // DW_AT_accessibility +; CHECK-NEXT:.b8 11 // DW_AT_byte_size ; CHECK-NEXT:.b8 11 // DW_FORM_data1 ; CHECK-NEXT:.b8 0 // EOM(1) ; CHECK-NEXT:.b8 0 // EOM(2) -; CHECK-NEXT:.b8 7 // Abbreviation Code -; CHECK-NEXT:.b8 5 // DW_TAG_formal_parameter +; CHECK-NEXT:.b8 8 // Abbreviation Code +; CHECK-NEXT:.b8 15 // DW_TAG_pointer_type ; CHECK-NEXT:.b8 0 // DW_CHILDREN_no ; CHECK-NEXT:.b8 73 // DW_AT_type ; CHECK-NEXT:.b8 19 // DW_FORM_ref4 ; CHECK-NEXT:.b8 0 // EOM(1) ; CHECK-NEXT:.b8 0 // EOM(2) -; CHECK-NEXT:.b8 8 // Abbreviation Code -; CHECK-NEXT:.b8 46 // DW_TAG_subprogram -; CHECK-NEXT:.b8 1 // DW_CHILDREN_yes -; CHECK-NEXT:.b8 135 // DW_AT_MIPS_linkage_name -; CHECK-NEXT:.b8 64 -; CHECK-NEXT:.b8 8 // DW_FORM_string -; CHECK-NEXT:.b8 3 // DW_AT_name -; CHECK-NEXT:.b8 8 // DW_FORM_string -; CHECK-NEXT:.b8 58 // DW_AT_decl_file -; CHECK-NEXT:.b8 11 // DW_FORM_data1 -; CHECK-NEXT:.b8 59 // DW_AT_decl_line -; CHECK-NEXT:.b8 11 // DW_FORM_data1 -; CHECK-NEXT:.b8 60 // DW_AT_declaration -; CHECK-NEXT:.b8 12 // DW_FORM_flag -; CHECK-NEXT:.b8 63 // DW_AT_external -; CHECK-NEXT:.b8 12 // DW_FORM_flag -; CHECK-NEXT:.b8 50 // DW_AT_accessibility -; CHECK-NEXT:.b8 11 // DW_FORM_data1 +; CHECK-NEXT:.b8 9 // Abbreviation Code +; CHECK-NEXT:.b8 38 // DW_TAG_const_type +; CHECK-NEXT:.b8 0 // DW_CHILDREN_no +; CHECK-NEXT:.b8 73 // DW_AT_type +; CHECK-NEXT:.b8 19 // DW_FORM_ref4 ; CHECK-NEXT:.b8 0 // EOM(1) ; CHECK-NEXT:.b8 0 // EOM(2) -; CHECK-NEXT:.b8 9 // Abbreviation Code +; CHECK-NEXT:.b8 10 // Abbreviation Code ; CHECK-NEXT:.b8 46 // DW_TAG_subprogram ; CHECK-NEXT:.b8 1 // DW_CHILDREN_yes -; CHECK-NEXT:.b8 135 // DW_AT_MIPS_linkage_name -; CHECK-NEXT:.b8 64 -; CHECK-NEXT:.b8 8 // DW_FORM_string ; CHECK-NEXT:.b8 3 // DW_AT_name ; CHECK-NEXT:.b8 
8 // DW_FORM_string ; CHECK-NEXT:.b8 58 // DW_AT_decl_file @@ -250,80 +220,40 @@ if.end: ; preds = %if.then, %entry ; CHECK-NEXT:.b8 12 // DW_FORM_flag ; CHECK-NEXT:.b8 63 // DW_AT_external ; CHECK-NEXT:.b8 12 // DW_FORM_flag -; CHECK-NEXT:.b8 50 // DW_AT_accessibility -; CHECK-NEXT:.b8 11 // DW_FORM_data1 -; CHECK-NEXT:.b8 0 // EOM(1) -; CHECK-NEXT:.b8 0 // EOM(2) -; CHECK-NEXT:.b8 10 // Abbreviation Code -; CHECK-NEXT:.b8 36 // DW_TAG_base_type -; CHECK-NEXT:.b8 0 // DW_CHILDREN_no -; CHECK-NEXT:.b8 3 // DW_AT_name -; CHECK-NEXT:.b8 8 // DW_FORM_string -; CHECK-NEXT:.b8 62 // DW_AT_encoding -; CHECK-NEXT:.b8 11 // DW_FORM_data1 -; CHECK-NEXT:.b8 11 // DW_AT_byte_size -; CHECK-NEXT:.b8 11 // DW_FORM_data1 ; CHECK-NEXT:.b8 0 // EOM(1) ; CHECK-NEXT:.b8 0 // EOM(2) ; CHECK-NEXT:.b8 11 // Abbreviation Code -; CHECK-NEXT:.b8 13 // DW_TAG_member +; CHECK-NEXT:.b8 22 // DW_TAG_typedef ; CHECK-NEXT:.b8 0 // DW_CHILDREN_no -; CHECK-NEXT:.b8 3 // DW_AT_name -; CHECK-NEXT:.b8 8 // DW_FORM_string ; CHECK-NEXT:.b8 73 // DW_AT_type ; CHECK-NEXT:.b8 19 // DW_FORM_ref4 +; CHECK-NEXT:.b8 3 // DW_AT_name +; CHECK-NEXT:.b8 8 // DW_FORM_string ; CHECK-NEXT:.b8 58 // DW_AT_decl_file ; CHECK-NEXT:.b8 11 // DW_FORM_data1 ; CHECK-NEXT:.b8 59 // DW_AT_decl_line ; CHECK-NEXT:.b8 11 // DW_FORM_data1 -; CHECK-NEXT:.b8 56 // DW_AT_data_member_location -; CHECK-NEXT:.b8 10 // DW_FORM_block1 ; CHECK-NEXT:.b8 0 // EOM(1) ; CHECK-NEXT:.b8 0 // EOM(2) ; CHECK-NEXT:.b8 12 // Abbreviation Code -; CHECK-NEXT:.b8 15 // DW_TAG_pointer_type +; CHECK-NEXT:.b8 19 // DW_TAG_structure_type ; CHECK-NEXT:.b8 0 // DW_CHILDREN_no -; CHECK-NEXT:.b8 73 // DW_AT_type -; CHECK-NEXT:.b8 19 // DW_FORM_ref4 +; CHECK-NEXT:.b8 60 // DW_AT_declaration +; CHECK-NEXT:.b8 12 // DW_FORM_flag ; CHECK-NEXT:.b8 0 // EOM(1) ; CHECK-NEXT:.b8 0 // EOM(2) ; CHECK-NEXT:.b8 13 // Abbreviation Code -; CHECK-NEXT:.b8 38 // DW_TAG_const_type -; CHECK-NEXT:.b8 0 // DW_CHILDREN_no -; CHECK-NEXT:.b8 73 // DW_AT_type -; CHECK-NEXT:.b8 19 // DW_FORM_ref4 -; CHECK-NEXT:.b8 0 // EOM(1) -; CHECK-NEXT:.b8 0 // EOM(2) -; CHECK-NEXT:.b8 14 // Abbreviation Code -; CHECK-NEXT:.b8 16 // DW_TAG_reference_type -; CHECK-NEXT:.b8 0 // DW_CHILDREN_no -; CHECK-NEXT:.b8 73 // DW_AT_type -; CHECK-NEXT:.b8 19 // DW_FORM_ref4 -; CHECK-NEXT:.b8 0 // EOM(1) -; CHECK-NEXT:.b8 0 // EOM(2) -; CHECK-NEXT:.b8 15 // Abbreviation Code -; CHECK-NEXT:.b8 46 // DW_TAG_subprogram -; CHECK-NEXT:.b8 0 // DW_CHILDREN_no -; CHECK-NEXT:.b8 71 // DW_AT_specification -; CHECK-NEXT:.b8 19 // DW_FORM_ref4 -; CHECK-NEXT:.b8 32 // DW_AT_inline -; CHECK-NEXT:.b8 11 // DW_FORM_data1 -; CHECK-NEXT:.b8 0 // EOM(1) -; CHECK-NEXT:.b8 0 // EOM(2) -; CHECK-NEXT:.b8 16 // Abbreviation Code ; CHECK-NEXT:.b8 19 // DW_TAG_structure_type ; CHECK-NEXT:.b8 1 // DW_CHILDREN_yes -; CHECK-NEXT:.b8 3 // DW_AT_name -; CHECK-NEXT:.b8 8 // DW_FORM_string ; CHECK-NEXT:.b8 11 // DW_AT_byte_size ; CHECK-NEXT:.b8 11 // DW_FORM_data1 ; CHECK-NEXT:.b8 58 // DW_AT_decl_file ; CHECK-NEXT:.b8 11 // DW_FORM_data1 ; CHECK-NEXT:.b8 59 // DW_AT_decl_line -; CHECK-NEXT:.b8 5 // DW_FORM_data2 +; CHECK-NEXT:.b8 11 // DW_FORM_data1 ; CHECK-NEXT:.b8 0 // EOM(1) ; CHECK-NEXT:.b8 0 // EOM(2) -; CHECK-NEXT:.b8 17 // Abbreviation Code +; CHECK-NEXT:.b8 14 // Abbreviation Code ; CHECK-NEXT:.b8 13 // DW_TAG_member ; CHECK-NEXT:.b8 0 // DW_CHILDREN_no ; CHECK-NEXT:.b8 3 // DW_AT_name @@ -333,14 +263,14 @@ if.end: ; preds = %if.then, %entry ; CHECK-NEXT:.b8 58 // DW_AT_decl_file ; CHECK-NEXT:.b8 11 // DW_FORM_data1 ; CHECK-NEXT:.b8 59 // DW_AT_decl_line 
-; CHECK-NEXT:.b8 5 // DW_FORM_data2 +; CHECK-NEXT:.b8 11 // DW_FORM_data1 ; CHECK-NEXT:.b8 56 // DW_AT_data_member_location ; CHECK-NEXT:.b8 10 // DW_FORM_block1 ; CHECK-NEXT:.b8 0 // EOM(1) ; CHECK-NEXT:.b8 0 // EOM(2) -; CHECK-NEXT:.b8 18 // Abbreviation Code +; CHECK-NEXT:.b8 15 // Abbreviation Code ; CHECK-NEXT:.b8 46 // DW_TAG_subprogram -; CHECK-NEXT:.b8 1 // DW_CHILDREN_yes +; CHECK-NEXT:.b8 0 // DW_CHILDREN_no ; CHECK-NEXT:.b8 3 // DW_AT_name ; CHECK-NEXT:.b8 8 // DW_FORM_string ; CHECK-NEXT:.b8 58 // DW_AT_decl_file @@ -351,14 +281,14 @@ if.end: ; preds = %if.then, %entry ; CHECK-NEXT:.b8 12 // DW_FORM_flag ; CHECK-NEXT:.b8 63 // DW_AT_external ; CHECK-NEXT:.b8 12 // DW_FORM_flag +; CHECK-NEXT:.b8 135 // DW_AT_noreturn +; CHECK-NEXT:.b8 1 +; CHECK-NEXT:.b8 12 // DW_FORM_flag ; CHECK-NEXT:.b8 0 // EOM(1) ; CHECK-NEXT:.b8 0 // EOM(2) -; CHECK-NEXT:.b8 19 // Abbreviation Code +; CHECK-NEXT:.b8 16 // Abbreviation Code ; CHECK-NEXT:.b8 46 // DW_TAG_subprogram ; CHECK-NEXT:.b8 1 // DW_CHILDREN_yes -; CHECK-NEXT:.b8 135 // DW_AT_MIPS_linkage_name -; CHECK-NEXT:.b8 64 -; CHECK-NEXT:.b8 8 // DW_FORM_string ; CHECK-NEXT:.b8 3 // DW_AT_name ; CHECK-NEXT:.b8 8 // DW_FORM_string ; CHECK-NEXT:.b8 58 // DW_AT_decl_file @@ -373,6 +303,21 @@ if.end: ; preds = %if.then, %entry ; CHECK-NEXT:.b8 12 // DW_FORM_flag ; CHECK-NEXT:.b8 0 // EOM(1) ; CHECK-NEXT:.b8 0 // EOM(2) +; CHECK-NEXT:.b8 17 // Abbreviation Code +; CHECK-NEXT:.b8 21 // DW_TAG_subroutine_type +; CHECK-NEXT:.b8 0 // DW_CHILDREN_no +; CHECK-NEXT:.b8 0 // EOM(1) +; CHECK-NEXT:.b8 0 // EOM(2) +; CHECK-NEXT:.b8 18 // Abbreviation Code +; CHECK-NEXT:.b8 15 // DW_TAG_pointer_type +; CHECK-NEXT:.b8 0 // DW_CHILDREN_no +; CHECK-NEXT:.b8 0 // EOM(1) +; CHECK-NEXT:.b8 0 // EOM(2) +; CHECK-NEXT:.b8 19 // Abbreviation Code +; CHECK-NEXT:.b8 38 // DW_TAG_const_type +; CHECK-NEXT:.b8 0 // DW_CHILDREN_no +; CHECK-NEXT:.b8 0 // EOM(1) +; CHECK-NEXT:.b8 0 // EOM(2) ; CHECK-NEXT:.b8 20 // Abbreviation Code ; CHECK-NEXT:.b8 22 // DW_TAG_typedef ; CHECK-NEXT:.b8 0 // DW_CHILDREN_no @@ -387,45 +332,65 @@ if.end: ; preds = %if.then, %entry ; CHECK-NEXT:.b8 0 // EOM(1) ; CHECK-NEXT:.b8 0 // EOM(2) ; CHECK-NEXT:.b8 21 // Abbreviation Code +; CHECK-NEXT:.b8 21 // DW_TAG_subroutine_type +; CHECK-NEXT:.b8 1 // DW_CHILDREN_yes +; CHECK-NEXT:.b8 73 // DW_AT_type +; CHECK-NEXT:.b8 19 // DW_FORM_ref4 +; CHECK-NEXT:.b8 0 // EOM(1) +; CHECK-NEXT:.b8 0 // EOM(2) +; CHECK-NEXT:.b8 22 // Abbreviation Code ; CHECK-NEXT:.b8 46 // DW_TAG_subprogram ; CHECK-NEXT:.b8 1 // DW_CHILDREN_yes -; CHECK-NEXT:.b8 135 // DW_AT_MIPS_linkage_name -; CHECK-NEXT:.b8 64 -; CHECK-NEXT:.b8 8 // DW_FORM_string ; CHECK-NEXT:.b8 3 // DW_AT_name ; CHECK-NEXT:.b8 8 // DW_FORM_string ; CHECK-NEXT:.b8 58 // DW_AT_decl_file ; CHECK-NEXT:.b8 11 // DW_FORM_data1 ; CHECK-NEXT:.b8 59 // DW_AT_decl_line -; CHECK-NEXT:.b8 11 // DW_FORM_data1 +; CHECK-NEXT:.b8 5 // DW_FORM_data2 +; CHECK-NEXT:.b8 60 // DW_AT_declaration +; CHECK-NEXT:.b8 12 // DW_FORM_flag ; CHECK-NEXT:.b8 63 // DW_AT_external ; CHECK-NEXT:.b8 12 // DW_FORM_flag -; CHECK-NEXT:.b8 32 // DW_AT_inline +; CHECK-NEXT:.b8 135 // DW_AT_noreturn +; CHECK-NEXT:.b8 1 +; CHECK-NEXT:.b8 12 // DW_FORM_flag +; CHECK-NEXT:.b8 0 // EOM(1) +; CHECK-NEXT:.b8 0 // EOM(2) +; CHECK-NEXT:.b8 23 // Abbreviation Code +; CHECK-NEXT:.b8 46 // DW_TAG_subprogram +; CHECK-NEXT:.b8 1 // DW_CHILDREN_yes +; CHECK-NEXT:.b8 3 // DW_AT_name +; CHECK-NEXT:.b8 8 // DW_FORM_string +; CHECK-NEXT:.b8 58 // DW_AT_decl_file ; CHECK-NEXT:.b8 11 // DW_FORM_data1 +; CHECK-NEXT:.b8 
59 // DW_AT_decl_line +; CHECK-NEXT:.b8 5 // DW_FORM_data2 +; CHECK-NEXT:.b8 60 // DW_AT_declaration +; CHECK-NEXT:.b8 12 // DW_FORM_flag +; CHECK-NEXT:.b8 63 // DW_AT_external +; CHECK-NEXT:.b8 12 // DW_FORM_flag ; CHECK-NEXT:.b8 0 // EOM(1) ; CHECK-NEXT:.b8 0 // EOM(2) -; CHECK-NEXT:.b8 22 // Abbreviation Code -; CHECK-NEXT:.b8 5 // DW_TAG_formal_parameter +; CHECK-NEXT:.b8 24 // Abbreviation Code +; CHECK-NEXT:.b8 46 // DW_TAG_subprogram ; CHECK-NEXT:.b8 0 // DW_CHILDREN_no ; CHECK-NEXT:.b8 3 // DW_AT_name ; CHECK-NEXT:.b8 8 // DW_FORM_string ; CHECK-NEXT:.b8 58 // DW_AT_decl_file ; CHECK-NEXT:.b8 11 // DW_FORM_data1 ; CHECK-NEXT:.b8 59 // DW_AT_decl_line -; CHECK-NEXT:.b8 11 // DW_FORM_data1 +; CHECK-NEXT:.b8 5 // DW_FORM_data2 ; CHECK-NEXT:.b8 73 // DW_AT_type ; CHECK-NEXT:.b8 19 // DW_FORM_ref4 +; CHECK-NEXT:.b8 60 // DW_AT_declaration +; CHECK-NEXT:.b8 12 // DW_FORM_flag +; CHECK-NEXT:.b8 63 // DW_AT_external +; CHECK-NEXT:.b8 12 // DW_FORM_flag ; CHECK-NEXT:.b8 0 // EOM(1) ; CHECK-NEXT:.b8 0 // EOM(2) -; CHECK-NEXT:.b8 23 // Abbreviation Code +; CHECK-NEXT:.b8 25 // Abbreviation Code ; CHECK-NEXT:.b8 46 // DW_TAG_subprogram ; CHECK-NEXT:.b8 1 // DW_CHILDREN_yes -; CHECK-NEXT:.b8 17 // DW_AT_low_pc -; CHECK-NEXT:.b8 1 // DW_FORM_addr -; CHECK-NEXT:.b8 18 // DW_AT_high_pc -; CHECK-NEXT:.b8 1 // DW_FORM_addr -; CHECK-NEXT:.b8 64 // DW_AT_frame_base -; CHECK-NEXT:.b8 10 // DW_FORM_block1 ; CHECK-NEXT:.b8 135 // DW_AT_MIPS_linkage_name ; CHECK-NEXT:.b8 64 ; CHECK-NEXT:.b8 8 // DW_FORM_string @@ -435,91 +400,89 @@ if.end: ; preds = %if.then, %entry ; CHECK-NEXT:.b8 11 // DW_FORM_data1 ; CHECK-NEXT:.b8 59 // DW_AT_decl_line ; CHECK-NEXT:.b8 11 // DW_FORM_data1 +; CHECK-NEXT:.b8 73 // DW_AT_type +; CHECK-NEXT:.b8 19 // DW_FORM_ref4 +; CHECK-NEXT:.b8 60 // DW_AT_declaration +; CHECK-NEXT:.b8 12 // DW_FORM_flag ; CHECK-NEXT:.b8 63 // DW_AT_external ; CHECK-NEXT:.b8 12 // DW_FORM_flag ; CHECK-NEXT:.b8 0 // EOM(1) ; CHECK-NEXT:.b8 0 // EOM(2) -; CHECK-NEXT:.b8 24 // Abbreviation Code -; CHECK-NEXT:.b8 52 // DW_TAG_variable -; CHECK-NEXT:.b8 0 // DW_CHILDREN_no +; CHECK-NEXT:.b8 26 // Abbreviation Code +; CHECK-NEXT:.b8 46 // DW_TAG_subprogram +; CHECK-NEXT:.b8 1 // DW_CHILDREN_yes +; CHECK-NEXT:.b8 135 // DW_AT_MIPS_linkage_name +; CHECK-NEXT:.b8 64 +; CHECK-NEXT:.b8 8 // DW_FORM_string ; CHECK-NEXT:.b8 3 // DW_AT_name ; CHECK-NEXT:.b8 8 // DW_FORM_string ; CHECK-NEXT:.b8 58 // DW_AT_decl_file ; CHECK-NEXT:.b8 11 // DW_FORM_data1 ; CHECK-NEXT:.b8 59 // DW_AT_decl_line -; CHECK-NEXT:.b8 11 // DW_FORM_data1 +; CHECK-NEXT:.b8 5 // DW_FORM_data2 ; CHECK-NEXT:.b8 73 // DW_AT_type ; CHECK-NEXT:.b8 19 // DW_FORM_ref4 +; CHECK-NEXT:.b8 60 // DW_AT_declaration +; CHECK-NEXT:.b8 12 // DW_FORM_flag ; CHECK-NEXT:.b8 0 // EOM(1) ; CHECK-NEXT:.b8 0 // EOM(2) -; CHECK-NEXT:.b8 25 // Abbreviation Code -; CHECK-NEXT:.b8 29 // DW_TAG_inlined_subroutine -; CHECK-NEXT:.b8 0 // DW_CHILDREN_no -; CHECK-NEXT:.b8 49 // DW_AT_abstract_origin -; CHECK-NEXT:.b8 19 // DW_FORM_ref4 -; CHECK-NEXT:.b8 17 // DW_AT_low_pc -; CHECK-NEXT:.b8 1 // DW_FORM_addr -; CHECK-NEXT:.b8 18 // DW_AT_high_pc -; CHECK-NEXT:.b8 1 // DW_FORM_addr -; CHECK-NEXT:.b8 88 // DW_AT_call_file +; CHECK-NEXT:.b8 27 // Abbreviation Code +; CHECK-NEXT:.b8 19 // DW_TAG_structure_type +; CHECK-NEXT:.b8 1 // DW_CHILDREN_yes +; CHECK-NEXT:.b8 3 // DW_AT_name +; CHECK-NEXT:.b8 8 // DW_FORM_string +; CHECK-NEXT:.b8 11 // DW_AT_byte_size ; CHECK-NEXT:.b8 11 // DW_FORM_data1 -; CHECK-NEXT:.b8 89 // DW_AT_call_line +; CHECK-NEXT:.b8 58 // DW_AT_decl_file ; 
CHECK-NEXT:.b8 11 // DW_FORM_data1 -; CHECK-NEXT:.b8 87 // DW_AT_call_column +; CHECK-NEXT:.b8 59 // DW_AT_decl_line ; CHECK-NEXT:.b8 11 // DW_FORM_data1 ; CHECK-NEXT:.b8 0 // EOM(1) ; CHECK-NEXT:.b8 0 // EOM(2) -; CHECK-NEXT:.b8 26 // Abbreviation Code -; CHECK-NEXT:.b8 29 // DW_TAG_inlined_subroutine -; CHECK-NEXT:.b8 1 // DW_CHILDREN_yes -; CHECK-NEXT:.b8 49 // DW_AT_abstract_origin -; CHECK-NEXT:.b8 19 // DW_FORM_ref4 -; CHECK-NEXT:.b8 17 // DW_AT_low_pc -; CHECK-NEXT:.b8 1 // DW_FORM_addr -; CHECK-NEXT:.b8 18 // DW_AT_high_pc -; CHECK-NEXT:.b8 1 // DW_FORM_addr -; CHECK-NEXT:.b8 88 // DW_AT_call_file -; CHECK-NEXT:.b8 11 // DW_FORM_data1 -; CHECK-NEXT:.b8 89 // DW_AT_call_line +; CHECK-NEXT:.b8 28 // Abbreviation Code +; CHECK-NEXT:.b8 46 // DW_TAG_subprogram +; CHECK-NEXT:.b8 0 // DW_CHILDREN_no +; CHECK-NEXT:.b8 135 // DW_AT_MIPS_linkage_name +; CHECK-NEXT:.b8 64 +; CHECK-NEXT:.b8 8 // DW_FORM_string +; CHECK-NEXT:.b8 3 // DW_AT_name +; CHECK-NEXT:.b8 8 // DW_FORM_string +; CHECK-NEXT:.b8 58 // DW_AT_decl_file ; CHECK-NEXT:.b8 11 // DW_FORM_data1 -; CHECK-NEXT:.b8 87 // DW_AT_call_column +; CHECK-NEXT:.b8 59 // DW_AT_decl_line ; CHECK-NEXT:.b8 11 // DW_FORM_data1 +; CHECK-NEXT:.b8 73 // DW_AT_type +; CHECK-NEXT:.b8 19 // DW_FORM_ref4 +; CHECK-NEXT:.b8 60 // DW_AT_declaration +; CHECK-NEXT:.b8 12 // DW_FORM_flag +; CHECK-NEXT:.b8 63 // DW_AT_external +; CHECK-NEXT:.b8 12 // DW_FORM_flag ; CHECK-NEXT:.b8 0 // EOM(1) ; CHECK-NEXT:.b8 0 // EOM(2) -; CHECK-NEXT:.b8 27 // Abbreviation Code +; CHECK-NEXT:.b8 29 // Abbreviation Code ; CHECK-NEXT:.b8 5 // DW_TAG_formal_parameter ; CHECK-NEXT:.b8 0 // DW_CHILDREN_no -; CHECK-NEXT:.b8 49 // DW_AT_abstract_origin +; CHECK-NEXT:.b8 73 // DW_AT_type ; CHECK-NEXT:.b8 19 // DW_FORM_ref4 +; CHECK-NEXT:.b8 52 // DW_AT_artificial +; CHECK-NEXT:.b8 12 // DW_FORM_flag ; CHECK-NEXT:.b8 0 // EOM(1) ; CHECK-NEXT:.b8 0 // EOM(2) -; CHECK-NEXT:.b8 28 // Abbreviation Code -; CHECK-NEXT:.b8 57 // DW_TAG_namespace +; CHECK-NEXT:.b8 30 // Abbreviation Code +; CHECK-NEXT:.b8 46 // DW_TAG_subprogram ; CHECK-NEXT:.b8 1 // DW_CHILDREN_yes ; CHECK-NEXT:.b8 3 // DW_AT_name ; CHECK-NEXT:.b8 8 // DW_FORM_string -; CHECK-NEXT:.b8 0 // EOM(1) -; CHECK-NEXT:.b8 0 // EOM(2) -; CHECK-NEXT:.b8 29 // Abbreviation Code -; CHECK-NEXT:.b8 8 // DW_TAG_imported_declaration -; CHECK-NEXT:.b8 0 // DW_CHILDREN_no ; CHECK-NEXT:.b8 58 // DW_AT_decl_file ; CHECK-NEXT:.b8 11 // DW_FORM_data1 ; CHECK-NEXT:.b8 59 // DW_AT_decl_line ; CHECK-NEXT:.b8 11 // DW_FORM_data1 -; CHECK-NEXT:.b8 24 // DW_AT_import -; CHECK-NEXT:.b8 19 // DW_FORM_ref4 -; CHECK-NEXT:.b8 0 // EOM(1) -; CHECK-NEXT:.b8 0 // EOM(2) -; CHECK-NEXT:.b8 30 // Abbreviation Code -; CHECK-NEXT:.b8 8 // DW_TAG_imported_declaration -; CHECK-NEXT:.b8 0 // DW_CHILDREN_no -; CHECK-NEXT:.b8 58 // DW_AT_decl_file +; CHECK-NEXT:.b8 60 // DW_AT_declaration +; CHECK-NEXT:.b8 12 // DW_FORM_flag +; CHECK-NEXT:.b8 63 // DW_AT_external +; CHECK-NEXT:.b8 12 // DW_FORM_flag +; CHECK-NEXT:.b8 50 // DW_AT_accessibility ; CHECK-NEXT:.b8 11 // DW_FORM_data1 -; CHECK-NEXT:.b8 59 // DW_AT_decl_line -; CHECK-NEXT:.b8 5 // DW_FORM_data2 -; CHECK-NEXT:.b8 24 // DW_AT_import -; CHECK-NEXT:.b8 19 // DW_FORM_ref4 ; CHECK-NEXT:.b8 0 // EOM(1) ; CHECK-NEXT:.b8 0 // EOM(2) ; CHECK-NEXT:.b8 31 // Abbreviation Code @@ -534,15 +497,20 @@ if.end: ; preds = %if.then, %entry ; CHECK-NEXT:.b8 11 // DW_FORM_data1 ; CHECK-NEXT:.b8 59 // DW_AT_decl_line ; CHECK-NEXT:.b8 11 // DW_FORM_data1 -; CHECK-NEXT:.b8 73 // DW_AT_type -; CHECK-NEXT:.b8 19 // DW_FORM_ref4 ; 
CHECK-NEXT:.b8 60 // DW_AT_declaration ; CHECK-NEXT:.b8 12 // DW_FORM_flag +; CHECK-NEXT:.b8 63 // DW_AT_external +; CHECK-NEXT:.b8 12 // DW_FORM_flag +; CHECK-NEXT:.b8 50 // DW_AT_accessibility +; CHECK-NEXT:.b8 11 // DW_FORM_data1 ; CHECK-NEXT:.b8 0 // EOM(1) ; CHECK-NEXT:.b8 0 // EOM(2) ; CHECK-NEXT:.b8 32 // Abbreviation Code ; CHECK-NEXT:.b8 46 // DW_TAG_subprogram ; CHECK-NEXT:.b8 1 // DW_CHILDREN_yes +; CHECK-NEXT:.b8 135 // DW_AT_MIPS_linkage_name +; CHECK-NEXT:.b8 64 +; CHECK-NEXT:.b8 8 // DW_FORM_string ; CHECK-NEXT:.b8 3 // DW_AT_name ; CHECK-NEXT:.b8 8 // DW_FORM_string ; CHECK-NEXT:.b8 58 // DW_AT_decl_file @@ -555,60 +523,60 @@ if.end: ; preds = %if.then, %entry ; CHECK-NEXT:.b8 12 // DW_FORM_flag ; CHECK-NEXT:.b8 63 // DW_AT_external ; CHECK-NEXT:.b8 12 // DW_FORM_flag +; CHECK-NEXT:.b8 50 // DW_AT_accessibility +; CHECK-NEXT:.b8 11 // DW_FORM_data1 ; CHECK-NEXT:.b8 0 // EOM(1) ; CHECK-NEXT:.b8 0 // EOM(2) ; CHECK-NEXT:.b8 33 // Abbreviation Code -; CHECK-NEXT:.b8 22 // DW_TAG_typedef +; CHECK-NEXT:.b8 16 // DW_TAG_reference_type ; CHECK-NEXT:.b8 0 // DW_CHILDREN_no ; CHECK-NEXT:.b8 73 // DW_AT_type ; CHECK-NEXT:.b8 19 // DW_FORM_ref4 -; CHECK-NEXT:.b8 3 // DW_AT_name -; CHECK-NEXT:.b8 8 // DW_FORM_string -; CHECK-NEXT:.b8 58 // DW_AT_decl_file -; CHECK-NEXT:.b8 11 // DW_FORM_data1 -; CHECK-NEXT:.b8 59 // DW_AT_decl_line -; CHECK-NEXT:.b8 11 // DW_FORM_data1 ; CHECK-NEXT:.b8 0 // EOM(1) ; CHECK-NEXT:.b8 0 // EOM(2) ; CHECK-NEXT:.b8 34 // Abbreviation Code -; CHECK-NEXT:.b8 19 // DW_TAG_structure_type +; CHECK-NEXT:.b8 46 // DW_TAG_subprogram ; CHECK-NEXT:.b8 0 // DW_CHILDREN_no -; CHECK-NEXT:.b8 60 // DW_AT_declaration -; CHECK-NEXT:.b8 12 // DW_FORM_flag +; CHECK-NEXT:.b8 71 // DW_AT_specification +; CHECK-NEXT:.b8 19 // DW_FORM_ref4 +; CHECK-NEXT:.b8 32 // DW_AT_inline +; CHECK-NEXT:.b8 11 // DW_FORM_data1 ; CHECK-NEXT:.b8 0 // EOM(1) ; CHECK-NEXT:.b8 0 // EOM(2) ; CHECK-NEXT:.b8 35 // Abbreviation Code ; CHECK-NEXT:.b8 19 // DW_TAG_structure_type ; CHECK-NEXT:.b8 1 // DW_CHILDREN_yes +; CHECK-NEXT:.b8 3 // DW_AT_name +; CHECK-NEXT:.b8 8 // DW_FORM_string ; CHECK-NEXT:.b8 11 // DW_AT_byte_size ; CHECK-NEXT:.b8 11 // DW_FORM_data1 ; CHECK-NEXT:.b8 58 // DW_AT_decl_file ; CHECK-NEXT:.b8 11 // DW_FORM_data1 ; CHECK-NEXT:.b8 59 // DW_AT_decl_line -; CHECK-NEXT:.b8 11 // DW_FORM_data1 +; CHECK-NEXT:.b8 5 // DW_FORM_data2 ; CHECK-NEXT:.b8 0 // EOM(1) ; CHECK-NEXT:.b8 0 // EOM(2) ; CHECK-NEXT:.b8 36 // Abbreviation Code -; CHECK-NEXT:.b8 46 // DW_TAG_subprogram +; CHECK-NEXT:.b8 13 // DW_TAG_member ; CHECK-NEXT:.b8 0 // DW_CHILDREN_no ; CHECK-NEXT:.b8 3 // DW_AT_name ; CHECK-NEXT:.b8 8 // DW_FORM_string +; CHECK-NEXT:.b8 73 // DW_AT_type +; CHECK-NEXT:.b8 19 // DW_FORM_ref4 ; CHECK-NEXT:.b8 58 // DW_AT_decl_file ; CHECK-NEXT:.b8 11 // DW_FORM_data1 ; CHECK-NEXT:.b8 59 // DW_AT_decl_line ; CHECK-NEXT:.b8 5 // DW_FORM_data2 -; CHECK-NEXT:.b8 60 // DW_AT_declaration -; CHECK-NEXT:.b8 12 // DW_FORM_flag -; CHECK-NEXT:.b8 63 // DW_AT_external -; CHECK-NEXT:.b8 12 // DW_FORM_flag -; CHECK-NEXT:.b8 135 // DW_AT_noreturn -; CHECK-NEXT:.b8 1 -; CHECK-NEXT:.b8 12 // DW_FORM_flag +; CHECK-NEXT:.b8 56 // DW_AT_data_member_location +; CHECK-NEXT:.b8 10 // DW_FORM_block1 ; CHECK-NEXT:.b8 0 // EOM(1) ; CHECK-NEXT:.b8 0 // EOM(2) ; CHECK-NEXT:.b8 37 // Abbreviation Code ; CHECK-NEXT:.b8 46 // DW_TAG_subprogram ; CHECK-NEXT:.b8 1 // DW_CHILDREN_yes +; CHECK-NEXT:.b8 135 // DW_AT_MIPS_linkage_name +; CHECK-NEXT:.b8 64 +; CHECK-NEXT:.b8 8 // DW_FORM_string ; CHECK-NEXT:.b8 3 // DW_AT_name ; 
CHECK-NEXT:.b8 8 // DW_FORM_string ; CHECK-NEXT:.b8 58 // DW_AT_decl_file @@ -624,65 +592,45 @@ if.end: ; preds = %if.then, %entry ; CHECK-NEXT:.b8 0 // EOM(1) ; CHECK-NEXT:.b8 0 // EOM(2) ; CHECK-NEXT:.b8 38 // Abbreviation Code -; CHECK-NEXT:.b8 21 // DW_TAG_subroutine_type -; CHECK-NEXT:.b8 0 // DW_CHILDREN_no -; CHECK-NEXT:.b8 0 // EOM(1) -; CHECK-NEXT:.b8 0 // EOM(2) -; CHECK-NEXT:.b8 39 // Abbreviation Code -; CHECK-NEXT:.b8 15 // DW_TAG_pointer_type -; CHECK-NEXT:.b8 0 // DW_CHILDREN_no -; CHECK-NEXT:.b8 0 // EOM(1) -; CHECK-NEXT:.b8 0 // EOM(2) -; CHECK-NEXT:.b8 40 // Abbreviation Code -; CHECK-NEXT:.b8 38 // DW_TAG_const_type -; CHECK-NEXT:.b8 0 // DW_CHILDREN_no -; CHECK-NEXT:.b8 0 // EOM(1) -; CHECK-NEXT:.b8 0 // EOM(2) -; CHECK-NEXT:.b8 41 // Abbreviation Code -; CHECK-NEXT:.b8 21 // DW_TAG_subroutine_type -; CHECK-NEXT:.b8 1 // DW_CHILDREN_yes -; CHECK-NEXT:.b8 73 // DW_AT_type -; CHECK-NEXT:.b8 19 // DW_FORM_ref4 -; CHECK-NEXT:.b8 0 // EOM(1) -; CHECK-NEXT:.b8 0 // EOM(2) -; CHECK-NEXT:.b8 42 // Abbreviation Code ; CHECK-NEXT:.b8 46 // DW_TAG_subprogram ; CHECK-NEXT:.b8 1 // DW_CHILDREN_yes +; CHECK-NEXT:.b8 135 // DW_AT_MIPS_linkage_name +; CHECK-NEXT:.b8 64 +; CHECK-NEXT:.b8 8 // DW_FORM_string ; CHECK-NEXT:.b8 3 // DW_AT_name ; CHECK-NEXT:.b8 8 // DW_FORM_string ; CHECK-NEXT:.b8 58 // DW_AT_decl_file ; CHECK-NEXT:.b8 11 // DW_FORM_data1 ; CHECK-NEXT:.b8 59 // DW_AT_decl_line -; CHECK-NEXT:.b8 5 // DW_FORM_data2 -; CHECK-NEXT:.b8 60 // DW_AT_declaration -; CHECK-NEXT:.b8 12 // DW_FORM_flag +; CHECK-NEXT:.b8 11 // DW_FORM_data1 ; CHECK-NEXT:.b8 63 // DW_AT_external ; CHECK-NEXT:.b8 12 // DW_FORM_flag -; CHECK-NEXT:.b8 135 // DW_AT_noreturn -; CHECK-NEXT:.b8 1 -; CHECK-NEXT:.b8 12 // DW_FORM_flag +; CHECK-NEXT:.b8 32 // DW_AT_inline +; CHECK-NEXT:.b8 11 // DW_FORM_data1 ; CHECK-NEXT:.b8 0 // EOM(1) ; CHECK-NEXT:.b8 0 // EOM(2) -; CHECK-NEXT:.b8 43 // Abbreviation Code -; CHECK-NEXT:.b8 46 // DW_TAG_subprogram +; CHECK-NEXT:.b8 39 // Abbreviation Code +; CHECK-NEXT:.b8 5 // DW_TAG_formal_parameter ; CHECK-NEXT:.b8 0 // DW_CHILDREN_no ; CHECK-NEXT:.b8 3 // DW_AT_name ; CHECK-NEXT:.b8 8 // DW_FORM_string ; CHECK-NEXT:.b8 58 // DW_AT_decl_file ; CHECK-NEXT:.b8 11 // DW_FORM_data1 ; CHECK-NEXT:.b8 59 // DW_AT_decl_line -; CHECK-NEXT:.b8 5 // DW_FORM_data2 +; CHECK-NEXT:.b8 11 // DW_FORM_data1 ; CHECK-NEXT:.b8 73 // DW_AT_type ; CHECK-NEXT:.b8 19 // DW_FORM_ref4 -; CHECK-NEXT:.b8 60 // DW_AT_declaration -; CHECK-NEXT:.b8 12 // DW_FORM_flag -; CHECK-NEXT:.b8 63 // DW_AT_external -; CHECK-NEXT:.b8 12 // DW_FORM_flag ; CHECK-NEXT:.b8 0 // EOM(1) ; CHECK-NEXT:.b8 0 // EOM(2) -; CHECK-NEXT:.b8 44 // Abbreviation Code +; CHECK-NEXT:.b8 40 // Abbreviation Code ; CHECK-NEXT:.b8 46 // DW_TAG_subprogram ; CHECK-NEXT:.b8 1 // DW_CHILDREN_yes +; CHECK-NEXT:.b8 17 // DW_AT_low_pc +; CHECK-NEXT:.b8 1 // DW_FORM_addr +; CHECK-NEXT:.b8 18 // DW_AT_high_pc +; CHECK-NEXT:.b8 1 // DW_FORM_addr +; CHECK-NEXT:.b8 64 // DW_AT_frame_base +; CHECK-NEXT:.b8 10 // DW_FORM_block1 ; CHECK-NEXT:.b8 135 // DW_AT_MIPS_linkage_name ; CHECK-NEXT:.b8 64 ; CHECK-NEXT:.b8 8 // DW_FORM_string @@ -691,11 +639,63 @@ if.end: ; preds = %if.then, %entry ; CHECK-NEXT:.b8 58 // DW_AT_decl_file ; CHECK-NEXT:.b8 11 // DW_FORM_data1 ; CHECK-NEXT:.b8 59 // DW_AT_decl_line -; CHECK-NEXT:.b8 5 // DW_FORM_data2 +; CHECK-NEXT:.b8 11 // DW_FORM_data1 +; CHECK-NEXT:.b8 63 // DW_AT_external +; CHECK-NEXT:.b8 12 // DW_FORM_flag +; CHECK-NEXT:.b8 0 // EOM(1) +; CHECK-NEXT:.b8 0 // EOM(2) +; CHECK-NEXT:.b8 41 // Abbreviation Code +; 
CHECK-NEXT:.b8 52 // DW_TAG_variable +; CHECK-NEXT:.b8 0 // DW_CHILDREN_no +; CHECK-NEXT:.b8 3 // DW_AT_name +; CHECK-NEXT:.b8 8 // DW_FORM_string +; CHECK-NEXT:.b8 58 // DW_AT_decl_file +; CHECK-NEXT:.b8 11 // DW_FORM_data1 +; CHECK-NEXT:.b8 59 // DW_AT_decl_line +; CHECK-NEXT:.b8 11 // DW_FORM_data1 ; CHECK-NEXT:.b8 73 // DW_AT_type ; CHECK-NEXT:.b8 19 // DW_FORM_ref4 -; CHECK-NEXT:.b8 60 // DW_AT_declaration -; CHECK-NEXT:.b8 12 // DW_FORM_flag +; CHECK-NEXT:.b8 0 // EOM(1) +; CHECK-NEXT:.b8 0 // EOM(2) +; CHECK-NEXT:.b8 42 // Abbreviation Code +; CHECK-NEXT:.b8 29 // DW_TAG_inlined_subroutine +; CHECK-NEXT:.b8 0 // DW_CHILDREN_no +; CHECK-NEXT:.b8 49 // DW_AT_abstract_origin +; CHECK-NEXT:.b8 19 // DW_FORM_ref4 +; CHECK-NEXT:.b8 17 // DW_AT_low_pc +; CHECK-NEXT:.b8 1 // DW_FORM_addr +; CHECK-NEXT:.b8 18 // DW_AT_high_pc +; CHECK-NEXT:.b8 1 // DW_FORM_addr +; CHECK-NEXT:.b8 88 // DW_AT_call_file +; CHECK-NEXT:.b8 11 // DW_FORM_data1 +; CHECK-NEXT:.b8 89 // DW_AT_call_line +; CHECK-NEXT:.b8 11 // DW_FORM_data1 +; CHECK-NEXT:.b8 87 // DW_AT_call_column +; CHECK-NEXT:.b8 11 // DW_FORM_data1 +; CHECK-NEXT:.b8 0 // EOM(1) +; CHECK-NEXT:.b8 0 // EOM(2) +; CHECK-NEXT:.b8 43 // Abbreviation Code +; CHECK-NEXT:.b8 29 // DW_TAG_inlined_subroutine +; CHECK-NEXT:.b8 1 // DW_CHILDREN_yes +; CHECK-NEXT:.b8 49 // DW_AT_abstract_origin +; CHECK-NEXT:.b8 19 // DW_FORM_ref4 +; CHECK-NEXT:.b8 17 // DW_AT_low_pc +; CHECK-NEXT:.b8 1 // DW_FORM_addr +; CHECK-NEXT:.b8 18 // DW_AT_high_pc +; CHECK-NEXT:.b8 1 // DW_FORM_addr +; CHECK-NEXT:.b8 88 // DW_AT_call_file +; CHECK-NEXT:.b8 11 // DW_FORM_data1 +; CHECK-NEXT:.b8 89 // DW_AT_call_line +; CHECK-NEXT:.b8 11 // DW_FORM_data1 +; CHECK-NEXT:.b8 87 // DW_AT_call_column +; CHECK-NEXT:.b8 11 // DW_FORM_data1 +; CHECK-NEXT:.b8 0 // EOM(1) +; CHECK-NEXT:.b8 0 // EOM(2) +; CHECK-NEXT:.b8 44 // Abbreviation Code +; CHECK-NEXT:.b8 5 // DW_TAG_formal_parameter +; CHECK-NEXT:.b8 0 // DW_CHILDREN_no +; CHECK-NEXT:.b8 49 // DW_AT_abstract_origin +; CHECK-NEXT:.b8 19 // DW_FORM_ref4 ; CHECK-NEXT:.b8 0 // EOM(1) ; CHECK-NEXT:.b8 0 // EOM(2) ; CHECK-NEXT:.b8 0 // EOM(3) @@ -744,7658 +744,7658 @@ if.end: ; preds = %if.then, %entry ; CHECK-NEXT:.b8 0 ; CHECK-NEXT:.b64 Lfunc_begin0 // DW_AT_low_pc ; CHECK-NEXT:.b64 Lfunc_end0 // DW_AT_high_pc -; CHECK-NEXT:.b8 2 // Abbrev [2] 0x41:0x22a DW_TAG_structure_type -; CHECK-NEXT:.b8 95 // DW_AT_name -; CHECK-NEXT:.b8 95 -; CHECK-NEXT:.b8 99 -; CHECK-NEXT:.b8 117 -; CHECK-NEXT:.b8 100 -; CHECK-NEXT:.b8 97 -; CHECK-NEXT:.b8 95 -; CHECK-NEXT:.b8 98 -; CHECK-NEXT:.b8 117 -; CHECK-NEXT:.b8 105 -; CHECK-NEXT:.b8 108 +; CHECK-NEXT:.b8 2 // Abbrev [2] 0x41:0x588 DW_TAG_namespace +; CHECK-NEXT:.b8 115 // DW_AT_name ; CHECK-NEXT:.b8 116 -; CHECK-NEXT:.b8 105 -; CHECK-NEXT:.b8 110 -; CHECK-NEXT:.b8 95 -; CHECK-NEXT:.b8 98 -; CHECK-NEXT:.b8 108 -; CHECK-NEXT:.b8 111 -; CHECK-NEXT:.b8 99 -; CHECK-NEXT:.b8 107 -; CHECK-NEXT:.b8 73 ; CHECK-NEXT:.b8 100 -; CHECK-NEXT:.b8 120 -; CHECK-NEXT:.b8 95 -; CHECK-NEXT:.b8 116 ; CHECK-NEXT:.b8 0 -; CHECK-NEXT:.b8 1 // DW_AT_byte_size -; CHECK-NEXT:.b8 2 // DW_AT_decl_file -; CHECK-NEXT:.b8 77 // DW_AT_decl_line -; CHECK-NEXT:.b8 3 // Abbrev [3] 0x5f:0x4f DW_TAG_subprogram -; CHECK-NEXT:.b8 95 // DW_AT_MIPS_linkage_name -; CHECK-NEXT:.b8 90 -; CHECK-NEXT:.b8 78 -; CHECK-NEXT:.b8 50 -; CHECK-NEXT:.b8 53 -; CHECK-NEXT:.b8 95 -; CHECK-NEXT:.b8 95 -; CHECK-NEXT:.b8 99 -; CHECK-NEXT:.b8 117 -; CHECK-NEXT:.b8 100 -; CHECK-NEXT:.b8 97 -; CHECK-NEXT:.b8 95 -; CHECK-NEXT:.b8 98 -; CHECK-NEXT:.b8 117 -; CHECK-NEXT:.b8 
105 -; CHECK-NEXT:.b8 108 -; CHECK-NEXT:.b8 116 -; CHECK-NEXT:.b8 105 -; CHECK-NEXT:.b8 110 -; CHECK-NEXT:.b8 95 -; CHECK-NEXT:.b8 98 -; CHECK-NEXT:.b8 108 -; CHECK-NEXT:.b8 111 -; CHECK-NEXT:.b8 99 -; CHECK-NEXT:.b8 107 -; CHECK-NEXT:.b8 73 -; CHECK-NEXT:.b8 100 -; CHECK-NEXT:.b8 120 -; CHECK-NEXT:.b8 95 -; CHECK-NEXT:.b8 116 -; CHECK-NEXT:.b8 49 -; CHECK-NEXT:.b8 55 -; CHECK-NEXT:.b8 95 -; CHECK-NEXT:.b8 95 -; CHECK-NEXT:.b8 102 -; CHECK-NEXT:.b8 101 -; CHECK-NEXT:.b8 116 -; CHECK-NEXT:.b8 99 -; CHECK-NEXT:.b8 104 -; CHECK-NEXT:.b8 95 -; CHECK-NEXT:.b8 98 -; CHECK-NEXT:.b8 117 -; CHECK-NEXT:.b8 105 -; CHECK-NEXT:.b8 108 -; CHECK-NEXT:.b8 116 -; CHECK-NEXT:.b8 105 -; CHECK-NEXT:.b8 110 -; CHECK-NEXT:.b8 95 -; CHECK-NEXT:.b8 120 -; CHECK-NEXT:.b8 69 -; CHECK-NEXT:.b8 118 -; CHECK-NEXT:.b8 0 -; CHECK-NEXT:.b8 95 // DW_AT_name -; CHECK-NEXT:.b8 95 -; CHECK-NEXT:.b8 102 -; CHECK-NEXT:.b8 101 -; CHECK-NEXT:.b8 116 -; CHECK-NEXT:.b8 99 -; CHECK-NEXT:.b8 104 -; CHECK-NEXT:.b8 95 -; CHECK-NEXT:.b8 98 -; CHECK-NEXT:.b8 117 -; CHECK-NEXT:.b8 105 -; CHECK-NEXT:.b8 108 -; CHECK-NEXT:.b8 116 -; CHECK-NEXT:.b8 105 -; CHECK-NEXT:.b8 110 -; CHECK-NEXT:.b8 95 -; CHECK-NEXT:.b8 120 -; CHECK-NEXT:.b8 0 -; CHECK-NEXT:.b8 2 // DW_AT_decl_file +; CHECK-NEXT:.b8 3 // Abbrev [3] 0x46:0x7 DW_TAG_imported_declaration +; CHECK-NEXT:.b8 1 // DW_AT_decl_file +; CHECK-NEXT:.b8 202 // DW_AT_decl_line +; CHECK-NEXT:.b32 1481 // DW_AT_import +; CHECK-NEXT:.b8 3 // Abbrev [3] 0x4d:0x7 DW_TAG_imported_declaration +; CHECK-NEXT:.b8 1 // DW_AT_decl_file +; CHECK-NEXT:.b8 203 // DW_AT_decl_line +; CHECK-NEXT:.b32 1525 // DW_AT_import +; CHECK-NEXT:.b8 3 // Abbrev [3] 0x54:0x7 DW_TAG_imported_declaration +; CHECK-NEXT:.b8 1 // DW_AT_decl_file +; CHECK-NEXT:.b8 204 // DW_AT_decl_line +; CHECK-NEXT:.b32 1563 // DW_AT_import +; CHECK-NEXT:.b8 3 // Abbrev [3] 0x5b:0x7 DW_TAG_imported_declaration +; CHECK-NEXT:.b8 1 // DW_AT_decl_file +; CHECK-NEXT:.b8 205 // DW_AT_decl_line +; CHECK-NEXT:.b32 1594 // DW_AT_import +; CHECK-NEXT:.b8 3 // Abbrev [3] 0x62:0x7 DW_TAG_imported_declaration +; CHECK-NEXT:.b8 1 // DW_AT_decl_file +; CHECK-NEXT:.b8 206 // DW_AT_decl_line +; CHECK-NEXT:.b32 1623 // DW_AT_import +; CHECK-NEXT:.b8 3 // Abbrev [3] 0x69:0x7 DW_TAG_imported_declaration +; CHECK-NEXT:.b8 1 // DW_AT_decl_file +; CHECK-NEXT:.b8 207 // DW_AT_decl_line +; CHECK-NEXT:.b32 1654 // DW_AT_import +; CHECK-NEXT:.b8 3 // Abbrev [3] 0x70:0x7 DW_TAG_imported_declaration +; CHECK-NEXT:.b8 1 // DW_AT_decl_file +; CHECK-NEXT:.b8 208 // DW_AT_decl_line +; CHECK-NEXT:.b32 1683 // DW_AT_import +; CHECK-NEXT:.b8 3 // Abbrev [3] 0x77:0x7 DW_TAG_imported_declaration +; CHECK-NEXT:.b8 1 // DW_AT_decl_file +; CHECK-NEXT:.b8 209 // DW_AT_decl_line +; CHECK-NEXT:.b32 1720 // DW_AT_import +; CHECK-NEXT:.b8 3 // Abbrev [3] 0x7e:0x7 DW_TAG_imported_declaration +; CHECK-NEXT:.b8 1 // DW_AT_decl_file +; CHECK-NEXT:.b8 210 // DW_AT_decl_line +; CHECK-NEXT:.b32 1751 // DW_AT_import +; CHECK-NEXT:.b8 3 // Abbrev [3] 0x85:0x7 DW_TAG_imported_declaration +; CHECK-NEXT:.b8 1 // DW_AT_decl_file +; CHECK-NEXT:.b8 211 // DW_AT_decl_line +; CHECK-NEXT:.b32 1780 // DW_AT_import +; CHECK-NEXT:.b8 3 // Abbrev [3] 0x8c:0x7 DW_TAG_imported_declaration +; CHECK-NEXT:.b8 1 // DW_AT_decl_file +; CHECK-NEXT:.b8 212 // DW_AT_decl_line +; CHECK-NEXT:.b32 1809 // DW_AT_import +; CHECK-NEXT:.b8 3 // Abbrev [3] 0x93:0x7 DW_TAG_imported_declaration +; CHECK-NEXT:.b8 1 // DW_AT_decl_file +; CHECK-NEXT:.b8 213 // DW_AT_decl_line +; CHECK-NEXT:.b32 1852 // DW_AT_import +; CHECK-NEXT:.b8 3 
// Abbrev [3] 0x9a:0x7 DW_TAG_imported_declaration +; CHECK-NEXT:.b8 1 // DW_AT_decl_file +; CHECK-NEXT:.b8 214 // DW_AT_decl_line +; CHECK-NEXT:.b32 1879 // DW_AT_import +; CHECK-NEXT:.b8 3 // Abbrev [3] 0xa1:0x7 DW_TAG_imported_declaration +; CHECK-NEXT:.b8 1 // DW_AT_decl_file +; CHECK-NEXT:.b8 215 // DW_AT_decl_line +; CHECK-NEXT:.b32 1908 // DW_AT_import +; CHECK-NEXT:.b8 3 // Abbrev [3] 0xa8:0x7 DW_TAG_imported_declaration +; CHECK-NEXT:.b8 1 // DW_AT_decl_file +; CHECK-NEXT:.b8 216 // DW_AT_decl_line +; CHECK-NEXT:.b32 1935 // DW_AT_import +; CHECK-NEXT:.b8 3 // Abbrev [3] 0xaf:0x7 DW_TAG_imported_declaration +; CHECK-NEXT:.b8 1 // DW_AT_decl_file +; CHECK-NEXT:.b8 217 // DW_AT_decl_line +; CHECK-NEXT:.b32 1964 // DW_AT_import +; CHECK-NEXT:.b8 3 // Abbrev [3] 0xb6:0x7 DW_TAG_imported_declaration +; CHECK-NEXT:.b8 1 // DW_AT_decl_file +; CHECK-NEXT:.b8 218 // DW_AT_decl_line +; CHECK-NEXT:.b32 1991 // DW_AT_import +; CHECK-NEXT:.b8 3 // Abbrev [3] 0xbd:0x7 DW_TAG_imported_declaration +; CHECK-NEXT:.b8 1 // DW_AT_decl_file +; CHECK-NEXT:.b8 219 // DW_AT_decl_line +; CHECK-NEXT:.b32 2020 // DW_AT_import +; CHECK-NEXT:.b8 3 // Abbrev [3] 0xc4:0x7 DW_TAG_imported_declaration +; CHECK-NEXT:.b8 1 // DW_AT_decl_file +; CHECK-NEXT:.b8 220 // DW_AT_decl_line +; CHECK-NEXT:.b32 2051 // DW_AT_import +; CHECK-NEXT:.b8 3 // Abbrev [3] 0xcb:0x7 DW_TAG_imported_declaration +; CHECK-NEXT:.b8 1 // DW_AT_decl_file +; CHECK-NEXT:.b8 221 // DW_AT_decl_line +; CHECK-NEXT:.b32 2080 // DW_AT_import +; CHECK-NEXT:.b8 3 // Abbrev [3] 0xd2:0x7 DW_TAG_imported_declaration +; CHECK-NEXT:.b8 1 // DW_AT_decl_file +; CHECK-NEXT:.b8 222 // DW_AT_decl_line +; CHECK-NEXT:.b32 2115 // DW_AT_import +; CHECK-NEXT:.b8 3 // Abbrev [3] 0xd9:0x7 DW_TAG_imported_declaration +; CHECK-NEXT:.b8 1 // DW_AT_decl_file +; CHECK-NEXT:.b8 223 // DW_AT_decl_line +; CHECK-NEXT:.b32 2146 // DW_AT_import +; CHECK-NEXT:.b8 3 // Abbrev [3] 0xe0:0x7 DW_TAG_imported_declaration +; CHECK-NEXT:.b8 1 // DW_AT_decl_file +; CHECK-NEXT:.b8 224 // DW_AT_decl_line +; CHECK-NEXT:.b32 2185 // DW_AT_import +; CHECK-NEXT:.b8 3 // Abbrev [3] 0xe7:0x7 DW_TAG_imported_declaration +; CHECK-NEXT:.b8 1 // DW_AT_decl_file +; CHECK-NEXT:.b8 225 // DW_AT_decl_line +; CHECK-NEXT:.b32 2220 // DW_AT_import +; CHECK-NEXT:.b8 3 // Abbrev [3] 0xee:0x7 DW_TAG_imported_declaration +; CHECK-NEXT:.b8 1 // DW_AT_decl_file +; CHECK-NEXT:.b8 226 // DW_AT_decl_line +; CHECK-NEXT:.b32 2255 // DW_AT_import +; CHECK-NEXT:.b8 3 // Abbrev [3] 0xf5:0x7 DW_TAG_imported_declaration +; CHECK-NEXT:.b8 1 // DW_AT_decl_file +; CHECK-NEXT:.b8 227 // DW_AT_decl_line +; CHECK-NEXT:.b32 2290 // DW_AT_import +; CHECK-NEXT:.b8 3 // Abbrev [3] 0xfc:0x7 DW_TAG_imported_declaration +; CHECK-NEXT:.b8 1 // DW_AT_decl_file +; CHECK-NEXT:.b8 228 // DW_AT_decl_line +; CHECK-NEXT:.b32 2339 // DW_AT_import +; CHECK-NEXT:.b8 3 // Abbrev [3] 0x103:0x7 DW_TAG_imported_declaration +; CHECK-NEXT:.b8 1 // DW_AT_decl_file +; CHECK-NEXT:.b8 229 // DW_AT_decl_line +; CHECK-NEXT:.b32 2382 // DW_AT_import +; CHECK-NEXT:.b8 3 // Abbrev [3] 0x10a:0x7 DW_TAG_imported_declaration +; CHECK-NEXT:.b8 1 // DW_AT_decl_file +; CHECK-NEXT:.b8 230 // DW_AT_decl_line +; CHECK-NEXT:.b32 2419 // DW_AT_import +; CHECK-NEXT:.b8 3 // Abbrev [3] 0x111:0x7 DW_TAG_imported_declaration +; CHECK-NEXT:.b8 1 // DW_AT_decl_file +; CHECK-NEXT:.b8 231 // DW_AT_decl_line +; CHECK-NEXT:.b32 2450 // DW_AT_import +; CHECK-NEXT:.b8 3 // Abbrev [3] 0x118:0x7 DW_TAG_imported_declaration +; CHECK-NEXT:.b8 1 // DW_AT_decl_file +; CHECK-NEXT:.b8 232 
// DW_AT_decl_line +; CHECK-NEXT:.b32 2495 // DW_AT_import +; CHECK-NEXT:.b8 3 // Abbrev [3] 0x11f:0x7 DW_TAG_imported_declaration +; CHECK-NEXT:.b8 1 // DW_AT_decl_file +; CHECK-NEXT:.b8 233 // DW_AT_decl_line +; CHECK-NEXT:.b32 2540 // DW_AT_import +; CHECK-NEXT:.b8 3 // Abbrev [3] 0x126:0x7 DW_TAG_imported_declaration +; CHECK-NEXT:.b8 1 // DW_AT_decl_file +; CHECK-NEXT:.b8 234 // DW_AT_decl_line +; CHECK-NEXT:.b32 2596 // DW_AT_import +; CHECK-NEXT:.b8 3 // Abbrev [3] 0x12d:0x7 DW_TAG_imported_declaration +; CHECK-NEXT:.b8 1 // DW_AT_decl_file +; CHECK-NEXT:.b8 235 // DW_AT_decl_line +; CHECK-NEXT:.b32 2627 // DW_AT_import +; CHECK-NEXT:.b8 3 // Abbrev [3] 0x134:0x7 DW_TAG_imported_declaration +; CHECK-NEXT:.b8 1 // DW_AT_decl_file +; CHECK-NEXT:.b8 236 // DW_AT_decl_line +; CHECK-NEXT:.b32 2666 // DW_AT_import +; CHECK-NEXT:.b8 3 // Abbrev [3] 0x13b:0x7 DW_TAG_imported_declaration +; CHECK-NEXT:.b8 1 // DW_AT_decl_file +; CHECK-NEXT:.b8 237 // DW_AT_decl_line +; CHECK-NEXT:.b32 2716 // DW_AT_import +; CHECK-NEXT:.b8 3 // Abbrev [3] 0x142:0x7 DW_TAG_imported_declaration +; CHECK-NEXT:.b8 1 // DW_AT_decl_file +; CHECK-NEXT:.b8 238 // DW_AT_decl_line +; CHECK-NEXT:.b32 2770 // DW_AT_import +; CHECK-NEXT:.b8 3 // Abbrev [3] 0x149:0x7 DW_TAG_imported_declaration +; CHECK-NEXT:.b8 1 // DW_AT_decl_file +; CHECK-NEXT:.b8 239 // DW_AT_decl_line +; CHECK-NEXT:.b32 2801 // DW_AT_import +; CHECK-NEXT:.b8 3 // Abbrev [3] 0x150:0x7 DW_TAG_imported_declaration +; CHECK-NEXT:.b8 1 // DW_AT_decl_file +; CHECK-NEXT:.b8 240 // DW_AT_decl_line +; CHECK-NEXT:.b32 2838 // DW_AT_import +; CHECK-NEXT:.b8 3 // Abbrev [3] 0x157:0x7 DW_TAG_imported_declaration +; CHECK-NEXT:.b8 1 // DW_AT_decl_file +; CHECK-NEXT:.b8 241 // DW_AT_decl_line +; CHECK-NEXT:.b32 2888 // DW_AT_import +; CHECK-NEXT:.b8 3 // Abbrev [3] 0x15e:0x7 DW_TAG_imported_declaration +; CHECK-NEXT:.b8 1 // DW_AT_decl_file +; CHECK-NEXT:.b8 242 // DW_AT_decl_line +; CHECK-NEXT:.b32 2929 // DW_AT_import +; CHECK-NEXT:.b8 3 // Abbrev [3] 0x165:0x7 DW_TAG_imported_declaration +; CHECK-NEXT:.b8 1 // DW_AT_decl_file +; CHECK-NEXT:.b8 243 // DW_AT_decl_line +; CHECK-NEXT:.b32 2966 // DW_AT_import +; CHECK-NEXT:.b8 3 // Abbrev [3] 0x16c:0x7 DW_TAG_imported_declaration +; CHECK-NEXT:.b8 1 // DW_AT_decl_file +; CHECK-NEXT:.b8 244 // DW_AT_decl_line +; CHECK-NEXT:.b32 2999 // DW_AT_import +; CHECK-NEXT:.b8 3 // Abbrev [3] 0x173:0x7 DW_TAG_imported_declaration +; CHECK-NEXT:.b8 1 // DW_AT_decl_file +; CHECK-NEXT:.b8 245 // DW_AT_decl_line +; CHECK-NEXT:.b32 3030 // DW_AT_import +; CHECK-NEXT:.b8 3 // Abbrev [3] 0x17a:0x7 DW_TAG_imported_declaration +; CHECK-NEXT:.b8 1 // DW_AT_decl_file +; CHECK-NEXT:.b8 246 // DW_AT_decl_line +; CHECK-NEXT:.b32 3063 // DW_AT_import +; CHECK-NEXT:.b8 3 // Abbrev [3] 0x181:0x7 DW_TAG_imported_declaration +; CHECK-NEXT:.b8 1 // DW_AT_decl_file +; CHECK-NEXT:.b8 247 // DW_AT_decl_line +; CHECK-NEXT:.b32 3090 // DW_AT_import +; CHECK-NEXT:.b8 3 // Abbrev [3] 0x188:0x7 DW_TAG_imported_declaration +; CHECK-NEXT:.b8 1 // DW_AT_decl_file +; CHECK-NEXT:.b8 248 // DW_AT_decl_line +; CHECK-NEXT:.b32 3121 // DW_AT_import +; CHECK-NEXT:.b8 3 // Abbrev [3] 0x18f:0x7 DW_TAG_imported_declaration +; CHECK-NEXT:.b8 1 // DW_AT_decl_file +; CHECK-NEXT:.b8 249 // DW_AT_decl_line +; CHECK-NEXT:.b32 3152 // DW_AT_import +; CHECK-NEXT:.b8 3 // Abbrev [3] 0x196:0x7 DW_TAG_imported_declaration +; CHECK-NEXT:.b8 1 // DW_AT_decl_file +; CHECK-NEXT:.b8 250 // DW_AT_decl_line +; CHECK-NEXT:.b32 3181 // DW_AT_import +; CHECK-NEXT:.b8 3 // Abbrev [3] 
0x19d:0x7 DW_TAG_imported_declaration +; CHECK-NEXT:.b8 1 // DW_AT_decl_file +; CHECK-NEXT:.b8 251 // DW_AT_decl_line +; CHECK-NEXT:.b32 3210 // DW_AT_import +; CHECK-NEXT:.b8 3 // Abbrev [3] 0x1a4:0x7 DW_TAG_imported_declaration +; CHECK-NEXT:.b8 1 // DW_AT_decl_file +; CHECK-NEXT:.b8 252 // DW_AT_decl_line +; CHECK-NEXT:.b32 3241 // DW_AT_import +; CHECK-NEXT:.b8 3 // Abbrev [3] 0x1ab:0x7 DW_TAG_imported_declaration +; CHECK-NEXT:.b8 1 // DW_AT_decl_file +; CHECK-NEXT:.b8 253 // DW_AT_decl_line +; CHECK-NEXT:.b32 3274 // DW_AT_import +; CHECK-NEXT:.b8 3 // Abbrev [3] 0x1b2:0x7 DW_TAG_imported_declaration +; CHECK-NEXT:.b8 1 // DW_AT_decl_file +; CHECK-NEXT:.b8 254 // DW_AT_decl_line +; CHECK-NEXT:.b32 3309 // DW_AT_import +; CHECK-NEXT:.b8 3 // Abbrev [3] 0x1b9:0x7 DW_TAG_imported_declaration +; CHECK-NEXT:.b8 1 // DW_AT_decl_file +; CHECK-NEXT:.b8 255 // DW_AT_decl_line +; CHECK-NEXT:.b32 3350 // DW_AT_import +; CHECK-NEXT:.b8 4 // Abbrev [4] 0x1c0:0x8 DW_TAG_imported_declaration +; CHECK-NEXT:.b8 1 // DW_AT_decl_file +; CHECK-NEXT:.b8 0 // DW_AT_decl_line +; CHECK-NEXT:.b8 1 +; CHECK-NEXT:.b32 3407 // DW_AT_import +; CHECK-NEXT:.b8 4 // Abbrev [4] 0x1c8:0x8 DW_TAG_imported_declaration +; CHECK-NEXT:.b8 1 // DW_AT_decl_file +; CHECK-NEXT:.b8 1 // DW_AT_decl_line +; CHECK-NEXT:.b8 1 +; CHECK-NEXT:.b32 3438 // DW_AT_import +; CHECK-NEXT:.b8 4 // Abbrev [4] 0x1d0:0x8 DW_TAG_imported_declaration +; CHECK-NEXT:.b8 1 // DW_AT_decl_file +; CHECK-NEXT:.b8 2 // DW_AT_decl_line +; CHECK-NEXT:.b8 1 +; CHECK-NEXT:.b32 3477 // DW_AT_import +; CHECK-NEXT:.b8 4 // Abbrev [4] 0x1d8:0x8 DW_TAG_imported_declaration +; CHECK-NEXT:.b8 1 // DW_AT_decl_file +; CHECK-NEXT:.b8 3 // DW_AT_decl_line +; CHECK-NEXT:.b8 1 +; CHECK-NEXT:.b32 3522 // DW_AT_import +; CHECK-NEXT:.b8 4 // Abbrev [4] 0x1e0:0x8 DW_TAG_imported_declaration +; CHECK-NEXT:.b8 1 // DW_AT_decl_file +; CHECK-NEXT:.b8 4 // DW_AT_decl_line +; CHECK-NEXT:.b8 1 +; CHECK-NEXT:.b32 3555 // DW_AT_import +; CHECK-NEXT:.b8 4 // Abbrev [4] 0x1e8:0x8 DW_TAG_imported_declaration +; CHECK-NEXT:.b8 1 // DW_AT_decl_file +; CHECK-NEXT:.b8 5 // DW_AT_decl_line +; CHECK-NEXT:.b8 1 +; CHECK-NEXT:.b32 3600 // DW_AT_import +; CHECK-NEXT:.b8 4 // Abbrev [4] 0x1f0:0x8 DW_TAG_imported_declaration +; CHECK-NEXT:.b8 1 // DW_AT_decl_file +; CHECK-NEXT:.b8 6 // DW_AT_decl_line +; CHECK-NEXT:.b8 1 +; CHECK-NEXT:.b32 3646 // DW_AT_import +; CHECK-NEXT:.b8 4 // Abbrev [4] 0x1f8:0x8 DW_TAG_imported_declaration +; CHECK-NEXT:.b8 1 // DW_AT_decl_file +; CHECK-NEXT:.b8 7 // DW_AT_decl_line +; CHECK-NEXT:.b8 1 +; CHECK-NEXT:.b32 3675 // DW_AT_import +; CHECK-NEXT:.b8 4 // Abbrev [4] 0x200:0x8 DW_TAG_imported_declaration +; CHECK-NEXT:.b8 1 // DW_AT_decl_file +; CHECK-NEXT:.b8 8 // DW_AT_decl_line +; CHECK-NEXT:.b8 1 +; CHECK-NEXT:.b32 3706 // DW_AT_import +; CHECK-NEXT:.b8 4 // Abbrev [4] 0x208:0x8 DW_TAG_imported_declaration +; CHECK-NEXT:.b8 1 // DW_AT_decl_file +; CHECK-NEXT:.b8 9 // DW_AT_decl_line +; CHECK-NEXT:.b8 1 +; CHECK-NEXT:.b32 3747 // DW_AT_import +; CHECK-NEXT:.b8 4 // Abbrev [4] 0x210:0x8 DW_TAG_imported_declaration +; CHECK-NEXT:.b8 1 // DW_AT_decl_file +; CHECK-NEXT:.b8 10 // DW_AT_decl_line +; CHECK-NEXT:.b8 1 +; CHECK-NEXT:.b32 3786 // DW_AT_import +; CHECK-NEXT:.b8 4 // Abbrev [4] 0x218:0x8 DW_TAG_imported_declaration +; CHECK-NEXT:.b8 1 // DW_AT_decl_file +; CHECK-NEXT:.b8 11 // DW_AT_decl_line +; CHECK-NEXT:.b8 1 +; CHECK-NEXT:.b32 3821 // DW_AT_import +; CHECK-NEXT:.b8 4 // Abbrev [4] 0x220:0x8 DW_TAG_imported_declaration +; CHECK-NEXT:.b8 1 // 
DW_AT_decl_file +; CHECK-NEXT:.b8 12 // DW_AT_decl_line +; CHECK-NEXT:.b8 1 +; CHECK-NEXT:.b32 3848 // DW_AT_import +; CHECK-NEXT:.b8 4 // Abbrev [4] 0x228:0x8 DW_TAG_imported_declaration +; CHECK-NEXT:.b8 1 // DW_AT_decl_file +; CHECK-NEXT:.b8 13 // DW_AT_decl_line +; CHECK-NEXT:.b8 1 +; CHECK-NEXT:.b32 3877 // DW_AT_import +; CHECK-NEXT:.b8 4 // Abbrev [4] 0x230:0x8 DW_TAG_imported_declaration +; CHECK-NEXT:.b8 1 // DW_AT_decl_file +; CHECK-NEXT:.b8 14 // DW_AT_decl_line +; CHECK-NEXT:.b8 1 +; CHECK-NEXT:.b32 3906 // DW_AT_import +; CHECK-NEXT:.b8 4 // Abbrev [4] 0x238:0x8 DW_TAG_imported_declaration +; CHECK-NEXT:.b8 1 // DW_AT_decl_file +; CHECK-NEXT:.b8 15 // DW_AT_decl_line +; CHECK-NEXT:.b8 1 +; CHECK-NEXT:.b32 3933 // DW_AT_import +; CHECK-NEXT:.b8 4 // Abbrev [4] 0x240:0x8 DW_TAG_imported_declaration +; CHECK-NEXT:.b8 1 // DW_AT_decl_file +; CHECK-NEXT:.b8 16 // DW_AT_decl_line +; CHECK-NEXT:.b8 1 +; CHECK-NEXT:.b32 3962 // DW_AT_import +; CHECK-NEXT:.b8 4 // Abbrev [4] 0x248:0x8 DW_TAG_imported_declaration +; CHECK-NEXT:.b8 1 // DW_AT_decl_file +; CHECK-NEXT:.b8 17 // DW_AT_decl_line +; CHECK-NEXT:.b8 1 +; CHECK-NEXT:.b32 3995 // DW_AT_import +; CHECK-NEXT:.b8 3 // Abbrev [3] 0x250:0x7 DW_TAG_imported_declaration +; CHECK-NEXT:.b8 3 // DW_AT_decl_file +; CHECK-NEXT:.b8 102 // DW_AT_decl_line +; CHECK-NEXT:.b32 4026 // DW_AT_import +; CHECK-NEXT:.b8 3 // Abbrev [3] 0x257:0x7 DW_TAG_imported_declaration +; CHECK-NEXT:.b8 3 // DW_AT_decl_file +; CHECK-NEXT:.b8 121 // DW_AT_decl_line +; CHECK-NEXT:.b32 4046 // DW_AT_import +; CHECK-NEXT:.b8 3 // Abbrev [3] 0x25e:0x7 DW_TAG_imported_declaration +; CHECK-NEXT:.b8 3 // DW_AT_decl_file +; CHECK-NEXT:.b8 140 // DW_AT_decl_line +; CHECK-NEXT:.b32 4066 // DW_AT_import +; CHECK-NEXT:.b8 3 // Abbrev [3] 0x265:0x7 DW_TAG_imported_declaration +; CHECK-NEXT:.b8 3 // DW_AT_decl_file +; CHECK-NEXT:.b8 159 // DW_AT_decl_line +; CHECK-NEXT:.b32 4086 // DW_AT_import +; CHECK-NEXT:.b8 3 // Abbrev [3] 0x26c:0x7 DW_TAG_imported_declaration +; CHECK-NEXT:.b8 3 // DW_AT_decl_file +; CHECK-NEXT:.b8 180 // DW_AT_decl_line +; CHECK-NEXT:.b32 4112 // DW_AT_import +; CHECK-NEXT:.b8 3 // Abbrev [3] 0x273:0x7 DW_TAG_imported_declaration +; CHECK-NEXT:.b8 3 // DW_AT_decl_file +; CHECK-NEXT:.b8 199 // DW_AT_decl_line +; CHECK-NEXT:.b32 4132 // DW_AT_import +; CHECK-NEXT:.b8 3 // Abbrev [3] 0x27a:0x7 DW_TAG_imported_declaration +; CHECK-NEXT:.b8 3 // DW_AT_decl_file +; CHECK-NEXT:.b8 218 // DW_AT_decl_line +; CHECK-NEXT:.b32 4151 // DW_AT_import +; CHECK-NEXT:.b8 3 // Abbrev [3] 0x281:0x7 DW_TAG_imported_declaration +; CHECK-NEXT:.b8 3 // DW_AT_decl_file +; CHECK-NEXT:.b8 237 // DW_AT_decl_line +; CHECK-NEXT:.b32 4171 // DW_AT_import +; CHECK-NEXT:.b8 4 // Abbrev [4] 0x288:0x8 DW_TAG_imported_declaration +; CHECK-NEXT:.b8 3 // DW_AT_decl_file +; CHECK-NEXT:.b8 0 // DW_AT_decl_line +; CHECK-NEXT:.b8 1 +; CHECK-NEXT:.b32 4190 // DW_AT_import +; CHECK-NEXT:.b8 4 // Abbrev [4] 0x290:0x8 DW_TAG_imported_declaration +; CHECK-NEXT:.b8 3 // DW_AT_decl_file +; CHECK-NEXT:.b8 19 // DW_AT_decl_line +; CHECK-NEXT:.b8 1 +; CHECK-NEXT:.b32 4210 // DW_AT_import +; CHECK-NEXT:.b8 4 // Abbrev [4] 0x298:0x8 DW_TAG_imported_declaration +; CHECK-NEXT:.b8 3 // DW_AT_decl_file +; CHECK-NEXT:.b8 38 // DW_AT_decl_line +; CHECK-NEXT:.b8 1 +; CHECK-NEXT:.b32 4231 // DW_AT_import +; CHECK-NEXT:.b8 4 // Abbrev [4] 0x2a0:0x8 DW_TAG_imported_declaration +; CHECK-NEXT:.b8 3 // DW_AT_decl_file +; CHECK-NEXT:.b8 59 // DW_AT_decl_line +; CHECK-NEXT:.b8 1 +; CHECK-NEXT:.b32 4256 // DW_AT_import +; 
CHECK-NEXT:.b8 4 // Abbrev [4] 0x2a8:0x8 DW_TAG_imported_declaration +; CHECK-NEXT:.b8 3 // DW_AT_decl_file ; CHECK-NEXT:.b8 78 // DW_AT_decl_line -; CHECK-NEXT:.b32 619 // DW_AT_type -; CHECK-NEXT:.b8 1 // DW_AT_declaration -; CHECK-NEXT:.b8 1 // DW_AT_external -; CHECK-NEXT:.b8 3 // Abbrev [3] 0xae:0x4f DW_TAG_subprogram -; CHECK-NEXT:.b8 95 // DW_AT_MIPS_linkage_name -; CHECK-NEXT:.b8 90 -; CHECK-NEXT:.b8 78 -; CHECK-NEXT:.b8 50 -; CHECK-NEXT:.b8 53 -; CHECK-NEXT:.b8 95 -; CHECK-NEXT:.b8 95 -; CHECK-NEXT:.b8 99 -; CHECK-NEXT:.b8 117 -; CHECK-NEXT:.b8 100 -; CHECK-NEXT:.b8 97 -; CHECK-NEXT:.b8 95 -; CHECK-NEXT:.b8 98 -; CHECK-NEXT:.b8 117 -; CHECK-NEXT:.b8 105 -; CHECK-NEXT:.b8 108 -; CHECK-NEXT:.b8 116 -; CHECK-NEXT:.b8 105 -; CHECK-NEXT:.b8 110 -; CHECK-NEXT:.b8 95 -; CHECK-NEXT:.b8 98 -; CHECK-NEXT:.b8 108 -; CHECK-NEXT:.b8 111 -; CHECK-NEXT:.b8 99 -; CHECK-NEXT:.b8 107 -; CHECK-NEXT:.b8 73 -; CHECK-NEXT:.b8 100 -; CHECK-NEXT:.b8 120 -; CHECK-NEXT:.b8 95 -; CHECK-NEXT:.b8 116 -; CHECK-NEXT:.b8 49 -; CHECK-NEXT:.b8 55 -; CHECK-NEXT:.b8 95 -; CHECK-NEXT:.b8 95 -; CHECK-NEXT:.b8 102 -; CHECK-NEXT:.b8 101 -; CHECK-NEXT:.b8 116 -; CHECK-NEXT:.b8 99 -; CHECK-NEXT:.b8 104 -; CHECK-NEXT:.b8 95 -; CHECK-NEXT:.b8 98 -; CHECK-NEXT:.b8 117 -; CHECK-NEXT:.b8 105 -; CHECK-NEXT:.b8 108 -; CHECK-NEXT:.b8 116 -; CHECK-NEXT:.b8 105 -; CHECK-NEXT:.b8 110 -; CHECK-NEXT:.b8 95 -; CHECK-NEXT:.b8 121 -; CHECK-NEXT:.b8 69 -; CHECK-NEXT:.b8 118 -; CHECK-NEXT:.b8 0 -; CHECK-NEXT:.b8 95 // DW_AT_name -; CHECK-NEXT:.b8 95 -; CHECK-NEXT:.b8 102 -; CHECK-NEXT:.b8 101 -; CHECK-NEXT:.b8 116 -; CHECK-NEXT:.b8 99 -; CHECK-NEXT:.b8 104 -; CHECK-NEXT:.b8 95 -; CHECK-NEXT:.b8 98 -; CHECK-NEXT:.b8 117 -; CHECK-NEXT:.b8 105 -; CHECK-NEXT:.b8 108 -; CHECK-NEXT:.b8 116 -; CHECK-NEXT:.b8 105 -; CHECK-NEXT:.b8 110 -; CHECK-NEXT:.b8 95 -; CHECK-NEXT:.b8 121 -; CHECK-NEXT:.b8 0 -; CHECK-NEXT:.b8 2 // DW_AT_decl_file -; CHECK-NEXT:.b8 79 // DW_AT_decl_line -; CHECK-NEXT:.b32 619 // DW_AT_type -; CHECK-NEXT:.b8 1 // DW_AT_declaration -; CHECK-NEXT:.b8 1 // DW_AT_external -; CHECK-NEXT:.b8 3 // Abbrev [3] 0xfd:0x4f DW_TAG_subprogram -; CHECK-NEXT:.b8 95 // DW_AT_MIPS_linkage_name -; CHECK-NEXT:.b8 90 -; CHECK-NEXT:.b8 78 -; CHECK-NEXT:.b8 50 -; CHECK-NEXT:.b8 53 -; CHECK-NEXT:.b8 95 -; CHECK-NEXT:.b8 95 -; CHECK-NEXT:.b8 99 -; CHECK-NEXT:.b8 117 -; CHECK-NEXT:.b8 100 -; CHECK-NEXT:.b8 97 -; CHECK-NEXT:.b8 95 -; CHECK-NEXT:.b8 98 -; CHECK-NEXT:.b8 117 -; CHECK-NEXT:.b8 105 -; CHECK-NEXT:.b8 108 -; CHECK-NEXT:.b8 116 -; CHECK-NEXT:.b8 105 -; CHECK-NEXT:.b8 110 -; CHECK-NEXT:.b8 95 -; CHECK-NEXT:.b8 98 -; CHECK-NEXT:.b8 108 -; CHECK-NEXT:.b8 111 -; CHECK-NEXT:.b8 99 -; CHECK-NEXT:.b8 107 -; CHECK-NEXT:.b8 73 -; CHECK-NEXT:.b8 100 -; CHECK-NEXT:.b8 120 -; CHECK-NEXT:.b8 95 -; CHECK-NEXT:.b8 116 -; CHECK-NEXT:.b8 49 -; CHECK-NEXT:.b8 55 -; CHECK-NEXT:.b8 95 -; CHECK-NEXT:.b8 95 -; CHECK-NEXT:.b8 102 -; CHECK-NEXT:.b8 101 -; CHECK-NEXT:.b8 116 -; CHECK-NEXT:.b8 99 -; CHECK-NEXT:.b8 104 -; CHECK-NEXT:.b8 95 -; CHECK-NEXT:.b8 98 -; CHECK-NEXT:.b8 117 -; CHECK-NEXT:.b8 105 -; CHECK-NEXT:.b8 108 -; CHECK-NEXT:.b8 116 -; CHECK-NEXT:.b8 105 -; CHECK-NEXT:.b8 110 -; CHECK-NEXT:.b8 95 -; CHECK-NEXT:.b8 122 -; CHECK-NEXT:.b8 69 -; CHECK-NEXT:.b8 118 -; CHECK-NEXT:.b8 0 -; CHECK-NEXT:.b8 95 // DW_AT_name -; CHECK-NEXT:.b8 95 -; CHECK-NEXT:.b8 102 -; CHECK-NEXT:.b8 101 -; CHECK-NEXT:.b8 116 -; CHECK-NEXT:.b8 99 -; CHECK-NEXT:.b8 104 -; CHECK-NEXT:.b8 95 -; CHECK-NEXT:.b8 98 -; CHECK-NEXT:.b8 117 -; CHECK-NEXT:.b8 105 -; CHECK-NEXT:.b8 108 -; 
CHECK-NEXT:.b8 116 -; CHECK-NEXT:.b8 105 -; CHECK-NEXT:.b8 110 -; CHECK-NEXT:.b8 95 -; CHECK-NEXT:.b8 122 -; CHECK-NEXT:.b8 0 -; CHECK-NEXT:.b8 2 // DW_AT_decl_file -; CHECK-NEXT:.b8 80 // DW_AT_decl_line -; CHECK-NEXT:.b32 619 // DW_AT_type -; CHECK-NEXT:.b8 1 // DW_AT_declaration -; CHECK-NEXT:.b8 1 // DW_AT_external -; CHECK-NEXT:.b8 4 // Abbrev [4] 0x14c:0x49 DW_TAG_subprogram -; CHECK-NEXT:.b8 95 // DW_AT_MIPS_linkage_name -; CHECK-NEXT:.b8 90 -; CHECK-NEXT:.b8 78 -; CHECK-NEXT:.b8 75 -; CHECK-NEXT:.b8 50 -; CHECK-NEXT:.b8 53 -; CHECK-NEXT:.b8 95 -; CHECK-NEXT:.b8 95 -; CHECK-NEXT:.b8 99 -; CHECK-NEXT:.b8 117 -; CHECK-NEXT:.b8 100 -; CHECK-NEXT:.b8 97 -; CHECK-NEXT:.b8 95 -; CHECK-NEXT:.b8 98 -; CHECK-NEXT:.b8 117 -; CHECK-NEXT:.b8 105 -; CHECK-NEXT:.b8 108 -; CHECK-NEXT:.b8 116 -; CHECK-NEXT:.b8 105 -; CHECK-NEXT:.b8 110 -; CHECK-NEXT:.b8 95 -; CHECK-NEXT:.b8 98 -; CHECK-NEXT:.b8 108 -; CHECK-NEXT:.b8 111 -; CHECK-NEXT:.b8 99 -; CHECK-NEXT:.b8 107 -; CHECK-NEXT:.b8 73 -; CHECK-NEXT:.b8 100 -; CHECK-NEXT:.b8 120 -; CHECK-NEXT:.b8 95 -; CHECK-NEXT:.b8 116 -; CHECK-NEXT:.b8 99 -; CHECK-NEXT:.b8 118 -; CHECK-NEXT:.b8 53 -; CHECK-NEXT:.b8 117 -; CHECK-NEXT:.b8 105 -; CHECK-NEXT:.b8 110 -; CHECK-NEXT:.b8 116 -; CHECK-NEXT:.b8 51 -; CHECK-NEXT:.b8 69 -; CHECK-NEXT:.b8 118 -; CHECK-NEXT:.b8 0 -; CHECK-NEXT:.b8 111 // DW_AT_name -; CHECK-NEXT:.b8 112 -; CHECK-NEXT:.b8 101 -; CHECK-NEXT:.b8 114 -; CHECK-NEXT:.b8 97 -; CHECK-NEXT:.b8 116 -; CHECK-NEXT:.b8 111 -; CHECK-NEXT:.b8 114 -; CHECK-NEXT:.b8 32 -; CHECK-NEXT:.b8 117 -; CHECK-NEXT:.b8 105 -; CHECK-NEXT:.b8 110 -; CHECK-NEXT:.b8 116 -; CHECK-NEXT:.b8 51 -; CHECK-NEXT:.b8 0 -; CHECK-NEXT:.b8 2 // DW_AT_decl_file -; CHECK-NEXT:.b8 83 // DW_AT_decl_line -; CHECK-NEXT:.b32 635 // DW_AT_type -; CHECK-NEXT:.b8 1 // DW_AT_declaration -; CHECK-NEXT:.b8 1 // DW_AT_external -; CHECK-NEXT:.b8 5 // Abbrev [5] 0x18e:0x6 DW_TAG_formal_parameter -; CHECK-NEXT:.b32 682 // DW_AT_type -; CHECK-NEXT:.b8 1 // DW_AT_artificial -; CHECK-NEXT:.b8 0 // End Of Children Mark -; CHECK-NEXT:.b8 6 // Abbrev [6] 0x195:0x27 DW_TAG_subprogram -; CHECK-NEXT:.b8 95 // DW_AT_name -; CHECK-NEXT:.b8 95 -; CHECK-NEXT:.b8 99 -; CHECK-NEXT:.b8 117 -; CHECK-NEXT:.b8 100 -; CHECK-NEXT:.b8 97 -; CHECK-NEXT:.b8 95 -; CHECK-NEXT:.b8 98 -; CHECK-NEXT:.b8 117 -; CHECK-NEXT:.b8 105 -; CHECK-NEXT:.b8 108 -; CHECK-NEXT:.b8 116 -; CHECK-NEXT:.b8 105 -; CHECK-NEXT:.b8 110 -; CHECK-NEXT:.b8 95 -; CHECK-NEXT:.b8 98 -; CHECK-NEXT:.b8 108 -; CHECK-NEXT:.b8 111 -; CHECK-NEXT:.b8 99 -; CHECK-NEXT:.b8 107 -; CHECK-NEXT:.b8 73 -; CHECK-NEXT:.b8 100 -; CHECK-NEXT:.b8 120 -; CHECK-NEXT:.b8 95 -; CHECK-NEXT:.b8 116 -; CHECK-NEXT:.b8 0 -; CHECK-NEXT:.b8 2 // DW_AT_decl_file -; CHECK-NEXT:.b8 85 // DW_AT_decl_line -; CHECK-NEXT:.b8 1 // DW_AT_declaration -; CHECK-NEXT:.b8 1 // DW_AT_external -; CHECK-NEXT:.b8 3 // DW_AT_accessibility -; CHECK-NEXT: // DW_ACCESS_private -; CHECK-NEXT:.b8 5 // Abbrev [5] 0x1b5:0x6 DW_TAG_formal_parameter -; CHECK-NEXT:.b32 692 // DW_AT_type -; CHECK-NEXT:.b8 1 // DW_AT_artificial -; CHECK-NEXT:.b8 0 // End Of Children Mark -; CHECK-NEXT:.b8 6 // Abbrev [6] 0x1bc:0x2c DW_TAG_subprogram -; CHECK-NEXT:.b8 95 // DW_AT_name -; CHECK-NEXT:.b8 95 -; CHECK-NEXT:.b8 99 -; CHECK-NEXT:.b8 117 -; CHECK-NEXT:.b8 100 -; CHECK-NEXT:.b8 97 -; CHECK-NEXT:.b8 95 -; CHECK-NEXT:.b8 98 -; CHECK-NEXT:.b8 117 -; CHECK-NEXT:.b8 105 -; CHECK-NEXT:.b8 108 -; CHECK-NEXT:.b8 116 -; CHECK-NEXT:.b8 105 -; CHECK-NEXT:.b8 110 -; CHECK-NEXT:.b8 95 -; CHECK-NEXT:.b8 98 -; CHECK-NEXT:.b8 108 -; 
CHECK-NEXT:.b8 111 -; CHECK-NEXT:.b8 99 -; CHECK-NEXT:.b8 107 -; CHECK-NEXT:.b8 73 -; CHECK-NEXT:.b8 100 -; CHECK-NEXT:.b8 120 -; CHECK-NEXT:.b8 95 -; CHECK-NEXT:.b8 116 -; CHECK-NEXT:.b8 0 -; CHECK-NEXT:.b8 2 // DW_AT_decl_file -; CHECK-NEXT:.b8 85 // DW_AT_decl_line -; CHECK-NEXT:.b8 1 // DW_AT_declaration -; CHECK-NEXT:.b8 1 // DW_AT_external -; CHECK-NEXT:.b8 3 // DW_AT_accessibility -; CHECK-NEXT: // DW_ACCESS_private -; CHECK-NEXT:.b8 5 // Abbrev [5] 0x1dc:0x6 DW_TAG_formal_parameter -; CHECK-NEXT:.b32 692 // DW_AT_type -; CHECK-NEXT:.b8 1 // DW_AT_artificial -; CHECK-NEXT:.b8 7 // Abbrev [7] 0x1e2:0x5 DW_TAG_formal_parameter -; CHECK-NEXT:.b32 697 // DW_AT_type +; CHECK-NEXT:.b8 1 +; CHECK-NEXT:.b32 4282 // DW_AT_import +; CHECK-NEXT:.b8 4 // Abbrev [4] 0x2b0:0x8 DW_TAG_imported_declaration +; CHECK-NEXT:.b8 3 // DW_AT_decl_file +; CHECK-NEXT:.b8 97 // DW_AT_decl_line +; CHECK-NEXT:.b8 1 +; CHECK-NEXT:.b32 4308 // DW_AT_import +; CHECK-NEXT:.b8 4 // Abbrev [4] 0x2b8:0x8 DW_TAG_imported_declaration +; CHECK-NEXT:.b8 3 // DW_AT_decl_file +; CHECK-NEXT:.b8 116 // DW_AT_decl_line +; CHECK-NEXT:.b8 1 +; CHECK-NEXT:.b32 4327 // DW_AT_import +; CHECK-NEXT:.b8 4 // Abbrev [4] 0x2c0:0x8 DW_TAG_imported_declaration +; CHECK-NEXT:.b8 3 // DW_AT_decl_file +; CHECK-NEXT:.b8 135 // DW_AT_decl_line +; CHECK-NEXT:.b8 1 +; CHECK-NEXT:.b32 4348 // DW_AT_import +; CHECK-NEXT:.b8 4 // Abbrev [4] 0x2c8:0x8 DW_TAG_imported_declaration +; CHECK-NEXT:.b8 3 // DW_AT_decl_file +; CHECK-NEXT:.b8 147 // DW_AT_decl_line +; CHECK-NEXT:.b8 1 +; CHECK-NEXT:.b32 4378 // DW_AT_import +; CHECK-NEXT:.b8 4 // Abbrev [4] 0x2d0:0x8 DW_TAG_imported_declaration +; CHECK-NEXT:.b8 3 // DW_AT_decl_file +; CHECK-NEXT:.b8 184 // DW_AT_decl_line +; CHECK-NEXT:.b8 1 +; CHECK-NEXT:.b32 4402 // DW_AT_import +; CHECK-NEXT:.b8 4 // Abbrev [4] 0x2d8:0x8 DW_TAG_imported_declaration +; CHECK-NEXT:.b8 3 // DW_AT_decl_file +; CHECK-NEXT:.b8 203 // DW_AT_decl_line +; CHECK-NEXT:.b8 1 +; CHECK-NEXT:.b32 4421 // DW_AT_import +; CHECK-NEXT:.b8 4 // Abbrev [4] 0x2e0:0x8 DW_TAG_imported_declaration +; CHECK-NEXT:.b8 3 // DW_AT_decl_file +; CHECK-NEXT:.b8 222 // DW_AT_decl_line +; CHECK-NEXT:.b8 1 +; CHECK-NEXT:.b32 4441 // DW_AT_import +; CHECK-NEXT:.b8 4 // Abbrev [4] 0x2e8:0x8 DW_TAG_imported_declaration +; CHECK-NEXT:.b8 3 // DW_AT_decl_file +; CHECK-NEXT:.b8 241 // DW_AT_decl_line +; CHECK-NEXT:.b8 1 +; CHECK-NEXT:.b32 4461 // DW_AT_import +; CHECK-NEXT:.b8 4 // Abbrev [4] 0x2f0:0x8 DW_TAG_imported_declaration +; CHECK-NEXT:.b8 3 // DW_AT_decl_file +; CHECK-NEXT:.b8 4 // DW_AT_decl_line +; CHECK-NEXT:.b8 2 +; CHECK-NEXT:.b32 4480 // DW_AT_import +; CHECK-NEXT:.b8 3 // Abbrev [3] 0x2f8:0x7 DW_TAG_imported_declaration +; CHECK-NEXT:.b8 5 // DW_AT_decl_file +; CHECK-NEXT:.b8 118 // DW_AT_decl_line +; CHECK-NEXT:.b32 4500 // DW_AT_import +; CHECK-NEXT:.b8 3 // Abbrev [3] 0x2ff:0x7 DW_TAG_imported_declaration +; CHECK-NEXT:.b8 5 // DW_AT_decl_file +; CHECK-NEXT:.b8 119 // DW_AT_decl_line +; CHECK-NEXT:.b32 4515 // DW_AT_import +; CHECK-NEXT:.b8 3 // Abbrev [3] 0x306:0x7 DW_TAG_imported_declaration +; CHECK-NEXT:.b8 5 // DW_AT_decl_file +; CHECK-NEXT:.b8 121 // DW_AT_decl_line +; CHECK-NEXT:.b32 4563 // DW_AT_import +; CHECK-NEXT:.b8 3 // Abbrev [3] 0x30d:0x7 DW_TAG_imported_declaration +; CHECK-NEXT:.b8 5 // DW_AT_decl_file +; CHECK-NEXT:.b8 122 // DW_AT_decl_line +; CHECK-NEXT:.b32 4576 // DW_AT_import +; CHECK-NEXT:.b8 3 // Abbrev [3] 0x314:0x7 DW_TAG_imported_declaration +; CHECK-NEXT:.b8 5 // DW_AT_decl_file +; CHECK-NEXT:.b8 123 // 
DW_AT_decl_line +; CHECK-NEXT:.b32 4596 // DW_AT_import +; CHECK-NEXT:.b8 3 // Abbrev [3] 0x31b:0x7 DW_TAG_imported_declaration +; CHECK-NEXT:.b8 5 // DW_AT_decl_file +; CHECK-NEXT:.b8 129 // DW_AT_decl_line +; CHECK-NEXT:.b32 4625 // DW_AT_import +; CHECK-NEXT:.b8 3 // Abbrev [3] 0x322:0x7 DW_TAG_imported_declaration +; CHECK-NEXT:.b8 5 // DW_AT_decl_file +; CHECK-NEXT:.b8 130 // DW_AT_decl_line +; CHECK-NEXT:.b32 4645 // DW_AT_import +; CHECK-NEXT:.b8 3 // Abbrev [3] 0x329:0x7 DW_TAG_imported_declaration +; CHECK-NEXT:.b8 5 // DW_AT_decl_file +; CHECK-NEXT:.b8 131 // DW_AT_decl_line +; CHECK-NEXT:.b32 4666 // DW_AT_import +; CHECK-NEXT:.b8 3 // Abbrev [3] 0x330:0x7 DW_TAG_imported_declaration +; CHECK-NEXT:.b8 5 // DW_AT_decl_file +; CHECK-NEXT:.b8 132 // DW_AT_decl_line +; CHECK-NEXT:.b32 4687 // DW_AT_import +; CHECK-NEXT:.b8 3 // Abbrev [3] 0x337:0x7 DW_TAG_imported_declaration +; CHECK-NEXT:.b8 5 // DW_AT_decl_file +; CHECK-NEXT:.b8 133 // DW_AT_decl_line +; CHECK-NEXT:.b32 4815 // DW_AT_import +; CHECK-NEXT:.b8 3 // Abbrev [3] 0x33e:0x7 DW_TAG_imported_declaration +; CHECK-NEXT:.b8 5 // DW_AT_decl_file +; CHECK-NEXT:.b8 134 // DW_AT_decl_line +; CHECK-NEXT:.b32 4843 // DW_AT_import +; CHECK-NEXT:.b8 3 // Abbrev [3] 0x345:0x7 DW_TAG_imported_declaration +; CHECK-NEXT:.b8 5 // DW_AT_decl_file +; CHECK-NEXT:.b8 135 // DW_AT_decl_line +; CHECK-NEXT:.b32 4868 // DW_AT_import +; CHECK-NEXT:.b8 3 // Abbrev [3] 0x34c:0x7 DW_TAG_imported_declaration +; CHECK-NEXT:.b8 5 // DW_AT_decl_file +; CHECK-NEXT:.b8 136 // DW_AT_decl_line +; CHECK-NEXT:.b32 4886 // DW_AT_import +; CHECK-NEXT:.b8 3 // Abbrev [3] 0x353:0x7 DW_TAG_imported_declaration +; CHECK-NEXT:.b8 5 // DW_AT_decl_file +; CHECK-NEXT:.b8 137 // DW_AT_decl_line +; CHECK-NEXT:.b32 4903 // DW_AT_import +; CHECK-NEXT:.b8 3 // Abbrev [3] 0x35a:0x7 DW_TAG_imported_declaration +; CHECK-NEXT:.b8 5 // DW_AT_decl_file +; CHECK-NEXT:.b8 138 // DW_AT_decl_line +; CHECK-NEXT:.b32 4931 // DW_AT_import +; CHECK-NEXT:.b8 3 // Abbrev [3] 0x361:0x7 DW_TAG_imported_declaration +; CHECK-NEXT:.b8 5 // DW_AT_decl_file +; CHECK-NEXT:.b8 139 // DW_AT_decl_line +; CHECK-NEXT:.b32 4952 // DW_AT_import +; CHECK-NEXT:.b8 3 // Abbrev [3] 0x368:0x7 DW_TAG_imported_declaration +; CHECK-NEXT:.b8 5 // DW_AT_decl_file +; CHECK-NEXT:.b8 140 // DW_AT_decl_line +; CHECK-NEXT:.b32 4978 // DW_AT_import +; CHECK-NEXT:.b8 3 // Abbrev [3] 0x36f:0x7 DW_TAG_imported_declaration +; CHECK-NEXT:.b8 5 // DW_AT_decl_file +; CHECK-NEXT:.b8 142 // DW_AT_decl_line +; CHECK-NEXT:.b32 5001 // DW_AT_import +; CHECK-NEXT:.b8 3 // Abbrev [3] 0x376:0x7 DW_TAG_imported_declaration +; CHECK-NEXT:.b8 5 // DW_AT_decl_file +; CHECK-NEXT:.b8 143 // DW_AT_decl_line +; CHECK-NEXT:.b32 5028 // DW_AT_import +; CHECK-NEXT:.b8 3 // Abbrev [3] 0x37d:0x7 DW_TAG_imported_declaration +; CHECK-NEXT:.b8 5 // DW_AT_decl_file +; CHECK-NEXT:.b8 144 // DW_AT_decl_line +; CHECK-NEXT:.b32 5079 // DW_AT_import +; CHECK-NEXT:.b8 3 // Abbrev [3] 0x384:0x7 DW_TAG_imported_declaration +; CHECK-NEXT:.b8 5 // DW_AT_decl_file +; CHECK-NEXT:.b8 146 // DW_AT_decl_line +; CHECK-NEXT:.b32 5112 // DW_AT_import +; CHECK-NEXT:.b8 3 // Abbrev [3] 0x38b:0x7 DW_TAG_imported_declaration +; CHECK-NEXT:.b8 5 // DW_AT_decl_file +; CHECK-NEXT:.b8 152 // DW_AT_decl_line +; CHECK-NEXT:.b32 5145 // DW_AT_import +; CHECK-NEXT:.b8 3 // Abbrev [3] 0x392:0x7 DW_TAG_imported_declaration +; CHECK-NEXT:.b8 5 // DW_AT_decl_file +; CHECK-NEXT:.b8 153 // DW_AT_decl_line +; CHECK-NEXT:.b32 5160 // DW_AT_import +; CHECK-NEXT:.b8 3 // Abbrev [3] 
0x399:0x7 DW_TAG_imported_declaration +; CHECK-NEXT:.b8 5 // DW_AT_decl_file +; CHECK-NEXT:.b8 154 // DW_AT_decl_line +; CHECK-NEXT:.b32 5189 // DW_AT_import +; CHECK-NEXT:.b8 3 // Abbrev [3] 0x3a0:0x7 DW_TAG_imported_declaration +; CHECK-NEXT:.b8 5 // DW_AT_decl_file +; CHECK-NEXT:.b8 155 // DW_AT_decl_line +; CHECK-NEXT:.b32 5223 // DW_AT_import +; CHECK-NEXT:.b8 3 // Abbrev [3] 0x3a7:0x7 DW_TAG_imported_declaration +; CHECK-NEXT:.b8 5 // DW_AT_decl_file +; CHECK-NEXT:.b8 156 // DW_AT_decl_line +; CHECK-NEXT:.b32 5255 // DW_AT_import +; CHECK-NEXT:.b8 3 // Abbrev [3] 0x3ae:0x7 DW_TAG_imported_declaration +; CHECK-NEXT:.b8 5 // DW_AT_decl_file +; CHECK-NEXT:.b8 157 // DW_AT_decl_line +; CHECK-NEXT:.b32 5287 // DW_AT_import +; CHECK-NEXT:.b8 3 // Abbrev [3] 0x3b5:0x7 DW_TAG_imported_declaration +; CHECK-NEXT:.b8 5 // DW_AT_decl_file +; CHECK-NEXT:.b8 158 // DW_AT_decl_line +; CHECK-NEXT:.b32 5320 // DW_AT_import +; CHECK-NEXT:.b8 3 // Abbrev [3] 0x3bc:0x7 DW_TAG_imported_declaration +; CHECK-NEXT:.b8 5 // DW_AT_decl_file +; CHECK-NEXT:.b8 160 // DW_AT_decl_line +; CHECK-NEXT:.b32 5343 // DW_AT_import +; CHECK-NEXT:.b8 3 // Abbrev [3] 0x3c3:0x7 DW_TAG_imported_declaration +; CHECK-NEXT:.b8 5 // DW_AT_decl_file +; CHECK-NEXT:.b8 161 // DW_AT_decl_line +; CHECK-NEXT:.b32 5388 // DW_AT_import +; CHECK-NEXT:.b8 3 // Abbrev [3] 0x3ca:0x7 DW_TAG_imported_declaration +; CHECK-NEXT:.b8 5 // DW_AT_decl_file +; CHECK-NEXT:.b8 241 // DW_AT_decl_line +; CHECK-NEXT:.b32 5536 // DW_AT_import +; CHECK-NEXT:.b8 3 // Abbrev [3] 0x3d1:0x7 DW_TAG_imported_declaration +; CHECK-NEXT:.b8 5 // DW_AT_decl_file +; CHECK-NEXT:.b8 243 // DW_AT_decl_line +; CHECK-NEXT:.b32 5585 // DW_AT_import +; CHECK-NEXT:.b8 3 // Abbrev [3] 0x3d8:0x7 DW_TAG_imported_declaration +; CHECK-NEXT:.b8 5 // DW_AT_decl_file +; CHECK-NEXT:.b8 245 // DW_AT_decl_line +; CHECK-NEXT:.b32 5604 // DW_AT_import +; CHECK-NEXT:.b8 3 // Abbrev [3] 0x3df:0x7 DW_TAG_imported_declaration +; CHECK-NEXT:.b8 5 // DW_AT_decl_file +; CHECK-NEXT:.b8 246 // DW_AT_decl_line +; CHECK-NEXT:.b32 5490 // DW_AT_import +; CHECK-NEXT:.b8 3 // Abbrev [3] 0x3e6:0x7 DW_TAG_imported_declaration +; CHECK-NEXT:.b8 5 // DW_AT_decl_file +; CHECK-NEXT:.b8 247 // DW_AT_decl_line +; CHECK-NEXT:.b32 5626 // DW_AT_import +; CHECK-NEXT:.b8 3 // Abbrev [3] 0x3ed:0x7 DW_TAG_imported_declaration +; CHECK-NEXT:.b8 5 // DW_AT_decl_file +; CHECK-NEXT:.b8 249 // DW_AT_decl_line +; CHECK-NEXT:.b32 5653 // DW_AT_import +; CHECK-NEXT:.b8 3 // Abbrev [3] 0x3f4:0x7 DW_TAG_imported_declaration +; CHECK-NEXT:.b8 5 // DW_AT_decl_file +; CHECK-NEXT:.b8 250 // DW_AT_decl_line +; CHECK-NEXT:.b32 5768 // DW_AT_import +; CHECK-NEXT:.b8 3 // Abbrev [3] 0x3fb:0x7 DW_TAG_imported_declaration +; CHECK-NEXT:.b8 5 // DW_AT_decl_file +; CHECK-NEXT:.b8 251 // DW_AT_decl_line +; CHECK-NEXT:.b32 5675 // DW_AT_import +; CHECK-NEXT:.b8 3 // Abbrev [3] 0x402:0x7 DW_TAG_imported_declaration +; CHECK-NEXT:.b8 5 // DW_AT_decl_file +; CHECK-NEXT:.b8 252 // DW_AT_decl_line +; CHECK-NEXT:.b32 5708 // DW_AT_import +; CHECK-NEXT:.b8 3 // Abbrev [3] 0x409:0x7 DW_TAG_imported_declaration +; CHECK-NEXT:.b8 5 // DW_AT_decl_file +; CHECK-NEXT:.b8 253 // DW_AT_decl_line +; CHECK-NEXT:.b32 5795 // DW_AT_import +; CHECK-NEXT:.b8 4 // Abbrev [4] 0x410:0x8 DW_TAG_imported_declaration +; CHECK-NEXT:.b8 10 // DW_AT_decl_file +; CHECK-NEXT:.b8 149 // DW_AT_decl_line +; CHECK-NEXT:.b8 1 +; CHECK-NEXT:.b32 5838 // DW_AT_import +; CHECK-NEXT:.b8 4 // Abbrev [4] 0x418:0x8 DW_TAG_imported_declaration +; CHECK-NEXT:.b8 10 // DW_AT_decl_file 
+; CHECK-NEXT:.b8 150 // DW_AT_decl_line +; CHECK-NEXT:.b8 1 +; CHECK-NEXT:.b32 5870 // DW_AT_import +; CHECK-NEXT:.b8 4 // Abbrev [4] 0x420:0x8 DW_TAG_imported_declaration +; CHECK-NEXT:.b8 10 // DW_AT_decl_file +; CHECK-NEXT:.b8 151 // DW_AT_decl_line +; CHECK-NEXT:.b8 1 +; CHECK-NEXT:.b32 5904 // DW_AT_import +; CHECK-NEXT:.b8 4 // Abbrev [4] 0x428:0x8 DW_TAG_imported_declaration +; CHECK-NEXT:.b8 10 // DW_AT_decl_file +; CHECK-NEXT:.b8 152 // DW_AT_decl_line +; CHECK-NEXT:.b8 1 +; CHECK-NEXT:.b32 5936 // DW_AT_import +; CHECK-NEXT:.b8 4 // Abbrev [4] 0x430:0x8 DW_TAG_imported_declaration +; CHECK-NEXT:.b8 10 // DW_AT_decl_file +; CHECK-NEXT:.b8 153 // DW_AT_decl_line +; CHECK-NEXT:.b8 1 +; CHECK-NEXT:.b32 5970 // DW_AT_import +; CHECK-NEXT:.b8 4 // Abbrev [4] 0x438:0x8 DW_TAG_imported_declaration +; CHECK-NEXT:.b8 10 // DW_AT_decl_file +; CHECK-NEXT:.b8 154 // DW_AT_decl_line +; CHECK-NEXT:.b8 1 +; CHECK-NEXT:.b32 6010 // DW_AT_import +; CHECK-NEXT:.b8 4 // Abbrev [4] 0x440:0x8 DW_TAG_imported_declaration +; CHECK-NEXT:.b8 10 // DW_AT_decl_file +; CHECK-NEXT:.b8 155 // DW_AT_decl_line +; CHECK-NEXT:.b8 1 +; CHECK-NEXT:.b32 6042 // DW_AT_import +; CHECK-NEXT:.b8 4 // Abbrev [4] 0x448:0x8 DW_TAG_imported_declaration +; CHECK-NEXT:.b8 10 // DW_AT_decl_file +; CHECK-NEXT:.b8 156 // DW_AT_decl_line +; CHECK-NEXT:.b8 1 +; CHECK-NEXT:.b32 6076 // DW_AT_import +; CHECK-NEXT:.b8 4 // Abbrev [4] 0x450:0x8 DW_TAG_imported_declaration +; CHECK-NEXT:.b8 10 // DW_AT_decl_file +; CHECK-NEXT:.b8 157 // DW_AT_decl_line +; CHECK-NEXT:.b8 1 +; CHECK-NEXT:.b32 6108 // DW_AT_import +; CHECK-NEXT:.b8 4 // Abbrev [4] 0x458:0x8 DW_TAG_imported_declaration +; CHECK-NEXT:.b8 10 // DW_AT_decl_file +; CHECK-NEXT:.b8 158 // DW_AT_decl_line +; CHECK-NEXT:.b8 1 +; CHECK-NEXT:.b32 6140 // DW_AT_import +; CHECK-NEXT:.b8 4 // Abbrev [4] 0x460:0x8 DW_TAG_imported_declaration +; CHECK-NEXT:.b8 10 // DW_AT_decl_file +; CHECK-NEXT:.b8 159 // DW_AT_decl_line +; CHECK-NEXT:.b8 1 +; CHECK-NEXT:.b32 6186 // DW_AT_import +; CHECK-NEXT:.b8 4 // Abbrev [4] 0x468:0x8 DW_TAG_imported_declaration +; CHECK-NEXT:.b8 10 // DW_AT_decl_file +; CHECK-NEXT:.b8 160 // DW_AT_decl_line +; CHECK-NEXT:.b8 1 +; CHECK-NEXT:.b32 6216 // DW_AT_import +; CHECK-NEXT:.b8 4 // Abbrev [4] 0x470:0x8 DW_TAG_imported_declaration +; CHECK-NEXT:.b8 10 // DW_AT_decl_file +; CHECK-NEXT:.b8 161 // DW_AT_decl_line +; CHECK-NEXT:.b8 1 +; CHECK-NEXT:.b32 6248 // DW_AT_import +; CHECK-NEXT:.b8 4 // Abbrev [4] 0x478:0x8 DW_TAG_imported_declaration +; CHECK-NEXT:.b8 10 // DW_AT_decl_file +; CHECK-NEXT:.b8 162 // DW_AT_decl_line +; CHECK-NEXT:.b8 1 +; CHECK-NEXT:.b32 6280 // DW_AT_import +; CHECK-NEXT:.b8 4 // Abbrev [4] 0x480:0x8 DW_TAG_imported_declaration +; CHECK-NEXT:.b8 10 // DW_AT_decl_file +; CHECK-NEXT:.b8 163 // DW_AT_decl_line +; CHECK-NEXT:.b8 1 +; CHECK-NEXT:.b32 6310 // DW_AT_import +; CHECK-NEXT:.b8 4 // Abbrev [4] 0x488:0x8 DW_TAG_imported_declaration +; CHECK-NEXT:.b8 10 // DW_AT_decl_file +; CHECK-NEXT:.b8 164 // DW_AT_decl_line +; CHECK-NEXT:.b8 1 +; CHECK-NEXT:.b32 6342 // DW_AT_import +; CHECK-NEXT:.b8 4 // Abbrev [4] 0x490:0x8 DW_TAG_imported_declaration +; CHECK-NEXT:.b8 10 // DW_AT_decl_file +; CHECK-NEXT:.b8 165 // DW_AT_decl_line +; CHECK-NEXT:.b8 1 +; CHECK-NEXT:.b32 6372 // DW_AT_import +; CHECK-NEXT:.b8 4 // Abbrev [4] 0x498:0x8 DW_TAG_imported_declaration +; CHECK-NEXT:.b8 10 // DW_AT_decl_file +; CHECK-NEXT:.b8 166 // DW_AT_decl_line +; CHECK-NEXT:.b8 1 +; CHECK-NEXT:.b32 6406 // DW_AT_import +; CHECK-NEXT:.b8 4 // Abbrev [4] 0x4a0:0x8 
DW_TAG_imported_declaration +; CHECK-NEXT:.b8 10 // DW_AT_decl_file +; CHECK-NEXT:.b8 167 // DW_AT_decl_line +; CHECK-NEXT:.b8 1 +; CHECK-NEXT:.b32 6438 // DW_AT_import +; CHECK-NEXT:.b8 4 // Abbrev [4] 0x4a8:0x8 DW_TAG_imported_declaration +; CHECK-NEXT:.b8 10 // DW_AT_decl_file +; CHECK-NEXT:.b8 168 // DW_AT_decl_line +; CHECK-NEXT:.b8 1 +; CHECK-NEXT:.b32 6476 // DW_AT_import +; CHECK-NEXT:.b8 4 // Abbrev [4] 0x4b0:0x8 DW_TAG_imported_declaration +; CHECK-NEXT:.b8 10 // DW_AT_decl_file +; CHECK-NEXT:.b8 169 // DW_AT_decl_line +; CHECK-NEXT:.b8 1 +; CHECK-NEXT:.b32 6510 // DW_AT_import +; CHECK-NEXT:.b8 4 // Abbrev [4] 0x4b8:0x8 DW_TAG_imported_declaration +; CHECK-NEXT:.b8 10 // DW_AT_decl_file +; CHECK-NEXT:.b8 170 // DW_AT_decl_line +; CHECK-NEXT:.b8 1 +; CHECK-NEXT:.b32 6552 // DW_AT_import +; CHECK-NEXT:.b8 4 // Abbrev [4] 0x4c0:0x8 DW_TAG_imported_declaration +; CHECK-NEXT:.b8 10 // DW_AT_decl_file +; CHECK-NEXT:.b8 171 // DW_AT_decl_line +; CHECK-NEXT:.b8 1 +; CHECK-NEXT:.b32 6590 // DW_AT_import +; CHECK-NEXT:.b8 4 // Abbrev [4] 0x4c8:0x8 DW_TAG_imported_declaration +; CHECK-NEXT:.b8 10 // DW_AT_decl_file +; CHECK-NEXT:.b8 172 // DW_AT_decl_line +; CHECK-NEXT:.b8 1 +; CHECK-NEXT:.b32 6628 // DW_AT_import +; CHECK-NEXT:.b8 4 // Abbrev [4] 0x4d0:0x8 DW_TAG_imported_declaration +; CHECK-NEXT:.b8 10 // DW_AT_decl_file +; CHECK-NEXT:.b8 173 // DW_AT_decl_line +; CHECK-NEXT:.b8 1 +; CHECK-NEXT:.b32 6666 // DW_AT_import +; CHECK-NEXT:.b8 4 // Abbrev [4] 0x4d8:0x8 DW_TAG_imported_declaration +; CHECK-NEXT:.b8 10 // DW_AT_decl_file +; CHECK-NEXT:.b8 174 // DW_AT_decl_line +; CHECK-NEXT:.b8 1 +; CHECK-NEXT:.b32 6707 // DW_AT_import +; CHECK-NEXT:.b8 4 // Abbrev [4] 0x4e0:0x8 DW_TAG_imported_declaration +; CHECK-NEXT:.b8 10 // DW_AT_decl_file +; CHECK-NEXT:.b8 175 // DW_AT_decl_line +; CHECK-NEXT:.b8 1 +; CHECK-NEXT:.b32 6747 // DW_AT_import +; CHECK-NEXT:.b8 4 // Abbrev [4] 0x4e8:0x8 DW_TAG_imported_declaration +; CHECK-NEXT:.b8 10 // DW_AT_decl_file +; CHECK-NEXT:.b8 176 // DW_AT_decl_line +; CHECK-NEXT:.b8 1 +; CHECK-NEXT:.b32 6781 // DW_AT_import +; CHECK-NEXT:.b8 4 // Abbrev [4] 0x4f0:0x8 DW_TAG_imported_declaration +; CHECK-NEXT:.b8 10 // DW_AT_decl_file +; CHECK-NEXT:.b8 177 // DW_AT_decl_line +; CHECK-NEXT:.b8 1 +; CHECK-NEXT:.b32 6821 // DW_AT_import +; CHECK-NEXT:.b8 4 // Abbrev [4] 0x4f8:0x8 DW_TAG_imported_declaration +; CHECK-NEXT:.b8 10 // DW_AT_decl_file +; CHECK-NEXT:.b8 178 // DW_AT_decl_line +; CHECK-NEXT:.b8 1 +; CHECK-NEXT:.b32 6857 // DW_AT_import +; CHECK-NEXT:.b8 4 // Abbrev [4] 0x500:0x8 DW_TAG_imported_declaration +; CHECK-NEXT:.b8 10 // DW_AT_decl_file +; CHECK-NEXT:.b8 179 // DW_AT_decl_line +; CHECK-NEXT:.b8 1 +; CHECK-NEXT:.b32 6893 // DW_AT_import +; CHECK-NEXT:.b8 4 // Abbrev [4] 0x508:0x8 DW_TAG_imported_declaration +; CHECK-NEXT:.b8 10 // DW_AT_decl_file +; CHECK-NEXT:.b8 180 // DW_AT_decl_line +; CHECK-NEXT:.b8 1 +; CHECK-NEXT:.b32 6931 // DW_AT_import +; CHECK-NEXT:.b8 4 // Abbrev [4] 0x510:0x8 DW_TAG_imported_declaration +; CHECK-NEXT:.b8 10 // DW_AT_decl_file +; CHECK-NEXT:.b8 181 // DW_AT_decl_line +; CHECK-NEXT:.b8 1 +; CHECK-NEXT:.b32 6965 // DW_AT_import +; CHECK-NEXT:.b8 4 // Abbrev [4] 0x518:0x8 DW_TAG_imported_declaration +; CHECK-NEXT:.b8 10 // DW_AT_decl_file +; CHECK-NEXT:.b8 182 // DW_AT_decl_line +; CHECK-NEXT:.b8 1 +; CHECK-NEXT:.b32 6999 // DW_AT_import +; CHECK-NEXT:.b8 4 // Abbrev [4] 0x520:0x8 DW_TAG_imported_declaration +; CHECK-NEXT:.b8 10 // DW_AT_decl_file +; CHECK-NEXT:.b8 183 // DW_AT_decl_line +; CHECK-NEXT:.b8 1 +; 
CHECK-NEXT:.b32 7031 // DW_AT_import +; CHECK-NEXT:.b8 4 // Abbrev [4] 0x528:0x8 DW_TAG_imported_declaration +; CHECK-NEXT:.b8 10 // DW_AT_decl_file +; CHECK-NEXT:.b8 184 // DW_AT_decl_line +; CHECK-NEXT:.b8 1 +; CHECK-NEXT:.b32 7063 // DW_AT_import +; CHECK-NEXT:.b8 4 // Abbrev [4] 0x530:0x8 DW_TAG_imported_declaration +; CHECK-NEXT:.b8 10 // DW_AT_decl_file +; CHECK-NEXT:.b8 185 // DW_AT_decl_line +; CHECK-NEXT:.b8 1 +; CHECK-NEXT:.b32 7093 // DW_AT_import +; CHECK-NEXT:.b8 4 // Abbrev [4] 0x538:0x8 DW_TAG_imported_declaration +; CHECK-NEXT:.b8 10 // DW_AT_decl_file +; CHECK-NEXT:.b8 186 // DW_AT_decl_line +; CHECK-NEXT:.b8 1 +; CHECK-NEXT:.b32 7127 // DW_AT_import +; CHECK-NEXT:.b8 4 // Abbrev [4] 0x540:0x8 DW_TAG_imported_declaration +; CHECK-NEXT:.b8 10 // DW_AT_decl_file +; CHECK-NEXT:.b8 187 // DW_AT_decl_line +; CHECK-NEXT:.b8 1 +; CHECK-NEXT:.b32 7163 // DW_AT_import +; CHECK-NEXT:.b8 4 // Abbrev [4] 0x548:0x8 DW_TAG_imported_declaration +; CHECK-NEXT:.b8 10 // DW_AT_decl_file +; CHECK-NEXT:.b8 188 // DW_AT_decl_line +; CHECK-NEXT:.b8 1 +; CHECK-NEXT:.b32 7202 // DW_AT_import +; CHECK-NEXT:.b8 4 // Abbrev [4] 0x550:0x8 DW_TAG_imported_declaration +; CHECK-NEXT:.b8 10 // DW_AT_decl_file +; CHECK-NEXT:.b8 189 // DW_AT_decl_line +; CHECK-NEXT:.b8 1 +; CHECK-NEXT:.b32 7245 // DW_AT_import +; CHECK-NEXT:.b8 4 // Abbrev [4] 0x558:0x8 DW_TAG_imported_declaration +; CHECK-NEXT:.b8 10 // DW_AT_decl_file +; CHECK-NEXT:.b8 190 // DW_AT_decl_line +; CHECK-NEXT:.b8 1 +; CHECK-NEXT:.b32 7294 // DW_AT_import +; CHECK-NEXT:.b8 4 // Abbrev [4] 0x560:0x8 DW_TAG_imported_declaration +; CHECK-NEXT:.b8 10 // DW_AT_decl_file +; CHECK-NEXT:.b8 191 // DW_AT_decl_line +; CHECK-NEXT:.b8 1 +; CHECK-NEXT:.b32 7330 // DW_AT_import +; CHECK-NEXT:.b8 4 // Abbrev [4] 0x568:0x8 DW_TAG_imported_declaration +; CHECK-NEXT:.b8 10 // DW_AT_decl_file +; CHECK-NEXT:.b8 192 // DW_AT_decl_line +; CHECK-NEXT:.b8 1 +; CHECK-NEXT:.b32 7379 // DW_AT_import +; CHECK-NEXT:.b8 4 // Abbrev [4] 0x570:0x8 DW_TAG_imported_declaration +; CHECK-NEXT:.b8 10 // DW_AT_decl_file +; CHECK-NEXT:.b8 193 // DW_AT_decl_line +; CHECK-NEXT:.b8 1 +; CHECK-NEXT:.b32 7428 // DW_AT_import +; CHECK-NEXT:.b8 4 // Abbrev [4] 0x578:0x8 DW_TAG_imported_declaration +; CHECK-NEXT:.b8 10 // DW_AT_decl_file +; CHECK-NEXT:.b8 194 // DW_AT_decl_line +; CHECK-NEXT:.b8 1 +; CHECK-NEXT:.b32 7460 // DW_AT_import +; CHECK-NEXT:.b8 4 // Abbrev [4] 0x580:0x8 DW_TAG_imported_declaration +; CHECK-NEXT:.b8 10 // DW_AT_decl_file +; CHECK-NEXT:.b8 195 // DW_AT_decl_line +; CHECK-NEXT:.b8 1 +; CHECK-NEXT:.b32 7494 // DW_AT_import +; CHECK-NEXT:.b8 4 // Abbrev [4] 0x588:0x8 DW_TAG_imported_declaration +; CHECK-NEXT:.b8 10 // DW_AT_decl_file +; CHECK-NEXT:.b8 196 // DW_AT_decl_line +; CHECK-NEXT:.b8 1 +; CHECK-NEXT:.b32 7538 // DW_AT_import +; CHECK-NEXT:.b8 4 // Abbrev [4] 0x590:0x8 DW_TAG_imported_declaration +; CHECK-NEXT:.b8 10 // DW_AT_decl_file +; CHECK-NEXT:.b8 197 // DW_AT_decl_line +; CHECK-NEXT:.b8 1 +; CHECK-NEXT:.b32 7580 // DW_AT_import +; CHECK-NEXT:.b8 4 // Abbrev [4] 0x598:0x8 DW_TAG_imported_declaration +; CHECK-NEXT:.b8 10 // DW_AT_decl_file +; CHECK-NEXT:.b8 198 // DW_AT_decl_line +; CHECK-NEXT:.b8 1 +; CHECK-NEXT:.b32 7610 // DW_AT_import +; CHECK-NEXT:.b8 4 // Abbrev [4] 0x5a0:0x8 DW_TAG_imported_declaration +; CHECK-NEXT:.b8 10 // DW_AT_decl_file +; CHECK-NEXT:.b8 199 // DW_AT_decl_line +; CHECK-NEXT:.b8 1 +; CHECK-NEXT:.b32 7642 // DW_AT_import +; CHECK-NEXT:.b8 4 // Abbrev [4] 0x5a8:0x8 DW_TAG_imported_declaration +; CHECK-NEXT:.b8 10 // 
DW_AT_decl_file +; CHECK-NEXT:.b8 200 // DW_AT_decl_line +; CHECK-NEXT:.b8 1 +; CHECK-NEXT:.b32 7674 // DW_AT_import +; CHECK-NEXT:.b8 4 // Abbrev [4] 0x5b0:0x8 DW_TAG_imported_declaration +; CHECK-NEXT:.b8 10 // DW_AT_decl_file +; CHECK-NEXT:.b8 201 // DW_AT_decl_line +; CHECK-NEXT:.b8 1 +; CHECK-NEXT:.b32 7704 // DW_AT_import +; CHECK-NEXT:.b8 4 // Abbrev [4] 0x5b8:0x8 DW_TAG_imported_declaration +; CHECK-NEXT:.b8 10 // DW_AT_decl_file +; CHECK-NEXT:.b8 202 // DW_AT_decl_line +; CHECK-NEXT:.b8 1 +; CHECK-NEXT:.b32 7736 // DW_AT_import +; CHECK-NEXT:.b8 4 // Abbrev [4] 0x5c0:0x8 DW_TAG_imported_declaration +; CHECK-NEXT:.b8 10 // DW_AT_decl_file +; CHECK-NEXT:.b8 203 // DW_AT_decl_line +; CHECK-NEXT:.b8 1 +; CHECK-NEXT:.b32 7772 // DW_AT_import ; CHECK-NEXT:.b8 0 // End Of Children Mark -; CHECK-NEXT:.b8 8 // Abbrev [8] 0x1e8:0x43 DW_TAG_subprogram +; CHECK-NEXT:.b8 5 // Abbrev [5] 0x5c9:0x1b DW_TAG_subprogram ; CHECK-NEXT:.b8 95 // DW_AT_MIPS_linkage_name ; CHECK-NEXT:.b8 90 -; CHECK-NEXT:.b8 78 -; CHECK-NEXT:.b8 75 -; CHECK-NEXT:.b8 50 -; CHECK-NEXT:.b8 53 -; CHECK-NEXT:.b8 95 -; CHECK-NEXT:.b8 95 -; CHECK-NEXT:.b8 99 -; CHECK-NEXT:.b8 117 -; CHECK-NEXT:.b8 100 +; CHECK-NEXT:.b8 76 +; CHECK-NEXT:.b8 51 ; CHECK-NEXT:.b8 97 -; CHECK-NEXT:.b8 95 -; CHECK-NEXT:.b8 98 -; CHECK-NEXT:.b8 117 -; CHECK-NEXT:.b8 105 -; CHECK-NEXT:.b8 108 -; CHECK-NEXT:.b8 116 -; CHECK-NEXT:.b8 105 -; CHECK-NEXT:.b8 110 -; CHECK-NEXT:.b8 95 ; CHECK-NEXT:.b8 98 -; CHECK-NEXT:.b8 108 -; CHECK-NEXT:.b8 111 -; CHECK-NEXT:.b8 99 -; CHECK-NEXT:.b8 107 -; CHECK-NEXT:.b8 73 -; CHECK-NEXT:.b8 100 +; CHECK-NEXT:.b8 115 ; CHECK-NEXT:.b8 120 -; CHECK-NEXT:.b8 95 -; CHECK-NEXT:.b8 116 -; CHECK-NEXT:.b8 97 -; CHECK-NEXT:.b8 83 -; CHECK-NEXT:.b8 69 -; CHECK-NEXT:.b8 82 -; CHECK-NEXT:.b8 75 -; CHECK-NEXT:.b8 83 -; CHECK-NEXT:.b8 95 ; CHECK-NEXT:.b8 0 -; CHECK-NEXT:.b8 111 // DW_AT_name -; CHECK-NEXT:.b8 112 -; CHECK-NEXT:.b8 101 -; CHECK-NEXT:.b8 114 -; CHECK-NEXT:.b8 97 -; CHECK-NEXT:.b8 116 -; CHECK-NEXT:.b8 111 -; CHECK-NEXT:.b8 114 -; CHECK-NEXT:.b8 61 +; CHECK-NEXT:.b8 97 // DW_AT_name +; CHECK-NEXT:.b8 98 +; CHECK-NEXT:.b8 115 ; CHECK-NEXT:.b8 0 -; CHECK-NEXT:.b8 2 // DW_AT_decl_file -; CHECK-NEXT:.b8 85 // DW_AT_decl_line +; CHECK-NEXT:.b8 1 // DW_AT_decl_file +; CHECK-NEXT:.b8 44 // DW_AT_decl_line +; CHECK-NEXT:.b32 1508 // DW_AT_type ; CHECK-NEXT:.b8 1 // DW_AT_declaration -; CHECK-NEXT:.b8 1 // DW_AT_external -; CHECK-NEXT:.b8 3 // DW_AT_accessibility -; CHECK-NEXT: // DW_ACCESS_private -; CHECK-NEXT:.b8 5 // Abbrev [5] 0x21f:0x6 DW_TAG_formal_parameter -; CHECK-NEXT:.b32 682 // DW_AT_type -; CHECK-NEXT:.b8 1 // DW_AT_artificial -; CHECK-NEXT:.b8 7 // Abbrev [7] 0x225:0x5 DW_TAG_formal_parameter -; CHECK-NEXT:.b32 697 // DW_AT_type +; CHECK-NEXT:.b8 6 // Abbrev [6] 0x5de:0x5 DW_TAG_formal_parameter +; CHECK-NEXT:.b32 1508 // DW_AT_type ; CHECK-NEXT:.b8 0 // End Of Children Mark -; CHECK-NEXT:.b8 9 // Abbrev [9] 0x22b:0x3f DW_TAG_subprogram -; CHECK-NEXT:.b8 95 // DW_AT_MIPS_linkage_name -; CHECK-NEXT:.b8 90 -; CHECK-NEXT:.b8 78 -; CHECK-NEXT:.b8 75 -; CHECK-NEXT:.b8 50 -; CHECK-NEXT:.b8 53 -; CHECK-NEXT:.b8 95 -; CHECK-NEXT:.b8 95 -; CHECK-NEXT:.b8 99 -; CHECK-NEXT:.b8 117 -; CHECK-NEXT:.b8 100 -; CHECK-NEXT:.b8 97 -; CHECK-NEXT:.b8 95 -; CHECK-NEXT:.b8 98 -; CHECK-NEXT:.b8 117 -; CHECK-NEXT:.b8 105 -; CHECK-NEXT:.b8 108 -; CHECK-NEXT:.b8 116 -; CHECK-NEXT:.b8 105 +; CHECK-NEXT:.b8 7 // Abbrev [7] 0x5e4:0x11 DW_TAG_base_type +; CHECK-NEXT:.b8 108 // DW_AT_name +; CHECK-NEXT:.b8 111 ; CHECK-NEXT:.b8 110 -; 
CHECK-NEXT:.b8 95 -; CHECK-NEXT:.b8 98 +; CHECK-NEXT:.b8 103 +; CHECK-NEXT:.b8 32 ; CHECK-NEXT:.b8 108 ; CHECK-NEXT:.b8 111 -; CHECK-NEXT:.b8 99 -; CHECK-NEXT:.b8 107 -; CHECK-NEXT:.b8 73 -; CHECK-NEXT:.b8 100 -; CHECK-NEXT:.b8 120 -; CHECK-NEXT:.b8 95 -; CHECK-NEXT:.b8 116 -; CHECK-NEXT:.b8 97 -; CHECK-NEXT:.b8 100 -; CHECK-NEXT:.b8 69 -; CHECK-NEXT:.b8 118 -; CHECK-NEXT:.b8 0 -; CHECK-NEXT:.b8 111 // DW_AT_name -; CHECK-NEXT:.b8 112 -; CHECK-NEXT:.b8 101 -; CHECK-NEXT:.b8 114 -; CHECK-NEXT:.b8 97 -; CHECK-NEXT:.b8 116 -; CHECK-NEXT:.b8 111 -; CHECK-NEXT:.b8 114 -; CHECK-NEXT:.b8 38 -; CHECK-NEXT:.b8 0 -; CHECK-NEXT:.b8 2 // DW_AT_decl_file -; CHECK-NEXT:.b8 85 // DW_AT_decl_line -; CHECK-NEXT:.b32 702 // DW_AT_type -; CHECK-NEXT:.b8 1 // DW_AT_declaration -; CHECK-NEXT:.b8 1 // DW_AT_external -; CHECK-NEXT:.b8 3 // DW_AT_accessibility -; CHECK-NEXT: // DW_ACCESS_private -; CHECK-NEXT:.b8 5 // Abbrev [5] 0x263:0x6 DW_TAG_formal_parameter -; CHECK-NEXT:.b32 682 // DW_AT_type -; CHECK-NEXT:.b8 1 // DW_AT_artificial -; CHECK-NEXT:.b8 0 // End Of Children Mark -; CHECK-NEXT:.b8 0 // End Of Children Mark -; CHECK-NEXT:.b8 10 // Abbrev [10] 0x26b:0x10 DW_TAG_base_type -; CHECK-NEXT:.b8 117 // DW_AT_name ; CHECK-NEXT:.b8 110 -; CHECK-NEXT:.b8 115 -; CHECK-NEXT:.b8 105 ; CHECK-NEXT:.b8 103 -; CHECK-NEXT:.b8 110 -; CHECK-NEXT:.b8 101 -; CHECK-NEXT:.b8 100 ; CHECK-NEXT:.b8 32 ; CHECK-NEXT:.b8 105 ; CHECK-NEXT:.b8 110 ; CHECK-NEXT:.b8 116 ; CHECK-NEXT:.b8 0 -; CHECK-NEXT:.b8 7 // DW_AT_encoding -; CHECK-NEXT:.b8 4 // DW_AT_byte_size -; CHECK-NEXT:.b8 2 // Abbrev [2] 0x27b:0x2f DW_TAG_structure_type -; CHECK-NEXT:.b8 117 // DW_AT_name -; CHECK-NEXT:.b8 105 -; CHECK-NEXT:.b8 110 -; CHECK-NEXT:.b8 116 -; CHECK-NEXT:.b8 51 -; CHECK-NEXT:.b8 0 -; CHECK-NEXT:.b8 12 // DW_AT_byte_size -; CHECK-NEXT:.b8 3 // DW_AT_decl_file -; CHECK-NEXT:.b8 190 // DW_AT_decl_line -; CHECK-NEXT:.b8 11 // Abbrev [11] 0x285:0xc DW_TAG_member -; CHECK-NEXT:.b8 120 // DW_AT_name -; CHECK-NEXT:.b8 0 -; CHECK-NEXT:.b32 619 // DW_AT_type -; CHECK-NEXT:.b8 3 // DW_AT_decl_file -; CHECK-NEXT:.b8 192 // DW_AT_decl_line -; CHECK-NEXT:.b8 2 // DW_AT_data_member_location -; CHECK-NEXT:.b8 35 -; CHECK-NEXT:.b8 0 -; CHECK-NEXT:.b8 11 // Abbrev [11] 0x291:0xc DW_TAG_member -; CHECK-NEXT:.b8 121 // DW_AT_name -; CHECK-NEXT:.b8 0 -; CHECK-NEXT:.b32 619 // DW_AT_type -; CHECK-NEXT:.b8 3 // DW_AT_decl_file -; CHECK-NEXT:.b8 192 // DW_AT_decl_line -; CHECK-NEXT:.b8 2 // DW_AT_data_member_location -; CHECK-NEXT:.b8 35 -; CHECK-NEXT:.b8 4 -; CHECK-NEXT:.b8 11 // Abbrev [11] 0x29d:0xc DW_TAG_member -; CHECK-NEXT:.b8 122 // DW_AT_name -; CHECK-NEXT:.b8 0 -; CHECK-NEXT:.b32 619 // DW_AT_type -; CHECK-NEXT:.b8 3 // DW_AT_decl_file -; CHECK-NEXT:.b8 192 // DW_AT_decl_line -; CHECK-NEXT:.b8 2 // DW_AT_data_member_location -; CHECK-NEXT:.b8 35 -; CHECK-NEXT:.b8 8 -; CHECK-NEXT:.b8 0 // End Of Children Mark -; CHECK-NEXT:.b8 12 // Abbrev [12] 0x2aa:0x5 DW_TAG_pointer_type -; CHECK-NEXT:.b32 687 // DW_AT_type -; CHECK-NEXT:.b8 13 // Abbrev [13] 0x2af:0x5 DW_TAG_const_type -; CHECK-NEXT:.b32 65 // DW_AT_type -; CHECK-NEXT:.b8 12 // Abbrev [12] 0x2b4:0x5 DW_TAG_pointer_type -; CHECK-NEXT:.b32 65 // DW_AT_type -; CHECK-NEXT:.b8 14 // Abbrev [14] 0x2b9:0x5 DW_TAG_reference_type -; CHECK-NEXT:.b32 687 // DW_AT_type -; CHECK-NEXT:.b8 12 // Abbrev [12] 0x2be:0x5 DW_TAG_pointer_type -; CHECK-NEXT:.b32 65 // DW_AT_type -; CHECK-NEXT:.b8 15 // Abbrev [15] 0x2c3:0x6 DW_TAG_subprogram -; CHECK-NEXT:.b32 95 // DW_AT_specification -; CHECK-NEXT:.b8 1 // DW_AT_inline -; 
CHECK-NEXT:.b8 2 // Abbrev [2] 0x2c9:0x228 DW_TAG_structure_type -; CHECK-NEXT:.b8 95 // DW_AT_name -; CHECK-NEXT:.b8 95 -; CHECK-NEXT:.b8 99 -; CHECK-NEXT:.b8 117 -; CHECK-NEXT:.b8 100 -; CHECK-NEXT:.b8 97 -; CHECK-NEXT:.b8 95 -; CHECK-NEXT:.b8 98 -; CHECK-NEXT:.b8 117 -; CHECK-NEXT:.b8 105 -; CHECK-NEXT:.b8 108 -; CHECK-NEXT:.b8 116 -; CHECK-NEXT:.b8 105 -; CHECK-NEXT:.b8 110 -; CHECK-NEXT:.b8 95 -; CHECK-NEXT:.b8 98 -; CHECK-NEXT:.b8 108 -; CHECK-NEXT:.b8 111 -; CHECK-NEXT:.b8 99 -; CHECK-NEXT:.b8 107 -; CHECK-NEXT:.b8 68 -; CHECK-NEXT:.b8 105 -; CHECK-NEXT:.b8 109 -; CHECK-NEXT:.b8 95 -; CHECK-NEXT:.b8 116 -; CHECK-NEXT:.b8 0 -; CHECK-NEXT:.b8 1 // DW_AT_byte_size -; CHECK-NEXT:.b8 2 // DW_AT_decl_file -; CHECK-NEXT:.b8 88 // DW_AT_decl_line -; CHECK-NEXT:.b8 3 // Abbrev [3] 0x2e7:0x4f DW_TAG_subprogram +; CHECK-NEXT:.b8 5 // DW_AT_encoding +; CHECK-NEXT:.b8 8 // DW_AT_byte_size +; CHECK-NEXT:.b8 5 // Abbrev [5] 0x5f5:0x1d DW_TAG_subprogram ; CHECK-NEXT:.b8 95 // DW_AT_MIPS_linkage_name ; CHECK-NEXT:.b8 90 -; CHECK-NEXT:.b8 78 -; CHECK-NEXT:.b8 50 -; CHECK-NEXT:.b8 53 -; CHECK-NEXT:.b8 95 -; CHECK-NEXT:.b8 95 -; CHECK-NEXT:.b8 99 -; CHECK-NEXT:.b8 117 -; CHECK-NEXT:.b8 100 +; CHECK-NEXT:.b8 76 +; CHECK-NEXT:.b8 52 ; CHECK-NEXT:.b8 97 -; CHECK-NEXT:.b8 95 -; CHECK-NEXT:.b8 98 -; CHECK-NEXT:.b8 117 -; CHECK-NEXT:.b8 105 -; CHECK-NEXT:.b8 108 -; CHECK-NEXT:.b8 116 -; CHECK-NEXT:.b8 105 -; CHECK-NEXT:.b8 110 -; CHECK-NEXT:.b8 95 -; CHECK-NEXT:.b8 98 -; CHECK-NEXT:.b8 108 -; CHECK-NEXT:.b8 111 ; CHECK-NEXT:.b8 99 -; CHECK-NEXT:.b8 107 -; CHECK-NEXT:.b8 68 -; CHECK-NEXT:.b8 105 -; CHECK-NEXT:.b8 109 -; CHECK-NEXT:.b8 95 -; CHECK-NEXT:.b8 116 -; CHECK-NEXT:.b8 49 -; CHECK-NEXT:.b8 55 -; CHECK-NEXT:.b8 95 -; CHECK-NEXT:.b8 95 +; CHECK-NEXT:.b8 111 +; CHECK-NEXT:.b8 115 ; CHECK-NEXT:.b8 102 -; CHECK-NEXT:.b8 101 -; CHECK-NEXT:.b8 116 -; CHECK-NEXT:.b8 99 -; CHECK-NEXT:.b8 104 -; CHECK-NEXT:.b8 95 -; CHECK-NEXT:.b8 98 -; CHECK-NEXT:.b8 117 -; CHECK-NEXT:.b8 105 -; CHECK-NEXT:.b8 108 -; CHECK-NEXT:.b8 116 -; CHECK-NEXT:.b8 105 -; CHECK-NEXT:.b8 110 -; CHECK-NEXT:.b8 95 -; CHECK-NEXT:.b8 120 -; CHECK-NEXT:.b8 69 -; CHECK-NEXT:.b8 118 ; CHECK-NEXT:.b8 0 -; CHECK-NEXT:.b8 95 // DW_AT_name -; CHECK-NEXT:.b8 95 -; CHECK-NEXT:.b8 102 -; CHECK-NEXT:.b8 101 -; CHECK-NEXT:.b8 116 +; CHECK-NEXT:.b8 97 // DW_AT_name ; CHECK-NEXT:.b8 99 -; CHECK-NEXT:.b8 104 -; CHECK-NEXT:.b8 95 -; CHECK-NEXT:.b8 98 -; CHECK-NEXT:.b8 117 -; CHECK-NEXT:.b8 105 +; CHECK-NEXT:.b8 111 +; CHECK-NEXT:.b8 115 +; CHECK-NEXT:.b8 0 +; CHECK-NEXT:.b8 1 // DW_AT_decl_file +; CHECK-NEXT:.b8 46 // DW_AT_decl_line +; CHECK-NEXT:.b32 1554 // DW_AT_type +; CHECK-NEXT:.b8 1 // DW_AT_declaration +; CHECK-NEXT:.b8 6 // Abbrev [6] 0x60c:0x5 DW_TAG_formal_parameter +; CHECK-NEXT:.b32 1554 // DW_AT_type +; CHECK-NEXT:.b8 0 // End Of Children Mark +; CHECK-NEXT:.b8 7 // Abbrev [7] 0x612:0x9 DW_TAG_base_type +; CHECK-NEXT:.b8 102 // DW_AT_name ; CHECK-NEXT:.b8 108 +; CHECK-NEXT:.b8 111 +; CHECK-NEXT:.b8 97 ; CHECK-NEXT:.b8 116 -; CHECK-NEXT:.b8 105 -; CHECK-NEXT:.b8 110 -; CHECK-NEXT:.b8 95 -; CHECK-NEXT:.b8 120 ; CHECK-NEXT:.b8 0 -; CHECK-NEXT:.b8 2 // DW_AT_decl_file -; CHECK-NEXT:.b8 89 // DW_AT_decl_line -; CHECK-NEXT:.b32 619 // DW_AT_type -; CHECK-NEXT:.b8 1 // DW_AT_declaration -; CHECK-NEXT:.b8 1 // DW_AT_external -; CHECK-NEXT:.b8 3 // Abbrev [3] 0x336:0x4f DW_TAG_subprogram +; CHECK-NEXT:.b8 4 // DW_AT_encoding +; CHECK-NEXT:.b8 4 // DW_AT_byte_size +; CHECK-NEXT:.b8 5 // Abbrev [5] 0x61b:0x1f DW_TAG_subprogram ; CHECK-NEXT:.b8 95 // 
DW_AT_MIPS_linkage_name ; CHECK-NEXT:.b8 90 -; CHECK-NEXT:.b8 78 -; CHECK-NEXT:.b8 50 +; CHECK-NEXT:.b8 76 ; CHECK-NEXT:.b8 53 -; CHECK-NEXT:.b8 95 -; CHECK-NEXT:.b8 95 -; CHECK-NEXT:.b8 99 -; CHECK-NEXT:.b8 117 -; CHECK-NEXT:.b8 100 ; CHECK-NEXT:.b8 97 -; CHECK-NEXT:.b8 95 -; CHECK-NEXT:.b8 98 -; CHECK-NEXT:.b8 117 -; CHECK-NEXT:.b8 105 -; CHECK-NEXT:.b8 108 -; CHECK-NEXT:.b8 116 -; CHECK-NEXT:.b8 105 -; CHECK-NEXT:.b8 110 -; CHECK-NEXT:.b8 95 -; CHECK-NEXT:.b8 98 -; CHECK-NEXT:.b8 108 -; CHECK-NEXT:.b8 111 ; CHECK-NEXT:.b8 99 -; CHECK-NEXT:.b8 107 -; CHECK-NEXT:.b8 68 -; CHECK-NEXT:.b8 105 -; CHECK-NEXT:.b8 109 -; CHECK-NEXT:.b8 95 -; CHECK-NEXT:.b8 116 -; CHECK-NEXT:.b8 49 -; CHECK-NEXT:.b8 55 -; CHECK-NEXT:.b8 95 -; CHECK-NEXT:.b8 95 +; CHECK-NEXT:.b8 111 +; CHECK-NEXT:.b8 115 +; CHECK-NEXT:.b8 104 ; CHECK-NEXT:.b8 102 -; CHECK-NEXT:.b8 101 -; CHECK-NEXT:.b8 116 +; CHECK-NEXT:.b8 0 +; CHECK-NEXT:.b8 97 // DW_AT_name ; CHECK-NEXT:.b8 99 +; CHECK-NEXT:.b8 111 +; CHECK-NEXT:.b8 115 ; CHECK-NEXT:.b8 104 -; CHECK-NEXT:.b8 95 -; CHECK-NEXT:.b8 98 -; CHECK-NEXT:.b8 117 -; CHECK-NEXT:.b8 105 -; CHECK-NEXT:.b8 108 -; CHECK-NEXT:.b8 116 +; CHECK-NEXT:.b8 0 +; CHECK-NEXT:.b8 1 // DW_AT_decl_file +; CHECK-NEXT:.b8 48 // DW_AT_decl_line +; CHECK-NEXT:.b32 1554 // DW_AT_type +; CHECK-NEXT:.b8 1 // DW_AT_declaration +; CHECK-NEXT:.b8 6 // Abbrev [6] 0x634:0x5 DW_TAG_formal_parameter +; CHECK-NEXT:.b32 1554 // DW_AT_type +; CHECK-NEXT:.b8 0 // End Of Children Mark +; CHECK-NEXT:.b8 5 // Abbrev [5] 0x63a:0x1d DW_TAG_subprogram +; CHECK-NEXT:.b8 95 // DW_AT_MIPS_linkage_name +; CHECK-NEXT:.b8 90 +; CHECK-NEXT:.b8 76 +; CHECK-NEXT:.b8 52 +; CHECK-NEXT:.b8 97 +; CHECK-NEXT:.b8 115 ; CHECK-NEXT:.b8 105 ; CHECK-NEXT:.b8 110 -; CHECK-NEXT:.b8 95 -; CHECK-NEXT:.b8 121 -; CHECK-NEXT:.b8 69 -; CHECK-NEXT:.b8 118 -; CHECK-NEXT:.b8 0 -; CHECK-NEXT:.b8 95 // DW_AT_name -; CHECK-NEXT:.b8 95 ; CHECK-NEXT:.b8 102 -; CHECK-NEXT:.b8 101 -; CHECK-NEXT:.b8 116 -; CHECK-NEXT:.b8 99 -; CHECK-NEXT:.b8 104 -; CHECK-NEXT:.b8 95 -; CHECK-NEXT:.b8 98 -; CHECK-NEXT:.b8 117 -; CHECK-NEXT:.b8 105 -; CHECK-NEXT:.b8 108 -; CHECK-NEXT:.b8 116 +; CHECK-NEXT:.b8 0 +; CHECK-NEXT:.b8 97 // DW_AT_name +; CHECK-NEXT:.b8 115 ; CHECK-NEXT:.b8 105 ; CHECK-NEXT:.b8 110 -; CHECK-NEXT:.b8 95 -; CHECK-NEXT:.b8 121 ; CHECK-NEXT:.b8 0 -; CHECK-NEXT:.b8 2 // DW_AT_decl_file -; CHECK-NEXT:.b8 90 // DW_AT_decl_line -; CHECK-NEXT:.b32 619 // DW_AT_type +; CHECK-NEXT:.b8 1 // DW_AT_decl_file +; CHECK-NEXT:.b8 50 // DW_AT_decl_line +; CHECK-NEXT:.b32 1554 // DW_AT_type ; CHECK-NEXT:.b8 1 // DW_AT_declaration -; CHECK-NEXT:.b8 1 // DW_AT_external -; CHECK-NEXT:.b8 3 // Abbrev [3] 0x385:0x4f DW_TAG_subprogram +; CHECK-NEXT:.b8 6 // Abbrev [6] 0x651:0x5 DW_TAG_formal_parameter +; CHECK-NEXT:.b32 1554 // DW_AT_type +; CHECK-NEXT:.b8 0 // End Of Children Mark +; CHECK-NEXT:.b8 5 // Abbrev [5] 0x657:0x1f DW_TAG_subprogram ; CHECK-NEXT:.b8 95 // DW_AT_MIPS_linkage_name ; CHECK-NEXT:.b8 90 -; CHECK-NEXT:.b8 78 -; CHECK-NEXT:.b8 50 +; CHECK-NEXT:.b8 76 ; CHECK-NEXT:.b8 53 -; CHECK-NEXT:.b8 95 -; CHECK-NEXT:.b8 95 -; CHECK-NEXT:.b8 99 -; CHECK-NEXT:.b8 117 -; CHECK-NEXT:.b8 100 ; CHECK-NEXT:.b8 97 -; CHECK-NEXT:.b8 95 -; CHECK-NEXT:.b8 98 -; CHECK-NEXT:.b8 117 -; CHECK-NEXT:.b8 105 -; CHECK-NEXT:.b8 108 -; CHECK-NEXT:.b8 116 +; CHECK-NEXT:.b8 115 ; CHECK-NEXT:.b8 105 ; CHECK-NEXT:.b8 110 -; CHECK-NEXT:.b8 95 -; CHECK-NEXT:.b8 98 -; CHECK-NEXT:.b8 108 -; CHECK-NEXT:.b8 111 -; CHECK-NEXT:.b8 99 -; CHECK-NEXT:.b8 107 -; CHECK-NEXT:.b8 68 -; CHECK-NEXT:.b8 105 -; 
CHECK-NEXT:.b8 109 -; CHECK-NEXT:.b8 95 -; CHECK-NEXT:.b8 116 -; CHECK-NEXT:.b8 49 -; CHECK-NEXT:.b8 55 -; CHECK-NEXT:.b8 95 -; CHECK-NEXT:.b8 95 -; CHECK-NEXT:.b8 102 -; CHECK-NEXT:.b8 101 -; CHECK-NEXT:.b8 116 -; CHECK-NEXT:.b8 99 ; CHECK-NEXT:.b8 104 -; CHECK-NEXT:.b8 95 -; CHECK-NEXT:.b8 98 -; CHECK-NEXT:.b8 117 -; CHECK-NEXT:.b8 105 -; CHECK-NEXT:.b8 108 -; CHECK-NEXT:.b8 116 +; CHECK-NEXT:.b8 102 +; CHECK-NEXT:.b8 0 +; CHECK-NEXT:.b8 97 // DW_AT_name +; CHECK-NEXT:.b8 115 ; CHECK-NEXT:.b8 105 ; CHECK-NEXT:.b8 110 -; CHECK-NEXT:.b8 95 -; CHECK-NEXT:.b8 122 -; CHECK-NEXT:.b8 69 -; CHECK-NEXT:.b8 118 +; CHECK-NEXT:.b8 104 ; CHECK-NEXT:.b8 0 -; CHECK-NEXT:.b8 95 // DW_AT_name -; CHECK-NEXT:.b8 95 -; CHECK-NEXT:.b8 102 -; CHECK-NEXT:.b8 101 +; CHECK-NEXT:.b8 1 // DW_AT_decl_file +; CHECK-NEXT:.b8 52 // DW_AT_decl_line +; CHECK-NEXT:.b32 1554 // DW_AT_type +; CHECK-NEXT:.b8 1 // DW_AT_declaration +; CHECK-NEXT:.b8 6 // Abbrev [6] 0x670:0x5 DW_TAG_formal_parameter +; CHECK-NEXT:.b32 1554 // DW_AT_type +; CHECK-NEXT:.b8 0 // End Of Children Mark +; CHECK-NEXT:.b8 5 // Abbrev [5] 0x676:0x1d DW_TAG_subprogram +; CHECK-NEXT:.b8 95 // DW_AT_MIPS_linkage_name +; CHECK-NEXT:.b8 90 +; CHECK-NEXT:.b8 76 +; CHECK-NEXT:.b8 52 +; CHECK-NEXT:.b8 97 ; CHECK-NEXT:.b8 116 -; CHECK-NEXT:.b8 99 -; CHECK-NEXT:.b8 104 -; CHECK-NEXT:.b8 95 -; CHECK-NEXT:.b8 98 -; CHECK-NEXT:.b8 117 -; CHECK-NEXT:.b8 105 -; CHECK-NEXT:.b8 108 +; CHECK-NEXT:.b8 97 +; CHECK-NEXT:.b8 110 +; CHECK-NEXT:.b8 102 +; CHECK-NEXT:.b8 0 +; CHECK-NEXT:.b8 97 // DW_AT_name ; CHECK-NEXT:.b8 116 -; CHECK-NEXT:.b8 105 +; CHECK-NEXT:.b8 97 ; CHECK-NEXT:.b8 110 -; CHECK-NEXT:.b8 95 -; CHECK-NEXT:.b8 122 ; CHECK-NEXT:.b8 0 -; CHECK-NEXT:.b8 2 // DW_AT_decl_file -; CHECK-NEXT:.b8 91 // DW_AT_decl_line -; CHECK-NEXT:.b32 619 // DW_AT_type +; CHECK-NEXT:.b8 1 // DW_AT_decl_file +; CHECK-NEXT:.b8 56 // DW_AT_decl_line +; CHECK-NEXT:.b32 1554 // DW_AT_type ; CHECK-NEXT:.b8 1 // DW_AT_declaration -; CHECK-NEXT:.b8 1 // DW_AT_external -; CHECK-NEXT:.b8 4 // Abbrev [4] 0x3d4:0x47 DW_TAG_subprogram +; CHECK-NEXT:.b8 6 // Abbrev [6] 0x68d:0x5 DW_TAG_formal_parameter +; CHECK-NEXT:.b32 1554 // DW_AT_type +; CHECK-NEXT:.b8 0 // End Of Children Mark +; CHECK-NEXT:.b8 5 // Abbrev [5] 0x693:0x25 DW_TAG_subprogram ; CHECK-NEXT:.b8 95 // DW_AT_MIPS_linkage_name ; CHECK-NEXT:.b8 90 -; CHECK-NEXT:.b8 78 -; CHECK-NEXT:.b8 75 -; CHECK-NEXT:.b8 50 +; CHECK-NEXT:.b8 76 ; CHECK-NEXT:.b8 53 -; CHECK-NEXT:.b8 95 -; CHECK-NEXT:.b8 95 -; CHECK-NEXT:.b8 99 -; CHECK-NEXT:.b8 117 -; CHECK-NEXT:.b8 100 ; CHECK-NEXT:.b8 97 -; CHECK-NEXT:.b8 95 -; CHECK-NEXT:.b8 98 -; CHECK-NEXT:.b8 117 -; CHECK-NEXT:.b8 105 -; CHECK-NEXT:.b8 108 ; CHECK-NEXT:.b8 116 -; CHECK-NEXT:.b8 105 +; CHECK-NEXT:.b8 97 ; CHECK-NEXT:.b8 110 -; CHECK-NEXT:.b8 95 -; CHECK-NEXT:.b8 98 -; CHECK-NEXT:.b8 108 -; CHECK-NEXT:.b8 111 -; CHECK-NEXT:.b8 99 -; CHECK-NEXT:.b8 107 -; CHECK-NEXT:.b8 68 -; CHECK-NEXT:.b8 105 -; CHECK-NEXT:.b8 109 -; CHECK-NEXT:.b8 95 +; CHECK-NEXT:.b8 50 +; CHECK-NEXT:.b8 102 +; CHECK-NEXT:.b8 102 +; CHECK-NEXT:.b8 0 +; CHECK-NEXT:.b8 97 // DW_AT_name ; CHECK-NEXT:.b8 116 -; CHECK-NEXT:.b8 99 -; CHECK-NEXT:.b8 118 -; CHECK-NEXT:.b8 52 -; CHECK-NEXT:.b8 100 -; CHECK-NEXT:.b8 105 -; CHECK-NEXT:.b8 109 -; CHECK-NEXT:.b8 51 -; CHECK-NEXT:.b8 69 -; CHECK-NEXT:.b8 118 +; CHECK-NEXT:.b8 97 +; CHECK-NEXT:.b8 110 +; CHECK-NEXT:.b8 50 ; CHECK-NEXT:.b8 0 -; CHECK-NEXT:.b8 111 // DW_AT_name -; CHECK-NEXT:.b8 112 -; CHECK-NEXT:.b8 101 -; CHECK-NEXT:.b8 114 +; CHECK-NEXT:.b8 1 // DW_AT_decl_file +; 
CHECK-NEXT:.b8 54 // DW_AT_decl_line +; CHECK-NEXT:.b32 1554 // DW_AT_type +; CHECK-NEXT:.b8 1 // DW_AT_declaration +; CHECK-NEXT:.b8 6 // Abbrev [6] 0x6ad:0x5 DW_TAG_formal_parameter +; CHECK-NEXT:.b32 1554 // DW_AT_type +; CHECK-NEXT:.b8 6 // Abbrev [6] 0x6b2:0x5 DW_TAG_formal_parameter +; CHECK-NEXT:.b32 1554 // DW_AT_type +; CHECK-NEXT:.b8 0 // End Of Children Mark +; CHECK-NEXT:.b8 5 // Abbrev [5] 0x6b8:0x1f DW_TAG_subprogram +; CHECK-NEXT:.b8 95 // DW_AT_MIPS_linkage_name +; CHECK-NEXT:.b8 90 +; CHECK-NEXT:.b8 76 +; CHECK-NEXT:.b8 53 ; CHECK-NEXT:.b8 97 ; CHECK-NEXT:.b8 116 -; CHECK-NEXT:.b8 111 -; CHECK-NEXT:.b8 114 -; CHECK-NEXT:.b8 32 -; CHECK-NEXT:.b8 100 -; CHECK-NEXT:.b8 105 -; CHECK-NEXT:.b8 109 -; CHECK-NEXT:.b8 51 +; CHECK-NEXT:.b8 97 +; CHECK-NEXT:.b8 110 +; CHECK-NEXT:.b8 104 +; CHECK-NEXT:.b8 102 +; CHECK-NEXT:.b8 0 +; CHECK-NEXT:.b8 97 // DW_AT_name +; CHECK-NEXT:.b8 116 +; CHECK-NEXT:.b8 97 +; CHECK-NEXT:.b8 110 +; CHECK-NEXT:.b8 104 ; CHECK-NEXT:.b8 0 -; CHECK-NEXT:.b8 2 // DW_AT_decl_file -; CHECK-NEXT:.b8 94 // DW_AT_decl_line -; CHECK-NEXT:.b32 1265 // DW_AT_type +; CHECK-NEXT:.b8 1 // DW_AT_decl_file +; CHECK-NEXT:.b8 58 // DW_AT_decl_line +; CHECK-NEXT:.b32 1554 // DW_AT_type ; CHECK-NEXT:.b8 1 // DW_AT_declaration -; CHECK-NEXT:.b8 1 // DW_AT_external -; CHECK-NEXT:.b8 5 // Abbrev [5] 0x414:0x6 DW_TAG_formal_parameter -; CHECK-NEXT:.b32 1441 // DW_AT_type -; CHECK-NEXT:.b8 1 // DW_AT_artificial +; CHECK-NEXT:.b8 6 // Abbrev [6] 0x6d1:0x5 DW_TAG_formal_parameter +; CHECK-NEXT:.b32 1554 // DW_AT_type ; CHECK-NEXT:.b8 0 // End Of Children Mark -; CHECK-NEXT:.b8 6 // Abbrev [6] 0x41b:0x27 DW_TAG_subprogram -; CHECK-NEXT:.b8 95 // DW_AT_name -; CHECK-NEXT:.b8 95 +; CHECK-NEXT:.b8 5 // Abbrev [5] 0x6d7:0x1d DW_TAG_subprogram +; CHECK-NEXT:.b8 95 // DW_AT_MIPS_linkage_name +; CHECK-NEXT:.b8 90 +; CHECK-NEXT:.b8 76 +; CHECK-NEXT:.b8 52 ; CHECK-NEXT:.b8 99 -; CHECK-NEXT:.b8 117 -; CHECK-NEXT:.b8 100 -; CHECK-NEXT:.b8 97 -; CHECK-NEXT:.b8 95 ; CHECK-NEXT:.b8 98 -; CHECK-NEXT:.b8 117 -; CHECK-NEXT:.b8 105 -; CHECK-NEXT:.b8 108 +; CHECK-NEXT:.b8 114 ; CHECK-NEXT:.b8 116 -; CHECK-NEXT:.b8 105 -; CHECK-NEXT:.b8 110 -; CHECK-NEXT:.b8 95 +; CHECK-NEXT:.b8 102 +; CHECK-NEXT:.b8 0 +; CHECK-NEXT:.b8 99 // DW_AT_name ; CHECK-NEXT:.b8 98 -; CHECK-NEXT:.b8 108 -; CHECK-NEXT:.b8 111 -; CHECK-NEXT:.b8 99 -; CHECK-NEXT:.b8 107 -; CHECK-NEXT:.b8 68 -; CHECK-NEXT:.b8 105 -; CHECK-NEXT:.b8 109 -; CHECK-NEXT:.b8 95 +; CHECK-NEXT:.b8 114 ; CHECK-NEXT:.b8 116 ; CHECK-NEXT:.b8 0 -; CHECK-NEXT:.b8 2 // DW_AT_decl_file -; CHECK-NEXT:.b8 96 // DW_AT_decl_line +; CHECK-NEXT:.b8 1 // DW_AT_decl_file +; CHECK-NEXT:.b8 60 // DW_AT_decl_line +; CHECK-NEXT:.b32 1554 // DW_AT_type ; CHECK-NEXT:.b8 1 // DW_AT_declaration -; CHECK-NEXT:.b8 1 // DW_AT_external -; CHECK-NEXT:.b8 3 // DW_AT_accessibility -; CHECK-NEXT: // DW_ACCESS_private -; CHECK-NEXT:.b8 5 // Abbrev [5] 0x43b:0x6 DW_TAG_formal_parameter -; CHECK-NEXT:.b32 1451 // DW_AT_type -; CHECK-NEXT:.b8 1 // DW_AT_artificial +; CHECK-NEXT:.b8 6 // Abbrev [6] 0x6ee:0x5 DW_TAG_formal_parameter +; CHECK-NEXT:.b32 1554 // DW_AT_type ; CHECK-NEXT:.b8 0 // End Of Children Mark -; CHECK-NEXT:.b8 6 // Abbrev [6] 0x442:0x2c DW_TAG_subprogram -; CHECK-NEXT:.b8 95 // DW_AT_name -; CHECK-NEXT:.b8 95 +; CHECK-NEXT:.b8 5 // Abbrev [5] 0x6f4:0x1d DW_TAG_subprogram +; CHECK-NEXT:.b8 95 // DW_AT_MIPS_linkage_name +; CHECK-NEXT:.b8 90 +; CHECK-NEXT:.b8 76 +; CHECK-NEXT:.b8 52 ; CHECK-NEXT:.b8 99 -; CHECK-NEXT:.b8 117 -; CHECK-NEXT:.b8 100 -; CHECK-NEXT:.b8 97 -; 
CHECK-NEXT:.b8 95 -; CHECK-NEXT:.b8 98 -; CHECK-NEXT:.b8 117 +; CHECK-NEXT:.b8 101 ; CHECK-NEXT:.b8 105 ; CHECK-NEXT:.b8 108 -; CHECK-NEXT:.b8 116 +; CHECK-NEXT:.b8 102 +; CHECK-NEXT:.b8 0 +; CHECK-NEXT:.b8 99 // DW_AT_name +; CHECK-NEXT:.b8 101 ; CHECK-NEXT:.b8 105 -; CHECK-NEXT:.b8 110 -; CHECK-NEXT:.b8 95 -; CHECK-NEXT:.b8 98 ; CHECK-NEXT:.b8 108 -; CHECK-NEXT:.b8 111 -; CHECK-NEXT:.b8 99 -; CHECK-NEXT:.b8 107 -; CHECK-NEXT:.b8 68 -; CHECK-NEXT:.b8 105 -; CHECK-NEXT:.b8 109 -; CHECK-NEXT:.b8 95 -; CHECK-NEXT:.b8 116 ; CHECK-NEXT:.b8 0 -; CHECK-NEXT:.b8 2 // DW_AT_decl_file -; CHECK-NEXT:.b8 96 // DW_AT_decl_line +; CHECK-NEXT:.b8 1 // DW_AT_decl_file +; CHECK-NEXT:.b8 62 // DW_AT_decl_line +; CHECK-NEXT:.b32 1554 // DW_AT_type ; CHECK-NEXT:.b8 1 // DW_AT_declaration -; CHECK-NEXT:.b8 1 // DW_AT_external -; CHECK-NEXT:.b8 3 // DW_AT_accessibility -; CHECK-NEXT: // DW_ACCESS_private -; CHECK-NEXT:.b8 5 // Abbrev [5] 0x462:0x6 DW_TAG_formal_parameter -; CHECK-NEXT:.b32 1451 // DW_AT_type -; CHECK-NEXT:.b8 1 // DW_AT_artificial -; CHECK-NEXT:.b8 7 // Abbrev [7] 0x468:0x5 DW_TAG_formal_parameter -; CHECK-NEXT:.b32 1456 // DW_AT_type +; CHECK-NEXT:.b8 6 // Abbrev [6] 0x70b:0x5 DW_TAG_formal_parameter +; CHECK-NEXT:.b32 1554 // DW_AT_type ; CHECK-NEXT:.b8 0 // End Of Children Mark -; CHECK-NEXT:.b8 8 // Abbrev [8] 0x46e:0x43 DW_TAG_subprogram +; CHECK-NEXT:.b8 5 // Abbrev [5] 0x711:0x2b DW_TAG_subprogram ; CHECK-NEXT:.b8 95 // DW_AT_MIPS_linkage_name ; CHECK-NEXT:.b8 90 -; CHECK-NEXT:.b8 78 -; CHECK-NEXT:.b8 75 -; CHECK-NEXT:.b8 50 -; CHECK-NEXT:.b8 53 -; CHECK-NEXT:.b8 95 -; CHECK-NEXT:.b8 95 +; CHECK-NEXT:.b8 76 +; CHECK-NEXT:.b8 56 ; CHECK-NEXT:.b8 99 -; CHECK-NEXT:.b8 117 -; CHECK-NEXT:.b8 100 -; CHECK-NEXT:.b8 97 -; CHECK-NEXT:.b8 95 -; CHECK-NEXT:.b8 98 -; CHECK-NEXT:.b8 117 +; CHECK-NEXT:.b8 111 +; CHECK-NEXT:.b8 112 +; CHECK-NEXT:.b8 121 +; CHECK-NEXT:.b8 115 ; CHECK-NEXT:.b8 105 -; CHECK-NEXT:.b8 108 -; CHECK-NEXT:.b8 116 +; CHECK-NEXT:.b8 103 +; CHECK-NEXT:.b8 110 +; CHECK-NEXT:.b8 102 +; CHECK-NEXT:.b8 102 +; CHECK-NEXT:.b8 0 +; CHECK-NEXT:.b8 99 // DW_AT_name +; CHECK-NEXT:.b8 111 +; CHECK-NEXT:.b8 112 +; CHECK-NEXT:.b8 121 +; CHECK-NEXT:.b8 115 ; CHECK-NEXT:.b8 105 +; CHECK-NEXT:.b8 103 ; CHECK-NEXT:.b8 110 -; CHECK-NEXT:.b8 95 -; CHECK-NEXT:.b8 98 -; CHECK-NEXT:.b8 108 +; CHECK-NEXT:.b8 0 +; CHECK-NEXT:.b8 1 // DW_AT_decl_file +; CHECK-NEXT:.b8 64 // DW_AT_decl_line +; CHECK-NEXT:.b32 1554 // DW_AT_type +; CHECK-NEXT:.b8 1 // DW_AT_declaration +; CHECK-NEXT:.b8 6 // Abbrev [6] 0x731:0x5 DW_TAG_formal_parameter +; CHECK-NEXT:.b32 1554 // DW_AT_type +; CHECK-NEXT:.b8 6 // Abbrev [6] 0x736:0x5 DW_TAG_formal_parameter +; CHECK-NEXT:.b32 1554 // DW_AT_type +; CHECK-NEXT:.b8 0 // End Of Children Mark +; CHECK-NEXT:.b8 5 // Abbrev [5] 0x73c:0x1b DW_TAG_subprogram +; CHECK-NEXT:.b8 95 // DW_AT_MIPS_linkage_name +; CHECK-NEXT:.b8 90 +; CHECK-NEXT:.b8 76 +; CHECK-NEXT:.b8 51 +; CHECK-NEXT:.b8 99 +; CHECK-NEXT:.b8 111 +; CHECK-NEXT:.b8 115 +; CHECK-NEXT:.b8 102 +; CHECK-NEXT:.b8 0 +; CHECK-NEXT:.b8 99 // DW_AT_name ; CHECK-NEXT:.b8 111 +; CHECK-NEXT:.b8 115 +; CHECK-NEXT:.b8 0 +; CHECK-NEXT:.b8 1 // DW_AT_decl_file +; CHECK-NEXT:.b8 66 // DW_AT_decl_line +; CHECK-NEXT:.b32 1554 // DW_AT_type +; CHECK-NEXT:.b8 1 // DW_AT_declaration +; CHECK-NEXT:.b8 6 // Abbrev [6] 0x751:0x5 DW_TAG_formal_parameter +; CHECK-NEXT:.b32 1554 // DW_AT_type +; CHECK-NEXT:.b8 0 // End Of Children Mark +; CHECK-NEXT:.b8 5 // Abbrev [5] 0x757:0x1d DW_TAG_subprogram +; CHECK-NEXT:.b8 95 // DW_AT_MIPS_linkage_name +; 
CHECK-NEXT:.b8 90 +; CHECK-NEXT:.b8 76 +; CHECK-NEXT:.b8 52 ; CHECK-NEXT:.b8 99 -; CHECK-NEXT:.b8 107 -; CHECK-NEXT:.b8 68 -; CHECK-NEXT:.b8 105 -; CHECK-NEXT:.b8 109 -; CHECK-NEXT:.b8 95 -; CHECK-NEXT:.b8 116 -; CHECK-NEXT:.b8 97 -; CHECK-NEXT:.b8 83 -; CHECK-NEXT:.b8 69 -; CHECK-NEXT:.b8 82 -; CHECK-NEXT:.b8 75 -; CHECK-NEXT:.b8 83 -; CHECK-NEXT:.b8 95 +; CHECK-NEXT:.b8 111 +; CHECK-NEXT:.b8 115 +; CHECK-NEXT:.b8 104 +; CHECK-NEXT:.b8 102 ; CHECK-NEXT:.b8 0 -; CHECK-NEXT:.b8 111 // DW_AT_name -; CHECK-NEXT:.b8 112 +; CHECK-NEXT:.b8 99 // DW_AT_name +; CHECK-NEXT:.b8 111 +; CHECK-NEXT:.b8 115 +; CHECK-NEXT:.b8 104 +; CHECK-NEXT:.b8 0 +; CHECK-NEXT:.b8 1 // DW_AT_decl_file +; CHECK-NEXT:.b8 68 // DW_AT_decl_line +; CHECK-NEXT:.b32 1554 // DW_AT_type +; CHECK-NEXT:.b8 1 // DW_AT_declaration +; CHECK-NEXT:.b8 6 // Abbrev [6] 0x76e:0x5 DW_TAG_formal_parameter +; CHECK-NEXT:.b32 1554 // DW_AT_type +; CHECK-NEXT:.b8 0 // End Of Children Mark +; CHECK-NEXT:.b8 5 // Abbrev [5] 0x774:0x1b DW_TAG_subprogram +; CHECK-NEXT:.b8 95 // DW_AT_MIPS_linkage_name +; CHECK-NEXT:.b8 90 +; CHECK-NEXT:.b8 76 +; CHECK-NEXT:.b8 51 ; CHECK-NEXT:.b8 101 ; CHECK-NEXT:.b8 114 -; CHECK-NEXT:.b8 97 -; CHECK-NEXT:.b8 116 -; CHECK-NEXT:.b8 111 +; CHECK-NEXT:.b8 102 +; CHECK-NEXT:.b8 102 +; CHECK-NEXT:.b8 0 +; CHECK-NEXT:.b8 101 // DW_AT_name ; CHECK-NEXT:.b8 114 -; CHECK-NEXT:.b8 61 +; CHECK-NEXT:.b8 102 +; CHECK-NEXT:.b8 0 +; CHECK-NEXT:.b8 1 // DW_AT_decl_file +; CHECK-NEXT:.b8 72 // DW_AT_decl_line +; CHECK-NEXT:.b32 1554 // DW_AT_type +; CHECK-NEXT:.b8 1 // DW_AT_declaration +; CHECK-NEXT:.b8 6 // Abbrev [6] 0x789:0x5 DW_TAG_formal_parameter +; CHECK-NEXT:.b32 1554 // DW_AT_type +; CHECK-NEXT:.b8 0 // End Of Children Mark +; CHECK-NEXT:.b8 5 // Abbrev [5] 0x78f:0x1d DW_TAG_subprogram +; CHECK-NEXT:.b8 95 // DW_AT_MIPS_linkage_name +; CHECK-NEXT:.b8 90 +; CHECK-NEXT:.b8 76 +; CHECK-NEXT:.b8 52 +; CHECK-NEXT:.b8 101 +; CHECK-NEXT:.b8 114 +; CHECK-NEXT:.b8 102 +; CHECK-NEXT:.b8 99 +; CHECK-NEXT:.b8 102 +; CHECK-NEXT:.b8 0 +; CHECK-NEXT:.b8 101 // DW_AT_name +; CHECK-NEXT:.b8 114 +; CHECK-NEXT:.b8 102 +; CHECK-NEXT:.b8 99 +; CHECK-NEXT:.b8 0 +; CHECK-NEXT:.b8 1 // DW_AT_decl_file +; CHECK-NEXT:.b8 70 // DW_AT_decl_line +; CHECK-NEXT:.b32 1554 // DW_AT_type +; CHECK-NEXT:.b8 1 // DW_AT_declaration +; CHECK-NEXT:.b8 6 // Abbrev [6] 0x7a6:0x5 DW_TAG_formal_parameter +; CHECK-NEXT:.b32 1554 // DW_AT_type +; CHECK-NEXT:.b8 0 // End Of Children Mark +; CHECK-NEXT:.b8 5 // Abbrev [5] 0x7ac:0x1b DW_TAG_subprogram +; CHECK-NEXT:.b8 95 // DW_AT_MIPS_linkage_name +; CHECK-NEXT:.b8 90 +; CHECK-NEXT:.b8 76 +; CHECK-NEXT:.b8 51 +; CHECK-NEXT:.b8 101 +; CHECK-NEXT:.b8 120 +; CHECK-NEXT:.b8 112 +; CHECK-NEXT:.b8 102 +; CHECK-NEXT:.b8 0 +; CHECK-NEXT:.b8 101 // DW_AT_name +; CHECK-NEXT:.b8 120 +; CHECK-NEXT:.b8 112 +; CHECK-NEXT:.b8 0 +; CHECK-NEXT:.b8 1 // DW_AT_decl_file +; CHECK-NEXT:.b8 76 // DW_AT_decl_line +; CHECK-NEXT:.b32 1554 // DW_AT_type +; CHECK-NEXT:.b8 1 // DW_AT_declaration +; CHECK-NEXT:.b8 6 // Abbrev [6] 0x7c1:0x5 DW_TAG_formal_parameter +; CHECK-NEXT:.b32 1554 // DW_AT_type +; CHECK-NEXT:.b8 0 // End Of Children Mark +; CHECK-NEXT:.b8 5 // Abbrev [5] 0x7c7:0x1d DW_TAG_subprogram +; CHECK-NEXT:.b8 95 // DW_AT_MIPS_linkage_name +; CHECK-NEXT:.b8 90 +; CHECK-NEXT:.b8 76 +; CHECK-NEXT:.b8 52 +; CHECK-NEXT:.b8 101 +; CHECK-NEXT:.b8 120 +; CHECK-NEXT:.b8 112 +; CHECK-NEXT:.b8 50 +; CHECK-NEXT:.b8 102 +; CHECK-NEXT:.b8 0 +; CHECK-NEXT:.b8 101 // DW_AT_name +; CHECK-NEXT:.b8 120 +; CHECK-NEXT:.b8 112 +; CHECK-NEXT:.b8 50 +; 
CHECK-NEXT:.b8 0 +; CHECK-NEXT:.b8 1 // DW_AT_decl_file +; CHECK-NEXT:.b8 74 // DW_AT_decl_line +; CHECK-NEXT:.b32 1554 // DW_AT_type +; CHECK-NEXT:.b8 1 // DW_AT_declaration +; CHECK-NEXT:.b8 6 // Abbrev [6] 0x7de:0x5 DW_TAG_formal_parameter +; CHECK-NEXT:.b32 1554 // DW_AT_type +; CHECK-NEXT:.b8 0 // End Of Children Mark +; CHECK-NEXT:.b8 5 // Abbrev [5] 0x7e4:0x1f DW_TAG_subprogram +; CHECK-NEXT:.b8 95 // DW_AT_MIPS_linkage_name +; CHECK-NEXT:.b8 90 +; CHECK-NEXT:.b8 76 +; CHECK-NEXT:.b8 53 +; CHECK-NEXT:.b8 101 +; CHECK-NEXT:.b8 120 +; CHECK-NEXT:.b8 112 +; CHECK-NEXT:.b8 109 +; CHECK-NEXT:.b8 49 +; CHECK-NEXT:.b8 102 +; CHECK-NEXT:.b8 0 +; CHECK-NEXT:.b8 101 // DW_AT_name +; CHECK-NEXT:.b8 120 +; CHECK-NEXT:.b8 112 +; CHECK-NEXT:.b8 109 +; CHECK-NEXT:.b8 49 ; CHECK-NEXT:.b8 0 -; CHECK-NEXT:.b8 2 // DW_AT_decl_file -; CHECK-NEXT:.b8 96 // DW_AT_decl_line +; CHECK-NEXT:.b8 1 // DW_AT_decl_file +; CHECK-NEXT:.b8 78 // DW_AT_decl_line +; CHECK-NEXT:.b32 1554 // DW_AT_type ; CHECK-NEXT:.b8 1 // DW_AT_declaration -; CHECK-NEXT:.b8 1 // DW_AT_external -; CHECK-NEXT:.b8 3 // DW_AT_accessibility -; CHECK-NEXT: // DW_ACCESS_private -; CHECK-NEXT:.b8 5 // Abbrev [5] 0x4a5:0x6 DW_TAG_formal_parameter -; CHECK-NEXT:.b32 1441 // DW_AT_type -; CHECK-NEXT:.b8 1 // DW_AT_artificial -; CHECK-NEXT:.b8 7 // Abbrev [7] 0x4ab:0x5 DW_TAG_formal_parameter -; CHECK-NEXT:.b32 1456 // DW_AT_type +; CHECK-NEXT:.b8 6 // Abbrev [6] 0x7fd:0x5 DW_TAG_formal_parameter +; CHECK-NEXT:.b32 1554 // DW_AT_type ; CHECK-NEXT:.b8 0 // End Of Children Mark -; CHECK-NEXT:.b8 9 // Abbrev [9] 0x4b1:0x3f DW_TAG_subprogram +; CHECK-NEXT:.b8 5 // Abbrev [5] 0x803:0x1d DW_TAG_subprogram ; CHECK-NEXT:.b8 95 // DW_AT_MIPS_linkage_name ; CHECK-NEXT:.b8 90 -; CHECK-NEXT:.b8 78 -; CHECK-NEXT:.b8 75 -; CHECK-NEXT:.b8 50 -; CHECK-NEXT:.b8 53 -; CHECK-NEXT:.b8 95 -; CHECK-NEXT:.b8 95 -; CHECK-NEXT:.b8 99 -; CHECK-NEXT:.b8 117 -; CHECK-NEXT:.b8 100 +; CHECK-NEXT:.b8 76 +; CHECK-NEXT:.b8 52 +; CHECK-NEXT:.b8 102 ; CHECK-NEXT:.b8 97 -; CHECK-NEXT:.b8 95 -; CHECK-NEXT:.b8 98 -; CHECK-NEXT:.b8 117 -; CHECK-NEXT:.b8 105 -; CHECK-NEXT:.b8 108 -; CHECK-NEXT:.b8 116 -; CHECK-NEXT:.b8 105 -; CHECK-NEXT:.b8 110 -; CHECK-NEXT:.b8 95 ; CHECK-NEXT:.b8 98 -; CHECK-NEXT:.b8 108 -; CHECK-NEXT:.b8 111 -; CHECK-NEXT:.b8 99 -; CHECK-NEXT:.b8 107 -; CHECK-NEXT:.b8 68 -; CHECK-NEXT:.b8 105 -; CHECK-NEXT:.b8 109 -; CHECK-NEXT:.b8 95 -; CHECK-NEXT:.b8 116 -; CHECK-NEXT:.b8 97 -; CHECK-NEXT:.b8 100 -; CHECK-NEXT:.b8 69 -; CHECK-NEXT:.b8 118 +; CHECK-NEXT:.b8 115 +; CHECK-NEXT:.b8 102 ; CHECK-NEXT:.b8 0 -; CHECK-NEXT:.b8 111 // DW_AT_name -; CHECK-NEXT:.b8 112 -; CHECK-NEXT:.b8 101 -; CHECK-NEXT:.b8 114 +; CHECK-NEXT:.b8 102 // DW_AT_name ; CHECK-NEXT:.b8 97 -; CHECK-NEXT:.b8 116 -; CHECK-NEXT:.b8 111 -; CHECK-NEXT:.b8 114 -; CHECK-NEXT:.b8 38 +; CHECK-NEXT:.b8 98 +; CHECK-NEXT:.b8 115 ; CHECK-NEXT:.b8 0 -; CHECK-NEXT:.b8 2 // DW_AT_decl_file -; CHECK-NEXT:.b8 96 // DW_AT_decl_line -; CHECK-NEXT:.b32 1461 // DW_AT_type +; CHECK-NEXT:.b8 1 // DW_AT_decl_file +; CHECK-NEXT:.b8 80 // DW_AT_decl_line +; CHECK-NEXT:.b32 1554 // DW_AT_type ; CHECK-NEXT:.b8 1 // DW_AT_declaration -; CHECK-NEXT:.b8 1 // DW_AT_external -; CHECK-NEXT:.b8 3 // DW_AT_accessibility -; CHECK-NEXT: // DW_ACCESS_private -; CHECK-NEXT:.b8 5 // Abbrev [5] 0x4e9:0x6 DW_TAG_formal_parameter -; CHECK-NEXT:.b32 1441 // DW_AT_type -; CHECK-NEXT:.b8 1 // DW_AT_artificial -; CHECK-NEXT:.b8 0 // End Of Children Mark +; CHECK-NEXT:.b8 6 // Abbrev [6] 0x81a:0x5 DW_TAG_formal_parameter +; CHECK-NEXT:.b32 1554 
// DW_AT_type ; CHECK-NEXT:.b8 0 // End Of Children Mark -; CHECK-NEXT:.b8 16 // Abbrev [16] 0x4f1:0x9d DW_TAG_structure_type -; CHECK-NEXT:.b8 100 // DW_AT_name -; CHECK-NEXT:.b8 105 -; CHECK-NEXT:.b8 109 -; CHECK-NEXT:.b8 51 -; CHECK-NEXT:.b8 0 -; CHECK-NEXT:.b8 12 // DW_AT_byte_size -; CHECK-NEXT:.b8 3 // DW_AT_decl_file -; CHECK-NEXT:.b8 161 // DW_AT_decl_line -; CHECK-NEXT:.b8 1 -; CHECK-NEXT:.b8 17 // Abbrev [17] 0x4fb:0xd DW_TAG_member -; CHECK-NEXT:.b8 120 // DW_AT_name -; CHECK-NEXT:.b8 0 -; CHECK-NEXT:.b32 619 // DW_AT_type -; CHECK-NEXT:.b8 3 // DW_AT_decl_file -; CHECK-NEXT:.b8 163 // DW_AT_decl_line -; CHECK-NEXT:.b8 1 -; CHECK-NEXT:.b8 2 // DW_AT_data_member_location -; CHECK-NEXT:.b8 35 -; CHECK-NEXT:.b8 0 -; CHECK-NEXT:.b8 17 // Abbrev [17] 0x508:0xd DW_TAG_member -; CHECK-NEXT:.b8 121 // DW_AT_name -; CHECK-NEXT:.b8 0 -; CHECK-NEXT:.b32 619 // DW_AT_type -; CHECK-NEXT:.b8 3 // DW_AT_decl_file -; CHECK-NEXT:.b8 163 // DW_AT_decl_line -; CHECK-NEXT:.b8 1 -; CHECK-NEXT:.b8 2 // DW_AT_data_member_location -; CHECK-NEXT:.b8 35 -; CHECK-NEXT:.b8 4 -; CHECK-NEXT:.b8 17 // Abbrev [17] 0x515:0xd DW_TAG_member -; CHECK-NEXT:.b8 122 // DW_AT_name -; CHECK-NEXT:.b8 0 -; CHECK-NEXT:.b32 619 // DW_AT_type -; CHECK-NEXT:.b8 3 // DW_AT_decl_file -; CHECK-NEXT:.b8 163 // DW_AT_decl_line -; CHECK-NEXT:.b8 1 -; CHECK-NEXT:.b8 2 // DW_AT_data_member_location -; CHECK-NEXT:.b8 35 -; CHECK-NEXT:.b8 8 -; CHECK-NEXT:.b8 18 // Abbrev [18] 0x522:0x21 DW_TAG_subprogram -; CHECK-NEXT:.b8 100 // DW_AT_name +; CHECK-NEXT:.b8 5 // Abbrev [5] 0x820:0x23 DW_TAG_subprogram +; CHECK-NEXT:.b8 95 // DW_AT_MIPS_linkage_name +; CHECK-NEXT:.b8 90 +; CHECK-NEXT:.b8 76 +; CHECK-NEXT:.b8 52 +; CHECK-NEXT:.b8 102 +; CHECK-NEXT:.b8 100 ; CHECK-NEXT:.b8 105 ; CHECK-NEXT:.b8 109 -; CHECK-NEXT:.b8 51 +; CHECK-NEXT:.b8 102 +; CHECK-NEXT:.b8 102 ; CHECK-NEXT:.b8 0 -; CHECK-NEXT:.b8 3 // DW_AT_decl_file -; CHECK-NEXT:.b8 165 // DW_AT_decl_line -; CHECK-NEXT:.b8 1 -; CHECK-NEXT:.b8 1 // DW_AT_declaration -; CHECK-NEXT:.b8 1 // DW_AT_external -; CHECK-NEXT:.b8 5 // Abbrev [5] 0x52d:0x6 DW_TAG_formal_parameter -; CHECK-NEXT:.b32 1422 // DW_AT_type -; CHECK-NEXT:.b8 1 // DW_AT_artificial -; CHECK-NEXT:.b8 7 // Abbrev [7] 0x533:0x5 DW_TAG_formal_parameter -; CHECK-NEXT:.b32 619 // DW_AT_type -; CHECK-NEXT:.b8 7 // Abbrev [7] 0x538:0x5 DW_TAG_formal_parameter -; CHECK-NEXT:.b32 619 // DW_AT_type -; CHECK-NEXT:.b8 7 // Abbrev [7] 0x53d:0x5 DW_TAG_formal_parameter -; CHECK-NEXT:.b32 619 // DW_AT_type -; CHECK-NEXT:.b8 0 // End Of Children Mark -; CHECK-NEXT:.b8 18 // Abbrev [18] 0x543:0x17 DW_TAG_subprogram -; CHECK-NEXT:.b8 100 // DW_AT_name +; CHECK-NEXT:.b8 102 // DW_AT_name +; CHECK-NEXT:.b8 100 ; CHECK-NEXT:.b8 105 ; CHECK-NEXT:.b8 109 -; CHECK-NEXT:.b8 51 ; CHECK-NEXT:.b8 0 -; CHECK-NEXT:.b8 3 // DW_AT_decl_file -; CHECK-NEXT:.b8 166 // DW_AT_decl_line -; CHECK-NEXT:.b8 1 +; CHECK-NEXT:.b8 1 // DW_AT_decl_file +; CHECK-NEXT:.b8 82 // DW_AT_decl_line +; CHECK-NEXT:.b32 1554 // DW_AT_type ; CHECK-NEXT:.b8 1 // DW_AT_declaration -; CHECK-NEXT:.b8 1 // DW_AT_external -; CHECK-NEXT:.b8 5 // Abbrev [5] 0x54e:0x6 DW_TAG_formal_parameter -; CHECK-NEXT:.b32 1422 // DW_AT_type -; CHECK-NEXT:.b8 1 // DW_AT_artificial -; CHECK-NEXT:.b8 7 // Abbrev [7] 0x554:0x5 DW_TAG_formal_parameter -; CHECK-NEXT:.b32 1427 // DW_AT_type +; CHECK-NEXT:.b8 6 // Abbrev [6] 0x838:0x5 DW_TAG_formal_parameter +; CHECK-NEXT:.b32 1554 // DW_AT_type +; CHECK-NEXT:.b8 6 // Abbrev [6] 0x83d:0x5 DW_TAG_formal_parameter +; CHECK-NEXT:.b32 1554 // DW_AT_type ; 
CHECK-NEXT:.b8 0 // End Of Children Mark -; CHECK-NEXT:.b8 19 // Abbrev [19] 0x55a:0x33 DW_TAG_subprogram +; CHECK-NEXT:.b8 5 // Abbrev [5] 0x843:0x1f DW_TAG_subprogram ; CHECK-NEXT:.b8 95 // DW_AT_MIPS_linkage_name ; CHECK-NEXT:.b8 90 -; CHECK-NEXT:.b8 78 -; CHECK-NEXT:.b8 52 -; CHECK-NEXT:.b8 100 -; CHECK-NEXT:.b8 105 -; CHECK-NEXT:.b8 109 -; CHECK-NEXT:.b8 51 -; CHECK-NEXT:.b8 99 -; CHECK-NEXT:.b8 118 +; CHECK-NEXT:.b8 76 ; CHECK-NEXT:.b8 53 -; CHECK-NEXT:.b8 117 -; CHECK-NEXT:.b8 105 -; CHECK-NEXT:.b8 110 -; CHECK-NEXT:.b8 116 -; CHECK-NEXT:.b8 51 -; CHECK-NEXT:.b8 69 -; CHECK-NEXT:.b8 118 -; CHECK-NEXT:.b8 0 -; CHECK-NEXT:.b8 111 // DW_AT_name -; CHECK-NEXT:.b8 112 -; CHECK-NEXT:.b8 101 +; CHECK-NEXT:.b8 102 +; CHECK-NEXT:.b8 108 +; CHECK-NEXT:.b8 111 +; CHECK-NEXT:.b8 111 ; CHECK-NEXT:.b8 114 -; CHECK-NEXT:.b8 97 -; CHECK-NEXT:.b8 116 +; CHECK-NEXT:.b8 102 +; CHECK-NEXT:.b8 0 +; CHECK-NEXT:.b8 102 // DW_AT_name +; CHECK-NEXT:.b8 108 +; CHECK-NEXT:.b8 111 ; CHECK-NEXT:.b8 111 ; CHECK-NEXT:.b8 114 -; CHECK-NEXT:.b8 32 -; CHECK-NEXT:.b8 117 -; CHECK-NEXT:.b8 105 -; CHECK-NEXT:.b8 110 -; CHECK-NEXT:.b8 116 -; CHECK-NEXT:.b8 51 ; CHECK-NEXT:.b8 0 -; CHECK-NEXT:.b8 3 // DW_AT_decl_file -; CHECK-NEXT:.b8 167 // DW_AT_decl_line -; CHECK-NEXT:.b8 1 -; CHECK-NEXT:.b32 1427 // DW_AT_type +; CHECK-NEXT:.b8 1 // DW_AT_decl_file +; CHECK-NEXT:.b8 84 // DW_AT_decl_line +; CHECK-NEXT:.b32 1554 // DW_AT_type ; CHECK-NEXT:.b8 1 // DW_AT_declaration -; CHECK-NEXT:.b8 1 // DW_AT_external -; CHECK-NEXT:.b8 5 // Abbrev [5] 0x586:0x6 DW_TAG_formal_parameter -; CHECK-NEXT:.b32 1422 // DW_AT_type -; CHECK-NEXT:.b8 1 // DW_AT_artificial +; CHECK-NEXT:.b8 6 // Abbrev [6] 0x85c:0x5 DW_TAG_formal_parameter +; CHECK-NEXT:.b32 1554 // DW_AT_type ; CHECK-NEXT:.b8 0 // End Of Children Mark -; CHECK-NEXT:.b8 0 // End Of Children Mark -; CHECK-NEXT:.b8 12 // Abbrev [12] 0x58e:0x5 DW_TAG_pointer_type -; CHECK-NEXT:.b32 1265 // DW_AT_type -; CHECK-NEXT:.b8 20 // Abbrev [20] 0x593:0xe DW_TAG_typedef -; CHECK-NEXT:.b32 635 // DW_AT_type -; CHECK-NEXT:.b8 117 // DW_AT_name -; CHECK-NEXT:.b8 105 -; CHECK-NEXT:.b8 110 -; CHECK-NEXT:.b8 116 +; CHECK-NEXT:.b8 5 // Abbrev [5] 0x862:0x27 DW_TAG_subprogram +; CHECK-NEXT:.b8 95 // DW_AT_MIPS_linkage_name +; CHECK-NEXT:.b8 90 +; CHECK-NEXT:.b8 76 ; CHECK-NEXT:.b8 51 -; CHECK-NEXT:.b8 0 -; CHECK-NEXT:.b8 3 // DW_AT_decl_file -; CHECK-NEXT:.b8 127 // DW_AT_decl_line -; CHECK-NEXT:.b8 1 -; CHECK-NEXT:.b8 12 // Abbrev [12] 0x5a1:0x5 DW_TAG_pointer_type -; CHECK-NEXT:.b32 1446 // DW_AT_type -; CHECK-NEXT:.b8 13 // Abbrev [13] 0x5a6:0x5 DW_TAG_const_type -; CHECK-NEXT:.b32 713 // DW_AT_type -; CHECK-NEXT:.b8 12 // Abbrev [12] 0x5ab:0x5 DW_TAG_pointer_type -; CHECK-NEXT:.b32 713 // DW_AT_type -; CHECK-NEXT:.b8 14 // Abbrev [14] 0x5b0:0x5 DW_TAG_reference_type -; CHECK-NEXT:.b32 1446 // DW_AT_type -; CHECK-NEXT:.b8 12 // Abbrev [12] 0x5b5:0x5 DW_TAG_pointer_type -; CHECK-NEXT:.b32 713 // DW_AT_type -; CHECK-NEXT:.b8 15 // Abbrev [15] 0x5ba:0x6 DW_TAG_subprogram -; CHECK-NEXT:.b32 743 // DW_AT_specification -; CHECK-NEXT:.b8 1 // DW_AT_inline -; CHECK-NEXT:.b8 2 // Abbrev [2] 0x5c0:0x233 DW_TAG_structure_type -; CHECK-NEXT:.b8 95 // DW_AT_name -; CHECK-NEXT:.b8 95 -; CHECK-NEXT:.b8 99 -; CHECK-NEXT:.b8 117 -; CHECK-NEXT:.b8 100 +; CHECK-NEXT:.b8 102 +; CHECK-NEXT:.b8 109 ; CHECK-NEXT:.b8 97 -; CHECK-NEXT:.b8 95 -; CHECK-NEXT:.b8 98 -; CHECK-NEXT:.b8 117 -; CHECK-NEXT:.b8 105 -; CHECK-NEXT:.b8 108 -; CHECK-NEXT:.b8 116 -; CHECK-NEXT:.b8 105 -; CHECK-NEXT:.b8 110 -; CHECK-NEXT:.b8 95 -; 
CHECK-NEXT:.b8 116 -; CHECK-NEXT:.b8 104 -; CHECK-NEXT:.b8 114 -; CHECK-NEXT:.b8 101 +; CHECK-NEXT:.b8 102 +; CHECK-NEXT:.b8 102 +; CHECK-NEXT:.b8 102 +; CHECK-NEXT:.b8 0 +; CHECK-NEXT:.b8 102 // DW_AT_name +; CHECK-NEXT:.b8 109 ; CHECK-NEXT:.b8 97 -; CHECK-NEXT:.b8 100 -; CHECK-NEXT:.b8 73 -; CHECK-NEXT:.b8 100 -; CHECK-NEXT:.b8 120 -; CHECK-NEXT:.b8 95 -; CHECK-NEXT:.b8 116 ; CHECK-NEXT:.b8 0 -; CHECK-NEXT:.b8 1 // DW_AT_byte_size -; CHECK-NEXT:.b8 2 // DW_AT_decl_file -; CHECK-NEXT:.b8 66 // DW_AT_decl_line -; CHECK-NEXT:.b8 3 // Abbrev [3] 0x5df:0x50 DW_TAG_subprogram +; CHECK-NEXT:.b8 1 // DW_AT_decl_file +; CHECK-NEXT:.b8 86 // DW_AT_decl_line +; CHECK-NEXT:.b32 1554 // DW_AT_type +; CHECK-NEXT:.b8 1 // DW_AT_declaration +; CHECK-NEXT:.b8 6 // Abbrev [6] 0x879:0x5 DW_TAG_formal_parameter +; CHECK-NEXT:.b32 1554 // DW_AT_type +; CHECK-NEXT:.b8 6 // Abbrev [6] 0x87e:0x5 DW_TAG_formal_parameter +; CHECK-NEXT:.b32 1554 // DW_AT_type +; CHECK-NEXT:.b8 6 // Abbrev [6] 0x883:0x5 DW_TAG_formal_parameter +; CHECK-NEXT:.b32 1554 // DW_AT_type +; CHECK-NEXT:.b8 0 // End Of Children Mark +; CHECK-NEXT:.b8 5 // Abbrev [5] 0x889:0x23 DW_TAG_subprogram ; CHECK-NEXT:.b8 95 // DW_AT_MIPS_linkage_name ; CHECK-NEXT:.b8 90 -; CHECK-NEXT:.b8 78 -; CHECK-NEXT:.b8 50 -; CHECK-NEXT:.b8 54 -; CHECK-NEXT:.b8 95 -; CHECK-NEXT:.b8 95 -; CHECK-NEXT:.b8 99 -; CHECK-NEXT:.b8 117 -; CHECK-NEXT:.b8 100 -; CHECK-NEXT:.b8 97 -; CHECK-NEXT:.b8 95 -; CHECK-NEXT:.b8 98 -; CHECK-NEXT:.b8 117 -; CHECK-NEXT:.b8 105 -; CHECK-NEXT:.b8 108 -; CHECK-NEXT:.b8 116 -; CHECK-NEXT:.b8 105 -; CHECK-NEXT:.b8 110 -; CHECK-NEXT:.b8 95 -; CHECK-NEXT:.b8 116 -; CHECK-NEXT:.b8 104 -; CHECK-NEXT:.b8 114 -; CHECK-NEXT:.b8 101 -; CHECK-NEXT:.b8 97 -; CHECK-NEXT:.b8 100 -; CHECK-NEXT:.b8 73 -; CHECK-NEXT:.b8 100 -; CHECK-NEXT:.b8 120 -; CHECK-NEXT:.b8 95 -; CHECK-NEXT:.b8 116 -; CHECK-NEXT:.b8 49 -; CHECK-NEXT:.b8 55 -; CHECK-NEXT:.b8 95 -; CHECK-NEXT:.b8 95 +; CHECK-NEXT:.b8 76 +; CHECK-NEXT:.b8 52 +; CHECK-NEXT:.b8 102 +; CHECK-NEXT:.b8 109 +; CHECK-NEXT:.b8 97 +; CHECK-NEXT:.b8 120 ; CHECK-NEXT:.b8 102 -; CHECK-NEXT:.b8 101 -; CHECK-NEXT:.b8 116 -; CHECK-NEXT:.b8 99 -; CHECK-NEXT:.b8 104 -; CHECK-NEXT:.b8 95 -; CHECK-NEXT:.b8 98 -; CHECK-NEXT:.b8 117 -; CHECK-NEXT:.b8 105 -; CHECK-NEXT:.b8 108 -; CHECK-NEXT:.b8 116 -; CHECK-NEXT:.b8 105 -; CHECK-NEXT:.b8 110 -; CHECK-NEXT:.b8 95 +; CHECK-NEXT:.b8 102 +; CHECK-NEXT:.b8 0 +; CHECK-NEXT:.b8 102 // DW_AT_name +; CHECK-NEXT:.b8 109 +; CHECK-NEXT:.b8 97 ; CHECK-NEXT:.b8 120 -; CHECK-NEXT:.b8 69 -; CHECK-NEXT:.b8 118 ; CHECK-NEXT:.b8 0 -; CHECK-NEXT:.b8 95 // DW_AT_name -; CHECK-NEXT:.b8 95 +; CHECK-NEXT:.b8 1 // DW_AT_decl_file +; CHECK-NEXT:.b8 88 // DW_AT_decl_line +; CHECK-NEXT:.b32 1554 // DW_AT_type +; CHECK-NEXT:.b8 1 // DW_AT_declaration +; CHECK-NEXT:.b8 6 // Abbrev [6] 0x8a1:0x5 DW_TAG_formal_parameter +; CHECK-NEXT:.b32 1554 // DW_AT_type +; CHECK-NEXT:.b8 6 // Abbrev [6] 0x8a6:0x5 DW_TAG_formal_parameter +; CHECK-NEXT:.b32 1554 // DW_AT_type +; CHECK-NEXT:.b8 0 // End Of Children Mark +; CHECK-NEXT:.b8 5 // Abbrev [5] 0x8ac:0x23 DW_TAG_subprogram +; CHECK-NEXT:.b8 95 // DW_AT_MIPS_linkage_name +; CHECK-NEXT:.b8 90 +; CHECK-NEXT:.b8 76 +; CHECK-NEXT:.b8 52 ; CHECK-NEXT:.b8 102 -; CHECK-NEXT:.b8 101 -; CHECK-NEXT:.b8 116 -; CHECK-NEXT:.b8 99 -; CHECK-NEXT:.b8 104 -; CHECK-NEXT:.b8 95 -; CHECK-NEXT:.b8 98 -; CHECK-NEXT:.b8 117 +; CHECK-NEXT:.b8 109 ; CHECK-NEXT:.b8 105 -; CHECK-NEXT:.b8 108 -; CHECK-NEXT:.b8 116 +; CHECK-NEXT:.b8 110 +; CHECK-NEXT:.b8 102 +; CHECK-NEXT:.b8 102 +; 
CHECK-NEXT:.b8 0 +; CHECK-NEXT:.b8 102 // DW_AT_name +; CHECK-NEXT:.b8 109 ; CHECK-NEXT:.b8 105 ; CHECK-NEXT:.b8 110 -; CHECK-NEXT:.b8 95 -; CHECK-NEXT:.b8 120 ; CHECK-NEXT:.b8 0 -; CHECK-NEXT:.b8 2 // DW_AT_decl_file -; CHECK-NEXT:.b8 67 // DW_AT_decl_line -; CHECK-NEXT:.b32 619 // DW_AT_type +; CHECK-NEXT:.b8 1 // DW_AT_decl_file +; CHECK-NEXT:.b8 90 // DW_AT_decl_line +; CHECK-NEXT:.b32 1554 // DW_AT_type ; CHECK-NEXT:.b8 1 // DW_AT_declaration -; CHECK-NEXT:.b8 1 // DW_AT_external -; CHECK-NEXT:.b8 3 // Abbrev [3] 0x62f:0x50 DW_TAG_subprogram +; CHECK-NEXT:.b8 6 // Abbrev [6] 0x8c4:0x5 DW_TAG_formal_parameter +; CHECK-NEXT:.b32 1554 // DW_AT_type +; CHECK-NEXT:.b8 6 // Abbrev [6] 0x8c9:0x5 DW_TAG_formal_parameter +; CHECK-NEXT:.b32 1554 // DW_AT_type +; CHECK-NEXT:.b8 0 // End Of Children Mark +; CHECK-NEXT:.b8 5 // Abbrev [5] 0x8cf:0x23 DW_TAG_subprogram ; CHECK-NEXT:.b8 95 // DW_AT_MIPS_linkage_name ; CHECK-NEXT:.b8 90 -; CHECK-NEXT:.b8 78 -; CHECK-NEXT:.b8 50 -; CHECK-NEXT:.b8 54 -; CHECK-NEXT:.b8 95 -; CHECK-NEXT:.b8 95 -; CHECK-NEXT:.b8 99 -; CHECK-NEXT:.b8 117 -; CHECK-NEXT:.b8 100 -; CHECK-NEXT:.b8 97 -; CHECK-NEXT:.b8 95 -; CHECK-NEXT:.b8 98 -; CHECK-NEXT:.b8 117 -; CHECK-NEXT:.b8 105 -; CHECK-NEXT:.b8 108 -; CHECK-NEXT:.b8 116 -; CHECK-NEXT:.b8 105 -; CHECK-NEXT:.b8 110 -; CHECK-NEXT:.b8 95 -; CHECK-NEXT:.b8 116 -; CHECK-NEXT:.b8 104 -; CHECK-NEXT:.b8 114 -; CHECK-NEXT:.b8 101 -; CHECK-NEXT:.b8 97 +; CHECK-NEXT:.b8 76 +; CHECK-NEXT:.b8 52 +; CHECK-NEXT:.b8 102 +; CHECK-NEXT:.b8 109 +; CHECK-NEXT:.b8 111 ; CHECK-NEXT:.b8 100 -; CHECK-NEXT:.b8 73 +; CHECK-NEXT:.b8 102 +; CHECK-NEXT:.b8 102 +; CHECK-NEXT:.b8 0 +; CHECK-NEXT:.b8 102 // DW_AT_name +; CHECK-NEXT:.b8 109 +; CHECK-NEXT:.b8 111 ; CHECK-NEXT:.b8 100 -; CHECK-NEXT:.b8 120 -; CHECK-NEXT:.b8 95 -; CHECK-NEXT:.b8 116 +; CHECK-NEXT:.b8 0 +; CHECK-NEXT:.b8 1 // DW_AT_decl_file +; CHECK-NEXT:.b8 92 // DW_AT_decl_line +; CHECK-NEXT:.b32 1554 // DW_AT_type +; CHECK-NEXT:.b8 1 // DW_AT_declaration +; CHECK-NEXT:.b8 6 // Abbrev [6] 0x8e7:0x5 DW_TAG_formal_parameter +; CHECK-NEXT:.b32 1554 // DW_AT_type +; CHECK-NEXT:.b8 6 // Abbrev [6] 0x8ec:0x5 DW_TAG_formal_parameter +; CHECK-NEXT:.b32 1554 // DW_AT_type +; CHECK-NEXT:.b8 0 // End Of Children Mark +; CHECK-NEXT:.b8 5 // Abbrev [5] 0x8f2:0x2a DW_TAG_subprogram +; CHECK-NEXT:.b8 95 // DW_AT_MIPS_linkage_name +; CHECK-NEXT:.b8 90 +; CHECK-NEXT:.b8 76 ; CHECK-NEXT:.b8 49 -; CHECK-NEXT:.b8 55 -; CHECK-NEXT:.b8 95 -; CHECK-NEXT:.b8 95 +; CHECK-NEXT:.b8 48 ; CHECK-NEXT:.b8 102 -; CHECK-NEXT:.b8 101 -; CHECK-NEXT:.b8 116 +; CHECK-NEXT:.b8 112 ; CHECK-NEXT:.b8 99 -; CHECK-NEXT:.b8 104 -; CHECK-NEXT:.b8 95 -; CHECK-NEXT:.b8 98 -; CHECK-NEXT:.b8 117 -; CHECK-NEXT:.b8 105 ; CHECK-NEXT:.b8 108 -; CHECK-NEXT:.b8 116 +; CHECK-NEXT:.b8 97 +; CHECK-NEXT:.b8 115 +; CHECK-NEXT:.b8 115 ; CHECK-NEXT:.b8 105 -; CHECK-NEXT:.b8 110 -; CHECK-NEXT:.b8 95 +; CHECK-NEXT:.b8 102 ; CHECK-NEXT:.b8 121 -; CHECK-NEXT:.b8 69 -; CHECK-NEXT:.b8 118 -; CHECK-NEXT:.b8 0 -; CHECK-NEXT:.b8 95 // DW_AT_name -; CHECK-NEXT:.b8 95 ; CHECK-NEXT:.b8 102 -; CHECK-NEXT:.b8 101 -; CHECK-NEXT:.b8 116 +; CHECK-NEXT:.b8 0 +; CHECK-NEXT:.b8 102 // DW_AT_name +; CHECK-NEXT:.b8 112 ; CHECK-NEXT:.b8 99 -; CHECK-NEXT:.b8 104 -; CHECK-NEXT:.b8 95 -; CHECK-NEXT:.b8 98 -; CHECK-NEXT:.b8 117 -; CHECK-NEXT:.b8 105 ; CHECK-NEXT:.b8 108 -; CHECK-NEXT:.b8 116 +; CHECK-NEXT:.b8 97 +; CHECK-NEXT:.b8 115 +; CHECK-NEXT:.b8 115 ; CHECK-NEXT:.b8 105 -; CHECK-NEXT:.b8 110 -; CHECK-NEXT:.b8 95 +; CHECK-NEXT:.b8 102 ; CHECK-NEXT:.b8 121 ; 
CHECK-NEXT:.b8 0 -; CHECK-NEXT:.b8 2 // DW_AT_decl_file -; CHECK-NEXT:.b8 68 // DW_AT_decl_line -; CHECK-NEXT:.b32 619 // DW_AT_type +; CHECK-NEXT:.b8 1 // DW_AT_decl_file +; CHECK-NEXT:.b8 94 // DW_AT_decl_line +; CHECK-NEXT:.b32 2332 // DW_AT_type ; CHECK-NEXT:.b8 1 // DW_AT_declaration -; CHECK-NEXT:.b8 1 // DW_AT_external -; CHECK-NEXT:.b8 3 // Abbrev [3] 0x67f:0x50 DW_TAG_subprogram +; CHECK-NEXT:.b8 6 // Abbrev [6] 0x916:0x5 DW_TAG_formal_parameter +; CHECK-NEXT:.b32 1554 // DW_AT_type +; CHECK-NEXT:.b8 0 // End Of Children Mark +; CHECK-NEXT:.b8 7 // Abbrev [7] 0x91c:0x7 DW_TAG_base_type +; CHECK-NEXT:.b8 105 // DW_AT_name +; CHECK-NEXT:.b8 110 +; CHECK-NEXT:.b8 116 +; CHECK-NEXT:.b8 0 +; CHECK-NEXT:.b8 5 // DW_AT_encoding +; CHECK-NEXT:.b8 4 // DW_AT_byte_size +; CHECK-NEXT:.b8 5 // Abbrev [5] 0x923:0x26 DW_TAG_subprogram ; CHECK-NEXT:.b8 95 // DW_AT_MIPS_linkage_name ; CHECK-NEXT:.b8 90 -; CHECK-NEXT:.b8 78 -; CHECK-NEXT:.b8 50 -; CHECK-NEXT:.b8 54 -; CHECK-NEXT:.b8 95 -; CHECK-NEXT:.b8 95 -; CHECK-NEXT:.b8 99 -; CHECK-NEXT:.b8 117 -; CHECK-NEXT:.b8 100 -; CHECK-NEXT:.b8 97 -; CHECK-NEXT:.b8 95 -; CHECK-NEXT:.b8 98 -; CHECK-NEXT:.b8 117 -; CHECK-NEXT:.b8 105 -; CHECK-NEXT:.b8 108 -; CHECK-NEXT:.b8 116 +; CHECK-NEXT:.b8 76 +; CHECK-NEXT:.b8 53 +; CHECK-NEXT:.b8 102 +; CHECK-NEXT:.b8 114 +; CHECK-NEXT:.b8 101 +; CHECK-NEXT:.b8 120 +; CHECK-NEXT:.b8 112 +; CHECK-NEXT:.b8 102 +; CHECK-NEXT:.b8 80 ; CHECK-NEXT:.b8 105 -; CHECK-NEXT:.b8 110 -; CHECK-NEXT:.b8 95 -; CHECK-NEXT:.b8 116 -; CHECK-NEXT:.b8 104 +; CHECK-NEXT:.b8 0 +; CHECK-NEXT:.b8 102 // DW_AT_name ; CHECK-NEXT:.b8 114 ; CHECK-NEXT:.b8 101 -; CHECK-NEXT:.b8 97 -; CHECK-NEXT:.b8 100 -; CHECK-NEXT:.b8 73 -; CHECK-NEXT:.b8 100 ; CHECK-NEXT:.b8 120 -; CHECK-NEXT:.b8 95 +; CHECK-NEXT:.b8 112 +; CHECK-NEXT:.b8 0 +; CHECK-NEXT:.b8 1 // DW_AT_decl_file +; CHECK-NEXT:.b8 96 // DW_AT_decl_line +; CHECK-NEXT:.b32 1554 // DW_AT_type +; CHECK-NEXT:.b8 1 // DW_AT_declaration +; CHECK-NEXT:.b8 6 // Abbrev [6] 0x93e:0x5 DW_TAG_formal_parameter +; CHECK-NEXT:.b32 1554 // DW_AT_type +; CHECK-NEXT:.b8 6 // Abbrev [6] 0x943:0x5 DW_TAG_formal_parameter +; CHECK-NEXT:.b32 2377 // DW_AT_type +; CHECK-NEXT:.b8 0 // End Of Children Mark +; CHECK-NEXT:.b8 8 // Abbrev [8] 0x949:0x5 DW_TAG_pointer_type +; CHECK-NEXT:.b32 2332 // DW_AT_type +; CHECK-NEXT:.b8 5 // Abbrev [5] 0x94e:0x25 DW_TAG_subprogram +; CHECK-NEXT:.b8 95 // DW_AT_MIPS_linkage_name +; CHECK-NEXT:.b8 90 +; CHECK-NEXT:.b8 76 +; CHECK-NEXT:.b8 53 +; CHECK-NEXT:.b8 104 +; CHECK-NEXT:.b8 121 +; CHECK-NEXT:.b8 112 +; CHECK-NEXT:.b8 111 ; CHECK-NEXT:.b8 116 -; CHECK-NEXT:.b8 49 -; CHECK-NEXT:.b8 55 -; CHECK-NEXT:.b8 95 -; CHECK-NEXT:.b8 95 ; CHECK-NEXT:.b8 102 -; CHECK-NEXT:.b8 101 +; CHECK-NEXT:.b8 102 +; CHECK-NEXT:.b8 0 +; CHECK-NEXT:.b8 104 // DW_AT_name +; CHECK-NEXT:.b8 121 +; CHECK-NEXT:.b8 112 +; CHECK-NEXT:.b8 111 ; CHECK-NEXT:.b8 116 -; CHECK-NEXT:.b8 99 -; CHECK-NEXT:.b8 104 -; CHECK-NEXT:.b8 95 -; CHECK-NEXT:.b8 98 -; CHECK-NEXT:.b8 117 +; CHECK-NEXT:.b8 0 +; CHECK-NEXT:.b8 1 // DW_AT_decl_file +; CHECK-NEXT:.b8 98 // DW_AT_decl_line +; CHECK-NEXT:.b32 1554 // DW_AT_type +; CHECK-NEXT:.b8 1 // DW_AT_declaration +; CHECK-NEXT:.b8 6 // Abbrev [6] 0x968:0x5 DW_TAG_formal_parameter +; CHECK-NEXT:.b32 1554 // DW_AT_type +; CHECK-NEXT:.b8 6 // Abbrev [6] 0x96d:0x5 DW_TAG_formal_parameter +; CHECK-NEXT:.b32 1554 // DW_AT_type +; CHECK-NEXT:.b8 0 // End Of Children Mark +; CHECK-NEXT:.b8 5 // Abbrev [5] 0x973:0x1f DW_TAG_subprogram +; CHECK-NEXT:.b8 95 // DW_AT_MIPS_linkage_name +; 
CHECK-NEXT:.b8 90 +; CHECK-NEXT:.b8 76 +; CHECK-NEXT:.b8 53 ; CHECK-NEXT:.b8 105 ; CHECK-NEXT:.b8 108 -; CHECK-NEXT:.b8 116 -; CHECK-NEXT:.b8 105 -; CHECK-NEXT:.b8 110 -; CHECK-NEXT:.b8 95 -; CHECK-NEXT:.b8 122 -; CHECK-NEXT:.b8 69 -; CHECK-NEXT:.b8 118 +; CHECK-NEXT:.b8 111 +; CHECK-NEXT:.b8 103 +; CHECK-NEXT:.b8 98 +; CHECK-NEXT:.b8 102 ; CHECK-NEXT:.b8 0 -; CHECK-NEXT:.b8 95 // DW_AT_name -; CHECK-NEXT:.b8 95 +; CHECK-NEXT:.b8 105 // DW_AT_name +; CHECK-NEXT:.b8 108 +; CHECK-NEXT:.b8 111 +; CHECK-NEXT:.b8 103 +; CHECK-NEXT:.b8 98 +; CHECK-NEXT:.b8 0 +; CHECK-NEXT:.b8 1 // DW_AT_decl_file +; CHECK-NEXT:.b8 100 // DW_AT_decl_line +; CHECK-NEXT:.b32 2332 // DW_AT_type +; CHECK-NEXT:.b8 1 // DW_AT_declaration +; CHECK-NEXT:.b8 6 // Abbrev [6] 0x98c:0x5 DW_TAG_formal_parameter +; CHECK-NEXT:.b32 1554 // DW_AT_type +; CHECK-NEXT:.b8 0 // End Of Children Mark +; CHECK-NEXT:.b8 5 // Abbrev [5] 0x992:0x25 DW_TAG_subprogram +; CHECK-NEXT:.b8 95 // DW_AT_MIPS_linkage_name +; CHECK-NEXT:.b8 90 +; CHECK-NEXT:.b8 76 +; CHECK-NEXT:.b8 56 +; CHECK-NEXT:.b8 105 +; CHECK-NEXT:.b8 115 ; CHECK-NEXT:.b8 102 -; CHECK-NEXT:.b8 101 -; CHECK-NEXT:.b8 116 -; CHECK-NEXT:.b8 99 -; CHECK-NEXT:.b8 104 -; CHECK-NEXT:.b8 95 -; CHECK-NEXT:.b8 98 -; CHECK-NEXT:.b8 117 ; CHECK-NEXT:.b8 105 -; CHECK-NEXT:.b8 108 +; CHECK-NEXT:.b8 110 +; CHECK-NEXT:.b8 105 ; CHECK-NEXT:.b8 116 +; CHECK-NEXT:.b8 101 +; CHECK-NEXT:.b8 102 +; CHECK-NEXT:.b8 0 +; CHECK-NEXT:.b8 105 // DW_AT_name +; CHECK-NEXT:.b8 115 +; CHECK-NEXT:.b8 102 ; CHECK-NEXT:.b8 105 ; CHECK-NEXT:.b8 110 -; CHECK-NEXT:.b8 95 -; CHECK-NEXT:.b8 122 +; CHECK-NEXT:.b8 105 +; CHECK-NEXT:.b8 116 +; CHECK-NEXT:.b8 101 ; CHECK-NEXT:.b8 0 -; CHECK-NEXT:.b8 2 // DW_AT_decl_file -; CHECK-NEXT:.b8 69 // DW_AT_decl_line -; CHECK-NEXT:.b32 619 // DW_AT_type +; CHECK-NEXT:.b8 1 // DW_AT_decl_file +; CHECK-NEXT:.b8 102 // DW_AT_decl_line +; CHECK-NEXT:.b32 2487 // DW_AT_type ; CHECK-NEXT:.b8 1 // DW_AT_declaration -; CHECK-NEXT:.b8 1 // DW_AT_external -; CHECK-NEXT:.b8 4 // Abbrev [4] 0x6cf:0x4a DW_TAG_subprogram +; CHECK-NEXT:.b8 6 // Abbrev [6] 0x9b1:0x5 DW_TAG_formal_parameter +; CHECK-NEXT:.b32 1554 // DW_AT_type +; CHECK-NEXT:.b8 0 // End Of Children Mark +; CHECK-NEXT:.b8 7 // Abbrev [7] 0x9b7:0x8 DW_TAG_base_type +; CHECK-NEXT:.b8 98 // DW_AT_name +; CHECK-NEXT:.b8 111 +; CHECK-NEXT:.b8 111 +; CHECK-NEXT:.b8 108 +; CHECK-NEXT:.b8 0 +; CHECK-NEXT:.b8 2 // DW_AT_encoding +; CHECK-NEXT:.b8 1 // DW_AT_byte_size +; CHECK-NEXT:.b8 5 // Abbrev [5] 0x9bf:0x2d DW_TAG_subprogram ; CHECK-NEXT:.b8 95 // DW_AT_MIPS_linkage_name ; CHECK-NEXT:.b8 90 -; CHECK-NEXT:.b8 78 -; CHECK-NEXT:.b8 75 -; CHECK-NEXT:.b8 50 -; CHECK-NEXT:.b8 54 -; CHECK-NEXT:.b8 95 -; CHECK-NEXT:.b8 95 -; CHECK-NEXT:.b8 99 -; CHECK-NEXT:.b8 117 -; CHECK-NEXT:.b8 100 -; CHECK-NEXT:.b8 97 -; CHECK-NEXT:.b8 95 -; CHECK-NEXT:.b8 98 -; CHECK-NEXT:.b8 117 -; CHECK-NEXT:.b8 105 -; CHECK-NEXT:.b8 108 -; CHECK-NEXT:.b8 116 +; CHECK-NEXT:.b8 76 +; CHECK-NEXT:.b8 57 ; CHECK-NEXT:.b8 105 -; CHECK-NEXT:.b8 110 -; CHECK-NEXT:.b8 95 -; CHECK-NEXT:.b8 116 -; CHECK-NEXT:.b8 104 +; CHECK-NEXT:.b8 115 +; CHECK-NEXT:.b8 103 ; CHECK-NEXT:.b8 114 ; CHECK-NEXT:.b8 101 ; CHECK-NEXT:.b8 97 -; CHECK-NEXT:.b8 100 -; CHECK-NEXT:.b8 73 -; CHECK-NEXT:.b8 100 -; CHECK-NEXT:.b8 120 -; CHECK-NEXT:.b8 95 -; CHECK-NEXT:.b8 116 -; CHECK-NEXT:.b8 99 -; CHECK-NEXT:.b8 118 -; CHECK-NEXT:.b8 53 -; CHECK-NEXT:.b8 117 -; CHECK-NEXT:.b8 105 -; CHECK-NEXT:.b8 110 ; CHECK-NEXT:.b8 116 -; CHECK-NEXT:.b8 51 -; CHECK-NEXT:.b8 69 -; CHECK-NEXT:.b8 118 -; CHECK-NEXT:.b8 
0 -; CHECK-NEXT:.b8 111 // DW_AT_name -; CHECK-NEXT:.b8 112 ; CHECK-NEXT:.b8 101 ; CHECK-NEXT:.b8 114 +; CHECK-NEXT:.b8 102 +; CHECK-NEXT:.b8 102 +; CHECK-NEXT:.b8 0 +; CHECK-NEXT:.b8 105 // DW_AT_name +; CHECK-NEXT:.b8 115 +; CHECK-NEXT:.b8 103 +; CHECK-NEXT:.b8 114 +; CHECK-NEXT:.b8 101 ; CHECK-NEXT:.b8 97 ; CHECK-NEXT:.b8 116 -; CHECK-NEXT:.b8 111 +; CHECK-NEXT:.b8 101 ; CHECK-NEXT:.b8 114 -; CHECK-NEXT:.b8 32 -; CHECK-NEXT:.b8 117 -; CHECK-NEXT:.b8 105 -; CHECK-NEXT:.b8 110 -; CHECK-NEXT:.b8 116 -; CHECK-NEXT:.b8 51 ; CHECK-NEXT:.b8 0 -; CHECK-NEXT:.b8 2 // DW_AT_decl_file -; CHECK-NEXT:.b8 72 // DW_AT_decl_line -; CHECK-NEXT:.b32 635 // DW_AT_type +; CHECK-NEXT:.b8 1 // DW_AT_decl_file +; CHECK-NEXT:.b8 106 // DW_AT_decl_line +; CHECK-NEXT:.b32 2487 // DW_AT_type ; CHECK-NEXT:.b8 1 // DW_AT_declaration -; CHECK-NEXT:.b8 1 // DW_AT_external -; CHECK-NEXT:.b8 5 // Abbrev [5] 0x712:0x6 DW_TAG_formal_parameter -; CHECK-NEXT:.b32 2035 // DW_AT_type -; CHECK-NEXT:.b8 1 // DW_AT_artificial +; CHECK-NEXT:.b8 6 // Abbrev [6] 0x9e1:0x5 DW_TAG_formal_parameter +; CHECK-NEXT:.b32 1554 // DW_AT_type +; CHECK-NEXT:.b8 6 // Abbrev [6] 0x9e6:0x5 DW_TAG_formal_parameter +; CHECK-NEXT:.b32 1554 // DW_AT_type ; CHECK-NEXT:.b8 0 // End Of Children Mark -; CHECK-NEXT:.b8 6 // Abbrev [6] 0x719:0x28 DW_TAG_subprogram -; CHECK-NEXT:.b8 95 // DW_AT_name -; CHECK-NEXT:.b8 95 -; CHECK-NEXT:.b8 99 -; CHECK-NEXT:.b8 117 -; CHECK-NEXT:.b8 100 +; CHECK-NEXT:.b8 5 // Abbrev [5] 0x9ec:0x38 DW_TAG_subprogram +; CHECK-NEXT:.b8 95 // DW_AT_MIPS_linkage_name +; CHECK-NEXT:.b8 90 +; CHECK-NEXT:.b8 76 +; CHECK-NEXT:.b8 49 +; CHECK-NEXT:.b8 52 +; CHECK-NEXT:.b8 105 +; CHECK-NEXT:.b8 115 +; CHECK-NEXT:.b8 103 +; CHECK-NEXT:.b8 114 +; CHECK-NEXT:.b8 101 ; CHECK-NEXT:.b8 97 -; CHECK-NEXT:.b8 95 -; CHECK-NEXT:.b8 98 +; CHECK-NEXT:.b8 116 +; CHECK-NEXT:.b8 101 +; CHECK-NEXT:.b8 114 +; CHECK-NEXT:.b8 101 +; CHECK-NEXT:.b8 113 ; CHECK-NEXT:.b8 117 -; CHECK-NEXT:.b8 105 +; CHECK-NEXT:.b8 97 ; CHECK-NEXT:.b8 108 -; CHECK-NEXT:.b8 116 -; CHECK-NEXT:.b8 105 -; CHECK-NEXT:.b8 110 -; CHECK-NEXT:.b8 95 -; CHECK-NEXT:.b8 116 -; CHECK-NEXT:.b8 104 +; CHECK-NEXT:.b8 102 +; CHECK-NEXT:.b8 102 +; CHECK-NEXT:.b8 0 +; CHECK-NEXT:.b8 105 // DW_AT_name +; CHECK-NEXT:.b8 115 +; CHECK-NEXT:.b8 103 ; CHECK-NEXT:.b8 114 ; CHECK-NEXT:.b8 101 ; CHECK-NEXT:.b8 97 -; CHECK-NEXT:.b8 100 -; CHECK-NEXT:.b8 73 -; CHECK-NEXT:.b8 100 -; CHECK-NEXT:.b8 120 -; CHECK-NEXT:.b8 95 ; CHECK-NEXT:.b8 116 +; CHECK-NEXT:.b8 101 +; CHECK-NEXT:.b8 114 +; CHECK-NEXT:.b8 101 +; CHECK-NEXT:.b8 113 +; CHECK-NEXT:.b8 117 +; CHECK-NEXT:.b8 97 +; CHECK-NEXT:.b8 108 ; CHECK-NEXT:.b8 0 -; CHECK-NEXT:.b8 2 // DW_AT_decl_file -; CHECK-NEXT:.b8 74 // DW_AT_decl_line +; CHECK-NEXT:.b8 1 // DW_AT_decl_file +; CHECK-NEXT:.b8 105 // DW_AT_decl_line +; CHECK-NEXT:.b32 2487 // DW_AT_type ; CHECK-NEXT:.b8 1 // DW_AT_declaration -; CHECK-NEXT:.b8 1 // DW_AT_external -; CHECK-NEXT:.b8 3 // DW_AT_accessibility -; CHECK-NEXT: // DW_ACCESS_private -; CHECK-NEXT:.b8 5 // Abbrev [5] 0x73a:0x6 DW_TAG_formal_parameter -; CHECK-NEXT:.b32 2045 // DW_AT_type -; CHECK-NEXT:.b8 1 // DW_AT_artificial +; CHECK-NEXT:.b8 6 // Abbrev [6] 0xa19:0x5 DW_TAG_formal_parameter +; CHECK-NEXT:.b32 1554 // DW_AT_type +; CHECK-NEXT:.b8 6 // Abbrev [6] 0xa1e:0x5 DW_TAG_formal_parameter +; CHECK-NEXT:.b32 1554 // DW_AT_type ; CHECK-NEXT:.b8 0 // End Of Children Mark -; CHECK-NEXT:.b8 6 // Abbrev [6] 0x741:0x2d DW_TAG_subprogram -; CHECK-NEXT:.b8 95 // DW_AT_name -; CHECK-NEXT:.b8 95 -; CHECK-NEXT:.b8 99 -; CHECK-NEXT:.b8 
117 -; CHECK-NEXT:.b8 100 -; CHECK-NEXT:.b8 97 -; CHECK-NEXT:.b8 95 -; CHECK-NEXT:.b8 98 -; CHECK-NEXT:.b8 117 +; CHECK-NEXT:.b8 5 // Abbrev [5] 0xa24:0x1f DW_TAG_subprogram +; CHECK-NEXT:.b8 95 // DW_AT_MIPS_linkage_name +; CHECK-NEXT:.b8 90 +; CHECK-NEXT:.b8 76 +; CHECK-NEXT:.b8 53 ; CHECK-NEXT:.b8 105 -; CHECK-NEXT:.b8 108 -; CHECK-NEXT:.b8 116 +; CHECK-NEXT:.b8 115 ; CHECK-NEXT:.b8 105 ; CHECK-NEXT:.b8 110 -; CHECK-NEXT:.b8 95 -; CHECK-NEXT:.b8 116 -; CHECK-NEXT:.b8 104 -; CHECK-NEXT:.b8 114 -; CHECK-NEXT:.b8 101 -; CHECK-NEXT:.b8 97 -; CHECK-NEXT:.b8 100 -; CHECK-NEXT:.b8 73 -; CHECK-NEXT:.b8 100 -; CHECK-NEXT:.b8 120 -; CHECK-NEXT:.b8 95 -; CHECK-NEXT:.b8 116 +; CHECK-NEXT:.b8 102 +; CHECK-NEXT:.b8 102 ; CHECK-NEXT:.b8 0 -; CHECK-NEXT:.b8 2 // DW_AT_decl_file -; CHECK-NEXT:.b8 74 // DW_AT_decl_line +; CHECK-NEXT:.b8 105 // DW_AT_name +; CHECK-NEXT:.b8 115 +; CHECK-NEXT:.b8 105 +; CHECK-NEXT:.b8 110 +; CHECK-NEXT:.b8 102 +; CHECK-NEXT:.b8 0 +; CHECK-NEXT:.b8 1 // DW_AT_decl_file +; CHECK-NEXT:.b8 108 // DW_AT_decl_line +; CHECK-NEXT:.b32 2487 // DW_AT_type ; CHECK-NEXT:.b8 1 // DW_AT_declaration -; CHECK-NEXT:.b8 1 // DW_AT_external -; CHECK-NEXT:.b8 3 // DW_AT_accessibility -; CHECK-NEXT: // DW_ACCESS_private -; CHECK-NEXT:.b8 5 // Abbrev [5] 0x762:0x6 DW_TAG_formal_parameter -; CHECK-NEXT:.b32 2045 // DW_AT_type -; CHECK-NEXT:.b8 1 // DW_AT_artificial -; CHECK-NEXT:.b8 7 // Abbrev [7] 0x768:0x5 DW_TAG_formal_parameter -; CHECK-NEXT:.b32 2050 // DW_AT_type +; CHECK-NEXT:.b8 6 // Abbrev [6] 0xa3d:0x5 DW_TAG_formal_parameter +; CHECK-NEXT:.b32 1554 // DW_AT_type ; CHECK-NEXT:.b8 0 // End Of Children Mark -; CHECK-NEXT:.b8 8 // Abbrev [8] 0x76e:0x44 DW_TAG_subprogram +; CHECK-NEXT:.b8 5 // Abbrev [5] 0xa43:0x27 DW_TAG_subprogram ; CHECK-NEXT:.b8 95 // DW_AT_MIPS_linkage_name ; CHECK-NEXT:.b8 90 -; CHECK-NEXT:.b8 78 -; CHECK-NEXT:.b8 75 -; CHECK-NEXT:.b8 50 +; CHECK-NEXT:.b8 76 ; CHECK-NEXT:.b8 54 -; CHECK-NEXT:.b8 95 -; CHECK-NEXT:.b8 95 -; CHECK-NEXT:.b8 99 -; CHECK-NEXT:.b8 117 -; CHECK-NEXT:.b8 100 -; CHECK-NEXT:.b8 97 -; CHECK-NEXT:.b8 95 -; CHECK-NEXT:.b8 98 -; CHECK-NEXT:.b8 117 ; CHECK-NEXT:.b8 105 +; CHECK-NEXT:.b8 115 ; CHECK-NEXT:.b8 108 -; CHECK-NEXT:.b8 116 +; CHECK-NEXT:.b8 101 +; CHECK-NEXT:.b8 115 +; CHECK-NEXT:.b8 115 +; CHECK-NEXT:.b8 102 +; CHECK-NEXT:.b8 102 +; CHECK-NEXT:.b8 0 +; CHECK-NEXT:.b8 105 // DW_AT_name +; CHECK-NEXT:.b8 115 +; CHECK-NEXT:.b8 108 +; CHECK-NEXT:.b8 101 +; CHECK-NEXT:.b8 115 +; CHECK-NEXT:.b8 115 +; CHECK-NEXT:.b8 0 +; CHECK-NEXT:.b8 1 // DW_AT_decl_file +; CHECK-NEXT:.b8 112 // DW_AT_decl_line +; CHECK-NEXT:.b32 2487 // DW_AT_type +; CHECK-NEXT:.b8 1 // DW_AT_declaration +; CHECK-NEXT:.b8 6 // Abbrev [6] 0xa5f:0x5 DW_TAG_formal_parameter +; CHECK-NEXT:.b32 1554 // DW_AT_type +; CHECK-NEXT:.b8 6 // Abbrev [6] 0xa64:0x5 DW_TAG_formal_parameter +; CHECK-NEXT:.b32 1554 // DW_AT_type +; CHECK-NEXT:.b8 0 // End Of Children Mark +; CHECK-NEXT:.b8 5 // Abbrev [5] 0xa6a:0x32 DW_TAG_subprogram +; CHECK-NEXT:.b8 95 // DW_AT_MIPS_linkage_name +; CHECK-NEXT:.b8 90 +; CHECK-NEXT:.b8 76 +; CHECK-NEXT:.b8 49 +; CHECK-NEXT:.b8 49 ; CHECK-NEXT:.b8 105 -; CHECK-NEXT:.b8 110 -; CHECK-NEXT:.b8 95 -; CHECK-NEXT:.b8 116 -; CHECK-NEXT:.b8 104 -; CHECK-NEXT:.b8 114 +; CHECK-NEXT:.b8 115 +; CHECK-NEXT:.b8 108 ; CHECK-NEXT:.b8 101 +; CHECK-NEXT:.b8 115 +; CHECK-NEXT:.b8 115 +; CHECK-NEXT:.b8 101 +; CHECK-NEXT:.b8 113 +; CHECK-NEXT:.b8 117 ; CHECK-NEXT:.b8 97 -; CHECK-NEXT:.b8 100 -; CHECK-NEXT:.b8 73 -; CHECK-NEXT:.b8 100 -; CHECK-NEXT:.b8 120 -; CHECK-NEXT:.b8 95 -; 
CHECK-NEXT:.b8 116 -; CHECK-NEXT:.b8 97 -; CHECK-NEXT:.b8 83 -; CHECK-NEXT:.b8 69 -; CHECK-NEXT:.b8 82 -; CHECK-NEXT:.b8 75 -; CHECK-NEXT:.b8 83 -; CHECK-NEXT:.b8 95 +; CHECK-NEXT:.b8 108 +; CHECK-NEXT:.b8 102 +; CHECK-NEXT:.b8 102 ; CHECK-NEXT:.b8 0 -; CHECK-NEXT:.b8 111 // DW_AT_name -; CHECK-NEXT:.b8 112 +; CHECK-NEXT:.b8 105 // DW_AT_name +; CHECK-NEXT:.b8 115 +; CHECK-NEXT:.b8 108 ; CHECK-NEXT:.b8 101 -; CHECK-NEXT:.b8 114 +; CHECK-NEXT:.b8 115 +; CHECK-NEXT:.b8 115 +; CHECK-NEXT:.b8 101 +; CHECK-NEXT:.b8 113 +; CHECK-NEXT:.b8 117 ; CHECK-NEXT:.b8 97 -; CHECK-NEXT:.b8 116 -; CHECK-NEXT:.b8 111 -; CHECK-NEXT:.b8 114 -; CHECK-NEXT:.b8 61 +; CHECK-NEXT:.b8 108 ; CHECK-NEXT:.b8 0 -; CHECK-NEXT:.b8 2 // DW_AT_decl_file -; CHECK-NEXT:.b8 74 // DW_AT_decl_line +; CHECK-NEXT:.b8 1 // DW_AT_decl_file +; CHECK-NEXT:.b8 111 // DW_AT_decl_line +; CHECK-NEXT:.b32 2487 // DW_AT_type ; CHECK-NEXT:.b8 1 // DW_AT_declaration -; CHECK-NEXT:.b8 1 // DW_AT_external -; CHECK-NEXT:.b8 3 // DW_AT_accessibility -; CHECK-NEXT: // DW_ACCESS_private -; CHECK-NEXT:.b8 5 // Abbrev [5] 0x7a6:0x6 DW_TAG_formal_parameter -; CHECK-NEXT:.b32 2035 // DW_AT_type -; CHECK-NEXT:.b8 1 // DW_AT_artificial -; CHECK-NEXT:.b8 7 // Abbrev [7] 0x7ac:0x5 DW_TAG_formal_parameter -; CHECK-NEXT:.b32 2050 // DW_AT_type +; CHECK-NEXT:.b8 6 // Abbrev [6] 0xa91:0x5 DW_TAG_formal_parameter +; CHECK-NEXT:.b32 1554 // DW_AT_type +; CHECK-NEXT:.b8 6 // Abbrev [6] 0xa96:0x5 DW_TAG_formal_parameter +; CHECK-NEXT:.b32 1554 // DW_AT_type ; CHECK-NEXT:.b8 0 // End Of Children Mark -; CHECK-NEXT:.b8 9 // Abbrev [9] 0x7b2:0x40 DW_TAG_subprogram +; CHECK-NEXT:.b8 5 // Abbrev [5] 0xa9c:0x36 DW_TAG_subprogram ; CHECK-NEXT:.b8 95 // DW_AT_MIPS_linkage_name ; CHECK-NEXT:.b8 90 -; CHECK-NEXT:.b8 78 -; CHECK-NEXT:.b8 75 -; CHECK-NEXT:.b8 50 -; CHECK-NEXT:.b8 54 -; CHECK-NEXT:.b8 95 -; CHECK-NEXT:.b8 95 -; CHECK-NEXT:.b8 99 -; CHECK-NEXT:.b8 117 -; CHECK-NEXT:.b8 100 -; CHECK-NEXT:.b8 97 -; CHECK-NEXT:.b8 95 -; CHECK-NEXT:.b8 98 -; CHECK-NEXT:.b8 117 +; CHECK-NEXT:.b8 76 +; CHECK-NEXT:.b8 49 +; CHECK-NEXT:.b8 51 ; CHECK-NEXT:.b8 105 +; CHECK-NEXT:.b8 115 ; CHECK-NEXT:.b8 108 -; CHECK-NEXT:.b8 116 -; CHECK-NEXT:.b8 105 -; CHECK-NEXT:.b8 110 -; CHECK-NEXT:.b8 95 -; CHECK-NEXT:.b8 116 -; CHECK-NEXT:.b8 104 +; CHECK-NEXT:.b8 101 +; CHECK-NEXT:.b8 115 +; CHECK-NEXT:.b8 115 +; CHECK-NEXT:.b8 103 ; CHECK-NEXT:.b8 114 ; CHECK-NEXT:.b8 101 ; CHECK-NEXT:.b8 97 -; CHECK-NEXT:.b8 100 -; CHECK-NEXT:.b8 73 -; CHECK-NEXT:.b8 100 -; CHECK-NEXT:.b8 120 -; CHECK-NEXT:.b8 95 ; CHECK-NEXT:.b8 116 -; CHECK-NEXT:.b8 97 -; CHECK-NEXT:.b8 100 -; CHECK-NEXT:.b8 69 -; CHECK-NEXT:.b8 118 +; CHECK-NEXT:.b8 101 +; CHECK-NEXT:.b8 114 +; CHECK-NEXT:.b8 102 +; CHECK-NEXT:.b8 102 ; CHECK-NEXT:.b8 0 -; CHECK-NEXT:.b8 111 // DW_AT_name -; CHECK-NEXT:.b8 112 +; CHECK-NEXT:.b8 105 // DW_AT_name +; CHECK-NEXT:.b8 115 +; CHECK-NEXT:.b8 108 ; CHECK-NEXT:.b8 101 +; CHECK-NEXT:.b8 115 +; CHECK-NEXT:.b8 115 +; CHECK-NEXT:.b8 103 ; CHECK-NEXT:.b8 114 +; CHECK-NEXT:.b8 101 ; CHECK-NEXT:.b8 97 ; CHECK-NEXT:.b8 116 -; CHECK-NEXT:.b8 111 +; CHECK-NEXT:.b8 101 ; CHECK-NEXT:.b8 114 -; CHECK-NEXT:.b8 38 ; CHECK-NEXT:.b8 0 -; CHECK-NEXT:.b8 2 // DW_AT_decl_file -; CHECK-NEXT:.b8 74 // DW_AT_decl_line -; CHECK-NEXT:.b32 2055 // DW_AT_type +; CHECK-NEXT:.b8 1 // DW_AT_decl_file +; CHECK-NEXT:.b8 114 // DW_AT_decl_line +; CHECK-NEXT:.b32 2487 // DW_AT_type ; CHECK-NEXT:.b8 1 // DW_AT_declaration -; CHECK-NEXT:.b8 1 // DW_AT_external -; CHECK-NEXT:.b8 3 // DW_AT_accessibility -; CHECK-NEXT: // 
DW_ACCESS_private -; CHECK-NEXT:.b8 5 // Abbrev [5] 0x7eb:0x6 DW_TAG_formal_parameter -; CHECK-NEXT:.b32 2035 // DW_AT_type -; CHECK-NEXT:.b8 1 // DW_AT_artificial +; CHECK-NEXT:.b8 6 // Abbrev [6] 0xac7:0x5 DW_TAG_formal_parameter +; CHECK-NEXT:.b32 1554 // DW_AT_type +; CHECK-NEXT:.b8 6 // Abbrev [6] 0xacc:0x5 DW_TAG_formal_parameter +; CHECK-NEXT:.b32 1554 // DW_AT_type ; CHECK-NEXT:.b8 0 // End Of Children Mark -; CHECK-NEXT:.b8 0 // End Of Children Mark -; CHECK-NEXT:.b8 12 // Abbrev [12] 0x7f3:0x5 DW_TAG_pointer_type -; CHECK-NEXT:.b32 2040 // DW_AT_type -; CHECK-NEXT:.b8 13 // Abbrev [13] 0x7f8:0x5 DW_TAG_const_type -; CHECK-NEXT:.b32 1472 // DW_AT_type -; CHECK-NEXT:.b8 12 // Abbrev [12] 0x7fd:0x5 DW_TAG_pointer_type -; CHECK-NEXT:.b32 1472 // DW_AT_type -; CHECK-NEXT:.b8 14 // Abbrev [14] 0x802:0x5 DW_TAG_reference_type -; CHECK-NEXT:.b32 2040 // DW_AT_type -; CHECK-NEXT:.b8 12 // Abbrev [12] 0x807:0x5 DW_TAG_pointer_type -; CHECK-NEXT:.b32 1472 // DW_AT_type -; CHECK-NEXT:.b8 15 // Abbrev [15] 0x80c:0x6 DW_TAG_subprogram -; CHECK-NEXT:.b32 1503 // DW_AT_specification -; CHECK-NEXT:.b8 1 // DW_AT_inline -; CHECK-NEXT:.b8 21 // Abbrev [21] 0x812:0x32 DW_TAG_subprogram +; CHECK-NEXT:.b8 5 // Abbrev [5] 0xad2:0x1f DW_TAG_subprogram ; CHECK-NEXT:.b8 95 // DW_AT_MIPS_linkage_name ; CHECK-NEXT:.b8 90 -; CHECK-NEXT:.b8 51 -; CHECK-NEXT:.b8 114 -; CHECK-NEXT:.b8 101 +; CHECK-NEXT:.b8 76 +; CHECK-NEXT:.b8 53 +; CHECK-NEXT:.b8 105 ; CHECK-NEXT:.b8 115 +; CHECK-NEXT:.b8 110 +; CHECK-NEXT:.b8 97 +; CHECK-NEXT:.b8 110 ; CHECK-NEXT:.b8 102 -; CHECK-NEXT:.b8 102 -; CHECK-NEXT:.b8 80 -; CHECK-NEXT:.b8 102 -; CHECK-NEXT:.b8 0 -; CHECK-NEXT:.b8 114 // DW_AT_name -; CHECK-NEXT:.b8 101 -; CHECK-NEXT:.b8 115 ; CHECK-NEXT:.b8 0 -; CHECK-NEXT:.b8 1 // DW_AT_decl_file -; CHECK-NEXT:.b8 3 // DW_AT_decl_line -; CHECK-NEXT:.b8 1 // DW_AT_external -; CHECK-NEXT:.b8 1 // DW_AT_inline -; CHECK-NEXT:.b8 22 // Abbrev [22] 0x826:0x9 DW_TAG_formal_parameter -; CHECK-NEXT:.b8 120 // DW_AT_name -; CHECK-NEXT:.b8 0 -; CHECK-NEXT:.b8 1 // DW_AT_decl_file -; CHECK-NEXT:.b8 3 // DW_AT_decl_line -; CHECK-NEXT:.b32 2116 // DW_AT_type -; CHECK-NEXT:.b8 22 // Abbrev [22] 0x82f:0x9 DW_TAG_formal_parameter -; CHECK-NEXT:.b8 121 // DW_AT_name -; CHECK-NEXT:.b8 0 -; CHECK-NEXT:.b8 1 // DW_AT_decl_file -; CHECK-NEXT:.b8 3 // DW_AT_decl_line -; CHECK-NEXT:.b32 2116 // DW_AT_type -; CHECK-NEXT:.b8 22 // Abbrev [22] 0x838:0xb DW_TAG_formal_parameter -; CHECK-NEXT:.b8 114 // DW_AT_name -; CHECK-NEXT:.b8 101 +; CHECK-NEXT:.b8 105 // DW_AT_name ; CHECK-NEXT:.b8 115 +; CHECK-NEXT:.b8 110 +; CHECK-NEXT:.b8 97 +; CHECK-NEXT:.b8 110 ; CHECK-NEXT:.b8 0 ; CHECK-NEXT:.b8 1 // DW_AT_decl_file -; CHECK-NEXT:.b8 3 // DW_AT_decl_line -; CHECK-NEXT:.b32 2125 // DW_AT_type +; CHECK-NEXT:.b8 116 // DW_AT_decl_line +; CHECK-NEXT:.b32 2487 // DW_AT_type +; CHECK-NEXT:.b8 1 // DW_AT_declaration +; CHECK-NEXT:.b8 6 // Abbrev [6] 0xaeb:0x5 DW_TAG_formal_parameter +; CHECK-NEXT:.b32 1554 // DW_AT_type ; CHECK-NEXT:.b8 0 // End Of Children Mark -; CHECK-NEXT:.b8 10 // Abbrev [10] 0x844:0x9 DW_TAG_base_type -; CHECK-NEXT:.b8 102 // DW_AT_name -; CHECK-NEXT:.b8 108 -; CHECK-NEXT:.b8 111 -; CHECK-NEXT:.b8 97 -; CHECK-NEXT:.b8 116 -; CHECK-NEXT:.b8 0 -; CHECK-NEXT:.b8 4 // DW_AT_encoding -; CHECK-NEXT:.b8 4 // DW_AT_byte_size -; CHECK-NEXT:.b8 12 // Abbrev [12] 0x84d:0x5 DW_TAG_pointer_type -; CHECK-NEXT:.b32 2116 // DW_AT_type -; CHECK-NEXT:.b8 23 // Abbrev [23] 0x852:0xbf DW_TAG_subprogram -; CHECK-NEXT:.b64 Lfunc_begin0 // DW_AT_low_pc -; CHECK-NEXT:.b64 
Lfunc_end0 // DW_AT_high_pc -; CHECK-NEXT:.b8 1 // DW_AT_frame_base -; CHECK-NEXT:.b8 156 +; CHECK-NEXT:.b8 5 // Abbrev [5] 0xaf1:0x25 DW_TAG_subprogram ; CHECK-NEXT:.b8 95 // DW_AT_MIPS_linkage_name ; CHECK-NEXT:.b8 90 -; CHECK-NEXT:.b8 53 +; CHECK-NEXT:.b8 76 +; CHECK-NEXT:.b8 56 +; CHECK-NEXT:.b8 105 ; CHECK-NEXT:.b8 115 +; CHECK-NEXT:.b8 110 +; CHECK-NEXT:.b8 111 +; CHECK-NEXT:.b8 114 +; CHECK-NEXT:.b8 109 ; CHECK-NEXT:.b8 97 -; CHECK-NEXT:.b8 120 -; CHECK-NEXT:.b8 112 -; CHECK-NEXT:.b8 121 -; CHECK-NEXT:.b8 105 -; CHECK-NEXT:.b8 102 -; CHECK-NEXT:.b8 80 +; CHECK-NEXT:.b8 108 ; CHECK-NEXT:.b8 102 -; CHECK-NEXT:.b8 83 -; CHECK-NEXT:.b8 95 ; CHECK-NEXT:.b8 0 -; CHECK-NEXT:.b8 115 // DW_AT_name +; CHECK-NEXT:.b8 105 // DW_AT_name +; CHECK-NEXT:.b8 115 +; CHECK-NEXT:.b8 110 +; CHECK-NEXT:.b8 111 +; CHECK-NEXT:.b8 114 +; CHECK-NEXT:.b8 109 ; CHECK-NEXT:.b8 97 -; CHECK-NEXT:.b8 120 -; CHECK-NEXT:.b8 112 -; CHECK-NEXT:.b8 121 -; CHECK-NEXT:.b8 0 -; CHECK-NEXT:.b8 1 // DW_AT_decl_file -; CHECK-NEXT:.b8 5 // DW_AT_decl_line -; CHECK-NEXT:.b8 1 // DW_AT_external -; CHECK-NEXT:.b8 22 // Abbrev [22] 0x87d:0x9 DW_TAG_formal_parameter -; CHECK-NEXT:.b8 110 // DW_AT_name +; CHECK-NEXT:.b8 108 ; CHECK-NEXT:.b8 0 ; CHECK-NEXT:.b8 1 // DW_AT_decl_file -; CHECK-NEXT:.b8 5 // DW_AT_decl_line -; CHECK-NEXT:.b32 4579 // DW_AT_type -; CHECK-NEXT:.b8 22 // Abbrev [22] 0x886:0x9 DW_TAG_formal_parameter -; CHECK-NEXT:.b8 97 // DW_AT_name +; CHECK-NEXT:.b8 118 // DW_AT_decl_line +; CHECK-NEXT:.b32 2487 // DW_AT_type +; CHECK-NEXT:.b8 1 // DW_AT_declaration +; CHECK-NEXT:.b8 6 // Abbrev [6] 0xb10:0x5 DW_TAG_formal_parameter +; CHECK-NEXT:.b32 1554 // DW_AT_type +; CHECK-NEXT:.b8 0 // End Of Children Mark +; CHECK-NEXT:.b8 5 // Abbrev [5] 0xb16:0x32 DW_TAG_subprogram +; CHECK-NEXT:.b8 95 // DW_AT_MIPS_linkage_name +; CHECK-NEXT:.b8 90 +; CHECK-NEXT:.b8 76 +; CHECK-NEXT:.b8 49 +; CHECK-NEXT:.b8 49 +; CHECK-NEXT:.b8 105 +; CHECK-NEXT:.b8 115 +; CHECK-NEXT:.b8 117 +; CHECK-NEXT:.b8 110 +; CHECK-NEXT:.b8 111 +; CHECK-NEXT:.b8 114 +; CHECK-NEXT:.b8 100 +; CHECK-NEXT:.b8 101 +; CHECK-NEXT:.b8 114 +; CHECK-NEXT:.b8 101 +; CHECK-NEXT:.b8 100 +; CHECK-NEXT:.b8 102 +; CHECK-NEXT:.b8 102 ; CHECK-NEXT:.b8 0 -; CHECK-NEXT:.b8 1 // DW_AT_decl_file -; CHECK-NEXT:.b8 5 // DW_AT_decl_line -; CHECK-NEXT:.b32 2116 // DW_AT_type -; CHECK-NEXT:.b8 22 // Abbrev [22] 0x88f:0x9 DW_TAG_formal_parameter -; CHECK-NEXT:.b8 120 // DW_AT_name +; CHECK-NEXT:.b8 105 // DW_AT_name +; CHECK-NEXT:.b8 115 +; CHECK-NEXT:.b8 117 +; CHECK-NEXT:.b8 110 +; CHECK-NEXT:.b8 111 +; CHECK-NEXT:.b8 114 +; CHECK-NEXT:.b8 100 +; CHECK-NEXT:.b8 101 +; CHECK-NEXT:.b8 114 +; CHECK-NEXT:.b8 101 +; CHECK-NEXT:.b8 100 ; CHECK-NEXT:.b8 0 ; CHECK-NEXT:.b8 1 // DW_AT_decl_file -; CHECK-NEXT:.b8 5 // DW_AT_decl_line -; CHECK-NEXT:.b32 2125 // DW_AT_type -; CHECK-NEXT:.b8 22 // Abbrev [22] 0x898:0x9 DW_TAG_formal_parameter -; CHECK-NEXT:.b8 121 // DW_AT_name +; CHECK-NEXT:.b8 120 // DW_AT_decl_line +; CHECK-NEXT:.b32 2487 // DW_AT_type +; CHECK-NEXT:.b8 1 // DW_AT_declaration +; CHECK-NEXT:.b8 6 // Abbrev [6] 0xb3d:0x5 DW_TAG_formal_parameter +; CHECK-NEXT:.b32 1554 // DW_AT_type +; CHECK-NEXT:.b8 6 // Abbrev [6] 0xb42:0x5 DW_TAG_formal_parameter +; CHECK-NEXT:.b32 1554 // DW_AT_type +; CHECK-NEXT:.b8 0 // End Of Children Mark +; CHECK-NEXT:.b8 5 // Abbrev [5] 0xb48:0x1d DW_TAG_subprogram +; CHECK-NEXT:.b8 95 // DW_AT_MIPS_linkage_name +; CHECK-NEXT:.b8 90 +; CHECK-NEXT:.b8 76 +; CHECK-NEXT:.b8 52 +; CHECK-NEXT:.b8 108 +; CHECK-NEXT:.b8 97 +; CHECK-NEXT:.b8 98 +; 
CHECK-NEXT:.b8 115 +; CHECK-NEXT:.b8 108 ; CHECK-NEXT:.b8 0 -; CHECK-NEXT:.b8 1 // DW_AT_decl_file -; CHECK-NEXT:.b8 5 // DW_AT_decl_line -; CHECK-NEXT:.b32 2125 // DW_AT_type -; CHECK-NEXT:.b8 24 // Abbrev [24] 0x8a1:0x9 DW_TAG_variable -; CHECK-NEXT:.b8 105 // DW_AT_name +; CHECK-NEXT:.b8 108 // DW_AT_name +; CHECK-NEXT:.b8 97 +; CHECK-NEXT:.b8 98 +; CHECK-NEXT:.b8 115 ; CHECK-NEXT:.b8 0 ; CHECK-NEXT:.b8 1 // DW_AT_decl_file -; CHECK-NEXT:.b8 6 // DW_AT_decl_line -; CHECK-NEXT:.b32 4579 // DW_AT_type -; CHECK-NEXT:.b8 25 // Abbrev [25] 0x8aa:0x18 DW_TAG_inlined_subroutine -; CHECK-NEXT:.b32 707 // DW_AT_abstract_origin -; CHECK-NEXT:.b64 Ltmp0 // DW_AT_low_pc -; CHECK-NEXT:.b64 Ltmp1 // DW_AT_high_pc -; CHECK-NEXT:.b8 1 // DW_AT_call_file -; CHECK-NEXT:.b8 6 // DW_AT_call_line -; CHECK-NEXT:.b8 11 // DW_AT_call_column -; CHECK-NEXT:.b8 25 // Abbrev [25] 0x8c2:0x18 DW_TAG_inlined_subroutine -; CHECK-NEXT:.b32 1466 // DW_AT_abstract_origin -; CHECK-NEXT:.b64 Ltmp1 // DW_AT_low_pc -; CHECK-NEXT:.b64 Ltmp2 // DW_AT_high_pc -; CHECK-NEXT:.b8 1 // DW_AT_call_file -; CHECK-NEXT:.b8 6 // DW_AT_call_line -; CHECK-NEXT:.b8 24 // DW_AT_call_column -; CHECK-NEXT:.b8 25 // Abbrev [25] 0x8da:0x18 DW_TAG_inlined_subroutine -; CHECK-NEXT:.b32 2060 // DW_AT_abstract_origin -; CHECK-NEXT:.b64 Ltmp2 // DW_AT_low_pc -; CHECK-NEXT:.b64 Ltmp3 // DW_AT_high_pc -; CHECK-NEXT:.b8 1 // DW_AT_call_file -; CHECK-NEXT:.b8 6 // DW_AT_call_line -; CHECK-NEXT:.b8 37 // DW_AT_call_column -; CHECK-NEXT:.b8 26 // Abbrev [26] 0x8f2:0x1e DW_TAG_inlined_subroutine -; CHECK-NEXT:.b32 2066 // DW_AT_abstract_origin -; CHECK-NEXT:.b64 Ltmp9 // DW_AT_low_pc -; CHECK-NEXT:.b64 Ltmp10 // DW_AT_high_pc -; CHECK-NEXT:.b8 1 // DW_AT_call_file -; CHECK-NEXT:.b8 8 // DW_AT_call_line -; CHECK-NEXT:.b8 5 // DW_AT_call_column -; CHECK-NEXT:.b8 27 // Abbrev [27] 0x90a:0x5 DW_TAG_formal_parameter -; CHECK-NEXT:.b32 2095 // DW_AT_abstract_origin -; CHECK-NEXT:.b8 0 // End Of Children Mark +; CHECK-NEXT:.b8 121 // DW_AT_decl_line +; CHECK-NEXT:.b32 2917 // DW_AT_type +; CHECK-NEXT:.b8 1 // DW_AT_declaration +; CHECK-NEXT:.b8 6 // Abbrev [6] 0xb5f:0x5 DW_TAG_formal_parameter +; CHECK-NEXT:.b32 2917 // DW_AT_type ; CHECK-NEXT:.b8 0 // End Of Children Mark -; CHECK-NEXT:.b8 28 // Abbrev [28] 0x911:0x588 DW_TAG_namespace -; CHECK-NEXT:.b8 115 // DW_AT_name +; CHECK-NEXT:.b8 7 // Abbrev [7] 0xb65:0xc DW_TAG_base_type +; CHECK-NEXT:.b8 108 // DW_AT_name +; CHECK-NEXT:.b8 111 +; CHECK-NEXT:.b8 110 +; CHECK-NEXT:.b8 103 +; CHECK-NEXT:.b8 32 +; CHECK-NEXT:.b8 105 +; CHECK-NEXT:.b8 110 ; CHECK-NEXT:.b8 116 +; CHECK-NEXT:.b8 0 +; CHECK-NEXT:.b8 5 // DW_AT_encoding +; CHECK-NEXT:.b8 8 // DW_AT_byte_size +; CHECK-NEXT:.b8 5 // Abbrev [5] 0xb71:0x25 DW_TAG_subprogram +; CHECK-NEXT:.b8 95 // DW_AT_MIPS_linkage_name +; CHECK-NEXT:.b8 90 +; CHECK-NEXT:.b8 76 +; CHECK-NEXT:.b8 53 +; CHECK-NEXT:.b8 108 ; CHECK-NEXT:.b8 100 +; CHECK-NEXT:.b8 101 +; CHECK-NEXT:.b8 120 +; CHECK-NEXT:.b8 112 +; CHECK-NEXT:.b8 102 +; CHECK-NEXT:.b8 105 ; CHECK-NEXT:.b8 0 -; CHECK-NEXT:.b8 29 // Abbrev [29] 0x916:0x7 DW_TAG_imported_declaration -; CHECK-NEXT:.b8 4 // DW_AT_decl_file -; CHECK-NEXT:.b8 202 // DW_AT_decl_line -; CHECK-NEXT:.b32 3737 // DW_AT_import -; CHECK-NEXT:.b8 29 // Abbrev [29] 0x91d:0x7 DW_TAG_imported_declaration -; CHECK-NEXT:.b8 4 // DW_AT_decl_file -; CHECK-NEXT:.b8 203 // DW_AT_decl_line -; CHECK-NEXT:.b32 3781 // DW_AT_import -; CHECK-NEXT:.b8 29 // Abbrev [29] 0x924:0x7 DW_TAG_imported_declaration -; CHECK-NEXT:.b8 4 // DW_AT_decl_file -; CHECK-NEXT:.b8 204 
// DW_AT_decl_line -; CHECK-NEXT:.b32 3810 // DW_AT_import -; CHECK-NEXT:.b8 29 // Abbrev [29] 0x92b:0x7 DW_TAG_imported_declaration -; CHECK-NEXT:.b8 4 // DW_AT_decl_file -; CHECK-NEXT:.b8 205 // DW_AT_decl_line -; CHECK-NEXT:.b32 3841 // DW_AT_import -; CHECK-NEXT:.b8 29 // Abbrev [29] 0x932:0x7 DW_TAG_imported_declaration -; CHECK-NEXT:.b8 4 // DW_AT_decl_file -; CHECK-NEXT:.b8 206 // DW_AT_decl_line -; CHECK-NEXT:.b32 3870 // DW_AT_import -; CHECK-NEXT:.b8 29 // Abbrev [29] 0x939:0x7 DW_TAG_imported_declaration -; CHECK-NEXT:.b8 4 // DW_AT_decl_file -; CHECK-NEXT:.b8 207 // DW_AT_decl_line -; CHECK-NEXT:.b32 3901 // DW_AT_import -; CHECK-NEXT:.b8 29 // Abbrev [29] 0x940:0x7 DW_TAG_imported_declaration -; CHECK-NEXT:.b8 4 // DW_AT_decl_file -; CHECK-NEXT:.b8 208 // DW_AT_decl_line -; CHECK-NEXT:.b32 3930 // DW_AT_import -; CHECK-NEXT:.b8 29 // Abbrev [29] 0x947:0x7 DW_TAG_imported_declaration -; CHECK-NEXT:.b8 4 // DW_AT_decl_file -; CHECK-NEXT:.b8 209 // DW_AT_decl_line -; CHECK-NEXT:.b32 3967 // DW_AT_import -; CHECK-NEXT:.b8 29 // Abbrev [29] 0x94e:0x7 DW_TAG_imported_declaration -; CHECK-NEXT:.b8 4 // DW_AT_decl_file -; CHECK-NEXT:.b8 210 // DW_AT_decl_line -; CHECK-NEXT:.b32 3998 // DW_AT_import -; CHECK-NEXT:.b8 29 // Abbrev [29] 0x955:0x7 DW_TAG_imported_declaration -; CHECK-NEXT:.b8 4 // DW_AT_decl_file -; CHECK-NEXT:.b8 211 // DW_AT_decl_line -; CHECK-NEXT:.b32 4027 // DW_AT_import -; CHECK-NEXT:.b8 29 // Abbrev [29] 0x95c:0x7 DW_TAG_imported_declaration -; CHECK-NEXT:.b8 4 // DW_AT_decl_file -; CHECK-NEXT:.b8 212 // DW_AT_decl_line -; CHECK-NEXT:.b32 4056 // DW_AT_import -; CHECK-NEXT:.b8 29 // Abbrev [29] 0x963:0x7 DW_TAG_imported_declaration -; CHECK-NEXT:.b8 4 // DW_AT_decl_file -; CHECK-NEXT:.b8 213 // DW_AT_decl_line -; CHECK-NEXT:.b32 4099 // DW_AT_import -; CHECK-NEXT:.b8 29 // Abbrev [29] 0x96a:0x7 DW_TAG_imported_declaration -; CHECK-NEXT:.b8 4 // DW_AT_decl_file -; CHECK-NEXT:.b8 214 // DW_AT_decl_line -; CHECK-NEXT:.b32 4126 // DW_AT_import -; CHECK-NEXT:.b8 29 // Abbrev [29] 0x971:0x7 DW_TAG_imported_declaration -; CHECK-NEXT:.b8 4 // DW_AT_decl_file -; CHECK-NEXT:.b8 215 // DW_AT_decl_line -; CHECK-NEXT:.b32 4155 // DW_AT_import -; CHECK-NEXT:.b8 29 // Abbrev [29] 0x978:0x7 DW_TAG_imported_declaration -; CHECK-NEXT:.b8 4 // DW_AT_decl_file -; CHECK-NEXT:.b8 216 // DW_AT_decl_line -; CHECK-NEXT:.b32 4182 // DW_AT_import -; CHECK-NEXT:.b8 29 // Abbrev [29] 0x97f:0x7 DW_TAG_imported_declaration -; CHECK-NEXT:.b8 4 // DW_AT_decl_file -; CHECK-NEXT:.b8 217 // DW_AT_decl_line -; CHECK-NEXT:.b32 4211 // DW_AT_import -; CHECK-NEXT:.b8 29 // Abbrev [29] 0x986:0x7 DW_TAG_imported_declaration -; CHECK-NEXT:.b8 4 // DW_AT_decl_file -; CHECK-NEXT:.b8 218 // DW_AT_decl_line -; CHECK-NEXT:.b32 4238 // DW_AT_import -; CHECK-NEXT:.b8 29 // Abbrev [29] 0x98d:0x7 DW_TAG_imported_declaration -; CHECK-NEXT:.b8 4 // DW_AT_decl_file -; CHECK-NEXT:.b8 219 // DW_AT_decl_line -; CHECK-NEXT:.b32 4267 // DW_AT_import -; CHECK-NEXT:.b8 29 // Abbrev [29] 0x994:0x7 DW_TAG_imported_declaration -; CHECK-NEXT:.b8 4 // DW_AT_decl_file -; CHECK-NEXT:.b8 220 // DW_AT_decl_line -; CHECK-NEXT:.b32 4298 // DW_AT_import -; CHECK-NEXT:.b8 29 // Abbrev [29] 0x99b:0x7 DW_TAG_imported_declaration -; CHECK-NEXT:.b8 4 // DW_AT_decl_file -; CHECK-NEXT:.b8 221 // DW_AT_decl_line -; CHECK-NEXT:.b32 4327 // DW_AT_import -; CHECK-NEXT:.b8 29 // Abbrev [29] 0x9a2:0x7 DW_TAG_imported_declaration -; CHECK-NEXT:.b8 4 // DW_AT_decl_file -; CHECK-NEXT:.b8 222 // DW_AT_decl_line -; CHECK-NEXT:.b32 4362 // DW_AT_import -; 
CHECK-NEXT:.b8 29 // Abbrev [29] 0x9a9:0x7 DW_TAG_imported_declaration -; CHECK-NEXT:.b8 4 // DW_AT_decl_file -; CHECK-NEXT:.b8 223 // DW_AT_decl_line -; CHECK-NEXT:.b32 4393 // DW_AT_import -; CHECK-NEXT:.b8 29 // Abbrev [29] 0x9b0:0x7 DW_TAG_imported_declaration -; CHECK-NEXT:.b8 4 // DW_AT_decl_file -; CHECK-NEXT:.b8 224 // DW_AT_decl_line -; CHECK-NEXT:.b32 4432 // DW_AT_import -; CHECK-NEXT:.b8 29 // Abbrev [29] 0x9b7:0x7 DW_TAG_imported_declaration -; CHECK-NEXT:.b8 4 // DW_AT_decl_file -; CHECK-NEXT:.b8 225 // DW_AT_decl_line -; CHECK-NEXT:.b32 4467 // DW_AT_import -; CHECK-NEXT:.b8 29 // Abbrev [29] 0x9be:0x7 DW_TAG_imported_declaration -; CHECK-NEXT:.b8 4 // DW_AT_decl_file -; CHECK-NEXT:.b8 226 // DW_AT_decl_line -; CHECK-NEXT:.b32 4502 // DW_AT_import -; CHECK-NEXT:.b8 29 // Abbrev [29] 0x9c5:0x7 DW_TAG_imported_declaration -; CHECK-NEXT:.b8 4 // DW_AT_decl_file -; CHECK-NEXT:.b8 227 // DW_AT_decl_line -; CHECK-NEXT:.b32 4537 // DW_AT_import -; CHECK-NEXT:.b8 29 // Abbrev [29] 0x9cc:0x7 DW_TAG_imported_declaration -; CHECK-NEXT:.b8 4 // DW_AT_decl_file -; CHECK-NEXT:.b8 228 // DW_AT_decl_line -; CHECK-NEXT:.b32 4586 // DW_AT_import -; CHECK-NEXT:.b8 29 // Abbrev [29] 0x9d3:0x7 DW_TAG_imported_declaration -; CHECK-NEXT:.b8 4 // DW_AT_decl_file -; CHECK-NEXT:.b8 229 // DW_AT_decl_line -; CHECK-NEXT:.b32 4629 // DW_AT_import -; CHECK-NEXT:.b8 29 // Abbrev [29] 0x9da:0x7 DW_TAG_imported_declaration -; CHECK-NEXT:.b8 4 // DW_AT_decl_file -; CHECK-NEXT:.b8 230 // DW_AT_decl_line -; CHECK-NEXT:.b32 4666 // DW_AT_import -; CHECK-NEXT:.b8 29 // Abbrev [29] 0x9e1:0x7 DW_TAG_imported_declaration -; CHECK-NEXT:.b8 4 // DW_AT_decl_file -; CHECK-NEXT:.b8 231 // DW_AT_decl_line -; CHECK-NEXT:.b32 4697 // DW_AT_import -; CHECK-NEXT:.b8 29 // Abbrev [29] 0x9e8:0x7 DW_TAG_imported_declaration -; CHECK-NEXT:.b8 4 // DW_AT_decl_file -; CHECK-NEXT:.b8 232 // DW_AT_decl_line -; CHECK-NEXT:.b32 4742 // DW_AT_import -; CHECK-NEXT:.b8 29 // Abbrev [29] 0x9ef:0x7 DW_TAG_imported_declaration -; CHECK-NEXT:.b8 4 // DW_AT_decl_file -; CHECK-NEXT:.b8 233 // DW_AT_decl_line -; CHECK-NEXT:.b32 4787 // DW_AT_import -; CHECK-NEXT:.b8 29 // Abbrev [29] 0x9f6:0x7 DW_TAG_imported_declaration -; CHECK-NEXT:.b8 4 // DW_AT_decl_file -; CHECK-NEXT:.b8 234 // DW_AT_decl_line -; CHECK-NEXT:.b32 4843 // DW_AT_import -; CHECK-NEXT:.b8 29 // Abbrev [29] 0x9fd:0x7 DW_TAG_imported_declaration -; CHECK-NEXT:.b8 4 // DW_AT_decl_file -; CHECK-NEXT:.b8 235 // DW_AT_decl_line -; CHECK-NEXT:.b32 4874 // DW_AT_import -; CHECK-NEXT:.b8 29 // Abbrev [29] 0xa04:0x7 DW_TAG_imported_declaration -; CHECK-NEXT:.b8 4 // DW_AT_decl_file -; CHECK-NEXT:.b8 236 // DW_AT_decl_line -; CHECK-NEXT:.b32 4913 // DW_AT_import -; CHECK-NEXT:.b8 29 // Abbrev [29] 0xa0b:0x7 DW_TAG_imported_declaration -; CHECK-NEXT:.b8 4 // DW_AT_decl_file -; CHECK-NEXT:.b8 237 // DW_AT_decl_line -; CHECK-NEXT:.b32 4963 // DW_AT_import -; CHECK-NEXT:.b8 29 // Abbrev [29] 0xa12:0x7 DW_TAG_imported_declaration -; CHECK-NEXT:.b8 4 // DW_AT_decl_file -; CHECK-NEXT:.b8 238 // DW_AT_decl_line -; CHECK-NEXT:.b32 5017 // DW_AT_import -; CHECK-NEXT:.b8 29 // Abbrev [29] 0xa19:0x7 DW_TAG_imported_declaration -; CHECK-NEXT:.b8 4 // DW_AT_decl_file -; CHECK-NEXT:.b8 239 // DW_AT_decl_line -; CHECK-NEXT:.b32 5048 // DW_AT_import -; CHECK-NEXT:.b8 29 // Abbrev [29] 0xa20:0x7 DW_TAG_imported_declaration -; CHECK-NEXT:.b8 4 // DW_AT_decl_file -; CHECK-NEXT:.b8 240 // DW_AT_decl_line -; CHECK-NEXT:.b32 5085 // DW_AT_import -; CHECK-NEXT:.b8 29 // Abbrev [29] 0xa27:0x7 
DW_TAG_imported_declaration -; CHECK-NEXT:.b8 4 // DW_AT_decl_file -; CHECK-NEXT:.b8 241 // DW_AT_decl_line -; CHECK-NEXT:.b32 5135 // DW_AT_import -; CHECK-NEXT:.b8 29 // Abbrev [29] 0xa2e:0x7 DW_TAG_imported_declaration -; CHECK-NEXT:.b8 4 // DW_AT_decl_file -; CHECK-NEXT:.b8 242 // DW_AT_decl_line -; CHECK-NEXT:.b32 5176 // DW_AT_import -; CHECK-NEXT:.b8 29 // Abbrev [29] 0xa35:0x7 DW_TAG_imported_declaration -; CHECK-NEXT:.b8 4 // DW_AT_decl_file -; CHECK-NEXT:.b8 243 // DW_AT_decl_line -; CHECK-NEXT:.b32 5213 // DW_AT_import -; CHECK-NEXT:.b8 29 // Abbrev [29] 0xa3c:0x7 DW_TAG_imported_declaration -; CHECK-NEXT:.b8 4 // DW_AT_decl_file -; CHECK-NEXT:.b8 244 // DW_AT_decl_line -; CHECK-NEXT:.b32 5246 // DW_AT_import -; CHECK-NEXT:.b8 29 // Abbrev [29] 0xa43:0x7 DW_TAG_imported_declaration -; CHECK-NEXT:.b8 4 // DW_AT_decl_file -; CHECK-NEXT:.b8 245 // DW_AT_decl_line -; CHECK-NEXT:.b32 5277 // DW_AT_import -; CHECK-NEXT:.b8 29 // Abbrev [29] 0xa4a:0x7 DW_TAG_imported_declaration -; CHECK-NEXT:.b8 4 // DW_AT_decl_file -; CHECK-NEXT:.b8 246 // DW_AT_decl_line -; CHECK-NEXT:.b32 5310 // DW_AT_import -; CHECK-NEXT:.b8 29 // Abbrev [29] 0xa51:0x7 DW_TAG_imported_declaration -; CHECK-NEXT:.b8 4 // DW_AT_decl_file -; CHECK-NEXT:.b8 247 // DW_AT_decl_line -; CHECK-NEXT:.b32 5337 // DW_AT_import -; CHECK-NEXT:.b8 29 // Abbrev [29] 0xa58:0x7 DW_TAG_imported_declaration -; CHECK-NEXT:.b8 4 // DW_AT_decl_file -; CHECK-NEXT:.b8 248 // DW_AT_decl_line -; CHECK-NEXT:.b32 5368 // DW_AT_import -; CHECK-NEXT:.b8 29 // Abbrev [29] 0xa5f:0x7 DW_TAG_imported_declaration -; CHECK-NEXT:.b8 4 // DW_AT_decl_file -; CHECK-NEXT:.b8 249 // DW_AT_decl_line -; CHECK-NEXT:.b32 5399 // DW_AT_import -; CHECK-NEXT:.b8 29 // Abbrev [29] 0xa66:0x7 DW_TAG_imported_declaration -; CHECK-NEXT:.b8 4 // DW_AT_decl_file -; CHECK-NEXT:.b8 250 // DW_AT_decl_line -; CHECK-NEXT:.b32 5428 // DW_AT_import -; CHECK-NEXT:.b8 29 // Abbrev [29] 0xa6d:0x7 DW_TAG_imported_declaration -; CHECK-NEXT:.b8 4 // DW_AT_decl_file -; CHECK-NEXT:.b8 251 // DW_AT_decl_line -; CHECK-NEXT:.b32 5457 // DW_AT_import -; CHECK-NEXT:.b8 29 // Abbrev [29] 0xa74:0x7 DW_TAG_imported_declaration -; CHECK-NEXT:.b8 4 // DW_AT_decl_file -; CHECK-NEXT:.b8 252 // DW_AT_decl_line -; CHECK-NEXT:.b32 5488 // DW_AT_import -; CHECK-NEXT:.b8 29 // Abbrev [29] 0xa7b:0x7 DW_TAG_imported_declaration -; CHECK-NEXT:.b8 4 // DW_AT_decl_file -; CHECK-NEXT:.b8 253 // DW_AT_decl_line -; CHECK-NEXT:.b32 5521 // DW_AT_import -; CHECK-NEXT:.b8 29 // Abbrev [29] 0xa82:0x7 DW_TAG_imported_declaration -; CHECK-NEXT:.b8 4 // DW_AT_decl_file -; CHECK-NEXT:.b8 254 // DW_AT_decl_line -; CHECK-NEXT:.b32 5556 // DW_AT_import -; CHECK-NEXT:.b8 29 // Abbrev [29] 0xa89:0x7 DW_TAG_imported_declaration -; CHECK-NEXT:.b8 4 // DW_AT_decl_file -; CHECK-NEXT:.b8 255 // DW_AT_decl_line -; CHECK-NEXT:.b32 5592 // DW_AT_import -; CHECK-NEXT:.b8 30 // Abbrev [30] 0xa90:0x8 DW_TAG_imported_declaration -; CHECK-NEXT:.b8 4 // DW_AT_decl_file -; CHECK-NEXT:.b8 0 // DW_AT_decl_line -; CHECK-NEXT:.b8 1 -; CHECK-NEXT:.b32 5649 // DW_AT_import -; CHECK-NEXT:.b8 30 // Abbrev [30] 0xa98:0x8 DW_TAG_imported_declaration -; CHECK-NEXT:.b8 4 // DW_AT_decl_file -; CHECK-NEXT:.b8 1 // DW_AT_decl_line -; CHECK-NEXT:.b8 1 -; CHECK-NEXT:.b32 5680 // DW_AT_import -; CHECK-NEXT:.b8 30 // Abbrev [30] 0xaa0:0x8 DW_TAG_imported_declaration -; CHECK-NEXT:.b8 4 // DW_AT_decl_file -; CHECK-NEXT:.b8 2 // DW_AT_decl_line -; CHECK-NEXT:.b8 1 -; CHECK-NEXT:.b32 5719 // DW_AT_import -; CHECK-NEXT:.b8 30 // Abbrev [30] 0xaa8:0x8 
DW_TAG_imported_declaration -; CHECK-NEXT:.b8 4 // DW_AT_decl_file -; CHECK-NEXT:.b8 3 // DW_AT_decl_line -; CHECK-NEXT:.b8 1 -; CHECK-NEXT:.b32 5764 // DW_AT_import -; CHECK-NEXT:.b8 30 // Abbrev [30] 0xab0:0x8 DW_TAG_imported_declaration -; CHECK-NEXT:.b8 4 // DW_AT_decl_file -; CHECK-NEXT:.b8 4 // DW_AT_decl_line -; CHECK-NEXT:.b8 1 -; CHECK-NEXT:.b32 5797 // DW_AT_import -; CHECK-NEXT:.b8 30 // Abbrev [30] 0xab8:0x8 DW_TAG_imported_declaration -; CHECK-NEXT:.b8 4 // DW_AT_decl_file -; CHECK-NEXT:.b8 5 // DW_AT_decl_line -; CHECK-NEXT:.b8 1 -; CHECK-NEXT:.b32 5842 // DW_AT_import -; CHECK-NEXT:.b8 30 // Abbrev [30] 0xac0:0x8 DW_TAG_imported_declaration -; CHECK-NEXT:.b8 4 // DW_AT_decl_file -; CHECK-NEXT:.b8 6 // DW_AT_decl_line -; CHECK-NEXT:.b8 1 -; CHECK-NEXT:.b32 5888 // DW_AT_import -; CHECK-NEXT:.b8 30 // Abbrev [30] 0xac8:0x8 DW_TAG_imported_declaration -; CHECK-NEXT:.b8 4 // DW_AT_decl_file -; CHECK-NEXT:.b8 7 // DW_AT_decl_line -; CHECK-NEXT:.b8 1 -; CHECK-NEXT:.b32 5917 // DW_AT_import -; CHECK-NEXT:.b8 30 // Abbrev [30] 0xad0:0x8 DW_TAG_imported_declaration -; CHECK-NEXT:.b8 4 // DW_AT_decl_file -; CHECK-NEXT:.b8 8 // DW_AT_decl_line -; CHECK-NEXT:.b8 1 -; CHECK-NEXT:.b32 5948 // DW_AT_import -; CHECK-NEXT:.b8 30 // Abbrev [30] 0xad8:0x8 DW_TAG_imported_declaration -; CHECK-NEXT:.b8 4 // DW_AT_decl_file -; CHECK-NEXT:.b8 9 // DW_AT_decl_line -; CHECK-NEXT:.b8 1 -; CHECK-NEXT:.b32 5989 // DW_AT_import -; CHECK-NEXT:.b8 30 // Abbrev [30] 0xae0:0x8 DW_TAG_imported_declaration -; CHECK-NEXT:.b8 4 // DW_AT_decl_file -; CHECK-NEXT:.b8 10 // DW_AT_decl_line -; CHECK-NEXT:.b8 1 -; CHECK-NEXT:.b32 6028 // DW_AT_import -; CHECK-NEXT:.b8 30 // Abbrev [30] 0xae8:0x8 DW_TAG_imported_declaration -; CHECK-NEXT:.b8 4 // DW_AT_decl_file -; CHECK-NEXT:.b8 11 // DW_AT_decl_line -; CHECK-NEXT:.b8 1 -; CHECK-NEXT:.b32 6063 // DW_AT_import -; CHECK-NEXT:.b8 30 // Abbrev [30] 0xaf0:0x8 DW_TAG_imported_declaration -; CHECK-NEXT:.b8 4 // DW_AT_decl_file -; CHECK-NEXT:.b8 12 // DW_AT_decl_line -; CHECK-NEXT:.b8 1 -; CHECK-NEXT:.b32 6090 // DW_AT_import -; CHECK-NEXT:.b8 30 // Abbrev [30] 0xaf8:0x8 DW_TAG_imported_declaration -; CHECK-NEXT:.b8 4 // DW_AT_decl_file -; CHECK-NEXT:.b8 13 // DW_AT_decl_line -; CHECK-NEXT:.b8 1 -; CHECK-NEXT:.b32 6119 // DW_AT_import -; CHECK-NEXT:.b8 30 // Abbrev [30] 0xb00:0x8 DW_TAG_imported_declaration -; CHECK-NEXT:.b8 4 // DW_AT_decl_file -; CHECK-NEXT:.b8 14 // DW_AT_decl_line -; CHECK-NEXT:.b8 1 -; CHECK-NEXT:.b32 6148 // DW_AT_import -; CHECK-NEXT:.b8 30 // Abbrev [30] 0xb08:0x8 DW_TAG_imported_declaration -; CHECK-NEXT:.b8 4 // DW_AT_decl_file -; CHECK-NEXT:.b8 15 // DW_AT_decl_line -; CHECK-NEXT:.b8 1 -; CHECK-NEXT:.b32 6175 // DW_AT_import -; CHECK-NEXT:.b8 30 // Abbrev [30] 0xb10:0x8 DW_TAG_imported_declaration -; CHECK-NEXT:.b8 4 // DW_AT_decl_file -; CHECK-NEXT:.b8 16 // DW_AT_decl_line -; CHECK-NEXT:.b8 1 -; CHECK-NEXT:.b32 6204 // DW_AT_import -; CHECK-NEXT:.b8 30 // Abbrev [30] 0xb18:0x8 DW_TAG_imported_declaration -; CHECK-NEXT:.b8 4 // DW_AT_decl_file -; CHECK-NEXT:.b8 17 // DW_AT_decl_line -; CHECK-NEXT:.b8 1 -; CHECK-NEXT:.b32 6237 // DW_AT_import -; CHECK-NEXT:.b8 29 // Abbrev [29] 0xb20:0x7 DW_TAG_imported_declaration -; CHECK-NEXT:.b8 6 // DW_AT_decl_file -; CHECK-NEXT:.b8 102 // DW_AT_decl_line -; CHECK-NEXT:.b32 6268 // DW_AT_import -; CHECK-NEXT:.b8 29 // Abbrev [29] 0xb27:0x7 DW_TAG_imported_declaration -; CHECK-NEXT:.b8 6 // DW_AT_decl_file -; CHECK-NEXT:.b8 121 // DW_AT_decl_line -; CHECK-NEXT:.b32 6288 // DW_AT_import -; CHECK-NEXT:.b8 29 // 
Abbrev [29] 0xb2e:0x7 DW_TAG_imported_declaration -; CHECK-NEXT:.b8 6 // DW_AT_decl_file -; CHECK-NEXT:.b8 140 // DW_AT_decl_line -; CHECK-NEXT:.b32 6308 // DW_AT_import -; CHECK-NEXT:.b8 29 // Abbrev [29] 0xb35:0x7 DW_TAG_imported_declaration -; CHECK-NEXT:.b8 6 // DW_AT_decl_file -; CHECK-NEXT:.b8 159 // DW_AT_decl_line -; CHECK-NEXT:.b32 6328 // DW_AT_import -; CHECK-NEXT:.b8 29 // Abbrev [29] 0xb3c:0x7 DW_TAG_imported_declaration -; CHECK-NEXT:.b8 6 // DW_AT_decl_file -; CHECK-NEXT:.b8 180 // DW_AT_decl_line -; CHECK-NEXT:.b32 6354 // DW_AT_import -; CHECK-NEXT:.b8 29 // Abbrev [29] 0xb43:0x7 DW_TAG_imported_declaration -; CHECK-NEXT:.b8 6 // DW_AT_decl_file -; CHECK-NEXT:.b8 199 // DW_AT_decl_line -; CHECK-NEXT:.b32 6374 // DW_AT_import -; CHECK-NEXT:.b8 29 // Abbrev [29] 0xb4a:0x7 DW_TAG_imported_declaration -; CHECK-NEXT:.b8 6 // DW_AT_decl_file -; CHECK-NEXT:.b8 218 // DW_AT_decl_line -; CHECK-NEXT:.b32 6393 // DW_AT_import -; CHECK-NEXT:.b8 29 // Abbrev [29] 0xb51:0x7 DW_TAG_imported_declaration -; CHECK-NEXT:.b8 6 // DW_AT_decl_file -; CHECK-NEXT:.b8 237 // DW_AT_decl_line -; CHECK-NEXT:.b32 6413 // DW_AT_import -; CHECK-NEXT:.b8 30 // Abbrev [30] 0xb58:0x8 DW_TAG_imported_declaration -; CHECK-NEXT:.b8 6 // DW_AT_decl_file -; CHECK-NEXT:.b8 0 // DW_AT_decl_line -; CHECK-NEXT:.b8 1 -; CHECK-NEXT:.b32 6432 // DW_AT_import -; CHECK-NEXT:.b8 30 // Abbrev [30] 0xb60:0x8 DW_TAG_imported_declaration -; CHECK-NEXT:.b8 6 // DW_AT_decl_file -; CHECK-NEXT:.b8 19 // DW_AT_decl_line -; CHECK-NEXT:.b8 1 -; CHECK-NEXT:.b32 6452 // DW_AT_import -; CHECK-NEXT:.b8 30 // Abbrev [30] 0xb68:0x8 DW_TAG_imported_declaration -; CHECK-NEXT:.b8 6 // DW_AT_decl_file -; CHECK-NEXT:.b8 38 // DW_AT_decl_line -; CHECK-NEXT:.b8 1 -; CHECK-NEXT:.b32 6473 // DW_AT_import -; CHECK-NEXT:.b8 30 // Abbrev [30] 0xb70:0x8 DW_TAG_imported_declaration -; CHECK-NEXT:.b8 6 // DW_AT_decl_file -; CHECK-NEXT:.b8 59 // DW_AT_decl_line -; CHECK-NEXT:.b8 1 -; CHECK-NEXT:.b32 6498 // DW_AT_import -; CHECK-NEXT:.b8 30 // Abbrev [30] 0xb78:0x8 DW_TAG_imported_declaration -; CHECK-NEXT:.b8 6 // DW_AT_decl_file -; CHECK-NEXT:.b8 78 // DW_AT_decl_line -; CHECK-NEXT:.b8 1 -; CHECK-NEXT:.b32 6524 // DW_AT_import -; CHECK-NEXT:.b8 30 // Abbrev [30] 0xb80:0x8 DW_TAG_imported_declaration -; CHECK-NEXT:.b8 6 // DW_AT_decl_file -; CHECK-NEXT:.b8 97 // DW_AT_decl_line -; CHECK-NEXT:.b8 1 -; CHECK-NEXT:.b32 6550 // DW_AT_import -; CHECK-NEXT:.b8 30 // Abbrev [30] 0xb88:0x8 DW_TAG_imported_declaration -; CHECK-NEXT:.b8 6 // DW_AT_decl_file -; CHECK-NEXT:.b8 116 // DW_AT_decl_line -; CHECK-NEXT:.b8 1 -; CHECK-NEXT:.b32 6569 // DW_AT_import -; CHECK-NEXT:.b8 30 // Abbrev [30] 0xb90:0x8 DW_TAG_imported_declaration -; CHECK-NEXT:.b8 6 // DW_AT_decl_file -; CHECK-NEXT:.b8 135 // DW_AT_decl_line -; CHECK-NEXT:.b8 1 -; CHECK-NEXT:.b32 6590 // DW_AT_import -; CHECK-NEXT:.b8 30 // Abbrev [30] 0xb98:0x8 DW_TAG_imported_declaration -; CHECK-NEXT:.b8 6 // DW_AT_decl_file -; CHECK-NEXT:.b8 147 // DW_AT_decl_line -; CHECK-NEXT:.b8 1 -; CHECK-NEXT:.b32 6620 // DW_AT_import -; CHECK-NEXT:.b8 30 // Abbrev [30] 0xba0:0x8 DW_TAG_imported_declaration -; CHECK-NEXT:.b8 6 // DW_AT_decl_file -; CHECK-NEXT:.b8 184 // DW_AT_decl_line -; CHECK-NEXT:.b8 1 -; CHECK-NEXT:.b32 6644 // DW_AT_import -; CHECK-NEXT:.b8 30 // Abbrev [30] 0xba8:0x8 DW_TAG_imported_declaration -; CHECK-NEXT:.b8 6 // DW_AT_decl_file -; CHECK-NEXT:.b8 203 // DW_AT_decl_line -; CHECK-NEXT:.b8 1 -; CHECK-NEXT:.b32 6663 // DW_AT_import -; CHECK-NEXT:.b8 30 // Abbrev [30] 0xbb0:0x8 
DW_TAG_imported_declaration -; CHECK-NEXT:.b8 6 // DW_AT_decl_file -; CHECK-NEXT:.b8 222 // DW_AT_decl_line -; CHECK-NEXT:.b8 1 -; CHECK-NEXT:.b32 6683 // DW_AT_import -; CHECK-NEXT:.b8 30 // Abbrev [30] 0xbb8:0x8 DW_TAG_imported_declaration -; CHECK-NEXT:.b8 6 // DW_AT_decl_file -; CHECK-NEXT:.b8 241 // DW_AT_decl_line -; CHECK-NEXT:.b8 1 -; CHECK-NEXT:.b32 6703 // DW_AT_import -; CHECK-NEXT:.b8 30 // Abbrev [30] 0xbc0:0x8 DW_TAG_imported_declaration -; CHECK-NEXT:.b8 6 // DW_AT_decl_file -; CHECK-NEXT:.b8 4 // DW_AT_decl_line -; CHECK-NEXT:.b8 2 -; CHECK-NEXT:.b32 6722 // DW_AT_import -; CHECK-NEXT:.b8 29 // Abbrev [29] 0xbc8:0x7 DW_TAG_imported_declaration -; CHECK-NEXT:.b8 8 // DW_AT_decl_file -; CHECK-NEXT:.b8 118 // DW_AT_decl_line -; CHECK-NEXT:.b32 6742 // DW_AT_import -; CHECK-NEXT:.b8 29 // Abbrev [29] 0xbcf:0x7 DW_TAG_imported_declaration -; CHECK-NEXT:.b8 8 // DW_AT_decl_file -; CHECK-NEXT:.b8 119 // DW_AT_decl_line -; CHECK-NEXT:.b32 6757 // DW_AT_import -; CHECK-NEXT:.b8 29 // Abbrev [29] 0xbd6:0x7 DW_TAG_imported_declaration -; CHECK-NEXT:.b8 8 // DW_AT_decl_file -; CHECK-NEXT:.b8 121 // DW_AT_decl_line -; CHECK-NEXT:.b32 6805 // DW_AT_import -; CHECK-NEXT:.b8 29 // Abbrev [29] 0xbdd:0x7 DW_TAG_imported_declaration -; CHECK-NEXT:.b8 8 // DW_AT_decl_file -; CHECK-NEXT:.b8 122 // DW_AT_decl_line -; CHECK-NEXT:.b32 6818 // DW_AT_import -; CHECK-NEXT:.b8 29 // Abbrev [29] 0xbe4:0x7 DW_TAG_imported_declaration -; CHECK-NEXT:.b8 8 // DW_AT_decl_file +; CHECK-NEXT:.b8 108 // DW_AT_name +; CHECK-NEXT:.b8 100 +; CHECK-NEXT:.b8 101 +; CHECK-NEXT:.b8 120 +; CHECK-NEXT:.b8 112 +; CHECK-NEXT:.b8 0 +; CHECK-NEXT:.b8 1 // DW_AT_decl_file ; CHECK-NEXT:.b8 123 // DW_AT_decl_line -; CHECK-NEXT:.b32 6838 // DW_AT_import -; CHECK-NEXT:.b8 29 // Abbrev [29] 0xbeb:0x7 DW_TAG_imported_declaration -; CHECK-NEXT:.b8 8 // DW_AT_decl_file -; CHECK-NEXT:.b8 129 // DW_AT_decl_line -; CHECK-NEXT:.b32 6867 // DW_AT_import -; CHECK-NEXT:.b8 29 // Abbrev [29] 0xbf2:0x7 DW_TAG_imported_declaration -; CHECK-NEXT:.b8 8 // DW_AT_decl_file +; CHECK-NEXT:.b32 1554 // DW_AT_type +; CHECK-NEXT:.b8 1 // DW_AT_declaration +; CHECK-NEXT:.b8 6 // Abbrev [6] 0xb8b:0x5 DW_TAG_formal_parameter +; CHECK-NEXT:.b32 1554 // DW_AT_type +; CHECK-NEXT:.b8 6 // Abbrev [6] 0xb90:0x5 DW_TAG_formal_parameter +; CHECK-NEXT:.b32 2332 // DW_AT_type +; CHECK-NEXT:.b8 0 // End Of Children Mark +; CHECK-NEXT:.b8 5 // Abbrev [5] 0xb96:0x21 DW_TAG_subprogram +; CHECK-NEXT:.b8 95 // DW_AT_MIPS_linkage_name +; CHECK-NEXT:.b8 90 +; CHECK-NEXT:.b8 76 +; CHECK-NEXT:.b8 54 +; CHECK-NEXT:.b8 108 +; CHECK-NEXT:.b8 103 +; CHECK-NEXT:.b8 97 +; CHECK-NEXT:.b8 109 +; CHECK-NEXT:.b8 109 +; CHECK-NEXT:.b8 97 +; CHECK-NEXT:.b8 102 +; CHECK-NEXT:.b8 0 +; CHECK-NEXT:.b8 108 // DW_AT_name +; CHECK-NEXT:.b8 103 +; CHECK-NEXT:.b8 97 +; CHECK-NEXT:.b8 109 +; CHECK-NEXT:.b8 109 +; CHECK-NEXT:.b8 97 +; CHECK-NEXT:.b8 0 +; CHECK-NEXT:.b8 1 // DW_AT_decl_file +; CHECK-NEXT:.b8 125 // DW_AT_decl_line +; CHECK-NEXT:.b32 1554 // DW_AT_type +; CHECK-NEXT:.b8 1 // DW_AT_declaration +; CHECK-NEXT:.b8 6 // Abbrev [6] 0xbb1:0x5 DW_TAG_formal_parameter +; CHECK-NEXT:.b32 1554 // DW_AT_type +; CHECK-NEXT:.b8 0 // End Of Children Mark +; CHECK-NEXT:.b8 5 // Abbrev [5] 0xbb7:0x1f DW_TAG_subprogram +; CHECK-NEXT:.b8 95 // DW_AT_MIPS_linkage_name +; CHECK-NEXT:.b8 90 +; CHECK-NEXT:.b8 76 +; CHECK-NEXT:.b8 53 +; CHECK-NEXT:.b8 108 +; CHECK-NEXT:.b8 108 +; CHECK-NEXT:.b8 97 +; CHECK-NEXT:.b8 98 +; CHECK-NEXT:.b8 115 +; CHECK-NEXT:.b8 120 +; CHECK-NEXT:.b8 0 +; CHECK-NEXT:.b8 108 
// DW_AT_name +; CHECK-NEXT:.b8 108 +; CHECK-NEXT:.b8 97 +; CHECK-NEXT:.b8 98 +; CHECK-NEXT:.b8 115 +; CHECK-NEXT:.b8 0 +; CHECK-NEXT:.b8 1 // DW_AT_decl_file +; CHECK-NEXT:.b8 126 // DW_AT_decl_line +; CHECK-NEXT:.b32 1508 // DW_AT_type +; CHECK-NEXT:.b8 1 // DW_AT_declaration +; CHECK-NEXT:.b8 6 // Abbrev [6] 0xbd0:0x5 DW_TAG_formal_parameter +; CHECK-NEXT:.b32 1508 // DW_AT_type +; CHECK-NEXT:.b8 0 // End Of Children Mark +; CHECK-NEXT:.b8 5 // Abbrev [5] 0xbd6:0x21 DW_TAG_subprogram +; CHECK-NEXT:.b8 95 // DW_AT_MIPS_linkage_name +; CHECK-NEXT:.b8 90 +; CHECK-NEXT:.b8 76 +; CHECK-NEXT:.b8 54 +; CHECK-NEXT:.b8 108 +; CHECK-NEXT:.b8 108 +; CHECK-NEXT:.b8 114 +; CHECK-NEXT:.b8 105 +; CHECK-NEXT:.b8 110 +; CHECK-NEXT:.b8 116 +; CHECK-NEXT:.b8 102 +; CHECK-NEXT:.b8 0 +; CHECK-NEXT:.b8 108 // DW_AT_name +; CHECK-NEXT:.b8 108 +; CHECK-NEXT:.b8 114 +; CHECK-NEXT:.b8 105 +; CHECK-NEXT:.b8 110 +; CHECK-NEXT:.b8 116 +; CHECK-NEXT:.b8 0 +; CHECK-NEXT:.b8 1 // DW_AT_decl_file +; CHECK-NEXT:.b8 128 // DW_AT_decl_line +; CHECK-NEXT:.b32 1508 // DW_AT_type +; CHECK-NEXT:.b8 1 // DW_AT_declaration +; CHECK-NEXT:.b8 6 // Abbrev [6] 0xbf1:0x5 DW_TAG_formal_parameter +; CHECK-NEXT:.b32 1554 // DW_AT_type +; CHECK-NEXT:.b8 0 // End Of Children Mark +; CHECK-NEXT:.b8 5 // Abbrev [5] 0xbf7:0x1b DW_TAG_subprogram +; CHECK-NEXT:.b8 95 // DW_AT_MIPS_linkage_name +; CHECK-NEXT:.b8 90 +; CHECK-NEXT:.b8 76 +; CHECK-NEXT:.b8 51 +; CHECK-NEXT:.b8 108 +; CHECK-NEXT:.b8 111 +; CHECK-NEXT:.b8 103 +; CHECK-NEXT:.b8 102 +; CHECK-NEXT:.b8 0 +; CHECK-NEXT:.b8 108 // DW_AT_name +; CHECK-NEXT:.b8 111 +; CHECK-NEXT:.b8 103 +; CHECK-NEXT:.b8 0 +; CHECK-NEXT:.b8 1 // DW_AT_decl_file +; CHECK-NEXT:.b8 138 // DW_AT_decl_line +; CHECK-NEXT:.b32 1554 // DW_AT_type +; CHECK-NEXT:.b8 1 // DW_AT_declaration +; CHECK-NEXT:.b8 6 // Abbrev [6] 0xc0c:0x5 DW_TAG_formal_parameter +; CHECK-NEXT:.b32 1554 // DW_AT_type +; CHECK-NEXT:.b8 0 // End Of Children Mark +; CHECK-NEXT:.b8 5 // Abbrev [5] 0xc12:0x1f DW_TAG_subprogram +; CHECK-NEXT:.b8 95 // DW_AT_MIPS_linkage_name +; CHECK-NEXT:.b8 90 +; CHECK-NEXT:.b8 76 +; CHECK-NEXT:.b8 53 +; CHECK-NEXT:.b8 108 +; CHECK-NEXT:.b8 111 +; CHECK-NEXT:.b8 103 +; CHECK-NEXT:.b8 49 +; CHECK-NEXT:.b8 48 +; CHECK-NEXT:.b8 102 +; CHECK-NEXT:.b8 0 +; CHECK-NEXT:.b8 108 // DW_AT_name +; CHECK-NEXT:.b8 111 +; CHECK-NEXT:.b8 103 +; CHECK-NEXT:.b8 49 +; CHECK-NEXT:.b8 48 +; CHECK-NEXT:.b8 0 +; CHECK-NEXT:.b8 1 // DW_AT_decl_file ; CHECK-NEXT:.b8 130 // DW_AT_decl_line -; CHECK-NEXT:.b32 6887 // DW_AT_import -; CHECK-NEXT:.b8 29 // Abbrev [29] 0xbf9:0x7 DW_TAG_imported_declaration -; CHECK-NEXT:.b8 8 // DW_AT_decl_file -; CHECK-NEXT:.b8 131 // DW_AT_decl_line -; CHECK-NEXT:.b32 6908 // DW_AT_import -; CHECK-NEXT:.b8 29 // Abbrev [29] 0xc00:0x7 DW_TAG_imported_declaration -; CHECK-NEXT:.b8 8 // DW_AT_decl_file +; CHECK-NEXT:.b32 1554 // DW_AT_type +; CHECK-NEXT:.b8 1 // DW_AT_declaration +; CHECK-NEXT:.b8 6 // Abbrev [6] 0xc2b:0x5 DW_TAG_formal_parameter +; CHECK-NEXT:.b32 1554 // DW_AT_type +; CHECK-NEXT:.b8 0 // End Of Children Mark +; CHECK-NEXT:.b8 5 // Abbrev [5] 0xc31:0x1f DW_TAG_subprogram +; CHECK-NEXT:.b8 95 // DW_AT_MIPS_linkage_name +; CHECK-NEXT:.b8 90 +; CHECK-NEXT:.b8 76 +; CHECK-NEXT:.b8 53 +; CHECK-NEXT:.b8 108 +; CHECK-NEXT:.b8 111 +; CHECK-NEXT:.b8 103 +; CHECK-NEXT:.b8 49 +; CHECK-NEXT:.b8 112 +; CHECK-NEXT:.b8 102 +; CHECK-NEXT:.b8 0 +; CHECK-NEXT:.b8 108 // DW_AT_name +; CHECK-NEXT:.b8 111 +; CHECK-NEXT:.b8 103 +; CHECK-NEXT:.b8 49 +; CHECK-NEXT:.b8 112 +; CHECK-NEXT:.b8 0 +; CHECK-NEXT:.b8 1 // 
DW_AT_decl_file ; CHECK-NEXT:.b8 132 // DW_AT_decl_line -; CHECK-NEXT:.b32 6929 // DW_AT_import -; CHECK-NEXT:.b8 29 // Abbrev [29] 0xc07:0x7 DW_TAG_imported_declaration -; CHECK-NEXT:.b8 8 // DW_AT_decl_file -; CHECK-NEXT:.b8 133 // DW_AT_decl_line -; CHECK-NEXT:.b32 7057 // DW_AT_import -; CHECK-NEXT:.b8 29 // Abbrev [29] 0xc0e:0x7 DW_TAG_imported_declaration -; CHECK-NEXT:.b8 8 // DW_AT_decl_file +; CHECK-NEXT:.b32 1554 // DW_AT_type +; CHECK-NEXT:.b8 1 // DW_AT_declaration +; CHECK-NEXT:.b8 6 // Abbrev [6] 0xc4a:0x5 DW_TAG_formal_parameter +; CHECK-NEXT:.b32 1554 // DW_AT_type +; CHECK-NEXT:.b8 0 // End Of Children Mark +; CHECK-NEXT:.b8 5 // Abbrev [5] 0xc50:0x1d DW_TAG_subprogram +; CHECK-NEXT:.b8 95 // DW_AT_MIPS_linkage_name +; CHECK-NEXT:.b8 90 +; CHECK-NEXT:.b8 76 +; CHECK-NEXT:.b8 52 +; CHECK-NEXT:.b8 108 +; CHECK-NEXT:.b8 111 +; CHECK-NEXT:.b8 103 +; CHECK-NEXT:.b8 50 +; CHECK-NEXT:.b8 102 +; CHECK-NEXT:.b8 0 +; CHECK-NEXT:.b8 108 // DW_AT_name +; CHECK-NEXT:.b8 111 +; CHECK-NEXT:.b8 103 +; CHECK-NEXT:.b8 50 +; CHECK-NEXT:.b8 0 +; CHECK-NEXT:.b8 1 // DW_AT_decl_file ; CHECK-NEXT:.b8 134 // DW_AT_decl_line -; CHECK-NEXT:.b32 7085 // DW_AT_import -; CHECK-NEXT:.b8 29 // Abbrev [29] 0xc15:0x7 DW_TAG_imported_declaration -; CHECK-NEXT:.b8 8 // DW_AT_decl_file -; CHECK-NEXT:.b8 135 // DW_AT_decl_line -; CHECK-NEXT:.b32 7110 // DW_AT_import -; CHECK-NEXT:.b8 29 // Abbrev [29] 0xc1c:0x7 DW_TAG_imported_declaration -; CHECK-NEXT:.b8 8 // DW_AT_decl_file +; CHECK-NEXT:.b32 1554 // DW_AT_type +; CHECK-NEXT:.b8 1 // DW_AT_declaration +; CHECK-NEXT:.b8 6 // Abbrev [6] 0xc67:0x5 DW_TAG_formal_parameter +; CHECK-NEXT:.b32 1554 // DW_AT_type +; CHECK-NEXT:.b8 0 // End Of Children Mark +; CHECK-NEXT:.b8 5 // Abbrev [5] 0xc6d:0x1d DW_TAG_subprogram +; CHECK-NEXT:.b8 95 // DW_AT_MIPS_linkage_name +; CHECK-NEXT:.b8 90 +; CHECK-NEXT:.b8 76 +; CHECK-NEXT:.b8 52 +; CHECK-NEXT:.b8 108 +; CHECK-NEXT:.b8 111 +; CHECK-NEXT:.b8 103 +; CHECK-NEXT:.b8 98 +; CHECK-NEXT:.b8 102 +; CHECK-NEXT:.b8 0 +; CHECK-NEXT:.b8 108 // DW_AT_name +; CHECK-NEXT:.b8 111 +; CHECK-NEXT:.b8 103 +; CHECK-NEXT:.b8 98 +; CHECK-NEXT:.b8 0 +; CHECK-NEXT:.b8 1 // DW_AT_decl_file ; CHECK-NEXT:.b8 136 // DW_AT_decl_line -; CHECK-NEXT:.b32 7128 // DW_AT_import -; CHECK-NEXT:.b8 29 // Abbrev [29] 0xc23:0x7 DW_TAG_imported_declaration -; CHECK-NEXT:.b8 8 // DW_AT_decl_file -; CHECK-NEXT:.b8 137 // DW_AT_decl_line -; CHECK-NEXT:.b32 7145 // DW_AT_import -; CHECK-NEXT:.b8 29 // Abbrev [29] 0xc2a:0x7 DW_TAG_imported_declaration -; CHECK-NEXT:.b8 8 // DW_AT_decl_file -; CHECK-NEXT:.b8 138 // DW_AT_decl_line -; CHECK-NEXT:.b32 7173 // DW_AT_import -; CHECK-NEXT:.b8 29 // Abbrev [29] 0xc31:0x7 DW_TAG_imported_declaration -; CHECK-NEXT:.b8 8 // DW_AT_decl_file -; CHECK-NEXT:.b8 139 // DW_AT_decl_line -; CHECK-NEXT:.b32 7194 // DW_AT_import -; CHECK-NEXT:.b8 29 // Abbrev [29] 0xc38:0x7 DW_TAG_imported_declaration -; CHECK-NEXT:.b8 8 // DW_AT_decl_file +; CHECK-NEXT:.b32 1554 // DW_AT_type +; CHECK-NEXT:.b8 1 // DW_AT_declaration +; CHECK-NEXT:.b8 6 // Abbrev [6] 0xc84:0x5 DW_TAG_formal_parameter +; CHECK-NEXT:.b32 1554 // DW_AT_type +; CHECK-NEXT:.b8 0 // End Of Children Mark +; CHECK-NEXT:.b8 5 // Abbrev [5] 0xc8a:0x1f DW_TAG_subprogram +; CHECK-NEXT:.b8 95 // DW_AT_MIPS_linkage_name +; CHECK-NEXT:.b8 90 +; CHECK-NEXT:.b8 76 +; CHECK-NEXT:.b8 53 +; CHECK-NEXT:.b8 108 +; CHECK-NEXT:.b8 114 +; CHECK-NEXT:.b8 105 +; CHECK-NEXT:.b8 110 +; CHECK-NEXT:.b8 116 +; CHECK-NEXT:.b8 102 +; CHECK-NEXT:.b8 0 +; CHECK-NEXT:.b8 108 // DW_AT_name +; 
CHECK-NEXT:.b8 114 +; CHECK-NEXT:.b8 105 +; CHECK-NEXT:.b8 110 +; CHECK-NEXT:.b8 116 +; CHECK-NEXT:.b8 0 +; CHECK-NEXT:.b8 1 // DW_AT_decl_file ; CHECK-NEXT:.b8 140 // DW_AT_decl_line -; CHECK-NEXT:.b32 7220 // DW_AT_import -; CHECK-NEXT:.b8 29 // Abbrev [29] 0xc3f:0x7 DW_TAG_imported_declaration -; CHECK-NEXT:.b8 8 // DW_AT_decl_file +; CHECK-NEXT:.b32 2917 // DW_AT_type +; CHECK-NEXT:.b8 1 // DW_AT_declaration +; CHECK-NEXT:.b8 6 // Abbrev [6] 0xca3:0x5 DW_TAG_formal_parameter +; CHECK-NEXT:.b32 1554 // DW_AT_type +; CHECK-NEXT:.b8 0 // End Of Children Mark +; CHECK-NEXT:.b8 5 // Abbrev [5] 0xca9:0x21 DW_TAG_subprogram +; CHECK-NEXT:.b8 95 // DW_AT_MIPS_linkage_name +; CHECK-NEXT:.b8 90 +; CHECK-NEXT:.b8 76 +; CHECK-NEXT:.b8 54 +; CHECK-NEXT:.b8 108 +; CHECK-NEXT:.b8 114 +; CHECK-NEXT:.b8 111 +; CHECK-NEXT:.b8 117 +; CHECK-NEXT:.b8 110 +; CHECK-NEXT:.b8 100 +; CHECK-NEXT:.b8 102 +; CHECK-NEXT:.b8 0 +; CHECK-NEXT:.b8 108 // DW_AT_name +; CHECK-NEXT:.b8 114 +; CHECK-NEXT:.b8 111 +; CHECK-NEXT:.b8 117 +; CHECK-NEXT:.b8 110 +; CHECK-NEXT:.b8 100 +; CHECK-NEXT:.b8 0 +; CHECK-NEXT:.b8 1 // DW_AT_decl_file ; CHECK-NEXT:.b8 142 // DW_AT_decl_line -; CHECK-NEXT:.b32 7243 // DW_AT_import -; CHECK-NEXT:.b8 29 // Abbrev [29] 0xc46:0x7 DW_TAG_imported_declaration -; CHECK-NEXT:.b8 8 // DW_AT_decl_file +; CHECK-NEXT:.b32 2917 // DW_AT_type +; CHECK-NEXT:.b8 1 // DW_AT_declaration +; CHECK-NEXT:.b8 6 // Abbrev [6] 0xcc4:0x5 DW_TAG_formal_parameter +; CHECK-NEXT:.b32 1554 // DW_AT_type +; CHECK-NEXT:.b8 0 // End Of Children Mark +; CHECK-NEXT:.b8 5 // Abbrev [5] 0xcca:0x23 DW_TAG_subprogram +; CHECK-NEXT:.b8 95 // DW_AT_MIPS_linkage_name +; CHECK-NEXT:.b8 90 +; CHECK-NEXT:.b8 76 +; CHECK-NEXT:.b8 55 +; CHECK-NEXT:.b8 108 +; CHECK-NEXT:.b8 108 +; CHECK-NEXT:.b8 114 +; CHECK-NEXT:.b8 111 +; CHECK-NEXT:.b8 117 +; CHECK-NEXT:.b8 110 +; CHECK-NEXT:.b8 100 +; CHECK-NEXT:.b8 102 +; CHECK-NEXT:.b8 0 +; CHECK-NEXT:.b8 108 // DW_AT_name +; CHECK-NEXT:.b8 108 +; CHECK-NEXT:.b8 114 +; CHECK-NEXT:.b8 111 +; CHECK-NEXT:.b8 117 +; CHECK-NEXT:.b8 110 +; CHECK-NEXT:.b8 100 +; CHECK-NEXT:.b8 0 +; CHECK-NEXT:.b8 1 // DW_AT_decl_file ; CHECK-NEXT:.b8 143 // DW_AT_decl_line -; CHECK-NEXT:.b32 7270 // DW_AT_import -; CHECK-NEXT:.b8 29 // Abbrev [29] 0xc4d:0x7 DW_TAG_imported_declaration -; CHECK-NEXT:.b8 8 // DW_AT_decl_file -; CHECK-NEXT:.b8 144 // DW_AT_decl_line -; CHECK-NEXT:.b32 7321 // DW_AT_import -; CHECK-NEXT:.b8 29 // Abbrev [29] 0xc54:0x7 DW_TAG_imported_declaration -; CHECK-NEXT:.b8 8 // DW_AT_decl_file +; CHECK-NEXT:.b32 1508 // DW_AT_type +; CHECK-NEXT:.b8 1 // DW_AT_declaration +; CHECK-NEXT:.b8 6 // Abbrev [6] 0xce7:0x5 DW_TAG_formal_parameter +; CHECK-NEXT:.b32 1554 // DW_AT_type +; CHECK-NEXT:.b8 0 // End Of Children Mark +; CHECK-NEXT:.b8 5 // Abbrev [5] 0xced:0x24 DW_TAG_subprogram +; CHECK-NEXT:.b8 95 // DW_AT_MIPS_linkage_name +; CHECK-NEXT:.b8 90 +; CHECK-NEXT:.b8 76 +; CHECK-NEXT:.b8 52 +; CHECK-NEXT:.b8 109 +; CHECK-NEXT:.b8 111 +; CHECK-NEXT:.b8 100 +; CHECK-NEXT:.b8 102 +; CHECK-NEXT:.b8 102 +; CHECK-NEXT:.b8 80 +; CHECK-NEXT:.b8 102 +; CHECK-NEXT:.b8 0 +; CHECK-NEXT:.b8 109 // DW_AT_name +; CHECK-NEXT:.b8 111 +; CHECK-NEXT:.b8 100 +; CHECK-NEXT:.b8 102 +; CHECK-NEXT:.b8 0 +; CHECK-NEXT:.b8 1 // DW_AT_decl_file +; CHECK-NEXT:.b8 145 // DW_AT_decl_line +; CHECK-NEXT:.b32 1554 // DW_AT_type +; CHECK-NEXT:.b8 1 // DW_AT_declaration +; CHECK-NEXT:.b8 6 // Abbrev [6] 0xd06:0x5 DW_TAG_formal_parameter +; CHECK-NEXT:.b32 1554 // DW_AT_type +; CHECK-NEXT:.b8 6 // Abbrev [6] 0xd0b:0x5 
DW_TAG_formal_parameter +; CHECK-NEXT:.b32 3345 // DW_AT_type +; CHECK-NEXT:.b8 0 // End Of Children Mark +; CHECK-NEXT:.b8 8 // Abbrev [8] 0xd11:0x5 DW_TAG_pointer_type +; CHECK-NEXT:.b32 1554 // DW_AT_type +; CHECK-NEXT:.b8 5 // Abbrev [5] 0xd16:0x1d DW_TAG_subprogram +; CHECK-NEXT:.b8 95 // DW_AT_MIPS_linkage_name +; CHECK-NEXT:.b8 90 +; CHECK-NEXT:.b8 76 +; CHECK-NEXT:.b8 51 +; CHECK-NEXT:.b8 110 +; CHECK-NEXT:.b8 97 +; CHECK-NEXT:.b8 110 +; CHECK-NEXT:.b8 80 +; CHECK-NEXT:.b8 75 +; CHECK-NEXT:.b8 99 +; CHECK-NEXT:.b8 0 +; CHECK-NEXT:.b8 110 // DW_AT_name +; CHECK-NEXT:.b8 97 +; CHECK-NEXT:.b8 110 +; CHECK-NEXT:.b8 0 +; CHECK-NEXT:.b8 1 // DW_AT_decl_file ; CHECK-NEXT:.b8 146 // DW_AT_decl_line -; CHECK-NEXT:.b32 7354 // DW_AT_import -; CHECK-NEXT:.b8 29 // Abbrev [29] 0xc5b:0x7 DW_TAG_imported_declaration -; CHECK-NEXT:.b8 8 // DW_AT_decl_file -; CHECK-NEXT:.b8 152 // DW_AT_decl_line -; CHECK-NEXT:.b32 7387 // DW_AT_import -; CHECK-NEXT:.b8 29 // Abbrev [29] 0xc62:0x7 DW_TAG_imported_declaration -; CHECK-NEXT:.b8 8 // DW_AT_decl_file -; CHECK-NEXT:.b8 153 // DW_AT_decl_line -; CHECK-NEXT:.b32 7402 // DW_AT_import -; CHECK-NEXT:.b8 29 // Abbrev [29] 0xc69:0x7 DW_TAG_imported_declaration -; CHECK-NEXT:.b8 8 // DW_AT_decl_file -; CHECK-NEXT:.b8 154 // DW_AT_decl_line -; CHECK-NEXT:.b32 7431 // DW_AT_import -; CHECK-NEXT:.b8 29 // Abbrev [29] 0xc70:0x7 DW_TAG_imported_declaration -; CHECK-NEXT:.b8 8 // DW_AT_decl_file -; CHECK-NEXT:.b8 155 // DW_AT_decl_line -; CHECK-NEXT:.b32 7449 // DW_AT_import -; CHECK-NEXT:.b8 29 // Abbrev [29] 0xc77:0x7 DW_TAG_imported_declaration -; CHECK-NEXT:.b8 8 // DW_AT_decl_file -; CHECK-NEXT:.b8 156 // DW_AT_decl_line -; CHECK-NEXT:.b32 7481 // DW_AT_import -; CHECK-NEXT:.b8 29 // Abbrev [29] 0xc7e:0x7 DW_TAG_imported_declaration -; CHECK-NEXT:.b8 8 // DW_AT_decl_file -; CHECK-NEXT:.b8 157 // DW_AT_decl_line -; CHECK-NEXT:.b32 7513 // DW_AT_import -; CHECK-NEXT:.b8 29 // Abbrev [29] 0xc85:0x7 DW_TAG_imported_declaration -; CHECK-NEXT:.b8 8 // DW_AT_decl_file -; CHECK-NEXT:.b8 158 // DW_AT_decl_line -; CHECK-NEXT:.b32 7546 // DW_AT_import -; CHECK-NEXT:.b8 29 // Abbrev [29] 0xc8c:0x7 DW_TAG_imported_declaration -; CHECK-NEXT:.b8 8 // DW_AT_decl_file -; CHECK-NEXT:.b8 160 // DW_AT_decl_line -; CHECK-NEXT:.b32 7569 // DW_AT_import -; CHECK-NEXT:.b8 29 // Abbrev [29] 0xc93:0x7 DW_TAG_imported_declaration -; CHECK-NEXT:.b8 8 // DW_AT_decl_file -; CHECK-NEXT:.b8 161 // DW_AT_decl_line -; CHECK-NEXT:.b32 7614 // DW_AT_import -; CHECK-NEXT:.b8 29 // Abbrev [29] 0xc9a:0x7 DW_TAG_imported_declaration -; CHECK-NEXT:.b8 8 // DW_AT_decl_file -; CHECK-NEXT:.b8 241 // DW_AT_decl_line -; CHECK-NEXT:.b32 7762 // DW_AT_import -; CHECK-NEXT:.b8 29 // Abbrev [29] 0xca1:0x7 DW_TAG_imported_declaration -; CHECK-NEXT:.b8 8 // DW_AT_decl_file -; CHECK-NEXT:.b8 243 // DW_AT_decl_line -; CHECK-NEXT:.b32 7811 // DW_AT_import -; CHECK-NEXT:.b8 29 // Abbrev [29] 0xca8:0x7 DW_TAG_imported_declaration -; CHECK-NEXT:.b8 8 // DW_AT_decl_file -; CHECK-NEXT:.b8 245 // DW_AT_decl_line -; CHECK-NEXT:.b32 7830 // DW_AT_import -; CHECK-NEXT:.b8 29 // Abbrev [29] 0xcaf:0x7 DW_TAG_imported_declaration -; CHECK-NEXT:.b8 8 // DW_AT_decl_file -; CHECK-NEXT:.b8 246 // DW_AT_decl_line -; CHECK-NEXT:.b32 7716 // DW_AT_import -; CHECK-NEXT:.b8 29 // Abbrev [29] 0xcb6:0x7 DW_TAG_imported_declaration -; CHECK-NEXT:.b8 8 // DW_AT_decl_file -; CHECK-NEXT:.b8 247 // DW_AT_decl_line -; CHECK-NEXT:.b32 7852 // DW_AT_import -; CHECK-NEXT:.b8 29 // Abbrev [29] 0xcbd:0x7 DW_TAG_imported_declaration -; 
CHECK-NEXT:.b8 8 // DW_AT_decl_file -; CHECK-NEXT:.b8 249 // DW_AT_decl_line -; CHECK-NEXT:.b32 7879 // DW_AT_import -; CHECK-NEXT:.b8 29 // Abbrev [29] 0xcc4:0x7 DW_TAG_imported_declaration -; CHECK-NEXT:.b8 8 // DW_AT_decl_file -; CHECK-NEXT:.b8 250 // DW_AT_decl_line -; CHECK-NEXT:.b32 7994 // DW_AT_import -; CHECK-NEXT:.b8 29 // Abbrev [29] 0xccb:0x7 DW_TAG_imported_declaration -; CHECK-NEXT:.b8 8 // DW_AT_decl_file -; CHECK-NEXT:.b8 251 // DW_AT_decl_line -; CHECK-NEXT:.b32 7901 // DW_AT_import -; CHECK-NEXT:.b8 29 // Abbrev [29] 0xcd2:0x7 DW_TAG_imported_declaration -; CHECK-NEXT:.b8 8 // DW_AT_decl_file -; CHECK-NEXT:.b8 252 // DW_AT_decl_line -; CHECK-NEXT:.b32 7934 // DW_AT_import -; CHECK-NEXT:.b8 29 // Abbrev [29] 0xcd9:0x7 DW_TAG_imported_declaration -; CHECK-NEXT:.b8 8 // DW_AT_decl_file -; CHECK-NEXT:.b8 253 // DW_AT_decl_line -; CHECK-NEXT:.b32 8021 // DW_AT_import -; CHECK-NEXT:.b8 30 // Abbrev [30] 0xce0:0x8 DW_TAG_imported_declaration -; CHECK-NEXT:.b8 13 // DW_AT_decl_file -; CHECK-NEXT:.b8 149 // DW_AT_decl_line -; CHECK-NEXT:.b8 1 -; CHECK-NEXT:.b32 8064 // DW_AT_import -; CHECK-NEXT:.b8 30 // Abbrev [30] 0xce8:0x8 DW_TAG_imported_declaration -; CHECK-NEXT:.b8 13 // DW_AT_decl_file -; CHECK-NEXT:.b8 150 // DW_AT_decl_line -; CHECK-NEXT:.b8 1 -; CHECK-NEXT:.b32 8096 // DW_AT_import -; CHECK-NEXT:.b8 30 // Abbrev [30] 0xcf0:0x8 DW_TAG_imported_declaration -; CHECK-NEXT:.b8 13 // DW_AT_decl_file -; CHECK-NEXT:.b8 151 // DW_AT_decl_line -; CHECK-NEXT:.b8 1 -; CHECK-NEXT:.b32 8130 // DW_AT_import -; CHECK-NEXT:.b8 30 // Abbrev [30] 0xcf8:0x8 DW_TAG_imported_declaration -; CHECK-NEXT:.b8 13 // DW_AT_decl_file -; CHECK-NEXT:.b8 152 // DW_AT_decl_line -; CHECK-NEXT:.b8 1 -; CHECK-NEXT:.b32 8162 // DW_AT_import -; CHECK-NEXT:.b8 30 // Abbrev [30] 0xd00:0x8 DW_TAG_imported_declaration -; CHECK-NEXT:.b8 13 // DW_AT_decl_file -; CHECK-NEXT:.b8 153 // DW_AT_decl_line -; CHECK-NEXT:.b8 1 -; CHECK-NEXT:.b32 8196 // DW_AT_import -; CHECK-NEXT:.b8 30 // Abbrev [30] 0xd08:0x8 DW_TAG_imported_declaration -; CHECK-NEXT:.b8 13 // DW_AT_decl_file -; CHECK-NEXT:.b8 154 // DW_AT_decl_line -; CHECK-NEXT:.b8 1 -; CHECK-NEXT:.b32 8236 // DW_AT_import -; CHECK-NEXT:.b8 30 // Abbrev [30] 0xd10:0x8 DW_TAG_imported_declaration -; CHECK-NEXT:.b8 13 // DW_AT_decl_file -; CHECK-NEXT:.b8 155 // DW_AT_decl_line -; CHECK-NEXT:.b8 1 -; CHECK-NEXT:.b32 8268 // DW_AT_import -; CHECK-NEXT:.b8 30 // Abbrev [30] 0xd18:0x8 DW_TAG_imported_declaration -; CHECK-NEXT:.b8 13 // DW_AT_decl_file -; CHECK-NEXT:.b8 156 // DW_AT_decl_line -; CHECK-NEXT:.b8 1 -; CHECK-NEXT:.b32 8302 // DW_AT_import -; CHECK-NEXT:.b8 30 // Abbrev [30] 0xd20:0x8 DW_TAG_imported_declaration -; CHECK-NEXT:.b8 13 // DW_AT_decl_file -; CHECK-NEXT:.b8 157 // DW_AT_decl_line -; CHECK-NEXT:.b8 1 -; CHECK-NEXT:.b32 8334 // DW_AT_import -; CHECK-NEXT:.b8 30 // Abbrev [30] 0xd28:0x8 DW_TAG_imported_declaration -; CHECK-NEXT:.b8 13 // DW_AT_decl_file -; CHECK-NEXT:.b8 158 // DW_AT_decl_line -; CHECK-NEXT:.b8 1 -; CHECK-NEXT:.b32 8366 // DW_AT_import -; CHECK-NEXT:.b8 30 // Abbrev [30] 0xd30:0x8 DW_TAG_imported_declaration -; CHECK-NEXT:.b8 13 // DW_AT_decl_file -; CHECK-NEXT:.b8 159 // DW_AT_decl_line -; CHECK-NEXT:.b8 1 -; CHECK-NEXT:.b32 8412 // DW_AT_import -; CHECK-NEXT:.b8 30 // Abbrev [30] 0xd38:0x8 DW_TAG_imported_declaration -; CHECK-NEXT:.b8 13 // DW_AT_decl_file -; CHECK-NEXT:.b8 160 // DW_AT_decl_line -; CHECK-NEXT:.b8 1 -; CHECK-NEXT:.b32 8442 // DW_AT_import -; CHECK-NEXT:.b8 30 // Abbrev [30] 0xd40:0x8 DW_TAG_imported_declaration -; 
CHECK-NEXT:.b8 13 // DW_AT_decl_file -; CHECK-NEXT:.b8 161 // DW_AT_decl_line -; CHECK-NEXT:.b8 1 -; CHECK-NEXT:.b32 8474 // DW_AT_import -; CHECK-NEXT:.b8 30 // Abbrev [30] 0xd48:0x8 DW_TAG_imported_declaration -; CHECK-NEXT:.b8 13 // DW_AT_decl_file -; CHECK-NEXT:.b8 162 // DW_AT_decl_line -; CHECK-NEXT:.b8 1 -; CHECK-NEXT:.b32 8506 // DW_AT_import -; CHECK-NEXT:.b8 30 // Abbrev [30] 0xd50:0x8 DW_TAG_imported_declaration -; CHECK-NEXT:.b8 13 // DW_AT_decl_file -; CHECK-NEXT:.b8 163 // DW_AT_decl_line -; CHECK-NEXT:.b8 1 -; CHECK-NEXT:.b32 8536 // DW_AT_import -; CHECK-NEXT:.b8 30 // Abbrev [30] 0xd58:0x8 DW_TAG_imported_declaration -; CHECK-NEXT:.b8 13 // DW_AT_decl_file -; CHECK-NEXT:.b8 164 // DW_AT_decl_line -; CHECK-NEXT:.b8 1 -; CHECK-NEXT:.b32 8568 // DW_AT_import -; CHECK-NEXT:.b8 30 // Abbrev [30] 0xd60:0x8 DW_TAG_imported_declaration -; CHECK-NEXT:.b8 13 // DW_AT_decl_file -; CHECK-NEXT:.b8 165 // DW_AT_decl_line -; CHECK-NEXT:.b8 1 -; CHECK-NEXT:.b32 8598 // DW_AT_import -; CHECK-NEXT:.b8 30 // Abbrev [30] 0xd68:0x8 DW_TAG_imported_declaration -; CHECK-NEXT:.b8 13 // DW_AT_decl_file -; CHECK-NEXT:.b8 166 // DW_AT_decl_line -; CHECK-NEXT:.b8 1 -; CHECK-NEXT:.b32 8632 // DW_AT_import -; CHECK-NEXT:.b8 30 // Abbrev [30] 0xd70:0x8 DW_TAG_imported_declaration -; CHECK-NEXT:.b8 13 // DW_AT_decl_file -; CHECK-NEXT:.b8 167 // DW_AT_decl_line -; CHECK-NEXT:.b8 1 -; CHECK-NEXT:.b32 8664 // DW_AT_import -; CHECK-NEXT:.b8 30 // Abbrev [30] 0xd78:0x8 DW_TAG_imported_declaration -; CHECK-NEXT:.b8 13 // DW_AT_decl_file -; CHECK-NEXT:.b8 168 // DW_AT_decl_line -; CHECK-NEXT:.b8 1 -; CHECK-NEXT:.b32 8702 // DW_AT_import -; CHECK-NEXT:.b8 30 // Abbrev [30] 0xd80:0x8 DW_TAG_imported_declaration -; CHECK-NEXT:.b8 13 // DW_AT_decl_file -; CHECK-NEXT:.b8 169 // DW_AT_decl_line -; CHECK-NEXT:.b8 1 -; CHECK-NEXT:.b32 8736 // DW_AT_import -; CHECK-NEXT:.b8 30 // Abbrev [30] 0xd88:0x8 DW_TAG_imported_declaration -; CHECK-NEXT:.b8 13 // DW_AT_decl_file -; CHECK-NEXT:.b8 170 // DW_AT_decl_line -; CHECK-NEXT:.b8 1 -; CHECK-NEXT:.b32 8778 // DW_AT_import -; CHECK-NEXT:.b8 30 // Abbrev [30] 0xd90:0x8 DW_TAG_imported_declaration -; CHECK-NEXT:.b8 13 // DW_AT_decl_file -; CHECK-NEXT:.b8 171 // DW_AT_decl_line -; CHECK-NEXT:.b8 1 -; CHECK-NEXT:.b32 8816 // DW_AT_import -; CHECK-NEXT:.b8 30 // Abbrev [30] 0xd98:0x8 DW_TAG_imported_declaration -; CHECK-NEXT:.b8 13 // DW_AT_decl_file -; CHECK-NEXT:.b8 172 // DW_AT_decl_line -; CHECK-NEXT:.b8 1 -; CHECK-NEXT:.b32 8854 // DW_AT_import -; CHECK-NEXT:.b8 30 // Abbrev [30] 0xda0:0x8 DW_TAG_imported_declaration -; CHECK-NEXT:.b8 13 // DW_AT_decl_file -; CHECK-NEXT:.b8 173 // DW_AT_decl_line -; CHECK-NEXT:.b8 1 -; CHECK-NEXT:.b32 8892 // DW_AT_import -; CHECK-NEXT:.b8 30 // Abbrev [30] 0xda8:0x8 DW_TAG_imported_declaration -; CHECK-NEXT:.b8 13 // DW_AT_decl_file -; CHECK-NEXT:.b8 174 // DW_AT_decl_line -; CHECK-NEXT:.b8 1 -; CHECK-NEXT:.b32 8933 // DW_AT_import -; CHECK-NEXT:.b8 30 // Abbrev [30] 0xdb0:0x8 DW_TAG_imported_declaration -; CHECK-NEXT:.b8 13 // DW_AT_decl_file -; CHECK-NEXT:.b8 175 // DW_AT_decl_line -; CHECK-NEXT:.b8 1 -; CHECK-NEXT:.b32 8973 // DW_AT_import -; CHECK-NEXT:.b8 30 // Abbrev [30] 0xdb8:0x8 DW_TAG_imported_declaration -; CHECK-NEXT:.b8 13 // DW_AT_decl_file -; CHECK-NEXT:.b8 176 // DW_AT_decl_line -; CHECK-NEXT:.b8 1 -; CHECK-NEXT:.b32 9007 // DW_AT_import -; CHECK-NEXT:.b8 30 // Abbrev [30] 0xdc0:0x8 DW_TAG_imported_declaration -; CHECK-NEXT:.b8 13 // DW_AT_decl_file -; CHECK-NEXT:.b8 177 // DW_AT_decl_line -; CHECK-NEXT:.b8 1 -; 
CHECK-NEXT:.b32 9047 // DW_AT_import -; CHECK-NEXT:.b8 30 // Abbrev [30] 0xdc8:0x8 DW_TAG_imported_declaration -; CHECK-NEXT:.b8 13 // DW_AT_decl_file -; CHECK-NEXT:.b8 178 // DW_AT_decl_line -; CHECK-NEXT:.b8 1 -; CHECK-NEXT:.b32 9083 // DW_AT_import -; CHECK-NEXT:.b8 30 // Abbrev [30] 0xdd0:0x8 DW_TAG_imported_declaration -; CHECK-NEXT:.b8 13 // DW_AT_decl_file -; CHECK-NEXT:.b8 179 // DW_AT_decl_line -; CHECK-NEXT:.b8 1 -; CHECK-NEXT:.b32 9119 // DW_AT_import -; CHECK-NEXT:.b8 30 // Abbrev [30] 0xdd8:0x8 DW_TAG_imported_declaration -; CHECK-NEXT:.b8 13 // DW_AT_decl_file -; CHECK-NEXT:.b8 180 // DW_AT_decl_line -; CHECK-NEXT:.b8 1 -; CHECK-NEXT:.b32 9157 // DW_AT_import -; CHECK-NEXT:.b8 30 // Abbrev [30] 0xde0:0x8 DW_TAG_imported_declaration -; CHECK-NEXT:.b8 13 // DW_AT_decl_file -; CHECK-NEXT:.b8 181 // DW_AT_decl_line -; CHECK-NEXT:.b8 1 -; CHECK-NEXT:.b32 9191 // DW_AT_import -; CHECK-NEXT:.b8 30 // Abbrev [30] 0xde8:0x8 DW_TAG_imported_declaration -; CHECK-NEXT:.b8 13 // DW_AT_decl_file -; CHECK-NEXT:.b8 182 // DW_AT_decl_line -; CHECK-NEXT:.b8 1 -; CHECK-NEXT:.b32 9225 // DW_AT_import -; CHECK-NEXT:.b8 30 // Abbrev [30] 0xdf0:0x8 DW_TAG_imported_declaration -; CHECK-NEXT:.b8 13 // DW_AT_decl_file -; CHECK-NEXT:.b8 183 // DW_AT_decl_line -; CHECK-NEXT:.b8 1 -; CHECK-NEXT:.b32 9257 // DW_AT_import -; CHECK-NEXT:.b8 30 // Abbrev [30] 0xdf8:0x8 DW_TAG_imported_declaration -; CHECK-NEXT:.b8 13 // DW_AT_decl_file -; CHECK-NEXT:.b8 184 // DW_AT_decl_line -; CHECK-NEXT:.b8 1 -; CHECK-NEXT:.b32 9289 // DW_AT_import -; CHECK-NEXT:.b8 30 // Abbrev [30] 0xe00:0x8 DW_TAG_imported_declaration -; CHECK-NEXT:.b8 13 // DW_AT_decl_file -; CHECK-NEXT:.b8 185 // DW_AT_decl_line -; CHECK-NEXT:.b8 1 -; CHECK-NEXT:.b32 9319 // DW_AT_import -; CHECK-NEXT:.b8 30 // Abbrev [30] 0xe08:0x8 DW_TAG_imported_declaration -; CHECK-NEXT:.b8 13 // DW_AT_decl_file -; CHECK-NEXT:.b8 186 // DW_AT_decl_line -; CHECK-NEXT:.b8 1 -; CHECK-NEXT:.b32 9353 // DW_AT_import -; CHECK-NEXT:.b8 30 // Abbrev [30] 0xe10:0x8 DW_TAG_imported_declaration -; CHECK-NEXT:.b8 13 // DW_AT_decl_file -; CHECK-NEXT:.b8 187 // DW_AT_decl_line -; CHECK-NEXT:.b8 1 -; CHECK-NEXT:.b32 9389 // DW_AT_import -; CHECK-NEXT:.b8 30 // Abbrev [30] 0xe18:0x8 DW_TAG_imported_declaration -; CHECK-NEXT:.b8 13 // DW_AT_decl_file -; CHECK-NEXT:.b8 188 // DW_AT_decl_line -; CHECK-NEXT:.b8 1 -; CHECK-NEXT:.b32 9428 // DW_AT_import -; CHECK-NEXT:.b8 30 // Abbrev [30] 0xe20:0x8 DW_TAG_imported_declaration -; CHECK-NEXT:.b8 13 // DW_AT_decl_file -; CHECK-NEXT:.b8 189 // DW_AT_decl_line -; CHECK-NEXT:.b8 1 -; CHECK-NEXT:.b32 9471 // DW_AT_import -; CHECK-NEXT:.b8 30 // Abbrev [30] 0xe28:0x8 DW_TAG_imported_declaration -; CHECK-NEXT:.b8 13 // DW_AT_decl_file -; CHECK-NEXT:.b8 190 // DW_AT_decl_line -; CHECK-NEXT:.b8 1 -; CHECK-NEXT:.b32 9520 // DW_AT_import -; CHECK-NEXT:.b8 30 // Abbrev [30] 0xe30:0x8 DW_TAG_imported_declaration -; CHECK-NEXT:.b8 13 // DW_AT_decl_file -; CHECK-NEXT:.b8 191 // DW_AT_decl_line -; CHECK-NEXT:.b8 1 -; CHECK-NEXT:.b32 9556 // DW_AT_import -; CHECK-NEXT:.b8 30 // Abbrev [30] 0xe38:0x8 DW_TAG_imported_declaration -; CHECK-NEXT:.b8 13 // DW_AT_decl_file -; CHECK-NEXT:.b8 192 // DW_AT_decl_line -; CHECK-NEXT:.b8 1 -; CHECK-NEXT:.b32 9605 // DW_AT_import -; CHECK-NEXT:.b8 30 // Abbrev [30] 0xe40:0x8 DW_TAG_imported_declaration -; CHECK-NEXT:.b8 13 // DW_AT_decl_file -; CHECK-NEXT:.b8 193 // DW_AT_decl_line -; CHECK-NEXT:.b8 1 -; CHECK-NEXT:.b32 9654 // DW_AT_import -; CHECK-NEXT:.b8 30 // Abbrev [30] 0xe48:0x8 DW_TAG_imported_declaration -; 
CHECK-NEXT:.b8 13 // DW_AT_decl_file -; CHECK-NEXT:.b8 194 // DW_AT_decl_line -; CHECK-NEXT:.b8 1 -; CHECK-NEXT:.b32 9686 // DW_AT_import -; CHECK-NEXT:.b8 30 // Abbrev [30] 0xe50:0x8 DW_TAG_imported_declaration -; CHECK-NEXT:.b8 13 // DW_AT_decl_file -; CHECK-NEXT:.b8 195 // DW_AT_decl_line -; CHECK-NEXT:.b8 1 -; CHECK-NEXT:.b32 9720 // DW_AT_import -; CHECK-NEXT:.b8 30 // Abbrev [30] 0xe58:0x8 DW_TAG_imported_declaration -; CHECK-NEXT:.b8 13 // DW_AT_decl_file -; CHECK-NEXT:.b8 196 // DW_AT_decl_line -; CHECK-NEXT:.b8 1 -; CHECK-NEXT:.b32 9764 // DW_AT_import -; CHECK-NEXT:.b8 30 // Abbrev [30] 0xe60:0x8 DW_TAG_imported_declaration -; CHECK-NEXT:.b8 13 // DW_AT_decl_file -; CHECK-NEXT:.b8 197 // DW_AT_decl_line -; CHECK-NEXT:.b8 1 -; CHECK-NEXT:.b32 9806 // DW_AT_import -; CHECK-NEXT:.b8 30 // Abbrev [30] 0xe68:0x8 DW_TAG_imported_declaration -; CHECK-NEXT:.b8 13 // DW_AT_decl_file -; CHECK-NEXT:.b8 198 // DW_AT_decl_line -; CHECK-NEXT:.b8 1 -; CHECK-NEXT:.b32 9836 // DW_AT_import -; CHECK-NEXT:.b8 30 // Abbrev [30] 0xe70:0x8 DW_TAG_imported_declaration -; CHECK-NEXT:.b8 13 // DW_AT_decl_file -; CHECK-NEXT:.b8 199 // DW_AT_decl_line -; CHECK-NEXT:.b8 1 -; CHECK-NEXT:.b32 9868 // DW_AT_import -; CHECK-NEXT:.b8 30 // Abbrev [30] 0xe78:0x8 DW_TAG_imported_declaration -; CHECK-NEXT:.b8 13 // DW_AT_decl_file -; CHECK-NEXT:.b8 200 // DW_AT_decl_line -; CHECK-NEXT:.b8 1 -; CHECK-NEXT:.b32 9900 // DW_AT_import -; CHECK-NEXT:.b8 30 // Abbrev [30] 0xe80:0x8 DW_TAG_imported_declaration -; CHECK-NEXT:.b8 13 // DW_AT_decl_file -; CHECK-NEXT:.b8 201 // DW_AT_decl_line -; CHECK-NEXT:.b8 1 -; CHECK-NEXT:.b32 9930 // DW_AT_import -; CHECK-NEXT:.b8 30 // Abbrev [30] 0xe88:0x8 DW_TAG_imported_declaration -; CHECK-NEXT:.b8 13 // DW_AT_decl_file -; CHECK-NEXT:.b8 202 // DW_AT_decl_line -; CHECK-NEXT:.b8 1 -; CHECK-NEXT:.b32 9962 // DW_AT_import -; CHECK-NEXT:.b8 30 // Abbrev [30] 0xe90:0x8 DW_TAG_imported_declaration -; CHECK-NEXT:.b8 13 // DW_AT_decl_file -; CHECK-NEXT:.b8 203 // DW_AT_decl_line -; CHECK-NEXT:.b8 1 -; CHECK-NEXT:.b32 9998 // DW_AT_import +; CHECK-NEXT:.b32 3379 // DW_AT_type +; CHECK-NEXT:.b8 1 // DW_AT_declaration +; CHECK-NEXT:.b8 6 // Abbrev [6] 0xd2d:0x5 DW_TAG_formal_parameter +; CHECK-NEXT:.b32 3389 // DW_AT_type +; CHECK-NEXT:.b8 0 // End Of Children Mark +; CHECK-NEXT:.b8 7 // Abbrev [7] 0xd33:0xa DW_TAG_base_type +; CHECK-NEXT:.b8 100 // DW_AT_name +; CHECK-NEXT:.b8 111 +; CHECK-NEXT:.b8 117 +; CHECK-NEXT:.b8 98 +; CHECK-NEXT:.b8 108 +; CHECK-NEXT:.b8 101 +; CHECK-NEXT:.b8 0 +; CHECK-NEXT:.b8 4 // DW_AT_encoding +; CHECK-NEXT:.b8 8 // DW_AT_byte_size +; CHECK-NEXT:.b8 8 // Abbrev [8] 0xd3d:0x5 DW_TAG_pointer_type +; CHECK-NEXT:.b32 3394 // DW_AT_type +; CHECK-NEXT:.b8 9 // Abbrev [9] 0xd42:0x5 DW_TAG_const_type +; CHECK-NEXT:.b32 3399 // DW_AT_type +; CHECK-NEXT:.b8 7 // Abbrev [7] 0xd47:0x8 DW_TAG_base_type +; CHECK-NEXT:.b8 99 // DW_AT_name +; CHECK-NEXT:.b8 104 +; CHECK-NEXT:.b8 97 +; CHECK-NEXT:.b8 114 +; CHECK-NEXT:.b8 0 +; CHECK-NEXT:.b8 8 // DW_AT_encoding +; CHECK-NEXT:.b8 1 // DW_AT_byte_size +; CHECK-NEXT:.b8 5 // Abbrev [5] 0xd4f:0x1f DW_TAG_subprogram +; CHECK-NEXT:.b8 95 // DW_AT_MIPS_linkage_name +; CHECK-NEXT:.b8 90 +; CHECK-NEXT:.b8 76 +; CHECK-NEXT:.b8 52 +; CHECK-NEXT:.b8 110 +; CHECK-NEXT:.b8 97 +; CHECK-NEXT:.b8 110 +; CHECK-NEXT:.b8 102 +; CHECK-NEXT:.b8 80 +; CHECK-NEXT:.b8 75 +; CHECK-NEXT:.b8 99 +; CHECK-NEXT:.b8 0 +; CHECK-NEXT:.b8 110 // DW_AT_name +; CHECK-NEXT:.b8 97 +; CHECK-NEXT:.b8 110 +; CHECK-NEXT:.b8 102 +; CHECK-NEXT:.b8 0 +; CHECK-NEXT:.b8 1 // 
DW_AT_decl_file +; CHECK-NEXT:.b8 147 // DW_AT_decl_line +; CHECK-NEXT:.b32 1554 // DW_AT_type +; CHECK-NEXT:.b8 1 // DW_AT_declaration +; CHECK-NEXT:.b8 6 // Abbrev [6] 0xd68:0x5 DW_TAG_formal_parameter +; CHECK-NEXT:.b32 3389 // DW_AT_type ; CHECK-NEXT:.b8 0 // End Of Children Mark -; CHECK-NEXT:.b8 31 // Abbrev [31] 0xe99:0x1b DW_TAG_subprogram +; CHECK-NEXT:.b8 5 // Abbrev [5] 0xd6e:0x27 DW_TAG_subprogram ; CHECK-NEXT:.b8 95 // DW_AT_MIPS_linkage_name ; CHECK-NEXT:.b8 90 ; CHECK-NEXT:.b8 76 -; CHECK-NEXT:.b8 51 +; CHECK-NEXT:.b8 57 +; CHECK-NEXT:.b8 110 +; CHECK-NEXT:.b8 101 ; CHECK-NEXT:.b8 97 +; CHECK-NEXT:.b8 114 ; CHECK-NEXT:.b8 98 -; CHECK-NEXT:.b8 115 -; CHECK-NEXT:.b8 120 +; CHECK-NEXT:.b8 121 +; CHECK-NEXT:.b8 105 +; CHECK-NEXT:.b8 110 +; CHECK-NEXT:.b8 116 +; CHECK-NEXT:.b8 102 ; CHECK-NEXT:.b8 0 -; CHECK-NEXT:.b8 97 // DW_AT_name +; CHECK-NEXT:.b8 110 // DW_AT_name +; CHECK-NEXT:.b8 101 +; CHECK-NEXT:.b8 97 +; CHECK-NEXT:.b8 114 ; CHECK-NEXT:.b8 98 -; CHECK-NEXT:.b8 115 +; CHECK-NEXT:.b8 121 +; CHECK-NEXT:.b8 105 +; CHECK-NEXT:.b8 110 +; CHECK-NEXT:.b8 116 ; CHECK-NEXT:.b8 0 -; CHECK-NEXT:.b8 4 // DW_AT_decl_file -; CHECK-NEXT:.b8 44 // DW_AT_decl_line -; CHECK-NEXT:.b32 3764 // DW_AT_type +; CHECK-NEXT:.b8 1 // DW_AT_decl_file +; CHECK-NEXT:.b8 149 // DW_AT_decl_line +; CHECK-NEXT:.b32 1554 // DW_AT_type ; CHECK-NEXT:.b8 1 // DW_AT_declaration -; CHECK-NEXT:.b8 7 // Abbrev [7] 0xeae:0x5 DW_TAG_formal_parameter -; CHECK-NEXT:.b32 3764 // DW_AT_type +; CHECK-NEXT:.b8 6 // Abbrev [6] 0xd8f:0x5 DW_TAG_formal_parameter +; CHECK-NEXT:.b32 1554 // DW_AT_type ; CHECK-NEXT:.b8 0 // End Of Children Mark -; CHECK-NEXT:.b8 10 // Abbrev [10] 0xeb4:0x11 DW_TAG_base_type -; CHECK-NEXT:.b8 108 // DW_AT_name +; CHECK-NEXT:.b8 5 // Abbrev [5] 0xd95:0x2d DW_TAG_subprogram +; CHECK-NEXT:.b8 95 // DW_AT_MIPS_linkage_name +; CHECK-NEXT:.b8 90 +; CHECK-NEXT:.b8 76 +; CHECK-NEXT:.b8 57 +; CHECK-NEXT:.b8 110 +; CHECK-NEXT:.b8 101 +; CHECK-NEXT:.b8 120 +; CHECK-NEXT:.b8 116 +; CHECK-NEXT:.b8 97 +; CHECK-NEXT:.b8 102 +; CHECK-NEXT:.b8 116 +; CHECK-NEXT:.b8 101 +; CHECK-NEXT:.b8 114 +; CHECK-NEXT:.b8 102 +; CHECK-NEXT:.b8 102 +; CHECK-NEXT:.b8 0 +; CHECK-NEXT:.b8 110 // DW_AT_name +; CHECK-NEXT:.b8 101 +; CHECK-NEXT:.b8 120 +; CHECK-NEXT:.b8 116 +; CHECK-NEXT:.b8 97 +; CHECK-NEXT:.b8 102 +; CHECK-NEXT:.b8 116 +; CHECK-NEXT:.b8 101 +; CHECK-NEXT:.b8 114 +; CHECK-NEXT:.b8 0 +; CHECK-NEXT:.b8 1 // DW_AT_decl_file +; CHECK-NEXT:.b8 151 // DW_AT_decl_line +; CHECK-NEXT:.b32 1554 // DW_AT_type +; CHECK-NEXT:.b8 1 // DW_AT_declaration +; CHECK-NEXT:.b8 6 // Abbrev [6] 0xdb7:0x5 DW_TAG_formal_parameter +; CHECK-NEXT:.b32 1554 // DW_AT_type +; CHECK-NEXT:.b8 6 // Abbrev [6] 0xdbc:0x5 DW_TAG_formal_parameter +; CHECK-NEXT:.b32 1554 // DW_AT_type +; CHECK-NEXT:.b8 0 // End Of Children Mark +; CHECK-NEXT:.b8 5 // Abbrev [5] 0xdc2:0x21 DW_TAG_subprogram +; CHECK-NEXT:.b8 95 // DW_AT_MIPS_linkage_name +; CHECK-NEXT:.b8 90 +; CHECK-NEXT:.b8 76 +; CHECK-NEXT:.b8 51 +; CHECK-NEXT:.b8 112 +; CHECK-NEXT:.b8 111 +; CHECK-NEXT:.b8 119 +; CHECK-NEXT:.b8 102 +; CHECK-NEXT:.b8 105 +; CHECK-NEXT:.b8 0 +; CHECK-NEXT:.b8 112 // DW_AT_name ; CHECK-NEXT:.b8 111 +; CHECK-NEXT:.b8 119 +; CHECK-NEXT:.b8 0 +; CHECK-NEXT:.b8 1 // DW_AT_decl_file +; CHECK-NEXT:.b8 155 // DW_AT_decl_line +; CHECK-NEXT:.b32 1554 // DW_AT_type +; CHECK-NEXT:.b8 1 // DW_AT_declaration +; CHECK-NEXT:.b8 6 // Abbrev [6] 0xdd8:0x5 DW_TAG_formal_parameter +; CHECK-NEXT:.b32 1554 // DW_AT_type +; CHECK-NEXT:.b8 6 // Abbrev [6] 0xddd:0x5 DW_TAG_formal_parameter 
+; CHECK-NEXT:.b32 2332 // DW_AT_type +; CHECK-NEXT:.b8 0 // End Of Children Mark +; CHECK-NEXT:.b8 5 // Abbrev [5] 0xde3:0x2d DW_TAG_subprogram +; CHECK-NEXT:.b8 95 // DW_AT_MIPS_linkage_name +; CHECK-NEXT:.b8 90 +; CHECK-NEXT:.b8 76 +; CHECK-NEXT:.b8 57 +; CHECK-NEXT:.b8 114 +; CHECK-NEXT:.b8 101 +; CHECK-NEXT:.b8 109 +; CHECK-NEXT:.b8 97 +; CHECK-NEXT:.b8 105 ; CHECK-NEXT:.b8 110 -; CHECK-NEXT:.b8 103 -; CHECK-NEXT:.b8 32 -; CHECK-NEXT:.b8 108 +; CHECK-NEXT:.b8 100 +; CHECK-NEXT:.b8 101 +; CHECK-NEXT:.b8 114 +; CHECK-NEXT:.b8 102 +; CHECK-NEXT:.b8 102 +; CHECK-NEXT:.b8 0 +; CHECK-NEXT:.b8 114 // DW_AT_name +; CHECK-NEXT:.b8 101 +; CHECK-NEXT:.b8 109 +; CHECK-NEXT:.b8 97 +; CHECK-NEXT:.b8 105 +; CHECK-NEXT:.b8 110 +; CHECK-NEXT:.b8 100 +; CHECK-NEXT:.b8 101 +; CHECK-NEXT:.b8 114 +; CHECK-NEXT:.b8 0 +; CHECK-NEXT:.b8 1 // DW_AT_decl_file +; CHECK-NEXT:.b8 157 // DW_AT_decl_line +; CHECK-NEXT:.b32 1554 // DW_AT_type +; CHECK-NEXT:.b8 1 // DW_AT_declaration +; CHECK-NEXT:.b8 6 // Abbrev [6] 0xe05:0x5 DW_TAG_formal_parameter +; CHECK-NEXT:.b32 1554 // DW_AT_type +; CHECK-NEXT:.b8 6 // Abbrev [6] 0xe0a:0x5 DW_TAG_formal_parameter +; CHECK-NEXT:.b32 1554 // DW_AT_type +; CHECK-NEXT:.b8 0 // End Of Children Mark +; CHECK-NEXT:.b8 5 // Abbrev [5] 0xe10:0x2e DW_TAG_subprogram +; CHECK-NEXT:.b8 95 // DW_AT_MIPS_linkage_name +; CHECK-NEXT:.b8 90 +; CHECK-NEXT:.b8 76 +; CHECK-NEXT:.b8 54 +; CHECK-NEXT:.b8 114 +; CHECK-NEXT:.b8 101 +; CHECK-NEXT:.b8 109 +; CHECK-NEXT:.b8 113 +; CHECK-NEXT:.b8 117 +; CHECK-NEXT:.b8 111 +; CHECK-NEXT:.b8 102 +; CHECK-NEXT:.b8 102 +; CHECK-NEXT:.b8 80 +; CHECK-NEXT:.b8 105 +; CHECK-NEXT:.b8 0 +; CHECK-NEXT:.b8 114 // DW_AT_name +; CHECK-NEXT:.b8 101 +; CHECK-NEXT:.b8 109 +; CHECK-NEXT:.b8 113 +; CHECK-NEXT:.b8 117 ; CHECK-NEXT:.b8 111 +; CHECK-NEXT:.b8 0 +; CHECK-NEXT:.b8 1 // DW_AT_decl_file +; CHECK-NEXT:.b8 159 // DW_AT_decl_line +; CHECK-NEXT:.b32 1554 // DW_AT_type +; CHECK-NEXT:.b8 1 // DW_AT_declaration +; CHECK-NEXT:.b8 6 // Abbrev [6] 0xe2e:0x5 DW_TAG_formal_parameter +; CHECK-NEXT:.b32 1554 // DW_AT_type +; CHECK-NEXT:.b8 6 // Abbrev [6] 0xe33:0x5 DW_TAG_formal_parameter +; CHECK-NEXT:.b32 1554 // DW_AT_type +; CHECK-NEXT:.b8 6 // Abbrev [6] 0xe38:0x5 DW_TAG_formal_parameter +; CHECK-NEXT:.b32 2377 // DW_AT_type +; CHECK-NEXT:.b8 0 // End Of Children Mark +; CHECK-NEXT:.b8 5 // Abbrev [5] 0xe3e:0x1d DW_TAG_subprogram +; CHECK-NEXT:.b8 95 // DW_AT_MIPS_linkage_name +; CHECK-NEXT:.b8 90 +; CHECK-NEXT:.b8 76 +; CHECK-NEXT:.b8 52 +; CHECK-NEXT:.b8 114 +; CHECK-NEXT:.b8 105 ; CHECK-NEXT:.b8 110 -; CHECK-NEXT:.b8 103 -; CHECK-NEXT:.b8 32 +; CHECK-NEXT:.b8 116 +; CHECK-NEXT:.b8 102 +; CHECK-NEXT:.b8 0 +; CHECK-NEXT:.b8 114 // DW_AT_name ; CHECK-NEXT:.b8 105 ; CHECK-NEXT:.b8 110 ; CHECK-NEXT:.b8 116 ; CHECK-NEXT:.b8 0 -; CHECK-NEXT:.b8 5 // DW_AT_encoding -; CHECK-NEXT:.b8 8 // DW_AT_byte_size -; CHECK-NEXT:.b8 31 // Abbrev [31] 0xec5:0x1d DW_TAG_subprogram +; CHECK-NEXT:.b8 1 // DW_AT_decl_file +; CHECK-NEXT:.b8 161 // DW_AT_decl_line +; CHECK-NEXT:.b32 1554 // DW_AT_type +; CHECK-NEXT:.b8 1 // DW_AT_declaration +; CHECK-NEXT:.b8 6 // Abbrev [6] 0xe55:0x5 DW_TAG_formal_parameter +; CHECK-NEXT:.b32 1554 // DW_AT_type +; CHECK-NEXT:.b8 0 // End Of Children Mark +; CHECK-NEXT:.b8 5 // Abbrev [5] 0xe5b:0x1f DW_TAG_subprogram ; CHECK-NEXT:.b8 95 // DW_AT_MIPS_linkage_name ; CHECK-NEXT:.b8 90 ; CHECK-NEXT:.b8 76 -; CHECK-NEXT:.b8 52 -; CHECK-NEXT:.b8 97 -; CHECK-NEXT:.b8 99 +; CHECK-NEXT:.b8 53 +; CHECK-NEXT:.b8 114 ; CHECK-NEXT:.b8 111 -; CHECK-NEXT:.b8 115 +; 
CHECK-NEXT:.b8 117 +; CHECK-NEXT:.b8 110 +; CHECK-NEXT:.b8 100 ; CHECK-NEXT:.b8 102 ; CHECK-NEXT:.b8 0 -; CHECK-NEXT:.b8 97 // DW_AT_name -; CHECK-NEXT:.b8 99 +; CHECK-NEXT:.b8 114 // DW_AT_name ; CHECK-NEXT:.b8 111 -; CHECK-NEXT:.b8 115 +; CHECK-NEXT:.b8 117 +; CHECK-NEXT:.b8 110 +; CHECK-NEXT:.b8 100 ; CHECK-NEXT:.b8 0 -; CHECK-NEXT:.b8 4 // DW_AT_decl_file -; CHECK-NEXT:.b8 46 // DW_AT_decl_line -; CHECK-NEXT:.b32 2116 // DW_AT_type +; CHECK-NEXT:.b8 1 // DW_AT_decl_file +; CHECK-NEXT:.b8 163 // DW_AT_decl_line +; CHECK-NEXT:.b32 1554 // DW_AT_type ; CHECK-NEXT:.b8 1 // DW_AT_declaration -; CHECK-NEXT:.b8 7 // Abbrev [7] 0xedc:0x5 DW_TAG_formal_parameter -; CHECK-NEXT:.b32 2116 // DW_AT_type +; CHECK-NEXT:.b8 6 // Abbrev [6] 0xe74:0x5 DW_TAG_formal_parameter +; CHECK-NEXT:.b32 1554 // DW_AT_type ; CHECK-NEXT:.b8 0 // End Of Children Mark -; CHECK-NEXT:.b8 31 // Abbrev [31] 0xee2:0x1f DW_TAG_subprogram +; CHECK-NEXT:.b8 5 // Abbrev [5] 0xe7a:0x29 DW_TAG_subprogram ; CHECK-NEXT:.b8 95 // DW_AT_MIPS_linkage_name ; CHECK-NEXT:.b8 90 ; CHECK-NEXT:.b8 76 -; CHECK-NEXT:.b8 53 +; CHECK-NEXT:.b8 55 +; CHECK-NEXT:.b8 115 +; CHECK-NEXT:.b8 99 ; CHECK-NEXT:.b8 97 +; CHECK-NEXT:.b8 108 +; CHECK-NEXT:.b8 98 +; CHECK-NEXT:.b8 108 +; CHECK-NEXT:.b8 110 +; CHECK-NEXT:.b8 102 +; CHECK-NEXT:.b8 108 +; CHECK-NEXT:.b8 0 +; CHECK-NEXT:.b8 115 // DW_AT_name ; CHECK-NEXT:.b8 99 -; CHECK-NEXT:.b8 111 +; CHECK-NEXT:.b8 97 +; CHECK-NEXT:.b8 108 +; CHECK-NEXT:.b8 98 +; CHECK-NEXT:.b8 108 +; CHECK-NEXT:.b8 110 +; CHECK-NEXT:.b8 0 +; CHECK-NEXT:.b8 1 // DW_AT_decl_file +; CHECK-NEXT:.b8 165 // DW_AT_decl_line +; CHECK-NEXT:.b32 1554 // DW_AT_type +; CHECK-NEXT:.b8 1 // DW_AT_declaration +; CHECK-NEXT:.b8 6 // Abbrev [6] 0xe98:0x5 DW_TAG_formal_parameter +; CHECK-NEXT:.b32 1554 // DW_AT_type +; CHECK-NEXT:.b8 6 // Abbrev [6] 0xe9d:0x5 DW_TAG_formal_parameter +; CHECK-NEXT:.b32 2917 // DW_AT_type +; CHECK-NEXT:.b8 0 // End Of Children Mark +; CHECK-NEXT:.b8 5 // Abbrev [5] 0xea3:0x27 DW_TAG_subprogram +; CHECK-NEXT:.b8 95 // DW_AT_MIPS_linkage_name +; CHECK-NEXT:.b8 90 +; CHECK-NEXT:.b8 76 +; CHECK-NEXT:.b8 54 ; CHECK-NEXT:.b8 115 -; CHECK-NEXT:.b8 104 +; CHECK-NEXT:.b8 99 +; CHECK-NEXT:.b8 97 +; CHECK-NEXT:.b8 108 +; CHECK-NEXT:.b8 98 +; CHECK-NEXT:.b8 110 ; CHECK-NEXT:.b8 102 +; CHECK-NEXT:.b8 105 ; CHECK-NEXT:.b8 0 -; CHECK-NEXT:.b8 97 // DW_AT_name +; CHECK-NEXT:.b8 115 // DW_AT_name ; CHECK-NEXT:.b8 99 -; CHECK-NEXT:.b8 111 +; CHECK-NEXT:.b8 97 +; CHECK-NEXT:.b8 108 +; CHECK-NEXT:.b8 98 +; CHECK-NEXT:.b8 110 +; CHECK-NEXT:.b8 0 +; CHECK-NEXT:.b8 1 // DW_AT_decl_file +; CHECK-NEXT:.b8 167 // DW_AT_decl_line +; CHECK-NEXT:.b32 1554 // DW_AT_type +; CHECK-NEXT:.b8 1 // DW_AT_declaration +; CHECK-NEXT:.b8 6 // Abbrev [6] 0xebf:0x5 DW_TAG_formal_parameter +; CHECK-NEXT:.b32 1554 // DW_AT_type +; CHECK-NEXT:.b8 6 // Abbrev [6] 0xec4:0x5 DW_TAG_formal_parameter +; CHECK-NEXT:.b32 2332 // DW_AT_type +; CHECK-NEXT:.b8 0 // End Of Children Mark +; CHECK-NEXT:.b8 5 // Abbrev [5] 0xeca:0x23 DW_TAG_subprogram +; CHECK-NEXT:.b8 95 // DW_AT_MIPS_linkage_name +; CHECK-NEXT:.b8 90 +; CHECK-NEXT:.b8 76 +; CHECK-NEXT:.b8 55 ; CHECK-NEXT:.b8 115 -; CHECK-NEXT:.b8 104 +; CHECK-NEXT:.b8 105 +; CHECK-NEXT:.b8 103 +; CHECK-NEXT:.b8 110 +; CHECK-NEXT:.b8 98 +; CHECK-NEXT:.b8 105 +; CHECK-NEXT:.b8 116 +; CHECK-NEXT:.b8 102 +; CHECK-NEXT:.b8 0 +; CHECK-NEXT:.b8 115 // DW_AT_name +; CHECK-NEXT:.b8 105 +; CHECK-NEXT:.b8 103 +; CHECK-NEXT:.b8 110 +; CHECK-NEXT:.b8 98 +; CHECK-NEXT:.b8 105 +; CHECK-NEXT:.b8 116 ; CHECK-NEXT:.b8 0 -; 
CHECK-NEXT:.b8 4 // DW_AT_decl_file -; CHECK-NEXT:.b8 48 // DW_AT_decl_line -; CHECK-NEXT:.b32 2116 // DW_AT_type +; CHECK-NEXT:.b8 1 // DW_AT_decl_file +; CHECK-NEXT:.b8 169 // DW_AT_decl_line +; CHECK-NEXT:.b32 2487 // DW_AT_type ; CHECK-NEXT:.b8 1 // DW_AT_declaration -; CHECK-NEXT:.b8 7 // Abbrev [7] 0xefb:0x5 DW_TAG_formal_parameter -; CHECK-NEXT:.b32 2116 // DW_AT_type +; CHECK-NEXT:.b8 6 // Abbrev [6] 0xee7:0x5 DW_TAG_formal_parameter +; CHECK-NEXT:.b32 1554 // DW_AT_type ; CHECK-NEXT:.b8 0 // End Of Children Mark -; CHECK-NEXT:.b8 31 // Abbrev [31] 0xf01:0x1d DW_TAG_subprogram +; CHECK-NEXT:.b8 5 // Abbrev [5] 0xeed:0x1b DW_TAG_subprogram ; CHECK-NEXT:.b8 95 // DW_AT_MIPS_linkage_name ; CHECK-NEXT:.b8 90 ; CHECK-NEXT:.b8 76 -; CHECK-NEXT:.b8 52 -; CHECK-NEXT:.b8 97 +; CHECK-NEXT:.b8 51 ; CHECK-NEXT:.b8 115 ; CHECK-NEXT:.b8 105 ; CHECK-NEXT:.b8 110 ; CHECK-NEXT:.b8 102 ; CHECK-NEXT:.b8 0 -; CHECK-NEXT:.b8 97 // DW_AT_name -; CHECK-NEXT:.b8 115 +; CHECK-NEXT:.b8 115 // DW_AT_name ; CHECK-NEXT:.b8 105 ; CHECK-NEXT:.b8 110 ; CHECK-NEXT:.b8 0 -; CHECK-NEXT:.b8 4 // DW_AT_decl_file -; CHECK-NEXT:.b8 50 // DW_AT_decl_line -; CHECK-NEXT:.b32 2116 // DW_AT_type +; CHECK-NEXT:.b8 1 // DW_AT_decl_file +; CHECK-NEXT:.b8 171 // DW_AT_decl_line +; CHECK-NEXT:.b32 1554 // DW_AT_type ; CHECK-NEXT:.b8 1 // DW_AT_declaration -; CHECK-NEXT:.b8 7 // Abbrev [7] 0xf18:0x5 DW_TAG_formal_parameter -; CHECK-NEXT:.b32 2116 // DW_AT_type +; CHECK-NEXT:.b8 6 // Abbrev [6] 0xf02:0x5 DW_TAG_formal_parameter +; CHECK-NEXT:.b32 1554 // DW_AT_type ; CHECK-NEXT:.b8 0 // End Of Children Mark -; CHECK-NEXT:.b8 31 // Abbrev [31] 0xf1e:0x1f DW_TAG_subprogram +; CHECK-NEXT:.b8 5 // Abbrev [5] 0xf08:0x1d DW_TAG_subprogram ; CHECK-NEXT:.b8 95 // DW_AT_MIPS_linkage_name ; CHECK-NEXT:.b8 90 ; CHECK-NEXT:.b8 76 -; CHECK-NEXT:.b8 53 -; CHECK-NEXT:.b8 97 +; CHECK-NEXT:.b8 52 ; CHECK-NEXT:.b8 115 ; CHECK-NEXT:.b8 105 ; CHECK-NEXT:.b8 110 ; CHECK-NEXT:.b8 104 ; CHECK-NEXT:.b8 102 ; CHECK-NEXT:.b8 0 -; CHECK-NEXT:.b8 97 // DW_AT_name -; CHECK-NEXT:.b8 115 +; CHECK-NEXT:.b8 115 // DW_AT_name ; CHECK-NEXT:.b8 105 ; CHECK-NEXT:.b8 110 ; CHECK-NEXT:.b8 104 ; CHECK-NEXT:.b8 0 -; CHECK-NEXT:.b8 4 // DW_AT_decl_file -; CHECK-NEXT:.b8 52 // DW_AT_decl_line -; CHECK-NEXT:.b32 2116 // DW_AT_type +; CHECK-NEXT:.b8 1 // DW_AT_decl_file +; CHECK-NEXT:.b8 173 // DW_AT_decl_line +; CHECK-NEXT:.b32 1554 // DW_AT_type ; CHECK-NEXT:.b8 1 // DW_AT_declaration -; CHECK-NEXT:.b8 7 // Abbrev [7] 0xf37:0x5 DW_TAG_formal_parameter -; CHECK-NEXT:.b32 2116 // DW_AT_type +; CHECK-NEXT:.b8 6 // Abbrev [6] 0xf1f:0x5 DW_TAG_formal_parameter +; CHECK-NEXT:.b32 1554 // DW_AT_type ; CHECK-NEXT:.b8 0 // End Of Children Mark -; CHECK-NEXT:.b8 31 // Abbrev [31] 0xf3d:0x1d DW_TAG_subprogram +; CHECK-NEXT:.b8 5 // Abbrev [5] 0xf25:0x1d DW_TAG_subprogram ; CHECK-NEXT:.b8 95 // DW_AT_MIPS_linkage_name ; CHECK-NEXT:.b8 90 ; CHECK-NEXT:.b8 76 ; CHECK-NEXT:.b8 52 -; CHECK-NEXT:.b8 97 +; CHECK-NEXT:.b8 115 +; CHECK-NEXT:.b8 113 +; CHECK-NEXT:.b8 114 ; CHECK-NEXT:.b8 116 -; CHECK-NEXT:.b8 97 -; CHECK-NEXT:.b8 110 ; CHECK-NEXT:.b8 102 ; CHECK-NEXT:.b8 0 -; CHECK-NEXT:.b8 97 // DW_AT_name +; CHECK-NEXT:.b8 115 // DW_AT_name +; CHECK-NEXT:.b8 113 +; CHECK-NEXT:.b8 114 ; CHECK-NEXT:.b8 116 -; CHECK-NEXT:.b8 97 -; CHECK-NEXT:.b8 110 ; CHECK-NEXT:.b8 0 -; CHECK-NEXT:.b8 4 // DW_AT_decl_file -; CHECK-NEXT:.b8 56 // DW_AT_decl_line -; CHECK-NEXT:.b32 2116 // DW_AT_type +; CHECK-NEXT:.b8 1 // DW_AT_decl_file +; CHECK-NEXT:.b8 175 // DW_AT_decl_line +; CHECK-NEXT:.b32 1554 // 
DW_AT_type ; CHECK-NEXT:.b8 1 // DW_AT_declaration -; CHECK-NEXT:.b8 7 // Abbrev [7] 0xf54:0x5 DW_TAG_formal_parameter -; CHECK-NEXT:.b32 2116 // DW_AT_type +; CHECK-NEXT:.b8 6 // Abbrev [6] 0xf3c:0x5 DW_TAG_formal_parameter +; CHECK-NEXT:.b32 1554 // DW_AT_type ; CHECK-NEXT:.b8 0 // End Of Children Mark -; CHECK-NEXT:.b8 31 // Abbrev [31] 0xf5a:0x25 DW_TAG_subprogram +; CHECK-NEXT:.b8 5 // Abbrev [5] 0xf42:0x1b DW_TAG_subprogram ; CHECK-NEXT:.b8 95 // DW_AT_MIPS_linkage_name ; CHECK-NEXT:.b8 90 ; CHECK-NEXT:.b8 76 -; CHECK-NEXT:.b8 53 -; CHECK-NEXT:.b8 97 +; CHECK-NEXT:.b8 51 ; CHECK-NEXT:.b8 116 ; CHECK-NEXT:.b8 97 ; CHECK-NEXT:.b8 110 -; CHECK-NEXT:.b8 50 -; CHECK-NEXT:.b8 102 ; CHECK-NEXT:.b8 102 ; CHECK-NEXT:.b8 0 -; CHECK-NEXT:.b8 97 // DW_AT_name -; CHECK-NEXT:.b8 116 +; CHECK-NEXT:.b8 116 // DW_AT_name ; CHECK-NEXT:.b8 97 ; CHECK-NEXT:.b8 110 -; CHECK-NEXT:.b8 50 ; CHECK-NEXT:.b8 0 -; CHECK-NEXT:.b8 4 // DW_AT_decl_file -; CHECK-NEXT:.b8 54 // DW_AT_decl_line -; CHECK-NEXT:.b32 2116 // DW_AT_type +; CHECK-NEXT:.b8 1 // DW_AT_decl_file +; CHECK-NEXT:.b8 177 // DW_AT_decl_line +; CHECK-NEXT:.b32 1554 // DW_AT_type ; CHECK-NEXT:.b8 1 // DW_AT_declaration -; CHECK-NEXT:.b8 7 // Abbrev [7] 0xf74:0x5 DW_TAG_formal_parameter -; CHECK-NEXT:.b32 2116 // DW_AT_type -; CHECK-NEXT:.b8 7 // Abbrev [7] 0xf79:0x5 DW_TAG_formal_parameter -; CHECK-NEXT:.b32 2116 // DW_AT_type +; CHECK-NEXT:.b8 6 // Abbrev [6] 0xf57:0x5 DW_TAG_formal_parameter +; CHECK-NEXT:.b32 1554 // DW_AT_type ; CHECK-NEXT:.b8 0 // End Of Children Mark -; CHECK-NEXT:.b8 31 // Abbrev [31] 0xf7f:0x1f DW_TAG_subprogram +; CHECK-NEXT:.b8 5 // Abbrev [5] 0xf5d:0x1d DW_TAG_subprogram ; CHECK-NEXT:.b8 95 // DW_AT_MIPS_linkage_name ; CHECK-NEXT:.b8 90 ; CHECK-NEXT:.b8 76 -; CHECK-NEXT:.b8 53 -; CHECK-NEXT:.b8 97 +; CHECK-NEXT:.b8 52 ; CHECK-NEXT:.b8 116 ; CHECK-NEXT:.b8 97 ; CHECK-NEXT:.b8 110 ; CHECK-NEXT:.b8 104 ; CHECK-NEXT:.b8 102 ; CHECK-NEXT:.b8 0 -; CHECK-NEXT:.b8 97 // DW_AT_name -; CHECK-NEXT:.b8 116 +; CHECK-NEXT:.b8 116 // DW_AT_name ; CHECK-NEXT:.b8 97 ; CHECK-NEXT:.b8 110 ; CHECK-NEXT:.b8 104 ; CHECK-NEXT:.b8 0 -; CHECK-NEXT:.b8 4 // DW_AT_decl_file -; CHECK-NEXT:.b8 58 // DW_AT_decl_line -; CHECK-NEXT:.b32 2116 // DW_AT_type +; CHECK-NEXT:.b8 1 // DW_AT_decl_file +; CHECK-NEXT:.b8 179 // DW_AT_decl_line +; CHECK-NEXT:.b32 1554 // DW_AT_type ; CHECK-NEXT:.b8 1 // DW_AT_declaration -; CHECK-NEXT:.b8 7 // Abbrev [7] 0xf98:0x5 DW_TAG_formal_parameter -; CHECK-NEXT:.b32 2116 // DW_AT_type +; CHECK-NEXT:.b8 6 // Abbrev [6] 0xf74:0x5 DW_TAG_formal_parameter +; CHECK-NEXT:.b32 1554 // DW_AT_type ; CHECK-NEXT:.b8 0 // End Of Children Mark -; CHECK-NEXT:.b8 31 // Abbrev [31] 0xf9e:0x1d DW_TAG_subprogram +; CHECK-NEXT:.b8 5 // Abbrev [5] 0xf7a:0x21 DW_TAG_subprogram ; CHECK-NEXT:.b8 95 // DW_AT_MIPS_linkage_name ; CHECK-NEXT:.b8 90 ; CHECK-NEXT:.b8 76 -; CHECK-NEXT:.b8 52 -; CHECK-NEXT:.b8 99 -; CHECK-NEXT:.b8 98 -; CHECK-NEXT:.b8 114 +; CHECK-NEXT:.b8 54 ; CHECK-NEXT:.b8 116 +; CHECK-NEXT:.b8 103 +; CHECK-NEXT:.b8 97 +; CHECK-NEXT:.b8 109 +; CHECK-NEXT:.b8 109 +; CHECK-NEXT:.b8 97 ; CHECK-NEXT:.b8 102 ; CHECK-NEXT:.b8 0 -; CHECK-NEXT:.b8 99 // DW_AT_name -; CHECK-NEXT:.b8 98 -; CHECK-NEXT:.b8 114 -; CHECK-NEXT:.b8 116 +; CHECK-NEXT:.b8 116 // DW_AT_name +; CHECK-NEXT:.b8 103 +; CHECK-NEXT:.b8 97 +; CHECK-NEXT:.b8 109 +; CHECK-NEXT:.b8 109 +; CHECK-NEXT:.b8 97 ; CHECK-NEXT:.b8 0 -; CHECK-NEXT:.b8 4 // DW_AT_decl_file -; CHECK-NEXT:.b8 60 // DW_AT_decl_line -; CHECK-NEXT:.b32 2116 // DW_AT_type +; CHECK-NEXT:.b8 1 // DW_AT_decl_file 
+; CHECK-NEXT:.b8 181 // DW_AT_decl_line +; CHECK-NEXT:.b32 1554 // DW_AT_type ; CHECK-NEXT:.b8 1 // DW_AT_declaration -; CHECK-NEXT:.b8 7 // Abbrev [7] 0xfb5:0x5 DW_TAG_formal_parameter -; CHECK-NEXT:.b32 2116 // DW_AT_type +; CHECK-NEXT:.b8 6 // Abbrev [6] 0xf95:0x5 DW_TAG_formal_parameter +; CHECK-NEXT:.b32 1554 // DW_AT_type ; CHECK-NEXT:.b8 0 // End Of Children Mark -; CHECK-NEXT:.b8 31 // Abbrev [31] 0xfbb:0x1d DW_TAG_subprogram +; CHECK-NEXT:.b8 5 // Abbrev [5] 0xf9b:0x1f DW_TAG_subprogram ; CHECK-NEXT:.b8 95 // DW_AT_MIPS_linkage_name ; CHECK-NEXT:.b8 90 ; CHECK-NEXT:.b8 76 -; CHECK-NEXT:.b8 52 +; CHECK-NEXT:.b8 53 +; CHECK-NEXT:.b8 116 +; CHECK-NEXT:.b8 114 +; CHECK-NEXT:.b8 117 +; CHECK-NEXT:.b8 110 ; CHECK-NEXT:.b8 99 -; CHECK-NEXT:.b8 101 -; CHECK-NEXT:.b8 105 -; CHECK-NEXT:.b8 108 ; CHECK-NEXT:.b8 102 ; CHECK-NEXT:.b8 0 -; CHECK-NEXT:.b8 99 // DW_AT_name -; CHECK-NEXT:.b8 101 -; CHECK-NEXT:.b8 105 -; CHECK-NEXT:.b8 108 +; CHECK-NEXT:.b8 116 // DW_AT_name +; CHECK-NEXT:.b8 114 +; CHECK-NEXT:.b8 117 +; CHECK-NEXT:.b8 110 +; CHECK-NEXT:.b8 99 ; CHECK-NEXT:.b8 0 -; CHECK-NEXT:.b8 4 // DW_AT_decl_file -; CHECK-NEXT:.b8 62 // DW_AT_decl_line -; CHECK-NEXT:.b32 2116 // DW_AT_type +; CHECK-NEXT:.b8 1 // DW_AT_decl_file +; CHECK-NEXT:.b8 183 // DW_AT_decl_line +; CHECK-NEXT:.b32 1554 // DW_AT_type ; CHECK-NEXT:.b8 1 // DW_AT_declaration -; CHECK-NEXT:.b8 7 // Abbrev [7] 0xfd2:0x5 DW_TAG_formal_parameter -; CHECK-NEXT:.b32 2116 // DW_AT_type +; CHECK-NEXT:.b8 6 // Abbrev [6] 0xfb4:0x5 DW_TAG_formal_parameter +; CHECK-NEXT:.b32 1554 // DW_AT_type ; CHECK-NEXT:.b8 0 // End Of Children Mark -; CHECK-NEXT:.b8 31 // Abbrev [31] 0xfd8:0x2b DW_TAG_subprogram -; CHECK-NEXT:.b8 95 // DW_AT_MIPS_linkage_name -; CHECK-NEXT:.b8 90 -; CHECK-NEXT:.b8 76 -; CHECK-NEXT:.b8 56 +; CHECK-NEXT:.b8 10 // Abbrev [10] 0xfba:0x14 DW_TAG_subprogram +; CHECK-NEXT:.b8 97 // DW_AT_name ; CHECK-NEXT:.b8 99 ; CHECK-NEXT:.b8 111 -; CHECK-NEXT:.b8 112 -; CHECK-NEXT:.b8 121 ; CHECK-NEXT:.b8 115 -; CHECK-NEXT:.b8 105 -; CHECK-NEXT:.b8 103 -; CHECK-NEXT:.b8 110 -; CHECK-NEXT:.b8 102 -; CHECK-NEXT:.b8 102 ; CHECK-NEXT:.b8 0 -; CHECK-NEXT:.b8 99 // DW_AT_name -; CHECK-NEXT:.b8 111 -; CHECK-NEXT:.b8 112 -; CHECK-NEXT:.b8 121 +; CHECK-NEXT:.b8 2 // DW_AT_decl_file +; CHECK-NEXT:.b8 54 // DW_AT_decl_line +; CHECK-NEXT:.b32 3379 // DW_AT_type +; CHECK-NEXT:.b8 1 // DW_AT_declaration +; CHECK-NEXT:.b8 1 // DW_AT_external +; CHECK-NEXT:.b8 6 // Abbrev [6] 0xfc8:0x5 DW_TAG_formal_parameter +; CHECK-NEXT:.b32 3379 // DW_AT_type +; CHECK-NEXT:.b8 0 // End Of Children Mark +; CHECK-NEXT:.b8 10 // Abbrev [10] 0xfce:0x14 DW_TAG_subprogram +; CHECK-NEXT:.b8 97 // DW_AT_name ; CHECK-NEXT:.b8 115 ; CHECK-NEXT:.b8 105 -; CHECK-NEXT:.b8 103 ; CHECK-NEXT:.b8 110 ; CHECK-NEXT:.b8 0 -; CHECK-NEXT:.b8 4 // DW_AT_decl_file -; CHECK-NEXT:.b8 64 // DW_AT_decl_line -; CHECK-NEXT:.b32 2116 // DW_AT_type +; CHECK-NEXT:.b8 2 // DW_AT_decl_file +; CHECK-NEXT:.b8 56 // DW_AT_decl_line +; CHECK-NEXT:.b32 3379 // DW_AT_type ; CHECK-NEXT:.b8 1 // DW_AT_declaration -; CHECK-NEXT:.b8 7 // Abbrev [7] 0xff8:0x5 DW_TAG_formal_parameter -; CHECK-NEXT:.b32 2116 // DW_AT_type -; CHECK-NEXT:.b8 7 // Abbrev [7] 0xffd:0x5 DW_TAG_formal_parameter -; CHECK-NEXT:.b32 2116 // DW_AT_type +; CHECK-NEXT:.b8 1 // DW_AT_external +; CHECK-NEXT:.b8 6 // Abbrev [6] 0xfdc:0x5 DW_TAG_formal_parameter +; CHECK-NEXT:.b32 3379 // DW_AT_type ; CHECK-NEXT:.b8 0 // End Of Children Mark -; CHECK-NEXT:.b8 31 // Abbrev [31] 0x1003:0x1b DW_TAG_subprogram -; CHECK-NEXT:.b8 95 // 
DW_AT_MIPS_linkage_name -; CHECK-NEXT:.b8 90 -; CHECK-NEXT:.b8 76 -; CHECK-NEXT:.b8 51 -; CHECK-NEXT:.b8 99 -; CHECK-NEXT:.b8 111 -; CHECK-NEXT:.b8 115 -; CHECK-NEXT:.b8 102 +; CHECK-NEXT:.b8 10 // Abbrev [10] 0xfe2:0x14 DW_TAG_subprogram +; CHECK-NEXT:.b8 97 // DW_AT_name +; CHECK-NEXT:.b8 116 +; CHECK-NEXT:.b8 97 +; CHECK-NEXT:.b8 110 +; CHECK-NEXT:.b8 0 +; CHECK-NEXT:.b8 2 // DW_AT_decl_file +; CHECK-NEXT:.b8 58 // DW_AT_decl_line +; CHECK-NEXT:.b32 3379 // DW_AT_type +; CHECK-NEXT:.b8 1 // DW_AT_declaration +; CHECK-NEXT:.b8 1 // DW_AT_external +; CHECK-NEXT:.b8 6 // Abbrev [6] 0xff0:0x5 DW_TAG_formal_parameter +; CHECK-NEXT:.b32 3379 // DW_AT_type +; CHECK-NEXT:.b8 0 // End Of Children Mark +; CHECK-NEXT:.b8 10 // Abbrev [10] 0xff6:0x1a DW_TAG_subprogram +; CHECK-NEXT:.b8 97 // DW_AT_name +; CHECK-NEXT:.b8 116 +; CHECK-NEXT:.b8 97 +; CHECK-NEXT:.b8 110 +; CHECK-NEXT:.b8 50 ; CHECK-NEXT:.b8 0 +; CHECK-NEXT:.b8 2 // DW_AT_decl_file +; CHECK-NEXT:.b8 60 // DW_AT_decl_line +; CHECK-NEXT:.b32 3379 // DW_AT_type +; CHECK-NEXT:.b8 1 // DW_AT_declaration +; CHECK-NEXT:.b8 1 // DW_AT_external +; CHECK-NEXT:.b8 6 // Abbrev [6] 0x1005:0x5 DW_TAG_formal_parameter +; CHECK-NEXT:.b32 3379 // DW_AT_type +; CHECK-NEXT:.b8 6 // Abbrev [6] 0x100a:0x5 DW_TAG_formal_parameter +; CHECK-NEXT:.b32 3379 // DW_AT_type +; CHECK-NEXT:.b8 0 // End Of Children Mark +; CHECK-NEXT:.b8 10 // Abbrev [10] 0x1010:0x14 DW_TAG_subprogram ; CHECK-NEXT:.b8 99 // DW_AT_name -; CHECK-NEXT:.b8 111 -; CHECK-NEXT:.b8 115 +; CHECK-NEXT:.b8 101 +; CHECK-NEXT:.b8 105 +; CHECK-NEXT:.b8 108 ; CHECK-NEXT:.b8 0 -; CHECK-NEXT:.b8 4 // DW_AT_decl_file -; CHECK-NEXT:.b8 66 // DW_AT_decl_line -; CHECK-NEXT:.b32 2116 // DW_AT_type +; CHECK-NEXT:.b8 2 // DW_AT_decl_file +; CHECK-NEXT:.b8 178 // DW_AT_decl_line +; CHECK-NEXT:.b32 3379 // DW_AT_type ; CHECK-NEXT:.b8 1 // DW_AT_declaration -; CHECK-NEXT:.b8 7 // Abbrev [7] 0x1018:0x5 DW_TAG_formal_parameter -; CHECK-NEXT:.b32 2116 // DW_AT_type +; CHECK-NEXT:.b8 1 // DW_AT_external +; CHECK-NEXT:.b8 6 // Abbrev [6] 0x101e:0x5 DW_TAG_formal_parameter +; CHECK-NEXT:.b32 3379 // DW_AT_type ; CHECK-NEXT:.b8 0 // End Of Children Mark -; CHECK-NEXT:.b8 31 // Abbrev [31] 0x101e:0x1d DW_TAG_subprogram -; CHECK-NEXT:.b8 95 // DW_AT_MIPS_linkage_name -; CHECK-NEXT:.b8 90 -; CHECK-NEXT:.b8 76 -; CHECK-NEXT:.b8 52 -; CHECK-NEXT:.b8 99 +; CHECK-NEXT:.b8 10 // Abbrev [10] 0x1024:0x13 DW_TAG_subprogram +; CHECK-NEXT:.b8 99 // DW_AT_name ; CHECK-NEXT:.b8 111 ; CHECK-NEXT:.b8 115 -; CHECK-NEXT:.b8 104 -; CHECK-NEXT:.b8 102 ; CHECK-NEXT:.b8 0 +; CHECK-NEXT:.b8 2 // DW_AT_decl_file +; CHECK-NEXT:.b8 63 // DW_AT_decl_line +; CHECK-NEXT:.b32 3379 // DW_AT_type +; CHECK-NEXT:.b8 1 // DW_AT_declaration +; CHECK-NEXT:.b8 1 // DW_AT_external +; CHECK-NEXT:.b8 6 // Abbrev [6] 0x1031:0x5 DW_TAG_formal_parameter +; CHECK-NEXT:.b32 3379 // DW_AT_type +; CHECK-NEXT:.b8 0 // End Of Children Mark +; CHECK-NEXT:.b8 10 // Abbrev [10] 0x1037:0x14 DW_TAG_subprogram ; CHECK-NEXT:.b8 99 // DW_AT_name ; CHECK-NEXT:.b8 111 ; CHECK-NEXT:.b8 115 ; CHECK-NEXT:.b8 104 ; CHECK-NEXT:.b8 0 -; CHECK-NEXT:.b8 4 // DW_AT_decl_file -; CHECK-NEXT:.b8 68 // DW_AT_decl_line -; CHECK-NEXT:.b32 2116 // DW_AT_type +; CHECK-NEXT:.b8 2 // DW_AT_decl_file +; CHECK-NEXT:.b8 72 // DW_AT_decl_line +; CHECK-NEXT:.b32 3379 // DW_AT_type ; CHECK-NEXT:.b8 1 // DW_AT_declaration -; CHECK-NEXT:.b8 7 // Abbrev [7] 0x1035:0x5 DW_TAG_formal_parameter -; CHECK-NEXT:.b32 2116 // DW_AT_type +; CHECK-NEXT:.b8 1 // DW_AT_external +; CHECK-NEXT:.b8 6 // Abbrev [6] 
0x1045:0x5 DW_TAG_formal_parameter +; CHECK-NEXT:.b32 3379 // DW_AT_type ; CHECK-NEXT:.b8 0 // End Of Children Mark -; CHECK-NEXT:.b8 31 // Abbrev [31] 0x103b:0x1b DW_TAG_subprogram -; CHECK-NEXT:.b8 95 // DW_AT_MIPS_linkage_name -; CHECK-NEXT:.b8 90 -; CHECK-NEXT:.b8 76 -; CHECK-NEXT:.b8 51 -; CHECK-NEXT:.b8 101 -; CHECK-NEXT:.b8 114 -; CHECK-NEXT:.b8 102 -; CHECK-NEXT:.b8 102 -; CHECK-NEXT:.b8 0 +; CHECK-NEXT:.b8 10 // Abbrev [10] 0x104b:0x13 DW_TAG_subprogram ; CHECK-NEXT:.b8 101 // DW_AT_name -; CHECK-NEXT:.b8 114 -; CHECK-NEXT:.b8 102 +; CHECK-NEXT:.b8 120 +; CHECK-NEXT:.b8 112 ; CHECK-NEXT:.b8 0 -; CHECK-NEXT:.b8 4 // DW_AT_decl_file -; CHECK-NEXT:.b8 72 // DW_AT_decl_line -; CHECK-NEXT:.b32 2116 // DW_AT_type +; CHECK-NEXT:.b8 2 // DW_AT_decl_file +; CHECK-NEXT:.b8 100 // DW_AT_decl_line +; CHECK-NEXT:.b32 3379 // DW_AT_type ; CHECK-NEXT:.b8 1 // DW_AT_declaration -; CHECK-NEXT:.b8 7 // Abbrev [7] 0x1050:0x5 DW_TAG_formal_parameter -; CHECK-NEXT:.b32 2116 // DW_AT_type +; CHECK-NEXT:.b8 1 // DW_AT_external +; CHECK-NEXT:.b8 6 // Abbrev [6] 0x1058:0x5 DW_TAG_formal_parameter +; CHECK-NEXT:.b32 3379 // DW_AT_type ; CHECK-NEXT:.b8 0 // End Of Children Mark -; CHECK-NEXT:.b8 31 // Abbrev [31] 0x1056:0x1d DW_TAG_subprogram -; CHECK-NEXT:.b8 95 // DW_AT_MIPS_linkage_name -; CHECK-NEXT:.b8 90 -; CHECK-NEXT:.b8 76 -; CHECK-NEXT:.b8 52 -; CHECK-NEXT:.b8 101 -; CHECK-NEXT:.b8 114 -; CHECK-NEXT:.b8 102 -; CHECK-NEXT:.b8 99 -; CHECK-NEXT:.b8 102 +; CHECK-NEXT:.b8 10 // Abbrev [10] 0x105e:0x14 DW_TAG_subprogram +; CHECK-NEXT:.b8 102 // DW_AT_name +; CHECK-NEXT:.b8 97 +; CHECK-NEXT:.b8 98 +; CHECK-NEXT:.b8 115 ; CHECK-NEXT:.b8 0 -; CHECK-NEXT:.b8 101 // DW_AT_name +; CHECK-NEXT:.b8 2 // DW_AT_decl_file +; CHECK-NEXT:.b8 181 // DW_AT_decl_line +; CHECK-NEXT:.b32 3379 // DW_AT_type +; CHECK-NEXT:.b8 1 // DW_AT_declaration +; CHECK-NEXT:.b8 1 // DW_AT_external +; CHECK-NEXT:.b8 6 // Abbrev [6] 0x106c:0x5 DW_TAG_formal_parameter +; CHECK-NEXT:.b32 3379 // DW_AT_type +; CHECK-NEXT:.b8 0 // End Of Children Mark +; CHECK-NEXT:.b8 10 // Abbrev [10] 0x1072:0x15 DW_TAG_subprogram +; CHECK-NEXT:.b8 102 // DW_AT_name +; CHECK-NEXT:.b8 108 +; CHECK-NEXT:.b8 111 +; CHECK-NEXT:.b8 111 ; CHECK-NEXT:.b8 114 -; CHECK-NEXT:.b8 102 -; CHECK-NEXT:.b8 99 ; CHECK-NEXT:.b8 0 -; CHECK-NEXT:.b8 4 // DW_AT_decl_file -; CHECK-NEXT:.b8 70 // DW_AT_decl_line -; CHECK-NEXT:.b32 2116 // DW_AT_type +; CHECK-NEXT:.b8 2 // DW_AT_decl_file +; CHECK-NEXT:.b8 184 // DW_AT_decl_line +; CHECK-NEXT:.b32 3379 // DW_AT_type ; CHECK-NEXT:.b8 1 // DW_AT_declaration -; CHECK-NEXT:.b8 7 // Abbrev [7] 0x106d:0x5 DW_TAG_formal_parameter -; CHECK-NEXT:.b32 2116 // DW_AT_type +; CHECK-NEXT:.b8 1 // DW_AT_external +; CHECK-NEXT:.b8 6 // Abbrev [6] 0x1081:0x5 DW_TAG_formal_parameter +; CHECK-NEXT:.b32 3379 // DW_AT_type ; CHECK-NEXT:.b8 0 // End Of Children Mark -; CHECK-NEXT:.b8 31 // Abbrev [31] 0x1073:0x1b DW_TAG_subprogram -; CHECK-NEXT:.b8 95 // DW_AT_MIPS_linkage_name -; CHECK-NEXT:.b8 90 -; CHECK-NEXT:.b8 76 -; CHECK-NEXT:.b8 51 -; CHECK-NEXT:.b8 101 -; CHECK-NEXT:.b8 120 -; CHECK-NEXT:.b8 112 -; CHECK-NEXT:.b8 102 -; CHECK-NEXT:.b8 0 -; CHECK-NEXT:.b8 101 // DW_AT_name -; CHECK-NEXT:.b8 120 -; CHECK-NEXT:.b8 112 +; CHECK-NEXT:.b8 10 // Abbrev [10] 0x1087:0x19 DW_TAG_subprogram +; CHECK-NEXT:.b8 102 // DW_AT_name +; CHECK-NEXT:.b8 109 +; CHECK-NEXT:.b8 111 +; CHECK-NEXT:.b8 100 ; CHECK-NEXT:.b8 0 -; CHECK-NEXT:.b8 4 // DW_AT_decl_file -; CHECK-NEXT:.b8 76 // DW_AT_decl_line -; CHECK-NEXT:.b32 2116 // DW_AT_type +; CHECK-NEXT:.b8 2 // 
DW_AT_decl_file +; CHECK-NEXT:.b8 187 // DW_AT_decl_line +; CHECK-NEXT:.b32 3379 // DW_AT_type ; CHECK-NEXT:.b8 1 // DW_AT_declaration -; CHECK-NEXT:.b8 7 // Abbrev [7] 0x1088:0x5 DW_TAG_formal_parameter -; CHECK-NEXT:.b32 2116 // DW_AT_type +; CHECK-NEXT:.b8 1 // DW_AT_external +; CHECK-NEXT:.b8 6 // Abbrev [6] 0x1095:0x5 DW_TAG_formal_parameter +; CHECK-NEXT:.b32 3379 // DW_AT_type +; CHECK-NEXT:.b8 6 // Abbrev [6] 0x109a:0x5 DW_TAG_formal_parameter +; CHECK-NEXT:.b32 3379 // DW_AT_type ; CHECK-NEXT:.b8 0 // End Of Children Mark -; CHECK-NEXT:.b8 31 // Abbrev [31] 0x108e:0x1d DW_TAG_subprogram -; CHECK-NEXT:.b8 95 // DW_AT_MIPS_linkage_name -; CHECK-NEXT:.b8 90 -; CHECK-NEXT:.b8 76 -; CHECK-NEXT:.b8 52 +; CHECK-NEXT:.b8 10 // Abbrev [10] 0x10a0:0x1a DW_TAG_subprogram +; CHECK-NEXT:.b8 102 // DW_AT_name +; CHECK-NEXT:.b8 114 ; CHECK-NEXT:.b8 101 ; CHECK-NEXT:.b8 120 ; CHECK-NEXT:.b8 112 -; CHECK-NEXT:.b8 50 -; CHECK-NEXT:.b8 102 -; CHECK-NEXT:.b8 0 -; CHECK-NEXT:.b8 101 // DW_AT_name -; CHECK-NEXT:.b8 120 -; CHECK-NEXT:.b8 112 -; CHECK-NEXT:.b8 50 ; CHECK-NEXT:.b8 0 -; CHECK-NEXT:.b8 4 // DW_AT_decl_file -; CHECK-NEXT:.b8 74 // DW_AT_decl_line -; CHECK-NEXT:.b32 2116 // DW_AT_type +; CHECK-NEXT:.b8 2 // DW_AT_decl_file +; CHECK-NEXT:.b8 103 // DW_AT_decl_line +; CHECK-NEXT:.b32 3379 // DW_AT_type ; CHECK-NEXT:.b8 1 // DW_AT_declaration -; CHECK-NEXT:.b8 7 // Abbrev [7] 0x10a5:0x5 DW_TAG_formal_parameter -; CHECK-NEXT:.b32 2116 // DW_AT_type +; CHECK-NEXT:.b8 1 // DW_AT_external +; CHECK-NEXT:.b8 6 // Abbrev [6] 0x10af:0x5 DW_TAG_formal_parameter +; CHECK-NEXT:.b32 3379 // DW_AT_type +; CHECK-NEXT:.b8 6 // Abbrev [6] 0x10b4:0x5 DW_TAG_formal_parameter +; CHECK-NEXT:.b32 2377 // DW_AT_type ; CHECK-NEXT:.b8 0 // End Of Children Mark -; CHECK-NEXT:.b8 31 // Abbrev [31] 0x10ab:0x1f DW_TAG_subprogram -; CHECK-NEXT:.b8 95 // DW_AT_MIPS_linkage_name -; CHECK-NEXT:.b8 90 -; CHECK-NEXT:.b8 76 -; CHECK-NEXT:.b8 53 +; CHECK-NEXT:.b8 10 // Abbrev [10] 0x10ba:0x1a DW_TAG_subprogram +; CHECK-NEXT:.b8 108 // DW_AT_name +; CHECK-NEXT:.b8 100 ; CHECK-NEXT:.b8 101 ; CHECK-NEXT:.b8 120 ; CHECK-NEXT:.b8 112 -; CHECK-NEXT:.b8 109 -; CHECK-NEXT:.b8 49 -; CHECK-NEXT:.b8 102 ; CHECK-NEXT:.b8 0 -; CHECK-NEXT:.b8 101 // DW_AT_name -; CHECK-NEXT:.b8 120 -; CHECK-NEXT:.b8 112 -; CHECK-NEXT:.b8 109 +; CHECK-NEXT:.b8 2 // DW_AT_decl_file +; CHECK-NEXT:.b8 106 // DW_AT_decl_line +; CHECK-NEXT:.b32 3379 // DW_AT_type +; CHECK-NEXT:.b8 1 // DW_AT_declaration +; CHECK-NEXT:.b8 1 // DW_AT_external +; CHECK-NEXT:.b8 6 // Abbrev [6] 0x10c9:0x5 DW_TAG_formal_parameter +; CHECK-NEXT:.b32 3379 // DW_AT_type +; CHECK-NEXT:.b8 6 // Abbrev [6] 0x10ce:0x5 DW_TAG_formal_parameter +; CHECK-NEXT:.b32 2332 // DW_AT_type +; CHECK-NEXT:.b8 0 // End Of Children Mark +; CHECK-NEXT:.b8 10 // Abbrev [10] 0x10d4:0x13 DW_TAG_subprogram +; CHECK-NEXT:.b8 108 // DW_AT_name +; CHECK-NEXT:.b8 111 +; CHECK-NEXT:.b8 103 +; CHECK-NEXT:.b8 0 +; CHECK-NEXT:.b8 2 // DW_AT_decl_file +; CHECK-NEXT:.b8 109 // DW_AT_decl_line +; CHECK-NEXT:.b32 3379 // DW_AT_type +; CHECK-NEXT:.b8 1 // DW_AT_declaration +; CHECK-NEXT:.b8 1 // DW_AT_external +; CHECK-NEXT:.b8 6 // Abbrev [6] 0x10e1:0x5 DW_TAG_formal_parameter +; CHECK-NEXT:.b32 3379 // DW_AT_type +; CHECK-NEXT:.b8 0 // End Of Children Mark +; CHECK-NEXT:.b8 10 // Abbrev [10] 0x10e7:0x15 DW_TAG_subprogram +; CHECK-NEXT:.b8 108 // DW_AT_name +; CHECK-NEXT:.b8 111 +; CHECK-NEXT:.b8 103 ; CHECK-NEXT:.b8 49 +; CHECK-NEXT:.b8 48 ; CHECK-NEXT:.b8 0 -; CHECK-NEXT:.b8 4 // DW_AT_decl_file -; CHECK-NEXT:.b8 78 // 
DW_AT_decl_line -; CHECK-NEXT:.b32 2116 // DW_AT_type +; CHECK-NEXT:.b8 2 // DW_AT_decl_file +; CHECK-NEXT:.b8 112 // DW_AT_decl_line +; CHECK-NEXT:.b32 3379 // DW_AT_type ; CHECK-NEXT:.b8 1 // DW_AT_declaration -; CHECK-NEXT:.b8 7 // Abbrev [7] 0x10c4:0x5 DW_TAG_formal_parameter -; CHECK-NEXT:.b32 2116 // DW_AT_type +; CHECK-NEXT:.b8 1 // DW_AT_external +; CHECK-NEXT:.b8 6 // Abbrev [6] 0x10f6:0x5 DW_TAG_formal_parameter +; CHECK-NEXT:.b32 3379 // DW_AT_type ; CHECK-NEXT:.b8 0 // End Of Children Mark -; CHECK-NEXT:.b8 31 // Abbrev [31] 0x10ca:0x1d DW_TAG_subprogram -; CHECK-NEXT:.b8 95 // DW_AT_MIPS_linkage_name -; CHECK-NEXT:.b8 90 -; CHECK-NEXT:.b8 76 -; CHECK-NEXT:.b8 52 -; CHECK-NEXT:.b8 102 -; CHECK-NEXT:.b8 97 -; CHECK-NEXT:.b8 98 -; CHECK-NEXT:.b8 115 +; CHECK-NEXT:.b8 10 // Abbrev [10] 0x10fc:0x19 DW_TAG_subprogram +; CHECK-NEXT:.b8 109 // DW_AT_name +; CHECK-NEXT:.b8 111 +; CHECK-NEXT:.b8 100 ; CHECK-NEXT:.b8 102 ; CHECK-NEXT:.b8 0 -; CHECK-NEXT:.b8 102 // DW_AT_name -; CHECK-NEXT:.b8 97 -; CHECK-NEXT:.b8 98 -; CHECK-NEXT:.b8 115 +; CHECK-NEXT:.b8 2 // DW_AT_decl_file +; CHECK-NEXT:.b8 115 // DW_AT_decl_line +; CHECK-NEXT:.b32 3379 // DW_AT_type +; CHECK-NEXT:.b8 1 // DW_AT_declaration +; CHECK-NEXT:.b8 1 // DW_AT_external +; CHECK-NEXT:.b8 6 // Abbrev [6] 0x110a:0x5 DW_TAG_formal_parameter +; CHECK-NEXT:.b32 3379 // DW_AT_type +; CHECK-NEXT:.b8 6 // Abbrev [6] 0x110f:0x5 DW_TAG_formal_parameter +; CHECK-NEXT:.b32 4373 // DW_AT_type +; CHECK-NEXT:.b8 0 // End Of Children Mark +; CHECK-NEXT:.b8 8 // Abbrev [8] 0x1115:0x5 DW_TAG_pointer_type +; CHECK-NEXT:.b32 3379 // DW_AT_type +; CHECK-NEXT:.b8 10 // Abbrev [10] 0x111a:0x18 DW_TAG_subprogram +; CHECK-NEXT:.b8 112 // DW_AT_name +; CHECK-NEXT:.b8 111 +; CHECK-NEXT:.b8 119 ; CHECK-NEXT:.b8 0 -; CHECK-NEXT:.b8 4 // DW_AT_decl_file -; CHECK-NEXT:.b8 80 // DW_AT_decl_line -; CHECK-NEXT:.b32 2116 // DW_AT_type +; CHECK-NEXT:.b8 2 // DW_AT_decl_file +; CHECK-NEXT:.b8 153 // DW_AT_decl_line +; CHECK-NEXT:.b32 3379 // DW_AT_type ; CHECK-NEXT:.b8 1 // DW_AT_declaration -; CHECK-NEXT:.b8 7 // Abbrev [7] 0x10e1:0x5 DW_TAG_formal_parameter -; CHECK-NEXT:.b32 2116 // DW_AT_type +; CHECK-NEXT:.b8 1 // DW_AT_external +; CHECK-NEXT:.b8 6 // Abbrev [6] 0x1127:0x5 DW_TAG_formal_parameter +; CHECK-NEXT:.b32 3379 // DW_AT_type +; CHECK-NEXT:.b8 6 // Abbrev [6] 0x112c:0x5 DW_TAG_formal_parameter +; CHECK-NEXT:.b32 3379 // DW_AT_type ; CHECK-NEXT:.b8 0 // End Of Children Mark -; CHECK-NEXT:.b8 31 // Abbrev [31] 0x10e7:0x23 DW_TAG_subprogram -; CHECK-NEXT:.b8 95 // DW_AT_MIPS_linkage_name -; CHECK-NEXT:.b8 90 -; CHECK-NEXT:.b8 76 -; CHECK-NEXT:.b8 52 -; CHECK-NEXT:.b8 102 -; CHECK-NEXT:.b8 100 +; CHECK-NEXT:.b8 10 // Abbrev [10] 0x1132:0x13 DW_TAG_subprogram +; CHECK-NEXT:.b8 115 // DW_AT_name ; CHECK-NEXT:.b8 105 -; CHECK-NEXT:.b8 109 -; CHECK-NEXT:.b8 102 -; CHECK-NEXT:.b8 102 +; CHECK-NEXT:.b8 110 ; CHECK-NEXT:.b8 0 -; CHECK-NEXT:.b8 102 // DW_AT_name -; CHECK-NEXT:.b8 100 +; CHECK-NEXT:.b8 2 // DW_AT_decl_file +; CHECK-NEXT:.b8 65 // DW_AT_decl_line +; CHECK-NEXT:.b32 3379 // DW_AT_type +; CHECK-NEXT:.b8 1 // DW_AT_declaration +; CHECK-NEXT:.b8 1 // DW_AT_external +; CHECK-NEXT:.b8 6 // Abbrev [6] 0x113f:0x5 DW_TAG_formal_parameter +; CHECK-NEXT:.b32 3379 // DW_AT_type +; CHECK-NEXT:.b8 0 // End Of Children Mark +; CHECK-NEXT:.b8 10 // Abbrev [10] 0x1145:0x14 DW_TAG_subprogram +; CHECK-NEXT:.b8 115 // DW_AT_name ; CHECK-NEXT:.b8 105 -; CHECK-NEXT:.b8 109 +; CHECK-NEXT:.b8 110 +; CHECK-NEXT:.b8 104 ; CHECK-NEXT:.b8 0 -; CHECK-NEXT:.b8 4 // 
DW_AT_decl_file -; CHECK-NEXT:.b8 82 // DW_AT_decl_line -; CHECK-NEXT:.b32 2116 // DW_AT_type +; CHECK-NEXT:.b8 2 // DW_AT_decl_file +; CHECK-NEXT:.b8 74 // DW_AT_decl_line +; CHECK-NEXT:.b32 3379 // DW_AT_type ; CHECK-NEXT:.b8 1 // DW_AT_declaration -; CHECK-NEXT:.b8 7 // Abbrev [7] 0x10ff:0x5 DW_TAG_formal_parameter -; CHECK-NEXT:.b32 2116 // DW_AT_type -; CHECK-NEXT:.b8 7 // Abbrev [7] 0x1104:0x5 DW_TAG_formal_parameter -; CHECK-NEXT:.b32 2116 // DW_AT_type +; CHECK-NEXT:.b8 1 // DW_AT_external +; CHECK-NEXT:.b8 6 // Abbrev [6] 0x1153:0x5 DW_TAG_formal_parameter +; CHECK-NEXT:.b32 3379 // DW_AT_type ; CHECK-NEXT:.b8 0 // End Of Children Mark -; CHECK-NEXT:.b8 31 // Abbrev [31] 0x110a:0x1f DW_TAG_subprogram -; CHECK-NEXT:.b8 95 // DW_AT_MIPS_linkage_name -; CHECK-NEXT:.b8 90 -; CHECK-NEXT:.b8 76 -; CHECK-NEXT:.b8 53 -; CHECK-NEXT:.b8 102 -; CHECK-NEXT:.b8 108 -; CHECK-NEXT:.b8 111 -; CHECK-NEXT:.b8 111 -; CHECK-NEXT:.b8 114 -; CHECK-NEXT:.b8 102 -; CHECK-NEXT:.b8 0 -; CHECK-NEXT:.b8 102 // DW_AT_name -; CHECK-NEXT:.b8 108 -; CHECK-NEXT:.b8 111 -; CHECK-NEXT:.b8 111 +; CHECK-NEXT:.b8 10 // Abbrev [10] 0x1159:0x14 DW_TAG_subprogram +; CHECK-NEXT:.b8 115 // DW_AT_name +; CHECK-NEXT:.b8 113 ; CHECK-NEXT:.b8 114 +; CHECK-NEXT:.b8 116 ; CHECK-NEXT:.b8 0 -; CHECK-NEXT:.b8 4 // DW_AT_decl_file -; CHECK-NEXT:.b8 84 // DW_AT_decl_line -; CHECK-NEXT:.b32 2116 // DW_AT_type +; CHECK-NEXT:.b8 2 // DW_AT_decl_file +; CHECK-NEXT:.b8 156 // DW_AT_decl_line +; CHECK-NEXT:.b32 3379 // DW_AT_type ; CHECK-NEXT:.b8 1 // DW_AT_declaration -; CHECK-NEXT:.b8 7 // Abbrev [7] 0x1123:0x5 DW_TAG_formal_parameter -; CHECK-NEXT:.b32 2116 // DW_AT_type +; CHECK-NEXT:.b8 1 // DW_AT_external +; CHECK-NEXT:.b8 6 // Abbrev [6] 0x1167:0x5 DW_TAG_formal_parameter +; CHECK-NEXT:.b32 3379 // DW_AT_type ; CHECK-NEXT:.b8 0 // End Of Children Mark -; CHECK-NEXT:.b8 31 // Abbrev [31] 0x1129:0x27 DW_TAG_subprogram -; CHECK-NEXT:.b8 95 // DW_AT_MIPS_linkage_name -; CHECK-NEXT:.b8 90 -; CHECK-NEXT:.b8 76 -; CHECK-NEXT:.b8 51 -; CHECK-NEXT:.b8 102 -; CHECK-NEXT:.b8 109 -; CHECK-NEXT:.b8 97 -; CHECK-NEXT:.b8 102 -; CHECK-NEXT:.b8 102 -; CHECK-NEXT:.b8 102 -; CHECK-NEXT:.b8 0 -; CHECK-NEXT:.b8 102 // DW_AT_name -; CHECK-NEXT:.b8 109 +; CHECK-NEXT:.b8 10 // Abbrev [10] 0x116d:0x13 DW_TAG_subprogram +; CHECK-NEXT:.b8 116 // DW_AT_name ; CHECK-NEXT:.b8 97 +; CHECK-NEXT:.b8 110 ; CHECK-NEXT:.b8 0 -; CHECK-NEXT:.b8 4 // DW_AT_decl_file -; CHECK-NEXT:.b8 86 // DW_AT_decl_line -; CHECK-NEXT:.b32 2116 // DW_AT_type +; CHECK-NEXT:.b8 2 // DW_AT_decl_file +; CHECK-NEXT:.b8 67 // DW_AT_decl_line +; CHECK-NEXT:.b32 3379 // DW_AT_type ; CHECK-NEXT:.b8 1 // DW_AT_declaration -; CHECK-NEXT:.b8 7 // Abbrev [7] 0x1140:0x5 DW_TAG_formal_parameter -; CHECK-NEXT:.b32 2116 // DW_AT_type -; CHECK-NEXT:.b8 7 // Abbrev [7] 0x1145:0x5 DW_TAG_formal_parameter -; CHECK-NEXT:.b32 2116 // DW_AT_type -; CHECK-NEXT:.b8 7 // Abbrev [7] 0x114a:0x5 DW_TAG_formal_parameter -; CHECK-NEXT:.b32 2116 // DW_AT_type +; CHECK-NEXT:.b8 1 // DW_AT_external +; CHECK-NEXT:.b8 6 // Abbrev [6] 0x117a:0x5 DW_TAG_formal_parameter +; CHECK-NEXT:.b32 3379 // DW_AT_type ; CHECK-NEXT:.b8 0 // End Of Children Mark -; CHECK-NEXT:.b8 31 // Abbrev [31] 0x1150:0x23 DW_TAG_subprogram -; CHECK-NEXT:.b8 95 // DW_AT_MIPS_linkage_name -; CHECK-NEXT:.b8 90 -; CHECK-NEXT:.b8 76 -; CHECK-NEXT:.b8 52 -; CHECK-NEXT:.b8 102 -; CHECK-NEXT:.b8 109 -; CHECK-NEXT:.b8 97 -; CHECK-NEXT:.b8 120 -; CHECK-NEXT:.b8 102 -; CHECK-NEXT:.b8 102 -; CHECK-NEXT:.b8 0 -; CHECK-NEXT:.b8 102 // DW_AT_name -; CHECK-NEXT:.b8 
109 +; CHECK-NEXT:.b8 10 // Abbrev [10] 0x1180:0x14 DW_TAG_subprogram +; CHECK-NEXT:.b8 116 // DW_AT_name ; CHECK-NEXT:.b8 97 -; CHECK-NEXT:.b8 120 +; CHECK-NEXT:.b8 110 +; CHECK-NEXT:.b8 104 ; CHECK-NEXT:.b8 0 -; CHECK-NEXT:.b8 4 // DW_AT_decl_file -; CHECK-NEXT:.b8 88 // DW_AT_decl_line -; CHECK-NEXT:.b32 2116 // DW_AT_type +; CHECK-NEXT:.b8 2 // DW_AT_decl_file +; CHECK-NEXT:.b8 76 // DW_AT_decl_line +; CHECK-NEXT:.b32 3379 // DW_AT_type ; CHECK-NEXT:.b8 1 // DW_AT_declaration -; CHECK-NEXT:.b8 7 // Abbrev [7] 0x1168:0x5 DW_TAG_formal_parameter -; CHECK-NEXT:.b32 2116 // DW_AT_type -; CHECK-NEXT:.b8 7 // Abbrev [7] 0x116d:0x5 DW_TAG_formal_parameter -; CHECK-NEXT:.b32 2116 // DW_AT_type +; CHECK-NEXT:.b8 1 // DW_AT_external +; CHECK-NEXT:.b8 6 // Abbrev [6] 0x118e:0x5 DW_TAG_formal_parameter +; CHECK-NEXT:.b32 3379 // DW_AT_type ; CHECK-NEXT:.b8 0 // End Of Children Mark -; CHECK-NEXT:.b8 31 // Abbrev [31] 0x1173:0x23 DW_TAG_subprogram -; CHECK-NEXT:.b8 95 // DW_AT_MIPS_linkage_name -; CHECK-NEXT:.b8 90 -; CHECK-NEXT:.b8 76 -; CHECK-NEXT:.b8 52 -; CHECK-NEXT:.b8 102 -; CHECK-NEXT:.b8 109 +; CHECK-NEXT:.b8 11 // Abbrev [11] 0x1194:0xd DW_TAG_typedef +; CHECK-NEXT:.b32 4513 // DW_AT_type +; CHECK-NEXT:.b8 100 // DW_AT_name ; CHECK-NEXT:.b8 105 -; CHECK-NEXT:.b8 110 -; CHECK-NEXT:.b8 102 -; CHECK-NEXT:.b8 102 +; CHECK-NEXT:.b8 118 +; CHECK-NEXT:.b8 95 +; CHECK-NEXT:.b8 116 ; CHECK-NEXT:.b8 0 -; CHECK-NEXT:.b8 102 // DW_AT_name -; CHECK-NEXT:.b8 109 +; CHECK-NEXT:.b8 4 // DW_AT_decl_file +; CHECK-NEXT:.b8 101 // DW_AT_decl_line +; CHECK-NEXT:.b8 12 // Abbrev [12] 0x11a1:0x2 DW_TAG_structure_type +; CHECK-NEXT:.b8 1 // DW_AT_declaration +; CHECK-NEXT:.b8 11 // Abbrev [11] 0x11a3:0xe DW_TAG_typedef +; CHECK-NEXT:.b32 4529 // DW_AT_type +; CHECK-NEXT:.b8 108 // DW_AT_name +; CHECK-NEXT:.b8 100 ; CHECK-NEXT:.b8 105 -; CHECK-NEXT:.b8 110 +; CHECK-NEXT:.b8 118 +; CHECK-NEXT:.b8 95 +; CHECK-NEXT:.b8 116 ; CHECK-NEXT:.b8 0 ; CHECK-NEXT:.b8 4 // DW_AT_decl_file -; CHECK-NEXT:.b8 90 // DW_AT_decl_line -; CHECK-NEXT:.b32 2116 // DW_AT_type -; CHECK-NEXT:.b8 1 // DW_AT_declaration -; CHECK-NEXT:.b8 7 // Abbrev [7] 0x118b:0x5 DW_TAG_formal_parameter -; CHECK-NEXT:.b32 2116 // DW_AT_type -; CHECK-NEXT:.b8 7 // Abbrev [7] 0x1190:0x5 DW_TAG_formal_parameter -; CHECK-NEXT:.b32 2116 // DW_AT_type -; CHECK-NEXT:.b8 0 // End Of Children Mark -; CHECK-NEXT:.b8 31 // Abbrev [31] 0x1196:0x23 DW_TAG_subprogram -; CHECK-NEXT:.b8 95 // DW_AT_MIPS_linkage_name -; CHECK-NEXT:.b8 90 -; CHECK-NEXT:.b8 76 -; CHECK-NEXT:.b8 52 -; CHECK-NEXT:.b8 102 -; CHECK-NEXT:.b8 109 +; CHECK-NEXT:.b8 109 // DW_AT_decl_line +; CHECK-NEXT:.b8 13 // Abbrev [13] 0x11b1:0x22 DW_TAG_structure_type +; CHECK-NEXT:.b8 16 // DW_AT_byte_size +; CHECK-NEXT:.b8 4 // DW_AT_decl_file +; CHECK-NEXT:.b8 105 // DW_AT_decl_line +; CHECK-NEXT:.b8 14 // Abbrev [14] 0x11b5:0xf DW_TAG_member +; CHECK-NEXT:.b8 113 // DW_AT_name +; CHECK-NEXT:.b8 117 ; CHECK-NEXT:.b8 111 -; CHECK-NEXT:.b8 100 -; CHECK-NEXT:.b8 102 -; CHECK-NEXT:.b8 102 +; CHECK-NEXT:.b8 116 ; CHECK-NEXT:.b8 0 -; CHECK-NEXT:.b8 102 // DW_AT_name +; CHECK-NEXT:.b32 2917 // DW_AT_type +; CHECK-NEXT:.b8 4 // DW_AT_decl_file +; CHECK-NEXT:.b8 107 // DW_AT_decl_line +; CHECK-NEXT:.b8 2 // DW_AT_data_member_location +; CHECK-NEXT:.b8 35 +; CHECK-NEXT:.b8 0 +; CHECK-NEXT:.b8 14 // Abbrev [14] 0x11c4:0xe DW_TAG_member +; CHECK-NEXT:.b8 114 // DW_AT_name +; CHECK-NEXT:.b8 101 ; CHECK-NEXT:.b8 109 -; CHECK-NEXT:.b8 111 -; CHECK-NEXT:.b8 100 ; CHECK-NEXT:.b8 0 +; CHECK-NEXT:.b32 2917 // DW_AT_type ; 
CHECK-NEXT:.b8 4 // DW_AT_decl_file -; CHECK-NEXT:.b8 92 // DW_AT_decl_line -; CHECK-NEXT:.b32 2116 // DW_AT_type -; CHECK-NEXT:.b8 1 // DW_AT_declaration -; CHECK-NEXT:.b8 7 // Abbrev [7] 0x11ae:0x5 DW_TAG_formal_parameter -; CHECK-NEXT:.b32 2116 // DW_AT_type -; CHECK-NEXT:.b8 7 // Abbrev [7] 0x11b3:0x5 DW_TAG_formal_parameter -; CHECK-NEXT:.b32 2116 // DW_AT_type -; CHECK-NEXT:.b8 0 // End Of Children Mark -; CHECK-NEXT:.b8 31 // Abbrev [31] 0x11b9:0x2a DW_TAG_subprogram -; CHECK-NEXT:.b8 95 // DW_AT_MIPS_linkage_name -; CHECK-NEXT:.b8 90 -; CHECK-NEXT:.b8 76 -; CHECK-NEXT:.b8 49 -; CHECK-NEXT:.b8 48 -; CHECK-NEXT:.b8 102 -; CHECK-NEXT:.b8 112 -; CHECK-NEXT:.b8 99 -; CHECK-NEXT:.b8 108 -; CHECK-NEXT:.b8 97 -; CHECK-NEXT:.b8 115 -; CHECK-NEXT:.b8 115 -; CHECK-NEXT:.b8 105 -; CHECK-NEXT:.b8 102 -; CHECK-NEXT:.b8 121 -; CHECK-NEXT:.b8 102 +; CHECK-NEXT:.b8 108 // DW_AT_decl_line +; CHECK-NEXT:.b8 2 // DW_AT_data_member_location +; CHECK-NEXT:.b8 35 +; CHECK-NEXT:.b8 8 +; CHECK-NEXT:.b8 0 // End Of Children Mark +; CHECK-NEXT:.b8 15 // Abbrev [15] 0x11d3:0xd DW_TAG_subprogram +; CHECK-NEXT:.b8 97 // DW_AT_name +; CHECK-NEXT:.b8 98 +; CHECK-NEXT:.b8 111 +; CHECK-NEXT:.b8 114 +; CHECK-NEXT:.b8 116 ; CHECK-NEXT:.b8 0 -; CHECK-NEXT:.b8 102 // DW_AT_name -; CHECK-NEXT:.b8 112 -; CHECK-NEXT:.b8 99 -; CHECK-NEXT:.b8 108 -; CHECK-NEXT:.b8 97 -; CHECK-NEXT:.b8 115 +; CHECK-NEXT:.b8 4 // DW_AT_decl_file +; CHECK-NEXT:.b8 3 // DW_AT_decl_line +; CHECK-NEXT:.b8 2 +; CHECK-NEXT:.b8 1 // DW_AT_declaration +; CHECK-NEXT:.b8 1 // DW_AT_external +; CHECK-NEXT:.b8 1 // DW_AT_noreturn +; CHECK-NEXT:.b8 16 // Abbrev [16] 0x11e0:0x14 DW_TAG_subprogram +; CHECK-NEXT:.b8 97 // DW_AT_name +; CHECK-NEXT:.b8 98 ; CHECK-NEXT:.b8 115 -; CHECK-NEXT:.b8 105 -; CHECK-NEXT:.b8 102 -; CHECK-NEXT:.b8 121 ; CHECK-NEXT:.b8 0 ; CHECK-NEXT:.b8 4 // DW_AT_decl_file -; CHECK-NEXT:.b8 94 // DW_AT_decl_line -; CHECK-NEXT:.b32 4579 // DW_AT_type +; CHECK-NEXT:.b8 7 // DW_AT_decl_line +; CHECK-NEXT:.b8 3 +; CHECK-NEXT:.b32 2332 // DW_AT_type ; CHECK-NEXT:.b8 1 // DW_AT_declaration -; CHECK-NEXT:.b8 7 // Abbrev [7] 0x11dd:0x5 DW_TAG_formal_parameter -; CHECK-NEXT:.b32 2116 // DW_AT_type +; CHECK-NEXT:.b8 1 // DW_AT_external +; CHECK-NEXT:.b8 6 // Abbrev [6] 0x11ee:0x5 DW_TAG_formal_parameter +; CHECK-NEXT:.b32 2332 // DW_AT_type ; CHECK-NEXT:.b8 0 // End Of Children Mark -; CHECK-NEXT:.b8 10 // Abbrev [10] 0x11e3:0x7 DW_TAG_base_type -; CHECK-NEXT:.b8 105 // DW_AT_name -; CHECK-NEXT:.b8 110 +; CHECK-NEXT:.b8 16 // Abbrev [16] 0x11f4:0x17 DW_TAG_subprogram +; CHECK-NEXT:.b8 97 // DW_AT_name ; CHECK-NEXT:.b8 116 -; CHECK-NEXT:.b8 0 -; CHECK-NEXT:.b8 5 // DW_AT_encoding -; CHECK-NEXT:.b8 4 // DW_AT_byte_size -; CHECK-NEXT:.b8 31 // Abbrev [31] 0x11ea:0x26 DW_TAG_subprogram -; CHECK-NEXT:.b8 95 // DW_AT_MIPS_linkage_name -; CHECK-NEXT:.b8 90 -; CHECK-NEXT:.b8 76 -; CHECK-NEXT:.b8 53 -; CHECK-NEXT:.b8 102 -; CHECK-NEXT:.b8 114 ; CHECK-NEXT:.b8 101 ; CHECK-NEXT:.b8 120 -; CHECK-NEXT:.b8 112 -; CHECK-NEXT:.b8 102 -; CHECK-NEXT:.b8 80 ; CHECK-NEXT:.b8 105 -; CHECK-NEXT:.b8 0 -; CHECK-NEXT:.b8 102 // DW_AT_name -; CHECK-NEXT:.b8 114 -; CHECK-NEXT:.b8 101 -; CHECK-NEXT:.b8 120 -; CHECK-NEXT:.b8 112 +; CHECK-NEXT:.b8 116 ; CHECK-NEXT:.b8 0 ; CHECK-NEXT:.b8 4 // DW_AT_decl_file -; CHECK-NEXT:.b8 96 // DW_AT_decl_line -; CHECK-NEXT:.b32 2116 // DW_AT_type +; CHECK-NEXT:.b8 7 // DW_AT_decl_line +; CHECK-NEXT:.b8 2 +; CHECK-NEXT:.b32 2332 // DW_AT_type ; CHECK-NEXT:.b8 1 // DW_AT_declaration -; CHECK-NEXT:.b8 7 // Abbrev [7] 0x1205:0x5 
DW_TAG_formal_parameter -; CHECK-NEXT:.b32 2116 // DW_AT_type -; CHECK-NEXT:.b8 7 // Abbrev [7] 0x120a:0x5 DW_TAG_formal_parameter -; CHECK-NEXT:.b32 4624 // DW_AT_type +; CHECK-NEXT:.b8 1 // DW_AT_external +; CHECK-NEXT:.b8 6 // Abbrev [6] 0x1205:0x5 DW_TAG_formal_parameter +; CHECK-NEXT:.b32 4619 // DW_AT_type ; CHECK-NEXT:.b8 0 // End Of Children Mark -; CHECK-NEXT:.b8 12 // Abbrev [12] 0x1210:0x5 DW_TAG_pointer_type -; CHECK-NEXT:.b32 4579 // DW_AT_type -; CHECK-NEXT:.b8 31 // Abbrev [31] 0x1215:0x25 DW_TAG_subprogram -; CHECK-NEXT:.b8 95 // DW_AT_MIPS_linkage_name -; CHECK-NEXT:.b8 90 -; CHECK-NEXT:.b8 76 -; CHECK-NEXT:.b8 53 -; CHECK-NEXT:.b8 104 -; CHECK-NEXT:.b8 121 -; CHECK-NEXT:.b8 112 -; CHECK-NEXT:.b8 111 +; CHECK-NEXT:.b8 8 // Abbrev [8] 0x120b:0x5 DW_TAG_pointer_type +; CHECK-NEXT:.b32 4624 // DW_AT_type +; CHECK-NEXT:.b8 17 // Abbrev [17] 0x1210:0x1 DW_TAG_subroutine_type +; CHECK-NEXT:.b8 10 // Abbrev [10] 0x1211:0x14 DW_TAG_subprogram +; CHECK-NEXT:.b8 97 // DW_AT_name ; CHECK-NEXT:.b8 116 -; CHECK-NEXT:.b8 102 +; CHECK-NEXT:.b8 111 ; CHECK-NEXT:.b8 102 ; CHECK-NEXT:.b8 0 -; CHECK-NEXT:.b8 104 // DW_AT_name -; CHECK-NEXT:.b8 121 -; CHECK-NEXT:.b8 112 -; CHECK-NEXT:.b8 111 +; CHECK-NEXT:.b8 6 // DW_AT_decl_file +; CHECK-NEXT:.b8 26 // DW_AT_decl_line +; CHECK-NEXT:.b32 3379 // DW_AT_type +; CHECK-NEXT:.b8 1 // DW_AT_declaration +; CHECK-NEXT:.b8 1 // DW_AT_external +; CHECK-NEXT:.b8 6 // Abbrev [6] 0x121f:0x5 DW_TAG_formal_parameter +; CHECK-NEXT:.b32 3389 // DW_AT_type +; CHECK-NEXT:.b8 0 // End Of Children Mark +; CHECK-NEXT:.b8 16 // Abbrev [16] 0x1225:0x15 DW_TAG_subprogram +; CHECK-NEXT:.b8 97 // DW_AT_name ; CHECK-NEXT:.b8 116 +; CHECK-NEXT:.b8 111 +; CHECK-NEXT:.b8 105 ; CHECK-NEXT:.b8 0 ; CHECK-NEXT:.b8 4 // DW_AT_decl_file -; CHECK-NEXT:.b8 98 // DW_AT_decl_line -; CHECK-NEXT:.b32 2116 // DW_AT_type +; CHECK-NEXT:.b8 22 // DW_AT_decl_line +; CHECK-NEXT:.b8 1 +; CHECK-NEXT:.b32 2332 // DW_AT_type ; CHECK-NEXT:.b8 1 // DW_AT_declaration -; CHECK-NEXT:.b8 7 // Abbrev [7] 0x122f:0x5 DW_TAG_formal_parameter -; CHECK-NEXT:.b32 2116 // DW_AT_type -; CHECK-NEXT:.b8 7 // Abbrev [7] 0x1234:0x5 DW_TAG_formal_parameter -; CHECK-NEXT:.b32 2116 // DW_AT_type +; CHECK-NEXT:.b8 1 // DW_AT_external +; CHECK-NEXT:.b8 6 // Abbrev [6] 0x1234:0x5 DW_TAG_formal_parameter +; CHECK-NEXT:.b32 3389 // DW_AT_type ; CHECK-NEXT:.b8 0 // End Of Children Mark -; CHECK-NEXT:.b8 31 // Abbrev [31] 0x123a:0x1f DW_TAG_subprogram -; CHECK-NEXT:.b8 95 // DW_AT_MIPS_linkage_name -; CHECK-NEXT:.b8 90 -; CHECK-NEXT:.b8 76 -; CHECK-NEXT:.b8 53 -; CHECK-NEXT:.b8 105 -; CHECK-NEXT:.b8 108 +; CHECK-NEXT:.b8 16 // Abbrev [16] 0x123a:0x15 DW_TAG_subprogram +; CHECK-NEXT:.b8 97 // DW_AT_name +; CHECK-NEXT:.b8 116 ; CHECK-NEXT:.b8 111 -; CHECK-NEXT:.b8 103 -; CHECK-NEXT:.b8 98 -; CHECK-NEXT:.b8 102 -; CHECK-NEXT:.b8 0 -; CHECK-NEXT:.b8 105 // DW_AT_name ; CHECK-NEXT:.b8 108 -; CHECK-NEXT:.b8 111 -; CHECK-NEXT:.b8 103 -; CHECK-NEXT:.b8 98 ; CHECK-NEXT:.b8 0 ; CHECK-NEXT:.b8 4 // DW_AT_decl_file -; CHECK-NEXT:.b8 100 // DW_AT_decl_line -; CHECK-NEXT:.b32 4579 // DW_AT_type +; CHECK-NEXT:.b8 27 // DW_AT_decl_line +; CHECK-NEXT:.b8 1 +; CHECK-NEXT:.b32 2917 // DW_AT_type ; CHECK-NEXT:.b8 1 // DW_AT_declaration -; CHECK-NEXT:.b8 7 // Abbrev [7] 0x1253:0x5 DW_TAG_formal_parameter -; CHECK-NEXT:.b32 2116 // DW_AT_type +; CHECK-NEXT:.b8 1 // DW_AT_external +; CHECK-NEXT:.b8 6 // Abbrev [6] 0x1249:0x5 DW_TAG_formal_parameter +; CHECK-NEXT:.b32 3389 // DW_AT_type ; CHECK-NEXT:.b8 0 // End Of Children Mark -; CHECK-NEXT:.b8 
31 // Abbrev [31] 0x1259:0x25 DW_TAG_subprogram -; CHECK-NEXT:.b8 95 // DW_AT_MIPS_linkage_name -; CHECK-NEXT:.b8 90 -; CHECK-NEXT:.b8 76 -; CHECK-NEXT:.b8 56 -; CHECK-NEXT:.b8 105 +; CHECK-NEXT:.b8 10 // Abbrev [10] 0x124f:0x2b DW_TAG_subprogram +; CHECK-NEXT:.b8 98 // DW_AT_name ; CHECK-NEXT:.b8 115 -; CHECK-NEXT:.b8 102 -; CHECK-NEXT:.b8 105 -; CHECK-NEXT:.b8 110 +; CHECK-NEXT:.b8 101 +; CHECK-NEXT:.b8 97 +; CHECK-NEXT:.b8 114 +; CHECK-NEXT:.b8 99 +; CHECK-NEXT:.b8 104 +; CHECK-NEXT:.b8 0 +; CHECK-NEXT:.b8 7 // DW_AT_decl_file +; CHECK-NEXT:.b8 20 // DW_AT_decl_line +; CHECK-NEXT:.b32 4730 // DW_AT_type +; CHECK-NEXT:.b8 1 // DW_AT_declaration +; CHECK-NEXT:.b8 1 // DW_AT_external +; CHECK-NEXT:.b8 6 // Abbrev [6] 0x1260:0x5 DW_TAG_formal_parameter +; CHECK-NEXT:.b32 4731 // DW_AT_type +; CHECK-NEXT:.b8 6 // Abbrev [6] 0x1265:0x5 DW_TAG_formal_parameter +; CHECK-NEXT:.b32 4731 // DW_AT_type +; CHECK-NEXT:.b8 6 // Abbrev [6] 0x126a:0x5 DW_TAG_formal_parameter +; CHECK-NEXT:.b32 4737 // DW_AT_type +; CHECK-NEXT:.b8 6 // Abbrev [6] 0x126f:0x5 DW_TAG_formal_parameter +; CHECK-NEXT:.b32 4737 // DW_AT_type +; CHECK-NEXT:.b8 6 // Abbrev [6] 0x1274:0x5 DW_TAG_formal_parameter +; CHECK-NEXT:.b32 4772 // DW_AT_type +; CHECK-NEXT:.b8 0 // End Of Children Mark +; CHECK-NEXT:.b8 18 // Abbrev [18] 0x127a:0x1 DW_TAG_pointer_type +; CHECK-NEXT:.b8 8 // Abbrev [8] 0x127b:0x5 DW_TAG_pointer_type +; CHECK-NEXT:.b32 4736 // DW_AT_type +; CHECK-NEXT:.b8 19 // Abbrev [19] 0x1280:0x1 DW_TAG_const_type +; CHECK-NEXT:.b8 11 // Abbrev [11] 0x1281:0xe DW_TAG_typedef +; CHECK-NEXT:.b32 4751 // DW_AT_type +; CHECK-NEXT:.b8 115 // DW_AT_name ; CHECK-NEXT:.b8 105 -; CHECK-NEXT:.b8 116 +; CHECK-NEXT:.b8 122 ; CHECK-NEXT:.b8 101 -; CHECK-NEXT:.b8 102 +; CHECK-NEXT:.b8 95 +; CHECK-NEXT:.b8 116 ; CHECK-NEXT:.b8 0 -; CHECK-NEXT:.b8 105 // DW_AT_name +; CHECK-NEXT:.b8 8 // DW_AT_decl_file +; CHECK-NEXT:.b8 62 // DW_AT_decl_line +; CHECK-NEXT:.b8 7 // Abbrev [7] 0x128f:0x15 DW_TAG_base_type +; CHECK-NEXT:.b8 108 // DW_AT_name +; CHECK-NEXT:.b8 111 +; CHECK-NEXT:.b8 110 +; CHECK-NEXT:.b8 103 +; CHECK-NEXT:.b8 32 +; CHECK-NEXT:.b8 117 +; CHECK-NEXT:.b8 110 ; CHECK-NEXT:.b8 115 -; CHECK-NEXT:.b8 102 ; CHECK-NEXT:.b8 105 +; CHECK-NEXT:.b8 103 ; CHECK-NEXT:.b8 110 +; CHECK-NEXT:.b8 101 +; CHECK-NEXT:.b8 100 +; CHECK-NEXT:.b8 32 ; CHECK-NEXT:.b8 105 +; CHECK-NEXT:.b8 110 ; CHECK-NEXT:.b8 116 -; CHECK-NEXT:.b8 101 ; CHECK-NEXT:.b8 0 -; CHECK-NEXT:.b8 4 // DW_AT_decl_file -; CHECK-NEXT:.b8 102 // DW_AT_decl_line -; CHECK-NEXT:.b32 4734 // DW_AT_type -; CHECK-NEXT:.b8 1 // DW_AT_declaration -; CHECK-NEXT:.b8 7 // Abbrev [7] 0x1278:0x5 DW_TAG_formal_parameter -; CHECK-NEXT:.b32 2116 // DW_AT_type -; CHECK-NEXT:.b8 0 // End Of Children Mark -; CHECK-NEXT:.b8 10 // Abbrev [10] 0x127e:0x8 DW_TAG_base_type -; CHECK-NEXT:.b8 98 // DW_AT_name -; CHECK-NEXT:.b8 111 +; CHECK-NEXT:.b8 7 // DW_AT_encoding +; CHECK-NEXT:.b8 8 // DW_AT_byte_size +; CHECK-NEXT:.b8 20 // Abbrev [20] 0x12a4:0x16 DW_TAG_typedef +; CHECK-NEXT:.b32 4794 // DW_AT_type +; CHECK-NEXT:.b8 95 // DW_AT_name +; CHECK-NEXT:.b8 95 +; CHECK-NEXT:.b8 99 ; CHECK-NEXT:.b8 111 -; CHECK-NEXT:.b8 108 -; CHECK-NEXT:.b8 0 -; CHECK-NEXT:.b8 2 // DW_AT_encoding -; CHECK-NEXT:.b8 1 // DW_AT_byte_size -; CHECK-NEXT:.b8 31 // Abbrev [31] 0x1286:0x2d DW_TAG_subprogram -; CHECK-NEXT:.b8 95 // DW_AT_MIPS_linkage_name -; CHECK-NEXT:.b8 90 -; CHECK-NEXT:.b8 76 -; CHECK-NEXT:.b8 57 -; CHECK-NEXT:.b8 105 -; CHECK-NEXT:.b8 115 -; CHECK-NEXT:.b8 103 -; CHECK-NEXT:.b8 114 -; CHECK-NEXT:.b8 101 +; 
CHECK-NEXT:.b8 109 +; CHECK-NEXT:.b8 112 ; CHECK-NEXT:.b8 97 -; CHECK-NEXT:.b8 116 -; CHECK-NEXT:.b8 101 ; CHECK-NEXT:.b8 114 +; CHECK-NEXT:.b8 95 ; CHECK-NEXT:.b8 102 -; CHECK-NEXT:.b8 102 +; CHECK-NEXT:.b8 110 +; CHECK-NEXT:.b8 95 +; CHECK-NEXT:.b8 116 ; CHECK-NEXT:.b8 0 -; CHECK-NEXT:.b8 105 // DW_AT_name -; CHECK-NEXT:.b8 115 -; CHECK-NEXT:.b8 103 -; CHECK-NEXT:.b8 114 -; CHECK-NEXT:.b8 101 +; CHECK-NEXT:.b8 4 // DW_AT_decl_file +; CHECK-NEXT:.b8 230 // DW_AT_decl_line +; CHECK-NEXT:.b8 2 +; CHECK-NEXT:.b8 8 // Abbrev [8] 0x12ba:0x5 DW_TAG_pointer_type +; CHECK-NEXT:.b32 4799 // DW_AT_type +; CHECK-NEXT:.b8 21 // Abbrev [21] 0x12bf:0x10 DW_TAG_subroutine_type +; CHECK-NEXT:.b32 2332 // DW_AT_type +; CHECK-NEXT:.b8 6 // Abbrev [6] 0x12c4:0x5 DW_TAG_formal_parameter +; CHECK-NEXT:.b32 4731 // DW_AT_type +; CHECK-NEXT:.b8 6 // Abbrev [6] 0x12c9:0x5 DW_TAG_formal_parameter +; CHECK-NEXT:.b32 4731 // DW_AT_type +; CHECK-NEXT:.b8 0 // End Of Children Mark +; CHECK-NEXT:.b8 16 // Abbrev [16] 0x12cf:0x1c DW_TAG_subprogram +; CHECK-NEXT:.b8 99 // DW_AT_name ; CHECK-NEXT:.b8 97 -; CHECK-NEXT:.b8 116 -; CHECK-NEXT:.b8 101 -; CHECK-NEXT:.b8 114 +; CHECK-NEXT:.b8 108 +; CHECK-NEXT:.b8 108 +; CHECK-NEXT:.b8 111 +; CHECK-NEXT:.b8 99 ; CHECK-NEXT:.b8 0 ; CHECK-NEXT:.b8 4 // DW_AT_decl_file -; CHECK-NEXT:.b8 106 // DW_AT_decl_line -; CHECK-NEXT:.b32 4734 // DW_AT_type +; CHECK-NEXT:.b8 212 // DW_AT_decl_line +; CHECK-NEXT:.b8 1 +; CHECK-NEXT:.b32 4730 // DW_AT_type ; CHECK-NEXT:.b8 1 // DW_AT_declaration -; CHECK-NEXT:.b8 7 // Abbrev [7] 0x12a8:0x5 DW_TAG_formal_parameter -; CHECK-NEXT:.b32 2116 // DW_AT_type -; CHECK-NEXT:.b8 7 // Abbrev [7] 0x12ad:0x5 DW_TAG_formal_parameter -; CHECK-NEXT:.b32 2116 // DW_AT_type +; CHECK-NEXT:.b8 1 // DW_AT_external +; CHECK-NEXT:.b8 6 // Abbrev [6] 0x12e0:0x5 DW_TAG_formal_parameter +; CHECK-NEXT:.b32 4737 // DW_AT_type +; CHECK-NEXT:.b8 6 // Abbrev [6] 0x12e5:0x5 DW_TAG_formal_parameter +; CHECK-NEXT:.b32 4737 // DW_AT_type ; CHECK-NEXT:.b8 0 // End Of Children Mark -; CHECK-NEXT:.b8 31 // Abbrev [31] 0x12b3:0x38 DW_TAG_subprogram -; CHECK-NEXT:.b8 95 // DW_AT_MIPS_linkage_name -; CHECK-NEXT:.b8 90 -; CHECK-NEXT:.b8 76 -; CHECK-NEXT:.b8 49 -; CHECK-NEXT:.b8 52 +; CHECK-NEXT:.b8 16 // Abbrev [16] 0x12eb:0x19 DW_TAG_subprogram +; CHECK-NEXT:.b8 100 // DW_AT_name +; CHECK-NEXT:.b8 105 +; CHECK-NEXT:.b8 118 +; CHECK-NEXT:.b8 0 +; CHECK-NEXT:.b8 4 // DW_AT_decl_file +; CHECK-NEXT:.b8 21 // DW_AT_decl_line +; CHECK-NEXT:.b8 3 +; CHECK-NEXT:.b32 4500 // DW_AT_type +; CHECK-NEXT:.b8 1 // DW_AT_declaration +; CHECK-NEXT:.b8 1 // DW_AT_external +; CHECK-NEXT:.b8 6 // Abbrev [6] 0x12f9:0x5 DW_TAG_formal_parameter +; CHECK-NEXT:.b32 2332 // DW_AT_type +; CHECK-NEXT:.b8 6 // Abbrev [6] 0x12fe:0x5 DW_TAG_formal_parameter +; CHECK-NEXT:.b32 2332 // DW_AT_type +; CHECK-NEXT:.b8 0 // End Of Children Mark +; CHECK-NEXT:.b8 22 // Abbrev [22] 0x1304:0x12 DW_TAG_subprogram +; CHECK-NEXT:.b8 101 // DW_AT_name +; CHECK-NEXT:.b8 120 ; CHECK-NEXT:.b8 105 -; CHECK-NEXT:.b8 115 -; CHECK-NEXT:.b8 103 -; CHECK-NEXT:.b8 114 -; CHECK-NEXT:.b8 101 -; CHECK-NEXT:.b8 97 ; CHECK-NEXT:.b8 116 -; CHECK-NEXT:.b8 101 +; CHECK-NEXT:.b8 0 +; CHECK-NEXT:.b8 4 // DW_AT_decl_file +; CHECK-NEXT:.b8 31 // DW_AT_decl_line +; CHECK-NEXT:.b8 2 +; CHECK-NEXT:.b8 1 // DW_AT_declaration +; CHECK-NEXT:.b8 1 // DW_AT_external +; CHECK-NEXT:.b8 1 // DW_AT_noreturn +; CHECK-NEXT:.b8 6 // Abbrev [6] 0x1310:0x5 DW_TAG_formal_parameter +; CHECK-NEXT:.b32 2332 // DW_AT_type +; CHECK-NEXT:.b8 0 // End Of Children Mark +; 
CHECK-NEXT:.b8 23 // Abbrev [23] 0x1316:0x11 DW_TAG_subprogram +; CHECK-NEXT:.b8 102 // DW_AT_name ; CHECK-NEXT:.b8 114 ; CHECK-NEXT:.b8 101 -; CHECK-NEXT:.b8 113 -; CHECK-NEXT:.b8 117 -; CHECK-NEXT:.b8 97 -; CHECK-NEXT:.b8 108 -; CHECK-NEXT:.b8 102 -; CHECK-NEXT:.b8 102 +; CHECK-NEXT:.b8 101 ; CHECK-NEXT:.b8 0 -; CHECK-NEXT:.b8 105 // DW_AT_name -; CHECK-NEXT:.b8 115 -; CHECK-NEXT:.b8 103 -; CHECK-NEXT:.b8 114 +; CHECK-NEXT:.b8 4 // DW_AT_decl_file +; CHECK-NEXT:.b8 227 // DW_AT_decl_line +; CHECK-NEXT:.b8 1 +; CHECK-NEXT:.b8 1 // DW_AT_declaration +; CHECK-NEXT:.b8 1 // DW_AT_external +; CHECK-NEXT:.b8 6 // Abbrev [6] 0x1321:0x5 DW_TAG_formal_parameter +; CHECK-NEXT:.b32 4730 // DW_AT_type +; CHECK-NEXT:.b8 0 // End Of Children Mark +; CHECK-NEXT:.b8 16 // Abbrev [16] 0x1327:0x17 DW_TAG_subprogram +; CHECK-NEXT:.b8 103 // DW_AT_name ; CHECK-NEXT:.b8 101 -; CHECK-NEXT:.b8 97 ; CHECK-NEXT:.b8 116 ; CHECK-NEXT:.b8 101 -; CHECK-NEXT:.b8 114 -; CHECK-NEXT:.b8 101 -; CHECK-NEXT:.b8 113 -; CHECK-NEXT:.b8 117 -; CHECK-NEXT:.b8 97 -; CHECK-NEXT:.b8 108 +; CHECK-NEXT:.b8 110 +; CHECK-NEXT:.b8 118 ; CHECK-NEXT:.b8 0 ; CHECK-NEXT:.b8 4 // DW_AT_decl_file -; CHECK-NEXT:.b8 105 // DW_AT_decl_line -; CHECK-NEXT:.b32 4734 // DW_AT_type +; CHECK-NEXT:.b8 52 // DW_AT_decl_line +; CHECK-NEXT:.b8 2 +; CHECK-NEXT:.b32 4926 // DW_AT_type ; CHECK-NEXT:.b8 1 // DW_AT_declaration -; CHECK-NEXT:.b8 7 // Abbrev [7] 0x12e0:0x5 DW_TAG_formal_parameter -; CHECK-NEXT:.b32 2116 // DW_AT_type -; CHECK-NEXT:.b8 7 // Abbrev [7] 0x12e5:0x5 DW_TAG_formal_parameter -; CHECK-NEXT:.b32 2116 // DW_AT_type +; CHECK-NEXT:.b8 1 // DW_AT_external +; CHECK-NEXT:.b8 6 // Abbrev [6] 0x1338:0x5 DW_TAG_formal_parameter +; CHECK-NEXT:.b32 3389 // DW_AT_type ; CHECK-NEXT:.b8 0 // End Of Children Mark -; CHECK-NEXT:.b8 31 // Abbrev [31] 0x12eb:0x1f DW_TAG_subprogram -; CHECK-NEXT:.b8 95 // DW_AT_MIPS_linkage_name -; CHECK-NEXT:.b8 90 -; CHECK-NEXT:.b8 76 -; CHECK-NEXT:.b8 53 -; CHECK-NEXT:.b8 105 -; CHECK-NEXT:.b8 115 -; CHECK-NEXT:.b8 105 -; CHECK-NEXT:.b8 110 -; CHECK-NEXT:.b8 102 -; CHECK-NEXT:.b8 102 -; CHECK-NEXT:.b8 0 -; CHECK-NEXT:.b8 105 // DW_AT_name +; CHECK-NEXT:.b8 8 // Abbrev [8] 0x133e:0x5 DW_TAG_pointer_type +; CHECK-NEXT:.b32 3399 // DW_AT_type +; CHECK-NEXT:.b8 16 // Abbrev [16] 0x1343:0x15 DW_TAG_subprogram +; CHECK-NEXT:.b8 108 // DW_AT_name +; CHECK-NEXT:.b8 97 +; CHECK-NEXT:.b8 98 ; CHECK-NEXT:.b8 115 -; CHECK-NEXT:.b8 105 -; CHECK-NEXT:.b8 110 -; CHECK-NEXT:.b8 102 ; CHECK-NEXT:.b8 0 ; CHECK-NEXT:.b8 4 // DW_AT_decl_file -; CHECK-NEXT:.b8 108 // DW_AT_decl_line -; CHECK-NEXT:.b32 4734 // DW_AT_type +; CHECK-NEXT:.b8 8 // DW_AT_decl_line +; CHECK-NEXT:.b8 3 +; CHECK-NEXT:.b32 2917 // DW_AT_type ; CHECK-NEXT:.b8 1 // DW_AT_declaration -; CHECK-NEXT:.b8 7 // Abbrev [7] 0x1304:0x5 DW_TAG_formal_parameter -; CHECK-NEXT:.b32 2116 // DW_AT_type +; CHECK-NEXT:.b8 1 // DW_AT_external +; CHECK-NEXT:.b8 6 // Abbrev [6] 0x1352:0x5 DW_TAG_formal_parameter +; CHECK-NEXT:.b32 2917 // DW_AT_type ; CHECK-NEXT:.b8 0 // End Of Children Mark -; CHECK-NEXT:.b8 31 // Abbrev [31] 0x130a:0x27 DW_TAG_subprogram -; CHECK-NEXT:.b8 95 // DW_AT_MIPS_linkage_name -; CHECK-NEXT:.b8 90 -; CHECK-NEXT:.b8 76 -; CHECK-NEXT:.b8 54 +; CHECK-NEXT:.b8 16 // Abbrev [16] 0x1358:0x1a DW_TAG_subprogram +; CHECK-NEXT:.b8 108 // DW_AT_name +; CHECK-NEXT:.b8 100 ; CHECK-NEXT:.b8 105 -; CHECK-NEXT:.b8 115 -; CHECK-NEXT:.b8 108 -; CHECK-NEXT:.b8 101 -; CHECK-NEXT:.b8 115 -; CHECK-NEXT:.b8 115 -; CHECK-NEXT:.b8 102 -; CHECK-NEXT:.b8 102 -; CHECK-NEXT:.b8 0 -; 
CHECK-NEXT:.b8 105 // DW_AT_name -; CHECK-NEXT:.b8 115 -; CHECK-NEXT:.b8 108 -; CHECK-NEXT:.b8 101 -; CHECK-NEXT:.b8 115 -; CHECK-NEXT:.b8 115 +; CHECK-NEXT:.b8 118 ; CHECK-NEXT:.b8 0 ; CHECK-NEXT:.b8 4 // DW_AT_decl_file -; CHECK-NEXT:.b8 112 // DW_AT_decl_line -; CHECK-NEXT:.b32 4734 // DW_AT_type +; CHECK-NEXT:.b8 23 // DW_AT_decl_line +; CHECK-NEXT:.b8 3 +; CHECK-NEXT:.b32 4515 // DW_AT_type ; CHECK-NEXT:.b8 1 // DW_AT_declaration -; CHECK-NEXT:.b8 7 // Abbrev [7] 0x1326:0x5 DW_TAG_formal_parameter -; CHECK-NEXT:.b32 2116 // DW_AT_type -; CHECK-NEXT:.b8 7 // Abbrev [7] 0x132b:0x5 DW_TAG_formal_parameter -; CHECK-NEXT:.b32 2116 // DW_AT_type +; CHECK-NEXT:.b8 1 // DW_AT_external +; CHECK-NEXT:.b8 6 // Abbrev [6] 0x1367:0x5 DW_TAG_formal_parameter +; CHECK-NEXT:.b32 2917 // DW_AT_type +; CHECK-NEXT:.b8 6 // Abbrev [6] 0x136c:0x5 DW_TAG_formal_parameter +; CHECK-NEXT:.b32 2917 // DW_AT_type ; CHECK-NEXT:.b8 0 // End Of Children Mark -; CHECK-NEXT:.b8 31 // Abbrev [31] 0x1331:0x32 DW_TAG_subprogram -; CHECK-NEXT:.b8 95 // DW_AT_MIPS_linkage_name -; CHECK-NEXT:.b8 90 -; CHECK-NEXT:.b8 76 -; CHECK-NEXT:.b8 49 -; CHECK-NEXT:.b8 49 -; CHECK-NEXT:.b8 105 -; CHECK-NEXT:.b8 115 -; CHECK-NEXT:.b8 108 -; CHECK-NEXT:.b8 101 -; CHECK-NEXT:.b8 115 -; CHECK-NEXT:.b8 115 -; CHECK-NEXT:.b8 101 -; CHECK-NEXT:.b8 113 -; CHECK-NEXT:.b8 117 +; CHECK-NEXT:.b8 16 // Abbrev [16] 0x1372:0x17 DW_TAG_subprogram +; CHECK-NEXT:.b8 109 // DW_AT_name ; CHECK-NEXT:.b8 97 ; CHECK-NEXT:.b8 108 -; CHECK-NEXT:.b8 102 -; CHECK-NEXT:.b8 102 -; CHECK-NEXT:.b8 0 -; CHECK-NEXT:.b8 105 // DW_AT_name -; CHECK-NEXT:.b8 115 -; CHECK-NEXT:.b8 108 -; CHECK-NEXT:.b8 101 -; CHECK-NEXT:.b8 115 -; CHECK-NEXT:.b8 115 -; CHECK-NEXT:.b8 101 -; CHECK-NEXT:.b8 113 -; CHECK-NEXT:.b8 117 -; CHECK-NEXT:.b8 97 ; CHECK-NEXT:.b8 108 +; CHECK-NEXT:.b8 111 +; CHECK-NEXT:.b8 99 ; CHECK-NEXT:.b8 0 ; CHECK-NEXT:.b8 4 // DW_AT_decl_file -; CHECK-NEXT:.b8 111 // DW_AT_decl_line -; CHECK-NEXT:.b32 4734 // DW_AT_type +; CHECK-NEXT:.b8 210 // DW_AT_decl_line +; CHECK-NEXT:.b8 1 +; CHECK-NEXT:.b32 4730 // DW_AT_type ; CHECK-NEXT:.b8 1 // DW_AT_declaration -; CHECK-NEXT:.b8 7 // Abbrev [7] 0x1358:0x5 DW_TAG_formal_parameter -; CHECK-NEXT:.b32 2116 // DW_AT_type -; CHECK-NEXT:.b8 7 // Abbrev [7] 0x135d:0x5 DW_TAG_formal_parameter -; CHECK-NEXT:.b32 2116 // DW_AT_type +; CHECK-NEXT:.b8 1 // DW_AT_external +; CHECK-NEXT:.b8 6 // Abbrev [6] 0x1383:0x5 DW_TAG_formal_parameter +; CHECK-NEXT:.b32 4737 // DW_AT_type ; CHECK-NEXT:.b8 0 // End Of Children Mark -; CHECK-NEXT:.b8 31 // Abbrev [31] 0x1363:0x36 DW_TAG_subprogram -; CHECK-NEXT:.b8 95 // DW_AT_MIPS_linkage_name -; CHECK-NEXT:.b8 90 -; CHECK-NEXT:.b8 76 -; CHECK-NEXT:.b8 49 -; CHECK-NEXT:.b8 51 -; CHECK-NEXT:.b8 105 -; CHECK-NEXT:.b8 115 +; CHECK-NEXT:.b8 16 // Abbrev [16] 0x1389:0x1b DW_TAG_subprogram +; CHECK-NEXT:.b8 109 // DW_AT_name +; CHECK-NEXT:.b8 98 ; CHECK-NEXT:.b8 108 ; CHECK-NEXT:.b8 101 +; CHECK-NEXT:.b8 110 +; CHECK-NEXT:.b8 0 +; CHECK-NEXT:.b8 4 // DW_AT_decl_file +; CHECK-NEXT:.b8 95 // DW_AT_decl_line +; CHECK-NEXT:.b8 3 +; CHECK-NEXT:.b32 2332 // DW_AT_type +; CHECK-NEXT:.b8 1 // DW_AT_declaration +; CHECK-NEXT:.b8 1 // DW_AT_external +; CHECK-NEXT:.b8 6 // Abbrev [6] 0x1399:0x5 DW_TAG_formal_parameter +; CHECK-NEXT:.b32 3389 // DW_AT_type +; CHECK-NEXT:.b8 6 // Abbrev [6] 0x139e:0x5 DW_TAG_formal_parameter +; CHECK-NEXT:.b32 4737 // DW_AT_type +; CHECK-NEXT:.b8 0 // End Of Children Mark +; CHECK-NEXT:.b8 16 // Abbrev [16] 0x13a4:0x23 DW_TAG_subprogram +; CHECK-NEXT:.b8 109 // DW_AT_name +; 
CHECK-NEXT:.b8 98 ; CHECK-NEXT:.b8 115 +; CHECK-NEXT:.b8 116 +; CHECK-NEXT:.b8 111 +; CHECK-NEXT:.b8 119 +; CHECK-NEXT:.b8 99 ; CHECK-NEXT:.b8 115 -; CHECK-NEXT:.b8 103 -; CHECK-NEXT:.b8 114 -; CHECK-NEXT:.b8 101 +; CHECK-NEXT:.b8 0 +; CHECK-NEXT:.b8 4 // DW_AT_decl_file +; CHECK-NEXT:.b8 106 // DW_AT_decl_line +; CHECK-NEXT:.b8 3 +; CHECK-NEXT:.b32 4737 // DW_AT_type +; CHECK-NEXT:.b8 1 // DW_AT_declaration +; CHECK-NEXT:.b8 1 // DW_AT_external +; CHECK-NEXT:.b8 6 // Abbrev [6] 0x13b7:0x5 DW_TAG_formal_parameter +; CHECK-NEXT:.b32 5063 // DW_AT_type +; CHECK-NEXT:.b8 6 // Abbrev [6] 0x13bc:0x5 DW_TAG_formal_parameter +; CHECK-NEXT:.b32 3389 // DW_AT_type +; CHECK-NEXT:.b8 6 // Abbrev [6] 0x13c1:0x5 DW_TAG_formal_parameter +; CHECK-NEXT:.b32 4737 // DW_AT_type +; CHECK-NEXT:.b8 0 // End Of Children Mark +; CHECK-NEXT:.b8 8 // Abbrev [8] 0x13c7:0x5 DW_TAG_pointer_type +; CHECK-NEXT:.b32 5068 // DW_AT_type +; CHECK-NEXT:.b8 7 // Abbrev [7] 0x13cc:0xb DW_TAG_base_type +; CHECK-NEXT:.b8 119 // DW_AT_name +; CHECK-NEXT:.b8 99 +; CHECK-NEXT:.b8 104 ; CHECK-NEXT:.b8 97 -; CHECK-NEXT:.b8 116 -; CHECK-NEXT:.b8 101 ; CHECK-NEXT:.b8 114 -; CHECK-NEXT:.b8 102 -; CHECK-NEXT:.b8 102 +; CHECK-NEXT:.b8 95 +; CHECK-NEXT:.b8 116 ; CHECK-NEXT:.b8 0 -; CHECK-NEXT:.b8 105 // DW_AT_name -; CHECK-NEXT:.b8 115 -; CHECK-NEXT:.b8 108 -; CHECK-NEXT:.b8 101 -; CHECK-NEXT:.b8 115 +; CHECK-NEXT:.b8 5 // DW_AT_encoding +; CHECK-NEXT:.b8 4 // DW_AT_byte_size +; CHECK-NEXT:.b8 16 // Abbrev [16] 0x13d7:0x21 DW_TAG_subprogram +; CHECK-NEXT:.b8 109 // DW_AT_name +; CHECK-NEXT:.b8 98 +; CHECK-NEXT:.b8 116 +; CHECK-NEXT:.b8 111 +; CHECK-NEXT:.b8 119 +; CHECK-NEXT:.b8 99 +; CHECK-NEXT:.b8 0 +; CHECK-NEXT:.b8 4 // DW_AT_decl_file +; CHECK-NEXT:.b8 98 // DW_AT_decl_line +; CHECK-NEXT:.b8 3 +; CHECK-NEXT:.b32 2332 // DW_AT_type +; CHECK-NEXT:.b8 1 // DW_AT_declaration +; CHECK-NEXT:.b8 1 // DW_AT_external +; CHECK-NEXT:.b8 6 // Abbrev [6] 0x13e8:0x5 DW_TAG_formal_parameter +; CHECK-NEXT:.b32 5063 // DW_AT_type +; CHECK-NEXT:.b8 6 // Abbrev [6] 0x13ed:0x5 DW_TAG_formal_parameter +; CHECK-NEXT:.b32 3389 // DW_AT_type +; CHECK-NEXT:.b8 6 // Abbrev [6] 0x13f2:0x5 DW_TAG_formal_parameter +; CHECK-NEXT:.b32 4737 // DW_AT_type +; CHECK-NEXT:.b8 0 // End Of Children Mark +; CHECK-NEXT:.b8 23 // Abbrev [23] 0x13f8:0x21 DW_TAG_subprogram +; CHECK-NEXT:.b8 113 // DW_AT_name ; CHECK-NEXT:.b8 115 -; CHECK-NEXT:.b8 103 +; CHECK-NEXT:.b8 111 ; CHECK-NEXT:.b8 114 -; CHECK-NEXT:.b8 101 -; CHECK-NEXT:.b8 97 ; CHECK-NEXT:.b8 116 -; CHECK-NEXT:.b8 101 -; CHECK-NEXT:.b8 114 ; CHECK-NEXT:.b8 0 ; CHECK-NEXT:.b8 4 // DW_AT_decl_file -; CHECK-NEXT:.b8 114 // DW_AT_decl_line -; CHECK-NEXT:.b32 4734 // DW_AT_type +; CHECK-NEXT:.b8 253 // DW_AT_decl_line +; CHECK-NEXT:.b8 2 ; CHECK-NEXT:.b8 1 // DW_AT_declaration -; CHECK-NEXT:.b8 7 // Abbrev [7] 0x138e:0x5 DW_TAG_formal_parameter -; CHECK-NEXT:.b32 2116 // DW_AT_type -; CHECK-NEXT:.b8 7 // Abbrev [7] 0x1393:0x5 DW_TAG_formal_parameter -; CHECK-NEXT:.b32 2116 // DW_AT_type -; CHECK-NEXT:.b8 0 // End Of Children Mark -; CHECK-NEXT:.b8 31 // Abbrev [31] 0x1399:0x1f DW_TAG_subprogram -; CHECK-NEXT:.b8 95 // DW_AT_MIPS_linkage_name -; CHECK-NEXT:.b8 90 -; CHECK-NEXT:.b8 76 -; CHECK-NEXT:.b8 53 -; CHECK-NEXT:.b8 105 -; CHECK-NEXT:.b8 115 -; CHECK-NEXT:.b8 110 +; CHECK-NEXT:.b8 1 // DW_AT_external +; CHECK-NEXT:.b8 6 // Abbrev [6] 0x1404:0x5 DW_TAG_formal_parameter +; CHECK-NEXT:.b32 4730 // DW_AT_type +; CHECK-NEXT:.b8 6 // Abbrev [6] 0x1409:0x5 DW_TAG_formal_parameter +; CHECK-NEXT:.b32 4737 // DW_AT_type +; 
CHECK-NEXT:.b8 6 // Abbrev [6] 0x140e:0x5 DW_TAG_formal_parameter +; CHECK-NEXT:.b32 4737 // DW_AT_type +; CHECK-NEXT:.b8 6 // Abbrev [6] 0x1413:0x5 DW_TAG_formal_parameter +; CHECK-NEXT:.b32 4772 // DW_AT_type +; CHECK-NEXT:.b8 0 // End Of Children Mark +; CHECK-NEXT:.b8 24 // Abbrev [24] 0x1419:0xf DW_TAG_subprogram +; CHECK-NEXT:.b8 114 // DW_AT_name ; CHECK-NEXT:.b8 97 ; CHECK-NEXT:.b8 110 -; CHECK-NEXT:.b8 102 +; CHECK-NEXT:.b8 100 ; CHECK-NEXT:.b8 0 -; CHECK-NEXT:.b8 105 // DW_AT_name -; CHECK-NEXT:.b8 115 -; CHECK-NEXT:.b8 110 +; CHECK-NEXT:.b8 4 // DW_AT_decl_file +; CHECK-NEXT:.b8 118 // DW_AT_decl_line +; CHECK-NEXT:.b8 1 +; CHECK-NEXT:.b32 2332 // DW_AT_type +; CHECK-NEXT:.b8 1 // DW_AT_declaration +; CHECK-NEXT:.b8 1 // DW_AT_external +; CHECK-NEXT:.b8 16 // Abbrev [16] 0x1428:0x1d DW_TAG_subprogram +; CHECK-NEXT:.b8 114 // DW_AT_name +; CHECK-NEXT:.b8 101 ; CHECK-NEXT:.b8 97 -; CHECK-NEXT:.b8 110 +; CHECK-NEXT:.b8 108 +; CHECK-NEXT:.b8 108 +; CHECK-NEXT:.b8 111 +; CHECK-NEXT:.b8 99 ; CHECK-NEXT:.b8 0 ; CHECK-NEXT:.b8 4 // DW_AT_decl_file -; CHECK-NEXT:.b8 116 // DW_AT_decl_line -; CHECK-NEXT:.b32 4734 // DW_AT_type +; CHECK-NEXT:.b8 224 // DW_AT_decl_line +; CHECK-NEXT:.b8 1 +; CHECK-NEXT:.b32 4730 // DW_AT_type ; CHECK-NEXT:.b8 1 // DW_AT_declaration -; CHECK-NEXT:.b8 7 // Abbrev [7] 0x13b2:0x5 DW_TAG_formal_parameter -; CHECK-NEXT:.b32 2116 // DW_AT_type +; CHECK-NEXT:.b8 1 // DW_AT_external +; CHECK-NEXT:.b8 6 // Abbrev [6] 0x143a:0x5 DW_TAG_formal_parameter +; CHECK-NEXT:.b32 4730 // DW_AT_type +; CHECK-NEXT:.b8 6 // Abbrev [6] 0x143f:0x5 DW_TAG_formal_parameter +; CHECK-NEXT:.b32 4737 // DW_AT_type ; CHECK-NEXT:.b8 0 // End Of Children Mark -; CHECK-NEXT:.b8 31 // Abbrev [31] 0x13b8:0x25 DW_TAG_subprogram -; CHECK-NEXT:.b8 95 // DW_AT_MIPS_linkage_name -; CHECK-NEXT:.b8 90 -; CHECK-NEXT:.b8 76 -; CHECK-NEXT:.b8 56 -; CHECK-NEXT:.b8 105 -; CHECK-NEXT:.b8 115 -; CHECK-NEXT:.b8 110 -; CHECK-NEXT:.b8 111 +; CHECK-NEXT:.b8 23 // Abbrev [23] 0x1445:0x12 DW_TAG_subprogram +; CHECK-NEXT:.b8 115 // DW_AT_name ; CHECK-NEXT:.b8 114 -; CHECK-NEXT:.b8 109 ; CHECK-NEXT:.b8 97 -; CHECK-NEXT:.b8 108 -; CHECK-NEXT:.b8 102 -; CHECK-NEXT:.b8 0 -; CHECK-NEXT:.b8 105 // DW_AT_name -; CHECK-NEXT:.b8 115 ; CHECK-NEXT:.b8 110 -; CHECK-NEXT:.b8 111 -; CHECK-NEXT:.b8 114 -; CHECK-NEXT:.b8 109 -; CHECK-NEXT:.b8 97 -; CHECK-NEXT:.b8 108 +; CHECK-NEXT:.b8 100 ; CHECK-NEXT:.b8 0 ; CHECK-NEXT:.b8 4 // DW_AT_decl_file -; CHECK-NEXT:.b8 118 // DW_AT_decl_line -; CHECK-NEXT:.b32 4734 // DW_AT_type +; CHECK-NEXT:.b8 120 // DW_AT_decl_line +; CHECK-NEXT:.b8 1 ; CHECK-NEXT:.b8 1 // DW_AT_declaration -; CHECK-NEXT:.b8 7 // Abbrev [7] 0x13d7:0x5 DW_TAG_formal_parameter -; CHECK-NEXT:.b32 2116 // DW_AT_type +; CHECK-NEXT:.b8 1 // DW_AT_external +; CHECK-NEXT:.b8 6 // Abbrev [6] 0x1451:0x5 DW_TAG_formal_parameter +; CHECK-NEXT:.b32 5207 // DW_AT_type ; CHECK-NEXT:.b8 0 // End Of Children Mark -; CHECK-NEXT:.b8 31 // Abbrev [31] 0x13dd:0x32 DW_TAG_subprogram -; CHECK-NEXT:.b8 95 // DW_AT_MIPS_linkage_name -; CHECK-NEXT:.b8 90 -; CHECK-NEXT:.b8 76 -; CHECK-NEXT:.b8 49 -; CHECK-NEXT:.b8 49 -; CHECK-NEXT:.b8 105 +; CHECK-NEXT:.b8 7 // Abbrev [7] 0x1457:0x10 DW_TAG_base_type +; CHECK-NEXT:.b8 117 // DW_AT_name +; CHECK-NEXT:.b8 110 ; CHECK-NEXT:.b8 115 -; CHECK-NEXT:.b8 117 +; CHECK-NEXT:.b8 105 +; CHECK-NEXT:.b8 103 ; CHECK-NEXT:.b8 110 -; CHECK-NEXT:.b8 111 +; CHECK-NEXT:.b8 101 +; CHECK-NEXT:.b8 100 +; CHECK-NEXT:.b8 32 +; CHECK-NEXT:.b8 105 +; CHECK-NEXT:.b8 110 +; CHECK-NEXT:.b8 116 +; CHECK-NEXT:.b8 0 +; 
CHECK-NEXT:.b8 7 // DW_AT_encoding +; CHECK-NEXT:.b8 4 // DW_AT_byte_size +; CHECK-NEXT:.b8 10 // Abbrev [10] 0x1467:0x1b DW_TAG_subprogram +; CHECK-NEXT:.b8 115 // DW_AT_name +; CHECK-NEXT:.b8 116 ; CHECK-NEXT:.b8 114 +; CHECK-NEXT:.b8 116 +; CHECK-NEXT:.b8 111 ; CHECK-NEXT:.b8 100 -; CHECK-NEXT:.b8 101 +; CHECK-NEXT:.b8 0 +; CHECK-NEXT:.b8 4 // DW_AT_decl_file +; CHECK-NEXT:.b8 164 // DW_AT_decl_line +; CHECK-NEXT:.b32 3379 // DW_AT_type +; CHECK-NEXT:.b8 1 // DW_AT_declaration +; CHECK-NEXT:.b8 1 // DW_AT_external +; CHECK-NEXT:.b8 6 // Abbrev [6] 0x1477:0x5 DW_TAG_formal_parameter +; CHECK-NEXT:.b32 3389 // DW_AT_type +; CHECK-NEXT:.b8 6 // Abbrev [6] 0x147c:0x5 DW_TAG_formal_parameter +; CHECK-NEXT:.b32 5250 // DW_AT_type +; CHECK-NEXT:.b8 0 // End Of Children Mark +; CHECK-NEXT:.b8 8 // Abbrev [8] 0x1482:0x5 DW_TAG_pointer_type +; CHECK-NEXT:.b32 4926 // DW_AT_type +; CHECK-NEXT:.b8 10 // Abbrev [10] 0x1487:0x20 DW_TAG_subprogram +; CHECK-NEXT:.b8 115 // DW_AT_name +; CHECK-NEXT:.b8 116 +; CHECK-NEXT:.b8 114 +; CHECK-NEXT:.b8 116 +; CHECK-NEXT:.b8 111 +; CHECK-NEXT:.b8 108 +; CHECK-NEXT:.b8 0 +; CHECK-NEXT:.b8 4 // DW_AT_decl_file +; CHECK-NEXT:.b8 183 // DW_AT_decl_line +; CHECK-NEXT:.b32 2917 // DW_AT_type +; CHECK-NEXT:.b8 1 // DW_AT_declaration +; CHECK-NEXT:.b8 1 // DW_AT_external +; CHECK-NEXT:.b8 6 // Abbrev [6] 0x1497:0x5 DW_TAG_formal_parameter +; CHECK-NEXT:.b32 3389 // DW_AT_type +; CHECK-NEXT:.b8 6 // Abbrev [6] 0x149c:0x5 DW_TAG_formal_parameter +; CHECK-NEXT:.b32 5250 // DW_AT_type +; CHECK-NEXT:.b8 6 // Abbrev [6] 0x14a1:0x5 DW_TAG_formal_parameter +; CHECK-NEXT:.b32 2332 // DW_AT_type +; CHECK-NEXT:.b8 0 // End Of Children Mark +; CHECK-NEXT:.b8 10 // Abbrev [10] 0x14a7:0x21 DW_TAG_subprogram +; CHECK-NEXT:.b8 115 // DW_AT_name +; CHECK-NEXT:.b8 116 ; CHECK-NEXT:.b8 114 -; CHECK-NEXT:.b8 101 -; CHECK-NEXT:.b8 100 -; CHECK-NEXT:.b8 102 -; CHECK-NEXT:.b8 102 +; CHECK-NEXT:.b8 116 +; CHECK-NEXT:.b8 111 +; CHECK-NEXT:.b8 117 +; CHECK-NEXT:.b8 108 ; CHECK-NEXT:.b8 0 -; CHECK-NEXT:.b8 105 // DW_AT_name +; CHECK-NEXT:.b8 4 // DW_AT_decl_file +; CHECK-NEXT:.b8 187 // DW_AT_decl_line +; CHECK-NEXT:.b32 4751 // DW_AT_type +; CHECK-NEXT:.b8 1 // DW_AT_declaration +; CHECK-NEXT:.b8 1 // DW_AT_external +; CHECK-NEXT:.b8 6 // Abbrev [6] 0x14b8:0x5 DW_TAG_formal_parameter +; CHECK-NEXT:.b32 3389 // DW_AT_type +; CHECK-NEXT:.b8 6 // Abbrev [6] 0x14bd:0x5 DW_TAG_formal_parameter +; CHECK-NEXT:.b32 5250 // DW_AT_type +; CHECK-NEXT:.b8 6 // Abbrev [6] 0x14c2:0x5 DW_TAG_formal_parameter +; CHECK-NEXT:.b32 2332 // DW_AT_type +; CHECK-NEXT:.b8 0 // End Of Children Mark +; CHECK-NEXT:.b8 16 // Abbrev [16] 0x14c8:0x17 DW_TAG_subprogram +; CHECK-NEXT:.b8 115 // DW_AT_name +; CHECK-NEXT:.b8 121 ; CHECK-NEXT:.b8 115 -; CHECK-NEXT:.b8 117 -; CHECK-NEXT:.b8 110 -; CHECK-NEXT:.b8 111 -; CHECK-NEXT:.b8 114 -; CHECK-NEXT:.b8 100 -; CHECK-NEXT:.b8 101 -; CHECK-NEXT:.b8 114 +; CHECK-NEXT:.b8 116 ; CHECK-NEXT:.b8 101 -; CHECK-NEXT:.b8 100 +; CHECK-NEXT:.b8 109 ; CHECK-NEXT:.b8 0 ; CHECK-NEXT:.b8 4 // DW_AT_decl_file -; CHECK-NEXT:.b8 120 // DW_AT_decl_line -; CHECK-NEXT:.b32 4734 // DW_AT_type +; CHECK-NEXT:.b8 205 // DW_AT_decl_line +; CHECK-NEXT:.b8 2 +; CHECK-NEXT:.b32 2332 // DW_AT_type ; CHECK-NEXT:.b8 1 // DW_AT_declaration -; CHECK-NEXT:.b8 7 // Abbrev [7] 0x1404:0x5 DW_TAG_formal_parameter -; CHECK-NEXT:.b32 2116 // DW_AT_type -; CHECK-NEXT:.b8 7 // Abbrev [7] 0x1409:0x5 DW_TAG_formal_parameter -; CHECK-NEXT:.b32 2116 // DW_AT_type +; CHECK-NEXT:.b8 1 // DW_AT_external +; CHECK-NEXT:.b8 6 // 
Abbrev [6] 0x14d9:0x5 DW_TAG_formal_parameter +; CHECK-NEXT:.b32 3389 // DW_AT_type ; CHECK-NEXT:.b8 0 // End Of Children Mark -; CHECK-NEXT:.b8 31 // Abbrev [31] 0x140f:0x1d DW_TAG_subprogram -; CHECK-NEXT:.b8 95 // DW_AT_MIPS_linkage_name -; CHECK-NEXT:.b8 90 -; CHECK-NEXT:.b8 76 -; CHECK-NEXT:.b8 52 -; CHECK-NEXT:.b8 108 -; CHECK-NEXT:.b8 97 +; CHECK-NEXT:.b8 16 // Abbrev [16] 0x14df:0x23 DW_TAG_subprogram +; CHECK-NEXT:.b8 119 // DW_AT_name +; CHECK-NEXT:.b8 99 +; CHECK-NEXT:.b8 115 +; CHECK-NEXT:.b8 116 +; CHECK-NEXT:.b8 111 +; CHECK-NEXT:.b8 109 ; CHECK-NEXT:.b8 98 ; CHECK-NEXT:.b8 115 -; CHECK-NEXT:.b8 108 ; CHECK-NEXT:.b8 0 -; CHECK-NEXT:.b8 108 // DW_AT_name -; CHECK-NEXT:.b8 97 +; CHECK-NEXT:.b8 4 // DW_AT_decl_file +; CHECK-NEXT:.b8 109 // DW_AT_decl_line +; CHECK-NEXT:.b8 3 +; CHECK-NEXT:.b32 4737 // DW_AT_type +; CHECK-NEXT:.b8 1 // DW_AT_declaration +; CHECK-NEXT:.b8 1 // DW_AT_external +; CHECK-NEXT:.b8 6 // Abbrev [6] 0x14f2:0x5 DW_TAG_formal_parameter +; CHECK-NEXT:.b32 4926 // DW_AT_type +; CHECK-NEXT:.b8 6 // Abbrev [6] 0x14f7:0x5 DW_TAG_formal_parameter +; CHECK-NEXT:.b32 5378 // DW_AT_type +; CHECK-NEXT:.b8 6 // Abbrev [6] 0x14fc:0x5 DW_TAG_formal_parameter +; CHECK-NEXT:.b32 4737 // DW_AT_type +; CHECK-NEXT:.b8 0 // End Of Children Mark +; CHECK-NEXT:.b8 8 // Abbrev [8] 0x1502:0x5 DW_TAG_pointer_type +; CHECK-NEXT:.b32 5383 // DW_AT_type +; CHECK-NEXT:.b8 9 // Abbrev [9] 0x1507:0x5 DW_TAG_const_type +; CHECK-NEXT:.b32 5068 // DW_AT_type +; CHECK-NEXT:.b8 16 // Abbrev [16] 0x150c:0x1c DW_TAG_subprogram +; CHECK-NEXT:.b8 119 // DW_AT_name +; CHECK-NEXT:.b8 99 +; CHECK-NEXT:.b8 116 +; CHECK-NEXT:.b8 111 +; CHECK-NEXT:.b8 109 ; CHECK-NEXT:.b8 98 -; CHECK-NEXT:.b8 115 ; CHECK-NEXT:.b8 0 ; CHECK-NEXT:.b8 4 // DW_AT_decl_file -; CHECK-NEXT:.b8 121 // DW_AT_decl_line -; CHECK-NEXT:.b32 5164 // DW_AT_type +; CHECK-NEXT:.b8 102 // DW_AT_decl_line +; CHECK-NEXT:.b8 3 +; CHECK-NEXT:.b32 2332 // DW_AT_type ; CHECK-NEXT:.b8 1 // DW_AT_declaration -; CHECK-NEXT:.b8 7 // Abbrev [7] 0x1426:0x5 DW_TAG_formal_parameter -; CHECK-NEXT:.b32 5164 // DW_AT_type +; CHECK-NEXT:.b8 1 // DW_AT_external +; CHECK-NEXT:.b8 6 // Abbrev [6] 0x151d:0x5 DW_TAG_formal_parameter +; CHECK-NEXT:.b32 4926 // DW_AT_type +; CHECK-NEXT:.b8 6 // Abbrev [6] 0x1522:0x5 DW_TAG_formal_parameter +; CHECK-NEXT:.b32 5068 // DW_AT_type ; CHECK-NEXT:.b8 0 // End Of Children Mark -; CHECK-NEXT:.b8 10 // Abbrev [10] 0x142c:0xc DW_TAG_base_type -; CHECK-NEXT:.b8 108 // DW_AT_name -; CHECK-NEXT:.b8 111 -; CHECK-NEXT:.b8 110 +; CHECK-NEXT:.b8 2 // Abbrev [2] 0x1528:0x78 DW_TAG_namespace +; CHECK-NEXT:.b8 95 // DW_AT_name +; CHECK-NEXT:.b8 95 ; CHECK-NEXT:.b8 103 -; CHECK-NEXT:.b8 32 -; CHECK-NEXT:.b8 105 ; CHECK-NEXT:.b8 110 -; CHECK-NEXT:.b8 116 +; CHECK-NEXT:.b8 117 +; CHECK-NEXT:.b8 95 +; CHECK-NEXT:.b8 99 +; CHECK-NEXT:.b8 120 +; CHECK-NEXT:.b8 120 ; CHECK-NEXT:.b8 0 -; CHECK-NEXT:.b8 5 // DW_AT_encoding -; CHECK-NEXT:.b8 8 // DW_AT_byte_size -; CHECK-NEXT:.b8 31 // Abbrev [31] 0x1438:0x25 DW_TAG_subprogram +; CHECK-NEXT:.b8 3 // Abbrev [3] 0x1533:0x7 DW_TAG_imported_declaration +; CHECK-NEXT:.b8 5 // DW_AT_decl_file +; CHECK-NEXT:.b8 201 // DW_AT_decl_line +; CHECK-NEXT:.b32 5536 // DW_AT_import +; CHECK-NEXT:.b8 3 // Abbrev [3] 0x153a:0x7 DW_TAG_imported_declaration +; CHECK-NEXT:.b8 5 // DW_AT_decl_file +; CHECK-NEXT:.b8 207 // DW_AT_decl_line +; CHECK-NEXT:.b32 5585 // DW_AT_import +; CHECK-NEXT:.b8 3 // Abbrev [3] 0x1541:0x7 DW_TAG_imported_declaration +; CHECK-NEXT:.b8 5 // DW_AT_decl_file +; CHECK-NEXT:.b8 211 // 
DW_AT_decl_line +; CHECK-NEXT:.b32 5604 // DW_AT_import +; CHECK-NEXT:.b8 3 // Abbrev [3] 0x1548:0x7 DW_TAG_imported_declaration +; CHECK-NEXT:.b8 5 // DW_AT_decl_file +; CHECK-NEXT:.b8 217 // DW_AT_decl_line +; CHECK-NEXT:.b32 5626 // DW_AT_import +; CHECK-NEXT:.b8 3 // Abbrev [3] 0x154f:0x7 DW_TAG_imported_declaration +; CHECK-NEXT:.b8 5 // DW_AT_decl_file +; CHECK-NEXT:.b8 228 // DW_AT_decl_line +; CHECK-NEXT:.b32 5653 // DW_AT_import +; CHECK-NEXT:.b8 3 // Abbrev [3] 0x1556:0x7 DW_TAG_imported_declaration +; CHECK-NEXT:.b8 5 // DW_AT_decl_file +; CHECK-NEXT:.b8 229 // DW_AT_decl_line +; CHECK-NEXT:.b32 5675 // DW_AT_import +; CHECK-NEXT:.b8 3 // Abbrev [3] 0x155d:0x7 DW_TAG_imported_declaration +; CHECK-NEXT:.b8 5 // DW_AT_decl_file +; CHECK-NEXT:.b8 230 // DW_AT_decl_line +; CHECK-NEXT:.b32 5708 // DW_AT_import +; CHECK-NEXT:.b8 3 // Abbrev [3] 0x1564:0x7 DW_TAG_imported_declaration +; CHECK-NEXT:.b8 5 // DW_AT_decl_file +; CHECK-NEXT:.b8 232 // DW_AT_decl_line +; CHECK-NEXT:.b32 5768 // DW_AT_import +; CHECK-NEXT:.b8 3 // Abbrev [3] 0x156b:0x7 DW_TAG_imported_declaration +; CHECK-NEXT:.b8 5 // DW_AT_decl_file +; CHECK-NEXT:.b8 233 // DW_AT_decl_line +; CHECK-NEXT:.b32 5795 // DW_AT_import +; CHECK-NEXT:.b8 25 // Abbrev [25] 0x1572:0x2d DW_TAG_subprogram ; CHECK-NEXT:.b8 95 // DW_AT_MIPS_linkage_name ; CHECK-NEXT:.b8 90 -; CHECK-NEXT:.b8 76 -; CHECK-NEXT:.b8 53 -; CHECK-NEXT:.b8 108 +; CHECK-NEXT:.b8 78 +; CHECK-NEXT:.b8 57 +; CHECK-NEXT:.b8 95 +; CHECK-NEXT:.b8 95 +; CHECK-NEXT:.b8 103 +; CHECK-NEXT:.b8 110 +; CHECK-NEXT:.b8 117 +; CHECK-NEXT:.b8 95 +; CHECK-NEXT:.b8 99 +; CHECK-NEXT:.b8 120 +; CHECK-NEXT:.b8 120 +; CHECK-NEXT:.b8 51 ; CHECK-NEXT:.b8 100 -; CHECK-NEXT:.b8 101 +; CHECK-NEXT:.b8 105 +; CHECK-NEXT:.b8 118 +; CHECK-NEXT:.b8 69 ; CHECK-NEXT:.b8 120 -; CHECK-NEXT:.b8 112 -; CHECK-NEXT:.b8 102 +; CHECK-NEXT:.b8 120 +; CHECK-NEXT:.b8 0 +; CHECK-NEXT:.b8 100 // DW_AT_name ; CHECK-NEXT:.b8 105 +; CHECK-NEXT:.b8 118 ; CHECK-NEXT:.b8 0 +; CHECK-NEXT:.b8 5 // DW_AT_decl_file +; CHECK-NEXT:.b8 214 // DW_AT_decl_line +; CHECK-NEXT:.b32 5536 // DW_AT_type +; CHECK-NEXT:.b8 1 // DW_AT_declaration +; CHECK-NEXT:.b8 1 // DW_AT_external +; CHECK-NEXT:.b8 6 // Abbrev [6] 0x1594:0x5 DW_TAG_formal_parameter +; CHECK-NEXT:.b32 1508 // DW_AT_type +; CHECK-NEXT:.b8 6 // Abbrev [6] 0x1599:0x5 DW_TAG_formal_parameter +; CHECK-NEXT:.b32 1508 // DW_AT_type +; CHECK-NEXT:.b8 0 // End Of Children Mark +; CHECK-NEXT:.b8 0 // End Of Children Mark +; CHECK-NEXT:.b8 11 // Abbrev [11] 0x15a0:0xf DW_TAG_typedef +; CHECK-NEXT:.b32 5551 // DW_AT_type ; CHECK-NEXT:.b8 108 // DW_AT_name +; CHECK-NEXT:.b8 108 ; CHECK-NEXT:.b8 100 +; CHECK-NEXT:.b8 105 +; CHECK-NEXT:.b8 118 +; CHECK-NEXT:.b8 95 +; CHECK-NEXT:.b8 116 +; CHECK-NEXT:.b8 0 +; CHECK-NEXT:.b8 4 // DW_AT_decl_file +; CHECK-NEXT:.b8 121 // DW_AT_decl_line +; CHECK-NEXT:.b8 13 // Abbrev [13] 0x15af:0x22 DW_TAG_structure_type +; CHECK-NEXT:.b8 16 // DW_AT_byte_size +; CHECK-NEXT:.b8 4 // DW_AT_decl_file +; CHECK-NEXT:.b8 117 // DW_AT_decl_line +; CHECK-NEXT:.b8 14 // Abbrev [14] 0x15b3:0xf DW_TAG_member +; CHECK-NEXT:.b8 113 // DW_AT_name +; CHECK-NEXT:.b8 117 +; CHECK-NEXT:.b8 111 +; CHECK-NEXT:.b8 116 +; CHECK-NEXT:.b8 0 +; CHECK-NEXT:.b32 1508 // DW_AT_type +; CHECK-NEXT:.b8 4 // DW_AT_decl_file +; CHECK-NEXT:.b8 119 // DW_AT_decl_line +; CHECK-NEXT:.b8 2 // DW_AT_data_member_location +; CHECK-NEXT:.b8 35 +; CHECK-NEXT:.b8 0 +; CHECK-NEXT:.b8 14 // Abbrev [14] 0x15c2:0xe DW_TAG_member +; CHECK-NEXT:.b8 114 // DW_AT_name ; CHECK-NEXT:.b8 101 +; 
CHECK-NEXT:.b8 109 +; CHECK-NEXT:.b8 0 +; CHECK-NEXT:.b32 1508 // DW_AT_type +; CHECK-NEXT:.b8 4 // DW_AT_decl_file +; CHECK-NEXT:.b8 120 // DW_AT_decl_line +; CHECK-NEXT:.b8 2 // DW_AT_data_member_location +; CHECK-NEXT:.b8 35 +; CHECK-NEXT:.b8 8 +; CHECK-NEXT:.b8 0 // End Of Children Mark +; CHECK-NEXT:.b8 22 // Abbrev [22] 0x15d1:0x13 DW_TAG_subprogram +; CHECK-NEXT:.b8 95 // DW_AT_name +; CHECK-NEXT:.b8 69 ; CHECK-NEXT:.b8 120 -; CHECK-NEXT:.b8 112 +; CHECK-NEXT:.b8 105 +; CHECK-NEXT:.b8 116 ; CHECK-NEXT:.b8 0 ; CHECK-NEXT:.b8 4 // DW_AT_decl_file -; CHECK-NEXT:.b8 123 // DW_AT_decl_line -; CHECK-NEXT:.b32 2116 // DW_AT_type +; CHECK-NEXT:.b8 45 // DW_AT_decl_line +; CHECK-NEXT:.b8 2 ; CHECK-NEXT:.b8 1 // DW_AT_declaration -; CHECK-NEXT:.b8 7 // Abbrev [7] 0x1452:0x5 DW_TAG_formal_parameter -; CHECK-NEXT:.b32 2116 // DW_AT_type -; CHECK-NEXT:.b8 7 // Abbrev [7] 0x1457:0x5 DW_TAG_formal_parameter -; CHECK-NEXT:.b32 4579 // DW_AT_type +; CHECK-NEXT:.b8 1 // DW_AT_external +; CHECK-NEXT:.b8 1 // DW_AT_noreturn +; CHECK-NEXT:.b8 6 // Abbrev [6] 0x15de:0x5 DW_TAG_formal_parameter +; CHECK-NEXT:.b32 2332 // DW_AT_type ; CHECK-NEXT:.b8 0 // End Of Children Mark -; CHECK-NEXT:.b8 31 // Abbrev [31] 0x145d:0x21 DW_TAG_subprogram -; CHECK-NEXT:.b8 95 // DW_AT_MIPS_linkage_name -; CHECK-NEXT:.b8 90 -; CHECK-NEXT:.b8 76 -; CHECK-NEXT:.b8 54 +; CHECK-NEXT:.b8 16 // Abbrev [16] 0x15e4:0x16 DW_TAG_subprogram +; CHECK-NEXT:.b8 108 // DW_AT_name ; CHECK-NEXT:.b8 108 -; CHECK-NEXT:.b8 103 -; CHECK-NEXT:.b8 97 -; CHECK-NEXT:.b8 109 -; CHECK-NEXT:.b8 109 ; CHECK-NEXT:.b8 97 -; CHECK-NEXT:.b8 102 +; CHECK-NEXT:.b8 98 +; CHECK-NEXT:.b8 115 ; CHECK-NEXT:.b8 0 +; CHECK-NEXT:.b8 4 // DW_AT_decl_file +; CHECK-NEXT:.b8 12 // DW_AT_decl_line +; CHECK-NEXT:.b8 3 +; CHECK-NEXT:.b32 1508 // DW_AT_type +; CHECK-NEXT:.b8 1 // DW_AT_declaration +; CHECK-NEXT:.b8 1 // DW_AT_external +; CHECK-NEXT:.b8 6 // Abbrev [6] 0x15f4:0x5 DW_TAG_formal_parameter +; CHECK-NEXT:.b32 1508 // DW_AT_type +; CHECK-NEXT:.b8 0 // End Of Children Mark +; CHECK-NEXT:.b8 16 // Abbrev [16] 0x15fa:0x1b DW_TAG_subprogram ; CHECK-NEXT:.b8 108 // DW_AT_name -; CHECK-NEXT:.b8 103 -; CHECK-NEXT:.b8 97 -; CHECK-NEXT:.b8 109 -; CHECK-NEXT:.b8 109 -; CHECK-NEXT:.b8 97 +; CHECK-NEXT:.b8 108 +; CHECK-NEXT:.b8 100 +; CHECK-NEXT:.b8 105 +; CHECK-NEXT:.b8 118 ; CHECK-NEXT:.b8 0 ; CHECK-NEXT:.b8 4 // DW_AT_decl_file -; CHECK-NEXT:.b8 125 // DW_AT_decl_line -; CHECK-NEXT:.b32 2116 // DW_AT_type +; CHECK-NEXT:.b8 29 // DW_AT_decl_line +; CHECK-NEXT:.b8 3 +; CHECK-NEXT:.b32 5536 // DW_AT_type ; CHECK-NEXT:.b8 1 // DW_AT_declaration -; CHECK-NEXT:.b8 7 // Abbrev [7] 0x1478:0x5 DW_TAG_formal_parameter -; CHECK-NEXT:.b32 2116 // DW_AT_type +; CHECK-NEXT:.b8 1 // DW_AT_external +; CHECK-NEXT:.b8 6 // Abbrev [6] 0x160a:0x5 DW_TAG_formal_parameter +; CHECK-NEXT:.b32 1508 // DW_AT_type +; CHECK-NEXT:.b8 6 // Abbrev [6] 0x160f:0x5 DW_TAG_formal_parameter +; CHECK-NEXT:.b32 1508 // DW_AT_type ; CHECK-NEXT:.b8 0 // End Of Children Mark -; CHECK-NEXT:.b8 31 // Abbrev [31] 0x147e:0x1f DW_TAG_subprogram -; CHECK-NEXT:.b8 95 // DW_AT_MIPS_linkage_name -; CHECK-NEXT:.b8 90 -; CHECK-NEXT:.b8 76 -; CHECK-NEXT:.b8 53 -; CHECK-NEXT:.b8 108 +; CHECK-NEXT:.b8 16 // Abbrev [16] 0x1615:0x16 DW_TAG_subprogram +; CHECK-NEXT:.b8 97 // DW_AT_name +; CHECK-NEXT:.b8 116 +; CHECK-NEXT:.b8 111 ; CHECK-NEXT:.b8 108 -; CHECK-NEXT:.b8 97 -; CHECK-NEXT:.b8 98 -; CHECK-NEXT:.b8 115 -; CHECK-NEXT:.b8 120 -; CHECK-NEXT:.b8 0 -; CHECK-NEXT:.b8 108 // DW_AT_name ; CHECK-NEXT:.b8 108 -; CHECK-NEXT:.b8 
97 -; CHECK-NEXT:.b8 98 -; CHECK-NEXT:.b8 115 ; CHECK-NEXT:.b8 0 ; CHECK-NEXT:.b8 4 // DW_AT_decl_file -; CHECK-NEXT:.b8 126 // DW_AT_decl_line -; CHECK-NEXT:.b32 3764 // DW_AT_type +; CHECK-NEXT:.b8 36 // DW_AT_decl_line +; CHECK-NEXT:.b8 1 +; CHECK-NEXT:.b32 1508 // DW_AT_type ; CHECK-NEXT:.b8 1 // DW_AT_declaration -; CHECK-NEXT:.b8 7 // Abbrev [7] 0x1497:0x5 DW_TAG_formal_parameter -; CHECK-NEXT:.b32 3764 // DW_AT_type +; CHECK-NEXT:.b8 1 // DW_AT_external +; CHECK-NEXT:.b8 6 // Abbrev [6] 0x1625:0x5 DW_TAG_formal_parameter +; CHECK-NEXT:.b32 3389 // DW_AT_type ; CHECK-NEXT:.b8 0 // End Of Children Mark -; CHECK-NEXT:.b8 31 // Abbrev [31] 0x149d:0x21 DW_TAG_subprogram -; CHECK-NEXT:.b8 95 // DW_AT_MIPS_linkage_name -; CHECK-NEXT:.b8 90 -; CHECK-NEXT:.b8 76 -; CHECK-NEXT:.b8 54 +; CHECK-NEXT:.b8 10 // Abbrev [10] 0x162b:0x21 DW_TAG_subprogram +; CHECK-NEXT:.b8 115 // DW_AT_name +; CHECK-NEXT:.b8 116 +; CHECK-NEXT:.b8 114 +; CHECK-NEXT:.b8 116 +; CHECK-NEXT:.b8 111 ; CHECK-NEXT:.b8 108 ; CHECK-NEXT:.b8 108 +; CHECK-NEXT:.b8 0 +; CHECK-NEXT:.b8 4 // DW_AT_decl_file +; CHECK-NEXT:.b8 209 // DW_AT_decl_line +; CHECK-NEXT:.b32 1508 // DW_AT_type +; CHECK-NEXT:.b8 1 // DW_AT_declaration +; CHECK-NEXT:.b8 1 // DW_AT_external +; CHECK-NEXT:.b8 6 // Abbrev [6] 0x163c:0x5 DW_TAG_formal_parameter +; CHECK-NEXT:.b32 3389 // DW_AT_type +; CHECK-NEXT:.b8 6 // Abbrev [6] 0x1641:0x5 DW_TAG_formal_parameter +; CHECK-NEXT:.b32 5250 // DW_AT_type +; CHECK-NEXT:.b8 6 // Abbrev [6] 0x1646:0x5 DW_TAG_formal_parameter +; CHECK-NEXT:.b32 2332 // DW_AT_type +; CHECK-NEXT:.b8 0 // End Of Children Mark +; CHECK-NEXT:.b8 10 // Abbrev [10] 0x164c:0x22 DW_TAG_subprogram +; CHECK-NEXT:.b8 115 // DW_AT_name +; CHECK-NEXT:.b8 116 ; CHECK-NEXT:.b8 114 +; CHECK-NEXT:.b8 116 +; CHECK-NEXT:.b8 111 +; CHECK-NEXT:.b8 117 +; CHECK-NEXT:.b8 108 +; CHECK-NEXT:.b8 108 +; CHECK-NEXT:.b8 0 +; CHECK-NEXT:.b8 4 // DW_AT_decl_file +; CHECK-NEXT:.b8 214 // DW_AT_decl_line +; CHECK-NEXT:.b32 5742 // DW_AT_type +; CHECK-NEXT:.b8 1 // DW_AT_declaration +; CHECK-NEXT:.b8 1 // DW_AT_external +; CHECK-NEXT:.b8 6 // Abbrev [6] 0x165e:0x5 DW_TAG_formal_parameter +; CHECK-NEXT:.b32 3389 // DW_AT_type +; CHECK-NEXT:.b8 6 // Abbrev [6] 0x1663:0x5 DW_TAG_formal_parameter +; CHECK-NEXT:.b32 5250 // DW_AT_type +; CHECK-NEXT:.b8 6 // Abbrev [6] 0x1668:0x5 DW_TAG_formal_parameter +; CHECK-NEXT:.b32 2332 // DW_AT_type +; CHECK-NEXT:.b8 0 // End Of Children Mark +; CHECK-NEXT:.b8 7 // Abbrev [7] 0x166e:0x1a DW_TAG_base_type +; CHECK-NEXT:.b8 108 // DW_AT_name +; CHECK-NEXT:.b8 111 +; CHECK-NEXT:.b8 110 +; CHECK-NEXT:.b8 103 +; CHECK-NEXT:.b8 32 +; CHECK-NEXT:.b8 108 +; CHECK-NEXT:.b8 111 +; CHECK-NEXT:.b8 110 +; CHECK-NEXT:.b8 103 +; CHECK-NEXT:.b8 32 +; CHECK-NEXT:.b8 117 +; CHECK-NEXT:.b8 110 +; CHECK-NEXT:.b8 115 +; CHECK-NEXT:.b8 105 +; CHECK-NEXT:.b8 103 +; CHECK-NEXT:.b8 110 +; CHECK-NEXT:.b8 101 +; CHECK-NEXT:.b8 100 +; CHECK-NEXT:.b8 32 ; CHECK-NEXT:.b8 105 ; CHECK-NEXT:.b8 110 ; CHECK-NEXT:.b8 116 -; CHECK-NEXT:.b8 102 ; CHECK-NEXT:.b8 0 -; CHECK-NEXT:.b8 108 // DW_AT_name -; CHECK-NEXT:.b8 108 +; CHECK-NEXT:.b8 7 // DW_AT_encoding +; CHECK-NEXT:.b8 8 // DW_AT_byte_size +; CHECK-NEXT:.b8 10 // Abbrev [10] 0x1688:0x1b DW_TAG_subprogram +; CHECK-NEXT:.b8 115 // DW_AT_name +; CHECK-NEXT:.b8 116 ; CHECK-NEXT:.b8 114 -; CHECK-NEXT:.b8 105 -; CHECK-NEXT:.b8 110 ; CHECK-NEXT:.b8 116 -; CHECK-NEXT:.b8 0 -; CHECK-NEXT:.b8 4 // DW_AT_decl_file -; CHECK-NEXT:.b8 128 // DW_AT_decl_line -; CHECK-NEXT:.b32 3764 // DW_AT_type -; CHECK-NEXT:.b8 1 // 
DW_AT_declaration -; CHECK-NEXT:.b8 7 // Abbrev [7] 0x14b8:0x5 DW_TAG_formal_parameter -; CHECK-NEXT:.b32 2116 // DW_AT_type -; CHECK-NEXT:.b8 0 // End Of Children Mark -; CHECK-NEXT:.b8 31 // Abbrev [31] 0x14be:0x1b DW_TAG_subprogram -; CHECK-NEXT:.b8 95 // DW_AT_MIPS_linkage_name -; CHECK-NEXT:.b8 90 -; CHECK-NEXT:.b8 76 -; CHECK-NEXT:.b8 51 -; CHECK-NEXT:.b8 108 ; CHECK-NEXT:.b8 111 -; CHECK-NEXT:.b8 103 ; CHECK-NEXT:.b8 102 ; CHECK-NEXT:.b8 0 -; CHECK-NEXT:.b8 108 // DW_AT_name -; CHECK-NEXT:.b8 111 -; CHECK-NEXT:.b8 103 -; CHECK-NEXT:.b8 0 ; CHECK-NEXT:.b8 4 // DW_AT_decl_file -; CHECK-NEXT:.b8 138 // DW_AT_decl_line -; CHECK-NEXT:.b32 2116 // DW_AT_type +; CHECK-NEXT:.b8 172 // DW_AT_decl_line +; CHECK-NEXT:.b32 1554 // DW_AT_type ; CHECK-NEXT:.b8 1 // DW_AT_declaration -; CHECK-NEXT:.b8 7 // Abbrev [7] 0x14d3:0x5 DW_TAG_formal_parameter -; CHECK-NEXT:.b32 2116 // DW_AT_type +; CHECK-NEXT:.b8 1 // DW_AT_external +; CHECK-NEXT:.b8 6 // Abbrev [6] 0x1698:0x5 DW_TAG_formal_parameter +; CHECK-NEXT:.b32 3389 // DW_AT_type +; CHECK-NEXT:.b8 6 // Abbrev [6] 0x169d:0x5 DW_TAG_formal_parameter +; CHECK-NEXT:.b32 5250 // DW_AT_type ; CHECK-NEXT:.b8 0 // End Of Children Mark -; CHECK-NEXT:.b8 31 // Abbrev [31] 0x14d9:0x1f DW_TAG_subprogram -; CHECK-NEXT:.b8 95 // DW_AT_MIPS_linkage_name -; CHECK-NEXT:.b8 90 -; CHECK-NEXT:.b8 76 -; CHECK-NEXT:.b8 53 -; CHECK-NEXT:.b8 108 -; CHECK-NEXT:.b8 111 -; CHECK-NEXT:.b8 103 -; CHECK-NEXT:.b8 49 -; CHECK-NEXT:.b8 48 -; CHECK-NEXT:.b8 102 -; CHECK-NEXT:.b8 0 -; CHECK-NEXT:.b8 108 // DW_AT_name +; CHECK-NEXT:.b8 10 // Abbrev [10] 0x16a3:0x1c DW_TAG_subprogram +; CHECK-NEXT:.b8 115 // DW_AT_name +; CHECK-NEXT:.b8 116 +; CHECK-NEXT:.b8 114 +; CHECK-NEXT:.b8 116 ; CHECK-NEXT:.b8 111 -; CHECK-NEXT:.b8 103 -; CHECK-NEXT:.b8 49 -; CHECK-NEXT:.b8 48 +; CHECK-NEXT:.b8 108 +; CHECK-NEXT:.b8 100 ; CHECK-NEXT:.b8 0 ; CHECK-NEXT:.b8 4 // DW_AT_decl_file -; CHECK-NEXT:.b8 130 // DW_AT_decl_line -; CHECK-NEXT:.b32 2116 // DW_AT_type +; CHECK-NEXT:.b8 175 // DW_AT_decl_line +; CHECK-NEXT:.b32 5823 // DW_AT_type ; CHECK-NEXT:.b8 1 // DW_AT_declaration -; CHECK-NEXT:.b8 7 // Abbrev [7] 0x14f2:0x5 DW_TAG_formal_parameter -; CHECK-NEXT:.b32 2116 // DW_AT_type +; CHECK-NEXT:.b8 1 // DW_AT_external +; CHECK-NEXT:.b8 6 // Abbrev [6] 0x16b4:0x5 DW_TAG_formal_parameter +; CHECK-NEXT:.b32 3389 // DW_AT_type +; CHECK-NEXT:.b8 6 // Abbrev [6] 0x16b9:0x5 DW_TAG_formal_parameter +; CHECK-NEXT:.b32 5250 // DW_AT_type ; CHECK-NEXT:.b8 0 // End Of Children Mark -; CHECK-NEXT:.b8 31 // Abbrev [31] 0x14f8:0x1f DW_TAG_subprogram -; CHECK-NEXT:.b8 95 // DW_AT_MIPS_linkage_name -; CHECK-NEXT:.b8 90 -; CHECK-NEXT:.b8 76 -; CHECK-NEXT:.b8 53 -; CHECK-NEXT:.b8 108 -; CHECK-NEXT:.b8 111 -; CHECK-NEXT:.b8 103 -; CHECK-NEXT:.b8 49 -; CHECK-NEXT:.b8 112 -; CHECK-NEXT:.b8 102 -; CHECK-NEXT:.b8 0 +; CHECK-NEXT:.b8 7 // Abbrev [7] 0x16bf:0xf DW_TAG_base_type ; CHECK-NEXT:.b8 108 // DW_AT_name ; CHECK-NEXT:.b8 111 +; CHECK-NEXT:.b8 110 ; CHECK-NEXT:.b8 103 -; CHECK-NEXT:.b8 49 -; CHECK-NEXT:.b8 112 +; CHECK-NEXT:.b8 32 +; CHECK-NEXT:.b8 100 +; CHECK-NEXT:.b8 111 +; CHECK-NEXT:.b8 117 +; CHECK-NEXT:.b8 98 +; CHECK-NEXT:.b8 108 +; CHECK-NEXT:.b8 101 ; CHECK-NEXT:.b8 0 -; CHECK-NEXT:.b8 4 // DW_AT_decl_file -; CHECK-NEXT:.b8 132 // DW_AT_decl_line -; CHECK-NEXT:.b32 2116 // DW_AT_type -; CHECK-NEXT:.b8 1 // DW_AT_declaration -; CHECK-NEXT:.b8 7 // Abbrev [7] 0x1511:0x5 DW_TAG_formal_parameter -; CHECK-NEXT:.b32 2116 // DW_AT_type -; CHECK-NEXT:.b8 0 // End Of Children Mark -; CHECK-NEXT:.b8 31 // Abbrev [31] 
0x1517:0x1d DW_TAG_subprogram +; CHECK-NEXT:.b8 4 // DW_AT_encoding +; CHECK-NEXT:.b8 8 // DW_AT_byte_size +; CHECK-NEXT:.b8 26 // Abbrev [26] 0x16ce:0x20 DW_TAG_subprogram ; CHECK-NEXT:.b8 95 // DW_AT_MIPS_linkage_name ; CHECK-NEXT:.b8 90 ; CHECK-NEXT:.b8 76 -; CHECK-NEXT:.b8 52 -; CHECK-NEXT:.b8 108 +; CHECK-NEXT:.b8 53 +; CHECK-NEXT:.b8 97 +; CHECK-NEXT:.b8 99 ; CHECK-NEXT:.b8 111 -; CHECK-NEXT:.b8 103 -; CHECK-NEXT:.b8 50 +; CHECK-NEXT:.b8 115 +; CHECK-NEXT:.b8 102 ; CHECK-NEXT:.b8 102 ; CHECK-NEXT:.b8 0 -; CHECK-NEXT:.b8 108 // DW_AT_name +; CHECK-NEXT:.b8 97 // DW_AT_name +; CHECK-NEXT:.b8 99 ; CHECK-NEXT:.b8 111 -; CHECK-NEXT:.b8 103 -; CHECK-NEXT:.b8 50 +; CHECK-NEXT:.b8 115 +; CHECK-NEXT:.b8 102 ; CHECK-NEXT:.b8 0 -; CHECK-NEXT:.b8 4 // DW_AT_decl_file -; CHECK-NEXT:.b8 134 // DW_AT_decl_line -; CHECK-NEXT:.b32 2116 // DW_AT_type +; CHECK-NEXT:.b8 9 // DW_AT_decl_file +; CHECK-NEXT:.b8 62 // DW_AT_decl_line +; CHECK-NEXT:.b8 5 +; CHECK-NEXT:.b32 1554 // DW_AT_type ; CHECK-NEXT:.b8 1 // DW_AT_declaration -; CHECK-NEXT:.b8 7 // Abbrev [7] 0x152e:0x5 DW_TAG_formal_parameter -; CHECK-NEXT:.b32 2116 // DW_AT_type +; CHECK-NEXT:.b8 6 // Abbrev [6] 0x16e8:0x5 DW_TAG_formal_parameter +; CHECK-NEXT:.b32 1554 // DW_AT_type ; CHECK-NEXT:.b8 0 // End Of Children Mark -; CHECK-NEXT:.b8 31 // Abbrev [31] 0x1534:0x1d DW_TAG_subprogram +; CHECK-NEXT:.b8 26 // Abbrev [26] 0x16ee:0x22 DW_TAG_subprogram ; CHECK-NEXT:.b8 95 // DW_AT_MIPS_linkage_name ; CHECK-NEXT:.b8 90 ; CHECK-NEXT:.b8 76 -; CHECK-NEXT:.b8 52 -; CHECK-NEXT:.b8 108 +; CHECK-NEXT:.b8 54 +; CHECK-NEXT:.b8 97 +; CHECK-NEXT:.b8 99 ; CHECK-NEXT:.b8 111 -; CHECK-NEXT:.b8 103 -; CHECK-NEXT:.b8 98 +; CHECK-NEXT:.b8 115 +; CHECK-NEXT:.b8 104 +; CHECK-NEXT:.b8 102 ; CHECK-NEXT:.b8 102 ; CHECK-NEXT:.b8 0 -; CHECK-NEXT:.b8 108 // DW_AT_name +; CHECK-NEXT:.b8 97 // DW_AT_name +; CHECK-NEXT:.b8 99 ; CHECK-NEXT:.b8 111 -; CHECK-NEXT:.b8 103 -; CHECK-NEXT:.b8 98 +; CHECK-NEXT:.b8 115 +; CHECK-NEXT:.b8 104 +; CHECK-NEXT:.b8 102 ; CHECK-NEXT:.b8 0 -; CHECK-NEXT:.b8 4 // DW_AT_decl_file -; CHECK-NEXT:.b8 136 // DW_AT_decl_line -; CHECK-NEXT:.b32 2116 // DW_AT_type +; CHECK-NEXT:.b8 9 // DW_AT_decl_file +; CHECK-NEXT:.b8 90 // DW_AT_decl_line +; CHECK-NEXT:.b8 5 +; CHECK-NEXT:.b32 1554 // DW_AT_type ; CHECK-NEXT:.b8 1 // DW_AT_declaration -; CHECK-NEXT:.b8 7 // Abbrev [7] 0x154b:0x5 DW_TAG_formal_parameter -; CHECK-NEXT:.b32 2116 // DW_AT_type +; CHECK-NEXT:.b8 6 // Abbrev [6] 0x170a:0x5 DW_TAG_formal_parameter +; CHECK-NEXT:.b32 1554 // DW_AT_type ; CHECK-NEXT:.b8 0 // End Of Children Mark -; CHECK-NEXT:.b8 31 // Abbrev [31] 0x1551:0x1f DW_TAG_subprogram +; CHECK-NEXT:.b8 26 // Abbrev [26] 0x1710:0x20 DW_TAG_subprogram ; CHECK-NEXT:.b8 95 // DW_AT_MIPS_linkage_name ; CHECK-NEXT:.b8 90 ; CHECK-NEXT:.b8 76 ; CHECK-NEXT:.b8 53 -; CHECK-NEXT:.b8 108 -; CHECK-NEXT:.b8 114 +; CHECK-NEXT:.b8 97 +; CHECK-NEXT:.b8 115 ; CHECK-NEXT:.b8 105 ; CHECK-NEXT:.b8 110 -; CHECK-NEXT:.b8 116 +; CHECK-NEXT:.b8 102 ; CHECK-NEXT:.b8 102 ; CHECK-NEXT:.b8 0 -; CHECK-NEXT:.b8 108 // DW_AT_name -; CHECK-NEXT:.b8 114 +; CHECK-NEXT:.b8 97 // DW_AT_name +; CHECK-NEXT:.b8 115 ; CHECK-NEXT:.b8 105 ; CHECK-NEXT:.b8 110 -; CHECK-NEXT:.b8 116 +; CHECK-NEXT:.b8 102 ; CHECK-NEXT:.b8 0 -; CHECK-NEXT:.b8 4 // DW_AT_decl_file -; CHECK-NEXT:.b8 140 // DW_AT_decl_line -; CHECK-NEXT:.b32 5164 // DW_AT_type +; CHECK-NEXT:.b8 9 // DW_AT_decl_file +; CHECK-NEXT:.b8 57 // DW_AT_decl_line +; CHECK-NEXT:.b8 5 +; CHECK-NEXT:.b32 1554 // DW_AT_type ; CHECK-NEXT:.b8 1 // DW_AT_declaration -; 
CHECK-NEXT:.b8 7 // Abbrev [7] 0x156a:0x5 DW_TAG_formal_parameter -; CHECK-NEXT:.b32 2116 // DW_AT_type +; CHECK-NEXT:.b8 6 // Abbrev [6] 0x172a:0x5 DW_TAG_formal_parameter +; CHECK-NEXT:.b32 1554 // DW_AT_type ; CHECK-NEXT:.b8 0 // End Of Children Mark -; CHECK-NEXT:.b8 31 // Abbrev [31] 0x1570:0x21 DW_TAG_subprogram +; CHECK-NEXT:.b8 26 // Abbrev [26] 0x1730:0x22 DW_TAG_subprogram ; CHECK-NEXT:.b8 95 // DW_AT_MIPS_linkage_name ; CHECK-NEXT:.b8 90 ; CHECK-NEXT:.b8 76 ; CHECK-NEXT:.b8 54 -; CHECK-NEXT:.b8 108 -; CHECK-NEXT:.b8 114 -; CHECK-NEXT:.b8 111 -; CHECK-NEXT:.b8 117 +; CHECK-NEXT:.b8 97 +; CHECK-NEXT:.b8 115 +; CHECK-NEXT:.b8 105 ; CHECK-NEXT:.b8 110 -; CHECK-NEXT:.b8 100 +; CHECK-NEXT:.b8 104 ; CHECK-NEXT:.b8 102 -; CHECK-NEXT:.b8 0 -; CHECK-NEXT:.b8 108 // DW_AT_name -; CHECK-NEXT:.b8 114 -; CHECK-NEXT:.b8 111 -; CHECK-NEXT:.b8 117 -; CHECK-NEXT:.b8 110 -; CHECK-NEXT:.b8 100 -; CHECK-NEXT:.b8 0 -; CHECK-NEXT:.b8 4 // DW_AT_decl_file -; CHECK-NEXT:.b8 142 // DW_AT_decl_line -; CHECK-NEXT:.b32 5164 // DW_AT_type -; CHECK-NEXT:.b8 1 // DW_AT_declaration -; CHECK-NEXT:.b8 7 // Abbrev [7] 0x158b:0x5 DW_TAG_formal_parameter -; CHECK-NEXT:.b32 2116 // DW_AT_type -; CHECK-NEXT:.b8 0 // End Of Children Mark -; CHECK-NEXT:.b8 31 // Abbrev [31] 0x1591:0x23 DW_TAG_subprogram -; CHECK-NEXT:.b8 95 // DW_AT_MIPS_linkage_name -; CHECK-NEXT:.b8 90 -; CHECK-NEXT:.b8 76 -; CHECK-NEXT:.b8 55 -; CHECK-NEXT:.b8 108 -; CHECK-NEXT:.b8 108 -; CHECK-NEXT:.b8 114 -; CHECK-NEXT:.b8 111 -; CHECK-NEXT:.b8 117 -; CHECK-NEXT:.b8 110 -; CHECK-NEXT:.b8 100 ; CHECK-NEXT:.b8 102 ; CHECK-NEXT:.b8 0 -; CHECK-NEXT:.b8 108 // DW_AT_name -; CHECK-NEXT:.b8 108 -; CHECK-NEXT:.b8 114 -; CHECK-NEXT:.b8 111 -; CHECK-NEXT:.b8 117 +; CHECK-NEXT:.b8 97 // DW_AT_name +; CHECK-NEXT:.b8 115 +; CHECK-NEXT:.b8 105 ; CHECK-NEXT:.b8 110 -; CHECK-NEXT:.b8 100 +; CHECK-NEXT:.b8 104 +; CHECK-NEXT:.b8 102 ; CHECK-NEXT:.b8 0 -; CHECK-NEXT:.b8 4 // DW_AT_decl_file -; CHECK-NEXT:.b8 143 // DW_AT_decl_line -; CHECK-NEXT:.b32 3764 // DW_AT_type +; CHECK-NEXT:.b8 9 // DW_AT_decl_file +; CHECK-NEXT:.b8 95 // DW_AT_decl_line +; CHECK-NEXT:.b8 5 +; CHECK-NEXT:.b32 1554 // DW_AT_type ; CHECK-NEXT:.b8 1 // DW_AT_declaration -; CHECK-NEXT:.b8 7 // Abbrev [7] 0x15ae:0x5 DW_TAG_formal_parameter -; CHECK-NEXT:.b32 2116 // DW_AT_type +; CHECK-NEXT:.b8 6 // Abbrev [6] 0x174c:0x5 DW_TAG_formal_parameter +; CHECK-NEXT:.b32 1554 // DW_AT_type ; CHECK-NEXT:.b8 0 // End Of Children Mark -; CHECK-NEXT:.b8 31 // Abbrev [31] 0x15b4:0x24 DW_TAG_subprogram +; CHECK-NEXT:.b8 26 // Abbrev [26] 0x1752:0x28 DW_TAG_subprogram ; CHECK-NEXT:.b8 95 // DW_AT_MIPS_linkage_name ; CHECK-NEXT:.b8 90 ; CHECK-NEXT:.b8 76 -; CHECK-NEXT:.b8 52 -; CHECK-NEXT:.b8 109 -; CHECK-NEXT:.b8 111 -; CHECK-NEXT:.b8 100 +; CHECK-NEXT:.b8 54 +; CHECK-NEXT:.b8 97 +; CHECK-NEXT:.b8 116 +; CHECK-NEXT:.b8 97 +; CHECK-NEXT:.b8 110 +; CHECK-NEXT:.b8 50 ; CHECK-NEXT:.b8 102 ; CHECK-NEXT:.b8 102 -; CHECK-NEXT:.b8 80 ; CHECK-NEXT:.b8 102 ; CHECK-NEXT:.b8 0 -; CHECK-NEXT:.b8 109 // DW_AT_name -; CHECK-NEXT:.b8 111 -; CHECK-NEXT:.b8 100 +; CHECK-NEXT:.b8 97 // DW_AT_name +; CHECK-NEXT:.b8 116 +; CHECK-NEXT:.b8 97 +; CHECK-NEXT:.b8 110 +; CHECK-NEXT:.b8 50 ; CHECK-NEXT:.b8 102 ; CHECK-NEXT:.b8 0 -; CHECK-NEXT:.b8 4 // DW_AT_decl_file -; CHECK-NEXT:.b8 145 // DW_AT_decl_line -; CHECK-NEXT:.b32 2116 // DW_AT_type +; CHECK-NEXT:.b8 9 // DW_AT_decl_file +; CHECK-NEXT:.b8 47 // DW_AT_decl_line +; CHECK-NEXT:.b8 5 +; CHECK-NEXT:.b32 1554 // DW_AT_type ; CHECK-NEXT:.b8 1 // DW_AT_declaration -; CHECK-NEXT:.b8 7 
// Abbrev [7] 0x15cd:0x5 DW_TAG_formal_parameter -; CHECK-NEXT:.b32 2116 // DW_AT_type -; CHECK-NEXT:.b8 7 // Abbrev [7] 0x15d2:0x5 DW_TAG_formal_parameter -; CHECK-NEXT:.b32 2125 // DW_AT_type +; CHECK-NEXT:.b8 6 // Abbrev [6] 0x176f:0x5 DW_TAG_formal_parameter +; CHECK-NEXT:.b32 1554 // DW_AT_type +; CHECK-NEXT:.b8 6 // Abbrev [6] 0x1774:0x5 DW_TAG_formal_parameter +; CHECK-NEXT:.b32 1554 // DW_AT_type ; CHECK-NEXT:.b8 0 // End Of Children Mark -; CHECK-NEXT:.b8 31 // Abbrev [31] 0x15d8:0x1d DW_TAG_subprogram +; CHECK-NEXT:.b8 26 // Abbrev [26] 0x177a:0x20 DW_TAG_subprogram ; CHECK-NEXT:.b8 95 // DW_AT_MIPS_linkage_name ; CHECK-NEXT:.b8 90 ; CHECK-NEXT:.b8 76 -; CHECK-NEXT:.b8 51 -; CHECK-NEXT:.b8 110 +; CHECK-NEXT:.b8 53 +; CHECK-NEXT:.b8 97 +; CHECK-NEXT:.b8 116 ; CHECK-NEXT:.b8 97 ; CHECK-NEXT:.b8 110 -; CHECK-NEXT:.b8 80 -; CHECK-NEXT:.b8 75 -; CHECK-NEXT:.b8 99 +; CHECK-NEXT:.b8 102 +; CHECK-NEXT:.b8 102 ; CHECK-NEXT:.b8 0 -; CHECK-NEXT:.b8 110 // DW_AT_name +; CHECK-NEXT:.b8 97 // DW_AT_name +; CHECK-NEXT:.b8 116 ; CHECK-NEXT:.b8 97 ; CHECK-NEXT:.b8 110 +; CHECK-NEXT:.b8 102 ; CHECK-NEXT:.b8 0 -; CHECK-NEXT:.b8 4 // DW_AT_decl_file -; CHECK-NEXT:.b8 146 // DW_AT_decl_line -; CHECK-NEXT:.b32 5621 // DW_AT_type +; CHECK-NEXT:.b8 9 // DW_AT_decl_file +; CHECK-NEXT:.b8 52 // DW_AT_decl_line +; CHECK-NEXT:.b8 5 +; CHECK-NEXT:.b32 1554 // DW_AT_type ; CHECK-NEXT:.b8 1 // DW_AT_declaration -; CHECK-NEXT:.b8 7 // Abbrev [7] 0x15ef:0x5 DW_TAG_formal_parameter -; CHECK-NEXT:.b32 5631 // DW_AT_type +; CHECK-NEXT:.b8 6 // Abbrev [6] 0x1794:0x5 DW_TAG_formal_parameter +; CHECK-NEXT:.b32 1554 // DW_AT_type ; CHECK-NEXT:.b8 0 // End Of Children Mark -; CHECK-NEXT:.b8 10 // Abbrev [10] 0x15f5:0xa DW_TAG_base_type -; CHECK-NEXT:.b8 100 // DW_AT_name -; CHECK-NEXT:.b8 111 -; CHECK-NEXT:.b8 117 -; CHECK-NEXT:.b8 98 -; CHECK-NEXT:.b8 108 -; CHECK-NEXT:.b8 101 -; CHECK-NEXT:.b8 0 -; CHECK-NEXT:.b8 4 // DW_AT_encoding -; CHECK-NEXT:.b8 8 // DW_AT_byte_size -; CHECK-NEXT:.b8 12 // Abbrev [12] 0x15ff:0x5 DW_TAG_pointer_type -; CHECK-NEXT:.b32 5636 // DW_AT_type -; CHECK-NEXT:.b8 13 // Abbrev [13] 0x1604:0x5 DW_TAG_const_type -; CHECK-NEXT:.b32 5641 // DW_AT_type -; CHECK-NEXT:.b8 10 // Abbrev [10] 0x1609:0x8 DW_TAG_base_type -; CHECK-NEXT:.b8 99 // DW_AT_name -; CHECK-NEXT:.b8 104 -; CHECK-NEXT:.b8 97 -; CHECK-NEXT:.b8 114 -; CHECK-NEXT:.b8 0 -; CHECK-NEXT:.b8 8 // DW_AT_encoding -; CHECK-NEXT:.b8 1 // DW_AT_byte_size -; CHECK-NEXT:.b8 31 // Abbrev [31] 0x1611:0x1f DW_TAG_subprogram +; CHECK-NEXT:.b8 26 // Abbrev [26] 0x179a:0x22 DW_TAG_subprogram ; CHECK-NEXT:.b8 95 // DW_AT_MIPS_linkage_name ; CHECK-NEXT:.b8 90 ; CHECK-NEXT:.b8 76 -; CHECK-NEXT:.b8 52 -; CHECK-NEXT:.b8 110 +; CHECK-NEXT:.b8 54 +; CHECK-NEXT:.b8 97 +; CHECK-NEXT:.b8 116 ; CHECK-NEXT:.b8 97 ; CHECK-NEXT:.b8 110 +; CHECK-NEXT:.b8 104 +; CHECK-NEXT:.b8 102 ; CHECK-NEXT:.b8 102 -; CHECK-NEXT:.b8 80 -; CHECK-NEXT:.b8 75 -; CHECK-NEXT:.b8 99 ; CHECK-NEXT:.b8 0 -; CHECK-NEXT:.b8 110 // DW_AT_name +; CHECK-NEXT:.b8 97 // DW_AT_name +; CHECK-NEXT:.b8 116 ; CHECK-NEXT:.b8 97 ; CHECK-NEXT:.b8 110 +; CHECK-NEXT:.b8 104 ; CHECK-NEXT:.b8 102 ; CHECK-NEXT:.b8 0 -; CHECK-NEXT:.b8 4 // DW_AT_decl_file -; CHECK-NEXT:.b8 147 // DW_AT_decl_line -; CHECK-NEXT:.b32 2116 // DW_AT_type +; CHECK-NEXT:.b8 9 // DW_AT_decl_file +; CHECK-NEXT:.b8 100 // DW_AT_decl_line +; CHECK-NEXT:.b8 5 +; CHECK-NEXT:.b32 1554 // DW_AT_type ; CHECK-NEXT:.b8 1 // DW_AT_declaration -; CHECK-NEXT:.b8 7 // Abbrev [7] 0x162a:0x5 DW_TAG_formal_parameter -; CHECK-NEXT:.b32 5631 // 
DW_AT_type +; CHECK-NEXT:.b8 6 // Abbrev [6] 0x17b6:0x5 DW_TAG_formal_parameter +; CHECK-NEXT:.b32 1554 // DW_AT_type ; CHECK-NEXT:.b8 0 // End Of Children Mark -; CHECK-NEXT:.b8 31 // Abbrev [31] 0x1630:0x27 DW_TAG_subprogram +; CHECK-NEXT:.b8 26 // Abbrev [26] 0x17bc:0x20 DW_TAG_subprogram ; CHECK-NEXT:.b8 95 // DW_AT_MIPS_linkage_name ; CHECK-NEXT:.b8 90 ; CHECK-NEXT:.b8 76 -; CHECK-NEXT:.b8 57 -; CHECK-NEXT:.b8 110 -; CHECK-NEXT:.b8 101 -; CHECK-NEXT:.b8 97 -; CHECK-NEXT:.b8 114 +; CHECK-NEXT:.b8 53 +; CHECK-NEXT:.b8 99 ; CHECK-NEXT:.b8 98 -; CHECK-NEXT:.b8 121 -; CHECK-NEXT:.b8 105 -; CHECK-NEXT:.b8 110 +; CHECK-NEXT:.b8 114 ; CHECK-NEXT:.b8 116 ; CHECK-NEXT:.b8 102 +; CHECK-NEXT:.b8 102 ; CHECK-NEXT:.b8 0 -; CHECK-NEXT:.b8 110 // DW_AT_name -; CHECK-NEXT:.b8 101 -; CHECK-NEXT:.b8 97 -; CHECK-NEXT:.b8 114 +; CHECK-NEXT:.b8 99 // DW_AT_name ; CHECK-NEXT:.b8 98 -; CHECK-NEXT:.b8 121 -; CHECK-NEXT:.b8 105 -; CHECK-NEXT:.b8 110 +; CHECK-NEXT:.b8 114 ; CHECK-NEXT:.b8 116 +; CHECK-NEXT:.b8 102 ; CHECK-NEXT:.b8 0 -; CHECK-NEXT:.b8 4 // DW_AT_decl_file -; CHECK-NEXT:.b8 149 // DW_AT_decl_line -; CHECK-NEXT:.b32 2116 // DW_AT_type +; CHECK-NEXT:.b8 9 // DW_AT_decl_file +; CHECK-NEXT:.b8 150 // DW_AT_decl_line +; CHECK-NEXT:.b8 5 +; CHECK-NEXT:.b32 1554 // DW_AT_type ; CHECK-NEXT:.b8 1 // DW_AT_declaration -; CHECK-NEXT:.b8 7 // Abbrev [7] 0x1651:0x5 DW_TAG_formal_parameter -; CHECK-NEXT:.b32 2116 // DW_AT_type +; CHECK-NEXT:.b8 6 // Abbrev [6] 0x17d6:0x5 DW_TAG_formal_parameter +; CHECK-NEXT:.b32 1554 // DW_AT_type ; CHECK-NEXT:.b8 0 // End Of Children Mark -; CHECK-NEXT:.b8 31 // Abbrev [31] 0x1657:0x2d DW_TAG_subprogram +; CHECK-NEXT:.b8 26 // Abbrev [26] 0x17dc:0x20 DW_TAG_subprogram ; CHECK-NEXT:.b8 95 // DW_AT_MIPS_linkage_name ; CHECK-NEXT:.b8 90 ; CHECK-NEXT:.b8 76 -; CHECK-NEXT:.b8 57 -; CHECK-NEXT:.b8 110 -; CHECK-NEXT:.b8 101 -; CHECK-NEXT:.b8 120 -; CHECK-NEXT:.b8 116 -; CHECK-NEXT:.b8 97 -; CHECK-NEXT:.b8 102 -; CHECK-NEXT:.b8 116 +; CHECK-NEXT:.b8 53 +; CHECK-NEXT:.b8 99 ; CHECK-NEXT:.b8 101 -; CHECK-NEXT:.b8 114 +; CHECK-NEXT:.b8 105 +; CHECK-NEXT:.b8 108 ; CHECK-NEXT:.b8 102 ; CHECK-NEXT:.b8 102 ; CHECK-NEXT:.b8 0 -; CHECK-NEXT:.b8 110 // DW_AT_name +; CHECK-NEXT:.b8 99 // DW_AT_name ; CHECK-NEXT:.b8 101 -; CHECK-NEXT:.b8 120 -; CHECK-NEXT:.b8 116 -; CHECK-NEXT:.b8 97 +; CHECK-NEXT:.b8 105 +; CHECK-NEXT:.b8 108 ; CHECK-NEXT:.b8 102 -; CHECK-NEXT:.b8 116 -; CHECK-NEXT:.b8 101 -; CHECK-NEXT:.b8 114 ; CHECK-NEXT:.b8 0 -; CHECK-NEXT:.b8 4 // DW_AT_decl_file -; CHECK-NEXT:.b8 151 // DW_AT_decl_line -; CHECK-NEXT:.b32 2116 // DW_AT_type +; CHECK-NEXT:.b8 11 // DW_AT_decl_file +; CHECK-NEXT:.b8 155 // DW_AT_decl_line +; CHECK-NEXT:.b8 2 +; CHECK-NEXT:.b32 1554 // DW_AT_type ; CHECK-NEXT:.b8 1 // DW_AT_declaration -; CHECK-NEXT:.b8 7 // Abbrev [7] 0x1679:0x5 DW_TAG_formal_parameter -; CHECK-NEXT:.b32 2116 // DW_AT_type -; CHECK-NEXT:.b8 7 // Abbrev [7] 0x167e:0x5 DW_TAG_formal_parameter -; CHECK-NEXT:.b32 2116 // DW_AT_type +; CHECK-NEXT:.b8 6 // Abbrev [6] 0x17f6:0x5 DW_TAG_formal_parameter +; CHECK-NEXT:.b32 1554 // DW_AT_type ; CHECK-NEXT:.b8 0 // End Of Children Mark -; CHECK-NEXT:.b8 31 // Abbrev [31] 0x1684:0x21 DW_TAG_subprogram +; CHECK-NEXT:.b8 26 // Abbrev [26] 0x17fc:0x2e DW_TAG_subprogram ; CHECK-NEXT:.b8 95 // DW_AT_MIPS_linkage_name ; CHECK-NEXT:.b8 90 ; CHECK-NEXT:.b8 76 -; CHECK-NEXT:.b8 51 -; CHECK-NEXT:.b8 112 +; CHECK-NEXT:.b8 57 +; CHECK-NEXT:.b8 99 ; CHECK-NEXT:.b8 111 -; CHECK-NEXT:.b8 119 -; CHECK-NEXT:.b8 102 +; CHECK-NEXT:.b8 112 +; CHECK-NEXT:.b8 121 +; 
CHECK-NEXT:.b8 115 ; CHECK-NEXT:.b8 105 +; CHECK-NEXT:.b8 103 +; CHECK-NEXT:.b8 110 +; CHECK-NEXT:.b8 102 +; CHECK-NEXT:.b8 102 +; CHECK-NEXT:.b8 102 ; CHECK-NEXT:.b8 0 -; CHECK-NEXT:.b8 112 // DW_AT_name +; CHECK-NEXT:.b8 99 // DW_AT_name ; CHECK-NEXT:.b8 111 -; CHECK-NEXT:.b8 119 +; CHECK-NEXT:.b8 112 +; CHECK-NEXT:.b8 121 +; CHECK-NEXT:.b8 115 +; CHECK-NEXT:.b8 105 +; CHECK-NEXT:.b8 103 +; CHECK-NEXT:.b8 110 +; CHECK-NEXT:.b8 102 ; CHECK-NEXT:.b8 0 -; CHECK-NEXT:.b8 4 // DW_AT_decl_file -; CHECK-NEXT:.b8 155 // DW_AT_decl_line -; CHECK-NEXT:.b32 2116 // DW_AT_type +; CHECK-NEXT:.b8 9 // DW_AT_decl_file +; CHECK-NEXT:.b8 165 // DW_AT_decl_line +; CHECK-NEXT:.b8 4 +; CHECK-NEXT:.b32 1554 // DW_AT_type ; CHECK-NEXT:.b8 1 // DW_AT_declaration -; CHECK-NEXT:.b8 7 // Abbrev [7] 0x169a:0x5 DW_TAG_formal_parameter -; CHECK-NEXT:.b32 2116 // DW_AT_type -; CHECK-NEXT:.b8 7 // Abbrev [7] 0x169f:0x5 DW_TAG_formal_parameter -; CHECK-NEXT:.b32 4579 // DW_AT_type +; CHECK-NEXT:.b8 6 // Abbrev [6] 0x181f:0x5 DW_TAG_formal_parameter +; CHECK-NEXT:.b32 1554 // DW_AT_type +; CHECK-NEXT:.b8 6 // Abbrev [6] 0x1824:0x5 DW_TAG_formal_parameter +; CHECK-NEXT:.b32 1554 // DW_AT_type ; CHECK-NEXT:.b8 0 // End Of Children Mark -; CHECK-NEXT:.b8 31 // Abbrev [31] 0x16a5:0x2d DW_TAG_subprogram +; CHECK-NEXT:.b8 26 // Abbrev [26] 0x182a:0x1e DW_TAG_subprogram ; CHECK-NEXT:.b8 95 // DW_AT_MIPS_linkage_name ; CHECK-NEXT:.b8 90 ; CHECK-NEXT:.b8 76 -; CHECK-NEXT:.b8 57 -; CHECK-NEXT:.b8 114 -; CHECK-NEXT:.b8 101 -; CHECK-NEXT:.b8 109 -; CHECK-NEXT:.b8 97 -; CHECK-NEXT:.b8 105 -; CHECK-NEXT:.b8 110 -; CHECK-NEXT:.b8 100 -; CHECK-NEXT:.b8 101 -; CHECK-NEXT:.b8 114 +; CHECK-NEXT:.b8 52 +; CHECK-NEXT:.b8 99 +; CHECK-NEXT:.b8 111 +; CHECK-NEXT:.b8 115 ; CHECK-NEXT:.b8 102 ; CHECK-NEXT:.b8 102 ; CHECK-NEXT:.b8 0 -; CHECK-NEXT:.b8 114 // DW_AT_name -; CHECK-NEXT:.b8 101 -; CHECK-NEXT:.b8 109 -; CHECK-NEXT:.b8 97 -; CHECK-NEXT:.b8 105 -; CHECK-NEXT:.b8 110 -; CHECK-NEXT:.b8 100 -; CHECK-NEXT:.b8 101 -; CHECK-NEXT:.b8 114 +; CHECK-NEXT:.b8 99 // DW_AT_name +; CHECK-NEXT:.b8 111 +; CHECK-NEXT:.b8 115 +; CHECK-NEXT:.b8 102 ; CHECK-NEXT:.b8 0 -; CHECK-NEXT:.b8 4 // DW_AT_decl_file -; CHECK-NEXT:.b8 157 // DW_AT_decl_line -; CHECK-NEXT:.b32 2116 // DW_AT_type +; CHECK-NEXT:.b8 9 // DW_AT_decl_file +; CHECK-NEXT:.b8 219 // DW_AT_decl_line +; CHECK-NEXT:.b8 4 +; CHECK-NEXT:.b32 1554 // DW_AT_type ; CHECK-NEXT:.b8 1 // DW_AT_declaration -; CHECK-NEXT:.b8 7 // Abbrev [7] 0x16c7:0x5 DW_TAG_formal_parameter -; CHECK-NEXT:.b32 2116 // DW_AT_type -; CHECK-NEXT:.b8 7 // Abbrev [7] 0x16cc:0x5 DW_TAG_formal_parameter -; CHECK-NEXT:.b32 2116 // DW_AT_type +; CHECK-NEXT:.b8 6 // Abbrev [6] 0x1842:0x5 DW_TAG_formal_parameter +; CHECK-NEXT:.b32 1554 // DW_AT_type ; CHECK-NEXT:.b8 0 // End Of Children Mark -; CHECK-NEXT:.b8 31 // Abbrev [31] 0x16d2:0x2e DW_TAG_subprogram +; CHECK-NEXT:.b8 26 // Abbrev [26] 0x1848:0x20 DW_TAG_subprogram ; CHECK-NEXT:.b8 95 // DW_AT_MIPS_linkage_name ; CHECK-NEXT:.b8 90 ; CHECK-NEXT:.b8 76 -; CHECK-NEXT:.b8 54 -; CHECK-NEXT:.b8 114 -; CHECK-NEXT:.b8 101 -; CHECK-NEXT:.b8 109 -; CHECK-NEXT:.b8 113 -; CHECK-NEXT:.b8 117 +; CHECK-NEXT:.b8 53 +; CHECK-NEXT:.b8 99 ; CHECK-NEXT:.b8 111 +; CHECK-NEXT:.b8 115 +; CHECK-NEXT:.b8 104 ; CHECK-NEXT:.b8 102 ; CHECK-NEXT:.b8 102 -; CHECK-NEXT:.b8 80 -; CHECK-NEXT:.b8 105 ; CHECK-NEXT:.b8 0 -; CHECK-NEXT:.b8 114 // DW_AT_name -; CHECK-NEXT:.b8 101 -; CHECK-NEXT:.b8 109 -; CHECK-NEXT:.b8 113 -; CHECK-NEXT:.b8 117 +; CHECK-NEXT:.b8 99 // DW_AT_name ; CHECK-NEXT:.b8 111 +; 
CHECK-NEXT:.b8 115 +; CHECK-NEXT:.b8 104 +; CHECK-NEXT:.b8 102 ; CHECK-NEXT:.b8 0 -; CHECK-NEXT:.b8 4 // DW_AT_decl_file -; CHECK-NEXT:.b8 159 // DW_AT_decl_line -; CHECK-NEXT:.b32 2116 // DW_AT_type +; CHECK-NEXT:.b8 9 // DW_AT_decl_file +; CHECK-NEXT:.b8 32 // DW_AT_decl_line +; CHECK-NEXT:.b8 5 +; CHECK-NEXT:.b32 1554 // DW_AT_type ; CHECK-NEXT:.b8 1 // DW_AT_declaration -; CHECK-NEXT:.b8 7 // Abbrev [7] 0x16f0:0x5 DW_TAG_formal_parameter -; CHECK-NEXT:.b32 2116 // DW_AT_type -; CHECK-NEXT:.b8 7 // Abbrev [7] 0x16f5:0x5 DW_TAG_formal_parameter -; CHECK-NEXT:.b32 2116 // DW_AT_type -; CHECK-NEXT:.b8 7 // Abbrev [7] 0x16fa:0x5 DW_TAG_formal_parameter -; CHECK-NEXT:.b32 4624 // DW_AT_type +; CHECK-NEXT:.b8 6 // Abbrev [6] 0x1862:0x5 DW_TAG_formal_parameter +; CHECK-NEXT:.b32 1554 // DW_AT_type ; CHECK-NEXT:.b8 0 // End Of Children Mark -; CHECK-NEXT:.b8 31 // Abbrev [31] 0x1700:0x1d DW_TAG_subprogram +; CHECK-NEXT:.b8 26 // Abbrev [26] 0x1868:0x20 DW_TAG_subprogram ; CHECK-NEXT:.b8 95 // DW_AT_MIPS_linkage_name ; CHECK-NEXT:.b8 90 ; CHECK-NEXT:.b8 76 -; CHECK-NEXT:.b8 52 +; CHECK-NEXT:.b8 53 +; CHECK-NEXT:.b8 101 ; CHECK-NEXT:.b8 114 -; CHECK-NEXT:.b8 105 -; CHECK-NEXT:.b8 110 -; CHECK-NEXT:.b8 116 +; CHECK-NEXT:.b8 102 +; CHECK-NEXT:.b8 99 +; CHECK-NEXT:.b8 102 ; CHECK-NEXT:.b8 102 ; CHECK-NEXT:.b8 0 -; CHECK-NEXT:.b8 114 // DW_AT_name -; CHECK-NEXT:.b8 105 -; CHECK-NEXT:.b8 110 -; CHECK-NEXT:.b8 116 +; CHECK-NEXT:.b8 101 // DW_AT_name +; CHECK-NEXT:.b8 114 +; CHECK-NEXT:.b8 102 +; CHECK-NEXT:.b8 99 +; CHECK-NEXT:.b8 102 ; CHECK-NEXT:.b8 0 -; CHECK-NEXT:.b8 4 // DW_AT_decl_file -; CHECK-NEXT:.b8 161 // DW_AT_decl_line -; CHECK-NEXT:.b32 2116 // DW_AT_type +; CHECK-NEXT:.b8 9 // DW_AT_decl_file +; CHECK-NEXT:.b8 210 // DW_AT_decl_line +; CHECK-NEXT:.b8 5 +; CHECK-NEXT:.b32 1554 // DW_AT_type ; CHECK-NEXT:.b8 1 // DW_AT_declaration -; CHECK-NEXT:.b8 7 // Abbrev [7] 0x1717:0x5 DW_TAG_formal_parameter -; CHECK-NEXT:.b32 2116 // DW_AT_type +; CHECK-NEXT:.b8 6 // Abbrev [6] 0x1882:0x5 DW_TAG_formal_parameter +; CHECK-NEXT:.b32 1554 // DW_AT_type ; CHECK-NEXT:.b8 0 // End Of Children Mark -; CHECK-NEXT:.b8 31 // Abbrev [31] 0x171d:0x1f DW_TAG_subprogram +; CHECK-NEXT:.b8 26 // Abbrev [26] 0x1888:0x1e DW_TAG_subprogram ; CHECK-NEXT:.b8 95 // DW_AT_MIPS_linkage_name ; CHECK-NEXT:.b8 90 ; CHECK-NEXT:.b8 76 -; CHECK-NEXT:.b8 53 +; CHECK-NEXT:.b8 52 +; CHECK-NEXT:.b8 101 ; CHECK-NEXT:.b8 114 -; CHECK-NEXT:.b8 111 -; CHECK-NEXT:.b8 117 -; CHECK-NEXT:.b8 110 -; CHECK-NEXT:.b8 100 +; CHECK-NEXT:.b8 102 +; CHECK-NEXT:.b8 102 ; CHECK-NEXT:.b8 102 ; CHECK-NEXT:.b8 0 -; CHECK-NEXT:.b8 114 // DW_AT_name -; CHECK-NEXT:.b8 111 -; CHECK-NEXT:.b8 117 -; CHECK-NEXT:.b8 110 -; CHECK-NEXT:.b8 100 +; CHECK-NEXT:.b8 101 // DW_AT_name +; CHECK-NEXT:.b8 114 +; CHECK-NEXT:.b8 102 +; CHECK-NEXT:.b8 102 ; CHECK-NEXT:.b8 0 -; CHECK-NEXT:.b8 4 // DW_AT_decl_file -; CHECK-NEXT:.b8 163 // DW_AT_decl_line -; CHECK-NEXT:.b32 2116 // DW_AT_type +; CHECK-NEXT:.b8 9 // DW_AT_decl_file +; CHECK-NEXT:.b8 200 // DW_AT_decl_line +; CHECK-NEXT:.b8 5 +; CHECK-NEXT:.b32 1554 // DW_AT_type ; CHECK-NEXT:.b8 1 // DW_AT_declaration -; CHECK-NEXT:.b8 7 // Abbrev [7] 0x1736:0x5 DW_TAG_formal_parameter -; CHECK-NEXT:.b32 2116 // DW_AT_type +; CHECK-NEXT:.b8 6 // Abbrev [6] 0x18a0:0x5 DW_TAG_formal_parameter +; CHECK-NEXT:.b32 1554 // DW_AT_type ; CHECK-NEXT:.b8 0 // End Of Children Mark -; CHECK-NEXT:.b8 31 // Abbrev [31] 0x173c:0x29 DW_TAG_subprogram +; CHECK-NEXT:.b8 26 // Abbrev [26] 0x18a6:0x20 DW_TAG_subprogram ; CHECK-NEXT:.b8 95 // 
DW_AT_MIPS_linkage_name ; CHECK-NEXT:.b8 90 ; CHECK-NEXT:.b8 76 -; CHECK-NEXT:.b8 55 -; CHECK-NEXT:.b8 115 -; CHECK-NEXT:.b8 99 -; CHECK-NEXT:.b8 97 -; CHECK-NEXT:.b8 108 -; CHECK-NEXT:.b8 98 -; CHECK-NEXT:.b8 108 -; CHECK-NEXT:.b8 110 +; CHECK-NEXT:.b8 53 +; CHECK-NEXT:.b8 101 +; CHECK-NEXT:.b8 120 +; CHECK-NEXT:.b8 112 +; CHECK-NEXT:.b8 50 +; CHECK-NEXT:.b8 102 ; CHECK-NEXT:.b8 102 -; CHECK-NEXT:.b8 108 ; CHECK-NEXT:.b8 0 -; CHECK-NEXT:.b8 115 // DW_AT_name -; CHECK-NEXT:.b8 99 -; CHECK-NEXT:.b8 97 -; CHECK-NEXT:.b8 108 -; CHECK-NEXT:.b8 98 -; CHECK-NEXT:.b8 108 -; CHECK-NEXT:.b8 110 +; CHECK-NEXT:.b8 101 // DW_AT_name +; CHECK-NEXT:.b8 120 +; CHECK-NEXT:.b8 112 +; CHECK-NEXT:.b8 50 +; CHECK-NEXT:.b8 102 ; CHECK-NEXT:.b8 0 -; CHECK-NEXT:.b8 4 // DW_AT_decl_file -; CHECK-NEXT:.b8 165 // DW_AT_decl_line -; CHECK-NEXT:.b32 2116 // DW_AT_type +; CHECK-NEXT:.b8 11 // DW_AT_decl_file +; CHECK-NEXT:.b8 145 // DW_AT_decl_line +; CHECK-NEXT:.b8 2 +; CHECK-NEXT:.b32 1554 // DW_AT_type ; CHECK-NEXT:.b8 1 // DW_AT_declaration -; CHECK-NEXT:.b8 7 // Abbrev [7] 0x175a:0x5 DW_TAG_formal_parameter -; CHECK-NEXT:.b32 2116 // DW_AT_type -; CHECK-NEXT:.b8 7 // Abbrev [7] 0x175f:0x5 DW_TAG_formal_parameter -; CHECK-NEXT:.b32 5164 // DW_AT_type +; CHECK-NEXT:.b8 6 // Abbrev [6] 0x18c0:0x5 DW_TAG_formal_parameter +; CHECK-NEXT:.b32 1554 // DW_AT_type ; CHECK-NEXT:.b8 0 // End Of Children Mark -; CHECK-NEXT:.b8 31 // Abbrev [31] 0x1765:0x27 DW_TAG_subprogram +; CHECK-NEXT:.b8 26 // Abbrev [26] 0x18c6:0x1e DW_TAG_subprogram ; CHECK-NEXT:.b8 95 // DW_AT_MIPS_linkage_name ; CHECK-NEXT:.b8 90 ; CHECK-NEXT:.b8 76 -; CHECK-NEXT:.b8 54 -; CHECK-NEXT:.b8 115 -; CHECK-NEXT:.b8 99 -; CHECK-NEXT:.b8 97 -; CHECK-NEXT:.b8 108 -; CHECK-NEXT:.b8 98 -; CHECK-NEXT:.b8 110 +; CHECK-NEXT:.b8 52 +; CHECK-NEXT:.b8 101 +; CHECK-NEXT:.b8 120 +; CHECK-NEXT:.b8 112 +; CHECK-NEXT:.b8 102 ; CHECK-NEXT:.b8 102 -; CHECK-NEXT:.b8 105 ; CHECK-NEXT:.b8 0 -; CHECK-NEXT:.b8 115 // DW_AT_name -; CHECK-NEXT:.b8 99 -; CHECK-NEXT:.b8 97 -; CHECK-NEXT:.b8 108 -; CHECK-NEXT:.b8 98 -; CHECK-NEXT:.b8 110 +; CHECK-NEXT:.b8 101 // DW_AT_name +; CHECK-NEXT:.b8 120 +; CHECK-NEXT:.b8 112 +; CHECK-NEXT:.b8 102 ; CHECK-NEXT:.b8 0 -; CHECK-NEXT:.b8 4 // DW_AT_decl_file -; CHECK-NEXT:.b8 167 // DW_AT_decl_line -; CHECK-NEXT:.b32 2116 // DW_AT_type +; CHECK-NEXT:.b8 9 // DW_AT_decl_file +; CHECK-NEXT:.b8 14 // DW_AT_decl_line +; CHECK-NEXT:.b8 5 +; CHECK-NEXT:.b32 1554 // DW_AT_type ; CHECK-NEXT:.b8 1 // DW_AT_declaration -; CHECK-NEXT:.b8 7 // Abbrev [7] 0x1781:0x5 DW_TAG_formal_parameter -; CHECK-NEXT:.b32 2116 // DW_AT_type -; CHECK-NEXT:.b8 7 // Abbrev [7] 0x1786:0x5 DW_TAG_formal_parameter -; CHECK-NEXT:.b32 4579 // DW_AT_type +; CHECK-NEXT:.b8 6 // Abbrev [6] 0x18de:0x5 DW_TAG_formal_parameter +; CHECK-NEXT:.b32 1554 // DW_AT_type ; CHECK-NEXT:.b8 0 // End Of Children Mark -; CHECK-NEXT:.b8 31 // Abbrev [31] 0x178c:0x23 DW_TAG_subprogram +; CHECK-NEXT:.b8 26 // Abbrev [26] 0x18e4:0x22 DW_TAG_subprogram ; CHECK-NEXT:.b8 95 // DW_AT_MIPS_linkage_name ; CHECK-NEXT:.b8 90 ; CHECK-NEXT:.b8 76 -; CHECK-NEXT:.b8 55 -; CHECK-NEXT:.b8 115 -; CHECK-NEXT:.b8 105 -; CHECK-NEXT:.b8 103 -; CHECK-NEXT:.b8 110 -; CHECK-NEXT:.b8 98 -; CHECK-NEXT:.b8 105 -; CHECK-NEXT:.b8 116 +; CHECK-NEXT:.b8 54 +; CHECK-NEXT:.b8 101 +; CHECK-NEXT:.b8 120 +; CHECK-NEXT:.b8 112 +; CHECK-NEXT:.b8 109 +; CHECK-NEXT:.b8 49 +; CHECK-NEXT:.b8 102 ; CHECK-NEXT:.b8 102 ; CHECK-NEXT:.b8 0 -; CHECK-NEXT:.b8 115 // DW_AT_name -; CHECK-NEXT:.b8 105 -; CHECK-NEXT:.b8 103 -; CHECK-NEXT:.b8 110 -; 
CHECK-NEXT:.b8 98 -; CHECK-NEXT:.b8 105 -; CHECK-NEXT:.b8 116 +; CHECK-NEXT:.b8 101 // DW_AT_name +; CHECK-NEXT:.b8 120 +; CHECK-NEXT:.b8 112 +; CHECK-NEXT:.b8 109 +; CHECK-NEXT:.b8 49 +; CHECK-NEXT:.b8 102 ; CHECK-NEXT:.b8 0 -; CHECK-NEXT:.b8 4 // DW_AT_decl_file -; CHECK-NEXT:.b8 169 // DW_AT_decl_line -; CHECK-NEXT:.b32 4734 // DW_AT_type +; CHECK-NEXT:.b8 9 // DW_AT_decl_file +; CHECK-NEXT:.b8 105 // DW_AT_decl_line +; CHECK-NEXT:.b8 5 +; CHECK-NEXT:.b32 1554 // DW_AT_type ; CHECK-NEXT:.b8 1 // DW_AT_declaration -; CHECK-NEXT:.b8 7 // Abbrev [7] 0x17a9:0x5 DW_TAG_formal_parameter -; CHECK-NEXT:.b32 2116 // DW_AT_type +; CHECK-NEXT:.b8 6 // Abbrev [6] 0x1900:0x5 DW_TAG_formal_parameter +; CHECK-NEXT:.b32 1554 // DW_AT_type ; CHECK-NEXT:.b8 0 // End Of Children Mark -; CHECK-NEXT:.b8 31 // Abbrev [31] 0x17af:0x1b DW_TAG_subprogram +; CHECK-NEXT:.b8 26 // Abbrev [26] 0x1906:0x20 DW_TAG_subprogram ; CHECK-NEXT:.b8 95 // DW_AT_MIPS_linkage_name ; CHECK-NEXT:.b8 90 ; CHECK-NEXT:.b8 76 -; CHECK-NEXT:.b8 51 +; CHECK-NEXT:.b8 53 +; CHECK-NEXT:.b8 102 +; CHECK-NEXT:.b8 97 +; CHECK-NEXT:.b8 98 ; CHECK-NEXT:.b8 115 -; CHECK-NEXT:.b8 105 -; CHECK-NEXT:.b8 110 +; CHECK-NEXT:.b8 102 ; CHECK-NEXT:.b8 102 ; CHECK-NEXT:.b8 0 -; CHECK-NEXT:.b8 115 // DW_AT_name -; CHECK-NEXT:.b8 105 -; CHECK-NEXT:.b8 110 +; CHECK-NEXT:.b8 102 // DW_AT_name +; CHECK-NEXT:.b8 97 +; CHECK-NEXT:.b8 98 +; CHECK-NEXT:.b8 115 +; CHECK-NEXT:.b8 102 ; CHECK-NEXT:.b8 0 -; CHECK-NEXT:.b8 4 // DW_AT_decl_file -; CHECK-NEXT:.b8 171 // DW_AT_decl_line -; CHECK-NEXT:.b32 2116 // DW_AT_type +; CHECK-NEXT:.b8 11 // DW_AT_decl_file +; CHECK-NEXT:.b8 95 // DW_AT_decl_line +; CHECK-NEXT:.b8 2 +; CHECK-NEXT:.b32 1554 // DW_AT_type ; CHECK-NEXT:.b8 1 // DW_AT_declaration -; CHECK-NEXT:.b8 7 // Abbrev [7] 0x17c4:0x5 DW_TAG_formal_parameter -; CHECK-NEXT:.b32 2116 // DW_AT_type +; CHECK-NEXT:.b8 6 // Abbrev [6] 0x1920:0x5 DW_TAG_formal_parameter +; CHECK-NEXT:.b32 1554 // DW_AT_type ; CHECK-NEXT:.b8 0 // End Of Children Mark -; CHECK-NEXT:.b8 31 // Abbrev [31] 0x17ca:0x1d DW_TAG_subprogram +; CHECK-NEXT:.b8 26 // Abbrev [26] 0x1926:0x26 DW_TAG_subprogram ; CHECK-NEXT:.b8 95 // DW_AT_MIPS_linkage_name ; CHECK-NEXT:.b8 90 ; CHECK-NEXT:.b8 76 -; CHECK-NEXT:.b8 52 -; CHECK-NEXT:.b8 115 +; CHECK-NEXT:.b8 53 +; CHECK-NEXT:.b8 102 +; CHECK-NEXT:.b8 100 ; CHECK-NEXT:.b8 105 -; CHECK-NEXT:.b8 110 -; CHECK-NEXT:.b8 104 +; CHECK-NEXT:.b8 109 +; CHECK-NEXT:.b8 102 +; CHECK-NEXT:.b8 102 ; CHECK-NEXT:.b8 102 ; CHECK-NEXT:.b8 0 -; CHECK-NEXT:.b8 115 // DW_AT_name +; CHECK-NEXT:.b8 102 // DW_AT_name +; CHECK-NEXT:.b8 100 ; CHECK-NEXT:.b8 105 -; CHECK-NEXT:.b8 110 -; CHECK-NEXT:.b8 104 +; CHECK-NEXT:.b8 109 +; CHECK-NEXT:.b8 102 ; CHECK-NEXT:.b8 0 -; CHECK-NEXT:.b8 4 // DW_AT_decl_file -; CHECK-NEXT:.b8 173 // DW_AT_decl_line -; CHECK-NEXT:.b32 2116 // DW_AT_type +; CHECK-NEXT:.b8 9 // DW_AT_decl_file +; CHECK-NEXT:.b8 80 // DW_AT_decl_line +; CHECK-NEXT:.b8 6 +; CHECK-NEXT:.b32 1554 // DW_AT_type ; CHECK-NEXT:.b8 1 // DW_AT_declaration -; CHECK-NEXT:.b8 7 // Abbrev [7] 0x17e1:0x5 DW_TAG_formal_parameter -; CHECK-NEXT:.b32 2116 // DW_AT_type +; CHECK-NEXT:.b8 6 // Abbrev [6] 0x1941:0x5 DW_TAG_formal_parameter +; CHECK-NEXT:.b32 1554 // DW_AT_type +; CHECK-NEXT:.b8 6 // Abbrev [6] 0x1946:0x5 DW_TAG_formal_parameter +; CHECK-NEXT:.b32 1554 // DW_AT_type ; CHECK-NEXT:.b8 0 // End Of Children Mark -; CHECK-NEXT:.b8 31 // Abbrev [31] 0x17e7:0x1d DW_TAG_subprogram +; CHECK-NEXT:.b8 26 // Abbrev [26] 0x194c:0x22 DW_TAG_subprogram ; CHECK-NEXT:.b8 95 // 
DW_AT_MIPS_linkage_name ; CHECK-NEXT:.b8 90 ; CHECK-NEXT:.b8 76 -; CHECK-NEXT:.b8 52 -; CHECK-NEXT:.b8 115 -; CHECK-NEXT:.b8 113 +; CHECK-NEXT:.b8 54 +; CHECK-NEXT:.b8 102 +; CHECK-NEXT:.b8 108 +; CHECK-NEXT:.b8 111 +; CHECK-NEXT:.b8 111 ; CHECK-NEXT:.b8 114 -; CHECK-NEXT:.b8 116 +; CHECK-NEXT:.b8 102 ; CHECK-NEXT:.b8 102 ; CHECK-NEXT:.b8 0 -; CHECK-NEXT:.b8 115 // DW_AT_name -; CHECK-NEXT:.b8 113 +; CHECK-NEXT:.b8 102 // DW_AT_name +; CHECK-NEXT:.b8 108 +; CHECK-NEXT:.b8 111 +; CHECK-NEXT:.b8 111 ; CHECK-NEXT:.b8 114 -; CHECK-NEXT:.b8 116 -; CHECK-NEXT:.b8 0 -; CHECK-NEXT:.b8 4 // DW_AT_decl_file -; CHECK-NEXT:.b8 175 // DW_AT_decl_line -; CHECK-NEXT:.b32 2116 // DW_AT_type -; CHECK-NEXT:.b8 1 // DW_AT_declaration -; CHECK-NEXT:.b8 7 // Abbrev [7] 0x17fe:0x5 DW_TAG_formal_parameter -; CHECK-NEXT:.b32 2116 // DW_AT_type -; CHECK-NEXT:.b8 0 // End Of Children Mark -; CHECK-NEXT:.b8 31 // Abbrev [31] 0x1804:0x1b DW_TAG_subprogram -; CHECK-NEXT:.b8 95 // DW_AT_MIPS_linkage_name -; CHECK-NEXT:.b8 90 -; CHECK-NEXT:.b8 76 -; CHECK-NEXT:.b8 51 -; CHECK-NEXT:.b8 116 -; CHECK-NEXT:.b8 97 -; CHECK-NEXT:.b8 110 ; CHECK-NEXT:.b8 102 ; CHECK-NEXT:.b8 0 -; CHECK-NEXT:.b8 116 // DW_AT_name -; CHECK-NEXT:.b8 97 -; CHECK-NEXT:.b8 110 -; CHECK-NEXT:.b8 0 -; CHECK-NEXT:.b8 4 // DW_AT_decl_file -; CHECK-NEXT:.b8 177 // DW_AT_decl_line -; CHECK-NEXT:.b32 2116 // DW_AT_type +; CHECK-NEXT:.b8 11 // DW_AT_decl_file +; CHECK-NEXT:.b8 85 // DW_AT_decl_line +; CHECK-NEXT:.b8 2 +; CHECK-NEXT:.b32 1554 // DW_AT_type ; CHECK-NEXT:.b8 1 // DW_AT_declaration -; CHECK-NEXT:.b8 7 // Abbrev [7] 0x1819:0x5 DW_TAG_formal_parameter -; CHECK-NEXT:.b32 2116 // DW_AT_type +; CHECK-NEXT:.b8 6 // Abbrev [6] 0x1968:0x5 DW_TAG_formal_parameter +; CHECK-NEXT:.b32 1554 // DW_AT_type ; CHECK-NEXT:.b8 0 // End Of Children Mark -; CHECK-NEXT:.b8 31 // Abbrev [31] 0x181f:0x1d DW_TAG_subprogram +; CHECK-NEXT:.b8 26 // Abbrev [26] 0x196e:0x2a DW_TAG_subprogram ; CHECK-NEXT:.b8 95 // DW_AT_MIPS_linkage_name ; CHECK-NEXT:.b8 90 ; CHECK-NEXT:.b8 76 ; CHECK-NEXT:.b8 52 -; CHECK-NEXT:.b8 116 +; CHECK-NEXT:.b8 102 +; CHECK-NEXT:.b8 109 ; CHECK-NEXT:.b8 97 -; CHECK-NEXT:.b8 110 -; CHECK-NEXT:.b8 104 +; CHECK-NEXT:.b8 102 +; CHECK-NEXT:.b8 102 +; CHECK-NEXT:.b8 102 ; CHECK-NEXT:.b8 102 ; CHECK-NEXT:.b8 0 -; CHECK-NEXT:.b8 116 // DW_AT_name +; CHECK-NEXT:.b8 102 // DW_AT_name +; CHECK-NEXT:.b8 109 ; CHECK-NEXT:.b8 97 -; CHECK-NEXT:.b8 110 -; CHECK-NEXT:.b8 104 +; CHECK-NEXT:.b8 102 ; CHECK-NEXT:.b8 0 -; CHECK-NEXT:.b8 4 // DW_AT_decl_file -; CHECK-NEXT:.b8 179 // DW_AT_decl_line -; CHECK-NEXT:.b32 2116 // DW_AT_type +; CHECK-NEXT:.b8 9 // DW_AT_decl_file +; CHECK-NEXT:.b8 32 // DW_AT_decl_line +; CHECK-NEXT:.b8 6 +; CHECK-NEXT:.b32 1554 // DW_AT_type ; CHECK-NEXT:.b8 1 // DW_AT_declaration -; CHECK-NEXT:.b8 7 // Abbrev [7] 0x1836:0x5 DW_TAG_formal_parameter -; CHECK-NEXT:.b32 2116 // DW_AT_type +; CHECK-NEXT:.b8 6 // Abbrev [6] 0x1988:0x5 DW_TAG_formal_parameter +; CHECK-NEXT:.b32 1554 // DW_AT_type +; CHECK-NEXT:.b8 6 // Abbrev [6] 0x198d:0x5 DW_TAG_formal_parameter +; CHECK-NEXT:.b32 1554 // DW_AT_type +; CHECK-NEXT:.b8 6 // Abbrev [6] 0x1992:0x5 DW_TAG_formal_parameter +; CHECK-NEXT:.b32 1554 // DW_AT_type ; CHECK-NEXT:.b8 0 // End Of Children Mark -; CHECK-NEXT:.b8 31 // Abbrev [31] 0x183c:0x21 DW_TAG_subprogram +; CHECK-NEXT:.b8 26 // Abbrev [26] 0x1998:0x26 DW_TAG_subprogram ; CHECK-NEXT:.b8 95 // DW_AT_MIPS_linkage_name ; CHECK-NEXT:.b8 90 ; CHECK-NEXT:.b8 76 -; CHECK-NEXT:.b8 54 -; CHECK-NEXT:.b8 116 -; CHECK-NEXT:.b8 103 -; CHECK-NEXT:.b8 97 
-; CHECK-NEXT:.b8 109 +; CHECK-NEXT:.b8 53 +; CHECK-NEXT:.b8 102 ; CHECK-NEXT:.b8 109 ; CHECK-NEXT:.b8 97 +; CHECK-NEXT:.b8 120 +; CHECK-NEXT:.b8 102 +; CHECK-NEXT:.b8 102 ; CHECK-NEXT:.b8 102 ; CHECK-NEXT:.b8 0 -; CHECK-NEXT:.b8 116 // DW_AT_name -; CHECK-NEXT:.b8 103 -; CHECK-NEXT:.b8 97 -; CHECK-NEXT:.b8 109 +; CHECK-NEXT:.b8 102 // DW_AT_name ; CHECK-NEXT:.b8 109 ; CHECK-NEXT:.b8 97 +; CHECK-NEXT:.b8 120 +; CHECK-NEXT:.b8 102 ; CHECK-NEXT:.b8 0 -; CHECK-NEXT:.b8 4 // DW_AT_decl_file -; CHECK-NEXT:.b8 181 // DW_AT_decl_line -; CHECK-NEXT:.b32 2116 // DW_AT_type +; CHECK-NEXT:.b8 11 // DW_AT_decl_file +; CHECK-NEXT:.b8 110 // DW_AT_decl_line +; CHECK-NEXT:.b8 2 +; CHECK-NEXT:.b32 1554 // DW_AT_type ; CHECK-NEXT:.b8 1 // DW_AT_declaration -; CHECK-NEXT:.b8 7 // Abbrev [7] 0x1857:0x5 DW_TAG_formal_parameter -; CHECK-NEXT:.b32 2116 // DW_AT_type +; CHECK-NEXT:.b8 6 // Abbrev [6] 0x19b3:0x5 DW_TAG_formal_parameter +; CHECK-NEXT:.b32 1554 // DW_AT_type +; CHECK-NEXT:.b8 6 // Abbrev [6] 0x19b8:0x5 DW_TAG_formal_parameter +; CHECK-NEXT:.b32 1554 // DW_AT_type ; CHECK-NEXT:.b8 0 // End Of Children Mark -; CHECK-NEXT:.b8 31 // Abbrev [31] 0x185d:0x1f DW_TAG_subprogram +; CHECK-NEXT:.b8 26 // Abbrev [26] 0x19be:0x26 DW_TAG_subprogram ; CHECK-NEXT:.b8 95 // DW_AT_MIPS_linkage_name ; CHECK-NEXT:.b8 90 ; CHECK-NEXT:.b8 76 ; CHECK-NEXT:.b8 53 -; CHECK-NEXT:.b8 116 -; CHECK-NEXT:.b8 114 -; CHECK-NEXT:.b8 117 -; CHECK-NEXT:.b8 110 -; CHECK-NEXT:.b8 99 ; CHECK-NEXT:.b8 102 -; CHECK-NEXT:.b8 0 -; CHECK-NEXT:.b8 116 // DW_AT_name -; CHECK-NEXT:.b8 114 -; CHECK-NEXT:.b8 117 -; CHECK-NEXT:.b8 110 -; CHECK-NEXT:.b8 99 -; CHECK-NEXT:.b8 0 -; CHECK-NEXT:.b8 4 // DW_AT_decl_file -; CHECK-NEXT:.b8 183 // DW_AT_decl_line -; CHECK-NEXT:.b32 2116 // DW_AT_type -; CHECK-NEXT:.b8 1 // DW_AT_declaration -; CHECK-NEXT:.b8 7 // Abbrev [7] 0x1876:0x5 DW_TAG_formal_parameter -; CHECK-NEXT:.b32 2116 // DW_AT_type -; CHECK-NEXT:.b8 0 // End Of Children Mark -; CHECK-NEXT:.b8 32 // Abbrev [32] 0x187c:0x14 DW_TAG_subprogram -; CHECK-NEXT:.b8 97 // DW_AT_name -; CHECK-NEXT:.b8 99 -; CHECK-NEXT:.b8 111 -; CHECK-NEXT:.b8 115 -; CHECK-NEXT:.b8 0 -; CHECK-NEXT:.b8 5 // DW_AT_decl_file -; CHECK-NEXT:.b8 54 // DW_AT_decl_line -; CHECK-NEXT:.b32 5621 // DW_AT_type -; CHECK-NEXT:.b8 1 // DW_AT_declaration -; CHECK-NEXT:.b8 1 // DW_AT_external -; CHECK-NEXT:.b8 7 // Abbrev [7] 0x188a:0x5 DW_TAG_formal_parameter -; CHECK-NEXT:.b32 5621 // DW_AT_type -; CHECK-NEXT:.b8 0 // End Of Children Mark -; CHECK-NEXT:.b8 32 // Abbrev [32] 0x1890:0x14 DW_TAG_subprogram -; CHECK-NEXT:.b8 97 // DW_AT_name -; CHECK-NEXT:.b8 115 +; CHECK-NEXT:.b8 109 ; CHECK-NEXT:.b8 105 ; CHECK-NEXT:.b8 110 -; CHECK-NEXT:.b8 0 -; CHECK-NEXT:.b8 5 // DW_AT_decl_file -; CHECK-NEXT:.b8 56 // DW_AT_decl_line -; CHECK-NEXT:.b32 5621 // DW_AT_type -; CHECK-NEXT:.b8 1 // DW_AT_declaration -; CHECK-NEXT:.b8 1 // DW_AT_external -; CHECK-NEXT:.b8 7 // Abbrev [7] 0x189e:0x5 DW_TAG_formal_parameter -; CHECK-NEXT:.b32 5621 // DW_AT_type -; CHECK-NEXT:.b8 0 // End Of Children Mark -; CHECK-NEXT:.b8 32 // Abbrev [32] 0x18a4:0x14 DW_TAG_subprogram -; CHECK-NEXT:.b8 97 // DW_AT_name -; CHECK-NEXT:.b8 116 -; CHECK-NEXT:.b8 97 -; CHECK-NEXT:.b8 110 -; CHECK-NEXT:.b8 0 -; CHECK-NEXT:.b8 5 // DW_AT_decl_file -; CHECK-NEXT:.b8 58 // DW_AT_decl_line -; CHECK-NEXT:.b32 5621 // DW_AT_type -; CHECK-NEXT:.b8 1 // DW_AT_declaration -; CHECK-NEXT:.b8 1 // DW_AT_external -; CHECK-NEXT:.b8 7 // Abbrev [7] 0x18b2:0x5 DW_TAG_formal_parameter -; CHECK-NEXT:.b32 5621 // DW_AT_type -; CHECK-NEXT:.b8 0 // 
End Of Children Mark -; CHECK-NEXT:.b8 32 // Abbrev [32] 0x18b8:0x1a DW_TAG_subprogram -; CHECK-NEXT:.b8 97 // DW_AT_name -; CHECK-NEXT:.b8 116 -; CHECK-NEXT:.b8 97 -; CHECK-NEXT:.b8 110 -; CHECK-NEXT:.b8 50 -; CHECK-NEXT:.b8 0 -; CHECK-NEXT:.b8 5 // DW_AT_decl_file -; CHECK-NEXT:.b8 60 // DW_AT_decl_line -; CHECK-NEXT:.b32 5621 // DW_AT_type -; CHECK-NEXT:.b8 1 // DW_AT_declaration -; CHECK-NEXT:.b8 1 // DW_AT_external -; CHECK-NEXT:.b8 7 // Abbrev [7] 0x18c7:0x5 DW_TAG_formal_parameter -; CHECK-NEXT:.b32 5621 // DW_AT_type -; CHECK-NEXT:.b8 7 // Abbrev [7] 0x18cc:0x5 DW_TAG_formal_parameter -; CHECK-NEXT:.b32 5621 // DW_AT_type -; CHECK-NEXT:.b8 0 // End Of Children Mark -; CHECK-NEXT:.b8 32 // Abbrev [32] 0x18d2:0x14 DW_TAG_subprogram -; CHECK-NEXT:.b8 99 // DW_AT_name -; CHECK-NEXT:.b8 101 -; CHECK-NEXT:.b8 105 -; CHECK-NEXT:.b8 108 -; CHECK-NEXT:.b8 0 -; CHECK-NEXT:.b8 5 // DW_AT_decl_file -; CHECK-NEXT:.b8 178 // DW_AT_decl_line -; CHECK-NEXT:.b32 5621 // DW_AT_type -; CHECK-NEXT:.b8 1 // DW_AT_declaration -; CHECK-NEXT:.b8 1 // DW_AT_external -; CHECK-NEXT:.b8 7 // Abbrev [7] 0x18e0:0x5 DW_TAG_formal_parameter -; CHECK-NEXT:.b32 5621 // DW_AT_type -; CHECK-NEXT:.b8 0 // End Of Children Mark -; CHECK-NEXT:.b8 32 // Abbrev [32] 0x18e6:0x13 DW_TAG_subprogram -; CHECK-NEXT:.b8 99 // DW_AT_name -; CHECK-NEXT:.b8 111 -; CHECK-NEXT:.b8 115 -; CHECK-NEXT:.b8 0 -; CHECK-NEXT:.b8 5 // DW_AT_decl_file -; CHECK-NEXT:.b8 63 // DW_AT_decl_line -; CHECK-NEXT:.b32 5621 // DW_AT_type -; CHECK-NEXT:.b8 1 // DW_AT_declaration -; CHECK-NEXT:.b8 1 // DW_AT_external -; CHECK-NEXT:.b8 7 // Abbrev [7] 0x18f3:0x5 DW_TAG_formal_parameter -; CHECK-NEXT:.b32 5621 // DW_AT_type -; CHECK-NEXT:.b8 0 // End Of Children Mark -; CHECK-NEXT:.b8 32 // Abbrev [32] 0x18f9:0x14 DW_TAG_subprogram -; CHECK-NEXT:.b8 99 // DW_AT_name -; CHECK-NEXT:.b8 111 -; CHECK-NEXT:.b8 115 -; CHECK-NEXT:.b8 104 -; CHECK-NEXT:.b8 0 -; CHECK-NEXT:.b8 5 // DW_AT_decl_file -; CHECK-NEXT:.b8 72 // DW_AT_decl_line -; CHECK-NEXT:.b32 5621 // DW_AT_type -; CHECK-NEXT:.b8 1 // DW_AT_declaration -; CHECK-NEXT:.b8 1 // DW_AT_external -; CHECK-NEXT:.b8 7 // Abbrev [7] 0x1907:0x5 DW_TAG_formal_parameter -; CHECK-NEXT:.b32 5621 // DW_AT_type -; CHECK-NEXT:.b8 0 // End Of Children Mark -; CHECK-NEXT:.b8 32 // Abbrev [32] 0x190d:0x13 DW_TAG_subprogram -; CHECK-NEXT:.b8 101 // DW_AT_name -; CHECK-NEXT:.b8 120 -; CHECK-NEXT:.b8 112 -; CHECK-NEXT:.b8 0 -; CHECK-NEXT:.b8 5 // DW_AT_decl_file -; CHECK-NEXT:.b8 100 // DW_AT_decl_line -; CHECK-NEXT:.b32 5621 // DW_AT_type -; CHECK-NEXT:.b8 1 // DW_AT_declaration -; CHECK-NEXT:.b8 1 // DW_AT_external -; CHECK-NEXT:.b8 7 // Abbrev [7] 0x191a:0x5 DW_TAG_formal_parameter -; CHECK-NEXT:.b32 5621 // DW_AT_type -; CHECK-NEXT:.b8 0 // End Of Children Mark -; CHECK-NEXT:.b8 32 // Abbrev [32] 0x1920:0x14 DW_TAG_subprogram +; CHECK-NEXT:.b8 102 +; CHECK-NEXT:.b8 102 +; CHECK-NEXT:.b8 102 +; CHECK-NEXT:.b8 0 ; CHECK-NEXT:.b8 102 // DW_AT_name -; CHECK-NEXT:.b8 97 -; CHECK-NEXT:.b8 98 -; CHECK-NEXT:.b8 115 +; CHECK-NEXT:.b8 109 +; CHECK-NEXT:.b8 105 +; CHECK-NEXT:.b8 110 +; CHECK-NEXT:.b8 102 ; CHECK-NEXT:.b8 0 -; CHECK-NEXT:.b8 5 // DW_AT_decl_file -; CHECK-NEXT:.b8 181 // DW_AT_decl_line -; CHECK-NEXT:.b32 5621 // DW_AT_type +; CHECK-NEXT:.b8 11 // DW_AT_decl_file +; CHECK-NEXT:.b8 105 // DW_AT_decl_line +; CHECK-NEXT:.b8 2 +; CHECK-NEXT:.b32 1554 // DW_AT_type ; CHECK-NEXT:.b8 1 // DW_AT_declaration -; CHECK-NEXT:.b8 1 // DW_AT_external -; CHECK-NEXT:.b8 7 // Abbrev [7] 0x192e:0x5 DW_TAG_formal_parameter -; 
CHECK-NEXT:.b32 5621 // DW_AT_type +; CHECK-NEXT:.b8 6 // Abbrev [6] 0x19d9:0x5 DW_TAG_formal_parameter +; CHECK-NEXT:.b32 1554 // DW_AT_type +; CHECK-NEXT:.b8 6 // Abbrev [6] 0x19de:0x5 DW_TAG_formal_parameter +; CHECK-NEXT:.b32 1554 // DW_AT_type ; CHECK-NEXT:.b8 0 // End Of Children Mark -; CHECK-NEXT:.b8 32 // Abbrev [32] 0x1934:0x15 DW_TAG_subprogram -; CHECK-NEXT:.b8 102 // DW_AT_name -; CHECK-NEXT:.b8 108 -; CHECK-NEXT:.b8 111 +; CHECK-NEXT:.b8 26 // Abbrev [26] 0x19e4:0x26 DW_TAG_subprogram +; CHECK-NEXT:.b8 95 // DW_AT_MIPS_linkage_name +; CHECK-NEXT:.b8 90 +; CHECK-NEXT:.b8 76 +; CHECK-NEXT:.b8 53 +; CHECK-NEXT:.b8 102 +; CHECK-NEXT:.b8 109 ; CHECK-NEXT:.b8 111 -; CHECK-NEXT:.b8 114 +; CHECK-NEXT:.b8 100 +; CHECK-NEXT:.b8 102 +; CHECK-NEXT:.b8 102 +; CHECK-NEXT:.b8 102 ; CHECK-NEXT:.b8 0 -; CHECK-NEXT:.b8 5 // DW_AT_decl_file -; CHECK-NEXT:.b8 184 // DW_AT_decl_line -; CHECK-NEXT:.b32 5621 // DW_AT_type -; CHECK-NEXT:.b8 1 // DW_AT_declaration -; CHECK-NEXT:.b8 1 // DW_AT_external -; CHECK-NEXT:.b8 7 // Abbrev [7] 0x1943:0x5 DW_TAG_formal_parameter -; CHECK-NEXT:.b32 5621 // DW_AT_type -; CHECK-NEXT:.b8 0 // End Of Children Mark -; CHECK-NEXT:.b8 32 // Abbrev [32] 0x1949:0x19 DW_TAG_subprogram ; CHECK-NEXT:.b8 102 // DW_AT_name ; CHECK-NEXT:.b8 109 ; CHECK-NEXT:.b8 111 ; CHECK-NEXT:.b8 100 +; CHECK-NEXT:.b8 102 ; CHECK-NEXT:.b8 0 -; CHECK-NEXT:.b8 5 // DW_AT_decl_file -; CHECK-NEXT:.b8 187 // DW_AT_decl_line -; CHECK-NEXT:.b32 5621 // DW_AT_type +; CHECK-NEXT:.b8 9 // DW_AT_decl_file +; CHECK-NEXT:.b8 17 // DW_AT_decl_line +; CHECK-NEXT:.b8 6 +; CHECK-NEXT:.b32 1554 // DW_AT_type ; CHECK-NEXT:.b8 1 // DW_AT_declaration -; CHECK-NEXT:.b8 1 // DW_AT_external -; CHECK-NEXT:.b8 7 // Abbrev [7] 0x1957:0x5 DW_TAG_formal_parameter -; CHECK-NEXT:.b32 5621 // DW_AT_type -; CHECK-NEXT:.b8 7 // Abbrev [7] 0x195c:0x5 DW_TAG_formal_parameter -; CHECK-NEXT:.b32 5621 // DW_AT_type +; CHECK-NEXT:.b8 6 // Abbrev [6] 0x19ff:0x5 DW_TAG_formal_parameter +; CHECK-NEXT:.b32 1554 // DW_AT_type +; CHECK-NEXT:.b8 6 // Abbrev [6] 0x1a04:0x5 DW_TAG_formal_parameter +; CHECK-NEXT:.b32 1554 // DW_AT_type ; CHECK-NEXT:.b8 0 // End Of Children Mark -; CHECK-NEXT:.b8 32 // Abbrev [32] 0x1962:0x1a DW_TAG_subprogram -; CHECK-NEXT:.b8 102 // DW_AT_name +; CHECK-NEXT:.b8 26 // Abbrev [26] 0x1a0a:0x29 DW_TAG_subprogram +; CHECK-NEXT:.b8 95 // DW_AT_MIPS_linkage_name +; CHECK-NEXT:.b8 90 +; CHECK-NEXT:.b8 76 +; CHECK-NEXT:.b8 54 +; CHECK-NEXT:.b8 102 ; CHECK-NEXT:.b8 114 ; CHECK-NEXT:.b8 101 ; CHECK-NEXT:.b8 120 ; CHECK-NEXT:.b8 112 +; CHECK-NEXT:.b8 102 +; CHECK-NEXT:.b8 102 +; CHECK-NEXT:.b8 80 +; CHECK-NEXT:.b8 105 ; CHECK-NEXT:.b8 0 -; CHECK-NEXT:.b8 5 // DW_AT_decl_file -; CHECK-NEXT:.b8 103 // DW_AT_decl_line -; CHECK-NEXT:.b32 5621 // DW_AT_type -; CHECK-NEXT:.b8 1 // DW_AT_declaration -; CHECK-NEXT:.b8 1 // DW_AT_external -; CHECK-NEXT:.b8 7 // Abbrev [7] 0x1971:0x5 DW_TAG_formal_parameter -; CHECK-NEXT:.b32 5621 // DW_AT_type -; CHECK-NEXT:.b8 7 // Abbrev [7] 0x1976:0x5 DW_TAG_formal_parameter -; CHECK-NEXT:.b32 4624 // DW_AT_type -; CHECK-NEXT:.b8 0 // End Of Children Mark -; CHECK-NEXT:.b8 32 // Abbrev [32] 0x197c:0x1a DW_TAG_subprogram -; CHECK-NEXT:.b8 108 // DW_AT_name -; CHECK-NEXT:.b8 100 +; CHECK-NEXT:.b8 102 // DW_AT_name +; CHECK-NEXT:.b8 114 ; CHECK-NEXT:.b8 101 ; CHECK-NEXT:.b8 120 ; CHECK-NEXT:.b8 112 +; CHECK-NEXT:.b8 102 ; CHECK-NEXT:.b8 0 -; CHECK-NEXT:.b8 5 // DW_AT_decl_file -; CHECK-NEXT:.b8 106 // DW_AT_decl_line -; CHECK-NEXT:.b32 5621 // DW_AT_type +; CHECK-NEXT:.b8 9 // DW_AT_decl_file 
+; CHECK-NEXT:.b8 7 // DW_AT_decl_line +; CHECK-NEXT:.b8 6 +; CHECK-NEXT:.b32 1554 // DW_AT_type ; CHECK-NEXT:.b8 1 // DW_AT_declaration -; CHECK-NEXT:.b8 1 // DW_AT_external -; CHECK-NEXT:.b8 7 // Abbrev [7] 0x198b:0x5 DW_TAG_formal_parameter -; CHECK-NEXT:.b32 5621 // DW_AT_type -; CHECK-NEXT:.b8 7 // Abbrev [7] 0x1990:0x5 DW_TAG_formal_parameter -; CHECK-NEXT:.b32 4579 // DW_AT_type +; CHECK-NEXT:.b8 6 // Abbrev [6] 0x1a28:0x5 DW_TAG_formal_parameter +; CHECK-NEXT:.b32 1554 // DW_AT_type +; CHECK-NEXT:.b8 6 // Abbrev [6] 0x1a2d:0x5 DW_TAG_formal_parameter +; CHECK-NEXT:.b32 2377 // DW_AT_type ; CHECK-NEXT:.b8 0 // End Of Children Mark -; CHECK-NEXT:.b8 32 // Abbrev [32] 0x1996:0x13 DW_TAG_subprogram -; CHECK-NEXT:.b8 108 // DW_AT_name +; CHECK-NEXT:.b8 26 // Abbrev [26] 0x1a33:0x28 DW_TAG_subprogram +; CHECK-NEXT:.b8 95 // DW_AT_MIPS_linkage_name +; CHECK-NEXT:.b8 90 +; CHECK-NEXT:.b8 76 +; CHECK-NEXT:.b8 54 +; CHECK-NEXT:.b8 104 +; CHECK-NEXT:.b8 121 +; CHECK-NEXT:.b8 112 ; CHECK-NEXT:.b8 111 -; CHECK-NEXT:.b8 103 +; CHECK-NEXT:.b8 116 +; CHECK-NEXT:.b8 102 +; CHECK-NEXT:.b8 102 +; CHECK-NEXT:.b8 102 ; CHECK-NEXT:.b8 0 -; CHECK-NEXT:.b8 5 // DW_AT_decl_file -; CHECK-NEXT:.b8 109 // DW_AT_decl_line -; CHECK-NEXT:.b32 5621 // DW_AT_type -; CHECK-NEXT:.b8 1 // DW_AT_declaration -; CHECK-NEXT:.b8 1 // DW_AT_external -; CHECK-NEXT:.b8 7 // Abbrev [7] 0x19a3:0x5 DW_TAG_formal_parameter -; CHECK-NEXT:.b32 5621 // DW_AT_type -; CHECK-NEXT:.b8 0 // End Of Children Mark -; CHECK-NEXT:.b8 32 // Abbrev [32] 0x19a9:0x15 DW_TAG_subprogram -; CHECK-NEXT:.b8 108 // DW_AT_name +; CHECK-NEXT:.b8 104 // DW_AT_name +; CHECK-NEXT:.b8 121 +; CHECK-NEXT:.b8 112 ; CHECK-NEXT:.b8 111 -; CHECK-NEXT:.b8 103 -; CHECK-NEXT:.b8 49 -; CHECK-NEXT:.b8 48 +; CHECK-NEXT:.b8 116 +; CHECK-NEXT:.b8 102 ; CHECK-NEXT:.b8 0 -; CHECK-NEXT:.b8 5 // DW_AT_decl_file -; CHECK-NEXT:.b8 112 // DW_AT_decl_line -; CHECK-NEXT:.b32 5621 // DW_AT_type +; CHECK-NEXT:.b8 9 // DW_AT_decl_file +; CHECK-NEXT:.b8 110 // DW_AT_decl_line +; CHECK-NEXT:.b8 5 +; CHECK-NEXT:.b32 1554 // DW_AT_type ; CHECK-NEXT:.b8 1 // DW_AT_declaration -; CHECK-NEXT:.b8 1 // DW_AT_external -; CHECK-NEXT:.b8 7 // Abbrev [7] 0x19b8:0x5 DW_TAG_formal_parameter -; CHECK-NEXT:.b32 5621 // DW_AT_type +; CHECK-NEXT:.b8 6 // Abbrev [6] 0x1a50:0x5 DW_TAG_formal_parameter +; CHECK-NEXT:.b32 1554 // DW_AT_type +; CHECK-NEXT:.b8 6 // Abbrev [6] 0x1a55:0x5 DW_TAG_formal_parameter +; CHECK-NEXT:.b32 1554 // DW_AT_type ; CHECK-NEXT:.b8 0 // End Of Children Mark -; CHECK-NEXT:.b8 32 // Abbrev [32] 0x19be:0x19 DW_TAG_subprogram -; CHECK-NEXT:.b8 109 // DW_AT_name +; CHECK-NEXT:.b8 26 // Abbrev [26] 0x1a5b:0x22 DW_TAG_subprogram +; CHECK-NEXT:.b8 95 // DW_AT_MIPS_linkage_name +; CHECK-NEXT:.b8 90 +; CHECK-NEXT:.b8 76 +; CHECK-NEXT:.b8 54 +; CHECK-NEXT:.b8 105 +; CHECK-NEXT:.b8 108 ; CHECK-NEXT:.b8 111 -; CHECK-NEXT:.b8 100 +; CHECK-NEXT:.b8 103 +; CHECK-NEXT:.b8 98 +; CHECK-NEXT:.b8 102 ; CHECK-NEXT:.b8 102 ; CHECK-NEXT:.b8 0 -; CHECK-NEXT:.b8 5 // DW_AT_decl_file -; CHECK-NEXT:.b8 115 // DW_AT_decl_line -; CHECK-NEXT:.b32 5621 // DW_AT_type -; CHECK-NEXT:.b8 1 // DW_AT_declaration -; CHECK-NEXT:.b8 1 // DW_AT_external -; CHECK-NEXT:.b8 7 // Abbrev [7] 0x19cc:0x5 DW_TAG_formal_parameter -; CHECK-NEXT:.b32 5621 // DW_AT_type -; CHECK-NEXT:.b8 7 // Abbrev [7] 0x19d1:0x5 DW_TAG_formal_parameter -; CHECK-NEXT:.b32 6615 // DW_AT_type -; CHECK-NEXT:.b8 0 // End Of Children Mark -; CHECK-NEXT:.b8 12 // Abbrev [12] 0x19d7:0x5 DW_TAG_pointer_type -; CHECK-NEXT:.b32 5621 // DW_AT_type -; 
CHECK-NEXT:.b8 32 // Abbrev [32] 0x19dc:0x18 DW_TAG_subprogram -; CHECK-NEXT:.b8 112 // DW_AT_name +; CHECK-NEXT:.b8 105 // DW_AT_name +; CHECK-NEXT:.b8 108 ; CHECK-NEXT:.b8 111 -; CHECK-NEXT:.b8 119 -; CHECK-NEXT:.b8 0 -; CHECK-NEXT:.b8 5 // DW_AT_decl_file -; CHECK-NEXT:.b8 153 // DW_AT_decl_line -; CHECK-NEXT:.b32 5621 // DW_AT_type -; CHECK-NEXT:.b8 1 // DW_AT_declaration -; CHECK-NEXT:.b8 1 // DW_AT_external -; CHECK-NEXT:.b8 7 // Abbrev [7] 0x19e9:0x5 DW_TAG_formal_parameter -; CHECK-NEXT:.b32 5621 // DW_AT_type -; CHECK-NEXT:.b8 7 // Abbrev [7] 0x19ee:0x5 DW_TAG_formal_parameter -; CHECK-NEXT:.b32 5621 // DW_AT_type -; CHECK-NEXT:.b8 0 // End Of Children Mark -; CHECK-NEXT:.b8 32 // Abbrev [32] 0x19f4:0x13 DW_TAG_subprogram -; CHECK-NEXT:.b8 115 // DW_AT_name -; CHECK-NEXT:.b8 105 -; CHECK-NEXT:.b8 110 +; CHECK-NEXT:.b8 103 +; CHECK-NEXT:.b8 98 +; CHECK-NEXT:.b8 102 ; CHECK-NEXT:.b8 0 -; CHECK-NEXT:.b8 5 // DW_AT_decl_file -; CHECK-NEXT:.b8 65 // DW_AT_decl_line -; CHECK-NEXT:.b32 5621 // DW_AT_type +; CHECK-NEXT:.b8 9 // DW_AT_decl_file +; CHECK-NEXT:.b8 85 // DW_AT_decl_line +; CHECK-NEXT:.b8 6 +; CHECK-NEXT:.b32 2332 // DW_AT_type ; CHECK-NEXT:.b8 1 // DW_AT_declaration -; CHECK-NEXT:.b8 1 // DW_AT_external -; CHECK-NEXT:.b8 7 // Abbrev [7] 0x1a01:0x5 DW_TAG_formal_parameter -; CHECK-NEXT:.b32 5621 // DW_AT_type +; CHECK-NEXT:.b8 6 // Abbrev [6] 0x1a77:0x5 DW_TAG_formal_parameter +; CHECK-NEXT:.b32 1554 // DW_AT_type ; CHECK-NEXT:.b8 0 // End Of Children Mark -; CHECK-NEXT:.b8 32 // Abbrev [32] 0x1a07:0x14 DW_TAG_subprogram -; CHECK-NEXT:.b8 115 // DW_AT_name +; CHECK-NEXT:.b8 26 // Abbrev [26] 0x1a7d:0x28 DW_TAG_subprogram +; CHECK-NEXT:.b8 95 // DW_AT_MIPS_linkage_name +; CHECK-NEXT:.b8 90 +; CHECK-NEXT:.b8 76 +; CHECK-NEXT:.b8 54 +; CHECK-NEXT:.b8 108 +; CHECK-NEXT:.b8 100 +; CHECK-NEXT:.b8 101 +; CHECK-NEXT:.b8 120 +; CHECK-NEXT:.b8 112 +; CHECK-NEXT:.b8 102 +; CHECK-NEXT:.b8 102 ; CHECK-NEXT:.b8 105 -; CHECK-NEXT:.b8 110 -; CHECK-NEXT:.b8 104 ; CHECK-NEXT:.b8 0 -; CHECK-NEXT:.b8 5 // DW_AT_decl_file -; CHECK-NEXT:.b8 74 // DW_AT_decl_line -; CHECK-NEXT:.b32 5621 // DW_AT_type -; CHECK-NEXT:.b8 1 // DW_AT_declaration -; CHECK-NEXT:.b8 1 // DW_AT_external -; CHECK-NEXT:.b8 7 // Abbrev [7] 0x1a15:0x5 DW_TAG_formal_parameter -; CHECK-NEXT:.b32 5621 // DW_AT_type -; CHECK-NEXT:.b8 0 // End Of Children Mark -; CHECK-NEXT:.b8 32 // Abbrev [32] 0x1a1b:0x14 DW_TAG_subprogram -; CHECK-NEXT:.b8 115 // DW_AT_name -; CHECK-NEXT:.b8 113 -; CHECK-NEXT:.b8 114 -; CHECK-NEXT:.b8 116 +; CHECK-NEXT:.b8 108 // DW_AT_name +; CHECK-NEXT:.b8 100 +; CHECK-NEXT:.b8 101 +; CHECK-NEXT:.b8 120 +; CHECK-NEXT:.b8 112 +; CHECK-NEXT:.b8 102 ; CHECK-NEXT:.b8 0 -; CHECK-NEXT:.b8 5 // DW_AT_decl_file -; CHECK-NEXT:.b8 156 // DW_AT_decl_line -; CHECK-NEXT:.b32 5621 // DW_AT_type +; CHECK-NEXT:.b8 9 // DW_AT_decl_file +; CHECK-NEXT:.b8 240 // DW_AT_decl_line +; CHECK-NEXT:.b8 5 +; CHECK-NEXT:.b32 1554 // DW_AT_type ; CHECK-NEXT:.b8 1 // DW_AT_declaration -; CHECK-NEXT:.b8 1 // DW_AT_external -; CHECK-NEXT:.b8 7 // Abbrev [7] 0x1a29:0x5 DW_TAG_formal_parameter -; CHECK-NEXT:.b32 5621 // DW_AT_type +; CHECK-NEXT:.b8 6 // Abbrev [6] 0x1a9a:0x5 DW_TAG_formal_parameter +; CHECK-NEXT:.b32 1554 // DW_AT_type +; CHECK-NEXT:.b8 6 // Abbrev [6] 0x1a9f:0x5 DW_TAG_formal_parameter +; CHECK-NEXT:.b32 2332 // DW_AT_type ; CHECK-NEXT:.b8 0 // End Of Children Mark -; CHECK-NEXT:.b8 32 // Abbrev [32] 0x1a2f:0x13 DW_TAG_subprogram -; CHECK-NEXT:.b8 116 // DW_AT_name +; CHECK-NEXT:.b8 26 // Abbrev [26] 0x1aa5:0x24 
DW_TAG_subprogram +; CHECK-NEXT:.b8 95 // DW_AT_MIPS_linkage_name +; CHECK-NEXT:.b8 90 +; CHECK-NEXT:.b8 76 +; CHECK-NEXT:.b8 55 +; CHECK-NEXT:.b8 108 +; CHECK-NEXT:.b8 103 ; CHECK-NEXT:.b8 97 -; CHECK-NEXT:.b8 110 -; CHECK-NEXT:.b8 0 -; CHECK-NEXT:.b8 5 // DW_AT_decl_file -; CHECK-NEXT:.b8 67 // DW_AT_decl_line -; CHECK-NEXT:.b32 5621 // DW_AT_type -; CHECK-NEXT:.b8 1 // DW_AT_declaration -; CHECK-NEXT:.b8 1 // DW_AT_external -; CHECK-NEXT:.b8 7 // Abbrev [7] 0x1a3c:0x5 DW_TAG_formal_parameter -; CHECK-NEXT:.b32 5621 // DW_AT_type -; CHECK-NEXT:.b8 0 // End Of Children Mark -; CHECK-NEXT:.b8 32 // Abbrev [32] 0x1a42:0x14 DW_TAG_subprogram -; CHECK-NEXT:.b8 116 // DW_AT_name +; CHECK-NEXT:.b8 109 +; CHECK-NEXT:.b8 109 ; CHECK-NEXT:.b8 97 -; CHECK-NEXT:.b8 110 -; CHECK-NEXT:.b8 104 -; CHECK-NEXT:.b8 0 -; CHECK-NEXT:.b8 5 // DW_AT_decl_file -; CHECK-NEXT:.b8 76 // DW_AT_decl_line -; CHECK-NEXT:.b32 5621 // DW_AT_type -; CHECK-NEXT:.b8 1 // DW_AT_declaration -; CHECK-NEXT:.b8 1 // DW_AT_external -; CHECK-NEXT:.b8 7 // Abbrev [7] 0x1a50:0x5 DW_TAG_formal_parameter -; CHECK-NEXT:.b32 5621 // DW_AT_type -; CHECK-NEXT:.b8 0 // End Of Children Mark -; CHECK-NEXT:.b8 33 // Abbrev [33] 0x1a56:0xd DW_TAG_typedef -; CHECK-NEXT:.b32 6755 // DW_AT_type -; CHECK-NEXT:.b8 100 // DW_AT_name -; CHECK-NEXT:.b8 105 -; CHECK-NEXT:.b8 118 -; CHECK-NEXT:.b8 95 -; CHECK-NEXT:.b8 116 +; CHECK-NEXT:.b8 102 +; CHECK-NEXT:.b8 102 ; CHECK-NEXT:.b8 0 -; CHECK-NEXT:.b8 7 // DW_AT_decl_file -; CHECK-NEXT:.b8 101 // DW_AT_decl_line -; CHECK-NEXT:.b8 34 // Abbrev [34] 0x1a63:0x2 DW_TAG_structure_type -; CHECK-NEXT:.b8 1 // DW_AT_declaration -; CHECK-NEXT:.b8 33 // Abbrev [33] 0x1a65:0xe DW_TAG_typedef -; CHECK-NEXT:.b32 6771 // DW_AT_type ; CHECK-NEXT:.b8 108 // DW_AT_name -; CHECK-NEXT:.b8 100 -; CHECK-NEXT:.b8 105 -; CHECK-NEXT:.b8 118 -; CHECK-NEXT:.b8 95 -; CHECK-NEXT:.b8 116 -; CHECK-NEXT:.b8 0 -; CHECK-NEXT:.b8 7 // DW_AT_decl_file -; CHECK-NEXT:.b8 109 // DW_AT_decl_line -; CHECK-NEXT:.b8 35 // Abbrev [35] 0x1a73:0x22 DW_TAG_structure_type -; CHECK-NEXT:.b8 16 // DW_AT_byte_size -; CHECK-NEXT:.b8 7 // DW_AT_decl_file -; CHECK-NEXT:.b8 105 // DW_AT_decl_line -; CHECK-NEXT:.b8 11 // Abbrev [11] 0x1a77:0xf DW_TAG_member -; CHECK-NEXT:.b8 113 // DW_AT_name -; CHECK-NEXT:.b8 117 -; CHECK-NEXT:.b8 111 -; CHECK-NEXT:.b8 116 -; CHECK-NEXT:.b8 0 -; CHECK-NEXT:.b32 5164 // DW_AT_type -; CHECK-NEXT:.b8 7 // DW_AT_decl_file -; CHECK-NEXT:.b8 107 // DW_AT_decl_line -; CHECK-NEXT:.b8 2 // DW_AT_data_member_location -; CHECK-NEXT:.b8 35 -; CHECK-NEXT:.b8 0 -; CHECK-NEXT:.b8 11 // Abbrev [11] 0x1a86:0xe DW_TAG_member -; CHECK-NEXT:.b8 114 // DW_AT_name -; CHECK-NEXT:.b8 101 -; CHECK-NEXT:.b8 109 -; CHECK-NEXT:.b8 0 -; CHECK-NEXT:.b32 5164 // DW_AT_type -; CHECK-NEXT:.b8 7 // DW_AT_decl_file -; CHECK-NEXT:.b8 108 // DW_AT_decl_line -; CHECK-NEXT:.b8 2 // DW_AT_data_member_location -; CHECK-NEXT:.b8 35 -; CHECK-NEXT:.b8 8 -; CHECK-NEXT:.b8 0 // End Of Children Mark -; CHECK-NEXT:.b8 36 // Abbrev [36] 0x1a95:0xd DW_TAG_subprogram -; CHECK-NEXT:.b8 97 // DW_AT_name -; CHECK-NEXT:.b8 98 -; CHECK-NEXT:.b8 111 -; CHECK-NEXT:.b8 114 -; CHECK-NEXT:.b8 116 -; CHECK-NEXT:.b8 0 -; CHECK-NEXT:.b8 7 // DW_AT_decl_file -; CHECK-NEXT:.b8 3 // DW_AT_decl_line -; CHECK-NEXT:.b8 2 -; CHECK-NEXT:.b8 1 // DW_AT_declaration -; CHECK-NEXT:.b8 1 // DW_AT_external -; CHECK-NEXT:.b8 1 // DW_AT_noreturn -; CHECK-NEXT:.b8 37 // Abbrev [37] 0x1aa2:0x14 DW_TAG_subprogram -; CHECK-NEXT:.b8 97 // DW_AT_name -; CHECK-NEXT:.b8 98 -; CHECK-NEXT:.b8 115 +; 
CHECK-NEXT:.b8 103 +; CHECK-NEXT:.b8 97 +; CHECK-NEXT:.b8 109 +; CHECK-NEXT:.b8 109 +; CHECK-NEXT:.b8 97 +; CHECK-NEXT:.b8 102 ; CHECK-NEXT:.b8 0 -; CHECK-NEXT:.b8 7 // DW_AT_decl_file -; CHECK-NEXT:.b8 7 // DW_AT_decl_line -; CHECK-NEXT:.b8 3 -; CHECK-NEXT:.b32 4579 // DW_AT_type +; CHECK-NEXT:.b8 9 // DW_AT_decl_file +; CHECK-NEXT:.b8 235 // DW_AT_decl_line +; CHECK-NEXT:.b8 5 +; CHECK-NEXT:.b32 1554 // DW_AT_type ; CHECK-NEXT:.b8 1 // DW_AT_declaration -; CHECK-NEXT:.b8 1 // DW_AT_external -; CHECK-NEXT:.b8 7 // Abbrev [7] 0x1ab0:0x5 DW_TAG_formal_parameter -; CHECK-NEXT:.b32 4579 // DW_AT_type +; CHECK-NEXT:.b8 6 // Abbrev [6] 0x1ac3:0x5 DW_TAG_formal_parameter +; CHECK-NEXT:.b32 1554 // DW_AT_type ; CHECK-NEXT:.b8 0 // End Of Children Mark -; CHECK-NEXT:.b8 37 // Abbrev [37] 0x1ab6:0x17 DW_TAG_subprogram -; CHECK-NEXT:.b8 97 // DW_AT_name -; CHECK-NEXT:.b8 116 -; CHECK-NEXT:.b8 101 -; CHECK-NEXT:.b8 120 +; CHECK-NEXT:.b8 26 // Abbrev [26] 0x1ac9:0x24 DW_TAG_subprogram +; CHECK-NEXT:.b8 95 // DW_AT_MIPS_linkage_name +; CHECK-NEXT:.b8 90 +; CHECK-NEXT:.b8 76 +; CHECK-NEXT:.b8 55 +; CHECK-NEXT:.b8 108 +; CHECK-NEXT:.b8 108 +; CHECK-NEXT:.b8 114 ; CHECK-NEXT:.b8 105 +; CHECK-NEXT:.b8 110 ; CHECK-NEXT:.b8 116 +; CHECK-NEXT:.b8 102 +; CHECK-NEXT:.b8 102 ; CHECK-NEXT:.b8 0 -; CHECK-NEXT:.b8 7 // DW_AT_decl_file -; CHECK-NEXT:.b8 7 // DW_AT_decl_line -; CHECK-NEXT:.b8 2 -; CHECK-NEXT:.b32 4579 // DW_AT_type -; CHECK-NEXT:.b8 1 // DW_AT_declaration -; CHECK-NEXT:.b8 1 // DW_AT_external -; CHECK-NEXT:.b8 7 // Abbrev [7] 0x1ac7:0x5 DW_TAG_formal_parameter -; CHECK-NEXT:.b32 6861 // DW_AT_type -; CHECK-NEXT:.b8 0 // End Of Children Mark -; CHECK-NEXT:.b8 12 // Abbrev [12] 0x1acd:0x5 DW_TAG_pointer_type -; CHECK-NEXT:.b32 6866 // DW_AT_type -; CHECK-NEXT:.b8 38 // Abbrev [38] 0x1ad2:0x1 DW_TAG_subroutine_type -; CHECK-NEXT:.b8 32 // Abbrev [32] 0x1ad3:0x14 DW_TAG_subprogram -; CHECK-NEXT:.b8 97 // DW_AT_name +; CHECK-NEXT:.b8 108 // DW_AT_name +; CHECK-NEXT:.b8 108 +; CHECK-NEXT:.b8 114 +; CHECK-NEXT:.b8 105 +; CHECK-NEXT:.b8 110 ; CHECK-NEXT:.b8 116 -; CHECK-NEXT:.b8 111 ; CHECK-NEXT:.b8 102 ; CHECK-NEXT:.b8 0 ; CHECK-NEXT:.b8 9 // DW_AT_decl_file -; CHECK-NEXT:.b8 26 // DW_AT_decl_line -; CHECK-NEXT:.b32 5621 // DW_AT_type +; CHECK-NEXT:.b8 125 // DW_AT_decl_line +; CHECK-NEXT:.b8 4 +; CHECK-NEXT:.b32 1508 // DW_AT_type ; CHECK-NEXT:.b8 1 // DW_AT_declaration -; CHECK-NEXT:.b8 1 // DW_AT_external -; CHECK-NEXT:.b8 7 // Abbrev [7] 0x1ae1:0x5 DW_TAG_formal_parameter -; CHECK-NEXT:.b32 5631 // DW_AT_type +; CHECK-NEXT:.b8 6 // Abbrev [6] 0x1ae7:0x5 DW_TAG_formal_parameter +; CHECK-NEXT:.b32 1554 // DW_AT_type ; CHECK-NEXT:.b8 0 // End Of Children Mark -; CHECK-NEXT:.b8 37 // Abbrev [37] 0x1ae7:0x15 DW_TAG_subprogram -; CHECK-NEXT:.b8 97 // DW_AT_name -; CHECK-NEXT:.b8 116 +; CHECK-NEXT:.b8 26 // Abbrev [26] 0x1aed:0x26 DW_TAG_subprogram +; CHECK-NEXT:.b8 95 // DW_AT_MIPS_linkage_name +; CHECK-NEXT:.b8 90 +; CHECK-NEXT:.b8 76 +; CHECK-NEXT:.b8 56 +; CHECK-NEXT:.b8 108 +; CHECK-NEXT:.b8 108 +; CHECK-NEXT:.b8 114 ; CHECK-NEXT:.b8 111 -; CHECK-NEXT:.b8 105 +; CHECK-NEXT:.b8 117 +; CHECK-NEXT:.b8 110 +; CHECK-NEXT:.b8 100 +; CHECK-NEXT:.b8 102 +; CHECK-NEXT:.b8 102 ; CHECK-NEXT:.b8 0 -; CHECK-NEXT:.b8 7 // DW_AT_decl_file -; CHECK-NEXT:.b8 22 // DW_AT_decl_line -; CHECK-NEXT:.b8 1 -; CHECK-NEXT:.b32 4579 // DW_AT_type -; CHECK-NEXT:.b8 1 // DW_AT_declaration -; CHECK-NEXT:.b8 1 // DW_AT_external -; CHECK-NEXT:.b8 7 // Abbrev [7] 0x1af6:0x5 DW_TAG_formal_parameter -; CHECK-NEXT:.b32 5631 // DW_AT_type -; 
CHECK-NEXT:.b8 0 // End Of Children Mark -; CHECK-NEXT:.b8 37 // Abbrev [37] 0x1afc:0x15 DW_TAG_subprogram -; CHECK-NEXT:.b8 97 // DW_AT_name -; CHECK-NEXT:.b8 116 -; CHECK-NEXT:.b8 111 +; CHECK-NEXT:.b8 108 // DW_AT_name ; CHECK-NEXT:.b8 108 -; CHECK-NEXT:.b8 0 -; CHECK-NEXT:.b8 7 // DW_AT_decl_file -; CHECK-NEXT:.b8 27 // DW_AT_decl_line -; CHECK-NEXT:.b8 1 -; CHECK-NEXT:.b32 5164 // DW_AT_type -; CHECK-NEXT:.b8 1 // DW_AT_declaration -; CHECK-NEXT:.b8 1 // DW_AT_external -; CHECK-NEXT:.b8 7 // Abbrev [7] 0x1b0b:0x5 DW_TAG_formal_parameter -; CHECK-NEXT:.b32 5631 // DW_AT_type -; CHECK-NEXT:.b8 0 // End Of Children Mark -; CHECK-NEXT:.b8 32 // Abbrev [32] 0x1b11:0x2b DW_TAG_subprogram -; CHECK-NEXT:.b8 98 // DW_AT_name -; CHECK-NEXT:.b8 115 -; CHECK-NEXT:.b8 101 -; CHECK-NEXT:.b8 97 ; CHECK-NEXT:.b8 114 -; CHECK-NEXT:.b8 99 -; CHECK-NEXT:.b8 104 +; CHECK-NEXT:.b8 111 +; CHECK-NEXT:.b8 117 +; CHECK-NEXT:.b8 110 +; CHECK-NEXT:.b8 100 +; CHECK-NEXT:.b8 102 ; CHECK-NEXT:.b8 0 -; CHECK-NEXT:.b8 10 // DW_AT_decl_file -; CHECK-NEXT:.b8 20 // DW_AT_decl_line -; CHECK-NEXT:.b32 6972 // DW_AT_type +; CHECK-NEXT:.b8 9 // DW_AT_decl_file +; CHECK-NEXT:.b8 66 // DW_AT_decl_line +; CHECK-NEXT:.b8 6 +; CHECK-NEXT:.b32 1508 // DW_AT_type ; CHECK-NEXT:.b8 1 // DW_AT_declaration -; CHECK-NEXT:.b8 1 // DW_AT_external -; CHECK-NEXT:.b8 7 // Abbrev [7] 0x1b22:0x5 DW_TAG_formal_parameter -; CHECK-NEXT:.b32 6973 // DW_AT_type -; CHECK-NEXT:.b8 7 // Abbrev [7] 0x1b27:0x5 DW_TAG_formal_parameter -; CHECK-NEXT:.b32 6973 // DW_AT_type -; CHECK-NEXT:.b8 7 // Abbrev [7] 0x1b2c:0x5 DW_TAG_formal_parameter -; CHECK-NEXT:.b32 6979 // DW_AT_type -; CHECK-NEXT:.b8 7 // Abbrev [7] 0x1b31:0x5 DW_TAG_formal_parameter -; CHECK-NEXT:.b32 6979 // DW_AT_type -; CHECK-NEXT:.b8 7 // Abbrev [7] 0x1b36:0x5 DW_TAG_formal_parameter -; CHECK-NEXT:.b32 7014 // DW_AT_type -; CHECK-NEXT:.b8 0 // End Of Children Mark -; CHECK-NEXT:.b8 39 // Abbrev [39] 0x1b3c:0x1 DW_TAG_pointer_type -; CHECK-NEXT:.b8 12 // Abbrev [12] 0x1b3d:0x5 DW_TAG_pointer_type -; CHECK-NEXT:.b32 6978 // DW_AT_type -; CHECK-NEXT:.b8 40 // Abbrev [40] 0x1b42:0x1 DW_TAG_const_type -; CHECK-NEXT:.b8 33 // Abbrev [33] 0x1b43:0xe DW_TAG_typedef -; CHECK-NEXT:.b32 6993 // DW_AT_type -; CHECK-NEXT:.b8 115 // DW_AT_name -; CHECK-NEXT:.b8 105 -; CHECK-NEXT:.b8 122 -; CHECK-NEXT:.b8 101 -; CHECK-NEXT:.b8 95 -; CHECK-NEXT:.b8 116 +; CHECK-NEXT:.b8 6 // Abbrev [6] 0x1b0d:0x5 DW_TAG_formal_parameter +; CHECK-NEXT:.b32 1554 // DW_AT_type +; CHECK-NEXT:.b8 0 // End Of Children Mark +; CHECK-NEXT:.b8 26 // Abbrev [26] 0x1b13:0x22 DW_TAG_subprogram +; CHECK-NEXT:.b8 95 // DW_AT_MIPS_linkage_name +; CHECK-NEXT:.b8 90 +; CHECK-NEXT:.b8 76 +; CHECK-NEXT:.b8 54 +; CHECK-NEXT:.b8 108 +; CHECK-NEXT:.b8 111 +; CHECK-NEXT:.b8 103 +; CHECK-NEXT:.b8 49 +; CHECK-NEXT:.b8 48 +; CHECK-NEXT:.b8 102 +; CHECK-NEXT:.b8 102 ; CHECK-NEXT:.b8 0 -; CHECK-NEXT:.b8 11 // DW_AT_decl_file -; CHECK-NEXT:.b8 62 // DW_AT_decl_line -; CHECK-NEXT:.b8 10 // Abbrev [10] 0x1b51:0x15 DW_TAG_base_type ; CHECK-NEXT:.b8 108 // DW_AT_name ; CHECK-NEXT:.b8 111 -; CHECK-NEXT:.b8 110 -; CHECK-NEXT:.b8 103 -; CHECK-NEXT:.b8 32 -; CHECK-NEXT:.b8 117 -; CHECK-NEXT:.b8 110 -; CHECK-NEXT:.b8 115 -; CHECK-NEXT:.b8 105 ; CHECK-NEXT:.b8 103 -; CHECK-NEXT:.b8 110 -; CHECK-NEXT:.b8 101 -; CHECK-NEXT:.b8 100 -; CHECK-NEXT:.b8 32 -; CHECK-NEXT:.b8 105 -; CHECK-NEXT:.b8 110 -; CHECK-NEXT:.b8 116 +; CHECK-NEXT:.b8 49 +; CHECK-NEXT:.b8 48 +; CHECK-NEXT:.b8 102 ; CHECK-NEXT:.b8 0 -; CHECK-NEXT:.b8 7 // DW_AT_encoding -; CHECK-NEXT:.b8 8 // 
DW_AT_byte_size -; CHECK-NEXT:.b8 20 // Abbrev [20] 0x1b66:0x16 DW_TAG_typedef -; CHECK-NEXT:.b32 7036 // DW_AT_type -; CHECK-NEXT:.b8 95 // DW_AT_name -; CHECK-NEXT:.b8 95 -; CHECK-NEXT:.b8 99 +; CHECK-NEXT:.b8 9 // DW_AT_decl_file +; CHECK-NEXT:.b8 76 // DW_AT_decl_line +; CHECK-NEXT:.b8 5 +; CHECK-NEXT:.b32 1554 // DW_AT_type +; CHECK-NEXT:.b8 1 // DW_AT_declaration +; CHECK-NEXT:.b8 6 // Abbrev [6] 0x1b2f:0x5 DW_TAG_formal_parameter +; CHECK-NEXT:.b32 1554 // DW_AT_type +; CHECK-NEXT:.b8 0 // End Of Children Mark +; CHECK-NEXT:.b8 26 // Abbrev [26] 0x1b35:0x22 DW_TAG_subprogram +; CHECK-NEXT:.b8 95 // DW_AT_MIPS_linkage_name +; CHECK-NEXT:.b8 90 +; CHECK-NEXT:.b8 76 +; CHECK-NEXT:.b8 54 +; CHECK-NEXT:.b8 108 ; CHECK-NEXT:.b8 111 -; CHECK-NEXT:.b8 109 +; CHECK-NEXT:.b8 103 +; CHECK-NEXT:.b8 49 ; CHECK-NEXT:.b8 112 -; CHECK-NEXT:.b8 97 -; CHECK-NEXT:.b8 114 -; CHECK-NEXT:.b8 95 ; CHECK-NEXT:.b8 102 -; CHECK-NEXT:.b8 110 -; CHECK-NEXT:.b8 95 -; CHECK-NEXT:.b8 116 +; CHECK-NEXT:.b8 102 ; CHECK-NEXT:.b8 0 -; CHECK-NEXT:.b8 7 // DW_AT_decl_file -; CHECK-NEXT:.b8 230 // DW_AT_decl_line -; CHECK-NEXT:.b8 2 -; CHECK-NEXT:.b8 12 // Abbrev [12] 0x1b7c:0x5 DW_TAG_pointer_type -; CHECK-NEXT:.b32 7041 // DW_AT_type -; CHECK-NEXT:.b8 41 // Abbrev [41] 0x1b81:0x10 DW_TAG_subroutine_type -; CHECK-NEXT:.b32 4579 // DW_AT_type -; CHECK-NEXT:.b8 7 // Abbrev [7] 0x1b86:0x5 DW_TAG_formal_parameter -; CHECK-NEXT:.b32 6973 // DW_AT_type -; CHECK-NEXT:.b8 7 // Abbrev [7] 0x1b8b:0x5 DW_TAG_formal_parameter -; CHECK-NEXT:.b32 6973 // DW_AT_type -; CHECK-NEXT:.b8 0 // End Of Children Mark -; CHECK-NEXT:.b8 37 // Abbrev [37] 0x1b91:0x1c DW_TAG_subprogram -; CHECK-NEXT:.b8 99 // DW_AT_name -; CHECK-NEXT:.b8 97 -; CHECK-NEXT:.b8 108 -; CHECK-NEXT:.b8 108 +; CHECK-NEXT:.b8 108 // DW_AT_name ; CHECK-NEXT:.b8 111 -; CHECK-NEXT:.b8 99 +; CHECK-NEXT:.b8 103 +; CHECK-NEXT:.b8 49 +; CHECK-NEXT:.b8 112 +; CHECK-NEXT:.b8 102 ; CHECK-NEXT:.b8 0 -; CHECK-NEXT:.b8 7 // DW_AT_decl_file -; CHECK-NEXT:.b8 212 // DW_AT_decl_line -; CHECK-NEXT:.b8 1 -; CHECK-NEXT:.b32 6972 // DW_AT_type +; CHECK-NEXT:.b8 9 // DW_AT_decl_file +; CHECK-NEXT:.b8 85 // DW_AT_decl_line +; CHECK-NEXT:.b8 5 +; CHECK-NEXT:.b32 1554 // DW_AT_type ; CHECK-NEXT:.b8 1 // DW_AT_declaration -; CHECK-NEXT:.b8 1 // DW_AT_external -; CHECK-NEXT:.b8 7 // Abbrev [7] 0x1ba2:0x5 DW_TAG_formal_parameter -; CHECK-NEXT:.b32 6979 // DW_AT_type -; CHECK-NEXT:.b8 7 // Abbrev [7] 0x1ba7:0x5 DW_TAG_formal_parameter -; CHECK-NEXT:.b32 6979 // DW_AT_type +; CHECK-NEXT:.b8 6 // Abbrev [6] 0x1b51:0x5 DW_TAG_formal_parameter +; CHECK-NEXT:.b32 1554 // DW_AT_type ; CHECK-NEXT:.b8 0 // End Of Children Mark -; CHECK-NEXT:.b8 37 // Abbrev [37] 0x1bad:0x19 DW_TAG_subprogram -; CHECK-NEXT:.b8 100 // DW_AT_name -; CHECK-NEXT:.b8 105 -; CHECK-NEXT:.b8 118 +; CHECK-NEXT:.b8 26 // Abbrev [26] 0x1b57:0x20 DW_TAG_subprogram +; CHECK-NEXT:.b8 95 // DW_AT_MIPS_linkage_name +; CHECK-NEXT:.b8 90 +; CHECK-NEXT:.b8 76 +; CHECK-NEXT:.b8 53 +; CHECK-NEXT:.b8 108 +; CHECK-NEXT:.b8 111 +; CHECK-NEXT:.b8 103 +; CHECK-NEXT:.b8 50 +; CHECK-NEXT:.b8 102 +; CHECK-NEXT:.b8 102 ; CHECK-NEXT:.b8 0 -; CHECK-NEXT:.b8 7 // DW_AT_decl_file -; CHECK-NEXT:.b8 21 // DW_AT_decl_line -; CHECK-NEXT:.b8 3 -; CHECK-NEXT:.b32 6742 // DW_AT_type -; CHECK-NEXT:.b8 1 // DW_AT_declaration -; CHECK-NEXT:.b8 1 // DW_AT_external -; CHECK-NEXT:.b8 7 // Abbrev [7] 0x1bbb:0x5 DW_TAG_formal_parameter -; CHECK-NEXT:.b32 4579 // DW_AT_type -; CHECK-NEXT:.b8 7 // Abbrev [7] 0x1bc0:0x5 DW_TAG_formal_parameter -; CHECK-NEXT:.b32 4579 // 
DW_AT_type -; CHECK-NEXT:.b8 0 // End Of Children Mark -; CHECK-NEXT:.b8 42 // Abbrev [42] 0x1bc6:0x12 DW_TAG_subprogram -; CHECK-NEXT:.b8 101 // DW_AT_name -; CHECK-NEXT:.b8 120 -; CHECK-NEXT:.b8 105 -; CHECK-NEXT:.b8 116 +; CHECK-NEXT:.b8 108 // DW_AT_name +; CHECK-NEXT:.b8 111 +; CHECK-NEXT:.b8 103 +; CHECK-NEXT:.b8 50 +; CHECK-NEXT:.b8 102 ; CHECK-NEXT:.b8 0 -; CHECK-NEXT:.b8 7 // DW_AT_decl_file -; CHECK-NEXT:.b8 31 // DW_AT_decl_line -; CHECK-NEXT:.b8 2 +; CHECK-NEXT:.b8 9 // DW_AT_decl_file +; CHECK-NEXT:.b8 5 // DW_AT_decl_line +; CHECK-NEXT:.b8 5 +; CHECK-NEXT:.b32 1554 // DW_AT_type ; CHECK-NEXT:.b8 1 // DW_AT_declaration -; CHECK-NEXT:.b8 1 // DW_AT_external -; CHECK-NEXT:.b8 1 // DW_AT_noreturn -; CHECK-NEXT:.b8 7 // Abbrev [7] 0x1bd2:0x5 DW_TAG_formal_parameter -; CHECK-NEXT:.b32 4579 // DW_AT_type +; CHECK-NEXT:.b8 6 // Abbrev [6] 0x1b71:0x5 DW_TAG_formal_parameter +; CHECK-NEXT:.b32 1554 // DW_AT_type ; CHECK-NEXT:.b8 0 // End Of Children Mark -; CHECK-NEXT:.b8 18 // Abbrev [18] 0x1bd8:0x11 DW_TAG_subprogram -; CHECK-NEXT:.b8 102 // DW_AT_name -; CHECK-NEXT:.b8 114 -; CHECK-NEXT:.b8 101 -; CHECK-NEXT:.b8 101 +; CHECK-NEXT:.b8 26 // Abbrev [26] 0x1b77:0x20 DW_TAG_subprogram +; CHECK-NEXT:.b8 95 // DW_AT_MIPS_linkage_name +; CHECK-NEXT:.b8 90 +; CHECK-NEXT:.b8 76 +; CHECK-NEXT:.b8 53 +; CHECK-NEXT:.b8 108 +; CHECK-NEXT:.b8 111 +; CHECK-NEXT:.b8 103 +; CHECK-NEXT:.b8 98 +; CHECK-NEXT:.b8 102 +; CHECK-NEXT:.b8 102 ; CHECK-NEXT:.b8 0 -; CHECK-NEXT:.b8 7 // DW_AT_decl_file -; CHECK-NEXT:.b8 227 // DW_AT_decl_line -; CHECK-NEXT:.b8 1 +; CHECK-NEXT:.b8 108 // DW_AT_name +; CHECK-NEXT:.b8 111 +; CHECK-NEXT:.b8 103 +; CHECK-NEXT:.b8 98 +; CHECK-NEXT:.b8 102 +; CHECK-NEXT:.b8 0 +; CHECK-NEXT:.b8 9 // DW_AT_decl_file +; CHECK-NEXT:.b8 90 // DW_AT_decl_line +; CHECK-NEXT:.b8 6 +; CHECK-NEXT:.b32 1554 // DW_AT_type ; CHECK-NEXT:.b8 1 // DW_AT_declaration -; CHECK-NEXT:.b8 1 // DW_AT_external -; CHECK-NEXT:.b8 7 // Abbrev [7] 0x1be3:0x5 DW_TAG_formal_parameter -; CHECK-NEXT:.b32 6972 // DW_AT_type +; CHECK-NEXT:.b8 6 // Abbrev [6] 0x1b91:0x5 DW_TAG_formal_parameter +; CHECK-NEXT:.b32 1554 // DW_AT_type ; CHECK-NEXT:.b8 0 // End Of Children Mark -; CHECK-NEXT:.b8 37 // Abbrev [37] 0x1be9:0x17 DW_TAG_subprogram -; CHECK-NEXT:.b8 103 // DW_AT_name -; CHECK-NEXT:.b8 101 -; CHECK-NEXT:.b8 116 -; CHECK-NEXT:.b8 101 -; CHECK-NEXT:.b8 110 -; CHECK-NEXT:.b8 118 +; CHECK-NEXT:.b8 26 // Abbrev [26] 0x1b97:0x1e DW_TAG_subprogram +; CHECK-NEXT:.b8 95 // DW_AT_MIPS_linkage_name +; CHECK-NEXT:.b8 90 +; CHECK-NEXT:.b8 76 +; CHECK-NEXT:.b8 52 +; CHECK-NEXT:.b8 108 +; CHECK-NEXT:.b8 111 +; CHECK-NEXT:.b8 103 +; CHECK-NEXT:.b8 102 +; CHECK-NEXT:.b8 102 +; CHECK-NEXT:.b8 0 +; CHECK-NEXT:.b8 108 // DW_AT_name +; CHECK-NEXT:.b8 111 +; CHECK-NEXT:.b8 103 +; CHECK-NEXT:.b8 102 ; CHECK-NEXT:.b8 0 -; CHECK-NEXT:.b8 7 // DW_AT_decl_file -; CHECK-NEXT:.b8 52 // DW_AT_decl_line -; CHECK-NEXT:.b8 2 -; CHECK-NEXT:.b32 7168 // DW_AT_type +; CHECK-NEXT:.b8 9 // DW_AT_decl_file +; CHECK-NEXT:.b8 67 // DW_AT_decl_line +; CHECK-NEXT:.b8 5 +; CHECK-NEXT:.b32 1554 // DW_AT_type ; CHECK-NEXT:.b8 1 // DW_AT_declaration -; CHECK-NEXT:.b8 1 // DW_AT_external -; CHECK-NEXT:.b8 7 // Abbrev [7] 0x1bfa:0x5 DW_TAG_formal_parameter -; CHECK-NEXT:.b32 5631 // DW_AT_type +; CHECK-NEXT:.b8 6 // Abbrev [6] 0x1baf:0x5 DW_TAG_formal_parameter +; CHECK-NEXT:.b32 1554 // DW_AT_type ; CHECK-NEXT:.b8 0 // End Of Children Mark -; CHECK-NEXT:.b8 12 // Abbrev [12] 0x1c00:0x5 DW_TAG_pointer_type -; CHECK-NEXT:.b32 5641 // DW_AT_type -; CHECK-NEXT:.b8 
37 // Abbrev [37] 0x1c05:0x15 DW_TAG_subprogram +; CHECK-NEXT:.b8 26 // Abbrev [26] 0x1bb5:0x22 DW_TAG_subprogram +; CHECK-NEXT:.b8 95 // DW_AT_MIPS_linkage_name +; CHECK-NEXT:.b8 90 +; CHECK-NEXT:.b8 76 +; CHECK-NEXT:.b8 54 +; CHECK-NEXT:.b8 108 +; CHECK-NEXT:.b8 114 +; CHECK-NEXT:.b8 105 +; CHECK-NEXT:.b8 110 +; CHECK-NEXT:.b8 116 +; CHECK-NEXT:.b8 102 +; CHECK-NEXT:.b8 102 +; CHECK-NEXT:.b8 0 ; CHECK-NEXT:.b8 108 // DW_AT_name -; CHECK-NEXT:.b8 97 -; CHECK-NEXT:.b8 98 -; CHECK-NEXT:.b8 115 +; CHECK-NEXT:.b8 114 +; CHECK-NEXT:.b8 105 +; CHECK-NEXT:.b8 110 +; CHECK-NEXT:.b8 116 +; CHECK-NEXT:.b8 102 ; CHECK-NEXT:.b8 0 -; CHECK-NEXT:.b8 7 // DW_AT_decl_file -; CHECK-NEXT:.b8 8 // DW_AT_decl_line -; CHECK-NEXT:.b8 3 -; CHECK-NEXT:.b32 5164 // DW_AT_type +; CHECK-NEXT:.b8 9 // DW_AT_decl_file +; CHECK-NEXT:.b8 116 // DW_AT_decl_line +; CHECK-NEXT:.b8 4 +; CHECK-NEXT:.b32 2917 // DW_AT_type ; CHECK-NEXT:.b8 1 // DW_AT_declaration -; CHECK-NEXT:.b8 1 // DW_AT_external -; CHECK-NEXT:.b8 7 // Abbrev [7] 0x1c14:0x5 DW_TAG_formal_parameter -; CHECK-NEXT:.b32 5164 // DW_AT_type +; CHECK-NEXT:.b8 6 // Abbrev [6] 0x1bd1:0x5 DW_TAG_formal_parameter +; CHECK-NEXT:.b32 1554 // DW_AT_type ; CHECK-NEXT:.b8 0 // End Of Children Mark -; CHECK-NEXT:.b8 37 // Abbrev [37] 0x1c1a:0x1a DW_TAG_subprogram +; CHECK-NEXT:.b8 26 // Abbrev [26] 0x1bd7:0x24 DW_TAG_subprogram +; CHECK-NEXT:.b8 95 // DW_AT_MIPS_linkage_name +; CHECK-NEXT:.b8 90 +; CHECK-NEXT:.b8 76 +; CHECK-NEXT:.b8 55 +; CHECK-NEXT:.b8 108 +; CHECK-NEXT:.b8 114 +; CHECK-NEXT:.b8 111 +; CHECK-NEXT:.b8 117 +; CHECK-NEXT:.b8 110 +; CHECK-NEXT:.b8 100 +; CHECK-NEXT:.b8 102 +; CHECK-NEXT:.b8 102 +; CHECK-NEXT:.b8 0 ; CHECK-NEXT:.b8 108 // DW_AT_name +; CHECK-NEXT:.b8 114 +; CHECK-NEXT:.b8 111 +; CHECK-NEXT:.b8 117 +; CHECK-NEXT:.b8 110 ; CHECK-NEXT:.b8 100 -; CHECK-NEXT:.b8 105 -; CHECK-NEXT:.b8 118 +; CHECK-NEXT:.b8 102 ; CHECK-NEXT:.b8 0 -; CHECK-NEXT:.b8 7 // DW_AT_decl_file -; CHECK-NEXT:.b8 23 // DW_AT_decl_line -; CHECK-NEXT:.b8 3 -; CHECK-NEXT:.b32 6757 // DW_AT_type +; CHECK-NEXT:.b8 9 // DW_AT_decl_file +; CHECK-NEXT:.b8 71 // DW_AT_decl_line +; CHECK-NEXT:.b8 6 +; CHECK-NEXT:.b32 2917 // DW_AT_type ; CHECK-NEXT:.b8 1 // DW_AT_declaration -; CHECK-NEXT:.b8 1 // DW_AT_external -; CHECK-NEXT:.b8 7 // Abbrev [7] 0x1c29:0x5 DW_TAG_formal_parameter -; CHECK-NEXT:.b32 5164 // DW_AT_type -; CHECK-NEXT:.b8 7 // Abbrev [7] 0x1c2e:0x5 DW_TAG_formal_parameter -; CHECK-NEXT:.b32 5164 // DW_AT_type +; CHECK-NEXT:.b8 6 // Abbrev [6] 0x1bf5:0x5 DW_TAG_formal_parameter +; CHECK-NEXT:.b32 1554 // DW_AT_type ; CHECK-NEXT:.b8 0 // End Of Children Mark -; CHECK-NEXT:.b8 37 // Abbrev [37] 0x1c34:0x17 DW_TAG_subprogram +; CHECK-NEXT:.b8 26 // Abbrev [26] 0x1bfb:0x27 DW_TAG_subprogram +; CHECK-NEXT:.b8 95 // DW_AT_MIPS_linkage_name +; CHECK-NEXT:.b8 90 +; CHECK-NEXT:.b8 76 +; CHECK-NEXT:.b8 53 +; CHECK-NEXT:.b8 109 +; CHECK-NEXT:.b8 111 +; CHECK-NEXT:.b8 100 +; CHECK-NEXT:.b8 102 +; CHECK-NEXT:.b8 102 +; CHECK-NEXT:.b8 102 +; CHECK-NEXT:.b8 80 +; CHECK-NEXT:.b8 102 +; CHECK-NEXT:.b8 0 ; CHECK-NEXT:.b8 109 // DW_AT_name -; CHECK-NEXT:.b8 97 -; CHECK-NEXT:.b8 108 -; CHECK-NEXT:.b8 108 ; CHECK-NEXT:.b8 111 -; CHECK-NEXT:.b8 99 +; CHECK-NEXT:.b8 100 +; CHECK-NEXT:.b8 102 +; CHECK-NEXT:.b8 102 ; CHECK-NEXT:.b8 0 -; CHECK-NEXT:.b8 7 // DW_AT_decl_file -; CHECK-NEXT:.b8 210 // DW_AT_decl_line -; CHECK-NEXT:.b8 1 -; CHECK-NEXT:.b32 6972 // DW_AT_type +; CHECK-NEXT:.b8 9 // DW_AT_decl_file +; CHECK-NEXT:.b8 12 // DW_AT_decl_line +; CHECK-NEXT:.b8 6 +; CHECK-NEXT:.b32 1554 // 
DW_AT_type ; CHECK-NEXT:.b8 1 // DW_AT_declaration -; CHECK-NEXT:.b8 1 // DW_AT_external -; CHECK-NEXT:.b8 7 // Abbrev [7] 0x1c45:0x5 DW_TAG_formal_parameter -; CHECK-NEXT:.b32 6979 // DW_AT_type +; CHECK-NEXT:.b8 6 // Abbrev [6] 0x1c17:0x5 DW_TAG_formal_parameter +; CHECK-NEXT:.b32 1554 // DW_AT_type +; CHECK-NEXT:.b8 6 // Abbrev [6] 0x1c1c:0x5 DW_TAG_formal_parameter +; CHECK-NEXT:.b32 3345 // DW_AT_type ; CHECK-NEXT:.b8 0 // End Of Children Mark -; CHECK-NEXT:.b8 37 // Abbrev [37] 0x1c4b:0x1b DW_TAG_subprogram -; CHECK-NEXT:.b8 109 // DW_AT_name -; CHECK-NEXT:.b8 98 -; CHECK-NEXT:.b8 108 +; CHECK-NEXT:.b8 26 // Abbrev [26] 0x1c22:0x2b DW_TAG_subprogram +; CHECK-NEXT:.b8 95 // DW_AT_MIPS_linkage_name +; CHECK-NEXT:.b8 90 +; CHECK-NEXT:.b8 76 +; CHECK-NEXT:.b8 49 +; CHECK-NEXT:.b8 48 +; CHECK-NEXT:.b8 110 ; CHECK-NEXT:.b8 101 +; CHECK-NEXT:.b8 97 +; CHECK-NEXT:.b8 114 +; CHECK-NEXT:.b8 98 +; CHECK-NEXT:.b8 121 +; CHECK-NEXT:.b8 105 ; CHECK-NEXT:.b8 110 +; CHECK-NEXT:.b8 116 +; CHECK-NEXT:.b8 102 +; CHECK-NEXT:.b8 102 ; CHECK-NEXT:.b8 0 -; CHECK-NEXT:.b8 7 // DW_AT_decl_file -; CHECK-NEXT:.b8 95 // DW_AT_decl_line -; CHECK-NEXT:.b8 3 -; CHECK-NEXT:.b32 4579 // DW_AT_type -; CHECK-NEXT:.b8 1 // DW_AT_declaration -; CHECK-NEXT:.b8 1 // DW_AT_external -; CHECK-NEXT:.b8 7 // Abbrev [7] 0x1c5b:0x5 DW_TAG_formal_parameter -; CHECK-NEXT:.b32 5631 // DW_AT_type -; CHECK-NEXT:.b8 7 // Abbrev [7] 0x1c60:0x5 DW_TAG_formal_parameter -; CHECK-NEXT:.b32 6979 // DW_AT_type -; CHECK-NEXT:.b8 0 // End Of Children Mark -; CHECK-NEXT:.b8 37 // Abbrev [37] 0x1c66:0x23 DW_TAG_subprogram -; CHECK-NEXT:.b8 109 // DW_AT_name +; CHECK-NEXT:.b8 110 // DW_AT_name +; CHECK-NEXT:.b8 101 +; CHECK-NEXT:.b8 97 +; CHECK-NEXT:.b8 114 ; CHECK-NEXT:.b8 98 -; CHECK-NEXT:.b8 115 +; CHECK-NEXT:.b8 121 +; CHECK-NEXT:.b8 105 +; CHECK-NEXT:.b8 110 ; CHECK-NEXT:.b8 116 -; CHECK-NEXT:.b8 111 -; CHECK-NEXT:.b8 119 -; CHECK-NEXT:.b8 99 -; CHECK-NEXT:.b8 115 +; CHECK-NEXT:.b8 102 ; CHECK-NEXT:.b8 0 -; CHECK-NEXT:.b8 7 // DW_AT_decl_file -; CHECK-NEXT:.b8 106 // DW_AT_decl_line -; CHECK-NEXT:.b8 3 -; CHECK-NEXT:.b32 6979 // DW_AT_type +; CHECK-NEXT:.b8 9 // DW_AT_decl_file +; CHECK-NEXT:.b8 130 // DW_AT_decl_line +; CHECK-NEXT:.b8 4 +; CHECK-NEXT:.b32 1554 // DW_AT_type ; CHECK-NEXT:.b8 1 // DW_AT_declaration -; CHECK-NEXT:.b8 1 // DW_AT_external -; CHECK-NEXT:.b8 7 // Abbrev [7] 0x1c79:0x5 DW_TAG_formal_parameter -; CHECK-NEXT:.b32 7305 // DW_AT_type -; CHECK-NEXT:.b8 7 // Abbrev [7] 0x1c7e:0x5 DW_TAG_formal_parameter -; CHECK-NEXT:.b32 5631 // DW_AT_type -; CHECK-NEXT:.b8 7 // Abbrev [7] 0x1c83:0x5 DW_TAG_formal_parameter -; CHECK-NEXT:.b32 6979 // DW_AT_type -; CHECK-NEXT:.b8 0 // End Of Children Mark -; CHECK-NEXT:.b8 12 // Abbrev [12] 0x1c89:0x5 DW_TAG_pointer_type -; CHECK-NEXT:.b32 7310 // DW_AT_type -; CHECK-NEXT:.b8 10 // Abbrev [10] 0x1c8e:0xb DW_TAG_base_type -; CHECK-NEXT:.b8 119 // DW_AT_name -; CHECK-NEXT:.b8 99 -; CHECK-NEXT:.b8 104 +; CHECK-NEXT:.b8 6 // Abbrev [6] 0x1c47:0x5 DW_TAG_formal_parameter +; CHECK-NEXT:.b32 1554 // DW_AT_type +; CHECK-NEXT:.b8 0 // End Of Children Mark +; CHECK-NEXT:.b8 26 // Abbrev [26] 0x1c4d:0x31 DW_TAG_subprogram +; CHECK-NEXT:.b8 95 // DW_AT_MIPS_linkage_name +; CHECK-NEXT:.b8 90 +; CHECK-NEXT:.b8 76 +; CHECK-NEXT:.b8 49 +; CHECK-NEXT:.b8 48 +; CHECK-NEXT:.b8 110 +; CHECK-NEXT:.b8 101 +; CHECK-NEXT:.b8 120 +; CHECK-NEXT:.b8 116 ; CHECK-NEXT:.b8 97 -; CHECK-NEXT:.b8 114 -; CHECK-NEXT:.b8 95 +; CHECK-NEXT:.b8 102 ; CHECK-NEXT:.b8 116 +; CHECK-NEXT:.b8 101 +; CHECK-NEXT:.b8 114 +; 
CHECK-NEXT:.b8 102 +; CHECK-NEXT:.b8 102 +; CHECK-NEXT:.b8 102 ; CHECK-NEXT:.b8 0 -; CHECK-NEXT:.b8 5 // DW_AT_encoding -; CHECK-NEXT:.b8 4 // DW_AT_byte_size -; CHECK-NEXT:.b8 37 // Abbrev [37] 0x1c99:0x21 DW_TAG_subprogram -; CHECK-NEXT:.b8 109 // DW_AT_name -; CHECK-NEXT:.b8 98 +; CHECK-NEXT:.b8 110 // DW_AT_name +; CHECK-NEXT:.b8 101 +; CHECK-NEXT:.b8 120 +; CHECK-NEXT:.b8 116 +; CHECK-NEXT:.b8 97 +; CHECK-NEXT:.b8 102 ; CHECK-NEXT:.b8 116 +; CHECK-NEXT:.b8 101 +; CHECK-NEXT:.b8 114 +; CHECK-NEXT:.b8 102 +; CHECK-NEXT:.b8 0 +; CHECK-NEXT:.b8 9 // DW_AT_decl_file +; CHECK-NEXT:.b8 194 // DW_AT_decl_line +; CHECK-NEXT:.b8 4 +; CHECK-NEXT:.b32 1554 // DW_AT_type +; CHECK-NEXT:.b8 1 // DW_AT_declaration +; CHECK-NEXT:.b8 6 // Abbrev [6] 0x1c73:0x5 DW_TAG_formal_parameter +; CHECK-NEXT:.b32 1554 // DW_AT_type +; CHECK-NEXT:.b8 6 // Abbrev [6] 0x1c78:0x5 DW_TAG_formal_parameter +; CHECK-NEXT:.b32 1554 // DW_AT_type +; CHECK-NEXT:.b8 0 // End Of Children Mark +; CHECK-NEXT:.b8 26 // Abbrev [26] 0x1c7e:0x24 DW_TAG_subprogram +; CHECK-NEXT:.b8 95 // DW_AT_MIPS_linkage_name +; CHECK-NEXT:.b8 90 +; CHECK-NEXT:.b8 76 +; CHECK-NEXT:.b8 52 +; CHECK-NEXT:.b8 112 ; CHECK-NEXT:.b8 111 ; CHECK-NEXT:.b8 119 -; CHECK-NEXT:.b8 99 +; CHECK-NEXT:.b8 102 +; CHECK-NEXT:.b8 102 +; CHECK-NEXT:.b8 102 ; CHECK-NEXT:.b8 0 -; CHECK-NEXT:.b8 7 // DW_AT_decl_file -; CHECK-NEXT:.b8 98 // DW_AT_decl_line -; CHECK-NEXT:.b8 3 -; CHECK-NEXT:.b32 4579 // DW_AT_type -; CHECK-NEXT:.b8 1 // DW_AT_declaration -; CHECK-NEXT:.b8 1 // DW_AT_external -; CHECK-NEXT:.b8 7 // Abbrev [7] 0x1caa:0x5 DW_TAG_formal_parameter -; CHECK-NEXT:.b32 7305 // DW_AT_type -; CHECK-NEXT:.b8 7 // Abbrev [7] 0x1caf:0x5 DW_TAG_formal_parameter -; CHECK-NEXT:.b32 5631 // DW_AT_type -; CHECK-NEXT:.b8 7 // Abbrev [7] 0x1cb4:0x5 DW_TAG_formal_parameter -; CHECK-NEXT:.b32 6979 // DW_AT_type -; CHECK-NEXT:.b8 0 // End Of Children Mark -; CHECK-NEXT:.b8 18 // Abbrev [18] 0x1cba:0x21 DW_TAG_subprogram -; CHECK-NEXT:.b8 113 // DW_AT_name -; CHECK-NEXT:.b8 115 +; CHECK-NEXT:.b8 112 // DW_AT_name ; CHECK-NEXT:.b8 111 -; CHECK-NEXT:.b8 114 -; CHECK-NEXT:.b8 116 +; CHECK-NEXT:.b8 119 +; CHECK-NEXT:.b8 102 ; CHECK-NEXT:.b8 0 -; CHECK-NEXT:.b8 7 // DW_AT_decl_file -; CHECK-NEXT:.b8 253 // DW_AT_decl_line -; CHECK-NEXT:.b8 2 +; CHECK-NEXT:.b8 9 // DW_AT_decl_file +; CHECK-NEXT:.b8 47 // DW_AT_decl_line +; CHECK-NEXT:.b8 6 +; CHECK-NEXT:.b32 1554 // DW_AT_type ; CHECK-NEXT:.b8 1 // DW_AT_declaration -; CHECK-NEXT:.b8 1 // DW_AT_external -; CHECK-NEXT:.b8 7 // Abbrev [7] 0x1cc6:0x5 DW_TAG_formal_parameter -; CHECK-NEXT:.b32 6972 // DW_AT_type -; CHECK-NEXT:.b8 7 // Abbrev [7] 0x1ccb:0x5 DW_TAG_formal_parameter -; CHECK-NEXT:.b32 6979 // DW_AT_type -; CHECK-NEXT:.b8 7 // Abbrev [7] 0x1cd0:0x5 DW_TAG_formal_parameter -; CHECK-NEXT:.b32 6979 // DW_AT_type -; CHECK-NEXT:.b8 7 // Abbrev [7] 0x1cd5:0x5 DW_TAG_formal_parameter -; CHECK-NEXT:.b32 7014 // DW_AT_type -; CHECK-NEXT:.b8 0 // End Of Children Mark -; CHECK-NEXT:.b8 43 // Abbrev [43] 0x1cdb:0xf DW_TAG_subprogram +; CHECK-NEXT:.b8 6 // Abbrev [6] 0x1c97:0x5 DW_TAG_formal_parameter +; CHECK-NEXT:.b32 1554 // DW_AT_type +; CHECK-NEXT:.b8 6 // Abbrev [6] 0x1c9c:0x5 DW_TAG_formal_parameter +; CHECK-NEXT:.b32 1554 // DW_AT_type +; CHECK-NEXT:.b8 0 // End Of Children Mark +; CHECK-NEXT:.b8 26 // Abbrev [26] 0x1ca2:0x31 DW_TAG_subprogram +; CHECK-NEXT:.b8 95 // DW_AT_MIPS_linkage_name +; CHECK-NEXT:.b8 90 +; CHECK-NEXT:.b8 76 +; CHECK-NEXT:.b8 49 +; CHECK-NEXT:.b8 48 +; CHECK-NEXT:.b8 114 +; CHECK-NEXT:.b8 101 +; 
CHECK-NEXT:.b8 109 +; CHECK-NEXT:.b8 97 +; CHECK-NEXT:.b8 105 +; CHECK-NEXT:.b8 110 +; CHECK-NEXT:.b8 100 +; CHECK-NEXT:.b8 101 +; CHECK-NEXT:.b8 114 +; CHECK-NEXT:.b8 102 +; CHECK-NEXT:.b8 102 +; CHECK-NEXT:.b8 102 +; CHECK-NEXT:.b8 0 ; CHECK-NEXT:.b8 114 // DW_AT_name +; CHECK-NEXT:.b8 101 +; CHECK-NEXT:.b8 109 ; CHECK-NEXT:.b8 97 +; CHECK-NEXT:.b8 105 ; CHECK-NEXT:.b8 110 ; CHECK-NEXT:.b8 100 +; CHECK-NEXT:.b8 101 +; CHECK-NEXT:.b8 114 +; CHECK-NEXT:.b8 102 ; CHECK-NEXT:.b8 0 -; CHECK-NEXT:.b8 7 // DW_AT_decl_file -; CHECK-NEXT:.b8 118 // DW_AT_decl_line -; CHECK-NEXT:.b8 1 -; CHECK-NEXT:.b32 4579 // DW_AT_type +; CHECK-NEXT:.b8 9 // DW_AT_decl_file +; CHECK-NEXT:.b8 22 // DW_AT_decl_line +; CHECK-NEXT:.b8 6 +; CHECK-NEXT:.b32 1554 // DW_AT_type ; CHECK-NEXT:.b8 1 // DW_AT_declaration -; CHECK-NEXT:.b8 1 // DW_AT_external -; CHECK-NEXT:.b8 37 // Abbrev [37] 0x1cea:0x1d DW_TAG_subprogram +; CHECK-NEXT:.b8 6 // Abbrev [6] 0x1cc8:0x5 DW_TAG_formal_parameter +; CHECK-NEXT:.b32 1554 // DW_AT_type +; CHECK-NEXT:.b8 6 // Abbrev [6] 0x1ccd:0x5 DW_TAG_formal_parameter +; CHECK-NEXT:.b32 1554 // DW_AT_type +; CHECK-NEXT:.b8 0 // End Of Children Mark +; CHECK-NEXT:.b8 26 // Abbrev [26] 0x1cd3:0x31 DW_TAG_subprogram +; CHECK-NEXT:.b8 95 // DW_AT_MIPS_linkage_name +; CHECK-NEXT:.b8 90 +; CHECK-NEXT:.b8 76 +; CHECK-NEXT:.b8 55 +; CHECK-NEXT:.b8 114 +; CHECK-NEXT:.b8 101 +; CHECK-NEXT:.b8 109 +; CHECK-NEXT:.b8 113 +; CHECK-NEXT:.b8 117 +; CHECK-NEXT:.b8 111 +; CHECK-NEXT:.b8 102 +; CHECK-NEXT:.b8 102 +; CHECK-NEXT:.b8 102 +; CHECK-NEXT:.b8 80 +; CHECK-NEXT:.b8 105 +; CHECK-NEXT:.b8 0 ; CHECK-NEXT:.b8 114 // DW_AT_name ; CHECK-NEXT:.b8 101 -; CHECK-NEXT:.b8 97 -; CHECK-NEXT:.b8 108 -; CHECK-NEXT:.b8 108 +; CHECK-NEXT:.b8 109 +; CHECK-NEXT:.b8 113 +; CHECK-NEXT:.b8 117 ; CHECK-NEXT:.b8 111 -; CHECK-NEXT:.b8 99 +; CHECK-NEXT:.b8 102 ; CHECK-NEXT:.b8 0 -; CHECK-NEXT:.b8 7 // DW_AT_decl_file -; CHECK-NEXT:.b8 224 // DW_AT_decl_line -; CHECK-NEXT:.b8 1 -; CHECK-NEXT:.b32 6972 // DW_AT_type +; CHECK-NEXT:.b8 9 // DW_AT_decl_file +; CHECK-NEXT:.b8 27 // DW_AT_decl_line +; CHECK-NEXT:.b8 6 +; CHECK-NEXT:.b32 1554 // DW_AT_type ; CHECK-NEXT:.b8 1 // DW_AT_declaration -; CHECK-NEXT:.b8 1 // DW_AT_external -; CHECK-NEXT:.b8 7 // Abbrev [7] 0x1cfc:0x5 DW_TAG_formal_parameter -; CHECK-NEXT:.b32 6972 // DW_AT_type -; CHECK-NEXT:.b8 7 // Abbrev [7] 0x1d01:0x5 DW_TAG_formal_parameter -; CHECK-NEXT:.b32 6979 // DW_AT_type +; CHECK-NEXT:.b8 6 // Abbrev [6] 0x1cf4:0x5 DW_TAG_formal_parameter +; CHECK-NEXT:.b32 1554 // DW_AT_type +; CHECK-NEXT:.b8 6 // Abbrev [6] 0x1cf9:0x5 DW_TAG_formal_parameter +; CHECK-NEXT:.b32 1554 // DW_AT_type +; CHECK-NEXT:.b8 6 // Abbrev [6] 0x1cfe:0x5 DW_TAG_formal_parameter +; CHECK-NEXT:.b32 2377 // DW_AT_type ; CHECK-NEXT:.b8 0 // End Of Children Mark -; CHECK-NEXT:.b8 18 // Abbrev [18] 0x1d07:0x12 DW_TAG_subprogram -; CHECK-NEXT:.b8 115 // DW_AT_name +; CHECK-NEXT:.b8 26 // Abbrev [26] 0x1d04:0x20 DW_TAG_subprogram +; CHECK-NEXT:.b8 95 // DW_AT_MIPS_linkage_name +; CHECK-NEXT:.b8 90 +; CHECK-NEXT:.b8 76 +; CHECK-NEXT:.b8 53 ; CHECK-NEXT:.b8 114 -; CHECK-NEXT:.b8 97 +; CHECK-NEXT:.b8 105 ; CHECK-NEXT:.b8 110 -; CHECK-NEXT:.b8 100 +; CHECK-NEXT:.b8 116 +; CHECK-NEXT:.b8 102 +; CHECK-NEXT:.b8 102 ; CHECK-NEXT:.b8 0 -; CHECK-NEXT:.b8 7 // DW_AT_decl_file -; CHECK-NEXT:.b8 120 // DW_AT_decl_line -; CHECK-NEXT:.b8 1 +; CHECK-NEXT:.b8 114 // DW_AT_name +; CHECK-NEXT:.b8 105 +; CHECK-NEXT:.b8 110 +; CHECK-NEXT:.b8 116 +; CHECK-NEXT:.b8 102 +; CHECK-NEXT:.b8 0 +; CHECK-NEXT:.b8 9 // DW_AT_decl_file 
+; CHECK-NEXT:.b8 111 // DW_AT_decl_line +; CHECK-NEXT:.b8 4 +; CHECK-NEXT:.b32 1554 // DW_AT_type ; CHECK-NEXT:.b8 1 // DW_AT_declaration -; CHECK-NEXT:.b8 1 // DW_AT_external -; CHECK-NEXT:.b8 7 // Abbrev [7] 0x1d13:0x5 DW_TAG_formal_parameter -; CHECK-NEXT:.b32 619 // DW_AT_type +; CHECK-NEXT:.b8 6 // Abbrev [6] 0x1d1e:0x5 DW_TAG_formal_parameter +; CHECK-NEXT:.b32 1554 // DW_AT_type ; CHECK-NEXT:.b8 0 // End Of Children Mark -; CHECK-NEXT:.b8 32 // Abbrev [32] 0x1d19:0x1b DW_TAG_subprogram -; CHECK-NEXT:.b8 115 // DW_AT_name -; CHECK-NEXT:.b8 116 +; CHECK-NEXT:.b8 26 // Abbrev [26] 0x1d24:0x22 DW_TAG_subprogram +; CHECK-NEXT:.b8 95 // DW_AT_MIPS_linkage_name +; CHECK-NEXT:.b8 90 +; CHECK-NEXT:.b8 76 +; CHECK-NEXT:.b8 54 ; CHECK-NEXT:.b8 114 -; CHECK-NEXT:.b8 116 ; CHECK-NEXT:.b8 111 +; CHECK-NEXT:.b8 117 +; CHECK-NEXT:.b8 110 ; CHECK-NEXT:.b8 100 +; CHECK-NEXT:.b8 102 +; CHECK-NEXT:.b8 102 ; CHECK-NEXT:.b8 0 -; CHECK-NEXT:.b8 7 // DW_AT_decl_file -; CHECK-NEXT:.b8 164 // DW_AT_decl_line -; CHECK-NEXT:.b32 5621 // DW_AT_type -; CHECK-NEXT:.b8 1 // DW_AT_declaration -; CHECK-NEXT:.b8 1 // DW_AT_external -; CHECK-NEXT:.b8 7 // Abbrev [7] 0x1d29:0x5 DW_TAG_formal_parameter -; CHECK-NEXT:.b32 5631 // DW_AT_type -; CHECK-NEXT:.b8 7 // Abbrev [7] 0x1d2e:0x5 DW_TAG_formal_parameter -; CHECK-NEXT:.b32 7476 // DW_AT_type -; CHECK-NEXT:.b8 0 // End Of Children Mark -; CHECK-NEXT:.b8 12 // Abbrev [12] 0x1d34:0x5 DW_TAG_pointer_type -; CHECK-NEXT:.b32 7168 // DW_AT_type -; CHECK-NEXT:.b8 32 // Abbrev [32] 0x1d39:0x20 DW_TAG_subprogram -; CHECK-NEXT:.b8 115 // DW_AT_name -; CHECK-NEXT:.b8 116 -; CHECK-NEXT:.b8 114 -; CHECK-NEXT:.b8 116 +; CHECK-NEXT:.b8 114 // DW_AT_name ; CHECK-NEXT:.b8 111 -; CHECK-NEXT:.b8 108 +; CHECK-NEXT:.b8 117 +; CHECK-NEXT:.b8 110 +; CHECK-NEXT:.b8 100 +; CHECK-NEXT:.b8 102 ; CHECK-NEXT:.b8 0 -; CHECK-NEXT:.b8 7 // DW_AT_decl_file -; CHECK-NEXT:.b8 183 // DW_AT_decl_line -; CHECK-NEXT:.b32 5164 // DW_AT_type +; CHECK-NEXT:.b8 9 // DW_AT_decl_file +; CHECK-NEXT:.b8 61 // DW_AT_decl_line +; CHECK-NEXT:.b8 6 +; CHECK-NEXT:.b32 1554 // DW_AT_type ; CHECK-NEXT:.b8 1 // DW_AT_declaration -; CHECK-NEXT:.b8 1 // DW_AT_external -; CHECK-NEXT:.b8 7 // Abbrev [7] 0x1d49:0x5 DW_TAG_formal_parameter -; CHECK-NEXT:.b32 5631 // DW_AT_type -; CHECK-NEXT:.b8 7 // Abbrev [7] 0x1d4e:0x5 DW_TAG_formal_parameter -; CHECK-NEXT:.b32 7476 // DW_AT_type -; CHECK-NEXT:.b8 7 // Abbrev [7] 0x1d53:0x5 DW_TAG_formal_parameter -; CHECK-NEXT:.b32 4579 // DW_AT_type -; CHECK-NEXT:.b8 0 // End Of Children Mark -; CHECK-NEXT:.b8 32 // Abbrev [32] 0x1d59:0x21 DW_TAG_subprogram -; CHECK-NEXT:.b8 115 // DW_AT_name -; CHECK-NEXT:.b8 116 -; CHECK-NEXT:.b8 114 -; CHECK-NEXT:.b8 116 -; CHECK-NEXT:.b8 111 -; CHECK-NEXT:.b8 117 +; CHECK-NEXT:.b8 6 // Abbrev [6] 0x1d40:0x5 DW_TAG_formal_parameter +; CHECK-NEXT:.b32 1554 // DW_AT_type +; CHECK-NEXT:.b8 0 // End Of Children Mark +; CHECK-NEXT:.b8 26 // Abbrev [26] 0x1d46:0x2c DW_TAG_subprogram +; CHECK-NEXT:.b8 95 // DW_AT_MIPS_linkage_name +; CHECK-NEXT:.b8 90 +; CHECK-NEXT:.b8 76 +; CHECK-NEXT:.b8 56 +; CHECK-NEXT:.b8 115 +; CHECK-NEXT:.b8 99 +; CHECK-NEXT:.b8 97 +; CHECK-NEXT:.b8 108 +; CHECK-NEXT:.b8 98 +; CHECK-NEXT:.b8 108 +; CHECK-NEXT:.b8 110 +; CHECK-NEXT:.b8 102 +; CHECK-NEXT:.b8 102 ; CHECK-NEXT:.b8 108 ; CHECK-NEXT:.b8 0 -; CHECK-NEXT:.b8 7 // DW_AT_decl_file -; CHECK-NEXT:.b8 187 // DW_AT_decl_line -; CHECK-NEXT:.b32 6993 // DW_AT_type -; CHECK-NEXT:.b8 1 // DW_AT_declaration -; CHECK-NEXT:.b8 1 // DW_AT_external -; CHECK-NEXT:.b8 7 // Abbrev [7] 0x1d6a:0x5 
DW_TAG_formal_parameter -; CHECK-NEXT:.b32 5631 // DW_AT_type -; CHECK-NEXT:.b8 7 // Abbrev [7] 0x1d6f:0x5 DW_TAG_formal_parameter -; CHECK-NEXT:.b32 7476 // DW_AT_type -; CHECK-NEXT:.b8 7 // Abbrev [7] 0x1d74:0x5 DW_TAG_formal_parameter -; CHECK-NEXT:.b32 4579 // DW_AT_type -; CHECK-NEXT:.b8 0 // End Of Children Mark -; CHECK-NEXT:.b8 37 // Abbrev [37] 0x1d7a:0x17 DW_TAG_subprogram ; CHECK-NEXT:.b8 115 // DW_AT_name -; CHECK-NEXT:.b8 121 -; CHECK-NEXT:.b8 115 -; CHECK-NEXT:.b8 116 -; CHECK-NEXT:.b8 101 -; CHECK-NEXT:.b8 109 +; CHECK-NEXT:.b8 99 +; CHECK-NEXT:.b8 97 +; CHECK-NEXT:.b8 108 +; CHECK-NEXT:.b8 98 +; CHECK-NEXT:.b8 108 +; CHECK-NEXT:.b8 110 +; CHECK-NEXT:.b8 102 ; CHECK-NEXT:.b8 0 -; CHECK-NEXT:.b8 7 // DW_AT_decl_file -; CHECK-NEXT:.b8 205 // DW_AT_decl_line -; CHECK-NEXT:.b8 2 -; CHECK-NEXT:.b32 4579 // DW_AT_type +; CHECK-NEXT:.b8 9 // DW_AT_decl_file +; CHECK-NEXT:.b8 250 // DW_AT_decl_line +; CHECK-NEXT:.b8 5 +; CHECK-NEXT:.b32 1554 // DW_AT_type ; CHECK-NEXT:.b8 1 // DW_AT_declaration -; CHECK-NEXT:.b8 1 // DW_AT_external -; CHECK-NEXT:.b8 7 // Abbrev [7] 0x1d8b:0x5 DW_TAG_formal_parameter -; CHECK-NEXT:.b32 5631 // DW_AT_type +; CHECK-NEXT:.b8 6 // Abbrev [6] 0x1d67:0x5 DW_TAG_formal_parameter +; CHECK-NEXT:.b32 1554 // DW_AT_type +; CHECK-NEXT:.b8 6 // Abbrev [6] 0x1d6c:0x5 DW_TAG_formal_parameter +; CHECK-NEXT:.b32 2917 // DW_AT_type ; CHECK-NEXT:.b8 0 // End Of Children Mark -; CHECK-NEXT:.b8 37 // Abbrev [37] 0x1d91:0x23 DW_TAG_subprogram -; CHECK-NEXT:.b8 119 // DW_AT_name -; CHECK-NEXT:.b8 99 +; CHECK-NEXT:.b8 26 // Abbrev [26] 0x1d72:0x2a DW_TAG_subprogram +; CHECK-NEXT:.b8 95 // DW_AT_MIPS_linkage_name +; CHECK-NEXT:.b8 90 +; CHECK-NEXT:.b8 76 +; CHECK-NEXT:.b8 55 ; CHECK-NEXT:.b8 115 -; CHECK-NEXT:.b8 116 -; CHECK-NEXT:.b8 111 -; CHECK-NEXT:.b8 109 +; CHECK-NEXT:.b8 99 +; CHECK-NEXT:.b8 97 +; CHECK-NEXT:.b8 108 ; CHECK-NEXT:.b8 98 -; CHECK-NEXT:.b8 115 +; CHECK-NEXT:.b8 110 +; CHECK-NEXT:.b8 102 +; CHECK-NEXT:.b8 102 +; CHECK-NEXT:.b8 105 ; CHECK-NEXT:.b8 0 -; CHECK-NEXT:.b8 7 // DW_AT_decl_file -; CHECK-NEXT:.b8 109 // DW_AT_decl_line -; CHECK-NEXT:.b8 3 -; CHECK-NEXT:.b32 6979 // DW_AT_type -; CHECK-NEXT:.b8 1 // DW_AT_declaration -; CHECK-NEXT:.b8 1 // DW_AT_external -; CHECK-NEXT:.b8 7 // Abbrev [7] 0x1da4:0x5 DW_TAG_formal_parameter -; CHECK-NEXT:.b32 7168 // DW_AT_type -; CHECK-NEXT:.b8 7 // Abbrev [7] 0x1da9:0x5 DW_TAG_formal_parameter -; CHECK-NEXT:.b32 7604 // DW_AT_type -; CHECK-NEXT:.b8 7 // Abbrev [7] 0x1dae:0x5 DW_TAG_formal_parameter -; CHECK-NEXT:.b32 6979 // DW_AT_type -; CHECK-NEXT:.b8 0 // End Of Children Mark -; CHECK-NEXT:.b8 12 // Abbrev [12] 0x1db4:0x5 DW_TAG_pointer_type -; CHECK-NEXT:.b32 7609 // DW_AT_type -; CHECK-NEXT:.b8 13 // Abbrev [13] 0x1db9:0x5 DW_TAG_const_type -; CHECK-NEXT:.b32 7310 // DW_AT_type -; CHECK-NEXT:.b8 37 // Abbrev [37] 0x1dbe:0x1c DW_TAG_subprogram -; CHECK-NEXT:.b8 119 // DW_AT_name +; CHECK-NEXT:.b8 115 // DW_AT_name ; CHECK-NEXT:.b8 99 -; CHECK-NEXT:.b8 116 -; CHECK-NEXT:.b8 111 -; CHECK-NEXT:.b8 109 +; CHECK-NEXT:.b8 97 +; CHECK-NEXT:.b8 108 ; CHECK-NEXT:.b8 98 +; CHECK-NEXT:.b8 110 +; CHECK-NEXT:.b8 102 ; CHECK-NEXT:.b8 0 -; CHECK-NEXT:.b8 7 // DW_AT_decl_file -; CHECK-NEXT:.b8 102 // DW_AT_decl_line -; CHECK-NEXT:.b8 3 -; CHECK-NEXT:.b32 4579 // DW_AT_type +; CHECK-NEXT:.b8 9 // DW_AT_decl_file +; CHECK-NEXT:.b8 245 // DW_AT_decl_line +; CHECK-NEXT:.b8 5 +; CHECK-NEXT:.b32 1554 // DW_AT_type ; CHECK-NEXT:.b8 1 // DW_AT_declaration -; CHECK-NEXT:.b8 1 // DW_AT_external -; CHECK-NEXT:.b8 7 // Abbrev [7] 
0x1dcf:0x5 DW_TAG_formal_parameter -; CHECK-NEXT:.b32 7168 // DW_AT_type -; CHECK-NEXT:.b8 7 // Abbrev [7] 0x1dd4:0x5 DW_TAG_formal_parameter -; CHECK-NEXT:.b32 7310 // DW_AT_type +; CHECK-NEXT:.b8 6 // Abbrev [6] 0x1d91:0x5 DW_TAG_formal_parameter +; CHECK-NEXT:.b32 1554 // DW_AT_type +; CHECK-NEXT:.b8 6 // Abbrev [6] 0x1d96:0x5 DW_TAG_formal_parameter +; CHECK-NEXT:.b32 2332 // DW_AT_type ; CHECK-NEXT:.b8 0 // End Of Children Mark -; CHECK-NEXT:.b8 28 // Abbrev [28] 0x1dda:0x78 DW_TAG_namespace -; CHECK-NEXT:.b8 95 // DW_AT_name -; CHECK-NEXT:.b8 95 -; CHECK-NEXT:.b8 103 -; CHECK-NEXT:.b8 110 -; CHECK-NEXT:.b8 117 -; CHECK-NEXT:.b8 95 -; CHECK-NEXT:.b8 99 -; CHECK-NEXT:.b8 120 -; CHECK-NEXT:.b8 120 -; CHECK-NEXT:.b8 0 -; CHECK-NEXT:.b8 29 // Abbrev [29] 0x1de5:0x7 DW_TAG_imported_declaration -; CHECK-NEXT:.b8 8 // DW_AT_decl_file -; CHECK-NEXT:.b8 201 // DW_AT_decl_line -; CHECK-NEXT:.b32 7762 // DW_AT_import -; CHECK-NEXT:.b8 29 // Abbrev [29] 0x1dec:0x7 DW_TAG_imported_declaration -; CHECK-NEXT:.b8 8 // DW_AT_decl_file -; CHECK-NEXT:.b8 207 // DW_AT_decl_line -; CHECK-NEXT:.b32 7811 // DW_AT_import -; CHECK-NEXT:.b8 29 // Abbrev [29] 0x1df3:0x7 DW_TAG_imported_declaration -; CHECK-NEXT:.b8 8 // DW_AT_decl_file -; CHECK-NEXT:.b8 211 // DW_AT_decl_line -; CHECK-NEXT:.b32 7830 // DW_AT_import -; CHECK-NEXT:.b8 29 // Abbrev [29] 0x1dfa:0x7 DW_TAG_imported_declaration -; CHECK-NEXT:.b8 8 // DW_AT_decl_file -; CHECK-NEXT:.b8 217 // DW_AT_decl_line -; CHECK-NEXT:.b32 7852 // DW_AT_import -; CHECK-NEXT:.b8 29 // Abbrev [29] 0x1e01:0x7 DW_TAG_imported_declaration -; CHECK-NEXT:.b8 8 // DW_AT_decl_file -; CHECK-NEXT:.b8 228 // DW_AT_decl_line -; CHECK-NEXT:.b32 7879 // DW_AT_import -; CHECK-NEXT:.b8 29 // Abbrev [29] 0x1e08:0x7 DW_TAG_imported_declaration -; CHECK-NEXT:.b8 8 // DW_AT_decl_file -; CHECK-NEXT:.b8 229 // DW_AT_decl_line -; CHECK-NEXT:.b32 7901 // DW_AT_import -; CHECK-NEXT:.b8 29 // Abbrev [29] 0x1e0f:0x7 DW_TAG_imported_declaration -; CHECK-NEXT:.b8 8 // DW_AT_decl_file -; CHECK-NEXT:.b8 230 // DW_AT_decl_line -; CHECK-NEXT:.b32 7934 // DW_AT_import -; CHECK-NEXT:.b8 29 // Abbrev [29] 0x1e16:0x7 DW_TAG_imported_declaration -; CHECK-NEXT:.b8 8 // DW_AT_decl_file -; CHECK-NEXT:.b8 232 // DW_AT_decl_line -; CHECK-NEXT:.b32 7994 // DW_AT_import -; CHECK-NEXT:.b8 29 // Abbrev [29] 0x1e1d:0x7 DW_TAG_imported_declaration -; CHECK-NEXT:.b8 8 // DW_AT_decl_file -; CHECK-NEXT:.b8 233 // DW_AT_decl_line -; CHECK-NEXT:.b32 8021 // DW_AT_import -; CHECK-NEXT:.b8 4 // Abbrev [4] 0x1e24:0x2d DW_TAG_subprogram +; CHECK-NEXT:.b8 26 // Abbrev [26] 0x1d9c:0x1e DW_TAG_subprogram ; CHECK-NEXT:.b8 95 // DW_AT_MIPS_linkage_name ; CHECK-NEXT:.b8 90 -; CHECK-NEXT:.b8 78 -; CHECK-NEXT:.b8 57 -; CHECK-NEXT:.b8 95 -; CHECK-NEXT:.b8 95 -; CHECK-NEXT:.b8 103 -; CHECK-NEXT:.b8 110 -; CHECK-NEXT:.b8 117 -; CHECK-NEXT:.b8 95 -; CHECK-NEXT:.b8 99 -; CHECK-NEXT:.b8 120 -; CHECK-NEXT:.b8 120 -; CHECK-NEXT:.b8 51 -; CHECK-NEXT:.b8 100 +; CHECK-NEXT:.b8 76 +; CHECK-NEXT:.b8 52 +; CHECK-NEXT:.b8 115 ; CHECK-NEXT:.b8 105 -; CHECK-NEXT:.b8 118 -; CHECK-NEXT:.b8 69 -; CHECK-NEXT:.b8 120 -; CHECK-NEXT:.b8 120 +; CHECK-NEXT:.b8 110 +; CHECK-NEXT:.b8 102 +; CHECK-NEXT:.b8 102 ; CHECK-NEXT:.b8 0 -; CHECK-NEXT:.b8 100 // DW_AT_name +; CHECK-NEXT:.b8 115 // DW_AT_name ; CHECK-NEXT:.b8 105 -; CHECK-NEXT:.b8 118 +; CHECK-NEXT:.b8 110 +; CHECK-NEXT:.b8 102 ; CHECK-NEXT:.b8 0 -; CHECK-NEXT:.b8 8 // DW_AT_decl_file -; CHECK-NEXT:.b8 214 // DW_AT_decl_line -; CHECK-NEXT:.b32 7762 // DW_AT_type +; CHECK-NEXT:.b8 9 // DW_AT_decl_file 
+; CHECK-NEXT:.b8 210 // DW_AT_decl_line +; CHECK-NEXT:.b8 4 +; CHECK-NEXT:.b32 1554 // DW_AT_type ; CHECK-NEXT:.b8 1 // DW_AT_declaration -; CHECK-NEXT:.b8 1 // DW_AT_external -; CHECK-NEXT:.b8 7 // Abbrev [7] 0x1e46:0x5 DW_TAG_formal_parameter -; CHECK-NEXT:.b32 3764 // DW_AT_type -; CHECK-NEXT:.b8 7 // Abbrev [7] 0x1e4b:0x5 DW_TAG_formal_parameter -; CHECK-NEXT:.b32 3764 // DW_AT_type -; CHECK-NEXT:.b8 0 // End Of Children Mark +; CHECK-NEXT:.b8 6 // Abbrev [6] 0x1db4:0x5 DW_TAG_formal_parameter +; CHECK-NEXT:.b32 1554 // DW_AT_type ; CHECK-NEXT:.b8 0 // End Of Children Mark -; CHECK-NEXT:.b8 33 // Abbrev [33] 0x1e52:0xf DW_TAG_typedef -; CHECK-NEXT:.b32 7777 // DW_AT_type -; CHECK-NEXT:.b8 108 // DW_AT_name -; CHECK-NEXT:.b8 108 -; CHECK-NEXT:.b8 100 +; CHECK-NEXT:.b8 26 // Abbrev [26] 0x1dba:0x20 DW_TAG_subprogram +; CHECK-NEXT:.b8 95 // DW_AT_MIPS_linkage_name +; CHECK-NEXT:.b8 90 +; CHECK-NEXT:.b8 76 +; CHECK-NEXT:.b8 53 +; CHECK-NEXT:.b8 115 ; CHECK-NEXT:.b8 105 -; CHECK-NEXT:.b8 118 -; CHECK-NEXT:.b8 95 -; CHECK-NEXT:.b8 116 -; CHECK-NEXT:.b8 0 -; CHECK-NEXT:.b8 7 // DW_AT_decl_file -; CHECK-NEXT:.b8 121 // DW_AT_decl_line -; CHECK-NEXT:.b8 35 // Abbrev [35] 0x1e61:0x22 DW_TAG_structure_type -; CHECK-NEXT:.b8 16 // DW_AT_byte_size -; CHECK-NEXT:.b8 7 // DW_AT_decl_file -; CHECK-NEXT:.b8 117 // DW_AT_decl_line -; CHECK-NEXT:.b8 11 // Abbrev [11] 0x1e65:0xf DW_TAG_member -; CHECK-NEXT:.b8 113 // DW_AT_name -; CHECK-NEXT:.b8 117 -; CHECK-NEXT:.b8 111 -; CHECK-NEXT:.b8 116 -; CHECK-NEXT:.b8 0 -; CHECK-NEXT:.b32 3764 // DW_AT_type -; CHECK-NEXT:.b8 7 // DW_AT_decl_file -; CHECK-NEXT:.b8 119 // DW_AT_decl_line -; CHECK-NEXT:.b8 2 // DW_AT_data_member_location -; CHECK-NEXT:.b8 35 -; CHECK-NEXT:.b8 0 -; CHECK-NEXT:.b8 11 // Abbrev [11] 0x1e74:0xe DW_TAG_member -; CHECK-NEXT:.b8 114 // DW_AT_name -; CHECK-NEXT:.b8 101 -; CHECK-NEXT:.b8 109 +; CHECK-NEXT:.b8 110 +; CHECK-NEXT:.b8 104 +; CHECK-NEXT:.b8 102 +; CHECK-NEXT:.b8 102 ; CHECK-NEXT:.b8 0 -; CHECK-NEXT:.b32 3764 // DW_AT_type -; CHECK-NEXT:.b8 7 // DW_AT_decl_file -; CHECK-NEXT:.b8 120 // DW_AT_decl_line -; CHECK-NEXT:.b8 2 // DW_AT_data_member_location -; CHECK-NEXT:.b8 35 -; CHECK-NEXT:.b8 8 -; CHECK-NEXT:.b8 0 // End Of Children Mark -; CHECK-NEXT:.b8 42 // Abbrev [42] 0x1e83:0x13 DW_TAG_subprogram -; CHECK-NEXT:.b8 95 // DW_AT_name -; CHECK-NEXT:.b8 69 -; CHECK-NEXT:.b8 120 +; CHECK-NEXT:.b8 115 // DW_AT_name ; CHECK-NEXT:.b8 105 -; CHECK-NEXT:.b8 116 +; CHECK-NEXT:.b8 110 +; CHECK-NEXT:.b8 104 +; CHECK-NEXT:.b8 102 ; CHECK-NEXT:.b8 0 -; CHECK-NEXT:.b8 7 // DW_AT_decl_file -; CHECK-NEXT:.b8 45 // DW_AT_decl_line -; CHECK-NEXT:.b8 2 +; CHECK-NEXT:.b8 9 // DW_AT_decl_file +; CHECK-NEXT:.b8 37 // DW_AT_decl_line +; CHECK-NEXT:.b8 5 +; CHECK-NEXT:.b32 1554 // DW_AT_type ; CHECK-NEXT:.b8 1 // DW_AT_declaration -; CHECK-NEXT:.b8 1 // DW_AT_external -; CHECK-NEXT:.b8 1 // DW_AT_noreturn -; CHECK-NEXT:.b8 7 // Abbrev [7] 0x1e90:0x5 DW_TAG_formal_parameter -; CHECK-NEXT:.b32 4579 // DW_AT_type +; CHECK-NEXT:.b8 6 // Abbrev [6] 0x1dd4:0x5 DW_TAG_formal_parameter +; CHECK-NEXT:.b32 1554 // DW_AT_type ; CHECK-NEXT:.b8 0 // End Of Children Mark -; CHECK-NEXT:.b8 37 // Abbrev [37] 0x1e96:0x16 DW_TAG_subprogram -; CHECK-NEXT:.b8 108 // DW_AT_name -; CHECK-NEXT:.b8 108 -; CHECK-NEXT:.b8 97 -; CHECK-NEXT:.b8 98 +; CHECK-NEXT:.b8 26 // Abbrev [26] 0x1dda:0x20 DW_TAG_subprogram +; CHECK-NEXT:.b8 95 // DW_AT_MIPS_linkage_name +; CHECK-NEXT:.b8 90 +; CHECK-NEXT:.b8 76 +; CHECK-NEXT:.b8 53 ; CHECK-NEXT:.b8 115 +; CHECK-NEXT:.b8 113 +; CHECK-NEXT:.b8 
114 +; CHECK-NEXT:.b8 116 +; CHECK-NEXT:.b8 102 +; CHECK-NEXT:.b8 102 ; CHECK-NEXT:.b8 0 -; CHECK-NEXT:.b8 7 // DW_AT_decl_file -; CHECK-NEXT:.b8 12 // DW_AT_decl_line +; CHECK-NEXT:.b8 115 // DW_AT_name +; CHECK-NEXT:.b8 113 +; CHECK-NEXT:.b8 114 +; CHECK-NEXT:.b8 116 +; CHECK-NEXT:.b8 102 +; CHECK-NEXT:.b8 0 +; CHECK-NEXT:.b8 11 // DW_AT_decl_file +; CHECK-NEXT:.b8 139 // DW_AT_decl_line ; CHECK-NEXT:.b8 3 -; CHECK-NEXT:.b32 3764 // DW_AT_type +; CHECK-NEXT:.b32 1554 // DW_AT_type ; CHECK-NEXT:.b8 1 // DW_AT_declaration -; CHECK-NEXT:.b8 1 // DW_AT_external -; CHECK-NEXT:.b8 7 // Abbrev [7] 0x1ea6:0x5 DW_TAG_formal_parameter -; CHECK-NEXT:.b32 3764 // DW_AT_type +; CHECK-NEXT:.b8 6 // Abbrev [6] 0x1df4:0x5 DW_TAG_formal_parameter +; CHECK-NEXT:.b32 1554 // DW_AT_type ; CHECK-NEXT:.b8 0 // End Of Children Mark -; CHECK-NEXT:.b8 37 // Abbrev [37] 0x1eac:0x1b DW_TAG_subprogram -; CHECK-NEXT:.b8 108 // DW_AT_name -; CHECK-NEXT:.b8 108 -; CHECK-NEXT:.b8 100 -; CHECK-NEXT:.b8 105 -; CHECK-NEXT:.b8 118 +; CHECK-NEXT:.b8 26 // Abbrev [26] 0x1dfa:0x1e DW_TAG_subprogram +; CHECK-NEXT:.b8 95 // DW_AT_MIPS_linkage_name +; CHECK-NEXT:.b8 90 +; CHECK-NEXT:.b8 76 +; CHECK-NEXT:.b8 52 +; CHECK-NEXT:.b8 116 +; CHECK-NEXT:.b8 97 +; CHECK-NEXT:.b8 110 +; CHECK-NEXT:.b8 102 +; CHECK-NEXT:.b8 102 ; CHECK-NEXT:.b8 0 -; CHECK-NEXT:.b8 7 // DW_AT_decl_file -; CHECK-NEXT:.b8 29 // DW_AT_decl_line -; CHECK-NEXT:.b8 3 -; CHECK-NEXT:.b32 7762 // DW_AT_type +; CHECK-NEXT:.b8 116 // DW_AT_name +; CHECK-NEXT:.b8 97 +; CHECK-NEXT:.b8 110 +; CHECK-NEXT:.b8 102 +; CHECK-NEXT:.b8 0 +; CHECK-NEXT:.b8 9 // DW_AT_decl_file +; CHECK-NEXT:.b8 252 // DW_AT_decl_line +; CHECK-NEXT:.b8 4 +; CHECK-NEXT:.b32 1554 // DW_AT_type ; CHECK-NEXT:.b8 1 // DW_AT_declaration -; CHECK-NEXT:.b8 1 // DW_AT_external -; CHECK-NEXT:.b8 7 // Abbrev [7] 0x1ebc:0x5 DW_TAG_formal_parameter -; CHECK-NEXT:.b32 3764 // DW_AT_type -; CHECK-NEXT:.b8 7 // Abbrev [7] 0x1ec1:0x5 DW_TAG_formal_parameter -; CHECK-NEXT:.b32 3764 // DW_AT_type +; CHECK-NEXT:.b8 6 // Abbrev [6] 0x1e12:0x5 DW_TAG_formal_parameter +; CHECK-NEXT:.b32 1554 // DW_AT_type ; CHECK-NEXT:.b8 0 // End Of Children Mark -; CHECK-NEXT:.b8 37 // Abbrev [37] 0x1ec7:0x16 DW_TAG_subprogram -; CHECK-NEXT:.b8 97 // DW_AT_name +; CHECK-NEXT:.b8 26 // Abbrev [26] 0x1e18:0x20 DW_TAG_subprogram +; CHECK-NEXT:.b8 95 // DW_AT_MIPS_linkage_name +; CHECK-NEXT:.b8 90 +; CHECK-NEXT:.b8 76 +; CHECK-NEXT:.b8 53 ; CHECK-NEXT:.b8 116 -; CHECK-NEXT:.b8 111 -; CHECK-NEXT:.b8 108 -; CHECK-NEXT:.b8 108 +; CHECK-NEXT:.b8 97 +; CHECK-NEXT:.b8 110 +; CHECK-NEXT:.b8 104 +; CHECK-NEXT:.b8 102 +; CHECK-NEXT:.b8 102 ; CHECK-NEXT:.b8 0 -; CHECK-NEXT:.b8 7 // DW_AT_decl_file -; CHECK-NEXT:.b8 36 // DW_AT_decl_line -; CHECK-NEXT:.b8 1 -; CHECK-NEXT:.b32 3764 // DW_AT_type +; CHECK-NEXT:.b8 116 // DW_AT_name +; CHECK-NEXT:.b8 97 +; CHECK-NEXT:.b8 110 +; CHECK-NEXT:.b8 104 +; CHECK-NEXT:.b8 102 +; CHECK-NEXT:.b8 0 +; CHECK-NEXT:.b8 9 // DW_AT_decl_file +; CHECK-NEXT:.b8 42 // DW_AT_decl_line +; CHECK-NEXT:.b8 5 +; CHECK-NEXT:.b32 1554 // DW_AT_type ; CHECK-NEXT:.b8 1 // DW_AT_declaration -; CHECK-NEXT:.b8 1 // DW_AT_external -; CHECK-NEXT:.b8 7 // Abbrev [7] 0x1ed7:0x5 DW_TAG_formal_parameter -; CHECK-NEXT:.b32 5631 // DW_AT_type +; CHECK-NEXT:.b8 6 // Abbrev [6] 0x1e32:0x5 DW_TAG_formal_parameter +; CHECK-NEXT:.b32 1554 // DW_AT_type ; CHECK-NEXT:.b8 0 // End Of Children Mark -; CHECK-NEXT:.b8 32 // Abbrev [32] 0x1edd:0x21 DW_TAG_subprogram -; CHECK-NEXT:.b8 115 // DW_AT_name -; CHECK-NEXT:.b8 116 -; CHECK-NEXT:.b8 114 +; 
CHECK-NEXT:.b8 26 // Abbrev [26] 0x1e38:0x24 DW_TAG_subprogram +; CHECK-NEXT:.b8 95 // DW_AT_MIPS_linkage_name +; CHECK-NEXT:.b8 90 +; CHECK-NEXT:.b8 76 +; CHECK-NEXT:.b8 55 ; CHECK-NEXT:.b8 116 -; CHECK-NEXT:.b8 111 -; CHECK-NEXT:.b8 108 -; CHECK-NEXT:.b8 108 +; CHECK-NEXT:.b8 103 +; CHECK-NEXT:.b8 97 +; CHECK-NEXT:.b8 109 +; CHECK-NEXT:.b8 109 +; CHECK-NEXT:.b8 97 +; CHECK-NEXT:.b8 102 +; CHECK-NEXT:.b8 102 ; CHECK-NEXT:.b8 0 -; CHECK-NEXT:.b8 7 // DW_AT_decl_file -; CHECK-NEXT:.b8 209 // DW_AT_decl_line -; CHECK-NEXT:.b32 3764 // DW_AT_type +; CHECK-NEXT:.b8 116 // DW_AT_name +; CHECK-NEXT:.b8 103 +; CHECK-NEXT:.b8 97 +; CHECK-NEXT:.b8 109 +; CHECK-NEXT:.b8 109 +; CHECK-NEXT:.b8 97 +; CHECK-NEXT:.b8 102 +; CHECK-NEXT:.b8 0 +; CHECK-NEXT:.b8 9 // DW_AT_decl_file +; CHECK-NEXT:.b8 56 // DW_AT_decl_line +; CHECK-NEXT:.b8 6 +; CHECK-NEXT:.b32 1554 // DW_AT_type ; CHECK-NEXT:.b8 1 // DW_AT_declaration -; CHECK-NEXT:.b8 1 // DW_AT_external -; CHECK-NEXT:.b8 7 // Abbrev [7] 0x1eee:0x5 DW_TAG_formal_parameter -; CHECK-NEXT:.b32 5631 // DW_AT_type -; CHECK-NEXT:.b8 7 // Abbrev [7] 0x1ef3:0x5 DW_TAG_formal_parameter -; CHECK-NEXT:.b32 7476 // DW_AT_type -; CHECK-NEXT:.b8 7 // Abbrev [7] 0x1ef8:0x5 DW_TAG_formal_parameter -; CHECK-NEXT:.b32 4579 // DW_AT_type -; CHECK-NEXT:.b8 0 // End Of Children Mark -; CHECK-NEXT:.b8 32 // Abbrev [32] 0x1efe:0x22 DW_TAG_subprogram -; CHECK-NEXT:.b8 115 // DW_AT_name +; CHECK-NEXT:.b8 6 // Abbrev [6] 0x1e56:0x5 DW_TAG_formal_parameter +; CHECK-NEXT:.b32 1554 // DW_AT_type +; CHECK-NEXT:.b8 0 // End Of Children Mark +; CHECK-NEXT:.b8 26 // Abbrev [26] 0x1e5c:0x22 DW_TAG_subprogram +; CHECK-NEXT:.b8 95 // DW_AT_MIPS_linkage_name +; CHECK-NEXT:.b8 90 +; CHECK-NEXT:.b8 76 +; CHECK-NEXT:.b8 54 ; CHECK-NEXT:.b8 116 ; CHECK-NEXT:.b8 114 -; CHECK-NEXT:.b8 116 -; CHECK-NEXT:.b8 111 -; CHECK-NEXT:.b8 117 -; CHECK-NEXT:.b8 108 -; CHECK-NEXT:.b8 108 -; CHECK-NEXT:.b8 0 -; CHECK-NEXT:.b8 7 // DW_AT_decl_file -; CHECK-NEXT:.b8 214 // DW_AT_decl_line -; CHECK-NEXT:.b32 7968 // DW_AT_type -; CHECK-NEXT:.b8 1 // DW_AT_declaration -; CHECK-NEXT:.b8 1 // DW_AT_external -; CHECK-NEXT:.b8 7 // Abbrev [7] 0x1f10:0x5 DW_TAG_formal_parameter -; CHECK-NEXT:.b32 5631 // DW_AT_type -; CHECK-NEXT:.b8 7 // Abbrev [7] 0x1f15:0x5 DW_TAG_formal_parameter -; CHECK-NEXT:.b32 7476 // DW_AT_type -; CHECK-NEXT:.b8 7 // Abbrev [7] 0x1f1a:0x5 DW_TAG_formal_parameter -; CHECK-NEXT:.b32 4579 // DW_AT_type -; CHECK-NEXT:.b8 0 // End Of Children Mark -; CHECK-NEXT:.b8 10 // Abbrev [10] 0x1f20:0x1a DW_TAG_base_type -; CHECK-NEXT:.b8 108 // DW_AT_name -; CHECK-NEXT:.b8 111 -; CHECK-NEXT:.b8 110 -; CHECK-NEXT:.b8 103 -; CHECK-NEXT:.b8 32 -; CHECK-NEXT:.b8 108 -; CHECK-NEXT:.b8 111 -; CHECK-NEXT:.b8 110 -; CHECK-NEXT:.b8 103 -; CHECK-NEXT:.b8 32 ; CHECK-NEXT:.b8 117 ; CHECK-NEXT:.b8 110 -; CHECK-NEXT:.b8 115 -; CHECK-NEXT:.b8 105 -; CHECK-NEXT:.b8 103 -; CHECK-NEXT:.b8 110 -; CHECK-NEXT:.b8 101 -; CHECK-NEXT:.b8 100 -; CHECK-NEXT:.b8 32 -; CHECK-NEXT:.b8 105 -; CHECK-NEXT:.b8 110 -; CHECK-NEXT:.b8 116 +; CHECK-NEXT:.b8 99 +; CHECK-NEXT:.b8 102 +; CHECK-NEXT:.b8 102 ; CHECK-NEXT:.b8 0 -; CHECK-NEXT:.b8 7 // DW_AT_encoding -; CHECK-NEXT:.b8 8 // DW_AT_byte_size -; CHECK-NEXT:.b8 32 // Abbrev [32] 0x1f3a:0x1b DW_TAG_subprogram -; CHECK-NEXT:.b8 115 // DW_AT_name -; CHECK-NEXT:.b8 116 +; CHECK-NEXT:.b8 116 // DW_AT_name ; CHECK-NEXT:.b8 114 -; CHECK-NEXT:.b8 116 -; CHECK-NEXT:.b8 111 +; CHECK-NEXT:.b8 117 +; CHECK-NEXT:.b8 110 +; CHECK-NEXT:.b8 99 ; CHECK-NEXT:.b8 102 ; CHECK-NEXT:.b8 0 -; CHECK-NEXT:.b8 7 // 
DW_AT_decl_file -; CHECK-NEXT:.b8 172 // DW_AT_decl_line -; CHECK-NEXT:.b32 2116 // DW_AT_type +; CHECK-NEXT:.b8 11 // DW_AT_decl_file +; CHECK-NEXT:.b8 150 // DW_AT_decl_line +; CHECK-NEXT:.b8 2 +; CHECK-NEXT:.b32 1554 // DW_AT_type ; CHECK-NEXT:.b8 1 // DW_AT_declaration -; CHECK-NEXT:.b8 1 // DW_AT_external -; CHECK-NEXT:.b8 7 // Abbrev [7] 0x1f4a:0x5 DW_TAG_formal_parameter -; CHECK-NEXT:.b32 5631 // DW_AT_type -; CHECK-NEXT:.b8 7 // Abbrev [7] 0x1f4f:0x5 DW_TAG_formal_parameter -; CHECK-NEXT:.b32 7476 // DW_AT_type +; CHECK-NEXT:.b8 6 // Abbrev [6] 0x1e78:0x5 DW_TAG_formal_parameter +; CHECK-NEXT:.b32 1554 // DW_AT_type ; CHECK-NEXT:.b8 0 // End Of Children Mark -; CHECK-NEXT:.b8 32 // Abbrev [32] 0x1f55:0x1c DW_TAG_subprogram -; CHECK-NEXT:.b8 115 // DW_AT_name -; CHECK-NEXT:.b8 116 -; CHECK-NEXT:.b8 114 +; CHECK-NEXT:.b8 27 // Abbrev [27] 0x1e7e:0x22a DW_TAG_structure_type +; CHECK-NEXT:.b8 95 // DW_AT_name +; CHECK-NEXT:.b8 95 +; CHECK-NEXT:.b8 99 +; CHECK-NEXT:.b8 117 +; CHECK-NEXT:.b8 100 +; CHECK-NEXT:.b8 97 +; CHECK-NEXT:.b8 95 +; CHECK-NEXT:.b8 98 +; CHECK-NEXT:.b8 117 +; CHECK-NEXT:.b8 105 +; CHECK-NEXT:.b8 108 ; CHECK-NEXT:.b8 116 -; CHECK-NEXT:.b8 111 +; CHECK-NEXT:.b8 105 +; CHECK-NEXT:.b8 110 +; CHECK-NEXT:.b8 95 +; CHECK-NEXT:.b8 98 ; CHECK-NEXT:.b8 108 +; CHECK-NEXT:.b8 111 +; CHECK-NEXT:.b8 99 +; CHECK-NEXT:.b8 107 +; CHECK-NEXT:.b8 73 ; CHECK-NEXT:.b8 100 +; CHECK-NEXT:.b8 120 +; CHECK-NEXT:.b8 95 +; CHECK-NEXT:.b8 116 ; CHECK-NEXT:.b8 0 -; CHECK-NEXT:.b8 7 // DW_AT_decl_file -; CHECK-NEXT:.b8 175 // DW_AT_decl_line -; CHECK-NEXT:.b32 8049 // DW_AT_type -; CHECK-NEXT:.b8 1 // DW_AT_declaration -; CHECK-NEXT:.b8 1 // DW_AT_external -; CHECK-NEXT:.b8 7 // Abbrev [7] 0x1f66:0x5 DW_TAG_formal_parameter -; CHECK-NEXT:.b32 5631 // DW_AT_type -; CHECK-NEXT:.b8 7 // Abbrev [7] 0x1f6b:0x5 DW_TAG_formal_parameter -; CHECK-NEXT:.b32 7476 // DW_AT_type -; CHECK-NEXT:.b8 0 // End Of Children Mark -; CHECK-NEXT:.b8 10 // Abbrev [10] 0x1f71:0xf DW_TAG_base_type -; CHECK-NEXT:.b8 108 // DW_AT_name -; CHECK-NEXT:.b8 111 -; CHECK-NEXT:.b8 110 -; CHECK-NEXT:.b8 103 -; CHECK-NEXT:.b8 32 +; CHECK-NEXT:.b8 1 // DW_AT_byte_size +; CHECK-NEXT:.b8 13 // DW_AT_decl_file +; CHECK-NEXT:.b8 77 // DW_AT_decl_line +; CHECK-NEXT:.b8 28 // Abbrev [28] 0x1e9c:0x4f DW_TAG_subprogram +; CHECK-NEXT:.b8 95 // DW_AT_MIPS_linkage_name +; CHECK-NEXT:.b8 90 +; CHECK-NEXT:.b8 78 +; CHECK-NEXT:.b8 50 +; CHECK-NEXT:.b8 53 +; CHECK-NEXT:.b8 95 +; CHECK-NEXT:.b8 95 +; CHECK-NEXT:.b8 99 +; CHECK-NEXT:.b8 117 ; CHECK-NEXT:.b8 100 +; CHECK-NEXT:.b8 97 +; CHECK-NEXT:.b8 95 +; CHECK-NEXT:.b8 98 +; CHECK-NEXT:.b8 117 +; CHECK-NEXT:.b8 105 +; CHECK-NEXT:.b8 108 +; CHECK-NEXT:.b8 116 +; CHECK-NEXT:.b8 105 +; CHECK-NEXT:.b8 110 +; CHECK-NEXT:.b8 95 +; CHECK-NEXT:.b8 98 +; CHECK-NEXT:.b8 108 ; CHECK-NEXT:.b8 111 +; CHECK-NEXT:.b8 99 +; CHECK-NEXT:.b8 107 +; CHECK-NEXT:.b8 73 +; CHECK-NEXT:.b8 100 +; CHECK-NEXT:.b8 120 +; CHECK-NEXT:.b8 95 +; CHECK-NEXT:.b8 116 +; CHECK-NEXT:.b8 49 +; CHECK-NEXT:.b8 55 +; CHECK-NEXT:.b8 95 +; CHECK-NEXT:.b8 95 +; CHECK-NEXT:.b8 102 +; CHECK-NEXT:.b8 101 +; CHECK-NEXT:.b8 116 +; CHECK-NEXT:.b8 99 +; CHECK-NEXT:.b8 104 +; CHECK-NEXT:.b8 95 +; CHECK-NEXT:.b8 98 ; CHECK-NEXT:.b8 117 +; CHECK-NEXT:.b8 105 +; CHECK-NEXT:.b8 108 +; CHECK-NEXT:.b8 116 +; CHECK-NEXT:.b8 105 +; CHECK-NEXT:.b8 110 +; CHECK-NEXT:.b8 95 +; CHECK-NEXT:.b8 120 +; CHECK-NEXT:.b8 69 +; CHECK-NEXT:.b8 118 +; CHECK-NEXT:.b8 0 +; CHECK-NEXT:.b8 95 // DW_AT_name +; CHECK-NEXT:.b8 95 +; CHECK-NEXT:.b8 102 +; CHECK-NEXT:.b8 101 +; 
CHECK-NEXT:.b8 116 +; CHECK-NEXT:.b8 99 +; CHECK-NEXT:.b8 104 +; CHECK-NEXT:.b8 95 ; CHECK-NEXT:.b8 98 +; CHECK-NEXT:.b8 117 +; CHECK-NEXT:.b8 105 ; CHECK-NEXT:.b8 108 -; CHECK-NEXT:.b8 101 +; CHECK-NEXT:.b8 116 +; CHECK-NEXT:.b8 105 +; CHECK-NEXT:.b8 110 +; CHECK-NEXT:.b8 95 +; CHECK-NEXT:.b8 120 ; CHECK-NEXT:.b8 0 -; CHECK-NEXT:.b8 4 // DW_AT_encoding -; CHECK-NEXT:.b8 8 // DW_AT_byte_size -; CHECK-NEXT:.b8 44 // Abbrev [44] 0x1f80:0x20 DW_TAG_subprogram +; CHECK-NEXT:.b8 13 // DW_AT_decl_file +; CHECK-NEXT:.b8 78 // DW_AT_decl_line +; CHECK-NEXT:.b32 5207 // DW_AT_type +; CHECK-NEXT:.b8 1 // DW_AT_declaration +; CHECK-NEXT:.b8 1 // DW_AT_external +; CHECK-NEXT:.b8 28 // Abbrev [28] 0x1eeb:0x4f DW_TAG_subprogram ; CHECK-NEXT:.b8 95 // DW_AT_MIPS_linkage_name ; CHECK-NEXT:.b8 90 -; CHECK-NEXT:.b8 76 +; CHECK-NEXT:.b8 78 +; CHECK-NEXT:.b8 50 ; CHECK-NEXT:.b8 53 -; CHECK-NEXT:.b8 97 +; CHECK-NEXT:.b8 95 +; CHECK-NEXT:.b8 95 ; CHECK-NEXT:.b8 99 +; CHECK-NEXT:.b8 117 +; CHECK-NEXT:.b8 100 +; CHECK-NEXT:.b8 97 +; CHECK-NEXT:.b8 95 +; CHECK-NEXT:.b8 98 +; CHECK-NEXT:.b8 117 +; CHECK-NEXT:.b8 105 +; CHECK-NEXT:.b8 108 +; CHECK-NEXT:.b8 116 +; CHECK-NEXT:.b8 105 +; CHECK-NEXT:.b8 110 +; CHECK-NEXT:.b8 95 +; CHECK-NEXT:.b8 98 +; CHECK-NEXT:.b8 108 ; CHECK-NEXT:.b8 111 -; CHECK-NEXT:.b8 115 -; CHECK-NEXT:.b8 102 -; CHECK-NEXT:.b8 102 -; CHECK-NEXT:.b8 0 -; CHECK-NEXT:.b8 97 // DW_AT_name ; CHECK-NEXT:.b8 99 -; CHECK-NEXT:.b8 111 -; CHECK-NEXT:.b8 115 +; CHECK-NEXT:.b8 107 +; CHECK-NEXT:.b8 73 +; CHECK-NEXT:.b8 100 +; CHECK-NEXT:.b8 120 +; CHECK-NEXT:.b8 95 +; CHECK-NEXT:.b8 116 +; CHECK-NEXT:.b8 49 +; CHECK-NEXT:.b8 55 +; CHECK-NEXT:.b8 95 +; CHECK-NEXT:.b8 95 ; CHECK-NEXT:.b8 102 -; CHECK-NEXT:.b8 0 -; CHECK-NEXT:.b8 12 // DW_AT_decl_file -; CHECK-NEXT:.b8 62 // DW_AT_decl_line -; CHECK-NEXT:.b8 5 -; CHECK-NEXT:.b32 2116 // DW_AT_type -; CHECK-NEXT:.b8 1 // DW_AT_declaration -; CHECK-NEXT:.b8 7 // Abbrev [7] 0x1f9a:0x5 DW_TAG_formal_parameter -; CHECK-NEXT:.b32 2116 // DW_AT_type -; CHECK-NEXT:.b8 0 // End Of Children Mark -; CHECK-NEXT:.b8 44 // Abbrev [44] 0x1fa0:0x22 DW_TAG_subprogram -; CHECK-NEXT:.b8 95 // DW_AT_MIPS_linkage_name -; CHECK-NEXT:.b8 90 -; CHECK-NEXT:.b8 76 -; CHECK-NEXT:.b8 54 -; CHECK-NEXT:.b8 97 +; CHECK-NEXT:.b8 101 +; CHECK-NEXT:.b8 116 ; CHECK-NEXT:.b8 99 -; CHECK-NEXT:.b8 111 -; CHECK-NEXT:.b8 115 ; CHECK-NEXT:.b8 104 -; CHECK-NEXT:.b8 102 -; CHECK-NEXT:.b8 102 +; CHECK-NEXT:.b8 95 +; CHECK-NEXT:.b8 98 +; CHECK-NEXT:.b8 117 +; CHECK-NEXT:.b8 105 +; CHECK-NEXT:.b8 108 +; CHECK-NEXT:.b8 116 +; CHECK-NEXT:.b8 105 +; CHECK-NEXT:.b8 110 +; CHECK-NEXT:.b8 95 +; CHECK-NEXT:.b8 121 +; CHECK-NEXT:.b8 69 +; CHECK-NEXT:.b8 118 ; CHECK-NEXT:.b8 0 -; CHECK-NEXT:.b8 97 // DW_AT_name +; CHECK-NEXT:.b8 95 // DW_AT_name +; CHECK-NEXT:.b8 95 +; CHECK-NEXT:.b8 102 +; CHECK-NEXT:.b8 101 +; CHECK-NEXT:.b8 116 ; CHECK-NEXT:.b8 99 -; CHECK-NEXT:.b8 111 -; CHECK-NEXT:.b8 115 ; CHECK-NEXT:.b8 104 -; CHECK-NEXT:.b8 102 +; CHECK-NEXT:.b8 95 +; CHECK-NEXT:.b8 98 +; CHECK-NEXT:.b8 117 +; CHECK-NEXT:.b8 105 +; CHECK-NEXT:.b8 108 +; CHECK-NEXT:.b8 116 +; CHECK-NEXT:.b8 105 +; CHECK-NEXT:.b8 110 +; CHECK-NEXT:.b8 95 +; CHECK-NEXT:.b8 121 ; CHECK-NEXT:.b8 0 -; CHECK-NEXT:.b8 12 // DW_AT_decl_file -; CHECK-NEXT:.b8 90 // DW_AT_decl_line -; CHECK-NEXT:.b8 5 -; CHECK-NEXT:.b32 2116 // DW_AT_type +; CHECK-NEXT:.b8 13 // DW_AT_decl_file +; CHECK-NEXT:.b8 79 // DW_AT_decl_line +; CHECK-NEXT:.b32 5207 // DW_AT_type ; CHECK-NEXT:.b8 1 // DW_AT_declaration -; CHECK-NEXT:.b8 7 // Abbrev [7] 0x1fbc:0x5 
DW_TAG_formal_parameter -; CHECK-NEXT:.b32 2116 // DW_AT_type -; CHECK-NEXT:.b8 0 // End Of Children Mark -; CHECK-NEXT:.b8 44 // Abbrev [44] 0x1fc2:0x20 DW_TAG_subprogram +; CHECK-NEXT:.b8 1 // DW_AT_external +; CHECK-NEXT:.b8 28 // Abbrev [28] 0x1f3a:0x4f DW_TAG_subprogram ; CHECK-NEXT:.b8 95 // DW_AT_MIPS_linkage_name ; CHECK-NEXT:.b8 90 -; CHECK-NEXT:.b8 76 +; CHECK-NEXT:.b8 78 +; CHECK-NEXT:.b8 50 ; CHECK-NEXT:.b8 53 +; CHECK-NEXT:.b8 95 +; CHECK-NEXT:.b8 95 +; CHECK-NEXT:.b8 99 +; CHECK-NEXT:.b8 117 +; CHECK-NEXT:.b8 100 ; CHECK-NEXT:.b8 97 -; CHECK-NEXT:.b8 115 +; CHECK-NEXT:.b8 95 +; CHECK-NEXT:.b8 98 +; CHECK-NEXT:.b8 117 ; CHECK-NEXT:.b8 105 -; CHECK-NEXT:.b8 110 -; CHECK-NEXT:.b8 102 -; CHECK-NEXT:.b8 102 -; CHECK-NEXT:.b8 0 -; CHECK-NEXT:.b8 97 // DW_AT_name -; CHECK-NEXT:.b8 115 +; CHECK-NEXT:.b8 108 +; CHECK-NEXT:.b8 116 ; CHECK-NEXT:.b8 105 ; CHECK-NEXT:.b8 110 +; CHECK-NEXT:.b8 95 +; CHECK-NEXT:.b8 98 +; CHECK-NEXT:.b8 108 +; CHECK-NEXT:.b8 111 +; CHECK-NEXT:.b8 99 +; CHECK-NEXT:.b8 107 +; CHECK-NEXT:.b8 73 +; CHECK-NEXT:.b8 100 +; CHECK-NEXT:.b8 120 +; CHECK-NEXT:.b8 95 +; CHECK-NEXT:.b8 116 +; CHECK-NEXT:.b8 49 +; CHECK-NEXT:.b8 55 +; CHECK-NEXT:.b8 95 +; CHECK-NEXT:.b8 95 ; CHECK-NEXT:.b8 102 -; CHECK-NEXT:.b8 0 -; CHECK-NEXT:.b8 12 // DW_AT_decl_file -; CHECK-NEXT:.b8 57 // DW_AT_decl_line -; CHECK-NEXT:.b8 5 -; CHECK-NEXT:.b32 2116 // DW_AT_type -; CHECK-NEXT:.b8 1 // DW_AT_declaration -; CHECK-NEXT:.b8 7 // Abbrev [7] 0x1fdc:0x5 DW_TAG_formal_parameter -; CHECK-NEXT:.b32 2116 // DW_AT_type -; CHECK-NEXT:.b8 0 // End Of Children Mark -; CHECK-NEXT:.b8 44 // Abbrev [44] 0x1fe2:0x22 DW_TAG_subprogram -; CHECK-NEXT:.b8 95 // DW_AT_MIPS_linkage_name -; CHECK-NEXT:.b8 90 -; CHECK-NEXT:.b8 76 -; CHECK-NEXT:.b8 54 -; CHECK-NEXT:.b8 97 -; CHECK-NEXT:.b8 115 +; CHECK-NEXT:.b8 101 +; CHECK-NEXT:.b8 116 +; CHECK-NEXT:.b8 99 +; CHECK-NEXT:.b8 104 +; CHECK-NEXT:.b8 95 +; CHECK-NEXT:.b8 98 +; CHECK-NEXT:.b8 117 +; CHECK-NEXT:.b8 105 +; CHECK-NEXT:.b8 108 +; CHECK-NEXT:.b8 116 ; CHECK-NEXT:.b8 105 ; CHECK-NEXT:.b8 110 -; CHECK-NEXT:.b8 104 -; CHECK-NEXT:.b8 102 -; CHECK-NEXT:.b8 102 +; CHECK-NEXT:.b8 95 +; CHECK-NEXT:.b8 122 +; CHECK-NEXT:.b8 69 +; CHECK-NEXT:.b8 118 ; CHECK-NEXT:.b8 0 -; CHECK-NEXT:.b8 97 // DW_AT_name -; CHECK-NEXT:.b8 115 +; CHECK-NEXT:.b8 95 // DW_AT_name +; CHECK-NEXT:.b8 95 +; CHECK-NEXT:.b8 102 +; CHECK-NEXT:.b8 101 +; CHECK-NEXT:.b8 116 +; CHECK-NEXT:.b8 99 +; CHECK-NEXT:.b8 104 +; CHECK-NEXT:.b8 95 +; CHECK-NEXT:.b8 98 +; CHECK-NEXT:.b8 117 +; CHECK-NEXT:.b8 105 +; CHECK-NEXT:.b8 108 +; CHECK-NEXT:.b8 116 ; CHECK-NEXT:.b8 105 ; CHECK-NEXT:.b8 110 -; CHECK-NEXT:.b8 104 -; CHECK-NEXT:.b8 102 +; CHECK-NEXT:.b8 95 +; CHECK-NEXT:.b8 122 ; CHECK-NEXT:.b8 0 -; CHECK-NEXT:.b8 12 // DW_AT_decl_file -; CHECK-NEXT:.b8 95 // DW_AT_decl_line -; CHECK-NEXT:.b8 5 -; CHECK-NEXT:.b32 2116 // DW_AT_type +; CHECK-NEXT:.b8 13 // DW_AT_decl_file +; CHECK-NEXT:.b8 80 // DW_AT_decl_line +; CHECK-NEXT:.b32 5207 // DW_AT_type ; CHECK-NEXT:.b8 1 // DW_AT_declaration -; CHECK-NEXT:.b8 7 // Abbrev [7] 0x1ffe:0x5 DW_TAG_formal_parameter -; CHECK-NEXT:.b32 2116 // DW_AT_type -; CHECK-NEXT:.b8 0 // End Of Children Mark -; CHECK-NEXT:.b8 44 // Abbrev [44] 0x2004:0x28 DW_TAG_subprogram +; CHECK-NEXT:.b8 1 // DW_AT_external +; CHECK-NEXT:.b8 25 // Abbrev [25] 0x1f89:0x49 DW_TAG_subprogram ; CHECK-NEXT:.b8 95 // DW_AT_MIPS_linkage_name ; CHECK-NEXT:.b8 90 -; CHECK-NEXT:.b8 76 -; CHECK-NEXT:.b8 54 +; CHECK-NEXT:.b8 78 +; CHECK-NEXT:.b8 75 +; CHECK-NEXT:.b8 50 +; CHECK-NEXT:.b8 53 +; 
CHECK-NEXT:.b8 95 +; CHECK-NEXT:.b8 95 +; CHECK-NEXT:.b8 99 +; CHECK-NEXT:.b8 117 +; CHECK-NEXT:.b8 100 ; CHECK-NEXT:.b8 97 +; CHECK-NEXT:.b8 95 +; CHECK-NEXT:.b8 98 +; CHECK-NEXT:.b8 117 +; CHECK-NEXT:.b8 105 +; CHECK-NEXT:.b8 108 ; CHECK-NEXT:.b8 116 -; CHECK-NEXT:.b8 97 +; CHECK-NEXT:.b8 105 ; CHECK-NEXT:.b8 110 -; CHECK-NEXT:.b8 50 -; CHECK-NEXT:.b8 102 -; CHECK-NEXT:.b8 102 -; CHECK-NEXT:.b8 102 -; CHECK-NEXT:.b8 0 -; CHECK-NEXT:.b8 97 // DW_AT_name +; CHECK-NEXT:.b8 95 +; CHECK-NEXT:.b8 98 +; CHECK-NEXT:.b8 108 +; CHECK-NEXT:.b8 111 +; CHECK-NEXT:.b8 99 +; CHECK-NEXT:.b8 107 +; CHECK-NEXT:.b8 73 +; CHECK-NEXT:.b8 100 +; CHECK-NEXT:.b8 120 +; CHECK-NEXT:.b8 95 ; CHECK-NEXT:.b8 116 +; CHECK-NEXT:.b8 99 +; CHECK-NEXT:.b8 118 +; CHECK-NEXT:.b8 53 +; CHECK-NEXT:.b8 117 +; CHECK-NEXT:.b8 105 +; CHECK-NEXT:.b8 110 +; CHECK-NEXT:.b8 116 +; CHECK-NEXT:.b8 51 +; CHECK-NEXT:.b8 69 +; CHECK-NEXT:.b8 118 +; CHECK-NEXT:.b8 0 +; CHECK-NEXT:.b8 111 // DW_AT_name +; CHECK-NEXT:.b8 112 +; CHECK-NEXT:.b8 101 +; CHECK-NEXT:.b8 114 ; CHECK-NEXT:.b8 97 +; CHECK-NEXT:.b8 116 +; CHECK-NEXT:.b8 111 +; CHECK-NEXT:.b8 114 +; CHECK-NEXT:.b8 32 +; CHECK-NEXT:.b8 117 +; CHECK-NEXT:.b8 105 ; CHECK-NEXT:.b8 110 -; CHECK-NEXT:.b8 50 -; CHECK-NEXT:.b8 102 +; CHECK-NEXT:.b8 116 +; CHECK-NEXT:.b8 51 ; CHECK-NEXT:.b8 0 -; CHECK-NEXT:.b8 12 // DW_AT_decl_file -; CHECK-NEXT:.b8 47 // DW_AT_decl_line -; CHECK-NEXT:.b8 5 -; CHECK-NEXT:.b32 2116 // DW_AT_type +; CHECK-NEXT:.b8 13 // DW_AT_decl_file +; CHECK-NEXT:.b8 83 // DW_AT_decl_line +; CHECK-NEXT:.b32 8360 // DW_AT_type ; CHECK-NEXT:.b8 1 // DW_AT_declaration -; CHECK-NEXT:.b8 7 // Abbrev [7] 0x2021:0x5 DW_TAG_formal_parameter -; CHECK-NEXT:.b32 2116 // DW_AT_type -; CHECK-NEXT:.b8 7 // Abbrev [7] 0x2026:0x5 DW_TAG_formal_parameter -; CHECK-NEXT:.b32 2116 // DW_AT_type +; CHECK-NEXT:.b8 1 // DW_AT_external +; CHECK-NEXT:.b8 29 // Abbrev [29] 0x1fcb:0x6 DW_TAG_formal_parameter +; CHECK-NEXT:.b32 8407 // DW_AT_type +; CHECK-NEXT:.b8 1 // DW_AT_artificial ; CHECK-NEXT:.b8 0 // End Of Children Mark -; CHECK-NEXT:.b8 44 // Abbrev [44] 0x202c:0x20 DW_TAG_subprogram -; CHECK-NEXT:.b8 95 // DW_AT_MIPS_linkage_name -; CHECK-NEXT:.b8 90 -; CHECK-NEXT:.b8 76 -; CHECK-NEXT:.b8 53 +; CHECK-NEXT:.b8 30 // Abbrev [30] 0x1fd2:0x27 DW_TAG_subprogram +; CHECK-NEXT:.b8 95 // DW_AT_name +; CHECK-NEXT:.b8 95 +; CHECK-NEXT:.b8 99 +; CHECK-NEXT:.b8 117 +; CHECK-NEXT:.b8 100 ; CHECK-NEXT:.b8 97 +; CHECK-NEXT:.b8 95 +; CHECK-NEXT:.b8 98 +; CHECK-NEXT:.b8 117 +; CHECK-NEXT:.b8 105 +; CHECK-NEXT:.b8 108 ; CHECK-NEXT:.b8 116 -; CHECK-NEXT:.b8 97 +; CHECK-NEXT:.b8 105 ; CHECK-NEXT:.b8 110 -; CHECK-NEXT:.b8 102 -; CHECK-NEXT:.b8 102 -; CHECK-NEXT:.b8 0 -; CHECK-NEXT:.b8 97 // DW_AT_name +; CHECK-NEXT:.b8 95 +; CHECK-NEXT:.b8 98 +; CHECK-NEXT:.b8 108 +; CHECK-NEXT:.b8 111 +; CHECK-NEXT:.b8 99 +; CHECK-NEXT:.b8 107 +; CHECK-NEXT:.b8 73 +; CHECK-NEXT:.b8 100 +; CHECK-NEXT:.b8 120 +; CHECK-NEXT:.b8 95 ; CHECK-NEXT:.b8 116 -; CHECK-NEXT:.b8 97 -; CHECK-NEXT:.b8 110 -; CHECK-NEXT:.b8 102 ; CHECK-NEXT:.b8 0 -; CHECK-NEXT:.b8 12 // DW_AT_decl_file -; CHECK-NEXT:.b8 52 // DW_AT_decl_line -; CHECK-NEXT:.b8 5 -; CHECK-NEXT:.b32 2116 // DW_AT_type +; CHECK-NEXT:.b8 13 // DW_AT_decl_file +; CHECK-NEXT:.b8 85 // DW_AT_decl_line ; CHECK-NEXT:.b8 1 // DW_AT_declaration -; CHECK-NEXT:.b8 7 // Abbrev [7] 0x2046:0x5 DW_TAG_formal_parameter -; CHECK-NEXT:.b32 2116 // DW_AT_type +; CHECK-NEXT:.b8 1 // DW_AT_external +; CHECK-NEXT:.b8 3 // DW_AT_accessibility +; CHECK-NEXT: // DW_ACCESS_private +; CHECK-NEXT:.b8 29 // 
Abbrev [29] 0x1ff2:0x6 DW_TAG_formal_parameter +; CHECK-NEXT:.b32 8417 // DW_AT_type +; CHECK-NEXT:.b8 1 // DW_AT_artificial ; CHECK-NEXT:.b8 0 // End Of Children Mark -; CHECK-NEXT:.b8 44 // Abbrev [44] 0x204c:0x22 DW_TAG_subprogram -; CHECK-NEXT:.b8 95 // DW_AT_MIPS_linkage_name -; CHECK-NEXT:.b8 90 -; CHECK-NEXT:.b8 76 -; CHECK-NEXT:.b8 54 +; CHECK-NEXT:.b8 30 // Abbrev [30] 0x1ff9:0x2c DW_TAG_subprogram +; CHECK-NEXT:.b8 95 // DW_AT_name +; CHECK-NEXT:.b8 95 +; CHECK-NEXT:.b8 99 +; CHECK-NEXT:.b8 117 +; CHECK-NEXT:.b8 100 ; CHECK-NEXT:.b8 97 +; CHECK-NEXT:.b8 95 +; CHECK-NEXT:.b8 98 +; CHECK-NEXT:.b8 117 +; CHECK-NEXT:.b8 105 +; CHECK-NEXT:.b8 108 ; CHECK-NEXT:.b8 116 -; CHECK-NEXT:.b8 97 +; CHECK-NEXT:.b8 105 ; CHECK-NEXT:.b8 110 -; CHECK-NEXT:.b8 104 -; CHECK-NEXT:.b8 102 -; CHECK-NEXT:.b8 102 -; CHECK-NEXT:.b8 0 -; CHECK-NEXT:.b8 97 // DW_AT_name +; CHECK-NEXT:.b8 95 +; CHECK-NEXT:.b8 98 +; CHECK-NEXT:.b8 108 +; CHECK-NEXT:.b8 111 +; CHECK-NEXT:.b8 99 +; CHECK-NEXT:.b8 107 +; CHECK-NEXT:.b8 73 +; CHECK-NEXT:.b8 100 +; CHECK-NEXT:.b8 120 +; CHECK-NEXT:.b8 95 ; CHECK-NEXT:.b8 116 -; CHECK-NEXT:.b8 97 -; CHECK-NEXT:.b8 110 -; CHECK-NEXT:.b8 104 -; CHECK-NEXT:.b8 102 ; CHECK-NEXT:.b8 0 -; CHECK-NEXT:.b8 12 // DW_AT_decl_file -; CHECK-NEXT:.b8 100 // DW_AT_decl_line -; CHECK-NEXT:.b8 5 -; CHECK-NEXT:.b32 2116 // DW_AT_type +; CHECK-NEXT:.b8 13 // DW_AT_decl_file +; CHECK-NEXT:.b8 85 // DW_AT_decl_line ; CHECK-NEXT:.b8 1 // DW_AT_declaration -; CHECK-NEXT:.b8 7 // Abbrev [7] 0x2068:0x5 DW_TAG_formal_parameter -; CHECK-NEXT:.b32 2116 // DW_AT_type +; CHECK-NEXT:.b8 1 // DW_AT_external +; CHECK-NEXT:.b8 3 // DW_AT_accessibility +; CHECK-NEXT: // DW_ACCESS_private +; CHECK-NEXT:.b8 29 // Abbrev [29] 0x2019:0x6 DW_TAG_formal_parameter +; CHECK-NEXT:.b32 8417 // DW_AT_type +; CHECK-NEXT:.b8 1 // DW_AT_artificial +; CHECK-NEXT:.b8 6 // Abbrev [6] 0x201f:0x5 DW_TAG_formal_parameter +; CHECK-NEXT:.b32 8422 // DW_AT_type ; CHECK-NEXT:.b8 0 // End Of Children Mark -; CHECK-NEXT:.b8 44 // Abbrev [44] 0x206e:0x20 DW_TAG_subprogram +; CHECK-NEXT:.b8 31 // Abbrev [31] 0x2025:0x43 DW_TAG_subprogram ; CHECK-NEXT:.b8 95 // DW_AT_MIPS_linkage_name ; CHECK-NEXT:.b8 90 -; CHECK-NEXT:.b8 76 +; CHECK-NEXT:.b8 78 +; CHECK-NEXT:.b8 75 +; CHECK-NEXT:.b8 50 ; CHECK-NEXT:.b8 53 +; CHECK-NEXT:.b8 95 +; CHECK-NEXT:.b8 95 ; CHECK-NEXT:.b8 99 +; CHECK-NEXT:.b8 117 +; CHECK-NEXT:.b8 100 +; CHECK-NEXT:.b8 97 +; CHECK-NEXT:.b8 95 ; CHECK-NEXT:.b8 98 -; CHECK-NEXT:.b8 114 +; CHECK-NEXT:.b8 117 +; CHECK-NEXT:.b8 105 +; CHECK-NEXT:.b8 108 ; CHECK-NEXT:.b8 116 -; CHECK-NEXT:.b8 102 -; CHECK-NEXT:.b8 102 -; CHECK-NEXT:.b8 0 -; CHECK-NEXT:.b8 99 // DW_AT_name +; CHECK-NEXT:.b8 105 +; CHECK-NEXT:.b8 110 +; CHECK-NEXT:.b8 95 ; CHECK-NEXT:.b8 98 +; CHECK-NEXT:.b8 108 +; CHECK-NEXT:.b8 111 +; CHECK-NEXT:.b8 99 +; CHECK-NEXT:.b8 107 +; CHECK-NEXT:.b8 73 +; CHECK-NEXT:.b8 100 +; CHECK-NEXT:.b8 120 +; CHECK-NEXT:.b8 95 +; CHECK-NEXT:.b8 116 +; CHECK-NEXT:.b8 97 +; CHECK-NEXT:.b8 83 +; CHECK-NEXT:.b8 69 +; CHECK-NEXT:.b8 82 +; CHECK-NEXT:.b8 75 +; CHECK-NEXT:.b8 83 +; CHECK-NEXT:.b8 95 +; CHECK-NEXT:.b8 0 +; CHECK-NEXT:.b8 111 // DW_AT_name +; CHECK-NEXT:.b8 112 +; CHECK-NEXT:.b8 101 ; CHECK-NEXT:.b8 114 +; CHECK-NEXT:.b8 97 ; CHECK-NEXT:.b8 116 -; CHECK-NEXT:.b8 102 +; CHECK-NEXT:.b8 111 +; CHECK-NEXT:.b8 114 +; CHECK-NEXT:.b8 61 ; CHECK-NEXT:.b8 0 -; CHECK-NEXT:.b8 12 // DW_AT_decl_file -; CHECK-NEXT:.b8 150 // DW_AT_decl_line -; CHECK-NEXT:.b8 5 -; CHECK-NEXT:.b32 2116 // DW_AT_type +; CHECK-NEXT:.b8 13 // DW_AT_decl_file +; 
CHECK-NEXT:.b8 85 // DW_AT_decl_line ; CHECK-NEXT:.b8 1 // DW_AT_declaration -; CHECK-NEXT:.b8 7 // Abbrev [7] 0x2088:0x5 DW_TAG_formal_parameter -; CHECK-NEXT:.b32 2116 // DW_AT_type +; CHECK-NEXT:.b8 1 // DW_AT_external +; CHECK-NEXT:.b8 3 // DW_AT_accessibility +; CHECK-NEXT: // DW_ACCESS_private +; CHECK-NEXT:.b8 29 // Abbrev [29] 0x205c:0x6 DW_TAG_formal_parameter +; CHECK-NEXT:.b32 8407 // DW_AT_type +; CHECK-NEXT:.b8 1 // DW_AT_artificial +; CHECK-NEXT:.b8 6 // Abbrev [6] 0x2062:0x5 DW_TAG_formal_parameter +; CHECK-NEXT:.b32 8422 // DW_AT_type ; CHECK-NEXT:.b8 0 // End Of Children Mark -; CHECK-NEXT:.b8 44 // Abbrev [44] 0x208e:0x20 DW_TAG_subprogram +; CHECK-NEXT:.b8 32 // Abbrev [32] 0x2068:0x3f DW_TAG_subprogram ; CHECK-NEXT:.b8 95 // DW_AT_MIPS_linkage_name ; CHECK-NEXT:.b8 90 -; CHECK-NEXT:.b8 76 +; CHECK-NEXT:.b8 78 +; CHECK-NEXT:.b8 75 +; CHECK-NEXT:.b8 50 ; CHECK-NEXT:.b8 53 +; CHECK-NEXT:.b8 95 +; CHECK-NEXT:.b8 95 ; CHECK-NEXT:.b8 99 -; CHECK-NEXT:.b8 101 +; CHECK-NEXT:.b8 117 +; CHECK-NEXT:.b8 100 +; CHECK-NEXT:.b8 97 +; CHECK-NEXT:.b8 95 +; CHECK-NEXT:.b8 98 +; CHECK-NEXT:.b8 117 ; CHECK-NEXT:.b8 105 ; CHECK-NEXT:.b8 108 -; CHECK-NEXT:.b8 102 -; CHECK-NEXT:.b8 102 -; CHECK-NEXT:.b8 0 -; CHECK-NEXT:.b8 99 // DW_AT_name -; CHECK-NEXT:.b8 101 +; CHECK-NEXT:.b8 116 ; CHECK-NEXT:.b8 105 +; CHECK-NEXT:.b8 110 +; CHECK-NEXT:.b8 95 +; CHECK-NEXT:.b8 98 ; CHECK-NEXT:.b8 108 -; CHECK-NEXT:.b8 102 +; CHECK-NEXT:.b8 111 +; CHECK-NEXT:.b8 99 +; CHECK-NEXT:.b8 107 +; CHECK-NEXT:.b8 73 +; CHECK-NEXT:.b8 100 +; CHECK-NEXT:.b8 120 +; CHECK-NEXT:.b8 95 +; CHECK-NEXT:.b8 116 +; CHECK-NEXT:.b8 97 +; CHECK-NEXT:.b8 100 +; CHECK-NEXT:.b8 69 +; CHECK-NEXT:.b8 118 ; CHECK-NEXT:.b8 0 -; CHECK-NEXT:.b8 14 // DW_AT_decl_file -; CHECK-NEXT:.b8 155 // DW_AT_decl_line -; CHECK-NEXT:.b8 2 -; CHECK-NEXT:.b32 2116 // DW_AT_type +; CHECK-NEXT:.b8 111 // DW_AT_name +; CHECK-NEXT:.b8 112 +; CHECK-NEXT:.b8 101 +; CHECK-NEXT:.b8 114 +; CHECK-NEXT:.b8 97 +; CHECK-NEXT:.b8 116 +; CHECK-NEXT:.b8 111 +; CHECK-NEXT:.b8 114 +; CHECK-NEXT:.b8 38 +; CHECK-NEXT:.b8 0 +; CHECK-NEXT:.b8 13 // DW_AT_decl_file +; CHECK-NEXT:.b8 85 // DW_AT_decl_line +; CHECK-NEXT:.b32 8427 // DW_AT_type ; CHECK-NEXT:.b8 1 // DW_AT_declaration -; CHECK-NEXT:.b8 7 // Abbrev [7] 0x20a8:0x5 DW_TAG_formal_parameter -; CHECK-NEXT:.b32 2116 // DW_AT_type +; CHECK-NEXT:.b8 1 // DW_AT_external +; CHECK-NEXT:.b8 3 // DW_AT_accessibility +; CHECK-NEXT: // DW_ACCESS_private +; CHECK-NEXT:.b8 29 // Abbrev [29] 0x20a0:0x6 DW_TAG_formal_parameter +; CHECK-NEXT:.b32 8407 // DW_AT_type +; CHECK-NEXT:.b8 1 // DW_AT_artificial ; CHECK-NEXT:.b8 0 // End Of Children Mark -; CHECK-NEXT:.b8 44 // Abbrev [44] 0x20ae:0x2e DW_TAG_subprogram -; CHECK-NEXT:.b8 95 // DW_AT_MIPS_linkage_name -; CHECK-NEXT:.b8 90 -; CHECK-NEXT:.b8 76 -; CHECK-NEXT:.b8 57 -; CHECK-NEXT:.b8 99 -; CHECK-NEXT:.b8 111 -; CHECK-NEXT:.b8 112 -; CHECK-NEXT:.b8 121 -; CHECK-NEXT:.b8 115 +; CHECK-NEXT:.b8 0 // End Of Children Mark +; CHECK-NEXT:.b8 27 // Abbrev [27] 0x20a8:0x2f DW_TAG_structure_type +; CHECK-NEXT:.b8 117 // DW_AT_name ; CHECK-NEXT:.b8 105 -; CHECK-NEXT:.b8 103 ; CHECK-NEXT:.b8 110 -; CHECK-NEXT:.b8 102 -; CHECK-NEXT:.b8 102 -; CHECK-NEXT:.b8 102 +; CHECK-NEXT:.b8 116 +; CHECK-NEXT:.b8 51 ; CHECK-NEXT:.b8 0 -; CHECK-NEXT:.b8 99 // DW_AT_name -; CHECK-NEXT:.b8 111 -; CHECK-NEXT:.b8 112 -; CHECK-NEXT:.b8 121 -; CHECK-NEXT:.b8 115 -; CHECK-NEXT:.b8 105 -; CHECK-NEXT:.b8 103 -; CHECK-NEXT:.b8 110 -; CHECK-NEXT:.b8 102 +; CHECK-NEXT:.b8 12 // DW_AT_byte_size +; CHECK-NEXT:.b8 14 // 
DW_AT_decl_file +; CHECK-NEXT:.b8 190 // DW_AT_decl_line +; CHECK-NEXT:.b8 14 // Abbrev [14] 0x20b2:0xc DW_TAG_member +; CHECK-NEXT:.b8 120 // DW_AT_name ; CHECK-NEXT:.b8 0 -; CHECK-NEXT:.b8 12 // DW_AT_decl_file -; CHECK-NEXT:.b8 165 // DW_AT_decl_line -; CHECK-NEXT:.b8 4 -; CHECK-NEXT:.b32 2116 // DW_AT_type -; CHECK-NEXT:.b8 1 // DW_AT_declaration -; CHECK-NEXT:.b8 7 // Abbrev [7] 0x20d1:0x5 DW_TAG_formal_parameter -; CHECK-NEXT:.b32 2116 // DW_AT_type -; CHECK-NEXT:.b8 7 // Abbrev [7] 0x20d6:0x5 DW_TAG_formal_parameter -; CHECK-NEXT:.b32 2116 // DW_AT_type -; CHECK-NEXT:.b8 0 // End Of Children Mark -; CHECK-NEXT:.b8 44 // Abbrev [44] 0x20dc:0x1e DW_TAG_subprogram -; CHECK-NEXT:.b8 95 // DW_AT_MIPS_linkage_name -; CHECK-NEXT:.b8 90 -; CHECK-NEXT:.b8 76 -; CHECK-NEXT:.b8 52 -; CHECK-NEXT:.b8 99 -; CHECK-NEXT:.b8 111 -; CHECK-NEXT:.b8 115 -; CHECK-NEXT:.b8 102 -; CHECK-NEXT:.b8 102 +; CHECK-NEXT:.b32 5207 // DW_AT_type +; CHECK-NEXT:.b8 14 // DW_AT_decl_file +; CHECK-NEXT:.b8 192 // DW_AT_decl_line +; CHECK-NEXT:.b8 2 // DW_AT_data_member_location +; CHECK-NEXT:.b8 35 ; CHECK-NEXT:.b8 0 -; CHECK-NEXT:.b8 99 // DW_AT_name -; CHECK-NEXT:.b8 111 -; CHECK-NEXT:.b8 115 -; CHECK-NEXT:.b8 102 +; CHECK-NEXT:.b8 14 // Abbrev [14] 0x20be:0xc DW_TAG_member +; CHECK-NEXT:.b8 121 // DW_AT_name ; CHECK-NEXT:.b8 0 -; CHECK-NEXT:.b8 12 // DW_AT_decl_file -; CHECK-NEXT:.b8 219 // DW_AT_decl_line +; CHECK-NEXT:.b32 5207 // DW_AT_type +; CHECK-NEXT:.b8 14 // DW_AT_decl_file +; CHECK-NEXT:.b8 192 // DW_AT_decl_line +; CHECK-NEXT:.b8 2 // DW_AT_data_member_location +; CHECK-NEXT:.b8 35 ; CHECK-NEXT:.b8 4 -; CHECK-NEXT:.b32 2116 // DW_AT_type -; CHECK-NEXT:.b8 1 // DW_AT_declaration -; CHECK-NEXT:.b8 7 // Abbrev [7] 0x20f4:0x5 DW_TAG_formal_parameter -; CHECK-NEXT:.b32 2116 // DW_AT_type +; CHECK-NEXT:.b8 14 // Abbrev [14] 0x20ca:0xc DW_TAG_member +; CHECK-NEXT:.b8 122 // DW_AT_name +; CHECK-NEXT:.b8 0 +; CHECK-NEXT:.b32 5207 // DW_AT_type +; CHECK-NEXT:.b8 14 // DW_AT_decl_file +; CHECK-NEXT:.b8 192 // DW_AT_decl_line +; CHECK-NEXT:.b8 2 // DW_AT_data_member_location +; CHECK-NEXT:.b8 35 +; CHECK-NEXT:.b8 8 ; CHECK-NEXT:.b8 0 // End Of Children Mark -; CHECK-NEXT:.b8 44 // Abbrev [44] 0x20fa:0x20 DW_TAG_subprogram -; CHECK-NEXT:.b8 95 // DW_AT_MIPS_linkage_name -; CHECK-NEXT:.b8 90 -; CHECK-NEXT:.b8 76 -; CHECK-NEXT:.b8 53 +; CHECK-NEXT:.b8 8 // Abbrev [8] 0x20d7:0x5 DW_TAG_pointer_type +; CHECK-NEXT:.b32 8412 // DW_AT_type +; CHECK-NEXT:.b8 9 // Abbrev [9] 0x20dc:0x5 DW_TAG_const_type +; CHECK-NEXT:.b32 7806 // DW_AT_type +; CHECK-NEXT:.b8 8 // Abbrev [8] 0x20e1:0x5 DW_TAG_pointer_type +; CHECK-NEXT:.b32 7806 // DW_AT_type +; CHECK-NEXT:.b8 33 // Abbrev [33] 0x20e6:0x5 DW_TAG_reference_type +; CHECK-NEXT:.b32 8412 // DW_AT_type +; CHECK-NEXT:.b8 8 // Abbrev [8] 0x20eb:0x5 DW_TAG_pointer_type +; CHECK-NEXT:.b32 7806 // DW_AT_type +; CHECK-NEXT:.b8 34 // Abbrev [34] 0x20f0:0x6 DW_TAG_subprogram +; CHECK-NEXT:.b32 7836 // DW_AT_specification +; CHECK-NEXT:.b8 1 // DW_AT_inline +; CHECK-NEXT:.b8 27 // Abbrev [27] 0x20f6:0x228 DW_TAG_structure_type +; CHECK-NEXT:.b8 95 // DW_AT_name +; CHECK-NEXT:.b8 95 ; CHECK-NEXT:.b8 99 +; CHECK-NEXT:.b8 117 +; CHECK-NEXT:.b8 100 +; CHECK-NEXT:.b8 97 +; CHECK-NEXT:.b8 95 +; CHECK-NEXT:.b8 98 +; CHECK-NEXT:.b8 117 +; CHECK-NEXT:.b8 105 +; CHECK-NEXT:.b8 108 +; CHECK-NEXT:.b8 116 +; CHECK-NEXT:.b8 105 +; CHECK-NEXT:.b8 110 +; CHECK-NEXT:.b8 95 +; CHECK-NEXT:.b8 98 +; CHECK-NEXT:.b8 108 ; CHECK-NEXT:.b8 111 -; CHECK-NEXT:.b8 115 -; CHECK-NEXT:.b8 104 -; CHECK-NEXT:.b8 102 -; 
CHECK-NEXT:.b8 102 -; CHECK-NEXT:.b8 0 -; CHECK-NEXT:.b8 99 // DW_AT_name -; CHECK-NEXT:.b8 111 -; CHECK-NEXT:.b8 115 -; CHECK-NEXT:.b8 104 -; CHECK-NEXT:.b8 102 +; CHECK-NEXT:.b8 99 +; CHECK-NEXT:.b8 107 +; CHECK-NEXT:.b8 68 +; CHECK-NEXT:.b8 105 +; CHECK-NEXT:.b8 109 +; CHECK-NEXT:.b8 95 +; CHECK-NEXT:.b8 116 ; CHECK-NEXT:.b8 0 -; CHECK-NEXT:.b8 12 // DW_AT_decl_file -; CHECK-NEXT:.b8 32 // DW_AT_decl_line -; CHECK-NEXT:.b8 5 -; CHECK-NEXT:.b32 2116 // DW_AT_type -; CHECK-NEXT:.b8 1 // DW_AT_declaration -; CHECK-NEXT:.b8 7 // Abbrev [7] 0x2114:0x5 DW_TAG_formal_parameter -; CHECK-NEXT:.b32 2116 // DW_AT_type -; CHECK-NEXT:.b8 0 // End Of Children Mark -; CHECK-NEXT:.b8 44 // Abbrev [44] 0x211a:0x20 DW_TAG_subprogram +; CHECK-NEXT:.b8 1 // DW_AT_byte_size +; CHECK-NEXT:.b8 13 // DW_AT_decl_file +; CHECK-NEXT:.b8 88 // DW_AT_decl_line +; CHECK-NEXT:.b8 28 // Abbrev [28] 0x2114:0x4f DW_TAG_subprogram ; CHECK-NEXT:.b8 95 // DW_AT_MIPS_linkage_name ; CHECK-NEXT:.b8 90 -; CHECK-NEXT:.b8 76 +; CHECK-NEXT:.b8 78 +; CHECK-NEXT:.b8 50 ; CHECK-NEXT:.b8 53 -; CHECK-NEXT:.b8 101 -; CHECK-NEXT:.b8 114 -; CHECK-NEXT:.b8 102 +; CHECK-NEXT:.b8 95 +; CHECK-NEXT:.b8 95 ; CHECK-NEXT:.b8 99 -; CHECK-NEXT:.b8 102 -; CHECK-NEXT:.b8 102 -; CHECK-NEXT:.b8 0 -; CHECK-NEXT:.b8 101 // DW_AT_name -; CHECK-NEXT:.b8 114 -; CHECK-NEXT:.b8 102 +; CHECK-NEXT:.b8 117 +; CHECK-NEXT:.b8 100 +; CHECK-NEXT:.b8 97 +; CHECK-NEXT:.b8 95 +; CHECK-NEXT:.b8 98 +; CHECK-NEXT:.b8 117 +; CHECK-NEXT:.b8 105 +; CHECK-NEXT:.b8 108 +; CHECK-NEXT:.b8 116 +; CHECK-NEXT:.b8 105 +; CHECK-NEXT:.b8 110 +; CHECK-NEXT:.b8 95 +; CHECK-NEXT:.b8 98 +; CHECK-NEXT:.b8 108 +; CHECK-NEXT:.b8 111 ; CHECK-NEXT:.b8 99 +; CHECK-NEXT:.b8 107 +; CHECK-NEXT:.b8 68 +; CHECK-NEXT:.b8 105 +; CHECK-NEXT:.b8 109 +; CHECK-NEXT:.b8 95 +; CHECK-NEXT:.b8 116 +; CHECK-NEXT:.b8 49 +; CHECK-NEXT:.b8 55 +; CHECK-NEXT:.b8 95 +; CHECK-NEXT:.b8 95 ; CHECK-NEXT:.b8 102 -; CHECK-NEXT:.b8 0 -; CHECK-NEXT:.b8 12 // DW_AT_decl_file -; CHECK-NEXT:.b8 210 // DW_AT_decl_line -; CHECK-NEXT:.b8 5 -; CHECK-NEXT:.b32 2116 // DW_AT_type -; CHECK-NEXT:.b8 1 // DW_AT_declaration -; CHECK-NEXT:.b8 7 // Abbrev [7] 0x2134:0x5 DW_TAG_formal_parameter -; CHECK-NEXT:.b32 2116 // DW_AT_type -; CHECK-NEXT:.b8 0 // End Of Children Mark -; CHECK-NEXT:.b8 44 // Abbrev [44] 0x213a:0x1e DW_TAG_subprogram -; CHECK-NEXT:.b8 95 // DW_AT_MIPS_linkage_name -; CHECK-NEXT:.b8 90 -; CHECK-NEXT:.b8 76 -; CHECK-NEXT:.b8 52 -; CHECK-NEXT:.b8 101 -; CHECK-NEXT:.b8 114 -; CHECK-NEXT:.b8 102 -; CHECK-NEXT:.b8 102 -; CHECK-NEXT:.b8 102 -; CHECK-NEXT:.b8 0 -; CHECK-NEXT:.b8 101 // DW_AT_name -; CHECK-NEXT:.b8 114 -; CHECK-NEXT:.b8 102 -; CHECK-NEXT:.b8 102 -; CHECK-NEXT:.b8 0 -; CHECK-NEXT:.b8 12 // DW_AT_decl_file -; CHECK-NEXT:.b8 200 // DW_AT_decl_line -; CHECK-NEXT:.b8 5 -; CHECK-NEXT:.b32 2116 // DW_AT_type -; CHECK-NEXT:.b8 1 // DW_AT_declaration -; CHECK-NEXT:.b8 7 // Abbrev [7] 0x2152:0x5 DW_TAG_formal_parameter -; CHECK-NEXT:.b32 2116 // DW_AT_type -; CHECK-NEXT:.b8 0 // End Of Children Mark -; CHECK-NEXT:.b8 44 // Abbrev [44] 0x2158:0x20 DW_TAG_subprogram -; CHECK-NEXT:.b8 95 // DW_AT_MIPS_linkage_name -; CHECK-NEXT:.b8 90 -; CHECK-NEXT:.b8 76 -; CHECK-NEXT:.b8 53 ; CHECK-NEXT:.b8 101 +; CHECK-NEXT:.b8 116 +; CHECK-NEXT:.b8 99 +; CHECK-NEXT:.b8 104 +; CHECK-NEXT:.b8 95 +; CHECK-NEXT:.b8 98 +; CHECK-NEXT:.b8 117 +; CHECK-NEXT:.b8 105 +; CHECK-NEXT:.b8 108 +; CHECK-NEXT:.b8 116 +; CHECK-NEXT:.b8 105 +; CHECK-NEXT:.b8 110 +; CHECK-NEXT:.b8 95 ; CHECK-NEXT:.b8 120 -; CHECK-NEXT:.b8 112 -; CHECK-NEXT:.b8 50 -; 
CHECK-NEXT:.b8 102 -; CHECK-NEXT:.b8 102 +; CHECK-NEXT:.b8 69 +; CHECK-NEXT:.b8 118 ; CHECK-NEXT:.b8 0 -; CHECK-NEXT:.b8 101 // DW_AT_name -; CHECK-NEXT:.b8 120 -; CHECK-NEXT:.b8 112 -; CHECK-NEXT:.b8 50 +; CHECK-NEXT:.b8 95 // DW_AT_name +; CHECK-NEXT:.b8 95 ; CHECK-NEXT:.b8 102 -; CHECK-NEXT:.b8 0 -; CHECK-NEXT:.b8 14 // DW_AT_decl_file -; CHECK-NEXT:.b8 145 // DW_AT_decl_line -; CHECK-NEXT:.b8 2 -; CHECK-NEXT:.b32 2116 // DW_AT_type -; CHECK-NEXT:.b8 1 // DW_AT_declaration -; CHECK-NEXT:.b8 7 // Abbrev [7] 0x2172:0x5 DW_TAG_formal_parameter -; CHECK-NEXT:.b32 2116 // DW_AT_type -; CHECK-NEXT:.b8 0 // End Of Children Mark -; CHECK-NEXT:.b8 44 // Abbrev [44] 0x2178:0x1e DW_TAG_subprogram -; CHECK-NEXT:.b8 95 // DW_AT_MIPS_linkage_name -; CHECK-NEXT:.b8 90 -; CHECK-NEXT:.b8 76 -; CHECK-NEXT:.b8 52 ; CHECK-NEXT:.b8 101 +; CHECK-NEXT:.b8 116 +; CHECK-NEXT:.b8 99 +; CHECK-NEXT:.b8 104 +; CHECK-NEXT:.b8 95 +; CHECK-NEXT:.b8 98 +; CHECK-NEXT:.b8 117 +; CHECK-NEXT:.b8 105 +; CHECK-NEXT:.b8 108 +; CHECK-NEXT:.b8 116 +; CHECK-NEXT:.b8 105 +; CHECK-NEXT:.b8 110 +; CHECK-NEXT:.b8 95 ; CHECK-NEXT:.b8 120 -; CHECK-NEXT:.b8 112 -; CHECK-NEXT:.b8 102 -; CHECK-NEXT:.b8 102 -; CHECK-NEXT:.b8 0 -; CHECK-NEXT:.b8 101 // DW_AT_name -; CHECK-NEXT:.b8 120 -; CHECK-NEXT:.b8 112 -; CHECK-NEXT:.b8 102 ; CHECK-NEXT:.b8 0 -; CHECK-NEXT:.b8 12 // DW_AT_decl_file -; CHECK-NEXT:.b8 14 // DW_AT_decl_line -; CHECK-NEXT:.b8 5 -; CHECK-NEXT:.b32 2116 // DW_AT_type +; CHECK-NEXT:.b8 13 // DW_AT_decl_file +; CHECK-NEXT:.b8 89 // DW_AT_decl_line +; CHECK-NEXT:.b32 5207 // DW_AT_type ; CHECK-NEXT:.b8 1 // DW_AT_declaration -; CHECK-NEXT:.b8 7 // Abbrev [7] 0x2190:0x5 DW_TAG_formal_parameter -; CHECK-NEXT:.b32 2116 // DW_AT_type -; CHECK-NEXT:.b8 0 // End Of Children Mark -; CHECK-NEXT:.b8 44 // Abbrev [44] 0x2196:0x22 DW_TAG_subprogram +; CHECK-NEXT:.b8 1 // DW_AT_external +; CHECK-NEXT:.b8 28 // Abbrev [28] 0x2163:0x4f DW_TAG_subprogram ; CHECK-NEXT:.b8 95 // DW_AT_MIPS_linkage_name ; CHECK-NEXT:.b8 90 -; CHECK-NEXT:.b8 76 -; CHECK-NEXT:.b8 54 -; CHECK-NEXT:.b8 101 -; CHECK-NEXT:.b8 120 -; CHECK-NEXT:.b8 112 +; CHECK-NEXT:.b8 78 +; CHECK-NEXT:.b8 50 +; CHECK-NEXT:.b8 53 +; CHECK-NEXT:.b8 95 +; CHECK-NEXT:.b8 95 +; CHECK-NEXT:.b8 99 +; CHECK-NEXT:.b8 117 +; CHECK-NEXT:.b8 100 +; CHECK-NEXT:.b8 97 +; CHECK-NEXT:.b8 95 +; CHECK-NEXT:.b8 98 +; CHECK-NEXT:.b8 117 +; CHECK-NEXT:.b8 105 +; CHECK-NEXT:.b8 108 +; CHECK-NEXT:.b8 116 +; CHECK-NEXT:.b8 105 +; CHECK-NEXT:.b8 110 +; CHECK-NEXT:.b8 95 +; CHECK-NEXT:.b8 98 +; CHECK-NEXT:.b8 108 +; CHECK-NEXT:.b8 111 +; CHECK-NEXT:.b8 99 +; CHECK-NEXT:.b8 107 +; CHECK-NEXT:.b8 68 +; CHECK-NEXT:.b8 105 ; CHECK-NEXT:.b8 109 +; CHECK-NEXT:.b8 95 +; CHECK-NEXT:.b8 116 ; CHECK-NEXT:.b8 49 +; CHECK-NEXT:.b8 55 +; CHECK-NEXT:.b8 95 +; CHECK-NEXT:.b8 95 ; CHECK-NEXT:.b8 102 -; CHECK-NEXT:.b8 102 +; CHECK-NEXT:.b8 101 +; CHECK-NEXT:.b8 116 +; CHECK-NEXT:.b8 99 +; CHECK-NEXT:.b8 104 +; CHECK-NEXT:.b8 95 +; CHECK-NEXT:.b8 98 +; CHECK-NEXT:.b8 117 +; CHECK-NEXT:.b8 105 +; CHECK-NEXT:.b8 108 +; CHECK-NEXT:.b8 116 +; CHECK-NEXT:.b8 105 +; CHECK-NEXT:.b8 110 +; CHECK-NEXT:.b8 95 +; CHECK-NEXT:.b8 121 +; CHECK-NEXT:.b8 69 +; CHECK-NEXT:.b8 118 ; CHECK-NEXT:.b8 0 -; CHECK-NEXT:.b8 101 // DW_AT_name -; CHECK-NEXT:.b8 120 -; CHECK-NEXT:.b8 112 -; CHECK-NEXT:.b8 109 -; CHECK-NEXT:.b8 49 +; CHECK-NEXT:.b8 95 // DW_AT_name +; CHECK-NEXT:.b8 95 ; CHECK-NEXT:.b8 102 +; CHECK-NEXT:.b8 101 +; CHECK-NEXT:.b8 116 +; CHECK-NEXT:.b8 99 +; CHECK-NEXT:.b8 104 +; CHECK-NEXT:.b8 95 +; CHECK-NEXT:.b8 98 +; CHECK-NEXT:.b8 117 
+; CHECK-NEXT:.b8 105 +; CHECK-NEXT:.b8 108 +; CHECK-NEXT:.b8 116 +; CHECK-NEXT:.b8 105 +; CHECK-NEXT:.b8 110 +; CHECK-NEXT:.b8 95 +; CHECK-NEXT:.b8 121 ; CHECK-NEXT:.b8 0 -; CHECK-NEXT:.b8 12 // DW_AT_decl_file -; CHECK-NEXT:.b8 105 // DW_AT_decl_line -; CHECK-NEXT:.b8 5 -; CHECK-NEXT:.b32 2116 // DW_AT_type +; CHECK-NEXT:.b8 13 // DW_AT_decl_file +; CHECK-NEXT:.b8 90 // DW_AT_decl_line +; CHECK-NEXT:.b32 5207 // DW_AT_type ; CHECK-NEXT:.b8 1 // DW_AT_declaration -; CHECK-NEXT:.b8 7 // Abbrev [7] 0x21b2:0x5 DW_TAG_formal_parameter -; CHECK-NEXT:.b32 2116 // DW_AT_type -; CHECK-NEXT:.b8 0 // End Of Children Mark -; CHECK-NEXT:.b8 44 // Abbrev [44] 0x21b8:0x20 DW_TAG_subprogram +; CHECK-NEXT:.b8 1 // DW_AT_external +; CHECK-NEXT:.b8 28 // Abbrev [28] 0x21b2:0x4f DW_TAG_subprogram ; CHECK-NEXT:.b8 95 // DW_AT_MIPS_linkage_name ; CHECK-NEXT:.b8 90 -; CHECK-NEXT:.b8 76 +; CHECK-NEXT:.b8 78 +; CHECK-NEXT:.b8 50 ; CHECK-NEXT:.b8 53 -; CHECK-NEXT:.b8 102 +; CHECK-NEXT:.b8 95 +; CHECK-NEXT:.b8 95 +; CHECK-NEXT:.b8 99 +; CHECK-NEXT:.b8 117 +; CHECK-NEXT:.b8 100 ; CHECK-NEXT:.b8 97 +; CHECK-NEXT:.b8 95 ; CHECK-NEXT:.b8 98 -; CHECK-NEXT:.b8 115 -; CHECK-NEXT:.b8 102 +; CHECK-NEXT:.b8 117 +; CHECK-NEXT:.b8 105 +; CHECK-NEXT:.b8 108 +; CHECK-NEXT:.b8 116 +; CHECK-NEXT:.b8 105 +; CHECK-NEXT:.b8 110 +; CHECK-NEXT:.b8 95 +; CHECK-NEXT:.b8 98 +; CHECK-NEXT:.b8 108 +; CHECK-NEXT:.b8 111 +; CHECK-NEXT:.b8 99 +; CHECK-NEXT:.b8 107 +; CHECK-NEXT:.b8 68 +; CHECK-NEXT:.b8 105 +; CHECK-NEXT:.b8 109 +; CHECK-NEXT:.b8 95 +; CHECK-NEXT:.b8 116 +; CHECK-NEXT:.b8 49 +; CHECK-NEXT:.b8 55 +; CHECK-NEXT:.b8 95 +; CHECK-NEXT:.b8 95 ; CHECK-NEXT:.b8 102 -; CHECK-NEXT:.b8 0 -; CHECK-NEXT:.b8 102 // DW_AT_name -; CHECK-NEXT:.b8 97 +; CHECK-NEXT:.b8 101 +; CHECK-NEXT:.b8 116 +; CHECK-NEXT:.b8 99 +; CHECK-NEXT:.b8 104 +; CHECK-NEXT:.b8 95 ; CHECK-NEXT:.b8 98 -; CHECK-NEXT:.b8 115 +; CHECK-NEXT:.b8 117 +; CHECK-NEXT:.b8 105 +; CHECK-NEXT:.b8 108 +; CHECK-NEXT:.b8 116 +; CHECK-NEXT:.b8 105 +; CHECK-NEXT:.b8 110 +; CHECK-NEXT:.b8 95 +; CHECK-NEXT:.b8 122 +; CHECK-NEXT:.b8 69 +; CHECK-NEXT:.b8 118 +; CHECK-NEXT:.b8 0 +; CHECK-NEXT:.b8 95 // DW_AT_name +; CHECK-NEXT:.b8 95 ; CHECK-NEXT:.b8 102 +; CHECK-NEXT:.b8 101 +; CHECK-NEXT:.b8 116 +; CHECK-NEXT:.b8 99 +; CHECK-NEXT:.b8 104 +; CHECK-NEXT:.b8 95 +; CHECK-NEXT:.b8 98 +; CHECK-NEXT:.b8 117 +; CHECK-NEXT:.b8 105 +; CHECK-NEXT:.b8 108 +; CHECK-NEXT:.b8 116 +; CHECK-NEXT:.b8 105 +; CHECK-NEXT:.b8 110 +; CHECK-NEXT:.b8 95 +; CHECK-NEXT:.b8 122 ; CHECK-NEXT:.b8 0 -; CHECK-NEXT:.b8 14 // DW_AT_decl_file -; CHECK-NEXT:.b8 95 // DW_AT_decl_line -; CHECK-NEXT:.b8 2 -; CHECK-NEXT:.b32 2116 // DW_AT_type +; CHECK-NEXT:.b8 13 // DW_AT_decl_file +; CHECK-NEXT:.b8 91 // DW_AT_decl_line +; CHECK-NEXT:.b32 5207 // DW_AT_type ; CHECK-NEXT:.b8 1 // DW_AT_declaration -; CHECK-NEXT:.b8 7 // Abbrev [7] 0x21d2:0x5 DW_TAG_formal_parameter -; CHECK-NEXT:.b32 2116 // DW_AT_type -; CHECK-NEXT:.b8 0 // End Of Children Mark -; CHECK-NEXT:.b8 44 // Abbrev [44] 0x21d8:0x26 DW_TAG_subprogram +; CHECK-NEXT:.b8 1 // DW_AT_external +; CHECK-NEXT:.b8 25 // Abbrev [25] 0x2201:0x47 DW_TAG_subprogram ; CHECK-NEXT:.b8 95 // DW_AT_MIPS_linkage_name ; CHECK-NEXT:.b8 90 -; CHECK-NEXT:.b8 76 +; CHECK-NEXT:.b8 78 +; CHECK-NEXT:.b8 75 +; CHECK-NEXT:.b8 50 ; CHECK-NEXT:.b8 53 -; CHECK-NEXT:.b8 102 +; CHECK-NEXT:.b8 95 +; CHECK-NEXT:.b8 95 +; CHECK-NEXT:.b8 99 +; CHECK-NEXT:.b8 117 ; CHECK-NEXT:.b8 100 +; CHECK-NEXT:.b8 97 +; CHECK-NEXT:.b8 95 +; CHECK-NEXT:.b8 98 +; CHECK-NEXT:.b8 117 +; CHECK-NEXT:.b8 105 +; CHECK-NEXT:.b8 
108 +; CHECK-NEXT:.b8 116 +; CHECK-NEXT:.b8 105 +; CHECK-NEXT:.b8 110 +; CHECK-NEXT:.b8 95 +; CHECK-NEXT:.b8 98 +; CHECK-NEXT:.b8 108 +; CHECK-NEXT:.b8 111 +; CHECK-NEXT:.b8 99 +; CHECK-NEXT:.b8 107 +; CHECK-NEXT:.b8 68 ; CHECK-NEXT:.b8 105 ; CHECK-NEXT:.b8 109 -; CHECK-NEXT:.b8 102 -; CHECK-NEXT:.b8 102 -; CHECK-NEXT:.b8 102 -; CHECK-NEXT:.b8 0 -; CHECK-NEXT:.b8 102 // DW_AT_name +; CHECK-NEXT:.b8 95 +; CHECK-NEXT:.b8 116 +; CHECK-NEXT:.b8 99 +; CHECK-NEXT:.b8 118 +; CHECK-NEXT:.b8 52 ; CHECK-NEXT:.b8 100 ; CHECK-NEXT:.b8 105 ; CHECK-NEXT:.b8 109 -; CHECK-NEXT:.b8 102 +; CHECK-NEXT:.b8 51 +; CHECK-NEXT:.b8 69 +; CHECK-NEXT:.b8 118 ; CHECK-NEXT:.b8 0 -; CHECK-NEXT:.b8 12 // DW_AT_decl_file -; CHECK-NEXT:.b8 80 // DW_AT_decl_line -; CHECK-NEXT:.b8 6 -; CHECK-NEXT:.b32 2116 // DW_AT_type -; CHECK-NEXT:.b8 1 // DW_AT_declaration -; CHECK-NEXT:.b8 7 // Abbrev [7] 0x21f3:0x5 DW_TAG_formal_parameter -; CHECK-NEXT:.b32 2116 // DW_AT_type -; CHECK-NEXT:.b8 7 // Abbrev [7] 0x21f8:0x5 DW_TAG_formal_parameter -; CHECK-NEXT:.b32 2116 // DW_AT_type -; CHECK-NEXT:.b8 0 // End Of Children Mark -; CHECK-NEXT:.b8 44 // Abbrev [44] 0x21fe:0x22 DW_TAG_subprogram -; CHECK-NEXT:.b8 95 // DW_AT_MIPS_linkage_name -; CHECK-NEXT:.b8 90 -; CHECK-NEXT:.b8 76 -; CHECK-NEXT:.b8 54 -; CHECK-NEXT:.b8 102 -; CHECK-NEXT:.b8 108 -; CHECK-NEXT:.b8 111 -; CHECK-NEXT:.b8 111 +; CHECK-NEXT:.b8 111 // DW_AT_name +; CHECK-NEXT:.b8 112 +; CHECK-NEXT:.b8 101 ; CHECK-NEXT:.b8 114 -; CHECK-NEXT:.b8 102 -; CHECK-NEXT:.b8 102 -; CHECK-NEXT:.b8 0 -; CHECK-NEXT:.b8 102 // DW_AT_name -; CHECK-NEXT:.b8 108 -; CHECK-NEXT:.b8 111 +; CHECK-NEXT:.b8 97 +; CHECK-NEXT:.b8 116 ; CHECK-NEXT:.b8 111 ; CHECK-NEXT:.b8 114 -; CHECK-NEXT:.b8 102 -; CHECK-NEXT:.b8 0 -; CHECK-NEXT:.b8 14 // DW_AT_decl_file -; CHECK-NEXT:.b8 85 // DW_AT_decl_line -; CHECK-NEXT:.b8 2 -; CHECK-NEXT:.b32 2116 // DW_AT_type -; CHECK-NEXT:.b8 1 // DW_AT_declaration -; CHECK-NEXT:.b8 7 // Abbrev [7] 0x221a:0x5 DW_TAG_formal_parameter -; CHECK-NEXT:.b32 2116 // DW_AT_type -; CHECK-NEXT:.b8 0 // End Of Children Mark -; CHECK-NEXT:.b8 44 // Abbrev [44] 0x2220:0x2a DW_TAG_subprogram -; CHECK-NEXT:.b8 95 // DW_AT_MIPS_linkage_name -; CHECK-NEXT:.b8 90 -; CHECK-NEXT:.b8 76 -; CHECK-NEXT:.b8 52 -; CHECK-NEXT:.b8 102 -; CHECK-NEXT:.b8 109 -; CHECK-NEXT:.b8 97 -; CHECK-NEXT:.b8 102 -; CHECK-NEXT:.b8 102 -; CHECK-NEXT:.b8 102 -; CHECK-NEXT:.b8 102 -; CHECK-NEXT:.b8 0 -; CHECK-NEXT:.b8 102 // DW_AT_name +; CHECK-NEXT:.b8 32 +; CHECK-NEXT:.b8 100 +; CHECK-NEXT:.b8 105 ; CHECK-NEXT:.b8 109 -; CHECK-NEXT:.b8 97 -; CHECK-NEXT:.b8 102 +; CHECK-NEXT:.b8 51 ; CHECK-NEXT:.b8 0 -; CHECK-NEXT:.b8 12 // DW_AT_decl_file -; CHECK-NEXT:.b8 32 // DW_AT_decl_line -; CHECK-NEXT:.b8 6 -; CHECK-NEXT:.b32 2116 // DW_AT_type +; CHECK-NEXT:.b8 13 // DW_AT_decl_file +; CHECK-NEXT:.b8 94 // DW_AT_decl_line +; CHECK-NEXT:.b32 8990 // DW_AT_type ; CHECK-NEXT:.b8 1 // DW_AT_declaration -; CHECK-NEXT:.b8 7 // Abbrev [7] 0x223a:0x5 DW_TAG_formal_parameter -; CHECK-NEXT:.b32 2116 // DW_AT_type -; CHECK-NEXT:.b8 7 // Abbrev [7] 0x223f:0x5 DW_TAG_formal_parameter -; CHECK-NEXT:.b32 2116 // DW_AT_type -; CHECK-NEXT:.b8 7 // Abbrev [7] 0x2244:0x5 DW_TAG_formal_parameter -; CHECK-NEXT:.b32 2116 // DW_AT_type +; CHECK-NEXT:.b8 1 // DW_AT_external +; CHECK-NEXT:.b8 29 // Abbrev [29] 0x2241:0x6 DW_TAG_formal_parameter +; CHECK-NEXT:.b32 9166 // DW_AT_type +; CHECK-NEXT:.b8 1 // DW_AT_artificial ; CHECK-NEXT:.b8 0 // End Of Children Mark -; CHECK-NEXT:.b8 44 // Abbrev [44] 0x224a:0x26 DW_TAG_subprogram -; CHECK-NEXT:.b8 95 // 
DW_AT_MIPS_linkage_name -; CHECK-NEXT:.b8 90 -; CHECK-NEXT:.b8 76 -; CHECK-NEXT:.b8 53 -; CHECK-NEXT:.b8 102 -; CHECK-NEXT:.b8 109 +; CHECK-NEXT:.b8 30 // Abbrev [30] 0x2248:0x27 DW_TAG_subprogram +; CHECK-NEXT:.b8 95 // DW_AT_name +; CHECK-NEXT:.b8 95 +; CHECK-NEXT:.b8 99 +; CHECK-NEXT:.b8 117 +; CHECK-NEXT:.b8 100 ; CHECK-NEXT:.b8 97 -; CHECK-NEXT:.b8 120 -; CHECK-NEXT:.b8 102 -; CHECK-NEXT:.b8 102 -; CHECK-NEXT:.b8 102 -; CHECK-NEXT:.b8 0 -; CHECK-NEXT:.b8 102 // DW_AT_name +; CHECK-NEXT:.b8 95 +; CHECK-NEXT:.b8 98 +; CHECK-NEXT:.b8 117 +; CHECK-NEXT:.b8 105 +; CHECK-NEXT:.b8 108 +; CHECK-NEXT:.b8 116 +; CHECK-NEXT:.b8 105 +; CHECK-NEXT:.b8 110 +; CHECK-NEXT:.b8 95 +; CHECK-NEXT:.b8 98 +; CHECK-NEXT:.b8 108 +; CHECK-NEXT:.b8 111 +; CHECK-NEXT:.b8 99 +; CHECK-NEXT:.b8 107 +; CHECK-NEXT:.b8 68 +; CHECK-NEXT:.b8 105 ; CHECK-NEXT:.b8 109 -; CHECK-NEXT:.b8 97 -; CHECK-NEXT:.b8 120 -; CHECK-NEXT:.b8 102 +; CHECK-NEXT:.b8 95 +; CHECK-NEXT:.b8 116 ; CHECK-NEXT:.b8 0 -; CHECK-NEXT:.b8 14 // DW_AT_decl_file -; CHECK-NEXT:.b8 110 // DW_AT_decl_line -; CHECK-NEXT:.b8 2 -; CHECK-NEXT:.b32 2116 // DW_AT_type +; CHECK-NEXT:.b8 13 // DW_AT_decl_file +; CHECK-NEXT:.b8 96 // DW_AT_decl_line ; CHECK-NEXT:.b8 1 // DW_AT_declaration -; CHECK-NEXT:.b8 7 // Abbrev [7] 0x2265:0x5 DW_TAG_formal_parameter -; CHECK-NEXT:.b32 2116 // DW_AT_type -; CHECK-NEXT:.b8 7 // Abbrev [7] 0x226a:0x5 DW_TAG_formal_parameter -; CHECK-NEXT:.b32 2116 // DW_AT_type +; CHECK-NEXT:.b8 1 // DW_AT_external +; CHECK-NEXT:.b8 3 // DW_AT_accessibility +; CHECK-NEXT: // DW_ACCESS_private +; CHECK-NEXT:.b8 29 // Abbrev [29] 0x2268:0x6 DW_TAG_formal_parameter +; CHECK-NEXT:.b32 9176 // DW_AT_type +; CHECK-NEXT:.b8 1 // DW_AT_artificial ; CHECK-NEXT:.b8 0 // End Of Children Mark -; CHECK-NEXT:.b8 44 // Abbrev [44] 0x2270:0x26 DW_TAG_subprogram -; CHECK-NEXT:.b8 95 // DW_AT_MIPS_linkage_name -; CHECK-NEXT:.b8 90 -; CHECK-NEXT:.b8 76 -; CHECK-NEXT:.b8 53 -; CHECK-NEXT:.b8 102 -; CHECK-NEXT:.b8 109 +; CHECK-NEXT:.b8 30 // Abbrev [30] 0x226f:0x2c DW_TAG_subprogram +; CHECK-NEXT:.b8 95 // DW_AT_name +; CHECK-NEXT:.b8 95 +; CHECK-NEXT:.b8 99 +; CHECK-NEXT:.b8 117 +; CHECK-NEXT:.b8 100 +; CHECK-NEXT:.b8 97 +; CHECK-NEXT:.b8 95 +; CHECK-NEXT:.b8 98 +; CHECK-NEXT:.b8 117 ; CHECK-NEXT:.b8 105 -; CHECK-NEXT:.b8 110 -; CHECK-NEXT:.b8 102 -; CHECK-NEXT:.b8 102 -; CHECK-NEXT:.b8 102 -; CHECK-NEXT:.b8 0 -; CHECK-NEXT:.b8 102 // DW_AT_name -; CHECK-NEXT:.b8 109 +; CHECK-NEXT:.b8 108 +; CHECK-NEXT:.b8 116 ; CHECK-NEXT:.b8 105 ; CHECK-NEXT:.b8 110 -; CHECK-NEXT:.b8 102 -; CHECK-NEXT:.b8 0 -; CHECK-NEXT:.b8 14 // DW_AT_decl_file -; CHECK-NEXT:.b8 105 // DW_AT_decl_line -; CHECK-NEXT:.b8 2 -; CHECK-NEXT:.b32 2116 // DW_AT_type -; CHECK-NEXT:.b8 1 // DW_AT_declaration -; CHECK-NEXT:.b8 7 // Abbrev [7] 0x228b:0x5 DW_TAG_formal_parameter -; CHECK-NEXT:.b32 2116 // DW_AT_type -; CHECK-NEXT:.b8 7 // Abbrev [7] 0x2290:0x5 DW_TAG_formal_parameter -; CHECK-NEXT:.b32 2116 // DW_AT_type -; CHECK-NEXT:.b8 0 // End Of Children Mark -; CHECK-NEXT:.b8 44 // Abbrev [44] 0x2296:0x26 DW_TAG_subprogram -; CHECK-NEXT:.b8 95 // DW_AT_MIPS_linkage_name -; CHECK-NEXT:.b8 90 -; CHECK-NEXT:.b8 76 -; CHECK-NEXT:.b8 53 -; CHECK-NEXT:.b8 102 -; CHECK-NEXT:.b8 109 +; CHECK-NEXT:.b8 95 +; CHECK-NEXT:.b8 98 +; CHECK-NEXT:.b8 108 ; CHECK-NEXT:.b8 111 -; CHECK-NEXT:.b8 100 -; CHECK-NEXT:.b8 102 -; CHECK-NEXT:.b8 102 -; CHECK-NEXT:.b8 102 -; CHECK-NEXT:.b8 0 -; CHECK-NEXT:.b8 102 // DW_AT_name +; CHECK-NEXT:.b8 99 +; CHECK-NEXT:.b8 107 +; CHECK-NEXT:.b8 68 +; CHECK-NEXT:.b8 105 ; 
CHECK-NEXT:.b8 109 -; CHECK-NEXT:.b8 111 -; CHECK-NEXT:.b8 100 -; CHECK-NEXT:.b8 102 +; CHECK-NEXT:.b8 95 +; CHECK-NEXT:.b8 116 ; CHECK-NEXT:.b8 0 -; CHECK-NEXT:.b8 12 // DW_AT_decl_file -; CHECK-NEXT:.b8 17 // DW_AT_decl_line -; CHECK-NEXT:.b8 6 -; CHECK-NEXT:.b32 2116 // DW_AT_type +; CHECK-NEXT:.b8 13 // DW_AT_decl_file +; CHECK-NEXT:.b8 96 // DW_AT_decl_line ; CHECK-NEXT:.b8 1 // DW_AT_declaration -; CHECK-NEXT:.b8 7 // Abbrev [7] 0x22b1:0x5 DW_TAG_formal_parameter -; CHECK-NEXT:.b32 2116 // DW_AT_type -; CHECK-NEXT:.b8 7 // Abbrev [7] 0x22b6:0x5 DW_TAG_formal_parameter -; CHECK-NEXT:.b32 2116 // DW_AT_type +; CHECK-NEXT:.b8 1 // DW_AT_external +; CHECK-NEXT:.b8 3 // DW_AT_accessibility +; CHECK-NEXT: // DW_ACCESS_private +; CHECK-NEXT:.b8 29 // Abbrev [29] 0x228f:0x6 DW_TAG_formal_parameter +; CHECK-NEXT:.b32 9176 // DW_AT_type +; CHECK-NEXT:.b8 1 // DW_AT_artificial +; CHECK-NEXT:.b8 6 // Abbrev [6] 0x2295:0x5 DW_TAG_formal_parameter +; CHECK-NEXT:.b32 9181 // DW_AT_type ; CHECK-NEXT:.b8 0 // End Of Children Mark -; CHECK-NEXT:.b8 44 // Abbrev [44] 0x22bc:0x29 DW_TAG_subprogram +; CHECK-NEXT:.b8 31 // Abbrev [31] 0x229b:0x43 DW_TAG_subprogram ; CHECK-NEXT:.b8 95 // DW_AT_MIPS_linkage_name ; CHECK-NEXT:.b8 90 -; CHECK-NEXT:.b8 76 -; CHECK-NEXT:.b8 54 -; CHECK-NEXT:.b8 102 -; CHECK-NEXT:.b8 114 -; CHECK-NEXT:.b8 101 -; CHECK-NEXT:.b8 120 -; CHECK-NEXT:.b8 112 -; CHECK-NEXT:.b8 102 -; CHECK-NEXT:.b8 102 -; CHECK-NEXT:.b8 80 +; CHECK-NEXT:.b8 78 +; CHECK-NEXT:.b8 75 +; CHECK-NEXT:.b8 50 +; CHECK-NEXT:.b8 53 +; CHECK-NEXT:.b8 95 +; CHECK-NEXT:.b8 95 +; CHECK-NEXT:.b8 99 +; CHECK-NEXT:.b8 117 +; CHECK-NEXT:.b8 100 +; CHECK-NEXT:.b8 97 +; CHECK-NEXT:.b8 95 +; CHECK-NEXT:.b8 98 +; CHECK-NEXT:.b8 117 ; CHECK-NEXT:.b8 105 -; CHECK-NEXT:.b8 0 -; CHECK-NEXT:.b8 102 // DW_AT_name -; CHECK-NEXT:.b8 114 -; CHECK-NEXT:.b8 101 -; CHECK-NEXT:.b8 120 -; CHECK-NEXT:.b8 112 -; CHECK-NEXT:.b8 102 -; CHECK-NEXT:.b8 0 -; CHECK-NEXT:.b8 12 // DW_AT_decl_file -; CHECK-NEXT:.b8 7 // DW_AT_decl_line -; CHECK-NEXT:.b8 6 -; CHECK-NEXT:.b32 2116 // DW_AT_type -; CHECK-NEXT:.b8 1 // DW_AT_declaration -; CHECK-NEXT:.b8 7 // Abbrev [7] 0x22da:0x5 DW_TAG_formal_parameter -; CHECK-NEXT:.b32 2116 // DW_AT_type -; CHECK-NEXT:.b8 7 // Abbrev [7] 0x22df:0x5 DW_TAG_formal_parameter -; CHECK-NEXT:.b32 4624 // DW_AT_type -; CHECK-NEXT:.b8 0 // End Of Children Mark -; CHECK-NEXT:.b8 44 // Abbrev [44] 0x22e5:0x28 DW_TAG_subprogram -; CHECK-NEXT:.b8 95 // DW_AT_MIPS_linkage_name -; CHECK-NEXT:.b8 90 -; CHECK-NEXT:.b8 76 -; CHECK-NEXT:.b8 54 -; CHECK-NEXT:.b8 104 -; CHECK-NEXT:.b8 121 -; CHECK-NEXT:.b8 112 +; CHECK-NEXT:.b8 108 +; CHECK-NEXT:.b8 116 +; CHECK-NEXT:.b8 105 +; CHECK-NEXT:.b8 110 +; CHECK-NEXT:.b8 95 +; CHECK-NEXT:.b8 98 +; CHECK-NEXT:.b8 108 ; CHECK-NEXT:.b8 111 +; CHECK-NEXT:.b8 99 +; CHECK-NEXT:.b8 107 +; CHECK-NEXT:.b8 68 +; CHECK-NEXT:.b8 105 +; CHECK-NEXT:.b8 109 +; CHECK-NEXT:.b8 95 ; CHECK-NEXT:.b8 116 -; CHECK-NEXT:.b8 102 -; CHECK-NEXT:.b8 102 -; CHECK-NEXT:.b8 102 +; CHECK-NEXT:.b8 97 +; CHECK-NEXT:.b8 83 +; CHECK-NEXT:.b8 69 +; CHECK-NEXT:.b8 82 +; CHECK-NEXT:.b8 75 +; CHECK-NEXT:.b8 83 +; CHECK-NEXT:.b8 95 ; CHECK-NEXT:.b8 0 -; CHECK-NEXT:.b8 104 // DW_AT_name -; CHECK-NEXT:.b8 121 +; CHECK-NEXT:.b8 111 // DW_AT_name ; CHECK-NEXT:.b8 112 -; CHECK-NEXT:.b8 111 +; CHECK-NEXT:.b8 101 +; CHECK-NEXT:.b8 114 +; CHECK-NEXT:.b8 97 ; CHECK-NEXT:.b8 116 -; CHECK-NEXT:.b8 102 +; CHECK-NEXT:.b8 111 +; CHECK-NEXT:.b8 114 +; CHECK-NEXT:.b8 61 ; CHECK-NEXT:.b8 0 -; CHECK-NEXT:.b8 12 // DW_AT_decl_file -; CHECK-NEXT:.b8 
110 // DW_AT_decl_line -; CHECK-NEXT:.b8 5 -; CHECK-NEXT:.b32 2116 // DW_AT_type +; CHECK-NEXT:.b8 13 // DW_AT_decl_file +; CHECK-NEXT:.b8 96 // DW_AT_decl_line ; CHECK-NEXT:.b8 1 // DW_AT_declaration -; CHECK-NEXT:.b8 7 // Abbrev [7] 0x2302:0x5 DW_TAG_formal_parameter -; CHECK-NEXT:.b32 2116 // DW_AT_type -; CHECK-NEXT:.b8 7 // Abbrev [7] 0x2307:0x5 DW_TAG_formal_parameter -; CHECK-NEXT:.b32 2116 // DW_AT_type +; CHECK-NEXT:.b8 1 // DW_AT_external +; CHECK-NEXT:.b8 3 // DW_AT_accessibility +; CHECK-NEXT: // DW_ACCESS_private +; CHECK-NEXT:.b8 29 // Abbrev [29] 0x22d2:0x6 DW_TAG_formal_parameter +; CHECK-NEXT:.b32 9166 // DW_AT_type +; CHECK-NEXT:.b8 1 // DW_AT_artificial +; CHECK-NEXT:.b8 6 // Abbrev [6] 0x22d8:0x5 DW_TAG_formal_parameter +; CHECK-NEXT:.b32 9181 // DW_AT_type ; CHECK-NEXT:.b8 0 // End Of Children Mark -; CHECK-NEXT:.b8 44 // Abbrev [44] 0x230d:0x22 DW_TAG_subprogram +; CHECK-NEXT:.b8 32 // Abbrev [32] 0x22de:0x3f DW_TAG_subprogram ; CHECK-NEXT:.b8 95 // DW_AT_MIPS_linkage_name ; CHECK-NEXT:.b8 90 -; CHECK-NEXT:.b8 76 -; CHECK-NEXT:.b8 54 +; CHECK-NEXT:.b8 78 +; CHECK-NEXT:.b8 75 +; CHECK-NEXT:.b8 50 +; CHECK-NEXT:.b8 53 +; CHECK-NEXT:.b8 95 +; CHECK-NEXT:.b8 95 +; CHECK-NEXT:.b8 99 +; CHECK-NEXT:.b8 117 +; CHECK-NEXT:.b8 100 +; CHECK-NEXT:.b8 97 +; CHECK-NEXT:.b8 95 +; CHECK-NEXT:.b8 98 +; CHECK-NEXT:.b8 117 ; CHECK-NEXT:.b8 105 ; CHECK-NEXT:.b8 108 -; CHECK-NEXT:.b8 111 -; CHECK-NEXT:.b8 103 +; CHECK-NEXT:.b8 116 +; CHECK-NEXT:.b8 105 +; CHECK-NEXT:.b8 110 +; CHECK-NEXT:.b8 95 ; CHECK-NEXT:.b8 98 -; CHECK-NEXT:.b8 102 -; CHECK-NEXT:.b8 102 -; CHECK-NEXT:.b8 0 -; CHECK-NEXT:.b8 105 // DW_AT_name ; CHECK-NEXT:.b8 108 ; CHECK-NEXT:.b8 111 -; CHECK-NEXT:.b8 103 -; CHECK-NEXT:.b8 98 -; CHECK-NEXT:.b8 102 -; CHECK-NEXT:.b8 0 -; CHECK-NEXT:.b8 12 // DW_AT_decl_file -; CHECK-NEXT:.b8 85 // DW_AT_decl_line -; CHECK-NEXT:.b8 6 -; CHECK-NEXT:.b32 4579 // DW_AT_type -; CHECK-NEXT:.b8 1 // DW_AT_declaration -; CHECK-NEXT:.b8 7 // Abbrev [7] 0x2329:0x5 DW_TAG_formal_parameter -; CHECK-NEXT:.b32 2116 // DW_AT_type -; CHECK-NEXT:.b8 0 // End Of Children Mark -; CHECK-NEXT:.b8 44 // Abbrev [44] 0x232f:0x28 DW_TAG_subprogram -; CHECK-NEXT:.b8 95 // DW_AT_MIPS_linkage_name -; CHECK-NEXT:.b8 90 -; CHECK-NEXT:.b8 76 -; CHECK-NEXT:.b8 54 -; CHECK-NEXT:.b8 108 -; CHECK-NEXT:.b8 100 -; CHECK-NEXT:.b8 101 -; CHECK-NEXT:.b8 120 -; CHECK-NEXT:.b8 112 -; CHECK-NEXT:.b8 102 -; CHECK-NEXT:.b8 102 +; CHECK-NEXT:.b8 99 +; CHECK-NEXT:.b8 107 +; CHECK-NEXT:.b8 68 ; CHECK-NEXT:.b8 105 -; CHECK-NEXT:.b8 0 -; CHECK-NEXT:.b8 108 // DW_AT_name +; CHECK-NEXT:.b8 109 +; CHECK-NEXT:.b8 95 +; CHECK-NEXT:.b8 116 +; CHECK-NEXT:.b8 97 ; CHECK-NEXT:.b8 100 -; CHECK-NEXT:.b8 101 -; CHECK-NEXT:.b8 120 +; CHECK-NEXT:.b8 69 +; CHECK-NEXT:.b8 118 +; CHECK-NEXT:.b8 0 +; CHECK-NEXT:.b8 111 // DW_AT_name ; CHECK-NEXT:.b8 112 -; CHECK-NEXT:.b8 102 +; CHECK-NEXT:.b8 101 +; CHECK-NEXT:.b8 114 +; CHECK-NEXT:.b8 97 +; CHECK-NEXT:.b8 116 +; CHECK-NEXT:.b8 111 +; CHECK-NEXT:.b8 114 +; CHECK-NEXT:.b8 38 ; CHECK-NEXT:.b8 0 -; CHECK-NEXT:.b8 12 // DW_AT_decl_file -; CHECK-NEXT:.b8 240 // DW_AT_decl_line -; CHECK-NEXT:.b8 5 -; CHECK-NEXT:.b32 2116 // DW_AT_type +; CHECK-NEXT:.b8 13 // DW_AT_decl_file +; CHECK-NEXT:.b8 96 // DW_AT_decl_line +; CHECK-NEXT:.b32 9186 // DW_AT_type ; CHECK-NEXT:.b8 1 // DW_AT_declaration -; CHECK-NEXT:.b8 7 // Abbrev [7] 0x234c:0x5 DW_TAG_formal_parameter -; CHECK-NEXT:.b32 2116 // DW_AT_type -; CHECK-NEXT:.b8 7 // Abbrev [7] 0x2351:0x5 DW_TAG_formal_parameter -; CHECK-NEXT:.b32 4579 // DW_AT_type +; 
CHECK-NEXT:.b8 1 // DW_AT_external +; CHECK-NEXT:.b8 3 // DW_AT_accessibility +; CHECK-NEXT: // DW_ACCESS_private +; CHECK-NEXT:.b8 29 // Abbrev [29] 0x2316:0x6 DW_TAG_formal_parameter +; CHECK-NEXT:.b32 9166 // DW_AT_type +; CHECK-NEXT:.b8 1 // DW_AT_artificial ; CHECK-NEXT:.b8 0 // End Of Children Mark -; CHECK-NEXT:.b8 44 // Abbrev [44] 0x2357:0x24 DW_TAG_subprogram -; CHECK-NEXT:.b8 95 // DW_AT_MIPS_linkage_name -; CHECK-NEXT:.b8 90 -; CHECK-NEXT:.b8 76 -; CHECK-NEXT:.b8 55 -; CHECK-NEXT:.b8 108 -; CHECK-NEXT:.b8 103 -; CHECK-NEXT:.b8 97 -; CHECK-NEXT:.b8 109 +; CHECK-NEXT:.b8 0 // End Of Children Mark +; CHECK-NEXT:.b8 35 // Abbrev [35] 0x231e:0x9d DW_TAG_structure_type +; CHECK-NEXT:.b8 100 // DW_AT_name +; CHECK-NEXT:.b8 105 ; CHECK-NEXT:.b8 109 -; CHECK-NEXT:.b8 97 -; CHECK-NEXT:.b8 102 -; CHECK-NEXT:.b8 102 +; CHECK-NEXT:.b8 51 ; CHECK-NEXT:.b8 0 -; CHECK-NEXT:.b8 108 // DW_AT_name -; CHECK-NEXT:.b8 103 -; CHECK-NEXT:.b8 97 +; CHECK-NEXT:.b8 12 // DW_AT_byte_size +; CHECK-NEXT:.b8 14 // DW_AT_decl_file +; CHECK-NEXT:.b8 161 // DW_AT_decl_line +; CHECK-NEXT:.b8 1 +; CHECK-NEXT:.b8 36 // Abbrev [36] 0x2328:0xd DW_TAG_member +; CHECK-NEXT:.b8 120 // DW_AT_name +; CHECK-NEXT:.b8 0 +; CHECK-NEXT:.b32 5207 // DW_AT_type +; CHECK-NEXT:.b8 14 // DW_AT_decl_file +; CHECK-NEXT:.b8 163 // DW_AT_decl_line +; CHECK-NEXT:.b8 1 +; CHECK-NEXT:.b8 2 // DW_AT_data_member_location +; CHECK-NEXT:.b8 35 +; CHECK-NEXT:.b8 0 +; CHECK-NEXT:.b8 36 // Abbrev [36] 0x2335:0xd DW_TAG_member +; CHECK-NEXT:.b8 121 // DW_AT_name +; CHECK-NEXT:.b8 0 +; CHECK-NEXT:.b32 5207 // DW_AT_type +; CHECK-NEXT:.b8 14 // DW_AT_decl_file +; CHECK-NEXT:.b8 163 // DW_AT_decl_line +; CHECK-NEXT:.b8 1 +; CHECK-NEXT:.b8 2 // DW_AT_data_member_location +; CHECK-NEXT:.b8 35 +; CHECK-NEXT:.b8 4 +; CHECK-NEXT:.b8 36 // Abbrev [36] 0x2342:0xd DW_TAG_member +; CHECK-NEXT:.b8 122 // DW_AT_name +; CHECK-NEXT:.b8 0 +; CHECK-NEXT:.b32 5207 // DW_AT_type +; CHECK-NEXT:.b8 14 // DW_AT_decl_file +; CHECK-NEXT:.b8 163 // DW_AT_decl_line +; CHECK-NEXT:.b8 1 +; CHECK-NEXT:.b8 2 // DW_AT_data_member_location +; CHECK-NEXT:.b8 35 +; CHECK-NEXT:.b8 8 +; CHECK-NEXT:.b8 23 // Abbrev [23] 0x234f:0x21 DW_TAG_subprogram +; CHECK-NEXT:.b8 100 // DW_AT_name +; CHECK-NEXT:.b8 105 ; CHECK-NEXT:.b8 109 +; CHECK-NEXT:.b8 51 +; CHECK-NEXT:.b8 0 +; CHECK-NEXT:.b8 14 // DW_AT_decl_file +; CHECK-NEXT:.b8 165 // DW_AT_decl_line +; CHECK-NEXT:.b8 1 +; CHECK-NEXT:.b8 1 // DW_AT_declaration +; CHECK-NEXT:.b8 1 // DW_AT_external +; CHECK-NEXT:.b8 29 // Abbrev [29] 0x235a:0x6 DW_TAG_formal_parameter +; CHECK-NEXT:.b32 9147 // DW_AT_type +; CHECK-NEXT:.b8 1 // DW_AT_artificial +; CHECK-NEXT:.b8 6 // Abbrev [6] 0x2360:0x5 DW_TAG_formal_parameter +; CHECK-NEXT:.b32 5207 // DW_AT_type +; CHECK-NEXT:.b8 6 // Abbrev [6] 0x2365:0x5 DW_TAG_formal_parameter +; CHECK-NEXT:.b32 5207 // DW_AT_type +; CHECK-NEXT:.b8 6 // Abbrev [6] 0x236a:0x5 DW_TAG_formal_parameter +; CHECK-NEXT:.b32 5207 // DW_AT_type +; CHECK-NEXT:.b8 0 // End Of Children Mark +; CHECK-NEXT:.b8 23 // Abbrev [23] 0x2370:0x17 DW_TAG_subprogram +; CHECK-NEXT:.b8 100 // DW_AT_name +; CHECK-NEXT:.b8 105 ; CHECK-NEXT:.b8 109 -; CHECK-NEXT:.b8 97 -; CHECK-NEXT:.b8 102 +; CHECK-NEXT:.b8 51 ; CHECK-NEXT:.b8 0 -; CHECK-NEXT:.b8 12 // DW_AT_decl_file -; CHECK-NEXT:.b8 235 // DW_AT_decl_line -; CHECK-NEXT:.b8 5 -; CHECK-NEXT:.b32 2116 // DW_AT_type +; CHECK-NEXT:.b8 14 // DW_AT_decl_file +; CHECK-NEXT:.b8 166 // DW_AT_decl_line +; CHECK-NEXT:.b8 1 ; CHECK-NEXT:.b8 1 // DW_AT_declaration -; CHECK-NEXT:.b8 7 // Abbrev [7] 
0x2375:0x5 DW_TAG_formal_parameter -; CHECK-NEXT:.b32 2116 // DW_AT_type +; CHECK-NEXT:.b8 1 // DW_AT_external +; CHECK-NEXT:.b8 29 // Abbrev [29] 0x237b:0x6 DW_TAG_formal_parameter +; CHECK-NEXT:.b32 9147 // DW_AT_type +; CHECK-NEXT:.b8 1 // DW_AT_artificial +; CHECK-NEXT:.b8 6 // Abbrev [6] 0x2381:0x5 DW_TAG_formal_parameter +; CHECK-NEXT:.b32 9152 // DW_AT_type ; CHECK-NEXT:.b8 0 // End Of Children Mark -; CHECK-NEXT:.b8 44 // Abbrev [44] 0x237b:0x24 DW_TAG_subprogram +; CHECK-NEXT:.b8 37 // Abbrev [37] 0x2387:0x33 DW_TAG_subprogram ; CHECK-NEXT:.b8 95 // DW_AT_MIPS_linkage_name ; CHECK-NEXT:.b8 90 -; CHECK-NEXT:.b8 76 -; CHECK-NEXT:.b8 55 -; CHECK-NEXT:.b8 108 -; CHECK-NEXT:.b8 108 -; CHECK-NEXT:.b8 114 +; CHECK-NEXT:.b8 78 +; CHECK-NEXT:.b8 52 +; CHECK-NEXT:.b8 100 +; CHECK-NEXT:.b8 105 +; CHECK-NEXT:.b8 109 +; CHECK-NEXT:.b8 51 +; CHECK-NEXT:.b8 99 +; CHECK-NEXT:.b8 118 +; CHECK-NEXT:.b8 53 +; CHECK-NEXT:.b8 117 ; CHECK-NEXT:.b8 105 ; CHECK-NEXT:.b8 110 ; CHECK-NEXT:.b8 116 -; CHECK-NEXT:.b8 102 -; CHECK-NEXT:.b8 102 +; CHECK-NEXT:.b8 51 +; CHECK-NEXT:.b8 69 +; CHECK-NEXT:.b8 118 ; CHECK-NEXT:.b8 0 -; CHECK-NEXT:.b8 108 // DW_AT_name -; CHECK-NEXT:.b8 108 +; CHECK-NEXT:.b8 111 // DW_AT_name +; CHECK-NEXT:.b8 112 +; CHECK-NEXT:.b8 101 +; CHECK-NEXT:.b8 114 +; CHECK-NEXT:.b8 97 +; CHECK-NEXT:.b8 116 +; CHECK-NEXT:.b8 111 ; CHECK-NEXT:.b8 114 +; CHECK-NEXT:.b8 32 +; CHECK-NEXT:.b8 117 ; CHECK-NEXT:.b8 105 ; CHECK-NEXT:.b8 110 ; CHECK-NEXT:.b8 116 -; CHECK-NEXT:.b8 102 +; CHECK-NEXT:.b8 51 ; CHECK-NEXT:.b8 0 -; CHECK-NEXT:.b8 12 // DW_AT_decl_file -; CHECK-NEXT:.b8 125 // DW_AT_decl_line -; CHECK-NEXT:.b8 4 -; CHECK-NEXT:.b32 3764 // DW_AT_type +; CHECK-NEXT:.b8 14 // DW_AT_decl_file +; CHECK-NEXT:.b8 167 // DW_AT_decl_line +; CHECK-NEXT:.b8 1 +; CHECK-NEXT:.b32 9152 // DW_AT_type ; CHECK-NEXT:.b8 1 // DW_AT_declaration -; CHECK-NEXT:.b8 7 // Abbrev [7] 0x2399:0x5 DW_TAG_formal_parameter -; CHECK-NEXT:.b32 2116 // DW_AT_type +; CHECK-NEXT:.b8 1 // DW_AT_external +; CHECK-NEXT:.b8 29 // Abbrev [29] 0x23b3:0x6 DW_TAG_formal_parameter +; CHECK-NEXT:.b32 9147 // DW_AT_type +; CHECK-NEXT:.b8 1 // DW_AT_artificial ; CHECK-NEXT:.b8 0 // End Of Children Mark -; CHECK-NEXT:.b8 44 // Abbrev [44] 0x239f:0x26 DW_TAG_subprogram -; CHECK-NEXT:.b8 95 // DW_AT_MIPS_linkage_name -; CHECK-NEXT:.b8 90 -; CHECK-NEXT:.b8 76 -; CHECK-NEXT:.b8 56 -; CHECK-NEXT:.b8 108 -; CHECK-NEXT:.b8 108 -; CHECK-NEXT:.b8 114 -; CHECK-NEXT:.b8 111 -; CHECK-NEXT:.b8 117 +; CHECK-NEXT:.b8 0 // End Of Children Mark +; CHECK-NEXT:.b8 8 // Abbrev [8] 0x23bb:0x5 DW_TAG_pointer_type +; CHECK-NEXT:.b32 8990 // DW_AT_type +; CHECK-NEXT:.b8 20 // Abbrev [20] 0x23c0:0xe DW_TAG_typedef +; CHECK-NEXT:.b32 8360 // DW_AT_type +; CHECK-NEXT:.b8 117 // DW_AT_name +; CHECK-NEXT:.b8 105 ; CHECK-NEXT:.b8 110 -; CHECK-NEXT:.b8 100 -; CHECK-NEXT:.b8 102 -; CHECK-NEXT:.b8 102 +; CHECK-NEXT:.b8 116 +; CHECK-NEXT:.b8 51 ; CHECK-NEXT:.b8 0 -; CHECK-NEXT:.b8 108 // DW_AT_name -; CHECK-NEXT:.b8 108 -; CHECK-NEXT:.b8 114 -; CHECK-NEXT:.b8 111 +; CHECK-NEXT:.b8 14 // DW_AT_decl_file +; CHECK-NEXT:.b8 127 // DW_AT_decl_line +; CHECK-NEXT:.b8 1 +; CHECK-NEXT:.b8 8 // Abbrev [8] 0x23ce:0x5 DW_TAG_pointer_type +; CHECK-NEXT:.b32 9171 // DW_AT_type +; CHECK-NEXT:.b8 9 // Abbrev [9] 0x23d3:0x5 DW_TAG_const_type +; CHECK-NEXT:.b32 8438 // DW_AT_type +; CHECK-NEXT:.b8 8 // Abbrev [8] 0x23d8:0x5 DW_TAG_pointer_type +; CHECK-NEXT:.b32 8438 // DW_AT_type +; CHECK-NEXT:.b8 33 // Abbrev [33] 0x23dd:0x5 DW_TAG_reference_type +; CHECK-NEXT:.b32 9171 // DW_AT_type +; 
CHECK-NEXT:.b8 8 // Abbrev [8] 0x23e2:0x5 DW_TAG_pointer_type +; CHECK-NEXT:.b32 8438 // DW_AT_type +; CHECK-NEXT:.b8 34 // Abbrev [34] 0x23e7:0x6 DW_TAG_subprogram +; CHECK-NEXT:.b32 8468 // DW_AT_specification +; CHECK-NEXT:.b8 1 // DW_AT_inline +; CHECK-NEXT:.b8 27 // Abbrev [27] 0x23ed:0x233 DW_TAG_structure_type +; CHECK-NEXT:.b8 95 // DW_AT_name +; CHECK-NEXT:.b8 95 +; CHECK-NEXT:.b8 99 +; CHECK-NEXT:.b8 117 +; CHECK-NEXT:.b8 100 +; CHECK-NEXT:.b8 97 +; CHECK-NEXT:.b8 95 +; CHECK-NEXT:.b8 98 ; CHECK-NEXT:.b8 117 +; CHECK-NEXT:.b8 105 +; CHECK-NEXT:.b8 108 +; CHECK-NEXT:.b8 116 +; CHECK-NEXT:.b8 105 ; CHECK-NEXT:.b8 110 +; CHECK-NEXT:.b8 95 +; CHECK-NEXT:.b8 116 +; CHECK-NEXT:.b8 104 +; CHECK-NEXT:.b8 114 +; CHECK-NEXT:.b8 101 +; CHECK-NEXT:.b8 97 ; CHECK-NEXT:.b8 100 -; CHECK-NEXT:.b8 102 +; CHECK-NEXT:.b8 73 +; CHECK-NEXT:.b8 100 +; CHECK-NEXT:.b8 120 +; CHECK-NEXT:.b8 95 +; CHECK-NEXT:.b8 116 ; CHECK-NEXT:.b8 0 -; CHECK-NEXT:.b8 12 // DW_AT_decl_file +; CHECK-NEXT:.b8 1 // DW_AT_byte_size +; CHECK-NEXT:.b8 13 // DW_AT_decl_file ; CHECK-NEXT:.b8 66 // DW_AT_decl_line -; CHECK-NEXT:.b8 6 -; CHECK-NEXT:.b32 3764 // DW_AT_type -; CHECK-NEXT:.b8 1 // DW_AT_declaration -; CHECK-NEXT:.b8 7 // Abbrev [7] 0x23bf:0x5 DW_TAG_formal_parameter -; CHECK-NEXT:.b32 2116 // DW_AT_type -; CHECK-NEXT:.b8 0 // End Of Children Mark -; CHECK-NEXT:.b8 44 // Abbrev [44] 0x23c5:0x22 DW_TAG_subprogram -; CHECK-NEXT:.b8 95 // DW_AT_MIPS_linkage_name -; CHECK-NEXT:.b8 90 -; CHECK-NEXT:.b8 76 -; CHECK-NEXT:.b8 54 -; CHECK-NEXT:.b8 108 -; CHECK-NEXT:.b8 111 -; CHECK-NEXT:.b8 103 -; CHECK-NEXT:.b8 49 -; CHECK-NEXT:.b8 48 -; CHECK-NEXT:.b8 102 -; CHECK-NEXT:.b8 102 -; CHECK-NEXT:.b8 0 -; CHECK-NEXT:.b8 108 // DW_AT_name -; CHECK-NEXT:.b8 111 -; CHECK-NEXT:.b8 103 -; CHECK-NEXT:.b8 49 -; CHECK-NEXT:.b8 48 -; CHECK-NEXT:.b8 102 -; CHECK-NEXT:.b8 0 -; CHECK-NEXT:.b8 12 // DW_AT_decl_file -; CHECK-NEXT:.b8 76 // DW_AT_decl_line -; CHECK-NEXT:.b8 5 -; CHECK-NEXT:.b32 2116 // DW_AT_type -; CHECK-NEXT:.b8 1 // DW_AT_declaration -; CHECK-NEXT:.b8 7 // Abbrev [7] 0x23e1:0x5 DW_TAG_formal_parameter -; CHECK-NEXT:.b32 2116 // DW_AT_type -; CHECK-NEXT:.b8 0 // End Of Children Mark -; CHECK-NEXT:.b8 44 // Abbrev [44] 0x23e7:0x22 DW_TAG_subprogram -; CHECK-NEXT:.b8 95 // DW_AT_MIPS_linkage_name -; CHECK-NEXT:.b8 90 -; CHECK-NEXT:.b8 76 -; CHECK-NEXT:.b8 54 -; CHECK-NEXT:.b8 108 -; CHECK-NEXT:.b8 111 -; CHECK-NEXT:.b8 103 -; CHECK-NEXT:.b8 49 -; CHECK-NEXT:.b8 112 -; CHECK-NEXT:.b8 102 -; CHECK-NEXT:.b8 102 -; CHECK-NEXT:.b8 0 -; CHECK-NEXT:.b8 108 // DW_AT_name -; CHECK-NEXT:.b8 111 -; CHECK-NEXT:.b8 103 -; CHECK-NEXT:.b8 49 -; CHECK-NEXT:.b8 112 -; CHECK-NEXT:.b8 102 -; CHECK-NEXT:.b8 0 -; CHECK-NEXT:.b8 12 // DW_AT_decl_file -; CHECK-NEXT:.b8 85 // DW_AT_decl_line -; CHECK-NEXT:.b8 5 -; CHECK-NEXT:.b32 2116 // DW_AT_type -; CHECK-NEXT:.b8 1 // DW_AT_declaration -; CHECK-NEXT:.b8 7 // Abbrev [7] 0x2403:0x5 DW_TAG_formal_parameter -; CHECK-NEXT:.b32 2116 // DW_AT_type -; CHECK-NEXT:.b8 0 // End Of Children Mark -; CHECK-NEXT:.b8 44 // Abbrev [44] 0x2409:0x20 DW_TAG_subprogram +; CHECK-NEXT:.b8 28 // Abbrev [28] 0x240c:0x50 DW_TAG_subprogram ; CHECK-NEXT:.b8 95 // DW_AT_MIPS_linkage_name ; CHECK-NEXT:.b8 90 -; CHECK-NEXT:.b8 76 -; CHECK-NEXT:.b8 53 -; CHECK-NEXT:.b8 108 -; CHECK-NEXT:.b8 111 -; CHECK-NEXT:.b8 103 -; CHECK-NEXT:.b8 50 -; CHECK-NEXT:.b8 102 -; CHECK-NEXT:.b8 102 -; CHECK-NEXT:.b8 0 -; CHECK-NEXT:.b8 108 // DW_AT_name -; CHECK-NEXT:.b8 111 -; CHECK-NEXT:.b8 103 +; CHECK-NEXT:.b8 78 ; CHECK-NEXT:.b8 50 -; 
CHECK-NEXT:.b8 102 -; CHECK-NEXT:.b8 0 -; CHECK-NEXT:.b8 12 // DW_AT_decl_file -; CHECK-NEXT:.b8 5 // DW_AT_decl_line -; CHECK-NEXT:.b8 5 -; CHECK-NEXT:.b32 2116 // DW_AT_type -; CHECK-NEXT:.b8 1 // DW_AT_declaration -; CHECK-NEXT:.b8 7 // Abbrev [7] 0x2423:0x5 DW_TAG_formal_parameter -; CHECK-NEXT:.b32 2116 // DW_AT_type -; CHECK-NEXT:.b8 0 // End Of Children Mark -; CHECK-NEXT:.b8 44 // Abbrev [44] 0x2429:0x20 DW_TAG_subprogram -; CHECK-NEXT:.b8 95 // DW_AT_MIPS_linkage_name -; CHECK-NEXT:.b8 90 -; CHECK-NEXT:.b8 76 -; CHECK-NEXT:.b8 53 -; CHECK-NEXT:.b8 108 -; CHECK-NEXT:.b8 111 -; CHECK-NEXT:.b8 103 -; CHECK-NEXT:.b8 98 -; CHECK-NEXT:.b8 102 -; CHECK-NEXT:.b8 102 -; CHECK-NEXT:.b8 0 -; CHECK-NEXT:.b8 108 // DW_AT_name -; CHECK-NEXT:.b8 111 -; CHECK-NEXT:.b8 103 +; CHECK-NEXT:.b8 54 +; CHECK-NEXT:.b8 95 +; CHECK-NEXT:.b8 95 +; CHECK-NEXT:.b8 99 +; CHECK-NEXT:.b8 117 +; CHECK-NEXT:.b8 100 +; CHECK-NEXT:.b8 97 +; CHECK-NEXT:.b8 95 ; CHECK-NEXT:.b8 98 -; CHECK-NEXT:.b8 102 -; CHECK-NEXT:.b8 0 -; CHECK-NEXT:.b8 12 // DW_AT_decl_file -; CHECK-NEXT:.b8 90 // DW_AT_decl_line -; CHECK-NEXT:.b8 6 -; CHECK-NEXT:.b32 2116 // DW_AT_type -; CHECK-NEXT:.b8 1 // DW_AT_declaration -; CHECK-NEXT:.b8 7 // Abbrev [7] 0x2443:0x5 DW_TAG_formal_parameter -; CHECK-NEXT:.b32 2116 // DW_AT_type -; CHECK-NEXT:.b8 0 // End Of Children Mark -; CHECK-NEXT:.b8 44 // Abbrev [44] 0x2449:0x1e DW_TAG_subprogram -; CHECK-NEXT:.b8 95 // DW_AT_MIPS_linkage_name -; CHECK-NEXT:.b8 90 -; CHECK-NEXT:.b8 76 -; CHECK-NEXT:.b8 52 +; CHECK-NEXT:.b8 117 +; CHECK-NEXT:.b8 105 ; CHECK-NEXT:.b8 108 -; CHECK-NEXT:.b8 111 -; CHECK-NEXT:.b8 103 -; CHECK-NEXT:.b8 102 -; CHECK-NEXT:.b8 102 -; CHECK-NEXT:.b8 0 -; CHECK-NEXT:.b8 108 // DW_AT_name -; CHECK-NEXT:.b8 111 -; CHECK-NEXT:.b8 103 -; CHECK-NEXT:.b8 102 -; CHECK-NEXT:.b8 0 -; CHECK-NEXT:.b8 12 // DW_AT_decl_file -; CHECK-NEXT:.b8 67 // DW_AT_decl_line -; CHECK-NEXT:.b8 5 -; CHECK-NEXT:.b32 2116 // DW_AT_type -; CHECK-NEXT:.b8 1 // DW_AT_declaration -; CHECK-NEXT:.b8 7 // Abbrev [7] 0x2461:0x5 DW_TAG_formal_parameter -; CHECK-NEXT:.b32 2116 // DW_AT_type -; CHECK-NEXT:.b8 0 // End Of Children Mark -; CHECK-NEXT:.b8 44 // Abbrev [44] 0x2467:0x22 DW_TAG_subprogram -; CHECK-NEXT:.b8 95 // DW_AT_MIPS_linkage_name -; CHECK-NEXT:.b8 90 -; CHECK-NEXT:.b8 76 -; CHECK-NEXT:.b8 54 +; CHECK-NEXT:.b8 116 +; CHECK-NEXT:.b8 105 +; CHECK-NEXT:.b8 110 +; CHECK-NEXT:.b8 95 +; CHECK-NEXT:.b8 116 +; CHECK-NEXT:.b8 104 +; CHECK-NEXT:.b8 114 +; CHECK-NEXT:.b8 101 +; CHECK-NEXT:.b8 97 +; CHECK-NEXT:.b8 100 +; CHECK-NEXT:.b8 73 +; CHECK-NEXT:.b8 100 +; CHECK-NEXT:.b8 120 +; CHECK-NEXT:.b8 95 +; CHECK-NEXT:.b8 116 +; CHECK-NEXT:.b8 49 +; CHECK-NEXT:.b8 55 +; CHECK-NEXT:.b8 95 +; CHECK-NEXT:.b8 95 +; CHECK-NEXT:.b8 102 +; CHECK-NEXT:.b8 101 +; CHECK-NEXT:.b8 116 +; CHECK-NEXT:.b8 99 +; CHECK-NEXT:.b8 104 +; CHECK-NEXT:.b8 95 +; CHECK-NEXT:.b8 98 +; CHECK-NEXT:.b8 117 +; CHECK-NEXT:.b8 105 ; CHECK-NEXT:.b8 108 -; CHECK-NEXT:.b8 114 +; CHECK-NEXT:.b8 116 ; CHECK-NEXT:.b8 105 ; CHECK-NEXT:.b8 110 -; CHECK-NEXT:.b8 116 -; CHECK-NEXT:.b8 102 -; CHECK-NEXT:.b8 102 +; CHECK-NEXT:.b8 95 +; CHECK-NEXT:.b8 120 +; CHECK-NEXT:.b8 69 +; CHECK-NEXT:.b8 118 ; CHECK-NEXT:.b8 0 -; CHECK-NEXT:.b8 108 // DW_AT_name -; CHECK-NEXT:.b8 114 +; CHECK-NEXT:.b8 95 // DW_AT_name +; CHECK-NEXT:.b8 95 +; CHECK-NEXT:.b8 102 +; CHECK-NEXT:.b8 101 +; CHECK-NEXT:.b8 116 +; CHECK-NEXT:.b8 99 +; CHECK-NEXT:.b8 104 +; CHECK-NEXT:.b8 95 +; CHECK-NEXT:.b8 98 +; CHECK-NEXT:.b8 117 ; CHECK-NEXT:.b8 105 -; CHECK-NEXT:.b8 110 +; CHECK-NEXT:.b8 108 ; 
CHECK-NEXT:.b8 116 -; CHECK-NEXT:.b8 102 +; CHECK-NEXT:.b8 105 +; CHECK-NEXT:.b8 110 +; CHECK-NEXT:.b8 95 +; CHECK-NEXT:.b8 120 ; CHECK-NEXT:.b8 0 -; CHECK-NEXT:.b8 12 // DW_AT_decl_file -; CHECK-NEXT:.b8 116 // DW_AT_decl_line -; CHECK-NEXT:.b8 4 -; CHECK-NEXT:.b32 5164 // DW_AT_type +; CHECK-NEXT:.b8 13 // DW_AT_decl_file +; CHECK-NEXT:.b8 67 // DW_AT_decl_line +; CHECK-NEXT:.b32 5207 // DW_AT_type ; CHECK-NEXT:.b8 1 // DW_AT_declaration -; CHECK-NEXT:.b8 7 // Abbrev [7] 0x2483:0x5 DW_TAG_formal_parameter -; CHECK-NEXT:.b32 2116 // DW_AT_type -; CHECK-NEXT:.b8 0 // End Of Children Mark -; CHECK-NEXT:.b8 44 // Abbrev [44] 0x2489:0x24 DW_TAG_subprogram +; CHECK-NEXT:.b8 1 // DW_AT_external +; CHECK-NEXT:.b8 28 // Abbrev [28] 0x245c:0x50 DW_TAG_subprogram ; CHECK-NEXT:.b8 95 // DW_AT_MIPS_linkage_name ; CHECK-NEXT:.b8 90 -; CHECK-NEXT:.b8 76 -; CHECK-NEXT:.b8 55 -; CHECK-NEXT:.b8 108 -; CHECK-NEXT:.b8 114 -; CHECK-NEXT:.b8 111 +; CHECK-NEXT:.b8 78 +; CHECK-NEXT:.b8 50 +; CHECK-NEXT:.b8 54 +; CHECK-NEXT:.b8 95 +; CHECK-NEXT:.b8 95 +; CHECK-NEXT:.b8 99 ; CHECK-NEXT:.b8 117 -; CHECK-NEXT:.b8 110 ; CHECK-NEXT:.b8 100 -; CHECK-NEXT:.b8 102 -; CHECK-NEXT:.b8 102 -; CHECK-NEXT:.b8 0 -; CHECK-NEXT:.b8 108 // DW_AT_name -; CHECK-NEXT:.b8 114 -; CHECK-NEXT:.b8 111 +; CHECK-NEXT:.b8 97 +; CHECK-NEXT:.b8 95 +; CHECK-NEXT:.b8 98 ; CHECK-NEXT:.b8 117 +; CHECK-NEXT:.b8 105 +; CHECK-NEXT:.b8 108 +; CHECK-NEXT:.b8 116 +; CHECK-NEXT:.b8 105 ; CHECK-NEXT:.b8 110 +; CHECK-NEXT:.b8 95 +; CHECK-NEXT:.b8 116 +; CHECK-NEXT:.b8 104 +; CHECK-NEXT:.b8 114 +; CHECK-NEXT:.b8 101 +; CHECK-NEXT:.b8 97 ; CHECK-NEXT:.b8 100 -; CHECK-NEXT:.b8 102 -; CHECK-NEXT:.b8 0 -; CHECK-NEXT:.b8 12 // DW_AT_decl_file -; CHECK-NEXT:.b8 71 // DW_AT_decl_line -; CHECK-NEXT:.b8 6 -; CHECK-NEXT:.b32 5164 // DW_AT_type -; CHECK-NEXT:.b8 1 // DW_AT_declaration -; CHECK-NEXT:.b8 7 // Abbrev [7] 0x24a7:0x5 DW_TAG_formal_parameter -; CHECK-NEXT:.b32 2116 // DW_AT_type -; CHECK-NEXT:.b8 0 // End Of Children Mark -; CHECK-NEXT:.b8 44 // Abbrev [44] 0x24ad:0x27 DW_TAG_subprogram -; CHECK-NEXT:.b8 95 // DW_AT_MIPS_linkage_name -; CHECK-NEXT:.b8 90 -; CHECK-NEXT:.b8 76 -; CHECK-NEXT:.b8 53 -; CHECK-NEXT:.b8 109 -; CHECK-NEXT:.b8 111 -; CHECK-NEXT:.b8 100 -; CHECK-NEXT:.b8 102 -; CHECK-NEXT:.b8 102 -; CHECK-NEXT:.b8 102 -; CHECK-NEXT:.b8 80 -; CHECK-NEXT:.b8 102 -; CHECK-NEXT:.b8 0 -; CHECK-NEXT:.b8 109 // DW_AT_name -; CHECK-NEXT:.b8 111 +; CHECK-NEXT:.b8 73 ; CHECK-NEXT:.b8 100 -; CHECK-NEXT:.b8 102 -; CHECK-NEXT:.b8 102 -; CHECK-NEXT:.b8 0 -; CHECK-NEXT:.b8 12 // DW_AT_decl_file -; CHECK-NEXT:.b8 12 // DW_AT_decl_line -; CHECK-NEXT:.b8 6 -; CHECK-NEXT:.b32 2116 // DW_AT_type -; CHECK-NEXT:.b8 1 // DW_AT_declaration -; CHECK-NEXT:.b8 7 // Abbrev [7] 0x24c9:0x5 DW_TAG_formal_parameter -; CHECK-NEXT:.b32 2116 // DW_AT_type -; CHECK-NEXT:.b8 7 // Abbrev [7] 0x24ce:0x5 DW_TAG_formal_parameter -; CHECK-NEXT:.b32 2125 // DW_AT_type -; CHECK-NEXT:.b8 0 // End Of Children Mark -; CHECK-NEXT:.b8 44 // Abbrev [44] 0x24d4:0x2b DW_TAG_subprogram -; CHECK-NEXT:.b8 95 // DW_AT_MIPS_linkage_name -; CHECK-NEXT:.b8 90 -; CHECK-NEXT:.b8 76 +; CHECK-NEXT:.b8 120 +; CHECK-NEXT:.b8 95 +; CHECK-NEXT:.b8 116 ; CHECK-NEXT:.b8 49 -; CHECK-NEXT:.b8 48 -; CHECK-NEXT:.b8 110 +; CHECK-NEXT:.b8 55 +; CHECK-NEXT:.b8 95 +; CHECK-NEXT:.b8 95 +; CHECK-NEXT:.b8 102 ; CHECK-NEXT:.b8 101 -; CHECK-NEXT:.b8 97 -; CHECK-NEXT:.b8 114 +; CHECK-NEXT:.b8 116 +; CHECK-NEXT:.b8 99 +; CHECK-NEXT:.b8 104 +; CHECK-NEXT:.b8 95 ; CHECK-NEXT:.b8 98 -; CHECK-NEXT:.b8 121 +; CHECK-NEXT:.b8 117 ; 
CHECK-NEXT:.b8 105 -; CHECK-NEXT:.b8 110 +; CHECK-NEXT:.b8 108 ; CHECK-NEXT:.b8 116 -; CHECK-NEXT:.b8 102 -; CHECK-NEXT:.b8 102 +; CHECK-NEXT:.b8 105 +; CHECK-NEXT:.b8 110 +; CHECK-NEXT:.b8 95 +; CHECK-NEXT:.b8 121 +; CHECK-NEXT:.b8 69 +; CHECK-NEXT:.b8 118 ; CHECK-NEXT:.b8 0 -; CHECK-NEXT:.b8 110 // DW_AT_name +; CHECK-NEXT:.b8 95 // DW_AT_name +; CHECK-NEXT:.b8 95 +; CHECK-NEXT:.b8 102 ; CHECK-NEXT:.b8 101 -; CHECK-NEXT:.b8 97 -; CHECK-NEXT:.b8 114 +; CHECK-NEXT:.b8 116 +; CHECK-NEXT:.b8 99 +; CHECK-NEXT:.b8 104 +; CHECK-NEXT:.b8 95 ; CHECK-NEXT:.b8 98 -; CHECK-NEXT:.b8 121 +; CHECK-NEXT:.b8 117 ; CHECK-NEXT:.b8 105 -; CHECK-NEXT:.b8 110 +; CHECK-NEXT:.b8 108 ; CHECK-NEXT:.b8 116 -; CHECK-NEXT:.b8 102 +; CHECK-NEXT:.b8 105 +; CHECK-NEXT:.b8 110 +; CHECK-NEXT:.b8 95 +; CHECK-NEXT:.b8 121 ; CHECK-NEXT:.b8 0 -; CHECK-NEXT:.b8 12 // DW_AT_decl_file -; CHECK-NEXT:.b8 130 // DW_AT_decl_line -; CHECK-NEXT:.b8 4 -; CHECK-NEXT:.b32 2116 // DW_AT_type +; CHECK-NEXT:.b8 13 // DW_AT_decl_file +; CHECK-NEXT:.b8 68 // DW_AT_decl_line +; CHECK-NEXT:.b32 5207 // DW_AT_type ; CHECK-NEXT:.b8 1 // DW_AT_declaration -; CHECK-NEXT:.b8 7 // Abbrev [7] 0x24f9:0x5 DW_TAG_formal_parameter -; CHECK-NEXT:.b32 2116 // DW_AT_type -; CHECK-NEXT:.b8 0 // End Of Children Mark -; CHECK-NEXT:.b8 44 // Abbrev [44] 0x24ff:0x31 DW_TAG_subprogram +; CHECK-NEXT:.b8 1 // DW_AT_external +; CHECK-NEXT:.b8 28 // Abbrev [28] 0x24ac:0x50 DW_TAG_subprogram ; CHECK-NEXT:.b8 95 // DW_AT_MIPS_linkage_name ; CHECK-NEXT:.b8 90 -; CHECK-NEXT:.b8 76 -; CHECK-NEXT:.b8 49 -; CHECK-NEXT:.b8 48 -; CHECK-NEXT:.b8 110 -; CHECK-NEXT:.b8 101 -; CHECK-NEXT:.b8 120 -; CHECK-NEXT:.b8 116 +; CHECK-NEXT:.b8 78 +; CHECK-NEXT:.b8 50 +; CHECK-NEXT:.b8 54 +; CHECK-NEXT:.b8 95 +; CHECK-NEXT:.b8 95 +; CHECK-NEXT:.b8 99 +; CHECK-NEXT:.b8 117 +; CHECK-NEXT:.b8 100 ; CHECK-NEXT:.b8 97 -; CHECK-NEXT:.b8 102 +; CHECK-NEXT:.b8 95 +; CHECK-NEXT:.b8 98 +; CHECK-NEXT:.b8 117 +; CHECK-NEXT:.b8 105 +; CHECK-NEXT:.b8 108 ; CHECK-NEXT:.b8 116 -; CHECK-NEXT:.b8 101 +; CHECK-NEXT:.b8 105 +; CHECK-NEXT:.b8 110 +; CHECK-NEXT:.b8 95 +; CHECK-NEXT:.b8 116 +; CHECK-NEXT:.b8 104 ; CHECK-NEXT:.b8 114 -; CHECK-NEXT:.b8 102 -; CHECK-NEXT:.b8 102 -; CHECK-NEXT:.b8 102 -; CHECK-NEXT:.b8 0 -; CHECK-NEXT:.b8 110 // DW_AT_name ; CHECK-NEXT:.b8 101 -; CHECK-NEXT:.b8 120 -; CHECK-NEXT:.b8 116 ; CHECK-NEXT:.b8 97 -; CHECK-NEXT:.b8 102 +; CHECK-NEXT:.b8 100 +; CHECK-NEXT:.b8 73 +; CHECK-NEXT:.b8 100 +; CHECK-NEXT:.b8 120 +; CHECK-NEXT:.b8 95 ; CHECK-NEXT:.b8 116 -; CHECK-NEXT:.b8 101 -; CHECK-NEXT:.b8 114 -; CHECK-NEXT:.b8 102 -; CHECK-NEXT:.b8 0 -; CHECK-NEXT:.b8 12 // DW_AT_decl_file -; CHECK-NEXT:.b8 194 // DW_AT_decl_line -; CHECK-NEXT:.b8 4 -; CHECK-NEXT:.b32 2116 // DW_AT_type -; CHECK-NEXT:.b8 1 // DW_AT_declaration -; CHECK-NEXT:.b8 7 // Abbrev [7] 0x2525:0x5 DW_TAG_formal_parameter -; CHECK-NEXT:.b32 2116 // DW_AT_type -; CHECK-NEXT:.b8 7 // Abbrev [7] 0x252a:0x5 DW_TAG_formal_parameter -; CHECK-NEXT:.b32 2116 // DW_AT_type -; CHECK-NEXT:.b8 0 // End Of Children Mark -; CHECK-NEXT:.b8 44 // Abbrev [44] 0x2530:0x24 DW_TAG_subprogram -; CHECK-NEXT:.b8 95 // DW_AT_MIPS_linkage_name -; CHECK-NEXT:.b8 90 -; CHECK-NEXT:.b8 76 -; CHECK-NEXT:.b8 52 -; CHECK-NEXT:.b8 112 -; CHECK-NEXT:.b8 111 -; CHECK-NEXT:.b8 119 -; CHECK-NEXT:.b8 102 -; CHECK-NEXT:.b8 102 -; CHECK-NEXT:.b8 102 -; CHECK-NEXT:.b8 0 -; CHECK-NEXT:.b8 112 // DW_AT_name -; CHECK-NEXT:.b8 111 -; CHECK-NEXT:.b8 119 -; CHECK-NEXT:.b8 102 -; CHECK-NEXT:.b8 0 -; CHECK-NEXT:.b8 12 // DW_AT_decl_file -; CHECK-NEXT:.b8 47 // 
DW_AT_decl_line -; CHECK-NEXT:.b8 6 -; CHECK-NEXT:.b32 2116 // DW_AT_type -; CHECK-NEXT:.b8 1 // DW_AT_declaration -; CHECK-NEXT:.b8 7 // Abbrev [7] 0x2549:0x5 DW_TAG_formal_parameter -; CHECK-NEXT:.b32 2116 // DW_AT_type -; CHECK-NEXT:.b8 7 // Abbrev [7] 0x254e:0x5 DW_TAG_formal_parameter -; CHECK-NEXT:.b32 2116 // DW_AT_type -; CHECK-NEXT:.b8 0 // End Of Children Mark -; CHECK-NEXT:.b8 44 // Abbrev [44] 0x2554:0x31 DW_TAG_subprogram -; CHECK-NEXT:.b8 95 // DW_AT_MIPS_linkage_name -; CHECK-NEXT:.b8 90 -; CHECK-NEXT:.b8 76 ; CHECK-NEXT:.b8 49 -; CHECK-NEXT:.b8 48 -; CHECK-NEXT:.b8 114 +; CHECK-NEXT:.b8 55 +; CHECK-NEXT:.b8 95 +; CHECK-NEXT:.b8 95 +; CHECK-NEXT:.b8 102 ; CHECK-NEXT:.b8 101 -; CHECK-NEXT:.b8 109 -; CHECK-NEXT:.b8 97 +; CHECK-NEXT:.b8 116 +; CHECK-NEXT:.b8 99 +; CHECK-NEXT:.b8 104 +; CHECK-NEXT:.b8 95 +; CHECK-NEXT:.b8 98 +; CHECK-NEXT:.b8 117 +; CHECK-NEXT:.b8 105 +; CHECK-NEXT:.b8 108 +; CHECK-NEXT:.b8 116 ; CHECK-NEXT:.b8 105 ; CHECK-NEXT:.b8 110 -; CHECK-NEXT:.b8 100 -; CHECK-NEXT:.b8 101 -; CHECK-NEXT:.b8 114 -; CHECK-NEXT:.b8 102 -; CHECK-NEXT:.b8 102 -; CHECK-NEXT:.b8 102 +; CHECK-NEXT:.b8 95 +; CHECK-NEXT:.b8 122 +; CHECK-NEXT:.b8 69 +; CHECK-NEXT:.b8 118 ; CHECK-NEXT:.b8 0 -; CHECK-NEXT:.b8 114 // DW_AT_name +; CHECK-NEXT:.b8 95 // DW_AT_name +; CHECK-NEXT:.b8 95 +; CHECK-NEXT:.b8 102 ; CHECK-NEXT:.b8 101 -; CHECK-NEXT:.b8 109 -; CHECK-NEXT:.b8 97 +; CHECK-NEXT:.b8 116 +; CHECK-NEXT:.b8 99 +; CHECK-NEXT:.b8 104 +; CHECK-NEXT:.b8 95 +; CHECK-NEXT:.b8 98 +; CHECK-NEXT:.b8 117 +; CHECK-NEXT:.b8 105 +; CHECK-NEXT:.b8 108 +; CHECK-NEXT:.b8 116 ; CHECK-NEXT:.b8 105 ; CHECK-NEXT:.b8 110 -; CHECK-NEXT:.b8 100 -; CHECK-NEXT:.b8 101 -; CHECK-NEXT:.b8 114 -; CHECK-NEXT:.b8 102 +; CHECK-NEXT:.b8 95 +; CHECK-NEXT:.b8 122 ; CHECK-NEXT:.b8 0 -; CHECK-NEXT:.b8 12 // DW_AT_decl_file -; CHECK-NEXT:.b8 22 // DW_AT_decl_line -; CHECK-NEXT:.b8 6 -; CHECK-NEXT:.b32 2116 // DW_AT_type +; CHECK-NEXT:.b8 13 // DW_AT_decl_file +; CHECK-NEXT:.b8 69 // DW_AT_decl_line +; CHECK-NEXT:.b32 5207 // DW_AT_type ; CHECK-NEXT:.b8 1 // DW_AT_declaration -; CHECK-NEXT:.b8 7 // Abbrev [7] 0x257a:0x5 DW_TAG_formal_parameter -; CHECK-NEXT:.b32 2116 // DW_AT_type -; CHECK-NEXT:.b8 7 // Abbrev [7] 0x257f:0x5 DW_TAG_formal_parameter -; CHECK-NEXT:.b32 2116 // DW_AT_type -; CHECK-NEXT:.b8 0 // End Of Children Mark -; CHECK-NEXT:.b8 44 // Abbrev [44] 0x2585:0x31 DW_TAG_subprogram +; CHECK-NEXT:.b8 1 // DW_AT_external +; CHECK-NEXT:.b8 25 // Abbrev [25] 0x24fc:0x4a DW_TAG_subprogram ; CHECK-NEXT:.b8 95 // DW_AT_MIPS_linkage_name ; CHECK-NEXT:.b8 90 -; CHECK-NEXT:.b8 76 -; CHECK-NEXT:.b8 55 +; CHECK-NEXT:.b8 78 +; CHECK-NEXT:.b8 75 +; CHECK-NEXT:.b8 50 +; CHECK-NEXT:.b8 54 +; CHECK-NEXT:.b8 95 +; CHECK-NEXT:.b8 95 +; CHECK-NEXT:.b8 99 +; CHECK-NEXT:.b8 117 +; CHECK-NEXT:.b8 100 +; CHECK-NEXT:.b8 97 +; CHECK-NEXT:.b8 95 +; CHECK-NEXT:.b8 98 +; CHECK-NEXT:.b8 117 +; CHECK-NEXT:.b8 105 +; CHECK-NEXT:.b8 108 +; CHECK-NEXT:.b8 116 +; CHECK-NEXT:.b8 105 +; CHECK-NEXT:.b8 110 +; CHECK-NEXT:.b8 95 +; CHECK-NEXT:.b8 116 +; CHECK-NEXT:.b8 104 ; CHECK-NEXT:.b8 114 ; CHECK-NEXT:.b8 101 -; CHECK-NEXT:.b8 109 -; CHECK-NEXT:.b8 113 +; CHECK-NEXT:.b8 97 +; CHECK-NEXT:.b8 100 +; CHECK-NEXT:.b8 73 +; CHECK-NEXT:.b8 100 +; CHECK-NEXT:.b8 120 +; CHECK-NEXT:.b8 95 +; CHECK-NEXT:.b8 116 +; CHECK-NEXT:.b8 99 +; CHECK-NEXT:.b8 118 +; CHECK-NEXT:.b8 53 ; CHECK-NEXT:.b8 117 -; CHECK-NEXT:.b8 111 -; CHECK-NEXT:.b8 102 -; CHECK-NEXT:.b8 102 -; CHECK-NEXT:.b8 102 -; CHECK-NEXT:.b8 80 ; CHECK-NEXT:.b8 105 +; CHECK-NEXT:.b8 110 +; 
CHECK-NEXT:.b8 116 +; CHECK-NEXT:.b8 51 +; CHECK-NEXT:.b8 69 +; CHECK-NEXT:.b8 118 ; CHECK-NEXT:.b8 0 -; CHECK-NEXT:.b8 114 // DW_AT_name +; CHECK-NEXT:.b8 111 // DW_AT_name +; CHECK-NEXT:.b8 112 ; CHECK-NEXT:.b8 101 -; CHECK-NEXT:.b8 109 -; CHECK-NEXT:.b8 113 -; CHECK-NEXT:.b8 117 +; CHECK-NEXT:.b8 114 +; CHECK-NEXT:.b8 97 +; CHECK-NEXT:.b8 116 ; CHECK-NEXT:.b8 111 -; CHECK-NEXT:.b8 102 -; CHECK-NEXT:.b8 0 -; CHECK-NEXT:.b8 12 // DW_AT_decl_file -; CHECK-NEXT:.b8 27 // DW_AT_decl_line -; CHECK-NEXT:.b8 6 -; CHECK-NEXT:.b32 2116 // DW_AT_type -; CHECK-NEXT:.b8 1 // DW_AT_declaration -; CHECK-NEXT:.b8 7 // Abbrev [7] 0x25a6:0x5 DW_TAG_formal_parameter -; CHECK-NEXT:.b32 2116 // DW_AT_type -; CHECK-NEXT:.b8 7 // Abbrev [7] 0x25ab:0x5 DW_TAG_formal_parameter -; CHECK-NEXT:.b32 2116 // DW_AT_type -; CHECK-NEXT:.b8 7 // Abbrev [7] 0x25b0:0x5 DW_TAG_formal_parameter -; CHECK-NEXT:.b32 4624 // DW_AT_type -; CHECK-NEXT:.b8 0 // End Of Children Mark -; CHECK-NEXT:.b8 44 // Abbrev [44] 0x25b6:0x20 DW_TAG_subprogram -; CHECK-NEXT:.b8 95 // DW_AT_MIPS_linkage_name -; CHECK-NEXT:.b8 90 -; CHECK-NEXT:.b8 76 -; CHECK-NEXT:.b8 53 ; CHECK-NEXT:.b8 114 +; CHECK-NEXT:.b8 32 +; CHECK-NEXT:.b8 117 ; CHECK-NEXT:.b8 105 ; CHECK-NEXT:.b8 110 ; CHECK-NEXT:.b8 116 -; CHECK-NEXT:.b8 102 -; CHECK-NEXT:.b8 102 +; CHECK-NEXT:.b8 51 ; CHECK-NEXT:.b8 0 -; CHECK-NEXT:.b8 114 // DW_AT_name +; CHECK-NEXT:.b8 13 // DW_AT_decl_file +; CHECK-NEXT:.b8 72 // DW_AT_decl_line +; CHECK-NEXT:.b32 8360 // DW_AT_type +; CHECK-NEXT:.b8 1 // DW_AT_declaration +; CHECK-NEXT:.b8 1 // DW_AT_external +; CHECK-NEXT:.b8 29 // Abbrev [29] 0x253f:0x6 DW_TAG_formal_parameter +; CHECK-NEXT:.b32 9760 // DW_AT_type +; CHECK-NEXT:.b8 1 // DW_AT_artificial +; CHECK-NEXT:.b8 0 // End Of Children Mark +; CHECK-NEXT:.b8 30 // Abbrev [30] 0x2546:0x28 DW_TAG_subprogram +; CHECK-NEXT:.b8 95 // DW_AT_name +; CHECK-NEXT:.b8 95 +; CHECK-NEXT:.b8 99 +; CHECK-NEXT:.b8 117 +; CHECK-NEXT:.b8 100 +; CHECK-NEXT:.b8 97 +; CHECK-NEXT:.b8 95 +; CHECK-NEXT:.b8 98 +; CHECK-NEXT:.b8 117 +; CHECK-NEXT:.b8 105 +; CHECK-NEXT:.b8 108 +; CHECK-NEXT:.b8 116 ; CHECK-NEXT:.b8 105 ; CHECK-NEXT:.b8 110 +; CHECK-NEXT:.b8 95 +; CHECK-NEXT:.b8 116 +; CHECK-NEXT:.b8 104 +; CHECK-NEXT:.b8 114 +; CHECK-NEXT:.b8 101 +; CHECK-NEXT:.b8 97 +; CHECK-NEXT:.b8 100 +; CHECK-NEXT:.b8 73 +; CHECK-NEXT:.b8 100 +; CHECK-NEXT:.b8 120 +; CHECK-NEXT:.b8 95 ; CHECK-NEXT:.b8 116 -; CHECK-NEXT:.b8 102 ; CHECK-NEXT:.b8 0 -; CHECK-NEXT:.b8 12 // DW_AT_decl_file -; CHECK-NEXT:.b8 111 // DW_AT_decl_line -; CHECK-NEXT:.b8 4 -; CHECK-NEXT:.b32 2116 // DW_AT_type +; CHECK-NEXT:.b8 13 // DW_AT_decl_file +; CHECK-NEXT:.b8 74 // DW_AT_decl_line ; CHECK-NEXT:.b8 1 // DW_AT_declaration -; CHECK-NEXT:.b8 7 // Abbrev [7] 0x25d0:0x5 DW_TAG_formal_parameter -; CHECK-NEXT:.b32 2116 // DW_AT_type +; CHECK-NEXT:.b8 1 // DW_AT_external +; CHECK-NEXT:.b8 3 // DW_AT_accessibility +; CHECK-NEXT: // DW_ACCESS_private +; CHECK-NEXT:.b8 29 // Abbrev [29] 0x2567:0x6 DW_TAG_formal_parameter +; CHECK-NEXT:.b32 9770 // DW_AT_type +; CHECK-NEXT:.b8 1 // DW_AT_artificial ; CHECK-NEXT:.b8 0 // End Of Children Mark -; CHECK-NEXT:.b8 44 // Abbrev [44] 0x25d6:0x22 DW_TAG_subprogram -; CHECK-NEXT:.b8 95 // DW_AT_MIPS_linkage_name -; CHECK-NEXT:.b8 90 -; CHECK-NEXT:.b8 76 -; CHECK-NEXT:.b8 54 -; CHECK-NEXT:.b8 114 -; CHECK-NEXT:.b8 111 +; CHECK-NEXT:.b8 30 // Abbrev [30] 0x256e:0x2d DW_TAG_subprogram +; CHECK-NEXT:.b8 95 // DW_AT_name +; CHECK-NEXT:.b8 95 +; CHECK-NEXT:.b8 99 ; CHECK-NEXT:.b8 117 -; CHECK-NEXT:.b8 110 ; CHECK-NEXT:.b8 100 -; 
CHECK-NEXT:.b8 102 -; CHECK-NEXT:.b8 102 -; CHECK-NEXT:.b8 0 -; CHECK-NEXT:.b8 114 // DW_AT_name -; CHECK-NEXT:.b8 111 +; CHECK-NEXT:.b8 97 +; CHECK-NEXT:.b8 95 +; CHECK-NEXT:.b8 98 ; CHECK-NEXT:.b8 117 +; CHECK-NEXT:.b8 105 +; CHECK-NEXT:.b8 108 +; CHECK-NEXT:.b8 116 +; CHECK-NEXT:.b8 105 ; CHECK-NEXT:.b8 110 +; CHECK-NEXT:.b8 95 +; CHECK-NEXT:.b8 116 +; CHECK-NEXT:.b8 104 +; CHECK-NEXT:.b8 114 +; CHECK-NEXT:.b8 101 +; CHECK-NEXT:.b8 97 ; CHECK-NEXT:.b8 100 -; CHECK-NEXT:.b8 102 +; CHECK-NEXT:.b8 73 +; CHECK-NEXT:.b8 100 +; CHECK-NEXT:.b8 120 +; CHECK-NEXT:.b8 95 +; CHECK-NEXT:.b8 116 ; CHECK-NEXT:.b8 0 -; CHECK-NEXT:.b8 12 // DW_AT_decl_file -; CHECK-NEXT:.b8 61 // DW_AT_decl_line -; CHECK-NEXT:.b8 6 -; CHECK-NEXT:.b32 2116 // DW_AT_type +; CHECK-NEXT:.b8 13 // DW_AT_decl_file +; CHECK-NEXT:.b8 74 // DW_AT_decl_line ; CHECK-NEXT:.b8 1 // DW_AT_declaration -; CHECK-NEXT:.b8 7 // Abbrev [7] 0x25f2:0x5 DW_TAG_formal_parameter -; CHECK-NEXT:.b32 2116 // DW_AT_type +; CHECK-NEXT:.b8 1 // DW_AT_external +; CHECK-NEXT:.b8 3 // DW_AT_accessibility +; CHECK-NEXT: // DW_ACCESS_private +; CHECK-NEXT:.b8 29 // Abbrev [29] 0x258f:0x6 DW_TAG_formal_parameter +; CHECK-NEXT:.b32 9770 // DW_AT_type +; CHECK-NEXT:.b8 1 // DW_AT_artificial +; CHECK-NEXT:.b8 6 // Abbrev [6] 0x2595:0x5 DW_TAG_formal_parameter +; CHECK-NEXT:.b32 9775 // DW_AT_type ; CHECK-NEXT:.b8 0 // End Of Children Mark -; CHECK-NEXT:.b8 44 // Abbrev [44] 0x25f8:0x2c DW_TAG_subprogram +; CHECK-NEXT:.b8 31 // Abbrev [31] 0x259b:0x44 DW_TAG_subprogram ; CHECK-NEXT:.b8 95 // DW_AT_MIPS_linkage_name ; CHECK-NEXT:.b8 90 -; CHECK-NEXT:.b8 76 -; CHECK-NEXT:.b8 56 -; CHECK-NEXT:.b8 115 +; CHECK-NEXT:.b8 78 +; CHECK-NEXT:.b8 75 +; CHECK-NEXT:.b8 50 +; CHECK-NEXT:.b8 54 +; CHECK-NEXT:.b8 95 +; CHECK-NEXT:.b8 95 ; CHECK-NEXT:.b8 99 +; CHECK-NEXT:.b8 117 +; CHECK-NEXT:.b8 100 ; CHECK-NEXT:.b8 97 -; CHECK-NEXT:.b8 108 +; CHECK-NEXT:.b8 95 ; CHECK-NEXT:.b8 98 +; CHECK-NEXT:.b8 117 +; CHECK-NEXT:.b8 105 ; CHECK-NEXT:.b8 108 +; CHECK-NEXT:.b8 116 +; CHECK-NEXT:.b8 105 ; CHECK-NEXT:.b8 110 -; CHECK-NEXT:.b8 102 -; CHECK-NEXT:.b8 102 -; CHECK-NEXT:.b8 108 +; CHECK-NEXT:.b8 95 +; CHECK-NEXT:.b8 116 +; CHECK-NEXT:.b8 104 +; CHECK-NEXT:.b8 114 +; CHECK-NEXT:.b8 101 +; CHECK-NEXT:.b8 97 +; CHECK-NEXT:.b8 100 +; CHECK-NEXT:.b8 73 +; CHECK-NEXT:.b8 100 +; CHECK-NEXT:.b8 120 +; CHECK-NEXT:.b8 95 +; CHECK-NEXT:.b8 116 +; CHECK-NEXT:.b8 97 +; CHECK-NEXT:.b8 83 +; CHECK-NEXT:.b8 69 +; CHECK-NEXT:.b8 82 +; CHECK-NEXT:.b8 75 +; CHECK-NEXT:.b8 83 +; CHECK-NEXT:.b8 95 ; CHECK-NEXT:.b8 0 -; CHECK-NEXT:.b8 115 // DW_AT_name -; CHECK-NEXT:.b8 99 +; CHECK-NEXT:.b8 111 // DW_AT_name +; CHECK-NEXT:.b8 112 +; CHECK-NEXT:.b8 101 +; CHECK-NEXT:.b8 114 ; CHECK-NEXT:.b8 97 -; CHECK-NEXT:.b8 108 -; CHECK-NEXT:.b8 98 -; CHECK-NEXT:.b8 108 -; CHECK-NEXT:.b8 110 -; CHECK-NEXT:.b8 102 +; CHECK-NEXT:.b8 116 +; CHECK-NEXT:.b8 111 +; CHECK-NEXT:.b8 114 +; CHECK-NEXT:.b8 61 ; CHECK-NEXT:.b8 0 -; CHECK-NEXT:.b8 12 // DW_AT_decl_file -; CHECK-NEXT:.b8 250 // DW_AT_decl_line -; CHECK-NEXT:.b8 5 -; CHECK-NEXT:.b32 2116 // DW_AT_type +; CHECK-NEXT:.b8 13 // DW_AT_decl_file +; CHECK-NEXT:.b8 74 // DW_AT_decl_line ; CHECK-NEXT:.b8 1 // DW_AT_declaration -; CHECK-NEXT:.b8 7 // Abbrev [7] 0x2619:0x5 DW_TAG_formal_parameter -; CHECK-NEXT:.b32 2116 // DW_AT_type -; CHECK-NEXT:.b8 7 // Abbrev [7] 0x261e:0x5 DW_TAG_formal_parameter -; CHECK-NEXT:.b32 5164 // DW_AT_type +; CHECK-NEXT:.b8 1 // DW_AT_external +; CHECK-NEXT:.b8 3 // DW_AT_accessibility +; CHECK-NEXT: // DW_ACCESS_private +; CHECK-NEXT:.b8 
29 // Abbrev [29] 0x25d3:0x6 DW_TAG_formal_parameter +; CHECK-NEXT:.b32 9760 // DW_AT_type +; CHECK-NEXT:.b8 1 // DW_AT_artificial +; CHECK-NEXT:.b8 6 // Abbrev [6] 0x25d9:0x5 DW_TAG_formal_parameter +; CHECK-NEXT:.b32 9775 // DW_AT_type ; CHECK-NEXT:.b8 0 // End Of Children Mark -; CHECK-NEXT:.b8 44 // Abbrev [44] 0x2624:0x2a DW_TAG_subprogram +; CHECK-NEXT:.b8 32 // Abbrev [32] 0x25df:0x40 DW_TAG_subprogram ; CHECK-NEXT:.b8 95 // DW_AT_MIPS_linkage_name ; CHECK-NEXT:.b8 90 -; CHECK-NEXT:.b8 76 -; CHECK-NEXT:.b8 55 -; CHECK-NEXT:.b8 115 +; CHECK-NEXT:.b8 78 +; CHECK-NEXT:.b8 75 +; CHECK-NEXT:.b8 50 +; CHECK-NEXT:.b8 54 +; CHECK-NEXT:.b8 95 +; CHECK-NEXT:.b8 95 ; CHECK-NEXT:.b8 99 +; CHECK-NEXT:.b8 117 +; CHECK-NEXT:.b8 100 ; CHECK-NEXT:.b8 97 -; CHECK-NEXT:.b8 108 +; CHECK-NEXT:.b8 95 ; CHECK-NEXT:.b8 98 -; CHECK-NEXT:.b8 110 -; CHECK-NEXT:.b8 102 -; CHECK-NEXT:.b8 102 +; CHECK-NEXT:.b8 117 ; CHECK-NEXT:.b8 105 -; CHECK-NEXT:.b8 0 -; CHECK-NEXT:.b8 115 // DW_AT_name -; CHECK-NEXT:.b8 99 -; CHECK-NEXT:.b8 97 ; CHECK-NEXT:.b8 108 -; CHECK-NEXT:.b8 98 -; CHECK-NEXT:.b8 110 -; CHECK-NEXT:.b8 102 -; CHECK-NEXT:.b8 0 -; CHECK-NEXT:.b8 12 // DW_AT_decl_file -; CHECK-NEXT:.b8 245 // DW_AT_decl_line -; CHECK-NEXT:.b8 5 -; CHECK-NEXT:.b32 2116 // DW_AT_type -; CHECK-NEXT:.b8 1 // DW_AT_declaration -; CHECK-NEXT:.b8 7 // Abbrev [7] 0x2643:0x5 DW_TAG_formal_parameter -; CHECK-NEXT:.b32 2116 // DW_AT_type -; CHECK-NEXT:.b8 7 // Abbrev [7] 0x2648:0x5 DW_TAG_formal_parameter -; CHECK-NEXT:.b32 4579 // DW_AT_type -; CHECK-NEXT:.b8 0 // End Of Children Mark -; CHECK-NEXT:.b8 44 // Abbrev [44] 0x264e:0x1e DW_TAG_subprogram -; CHECK-NEXT:.b8 95 // DW_AT_MIPS_linkage_name -; CHECK-NEXT:.b8 90 -; CHECK-NEXT:.b8 76 -; CHECK-NEXT:.b8 52 -; CHECK-NEXT:.b8 115 +; CHECK-NEXT:.b8 116 ; CHECK-NEXT:.b8 105 ; CHECK-NEXT:.b8 110 -; CHECK-NEXT:.b8 102 -; CHECK-NEXT:.b8 102 +; CHECK-NEXT:.b8 95 +; CHECK-NEXT:.b8 116 +; CHECK-NEXT:.b8 104 +; CHECK-NEXT:.b8 114 +; CHECK-NEXT:.b8 101 +; CHECK-NEXT:.b8 97 +; CHECK-NEXT:.b8 100 +; CHECK-NEXT:.b8 73 +; CHECK-NEXT:.b8 100 +; CHECK-NEXT:.b8 120 +; CHECK-NEXT:.b8 95 +; CHECK-NEXT:.b8 116 +; CHECK-NEXT:.b8 97 +; CHECK-NEXT:.b8 100 +; CHECK-NEXT:.b8 69 +; CHECK-NEXT:.b8 118 ; CHECK-NEXT:.b8 0 -; CHECK-NEXT:.b8 115 // DW_AT_name -; CHECK-NEXT:.b8 105 -; CHECK-NEXT:.b8 110 -; CHECK-NEXT:.b8 102 +; CHECK-NEXT:.b8 111 // DW_AT_name +; CHECK-NEXT:.b8 112 +; CHECK-NEXT:.b8 101 +; CHECK-NEXT:.b8 114 +; CHECK-NEXT:.b8 97 +; CHECK-NEXT:.b8 116 +; CHECK-NEXT:.b8 111 +; CHECK-NEXT:.b8 114 +; CHECK-NEXT:.b8 38 ; CHECK-NEXT:.b8 0 -; CHECK-NEXT:.b8 12 // DW_AT_decl_file -; CHECK-NEXT:.b8 210 // DW_AT_decl_line -; CHECK-NEXT:.b8 4 -; CHECK-NEXT:.b32 2116 // DW_AT_type +; CHECK-NEXT:.b8 13 // DW_AT_decl_file +; CHECK-NEXT:.b8 74 // DW_AT_decl_line +; CHECK-NEXT:.b32 9780 // DW_AT_type ; CHECK-NEXT:.b8 1 // DW_AT_declaration -; CHECK-NEXT:.b8 7 // Abbrev [7] 0x2666:0x5 DW_TAG_formal_parameter -; CHECK-NEXT:.b32 2116 // DW_AT_type +; CHECK-NEXT:.b8 1 // DW_AT_external +; CHECK-NEXT:.b8 3 // DW_AT_accessibility +; CHECK-NEXT: // DW_ACCESS_private +; CHECK-NEXT:.b8 29 // Abbrev [29] 0x2618:0x6 DW_TAG_formal_parameter +; CHECK-NEXT:.b32 9760 // DW_AT_type +; CHECK-NEXT:.b8 1 // DW_AT_artificial ; CHECK-NEXT:.b8 0 // End Of Children Mark -; CHECK-NEXT:.b8 44 // Abbrev [44] 0x266c:0x20 DW_TAG_subprogram +; CHECK-NEXT:.b8 0 // End Of Children Mark +; CHECK-NEXT:.b8 8 // Abbrev [8] 0x2620:0x5 DW_TAG_pointer_type +; CHECK-NEXT:.b32 9765 // DW_AT_type +; CHECK-NEXT:.b8 9 // Abbrev [9] 0x2625:0x5 
DW_TAG_const_type +; CHECK-NEXT:.b32 9197 // DW_AT_type +; CHECK-NEXT:.b8 8 // Abbrev [8] 0x262a:0x5 DW_TAG_pointer_type +; CHECK-NEXT:.b32 9197 // DW_AT_type +; CHECK-NEXT:.b8 33 // Abbrev [33] 0x262f:0x5 DW_TAG_reference_type +; CHECK-NEXT:.b32 9765 // DW_AT_type +; CHECK-NEXT:.b8 8 // Abbrev [8] 0x2634:0x5 DW_TAG_pointer_type +; CHECK-NEXT:.b32 9197 // DW_AT_type +; CHECK-NEXT:.b8 34 // Abbrev [34] 0x2639:0x6 DW_TAG_subprogram +; CHECK-NEXT:.b32 9228 // DW_AT_specification +; CHECK-NEXT:.b8 1 // DW_AT_inline +; CHECK-NEXT:.b8 38 // Abbrev [38] 0x263f:0x32 DW_TAG_subprogram ; CHECK-NEXT:.b8 95 // DW_AT_MIPS_linkage_name ; CHECK-NEXT:.b8 90 -; CHECK-NEXT:.b8 76 -; CHECK-NEXT:.b8 53 +; CHECK-NEXT:.b8 51 +; CHECK-NEXT:.b8 114 +; CHECK-NEXT:.b8 101 ; CHECK-NEXT:.b8 115 -; CHECK-NEXT:.b8 105 -; CHECK-NEXT:.b8 110 -; CHECK-NEXT:.b8 104 ; CHECK-NEXT:.b8 102 ; CHECK-NEXT:.b8 102 -; CHECK-NEXT:.b8 0 -; CHECK-NEXT:.b8 115 // DW_AT_name -; CHECK-NEXT:.b8 105 -; CHECK-NEXT:.b8 110 -; CHECK-NEXT:.b8 104 +; CHECK-NEXT:.b8 80 ; CHECK-NEXT:.b8 102 ; CHECK-NEXT:.b8 0 -; CHECK-NEXT:.b8 12 // DW_AT_decl_file -; CHECK-NEXT:.b8 37 // DW_AT_decl_line -; CHECK-NEXT:.b8 5 -; CHECK-NEXT:.b32 2116 // DW_AT_type -; CHECK-NEXT:.b8 1 // DW_AT_declaration -; CHECK-NEXT:.b8 7 // Abbrev [7] 0x2686:0x5 DW_TAG_formal_parameter -; CHECK-NEXT:.b32 2116 // DW_AT_type -; CHECK-NEXT:.b8 0 // End Of Children Mark -; CHECK-NEXT:.b8 44 // Abbrev [44] 0x268c:0x20 DW_TAG_subprogram -; CHECK-NEXT:.b8 95 // DW_AT_MIPS_linkage_name -; CHECK-NEXT:.b8 90 -; CHECK-NEXT:.b8 76 -; CHECK-NEXT:.b8 53 +; CHECK-NEXT:.b8 114 // DW_AT_name +; CHECK-NEXT:.b8 101 ; CHECK-NEXT:.b8 115 -; CHECK-NEXT:.b8 113 -; CHECK-NEXT:.b8 114 -; CHECK-NEXT:.b8 116 -; CHECK-NEXT:.b8 102 -; CHECK-NEXT:.b8 102 ; CHECK-NEXT:.b8 0 -; CHECK-NEXT:.b8 115 // DW_AT_name -; CHECK-NEXT:.b8 113 -; CHECK-NEXT:.b8 114 -; CHECK-NEXT:.b8 116 -; CHECK-NEXT:.b8 102 +; CHECK-NEXT:.b8 12 // DW_AT_decl_file +; CHECK-NEXT:.b8 3 // DW_AT_decl_line +; CHECK-NEXT:.b8 1 // DW_AT_external +; CHECK-NEXT:.b8 1 // DW_AT_inline +; CHECK-NEXT:.b8 39 // Abbrev [39] 0x2653:0x9 DW_TAG_formal_parameter +; CHECK-NEXT:.b8 120 // DW_AT_name ; CHECK-NEXT:.b8 0 -; CHECK-NEXT:.b8 14 // DW_AT_decl_file -; CHECK-NEXT:.b8 139 // DW_AT_decl_line -; CHECK-NEXT:.b8 3 -; CHECK-NEXT:.b32 2116 // DW_AT_type -; CHECK-NEXT:.b8 1 // DW_AT_declaration -; CHECK-NEXT:.b8 7 // Abbrev [7] 0x26a6:0x5 DW_TAG_formal_parameter -; CHECK-NEXT:.b32 2116 // DW_AT_type -; CHECK-NEXT:.b8 0 // End Of Children Mark -; CHECK-NEXT:.b8 44 // Abbrev [44] 0x26ac:0x1e DW_TAG_subprogram -; CHECK-NEXT:.b8 95 // DW_AT_MIPS_linkage_name -; CHECK-NEXT:.b8 90 -; CHECK-NEXT:.b8 76 -; CHECK-NEXT:.b8 52 -; CHECK-NEXT:.b8 116 -; CHECK-NEXT:.b8 97 -; CHECK-NEXT:.b8 110 -; CHECK-NEXT:.b8 102 -; CHECK-NEXT:.b8 102 +; CHECK-NEXT:.b8 12 // DW_AT_decl_file +; CHECK-NEXT:.b8 3 // DW_AT_decl_line +; CHECK-NEXT:.b32 1554 // DW_AT_type +; CHECK-NEXT:.b8 39 // Abbrev [39] 0x265c:0x9 DW_TAG_formal_parameter +; CHECK-NEXT:.b8 121 // DW_AT_name ; CHECK-NEXT:.b8 0 -; CHECK-NEXT:.b8 116 // DW_AT_name -; CHECK-NEXT:.b8 97 -; CHECK-NEXT:.b8 110 -; CHECK-NEXT:.b8 102 +; CHECK-NEXT:.b8 12 // DW_AT_decl_file +; CHECK-NEXT:.b8 3 // DW_AT_decl_line +; CHECK-NEXT:.b32 1554 // DW_AT_type +; CHECK-NEXT:.b8 39 // Abbrev [39] 0x2665:0xb DW_TAG_formal_parameter +; CHECK-NEXT:.b8 114 // DW_AT_name +; CHECK-NEXT:.b8 101 +; CHECK-NEXT:.b8 115 ; CHECK-NEXT:.b8 0 ; CHECK-NEXT:.b8 12 // DW_AT_decl_file -; CHECK-NEXT:.b8 252 // DW_AT_decl_line -; CHECK-NEXT:.b8 4 -; CHECK-NEXT:.b32 
2116 // DW_AT_type -; CHECK-NEXT:.b8 1 // DW_AT_declaration -; CHECK-NEXT:.b8 7 // Abbrev [7] 0x26c4:0x5 DW_TAG_formal_parameter -; CHECK-NEXT:.b32 2116 // DW_AT_type +; CHECK-NEXT:.b8 3 // DW_AT_decl_line +; CHECK-NEXT:.b32 3345 // DW_AT_type ; CHECK-NEXT:.b8 0 // End Of Children Mark -; CHECK-NEXT:.b8 44 // Abbrev [44] 0x26ca:0x20 DW_TAG_subprogram +; CHECK-NEXT:.b8 40 // Abbrev [40] 0x2671:0xbf DW_TAG_subprogram +; CHECK-NEXT:.b64 Lfunc_begin0 // DW_AT_low_pc +; CHECK-NEXT:.b64 Lfunc_end0 // DW_AT_high_pc +; CHECK-NEXT:.b8 1 // DW_AT_frame_base +; CHECK-NEXT:.b8 156 ; CHECK-NEXT:.b8 95 // DW_AT_MIPS_linkage_name ; CHECK-NEXT:.b8 90 -; CHECK-NEXT:.b8 76 ; CHECK-NEXT:.b8 53 -; CHECK-NEXT:.b8 116 +; CHECK-NEXT:.b8 115 ; CHECK-NEXT:.b8 97 -; CHECK-NEXT:.b8 110 -; CHECK-NEXT:.b8 104 +; CHECK-NEXT:.b8 120 +; CHECK-NEXT:.b8 112 +; CHECK-NEXT:.b8 121 +; CHECK-NEXT:.b8 105 ; CHECK-NEXT:.b8 102 +; CHECK-NEXT:.b8 80 ; CHECK-NEXT:.b8 102 +; CHECK-NEXT:.b8 83 +; CHECK-NEXT:.b8 95 ; CHECK-NEXT:.b8 0 -; CHECK-NEXT:.b8 116 // DW_AT_name +; CHECK-NEXT:.b8 115 // DW_AT_name ; CHECK-NEXT:.b8 97 -; CHECK-NEXT:.b8 110 -; CHECK-NEXT:.b8 104 -; CHECK-NEXT:.b8 102 +; CHECK-NEXT:.b8 120 +; CHECK-NEXT:.b8 112 +; CHECK-NEXT:.b8 121 ; CHECK-NEXT:.b8 0 ; CHECK-NEXT:.b8 12 // DW_AT_decl_file -; CHECK-NEXT:.b8 42 // DW_AT_decl_line -; CHECK-NEXT:.b8 5 -; CHECK-NEXT:.b32 2116 // DW_AT_type -; CHECK-NEXT:.b8 1 // DW_AT_declaration -; CHECK-NEXT:.b8 7 // Abbrev [7] 0x26e4:0x5 DW_TAG_formal_parameter -; CHECK-NEXT:.b32 2116 // DW_AT_type -; CHECK-NEXT:.b8 0 // End Of Children Mark -; CHECK-NEXT:.b8 44 // Abbrev [44] 0x26ea:0x24 DW_TAG_subprogram -; CHECK-NEXT:.b8 95 // DW_AT_MIPS_linkage_name -; CHECK-NEXT:.b8 90 -; CHECK-NEXT:.b8 76 -; CHECK-NEXT:.b8 55 -; CHECK-NEXT:.b8 116 -; CHECK-NEXT:.b8 103 -; CHECK-NEXT:.b8 97 -; CHECK-NEXT:.b8 109 -; CHECK-NEXT:.b8 109 -; CHECK-NEXT:.b8 97 -; CHECK-NEXT:.b8 102 -; CHECK-NEXT:.b8 102 +; CHECK-NEXT:.b8 5 // DW_AT_decl_line +; CHECK-NEXT:.b8 1 // DW_AT_external +; CHECK-NEXT:.b8 39 // Abbrev [39] 0x269c:0x9 DW_TAG_formal_parameter +; CHECK-NEXT:.b8 110 // DW_AT_name ; CHECK-NEXT:.b8 0 -; CHECK-NEXT:.b8 116 // DW_AT_name -; CHECK-NEXT:.b8 103 -; CHECK-NEXT:.b8 97 -; CHECK-NEXT:.b8 109 -; CHECK-NEXT:.b8 109 -; CHECK-NEXT:.b8 97 -; CHECK-NEXT:.b8 102 +; CHECK-NEXT:.b8 12 // DW_AT_decl_file +; CHECK-NEXT:.b8 5 // DW_AT_decl_line +; CHECK-NEXT:.b32 2332 // DW_AT_type +; CHECK-NEXT:.b8 39 // Abbrev [39] 0x26a5:0x9 DW_TAG_formal_parameter +; CHECK-NEXT:.b8 97 // DW_AT_name ; CHECK-NEXT:.b8 0 ; CHECK-NEXT:.b8 12 // DW_AT_decl_file -; CHECK-NEXT:.b8 56 // DW_AT_decl_line -; CHECK-NEXT:.b8 6 -; CHECK-NEXT:.b32 2116 // DW_AT_type -; CHECK-NEXT:.b8 1 // DW_AT_declaration -; CHECK-NEXT:.b8 7 // Abbrev [7] 0x2708:0x5 DW_TAG_formal_parameter -; CHECK-NEXT:.b32 2116 // DW_AT_type -; CHECK-NEXT:.b8 0 // End Of Children Mark -; CHECK-NEXT:.b8 44 // Abbrev [44] 0x270e:0x22 DW_TAG_subprogram -; CHECK-NEXT:.b8 95 // DW_AT_MIPS_linkage_name -; CHECK-NEXT:.b8 90 -; CHECK-NEXT:.b8 76 -; CHECK-NEXT:.b8 54 -; CHECK-NEXT:.b8 116 -; CHECK-NEXT:.b8 114 -; CHECK-NEXT:.b8 117 -; CHECK-NEXT:.b8 110 -; CHECK-NEXT:.b8 99 -; CHECK-NEXT:.b8 102 -; CHECK-NEXT:.b8 102 +; CHECK-NEXT:.b8 5 // DW_AT_decl_line +; CHECK-NEXT:.b32 1554 // DW_AT_type +; CHECK-NEXT:.b8 39 // Abbrev [39] 0x26ae:0x9 DW_TAG_formal_parameter +; CHECK-NEXT:.b8 120 // DW_AT_name ; CHECK-NEXT:.b8 0 -; CHECK-NEXT:.b8 116 // DW_AT_name -; CHECK-NEXT:.b8 114 -; CHECK-NEXT:.b8 117 -; CHECK-NEXT:.b8 110 -; CHECK-NEXT:.b8 99 -; CHECK-NEXT:.b8 102 +; 
CHECK-NEXT:.b8 12 // DW_AT_decl_file +; CHECK-NEXT:.b8 5 // DW_AT_decl_line +; CHECK-NEXT:.b32 3345 // DW_AT_type +; CHECK-NEXT:.b8 39 // Abbrev [39] 0x26b7:0x9 DW_TAG_formal_parameter +; CHECK-NEXT:.b8 121 // DW_AT_name ; CHECK-NEXT:.b8 0 -; CHECK-NEXT:.b8 14 // DW_AT_decl_file -; CHECK-NEXT:.b8 150 // DW_AT_decl_line -; CHECK-NEXT:.b8 2 -; CHECK-NEXT:.b32 2116 // DW_AT_type -; CHECK-NEXT:.b8 1 // DW_AT_declaration -; CHECK-NEXT:.b8 7 // Abbrev [7] 0x272a:0x5 DW_TAG_formal_parameter -; CHECK-NEXT:.b32 2116 // DW_AT_type +; CHECK-NEXT:.b8 12 // DW_AT_decl_file +; CHECK-NEXT:.b8 5 // DW_AT_decl_line +; CHECK-NEXT:.b32 3345 // DW_AT_type +; CHECK-NEXT:.b8 41 // Abbrev [41] 0x26c0:0x9 DW_TAG_variable +; CHECK-NEXT:.b8 105 // DW_AT_name +; CHECK-NEXT:.b8 0 +; CHECK-NEXT:.b8 12 // DW_AT_decl_file +; CHECK-NEXT:.b8 6 // DW_AT_decl_line +; CHECK-NEXT:.b32 2332 // DW_AT_type +; CHECK-NEXT:.b8 42 // Abbrev [42] 0x26c9:0x18 DW_TAG_inlined_subroutine +; CHECK-NEXT:.b32 8432 // DW_AT_abstract_origin +; CHECK-NEXT:.b64 Ltmp0 // DW_AT_low_pc +; CHECK-NEXT:.b64 Ltmp1 // DW_AT_high_pc +; CHECK-NEXT:.b8 12 // DW_AT_call_file +; CHECK-NEXT:.b8 6 // DW_AT_call_line +; CHECK-NEXT:.b8 11 // DW_AT_call_column +; CHECK-NEXT:.b8 42 // Abbrev [42] 0x26e1:0x18 DW_TAG_inlined_subroutine +; CHECK-NEXT:.b32 9191 // DW_AT_abstract_origin +; CHECK-NEXT:.b64 Ltmp1 // DW_AT_low_pc +; CHECK-NEXT:.b64 Ltmp2 // DW_AT_high_pc +; CHECK-NEXT:.b8 12 // DW_AT_call_file +; CHECK-NEXT:.b8 6 // DW_AT_call_line +; CHECK-NEXT:.b8 24 // DW_AT_call_column +; CHECK-NEXT:.b8 42 // Abbrev [42] 0x26f9:0x18 DW_TAG_inlined_subroutine +; CHECK-NEXT:.b32 9785 // DW_AT_abstract_origin +; CHECK-NEXT:.b64 Ltmp2 // DW_AT_low_pc +; CHECK-NEXT:.b64 Ltmp3 // DW_AT_high_pc +; CHECK-NEXT:.b8 12 // DW_AT_call_file +; CHECK-NEXT:.b8 6 // DW_AT_call_line +; CHECK-NEXT:.b8 37 // DW_AT_call_column +; CHECK-NEXT:.b8 43 // Abbrev [43] 0x2711:0x1e DW_TAG_inlined_subroutine +; CHECK-NEXT:.b32 9791 // DW_AT_abstract_origin +; CHECK-NEXT:.b64 Ltmp9 // DW_AT_low_pc +; CHECK-NEXT:.b64 Ltmp10 // DW_AT_high_pc +; CHECK-NEXT:.b8 12 // DW_AT_call_file +; CHECK-NEXT:.b8 8 // DW_AT_call_line +; CHECK-NEXT:.b8 5 // DW_AT_call_column +; CHECK-NEXT:.b8 44 // Abbrev [44] 0x2729:0x5 DW_TAG_formal_parameter +; CHECK-NEXT:.b32 9820 // DW_AT_abstract_origin +; CHECK-NEXT:.b8 0 // End Of Children Mark ; CHECK-NEXT:.b8 0 // End Of Children Mark ; CHECK-NEXT:.b8 0 // End Of Children Mark ; CHECK-NEXT: } diff --git a/llvm/test/DebugInfo/NVPTX/debug-loc-offset.ll b/llvm/test/DebugInfo/NVPTX/debug-loc-offset.ll index a6bfd8533a27..43cc586fff19 100644 --- a/llvm/test/DebugInfo/NVPTX/debug-loc-offset.ll +++ b/llvm/test/DebugInfo/NVPTX/debug-loc-offset.ll @@ -153,6 +153,15 @@ attributes #2 = { "less-precise-fpmad"="false" "frame-pointer"="all" "no-infs-fp ; CHECK-NEXT: .b8 0 // EOM(1) ; CHECK-NEXT: .b8 0 // EOM(2) ; CHECK-NEXT: .b8 2 // Abbreviation Code +; CHECK-NEXT: .b8 19 // DW_TAG_structure_type +; CHECK-NEXT: .b8 0 // DW_CHILDREN_no +; CHECK-NEXT: .b8 3 // DW_AT_name +; CHECK-NEXT: .b8 8 // DW_FORM_string +; CHECK-NEXT: .b8 60 // DW_AT_declaration +; CHECK-NEXT: .b8 12 // DW_FORM_flag +; CHECK-NEXT: .b8 0 // EOM(1) +; CHECK-NEXT: .b8 0 // EOM(2) +; CHECK-NEXT: .b8 3 // Abbreviation Code ; CHECK-NEXT: .b8 46 // DW_TAG_subprogram ; CHECK-NEXT: .b8 1 // DW_CHILDREN_yes ; CHECK-NEXT: .b8 17 // DW_AT_low_pc @@ -170,13 +179,11 @@ attributes #2 = { "less-precise-fpmad"="false" "frame-pointer"="all" "no-infs-fp ; CHECK-NEXT: .b8 11 // DW_FORM_data1 ; CHECK-NEXT: .b8 59 // DW_AT_decl_line ; 
CHECK-NEXT: .b8 11 // DW_FORM_data1 -; CHECK-NEXT: .b8 73 // DW_AT_type -; CHECK-NEXT: .b8 19 // DW_FORM_ref4 ; CHECK-NEXT: .b8 63 // DW_AT_external ; CHECK-NEXT: .b8 12 // DW_FORM_flag ; CHECK-NEXT: .b8 0 // EOM(1) ; CHECK-NEXT: .b8 0 // EOM(2) -; CHECK-NEXT: .b8 3 // Abbreviation Code +; CHECK-NEXT: .b8 4 // Abbreviation Code ; CHECK-NEXT: .b8 5 // DW_TAG_formal_parameter ; CHECK-NEXT: .b8 0 // DW_CHILDREN_no ; CHECK-NEXT: .b8 3 // DW_AT_name @@ -189,18 +196,20 @@ attributes #2 = { "less-precise-fpmad"="false" "frame-pointer"="all" "no-infs-fp ; CHECK-NEXT: .b8 19 // DW_FORM_ref4 ; CHECK-NEXT: .b8 0 // EOM(1) ; CHECK-NEXT: .b8 0 // EOM(2) -; CHECK-NEXT: .b8 4 // Abbreviation Code -; CHECK-NEXT: .b8 36 // DW_TAG_base_type +; CHECK-NEXT: .b8 5 // Abbreviation Code +; CHECK-NEXT: .b8 52 // DW_TAG_variable ; CHECK-NEXT: .b8 0 // DW_CHILDREN_no ; CHECK-NEXT: .b8 3 // DW_AT_name ; CHECK-NEXT: .b8 8 // DW_FORM_string -; CHECK-NEXT: .b8 62 // DW_AT_encoding +; CHECK-NEXT: .b8 58 // DW_AT_decl_file ; CHECK-NEXT: .b8 11 // DW_FORM_data1 -; CHECK-NEXT: .b8 11 // DW_AT_byte_size +; CHECK-NEXT: .b8 59 // DW_AT_decl_line ; CHECK-NEXT: .b8 11 // DW_FORM_data1 +; CHECK-NEXT: .b8 73 // DW_AT_type +; CHECK-NEXT: .b8 16 // DW_FORM_ref_addr ; CHECK-NEXT: .b8 0 // EOM(1) ; CHECK-NEXT: .b8 0 // EOM(2) -; CHECK-NEXT: .b8 5 // Abbreviation Code +; CHECK-NEXT: .b8 6 // Abbreviation Code ; CHECK-NEXT: .b8 46 // DW_TAG_subprogram ; CHECK-NEXT: .b8 1 // DW_CHILDREN_yes ; CHECK-NEXT: .b8 17 // DW_AT_low_pc @@ -218,42 +227,33 @@ attributes #2 = { "less-precise-fpmad"="false" "frame-pointer"="all" "no-infs-fp ; CHECK-NEXT: .b8 11 // DW_FORM_data1 ; CHECK-NEXT: .b8 59 // DW_AT_decl_line ; CHECK-NEXT: .b8 11 // DW_FORM_data1 +; CHECK-NEXT: .b8 73 // DW_AT_type +; CHECK-NEXT: .b8 19 // DW_FORM_ref4 ; CHECK-NEXT: .b8 63 // DW_AT_external ; CHECK-NEXT: .b8 12 // DW_FORM_flag ; CHECK-NEXT: .b8 0 // EOM(1) ; CHECK-NEXT: .b8 0 // EOM(2) -; CHECK-NEXT: .b8 6 // Abbreviation Code -; CHECK-NEXT: .b8 52 // DW_TAG_variable +; CHECK-NEXT: .b8 7 // Abbreviation Code +; CHECK-NEXT: .b8 36 // DW_TAG_base_type ; CHECK-NEXT: .b8 0 // DW_CHILDREN_no ; CHECK-NEXT: .b8 3 // DW_AT_name ; CHECK-NEXT: .b8 8 // DW_FORM_string -; CHECK-NEXT: .b8 58 // DW_AT_decl_file +; CHECK-NEXT: .b8 62 // DW_AT_encoding ; CHECK-NEXT: .b8 11 // DW_FORM_data1 -; CHECK-NEXT: .b8 59 // DW_AT_decl_line +; CHECK-NEXT: .b8 11 // DW_AT_byte_size ; CHECK-NEXT: .b8 11 // DW_FORM_data1 -; CHECK-NEXT: .b8 73 // DW_AT_type -; CHECK-NEXT: .b8 16 // DW_FORM_ref_addr -; CHECK-NEXT: .b8 0 // EOM(1) -; CHECK-NEXT: .b8 0 // EOM(2) -; CHECK-NEXT: .b8 7 // Abbreviation Code -; CHECK-NEXT: .b8 19 // DW_TAG_structure_type -; CHECK-NEXT: .b8 0 // DW_CHILDREN_no -; CHECK-NEXT: .b8 3 // DW_AT_name -; CHECK-NEXT: .b8 8 // DW_FORM_string -; CHECK-NEXT: .b8 60 // DW_AT_declaration -; CHECK-NEXT: .b8 12 // DW_FORM_flag ; CHECK-NEXT: .b8 0 // EOM(1) ; CHECK-NEXT: .b8 0 // EOM(2) ; CHECK-NEXT: .b8 0 // EOM(3) ; CHECK-NEXT: } ; CHECK-NEXT: .section .debug_info ; CHECK-NEXT: { -; CHECK-NEXT: .b32 152 // Length of Unit +; CHECK-NEXT: .b32 159 // Length of Unit ; CHECK-NEXT: .b8 2 // DWARF version number ; CHECK-NEXT: .b8 0 ; CHECK-NEXT: .b32 .debug_abbrev // Offset Into Abbrev. 
Section ; CHECK-NEXT: .b8 8 // Address Size (in bytes) -; CHECK-NEXT: .b8 1 // Abbrev [1] 0xb:0x91 DW_TAG_compile_unit +; CHECK-NEXT: .b8 1 // Abbrev [1] 0xb:0x98 DW_TAG_compile_unit ; CHECK-NEXT: .b8 99 // DW_AT_producer ; CHECK-NEXT: .b8 108 ; CHECK-NEXT: .b8 97 @@ -301,7 +301,7 @@ attributes #2 = { "less-precise-fpmad"="false" "frame-pointer"="all" "no-infs-fp ; CHECK-NEXT: .b8 115 ; CHECK-NEXT: .b8 101 ; CHECK-NEXT: .b8 116 -; CHECK-NEXT: .b8 49 +; CHECK-NEXT: .b8 50 ; CHECK-NEXT: .b8 46 ; CHECK-NEXT: .b8 99 ; CHECK-NEXT: .b8 99 @@ -323,11 +323,15 @@ attributes #2 = { "less-precise-fpmad"="false" "frame-pointer"="all" "no-infs-fp ; CHECK-NEXT: .b8 99 ; CHECK-NEXT: .b8 99 ; CHECK-NEXT: .b8 0 -; CHECK-NEXT: .b64 Lfunc_begin0 // DW_AT_low_pc -; CHECK-NEXT: .b64 Lfunc_end0 // DW_AT_high_pc -; CHECK-NEXT: .b8 2 // Abbrev [2] 0x64:0x30 DW_TAG_subprogram -; CHECK-NEXT: .b64 Lfunc_begin0 // DW_AT_low_pc -; CHECK-NEXT: .b64 Lfunc_end0 // DW_AT_high_pc +; CHECK-NEXT: .b64 Lfunc_begin1 // DW_AT_low_pc +; CHECK-NEXT: .b64 Lfunc_end1 // DW_AT_high_pc +; CHECK-NEXT: .b8 2 // Abbrev [2] 0x64:0x4 DW_TAG_structure_type +; CHECK-NEXT: .b8 65 // DW_AT_name +; CHECK-NEXT: .b8 0 +; CHECK-NEXT: .b8 1 // DW_AT_declaration +; CHECK-NEXT: .b8 3 // Abbrev [3] 0x68:0x3a DW_TAG_subprogram +; CHECK-NEXT: .b64 Lfunc_begin1 // DW_AT_low_pc +; CHECK-NEXT: .b64 Lfunc_end1 // DW_AT_high_pc ; CHECK-NEXT: .b8 1 // DW_AT_frame_base ; CHECK-NEXT: .b8 156 ; CHECK-NEXT: .b8 95 // DW_AT_MIPS_linkage_name @@ -335,38 +339,37 @@ attributes #2 = { "less-precise-fpmad"="false" "frame-pointer"="all" "no-infs-fp ; CHECK-NEXT: .b8 51 ; CHECK-NEXT: .b8 98 ; CHECK-NEXT: .b8 97 -; CHECK-NEXT: .b8 114 -; CHECK-NEXT: .b8 105 +; CHECK-NEXT: .b8 122 +; CHECK-NEXT: .b8 49 +; CHECK-NEXT: .b8 65 ; CHECK-NEXT: .b8 0 ; CHECK-NEXT: .b8 98 // DW_AT_name ; CHECK-NEXT: .b8 97 -; CHECK-NEXT: .b8 114 +; CHECK-NEXT: .b8 122 ; CHECK-NEXT: .b8 0 -; CHECK-NEXT: .b8 1 // DW_AT_decl_file -; CHECK-NEXT: .b8 1 // DW_AT_decl_line -; CHECK-NEXT: .b32 148 // DW_AT_type +; CHECK-NEXT: .b8 2 // DW_AT_decl_file +; CHECK-NEXT: .b8 6 // DW_AT_decl_line ; CHECK-NEXT: .b8 1 // DW_AT_external -; CHECK-NEXT: .b8 3 // Abbrev [3] 0x8a:0x9 DW_TAG_formal_parameter -; CHECK-NEXT: .b8 98 // DW_AT_name +; CHECK-NEXT: .b8 4 // Abbrev [4] 0x8b:0x9 DW_TAG_formal_parameter +; CHECK-NEXT: .b8 97 // DW_AT_name ; CHECK-NEXT: .b8 0 -; CHECK-NEXT: .b8 1 // DW_AT_decl_file -; CHECK-NEXT: .b8 1 // DW_AT_decl_line -; CHECK-NEXT: .b32 148 // DW_AT_type -; CHECK-NEXT: .b8 0 // End Of Children Mark -; CHECK-NEXT: .b8 4 // Abbrev [4] 0x94:0x7 DW_TAG_base_type -; CHECK-NEXT: .b8 105 // DW_AT_name -; CHECK-NEXT: .b8 110 -; CHECK-NEXT: .b8 116 +; CHECK-NEXT: .b8 2 // DW_AT_decl_file +; CHECK-NEXT: .b8 6 // DW_AT_decl_line +; CHECK-NEXT: .b32 100 // DW_AT_type +; CHECK-NEXT: .b8 5 // Abbrev [5] 0x94:0xd DW_TAG_variable +; CHECK-NEXT: .b8 122 // DW_AT_name ; CHECK-NEXT: .b8 0 -; CHECK-NEXT: .b8 5 // DW_AT_encoding -; CHECK-NEXT: .b8 4 // DW_AT_byte_size +; CHECK-NEXT: .b8 2 // DW_AT_decl_file +; CHECK-NEXT: .b8 7 // DW_AT_decl_line +; CHECK-NEXT: .b64 .debug_info+311 // DW_AT_type ; CHECK-NEXT: .b8 0 // End Of Children Mark -; CHECK-NEXT: .b32 159 // Length of Unit +; CHECK-NEXT: .b8 0 // End Of Children Mark +; CHECK-NEXT: .b32 152 // Length of Unit ; CHECK-NEXT: .b8 2 // DWARF version number ; CHECK-NEXT: .b8 0 ; CHECK-NEXT: .b32 .debug_abbrev // Offset Into Abbrev. 
Section ; CHECK-NEXT: .b8 8 // Address Size (in bytes) -; CHECK-NEXT: .b8 1 // Abbrev [1] 0xb:0x98 DW_TAG_compile_unit +; CHECK-NEXT: .b8 1 // Abbrev [1] 0xb:0x91 DW_TAG_compile_unit ; CHECK-NEXT: .b8 99 // DW_AT_producer ; CHECK-NEXT: .b8 108 ; CHECK-NEXT: .b8 97 @@ -414,7 +417,7 @@ attributes #2 = { "less-precise-fpmad"="false" "frame-pointer"="all" "no-infs-fp ; CHECK-NEXT: .b8 115 ; CHECK-NEXT: .b8 101 ; CHECK-NEXT: .b8 116 -; CHECK-NEXT: .b8 50 +; CHECK-NEXT: .b8 49 ; CHECK-NEXT: .b8 46 ; CHECK-NEXT: .b8 99 ; CHECK-NEXT: .b8 99 @@ -436,11 +439,11 @@ attributes #2 = { "less-precise-fpmad"="false" "frame-pointer"="all" "no-infs-fp ; CHECK-NEXT: .b8 99 ; CHECK-NEXT: .b8 99 ; CHECK-NEXT: .b8 0 -; CHECK-NEXT: .b64 Lfunc_begin1 // DW_AT_low_pc -; CHECK-NEXT: .b64 Lfunc_end1 // DW_AT_high_pc -; CHECK-NEXT: .b8 5 // Abbrev [5] 0x64:0x3a DW_TAG_subprogram -; CHECK-NEXT: .b64 Lfunc_begin1 // DW_AT_low_pc -; CHECK-NEXT: .b64 Lfunc_end1 // DW_AT_high_pc +; CHECK-NEXT: .b64 Lfunc_begin0 // DW_AT_low_pc +; CHECK-NEXT: .b64 Lfunc_end0 // DW_AT_high_pc +; CHECK-NEXT: .b8 6 // Abbrev [6] 0x64:0x30 DW_TAG_subprogram +; CHECK-NEXT: .b64 Lfunc_begin0 // DW_AT_low_pc +; CHECK-NEXT: .b64 Lfunc_end0 // DW_AT_high_pc ; CHECK-NEXT: .b8 1 // DW_AT_frame_base ; CHECK-NEXT: .b8 156 ; CHECK-NEXT: .b8 95 // DW_AT_MIPS_linkage_name @@ -448,34 +451,31 @@ attributes #2 = { "less-precise-fpmad"="false" "frame-pointer"="all" "no-infs-fp ; CHECK-NEXT: .b8 51 ; CHECK-NEXT: .b8 98 ; CHECK-NEXT: .b8 97 -; CHECK-NEXT: .b8 122 -; CHECK-NEXT: .b8 49 -; CHECK-NEXT: .b8 65 +; CHECK-NEXT: .b8 114 +; CHECK-NEXT: .b8 105 ; CHECK-NEXT: .b8 0 ; CHECK-NEXT: .b8 98 // DW_AT_name ; CHECK-NEXT: .b8 97 -; CHECK-NEXT: .b8 122 +; CHECK-NEXT: .b8 114 ; CHECK-NEXT: .b8 0 -; CHECK-NEXT: .b8 2 // DW_AT_decl_file -; CHECK-NEXT: .b8 6 // DW_AT_decl_line +; CHECK-NEXT: .b8 1 // DW_AT_decl_file +; CHECK-NEXT: .b8 1 // DW_AT_decl_line +; CHECK-NEXT: .b32 148 // DW_AT_type ; CHECK-NEXT: .b8 1 // DW_AT_external -; CHECK-NEXT: .b8 3 // Abbrev [3] 0x87:0x9 DW_TAG_formal_parameter -; CHECK-NEXT: .b8 97 // DW_AT_name -; CHECK-NEXT: .b8 0 -; CHECK-NEXT: .b8 2 // DW_AT_decl_file -; CHECK-NEXT: .b8 6 // DW_AT_decl_line -; CHECK-NEXT: .b32 158 // DW_AT_type -; CHECK-NEXT: .b8 6 // Abbrev [6] 0x90:0xd DW_TAG_variable -; CHECK-NEXT: .b8 122 // DW_AT_name +; CHECK-NEXT: .b8 4 // Abbrev [4] 0x8a:0x9 DW_TAG_formal_parameter +; CHECK-NEXT: .b8 98 // DW_AT_name ; CHECK-NEXT: .b8 0 -; CHECK-NEXT: .b8 2 // DW_AT_decl_file -; CHECK-NEXT: .b8 7 // DW_AT_decl_line -; CHECK-NEXT: .b64 .debug_info+148 // DW_AT_type +; CHECK-NEXT: .b8 1 // DW_AT_decl_file +; CHECK-NEXT: .b8 1 // DW_AT_decl_line +; CHECK-NEXT: .b32 148 // DW_AT_type ; CHECK-NEXT: .b8 0 // End Of Children Mark -; CHECK-NEXT: .b8 7 // Abbrev [7] 0x9e:0x4 DW_TAG_structure_type -; CHECK-NEXT: .b8 65 // DW_AT_name +; CHECK-NEXT: .b8 7 // Abbrev [7] 0x94:0x7 DW_TAG_base_type +; CHECK-NEXT: .b8 105 // DW_AT_name +; CHECK-NEXT: .b8 110 +; CHECK-NEXT: .b8 116 ; CHECK-NEXT: .b8 0 -; CHECK-NEXT: .b8 1 // DW_AT_declaration +; CHECK-NEXT: .b8 5 // DW_AT_encoding +; CHECK-NEXT: .b8 4 // DW_AT_byte_size ; CHECK-NEXT: .b8 0 // End Of Children Mark ; CHECK-NEXT: } ; CHECK-NEXT: .section .debug_loc { } diff --git a/llvm/test/DebugInfo/PowerPC/strict-dwarf.ll b/llvm/test/DebugInfo/PowerPC/strict-dwarf.ll index 483569a12deb..02590955262f 100644 --- a/llvm/test/DebugInfo/PowerPC/strict-dwarf.ll +++ b/llvm/test/DebugInfo/PowerPC/strict-dwarf.ll @@ -15,17 +15,17 @@ ; 3: DwarfUnit::addUInt() ; 4: addUInt(Block, 
(dwarf::Attribute)0, Form, Integer); -; CHECK: DW_AT_noreturn ; CHECK: DW_AT_name ("var") ; CHECK-NOT: DW_TAG_ ; CHECK: DW_AT_alignment ; CHECK: DW_AT_location (DW_OP_addr 0x0) +; CHECK: DW_AT_noreturn ; -; STRICT-NOT: DW_AT_noreturn ; STRICT: DW_AT_name ("var") ; STRICT-NOT: DW_AT_alignment ; STRICT-NOT: DW_TAG_ ; STRICT: DW_AT_location (DW_OP_addr 0x0) +; STRICT-NOT: DW_AT_noreturn @_ZL3var = internal global i32 0, align 16, !dbg !0 diff --git a/llvm/test/DebugInfo/X86/2011-09-26-GlobalVarContext.ll b/llvm/test/DebugInfo/X86/2011-09-26-GlobalVarContext.ll index 9c33051ea75e..14793460e14c 100644 --- a/llvm/test/DebugInfo/X86/2011-09-26-GlobalVarContext.ll +++ b/llvm/test/DebugInfo/X86/2011-09-26-GlobalVarContext.ll @@ -46,17 +46,17 @@ attributes #1 = { nounwind readnone } ; CHECK: DW_TAG_variable ; CHECK-NOT: DW_TAG -; CHECK: DW_AT_name [DW_FORM_strp] ( .debug_str[0x{{[0-9a-f]*}}] = "LOC") +; CHECK: DW_AT_name [DW_FORM_strp] ( .debug_str[0x{{[0-9a-f]*}}] = "GLB") ; CHECK-NOT: DW_TAG -; CHECK: DW_AT_decl_file [DW_FORM_data1] ("/work/llvm/vanilla/test/DebugInfo{{[/\\]}}test.c") +; CHECK: DW_AT_decl_file [DW_FORM_data1] ("/work/llvm/vanilla/test/DebugInfo{{[/\\]}}test.c") ; CHECK-NOT: DW_TAG -; CHECK: DW_AT_decl_line [DW_FORM_data1] (4) +; CHECK: DW_AT_decl_line [DW_FORM_data1] (1) ; CHECK: DW_TAG_variable ; CHECK-NOT: DW_TAG -; CHECK: DW_AT_name [DW_FORM_strp] ( .debug_str[0x{{[0-9a-f]*}}] = "GLB") +; CHECK: DW_AT_name [DW_FORM_strp] ( .debug_str[0x{{[0-9a-f]*}}] = "LOC") ; CHECK-NOT: DW_TAG -; CHECK: DW_AT_decl_file [DW_FORM_data1] ("/work/llvm/vanilla/test/DebugInfo{{[/\\]}}test.c") +; CHECK: DW_AT_decl_file [DW_FORM_data1] ("/work/llvm/vanilla/test/DebugInfo{{[/\\]}}test.c") ; CHECK-NOT: DW_TAG -; CHECK: DW_AT_decl_line [DW_FORM_data1] (1) +; CHECK: DW_AT_decl_line [DW_FORM_data1] (4) diff --git a/llvm/test/DebugInfo/X86/DW_AT_calling-convention.ll b/llvm/test/DebugInfo/X86/DW_AT_calling-convention.ll index 82af77a1ef99..495f0f646628 100644 --- a/llvm/test/DebugInfo/X86/DW_AT_calling-convention.ll +++ b/llvm/test/DebugInfo/X86/DW_AT_calling-convention.ll @@ -21,6 +21,10 @@ ; CHECK: .debug_info contents: +; CHECK: DW_TAG_subroutine_type [[subroutine_abbrev]] * +; CHECK-NEXT: DW_AT_type [DW_FORM_ref4] {{.*}} +; CHECK-NEXT: DW_AT_calling_convention [DW_FORM_data1] (DW_CC_BORLAND_msfastcall) + ; CHECK: DW_TAG_subprogram [{{.*}}] * ; CHECK: DW_AT_low_pc ; CHECK: DW_AT_high_pc @@ -33,10 +37,6 @@ ; CHECK: DW_AT_type ; CHECK: DW_AT_external -; CHECK: DW_TAG_subroutine_type [[subroutine_abbrev]] * -; CHECK-NEXT: DW_AT_type [DW_FORM_ref4] {{.*}} -; CHECK-NEXT: DW_AT_calling_convention [DW_FORM_data1] (DW_CC_BORLAND_msfastcall) - ; ModuleID = 't.cpp' source_filename = "t.cpp" target datalayout = "e-m:x-p:32:32-i64:64-f80:32-n8:16:32-a:0:32-S32" diff --git a/llvm/test/DebugInfo/X86/align_cpp11.ll b/llvm/test/DebugInfo/X86/align_cpp11.ll index 537b7b4ff21e..14cfda7c06da 100644 --- a/llvm/test/DebugInfo/X86/align_cpp11.ll +++ b/llvm/test/DebugInfo/X86/align_cpp11.ll @@ -33,21 +33,6 @@ ; auto Lambda = [i](){}; ; } -; CHECK: DW_TAG_subprogram -; CHECK: DW_TAG_variable -; CHECK: DW_TAG_variable -; CHECK: DW_TAG_variable -; CHECK: DW_TAG_variable -; CHECK: DW_AT_name{{.*}}"i" -; CHECK: DW_AT_alignment{{.*}}32 -; CHECK: DW_TAG_variable -; CHECK: DW_TAG_class_type -; CHECK: DW_TAG_member -; CHECK: DW_AT_name{{.*}}"i" -; CHECK: DW_AT_alignment{{.*}}32 -; CHECK: DW_TAG_subprogram -; CHECK: DW_TAG_formal_parameter - ; CHECK: DW_TAG_class_type ; CHECK: DW_AT_name{{.*}}"C0" ; CHECK: DW_AT_alignment{{.*}}64 @@ 
-68,6 +53,21 @@ ; CHECK: DW_TAG_enumerator ; CHECK: DW_TAG_enumerator +; CHECK: DW_TAG_subprogram +; CHECK: DW_TAG_variable +; CHECK: DW_TAG_variable +; CHECK: DW_TAG_variable +; CHECK: DW_TAG_variable +; CHECK: DW_AT_name{{.*}}"i" +; CHECK: DW_AT_alignment{{.*}}32 +; CHECK: DW_TAG_variable +; CHECK: DW_TAG_class_type +; CHECK: DW_TAG_member +; CHECK: DW_AT_name{{.*}}"i" +; CHECK: DW_AT_alignment{{.*}}32 +; CHECK: DW_TAG_subprogram +; CHECK: DW_TAG_formal_parameter + ; CHECK: DW_TAG_class_type ; CHECK: DW_AT_name{{.*}}"C1" ; CHECK: DW_TAG_member diff --git a/llvm/test/DebugInfo/X86/align_objc.ll b/llvm/test/DebugInfo/X86/align_objc.ll index 375567096fc0..bc0abbdaa103 100644 --- a/llvm/test/DebugInfo/X86/align_objc.ll +++ b/llvm/test/DebugInfo/X86/align_objc.ll @@ -16,14 +16,7 @@ ; } ; CHECK: DW_TAG_compile_unit - -; CHECK: DW_TAG_subprogram -; CHECK: DW_TAG_variable -; CHECK: DW_TAG_variable -; CHECK: DW_AT_name{{.*}}"i" -; CHECK: DW_AT_alignment{{.*}}32 ; CHECK: DW_TAG_variable - ; CHECK: DW_TAG_typedef ; CHECK: DW_AT_name{{.*}}"S0" @@ -32,6 +25,12 @@ ; CHECK: DW_TAG_member ; CHECK: DW_TAG_base_type +; CHECK: DW_TAG_subprogram +; CHECK: DW_TAG_variable +; CHECK: DW_TAG_variable +; CHECK: DW_AT_name{{.*}}"i" +; CHECK: DW_AT_alignment{{.*}}32 + ; CHECK: DW_TAG_typedef ; CHECK: DW_AT_name{{.*}}"S1" ; CHECK: DW_TAG_structure_type diff --git a/llvm/test/DebugInfo/X86/arange-and-stub.ll b/llvm/test/DebugInfo/X86/arange-and-stub.ll index 9264fab1a8a4..db1837e02141 100644 --- a/llvm/test/DebugInfo/X86/arange-and-stub.ll +++ b/llvm/test/DebugInfo/X86/arange-and-stub.ll @@ -5,7 +5,7 @@ ; CHECK: .L_ZTId.DW.stub: ; CHECK: .data -; CHECK-NEXT: .Lsec_end1: +; CHECK-NEXT: .Lsec_end0: source_filename = "test/DebugInfo/X86/arange-and-stub.ll" target triple = "x86_64-linux-gnu" diff --git a/llvm/test/DebugInfo/X86/containing-type-extension-rust.ll b/llvm/test/DebugInfo/X86/containing-type-extension-rust.ll index 94cde895f35a..3e6c6d269082 100644 --- a/llvm/test/DebugInfo/X86/containing-type-extension-rust.ll +++ b/llvm/test/DebugInfo/X86/containing-type-extension-rust.ll @@ -2,7 +2,6 @@ ; RUN: llvm-dwarfdump -v -debug-info %t | FileCheck %s ; Check that any type can have a vtable holder. 
-; CHECK: DW_TAG_structure_type ; CHECK: [[SP:.*]]: DW_TAG_structure_type ; CHECK-NOT: TAG ; CHECK: DW_AT_containing_type [DW_FORM_ref4] diff --git a/llvm/test/DebugInfo/X86/debug-info-access.ll b/llvm/test/DebugInfo/X86/debug-info-access.ll index fdc96dc0f421..2f21dfb3fe0e 100644 --- a/llvm/test/DebugInfo/X86/debug-info-access.ll +++ b/llvm/test/DebugInfo/X86/debug-info-access.ll @@ -64,10 +64,6 @@ ; F f; ; H h; -; CHECK: DW_TAG_subprogram -; CHECK: DW_AT_name {{.*}}"free") -; CHECK-NOT: DW_AT_accessibility - ; CHECK: DW_TAG_member ; CHECK: DW_AT_name {{.*}}"union_priv") ; CHECK-NOT: DW_TAG @@ -128,6 +124,10 @@ ; CHECK: DW_AT_name ("I") ; CHECK: DW_AT_accessibility (DW_ACCESS_private) +; CHECK: DW_TAG_subprogram +; CHECK: DW_AT_name {{.*}}"free") +; CHECK-NOT: DW_AT_accessibility + %union.U = type { i32 } %struct.A = type { i8 } %class.B = type { i8 } diff --git a/llvm/test/DebugInfo/X86/debug-info-blocks.ll b/llvm/test/DebugInfo/X86/debug-info-blocks.ll index 0173f856f0de..0914fa68620c 100644 --- a/llvm/test/DebugInfo/X86/debug-info-blocks.ll +++ b/llvm/test/DebugInfo/X86/debug-info-blocks.ll @@ -5,6 +5,10 @@ ; rdar://problem/9279956 ; test that the DW_AT_location of self is at ( fbreg +{{[0-9]+}}, deref, +{{[0-9]+}} ) +; CHECK: [[A:.*]]: DW_TAG_structure_type +; CHECK-NEXT: DW_AT_APPLE_objc_complete_type +; CHECK-NEXT: DW_AT_name{{.*}}"A" + ; CHECK: DW_TAG_subprogram ; CHECK: DW_TAG_subprogram ; CHECK: DW_TAG_subprogram @@ -31,10 +35,6 @@ ; CHECK-NOT: DW_TAG ; CHECK: DW_AT_artificial -; CHECK: [[A:.*]]: DW_TAG_structure_type -; CHECK-NEXT: DW_AT_APPLE_objc_complete_type -; CHECK-NEXT: DW_AT_name{{.*}}"A" - ; CHECK: [[APTR]]: DW_TAG_pointer_type ; CHECK-NEXT: {[[A]]} diff --git a/llvm/test/DebugInfo/X86/debug-info-static-member.ll b/llvm/test/DebugInfo/X86/debug-info-static-member.ll index 915e48b3c03a..5e2f46686bd6 100644 --- a/llvm/test/DebugInfo/X86/debug-info-static-member.ll +++ b/llvm/test/DebugInfo/X86/debug-info-static-member.ll @@ -106,8 +106,6 @@ attributes #1 = { nounwind readnone } ; (for variables) or DW_AT_const_value (for constants). ; ; PRESENT: .debug_info contents: -; PRESENT: DW_TAG_subprogram -; PRESENT: DW_TAG_variable ; PRESENT: DW_TAG_variable ; PRESENT-NEXT: DW_AT_specification {{.*}} "a" ; PRESENT-NEXT: DW_AT_location @@ -158,8 +156,6 @@ attributes #1 = { nounwind readnone } ; For Darwin gdb: ; DARWINP: .debug_info contents: -; DARWINP: DW_TAG_subprogram -; DARWINP: DW_TAG_variable ; DARWINP: DW_TAG_variable ; DARWINP-NEXT: DW_AT_specification {{.*}} "a" ; DARWINP-NEXT: DW_AT_location diff --git a/llvm/test/DebugInfo/X86/debug-loc-offset.mir b/llvm/test/DebugInfo/X86/debug-loc-offset.mir index 72267ebcb417..95f524fc162c 100644 --- a/llvm/test/DebugInfo/X86/debug-loc-offset.mir +++ b/llvm/test/DebugInfo/X86/debug-loc-offset.mir @@ -31,22 +31,6 @@ # # Checking that we have two compile units with two sets of high/lo_pc. 
# CHECK: .debug_info contents -# -# CHECK: DW_TAG_compile_unit -# CHECK: DW_AT_low_pc {{.*}} (0x00000000 ".text") -# CHECK: DW_AT_high_pc -# -# CHECK: DW_TAG_subprogram -# CHECK-NOT: DW_TAG -# CHECK: DW_AT_linkage_name [DW_FORM_strp]{{.*}}"_Z3bari" -# CHECK-NOT: {{DW_TAG|NULL}} -# CHECK: DW_TAG_formal_parameter -# CHECK-NOT: DW_TAG -# CHECK: DW_AT_location [DW_FORM_sec_offset] ({{.*}} -# CHECK-NEXT: [0x00000000, 0x0000000a) ".text": DW_OP_consts +0, DW_OP_stack_value -# CHECK-NEXT: [0x0000000a, 0x00000017) ".text": DW_OP_consts +1, DW_OP_stack_value) -# CHECK-NEXT: DW_AT_name [DW_FORM_strp]{{.*}}"b" - # CHECK: DW_TAG_compile_unit # CHECK: DW_AT_low_pc {{.*}} (0x00000020 ".text") # CHECK: DW_AT_high_pc @@ -66,6 +50,21 @@ # CHECK: DW_AT_location [DW_FORM_exprloc] # CHECK-NOT: DW_AT_location # +# CHECK: DW_TAG_compile_unit +# CHECK: DW_AT_low_pc {{.*}} (0x00000000 ".text") +# CHECK: DW_AT_high_pc +# +# CHECK: DW_TAG_subprogram +# CHECK-NOT: DW_TAG +# CHECK: DW_AT_linkage_name [DW_FORM_strp]{{.*}}"_Z3bari" +# CHECK-NOT: {{DW_TAG|NULL}} +# CHECK: DW_TAG_formal_parameter +# CHECK-NOT: DW_TAG +# CHECK: DW_AT_location [DW_FORM_sec_offset] ({{.*}} +# CHECK-NEXT: [0x00000000, 0x0000000a) ".text": DW_OP_consts +0, DW_OP_stack_value +# CHECK-NEXT: [0x0000000a, 0x00000017) ".text": DW_OP_consts +1, DW_OP_stack_value) +# CHECK-NEXT: DW_AT_name [DW_FORM_strp]{{.*}}"b" +# # CHECK: .debug_loc contents: # CHECK: 0x00000000: # CHECK-NEXT: (0x00000000, 0x0000000a): DW_OP_consts +0, DW_OP_stack_value diff --git a/llvm/test/DebugInfo/X86/dwarf-aranges.ll b/llvm/test/DebugInfo/X86/dwarf-aranges.ll index b65eba451f4d..5358a30c6a06 100644 --- a/llvm/test/DebugInfo/X86/dwarf-aranges.ll +++ b/llvm/test/DebugInfo/X86/dwarf-aranges.ll @@ -8,22 +8,22 @@ ; -- alignment -- ; CHECK-NEXT: .zero 4,255 -; - it should have made one span covering all functions in this CU. -; CHECK-NEXT: .quad .Lfunc_begin0 -; CHECK-NEXT: .quad .Lsec_end0-.Lfunc_begin0 - ; - it should have made one span covering all vars in this CU. ; CHECK-NEXT: .quad some_data -; CHECK-NEXT: .quad .Lsec_end1-some_data +; CHECK-NEXT: .quad .Lsec_end0-some_data ; - it should have made one span covering all vars in this CU. ; CHECK-NEXT: .quad some_other -; CHECK-NEXT: .quad .Lsec_end2-some_other +; CHECK-NEXT: .quad .Lsec_end1-some_other ; - it should have made one span for each symbol. ; CHECK-NEXT: .quad some_bss ; CHECK-NEXT: .quad 4 +; - it should have made one span covering all functions in this CU. +; CHECK-NEXT: .quad .Lfunc_begin0 +; CHECK-NEXT: .quad .Lsec_end2-.Lfunc_begin0 + ; -- finish -- ; CHECK-NEXT: # ARange terminator diff --git a/llvm/test/DebugInfo/X86/dwarf-linkage-names.ll b/llvm/test/DebugInfo/X86/dwarf-linkage-names.ll index 75faee0ac2b5..2af2b86590a2 100644 --- a/llvm/test/DebugInfo/X86/dwarf-linkage-names.ll +++ b/llvm/test/DebugInfo/X86/dwarf-linkage-names.ll @@ -17,10 +17,10 @@ ; This assumes the variable will appear before the function. 
; LINKAGE1: .section .debug_info -; LINKAGE1: DW_TAG_subprogram +; LINKAGE1: DW_TAG_variable ; LINKAGE1-NOT: DW_TAG ; LINKAGE1: {{DW_AT_(MIPS_)?linkage_name}} -; LINKAGE1: DW_TAG_variable +; LINKAGE1: DW_TAG_subprogram ; LINKAGE1-NOT: DW_TAG ; LINKAGE1: {{DW_AT_(MIPS_)?linkage_name}} ; LINKAGE1: .section diff --git a/llvm/test/DebugInfo/X86/dwarfdump-DIImportedEntity_elements.ll b/llvm/test/DebugInfo/X86/dwarfdump-DIImportedEntity_elements.ll index 1a98fbe267f5..2d4937ee7f9f 100644 --- a/llvm/test/DebugInfo/X86/dwarfdump-DIImportedEntity_elements.ll +++ b/llvm/test/DebugInfo/X86/dwarfdump-DIImportedEntity_elements.ll @@ -3,21 +3,21 @@ ; RUN: llc %s -filetype=obj -o - | llvm-dwarfdump - | FileCheck %s +; CHECK: [[MYMOD:0x[0-9a-f]+]]: DW_TAG_module +; CHECK: DW_AT_name ("mymod") +; CHECK: [[VAR1:0x[0-9a-f]+]]: DW_TAG_variable +; CHECK: DW_AT_name ("var1") + ; CHECK: DW_TAG_subprogram ; CHECK: DW_AT_name ("main") ; CHECK: DW_TAG_subprogram ; CHECK: DW_AT_name ("use_renamed") ; CHECK: DW_TAG_imported_module -; CHECK: DW_AT_import ([[MYMOD:0x[0-9a-f]+]]) +; CHECK: DW_AT_import ([[MYMOD]]) ; CHECK: DW_TAG_imported_declaration -; CHECK: DW_AT_import ([[VAR1:0x[0-9a-f]+]]) +; CHECK: DW_AT_import ([[VAR1]]) ; CHECK: DW_AT_name ("var4") -; CHECK: [[MYMOD]]: DW_TAG_module -; CHECK: DW_AT_name ("mymod") -; CHECK: [[VAR1]]: DW_TAG_variable -; CHECK: DW_AT_name ("var1") - ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; ;;This test case is generated from ;;module mymod diff --git a/llvm/test/DebugInfo/X86/generate-odr-hash.ll b/llvm/test/DebugInfo/X86/generate-odr-hash.ll index f9153f9c9756..9fa954cd2485 100644 --- a/llvm/test/DebugInfo/X86/generate-odr-hash.ll +++ b/llvm/test/DebugInfo/X86/generate-odr-hash.ll @@ -54,7 +54,12 @@ ; FISSION-LABEL: .debug_info.dwo contents: ; CHECK: Compile Unit: length = [[CU_SIZE:[0-9a-f]+]] -; CHECK: DW_TAG_structure_type +; CHECK: [[BAR:^0x........]]: DW_TAG_structure_type +; CHECK-NEXT: DW_AT_declaration +; CHECK-NEXT: DW_AT_signature {{.*}} (0x1d02f3be30cc5688) +; CHECK: [[FLUFFY:^0x........]]: DW_TAG_class_type +; CHECK-NEXT: DW_AT_declaration +; CHECK-NEXT: DW_AT_signature {{.*}} (0xb04af47397402e77) ; Ensure the CU-local type 'walrus' is not placed in a type unit. 
; CHECK: [[WALRUS:^0x........]]: DW_TAG_structure_type @@ -63,13 +68,6 @@ ; CHECK-NEXT: DW_AT_decl_file ; CHECK-NEXT: DW_AT_decl_line -; CHECK: [[BAR:^0x........]]: DW_TAG_structure_type -; CHECK-NEXT: DW_AT_declaration -; CHECK-NEXT: DW_AT_signature {{.*}} (0x1d02f3be30cc5688) -; CHECK: [[FLUFFY:^0x........]]: DW_TAG_class_type -; CHECK-NEXT: DW_AT_declaration -; CHECK-NEXT: DW_AT_signature {{.*}} (0xb04af47397402e77) - ; CHECK: [[WOMBAT:^0x........]]: DW_TAG_structure_type ; CHECK-NEXT: DW_AT_declaration ; CHECK-NEXT: DW_AT_signature {{.*}} (0xfd756cee88f8a118) @@ -123,10 +121,10 @@ ; CHECK-LABEL: .debug_line contents: ; CHECK: Line table prologue ; CHECK-NOT: file_names[ -; CHECK: file_names[ -; CHECK-NEXT: name: "bar.cpp" ; SINGLE: file_names[ ; SINGLE-NEXT: name: "bar.h" +; CHECK: file_names[ +; CHECK-NEXT: name: "bar.cpp" ; CHECK-NOT: file_names[ ; FISSION: .debug_line.dwo contents: diff --git a/llvm/test/DebugInfo/X86/gnu-public-names.ll b/llvm/test/DebugInfo/X86/gnu-public-names.ll index de44b4341868..d19358ee6e58 100644 --- a/llvm/test/DebugInfo/X86/gnu-public-names.ll +++ b/llvm/test/DebugInfo/X86/gnu-public-names.ll @@ -77,6 +77,9 @@ ; CHECK: DW_AT_GNU_pubnames (true) ; CHECK-NOT: DW_AT_GNU_pubtypes [ +; CHECK: [[STATIC_MEM_VAR:0x[0-9a-f]+]]: DW_TAG_variable +; CHECK: DW_AT_specification {{.*}} "static_member_variable" + ; CHECK: [[C:0x[0-9a-f]+]]: DW_TAG_structure_type ; CHECK: DW_AT_name ("C") ; CHECK: DW_TAG_member @@ -95,23 +98,11 @@ ; CHECK: DW_AT_name ("int") ; CHECK: DW_TAG_pointer_type -; CHECK: [[MEM_FUNC:0x[0-9a-f]+]]: DW_TAG_subprogram -; CHECK: DW_AT_specification {{.*}} "_ZN1C15member_functionEv" -; CHECK: DW_TAG_formal_parameter -; CHECK: NULL - -; CHECK: [[STATIC_MEM_FUNC:0x[0-9a-f]+]]: DW_TAG_subprogram -; CHECK: DW_AT_specification {{.*}} "_ZN1C22static_member_functionEv" - -; CHECK: [[GLOBAL_FUNC:0x[0-9a-f]+]]: DW_TAG_subprogram -; CHECK: DW_AT_linkage_name -; CHECK: DW_AT_name ("global_function") +; CHECK: [[GLOB_VAR:0x[0-9a-f]+]]: DW_TAG_variable +; CHECK: DW_AT_name ("global_variable") ; CHECK: [[NS:0x[0-9a-f]+]]: DW_TAG_namespace ; CHECK: DW_AT_name ("ns") -; CHECK: [[GLOB_NS_FUNC:0x[0-9a-f]+]]: DW_TAG_subprogram -; CHECK: DW_AT_linkage_name -; CHECK: DW_AT_name ("global_namespace_function") ; CHECK: [[GLOB_NS_VAR:0x[0-9a-f]+]]: DW_TAG_variable ; CHECK: DW_AT_name ("global_namespace_variable") ; CHECK-NOT: DW_AT_specification @@ -127,6 +118,9 @@ ; CHECK: DW_TAG_member ; CHECK: NULL ; CHECK: DW_TAG_variable +; CHECK: [[GLOB_NS_FUNC:0x[0-9a-f]+]]: DW_TAG_subprogram +; CHECK: DW_AT_linkage_name +; CHECK: DW_AT_name ("global_namespace_function") ; CHECK: NULL ; CHECK: DW_TAG_subprogram @@ -136,13 +130,6 @@ ; CHECK: DW_AT_location ; CHECK: NULL -; CHECK: DW_TAG_subprogram -; CHECK: [[STATIC_MEM_VAR:0x[0-9a-f]+]]: DW_TAG_variable -; CHECK: DW_AT_specification {{.*}} "static_member_variable" - -; CHECK: [[GLOB_VAR:0x[0-9a-f]+]]: DW_TAG_variable -; CHECK: DW_AT_name ("global_variable") - ; CHECK: [[ANON:.*]]: DW_TAG_namespace ; CHECK-NOT: DW_AT_name ; CHECK: [[ANON_I:.*]]: DW_TAG_variable @@ -184,6 +171,20 @@ ; CHECK: NULL ; CHECK: DW_TAG_imported_declaration + +; CHECK: [[MEM_FUNC:0x[0-9a-f]+]]: DW_TAG_subprogram +; CHECK: DW_AT_specification {{.*}} "_ZN1C15member_functionEv" +; CHECK: DW_TAG_formal_parameter +; CHECK: NULL + +; CHECK: [[STATIC_MEM_FUNC:0x[0-9a-f]+]]: DW_TAG_subprogram +; CHECK: DW_AT_specification {{.*}} "_ZN1C22static_member_functionEv" + +; CHECK: [[GLOBAL_FUNC:0x[0-9a-f]+]]: DW_TAG_subprogram +; CHECK: DW_AT_linkage_name +; CHECK: DW_AT_name 
("global_function") + +; CHECK: DW_TAG_subprogram ; CHECK: DW_TAG_pointer_type ; CHECK: DW_TAG_pointer_type ; CHECK: NULL diff --git a/llvm/test/DebugInfo/X86/lexical-block-file-inline.ll b/llvm/test/DebugInfo/X86/lexical-block-file-inline.ll index 38a096c7bfc9..13a8c0acb277 100644 --- a/llvm/test/DebugInfo/X86/lexical-block-file-inline.ll +++ b/llvm/test/DebugInfo/X86/lexical-block-file-inline.ll @@ -32,6 +32,8 @@ ; CHECK: DW_TAG_lexical_block ; CHECK-NOT: {{DW_TAG|NULL}} ; CHECK: DW_TAG_variable +; CHECK-NOT: {{DW_TAG|NULL}} +; CHECK: DW_TAG_imported_module ;; Abstract "bar" function ; CHECK: [[Offset_bar]]: DW_TAG_subprogram @@ -58,6 +60,9 @@ ; CHECK: DW_TAG_lexical_block ; CHECK-NOT: {{DW_TAG|NULL}} ; CHECK: DW_TAG_variable +; CHECK-NOT: {{DW_TAG|NULL}} +; CHECK: DW_TAG_imported_module + ; Function Attrs: alwaysinline nounwind define i32 @_Z3barv() #0 !dbg !4 { diff --git a/llvm/test/DebugInfo/X86/linkage-name.ll b/llvm/test/DebugInfo/X86/linkage-name.ll index e43cccfb5fcf..f5c82b1759a3 100644 --- a/llvm/test/DebugInfo/X86/linkage-name.ll +++ b/llvm/test/DebugInfo/X86/linkage-name.ll @@ -1,7 +1,7 @@ ; RUN: llc -mtriple=x86_64-macosx %s -o %t -filetype=obj ; RUN: llvm-dwarfdump -v -debug-info %t | FileCheck %s -; CHECK: DW_TAG_subprogram [8] * +; CHECK: DW_TAG_subprogram [9] * ; CHECK-NOT: DW_AT_{{(MIPS_)?}}linkage_name ; CHECK: DW_AT_specification diff --git a/llvm/test/DebugInfo/X86/namelist1.ll b/llvm/test/DebugInfo/X86/namelist1.ll index 149d6cbbe318..57316a806b71 100644 --- a/llvm/test/DebugInfo/X86/namelist1.ll +++ b/llvm/test/DebugInfo/X86/namelist1.ll @@ -4,11 +4,10 @@ ; RUN: llc -O0 -mtriple=x86_64-unknown-linux-gnu %s -filetype=obj -o %t.o ; RUN: llvm-dwarfdump %t.o | FileCheck %s ; -; CHECK: [[ITEM2:0x.+]]: DW_TAG_variable -; CHECK: DW_AT_name ("b") -; CHECK: DW_TAG_variable ; CHECK: [[ITEM1:0x.+]]: DW_TAG_variable ; CHECK: DW_AT_name ("a") +; CHECK: [[ITEM2:0x.+]]: DW_TAG_variable +; CHECK: DW_AT_name ("b") ; CHECK: DW_TAG_namelist ; CHECK: DW_AT_name ("nml") ; CHECK: DW_TAG_namelist_item diff --git a/llvm/test/DebugInfo/X86/sret.ll b/llvm/test/DebugInfo/X86/sret.ll index 673e982b0534..479f2f605421 100644 --- a/llvm/test/DebugInfo/X86/sret.ll +++ b/llvm/test/DebugInfo/X86/sret.ll @@ -3,8 +3,8 @@ ; Based on the debuginfo-tests/sret.cpp code. -; CHECK-DWO: DW_AT_GNU_dwo_id (0x46a03e2a3394ce47) -; CHECK-DWO: DW_AT_GNU_dwo_id (0x46a03e2a3394ce47) +; CHECK-DWO: DW_AT_GNU_dwo_id (0xa58a336e896549f1) +; CHECK-DWO: DW_AT_GNU_dwo_id (0xa58a336e896549f1) ; RUN: llc -O0 -fast-isel=true -mtriple=x86_64-apple-darwin -filetype=obj -o - %s | llvm-dwarfdump -debug-info - | FileCheck -check-prefixes=CHECK,FASTISEL %s ; RUN: llc -O0 -fast-isel=false -mtriple=x86_64-apple-darwin -filetype=obj -o - %s | llvm-dwarfdump -debug-info - | FileCheck -check-prefixes=CHECK,SDAG %s diff --git a/llvm/test/DebugInfo/X86/subprogram-across-cus.ll b/llvm/test/DebugInfo/X86/subprogram-across-cus.ll index a265b8e2bc30..345342aecf3c 100644 --- a/llvm/test/DebugInfo/X86/subprogram-across-cus.ll +++ b/llvm/test/DebugInfo/X86/subprogram-across-cus.ll @@ -35,12 +35,12 @@ ; Check that there are no verifier failures, and that the SP for "main" appears ; in the correct CU. 
; CHECK-LABEL: DW_TAG_compile_unit +; CHECK: DW_AT_name ("1.cpp") +; CHECK-NOT: DW_AT_name ("main") +; CHECK-LABEL: DW_TAG_compile_unit ; CHECK: DW_AT_name ("2.cpp") ; CHECK: DW_TAG_subprogram ; CHECK: DW_AT_name ("main") -; CHECK-LABEL: DW_TAG_compile_unit -; CHECK: DW_AT_name ("1.cpp") -; CHECK-NOT: DW_AT_name ("main") source_filename = "ld-temp.o" target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128" diff --git a/llvm/test/DebugInfo/X86/template.ll b/llvm/test/DebugInfo/X86/template.ll index 8a071cc81389..6ac7f2bc0a57 100644 --- a/llvm/test/DebugInfo/X86/template.ll +++ b/llvm/test/DebugInfo/X86/template.ll @@ -15,10 +15,26 @@ ; VERIFY-NOT: error: +; CHECK: [[INT:0x[0-9a-f]*]]:{{ *}}DW_TAG_base_type +; CHECK-NEXT: DW_AT_name{{.*}} = "int" + +; CHECK: DW_TAG_structure_type +; CHECK: DW_AT_name{{.*}}"y_impl" +; CHECK-NOT: {{TAG|NULL}} +; CHECK: DW_TAG_template_type_parameter + +; CHECK: DW_TAG_variable +; CHECK-NEXT: DW_AT_name{{.*}}"var" +; CHECK-NOT: NULL +; CHECK: DW_TAG_template_type_parameter +; CHECK-NEXT: DW_AT_type{{.*}}=> {[[INT]]} +; CHECK-NEXT: DW_AT_name{{.*}}= "T" + + ; CHECK: DW_AT_name{{.*}}"func<3, &glbl, y_impl, nullptr, E, 1, 2>" ; CHECK-NOT: NULL ; CHECK: DW_TAG_template_value_parameter -; CHECK-NEXT: DW_AT_type{{.*}}=> {[[INT:0x[0-9a-f]*]]} +; CHECK-NEXT: DW_AT_type{{.*}}=> {[[INT]]} ; CHECK-NEXT: DW_AT_name{{.*}}= "x" ; CHECK-NEXT: DW_AT_const_value [DW_FORM_sdata]{{.*}}(3) @@ -55,21 +71,6 @@ ; CHECK-NEXT: DW_AT_type{{.*}}=> {[[INT]]} ; CHECK-NEXT: DW_AT_const_value [DW_FORM_sdata]{{.*}}(2) -; CHECK: [[INT]]:{{ *}}DW_TAG_base_type -; CHECK-NEXT: DW_AT_name{{.*}} = "int" - -; CHECK: DW_TAG_structure_type -; CHECK: DW_AT_name{{.*}}"y_impl" -; CHECK-NOT: {{TAG|NULL}} -; CHECK: DW_TAG_template_type_parameter - -; CHECK: DW_TAG_variable -; CHECK-NEXT: DW_AT_name{{.*}}"var" -; CHECK-NOT: NULL -; CHECK: DW_TAG_template_type_parameter -; CHECK-NEXT: DW_AT_type{{.*}}=> {[[INT]]} -; CHECK-NEXT: DW_AT_name{{.*}}= "T" - ; CHECK: [[INTPTR]]:{{ *}}DW_TAG_pointer_type ; CHECK-NEXT: DW_AT_type{{.*}} => {[[INT]]} diff --git a/llvm/test/DebugInfo/X86/tls.ll b/llvm/test/DebugInfo/X86/tls.ll index 33ab5559fcb9..ecb1d93bbd28 100644 --- a/llvm/test/DebugInfo/X86/tls.ll +++ b/llvm/test/DebugInfo/X86/tls.ll @@ -30,45 +30,33 @@ ; that here instead of raw assembly printing ; FISSION: .section .debug_info.dwo, +; 3 bytes of data in this DW_FORM_exprloc representation of the location of 'tls' +; FISSION: .byte 3{{ *}}# DW_AT_location +; DW_OP_GNU_const_index (0xfx == 252) to refer to the debug_addr table +; FISSION-NEXT: .byte 252 +; an index of zero into the debug_addr table +; FISSION-NEXT: .byte 0 + ; SINGLE: .section .debug_info, ; DARWIN: .section {{.*}}debug_info, -; SINGLE-64: DW_TAG_variable ; 10 bytes of data in this DW_FORM_exprloc representation of the location of 'tls' ; SINGLE-64: .byte 10 # DW_AT_location ; DW_OP_const8u (0x0e == 14) of address ; SINGLE-64-NEXT: .byte 14 ; SINGLE-64-NEXT: .quad tls@DTPOFF -; DARWIN: DW_TAG_variable ; DARWIN: .byte 10 ## DW_AT_location ; DW_OP_const8u (0x0e == 14) of address ; DARWIN-NEXT: .byte 14 ; DARWIN-NEXT: .quad _tls -; SINGLE-32: DW_TAG_variable ; 6 bytes of data in 32-bit mode ; SINGLE-32: .byte 6 # DW_AT_location ; DW_OP_const4u (0x0e == 12) of address ; SINGLE-32-NEXT: .byte 12 ; SINGLE-32-NEXT: .long tls@DTPOFF -; FISSION: DW_TAG_template_value_parameter -; FISSION: .byte 3 # DW_AT_location -; DW_OP_GNU_addr_index -; FISSION-NEXT: .byte 251 -; FISSION-NEXT: .byte 2 -; DW_OP_stack_value -; 
FISSION-NEXT: .byte 159 - -; FISSION: DW_TAG_variable -; 3 bytes of data in this DW_FORM_exprloc representation of the location of 'tls' -; FISSION: .byte 3{{ *}}# DW_AT_location -; DW_OP_GNU_const_index (0xfx == 252) to refer to the debug_addr table -; FISSION-NEXT: .byte 252 -; an index of 1 into the debug_addr table -; FISSION-NEXT: .byte 1 - ; DW_OP_GNU_push_tls_address ; GNUOP-NEXT: .byte 224 ; DW_OP_form_tls_address @@ -78,12 +66,19 @@ ; FISSION: .byte 2 # DW_AT_location ; DW_OP_GNU_addr_index ; FISSION-NEXT: .byte 251 -; FISSION-NEXT: .byte 2 +; FISSION-NEXT: .byte 1 + +; FISSION: DW_TAG_template_value_parameter +; FISSION: .byte 3 # DW_AT_location +; DW_OP_GNU_addr_index +; FISSION-NEXT: .byte 251 +; FISSION-NEXT: .byte 1 +; DW_OP_stack_value +; FISSION-NEXT: .byte 159 ; check that the expected TLS address description is the first thing in the debug_addr section ; FISSION: .section .debug_addr ; FISSION-NEXT: .Laddr_table_base0: -; FISSION-NEXT: .quad .Lfunc_begin0 ; FISSION-NEXT: .quad tls@DTPOFF ; FISSION-NEXT: .quad glbl ; FISSION-NOT: .quad glbl diff --git a/llvm/test/DebugInfo/X86/type-units-maybe-unused-types.ll b/llvm/test/DebugInfo/X86/type-units-maybe-unused-types.ll index 78baf7987157..6bfc1e881de7 100644 --- a/llvm/test/DebugInfo/X86/type-units-maybe-unused-types.ll +++ b/llvm/test/DebugInfo/X86/type-units-maybe-unused-types.ll @@ -52,8 +52,6 @@ ;; The check-not directives check that Unused doesn't get a DIE in the CU. ; CHECK: DW_TAG_compile_unit ; CHECK-NOT: DW_AT_signature -; CHECK: DW_AT_type ([[DIE_Enum:[0-9a-fx]+]] "Ex::Enum") -; CHECK-NOT: DW_AT_signature ; CHECK: DW_AT_type ([[DIE_Outer:[0-9a-fx]+]] "Outer") ; CHECK-NOT: DW_AT_signature @@ -62,10 +60,13 @@ ; CHECK: [[DIE_Outer]]: DW_TAG_class_type ; CHECK-NEXT: DW_AT_declaration (true) ; CHECK-NEXT: DW_AT_signature ([[SIG_Outer]]) -; CHECK-EMPTY: + +; CHECK-NOT: DW_AT_signature +; CHECK: DW_AT_type ([[DIE_Enum:[0-9a-fx]+]] "Ex::Enum") +; CHECK-NOT: DW_AT_signature ;; Ex is not referenced in the CU but its nested type, Enum, is. 
-; CHECK-NEXT: DW_TAG_structure_type +; CHECK: DW_TAG_structure_type ; CHECK-NEXT: DW_AT_declaration (true) ; CHECK-NEXT: DW_AT_signature ([[SIG_Ex]]) ; CHECK-EMPTY: diff --git a/llvm/test/DebugInfo/X86/vla-global.ll b/llvm/test/DebugInfo/X86/vla-global.ll index 81d00ead44aa..b846df22c37e 100644 --- a/llvm/test/DebugInfo/X86/vla-global.ll +++ b/llvm/test/DebugInfo/X86/vla-global.ll @@ -1,7 +1,4 @@ ; RUN: llc -mtriple=x86_64-apple-darwin %s -o - -filetype=obj | llvm-dwarfdump - | FileCheck %s -; CHECK: DW_TAG_subprogram -; CHECK: DW_TAG_variable -; CHECK: DW_TAG_variable ; CHECK: 0x00000[[G:.*]]: DW_TAG_variable ; CHECK-NEXT: DW_AT_name ("g") ; CHECK: DW_TAG_array_type diff --git a/llvm/test/DebugInfo/attr-btf_tag.ll b/llvm/test/DebugInfo/attr-btf_tag.ll index 5a603c551f22..ecbb1e721d5a 100644 --- a/llvm/test/DebugInfo/attr-btf_tag.ll +++ b/llvm/test/DebugInfo/attr-btf_tag.ll @@ -43,6 +43,37 @@ attributes #1 = { nofree nosync nounwind readnone speculatable willreturn } !llvm.module.flags = !{!10, !11, !12, !13, !14} !llvm.ident = !{!15} +!0 = !DIGlobalVariableExpression(var: !1, expr: !DIExpression()) +!1 = distinct !DIGlobalVariable(name: "g1", scope: !2, file: !3, line: 8, type: !6, isLocal: false, isDefinition: true, annotations: !7) + +; CHECK: DW_TAG_variable +; CHECK-NEXT: DW_AT_name ("g1") +; CHECK: DW_TAG_LLVM_annotation +; CHECK-NEXT: DW_AT_name ("btf_decl_tag") +; CHECK-NEXT: DW_AT_const_value ("tag1") +; CHECK-EMPTY: +; CHECK-NEXT: DW_TAG_LLVM_annotation +; CHECK-NEXT: DW_AT_name ("btf_decl_tag") +; CHECK-NEXT: DW_AT_const_value ("tag2") +; CHECK-EMPTY: +; CHECK-NEXT: NULL + +!2 = distinct !DICompileUnit(language: DW_LANG_C99, file: !3, producer: "clang version 13.0.0 (https://github.com/llvm/llvm-project.git 305231a4f71b68945b4dd92925c76ff49e377c86)", isOptimized: false, runtimeVersion: 0, emissionKind: FullDebug, enums: !4, globals: !5, splitDebugInlining: false, nameTableKind: None) +!3 = !DIFile(filename: "t.c", directory: "/tmp/home/yhs/work/tests/llvm/btf_tag") +!4 = !{} +!5 = !{!0} +!6 = !DIBasicType(name: "int", size: 32, encoding: DW_ATE_signed) +!7 = !{!8, !9} +!8 = !{!"btf_decl_tag", !"tag1"} +!9 = !{!"btf_decl_tag", !"tag2"} +!10 = !{i32 7, !"Dwarf Version", i32 4} +!11 = !{i32 2, !"Debug Info Version", i32 3} +!12 = !{i32 1, !"wchar_size", i32 4} +!13 = !{i32 7, !"uwtable", i32 1} +!14 = !{i32 7, !"frame-pointer", i32 2} +!15 = !{!"clang version 13.0.0 (https://github.com/llvm/llvm-project.git 305231a4f71b68945b4dd92925c76ff49e377c86)"} +!16 = distinct !DISubprogram(name: "foo", scope: !3, file: !3, line: 10, type: !17, scopeLine: 10, flags: DIFlagPrototyped, spFlags: DISPFlagDefinition, unit: !2, retainedNodes: !4, annotations: !7) + ; CHECK: DW_TAG_subprogram ; CHECK: DW_AT_name ("foo") ; CHECK: DW_TAG_formal_parameter @@ -66,17 +97,12 @@ attributes #1 = { nofree nosync nounwind readnone speculatable willreturn } ; CHECK-EMPTY: ; CHECK-NEXT: NULL -; CHECK: DW_TAG_variable -; CHECK-NEXT: DW_AT_name ("g1") -; CHECK: DW_TAG_LLVM_annotation -; CHECK-NEXT: DW_AT_name ("btf_decl_tag") -; CHECK-NEXT: DW_AT_const_value ("tag1") -; CHECK-EMPTY: -; CHECK-NEXT: DW_TAG_LLVM_annotation -; CHECK-NEXT: DW_AT_name ("btf_decl_tag") -; CHECK-NEXT: DW_AT_const_value ("tag2") -; CHECK-EMPTY: -; CHECK-NEXT: NULL +!17 = !DISubroutineType(types: !18) +!18 = !{!6, !19} +!19 = !DIDerivedType(tag: DW_TAG_pointer_type, baseType: !20, size: 64) +!20 = distinct !DICompositeType(tag: DW_TAG_structure_type, name: "t1", file: !3, line: 4, size: 32, elements: !21, annotations: !7) +!21 = !{!22} 
+!22 = !DIDerivedType(tag: DW_TAG_member, name: "a", scope: !20, file: !3, line: 5, baseType: !6, size: 32, annotations: !7) ; CHECK: DW_TAG_structure_type ; CHECK-NEXT: DW_AT_name ("t1") @@ -102,29 +128,6 @@ attributes #1 = { nofree nosync nounwind readnone speculatable willreturn } ; CHECK-EMPTY: ; CHECK-NEXT: NULL -!0 = !DIGlobalVariableExpression(var: !1, expr: !DIExpression()) -!1 = distinct !DIGlobalVariable(name: "g1", scope: !2, file: !3, line: 8, type: !6, isLocal: false, isDefinition: true, annotations: !7) -!2 = distinct !DICompileUnit(language: DW_LANG_C99, file: !3, producer: "clang version 13.0.0 (https://github.com/llvm/llvm-project.git 305231a4f71b68945b4dd92925c76ff49e377c86)", isOptimized: false, runtimeVersion: 0, emissionKind: FullDebug, enums: !4, globals: !5, splitDebugInlining: false, nameTableKind: None) -!3 = !DIFile(filename: "t.c", directory: "/tmp/home/yhs/work/tests/llvm/btf_tag") -!4 = !{} -!5 = !{!0} -!6 = !DIBasicType(name: "int", size: 32, encoding: DW_ATE_signed) -!7 = !{!8, !9} -!8 = !{!"btf_decl_tag", !"tag1"} -!9 = !{!"btf_decl_tag", !"tag2"} -!10 = !{i32 7, !"Dwarf Version", i32 4} -!11 = !{i32 2, !"Debug Info Version", i32 3} -!12 = !{i32 1, !"wchar_size", i32 4} -!13 = !{i32 7, !"uwtable", i32 1} -!14 = !{i32 7, !"frame-pointer", i32 2} -!15 = !{!"clang version 13.0.0 (https://github.com/llvm/llvm-project.git 305231a4f71b68945b4dd92925c76ff49e377c86)"} -!16 = distinct !DISubprogram(name: "foo", scope: !3, file: !3, line: 10, type: !17, scopeLine: 10, flags: DIFlagPrototyped, spFlags: DISPFlagDefinition, unit: !2, retainedNodes: !4, annotations: !7) -!17 = !DISubroutineType(types: !18) -!18 = !{!6, !19} -!19 = !DIDerivedType(tag: DW_TAG_pointer_type, baseType: !20, size: 64) -!20 = distinct !DICompositeType(tag: DW_TAG_structure_type, name: "t1", file: !3, line: 4, size: 32, elements: !21, annotations: !7) -!21 = !{!22} -!22 = !DIDerivedType(tag: DW_TAG_member, name: "a", scope: !20, file: !3, line: 5, baseType: !6, size: 32, annotations: !7) !23 = !DILocalVariable(name: "arg", arg: 1, scope: !16, file: !3, line: 10, type: !19, annotations: !7) !24 = !DILocation(line: 10, column: 48, scope: !16) !25 = !DILocation(line: 11, column: 10, scope: !16) diff --git a/llvm/test/MC/WebAssembly/debug-info.ll b/llvm/test/MC/WebAssembly/debug-info.ll index a234350546f7..d2a815f09716 100644 --- a/llvm/test/MC/WebAssembly/debug-info.ll +++ b/llvm/test/MC/WebAssembly/debug-info.ll @@ -154,20 +154,20 @@ ; CHECK-NEXT: 0x16 R_WASM_SECTION_OFFSET_I32 .debug_line 0 ; CHECK-NEXT: 0x1A R_WASM_SECTION_OFFSET_I32 .debug_str 62 ; CHECK-NEXT: 0x1E R_WASM_FUNCTION_OFFSET_I32 f2 0 -; CHECK-NEXT: 0x27 R_WASM_FUNCTION_OFFSET_I32 f2 0 -; CHECK-NEXT: 0x32 R_WASM_GLOBAL_INDEX_I32 __stack_pointer -; CHECK-NEXT: 0x37 R_WASM_SECTION_OFFSET_I32 .debug_str 118 -; CHECK-NEXT: 0x3E R_WASM_SECTION_OFFSET_I32 .debug_str 105 -; CHECK-NEXT: 0x4A R_WASM_MEMORY_ADDR_I32 foo 0 -; CHECK-NEXT: 0x54 R_WASM_SECTION_OFFSET_I32 .debug_str 109 -; CHECK-NEXT: 0x5B R_WASM_SECTION_OFFSET_I32 .debug_str 113 -; CHECK-NEXT: 0x67 R_WASM_MEMORY_ADDR_I32 ptr2 0 +; CHECK-NEXT: 0x27 R_WASM_SECTION_OFFSET_I32 .debug_str 105 +; CHECK-NEXT: 0x33 R_WASM_MEMORY_ADDR_I32 foo 0 +; CHECK-NEXT: 0x3D R_WASM_SECTION_OFFSET_I32 .debug_str 109 +; CHECK-NEXT: 0x44 R_WASM_SECTION_OFFSET_I32 .debug_str 113 +; CHECK-NEXT: 0x50 R_WASM_MEMORY_ADDR_I32 ptr2 0 +; CHECK-NEXT: 0x5B R_WASM_FUNCTION_OFFSET_I32 f2 0 +; CHECK-NEXT: 0x66 R_WASM_GLOBAL_INDEX_I32 __stack_pointer +; CHECK-NEXT: 0x6B R_WASM_SECTION_OFFSET_I32 .debug_str 118 ; 
CHECK-NEXT: } ; CHECK-NEXT: Section (10) .debug_aranges { ; CHECK-NEXT: 0x6 R_WASM_SECTION_OFFSET_I32 .debug_info 0 -; CHECK-NEXT: 0x10 R_WASM_FUNCTION_OFFSET_I32 f2 0 -; CHECK-NEXT: 0x18 R_WASM_MEMORY_ADDR_I32 foo 0 -; CHECK-NEXT: 0x20 R_WASM_MEMORY_ADDR_I32 ptr2 0 +; CHECK-NEXT: 0x10 R_WASM_MEMORY_ADDR_I32 foo 0 +; CHECK-NEXT: 0x18 R_WASM_MEMORY_ADDR_I32 ptr2 0 +; CHECK-NEXT: 0x20 R_WASM_FUNCTION_OFFSET_I32 f2 0 ; CHECK-NEXT: } ; CHECK-NEXT: Section (12) .debug_pubnames { ; CHECK-NEXT: 0x6 R_WASM_SECTION_OFFSET_I32 .debug_info 0 diff --git a/llvm/test/MC/WebAssembly/debug-info64.ll b/llvm/test/MC/WebAssembly/debug-info64.ll index af5b57511fcc..47b33aac104c 100644 --- a/llvm/test/MC/WebAssembly/debug-info64.ll +++ b/llvm/test/MC/WebAssembly/debug-info64.ll @@ -160,20 +160,20 @@ ; CHECK-NEXT: 0x16 R_WASM_SECTION_OFFSET_I32 .debug_line 0 ; CHECK-NEXT: 0x1A R_WASM_SECTION_OFFSET_I32 .debug_str 62 ; CHECK-NEXT: 0x1E R_WASM_FUNCTION_OFFSET_I64 f2 0 -; CHECK-NEXT: 0x2B R_WASM_FUNCTION_OFFSET_I64 f2 0 -; CHECK-NEXT: 0x3A R_WASM_GLOBAL_INDEX_I32 __stack_pointer -; CHECK-NEXT: 0x3F R_WASM_SECTION_OFFSET_I32 .debug_str 118 -; CHECK-NEXT: 0x46 R_WASM_SECTION_OFFSET_I32 .debug_str 105 -; CHECK-NEXT: 0x52 R_WASM_MEMORY_ADDR_I64 foo 0 -; CHECK-NEXT: 0x60 R_WASM_SECTION_OFFSET_I32 .debug_str 109 -; CHECK-NEXT: 0x67 R_WASM_SECTION_OFFSET_I32 .debug_str 113 -; CHECK-NEXT: 0x73 R_WASM_MEMORY_ADDR_I64 ptr2 0 +; CHECK-NEXT: 0x2B R_WASM_SECTION_OFFSET_I32 .debug_str 105 +; CHECK-NEXT: 0x37 R_WASM_MEMORY_ADDR_I64 foo 0 +; CHECK-NEXT: 0x45 R_WASM_SECTION_OFFSET_I32 .debug_str 109 +; CHECK-NEXT: 0x4C R_WASM_SECTION_OFFSET_I32 .debug_str 113 +; CHECK-NEXT: 0x58 R_WASM_MEMORY_ADDR_I64 ptr2 0 +; CHECK-NEXT: 0x67 R_WASM_FUNCTION_OFFSET_I64 f2 0 +; CHECK-NEXT: 0x76 R_WASM_GLOBAL_INDEX_I32 __stack_pointer +; CHECK-NEXT: 0x7B R_WASM_SECTION_OFFSET_I32 .debug_str 118 ; CHECK-NEXT: } ; CHECK-NEXT: Section (10) .debug_aranges { ; CHECK-NEXT: 0x6 R_WASM_SECTION_OFFSET_I32 .debug_info 0 -; CHECK-NEXT: 0x10 R_WASM_FUNCTION_OFFSET_I64 f2 0 -; CHECK-NEXT: 0x20 R_WASM_MEMORY_ADDR_I64 foo 0 -; CHECK-NEXT: 0x30 R_WASM_MEMORY_ADDR_I64 ptr2 0 +; CHECK-NEXT: 0x10 R_WASM_MEMORY_ADDR_I64 foo 0 +; CHECK-NEXT: 0x20 R_WASM_MEMORY_ADDR_I64 ptr2 0 +; CHECK-NEXT: 0x30 R_WASM_FUNCTION_OFFSET_I64 f2 0 ; CHECK-NEXT: } ; CHECK-NEXT: Section (12) .debug_pubnames { ; CHECK-NEXT: 0x6 R_WASM_SECTION_OFFSET_I32 .debug_info 0 diff --git a/llvm/test/MC/WebAssembly/dwarfdump.ll b/llvm/test/MC/WebAssembly/dwarfdump.ll index 8e11ce3f93ee..cb2e3e613af9 100644 --- a/llvm/test/MC/WebAssembly/dwarfdump.ll +++ b/llvm/test/MC/WebAssembly/dwarfdump.ll @@ -15,46 +15,46 @@ ; CHECK-NEXT: DW_AT_low_pc (0x00000002) ; CHECK-NEXT: DW_AT_high_pc (0x00000004) -; CHECK: 0x00000026: DW_TAG_subprogram -; CHECK-NEXT: DW_AT_low_pc (0x00000002) -; CHECK-NEXT: DW_AT_high_pc (0x00000004) -; CHECK-NEXT: DW_AT_frame_base (DW_OP_WASM_location 0x3 0x0, DW_OP_stack_value) -; CHECK-NEXT: DW_AT_name ("f2") -; CHECK-NEXT: DW_AT_decl_file ("/usr/local/google/home/sbc/dev/wasm/simple{{[/\\]}}test.c") -; CHECK-NEXT: DW_AT_decl_line (2) -; CHECK-NEXT: DW_AT_prototyped (true) -; CHECK-NEXT: DW_AT_external (true) - -; CHECK: 0x0000003d: DW_TAG_variable +; CHECK: 0x00000026: DW_TAG_variable ; CHECK-NEXT: DW_AT_name ("foo") -; CHECK-NEXT: DW_AT_type (0x0000004e "int *") +; CHECK-NEXT: DW_AT_type (0x00000037 "int *") ; CHECK-NEXT: DW_AT_external (true) ; CHECK-NEXT: DW_AT_decl_file ("/usr/local/google/home/sbc/dev/wasm/simple{{[/\\]}}test.c") ; CHECK-NEXT: DW_AT_decl_line (4) ; CHECK-NEXT: 
DW_AT_location (DW_OP_addr 0x0) -; CHECK: 0x0000004e: DW_TAG_pointer_type -; CHECK-NEXT: DW_AT_type (0x00000053 "int") +; CHECK: 0x00000037: DW_TAG_pointer_type +; CHECK-NEXT: DW_AT_type (0x0000003c "int") -; CHECK: 0x00000053: DW_TAG_base_type +; CHECK: 0x0000003c: DW_TAG_base_type ; CHECK-NEXT: DW_AT_name ("int") ; CHECK-NEXT: DW_AT_encoding (DW_ATE_signed) ; CHECK-NEXT: DW_AT_byte_size (0x04) -; CHECK: 0x0000005a: DW_TAG_variable +; CHECK: 0x00000043: DW_TAG_variable ; CHECK-NEXT: DW_AT_name ("ptr2") -; CHECK-NEXT: DW_AT_type (0x0000006b "void (*)()") +; CHECK-NEXT: DW_AT_type (0x00000054 "void (*)()") ; CHECK-NEXT: DW_AT_external (true) ; CHECK-NEXT: DW_AT_decl_file ("/usr/local/google/home/sbc/dev/wasm/simple{{[/\\]}}test.c") ; CHECK-NEXT: DW_AT_decl_line (5) ; CHECK-NEXT: DW_AT_location (DW_OP_addr 0x4) -; CHECK: 0x0000006b: DW_TAG_pointer_type -; CHECK-NEXT: DW_AT_type (0x00000070 "void ()") +; CHECK: 0x00000054: DW_TAG_pointer_type +; CHECK-NEXT: DW_AT_type (0x00000059 "void ()") -; CHECK: 0x00000070: DW_TAG_subroutine_type +; CHECK: 0x00000059: DW_TAG_subroutine_type ; CHECK-NEXT: DW_AT_prototyped (true) +; CHECK: 0x0000005a: DW_TAG_subprogram +; CHECK-NEXT: DW_AT_low_pc (0x00000002) +; CHECK-NEXT: DW_AT_high_pc (0x00000004) +; CHECK-NEXT: DW_AT_frame_base (DW_OP_WASM_location 0x3 0x0, DW_OP_stack_value) +; CHECK-NEXT: DW_AT_name ("f2") +; CHECK-NEXT: DW_AT_decl_file ("/usr/local/google/home/sbc/dev/wasm/simple{{[/\\]}}test.c") +; CHECK-NEXT: DW_AT_decl_line (2) +; CHECK-NEXT: DW_AT_prototyped (true) +; CHECK-NEXT: DW_AT_external (true) + ; CHECK: 0x00000071: NULL @@ -66,48 +66,48 @@ ; SPLIT-NEXT: DW_AT_language (DW_LANG_C99) ; SPLIT-NEXT: DW_AT_name ("test.c") ; SPLIT-NEXT: DW_AT_GNU_dwo_name ("{{.*}}dwarfdump.ll.tmp.dwo") -; SPLIT-NEXT: DW_AT_GNU_dwo_id (0x0642bb5dada25ca6) +; SPLIT-NEXT: DW_AT_GNU_dwo_id (0xad3151f12153fa17) -; SPLIT: 0x00000019: DW_TAG_subprogram -; SPLIT-NEXT: DW_AT_low_pc (indexed (00000000) address = ) -; SPLIT-NEXT: DW_AT_high_pc (0x00000002) -; SPLIT-NEXT: DW_AT_frame_base (DW_OP_WASM_location 0x3 0x0, DW_OP_stack_value) -; SPLIT-NEXT: DW_AT_name ("f2") -; SPLIT-NEXT: DW_AT_decl_file (0x01) -; SPLIT-NEXT: DW_AT_decl_line (2) -; SPLIT-NEXT: DW_AT_prototyped (true) -; SPLIT-NEXT: DW_AT_external (true) - -; SPLIT: 0x0000002a: DW_TAG_variable +; SPLIT: 0x00000019: DW_TAG_variable ; SPLIT-NEXT: DW_AT_name ("foo") -; SPLIT-NEXT: DW_AT_type (0x00000035 "int *") +; SPLIT-NEXT: DW_AT_type (0x00000024 "int *") ; SPLIT-NEXT: DW_AT_external (true) ; SPLIT-NEXT: DW_AT_decl_file (0x01) ; SPLIT-NEXT: DW_AT_decl_line (4) -; SPLIT-NEXT: DW_AT_location (DW_OP_GNU_addr_index 0x1) +; SPLIT-NEXT: DW_AT_location (DW_OP_GNU_addr_index 0x0) -; SPLIT: 0x00000035: DW_TAG_pointer_type -; SPLIT-NEXT: DW_AT_type (0x0000003a "int") +; SPLIT: 0x00000024: DW_TAG_pointer_type +; SPLIT-NEXT: DW_AT_type (0x00000029 "int") -; SPLIT: 0x0000003a: DW_TAG_base_type +; SPLIT: 0x00000029: DW_TAG_base_type ; SPLIT-NEXT: DW_AT_name ("int") ; SPLIT-NEXT: DW_AT_encoding (DW_ATE_signed) ; SPLIT-NEXT: DW_AT_byte_size (0x04) -; SPLIT: 0x0000003e: DW_TAG_variable +; SPLIT: 0x0000002d: DW_TAG_variable ; SPLIT-NEXT: DW_AT_name ("ptr2") -; SPLIT-NEXT: DW_AT_type (0x00000049 "void (*)()") +; SPLIT-NEXT: DW_AT_type (0x00000038 "void (*)()") ; SPLIT-NEXT: DW_AT_external (true) ; SPLIT-NEXT: DW_AT_decl_file (0x01) ; SPLIT-NEXT: DW_AT_decl_line (5) -; SPLIT-NEXT: DW_AT_location (DW_OP_GNU_addr_index 0x2) +; SPLIT-NEXT: DW_AT_location (DW_OP_GNU_addr_index 0x1) -; SPLIT: 0x00000049: DW_TAG_pointer_type -; 
SPLIT-NEXT: DW_AT_type (0x0000004e "void ()") +; SPLIT: 0x00000038: DW_TAG_pointer_type +; SPLIT-NEXT: DW_AT_type (0x0000003d "void ()") -; SPLIT: 0x0000004e: DW_TAG_subroutine_type +; SPLIT: 0x0000003d: DW_TAG_subroutine_type ; SPLIT-NEXT: DW_AT_prototyped (true) +; SPLIT: 0x0000003e: DW_TAG_subprogram +; SPLIT-NEXT: DW_AT_low_pc (indexed (00000002) address = ) +; SPLIT-NEXT: DW_AT_high_pc (0x00000002) +; SPLIT-NEXT: DW_AT_frame_base (DW_OP_WASM_location 0x3 0x0, DW_OP_stack_value) +; SPLIT-NEXT: DW_AT_name ("f2") +; SPLIT-NEXT: DW_AT_decl_file (0x01) +; SPLIT-NEXT: DW_AT_decl_line (2) +; SPLIT-NEXT: DW_AT_prototyped (true) +; SPLIT-NEXT: DW_AT_external (true) + ; SPLIT: 0x0000004f: NULL diff --git a/llvm/test/MC/WebAssembly/dwarfdump64.ll b/llvm/test/MC/WebAssembly/dwarfdump64.ll index 199e1f55931f..603f610c4dff 100644 --- a/llvm/test/MC/WebAssembly/dwarfdump64.ll +++ b/llvm/test/MC/WebAssembly/dwarfdump64.ll @@ -14,46 +14,46 @@ ; CHECK-NEXT: DW_AT_low_pc [DW_FORM_addr] (0x0000000000000002) ; CHECK-NEXT: DW_AT_high_pc [DW_FORM_data4] (0x00000002) -; CHECK: 0x0000002a: DW_TAG_subprogram -; CHECK-NEXT: DW_AT_low_pc [DW_FORM_addr] (0x0000000000000002) -; CHECK-NEXT: DW_AT_high_pc [DW_FORM_data4] (0x00000002) -; CHECK-NEXT: DW_AT_frame_base [DW_FORM_exprloc] (DW_OP_WASM_location 0x3 0x0, DW_OP_stack_value) -; CHECK-NEXT: DW_AT_name [DW_FORM_strp] ("f2") -; CHECK-NEXT: DW_AT_decl_file [DW_FORM_data1] ("/usr/local/google/home/sbc/dev/wasm/simple{{[/\\]}}test.c") -; CHECK-NEXT: DW_AT_decl_line [DW_FORM_data1] (2) -; CHECK-NEXT: DW_AT_prototyped [DW_FORM_flag_present] (true) -; CHECK-NEXT: DW_AT_external [DW_FORM_flag_present] (true) - -; CHECK: 0x00000045: DW_TAG_variable +; CHECK: 0x0000002a: DW_TAG_variable ; CHECK-NEXT: DW_AT_name [DW_FORM_strp] ("foo") -; CHECK-NEXT: DW_AT_type [DW_FORM_ref4] (0x0000005a "int *") +; CHECK-NEXT: DW_AT_type [DW_FORM_ref4] (0x0000003f "int *") ; CHECK-NEXT: DW_AT_external [DW_FORM_flag_present] (true) ; CHECK-NEXT: DW_AT_decl_file [DW_FORM_data1] ("/usr/local/google/home/sbc/dev/wasm/simple{{[/\\]}}test.c") ; CHECK-NEXT: DW_AT_decl_line [DW_FORM_data1] (4) ; CHECK-NEXT: DW_AT_location [DW_FORM_exprloc] (DW_OP_addr 0x0) -; CHECK: 0x0000005a: DW_TAG_pointer_type -; CHECK-NEXT: DW_AT_type [DW_FORM_ref4] (0x0000005f "int") +; CHECK: 0x0000003f: DW_TAG_pointer_type +; CHECK-NEXT: DW_AT_type [DW_FORM_ref4] (0x00000044 "int") -; CHECK: 0x0000005f: DW_TAG_base_type +; CHECK: 0x00000044: DW_TAG_base_type ; CHECK-NEXT: DW_AT_name [DW_FORM_strp] ("int") ; CHECK-NEXT: DW_AT_encoding [DW_FORM_data1] (DW_ATE_signed) ; CHECK-NEXT: DW_AT_byte_size [DW_FORM_data1] (0x04) -; CHECK: 0x00000066: DW_TAG_variable +; CHECK: 0x0000004b: DW_TAG_variable ; CHECK-NEXT: DW_AT_name [DW_FORM_strp] ("ptr2") -; CHECK-NEXT: DW_AT_type [DW_FORM_ref4] (0x0000007b "void (*)()") +; CHECK-NEXT: DW_AT_type [DW_FORM_ref4] (0x00000060 "void (*)()") ; CHECK-NEXT: DW_AT_external [DW_FORM_flag_present] (true) ; CHECK-NEXT: DW_AT_decl_file [DW_FORM_data1] ("/usr/local/google/home/sbc/dev/wasm/simple{{[/\\]}}test.c") ; CHECK-NEXT: DW_AT_decl_line [DW_FORM_data1] (5) ; CHECK-NEXT: DW_AT_location [DW_FORM_exprloc] (DW_OP_addr 0x8) -; CHECK: 0x0000007b: DW_TAG_pointer_type -; CHECK-NEXT: DW_AT_type [DW_FORM_ref4] (0x00000080 "void ()") +; CHECK: 0x00000060: DW_TAG_pointer_type +; CHECK-NEXT: DW_AT_type [DW_FORM_ref4] (0x00000065 "void ()") -; CHECK: 0x00000080: DW_TAG_subroutine_type +; CHECK: 0x00000065: DW_TAG_subroutine_type ; CHECK-NEXT: DW_AT_prototyped [DW_FORM_flag_present] (true) +; CHECK: 
0x00000066: DW_TAG_subprogram +; CHECK-NEXT: DW_AT_low_pc [DW_FORM_addr] (0x0000000000000002) +; CHECK-NEXT: DW_AT_high_pc [DW_FORM_data4] (0x00000002) +; CHECK-NEXT: DW_AT_frame_base [DW_FORM_exprloc] (DW_OP_WASM_location 0x3 0x0, DW_OP_stack_value) +; CHECK-NEXT: DW_AT_name [DW_FORM_strp] ("f2") +; CHECK-NEXT: DW_AT_decl_file [DW_FORM_data1] ("/usr/local/google/home/sbc/dev/wasm/simple{{[/\\]}}test.c") +; CHECK-NEXT: DW_AT_decl_line [DW_FORM_data1] (2) +; CHECK-NEXT: DW_AT_prototyped [DW_FORM_flag_present] (true) +; CHECK-NEXT: DW_AT_external [DW_FORM_flag_present] (true) + ; CHECK: 0x00000081: NULL target triple = "wasm64-unknown-unknown" From ee4bbb50ae5847806c843f9122f4d6f1e2a0677f Mon Sep 17 00:00:00 2001 From: Nico Weber Date: Thu, 23 Dec 2021 18:06:48 -0500 Subject: [PATCH 185/293] [gn build] (manually) port 7cd7b4d2f807 (sanitizer_symbolize_set_demangle) --- llvm/utils/gn/secondary/compiler-rt/lib/asan/BUILD.gn | 1 + llvm/utils/gn/secondary/compiler-rt/lib/tsan/rtl/BUILD.gn | 1 + 2 files changed, 2 insertions(+) diff --git a/llvm/utils/gn/secondary/compiler-rt/lib/asan/BUILD.gn b/llvm/utils/gn/secondary/compiler-rt/lib/asan/BUILD.gn index 82f7a6aec927..de8f33da0bc5 100644 --- a/llvm/utils/gn/secondary/compiler-rt/lib/asan/BUILD.gn +++ b/llvm/utils/gn/secondary/compiler-rt/lib/asan/BUILD.gn @@ -163,6 +163,7 @@ target(asan_target_type, "asan") { "-Wl,-U,___sanitizer_symbolize_data", "-Wl,-U,___sanitizer_symbolize_demangle", "-Wl,-U,___sanitizer_symbolize_flush", + "-Wl,-U,___sanitizer_symbolize_set_demangle", "-Wl,-U,___sanitizer_symbolize_set_inline_frames", # xray diff --git a/llvm/utils/gn/secondary/compiler-rt/lib/tsan/rtl/BUILD.gn b/llvm/utils/gn/secondary/compiler-rt/lib/tsan/rtl/BUILD.gn index 788a5621d393..db354d907a4e 100644 --- a/llvm/utils/gn/secondary/compiler-rt/lib/tsan/rtl/BUILD.gn +++ b/llvm/utils/gn/secondary/compiler-rt/lib/tsan/rtl/BUILD.gn @@ -157,6 +157,7 @@ target(tsan_target_type, "rtl") { "-Wl,-U,___sanitizer_symbolize_data", "-Wl,-U,___sanitizer_symbolize_demangle", "-Wl,-U,___sanitizer_symbolize_flush", + "-Wl,-U,___sanitizer_symbolize_set_demangle", "-Wl,-U,___sanitizer_symbolize_set_inline_frames", # FIXME: better From 464cc4c920a1c1f4bbaa36e38ed0321369eaa379 Mon Sep 17 00:00:00 2001 From: Fangrui Song Date: Thu, 23 Dec 2021 15:13:46 -0800 Subject: [PATCH 186/293] [ELF] Remove stale comment which was duplicated in MarkLive::run Pointed out by thakis --- lld/ELF/MarkLive.cpp | 29 ----------------------------- 1 file changed, 29 deletions(-) diff --git a/lld/ELF/MarkLive.cpp b/lld/ELF/MarkLive.cpp index 2c712ea964b1..b63f2beb9dcb 100644 --- a/lld/ELF/MarkLive.cpp +++ b/lld/ELF/MarkLive.cpp @@ -381,35 +381,6 @@ template void elf::markLive() { return; } - // Otherwise, do mark-sweep GC. - // - // The --gc-sections option works only for SHF_ALLOC sections (sections that - // are memory-mapped at runtime). So we can unconditionally make non-SHF_ALLOC - // sections alive except SHF_LINK_ORDER, SHT_REL/SHT_RELA sections, and - // sections in a group. - // - // Usually, non-SHF_ALLOC sections are not removed even if they are - // unreachable through relocations because reachability is not a good signal - // whether they are garbage or not (e.g. there is usually no section referring - // to a .comment section, but we want to keep it.) When a non-SHF_ALLOC - // section is retained, we also retain sections dependent on it. - // - // Note on SHF_LINK_ORDER: Such sections contain metadata and they - // have a reverse dependency on the InputSection they are linked with. 
- // We are able to garbage collect them. - // - // Note on SHF_REL{,A}: Such sections reach here only when -r - // or --emit-reloc were given. And they are subject of garbage - // collection because, if we remove a text section, we also - // remove its relocation section. - // - // Note on nextInSectionGroup: The ELF spec says that group sections are - // included or omitted as a unit. We take the interpretation that: - // - // - Group members (nextInSectionGroup != nullptr) are subject to garbage - // collection. - // - Groups members are retained or discarded as a unit. - // Follow the graph to mark all live sections. for (unsigned curPart = 1; curPart <= partitions.size(); ++curPart) MarkLive(curPart).run(); From a67c0fc1fbe8479eb5b3d7c189395bfe6cb98086 Mon Sep 17 00:00:00 2001 From: Krzysztof Parzyszek Date: Thu, 23 Dec 2021 15:03:55 -0800 Subject: [PATCH 187/293] [Hexagon] Revamp HVX flag verification in driver Generalize warning/error messages (for reuse), refactor flag verification code, rewrite HVX flag driver testcase. --- .../clang/Basic/DiagnosticDriverKinds.td | 15 +- clang/lib/Driver/ToolChains/Hexagon.cpp | 140 +++++--- clang/test/Driver/hexagon-hvx-ieee-fp.c | 11 + clang/test/Driver/hexagon-hvx-qfloat.c | 6 +- clang/test/Driver/hexagon-hvx.c | 322 ++++++++++++------ clang/test/Driver/hexagon-vectorize.c | 2 +- 6 files changed, 329 insertions(+), 167 deletions(-) diff --git a/clang/include/clang/Basic/DiagnosticDriverKinds.td b/clang/include/clang/Basic/DiagnosticDriverKinds.td index c623aeff11f0..a7fd2f26478c 100644 --- a/clang/include/clang/Basic/DiagnosticDriverKinds.td +++ b/clang/include/clang/Basic/DiagnosticDriverKinds.td @@ -443,16 +443,13 @@ def err_analyzer_checker_option_invalid_input : Error< def err_analyzer_checker_incompatible_analyzer_option : Error< "checker cannot be enabled with analyzer option '%0' == %1">; -def err_drv_invalid_hvx_length : Error< - "-mhvx-length is not supported without a -mhvx/-mhvx= flag">; -def warn_drv_vectorize_needs_hvx : Warning< - "auto-vectorization requires HVX, use -mhvx to enable it">, +def warn_drv_needs_hvx : Warning< + "%0 requires HVX, use -mhvx/-mhvx= to enable it">, InGroup; - -def err_drv_invalid_hvx_qfloat : Error< - "-mhvx-qfloat is not supported without a -mhvx/-mhvx= flag.">; -def err_drv_invalid_arch_hvx_qfloat : Error< - "-mhvx-qfloat is not supported on HVX %0.">; +def err_drv_needs_hvx : Error< + "%0 requires HVX, use -mhvx/-mhvx= to enable it">; +def err_drv_needs_hvx_version : Error< + "%0 is not supported on HVX %1">; def err_drv_module_header_wrong_kind : Error< "header file '%0' input type '%1' does not match type of prior input " diff --git a/clang/lib/Driver/ToolChains/Hexagon.cpp b/clang/lib/Driver/ToolChains/Hexagon.cpp index f9785e42025c..ba3040636604 100644 --- a/clang/lib/Driver/ToolChains/Hexagon.cpp +++ b/clang/lib/Driver/ToolChains/Hexagon.cpp @@ -26,8 +26,8 @@ using namespace clang; using namespace llvm::opt; // Default hvx-length for various versions. -static StringRef getDefaultHvxLength(StringRef Cpu) { - return llvm::StringSwitch(Cpu) +static StringRef getDefaultHvxLength(StringRef HvxVer) { + return llvm::StringSwitch(HvxVer) .Case("v60", "64b") .Case("v62", "64b") .Case("v65", "64b") @@ -51,63 +51,107 @@ static void handleHVXTargetFeatures(const Driver &D, const ArgList &Args, // Handle HVX warnings. handleHVXWarnings(D, Args); - // Add the +hvx* features based on commandline flags. - StringRef HVXFeature, HVXLength; - - // Handle -mhvx, -mhvx=, -mno-hvx. 
- if (Arg *A = Args.getLastArg(options::OPT_mno_hexagon_hvx, - options::OPT_mhexagon_hvx, - options::OPT_mhexagon_hvx_EQ)) { - if (A->getOption().matches(options::OPT_mno_hexagon_hvx)) - return; - if (A->getOption().matches(options::OPT_mhexagon_hvx_EQ)) { - HasHVX = true; - HVXFeature = Cpu = A->getValue(); - HVXFeature = Args.MakeArgString(llvm::Twine("+hvx") + HVXFeature.lower()); - } else if (A->getOption().matches(options::OPT_mhexagon_hvx)) { - HasHVX = true; - HVXFeature = Args.MakeArgString(llvm::Twine("+hvx") + Cpu); + auto makeFeature = [&Args](Twine T, bool Enable) -> StringRef { + const std::string &S = T.str(); + StringRef Opt(S); + if (Opt.endswith("=")) + Opt = Opt.drop_back(1); + if (Opt.startswith("mno-")) + Opt = Opt.drop_front(4); + else if (Opt.startswith("m")) + Opt = Opt.drop_front(1); + return Args.MakeArgString(Twine(Enable ? "+" : "-") + Twine(Opt)); + }; + + auto withMinus = [](StringRef S) -> std::string { + return "-" + S.str(); + }; + + // Drop tiny core suffix for HVX version. + std::string HvxVer = + (Cpu.back() == 'T' || Cpu.back() == 't' ? Cpu.drop_back(1) : Cpu).str(); + HasHVX = false; + + // Handle -mhvx, -mhvx=, -mno-hvx. If both present, -mhvx= wins over -mhvx. + auto argOrNull = [&Args](auto FlagOn, auto FlagOff) -> Arg* { + if (Arg *A = Args.getLastArg(FlagOn, FlagOff)) { + if (A->getOption().matches(FlagOn)) + return A; } - Features.push_back(HVXFeature); + return nullptr; + }; + + Arg *HvxBareA = + argOrNull(options::OPT_mhexagon_hvx, options::OPT_mno_hexagon_hvx); + Arg *HvxVerA = + argOrNull(options::OPT_mhexagon_hvx_EQ, options::OPT_mno_hexagon_hvx); + + if (Arg *A = HvxVerA ? HvxVerA : HvxBareA) { + if (A->getOption().matches(options::OPT_mhexagon_hvx_EQ)) + HvxVer = StringRef(A->getValue()).lower(); // lower produces std:string + HasHVX = true; + Features.push_back(makeFeature(Twine("hvx") + HvxVer, true)); + } else if (Arg *A = Args.getLastArg(options::OPT_mno_hexagon_hvx)) { + // If there was an explicit -mno-hvx, add -hvx to target features. + Features.push_back(makeFeature(A->getOption().getName(), false)); } + StringRef HvxLen = getDefaultHvxLength(HvxVer); + // Handle -mhvx-length=. if (Arg *A = Args.getLastArg(options::OPT_mhexagon_hvx_length_EQ)) { // These flags are valid only if HVX in enabled. if (!HasHVX) - D.Diag(diag::err_drv_invalid_hvx_length); + D.Diag(diag::err_drv_needs_hvx) << withMinus(A->getOption().getName()); else if (A->getOption().matches(options::OPT_mhexagon_hvx_length_EQ)) - HVXLength = A->getValue(); + HvxLen = A->getValue(); } - // Default hvx-length based on Cpu. - else if (HasHVX) - HVXLength = getDefaultHvxLength(Cpu); - - if (!HVXLength.empty()) { - HVXFeature = - Args.MakeArgString(llvm::Twine("+hvx-length") + HVXLength.lower()); - Features.push_back(HVXFeature); + + if (HasHVX) { + StringRef L = makeFeature(Twine("hvx-length") + HvxLen.lower(), true); + Features.push_back(L); } - // Handle -mhvx-qfloat. - // QFloat is valid only on HVX v68/v68+ as of now. - unsigned short CpuVer; - Cpu.drop_front(1).getAsInteger(10, CpuVer); - if (Arg *A = Args.getLastArg(options::OPT_mhexagon_hvx_qfloat, - options::OPT_mno_hexagon_hvx_qfloat)) { - if (A->getOption().matches(options::OPT_mno_hexagon_hvx_qfloat)) { - StringRef OptName = A->getOption().getName().substr(4); - Features.push_back(Args.MakeArgString("-" + OptName)); - return; + unsigned HvxVerNum; + // getAsInteger returns 'true' on error. + if (StringRef(HvxVer).drop_front(1).getAsInteger(10, HvxVerNum)) + HvxVerNum = 0; + + // Handle HVX floating point flags. 
+ auto checkFlagHvxVersion = [&](auto FlagOn, auto FlagOff, + unsigned MinVerNum) -> Optional { + // Return an Optional: + // - None indicates a verification failure, or that the flag was not + // present in Args. + // - Otherwise the returned value is that name of the feature to add + // to Features. + Arg *A = Args.getLastArg(FlagOn, FlagOff); + if (!A) + return None; + + StringRef OptName = A->getOption().getName(); + if (A->getOption().matches(FlagOff)) + return makeFeature(OptName, false); + + if (!HasHVX) { + D.Diag(diag::err_drv_needs_hvx) << withMinus(OptName); + return None; + } + if (HvxVerNum < MinVerNum) { + D.Diag(diag::err_drv_needs_hvx_version) + << withMinus(OptName) << ("v" + std::to_string(HvxVerNum)); + return None; } - StringRef OptName = A->getOption().getName().substr(1); - if (HasHVX) { - if (CpuVer >= 68) - Features.push_back(Args.MakeArgString("+" + OptName)); - else - D.Diag(diag::err_drv_invalid_arch_hvx_qfloat) << Cpu; - } else - D.Diag(diag::err_drv_invalid_hvx_qfloat); + return makeFeature(OptName, true); + }; + + if (auto F = checkFlagHvxVersion(options::OPT_mhexagon_hvx_qfloat, + options::OPT_mno_hexagon_hvx_qfloat, 68)) { + Features.push_back(*F); + } + if (auto F = checkFlagHvxVersion(options::OPT_mhexagon_hvx_ieee_fp, + options::OPT_mno_hexagon_hvx_ieee_fp, 68)) { + Features.push_back(*F); } } @@ -138,7 +182,7 @@ void hexagon::getHexagonTargetFeatures(const Driver &D, const ArgList &Args, handleHVXTargetFeatures(D, Args, Features, Cpu, HasHVX); if (HexagonToolChain::isAutoHVXEnabled(Args) && !HasHVX) - D.Diag(diag::warn_drv_vectorize_needs_hvx); + D.Diag(diag::warn_drv_needs_hvx) << "auto-vectorization"; } // Hexagon tools start. diff --git a/clang/test/Driver/hexagon-hvx-ieee-fp.c b/clang/test/Driver/hexagon-hvx-ieee-fp.c index acc9a910f0f1..cd6e11ef06fe 100644 --- a/clang/test/Driver/hexagon-hvx-ieee-fp.c +++ b/clang/test/Driver/hexagon-hvx-ieee-fp.c @@ -12,3 +12,14 @@ // RUN: -mno-hvx-ieee-fp 2>&1 | FileCheck -check-prefix=CHECK-NO-IEEEFP %s // CHECK-NO-IEEEFP: "-target-feature" "-hvx-ieee-fp" +// IEEE-FP is valid only on hvxv68 and hvxv68+. +// RUN: %clang -c %s -### -target hexagon-unknown-elf -mv68 -mhvx=v66 \ +// RUN: -mhvx-ieee-fp 2>&1 | FileCheck -check-prefix=CHECK-ERROR1 %s +// RUN: %clang -c %s -### -target hexagon-unknown-elf -mv66 -mhvx -mhvx-ieee-fp \ +// RUN: 2>&1 | FileCheck -check-prefix=CHECK-ERROR1 %s +// CHECK-ERROR1: error: -mhvx-ieee-fp is not supported on HVX v66 + +// IEEE-FP is valid only if HVX is enabled. +// RUN: %clang -c %s -### -target hexagon-unknown-elf -mv68 -mhvx-ieee-fp \ +// RUN: 2>&1 | FileCheck -check-prefix=CHECK-ERROR2 %s +// CHECK-ERROR2: error: -mhvx-ieee-fp requires HVX, use -mhvx/-mhvx= to enable it diff --git a/clang/test/Driver/hexagon-hvx-qfloat.c b/clang/test/Driver/hexagon-hvx-qfloat.c index 698451aae58f..fc647f7b06ab 100644 --- a/clang/test/Driver/hexagon-hvx-qfloat.c +++ b/clang/test/Driver/hexagon-hvx-qfloat.c @@ -12,14 +12,14 @@ // RUN: -mno-hvx-qfloat 2>&1 | FileCheck -check-prefix=CHECK-NO-QFLOAT %s // CHECK-NO-QFLOAT: "-target-feature" "-hvx-qfloat" -// QFloat is valid only on hvxv68 and hvxv68+. +// QFloat is valid only on hvxv68+. // RUN: %clang -c %s -### -target hexagon-unknown-elf -mv68 -mhvx=v66 \ // RUN: -mhvx-qfloat 2>&1 | FileCheck -check-prefix=CHECK-ERROR1 %s // RUN: %clang -c %s -### -target hexagon-unknown-elf -mv66 -mhvx -mhvx-qfloat \ // RUN: 2>&1 | FileCheck -check-prefix=CHECK-ERROR1 %s -// CHECK-ERROR1: error: -mhvx-qfloat is not supported on HVX v66. 
+// CHECK-ERROR1: error: -mhvx-qfloat is not supported on HVX v66 // QFloat is valid only if HVX is enabled. // RUN: %clang -c %s -### -target hexagon-unknown-elf -mv68 -mhvx-qfloat \ // RUN: 2>&1 | FileCheck -check-prefix=CHECK-ERROR2 %s -// CHECK-ERROR2: error: -mhvx-qfloat is not supported without a -mhvx/-mhvx= flag. +// CHECK-ERROR2: error: -mhvx-qfloat requires HVX, use -mhvx/-mhvx= to enable it diff --git a/clang/test/Driver/hexagon-hvx.c b/clang/test/Driver/hexagon-hvx.c index aab7e329471b..10bb8fe9327f 100644 --- a/clang/test/Driver/hexagon-hvx.c +++ b/clang/test/Driver/hexagon-hvx.c @@ -2,117 +2,227 @@ // Tests for the hvx features and warnings. // ----------------------------------------------------------------------------- -// RUN: %clang -c %s -### -target hexagon-unknown-elf -mv65 -mhvx \ -// RUN: 2>&1 | FileCheck -check-prefix=CHECKHVX165 %s -// CHECKHVX165: "-target-feature" "+hvxv65" +// No HVX without -mhvx/-mhvx= + +// CHECK-HVX-ON: "-target-feature" "+hvx +// CHECK-HVX-ON-NOT: "-target-feature" "-hvx +// CHECK-HVX-OFF-NOT: "-target-feature" "+hvx + +// RUN: %clang -c %s -### -target hexagon-unknown-elf -mv5 \ +// RUN: 2>&1 | FileCheck -check-prefix=CHECK-HVX-OFF %s +// RUN: %clang -c %s -### -target hexagon-unknown-elf -mv55 \ +// RUN: 2>&1 | FileCheck -check-prefix=CHECK-HVX-OFF %s +// RUN: %clang -c %s -### -target hexagon-unknown-elf -mv60 \ +// RUN: 2>&1 | FileCheck -check-prefix=CHECK-HVX-OFF %s +// RUN: %clang -c %s -### -target hexagon-unknown-elf -mv62 \ +// RUN: 2>&1 | FileCheck -check-prefix=CHECK-HVX-OFF %s +// RUN: %clang -c %s -### -target hexagon-unknown-elf -mv65 \ +// RUN: 2>&1 | FileCheck -check-prefix=CHECK-HVX-OFF %s +// RUN: %clang -c %s -### -target hexagon-unknown-elf -mv66 \ +// RUN: 2>&1 | FileCheck -check-prefix=CHECK-HVX-OFF %s +// RUN: %clang -c %s -### -target hexagon-unknown-elf -mv67 \ +// RUN: 2>&1 | FileCheck -check-prefix=CHECK-HVX-OFF %s +// RUN: %clang -c %s -### -target hexagon-unknown-elf -mv67t \ +// RUN: 2>&1 | FileCheck -check-prefix=CHECK-HVX-OFF %s +// RUN: %clang -c %s -### -target hexagon-unknown-elf -mv68 \ +// RUN: 2>&1 | FileCheck -check-prefix=CHECK-HVX-OFF %s +// RUN: %clang -c %s -### -target hexagon-unknown-elf -mv69 \ +// RUN: 2>&1 | FileCheck -check-prefix=CHECK-HVX-OFF %s + +// Infer HVX version from flag: + +// CHECK-HVX-V60: "-target-feature" "+hvxv60" +// CHECK-HVX-V62: "-target-feature" "+hvxv62" +// CHECK-HVX-V65: "-target-feature" "+hvxv65" +// CHECK-HVX-V66: "-target-feature" "+hvxv66" +// CHECK-HVX-V67: "-target-feature" "+hvxv67" +// CHECK-HVX-V68: "-target-feature" "+hvxv68" +// CHECK-HVX-V69: "-target-feature" "+hvxv69" + +// Direct version flag: +// RUN: %clang -c %s -### -target hexagon-unknown-elf -mhvx=v60 \ +// RUN: 2>&1 | FileCheck -check-prefix=CHECK-HVX-V60 %s +// RUN: %clang -c %s -### -target hexagon-unknown-elf -mhvx=v62 \ +// RUN: 2>&1 | FileCheck -check-prefix=CHECK-HVX-V62 %s +// RUN: %clang -c %s -### -target hexagon-unknown-elf -mhvx=v65 \ +// RUN: 2>&1 | FileCheck -check-prefix=CHECK-HVX-V65 %s +// RUN: %clang -c %s -### -target hexagon-unknown-elf -mhvx=v66 \ +// RUN: 2>&1 | FileCheck -check-prefix=CHECK-HVX-V66 %s +// RUN: %clang -c %s -### -target hexagon-unknown-elf -mhvx=v67 \ +// RUN: 2>&1 | FileCheck -check-prefix=CHECK-HVX-V67 %s +// RUN: %clang -c %s -### -target hexagon-unknown-elf -mhvx=v68 \ +// RUN: 2>&1 | FileCheck -check-prefix=CHECK-HVX-V68 %s +// RUN: %clang -c %s -### -target hexagon-unknown-elf -mhvx=v69 \ +// RUN: 2>&1 | FileCheck -check-prefix=CHECK-HVX-V69 %s + +// 
Direct version flag with different CPU version: +// RUN: %clang -c %s -### -target hexagon-unknown-elf -mhvx=v60 -mv62 \ +// RUN: 2>&1 | FileCheck -check-prefix=CHECK-HVX-V60 %s +// RUN: %clang -c %s -### -target hexagon-unknown-elf -mhvx=v62 -mv65 \ +// RUN: 2>&1 | FileCheck -check-prefix=CHECK-HVX-V62 %s +// RUN: %clang -c %s -### -target hexagon-unknown-elf -mhvx=v65 -mv66 \ +// RUN: 2>&1 | FileCheck -check-prefix=CHECK-HVX-V65 %s +// RUN: %clang -c %s -### -target hexagon-unknown-elf -mhvx=v66 -mv67 \ +// RUN: 2>&1 | FileCheck -check-prefix=CHECK-HVX-V66 %s +// RUN: %clang -c %s -### -target hexagon-unknown-elf -mhvx=v67 -mv68 \ +// RUN: 2>&1 | FileCheck -check-prefix=CHECK-HVX-V67 %s +// RUN: %clang -c %s -### -target hexagon-unknown-elf -mhvx=v68 -mv69 \ +// RUN: 2>&1 | FileCheck -check-prefix=CHECK-HVX-V68 %s +// RUN: %clang -c %s -### -target hexagon-unknown-elf -mhvx=v69 -mv60 \ +// RUN: 2>&1 | FileCheck -check-prefix=CHECK-HVX-V69 %s + +// Direct version flag with different CPU version and versionless -mhvx: +// RUN: %clang -c %s -### -target hexagon-unknown-elf -mhvx=v60 -mv62 -mhvx \ +// RUN: 2>&1 | FileCheck -check-prefix=CHECK-HVX-V60 %s +// RUN: %clang -c %s -### -target hexagon-unknown-elf -mhvx=v62 -mv65 -mhvx \ +// RUN: 2>&1 | FileCheck -check-prefix=CHECK-HVX-V62 %s +// RUN: %clang -c %s -### -target hexagon-unknown-elf -mhvx=v65 -mv66 -mhvx \ +// RUN: 2>&1 | FileCheck -check-prefix=CHECK-HVX-V65 %s +// RUN: %clang -c %s -### -target hexagon-unknown-elf -mhvx=v66 -mv67 -mhvx \ +// RUN: 2>&1 | FileCheck -check-prefix=CHECK-HVX-V66 %s +// RUN: %clang -c %s -### -target hexagon-unknown-elf -mhvx=v67 -mv68 -mhvx \ +// RUN: 2>&1 | FileCheck -check-prefix=CHECK-HVX-V67 %s +// RUN: %clang -c %s -### -target hexagon-unknown-elf -mhvx=v68 -mv69 -mhvx \ +// RUN: 2>&1 | FileCheck -check-prefix=CHECK-HVX-V68 %s +// RUN: %clang -c %s -### -target hexagon-unknown-elf -mhvx=v69 -mv60 -mhvx \ +// RUN: 2>&1 | FileCheck -check-prefix=CHECK-HVX-V69 %s + +// Direct version flag with different CPU version, versionless -mhvx +// and -mno-hvx. The -mno-hvx cancels -mhvx=, versionless -mhvx wins: +// RUN: %clang -c %s -### -target hexagon-unknown-elf -mhvx=v60 -mno-hvx -mv62 -mhvx \ +// RUN: 2>&1 | FileCheck -check-prefix=CHECK-HVX-V62 %s +// RUN: %clang -c %s -### -target hexagon-unknown-elf -mhvx=v62 -mno-hvx -mv65 -mhvx \ +// RUN: 2>&1 | FileCheck -check-prefix=CHECK-HVX-V65 %s +// RUN: %clang -c %s -### -target hexagon-unknown-elf -mhvx=v65 -mno-hvx -mv66 -mhvx \ +// RUN: 2>&1 | FileCheck -check-prefix=CHECK-HVX-V66 %s +// RUN: %clang -c %s -### -target hexagon-unknown-elf -mhvx=v66 -mno-hvx -mv67 -mhvx \ +// RUN: 2>&1 | FileCheck -check-prefix=CHECK-HVX-V67 %s +// RUN: %clang -c %s -### -target hexagon-unknown-elf -mhvx=v67 -mno-hvx -mv68 -mhvx \ +// RUN: 2>&1 | FileCheck -check-prefix=CHECK-HVX-V68 %s +// RUN: %clang -c %s -### -target hexagon-unknown-elf -mhvx=v68 -mno-hvx -mv69 -mhvx \ +// RUN: 2>&1 | FileCheck -check-prefix=CHECK-HVX-V69 %s +// RUN: %clang -c %s -### -target hexagon-unknown-elf -mhvx=v69 -mno-hvx -mv60 -mhvx \ +// RUN: 2>&1 | FileCheck -check-prefix=CHECK-HVX-V60 %s + +// Direct version flag with different CPU version, versionless -mhvx +// and -mno-hvx. 
The -mno-hvx cancels versionless -mhvx, -mhvx= wins: +// RUN: %clang -c %s -### -target hexagon-unknown-elf -mv62 -mhvx -mno-hvx -mhvx=v60 \ +// RUN: 2>&1 | FileCheck -check-prefix=CHECK-HVX-V60 %s +// RUN: %clang -c %s -### -target hexagon-unknown-elf -mv65 -mhvx -mno-hvx -mhvx=v62 \ +// RUN: 2>&1 | FileCheck -check-prefix=CHECK-HVX-V62 %s +// RUN: %clang -c %s -### -target hexagon-unknown-elf -mv66 -mhvx -mno-hvx -mhvx=v65 \ +// RUN: 2>&1 | FileCheck -check-prefix=CHECK-HVX-V65 %s +// RUN: %clang -c %s -### -target hexagon-unknown-elf -mv67 -mhvx -mno-hvx -mhvx=v66 \ +// RUN: 2>&1 | FileCheck -check-prefix=CHECK-HVX-V66 %s +// RUN: %clang -c %s -### -target hexagon-unknown-elf -mv68 -mhvx -mno-hvx -mhvx=v67 \ +// RUN: 2>&1 | FileCheck -check-prefix=CHECK-HVX-V67 %s +// RUN: %clang -c %s -### -target hexagon-unknown-elf -mv69 -mhvx -mno-hvx -mhvx=v68 \ +// RUN: 2>&1 | FileCheck -check-prefix=CHECK-HVX-V68 %s +// RUN: %clang -c %s -### -target hexagon-unknown-elf -mv60 -mhvx -mno-hvx -mhvx=v69 \ +// RUN: 2>&1 | FileCheck -check-prefix=CHECK-HVX-V69 %s + +// Infer HVX version from CPU version: +// RUN: %clang -c %s -### -target hexagon-unknown-elf -mv60 -mhvx \ +// RUN: 2>&1 | FileCheck -check-prefix=CHECK-HVX-V60 %s // RUN: %clang -c %s -### -target hexagon-unknown-elf -mv62 -mhvx \ -// RUN: 2>&1 | FileCheck -check-prefix=CHECKHVX162 %s -// CHECKHVX162: "-target-feature" "+hvxv62" - +// RUN: 2>&1 | FileCheck -check-prefix=CHECK-HVX-V62 %s +// RUN: %clang -c %s -### -target hexagon-unknown-elf -mv65 -mhvx \ +// RUN: 2>&1 | FileCheck -check-prefix=CHECK-HVX-V65 %s // RUN: %clang -c %s -### -target hexagon-unknown-elf -mv66 -mhvx \ -// RUN: 2>&1 | FileCheck -check-prefix=CHECKHVX166 %s -// CHECKHVX166: "-target-feature" "+hvxv66" +// RUN: 2>&1 | FileCheck -check-prefix=CHECK-HVX-V66 %s +// RUN: %clang -c %s -### -target hexagon-unknown-elf -mv67 -mhvx \ +// RUN: 2>&1 | FileCheck -check-prefix=CHECK-HVX-V67 %s +// RUN: %clang -c %s -### -target hexagon-unknown-elf -mv67t -mhvx \ +// RUN: 2>&1 | FileCheck -check-prefix=CHECK-HVX-V67 %s +// RUN: %clang -c %s -### -target hexagon-unknown-elf -mv68 -mhvx \ +// RUN: 2>&1 | FileCheck -check-prefix=CHECK-HVX-V68 %s +// RUN: %clang -c %s -### -target hexagon-unknown-elf -mv69 -mhvx \ +// RUN: 2>&1 | FileCheck -check-prefix=CHECK-HVX-V69 %s + +// Infer HVX length from flag: + +// CHECK-HVX-L64: "-target-feature" "+hvx-length64b" +// CHECK-HVX-L64-NOT: "-target-feature" "+hvx-length128b" +// CHECK-HVX-L128: "-target-feature" "+hvx-length128b" +// CHECK-HVX-L128-NOT: "-target-feature" "+hvx-length64b" + +// RUN: %clang -c %s -### -target hexagon-unknown-elf -mhvx -mhvx-length=64b \ +// RUN: 2>&1 | FileCheck -check-prefix=CHECK-HVX-L64 %s +// RUN: %clang -c %s -### -target hexagon-unknown-elf -mhvx -mhvx-length=128b \ +// RUN: 2>&1 | FileCheck -check-prefix=CHECK-HVX-L128 %s + +// Infer HVX length from HVX version: + +// RUN: %clang -c %s -### -target hexagon-unknown-elf -mhvx=v60 \ +// RUN: 2>&1 | FileCheck -check-prefix=CHECK-HVX-L64 %s +// RUN: %clang -c %s -### -target hexagon-unknown-elf -mhvx=v62 \ +// RUN: 2>&1 | FileCheck -check-prefix=CHECK-HVX-L64 %s +// RUN: %clang -c %s -### -target hexagon-unknown-elf -mhvx=v65 \ +// RUN: 2>&1 | FileCheck -check-prefix=CHECK-HVX-L64 %s +// RUN: %clang -c %s -### -target hexagon-unknown-elf -mhvx=v66 \ +// RUN: 2>&1 | FileCheck -check-prefix=CHECK-HVX-L128 %s +// RUN: %clang -c %s -### -target hexagon-unknown-elf -mhvx=v67 \ +// RUN: 2>&1 | FileCheck -check-prefix=CHECK-HVX-L128 %s +// RUN: %clang -c %s -### 
-target hexagon-unknown-elf -mhvx=v68 \ +// RUN: 2>&1 | FileCheck -check-prefix=CHECK-HVX-L128 %s +// RUN: %clang -c %s -### -target hexagon-unknown-elf -mhvx=v69 \ +// RUN: 2>&1 | FileCheck -check-prefix=CHECK-HVX-L128 %s + +// No HVX with trailing -mno-hvx + +// RUN: %clang -c %s -### -target hexagon-unknown-elf -mhvx=v69 -mno-hvx \ +// RUN: 2>&1 | FileCheck -check-prefix=CHECK-HVX-OFF %s +// RUN: %clang -c %s -### -target hexagon-unknown-elf -mv69 -mhvx -mno-hvx \ +// RUN: 2>&1 | FileCheck -check-prefix=CHECK-HVX-OFF %s +// RUN: %clang -c %s -### -target hexagon-unknown-elf -mhvx=v69 -mhvx-length=128b -mno-hvx \ +// RUN: 2>&1 | FileCheck -check-prefix=CHECK-HVX-OFF %s +// RUN: %clang -c %s -### -target hexagon-unknown-elf -mv69 -mhvx -mhvx-qfloat -mno-hvx \ +// RUN: 2>&1 | FileCheck -check-prefix=CHECK-HVX-OFF %s + +// Float + +// CHECK-HVX-QFLOAT-ON: "-target-feature" "+hvx-qfloat" +// CHECK-HVX-QFLOAT-OFF-NOT: "-target-feature" "+hvx-qfloat" +// CHECK-HVX-IEEE-ON: "-target-feature" "+hvx-ieee-fp" +// CHECK-HVX-IEEE-OFF-NOT: "-target-feature" "+hvx-ieee-fp" + +// RUN: %clang -c %s -### -target hexagon-unknown-elf -mv69 -mhvx -mhvx-qfloat \ +// RUN: 2>&1 | FileCheck -check-prefix=CHECK-HVX-QFLOAT-ON %s +// RUN: %clang -c %s -### -target hexagon-unknown-elf -mv69 -mhvx -mno-hvx-qfloat \ +// RUN: 2>&1 | FileCheck -check-prefix=CHECK-HVX-QFLOAT-OFF %s +// RUN: %clang -c %s -### -target hexagon-unknown-elf -mv69 -mhvx -mno-hvx-qfloat -mhvx-qfloat \ +// RUN: 2>&1 | FileCheck -check-prefix=CHECK-HVX-QFLOAT-ON %s +// RUN: %clang -c %s -### -target hexagon-unknown-elf -mv69 -mhvx -mhvx-qfloat -mno-hvx-qfloat \ +// RUN: 2>&1 | FileCheck -check-prefix=CHECK-HVX-QFLOAT-OFF %s -// RUN: %clang -c %s -### -target hexagon-unknown-elf -mv65 -mhvx \ -// RUN: -mhvx-length=128B 2>&1 | FileCheck -check-prefix=CHECKHVX2 %s +// RUN: %clang -c %s -### -target hexagon-unknown-elf -mv69 -mhvx -mhvx-ieee-fp \ +// RUN: 2>&1 | FileCheck -check-prefix=CHECK-HVX-IEEE-ON %s +// RUN: %clang -c %s -### -target hexagon-unknown-elf -mv69 -mhvx -mno-hvx-ieee-fp \ +// RUN: 2>&1 | FileCheck -check-prefix=CHECK-HVX-IEEE-OFF %s +// RUN: %clang -c %s -### -target hexagon-unknown-elf -mv69 -mhvx -mno-hvx-ieee-fp -mhvx-ieee-fp \ +// RUN: 2>&1 | FileCheck -check-prefix=CHECK-HVX-IEEE-ON %s +// RUN: %clang -c %s -### -target hexagon-unknown-elf -mv69 -mhvx -mhvx-ieee-fp -mno-hvx-ieee-fp \ +// RUN: 2>&1 | FileCheck -check-prefix=CHECK-HVX-IEEE-OFF %s + +// HVX flags heed HVX: + +// CHECK-NEEDS-HVX: error: {{.*}} requires HVX, use -mhvx/-mhvx= to enable it + +// RUN: %clang -c %s -### -target hexagon-unknown-elf -mv66 -mhvx-length=64b \ +// RUN: 2>&1 | FileCheck -check-prefix=CHECK-NEEDS-HVX %s +// RUN: %clang -c %s -### -target hexagon-unknown-elf -mv66 -mhvx-length=128b \ +// RUN: 2>&1 | FileCheck -check-prefix=CHECK-NEEDS-HVX %s +// RUN: %clang -c %s -### -target hexagon-unknown-elf -mv69 -mhvx-qfloat \ +// RUN: 2>&1 | FileCheck -check-prefix=CHECK-NEEDS-HVX %s +// RUN: %clang -c %s -### -target hexagon-unknown-elf -mv69 -mhvx-ieee-fp \ +// RUN: 2>&1 | FileCheck -check-prefix=CHECK-NEEDS-HVX %s -// RUN: %clang -c %s -### -target hexagon-unknown-elf -mv62 -mhvx \ -// RUN: -mhvx-length=128B 2>&1 | FileCheck -check-prefix=CHECKHVX2 %s +// Invalid HVX length: + +// CHECK-HVX-BAD-LENGTH: error: unsupported argument '{{.*}}' to option 'mhvx-length=' -// RUN: %clang -c %s -### -target hexagon-unknown-elf -mv62 -mhvx \ -// RUN: -mhvx-length=128b 2>&1 | FileCheck -check-prefix=CHECKHVX2 %s -// CHECKHVX2-NOT: "-target-feature" 
"+hvx-length64b" -// CHECKHVX2: "-target-feature" "+hvx-length128b" - -// RUN: %clang -c %s -### -target hexagon-unknown-elf -mv65 2>&1 \ -// RUN: | FileCheck -check-prefix=CHECKHVX3 %s - -// RUN: %clang -c %s -### -target hexagon-unknown-elf -mv62 2>&1 \ -// RUN: | FileCheck -check-prefix=CHECKHVX3 %s -// CHECKHVX3-NOT: "-target-feature" "+hvx - -// No hvx target feature must be added if -mno-hvx occurs last -// RUN: %clang -c %s -### -target hexagon-unknown-elf -mv62 -mno-hvx \ -// RUN: 2>&1 | FileCheck -check-prefix=CHECK-NOHVX %s -// RUN: %clang -c %s -### -target hexagon-unknown-elf -mv62 -mhvx -mno-hvx \ -// RUN: 2>&1 | FileCheck -check-prefix=CHECK-NOHVX %s -// CHECK-NOHVX-NOT: "-target-feature" "+hvx - -// No hvx-ieee-fp target feature must be added if -mno-hvx-ieee-fp occurs last -// RUN: %clang -c %s -### -target hexagon-unknown-elf -mv69 -mhvx-ieee-fp -mno-hvx \ -// RUN: -mno-hvx-ieee-fp 2>&1 | FileCheck -check-prefix=CHECK-NOHVX-IEEE-FP %s -// RUN: %clang -c %s -### -target hexagon-unknown-elf -mv69 -mhvx-ieee-fp -mhvx \ -// RUN: -mno-hvx-ieee-fp 2>&1 | FileCheck -check-prefix=CHECK-HVX-NOHVX-IEEE-FP %s -// -// CHECK-NOHVX-IEEE-FP-NOT: "-target-feature" "+hvx-ieee-fp" -// CHECK-HVX-NOHVX-IEEE-FP-NOT: "-target-feature" "+hvx-ieee-fp" -// CHECK-HVX-NOHVX-IEEE-FP: "-target-feature" "+hvx -// CHECK-HVX-NOHVX-IEEE-FP-NOT: "-target-feature" "+hvx-ieee-fp" - -// Hvx target feature should be added if -mno-hvx doesn't occur last -// RUN: %clang -c %s -### -target hexagon-unknown-elf -mv62 -mno-hvx -mhvx\ -// RUN: 2>&1 | FileCheck -check-prefix=CHECK-HVXFEAT %s -// CHECK-HVXFEAT: "-target-feature" "+hvxv62" - -// With -mhvx, the version of hvx defaults to Cpu -// RUN: %clang -c %s -### -target hexagon-unknown-elf -mv60 -mhvx \ -// RUN: 2>&1 | FileCheck -check-prefix=CHECK-HVX-DEFAULT %s -// CHECK-HVX-DEFAULT: "-target-feature" "+hvxv60" - -// Test -mhvx= flag -// RUN: %clang -c %s -### -target hexagon-unknown-elf -mv60 -mhvx=v62 \ -// RUN: 2>&1 | FileCheck -check-prefix=CHECK-HVXEQ %s -// CHECK-HVXEQ: "-target-feature" "+hvxv62" - -// Honor the last occurred -mhvx=, -mhvx flag. -// RUN: %clang -c %s -### -target hexagon-unknown-elf -mv60 -mhvx=v62 -mhvx\ -// RUN: 2>&1 | FileCheck -check-prefix=CHECK-HVXEQ-PRE %s -// CHECK-HVXEQ-PRE-NOT: "-target-feature" "+hvxv62" -// CHECK-HVXEQ-PRE: "-target-feature" "+hvxv60" -// RUN: %clang -c %s -### -target hexagon-unknown-elf -mv60 -mhvx -mhvx=v62\ -// RUN: 2>&1 | FileCheck -check-prefix=CHECK-HVXEQ-PRE2 %s -// CHECK-HVXEQ-PRE2-NOT: "-target-feature" "+hvxv60" -// CHECK-HVXEQ-PRE2: "-target-feature" "+hvxv62" - -// Test -mhvx-length flag -// The default mode on v60,v62 is 64B. -// RUN: %clang -c %s -### -target hexagon-unknown-elf -mv60 -mhvx \ -// RUN: 2>&1 | FileCheck -check-prefix=CHECK-HVXLENGTH-64B %s -// RUN: %clang -c %s -### -target hexagon-unknown-elf -mv60 -mhvx \ -// RUN: -mhvx-length=64b 2>&1 | FileCheck -check-prefix=CHECK-HVXLENGTH-64B %s -// RUN: %clang -c %s -### -target hexagon-unknown-elf -mv60 -mhvx \ -// RUN: -mhvx-length=64B 2>&1 | FileCheck -check-prefix=CHECK-HVXLENGTH-64B %s -// CHECK-HVXLENGTH-64B: "-target-feature" "+hvx{{.*}}" "-target-feature" "+hvx-length64b" -// The default mode on v66 and future archs is 128B. 
-// RUN: %clang -c %s -### -target hexagon-unknown-elf -mv66 -mhvx \ -// RUN: 2>&1 | FileCheck -check-prefix=CHECK-HVXLENGTH-128B %s -// RUN: %clang -c %s -### -target hexagon-unknown-elf -mv62 -mhvx -mhvx-length=128B\ -// RUN: 2>&1 | FileCheck -check-prefix=CHECK-HVXLENGTH-128B %s -// CHECK-HVXLENGTH-128B: "-target-feature" "+hvx{{.*}}" "-target-feature" "+hvx-length128b" - -// Bail out if -mhvx-length is specified without HVX enabled -// RUN: %clang -c %s -### -target hexagon-unknown-elf -mhvx-length=64B \ -// RUN: 2>&1 | FileCheck -check-prefix=CHECK-HVXLENGTH-ERROR %s -// RUN: %clang -c %s -### -target hexagon-unknown-elf -mhvx-length=128B \ -// RUN: 2>&1 | FileCheck -check-prefix=CHECK-HVXLENGTH-ERROR %s -// CHECK-HVXLENGTH-ERROR: error: -mhvx-length is not supported without a -mhvx/-mhvx= flag - -// Error out if an unsupported value is passed to -mhvx-length. // RUN: %clang -c %s -### -target hexagon-unknown-elf -mhvx -mhvx-length=B \ -// RUN: 2>&1 | FileCheck -check-prefix=CHECK-HVXLENGTH-VALUE-ERROR %s +// RUN: 2>&1 | FileCheck -check-prefix=CHECK-HVX-BAD-LENGTH %s // RUN: %clang -c %s -### -target hexagon-unknown-elf -mhvx -mhvx-length=128 \ -// RUN: 2>&1 | FileCheck -check-prefix=CHECK-HVXLENGTH-VALUE-ERROR %s -// CHECK-HVXLENGTH-VALUE-ERROR: error: unsupported argument '{{.*}}' to option 'mhvx-length=' - -// Test -mhvx-ieee-fp flag -// RUN: %clang -c %s -### -target hexagon-unknown-elf -mv69 -mhvx-ieee-fp \ -// RUN: 2>&1 | FileCheck -check-prefix=CHECK-HVXIEEEFP-LONE %s -// RUN: %clang -c %s -### -target hexagon-unknown-elf -mv69 -mhvx -mhvx-ieee-fp \ -// RUN: 2>&1 | FileCheck -check-prefix=CHECK-HVXIEEEFP %s -// RUN: %clang -c %s -### -target hexagon-unknown-elf -mv69 -mno-hvx -mhvx-ieee-fp \ -// RUN: 2>&1 | FileCheck -check-prefix=CHECK-HVXIEEEFP %s -// CHECK-HVXIEEEFP-LONE-NOT: "-target-feature" "+hvx" -// CHECK-HVXIEEEFP-LONE: "-target-feature" "+hvx-ieee-fp" -// CHECK-HVXIEEEFP: "-target-feature" "+hvx-ieee-fp" -// CHECK-HVXIEEEFP-LONE-NOT: "-target-feature" "+hvx" +// RUN: 2>&1 | FileCheck -check-prefix=CHECK-HVX-BAD-LENGTH %s diff --git a/clang/test/Driver/hexagon-vectorize.c b/clang/test/Driver/hexagon-vectorize.c index dcd6a09222eb..6a142e30cd94 100644 --- a/clang/test/Driver/hexagon-vectorize.c +++ b/clang/test/Driver/hexagon-vectorize.c @@ -6,4 +6,4 @@ // CHECK-DEFAULT-NOT: hexagon-autohvx // CHECK-VECTOR: "-mllvm" "-hexagon-autohvx" // CHECK-NOVECTOR-NOT: hexagon-autohvx -// CHECK-NEEDHVX: warning: auto-vectorization requires HVX, use -mhvx to enable it +// CHECK-NEEDHVX: warning: auto-vectorization requires HVX, use -mhvx/-mhvx= to enable it From 60509623c46e8d8122dcc1ee62fecae0a6383139 Mon Sep 17 00:00:00 2001 From: Siva Chandra Reddy Date: Thu, 23 Dec 2021 22:59:14 +0000 Subject: [PATCH 188/293] [libc][obvious] Fix style of MPFRWrapper. 
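
The change is largely mechanical: helper functions and variables in MPFRUtils
and the math test templates move from camelCase to snake_case, and constants
like Precision<T>::value become VALUE, matching the libc naming style. A
representative call-site change, taken from the hunks below:

    // before
    ASSERT_FP_EQ(func(T(1.0)), mpfr::Round(T(1.0), mpfr_mode));
    bool erangeflag = mpfr::RoundToLong(x, mpfr_result);

    // after
    ASSERT_FP_EQ(func(T(1.0)), mpfr::round(T(1.0), mpfr_mode));
    bool erangeflag = mpfr::round_to_long(x, mpfr_result);
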
--- libc/test/src/math/RIntTest.h | 28 +- libc/test/src/math/RoundToIntegerTest.h | 18 +- libc/utils/MPFRWrapper/MPFRUtils.cpp | 392 ++++++++++++------------ libc/utils/MPFRWrapper/MPFRUtils.h | 149 ++++----- 4 files changed, 298 insertions(+), 289 deletions(-) diff --git a/libc/test/src/math/RIntTest.h b/libc/test/src/math/RIntTest.h index 7dc677059f19..438a92338dfa 100644 --- a/libc/test/src/math/RIntTest.h +++ b/libc/test/src/math/RIntTest.h @@ -70,12 +70,12 @@ class RIntTestTemplate : public __llvm_libc::testing::Test { for (int mode : ROUNDING_MODES) { __llvm_libc::fputil::set_round(mode); mpfr::RoundingMode mpfr_mode = to_mpfr_rounding_mode(mode); - ASSERT_FP_EQ(func(T(1.0)), mpfr::Round(T(1.0), mpfr_mode)); - ASSERT_FP_EQ(func(T(-1.0)), mpfr::Round(T(-1.0), mpfr_mode)); - ASSERT_FP_EQ(func(T(10.0)), mpfr::Round(T(10.0), mpfr_mode)); - ASSERT_FP_EQ(func(T(-10.0)), mpfr::Round(T(-10.0), mpfr_mode)); - ASSERT_FP_EQ(func(T(1234.0)), mpfr::Round(T(1234.0), mpfr_mode)); - ASSERT_FP_EQ(func(T(-1234.0)), mpfr::Round(T(-1234.0), mpfr_mode)); + ASSERT_FP_EQ(func(T(1.0)), mpfr::round(T(1.0), mpfr_mode)); + ASSERT_FP_EQ(func(T(-1.0)), mpfr::round(T(-1.0), mpfr_mode)); + ASSERT_FP_EQ(func(T(10.0)), mpfr::round(T(10.0), mpfr_mode)); + ASSERT_FP_EQ(func(T(-10.0)), mpfr::round(T(-10.0), mpfr_mode)); + ASSERT_FP_EQ(func(T(1234.0)), mpfr::round(T(1234.0), mpfr_mode)); + ASSERT_FP_EQ(func(T(-1234.0)), mpfr::round(T(-1234.0), mpfr_mode)); } } @@ -83,12 +83,12 @@ class RIntTestTemplate : public __llvm_libc::testing::Test { for (int mode : ROUNDING_MODES) { __llvm_libc::fputil::set_round(mode); mpfr::RoundingMode mpfr_mode = to_mpfr_rounding_mode(mode); - ASSERT_FP_EQ(func(T(0.5)), mpfr::Round(T(0.5), mpfr_mode)); - ASSERT_FP_EQ(func(T(-0.5)), mpfr::Round(T(-0.5), mpfr_mode)); - ASSERT_FP_EQ(func(T(0.115)), mpfr::Round(T(0.115), mpfr_mode)); - ASSERT_FP_EQ(func(T(-0.115)), mpfr::Round(T(-0.115), mpfr_mode)); - ASSERT_FP_EQ(func(T(0.715)), mpfr::Round(T(0.715), mpfr_mode)); - ASSERT_FP_EQ(func(T(-0.715)), mpfr::Round(T(-0.715), mpfr_mode)); + ASSERT_FP_EQ(func(T(0.5)), mpfr::round(T(0.5), mpfr_mode)); + ASSERT_FP_EQ(func(T(-0.5)), mpfr::round(T(-0.5), mpfr_mode)); + ASSERT_FP_EQ(func(T(0.115)), mpfr::round(T(0.115), mpfr_mode)); + ASSERT_FP_EQ(func(T(-0.115)), mpfr::round(T(-0.115), mpfr_mode)); + ASSERT_FP_EQ(func(T(0.715)), mpfr::round(T(0.715), mpfr_mode)); + ASSERT_FP_EQ(func(T(-0.715)), mpfr::round(T(-0.715), mpfr_mode)); } } @@ -102,7 +102,7 @@ class RIntTestTemplate : public __llvm_libc::testing::Test { for (int mode : ROUNDING_MODES) { __llvm_libc::fputil::set_round(mode); mpfr::RoundingMode mpfr_mode = to_mpfr_rounding_mode(mode); - ASSERT_FP_EQ(func(x), mpfr::Round(x, mpfr_mode)); + ASSERT_FP_EQ(func(x), mpfr::round(x, mpfr_mode)); } } } @@ -121,7 +121,7 @@ class RIntTestTemplate : public __llvm_libc::testing::Test { for (int mode : ROUNDING_MODES) { __llvm_libc::fputil::set_round(mode); mpfr::RoundingMode mpfr_mode = to_mpfr_rounding_mode(mode); - ASSERT_FP_EQ(func(x), mpfr::Round(x, mpfr_mode)); + ASSERT_FP_EQ(func(x), mpfr::round(x, mpfr_mode)); } } } diff --git a/libc/test/src/math/RoundToIntegerTest.h b/libc/test/src/math/RoundToIntegerTest.h index e95f96e92ab0..60647eaadc6b 100644 --- a/libc/test/src/math/RoundToIntegerTest.h +++ b/libc/test/src/math/RoundToIntegerTest.h @@ -141,7 +141,7 @@ class RoundToIntegerTestTemplate : public __llvm_libc::testing::Test { F x = F(bits); long mpfr_result; - bool erangeflag = mpfr::RoundToLong(x, mpfr_result); + bool erangeflag = mpfr::round_to_long(x, 
mpfr_result); ASSERT_FALSE(erangeflag); test_one_input(func, x, mpfr_result, false); } @@ -163,10 +163,10 @@ class RoundToIntegerTestTemplate : public __llvm_libc::testing::Test { long mpfr_long_result; bool erangeflag; if (TestModes) - erangeflag = - mpfr::RoundToLong(x, to_mpfr_rounding_mode(mode), mpfr_long_result); + erangeflag = mpfr::round_to_long(x, to_mpfr_rounding_mode(mode), + mpfr_long_result); else - erangeflag = mpfr::RoundToLong(x, mpfr_long_result); + erangeflag = mpfr::round_to_long(x, mpfr_long_result); ASSERT_FALSE(erangeflag); I mpfr_result = mpfr_long_result; test_one_input(func, x, mpfr_result, false); @@ -210,13 +210,13 @@ class RoundToIntegerTestTemplate : public __llvm_libc::testing::Test { __llvm_libc::fputil::set_round(m); long mpfr_long_result; bool erangeflag = - mpfr::RoundToLong(x, to_mpfr_rounding_mode(m), mpfr_long_result); + mpfr::round_to_long(x, to_mpfr_rounding_mode(m), mpfr_long_result); ASSERT_TRUE(erangeflag); test_one_input(func, x, INTEGER_MIN, true); } } else { long mpfr_long_result; - bool erangeflag = mpfr::RoundToLong(x, mpfr_long_result); + bool erangeflag = mpfr::round_to_long(x, mpfr_long_result); ASSERT_TRUE(erangeflag); test_one_input(func, x, INTEGER_MIN, true); } @@ -280,8 +280,8 @@ class RoundToIntegerTestTemplate : public __llvm_libc::testing::Test { if (TestModes) { for (int m : ROUNDING_MODES) { long mpfr_long_result; - bool erangeflag = - mpfr::RoundToLong(x, to_mpfr_rounding_mode(m), mpfr_long_result); + bool erangeflag = mpfr::round_to_long(x, to_mpfr_rounding_mode(m), + mpfr_long_result); I mpfr_result = mpfr_long_result; __llvm_libc::fputil::set_round(m); if (erangeflag) @@ -291,7 +291,7 @@ class RoundToIntegerTestTemplate : public __llvm_libc::testing::Test { } } else { long mpfr_long_result; - bool erangeflag = mpfr::RoundToLong(x, mpfr_long_result); + bool erangeflag = mpfr::round_to_long(x, mpfr_long_result); I mpfr_result = mpfr_long_result; if (erangeflag) test_one_input(func, x, x > 0 ? INTEGER_MAX : INTEGER_MIN, true); diff --git a/libc/utils/MPFRWrapper/MPFRUtils.cpp b/libc/utils/MPFRWrapper/MPFRUtils.cpp index 7c4f061b47b5..4e25c874d122 100644 --- a/libc/utils/MPFRWrapper/MPFRUtils.cpp +++ b/libc/utils/MPFRWrapper/MPFRUtils.cpp @@ -38,73 +38,77 @@ namespace mpfr { template struct Precision; template <> struct Precision { - static constexpr unsigned int value = 24; + static constexpr unsigned int VALUE = 24; }; template <> struct Precision { - static constexpr unsigned int value = 53; + static constexpr unsigned int VALUE = 53; }; #if !(defined(LLVM_LIBC_ARCH_X86)) template <> struct Precision { - static constexpr unsigned int value = 64; + static constexpr unsigned int VALUE = 64; }; #else template <> struct Precision { - static constexpr unsigned int value = 113; + static constexpr unsigned int VALUE = 113; }; #endif class MPFRNumber { // A precision value which allows sufficiently large additional // precision even compared to quad-precision floating point values. - unsigned int mpfrPrecision; + unsigned int mpfr_precision; mpfr_t value; public: - MPFRNumber() : mpfrPrecision(256) { mpfr_init2(value, mpfrPrecision); } + MPFRNumber() : mpfr_precision(256) { mpfr_init2(value, mpfr_precision); } // We use explicit EnableIf specializations to disallow implicit // conversions. Implicit conversions can potentially lead to loss of // precision. 
template ::Value, int> = 0> - explicit MPFRNumber(XType x, int precision = 128) : mpfrPrecision(precision) { - mpfr_init2(value, mpfrPrecision); + explicit MPFRNumber(XType x, int precision = 128) + : mpfr_precision(precision) { + mpfr_init2(value, mpfr_precision); mpfr_set_flt(value, x, MPFR_RNDN); } template ::Value, int> = 0> - explicit MPFRNumber(XType x, int precision = 128) : mpfrPrecision(precision) { - mpfr_init2(value, mpfrPrecision); + explicit MPFRNumber(XType x, int precision = 128) + : mpfr_precision(precision) { + mpfr_init2(value, mpfr_precision); mpfr_set_d(value, x, MPFR_RNDN); } template ::Value, int> = 0> - explicit MPFRNumber(XType x, int precision = 128) : mpfrPrecision(precision) { - mpfr_init2(value, mpfrPrecision); + explicit MPFRNumber(XType x, int precision = 128) + : mpfr_precision(precision) { + mpfr_init2(value, mpfr_precision); mpfr_set_ld(value, x, MPFR_RNDN); } template ::Value, int> = 0> - explicit MPFRNumber(XType x, int precision = 128) : mpfrPrecision(precision) { - mpfr_init2(value, mpfrPrecision); + explicit MPFRNumber(XType x, int precision = 128) + : mpfr_precision(precision) { + mpfr_init2(value, mpfr_precision); mpfr_set_sj(value, x, MPFR_RNDN); } - MPFRNumber(const MPFRNumber &other) : mpfrPrecision(other.mpfrPrecision) { - mpfr_init2(value, mpfrPrecision); + MPFRNumber(const MPFRNumber &other) : mpfr_precision(other.mpfr_precision) { + mpfr_init2(value, mpfr_precision); mpfr_set(value, other.value, MPFR_RNDN); } ~MPFRNumber() { mpfr_clear(value); } MPFRNumber &operator=(const MPFRNumber &rhs) { - mpfrPrecision = rhs.mpfrPrecision; + mpfr_precision = rhs.mpfr_precision; mpfr_set(value, rhs.value, MPFR_RNDN); return *this; } @@ -185,7 +189,7 @@ class MPFRNumber { return result; } - bool roundToLong(long &result) const { + bool roung_to_long(long &result) const { // We first calculate the rounded value. This way, when converting // to long using mpfr_get_si, the rounding direction of MPFR_RNDN // (or any other rounding mode), does not have an influence. 
@@ -195,10 +199,10 @@ class MPFRNumber { return mpfr_erangeflag_p(); } - bool roundToLong(mpfr_rnd_t rnd, long &result) const { + bool roung_to_long(mpfr_rnd_t rnd, long &result) const { MPFRNumber rint_result; mpfr_rint(rint_result.value, value, rnd); - return rint_result.roundToLong(result); + return rint_result.roung_to_long(result); } MPFRNumber rint(mpfr_rnd_t rnd) const { @@ -374,7 +378,7 @@ namespace internal { template cpp::EnableIfType::Value, MPFRNumber> -unaryOperation(Operation op, InputType input) { +unary_operation(Operation op, InputType input) { MPFRNumber mpfrInput(input); switch (op) { case Operation::Abs: @@ -416,7 +420,7 @@ unaryOperation(Operation op, InputType input) { template cpp::EnableIfType::Value, MPFRNumber> -unaryOperationTwoOutputs(Operation op, InputType input, int &output) { +unary_operation_two_outputs(Operation op, InputType input, int &output) { MPFRNumber mpfrInput(input); switch (op) { case Operation::Frexp: @@ -428,7 +432,7 @@ unaryOperationTwoOutputs(Operation op, InputType input, int &output) { template cpp::EnableIfType::Value, MPFRNumber> -binaryOperationOneOutput(Operation op, InputType x, InputType y) { +binary_operation_one_output(Operation op, InputType x, InputType y) { MPFRNumber inputX(x), inputY(y); switch (op) { case Operation::Hypot: @@ -440,7 +444,8 @@ binaryOperationOneOutput(Operation op, InputType x, InputType y) { template cpp::EnableIfType::Value, MPFRNumber> -binaryOperationTwoOutputs(Operation op, InputType x, InputType y, int &output) { +binary_operation_two_outputs(Operation op, InputType x, InputType y, + int &output) { MPFRNumber inputX(x), inputY(y); switch (op) { case Operation::RemQuo: @@ -452,11 +457,12 @@ binaryOperationTwoOutputs(Operation op, InputType x, InputType y, int &output) { template cpp::EnableIfType::Value, MPFRNumber> -ternaryOperationOneOutput(Operation op, InputType x, InputType y, InputType z) { +ternary_operation_one_output(Operation op, InputType x, InputType y, + InputType z) { // For FMA function, we just need to compare with the mpfr_fma with the same // precision as InputType. Using higher precision as the intermediate results // to compare might incorrectly fail due to double-rounding errors. 
- constexpr unsigned int prec = Precision::value; + constexpr unsigned int prec = Precision::VALUE; MPFRNumber inputX(x, prec), inputY(y, prec), inputZ(z, prec); switch (op) { case Operation::Fma: @@ -467,54 +473,55 @@ ternaryOperationOneOutput(Operation op, InputType x, InputType y, InputType z) { } template -void explainUnaryOperationSingleOutputError(Operation op, T input, T matchValue, - testutils::StreamWrapper &OS) { +void explain_unary_operation_single_output_error(Operation op, T input, + T matchValue, + testutils::StreamWrapper &OS) { MPFRNumber mpfrInput(input); - MPFRNumber mpfrResult = unaryOperation(op, input); + MPFRNumber mpfr_result = unary_operation(op, input); MPFRNumber mpfrMatchValue(matchValue); FPBits inputBits(input); FPBits matchBits(matchValue); - FPBits mpfrResultBits(mpfrResult.as()); + FPBits mpfr_resultBits(mpfr_result.as()); OS << "Match value not within tolerance value of MPFR result:\n" << " Input decimal: " << mpfrInput.str() << '\n'; __llvm_libc::fputil::testing::describeValue(" Input bits: ", input, OS); OS << '\n' << " Match decimal: " << mpfrMatchValue.str() << '\n'; __llvm_libc::fputil::testing::describeValue(" Match bits: ", matchValue, OS); - OS << '\n' << " MPFR result: " << mpfrResult.str() << '\n'; + OS << '\n' << " MPFR result: " << mpfr_result.str() << '\n'; __llvm_libc::fputil::testing::describeValue( - " MPFR rounded: ", mpfrResult.as(), OS); + " MPFR rounded: ", mpfr_result.as(), OS); OS << '\n'; - OS << " ULP error: " << std::to_string(mpfrResult.ulp(matchValue)) + OS << " ULP error: " << std::to_string(mpfr_result.ulp(matchValue)) << '\n'; } template void -explainUnaryOperationSingleOutputError(Operation op, float, float, - testutils::StreamWrapper &); -template void -explainUnaryOperationSingleOutputError(Operation op, double, double, - testutils::StreamWrapper &); -template void explainUnaryOperationSingleOutputError( +explain_unary_operation_single_output_error(Operation op, float, float, + testutils::StreamWrapper &); +template void explain_unary_operation_single_output_error( + Operation op, double, double, testutils::StreamWrapper &); +template void explain_unary_operation_single_output_error( Operation op, long double, long double, testutils::StreamWrapper &); template -void explainUnaryOperationTwoOutputsError(Operation op, T input, - const BinaryOutput &libcResult, - testutils::StreamWrapper &OS) { +void explain_unary_operation_two_outputs_error( + Operation op, T input, const BinaryOutput &libc_result, + testutils::StreamWrapper &OS) { MPFRNumber mpfrInput(input); FPBits inputBits(input); int mpfrIntResult; - MPFRNumber mpfrResult = unaryOperationTwoOutputs(op, input, mpfrIntResult); + MPFRNumber mpfr_result = + unary_operation_two_outputs(op, input, mpfrIntResult); - if (mpfrIntResult != libcResult.i) { + if (mpfrIntResult != libc_result.i) { OS << "MPFR integral result: " << mpfrIntResult << '\n' - << "Libc integral result: " << libcResult.i << '\n'; + << "Libc integral result: " << libc_result.i << '\n'; } else { OS << "Integral result from libc matches integral result from MPFR.\n"; } - MPFRNumber mpfrMatchValue(libcResult.f); + MPFRNumber mpfrMatchValue(libc_result.f); OS << "Libc floating point result is not within tolerance value of the MPFR " << "result.\n\n"; @@ -522,74 +529,73 @@ void explainUnaryOperationTwoOutputsError(Operation op, T input, OS << "Libc floating point value: " << mpfrMatchValue.str() << '\n'; __llvm_libc::fputil::testing::describeValue( - " Libc floating point bits: ", libcResult.f, OS); + " Libc 
floating point bits: ", libc_result.f, OS); OS << "\n\n"; - OS << " MPFR result: " << mpfrResult.str() << '\n'; + OS << " MPFR result: " << mpfr_result.str() << '\n'; __llvm_libc::fputil::testing::describeValue( - " MPFR rounded: ", mpfrResult.as(), OS); + " MPFR rounded: ", mpfr_result.as(), OS); OS << '\n' << " ULP error: " - << std::to_string(mpfrResult.ulp(libcResult.f)) << '\n'; + << std::to_string(mpfr_result.ulp(libc_result.f)) << '\n'; } -template void explainUnaryOperationTwoOutputsError( +template void explain_unary_operation_two_outputs_error( Operation, float, const BinaryOutput &, testutils::StreamWrapper &); template void -explainUnaryOperationTwoOutputsError(Operation, double, - const BinaryOutput &, - testutils::StreamWrapper &); -template void explainUnaryOperationTwoOutputsError( +explain_unary_operation_two_outputs_error(Operation, double, + const BinaryOutput &, + testutils::StreamWrapper &); +template void explain_unary_operation_two_outputs_error( Operation, long double, const BinaryOutput &, testutils::StreamWrapper &); template -void explainBinaryOperationTwoOutputsError(Operation op, - const BinaryInput &input, - const BinaryOutput &libcResult, - testutils::StreamWrapper &OS) { +void explain_binary_operation_two_outputs_error( + Operation op, const BinaryInput &input, + const BinaryOutput &libc_result, testutils::StreamWrapper &OS) { MPFRNumber mpfrX(input.x); MPFRNumber mpfrY(input.y); FPBits xbits(input.x); FPBits ybits(input.y); int mpfrIntResult; - MPFRNumber mpfrResult = - binaryOperationTwoOutputs(op, input.x, input.y, mpfrIntResult); - MPFRNumber mpfrMatchValue(libcResult.f); + MPFRNumber mpfr_result = + binary_operation_two_outputs(op, input.x, input.y, mpfrIntResult); + MPFRNumber mpfrMatchValue(libc_result.f); OS << "Input decimal: x: " << mpfrX.str() << " y: " << mpfrY.str() << '\n' << "MPFR integral result: " << mpfrIntResult << '\n' - << "Libc integral result: " << libcResult.i << '\n' + << "Libc integral result: " << libc_result.i << '\n' << "Libc floating point result: " << mpfrMatchValue.str() << '\n' - << " MPFR result: " << mpfrResult.str() << '\n'; + << " MPFR result: " << mpfr_result.str() << '\n'; __llvm_libc::fputil::testing::describeValue( - "Libc floating point result bits: ", libcResult.f, OS); + "Libc floating point result bits: ", libc_result.f, OS); __llvm_libc::fputil::testing::describeValue( - " MPFR rounded bits: ", mpfrResult.as(), OS); - OS << "ULP error: " << std::to_string(mpfrResult.ulp(libcResult.f)) << '\n'; + " MPFR rounded bits: ", mpfr_result.as(), OS); + OS << "ULP error: " << std::to_string(mpfr_result.ulp(libc_result.f)) << '\n'; } -template void explainBinaryOperationTwoOutputsError( +template void explain_binary_operation_two_outputs_error( Operation, const BinaryInput &, const BinaryOutput &, testutils::StreamWrapper &); -template void explainBinaryOperationTwoOutputsError( +template void explain_binary_operation_two_outputs_error( Operation, const BinaryInput &, const BinaryOutput &, testutils::StreamWrapper &); -template void explainBinaryOperationTwoOutputsError( +template void explain_binary_operation_two_outputs_error( Operation, const BinaryInput &, const BinaryOutput &, testutils::StreamWrapper &); template -void explainBinaryOperationOneOutputError(Operation op, - const BinaryInput &input, - T libcResult, - testutils::StreamWrapper &OS) { +void explain_binary_operation_one_output_error(Operation op, + const BinaryInput &input, + T libc_result, + testutils::StreamWrapper &OS) { MPFRNumber mpfrX(input.x); 
MPFRNumber mpfrY(input.y); FPBits xbits(input.x); FPBits ybits(input.y); - MPFRNumber mpfrResult = binaryOperationOneOutput(op, input.x, input.y); - MPFRNumber mpfrMatchValue(libcResult); + MPFRNumber mpfr_result = binary_operation_one_output(op, input.x, input.y); + MPFRNumber mpfrMatchValue(libc_result); OS << "Input decimal: x: " << mpfrX.str() << " y: " << mpfrY.str() << '\n'; __llvm_libc::fputil::testing::describeValue("First input bits: ", input.x, @@ -598,36 +604,36 @@ void explainBinaryOperationOneOutputError(Operation op, OS); OS << "Libc result: " << mpfrMatchValue.str() << '\n' - << "MPFR result: " << mpfrResult.str() << '\n'; + << "MPFR result: " << mpfr_result.str() << '\n'; __llvm_libc::fputil::testing::describeValue( - "Libc floating point result bits: ", libcResult, OS); + "Libc floating point result bits: ", libc_result, OS); __llvm_libc::fputil::testing::describeValue( - " MPFR rounded bits: ", mpfrResult.as(), OS); - OS << "ULP error: " << std::to_string(mpfrResult.ulp(libcResult)) << '\n'; + " MPFR rounded bits: ", mpfr_result.as(), OS); + OS << "ULP error: " << std::to_string(mpfr_result.ulp(libc_result)) << '\n'; } -template void explainBinaryOperationOneOutputError( +template void explain_binary_operation_one_output_error( Operation, const BinaryInput &, float, testutils::StreamWrapper &); -template void explainBinaryOperationOneOutputError( +template void explain_binary_operation_one_output_error( Operation, const BinaryInput &, double, testutils::StreamWrapper &); -template void explainBinaryOperationOneOutputError( +template void explain_binary_operation_one_output_error( Operation, const BinaryInput &, long double, testutils::StreamWrapper &); template -void explainTernaryOperationOneOutputError(Operation op, - const TernaryInput &input, - T libcResult, - testutils::StreamWrapper &OS) { - MPFRNumber mpfrX(input.x, Precision::value); - MPFRNumber mpfrY(input.y, Precision::value); - MPFRNumber mpfrZ(input.z, Precision::value); +void explain_ternary_operation_one_output_error(Operation op, + const TernaryInput &input, + T libc_result, + testutils::StreamWrapper &OS) { + MPFRNumber mpfrX(input.x, Precision::VALUE); + MPFRNumber mpfrY(input.y, Precision::VALUE); + MPFRNumber mpfrZ(input.z, Precision::VALUE); FPBits xbits(input.x); FPBits ybits(input.y); FPBits zbits(input.z); - MPFRNumber mpfrResult = - ternaryOperationOneOutput(op, input.x, input.y, input.z); - MPFRNumber mpfrMatchValue(libcResult); + MPFRNumber mpfr_result = + ternary_operation_one_output(op, input.x, input.y, input.z); + MPFRNumber mpfrMatchValue(libc_result); OS << "Input decimal: x: " << mpfrX.str() << " y: " << mpfrY.str() << " z: " << mpfrZ.str() << '\n'; @@ -639,144 +645,142 @@ void explainTernaryOperationOneOutputError(Operation op, OS); OS << "Libc result: " << mpfrMatchValue.str() << '\n' - << "MPFR result: " << mpfrResult.str() << '\n'; + << "MPFR result: " << mpfr_result.str() << '\n'; __llvm_libc::fputil::testing::describeValue( - "Libc floating point result bits: ", libcResult, OS); + "Libc floating point result bits: ", libc_result, OS); __llvm_libc::fputil::testing::describeValue( - " MPFR rounded bits: ", mpfrResult.as(), OS); - OS << "ULP error: " << std::to_string(mpfrResult.ulp(libcResult)) << '\n'; + " MPFR rounded bits: ", mpfr_result.as(), OS); + OS << "ULP error: " << std::to_string(mpfr_result.ulp(libc_result)) << '\n'; } -template void explainTernaryOperationOneOutputError( +template void explain_ternary_operation_one_output_error( Operation, const TernaryInput &, float, 
testutils::StreamWrapper &); -template void explainTernaryOperationOneOutputError( +template void explain_ternary_operation_one_output_error( Operation, const TernaryInput &, double, testutils::StreamWrapper &); -template void explainTernaryOperationOneOutputError( +template void explain_ternary_operation_one_output_error( Operation, const TernaryInput &, long double, testutils::StreamWrapper &); template -bool compareUnaryOperationSingleOutput(Operation op, T input, T libcResult, - double ulpError) { +bool compare_unary_operation_single_output(Operation op, T input, T libc_result, + double ulp_error) { // If the ulp error is exactly 0.5 (i.e a tie), we would check that the result // is rounded to the nearest even. - MPFRNumber mpfrResult = unaryOperation(op, input); - double ulp = mpfrResult.ulp(libcResult); - bool bitsAreEven = ((FPBits(libcResult).uintval() & 1) == 0); - return (ulp < ulpError) || - ((ulp == ulpError) && ((ulp != 0.5) || bitsAreEven)); + MPFRNumber mpfr_result = unary_operation(op, input); + double ulp = mpfr_result.ulp(libc_result); + bool bits_are_even = ((FPBits(libc_result).uintval() & 1) == 0); + return (ulp < ulp_error) || + ((ulp == ulp_error) && ((ulp != 0.5) || bits_are_even)); } -template bool compareUnaryOperationSingleOutput(Operation, float, float, - double); -template bool compareUnaryOperationSingleOutput(Operation, double, - double, double); -template bool compareUnaryOperationSingleOutput(Operation, - long double, - long double, - double); +template bool compare_unary_operation_single_output(Operation, float, + float, double); +template bool compare_unary_operation_single_output(Operation, double, + double, double); +template bool compare_unary_operation_single_output(Operation, + long double, + long double, + double); template -bool compareUnaryOperationTwoOutputs(Operation op, T input, - const BinaryOutput &libcResult, - double ulpError) { +bool compare_unary_operation_two_outputs(Operation op, T input, + const BinaryOutput &libc_result, + double ulp_error) { int mpfrIntResult; - MPFRNumber mpfrResult = unaryOperationTwoOutputs(op, input, mpfrIntResult); - double ulp = mpfrResult.ulp(libcResult.f); + MPFRNumber mpfr_result = + unary_operation_two_outputs(op, input, mpfrIntResult); + double ulp = mpfr_result.ulp(libc_result.f); - if (mpfrIntResult != libcResult.i) + if (mpfrIntResult != libc_result.i) return false; - bool bitsAreEven = ((FPBits(libcResult.f).uintval() & 1) == 0); - return (ulp < ulpError) || - ((ulp == ulpError) && ((ulp != 0.5) || bitsAreEven)); + bool bits_are_even = ((FPBits(libc_result.f).uintval() & 1) == 0); + return (ulp < ulp_error) || + ((ulp == ulp_error) && ((ulp != 0.5) || bits_are_even)); } template bool -compareUnaryOperationTwoOutputs(Operation, float, - const BinaryOutput &, double); -template bool -compareUnaryOperationTwoOutputs(Operation, double, - const BinaryOutput &, double); -template bool compareUnaryOperationTwoOutputs( +compare_unary_operation_two_outputs(Operation, float, + const BinaryOutput &, double); +template bool compare_unary_operation_two_outputs( + Operation, double, const BinaryOutput &, double); +template bool compare_unary_operation_two_outputs( Operation, long double, const BinaryOutput &, double); template -bool compareBinaryOperationTwoOutputs(Operation op, const BinaryInput &input, - const BinaryOutput &libcResult, - double ulpError) { +bool compare_binary_operation_two_outputs(Operation op, + const BinaryInput &input, + const BinaryOutput &libc_result, + double ulp_error) { int mpfrIntResult; 
- MPFRNumber mpfrResult = - binaryOperationTwoOutputs(op, input.x, input.y, mpfrIntResult); - double ulp = mpfrResult.ulp(libcResult.f); + MPFRNumber mpfr_result = + binary_operation_two_outputs(op, input.x, input.y, mpfrIntResult); + double ulp = mpfr_result.ulp(libc_result.f); - if (mpfrIntResult != libcResult.i) { + if (mpfrIntResult != libc_result.i) { if (op == Operation::RemQuo) { - if ((0x7 & mpfrIntResult) != (0x7 & libcResult.i)) + if ((0x7 & mpfrIntResult) != (0x7 & libc_result.i)) return false; } else { return false; } } - bool bitsAreEven = ((FPBits(libcResult.f).uintval() & 1) == 0); - return (ulp < ulpError) || - ((ulp == ulpError) && ((ulp != 0.5) || bitsAreEven)); + bool bits_are_even = ((FPBits(libc_result.f).uintval() & 1) == 0); + return (ulp < ulp_error) || + ((ulp == ulp_error) && ((ulp != 0.5) || bits_are_even)); } -template bool -compareBinaryOperationTwoOutputs(Operation, const BinaryInput &, - const BinaryOutput &, double); -template bool -compareBinaryOperationTwoOutputs(Operation, const BinaryInput &, - const BinaryOutput &, double); -template bool compareBinaryOperationTwoOutputs( +template bool compare_binary_operation_two_outputs( + Operation, const BinaryInput &, const BinaryOutput &, double); +template bool compare_binary_operation_two_outputs( + Operation, const BinaryInput &, const BinaryOutput &, + double); +template bool compare_binary_operation_two_outputs( Operation, const BinaryInput &, const BinaryOutput &, double); template -bool compareBinaryOperationOneOutput(Operation op, const BinaryInput &input, - T libcResult, double ulpError) { - MPFRNumber mpfrResult = binaryOperationOneOutput(op, input.x, input.y); - double ulp = mpfrResult.ulp(libcResult); - - bool bitsAreEven = ((FPBits(libcResult).uintval() & 1) == 0); - return (ulp < ulpError) || - ((ulp == ulpError) && ((ulp != 0.5) || bitsAreEven)); +bool compare_binary_operation_one_output(Operation op, + const BinaryInput &input, + T libc_result, double ulp_error) { + MPFRNumber mpfr_result = binary_operation_one_output(op, input.x, input.y); + double ulp = mpfr_result.ulp(libc_result); + + bool bits_are_even = ((FPBits(libc_result).uintval() & 1) == 0); + return (ulp < ulp_error) || + ((ulp == ulp_error) && ((ulp != 0.5) || bits_are_even)); } -template bool compareBinaryOperationOneOutput(Operation, - const BinaryInput &, - float, double); -template bool -compareBinaryOperationOneOutput(Operation, const BinaryInput &, - double, double); -template bool compareBinaryOperationOneOutput( +template bool compare_binary_operation_one_output( + Operation, const BinaryInput &, float, double); +template bool compare_binary_operation_one_output( + Operation, const BinaryInput &, double, double); +template bool compare_binary_operation_one_output( Operation, const BinaryInput &, long double, double); template -bool compareTernaryOperationOneOutput(Operation op, - const TernaryInput &input, - T libcResult, double ulpError) { - MPFRNumber mpfrResult = - ternaryOperationOneOutput(op, input.x, input.y, input.z); - double ulp = mpfrResult.ulp(libcResult); - - bool bitsAreEven = ((FPBits(libcResult).uintval() & 1) == 0); - return (ulp < ulpError) || - ((ulp == ulpError) && ((ulp != 0.5) || bitsAreEven)); +bool compare_ternary_operation_one_output(Operation op, + const TernaryInput &input, + T libc_result, double ulp_error) { + MPFRNumber mpfr_result = + ternary_operation_one_output(op, input.x, input.y, input.z); + double ulp = mpfr_result.ulp(libc_result); + + bool bits_are_even = ((FPBits(libc_result).uintval() & 
1) == 0); + return (ulp < ulp_error) || + ((ulp == ulp_error) && ((ulp != 0.5) || bits_are_even)); } -template bool -compareTernaryOperationOneOutput(Operation, const TernaryInput &, - float, double); -template bool compareTernaryOperationOneOutput( +template bool compare_ternary_operation_one_output( + Operation, const TernaryInput &, float, double); +template bool compare_ternary_operation_one_output( Operation, const TernaryInput &, double, double); -template bool compareTernaryOperationOneOutput( +template bool compare_ternary_operation_one_output( Operation, const TernaryInput &, long double, double); -static mpfr_rnd_t getMPFRRoundingMode(RoundingMode mode) { +static mpfr_rnd_t get_mpfr_rounding_mode(RoundingMode mode) { switch (mode) { case RoundingMode::Upward: return MPFR_RNDU; @@ -795,33 +799,33 @@ static mpfr_rnd_t getMPFRRoundingMode(RoundingMode mode) { } // namespace internal -template bool RoundToLong(T x, long &result) { +template bool round_to_long(T x, long &result) { MPFRNumber mpfr(x); - return mpfr.roundToLong(result); + return mpfr.roung_to_long(result); } -template bool RoundToLong(float, long &); -template bool RoundToLong(double, long &); -template bool RoundToLong(long double, long &); +template bool round_to_long(float, long &); +template bool round_to_long(double, long &); +template bool round_to_long(long double, long &); -template bool RoundToLong(T x, RoundingMode mode, long &result) { +template bool round_to_long(T x, RoundingMode mode, long &result) { MPFRNumber mpfr(x); - return mpfr.roundToLong(internal::getMPFRRoundingMode(mode), result); + return mpfr.roung_to_long(internal::get_mpfr_rounding_mode(mode), result); } -template bool RoundToLong(float, RoundingMode, long &); -template bool RoundToLong(double, RoundingMode, long &); -template bool RoundToLong(long double, RoundingMode, long &); +template bool round_to_long(float, RoundingMode, long &); +template bool round_to_long(double, RoundingMode, long &); +template bool round_to_long(long double, RoundingMode, long &); -template T Round(T x, RoundingMode mode) { +template T round(T x, RoundingMode mode) { MPFRNumber mpfr(x); - MPFRNumber result = mpfr.rint(internal::getMPFRRoundingMode(mode)); + MPFRNumber result = mpfr.rint(internal::get_mpfr_rounding_mode(mode)); return result.as(); } -template float Round(float, RoundingMode); -template double Round(double, RoundingMode); -template long double Round(long double, RoundingMode); +template float round(float, RoundingMode); +template double round(double, RoundingMode); +template long double round(long double, RoundingMode); } // namespace mpfr } // namespace testing diff --git a/libc/utils/MPFRWrapper/MPFRUtils.h b/libc/utils/MPFRWrapper/MPFRUtils.h index ff35a56231bb..c52f2a76d9c6 100644 --- a/libc/utils/MPFRWrapper/MPFRUtils.h +++ b/libc/utils/MPFRWrapper/MPFRUtils.h @@ -98,132 +98,137 @@ namespace internal { template struct AreMatchingBinaryInputAndBinaryOutput { - static constexpr bool value = false; + static constexpr bool VALUE = false; }; template struct AreMatchingBinaryInputAndBinaryOutput, BinaryOutput> { - static constexpr bool value = cpp::IsFloatingPointType::Value; + static constexpr bool VALUE = cpp::IsFloatingPointType::Value; }; template -bool compareUnaryOperationSingleOutput(Operation op, T input, T libcOutput, - double t); +bool compare_unary_operation_single_output(Operation op, T input, T libc_output, + double t); template -bool compareUnaryOperationTwoOutputs(Operation op, T input, - const BinaryOutput &libcOutput, - double t); 
+bool compare_unary_operation_two_outputs(Operation op, T input, + const BinaryOutput &libc_output, + double t); template -bool compareBinaryOperationTwoOutputs(Operation op, const BinaryInput &input, - const BinaryOutput &libcOutput, - double t); +bool compare_binary_operation_two_outputs(Operation op, + const BinaryInput &input, + const BinaryOutput &libc_output, + double t); template -bool compareBinaryOperationOneOutput(Operation op, const BinaryInput &input, - T libcOutput, double t); +bool compare_binary_operation_one_output(Operation op, + const BinaryInput &input, + T libc_output, double t); template -bool compareTernaryOperationOneOutput(Operation op, - const TernaryInput &input, - T libcOutput, double t); +bool compare_ternary_operation_one_output(Operation op, + const TernaryInput &input, + T libc_output, double t); template -void explainUnaryOperationSingleOutputError(Operation op, T input, T matchValue, - testutils::StreamWrapper &OS); +void explain_unary_operation_single_output_error(Operation op, T input, + T match_value, + testutils::StreamWrapper &OS); template -void explainUnaryOperationTwoOutputsError(Operation op, T input, - const BinaryOutput &matchValue, - testutils::StreamWrapper &OS); +void explain_unary_operation_two_outputs_error( + Operation op, T input, const BinaryOutput &match_value, + testutils::StreamWrapper &OS); template -void explainBinaryOperationTwoOutputsError(Operation op, - const BinaryInput &input, - const BinaryOutput &matchValue, - testutils::StreamWrapper &OS); +void explain_binary_operation_two_outputs_error( + Operation op, const BinaryInput &input, + const BinaryOutput &match_value, testutils::StreamWrapper &OS); template -void explainBinaryOperationOneOutputError(Operation op, - const BinaryInput &input, - T matchValue, - testutils::StreamWrapper &OS); +void explain_binary_operation_one_output_error(Operation op, + const BinaryInput &input, + T match_value, + testutils::StreamWrapper &OS); template -void explainTernaryOperationOneOutputError(Operation op, - const TernaryInput &input, - T matchValue, - testutils::StreamWrapper &OS); +void explain_ternary_operation_one_output_error(Operation op, + const TernaryInput &input, + T match_value, + testutils::StreamWrapper &OS); template class MPFRMatcher : public testing::Matcher { InputType input; - OutputType matchValue; - double ulpTolerance; + OutputType match_value; + double ulp_tolerance; public: - MPFRMatcher(InputType testInput, double ulpTolerance) - : input(testInput), ulpTolerance(ulpTolerance) {} + MPFRMatcher(InputType testInput, double ulp_tolerance) + : input(testInput), ulp_tolerance(ulp_tolerance) {} bool match(OutputType libcResult) { - matchValue = libcResult; - return match(input, matchValue, ulpTolerance); + match_value = libcResult; + return match(input, match_value, ulp_tolerance); } - void explainError(testutils::StreamWrapper &OS) override { - explainError(input, matchValue, OS); + // This method is marked with NOLINT because it the name `explainError` + // does not confirm to the coding style. 
+ void explainError(testutils::StreamWrapper &OS) override { // NOLINT + explain_error(input, match_value, OS); } private: template static bool match(T in, T out, double tolerance) { - return compareUnaryOperationSingleOutput(op, in, out, tolerance); + return compare_unary_operation_single_output(op, in, out, tolerance); } template static bool match(T in, const BinaryOutput &out, double tolerance) { - return compareUnaryOperationTwoOutputs(op, in, out, tolerance); + return compare_unary_operation_two_outputs(op, in, out, tolerance); } template static bool match(const BinaryInput &in, T out, double tolerance) { - return compareBinaryOperationOneOutput(op, in, out, tolerance); + return compare_binary_operation_one_output(op, in, out, tolerance); } template static bool match(BinaryInput in, const BinaryOutput &out, double tolerance) { - return compareBinaryOperationTwoOutputs(op, in, out, tolerance); + return compare_binary_operation_two_outputs(op, in, out, tolerance); } template static bool match(const TernaryInput &in, T out, double tolerance) { - return compareTernaryOperationOneOutput(op, in, out, tolerance); + return compare_ternary_operation_one_output(op, in, out, tolerance); } template - static void explainError(T in, T out, testutils::StreamWrapper &OS) { - explainUnaryOperationSingleOutputError(op, in, out, OS); + static void explain_error(T in, T out, testutils::StreamWrapper &OS) { + explain_unary_operation_single_output_error(op, in, out, OS); } template - static void explainError(T in, const BinaryOutput &out, - testutils::StreamWrapper &OS) { - explainUnaryOperationTwoOutputsError(op, in, out, OS); + static void explain_error(T in, const BinaryOutput &out, + testutils::StreamWrapper &OS) { + explain_unary_operation_two_outputs_error(op, in, out, OS); } template - static void explainError(const BinaryInput &in, const BinaryOutput &out, - testutils::StreamWrapper &OS) { - explainBinaryOperationTwoOutputsError(op, in, out, OS); + static void explain_error(const BinaryInput &in, + const BinaryOutput &out, + testutils::StreamWrapper &OS) { + explain_binary_operation_two_outputs_error(op, in, out, OS); } template - static void explainError(const BinaryInput &in, T out, - testutils::StreamWrapper &OS) { - explainBinaryOperationOneOutputError(op, in, out, OS); + static void explain_error(const BinaryInput &in, T out, + testutils::StreamWrapper &OS) { + explain_binary_operation_one_output_error(op, in, out, OS); } template - static void explainError(const TernaryInput &in, T out, - testutils::StreamWrapper &OS) { - explainTernaryOperationOneOutputError(op, in, out, OS); + static void explain_error(const TernaryInput &in, T out, + testutils::StreamWrapper &OS) { + explain_ternary_operation_one_output_error(op, in, out, OS); } }; @@ -232,7 +237,7 @@ class MPFRMatcher : public testing::Matcher { // Return true if the input and ouput types for the operation op are valid // types. 
template -constexpr bool isValidOperation() { +constexpr bool is_valid_operation() { return (Operation::BeginUnaryOperationsSingleOutput < op && op < Operation::EndUnaryOperationsSingleOutput && cpp::IsSame::Value && @@ -248,7 +253,7 @@ constexpr bool isValidOperation() { (Operation::BeginBinaryOperationsTwoOutputs < op && op < Operation::EndBinaryOperationsTwoOutputs && internal::AreMatchingBinaryInputAndBinaryOutput::value) || + OutputType>::VALUE) || (Operation::BeginTernaryOperationsSingleOuput < op && op < Operation::EndTernaryOperationsSingleOutput && cpp::IsFloatingPointType::Value && @@ -257,29 +262,29 @@ constexpr bool isValidOperation() { template __attribute__((no_sanitize("address"))) -cpp::EnableIfType(), +cpp::EnableIfType(), internal::MPFRMatcher> -getMPFRMatcher(InputType input, OutputType outputUnused, double t) { +get_mpfr_matcher(InputType input, OutputType output_unused, double t) { return internal::MPFRMatcher(input, t); } enum class RoundingMode : uint8_t { Upward, Downward, TowardZero, Nearest }; -template T Round(T x, RoundingMode mode); +template T round(T x, RoundingMode mode); -template bool RoundToLong(T x, long &result); -template bool RoundToLong(T x, RoundingMode mode, long &result); +template bool round_to_long(T x, long &result); +template bool round_to_long(T x, RoundingMode mode, long &result); } // namespace mpfr } // namespace testing } // namespace __llvm_libc -#define EXPECT_MPFR_MATCH(op, input, matchValue, tolerance) \ - EXPECT_THAT(matchValue, __llvm_libc::testing::mpfr::getMPFRMatcher( \ - input, matchValue, tolerance)) +#define EXPECT_MPFR_MATCH(op, input, match_value, tolerance) \ + EXPECT_THAT(match_value, __llvm_libc::testing::mpfr::get_mpfr_matcher( \ + input, match_value, tolerance)) -#define ASSERT_MPFR_MATCH(op, input, matchValue, tolerance) \ - ASSERT_THAT(matchValue, __llvm_libc::testing::mpfr::getMPFRMatcher( \ - input, matchValue, tolerance)) +#define ASSERT_MPFR_MATCH(op, input, match_value, tolerance) \ + ASSERT_THAT(match_value, __llvm_libc::testing::mpfr::get_mpfr_matcher( \ + input, match_value, tolerance)) #endif // LLVM_LIBC_UTILS_TESTUTILS_MPFRUTILS_H From 8a26ba6a02f1319ddaca017bbda81c2b82fb4050 Mon Sep 17 00:00:00 2001 From: Jason Molenda Date: Thu, 23 Dec 2021 15:18:03 -0800 Subject: [PATCH 189/293] Load binary by UUID from qProcessInfo packet fields Support three new keys in the qProcessInfo response from the remote gdb stub to handle the case of attaching to a core running some type of standalone/firmware code and the stub knows the UUID and load address-or-slide for the binary. There will be no proper DynamicLoader plugin in this scenario, but we can try to locate and load the binary into lldb at the correct offset. 
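[Editorial note, not part of the original patch: as a rough illustration of the new keys, a stub that supports them might answer qProcessInfo with a semicolon-separated key:value list such as "pid:1;ptrsize:8;endian:little;main-binary-uuid:1B4E28BA-2FA1-11D2-883F-B9A761BDE3FB;main-binary-address:80010000;". The standalone C++ sketch below uses no LLDB APIs; the struct, function, UUID, and address values are invented for illustration only. It shows how such a response could be split into key/value pairs and how the three new keys map onto a UUID string, a load address, and a slide.]

// Hypothetical, self-contained illustration of decoding the three new
// qProcessInfo keys; it does not use or mirror the real LLDB classes.
#include <cstdint>
#include <iostream>
#include <optional>
#include <sstream>
#include <string>

struct StandaloneBinaryInfo {
  std::string uuid;                     // main-binary-uuid
  std::optional<uint64_t> load_address; // main-binary-address (hex)
  std::optional<uint64_t> slide;        // main-binary-slide (hex)
};

// Parse a "key1:value1;key2:value2;..." response body.
static StandaloneBinaryInfo ParseProcessInfo(const std::string &response) {
  StandaloneBinaryInfo info;
  std::stringstream stream(response);
  std::string field;
  while (std::getline(stream, field, ';')) {
    auto colon = field.find(':');
    if (colon == std::string::npos)
      continue;
    std::string key = field.substr(0, colon);
    std::string value = field.substr(colon + 1);
    if (key == "main-binary-uuid")
      info.uuid = value;
    else if (key == "main-binary-address")
      info.load_address = std::stoull(value, nullptr, 16);
    else if (key == "main-binary-slide")
      info.slide = std::stoull(value, nullptr, 16);
  }
  return info;
}

int main() {
  const std::string reply =
      "pid:1;ptrsize:8;endian:little;"
      "main-binary-uuid:1B4E28BA-2FA1-11D2-883F-B9A761BDE3FB;"
      "main-binary-address:80010000;";
  StandaloneBinaryInfo info = ParseProcessInfo(reply);
  std::cout << "uuid=" << info.uuid << "\n";
  if (info.load_address) // address given: load the binary at this address
    std::cout << "load address=0x" << std::hex << *info.load_address << "\n";
  else if (info.slide)   // only a slide given: apply it as an offset
    std::cout << "slide=0x" << std::hex << *info.slide << "\n";
  return 0;
}

[In the patch itself, the equivalent decoding lives in GDBRemoteCommunicationClient::GetCurrentProcessInfo, and ProcessGDBRemote::DoConnectRemote then uses the UUID and the address or slide to locate the module and set its load address.]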
Differential Revision: https://reviews.llvm.org/D116211 rdar://75191077 --- lldb/docs/lldb-gdb-remote.txt | 3 + .../GDBRemoteCommunicationClient.cpp | 36 ++++++++ .../gdb-remote/GDBRemoteCommunicationClient.h | 6 ++ .../Process/gdb-remote/ProcessGDBRemote.cpp | 88 +++++++++++++++++++ 4 files changed, 133 insertions(+) diff --git a/lldb/docs/lldb-gdb-remote.txt b/lldb/docs/lldb-gdb-remote.txt index 17588021e313..980dc77c86f5 100644 --- a/lldb/docs/lldb-gdb-remote.txt +++ b/lldb/docs/lldb-gdb-remote.txt @@ -1003,6 +1003,9 @@ vendor: is a string that represents the vendor (apple) endian: is one of "little", "big", or "pdp" ptrsize: is a number that represents how big pointers are in bytes +main-binary-uuid: is the UUID of a firmware type binary that the gdb stub knows about +main-binary-address: is the load address of the firmware type binary +main-binary-slide: is the slide of the firmware type binary, if address isn't known //---------------------------------------------------------------------- // "qShlibInfoAddr" diff --git a/lldb/source/Plugins/Process/gdb-remote/GDBRemoteCommunicationClient.cpp b/lldb/source/Plugins/Process/gdb-remote/GDBRemoteCommunicationClient.cpp index 07dfa5e04ee5..b5b105351de5 100644 --- a/lldb/source/Plugins/Process/gdb-remote/GDBRemoteCommunicationClient.cpp +++ b/lldb/source/Plugins/Process/gdb-remote/GDBRemoteCommunicationClient.cpp @@ -1006,6 +1006,23 @@ GDBRemoteCommunicationClient::GetProcessArchitecture() { return m_process_arch; } +bool GDBRemoteCommunicationClient::GetProcessStandaloneBinary( + UUID &uuid, addr_t &value, bool &value_is_offset) { + if (m_qProcessInfo_is_valid == eLazyBoolCalculate) + GetCurrentProcessInfo(); + + // Return true if we have a UUID or an address/offset of the + // main standalone / firmware binary being used. 
+ if (!m_process_standalone_uuid.IsValid() && + m_process_standalone_value == LLDB_INVALID_ADDRESS) + return false; + + uuid = m_process_standalone_uuid; + value = m_process_standalone_value; + value_is_offset = m_process_standalone_value_is_offset; + return true; +} + bool GDBRemoteCommunicationClient::GetGDBServerVersion() { if (m_qGDBServerVersion_is_valid == eLazyBoolCalculate) { m_gdb_server_name.clear(); @@ -2147,6 +2164,25 @@ bool GDBRemoteCommunicationClient::GetCurrentProcessInfo(bool allow_lazy) { } else if (name.equals("elf_abi")) { elf_abi = std::string(value); ++num_keys_decoded; + } else if (name.equals("main-binary-uuid")) { + m_process_standalone_uuid.SetFromStringRef(value); + ++num_keys_decoded; + } else if (name.equals("main-binary-slide")) { + StringExtractor extractor(value); + m_process_standalone_value = + extractor.GetU64(LLDB_INVALID_ADDRESS, 16); + if (m_process_standalone_value != LLDB_INVALID_ADDRESS) { + m_process_standalone_value_is_offset = true; + ++num_keys_decoded; + } + } else if (name.equals("main-binary-address")) { + StringExtractor extractor(value); + m_process_standalone_value = + extractor.GetU64(LLDB_INVALID_ADDRESS, 16); + if (m_process_standalone_value != LLDB_INVALID_ADDRESS) { + m_process_standalone_value_is_offset = false; + ++num_keys_decoded; + } } } if (num_keys_decoded > 0) diff --git a/lldb/source/Plugins/Process/gdb-remote/GDBRemoteCommunicationClient.h b/lldb/source/Plugins/Process/gdb-remote/GDBRemoteCommunicationClient.h index 6765372ce124..c69c33bb1c15 100644 --- a/lldb/source/Plugins/Process/gdb-remote/GDBRemoteCommunicationClient.h +++ b/lldb/source/Plugins/Process/gdb-remote/GDBRemoteCommunicationClient.h @@ -217,6 +217,9 @@ class GDBRemoteCommunicationClient : public GDBRemoteClientBase { const ArchSpec &GetProcessArchitecture(); + bool GetProcessStandaloneBinary(UUID &uuid, lldb::addr_t &value, + bool &value_is_offset); + void GetRemoteQSupported(); bool GetVContSupported(char flavor); @@ -584,6 +587,9 @@ class GDBRemoteCommunicationClient : public GDBRemoteClientBase { ArchSpec m_host_arch; ArchSpec m_process_arch; + UUID m_process_standalone_uuid; + lldb::addr_t m_process_standalone_value = LLDB_INVALID_ADDRESS; + bool m_process_standalone_value_is_offset = false; llvm::VersionTuple m_os_version; llvm::VersionTuple m_maccatalyst_version; std::string m_os_build; diff --git a/lldb/source/Plugins/Process/gdb-remote/ProcessGDBRemote.cpp b/lldb/source/Plugins/Process/gdb-remote/ProcessGDBRemote.cpp index 89702d176ef8..93fe36c0d9d6 100644 --- a/lldb/source/Plugins/Process/gdb-remote/ProcessGDBRemote.cpp +++ b/lldb/source/Plugins/Process/gdb-remote/ProcessGDBRemote.cpp @@ -561,6 +561,94 @@ Status ProcessGDBRemote::DoConnectRemote(llvm::StringRef remote_url) { } } + // The remote stub may know about the "main binary" in + // the context of a firmware debug session, and can + // give us a UUID and an address/slide of where the + // binary is loaded in memory. + UUID standalone_uuid; + addr_t standalone_value; + bool standalone_value_is_offset; + if (m_gdb_comm.GetProcessStandaloneBinary( + standalone_uuid, standalone_value, standalone_value_is_offset)) { + ModuleSP module_sp; + + if (standalone_uuid.IsValid()) { + ModuleSpec module_spec; + module_spec.GetUUID() = standalone_uuid; + + // Look up UUID in global module cache before attempting + // a more expensive search. 
+      Status error = ModuleList::GetSharedModule(module_spec, module_sp,
+                                                 nullptr, nullptr, nullptr);
+
+      if (!module_sp) {
+        // Force an external lookup, if that tool is available.
+        if (!module_spec.GetSymbolFileSpec())
+          Symbols::DownloadObjectAndSymbolFile(module_spec, true);
+
+        if (FileSystem::Instance().Exists(module_spec.GetFileSpec())) {
+          module_sp = std::make_shared(module_spec);
+        }
+      }
+
+      // If we couldn't find the binary anywhere else, as a last resort,
+      // read it out of memory.
+      if (!module_sp.get() && standalone_value != LLDB_INVALID_ADDRESS &&
+          !standalone_value_is_offset) {
+        char namebuf[80];
+        snprintf(namebuf, sizeof(namebuf), "mem-image-0x%" PRIx64,
+                 standalone_value);
+        module_sp =
+            ReadModuleFromMemory(FileSpec(namebuf), standalone_value);
+      }
+
+      Log *log(ProcessGDBRemoteLog::GetLogIfAllCategoriesSet(
+          LIBLLDB_LOG_DYNAMIC_LOADER));
+      if (module_sp.get()) {
+        target.GetImages().AppendIfNeeded(module_sp, false);
+
+        bool changed = false;
+        if (module_sp->GetObjectFile()) {
+          if (standalone_value != LLDB_INVALID_ADDRESS) {
+            if (log)
+              log->Printf("Loading binary UUID %s at %s 0x%" PRIx64,
+                          standalone_uuid.GetAsString().c_str(),
+                          standalone_value_is_offset ? "offset" : "address",
+                          standalone_value);
+            module_sp->SetLoadAddress(target, standalone_value,
+                                      standalone_value_is_offset, changed);
+          } else {
+            // No address/offset/slide, load the binary at file address,
+            // offset 0.
+            if (log)
+              log->Printf("Loading binary UUID %s at file address",
+                          standalone_uuid.GetAsString().c_str());
+            const bool value_is_slide = true;
+            module_sp->SetLoadAddress(target, 0, value_is_slide, changed);
+          }
+        } else {
+          // In-memory image, load at its true address, offset 0.
+          if (log)
+            log->Printf("Loading binary UUID %s from memory",
+                        standalone_uuid.GetAsString().c_str());
+          const bool value_is_slide = true;
+          module_sp->SetLoadAddress(target, 0, value_is_slide, changed);
+        }
+
+        ModuleList added_module;
+        added_module.Append(module_sp, false);
+        target.ModulesDidLoad(added_module);
+      } else {
+        if (log)
+          log->Printf("Unable to find binary with UUID %s and load it at "
+                      "%s 0x%" PRIx64,
+                      standalone_uuid.GetAsString().c_str(),
+                      standalone_value_is_offset ? "offset" : "address",
+                      standalone_value);
+      }
+    }
+  }
+
   const StateType state = SetThreadStopInfo(response);
   if (state != eStateInvalid) {
     SetPrivateState(state);

From 735fe1da6b9710d68f93ec3818e18f8aa2da11d0 Mon Sep 17 00:00:00 2001
From: Mehdi Amini
Date: Fri, 24 Dec 2021 00:09:15 +0000
Subject: [PATCH 190/293] Revert "[mlir][tosa] Split tosa-to-linalg named ops
 out of pass"

This reverts commit 313de31fbb757643db13bcb47f8fe515039e298a.

There is a missing CMake dependency; building with shared libraries is
broken:

55.509 [45/4/3061] Linking CXX shared library lib/libMLIRTosaToLinalg.so.14git
FAILED: lib/libMLIRTosaToLinalg.so.14git
...
TosaToLinalgPass.cpp: undefined reference to `mlir::createCanonicalizerPass()' --- mlir/include/mlir/Conversion/Passes.td | 14 - .../Conversion/TosaToLinalg/TosaToLinalg.h | 4 - .../Conversion/TosaToLinalg/CMakeLists.txt | 2 - .../Conversion/TosaToLinalg/TosaToLinalg.cpp | 814 +++++++++++++++- .../TosaToLinalg/TosaToLinalgNamed.cpp | 885 ------------------ .../TosaToLinalg/TosaToLinalgNamedPass.cpp | 68 -- .../TosaToLinalg/TosaToLinalgPass.cpp | 4 - .../TosaToLinalg/tosa-to-linalg-named.mlir | 448 --------- .../TosaToLinalg/tosa-to-linalg.mlir | 460 +++++++++ 9 files changed, 1273 insertions(+), 1426 deletions(-) delete mode 100644 mlir/lib/Conversion/TosaToLinalg/TosaToLinalgNamed.cpp delete mode 100644 mlir/lib/Conversion/TosaToLinalg/TosaToLinalgNamedPass.cpp delete mode 100644 mlir/test/Conversion/TosaToLinalg/tosa-to-linalg-named.mlir diff --git a/mlir/include/mlir/Conversion/Passes.td b/mlir/include/mlir/Conversion/Passes.td index 4d1f383c0229..f6e49cc889d1 100644 --- a/mlir/include/mlir/Conversion/Passes.td +++ b/mlir/include/mlir/Conversion/Passes.td @@ -645,20 +645,6 @@ def TosaToLinalg : FunctionPass<"tosa-to-linalg"> { let constructor = "tosa::createTosaToLinalg()"; } -//===----------------------------------------------------------------------===// -// TosaToLinalgNamed -//===----------------------------------------------------------------------===// - -def TosaToLinalgNamed : FunctionPass<"tosa-to-linalg-named"> { - let summary = "Lower TOSA to LinAlg named operations"; - let description = [{ - Pass that converts TOSA operations to the equivalent operations using the - Linalg named operations. - }]; - - let constructor = "tosa::createTosaToLinalgNamed()"; -} - //===----------------------------------------------------------------------===// // TosaToSCF //===----------------------------------------------------------------------===// diff --git a/mlir/include/mlir/Conversion/TosaToLinalg/TosaToLinalg.h b/mlir/include/mlir/Conversion/TosaToLinalg/TosaToLinalg.h index ec44d01065a7..b542833fa0e9 100644 --- a/mlir/include/mlir/Conversion/TosaToLinalg/TosaToLinalg.h +++ b/mlir/include/mlir/Conversion/TosaToLinalg/TosaToLinalg.h @@ -20,7 +20,6 @@ namespace mlir { namespace tosa { std::unique_ptr createTosaToLinalg(); -std::unique_ptr createTosaToLinalgNamed(); /// Populates passes to convert from TOSA to Linalg on buffers. At the end of /// the pass, the function will only contain linalg ops or standard ops if the @@ -30,9 +29,6 @@ void addTosaToLinalgPasses(OpPassManager &pm); /// Populates conversion passes from TOSA dialect to Linalg dialect. void populateTosaToLinalgConversionPatterns(RewritePatternSet *patterns); -/// Populates conversion passes from TOSA dialect to Linalg named operations. 
-void populateTosaToLinalgNamedConversionPatterns(RewritePatternSet *patterns); - } // namespace tosa } // namespace mlir diff --git a/mlir/lib/Conversion/TosaToLinalg/CMakeLists.txt b/mlir/lib/Conversion/TosaToLinalg/CMakeLists.txt index b98afb2aaadc..5617dd3e0ce0 100644 --- a/mlir/lib/Conversion/TosaToLinalg/CMakeLists.txt +++ b/mlir/lib/Conversion/TosaToLinalg/CMakeLists.txt @@ -1,7 +1,5 @@ add_mlir_conversion_library(MLIRTosaToLinalg TosaToLinalg.cpp - TosaToLinalgNamed.cpp - TosaToLinalgNamedPass.cpp TosaToLinalgPass.cpp ADDITIONAL_HEADER_DIRS diff --git a/mlir/lib/Conversion/TosaToLinalg/TosaToLinalg.cpp b/mlir/lib/Conversion/TosaToLinalg/TosaToLinalg.cpp index 04262234ceaa..abff8b57ccdc 100644 --- a/mlir/lib/Conversion/TosaToLinalg/TosaToLinalg.cpp +++ b/mlir/lib/Conversion/TosaToLinalg/TosaToLinalg.cpp @@ -61,6 +61,37 @@ static mlir::SelectOp clampHelper(Location loc, Value arg, return rewriter.create(loc, largerThanMax, max, minOrArg); } +static mlir::Value applyPad(Location loc, Value input, ArrayRef pad, + Attribute padAttr, OpBuilder &rewriter) { + // Input should be padded if necessary. + if (llvm::all_of(pad, [](int64_t p) { return p == 0; })) + return input; + + ShapedType inputTy = input.getType().cast(); + Type inputETy = inputTy.getElementType(); + auto inputShape = inputTy.getShape(); + + assert((inputShape.size() * 2) == pad.size()); + + SmallVector paddedShape; + SmallVector lowIndices; + SmallVector highIndices; + for (int i = 0, s = inputShape.size(); i < s; i++) { + auto lowPad = pad[i * 2]; + auto highPad = pad[i * 2 + 1]; + paddedShape.push_back(inputShape[i] + highPad + lowPad); + lowIndices.push_back(rewriter.getIndexAttr(lowPad)); + highIndices.push_back(rewriter.getIndexAttr(highPad)); + } + + Value padValue = rewriter.create(loc, padAttr); + + return linalg::PadTensorOp::createPadScalarOp( + RankedTensorType::get(paddedShape, inputETy), input, padValue, + lowIndices, highIndices, /*nofold=*/false, loc, rewriter) + .result(); +} + static SmallVector filterDynamicDims(SmallVector dynDims) { SmallVector filteredDims; for (auto dim : dynDims) @@ -1034,6 +1065,510 @@ class PointwiseConverter : public OpRewritePattern { } }; +class ConvConverter : public OpConversionPattern { +public: + using OpConversionPattern::OpConversionPattern; + LogicalResult + matchAndRewrite(tosa::Conv2DOp op, OpAdaptor adaptor, + ConversionPatternRewriter &rewriter) const final { + Location loc = op->getLoc(); + Value input = op->getOperand(0); + Value weight = op->getOperand(1); + Value bias = op->getOperand(2); + + ShapedType inputTy = input.getType().cast(); + ShapedType weightTy = weight.getType().cast(); + ShapedType biasTy = bias.getType().cast(); + ShapedType resultTy = op->getResult(0).getType().cast(); + + Type inputETy = inputTy.getElementType(); + Type resultETy = resultTy.getElementType(); + + auto padAttr = op->getAttr("pad").cast(); + auto strideTosaAttr = op->getAttr("stride").cast(); + auto dilationTosaAttr = op->getAttr("dilation").cast(); + bool isQuantized = op->hasAttr("quantization_info"); + + if (!inputTy.hasStaticShape() || !weightTy.hasStaticShape() || + !biasTy.hasStaticShape() || !resultTy.hasStaticShape()) + return rewriter.notifyMatchFailure(op, + "tosa.conv ops require static shapes"); + + if (inputETy.isUnsignedInteger()) + return rewriter.notifyMatchFailure( + op, "tosa.conv ops does not support unsigned integer input"); + + auto weightShape = weightTy.getShape(); + + // Apply padding as necessary. 
+ Attribute zeroAttr = rewriter.getZeroAttr(inputETy); + if (isQuantized) { + auto quantizationInfo = + op->getAttr("quantization_info").cast(); + auto iZp = quantizationInfo.input_zp().getValue().getSExtValue(); + + int64_t intMin = + APInt::getSignedMinValue(inputETy.getIntOrFloatBitWidth()) + .getSExtValue(); + int64_t intMax = + APInt::getSignedMaxValue(inputETy.getIntOrFloatBitWidth()) + .getSExtValue(); + + if (iZp < intMin || iZp > intMax) + return rewriter.notifyMatchFailure( + op, "tosa.conv op quantization has zp outside of input range"); + + zeroAttr = rewriter.getIntegerAttr(inputETy, iZp); + } + + llvm::SmallVector pad; + pad.resize(2, 0); + getValuesFromIntArrayAttribute(padAttr, pad); + pad.resize(pad.size() + 2, 0); + input = applyPad(loc, input, pad, zeroAttr, rewriter); + + // Transpose the kernel to match dimension ordering of the linalg + // convolution operation. + // TODO(suderman): See if this can be efficiently folded - check whether + // the input is used anywhere else, if not fold the constant. + SmallVector weightPerm{1, 2, 3, 0}; + SmallVector newWeightShape{weightShape[1], weightShape[2], + weightShape[3], weightShape[0]}; + auto weightPermAttr = DenseIntElementsAttr::get( + RankedTensorType::get({4}, rewriter.getI64Type()), weightPerm); + Value weightPermValue = + rewriter.create(loc, weightPermAttr); + Type newWeightTy = + RankedTensorType::get(newWeightShape, weightTy.getElementType()); + weight = rewriter.create(loc, newWeightTy, weight, + weightPermValue); + + Attribute resultZeroAttr = rewriter.getZeroAttr(resultETy); + Value initTensor = rewriter.create( + loc, resultTy.getShape(), resultETy); + Value zero = rewriter.create(loc, resultZeroAttr); + Value zeroTensor = + rewriter.create(loc, zero, initTensor).getResult(0); + + // Extract the attributes for convolution. + llvm::SmallVector stride, dilation; + getValuesFromIntArrayAttribute(strideTosaAttr, stride); + getValuesFromIntArrayAttribute(dilationTosaAttr, dilation); + + // Create the convolution op. 
+ auto strideAttr = DenseIntElementsAttr::get( + RankedTensorType::get({2}, rewriter.getI64Type()), stride); + auto dilationAttr = DenseIntElementsAttr::get( + RankedTensorType::get({2}, rewriter.getI64Type()), dilation); + + // Create maps for the bias broadcasting + SmallVector indexingMaps; + indexingMaps.push_back(AffineMap::get( + /*dimCount=*/resultTy.getRank(), /*symbolCount=*/0, + {rewriter.getAffineDimExpr(3)}, rewriter.getContext())); + indexingMaps.push_back(rewriter.getMultiDimIdentityMap(resultTy.getRank())); + indexingMaps.push_back(rewriter.getMultiDimIdentityMap(resultTy.getRank())); + + Value biasInitTensor = rewriter.create( + loc, resultTy.getShape(), resultETy); + + if (isQuantized) { + auto quantizationInfo = + op->getAttr("quantization_info").cast(); + auto iZp = rewriter.getI32IntegerAttr( + quantizationInfo.input_zp().getValue().getSExtValue()); + auto kZp = rewriter.getI32IntegerAttr( + quantizationInfo.weight_zp().getValue().getSExtValue()); + + auto iZpVal = rewriter.create(loc, iZp); + auto kZpVal = rewriter.create(loc, kZp); + Value conv = + rewriter + .create( + loc, resultTy, ValueRange{input, weight, iZpVal, kZpVal}, + ValueRange{zeroTensor}, strideAttr, dilationAttr) + ->getResult(0); + + Value result = + rewriter + .create( + loc, resultTy, ValueRange({bias, conv}), biasInitTensor, + indexingMaps, getNParallelLoopsAttrs(resultTy.getRank()), + [&](OpBuilder &nestedBuilder, Location nestedLoc, + ValueRange args) { + Value added = nestedBuilder.create( + loc, args[0], args[1]); + nestedBuilder.create(nestedLoc, added); + }) + .getResult(0); + rewriter.replaceOp(op, result); + return success(); + } + + Value conv = rewriter + .create( + loc, resultTy, ValueRange{input, weight}, + ValueRange{zeroTensor}, strideAttr, dilationAttr) + ->getResult(0); + + Value result = + rewriter + .create( + loc, resultTy, ValueRange({bias, conv}), biasInitTensor, + indexingMaps, getNParallelLoopsAttrs(resultTy.getRank()), + [&](OpBuilder &nestedBuilder, Location nestedLoc, + ValueRange args) { + Value added = nestedBuilder.create( + loc, args[0], args[1]); + nestedBuilder.create(nestedLoc, added); + }) + .getResult(0); + + rewriter.replaceOp(op, result); + return success(); + } +}; + +class DepthwiseConvConverter + : public OpConversionPattern { +public: + using OpConversionPattern::OpConversionPattern; + LogicalResult + matchAndRewrite(tosa::DepthwiseConv2DOp op, OpAdaptor adaptor, + ConversionPatternRewriter &rewriter) const final { + Location loc = op->getLoc(); + Value input = op->getOperand(0); + Value weight = op->getOperand(1); + Value bias = op->getOperand(2); + + ShapedType inputTy = input.getType().cast(); + ShapedType weightTy = weight.getType().cast(); + ShapedType biasTy = bias.getType().cast(); + ShapedType resultTy = op->getResult(0).getType().cast(); + + Type inputETy = inputTy.getElementType(); + Type resultETy = resultTy.getElementType(); + + auto padAttr = op->getAttr("pad").cast(); + auto strideTosaAttr = op->getAttr("stride").cast(); + auto dilationTosaAttr = op->getAttr("dilation").cast(); + + bool isQuantized = op->hasAttr("quantization_info"); + IntegerAttr iZp; + IntegerAttr kZp; + if (isQuantized) { + auto quantizationInfo = + op->getAttr("quantization_info").cast(); + iZp = rewriter.getI32IntegerAttr( + quantizationInfo.input_zp().getValue().getSExtValue()); + kZp = rewriter.getI32IntegerAttr( + quantizationInfo.weight_zp().getValue().getSExtValue()); + } + + if (!inputTy.hasStaticShape() || !weightTy.hasStaticShape() || + !biasTy.hasStaticShape() || 
!resultTy.hasStaticShape()) + return rewriter.notifyMatchFailure(op, + "tosa.conv ops require static shapes"); + + auto weightShape = weightTy.getShape(); + auto resultShape = resultTy.getShape(); + + // Apply padding as necessary. + Attribute zeroAttr = rewriter.getZeroAttr(inputETy); + if (isQuantized) { + auto quantizationInfo = + op->getAttr("quantization_info").cast(); + auto iZp = quantizationInfo.input_zp().getValue().getSExtValue(); + + int64_t intMin = + APInt::getSignedMinValue(inputETy.getIntOrFloatBitWidth()) + .getSExtValue(); + int64_t intMax = + APInt::getSignedMaxValue(inputETy.getIntOrFloatBitWidth()) + .getSExtValue(); + + if (iZp < intMin || iZp > intMax) + return rewriter.notifyMatchFailure( + op, "tosa.depthwise_conv op quantization has zp outside of input " + "range"); + + zeroAttr = rewriter.getIntegerAttr(inputETy, iZp); + } + + llvm::SmallVector pad; + pad.resize(2, 0); + getValuesFromIntArrayAttribute(padAttr, pad); + pad.resize(pad.size() + 2, 0); + + input = applyPad(loc, input, pad, zeroAttr, rewriter); + + // Extract the attributes for convolution. + llvm::SmallVector stride, dilation; + getValuesFromIntArrayAttribute(strideTosaAttr, stride); + getValuesFromIntArrayAttribute(dilationTosaAttr, dilation); + + // Create the convolution op. + auto strideAttr = DenseIntElementsAttr::get( + RankedTensorType::get({2}, rewriter.getI64Type()), stride); + auto dilationAttr = DenseIntElementsAttr::get( + RankedTensorType::get({2}, rewriter.getI64Type()), dilation); + ShapedType linalgConvTy = + RankedTensorType::get({resultShape[0], resultShape[1], resultShape[2], + weightShape[2], weightShape[3]}, + resultETy); + + // Broadcast the initial value to the output tensor before convolving. + SmallVector indexingMaps; + indexingMaps.push_back(AffineMap::get( + /*dimCount=*/resultTy.getRank(), /*symbolCount=*/0, + {rewriter.getAffineDimExpr(3)}, rewriter.getContext())); + indexingMaps.push_back(rewriter.getMultiDimIdentityMap(resultTy.getRank())); + indexingMaps.push_back(rewriter.getMultiDimIdentityMap(resultTy.getRank())); + + Attribute resultZeroAttr = rewriter.getZeroAttr(resultETy); + Value initTensor = rewriter.create( + loc, linalgConvTy.getShape(), resultETy); + Value zero = rewriter.create(loc, resultZeroAttr); + Value zeroTensor = + rewriter.create(loc, zero, initTensor).getResult(0); + + Value biasInitTensor = rewriter.create( + loc, resultTy.getShape(), resultETy); + if (!isQuantized) { + Value conv = rewriter + .create( + loc, linalgConvTy, ValueRange{input, weight}, + ValueRange{zeroTensor}, strideAttr, dilationAttr) + .getResult(0); + Value convReshape = rewriter.create(loc, resultTy, conv); + Value result = + rewriter + .create( + loc, resultTy, ValueRange({bias, convReshape}), + biasInitTensor, indexingMaps, + getNParallelLoopsAttrs(resultTy.getRank()), + [&](OpBuilder &nestedBuilder, Location nestedLoc, + ValueRange args) { + Value added = nestedBuilder.create( + loc, args[0], args[1]); + nestedBuilder.create(nestedLoc, added); + }) + .getResult(0); + rewriter.replaceOp(op, result); + } else { + auto iZpVal = rewriter.create(loc, iZp); + auto kZpVal = rewriter.create(loc, kZp); + Value conv = + rewriter + .create( + loc, linalgConvTy, ValueRange{input, weight, iZpVal, kZpVal}, + ValueRange{zeroTensor}, strideAttr, dilationAttr) + .getResult(0); + Value convReshape = rewriter.create(loc, resultTy, conv); + Value result = + rewriter + .create( + loc, resultTy, ValueRange({bias, convReshape}), + biasInitTensor, indexingMaps, + 
getNParallelLoopsAttrs(resultTy.getRank()), + [&](OpBuilder &nestedBuilder, Location nestedLoc, + ValueRange args) { + Value added = nestedBuilder.create( + loc, args[0], args[1]); + nestedBuilder.create(nestedLoc, added); + }) + .getResult(0); + rewriter.replaceOp(op, result); + } + return success(); + } +}; + +class MatMulConverter : public OpConversionPattern { +public: + using OpConversionPattern::OpConversionPattern; + LogicalResult + matchAndRewrite(tosa::MatMulOp op, OpAdaptor adaptor, + ConversionPatternRewriter &rewriter) const final { + Location loc = op.getLoc(); + + auto outputTy = op.getType().cast(); + auto outputElementTy = outputTy.getElementType(); + + auto firstOperandTy = op->getOperand(0).getType().cast(); + auto secondOperandTy = op->getOperand(1).getType().cast(); + + SmallVector dynDims; + dynDims.resize(op->getResult(0).getType().cast().getRank()); + + if (!firstOperandTy.hasRank() || firstOperandTy.isDynamicDim(0)) { + dynDims[0] = rewriter.create(loc, op->getOperand(0), 0); + } + + if (!firstOperandTy.hasRank() || firstOperandTy.isDynamicDim(1)) { + dynDims[1] = rewriter.create(loc, op->getOperand(0), 1); + } + + if (!secondOperandTy.hasRank() || secondOperandTy.isDynamicDim(2)) { + dynDims[2] = rewriter.create(loc, op->getOperand(1), 2); + } + + SmallVector filteredDims = filterDynamicDims(dynDims); + + auto zeroAttr = rewriter.getZeroAttr(outputElementTy); + Value zero = rewriter.create(loc, zeroAttr); + auto initTensor = rewriter.create( + loc, filteredDims, outputTy.getShape(), outputTy.getElementType()); + Value zeroTensor = + rewriter.create(loc, zero, initTensor).getResult(0); + if (!op.quantization_info()) { + rewriter.replaceOpWithNewOp( + op, TypeRange{op.getType()}, ValueRange{adaptor.a(), adaptor.b()}, + ValueRange{zeroTensor}); + return success(); + } + + auto quantizationInfo = op.quantization_info().getValue(); + auto aZp = rewriter.create( + loc, rewriter.getI32IntegerAttr( + quantizationInfo.a_zp().getValue().getSExtValue())); + auto bZp = rewriter.create( + loc, rewriter.getI32IntegerAttr( + quantizationInfo.b_zp().getValue().getSExtValue())); + rewriter.replaceOpWithNewOp( + op, TypeRange{op.getType()}, + ValueRange{adaptor.a(), adaptor.b(), aZp, bZp}, zeroTensor); + + return success(); + } +}; + +class FullyConnectedConverter + : public OpConversionPattern { +public: + using OpConversionPattern::OpConversionPattern; + LogicalResult + matchAndRewrite(tosa::FullyConnectedOp op, OpAdaptor adaptor, + ConversionPatternRewriter &rewriter) const final { + Location loc = op.getLoc(); + auto outputTy = op.getType().cast(); + auto input = op.input(); + auto inputTy = input.getType().cast(); + + auto bias = op.bias(); + + auto weight = op.weight(); + auto weightTy = weight.getType().cast(); + auto weightShape = weightTy.getShape(); + + auto outputETy = outputTy.getElementType(); + + SmallVector dynDims; + dynDims.resize(op->getResult(0).getType().cast().getRank()); + + if (!inputTy.hasRank() || inputTy.isDynamicDim(0)) { + dynDims[0] = rewriter.create(loc, input, 0); + } + + if (!weightTy.hasRank() || weightTy.isDynamicDim(0)) { + dynDims[1] = rewriter.create(loc, weight, 0); + } + + SmallVector filteredDims = filterDynamicDims(dynDims); + + // Creating maps for the output of MatMul and the bias + SmallVector indexingMaps; + + // Broadcast the bias. 
+ indexingMaps.push_back(AffineMap::get(/*dimCount=*/2, /*symbolCount=*/0, + {rewriter.getAffineDimExpr(1)}, + rewriter.getContext())); + + indexingMaps.push_back(rewriter.getMultiDimIdentityMap(outputTy.getRank())); + indexingMaps.push_back(rewriter.getMultiDimIdentityMap(outputTy.getRank())); + + auto initTensor = rewriter.create( + loc, filteredDims, outputTy.getShape(), outputTy.getElementType()); + + // When quantized, the input elemeny type is not the same as the output + Attribute resultZeroAttr = rewriter.getZeroAttr(outputETy); + Value zero = rewriter.create(loc, resultZeroAttr); + Value zeroTensor = + rewriter.create(loc, zero, initTensor).getResult(0); + + SmallVector permutation{1, 0}; + auto permutationAttr = DenseIntElementsAttr::get( + RankedTensorType::get({2}, rewriter.getI64Type()), permutation); + Value permutationValue = + rewriter.create(loc, permutationAttr); + + SmallVector newWeightShape{weightShape[1], weightShape[0]}; + Type newWeightTy = + RankedTensorType::get(newWeightShape, weightTy.getElementType()); + + Value transposedWeight = rewriter.create( + loc, newWeightTy, weight, permutationValue); + + auto biasInitTensor = + rewriter + .create(loc, filteredDims, + outputTy.getShape(), outputETy) + ->getResults(); + + if (!op.quantization_info()) { + Value matmul = rewriter + .create( + loc, TypeRange{op.getType()}, + ValueRange{input, transposedWeight}, zeroTensor) + ->getResult(0); + + Value result = + rewriter + .create( + loc, outputTy, ValueRange({bias, matmul}), biasInitTensor, + indexingMaps, getNParallelLoopsAttrs(outputTy.getRank()), + [&](OpBuilder &nestedBuilder, Location nestedLoc, + ValueRange args) { + Value added = nestedBuilder.create( + loc, args[0], args[1]); + nestedBuilder.create(nestedLoc, added); + }) + .getResult(0); + rewriter.replaceOp(op, result); + return success(); + } + + auto quantizationInfo = op.quantization_info().getValue(); + auto inputZp = rewriter.create( + loc, rewriter.getI32IntegerAttr( + quantizationInfo.input_zp().getValue().getSExtValue())); + auto outputZp = rewriter.create( + loc, rewriter.getI32IntegerAttr( + quantizationInfo.weight_zp().getValue().getSExtValue())); + Value matmul = + rewriter + .create( + loc, TypeRange{op.getType()}, + ValueRange{input, transposedWeight, inputZp, outputZp}, + zeroTensor) + ->getResult(0); + Value result = + rewriter + .create( + loc, outputTy, ValueRange({bias, matmul}), biasInitTensor, + indexingMaps, getNParallelLoopsAttrs(outputTy.getRank()), + [&](OpBuilder &nestedBuilder, Location nestedLoc, + ValueRange args) { + Value added = nestedBuilder.create( + loc, args[0], args[1]); + nestedBuilder.create(nestedLoc, added); + }) + .getResult(0); + rewriter.replaceOp(op, result); + return success(); + } +}; + class ReshapeConverterCollapse : public OpConversionPattern { public: using OpConversionPattern::OpConversionPattern; @@ -2275,6 +2810,277 @@ class TableConverter : public OpRewritePattern { } }; +class MaxPool2dConverter : public OpRewritePattern { +public: + using OpRewritePattern::OpRewritePattern; + + LogicalResult matchAndRewrite(tosa::MaxPool2dOp op, + PatternRewriter &rewriter) const final { + Location loc = op.getLoc(); + Value input = op.input(); + ShapedType inputTy = input.getType().cast(); + + ShapedType resultTy = op.getType().template cast(); + Type resultETy = inputTy.getElementType(); + + if (!inputTy.hasStaticShape()) + return failure(); + + // Determine what the initial value needs to be for the max pool op. 
+ Attribute initialAttr; + if (resultETy.isF32()) + initialAttr = rewriter.getFloatAttr( + resultETy, + APFloat::getLargest(resultETy.cast().getFloatSemantics(), + true)); + + if (resultETy.isa()) + initialAttr = rewriter.getIntegerAttr( + resultETy, + APInt::getSignedMinValue(resultETy.getIntOrFloatBitWidth())); + + if (!initialAttr) + return rewriter.notifyMatchFailure( + op, "Unsupported initial value for tosa.maxpool_2d op"); + + // Apply padding as necessary. + llvm::SmallVector pad; + pad.resize(2, 0); + getValuesFromIntArrayAttribute(op.pad(), pad); + pad.resize(pad.size() + 2, 0); + Value paddedInput = applyPad(loc, input, pad, initialAttr, rewriter); + + Value initialValue = rewriter.create(loc, initialAttr); + + SmallVector kernel, stride; + getValuesFromIntArrayAttribute(op.kernel(), kernel); + getValuesFromIntArrayAttribute(op.stride(), stride); + + Attribute strideAttr = rewriter.getI64VectorAttr(stride); + Attribute dilationAttr = rewriter.getI64VectorAttr({1, 1}); + + // Create the linalg op that performs pooling. + Value initTensor = rewriter.create( + loc, resultTy.getShape(), resultTy.getElementType()); + + Value filledInitTensor = + rewriter.create(loc, initialValue, initTensor).result(); + + Value fakeWindowDims = + rewriter.create(loc, kernel, resultETy); + + rewriter.replaceOpWithNewOp( + op, ArrayRef{resultTy}, ValueRange{paddedInput, fakeWindowDims}, + filledInitTensor, strideAttr, dilationAttr); + return success(); + } +}; + +class AvgPool2dConverter : public OpRewritePattern { +public: + using OpRewritePattern::OpRewritePattern; + + LogicalResult matchAndRewrite(tosa::AvgPool2dOp op, + PatternRewriter &rewriter) const final { + Location loc = op.getLoc(); + Value input = op.input(); + ShapedType inputTy = input.getType().cast(); + Type inElementTy = inputTy.getElementType(); + + ShapedType resultTy = op.getType().template cast(); + Type resultETy = op.getType().cast().getElementType(); + + Type accETy = + inElementTy.isa() ? rewriter.getI32Type() : inElementTy; + ShapedType accTy = resultTy.clone(accETy); + + if (!inputTy.hasStaticShape()) + return failure(); + + // Apply padding as necessary. + llvm::SmallVector pad; + pad.resize(2, 0); + getValuesFromIntArrayAttribute(op.pad(), pad); + pad.resize(pad.size() + 2, 0); + Attribute padAttr = rewriter.getZeroAttr(inElementTy); + Value paddedInput = applyPad(loc, input, pad, padAttr, rewriter); + + Attribute initialAttr = rewriter.getZeroAttr(accETy); + Value initialValue = rewriter.create(loc, initialAttr); + + SmallVector kernel, stride; + getValuesFromIntArrayAttribute(op.kernel(), kernel); + getValuesFromIntArrayAttribute(op.stride(), stride); + + Attribute strideAttr = rewriter.getI64VectorAttr(stride); + Attribute dilationAttr = rewriter.getI64VectorAttr({1, 1}); + + // Create the linalg op that performs pooling. + Value poolInitTensor = + rewriter.create(loc, accTy.getShape(), accETy); + + Value filledInitTensor = + rewriter.create(loc, initialValue, poolInitTensor) + .result(); + + Value fakeWindowDims = + rewriter.create(loc, kernel, accETy); + + // Sum across the pooled region. + Value poolingOp = rewriter + .create( + loc, ArrayRef{accTy}, + ValueRange{paddedInput, fakeWindowDims}, + filledInitTensor, strideAttr, dilationAttr) + .getResult(0); + + // Normalize the summed value by the number of elements grouped in each + // pool. 
+ auto poolingOpTy = poolingOp.getType().cast(); + auto affineMap = rewriter.getMultiDimIdentityMap(resultTy.getRank()); + + Value genericInitTensor = rewriter.create( + loc, resultTy.getShape(), resultETy); + + auto genericOp = rewriter.create( + loc, ArrayRef({resultTy}), ValueRange{poolingOp}, + ValueRange{genericInitTensor}, + ArrayRef({affineMap, affineMap}), + getNParallelLoopsAttrs(resultTy.getRank()), + [&](OpBuilder &b, Location loc, ValueRange args) { + auto zero = rewriter.create(loc, 0); + auto one = rewriter.create(loc, 1); + auto iH = rewriter.create( + loc, poolingOpTy.getDimSize(1) - 1); + auto iW = rewriter.create( + loc, poolingOpTy.getDimSize(2) - 1); + + // Compute the indices from either end. + auto y0 = rewriter.create(loc, 1); + auto x0 = rewriter.create(loc, 2); + auto y1 = rewriter.create(loc, iH, y0); + auto x1 = rewriter.create(loc, iW, x0); + + // Determines what the portion of valid input is covered by the + // kernel. + auto padFn = [&](Value v, Value x, int64_t pad) -> Value { + if (pad == 0) + return v; + + auto padVal = rewriter.create(loc, pad); + Value dx = rewriter.create(loc, x, padVal); + + Value cmp = rewriter.create( + loc, arith::CmpIPredicate::slt, dx, zero); + Value offset = rewriter.create(loc, cmp, dx, zero); + return rewriter.create(loc, v, offset)->getResult(0); + }; + + // Compute the vertical component of coverage. + auto kH0 = rewriter.create(loc, kernel[0]); + auto kH1 = padFn(kH0, y0, pad[2]); + auto kH2 = padFn(kH1, y1, pad[3]); + auto kHCmp = rewriter.create( + loc, arith::CmpIPredicate::slt, kH2, one); + auto kH3 = rewriter.create(loc, kHCmp, one, kH2); + + // compute the horizontal component of coverage. + auto kW0 = rewriter.create(loc, kernel[1]); + auto kW1 = padFn(kW0, x0, pad[4]); + auto kW2 = padFn(kW1, x1, pad[5]); + auto kWCmp = rewriter.create( + loc, arith::CmpIPredicate::slt, kW2, one); + auto kW3 = rewriter.create(loc, kWCmp, one, kW2); + + // Compute the total number of elements and normalize. + Value count = rewriter.create(loc, kH3, kW3); + auto countI = rewriter.create( + loc, rewriter.getI32Type(), count); + + // Divide by the number of summed values. For floats this is just + // a div however for quantized values input normalization had + // to be applied. + Value poolVal = args[0]; + if (accETy.isa()) { + auto countF = rewriter.create(loc, accETy, countI); + poolVal = rewriter.create(loc, poolVal, countF) + ->getResult(0); + } else { + + // If we have quantization information we need to apply an offset + // for the input zp value. + if (op.quantization_info()) { + auto quantizationInfo = op.quantization_info().getValue(); + auto inputZp = rewriter.create( + loc, quantizationInfo.input_zp()); + Value offset = + rewriter.create(loc, accETy, countI, inputZp); + poolVal = + rewriter.create(loc, accETy, poolVal, offset); + } + + // Compute the multiplier and shift values for the quantization + // normalization. Preferably we would want to compute more bits + // however 32-bits should be enough for compute. Honestly we + // should probably straight divide. 
+ int64_t numerator = ((1 << 30) + 1); + int64_t shift = 30; + + Value numeratorVal = rewriter.create( + loc, rewriter.getI32IntegerAttr(numerator)); + Value multiplierVal = + rewriter + .create(loc, rewriter.getI32Type(), + numeratorVal, countI) + .getResult(); + Value shiftVal = rewriter.create( + loc, rewriter.getI8IntegerAttr(shift)); + + auto scaled = + rewriter + .create( + loc, rewriter.getI32Type(), poolVal, multiplierVal, + shiftVal, rewriter.getBoolAttr(false)) + .getResult(); + + // If we have quantization information we need to apply output + // zeropoint. + if (op.quantization_info()) { + auto quantizationInfo = op.quantization_info().getValue(); + auto outputZp = rewriter.create( + loc, quantizationInfo.output_zp()); + scaled = rewriter.create(loc, scaled, outputZp) + .getResult(); + } + + // Apply Clip. + int64_t outBitwidth = resultETy.getIntOrFloatBitWidth(); + + auto min = rewriter.create( + loc, APInt::getSignedMinValue(outBitwidth).getSExtValue(), + accETy); + auto max = rewriter.create( + loc, APInt::getSignedMaxValue(outBitwidth).getSExtValue(), + accETy); + auto clamp = clampHelper( + loc, scaled, min, max, arith::CmpIPredicate::slt, rewriter); + + poolVal = clamp; + // Convert type. + if (resultETy != clamp.getType()) { + poolVal = + rewriter.create(loc, resultETy, poolVal); + } + } + + rewriter.create(loc, poolVal); + }); + + rewriter.replaceOp(op, genericOp.getResult(0)); + return success(); + } +}; + } // namespace void mlir::tosa::populateTosaToLinalgConversionPatterns( @@ -2326,6 +3132,8 @@ void mlir::tosa::populateTosaToLinalgConversionPatterns( ReduceConverter, ArgMaxConverter, ConcatConverter, + ConvConverter, + DepthwiseConvConverter, GatherConverter, PadConverter, ReshapeConverterCollapse, @@ -2336,6 +3144,10 @@ void mlir::tosa::populateTosaToLinalgConversionPatterns( ReverseConverter, TableConverter, TileConverter, - TransposeConverter>(patterns->getContext()); + TransposeConverter, + MatMulConverter, + MaxPool2dConverter, + AvgPool2dConverter, + FullyConnectedConverter>(patterns->getContext()); // clang-format on } diff --git a/mlir/lib/Conversion/TosaToLinalg/TosaToLinalgNamed.cpp b/mlir/lib/Conversion/TosaToLinalg/TosaToLinalgNamed.cpp deleted file mode 100644 index 90220ef44e97..000000000000 --- a/mlir/lib/Conversion/TosaToLinalg/TosaToLinalgNamed.cpp +++ /dev/null @@ -1,885 +0,0 @@ -//===- TosaToLinalgNamed.cpp - Lowering Tosa to Linalg Named Ops ----------===// -// -// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. -// See https://llvm.org/LICENSE.txt for license information. -// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -// -//===----------------------------------------------------------------------===// -// -// These rewriters lower from the Tosa to the Linalg named ops. 
-// -//===----------------------------------------------------------------------===// - -#include "mlir/Conversion/TosaToLinalg/TosaToLinalg.h" -#include "mlir/Dialect/Arithmetic/IR/Arithmetic.h" -#include "mlir/Dialect/Linalg/IR/Linalg.h" -#include "mlir/Dialect/Math/IR/Math.h" -#include "mlir/Dialect/SCF/SCF.h" -#include "mlir/Dialect/StandardOps/IR/Ops.h" -#include "mlir/Dialect/Tensor/IR/Tensor.h" -#include "mlir/Dialect/Tosa/IR/TosaOps.h" -#include "mlir/Dialect/Utils/ReshapeOpsUtils.h" -#include "mlir/IR/Matchers.h" -#include "mlir/IR/PatternMatch.h" -#include "mlir/Transforms/DialectConversion.h" -#include "mlir/Transforms/GreedyPatternRewriteDriver.h" - -#include - -using namespace mlir; - -static SmallVector getNParallelLoopsAttrs(unsigned nParallelLoops) { - return SmallVector(nParallelLoops, getParallelIteratorTypeName()); -} - -template -static void getValuesFromIntArrayAttribute(ArrayAttr attr, - SmallVector &arrayValues) { - for (Attribute val : attr.getValue()) { - arrayValues.push_back(val.cast().getValue().getSExtValue()); - } -} - -template -static mlir::SelectOp clampHelper(Location loc, Value arg, - arith::ConstantOp min, arith::ConstantOp max, - P pred, OpBuilder &rewriter) { - auto smallerThanMin = rewriter.create(loc, pred, arg, min); - auto minOrArg = - rewriter.create(loc, smallerThanMin, min, arg); - auto largerThanMax = rewriter.create(loc, pred, max, arg); - return rewriter.create(loc, largerThanMax, max, minOrArg); -} - -static mlir::Value applyPad(Location loc, Value input, ArrayRef pad, - Attribute padAttr, OpBuilder &rewriter) { - // Input should be padded if necessary. - if (llvm::all_of(pad, [](int64_t p) { return p == 0; })) - return input; - - ShapedType inputTy = input.getType().cast(); - Type inputETy = inputTy.getElementType(); - auto inputShape = inputTy.getShape(); - - assert((inputShape.size() * 2) == pad.size()); - - SmallVector paddedShape; - SmallVector lowIndices; - SmallVector highIndices; - for (int i = 0, s = inputShape.size(); i < s; i++) { - auto lowPad = pad[i * 2]; - auto highPad = pad[i * 2 + 1]; - paddedShape.push_back(inputShape[i] + highPad + lowPad); - lowIndices.push_back(rewriter.getIndexAttr(lowPad)); - highIndices.push_back(rewriter.getIndexAttr(highPad)); - } - - Value padValue = rewriter.create(loc, padAttr); - - return linalg::PadTensorOp::createPadScalarOp( - RankedTensorType::get(paddedShape, inputETy), input, padValue, - lowIndices, highIndices, /*nofold=*/false, loc, rewriter) - .result(); -} - -static SmallVector filterDynamicDims(SmallVector dynDims) { - SmallVector filteredDims; - for (auto dim : dynDims) - if (dim) - filteredDims.push_back(dim); - return filteredDims; -} - -namespace { - -class ConvConverter : public OpConversionPattern { -public: - using OpConversionPattern::OpConversionPattern; - LogicalResult - matchAndRewrite(tosa::Conv2DOp op, OpAdaptor adaptor, - ConversionPatternRewriter &rewriter) const final { - Location loc = op->getLoc(); - Value input = op->getOperand(0); - Value weight = op->getOperand(1); - Value bias = op->getOperand(2); - - ShapedType inputTy = input.getType().cast(); - ShapedType weightTy = weight.getType().cast(); - ShapedType biasTy = bias.getType().cast(); - ShapedType resultTy = op->getResult(0).getType().cast(); - - Type inputETy = inputTy.getElementType(); - Type resultETy = resultTy.getElementType(); - - auto padAttr = op->getAttr("pad").cast(); - auto strideTosaAttr = op->getAttr("stride").cast(); - auto dilationTosaAttr = op->getAttr("dilation").cast(); - bool isQuantized = 
op->hasAttr("quantization_info"); - - if (!inputTy.hasStaticShape() || !weightTy.hasStaticShape() || - !biasTy.hasStaticShape() || !resultTy.hasStaticShape()) - return rewriter.notifyMatchFailure(op, - "tosa.conv ops require static shapes"); - - if (inputETy.isUnsignedInteger()) - return rewriter.notifyMatchFailure( - op, "tosa.conv ops does not support unsigned integer input"); - - auto weightShape = weightTy.getShape(); - - // Apply padding as necessary. - Attribute zeroAttr = rewriter.getZeroAttr(inputETy); - if (isQuantized) { - auto quantizationInfo = - op->getAttr("quantization_info").cast(); - auto iZp = quantizationInfo.input_zp().getValue().getSExtValue(); - - int64_t intMin = - APInt::getSignedMinValue(inputETy.getIntOrFloatBitWidth()) - .getSExtValue(); - int64_t intMax = - APInt::getSignedMaxValue(inputETy.getIntOrFloatBitWidth()) - .getSExtValue(); - - if (iZp < intMin || iZp > intMax) - return rewriter.notifyMatchFailure( - op, "tosa.conv op quantization has zp outside of input range"); - - zeroAttr = rewriter.getIntegerAttr(inputETy, iZp); - } - - llvm::SmallVector pad; - pad.resize(2, 0); - getValuesFromIntArrayAttribute(padAttr, pad); - pad.resize(pad.size() + 2, 0); - input = applyPad(loc, input, pad, zeroAttr, rewriter); - - // Transpose the kernel to match dimension ordering of the linalg - // convolution operation. - // TODO(suderman): See if this can be efficiently folded - check whether - // the input is used anywhere else, if not fold the constant. - SmallVector weightPerm{1, 2, 3, 0}; - SmallVector newWeightShape{weightShape[1], weightShape[2], - weightShape[3], weightShape[0]}; - auto weightPermAttr = DenseIntElementsAttr::get( - RankedTensorType::get({4}, rewriter.getI64Type()), weightPerm); - Value weightPermValue = - rewriter.create(loc, weightPermAttr); - Type newWeightTy = - RankedTensorType::get(newWeightShape, weightTy.getElementType()); - weight = rewriter.create(loc, newWeightTy, weight, - weightPermValue); - - Attribute resultZeroAttr = rewriter.getZeroAttr(resultETy); - Value initTensor = rewriter.create( - loc, resultTy.getShape(), resultETy); - Value zero = rewriter.create(loc, resultZeroAttr); - Value zeroTensor = - rewriter.create(loc, zero, initTensor).getResult(0); - - // Extract the attributes for convolution. - llvm::SmallVector stride, dilation; - getValuesFromIntArrayAttribute(strideTosaAttr, stride); - getValuesFromIntArrayAttribute(dilationTosaAttr, dilation); - - // Create the convolution op. 
- auto strideAttr = DenseIntElementsAttr::get( - RankedTensorType::get({2}, rewriter.getI64Type()), stride); - auto dilationAttr = DenseIntElementsAttr::get( - RankedTensorType::get({2}, rewriter.getI64Type()), dilation); - - // Create maps for the bias broadcasting - SmallVector indexingMaps; - indexingMaps.push_back(AffineMap::get( - /*dimCount=*/resultTy.getRank(), /*symbolCount=*/0, - {rewriter.getAffineDimExpr(3)}, rewriter.getContext())); - indexingMaps.push_back(rewriter.getMultiDimIdentityMap(resultTy.getRank())); - indexingMaps.push_back(rewriter.getMultiDimIdentityMap(resultTy.getRank())); - - Value biasInitTensor = rewriter.create( - loc, resultTy.getShape(), resultETy); - - if (isQuantized) { - auto quantizationInfo = - op->getAttr("quantization_info").cast(); - auto iZp = rewriter.getI32IntegerAttr( - quantizationInfo.input_zp().getValue().getSExtValue()); - auto kZp = rewriter.getI32IntegerAttr( - quantizationInfo.weight_zp().getValue().getSExtValue()); - - auto iZpVal = rewriter.create(loc, iZp); - auto kZpVal = rewriter.create(loc, kZp); - Value conv = - rewriter - .create( - loc, resultTy, ValueRange{input, weight, iZpVal, kZpVal}, - ValueRange{zeroTensor}, strideAttr, dilationAttr) - ->getResult(0); - - Value result = - rewriter - .create( - loc, resultTy, ValueRange({bias, conv}), biasInitTensor, - indexingMaps, getNParallelLoopsAttrs(resultTy.getRank()), - [&](OpBuilder &nestedBuilder, Location nestedLoc, - ValueRange args) { - Value added = nestedBuilder.create( - loc, args[0], args[1]); - nestedBuilder.create(nestedLoc, added); - }) - .getResult(0); - rewriter.replaceOp(op, result); - return success(); - } - - Value conv = rewriter - .create( - loc, resultTy, ValueRange{input, weight}, - ValueRange{zeroTensor}, strideAttr, dilationAttr) - ->getResult(0); - - Value result = - rewriter - .create( - loc, resultTy, ValueRange({bias, conv}), biasInitTensor, - indexingMaps, getNParallelLoopsAttrs(resultTy.getRank()), - [&](OpBuilder &nestedBuilder, Location nestedLoc, - ValueRange args) { - Value added = nestedBuilder.create( - loc, args[0], args[1]); - nestedBuilder.create(nestedLoc, added); - }) - .getResult(0); - - rewriter.replaceOp(op, result); - return success(); - } -}; - -class DepthwiseConvConverter - : public OpConversionPattern { -public: - using OpConversionPattern::OpConversionPattern; - LogicalResult - matchAndRewrite(tosa::DepthwiseConv2DOp op, OpAdaptor adaptor, - ConversionPatternRewriter &rewriter) const final { - Location loc = op->getLoc(); - Value input = op->getOperand(0); - Value weight = op->getOperand(1); - Value bias = op->getOperand(2); - - ShapedType inputTy = input.getType().cast(); - ShapedType weightTy = weight.getType().cast(); - ShapedType biasTy = bias.getType().cast(); - ShapedType resultTy = op->getResult(0).getType().cast(); - - Type inputETy = inputTy.getElementType(); - Type resultETy = resultTy.getElementType(); - - auto padAttr = op->getAttr("pad").cast(); - auto strideTosaAttr = op->getAttr("stride").cast(); - auto dilationTosaAttr = op->getAttr("dilation").cast(); - - bool isQuantized = op->hasAttr("quantization_info"); - IntegerAttr iZp; - IntegerAttr kZp; - if (isQuantized) { - auto quantizationInfo = - op->getAttr("quantization_info").cast(); - iZp = rewriter.getI32IntegerAttr( - quantizationInfo.input_zp().getValue().getSExtValue()); - kZp = rewriter.getI32IntegerAttr( - quantizationInfo.weight_zp().getValue().getSExtValue()); - } - - if (!inputTy.hasStaticShape() || !weightTy.hasStaticShape() || - !biasTy.hasStaticShape() || 
!resultTy.hasStaticShape()) - return rewriter.notifyMatchFailure(op, - "tosa.conv ops require static shapes"); - - auto weightShape = weightTy.getShape(); - auto resultShape = resultTy.getShape(); - - // Apply padding as necessary. - Attribute zeroAttr = rewriter.getZeroAttr(inputETy); - if (isQuantized) { - auto quantizationInfo = - op->getAttr("quantization_info").cast(); - auto iZp = quantizationInfo.input_zp().getValue().getSExtValue(); - - int64_t intMin = - APInt::getSignedMinValue(inputETy.getIntOrFloatBitWidth()) - .getSExtValue(); - int64_t intMax = - APInt::getSignedMaxValue(inputETy.getIntOrFloatBitWidth()) - .getSExtValue(); - - if (iZp < intMin || iZp > intMax) - return rewriter.notifyMatchFailure( - op, "tosa.depthwise_conv op quantization has zp outside of input " - "range"); - - zeroAttr = rewriter.getIntegerAttr(inputETy, iZp); - } - - llvm::SmallVector pad; - pad.resize(2, 0); - getValuesFromIntArrayAttribute(padAttr, pad); - pad.resize(pad.size() + 2, 0); - - input = applyPad(loc, input, pad, zeroAttr, rewriter); - - // Extract the attributes for convolution. - llvm::SmallVector stride, dilation; - getValuesFromIntArrayAttribute(strideTosaAttr, stride); - getValuesFromIntArrayAttribute(dilationTosaAttr, dilation); - - // Create the convolution op. - auto strideAttr = DenseIntElementsAttr::get( - RankedTensorType::get({2}, rewriter.getI64Type()), stride); - auto dilationAttr = DenseIntElementsAttr::get( - RankedTensorType::get({2}, rewriter.getI64Type()), dilation); - ShapedType linalgConvTy = - RankedTensorType::get({resultShape[0], resultShape[1], resultShape[2], - weightShape[2], weightShape[3]}, - resultETy); - - // Broadcast the initial value to the output tensor before convolving. - SmallVector indexingMaps; - indexingMaps.push_back(AffineMap::get( - /*dimCount=*/resultTy.getRank(), /*symbolCount=*/0, - {rewriter.getAffineDimExpr(3)}, rewriter.getContext())); - indexingMaps.push_back(rewriter.getMultiDimIdentityMap(resultTy.getRank())); - indexingMaps.push_back(rewriter.getMultiDimIdentityMap(resultTy.getRank())); - - Attribute resultZeroAttr = rewriter.getZeroAttr(resultETy); - Value initTensor = rewriter.create( - loc, linalgConvTy.getShape(), resultETy); - Value zero = rewriter.create(loc, resultZeroAttr); - Value zeroTensor = - rewriter.create(loc, zero, initTensor).getResult(0); - - Value biasInitTensor = rewriter.create( - loc, resultTy.getShape(), resultETy); - if (!isQuantized) { - Value conv = rewriter - .create( - loc, linalgConvTy, ValueRange{input, weight}, - ValueRange{zeroTensor}, strideAttr, dilationAttr) - .getResult(0); - Value convReshape = rewriter.create( - loc, resultTy, conv, rewriter.getI64ArrayAttr(resultTy.getShape())); - Value result = - rewriter - .create( - loc, resultTy, ValueRange({bias, convReshape}), - biasInitTensor, indexingMaps, - getNParallelLoopsAttrs(resultTy.getRank()), - [&](OpBuilder &nestedBuilder, Location nestedLoc, - ValueRange args) { - Value added = nestedBuilder.create( - loc, args[0], args[1]); - nestedBuilder.create(nestedLoc, added); - }) - .getResult(0); - rewriter.replaceOp(op, result); - } else { - auto iZpVal = rewriter.create(loc, iZp); - auto kZpVal = rewriter.create(loc, kZp); - Value conv = - rewriter - .create( - loc, linalgConvTy, ValueRange{input, weight, iZpVal, kZpVal}, - ValueRange{zeroTensor}, strideAttr, dilationAttr) - .getResult(0); - Value convReshape = rewriter.create( - loc, resultTy, conv, rewriter.getI64ArrayAttr(resultTy.getShape())); - Value result = - rewriter - .create( - loc, resultTy, 
ValueRange({bias, convReshape}), - biasInitTensor, indexingMaps, - getNParallelLoopsAttrs(resultTy.getRank()), - [&](OpBuilder &nestedBuilder, Location nestedLoc, - ValueRange args) { - Value added = nestedBuilder.create( - loc, args[0], args[1]); - nestedBuilder.create(nestedLoc, added); - }) - .getResult(0); - rewriter.replaceOp(op, result); - } - return success(); - } -}; - -class MatMulConverter : public OpConversionPattern { -public: - using OpConversionPattern::OpConversionPattern; - LogicalResult - matchAndRewrite(tosa::MatMulOp op, OpAdaptor adaptor, - ConversionPatternRewriter &rewriter) const final { - Location loc = op.getLoc(); - - auto outputTy = op.getType().cast(); - auto outputElementTy = outputTy.getElementType(); - - auto firstOperandTy = op->getOperand(0).getType().cast(); - auto secondOperandTy = op->getOperand(1).getType().cast(); - - SmallVector dynDims; - dynDims.resize(op->getResult(0).getType().cast().getRank()); - - if (!firstOperandTy.hasRank() || firstOperandTy.isDynamicDim(0)) { - dynDims[0] = rewriter.create(loc, op->getOperand(0), 0); - } - - if (!firstOperandTy.hasRank() || firstOperandTy.isDynamicDim(1)) { - dynDims[1] = rewriter.create(loc, op->getOperand(0), 1); - } - - if (!secondOperandTy.hasRank() || secondOperandTy.isDynamicDim(2)) { - dynDims[2] = rewriter.create(loc, op->getOperand(1), 2); - } - - SmallVector filteredDims = filterDynamicDims(dynDims); - - auto zeroAttr = rewriter.getZeroAttr(outputElementTy); - Value zero = rewriter.create(loc, zeroAttr); - auto initTensor = rewriter.create( - loc, filteredDims, outputTy.getShape(), outputTy.getElementType()); - Value zeroTensor = - rewriter.create(loc, zero, initTensor).getResult(0); - if (!op.quantization_info()) { - rewriter.replaceOpWithNewOp( - op, TypeRange{op.getType()}, ValueRange{adaptor.a(), adaptor.b()}, - ValueRange{zeroTensor}); - return success(); - } - - auto quantizationInfo = op.quantization_info().getValue(); - auto aZp = rewriter.create( - loc, rewriter.getI32IntegerAttr( - quantizationInfo.a_zp().getValue().getSExtValue())); - auto bZp = rewriter.create( - loc, rewriter.getI32IntegerAttr( - quantizationInfo.b_zp().getValue().getSExtValue())); - rewriter.replaceOpWithNewOp( - op, TypeRange{op.getType()}, - ValueRange{adaptor.a(), adaptor.b(), aZp, bZp}, zeroTensor); - - return success(); - } -}; - -class FullyConnectedConverter - : public OpConversionPattern { -public: - using OpConversionPattern::OpConversionPattern; - LogicalResult - matchAndRewrite(tosa::FullyConnectedOp op, OpAdaptor adaptor, - ConversionPatternRewriter &rewriter) const final { - Location loc = op.getLoc(); - auto outputTy = op.getType().cast(); - auto input = op.input(); - auto inputTy = input.getType().cast(); - - auto bias = op.bias(); - - auto weight = op.weight(); - auto weightTy = weight.getType().cast(); - auto weightShape = weightTy.getShape(); - - auto outputETy = outputTy.getElementType(); - - SmallVector dynDims; - dynDims.resize(op->getResult(0).getType().cast().getRank()); - - if (!inputTy.hasRank() || inputTy.isDynamicDim(0)) { - dynDims[0] = rewriter.create(loc, input, 0); - } - - if (!weightTy.hasRank() || weightTy.isDynamicDim(0)) { - dynDims[1] = rewriter.create(loc, weight, 0); - } - - SmallVector filteredDims = filterDynamicDims(dynDims); - - // Creating maps for the output of MatMul and the bias - SmallVector indexingMaps; - - // Broadcast the bias. 
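    // As a concrete illustration (matching the fully_connected tests in this
    // patch): for a tensor<5x6xf32> output and a tensor<6xf32> bias, the three
    // maps pushed below are
    //   bias   : (d0, d1) -> (d1)
    //   matmul : (d0, d1) -> (d0, d1)
    //   output : (d0, d1) -> (d0, d1)
    // so the same bias vector is added to every row of the matmul result.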
- indexingMaps.push_back(AffineMap::get(/*dimCount=*/2, /*symbolCount=*/0, - {rewriter.getAffineDimExpr(1)}, - rewriter.getContext())); - - indexingMaps.push_back(rewriter.getMultiDimIdentityMap(outputTy.getRank())); - indexingMaps.push_back(rewriter.getMultiDimIdentityMap(outputTy.getRank())); - - auto initTensor = rewriter.create( - loc, filteredDims, outputTy.getShape(), outputTy.getElementType()); - - // When quantized, the input elemeny type is not the same as the output - Attribute resultZeroAttr = rewriter.getZeroAttr(outputETy); - Value zero = rewriter.create(loc, resultZeroAttr); - Value zeroTensor = - rewriter.create(loc, zero, initTensor).getResult(0); - - SmallVector permutation{1, 0}; - auto permutationAttr = DenseIntElementsAttr::get( - RankedTensorType::get({2}, rewriter.getI64Type()), permutation); - Value permutationValue = - rewriter.create(loc, permutationAttr); - - SmallVector newWeightShape{weightShape[1], weightShape[0]}; - Type newWeightTy = - RankedTensorType::get(newWeightShape, weightTy.getElementType()); - - Value transposedWeight = rewriter.create( - loc, newWeightTy, weight, permutationValue); - - auto biasInitTensor = - rewriter - .create(loc, filteredDims, - outputTy.getShape(), outputETy) - ->getResults(); - - if (!op.quantization_info()) { - Value matmul = rewriter - .create( - loc, TypeRange{op.getType()}, - ValueRange{input, transposedWeight}, zeroTensor) - ->getResult(0); - - Value result = - rewriter - .create( - loc, outputTy, ValueRange({bias, matmul}), biasInitTensor, - indexingMaps, getNParallelLoopsAttrs(outputTy.getRank()), - [&](OpBuilder &nestedBuilder, Location nestedLoc, - ValueRange args) { - Value added = nestedBuilder.create( - loc, args[0], args[1]); - nestedBuilder.create(nestedLoc, added); - }) - .getResult(0); - rewriter.replaceOp(op, result); - return success(); - } - - auto quantizationInfo = op.quantization_info().getValue(); - auto inputZp = rewriter.create( - loc, rewriter.getI32IntegerAttr( - quantizationInfo.input_zp().getValue().getSExtValue())); - auto outputZp = rewriter.create( - loc, rewriter.getI32IntegerAttr( - quantizationInfo.weight_zp().getValue().getSExtValue())); - Value matmul = - rewriter - .create( - loc, TypeRange{op.getType()}, - ValueRange{input, transposedWeight, inputZp, outputZp}, - zeroTensor) - ->getResult(0); - Value result = - rewriter - .create( - loc, outputTy, ValueRange({bias, matmul}), biasInitTensor, - indexingMaps, getNParallelLoopsAttrs(outputTy.getRank()), - [&](OpBuilder &nestedBuilder, Location nestedLoc, - ValueRange args) { - Value added = nestedBuilder.create( - loc, args[0], args[1]); - nestedBuilder.create(nestedLoc, added); - }) - .getResult(0); - rewriter.replaceOp(op, result); - return success(); - } -}; - -class MaxPool2dConverter : public OpRewritePattern { -public: - using OpRewritePattern::OpRewritePattern; - - LogicalResult matchAndRewrite(tosa::MaxPool2dOp op, - PatternRewriter &rewriter) const final { - Location loc = op.getLoc(); - Value input = op.input(); - ShapedType inputTy = input.getType().cast(); - - ShapedType resultTy = op.getType().template cast(); - Type resultETy = inputTy.getElementType(); - - if (!inputTy.hasStaticShape()) - return failure(); - - // Determine what the initial value needs to be for the max pool op. 
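    // The padding value and the fill value must both be the identity of the
    // max reduction, i.e. the smallest value of the element type. The tests in
    // this patch check exactly these constants:
    //   f32 -> -3.40282347E+38 (APFloat::getLargest(semantics, /*Negative=*/true))
    //   i8  -> -128,  i16 -> -32768,  i32 -> -2147483648 (signed minimum)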
- Attribute initialAttr; - if (resultETy.isF32()) - initialAttr = rewriter.getFloatAttr( - resultETy, - APFloat::getLargest(resultETy.cast().getFloatSemantics(), - true)); - - if (resultETy.isa()) - initialAttr = rewriter.getIntegerAttr( - resultETy, - APInt::getSignedMinValue(resultETy.getIntOrFloatBitWidth())); - - if (!initialAttr) - return rewriter.notifyMatchFailure( - op, "Unsupported initial value for tosa.maxpool_2d op"); - - // Apply padding as necessary. - llvm::SmallVector pad; - pad.resize(2, 0); - getValuesFromIntArrayAttribute(op.pad(), pad); - pad.resize(pad.size() + 2, 0); - Value paddedInput = applyPad(loc, input, pad, initialAttr, rewriter); - - Value initialValue = rewriter.create(loc, initialAttr); - - SmallVector kernel, stride; - getValuesFromIntArrayAttribute(op.kernel(), kernel); - getValuesFromIntArrayAttribute(op.stride(), stride); - - Attribute strideAttr = rewriter.getI64VectorAttr(stride); - Attribute dilationAttr = rewriter.getI64VectorAttr({1, 1}); - - // Create the linalg op that performs pooling. - Value initTensor = rewriter.create( - loc, resultTy.getShape(), resultTy.getElementType()); - - Value filledInitTensor = - rewriter.create(loc, initialValue, initTensor).result(); - - Value fakeWindowDims = - rewriter.create(loc, kernel, resultETy); - - rewriter.replaceOpWithNewOp( - op, ArrayRef{resultTy}, ValueRange{paddedInput, fakeWindowDims}, - filledInitTensor, strideAttr, dilationAttr); - return success(); - } -}; - -class AvgPool2dConverter : public OpRewritePattern { -public: - using OpRewritePattern::OpRewritePattern; - - LogicalResult matchAndRewrite(tosa::AvgPool2dOp op, - PatternRewriter &rewriter) const final { - Location loc = op.getLoc(); - Value input = op.input(); - ShapedType inputTy = input.getType().cast(); - Type inElementTy = inputTy.getElementType(); - - ShapedType resultTy = op.getType().template cast(); - Type resultETy = op.getType().cast().getElementType(); - - Type accETy = - inElementTy.isa() ? rewriter.getI32Type() : inElementTy; - ShapedType accTy = resultTy.clone(accETy); - - if (!inputTy.hasStaticShape()) - return failure(); - - // Apply padding as necessary. - llvm::SmallVector pad; - pad.resize(2, 0); - getValuesFromIntArrayAttribute(op.pad(), pad); - pad.resize(pad.size() + 2, 0); - Attribute padAttr = rewriter.getZeroAttr(inElementTy); - Value paddedInput = applyPad(loc, input, pad, padAttr, rewriter); - - Attribute initialAttr = rewriter.getZeroAttr(accETy); - Value initialValue = rewriter.create(loc, initialAttr); - - SmallVector kernel, stride; - getValuesFromIntArrayAttribute(op.kernel(), kernel); - getValuesFromIntArrayAttribute(op.stride(), stride); - - Attribute strideAttr = rewriter.getI64VectorAttr(stride); - Attribute dilationAttr = rewriter.getI64VectorAttr({1, 1}); - - // Create the linalg op that performs pooling. - Value poolInitTensor = - rewriter.create(loc, accTy.getShape(), accETy); - - Value filledInitTensor = - rewriter.create(loc, initialValue, poolInitTensor) - .result(); - - Value fakeWindowDims = - rewriter.create(loc, kernel, accETy); - - // Sum across the pooled region. - Value poolingOp = rewriter - .create( - loc, ArrayRef{accTy}, - ValueRange{paddedInput, fakeWindowDims}, - filledInitTensor, strideAttr, dilationAttr) - .getResult(0); - - // Normalize the summed value by the number of elements grouped in each - // pool. 
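    // Conceptually: average = sum / count, where count is the number of valid
    // (non-padded) input elements covered by the kernel at each output
    // position, clamped to at least one. Floating-point values take the divf
    // path below; integer values go through the fixed-point multiplier/shift
    // path.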
- auto poolingOpTy = poolingOp.getType().cast(); - auto affineMap = rewriter.getMultiDimIdentityMap(resultTy.getRank()); - - Value genericInitTensor = rewriter.create( - loc, resultTy.getShape(), resultETy); - - auto genericOp = rewriter.create( - loc, ArrayRef({resultTy}), ValueRange{poolingOp}, - ValueRange{genericInitTensor}, - ArrayRef({affineMap, affineMap}), - getNParallelLoopsAttrs(resultTy.getRank()), - [&](OpBuilder &b, Location loc, ValueRange args) { - auto zero = rewriter.create(loc, 0); - auto one = rewriter.create(loc, 1); - auto iH = rewriter.create( - loc, poolingOpTy.getDimSize(1) - 1); - auto iW = rewriter.create( - loc, poolingOpTy.getDimSize(2) - 1); - - // Compute the indices from either end. - auto y0 = rewriter.create(loc, 1); - auto x0 = rewriter.create(loc, 2); - auto y1 = rewriter.create(loc, iH, y0); - auto x1 = rewriter.create(loc, iW, x0); - - // Determines what the portion of valid input is covered by the - // kernel. - auto padFn = [&](Value v, Value x, int64_t pad) -> Value { - if (pad == 0) - return v; - - auto padVal = rewriter.create(loc, pad); - Value dx = rewriter.create(loc, x, padVal); - - Value cmp = rewriter.create( - loc, arith::CmpIPredicate::slt, dx, zero); - Value offset = rewriter.create(loc, cmp, dx, zero); - return rewriter.create(loc, v, offset)->getResult(0); - }; - - // Compute the vertical component of coverage. - auto kH0 = rewriter.create(loc, kernel[0]); - auto kH1 = padFn(kH0, y0, pad[2]); - auto kH2 = padFn(kH1, y1, pad[3]); - auto kHCmp = rewriter.create( - loc, arith::CmpIPredicate::slt, kH2, one); - auto kH3 = rewriter.create(loc, kHCmp, one, kH2); - - // compute the horizontal component of coverage. - auto kW0 = rewriter.create(loc, kernel[1]); - auto kW1 = padFn(kW0, x0, pad[4]); - auto kW2 = padFn(kW1, x1, pad[5]); - auto kWCmp = rewriter.create( - loc, arith::CmpIPredicate::slt, kW2, one); - auto kW3 = rewriter.create(loc, kWCmp, one, kW2); - - // Compute the total number of elements and normalize. - Value count = rewriter.create(loc, kH3, kW3); - auto countI = rewriter.create( - loc, rewriter.getI32Type(), count); - - // Divide by the number of summed values. For floats this is just - // a div however for quantized values input normalization had - // to be applied. - Value poolVal = args[0]; - if (accETy.isa()) { - auto countF = rewriter.create(loc, accETy, countI); - poolVal = rewriter.create(loc, poolVal, countF) - ->getResult(0); - } else { - - // If we have quantization information we need to apply an offset - // for the input zp value. - if (op.quantization_info()) { - auto quantizationInfo = op.quantization_info().getValue(); - auto inputZp = rewriter.create( - loc, quantizationInfo.input_zp()); - Value offset = - rewriter.create(loc, accETy, countI, inputZp); - poolVal = - rewriter.create(loc, accETy, poolVal, offset); - } - - // Compute the multiplier and shift values for the quantization - // normalization. Preferably we would want to compute more bits - // however 32-bits should be enough for compute. Honestly we - // should probably straight divide. 
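    // A rough standalone sketch of the intended arithmetic (the helper name is
    // illustrative only, not part of this patch): divide by count via a Q30
    // fixed-point reciprocal rather than a per-element integer division.
    //
    //   int32_t divideByCount(int32_t sum, int32_t count) {
    //     int32_t multiplier = ((1 << 30) + 1) / count;  // reciprocal in Q30
    //     return static_cast<int32_t>(
    //         (static_cast<int64_t>(sum) * multiplier) >> 30);
    //   }
    //
    // For example, sum = 160 and count = 16 give multiplier = 67108864 and a
    // result of 10, i.e. 160 / 16. In the IR emitted below, tosa.apply_scale
    // performs the widened multiply-and-shift.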
- int64_t numerator = ((1 << 30) + 1); - int64_t shift = 30; - - Value numeratorVal = rewriter.create( - loc, rewriter.getI32IntegerAttr(numerator)); - Value multiplierVal = - rewriter - .create(loc, rewriter.getI32Type(), - numeratorVal, countI) - .getResult(); - Value shiftVal = rewriter.create( - loc, rewriter.getI8IntegerAttr(shift)); - - auto scaled = - rewriter - .create( - loc, rewriter.getI32Type(), poolVal, multiplierVal, - shiftVal, rewriter.getBoolAttr(false)) - .getResult(); - - // If we have quantization information we need to apply output - // zeropoint. - if (op.quantization_info()) { - auto quantizationInfo = op.quantization_info().getValue(); - auto outputZp = rewriter.create( - loc, quantizationInfo.output_zp()); - scaled = rewriter.create(loc, scaled, outputZp) - .getResult(); - } - - // Apply Clip. - int64_t outBitwidth = resultETy.getIntOrFloatBitWidth(); - - auto min = rewriter.create( - loc, APInt::getSignedMinValue(outBitwidth).getSExtValue(), - accETy); - auto max = rewriter.create( - loc, APInt::getSignedMaxValue(outBitwidth).getSExtValue(), - accETy); - auto clamp = clampHelper( - loc, scaled, min, max, arith::CmpIPredicate::slt, rewriter); - - poolVal = clamp; - // Convert type. - if (resultETy != clamp.getType()) { - poolVal = - rewriter.create(loc, resultETy, poolVal); - } - } - - rewriter.create(loc, poolVal); - }); - - rewriter.replaceOp(op, genericOp.getResult(0)); - return success(); - } -}; - -} // namespace - -void mlir::tosa::populateTosaToLinalgNamedConversionPatterns( - RewritePatternSet *patterns) { - patterns->add< - // clang-format off - ConvConverter, - DepthwiseConvConverter, - MatMulConverter, - MaxPool2dConverter, - AvgPool2dConverter, - FullyConnectedConverter>(patterns->getContext()); - // clang-format on -} diff --git a/mlir/lib/Conversion/TosaToLinalg/TosaToLinalgNamedPass.cpp b/mlir/lib/Conversion/TosaToLinalg/TosaToLinalgNamedPass.cpp deleted file mode 100644 index f5f6ac1a5469..000000000000 --- a/mlir/lib/Conversion/TosaToLinalg/TosaToLinalgNamedPass.cpp +++ /dev/null @@ -1,68 +0,0 @@ -//===- TosaToLinalgPass.cpp - Lowering Tosa to Linalg Dialect -------------===// -// -// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. -// See https://llvm.org/LICENSE.txt for license information. -// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -// -//===----------------------------------------------------------------------===// -// -// This transformation pass legalizes Tosa operations to the Linalg dialect. 
-// -//===----------------------------------------------------------------------===// - -#include "../PassDetail.h" -#include "mlir/Conversion/TosaToLinalg/TosaToLinalg.h" -#include "mlir/Dialect/Arithmetic/IR/Arithmetic.h" -#include "mlir/Dialect/Linalg/IR/Linalg.h" -#include "mlir/Dialect/Math/IR/Math.h" -#include "mlir/Dialect/SCF/SCF.h" -#include "mlir/Dialect/StandardOps/IR/Ops.h" -#include "mlir/Dialect/Tensor/IR/Tensor.h" -#include "mlir/Dialect/Tosa/IR/TosaOps.h" -#include "mlir/Dialect/Tosa/Transforms/PassDetail.h" -#include "mlir/Dialect/Tosa/Transforms/Passes.h" -#include "mlir/Dialect/Tosa/Utils/QuantUtils.h" -#include "mlir/IR/PatternMatch.h" -#include "mlir/Pass/PassManager.h" -#include "mlir/Transforms/DialectConversion.h" -#include "mlir/Transforms/GreedyPatternRewriteDriver.h" - -using namespace mlir; - -namespace { -struct TosaToLinalgNamed : public TosaToLinalgNamedBase { -public: - void getDependentDialects(DialectRegistry ®istry) const override { - registry.insert(); - } - - void runOnFunction() override { - RewritePatternSet patterns(&getContext()); - ConversionTarget target(getContext()); - target.addLegalDialect(); - - // Not every TOSA op can be legalized to linalg. - target.addIllegalOp(); - target.addIllegalOp(); - target.addIllegalOp(); - target.addIllegalOp(); - target.addIllegalOp(); - target.addIllegalOp(); - - target.markUnknownOpDynamicallyLegal([](Operation *) { return true; }); - - FuncOp func = getFunction(); - mlir::tosa::populateTosaToLinalgNamedConversionPatterns(&patterns); - if (failed(applyFullConversion(func, target, std::move(patterns)))) - signalPassFailure(); - } -}; -} // namespace - -std::unique_ptr mlir::tosa::createTosaToLinalgNamed() { - return std::make_unique(); -} diff --git a/mlir/lib/Conversion/TosaToLinalg/TosaToLinalgPass.cpp b/mlir/lib/Conversion/TosaToLinalg/TosaToLinalgPass.cpp index 3813ba345137..8f4f872c8860 100644 --- a/mlir/lib/Conversion/TosaToLinalg/TosaToLinalgPass.cpp +++ b/mlir/lib/Conversion/TosaToLinalg/TosaToLinalgPass.cpp @@ -26,7 +26,6 @@ #include "mlir/Pass/PassManager.h" #include "mlir/Transforms/DialectConversion.h" #include "mlir/Transforms/GreedyPatternRewriteDriver.h" -#include "mlir/Transforms/Passes.h" using namespace mlir; @@ -68,9 +67,6 @@ std::unique_ptr mlir::tosa::createTosaToLinalg() { } void mlir::tosa::addTosaToLinalgPasses(OpPassManager &pm) { - pm.addNestedPass(createTosaMakeBroadcastablePass()); - pm.addNestedPass(createTosaToLinalgNamed()); - pm.addNestedPass(mlir::createCanonicalizerPass()); pm.addNestedPass(createTosaMakeBroadcastablePass()); pm.addNestedPass(createTosaToLinalg()); } diff --git a/mlir/test/Conversion/TosaToLinalg/tosa-to-linalg-named.mlir b/mlir/test/Conversion/TosaToLinalg/tosa-to-linalg-named.mlir deleted file mode 100644 index f5814883cc49..000000000000 --- a/mlir/test/Conversion/TosaToLinalg/tosa-to-linalg-named.mlir +++ /dev/null @@ -1,448 +0,0 @@ -// RUN: mlir-opt --split-input-file --tosa-to-linalg-named %s -verify-diagnostics -o -| FileCheck %s - -// CHECK-LABEL: @matmul -func @matmul(%arg0: tensor<1x5x3xf32>, %arg1: tensor<1x3x6xf32>) -> (tensor<1x5x6xf32>) { - // CHECK: [[C0:%.+]] = arith.constant 0 - // CHECK: [[INIT:%.+]] = linalg.init_tensor [1, 5, 6] - // CHECK: [[FILLED:%.+]] = linalg.fill([[C0]], [[INIT]]) : f32, tensor<1x5x6xf32> -> tensor<1x5x6xf32> - // CHECK: linalg.batch_matmul ins(%arg0, %arg1 : tensor<1x5x3xf32>, tensor<1x3x6xf32>) outs([[FILLED]] : tensor<1x5x6xf32>) -> tensor<1x5x6xf32> - %0 = "tosa.matmul"(%arg0, %arg1) : (tensor<1x5x3xf32>, 
tensor<1x3x6xf32>) -> (tensor<1x5x6xf32>) - return %0 : tensor<1x5x6xf32> -} - -// ----- - - -// CHECK-LABEL: @matmul_quantized -func @matmul_quantized(%arg0: tensor<1x5x3xi8>, %arg1: tensor<1x3x6xi8>) -> (tensor<1x5x6xi32>) { - // CHECK: [[C0:%.+]] = arith.constant 0 - // CHECK: [[INIT:%.+]] = linalg.init_tensor [1, 5, 6] - // CHECK: [[FILLED:%.+]] = linalg.fill([[C0]], [[INIT]]) : i32, tensor<1x5x6xi32> -> tensor<1x5x6xi32> - // CHECK: [[ONE:%.+]] = arith.constant 1 - // CHECK: [[TWO:%.+]] = arith.constant 2 - // CHECK: linalg.quantized_batch_matmul ins(%arg0, %arg1, [[ONE]], [[TWO]] : tensor<1x5x3xi8>, tensor<1x3x6xi8>, i32, i32) outs([[FILLED]] : tensor<1x5x6xi32>) -> tensor<1x5x6xi32> - %0 = "tosa.matmul"(%arg0, %arg1) {quantization_info = {a_zp = 1 : i32, b_zp = 2 : i32}} : (tensor<1x5x3xi8>, tensor<1x3x6xi8>) -> (tensor<1x5x6xi32>) - return %0 : tensor<1x5x6xi32> -} - -// ----- - -// CHECK-LABEL: @matmul_dyn_batch -func @matmul_dyn_batch(%arg0: tensor, %arg1: tensor) -> (tensor) { - // CHECK: %[[C0:.+]] = arith.constant 0 - // CHECK: %[[DIM:.+]] = tensor.dim %arg0, %[[C0]] - // CHECK: %[[C0_0:.+]] = arith.constant 0 - // CHECK: %[[INIT:.+]] = linalg.init_tensor [%[[DIM]], 5, 6] - // CHECK: %[[FILLED:.+]] = linalg.fill(%[[C0_0]], %[[INIT]]) : f32, tensor -> tensor - // CHECK: linalg.batch_matmul ins(%arg0, %arg1 : tensor, tensor) outs(%[[FILLED]] : tensor) -> tensor - %0 = "tosa.matmul"(%arg0, %arg1) : (tensor, tensor) -> (tensor) - return %0 : tensor -} - -// ----- - -// CHECK-LABEL: @matmul_dyn_independent_dim -func @matmul_dyn_independent_dim(%arg0: tensor<1x5x3xf32>, %arg1: tensor<1x3x?xf32>) -> (tensor<1x5x?xf32>) { - // CHECK: %[[C2:.+]] = arith.constant 2 - // CHECK: %[[DIM:.+]] = tensor.dim %arg1, %[[C2]] - // CHECK: %[[C0:.+]] = arith.constant 0 - // CHECK: %[[INIT:.+]] = linalg.init_tensor [1, 5, %[[DIM]]] - // CHECK: %[[FILLED:.+]] = linalg.fill(%[[C0]], %[[INIT]]) : f32, tensor<1x5x?xf32> -> tensor<1x5x?xf32> - // CHECK: linalg.batch_matmul ins(%arg0, %arg1 : tensor<1x5x3xf32>, tensor<1x3x?xf32>) outs(%[[FILLED]] : tensor<1x5x?xf32>) -> tensor<1x5x?xf32> - %0 = "tosa.matmul"(%arg0, %arg1) : (tensor<1x5x3xf32>, tensor<1x3x?xf32>) -> (tensor<1x5x?xf32>) - return %0 : tensor<1x5x?xf32> -} - -// ----- - -// CHECK-LABEL: @matmul_dyn_independent_dim -func @matmul_dyn_independent_dim(%arg0: tensor<1x5x?xf32>, %arg1: tensor<1x?x6xf32>) -> (tensor<1x5x6xf32>) { - // CHECK: %[[C0:.+]] = arith.constant 0 - // CHECK: %[[INIT:.+]] = linalg.init_tensor [1, 5, 6] - // CHECK: %[[FILLED:.+]] = linalg.fill(%[[C0]], %[[INIT]]) : f32, tensor<1x5x6xf32> -> tensor<1x5x6xf32> - // CHECK: linalg.batch_matmul ins(%arg0, %arg1 : tensor<1x5x?xf32>, tensor<1x?x6xf32>) outs(%[[FILLED]] : tensor<1x5x6xf32>) -> tensor<1x5x6xf32> - %0 = "tosa.matmul"(%arg0, %arg1) : (tensor<1x5x?xf32>, tensor<1x?x6xf32>) -> (tensor<1x5x6xf32>) - return %0 : tensor<1x5x6xf32> -} - -// ----- - -// CHECK: #[[$MAP1:.*]] = affine_map<(d0, d1) -> (d1)> -// CHECK: #[[$MAP2:.*]] = affine_map<(d0, d1) -> (d0, d1)> - -// CHECK-LABEL: @fully_connected -func @fully_connected(%arg0: tensor<5x3xf32>, %arg1: tensor<6x3xf32>, %arg2: tensor<6xf32>) -> (tensor<5x6xf32>) { - // CHECK: [[INITT:%.+]] = linalg.init_tensor [5, 6] - // CHECK: [[ZERO:%.+]] = arith.constant 0 - // CHECK: [[FILL:%.+]] = linalg.fill([[ZERO]], [[INITT]]) - // CHECK: [[PERM:%.+]] = arith.constant dense<[1, 0]> - // CHECK: [[TRANSPOSE:%.+]] = "tosa.transpose"(%arg1, [[PERM]]) - // CHECK: [[INITB:%.+]] = linalg.init_tensor [5, 6] - // CHECK: [[MATMUL:%.+]] = 
linalg.matmul ins(%arg0, [[TRANSPOSE]] : tensor<5x3xf32>, tensor<3x6xf32>) outs([[FILL]] : tensor<5x6xf32>) -> tensor<5x6xf32> - // CHECK: [[ADDED:%.+]] = linalg.generic {indexing_maps = [#[[$MAP1]], #[[$MAP2]], #[[$MAP2]]], iterator_types = ["parallel", "parallel"]} ins(%arg2, [[MATMUL]] : tensor<6xf32>, tensor<5x6xf32>) outs([[INITB]] : tensor<5x6xf32>) { - // CHECK: ^bb0(%arg3: f32, %arg4: f32, %arg5: f32): - // CHECK: [[ADD:%.+]] = arith.addf %arg3, %arg4 : f32 - // CHECK: linalg.yield [[ADD]] : f32 - - %0 = "tosa.fully_connected"(%arg0, %arg1, %arg2) : (tensor<5x3xf32>, tensor<6x3xf32>, tensor<6xf32>) -> (tensor<5x6xf32>) - return %0 : tensor<5x6xf32> -} - -// ----- - -// CHECK: #[[$MAP1:.*]] = affine_map<(d0, d1) -> (d1)> -// CHECK: #[[$MAP2:.*]] = affine_map<(d0, d1) -> (d0, d1)> - -// CHECK-LABEL: @quantized_fully_connected -func @quantized_fully_connected(%arg0: tensor<5x3xi8>, %arg1: tensor<6x3xi8>, %arg2: tensor<6xi32>) -> (tensor<5x6xi32>) { - // CHECK: [[INITT:%.+]] = linalg.init_tensor [5, 6] - // CHECK: [[ZERO:%.+]] = arith.constant 0 - // CHECK: [[FILL:%.+]] = linalg.fill([[ZERO]], [[INITT]]) - // CHECK: [[PERM:%.+]] = arith.constant dense<[1, 0]> - // CHECK: [[TRANSPOSE:%.+]] = "tosa.transpose"(%arg1, [[PERM]]) - // CHECK: [[INITB:%.+]] = linalg.init_tensor [5, 6] - // CHECK: [[ONE:%.+]] = arith.constant 1 - // CHECK: [[TWO:%.+]] = arith.constant 2 - // CHECK: [[MATMUL:%.+]] = linalg.quantized_matmul ins(%arg0, [[TRANSPOSE]], [[ONE]], [[TWO]] : tensor<5x3xi8>, tensor<3x6xi8>, i32, i32) outs([[FILL]] : tensor<5x6xi32>) -> tensor<5x6xi32> - // CHECK: [[ADDED:%.+]] = linalg.generic {indexing_maps = [#[[$MAP1]], #[[$MAP2]], #[[$MAP2]]], iterator_types = ["parallel", "parallel"]} ins(%arg2, [[MATMUL]] : tensor<6xi32>, tensor<5x6xi32>) outs([[INITB]] - // CHECK: ^bb0([[IN1:%.+]]: i32, [[IN2:%.+]]: i32, [[UNUSED:%.+]]: i32): - // CHECK: [[ADD:%.+]] = arith.addi - // CHECK: linalg.yield [[ADD]] : i32 - %0 = "tosa.fully_connected"(%arg0, %arg1, %arg2) {quantization_info = {input_zp = 1:i32, weight_zp = 2:i32}} : (tensor<5x3xi8>, tensor<6x3xi8>, tensor<6xi32>) -> (tensor<5x6xi32>) - return %0 : tensor<5x6xi32> -} - -// ----- - -// CHECK: #[[$MAP1:.*]] = affine_map<(d0, d1) -> (d1)> -// CHECK: #[[$MAP2:.*]] = affine_map<(d0, d1) -> (d0, d1)> - -// CHECK-LABEL: @fully_connected_dyn -func @fully_connected_dyn(%arg0: tensor, %arg1: tensor<6x3xf32>, %arg2: tensor<6xf32>) -> (tensor) { - // CHECK: %[[C0:.+]] = arith.constant 0 - // CHECK: %[[DIM:.+]] = tensor.dim %arg0, %[[C0]] - // CHECK: %[[INITT:.+]] = linalg.init_tensor [%[[DIM]], 6] - // CHECK: %[[ZERO:.+]] = arith.constant 0 - // CHECK: %[[FILL:.+]] = linalg.fill(%[[ZERO]], %[[INITT]]) - // CHECK: %[[PERM:.+]] = arith.constant dense<[1, 0]> - // CHECK: %[[TRANSPOSE:.+]] = "tosa.transpose"(%arg1, %[[PERM]]) - // CHECK: %[[INITB:.+]] = linalg.init_tensor [%[[DIM]], 6] - // CHECK: %[[MATMUL:.+]] = linalg.matmul ins(%arg0, %[[TRANSPOSE]] : tensor, tensor<3x6xf32>) outs(%[[FILL]] : tensor) -> tensor - // CHECK: %[[ADDED:.+]] = linalg.generic {indexing_maps = [#[[$MAP1]], #[[$MAP2]], #[[$MAP2]]], iterator_types = ["parallel", "parallel"]} ins(%arg2, %[[MATMUL]] : tensor<6xf32>, tensor) outs(%[[INITB]] : tensor) { - // CHECK: ^bb0(%arg3: f32, %arg4: f32, %arg5: f32): - // CHECK: %[[ADD:.+]] = arith.addf %arg3, %arg4 : f32 - // CHECK: linalg.yield %[[ADD]] : f32 - - %0 = "tosa.fully_connected"(%arg0, %arg1, %arg2) : (tensor, tensor<6x3xf32>, tensor<6xf32>) -> (tensor) - return %0 : tensor -} - -// ----- - -// CHECK-LABEL: @max_pool -func 
@max_pool(%arg0: tensor<1x6x34x62xf32>) -> () { - // CHECK-DAG: [[CONST:%.+]] = arith.constant -3.40282347E+38 - // CHECK-DAG: [[INIT:%.+]] = linalg.init_tensor [1, 4, 32, 62] - // CHECK-DAG: [[FILL:%.+]] = linalg.fill([[CONST]], [[INIT]]) - // CHECK-DAG: [[KERNEL:%.+]] = linalg.init_tensor [3, 3] - // CHECK: linalg.pooling_nhwc_max {dilations = dense<1> : vector<2xi64>, strides = dense<1> : vector<2xi64>} ins(%arg0, [[KERNEL]] : tensor<1x6x34x62xf32>, tensor<3x3xf32>) outs([[FILL]] : tensor<1x4x32x62xf32>) - %0 = "tosa.max_pool2d"(%arg0) {pad = [0, 0, 0, 0], kernel = [3, 3], stride = [1, 1]} : (tensor<1x6x34x62xf32>) -> (tensor<1x4x32x62xf32>) - return -} - -// CHECK-LABEL: @max_pool_padded -func @max_pool_padded(%arg0: tensor<1x6x34x62xf32>) -> () { - // CHECK-DAG: [[CONST:%.+]] = arith.constant -3.40282347E+38 : f32 - // CHECK-DAG: [[PAD:%.+]] = linalg.pad_tensor %arg0 low[0, 0, 0, 0] high[0, 0, 1, 0] - // CHECK-DAG: linalg.yield [[CONST]] - // CHECK-DAG: [[INITVAL:%.+]] = arith.constant -3.40282347E+38 : f32 - // CHECK-DAG: [[INIT:%.+]] = linalg.init_tensor [1, 4, 33, 62] - // CHECK-DAG: [[FILL:%.+]] = linalg.fill([[INITVAL]], [[INIT]]) - // CHECK-DAG: [[KERNEL:%.+]] = linalg.init_tensor [3, 3] - // CHECK: linalg.pooling_nhwc_max {dilations = dense<1> : vector<2xi64>, strides = dense<1> : vector<2xi64>} ins([[PAD]], [[KERNEL]] : tensor<1x6x35x62xf32>, tensor<3x3xf32>) outs([[FILL]] : tensor<1x4x33x62xf32>) - %0 = "tosa.max_pool2d"(%arg0) {pad = [0, 0, 0, 1], kernel = [3, 3], stride = [1, 1]} : (tensor<1x6x34x62xf32>) -> (tensor<1x4x33x62xf32>) - return -} - -// CHECK-LABEL: @max_pool_i8 -func @max_pool_i8(%arg0: tensor<1x6x34x62xi8>) -> () { - // CHECK: arith.constant -128 - // CHECK: linalg.pooling_nhwc_max - %0 = "tosa.max_pool2d"(%arg0) {pad = [0, 0, 0, 0], kernel = [3, 3], stride = [1, 1]} : (tensor<1x6x34x62xi8>) -> (tensor<1x4x32x62xi8>) - return -} - -// CHECK-LABEL: @max_pool_i16 -func @max_pool_i16(%arg0: tensor<1x6x34x62xi16>) -> () { - // CHECK: arith.constant -32768 - // CHECK: linalg.pooling_nhwc_max - %0 = "tosa.max_pool2d"(%arg0) {pad = [0, 0, 0, 0], kernel = [3, 3], stride = [1, 1]} : (tensor<1x6x34x62xi16>) -> (tensor<1x4x32x62xi16>) - return -} - -// CHECK-LABEL: @max_pool_i32 -func @max_pool_i32(%arg0: tensor<1x6x34x62xi32>) -> () { - // CHECK: arith.constant -2147483648 - // CHECK: linalg.pooling_nhwc_max - %0 = "tosa.max_pool2d"(%arg0) {pad = [0, 0, 0, 0], kernel = [3, 3], stride = [1, 1]} : (tensor<1x6x34x62xi32>) -> (tensor<1x4x32x62xi32>) - return -} -// ----- - -// CHECK-LABEL: @avg_pool -func @avg_pool(%arg0: tensor<1x6x34x62xf32>) -> (tensor<1x5x33x62xf32>) { - // Initial piece computes the sum of the pooling region, with appropriate padding. 
- // CHECK: [[CONST:%.+]] = arith.constant 0 - // CHECK: [[PAD:%.+]] = linalg.pad_tensor %arg0 low[0, 1, 1, 0] high[0, 1, 1, 0] - // CHECK: [[CONST:%.+]] = arith.constant 0 - // CHECK: [[POOLINIT:%.+]] = linalg.init_tensor [1, 5, 33, 62] - // CHECK: [[FILL:%.+]] = linalg.fill([[CONST]], [[POOLINIT]]) - // CHECK: [[KERNEL:%.+]] = linalg.init_tensor [4, 4] - // CHECK: [[POOL:%.+]] = linalg.pooling_nhwc_sum {dilations = dense<1> : vector<2xi64>, strides = dense<1> : vector<2xi64>} ins([[PAD]], [[KERNEL]] : tensor<1x8x36x62xf32>, tensor<4x4xf32>) outs([[FILL]] : tensor<1x5x33x62xf32>) - // CHECK: [[INIT:%.+]] = linalg.init_tensor [1, 5, 33, 62] - // CHECK: [[GENERIC:%.+]] = linalg.generic {indexing_maps = [#map, #map], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins([[POOL]] : tensor<1x5x33x62xf32>) outs([[INIT]] : tensor<1x5x33x62xf32>) - // CHECK: [[ZERO:%.0]] = arith.constant 0 - // CHECK: [[ONE:%.+]] = arith.constant 1 - // CHECK: [[HEIGHT:%.+]] = arith.constant 4 - // CHECK: [[WIDTH:%.+]] = arith.constant 32 - // CHECK: [[IDX1:%.+]] = linalg.index 1 - // CHECK: [[IDX2:%.+]] = linalg.index 2 - - // The large block below computes what portion of the kernel is within non-padded input. - // CHECK: [[NY:%.+]] = arith.subi [[HEIGHT]], [[IDX1]] - // CHECK: [[NX:%.+]] = arith.subi [[WIDTH]], [[IDX2]] - // CHECK: [[KH:%.+]] = arith.constant 4 - // CHECK: [[PAD0:%.+]] = arith.constant 1 - // CHECK: [[SUBP0:%.+]] = arith.subi [[IDX1]], [[PAD0]] - // CHECK: [[P0CMP:%.+]] = arith.cmpi slt, [[SUBP0]], [[ZERO]] - // CHECK: [[SELP0:%.+]] = select [[P0CMP]], [[SUBP0]], [[ZERO]] - // CHECK: [[ADDP0:%.+]] = arith.addi [[KH]], [[SELP0]] - // CHECK: [[PAD1:%.+]] = arith.constant 1 - // CHECK: [[SUBP1:%.+]] = arith.subi [[NY]], [[PAD1]] - // CHECK: [[P1CMP:%.+]] = arith.cmpi slt, [[SUBP1]], [[ZERO]] - // CHECK: [[SELP1:%.+]] = select [[P1CMP]], [[SUBP1]], [[ZERO]] - // CHECK: [[ADDP1:%.+]] = arith.addi [[ADDP0]], [[SELP1]] - // CHECK: [[YCMP:%.+]] = arith.cmpi slt, [[ADDP1]], [[ONE]] - // CHECK: [[YSEL:%.+]] = select [[YCMP]], [[ONE]], [[ADDP1]] - // CHECK: [[KW:%.+]] = arith.constant 4 : index - // CHECK: [[PAD2:%.+]] = arith.constant 1 : index - // CHECK: [[SUBP2:%.+]] = arith.subi [[IDX2]], [[PAD2]] - // CHECK: [[P2CMP:%.+]] = arith.cmpi slt, [[SUBP2]], [[ZERO]] - // CHECK: [[SELP2:%.+]] = select [[P2CMP]], [[SUBP2]], [[ZERO]] - // CHECK: [[ADDP2:%.+]] = arith.addi [[KW]], [[SELP2]] - // CHECK: [[PAD3:%.+]] = arith.constant 1 : index - // CHECK: [[SUBP3:%.+]] = arith.subi [[NX]], [[PAD3]] - // CHECK: [[P3CMP:%.+]] = arith.cmpi slt, [[SUBP3]], [[ZERO]] - // CHECK: [[SELP3:%.+]] = select [[P3CMP]], [[SUBP3]], [[ZERO]] - // CHECK: [[ADDP3:%.+]] = arith.addi [[ADDP2]], [[SELP3]] - // CHECK: [[XCMP:%.+]] = arith.cmpi slt, [[ADDP3]], [[ONE]] - // CHECK: [[XSEL:%.+]] = select [[XCMP]], [[ONE]], [[ADDP3]] - - // Given the valid coverage of the pooling region, normalize the summation. 
- // CHECK: [[C:%.+]] = arith.muli [[YSEL]], [[XSEL]] - // CHECK: [[CI:%.+]] = arith.index_cast [[C]] - // CHECK: [[CF:%.+]] = arith.sitofp [[CI]] - // CHECK: [[RESULT:%.+]] = arith.divf %arg1, [[CF]] - // CHECK: linalg.yield [[RESULT]] - %0 = "tosa.avg_pool2d"(%arg0) {pad = [1, 1, 1, 1], kernel = [4, 4], stride = [1, 1]} : (tensor<1x6x34x62xf32>) -> (tensor<1x5x33x62xf32>) - return %0 : tensor<1x5x33x62xf32> -} - -// ----- - -// CHECK-LABEL: @avg_pool_i8 -func @avg_pool_i8(%arg0 : tensor<1x128x128x2xi8>) -> () { - - // CHECK: linalg.pooling_nhwc_sum - // CHECK: linalg.generic - - // CHECK: %[[INZP:.+]] = arith.constant -128 - // CHECK: %[[INZP_OFF:.+]] = arith.muli %{{.+}}, %[[INZP]] - // CHECK: %[[OFFSETED:.+]] = arith.subi %arg1, %[[INZP_OFF]] - // CHECK: %[[NUMERATOR:.+]] = arith.constant 1073741825 - // CHECK: %[[MULTIPLIER:.+]] = arith.divui %[[NUMERATOR]], %{{.+}} - // CHECK: %[[SHIFT:.+]] = arith.constant 30 - // CHECK: %[[SCALE:.+]] = "tosa.apply_scale"(%{{.+}}, %[[MULTIPLIER]], %[[SHIFT]]) {double_round = false} - // CHECK: %[[OUTZP:.+]] = arith.constant -128 - // CHECK: %[[OUT:.+]] = arith.addi %[[SCALE]], %[[OUTZP]] - // CHECK: %[[MIN:.+]] = arith.constant -128 - // CHECK: %[[MAX:.+]] = arith.constant 127 - // CHECK: %[[CMP_MIN:.+]] = arith.cmpi slt, %[[OUT]], %[[MIN]] - // CHECK: %[[CLMP_MIN:.+]] = select %[[CMP_MIN]], %[[MIN]], %[[OUT]] - // CHECK: %[[CMP_MAX:.+]] = arith.cmpi slt, %[[MAX]], %[[OUT]] - // CHECK: %[[CLMP_MAX:.+]] = select %[[CMP_MAX]], %[[MAX]], %[[CLMP_MIN]] - // CHECK: %[[TRUNC:.+]] = arith.trunci %[[CLMP_MAX]] - // CHECK: linalg.yield %[[TRUNC]] - %0 = "tosa.avg_pool2d"(%arg0) {kernel = [4, 4], pad = [0, 0, 0, 0], quantization_info = {input_zp = -128 : i32, output_zp = -128 : i32}, stride = [4, 4]} : (tensor<1x128x128x2xi8>) -> tensor<1x32x32x2xi8> - return -} - -// ----- - -// CHECK-LABEL: @avg_pool_i16 -func @avg_pool_i16(%arg0 : tensor<1x128x128x2xi16>) -> () { - - // CHECK: linalg.pooling_nhwc_sum - // CHECK: linalg.generic - - // CHECK: %[[INZP:.+]] = arith.constant -128 - // CHECK: %[[INZP_OFF:.+]] = arith.muli %{{.+}}, %[[INZP]] - // CHECK: %[[OFFSETED:.+]] = arith.subi %arg1, %[[INZP_OFF]] - // CHECK: %[[NUMERATOR:.+]] = arith.constant 1073741825 - // CHECK: %[[MULTIPLIER:.+]] = arith.divui %[[NUMERATOR]], %{{.+}} - // CHECK: %[[SHIFT:.+]] = arith.constant 30 - // CHECK: %[[SCALE:.+]] = "tosa.apply_scale"(%{{.+}}, %[[MULTIPLIER]], %[[SHIFT]]) {double_round = false} - // CHECK: %[[OUTZP:.+]] = arith.constant -128 - // CHECK: %[[OUT:.+]] = arith.addi %[[SCALE]], %[[OUTZP]] - // CHECK: %[[MIN:.+]] = arith.constant -32768 - // CHECK: %[[MAX:.+]] = arith.constant 32767 - // CHECK: %[[CMP_MIN:.+]] = arith.cmpi slt, %[[OUT]], %[[MIN]] - // CHECK: %[[CLMP_MIN:.+]] = select %[[CMP_MIN]], %[[MIN]], %[[OUT]] - // CHECK: %[[CMP_MAX:.+]] = arith.cmpi slt, %[[MAX]], %[[OUT]] - // CHECK: %[[CLMP_MAX:.+]] = select %[[CMP_MAX]], %[[MAX]], %[[CLMP_MIN]] - // CHECK: %[[TRUNC:.+]] = arith.trunci %[[CLMP_MAX]] - // CHECK: linalg.yield %[[TRUNC]] - %0 = "tosa.avg_pool2d"(%arg0) {kernel = [4, 4], pad = [0, 0, 0, 0], quantization_info = {input_zp = -128 : i32, output_zp = -128 : i32}, stride = [4, 4]} : (tensor<1x128x128x2xi16>) -> tensor<1x32x32x2xi16> - return -} - -// ----- - -// CHECK: #[[$MAP1:.+]] = affine_map<(d0, d1, d2, d3) -> (d3)> -// CHECK: #[[$MAP2:.+]] = affine_map<(d0, d1, d2, d3) -> (d0, d1, d2, d3)> - -// CHECK-LABEL: @conv2d_f32 -func @conv2d_f32(%input: tensor<1x49x42x27xf32>, %weights: tensor<28x3x3x27xf32>, %bias: tensor<28xf32>) -> () { - // CHECK: 
%[[PERM:.+]] = arith.constant dense<[1, 2, 3, 0]> - // CHECK: %[[W:.+]] = "tosa.transpose"(%arg1, %[[PERM]]) - // CHECK: %[[M_IN:.+]] = linalg.init_tensor [1, 45, 40, 28] - // CHECK: %[[CST:.+]] = arith.constant 0 - // CHECK: %[[FILL:.+]] = linalg.fill - // CHECK: %[[B_IN:.+]] = linalg.init_tensor [1, 45, 40, 28] - // CHECK: %[[CONV:.+]] = linalg.conv_2d_nhwc_hwcf {dilations = dense<[2, 1]> : tensor<2xi64>, strides = dense<1> : tensor<2xi64>} ins(%arg0, %[[W]] : tensor<1x49x42x27xf32>, tensor<3x3x27x28xf32>) outs(%[[FILL]] : tensor<1x45x40x28xf32>) - // CHECK: %[[B:.+]] = linalg.generic {indexing_maps = [#[[$MAP1]], #[[$MAP2]], #[[$MAP2]]], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%arg2, %[[CONV]] : tensor<28xf32>, tensor<1x45x40x28xf32>) outs(%[[B_IN]] : tensor<1x45x40x28xf32>) - // CHECK: arith.addf - // CHECK: linalg.yield - %0 = "tosa.conv2d"(%input, %weights, %bias) {pad = [0, 0, 0, 0], stride = [1, 1], dilation = [2, 1]} : (tensor<1x49x42x27xf32>, tensor<28x3x3x27xf32>, tensor<28xf32>) -> (tensor<1x45x40x28xf32>) - return -} - -// ----- - -// CHECK-LABEL: @conv2d_padded_f32 -func @conv2d_padded_f32(%input: tensor<1x47x40x28xf32>, %weights: tensor<28x3x3x28xf32>, %bias: tensor<28xf32>) -> () { - // CHECK: %[[C0:.+]] = arith.constant 0 - // CHECK: linalg.pad_tensor %arg0 low[0, 1, 1, 0] high[0, 1, 1, 0] - // CHECK: linalg.yield %[[C0]] - // CHECK: linalg.conv_2d_nhwc_hwcf - %0 = "tosa.conv2d"(%input, %weights, %bias) {pad = [1, 1, 1, 1], stride = [1, 1], dilation = [2, 1]} : (tensor<1x47x40x28xf32>, tensor<28x3x3x28xf32>, tensor<28xf32>) -> (tensor<1x45x40x28xf32>) - return -} - -// ----- - -// CHECK-LABEL: @conv2d_quant -func @conv2d_quant(%arg0 : tensor<1x12x12x1xi8>, %arg1 : tensor<1024x3x3x1xi8>, %arg2 : tensor<1024xi32>) -> () { - // CHECK: %[[C22:.+]] = arith.constant -22 - // CHECK: linalg.pad_tensor %arg0 low[0, 1, 1, 0] high[0, 1, 1, 0] - // CHECK: linalg.yield %[[C22]] - // CHECK: linalg.conv_2d_nhwc_hwcf_q - %0 = "tosa.conv2d"(%arg0, %arg1, %arg2) {dilation = [1, 1], pad = [1, 1, 1, 1], quantization_info = {input_zp = -22 : i32, weight_zp = 42 : i32}, stride = [1, 1]} : (tensor<1x12x12x1xi8>, tensor<1024x3x3x1xi8>, tensor<1024xi32>) -> tensor<1x12x12x1024xi32> - return -} - -// ----- - -// CHECK: #[[$MAP0:.*]] = affine_map<(d0, d1, d2, d3) -> (d3)> -// CHECK: #[[$MAP1:.*]] = affine_map<(d0, d1, d2, d3) -> (d0, d1, d2, d3)> - -// CHECK-LABEL: @depthwise_conv -func @depthwise_conv(%arg0 : tensor<1x7x5x3xf32>, %arg1 : tensor<3x1x3x11xf32>, %arg2 : tensor<33xf32>) -> () { - // CHECK: [[INIT:%.+]] = linalg.init_tensor [1, 5, 5, 3, 11] - // CHECK: [[CST0:%.+]] = arith.constant 0 - // CHECK: [[FILL:%.+]] = linalg.fill([[CST0]], [[INIT]]) - // CHECK: [[OUT:%.+]] = linalg.init_tensor [1, 5, 5, 33] - // CHECK: [[DEPTH:%.+]] = linalg.depthwise_conv_2d_nhwc_hwcm {dilations = dense<1> : tensor<2xi64>, strides = dense<1> : tensor<2xi64>} ins(%arg0, %arg1 : tensor<1x7x5x3xf32>, tensor<3x1x3x11xf32>) outs([[FILL]] : tensor<1x5x5x3x11xf32>) - // CHECK: [[COLLAPSED:%.+]] = "tosa.reshape"([[DEPTH]]) {new_shape = [1, 5, 5, 33]} - // CHECK: [[BIAS:%.+]] = linalg.generic {indexing_maps = [#[[$MAP0]], #[[$MAP1]], #[[$MAP1]]], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%arg2, [[COLLAPSED]] : tensor<33xf32>, tensor<1x5x5x33xf32>) outs([[OUT]] : tensor<1x5x5x33xf32>) { - // CHECK: ^bb0(%arg3: f32, %arg4: f32, %arg5: f32): // no predecessors - // CHECK: [[ADD:%.+]] = arith.addf %arg3, %arg4 : f32 - // CHECK: linalg.yield [[ADD]] : f32 - // 
CHECK: } -> tensor<1x5x5x33xf32> - %2 = "tosa.depthwise_conv2d"(%arg0, %arg1, %arg2) { pad = [0, 0, 0, 0], stride = [1, 1], dilation = [1, 1] } : (tensor<1x7x5x3xf32>, tensor<3x1x3x11xf32>, tensor<33xf32>) -> (tensor<1x5x5x33xf32>) - return -} - -// ----- - -// CHECK: #[[$MAP0:.*]] = affine_map<(d0, d1, d2, d3) -> (d3)> -// CHECK: #[[$MAP1:.*]] = affine_map<(d0, d1, d2, d3) -> (d0, d1, d2, d3)> - -// CHECK-LABEL: @depthwise_conv_strides -func @depthwise_conv_strides(%arg0 : tensor<1x11x9x3xf32>, %arg1 : tensor<3x1x3x11xf32>, %arg2 : tensor<33xf32>) -> () { - // CHECK: [[INIT:%.+]] = linalg.init_tensor [1, 5, 5, 3, 11] - // CHECK: [[CST0:%.+]] = arith.constant 0 - // CHECK: [[FILL:%.+]] = linalg.fill([[CST0]], [[INIT]]) - // CHECK: [[OUT:%.+]] = linalg.init_tensor [1, 5, 5, 33] - // CHECK: [[DEPTH:%.+]] = linalg.depthwise_conv_2d_nhwc_hwcm {dilations = dense<1> : tensor<2xi64>, strides = dense<2> : tensor<2xi64>} ins(%arg0, %arg1 : tensor<1x11x9x3xf32>, tensor<3x1x3x11xf32>) outs([[FILL]] : tensor<1x5x5x3x11xf32>) - // CHECK: [[COLLAPSED:%.+]] = "tosa.reshape"([[DEPTH]]) {new_shape = [1, 5, 5, 33]} - // CHECK: [[BIAS:%.+]] = linalg.generic {indexing_maps = [#[[$MAP0]], #[[$MAP1]], #[[$MAP1]]], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%arg2, [[COLLAPSED]] : tensor<33xf32>, tensor<1x5x5x33xf32>) outs([[OUT]] : tensor<1x5x5x33xf32>) { - // CHECK: ^bb0(%arg3: f32, %arg4: f32, %arg5: f32): // no predecessors - // CHECK: [[ADD:%.+]] = arith.addf %arg3, %arg4 : f32 - // CHECK: linalg.yield [[ADD]] : f32 - // CHECK: } -> tensor<1x5x5x33xf32> - %2 = "tosa.depthwise_conv2d"(%arg0, %arg1, %arg2) { pad = [0, 0, 0, 0], stride = [2, 2], dilation = [1, 1] } : (tensor<1x11x9x3xf32>, tensor<3x1x3x11xf32>, tensor<33xf32>) -> (tensor<1x5x5x33xf32>) - return -} - -// ----- - -// CHECK: #[[$MAP0:.*]] = affine_map<(d0, d1, d2, d3) -> (d3)> -// CHECK: #[[$MAP1:.*]] = affine_map<(d0, d1, d2, d3) -> (d0, d1, d2, d3)> - -// CHECK-LABEL: @depthwise_conv_quant -func @depthwise_conv_quant(%arg0 : tensor<1x12x12x4xi8>, %arg1 : tensor<3x3x4x128xi8>, %arg2 : tensor<512xi32>) -> () { - // CHECK: [[PADV:%.+]] = arith.constant -128 - // CHECK: [[PAD:%.+]] = linalg.pad_tensor %arg0 low[0, 1, 1, 0] high[0, 1, 1, 0] - // CHECK: linalg.yield [[PADV]] - - // CHECK: [[INIT:%.+]] = linalg.init_tensor [1, 12, 12, 4, 128] - // CHECK: [[CST0:%.+]] = arith.constant 0 - // CHECK: [[FILL:%.+]] = linalg.fill([[CST0]], [[INIT]]) - // CHECK: [[OUT:%.+]] = linalg.init_tensor [1, 12, 12, 512] - // CHECK: [[C128:%.+]] = arith.constant -128 - // CHECK: [[C42:%.+]] = arith.constant 42 - // CHECK: [[DEPTH:%.+]] = linalg.depthwise_conv_2d_nhwc_hwcm_q {dilations = dense<1> : tensor<2xi64>, strides = dense<1> : tensor<2xi64>} ins([[PAD]], %arg1, [[C128]], [[C42]] : tensor<1x14x14x4xi8>, tensor<3x3x4x128xi8>, i32, i32) outs([[FILL]] : tensor<1x12x12x4x128xi32>) - // CHECK: [[COLLAPSED:%.+]] = "tosa.reshape"([[DEPTH]]) {new_shape = [1, 12, 12, 512]} - // CHECK: [[BIAS:%.+]] = linalg.generic {indexing_maps = [#[[$MAP0]], #[[$MAP1]], #[[$MAP1]]], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%arg2, [[COLLAPSED]] : tensor<512xi32>, tensor<1x12x12x512xi32>) outs([[OUT]] : tensor<1x12x12x512xi32>) { - // CHECK: ^bb0(%arg3: i32, %arg4: i32, %arg5: i32): // no predecessors - // CHECK: [[ADD:%.+]] = arith.addi %arg3, %arg4 : i32 - // CHECK: linalg.yield [[ADD]] : i32 - // CHECK: } -> tensor<1x12x12x512xi32> - %0 = "tosa.depthwise_conv2d"(%arg0, %arg1, %arg2) {pad = [1, 1, 1, 1], quantization_info = 
{input_zp = -128 : i32, weight_zp = 42 : i32}, stride = [1, 1], dilation = [1, 1] } : (tensor<1x12x12x4xi8>, tensor<3x3x4x128xi8>, tensor<512xi32>) -> tensor<1x12x12x512xi32> - return -} - -// ----- - -// CHECK: #[[$MAP0:.*]] = affine_map<(d0, d1, d2, d3) -> (d3)> -// CHECK: #[[$MAP1:.*]] = affine_map<(d0, d1, d2, d3) -> (d0, d1, d2, d3)> - -// CHECK-LABEL: @depthwise_conv_quant_dilations -func @depthwise_conv_quant_dilations(%arg0 : tensor<1x14x14x4xi8>, %arg1 : tensor<3x3x4x128xi8>, %arg2 : tensor<512xi32>) -> () { - // CHECK: [[INIT:%.+]] = linalg.init_tensor [1, 10, 10, 4, 128] - // CHECK: [[CST0:%.+]] = arith.constant 0 - // CHECK: [[FILL:%.+]] = linalg.fill([[CST0]], [[INIT]]) - // CHECK: [[OUT:%.+]] = linalg.init_tensor [1, 10, 10, 512] - // CHECK: [[C128:%.+]] = arith.constant -128 - // CHECK: [[C42:%.+]] = arith.constant 42 - // CHECK: [[DEPTH:%.+]] = linalg.depthwise_conv_2d_nhwc_hwcm_q {dilations = dense<2> : tensor<2xi64>, strides = dense<1> : tensor<2xi64>} ins(%arg0, %arg1, [[C128]], [[C42]] : tensor<1x14x14x4xi8>, tensor<3x3x4x128xi8>, i32, i32) outs([[FILL]] : tensor<1x10x10x4x128xi32>) - // CHECK: [[COLLAPSED:%.+]] = "tosa.reshape"([[DEPTH]]) {new_shape = [1, 10, 10, 512]} - // CHECK: [[BIAS:%.+]] = linalg.generic {indexing_maps = [#[[$MAP0]], #[[$MAP1]], #[[$MAP1]]], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%arg2, [[COLLAPSED]] : tensor<512xi32>, tensor<1x10x10x512xi32>) outs([[OUT]] : tensor<1x10x10x512xi32>) { - // CHECK: ^bb0(%arg3: i32, %arg4: i32, %arg5: i32): // no predecessors - // CHECK: [[ADD:%.+]] = arith.addi %arg3, %arg4 : i32 - // CHECK: linalg.yield [[ADD]] : i32 - // CHECK: } -> tensor<1x10x10x512xi32> - %0 = "tosa.depthwise_conv2d"(%arg0, %arg1, %arg2) {pad = [0, 0, 0, 0], quantization_info = {input_zp = -128 : i32, weight_zp = 42 : i32}, stride = [1, 1], dilation = [2, 2] } : (tensor<1x14x14x4xi8>, tensor<3x3x4x128xi8>, tensor<512xi32>) -> tensor<1x10x10x512xi32> - return -} diff --git a/mlir/test/Conversion/TosaToLinalg/tosa-to-linalg.mlir b/mlir/test/Conversion/TosaToLinalg/tosa-to-linalg.mlir index e68e76c67ef9..f8dc2e0bbd08 100644 --- a/mlir/test/Conversion/TosaToLinalg/tosa-to-linalg.mlir +++ b/mlir/test/Conversion/TosaToLinalg/tosa-to-linalg.mlir @@ -1064,6 +1064,154 @@ func @tile(%arg0 : tensor<2x3xi8>) -> () { // ----- + +// CHECK-LABEL: @matmul +func @matmul(%arg0: tensor<1x5x3xf32>, %arg1: tensor<1x3x6xf32>) -> (tensor<1x5x6xf32>) { + // CHECK: [[C0:%.+]] = arith.constant 0 + // CHECK: [[INIT:%.+]] = linalg.init_tensor [1, 5, 6] + // CHECK: [[FILLED:%.+]] = linalg.fill([[C0]], [[INIT]]) : f32, tensor<1x5x6xf32> -> tensor<1x5x6xf32> + // CHECK: linalg.batch_matmul ins(%arg0, %arg1 : tensor<1x5x3xf32>, tensor<1x3x6xf32>) outs([[FILLED]] : tensor<1x5x6xf32>) -> tensor<1x5x6xf32> + %0 = "tosa.matmul"(%arg0, %arg1) : (tensor<1x5x3xf32>, tensor<1x3x6xf32>) -> (tensor<1x5x6xf32>) + return %0 : tensor<1x5x6xf32> +} + +// ----- + + +// CHECK-LABEL: @matmul_quantized +func @matmul_quantized(%arg0: tensor<1x5x3xi8>, %arg1: tensor<1x3x6xi8>) -> (tensor<1x5x6xi32>) { + // CHECK: [[C0:%.+]] = arith.constant 0 + // CHECK: [[INIT:%.+]] = linalg.init_tensor [1, 5, 6] + // CHECK: [[FILLED:%.+]] = linalg.fill([[C0]], [[INIT]]) : i32, tensor<1x5x6xi32> -> tensor<1x5x6xi32> + // CHECK: [[ONE:%.+]] = arith.constant 1 + // CHECK: [[TWO:%.+]] = arith.constant 2 + // CHECK: linalg.quantized_batch_matmul ins(%arg0, %arg1, [[ONE]], [[TWO]] : tensor<1x5x3xi8>, tensor<1x3x6xi8>, i32, i32) outs([[FILLED]] : tensor<1x5x6xi32>) -> 
tensor<1x5x6xi32> + %0 = "tosa.matmul"(%arg0, %arg1) {quantization_info = {a_zp = 1 : i32, b_zp = 2 : i32}} : (tensor<1x5x3xi8>, tensor<1x3x6xi8>) -> (tensor<1x5x6xi32>) + return %0 : tensor<1x5x6xi32> +} + +// ----- + +// CHECK-LABEL: @matmul_dyn_batch +func @matmul_dyn_batch(%arg0: tensor, %arg1: tensor) -> (tensor) { + // CHECK: %[[C0:.+]] = arith.constant 0 + // CHECK: %[[DIM:.+]] = tensor.dim %arg0, %[[C0]] + // CHECK: %[[C0_0:.+]] = arith.constant 0 + // CHECK: %[[INIT:.+]] = linalg.init_tensor [%[[DIM]], 5, 6] + // CHECK: %[[FILLED:.+]] = linalg.fill(%[[C0_0]], %[[INIT]]) : f32, tensor -> tensor + // CHECK: linalg.batch_matmul ins(%arg0, %arg1 : tensor, tensor) outs(%[[FILLED]] : tensor) -> tensor + %0 = "tosa.matmul"(%arg0, %arg1) : (tensor, tensor) -> (tensor) + return %0 : tensor +} + +// ----- + +// CHECK-LABEL: @matmul_dyn_independent_dim +func @matmul_dyn_independent_dim(%arg0: tensor<1x5x3xf32>, %arg1: tensor<1x3x?xf32>) -> (tensor<1x5x?xf32>) { + // CHECK: %[[C2:.+]] = arith.constant 2 + // CHECK: %[[DIM:.+]] = tensor.dim %arg1, %[[C2]] + // CHECK: %[[C0:.+]] = arith.constant 0 + // CHECK: %[[INIT:.+]] = linalg.init_tensor [1, 5, %[[DIM]]] + // CHECK: %[[FILLED:.+]] = linalg.fill(%[[C0]], %[[INIT]]) : f32, tensor<1x5x?xf32> -> tensor<1x5x?xf32> + // CHECK: linalg.batch_matmul ins(%arg0, %arg1 : tensor<1x5x3xf32>, tensor<1x3x?xf32>) outs(%[[FILLED]] : tensor<1x5x?xf32>) -> tensor<1x5x?xf32> + %0 = "tosa.matmul"(%arg0, %arg1) : (tensor<1x5x3xf32>, tensor<1x3x?xf32>) -> (tensor<1x5x?xf32>) + return %0 : tensor<1x5x?xf32> +} + +// ----- + +// CHECK-LABEL: @matmul_dyn_independent_dim +func @matmul_dyn_independent_dim(%arg0: tensor<1x5x?xf32>, %arg1: tensor<1x?x6xf32>) -> (tensor<1x5x6xf32>) { + // CHECK: %[[C0:.+]] = arith.constant 0 + // CHECK: %[[INIT:.+]] = linalg.init_tensor [1, 5, 6] + // CHECK: %[[FILLED:.+]] = linalg.fill(%[[C0]], %[[INIT]]) : f32, tensor<1x5x6xf32> -> tensor<1x5x6xf32> + // CHECK: linalg.batch_matmul ins(%arg0, %arg1 : tensor<1x5x?xf32>, tensor<1x?x6xf32>) outs(%[[FILLED]] : tensor<1x5x6xf32>) -> tensor<1x5x6xf32> + %0 = "tosa.matmul"(%arg0, %arg1) : (tensor<1x5x?xf32>, tensor<1x?x6xf32>) -> (tensor<1x5x6xf32>) + return %0 : tensor<1x5x6xf32> +} + +// ----- + +// CHECK: #[[$MAP0:.*]] = affine_map<(d0, d1) -> (d1, d0)> +// CHECK: #[[$MAP1:.*]] = affine_map<(d0, d1) -> (d0, d1)> +// CHECK: #[[$MAP2:.*]] = affine_map<(d0, d1) -> (d1)> + +// CHECK-LABEL: @fully_connected +func @fully_connected(%arg0: tensor<5x3xf32>, %arg1: tensor<6x3xf32>, %arg2: tensor<6xf32>) -> (tensor<5x6xf32>) { + // CHECK: [[INITT:%.+]] = linalg.init_tensor [5, 6] + // CHECK: [[ZERO:%.+]] = arith.constant 0 + // CHECK: [[FILL:%.+]] = linalg.fill([[ZERO]], [[INITT]]) + // CHECK: [[PERM:%.+]] = arith.constant dense<[1, 0]> + // CHECK: [[INITT:%.+]] = linalg.init_tensor [3, 6] + // CHECK: [[TRANSPOSE:%.+]] = linalg.generic {indexing_maps = [#[[$MAP0]], #[[$MAP1]]], iterator_types = ["parallel", "parallel"]} ins(%arg1 : tensor<6x3xf32>) outs([[INITT]] : tensor<3x6xf32>) { + // CHECK: ^bb0([[IN:%.+]]: f32, [[UNUSED:%.+]]: f32): + // CHECK: linalg.yield [[IN]] : f32 + // CHECK: [[INITB:%.+]] = linalg.init_tensor [5, 6] + // CHECK: [[MATMUL:%.+]] = linalg.matmul ins(%arg0, [[TRANSPOSE]] : tensor<5x3xf32>, tensor<3x6xf32>) outs([[FILL]] : tensor<5x6xf32>) -> tensor<5x6xf32> + // CHECK: [[ADDED:%.+]] = linalg.generic {indexing_maps = [#[[$MAP2]], #[[$MAP1]], #[[$MAP1]]], iterator_types = ["parallel", "parallel"]} ins(%arg2, [[MATMUL]] : tensor<6xf32>, tensor<5x6xf32>) outs([[INITB]] : 
tensor<5x6xf32>) { + // CHECK: ^bb0(%arg3: f32, %arg4: f32, %arg5: f32): + // CHECK: [[ADD:%.+]] = arith.addf %arg3, %arg4 : f32 + // CHECK: linalg.yield [[ADD]] : f32 + + %0 = "tosa.fully_connected"(%arg0, %arg1, %arg2) : (tensor<5x3xf32>, tensor<6x3xf32>, tensor<6xf32>) -> (tensor<5x6xf32>) + return %0 : tensor<5x6xf32> +} + +// ----- + +// CHECK: #[[$MAP0:.*]] = affine_map<(d0, d1) -> (d1, d0)> +// CHECK: #[[$MAP1:.*]] = affine_map<(d0, d1) -> (d0, d1)> +// CHECK: #[[$MAP2:.*]] = affine_map<(d0, d1) -> (d1)> + +// CHECK-LABEL: @quantized_fully_connected +func @quantized_fully_connected(%arg0: tensor<5x3xi8>, %arg1: tensor<6x3xi8>, %arg2: tensor<6xi32>) -> (tensor<5x6xi32>) { + // CHECK: [[INITT:%.+]] = linalg.init_tensor [5, 6] + // CHECK: [[ZERO:%.+]] = arith.constant 0 + // CHECK: [[FILL:%.+]] = linalg.fill([[ZERO]], [[INITT]]) + // CHECK: [[PERM:%.+]] = arith.constant dense<[1, 0]> + // CHECK: [[INITT:%.+]] = linalg.init_tensor [3, 6] + // CHECK: [[TRANSPOSE:%.+]] = linalg.generic {indexing_maps = [#[[$MAP0]], #[[$MAP1]]], iterator_types = ["parallel", "parallel"]} ins(%arg1 : tensor<6x3xi8>) outs([[INITT]] : tensor<3x6xi8>) { + // CHECK: ^bb0([[IN:%.+]]: i8, [[UNUSED:%.+]]: i8): + // CHECK: linalg.yield [[IN]] : i8 + // CHECK: [[INITB:%.+]] = linalg.init_tensor [5, 6] + // CHECK: [[ONE:%.+]] = arith.constant 1 + // CHECK: [[TWO:%.+]] = arith.constant 2 + // CHECK: [[MATMUL:%.+]] = linalg.quantized_matmul ins(%arg0, [[TRANSPOSE]], [[ONE]], [[TWO]] : tensor<5x3xi8>, tensor<3x6xi8>, i32, i32) outs([[FILL]] : tensor<5x6xi32>) -> tensor<5x6xi32> + // CHECK: [[ADDED:%.+]] = linalg.generic {indexing_maps = [#[[$MAP2]], #[[$MAP1]], #[[$MAP1]]], iterator_types = ["parallel", "parallel"]} ins(%arg2, [[MATMUL]] : tensor<6xi32>, tensor<5x6xi32>) outs([[INITB]] + // CHECK: ^bb0([[IN1:%.+]]: i32, [[IN2:%.+]]: i32, [[UNUSED:%.+]]: i32): + // CHECK: [[ADD:%.+]] = arith.addi + // CHECK: linalg.yield [[ADD]] : i32 + %0 = "tosa.fully_connected"(%arg0, %arg1, %arg2) {quantization_info = {input_zp = 1:i32, weight_zp = 2:i32}} : (tensor<5x3xi8>, tensor<6x3xi8>, tensor<6xi32>) -> (tensor<5x6xi32>) + return %0 : tensor<5x6xi32> +} + +// ----- + +// CHECK-LABEL: @fully_connected_dyn +func @fully_connected_dyn(%arg0: tensor, %arg1: tensor<6x3xf32>, %arg2: tensor<6xf32>) -> (tensor) { + // CHECK: %[[C0:.+]] = arith.constant 0 + // CHECK: %[[DIM:.+]] = tensor.dim %arg0, %[[C0]] + // CHECK: %[[INITT:.+]] = linalg.init_tensor [%[[DIM]], 6] + // CHECK: %[[ZERO:.+]] = arith.constant 0 + // CHECK: %[[FILL:.+]] = linalg.fill(%[[ZERO]], %[[INITT]]) + // CHECK: %[[PERM:.+]] = arith.constant dense<[1, 0]> + // CHECK: %[[INITT:.+]] = linalg.init_tensor [3, 6] + // CHECK: %[[TRANSPOSE:.+]] = linalg.generic {indexing_maps = [#[[$MAP0]], #[[$MAP1]]], iterator_types = ["parallel", "parallel"]} ins(%arg1 : tensor<6x3xf32>) outs(%[[INITT]] : tensor<3x6xf32>) { + // CHECK: ^bb0(%[[IN:.+]]: f32, %[[UNUSED:.+]]: f32): + // CHECK: linalg.yield %[[IN]] : f32 + // CHECK: %[[INITB:.+]] = linalg.init_tensor [%[[DIM]], 6] + // CHECK: %[[MATMUL:.+]] = linalg.matmul ins(%arg0, %[[TRANSPOSE]] : tensor, tensor<3x6xf32>) outs(%[[FILL]] : tensor) -> tensor + // CHECK: %[[ADDED:.+]] = linalg.generic {indexing_maps = [#[[$MAP2]], #[[$MAP1]], #[[$MAP1]]], iterator_types = ["parallel", "parallel"]} ins(%arg2, %[[MATMUL]] : tensor<6xf32>, tensor) outs(%[[INITB]] : tensor) { + // CHECK: ^bb0(%arg3: f32, %arg4: f32, %arg5: f32): + // CHECK: %[[ADD:.+]] = arith.addf %arg3, %arg4 : f32 + // CHECK: linalg.yield %[[ADD]] : f32 + + %0 = 
"tosa.fully_connected"(%arg0, %arg1, %arg2) : (tensor, tensor<6x3xf32>, tensor<6xf32>) -> (tensor) + return %0 : tensor +} + +// ----- + func @pad_float(%arg0 : tensor<1x2xf32>) -> (tensor<4x9xf32>) { %0 = arith.constant dense<[[1, 2], [3, 4]]> : tensor<2x2xi32> // TODO: Output contains multiple "arith.constant 1 : index". @@ -1247,6 +1395,318 @@ func @table16(%arg0: tensor<6xi16>, %arg1: tensor<513xi16>) -> () { // ----- +// CHECK-LABEL: @max_pool +func @max_pool(%arg0: tensor<1x6x34x62xf32>) -> () { + // CHECK-DAG: [[CONST:%.+]] = arith.constant -3.40282347E+38 + // CHECK-DAG: [[INIT:%.+]] = linalg.init_tensor [1, 4, 32, 62] + // CHECK-DAG: [[FILL:%.+]] = linalg.fill([[CONST]], [[INIT]]) + // CHECK-DAG: [[KERNEL:%.+]] = linalg.init_tensor [3, 3] + // CHECK: linalg.pooling_nhwc_max {dilations = dense<1> : vector<2xi64>, strides = dense<1> : vector<2xi64>} ins(%arg0, [[KERNEL]] : tensor<1x6x34x62xf32>, tensor<3x3xf32>) outs([[FILL]] : tensor<1x4x32x62xf32>) + %0 = "tosa.max_pool2d"(%arg0) {pad = [0, 0, 0, 0], kernel = [3, 3], stride = [1, 1]} : (tensor<1x6x34x62xf32>) -> (tensor<1x4x32x62xf32>) + return +} + +// CHECK-LABEL: @max_pool_padded +func @max_pool_padded(%arg0: tensor<1x6x34x62xf32>) -> () { + // CHECK-DAG: [[CONST:%.+]] = arith.constant -3.40282347E+38 : f32 + // CHECK-DAG: [[PAD:%.+]] = linalg.pad_tensor %arg0 low[0, 0, 0, 0] high[0, 0, 1, 0] + // CHECK-DAG: linalg.yield [[CONST]] + // CHECK-DAG: [[INITVAL:%.+]] = arith.constant -3.40282347E+38 : f32 + // CHECK-DAG: [[INIT:%.+]] = linalg.init_tensor [1, 4, 33, 62] + // CHECK-DAG: [[FILL:%.+]] = linalg.fill([[INITVAL]], [[INIT]]) + // CHECK-DAG: [[KERNEL:%.+]] = linalg.init_tensor [3, 3] + // CHECK: linalg.pooling_nhwc_max {dilations = dense<1> : vector<2xi64>, strides = dense<1> : vector<2xi64>} ins([[PAD]], [[KERNEL]] : tensor<1x6x35x62xf32>, tensor<3x3xf32>) outs([[FILL]] : tensor<1x4x33x62xf32>) + %0 = "tosa.max_pool2d"(%arg0) {pad = [0, 0, 0, 1], kernel = [3, 3], stride = [1, 1]} : (tensor<1x6x34x62xf32>) -> (tensor<1x4x33x62xf32>) + return +} + +// CHECK-LABEL: @max_pool_i8 +func @max_pool_i8(%arg0: tensor<1x6x34x62xi8>) -> () { + // CHECK: arith.constant -128 + // CHECK: linalg.pooling_nhwc_max + %0 = "tosa.max_pool2d"(%arg0) {pad = [0, 0, 0, 0], kernel = [3, 3], stride = [1, 1]} : (tensor<1x6x34x62xi8>) -> (tensor<1x4x32x62xi8>) + return +} + +// CHECK-LABEL: @max_pool_i16 +func @max_pool_i16(%arg0: tensor<1x6x34x62xi16>) -> () { + // CHECK: arith.constant -32768 + // CHECK: linalg.pooling_nhwc_max + %0 = "tosa.max_pool2d"(%arg0) {pad = [0, 0, 0, 0], kernel = [3, 3], stride = [1, 1]} : (tensor<1x6x34x62xi16>) -> (tensor<1x4x32x62xi16>) + return +} + +// CHECK-LABEL: @max_pool_i32 +func @max_pool_i32(%arg0: tensor<1x6x34x62xi32>) -> () { + // CHECK: arith.constant -2147483648 + // CHECK: linalg.pooling_nhwc_max + %0 = "tosa.max_pool2d"(%arg0) {pad = [0, 0, 0, 0], kernel = [3, 3], stride = [1, 1]} : (tensor<1x6x34x62xi32>) -> (tensor<1x4x32x62xi32>) + return +} +// ----- + +// CHECK-LABEL: @avg_pool +func @avg_pool(%arg0: tensor<1x6x34x62xf32>) -> (tensor<1x5x33x62xf32>) { + // Initial piece computes the sum of the pooling region, with appropriate padding. 
+ // CHECK: [[CONST:%.+]] = arith.constant 0 + // CHECK: [[PAD:%.+]] = linalg.pad_tensor %arg0 low[0, 1, 1, 0] high[0, 1, 1, 0] + // CHECK: [[CONST:%.+]] = arith.constant 0 + // CHECK: [[POOLINIT:%.+]] = linalg.init_tensor [1, 5, 33, 62] + // CHECK: [[FILL:%.+]] = linalg.fill([[CONST]], [[POOLINIT]]) + // CHECK: [[KERNEL:%.+]] = linalg.init_tensor [4, 4] + // CHECK: [[POOL:%.+]] = linalg.pooling_nhwc_sum {dilations = dense<1> : vector<2xi64>, strides = dense<1> : vector<2xi64>} ins([[PAD]], [[KERNEL]] : tensor<1x8x36x62xf32>, tensor<4x4xf32>) outs([[FILL]] : tensor<1x5x33x62xf32>) + // CHECK: [[INIT:%.+]] = linalg.init_tensor [1, 5, 33, 62] + // CHECK: [[GENERIC:%.+]] = linalg.generic {indexing_maps = [#map, #map], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins([[POOL]] : tensor<1x5x33x62xf32>) outs([[INIT]] : tensor<1x5x33x62xf32>) + // CHECK: [[ZERO:%.0]] = arith.constant 0 + // CHECK: [[ONE:%.+]] = arith.constant 1 + // CHECK: [[HEIGHT:%.+]] = arith.constant 4 + // CHECK: [[WIDTH:%.+]] = arith.constant 32 + // CHECK: [[IDX1:%.+]] = linalg.index 1 + // CHECK: [[IDX2:%.+]] = linalg.index 2 + + // The large block below computes what portion of the kernel is within non-padded input. + // CHECK: [[NY:%.+]] = arith.subi [[HEIGHT]], [[IDX1]] + // CHECK: [[NX:%.+]] = arith.subi [[WIDTH]], [[IDX2]] + // CHECK: [[KH:%.+]] = arith.constant 4 + // CHECK: [[PAD0:%.+]] = arith.constant 1 + // CHECK: [[SUBP0:%.+]] = arith.subi [[IDX1]], [[PAD0]] + // CHECK: [[P0CMP:%.+]] = arith.cmpi slt, [[SUBP0]], [[ZERO]] + // CHECK: [[SELP0:%.+]] = select [[P0CMP]], [[SUBP0]], [[ZERO]] + // CHECK: [[ADDP0:%.+]] = arith.addi [[KH]], [[SELP0]] + // CHECK: [[PAD1:%.+]] = arith.constant 1 + // CHECK: [[SUBP1:%.+]] = arith.subi [[NY]], [[PAD1]] + // CHECK: [[P1CMP:%.+]] = arith.cmpi slt, [[SUBP1]], [[ZERO]] + // CHECK: [[SELP1:%.+]] = select [[P1CMP]], [[SUBP1]], [[ZERO]] + // CHECK: [[ADDP1:%.+]] = arith.addi [[ADDP0]], [[SELP1]] + // CHECK: [[YCMP:%.+]] = arith.cmpi slt, [[ADDP1]], [[ONE]] + // CHECK: [[YSEL:%.+]] = select [[YCMP]], [[ONE]], [[ADDP1]] + // CHECK: [[KW:%.+]] = arith.constant 4 : index + // CHECK: [[PAD2:%.+]] = arith.constant 1 : index + // CHECK: [[SUBP2:%.+]] = arith.subi [[IDX2]], [[PAD2]] + // CHECK: [[P2CMP:%.+]] = arith.cmpi slt, [[SUBP2]], [[ZERO]] + // CHECK: [[SELP2:%.+]] = select [[P2CMP]], [[SUBP2]], [[ZERO]] + // CHECK: [[ADDP2:%.+]] = arith.addi [[KW]], [[SELP2]] + // CHECK: [[PAD3:%.+]] = arith.constant 1 : index + // CHECK: [[SUBP3:%.+]] = arith.subi [[NX]], [[PAD3]] + // CHECK: [[P3CMP:%.+]] = arith.cmpi slt, [[SUBP3]], [[ZERO]] + // CHECK: [[SELP3:%.+]] = select [[P3CMP]], [[SUBP3]], [[ZERO]] + // CHECK: [[ADDP3:%.+]] = arith.addi [[ADDP2]], [[SELP3]] + // CHECK: [[XCMP:%.+]] = arith.cmpi slt, [[ADDP3]], [[ONE]] + // CHECK: [[XSEL:%.+]] = select [[XCMP]], [[ONE]], [[ADDP3]] + + // Given the valid coverage of the pooling region, normalize the summation. 
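  // For this configuration (kernel = [4, 4], pad = [1, 1, 1, 1], stride = [1, 1])
  // the count is 16 for interior output positions and shrinks at the borders,
  // e.g. 3 * 3 = 9 at the corners, so each sum is divided only by the inputs
  // that actually contributed to it.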
+ // CHECK: [[C:%.+]] = arith.muli [[YSEL]], [[XSEL]] + // CHECK: [[CI:%.+]] = arith.index_cast [[C]] + // CHECK: [[CF:%.+]] = arith.sitofp [[CI]] + // CHECK: [[RESULT:%.+]] = arith.divf %arg1, [[CF]] + // CHECK: linalg.yield [[RESULT]] + %0 = "tosa.avg_pool2d"(%arg0) {pad = [1, 1, 1, 1], kernel = [4, 4], stride = [1, 1]} : (tensor<1x6x34x62xf32>) -> (tensor<1x5x33x62xf32>) + return %0 : tensor<1x5x33x62xf32> +} + +// ----- + +// CHECK-LABEL: @avg_pool_i8 +func @avg_pool_i8(%arg0 : tensor<1x128x128x2xi8>) -> () { + + // CHECK: linalg.pooling_nhwc_sum + // CHECK: linalg.generic + + // CHECK: %[[INZP:.+]] = arith.constant -128 + // CHECK: %[[INZP_OFF:.+]] = arith.muli %{{.+}}, %[[INZP]] + // CHECK: %[[OFFSETED:.+]] = arith.subi %arg1, %[[INZP_OFF]] + // CHECK: %[[NUMERATOR:.+]] = arith.constant 1073741825 + // CHECK: %[[MULTIPLIER:.+]] = arith.divui %[[NUMERATOR]], %{{.+}} + // CHECK: %[[SHIFT:.+]] = arith.constant 30 + // CHECK: %[[SCALE:.+]] = "tosa.apply_scale"(%{{.+}}, %[[MULTIPLIER]], %[[SHIFT]]) {double_round = false} + // CHECK: %[[OUTZP:.+]] = arith.constant -128 + // CHECK: %[[OUT:.+]] = arith.addi %[[SCALE]], %[[OUTZP]] + // CHECK: %[[MIN:.+]] = arith.constant -128 + // CHECK: %[[MAX:.+]] = arith.constant 127 + // CHECK: %[[CMP_MIN:.+]] = arith.cmpi slt, %[[OUT]], %[[MIN]] + // CHECK: %[[CLMP_MIN:.+]] = select %[[CMP_MIN]], %[[MIN]], %[[OUT]] + // CHECK: %[[CMP_MAX:.+]] = arith.cmpi slt, %[[MAX]], %[[OUT]] + // CHECK: %[[CLMP_MAX:.+]] = select %[[CMP_MAX]], %[[MAX]], %[[CLMP_MIN]] + // CHECK: %[[TRUNC:.+]] = arith.trunci %[[CLMP_MAX]] + // CHECK: linalg.yield %[[TRUNC]] + %0 = "tosa.avg_pool2d"(%arg0) {kernel = [4, 4], pad = [0, 0, 0, 0], quantization_info = {input_zp = -128 : i32, output_zp = -128 : i32}, stride = [4, 4]} : (tensor<1x128x128x2xi8>) -> tensor<1x32x32x2xi8> + return +} + +// ----- + +// CHECK-LABEL: @avg_pool_i16 +func @avg_pool_i16(%arg0 : tensor<1x128x128x2xi16>) -> () { + + // CHECK: linalg.pooling_nhwc_sum + // CHECK: linalg.generic + + // CHECK: %[[INZP:.+]] = arith.constant -128 + // CHECK: %[[INZP_OFF:.+]] = arith.muli %{{.+}}, %[[INZP]] + // CHECK: %[[OFFSETED:.+]] = arith.subi %arg1, %[[INZP_OFF]] + // CHECK: %[[NUMERATOR:.+]] = arith.constant 1073741825 + // CHECK: %[[MULTIPLIER:.+]] = arith.divui %[[NUMERATOR]], %{{.+}} + // CHECK: %[[SHIFT:.+]] = arith.constant 30 + // CHECK: %[[SCALE:.+]] = "tosa.apply_scale"(%{{.+}}, %[[MULTIPLIER]], %[[SHIFT]]) {double_round = false} + // CHECK: %[[OUTZP:.+]] = arith.constant -128 + // CHECK: %[[OUT:.+]] = arith.addi %[[SCALE]], %[[OUTZP]] + // CHECK: %[[MIN:.+]] = arith.constant -32768 + // CHECK: %[[MAX:.+]] = arith.constant 32767 + // CHECK: %[[CMP_MIN:.+]] = arith.cmpi slt, %[[OUT]], %[[MIN]] + // CHECK: %[[CLMP_MIN:.+]] = select %[[CMP_MIN]], %[[MIN]], %[[OUT]] + // CHECK: %[[CMP_MAX:.+]] = arith.cmpi slt, %[[MAX]], %[[OUT]] + // CHECK: %[[CLMP_MAX:.+]] = select %[[CMP_MAX]], %[[MAX]], %[[CLMP_MIN]] + // CHECK: %[[TRUNC:.+]] = arith.trunci %[[CLMP_MAX]] + // CHECK: linalg.yield %[[TRUNC]] + %0 = "tosa.avg_pool2d"(%arg0) {kernel = [4, 4], pad = [0, 0, 0, 0], quantization_info = {input_zp = -128 : i32, output_zp = -128 : i32}, stride = [4, 4]} : (tensor<1x128x128x2xi16>) -> tensor<1x32x32x2xi16> + return +} + +// ----- + +// CHECK: #[[$MAP0:.+]] = affine_map<(d0, d1, d2, d3) -> (d3, d0, d1, d2)> +// CHECK: #[[$MAP1:.+]] = affine_map<(d0, d1, d2, d3) -> (d0, d1, d2, d3)> +// CHECK: #[[$MAP2:.+]] = affine_map<(d0, d1, d2, d3) -> (d3)> + +// CHECK-LABEL: @conv2d_f32 +func @conv2d_f32(%input: tensor<1x49x42x27xf32>, 
%weights: tensor<28x3x3x27xf32>, %bias: tensor<28xf32>) -> () { + // CHECK: %[[W_IN:.+]] = linalg.init_tensor [3, 3, 27, 28] + // CHECK: %[[W:.+]] = linalg.generic {indexing_maps = [#[[$MAP0]], #[[$MAP1]]], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%arg1 : tensor<28x3x3x27xf32>) outs(%[[W_IN]] : tensor<3x3x27x28xf32>) + // CHECK: linalg.yield %arg3 : f32 + // CHECK: %[[M_IN:.+]] = linalg.init_tensor [1, 45, 40, 28] + // CHECK: %[[CST:.+]] = arith.constant 0 + // CHECK: %[[FILL:.+]] = linalg.fill + // CHECK: %[[B_IN:.+]] = linalg.init_tensor [1, 45, 40, 28] + // CHECK: %[[CONV:.+]] = linalg.conv_2d_nhwc_hwcf {dilations = dense<[2, 1]> : tensor<2xi64>, strides = dense<1> : tensor<2xi64>} ins(%arg0, %[[W]] : tensor<1x49x42x27xf32>, tensor<3x3x27x28xf32>) outs(%[[FILL]] : tensor<1x45x40x28xf32>) + // CHECK: %[[B:.+]] = linalg.generic {indexing_maps = [#[[$MAP2]], #[[$MAP1]], #[[$MAP1]]], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%arg2, %[[CONV]] : tensor<28xf32>, tensor<1x45x40x28xf32>) outs(%[[B_IN]] : tensor<1x45x40x28xf32>) + // CHECK: arith.addf + // CHECK: linalg.yield %7 : f32 + %0 = "tosa.conv2d"(%input, %weights, %bias) {pad = [0, 0, 0, 0], stride = [1, 1], dilation = [2, 1]} : (tensor<1x49x42x27xf32>, tensor<28x3x3x27xf32>, tensor<28xf32>) -> (tensor<1x45x40x28xf32>) + return +} + +// ----- + +// CHECK-LABEL: @conv2d_padded_f32 +func @conv2d_padded_f32(%input: tensor<1x47x40x28xf32>, %weights: tensor<28x3x3x28xf32>, %bias: tensor<28xf32>) -> () { + // CHECK: %[[C0:.+]] = arith.constant 0 + // CHECK: linalg.pad_tensor %arg0 low[0, 1, 1, 0] high[0, 1, 1, 0] + // CHECK: linalg.yield %[[C0]] + // CHECK: linalg.conv_2d_nhwc_hwcf + %0 = "tosa.conv2d"(%input, %weights, %bias) {pad = [1, 1, 1, 1], stride = [1, 1], dilation = [2, 1]} : (tensor<1x47x40x28xf32>, tensor<28x3x3x28xf32>, tensor<28xf32>) -> (tensor<1x45x40x28xf32>) + return +} + +// ----- + +// CHECK-LABEL: @conv2d_quant +func @conv2d_quant(%arg0 : tensor<1x12x12x1xi8>, %arg1 : tensor<1024x3x3x1xi8>, %arg2 : tensor<1024xi32>) -> () { + // CHECK: %[[C22:.+]] = arith.constant -22 + // CHECK: linalg.pad_tensor %arg0 low[0, 1, 1, 0] high[0, 1, 1, 0] + // CHECK: linalg.yield %[[C22]] + // CHECK: linalg.conv_2d_nhwc_hwcf_q + %0 = "tosa.conv2d"(%arg0, %arg1, %arg2) {dilation = [1, 1], pad = [1, 1, 1, 1], quantization_info = {input_zp = -22 : i32, weight_zp = 42 : i32}, stride = [1, 1]} : (tensor<1x12x12x1xi8>, tensor<1024x3x3x1xi8>, tensor<1024xi32>) -> tensor<1x12x12x1024xi32> + return +} + +// ----- + +// CHECK: #[[$MAP0:.*]] = affine_map<(d0, d1, d2, d3) -> (d3)> +// CHECK: #[[$MAP1:.*]] = affine_map<(d0, d1, d2, d3) -> (d0, d1, d2, d3)> + +// CHECK-LABEL: @depthwise_conv +func @depthwise_conv(%arg0 : tensor<1x7x5x3xf32>, %arg1 : tensor<3x1x3x11xf32>, %arg2 : tensor<33xf32>) -> () { + // CHECK: [[INIT:%.+]] = linalg.init_tensor [1, 5, 5, 3, 11] + // CHECK: [[CST0:%.+]] = arith.constant 0 + // CHECK: [[FILL:%.+]] = linalg.fill([[CST0]], [[INIT]]) + // CHECK: [[OUT:%.+]] = linalg.init_tensor [1, 5, 5, 33] + // CHECK: [[DEPTH:%.+]] = linalg.depthwise_conv_2d_nhwc_hwcm {dilations = dense<1> : tensor<2xi64>, strides = dense<1> : tensor<2xi64>} ins(%arg0, %arg1 : tensor<1x7x5x3xf32>, tensor<3x1x3x11xf32>) outs([[FILL]] : tensor<1x5x5x3x11xf32>) + // CHECK: [[COLLAPSED:%.+]] = tensor.collapse_shape [[DEPTH]] {{\[}}[0], [1], [2], [3, 4]] + // CHECK: [[BIAS:%.+]] = linalg.generic {indexing_maps = [#[[$MAP0]], #[[$MAP1]], #[[$MAP1]]], iterator_types = ["parallel", "parallel", "parallel", 
"parallel"]} ins(%arg2, [[COLLAPSED]] : tensor<33xf32>, tensor<1x5x5x33xf32>) outs([[OUT]] : tensor<1x5x5x33xf32>) { + // CHECK: ^bb0(%arg3: f32, %arg4: f32, %arg5: f32): // no predecessors + // CHECK: [[ADD:%.+]] = arith.addf %arg3, %arg4 : f32 + // CHECK: linalg.yield [[ADD]] : f32 + // CHECK: } -> tensor<1x5x5x33xf32> + %2 = "tosa.depthwise_conv2d"(%arg0, %arg1, %arg2) { pad = [0, 0, 0, 0], stride = [1, 1], dilation = [1, 1] } : (tensor<1x7x5x3xf32>, tensor<3x1x3x11xf32>, tensor<33xf32>) -> (tensor<1x5x5x33xf32>) + return +} + +// ----- + +// CHECK: #[[$MAP0:.*]] = affine_map<(d0, d1, d2, d3) -> (d3)> +// CHECK: #[[$MAP1:.*]] = affine_map<(d0, d1, d2, d3) -> (d0, d1, d2, d3)> + +// CHECK-LABEL: @depthwise_conv_strides +func @depthwise_conv_strides(%arg0 : tensor<1x11x9x3xf32>, %arg1 : tensor<3x1x3x11xf32>, %arg2 : tensor<33xf32>) -> () { + // CHECK: [[INIT:%.+]] = linalg.init_tensor [1, 5, 5, 3, 11] + // CHECK: [[CST0:%.+]] = arith.constant 0 + // CHECK: [[FILL:%.+]] = linalg.fill([[CST0]], [[INIT]]) + // CHECK: [[OUT:%.+]] = linalg.init_tensor [1, 5, 5, 33] + // CHECK: [[DEPTH:%.+]] = linalg.depthwise_conv_2d_nhwc_hwcm {dilations = dense<1> : tensor<2xi64>, strides = dense<2> : tensor<2xi64>} ins(%arg0, %arg1 : tensor<1x11x9x3xf32>, tensor<3x1x3x11xf32>) outs([[FILL]] : tensor<1x5x5x3x11xf32>) + // CHECK: [[COLLAPSED:%.+]] = tensor.collapse_shape [[DEPTH]] {{\[}}[0], [1], [2], [3, 4]] + // CHECK: [[BIAS:%.+]] = linalg.generic {indexing_maps = [#[[$MAP0]], #[[$MAP1]], #[[$MAP1]]], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%arg2, [[COLLAPSED]] : tensor<33xf32>, tensor<1x5x5x33xf32>) outs([[OUT]] : tensor<1x5x5x33xf32>) { + // CHECK: ^bb0(%arg3: f32, %arg4: f32, %arg5: f32): // no predecessors + // CHECK: [[ADD:%.+]] = arith.addf %arg3, %arg4 : f32 + // CHECK: linalg.yield [[ADD]] : f32 + // CHECK: } -> tensor<1x5x5x33xf32> + %2 = "tosa.depthwise_conv2d"(%arg0, %arg1, %arg2) { pad = [0, 0, 0, 0], stride = [2, 2], dilation = [1, 1] } : (tensor<1x11x9x3xf32>, tensor<3x1x3x11xf32>, tensor<33xf32>) -> (tensor<1x5x5x33xf32>) + return +} + +// ----- + +// CHECK: #[[$MAP0:.*]] = affine_map<(d0, d1, d2, d3) -> (d3)> +// CHECK: #[[$MAP1:.*]] = affine_map<(d0, d1, d2, d3) -> (d0, d1, d2, d3)> + +// CHECK-LABEL: @depthwise_conv_quant +func @depthwise_conv_quant(%arg0 : tensor<1x12x12x4xi8>, %arg1 : tensor<3x3x4x128xi8>, %arg2 : tensor<512xi32>) -> () { + // CHECK: [[PADV:%.+]] = arith.constant -128 + // CHECK: [[PAD:%.+]] = linalg.pad_tensor %arg0 low[0, 1, 1, 0] high[0, 1, 1, 0] + // CHECK: linalg.yield [[PADV]] + + // CHECK: [[INIT:%.+]] = linalg.init_tensor [1, 12, 12, 4, 128] + // CHECK: [[CST0:%.+]] = arith.constant 0 + // CHECK: [[FILL:%.+]] = linalg.fill([[CST0]], [[INIT]]) + // CHECK: [[OUT:%.+]] = linalg.init_tensor [1, 12, 12, 512] + // CHECK: [[C128:%.+]] = arith.constant -128 + // CHECK: [[C42:%.+]] = arith.constant 42 + // CHECK: [[DEPTH:%.+]] = linalg.depthwise_conv_2d_nhwc_hwcm_q {dilations = dense<1> : tensor<2xi64>, strides = dense<1> : tensor<2xi64>} ins([[PAD]], %arg1, [[C128]], [[C42]] : tensor<1x14x14x4xi8>, tensor<3x3x4x128xi8>, i32, i32) outs([[FILL]] : tensor<1x12x12x4x128xi32>) + // CHECK: [[COLLAPSED:%.+]] = tensor.collapse_shape [[DEPTH]] {{\[}}[0], [1], [2], [3, 4]] + // CHECK: [[BIAS:%.+]] = linalg.generic {indexing_maps = [#[[$MAP0]], #[[$MAP1]], #[[$MAP1]]], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%arg2, [[COLLAPSED]] : tensor<512xi32>, tensor<1x12x12x512xi32>) outs([[OUT]] : tensor<1x12x12x512xi32>) { + // 
CHECK: ^bb0(%arg3: i32, %arg4: i32, %arg5: i32): // no predecessors + // CHECK: [[ADD:%.+]] = arith.addi %arg3, %arg4 : i32 + // CHECK: linalg.yield [[ADD]] : i32 + // CHECK: } -> tensor<1x12x12x512xi32> + %0 = "tosa.depthwise_conv2d"(%arg0, %arg1, %arg2) {pad = [1, 1, 1, 1], quantization_info = {input_zp = -128 : i32, weight_zp = 42 : i32}, stride = [1, 1], dilation = [1, 1] } : (tensor<1x12x12x4xi8>, tensor<3x3x4x128xi8>, tensor<512xi32>) -> tensor<1x12x12x512xi32> + return +} + +// ----- + +// CHECK: #[[$MAP0:.*]] = affine_map<(d0, d1, d2, d3) -> (d3)> +// CHECK: #[[$MAP1:.*]] = affine_map<(d0, d1, d2, d3) -> (d0, d1, d2, d3)> + +// CHECK-LABEL: @depthwise_conv_quant_dilations +func @depthwise_conv_quant_dilations(%arg0 : tensor<1x14x14x4xi8>, %arg1 : tensor<3x3x4x128xi8>, %arg2 : tensor<512xi32>) -> () { + // CHECK: [[INIT:%.+]] = linalg.init_tensor [1, 10, 10, 4, 128] + // CHECK: [[CST0:%.+]] = arith.constant 0 + // CHECK: [[FILL:%.+]] = linalg.fill([[CST0]], [[INIT]]) + // CHECK: [[OUT:%.+]] = linalg.init_tensor [1, 10, 10, 512] + // CHECK: [[C128:%.+]] = arith.constant -128 + // CHECK: [[C42:%.+]] = arith.constant 42 + // CHECK: [[DEPTH:%.+]] = linalg.depthwise_conv_2d_nhwc_hwcm_q {dilations = dense<2> : tensor<2xi64>, strides = dense<1> : tensor<2xi64>} ins(%arg0, %arg1, [[C128]], [[C42]] : tensor<1x14x14x4xi8>, tensor<3x3x4x128xi8>, i32, i32) outs([[FILL]] : tensor<1x10x10x4x128xi32>) + // CHECK: [[COLLAPSED:%.+]] = tensor.collapse_shape [[DEPTH]] {{\[}}[0], [1], [2], [3, 4]] + // CHECK: [[BIAS:%.+]] = linalg.generic {indexing_maps = [#[[$MAP0]], #[[$MAP1]], #[[$MAP1]]], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%arg2, [[COLLAPSED]] : tensor<512xi32>, tensor<1x10x10x512xi32>) outs([[OUT]] : tensor<1x10x10x512xi32>) { + // CHECK: ^bb0(%arg3: i32, %arg4: i32, %arg5: i32): // no predecessors + // CHECK: [[ADD:%.+]] = arith.addi %arg3, %arg4 : i32 + // CHECK: linalg.yield [[ADD]] : i32 + // CHECK: } -> tensor<1x10x10x512xi32> + %0 = "tosa.depthwise_conv2d"(%arg0, %arg1, %arg2) {pad = [0, 0, 0, 0], quantization_info = {input_zp = -128 : i32, weight_zp = 42 : i32}, stride = [1, 1], dilation = [2, 2] } : (tensor<1x14x14x4xi8>, tensor<3x3x4x128xi8>, tensor<512xi32>) -> tensor<1x10x10x512xi32> + return +} + +// ----- + // CHECK-LABEL: @resize_nearest func @resize_nearest(%input: tensor<1x2x2x1xf32>) -> () { // CHECK: %[[INIT:.+]] = linalg.init_tensor [1, 4, 4, 1] From 71b3bfde9cd296525bbf5b1619e199074156d12b Mon Sep 17 00:00:00 2001 From: Kirill Stoimenov Date: Thu, 23 Dec 2021 20:47:10 +0000 Subject: [PATCH 191/293] [ASan] Moved optimized callbacks into a separate library. This will allow linking in the callbacks directly instead of using PLT. 
Reviewed By: vitalybuka Differential Revision: https://reviews.llvm.org/D116182 --- clang/lib/Driver/ToolChains/CommonArgs.cpp | 5 ++++ clang/test/Driver/sanitizer-ld.c | 2 +- compiler-rt/lib/asan/CMakeLists.txt | 33 +++++++++++++++++++++- compiler-rt/lib/asan/asan_rtl_static.cpp | 15 ++++++++++ compiler-rt/lib/asan/tests/CMakeLists.txt | 2 ++ 5 files changed, 55 insertions(+), 2 deletions(-) create mode 100644 compiler-rt/lib/asan/asan_rtl_static.cpp diff --git a/clang/lib/Driver/ToolChains/CommonArgs.cpp b/clang/lib/Driver/ToolChains/CommonArgs.cpp index 407f81a2ae09..267625c70b25 100644 --- a/clang/lib/Driver/ToolChains/CommonArgs.cpp +++ b/clang/lib/Driver/ToolChains/CommonArgs.cpp @@ -826,6 +826,11 @@ collectSanitizerRuntimes(const ToolChain &TC, const ArgList &Args, if (SanArgs.needsStatsRt() && SanArgs.linkRuntimes()) StaticRuntimes.push_back("stats_client"); + // Always link the static runtime regardless of DSO or executable. + if (SanArgs.needsAsanRt()) { + HelperStaticRuntimes.push_back("asan_static"); + } + // Collect static runtimes. if (Args.hasArg(options::OPT_shared)) { // Don't link static runtimes into DSOs. diff --git a/clang/test/Driver/sanitizer-ld.c b/clang/test/Driver/sanitizer-ld.c index d62e19fd4021..ea8c49f2384a 100644 --- a/clang/test/Driver/sanitizer-ld.c +++ b/clang/test/Driver/sanitizer-ld.c @@ -22,7 +22,7 @@ // RUN: --sysroot=%S/Inputs/basic_linux_tree \ // RUN: | FileCheck --check-prefix=CHECK-ASAN-NO-LINK-RUNTIME-LINUX %s // -// CHECK-ASAN-NO-LINK-RUNTIME-LINUX-NOT: libclang_rt.asan +// CHECK-ASAN-NO-LINK-RUNTIME-LINUX-NOT: libclang_rt.asan-x86_64 // RUN: %clang -no-canonical-prefixes %s -### -o %t.o 2>&1 \ // RUN: -target i386-unknown-linux -fuse-ld=ld -fsanitize=address -shared-libsan \ diff --git a/compiler-rt/lib/asan/CMakeLists.txt b/compiler-rt/lib/asan/CMakeLists.txt index 2e63c8d05168..b79b7278f6db 100644 --- a/compiler-rt/lib/asan/CMakeLists.txt +++ b/compiler-rt/lib/asan/CMakeLists.txt @@ -34,7 +34,6 @@ set(ASAN_SOURCES if (NOT WIN32 AND NOT APPLE) list(APPEND ASAN_SOURCES - asan_rtl_x86_64.S asan_interceptors_vfork.S ) endif() @@ -43,6 +42,16 @@ set(ASAN_CXX_SOURCES asan_new_delete.cpp ) +set(ASAN_STATIC_SOURCES + asan_rtl_static.cpp + ) + +if (NOT WIN32 AND NOT APPLE) + list(APPEND ASAN_STATIC_SOURCES + asan_rtl_x86_64.S + ) +endif() + set(ASAN_PREINIT_SOURCES asan_preinit.cpp ) @@ -135,6 +144,12 @@ if(NOT APPLE) ADDITIONAL_HEADERS ${ASAN_HEADERS} CFLAGS ${ASAN_CFLAGS} DEFS ${ASAN_COMMON_DEFINITIONS}) + add_compiler_rt_object_libraries(RTAsan_static + ARCHS ${ASAN_SUPPORTED_ARCH} + SOURCES ${ASAN_STATIC_SOURCES} + ADDITIONAL_HEADERS ${ASAN_HEADERS} + CFLAGS ${ASAN_CFLAGS} + DEFS ${ASAN_COMMON_DEFINITIONS}) add_compiler_rt_object_libraries(RTAsan_preinit ARCHS ${ASAN_SUPPORTED_ARCH} SOURCES ${ASAN_PREINIT_SOURCES} @@ -176,6 +191,14 @@ if(APPLE) LINK_FLAGS ${WEAK_SYMBOL_LINK_FLAGS} DEFS ${ASAN_DYNAMIC_DEFINITIONS} PARENT_TARGET asan) + + add_compiler_rt_runtime(clang_rt.asan_static + STATIC + ARCHS ${ASAN_SUPPORTED_ARCH} + OBJECT_LIBS RTAsan_static + CFLAGS ${ASAN_CFLAGS} + DEFS ${ASAN_COMMON_DEFINITIONS} + PARENT_TARGET asan) else() # Build separate libraries for each target. 
@@ -207,6 +230,14 @@ else() DEFS ${ASAN_COMMON_DEFINITIONS} PARENT_TARGET asan) + add_compiler_rt_runtime(clang_rt.asan_static + STATIC + ARCHS ${ASAN_SUPPORTED_ARCH} + OBJECT_LIBS RTAsan_static + CFLAGS ${ASAN_CFLAGS} + DEFS ${ASAN_COMMON_DEFINITIONS} + PARENT_TARGET asan) + add_compiler_rt_runtime(clang_rt.asan-preinit STATIC ARCHS ${ASAN_SUPPORTED_ARCH} diff --git a/compiler-rt/lib/asan/asan_rtl_static.cpp b/compiler-rt/lib/asan/asan_rtl_static.cpp new file mode 100644 index 000000000000..74e6eb0ddf1c --- /dev/null +++ b/compiler-rt/lib/asan/asan_rtl_static.cpp @@ -0,0 +1,15 @@ +//===-- asan_static_rtl.cpp -----------------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// This file is a part of AddressSanitizer, an address sanity checker. +// +// Main file of the ASan run-time library. +//===----------------------------------------------------------------------===// + +// This file is empty for now. Main reason to have it is workaround for Windows +// build, which complains because no files are part of the asan_static lib. diff --git a/compiler-rt/lib/asan/tests/CMakeLists.txt b/compiler-rt/lib/asan/tests/CMakeLists.txt index cc375acf2dfb..95a324766ae7 100644 --- a/compiler-rt/lib/asan/tests/CMakeLists.txt +++ b/compiler-rt/lib/asan/tests/CMakeLists.txt @@ -261,6 +261,7 @@ if(COMPILER_RT_CAN_EXECUTE_TESTS AND NOT ANDROID) set(ASAN_TEST_RUNTIME_OBJECTS $ $ + $ $ $ $ @@ -286,6 +287,7 @@ if(ANDROID) # Test w/o ASan instrumentation. Link it with ASan statically. add_executable(AsanNoinstTest # FIXME: .arch? $ + $ $ $ $ From 417cd2e5c5e0ef0a3c8161298b7690f7911ce7ed Mon Sep 17 00:00:00 2001 From: Fangrui Song Date: Thu, 23 Dec 2021 16:49:38 -0800 Subject: [PATCH 192/293] [ELF] SymbolTable: change some vector to SmallVector The generated assembly for Symbol::insert is much shorter (std::vector resize is inefficient) and enables some inlining. --- lld/ELF/SymbolTable.cpp | 14 +++++++------- lld/ELF/SymbolTable.h | 17 +++++++++-------- 2 files changed, 16 insertions(+), 15 deletions(-) diff --git a/lld/ELF/SymbolTable.cpp b/lld/ELF/SymbolTable.cpp index 5c7b8b00a6ee..a501b270010b 100644 --- a/lld/ELF/SymbolTable.cpp +++ b/lld/ELF/SymbolTable.cpp @@ -133,7 +133,7 @@ static bool canBeVersioned(const Symbol &sym) { // other than trying to match a pattern against all demangled symbols. // So, if "extern C++" feature is used, we need to demangle all known // symbols. 
-StringMap> &SymbolTable::getDemangledSyms() { +StringMap> &SymbolTable::getDemangledSyms() { if (!demangledSyms) { demangledSyms.emplace(); std::string demangled; @@ -154,7 +154,7 @@ StringMap> &SymbolTable::getDemangledSyms() { return *demangledSyms; } -std::vector SymbolTable::findByVersion(SymbolVersion ver) { +SmallVector SymbolTable::findByVersion(SymbolVersion ver) { if (ver.isExternCpp) return getDemangledSyms().lookup(ver.name); if (Symbol *sym = find(ver.name)) @@ -163,9 +163,9 @@ std::vector SymbolTable::findByVersion(SymbolVersion ver) { return {}; } -std::vector SymbolTable::findAllByVersion(SymbolVersion ver, - bool includeNonDefault) { - std::vector res; +SmallVector SymbolTable::findAllByVersion(SymbolVersion ver, + bool includeNonDefault) { + SmallVector res; SingleStringMatcher m(ver.name); auto check = [&](StringRef name) { size_t pos = name.find('@'); @@ -191,8 +191,8 @@ std::vector SymbolTable::findAllByVersion(SymbolVersion ver, } void SymbolTable::handleDynamicList() { + SmallVector syms; for (SymbolVersion &ver : config->dynamicList) { - std::vector syms; if (ver.hasWildcard) syms = findAllByVersion(ver, /*includeNonDefault=*/true); else @@ -209,7 +209,7 @@ bool SymbolTable::assignExactVersion(SymbolVersion ver, uint16_t versionId, StringRef versionName, bool includeNonDefault) { // Get a list of symbols which we need to assign the version to. - std::vector syms = findByVersion(ver); + SmallVector syms = findByVersion(ver); auto getName = [](uint16_t ver) -> std::string { if (ver == VER_NDX_LOCAL) diff --git a/lld/ELF/SymbolTable.h b/lld/ELF/SymbolTable.h index 568e4559ec34..84d93a3dc786 100644 --- a/lld/ELF/SymbolTable.h +++ b/lld/ELF/SymbolTable.h @@ -35,8 +35,9 @@ class SymbolTable { struct FilterOutPlaceholder { bool operator()(Symbol *S) const { return !S->isPlaceholder(); } }; - using iterator = llvm::filter_iterator::const_iterator, - FilterOutPlaceholder>; + using iterator = + llvm::filter_iterator::const_iterator, + FilterOutPlaceholder>; public: llvm::iterator_range symbols() const { @@ -64,11 +65,11 @@ class SymbolTable { llvm::DenseMap comdatGroups; private: - std::vector findByVersion(SymbolVersion ver); - std::vector findAllByVersion(SymbolVersion ver, - bool includeNonDefault); + SmallVector findByVersion(SymbolVersion ver); + SmallVector findAllByVersion(SymbolVersion ver, + bool includeNonDefault); - llvm::StringMap> &getDemangledSyms(); + llvm::StringMap> &getDemangledSyms(); bool assignExactVersion(SymbolVersion ver, uint16_t versionId, StringRef versionName, bool includeNonDefault); void assignWildcardVersion(SymbolVersion ver, uint16_t versionId, @@ -82,13 +83,13 @@ class SymbolTable { // FIXME: Experiment with passing in a custom hashing or sorting the symbols // once symbol resolution is finished. llvm::DenseMap symMap; - std::vector symVector; + SmallVector symVector; // A map from demangled symbol names to their symbol objects. // This mapping is 1:N because two symbols with different versions // can have the same name. We use this map to handle "extern C++ {}" // directive in version scripts. 
- llvm::Optional>> demangledSyms; + llvm::Optional>> demangledSyms; }; extern std::unique_ptr symtab; From a6114ff0cd6935026f34f1dc7cedfa2057955a8c Mon Sep 17 00:00:00 2001 From: Nico Weber Date: Thu, 23 Dec 2021 19:49:19 -0500 Subject: [PATCH 193/293] [gn build] "port" 71b3bfde9cd2 --- llvm/utils/gn/secondary/compiler-rt/lib/asan/BUILD.gn | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/llvm/utils/gn/secondary/compiler-rt/lib/asan/BUILD.gn b/llvm/utils/gn/secondary/compiler-rt/lib/asan/BUILD.gn index de8f33da0bc5..f05168bd57c2 100644 --- a/llvm/utils/gn/secondary/compiler-rt/lib/asan/BUILD.gn +++ b/llvm/utils/gn/secondary/compiler-rt/lib/asan/BUILD.gn @@ -99,10 +99,7 @@ target(asan_target_type, "asan") { "asan_win.cpp", ] if (target_os != "mac" && target_os != "win") { - sources += [ - "asan_interceptors_vfork.S", - "asan_rtl_x86_64.S", - ] + sources += [ "asan_interceptors_vfork.S" ] } # To be able to include sanitizer_common. @@ -200,5 +197,9 @@ source_set("unused") { "asan_preinit.cpp", "asan_win_dll_thunk.cpp", "asan_win_dynamic_runtime_thunk.cpp", + + # FIXME: These are in clang_rt.asan_static in the CMake build. + "asan_rtl_static.cpp", + "asan_rtl_x86_64.S", ] } From 8ff42766d1effcee2e4ebb0016b02430263cb4f0 Mon Sep 17 00:00:00 2001 From: Stella Laurenzo Date: Thu, 23 Dec 2021 16:13:54 -0800 Subject: [PATCH 194/293] [mlir] Use thread-pool's notion of thread count instead of requerying system. The computed number of hardware threads can change over the life of the process based on affinity changes. Since we need a data structure that is at least as large as the maximum parallelism, it is important to use the value that was actually latched for the thread pool we will be dispatching work to. Also adds an assert specifically for if it doesn't line up (I was getting a crash on an index into the vector). Differential Revision: https://reviews.llvm.org/D116257 --- mlir/lib/Transforms/Inliner.cpp | 12 ++++++++---- 1 file changed, 8 insertions(+), 4 deletions(-) diff --git a/mlir/lib/Transforms/Inliner.cpp b/mlir/lib/Transforms/Inliner.cpp index 32b12e7a3bf4..cbeb94980dcc 100644 --- a/mlir/lib/Transforms/Inliner.cpp +++ b/mlir/lib/Transforms/Inliner.cpp @@ -663,7 +663,7 @@ LogicalResult InlinerPass::optimizeSCC(CallGraph &cg, CGUseList &useList, // Optimize each of the nodes within the SCC in parallel. if (failed(optimizeSCCAsync(nodesToVisit, context))) - return failure(); + return failure(); // Recompute the uses held by each of the nodes. for (CallGraphNode *node : nodesToVisit) @@ -674,11 +674,13 @@ LogicalResult InlinerPass::optimizeSCC(CallGraph &cg, CGUseList &useList, LogicalResult InlinerPass::optimizeSCCAsync(MutableArrayRef nodesToVisit, MLIRContext *ctx) { - // Ensure that there are enough pipeline maps for the optimizer to run in - // parallel. Note: The number of pass managers here needs to remain constant + // We must maintain a fixed pool of pass managers which is at least as large + // as the maximum parallelism of the failableParallelForEach below. + // Note: The number of pass managers here needs to remain constant // to prevent issues with pass instrumentations that rely on having the same // pass manager for the main thread. - size_t numThreads = llvm::hardware_concurrency().compute_thread_count(); + llvm::ThreadPool &threadPool = ctx->getThreadPool(); + size_t numThreads = threadPool.getThreadCount(); if (opPipelines.size() < numThreads) { // Reserve before resizing so that we can use a reference to the first // element. 
@@ -700,6 +702,8 @@ InlinerPass::optimizeSCCAsync(MutableArrayRef nodesToVisit, bool expectedInactive = false; return isActive.compare_exchange_strong(expectedInactive, true); }); + assert(it != activePMs.end() && + "could not find inactive pass manager for thread"); unsigned pmIndex = it - activePMs.begin(); // Optimize this callable node. From a2baf634a1d70c7909bd81189c167c3c194dee96 Mon Sep 17 00:00:00 2001 From: Fangrui Song Date: Thu, 23 Dec 2021 17:59:25 -0800 Subject: [PATCH 195/293] [ELF] Simplify SymbolTable::insert. NFC --- lld/ELF/SymbolTable.cpp | 7 ++----- 1 file changed, 2 insertions(+), 5 deletions(-) diff --git a/lld/ELF/SymbolTable.cpp b/lld/ELF/SymbolTable.cpp index a501b270010b..88e62507e893 100644 --- a/lld/ELF/SymbolTable.cpp +++ b/lld/ELF/SymbolTable.cpp @@ -70,11 +70,8 @@ Symbol *SymbolTable::insert(StringRef name) { stem = name.take_front(pos); auto p = symMap.insert({CachedHashStringRef(stem), (int)symVector.size()}); - int &symIndex = p.first->second; - bool isNew = p.second; - - if (!isNew) { - Symbol *sym = symVector[symIndex]; + if (!p.second) { + Symbol *sym = symVector[p.first->second]; if (stem.size() != name.size()) sym->setName(name); return sym; From 41a64338cc842cc3ef45d68a4ef8f09c143d9969 Mon Sep 17 00:00:00 2001 From: Mogball Date: Fri, 24 Dec 2021 01:41:21 +0000 Subject: [PATCH 196/293] [mlir] Add getNumThreads to MLIRContext Querying threads directly from the thread pool fails if there is no thread pool or if multithreading is not enabled. Returns 1 by default. Reviewed By: mehdi_amini Differential Revision: https://reviews.llvm.org/D116259 --- mlir/include/mlir/IR/MLIRContext.h | 7 +++++++ mlir/lib/IR/MLIRContext.cpp | 10 ++++++++++ mlir/lib/Transforms/Inliner.cpp | 3 +-- mlir/test/Transforms/inlining.mlir | 1 + 4 files changed, 19 insertions(+), 2 deletions(-) diff --git a/mlir/include/mlir/IR/MLIRContext.h b/mlir/include/mlir/IR/MLIRContext.h index e2014954e9fb..659cd7126559 100644 --- a/mlir/include/mlir/IR/MLIRContext.h +++ b/mlir/include/mlir/IR/MLIRContext.h @@ -147,6 +147,13 @@ class MLIRContext { /// this call in this case. void setThreadPool(llvm::ThreadPool &pool); + /// Return the number of threads used by the thread pool in this context. The + /// number of computed hardware threads can change over the lifetime of a + /// process based on affinity changes, so users should use the number of + /// threads actually in the thread pool for dispatching work. Returns 1 if + /// multithreading is disabled. + unsigned getNumThreads(); + /// Return the thread pool used by this context. This method requires that /// multithreading be enabled within the context, and should generally not be /// used directly. Users should instead prefer the threading utilities within diff --git a/mlir/lib/IR/MLIRContext.cpp b/mlir/lib/IR/MLIRContext.cpp index 96fe76eaac28..7e811316c4e6 100644 --- a/mlir/lib/IR/MLIRContext.cpp +++ b/mlir/lib/IR/MLIRContext.cpp @@ -518,6 +518,16 @@ void MLIRContext::setThreadPool(llvm::ThreadPool &pool) { enableMultithreading(); } +unsigned MLIRContext::getNumThreads() { + if (isMultithreadingEnabled()) { + assert(impl->threadPool && + "multi-threading is enabled but threadpool not set"); + return impl->threadPool->getThreadCount(); + } + // No multithreading or active thread pool. Return 1 thread. 
+ return 1; +} + llvm::ThreadPool &MLIRContext::getThreadPool() { assert(isMultithreadingEnabled() && "expected multi-threading to be enabled within the context"); diff --git a/mlir/lib/Transforms/Inliner.cpp b/mlir/lib/Transforms/Inliner.cpp index cbeb94980dcc..f080b4d112ba 100644 --- a/mlir/lib/Transforms/Inliner.cpp +++ b/mlir/lib/Transforms/Inliner.cpp @@ -679,8 +679,7 @@ InlinerPass::optimizeSCCAsync(MutableArrayRef nodesToVisit, // Note: The number of pass managers here needs to remain constant // to prevent issues with pass instrumentations that rely on having the same // pass manager for the main thread. - llvm::ThreadPool &threadPool = ctx->getThreadPool(); - size_t numThreads = threadPool.getThreadCount(); + size_t numThreads = ctx->getNumThreads(); if (opPipelines.size() < numThreads) { // Reserve before resizing so that we can use a reference to the first // element. diff --git a/mlir/test/Transforms/inlining.mlir b/mlir/test/Transforms/inlining.mlir index 98692853bf24..00a0445f5a88 100644 --- a/mlir/test/Transforms/inlining.mlir +++ b/mlir/test/Transforms/inlining.mlir @@ -1,4 +1,5 @@ // RUN: mlir-opt %s -inline='default-pipeline=''' | FileCheck %s +// RUN: mlir-opt %s --mlir-disable-threading -inline='default-pipeline=''' | FileCheck %s // RUN: mlir-opt %s -inline='default-pipeline=''' -mlir-print-debuginfo -mlir-print-local-scope | FileCheck %s --check-prefix INLINE-LOC // RUN: mlir-opt %s -inline | FileCheck %s --check-prefix INLINE_SIMPLIFY From 1abbbc7b2435913be2ee17ecc85421fd8ad8a73f Mon Sep 17 00:00:00 2001 From: Fangrui Song Date: Thu, 23 Dec 2021 18:18:25 -0800 Subject: [PATCH 197/293] [ELF] scanVersionScript: remove unused variable --- lld/ELF/SymbolTable.cpp | 1 - 1 file changed, 1 deletion(-) diff --git a/lld/ELF/SymbolTable.cpp b/lld/ELF/SymbolTable.cpp index 88e62507e893..a12c5f22c4fe 100644 --- a/lld/ELF/SymbolTable.cpp +++ b/lld/ELF/SymbolTable.cpp @@ -261,7 +261,6 @@ void SymbolTable::scanVersionScript() { SmallString<128> buf; // First, we assign versions to exact matching symbols, // i.e. version definitions not containing any glob meta-characters. - std::vector syms; for (VersionDefinition &v : config->versionDefinitions) { auto assignExact = [&](SymbolVersion pat, uint16_t id, StringRef ver) { bool found = From 0a35211b34886423199734698116b09debc74a71 Mon Sep 17 00:00:00 2001 From: Craig Topper Date: Thu, 23 Dec 2021 15:20:06 -0600 Subject: [PATCH 198/293] [RISCV] Don't allow vector types to be used with inline asm 'r' constraint The 'r' constraint uses the GPR class. There is generic support for bitcasting and extending/truncating non-integer VTs to the required integer VT. This doesn't work for scalable vectors and instead crashes. To prevent this, explicitly reject vectors. Fixed vectors might work without crashing, but it doesn't seem worthwhile to allow. While there remove an unnecessary level of indentation in the "vr" and "vm" constraint handling. 
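For illustration, a rough source-level equivalent of the new IR test below (hypothetical user code, not part of this patch; it assumes the RVV intrinsic header and a V-enabled target such as -march=rv64gcv, and the function/variable names are placeholders):

#include <riscv_vector.h>

void constraint_r_scalable_vec(vint32m1_t v) {
  // A scalable vector passed to the GPR-only 'r' constraint used to crash in
  // type legalization; with this change the backend instead reports:
  //   error: couldn't allocate input reg for constraint 'r'
  asm volatile("add a0, a0, %0" :: "r"(v));
}

The committed test exercises the same path directly at the LLVM IR level.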
Differential Revision: https://reviews.llvm.org/D115810 --- llvm/lib/Target/RISCV/RISCVISelLowering.cpp | 21 ++++++++++--------- llvm/test/CodeGen/RISCV/inline-asm-invalid.ll | 12 +++++++++++ 2 files changed, 23 insertions(+), 10 deletions(-) diff --git a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp index b455fee62f54..972928f2e5b4 100644 --- a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp +++ b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp @@ -9657,6 +9657,9 @@ RISCVTargetLowering::getRegForInlineAsmConstraint(const TargetRegisterInfo *TRI, if (Constraint.size() == 1) { switch (Constraint[0]) { case 'r': + // TODO: Support fixed vectors up to XLen for P extension? + if (VT.isVector()) + break; return std::make_pair(0U, &RISCV::GPRRegClass); case 'f': if (Subtarget.hasStdExtZfh() && VT == MVT::f16) @@ -9669,17 +9672,15 @@ RISCVTargetLowering::getRegForInlineAsmConstraint(const TargetRegisterInfo *TRI, default: break; } - } else { - if (Constraint == "vr") { - for (const auto *RC : {&RISCV::VRRegClass, &RISCV::VRM2RegClass, - &RISCV::VRM4RegClass, &RISCV::VRM8RegClass}) { - if (TRI->isTypeLegalForClass(*RC, VT.SimpleTy)) - return std::make_pair(0U, RC); - } - } else if (Constraint == "vm") { - if (TRI->isTypeLegalForClass(RISCV::VMV0RegClass, VT.SimpleTy)) - return std::make_pair(0U, &RISCV::VMV0RegClass); + } else if (Constraint == "vr") { + for (const auto *RC : {&RISCV::VRRegClass, &RISCV::VRM2RegClass, + &RISCV::VRM4RegClass, &RISCV::VRM8RegClass}) { + if (TRI->isTypeLegalForClass(*RC, VT.SimpleTy)) + return std::make_pair(0U, RC); } + } else if (Constraint == "vm") { + if (TRI->isTypeLegalForClass(RISCV::VMV0RegClass, VT.SimpleTy)) + return std::make_pair(0U, &RISCV::VMV0RegClass); } // Clang will correctly decode the usage of register name aliases into their diff --git a/llvm/test/CodeGen/RISCV/inline-asm-invalid.ll b/llvm/test/CodeGen/RISCV/inline-asm-invalid.ll index 20ac5ef11111..14b7cb896674 100644 --- a/llvm/test/CodeGen/RISCV/inline-asm-invalid.ll +++ b/llvm/test/CodeGen/RISCV/inline-asm-invalid.ll @@ -30,3 +30,15 @@ define void @constraint_f() nounwind { tail call void asm "fadd.d fa0, fa0, $0", "f"(double 0.0) ret void } + +define void @constraint_r_fixed_vec() nounwind { +; CHECK: error: couldn't allocate input reg for constraint 'r' + tail call void asm "add a0, a0, $0", "r"(<4 x i32> zeroinitializer) + ret void +} + +define void @constraint_r_scalable_vec() nounwind { +; CHECK: error: couldn't allocate input reg for constraint 'r' + tail call void asm "add a0, a0, $0", "r"( zeroinitializer) + ret void +} From a79c567f6a9ac91ba8d79b3ab16f4ba123b97754 Mon Sep 17 00:00:00 2001 From: Shilei Tian Date: Thu, 23 Dec 2021 23:21:21 -0500 Subject: [PATCH 199/293] [NFC] Remove tailing whitespaces in llvm/include/llvm/Frontend/OpenMP/OMP.td --- llvm/include/llvm/Frontend/OpenMP/OMP.td | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/llvm/include/llvm/Frontend/OpenMP/OMP.td b/llvm/include/llvm/Frontend/OpenMP/OMP.td index 5ee379b7fcad..eab8d03f7316 100644 --- a/llvm/include/llvm/Frontend/OpenMP/OMP.td +++ b/llvm/include/llvm/Frontend/OpenMP/OMP.td @@ -282,7 +282,7 @@ def OMPC_Allocate : Clause<"allocate"> { def OMPC_NonTemporal : Clause<"nontemporal"> { let clangClass = "OMPNontemporalClause"; let flangClass = "Name"; - let isValueList = true; + let isValueList = true; } def OMP_ORDER_concurrent : ClauseVal<"concurrent",1,1> {} From 1d285f2de0b76841d924e20de3c430ee6fa431c3 Mon Sep 17 00:00:00 2001 From: Fangrui Song Date: Thu, 23 
Dec 2021 20:23:13 -0800 Subject: [PATCH 200/293] [ELF] Simplify and optimize ObjFile::parseLazy --- lld/ELF/InputFiles.cpp | 48 +++++++++++++----------------------------- 1 file changed, 15 insertions(+), 33 deletions(-) diff --git a/lld/ELF/InputFiles.cpp b/lld/ELF/InputFiles.cpp index 2e5bab7b9fa2..9dcf572a920b 100644 --- a/lld/ELF/InputFiles.cpp +++ b/lld/ELF/InputFiles.cpp @@ -1782,45 +1782,27 @@ InputFile *elf::createLazyFile(MemoryBufferRef mb, StringRef archiveName, } template void ObjFile::parseLazy() { - using Elf_Sym = typename ELFT::Sym; - - // Find a symbol table. - ELFFile obj = check(ELFFile::create(mb.getBuffer())); - ArrayRef sections = CHECK(obj.sections(), this); + const ArrayRef eSyms = this->getELFSyms(); + auto &symbols = this->symbols; + SymbolTable *const symtab = elf::symtab.get(); + const StringRef strtab = this->stringTable; - for (const typename ELFT::Shdr &sec : sections) { - if (sec.sh_type != SHT_SYMTAB) - continue; - - // A symbol table is found. - ArrayRef eSyms = CHECK(obj.symbols(&sec), this); - uint32_t firstGlobal = sec.sh_info; - StringRef strtab = CHECK(obj.getStringTableForSymtab(sec, sections), this); - this->symbols.resize(eSyms.size()); - - // Get existing symbols or insert placeholder symbols. - for (size_t i = firstGlobal, end = eSyms.size(); i != end; ++i) - if (eSyms[i].st_shndx != SHN_UNDEF) - this->symbols[i] = - symtab->insert(CHECK(eSyms[i].getName(strtab), this)); + symbols.resize(eSyms.size()); + for (size_t i = firstGlobal, end = eSyms.size(); i != end; ++i) + if (eSyms[i].st_shndx != SHN_UNDEF) + symbols[i] = symtab->insert(CHECK(eSyms[i].getName(strtab), this)); - // Replace existing symbols with LazyObject symbols. - // - // resolve() may trigger this->extract() if an existing symbol is an - // undefined symbol. If that happens, this LazyObjFile has served - // its purpose, and we can exit from the loop early. - for (Symbol *sym : this->symbols) { - if (!sym) - continue; + // Replace existing symbols with LazyObject symbols. + // + // resolve() may trigger this->extract() if an existing symbol is an undefined + // symbol. If that happens, this function has served its purpose, and we can + // exit from the loop early. + for (Symbol *sym : this->symbols) + if (sym) { sym->resolve(LazyObject{*this, sym->getName()}); - - // If extracted, stop iterating because the symbol resolution has been - // done by ObjFile::parse. if (!lazy) return; } - return; - } } bool InputFile::shouldExtractForCommon(StringRef name) { From f3d4e168dbc7e736ef535c673ee635a40feb2037 Mon Sep 17 00:00:00 2001 From: Chuanqi Xu Date: Fri, 24 Dec 2021 11:48:15 +0800 Subject: [PATCH 201/293] [C++20] Conform coroutine's comments in clang (NFC-ish) The comments for coroutine in clang wrote for coroutine-TS. Now coroutine is merged into standard. Try to conform the comments. --- clang/lib/CodeGen/CGCoroutine.cpp | 4 + clang/lib/Sema/SemaCoroutine.cpp | 144 ++++++++++++++++++++---------- 2 files changed, 101 insertions(+), 47 deletions(-) diff --git a/clang/lib/CodeGen/CGCoroutine.cpp b/clang/lib/CodeGen/CGCoroutine.cpp index ca071d3d2e80..2041d2a5b4c9 100644 --- a/clang/lib/CodeGen/CGCoroutine.cpp +++ b/clang/lib/CodeGen/CGCoroutine.cpp @@ -597,6 +597,10 @@ void CodeGenFunction::EmitCoroutineBody(const CoroutineBodyStmt &S) { CGM.getIntrinsic(llvm::Intrinsic::coro_begin), {CoroId, Phi}); CurCoro.Data->CoroBegin = CoroBegin; + // We need to emit `get_­return_­object` first. 
According to: + // [dcl.fct.def.coroutine]p7 + // The call to get_­return_­object is sequenced before the call to + // initial_­suspend and is invoked at most once. GetReturnObjectManager GroManager(*this, S); GroManager.EmitGroAlloca(); diff --git a/clang/lib/Sema/SemaCoroutine.cpp b/clang/lib/Sema/SemaCoroutine.cpp index 27ba49fb001c..336e0c5f1d46 100644 --- a/clang/lib/Sema/SemaCoroutine.cpp +++ b/clang/lib/Sema/SemaCoroutine.cpp @@ -237,9 +237,9 @@ static bool isValidCoroutineContext(Sema &S, SourceLocation Loc, // placeholder type shall not be a coroutine." if (FD->getReturnType()->isUndeducedType()) DiagInvalid(DiagAutoRet); - // [dcl.fct.def.coroutine]p1: "The parameter-declaration-clause of the - // coroutine shall not terminate with an ellipsis that is not part of a - // parameter-declaration." + // [dcl.fct.def.coroutine]p1 + // The parameter-declaration-clause of the coroutine shall not terminate with + // an ellipsis that is not part of a parameter-declaration. if (FD->isVariadic()) DiagInvalid(DiagVarargs); @@ -579,8 +579,12 @@ VarDecl *Sema::buildCoroutinePromise(SourceLocation Loc) { /*TopLevelOfInitList=*/false, /*TreatUnavailableAsInvalid=*/false); - // Attempt to initialize the promise type with the arguments. - // If that fails, fall back to the promise type's default constructor. + // [dcl.fct.def.coroutine]5.7 + // promise-constructor-arguments is determined as follows: overload + // resolution is performed on a promise constructor call created by + // assembling an argument list q_1 ... q_n . If a viable constructor is + // found ([over.match.viable]), then promise-constructor-arguments is ( q_1 + // , ..., q_n ), otherwise promise-constructor-arguments is empty. if (InitSeq) { ExprResult Result = InitSeq.Perform(*this, Entity, Kind, CtorArgExprs); if (Result.isInvalid()) { @@ -648,6 +652,10 @@ static void checkNoThrow(Sema &S, const Stmt *E, return; } if (ThrowingDecls.empty()) { + // [dcl.fct.def.coroutine]p15 + // The expression co_­await promise.final_­suspend() shall not be + // potentially-throwing ([except.spec]). + // // First time seeing an error, emit the error message. S.Diag(cast(S.CurContext)->getLocation(), diag::err_coroutine_promise_final_suspend_requires_nothrow); @@ -995,9 +1003,8 @@ static Expr *buildStdNoThrowDeclRef(Sema &S, SourceLocation Loc) { LookupResult Result(S, &S.PP.getIdentifierTable().get("nothrow"), Loc, Sema::LookupOrdinaryName); if (!S.LookupQualifiedName(Result, Std)) { - // FIXME: should have been included already. - // If we require it to include then this diagnostic is no longer - // needed. + // is not requred to include , so we couldn't omit + // the check here. S.Diag(Loc, diag::err_implicit_coroutine_std_nothrow_type_not_found); return nullptr; } @@ -1029,9 +1036,21 @@ static FunctionDecl *findDeleteForPromise(Sema &S, SourceLocation Loc, auto *PointeeRD = PromiseType->getAsCXXRecordDecl(); assert(PointeeRD && "PromiseType must be a CxxRecordDecl type"); + // [dcl.fct.def.coroutine]p12 + // The deallocation function's name is looked up by searching for it in the + // scope of the promise type. If nothing is found, a search is performed in + // the global scope. 
if (S.FindDeallocationFunction(Loc, PointeeRD, DeleteName, OperatorDelete)) return nullptr; + // FIXME: We didn't implement following selection: + // [dcl.fct.def.coroutine]p12 + // If both a usual deallocation function with only a pointer parameter and a + // usual deallocation function with both a pointer parameter and a size + // parameter are found, then the selected deallocation function shall be the + // one with two parameters. Otherwise, the selected deallocation function + // shall be the function with one parameter. + if (!OperatorDelete) { // Look for a global declaration. const bool CanProvideSize = S.isCompleteType(Loc, PromiseType); @@ -1062,8 +1081,8 @@ void Sema::CheckCompletedCoroutineBody(FunctionDecl *FD, Stmt *&Body) { return; } - // Coroutines [stmt.return]p1: - // A return statement shall not appear in a coroutine. + // [stmt.return.coroutine]p1: + // A coroutine shall not enclose a return statement ([stmt.return]). if (Fn->FirstReturnLoc.isValid()) { assert(Fn->FirstCoroutineStmtLoc.isValid() && "first coroutine location not set"); @@ -1164,12 +1183,15 @@ bool CoroutineStmtBuilder::makeReturnOnAllocFailure() { assert(!IsPromiseDependentType && "cannot make statement while the promise type is dependent"); - // [dcl.fct.def.coroutine]/8 - // The unqualified-id get_return_object_on_allocation_failure is looked up in - // the scope of class P by class member access lookup (3.4.5). ... - // If an allocation function returns nullptr, ... the coroutine return value - // is obtained by a call to ... get_return_object_on_allocation_failure(). - + // [dcl.fct.def.coroutine]p10 + // If a search for the name get_­return_­object_­on_­allocation_­failure in + // the scope of the promise type ([class.member.lookup]) finds any + // declarations, then the result of a call to an allocation function used to + // obtain storage for the coroutine state is assumed to return nullptr if it + // fails to obtain storage, ... If the allocation function returns nullptr, + // ... and the return value is obtained by a call to + // T::get_­return_­object_­on_­allocation_­failure(), where T is the + // promise type. DeclarationName DN = S.PP.getIdentifierInfo("get_return_object_on_allocation_failure"); LookupResult Found(S, DN, Loc, Sema::LookupMemberName); @@ -1215,12 +1237,11 @@ bool CoroutineStmtBuilder::makeNewAndDeleteExpr() { const bool RequiresNoThrowAlloc = ReturnStmtOnAllocFailure != nullptr; - // [dcl.fct.def.coroutine]/7 - // Lookup allocation functions using a parameter list composed of the - // requested size of the coroutine state being allocated, followed by - // the coroutine function's arguments. If a matching allocation function - // exists, use it. Otherwise, use an allocation function that just takes - // the requested size. + // According to [dcl.fct.def.coroutine]p9, Lookup allocation functions using a + // parameter list composed of the requested size of the coroutine state being + // allocated, followed by the coroutine function's arguments. If a matching + // allocation function exists, use it. Otherwise, use an allocation function + // that just takes the requested size. FunctionDecl *OperatorNew = nullptr; FunctionDecl *OperatorDelete = nullptr; @@ -1228,21 +1249,32 @@ bool CoroutineStmtBuilder::makeNewAndDeleteExpr() { bool PassAlignment = false; SmallVector PlacementArgs; - // [dcl.fct.def.coroutine]/7 - // "The allocation function’s name is looked up in the scope of P. - // [...] 
If the lookup finds an allocation function in the scope of P, - // overload resolution is performed on a function call created by assembling - // an argument list. The first argument is the amount of space requested, - // and has type std::size_t. The lvalues p1 ... pn are the succeeding - // arguments." + // [dcl.fct.def.coroutine]p9 + // An implementation may need to allocate additional storage for a + // coroutine. + // This storage is known as the coroutine state and is obtained by calling a + // non-array allocation function ([basic.stc.dynamic.allocation]). The + // allocation function's name is looked up by searching for it in the scope of + // the promise type. + // - If any declarations are found, overload resolution is performed on a + // function call created by assembling an argument list. The first argument is + // the amount of space requested, and has type std::size_t. The + // lvalues p1 ... pn are the succeeding arguments. // // ...where "p1 ... pn" are defined earlier as: // - // [dcl.fct.def.coroutine]/3 - // "For a coroutine f that is a non-static member function, let P1 denote the - // type of the implicit object parameter (13.3.1) and P2 ... Pn be the types - // of the function parameters; otherwise let P1 ... Pn be the types of the - // function parameters. Let p1 ... pn be lvalues denoting those objects." + // [dcl.fct.def.coroutine]p3 + // The promise type of a coroutine is `std::coroutine_traits` + // , where R is the return type of the function, and `P1, ..., Pn` are the + // sequence of types of the non-object function parameters, preceded by the + // type of the object parameter ([dcl.fct]) if the coroutine is a non-static + // member function. [dcl.fct.def.coroutine]p4 In the following, p_i is an + // lvalue of type P_i, where p1 denotes the object parameter and p_i+1 denotes + // the i-th non-object function parameter for a non-static member function, + // and p_i denotes the i-th function parameter otherwise. For a non-static + // member function, q_1 is an lvalue that denotes *this; any other q_i is an + // lvalue that denotes the parameter copy corresponding to p_i. if (auto *MD = dyn_cast(&FD)) { if (MD->isInstance() && !isLambdaCallOperator(MD)) { ExprResult ThisExpr = S.ActOnCXXThis(Loc); @@ -1273,10 +1305,10 @@ bool CoroutineStmtBuilder::makeNewAndDeleteExpr() { /*isArray*/ false, PassAlignment, PlacementArgs, OperatorNew, UnusedResult, /*Diagnose*/ false); - // [dcl.fct.def.coroutine]/7 - // "If no matching function is found, overload resolution is performed again - // on a function call created by passing just the amount of space required as - // an argument of type std::size_t." + // [dcl.fct.def.coroutine]p9 + // If no viable function is found ([over.match.viable]), overload resolution + // is performed again on a function call created by passing just the amount of + // space required as an argument of type std::size_t. if (!OperatorNew && !PlacementArgs.empty()) { PlacementArgs.clear(); S.FindAllocationFunctions(Loc, SourceRange(), /*NewScope*/ Sema::AFS_Class, @@ -1285,10 +1317,11 @@ bool CoroutineStmtBuilder::makeNewAndDeleteExpr() { OperatorNew, UnusedResult, /*Diagnose*/ false); } - // [dcl.fct.def.coroutine]/7 - // "The allocation function’s name is looked up in the scope of P. If this - // lookup fails, the allocation function’s name is looked up in the global - // scope." + // [dcl.fct.def.coroutine]p9 + // The allocation function's name is looked up by searching for it in the + // scope of the promise type. 
+ // - If any declarations are found, ... + // - Otherwise, a search is performed in the global scope. if (!OperatorNew) { S.FindAllocationFunctions(Loc, SourceRange(), /*NewScope*/ Sema::AFS_Global, /*DeleteScope*/ Sema::AFS_Both, PromiseType, @@ -1328,8 +1361,12 @@ bool CoroutineStmtBuilder::makeNewAndDeleteExpr() { } } - if ((OperatorDelete = findDeleteForPromise(S, Loc, PromiseType)) == nullptr) + if ((OperatorDelete = findDeleteForPromise(S, Loc, PromiseType)) == nullptr) { + // FIXME: We should add an error here. According to: + // [dcl.fct.def.coroutine]p12 + // If no usual deallocation function is found, the program is ill-formed. return false; + } Expr *FramePtr = S.BuildBuiltinCallExpr(Loc, Builtin::BI__builtin_coro_frame, {}); @@ -1368,7 +1405,11 @@ bool CoroutineStmtBuilder::makeNewAndDeleteExpr() { SmallVector DeleteArgs{CoroFree}; - // Check if we need to pass the size. + // [dcl.fct.def.coroutine]p12 + // The selected deallocation function shall be called with the address of + // the block of storage to be reclaimed as its first argument. If a + // deallocation function with a parameter of type std::size_t is + // used, the size of the block is passed as the corresponding argument. const auto *OpDeleteType = OpDeleteQualType.getTypePtr()->castAs(); if (OpDeleteType->getNumParams() > 1) @@ -1481,8 +1522,9 @@ bool CoroutineStmtBuilder::makeOnException() { } bool CoroutineStmtBuilder::makeReturnObject() { - // Build implicit 'p.get_return_object()' expression and form initialization - // of return type from it. + // [dcl.fct.def.coroutine]p7 + // The expression promise.get_­return_­object() is used to initialize the + // returned reference or prvalue result object of a call to a coroutine. ExprResult ReturnObject = buildPromiseCall(S, Fn.CoroutinePromise, Loc, "get_return_object", None); if (ReturnObject.isInvalid()) @@ -1620,6 +1662,12 @@ bool Sema::buildCoroutineParameterMoves(SourceLocation Loc) { if (!ScopeInfo->CoroutineParameterMoves.empty()) return false; + // [dcl.fct.def.coroutine]p13 + // When a coroutine is invoked, after initializing its parameters + // ([expr.call]), a copy is created for each coroutine parameter. For a + // parameter of type cv T, the copy is a variable of type cv T with + // automatic storage duration that is direct-initialized from an xvalue of + // type T referring to the parameter. for (auto *PD : FD->parameters()) { if (PD->getType()->isDependentType()) continue; @@ -1636,7 +1684,9 @@ bool Sema::buildCoroutineParameterMoves(SourceLocation Loc) { CExpr = castForMoving(*this, PDRefExpr.get()); else CExpr = PDRefExpr.get(); - + // [dcl.fct.def.coroutine]p13 + // The initialization and destruction of each parameter copy occurs in the + // context of the called coroutine. auto D = buildVarDecl(*this, Loc, PD->getType(), PD->getIdentifier()); AddInitializerToDecl(D, CExpr, /*DirectInit=*/true); From b05df0287bafff65723c3aacfc3a6b43183a9a26 Mon Sep 17 00:00:00 2001 From: David Blaikie Date: Thu, 23 Dec 2021 20:28:37 -0800 Subject: [PATCH 202/293] Revert "[DWARF] Fix PR51087 Extraneous enum record in DWARF with type units" Causes invalid debug_gnu_pubnames (& I think non-gnu pubnames too) - visible as 0 values for the offset in gnu pubnames. More details on the original review in D115325. This reverts commit 78d15a112cbd545fbb6e1aa37c221ef5aeffb3f2. This reverts commit 54586582d3e17c0bba76258d2930486a6dfaaf2a. 
--- llvm/lib/CodeGen/AsmPrinter/DwarfDebug.cpp | 59 ++++++------------- llvm/lib/CodeGen/AsmPrinter/DwarfDebug.h | 6 -- .../type-units-maybe-unused-types.ll | 2 +- 3 files changed, 18 insertions(+), 49 deletions(-) rename llvm/test/DebugInfo/{X86 => Generic}/type-units-maybe-unused-types.ll (98%) diff --git a/llvm/lib/CodeGen/AsmPrinter/DwarfDebug.cpp b/llvm/lib/CodeGen/AsmPrinter/DwarfDebug.cpp index b1c78f0faac4..48134f1fd774 100644 --- a/llvm/lib/CodeGen/AsmPrinter/DwarfDebug.cpp +++ b/llvm/lib/CodeGen/AsmPrinter/DwarfDebug.cpp @@ -1141,21 +1141,6 @@ sortGlobalExprs(SmallVectorImpl &GVEs) { return GVEs; } -/// Create a DIE for \p Ty if it doesn't already exist. If type units are -/// enabled, try to emit a type unit without a CU skeleton DIE. -static void createMaybeUnusedType(DwarfDebug &DD, DwarfCompileUnit &CU, - DIType &Ty) { - // Try to generate a type unit without creating a skeleton DIE in this CU. - if (DICompositeType const *CTy = dyn_cast(&Ty)) { - MDString const *TypeId = CTy->getRawIdentifier(); - if (DD.generateTypeUnits() && TypeId && !Ty.isForwardDecl()) - if (DD.getOrCreateDwarfTypeUnit(CU, TypeId->getString(), CTy)) - return; - } - // We couldn't or shouldn't add a type unit so create the DIE normally. - CU.getOrCreateTypeDIE(&Ty); -} - // Emit all Dwarf sections that should come prior to the content. Create // global DIEs and emit initial debug info sections. This is invoked by // the target AsmPrinter. @@ -1239,17 +1224,15 @@ void DwarfDebug::beginModule(Module *M) { CU.getOrCreateGlobalVariableDIE(GV, sortGlobalExprs(GVMap[GV])); } - for (auto *Ty : CUNode->getEnumTypes()) { - // The enum types array by design contains pointers to - // MDNodes rather than DIRefs. Unique them here. - createMaybeUnusedType(*this, CU, *Ty); - } + for (auto *Ty : CUNode->getEnumTypes()) + CU.getOrCreateTypeDIE(cast(Ty)); + for (auto *Ty : CUNode->getRetainedTypes()) { // The retained types array by design contains pointers to // MDNodes rather than DIRefs. Unique them here. if (DIType *RT = dyn_cast(Ty)) // There is no point in force-emitting a forward declaration. - createMaybeUnusedType(*this, CU, *RT); + CU.getOrCreateTypeDIE(RT); } // Emit imported_modules last so that the relevant context is already // available. @@ -1420,7 +1403,6 @@ void DwarfDebug::finalizeModuleInfo() { SkeletonHolder.computeSizeAndOffsets(); } - // Emit all Dwarf sections that should come after the content. void DwarfDebug::endModule() { // Terminate the pending line table. @@ -3384,30 +3366,17 @@ uint64_t DwarfDebug::makeTypeSignature(StringRef Identifier) { void DwarfDebug::addDwarfTypeUnitType(DwarfCompileUnit &CU, StringRef Identifier, DIE &RefDie, const DICompositeType *CTy) { - bool TopLevelType = TypeUnitsUnderConstruction.empty(); - if (auto Signature = getOrCreateDwarfTypeUnit(CU, Identifier, CTy)) { - CU.addDIETypeSignature(RefDie, *Signature); - } else if (TopLevelType) { - // Construct this type in the CU directly. - // This is inefficient because all the dependent types will be rebuilt - // from scratch, including building them in type units, discovering that - // they depend on addresses, throwing them out and rebuilding them. - CU.constructTypeDIE(RefDie, cast(CTy)); - } -} - -Optional -DwarfDebug::getOrCreateDwarfTypeUnit(DwarfCompileUnit &CU, StringRef Identifier, - const DICompositeType *CTy) { // Fast path if we're building some type units and one has already used the // address pool we know we're going to throw away all this work anyway, so // don't bother building dependent types. 
if (!TypeUnitsUnderConstruction.empty() && AddrPool.hasBeenUsed()) - return None; + return; auto Ins = TypeSignatures.insert(std::make_pair(CTy, 0)); - if (!Ins.second) - return Ins.first->second; + if (!Ins.second) { + CU.addDIETypeSignature(RefDie, Ins.first->second); + return; + } bool TopLevelType = TypeUnitsUnderConstruction.empty(); AddrPool.resetUsedFlag(); @@ -3461,7 +3430,13 @@ DwarfDebug::getOrCreateDwarfTypeUnit(DwarfCompileUnit &CU, StringRef Identifier, // the type that used an address. for (const auto &TU : TypeUnitsToAdd) TypeSignatures.erase(TU.second); - return None; + + // Construct this type in the CU directly. + // This is inefficient because all the dependent types will be rebuilt + // from scratch, including building them in type units, discovering that + // they depend on addresses, throwing them out and rebuilding them. + CU.constructTypeDIE(RefDie, cast(CTy)); + return; } // If the type wasn't dependent on fission addresses, finish adding the type @@ -3471,7 +3446,7 @@ DwarfDebug::getOrCreateDwarfTypeUnit(DwarfCompileUnit &CU, StringRef Identifier, InfoHolder.emitUnit(TU.first.get(), useSplitDwarf()); } } - return Signature; + CU.addDIETypeSignature(RefDie, Signature); } DwarfDebug::NonTypeUnitContext::NonTypeUnitContext(DwarfDebug *DD) diff --git a/llvm/lib/CodeGen/AsmPrinter/DwarfDebug.h b/llvm/lib/CodeGen/AsmPrinter/DwarfDebug.h index ff2589b3c953..4e1a1b1e068d 100644 --- a/llvm/lib/CodeGen/AsmPrinter/DwarfDebug.h +++ b/llvm/lib/CodeGen/AsmPrinter/DwarfDebug.h @@ -667,12 +667,6 @@ class DwarfDebug : public DebugHandlerBase { void addDwarfTypeUnitType(DwarfCompileUnit &CU, StringRef Identifier, DIE &Die, const DICompositeType *CTy); - /// Return the type unit signature of \p CTy after finding or creating its - /// type unit. Return None if a type unit cannot be created for \p CTy. 
- Optional getOrCreateDwarfTypeUnit(DwarfCompileUnit &CU, - StringRef Identifier, - const DICompositeType *CTy); - class NonTypeUnitContext { DwarfDebug *DD; decltype(DwarfDebug::TypeUnitsUnderConstruction) TypeUnitsUnderConstruction; diff --git a/llvm/test/DebugInfo/X86/type-units-maybe-unused-types.ll b/llvm/test/DebugInfo/Generic/type-units-maybe-unused-types.ll similarity index 98% rename from llvm/test/DebugInfo/X86/type-units-maybe-unused-types.ll rename to llvm/test/DebugInfo/Generic/type-units-maybe-unused-types.ll index 6bfc1e881de7..589cd1b337ad 100644 --- a/llvm/test/DebugInfo/X86/type-units-maybe-unused-types.ll +++ b/llvm/test/DebugInfo/Generic/type-units-maybe-unused-types.ll @@ -1,4 +1,4 @@ -; RUN: llc %s -mtriple=x86_64-linux-gnu -generate-type-units -o - -filetype=obj \ +; RUN: %llc_dwarf %s -generate-type-units -o - -filetype=obj \ ; RUN: | llvm-dwarfdump -o - - \ ; RUN: | FileCheck %s From 02478a26f269b6c212c4eb8b5e86d9fc641e9531 Mon Sep 17 00:00:00 2001 From: Jim Lin Date: Fri, 24 Dec 2021 09:27:04 +0800 Subject: [PATCH 203/293] [RISCV] Use DAG variable directly instead of DCI.DAG Reviewed By: craig.topper Differential Revision: https://reviews.llvm.org/D116087 --- llvm/lib/Target/RISCV/RISCVISelLowering.cpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp index 972928f2e5b4..4f5512e6fb37 100644 --- a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp +++ b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp @@ -7110,7 +7110,7 @@ SDValue RISCVTargetLowering::PerformDAGCombine(SDNode *N, if (SimplifyDemandedLowBitsHelper(1, Log2_32(BitWidth))) return SDValue(N, 0); - return combineGREVI_GORCI(N, DCI.DAG); + return combineGREVI_GORCI(N, DAG); } case RISCVISD::GREVW: case RISCVISD::GORCW: { @@ -7119,7 +7119,7 @@ SDValue RISCVTargetLowering::PerformDAGCombine(SDNode *N, SimplifyDemandedLowBitsHelper(1, 5)) return SDValue(N, 0); - return combineGREVI_GORCI(N, DCI.DAG); + return combineGREVI_GORCI(N, DAG); } case RISCVISD::SHFL: case RISCVISD::UNSHFL: { From 097208dbf07786f3da84aec5b5f571c6e95a10e2 Mon Sep 17 00:00:00 2001 From: Chuanqi Xu Date: Fri, 24 Dec 2021 13:33:52 +0800 Subject: [PATCH 204/293] [C++20] [Coroutines] Allow promise_type to not define return_void or return_value MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit According to [dcl.fct.def.coroutine]p6, the promise_type is allowed to not define return_void nor return_value: > If searches for the names return_­void and return_­value in the scope > of the promise type each find any declarations, the program is > ill-formed. > [Note 1: If return_­void is found, flowing off the end of a coroutine is > equivalent to a co_­return with no operand. Otherwise, flowing off the > end of a coroutine results in > undefined behavior ([stmt.return.coroutine]). — end note] So the program isn't ill-formed if the promise_type doesn't define return_void nor return_value. It is just a potential UB. So the program should be allowed to compile. 
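For illustration only (this snippet is not part of the patch or its tests): a minimal
promise type that declares neither return_void nor return_value. With this change such
code is accepted, and only actually flowing off the end of the coroutine is undefined
behavior.

  #include <coroutine>

  struct task {
    struct promise_type {
      task get_return_object() { return {}; }
      std::suspend_always initial_suspend() noexcept { return {}; }
      std::suspend_always final_suspend() noexcept { return {}; }
      void unhandled_exception() {}
      // Note: neither return_void() nor return_value() is declared.
    };
  };

  task f() {
    co_await std::suspend_always{};
    // Flowing off the end here is undefined behavior per
    // [stmt.return.coroutine], but the program is no longer rejected.
  }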
Reviewed By: urnathan Differential Revision: https://reviews.llvm.org/D116204 --- .../clang/Basic/DiagnosticSemaKinds.td | 2 -- clang/lib/Sema/SemaCoroutine.cpp | 34 +++++++++++-------- .../test/SemaCXX/coroutines-exp-namespace.cpp | 13 ------- clang/test/SemaCXX/coroutines.cpp | 29 ++++++++++++---- 4 files changed, 43 insertions(+), 35 deletions(-) diff --git a/clang/include/clang/Basic/DiagnosticSemaKinds.td b/clang/include/clang/Basic/DiagnosticSemaKinds.td index f50c5a0c711a..3b6341d2232d 100644 --- a/clang/include/clang/Basic/DiagnosticSemaKinds.td +++ b/clang/include/clang/Basic/DiagnosticSemaKinds.td @@ -11071,8 +11071,6 @@ def err_coroutine_type_missing_specialization : Error< "specialization %0">; def err_coroutine_promise_incompatible_return_functions : Error< "the coroutine promise type %0 declares both 'return_value' and 'return_void'">; -def err_coroutine_promise_requires_return_function : Error< - "the coroutine promise type %0 must declare either 'return_value' or 'return_void'">; def note_coroutine_promise_implicit_await_transform_required_here : Note< "call to 'await_transform' implicitly required by 'co_await' here">; def note_coroutine_promise_suspend_implicitly_required : Note< diff --git a/clang/lib/Sema/SemaCoroutine.cpp b/clang/lib/Sema/SemaCoroutine.cpp index 336e0c5f1d46..e89cecd08cca 100644 --- a/clang/lib/Sema/SemaCoroutine.cpp +++ b/clang/lib/Sema/SemaCoroutine.cpp @@ -1432,9 +1432,13 @@ bool CoroutineStmtBuilder::makeOnFallthrough() { assert(!IsPromiseDependentType && "cannot make statement while the promise type is dependent"); - // [dcl.fct.def.coroutine]/4 - // The unqualified-ids 'return_void' and 'return_value' are looked up in - // the scope of class P. If both are found, the program is ill-formed. + // [dcl.fct.def.coroutine]/p6 + // If searches for the names return_­void and return_­value in the scope of + // the promise type each find any declarations, the program is ill-formed. + // [Note 1: If return_­void is found, flowing off the end of a coroutine is + // equivalent to a co_­return with no operand. Otherwise, flowing off the end + // of a coroutine results in undefined behavior ([stmt.return.coroutine]). — + // end note] bool HasRVoid, HasRValue; LookupResult LRVoid = lookupMember(S, "return_void", PromiseRecordDecl, Loc, HasRVoid); @@ -1455,18 +1459,20 @@ bool CoroutineStmtBuilder::makeOnFallthrough() { << LRValue.getLookupName(); return false; } else if (!HasRVoid && !HasRValue) { - // FIXME: The PDTS currently specifies this case as UB, not ill-formed. - // However we still diagnose this as an error since until the PDTS is fixed. - S.Diag(FD.getLocation(), - diag::err_coroutine_promise_requires_return_function) - << PromiseRecordDecl; - S.Diag(PromiseRecordDecl->getLocation(), diag::note_defined_here) - << PromiseRecordDecl; - return false; + // We need to set 'Fallthrough'. Otherwise the other analysis part might + // think the coroutine has defined a return_value method. So it might emit + // **false** positive warning. e.g., + // + // promise_without_return_func foo() { + // co_await something(); + // } + // + // Then AnalysisBasedWarning would emit a warning about `foo()` lacking a + // co_return statements, which isn't correct. + Fallthrough = S.ActOnNullStmt(PromiseRecordDecl->getLocation()); + if (Fallthrough.isInvalid()) + return false; } else if (HasRVoid) { - // If the unqualified-id return_void is found, flowing off the end of a - // coroutine is equivalent to a co_return with no operand. 
Otherwise, - // flowing off the end of a coroutine results in undefined behavior. Fallthrough = S.BuildCoreturnStmt(FD.getLocation(), nullptr, /*IsImplicit*/false); Fallthrough = S.ActOnFinishFullStmt(Fallthrough.get()); diff --git a/clang/test/SemaCXX/coroutines-exp-namespace.cpp b/clang/test/SemaCXX/coroutines-exp-namespace.cpp index 6fce00c09ca6..a5ad37e338d2 100644 --- a/clang/test/SemaCXX/coroutines-exp-namespace.cpp +++ b/clang/test/SemaCXX/coroutines-exp-namespace.cpp @@ -978,19 +978,6 @@ extern "C" int f(mismatch_gro_type_tag4) { co_return; //expected-note {{function is a coroutine due to use of 'co_return' here}} } -struct bad_promise_no_return_func { // expected-note {{'bad_promise_no_return_func' defined here}} - coro get_return_object(); - suspend_always initial_suspend(); - suspend_always final_suspend() noexcept; - void unhandled_exception(); -}; -// FIXME: The PDTS currently specifies this as UB, technically forbidding a -// diagnostic. -coro no_return_value_or_return_void() { - // expected-error@-1 {{'bad_promise_no_return_func' must declare either 'return_value' or 'return_void'}} - co_await a; -} - struct bad_await_suspend_return { bool await_ready(); // expected-error@+1 {{return type of 'await_suspend' is required to be 'void' or 'bool' (have 'char')}} diff --git a/clang/test/SemaCXX/coroutines.cpp b/clang/test/SemaCXX/coroutines.cpp index e175e3f9fa64..b461c881355b 100644 --- a/clang/test/SemaCXX/coroutines.cpp +++ b/clang/test/SemaCXX/coroutines.cpp @@ -970,19 +970,36 @@ extern "C" int f(mismatch_gro_type_tag4) { co_return; //expected-note {{function is a coroutine due to use of 'co_return' here}} } -struct bad_promise_no_return_func { // expected-note {{'bad_promise_no_return_func' defined here}} - coro get_return_object(); +struct promise_no_return_func { + coro get_return_object(); suspend_always initial_suspend(); suspend_always final_suspend() noexcept; void unhandled_exception(); }; -// FIXME: The PDTS currently specifies this as UB, technically forbidding a -// diagnostic. -coro no_return_value_or_return_void() { - // expected-error@-1 {{'bad_promise_no_return_func' must declare either 'return_value' or 'return_void'}} +// [dcl.fct.def.coroutine]/p6 +// If searches for the names return_­void and return_­value in the scope of +// the promise type each find any declarations, the program is ill-formed. +// [Note 1: If return_­void is found, flowing off the end of a coroutine is +// equivalent to a co_­return with no operand. Otherwise, flowing off the end +// of a coroutine results in undefined behavior ([stmt.return.coroutine]). — +// end note] +// +// So it isn't ill-formed if the promise doesn't define return_value and return_void. +// It is just a potential UB. +coro no_return_value_or_return_void() { co_await a; } +// The following two tests that it would emit correct diagnostic message +// if we co_return in `promise_no_return_func`. 
+coro no_return_value_or_return_void_2() { + co_return; // expected-error {{no member named 'return_void'}} +} + +coro no_return_value_or_return_void_3() { + co_return 43; // expected-error {{no member named 'return_value'}} +} + struct bad_await_suspend_return { bool await_ready(); // expected-error@+1 {{return type of 'await_suspend' is required to be 'void' or 'bool' (have 'char')}} From 0d749e13f714f87f71c7dd823d302c7d5e5d8c34 Mon Sep 17 00:00:00 2001 From: Fangrui Song Date: Thu, 23 Dec 2021 21:54:32 -0800 Subject: [PATCH 205/293] [ELF] Optimize symbol initialization and resolution Avoid repeated load of global pointer (symtab) / members (sections.size(), firstGlobal) in the hot paths. And remove some unneeded this-> --- lld/ELF/InputFiles.cpp | 91 ++++++++++++++++++++++-------------------- 1 file changed, 47 insertions(+), 44 deletions(-) diff --git a/lld/ELF/InputFiles.cpp b/lld/ELF/InputFiles.cpp index 9dcf572a920b..f2128c84f453 100644 --- a/lld/ELF/InputFiles.cpp +++ b/lld/ELF/InputFiles.cpp @@ -1050,62 +1050,62 @@ InputSectionBase *ObjFile::createInputSection(uint32_t idx, // Initialize this->Symbols. this->Symbols is a parallel array as // its corresponding ELF symbol table. template void ObjFile::initializeSymbols() { + ArrayRef sections(this->sections); + SymbolTable &symtab = *elf::symtab; + ArrayRef eSyms = this->getELFSyms(); - this->symbols.resize(eSyms.size()); + symbols.resize(eSyms.size()); SymbolUnion *locals = firstGlobal == 0 ? nullptr : getSpecificAllocSingleton().Allocate(firstGlobal); - for (size_t i = 0; i != firstGlobal; ++i) { + for (size_t i = 0, end = firstGlobal; i != end; ++i) { const Elf_Sym &eSym = eSyms[i]; uint32_t secIdx = getSectionIndex(eSym); - if (secIdx >= this->sections.size()) + if (LLVM_UNLIKELY(secIdx >= sections.size())) fatal(toString(this) + ": invalid section index: " + Twine(secIdx)); - if (eSym.getBinding() != STB_LOCAL) + if (LLVM_UNLIKELY(eSym.getBinding() != STB_LOCAL)) error(toString(this) + ": non-local symbol (" + Twine(i) + - ") found at index < .symtab's sh_info (" + Twine(firstGlobal) + - ")"); + ") found at index < .symtab's sh_info (" + Twine(end) + ")"); - InputSectionBase *sec = this->sections[secIdx]; + InputSectionBase *sec = sections[secIdx]; uint8_t type = eSym.getType(); if (type == STT_FILE) - sourceFile = CHECK(eSym.getName(this->stringTable), this); - if (this->stringTable.size() <= eSym.st_name) + sourceFile = CHECK(eSym.getName(stringTable), this); + if (LLVM_UNLIKELY(stringTable.size() <= eSym.st_name)) fatal(toString(this) + ": invalid symbol name offset"); - StringRefZ name = this->stringTable.data() + eSym.st_name; + StringRefZ name = stringTable.data() + eSym.st_name; - this->symbols[i] = reinterpret_cast(locals + i); + symbols[i] = reinterpret_cast(locals + i); if (eSym.st_shndx == SHN_UNDEF || sec == &InputSection::discarded) - new (this->symbols[i]) - Undefined(this, name, STB_LOCAL, eSym.st_other, type, - /*discardedSecIdx=*/secIdx); + new (symbols[i]) Undefined(this, name, STB_LOCAL, eSym.st_other, type, + /*discardedSecIdx=*/secIdx); else - new (this->symbols[i]) Defined(this, name, STB_LOCAL, eSym.st_other, type, - eSym.st_value, eSym.st_size, sec); + new (symbols[i]) Defined(this, name, STB_LOCAL, eSym.st_other, type, + eSym.st_value, eSym.st_size, sec); } // Some entries have been filled by LazyObjFile. 
for (size_t i = firstGlobal, end = eSyms.size(); i != end; ++i) - if (!this->symbols[i]) - this->symbols[i] = - symtab->insert(CHECK(eSyms[i].getName(this->stringTable), this)); + if (!symbols[i]) + symbols[i] = symtab.insert(CHECK(eSyms[i].getName(stringTable), this)); // Perform symbol resolution on non-local symbols. SmallVector undefineds; for (size_t i = firstGlobal, end = eSyms.size(); i != end; ++i) { const Elf_Sym &eSym = eSyms[i]; uint8_t binding = eSym.getBinding(); - if (binding == STB_LOCAL) { + if (LLVM_UNLIKELY(binding == STB_LOCAL)) { errorOrWarn(toString(this) + ": STB_LOCAL symbol (" + Twine(i) + ") found at index >= .symtab's sh_info (" + Twine(firstGlobal) + ")"); continue; } uint32_t secIdx = getSectionIndex(eSym); - if (secIdx >= this->sections.size()) + if (LLVM_UNLIKELY(secIdx >= sections.size())) fatal(toString(this) + ": invalid section index: " + Twine(secIdx)); - InputSectionBase *sec = this->sections[secIdx]; + InputSectionBase *sec = sections[secIdx]; uint8_t stOther = eSym.st_other; uint8_t type = eSym.getType(); uint64_t value = eSym.st_value; @@ -1116,9 +1116,9 @@ template void ObjFile::initializeSymbols() { continue; } - Symbol *sym = this->symbols[i]; + Symbol *sym = symbols[i]; const StringRef name = sym->getName(); - if (eSym.st_shndx == SHN_COMMON) { + if (LLVM_UNLIKELY(eSym.st_shndx == SHN_COMMON)) { if (value == 0 || value >= UINT32_MAX) fatal(toString(this) + ": common symbol '" + name + "' has invalid alignment: " + Twine(value)); @@ -1170,7 +1170,7 @@ template void ObjFile::initializeSymbols() { // being resolved to different files. for (unsigned i : undefineds) { const Elf_Sym &eSym = eSyms[i]; - Symbol *sym = this->symbols[i]; + Symbol *sym = symbols[i]; sym->resolve(Undefined{this, sym->getName(), eSym.getBinding(), eSym.st_other, eSym.getType()}); sym->referenced = true; @@ -1182,8 +1182,9 @@ ArchiveFile::ArchiveFile(std::unique_ptr &&file) file(std::move(file)) {} void ArchiveFile::parse() { + SymbolTable &symtab = *elf::symtab; for (const Archive::Symbol &sym : file->symbols()) - symtab->addSymbol(LazyArchive{*this, sym}); + symtab.addSymbol(LazyArchive{*this, sym}); // Inform a future invocation of ObjFile::initializeSymbols() that this // archive has been processed. @@ -1494,6 +1495,7 @@ template void SharedFile::parse() { SmallString<0> versionedNameBuffer; // Add symbols to the symbol table. + SymbolTable &symtab = *elf::symtab; ArrayRef syms = this->getGlobalELFSyms(); for (size_t i = 0, e = syms.size(); i != e; ++i) { const Elf_Sym &sym = syms[i]; @@ -1502,7 +1504,7 @@ template void SharedFile::parse() { // symbols in each symbol table, and the index of first non-local symbol // is stored to sh_info. If a local symbol appears after some non-local // symbol, that's a violation of the spec. 
- StringRef name = CHECK(sym.getName(this->stringTable), this); + StringRef name = CHECK(sym.getName(stringTable), this); if (sym.getBinding() == STB_LOCAL) { warn("found local symbol '" + name + "' in global part of symbol table in file " + toString(this)); @@ -1520,12 +1522,12 @@ template void SharedFile::parse() { toString(this)); continue; } - StringRef verName = this->stringTable.data() + verneeds[idx]; + StringRef verName = stringTable.data() + verneeds[idx]; versionedNameBuffer.clear(); name = saver.save((name + "@" + verName).toStringRef(versionedNameBuffer)); } - Symbol *s = symtab->addSymbol( + Symbol *s = symtab.addSymbol( Undefined{this, name, sym.getBinding(), sym.st_other, sym.getType()}); s->exportDynamic = true; if (s->isUndefined() && sym.getBinding() != STB_WEAK && @@ -1543,9 +1545,9 @@ template void SharedFile::parse() { uint32_t alignment = getAlignment(sections, sym); if (!(versyms[i] & VERSYM_HIDDEN)) { - symtab->addSymbol(SharedSymbol{*this, name, sym.getBinding(), - sym.st_other, sym.getType(), sym.st_value, - sym.st_size, alignment, idx}); + symtab.addSymbol(SharedSymbol{*this, name, sym.getBinding(), sym.st_other, + sym.getType(), sym.st_value, sym.st_size, + alignment, idx}); } // Also add the symbol with the versioned name to handle undefined symbols @@ -1561,13 +1563,13 @@ template void SharedFile::parse() { } StringRef verName = - this->stringTable.data() + + stringTable.data() + reinterpret_cast(verdefs[idx])->getAux()->vda_name; versionedNameBuffer.clear(); name = (name + "@" + verName).toStringRef(versionedNameBuffer); - symtab->addSymbol(SharedSymbol{*this, saver.save(name), sym.getBinding(), - sym.st_other, sym.getType(), sym.st_value, - sym.st_size, alignment, idx}); + symtab.addSymbol(SharedSymbol{*this, saver.save(name), sym.getBinding(), + sym.st_other, sym.getType(), sym.st_value, + sym.st_size, alignment, idx}); } } @@ -1715,17 +1717,20 @@ template void BitcodeFile::parse() { .second); } - for (const lto::InputFile::Symbol &objSym : obj->symbols()) - symbols.push_back(createBitcodeSymbol(keptComdats, objSym, *this)); + symbols.assign(obj->symbols().size(), nullptr); + for (auto it : llvm::enumerate(obj->symbols())) + symbols[it.index()] = + createBitcodeSymbol(keptComdats, it.value(), *this); for (auto l : obj->getDependentLibraries()) addDependentLibrary(l, this); } void BitcodeFile::parseLazy() { + SymbolTable &symtab = *elf::symtab; for (const lto::InputFile::Symbol &sym : obj->symbols()) if (!sym.isUndefined()) - symtab->addSymbol(LazyObject{*this, saver.save(sym.getName())}); + symtab.addSymbol(LazyObject{*this, saver.save(sym.getName())}); } void BinaryFile::parse() { @@ -1783,21 +1788,19 @@ InputFile *elf::createLazyFile(MemoryBufferRef mb, StringRef archiveName, template void ObjFile::parseLazy() { const ArrayRef eSyms = this->getELFSyms(); - auto &symbols = this->symbols; - SymbolTable *const symtab = elf::symtab.get(); - const StringRef strtab = this->stringTable; + SymbolTable &symtab = *elf::symtab; symbols.resize(eSyms.size()); for (size_t i = firstGlobal, end = eSyms.size(); i != end; ++i) if (eSyms[i].st_shndx != SHN_UNDEF) - symbols[i] = symtab->insert(CHECK(eSyms[i].getName(strtab), this)); + symbols[i] = symtab.insert(CHECK(eSyms[i].getName(stringTable), this)); // Replace existing symbols with LazyObject symbols. // // resolve() may trigger this->extract() if an existing symbol is an undefined // symbol. If that happens, this function has served its purpose, and we can // exit from the loop early. 
- for (Symbol *sym : this->symbols) + for (Symbol *sym : symbols) if (sym) { sym->resolve(LazyObject{*this, sym->getName()}); if (!lazy) From dd903173c0fb9ead398b8b516f0672d30d25b120 Mon Sep 17 00:00:00 2001 From: Nikita Popov Date: Thu, 23 Dec 2021 10:43:47 +0100 Subject: [PATCH 206/293] [OpenMP] Avoid creating null pointer lvalue (NFC) The reduction initialization code creates a "naturally aligned null pointer to void lvalue", which I found somewhat odd, even though it works out in the end because it is not actually used. It doesn't look like this code actually needs an LValue for anything though, and we can use an invalid Address to represent this case instead. Differential Revision: https://reviews.llvm.org/D116214 --- clang/lib/CodeGen/CGOpenMPRuntime.cpp | 29 +++++++++------------------ clang/lib/CodeGen/CGOpenMPRuntime.h | 8 ++++---- clang/lib/CodeGen/CGStmtOpenMP.cpp | 2 +- 3 files changed, 14 insertions(+), 25 deletions(-) diff --git a/clang/lib/CodeGen/CGOpenMPRuntime.cpp b/clang/lib/CodeGen/CGOpenMPRuntime.cpp index 03aa84aecef4..0c71fee14025 100644 --- a/clang/lib/CodeGen/CGOpenMPRuntime.cpp +++ b/clang/lib/CodeGen/CGOpenMPRuntime.cpp @@ -774,7 +774,7 @@ LValue ReductionCodeGen::emitSharedLValueUB(CodeGenFunction &CGF, } void ReductionCodeGen::emitAggregateInitialization( - CodeGenFunction &CGF, unsigned N, Address PrivateAddr, LValue SharedLVal, + CodeGenFunction &CGF, unsigned N, Address PrivateAddr, Address SharedAddr, const OMPDeclareReductionDecl *DRD) { // Emit VarDecl with copy init for arrays. // Get the address of the original variable captured in current @@ -787,7 +787,7 @@ void ReductionCodeGen::emitAggregateInitialization( EmitDeclareReductionInit, EmitDeclareReductionInit ? ClausesData[N].ReductionOp : PrivateVD->getInit(), - DRD, SharedLVal.getAddress(CGF)); + DRD, SharedAddr); } ReductionCodeGen::ReductionCodeGen(ArrayRef Shareds, @@ -881,7 +881,7 @@ void ReductionCodeGen::emitAggregateType(CodeGenFunction &CGF, unsigned N, } void ReductionCodeGen::emitInitialization( - CodeGenFunction &CGF, unsigned N, Address PrivateAddr, LValue SharedLVal, + CodeGenFunction &CGF, unsigned N, Address PrivateAddr, Address SharedAddr, llvm::function_ref DefaultInit) { assert(SharedAddresses.size() > N && "No variable was generated"); const auto *PrivateVD = @@ -891,21 +891,15 @@ void ReductionCodeGen::emitInitialization( QualType PrivateType = PrivateVD->getType(); PrivateAddr = CGF.Builder.CreateElementBitCast( PrivateAddr, CGF.ConvertTypeForMem(PrivateType)); - QualType SharedType = SharedAddresses[N].first.getType(); - SharedLVal = CGF.MakeAddrLValue( - CGF.Builder.CreateElementBitCast(SharedLVal.getAddress(CGF), - CGF.ConvertTypeForMem(SharedType)), - SharedType, SharedAddresses[N].first.getBaseInfo(), - CGF.CGM.getTBAAInfoForSubobject(SharedAddresses[N].first, SharedType)); if (CGF.getContext().getAsArrayType(PrivateVD->getType())) { if (DRD && DRD->getInitializer()) (void)DefaultInit(CGF); - emitAggregateInitialization(CGF, N, PrivateAddr, SharedLVal, DRD); + emitAggregateInitialization(CGF, N, PrivateAddr, SharedAddr, DRD); } else if (DRD && (DRD->getInitializer() || !PrivateVD->hasInit())) { (void)DefaultInit(CGF); + QualType SharedType = SharedAddresses[N].first.getType(); emitInitWithReductionInitializer(CGF, DRD, ClausesData[N].ReductionOp, - PrivateAddr, SharedLVal.getAddress(CGF), - SharedLVal.getType()); + PrivateAddr, SharedAddr, SharedType); } else if (!DefaultInit(CGF) && PrivateVD->hasInit() && !CGF.isTrivialInitializer(PrivateVD->getInit())) { 
CGF.EmitAnyExprToMem(PrivateVD->getInit(), PrivateAddr, @@ -5915,25 +5909,20 @@ static llvm::Value *emitReduceInitFunction(CodeGenModule &CGM, CGM.getContext().getSizeType(), Loc); } RCG.emitAggregateType(CGF, N, Size); - LValue OrigLVal; + Address OrigAddr = Address::invalid(); // If initializer uses initializer from declare reduction construct, emit a // pointer to the address of the original reduction item (reuired by reduction // initializer) if (RCG.usesReductionInitializer(N)) { Address SharedAddr = CGF.GetAddrOfLocalVar(&ParamOrig); - SharedAddr = CGF.EmitLoadOfPointer( + OrigAddr = CGF.EmitLoadOfPointer( SharedAddr, CGM.getContext().VoidPtrTy.castAs()->getTypePtr()); - OrigLVal = CGF.MakeAddrLValue(SharedAddr, CGM.getContext().VoidPtrTy); - } else { - OrigLVal = CGF.MakeNaturalAlignAddrLValue( - llvm::ConstantPointerNull::get(CGM.VoidPtrTy), - CGM.getContext().VoidTy); } // Emit the initializer: // %0 = bitcast void* %arg to * // store , * %0 - RCG.emitInitialization(CGF, N, PrivateAddr, OrigLVal, + RCG.emitInitialization(CGF, N, PrivateAddr, OrigAddr, [](CodeGenFunction &) { return false; }); CGF.FinishFunction(); return Fn; diff --git a/clang/lib/CodeGen/CGOpenMPRuntime.h b/clang/lib/CodeGen/CGOpenMPRuntime.h index c7b69dee9160..b83ec78696d1 100644 --- a/clang/lib/CodeGen/CGOpenMPRuntime.h +++ b/clang/lib/CodeGen/CGOpenMPRuntime.h @@ -162,10 +162,10 @@ class ReductionCodeGen { /// Performs aggregate initialization. /// \param N Number of reduction item in the common list. /// \param PrivateAddr Address of the corresponding private item. - /// \param SharedLVal Address of the original shared variable. + /// \param SharedAddr Address of the original shared variable. /// \param DRD Declare reduction construct used for reduction item. void emitAggregateInitialization(CodeGenFunction &CGF, unsigned N, - Address PrivateAddr, LValue SharedLVal, + Address PrivateAddr, Address SharedAddr, const OMPDeclareReductionDecl *DRD); public: @@ -187,10 +187,10 @@ class ReductionCodeGen { /// \param PrivateAddr Address of the corresponding private item. /// \param DefaultInit Default initialization sequence that should be /// performed if no reduction specific initialization is found. - /// \param SharedLVal Address of the original shared variable. + /// \param SharedAddr Address of the original shared variable. void emitInitialization(CodeGenFunction &CGF, unsigned N, Address PrivateAddr, - LValue SharedLVal, + Address SharedAddr, llvm::function_ref DefaultInit); /// Returns true if the private copy requires cleanups. 
bool needCleanups(unsigned N); diff --git a/clang/lib/CodeGen/CGStmtOpenMP.cpp b/clang/lib/CodeGen/CGStmtOpenMP.cpp index 1bc086ffb1c6..ba0ced7d8f97 100644 --- a/clang/lib/CodeGen/CGStmtOpenMP.cpp +++ b/clang/lib/CodeGen/CGStmtOpenMP.cpp @@ -1247,7 +1247,7 @@ void CodeGenFunction::EmitOMPReductionClauseInit( RedCG.emitAggregateType(*this, Count); AutoVarEmission Emission = EmitAutoVarAlloca(*PrivateVD); RedCG.emitInitialization(*this, Count, Emission.getAllocatedAddress(), - RedCG.getSharedLValue(Count), + RedCG.getSharedLValue(Count).getAddress(*this), [&Emission](CodeGenFunction &CGF) { CGF.EmitAutoVarInit(Emission); return true; From 69ffc3cee9001caa90af83a85df8e5838f1a55f0 Mon Sep 17 00:00:00 2001 From: Nikita Popov Date: Mon, 20 Dec 2021 14:27:58 +0100 Subject: [PATCH 207/293] [Attributor] Directly call areTypesABICompatible() hook Instead of using the ArgumentPromotion implementation, we now walk call sites using checkForAllCallSites() and directly call areTypesABICompatible() using the replacement types. I believe that resolves the TODO in the code. Differential Revision: https://reviews.llvm.org/D116033 --- .../Transforms/IPO/AttributorAttributes.cpp | 34 +++++++++++-------- 1 file changed, 19 insertions(+), 15 deletions(-) diff --git a/llvm/lib/Transforms/IPO/AttributorAttributes.cpp b/llvm/lib/Transforms/IPO/AttributorAttributes.cpp index 416c2ee45079..b977821bcaa6 100644 --- a/llvm/lib/Transforms/IPO/AttributorAttributes.cpp +++ b/llvm/lib/Transforms/IPO/AttributorAttributes.cpp @@ -6414,31 +6414,36 @@ struct AAPrivatizablePtrArgument final : public AAPrivatizablePtrImpl { return indicatePessimisticFixpoint(); } + // Collect the types that will replace the privatizable type in the function + // signature. + SmallVector ReplacementTypes; + identifyReplacementTypes(PrivatizableType.getValue(), ReplacementTypes); + // Verify callee and caller agree on how the promoted argument would be // passed. - // TODO: The use of the ArgumentPromotion interface here is ugly, we need a - // specialized form of TargetTransformInfo::areFunctionArgsABICompatible - // which doesn't require the arguments ArgumentPromotion wanted to pass. Function &Fn = *getIRPosition().getAnchorScope(); - SmallPtrSet ArgsToPromote, Dummy; - ArgsToPromote.insert(getAssociatedArgument()); const auto *TTI = A.getInfoCache().getAnalysisResultForFunction(Fn); - if (!TTI || - !ArgumentPromotionPass::areFunctionArgsABICompatible( - Fn, *TTI, ArgsToPromote, Dummy) || - ArgsToPromote.empty()) { + if (!TTI) { + LLVM_DEBUG(dbgs() << "[AAPrivatizablePtr] Missing TTI for function " + << Fn.getName() << "\n"); + return indicatePessimisticFixpoint(); + } + + auto CallSiteCheck = [&](AbstractCallSite ACS) { + CallBase *CB = ACS.getInstruction(); + return TTI->areTypesABICompatible( + CB->getCaller(), CB->getCalledFunction(), ReplacementTypes); + }; + bool AllCallSitesKnown; + if (!A.checkForAllCallSites(CallSiteCheck, *this, true, + AllCallSitesKnown)) { LLVM_DEBUG( dbgs() << "[AAPrivatizablePtr] ABI incompatibility detected for " << Fn.getName() << "\n"); return indicatePessimisticFixpoint(); } - // Collect the types that will replace the privatizable type in the function - // signature. - SmallVector ReplacementTypes; - identifyReplacementTypes(PrivatizableType.getValue(), ReplacementTypes); - // Register a rewrite of the argument. 
Argument *Arg = getAssociatedArgument(); if (!A.isValidFunctionSignatureRewrite(*Arg, ReplacementTypes)) { @@ -6558,7 +6563,6 @@ struct AAPrivatizablePtrArgument final : public AAPrivatizablePtrImpl { return false; }; - bool AllCallSitesKnown; if (!A.checkForAllCallSites(IsCompatiblePrivArgOfOtherCallSite, *this, true, AllCallSitesKnown)) return indicatePessimisticFixpoint(); From cb31a57104215c57e01e8e9bfe9e545ae0b6b18b Mon Sep 17 00:00:00 2001 From: Nikita Popov Date: Fri, 24 Dec 2021 09:24:45 +0100 Subject: [PATCH 208/293] [DebugInfo] Remove type-units-maybe-unused-types.ll test 78d15a112cbd545fbb6e1aa37c221ef5aeffb3f2 has been reverted, but the test not deleted, so it is failing now. --- .../Generic/type-units-maybe-unused-types.ll | 134 ------------------ 1 file changed, 134 deletions(-) delete mode 100644 llvm/test/DebugInfo/Generic/type-units-maybe-unused-types.ll diff --git a/llvm/test/DebugInfo/Generic/type-units-maybe-unused-types.ll b/llvm/test/DebugInfo/Generic/type-units-maybe-unused-types.ll deleted file mode 100644 index 589cd1b337ad..000000000000 --- a/llvm/test/DebugInfo/Generic/type-units-maybe-unused-types.ll +++ /dev/null @@ -1,134 +0,0 @@ -; RUN: %llc_dwarf %s -generate-type-units -o - -filetype=obj \ -; RUN: | llvm-dwarfdump -o - - \ -; RUN: | FileCheck %s - -;; PR51087 -;; Check that types that are not referenecd in the CU and have type units -;; do not get unecessary skeleton DIEs in the CU, and that types that have -;; type units but are referenced in the CU still get CU DIEs. -;; -;; In the test (source below): -;; Unused is not used anywhere and should get only a type unit. -;; -;; Outer is used (by global O) so should get a CU DIE, but none of its nested -;; types (Inner, then nested again Enum1 and Enum2) are used so they should not. -;; -;; Ex is not used directly, but its nested type Enum is, so both should get -;; a DIE in the CU. Retained types and enums are emitted after globals, so -;; having Enum used by a local variable lets us check that type DIEs emitted -;; for types that initially only need type units still get a CU DIE later on -;; if required. -;; -;; Generated with `-Xclang -debug-info-kind=unused-types` (for Unused) from: -;; $ cat test.cpp -;; struct Unused {}; -;; -;; class Outer { -;; public: -;; struct Inner { -;; enum Enum1 { X }; -;; enum Enum2 { Y }; -;; Enum1 one; -;; Enum2 two; -;; }; -;; -;; Inner n; -;; } O; -;; -;; struct Ex { enum Enum { X }; }; -;; void fun() { Ex::Enum local; } - -;; Note: The types without a type_signature match here should only get type -;; units and no CU DIE. -; CHECK: 0x00000000: Type Unit{{.+}} name = 'Outer'{{.+}} type_signature = [[SIG_Outer:[0-9a-fx]+]] -; CHECK: 0x00000000: Type Unit{{.+}} name = 'Inner'{{.+}} -; CHECK: 0x00000000: Type Unit{{.+}} name = 'Enum1'{{.+}} -; CHECK: 0x00000000: Type Unit{{.+}} name = 'Enum2'{{.+}} -; CHECK: 0x00000000: Type Unit{{.+}} name = 'Enum'{{.+}} type_signature = [[SIG_Enum:[0-9a-fx]+]] -; CHECK: 0x00000000: Type Unit{{.+}} name = 'Ex'{{.+}} type_signature = [[SIG_Ex:[0-9a-fx]+]] -; CHECK: 0x00000000: Type Unit{{.+}} name = 'Unused'{{.+}} - -;; Check the CU references and skeleton DIEs are emitted correctly. -;; The check-not directives check that Unused doesn't get a DIE in the CU. -; CHECK: DW_TAG_compile_unit -; CHECK-NOT: DW_AT_signature -; CHECK: DW_AT_type ([[DIE_Outer:[0-9a-fx]+]] "Outer") -; CHECK-NOT: DW_AT_signature - -;; Outer is referenced in the CU so it needs a DIE, but its nested enums are not -;; and so should not have DIEs here. 
-; CHECK: [[DIE_Outer]]: DW_TAG_class_type -; CHECK-NEXT: DW_AT_declaration (true) -; CHECK-NEXT: DW_AT_signature ([[SIG_Outer]]) - -; CHECK-NOT: DW_AT_signature -; CHECK: DW_AT_type ([[DIE_Enum:[0-9a-fx]+]] "Ex::Enum") -; CHECK-NOT: DW_AT_signature - -;; Ex is not referenced in the CU but its nested type, Enum, is. -; CHECK: DW_TAG_structure_type -; CHECK-NEXT: DW_AT_declaration (true) -; CHECK-NEXT: DW_AT_signature ([[SIG_Ex]]) -; CHECK-EMPTY: -; CHECK-NEXT: [[DIE_Enum]]: DW_TAG_enumeration_type -; CHECK-NEXT: DW_AT_declaration (true) -; CHECK-NEXT: DW_AT_signature ([[SIG_Enum]]) - -;; One last check that Unused has no CU DIE. -; CHECK-NOT: DW_AT_signature - -%class.Outer = type { %"struct.Outer::Inner" } -%"struct.Outer::Inner" = type { i32, i32 } - -@O = dso_local global %class.Outer zeroinitializer, align 4, !dbg !0 - -define dso_local void @_Z3funv() !dbg !31 { -entry: - %local = alloca i32, align 4 - call void @llvm.dbg.declare(metadata i32* %local, metadata !34, metadata !DIExpression()), !dbg !35 - ret void, !dbg !36 -} - -declare void @llvm.dbg.declare(metadata, metadata, metadata) - -!llvm.dbg.cu = !{!2} -!llvm.module.flags = !{!25, !26, !27, !28, !29} -!llvm.ident = !{!30} - -!0 = !DIGlobalVariableExpression(var: !1, expr: !DIExpression()) -!1 = distinct !DIGlobalVariable(name: "O", scope: !2, file: !3, line: 13, type: !7, isLocal: false, isDefinition: true) -!2 = distinct !DICompileUnit(language: DW_LANG_C_plus_plus_14, file: !3, producer: "clang version 14.0.0", isOptimized: false, runtimeVersion: 0, emissionKind: FullDebug, enums: !4, retainedTypes: !22, globals: !24, splitDebugInlining: false, nameTableKind: None) -!3 = !DIFile(filename: "test.cpp", directory: "/") -!4 = !{!5, !13, !19} -!5 = !DICompositeType(tag: DW_TAG_enumeration_type, name: "Enum1", scope: !6, file: !3, line: 6, baseType: !14, size: 32, elements: !17, identifier: "_ZTSN5Outer5Inner5Enum1E") -!6 = distinct !DICompositeType(tag: DW_TAG_structure_type, name: "Inner", scope: !7, file: !3, line: 5, size: 64, flags: DIFlagTypePassByValue, elements: !10, identifier: "_ZTSN5Outer5InnerE") -!7 = distinct !DICompositeType(tag: DW_TAG_class_type, name: "Outer", file: !3, line: 3, size: 64, flags: DIFlagTypePassByValue, elements: !8, identifier: "_ZTS5Outer") -!8 = !{!9} -!9 = !DIDerivedType(tag: DW_TAG_member, name: "n", scope: !7, file: !3, line: 12, baseType: !6, size: 64, flags: DIFlagPublic) -!10 = !{!11, !12} -!11 = !DIDerivedType(tag: DW_TAG_member, name: "one", scope: !6, file: !3, line: 8, baseType: !5, size: 32) -!12 = !DIDerivedType(tag: DW_TAG_member, name: "two", scope: !6, file: !3, line: 9, baseType: !13, size: 32, offset: 32) -!13 = !DICompositeType(tag: DW_TAG_enumeration_type, name: "Enum2", scope: !6, file: !3, line: 7, baseType: !14, size: 32, elements: !15, identifier: "_ZTSN5Outer5Inner5Enum2E") -!14 = !DIBasicType(name: "unsigned int", size: 32, encoding: DW_ATE_unsigned) -!15 = !{!16} -!16 = !DIEnumerator(name: "Y", value: 0, isUnsigned: true) -!17 = !{!18} -!18 = !DIEnumerator(name: "X", value: 0, isUnsigned: true) -!19 = !DICompositeType(tag: DW_TAG_enumeration_type, name: "Enum", scope: !20, file: !3, line: 15, baseType: !14, size: 32, elements: !17, identifier: "_ZTSN2Ex4EnumE") -!20 = distinct !DICompositeType(tag: DW_TAG_structure_type, name: "Ex", file: !3, line: 15, size: 8, flags: DIFlagTypePassByValue, elements: !21, identifier: "_ZTS2Ex") -!21 = !{} -!22 = !{!23, !7, !6, !20} -!23 = distinct !DICompositeType(tag: DW_TAG_structure_type, name: "Unused", file: !3, line: 1, size: 8, 
flags: DIFlagTypePassByValue, elements: !21, identifier: "_ZTS6Unused") -!24 = !{!0} -!25 = !{i32 7, !"Dwarf Version", i32 5} -!26 = !{i32 2, !"Debug Info Version", i32 3} -!27 = !{i32 1, !"wchar_size", i32 4} -!28 = !{i32 7, !"uwtable", i32 1} -!29 = !{i32 7, !"frame-pointer", i32 2} -!30 = !{!"clang version 14.0.0"} -!31 = distinct !DISubprogram(name: "fun", linkageName: "_Z3funv", scope: !3, file: !3, line: 16, type: !32, scopeLine: 16, flags: DIFlagPrototyped, spFlags: DISPFlagDefinition, unit: !2, retainedNodes: !21) -!32 = !DISubroutineType(types: !33) -!33 = !{null} -!34 = !DILocalVariable(name: "local", scope: !31, file: !3, line: 16, type: !19) -!35 = !DILocation(line: 16, column: 23, scope: !31) -!36 = !DILocation(line: 16, column: 30, scope: !31) From ae64c5a0fde55ca53d0c9949562a506e784da66c Mon Sep 17 00:00:00 2001 From: Nikita Popov Date: Thu, 23 Dec 2021 09:19:26 +0100 Subject: [PATCH 209/293] [DSE][MemLoc] Handle intrinsics more generically Remove the special casing for intrinsics in MemoryLocation::getForDest() and handle them through the general attribute based code. On the DSE side, this means that isRemovable() now needs to handle more than a hardcoded list of intrinsics. We consider everything apart from volatile memory intrinsics and lifetime markers to be removable. This allows us to perform DSE on intrinsics that DSE has not been specially taught about, using a matrix store as an example here. There is an interesting test change for invariant.start, but I believe that optimization is correct. It only looks a bit odd because the code is immediate UB anyway. Differential Revision: https://reviews.llvm.org/D116210 --- llvm/lib/Analysis/MemoryLocation.cpp | 14 -------- .../Scalar/DeadStoreElimination.cpp | 34 ++++++------------- .../DeadStoreElimination/invariant.start.ll | 10 +++--- .../Transforms/DeadStoreElimination/simple.ll | 5 +-- 4 files changed, 16 insertions(+), 47 deletions(-) diff --git a/llvm/lib/Analysis/MemoryLocation.cpp b/llvm/lib/Analysis/MemoryLocation.cpp index 120f4cda1e35..a877b19df866 100644 --- a/llvm/lib/Analysis/MemoryLocation.cpp +++ b/llvm/lib/Analysis/MemoryLocation.cpp @@ -120,20 +120,6 @@ MemoryLocation MemoryLocation::getForDest(const AnyMemIntrinsic *MI) { Optional MemoryLocation::getForDest(const CallBase *CB, const TargetLibraryInfo &TLI) { - if (const IntrinsicInst *II = dyn_cast(CB)) { - if (auto *MemInst = dyn_cast(CB)) - return getForDest(MemInst); - - switch (II->getIntrinsicID()) { - default: - return None; - case Intrinsic::init_trampoline: - return MemoryLocation::getForArgument(CB, 0, TLI); - case Intrinsic::masked_store: - return MemoryLocation::getForArgument(CB, 1, TLI); - } - } - if (!CB->onlyAccessesArgMemory()) return None; diff --git a/llvm/lib/Transforms/Scalar/DeadStoreElimination.cpp b/llvm/lib/Transforms/Scalar/DeadStoreElimination.cpp index 4c17e2c0bdbb..683ac537bbe4 100644 --- a/llvm/lib/Transforms/Scalar/DeadStoreElimination.cpp +++ b/llvm/lib/Transforms/Scalar/DeadStoreElimination.cpp @@ -182,33 +182,19 @@ static bool isRemovable(Instruction *I) { if (StoreInst *SI = dyn_cast(I)) return SI->isUnordered(); - if (IntrinsicInst *II = dyn_cast(I)) { - switch (II->getIntrinsicID()) { - default: llvm_unreachable("Does not have LocForWrite"); - case Intrinsic::lifetime_end: - // Never remove dead lifetime_end's, e.g. because it is followed by a - // free. + // Note: only get here for calls with analyzable writes. + if (auto *CB = dyn_cast(I)) { + // Don't remove volatile memory intrinsics. 
+ if (auto *MI = dyn_cast(CB)) + return !MI->isVolatile(); + + // Never remove dead lifetime intrinsics, e.g. because they are followed by + // a free. + if (CB->isLifetimeStartOrEnd()) return false; - case Intrinsic::init_trampoline: - // Always safe to remove init_trampoline. - return true; - case Intrinsic::memset: - case Intrinsic::memmove: - case Intrinsic::memcpy: - case Intrinsic::memcpy_inline: - // Don't remove volatile memory intrinsics. - return !cast(II)->isVolatile(); - case Intrinsic::memcpy_element_unordered_atomic: - case Intrinsic::memmove_element_unordered_atomic: - case Intrinsic::memset_element_unordered_atomic: - case Intrinsic::masked_store: - return true; - } - } - // note: only get here for calls with analyzable writes. - if (auto *CB = dyn_cast(I)) return CB->use_empty() && CB->willReturn() && CB->doesNotThrow(); + } return false; } diff --git a/llvm/test/Transforms/DeadStoreElimination/invariant.start.ll b/llvm/test/Transforms/DeadStoreElimination/invariant.start.ll index f00001f967e6..45c19c3588b0 100644 --- a/llvm/test/Transforms/DeadStoreElimination/invariant.start.ll +++ b/llvm/test/Transforms/DeadStoreElimination/invariant.start.ll @@ -4,13 +4,13 @@ declare {}* @llvm.invariant.start.p0i8(i64, i8* nocapture) nounwind readonly -; We cannot remove the store 1 to %p. -; FIXME: By the semantics of invariant.start, the store 3 to p is unreachable. +; We could remove either store here. The first store is dead in the +; conventional sense, because there is a later killing store. The second store +; is undefined behavior by the semantics of invariant.start, and as such +; unreachable. define void @test(i8 *%p) { ; CHECK-LABEL: @test( -; CHECK-NEXT: store i8 1, i8* [[P:%.*]], align 4 -; CHECK-NEXT: [[I:%.*]] = call {}* @llvm.invariant.start.p0i8(i64 1, i8* [[P]]) -; CHECK-NEXT: store i8 3, i8* [[P]], align 4 +; CHECK-NEXT: store i8 3, i8* [[P:%.*]], align 4 ; CHECK-NEXT: ret void ; store i8 1, i8* %p, align 4 diff --git a/llvm/test/Transforms/DeadStoreElimination/simple.ll b/llvm/test/Transforms/DeadStoreElimination/simple.ll index eed7d535857d..cee2b076e8b5 100644 --- a/llvm/test/Transforms/DeadStoreElimination/simple.ll +++ b/llvm/test/Transforms/DeadStoreElimination/simple.ll @@ -194,12 +194,9 @@ define void @test11() { ret void } -; TODO: Specialized store intrinsics should be removed if dead. +; Specialized store intrinsics should be removed if dead. define void @test_matrix_store(i64 %stride) { ; CHECK-LABEL: @test_matrix_store( -; CHECK-NEXT: [[A:%.*]] = alloca [6 x float], align 4 -; CHECK-NEXT: [[CAST:%.*]] = bitcast [6 x float]* [[A]] to float* -; CHECK-NEXT: call void @llvm.matrix.column.major.store.v6f32.i64(<6 x float> zeroinitializer, float* [[CAST]], i64 [[STRIDE:%.*]], i1 false, i32 3, i32 2) ; CHECK-NEXT: ret void ; %a = alloca [6 x float] From ba2b34b1c7d47a5cb1f5c73400356b975cc16298 Mon Sep 17 00:00:00 2001 From: Nikita Popov Date: Fri, 24 Dec 2021 09:39:44 +0100 Subject: [PATCH 210/293] [DSE] Simplify isGuaranteedLoopInvariant() (NFC) We have Value->stripInBoundsConstantOffsets() which does what we want here, but the inbounds requirement isn't actually necessary. We should probably add Value->stripConstantOffsets() as well. 
--- .../Scalar/DeadStoreElimination.cpp | 23 ++++++------------- 1 file changed, 7 insertions(+), 16 deletions(-) diff --git a/llvm/lib/Transforms/Scalar/DeadStoreElimination.cpp b/llvm/lib/Transforms/Scalar/DeadStoreElimination.cpp index 683ac537bbe4..883d3a57117b 100644 --- a/llvm/lib/Transforms/Scalar/DeadStoreElimination.cpp +++ b/llvm/lib/Transforms/Scalar/DeadStoreElimination.cpp @@ -1195,23 +1195,14 @@ struct DSEState { /// loop. In particular, this guarantees that it only references a single /// MemoryLocation during execution of the containing function. bool isGuaranteedLoopInvariant(const Value *Ptr) { - auto IsGuaranteedLoopInvariantBase = [](const Value *Ptr) { - Ptr = Ptr->stripPointerCasts(); - if (auto *I = dyn_cast(Ptr)) - return I->getParent()->isEntryBlock(); - return true; - }; - Ptr = Ptr->stripPointerCasts(); - if (auto *I = dyn_cast(Ptr)) { - if (I->getParent()->isEntryBlock()) - return true; - } - if (auto *GEP = dyn_cast(Ptr)) { - return IsGuaranteedLoopInvariantBase(GEP->getPointerOperand()) && - GEP->hasAllConstantIndices(); - } - return IsGuaranteedLoopInvariantBase(Ptr); + if (auto *GEP = dyn_cast(Ptr)) + if (GEP->hasAllConstantIndices()) + Ptr = GEP->getPointerOperand()->stripPointerCasts(); + + if (auto *I = dyn_cast(Ptr)) + return I->getParent()->isEntryBlock(); + return true; } // Find a MemoryDef writing to \p KillingLoc and dominating \p StartAccess, From ea2d4c5881373cdc12cb4a9f9b0afa3db86d05c6 Mon Sep 17 00:00:00 2001 From: Fangrui Song Date: Fri, 24 Dec 2021 00:55:54 -0800 Subject: [PATCH 211/293] [GlobalISel] Fix -Wunused-function in -DLLVM_ENABLE_ASSERTIONS=off builds after D114198 --- llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp | 2 ++ 1 file changed, 2 insertions(+) diff --git a/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp b/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp index 6b50fa49c063..262dc16f44ab 100644 --- a/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp +++ b/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp @@ -3630,6 +3630,7 @@ Register LegalizerHelper::getVectorElementPointer(Register VecPtr, LLT VecTy, return MIRBuilder.buildPtrAdd(PtrTy, VecPtr, Mul).getReg(0); } +#ifndef NDEBUG /// Check that all vector operands have same number of elements. Other operands /// should be listed in NonVecOp. static bool hasSameNumEltsOnAllVectorOperands( @@ -3665,6 +3666,7 @@ static bool hasSameNumEltsOnAllVectorOperands( return true; } +#endif /// Fill \p DstOps with DstOps that have same number of elements combined as /// the Ty. These DstOps have either scalar type when \p NumElts = 1 or are From 81d69e1bda9e4b6a83f29ba1f614e43ab4700972 Mon Sep 17 00:00:00 2001 From: Nikita Popov Date: Fri, 24 Dec 2021 09:55:44 +0100 Subject: [PATCH 212/293] [DSE] Call isRemovable() after getLocForWriteEx() (NFCI) The only non-trivial change here is that the isReadClobber() check for redundant stores is now on the DefLoc, not the UpperLoc. This is semantically the right location to use, though in practice it makes no difference (the locations are either the same, or the def inst does not read). 
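As a concrete source-level illustration of the two locations mentioned above (the C++
function is hypothetical and not part of the patch; UpperDef/Def follow the naming in
the code):

  #include <cstring>

  void redundant_store_example(char *p) {
    std::memset(p, 0, 16); // UpperDef: UpperLoc covers the 16 bytes at p
    p[4] = 0;              // Def: DefLoc covers only the single byte p + 4
    // The byte store is redundant because the memset already wrote 0 there.
    // Whether it may be deleted depends on whether the store itself reads
    // the memory it writes, so the isReadClobber() query belongs on DefLoc;
    // a plain store reads nothing, which is why there is no difference in
    // practice.
  }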
--- .../Scalar/DeadStoreElimination.cpp | 26 +++++++++---------- 1 file changed, 13 insertions(+), 13 deletions(-) diff --git a/llvm/lib/Transforms/Scalar/DeadStoreElimination.cpp b/llvm/lib/Transforms/Scalar/DeadStoreElimination.cpp index 883d3a57117b..016a59f8a069 100644 --- a/llvm/lib/Transforms/Scalar/DeadStoreElimination.cpp +++ b/llvm/lib/Transforms/Scalar/DeadStoreElimination.cpp @@ -1672,12 +1672,12 @@ struct DSEState { dbgs() << "Trying to eliminate MemoryDefs at the end of the function\n"); for (MemoryDef *Def : llvm::reverse(MemDefs)) { - if (SkipStores.contains(Def) || !isRemovable(Def->getMemoryInst())) + if (SkipStores.contains(Def)) continue; Instruction *DefI = Def->getMemoryInst(); auto DefLoc = getLocForWriteEx(DefI); - if (!DefLoc) + if (!DefLoc || !isRemovable(DefI)) continue; // NOTE: Currently eliminating writes at the end of a function is limited @@ -1866,9 +1866,14 @@ struct DSEState { LLVM_DEBUG(dbgs() << "Trying to eliminate MemoryDefs that write the " "already existing value\n"); for (auto *Def : MemDefs) { - if (SkipStores.contains(Def) || MSSA.isLiveOnEntryDef(Def) || - !isRemovable(Def->getMemoryInst())) + if (SkipStores.contains(Def) || MSSA.isLiveOnEntryDef(Def)) continue; + + Instruction *DefInst = Def->getMemoryInst(); + auto MaybeDefLoc = getLocForWriteEx(DefInst); + if (!MaybeDefLoc || !isRemovable(DefInst)) + return false; + MemoryDef *UpperDef; // To conserve compile-time, we avoid walking to the next clobbering def. // Instead, we just try to get the optimized access, if it exists. DSE @@ -1880,17 +1885,14 @@ struct DSEState { if (!UpperDef || MSSA.isLiveOnEntryDef(UpperDef)) continue; - Instruction *DefInst = Def->getMemoryInst(); Instruction *UpperInst = UpperDef->getMemoryInst(); - auto IsRedundantStore = [this, DefInst, - UpperInst](MemoryLocation UpperLoc) { + auto IsRedundantStore = [&]() { if (DefInst->isIdenticalTo(UpperInst)) return true; if (auto *MemSetI = dyn_cast(UpperInst)) { if (auto *SI = dyn_cast(DefInst)) { - auto MaybeDefLoc = getLocForWriteEx(DefInst); - if (!MaybeDefLoc) - return false; + // MemSetInst must have a write location. + MemoryLocation UpperLoc = *getLocForWriteEx(UpperInst); int64_t InstWriteOffset = 0; int64_t DepWriteOffset = 0; auto OR = isOverwrite(UpperInst, DefInst, UpperLoc, *MaybeDefLoc, @@ -1903,9 +1905,7 @@ struct DSEState { return false; }; - auto MaybeUpperLoc = getLocForWriteEx(UpperInst); - if (!MaybeUpperLoc || !IsRedundantStore(*MaybeUpperLoc) || - isReadClobber(*MaybeUpperLoc, DefInst)) + if (!IsRedundantStore() || isReadClobber(*MaybeDefLoc, DefInst)) continue; LLVM_DEBUG(dbgs() << "DSE: Remove No-Op Store:\n DEAD: " << *DefInst << '\n'); From 2b8a703858ea994f6665fc35220f2ea56cabe39d Mon Sep 17 00:00:00 2001 From: Nikita Popov Date: Fri, 24 Dec 2021 10:14:55 +0100 Subject: [PATCH 213/293] [DSE] Avoid calling isRemovable() on non-analyzable location (NFC) At this point the instruction may either have an analyzable write or be a terminator. For terminators, isRemovable() is not necessarily well-defined. Move the check until after we have ensured that it is not a terminator. 
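A small source-level illustration of the two kinds of defs involved (hypothetical
example, not from the patch):

  #include <cstdlib>

  void def_kinds_example(int *p) {
    *p = 0;        // analyzable write: isRemovable() is well-defined for this
    std::free(p);  // memory terminator: DSE handles it separately, so
                   // isRemovable() should only be queried once this case has
                   // been ruled out
  }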
--- .../Transforms/Scalar/DeadStoreElimination.cpp | 15 ++++++++++----- 1 file changed, 10 insertions(+), 5 deletions(-) diff --git a/llvm/lib/Transforms/Scalar/DeadStoreElimination.cpp b/llvm/lib/Transforms/Scalar/DeadStoreElimination.cpp index 016a59f8a069..19770c46b68c 100644 --- a/llvm/lib/Transforms/Scalar/DeadStoreElimination.cpp +++ b/llvm/lib/Transforms/Scalar/DeadStoreElimination.cpp @@ -1704,13 +1704,19 @@ struct DSEState { /// \returns true if \p Def is a no-op store, either because it /// directly stores back a loaded value or stores zero to a calloced object. bool storeIsNoop(MemoryDef *Def, const Value *DefUO) { - StoreInst *Store = dyn_cast(Def->getMemoryInst()); - MemSetInst *MemSet = dyn_cast(Def->getMemoryInst()); + Instruction *DefI = Def->getMemoryInst(); + StoreInst *Store = dyn_cast(DefI); + MemSetInst *MemSet = dyn_cast(DefI); Constant *StoredConstant = nullptr; if (Store) StoredConstant = dyn_cast(Store->getOperand(0)); - if (MemSet) + else if (MemSet) StoredConstant = dyn_cast(MemSet->getValue()); + else + return false; + + if (!isRemovable(DefI)) + return false; if (StoredConstant && StoredConstant->isNullValue()) { auto *DefUOInst = dyn_cast(DefUO); @@ -2065,8 +2071,7 @@ static bool eliminateDeadStores(Function &F, AliasAnalysis &AA, MemorySSA &MSSA, } // Check if the store is a no-op. - if (!Shortend && isRemovable(KillingI) && - State.storeIsNoop(KillingDef, KillingUndObj)) { + if (!Shortend && State.storeIsNoop(KillingDef, KillingUndObj)) { LLVM_DEBUG(dbgs() << "DSE: Remove No-Op Store:\n DEAD: " << *KillingI << '\n'); State.deleteDeadInstruction(KillingI); From 034e66e76c6f0f76aa7c67ba4edf4eba108bcf51 Mon Sep 17 00:00:00 2001 From: Nikita Popov Date: Fri, 24 Dec 2021 09:37:25 +0100 Subject: [PATCH 214/293] [DSE] Assert analyzable write in isRemovable() (NFC) As requested on D116210. The function is not necessarily well-defined without this precondition. --- .../Scalar/DeadStoreElimination.cpp | 49 ++++++++++--------- 1 file changed, 25 insertions(+), 24 deletions(-) diff --git a/llvm/lib/Transforms/Scalar/DeadStoreElimination.cpp b/llvm/lib/Transforms/Scalar/DeadStoreElimination.cpp index 19770c46b68c..a5b69e863e7d 100644 --- a/llvm/lib/Transforms/Scalar/DeadStoreElimination.cpp +++ b/llvm/lib/Transforms/Scalar/DeadStoreElimination.cpp @@ -175,30 +175,6 @@ static cl::opt using OverlapIntervalsTy = std::map; using InstOverlapIntervalsTy = DenseMap; -/// If the value of this instruction and the memory it writes to is unused, may -/// we delete this instruction? -static bool isRemovable(Instruction *I) { - // Don't remove volatile/atomic stores. - if (StoreInst *SI = dyn_cast(I)) - return SI->isUnordered(); - - // Note: only get here for calls with analyzable writes. - if (auto *CB = dyn_cast(I)) { - // Don't remove volatile memory intrinsics. - if (auto *MI = dyn_cast(CB)) - return !MI->isVolatile(); - - // Never remove dead lifetime intrinsics, e.g. because they are followed by - // a free. - if (CB->isLifetimeStartOrEnd()) - return false; - - return CB->use_empty() && CB->willReturn() && CB->doesNotThrow(); - } - - return false; -} - /// Returns true if the end of this instruction can be safely shortened in /// length. static bool isShortenableAtTheEnd(Instruction *I) { @@ -1024,6 +1000,31 @@ struct DSEState { return MemoryLocation::getOrNone(I); } + /// Assuming this instruction has a dead analyzable write, can we delete + /// this instruction? 
+ bool isRemovable(Instruction *I) { + assert(getLocForWriteEx(I) && "Must have analyzable write"); + + // Don't remove volatile/atomic stores. + if (StoreInst *SI = dyn_cast(I)) + return SI->isUnordered(); + + if (auto *CB = dyn_cast(I)) { + // Don't remove volatile memory intrinsics. + if (auto *MI = dyn_cast(CB)) + return !MI->isVolatile(); + + // Never remove dead lifetime intrinsics, e.g. because they are followed + // by a free. + if (CB->isLifetimeStartOrEnd()) + return false; + + return CB->use_empty() && CB->willReturn() && CB->doesNotThrow(); + } + + return false; + } + /// Returns true if \p UseInst completely overwrites \p DefLoc /// (stored by \p DefInst). bool isCompleteOverwrite(const MemoryLocation &DefLoc, Instruction *DefInst, From 24c68ea1eb4fc0d0e782424ddb02da9e8c53ddf5 Mon Sep 17 00:00:00 2001 From: Phoebe Wang Date: Fri, 24 Dec 2021 17:12:56 +0800 Subject: [PATCH 215/293] Reland "[X86][MS-InlineAsm] Use exact conditions to recognize MS global variables" This reverts commit a954558e878ed9e97e99036229e99af8c6b6c881. Thanks Yuanfang's help. I think I found the root cause of the buildbot fail. The failed test has both Memory and Immediate X86Operand. All data of different operand kinds share the same memory space by a union definition. So it has chance we get the wrong result if we don't check the operand kind. It's probably it happen to be the correct value in my local environment so that I can't reproduce the fail. Differential Revision: https://reviews.llvm.org/D116090 --- clang/test/CodeGen/ms-inline-asm-functions.c | 2 +- llvm/lib/Target/X86/AsmParser/X86AsmParser.cpp | 3 ++- llvm/lib/Target/X86/AsmParser/X86Operand.h | 7 +++---- 3 files changed, 6 insertions(+), 6 deletions(-) diff --git a/clang/test/CodeGen/ms-inline-asm-functions.c b/clang/test/CodeGen/ms-inline-asm-functions.c index c958d8803871..1a6ead9286df 100644 --- a/clang/test/CodeGen/ms-inline-asm-functions.c +++ b/clang/test/CodeGen/ms-inline-asm-functions.c @@ -39,7 +39,7 @@ int bar() { int baz() { // CHECK-LABEL: _baz: __asm mov eax, k; - // CHECK: movl k, %eax + // CHECK: movl _k, %eax __asm mov eax, kptr; // CHECK: movl _kptr, %eax } diff --git a/llvm/lib/Target/X86/AsmParser/X86AsmParser.cpp b/llvm/lib/Target/X86/AsmParser/X86AsmParser.cpp index 6bae55695f3d..2ba0b97229cc 100644 --- a/llvm/lib/Target/X86/AsmParser/X86AsmParser.cpp +++ b/llvm/lib/Target/X86/AsmParser/X86AsmParser.cpp @@ -1759,7 +1759,8 @@ bool X86AsmParser::CreateMemForMSInlineAsm( // registers in a mmory expression, and though unaccessible via rip/eip. if (IsGlobalLV && (BaseReg || IndexReg)) { Operands.push_back(X86Operand::CreateMem(getPointerWidth(), Disp, Start, - End, Size, Identifier, Decl)); + End, Size, Identifier, Decl, + FrontendSize)); return false; } // Otherwise, we set the base register to a non-zero value diff --git a/llvm/lib/Target/X86/AsmParser/X86Operand.h b/llvm/lib/Target/X86/AsmParser/X86Operand.h index 6d20c6af5fd2..67b1244708a8 100644 --- a/llvm/lib/Target/X86/AsmParser/X86Operand.h +++ b/llvm/lib/Target/X86/AsmParser/X86Operand.h @@ -286,10 +286,9 @@ struct X86Operand final : public MCParsedAsmOperand { bool isOffsetOfLocal() const override { return isImm() && Imm.LocalRef; } bool isMemPlaceholder(const MCInstrDesc &Desc) const override { - // Add more restrictions to avoid the use of global symbols. This helps - // with reducing the code size. 
- return !Desc.isRematerializable() && !Desc.isCall() && isMem() && - !Mem.BaseReg && !Mem.IndexReg; + // Only MS InlineAsm uses global variables with registers rather than + // rip/eip. + return isMem() && !Mem.DefaultBaseReg && Mem.FrontendSize; } bool needAddressOf() const override { return AddressOf; } From 72d22017856082e3fb79dac8454a4d2b5523d37b Mon Sep 17 00:00:00 2001 From: Nikita Popov Date: Fri, 24 Dec 2021 10:42:44 +0100 Subject: [PATCH 216/293] [DSE] Rename getLocForWriteEx() to getLocForWrite() (NFC) We used to have both getLocForWrite() and getLocForWriteEx(). Now that we only have a single method, the "ex" suffix no longer makes sense. --- .../Scalar/DeadStoreElimination.cpp | 24 +++++++++---------- 1 file changed, 12 insertions(+), 12 deletions(-) diff --git a/llvm/lib/Transforms/Scalar/DeadStoreElimination.cpp b/llvm/lib/Transforms/Scalar/DeadStoreElimination.cpp index a5b69e863e7d..5e5abb9d33c5 100644 --- a/llvm/lib/Transforms/Scalar/DeadStoreElimination.cpp +++ b/llvm/lib/Transforms/Scalar/DeadStoreElimination.cpp @@ -797,7 +797,7 @@ struct DSEState { auto *MD = dyn_cast_or_null(MA); if (MD && MemDefs.size() < MemorySSADefsPerBlockLimit && - (getLocForWriteEx(&I) || isMemTerminatorInst(&I))) + (getLocForWrite(&I) || isMemTerminatorInst(&I))) MemDefs.push_back(MD); } } @@ -984,7 +984,7 @@ struct DSEState { return I.first->second; } - Optional getLocForWriteEx(Instruction *I) const { + Optional getLocForWrite(Instruction *I) const { if (!I->mayWriteToMemory()) return None; @@ -1003,7 +1003,7 @@ struct DSEState { /// Assuming this instruction has a dead analyzable write, can we delete /// this instruction? bool isRemovable(Instruction *I) { - assert(getLocForWriteEx(I) && "Must have analyzable write"); + assert(getLocForWrite(I) && "Must have analyzable write"); // Don't remove volatile/atomic stores. if (StoreInst *SI = dyn_cast(I)) @@ -1040,7 +1040,7 @@ struct DSEState { return false; int64_t InstWriteOffset, DepWriteOffset; - if (auto CC = getLocForWriteEx(UseInst)) + if (auto CC = getLocForWrite(UseInst)) return isOverwrite(UseInst, DefInst, *CC, DefLoc, InstWriteOffset, DepWriteOffset) == OW_Complete; return false; @@ -1052,7 +1052,7 @@ struct DSEState { << *Def->getMemoryInst() << ") is at the end the function \n"); - auto MaybeLoc = getLocForWriteEx(Def->getMemoryInst()); + auto MaybeLoc = getLocForWrite(Def->getMemoryInst()); if (!MaybeLoc) { LLVM_DEBUG(dbgs() << " ... could not get location for write.\n"); return false; @@ -1315,7 +1315,7 @@ struct DSEState { // If Current does not have an analyzable write location or is not // removable, skip it. 
- CurrentLoc = getLocForWriteEx(CurrentI); + CurrentLoc = getLocForWrite(CurrentI); if (!CurrentLoc || !isRemovable(CurrentI)) { CanOptimize = false; continue; @@ -1677,7 +1677,7 @@ struct DSEState { continue; Instruction *DefI = Def->getMemoryInst(); - auto DefLoc = getLocForWriteEx(DefI); + auto DefLoc = getLocForWrite(DefI); if (!DefLoc || !isRemovable(DefI)) continue; @@ -1850,7 +1850,7 @@ struct DSEState { bool Changed = false; for (auto OI : IOL) { Instruction *DeadI = OI.first; - MemoryLocation Loc = *getLocForWriteEx(DeadI); + MemoryLocation Loc = *getLocForWrite(DeadI); assert(isRemovable(DeadI) && "Expect only removable instruction"); const Value *Ptr = Loc.Ptr->stripPointerCasts(); @@ -1877,7 +1877,7 @@ struct DSEState { continue; Instruction *DefInst = Def->getMemoryInst(); - auto MaybeDefLoc = getLocForWriteEx(DefInst); + auto MaybeDefLoc = getLocForWrite(DefInst); if (!MaybeDefLoc || !isRemovable(DefInst)) return false; @@ -1899,7 +1899,7 @@ struct DSEState { if (auto *MemSetI = dyn_cast(UpperInst)) { if (auto *SI = dyn_cast(DefInst)) { // MemSetInst must have a write location. - MemoryLocation UpperLoc = *getLocForWriteEx(UpperInst); + MemoryLocation UpperLoc = *getLocForWrite(UpperInst); int64_t InstWriteOffset = 0; int64_t DepWriteOffset = 0; auto OR = isOverwrite(UpperInst, DefInst, UpperLoc, *MaybeDefLoc, @@ -1943,7 +1943,7 @@ static bool eliminateDeadStores(Function &F, AliasAnalysis &AA, MemorySSA &MSSA, MaybeKillingLoc = State.getLocForTerminator(KillingI).map( [](const std::pair &P) { return P.first; }); else - MaybeKillingLoc = State.getLocForWriteEx(KillingI); + MaybeKillingLoc = State.getLocForWrite(KillingI); if (!MaybeKillingLoc) { LLVM_DEBUG(dbgs() << "Failed to find analyzable write location for " @@ -2007,7 +2007,7 @@ static bool eliminateDeadStores(Function &F, AliasAnalysis &AA, MemorySSA &MSSA, if (!DebugCounter::shouldExecute(MemorySSACounter)) continue; - MemoryLocation DeadLoc = *State.getLocForWriteEx(DeadI); + MemoryLocation DeadLoc = *State.getLocForWrite(DeadI); if (IsMemTerm) { const Value *DeadUndObj = getUnderlyingObject(DeadLoc.Ptr); From 90095a0b65bdbb56165937d5b7c44e1bd2c251d6 Mon Sep 17 00:00:00 2001 From: Nikita Popov Date: Fri, 24 Dec 2021 10:45:35 +0100 Subject: [PATCH 217/293] [DSE] Remove unnecessary check in getLocForWrite() (NFC) MemoryLocation::getForDest() checks this itself, call it directly. --- llvm/lib/Transforms/Scalar/DeadStoreElimination.cpp | 8 +------- 1 file changed, 1 insertion(+), 7 deletions(-) diff --git a/llvm/lib/Transforms/Scalar/DeadStoreElimination.cpp b/llvm/lib/Transforms/Scalar/DeadStoreElimination.cpp index 5e5abb9d33c5..c80a2ef58e92 100644 --- a/llvm/lib/Transforms/Scalar/DeadStoreElimination.cpp +++ b/llvm/lib/Transforms/Scalar/DeadStoreElimination.cpp @@ -988,14 +988,8 @@ struct DSEState { if (!I->mayWriteToMemory()) return None; - if (auto *CB = dyn_cast(I)) { - // If the functions may write to memory we do not know about, bail out. - if (!CB->onlyAccessesArgMemory() && - !CB->onlyAccessesInaccessibleMemOrArgMem()) - return None; - + if (auto *CB = dyn_cast(I)) return MemoryLocation::getForDest(CB, TLI); - } return MemoryLocation::getOrNone(I); } From eb91d91b7a8e5a3f66657cb5f20284de0f9e0364 Mon Sep 17 00:00:00 2001 From: Nikita Popov Date: Fri, 24 Dec 2021 11:25:25 +0100 Subject: [PATCH 218/293] [DSE] Fix typo in recent commit This fixes a typo in 81d69e1bda9e4b6a83f29ba1f614e43ab4700972. Of course we should only skip the particular store if it isn't removable, not bail out of the whole loop. 
Add a test to cover this case. --- .../Scalar/DeadStoreElimination.cpp | 2 +- .../stores-of-existing-values.ll | 24 +++++++++++++++++++ 2 files changed, 25 insertions(+), 1 deletion(-) diff --git a/llvm/lib/Transforms/Scalar/DeadStoreElimination.cpp b/llvm/lib/Transforms/Scalar/DeadStoreElimination.cpp index c80a2ef58e92..eadbb4293539 100644 --- a/llvm/lib/Transforms/Scalar/DeadStoreElimination.cpp +++ b/llvm/lib/Transforms/Scalar/DeadStoreElimination.cpp @@ -1873,7 +1873,7 @@ struct DSEState { Instruction *DefInst = Def->getMemoryInst(); auto MaybeDefLoc = getLocForWrite(DefInst); if (!MaybeDefLoc || !isRemovable(DefInst)) - return false; + continue; MemoryDef *UpperDef; // To conserve compile-time, we avoid walking to the next clobbering def. diff --git a/llvm/test/Transforms/DeadStoreElimination/stores-of-existing-values.ll b/llvm/test/Transforms/DeadStoreElimination/stores-of-existing-values.ll index f3943326fc6f..e5630cca1aba 100644 --- a/llvm/test/Transforms/DeadStoreElimination/stores-of-existing-values.ll +++ b/llvm/test/Transforms/DeadStoreElimination/stores-of-existing-values.ll @@ -650,3 +650,27 @@ define i8 @memset_optimized_access(i8* noalias %dst, i8* noalias %src) { tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %dst, i8* %src, i64 16, i1 false) ret i8 %l } + +; The @use() call is a later non-removable store, but should not affect the +; removal of the store in the if block. +define void @later_non_removable_store(i1 %c, i8* %p) { +; CHECK-LABEL: @later_non_removable_store( +; CHECK-NEXT: store i8 1, i8* [[P:%.*]], align 1 +; CHECK-NEXT: br i1 [[C:%.*]], label [[IF:%.*]], label [[EXIT:%.*]] +; CHECK: if: +; CHECK-NEXT: br label [[EXIT]] +; CHECK: exit: +; CHECK-NEXT: call void @use(i8* [[P]]) #[[ATTR6:[0-9]+]] +; CHECK-NEXT: ret void +; + store i8 1, i8* %p + br i1 %c, label %if, label %exit + +if: + store i8 1, i8* %p + br label %exit + +exit: + call void @use(i8* %p) argmemonly + ret void +} From 969a51ff363263a3b5f2df55eba6b4d392bf30c0 Mon Sep 17 00:00:00 2001 From: Krasimir Georgiev Date: Fri, 24 Dec 2021 12:01:36 +0100 Subject: [PATCH 219/293] Revert "[ASan] Moved optimized callbacks into a separate library." We need some internal updates for this, shared directly with the author. This reverts commit 71b3bfde9cd296525bbf5b1619e199074156d12b. --- clang/lib/Driver/ToolChains/CommonArgs.cpp | 5 ---- clang/test/Driver/sanitizer-ld.c | 2 +- compiler-rt/lib/asan/CMakeLists.txt | 33 +--------------------- compiler-rt/lib/asan/asan_rtl_static.cpp | 15 ---------- compiler-rt/lib/asan/tests/CMakeLists.txt | 2 -- 5 files changed, 2 insertions(+), 55 deletions(-) delete mode 100644 compiler-rt/lib/asan/asan_rtl_static.cpp diff --git a/clang/lib/Driver/ToolChains/CommonArgs.cpp b/clang/lib/Driver/ToolChains/CommonArgs.cpp index 267625c70b25..407f81a2ae09 100644 --- a/clang/lib/Driver/ToolChains/CommonArgs.cpp +++ b/clang/lib/Driver/ToolChains/CommonArgs.cpp @@ -826,11 +826,6 @@ collectSanitizerRuntimes(const ToolChain &TC, const ArgList &Args, if (SanArgs.needsStatsRt() && SanArgs.linkRuntimes()) StaticRuntimes.push_back("stats_client"); - // Always link the static runtime regardless of DSO or executable. - if (SanArgs.needsAsanRt()) { - HelperStaticRuntimes.push_back("asan_static"); - } - // Collect static runtimes. if (Args.hasArg(options::OPT_shared)) { // Don't link static runtimes into DSOs. 
diff --git a/clang/test/Driver/sanitizer-ld.c b/clang/test/Driver/sanitizer-ld.c index ea8c49f2384a..d62e19fd4021 100644 --- a/clang/test/Driver/sanitizer-ld.c +++ b/clang/test/Driver/sanitizer-ld.c @@ -22,7 +22,7 @@ // RUN: --sysroot=%S/Inputs/basic_linux_tree \ // RUN: | FileCheck --check-prefix=CHECK-ASAN-NO-LINK-RUNTIME-LINUX %s // -// CHECK-ASAN-NO-LINK-RUNTIME-LINUX-NOT: libclang_rt.asan-x86_64 +// CHECK-ASAN-NO-LINK-RUNTIME-LINUX-NOT: libclang_rt.asan // RUN: %clang -no-canonical-prefixes %s -### -o %t.o 2>&1 \ // RUN: -target i386-unknown-linux -fuse-ld=ld -fsanitize=address -shared-libsan \ diff --git a/compiler-rt/lib/asan/CMakeLists.txt b/compiler-rt/lib/asan/CMakeLists.txt index b79b7278f6db..2e63c8d05168 100644 --- a/compiler-rt/lib/asan/CMakeLists.txt +++ b/compiler-rt/lib/asan/CMakeLists.txt @@ -34,6 +34,7 @@ set(ASAN_SOURCES if (NOT WIN32 AND NOT APPLE) list(APPEND ASAN_SOURCES + asan_rtl_x86_64.S asan_interceptors_vfork.S ) endif() @@ -42,16 +43,6 @@ set(ASAN_CXX_SOURCES asan_new_delete.cpp ) -set(ASAN_STATIC_SOURCES - asan_rtl_static.cpp - ) - -if (NOT WIN32 AND NOT APPLE) - list(APPEND ASAN_STATIC_SOURCES - asan_rtl_x86_64.S - ) -endif() - set(ASAN_PREINIT_SOURCES asan_preinit.cpp ) @@ -144,12 +135,6 @@ if(NOT APPLE) ADDITIONAL_HEADERS ${ASAN_HEADERS} CFLAGS ${ASAN_CFLAGS} DEFS ${ASAN_COMMON_DEFINITIONS}) - add_compiler_rt_object_libraries(RTAsan_static - ARCHS ${ASAN_SUPPORTED_ARCH} - SOURCES ${ASAN_STATIC_SOURCES} - ADDITIONAL_HEADERS ${ASAN_HEADERS} - CFLAGS ${ASAN_CFLAGS} - DEFS ${ASAN_COMMON_DEFINITIONS}) add_compiler_rt_object_libraries(RTAsan_preinit ARCHS ${ASAN_SUPPORTED_ARCH} SOURCES ${ASAN_PREINIT_SOURCES} @@ -191,14 +176,6 @@ if(APPLE) LINK_FLAGS ${WEAK_SYMBOL_LINK_FLAGS} DEFS ${ASAN_DYNAMIC_DEFINITIONS} PARENT_TARGET asan) - - add_compiler_rt_runtime(clang_rt.asan_static - STATIC - ARCHS ${ASAN_SUPPORTED_ARCH} - OBJECT_LIBS RTAsan_static - CFLAGS ${ASAN_CFLAGS} - DEFS ${ASAN_COMMON_DEFINITIONS} - PARENT_TARGET asan) else() # Build separate libraries for each target. @@ -230,14 +207,6 @@ else() DEFS ${ASAN_COMMON_DEFINITIONS} PARENT_TARGET asan) - add_compiler_rt_runtime(clang_rt.asan_static - STATIC - ARCHS ${ASAN_SUPPORTED_ARCH} - OBJECT_LIBS RTAsan_static - CFLAGS ${ASAN_CFLAGS} - DEFS ${ASAN_COMMON_DEFINITIONS} - PARENT_TARGET asan) - add_compiler_rt_runtime(clang_rt.asan-preinit STATIC ARCHS ${ASAN_SUPPORTED_ARCH} diff --git a/compiler-rt/lib/asan/asan_rtl_static.cpp b/compiler-rt/lib/asan/asan_rtl_static.cpp deleted file mode 100644 index 74e6eb0ddf1c..000000000000 --- a/compiler-rt/lib/asan/asan_rtl_static.cpp +++ /dev/null @@ -1,15 +0,0 @@ -//===-- asan_static_rtl.cpp -----------------------------------------------===// -// -// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. -// See https://llvm.org/LICENSE.txt for license information. -// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -// -//===----------------------------------------------------------------------===// -// -// This file is a part of AddressSanitizer, an address sanity checker. -// -// Main file of the ASan run-time library. -//===----------------------------------------------------------------------===// - -// This file is empty for now. Main reason to have it is workaround for Windows -// build, which complains because no files are part of the asan_static lib. 
diff --git a/compiler-rt/lib/asan/tests/CMakeLists.txt b/compiler-rt/lib/asan/tests/CMakeLists.txt index 95a324766ae7..cc375acf2dfb 100644 --- a/compiler-rt/lib/asan/tests/CMakeLists.txt +++ b/compiler-rt/lib/asan/tests/CMakeLists.txt @@ -261,7 +261,6 @@ if(COMPILER_RT_CAN_EXECUTE_TESTS AND NOT ANDROID) set(ASAN_TEST_RUNTIME_OBJECTS $ $ - $ $ $ $ @@ -287,7 +286,6 @@ if(ANDROID) # Test w/o ASan instrumentation. Link it with ASan statically. add_executable(AsanNoinstTest # FIXME: .arch? $ - $ $ $ $ From 3b4c040cca9d35cfb3f0590fb15f7e7b4e2fe2c0 Mon Sep 17 00:00:00 2001 From: LLVM GN Syncbot Date: Fri, 24 Dec 2021 11:05:23 +0000 Subject: [PATCH 220/293] [gn build] Port 969a51ff3632 --- llvm/utils/gn/secondary/compiler-rt/lib/asan/BUILD.gn | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/llvm/utils/gn/secondary/compiler-rt/lib/asan/BUILD.gn b/llvm/utils/gn/secondary/compiler-rt/lib/asan/BUILD.gn index f05168bd57c2..b397621d05d8 100644 --- a/llvm/utils/gn/secondary/compiler-rt/lib/asan/BUILD.gn +++ b/llvm/utils/gn/secondary/compiler-rt/lib/asan/BUILD.gn @@ -199,7 +199,7 @@ source_set("unused") { "asan_win_dynamic_runtime_thunk.cpp", # FIXME: These are in clang_rt.asan_static in the CMake build. - "asan_rtl_static.cpp", + "asan_rtl_x86_64.S", ] } From bb84dd81590bbcaa2277f7f15700ac842af8932e Mon Sep 17 00:00:00 2001 From: Alexandros Lamprineas Date: Fri, 24 Dec 2021 11:01:36 +0000 Subject: [PATCH 221/293] [AArch64] Add a tablegen pattern for RADDHN/RADDHN2. Converts RSHRN/RSHRN2 to RADDHN/RADDHN2 when the shift amount is half the width of the vector element. The latter has twice the throughput and half the latency on Arm out-of-order cores. Setting up the zero register adds no latency. Differential Revision: https://reviews.llvm.org/D116166 --- llvm/lib/Target/AArch64/AArch64InstrInfo.td | 28 +++++++++++ .../CodeGen/AArch64/arm64-raddhn-combine.ll | 48 +++++++++++++++++++ 2 files changed, 76 insertions(+) create mode 100644 llvm/test/CodeGen/AArch64/arm64-raddhn-combine.ll diff --git a/llvm/lib/Target/AArch64/AArch64InstrInfo.td b/llvm/lib/Target/AArch64/AArch64InstrInfo.td index 27013b51d966..ebccc07edc7a 100644 --- a/llvm/lib/Target/AArch64/AArch64InstrInfo.td +++ b/llvm/lib/Target/AArch64/AArch64InstrInfo.td @@ -6603,6 +6603,34 @@ defm USHR : SIMDVectorRShiftBHSD<1, 0b00000, "ushr", AArch64vlshr>; defm USRA : SIMDVectorRShiftBHSDTied<1, 0b00010, "usra", TriOpFrag<(add node:$LHS, (AArch64vlshr node:$MHS, node:$RHS))> >; +// RADDHN patterns for when RSHRN shifts by half the size of the vector element +def : Pat<(v8i8 (int_aarch64_neon_rshrn (v8i16 V128:$Vn), (i32 8))), + (RADDHNv8i16_v8i8 V128:$Vn, (v8i16 (MOVIv2d_ns (i32 0))))>; +def : Pat<(v4i16 (int_aarch64_neon_rshrn (v4i32 V128:$Vn), (i32 16))), + (RADDHNv4i32_v4i16 V128:$Vn, (v4i32 (MOVIv2d_ns (i32 0))))>; +def : Pat<(v2i32 (int_aarch64_neon_rshrn (v2i64 V128:$Vn), (i32 32))), + (RADDHNv2i64_v2i32 V128:$Vn, (v2i64 (MOVIv2d_ns (i32 0))))>; + +// RADDHN2 patterns for when RSHRN shifts by half the size of the vector element +def : Pat<(v16i8 (concat_vectors + (v8i8 V64:$Vd), + (v8i8 (int_aarch64_neon_rshrn (v8i16 V128:$Vn), (i32 8))))), + (RADDHNv8i16_v16i8 + (INSERT_SUBREG (IMPLICIT_DEF), V64:$Vd, dsub), V128:$Vn, + (v8i16 (MOVIv2d_ns (i32 0))))>; +def : Pat<(v8i16 (concat_vectors + (v4i16 V64:$Vd), + (v4i16 (int_aarch64_neon_rshrn (v4i32 V128:$Vn), (i32 16))))), + (RADDHNv4i32_v8i16 + (INSERT_SUBREG (IMPLICIT_DEF), V64:$Vd, dsub), V128:$Vn, + (v4i32 (MOVIv2d_ns (i32 0))))>; +def : Pat<(v4i32 (concat_vectors + (v2i32 
V64:$Vd), + (v2i32 (int_aarch64_neon_rshrn (v2i64 V128:$Vn), (i32 32))))), + (RADDHNv2i64_v4i32 + (INSERT_SUBREG (IMPLICIT_DEF), V64:$Vd, dsub), V128:$Vn, + (v2i64 (MOVIv2d_ns (i32 0))))>; + // SHRN patterns for when a logical right shift was used instead of arithmetic // (the immediate guarantees no sign bits actually end up in the result so it // doesn't matter). diff --git a/llvm/test/CodeGen/AArch64/arm64-raddhn-combine.ll b/llvm/test/CodeGen/AArch64/arm64-raddhn-combine.ll new file mode 100644 index 000000000000..a2547782f6b2 --- /dev/null +++ b/llvm/test/CodeGen/AArch64/arm64-raddhn-combine.ll @@ -0,0 +1,48 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc < %s -mtriple aarch64-none-linux-gnu | FileCheck %s + +define <16 x i8> @test_combine_v8i16_to_v16i8(<8 x i16> %x, <8 x i16> %y) { +; CHECK-LABEL: test_combine_v8i16_to_v16i8: +; CHECK: // %bb.0: // %entry +; CHECK-NEXT: movi v2.2d, #0000000000000000 +; CHECK-NEXT: raddhn v0.8b, v0.8h, v2.8h +; CHECK-NEXT: raddhn2 v0.16b, v1.8h, v2.8h +; CHECK-NEXT: ret +entry: + %res = call <8 x i8> @llvm.aarch64.neon.rshrn.v8i8(<8 x i16> %x, i32 8) + %res2 = call <8 x i8> @llvm.aarch64.neon.rshrn.v8i8(<8 x i16> %y, i32 8) + %shuffle = shufflevector <8 x i8> %res, <8 x i8> %res2, <16 x i32> + ret <16 x i8> %shuffle +} + +define <8 x i16> @test_combine_v4i32_to_v8i16(<4 x i32> %x, <4 x i32> %y) { +; CHECK-LABEL: test_combine_v4i32_to_v8i16: +; CHECK: // %bb.0: // %entry +; CHECK-NEXT: movi v2.2d, #0000000000000000 +; CHECK-NEXT: raddhn v0.4h, v0.4s, v2.4s +; CHECK-NEXT: raddhn2 v0.8h, v1.4s, v2.4s +; CHECK-NEXT: ret +entry: + %res = call <4 x i16> @llvm.aarch64.neon.rshrn.v4i16(<4 x i32> %x, i32 16) + %res2 = call <4 x i16> @llvm.aarch64.neon.rshrn.v4i16(<4 x i32> %y, i32 16) + %shuffle = shufflevector <4 x i16> %res, <4 x i16> %res2, <8 x i32> + ret <8 x i16> %shuffle +} + +define <4 x i32> @test_combine_v2i64_to_v4i32(<2 x i64> %x, <2 x i64> %y) { +; CHECK-LABEL: test_combine_v2i64_to_v4i32: +; CHECK: // %bb.0: // %entry +; CHECK-NEXT: movi v2.2d, #0000000000000000 +; CHECK-NEXT: raddhn v0.2s, v0.2d, v2.2d +; CHECK-NEXT: raddhn2 v0.4s, v1.2d, v2.2d +; CHECK-NEXT: ret +entry: + %res = call <2 x i32> @llvm.aarch64.neon.rshrn.v2i32(<2 x i64> %x, i32 32) + %res2 = call <2 x i32> @llvm.aarch64.neon.rshrn.v2i32(<2 x i64> %y, i32 32) + %shuffle = shufflevector <2 x i32> %res, <2 x i32> %res2, <4 x i32> + ret <4 x i32> %shuffle +} + +declare <8 x i8> @llvm.aarch64.neon.rshrn.v8i8(<8 x i16>, i32) +declare <4 x i16> @llvm.aarch64.neon.rshrn.v4i16(<4 x i32>, i32) +declare <2 x i32> @llvm.aarch64.neon.rshrn.v2i32(<2 x i64>, i32) From c7a589a2c4e2db496d732821a8dba59508326250 Mon Sep 17 00:00:00 2001 From: Shilei Tian Date: Fri, 24 Dec 2021 08:16:33 -0500 Subject: [PATCH 222/293] [Clang][OpenMP] Add the support for atomic compare in parser This patch adds the support for `atomic compare` in parser. The support in Sema and CodeGen will come soon. For now, it simply eimits an error when it is encountered. 
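For reference, the OpenMP 5.1 conditional-update form this clause is meant to
accept (the form the new atomic_messages.cpp test exercises under
-fopenmp-version=51) looks roughly like the sketch below. The wrapper function
is illustrative only and is not code from this patch; with only this patch
applied, Clang parses the clause but Sema still reports "atomic compare is not
supported for now" instead of generating code.

  // Illustrative sketch mirroring the construct from the new test case;
  // compile with -fopenmp -fopenmp-version=51.
  int conditional_update(int a, int b, int c) {
  #pragma omp atomic compare
    {
      if (a == b)
        a = c;
    }
    return a;
  }
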
Reviewed By: ABataev Differential Revision: https://reviews.llvm.org/D115561 --- clang/include/clang/AST/OpenMPClause.h | 41 +++++++++++++++++++ clang/include/clang/AST/RecursiveASTVisitor.h | 5 +++ .../clang/Basic/DiagnosticSemaKinds.td | 2 +- clang/include/clang/Sema/Sema.h | 3 ++ clang/lib/AST/OpenMPClause.cpp | 6 +++ clang/lib/AST/StmtProfile.cpp | 2 + clang/lib/Basic/OpenMPKinds.cpp | 2 + clang/lib/CodeGen/CGStmtOpenMP.cpp | 3 ++ clang/lib/Parse/ParseOpenMP.cpp | 1 + clang/lib/Sema/SemaOpenMP.cpp | 39 ++++++++++++++---- clang/lib/Sema/TreeTransform.h | 7 ++++ clang/lib/Serialization/ASTReader.cpp | 5 +++ clang/lib/Serialization/ASTWriter.cpp | 2 + clang/test/OpenMP/atomic_messages.cpp | 29 +++++++++---- clang/tools/libclang/CIndex.cpp | 2 + flang/lib/Semantics/check-omp-structure.cpp | 1 + llvm/include/llvm/Frontend/OpenMP/OMP.td | 2 + 17 files changed, 134 insertions(+), 18 deletions(-) diff --git a/clang/include/clang/AST/OpenMPClause.h b/clang/include/clang/AST/OpenMPClause.h index 565eb0c9cf99..3fd1b6d30080 100644 --- a/clang/include/clang/AST/OpenMPClause.h +++ b/clang/include/clang/AST/OpenMPClause.h @@ -2224,6 +2224,47 @@ class OMPCaptureClause : public OMPClause { } }; +/// This represents 'compare' clause in the '#pragma omp atomic' +/// directive. +/// +/// \code +/// #pragma omp atomic compare +/// \endcode +/// In this example directive '#pragma omp atomic' has 'compare' clause. +class OMPCompareClause final : public OMPClause { +public: + /// Build 'compare' clause. + /// + /// \param StartLoc Starting location of the clause. + /// \param EndLoc Ending location of the clause. + OMPCompareClause(SourceLocation StartLoc, SourceLocation EndLoc) + : OMPClause(llvm::omp::OMPC_compare, StartLoc, EndLoc) {} + + /// Build an empty clause. + OMPCompareClause() + : OMPClause(llvm::omp::OMPC_compare, SourceLocation(), SourceLocation()) { + } + + child_range children() { + return child_range(child_iterator(), child_iterator()); + } + + const_child_range children() const { + return const_child_range(const_child_iterator(), const_child_iterator()); + } + + child_range used_children() { + return child_range(child_iterator(), child_iterator()); + } + const_child_range used_children() const { + return const_child_range(const_child_iterator(), const_child_iterator()); + } + + static bool classof(const OMPClause *T) { + return T->getClauseKind() == llvm::omp::OMPC_compare; + } +}; + /// This represents 'seq_cst' clause in the '#pragma omp atomic' /// directive. 
/// diff --git a/clang/include/clang/AST/RecursiveASTVisitor.h b/clang/include/clang/AST/RecursiveASTVisitor.h index 8bcee8790e7b..f62dc36de556 100644 --- a/clang/include/clang/AST/RecursiveASTVisitor.h +++ b/clang/include/clang/AST/RecursiveASTVisitor.h @@ -3234,6 +3234,11 @@ bool RecursiveASTVisitor::VisitOMPCaptureClause(OMPCaptureClause *) { return true; } +template +bool RecursiveASTVisitor::VisitOMPCompareClause(OMPCompareClause *) { + return true; +} + template bool RecursiveASTVisitor::VisitOMPSeqCstClause(OMPSeqCstClause *) { return true; diff --git a/clang/include/clang/Basic/DiagnosticSemaKinds.td b/clang/include/clang/Basic/DiagnosticSemaKinds.td index 3b6341d2232d..f2089bfda04d 100644 --- a/clang/include/clang/Basic/DiagnosticSemaKinds.td +++ b/clang/include/clang/Basic/DiagnosticSemaKinds.td @@ -10509,7 +10509,7 @@ def err_omp_atomic_capture_not_compound_statement : Error< def note_omp_atomic_capture: Note< "%select{expected assignment expression|expected compound statement|expected exactly two expression statements|expected in right hand side of the first expression}0">; def err_omp_atomic_several_clauses : Error< - "directive '#pragma omp atomic' cannot contain more than one 'read', 'write', 'update' or 'capture' clause">; + "directive '#pragma omp atomic' cannot contain more than one 'read', 'write', 'update', 'capture', or 'compare' clause">; def err_omp_several_mem_order_clauses : Error< "directive '#pragma omp %0' cannot contain more than one %select{'seq_cst', 'relaxed', |}1'acq_rel', 'acquire' or 'release' clause">; def err_omp_atomic_incompatible_mem_order_clause : Error< diff --git a/clang/include/clang/Sema/Sema.h b/clang/include/clang/Sema/Sema.h index 55171767da10..79834554a50d 100644 --- a/clang/include/clang/Sema/Sema.h +++ b/clang/include/clang/Sema/Sema.h @@ -11190,6 +11190,9 @@ class Sema final { /// Called on well-formed 'capture' clause. OMPClause *ActOnOpenMPCaptureClause(SourceLocation StartLoc, SourceLocation EndLoc); + /// Called on well-formed 'compare' clause. + OMPClause *ActOnOpenMPCompareClause(SourceLocation StartLoc, + SourceLocation EndLoc); /// Called on well-formed 'seq_cst' clause. 
OMPClause *ActOnOpenMPSeqCstClause(SourceLocation StartLoc, SourceLocation EndLoc); diff --git a/clang/lib/AST/OpenMPClause.cpp b/clang/lib/AST/OpenMPClause.cpp index 56e140f10710..1bd049b88005 100644 --- a/clang/lib/AST/OpenMPClause.cpp +++ b/clang/lib/AST/OpenMPClause.cpp @@ -126,6 +126,7 @@ const OMPClauseWithPreInit *OMPClauseWithPreInit::get(const OMPClause *C) { case OMPC_write: case OMPC_update: case OMPC_capture: + case OMPC_compare: case OMPC_seq_cst: case OMPC_acq_rel: case OMPC_acquire: @@ -217,6 +218,7 @@ const OMPClauseWithPostUpdate *OMPClauseWithPostUpdate::get(const OMPClause *C) case OMPC_write: case OMPC_update: case OMPC_capture: + case OMPC_compare: case OMPC_seq_cst: case OMPC_acq_rel: case OMPC_acquire: @@ -1792,6 +1794,10 @@ void OMPClausePrinter::VisitOMPCaptureClause(OMPCaptureClause *) { OS << "capture"; } +void OMPClausePrinter::VisitOMPCompareClause(OMPCompareClause *) { + OS << "compare"; +} + void OMPClausePrinter::VisitOMPSeqCstClause(OMPSeqCstClause *) { OS << "seq_cst"; } diff --git a/clang/lib/AST/StmtProfile.cpp b/clang/lib/AST/StmtProfile.cpp index 4339c249e027..09853e0f0e49 100644 --- a/clang/lib/AST/StmtProfile.cpp +++ b/clang/lib/AST/StmtProfile.cpp @@ -551,6 +551,8 @@ void OMPClauseProfiler::VisitOMPUpdateClause(const OMPUpdateClause *) {} void OMPClauseProfiler::VisitOMPCaptureClause(const OMPCaptureClause *) {} +void OMPClauseProfiler::VisitOMPCompareClause(const OMPCompareClause *) {} + void OMPClauseProfiler::VisitOMPSeqCstClause(const OMPSeqCstClause *) {} void OMPClauseProfiler::VisitOMPAcqRelClause(const OMPAcqRelClause *) {} diff --git a/clang/lib/Basic/OpenMPKinds.cpp b/clang/lib/Basic/OpenMPKinds.cpp index 9e74e05bd863..1761c6d3d89b 100644 --- a/clang/lib/Basic/OpenMPKinds.cpp +++ b/clang/lib/Basic/OpenMPKinds.cpp @@ -163,6 +163,7 @@ unsigned clang::getOpenMPSimpleClauseType(OpenMPClauseKind Kind, StringRef Str, case OMPC_read: case OMPC_write: case OMPC_capture: + case OMPC_compare: case OMPC_seq_cst: case OMPC_acq_rel: case OMPC_acquire: @@ -428,6 +429,7 @@ const char *clang::getOpenMPSimpleClauseTypeName(OpenMPClauseKind Kind, case OMPC_read: case OMPC_write: case OMPC_capture: + case OMPC_compare: case OMPC_seq_cst: case OMPC_acq_rel: case OMPC_acquire: diff --git a/clang/lib/CodeGen/CGStmtOpenMP.cpp b/clang/lib/CodeGen/CGStmtOpenMP.cpp index ba0ced7d8f97..564c3a591f16 100644 --- a/clang/lib/CodeGen/CGStmtOpenMP.cpp +++ b/clang/lib/CodeGen/CGStmtOpenMP.cpp @@ -5967,6 +5967,9 @@ static void emitOMPAtomicExpr(CodeGenFunction &CGF, OpenMPClauseKind Kind, emitOMPAtomicCaptureExpr(CGF, AO, IsPostfixUpdate, V, X, E, UE, IsXLHSInRHSPart, Loc); break; + case OMPC_compare: + // Do nothing here as we already emit an error. 
+ break; case OMPC_if: case OMPC_final: case OMPC_num_threads: diff --git a/clang/lib/Parse/ParseOpenMP.cpp b/clang/lib/Parse/ParseOpenMP.cpp index 613ad742c93f..300b022d83b9 100644 --- a/clang/lib/Parse/ParseOpenMP.cpp +++ b/clang/lib/Parse/ParseOpenMP.cpp @@ -3192,6 +3192,7 @@ OMPClause *Parser::ParseOpenMPClause(OpenMPDirectiveKind DKind, case OMPC_read: case OMPC_write: case OMPC_capture: + case OMPC_compare: case OMPC_seq_cst: case OMPC_acq_rel: case OMPC_acquire: diff --git a/clang/lib/Sema/SemaOpenMP.cpp b/clang/lib/Sema/SemaOpenMP.cpp index a5962ec3b1d9..ba0481874577 100644 --- a/clang/lib/Sema/SemaOpenMP.cpp +++ b/clang/lib/Sema/SemaOpenMP.cpp @@ -6354,6 +6354,7 @@ StmtResult Sema::ActOnOpenMPExecutableDirective( case OMPC_write: case OMPC_update: case OMPC_capture: + case OMPC_compare: case OMPC_seq_cst: case OMPC_acq_rel: case OMPC_acquire: @@ -10939,7 +10940,8 @@ StmtResult Sema::ActOnOpenMPAtomicDirective(ArrayRef Clauses, case OMPC_read: case OMPC_write: case OMPC_update: - case OMPC_capture: { + case OMPC_capture: + case OMPC_compare: { if (AtomicKind != OMPC_unknown) { Diag(C->getBeginLoc(), diag::err_omp_atomic_several_clauses) << SourceRange(C->getBeginLoc(), C->getEndLoc()); @@ -11383,15 +11385,21 @@ StmtResult Sema::ActOnOpenMPAtomicDirective(ArrayRef Clauses, SourceRange(Body->getBeginLoc(), Body->getBeginLoc()); ErrorFound = NotACompoundStatement; } - if (ErrorFound != NoError) { - Diag(ErrorLoc, diag::err_omp_atomic_capture_not_compound_statement) - << ErrorRange; - Diag(NoteLoc, diag::note_omp_atomic_capture) << ErrorFound << NoteRange; - return StmtError(); - } - if (CurContext->isDependentContext()) - UE = V = E = X = nullptr; } + if (ErrorFound != NoError) { + Diag(ErrorLoc, diag::err_omp_atomic_capture_not_compound_statement) + << ErrorRange; + Diag(NoteLoc, diag::note_omp_atomic_capture) << ErrorFound << NoteRange; + return StmtError(); + } + if (CurContext->isDependentContext()) + UE = V = E = X = nullptr; + } else if (AtomicKind == OMPC_compare) { + // TODO: For now we emit an error here and in emitOMPAtomicExpr we ignore + // code gen. 
+ unsigned DiagID = Diags.getCustomDiagID( + DiagnosticsEngine::Error, "atomic compare is not supported for now"); + Diag(AtomicKindLoc, DiagID); } setFunctionHasBranchProtectedScope(); @@ -13472,6 +13480,7 @@ OMPClause *Sema::ActOnOpenMPSingleExprClause(OpenMPClauseKind Kind, Expr *Expr, case OMPC_write: case OMPC_update: case OMPC_capture: + case OMPC_compare: case OMPC_seq_cst: case OMPC_acq_rel: case OMPC_acquire: @@ -14303,6 +14312,7 @@ static OpenMPDirectiveKind getOpenMPCaptureRegionForClause( case OMPC_write: case OMPC_update: case OMPC_capture: + case OMPC_compare: case OMPC_seq_cst: case OMPC_acq_rel: case OMPC_acquire: @@ -14764,6 +14774,7 @@ OMPClause *Sema::ActOnOpenMPSimpleClause( case OMPC_read: case OMPC_write: case OMPC_capture: + case OMPC_compare: case OMPC_seq_cst: case OMPC_acq_rel: case OMPC_acquire: @@ -15069,6 +15080,7 @@ OMPClause *Sema::ActOnOpenMPSingleExprWithArgClause( case OMPC_write: case OMPC_update: case OMPC_capture: + case OMPC_compare: case OMPC_seq_cst: case OMPC_acq_rel: case OMPC_acquire: @@ -15257,6 +15269,9 @@ OMPClause *Sema::ActOnOpenMPClause(OpenMPClauseKind Kind, case OMPC_capture: Res = ActOnOpenMPCaptureClause(StartLoc, EndLoc); break; + case OMPC_compare: + Res = ActOnOpenMPCompareClause(StartLoc, EndLoc); + break; case OMPC_seq_cst: Res = ActOnOpenMPSeqCstClause(StartLoc, EndLoc); break; @@ -15403,6 +15418,11 @@ OMPClause *Sema::ActOnOpenMPCaptureClause(SourceLocation StartLoc, return new (Context) OMPCaptureClause(StartLoc, EndLoc); } +OMPClause *Sema::ActOnOpenMPCompareClause(SourceLocation StartLoc, + SourceLocation EndLoc) { + return new (Context) OMPCompareClause(StartLoc, EndLoc); +} + OMPClause *Sema::ActOnOpenMPSeqCstClause(SourceLocation StartLoc, SourceLocation EndLoc) { return new (Context) OMPSeqCstClause(StartLoc, EndLoc); @@ -15871,6 +15891,7 @@ OMPClause *Sema::ActOnOpenMPVarListClause( case OMPC_write: case OMPC_update: case OMPC_capture: + case OMPC_compare: case OMPC_seq_cst: case OMPC_acq_rel: case OMPC_acquire: diff --git a/clang/lib/Sema/TreeTransform.h b/clang/lib/Sema/TreeTransform.h index 39b659753dfa..298a3f7a83d8 100644 --- a/clang/lib/Sema/TreeTransform.h +++ b/clang/lib/Sema/TreeTransform.h @@ -9460,6 +9460,13 @@ TreeTransform::TransformOMPCaptureClause(OMPCaptureClause *C) { return C; } +template +OMPClause * +TreeTransform::TransformOMPCompareClause(OMPCompareClause *C) { + // No need to rebuild this clause, no template-dependent parameters. 
+ return C; +} + template OMPClause * TreeTransform::TransformOMPSeqCstClause(OMPSeqCstClause *C) { diff --git a/clang/lib/Serialization/ASTReader.cpp b/clang/lib/Serialization/ASTReader.cpp index d16697b48ca9..f93e0d2ed1c4 100644 --- a/clang/lib/Serialization/ASTReader.cpp +++ b/clang/lib/Serialization/ASTReader.cpp @@ -11765,6 +11765,9 @@ OMPClause *OMPClauseReader::readClause() { case llvm::omp::OMPC_capture: C = new (Context) OMPCaptureClause(); break; + case llvm::omp::OMPC_compare: + C = new (Context) OMPCompareClause(); + break; case llvm::omp::OMPC_seq_cst: C = new (Context) OMPSeqCstClause(); break; @@ -12123,6 +12126,8 @@ void OMPClauseReader::VisitOMPUpdateClause(OMPUpdateClause *C) { void OMPClauseReader::VisitOMPCaptureClause(OMPCaptureClause *) {} +void OMPClauseReader::VisitOMPCompareClause(OMPCompareClause *) {} + void OMPClauseReader::VisitOMPSeqCstClause(OMPSeqCstClause *) {} void OMPClauseReader::VisitOMPAcqRelClause(OMPAcqRelClause *) {} diff --git a/clang/lib/Serialization/ASTWriter.cpp b/clang/lib/Serialization/ASTWriter.cpp index 3e4153b3b612..65a780e67510 100644 --- a/clang/lib/Serialization/ASTWriter.cpp +++ b/clang/lib/Serialization/ASTWriter.cpp @@ -6252,6 +6252,8 @@ void OMPClauseWriter::VisitOMPUpdateClause(OMPUpdateClause *C) { void OMPClauseWriter::VisitOMPCaptureClause(OMPCaptureClause *) {} +void OMPClauseWriter::VisitOMPCompareClause(OMPCompareClause *) {} + void OMPClauseWriter::VisitOMPSeqCstClause(OMPSeqCstClause *) {} void OMPClauseWriter::VisitOMPAcqRelClause(OMPAcqRelClause *) {} diff --git a/clang/test/OpenMP/atomic_messages.cpp b/clang/test/OpenMP/atomic_messages.cpp index 4a8ca7ba82ce..608847f87dad 100644 --- a/clang/test/OpenMP/atomic_messages.cpp +++ b/clang/test/OpenMP/atomic_messages.cpp @@ -1,8 +1,10 @@ // RUN: %clang_cc1 -verify=expected,omp45 -fopenmp -fopenmp-version=45 -ferror-limit 150 %s -Wuninitialized // RUN: %clang_cc1 -verify=expected,omp50 -fopenmp -ferror-limit 150 %s -Wuninitialized +// RUN: %clang_cc1 -verify=expected,omp50,omp51 -fopenmp -fopenmp-version=51 -ferror-limit 150 %s -Wuninitialized // RUN: %clang_cc1 -verify=expected,omp45 -fopenmp-simd -fopenmp-version=45 -ferror-limit 150 %s -Wuninitialized // RUN: %clang_cc1 -verify=expected,omp50 -fopenmp-simd -ferror-limit 150 %s -Wuninitialized +// RUN: %clang_cc1 -verify=expected,omp50,omp51 -fopenmp-simd -fopenmp-version=51 -ferror-limit 150 %s -Wuninitialized int foo() { L1: @@ -896,19 +898,19 @@ int relaxed() { template T mixed() { T a, b = T(); -// expected-error@+2 2 {{directive '#pragma omp atomic' cannot contain more than one 'read', 'write', 'update' or 'capture' clause}} +// expected-error@+2 2 {{directive '#pragma omp atomic' cannot contain more than one 'read', 'write', 'update', 'capture', or 'compare' clause}} // expected-note@+1 2 {{'read' clause used here}} #pragma omp atomic read write a = b; -// expected-error@+2 2 {{directive '#pragma omp atomic' cannot contain more than one 'read', 'write', 'update' or 'capture' clause}} +// expected-error@+2 2 {{directive '#pragma omp atomic' cannot contain more than one 'read', 'write', 'update', 'capture', or 'compare' clause}} // expected-note@+1 2 {{'write' clause used here}} #pragma omp atomic write read a = b; -// expected-error@+2 2 {{directive '#pragma omp atomic' cannot contain more than one 'read', 'write', 'update' or 'capture' clause}} +// expected-error@+2 2 {{directive '#pragma omp atomic' cannot contain more than one 'read', 'write', 'update', 'capture', or 'compare' clause}} // expected-note@+1 2 {{'update' 
clause used here}} #pragma omp atomic update read a += b; -// expected-error@+2 2 {{directive '#pragma omp atomic' cannot contain more than one 'read', 'write', 'update' or 'capture' clause}} +// expected-error@+2 2 {{directive '#pragma omp atomic' cannot contain more than one 'read', 'write', 'update', 'capture', or 'compare' clause}} // expected-note@+1 2 {{'capture' clause used here}} #pragma omp atomic capture read a = ++b; @@ -917,19 +919,19 @@ T mixed() { int mixed() { int a, b = 0; -// expected-error@+2 {{directive '#pragma omp atomic' cannot contain more than one 'read', 'write', 'update' or 'capture' clause}} +// expected-error@+2 {{directive '#pragma omp atomic' cannot contain more than one 'read', 'write', 'update', 'capture', or 'compare' clause}} // expected-note@+1 {{'read' clause used here}} #pragma omp atomic read write a = b; -// expected-error@+2 {{directive '#pragma omp atomic' cannot contain more than one 'read', 'write', 'update' or 'capture' clause}} +// expected-error@+2 {{directive '#pragma omp atomic' cannot contain more than one 'read', 'write', 'update', 'capture', or 'compare' clause}} // expected-note@+1 {{'write' clause used here}} #pragma omp atomic write read a = b; -// expected-error@+2 {{directive '#pragma omp atomic' cannot contain more than one 'read', 'write', 'update' or 'capture' clause}} +// expected-error@+2 {{directive '#pragma omp atomic' cannot contain more than one 'read', 'write', 'update', 'capture', or 'compare' clause}} // expected-note@+1 {{'write' clause used here}} #pragma omp atomic write update a = b; -// expected-error@+2 {{directive '#pragma omp atomic' cannot contain more than one 'read', 'write', 'update' or 'capture' clause}} +// expected-error@+2 {{directive '#pragma omp atomic' cannot contain more than one 'read', 'write', 'update', 'capture', or 'compare' clause}} // expected-note@+1 {{'write' clause used here}} #pragma omp atomic write capture a = b; @@ -937,3 +939,14 @@ int mixed() { return mixed(); } +#if _OPENMP >= 202011 +int compare() { + int a, b, c; +// omp51-error@+1 {{atomic compare is not supported for now}} +#pragma omp atomic compare + { + if (a == b) + a = c; + } +} +#endif diff --git a/clang/tools/libclang/CIndex.cpp b/clang/tools/libclang/CIndex.cpp index c59c5709b6fa..4722bece7a1d 100644 --- a/clang/tools/libclang/CIndex.cpp +++ b/clang/tools/libclang/CIndex.cpp @@ -2275,6 +2275,8 @@ void OMPClauseEnqueue::VisitOMPUpdateClause(const OMPUpdateClause *) {} void OMPClauseEnqueue::VisitOMPCaptureClause(const OMPCaptureClause *) {} +void OMPClauseEnqueue::VisitOMPCompareClause(const OMPCompareClause *) {} + void OMPClauseEnqueue::VisitOMPSeqCstClause(const OMPSeqCstClause *) {} void OMPClauseEnqueue::VisitOMPAcqRelClause(const OMPAcqRelClause *) {} diff --git a/flang/lib/Semantics/check-omp-structure.cpp b/flang/lib/Semantics/check-omp-structure.cpp index 2698cafca8eb..16efa1edf8f9 100644 --- a/flang/lib/Semantics/check-omp-structure.cpp +++ b/flang/lib/Semantics/check-omp-structure.cpp @@ -1482,6 +1482,7 @@ CHECK_SIMPLE_CLAUSE(AppendArgs, OMPC_append_args) CHECK_SIMPLE_CLAUSE(MemoryOrder, OMPC_memory_order) CHECK_SIMPLE_CLAUSE(Bind, OMPC_bind) CHECK_SIMPLE_CLAUSE(Align, OMPC_align) +CHECK_SIMPLE_CLAUSE(Compare, OMPC_compare) CHECK_REQ_SCALAR_INT_CLAUSE(Grainsize, OMPC_grainsize) CHECK_REQ_SCALAR_INT_CLAUSE(NumTasks, OMPC_num_tasks) diff --git a/llvm/include/llvm/Frontend/OpenMP/OMP.td b/llvm/include/llvm/Frontend/OpenMP/OMP.td index eab8d03f7316..18d577dff497 100644 --- a/llvm/include/llvm/Frontend/OpenMP/OMP.td +++ 
b/llvm/include/llvm/Frontend/OpenMP/OMP.td @@ -180,6 +180,7 @@ def OMPC_Read : Clause<"read"> { let clangClass = "OMPReadClause"; } def OMPC_Write : Clause<"write"> { let clangClass = "OMPWriteClause"; } def OMPC_Update : Clause<"update"> { let clangClass = "OMPUpdateClause"; } def OMPC_Capture : Clause<"capture"> { let clangClass = "OMPCaptureClause"; } +def OMPC_Compare : Clause<"compare"> { let clangClass = "OMPCompareClause"; } def OMPC_SeqCst : Clause<"seq_cst"> { let clangClass = "OMPSeqCstClause"; } def OMPC_AcqRel : Clause<"acq_rel"> { let clangClass = "OMPAcqRelClause"; } def OMPC_Acquire : Clause<"acquire"> { let clangClass = "OMPAcquireClause"; } @@ -536,6 +537,7 @@ def OMP_Atomic : Directive<"atomic"> { VersionedClause, VersionedClause, VersionedClause, + VersionedClause ]; let allowedOnceClauses = [ VersionedClause, From 159da567378ab5a4bf9b62162d16caccf3db16f9 Mon Sep 17 00:00:00 2001 From: Simon Pilgrim Date: Thu, 23 Dec 2021 18:53:06 +0000 Subject: [PATCH 223/293] [X86] Enable v32i16 ISD::ROTL/ROTR lowering on AVX512BW targets --- llvm/lib/Target/X86/X86ISelLowering.cpp | 25 +++---- llvm/test/CodeGen/X86/vector-fshl-rot-512.ll | 52 +++++++-------- llvm/test/CodeGen/X86/vector-fshr-rot-512.ll | 68 +++++++++----------- llvm/test/CodeGen/X86/vector-rotate-512.ll | 28 ++++---- 4 files changed, 76 insertions(+), 97 deletions(-) diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp index 34ea8901fb3a..6f6361b6757b 100644 --- a/llvm/lib/Target/X86/X86ISelLowering.cpp +++ b/llvm/lib/Target/X86/X86ISelLowering.cpp @@ -1654,6 +1654,8 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM, setOperationAction(ISD::SRL, VT, Custom); setOperationAction(ISD::SHL, VT, Custom); setOperationAction(ISD::SRA, VT, Custom); + setOperationAction(ISD::ROTL, VT, Custom); + setOperationAction(ISD::ROTR, VT, Custom); setOperationAction(ISD::SETCC, VT, Custom); // The condition codes aren't legal in SSE/AVX and under AVX512 we use @@ -1668,21 +1670,10 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM, setOperationAction(ISD::UMIN, VT, Legal); setOperationAction(ISD::ABS, VT, Legal); setOperationAction(ISD::CTPOP, VT, Custom); - setOperationAction(ISD::ROTL, VT, Custom); - setOperationAction(ISD::ROTR, VT, Custom); setOperationAction(ISD::STRICT_FSETCC, VT, Custom); setOperationAction(ISD::STRICT_FSETCCS, VT, Custom); } - // With BWI, expanding (and promoting the shifts) is the better. - if (!Subtarget.useBWIRegs()) { - setOperationAction(ISD::ROTL, MVT::v32i16, Custom); - setOperationAction(ISD::ROTR, MVT::v32i16, Custom); - } - - setOperationAction(ISD::ROTL, MVT::v64i8, Custom); - setOperationAction(ISD::ROTR, MVT::v64i8, Custom); - for (auto VT : { MVT::v64i8, MVT::v32i16 }) { setOperationAction(ISD::ABS, VT, HasBWI ? Legal : Custom); setOperationAction(ISD::CTPOP, VT, Subtarget.hasBITALG() ? 
Legal : Custom); @@ -29894,12 +29885,12 @@ static SDValue LowerRotate(SDValue Op, const X86Subtarget &Subtarget, if (VT.is512BitVector() && !Subtarget.useBWIRegs()) return splitVectorIntBinary(Op, DAG); - assert((VT == MVT::v4i32 || VT == MVT::v8i16 || VT == MVT::v16i8 || - ((VT == MVT::v8i32 || VT == MVT::v16i16 || VT == MVT::v32i8) && - Subtarget.hasAVX2()) || - (VT == MVT::v32i16 && !Subtarget.useBWIRegs()) || - (VT == MVT::v64i8 && Subtarget.useBWIRegs())) && - "Only vXi32/vXi16/vXi8 vector rotates supported"); + assert( + (VT == MVT::v4i32 || VT == MVT::v8i16 || VT == MVT::v16i8 || + ((VT == MVT::v8i32 || VT == MVT::v16i16 || VT == MVT::v32i8) && + Subtarget.hasAVX2()) || + ((VT == MVT::v32i16 || VT == MVT::v64i8) && Subtarget.useBWIRegs())) && + "Only vXi32/vXi16/vXi8 vector rotates supported"); MVT ExtSVT = MVT::getIntegerVT(2 * EltSizeInBits); MVT ExtVT = MVT::getVectorVT(ExtSVT, NumElts / 2); diff --git a/llvm/test/CodeGen/X86/vector-fshl-rot-512.ll b/llvm/test/CodeGen/X86/vector-fshl-rot-512.ll index df579191c87f..0e8cceb4db3f 100644 --- a/llvm/test/CodeGen/X86/vector-fshl-rot-512.ll +++ b/llvm/test/CodeGen/X86/vector-fshl-rot-512.ll @@ -90,26 +90,22 @@ define <32 x i16> @var_funnnel_v32i16(<32 x i16> %x, <32 x i16> %amt) nounwind { ; ; AVX512BW-LABEL: var_funnnel_v32i16: ; AVX512BW: # %bb.0: -; AVX512BW-NEXT: vmovdqa64 {{.*#+}} zmm2 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] -; AVX512BW-NEXT: vpandq %zmm2, %zmm1, %zmm3 -; AVX512BW-NEXT: vpsllvw %zmm3, %zmm0, %zmm3 -; AVX512BW-NEXT: vpxor %xmm4, %xmm4, %xmm4 -; AVX512BW-NEXT: vpsubw %zmm1, %zmm4, %zmm1 -; AVX512BW-NEXT: vpandq %zmm2, %zmm1, %zmm1 +; AVX512BW-NEXT: vpandq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %zmm1, %zmm1 +; AVX512BW-NEXT: vpsllvw %zmm1, %zmm0, %zmm2 +; AVX512BW-NEXT: vmovdqa64 {{.*#+}} zmm3 = [16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16] +; AVX512BW-NEXT: vpsubw %zmm1, %zmm3, %zmm1 ; AVX512BW-NEXT: vpsrlvw %zmm1, %zmm0, %zmm0 -; AVX512BW-NEXT: vporq %zmm0, %zmm3, %zmm0 +; AVX512BW-NEXT: vporq %zmm0, %zmm2, %zmm0 ; AVX512BW-NEXT: retq ; ; AVX512VLBW-LABEL: var_funnnel_v32i16: ; AVX512VLBW: # %bb.0: -; AVX512VLBW-NEXT: vmovdqa64 {{.*#+}} zmm2 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] -; AVX512VLBW-NEXT: vpandq %zmm2, %zmm1, %zmm3 -; AVX512VLBW-NEXT: vpsllvw %zmm3, %zmm0, %zmm3 -; AVX512VLBW-NEXT: vpxor %xmm4, %xmm4, %xmm4 -; AVX512VLBW-NEXT: vpsubw %zmm1, %zmm4, %zmm1 -; AVX512VLBW-NEXT: vpandq %zmm2, %zmm1, %zmm1 +; AVX512VLBW-NEXT: vpandq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %zmm1, %zmm1 +; AVX512VLBW-NEXT: vpsllvw %zmm1, %zmm0, %zmm2 +; AVX512VLBW-NEXT: vmovdqa64 {{.*#+}} zmm3 = [16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16] +; AVX512VLBW-NEXT: vpsubw %zmm1, %zmm3, %zmm1 ; AVX512VLBW-NEXT: vpsrlvw %zmm1, %zmm0, %zmm0 -; AVX512VLBW-NEXT: vporq %zmm0, %zmm3, %zmm0 +; AVX512VLBW-NEXT: vporq %zmm0, %zmm2, %zmm0 ; AVX512VLBW-NEXT: retq ; ; AVX512VBMI2-LABEL: var_funnnel_v32i16: @@ -334,30 +330,26 @@ define <32 x i16> @splatvar_funnnel_v32i16(<32 x i16> %x, <32 x i16> %amt) nounw ; ; AVX512BW-LABEL: splatvar_funnnel_v32i16: ; AVX512BW: # %bb.0: -; AVX512BW-NEXT: vmovdqa {{.*#+}} xmm2 = [15,15,15,15,15,15,15,15] -; AVX512BW-NEXT: vpand %xmm2, %xmm1, %xmm3 -; AVX512BW-NEXT: vpmovzxwq {{.*#+}} xmm3 = xmm3[0],zero,zero,zero,xmm3[1],zero,zero,zero -; AVX512BW-NEXT: vpsllw %xmm3, %zmm0, %zmm3 -; AVX512BW-NEXT: vpxor 
%xmm4, %xmm4, %xmm4 -; AVX512BW-NEXT: vpsubw %xmm1, %xmm4, %xmm1 -; AVX512BW-NEXT: vpand %xmm2, %xmm1, %xmm1 +; AVX512BW-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1 +; AVX512BW-NEXT: vpmovzxwq {{.*#+}} xmm2 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero +; AVX512BW-NEXT: vpsllw %xmm2, %zmm0, %zmm2 +; AVX512BW-NEXT: vmovdqa {{.*#+}} xmm3 = [16,16,16,16,16,16,16,16] +; AVX512BW-NEXT: vpsubw %xmm1, %xmm3, %xmm1 ; AVX512BW-NEXT: vpmovzxwq {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero ; AVX512BW-NEXT: vpsrlw %xmm1, %zmm0, %zmm0 -; AVX512BW-NEXT: vporq %zmm0, %zmm3, %zmm0 +; AVX512BW-NEXT: vporq %zmm0, %zmm2, %zmm0 ; AVX512BW-NEXT: retq ; ; AVX512VLBW-LABEL: splatvar_funnnel_v32i16: ; AVX512VLBW: # %bb.0: -; AVX512VLBW-NEXT: vmovdqa {{.*#+}} xmm2 = [15,15,15,15,15,15,15,15] -; AVX512VLBW-NEXT: vpand %xmm2, %xmm1, %xmm3 -; AVX512VLBW-NEXT: vpmovzxwq {{.*#+}} xmm3 = xmm3[0],zero,zero,zero,xmm3[1],zero,zero,zero -; AVX512VLBW-NEXT: vpsllw %xmm3, %zmm0, %zmm3 -; AVX512VLBW-NEXT: vpxor %xmm4, %xmm4, %xmm4 -; AVX512VLBW-NEXT: vpsubw %xmm1, %xmm4, %xmm1 -; AVX512VLBW-NEXT: vpand %xmm2, %xmm1, %xmm1 +; AVX512VLBW-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1 +; AVX512VLBW-NEXT: vpmovzxwq {{.*#+}} xmm2 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero +; AVX512VLBW-NEXT: vpsllw %xmm2, %zmm0, %zmm2 +; AVX512VLBW-NEXT: vmovdqa {{.*#+}} xmm3 = [16,16,16,16,16,16,16,16] +; AVX512VLBW-NEXT: vpsubw %xmm1, %xmm3, %xmm1 ; AVX512VLBW-NEXT: vpmovzxwq {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero ; AVX512VLBW-NEXT: vpsrlw %xmm1, %zmm0, %zmm0 -; AVX512VLBW-NEXT: vporq %zmm0, %zmm3, %zmm0 +; AVX512VLBW-NEXT: vporq %zmm0, %zmm2, %zmm0 ; AVX512VLBW-NEXT: retq ; ; AVX512VBMI2-LABEL: splatvar_funnnel_v32i16: diff --git a/llvm/test/CodeGen/X86/vector-fshr-rot-512.ll b/llvm/test/CodeGen/X86/vector-fshr-rot-512.ll index bd458426f114..d7ace82e7f08 100644 --- a/llvm/test/CodeGen/X86/vector-fshr-rot-512.ll +++ b/llvm/test/CodeGen/X86/vector-fshr-rot-512.ll @@ -90,26 +90,22 @@ define <32 x i16> @var_funnnel_v32i16(<32 x i16> %x, <32 x i16> %amt) nounwind { ; ; AVX512BW-LABEL: var_funnnel_v32i16: ; AVX512BW: # %bb.0: -; AVX512BW-NEXT: vmovdqa64 {{.*#+}} zmm2 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] -; AVX512BW-NEXT: vpandq %zmm2, %zmm1, %zmm3 -; AVX512BW-NEXT: vpsrlvw %zmm3, %zmm0, %zmm3 -; AVX512BW-NEXT: vpxor %xmm4, %xmm4, %xmm4 -; AVX512BW-NEXT: vpsubw %zmm1, %zmm4, %zmm1 -; AVX512BW-NEXT: vpandq %zmm2, %zmm1, %zmm1 +; AVX512BW-NEXT: vpandq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %zmm1, %zmm1 +; AVX512BW-NEXT: vpsrlvw %zmm1, %zmm0, %zmm2 +; AVX512BW-NEXT: vmovdqa64 {{.*#+}} zmm3 = [16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16] +; AVX512BW-NEXT: vpsubw %zmm1, %zmm3, %zmm1 ; AVX512BW-NEXT: vpsllvw %zmm1, %zmm0, %zmm0 -; AVX512BW-NEXT: vporq %zmm0, %zmm3, %zmm0 +; AVX512BW-NEXT: vporq %zmm0, %zmm2, %zmm0 ; AVX512BW-NEXT: retq ; ; AVX512VLBW-LABEL: var_funnnel_v32i16: ; AVX512VLBW: # %bb.0: -; AVX512VLBW-NEXT: vmovdqa64 {{.*#+}} zmm2 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] -; AVX512VLBW-NEXT: vpandq %zmm2, %zmm1, %zmm3 -; AVX512VLBW-NEXT: vpsrlvw %zmm3, %zmm0, %zmm3 -; AVX512VLBW-NEXT: vpxor %xmm4, %xmm4, %xmm4 -; AVX512VLBW-NEXT: vpsubw %zmm1, %zmm4, %zmm1 -; AVX512VLBW-NEXT: vpandq %zmm2, %zmm1, %zmm1 +; AVX512VLBW-NEXT: vpandq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %zmm1, %zmm1 +; AVX512VLBW-NEXT: vpsrlvw %zmm1, %zmm0, 
%zmm2 +; AVX512VLBW-NEXT: vmovdqa64 {{.*#+}} zmm3 = [16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16] +; AVX512VLBW-NEXT: vpsubw %zmm1, %zmm3, %zmm1 ; AVX512VLBW-NEXT: vpsllvw %zmm1, %zmm0, %zmm0 -; AVX512VLBW-NEXT: vporq %zmm0, %zmm3, %zmm0 +; AVX512VLBW-NEXT: vporq %zmm0, %zmm2, %zmm0 ; AVX512VLBW-NEXT: retq ; ; AVX512VBMI2-LABEL: var_funnnel_v32i16: @@ -336,30 +332,26 @@ define <32 x i16> @splatvar_funnnel_v32i16(<32 x i16> %x, <32 x i16> %amt) nounw ; ; AVX512BW-LABEL: splatvar_funnnel_v32i16: ; AVX512BW: # %bb.0: -; AVX512BW-NEXT: vmovdqa {{.*#+}} xmm2 = [15,15,15,15,15,15,15,15] -; AVX512BW-NEXT: vpand %xmm2, %xmm1, %xmm3 -; AVX512BW-NEXT: vpmovzxwq {{.*#+}} xmm3 = xmm3[0],zero,zero,zero,xmm3[1],zero,zero,zero -; AVX512BW-NEXT: vpsrlw %xmm3, %zmm0, %zmm3 -; AVX512BW-NEXT: vpxor %xmm4, %xmm4, %xmm4 -; AVX512BW-NEXT: vpsubw %xmm1, %xmm4, %xmm1 -; AVX512BW-NEXT: vpand %xmm2, %xmm1, %xmm1 +; AVX512BW-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1 +; AVX512BW-NEXT: vpmovzxwq {{.*#+}} xmm2 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero +; AVX512BW-NEXT: vpsrlw %xmm2, %zmm0, %zmm2 +; AVX512BW-NEXT: vmovdqa {{.*#+}} xmm3 = [16,16,16,16,16,16,16,16] +; AVX512BW-NEXT: vpsubw %xmm1, %xmm3, %xmm1 ; AVX512BW-NEXT: vpmovzxwq {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero ; AVX512BW-NEXT: vpsllw %xmm1, %zmm0, %zmm0 -; AVX512BW-NEXT: vporq %zmm0, %zmm3, %zmm0 +; AVX512BW-NEXT: vporq %zmm0, %zmm2, %zmm0 ; AVX512BW-NEXT: retq ; ; AVX512VLBW-LABEL: splatvar_funnnel_v32i16: ; AVX512VLBW: # %bb.0: -; AVX512VLBW-NEXT: vmovdqa {{.*#+}} xmm2 = [15,15,15,15,15,15,15,15] -; AVX512VLBW-NEXT: vpand %xmm2, %xmm1, %xmm3 -; AVX512VLBW-NEXT: vpmovzxwq {{.*#+}} xmm3 = xmm3[0],zero,zero,zero,xmm3[1],zero,zero,zero -; AVX512VLBW-NEXT: vpsrlw %xmm3, %zmm0, %zmm3 -; AVX512VLBW-NEXT: vpxor %xmm4, %xmm4, %xmm4 -; AVX512VLBW-NEXT: vpsubw %xmm1, %xmm4, %xmm1 -; AVX512VLBW-NEXT: vpand %xmm2, %xmm1, %xmm1 +; AVX512VLBW-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1 +; AVX512VLBW-NEXT: vpmovzxwq {{.*#+}} xmm2 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero +; AVX512VLBW-NEXT: vpsrlw %xmm2, %zmm0, %zmm2 +; AVX512VLBW-NEXT: vmovdqa {{.*#+}} xmm3 = [16,16,16,16,16,16,16,16] +; AVX512VLBW-NEXT: vpsubw %xmm1, %xmm3, %xmm1 ; AVX512VLBW-NEXT: vpmovzxwq {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero ; AVX512VLBW-NEXT: vpsllw %xmm1, %zmm0, %zmm0 -; AVX512VLBW-NEXT: vporq %zmm0, %zmm3, %zmm0 +; AVX512VLBW-NEXT: vporq %zmm0, %zmm2, %zmm0 ; AVX512VLBW-NEXT: retq ; ; AVX512VBMI2-LABEL: splatvar_funnnel_v32i16: @@ -536,15 +528,15 @@ define <32 x i16> @constant_funnnel_v32i16(<32 x i16> %x) nounwind { ; ; AVX512BW-LABEL: constant_funnnel_v32i16: ; AVX512BW: # %bb.0: -; AVX512BW-NEXT: vpsllvw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %zmm0, %zmm1 -; AVX512BW-NEXT: vpsrlvw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %zmm0, %zmm0 +; AVX512BW-NEXT: vpsrlvw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %zmm0, %zmm1 +; AVX512BW-NEXT: vpsllvw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %zmm0, %zmm0 ; AVX512BW-NEXT: vporq %zmm1, %zmm0, %zmm0 ; AVX512BW-NEXT: retq ; ; AVX512VLBW-LABEL: constant_funnnel_v32i16: ; AVX512VLBW: # %bb.0: -; AVX512VLBW-NEXT: vpsllvw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %zmm0, %zmm1 -; AVX512VLBW-NEXT: vpsrlvw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %zmm0, %zmm0 +; AVX512VLBW-NEXT: vpsrlvw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %zmm0, %zmm1 +; AVX512VLBW-NEXT: vpsllvw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %zmm0, %zmm0 ; AVX512VLBW-NEXT: vporq %zmm1, %zmm0, %zmm0 ; AVX512VLBW-NEXT: retq ; @@ -706,15 
+698,15 @@ define <32 x i16> @splatconstant_funnnel_v32i16(<32 x i16> %x) nounwind { ; ; AVX512BW-LABEL: splatconstant_funnnel_v32i16: ; AVX512BW: # %bb.0: -; AVX512BW-NEXT: vpsllw $9, %zmm0, %zmm1 -; AVX512BW-NEXT: vpsrlw $7, %zmm0, %zmm0 +; AVX512BW-NEXT: vpsrlw $7, %zmm0, %zmm1 +; AVX512BW-NEXT: vpsllw $9, %zmm0, %zmm0 ; AVX512BW-NEXT: vporq %zmm1, %zmm0, %zmm0 ; AVX512BW-NEXT: retq ; ; AVX512VLBW-LABEL: splatconstant_funnnel_v32i16: ; AVX512VLBW: # %bb.0: -; AVX512VLBW-NEXT: vpsllw $9, %zmm0, %zmm1 -; AVX512VLBW-NEXT: vpsrlw $7, %zmm0, %zmm0 +; AVX512VLBW-NEXT: vpsrlw $7, %zmm0, %zmm1 +; AVX512VLBW-NEXT: vpsllw $9, %zmm0, %zmm0 ; AVX512VLBW-NEXT: vporq %zmm1, %zmm0, %zmm0 ; AVX512VLBW-NEXT: retq ; diff --git a/llvm/test/CodeGen/X86/vector-rotate-512.ll b/llvm/test/CodeGen/X86/vector-rotate-512.ll index a54988c0870d..4427d3b2c79f 100644 --- a/llvm/test/CodeGen/X86/vector-rotate-512.ll +++ b/llvm/test/CodeGen/X86/vector-rotate-512.ll @@ -91,20 +91,22 @@ define <32 x i16> @var_rotate_v32i16(<32 x i16> %a, <32 x i16> %b) nounwind { ; ; AVX512BW-LABEL: var_rotate_v32i16: ; AVX512BW: # %bb.0: -; AVX512BW-NEXT: vmovdqa64 {{.*#+}} zmm2 = [16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16] -; AVX512BW-NEXT: vpsubw %zmm1, %zmm2, %zmm2 -; AVX512BW-NEXT: vpsllvw %zmm1, %zmm0, %zmm1 -; AVX512BW-NEXT: vpsrlvw %zmm2, %zmm0, %zmm0 -; AVX512BW-NEXT: vporq %zmm0, %zmm1, %zmm0 +; AVX512BW-NEXT: vpandq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %zmm1, %zmm1 +; AVX512BW-NEXT: vpsllvw %zmm1, %zmm0, %zmm2 +; AVX512BW-NEXT: vmovdqa64 {{.*#+}} zmm3 = [16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16] +; AVX512BW-NEXT: vpsubw %zmm1, %zmm3, %zmm1 +; AVX512BW-NEXT: vpsrlvw %zmm1, %zmm0, %zmm0 +; AVX512BW-NEXT: vporq %zmm0, %zmm2, %zmm0 ; AVX512BW-NEXT: retq ; ; AVX512VLBW-LABEL: var_rotate_v32i16: ; AVX512VLBW: # %bb.0: -; AVX512VLBW-NEXT: vmovdqa64 {{.*#+}} zmm2 = [16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16] -; AVX512VLBW-NEXT: vpsubw %zmm1, %zmm2, %zmm2 -; AVX512VLBW-NEXT: vpsllvw %zmm1, %zmm0, %zmm1 -; AVX512VLBW-NEXT: vpsrlvw %zmm2, %zmm0, %zmm0 -; AVX512VLBW-NEXT: vporq %zmm0, %zmm1, %zmm0 +; AVX512VLBW-NEXT: vpandq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %zmm1, %zmm1 +; AVX512VLBW-NEXT: vpsllvw %zmm1, %zmm0, %zmm2 +; AVX512VLBW-NEXT: vmovdqa64 {{.*#+}} zmm3 = [16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16] +; AVX512VLBW-NEXT: vpsubw %zmm1, %zmm3, %zmm1 +; AVX512VLBW-NEXT: vpsrlvw %zmm1, %zmm0, %zmm0 +; AVX512VLBW-NEXT: vporq %zmm0, %zmm2, %zmm0 ; AVX512VLBW-NEXT: retq ; ; AVX512VBMI2-LABEL: var_rotate_v32i16: @@ -341,22 +343,24 @@ define <32 x i16> @splatvar_rotate_v32i16(<32 x i16> %a, <32 x i16> %b) nounwind ; ; AVX512BW-LABEL: splatvar_rotate_v32i16: ; AVX512BW: # %bb.0: +; AVX512BW-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1 ; AVX512BW-NEXT: vpmovzxwq {{.*#+}} xmm2 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero +; AVX512BW-NEXT: vpsllw %xmm2, %zmm0, %zmm2 ; AVX512BW-NEXT: vmovdqa {{.*#+}} xmm3 = [16,16,16,16,16,16,16,16] ; AVX512BW-NEXT: vpsubw %xmm1, %xmm3, %xmm1 ; AVX512BW-NEXT: vpmovzxwq {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero -; AVX512BW-NEXT: vpsllw %xmm2, %zmm0, %zmm2 ; AVX512BW-NEXT: vpsrlw %xmm1, %zmm0, %zmm0 ; AVX512BW-NEXT: vporq %zmm0, %zmm2, %zmm0 ; AVX512BW-NEXT: retq ; ; AVX512VLBW-LABEL: splatvar_rotate_v32i16: ; AVX512VLBW: # %bb.0: +; AVX512VLBW-NEXT: vpand 
{{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1 ; AVX512VLBW-NEXT: vpmovzxwq {{.*#+}} xmm2 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero +; AVX512VLBW-NEXT: vpsllw %xmm2, %zmm0, %zmm2 ; AVX512VLBW-NEXT: vmovdqa {{.*#+}} xmm3 = [16,16,16,16,16,16,16,16] ; AVX512VLBW-NEXT: vpsubw %xmm1, %xmm3, %xmm1 ; AVX512VLBW-NEXT: vpmovzxwq {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero -; AVX512VLBW-NEXT: vpsllw %xmm2, %zmm0, %zmm2 ; AVX512VLBW-NEXT: vpsrlw %xmm1, %zmm0, %zmm0 ; AVX512VLBW-NEXT: vporq %zmm0, %zmm2, %zmm0 ; AVX512VLBW-NEXT: retq From 286237962ad35bc4756947c32310838ad4c76451 Mon Sep 17 00:00:00 2001 From: Matt Arsenault Date: Thu, 16 Dec 2021 13:47:57 -0500 Subject: [PATCH 224/293] InstCombine: Gracefully handle more allocas in the wrong address space Officially this is currently required to always use the datalayout's alloca address space. This may change in the future, and it's cleaner to propagate the existing alloca's addrspace anyway. This is a triple fix. Initially the change in simplifyAllocaArraySize would drop the address space, but produce output. Fixing this hit an assertion in the cast combine. This patch also makes the changes to handle this situation from a33e12801279a947c74fdee2655b24480941fb39 dead, so eliminate it. InstCombine should not take it upon itself to introduce addrspacecasts, and preserve the original address space instead. --- llvm/include/llvm/IR/Instructions.h | 5 ++++ .../InstCombine/InstCombineCasts.cpp | 2 +- .../InstCombineLoadStoreAlloca.cpp | 17 ++++-------- .../InstCombine/alloca-in-non-alloca-as.ll | 27 +++++++++++++++---- 4 files changed, 33 insertions(+), 18 deletions(-) diff --git a/llvm/include/llvm/IR/Instructions.h b/llvm/include/llvm/IR/Instructions.h index 046e9b5e809e..ccf17628e265 100644 --- a/llvm/include/llvm/IR/Instructions.h +++ b/llvm/include/llvm/IR/Instructions.h @@ -105,6 +105,11 @@ class AllocaInst : public UnaryInstruction { return cast(Instruction::getType()); } + /// Return the address space for the allocation. + unsigned getAddressSpace() const { + return getType()->getAddressSpace(); + } + /// Get allocation size in bits. Returns None if size can't be determined, /// e.g. in case of a VLA. 
Optional getAllocationSizeInBits(const DataLayout &DL) const; diff --git a/llvm/lib/Transforms/InstCombine/InstCombineCasts.cpp b/llvm/lib/Transforms/InstCombine/InstCombineCasts.cpp index 18eb245779bf..8df4a4529f47 100644 --- a/llvm/lib/Transforms/InstCombine/InstCombineCasts.cpp +++ b/llvm/lib/Transforms/InstCombine/InstCombineCasts.cpp @@ -157,7 +157,7 @@ Instruction *InstCombinerImpl::PromoteCastOfAllocation(BitCastInst &CI, Amt = Builder.CreateAdd(Amt, Off); } - AllocaInst *New = Builder.CreateAlloca(CastElTy, Amt); + AllocaInst *New = Builder.CreateAlloca(CastElTy, AI.getAddressSpace(), Amt); New->setAlignment(AI.getAlign()); New->takeName(&AI); New->setUsedWithInAlloca(AI.isUsedWithInAlloca()); diff --git a/llvm/lib/Transforms/InstCombine/InstCombineLoadStoreAlloca.cpp b/llvm/lib/Transforms/InstCombine/InstCombineLoadStoreAlloca.cpp index 286a947fc603..0dbfdba353c4 100644 --- a/llvm/lib/Transforms/InstCombine/InstCombineLoadStoreAlloca.cpp +++ b/llvm/lib/Transforms/InstCombine/InstCombineLoadStoreAlloca.cpp @@ -183,7 +183,8 @@ static Instruction *simplifyAllocaArraySize(InstCombinerImpl &IC, if (const ConstantInt *C = dyn_cast(AI.getArraySize())) { if (C->getValue().getActiveBits() <= 64) { Type *NewTy = ArrayType::get(AI.getAllocatedType(), C->getZExtValue()); - AllocaInst *New = IC.Builder.CreateAlloca(NewTy, nullptr, AI.getName()); + AllocaInst *New = IC.Builder.CreateAlloca(NewTy, AI.getAddressSpace(), + nullptr, AI.getName()); New->setAlignment(AI.getAlign()); // Scan to the end of the allocation instructions, to skip over a block of @@ -199,21 +200,13 @@ static Instruction *simplifyAllocaArraySize(InstCombinerImpl &IC, Type *IdxTy = IC.getDataLayout().getIntPtrType(AI.getType()); Value *NullIdx = Constant::getNullValue(IdxTy); Value *Idx[2] = {NullIdx, NullIdx}; - Instruction *NewI = GetElementPtrInst::CreateInBounds( + Instruction *GEP = GetElementPtrInst::CreateInBounds( NewTy, New, Idx, New->getName() + ".sub"); - IC.InsertNewInstBefore(NewI, *It); - - // Gracefully handle allocas in other address spaces. - if (AI.getType()->getPointerAddressSpace() != - NewI->getType()->getPointerAddressSpace()) { - NewI = - CastInst::CreatePointerBitCastOrAddrSpaceCast(NewI, AI.getType()); - IC.InsertNewInstBefore(NewI, *It); - } + IC.InsertNewInstBefore(GEP, *It); // Now make everything use the getelementptr instead of the original // allocation. 
- return IC.replaceInstUsesWith(AI, NewI); + return IC.replaceInstUsesWith(AI, GEP); } } diff --git a/llvm/test/Transforms/InstCombine/alloca-in-non-alloca-as.ll b/llvm/test/Transforms/InstCombine/alloca-in-non-alloca-as.ll index c1c45b8e5ab3..c8987e11a723 100644 --- a/llvm/test/Transforms/InstCombine/alloca-in-non-alloca-as.ll +++ b/llvm/test/Transforms/InstCombine/alloca-in-non-alloca-as.ll @@ -11,11 +11,9 @@ declare void @use(i8*, i32**) define weak amdgpu_kernel void @__omp_offloading_802_ea0109_main_l8(i32* %a) { ; CHECK-LABEL: @__omp_offloading_802_ea0109_main_l8( ; CHECK-NEXT: .master: -; CHECK-NEXT: [[TMP0:%.*]] = alloca i32*, align 1, addrspace(5) -; CHECK-NEXT: [[DOTSUB:%.*]] = bitcast i32* addrspace(5)* [[TMP0]] to i8 addrspace(5)* -; CHECK-NEXT: [[TMP1:%.*]] = addrspacecast i8 addrspace(5)* [[DOTSUB]] to i8* -; CHECK-NEXT: [[A_ON_STACK:%.*]] = addrspacecast i32* addrspace(5)* [[TMP0]] to i32** -; CHECK-NEXT: call void @use(i8* [[TMP1]], i32** [[A_ON_STACK]]) +; CHECK-NEXT: [[TMP0:%.*]] = alloca i32*, align 1 +; CHECK-NEXT: [[DOTSUB:%.*]] = bitcast i32** [[TMP0]] to i8* +; CHECK-NEXT: call void @use(i8* [[DOTSUB]], i32** [[TMP0]]) ; CHECK-NEXT: ret void ; .master: @@ -25,3 +23,22 @@ define weak amdgpu_kernel void @__omp_offloading_802_ea0109_main_l8(i32* %a) { call void @use(i8* %0, i32** %a_on_stack) ret void } + +%struct.widget = type { [8 x i8] } + +define void @spam(i64* %arg1) { +; CHECK-LABEL: @spam( +; CHECK-NEXT: bb: +; CHECK-NEXT: [[ALLOCA1:%.*]] = alloca [0 x [30 x %struct.widget]], align 16 +; CHECK-NEXT: [[GEP:%.*]] = getelementptr inbounds [0 x [30 x %struct.widget]], [0 x [30 x %struct.widget]]* [[ALLOCA1]], i64 0, i64 0, i64 0 +; CHECK-NEXT: call void @zot(%struct.widget* [[GEP]]) +; CHECK-NEXT: ret void +; +bb: + %alloca = alloca [30 x %struct.widget], i32 0, align 16 + %gep = getelementptr inbounds [30 x %struct.widget], [30 x %struct.widget]* %alloca, i64 0, i64 0 + call void @zot(%struct.widget* %gep) + ret void +} + +declare hidden void @zot(%struct.widget*) From 8020458c5dc2be841c07d26ff75b5471314e6631 Mon Sep 17 00:00:00 2001 From: alex-t Date: Fri, 24 Dec 2021 01:01:07 +0300 Subject: [PATCH 225/293] [AMDGPU] Changing S_AND_B32 to V_AND_B32_e64 in the divergent 'trunc' to i1 pattern In 'trunc' i16/32/64 to i1 pattern the 'and $src, 1' node supply operand to 'setcc'. The latter is selected to S_CMP_EQ/V_CMP_EQ dependent on the divergence. In case the 'and' is scalar and 'setcc' is divergent, we need VGPR to SGPR copy to adjust input operand for V_CMP_EQ. This patch changes the S_AND_B32 to V_AND_B32_e64 in the 'trunc to i1' divergent patterns. 
Reviewed By: rampitec Differential Revision: https://reviews.llvm.org/D116241 --- llvm/lib/Target/AMDGPU/SIInstructions.td | 12 ++-- llvm/lib/Target/AMDGPU/VOPInstructions.td | 12 ++++ .../AMDGPU/divergence-driven-trunc-to-i1.ll | 59 +++++++++++++++++++ 3 files changed, 77 insertions(+), 6 deletions(-) create mode 100644 llvm/test/CodeGen/AMDGPU/divergence-driven-trunc-to-i1.ll diff --git a/llvm/lib/Target/AMDGPU/SIInstructions.td b/llvm/lib/Target/AMDGPU/SIInstructions.td index 95744b6390c5..636337ede000 100644 --- a/llvm/lib/Target/AMDGPU/SIInstructions.td +++ b/llvm/lib/Target/AMDGPU/SIInstructions.td @@ -2212,18 +2212,18 @@ def : GCNPat < >; def : GCNPat < - (i1 (trunc i32:$a)), - (V_CMP_EQ_U32_e64 (S_AND_B32 (i32 1), $a), (i32 1)) + (i1 (DivergentUnaryFrag i32:$a)), + (V_CMP_EQ_U32_e64 (V_AND_B32_e64 (i32 1), $a), (i32 1)) >; def : GCNPat < - (i1 (trunc i16:$a)), - (V_CMP_EQ_U32_e64 (S_AND_B32 (i32 1), $a), (i32 1)) + (i1 (DivergentUnaryFrag i16:$a)), + (V_CMP_EQ_U32_e64 (V_AND_B32_e64 (i32 1), $a), (i32 1)) >; def : GCNPat < - (i1 (trunc i64:$a)), - (V_CMP_EQ_U32_e64 (S_AND_B32 (i32 1), + (i1 (DivergentUnaryFrag i64:$a)), + (V_CMP_EQ_U32_e64 (V_AND_B32_e64 (i32 1), (i32 (EXTRACT_SUBREG $a, sub0))), (i32 1)) >; diff --git a/llvm/lib/Target/AMDGPU/VOPInstructions.td b/llvm/lib/Target/AMDGPU/VOPInstructions.td index a3eccf13cd71..a8368892c565 100644 --- a/llvm/lib/Target/AMDGPU/VOPInstructions.td +++ b/llvm/lib/Target/AMDGPU/VOPInstructions.td @@ -794,6 +794,18 @@ class VOPPatGen { list ret = [!con(Outs, (set Ins))]; } +class DivergentUnaryFrag : PatFrag < + (ops node:$src0), + (Op $src0), + [{ return N->isDivergent(); }]> { + // This check is unnecessary as it's captured by the result register + // bank constraint. + // + // FIXME: Should add a way for the emitter to recognize this is a + // trivially true predicate to eliminate the check. 
+ let GISelPredicateCode = [{return true;}]; +} + class VOPPatOrNull { list ret = !if(!ne(P.NeedPatGen,PatGenMode.NoPattern), VOPPatGen.ret, []); } diff --git a/llvm/test/CodeGen/AMDGPU/divergence-driven-trunc-to-i1.ll b/llvm/test/CodeGen/AMDGPU/divergence-driven-trunc-to-i1.ll new file mode 100644 index 000000000000..4429ee6f3ba6 --- /dev/null +++ b/llvm/test/CodeGen/AMDGPU/divergence-driven-trunc-to-i1.ll @@ -0,0 +1,59 @@ +; RUN: llc -march=amdgcn -stop-after=amdgpu-isel < %s | FileCheck -check-prefix=GCN %s + +; GCN-LABEL: name: uniform_trunc_i16_to_i1 +; GCN: S_AND_B32 1 +; GCN: S_CMP_EQ_U32 +define amdgpu_kernel void @uniform_trunc_i16_to_i1(i1 addrspace(1)* %out, i16 %x, i1 %z) { + %setcc = icmp slt i16 %x, 0 + %select = select i1 %setcc, i1 true, i1 %z + store i1 %select, i1 addrspace(1)* %out + ret void +} + +; GCN-LABEL: name: divergent_trunc_i16_to_i1 +; GCN: V_AND_B32_e64 1 +; GCN: V_CMP_EQ_U32_e64 +define i1 @divergent_trunc_i16_to_i1(i1 addrspace(1)* %out, i16 %x, i1 %z) { + %setcc = icmp slt i16 %x, 0 + %select = select i1 %setcc, i1 true, i1 %z + ret i1 %select +} + +; GCN-LABEL: name: uniform_trunc_i32_to_i1 +; GCN: S_AND_B32 1 +; GCN: S_CMP_EQ_U32 +define amdgpu_kernel void @uniform_trunc_i32_to_i1(i1 addrspace(1)* %out, i32 %x, i1 %z) { + %setcc = icmp slt i32 %x, 0 + %select = select i1 %setcc, i1 true, i1 %z + store i1 %select, i1 addrspace(1)* %out + ret void +} + +; GCN-LABEL: name: divergent_trunc_i32_to_i1 +; GCN: V_AND_B32_e64 1 +; GCN: V_CMP_EQ_U32_e64 +define i1 @divergent_trunc_i32_to_i1(i1 addrspace(1)* %out, i32 %x, i1 %z) { + %setcc = icmp slt i32 %x, 0 + %select = select i1 %setcc, i1 true, i1 %z + ret i1 %select +} + +; GCN-LABEL: name: uniform_trunc_i64_to_i1 +; GCN: S_AND_B32 1 +; GCN: S_CMP_EQ_U32 +define amdgpu_kernel void @uniform_trunc_i64_to_i1(i1 addrspace(1)* %out, i64 %x, i1 %z) { + %setcc = icmp slt i64 %x, 0 + %select = select i1 %setcc, i1 true, i1 %z + store i1 %select, i1 addrspace(1)* %out + ret void +} + +; GCN-LABEL: name: divergent_trunc_i64_to_i1 +; GCN: V_AND_B32_e64 1 +; GCN: V_CMP_EQ_U32_e64 +define i1 @divergent_trunc_i64_to_i1(i1 addrspace(1)* %out, i64 %x, i1 %z) { + %setcc = icmp slt i64 %x, 0 + %select = select i1 %setcc, i1 true, i1 %z + ret i1 %select +} + From d5dc3964a7417a34fe581d93ff9642923f8a634d Mon Sep 17 00:00:00 2001 From: Alexey Zhikhartsev Date: Wed, 15 Dec 2021 12:14:13 -0500 Subject: [PATCH 226/293] [DFAJumpThreading] Determinator BB should precede switch-defining BB Otherwise, it is possible that the state defined in the determinator block defines the state for the next iteration of the loop, rather than for the current one. Fixes llvm-test-suite's SingleSource/Regression/C/gcc-c-torture/execute/pr80421.c Differential Revision: https://reviews.llvm.org/D115832 --- .../Transforms/Scalar/DFAJumpThreading.cpp | 58 ++++++++++++++++++- .../dfa-jump-threading-analysis.ll | 14 +++-- 2 files changed, 67 insertions(+), 5 deletions(-) diff --git a/llvm/lib/Transforms/Scalar/DFAJumpThreading.cpp b/llvm/lib/Transforms/Scalar/DFAJumpThreading.cpp index 8c4523206070..dda1a2f08076 100644 --- a/llvm/lib/Transforms/Scalar/DFAJumpThreading.cpp +++ b/llvm/lib/Transforms/Scalar/DFAJumpThreading.cpp @@ -588,7 +588,7 @@ struct AllSwitchPaths { PrevBB = BB; } - if (TPath.isExitValueSet()) + if (TPath.isExitValueSet() && isSupported(TPath)) TPaths.push_back(TPath); } } @@ -683,6 +683,62 @@ struct AllSwitchPaths { return Res; } + /// The determinator BB should precede the switch-defining BB. 
+ /// + /// Otherwise, it is possible that the state defined in the determinator block + /// defines the state for the next iteration of the loop, rather than for the + /// current one. + /// + /// Currently supported paths: + /// \code + /// < switch bb1 determ def > [ 42, determ ] + /// < switch_and_def bb1 determ > [ 42, determ ] + /// < switch_and_def_and_determ bb1 > [ 42, switch_and_def_and_determ ] + /// \endcode + /// + /// Unsupported paths: + /// \code + /// < switch bb1 def determ > [ 43, determ ] + /// < switch_and_determ bb1 def > [ 43, switch_and_determ ] + /// \endcode + bool isSupported(const ThreadingPath &TPath) { + Instruction *SwitchCondI = dyn_cast(Switch->getCondition()); + assert(SwitchCondI); + if (!SwitchCondI) + return false; + + const BasicBlock *SwitchCondDefBB = SwitchCondI->getParent(); + const BasicBlock *SwitchCondUseBB = Switch->getParent(); + const BasicBlock *DeterminatorBB = TPath.getDeterminatorBB(); + + assert( + SwitchCondUseBB == TPath.getPath().front() && + "The first BB in a threading path should have the switch instruction"); + if (SwitchCondUseBB != TPath.getPath().front()) + return false; + + // Make DeterminatorBB the first element in Path. + PathType Path = TPath.getPath(); + auto ItDet = std::find(Path.begin(), Path.end(), DeterminatorBB); + std::rotate(Path.begin(), ItDet, Path.end()); + + bool IsDetBBSeen = false; + bool IsDefBBSeen = false; + bool IsUseBBSeen = false; + for (BasicBlock *BB : Path) { + if (BB == DeterminatorBB) + IsDetBBSeen = true; + if (BB == SwitchCondDefBB) + IsDefBBSeen = true; + if (BB == SwitchCondUseBB) + IsUseBBSeen = true; + if (IsDetBBSeen && IsUseBBSeen && !IsDefBBSeen) + return false; + } + + return true; + } + SwitchInst *Switch; BasicBlock *SwitchBlock; OptimizationRemarkEmitter *ORE; diff --git a/llvm/test/Transforms/DFAJumpThreading/dfa-jump-threading-analysis.ll b/llvm/test/Transforms/DFAJumpThreading/dfa-jump-threading-analysis.ll index 7dfa1fcc3a77..ccc9b38759c4 100644 --- a/llvm/test/Transforms/DFAJumpThreading/dfa-jump-threading-analysis.ll +++ b/llvm/test/Transforms/DFAJumpThreading/dfa-jump-threading-analysis.ll @@ -109,10 +109,16 @@ exit: declare void @baz() -; Verify that having the switch block as a determinator is handled correctly. -define i32 @main() { -; CHECK: < bb43 bb59 bb3 bb31 bb41 > [ 77, bb43 ] -; CHECK-NEXT: < bb43 bb49 bb59 bb3 bb31 bb41 > [ 77, bb43 ] +; Do not jump-thread those paths where the determinator basic block does not +; precede the basic block that defines the switch condition. +; +; Otherwise, it is possible that the state defined in the determinator block +; defines the state for the next iteration of the loop, rather than for the +; current one. +define i32 @wrong_bb_order() { +; CHECK-LABEL: DFA Jump threading: wrong_bb_order +; CHECK-NOT: < bb43 bb59 bb3 bb31 bb41 > [ 77, bb43 ] +; CHECK-NOT: < bb43 bb49 bb59 bb3 bb31 bb41 > [ 77, bb43 ] bb: %i = alloca [420 x i8], align 1 %i2 = getelementptr inbounds [420 x i8], [420 x i8]* %i, i64 0, i64 390 From dc8f9fb196dab8ca31361928bd6a361dc80d8ade Mon Sep 17 00:00:00 2001 From: Anastasia Stulova Date: Fri, 24 Dec 2021 16:21:34 +0000 Subject: [PATCH 227/293] [Docs] Minor fix in clang user manual --- clang/docs/UsersManual.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/clang/docs/UsersManual.rst b/clang/docs/UsersManual.rst index 3f9947afc29b..26da5a0ff255 100644 --- a/clang/docs/UsersManual.rst +++ b/clang/docs/UsersManual.rst @@ -3537,7 +3537,7 @@ should be built or installed. 
Please refer to `the following instructions `_ for more details. Clang will expects the ``llvm-spirv`` executable to be present in the ``PATH`` environment variable. Clang uses ``llvm-spirv`` -with `the conformant assembly syntax package +with `the widely adopted assembly syntax package `_. `The versioning From 6c0eaefaf832745f509841afe4dd8a698671b86e Mon Sep 17 00:00:00 2001 From: Groverkss Date: Sat, 25 Dec 2021 00:11:35 +0530 Subject: [PATCH 228/293] [MLIR][FlatAffineConstraints][NFC] Move some static functions to be available to Presburger/ This patch moves some static functions from AffineStructures.cpp to Presburger/Utils.cpp and some to be private members of FlatAffineConstraints (which will later be moved to IntegerPolyhedron) to allow for a smoother transition for moving FlatAffineConstraints math functionality to Presburger/IntegerPolyhedron. This patch is part of a series of patches for moving math functionality to Presburger directory. Reviewed By: arjunp, bondhugula Differential Revision: https://reviews.llvm.org/D115869 --- mlir/include/mlir/Analysis/AffineStructures.h | 10 + mlir/include/mlir/Analysis/Presburger/Utils.h | 40 ++++ mlir/lib/Analysis/AffineStructures.cpp | 177 ++---------------- mlir/lib/Analysis/Presburger/CMakeLists.txt | 1 + mlir/lib/Analysis/Presburger/Utils.cpp | 154 +++++++++++++++ 5 files changed, 223 insertions(+), 159 deletions(-) create mode 100644 mlir/include/mlir/Analysis/Presburger/Utils.h create mode 100644 mlir/lib/Analysis/Presburger/Utils.cpp diff --git a/mlir/include/mlir/Analysis/AffineStructures.h b/mlir/include/mlir/Analysis/AffineStructures.h index 9e7ffb125f7e..089d8a9f1eeb 100644 --- a/mlir/include/mlir/Analysis/AffineStructures.h +++ b/mlir/include/mlir/Analysis/AffineStructures.h @@ -419,6 +419,16 @@ class FlatAffineConstraints : public IntegerPolyhedron { /// Normalized each constraints by the GCD of its coefficients. void normalizeConstraintsByGCD(); + /// Searches for a constraint with a non-zero coefficient at `colIdx` in + /// equality (isEq=true) or inequality (isEq=false) constraints. + /// Returns true and sets row found in search in `rowIdx`, false otherwise. + bool findConstraintWithNonZeroAt(unsigned colIdx, bool isEq, + unsigned *rowIdx) const; + + /// Returns true if the pos^th column is all zero for both inequalities and + /// equalities. + bool isColZero(unsigned pos) const; + /// A parameter that controls detection of an unrealistic number of /// constraints. If the number of constraints is this many times the number of /// variables, we consider such a system out of line with the intended use diff --git a/mlir/include/mlir/Analysis/Presburger/Utils.h b/mlir/include/mlir/Analysis/Presburger/Utils.h new file mode 100644 index 000000000000..6a72471d9697 --- /dev/null +++ b/mlir/include/mlir/Analysis/Presburger/Utils.h @@ -0,0 +1,40 @@ +//===- Utils.h - General utilities for Presburger library ------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// Utility functions required by the Presburger Library. 
+// +//===----------------------------------------------------------------------===// + +#ifndef MLIR_ANALYSIS_PRESBURGER_UTILS_H +#define MLIR_ANALYSIS_PRESBURGER_UTILS_H + +#include "mlir/Support/LLVM.h" + +namespace mlir { + +class FlatAffineConstraints; + +namespace presburger_utils { + +/// Check if the pos^th identifier can be expressed as a floordiv of an affine +/// function of other identifiers (where the divisor is a positive constant). +/// `foundRepr` contains a boolean for each identifier indicating if the +/// explicit representation for that identifier has already been computed. +/// Returns the upper and lower bound inequalities using which the floordiv +/// can be computed. If the representation could be computed, `dividend` and +/// `denominator` are set. If the representation could not be computed, +/// `llvm::None` is returned. +Optional> +computeSingleVarRepr(const FlatAffineConstraints &cst, ArrayRef foundRepr, + unsigned pos, SmallVector ÷nd, + unsigned &divisor); + +} // namespace presburger_utils +} // namespace mlir + +#endif // MLIR_ANALYSIS_PRESBURGER_UTILS_H diff --git a/mlir/lib/Analysis/AffineStructures.cpp b/mlir/lib/Analysis/AffineStructures.cpp index f4d857479cde..8fee0c8f4f3e 100644 --- a/mlir/lib/Analysis/AffineStructures.cpp +++ b/mlir/lib/Analysis/AffineStructures.cpp @@ -13,6 +13,7 @@ #include "mlir/Analysis/AffineStructures.h" #include "mlir/Analysis/LinearTransform.h" #include "mlir/Analysis/Presburger/Simplex.h" +#include "mlir/Analysis/Presburger/Utils.h" #include "mlir/Dialect/Affine/IR/AffineOps.h" #include "mlir/Dialect/Affine/IR/AffineValueMap.h" #include "mlir/Dialect/Arithmetic/IR/Arithmetic.h" @@ -700,14 +701,13 @@ void FlatAffineValueConstraints::addAffineIfOpDomain(AffineIfOp ifOp) { // Searches for a constraint with a non-zero coefficient at `colIdx` in // equality (isEq=true) or inequality (isEq=false) constraints. // Returns true and sets row found in search in `rowIdx`, false otherwise. -static bool findConstraintWithNonZeroAt(const FlatAffineConstraints &cst, - unsigned colIdx, bool isEq, - unsigned *rowIdx) { - assert(colIdx < cst.getNumCols() && "position out of bounds"); +bool FlatAffineConstraints::findConstraintWithNonZeroAt( + unsigned colIdx, bool isEq, unsigned *rowIdx) const { + assert(colIdx < getNumCols() && "position out of bounds"); auto at = [&](unsigned rowIdx) -> int64_t { - return isEq ? cst.atEq(rowIdx, colIdx) : cst.atIneq(rowIdx, colIdx); + return isEq ? atEq(rowIdx, colIdx) : atIneq(rowIdx, colIdx); }; - unsigned e = isEq ? cst.getNumEqualities() : cst.getNumInequalities(); + unsigned e = isEq ? getNumEqualities() : getNumInequalities(); for (*rowIdx = 0; *rowIdx < e; ++(*rowIdx)) { if (at(*rowIdx) != 0) { return true; @@ -1203,145 +1203,6 @@ bool FlatAffineConstraints::containsPoint(ArrayRef point) const { return true; } -/// Check if the pos^th identifier can be represented as a division using upper -/// bound inequality at position `ubIneq` and lower bound inequality at position -/// `lbIneq`. 
-/// -/// Let `id` be the pos^th identifier, then `id` is equivalent to -/// `expr floordiv divisor` if there are constraints of the form: -/// 0 <= expr - divisor * id <= divisor - 1 -/// Rearranging, we have: -/// divisor * id - expr + (divisor - 1) >= 0 <-- Lower bound for 'id' -/// -divisor * id + expr >= 0 <-- Upper bound for 'id' -/// -/// For example: -/// 32*k >= 16*i + j - 31 <-- Lower bound for 'k' -/// 32*k <= 16*i + j <-- Upper bound for 'k' -/// expr = 16*i + j, divisor = 32 -/// k = ( 16*i + j ) floordiv 32 -/// -/// 4q >= i + j - 2 <-- Lower bound for 'q' -/// 4q <= i + j + 1 <-- Upper bound for 'q' -/// expr = i + j + 1, divisor = 4 -/// q = (i + j + 1) floordiv 4 -// -/// This function also supports detecting divisions from bounds that are -/// strictly tighter than the division bounds described above, since tighter -/// bounds imply the division bounds. For example: -/// 4q - i - j + 2 >= 0 <-- Lower bound for 'q' -/// -4q + i + j >= 0 <-- Tight upper bound for 'q' -/// -/// To extract floor divisions with tighter bounds, we assume that that the -/// constraints are of the form: -/// c <= expr - divisior * id <= divisor - 1, where 0 <= c <= divisor - 1 -/// Rearranging, we have: -/// divisor * id - expr + (divisor - 1) >= 0 <-- Lower bound for 'id' -/// -divisor * id + expr - c >= 0 <-- Upper bound for 'id' -/// -/// If successful, `expr` is set to dividend of the division and `divisor` is -/// set to the denominator of the division. -static LogicalResult getDivRepr(const FlatAffineConstraints &cst, unsigned pos, - unsigned ubIneq, unsigned lbIneq, - SmallVector &expr, - unsigned &divisor) { - - assert(pos <= cst.getNumIds() && "Invalid identifier position"); - assert(ubIneq <= cst.getNumInequalities() && - "Invalid upper bound inequality position"); - assert(lbIneq <= cst.getNumInequalities() && - "Invalid upper bound inequality position"); - - // Extract divisor from the lower bound. - divisor = cst.atIneq(lbIneq, pos); - - // First, check if the constraints are opposite of each other except the - // constant term. - unsigned i = 0, e = 0; - for (i = 0, e = cst.getNumIds(); i < e; ++i) - if (cst.atIneq(ubIneq, i) != -cst.atIneq(lbIneq, i)) - break; - - if (i < e) - return failure(); - - // Then, check if the constant term is of the proper form. - // Due to the form of the upper/lower bound inequalities, the sum of their - // constants is `divisor - 1 - c`. From this, we can extract c: - int64_t constantSum = cst.atIneq(lbIneq, cst.getNumCols() - 1) + - cst.atIneq(ubIneq, cst.getNumCols() - 1); - int64_t c = divisor - 1 - constantSum; - - // Check if `c` satisfies the condition `0 <= c <= divisor - 1`. This also - // implictly checks that `divisor` is positive. - if (!(c >= 0 && c <= divisor - 1)) - return failure(); - - // The inequality pair can be used to extract the division. - // Set `expr` to the dividend of the division except the constant term, which - // is set below. - expr.resize(cst.getNumCols(), 0); - for (i = 0, e = cst.getNumIds(); i < e; ++i) - if (i != pos) - expr[i] = cst.atIneq(ubIneq, i); - - // From the upper bound inequality's form, its constant term is equal to the - // constant term of `expr`, minus `c`. From this, - // constant term of `expr` = constant term of upper bound + `c`. - expr.back() = cst.atIneq(ubIneq, cst.getNumCols() - 1) + c; - - return success(); -} - -/// Check if the pos^th identifier can be expressed as a floordiv of an affine -/// function of other identifiers (where the divisor is a positive constant). 
-/// `foundRepr` contains a boolean for each identifier indicating if the -/// explicit representation for that identifier has already been computed. -/// Returns the upper and lower bound inequalities using which the floordiv can -/// be computed. If the representation could be computed, `dividend` and -/// `denominator` are set. If the representation could not be computed, -/// `llvm::None` is returned. -static Optional> -computeSingleVarRepr(const FlatAffineConstraints &cst, - const SmallVector &foundRepr, unsigned pos, - SmallVector ÷nd, unsigned &divisor) { - assert(pos < cst.getNumIds() && "invalid position"); - assert(foundRepr.size() == cst.getNumIds() && - "Size of foundRepr does not match total number of variables"); - - SmallVector lbIndices, ubIndices; - cst.getLowerAndUpperBoundIndices(pos, &lbIndices, &ubIndices); - - for (unsigned ubPos : ubIndices) { - for (unsigned lbPos : lbIndices) { - // Attempt to get divison representation from ubPos, lbPos. - if (failed(getDivRepr(cst, pos, ubPos, lbPos, dividend, divisor))) - continue; - - // Check if the inequalities depend on a variable for which - // an explicit representation has not been found yet. - // Exit to avoid circular dependencies between divisions. - unsigned c, f; - for (c = 0, f = cst.getNumIds(); c < f; ++c) { - if (c == pos) - continue; - if (!foundRepr[c] && dividend[c] != 0) - break; - } - - // Expression can't be constructed as it depends on a yet unknown - // identifier. - // TODO: Visit/compute the identifiers in an order so that this doesn't - // happen. More complex but much more efficient. - if (c < f) - continue; - - return std::make_pair(ubPos, lbPos); - } - } - - return llvm::None; -} - void FlatAffineConstraints::getLocalReprs( std::vector>> &repr) const { std::vector> dividends(getNumLocalIds()); @@ -1378,8 +1239,9 @@ void FlatAffineConstraints::getLocalReprs( changed = false; for (unsigned i = 0, e = getNumLocalIds(); i < e; ++i) { if (!foundRepr[i + divOffset]) { - if (auto res = computeSingleVarRepr(*this, foundRepr, divOffset + i, - dividends[i], denominators[i])) { + if (auto res = presburger_utils::computeSingleVarRepr( + *this, foundRepr, divOffset + i, dividends[i], + denominators[i])) { foundRepr[i + divOffset] = true; repr[i] = res; changed = true; @@ -1437,11 +1299,9 @@ unsigned FlatAffineConstraints::gaussianEliminateIds(unsigned posStart, for (pivotCol = posStart; pivotCol < posLimit; ++pivotCol) { // Find a row which has a non-zero coefficient in column 'j'. unsigned pivotRow; - if (!findConstraintWithNonZeroAt(*this, pivotCol, /*isEq=*/true, - &pivotRow)) { + if (!findConstraintWithNonZeroAt(pivotCol, /*isEq=*/true, &pivotRow)) { // No pivot row in equalities with non-zero at 'pivotCol'. - if (!findConstraintWithNonZeroAt(*this, pivotCol, /*isEq=*/false, - &pivotRow)) { + if (!findConstraintWithNonZeroAt(pivotCol, /*isEq=*/false, &pivotRow)) { // If inequalities are also non-zero in 'pivotCol', it can be // eliminated. continue; @@ -1670,7 +1530,8 @@ static bool detectAsFloorDiv(const FlatAffineConstraints &cst, unsigned pos, SmallVector dividend; unsigned divisor; - auto ulPair = computeSingleVarRepr(cst, foundRepr, pos, dividend, divisor); + auto ulPair = presburger_utils::computeSingleVarRepr(cst, foundRepr, pos, + dividend, divisor); // No upper-lower bound pair found for this var. if (!ulPair) @@ -2109,7 +1970,7 @@ void FlatAffineConstraints::getSliceBounds(unsigned offset, unsigned num, // Detect an identifier as an expression of other identifiers. 
unsigned idx; - if (!findConstraintWithNonZeroAt(*this, pos, /*isEq=*/true, &idx)) { + if (!findConstraintWithNonZeroAt(pos, /*isEq=*/true, &idx)) { continue; } @@ -3447,12 +3308,10 @@ void FlatAffineValueConstraints::getIneqAsAffineValueMap( vmap.reset(AffineMap::get(numDims - 1, numSyms, boundExpr), operands); } -/// Returns true if the pos^th column is all zero for both inequalities and -/// equalities.. -static bool isColZero(const FlatAffineConstraints &cst, unsigned pos) { +bool FlatAffineConstraints::isColZero(unsigned pos) const { unsigned rowPos; - return !findConstraintWithNonZeroAt(cst, pos, /*isEq=*/false, &rowPos) && - !findConstraintWithNonZeroAt(cst, pos, /*isEq=*/true, &rowPos); + return !findConstraintWithNonZeroAt(pos, /*isEq=*/false, &rowPos) && + !findConstraintWithNonZeroAt(pos, /*isEq=*/true, &rowPos); } IntegerSet FlatAffineConstraints::getAsIntegerSet(MLIRContext *context) const { @@ -3471,7 +3330,7 @@ IntegerSet FlatAffineConstraints::getAsIntegerSet(MLIRContext *context) const { SmallVector noLocalRepVars; unsigned numDimsSymbols = getNumDimAndSymbolIds(); for (unsigned i = numDimsSymbols, e = getNumIds(); i < e; ++i) { - if (!memo[i] && !isColZero(*this, /*pos=*/i)) + if (!memo[i] && !isColZero(/*pos=*/i)) noLocalRepVars.push_back(i - numDimsSymbols); } if (!noLocalRepVars.empty()) { diff --git a/mlir/lib/Analysis/Presburger/CMakeLists.txt b/mlir/lib/Analysis/Presburger/CMakeLists.txt index dd8c8d96d872..0b84f031b4c0 100644 --- a/mlir/lib/Analysis/Presburger/CMakeLists.txt +++ b/mlir/lib/Analysis/Presburger/CMakeLists.txt @@ -2,6 +2,7 @@ add_mlir_library(MLIRPresburger IntegerPolyhedron.cpp Matrix.cpp Simplex.cpp + Utils.cpp DEPENDS MLIRBuiltinLocationAttributesIncGen diff --git a/mlir/lib/Analysis/Presburger/Utils.cpp b/mlir/lib/Analysis/Presburger/Utils.cpp new file mode 100644 index 000000000000..14d04d36c24e --- /dev/null +++ b/mlir/lib/Analysis/Presburger/Utils.cpp @@ -0,0 +1,154 @@ +//===- Utils.cpp - General utilities for Presburger library ---------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// Utility functions required by the Presburger Library. +// +//===----------------------------------------------------------------------===// + +#include "mlir/Analysis/Presburger/Utils.h" +#include "mlir/Analysis/AffineStructures.h" + +using namespace mlir; + +/// Check if the pos^th identifier can be represented as a division using upper +/// bound inequality at position `ubIneq` and lower bound inequality at position +/// `lbIneq`. 
+/// +/// Let `id` be the pos^th identifier, then `id` is equivalent to +/// `expr floordiv divisor` if there are constraints of the form: +/// 0 <= expr - divisor * id <= divisor - 1 +/// Rearranging, we have: +/// divisor * id - expr + (divisor - 1) >= 0 <-- Lower bound for 'id' +/// -divisor * id + expr >= 0 <-- Upper bound for 'id' +/// +/// For example: +/// 32*k >= 16*i + j - 31 <-- Lower bound for 'k' +/// 32*k <= 16*i + j <-- Upper bound for 'k' +/// expr = 16*i + j, divisor = 32 +/// k = ( 16*i + j ) floordiv 32 +/// +/// 4q >= i + j - 2 <-- Lower bound for 'q' +/// 4q <= i + j + 1 <-- Upper bound for 'q' +/// expr = i + j + 1, divisor = 4 +/// q = (i + j + 1) floordiv 4 +// +/// This function also supports detecting divisions from bounds that are +/// strictly tighter than the division bounds described above, since tighter +/// bounds imply the division bounds. For example: +/// 4q - i - j + 2 >= 0 <-- Lower bound for 'q' +/// -4q + i + j >= 0 <-- Tight upper bound for 'q' +/// +/// To extract floor divisions with tighter bounds, we assume that that the +/// constraints are of the form: +/// c <= expr - divisior * id <= divisor - 1, where 0 <= c <= divisor - 1 +/// Rearranging, we have: +/// divisor * id - expr + (divisor - 1) >= 0 <-- Lower bound for 'id' +/// -divisor * id + expr - c >= 0 <-- Upper bound for 'id' +/// +/// If successful, `expr` is set to dividend of the division and `divisor` is +/// set to the denominator of the division. +static LogicalResult getDivRepr(const FlatAffineConstraints &cst, unsigned pos, + unsigned ubIneq, unsigned lbIneq, + SmallVector &expr, + unsigned &divisor) { + + assert(pos <= cst.getNumIds() && "Invalid identifier position"); + assert(ubIneq <= cst.getNumInequalities() && + "Invalid upper bound inequality position"); + assert(lbIneq <= cst.getNumInequalities() && + "Invalid upper bound inequality position"); + + // Extract divisor from the lower bound. + divisor = cst.atIneq(lbIneq, pos); + + // First, check if the constraints are opposite of each other except the + // constant term. + unsigned i = 0, e = 0; + for (i = 0, e = cst.getNumIds(); i < e; ++i) + if (cst.atIneq(ubIneq, i) != -cst.atIneq(lbIneq, i)) + break; + + if (i < e) + return failure(); + + // Then, check if the constant term is of the proper form. + // Due to the form of the upper/lower bound inequalities, the sum of their + // constants is `divisor - 1 - c`. From this, we can extract c: + int64_t constantSum = cst.atIneq(lbIneq, cst.getNumCols() - 1) + + cst.atIneq(ubIneq, cst.getNumCols() - 1); + int64_t c = divisor - 1 - constantSum; + + // Check if `c` satisfies the condition `0 <= c <= divisor - 1`. This also + // implictly checks that `divisor` is positive. + if (!(c >= 0 && c <= divisor - 1)) + return failure(); + + // The inequality pair can be used to extract the division. + // Set `expr` to the dividend of the division except the constant term, which + // is set below. + expr.resize(cst.getNumCols(), 0); + for (i = 0, e = cst.getNumIds(); i < e; ++i) + if (i != pos) + expr[i] = cst.atIneq(ubIneq, i); + + // From the upper bound inequality's form, its constant term is equal to the + // constant term of `expr`, minus `c`. From this, + // constant term of `expr` = constant term of upper bound + `c`. + expr.back() = cst.atIneq(ubIneq, cst.getNumCols() - 1) + c; + + return success(); +} + +/// Check if the pos^th identifier can be expressed as a floordiv of an affine +/// function of other identifiers (where the divisor is a positive constant). 
+/// `foundRepr` contains a boolean for each identifier indicating if the +/// explicit representation for that identifier has already been computed. +/// Returns the upper and lower bound inequalities using which the floordiv can +/// be computed. If the representation could be computed, `dividend` and +/// `denominator` are set. If the representation could not be computed, +/// `llvm::None` is returned. +Optional> presburger_utils::computeSingleVarRepr( + const FlatAffineConstraints &cst, ArrayRef foundRepr, unsigned pos, + SmallVector ÷nd, unsigned &divisor) { + assert(pos < cst.getNumIds() && "invalid position"); + assert(foundRepr.size() == cst.getNumIds() && + "Size of foundRepr does not match total number of variables"); + + SmallVector lbIndices, ubIndices; + cst.getLowerAndUpperBoundIndices(pos, &lbIndices, &ubIndices); + + for (unsigned ubPos : ubIndices) { + for (unsigned lbPos : lbIndices) { + // Attempt to get divison representation from ubPos, lbPos. + if (failed(getDivRepr(cst, pos, ubPos, lbPos, dividend, divisor))) + continue; + + // Check if the inequalities depend on a variable for which + // an explicit representation has not been found yet. + // Exit to avoid circular dependencies between divisions. + unsigned c, f; + for (c = 0, f = cst.getNumIds(); c < f; ++c) { + if (c == pos) + continue; + if (!foundRepr[c] && dividend[c] != 0) + break; + } + + // Expression can't be constructed as it depends on a yet unknown + // identifier. + // TODO: Visit/compute the identifiers in an order so that this doesn't + // happen. More complex but much more efficient. + if (c < f) + continue; + + return std::make_pair(ubPos, lbPos); + } + } + + return llvm::None; +} From 27a0718ad0a4a566720fc11a080a47752725e747 Mon Sep 17 00:00:00 2001 From: Groverkss Date: Sat, 25 Dec 2021 00:39:27 +0530 Subject: [PATCH 229/293] Revert "[MLIR][FlatAffineConstraints][NFC] Move some static functions to be available to Presburger/" This reverts commit 6c0eaefaf832745f509841afe4dd8a698671b86e. --- mlir/include/mlir/Analysis/AffineStructures.h | 10 - mlir/include/mlir/Analysis/Presburger/Utils.h | 40 ---- mlir/lib/Analysis/AffineStructures.cpp | 177 ++++++++++++++++-- mlir/lib/Analysis/Presburger/CMakeLists.txt | 1 - mlir/lib/Analysis/Presburger/Utils.cpp | 154 --------------- 5 files changed, 159 insertions(+), 223 deletions(-) delete mode 100644 mlir/include/mlir/Analysis/Presburger/Utils.h delete mode 100644 mlir/lib/Analysis/Presburger/Utils.cpp diff --git a/mlir/include/mlir/Analysis/AffineStructures.h b/mlir/include/mlir/Analysis/AffineStructures.h index 089d8a9f1eeb..9e7ffb125f7e 100644 --- a/mlir/include/mlir/Analysis/AffineStructures.h +++ b/mlir/include/mlir/Analysis/AffineStructures.h @@ -419,16 +419,6 @@ class FlatAffineConstraints : public IntegerPolyhedron { /// Normalized each constraints by the GCD of its coefficients. void normalizeConstraintsByGCD(); - /// Searches for a constraint with a non-zero coefficient at `colIdx` in - /// equality (isEq=true) or inequality (isEq=false) constraints. - /// Returns true and sets row found in search in `rowIdx`, false otherwise. - bool findConstraintWithNonZeroAt(unsigned colIdx, bool isEq, - unsigned *rowIdx) const; - - /// Returns true if the pos^th column is all zero for both inequalities and - /// equalities. - bool isColZero(unsigned pos) const; - /// A parameter that controls detection of an unrealistic number of /// constraints. 
If the number of constraints is this many times the number of /// variables, we consider such a system out of line with the intended use diff --git a/mlir/include/mlir/Analysis/Presburger/Utils.h b/mlir/include/mlir/Analysis/Presburger/Utils.h deleted file mode 100644 index 6a72471d9697..000000000000 --- a/mlir/include/mlir/Analysis/Presburger/Utils.h +++ /dev/null @@ -1,40 +0,0 @@ -//===- Utils.h - General utilities for Presburger library ------*- C++ -*-===// -// -// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. -// See https://llvm.org/LICENSE.txt for license information. -// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -// -//===----------------------------------------------------------------------===// -// -// Utility functions required by the Presburger Library. -// -//===----------------------------------------------------------------------===// - -#ifndef MLIR_ANALYSIS_PRESBURGER_UTILS_H -#define MLIR_ANALYSIS_PRESBURGER_UTILS_H - -#include "mlir/Support/LLVM.h" - -namespace mlir { - -class FlatAffineConstraints; - -namespace presburger_utils { - -/// Check if the pos^th identifier can be expressed as a floordiv of an affine -/// function of other identifiers (where the divisor is a positive constant). -/// `foundRepr` contains a boolean for each identifier indicating if the -/// explicit representation for that identifier has already been computed. -/// Returns the upper and lower bound inequalities using which the floordiv -/// can be computed. If the representation could be computed, `dividend` and -/// `denominator` are set. If the representation could not be computed, -/// `llvm::None` is returned. -Optional> -computeSingleVarRepr(const FlatAffineConstraints &cst, ArrayRef foundRepr, - unsigned pos, SmallVector ÷nd, - unsigned &divisor); - -} // namespace presburger_utils -} // namespace mlir - -#endif // MLIR_ANALYSIS_PRESBURGER_UTILS_H diff --git a/mlir/lib/Analysis/AffineStructures.cpp b/mlir/lib/Analysis/AffineStructures.cpp index 8fee0c8f4f3e..f4d857479cde 100644 --- a/mlir/lib/Analysis/AffineStructures.cpp +++ b/mlir/lib/Analysis/AffineStructures.cpp @@ -13,7 +13,6 @@ #include "mlir/Analysis/AffineStructures.h" #include "mlir/Analysis/LinearTransform.h" #include "mlir/Analysis/Presburger/Simplex.h" -#include "mlir/Analysis/Presburger/Utils.h" #include "mlir/Dialect/Affine/IR/AffineOps.h" #include "mlir/Dialect/Affine/IR/AffineValueMap.h" #include "mlir/Dialect/Arithmetic/IR/Arithmetic.h" @@ -701,13 +700,14 @@ void FlatAffineValueConstraints::addAffineIfOpDomain(AffineIfOp ifOp) { // Searches for a constraint with a non-zero coefficient at `colIdx` in // equality (isEq=true) or inequality (isEq=false) constraints. // Returns true and sets row found in search in `rowIdx`, false otherwise. -bool FlatAffineConstraints::findConstraintWithNonZeroAt( - unsigned colIdx, bool isEq, unsigned *rowIdx) const { - assert(colIdx < getNumCols() && "position out of bounds"); +static bool findConstraintWithNonZeroAt(const FlatAffineConstraints &cst, + unsigned colIdx, bool isEq, + unsigned *rowIdx) { + assert(colIdx < cst.getNumCols() && "position out of bounds"); auto at = [&](unsigned rowIdx) -> int64_t { - return isEq ? atEq(rowIdx, colIdx) : atIneq(rowIdx, colIdx); + return isEq ? cst.atEq(rowIdx, colIdx) : cst.atIneq(rowIdx, colIdx); }; - unsigned e = isEq ? getNumEqualities() : getNumInequalities(); + unsigned e = isEq ? 
cst.getNumEqualities() : cst.getNumInequalities(); for (*rowIdx = 0; *rowIdx < e; ++(*rowIdx)) { if (at(*rowIdx) != 0) { return true; @@ -1203,6 +1203,145 @@ bool FlatAffineConstraints::containsPoint(ArrayRef point) const { return true; } +/// Check if the pos^th identifier can be represented as a division using upper +/// bound inequality at position `ubIneq` and lower bound inequality at position +/// `lbIneq`. +/// +/// Let `id` be the pos^th identifier, then `id` is equivalent to +/// `expr floordiv divisor` if there are constraints of the form: +/// 0 <= expr - divisor * id <= divisor - 1 +/// Rearranging, we have: +/// divisor * id - expr + (divisor - 1) >= 0 <-- Lower bound for 'id' +/// -divisor * id + expr >= 0 <-- Upper bound for 'id' +/// +/// For example: +/// 32*k >= 16*i + j - 31 <-- Lower bound for 'k' +/// 32*k <= 16*i + j <-- Upper bound for 'k' +/// expr = 16*i + j, divisor = 32 +/// k = ( 16*i + j ) floordiv 32 +/// +/// 4q >= i + j - 2 <-- Lower bound for 'q' +/// 4q <= i + j + 1 <-- Upper bound for 'q' +/// expr = i + j + 1, divisor = 4 +/// q = (i + j + 1) floordiv 4 +// +/// This function also supports detecting divisions from bounds that are +/// strictly tighter than the division bounds described above, since tighter +/// bounds imply the division bounds. For example: +/// 4q - i - j + 2 >= 0 <-- Lower bound for 'q' +/// -4q + i + j >= 0 <-- Tight upper bound for 'q' +/// +/// To extract floor divisions with tighter bounds, we assume that that the +/// constraints are of the form: +/// c <= expr - divisior * id <= divisor - 1, where 0 <= c <= divisor - 1 +/// Rearranging, we have: +/// divisor * id - expr + (divisor - 1) >= 0 <-- Lower bound for 'id' +/// -divisor * id + expr - c >= 0 <-- Upper bound for 'id' +/// +/// If successful, `expr` is set to dividend of the division and `divisor` is +/// set to the denominator of the division. +static LogicalResult getDivRepr(const FlatAffineConstraints &cst, unsigned pos, + unsigned ubIneq, unsigned lbIneq, + SmallVector &expr, + unsigned &divisor) { + + assert(pos <= cst.getNumIds() && "Invalid identifier position"); + assert(ubIneq <= cst.getNumInequalities() && + "Invalid upper bound inequality position"); + assert(lbIneq <= cst.getNumInequalities() && + "Invalid upper bound inequality position"); + + // Extract divisor from the lower bound. + divisor = cst.atIneq(lbIneq, pos); + + // First, check if the constraints are opposite of each other except the + // constant term. + unsigned i = 0, e = 0; + for (i = 0, e = cst.getNumIds(); i < e; ++i) + if (cst.atIneq(ubIneq, i) != -cst.atIneq(lbIneq, i)) + break; + + if (i < e) + return failure(); + + // Then, check if the constant term is of the proper form. + // Due to the form of the upper/lower bound inequalities, the sum of their + // constants is `divisor - 1 - c`. From this, we can extract c: + int64_t constantSum = cst.atIneq(lbIneq, cst.getNumCols() - 1) + + cst.atIneq(ubIneq, cst.getNumCols() - 1); + int64_t c = divisor - 1 - constantSum; + + // Check if `c` satisfies the condition `0 <= c <= divisor - 1`. This also + // implictly checks that `divisor` is positive. + if (!(c >= 0 && c <= divisor - 1)) + return failure(); + + // The inequality pair can be used to extract the division. + // Set `expr` to the dividend of the division except the constant term, which + // is set below. 
+ expr.resize(cst.getNumCols(), 0); + for (i = 0, e = cst.getNumIds(); i < e; ++i) + if (i != pos) + expr[i] = cst.atIneq(ubIneq, i); + + // From the upper bound inequality's form, its constant term is equal to the + // constant term of `expr`, minus `c`. From this, + // constant term of `expr` = constant term of upper bound + `c`. + expr.back() = cst.atIneq(ubIneq, cst.getNumCols() - 1) + c; + + return success(); +} + +/// Check if the pos^th identifier can be expressed as a floordiv of an affine +/// function of other identifiers (where the divisor is a positive constant). +/// `foundRepr` contains a boolean for each identifier indicating if the +/// explicit representation for that identifier has already been computed. +/// Returns the upper and lower bound inequalities using which the floordiv can +/// be computed. If the representation could be computed, `dividend` and +/// `denominator` are set. If the representation could not be computed, +/// `llvm::None` is returned. +static Optional> +computeSingleVarRepr(const FlatAffineConstraints &cst, + const SmallVector &foundRepr, unsigned pos, + SmallVector ÷nd, unsigned &divisor) { + assert(pos < cst.getNumIds() && "invalid position"); + assert(foundRepr.size() == cst.getNumIds() && + "Size of foundRepr does not match total number of variables"); + + SmallVector lbIndices, ubIndices; + cst.getLowerAndUpperBoundIndices(pos, &lbIndices, &ubIndices); + + for (unsigned ubPos : ubIndices) { + for (unsigned lbPos : lbIndices) { + // Attempt to get divison representation from ubPos, lbPos. + if (failed(getDivRepr(cst, pos, ubPos, lbPos, dividend, divisor))) + continue; + + // Check if the inequalities depend on a variable for which + // an explicit representation has not been found yet. + // Exit to avoid circular dependencies between divisions. + unsigned c, f; + for (c = 0, f = cst.getNumIds(); c < f; ++c) { + if (c == pos) + continue; + if (!foundRepr[c] && dividend[c] != 0) + break; + } + + // Expression can't be constructed as it depends on a yet unknown + // identifier. + // TODO: Visit/compute the identifiers in an order so that this doesn't + // happen. More complex but much more efficient. + if (c < f) + continue; + + return std::make_pair(ubPos, lbPos); + } + } + + return llvm::None; +} + void FlatAffineConstraints::getLocalReprs( std::vector>> &repr) const { std::vector> dividends(getNumLocalIds()); @@ -1239,9 +1378,8 @@ void FlatAffineConstraints::getLocalReprs( changed = false; for (unsigned i = 0, e = getNumLocalIds(); i < e; ++i) { if (!foundRepr[i + divOffset]) { - if (auto res = presburger_utils::computeSingleVarRepr( - *this, foundRepr, divOffset + i, dividends[i], - denominators[i])) { + if (auto res = computeSingleVarRepr(*this, foundRepr, divOffset + i, + dividends[i], denominators[i])) { foundRepr[i + divOffset] = true; repr[i] = res; changed = true; @@ -1299,9 +1437,11 @@ unsigned FlatAffineConstraints::gaussianEliminateIds(unsigned posStart, for (pivotCol = posStart; pivotCol < posLimit; ++pivotCol) { // Find a row which has a non-zero coefficient in column 'j'. unsigned pivotRow; - if (!findConstraintWithNonZeroAt(pivotCol, /*isEq=*/true, &pivotRow)) { + if (!findConstraintWithNonZeroAt(*this, pivotCol, /*isEq=*/true, + &pivotRow)) { // No pivot row in equalities with non-zero at 'pivotCol'. 
- if (!findConstraintWithNonZeroAt(pivotCol, /*isEq=*/false, &pivotRow)) { + if (!findConstraintWithNonZeroAt(*this, pivotCol, /*isEq=*/false, + &pivotRow)) { // If inequalities are also non-zero in 'pivotCol', it can be // eliminated. continue; @@ -1530,8 +1670,7 @@ static bool detectAsFloorDiv(const FlatAffineConstraints &cst, unsigned pos, SmallVector dividend; unsigned divisor; - auto ulPair = presburger_utils::computeSingleVarRepr(cst, foundRepr, pos, - dividend, divisor); + auto ulPair = computeSingleVarRepr(cst, foundRepr, pos, dividend, divisor); // No upper-lower bound pair found for this var. if (!ulPair) @@ -1970,7 +2109,7 @@ void FlatAffineConstraints::getSliceBounds(unsigned offset, unsigned num, // Detect an identifier as an expression of other identifiers. unsigned idx; - if (!findConstraintWithNonZeroAt(pos, /*isEq=*/true, &idx)) { + if (!findConstraintWithNonZeroAt(*this, pos, /*isEq=*/true, &idx)) { continue; } @@ -3308,10 +3447,12 @@ void FlatAffineValueConstraints::getIneqAsAffineValueMap( vmap.reset(AffineMap::get(numDims - 1, numSyms, boundExpr), operands); } -bool FlatAffineConstraints::isColZero(unsigned pos) const { +/// Returns true if the pos^th column is all zero for both inequalities and +/// equalities.. +static bool isColZero(const FlatAffineConstraints &cst, unsigned pos) { unsigned rowPos; - return !findConstraintWithNonZeroAt(pos, /*isEq=*/false, &rowPos) && - !findConstraintWithNonZeroAt(pos, /*isEq=*/true, &rowPos); + return !findConstraintWithNonZeroAt(cst, pos, /*isEq=*/false, &rowPos) && + !findConstraintWithNonZeroAt(cst, pos, /*isEq=*/true, &rowPos); } IntegerSet FlatAffineConstraints::getAsIntegerSet(MLIRContext *context) const { @@ -3330,7 +3471,7 @@ IntegerSet FlatAffineConstraints::getAsIntegerSet(MLIRContext *context) const { SmallVector noLocalRepVars; unsigned numDimsSymbols = getNumDimAndSymbolIds(); for (unsigned i = numDimsSymbols, e = getNumIds(); i < e; ++i) { - if (!memo[i] && !isColZero(/*pos=*/i)) + if (!memo[i] && !isColZero(*this, /*pos=*/i)) noLocalRepVars.push_back(i - numDimsSymbols); } if (!noLocalRepVars.empty()) { diff --git a/mlir/lib/Analysis/Presburger/CMakeLists.txt b/mlir/lib/Analysis/Presburger/CMakeLists.txt index 0b84f031b4c0..dd8c8d96d872 100644 --- a/mlir/lib/Analysis/Presburger/CMakeLists.txt +++ b/mlir/lib/Analysis/Presburger/CMakeLists.txt @@ -2,7 +2,6 @@ add_mlir_library(MLIRPresburger IntegerPolyhedron.cpp Matrix.cpp Simplex.cpp - Utils.cpp DEPENDS MLIRBuiltinLocationAttributesIncGen diff --git a/mlir/lib/Analysis/Presburger/Utils.cpp b/mlir/lib/Analysis/Presburger/Utils.cpp deleted file mode 100644 index 14d04d36c24e..000000000000 --- a/mlir/lib/Analysis/Presburger/Utils.cpp +++ /dev/null @@ -1,154 +0,0 @@ -//===- Utils.cpp - General utilities for Presburger library ---------------===// -// -// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. -// See https://llvm.org/LICENSE.txt for license information. -// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -// -//===----------------------------------------------------------------------===// -// -// Utility functions required by the Presburger Library. -// -//===----------------------------------------------------------------------===// - -#include "mlir/Analysis/Presburger/Utils.h" -#include "mlir/Analysis/AffineStructures.h" - -using namespace mlir; - -/// Check if the pos^th identifier can be represented as a division using upper -/// bound inequality at position `ubIneq` and lower bound inequality at position -/// `lbIneq`. 
-/// -/// Let `id` be the pos^th identifier, then `id` is equivalent to -/// `expr floordiv divisor` if there are constraints of the form: -/// 0 <= expr - divisor * id <= divisor - 1 -/// Rearranging, we have: -/// divisor * id - expr + (divisor - 1) >= 0 <-- Lower bound for 'id' -/// -divisor * id + expr >= 0 <-- Upper bound for 'id' -/// -/// For example: -/// 32*k >= 16*i + j - 31 <-- Lower bound for 'k' -/// 32*k <= 16*i + j <-- Upper bound for 'k' -/// expr = 16*i + j, divisor = 32 -/// k = ( 16*i + j ) floordiv 32 -/// -/// 4q >= i + j - 2 <-- Lower bound for 'q' -/// 4q <= i + j + 1 <-- Upper bound for 'q' -/// expr = i + j + 1, divisor = 4 -/// q = (i + j + 1) floordiv 4 -// -/// This function also supports detecting divisions from bounds that are -/// strictly tighter than the division bounds described above, since tighter -/// bounds imply the division bounds. For example: -/// 4q - i - j + 2 >= 0 <-- Lower bound for 'q' -/// -4q + i + j >= 0 <-- Tight upper bound for 'q' -/// -/// To extract floor divisions with tighter bounds, we assume that that the -/// constraints are of the form: -/// c <= expr - divisior * id <= divisor - 1, where 0 <= c <= divisor - 1 -/// Rearranging, we have: -/// divisor * id - expr + (divisor - 1) >= 0 <-- Lower bound for 'id' -/// -divisor * id + expr - c >= 0 <-- Upper bound for 'id' -/// -/// If successful, `expr` is set to dividend of the division and `divisor` is -/// set to the denominator of the division. -static LogicalResult getDivRepr(const FlatAffineConstraints &cst, unsigned pos, - unsigned ubIneq, unsigned lbIneq, - SmallVector &expr, - unsigned &divisor) { - - assert(pos <= cst.getNumIds() && "Invalid identifier position"); - assert(ubIneq <= cst.getNumInequalities() && - "Invalid upper bound inequality position"); - assert(lbIneq <= cst.getNumInequalities() && - "Invalid upper bound inequality position"); - - // Extract divisor from the lower bound. - divisor = cst.atIneq(lbIneq, pos); - - // First, check if the constraints are opposite of each other except the - // constant term. - unsigned i = 0, e = 0; - for (i = 0, e = cst.getNumIds(); i < e; ++i) - if (cst.atIneq(ubIneq, i) != -cst.atIneq(lbIneq, i)) - break; - - if (i < e) - return failure(); - - // Then, check if the constant term is of the proper form. - // Due to the form of the upper/lower bound inequalities, the sum of their - // constants is `divisor - 1 - c`. From this, we can extract c: - int64_t constantSum = cst.atIneq(lbIneq, cst.getNumCols() - 1) + - cst.atIneq(ubIneq, cst.getNumCols() - 1); - int64_t c = divisor - 1 - constantSum; - - // Check if `c` satisfies the condition `0 <= c <= divisor - 1`. This also - // implictly checks that `divisor` is positive. - if (!(c >= 0 && c <= divisor - 1)) - return failure(); - - // The inequality pair can be used to extract the division. - // Set `expr` to the dividend of the division except the constant term, which - // is set below. - expr.resize(cst.getNumCols(), 0); - for (i = 0, e = cst.getNumIds(); i < e; ++i) - if (i != pos) - expr[i] = cst.atIneq(ubIneq, i); - - // From the upper bound inequality's form, its constant term is equal to the - // constant term of `expr`, minus `c`. From this, - // constant term of `expr` = constant term of upper bound + `c`. - expr.back() = cst.atIneq(ubIneq, cst.getNumCols() - 1) + c; - - return success(); -} - -/// Check if the pos^th identifier can be expressed as a floordiv of an affine -/// function of other identifiers (where the divisor is a positive constant). 
-/// `foundRepr` contains a boolean for each identifier indicating if the -/// explicit representation for that identifier has already been computed. -/// Returns the upper and lower bound inequalities using which the floordiv can -/// be computed. If the representation could be computed, `dividend` and -/// `denominator` are set. If the representation could not be computed, -/// `llvm::None` is returned. -Optional> presburger_utils::computeSingleVarRepr( - const FlatAffineConstraints &cst, ArrayRef foundRepr, unsigned pos, - SmallVector ÷nd, unsigned &divisor) { - assert(pos < cst.getNumIds() && "invalid position"); - assert(foundRepr.size() == cst.getNumIds() && - "Size of foundRepr does not match total number of variables"); - - SmallVector lbIndices, ubIndices; - cst.getLowerAndUpperBoundIndices(pos, &lbIndices, &ubIndices); - - for (unsigned ubPos : ubIndices) { - for (unsigned lbPos : lbIndices) { - // Attempt to get divison representation from ubPos, lbPos. - if (failed(getDivRepr(cst, pos, ubPos, lbPos, dividend, divisor))) - continue; - - // Check if the inequalities depend on a variable for which - // an explicit representation has not been found yet. - // Exit to avoid circular dependencies between divisions. - unsigned c, f; - for (c = 0, f = cst.getNumIds(); c < f; ++c) { - if (c == pos) - continue; - if (!foundRepr[c] && dividend[c] != 0) - break; - } - - // Expression can't be constructed as it depends on a yet unknown - // identifier. - // TODO: Visit/compute the identifiers in an order so that this doesn't - // happen. More complex but much more efficient. - if (c < f) - continue; - - return std::make_pair(ubPos, lbPos); - } - } - - return llvm::None; -} From 8ea64d5585ec3a0a52db20c9e57ac9bed9e80fc2 Mon Sep 17 00:00:00 2001 From: Gabriel Smith Date: Fri, 24 Dec 2021 11:38:55 -0800 Subject: [PATCH 230/293] [clang-format] Fix short enums getting wrapped even when denied Single-variant enums were still getting placed on a single line even when AllowShortEnumsOnASingleLine was false. This fixes that by checking that setting when looking to merge lines. 
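To illustrate (based on the new FormatTest cases added below rather than on
wording from the commit), with AllowShortEnumsOnASingleLine set to false a
single-enumerator enum, with or without a leading typedef, now stays wrapped:

  typedef enum {
    A,
  } ShortEnum1, ShortEnum2;

Before the fix, the line joiner only looked for class/struct at the start of
the line, so such enum blocks fell through to the function-brace merging path
and were collapsed onto one line regardless of the option.
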
Differential Revision: https://reviews.llvm.org/D116188 --- clang/lib/Format/UnwrappedLineFormatter.cpp | 9 ++++++++- clang/unittests/Format/FormatTest.cpp | 15 +++++++++++++++ 2 files changed, 23 insertions(+), 1 deletion(-) diff --git a/clang/lib/Format/UnwrappedLineFormatter.cpp b/clang/lib/Format/UnwrappedLineFormatter.cpp index 3d4c1a4f903b..f652a4e7088f 100644 --- a/clang/lib/Format/UnwrappedLineFormatter.cpp +++ b/clang/lib/Format/UnwrappedLineFormatter.cpp @@ -393,11 +393,18 @@ class LineJoiner { // Try to merge a block with left brace wrapped that wasn't yet covered if (TheLine->Last->is(tok::l_brace)) { + const FormatToken *Tok = TheLine->First; bool ShouldMerge = false; - if (TheLine->First->isOneOf(tok::kw_class, tok::kw_struct)) { + if (Tok->is(tok::kw_typedef)) { + Tok = Tok->getNextNonComment(); + assert(Tok); + } + if (Tok->isOneOf(tok::kw_class, tok::kw_struct)) { ShouldMerge = !Style.BraceWrapping.AfterClass || (I[1]->First->is(tok::r_brace) && !Style.BraceWrapping.SplitEmptyRecord); + } else if (Tok->is(tok::kw_enum)) { + ShouldMerge = Style.AllowShortEnumsOnASingleLine; } else { ShouldMerge = !Style.BraceWrapping.AfterFunction || (I[1]->First->is(tok::r_brace) && diff --git a/clang/unittests/Format/FormatTest.cpp b/clang/unittests/Format/FormatTest.cpp index ee486f452194..374f3865acc3 100644 --- a/clang/unittests/Format/FormatTest.cpp +++ b/clang/unittests/Format/FormatTest.cpp @@ -2504,6 +2504,7 @@ TEST_F(FormatTest, ShortEnums) { FormatStyle Style = getLLVMStyle(); Style.AllowShortEnumsOnASingleLine = true; verifyFormat("enum { A, B, C } ShortEnum1, ShortEnum2;", Style); + verifyFormat("typedef enum { A, B, C } ShortEnum1, ShortEnum2;", Style); Style.AllowShortEnumsOnASingleLine = false; verifyFormat("enum {\n" " A,\n" @@ -2511,6 +2512,20 @@ TEST_F(FormatTest, ShortEnums) { " C\n" "} ShortEnum1, ShortEnum2;", Style); + verifyFormat("typedef enum {\n" + " A,\n" + " B,\n" + " C\n" + "} ShortEnum1, ShortEnum2;", + Style); + verifyFormat("enum {\n" + " A,\n" + "} ShortEnum1, ShortEnum2;", + Style); + verifyFormat("typedef enum {\n" + " A,\n" + "} ShortEnum1, ShortEnum2;", + Style); Style.BreakBeforeBraces = FormatStyle::BS_Custom; Style.BraceWrapping.AfterEnum = true; verifyFormat("enum\n" From e1b6b5be462ee2f197737162fc2a7d23e9a2eab6 Mon Sep 17 00:00:00 2001 From: Fangrui Song Date: Fri, 24 Dec 2021 12:09:48 -0800 Subject: [PATCH 231/293] [ELF] Avoid referencing SectionBase::repl after ICF It is fairly easy to forget SectionBase::repl after ICF. Let ICF rewrite a Defined symbol's `section` field to avoid references to SectionBase::repl in subsequent passes. This slightly improves the --icf=none performance due to less indirection (maybe for --icf={safe,all} as well if most symbols are Defined). With this change, there is only one reference to `repl` (--gdb-index D89751). We can undo f4fb5fd7523f8e3c3b3966d43c0a28457b59d1d8 (`Move Repl to SectionBase.`) but move `repl` to `InputSection` instead. 
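Schematically, for a Defined symbol d whose section was folded by ICF (a
reduced sketch mirroring the copyRelocations hunk below, not a new API):

  // Before: every consumer of Defined::section had to remember to chase
  // the ICF replacement pointer itself.
  SectionBase *section = d->section->repl;

  // After: ICF's run() has already redirected d->section to the kept
  // (canonical) section, so a plain field read is sufficient.
  SectionBase *section = d->section;
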
Reviewed By: ikudrin Differential Revision: https://reviews.llvm.org/D116093 --- lld/ELF/CallGraphSort.cpp | 4 ++-- lld/ELF/ICF.cpp | 16 ++++++++++++++++ lld/ELF/InputSection.cpp | 13 ++++++------- lld/ELF/InputSection.h | 17 +++++++---------- lld/ELF/Relocations.cpp | 4 ++-- lld/ELF/Symbols.cpp | 3 +-- lld/ELF/Symbols.h | 8 ++++++-- lld/ELF/SyntheticSections.cpp | 2 +- lld/ELF/Writer.cpp | 5 ++--- 9 files changed, 43 insertions(+), 29 deletions(-) diff --git a/lld/ELF/CallGraphSort.cpp b/lld/ELF/CallGraphSort.cpp index aa00d6eadbf9..5b07f0e18c8a 100644 --- a/lld/ELF/CallGraphSort.cpp +++ b/lld/ELF/CallGraphSort.cpp @@ -114,8 +114,8 @@ CallGraphSort::CallGraphSort() { // Create the graph. for (std::pair &c : profile) { - const auto *fromSB = cast(c.first.first->repl); - const auto *toSB = cast(c.first.second->repl); + const auto *fromSB = cast(c.first.first); + const auto *toSB = cast(c.first.second); uint64_t weight = c.second; // Ignore edges between input sections belonging to different output diff --git a/lld/ELF/ICF.cpp b/lld/ELF/ICF.cpp index 0ec748e8f990..ec63d2ef4d6f 100644 --- a/lld/ELF/ICF.cpp +++ b/lld/ELF/ICF.cpp @@ -550,6 +550,22 @@ template void ICF::run() { } }); + // Change Defined symbol's section field to the canonical one. + auto fold = [](Symbol *sym) { + if (auto *d = dyn_cast(sym)) + if (auto *sec = dyn_cast_or_null(d->section)) + if (sec->repl != d->section) { + d->section = sec->repl; + d->folded = true; + } + }; + for (Symbol *sym : symtab->symbols()) + fold(sym); + parallelForEach(objectFiles, [&](ELFFileBase *file) { + for (Symbol *sym : file->getLocalSymbols()) + fold(sym); + }); + // InputSectionDescription::sections is populated by processSectionCommands(). // ICF may fold some input sections assigned to output sections. Remove them. for (SectionCommand *cmd : script->sectionCommands) diff --git a/lld/ELF/InputSection.cpp b/lld/ELF/InputSection.cpp index df8cc5f221a0..e3871260fe5b 100644 --- a/lld/ELF/InputSection.cpp +++ b/lld/ELF/InputSection.cpp @@ -442,7 +442,7 @@ void InputSection::copyRelocations(uint8_t *buf, ArrayRef rels) { p->setSymbolAndType(0, 0, false); continue; } - SectionBase *section = d->section->repl; + SectionBase *section = d->section; if (!section->isLive()) { p->setSymbolAndType(0, 0, false); continue; @@ -948,10 +948,10 @@ void InputSection::relocateNonAlloc(uint8_t *buf, ArrayRef rels) { // // If the referenced symbol is discarded (made Undefined), or the // section defining the referenced symbol is garbage collected, - // sym.getOutputSection() is nullptr. `ds->section->repl != ds->section` - // catches the ICF folded case. However, resolving a relocation in - // .debug_line to -1 would stop debugger users from setting breakpoints on - // the folded-in function, so exclude .debug_line. + // sym.getOutputSection() is nullptr. `ds->folded` catches the ICF folded + // case. However, resolving a relocation in .debug_line to -1 would stop + // debugger users from setting breakpoints on the folded-in function, so + // exclude .debug_line. // // For pre-DWARF-v5 .debug_loc and .debug_ranges, -1 is a reserved value // (base address selection entry), use 1 (which is used by GNU ld for @@ -960,8 +960,7 @@ void InputSection::relocateNonAlloc(uint8_t *buf, ArrayRef rels) { // TODO To reduce disruption, we use 0 instead of -1 as the tombstone // value. Enable -1 in a future release. 
auto *ds = dyn_cast(&sym); - if (!sym.getOutputSection() || - (ds && ds->section->repl != ds->section && !isDebugLine)) { + if (!sym.getOutputSection() || (ds && ds->folded && !isDebugLine)) { // If -z dead-reloc-in-nonalloc= is specified, respect it. const uint64_t value = tombstone ? SignExtend64(*tombstone) : (isDebugLocOrRanges ? 1 : 0); diff --git a/lld/ELF/InputSection.h b/lld/ELF/InputSection.h index 5a0dd78f0e55..5319830b5d80 100644 --- a/lld/ELF/InputSection.h +++ b/lld/ELF/InputSection.h @@ -52,13 +52,6 @@ class SectionBase { StringRef name; - // This pointer points to the "real" instance of this instance. - // Usually Repl == this. However, if ICF merges two sections, - // Repl pointer of one section points to another section. So, - // if you need to get a pointer to this instance, do not use - // this but instead this->Repl. - SectionBase *repl; - uint8_t sectionKind : 3; // The next two bit fields are only used by InputSectionBase, but we @@ -102,9 +95,9 @@ class SectionBase { constexpr SectionBase(Kind sectionKind, StringRef name, uint64_t flags, uint32_t entsize, uint32_t alignment, uint32_t type, uint32_t info, uint32_t link) - : name(name), repl(this), sectionKind(sectionKind), bss(false), - keepUnique(false), partition(0), alignment(alignment), flags(flags), - entsize(entsize), type(type), link(link), info(info) {} + : name(name), sectionKind(sectionKind), bss(false), keepUnique(false), + partition(0), alignment(alignment), flags(flags), entsize(entsize), + type(type), link(link), info(info) {} }; // This corresponds to a section of an input file. @@ -367,6 +360,10 @@ class InputSection : public InputSectionBase { template void relocateNonAlloc(uint8_t *buf, llvm::ArrayRef rels); + // Points to the canonical section. If ICF folds two sections, repl pointer of + // one section points to the other. + InputSection *repl = this; + // Used by ICF. uint32_t eqClass[2] = {0, 0}; diff --git a/lld/ELF/Relocations.cpp b/lld/ELF/Relocations.cpp index c4438be0cb59..38e0d84e6271 100644 --- a/lld/ELF/Relocations.cpp +++ b/lld/ELF/Relocations.cpp @@ -2004,8 +2004,8 @@ std::pair ThunkCreator::getThunk(InputSection *isec, // non-Thunk target, so we cannot fold offset + addend. 
if (auto *d = dyn_cast(rel.sym)) if (!d->isInPlt() && d->section) - thunkVec = &thunkedSymbolsBySectionAndAddend[{ - {d->section->repl, d->value}, keyAddend}]; + thunkVec = &thunkedSymbolsBySectionAndAddend[{{d->section, d->value}, + keyAddend}]; if (!thunkVec) thunkVec = &thunkedSymbols[{rel.sym, keyAddend}]; diff --git a/lld/ELF/Symbols.cpp b/lld/ELF/Symbols.cpp index f6f0ad0087d7..23f8d3ef545e 100644 --- a/lld/ELF/Symbols.cpp +++ b/lld/ELF/Symbols.cpp @@ -78,7 +78,6 @@ static uint64_t getSymVA(const Symbol &sym, int64_t addend) { return d.value; assert(isec != &InputSection::discarded); - isec = isec->repl; uint64_t offset = d.value; @@ -348,7 +347,7 @@ void elf::maybeWarnUnorderableSymbol(const Symbol *sym) { report(": unable to order absolute symbol: "); else if (d && isa(d->section)) report(": unable to order synthetic symbol: "); - else if (d && !d->section->repl->isLive()) + else if (d && !d->section->isLive()) report(": unable to order discarded symbol: "); } diff --git a/lld/ELF/Symbols.h b/lld/ELF/Symbols.h index beb45ec14147..e5fe53c6c496 100644 --- a/lld/ELF/Symbols.h +++ b/lld/ELF/Symbols.h @@ -248,8 +248,9 @@ class Symbol { exportDynamic(isExportDynamic(k, visibility)), inDynamicList(false), canInline(false), referenced(false), traced(false), isInIplt(false), gotInIgot(false), isPreemptible(false), used(!config->gcSections), - needsTocRestore(false), scriptDefined(false), needsCopy(false), - needsGot(false), needsPlt(false), hasDirectReloc(false) {} + folded(false), needsTocRestore(false), scriptDefined(false), + needsCopy(false), needsGot(false), needsPlt(false), + hasDirectReloc(false) {} public: // True if this symbol is in the Iplt sub-section of the Plt and the Igot @@ -269,6 +270,9 @@ class Symbol { // which are referenced by relocations when -r or --emit-relocs is given. uint8_t used : 1; + // True if defined relative to a section discarded by ICF. + uint8_t folded : 1; + // True if a call to this symbol needs to be followed by a restore of the // PPC64 toc pointer. uint8_t needsTocRestore : 1; diff --git a/lld/ELF/SyntheticSections.cpp b/lld/ELF/SyntheticSections.cpp index 872dd0e612af..e480118f5ae9 100644 --- a/lld/ELF/SyntheticSections.cpp +++ b/lld/ELF/SyntheticSections.cpp @@ -391,7 +391,7 @@ Defined *EhFrameSection::isFdeLive(EhSectionPiece &fde, ArrayRef rels) { // FDEs for garbage-collected or merged-by-ICF sections, or sections in // another partition, are dead. if (auto *d = dyn_cast(&b)) - if (d->section && d->section->partition == partition) + if (!d->folded && d->section && d->section->partition == partition) return d; return nullptr; } diff --git a/lld/ELF/Writer.cpp b/lld/ELF/Writer.cpp index 221601d15c2f..497e56886b72 100644 --- a/lld/ELF/Writer.cpp +++ b/lld/ELF/Writer.cpp @@ -679,7 +679,6 @@ static bool includeInSymtab(const Symbol &b) { SectionBase *sec = d->section; if (!sec) return true; - sec = sec->repl; // Exclude symbols pointing to garbage-collected sections. 
if (isa(sec) && !sec->isLive()) @@ -1302,7 +1301,7 @@ static DenseMap buildSectionOrder() { if (auto *d = dyn_cast(&sym)) { if (auto *sec = dyn_cast_or_null(d->section)) { - int &priority = sectionOrder[cast(sec->repl)]; + int &priority = sectionOrder[cast(sec)]; priority = std::min(priority, ent.priority); } } @@ -1725,7 +1724,7 @@ static void fixSymbolsAfterShrinking() { if (!sec) return; - const InputSectionBase *inputSec = dyn_cast(sec->repl); + const InputSectionBase *inputSec = dyn_cast(sec); if (!inputSec || !inputSec->bytesDropped) return; From e694180033d1e9d6e215bbc2f956092d30c1e3cd Mon Sep 17 00:00:00 2001 From: Fangrui Song Date: Fri, 24 Dec 2021 12:28:59 -0800 Subject: [PATCH 232/293] [ELF] Optimize --wrap to only check non-local symbols --- lld/ELF/Driver.cpp | 9 ++++----- lld/ELF/InputFiles.h | 8 +++++--- 2 files changed, 9 insertions(+), 8 deletions(-) diff --git a/lld/ELF/Driver.cpp b/lld/ELF/Driver.cpp index 7d01b7f33dec..e4d3e1d50b0f 100644 --- a/lld/ELF/Driver.cpp +++ b/lld/ELF/Driver.cpp @@ -2120,11 +2120,10 @@ static void redirectSymbols(ArrayRef wrapped) { return; // Update pointers in input files. - parallelForEach(objectFiles, [&](InputFile *file) { - MutableArrayRef syms = file->getMutableSymbols(); - for (size_t i = 0, e = syms.size(); i != e; ++i) - if (Symbol *s = map.lookup(syms[i])) - syms[i] = s; + parallelForEach(objectFiles, [&](ELFFileBase *file) { + for (Symbol *&sym : file->getMutableGlobalSymbols()) + if (Symbol *s = map.lookup(sym)) + sym = s; }); // Update pointers in the symbol table. diff --git a/lld/ELF/InputFiles.h b/lld/ELF/InputFiles.h index a031ef94c14d..6111df521840 100644 --- a/lld/ELF/InputFiles.h +++ b/lld/ELF/InputFiles.h @@ -91,9 +91,7 @@ class InputFile { // Returns object file symbols. It is a runtime error to call this // function on files of other types. - ArrayRef getSymbols() { return getMutableSymbols(); } - - MutableArrayRef getMutableSymbols() { + ArrayRef getSymbols() const { assert(fileKind == BinaryKind || fileKind == ObjKind || fileKind == BitcodeKind); return symbols; @@ -186,6 +184,10 @@ class ELFFileBase : public InputFile { ArrayRef getGlobalSymbols() { return llvm::makeArrayRef(symbols).slice(firstGlobal); } + MutableArrayRef getMutableGlobalSymbols() { + return llvm::makeMutableArrayRef(symbols.data(), symbols.size()) + .slice(firstGlobal); + } template typename ELFT::SymRange getELFSyms() const { return typename ELFT::SymRange( From 5e3403bd22039d043d3ffc8ab23255f8b9207b2b Mon Sep 17 00:00:00 2001 From: Fangrui Song Date: Fri, 24 Dec 2021 13:16:34 -0800 Subject: [PATCH 233/293] [ELF] parseLazy: skip local symbols --- lld/ELF/InputFiles.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/lld/ELF/InputFiles.cpp b/lld/ELF/InputFiles.cpp index f2128c84f453..964898fb790e 100644 --- a/lld/ELF/InputFiles.cpp +++ b/lld/ELF/InputFiles.cpp @@ -1800,7 +1800,7 @@ template void ObjFile::parseLazy() { // resolve() may trigger this->extract() if an existing symbol is an undefined // symbol. If that happens, this function has served its purpose, and we can // exit from the loop early. 
- for (Symbol *sym : symbols) + for (Symbol *sym : makeArrayRef(symbols).slice(firstGlobal)) if (sym) { sym->resolve(LazyObject{*this, sym->getName()}); if (!lazy) From a9e8b1ee7fd44b53c555a7823ae8fd1a8209c520 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Markus=20B=C3=B6ck?= Date: Fri, 24 Dec 2021 22:25:32 +0100 Subject: [PATCH 234/293] [mlir] Fully qualify default types used in parser code --- mlir/tools/mlir-tblgen/OpFormatGen.cpp | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/mlir/tools/mlir-tblgen/OpFormatGen.cpp b/mlir/tools/mlir-tblgen/OpFormatGen.cpp index 2b5767e32567..adbd8407af99 100644 --- a/mlir/tools/mlir-tblgen/OpFormatGen.cpp +++ b/mlir/tools/mlir-tblgen/OpFormatGen.cpp @@ -1291,7 +1291,7 @@ void OperationFormat::genElementParser(Element *element, MethodBody &body, llvm::raw_string_ostream os(attrTypeStr); os << tgfmt(*typeBuilder, &attrTypeCtx); } else { - attrTypeStr = "Type{}"; + attrTypeStr = "::mlir::Type{}"; } if (var->attr.isOptional()) { body << formatv(optionalAttrParserCode, var->name, attrTypeStr); @@ -1375,7 +1375,7 @@ void OperationFormat::genElementParser(Element *element, MethodBody &body, listName); }) .Default([&](auto operand) { - body << formatv(typeParserCode, "Type", listName); + body << formatv(typeParserCode, "::mlir::Type", listName); }); } } else if (auto *dir = dyn_cast(element)) { @@ -1517,7 +1517,7 @@ void OperationFormat::genParserOperandTypeResolution( // once. Use llvm::concat to perform the merge. llvm::concat does not allow // the case of a single range, so guard it here. if (op.getNumOperands() > 1) { - body << "::llvm::concat("; + body << "::llvm::concat("; llvm::interleaveComma( llvm::seq(0, op.getNumOperands()), body, [&](int i) { body << "::llvm::ArrayRef<::mlir::Type>("; From d63016a86548e8231002a760bbe9eb817cd1eb00 Mon Sep 17 00:00:00 2001 From: Fangrui Song Date: Fri, 24 Dec 2021 15:41:56 -0800 Subject: [PATCH 235/293] [CMake] Revert -Wl,-O3 This reverts 8cb7876cb366b5803ca35d92313ea00eadf29b78 and follow-ups. GNU ld/gold/ld.lld -O has nothing to do with any code related linker optimizations. It has very small benefit (save 144Ki (.hash, .gnu_hash) with GNU ld, save 0.7% .debug_str with gold/ld.lld) while it makes gold/ld.lld significantly slower when linking RelWithDebInfo clang (gold: 16.437 vs 19.488; ld.lld: 1.882 vs 4.881). --- llvm/cmake/modules/AddLLVM.cmake | 8 -------- 1 file changed, 8 deletions(-) diff --git a/llvm/cmake/modules/AddLLVM.cmake b/llvm/cmake/modules/AddLLVM.cmake index 1d97626d69cc..327b8e0ba2e7 100644 --- a/llvm/cmake/modules/AddLLVM.cmake +++ b/llvm/cmake/modules/AddLLVM.cmake @@ -254,14 +254,6 @@ function(add_link_opts target_name) # Don't use linker optimizations in debug builds since it slows down the # linker in a context where the optimizations are not important. if (NOT uppercase_CMAKE_BUILD_TYPE STREQUAL "DEBUG") - - # Pass -O3 to the linker. This enabled different optimizations on different - # linkers. - if(NOT (CMAKE_SYSTEM_NAME MATCHES "Darwin|SunOS|AIX|OS390" OR WIN32) AND in_distribution) - set_property(TARGET ${target_name} APPEND_STRING PROPERTY - LINK_FLAGS " -Wl,-O3") - endif() - if(NOT LLVM_NO_DEAD_STRIP) if(${CMAKE_SYSTEM_NAME} MATCHES "Darwin") # ld64's implementation of -dead_strip breaks tools that use plugins. From 2709fd1520bca98667db9c10b3156cac892949bc Mon Sep 17 00:00:00 2001 From: "William S. Moses" Date: Fri, 24 Dec 2021 16:51:54 -0500 Subject: [PATCH 236/293] [MLIR][LLVM] Add MemmoveOp to LLVM Dialect LLVM Dialect in MLIR doesn't have a memmove op. 
This adds one. Reviewed By: mehdi_amini Differential Revision: https://reviews.llvm.org/D116274 --- mlir/include/mlir/Dialect/LLVMIR/LLVMOps.td | 4 ++++ mlir/test/Target/LLVMIR/llvmir-intrinsics.mlir | 8 ++++++++ 2 files changed, 12 insertions(+) diff --git a/mlir/include/mlir/Dialect/LLVMIR/LLVMOps.td b/mlir/include/mlir/Dialect/LLVMIR/LLVMOps.td index eaa1c0b42bac..f671c3ca1dae 100644 --- a/mlir/include/mlir/Dialect/LLVMIR/LLVMOps.td +++ b/mlir/include/mlir/Dialect/LLVMIR/LLVMOps.td @@ -1447,6 +1447,10 @@ def LLVM_MemcpyInlineOp : LLVM_ZeroResultIntrOp<"memcpy.inline", [0, 1, 2]> { let arguments = (ins LLVM_Type:$dst, LLVM_Type:$src, LLVM_Type:$len, LLVM_Type:$isVolatile); } +def LLVM_MemmoveOp : LLVM_ZeroResultIntrOp<"memmove", [0, 1, 2]> { + let arguments = (ins LLVM_Type:$dst, LLVM_Type:$src, LLVM_Type:$len, + LLVM_Type:$isVolatile); +} def LLVM_MemsetOp : LLVM_ZeroResultIntrOp<"memset", [0, 2]> { let arguments = (ins LLVM_Type:$dst, LLVM_Type:$val, LLVM_Type:$len, diff --git a/mlir/test/Target/LLVMIR/llvmir-intrinsics.mlir b/mlir/test/Target/LLVMIR/llvmir-intrinsics.mlir index ebb59aee530e..e1eff69a4251 100644 --- a/mlir/test/Target/LLVMIR/llvmir-intrinsics.mlir +++ b/mlir/test/Target/LLVMIR/llvmir-intrinsics.mlir @@ -350,6 +350,14 @@ llvm.func @memcpy_test(%arg0: i32, %arg2: !llvm.ptr, %arg3: !llvm.ptr) { llvm.return } +// CHECK-LABEL: @memmove_test +llvm.func @memmove_test(%arg0: i32, %arg2: !llvm.ptr, %arg3: !llvm.ptr) { + %i1 = llvm.mlir.constant(false) : i1 + // CHECK: call void @llvm.memmove.p0i8.p0i8.i32(i8* %{{.*}}, i8* %{{.*}}, i32 %{{.*}}, i1 {{.*}}) + "llvm.intr.memmove"(%arg2, %arg3, %arg0, %i1) : (!llvm.ptr, !llvm.ptr, i32, i1) -> () + llvm.return +} + // CHECK-LABEL: @memset_test llvm.func @memset_test(%arg0: i32, %arg2: !llvm.ptr, %arg3: i8) { %i1 = llvm.mlir.constant(false) : i1 From b5a0f0f397c778cc7db71754c1b9c939f669568e Mon Sep 17 00:00:00 2001 From: Fangrui Song Date: Fri, 24 Dec 2021 17:10:38 -0800 Subject: [PATCH 237/293] [ELF] Add ELFFileBase::{elfShdrs,numELFShdrs} to avoid duplicate llvm::object::ELFFile::sections() This mainly avoid `relsOrRelas` cost in `InputSectionBase::relocate`. `llvm::object::ELFFile::sections()` has redundant and expensive checks. --- lld/ELF/DWARF.cpp | 3 +-- lld/ELF/Driver.cpp | 9 ++++----- lld/ELF/InputFiles.cpp | 9 +++++---- lld/ELF/InputFiles.h | 6 ++++++ lld/ELF/InputSection.cpp | 11 +++++------ lld/ELF/Relocations.cpp | 4 ++-- 6 files changed, 23 insertions(+), 19 deletions(-) diff --git a/lld/ELF/DWARF.cpp b/lld/ELF/DWARF.cpp index 4d84c09a0185..789820ba7a8e 100644 --- a/lld/ELF/DWARF.cpp +++ b/lld/ELF/DWARF.cpp @@ -27,8 +27,7 @@ using namespace lld::elf; template LLDDwarfObj::LLDDwarfObj(ObjFile *obj) { // Get the ELF sections to retrieve sh_flags. See the SHF_GROUP comment below. 
- ArrayRef objSections = - CHECK(obj->getObj().sections(), obj); + ArrayRef objSections = obj->template getELFShdrs(); assert(objSections.size() == obj->getSections().size()); for (auto it : llvm::enumerate(obj->getSections())) { InputSectionBase *sec = it.value(); diff --git a/lld/ELF/Driver.cpp b/lld/ELF/Driver.cpp index e4d3e1d50b0f..19266cb280b9 100644 --- a/lld/ELF/Driver.cpp +++ b/lld/ELF/Driver.cpp @@ -846,14 +846,13 @@ static bool processCallGraphRelocations(SmallVector &symbolIndices, ArrayRef &cgProfile, ObjFile *inputObj) { - symbolIndices.clear(); - const ELFFile &obj = inputObj->getObj(); - ArrayRef> objSections = - CHECK(obj.sections(), "could not retrieve object sections"); - if (inputObj->cgProfileSectionIndex == SHN_UNDEF) return false; + ArrayRef> objSections = + inputObj->template getELFShdrs(); + symbolIndices.clear(); + const ELFFile &obj = inputObj->getObj(); cgProfile = check(obj.template getSectionContentsAsArray( objSections[inputObj->cgProfileSectionIndex])); diff --git a/lld/ELF/InputFiles.cpp b/lld/ELF/InputFiles.cpp index 964898fb790e..0badf2c55e5b 100644 --- a/lld/ELF/InputFiles.cpp +++ b/lld/ELF/InputFiles.cpp @@ -370,6 +370,8 @@ template void ELFFileBase::init() { abiVersion = obj.getHeader().e_ident[llvm::ELF::EI_ABIVERSION]; ArrayRef sections = CHECK(obj.sections(), this); + elfShdrs = sections.data(); + numELFShdrs = sections.size(); // Find a symbol table. bool isDSO = @@ -477,8 +479,7 @@ bool ObjFile::shouldMerge(const Elf_Shdr &sec, StringRef name) { // When the option is given, we link "just symbols". The section table is // initialized with null pointers. template void ObjFile::initializeJustSymbols() { - ArrayRef sections = CHECK(this->getObj().sections(), this); - this->sections.resize(sections.size()); + sections.resize(numELFShdrs); } // An ELF object file may contain a `.deplibs` section. If it exists, the @@ -544,7 +545,7 @@ template void ObjFile::initializeSections(bool ignoreComdats) { const ELFFile &obj = this->getObj(); - ArrayRef objSections = CHECK(obj.sections(), this); + ArrayRef objSections = getELFShdrs(); StringRef shstrtab = CHECK(obj.getSectionStringTable(objSections), this); uint64_t size = objSections.size(); this->sections.resize(size); @@ -1410,7 +1411,7 @@ template void SharedFile::parse() { ArrayRef dynamicTags; const ELFFile obj = this->getObj(); - ArrayRef sections = CHECK(obj.sections(), this); + ArrayRef sections = getELFShdrs(); const Elf_Shdr *versymSec = nullptr; const Elf_Shdr *verdefSec = nullptr; diff --git a/lld/ELF/InputFiles.h b/lld/ELF/InputFiles.h index 6111df521840..f58e76e48433 100644 --- a/lld/ELF/InputFiles.h +++ b/lld/ELF/InputFiles.h @@ -189,6 +189,10 @@ class ELFFileBase : public InputFile { .slice(firstGlobal); } + template typename ELFT::ShdrRange getELFShdrs() const { + return typename ELFT::ShdrRange( + reinterpret_cast(elfShdrs), numELFShdrs); + } template typename ELFT::SymRange getELFSyms() const { return typename ELFT::SymRange( reinterpret_cast(elfSyms), numELFSyms); @@ -201,7 +205,9 @@ class ELFFileBase : public InputFile { // Initializes this class's member variables. 
template void init(); + const void *elfShdrs = nullptr; const void *elfSyms = nullptr; + uint32_t numELFShdrs = 0; uint32_t numELFSyms = 0; uint32_t firstGlobal = 0; StringRef stringTable; diff --git a/lld/ELF/InputSection.cpp b/lld/ELF/InputSection.cpp index e3871260fe5b..33a42ff3f4a5 100644 --- a/lld/ELF/InputSection.cpp +++ b/lld/ELF/InputSection.cpp @@ -163,16 +163,16 @@ template RelsOrRelas InputSectionBase::relsOrRelas() const { if (relSecIdx == 0) return {}; RelsOrRelas ret; - const ELFFile obj = cast(file)->getObj(); - typename ELFT::Shdr shdr = cantFail(obj.sections())[relSecIdx]; + typename ELFT::Shdr shdr = + cast(file)->getELFShdrs()[relSecIdx]; if (shdr.sh_type == SHT_REL) { ret.rels = makeArrayRef(reinterpret_cast( - obj.base() + shdr.sh_offset), + file->mb.getBufferStart() + shdr.sh_offset), shdr.sh_size / sizeof(typename ELFT::Rel)); } else { assert(shdr.sh_type == SHT_RELA); ret.relas = makeArrayRef(reinterpret_cast( - obj.base() + shdr.sh_offset), + file->mb.getBufferStart() + shdr.sh_offset), shdr.sh_size / sizeof(typename ELFT::Rela)); } return ret; @@ -433,8 +433,7 @@ void InputSection::copyRelocations(uint8_t *buf, ArrayRef rels) { sec->name != ".gcc_except_table" && sec->name != ".got2" && sec->name != ".toc") { uint32_t secIdx = cast(sym).discardedSecIdx; - Elf_Shdr_Impl sec = - CHECK(file->getObj().sections(), file)[secIdx]; + Elf_Shdr_Impl sec = file->template getELFShdrs()[secIdx]; warn("relocation refers to a discarded section: " + CHECK(file->getObj().getSectionName(sec), file) + "\n>>> referenced by " + getObjMsg(p->r_offset)); diff --git a/lld/ELF/Relocations.cpp b/lld/ELF/Relocations.cpp index 38e0d84e6271..23612ec48ded 100644 --- a/lld/ELF/Relocations.cpp +++ b/lld/ELF/Relocations.cpp @@ -472,8 +472,8 @@ static std::string maybeReportDiscarded(Undefined &sym) { if (!file || !sym.discardedSecIdx || file->getSections()[sym.discardedSecIdx] != &InputSection::discarded) return ""; - ArrayRef> objSections = - CHECK(file->getObj().sections(), file); + ArrayRef objSections = + file->template getELFShdrs(); std::string msg; if (sym.type == ELF::STT_SECTION) { From 745420d3f4b050282e65cdf7893050fc90bf9c8a Mon Sep 17 00:00:00 2001 From: Fangrui Song Date: Fri, 24 Dec 2021 17:54:12 -0800 Subject: [PATCH 238/293] [ELF] Cache global variable `target` in relocate* This avoid repeated load of the unique_ptr in hot paths. --- lld/ELF/InputSection.cpp | 45 +++++++++++++++++++++------------------- 1 file changed, 24 insertions(+), 21 deletions(-) diff --git a/lld/ELF/InputSection.cpp b/lld/ELF/InputSection.cpp index 33a42ff3f4a5..e1ee3def89f3 100644 --- a/lld/ELF/InputSection.cpp +++ b/lld/ELF/InputSection.cpp @@ -395,6 +395,7 @@ InputSectionBase *InputSection::getRelocatedSection() const { // for each relocation. So we copy relocations one by one. template void InputSection::copyRelocations(uint8_t *buf, ArrayRef rels) { + const TargetInfo &target = *elf::target; InputSectionBase *sec = getRelocatedSection(); for (const RelTy &rel : rels) { @@ -450,10 +451,10 @@ void InputSection::copyRelocations(uint8_t *buf, ArrayRef rels) { int64_t addend = getAddend(rel); const uint8_t *bufLoc = sec->data().begin() + rel.r_offset; if (!RelTy::IsRela) - addend = target->getImplicitAddend(bufLoc, type); + addend = target.getImplicitAddend(bufLoc, type); if (config->emachine == EM_MIPS && - target->getRelExpr(type, sym, bufLoc) == R_MIPS_GOTREL) { + target.getRelExpr(type, sym, bufLoc) == R_MIPS_GOTREL) { // Some MIPS relocations depend on "gp" value. 
By default, // this value has 0x7ff0 offset from a .got section. But // relocatable files produced by a compiler or a linker @@ -470,7 +471,7 @@ void InputSection::copyRelocations(uint8_t *buf, ArrayRef rels) { if (RelTy::IsRela) p->r_addend = sym.getVA(addend) - section->getOutputSection()->addr; - else if (config->relocatable && type != target->noneRel) + else if (config->relocatable && type != target.noneRel) sec->relocations.push_back({R_ABS, type, rel.r_offset, addend, &sym}); } else if (config->emachine == EM_PPC && type == R_PPC_PLTREL24 && p->r_addend >= 0x8000 && sec->file->ppc32Got2) { @@ -864,6 +865,7 @@ uint64_t InputSectionBase::getRelocTargetVA(const InputFile *file, RelType type, template void InputSection::relocateNonAlloc(uint8_t *buf, ArrayRef rels) { const unsigned bits = sizeof(typename ELFT::uint) * 8; + const TargetInfo &target = *elf::target; const bool isDebug = isDebugSection(*this); const bool isDebugLocOrRanges = isDebug && (name == ".debug_loc" || name == ".debug_ranges"); @@ -889,16 +891,16 @@ void InputSection::relocateNonAlloc(uint8_t *buf, ArrayRef rels) { uint8_t *bufLoc = buf + offset; int64_t addend = getAddend(rel); if (!RelTy::IsRela) - addend += target->getImplicitAddend(bufLoc, type); + addend += target.getImplicitAddend(bufLoc, type); Symbol &sym = getFile()->getRelocTargetSym(rel); - RelExpr expr = target->getRelExpr(type, sym, bufLoc); + RelExpr expr = target.getRelExpr(type, sym, bufLoc); if (expr == R_NONE) continue; if (expr == R_SIZE) { - target->relocateNoSym(bufLoc, type, - SignExtend64(sym.getSize() + addend)); + target.relocateNoSym(bufLoc, type, + SignExtend64(sym.getSize() + addend)); continue; } @@ -922,14 +924,14 @@ void InputSection::relocateNonAlloc(uint8_t *buf, ArrayRef rels) { // address 0. For bug-compatibilty, we accept them with warnings. We // know Steel Bank Common Lisp as of 2018 have this bug. warn(msg); - target->relocateNoSym( + target.relocateNoSym( bufLoc, type, SignExtend64(sym.getVA(addend - offset - outSecOff))); continue; } if (tombstone || - (isDebug && (type == target->symbolicRel || expr == R_DTPREL))) { + (isDebug && (type == target.symbolicRel || expr == R_DTPREL))) { // Resolve relocations in .debug_* referencing (discarded symbols or ICF // folded section symbols) to a tombstone value. Resolving to addend is // unsatisfactory because the result address range may collide with a @@ -963,11 +965,11 @@ void InputSection::relocateNonAlloc(uint8_t *buf, ArrayRef rels) { // If -z dead-reloc-in-nonalloc= is specified, respect it. const uint64_t value = tombstone ? SignExtend64(*tombstone) : (isDebugLocOrRanges ? 
1 : 0); - target->relocateNoSym(bufLoc, type, value); + target.relocateNoSym(bufLoc, type, value); continue; } } - target->relocateNoSym(bufLoc, type, SignExtend64(sym.getVA(addend))); + target.relocateNoSym(bufLoc, type, SignExtend64(sym.getVA(addend))); } } @@ -1013,6 +1015,7 @@ void InputSectionBase::relocate(uint8_t *buf, uint8_t *bufEnd) { void InputSectionBase::relocateAlloc(uint8_t *buf, uint8_t *bufEnd) { assert(flags & SHF_ALLOC); const unsigned bits = config->wordsize * 8; + const TargetInfo &target = *elf::target; uint64_t lastPPCRelaxedRelocOff = UINT64_C(-1); for (const Relocation &rel : relocations) { @@ -1031,7 +1034,7 @@ void InputSectionBase::relocateAlloc(uint8_t *buf, uint8_t *bufEnd) { switch (rel.expr) { case R_RELAX_GOT_PC: case R_RELAX_GOT_PC_NOPIC: - target->relaxGot(bufLoc, rel, targetVA); + target.relaxGot(bufLoc, rel, targetVA); break; case R_PPC64_RELAX_GOT_PC: { // The R_PPC64_PCREL_OPT relocation must appear immediately after @@ -1044,7 +1047,7 @@ void InputSectionBase::relocateAlloc(uint8_t *buf, uint8_t *bufEnd) { lastPPCRelaxedRelocOff = offset; if (rel.type == R_PPC64_PCREL_OPT && offset != lastPPCRelaxedRelocOff) break; - target->relaxGot(bufLoc, rel, targetVA); + target.relaxGot(bufLoc, rel, targetVA); break; } case R_PPC64_RELAX_TOC: @@ -1055,25 +1058,25 @@ void InputSectionBase::relocateAlloc(uint8_t *buf, uint8_t *bufEnd) { // opportunities but is safe. if (ppc64noTocRelax.count({rel.sym, rel.addend}) || !tryRelaxPPC64TocIndirection(rel, bufLoc)) - target->relocate(bufLoc, rel, targetVA); + target.relocate(bufLoc, rel, targetVA); break; case R_RELAX_TLS_IE_TO_LE: - target->relaxTlsIeToLe(bufLoc, rel, targetVA); + target.relaxTlsIeToLe(bufLoc, rel, targetVA); break; case R_RELAX_TLS_LD_TO_LE: case R_RELAX_TLS_LD_TO_LE_ABS: - target->relaxTlsLdToLe(bufLoc, rel, targetVA); + target.relaxTlsLdToLe(bufLoc, rel, targetVA); break; case R_RELAX_TLS_GD_TO_LE: case R_RELAX_TLS_GD_TO_LE_NEG: - target->relaxTlsGdToLe(bufLoc, rel, targetVA); + target.relaxTlsGdToLe(bufLoc, rel, targetVA); break; case R_AARCH64_RELAX_TLS_GD_TO_IE_PAGE_PC: case R_RELAX_TLS_GD_TO_IE: case R_RELAX_TLS_GD_TO_IE_ABS: case R_RELAX_TLS_GD_TO_IE_GOT_OFF: case R_RELAX_TLS_GD_TO_IE_GOTPLT: - target->relaxTlsGdToIe(bufLoc, rel, targetVA); + target.relaxTlsGdToIe(bufLoc, rel, targetVA); break; case R_PPC64_CALL: // If this is a call to __tls_get_addr, it may be part of a TLS @@ -1098,10 +1101,10 @@ void InputSectionBase::relocateAlloc(uint8_t *buf, uint8_t *bufEnd) { } write32(bufLoc + 4, 0xe8410018); // ld %r2, 24(%r1) } - target->relocate(bufLoc, rel, targetVA); + target.relocate(bufLoc, rel, targetVA); break; default: - target->relocate(bufLoc, rel, targetVA); + target.relocate(bufLoc, rel, targetVA); break; } } @@ -1114,7 +1117,7 @@ void InputSectionBase::relocateAlloc(uint8_t *buf, uint8_t *bufEnd) { for (const JumpInstrMod &jumpMod : jumpInstrMods) { uint64_t offset = jumpMod.offset; uint8_t *bufLoc = buf + offset; - target->applyJumpInstrMod(bufLoc, jumpMod.original, jumpMod.size); + target.applyJumpInstrMod(bufLoc, jumpMod.original, jumpMod.size); } } } From 40fae4d8fcbd6224f16fdf3ba84a0d89b713e7d5 Mon Sep 17 00:00:00 2001 From: Fangrui Song Date: Fri, 24 Dec 2021 19:01:50 -0800 Subject: [PATCH 239/293] [ELF] Optimize replaceCommonSymbols This decreases the 0.2% time (no debug info) to nearly no. 
--- lld/ELF/Driver.cpp | 22 +++++++++++++--------- lld/ELF/InputFiles.cpp | 1 + lld/ELF/InputFiles.h | 5 ++++- 3 files changed, 18 insertions(+), 10 deletions(-) diff --git a/lld/ELF/Driver.cpp b/lld/ELF/Driver.cpp index 19266cb280b9..6b689f50cce7 100644 --- a/lld/ELF/Driver.cpp +++ b/lld/ELF/Driver.cpp @@ -1821,17 +1821,21 @@ static void writeDependencyFile() { // symbols of type CommonSymbol. static void replaceCommonSymbols() { llvm::TimeTraceScope timeScope("Replace common symbols"); - for (Symbol *sym : symtab->symbols()) { - auto *s = dyn_cast(sym); - if (!s) + for (ELFFileBase *file : objectFiles) { + if (!file->hasCommonSyms) continue; + for (Symbol *sym : file->getGlobalSymbols()) { + auto *s = dyn_cast(sym); + if (!s) + continue; - auto *bss = make("COMMON", s->size, s->alignment); - bss->file = s->file; - bss->markDead(); - inputSections.push_back(bss); - s->replace(Defined{s->file, s->getName(), s->binding, s->stOther, s->type, - /*value=*/0, s->size, bss}); + auto *bss = make("COMMON", s->size, s->alignment); + bss->file = s->file; + bss->markDead(); + inputSections.push_back(bss); + s->replace(Defined{s->file, s->getName(), s->binding, s->stOther, s->type, + /*value=*/0, s->size, bss}); + } } } diff --git a/lld/ELF/InputFiles.cpp b/lld/ELF/InputFiles.cpp index 0badf2c55e5b..e321b0d82920 100644 --- a/lld/ELF/InputFiles.cpp +++ b/lld/ELF/InputFiles.cpp @@ -1123,6 +1123,7 @@ template void ObjFile::initializeSymbols() { if (value == 0 || value >= UINT32_MAX) fatal(toString(this) + ": common symbol '" + name + "' has invalid alignment: " + Twine(value)); + hasCommonSyms = true; sym->resolve( CommonSymbol{this, name, binding, stOther, type, value, size}); continue; diff --git a/lld/ELF/InputFiles.h b/lld/ELF/InputFiles.h index f58e76e48433..d622390fcade 100644 --- a/lld/ELF/InputFiles.h +++ b/lld/ELF/InputFiles.h @@ -205,12 +205,15 @@ class ELFFileBase : public InputFile { // Initializes this class's member variables. template void init(); + StringRef stringTable; const void *elfShdrs = nullptr; const void *elfSyms = nullptr; uint32_t numELFShdrs = 0; uint32_t numELFSyms = 0; uint32_t firstGlobal = 0; - StringRef stringTable; + +public: + bool hasCommonSyms = false; }; // .o file. 
From a8cbddc99411f9734958f974263aed7bf0113ec9 Mon Sep 17 00:00:00 2001 From: Kazu Hirata Date: Fri, 24 Dec 2021 19:51:10 -0800 Subject: [PATCH 240/293] [CodeGen] Fix a memory leak --- polly/lib/CodeGen/IslAst.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/polly/lib/CodeGen/IslAst.cpp b/polly/lib/CodeGen/IslAst.cpp index ab0bcdaf8ef5..6497275df610 100644 --- a/polly/lib/CodeGen/IslAst.cpp +++ b/polly/lib/CodeGen/IslAst.cpp @@ -671,7 +671,7 @@ IslAstInfo IslAstAnalysis::run(Scop &S, ScopAnalysisManager &SAM, return SAM.getResult(S, SAR).getDependences(Lvl); }; - return std::move(*runIslAst(S, GetDeps).release()); + return std::move(*runIslAst(S, GetDeps)); } static __isl_give isl_printer *cbPrintUser(__isl_take isl_printer *P, From 9c0a4227a9ca7a2e4dc63ae27ee3868efdedb7ea Mon Sep 17 00:00:00 2001 From: Kazu Hirata Date: Fri, 24 Dec 2021 20:57:40 -0800 Subject: [PATCH 241/293] Use Optional::getValueOr (NFC) --- clang/include/clang/APINotes/Types.h | 10 +++++----- clang/include/clang/AST/AbstractBasicReader.h | 2 +- clang/include/clang/AST/DeclTemplate.h | 11 +++++------ clang/lib/ASTMatchers/Dynamic/Parser.cpp | 2 +- clang/lib/CodeGen/CGObjC.cpp | 8 ++++---- lld/ELF/LinkerScript.cpp | 2 +- lldb/source/Plugins/SymbolFile/DWARF/DWARFUnit.h | 2 +- .../ctf/CommandObjectThreadTraceExportCTF.cpp | 3 +-- llvm/lib/DebugInfo/DWARF/DWARFUnit.cpp | 2 +- llvm/lib/IR/Instructions.cpp | 2 +- llvm/lib/MC/MachObjectWriter.cpp | 4 ++-- 11 files changed, 23 insertions(+), 25 deletions(-) diff --git a/clang/include/clang/APINotes/Types.h b/clang/include/clang/APINotes/Types.h index d9bf2f07291f..0d97e9ad8623 100644 --- a/clang/include/clang/APINotes/Types.h +++ b/clang/include/clang/APINotes/Types.h @@ -240,7 +240,7 @@ class ObjCContextInfo : public CommonTypeInfo { } void setSwiftImportAsNonGeneric(llvm::Optional Value) { SwiftImportAsNonGenericSpecified = Value.hasValue(); - SwiftImportAsNonGeneric = Value.hasValue() ? *Value : false; + SwiftImportAsNonGeneric = Value.getValueOr(false); } llvm::Optional getSwiftObjCMembers() const { @@ -249,7 +249,7 @@ class ObjCContextInfo : public CommonTypeInfo { } void setSwiftObjCMembers(llvm::Optional Value) { SwiftObjCMembersSpecified = Value.hasValue(); - SwiftObjCMembers = Value.hasValue() ? *Value : false; + SwiftObjCMembers = Value.getValueOr(false); } /// Strip off any information within the class information structure that is @@ -368,7 +368,7 @@ class ObjCPropertyInfo : public VariableInfo { } void setSwiftImportAsAccessors(llvm::Optional Value) { SwiftImportAsAccessorsSpecified = Value.hasValue(); - SwiftImportAsAccessors = Value.hasValue() ? *Value : false; + SwiftImportAsAccessors = Value.getValueOr(false); } friend bool operator==(const ObjCPropertyInfo &, const ObjCPropertyInfo &); @@ -433,7 +433,7 @@ class ParamInfo : public VariableInfo { } void setNoEscape(llvm::Optional Value) { NoEscapeSpecified = Value.hasValue(); - NoEscape = Value.hasValue() ? *Value : false; + NoEscape = Value.getValueOr(false); } llvm::Optional getRetainCountConvention() const { @@ -671,7 +671,7 @@ class TagInfo : public CommonTypeInfo { } void setFlagEnum(llvm::Optional Value) { HasFlagEnum = Value.hasValue(); - IsFlagEnum = Value.hasValue() ? 
*Value : false; + IsFlagEnum = Value.getValueOr(false); } TagInfo &operator|=(const TagInfo &RHS) { diff --git a/clang/include/clang/AST/AbstractBasicReader.h b/clang/include/clang/AST/AbstractBasicReader.h index 5505d661b44e..442039044cfe 100644 --- a/clang/include/clang/AST/AbstractBasicReader.h +++ b/clang/include/clang/AST/AbstractBasicReader.h @@ -21,7 +21,7 @@ inline T makeNullableFromOptional(const Optional &value) { template inline T *makePointerFromOptional(Optional value) { - return (value ? *value : nullptr); + return value.getValueOr(nullptr); } // PropertyReader is a class concept that requires the following method: diff --git a/clang/include/clang/AST/DeclTemplate.h b/clang/include/clang/AST/DeclTemplate.h index d33babef958e..f7a2e3146d06 100644 --- a/clang/include/clang/AST/DeclTemplate.h +++ b/clang/include/clang/AST/DeclTemplate.h @@ -1211,13 +1211,12 @@ class TemplateTypeParmDecl final : public TypeDecl, DefArgStorage DefaultArgument; TemplateTypeParmDecl(DeclContext *DC, SourceLocation KeyLoc, - SourceLocation IdLoc, IdentifierInfo *Id, - bool Typename, bool HasTypeConstraint, - Optional NumExpanded) + SourceLocation IdLoc, IdentifierInfo *Id, bool Typename, + bool HasTypeConstraint, Optional NumExpanded) : TypeDecl(TemplateTypeParm, DC, IdLoc, Id, KeyLoc), Typename(Typename), - HasTypeConstraint(HasTypeConstraint), TypeConstraintInitialized(false), - ExpandedParameterPack(NumExpanded), - NumExpanded(NumExpanded ? *NumExpanded : 0) {} + HasTypeConstraint(HasTypeConstraint), TypeConstraintInitialized(false), + ExpandedParameterPack(NumExpanded), + NumExpanded(NumExpanded.getValueOr(0)) {} public: static TemplateTypeParmDecl *Create(const ASTContext &C, DeclContext *DC, diff --git a/clang/lib/ASTMatchers/Dynamic/Parser.cpp b/clang/lib/ASTMatchers/Dynamic/Parser.cpp index c6a77bb6c2e0..cab1476acf94 100644 --- a/clang/lib/ASTMatchers/Dynamic/Parser.cpp +++ b/clang/lib/ASTMatchers/Dynamic/Parser.cpp @@ -645,7 +645,7 @@ bool Parser::parseMatcherExpressionImpl(const TokenInfo &NameToken, Tokenizer->SkipNewlines(); { - ScopedContextEntry SCE(this, Ctor ? *Ctor : nullptr); + ScopedContextEntry SCE(this, Ctor.getValueOr(nullptr)); while (Tokenizer->nextTokenKind() != TokenInfo::TK_Eof) { if (Tokenizer->nextTokenKind() == TokenInfo::TK_CloseParen) { diff --git a/clang/lib/CodeGen/CGObjC.cpp b/clang/lib/CodeGen/CGObjC.cpp index ac26f0d4232c..b5bcf157036d 100644 --- a/clang/lib/CodeGen/CGObjC.cpp +++ b/clang/lib/CodeGen/CGObjC.cpp @@ -3915,8 +3915,8 @@ static llvm::Value *emitIsPlatformVersionAtLeast(CodeGenFunction &CGF, Args.push_back( llvm::ConstantInt::get(CGM.Int32Ty, getBaseMachOPlatformID(TT))); Args.push_back(llvm::ConstantInt::get(CGM.Int32Ty, Version.getMajor())); - Args.push_back(llvm::ConstantInt::get(CGM.Int32Ty, Min ? *Min : 0)); - Args.push_back(llvm::ConstantInt::get(CGM.Int32Ty, SMin ? *SMin : 0)); + Args.push_back(llvm::ConstantInt::get(CGM.Int32Ty, Min.getValueOr(0))); + Args.push_back(llvm::ConstantInt::get(CGM.Int32Ty, SMin.getValueOr(0))); }; assert(!Version.empty() && "unexpected empty version"); @@ -3952,8 +3952,8 @@ CodeGenFunction::EmitBuiltinAvailable(const VersionTuple &Version) { Optional Min = Version.getMinor(), SMin = Version.getSubminor(); llvm::Value *Args[] = { llvm::ConstantInt::get(CGM.Int32Ty, Version.getMajor()), - llvm::ConstantInt::get(CGM.Int32Ty, Min ? *Min : 0), - llvm::ConstantInt::get(CGM.Int32Ty, SMin ? 
*SMin : 0), + llvm::ConstantInt::get(CGM.Int32Ty, Min.getValueOr(0)), + llvm::ConstantInt::get(CGM.Int32Ty, SMin.getValueOr(0)) }; llvm::Value *CallRes = diff --git a/lld/ELF/LinkerScript.cpp b/lld/ELF/LinkerScript.cpp index 999bb94d6416..e8f2ce4fdf1f 100644 --- a/lld/ELF/LinkerScript.cpp +++ b/lld/ELF/LinkerScript.cpp @@ -1338,7 +1338,7 @@ std::vector LinkerScript::createPhdrs() { // Process PHDRS and FILEHDR keywords because they are not // real output sections and cannot be added in the following loop. for (const PhdrsCommand &cmd : phdrsCommands) { - PhdrEntry *phdr = make(cmd.type, cmd.flags ? *cmd.flags : PF_R); + PhdrEntry *phdr = make(cmd.type, cmd.flags.getValueOr(PF_R)); if (cmd.hasFilehdr) phdr->add(Out::elfHeader); diff --git a/lldb/source/Plugins/SymbolFile/DWARF/DWARFUnit.h b/lldb/source/Plugins/SymbolFile/DWARF/DWARFUnit.h index cece29dcf9ac..71d4c1e6c52f 100644 --- a/lldb/source/Plugins/SymbolFile/DWARF/DWARFUnit.h +++ b/lldb/source/Plugins/SymbolFile/DWARF/DWARFUnit.h @@ -153,7 +153,7 @@ class DWARFUnit : public lldb_private::UserID { const DWARFAbbreviationDeclarationSet *GetAbbreviations() const; dw_offset_t GetAbbrevOffset() const; uint8_t GetAddressByteSize() const { return m_header.GetAddressByteSize(); } - dw_addr_t GetAddrBase() const { return m_addr_base ? *m_addr_base : 0; } + dw_addr_t GetAddrBase() const { return m_addr_base.getValueOr(0); } dw_addr_t GetBaseAddress() const { return m_base_addr; } dw_offset_t GetLineTableOffset(); dw_addr_t GetRangesBase() const { return m_ranges_base; } diff --git a/lldb/source/Plugins/TraceExporter/ctf/CommandObjectThreadTraceExportCTF.cpp b/lldb/source/Plugins/TraceExporter/ctf/CommandObjectThreadTraceExportCTF.cpp index 919cdf46a5c0..a72e46a0b703 100644 --- a/lldb/source/Plugins/TraceExporter/ctf/CommandObjectThreadTraceExportCTF.cpp +++ b/lldb/source/Plugins/TraceExporter/ctf/CommandObjectThreadTraceExportCTF.cpp @@ -73,8 +73,7 @@ bool CommandObjectThreadTraceExportCTF::DoExecute(Args &command, if (thread == nullptr) { const uint32_t num_threads = process->GetThreadList().GetSize(); - size_t tid = m_options.m_thread_index ? *m_options.m_thread_index - : LLDB_INVALID_THREAD_ID; + size_t tid = m_options.m_thread_index.getValueOr(LLDB_INVALID_THREAD_ID); result.AppendErrorWithFormatv( "Thread index {0} is out of range (valid values are 1 - {1}).\n", tid, num_threads); diff --git a/llvm/lib/DebugInfo/DWARF/DWARFUnit.cpp b/llvm/lib/DebugInfo/DWARF/DWARFUnit.cpp index 9b7ffed2ca67..eed0a60ec75e 100644 --- a/llvm/lib/DebugInfo/DWARF/DWARFUnit.cpp +++ b/llvm/lib/DebugInfo/DWARF/DWARFUnit.cpp @@ -607,7 +607,7 @@ bool DWARFUnit::parseDWO() { DWO->setAddrOffsetSection(AddrOffsetSection, *AddrOffsetSectionBase); if (getVersion() == 4) { auto DWORangesBase = UnitDie.getRangesBaseAttribute(); - DWO->setRangesSection(RangeSection, DWORangesBase ? *DWORangesBase : 0); + DWO->setRangesSection(RangeSection, DWORangesBase.getValueOr(0)); } return true; diff --git a/llvm/lib/IR/Instructions.cpp b/llvm/lib/IR/Instructions.cpp index ce349f66c916..fb6105712d1a 100644 --- a/llvm/lib/IR/Instructions.cpp +++ b/llvm/lib/IR/Instructions.cpp @@ -4407,7 +4407,7 @@ void SwitchInstProfUpdateWrapper::addCase( Weights.getValue()[SI.getNumSuccessors() - 1] = *W; } else if (Weights) { Changed = true; - Weights.getValue().push_back(W ? 
*W : 0); + Weights.getValue().push_back(W.getValueOr(0)); } if (Weights) assert(SI.getNumSuccessors() == Weights->size() && diff --git a/llvm/lib/MC/MachObjectWriter.cpp b/llvm/lib/MC/MachObjectWriter.cpp index d18579ad4bfc..16941b1cb727 100644 --- a/llvm/lib/MC/MachObjectWriter.cpp +++ b/llvm/lib/MC/MachObjectWriter.cpp @@ -877,8 +877,8 @@ uint64_t MachObjectWriter::writeObject(MCAssembler &Asm, [&](const MCAssembler::VersionInfoType &VersionInfo) { auto EncodeVersion = [](VersionTuple V) -> uint32_t { assert(!V.empty() && "empty version"); - unsigned Update = V.getSubminor() ? *V.getSubminor() : 0; - unsigned Minor = V.getMinor() ? *V.getMinor() : 0; + unsigned Update = V.getSubminor().getValueOr(0); + unsigned Minor = V.getMinor().getValueOr(0); assert(Update < 256 && "unencodable update target version"); assert(Minor < 256 && "unencodable minor target version"); assert(V.getMajor() < 65536 && "unencodable major target version"); From 62e48ed10f9d2328331378f7c070487e58346a7e Mon Sep 17 00:00:00 2001 From: Kazu Hirata Date: Fri, 24 Dec 2021 21:22:27 -0800 Subject: [PATCH 242/293] Use isa instead of dyn_cast (NFC) --- clang-tools-extra/clang-doc/Mapper.cpp | 2 +- clang-tools-extra/clang-doc/Serialize.cpp | 6 +++--- clang-tools-extra/clang-tidy/modernize/LoopConvertCheck.cpp | 2 +- clang/include/clang/Analysis/Analyses/ThreadSafetyCommon.h | 2 +- clang/lib/Sema/SemaExprCXX.cpp | 2 +- lld/COFF/Driver.cpp | 2 +- lld/ELF/Symbols.cpp | 4 ++-- .../Plugins/ExpressionParser/Clang/ASTResultSynthesizer.cpp | 2 +- .../Plugins/ExpressionParser/Clang/IRDynamicChecks.cpp | 2 +- lldb/source/Plugins/ExpressionParser/Clang/IRForTarget.cpp | 2 +- lldb/source/Plugins/SymbolFile/NativePDB/PdbAstBuilder.cpp | 6 +++--- lldb/source/Plugins/SymbolFile/PDB/SymbolFilePDB.cpp | 2 +- 12 files changed, 17 insertions(+), 17 deletions(-) diff --git a/clang-tools-extra/clang-doc/Mapper.cpp b/clang-tools-extra/clang-doc/Mapper.cpp index 790f11bb69c5..de7e4c341086 100644 --- a/clang-tools-extra/clang-doc/Mapper.cpp +++ b/clang-tools-extra/clang-doc/Mapper.cpp @@ -68,7 +68,7 @@ bool MapASTVisitor::VisitCXXMethodDecl(const CXXMethodDecl *D) { bool MapASTVisitor::VisitFunctionDecl(const FunctionDecl *D) { // Don't visit CXXMethodDecls twice - if (dyn_cast(D)) + if (isa(D)) return true; return mapDecl(D); } diff --git a/clang-tools-extra/clang-doc/Serialize.cpp b/clang-tools-extra/clang-doc/Serialize.cpp index e132c56cb000..29762b6b54b1 100644 --- a/clang-tools-extra/clang-doc/Serialize.cpp +++ b/clang-tools-extra/clang-doc/Serialize.cpp @@ -382,7 +382,7 @@ populateParentNamespaces(llvm::SmallVector &Namespaces, // corresponds to a Record and if it doesn't have any namespace (because this // means it's in the global namespace). Also if its outermost namespace is a // record because that record matches the previous condition mentioned. 
- if ((Namespaces.empty() && dyn_cast(D)) || + if ((Namespaces.empty() && isa(D)) || (!Namespaces.empty() && Namespaces.back().RefType == InfoType::IT_record)) Namespaces.emplace_back(SymbolID(), "GlobalNamespace", InfoType::IT_namespace); @@ -419,10 +419,10 @@ static void populateFunctionInfo(FunctionInfo &I, const FunctionDecl *D, populateSymbolInfo(I, D, FC, LineNumber, Filename, IsFileInRootDir, IsInAnonymousNamespace); if (const auto *T = getDeclForType(D->getReturnType())) { - if (dyn_cast(T)) + if (isa(T)) I.ReturnType = TypeInfo(getUSRForDecl(T), T->getNameAsString(), InfoType::IT_enum, getInfoRelativePath(T)); - else if (dyn_cast(T)) + else if (isa(T)) I.ReturnType = TypeInfo(getUSRForDecl(T), T->getNameAsString(), InfoType::IT_record, getInfoRelativePath(T)); } else { diff --git a/clang-tools-extra/clang-tidy/modernize/LoopConvertCheck.cpp b/clang-tools-extra/clang-tidy/modernize/LoopConvertCheck.cpp index 432d929057d1..b0b882be1a6c 100644 --- a/clang-tools-extra/clang-tidy/modernize/LoopConvertCheck.cpp +++ b/clang-tools-extra/clang-tidy/modernize/LoopConvertCheck.cpp @@ -829,7 +829,7 @@ bool LoopConvertCheck::isConvertible(ASTContext *Context, } else if (FixerKind == LFK_PseudoArray) { // This call is required to obtain the container. const auto *EndCall = Nodes.getNodeAs(EndCallName); - if (!EndCall || !dyn_cast(EndCall->getCallee())) + if (!EndCall || !isa(EndCall->getCallee())) return false; } return true; diff --git a/clang/include/clang/Analysis/Analyses/ThreadSafetyCommon.h b/clang/include/clang/Analysis/Analyses/ThreadSafetyCommon.h index 4a58fe870944..a0ae44131b45 100644 --- a/clang/include/clang/Analysis/Analyses/ThreadSafetyCommon.h +++ b/clang/include/clang/Analysis/Analyses/ThreadSafetyCommon.h @@ -155,7 +155,7 @@ class CFGWalker { return false; // Ignore anonymous functions. - if (!dyn_cast_or_null(AC.getDecl())) + if (!isa_and_nonnull(AC.getDecl())) return false; SortedGraph = AC.getAnalysis(); diff --git a/clang/lib/Sema/SemaExprCXX.cpp b/clang/lib/Sema/SemaExprCXX.cpp index d25f329f85e4..54f0242d2ca1 100644 --- a/clang/lib/Sema/SemaExprCXX.cpp +++ b/clang/lib/Sema/SemaExprCXX.cpp @@ -1346,7 +1346,7 @@ bool Sema::CheckCXXThisCapture(SourceLocation Loc, const bool Explicit, // implicitly capturing the *enclosing object* by reference (see loop // above)). assert((!ByCopy || - dyn_cast(FunctionScopes[MaxFunctionScopesIndex])) && + isa(FunctionScopes[MaxFunctionScopesIndex])) && "Only a lambda can capture the enclosing object (referred to by " "*this) by copy"); QualType ThisTy = getCurrentThisType(); diff --git a/lld/COFF/Driver.cpp b/lld/COFF/Driver.cpp index f1b0c5c0707d..07b60673577e 100644 --- a/lld/COFF/Driver.cpp +++ b/lld/COFF/Driver.cpp @@ -2085,7 +2085,7 @@ void LinkerDriver::linkerMain(ArrayRef argsArr) { if (args.hasArg(OPT_include_optional)) { // Handle /includeoptional for (auto *arg : args.filtered(OPT_include_optional)) - if (dyn_cast_or_null(ctx.symtab.find(arg->getValue()))) + if (isa_and_nonnull(ctx.symtab.find(arg->getValue()))) addUndefined(arg->getValue()); while (run()); } diff --git a/lld/ELF/Symbols.cpp b/lld/ELF/Symbols.cpp index 23f8d3ef545e..20301497a059 100644 --- a/lld/ELF/Symbols.cpp +++ b/lld/ELF/Symbols.cpp @@ -550,7 +550,7 @@ void Symbol::resolveUndefined(const Undefined &other) { } // Undefined symbols in a SharedFile do not change the binding. 
- if (dyn_cast_or_null(other.file)) + if (isa_and_nonnull(other.file)) return; if (isUndefined() || isShared()) { @@ -608,7 +608,7 @@ int Symbol::compare(const Symbol *other) const { auto *oldSym = cast(this); auto *newSym = cast(other); - if (dyn_cast_or_null(other->file)) + if (isa_and_nonnull(other->file)) return 0; if (!oldSym->section && !newSym->section && oldSym->value == newSym->value && diff --git a/lldb/source/Plugins/ExpressionParser/Clang/ASTResultSynthesizer.cpp b/lldb/source/Plugins/ExpressionParser/Clang/ASTResultSynthesizer.cpp index 85e2fcfc838c..7844f27139cf 100644 --- a/lldb/source/Plugins/ExpressionParser/Clang/ASTResultSynthesizer.cpp +++ b/lldb/source/Plugins/ExpressionParser/Clang/ASTResultSynthesizer.cpp @@ -211,7 +211,7 @@ bool ASTResultSynthesizer::SynthesizeBodyResult(CompoundStmt *Body, Stmt **last_stmt_ptr = Body->body_end() - 1; Stmt *last_stmt = *last_stmt_ptr; - while (dyn_cast(last_stmt)) { + while (isa(last_stmt)) { if (last_stmt_ptr != Body->body_begin()) { last_stmt_ptr--; last_stmt = *last_stmt_ptr; diff --git a/lldb/source/Plugins/ExpressionParser/Clang/IRDynamicChecks.cpp b/lldb/source/Plugins/ExpressionParser/Clang/IRDynamicChecks.cpp index a6e36d81b950..8b132b54b7e6 100644 --- a/lldb/source/Plugins/ExpressionParser/Clang/IRDynamicChecks.cpp +++ b/lldb/source/Plugins/ExpressionParser/Clang/IRDynamicChecks.cpp @@ -353,7 +353,7 @@ class ValidPointerChecker : public Instrumenter { } bool InspectInstruction(llvm::Instruction &i) override { - if (dyn_cast(&i) || dyn_cast(&i)) + if (isa(&i) || isa(&i)) RegisterInstruction(i); return true; diff --git a/lldb/source/Plugins/ExpressionParser/Clang/IRForTarget.cpp b/lldb/source/Plugins/ExpressionParser/Clang/IRForTarget.cpp index f80dc2b14467..e0e41925f7ef 100644 --- a/lldb/source/Plugins/ExpressionParser/Clang/IRForTarget.cpp +++ b/lldb/source/Plugins/ExpressionParser/Clang/IRForTarget.cpp @@ -1255,7 +1255,7 @@ bool IRForTarget::MaybeHandleVariable(Value *llvm_value_ptr) { m_decl_map->AddValueToStruct(named_decl, lldb_private::ConstString(name), llvm_value_ptr, *value_size, value_alignment); - } else if (dyn_cast(llvm_value_ptr)) { + } else if (isa(llvm_value_ptr)) { LLDB_LOG(log, "Function pointers aren't handled right now"); return false; diff --git a/lldb/source/Plugins/SymbolFile/NativePDB/PdbAstBuilder.cpp b/lldb/source/Plugins/SymbolFile/NativePDB/PdbAstBuilder.cpp index 7ce2f1451580..9473befa6cc3 100644 --- a/lldb/source/Plugins/SymbolFile/NativePDB/PdbAstBuilder.cpp +++ b/lldb/source/Plugins/SymbolFile/NativePDB/PdbAstBuilder.cpp @@ -1280,15 +1280,15 @@ clang::QualType PdbAstBuilder::CreateFunctionType( } static bool isTagDecl(clang::DeclContext &context) { - return !!llvm::dyn_cast(&context); + return llvm::isa(&context); } static bool isFunctionDecl(clang::DeclContext &context) { - return !!llvm::dyn_cast(&context); + return llvm::isa(&context); } static bool isBlockDecl(clang::DeclContext &context) { - return !!llvm::dyn_cast(&context); + return llvm::isa(&context); } void PdbAstBuilder::ParseAllNamespacesPlusChildrenOf( diff --git a/lldb/source/Plugins/SymbolFile/PDB/SymbolFilePDB.cpp b/lldb/source/Plugins/SymbolFile/PDB/SymbolFilePDB.cpp index db0ae241be7e..c0547936b666 100644 --- a/lldb/source/Plugins/SymbolFile/PDB/SymbolFilePDB.cpp +++ b/lldb/source/Plugins/SymbolFile/PDB/SymbolFilePDB.cpp @@ -402,7 +402,7 @@ static size_t ParseFunctionBlocksForPDBSymbol( block = parent_block; else break; - } else if (llvm::dyn_cast(pdb_symbol)) { + } else if (llvm::isa(pdb_symbol)) { auto uid = 
pdb_symbol->getSymIndexId(); if (parent_block->FindBlockByID(uid)) break; From 76f0f1cc5c52359da5dd6dffff0c444400a1bca1 Mon Sep 17 00:00:00 2001 From: Kazu Hirata Date: Fri, 24 Dec 2021 21:43:06 -0800 Subject: [PATCH 243/293] Use {DenseSet,SetVector,SmallPtrSet}::contains (NFC) --- clang/lib/AST/ASTContext.cpp | 2 +- clang/lib/CodeGen/CGOpenMPRuntime.cpp | 2 +- clang/lib/Frontend/CompilerInstance.cpp | 12 ++++++------ .../ExpressionParser/Clang/ClangASTImporter.cpp | 2 +- .../Plugins/ObjectFile/Mach-O/ObjectFileMachO.cpp | 2 +- .../Plugins/Process/Linux/NativeProcessLinux.cpp | 2 +- llvm/lib/Transforms/Vectorize/LoopVectorize.cpp | 4 ++-- polly/lib/Support/SCEVValidator.cpp | 2 +- polly/lib/Transform/ScopInliner.cpp | 2 +- 9 files changed, 15 insertions(+), 15 deletions(-) diff --git a/clang/lib/AST/ASTContext.cpp b/clang/lib/AST/ASTContext.cpp index 58bd7b6a4a8c..008b703d4c1a 100644 --- a/clang/lib/AST/ASTContext.cpp +++ b/clang/lib/AST/ASTContext.cpp @@ -9272,7 +9272,7 @@ void getIntersectionOfProtocols(ASTContext &Context, // Remove any implied protocols from the list of inherited protocols. if (!ImpliedProtocols.empty()) { llvm::erase_if(IntersectionSet, [&](ObjCProtocolDecl *proto) -> bool { - return ImpliedProtocols.count(proto) > 0; + return ImpliedProtocols.contains(proto); }); } diff --git a/clang/lib/CodeGen/CGOpenMPRuntime.cpp b/clang/lib/CodeGen/CGOpenMPRuntime.cpp index 0c71fee14025..e35c15421520 100644 --- a/clang/lib/CodeGen/CGOpenMPRuntime.cpp +++ b/clang/lib/CodeGen/CGOpenMPRuntime.cpp @@ -12788,7 +12788,7 @@ void CGOpenMPRuntime::checkAndEmitSharedLastprivateConditional( const CapturedStmt *CS = D.getCapturedStmt(CaptureRegions.back()); for (const auto &Pair : It->DeclToUniqueName) { const auto *VD = cast(Pair.first->getCanonicalDecl()); - if (!CS->capturesVariable(VD) || IgnoredDecls.count(VD) > 0) + if (!CS->capturesVariable(VD) || IgnoredDecls.contains(VD)) continue; auto I = LPCI->getSecond().find(Pair.first); assert(I != LPCI->getSecond().end() && diff --git a/clang/lib/Frontend/CompilerInstance.cpp b/clang/lib/Frontend/CompilerInstance.cpp index 1432607204bd..31e7ea3d243d 100644 --- a/clang/lib/Frontend/CompilerInstance.cpp +++ b/clang/lib/Frontend/CompilerInstance.cpp @@ -1154,12 +1154,12 @@ compileModuleImpl(CompilerInstance &ImportingInstance, SourceLocation ImportLoc, // Remove any macro definitions that are explicitly ignored by the module. // They aren't supposed to affect how the module is built anyway. HeaderSearchOptions &HSOpts = Invocation->getHeaderSearchOpts(); - llvm::erase_if( - PPOpts.Macros, [&HSOpts](const std::pair &def) { - StringRef MacroDef = def.first; - return HSOpts.ModulesIgnoreMacros.count( - llvm::CachedHashString(MacroDef.split('=').first)) > 0; - }); + llvm::erase_if(PPOpts.Macros, + [&HSOpts](const std::pair &def) { + StringRef MacroDef = def.first; + return HSOpts.ModulesIgnoreMacros.contains( + llvm::CachedHashString(MacroDef.split('=').first)); + }); // If the original compiler invocation had -fmodule-name, pass it through. 
Invocation->getLangOpts()->ModuleName = diff --git a/lldb/source/Plugins/ExpressionParser/Clang/ClangASTImporter.cpp b/lldb/source/Plugins/ExpressionParser/Clang/ClangASTImporter.cpp index 80469e292580..719b35689feb 100644 --- a/lldb/source/Plugins/ExpressionParser/Clang/ClangASTImporter.cpp +++ b/lldb/source/Plugins/ExpressionParser/Clang/ClangASTImporter.cpp @@ -293,7 +293,7 @@ class CompleteTagDeclsScope : public ClangASTImporter::NewDeclListener { NamedDecl *to_named_decl = dyn_cast(to); // Check if we already completed this type. - if (m_decls_already_completed.count(to_named_decl) != 0) + if (m_decls_already_completed.contains(to_named_decl)) return; // Queue this type to be completed. m_decls_to_complete.insert(to_named_decl); diff --git a/lldb/source/Plugins/ObjectFile/Mach-O/ObjectFileMachO.cpp b/lldb/source/Plugins/ObjectFile/Mach-O/ObjectFileMachO.cpp index 7668b68650b4..e72d55dd2aba 100644 --- a/lldb/source/Plugins/ObjectFile/Mach-O/ObjectFileMachO.cpp +++ b/lldb/source/Plugins/ObjectFile/Mach-O/ObjectFileMachO.cpp @@ -4647,7 +4647,7 @@ void ObjectFileMachO::ParseSymtab(Symtab &symtab) { // Add symbols from the trie to the symbol table. for (auto &e : external_sym_trie_entries) { - if (symbols_added.find(e.entry.address) != symbols_added.end()) + if (symbols_added.contains(e.entry.address)) continue; // Find the section that this trie address is in, use that to annotate diff --git a/lldb/source/Plugins/Process/Linux/NativeProcessLinux.cpp b/lldb/source/Plugins/Process/Linux/NativeProcessLinux.cpp index 279ffb1f2a9c..d7651ce71da0 100644 --- a/lldb/source/Plugins/Process/Linux/NativeProcessLinux.cpp +++ b/lldb/source/Plugins/Process/Linux/NativeProcessLinux.cpp @@ -874,7 +874,7 @@ void NativeProcessLinux::MonitorSignal(const siginfo_t &info, // Check if debugger should stop at this signal or just ignore it and resume // the inferior. - if (m_signals_to_ignore.find(signo) != m_signals_to_ignore.end()) { + if (m_signals_to_ignore.contains(signo)) { ResumeThread(thread, thread.GetState(), signo); return; } diff --git a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp index b3ba2aa13d37..4747f34fcc62 100644 --- a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp +++ b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp @@ -2963,7 +2963,7 @@ void InnerLoopVectorizer::scalarizeInstruction(Instruction *Instr, // poison-generating flags (nuw/nsw, exact, inbounds, etc.). The scalarized // instruction could feed a poison value to the base address of the widen // load/store. - if (State.MayGeneratePoisonRecipes.count(RepRecipe) > 0) + if (State.MayGeneratePoisonRecipes.contains(RepRecipe)) Cloned->dropPoisonGeneratingFlags(); State.Builder.SetInsertPoint(Builder.GetInsertBlock(), @@ -9533,7 +9533,7 @@ void VPWidenRecipe::execute(VPTransformState &State) { // exact, etc.). The control flow has been linearized and the // instruction is no longer guarded by the predicate, which could make // the flag properties to no longer hold. - if (State.MayGeneratePoisonRecipes.count(this) > 0) + if (State.MayGeneratePoisonRecipes.contains(this)) VecOp->dropPoisonGeneratingFlags(); } diff --git a/polly/lib/Support/SCEVValidator.cpp b/polly/lib/Support/SCEVValidator.cpp index 002674375df0..8f175596d711 100644 --- a/polly/lib/Support/SCEVValidator.cpp +++ b/polly/lib/Support/SCEVValidator.cpp @@ -472,7 +472,7 @@ class SCEVInRegionDependences { // are strictly not necessary by tracking the invariant load as a // scalar. 
LoadInst *LI = dyn_cast(Inst); - if (LI && ILS.count(LI) > 0) + if (LI && ILS.contains(LI)) return false; } diff --git a/polly/lib/Transform/ScopInliner.cpp b/polly/lib/Transform/ScopInliner.cpp index ed54731c6b2a..b35d3518e72d 100644 --- a/polly/lib/Transform/ScopInliner.cpp +++ b/polly/lib/Transform/ScopInliner.cpp @@ -84,7 +84,7 @@ class ScopInliner : public CallGraphSCCPass { ScopDetection &SD = FAM.getResult(*F); const bool HasScopAsTopLevelRegion = - SD.ValidRegions.count(RI.getTopLevelRegion()) > 0; + SD.ValidRegions.contains(RI.getTopLevelRegion()); bool Changed = false; if (HasScopAsTopLevelRegion) { From 3cfe375ae43139839af01e29c3ec03654e98186b Mon Sep 17 00:00:00 2001 From: Kazu Hirata Date: Fri, 24 Dec 2021 22:05:34 -0800 Subject: [PATCH 244/293] Use StringRef::contains (NFC) --- clang-tools-extra/clang-tidy/android/CloexecCheck.cpp | 2 +- .../clang-tidy/bugprone/ReservedIdentifierCheck.cpp | 2 +- .../clang-tidy/modernize/RawStringLiteralCheck.cpp | 2 +- .../clang-tidy/readability/BracesAroundStatementsCheck.cpp | 2 +- .../clang-tidy/readability/NamedParameterCheck.cpp | 2 +- clang/tools/driver/driver.cpp | 2 +- llvm/lib/Support/RISCVISAInfo.cpp | 2 +- 7 files changed, 7 insertions(+), 7 deletions(-) diff --git a/clang-tools-extra/clang-tidy/android/CloexecCheck.cpp b/clang-tools-extra/clang-tidy/android/CloexecCheck.cpp index 64c8797934d2..d373877713f1 100644 --- a/clang-tools-extra/clang-tidy/android/CloexecCheck.cpp +++ b/clang-tools-extra/clang-tidy/android/CloexecCheck.cpp @@ -87,7 +87,7 @@ void CloexecCheck::insertStringFlag( // Check if the may be in the mode string. const auto *ModeStr = dyn_cast(ModeArg->IgnoreParenCasts()); - if (!ModeStr || (ModeStr->getString().find(Mode) != StringRef::npos)) + if (!ModeStr || ModeStr->getString().contains(Mode)) return; std::string ReplacementText = buildFixMsgForStringFlag( diff --git a/clang-tools-extra/clang-tidy/bugprone/ReservedIdentifierCheck.cpp b/clang-tools-extra/clang-tidy/bugprone/ReservedIdentifierCheck.cpp index 8da046955425..4bf841648f94 100644 --- a/clang-tools-extra/clang-tidy/bugprone/ReservedIdentifierCheck.cpp +++ b/clang-tools-extra/clang-tidy/bugprone/ReservedIdentifierCheck.cpp @@ -64,7 +64,7 @@ static std::string collapseConsecutive(StringRef Str, char C) { static bool hasReservedDoubleUnderscore(StringRef Name, const LangOptions &LangOpts) { if (LangOpts.CPlusPlus) - return Name.find("__") != StringRef::npos; + return Name.contains("__"); return Name.startswith("__"); } diff --git a/clang-tools-extra/clang-tidy/modernize/RawStringLiteralCheck.cpp b/clang-tools-extra/clang-tidy/modernize/RawStringLiteralCheck.cpp index 26b1d8ecdc31..40dda98b1e49 100644 --- a/clang-tools-extra/clang-tidy/modernize/RawStringLiteralCheck.cpp +++ b/clang-tools-extra/clang-tidy/modernize/RawStringLiteralCheck.cpp @@ -25,7 +25,7 @@ bool containsEscapes(StringRef HayStack, StringRef Escapes) { return false; while (BackSlash != StringRef::npos) { - if (Escapes.find(HayStack[BackSlash + 1]) == StringRef::npos) + if (!Escapes.contains(HayStack[BackSlash + 1])) return false; BackSlash = HayStack.find('\\', BackSlash + 2); } diff --git a/clang-tools-extra/clang-tidy/readability/BracesAroundStatementsCheck.cpp b/clang-tools-extra/clang-tidy/readability/BracesAroundStatementsCheck.cpp index 7dc519c15282..07e962a07e84 100644 --- a/clang-tools-extra/clang-tidy/readability/BracesAroundStatementsCheck.cpp +++ b/clang-tools-extra/clang-tidy/readability/BracesAroundStatementsCheck.cpp @@ -81,7 +81,7 @@ static SourceLocation findEndLocation(const 
Stmt &S, const SourceManager &SM, SourceRange TokRange(Loc, TokEndLoc); StringRef Comment = Lexer::getSourceText( CharSourceRange::getTokenRange(TokRange), SM, Context->getLangOpts()); - if (Comment.startswith("/*") && Comment.find('\n') != StringRef::npos) { + if (Comment.startswith("/*") && Comment.contains('\n')) { // Multi-line block comment, insert brace before. break; } diff --git a/clang-tools-extra/clang-tidy/readability/NamedParameterCheck.cpp b/clang-tools-extra/clang-tidy/readability/NamedParameterCheck.cpp index 4f81dc49ded7..c8a8edf67884 100644 --- a/clang-tools-extra/clang-tidy/readability/NamedParameterCheck.cpp +++ b/clang-tools-extra/clang-tidy/readability/NamedParameterCheck.cpp @@ -71,7 +71,7 @@ void NamedParameterCheck::check(const MatchFinder::MatchResult &Result) { const char *Begin = SM.getCharacterData(Parm->getBeginLoc()); const char *End = SM.getCharacterData(Parm->getLocation()); StringRef Data(Begin, End - Begin); - if (Data.find("/*") != StringRef::npos) + if (Data.contains("/*")) continue; UnnamedParams.push_back(std::make_pair(Function, I)); diff --git a/clang/tools/driver/driver.cpp b/clang/tools/driver/driver.cpp index c9129ee9e502..a7bfb07e002b 100644 --- a/clang/tools/driver/driver.cpp +++ b/clang/tools/driver/driver.cpp @@ -120,7 +120,7 @@ static void ApplyOneQAOverride(raw_ostream &OS, OS << "### Adding argument " << Str << " at end\n"; Args.push_back(Str); } else if (Edit[0] == 's' && Edit[1] == '/' && Edit.endswith("/") && - Edit.slice(2, Edit.size()-1).find('/') != StringRef::npos) { + Edit.slice(2, Edit.size() - 1).contains('/')) { StringRef MatchPattern = Edit.substr(2).split('/').first; StringRef ReplPattern = Edit.substr(2).split('/').second; ReplPattern = ReplPattern.slice(0, ReplPattern.size()-1); diff --git a/llvm/lib/Support/RISCVISAInfo.cpp b/llvm/lib/Support/RISCVISAInfo.cpp index 15a249e6177e..e2e4340f44e9 100644 --- a/llvm/lib/Support/RISCVISAInfo.cpp +++ b/llvm/lib/Support/RISCVISAInfo.cpp @@ -565,7 +565,7 @@ RISCVISAInfo::parseArchString(StringRef Arch, bool EnableExperimentalExtension, // TODO: Use version number when setting target features // Currently LLVM supports only "mafdcbv". StringRef SupportedStandardExtension = "mafdcbv"; - if (SupportedStandardExtension.find(C) == StringRef::npos) + if (!SupportedStandardExtension.contains(C)) return createStringError(errc::invalid_argument, "unsupported standard user-level extension '%c'", C); From cde37a7e5a1fd3917ab8f66c8b61d86220afc581 Mon Sep 17 00:00:00 2001 From: Fangrui Song Date: Fri, 24 Dec 2021 22:24:15 -0800 Subject: [PATCH 245/293] [ELF][test] Add tests for mixed GD-to-IE and IE, mixed TLSDESC and GD Note: mixed TLSDESC and GD currently does not work. 
--- lld/test/ELF/x86-64-tls-ie.s | 37 +++++++++++++++++--------- lld/test/ELF/x86-64-tlsdesc-gd-mixed.s | 25 +++++++++++++++++ 2 files changed, 49 insertions(+), 13 deletions(-) create mode 100644 lld/test/ELF/x86-64-tlsdesc-gd-mixed.s diff --git a/lld/test/ELF/x86-64-tls-ie.s b/lld/test/ELF/x86-64-tls-ie.s index e5510f84d5f3..8190c569f0fe 100644 --- a/lld/test/ELF/x86-64-tls-ie.s +++ b/lld/test/ELF/x86-64-tls-ie.s @@ -4,7 +4,7 @@ // RUN: ld.lld -shared %t2.o -soname=so -o %t2.so // RUN: ld.lld -e main %t1.o %t2.so -o %t3 // RUN: llvm-readobj -S -r %t3 | FileCheck %s -// RUN: llvm-objdump -d %t3 | FileCheck --check-prefix=DISASM %s +// RUN: llvm-objdump -d --no-show-raw-insn %t3 | FileCheck --check-prefix=DISASM %s // CHECK: Section { // CHECK: Index: 9 @@ -15,7 +15,7 @@ // CHECK-NEXT: SHF_WRITE // CHECK-NEXT: ] // CHECK-NEXT: Address: [[ADDR:.*]] -// CHECK-NEXT: Offset: 0x3B0 +// CHECK-NEXT: Offset: 0x3F0 // CHECK-NEXT: Size: 16 // CHECK-NEXT: Link: 0 // CHECK-NEXT: Info: 0 @@ -26,23 +26,27 @@ // CHECK: Relocations [ // CHECK-NEXT: Section (5) .rela.dyn { // CHECK-NEXT: [[ADDR]] R_X86_64_TPOFF64 tls1 0x0 -// CHECK-NEXT: 0x2023B8 R_X86_64_TPOFF64 tls0 0x0 +// CHECK-NEXT: 0x2023F8 R_X86_64_TPOFF64 tls0 0x0 // CHECK-NEXT: } // CHECK-NEXT: ] -// 0x2012d0 + 4313 + 7 = 0x2023B0 -// 0x2012dA + 4311 + 7 = 0x2023B8 -// 0x2012e4 + 4301 + 7 = 0x2023B8 +/// 0x2023F0 - 0x201307 = 4329 +/// 0x2023F8 - 0x201311 = 4327 +/// 0x2023F8 - 0x20131b = 4317 // DISASM: Disassembly of section .text: // DISASM-EMPTY: // DISASM-NEXT:
: -// DISASM-NEXT: 2012d0: {{.*}} movq 4313(%rip), %rax -// DISASM-NEXT: 2012d7: {{.*}} movl %fs:(%rax), %eax -// DISASM-NEXT: 2012da: {{.*}} movq 4311(%rip), %rax -// DISASM-NEXT: 2012e1: {{.*}} movl %fs:(%rax), %eax -// DISASM-NEXT: 2012e4: {{.*}} movq 4301(%rip), %rax -// DISASM-NEXT: 2012eb: {{.*}} movl %fs:(%rax), %eax -// DISASM-NEXT: 2012ee: {{.*}} retq +// DISASM-NEXT: movq 4329(%rip), %rax +// DISASM-NEXT: 201307: movl %fs:(%rax), %eax +// DISASM-NEXT: movq 4327(%rip), %rax +// DISASM-NEXT: 201311: movl %fs:(%rax), %eax +// DISASM-NEXT: movq 4317(%rip), %rax +// DISASM-NEXT: 20131b: movl %fs:(%rax), %eax + +/// 0x2023F0 - 0x20132e = 4290 +// DISASM-NEXT: movq %fs:0, %rax +// DISASM-NEXT: addq 4290(%rip), %rax +// DISASM-NEXT: 20132e: retq .section .tdata,"awT",@progbits @@ -57,4 +61,11 @@ main: movl %fs:0(%rax), %eax movq tls0@GOTTPOFF(%rip), %rax movl %fs:0(%rax), %eax + +## Relaxed to TLS IE. Share the GOT entry with GOTTPOFF. + .byte 0x66 + leaq tls1@tlsgd(%rip), %rdi + .value 0x6666 + rex64 + call __tls_get_addr@PLT ret diff --git a/lld/test/ELF/x86-64-tlsdesc-gd-mixed.s b/lld/test/ELF/x86-64-tlsdesc-gd-mixed.s new file mode 100644 index 000000000000..f6bfe5bbba1c --- /dev/null +++ b/lld/test/ELF/x86-64-tlsdesc-gd-mixed.s @@ -0,0 +1,25 @@ +# REQUIRES: x86 +# RUN: llvm-mc -filetype=obj -triple=x86_64 %s -o %t.o +# RUN: ld.lld -shared %t.o -o %t.so +# RUN: llvm-readobj -r %t.so | FileCheck %s --check-prefix=RELA + +## FIXME Both TLSDESC and DTPMOD64/DTPOFF64 should be present. +# RELA: .rela.dyn { +# RELA-NEXT: 0x2430 R_X86_64_TLSDESC a 0x0 +# RELA-NEXT: } + +leaq a@tlsdesc(%rip), %rax +call *a@tlscall(%rax) +movl %fs:(%rax), %eax + +.byte 0x66 +leaq a@tlsgd(%rip), %rdi +.word 0x6666 +rex64 +call __tls_get_addr@PLT + +.section .tbss +.globl a +.zero 8 +a: +.zero 4 From 70912420bbc39e0cf486a933182d910bfd835063 Mon Sep 17 00:00:00 2001 From: Fangrui Song Date: Fri, 24 Dec 2021 22:36:49 -0800 Subject: [PATCH 246/293] [ELF] Move TLS dynamic relocations to postScanRelocations This temporarily increases sizeof(SymbolUnion), but allows us to mov GOT/PLT/etc index members outside Symbol in the future. Then, we can make TLSDESC and TLSGD use different indexes and support mixed TLSDESC and TLSGD (tested by x86-64-tlsdesc-gd-mixed.s). Note: needsTlsGd and needsTlsGdToIe may optionally be combined. Test updates are due to reordered GOT entries. 
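For readers unfamiliar with the two-phase scheme this patch moves toward, here is a minimal,
self-contained C++ sketch of the idea only; the types are hypothetical and deliberately
simplified, not lld's real Symbol/GotSection classes. Relocation scanning merely records
per-symbol needs as bit-flags, and a single later pass (the analogue of postScanRelocations)
lays out GOT slots and would emit the dynamic relocations, so entry ordering is decided in
one place:

  #include <cstddef>
  #include <cstdint>
  #include <iostream>
  #include <vector>

  struct Sym {
    const char *name;
    // Analogous to the needsTlsGd / needsTlsIe bits added by this patch.
    uint8_t needsTlsGd : 1;
    uint8_t needsTlsIe : 1;
  };

  struct Got {
    std::vector<const Sym *> slots; // each element models one GOT slot
    std::size_t addEntry(const Sym &s) {
      slots.push_back(&s);
      return slots.size() - 1;
    }
  };

  // Phase 1: scanning only sets flags; no GOT entries are created yet.
  void scanRelocation(Sym &s, bool isGeneralDynamic) {
    if (isGeneralDynamic)
      s.needsTlsGd = 1;
    else
      s.needsTlsIe = 1;
  }

  // Phase 2: one pass decides the final GOT layout for every symbol.
  void postScanRelocations(std::vector<Sym> &syms, Got &got) {
    for (Sym &s : syms) {
      if (s.needsTlsGd) {
        std::size_t off = got.addEntry(s); // module-index slot
        got.addEntry(s);                   // offset slot (GD uses a pair)
        std::cout << s.name << ": GD pair starts at GOT slot " << off << "\n";
      }
      if (s.needsTlsIe)
        std::cout << s.name << ": IE slot at GOT slot " << got.addEntry(s) << "\n";
    }
  }

  int main() {
    std::vector<Sym> syms = {{"tls0", 0, 0}, {"tls1", 0, 0}};
    Got got;
    scanRelocation(syms[0], /*isGeneralDynamic=*/true);
    scanRelocation(syms[1], /*isGeneralDynamic=*/false);
    postScanRelocations(syms, got);
  }

Because the layout is now computed after scanning, adding a new kind of entry (for example a
separate TLSDESC index) only changes the second phase, which is why the test expectations in
the diff below merely shift by a few GOT slots.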
--- lld/ELF/Relocations.cpp | 111 ++++++++++++++++++------------- lld/ELF/Symbols.h | 15 +++-- lld/test/ELF/i386-tls-dynamic.s | 22 +++--- lld/test/ELF/i386-tlsdesc-gd.s | 20 +++--- lld/test/ELF/riscv-tls-ld.s | 34 +++++----- lld/test/ELF/x86-64-tlsdesc-gd.s | 20 +++--- 6 files changed, 122 insertions(+), 100 deletions(-) diff --git a/lld/ELF/Relocations.cpp b/lld/ELF/Relocations.cpp index 23612ec48ded..cfe49007b814 100644 --- a/lld/ELF/Relocations.cpp +++ b/lld/ELF/Relocations.cpp @@ -1158,13 +1158,10 @@ handleTlsRelocation(RelType type, Symbol &sym, InputSectionBase &c, if (oneof(expr) && config->shared) { - if (in.got->addDynTlsEntry(sym)) { - uint64_t off = in.got->getGlobalDynOffset(sym); - mainPart->relaDyn->addAddendOnlyRelocIfNonPreemptible( - target->tlsDescRel, *in.got, off, sym, target->tlsDescRel); - } - if (expr != R_TLSDESC_CALL) + if (expr != R_TLSDESC_CALL) { + sym.needsTlsDesc = true; c.relocations.push_back({expr, type, offset, addend, &sym}); + } return 1; } @@ -1200,14 +1197,7 @@ handleTlsRelocation(RelType type, Symbol &sym, InputSectionBase &c, } if (expr == R_TLSLD_HINT) return 1; - if (in.got->addTlsIndex()) { - if (isLocalInExecutable) - in.got->relocations.push_back( - {R_ADDEND, target->symbolicRel, in.got->getTlsIndexOff(), 1, &sym}); - else - mainPart->relaDyn->addReloc( - {target->tlsModuleIndexRel, in.got, in.got->getTlsIndexOff()}); - } + sym.needsTlsLd = true; c.relocations.push_back({expr, type, offset, addend, &sym}); return 1; } @@ -1223,12 +1213,7 @@ handleTlsRelocation(RelType type, Symbol &sym, InputSectionBase &c, // Local-Dynamic sequence where offset of tls variable relative to dynamic // thread pointer is stored in the got. This cannot be relaxed to Local-Exec. if (expr == R_TLSLD_GOT_OFF) { - if (!sym.isInGot()) { - in.got->addEntry(sym); - uint64_t off = sym.getGotOffset(); - in.got->relocations.push_back( - {R_ABS, target->tlsOffsetRel, off, 0, &sym}); - } + sym.needsGotDtprel = true; c.relocations.push_back({expr, type, offset, addend, &sym}); return 1; } @@ -1236,27 +1221,7 @@ handleTlsRelocation(RelType type, Symbol &sym, InputSectionBase &c, if (oneof(expr)) { if (!toExecRelax) { - if (in.got->addDynTlsEntry(sym)) { - uint64_t off = in.got->getGlobalDynOffset(sym); - - if (isLocalInExecutable) - // Write one to the GOT slot. - in.got->relocations.push_back( - {R_ADDEND, target->symbolicRel, off, 1, &sym}); - else - mainPart->relaDyn->addSymbolReloc(target->tlsModuleIndexRel, *in.got, - off, sym); - - // If the symbol is preemptible we need the dynamic linker to write - // the offset too. - uint64_t offsetOff = off + config->wordsize; - if (sym.isPreemptible) - mainPart->relaDyn->addSymbolReloc(target->tlsOffsetRel, *in.got, - offsetOff, sym); - else - in.got->relocations.push_back( - {R_ABS, target->tlsOffsetRel, offsetOff, 0, &sym}); - } + sym.needsTlsGd = true; c.relocations.push_back({expr, type, offset, addend, &sym}); return 1; } @@ -1264,14 +1229,10 @@ handleTlsRelocation(RelType type, Symbol &sym, InputSectionBase &c, // Global-Dynamic relocs can be relaxed to Initial-Exec or Local-Exec // depending on the symbol being locally defined or not. 
if (sym.isPreemptible) { + sym.needsTlsGdToIe = true; c.relocations.push_back( {target->adjustTlsExpr(type, R_RELAX_TLS_GD_TO_IE), type, offset, addend, &sym}); - if (!sym.isInGot()) { - in.got->addEntry(sym); - mainPart->relaDyn->addSymbolReloc(target->tlsGotRel, *in.got, - sym.getGotOffset(), sym); - } } else { c.relocations.push_back( {target->adjustTlsExpr(type, R_RELAX_TLS_GD_TO_LE), type, offset, @@ -1288,8 +1249,7 @@ handleTlsRelocation(RelType type, Symbol &sym, InputSectionBase &c, c.relocations.push_back( {R_RELAX_TLS_IE_TO_LE, type, offset, addend, &sym}); } else if (expr != R_TLSIE_HINT) { - if (!sym.isInGot()) - addTpOffsetGotEntry(sym); + sym.needsTlsIe = true; // R_GOT needs a relative relocation for PIC on i386 and Hexagon. if (expr == R_GOT && config->isPic && !target->usesOnlyLowPageBits(type)) addRelativeReloc(c, offset, sym, addend, expr, type); @@ -1638,6 +1598,61 @@ void elf::postScanRelocations() { } } } + + if (!sym.isTls()) + return; + bool isLocalInExecutable = !sym.isPreemptible && !config->shared; + + if (sym.needsTlsDesc) { + in.got->addDynTlsEntry(sym); + mainPart->relaDyn->addAddendOnlyRelocIfNonPreemptible( + target->tlsDescRel, *in.got, in.got->getGlobalDynOffset(sym), sym, + target->tlsDescRel); + } + if (sym.needsTlsGd && !sym.needsTlsDesc) { + // TODO Support mixed TLSDESC and TLS GD. + in.got->addDynTlsEntry(sym); + uint64_t off = in.got->getGlobalDynOffset(sym); + if (isLocalInExecutable) + // Write one to the GOT slot. + in.got->relocations.push_back( + {R_ADDEND, target->symbolicRel, off, 1, &sym}); + else + mainPart->relaDyn->addSymbolReloc(target->tlsModuleIndexRel, *in.got, + off, sym); + + // If the symbol is preemptible we need the dynamic linker to write + // the offset too. + uint64_t offsetOff = off + config->wordsize; + if (sym.isPreemptible) + mainPart->relaDyn->addSymbolReloc(target->tlsOffsetRel, *in.got, + offsetOff, sym); + else + in.got->relocations.push_back( + {R_ABS, target->tlsOffsetRel, offsetOff, 0, &sym}); + } + if (sym.needsTlsGdToIe) { + in.got->addEntry(sym); + mainPart->relaDyn->addSymbolReloc(target->tlsGotRel, *in.got, + sym.getGotOffset(), sym); + } + + if (sym.needsTlsLd && in.got->addTlsIndex()) { + if (isLocalInExecutable) + in.got->relocations.push_back( + {R_ADDEND, target->symbolicRel, in.got->getTlsIndexOff(), 1, &sym}); + else + mainPart->relaDyn->addReloc( + {target->tlsModuleIndexRel, in.got, in.got->getTlsIndexOff()}); + } + if (sym.needsGotDtprel) { + in.got->addEntry(sym); + in.got->relocations.push_back( + {R_ABS, target->tlsOffsetRel, sym.getGotOffset(), 0, &sym}); + } + + if (sym.needsTlsIe && !sym.needsTlsGdToIe) + addTpOffsetGotEntry(sym); }; for (Symbol *sym : symtab->symbols()) fn(*sym); diff --git a/lld/ELF/Symbols.h b/lld/ELF/Symbols.h index e5fe53c6c496..27c36eedce80 100644 --- a/lld/ELF/Symbols.h +++ b/lld/ELF/Symbols.h @@ -249,8 +249,9 @@ class Symbol { canInline(false), referenced(false), traced(false), isInIplt(false), gotInIgot(false), isPreemptible(false), used(!config->gcSections), folded(false), needsTocRestore(false), scriptDefined(false), - needsCopy(false), needsGot(false), needsPlt(false), - hasDirectReloc(false) {} + needsCopy(false), needsGot(false), needsPlt(false), needsTlsDesc(false), + needsTlsGd(false), needsTlsGdToIe(false), needsTlsLd(false), + needsGotDtprel(false), needsTlsIe(false), hasDirectReloc(false) {} public: // True if this symbol is in the Iplt sub-section of the Plt and the Igot @@ -288,6 +289,12 @@ class Symbol { // entries during postScanRelocations(); uint8_t 
needsGot : 1; uint8_t needsPlt : 1; + uint8_t needsTlsDesc : 1; + uint8_t needsTlsGd : 1; + uint8_t needsTlsGdToIe : 1; + uint8_t needsTlsLd : 1; + uint8_t needsGotDtprel : 1; + uint8_t needsTlsIe : 1; uint8_t hasDirectReloc : 1; // The partition whose dynamic symbol table contains this symbol's definition. @@ -493,9 +500,9 @@ union SymbolUnion { }; // It is important to keep the size of SymbolUnion small for performance and -// memory usage reasons. 72 bytes is a soft limit based on the size of Defined +// memory usage reasons. 80 bytes is a soft limit based on the size of Defined // on a 64-bit system. -static_assert(sizeof(SymbolUnion) <= 72, "SymbolUnion too large"); +static_assert(sizeof(SymbolUnion) <= 80, "SymbolUnion too large"); template struct AssertSymbol { static_assert(std::is_trivially_destructible(), diff --git a/lld/test/ELF/i386-tls-dynamic.s b/lld/test/ELF/i386-tls-dynamic.s index 07e894795cc0..d61ee5526bb1 100644 --- a/lld/test/ELF/i386-tls-dynamic.s +++ b/lld/test/ELF/i386-tls-dynamic.s @@ -66,35 +66,35 @@ addl tls1@gotntpoff(%ebx),%eax # CHECK: Relocations [ # CHECK: Section ({{.+}}) .rel.dyn { -# CHECK-NEXT: 0x2368 R_386_TLS_DTPMOD32 - +# CHECK-NEXT: 0x2370 R_386_TLS_DTPMOD32 - # CHECK-NEXT: 0x2358 R_386_TLS_DTPMOD32 tls0 # CHECK-NEXT: 0x235C R_386_TLS_DTPOFF32 tls0 -# CHECK-NEXT: 0x2370 R_386_TLS_TPOFF tls0 -# CHECK-NEXT: 0x2360 R_386_TLS_DTPMOD32 tls1 -# CHECK-NEXT: 0x2364 R_386_TLS_DTPOFF32 tls1 -# CHECK-NEXT: 0x2374 R_386_TLS_TPOFF tls1 +# CHECK-NEXT: 0x2360 R_386_TLS_TPOFF tls0 +# CHECK-NEXT: 0x2364 R_386_TLS_DTPMOD32 tls1 +# CHECK-NEXT: 0x2368 R_386_TLS_DTPOFF32 tls1 +# CHECK-NEXT: 0x236C R_386_TLS_TPOFF tls1 # CHECK-NEXT: } # DIS: Disassembly of section .text: # DIS-EMPTY: # DIS-NEXT: <_start>: ## General dynamic model: -## -4128 and -4120 are first and second GOT entries offsets. +## -4128 and -4116 are first and second GOT entries offsets. ## Each one is a pair of records. # DIS-NEXT: 1260: leal -4128(,%ebx), %eax # DIS-NEXT: 1267: calll 0x12d0 -# DIS-NEXT: 126c: leal -4120(,%ebx), %eax +# DIS-NEXT: 126c: leal -4116(,%ebx), %eax # DIS-NEXT: 1273: calll 0x12d0 ## Local dynamic model: ## -16 is a local module tls index offset. 
-# DIS-NEXT: 1278: leal -4112(%ebx), %eax +# DIS-NEXT: 1278: leal -4104(%ebx), %eax # DIS-NEXT: 127e: calll 0x12d0 # DIS-NEXT: 1283: leal 8(%eax), %edx -# DIS-NEXT: 1289: leal -4112(%ebx), %eax +# DIS-NEXT: 1289: leal -4104(%ebx), %eax # DIS-NEXT: 128f: calll 0x12d0 # DIS-NEXT: 1294: leal 12(%eax), %edx ## Initial exec model: # DIS-NEXT: 129a: movl %gs:0, %eax -# DIS-NEXT: 12a0: addl -4104(%ebx), %eax +# DIS-NEXT: 12a0: addl -4120(%ebx), %eax # DIS-NEXT: 12a6: movl %gs:0, %eax -# DIS-NEXT: 12ac: addl -4100(%ebx), %eax +# DIS-NEXT: 12ac: addl -4108(%ebx), %eax diff --git a/lld/test/ELF/i386-tlsdesc-gd.s b/lld/test/ELF/i386-tlsdesc-gd.s index a2fc0f8f6645..132febed2feb 100644 --- a/lld/test/ELF/i386-tlsdesc-gd.s +++ b/lld/test/ELF/i386-tlsdesc-gd.s @@ -19,18 +19,18 @@ # RUN: llvm-objdump -h -d --no-show-raw-insn %t | FileCheck --check-prefix=IE %s # GD-REL: .rel.dyn { -# GD-REL-NEXT: 0x2250 R_386_TLS_DESC - +# GD-REL-NEXT: 0x2258 R_386_TLS_DESC - # GD-REL-NEXT: 0x2248 R_386_TLS_DESC a -# GD-REL-NEXT: 0x2258 R_386_TLS_DESC c +# GD-REL-NEXT: 0x2250 R_386_TLS_DESC c # GD-REL-NEXT: } # GD-REL: Hex dump of section '.got': -# GD-REL-NEXT: 0x00002248 00000000 00000000 00000000 0b000000 -# GD-REL-NEXT: 0x00002258 00000000 00000000 +# GD-REL-NEXT: 0x00002248 00000000 00000000 00000000 00000000 +# GD-REL-NEXT: 0x00002258 00000000 0b000000 # GD-RELA: .rela.dyn { -# GD-RELA-NEXT: 0x225C R_386_TLS_DESC - 0xB +# GD-RELA-NEXT: 0x2264 R_386_TLS_DESC - 0xB # GD-RELA-NEXT: 0x2254 R_386_TLS_DESC a 0x0 -# GD-RELA-NEXT: 0x2264 R_386_TLS_DESC c 0x0 +# GD-RELA-NEXT: 0x225C R_386_TLS_DESC c 0x0 # GD-RELA-NEXT: } # GD-RELA: Hex dump of section '.got': # GD-RELA-NEXT: 0x00002254 00000000 00000000 00000000 00000000 @@ -44,14 +44,14 @@ # GD-NEXT: calll *(%eax) # GD-NEXT: movl %gs:(%eax), %eax -# &.rel.dyn[b]-.got.plt = 0x2250-0x2260 = -16 -# GD-NEXT: leal -16(%ebx), %eax +# &.rel.dyn[b]-.got.plt = 0x2258-0x2260 = -8 +# GD-NEXT: leal -8(%ebx), %eax # GD-NEXT: movl %edx, %ebx # GD-NEXT: calll *(%eax) # GD-NEXT: movl %gs:(%eax), %eax -# &.rel.dyn[c]-.got.plt = 0x2258-0x2260 = -8 -# GD-NEXT: leal -8(%ebx), %eax +# &.rel.dyn[c]-.got.plt = 0x2250-0x2260 = -16 +# GD-NEXT: leal -16(%ebx), %eax # GD-NEXT: calll *(%eax) # GD-NEXT: movl %gs:(%eax), %eax diff --git a/lld/test/ELF/riscv-tls-ld.s b/lld/test/ELF/riscv-tls-ld.s index f47964ef3e26..bc9a601a74ec 100644 --- a/lld/test/ELF/riscv-tls-ld.s +++ b/lld/test/ELF/riscv-tls-ld.s @@ -35,26 +35,26 @@ ## a@dtprel = st_value(a)-0x800 = 0xfffff808 is a link-time constant. # LD32-REL: .rela.dyn { -# LD32-REL-NEXT: 0x22B4 -# LD32-REL-NEXT: 0x22AC R_RISCV_TLS_DTPMOD32 - 0x0 +# LD32-REL-NEXT: 0x22AC +# LD32-REL-NEXT: 0x22B0 R_RISCV_TLS_DTPMOD32 - 0x0 # LD32-REL-NEXT: } # LD32-GOT: section '.got': -# LD32-GOT-NEXT: 0x000022a8 30220000 00000000 00f8ffff 00000000 +# LD32-GOT-NEXT: 0x000022a8 30220000 00000000 00000000 00f8ffff # LD64-REL: .rela.dyn { -# LD64-REL-NEXT: 0x2458 -# LD64-REL-NEXT: 0x2448 R_RISCV_TLS_DTPMOD64 - 0x0 +# LD64-REL-NEXT: 0x2448 +# LD64-REL-NEXT: 0x2450 R_RISCV_TLS_DTPMOD64 - 0x0 # LD64-REL-NEXT: } # LD64-GOT: section '.got': # LD64-GOT-NEXT: 0x00002440 50230000 00000000 00000000 00000000 -# LD64-GOT-NEXT: 0x00002450 00f8ffff ffffffff 00000000 00000000 +# LD64-GOT-NEXT: 0x00002450 00000000 00000000 00f8ffff ffffffff -## rv32: &DTPMOD(a) - . = 0x22ac - 0x11d8 = 4096*1+212 -## rv64: &DTPMOD(a) - . = 0x2448 - 0x12f8 = 4096*1+336 +## rv32: &DTPMOD(a) - . = 0x22b0 - 0x11d8 = 4096*1+216 +## rv64: &DTPMOD(a) - . 
= 0x2450 - 0x12f8 = 4096*1+344 # LD32: 11d8: auipc a0, 1 -# LD32-NEXT: addi a0, a0, 212 +# LD32-NEXT: addi a0, a0, 216 # LD64: 12f8: auipc a0, 1 -# LD64-NEXT: addi a0, a0, 336 +# LD64-NEXT: addi a0, a0, 344 # LD-NEXT: auipc ra, 0 # LD-NEXT: jalr 64(ra) @@ -63,18 +63,18 @@ ## a is local - its DTPMOD/DTPREL slots are link-time constants. ## a@dtpmod = 1 (main module) # LE32-GOT: section '.got': -# LE32-GOT-NEXT: 0x00012134 00000000 01000000 00f8ffff 34210100 +# LE32-GOT-NEXT: 0x00012134 00000000 34210100 01000000 00f8ffff # LE64-GOT: section '.got': -# LE64-GOT-NEXT: 0x000121e8 00000000 00000000 01000000 00000000 -# LE64-GOT-NEXT: 0x000121f8 00f8ffff ffffffff e8210100 00000000 +# LE64-GOT-NEXT: 0x000121e8 00000000 00000000 e8210100 00000000 +# LE64-GOT-NEXT: 0x000121f8 01000000 00000000 00f8ffff ffffffff -## rv32: DTPMOD(.LANCHOR0) - . = 0x12138 - 0x11114 = 4096*1+36 -## rv64: DTPMOD(.LANCHOR0) - . = 0x121f0 - 0x111c8 = 4096*1+40 +## rv32: DTPMOD(.LANCHOR0) - . = 0x1213c - 0x11114 = 4096*1+40 +## rv64: DTPMOD(.LANCHOR0) - . = 0x121f8 - 0x111c8 = 4096*1+48 # LE32: 11114: auipc a0, 1 -# LE32-NEXT: addi a0, a0, 36 +# LE32-NEXT: addi a0, a0, 40 # LE64: 111c8: auipc a0, 1 -# LE64-NEXT: addi a0, a0, 40 +# LE64-NEXT: addi a0, a0, 48 # LE-NEXT: auipc ra, 0 # LE-NEXT: jalr 24(ra) diff --git a/lld/test/ELF/x86-64-tlsdesc-gd.s b/lld/test/ELF/x86-64-tlsdesc-gd.s index f0cdf08040f3..436216e7d3d5 100644 --- a/lld/test/ELF/x86-64-tlsdesc-gd.s +++ b/lld/test/ELF/x86-64-tlsdesc-gd.s @@ -19,9 +19,9 @@ # RUN: llvm-objdump -d --no-show-raw-insn %t | FileCheck --check-prefix=IE %s # GD-RELA: .rela.dyn { -# GD-RELA-NEXT: 0x23C0 R_X86_64_TLSDESC - 0xB +# GD-RELA-NEXT: 0x23D0 R_X86_64_TLSDESC - 0xB # GD-RELA-NEXT: 0x23B0 R_X86_64_TLSDESC a 0x0 -# GD-RELA-NEXT: 0x23D0 R_X86_64_TLSDESC c 0x0 +# GD-RELA-NEXT: 0x23C0 R_X86_64_TLSDESC c 0x0 # GD-RELA-NEXT: } # GD-RELA: Hex dump of section '.got': # GD-RELA-NEXT: 0x000023b0 00000000 00000000 00000000 00000000 @@ -29,28 +29,28 @@ # GD-RELA-NEXT: 0x000023d0 00000000 00000000 00000000 00000000 # GD-REL: .rel.dyn { -# GD-REL-NEXT: 0x23A8 R_X86_64_TLSDESC - +# GD-REL-NEXT: 0x23B8 R_X86_64_TLSDESC - # GD-REL-NEXT: 0x2398 R_X86_64_TLSDESC a -# GD-REL-NEXT: 0x23B8 R_X86_64_TLSDESC c +# GD-REL-NEXT: 0x23A8 R_X86_64_TLSDESC c # GD-REL-NEXT: } # GD-REL: Hex dump of section '.got': # GD-REL-NEXT: 0x00002398 00000000 00000000 00000000 00000000 -# GD-REL-NEXT: 0x000023a8 00000000 00000000 0b000000 00000000 -# GD-REL-NEXT: 0x000023b8 00000000 00000000 00000000 00000000 +# GD-REL-NEXT: 0x000023a8 00000000 00000000 00000000 00000000 +# GD-REL-NEXT: 0x000023b8 00000000 00000000 0b000000 00000000 ## &.rela.dyn[a]-pc = 0x23B0-0x12e7 = 4297 # GD: leaq 4297(%rip), %rax # GD-NEXT: 12e7: callq *(%rax) # GD-NEXT: movl %fs:(%rax), %eax -## &.rela.dyn[b]-pc = 0x23C0-0x12f3 = 4301 -# GD-NEXT: leaq 4301(%rip), %rcx +## &.rela.dyn[b]-pc = 0x23D0-0x12f3 = 4317 +# GD-NEXT: leaq 4317(%rip), %rcx # GD-NEXT: 12f3: movq %rcx, %rax # GD-NEXT: callq *(%rax) # GD-NEXT: movl %fs:(%rax), %eax -## &.rela.dyn[c]-pc = 0x23D0-0x1302 = 4302 -# GD-NEXT: leaq 4302(%rip), %r15 +## &.rela.dyn[c]-pc = 0x23C0-0x1302 = 4286 +# GD-NEXT: leaq 4286(%rip), %r15 # GD-NEXT: 1302: movq %r15, %rax # GD-NEXT: callq *(%rax) # GD-NEXT: movl %fs:(%rax), %eax From 2d303e678152fddb88dea4199c8872223232b406 Mon Sep 17 00:00:00 2001 From: Kazu Hirata Date: Fri, 24 Dec 2021 23:17:53 -0800 Subject: [PATCH 247/293] Remove redundant return and continue statements (NFC) Identified with readability-redundant-control-flow. 
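To make the cleanup concrete, here is a small self-contained example (hypothetical code, not
taken from any of the files below) of the two patterns readability-redundant-control-flow
flags: a trailing return; at the end of a void function and a trailing continue; at the end
of a loop body. Removing them does not change behavior:

  #include <iostream>
  #include <vector>

  // Before: the last 'continue;' and the 'return;' are redundant because
  // control already reaches the same point without them.
  void printPositive(const std::vector<int> &values) {
    for (int v : values) {
      if (v <= 0)
        continue;          // fine: skips the rest of the body
      std::cout << v << '\n';
      continue;            // redundant: last statement of the loop body
    }
    return;                // redundant: last statement of a void function
  }

  // After: identical behavior, two statements shorter.
  void printPositiveCleaned(const std::vector<int> &values) {
    for (int v : values) {
      if (v <= 0)
        continue;
      std::cout << v << '\n';
    }
  }

  int main() {
    printPositive({1, -2, 3});
    printPositiveCleaned({1, -2, 3});
  }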
--- .../abseil/DurationFactoryScaleCheck.cpp | 1 - .../llvmlibc/ImplementationInNamespaceCheck.cpp | 1 - clang/lib/ASTMatchers/Dynamic/Marshallers.h | 1 - clang/lib/Analysis/CFG.cpp | 2 -- clang/lib/Basic/Targets/PPC.cpp | 5 +---- clang/lib/CodeGen/CGStmtOpenMP.cpp | 8 ++++---- clang/lib/CodeGen/CodeGenAction.cpp | 1 - clang/lib/Driver/ToolChains/HIPAMD.cpp | 1 - clang/lib/Format/UnwrappedLineParser.cpp | 1 - clang/lib/Sema/SemaCUDA.cpp | 1 - clang/lib/Sema/SemaCodeComplete.cpp | 1 - clang/lib/Sema/SemaType.cpp | 1 - .../clang-fuzzer/handle-llvm/handle_llvm.cpp | 2 -- lldb/include/lldb/Core/ValueObject.h | 2 +- lldb/include/lldb/Target/LanguageRuntime.h | 2 +- lldb/include/lldb/Utility/RangeMap.h | 1 - lldb/source/Core/IOHandlerCursesGUI.cpp | 15 +++++---------- lldb/source/Expression/IRExecutionUnit.cpp | 2 -- lldb/source/Expression/IRMemoryMap.cpp | 4 ---- .../Host/posix/ProcessLauncherPosixFork.cpp | 1 - lldb/source/Interpreter/CommandInterpreter.cpp | 3 --- .../MacOSX-DYLD/DynamicLoaderMacOSXDYLD.cpp | 1 - .../ExpressionParser/Clang/ClangASTImporter.cpp | 2 +- .../ExpressionParser/Clang/ClangASTSource.cpp | 2 -- .../ExpressionParser/Clang/ClangASTSource.h | 2 +- .../Clang/ClangExpressionDeclMap.cpp | 2 -- .../ObjC/AppleObjCRuntime/AppleObjCDeclVendor.cpp | 3 --- .../SymbolFile/NativePDB/SymbolFileNativePDB.cpp | 2 +- .../Plugins/SymbolFile/PDB/SymbolFilePDB.cpp | 1 - lldb/source/Symbol/SymbolFile.cpp | 4 +--- lldb/source/Target/ThreadPlanStack.cpp | 1 - lldb/source/Target/UnwindLLDB.cpp | 1 - .../OrcV2CBindingsVeryLazy.c | 1 - llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp | 1 - llvm/lib/DebugInfo/DWARF/DWARFDie.cpp | 2 -- llvm/lib/IR/Instructions.cpp | 1 - llvm/lib/Target/AMDGPU/AMDGPUCombinerHelper.cpp | 1 - llvm/lib/Target/AMDGPU/SIInstrInfo.cpp | 2 -- llvm/lib/Target/AVR/AVRInstrInfo.cpp | 2 -- polly/lib/External/isl/isl_int_sioimath.h | 1 - polly/lib/Transform/ManualOptimizer.cpp | 1 - 41 files changed, 16 insertions(+), 73 deletions(-) diff --git a/clang-tools-extra/clang-tidy/abseil/DurationFactoryScaleCheck.cpp b/clang-tools-extra/clang-tidy/abseil/DurationFactoryScaleCheck.cpp index aa839beddac6..c9f3a7db0346 100644 --- a/clang-tools-extra/clang-tidy/abseil/DurationFactoryScaleCheck.cpp +++ b/clang-tools-extra/clang-tidy/abseil/DurationFactoryScaleCheck.cpp @@ -221,7 +221,6 @@ void DurationFactoryScaleCheck::check(const MatchFinder::MatchResult &Result) { tooling::fixit::getText(*Remainder, *Result.Context) + ")") .str()); } - return; } } // namespace abseil diff --git a/clang-tools-extra/clang-tidy/llvmlibc/ImplementationInNamespaceCheck.cpp b/clang-tools-extra/clang-tidy/llvmlibc/ImplementationInNamespaceCheck.cpp index 42b697076b35..842bf43b2c45 100644 --- a/clang-tools-extra/clang-tidy/llvmlibc/ImplementationInNamespaceCheck.cpp +++ b/clang-tools-extra/clang-tidy/llvmlibc/ImplementationInNamespaceCheck.cpp @@ -41,7 +41,6 @@ void ImplementationInNamespaceCheck::check( diag(MatchedDecl->getLocation(), "declaration must be declared within the '%0' namespace") << RequiredNamespace; - return; } } // namespace llvm_libc diff --git a/clang/lib/ASTMatchers/Dynamic/Marshallers.h b/clang/lib/ASTMatchers/Dynamic/Marshallers.h index 783fb203c408..fa9d42247e24 100644 --- a/clang/lib/ASTMatchers/Dynamic/Marshallers.h +++ b/clang/lib/ASTMatchers/Dynamic/Marshallers.h @@ -1035,7 +1035,6 @@ class MapAnyOfBuilderDescriptor : public MatcherDescriptor { void getArgKinds(ASTNodeKind ThisKind, unsigned, std::vector &ArgKinds) const override { ArgKinds.push_back(ArgKind::MakeNodeArg(ThisKind)); 
- return; } bool isConvertibleTo(ASTNodeKind Kind, unsigned *Specificity = nullptr, ASTNodeKind *LeastDerivedKind = nullptr) const override { diff --git a/clang/lib/Analysis/CFG.cpp b/clang/lib/Analysis/CFG.cpp index abf65e3efce9..9ef3b5b6277a 100644 --- a/clang/lib/Analysis/CFG.cpp +++ b/clang/lib/Analysis/CFG.cpp @@ -1820,8 +1820,6 @@ void CFGBuilder::addScopesEnd(LocalScope::const_iterator B, for (VarDecl *VD : llvm::reverse(DeclsWithEndedScope)) appendScopeEnd(Block, VD, S); - - return; } /// addAutomaticObjDtors - Add to current block automatic objects destructors diff --git a/clang/lib/Basic/Targets/PPC.cpp b/clang/lib/Basic/Targets/PPC.cpp index c3c61ed443ca..7f7b44b658eb 100644 --- a/clang/lib/Basic/Targets/PPC.cpp +++ b/clang/lib/Basic/Targets/PPC.cpp @@ -623,14 +623,11 @@ void PPCTargetInfo::addP10SpecificFeatures( Features["pcrelative-memops"] = true; Features["prefix-instrs"] = true; Features["isa-v31-instructions"] = true; - return; } // Add features specific to the "Future" CPU. void PPCTargetInfo::addFutureSpecificFeatures( - llvm::StringMap &Features) const { - return; -} + llvm::StringMap &Features) const {} bool PPCTargetInfo::hasFeature(StringRef Feature) const { return llvm::StringSwitch(Feature) diff --git a/clang/lib/CodeGen/CGStmtOpenMP.cpp b/clang/lib/CodeGen/CGStmtOpenMP.cpp index 564c3a591f16..4c11f7d67534 100644 --- a/clang/lib/CodeGen/CGStmtOpenMP.cpp +++ b/clang/lib/CodeGen/CGStmtOpenMP.cpp @@ -4301,10 +4301,10 @@ class CheckVarsEscapingUntiedTaskDeclContext final PrivateDecls.push_back(VD); } } - void VisitOMPExecutableDirective(const OMPExecutableDirective *) { return; } - void VisitCapturedStmt(const CapturedStmt *) { return; } - void VisitLambdaExpr(const LambdaExpr *) { return; } - void VisitBlockExpr(const BlockExpr *) { return; } + void VisitOMPExecutableDirective(const OMPExecutableDirective *) {} + void VisitCapturedStmt(const CapturedStmt *) {} + void VisitLambdaExpr(const LambdaExpr *) {} + void VisitBlockExpr(const BlockExpr *) {} void VisitStmt(const Stmt *S) { if (!S) return; diff --git a/clang/lib/CodeGen/CodeGenAction.cpp b/clang/lib/CodeGen/CodeGenAction.cpp index 52c54d3c7a72..b72b16cf2b5f 100644 --- a/clang/lib/CodeGen/CodeGenAction.cpp +++ b/clang/lib/CodeGen/CodeGenAction.cpp @@ -571,7 +571,6 @@ void BackendConsumer::SrcMgrDiagHandler(const llvm::DiagnosticInfoSrcMgr &DI) { // If Loc is invalid, we still need to report the issue, it just gets no // location info. 
Diags.Report(Loc, DiagID).AddString(Message); - return; } bool diff --git a/clang/lib/Driver/ToolChains/HIPAMD.cpp b/clang/lib/Driver/ToolChains/HIPAMD.cpp index c08f825e0cb9..6d553791b394 100644 --- a/clang/lib/Driver/ToolChains/HIPAMD.cpp +++ b/clang/lib/Driver/ToolChains/HIPAMD.cpp @@ -365,5 +365,4 @@ void HIPAMDToolChain::checkTargetID( getDriver().Diag(clang::diag::err_drv_bad_target_id) << PTID.OptionalTargetID.getValue(); } - return; } diff --git a/clang/lib/Format/UnwrappedLineParser.cpp b/clang/lib/Format/UnwrappedLineParser.cpp index 4f300bb63a42..b6e55aab708f 100644 --- a/clang/lib/Format/UnwrappedLineParser.cpp +++ b/clang/lib/Format/UnwrappedLineParser.cpp @@ -1138,7 +1138,6 @@ void UnwrappedLineParser::parseModuleImport() { } addUnwrappedLine(); - return; } // readTokenWithJavaScriptASI reads the next token and terminates the current diff --git a/clang/lib/Sema/SemaCUDA.cpp b/clang/lib/Sema/SemaCUDA.cpp index 840b3daae63c..59601c5ce79d 100644 --- a/clang/lib/Sema/SemaCUDA.cpp +++ b/clang/lib/Sema/SemaCUDA.cpp @@ -886,7 +886,6 @@ void Sema::CUDACheckLambdaCapture(CXXMethodDecl *Callee, diag::warn_maybe_capture_bad_target_this_ptr, Callee, *this); } - return; } void Sema::CUDASetLambdaAttrs(CXXMethodDecl *Method) { diff --git a/clang/lib/Sema/SemaCodeComplete.cpp b/clang/lib/Sema/SemaCodeComplete.cpp index 083a67db7a91..93c07ccc891f 100644 --- a/clang/lib/Sema/SemaCodeComplete.cpp +++ b/clang/lib/Sema/SemaCodeComplete.cpp @@ -569,7 +569,6 @@ void PreferredTypeBuilder::enterMemAccess(Sema &S, SourceLocation Tok, return; // Keep the expected type, only update the location. ExpectedLoc = Tok; - return; } void PreferredTypeBuilder::enterUnary(Sema &S, SourceLocation Tok, diff --git a/clang/lib/Sema/SemaType.cpp b/clang/lib/Sema/SemaType.cpp index 0607d3a774aa..7a038301a249 100644 --- a/clang/lib/Sema/SemaType.cpp +++ b/clang/lib/Sema/SemaType.cpp @@ -6541,7 +6541,6 @@ static void HandleBTFTypeTagAttribute(QualType &Type, const ParsedAttr &Attr, StringRef BTFTypeTag = StrLiteral->getString(); Type = State.getAttributedType( ::new (Ctx) BTFTypeTagAttr(Ctx, Attr, BTFTypeTag), Type, Type); - return; } /// HandleAddressSpaceTypeAttribute - Process an address_space attribute on the diff --git a/clang/tools/clang-fuzzer/handle-llvm/handle_llvm.cpp b/clang/tools/clang-fuzzer/handle-llvm/handle_llvm.cpp index bf694e575a9d..aa38b5e4aa26 100644 --- a/clang/tools/clang-fuzzer/handle-llvm/handle_llvm.cpp +++ b/clang/tools/clang-fuzzer/handle-llvm/handle_llvm.cpp @@ -227,6 +227,4 @@ void clang_fuzzer::HandleLLVM(const std::string &IR, if (memcmp(OptArrays, UnoptArrays, kTotalSize)) ErrorAndExit("!!!BUG!!!"); - - return; } diff --git a/lldb/include/lldb/Core/ValueObject.h b/lldb/include/lldb/Core/ValueObject.h index 5f1cbc65b320..192149f05436 100644 --- a/lldb/include/lldb/Core/ValueObject.h +++ b/lldb/include/lldb/Core/ValueObject.h @@ -1000,7 +1000,7 @@ class ValueObject { void SetPreferredDisplayLanguageIfNeeded(lldb::LanguageType); protected: - virtual void DoUpdateChildrenAddressType(ValueObject &valobj) { return; }; + virtual void DoUpdateChildrenAddressType(ValueObject &valobj){}; private: virtual CompilerType MaybeCalculateCompleteType(); diff --git a/lldb/include/lldb/Target/LanguageRuntime.h b/lldb/include/lldb/Target/LanguageRuntime.h index 2f95c2643318..ba96d080f908 100644 --- a/lldb/include/lldb/Target/LanguageRuntime.h +++ b/lldb/include/lldb/Target/LanguageRuntime.h @@ -141,7 +141,7 @@ class LanguageRuntime : public Runtime, public PluginInterface { return false; } - virtual void 
SymbolsDidLoad(const ModuleList &module_list) { return; } + virtual void SymbolsDidLoad(const ModuleList &module_list) {} virtual lldb::ThreadPlanSP GetStepThroughTrampolinePlan(Thread &thread, bool stop_others) = 0; diff --git a/lldb/include/lldb/Utility/RangeMap.h b/lldb/include/lldb/Utility/RangeMap.h index 118fdfd85fa9..422f90d807a7 100644 --- a/lldb/include/lldb/Utility/RangeMap.h +++ b/lldb/include/lldb/Utility/RangeMap.h @@ -360,7 +360,6 @@ template class RangeVector { m_entries.erase(next); } } - return; } Collection m_entries; diff --git a/lldb/source/Core/IOHandlerCursesGUI.cpp b/lldb/source/Core/IOHandlerCursesGUI.cpp index 9122117ef5ff..60207f75b7df 100644 --- a/lldb/source/Core/IOHandlerCursesGUI.cpp +++ b/lldb/source/Core/IOHandlerCursesGUI.cpp @@ -1034,7 +1034,7 @@ class FieldDelegate { // navigates to the next or the previous field. This is particularly useful to // do in-field validation and error setting. Fields with internal navigation // should call this method on their fields. - virtual void FieldDelegateExitCallback() { return; } + virtual void FieldDelegateExitCallback() {} // Fields may have internal navigation, for instance, a List Field have // multiple internal elements, which needs to be navigated. To allow for this @@ -1055,10 +1055,10 @@ class FieldDelegate { virtual bool FieldDelegateOnLastOrOnlyElement() { return true; } // Select the first element in the field if multiple elements exists. - virtual void FieldDelegateSelectFirstElement() { return; } + virtual void FieldDelegateSelectFirstElement() {} // Select the last element in the field if multiple elements exists. - virtual void FieldDelegateSelectLastElement() { return; } + virtual void FieldDelegateSelectLastElement() {} // Returns true if the field has an error, false otherwise. virtual bool FieldDelegateHasError() { return false; } @@ -2000,7 +2000,6 @@ template class ListFieldDelegate : public FieldDelegate { void FieldDelegateSelectLastElement() override { m_selection_type = SelectionType::NewButton; - return; } int GetNumberOfFields() { return m_fields.size(); } @@ -2292,7 +2291,7 @@ class FormDelegate { virtual std::string GetName() = 0; - virtual void UpdateFieldsVisibility() { return; } + virtual void UpdateFieldsVisibility() {} FieldDelegate *GetField(uint32_t field_index) { if (field_index < m_fields.size()) @@ -3765,13 +3764,11 @@ class SearcherWindowDelegate : public WindowDelegate { void SelectNext() { if (m_selected_match != m_delegate_sp->GetNumberOfMatches() - 1) m_selected_match++; - return; } void SelectPrevious() { if (m_selected_match != 0) m_selected_match--; - return; } void ExecuteCallback(Window &window) { @@ -4608,9 +4605,7 @@ class TreeDelegate { virtual void TreeDelegateDrawTreeItem(TreeItem &item, Window &window) = 0; virtual void TreeDelegateGenerateChildren(TreeItem &item) = 0; virtual void TreeDelegateUpdateSelection(TreeItem &root, int &selection_index, - TreeItem *&selected_item) { - return; - } + TreeItem *&selected_item) {} // This is invoked when a tree item is selected. If true is returned, the // views are updated. 
virtual bool TreeDelegateItemSelected(TreeItem &item) = 0; diff --git a/lldb/source/Expression/IRExecutionUnit.cpp b/lldb/source/Expression/IRExecutionUnit.cpp index f2d22f7ed9cc..3c4a87c27e20 100644 --- a/lldb/source/Expression/IRExecutionUnit.cpp +++ b/lldb/source/Expression/IRExecutionUnit.cpp @@ -483,8 +483,6 @@ void IRExecutionUnit::GetRunnableInfo(Status &error, lldb::addr_t &func_addr, func_addr = m_function_load_addr; func_end = m_function_end_load_addr; - - return; } IRExecutionUnit::~IRExecutionUnit() { diff --git a/lldb/source/Expression/IRMemoryMap.cpp b/lldb/source/Expression/IRMemoryMap.cpp index 4ae2724d4dd8..9eee5cf5b9a2 100644 --- a/lldb/source/Expression/IRMemoryMap.cpp +++ b/lldb/source/Expression/IRMemoryMap.cpp @@ -609,7 +609,6 @@ void IRMemoryMap::WriteScalarToMemory(lldb::addr_t process_address, error.SetErrorToGenericError(); error.SetErrorString("Couldn't write scalar: its size was zero"); } - return; } void IRMemoryMap::WritePointerToMemory(lldb::addr_t process_address, @@ -757,7 +756,6 @@ void IRMemoryMap::ReadScalarFromMemory(Scalar &scalar, error.SetErrorToGenericError(); error.SetErrorString("Couldn't read scalar: its size was zero"); } - return; } void IRMemoryMap::ReadPointerFromMemory(lldb::addr_t *address, @@ -773,8 +771,6 @@ void IRMemoryMap::ReadPointerFromMemory(lldb::addr_t *address, return; *address = pointer_scalar.ULongLong(); - - return; } void IRMemoryMap::GetMemoryData(DataExtractor &extractor, diff --git a/lldb/source/Host/posix/ProcessLauncherPosixFork.cpp b/lldb/source/Host/posix/ProcessLauncherPosixFork.cpp index 63178e6c8a7a..2f08b9fa8857 100644 --- a/lldb/source/Host/posix/ProcessLauncherPosixFork.cpp +++ b/lldb/source/Host/posix/ProcessLauncherPosixFork.cpp @@ -86,7 +86,6 @@ static void DupDescriptor(int error_fd, const FileSpec &file_spec, int fd, ExitWithError(error_fd, "DupDescriptor-dup2"); ::close(target_fd); - return; } [[noreturn]] static void ChildFunc(int error_fd, diff --git a/lldb/source/Interpreter/CommandInterpreter.cpp b/lldb/source/Interpreter/CommandInterpreter.cpp index 301bf949feef..bd03f18b47c0 100644 --- a/lldb/source/Interpreter/CommandInterpreter.cpp +++ b/lldb/source/Interpreter/CommandInterpreter.cpp @@ -2216,7 +2216,6 @@ void CommandInterpreter::BuildAliasCommandArgs(CommandObject *alias_cmd_obj, } result.SetStatus(eReturnStatusSuccessFinishNoResult); - return; } int CommandInterpreter::GetOptionArgumentPosition(const char *in_string) { @@ -2563,8 +2562,6 @@ void CommandInterpreter::HandleCommands(const StringList &commands, result.SetStatus(eReturnStatusSuccessFinishResult); m_debugger.SetAsyncExecution(old_async_execution); - - return; } // Make flags that we can pass into the IOHandler so our delegates can do the diff --git a/lldb/source/Plugins/DynamicLoader/MacOSX-DYLD/DynamicLoaderMacOSXDYLD.cpp b/lldb/source/Plugins/DynamicLoader/MacOSX-DYLD/DynamicLoaderMacOSXDYLD.cpp index c3cfd0afe551..866b89f532ac 100644 --- a/lldb/source/Plugins/DynamicLoader/MacOSX-DYLD/DynamicLoaderMacOSXDYLD.cpp +++ b/lldb/source/Plugins/DynamicLoader/MacOSX-DYLD/DynamicLoaderMacOSXDYLD.cpp @@ -242,7 +242,6 @@ void DynamicLoaderMacOSXDYLD::DoInitialImageFetch() { ReadDYLDInfoFromMemoryAndSetNotificationCallback(0x8fe00000); } } - return; } // Assume that dyld is in memory at ADDR and try to parse it's load commands diff --git a/lldb/source/Plugins/ExpressionParser/Clang/ClangASTImporter.cpp b/lldb/source/Plugins/ExpressionParser/Clang/ClangASTImporter.cpp index 719b35689feb..6ed3cc9384f0 100644 --- 
a/lldb/source/Plugins/ExpressionParser/Clang/ClangASTImporter.cpp +++ b/lldb/source/Plugins/ExpressionParser/Clang/ClangASTImporter.cpp @@ -806,7 +806,7 @@ void ClangASTImporter::ForgetSource(clang::ASTContext *dst_ast, md->removeOriginsWithContext(src_ast); } -ClangASTImporter::MapCompleter::~MapCompleter() { return; } +ClangASTImporter::MapCompleter::~MapCompleter() {} llvm::Expected ClangASTImporter::ASTImporterDelegate::ImportImpl(Decl *From) { diff --git a/lldb/source/Plugins/ExpressionParser/Clang/ClangASTSource.cpp b/lldb/source/Plugins/ExpressionParser/Clang/ClangASTSource.cpp index 410d8a95cb12..510352e8c173 100644 --- a/lldb/source/Plugins/ExpressionParser/Clang/ClangASTSource.cpp +++ b/lldb/source/Plugins/ExpressionParser/Clang/ClangASTSource.cpp @@ -513,8 +513,6 @@ void ClangASTSource::FindExternalLexicalDecls( // is consulted again when a clang::DeclContext::lookup is called. const_cast(decl_context)->setMustBuildLookupTable(); } - - return; } void ClangASTSource::FindExternalVisibleDecls(NameSearchContext &context) { diff --git a/lldb/source/Plugins/ExpressionParser/Clang/ClangASTSource.h b/lldb/source/Plugins/ExpressionParser/Clang/ClangASTSource.h index 3afd1fd5f2d1..f3fec3f944a1 100644 --- a/lldb/source/Plugins/ExpressionParser/Clang/ClangASTSource.h +++ b/lldb/source/Plugins/ExpressionParser/Clang/ClangASTSource.h @@ -59,7 +59,7 @@ class ClangASTSource : public clang::ExternalASTSource, GetExternalCXXBaseSpecifiers(uint64_t Offset) override { return nullptr; } - void MaterializeVisibleDecls(const clang::DeclContext *DC) { return; } + void MaterializeVisibleDecls(const clang::DeclContext *DC) {} void InstallASTContext(TypeSystemClang &ast_context); diff --git a/lldb/source/Plugins/ExpressionParser/Clang/ClangExpressionDeclMap.cpp b/lldb/source/Plugins/ExpressionParser/Clang/ClangExpressionDeclMap.cpp index 846c1597292b..4af5d41a5921 100644 --- a/lldb/source/Plugins/ExpressionParser/Clang/ClangExpressionDeclMap.cpp +++ b/lldb/source/Plugins/ExpressionParser/Clang/ClangExpressionDeclMap.cpp @@ -1954,8 +1954,6 @@ void ClangExpressionDeclMap::AddContextClassType(NameSearchContext &context, return; context.AddNamedDecl(typedef_decl); - - return; } void ClangExpressionDeclMap::AddOneType(NameSearchContext &context, diff --git a/lldb/source/Plugins/LanguageRuntime/ObjC/AppleObjCRuntime/AppleObjCDeclVendor.cpp b/lldb/source/Plugins/LanguageRuntime/ObjC/AppleObjCRuntime/AppleObjCDeclVendor.cpp index 9bc40c16e5d0..af11109ae45d 100644 --- a/lldb/source/Plugins/LanguageRuntime/ObjC/AppleObjCRuntime/AppleObjCDeclVendor.cpp +++ b/lldb/source/Plugins/LanguageRuntime/ObjC/AppleObjCRuntime/AppleObjCDeclVendor.cpp @@ -80,8 +80,6 @@ class lldb_private::AppleObjCExternalASTSource LLDB_LOG(log, " AOEAS::CT Before:\n{1}", ClangUtil::DumpDecl(tag_decl)); LLDB_LOG(log, " AOEAS::CT After:{1}", ClangUtil::DumpDecl(tag_decl)); - - return; } void CompleteType(clang::ObjCInterfaceDecl *interface_decl) override { @@ -107,7 +105,6 @@ class lldb_private::AppleObjCExternalASTSource LLDB_LOGF(log, " [CT] After:"); LLDB_LOG(log, " [CT] {0}", ClangUtil::DumpDecl(interface_decl)); } - return; } bool layoutRecordType( diff --git a/lldb/source/Plugins/SymbolFile/NativePDB/SymbolFileNativePDB.cpp b/lldb/source/Plugins/SymbolFile/NativePDB/SymbolFileNativePDB.cpp index bf101ac1acf1..e859b1d5a86c 100644 --- a/lldb/source/Plugins/SymbolFile/NativePDB/SymbolFileNativePDB.cpp +++ b/lldb/source/Plugins/SymbolFile/NativePDB/SymbolFileNativePDB.cpp @@ -900,7 +900,7 @@ lldb::LanguageType 
SymbolFileNativePDB::ParseLanguage(CompileUnit &comp_unit) { return TranslateLanguage(item->m_compile_opts->getLanguage()); } -void SymbolFileNativePDB::AddSymbols(Symtab &symtab) { return; } +void SymbolFileNativePDB::AddSymbols(Symtab &symtab) {} size_t SymbolFileNativePDB::ParseFunctions(CompileUnit &comp_unit) { std::lock_guard guard(GetModuleMutex()); diff --git a/lldb/source/Plugins/SymbolFile/PDB/SymbolFilePDB.cpp b/lldb/source/Plugins/SymbolFile/PDB/SymbolFilePDB.cpp index c0547936b666..a40b6ec9a635 100644 --- a/lldb/source/Plugins/SymbolFile/PDB/SymbolFilePDB.cpp +++ b/lldb/source/Plugins/SymbolFile/PDB/SymbolFilePDB.cpp @@ -239,7 +239,6 @@ void SymbolFilePDB::GetCompileUnitIndex( } } index = UINT32_MAX; - return; } std::unique_ptr diff --git a/lldb/source/Symbol/SymbolFile.cpp b/lldb/source/Symbol/SymbolFile.cpp index 557c55699137..b85901af4d67 100644 --- a/lldb/source/Symbol/SymbolFile.cpp +++ b/lldb/source/Symbol/SymbolFile.cpp @@ -125,9 +125,7 @@ void SymbolFile::FindFunctions(const RegularExpression ®ex, void SymbolFile::GetMangledNamesForFunction( const std::string &scope_qualified_name, - std::vector &mangled_names) { - return; -} + std::vector &mangled_names) {} void SymbolFile::FindTypes( ConstString name, const CompilerDeclContext &parent_decl_ctx, diff --git a/lldb/source/Target/ThreadPlanStack.cpp b/lldb/source/Target/ThreadPlanStack.cpp index f09583cc50cc..80634647f9e0 100644 --- a/lldb/source/Target/ThreadPlanStack.cpp +++ b/lldb/source/Target/ThreadPlanStack.cpp @@ -210,7 +210,6 @@ void ThreadPlanStack::DiscardAllPlans() { for (int i = stack_size - 1; i > 0; i--) { DiscardPlan(); } - return; } void ThreadPlanStack::DiscardConsultingControllingPlans() { diff --git a/lldb/source/Target/UnwindLLDB.cpp b/lldb/source/Target/UnwindLLDB.cpp index 047147112f3b..77dd19b04ebd 100644 --- a/lldb/source/Target/UnwindLLDB.cpp +++ b/lldb/source/Target/UnwindLLDB.cpp @@ -312,7 +312,6 @@ void UnwindLLDB::UpdateUnwindPlanForFirstFrameIfInvalid(ABI *abi) { // Restore status after calling AddOneMoreFrame m_unwind_complete = old_m_unwind_complete; m_candidate_frame = old_m_candidate_frame; - return; } bool UnwindLLDB::AddOneMoreFrame(ABI *abi) { diff --git a/llvm/examples/OrcV2Examples/OrcV2CBindingsVeryLazy/OrcV2CBindingsVeryLazy.c b/llvm/examples/OrcV2Examples/OrcV2CBindingsVeryLazy/OrcV2CBindingsVeryLazy.c index 93d36a40b12f..c557fc7795e5 100644 --- a/llvm/examples/OrcV2Examples/OrcV2CBindingsVeryLazy/OrcV2CBindingsVeryLazy.c +++ b/llvm/examples/OrcV2Examples/OrcV2CBindingsVeryLazy/OrcV2CBindingsVeryLazy.c @@ -159,7 +159,6 @@ void Materialize(void *Ctx, LLVMOrcMaterializationResponsibilityRef MR) { LLVMOrcIRTransformLayerRef IRLayer = LLVMOrcLLJITGetIRTransformLayer(J); LLVMOrcIRTransformLayerEmit(IRLayer, MR, TSM); } - return; } int main(int argc, char *argv[]) { diff --git a/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp b/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp index 262dc16f44ab..e8a8efd5dad4 100644 --- a/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp +++ b/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp @@ -302,7 +302,6 @@ void LegalizerHelper::mergeMixedSubvectors(Register DstReg, appendVectorElts(AllElts, Leftover); MIRBuilder.buildMerge(DstReg, AllElts); - return; } /// Append the result registers of G_UNMERGE_VALUES \p MI to \p Regs. 
diff --git a/llvm/lib/DebugInfo/DWARF/DWARFDie.cpp b/llvm/lib/DebugInfo/DWARF/DWARFDie.cpp index 906a48d81840..5421b2d59a1b 100644 --- a/llvm/lib/DebugInfo/DWARF/DWARFDie.cpp +++ b/llvm/lib/DebugInfo/DWARF/DWARFDie.cpp @@ -89,7 +89,6 @@ static void dumpLocationList(raw_ostream &OS, const DWARFFormValue &FormValue, U->getLocationTable().dumpLocationList(&Offset, OS, U->getBaseAddress(), MRI, Ctx.getDWARFObj(), U, DumpOpts, Indent); - return; } static void dumpLocationExpr(raw_ostream &OS, const DWARFFormValue &FormValue, @@ -105,7 +104,6 @@ static void dumpLocationExpr(raw_ostream &OS, const DWARFFormValue &FormValue, Ctx.isLittleEndian(), 0); DWARFExpression(Data, U->getAddressByteSize(), U->getFormParams().Format) .print(OS, DumpOpts, MRI, U); - return; } static DWARFDie resolveReferencedType(DWARFDie D, diff --git a/llvm/lib/IR/Instructions.cpp b/llvm/lib/IR/Instructions.cpp index fb6105712d1a..b40fe0cfc860 100644 --- a/llvm/lib/IR/Instructions.cpp +++ b/llvm/lib/IR/Instructions.cpp @@ -2324,7 +2324,6 @@ bool ShuffleVectorInst::isInsertSubvectorMask(ArrayRef Mask, } Src1Elts.setBit(i); Src1Identity &= (M == (i + NumSrcElts)); - continue; } assert((Src0Elts | Src1Elts | UndefElts).isAllOnes() && "unknown shuffle elements"); diff --git a/llvm/lib/Target/AMDGPU/AMDGPUCombinerHelper.cpp b/llvm/lib/Target/AMDGPU/AMDGPUCombinerHelper.cpp index 301e6f6d6f42..e79ff9b597c9 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPUCombinerHelper.cpp +++ b/llvm/lib/Target/AMDGPU/AMDGPUCombinerHelper.cpp @@ -378,5 +378,4 @@ void AMDGPUCombinerHelper::applyFoldableFneg(MachineInstr &MI, } MI.eraseFromParent(); - return; } diff --git a/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp b/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp index 4ce2c8a02194..1755b93538ce 100644 --- a/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp +++ b/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp @@ -2395,8 +2395,6 @@ void SIInstrInfo::insertIndirectBranch(MachineBasicBlock &MBB, OffsetLo->setVariableValue(MCBinaryExpr::createAnd(Offset, Mask, MCCtx)); auto *ShAmt = MCConstantExpr::create(32, MCCtx); OffsetHi->setVariableValue(MCBinaryExpr::createAShr(Offset, ShAmt, MCCtx)); - - return; } unsigned SIInstrInfo::getBranchOpcode(SIInstrInfo::BranchPredicate Cond) { diff --git a/llvm/lib/Target/AVR/AVRInstrInfo.cpp b/llvm/lib/Target/AVR/AVRInstrInfo.cpp index 798d08393eae..51060018a5ca 100644 --- a/llvm/lib/Target/AVR/AVRInstrInfo.cpp +++ b/llvm/lib/Target/AVR/AVRInstrInfo.cpp @@ -571,8 +571,6 @@ void AVRInstrInfo::insertIndirectBranch(MachineBasicBlock &MBB, // See lib/CodeGen/RegisterRelaxation.cpp for details. // We end up here when a jump is too long for a RJMP instruction. BuildMI(&MBB, DL, get(AVR::JMPk)).addMBB(&NewDestBB); - - return; } } // end of namespace llvm diff --git a/polly/lib/External/isl/isl_int_sioimath.h b/polly/lib/External/isl/isl_int_sioimath.h index a2112cd8e2fb..dc691b8a0b1e 100644 --- a/polly/lib/External/isl/isl_int_sioimath.h +++ b/polly/lib/External/isl/isl_int_sioimath.h @@ -868,7 +868,6 @@ inline void isl_sioimath_tdiv_q(isl_sioimath_ptr dst, isl_sioimath_src lhs, isl_sioimath_bigarg_src(rhs, &rhsscratch), isl_sioimath_reinit_big(dst), NULL); isl_sioimath_try_demote(dst); - return; } /* Divide lhs by an unsigned long rhs, rounding to zero (Truncate). 
diff --git a/polly/lib/Transform/ManualOptimizer.cpp b/polly/lib/Transform/ManualOptimizer.cpp index 2c05927582e2..ec4c584b27e2 100644 --- a/polly/lib/Transform/ManualOptimizer.cpp +++ b/polly/lib/Transform/ManualOptimizer.cpp @@ -271,7 +271,6 @@ class SearchTransformVisitor } // not a loop transformation; look for next property - continue; } } From 34558b039b3baf057851b8d39f53402608da4927 Mon Sep 17 00:00:00 2001 From: Kazu Hirata Date: Sat, 25 Dec 2021 00:35:41 -0800 Subject: [PATCH 248/293] [StaticAnalyzer] Remove redundant declaration isStdSmartPtr (NFC) An identical declaration is present just a couple of lines above the line being removed in this patch. Identified with readability-redundant-declaration. --- clang/lib/StaticAnalyzer/Checkers/SmartPtr.h | 2 -- 1 file changed, 2 deletions(-) diff --git a/clang/lib/StaticAnalyzer/Checkers/SmartPtr.h b/clang/lib/StaticAnalyzer/Checkers/SmartPtr.h index 6a40f8eda5fa..b4352b450c7f 100644 --- a/clang/lib/StaticAnalyzer/Checkers/SmartPtr.h +++ b/clang/lib/StaticAnalyzer/Checkers/SmartPtr.h @@ -25,8 +25,6 @@ bool isStdSmartPtrCall(const CallEvent &Call); bool isStdSmartPtr(const CXXRecordDecl *RD); bool isStdSmartPtr(const Expr *E); -bool isStdSmartPtr(const CXXRecordDecl *RD); - /// Returns whether the smart pointer is null or not. bool isNullSmartPtr(const ProgramStateRef State, const MemRegion *ThisRegion); From d86e2cc2e37c9051a5cf5a4b9b3ae4b9a961ef11 Mon Sep 17 00:00:00 2001 From: Serge Pavlov Date: Mon, 20 Sep 2021 00:12:21 +0700 Subject: [PATCH 249/293] [NFC] Method for evaluation of FCmpInst for constant operands New method `FCmpInst::compare` is added, which evaluates the given compare predicate for constant operands. Interface is made similar to `ICmpInst::compare`. Differential Revision: https://reviews.llvm.org/D116168 --- llvm/include/llvm/IR/Instructions.h | 4 +++ llvm/lib/IR/ConstantFold.cpp | 42 ++--------------------------- llvm/lib/IR/Instructions.cpp | 41 ++++++++++++++++++++++++++++ 3 files changed, 47 insertions(+), 40 deletions(-) diff --git a/llvm/include/llvm/IR/Instructions.h b/llvm/include/llvm/IR/Instructions.h index ccf17628e265..84ebb461ebef 100644 --- a/llvm/include/llvm/IR/Instructions.h +++ b/llvm/include/llvm/IR/Instructions.h @@ -1456,6 +1456,10 @@ class FCmpInst: public CmpInst { /// static auto predicates() { return FCmpPredicates(); } + /// Return result of `LHS Pred RHS` comparison. 
+ static bool compare(const APFloat &LHS, const APFloat &RHS, + FCmpInst::Predicate Pred); + /// Methods for support type inquiry through isa, cast, and dyn_cast: static bool classof(const Instruction *I) { return I->getOpcode() == Instruction::FCmp; diff --git a/llvm/lib/IR/ConstantFold.cpp b/llvm/lib/IR/ConstantFold.cpp index ae48d0333d67..8668fe82601c 100644 --- a/llvm/lib/IR/ConstantFold.cpp +++ b/llvm/lib/IR/ConstantFold.cpp @@ -1801,46 +1801,8 @@ Constant *llvm::ConstantFoldCompareInstruction(unsigned short pred, } else if (isa(C1) && isa(C2)) { const APFloat &C1V = cast(C1)->getValueAPF(); const APFloat &C2V = cast(C2)->getValueAPF(); - APFloat::cmpResult R = C1V.compare(C2V); - switch (pred) { - default: llvm_unreachable("Invalid FCmp Predicate"); - case FCmpInst::FCMP_FALSE: return Constant::getNullValue(ResultTy); - case FCmpInst::FCMP_TRUE: return Constant::getAllOnesValue(ResultTy); - case FCmpInst::FCMP_UNO: - return ConstantInt::get(ResultTy, R==APFloat::cmpUnordered); - case FCmpInst::FCMP_ORD: - return ConstantInt::get(ResultTy, R!=APFloat::cmpUnordered); - case FCmpInst::FCMP_UEQ: - return ConstantInt::get(ResultTy, R==APFloat::cmpUnordered || - R==APFloat::cmpEqual); - case FCmpInst::FCMP_OEQ: - return ConstantInt::get(ResultTy, R==APFloat::cmpEqual); - case FCmpInst::FCMP_UNE: - return ConstantInt::get(ResultTy, R!=APFloat::cmpEqual); - case FCmpInst::FCMP_ONE: - return ConstantInt::get(ResultTy, R==APFloat::cmpLessThan || - R==APFloat::cmpGreaterThan); - case FCmpInst::FCMP_ULT: - return ConstantInt::get(ResultTy, R==APFloat::cmpUnordered || - R==APFloat::cmpLessThan); - case FCmpInst::FCMP_OLT: - return ConstantInt::get(ResultTy, R==APFloat::cmpLessThan); - case FCmpInst::FCMP_UGT: - return ConstantInt::get(ResultTy, R==APFloat::cmpUnordered || - R==APFloat::cmpGreaterThan); - case FCmpInst::FCMP_OGT: - return ConstantInt::get(ResultTy, R==APFloat::cmpGreaterThan); - case FCmpInst::FCMP_ULE: - return ConstantInt::get(ResultTy, R!=APFloat::cmpGreaterThan); - case FCmpInst::FCMP_OLE: - return ConstantInt::get(ResultTy, R==APFloat::cmpLessThan || - R==APFloat::cmpEqual); - case FCmpInst::FCMP_UGE: - return ConstantInt::get(ResultTy, R!=APFloat::cmpLessThan); - case FCmpInst::FCMP_OGE: - return ConstantInt::get(ResultTy, R==APFloat::cmpGreaterThan || - R==APFloat::cmpEqual); - } + CmpInst::Predicate Predicate = CmpInst::Predicate(pred); + return ConstantInt::get(ResultTy, FCmpInst::compare(C1V, C2V, Predicate)); } else if (auto *C1VTy = dyn_cast(C1->getType())) { // Fast path for splatted constants. 
diff --git a/llvm/lib/IR/Instructions.cpp b/llvm/lib/IR/Instructions.cpp index b40fe0cfc860..7798af3b19b9 100644 --- a/llvm/lib/IR/Instructions.cpp +++ b/llvm/lib/IR/Instructions.cpp @@ -4160,6 +4160,47 @@ bool ICmpInst::compare(const APInt &LHS, const APInt &RHS, }; } +bool FCmpInst::compare(const APFloat &LHS, const APFloat &RHS, + FCmpInst::Predicate Pred) { + APFloat::cmpResult R = LHS.compare(RHS); + switch (Pred) { + default: + llvm_unreachable("Invalid FCmp Predicate"); + case FCmpInst::FCMP_FALSE: + return false; + case FCmpInst::FCMP_TRUE: + return true; + case FCmpInst::FCMP_UNO: + return R == APFloat::cmpUnordered; + case FCmpInst::FCMP_ORD: + return R != APFloat::cmpUnordered; + case FCmpInst::FCMP_UEQ: + return R == APFloat::cmpUnordered || R == APFloat::cmpEqual; + case FCmpInst::FCMP_OEQ: + return R == APFloat::cmpEqual; + case FCmpInst::FCMP_UNE: + return R != APFloat::cmpEqual; + case FCmpInst::FCMP_ONE: + return R == APFloat::cmpLessThan || R == APFloat::cmpGreaterThan; + case FCmpInst::FCMP_ULT: + return R == APFloat::cmpUnordered || R == APFloat::cmpLessThan; + case FCmpInst::FCMP_OLT: + return R == APFloat::cmpLessThan; + case FCmpInst::FCMP_UGT: + return R == APFloat::cmpUnordered || R == APFloat::cmpGreaterThan; + case FCmpInst::FCMP_OGT: + return R == APFloat::cmpGreaterThan; + case FCmpInst::FCMP_ULE: + return R != APFloat::cmpGreaterThan; + case FCmpInst::FCMP_OLE: + return R == APFloat::cmpLessThan || R == APFloat::cmpEqual; + case FCmpInst::FCMP_UGE: + return R != APFloat::cmpLessThan; + case FCmpInst::FCMP_OGE: + return R == APFloat::cmpGreaterThan || R == APFloat::cmpEqual; + } +} + CmpInst::Predicate CmpInst::getFlippedSignednessPredicate(Predicate pred) { assert(CmpInst::isRelational(pred) && "Call only with non-equality predicates!"); From 5b2e611b734cb64c5600c1c8d0a6b4881a05f874 Mon Sep 17 00:00:00 2001 From: Groverkss Date: Sat, 25 Dec 2021 22:35:57 +0530 Subject: [PATCH 250/293] [MLIR][FlatAffineConstraints][NFC] Move some static functions to be available to Presburger/ This patch moves some static functions from AffineStructures.cpp to Presburger/Utils.cpp and some to be private members of FlatAffineConstraints (which will later be moved to IntegerPolyhedron) to allow for a smoother transition for moving FlatAffineConstraints math functionality to Presburger/IntegerPolyhedron. This patch is part of a series of patches for moving math functionality to Presburger directory. 
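As a rough usage sketch (hypothetical caller, not part of this patch), code under Presburger/ can now reach the division-detection helper through the new Utils.h header; the names and signatures below are the ones introduced by this patch:

```cpp
// Illustrative only: a hypothetical caller of the relocated helper.
#include "mlir/Analysis/Presburger/IntegerPolyhedron.h"
#include "mlir/Analysis/Presburger/Utils.h"
#include "llvm/ADT/SmallVector.h"

using namespace mlir;

// Returns true if identifier `pos` of `cst` can be recovered as a floordiv of
// the other identifiers. `foundRepr` must hold one entry per identifier.
static bool isFloorDivOfOthers(const IntegerPolyhedron &cst, unsigned pos,
                               ArrayRef<bool> foundRepr) {
  SmallVector<int64_t, 8> dividend;
  unsigned divisor;
  // On success, the returned pair holds the positions of the upper/lower bound
  // inequalities, and `dividend`/`divisor` describe the detected division.
  return presburger_utils::computeSingleVarRepr(cst, foundRepr, pos, dividend,
                                                divisor)
      .hasValue();
}
```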
Reviewed By: arjunp, bondhugula Differential Revision: https://reviews.llvm.org/D115869 --- mlir/include/mlir/Analysis/AffineStructures.h | 20 +- .../Analysis/Presburger/IntegerPolyhedron.h | 10 + mlir/include/mlir/Analysis/Presburger/Utils.h | 40 +++ mlir/lib/Analysis/AffineStructures.cpp | 231 ++---------------- mlir/lib/Analysis/Presburger/CMakeLists.txt | 2 + .../Analysis/Presburger/IntegerPolyhedron.cpp | 54 ++++ mlir/lib/Analysis/Presburger/Utils.cpp | 155 ++++++++++++ 7 files changed, 289 insertions(+), 223 deletions(-) create mode 100644 mlir/include/mlir/Analysis/Presburger/Utils.h create mode 100644 mlir/lib/Analysis/Presburger/Utils.cpp diff --git a/mlir/include/mlir/Analysis/AffineStructures.h b/mlir/include/mlir/Analysis/AffineStructures.h index 9e7ffb125f7e..ea20edd1036f 100644 --- a/mlir/include/mlir/Analysis/AffineStructures.h +++ b/mlir/include/mlir/Analysis/AffineStructures.h @@ -292,16 +292,6 @@ class FlatAffineConstraints : public IntegerPolyhedron { unsigned symStartPos, ArrayRef localExprs, MLIRContext *context) const; - /// Gather positions of all lower and upper bounds of the identifier at `pos`, - /// and optionally any equalities on it. In addition, the bounds are to be - /// independent of identifiers in position range [`offset`, `offset` + `num`). - void - getLowerAndUpperBoundIndices(unsigned pos, - SmallVectorImpl *lbIndices, - SmallVectorImpl *ubIndices, - SmallVectorImpl *eqIndices = nullptr, - unsigned offset = 0, unsigned num = 0) const; - /// Removes constraints that are independent of (i.e., do not have a /// coefficient) identifiers in the range [pos, pos + num). void removeIndependentConstraints(unsigned pos, unsigned num); @@ -419,6 +409,16 @@ class FlatAffineConstraints : public IntegerPolyhedron { /// Normalized each constraints by the GCD of its coefficients. void normalizeConstraintsByGCD(); + /// Searches for a constraint with a non-zero coefficient at `colIdx` in + /// equality (isEq=true) or inequality (isEq=false) constraints. + /// Returns true and sets row found in search in `rowIdx`, false otherwise. + bool findConstraintWithNonZeroAt(unsigned colIdx, bool isEq, + unsigned *rowIdx) const; + + /// Returns true if the pos^th column is all zero for both inequalities and + /// equalities. + bool isColZero(unsigned pos) const; + /// A parameter that controls detection of an unrealistic number of /// constraints. If the number of constraints is this many times the number of /// variables, we consider such a system out of line with the intended use diff --git a/mlir/include/mlir/Analysis/Presburger/IntegerPolyhedron.h b/mlir/include/mlir/Analysis/Presburger/IntegerPolyhedron.h index 933f20e44122..c7eae0cd29ee 100644 --- a/mlir/include/mlir/Analysis/Presburger/IntegerPolyhedron.h +++ b/mlir/include/mlir/Analysis/Presburger/IntegerPolyhedron.h @@ -185,6 +185,16 @@ class IntegerPolyhedron { /// Removes all equalities and inequalities. void clearConstraints(); + /// Gather positions of all lower and upper bounds of the identifier at `pos`, + /// and optionally any equalities on it. In addition, the bounds are to be + /// independent of identifiers in position range [`offset`, `offset` + `num`). + void + getLowerAndUpperBoundIndices(unsigned pos, + SmallVectorImpl *lbIndices, + SmallVectorImpl *ubIndices, + SmallVectorImpl *eqIndices = nullptr, + unsigned offset = 0, unsigned num = 0) const; + protected: /// Return the index at which the specified kind of id starts. 
unsigned getIdKindOffset(IdKind kind) const; diff --git a/mlir/include/mlir/Analysis/Presburger/Utils.h b/mlir/include/mlir/Analysis/Presburger/Utils.h new file mode 100644 index 000000000000..0b9c2c707d3e --- /dev/null +++ b/mlir/include/mlir/Analysis/Presburger/Utils.h @@ -0,0 +1,40 @@ +//===- Utils.h - General utilities for Presburger library ------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// Utility functions required by the Presburger Library. +// +//===----------------------------------------------------------------------===// + +#ifndef MLIR_ANALYSIS_PRESBURGER_UTILS_H +#define MLIR_ANALYSIS_PRESBURGER_UTILS_H + +#include "mlir/Support/LLVM.h" + +namespace mlir { + +class IntegerPolyhedron; + +namespace presburger_utils { + +/// Check if the pos^th identifier can be expressed as a floordiv of an affine +/// function of other identifiers (where the divisor is a positive constant). +/// `foundRepr` contains a boolean for each identifier indicating if the +/// explicit representation for that identifier has already been computed. +/// Returns the upper and lower bound inequalities using which the floordiv +/// can be computed. If the representation could be computed, `dividend` and +/// `denominator` are set. If the representation could not be computed, +/// `llvm::None` is returned. +Optional> +computeSingleVarRepr(const IntegerPolyhedron &cst, ArrayRef foundRepr, + unsigned pos, SmallVector ÷nd, + unsigned &divisor); + +} // namespace presburger_utils +} // namespace mlir + +#endif // MLIR_ANALYSIS_PRESBURGER_UTILS_H diff --git a/mlir/lib/Analysis/AffineStructures.cpp b/mlir/lib/Analysis/AffineStructures.cpp index f4d857479cde..520262d6fddc 100644 --- a/mlir/lib/Analysis/AffineStructures.cpp +++ b/mlir/lib/Analysis/AffineStructures.cpp @@ -13,6 +13,7 @@ #include "mlir/Analysis/AffineStructures.h" #include "mlir/Analysis/LinearTransform.h" #include "mlir/Analysis/Presburger/Simplex.h" +#include "mlir/Analysis/Presburger/Utils.h" #include "mlir/Dialect/Affine/IR/AffineOps.h" #include "mlir/Dialect/Affine/IR/AffineValueMap.h" #include "mlir/Dialect/Arithmetic/IR/Arithmetic.h" @@ -700,14 +701,13 @@ void FlatAffineValueConstraints::addAffineIfOpDomain(AffineIfOp ifOp) { // Searches for a constraint with a non-zero coefficient at `colIdx` in // equality (isEq=true) or inequality (isEq=false) constraints. // Returns true and sets row found in search in `rowIdx`, false otherwise. -static bool findConstraintWithNonZeroAt(const FlatAffineConstraints &cst, - unsigned colIdx, bool isEq, - unsigned *rowIdx) { - assert(colIdx < cst.getNumCols() && "position out of bounds"); +bool FlatAffineConstraints::findConstraintWithNonZeroAt( + unsigned colIdx, bool isEq, unsigned *rowIdx) const { + assert(colIdx < getNumCols() && "position out of bounds"); auto at = [&](unsigned rowIdx) -> int64_t { - return isEq ? cst.atEq(rowIdx, colIdx) : cst.atIneq(rowIdx, colIdx); + return isEq ? atEq(rowIdx, colIdx) : atIneq(rowIdx, colIdx); }; - unsigned e = isEq ? cst.getNumEqualities() : cst.getNumInequalities(); + unsigned e = isEq ? 
getNumEqualities() : getNumInequalities(); for (*rowIdx = 0; *rowIdx < e; ++(*rowIdx)) { if (at(*rowIdx) != 0) { return true; @@ -1203,145 +1203,6 @@ bool FlatAffineConstraints::containsPoint(ArrayRef point) const { return true; } -/// Check if the pos^th identifier can be represented as a division using upper -/// bound inequality at position `ubIneq` and lower bound inequality at position -/// `lbIneq`. -/// -/// Let `id` be the pos^th identifier, then `id` is equivalent to -/// `expr floordiv divisor` if there are constraints of the form: -/// 0 <= expr - divisor * id <= divisor - 1 -/// Rearranging, we have: -/// divisor * id - expr + (divisor - 1) >= 0 <-- Lower bound for 'id' -/// -divisor * id + expr >= 0 <-- Upper bound for 'id' -/// -/// For example: -/// 32*k >= 16*i + j - 31 <-- Lower bound for 'k' -/// 32*k <= 16*i + j <-- Upper bound for 'k' -/// expr = 16*i + j, divisor = 32 -/// k = ( 16*i + j ) floordiv 32 -/// -/// 4q >= i + j - 2 <-- Lower bound for 'q' -/// 4q <= i + j + 1 <-- Upper bound for 'q' -/// expr = i + j + 1, divisor = 4 -/// q = (i + j + 1) floordiv 4 -// -/// This function also supports detecting divisions from bounds that are -/// strictly tighter than the division bounds described above, since tighter -/// bounds imply the division bounds. For example: -/// 4q - i - j + 2 >= 0 <-- Lower bound for 'q' -/// -4q + i + j >= 0 <-- Tight upper bound for 'q' -/// -/// To extract floor divisions with tighter bounds, we assume that that the -/// constraints are of the form: -/// c <= expr - divisior * id <= divisor - 1, where 0 <= c <= divisor - 1 -/// Rearranging, we have: -/// divisor * id - expr + (divisor - 1) >= 0 <-- Lower bound for 'id' -/// -divisor * id + expr - c >= 0 <-- Upper bound for 'id' -/// -/// If successful, `expr` is set to dividend of the division and `divisor` is -/// set to the denominator of the division. -static LogicalResult getDivRepr(const FlatAffineConstraints &cst, unsigned pos, - unsigned ubIneq, unsigned lbIneq, - SmallVector &expr, - unsigned &divisor) { - - assert(pos <= cst.getNumIds() && "Invalid identifier position"); - assert(ubIneq <= cst.getNumInequalities() && - "Invalid upper bound inequality position"); - assert(lbIneq <= cst.getNumInequalities() && - "Invalid upper bound inequality position"); - - // Extract divisor from the lower bound. - divisor = cst.atIneq(lbIneq, pos); - - // First, check if the constraints are opposite of each other except the - // constant term. - unsigned i = 0, e = 0; - for (i = 0, e = cst.getNumIds(); i < e; ++i) - if (cst.atIneq(ubIneq, i) != -cst.atIneq(lbIneq, i)) - break; - - if (i < e) - return failure(); - - // Then, check if the constant term is of the proper form. - // Due to the form of the upper/lower bound inequalities, the sum of their - // constants is `divisor - 1 - c`. From this, we can extract c: - int64_t constantSum = cst.atIneq(lbIneq, cst.getNumCols() - 1) + - cst.atIneq(ubIneq, cst.getNumCols() - 1); - int64_t c = divisor - 1 - constantSum; - - // Check if `c` satisfies the condition `0 <= c <= divisor - 1`. This also - // implictly checks that `divisor` is positive. - if (!(c >= 0 && c <= divisor - 1)) - return failure(); - - // The inequality pair can be used to extract the division. - // Set `expr` to the dividend of the division except the constant term, which - // is set below. 
- expr.resize(cst.getNumCols(), 0); - for (i = 0, e = cst.getNumIds(); i < e; ++i) - if (i != pos) - expr[i] = cst.atIneq(ubIneq, i); - - // From the upper bound inequality's form, its constant term is equal to the - // constant term of `expr`, minus `c`. From this, - // constant term of `expr` = constant term of upper bound + `c`. - expr.back() = cst.atIneq(ubIneq, cst.getNumCols() - 1) + c; - - return success(); -} - -/// Check if the pos^th identifier can be expressed as a floordiv of an affine -/// function of other identifiers (where the divisor is a positive constant). -/// `foundRepr` contains a boolean for each identifier indicating if the -/// explicit representation for that identifier has already been computed. -/// Returns the upper and lower bound inequalities using which the floordiv can -/// be computed. If the representation could be computed, `dividend` and -/// `denominator` are set. If the representation could not be computed, -/// `llvm::None` is returned. -static Optional> -computeSingleVarRepr(const FlatAffineConstraints &cst, - const SmallVector &foundRepr, unsigned pos, - SmallVector ÷nd, unsigned &divisor) { - assert(pos < cst.getNumIds() && "invalid position"); - assert(foundRepr.size() == cst.getNumIds() && - "Size of foundRepr does not match total number of variables"); - - SmallVector lbIndices, ubIndices; - cst.getLowerAndUpperBoundIndices(pos, &lbIndices, &ubIndices); - - for (unsigned ubPos : ubIndices) { - for (unsigned lbPos : lbIndices) { - // Attempt to get divison representation from ubPos, lbPos. - if (failed(getDivRepr(cst, pos, ubPos, lbPos, dividend, divisor))) - continue; - - // Check if the inequalities depend on a variable for which - // an explicit representation has not been found yet. - // Exit to avoid circular dependencies between divisions. - unsigned c, f; - for (c = 0, f = cst.getNumIds(); c < f; ++c) { - if (c == pos) - continue; - if (!foundRepr[c] && dividend[c] != 0) - break; - } - - // Expression can't be constructed as it depends on a yet unknown - // identifier. - // TODO: Visit/compute the identifiers in an order so that this doesn't - // happen. More complex but much more efficient. - if (c < f) - continue; - - return std::make_pair(ubPos, lbPos); - } - } - - return llvm::None; -} - void FlatAffineConstraints::getLocalReprs( std::vector>> &repr) const { std::vector> dividends(getNumLocalIds()); @@ -1378,8 +1239,9 @@ void FlatAffineConstraints::getLocalReprs( changed = false; for (unsigned i = 0, e = getNumLocalIds(); i < e; ++i) { if (!foundRepr[i + divOffset]) { - if (auto res = computeSingleVarRepr(*this, foundRepr, divOffset + i, - dividends[i], denominators[i])) { + if (auto res = presburger_utils::computeSingleVarRepr( + *this, foundRepr, divOffset + i, dividends[i], + denominators[i])) { foundRepr[i + divOffset] = true; repr[i] = res; changed = true; @@ -1437,11 +1299,9 @@ unsigned FlatAffineConstraints::gaussianEliminateIds(unsigned posStart, for (pivotCol = posStart; pivotCol < posLimit; ++pivotCol) { // Find a row which has a non-zero coefficient in column 'j'. unsigned pivotRow; - if (!findConstraintWithNonZeroAt(*this, pivotCol, /*isEq=*/true, - &pivotRow)) { + if (!findConstraintWithNonZeroAt(pivotCol, /*isEq=*/true, &pivotRow)) { // No pivot row in equalities with non-zero at 'pivotCol'. 
- if (!findConstraintWithNonZeroAt(*this, pivotCol, /*isEq=*/false, - &pivotRow)) { + if (!findConstraintWithNonZeroAt(pivotCol, /*isEq=*/false, &pivotRow)) { // If inequalities are also non-zero in 'pivotCol', it can be // eliminated. continue; @@ -1596,60 +1456,6 @@ static bool detectAsMod(const FlatAffineConstraints &cst, unsigned pos, return false; } -/// Gather all lower and upper bounds of the identifier at `pos`, and -/// optionally any equalities on it. In addition, the bounds are to be -/// independent of identifiers in position range [`offset`, `offset` + `num`). -void FlatAffineConstraints::getLowerAndUpperBoundIndices( - unsigned pos, SmallVectorImpl *lbIndices, - SmallVectorImpl *ubIndices, SmallVectorImpl *eqIndices, - unsigned offset, unsigned num) const { - assert(pos < getNumIds() && "invalid position"); - assert(offset + num < getNumCols() && "invalid range"); - - // Checks for a constraint that has a non-zero coeff for the identifiers in - // the position range [offset, offset + num) while ignoring `pos`. - auto containsConstraintDependentOnRange = [&](unsigned r, bool isEq) { - unsigned c, f; - auto cst = isEq ? getEquality(r) : getInequality(r); - for (c = offset, f = offset + num; c < f; ++c) { - if (c == pos) - continue; - if (cst[c] != 0) - break; - } - return c < f; - }; - - // Gather all lower bounds and upper bounds of the variable. Since the - // canonical form c_1*x_1 + c_2*x_2 + ... + c_0 >= 0, a constraint is a lower - // bound for x_i if c_i >= 1, and an upper bound if c_i <= -1. - for (unsigned r = 0, e = getNumInequalities(); r < e; r++) { - // The bounds are to be independent of [offset, offset + num) columns. - if (containsConstraintDependentOnRange(r, /*isEq=*/false)) - continue; - if (atIneq(r, pos) >= 1) { - // Lower bound. - lbIndices->push_back(r); - } else if (atIneq(r, pos) <= -1) { - // Upper bound. - ubIndices->push_back(r); - } - } - - // An equality is both a lower and upper bound. Record any equalities - // involving the pos^th identifier. - if (!eqIndices) - return; - - for (unsigned r = 0, e = getNumEqualities(); r < e; r++) { - if (atEq(r, pos) == 0) - continue; - if (containsConstraintDependentOnRange(r, /*isEq=*/true)) - continue; - eqIndices->push_back(r); - } -} - /// Check if the pos^th identifier can be expressed as a floordiv of an affine /// function of other identifiers (where the divisor is a positive constant) /// given the initial set of expressions in `exprs`. If it can be, the @@ -1670,7 +1476,8 @@ static bool detectAsFloorDiv(const FlatAffineConstraints &cst, unsigned pos, SmallVector dividend; unsigned divisor; - auto ulPair = computeSingleVarRepr(cst, foundRepr, pos, dividend, divisor); + auto ulPair = presburger_utils::computeSingleVarRepr(cst, foundRepr, pos, + dividend, divisor); // No upper-lower bound pair found for this var. if (!ulPair) @@ -2109,7 +1916,7 @@ void FlatAffineConstraints::getSliceBounds(unsigned offset, unsigned num, // Detect an identifier as an expression of other identifiers. unsigned idx; - if (!findConstraintWithNonZeroAt(*this, pos, /*isEq=*/true, &idx)) { + if (!findConstraintWithNonZeroAt(pos, /*isEq=*/true, &idx)) { continue; } @@ -3447,12 +3254,10 @@ void FlatAffineValueConstraints::getIneqAsAffineValueMap( vmap.reset(AffineMap::get(numDims - 1, numSyms, boundExpr), operands); } -/// Returns true if the pos^th column is all zero for both inequalities and -/// equalities.. 
-static bool isColZero(const FlatAffineConstraints &cst, unsigned pos) { +bool FlatAffineConstraints::isColZero(unsigned pos) const { unsigned rowPos; - return !findConstraintWithNonZeroAt(cst, pos, /*isEq=*/false, &rowPos) && - !findConstraintWithNonZeroAt(cst, pos, /*isEq=*/true, &rowPos); + return !findConstraintWithNonZeroAt(pos, /*isEq=*/false, &rowPos) && + !findConstraintWithNonZeroAt(pos, /*isEq=*/true, &rowPos); } IntegerSet FlatAffineConstraints::getAsIntegerSet(MLIRContext *context) const { @@ -3471,7 +3276,7 @@ IntegerSet FlatAffineConstraints::getAsIntegerSet(MLIRContext *context) const { SmallVector noLocalRepVars; unsigned numDimsSymbols = getNumDimAndSymbolIds(); for (unsigned i = numDimsSymbols, e = getNumIds(); i < e; ++i) { - if (!memo[i] && !isColZero(*this, /*pos=*/i)) + if (!memo[i] && !isColZero(/*pos=*/i)) noLocalRepVars.push_back(i - numDimsSymbols); } if (!noLocalRepVars.empty()) { diff --git a/mlir/lib/Analysis/Presburger/CMakeLists.txt b/mlir/lib/Analysis/Presburger/CMakeLists.txt index dd8c8d96d872..d52d4ccdb1c2 100644 --- a/mlir/lib/Analysis/Presburger/CMakeLists.txt +++ b/mlir/lib/Analysis/Presburger/CMakeLists.txt @@ -2,10 +2,12 @@ add_mlir_library(MLIRPresburger IntegerPolyhedron.cpp Matrix.cpp Simplex.cpp + Utils.cpp DEPENDS MLIRBuiltinLocationAttributesIncGen LINK_LIBS PUBLIC MLIRIR + MLIRSupport ) diff --git a/mlir/lib/Analysis/Presburger/IntegerPolyhedron.cpp b/mlir/lib/Analysis/Presburger/IntegerPolyhedron.cpp index 3c4f512bf4fb..958f52e2625e 100644 --- a/mlir/lib/Analysis/Presburger/IntegerPolyhedron.cpp +++ b/mlir/lib/Analysis/Presburger/IntegerPolyhedron.cpp @@ -217,3 +217,57 @@ void IntegerPolyhedron::clearConstraints() { equalities.resizeVertically(0); inequalities.resizeVertically(0); } + +/// Gather all lower and upper bounds of the identifier at `pos`, and +/// optionally any equalities on it. In addition, the bounds are to be +/// independent of identifiers in position range [`offset`, `offset` + `num`). +void IntegerPolyhedron::getLowerAndUpperBoundIndices( + unsigned pos, SmallVectorImpl *lbIndices, + SmallVectorImpl *ubIndices, SmallVectorImpl *eqIndices, + unsigned offset, unsigned num) const { + assert(pos < getNumIds() && "invalid position"); + assert(offset + num < getNumCols() && "invalid range"); + + // Checks for a constraint that has a non-zero coeff for the identifiers in + // the position range [offset, offset + num) while ignoring `pos`. + auto containsConstraintDependentOnRange = [&](unsigned r, bool isEq) { + unsigned c, f; + auto cst = isEq ? getEquality(r) : getInequality(r); + for (c = offset, f = offset + num; c < f; ++c) { + if (c == pos) + continue; + if (cst[c] != 0) + break; + } + return c < f; + }; + + // Gather all lower bounds and upper bounds of the variable. Since the + // canonical form c_1*x_1 + c_2*x_2 + ... + c_0 >= 0, a constraint is a lower + // bound for x_i if c_i >= 1, and an upper bound if c_i <= -1. + for (unsigned r = 0, e = getNumInequalities(); r < e; r++) { + // The bounds are to be independent of [offset, offset + num) columns. + if (containsConstraintDependentOnRange(r, /*isEq=*/false)) + continue; + if (atIneq(r, pos) >= 1) { + // Lower bound. + lbIndices->push_back(r); + } else if (atIneq(r, pos) <= -1) { + // Upper bound. + ubIndices->push_back(r); + } + } + + // An equality is both a lower and upper bound. Record any equalities + // involving the pos^th identifier. 
+ if (!eqIndices) + return; + + for (unsigned r = 0, e = getNumEqualities(); r < e; r++) { + if (atEq(r, pos) == 0) + continue; + if (containsConstraintDependentOnRange(r, /*isEq=*/true)) + continue; + eqIndices->push_back(r); + } +} diff --git a/mlir/lib/Analysis/Presburger/Utils.cpp b/mlir/lib/Analysis/Presburger/Utils.cpp new file mode 100644 index 000000000000..8fb9390a440e --- /dev/null +++ b/mlir/lib/Analysis/Presburger/Utils.cpp @@ -0,0 +1,155 @@ +//===- Utils.cpp - General utilities for Presburger library ---------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// Utility functions required by the Presburger Library. +// +//===----------------------------------------------------------------------===// + +#include "mlir/Analysis/Presburger/Utils.h" +#include "mlir/Analysis/Presburger/IntegerPolyhedron.h" +#include "mlir/Support/LogicalResult.h" + +using namespace mlir; + +/// Check if the pos^th identifier can be represented as a division using upper +/// bound inequality at position `ubIneq` and lower bound inequality at position +/// `lbIneq`. +/// +/// Let `id` be the pos^th identifier, then `id` is equivalent to +/// `expr floordiv divisor` if there are constraints of the form: +/// 0 <= expr - divisor * id <= divisor - 1 +/// Rearranging, we have: +/// divisor * id - expr + (divisor - 1) >= 0 <-- Lower bound for 'id' +/// -divisor * id + expr >= 0 <-- Upper bound for 'id' +/// +/// For example: +/// 32*k >= 16*i + j - 31 <-- Lower bound for 'k' +/// 32*k <= 16*i + j <-- Upper bound for 'k' +/// expr = 16*i + j, divisor = 32 +/// k = ( 16*i + j ) floordiv 32 +/// +/// 4q >= i + j - 2 <-- Lower bound for 'q' +/// 4q <= i + j + 1 <-- Upper bound for 'q' +/// expr = i + j + 1, divisor = 4 +/// q = (i + j + 1) floordiv 4 +// +/// This function also supports detecting divisions from bounds that are +/// strictly tighter than the division bounds described above, since tighter +/// bounds imply the division bounds. For example: +/// 4q - i - j + 2 >= 0 <-- Lower bound for 'q' +/// -4q + i + j >= 0 <-- Tight upper bound for 'q' +/// +/// To extract floor divisions with tighter bounds, we assume that that the +/// constraints are of the form: +/// c <= expr - divisior * id <= divisor - 1, where 0 <= c <= divisor - 1 +/// Rearranging, we have: +/// divisor * id - expr + (divisor - 1) >= 0 <-- Lower bound for 'id' +/// -divisor * id + expr - c >= 0 <-- Upper bound for 'id' +/// +/// If successful, `expr` is set to dividend of the division and `divisor` is +/// set to the denominator of the division. +static LogicalResult getDivRepr(const IntegerPolyhedron &cst, unsigned pos, + unsigned ubIneq, unsigned lbIneq, + SmallVector &expr, + unsigned &divisor) { + + assert(pos <= cst.getNumIds() && "Invalid identifier position"); + assert(ubIneq <= cst.getNumInequalities() && + "Invalid upper bound inequality position"); + assert(lbIneq <= cst.getNumInequalities() && + "Invalid upper bound inequality position"); + + // Extract divisor from the lower bound. + divisor = cst.atIneq(lbIneq, pos); + + // First, check if the constraints are opposite of each other except the + // constant term. 
+ unsigned i = 0, e = 0; + for (i = 0, e = cst.getNumIds(); i < e; ++i) + if (cst.atIneq(ubIneq, i) != -cst.atIneq(lbIneq, i)) + break; + + if (i < e) + return failure(); + + // Then, check if the constant term is of the proper form. + // Due to the form of the upper/lower bound inequalities, the sum of their + // constants is `divisor - 1 - c`. From this, we can extract c: + int64_t constantSum = cst.atIneq(lbIneq, cst.getNumCols() - 1) + + cst.atIneq(ubIneq, cst.getNumCols() - 1); + int64_t c = divisor - 1 - constantSum; + + // Check if `c` satisfies the condition `0 <= c <= divisor - 1`. This also + // implictly checks that `divisor` is positive. + if (!(c >= 0 && c <= divisor - 1)) + return failure(); + + // The inequality pair can be used to extract the division. + // Set `expr` to the dividend of the division except the constant term, which + // is set below. + expr.resize(cst.getNumCols(), 0); + for (i = 0, e = cst.getNumIds(); i < e; ++i) + if (i != pos) + expr[i] = cst.atIneq(ubIneq, i); + + // From the upper bound inequality's form, its constant term is equal to the + // constant term of `expr`, minus `c`. From this, + // constant term of `expr` = constant term of upper bound + `c`. + expr.back() = cst.atIneq(ubIneq, cst.getNumCols() - 1) + c; + + return success(); +} + +/// Check if the pos^th identifier can be expressed as a floordiv of an affine +/// function of other identifiers (where the divisor is a positive constant). +/// `foundRepr` contains a boolean for each identifier indicating if the +/// explicit representation for that identifier has already been computed. +/// Returns the upper and lower bound inequalities using which the floordiv can +/// be computed. If the representation could be computed, `dividend` and +/// `denominator` are set. If the representation could not be computed, +/// `llvm::None` is returned. +Optional> presburger_utils::computeSingleVarRepr( + const IntegerPolyhedron &cst, ArrayRef foundRepr, unsigned pos, + SmallVector ÷nd, unsigned &divisor) { + assert(pos < cst.getNumIds() && "invalid position"); + assert(foundRepr.size() == cst.getNumIds() && + "Size of foundRepr does not match total number of variables"); + + SmallVector lbIndices, ubIndices; + cst.getLowerAndUpperBoundIndices(pos, &lbIndices, &ubIndices); + + for (unsigned ubPos : ubIndices) { + for (unsigned lbPos : lbIndices) { + // Attempt to get divison representation from ubPos, lbPos. + if (failed(getDivRepr(cst, pos, ubPos, lbPos, dividend, divisor))) + continue; + + // Check if the inequalities depend on a variable for which + // an explicit representation has not been found yet. + // Exit to avoid circular dependencies between divisions. + unsigned c, f; + for (c = 0, f = cst.getNumIds(); c < f; ++c) { + if (c == pos) + continue; + if (!foundRepr[c] && dividend[c] != 0) + break; + } + + // Expression can't be constructed as it depends on a yet unknown + // identifier. + // TODO: Visit/compute the identifiers in an order so that this doesn't + // happen. More complex but much more efficient. + if (c < f) + continue; + + return std::make_pair(ubPos, lbPos); + } + } + + return llvm::None; +} From 46cdcf08730012128173cd261767a7d12898c8d6 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Lu=C3=ADs=20Ferreira?= Date: Sat, 25 Dec 2021 19:37:45 +0000 Subject: [PATCH 251/293] [lldb] Add support for UTF-8 unicode formatting This patch adds missing formatting for UTF-8 unicode. 
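For illustration: with this change the `unicode8` format dumps a value byte by byte instead of printing it as one wide integer. The updated test below now expects `0x78 0x56 0x34 0x12` for the 32-bit literal `0x12345678` (little-endian byte order), and the old FIXME case that printed the whole `uint128_t` as a single hexadecimal word now prints its sixteen bytes individually.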
Cross-referencing https://reviews.llvm.org/D66447 Reviewed By: labath Differential Revision: https://reviews.llvm.org/D112564 --- lldb/source/Plugins/TypeSystem/Clang/TypeSystemClang.cpp | 3 +++ .../data-formatter/builtin-formats/TestBuiltinFormats.py | 6 ++++-- 2 files changed, 7 insertions(+), 2 deletions(-) diff --git a/lldb/source/Plugins/TypeSystem/Clang/TypeSystemClang.cpp b/lldb/source/Plugins/TypeSystem/Clang/TypeSystemClang.cpp index 0df95594eea2..88c3aedb4c6b 100644 --- a/lldb/source/Plugins/TypeSystem/Clang/TypeSystemClang.cpp +++ b/lldb/source/Plugins/TypeSystem/Clang/TypeSystemClang.cpp @@ -5149,6 +5149,8 @@ lldb::Format TypeSystemClang::GetFormat(lldb::opaque_compiler_type_t type) { case clang::BuiltinType::UChar: case clang::BuiltinType::WChar_U: return lldb::eFormatChar; + case clang::BuiltinType::Char8: + return lldb::eFormatUnicode8; case clang::BuiltinType::Char16: return lldb::eFormatUnicode16; case clang::BuiltinType::Char32: @@ -8957,6 +8959,7 @@ bool TypeSystemClang::DumpTypeValue( case eFormatCharPrintable: case eFormatCharArray: case eFormatBytes: + case eFormatUnicode8: case eFormatBytesWithASCII: item_count = byte_size; byte_size = 1; diff --git a/lldb/test/API/functionalities/data-formatter/builtin-formats/TestBuiltinFormats.py b/lldb/test/API/functionalities/data-formatter/builtin-formats/TestBuiltinFormats.py index c894b80228cf..7763305b58db 100644 --- a/lldb/test/API/functionalities/data-formatter/builtin-formats/TestBuiltinFormats.py +++ b/lldb/test/API/functionalities/data-formatter/builtin-formats/TestBuiltinFormats.py @@ -115,8 +115,7 @@ def test(self): self.assertIn('= \\0\\e90zaZA\\v\\t\\r\\n\\f\\b\\a \n', self.getFormatted("character array", string_expr)) self.assertIn('= \\0\\e90zaZA\\v\\t\\r\\n\\f\\b\\a \n', self.getFormatted("character", string_expr)) self.assertIn('= ..90zaZA....... \n', self.getFormatted("printable character", string_expr)) - # FIXME: This should probably print the characters in the uint128_t. - self.assertIn('= 0x2007080c0a0d090b415a617a30391b00\n', self.getFormatted("unicode8", string_expr)) + self.assertIn('= 0x00 0x1b 0x39 0x30 0x7a 0x61 0x5a 0x41 0x0b 0x09 0x0d 0x0a 0x0c 0x08 0x07 0x20\n', self.getFormatted("unicode8", string_expr)) # OSType ostype_expr = "(__UINT64_TYPE__)0x" @@ -137,6 +136,9 @@ def test(self): # bytes with ASCII self.assertIn(r'= " \U0000001b\a\b\f\n\r\t\vaA09\0"', self.getFormatted("bytes with ASCII", "cstring")) + # unicode8 + self.assertIn('= 0x78 0x56 0x34 0x12\n', self.getFormatted("unicode8", "0x12345678")) + # unicode16 self.assertIn('= U+5678 U+1234\n', self.getFormatted("unicode16", "0x12345678")) From 0c553cc1af2e4c14100df6cf4a6fc91987e778e6 Mon Sep 17 00:00:00 2001 From: Arjun P Date: Sun, 26 Dec 2021 01:48:19 +0530 Subject: [PATCH 252/293] [MLIR] Add forgotten directory Support to unittests cmake The Support directory was removed from the unittests cmake when the directory was removed in 204c3b551626a925dfdc3822a6f240bdc8ef5d3a. Subsequent commits added the directory back but seem to have missed adding it back to the cmake. This patch also removes MLIRSupportIndentedStream from the list of linked libraries to avoid an ODR violation (it's already part of MLIRSupport which is also being linked here). 
Otherwise ASAN complains: ``` ================================================================= ==102592==ERROR: AddressSanitizer: odr-violation (0x7fbdf214eee0): [1] size=120 'vtable for mlir::raw_indented_ostream' /home/arjun/llvm-project/mlir/lib/Support/IndentedOstream.cpp [2] size=120 'vtable for mlir::raw_indented_ostream' /home/arjun/llvm-project/mlir/lib/Support/IndentedOstream.cpp These globals were registered at these points: [1]: #0 0x28a71d in __asan_register_globals (/home/arjun/llvm-project/build/tools/mlir/unittests/Support/MLIRSupportTests+0x28a71d) #1 0x7fbdf214a61b in asan.module_ctor (/home/arjun/llvm-project/build/lib/libMLIRSupportIndentedOstream.so.14git+0x661b) [2]: #0 0x28a71d in __asan_register_globals (/home/arjun/llvm-project/build/tools/mlir/unittests/Support/MLIRSupportTests+0x28a71d) #1 0x7fbdf2061c4b in asan.module_ctor (/home/arjun/llvm-project/build/lib/libMLIRSupport.so.14git+0x11bc4b) ==102592==HINT: if you don't care about these errors you may set ASAN_OPTIONS=detect_odr_violation=0 SUMMARY AddressSanitizer: odr-violation: global 'vtable for mlir::raw_indented_ostream' at /home/arjun/llvm-project/mlir/lib/Support/IndentedOstream.cpp ==102592==ABORTING ``` Reviewed By: jpienaar Differential Revision: https://reviews.llvm.org/D116027 --- mlir/include/mlir/Support/DebugAction.h | 3 +-- mlir/unittests/CMakeLists.txt | 1 + mlir/unittests/Support/CMakeLists.txt | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-) diff --git a/mlir/include/mlir/Support/DebugAction.h b/mlir/include/mlir/Support/DebugAction.h index 227d213b0dda..4dc04476b583 100644 --- a/mlir/include/mlir/Support/DebugAction.h +++ b/mlir/include/mlir/Support/DebugAction.h @@ -205,8 +205,7 @@ template class DebugAction { /// Provide classof to allow casting between handler types. static bool classof(const DebugActionManager::HandlerBase *handler) { - return handler->getHandlerID() == - TypeID::get::Handler>(); + return handler->getHandlerID() == TypeID::get(); } }; diff --git a/mlir/unittests/CMakeLists.txt b/mlir/unittests/CMakeLists.txt index 21506862a302..2798c443cf98 100644 --- a/mlir/unittests/CMakeLists.txt +++ b/mlir/unittests/CMakeLists.txt @@ -11,6 +11,7 @@ add_subdirectory(ExecutionEngine) add_subdirectory(Interfaces) add_subdirectory(IR) add_subdirectory(Pass) +add_subdirectory(Support) add_subdirectory(Rewrite) add_subdirectory(TableGen) add_subdirectory(Transforms) diff --git a/mlir/unittests/Support/CMakeLists.txt b/mlir/unittests/Support/CMakeLists.txt index 6616a793ec12..fd1e66205c07 100644 --- a/mlir/unittests/Support/CMakeLists.txt +++ b/mlir/unittests/Support/CMakeLists.txt @@ -7,4 +7,4 @@ add_mlir_unittest(MLIRSupportTests ) target_link_libraries(MLIRSupportTests - PRIVATE MLIRSupportIndentedOstream MLIRSupport) + PRIVATE MLIRSupport) From dd4f5d4ae57703a9b79e466e027674fbeac80d41 Mon Sep 17 00:00:00 2001 From: Fangrui Song Date: Sat, 25 Dec 2021 14:23:12 -0800 Subject: [PATCH 253/293] [ELF] De-template handleTlsRelocation. NFC --- lld/ELF/Relocations.cpp | 11 +++++------ 1 file changed, 5 insertions(+), 6 deletions(-) diff --git a/lld/ELF/Relocations.cpp b/lld/ELF/Relocations.cpp index cfe49007b814..e0dc9a9fa3ea 100644 --- a/lld/ELF/Relocations.cpp +++ b/lld/ELF/Relocations.cpp @@ -1145,10 +1145,9 @@ static unsigned handleMipsTlsRelocation(RelType type, Symbol &sym, // symbol in TLS block. // // Returns the number of relocations processed. 
-template -static unsigned -handleTlsRelocation(RelType type, Symbol &sym, InputSectionBase &c, - typename ELFT::uint offset, int64_t addend, RelExpr expr) { +static unsigned handleTlsRelocation(RelType type, Symbol &sym, + InputSectionBase &c, uint64_t offset, + int64_t addend, RelExpr expr) { if (!sym.isTls()) return 0; @@ -1354,8 +1353,8 @@ static void scanReloc(InputSectionBase &sec, OffsetGetter &getOffset, RelTy *&i, getLocation(sec, sym, offset)); return; } - } else if (unsigned processed = handleTlsRelocation( - type, sym, sec, offset, addend, expr)) { + } else if (unsigned processed = + handleTlsRelocation(type, sym, sec, offset, addend, expr)) { i += (processed - 1); return; } From a00f480fe8ee5236abfe6ba89ec17f113b6fe132 Mon Sep 17 00:00:00 2001 From: Fangrui Song Date: Sat, 25 Dec 2021 14:34:05 -0800 Subject: [PATCH 254/293] [ELF] scanReloc: remove unused start parameter. NFC This was once used as a workaround for detecting missing PPC64 TLSGD/TLSLD relocations produced by ancient IBM XL C/C++. --- lld/ELF/Relocations.cpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/lld/ELF/Relocations.cpp b/lld/ELF/Relocations.cpp index e0dc9a9fa3ea..719386e2e71f 100644 --- a/lld/ELF/Relocations.cpp +++ b/lld/ELF/Relocations.cpp @@ -1263,7 +1263,7 @@ static unsigned handleTlsRelocation(RelType type, Symbol &sym, template static void scanReloc(InputSectionBase &sec, OffsetGetter &getOffset, RelTy *&i, - RelTy *start, RelTy *end) { + RelTy *end) { const RelTy &rel = *i; uint32_t symIndex = rel.getSymbol(config->isMips64EL); Symbol &sym = sec.getFile()->getSymbol(symIndex); @@ -1470,7 +1470,7 @@ static void scanRelocs(InputSectionBase &sec, ArrayRef rels) { rels = sortRels(rels, storage); for (auto i = rels.begin(), end = rels.end(); i != end;) - scanReloc(sec, getOffset, i, rels.begin(), end); + scanReloc(sec, getOffset, i, end); // Sort relocations by offset for more efficient searching for // R_RISCV_PCREL_HI20 and R_PPC64_ADDR64. From bba228877c40aceabe0977c8bec840af49b9d2d1 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Lu=C3=ADs=20Ferreira?= Date: Tue, 21 Dec 2021 20:28:58 +0000 Subject: [PATCH 255/293] [lldb] Add UTF-8 char basic type --- lldb/bindings/python/python-extensions.swig | 1 + lldb/docs/python_api_enums.rst | 1 + lldb/include/lldb/lldb-enumerations.h | 1 + 3 files changed, 3 insertions(+) diff --git a/lldb/bindings/python/python-extensions.swig b/lldb/bindings/python/python-extensions.swig index b98b0d458f0c..398df708830a 100644 --- a/lldb/bindings/python/python-extensions.swig +++ b/lldb/bindings/python/python-extensions.swig @@ -563,6 +563,7 @@ def is_numeric_type(basic_type): if basic_type == eBasicTypeWChar: return (True,False) if basic_type == eBasicTypeSignedWChar: return (True,True) if basic_type == eBasicTypeUnsignedWChar: return (True,False) + if basic_type == eBasicTypeChar8: return (True,False) if basic_type == eBasicTypeChar16: return (True,False) if basic_type == eBasicTypeChar32: return (True,False) if basic_type == eBasicTypeShort: return (True,True) diff --git a/lldb/docs/python_api_enums.rst b/lldb/docs/python_api_enums.rst index 58100b2aa9a2..1c363381a11b 100644 --- a/lldb/docs/python_api_enums.rst +++ b/lldb/docs/python_api_enums.rst @@ -1017,6 +1017,7 @@ BasicType .. py:data:: eBasicTypeWChar .. py:data:: eBasicTypeSignedWChar .. py:data:: eBasicTypeUnsignedWChar +.. py:data:: eBasicTypeChar8 .. py:data:: eBasicTypeChar16 .. py:data:: eBasicTypeChar32 .. 
py:data:: eBasicTypeShort diff --git a/lldb/include/lldb/lldb-enumerations.h b/lldb/include/lldb/lldb-enumerations.h index f5cabc02bd84..b459d6b7ec3b 100644 --- a/lldb/include/lldb/lldb-enumerations.h +++ b/lldb/include/lldb/lldb-enumerations.h @@ -747,6 +747,7 @@ enum BasicType { eBasicTypeWChar, eBasicTypeSignedWChar, eBasicTypeUnsignedWChar, + eBasicTypeChar8, eBasicTypeChar16, eBasicTypeChar32, eBasicTypeShort, From d8c5a68796b0f52f856cd902f9670b6a3238b425 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Lu=C3=ADs=20Ferreira?= Date: Wed, 22 Dec 2021 00:23:30 +0000 Subject: [PATCH 256/293] [lldb] Add UTF-8 Basic Type on Clang TypeSystem UTF-8 mapping from Clang to LLDB basic type was missing. This patch adds that to be consistent with UTF-16 and UTF-32 basic types. --- lldb/source/Plugins/TypeSystem/Clang/TypeSystemClang.cpp | 4 ++++ lldb/unittests/Symbol/TestTypeSystemClang.cpp | 2 ++ 2 files changed, 6 insertions(+) diff --git a/lldb/source/Plugins/TypeSystem/Clang/TypeSystemClang.cpp b/lldb/source/Plugins/TypeSystem/Clang/TypeSystemClang.cpp index 88c3aedb4c6b..126009c93bb7 100644 --- a/lldb/source/Plugins/TypeSystem/Clang/TypeSystemClang.cpp +++ b/lldb/source/Plugins/TypeSystem/Clang/TypeSystemClang.cpp @@ -1999,6 +1999,8 @@ TypeSystemClang::GetOpaqueCompilerType(clang::ASTContext *ast, return ast->getSignedWCharType().getAsOpaquePtr(); case eBasicTypeUnsignedWChar: return ast->getUnsignedWCharType().getAsOpaquePtr(); + case eBasicTypeChar8: + return ast->Char8Ty.getAsOpaquePtr(); case eBasicTypeChar16: return ast->Char16Ty.getAsOpaquePtr(); case eBasicTypeChar32: @@ -5444,6 +5446,8 @@ TypeSystemClang::GetBasicTypeEnumeration(lldb::opaque_compiler_type_t type) { return eBasicTypeSignedChar; case clang::BuiltinType::Char_U: return eBasicTypeUnsignedChar; + case clang::BuiltinType::Char8: + return eBasicTypeChar8; case clang::BuiltinType::Char16: return eBasicTypeChar16; case clang::BuiltinType::Char32: diff --git a/lldb/unittests/Symbol/TestTypeSystemClang.cpp b/lldb/unittests/Symbol/TestTypeSystemClang.cpp index 5b1154d2cd8b..e78a084f59f0 100644 --- a/lldb/unittests/Symbol/TestTypeSystemClang.cpp +++ b/lldb/unittests/Symbol/TestTypeSystemClang.cpp @@ -53,6 +53,8 @@ TEST_F(TestTypeSystemClang, TestGetBasicTypeFromEnum) { context.hasSameType(GetBasicQualType(eBasicTypeBool), context.BoolTy)); EXPECT_TRUE( context.hasSameType(GetBasicQualType(eBasicTypeChar), context.CharTy)); + EXPECT_TRUE(context.hasSameType(GetBasicQualType(eBasicTypeChar8), + context.Char8Ty)); EXPECT_TRUE(context.hasSameType(GetBasicQualType(eBasicTypeChar16), context.Char16Ty)); EXPECT_TRUE(context.hasSameType(GetBasicQualType(eBasicTypeChar32), From 71793f58ef811180a50a4b19e456f1227acae88c Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Lu=C3=ADs=20Ferreira?= Date: Tue, 5 Oct 2021 17:21:50 +0100 Subject: [PATCH 257/293] [Demangle] Add support for D simple basic and compound types MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This patch implements simple demangling of basic types such as primitives, arrays, `__vector`, associative arrays, pointers and compound types such as structs and classes. 
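For a quick illustration, here are a few mangled/demangled pairs taken from the unit tests added in this patch (for a variable symbol the trailing type is parsed and then discarded):

```
_D8demangle1ii    ->  demangle.i    // trailing `i` is the type `int`
_D8demangle2siOi  ->  demangle.si   // `Oi` is `shared(int)`
_D8demangle1re    ->  demangle.r    // `e` is `real`
```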
Differential Revision: https://reviews.llvm.org/D111416 Signed-off-by: Luís Ferreira --- llvm/lib/Demangle/DLangDemangle.cpp | 269 +++++++++++++++++- llvm/unittests/Demangle/DLangDemangleTest.cpp | 6 +- 2 files changed, 272 insertions(+), 3 deletions(-) diff --git a/llvm/lib/Demangle/DLangDemangle.cpp b/llvm/lib/Demangle/DLangDemangle.cpp index 0cefbd63a7ae..d135d43b7ec7 100644 --- a/llvm/lib/Demangle/DLangDemangle.cpp +++ b/llvm/lib/Demangle/DLangDemangle.cpp @@ -115,6 +115,17 @@ struct Demangler { /// \see https://dlang.org/spec/abi.html#QualifiedName . const char *parseQualified(OutputBuffer *Demangled, const char *Mangled); + /// Extract and demangle a type from a given mangled symbol append it to + /// the output string. + /// + /// \param Demangled output buffer to write the demangled name. + /// \param Mangled mangled symbol to be demangled. + /// + /// \return the remaining string on success or nullptr on failure. + /// + /// \see https://dlang.org/spec/abi.html#Type . + const char *parseType(OutputBuffer *Demangled, const char *Mangled); + /// The string we are demangling. const char *Str; }; @@ -174,8 +185,13 @@ const char *Demangler::parseMangle(OutputBuffer *Demangled, if (*Mangled == 'Z') ++Mangled; else { - // TODO: Implement symbols with types. - return nullptr; + // Discard the declaration or return type. + OutputBuffer Type; + if (!initializeOutputBuffer(nullptr, nullptr, Type, 32)) + return nullptr; + + Mangled = parseType(&Type, Mangled); + std::free(Type.getBuffer()); } } @@ -262,6 +278,255 @@ const char *Demangler::parseIdentifier(OutputBuffer *Demangled, return parseLName(Demangled, Mangled, Len); } +const char *Demangler::parseType(OutputBuffer *Demangled, const char *Mangled) { + if (Mangled == nullptr || *Mangled == '\0') + return nullptr; + + switch (*Mangled) { + case 'O': // shared(T) + ++Mangled; + *Demangled << "shared("; + Mangled = parseType(Demangled, Mangled); + *Demangled << ')'; + return Mangled; + + case 'x': // const(T) + ++Mangled; + *Demangled << "const("; + Mangled = parseType(Demangled, Mangled); + *Demangled << ')'; + return Mangled; + + case 'y': // immutable(T) + ++Mangled; + *Demangled << "immutable("; + Mangled = parseType(Demangled, Mangled); + *Demangled << ')'; + return Mangled; + + case 'N': + ++Mangled; + + switch (*Mangled) { + case 'g': // wild(T) + ++Mangled; + *Demangled << "inout("; + Mangled = parseType(Demangled, Mangled); + *Demangled << ')'; + return Mangled; + + case 'h': // vector(T) + ++Mangled; + *Demangled << "__vector("; + Mangled = parseType(Demangled, Mangled); + *Demangled << ')'; + return Mangled; + + case 'n': // typeof(*null) + ++Mangled; + *Demangled << "typeof(*null)"; + return Mangled; + } + + // invalid. 
+ return nullptr; + + case 'A': // dynamic array (T[]) + ++Mangled; + Mangled = parseType(Demangled, Mangled); + *Demangled << "[]"; + return Mangled; + + case 'G': // static array (T[N]) + { + const char *NumPtr; + size_t Num = 0; + ++Mangled; + + NumPtr = Mangled; + while (std::isdigit(*Mangled)) { + ++Num; + ++Mangled; + } + Mangled = parseType(Demangled, Mangled); + *Demangled << '['; + *Demangled << StringView(NumPtr, Num); + *Demangled << ']'; + return Mangled; + } + + case 'H': // associative array (T[T]) + { + OutputBuffer Type; + if (!initializeOutputBuffer(nullptr, nullptr, Type, 32)) + return nullptr; + + size_t Sztype; + ++Mangled; + + Mangled = parseType(&Type, Mangled); + Sztype = Type.getCurrentPosition(); + + Mangled = parseType(Demangled, Mangled); + *Demangled << '['; + *Demangled << StringView(Type.getBuffer(), Sztype); + *Demangled << ']'; + + std::free(Type.getBuffer()); + return Mangled; + } + + case 'P': // pointer (T*) + ++Mangled; + if (!isCallConvention(Mangled)) { + Mangled = parseType(Demangled, Mangled); + *Demangled << '*'; + return Mangled; + } + + // TODO: Parse function types. + + [[clang::fallthrough]]; + case 'C': // class T + case 'S': // struct T + case 'E': // enum T + case 'T': // typedef T + ++Mangled; + // TODO: Handle type modifiers and type functions in qualifiers. + return parseQualified(Demangled, Mangled); + + // TODO: Parse delegate types. + // TODO: Parse tuple types. + + // Basic types. + case 'n': + ++Mangled; + *Demangled << "typeof(null)"; + return Mangled; + + case 'v': + ++Mangled; + *Demangled << "void"; + return Mangled; + + case 'g': + ++Mangled; + *Demangled << "byte"; + return Mangled; + case 'h': + ++Mangled; + *Demangled << "ubyte"; + return Mangled; + + case 's': + ++Mangled; + *Demangled << "short"; + return Mangled; + case 't': + ++Mangled; + *Demangled << "ushort"; + return Mangled; + + case 'i': + ++Mangled; + *Demangled << "int"; + return Mangled; + case 'k': + ++Mangled; + *Demangled << "uint"; + return Mangled; + + case 'l': + ++Mangled; + *Demangled << "long"; + return Mangled; + case 'm': + ++Mangled; + *Demangled << "ulong"; + return Mangled; + + case 'f': + ++Mangled; + *Demangled << "float"; + return Mangled; + case 'd': + ++Mangled; + *Demangled << "double"; + return Mangled; + case 'e': + ++Mangled; + *Demangled << "real"; + return Mangled; + + // Imaginary types. + case 'o': + ++Mangled; + *Demangled << "ifloat"; + return Mangled; + case 'p': + ++Mangled; + *Demangled << "idouble"; + return Mangled; + case 'j': + ++Mangled; + *Demangled << "ireal"; + return Mangled; + + // Complex types. + case 'q': + ++Mangled; + *Demangled << "cfloat"; + return Mangled; + case 'r': + ++Mangled; + *Demangled << "cdouble"; + return Mangled; + case 'c': + ++Mangled; + *Demangled << "creal"; + return Mangled; + + // Other types. + case 'b': + ++Mangled; + *Demangled << "bool"; + return Mangled; + + case 'a': + ++Mangled; + *Demangled << "char"; + return Mangled; + case 'u': + ++Mangled; + *Demangled << "wchar"; + return Mangled; + case 'w': + ++Mangled; + *Demangled << "dchar"; + return Mangled; + + case 'z': + ++Mangled; + + switch (*Mangled) { + case 'i': + ++Mangled; + *Demangled << "cent"; + return Mangled; + case 'k': + ++Mangled; + *Demangled << "ucent"; + return Mangled; + } + return nullptr; + + // TODO: Parse back referenced types. + + default: // unhandled. 
+ return nullptr; + } +} + const char *Demangler::parseLName(OutputBuffer *Demangled, const char *Mangled, unsigned long Len) { switch (Len) { diff --git a/llvm/unittests/Demangle/DLangDemangleTest.cpp b/llvm/unittests/Demangle/DLangDemangleTest.cpp index 3de47ab1c4e5..a0184178504c 100644 --- a/llvm/unittests/Demangle/DLangDemangleTest.cpp +++ b/llvm/unittests/Demangle/DLangDemangleTest.cpp @@ -47,4 +47,8 @@ INSTANTIATE_TEST_SUITE_P( std::make_pair("_D8demangle4test12__ModuleInfoZ", "ModuleInfo for demangle.test"), std::make_pair("_D8demangle4__S14testZ", "demangle.test"), - std::make_pair("_D8demangle4__Sd4testZ", "demangle.__Sd.test"))); + std::make_pair("_D8demangle4__Sd4testZ", "demangle.__Sd.test"), + std::make_pair("_D8demangle1ii", "demangle.i"), + std::make_pair("_D8demangle2siOi", "demangle.si"), + std::make_pair("_D8demangle1re", "demangle.r"), + std::make_pair("_D8demangle1iinvalidtypeseq", nullptr))); From 69d61cb470d2e7c4bdc17608f985d695c21b1c33 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Lu=C3=ADs=20Ferreira?= Date: Wed, 6 Oct 2021 01:53:26 +0000 Subject: [PATCH 258/293] [Demangle] Add support for D symbols back referencing MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This patch adds support for identifier back referencing allowing compressed mangled names by avoiding repetitiveness. Differential Revision: https://reviews.llvm.org/D111417 Signed-off-by: Luís Ferreira --- llvm/lib/Demangle/DLangDemangle.cpp | 142 +++++++++++++++++- llvm/unittests/Demangle/DLangDemangleTest.cpp | 4 +- 2 files changed, 142 insertions(+), 4 deletions(-) diff --git a/llvm/lib/Demangle/DLangDemangle.cpp b/llvm/lib/Demangle/DLangDemangle.cpp index d135d43b7ec7..07cc1e92a63b 100644 --- a/llvm/lib/Demangle/DLangDemangle.cpp +++ b/llvm/lib/Demangle/DLangDemangle.cpp @@ -70,6 +70,41 @@ struct Demangler { /// \see https://dlang.org/spec/abi.html#Number . const char *decodeNumber(const char *Mangled, unsigned long *Ret); + /// Extract the back reference position from a given string. + /// + /// \param Mangled string to extract the back reference position. + /// \param Ret assigned result value. + /// + /// \return the remaining string on success or nullptr on failure. + /// + /// \note a result <= 0 is a failure. + /// + /// \see https://dlang.org/spec/abi.html#back_ref . + /// \see https://dlang.org/spec/abi.html#NumberBackRef . + const char *decodeBackrefPos(const char *Mangled, long *Ret); + + /// Extract the symbol pointed by the back reference form a given string. + /// + /// \param Mangled string to extract the back reference position. + /// \param Ret assigned result value. + /// + /// \return the remaining string on success or nullptr on failure. + /// + /// \see https://dlang.org/spec/abi.html#back_ref . + const char *decodeBackref(const char *Mangled, const char **Ret); + + /// Extract and demangle backreferenced symbol from a given mangled symbol + /// and append it to the output string. + /// + /// \param Demangled output buffer to write the demangled name. + /// \param Mangled mangled symbol to be demangled. + /// + /// \return the remaining string on success or nullptr on failure. + /// + /// \see https://dlang.org/spec/abi.html#back_ref . + /// \see https://dlang.org/spec/abi.html#IdentifierBackRef . + const char *parseSymbolBackref(OutputBuffer *Demangled, const char *Mangled); + /// Check whether it is the beginning of a symbol name. /// /// \param Mangled string to extract the symbol name. 
@@ -157,12 +192,110 @@ const char *Demangler::decodeNumber(const char *Mangled, unsigned long *Ret) { return Mangled; } +const char *Demangler::decodeBackrefPos(const char *Mangled, long *Ret) { + // Return nullptr if trying to extract something that isn't a digit + if (Mangled == nullptr || !std::isalpha(*Mangled)) + return nullptr; + + // Any identifier or non-basic type that has been emitted to the mangled + // symbol before will not be emitted again, but is referenced by a special + // sequence encoding the relative position of the original occurrence in the + // mangled symbol name. + // Numbers in back references are encoded with base 26 by upper case letters + // A-Z for higher digits but lower case letters a-z for the last digit. + // NumberBackRef: + // [a-z] + // [A-Z] NumberBackRef + // ^ + unsigned long Val = 0; + + while (std::isalpha(*Mangled)) { + // Check for overflow + if (Val > (std::numeric_limits::max() - 25) / 26) + break; + + Val *= 26; + + if (Mangled[0] >= 'a' && Mangled[0] <= 'z') { + Val += Mangled[0] - 'a'; + if ((long)Val <= 0) + break; + *Ret = Val; + return Mangled + 1; + } + + Val += Mangled[0] - 'A'; + ++Mangled; + } + + return nullptr; +} + +const char *Demangler::decodeBackref(const char *Mangled, const char **Ret) { + *Ret = nullptr; + + if (Mangled == nullptr || *Mangled != 'Q') + return nullptr; + + // Position of 'Q' + const char *Qpos = Mangled; + long RefPos; + ++Mangled; + + Mangled = decodeBackrefPos(Mangled, &RefPos); + if (Mangled == nullptr) + return nullptr; + + if (RefPos > Qpos - Str) + return nullptr; + + // Set the position of the back reference. + *Ret = Qpos - RefPos; + + return Mangled; +} + +const char *Demangler::parseSymbolBackref(OutputBuffer *Demangled, + const char *Mangled) { + // An identifier back reference always points to a digit 0 to 9. + // IdentifierBackRef: + // Q NumberBackRef + // ^ + const char *Backref; + unsigned long Len; + + // Get position of the back reference + Mangled = decodeBackref(Mangled, &Backref); + + // Must point to a simple identifier + Backref = decodeNumber(Backref, &Len); + if (Backref == nullptr || strlen(Backref) < Len) + return nullptr; + + Backref = parseLName(Demangled, Backref, Len); + if (Backref == nullptr) + return nullptr; + + return Mangled; +} + bool Demangler::isSymbolName(const char *Mangled) { + long Ret; + const char *Qref = Mangled; + if (std::isdigit(*Mangled)) return true; - // TODO: Handle symbol back references and template instances. - return false; + // TODO: Handle template instances. + + if (*Mangled != 'Q') + return false; + + Mangled = decodeBackrefPos(Mangled + 1, &Ret); + if (Mangled == nullptr || Ret > Qref - Str) + return false; + + return std::isdigit(Qref[-Ret]); } const char *Demangler::parseMangle(OutputBuffer *Demangled, @@ -244,7 +377,10 @@ const char *Demangler::parseIdentifier(OutputBuffer *Demangled, if (Mangled == nullptr || *Mangled == '\0') return nullptr; - // TODO: Parse back references and lengthless template instances. + if (*Mangled == 'Q') + return parseSymbolBackref(Demangled, Mangled); + + // TODO: Parse lengthless template instances. 
const char *Endptr = decodeNumber(Mangled, &Len); diff --git a/llvm/unittests/Demangle/DLangDemangleTest.cpp b/llvm/unittests/Demangle/DLangDemangleTest.cpp index a0184178504c..04f6a55678ba 100644 --- a/llvm/unittests/Demangle/DLangDemangleTest.cpp +++ b/llvm/unittests/Demangle/DLangDemangleTest.cpp @@ -51,4 +51,6 @@ INSTANTIATE_TEST_SUITE_P( std::make_pair("_D8demangle1ii", "demangle.i"), std::make_pair("_D8demangle2siOi", "demangle.si"), std::make_pair("_D8demangle1re", "demangle.r"), - std::make_pair("_D8demangle1iinvalidtypeseq", nullptr))); + std::make_pair("_D8demangle1iinvalidtypeseq", nullptr), + std::make_pair("_D8demangle3ABCQeQg1ai", "demangle.ABC.ABC.ABC.a"), + std::make_pair("_D8demangle3ABCQeQaaaa1ai", nullptr))); From 64e28a68fd9099c3aa5c7d635a5c1969b0c75631 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Lu=C3=ADs=20Ferreira?= Date: Wed, 6 Oct 2021 15:35:08 +0100 Subject: [PATCH 259/293] [Demangle] Add support for D types back referencing MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This patch adds support for type back referencing, allowing demangling of compressed mangled symbols with repetitive types. Differential Revision: https://reviews.llvm.org/D111419 Signed-off-by: Luís Ferreira --- llvm/lib/Demangle/DLangDemangle.cpp | 51 ++++++++++++++++++- llvm/unittests/Demangle/DLangDemangleTest.cpp | 4 +- 2 files changed, 52 insertions(+), 3 deletions(-) diff --git a/llvm/lib/Demangle/DLangDemangle.cpp b/llvm/lib/Demangle/DLangDemangle.cpp index 07cc1e92a63b..696d175c82dc 100644 --- a/llvm/lib/Demangle/DLangDemangle.cpp +++ b/llvm/lib/Demangle/DLangDemangle.cpp @@ -105,6 +105,18 @@ struct Demangler { /// \see https://dlang.org/spec/abi.html#IdentifierBackRef . const char *parseSymbolBackref(OutputBuffer *Demangled, const char *Mangled); + /// Extract and demangle backreferenced type from a given mangled symbol + /// and append it to the output string. + /// + /// \param Demangled output buffer to write the demangled name. + /// \param Mangled mangled symbol to be demangled. + /// + /// \return the remaining string on success or nullptr on failure. + /// + /// \see https://dlang.org/spec/abi.html#back_ref . + /// \see https://dlang.org/spec/abi.html#TypeBackRef . + const char *parseTypeBackref(OutputBuffer *Demangled, const char *Mangled); + /// Check whether it is the beginning of a symbol name. /// /// \param Mangled string to extract the symbol name. @@ -163,6 +175,8 @@ struct Demangler { /// The string we are demangling. const char *Str; + /// The index of the last back reference. + int LastBackref; }; } // namespace @@ -279,6 +293,36 @@ const char *Demangler::parseSymbolBackref(OutputBuffer *Demangled, return Mangled; } +const char *Demangler::parseTypeBackref(OutputBuffer *Demangled, + const char *Mangled) { + // A type back reference always points to a letter. + // TypeBackRef: + // Q NumberBackRef + // ^ + const char *Backref; + + // If we appear to be moving backwards through the mangle string, then + // bail as this may be a recursive back reference. + if (Mangled - Str >= LastBackref) + return nullptr; + + int SaveRefPos = LastBackref; + LastBackref = Mangled - Str; + + // Get position of the back reference. + Mangled = decodeBackref(Mangled, &Backref); + + // TODO: Add support for function type back references. 
+ Backref = parseType(Demangled, Backref); + + LastBackref = SaveRefPos; + + if (Backref == nullptr) + return nullptr; + + return Mangled; +} + bool Demangler::isSymbolName(const char *Mangled) { long Ret; const char *Qref = Mangled; @@ -656,7 +700,9 @@ const char *Demangler::parseType(OutputBuffer *Demangled, const char *Mangled) { } return nullptr; - // TODO: Parse back referenced types. + // Back referenced type. + case 'Q': + return parseTypeBackref(Demangled, Mangled); default: // unhandled. return nullptr; @@ -720,7 +766,8 @@ const char *Demangler::parseLName(OutputBuffer *Demangled, const char *Mangled, return Mangled; } -Demangler::Demangler(const char *Mangled) : Str(Mangled) {} +Demangler::Demangler(const char *Mangled) + : Str(Mangled), LastBackref(strlen(Mangled)) {} const char *Demangler::parseMangle(OutputBuffer *Demangled) { return parseMangle(Demangled, this->Str); diff --git a/llvm/unittests/Demangle/DLangDemangleTest.cpp b/llvm/unittests/Demangle/DLangDemangleTest.cpp index 04f6a55678ba..04caafb6b2a2 100644 --- a/llvm/unittests/Demangle/DLangDemangleTest.cpp +++ b/llvm/unittests/Demangle/DLangDemangleTest.cpp @@ -53,4 +53,6 @@ INSTANTIATE_TEST_SUITE_P( std::make_pair("_D8demangle1re", "demangle.r"), std::make_pair("_D8demangle1iinvalidtypeseq", nullptr), std::make_pair("_D8demangle3ABCQeQg1ai", "demangle.ABC.ABC.ABC.a"), - std::make_pair("_D8demangle3ABCQeQaaaa1ai", nullptr))); + std::make_pair("_D8demangle3ABCQeQaaaa1ai", nullptr), + std::make_pair("_D8demangle4ABCiQfQh1aQh", "demangle.ABCi.ABCi.ABCi.a"), + std::make_pair("_D8demangle4ABCiQfQh1aQaaa", nullptr))); From 1409c8024eef68ccac6a94bae93c9ea61f9d7812 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Lu=C3=ADs=20Ferreira?= Date: Wed, 6 Oct 2021 17:40:02 +0100 Subject: [PATCH 260/293] [Demangle] Add support for D type functions MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Differential Revision: https://reviews.llvm.org/D111420 Signed-off-by: Luís Ferreira --- llvm/lib/Demangle/DLangDemangle.cpp | 318 +++++++++++++++++- llvm/unittests/Demangle/DLangDemangleTest.cpp | 3 +- 2 files changed, 318 insertions(+), 3 deletions(-) diff --git a/llvm/lib/Demangle/DLangDemangle.cpp b/llvm/lib/Demangle/DLangDemangle.cpp index 696d175c82dc..2eae12f8ba34 100644 --- a/llvm/lib/Demangle/DLangDemangle.cpp +++ b/llvm/lib/Demangle/DLangDemangle.cpp @@ -117,6 +117,18 @@ struct Demangler { /// \see https://dlang.org/spec/abi.html#TypeBackRef . const char *parseTypeBackref(OutputBuffer *Demangled, const char *Mangled); + /// Extract and demangle calling convention from a given mangled symbol and + /// append it to the output string. + /// + /// \param Demangled output buffer to write the demangled name. + /// \param Mangled mangled symbol to be demangled. + /// + /// \return the remaining string on success or nullptr on failure. + /// + /// \see https://dlang.org/spec/abi.html#CallConvention . + /// \see https://dlang.org/spec/abi.html#function_calling_conventions . + const char *parseCallConvention(OutputBuffer *Demangled, const char *Mangled); + /// Check whether it is the beginning of a symbol name. /// /// \param Mangled string to extract the symbol name. @@ -162,6 +174,58 @@ struct Demangler { /// \see https://dlang.org/spec/abi.html#QualifiedName . const char *parseQualified(OutputBuffer *Demangled, const char *Mangled); + /// Extract and demangle the D function attributes from a given mangled + /// symbol append it to the output string. 
+ /// + /// \param Demangled output buffer to write the demangled name. + /// \param Mangled mangled symbol to be demangled. + /// + /// \return the remaining string on success or nullptr on failure. + /// + /// \see https://dlang.org/spec/abi.html#FuncAttr . + const char *parseAttributes(OutputBuffer *Demangled, const char *Mangled); + + /// Extract and demangle the function type from a given mangled symbol + /// without the return type and append it to the arguments, calling + /// convention and attribute output strings, respectively. + /// + /// \param Args output buffer to write the demangled arguments. + /// \param Call output buffer to write the demangled calling convention. + /// \param Attr output buffer to write the demangled attributes. + /// \param Mangled mangled symbol to be demangled. + /// + /// \return the remaining string on success or nullptr on failure. + /// + /// \note Any of the output strings can be nullptr to throw the information + /// away. + /// + /// \see https://dlang.org/spec/abi.html#TypeFunctionNoReturn . + const char *parseFunctionTypeNoreturn(OutputBuffer *Args, OutputBuffer *Call, + OutputBuffer *Attr, + const char *Mangled); + + /// Extract and demangle the function type from a given mangled symbol + /// append it to the output string. + /// + /// \param Demangled output buffer to write the demangled name. + /// \param Mangled mangled symbol to be demangled. + /// + /// \return the remaining string on success or nullptr on failure. + /// + /// \see https://dlang.org/spec/abi.html#TypeFunction . + const char *parseFunctionType(OutputBuffer *Demangled, const char *Mangled); + + /// Extract and demangle the function arguments list from a given mangled + /// symbol append it to the output string. + /// + /// \param Demangled output buffer to write the demangled name. + /// \param Mangled mangled symbol to be demangled. + /// + /// \return the remaining string on success or nullptr on failure. + /// + /// \see https://dlang.org/spec/abi.html#Parameters . + const char *parseFunctionArgs(OutputBuffer *Demangled, const char *Mangled); + /// Extract and demangle a type from a given mangled symbol append it to /// the output string. 
/// @@ -342,6 +406,184 @@ bool Demangler::isSymbolName(const char *Mangled) { return std::isdigit(Qref[-Ret]); } +const char *Demangler::parseCallConvention(OutputBuffer *Demangled, + const char *Mangled) { + if (Mangled == nullptr || *Mangled == '\0') + return nullptr; + + switch (*Mangled) { + case 'F': // D + ++Mangled; + break; + + case 'U': // C + ++Mangled; + *Demangled << "extern(C) "; + break; + + case 'W': // Windows + ++Mangled; + *Demangled << "extern(Windows) "; + break; + + case 'V': // Pascal + ++Mangled; + *Demangled << "extern(Pascal) "; + break; + + case 'R': // C++ + ++Mangled; + *Demangled << "extern(C++) "; + break; + + case 'Y': // Objective-C + ++Mangled; + *Demangled << "extern(Objective-C) "; + break; + + default: + return nullptr; + } + + return Mangled; +} + +const char *Demangler::parseAttributes(OutputBuffer *Demangled, + const char *Mangled) { + if (Mangled == nullptr || *Mangled == '\0') + return nullptr; + + while (*Mangled == 'N') { + ++Mangled; + switch (*Mangled) { + case 'a': // pure + ++Mangled; + *Demangled << "pure "; + continue; + + case 'b': // nothrow + ++Mangled; + *Demangled << "nothrow "; + continue; + + case 'c': // ref + ++Mangled; + *Demangled << "ref "; + continue; + + case 'd': // @property + ++Mangled; + *Demangled << "@property "; + continue; + + case 'e': // @trusted + ++Mangled; + *Demangled << "@trusted "; + continue; + + case 'f': // @safe + ++Mangled; + *Demangled << "@safe "; + continue; + + case 'g': + case 'h': + case 'k': + case 'n': + // inout parameter is represented as 'Ng'. + // vector parameter is represented as 'Nh'. + // return parameter is represented as 'Nk'. + // typeof(*null) parameter is represented as 'Nn'. + // If we see this, then we know we're really in the + // parameter list. Rewind and break. + Mangled--; + break; + + case 'i': // @nogc + ++Mangled; + *Demangled << "@nogc "; + continue; + + case 'j': // return + ++Mangled; + *Demangled << "return "; + continue; + + case 'l': // scope + ++Mangled; + *Demangled << "scope "; + continue; + + case 'm': // @live + ++Mangled; + *Demangled << "@live "; + continue; + + default: // unknown attribute + return nullptr; + } + break; + } + + return Mangled; +} + +const char *Demangler::parseFunctionTypeNoreturn(OutputBuffer *Args, + OutputBuffer *Call, + OutputBuffer *Attr, + const char *Mangled) { + OutputBuffer Dump; + if (!initializeOutputBuffer(nullptr, nullptr, Dump, 32)) + return nullptr; + + // Skip over calling convention and attributes + Mangled = parseCallConvention(Call ? Call : &Dump, Mangled); + Mangled = parseAttributes(Attr ? Attr : &Dump, Mangled); + + if (Args) + *Args << '('; + + Mangled = parseFunctionArgs(Args ? 
Args : &Dump, Mangled); + if (Args) + *Args << ')'; + + std::free(Dump.getBuffer()); + return Mangled; +} + +const char *Demangler::parseFunctionType(OutputBuffer *Demangled, + const char *Mangled) { + OutputBuffer Attr, Args, Type; + + if (Mangled == nullptr || *Mangled == '\0') + return nullptr; + + // The order of the mangled string is: + // CallConvention FuncAttrs Arguments ArgClose Type + // The demangled string is re-ordered as: + // CallConvention Type Arguments FuncAttrs + if (!initializeOutputBuffer(nullptr, nullptr, Attr, 32) || + !initializeOutputBuffer(nullptr, nullptr, Args, 32) || + !initializeOutputBuffer(nullptr, nullptr, Type, 32)) + return nullptr; + + Mangled = parseFunctionTypeNoreturn(&Args, Demangled, &Attr, Mangled); + + // Function return type + Mangled = parseType(&Type, Mangled); + + // Append to decl in order + *Demangled << StringView(Type.getBuffer(), Type.getCurrentPosition()); + *Demangled << StringView(Args.getBuffer(), Args.getCurrentPosition()); + *Demangled << ' '; + *Demangled << StringView(Attr.getBuffer(), Attr.getCurrentPosition()); + + std::free(Attr.getBuffer()); + std::free(Args.getBuffer()); + std::free(Type.getBuffer()); + return Mangled; +} + const char *Demangler::parseMangle(OutputBuffer *Demangled, const char *Mangled) { // A D mangled symbol is comprised of both scope and type information. @@ -458,6 +700,69 @@ const char *Demangler::parseIdentifier(OutputBuffer *Demangled, return parseLName(Demangled, Mangled, Len); } +const char *Demangler::parseFunctionArgs(OutputBuffer *Demangled, + const char *Mangled) { + size_t N = 0; + + while (Mangled && *Mangled != '\0') { + switch (*Mangled) { + case 'X': // (variadic T t...) style + ++Mangled; + *Demangled << "..."; + return Mangled; + case 'Y': // (variadic T t, ...) style + ++Mangled; + *Demangled << ", ..."; + return Mangled; + case 'Z': // Normal function + ++Mangled; + return Mangled; + } + + if (N++) + *Demangled << ", "; + + if (*Mangled == 'M') // scope(T) + { + ++Mangled; + *Demangled << "scope "; + } + + if (Mangled[0] == 'N' && Mangled[1] == 'k') // return(T) + { + Mangled += 2; + *Demangled << "return "; + } + + switch (*Mangled) { + case 'I': // in(T) + ++Mangled; + *Demangled << "in "; + if (*Mangled == 'K') // in ref(T) + { + ++Mangled; + *Demangled << "ref "; + } + break; + case 'J': // out(T) + ++Mangled; + *Demangled << "out "; + break; + case 'K': // ref(T) + ++Mangled; + *Demangled << "ref "; + break; + case 'L': // lazy(T) + ++Mangled; + *Demangled << "lazy "; + break; + } + Mangled = parseType(Demangled, Mangled); + } + + return Mangled; +} + const char *Demangler::parseType(OutputBuffer *Demangled, const char *Mangled) { if (Mangled == nullptr || *Mangled == '\0') return nullptr; @@ -564,9 +869,18 @@ const char *Demangler::parseType(OutputBuffer *Demangled, const char *Mangled) { return Mangled; } - // TODO: Parse function types. - [[clang::fallthrough]]; + case 'F': // function T (D) + case 'U': // function T (C) + case 'W': // function T (Windows) + case 'V': // function T (Pascal) + case 'R': // function T (C++) + case 'Y': // function T (Objective-C) + // Function pointer types don't include the trailing asterisk. 
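For reference, and not as part of the patch, the sketch below condenses the mapping that parseCallConvention and parseAttributes implement; the expected demangled strings quoted in the comments come from the unit tests added later in this series.

#include <cassert>
#include <cstring>

// Calling convention characters handled by parseCallConvention:
//   'F' D (printed without a prefix)   'U' extern(C)
//   'W' extern(Windows)                'V' extern(Pascal)
//   'R' extern(C++)                    'Y' extern(Objective-C)
//
// FuncAttr characters handled by parseAttributes (each preceded by 'N'):
//   a pure, b nothrow, c ref, d @property, e @trusted, f @safe,
//   i @nogc, j return, l scope, m @live.
// 'Ng', 'Nh', 'Nk' and 'Nn' are not attributes; they mark inout, __vector,
// return and typeof(*null) parameters and end the attribute list.
static const char *callConventionNameSketch(char C) {
  switch (C) {
  case 'F': return "";
  case 'U': return "extern(C) ";
  case 'W': return "extern(Windows) ";
  case 'V': return "extern(Pascal) ";
  case 'R': return "extern(C++) ";
  case 'Y': return "extern(Objective-C) ";
  default:  return nullptr; // Not a calling convention.
  }
}

int main() {
  // "_D8demangle4testFaZv" uses the plain D convention 'F' and demangles
  // to "demangle.test(char)"; "_D8demangle4testFDUZaZv" demangles to
  // "demangle.test(extern(C) char() delegate)".  Both strings are taken
  // from DLangDemangleTest.cpp in this series.
  assert(std::strcmp(callConventionNameSketch('U'), "extern(C) ") == 0);
  assert(callConventionNameSketch('Z') == nullptr);
  return 0;
}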
+ Mangled = parseFunctionType(Demangled, Mangled); + *Demangled << "function"; + return Mangled; + case 'C': // class T case 'S': // struct T case 'E': // enum T diff --git a/llvm/unittests/Demangle/DLangDemangleTest.cpp b/llvm/unittests/Demangle/DLangDemangleTest.cpp index 04caafb6b2a2..eb646dc1d05a 100644 --- a/llvm/unittests/Demangle/DLangDemangleTest.cpp +++ b/llvm/unittests/Demangle/DLangDemangleTest.cpp @@ -55,4 +55,5 @@ INSTANTIATE_TEST_SUITE_P( std::make_pair("_D8demangle3ABCQeQg1ai", "demangle.ABC.ABC.ABC.a"), std::make_pair("_D8demangle3ABCQeQaaaa1ai", nullptr), std::make_pair("_D8demangle4ABCiQfQh1aQh", "demangle.ABCi.ABCi.ABCi.a"), - std::make_pair("_D8demangle4ABCiQfQh1aQaaa", nullptr))); + std::make_pair("_D8demangle4ABCiQfQh1aQaaa", nullptr), + std::make_pair("_D8demangle4testPFLAiYi", "demangle.test"))); From 2b86e0a48726456cdaa2fa107ab8c0a96819d47a Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Lu=C3=ADs=20Ferreira?= Date: Wed, 6 Oct 2021 21:29:10 +0100 Subject: [PATCH 261/293] [Demangle] Add support for D functions MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Differential Revision: https://reviews.llvm.org/D111421 Signed-off-by: Luís Ferreira --- llvm/lib/Demangle/DLangDemangle.cpp | 135 +++++++++++++++++- llvm/unittests/Demangle/DLangDemangleTest.cpp | 124 +++++++++++++++- 2 files changed, 253 insertions(+), 6 deletions(-) diff --git a/llvm/lib/Demangle/DLangDemangle.cpp b/llvm/lib/Demangle/DLangDemangle.cpp index 2eae12f8ba34..1d4c94b89b0a 100644 --- a/llvm/lib/Demangle/DLangDemangle.cpp +++ b/llvm/lib/Demangle/DLangDemangle.cpp @@ -129,6 +129,16 @@ struct Demangler { /// \see https://dlang.org/spec/abi.html#function_calling_conventions . const char *parseCallConvention(OutputBuffer *Demangled, const char *Mangled); + /// Check whether it is a function calling convention. + /// + /// \param Mangled string to extract the function calling convention. + /// + /// \return True on success, false otherwise. + /// + /// \see https://dlang.org/spec/abi.html#CallConvention . + /// \see https://dlang.org/spec/abi.html#function_calling_conventions . + bool isCallConvention(const char *Mangled); + /// Check whether it is the beginning of a symbol name. /// /// \param Mangled string to extract the symbol name. @@ -168,11 +178,14 @@ struct Demangler { /// /// \param Demangled Output buffer to write the demangled name. /// \param Mangled Mangled symbol to be demangled. + /// \param SuffixModifiers True if we are printing the modifiers after the + /// symbol, false otherwise. /// /// \return The remaining string on success or nullptr on failure. /// /// \see https://dlang.org/spec/abi.html#QualifiedName . - const char *parseQualified(OutputBuffer *Demangled, const char *Mangled); + const char *parseQualified(OutputBuffer *Demangled, const char *Mangled, + bool SuffixModifiers); /// Extract and demangle the D function attributes from a given mangled /// symbol append it to the output string. @@ -237,6 +250,17 @@ struct Demangler { /// \see https://dlang.org/spec/abi.html#Type . const char *parseType(OutputBuffer *Demangled, const char *Mangled); + /// Extract and demangle the type modifiers from a given mangled symbol + /// append it to the output string. + /// + /// \param Demangled output buffer to write the demangled name. + /// \param Mangled mangled symbol to be demangled. + /// + /// \return the remaining string on success or nullptr on failure. + /// + /// \see https://dlang.org/spec/abi.html#TypeModifiers . 
+ const char *parseTypeModifiers(OutputBuffer *Demangled, const char *Mangled); + /// The string we are demangling. const char *Str; /// The index of the last back reference. @@ -387,6 +411,21 @@ const char *Demangler::parseTypeBackref(OutputBuffer *Demangled, return Mangled; } +bool Demangler::isCallConvention(const char *Mangled) { + switch (*Mangled) { + case 'F': + case 'U': + case 'V': + case 'W': + case 'R': + case 'Y': + return true; + + default: + return false; + } +} + bool Demangler::isSymbolName(const char *Mangled) { long Ret; const char *Qref = Mangled; @@ -597,7 +636,7 @@ const char *Demangler::parseMangle(OutputBuffer *Demangled, // a function or the type of a variable. Mangled += 2; - Mangled = parseQualified(Demangled, Mangled); + Mangled = parseQualified(Demangled, Mangled, true); if (Mangled != nullptr) { // Artificial symbols end with 'Z' and have no type. @@ -618,7 +657,8 @@ const char *Demangler::parseMangle(OutputBuffer *Demangled, } const char *Demangler::parseQualified(OutputBuffer *Demangled, - const char *Mangled) { + const char *Mangled, + bool SuffixModifiers) { // Qualified names are identifiers separated by their encoded length. // Nested functions also encode their argument types without specifying // what they return. @@ -651,11 +691,76 @@ const char *Demangler::parseQualified(OutputBuffer *Demangled, Mangled = parseIdentifier(Demangled, Mangled); + if (Mangled && (*Mangled == 'M' || isCallConvention(Mangled))) { + const char *Start = Mangled; + int Saved = Demangled->getCurrentPosition(); + + // Save the type modifiers for appending at the end if needed. + OutputBuffer Mods; + if (!initializeOutputBuffer(nullptr, nullptr, Mods, 32)) + return nullptr; + + // Skip over 'this' parameter and type modifiers. + if (*Mangled == 'M') { + ++Mangled; + Mangled = parseTypeModifiers(&Mods, Mangled); + Demangled->setCurrentPosition(Saved); + } + + Mangled = parseFunctionTypeNoreturn(Demangled, nullptr, nullptr, Mangled); + if (SuffixModifiers) + *Demangled << StringView(Mods.getBuffer(), Mods.getCurrentPosition()); + + if (Mangled == nullptr || *Mangled == '\0') { + // Did not match the rule we were looking for. + Mangled = Start; + Demangled->setCurrentPosition(Saved); + } + + std::free(Mods.getBuffer()); + } } while (Mangled && isSymbolName(Mangled)); return Mangled; } +const char *Demangler::parseTypeModifiers(OutputBuffer *Demangled, + const char *Mangled) { + if (Mangled == nullptr || *Mangled == '\0') + return nullptr; + + switch (*Mangled) { + case 'x': // const + ++Mangled; + *Demangled << " const"; + return Mangled; + + case 'y': // immutable + ++Mangled; + *Demangled << " immutable"; + return Mangled; + + case 'O': // shared + ++Mangled; + *Demangled << " shared"; + return parseTypeModifiers(Demangled, Mangled); + + case 'N': + ++Mangled; + if (*Mangled == 'g') // wild + { + ++Mangled; + *Demangled << " inout"; + return parseTypeModifiers(Demangled, Mangled); + } + + return nullptr; + + default: + return Mangled; + } +} + const char *Demangler::parseIdentifier(OutputBuffer *Demangled, const char *Mangled) { unsigned long Len; @@ -886,8 +991,7 @@ const char *Demangler::parseType(OutputBuffer *Demangled, const char *Mangled) { case 'E': // enum T case 'T': // typedef T ++Mangled; - // TODO: Handle type modifiers and type functions in qualifiers. - return parseQualified(Demangled, Mangled); + return parseQualified(Demangled, Mangled, false); // TODO: Parse delegate types. // TODO: Parse tuple types. 
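As a brief aside (not part of the patch), member functions are mangled with an 'M' marker followed by optional type modifiers, which parseQualified now prints as a suffix. The sketch below restates the modifier mapping from parseTypeModifiers and checks it against expected strings from the unit tests added later in this series.

#include <cassert>
#include <string>

// Type modifier characters recognised by parseTypeModifiers:
//   'x' const, 'y' immutable, 'O' shared (may be followed by further
//   modifiers), 'N' 'g' inout (wild).  Anything else ends the list
//   (the real parser treats a stray 'N' as an error).
static std::string typeModifiersSketch(const char *M) {
  std::string Out;
  while (M && *M) {
    if (*M == 'x') { Out += " const"; return Out; }
    if (*M == 'y') { Out += " immutable"; return Out; }
    if (*M == 'O') { Out += " shared"; ++M; continue; }
    if (M[0] == 'N' && M[1] == 'g') { Out += " inout"; M += 2; continue; }
    break;
  }
  return Out;
}

int main() {
  // "_D8demangle4testMxFZv": 'M' marks a member function, 'x' is const,
  // "F...Z" is the (empty) parameter list and 'v' the return type; the
  // expected demangling is "demangle.test() const".
  assert(typeModifiersSketch("xFZv") == " const");
  // "MONgx" stacks shared + inout + const, as in
  // "_D8demangle4testMONgxFZv" -> "demangle.test() shared inout const".
  assert(typeModifiersSketch("ONgxFZv") == " shared inout const");
  return 0;
}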
@@ -1027,6 +1131,18 @@ const char *Demangler::parseLName(OutputBuffer *Demangled, const char *Mangled, unsigned long Len) { switch (Len) { case 6: + if (strncmp(Mangled, "__ctor", Len) == 0) { + // Constructor symbol for a class/struct. + *Demangled << "this"; + Mangled += Len; + return Mangled; + } + if (strncmp(Mangled, "__dtor", Len) == 0) { + // Destructor symbol for a class/struct. + *Demangled << "~this"; + Mangled += Len; + return Mangled; + } if (strncmp(Mangled, "__initZ", Len + 1) == 0) { // The static initializer for a given symbol. Demangled->prepend("initializer for "); @@ -1053,6 +1169,15 @@ const char *Demangler::parseLName(OutputBuffer *Demangled, const char *Mangled, } break; + case 10: + if (strncmp(Mangled, "__postblitMFZ", Len + 3) == 0) { + // Postblit symbol for a struct. + *Demangled << "this(this)"; + Mangled += Len + 3; + return Mangled; + } + break; + case 11: if (strncmp(Mangled, "__InterfaceZ", Len + 1) == 0) { // The interface symbol for a given class. diff --git a/llvm/unittests/Demangle/DLangDemangleTest.cpp b/llvm/unittests/Demangle/DLangDemangleTest.cpp index eb646dc1d05a..4ebd0ac7ccf1 100644 --- a/llvm/unittests/Demangle/DLangDemangleTest.cpp +++ b/llvm/unittests/Demangle/DLangDemangleTest.cpp @@ -46,6 +46,10 @@ INSTANTIATE_TEST_SUITE_P( "Interface for demangle.test"), std::make_pair("_D8demangle4test12__ModuleInfoZ", "ModuleInfo for demangle.test"), + std::make_pair("_D8demangle4test6__ctorMFZv", "demangle.test.this()"), + std::make_pair("_D8demangle4test6__dtorMFZv", "demangle.test.~this()"), + std::make_pair("_D8demangle4test10__postblitMFZv", + "demangle.test.this(this)"), std::make_pair("_D8demangle4__S14testZ", "demangle.test"), std::make_pair("_D8demangle4__Sd4testZ", "demangle.__Sd.test"), std::make_pair("_D8demangle1ii", "demangle.i"), @@ -56,4 +60,122 @@ INSTANTIATE_TEST_SUITE_P( std::make_pair("_D8demangle3ABCQeQaaaa1ai", nullptr), std::make_pair("_D8demangle4ABCiQfQh1aQh", "demangle.ABCi.ABCi.ABCi.a"), std::make_pair("_D8demangle4ABCiQfQh1aQaaa", nullptr), - std::make_pair("_D8demangle4testPFLAiYi", "demangle.test"))); + std::make_pair("_D8demangle4testPFLAiYi", "demangle.test"), + std::make_pair("_D8demangle4testFaZv", "demangle.test(char)"), + std::make_pair("_D8demangle4testFbZv", "demangle.test(bool)"), + std::make_pair("_D8demangle4testFcZv", "demangle.test(creal)"), + std::make_pair("_D8demangle4testFdZv", "demangle.test(double)"), + std::make_pair("_D8demangle4testFeZv", "demangle.test(real)"), + std::make_pair("_D8demangle4testFfZv", "demangle.test(float)"), + std::make_pair("_D8demangle4testFgZv", "demangle.test(byte)"), + std::make_pair("_D8demangle4testFhZv", "demangle.test(ubyte)"), + std::make_pair("_D8demangle4testFiZv", "demangle.test(int)"), + std::make_pair("_D8demangle4testFjZv", "demangle.test(ireal)"), + std::make_pair("_D8demangle4testFkZv", "demangle.test(uint)"), + std::make_pair("_D8demangle4testFlZv", "demangle.test(long)"), + std::make_pair("_D8demangle4testFmZv", "demangle.test(ulong)"), + std::make_pair("_D8demangle4testFZv", "demangle.test()"), + std::make_pair("_D8demangle4testMFZ2fnMFZv", "demangle.test().fn()"), + std::make_pair("_D8demangle4testFnZv", "demangle.test(typeof(null))"), + std::make_pair("_D8demangle4testFNnZv", "demangle.test(typeof(*null))"), + std::make_pair("_D8demangle4testFoZv", "demangle.test(ifloat)"), + std::make_pair("_D8demangle4testFpZv", "demangle.test(idouble)"), + std::make_pair("_D8demangle4testFqZv", "demangle.test(cfloat)"), + std::make_pair("_D8demangle4testFrZv", 
"demangle.test(cdouble)"), + std::make_pair("_D8demangle4testFsZv", "demangle.test(short)"), + std::make_pair("_D8demangle4testFtZv", "demangle.test(ushort)"), + std::make_pair("_D8demangle4testFuZv", "demangle.test(wchar)"), + std::make_pair("_D8demangle4testFvZv", "demangle.test(void)"), + std::make_pair("_D8demangle4testFwZv", "demangle.test(dchar)"), + std::make_pair("_D8demangle4testFziZv", "demangle.test(cent)"), + std::make_pair("_D8demangle4testFzkZv", "demangle.test(ucent)"), + std::make_pair("_D8demangle4testFOaZv", "demangle.test(shared(char))"), + std::make_pair("_D8demangle4testFxaZv", "demangle.test(const(char))"), + std::make_pair("_D8demangle4testFyaZv", + "demangle.test(immutable(char))"), + std::make_pair("_D8demangle4testFNgaZv", "demangle.test(inout(char))"), + std::make_pair("_D8demangle4testFOxaZv", + "demangle.test(shared(const(char)))"), + std::make_pair("_D8demangle4testFONgaZv", + "demangle.test(shared(inout(char)))"), + std::make_pair("_D8demangle4testFAaZv", "demangle.test(char[])"), + std::make_pair("_D8demangle4testFAAaZv", "demangle.test(char[][])"), + std::make_pair("_D8demangle4testFAAAaZv", "demangle.test(char[][][])"), + std::make_pair("_D8demangle4testFHaaZv", "demangle.test(char[char])"), + std::make_pair("_D8demangle4testFHHaaaZv", + "demangle.test(char[char[char]])"), + std::make_pair("_D8demangle4testFPaZv", "demangle.test(char*)"), + std::make_pair("_D8demangle4testFPPaZv", "demangle.test(char**)"), + std::make_pair("_D8demangle4testFPPPaZv", "demangle.test(char***)"), + std::make_pair("_D8demangle4testFG42aZv", "demangle.test(char[42])"), + std::make_pair("_D8demangle4testFG42G42aZv", + "demangle.test(char[42][42])"), + std::make_pair("_D8demangle4testFG42G42G42aZv", + "demangle.test(char[42][42][42])"), + std::make_pair("_D8demangle4testFG1234567890aZv", + "demangle.test(char[1234567890])"), + std::make_pair("_D8demangle4testFNhG8gZv", + "demangle.test(__vector(byte[8]))"), + std::make_pair("_D8demangle4testFNhG16gZv", + "demangle.test(__vector(byte[16]))"), + std::make_pair("_D8demangle4testFNhG32gZv", + "demangle.test(__vector(byte[32]))"), + std::make_pair("_D8demangle4testFNhG4sZv", + "demangle.test(__vector(short[4]))"), + std::make_pair("_D8demangle4testFNhG8sZv", + "demangle.test(__vector(short[8]))"), + std::make_pair("_D8demangle4testFNhG16sZv", + "demangle.test(__vector(short[16]))"), + std::make_pair("_D8demangle4testFNhG2iZv", + "demangle.test(__vector(int[2]))"), + std::make_pair("_D8demangle4testFNhG4iZv", + "demangle.test(__vector(int[4]))"), + std::make_pair("_D8demangle4testFNhG8iZv", + "demangle.test(__vector(int[8]))"), + std::make_pair("_D8demangle4testFNhG1lZv", + "demangle.test(__vector(long[1]))"), + std::make_pair("_D8demangle4testFNhG2lZv", + "demangle.test(__vector(long[2]))"), + std::make_pair("_D8demangle4testFNhG4lZv", + "demangle.test(__vector(long[4]))"), + std::make_pair("_D8demangle4testFNhG2fZv", + "demangle.test(__vector(float[2]))"), + std::make_pair("_D8demangle4testFNhG4fZv", + "demangle.test(__vector(float[4]))"), + std::make_pair("_D8demangle4testFNhG8fZv", + "demangle.test(__vector(float[8]))"), + std::make_pair("_D8demangle4testFNhG1dZv", + "demangle.test(__vector(double[1]))"), + std::make_pair("_D8demangle4testFNhG2dZv", + "demangle.test(__vector(double[2]))"), + std::make_pair("_D8demangle4testFNhG4dZv", + "demangle.test(__vector(double[4]))"), + std::make_pair("_D8demangle4testFC5classZv", "demangle.test(class)"), + std::make_pair("_D8demangle4testFC5class4testZv", + "demangle.test(class.test)"), + 
std::make_pair("_D8demangle4testFS6structZv", "demangle.test(struct)"), + std::make_pair("_D8demangle4testFS6struct4testZv", + "demangle.test(struct.test)"), + std::make_pair("_D8demangle4testFE4enumZv", "demangle.test(enum)"), + std::make_pair("_D8demangle4testFE4enum4testZv", + "demangle.test(enum.test)"), + std::make_pair("_D8demangle4testFT7typedefZv", + "demangle.test(typedef)"), + std::make_pair("_D8demangle4testFT7typedef4testZv", + "demangle.test(typedef.test)"), + std::make_pair("_D8demangle4testFIaZv", "demangle.test(in char)"), + std::make_pair("_D8demangle4testFIKaZv", "demangle.test(in ref char)"), + std::make_pair("_D8demangle4testFJaZv", "demangle.test(out char)"), + std::make_pair("_D8demangle4testFKaZv", "demangle.test(ref char)"), + std::make_pair("_D8demangle4testFLaZv", "demangle.test(lazy char)"), + std::make_pair("_D8demangle4testFMaZv", "demangle.test(scope char)"), + std::make_pair("_D8demangle4testFNjaZv", "demangle.test(char)"), + std::make_pair("_D8demangle4testFNkaZv", "demangle.test(return char)"), + std::make_pair("_D8demangle4testFNlaZv", "demangle.test(char)"), + std::make_pair("_D8demangle4testFaXv", "demangle.test(char...)"), + std::make_pair("_D8demangle4testFaYv", "demangle.test(char, ...)"), + std::make_pair("_D8demangle4testFaaYv", + "demangle.test(char, char, ...)"), + std::make_pair("_D8demangle4testFXv", "demangle.test(...)"), + std::make_pair("_D8demangle4testFYv", "demangle.test(, ...)"), + std::make_pair("_D8demangle4testFaaZv", "demangle.test(char, char)"))); From ad79c704e0b8e54fcbf2bc55588ffac3715a499f Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Lu=C3=ADs=20Ferreira?= Date: Thu, 7 Oct 2021 18:21:37 +0100 Subject: [PATCH 262/293] [Demangle] Add support for D native tuple types MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This patch adds support for native tuple types as described by D ABI specification. Differential Revision: https://reviews.llvm.org/D111422 Signed-off-by: Luís Ferreira --- llvm/lib/Demangle/DLangDemangle.cpp | 41 ++++++++++++++++++- llvm/unittests/Demangle/DLangDemangleTest.cpp | 10 ++++- 2 files changed, 48 insertions(+), 3 deletions(-) diff --git a/llvm/lib/Demangle/DLangDemangle.cpp b/llvm/lib/Demangle/DLangDemangle.cpp index 1d4c94b89b0a..421fdefdf73b 100644 --- a/llvm/lib/Demangle/DLangDemangle.cpp +++ b/llvm/lib/Demangle/DLangDemangle.cpp @@ -261,6 +261,17 @@ struct Demangler { /// \see https://dlang.org/spec/abi.html#TypeModifiers . const char *parseTypeModifiers(OutputBuffer *Demangled, const char *Mangled); + /// Extract and demangle a tuple value from a given mangled symbol append it + /// to the output string. + /// + /// \param Demangled output buffer to write the demangled name. + /// \param Mangled mangled symbol to be demangled. + /// + /// \return the remaining string on success or nullptr on failure. + /// + /// \see https://dlang.org/spec/abi.html#TypeTuple . + const char *parseTuple(OutputBuffer *Demangled, const char *Mangled); + /// The string we are demangling. const char *Str; /// The index of the last back reference. @@ -993,8 +1004,11 @@ const char *Demangler::parseType(OutputBuffer *Demangled, const char *Mangled) { ++Mangled; return parseQualified(Demangled, Mangled, false); - // TODO: Parse delegate types. - // TODO: Parse tuple types. + // TODO: Parse delegate types. + + case 'B': // tuple T + ++Mangled; + return parseTuple(Demangled, Mangled); // Basic types. 
case 'n': @@ -1127,6 +1141,29 @@ const char *Demangler::parseType(OutputBuffer *Demangled, const char *Mangled) { } } +const char *Demangler::parseTuple(OutputBuffer *Demangled, + const char *Mangled) { + unsigned long Elements; + + Mangled = decodeNumber(Mangled, &Elements); + if (Mangled == nullptr) + return nullptr; + + *Demangled << "tuple("; + + while (Elements--) { + Mangled = parseType(Demangled, Mangled); + if (Mangled == nullptr) + return nullptr; + + if (Elements != 0) + *Demangled << ", "; + } + + *Demangled << ')'; + return Mangled; +} + const char *Demangler::parseLName(OutputBuffer *Demangled, const char *Mangled, unsigned long Len) { switch (Len) { diff --git a/llvm/unittests/Demangle/DLangDemangleTest.cpp b/llvm/unittests/Demangle/DLangDemangleTest.cpp index 4ebd0ac7ccf1..b5341b3facd3 100644 --- a/llvm/unittests/Demangle/DLangDemangleTest.cpp +++ b/llvm/unittests/Demangle/DLangDemangleTest.cpp @@ -178,4 +178,12 @@ INSTANTIATE_TEST_SUITE_P( "demangle.test(char, char, ...)"), std::make_pair("_D8demangle4testFXv", "demangle.test(...)"), std::make_pair("_D8demangle4testFYv", "demangle.test(, ...)"), - std::make_pair("_D8demangle4testFaaZv", "demangle.test(char, char)"))); + std::make_pair("_D8demangle4testFaaZv", "demangle.test(char, char)"), + std::make_pair("_D8demangle4testFB0Zv", "demangle.test(tuple())"), + std::make_pair("_D8demangle4testFB1aZv", "demangle.test(tuple(char))"), + std::make_pair("_D8demangle4testFB2aaZv", + "demangle.test(tuple(char, char))"), + std::make_pair("_D8demangle4testFB3aaaZv", + "demangle.test(tuple(char, char, char))"), + std::make_pair("_D8demangle4testFB2OaaZv", + "demangle.test(tuple(shared(char), char))"))); From 50388f214286865cdeb3e48793f03113ac2b8469 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Lu=C3=ADs=20Ferreira?= Date: Thu, 7 Oct 2021 18:53:16 +0100 Subject: [PATCH 263/293] [Demangle] Add support for D delegate types MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Differential Revision: https://reviews.llvm.org/D111423 Signed-off-by: Luís Ferreira --- llvm/lib/Demangle/DLangDemangle.cpp | 41 ++++- llvm/unittests/Demangle/DLangDemangleTest.cpp | 159 +++++++++++++++++- 2 files changed, 193 insertions(+), 7 deletions(-) diff --git a/llvm/lib/Demangle/DLangDemangle.cpp b/llvm/lib/Demangle/DLangDemangle.cpp index 421fdefdf73b..76d162069e8c 100644 --- a/llvm/lib/Demangle/DLangDemangle.cpp +++ b/llvm/lib/Demangle/DLangDemangle.cpp @@ -110,12 +110,15 @@ struct Demangler { /// /// \param Demangled output buffer to write the demangled name. /// \param Mangled mangled symbol to be demangled. + /// \param IsFunction whether the backreferenced type is expected to be a + /// function. /// /// \return the remaining string on success or nullptr on failure. /// /// \see https://dlang.org/spec/abi.html#back_ref . /// \see https://dlang.org/spec/abi.html#TypeBackRef . - const char *parseTypeBackref(OutputBuffer *Demangled, const char *Mangled); + const char *parseTypeBackref(OutputBuffer *Demangled, const char *Mangled, + bool IsFunction); /// Extract and demangle calling convention from a given mangled symbol and /// append it to the output string. @@ -393,7 +396,7 @@ const char *Demangler::parseSymbolBackref(OutputBuffer *Demangled, } const char *Demangler::parseTypeBackref(OutputBuffer *Demangled, - const char *Mangled) { + const char *Mangled, bool IsFunction) { // A type back reference always points to a letter. 
// TypeBackRef: // Q NumberBackRef @@ -411,8 +414,11 @@ const char *Demangler::parseTypeBackref(OutputBuffer *Demangled, // Get position of the back reference. Mangled = decodeBackref(Mangled, &Backref); - // TODO: Add support for function type back references. - Backref = parseType(Demangled, Backref); + // Must point to a type. + if (IsFunction) + Backref = parseFunctionType(Demangled, Backref); + else + Backref = parseType(Demangled, Backref); LastBackref = SaveRefPos; @@ -1004,7 +1010,30 @@ const char *Demangler::parseType(OutputBuffer *Demangled, const char *Mangled) { ++Mangled; return parseQualified(Demangled, Mangled, false); - // TODO: Parse delegate types. + case 'D': // delegate T + { + OutputBuffer Mods; + if (!initializeOutputBuffer(nullptr, nullptr, Mods, 32)) + return nullptr; + + size_t Szmods; + ++Mangled; + + Mangled = parseTypeModifiers(&Mods, Mangled); + Szmods = Mods.getCurrentPosition(); + + // Back referenced function type. + if (Mangled && *Mangled == 'Q') + Mangled = parseTypeBackref(Demangled, Mangled, true); + else + Mangled = parseFunctionType(Demangled, Mangled); + + *Demangled << "delegate"; + *Demangled << StringView(Mods.getBuffer(), Szmods); + + std::free(Mods.getBuffer()); + return Mangled; + } case 'B': // tuple T ++Mangled; @@ -1134,7 +1163,7 @@ const char *Demangler::parseType(OutputBuffer *Demangled, const char *Mangled) { // Back referenced type. case 'Q': - return parseTypeBackref(Demangled, Mangled); + return parseTypeBackref(Demangled, Mangled, false); default: // unhandled. return nullptr; diff --git a/llvm/unittests/Demangle/DLangDemangleTest.cpp b/llvm/unittests/Demangle/DLangDemangleTest.cpp index b5341b3facd3..785e617b02bf 100644 --- a/llvm/unittests/Demangle/DLangDemangleTest.cpp +++ b/llvm/unittests/Demangle/DLangDemangleTest.cpp @@ -186,4 +186,161 @@ INSTANTIATE_TEST_SUITE_P( std::make_pair("_D8demangle4testFB3aaaZv", "demangle.test(tuple(char, char, char))"), std::make_pair("_D8demangle4testFB2OaaZv", - "demangle.test(tuple(shared(char), char))"))); + "demangle.test(tuple(shared(char), char))"), + std::make_pair("_D8demangle4testFB3aDFZaaZv", + "demangle.test(tuple(char, char() delegate, char))"), + std::make_pair("_D8demangle4testFDFZaZv", + "demangle.test(char() delegate)"), + std::make_pair("_D8demangle4testFDUZaZv", + "demangle.test(extern(C) char() delegate)"), + std::make_pair("_D8demangle4testFDWZaZv", + "demangle.test(extern(Windows) char() delegate)"), + std::make_pair("_D8demangle4testFDVZaZv", + "demangle.test(extern(Pascal) char() delegate)"), + std::make_pair("_D8demangle4testFDRZaZv", + "demangle.test(extern(C++) char() delegate)"), + std::make_pair("_D8demangle4testFDYZaZv", + "demangle.test(extern(Objective-C) char() delegate)"), + std::make_pair("_D8demangle4testFPFZaZv", + "demangle.test(char() function)"), + std::make_pair("_D8demangle4testFPUZaZv", + "demangle.test(extern(C) char() function)"), + std::make_pair("_D8demangle4testFPWZaZv", + "demangle.test(extern(Windows) char() function)"), + std::make_pair("_D8demangle4testFPVZaZv", + "demangle.test(extern(Pascal) char() function)"), + std::make_pair("_D8demangle4testFPRZaZv", + "demangle.test(extern(C++) char() function)"), + std::make_pair("_D8demangle4testFPYZaZv", + "demangle.test(extern(Objective-C) char() function)"), + std::make_pair("_D8demangle4testFDFNaZaZv", + "demangle.test(char() pure delegate)"), + std::make_pair("_D8demangle4testFDFNbZaZv", + "demangle.test(char() nothrow delegate)"), + std::make_pair("_D8demangle4testFDFNcZaZv", + "demangle.test(char() 
ref delegate)"), + std::make_pair("_D8demangle4testFDFNdZaZv", + "demangle.test(char() @property delegate)"), + std::make_pair("_D8demangle4testFDFNeZaZv", + "demangle.test(char() @trusted delegate)"), + std::make_pair("_D8demangle4testFDFNfZaZv", + "demangle.test(char() @safe delegate)"), + std::make_pair("_D8demangle4testFDFNiZaZv", + "demangle.test(char() @nogc delegate)"), + std::make_pair("_D8demangle4testFDFNmZaZv", + "demangle.test(char() @live delegate)"), + std::make_pair("_D8demangle4testFDFNaNbZaZv", + "demangle.test(char() pure nothrow delegate)"), + std::make_pair("_D8demangle4testFDFNbNaZaZv", + "demangle.test(char() nothrow pure delegate)"), + std::make_pair("_D8demangle4testFDFNdNfNaZaZv", + "demangle.test(char() @property @safe pure delegate)"), + std::make_pair("_D8demangle4testFNjDFZaZv", + "demangle.test(char() delegate)"), + std::make_pair("_D8demangle4testFNkDFZaZv", + "demangle.test(return char() delegate)"), + std::make_pair("_D8demangle4testFDFNjZaZv", + "demangle.test(char() return delegate)"), + std::make_pair("_D8demangle4testFNjNkDFNjZaZv", + "demangle.test(return char() return delegate)"), + std::make_pair("_D8demangle4testFNlDFZaZv", + "demangle.test(char() delegate)"), + std::make_pair("_D8demangle4testFMDFZaZv", + "demangle.test(scope char() delegate)"), + std::make_pair("_D8demangle4testFDFNlZaZv", + "demangle.test(char() scope delegate)"), + std::make_pair("_D8demangle4testFMDFNlZaZv", + "demangle.test(scope char() scope delegate)"), + std::make_pair("_D8demangle4testFNlMDFNlZaZv", + "demangle.test(scope char() scope delegate)"), + std::make_pair("_D8demangle4testFPFNaZaZv", + "demangle.test(char() pure function)"), + std::make_pair("_D8demangle4testFPFNbZaZv", + "demangle.test(char() nothrow function)"), + std::make_pair("_D8demangle4testFPFNcZaZv", + "demangle.test(char() ref function)"), + std::make_pair("_D8demangle4testFPFNdZaZv", + "demangle.test(char() @property function)"), + std::make_pair("_D8demangle4testFPFNeZaZv", + "demangle.test(char() @trusted function)"), + std::make_pair("_D8demangle4testFPFNfZaZv", + "demangle.test(char() @safe function)"), + std::make_pair("_D8demangle4testFPFNiZaZv", + "demangle.test(char() @nogc function)"), + std::make_pair("_D8demangle4testFPFNmZaZv", + "demangle.test(char() @live function)"), + std::make_pair("_D8demangle4testFPFNaNbZaZv", + "demangle.test(char() pure nothrow function)"), + std::make_pair("_D8demangle4testFPFNbNaZaZv", + "demangle.test(char() nothrow pure function)"), + std::make_pair("_D8demangle4testFPFNdNfNaZaZv", + "demangle.test(char() @property @safe pure function)"), + std::make_pair("_D8demangle4testFNjPFZaZv", + "demangle.test(char() function)"), + std::make_pair("_D8demangle4testFNkPFZaZv", + "demangle.test(return char() function)"), + std::make_pair("_D8demangle4testFPFNjZaZv", + "demangle.test(char() return function)"), + std::make_pair("_D8demangle4testFNjNkPFNjZaZv", + "demangle.test(return char() return function)"), + std::make_pair("_D8demangle4testFNlPFZaZv", + "demangle.test(char() function)"), + std::make_pair("_D8demangle4testFMPFZaZv", + "demangle.test(scope char() function)"), + std::make_pair("_D8demangle4testFPFNlZaZv", + "demangle.test(char() scope function)"), + std::make_pair("_D8demangle4testFMPFNlZaZv", + "demangle.test(scope char() scope function)"), + std::make_pair("_D8demangle4testFNlMPFNlZaZv", + "demangle.test(scope char() scope function)"), + std::make_pair("_D8demangle4test6__ctorMFZv", "demangle.test.this()"), + std::make_pair("_D8demangle4test6__dtorMFZv", 
"demangle.test.~this()"), + std::make_pair("_D8demangle4test10__postblitMFZv", + "demangle.test.this(this)"), + std::make_pair("_D8demangle4testFHAbaZv", + "demangle.test(char[bool[]])"), + std::make_pair("_D8demangle4testFHG42caZv", + "demangle.test(char[creal[42]])"), + std::make_pair("_D8demangle4testFAiXv", "demangle.test(int[]...)"), + std::make_pair("_D8demangle4testFLAiXv", + "demangle.test(lazy int[]...)"), + std::make_pair("_D8demangle4testFAiYv", "demangle.test(int[], ...)"), + std::make_pair("_D8demangle4testFLAiYv", + "demangle.test(lazy int[], ...)"), + std::make_pair("_D8demangle4testFLilZv", + "demangle.test(lazy int, long)"), + std::make_pair("_D8demangle4testFLliZv", + "demangle.test(lazy long, int)"), + std::make_pair( + "_D8demangle4testFLC6ObjectLDFLiZiZi", + "demangle.test(lazy Object, lazy int(lazy int) delegate)"), + std::make_pair("_D8demangle4testMxFZv", "demangle.test() const"), + std::make_pair("_D8demangle4testMyFZv", "demangle.test() immutable"), + std::make_pair("_D8demangle4testMNgFZv", "demangle.test() inout"), + std::make_pair("_D8demangle4testMNgxFZv", + "demangle.test() inout const"), + std::make_pair("_D8demangle4testMOFZv", "demangle.test() shared"), + std::make_pair("_D8demangle4testMOxFZv", + "demangle.test() shared const"), + std::make_pair("_D8demangle4testMONgFZv", + "demangle.test() shared inout"), + std::make_pair("_D8demangle4testMONgxFZv", + "demangle.test() shared inout const"), + std::make_pair("_D8demangle4testFDxFZaZv", + "demangle.test(char() delegate const)"), + std::make_pair("_D8demangle4testFDyFZaZv", + "demangle.test(char() delegate immutable)"), + std::make_pair("_D8demangle4testFDNgFZaZv", + "demangle.test(char() delegate inout)"), + std::make_pair("_D8demangle4testFDNgxFZaZv", + "demangle.test(char() delegate inout const)"), + std::make_pair("_D8demangle4testFDOFZaZv", + "demangle.test(char() delegate shared)"), + std::make_pair("_D8demangle4testFDOxFZaZv", + "demangle.test(char() delegate shared const)"), + std::make_pair("_D8demangle4testFDONgFZaZv", + "demangle.test(char() delegate shared inout)"), + std::make_pair("_D8demangle4testFDONgxFZaZv", + "demangle.test(char() delegate shared inout const)"), + std::make_pair("_D8demangle004testFaZv", "demangle.test(char)"), + std::make_pair("_D8demangle000000004testFaZv", "demangle.test(char)"))); From f7afd8224d3e81b4650304e3f5944c703bcb3c88 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Lu=C3=ADs=20Ferreira?= Date: Thu, 7 Oct 2021 19:40:09 +0000 Subject: [PATCH 264/293] [Demangle] Add support for D template instances MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Differential Revision: https://reviews.llvm.org/D111424 Signed-off-by: Luís Ferreira --- llvm/lib/Demangle/DLangDemangle.cpp | 197 +++++++++++++++++- llvm/unittests/Demangle/DLangDemangleTest.cpp | 24 ++- 2 files changed, 217 insertions(+), 4 deletions(-) diff --git a/llvm/lib/Demangle/DLangDemangle.cpp b/llvm/lib/Demangle/DLangDemangle.cpp index 76d162069e8c..0b9432b5fc09 100644 --- a/llvm/lib/Demangle/DLangDemangle.cpp +++ b/llvm/lib/Demangle/DLangDemangle.cpp @@ -275,6 +275,42 @@ struct Demangler { /// \see https://dlang.org/spec/abi.html#TypeTuple . 
const char *parseTuple(OutputBuffer *Demangled, const char *Mangled); + /// Extract and demangle the template symbol parameter from a given mangled + /// symbol append it to the output string + /// + /// \param Demangled output buffer to write the demangled name + /// \param Mangled mangled symbol to be demangled + /// + /// \return the remaining string on success or nullptr on failure + /// + /// \see https://dlang.org/spec/abi.html#TemplateArgX + const char *parseTemplateSymbolParameter(OutputBuffer *Demangled, + const char *Mangled); + + /// Extract and demangle the template arguments list from a given mangled + /// symbol append it to the output string + /// + /// \param Demangled output buffer to write the demangled name + /// \param Mangled mangled symbol to be demangled + /// + /// \return the remaining string on success or nullptr on failure + /// + /// \see https://dlang.org/spec/abi.html#TemplateArgs + const char *parseTemplateArgs(OutputBuffer *Demangled, const char *Mangled); + + /// Extract and demangle a template from a given mangled symbol and append it + /// to the output string + /// + /// \param Demangled output buffer to write the demangled name + /// \param Mangled mangled symbol to be demangled + /// \param Len expected characters length (default to -1 if unknown length) + /// + /// \return the remaining string on success or nullptr on failure + /// + /// \see https://dlang.org/spec/abi.html#TemplateInstanceName + const char *parseTemplate(OutputBuffer *Demangled, const char *Mangled, + unsigned long Len = -1); + /// The string we are demangling. const char *Str; /// The index of the last back reference. @@ -450,7 +486,9 @@ bool Demangler::isSymbolName(const char *Mangled) { if (std::isdigit(*Mangled)) return true; - // TODO: Handle template instances. + if (Mangled[0] == '_' && Mangled[1] == '_' && + (Mangled[2] == 'T' || Mangled[2] == 'U')) + return true; if (*Mangled != 'Q') return false; @@ -788,7 +826,10 @@ const char *Demangler::parseIdentifier(OutputBuffer *Demangled, if (*Mangled == 'Q') return parseSymbolBackref(Demangled, Mangled); - // TODO: Parse lengthless template instances. + // May be a template instance without a length prefix. + if (Mangled[0] == '_' && Mangled[1] == '_' && + (Mangled[2] == 'T' || Mangled[2] == 'U')) + return parseTemplate(Demangled, Mangled); const char *Endptr = decodeNumber(Mangled, &Len); @@ -800,7 +841,10 @@ const char *Demangler::parseIdentifier(OutputBuffer *Demangled, Mangled = Endptr; - // TODO: Parse template instances with a length prefix. + // May be a template instance with a length prefix. + if (Len >= 5 && Mangled[0] == '_' && Mangled[1] == '_' && + (Mangled[2] == 'T' || Mangled[2] == 'U')) + return parseTemplate(Demangled, Mangled, Len); // There can be multiple different declarations in the same function that // have the same mangled name. 
To make the mangled names unique, a fake @@ -1271,6 +1315,153 @@ const char *Demangler::parseLName(OutputBuffer *Demangled, const char *Mangled, return Mangled; } +const char *Demangler::parseTemplateSymbolParameter(OutputBuffer *Demangled, + const char *Mangled) { + if (strncmp(Mangled, "_D", 2) == 0 && isSymbolName(Mangled + 2)) + return parseMangle(Demangled, Mangled); + + if (*Mangled == 'Q') + return parseQualified(Demangled, Mangled, false); + + unsigned long Len; + const char *EndPtr = decodeNumber(Mangled, &Len); + + if (EndPtr == nullptr || Len == 0) + return nullptr; + + // In template parameter symbols generated by the frontend up to 2.076, + // the symbol length is encoded and the first character of the mangled + // name can be a digit. This causes ambiguity issues because the digits + // of the two numbers are adjacent. + long PtrSize = Len; + const char *PtrEnd; + int Saved = Demangled->getCurrentPosition(); + + // Work backwards until a match is found. + for (PtrEnd = EndPtr; EndPtr != nullptr; PtrEnd--) { + Mangled = PtrEnd; + + // Reached the beginning of the pointer to the name length, + // try parsing the entire symbol. + if (PtrSize == 0) { + PtrSize = Len; + PtrEnd = EndPtr; + EndPtr = nullptr; + } + + // Check whether template parameter is a function with a valid + // return type or an untyped identifier. + if (isSymbolName(Mangled)) + Mangled = parseQualified(Demangled, Mangled, false); + else if (strncmp(Mangled, "_D", 2) == 0 && isSymbolName(Mangled + 2)) + Mangled = parseMangle(Demangled, Mangled); + + // Check for name length mismatch. + if (Mangled && (EndPtr == nullptr || (Mangled - PtrEnd) == PtrSize)) + return Mangled; + + PtrSize /= 10; + Demangled->setCurrentPosition(Saved); + } + + // No match on any combinations. + return nullptr; +} + +const char *Demangler::parseTemplateArgs(OutputBuffer *Demangled, + const char *Mangled) { + size_t N = 0; + + while (Mangled && *Mangled != '\0') { + switch (*Mangled) { + case 'Z': // End of parameter list. + ++Mangled; + return Mangled; + } + + if (N++) + *Demangled << ", "; + + // Skip over specialised template prefix. + if (*Mangled == 'H') + ++Mangled; + + switch (*Mangled) { + case 'S': // Symbol parameter. + ++Mangled; + Mangled = parseTemplateSymbolParameter(Demangled, Mangled); + break; + case 'T': // Type parameter. + ++Mangled; + Mangled = parseType(Demangled, Mangled); + break; + + // TODO: Parse value literals + + case 'X': // Externally mangled parameter. + { + unsigned long Len; + const char *EndPtr; + + ++Mangled; + EndPtr = decodeNumber(Mangled, &Len); + if (EndPtr == nullptr || strlen(EndPtr) < Len) + return nullptr; + + *Demangled << StringView(EndPtr, Len); + Mangled = EndPtr + Len; + break; + } + default: + return nullptr; + } + } + + return Mangled; +} + +const char *Demangler::parseTemplate(OutputBuffer *Demangled, + const char *Mangled, unsigned long Len) { + const char *Start = Mangled; + OutputBuffer Args; + + // Template instance names have the types and values of its parameters + // encoded into it. + // TemplateInstanceName: + // Number __T LName TemplateArgs Z + // Number __U LName TemplateArgs Z + // ^ + // The start pointer should be at the above location, and LEN should be + // the value of the decoded number. + + // Template symbol. + if (!isSymbolName(Mangled + 3) || Mangled[3] == '0') + return nullptr; + + Mangled += 3; + + // Template identifier. 
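As an aside (not part of the patch), a template instance name is itself a length-prefixed identifier whose payload starts with "__T" or "__U" and ends with 'Z'. The sketch below walks through one of the tests added in this patch.

#include <cassert>
#include <cstring>

int main() {
  // "_D8demangle11__T4testTaZv" breaks down as:
  //   _D  8demangle  11  __T  4test  Ta  Z  v
  // where "11" is the length of the whole instance name "__T4testTaZ",
  // "__T" marks a template instance ("__U" is the other accepted form),
  // "4test" is the template identifier, "Ta" is a single type argument
  // (char) and "Z" ends the argument list; the final "v" is the symbol's
  // type.  The test added in this patch expects "demangle.test!(char)".
  const char *Instance = "__T4testTaZ";
  assert(std::strlen(Instance) == 11); // matches the "11" length prefix
  return 0;
}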
+ Mangled = parseIdentifier(Demangled, Mangled); + + if (!initializeOutputBuffer(nullptr, nullptr, Args, 32)) + return nullptr; + + // Template arguments. + Mangled = parseTemplateArgs(&Args, Mangled); + + *Demangled << "!("; + *Demangled << StringView(Args.getBuffer(), Args.getCurrentPosition()); + *Demangled << ')'; + + std::free(Args.getBuffer()); + + // Check for template name length mismatch. + if (Len != -1UL && Mangled && (unsigned long)(Mangled - Start) != Len) + return nullptr; + + return Mangled; +} + Demangler::Demangler(const char *Mangled) : Str(Mangled), LastBackref(strlen(Mangled)) {} diff --git a/llvm/unittests/Demangle/DLangDemangleTest.cpp b/llvm/unittests/Demangle/DLangDemangleTest.cpp index 785e617b02bf..f0f2e89938cf 100644 --- a/llvm/unittests/Demangle/DLangDemangleTest.cpp +++ b/llvm/unittests/Demangle/DLangDemangleTest.cpp @@ -343,4 +343,26 @@ INSTANTIATE_TEST_SUITE_P( std::make_pair("_D8demangle4testFDONgxFZaZv", "demangle.test(char() delegate shared inout const)"), std::make_pair("_D8demangle004testFaZv", "demangle.test(char)"), - std::make_pair("_D8demangle000000004testFaZv", "demangle.test(char)"))); + std::make_pair("_D8demangle000000004testFaZv", "demangle.test(char)"), + std::make_pair("_D8demangle9__T4testZv", "demangle.test!()"), + std::make_pair("_D8demangle9__U4testZv", "demangle.test!()"), + std::make_pair("_D8demangle11__T4testTaZv", "demangle.test!(char)"), + std::make_pair("_D8demangle13__T4testTaTaZv", + "demangle.test!(char, char)"), + std::make_pair("_D8demangle15__T4testTaTaTaZv", + "demangle.test!(char, char, char)"), + std::make_pair("_D8demangle16__T4testTaTOiTaZv", + "demangle.test!(char, shared(int), char)"), + std::make_pair("_D8demangle17__T4testS6symbolZv", + "demangle.test!(symbol)"), + std::make_pair("_D8demangle23__T4testS116symbol3fooZv", + "demangle.test!(symbol.foo)"), + std::make_pair("_D8demangle32__T4testS20_D6symbol3foo3barFZvZv", + "demangle.test!(symbol.foo.bar())"), + std::make_pair("_D8demangle19__T4testTaS6symbolZv", + "demangle.test!(char, symbol)"), + std::make_pair("_D8demangle19__T4testS6symbolTaZv", + "demangle.test!(symbol, char)"), + std::make_pair("_D8demangle12__T4testHTaZv", "demangle.test!(char)"), + std::make_pair("_D8demangle13__T4testTFZaZ6mangleFZv", + "demangle.test!(char() function).mangle()"))); From a86027bb6872ad5de36d14e513091fed7436f445 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Lu=C3=ADs=20Ferreira?= Date: Thu, 7 Oct 2021 21:29:20 +0000 Subject: [PATCH 265/293] [Demangle] Add support for D integer literal values MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Differential Revision: https://reviews.llvm.org/D111425 Signed-off-by: Luís Ferreira --- llvm/lib/Demangle/DLangDemangle.cpp | 195 +++++++++++++++++- llvm/unittests/Demangle/DLangDemangleTest.cpp | 29 ++- 2 files changed, 222 insertions(+), 2 deletions(-) diff --git a/llvm/lib/Demangle/DLangDemangle.cpp b/llvm/lib/Demangle/DLangDemangle.cpp index 0b9432b5fc09..29456ba2d189 100644 --- a/llvm/lib/Demangle/DLangDemangle.cpp +++ b/llvm/lib/Demangle/DLangDemangle.cpp @@ -311,6 +311,34 @@ struct Demangler { const char *parseTemplate(OutputBuffer *Demangled, const char *Mangled, unsigned long Len = -1); + /// Extract and demangle an integer value from a given mangled symbol append + /// it to the output string. + /// + /// \param Demangled output buffer to write the demangled name. + /// \param Mangled mangled symbol to be demangled. 
+ /// \param Type mangled type character in which the type should be + /// represented as. + /// + /// \return the remaining string on success or nullptr on failure. + /// + /// \see https://dlang.org/spec/abi.html#Value . + const char *parseInteger(OutputBuffer *Demangled, const char *Mangled, + char Type); + + /// Extract and demangle any value from a given mangled symbol append it to + /// the output string. + /// + /// \param Demangled output buffer to write the demangled name. + /// \param Mangled mangled symbol to be demangled. + /// \param Type mangled type character in which the type should be + /// represented as, if needed. + /// + /// \return the remaining string on success or nullptr on failure. + /// + /// \see https://dlang.org/spec/abi.html#Value . + const char *parseValue(OutputBuffer *Demangled, const char *Mangled, + char Type); + /// The string we are demangling. const char *Str; /// The index of the last back reference. @@ -1395,8 +1423,38 @@ const char *Demangler::parseTemplateArgs(OutputBuffer *Demangled, ++Mangled; Mangled = parseType(Demangled, Mangled); break; + case 'V': // Value parameter. + { + OutputBuffer Name; + char Type; - // TODO: Parse value literals + // Peek at the type. + ++Mangled; + Type = *Mangled; + + if (Type == 'Q') { + // Value type is a back reference, peek at the real type. + const char *Backref; + if (decodeBackref(Mangled, &Backref) == nullptr) + return nullptr; + + Type = *Backref; + } + + // In the few instances where the type is actually desired in + // the output, it should precede the value from dlang_value. + Name = OutputBuffer(); + if (!initializeOutputBuffer(nullptr, nullptr, Name, 32)) + return nullptr; + + Mangled = parseType(&Name, Mangled); + Name << '\0'; + Name.setCurrentPosition(Name.getCurrentPosition() - 1); + + Mangled = parseValue(Demangled, Mangled, Type); + std::free(Name.getBuffer()); + break; + } case 'X': // Externally mangled parameter. { @@ -1462,6 +1520,141 @@ const char *Demangler::parseTemplate(OutputBuffer *Demangled, return Mangled; } +const char *Demangler::parseValue(OutputBuffer *Demangled, const char *Mangled, + char Type) { + if (Mangled == nullptr || *Mangled == '\0') + return nullptr; + + switch (*Mangled) { + // Integral values. + case 'N': + ++Mangled; + *Demangled << '-'; + Mangled = parseInteger(Demangled, Mangled, Type); + break; + + case 'i': + ++Mangled; + + [[clang::fallthrough]]; + // There really should always be an `i' before encoded numbers, but there + // wasn't in early versions of D2, so this case range must remain for + // backwards compatibility. 
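As an aside (not part of the patch), a value template argument is introduced by 'V', followed by the argument's type and then the encoded value; parseInteger chooses a literal suffix from that type. The sketch below restates the suffix rule and quotes expected strings from the tests added in this patch.

#include <cassert>
#include <cstring>

// Literal suffix appended by parseInteger for integral types:
//   h (ubyte), t (ushort), k (uint)  ->  "u"
//   l (long)                         ->  "L"
//   m (ulong)                        ->  "uL"
//   anything else                    ->  no suffix
static const char *integerSuffixSketch(char Type) {
  switch (Type) {
  case 'h': case 't': case 'k': return "u";
  case 'l':                     return "L";
  case 'm':                     return "uL";
  default:                      return "";
  }
}

int main() {
  // Expected strings from DLangDemangleTest.cpp in this patch:
  //   "_D8demangle15__T4testVii123Zv" -> demangle.test!(123)
  //   "_D8demangle15__T4testVmi123Zv" -> demangle.test!(123uL)
  //   "_D8demangle15__T4testViN123Zv" -> demangle.test!(-123)  ('N' = negative)
  //   "_D8demangle13__T4testVbi1Zv"   -> demangle.test!(true)  (bool)
  assert(std::strcmp(integerSuffixSketch('m'), "uL") == 0);
  assert(std::strcmp(integerSuffixSketch('i'), "") == 0);
  return 0;
}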
+ case '0': + case '1': + case '2': + case '3': + case '4': + case '5': + case '6': + case '7': + case '8': + case '9': + Mangled = parseInteger(Demangled, Mangled, Type); + break; + + default: + return nullptr; + } + + return Mangled; +} + +const char *Demangler::parseInteger(OutputBuffer *Demangled, + const char *Mangled, char Type) { + if (Type == 'a' || Type == 'u' || Type == 'w') { + // Parse character value + char Value[20]; + int Pos = sizeof(Value); + int Width = 0; + unsigned long Val; + + Mangled = decodeNumber(Mangled, &Val); + if (Mangled == nullptr) + return nullptr; + + *Demangled << '\''; + + if (Type == 'a' && Val >= 0x20 && Val < 0x7F) { + // Represent as a character literal + *Demangled << static_cast(Val); + } else { + // Represent as a hexadecimal value + switch (Type) { + case 'a': // char + *Demangled << "\\x"; + Width = 2; + break; + case 'u': // wchar + *Demangled << "\\u"; + Width = 4; + break; + case 'w': // dchar + *Demangled << "\\U"; + Width = 8; + break; + } + + while (Val > 0) { + int Digit = Val % 16; + + if (Digit < 10) + Value[--Pos] = (char)(Digit + '0'); + else + Value[--Pos] = (char)((Digit - 10) + 'a'); + + Val /= 16; + Width--; + } + + for (; Width > 0; Width--) + Value[--Pos] = '0'; + + *Demangled << StringView(&(Value[Pos]), sizeof(Value) - Pos); + } + *Demangled << '\''; + } else if (Type == 'b') { + // Parse boolean value + unsigned long Val; + + Mangled = decodeNumber(Mangled, &Val); + if (Mangled == nullptr) + return nullptr; + + *Demangled << (Val ? "true" : "false"); + } else { + // Parse integer value + const char *NumPtr = Mangled; + size_t Num = 0; + + if (!std::isdigit(*Mangled)) + return nullptr; + + while (std::isdigit(*Mangled)) { + ++Num; + ++Mangled; + } + *Demangled << StringView(NumPtr, Num); + + // Append suffix + switch (Type) { + case 'h': // ubyte + case 't': // ushort + case 'k': // uint + *Demangled << 'u'; + break; + case 'l': // long + *Demangled << 'L'; + break; + case 'm': // ulong + *Demangled << "uL"; + break; + } + } + + return Mangled; +} + Demangler::Demangler(const char *Mangled) : Str(Mangled), LastBackref(strlen(Mangled)) {} diff --git a/llvm/unittests/Demangle/DLangDemangleTest.cpp b/llvm/unittests/Demangle/DLangDemangleTest.cpp index f0f2e89938cf..2216248d2dac 100644 --- a/llvm/unittests/Demangle/DLangDemangleTest.cpp +++ b/llvm/unittests/Demangle/DLangDemangleTest.cpp @@ -365,4 +365,31 @@ INSTANTIATE_TEST_SUITE_P( "demangle.test!(symbol, char)"), std::make_pair("_D8demangle12__T4testHTaZv", "demangle.test!(char)"), std::make_pair("_D8demangle13__T4testTFZaZ6mangleFZv", - "demangle.test!(char() function).mangle()"))); + "demangle.test!(char() function).mangle()"), + std::make_pair("_D8demangle15__T4testVgi123Zv", "demangle.test!(123)"), + std::make_pair("_D8demangle15__T4testVii123Zv", "demangle.test!(123)"), + std::make_pair("_D8demangle15__T4testVsi123Zv", "demangle.test!(123)"), + std::make_pair("_D8demangle15__T4testVhi123Zv", "demangle.test!(123u)"), + std::make_pair("_D8demangle15__T4testVki123Zv", "demangle.test!(123u)"), + std::make_pair("_D8demangle15__T4testVti123Zv", "demangle.test!(123u)"), + std::make_pair("_D8demangle15__T4testVli123Zv", "demangle.test!(123L)"), + std::make_pair("_D8demangle15__T4testVmi123Zv", + "demangle.test!(123uL)"), + std::make_pair("_D8demangle15__T4testViN123Zv", "demangle.test!(-123)"), + std::make_pair("_D8demangle15__T4testVkN123Zv", + "demangle.test!(-123u)"), + std::make_pair("_D8demangle15__T4testVlN123Zv", + "demangle.test!(-123L)"), + 
std::make_pair("_D8demangle15__T4testVmN123Zv", + "demangle.test!(-123uL)"), + std::make_pair("_D8demangle13__T4testVbi1Zv", "demangle.test!(true)"), + std::make_pair("_D8demangle13__T4testVbi0Zv", "demangle.test!(false)"), + std::make_pair("_D8demangle14__T4testVai10Zv", + "demangle.test!('\\x0a')"), + std::make_pair("_D8demangle14__T4testVai32Zv", "demangle.test!(' ')"), + std::make_pair("_D8demangle14__T4testVai65Zv", "demangle.test!('A')"), + std::make_pair("_D8demangle15__T4testVai126Zv", "demangle.test!('~')"), + std::make_pair("_D8demangle16__T4testVui1000Zv", + "demangle.test!('\\u03e8')"), + std::make_pair("_D8demangle18__T4testVwi100000Zv", + "demangle.test!('\\U000186a0')"))); From d44be27cfc02f1114ab3366d736243b5ff45c429 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Lu=C3=ADs=20Ferreira?= Date: Thu, 7 Oct 2021 22:37:33 +0000 Subject: [PATCH 266/293] [Demangle] Add support for D floating-point literal values MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Differential Revision: https://reviews.llvm.org/D111426 Signed-off-by: Luís Ferreira --- llvm/lib/Demangle/DLangDemangle.cpp | 89 +++++++++++++++++++ llvm/unittests/Demangle/DLangDemangleTest.cpp | 35 +++++++- 2 files changed, 123 insertions(+), 1 deletion(-) diff --git a/llvm/lib/Demangle/DLangDemangle.cpp b/llvm/lib/Demangle/DLangDemangle.cpp index 29456ba2d189..6bb82a3165fc 100644 --- a/llvm/lib/Demangle/DLangDemangle.cpp +++ b/llvm/lib/Demangle/DLangDemangle.cpp @@ -325,6 +325,17 @@ struct Demangler { const char *parseInteger(OutputBuffer *Demangled, const char *Mangled, char Type); + /// Extract and demangle a floating-point value from a given mangled symbol + /// append it to the output string. + /// + /// \param Demangled output buffer to write the demangled name. + /// \param Mangled mangled symbol to be demangled. + /// + /// \return the remaining string on success or nullptr on failure. + /// + /// \see https://dlang.org/spec/abi.html#Value . + const char *parseReal(OutputBuffer *Demangled, const char *Mangled); + /// Extract and demangle any value from a given mangled symbol append it to /// the output string. /// @@ -1553,6 +1564,24 @@ const char *Demangler::parseValue(OutputBuffer *Demangled, const char *Mangled, Mangled = parseInteger(Demangled, Mangled, Type); break; + // Real value. + case 'e': + ++Mangled; + Mangled = parseReal(Demangled, Mangled); + break; + + // Complex value. + case 'c': + ++Mangled; + Mangled = parseReal(Demangled, Mangled); + *Demangled << '+'; + if (Mangled == nullptr || *Mangled != 'c') + return nullptr; + ++Mangled; + Mangled = parseReal(Demangled, Mangled); + *Demangled << 'i'; + break; + default: return nullptr; } @@ -1655,6 +1684,66 @@ const char *Demangler::parseInteger(OutputBuffer *Demangled, return Mangled; } +const char *Demangler::parseReal(OutputBuffer *Demangled, const char *Mangled) { + // Handle NAN and +-INF. + if (strncmp(Mangled, "NAN", 3) == 0) { + *Demangled << "NaN"; + Mangled += 3; + return Mangled; + } + + if (strncmp(Mangled, "INF", 3) == 0) { + *Demangled << "Inf"; + Mangled += 3; + return Mangled; + } + + if (strncmp(Mangled, "NINF", 4) == 0) { + *Demangled << "-Inf"; + Mangled += 4; + return Mangled; + } + + // Hexadecimal prefix and leading bit. + if (*Mangled == 'N') { + *Demangled << '-'; + ++Mangled; + } + + if (!std::isxdigit(*Mangled)) + return nullptr; + + *Demangled << "0x"; + *Demangled << StringView(Mangled, 1); + *Demangled << '.'; + ++Mangled; + + // Significand. 
+ while (std::isxdigit(*Mangled)) { + *Demangled << StringView(Mangled, 1); + ++Mangled; + } + + // Exponent. + if (*Mangled != 'P') + return nullptr; + + *Demangled << 'p'; + ++Mangled; + + if (*Mangled == 'N') { + *Demangled << '-'; + ++Mangled; + } + + while (std::isdigit(*Mangled)) { + *Demangled << StringView(Mangled, 1); + ++Mangled; + } + + return Mangled; +} + Demangler::Demangler(const char *Mangled) : Str(Mangled), LastBackref(strlen(Mangled)) {} diff --git a/llvm/unittests/Demangle/DLangDemangleTest.cpp b/llvm/unittests/Demangle/DLangDemangleTest.cpp index 2216248d2dac..765977b605da 100644 --- a/llvm/unittests/Demangle/DLangDemangleTest.cpp +++ b/llvm/unittests/Demangle/DLangDemangleTest.cpp @@ -392,4 +392,37 @@ INSTANTIATE_TEST_SUITE_P( std::make_pair("_D8demangle16__T4testVui1000Zv", "demangle.test!('\\u03e8')"), std::make_pair("_D8demangle18__T4testVwi100000Zv", - "demangle.test!('\\U000186a0')"))); + "demangle.test!('\\U000186a0')"), + std::make_pair("_D8demangle17__T4testVde0A8P6Zv", + "demangle.test!(0x0.A8p6)"), + std::make_pair("_D8demangle16__T4testVdeA8P2Zv", + "demangle.test!(0xA.8p2)"), + std::make_pair("_D8demangle18__T4testVdeN0A8P6Zv", + "demangle.test!(-0x0.A8p6)"), + std::make_pair("_D8demangle31__T4testVde0F6E978D4FDF3B646P7Zv", + "demangle.test!(0x0.F6E978D4FDF3B646p7)"), + std::make_pair("_D8demangle15__T4testVdeNANZv", "demangle.test!(NaN)"), + std::make_pair("_D8demangle15__T4testVdeINFZv", "demangle.test!(Inf)"), + std::make_pair("_D8demangle16__T4testVdeNINFZv", + "demangle.test!(-Inf)"), + std::make_pair("_D8demangle23__T4testVfe0FFFFFFP128Zv", + "demangle.test!(0x0.FFFFFFp128)"), + std::make_pair("_D8demangle32__T4testVde0FFFFFFFFFFFFF8P1024Zv", + "demangle.test!(0x0.FFFFFFFFFFFFF8p1024)"), + std::make_pair("_D8demangle19__T4testVfe08PN125Zv", + "demangle.test!(0x0.8p-125)"), + std::make_pair("_D8demangle20__T4testVde08PN1021Zv", + "demangle.test!(0x0.8p-1021)"), + std::make_pair( + "_D8demangle51__T4testVrc0C4CCCCCCCCCCCCCDP4c0B666666666666666P6Zv", + "demangle.test!(0x0.C4CCCCCCCCCCCCCDp4+0x0.B666666666666666p6i)"), + std::make_pair( + "_D8demangle52__" + "T4testVrcN0C4CCCCCCCCCCCCCDP4c0B666666666666666P6Zv", + "demangle.test!(-0x0.C4CCCCCCCCCCCCCDp4+0x0.B666666666666666p6i)"), + std::make_pair("_D8demangle91__" + "T4testVde000111222333444555666777888999AAABBBCCCDDDEEEF" + "FFP000111222333444555666777888999Zv", + "demangle.test!(0x0." + "00111222333444555666777888999AAABBBCCCDDDEEEFFFp0001112" + "22333444555666777888999)"))); From 17902dbc21e09f175ed6f2c61b64392dec5834ac Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Lu=C3=ADs=20Ferreira?= Date: Thu, 7 Oct 2021 23:18:56 +0000 Subject: [PATCH 267/293] [Demangle] Add support for D string literals MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Differential Revision: https://reviews.llvm.org/D111428 Signed-off-by: Luís Ferreira --- llvm/lib/Demangle/DLangDemangle.cpp | 112 ++++++++++++++++++ llvm/unittests/Demangle/DLangDemangleTest.cpp | 12 +- 2 files changed, 123 insertions(+), 1 deletion(-) diff --git a/llvm/lib/Demangle/DLangDemangle.cpp b/llvm/lib/Demangle/DLangDemangle.cpp index 6bb82a3165fc..5ecba3a1ed44 100644 --- a/llvm/lib/Demangle/DLangDemangle.cpp +++ b/llvm/lib/Demangle/DLangDemangle.cpp @@ -70,6 +70,16 @@ struct Demangler { /// \see https://dlang.org/spec/abi.html#Number . const char *decodeNumber(const char *Mangled, unsigned long *Ret); + /// Extract the hex-digit from a given string. + /// + /// \param Mangled string to extract the hex-digit. 
+ /// \param Ret assigned result value. + /// + /// \return the remaining string on success or nullptr on failure. + /// + /// \see https://dlang.org/spec/abi.html#HexDigits . + const char *decodeHexdigit(const char *Mangled, char *Ret); + /// Extract the back reference position from a given string. /// /// \param Mangled string to extract the back reference position. @@ -325,6 +335,17 @@ struct Demangler { const char *parseInteger(OutputBuffer *Demangled, const char *Mangled, char Type); + /// Extract and demangle a string value from a given mangled symbol append it + /// to the output string. + /// + /// \param Demangled output buffer to write the demangled name. + /// \param Mangled mangled symbol to be demangled. + /// + /// \return the remaining string on success or nullptr on failure. + /// + /// \see https://dlang.org/spec/abi.html#Value . + const char *parseString(OutputBuffer *Demangled, const char *Mangled); + /// Extract and demangle a floating-point value from a given mangled symbol /// append it to the output string. /// @@ -383,6 +404,31 @@ const char *Demangler::decodeNumber(const char *Mangled, unsigned long *Ret) { return Mangled; } +const char *Demangler::decodeHexdigit(const char *Mangled, char *Ret) { + char C; + + // Return nullptr if trying to extract something that isn't a hexdigit + if (Mangled == nullptr || !std::isxdigit(Mangled[0]) || + !std::isxdigit(Mangled[1])) + return nullptr; + + C = Mangled[0]; + if (!std::isdigit(C)) + *Ret = C - (std::isupper(C) ? 'A' : 'a') + 10; + else + *Ret = C - '0'; + + C = Mangled[1]; + if (!std::isdigit(C)) + *Ret = (*Ret << 4) | (C - (std::isupper(C) ? 'A' : 'a') + 10); + else + *Ret = (*Ret << 4) | (C - '0'); + + Mangled += 2; + + return Mangled; +} + const char *Demangler::decodeBackrefPos(const char *Mangled, long *Ret) { // Return nullptr if trying to extract something that isn't a digit if (Mangled == nullptr || !std::isalpha(*Mangled)) @@ -1582,6 +1628,13 @@ const char *Demangler::parseValue(OutputBuffer *Demangled, const char *Mangled, *Demangled << 'i'; break; + // String values. + case 'a': // UTF8 + case 'w': // UTF16 + case 'd': // UTF32 + Mangled = parseString(Demangled, Mangled); + break; + default: return nullptr; } @@ -1589,6 +1642,65 @@ const char *Demangler::parseValue(OutputBuffer *Demangled, const char *Mangled, return Mangled; } +const char *Demangler::parseString(OutputBuffer *Demangled, + const char *Mangled) { + char Type = *Mangled; + unsigned long Len; + + ++Mangled; + Mangled = decodeNumber(Mangled, &Len); + if (Mangled == nullptr || *Mangled != '_') + return nullptr; + + ++Mangled; + *Demangled << '\"'; + while (Len--) { + char Val; + const char *Endptr = decodeHexdigit(Mangled, &Val); + + if (Endptr == nullptr) + return nullptr; + + // Sanitize white and non-printable characters. 
+ switch (Val) { + case ' ': + *Demangled << ' '; + break; + case '\t': + *Demangled << "\\t"; + break; + case '\n': + *Demangled << "\\n"; + break; + case '\r': + *Demangled << "\\r"; + break; + case '\f': + *Demangled << "\\f"; + break; + case '\v': + *Demangled << "\\v"; + break; + + default: + if (std::isprint(Val)) + *Demangled << Val; + else { + *Demangled << "\\x"; + *Demangled << StringView(Mangled, 2); + } + } + + Mangled = Endptr; + } + *Demangled << '\"'; + + if (Type != 'a') + *Demangled << Type; + + return Mangled; +} + const char *Demangler::parseInteger(OutputBuffer *Demangled, const char *Mangled, char Type) { if (Type == 'a' || Type == 'u' || Type == 'w') { diff --git a/llvm/unittests/Demangle/DLangDemangleTest.cpp b/llvm/unittests/Demangle/DLangDemangleTest.cpp index 765977b605da..d2e5b2b08527 100644 --- a/llvm/unittests/Demangle/DLangDemangleTest.cpp +++ b/llvm/unittests/Demangle/DLangDemangleTest.cpp @@ -425,4 +425,14 @@ INSTANTIATE_TEST_SUITE_P( "FFP000111222333444555666777888999Zv", "demangle.test!(0x0." "00111222333444555666777888999AAABBBCCCDDDEEEFFFp0001112" - "22333444555666777888999)"))); + "22333444555666777888999)"), + std::make_pair("_D8demangle22__T4testVG3ua3_616263Zv", + "demangle.test!(\"abc\")"), + std::make_pair("_D8demangle22__T4testVG3ud3_616263Zv", + "demangle.test!(\"abc\"d)"), + std::make_pair("_D8demangle22__T4testVG3uw3_616263Zv", + "demangle.test!(\"abc\"w)"), + std::make_pair("_D8demangle16__T4testVAyaa0_Zv", + "demangle.test!(\"\")"), + std::make_pair("_D8demangle32__T4testVAyaa8_20090a0d0c0b00ffZv", + "demangle.test!(\" \\t\\n\\r\\f\\v\\x00\\xff\")"))); From fa96e487fa02f56ebc5f4c9dca6414030892008c Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Lu=C3=ADs=20Ferreira?= Date: Fri, 8 Oct 2021 00:28:53 +0000 Subject: [PATCH 268/293] [Demangle] Add support for D Array and associative array literals MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Differential Revision: https://reviews.llvm.org/D111429 Signed-off-by: Luís Ferreira --- llvm/lib/Demangle/DLangDemangle.cpp | 80 +++++++++++++++++++ llvm/unittests/Demangle/DLangDemangleTest.cpp | 10 ++- 2 files changed, 89 insertions(+), 1 deletion(-) diff --git a/llvm/lib/Demangle/DLangDemangle.cpp b/llvm/lib/Demangle/DLangDemangle.cpp index 5ecba3a1ed44..079a047b4a29 100644 --- a/llvm/lib/Demangle/DLangDemangle.cpp +++ b/llvm/lib/Demangle/DLangDemangle.cpp @@ -335,6 +335,28 @@ struct Demangler { const char *parseInteger(OutputBuffer *Demangled, const char *Mangled, char Type); + /// Extract and demangle an array literal value from a given mangled symbol + /// append it to the output string. + /// + /// \param Demangled output buffer to write the demangled name. + /// \param Mangled mangled symbol to be demangled. + /// + /// \return the remaining string on success or nullptr on failure. + /// + /// \see https://dlang.org/spec/abi.html#Value . + const char *parseArrayLiteral(OutputBuffer *Demangled, const char *Mangled); + + /// Extract and demangle an associative array value from a given mangled + /// symbol append it to the output string. + /// + /// \param Demangled output buffer to write the demangled name. + /// \param Mangled mangled symbol to be demangled. + /// + /// \return the remaining string on success or nullptr on failure. + /// + /// \see https://dlang.org/spec/abi.html#Value . 
+ const char *parseAssocArray(OutputBuffer *Demangled, const char *Mangled); + /// Extract and demangle a string value from a given mangled symbol append it /// to the output string. /// @@ -1635,6 +1657,15 @@ const char *Demangler::parseValue(OutputBuffer *Demangled, const char *Mangled, Mangled = parseString(Demangled, Mangled); break; + // Array values. + case 'A': + ++Mangled; + if (Type == 'H') + Mangled = parseAssocArray(Demangled, Mangled); + else + Mangled = parseArrayLiteral(Demangled, Mangled); + break; + default: return nullptr; } @@ -1856,6 +1887,55 @@ const char *Demangler::parseReal(OutputBuffer *Demangled, const char *Mangled) { return Mangled; } +const char *Demangler::parseArrayLiteral(OutputBuffer *Demangled, + const char *Mangled) { + unsigned long Elements; + + Mangled = decodeNumber(Mangled, &Elements); + if (Mangled == nullptr) + return nullptr; + + *Demangled << '['; + while (Elements--) { + Mangled = parseValue(Demangled, Mangled, '\0'); + if (Mangled == nullptr) + return nullptr; + + if (Elements != 0) + *Demangled << ", "; + } + + *Demangled << ']'; + return Mangled; +} + +const char *Demangler::parseAssocArray(OutputBuffer *Demangled, + const char *Mangled) { + unsigned long Elements; + + Mangled = decodeNumber(Mangled, &Elements); + if (Mangled == nullptr) + return nullptr; + + *Demangled << '['; + while (Elements--) { + Mangled = parseValue(Demangled, Mangled, '\0'); + if (Mangled == nullptr) + return nullptr; + + *Demangled << ':'; + Mangled = parseValue(Demangled, Mangled, '\0'); + if (Mangled == nullptr) + return nullptr; + + if (Elements != 0) + *Demangled << ", "; + } + + *Demangled << ']'; + return Mangled; +} + Demangler::Demangler(const char *Mangled) : Str(Mangled), LastBackref(strlen(Mangled)) {} diff --git a/llvm/unittests/Demangle/DLangDemangleTest.cpp b/llvm/unittests/Demangle/DLangDemangleTest.cpp index d2e5b2b08527..c054e740128a 100644 --- a/llvm/unittests/Demangle/DLangDemangleTest.cpp +++ b/llvm/unittests/Demangle/DLangDemangleTest.cpp @@ -435,4 +435,12 @@ INSTANTIATE_TEST_SUITE_P( std::make_pair("_D8demangle16__T4testVAyaa0_Zv", "demangle.test!(\"\")"), std::make_pair("_D8demangle32__T4testVAyaa8_20090a0d0c0b00ffZv", - "demangle.test!(\" \\t\\n\\r\\f\\v\\x00\\xff\")"))); + "demangle.test!(\" \\t\\n\\r\\f\\v\\x00\\xff\")"), + std::make_pair("_D8demangle22__T4testVAiA4i1i2i3i4Zv", + "demangle.test!([1, 2, 3, 4])"), + std::make_pair("_D8demangle25__T4testVAdA2e08P1eN08P1Zv", + "demangle.test!([0x0.8p1, -0x0.8p1])"), + std::make_pair("_D8demangle23__T4testVHiiA2i1i2i3i4Zv", + "demangle.test!([1:2, 3:4])"), + std::make_pair("_D8demangle39__T4testVHAxaiA2a3_616263i1a3_646566i2Zv", + "demangle.test!([\"abc\":1, \"def\":2])"))); From a0fc0613c6801ed9b83923a04f0848d40f4f4a01 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Lu=C3=ADs=20Ferreira?= Date: Fri, 8 Oct 2021 01:10:53 +0000 Subject: [PATCH 269/293] [Demangle] Add support for D struct literals MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Differential Revision: https://reviews.llvm.org/D111430 Signed-off-by: Luís Ferreira --- llvm/lib/Demangle/DLangDemangle.cpp | 58 +++++++++++++++++-- llvm/unittests/Demangle/DLangDemangleTest.cpp | 6 +- 2 files changed, 57 insertions(+), 7 deletions(-) diff --git a/llvm/lib/Demangle/DLangDemangle.cpp b/llvm/lib/Demangle/DLangDemangle.cpp index 079a047b4a29..340728df00dc 100644 --- a/llvm/lib/Demangle/DLangDemangle.cpp +++ b/llvm/lib/Demangle/DLangDemangle.cpp @@ -335,6 +335,19 @@ struct Demangler { const char 
*parseInteger(OutputBuffer *Demangled, const char *Mangled, char Type); + /// Extract and demangle an struct literal value from a given mangled symbol + /// append it to the output string. + /// + /// \param Demangled output buffer to write the demangled name. + /// \param Mangled mangled symbol to be demangled. + /// \param Name demangled symbol name of the struct literal. + /// + /// \return the remaining string on success or nullptr on failure. + /// + /// \see https://dlang.org/spec/abi.html#Value . + const char *parseStructLiteral(OutputBuffer *Demangled, const char *Mangled, + const char *Name); + /// Extract and demangle an array literal value from a given mangled symbol /// append it to the output string. /// @@ -384,6 +397,7 @@ struct Demangler { /// /// \param Demangled output buffer to write the demangled name. /// \param Mangled mangled symbol to be demangled. + /// \param Name demangled symbol name of the type, if needed. /// \param Type mangled type character in which the type should be /// represented as, if needed. /// @@ -391,7 +405,7 @@ struct Demangler { /// /// \see https://dlang.org/spec/abi.html#Value . const char *parseValue(OutputBuffer *Demangled, const char *Mangled, - char Type); + const char *Name, char Type); /// The string we are demangling. const char *Str; @@ -1530,7 +1544,7 @@ const char *Demangler::parseTemplateArgs(OutputBuffer *Demangled, Name << '\0'; Name.setCurrentPosition(Name.getCurrentPosition() - 1); - Mangled = parseValue(Demangled, Mangled, Type); + Mangled = parseValue(Demangled, Mangled, Name.getBuffer(), Type); std::free(Name.getBuffer()); break; } @@ -1600,7 +1614,7 @@ const char *Demangler::parseTemplate(OutputBuffer *Demangled, } const char *Demangler::parseValue(OutputBuffer *Demangled, const char *Mangled, - char Type) { + const char *Name, char Type) { if (Mangled == nullptr || *Mangled == '\0') return nullptr; @@ -1666,6 +1680,12 @@ const char *Demangler::parseValue(OutputBuffer *Demangled, const char *Mangled, Mangled = parseArrayLiteral(Demangled, Mangled); break; + // Struct values. 
+ case 'S': + ++Mangled; + Mangled = parseStructLiteral(Demangled, Mangled, Name); + break; + default: return nullptr; } @@ -1673,6 +1693,32 @@ const char *Demangler::parseValue(OutputBuffer *Demangled, const char *Mangled, return Mangled; } +const char *Demangler::parseStructLiteral(OutputBuffer *Demangled, + const char *Mangled, + const char *Name) { + unsigned long Args; + + Mangled = decodeNumber(Mangled, &Args); + if (Mangled == nullptr) + return nullptr; + + if (Name != nullptr) + *Demangled << Name; + + *Demangled << '('; + while (Args--) { + Mangled = parseValue(Demangled, Mangled, nullptr, '\0'); + if (Mangled == nullptr) + return nullptr; + + if (Args != 0) + *Demangled << ", "; + } + + *Demangled << ')'; + return Mangled; +} + const char *Demangler::parseString(OutputBuffer *Demangled, const char *Mangled) { char Type = *Mangled; @@ -1897,7 +1943,7 @@ const char *Demangler::parseArrayLiteral(OutputBuffer *Demangled, *Demangled << '['; while (Elements--) { - Mangled = parseValue(Demangled, Mangled, '\0'); + Mangled = parseValue(Demangled, Mangled, nullptr, '\0'); if (Mangled == nullptr) return nullptr; @@ -1919,12 +1965,12 @@ const char *Demangler::parseAssocArray(OutputBuffer *Demangled, *Demangled << '['; while (Elements--) { - Mangled = parseValue(Demangled, Mangled, '\0'); + Mangled = parseValue(Demangled, Mangled, nullptr, '\0'); if (Mangled == nullptr) return nullptr; *Demangled << ':'; - Mangled = parseValue(Demangled, Mangled, '\0'); + Mangled = parseValue(Demangled, Mangled, nullptr, '\0'); if (Mangled == nullptr) return nullptr; diff --git a/llvm/unittests/Demangle/DLangDemangleTest.cpp b/llvm/unittests/Demangle/DLangDemangleTest.cpp index c054e740128a..64a33e8049a5 100644 --- a/llvm/unittests/Demangle/DLangDemangleTest.cpp +++ b/llvm/unittests/Demangle/DLangDemangleTest.cpp @@ -443,4 +443,8 @@ INSTANTIATE_TEST_SUITE_P( std::make_pair("_D8demangle23__T4testVHiiA2i1i2i3i4Zv", "demangle.test!([1:2, 3:4])"), std::make_pair("_D8demangle39__T4testVHAxaiA2a3_616263i1a3_646566i2Zv", - "demangle.test!([\"abc\":1, \"def\":2])"))); + "demangle.test!([\"abc\":1, \"def\":2])"), + std::make_pair("_D8demangle28__T4testVS8demangle1SS2i1i2Zv", + "demangle.test!(demangle.S(1, 2))"), + std::make_pair("_D8demangle35__T4testVS8demangle1SS2i1a3_616263Zv", + "demangle.test!(demangle.S(1, \"abc\"))"))); From 6b35229860ae3ab02edb76b26b1f4ee9965bcf2b Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Lu=C3=ADs=20Ferreira?= Date: Fri, 8 Oct 2021 01:13:09 +0000 Subject: [PATCH 270/293] [Demangle] Add support for D function literals and null literal value MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Differential Revision: https://reviews.llvm.org/D111431 Signed-off-by: Luís Ferreira --- llvm/lib/Demangle/DLangDemangle.cpp | 14 ++++++++++++++ llvm/unittests/Demangle/DLangDemangleTest.cpp | 5 ++++- 2 files changed, 18 insertions(+), 1 deletion(-) diff --git a/llvm/lib/Demangle/DLangDemangle.cpp b/llvm/lib/Demangle/DLangDemangle.cpp index 340728df00dc..262fcb063366 100644 --- a/llvm/lib/Demangle/DLangDemangle.cpp +++ b/llvm/lib/Demangle/DLangDemangle.cpp @@ -1619,6 +1619,12 @@ const char *Demangler::parseValue(OutputBuffer *Demangled, const char *Mangled, return nullptr; switch (*Mangled) { + // Null value. + case 'n': + ++Mangled; + *Demangled << "null"; + break; + // Integral values. 
case 'N': ++Mangled; @@ -1686,6 +1692,14 @@ const char *Demangler::parseValue(OutputBuffer *Demangled, const char *Mangled, Mangled = parseStructLiteral(Demangled, Mangled, Name); break; + // Function literal symbol. + case 'f': + ++Mangled; + if (strncmp(Mangled, "_D", 2) != 0 || !isSymbolName(Mangled + 2)) + return nullptr; + Mangled = parseMangle(Demangled, Mangled); + break; + default: return nullptr; } diff --git a/llvm/unittests/Demangle/DLangDemangleTest.cpp b/llvm/unittests/Demangle/DLangDemangleTest.cpp index 64a33e8049a5..58b3759ba4a5 100644 --- a/llvm/unittests/Demangle/DLangDemangleTest.cpp +++ b/llvm/unittests/Demangle/DLangDemangleTest.cpp @@ -447,4 +447,7 @@ INSTANTIATE_TEST_SUITE_P( std::make_pair("_D8demangle28__T4testVS8demangle1SS2i1i2Zv", "demangle.test!(demangle.S(1, 2))"), std::make_pair("_D8demangle35__T4testVS8demangle1SS2i1a3_616263Zv", - "demangle.test!(demangle.S(1, \"abc\"))"))); + "demangle.test!(demangle.S(1, \"abc\"))"), + std::make_pair("_D8demangle13__T4testVPinZv", "demangle.test!(null)"), + std::make_pair("_D8demangle__T3abcS_DQt10__lambda13FNaNbNiNfZiZQBhFZi", + "demangle.abc!(demangle.__lambda13()).abc()"))); From 1f0096d7af0412236d5eeaa160ad075ee14143be Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Lu=C3=ADs=20Ferreira?= Date: Sun, 28 Nov 2021 00:27:44 +0000 Subject: [PATCH 271/293] [lldb] Move generic DWARFASTParser code out of Clang-specific code MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Signed-off-by: Luís Ferreira --- .../Plugins/SymbolFile/DWARF/CMakeLists.txt | 1 + .../SymbolFile/DWARF/DWARFASTParser.cpp | 99 +++++++++++++++++++ .../SymbolFile/DWARF/DWARFASTParserClang.cpp | 93 ----------------- 3 files changed, 100 insertions(+), 93 deletions(-) create mode 100644 lldb/source/Plugins/SymbolFile/DWARF/DWARFASTParser.cpp diff --git a/lldb/source/Plugins/SymbolFile/DWARF/CMakeLists.txt b/lldb/source/Plugins/SymbolFile/DWARF/CMakeLists.txt index 550404314af5..71fc85012733 100644 --- a/lldb/source/Plugins/SymbolFile/DWARF/CMakeLists.txt +++ b/lldb/source/Plugins/SymbolFile/DWARF/CMakeLists.txt @@ -11,6 +11,7 @@ add_lldb_library(lldbPluginSymbolFileDWARF PLUGIN DebugNamesDWARFIndex.cpp DIERef.cpp DWARFAbbreviationDeclaration.cpp + DWARFASTParser.cpp DWARFASTParserClang.cpp DWARFAttribute.cpp DWARFBaseDIE.cpp diff --git a/lldb/source/Plugins/SymbolFile/DWARF/DWARFASTParser.cpp b/lldb/source/Plugins/SymbolFile/DWARF/DWARFASTParser.cpp new file mode 100644 index 000000000000..aeceede161ec --- /dev/null +++ b/lldb/source/Plugins/SymbolFile/DWARF/DWARFASTParser.cpp @@ -0,0 +1,99 @@ +//===-- DWARFASTParser.cpp ------------------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include "DWARFASTParser.h" +#include "DWARFAttribute.h" +#include "DWARFDIE.h" + +#include "lldb/Core/ValueObject.h" +#include "lldb/Symbol/SymbolFile.h" +#include "lldb/Target/StackFrame.h" + +using namespace lldb; +using namespace lldb_private; + +llvm::Optional +DWARFASTParser::ParseChildArrayInfo(const DWARFDIE &parent_die, + const ExecutionContext *exe_ctx) { + SymbolFile::ArrayInfo array_info; + if (!parent_die) + return llvm::None; + + for (DWARFDIE die : parent_die.children()) { + const dw_tag_t tag = die.Tag(); + if (tag != DW_TAG_subrange_type) + continue; + + DWARFAttributes attributes; + const size_t num_child_attributes = die.GetAttributes(attributes); + if (num_child_attributes > 0) { + uint64_t num_elements = 0; + uint64_t lower_bound = 0; + uint64_t upper_bound = 0; + bool upper_bound_valid = false; + uint32_t i; + for (i = 0; i < num_child_attributes; ++i) { + const dw_attr_t attr = attributes.AttributeAtIndex(i); + DWARFFormValue form_value; + if (attributes.ExtractFormValueAtIndex(i, form_value)) { + switch (attr) { + case DW_AT_name: + break; + + case DW_AT_count: + if (DWARFDIE var_die = die.GetReferencedDIE(DW_AT_count)) { + if (var_die.Tag() == DW_TAG_variable) + if (exe_ctx) { + if (auto frame = exe_ctx->GetFrameSP()) { + Status error; + lldb::VariableSP var_sp; + auto valobj_sp = frame->GetValueForVariableExpressionPath( + var_die.GetName(), eNoDynamicValues, 0, var_sp, error); + if (valobj_sp) { + num_elements = valobj_sp->GetValueAsUnsigned(0); + break; + } + } + } + } else + num_elements = form_value.Unsigned(); + break; + + case DW_AT_bit_stride: + array_info.bit_stride = form_value.Unsigned(); + break; + + case DW_AT_byte_stride: + array_info.byte_stride = form_value.Unsigned(); + break; + + case DW_AT_lower_bound: + lower_bound = form_value.Unsigned(); + break; + + case DW_AT_upper_bound: + upper_bound_valid = true; + upper_bound = form_value.Unsigned(); + break; + + default: + break; + } + } + } + + if (num_elements == 0) { + if (upper_bound_valid && upper_bound >= lower_bound) + num_elements = upper_bound - lower_bound + 1; + } + + array_info.element_orders.push_back(num_elements); + } + } + return array_info; +} diff --git a/lldb/source/Plugins/SymbolFile/DWARF/DWARFASTParserClang.cpp b/lldb/source/Plugins/SymbolFile/DWARF/DWARFASTParserClang.cpp index b90f104c4d21..7612de82d59e 100644 --- a/lldb/source/Plugins/SymbolFile/DWARF/DWARFASTParserClang.cpp +++ b/lldb/source/Plugins/SymbolFile/DWARF/DWARFASTParserClang.cpp @@ -3069,99 +3069,6 @@ size_t DWARFASTParserClang::ParseChildParameters( return arg_idx; } -llvm::Optional -DWARFASTParser::ParseChildArrayInfo(const DWARFDIE &parent_die, - const ExecutionContext *exe_ctx) { - SymbolFile::ArrayInfo array_info; - if (!parent_die) - return llvm::None; - - for (DWARFDIE die : parent_die.children()) { - const dw_tag_t tag = die.Tag(); - if (tag != DW_TAG_subrange_type) - continue; - - DWARFAttributes attributes; - const size_t num_child_attributes = die.GetAttributes(attributes); - if (num_child_attributes > 0) { - uint64_t num_elements = 0; - uint64_t lower_bound = 0; - uint64_t upper_bound = 0; - bool upper_bound_valid = false; - uint32_t i; - for (i = 0; i < num_child_attributes; ++i) { - const dw_attr_t attr = attributes.AttributeAtIndex(i); - DWARFFormValue form_value; - if (attributes.ExtractFormValueAtIndex(i, form_value)) { - switch (attr) { - case 
DW_AT_name: - break; - - case DW_AT_count: - if (DWARFDIE var_die = die.GetReferencedDIE(DW_AT_count)) { - if (var_die.Tag() == DW_TAG_variable) - if (exe_ctx) { - if (auto frame = exe_ctx->GetFrameSP()) { - Status error; - lldb::VariableSP var_sp; - auto valobj_sp = frame->GetValueForVariableExpressionPath( - var_die.GetName(), eNoDynamicValues, 0, var_sp, - error); - if (valobj_sp) { - num_elements = valobj_sp->GetValueAsUnsigned(0); - break; - } - } - } - } else - num_elements = form_value.Unsigned(); - break; - - case DW_AT_bit_stride: - array_info.bit_stride = form_value.Unsigned(); - break; - - case DW_AT_byte_stride: - array_info.byte_stride = form_value.Unsigned(); - break; - - case DW_AT_lower_bound: - lower_bound = form_value.Unsigned(); - break; - - case DW_AT_upper_bound: - upper_bound_valid = true; - upper_bound = form_value.Unsigned(); - break; - - default: - case DW_AT_abstract_origin: - case DW_AT_accessibility: - case DW_AT_allocated: - case DW_AT_associated: - case DW_AT_data_location: - case DW_AT_declaration: - case DW_AT_description: - case DW_AT_sibling: - case DW_AT_threads_scaled: - case DW_AT_type: - case DW_AT_visibility: - break; - } - } - } - - if (num_elements == 0) { - if (upper_bound_valid && upper_bound >= lower_bound) - num_elements = upper_bound - lower_bound + 1; - } - - array_info.element_orders.push_back(num_elements); - } - } - return array_info; -} - Type *DWARFASTParserClang::GetTypeForDIE(const DWARFDIE &die) { if (die) { SymbolFileDWARF *dwarf = die.GetDWARF(); From 737913a9e26e9590a920bad5ff7ea565e8523cb6 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Lu=C3=ADs=20Ferreira?= Date: Mon, 29 Nov 2021 17:33:37 +0000 Subject: [PATCH 272/293] [lldb] Move non-clang specific method to the generic DWARF Parser MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This patch renames DW_ACCESS_to_AccessType function and move it to the abstract DWARFASTParser, since there is no clang-specific code there. This is useful for plugins other than Clang. 
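
For illustration only (the enclosing attribute loop and any non-Clang parser
subclass are assumed context, not part of this change), a language plugin's
DWARF parser can now reuse the shared helper when decoding
DW_AT_accessibility instead of duplicating the switch:

    // Sketch: inside the attribute-iteration loop of a hypothetical
    // non-Clang DWARFASTParser subclass, translate the raw DW_ACCESS_*
    // value carried by DW_AT_accessibility into lldb::AccessType.
    case DW_AT_accessibility:
      accessibility =
          DWARFASTParser::GetAccessTypeFromDWARF(form_value.Unsigned());
      break;
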
Signed-off-by: Luís Ferreira --- .../SymbolFile/DWARF/DWARFASTParser.cpp | 15 +++++++++++++ .../Plugins/SymbolFile/DWARF/DWARFASTParser.h | 3 +++ .../SymbolFile/DWARF/DWARFASTParserClang.cpp | 21 ++++--------------- 3 files changed, 22 insertions(+), 17 deletions(-) diff --git a/lldb/source/Plugins/SymbolFile/DWARF/DWARFASTParser.cpp b/lldb/source/Plugins/SymbolFile/DWARF/DWARFASTParser.cpp index aeceede161ec..089565bf10db 100644 --- a/lldb/source/Plugins/SymbolFile/DWARF/DWARFASTParser.cpp +++ b/lldb/source/Plugins/SymbolFile/DWARF/DWARFASTParser.cpp @@ -97,3 +97,18 @@ DWARFASTParser::ParseChildArrayInfo(const DWARFDIE &parent_die, } return array_info; } + +AccessType +DWARFASTParser::GetAccessTypeFromDWARF(uint32_t dwarf_accessibility) { + switch (dwarf_accessibility) { + case DW_ACCESS_public: + return eAccessPublic; + case DW_ACCESS_private: + return eAccessPrivate; + case DW_ACCESS_protected: + return eAccessProtected; + default: + break; + } + return eAccessNone; +} diff --git a/lldb/source/Plugins/SymbolFile/DWARF/DWARFASTParser.h b/lldb/source/Plugins/SymbolFile/DWARF/DWARFASTParser.h index 00123a4b9216..97b0ea1874e1 100644 --- a/lldb/source/Plugins/SymbolFile/DWARF/DWARFASTParser.h +++ b/lldb/source/Plugins/SymbolFile/DWARF/DWARFASTParser.h @@ -14,6 +14,7 @@ #include "lldb/Symbol/SymbolFile.h" #include "lldb/Symbol/CompilerDecl.h" #include "lldb/Symbol/CompilerDeclContext.h" +#include "lldb/lldb-enumerations.h" class DWARFDIE; namespace lldb_private { @@ -54,6 +55,8 @@ class DWARFASTParser { static llvm::Optional ParseChildArrayInfo(const DWARFDIE &parent_die, const lldb_private::ExecutionContext *exe_ctx = nullptr); + + static lldb::AccessType GetAccessTypeFromDWARF(uint32_t dwarf_accessibility); }; #endif // LLDB_SOURCE_PLUGINS_SYMBOLFILE_DWARF_DWARFASTPARSER_H diff --git a/lldb/source/Plugins/SymbolFile/DWARF/DWARFASTParserClang.cpp b/lldb/source/Plugins/SymbolFile/DWARF/DWARFASTParserClang.cpp index 7612de82d59e..248051c15f09 100644 --- a/lldb/source/Plugins/SymbolFile/DWARF/DWARFASTParserClang.cpp +++ b/lldb/source/Plugins/SymbolFile/DWARF/DWARFASTParserClang.cpp @@ -8,6 +8,7 @@ #include +#include "DWARFASTParser.h" #include "DWARFASTParserClang.h" #include "DWARFDebugInfo.h" #include "DWARFDeclContext.h" @@ -62,20 +63,6 @@ DWARFASTParserClang::DWARFASTParserClang(TypeSystemClang &ast) DWARFASTParserClang::~DWARFASTParserClang() = default; -static AccessType DW_ACCESS_to_AccessType(uint32_t dwarf_accessibility) { - switch (dwarf_accessibility) { - case DW_ACCESS_public: - return eAccessPublic; - case DW_ACCESS_private: - return eAccessPrivate; - case DW_ACCESS_protected: - return eAccessProtected; - default: - break; - } - return eAccessNone; -} - static bool DeclKindIsCXXClass(clang::Decl::Kind decl_kind) { switch (decl_kind) { case clang::Decl::CXXRecord: @@ -313,7 +300,7 @@ ParsedDWARFTypeAttributes::ParsedDWARFTypeAttributes(const DWARFDIE &die) { break; case DW_AT_accessibility: - accessibility = DW_ACCESS_to_AccessType(form_value.Unsigned()); + accessibility = DWARFASTParser::GetAccessTypeFromDWARF(form_value.Unsigned()); break; case DW_AT_artificial: @@ -1463,7 +1450,7 @@ void DWARFASTParserClang::ParseInheritance( break; case DW_AT_accessibility: - accessibility = DW_ACCESS_to_AccessType(form_value.Unsigned()); + accessibility = DWARFASTParser::GetAccessTypeFromDWARF(form_value.Unsigned()); break; case DW_AT_virtuality: @@ -2527,7 +2514,7 @@ MemberAttributes::MemberAttributes(const DWARFDIE &die, break; case DW_AT_accessibility: - accessibility = 
DW_ACCESS_to_AccessType(form_value.Unsigned()); + accessibility = DWARFASTParser::GetAccessTypeFromDWARF(form_value.Unsigned()); break; case DW_AT_artificial: is_artificial = form_value.Boolean(); From dda3ad783d7c6d9338026c318bf7376bc20f49e4 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Lu=C3=ADs=20Ferreira?= Date: Mon, 29 Nov 2021 23:02:37 +0000 Subject: [PATCH 273/293] [lldb][NFC] Generalize ParsedDWARFTypeAttributes MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This patch generalize the ParsedDWARFTypeAttributes struct to be able to share with other DWARF parsers other than clang. Signed-off-by: Luís Ferreira --- .../SymbolFile/DWARF/DWARFASTParser.cpp | 139 +++++++++++++++ .../Plugins/SymbolFile/DWARF/DWARFASTParser.h | 136 ++++++++++++++ .../SymbolFile/DWARF/DWARFASTParserClang.cpp | 167 ++---------------- .../SymbolFile/DWARF/DWARFASTParserClang.h | 35 ---- .../Plugins/SymbolFile/DWARF/DWARFAttribute.h | 3 + 5 files changed, 297 insertions(+), 183 deletions(-) diff --git a/lldb/source/Plugins/SymbolFile/DWARF/DWARFASTParser.cpp b/lldb/source/Plugins/SymbolFile/DWARF/DWARFASTParser.cpp index 089565bf10db..2a471b6b3601 100644 --- a/lldb/source/Plugins/SymbolFile/DWARF/DWARFASTParser.cpp +++ b/lldb/source/Plugins/SymbolFile/DWARF/DWARFASTParser.cpp @@ -9,6 +9,7 @@ #include "DWARFASTParser.h" #include "DWARFAttribute.h" #include "DWARFDIE.h" +#include "DWARFUnit.h" #include "lldb/Core/ValueObject.h" #include "lldb/Symbol/SymbolFile.h" @@ -112,3 +113,141 @@ DWARFASTParser::GetAccessTypeFromDWARF(uint32_t dwarf_accessibility) { } return eAccessNone; } + +ParsedDWARFTypeAttributes::ParsedDWARFTypeAttributes(const DWARFDIE &die) { + DWARFAttributes attributes; + size_t num_attributes = die.GetAttributes(attributes); + for (size_t i = 0; i < num_attributes; ++i) { + dw_attr_t attr = attributes.AttributeAtIndex(i); + DWARFFormValue form_value; + if (!attributes.ExtractFormValueAtIndex(i, form_value)) + continue; + switch (attr) { + case DW_AT_abstract_origin: + abstract_origin = form_value; + break; + + case DW_AT_accessibility: + accessibility = DWARFASTParser::GetAccessTypeFromDWARF(form_value.Unsigned()); + break; + + case DW_AT_artificial: + if (form_value.Boolean()) + setIsArtificial(); + break; + + case DW_AT_bit_stride: + bit_stride = form_value.Unsigned(); + break; + + case DW_AT_byte_size: + byte_size = form_value.Unsigned(); + break; + + case DW_AT_byte_stride: + byte_stride = form_value.Unsigned(); + break; + + case DW_AT_calling_convention: + calling_convention = form_value.Unsigned(); + break; + + case DW_AT_containing_type: + containing_type = form_value; + break; + + case DW_AT_decl_file: + // die.GetCU() can differ if DW_AT_specification uses DW_FORM_ref_addr. 
+ decl.SetFile( + attributes.CompileUnitAtIndex(i)->GetFile(form_value.Unsigned())); + break; + case DW_AT_decl_line: + decl.SetLine(form_value.Unsigned()); + break; + case DW_AT_decl_column: + decl.SetColumn(form_value.Unsigned()); + break; + + case DW_AT_declaration: + if (form_value.Boolean()) + setIsForwardDeclaration(); + break; + + case DW_AT_encoding: + encoding = form_value.Unsigned(); + break; + + case DW_AT_enum_class: + if (form_value.Boolean()) + setIsScopedEnum(); + break; + + case DW_AT_explicit: + if (form_value.Boolean()) + setIsExplicit(); + break; + + case DW_AT_external: + if (form_value.Unsigned()) + setIsExternal(); + break; + + case DW_AT_inline: + if (form_value.Boolean()) + setIsInline(); + break; + + case DW_AT_linkage_name: + case DW_AT_MIPS_linkage_name: + mangled_name = form_value.AsCString(); + break; + + case DW_AT_name: + name.SetCString(form_value.AsCString()); + break; + + case DW_AT_object_pointer: + object_pointer = form_value.Reference(); + break; + + case DW_AT_signature: + signature = form_value; + break; + + case DW_AT_specification: + specification = form_value; + break; + + case DW_AT_type: + type = form_value; + break; + + case DW_AT_virtuality: + if (form_value.Unsigned()) + setIsVirtual(); + break; + + case DW_AT_APPLE_objc_complete_type: + if (form_value.Signed()) + setIsObjCCompleteType(); + break; + + case DW_AT_APPLE_objc_direct: + setIsObjCDirectCall(); + break; + + case DW_AT_APPLE_runtime_class: + class_language = (LanguageType)form_value.Signed(); + break; + + case DW_AT_GNU_vector: + if (form_value.Boolean()) + setIsVector(); + break; + case DW_AT_export_symbols: + if (form_value.Boolean()) + setIsExportsSymbols(); + break; + } + } +} diff --git a/lldb/source/Plugins/SymbolFile/DWARF/DWARFASTParser.h b/lldb/source/Plugins/SymbolFile/DWARF/DWARFASTParser.h index 97b0ea1874e1..d4df79332b56 100644 --- a/lldb/source/Plugins/SymbolFile/DWARF/DWARFASTParser.h +++ b/lldb/source/Plugins/SymbolFile/DWARF/DWARFASTParser.h @@ -9,7 +9,11 @@ #ifndef LLDB_SOURCE_PLUGINS_SYMBOLFILE_DWARF_DWARFASTPARSER_H #define LLDB_SOURCE_PLUGINS_SYMBOLFILE_DWARF_DWARFASTPARSER_H +#include "DWARFDIE.h" #include "DWARFDefines.h" +#include "DWARFFormValue.h" +#include "lldb/Core/Declaration.h" +#include "lldb/Utility/ConstString.h" #include "lldb/Core/PluginInterface.h" #include "lldb/Symbol/SymbolFile.h" #include "lldb/Symbol/CompilerDecl.h" @@ -59,4 +63,136 @@ class DWARFASTParser { static lldb::AccessType GetAccessTypeFromDWARF(uint32_t dwarf_accessibility); }; +/// Parsed form of all attributes that are relevant for type reconstruction. +/// Some attributes are relevant for all kinds of types (declaration), while +/// others are only meaningful to a specific type (is_virtual). +struct ParsedDWARFTypeAttributes { + explicit ParsedDWARFTypeAttributes(const DWARFDIE &die); + + lldb::AccessType accessibility = lldb::eAccessNone; + const char *mangled_name = nullptr; + lldb_private::ConstString name; + lldb_private::Declaration decl; + DWARFDIE object_pointer; + DWARFFormValue abstract_origin; + DWARFFormValue containing_type; + DWARFFormValue signature; + DWARFFormValue specification; + DWARFFormValue type; + lldb::LanguageType class_language = lldb::eLanguageTypeUnknown; + llvm::Optional byte_size; + size_t calling_convention = llvm::dwarf::DW_CC_normal; + uint32_t bit_stride = 0; + uint32_t byte_stride = 0; + uint32_t encoding = 0; + uint32_t attr_flags = 0; + + // clang-format off + enum DWARFAttributeFlags : unsigned { + /// Represents no attributes. 
+ eDWARFAttributeNone = 0u, + /// Whether it is an artificially generated symbol. + eDWARFAttributeArtificial = 1u << 0, + /// Whether it has explicit property of member function. + eDWARFAttributeExplicit = 1u << 1, + /// Whether it is a forward declaration. + eDWARFAttributeForwardDecl = 1u << 2, + /// Whether it is an inlined symbol. + eDWARFAttributeInline = 1u << 3, + /// Whether it is a scoped enumeration (class enum). + eDWARFAttributeScopedEnum = 1u << 4, + /// Whether it has vector attribute. + eDWARFAttributeVector = 1u << 5, + /// Whether it has virtuality attribute. + eDWARFAttributeVirtual = 1u << 6, + /// Whether it is an external symbol. + eDWARFAttributeExternal = 1u << 7, + /// Whether it export symbols to the containing scope. + eDWARFAttributeExportSymbols = 1u << 8, + /// Whether it is an Objective-C direct call. + eDWARFAttributeObjCDirect = 1u << 9, + /// Whether it is an Objective-C complete type. + eDWARFAttributeObjCCompleteType = 1u << 10, + LLVM_MARK_AS_BITMASK_ENUM(/*LargestValue=*/eDWARFAttributeObjCCompleteType), + }; + // clang-format on + + void setIsArtificial() { + attr_flags |= eDWARFAttributeArtificial; + } + bool isArtificial() const { + return attr_flags & eDWARFAttributeArtificial; + } + + void setIsExplicit() { + attr_flags |= eDWARFAttributeExplicit; + } + bool isExplicit() const { + return attr_flags & eDWARFAttributeExplicit; + } + + void setIsForwardDeclaration() { + attr_flags |= eDWARFAttributeForwardDecl; + } + bool isForwardDeclaration() const { + return attr_flags & eDWARFAttributeForwardDecl; + } + + void setIsInline() { + attr_flags |= eDWARFAttributeInline; + } + bool isInline() const { + return attr_flags & eDWARFAttributeInline; + } + + void setIsScopedEnum() { + attr_flags |= eDWARFAttributeScopedEnum; + } + bool isScopedEnum() const { + return attr_flags & eDWARFAttributeScopedEnum; + } + + void setIsVector() { + attr_flags |= eDWARFAttributeVector; + } + bool isVector() const { + return attr_flags & eDWARFAttributeVector; + } + + void setIsVirtual() { + attr_flags |= eDWARFAttributeVirtual; + } + bool isVirtual() const { + return attr_flags & eDWARFAttributeVirtual; + } + + void setIsExternal() { + attr_flags |= eDWARFAttributeExternal; + } + bool isExternal() const { + return attr_flags & eDWARFAttributeExternal; + } + + void setIsExportsSymbols() { + attr_flags |= eDWARFAttributeExportSymbols; + } + bool isExportsSymbols() const { + return attr_flags & eDWARFAttributeExportSymbols; + } + + void setIsObjCDirectCall() { + attr_flags |= eDWARFAttributeObjCDirect; + } + bool isObjCDirectCall() const { + return attr_flags & eDWARFAttributeObjCDirect; + } + + void setIsObjCCompleteType() { + attr_flags |= eDWARFAttributeObjCCompleteType; + } + bool isObjCCompleteType() const { + return attr_flags & eDWARFAttributeObjCCompleteType; + } +}; + #endif // LLDB_SOURCE_PLUGINS_SYMBOLFILE_DWARF_DWARFASTPARSER_H diff --git a/lldb/source/Plugins/SymbolFile/DWARF/DWARFASTParserClang.cpp b/lldb/source/Plugins/SymbolFile/DWARF/DWARFASTParserClang.cpp index 248051c15f09..bd0696da604a 100644 --- a/lldb/source/Plugins/SymbolFile/DWARF/DWARFASTParserClang.cpp +++ b/lldb/source/Plugins/SymbolFile/DWARF/DWARFASTParserClang.cpp @@ -286,135 +286,6 @@ static void PrepareContextToReceiveMembers(TypeSystemClang &ast, ForcefullyCompleteType(type); } -ParsedDWARFTypeAttributes::ParsedDWARFTypeAttributes(const DWARFDIE &die) { - DWARFAttributes attributes; - size_t num_attributes = die.GetAttributes(attributes); - for (size_t i = 0; i < num_attributes; ++i) { - 
dw_attr_t attr = attributes.AttributeAtIndex(i); - DWARFFormValue form_value; - if (!attributes.ExtractFormValueAtIndex(i, form_value)) - continue; - switch (attr) { - case DW_AT_abstract_origin: - abstract_origin = form_value; - break; - - case DW_AT_accessibility: - accessibility = DWARFASTParser::GetAccessTypeFromDWARF(form_value.Unsigned()); - break; - - case DW_AT_artificial: - is_artificial = form_value.Boolean(); - break; - - case DW_AT_bit_stride: - bit_stride = form_value.Unsigned(); - break; - - case DW_AT_byte_size: - byte_size = form_value.Unsigned(); - break; - - case DW_AT_byte_stride: - byte_stride = form_value.Unsigned(); - break; - - case DW_AT_calling_convention: - calling_convention = form_value.Unsigned(); - break; - - case DW_AT_containing_type: - containing_type = form_value; - break; - - case DW_AT_decl_file: - // die.GetCU() can differ if DW_AT_specification uses DW_FORM_ref_addr. - decl.SetFile( - attributes.CompileUnitAtIndex(i)->GetFile(form_value.Unsigned())); - break; - case DW_AT_decl_line: - decl.SetLine(form_value.Unsigned()); - break; - case DW_AT_decl_column: - decl.SetColumn(form_value.Unsigned()); - break; - - case DW_AT_declaration: - is_forward_declaration = form_value.Boolean(); - break; - - case DW_AT_encoding: - encoding = form_value.Unsigned(); - break; - - case DW_AT_enum_class: - is_scoped_enum = form_value.Boolean(); - break; - - case DW_AT_explicit: - is_explicit = form_value.Boolean(); - break; - - case DW_AT_external: - if (form_value.Unsigned()) - storage = clang::SC_Extern; - break; - - case DW_AT_inline: - is_inline = form_value.Boolean(); - break; - - case DW_AT_linkage_name: - case DW_AT_MIPS_linkage_name: - mangled_name = form_value.AsCString(); - break; - - case DW_AT_name: - name.SetCString(form_value.AsCString()); - break; - - case DW_AT_object_pointer: - object_pointer = form_value.Reference(); - break; - - case DW_AT_signature: - signature = form_value; - break; - - case DW_AT_specification: - specification = form_value; - break; - - case DW_AT_type: - type = form_value; - break; - - case DW_AT_virtuality: - is_virtual = form_value.Boolean(); - break; - - case DW_AT_APPLE_objc_complete_type: - is_complete_objc_class = form_value.Signed(); - break; - - case DW_AT_APPLE_objc_direct: - is_objc_direct_call = true; - break; - - case DW_AT_APPLE_runtime_class: - class_language = (LanguageType)form_value.Signed(); - break; - - case DW_AT_GNU_vector: - is_vector = form_value.Boolean(); - break; - case DW_AT_export_symbols: - exports_symbols = form_value.Boolean(); - break; - } - } -} - static std::string GetUnitName(const DWARFDIE &die) { if (DWARFUnit *unit = die.GetCU()) return unit->GetAbsolutePath().GetPath(); @@ -764,7 +635,7 @@ TypeSP DWARFASTParserClang::ParseEnum(const SymbolContext &sc, const dw_tag_t tag = die.Tag(); TypeSP type_sp; - if (attrs.is_forward_declaration) { + if (attrs.isForwardDeclaration()) { type_sp = ParseTypeFromClangModule(sc, die, log); if (type_sp) return type_sp; @@ -833,7 +704,7 @@ TypeSP DWARFASTParserClang::ParseEnum(const SymbolContext &sc, attrs.name.GetStringRef(), GetClangDeclContextContainingDIE(die, nullptr), GetOwningClangModule(die), attrs.decl, enumerator_clang_type, - attrs.is_scoped_enum); + attrs.isScopedEnum()); } else { enumerator_clang_type = m_ast.GetEnumerationIntegerType(clang_type); } @@ -1012,8 +883,8 @@ TypeSP DWARFASTParserClang::ParseSubroutine(const DWARFDIE &die, clang::ObjCMethodDecl *objc_method_decl = m_ast.AddMethodToObjCObjectType( class_opaque_type, attrs.name.GetCString(), 
clang_type, - attrs.accessibility, attrs.is_artificial, is_variadic, - attrs.is_objc_direct_call); + attrs.accessibility, attrs.isArtificial(), is_variadic, + attrs.isObjCDirectCall()); type_handled = objc_method_decl != NULL; if (type_handled) { LinkDeclContextToDIE(objc_method_decl, die); @@ -1161,14 +1032,14 @@ TypeSP DWARFASTParserClang::ParseSubroutine(const DWARFDIE &die, m_ast.AddMethodToCXXRecordType( class_opaque_type.GetOpaqueQualType(), attrs.name.GetCString(), attrs.mangled_name, - clang_type, attrs.accessibility, attrs.is_virtual, - is_static, attrs.is_inline, attrs.is_explicit, - is_attr_used, attrs.is_artificial); + clang_type, attrs.accessibility, attrs.isVirtual(), + is_static, attrs.isInline(), attrs.isExplicit(), + is_attr_used, attrs.isArtificial()); type_handled = cxx_method_decl != NULL; // Artificial methods are always handled even when we // don't create a new declaration for them. - type_handled |= attrs.is_artificial; + type_handled |= attrs.isArtificial(); if (cxx_method_decl) { LinkDeclContextToDIE(cxx_method_decl, die); @@ -1265,8 +1136,8 @@ TypeSP DWARFASTParserClang::ParseSubroutine(const DWARFDIE &die, function_decl = m_ast.CreateFunctionDeclaration( ignore_containing_context ? m_ast.GetTranslationUnitDecl() : containing_decl_ctx, - GetOwningClangModule(die), name, clang_type, attrs.storage, - attrs.is_inline); + GetOwningClangModule(die), name, clang_type, attrs.isExternal() ? clang::SC_Extern : clang::SC_None, + attrs.isInline()); std::free(name_buf); if (has_template_params) { @@ -1276,7 +1147,7 @@ TypeSP DWARFASTParserClang::ParseSubroutine(const DWARFDIE &die, ignore_containing_context ? m_ast.GetTranslationUnitDecl() : containing_decl_ctx, GetOwningClangModule(die), attrs.name.GetStringRef(), clang_type, - attrs.storage, attrs.is_inline); + attrs.isExternal() ? clang::SC_Extern : clang::SC_None, attrs.isInline()); clang::FunctionTemplateDecl *func_template_decl = m_ast.CreateFunctionTemplateDecl( containing_decl_ctx, GetOwningClangModule(die), @@ -1350,7 +1221,7 @@ TypeSP DWARFASTParserClang::ParseArrayType(const DWARFDIE &die, for (auto pos = array_info->element_orders.rbegin(); pos != end; ++pos) { num_elements = *pos; clang_type = m_ast.CreateArrayType(array_element_type, num_elements, - attrs.is_vector); + attrs.isVector()); array_element_type = clang_type; array_element_bit_stride = num_elements ? array_element_bit_stride * num_elements @@ -1358,7 +1229,7 @@ TypeSP DWARFASTParserClang::ParseArrayType(const DWARFDIE &die, } } else { clang_type = - m_ast.CreateArrayType(array_element_type, 0, attrs.is_vector); + m_ast.CreateArrayType(array_element_type, 0, attrs.isVector()); } ConstString empty_name; TypeSP type_sp = std::make_shared( @@ -1613,12 +1484,12 @@ DWARFASTParserClang::ParseStructureLikeDIE(const SymbolContext &sc, // // Note that there is no DW_AT_declaration and there are no children, // and the byte size is zero. 
- attrs.is_forward_declaration = true; + attrs.setIsForwardDeclaration(); } if (attrs.class_language == eLanguageTypeObjC || attrs.class_language == eLanguageTypeObjC_plus_plus) { - if (!attrs.is_complete_objc_class && + if (!attrs.isObjCCompleteType() && die.Supports_DW_AT_APPLE_objc_complete_type()) { // We have a valid eSymbolTypeObjCClass class symbol whose name // matches the current objective C class that we are trying to find @@ -1659,7 +1530,7 @@ DWARFASTParserClang::ParseStructureLikeDIE(const SymbolContext &sc, } } - if (attrs.is_forward_declaration) { + if (attrs.isForwardDeclaration()) { // We have a forward declaration to a type and we need to try and // find a full declaration. We look in the current type index just in // case we have a forward declaration followed by an actual @@ -1778,7 +1649,7 @@ DWARFASTParserClang::ParseStructureLikeDIE(const SymbolContext &sc, clang_type = m_ast.CreateRecordType( decl_ctx, GetOwningClangModule(die), attrs.accessibility, attrs.name.GetCString(), tag_decl_kind, attrs.class_language, - &metadata, attrs.exports_symbols); + &metadata, attrs.isExportsSymbols()); } } @@ -1790,7 +1661,7 @@ DWARFASTParserClang::ParseStructureLikeDIE(const SymbolContext &sc, die.GetID(), dwarf, attrs.name, attrs.byte_size, nullptr, LLDB_INVALID_UID, Type::eEncodingIsUID, &attrs.decl, clang_type, Type::ResolveState::Forward, - TypePayloadClang(OptionalClangModuleID(), attrs.is_complete_objc_class)); + TypePayloadClang(OptionalClangModuleID(), attrs.isObjCCompleteType())); // Add our type to the unique type map so we don't end up creating many // copies of the same type over and over in the ASTContext for our @@ -1802,7 +1673,7 @@ DWARFASTParserClang::ParseStructureLikeDIE(const SymbolContext &sc, dwarf->GetUniqueDWARFASTTypeMap().Insert(unique_typename, *unique_ast_entry_up); - if (!attrs.is_forward_declaration) { + if (!attrs.isForwardDeclaration()) { // Always start the definition for a class type so that if the class // has child classes or types that require the class to be created // for use as their decl contexts the class will be ready to accept diff --git a/lldb/source/Plugins/SymbolFile/DWARF/DWARFASTParserClang.h b/lldb/source/Plugins/SymbolFile/DWARF/DWARFASTParserClang.h index f97c0c470ab0..61306c7285d4 100644 --- a/lldb/source/Plugins/SymbolFile/DWARF/DWARFASTParserClang.h +++ b/lldb/source/Plugins/SymbolFile/DWARF/DWARFASTParserClang.h @@ -252,39 +252,4 @@ class DWARFASTParserClang : public DWARFASTParser { lldb_private::ClangASTImporter::LayoutInfo &layout_info); }; -/// Parsed form of all attributes that are relevant for type reconstruction. 
-/// Some attributes are relevant for all kinds of types (declaration), while -/// others are only meaningful to a specific type (is_virtual) -struct ParsedDWARFTypeAttributes { - explicit ParsedDWARFTypeAttributes(const DWARFDIE &die); - - lldb::AccessType accessibility = lldb::eAccessNone; - bool is_artificial = false; - bool is_complete_objc_class = false; - bool is_explicit = false; - bool is_forward_declaration = false; - bool is_inline = false; - bool is_scoped_enum = false; - bool is_vector = false; - bool is_virtual = false; - bool is_objc_direct_call = false; - bool exports_symbols = false; - clang::StorageClass storage = clang::SC_None; - const char *mangled_name = nullptr; - lldb_private::ConstString name; - lldb_private::Declaration decl; - DWARFDIE object_pointer; - DWARFFormValue abstract_origin; - DWARFFormValue containing_type; - DWARFFormValue signature; - DWARFFormValue specification; - DWARFFormValue type; - lldb::LanguageType class_language = lldb::eLanguageTypeUnknown; - llvm::Optional byte_size; - size_t calling_convention = llvm::dwarf::DW_CC_normal; - uint32_t bit_stride = 0; - uint32_t byte_stride = 0; - uint32_t encoding = 0; -}; - #endif // LLDB_SOURCE_PLUGINS_SYMBOLFILE_DWARF_DWARFASTPARSERCLANG_H diff --git a/lldb/source/Plugins/SymbolFile/DWARF/DWARFAttribute.h b/lldb/source/Plugins/SymbolFile/DWARF/DWARFAttribute.h index a31ee861179c..d4d9383a2ff7 100644 --- a/lldb/source/Plugins/SymbolFile/DWARF/DWARFAttribute.h +++ b/lldb/source/Plugins/SymbolFile/DWARF/DWARFAttribute.h @@ -9,8 +9,11 @@ #ifndef LLDB_SOURCE_PLUGINS_SYMBOLFILE_DWARF_DWARFATTRIBUTE_H #define LLDB_SOURCE_PLUGINS_SYMBOLFILE_DWARF_DWARFATTRIBUTE_H +#include "DWARFDIE.h" #include "DWARFDefines.h" #include "DWARFFormValue.h" +#include "lldb/Core/Declaration.h" +#include "lldb/Utility/ConstString.h" #include "llvm/ADT/SmallVector.h" #include From 499d8eb1a194940957a96fe8ef4f01e4f50e5ad5 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Lu=C3=ADs=20Ferreira?= Date: Mon, 6 Dec 2021 17:12:48 +0000 Subject: [PATCH 274/293] [lldb] Move UpdateSymbolContextScopeForType MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Signed-off-by: Luís Ferreira --- .../SymbolFile/DWARF/DWARFASTParser.cpp | 36 +++++++++++++++++++ .../Plugins/SymbolFile/DWARF/DWARFASTParser.h | 8 +++++ .../SymbolFile/DWARF/DWARFASTParserClang.cpp | 29 --------------- .../SymbolFile/DWARF/DWARFASTParserClang.h | 8 ----- .../SymbolFile/DWARF/SymbolFileDWARF.h | 1 + 5 files changed, 45 insertions(+), 37 deletions(-) diff --git a/lldb/source/Plugins/SymbolFile/DWARF/DWARFASTParser.cpp b/lldb/source/Plugins/SymbolFile/DWARF/DWARFASTParser.cpp index 2a471b6b3601..696102162259 100644 --- a/lldb/source/Plugins/SymbolFile/DWARF/DWARFASTParser.cpp +++ b/lldb/source/Plugins/SymbolFile/DWARF/DWARFASTParser.cpp @@ -11,7 +11,9 @@ #include "DWARFDIE.h" #include "DWARFUnit.h" +#include "lldb/Core/Module.h" #include "lldb/Core/ValueObject.h" +#include "lldb/Symbol/CompileUnit.h" #include "lldb/Symbol/SymbolFile.h" #include "lldb/Target/StackFrame.h" @@ -251,3 +253,37 @@ ParsedDWARFTypeAttributes::ParsedDWARFTypeAttributes(const DWARFDIE &die) { } } } + +TypeSP DWARFASTParser::UpdateSymbolContextScopeForType( + const SymbolContext &sc, const DWARFDIE &die, TypeSP type_sp) { + if (!type_sp) + return type_sp; + + SymbolFileDWARF *dwarf = die.GetDWARF(); + TypeList &type_list = dwarf->GetTypeList(); + DWARFDIE sc_parent_die = SymbolFileDWARF::GetParentSymbolContextDIE(die); + dw_tag_t sc_parent_tag = sc_parent_die.Tag(); + + 
SymbolContextScope *symbol_context_scope = nullptr; + if (sc_parent_tag == DW_TAG_compile_unit || + sc_parent_tag == DW_TAG_partial_unit) { + symbol_context_scope = sc.comp_unit; + } else if (sc.function != nullptr && sc_parent_die) { + symbol_context_scope = + sc.function->GetBlock(true).FindBlockByID(sc_parent_die.GetID()); + if (symbol_context_scope == nullptr) + symbol_context_scope = sc.function; + } else { + symbol_context_scope = sc.module_sp.get(); + } + + if (symbol_context_scope != nullptr) + type_sp->SetSymbolContextScope(symbol_context_scope); + + // We are ready to put this type into the uniqued list up at the module + // level. + type_list.Insert(type_sp); + + dwarf->GetDIEToType()[die.GetDIE()] = type_sp.get(); + return type_sp; +} diff --git a/lldb/source/Plugins/SymbolFile/DWARF/DWARFASTParser.h b/lldb/source/Plugins/SymbolFile/DWARF/DWARFASTParser.h index d4df79332b56..1215e4bd7958 100644 --- a/lldb/source/Plugins/SymbolFile/DWARF/DWARFASTParser.h +++ b/lldb/source/Plugins/SymbolFile/DWARF/DWARFASTParser.h @@ -61,6 +61,14 @@ class DWARFASTParser { const lldb_private::ExecutionContext *exe_ctx = nullptr); static lldb::AccessType GetAccessTypeFromDWARF(uint32_t dwarf_accessibility); + + /// If \p type_sp is valid, calculate and set its symbol context scope, and + /// update the type list for its backing symbol file. + /// + /// Returns \p type_sp. + lldb::TypeSP + UpdateSymbolContextScopeForType(const lldb_private::SymbolContext &sc, + const DWARFDIE &die, lldb::TypeSP type_sp); }; /// Parsed form of all attributes that are relevant for type reconstruction. diff --git a/lldb/source/Plugins/SymbolFile/DWARF/DWARFASTParserClang.cpp b/lldb/source/Plugins/SymbolFile/DWARF/DWARFASTParserClang.cpp index bd0696da604a..df27505ace54 100644 --- a/lldb/source/Plugins/SymbolFile/DWARF/DWARFASTParserClang.cpp +++ b/lldb/source/Plugins/SymbolFile/DWARF/DWARFASTParserClang.cpp @@ -1382,35 +1382,6 @@ void DWARFASTParserClang::ParseInheritance( } } -TypeSP DWARFASTParserClang::UpdateSymbolContextScopeForType( - const SymbolContext &sc, const DWARFDIE &die, TypeSP type_sp) { - if (!type_sp) - return type_sp; - - SymbolFileDWARF *dwarf = die.GetDWARF(); - DWARFDIE sc_parent_die = SymbolFileDWARF::GetParentSymbolContextDIE(die); - dw_tag_t sc_parent_tag = sc_parent_die.Tag(); - - SymbolContextScope *symbol_context_scope = nullptr; - if (sc_parent_tag == DW_TAG_compile_unit || - sc_parent_tag == DW_TAG_partial_unit) { - symbol_context_scope = sc.comp_unit; - } else if (sc.function != nullptr && sc_parent_die) { - symbol_context_scope = - sc.function->GetBlock(true).FindBlockByID(sc_parent_die.GetID()); - if (symbol_context_scope == nullptr) - symbol_context_scope = sc.function; - } else { - symbol_context_scope = sc.module_sp.get(); - } - - if (symbol_context_scope != nullptr) - type_sp->SetSymbolContextScope(symbol_context_scope); - - dwarf->GetDIEToType()[die.GetDIE()] = type_sp.get(); - return type_sp; -} - TypeSP DWARFASTParserClang::ParseStructureLikeDIE(const SymbolContext &sc, const DWARFDIE &die, diff --git a/lldb/source/Plugins/SymbolFile/DWARF/DWARFASTParserClang.h b/lldb/source/Plugins/SymbolFile/DWARF/DWARFASTParserClang.h index 61306c7285d4..798779093dc2 100644 --- a/lldb/source/Plugins/SymbolFile/DWARF/DWARFASTParserClang.h +++ b/lldb/source/Plugins/SymbolFile/DWARF/DWARFASTParserClang.h @@ -153,14 +153,6 @@ class DWARFASTParserClang : public DWARFASTParser { void LinkDeclToDIE(clang::Decl *decl, const DWARFDIE &die); - /// If \p type_sp is valid, calculate and set its symbol 
context scope, and - /// update the type list for its backing symbol file. - /// - /// Returns \p type_sp. - lldb::TypeSP - UpdateSymbolContextScopeForType(const lldb_private::SymbolContext &sc, - const DWARFDIE &die, lldb::TypeSP type_sp); - /// Follow Clang Module Skeleton CU references to find a type definition. lldb::TypeSP ParseTypeFromClangModule(const lldb_private::SymbolContext &sc, const DWARFDIE &die, diff --git a/lldb/source/Plugins/SymbolFile/DWARF/SymbolFileDWARF.h b/lldb/source/Plugins/SymbolFile/DWARF/SymbolFileDWARF.h index e81ce28cb86e..fbdd91847568 100644 --- a/lldb/source/Plugins/SymbolFile/DWARF/SymbolFileDWARF.h +++ b/lldb/source/Plugins/SymbolFile/DWARF/SymbolFileDWARF.h @@ -75,6 +75,7 @@ class SymbolFileDWARF : public lldb_private::SymbolFile, friend class DebugMapModule; friend class DWARFCompileUnit; friend class DWARFDIE; + friend class DWARFASTParser; friend class DWARFASTParserClang; // Static Functions From aa37e3af94165395d24bfdde75b9524822052d4f Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Lu=C3=ADs=20Ferreira?= Date: Fri, 22 Oct 2021 01:25:34 +0000 Subject: [PATCH 275/293] [lldb] Add simple D language plugin MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Signed-off-by: Luís Ferreira [lldb] Add array formatting for D types Signed-off-by: Luís Ferreira --- lldb/source/Core/CMakeLists.txt | 1 + lldb/source/Plugins/Language/CMakeLists.txt | 1 + lldb/source/Plugins/Language/D/CMakeLists.txt | 13 ++ .../Language/D/DFormatterFunctions.cpp | 139 +++++++++++++++++ .../Plugins/Language/D/DFormatterFunctions.h | 28 ++++ lldb/source/Plugins/Language/D/DLanguage.cpp | 145 ++++++++++++++++++ lldb/source/Plugins/Language/D/DLanguage.h | 58 +++++++ lldb/source/Plugins/Language/D/DTypeUtils.cpp | 106 +++++++++++++ lldb/source/Plugins/Language/D/DTypeUtils.h | 29 ++++ 9 files changed, 520 insertions(+) create mode 100644 lldb/source/Plugins/Language/D/CMakeLists.txt create mode 100644 lldb/source/Plugins/Language/D/DFormatterFunctions.cpp create mode 100644 lldb/source/Plugins/Language/D/DFormatterFunctions.h create mode 100644 lldb/source/Plugins/Language/D/DLanguage.cpp create mode 100644 lldb/source/Plugins/Language/D/DLanguage.h create mode 100644 lldb/source/Plugins/Language/D/DTypeUtils.cpp create mode 100644 lldb/source/Plugins/Language/D/DTypeUtils.h diff --git a/lldb/source/Core/CMakeLists.txt b/lldb/source/Core/CMakeLists.txt index c33aabddc7d1..d707190e0e6c 100644 --- a/lldb/source/Core/CMakeLists.txt +++ b/lldb/source/Core/CMakeLists.txt @@ -83,6 +83,7 @@ add_lldb_library(lldbCore lldbTarget lldbUtility lldbPluginCPlusPlusLanguage + lldbPluginDLanguage lldbPluginObjCLanguage ${LLDB_CURSES_LIBS} diff --git a/lldb/source/Plugins/Language/CMakeLists.txt b/lldb/source/Plugins/Language/CMakeLists.txt index 7869074566d1..6a4284d190be 100644 --- a/lldb/source/Plugins/Language/CMakeLists.txt +++ b/lldb/source/Plugins/Language/CMakeLists.txt @@ -1,4 +1,5 @@ add_subdirectory(ClangCommon) add_subdirectory(CPlusPlus) +add_subdirectory(D) add_subdirectory(ObjC) add_subdirectory(ObjCPlusPlus) diff --git a/lldb/source/Plugins/Language/D/CMakeLists.txt b/lldb/source/Plugins/Language/D/CMakeLists.txt new file mode 100644 index 000000000000..b9c33b461a64 --- /dev/null +++ b/lldb/source/Plugins/Language/D/CMakeLists.txt @@ -0,0 +1,13 @@ +add_lldb_library(lldbPluginDLanguage PLUGIN + DLanguage.cpp + DTypeUtils.cpp + DFormatterFunctions.cpp + + LINK_LIBS + lldbCore + lldbDataFormatters + lldbSymbol + lldbTarget + LINK_COMPONENTS + Support +) diff 
--git a/lldb/source/Plugins/Language/D/DFormatterFunctions.cpp b/lldb/source/Plugins/Language/D/DFormatterFunctions.cpp
new file mode 100644
index 000000000000..c117bdd07fb1
--- /dev/null
+++ b/lldb/source/Plugins/Language/D/DFormatterFunctions.cpp
@@ -0,0 +1,139 @@
+//===-- DFormatterFunctions.cpp -------------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#include "DFormatterFunctions.h"
+#include "DTypeUtils.h"
+
+#include "lldb/Utility/ConstString.h"
+#include "lldb/DataFormatters/FormatClasses.h"
+#include "lldb/DataFormatters/FormattersHelpers.h"
+#include "lldb/DataFormatters/StringPrinter.h"
+
+using namespace lldb_private;
+using namespace lldb_private::formatters;
+using namespace lldb;
+
+/* DStringSliceSummaryProvider */
+
+namespace {
+class DSliceSyntheticFrontEnd : public SyntheticChildrenFrontEnd {
+public:
+  DSliceSyntheticFrontEnd(ValueObject &valobj)
+      : SyntheticChildrenFrontEnd(valobj) {
+    Update();
+  }
+
+  ~DSliceSyntheticFrontEnd() override = default;
+
+  size_t CalculateNumChildren() override { return m_len; }
+
+  lldb::ValueObjectSP GetChildAtIndex(size_t idx) override {
+    if (idx < m_len) {
+      ValueObjectSP &cached = m_children[idx];
+      if (!cached) {
+        StreamString idx_name;
+        idx_name.Printf("[%" PRIu64 "]", (uint64_t)idx);
+        lldb::addr_t object_at_idx = m_base_data_address;
+        object_at_idx += idx * m_type.GetByteSize(nullptr).getValueOr(1);
+        cached = CreateValueObjectFromAddress(
+            idx_name.GetString(), object_at_idx,
+            m_backend.GetExecutionContextRef(), m_type);
+      }
+      return cached;
+    }
+    return ValueObjectSP();
+  }
+
+  bool Update() override {
+    size_t old_count = m_len;
+
+    if (!IsDSlice(m_backend, &m_type, &m_base_data_address, &m_len))
+      return old_count == 0;
+
+    // drop any children cached from the previous update
+    m_children.clear();
+
+    return old_count == m_len;
+  }
+
+  bool MightHaveChildren() override { return true; }
+
+  size_t GetIndexOfChildWithName(ConstString name) override {
+    return ExtractIndexFromString(name.AsCString());
+  }
+
+private:
+  CompilerType m_type;
+  lldb::addr_t m_base_data_address;
+  uint64_t m_len;
+  std::map<size_t, ValueObjectSP> m_children;
+};
+
+} // end of anonymous namespace
+
+SyntheticChildrenFrontEnd *
+lldb_private::formatters::DSliceSyntheticFrontEndCreator(
+    CXXSyntheticChildren *, lldb::ValueObjectSP valobj_sp) {
+  if (!valobj_sp)
+    return nullptr;
+
+  lldb::ProcessSP process_sp(valobj_sp->GetProcessSP());
+  if (!process_sp)
+    return nullptr;
+  return new DSliceSyntheticFrontEnd(*valobj_sp);
+}
+
+bool lldb_private::formatters::DStringSliceSummaryProvider(
+    ValueObject &valobj, Stream &stream, const TypeSummaryOptions &) {
+  DataExtractor data;
+  Status error;
+  valobj.GetData(data, error);
+
+  if (error.Fail())
+    return false;
+
+  CompilerType Type;
+  addr_t AddrData;
+  uint64_t Len;
+  // this is known to be a D slice; just fetch the values
+  IsDSlice(valobj, &Type, &AddrData, &Len);
+
+  TargetSP target_sp = valobj.GetTargetSP();
+  if (!target_sp)
+    return false;
+
+  StringPrinter::ReadStringAndDumpToStreamOptions options(valobj);
+  options.SetLocation(AddrData);
+  options.SetTargetSP(target_sp);
+  options.SetStream(&stream);
+  options.SetQuote('"');
+  options.SetNeedsZeroTermination(false);
+  options.SetBinaryZeroIsTerminator(true);
+  options.SetSourceSize(Len);
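+  // D strings are not null-terminated; the printer reads exactly Len code units.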
options.SetHasSourceSize(true); + + llvm::Optional SizeOpt = Type.GetByteSize(nullptr); + uint64_t ByteSize = SizeOpt.hasValue() ? *SizeOpt : 1; + + switch (ByteSize) { + case 1: + return StringPrinter::ReadStringAndDumpToStream< + StringPrinter::StringElementType::UTF8>(options); + case 2: + options.SetSuffixToken("w"); + return StringPrinter::ReadStringAndDumpToStream< + StringPrinter::StringElementType::UTF16>(options); + case 4: + options.SetSuffixToken("d"); + return StringPrinter::ReadStringAndDumpToStream< + StringPrinter::StringElementType::UTF32>(options); + default: + stream.Printf("size for string element is not valid"); + return true; + } + return true; +} diff --git a/lldb/source/Plugins/Language/D/DFormatterFunctions.h b/lldb/source/Plugins/Language/D/DFormatterFunctions.h new file mode 100644 index 000000000000..df843e7a1c67 --- /dev/null +++ b/lldb/source/Plugins/Language/D/DFormatterFunctions.h @@ -0,0 +1,28 @@ +//===-- DFormatterFunctions.h ---------------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#ifndef LLDB_SOURCE_PLUGINS_LANGUAGE_D_DFORMATTERFUNCTIONS_H +#define LLDB_SOURCE_PLUGINS_LANGUAGE_D_DFORMATTERFUNCTIONS_H + +#include "lldb/lldb-forward.h" +#include "lldb/DataFormatters/TypeSynthetic.h" + +namespace lldb_private { +namespace formatters { + +SyntheticChildrenFrontEnd * +DSliceSyntheticFrontEndCreator(CXXSyntheticChildren *, + lldb::ValueObjectSP valobj_sp); + +bool DStringSliceSummaryProvider( + ValueObject &valobj, Stream &stream, const TypeSummaryOptions &); + +} // namespace formatters +} // namespace lldb_private + +#endif // LLDB_SOURCE_PLUGINS_LANGUAGE_D_DFORMATTERFUNCTIONS_H diff --git a/lldb/source/Plugins/Language/D/DLanguage.cpp b/lldb/source/Plugins/Language/D/DLanguage.cpp new file mode 100644 index 000000000000..75dd8bc0a822 --- /dev/null +++ b/lldb/source/Plugins/Language/D/DLanguage.cpp @@ -0,0 +1,145 @@ +//===-- DLanguage.cpp -----------------------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include "DLanguage.h" +#include "DTypeUtils.h" +#include "DFormatterFunctions.h" + +#include "lldb/Core/PluginManager.h" +#include "lldb/Core/Mangled.h" +#include "lldb/DataFormatters/DataVisualization.h" +#include "lldb/DataFormatters/FormattersHelpers.h" +#include "lldb/lldb-enumerations.h" + +using namespace lldb; +using namespace lldb_private; + +LLDB_PLUGIN_DEFINE(DLanguage) + +void DLanguage::Initialize() { + PluginManager::RegisterPlugin(GetPluginNameStatic(), "D Language", + CreateInstance); +} + +void DLanguage::Terminate() { + PluginManager::UnregisterPlugin(CreateInstance); +} + +bool DLanguage::IsNilReference(ValueObject &valobj) { + // not an object from D + if (valobj.GetObjectRuntimeLanguage() != eLanguageTypeD) + return false; + + // a normal pointer + if (valobj.IsPointerType()) + { + bool canReadValue; + bool isZero = valobj.GetValueAsUnsigned(0, &canReadValue) == 0; + return canReadValue && isZero; + } + + lldb::addr_t valobj_addr; + uint64_t length; + + // not a slice + if (!IsDSlice(valobj, nullptr, &valobj_addr, &length)) + return false; + + // in D an empty array == null + if (valobj_addr == 0 || length == 0) + return true; + + // non-empty array with a non-null pointer + return false; +} + +HardcodedFormatters::HardcodedSyntheticFinder +DLanguage::GetHardcodedSynthetics() { + static llvm::once_flag g_initialize; + static HardcodedFormatters::HardcodedSyntheticFinder g_formatters; + + llvm::call_once(g_initialize, []() -> void { + g_formatters.push_back([](lldb_private::ValueObject &valobj, + lldb::DynamicValueType, + FormatManager & + fmt_mgr) -> SyntheticChildren::SharedPointer { + static CXXSyntheticChildren::SharedPointer formatter_sp( + new CXXSyntheticChildren( + SyntheticChildren::Flags() + .SetCascades(true) + .SetSkipPointers(true) + .SetSkipReferences(true) + .SetNonCacheable(true), + "D array slice synthetic children", + lldb_private::formatters::DSliceSyntheticFrontEndCreator)); + + if (IsDSlice(valobj, nullptr, nullptr, nullptr) && !IsDStringSlice(valobj, nullptr, nullptr, nullptr)) + return formatter_sp; + + return nullptr; + }); + }); + + return g_formatters; +} + +HardcodedFormatters::HardcodedSummaryFinder +DLanguage::GetHardcodedSummaries() { + static llvm::once_flag g_initialize; + static HardcodedFormatters::HardcodedSummaryFinder g_formatters; + + llvm::call_once(g_initialize, []() -> void { + g_formatters.push_back( + [](lldb_private::ValueObject &valobj, lldb::DynamicValueType, + FormatManager &fmt_mgr) -> TypeSummaryImpl::SharedPointer { + static CXXFunctionSummaryFormat::SharedPointer formatter_sp( + new CXXFunctionSummaryFormat( + TypeSummaryImpl::Flags() + .SetCascades(true) + .SetSkipPointers(true) + .SetSkipReferences(true) + .SetNonCacheable(true) + .SetDontShowChildren(true) + .SetDontShowValue(true) + .SetShowMembersOneLiner(false) + .SetHideItemNames(false), + lldb_private::formatters::DStringSliceSummaryProvider, + "D string slice summary provider")); + + if (IsDStringSlice(valobj, nullptr, nullptr, nullptr)) + return formatter_sp; + + return nullptr; + }); + }); + + return g_formatters; +} + +bool DLanguage::IsSourceFile(llvm::StringRef file_path) const { + const auto suffixes = {".d", ".dd", ".di"}; + for (const auto *const suffix : suffixes) + if (file_path.endswith_insensitive(suffix)) + return true; + + // not a D source file + return false; +} + +Language 
*DLanguage::CreateInstance(lldb::LanguageType language) { + if (language == eLanguageTypeD) + return new DLanguage(); + + // not D + return nullptr; +} + +bool DLanguage::SymbolNameFitsToLanguage(Mangled mangled) const { + const char *mangled_name = mangled.GetMangledName().GetCString(); + return mangled_name && Mangled::GetManglingScheme(mangled_name) == Mangled::eManglingSchemeD; +} diff --git a/lldb/source/Plugins/Language/D/DLanguage.h b/lldb/source/Plugins/Language/D/DLanguage.h new file mode 100644 index 000000000000..2fd70cb09f88 --- /dev/null +++ b/lldb/source/Plugins/Language/D/DLanguage.h @@ -0,0 +1,58 @@ +//===-- DLanguage.h ---------------------------------------------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#ifndef LLDB_SOURCE_PLUGINS_LANGUAGE_D_DLANGUAGE_H +#define LLDB_SOURCE_PLUGINS_LANGUAGE_D_DLANGUAGE_H + +#include "lldb/Target/Language.h" +#include "llvm/ADT/StringRef.h" + +namespace lldb_private { + +class DLanguage : public Language { + public: + DLanguage() = default; + + ~DLanguage() override = default; + + // Static Functions + static void Initialize(); + + static void Terminate(); + + static lldb_private::Language *CreateInstance(lldb::LanguageType language); + + lldb::LanguageType GetLanguageType() const override { + return lldb::eLanguageTypeD; + } + + bool IsNilReference(ValueObject &valobj) override; + + llvm::StringRef GetNilReferenceSummaryString() override { return "null"; } + + static llvm::StringRef GetPluginNameStatic() { return "dlang"; } + + llvm::StringRef GetPluginName() override { return GetPluginNameStatic(); } + + bool IsSourceFile(llvm::StringRef file_path) const override; + + /* lldb::TypeCategoryImplSP GetFormatters() override; */ + + HardcodedFormatters::HardcodedSyntheticFinder + GetHardcodedSynthetics() override; + + HardcodedFormatters::HardcodedSummaryFinder + GetHardcodedSummaries() override; + + bool SymbolNameFitsToLanguage(Mangled mangled) const override; + +}; + +} // namespace lldb_private + +#endif // LLDB_SOURCE_PLUGINS_LANGUAGE_D_DLANGUAGE_H diff --git a/lldb/source/Plugins/Language/D/DTypeUtils.cpp b/lldb/source/Plugins/Language/D/DTypeUtils.cpp new file mode 100644 index 000000000000..cdea6f88823b --- /dev/null +++ b/lldb/source/Plugins/Language/D/DTypeUtils.cpp @@ -0,0 +1,106 @@ +//===-- DTypeUtils.cpp ------------------------------------------*---------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#include "DTypeUtils.h"
+
+#include "lldb/Symbol/CompilerType.h"
+#include "lldb/Utility/ConstString.h"
+#include "llvm/ADT/StringRef.h"
+#include "llvm/ADT/StringSwitch.h"
+
+using namespace lldb;
+using namespace llvm;
+
+namespace lldb_private {
+
+bool IsDSlice(ValueObject &Valobj, CompilerType* Type, addr_t* AddrData,
+              uint64_t* Len)
+{
+  if (Valobj.MightHaveChildren())
+  {
+    size_t nchildren = Valobj.GetNumChildren();
+
+    // doesn't have exactly two children
+    if (nchildren != 2)
+      return false;
+
+    ConstString ptr_name("ptr");
+    ConstString length_name("length");
+
+    // fetch slice fields
+    ValueObjectSP ptr_sp = Valobj.GetChildMemberWithName(ptr_name, true);
+    ValueObjectSP length_sp = Valobj.GetChildMemberWithName(length_name, true);
+
+    // doesn't have exactly those two children
+    if (!ptr_sp || !length_sp)
+      return false;
+
+    CompilerType PteType = ptr_sp->GetCompilerType().GetPointeeType();
+    // isn't a pointer
+    if (!PteType.IsValid())
+      return false;
+
+    bool success;
+    uint64_t lenVal = length_sp->GetValueAsUnsigned(0, &success);
+    // can't fetch the unsigned value
+    if (!success)
+      return false;
+
+    if (Type != nullptr)
+      *Type = PteType;
+    if (AddrData != nullptr)
+      *AddrData = ptr_sp->GetPointerValue();
+    if (Len != nullptr)
+      *Len = lenVal;
+
+    return true;
+  }
+  return false;
+}
+
+bool IsDStringSlice(ValueObject &Valobj, CompilerType* Type, addr_t* AddrData,
+                    uint64_t* Len)
+{
+  CompilerType TypeRet;
+  bool IsSlice = IsDSlice(Valobj, &TypeRet, AddrData, Len);
+  if (!IsSlice)
+    return false;
+
+  if (Type != nullptr)
+    *Type = TypeRet;
+
+  return IsCharType(TypeRet);
+}
+
+bool IsCharType(CompilerType Type)
+{
+  switch(Type.GetFormat())
+  {
+  case eFormatUnicode8:
+  case eFormatUnicode16:
+  case eFormatUnicode32:
+  case eFormatChar:
+  case eFormatCharPrintable:
+    return true;
+  case eFormatInvalid: break;
+  default:
+    return false;
+  }
+
+  // fall back to char type names if no proper encoding was found
+  return StringSwitch<bool>(Type.GetDisplayTypeName().GetCString())
+      .Cases(
+          "char", "char8_t",   // UTF-8
+          "wchar", "char16_t", // UTF-16
+          "dchar", "char32_t", // UTF-32
+          "wchar_t",           // platform dependent
+          true)
+      .Default(false);
+}
+
+} // namespace lldb_private
diff --git a/lldb/source/Plugins/Language/D/DTypeUtils.h b/lldb/source/Plugins/Language/D/DTypeUtils.h
new file mode 100644
index 000000000000..3dabac665345
--- /dev/null
+++ b/lldb/source/Plugins/Language/D/DTypeUtils.h
@@ -0,0 +1,29 @@
+//===-- DTypeUtils.h --------------------------------------------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#ifndef LLDB_SOURCE_PLUGINS_LANGUAGE_D_DTYPEUTILS_H +#define LLDB_SOURCE_PLUGINS_LANGUAGE_D_DTYPEUTILS_H + +#include "lldb/lldb-forward.h" +#include "lldb/Core/ValueObject.h" + +using namespace lldb; + +namespace lldb_private { + +bool IsDSlice(ValueObject &Valobj, CompilerType* Type, addr_t* AddrData, + uint64_t* Len); + +bool IsDStringSlice(ValueObject &Valobj, CompilerType* Type, addr_t* AddrData, + uint64_t* Len); + +bool IsCharType(CompilerType Type); + +} // namespace lldb_private + +#endif // LLDB_SOURCE_PLUGINS_LANGUAGE_D_DTYPEUTILS_H From 4d8bf710b2d75757262fd0ce23cb5ecc4372b8b0 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Lu=C3=ADs=20Ferreira?= Date: Mon, 25 Oct 2021 02:31:49 +0000 Subject: [PATCH 276/293] [lldb] Add support for usage of Clang TypeSystem for D MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Signed-off-by: Luís Ferreira --- lldb/source/DataFormatters/FormatManager.cpp | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/lldb/source/DataFormatters/FormatManager.cpp b/lldb/source/DataFormatters/FormatManager.cpp index 0ef5f0adc832..07bec0326f5d 100644 --- a/lldb/source/DataFormatters/FormatManager.cpp +++ b/lldb/source/DataFormatters/FormatManager.cpp @@ -575,7 +575,12 @@ FormatManager::GetCandidateLanguages(lldb::LanguageType lang_type) { case lldb::eLanguageTypeC_plus_plus_03: case lldb::eLanguageTypeC_plus_plus_11: case lldb::eLanguageTypeC_plus_plus_14: - return {lldb::eLanguageTypeC_plus_plus, lldb::eLanguageTypeObjC}; + return { + lldb::eLanguageTypeC_plus_plus, + lldb::eLanguageTypeObjC, + // FIXME: Add a proper TypeSystem for D + lldb::eLanguageTypeD + }; default: return {lang_type}; } From 8984304f743e377917ca65d44301cfe2e5508def Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Lu=C3=ADs=20Ferreira?= Date: Wed, 24 Nov 2021 18:21:23 +0000 Subject: [PATCH 277/293] [lldb] Add a minimal TypeSystem for D MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Signed-off-by: Luís Ferreira --- lldb/source/Plugins/TypeSystem/CMakeLists.txt | 1 + .../Plugins/TypeSystem/D/CMakeLists.txt | 15 + .../Plugins/TypeSystem/D/TypeSystemD.cpp | 539 ++++++++++++++++++ .../source/Plugins/TypeSystem/D/TypeSystemD.h | 337 +++++++++++ 4 files changed, 892 insertions(+) create mode 100644 lldb/source/Plugins/TypeSystem/D/CMakeLists.txt create mode 100644 lldb/source/Plugins/TypeSystem/D/TypeSystemD.cpp create mode 100644 lldb/source/Plugins/TypeSystem/D/TypeSystemD.h diff --git a/lldb/source/Plugins/TypeSystem/CMakeLists.txt b/lldb/source/Plugins/TypeSystem/CMakeLists.txt index 17c40aee44cc..bdd55515a5f8 100644 --- a/lldb/source/Plugins/TypeSystem/CMakeLists.txt +++ b/lldb/source/Plugins/TypeSystem/CMakeLists.txt @@ -1 +1,2 @@ add_subdirectory(Clang) +add_subdirectory(D) diff --git a/lldb/source/Plugins/TypeSystem/D/CMakeLists.txt b/lldb/source/Plugins/TypeSystem/D/CMakeLists.txt new file mode 100644 index 000000000000..9795689d7b4f --- /dev/null +++ b/lldb/source/Plugins/TypeSystem/D/CMakeLists.txt @@ -0,0 +1,15 @@ +add_lldb_library(lldbPluginTypeSystemD PLUGIN + TypeSystemD.cpp + + LINK_LIBS + lldbCore + lldbSymbol + lldbTarget + lldbUtility + lldbPluginExpressionParserClang + lldbPluginSymbolFileDWARF + lldbPluginSymbolFilePDB + lldbPluginObjCRuntime + LINK_COMPONENTS + Support +) diff --git a/lldb/source/Plugins/TypeSystem/D/TypeSystemD.cpp 
b/lldb/source/Plugins/TypeSystem/D/TypeSystemD.cpp new file mode 100644 index 000000000000..123520e1024b --- /dev/null +++ b/lldb/source/Plugins/TypeSystem/D/TypeSystemD.cpp @@ -0,0 +1,539 @@ +#include "TypeSystemD.h" +#include "lldb/lldb-enumerations.h" + +#include "lldb/Core/Module.h" +#include "lldb/Core/PluginManager.h" +#include "lldb/Symbol/Type.h" +#include "lldb/Utility/ArchSpec.h" + +LLDB_PLUGIN_DEFINE(TypeSystemD) + +namespace lldb_private { + +// LLVM RTTI support +char TypeSystemD::ID; + +TypeSystemD::TypeSystemD() = default; + +TypeSystemD::~TypeSystemD() { Finalize(); } +void TypeSystemD::Finalize() {} + +void TypeSystemD::Initialize() { + PluginManager::RegisterPlugin( + GetPluginNameStatic(), "D base AST context plug-in", CreateInstance, + GetSupportedLanguages(), GetSupportedLanguages()); +} + +void TypeSystemD::Terminate() { + PluginManager::UnregisterPlugin(CreateInstance); +} + +LanguageSet TypeSystemD::GetSupportedLanguages() { + LanguageSet languages; + languages.Insert(lldb::eLanguageTypeD); + return languages; +} + +lldb::LanguageType +TypeSystemD::GetMinimumLanguage(lldb::opaque_compiler_type_t /*unused*/) { + return lldb::eLanguageTypeD; +} + +bool TypeSystemD::SupportsLanguage(lldb::LanguageType language) { + return language == lldb::eLanguageTypeD; +} + +lldb::TypeSystemSP TypeSystemD::CreateInstance(lldb::LanguageType language, + Module *module, Target *target) { + if (language != lldb::eLanguageTypeD) + return lldb::TypeSystemSP(); + + ArchSpec arch; + if (module) + arch = module->GetArchitecture(); + else if (target) + arch = target->GetArchitecture(); + + if (!arch.IsValid()) + return lldb::TypeSystemSP(); + + if (module) + return std::shared_ptr(new TypeSystemD); + + return lldb::TypeSystemSP(); +} + +#ifndef NDEBUG +LLVM_DUMP_METHOD void +TypeSystemD::dump(lldb::opaque_compiler_type_t type) const {} + +bool TypeSystemD::Verify(lldb::opaque_compiler_type_t type) { return true; } +#endif + +void TypeSystemD::Dump(llvm::raw_ostream &output) {} + +void TypeSystemD::DumpValue(lldb::opaque_compiler_type_t type, + ExecutionContext *exe_ctx, Stream *s, + lldb::Format format, const DataExtractor &data, + lldb::offset_t data_offset, size_t data_byte_size, + uint32_t bitfield_bit_size, + uint32_t bitfield_bit_offset, bool show_types, + bool show_summary, bool verbose, uint32_t depth) {} + +bool TypeSystemD::DumpTypeValue(lldb::opaque_compiler_type_t type, Stream *s, + lldb::Format format, const DataExtractor &data, + lldb::offset_t data_offset, + size_t data_byte_size, + uint32_t bitfield_bit_size, + uint32_t bitfield_bit_offset, + ExecutionContextScope *exe_scope) { + return false; +} + +void TypeSystemD::DumpSummary(lldb::opaque_compiler_type_t type, + ExecutionContext *exe_ctx, Stream *s, + const DataExtractor &data, + lldb::offset_t data_offset, + size_t data_byte_size) {} + +void TypeSystemD::DumpTypeDescription(lldb::opaque_compiler_type_t type, + lldb::DescriptionLevel level) {} + +void TypeSystemD::DumpTypeDescription(lldb::opaque_compiler_type_t type, + Stream *s, lldb::DescriptionLevel level) { +} + +bool TypeSystemD::IsVectorType(lldb::opaque_compiler_type_t type, + CompilerType *element_type, uint64_t *size) { + return false; +} + +uint32_t TypeSystemD::IsHomogeneousAggregate(lldb::opaque_compiler_type_t type, + CompilerType *base_type_ptr) { + return 0; +} + +bool TypeSystemD::IsReferenceType(lldb::opaque_compiler_type_t type, + CompilerType *pointee_type, bool *is_rvalue) { + return false; +} + +bool TypeSystemD::IsPossibleDynamicType( + 
lldb::opaque_compiler_type_t type, + CompilerType *target_type, // Can pass nullptr + bool check_cplusplus, bool check_objc) { + return false; +} + +bool TypeSystemD::IsRuntimeGeneratedType(lldb::opaque_compiler_type_t type) { + return false; +} + +bool TypeSystemD::IsPointerType(lldb::opaque_compiler_type_t type, + CompilerType *pointee_type) { + return false; +} + +bool TypeSystemD::IsPointerOrReferenceType(lldb::opaque_compiler_type_t type, + CompilerType *pointee_type) { + return false; +} +bool TypeSystemD::IsArrayType(lldb::opaque_compiler_type_t type, + CompilerType *element_type, uint64_t *size, + bool *is_incomplete) { + return false; +} + +bool TypeSystemD::IsAggregateType(lldb::opaque_compiler_type_t type) { + return false; +} + +bool TypeSystemD::IsAnonymousType(lldb::opaque_compiler_type_t type) { + return false; +} + +bool TypeSystemD::IsBeingDefined(lldb::opaque_compiler_type_t type) { + return false; +} + +bool TypeSystemD::IsCharType(lldb::opaque_compiler_type_t type) { + return false; +} + +bool TypeSystemD::IsCompleteType(lldb::opaque_compiler_type_t type) { + if (!type) + return false; + return true; +} + +bool TypeSystemD::IsConst(lldb::opaque_compiler_type_t type) { return false; } + +bool TypeSystemD::IsCStringType(lldb::opaque_compiler_type_t type, + uint32_t &length) { + return false; +} + +bool TypeSystemD::IsTypedefType(lldb::opaque_compiler_type_t type) { + return false; +} + +bool TypeSystemD::IsPolymorphicClass(lldb::opaque_compiler_type_t type) { + return false; +} + +bool TypeSystemD::IsFunctionPointerType(lldb::opaque_compiler_type_t type) { + return false; +} + +bool TypeSystemD::IsBlockPointerType(lldb::opaque_compiler_type_t type, + CompilerType *function_pointer_type_ptr) { + return false; +} + +bool TypeSystemD::IsIntegerType(lldb::opaque_compiler_type_t type, + bool &is_signed) { + return false; +} + +bool TypeSystemD::IsEnumerationType(lldb::opaque_compiler_type_t type, + bool &is_signed) { + return false; +} + +bool TypeSystemD::IsScopedEnumerationType(lldb::opaque_compiler_type_t type) { + return false; +} + +uint32_t IsHomogeneousAggregate(lldb::opaque_compiler_type_t type, + CompilerType *base_type_ptr) { + return 0; +} + +bool TypeSystemD::IsScalarType(lldb::opaque_compiler_type_t type) { + return false; +} + +bool TypeSystemD::IsFunctionType(lldb::opaque_compiler_type_t type) { + return false; +} + +bool TypeSystemD::IsVoidType(lldb::opaque_compiler_type_t type) { + return false; +} + +bool TypeSystemD::IsDefined(lldb::opaque_compiler_type_t type) { return false; } + +bool TypeSystemD::IsFloatingPointType(lldb::opaque_compiler_type_t type, + uint32_t &count, bool &is_complex) { + return false; +} + +bool TypeSystemD::CanPassInRegisters(const CompilerType &type) { return false; } + +void TypeSystemD::ForEachEnumerator( + lldb::opaque_compiler_type_t type, + std::function const &callback) {} + +CompilerType TypeSystemD::GetTypedefedType(lldb::opaque_compiler_type_t type) { + return CompilerType(); +} + +CompilerType +TypeSystemD::GetBuiltinTypeForEncodingAndBitSize(lldb::Encoding encoding, + size_t bit_size) { + return CompilerType(); +} + +CompilerType TypeSystemD::GetBasicTypeFromAST(lldb::BasicType basic_type) { + return CompilerType(); +} + +llvm::Optional +TypeSystemD::GetBitSize(lldb::opaque_compiler_type_t type, + ExecutionContextScope *exe_scope) { + return llvm::None; +} + +lldb::Encoding TypeSystemD::GetEncoding(lldb::opaque_compiler_type_t type, + uint64_t &count) { + return lldb::eEncodingInvalid; +} + +lldb::Format 
TypeSystemD::GetFormat(lldb::opaque_compiler_type_t type) { + return lldb::eFormatDefault; +} + +lldb::TypeClass TypeSystemD::GetTypeClass(lldb::opaque_compiler_type_t type) { + return lldb::eTypeClassInvalid; +} + +unsigned TypeSystemD::GetTypeQualifiers(lldb::opaque_compiler_type_t type) { + return 0; +} + +CompilerType TypeSystemD::GetTypeForDecl(void *opaque_decl) { + return CompilerType(); +} + +CompilerType TypeSystemD::GetChildCompilerTypeAtIndex( + lldb::opaque_compiler_type_t type, ExecutionContext *exe_ctx, size_t idx, + bool transparent_pointers, bool omit_empty_base_classes, + bool ignore_array_bounds, std::string &child_name, + uint32_t &child_byte_size, int32_t &child_byte_offset, + uint32_t &child_bitfield_bit_size, uint32_t &child_bitfield_bit_offset, + bool &child_is_base_class, bool &child_is_deref_of_parent, + ValueObject *valobj, uint64_t &language_flags) { + return CompilerType(); +} + +uint32_t TypeSystemD::GetNumFields(lldb::opaque_compiler_type_t type) { + return 0; +} + +CompilerType TypeSystemD::GetFieldAtIndex(lldb::opaque_compiler_type_t type, + size_t idx, std::string &name, + uint64_t *bit_offset_ptr, + uint32_t *bitfield_bit_size_ptr, + bool *is_bitfield_ptr) { + return CompilerType(); +} + +uint32_t +TypeSystemD::GetNumDirectBaseClasses(lldb::opaque_compiler_type_t type) { + return 0; +} + +uint32_t +TypeSystemD::GetNumVirtualBaseClasses(lldb::opaque_compiler_type_t type) { + return 0; +} + +CompilerType +TypeSystemD::GetDirectBaseClassAtIndex(lldb::opaque_compiler_type_t type, + size_t idx, uint32_t *bit_offset_ptr) { + return CompilerType(); +} + +CompilerType +TypeSystemD::GetVirtualBaseClassAtIndex(lldb::opaque_compiler_type_t type, + size_t idx, uint32_t *bit_offset_ptr) { + return CompilerType(); +} + +uint32_t TypeSystemD::GetIndexOfChildWithName(lldb::opaque_compiler_type_t type, + const char *name, + bool omit_empty_base_classes) { + return 0; +} + +size_t TypeSystemD::GetIndexOfChildMemberWithName( + lldb::opaque_compiler_type_t type, const char *name, + bool omit_empty_base_classes, std::vector &child_indexes) { + return 0; +} + +size_t TypeSystemD::GetNumTemplateArguments(lldb::opaque_compiler_type_t type) { + return 0; +} + +lldb::TemplateArgumentKind +TypeSystemD::GetTemplateArgumentKind(lldb::opaque_compiler_type_t type, + size_t idx) { + return lldb::eTemplateArgumentKindNull; +} +CompilerType +TypeSystemD::GetTypeTemplateArgument(lldb::opaque_compiler_type_t type, + size_t idx) { + return CompilerType(); +} +llvm::Optional +TypeSystemD::GetIntegralTemplateArgument(lldb::opaque_compiler_type_t type, + size_t idx) { + return llvm::None; +} + +CompilerType TypeSystemD::GetTypeForFormatters(void *type) { + return CompilerType(); +} + +llvm::Optional +TypeSystemD::GetTypeBitAlign(lldb::opaque_compiler_type_t type, + ExecutionContextScope *exe_scope) { + return llvm::None; +} + +uint32_t TypeSystemD::GetNumChildren(lldb::opaque_compiler_type_t type, + bool omit_empty_base_classes, + const ExecutionContext *exe_ctx) { + return 0; +} + +CompilerType TypeSystemD::GetBuiltinTypeByName(ConstString name) { + return CompilerType(); +} + +lldb::BasicType +TypeSystemD::GetBasicTypeEnumeration(lldb::opaque_compiler_type_t type) { + return lldb::eBasicTypeInvalid; +} + +CompilerType +TypeSystemD::GetArrayElementType(lldb::opaque_compiler_type_t type, + ExecutionContextScope *exe_scope) { + return CompilerType(); +} + +CompilerType TypeSystemD::GetArrayType(lldb::opaque_compiler_type_t type, + uint64_t size) { + return CompilerType(); +} + +CompilerType 
TypeSystemD::GetCanonicalType(lldb::opaque_compiler_type_t type) { + return CompilerType(); +} + +CompilerType +TypeSystemD::GetFullyUnqualifiedType(lldb::opaque_compiler_type_t type) { + return CompilerType(); +} + +CompilerType +TypeSystemD::GetEnumerationIntegerType(lldb::opaque_compiler_type_t type) { + return CompilerType(); +} + +// Returns -1 if this isn't a function of if the function doesn't have a +// prototype Returns a value >= 0 if there is a prototype. +int TypeSystemD::GetFunctionArgumentCount(lldb::opaque_compiler_type_t type) { + return -1; +} + +CompilerType +TypeSystemD::GetFunctionArgumentTypeAtIndex(lldb::opaque_compiler_type_t type, + size_t idx) { + return CompilerType(); +} + +CompilerType +TypeSystemD::GetFunctionReturnType(lldb::opaque_compiler_type_t type) { + return CompilerType(); +} + +size_t TypeSystemD::GetNumMemberFunctions(lldb::opaque_compiler_type_t type) { + return 0; +} + +TypeMemberFunctionImpl +TypeSystemD::GetMemberFunctionAtIndex(lldb::opaque_compiler_type_t type, + size_t idx) { + return TypeMemberFunctionImpl(); +} + +CompilerType +TypeSystemD::GetNonReferenceType(lldb::opaque_compiler_type_t type) { + return CompilerType(); +} + +CompilerType TypeSystemD::GetPointeeType(lldb::opaque_compiler_type_t type) { + return CompilerType(); +} + +CompilerType TypeSystemD::GetPointerType(lldb::opaque_compiler_type_t type) { + return CompilerType(); +} + +CompilerType +TypeSystemD::GetLValueReferenceType(lldb::opaque_compiler_type_t type) { + return CompilerType(); +} + +CompilerType +TypeSystemD::GetRValueReferenceType(lldb::opaque_compiler_type_t type) { + return CompilerType(); +} + +CompilerType TypeSystemD::GetAtomicType(lldb::opaque_compiler_type_t type) { + return CompilerType(); +} + +CompilerType TypeSystemD::AddConstModifier(lldb::opaque_compiler_type_t type) { + return CompilerType(); +} + +CompilerType +TypeSystemD::AddVolatileModifier(lldb::opaque_compiler_type_t type) { + return CompilerType(); +} + +CompilerType +TypeSystemD::AddRestrictModifier(lldb::opaque_compiler_type_t type) { + return CompilerType(); +} + +const llvm::fltSemantics &TypeSystemD::GetFloatTypeSemantics(size_t byte_size) { + return llvm::APFloatBase::Bogus(); +} + +bool TypeSystemD::GetCompleteType(lldb::opaque_compiler_type_t type) { + return false; +} + +ConstString TypeSystemD::GetTypeName(lldb::opaque_compiler_type_t type) { + return ConstString(); +} + +ConstString TypeSystemD::GetDisplayTypeName(lldb::opaque_compiler_type_t type) { + return ConstString(); +} + +uint32_t +TypeSystemD::GetTypeInfo(lldb::opaque_compiler_type_t type, + CompilerType *pointee_or_element_compiler_type) { + return 0; +} + +uint32_t TypeSystemD::GetPointerByteSize() { return 0; } + +size_t +TypeSystemD::GetNumberOfFunctionArguments(lldb::opaque_compiler_type_t type) { + return 0; +} + +CompilerType +TypeSystemD::GetFunctionArgumentAtIndex(lldb::opaque_compiler_type_t type, + const size_t index) { + return CompilerType(); +} + +ConstString TypeSystemD::DeclGetName(void *opaque_decl) { + return ConstString(); +} + +std::vector +TypeSystemD::DeclContextFindDeclByName(void *opaque_decl_ctx, ConstString name, + const bool ignore_using_decls) { + return std::vector(); +} + +ConstString TypeSystemD::DeclContextGetName(void *opaque_decl_ctx) { + return ConstString(); +} + +ConstString +TypeSystemD::DeclContextGetScopeQualifiedName(void *opaque_decl_ctx) { + return ConstString(); +} + +bool TypeSystemD::DeclContextIsClassMethod( + void *opaque_decl_ctx, lldb::LanguageType *language_ptr, + bool 
*is_instance_method_ptr, ConstString *language_object_name_ptr) { + return false; +} + +bool TypeSystemD::DeclContextIsContainedInLookup(void *opaque_decl_ctx, + void *other_opaque_decl_ctx) { + return false; +} + +} // namespace lldb_private diff --git a/lldb/source/Plugins/TypeSystem/D/TypeSystemD.h b/lldb/source/Plugins/TypeSystem/D/TypeSystemD.h new file mode 100644 index 000000000000..479d641a9f62 --- /dev/null +++ b/lldb/source/Plugins/TypeSystem/D/TypeSystemD.h @@ -0,0 +1,337 @@ +//===-- TypeSystemD.h -------------------------------------------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#ifndef LLDB_SOURCE_PLUGINS_TYPESYSTEM_D_TYPESYSTEMD_H +#define LLDB_SOURCE_PLUGINS_TYPESYSTEM_D_TYPESYSTEMD_H + +#include "lldb/Core/Module.h" +#include "lldb/Symbol/TypeSystem.h" +#include "lldb/Target/Target.h" +#include "lldb/lldb-enumerations.h" + +namespace lldb_private { + +class TypeSystemD : public TypeSystem { + // LLVM RTTI support + static char ID; + +public: + // llvm casting support + bool isA(const void *ClassID) const override { return ClassID == &ID; } + static bool classof(const TypeSystem *ts) { return ts->isA(&ID); } + + explicit TypeSystemD(); + ~TypeSystemD() override; + + void Finalize() override; + + static lldb::TypeSystemSP CreateInstance(lldb::LanguageType language, + Module *module, Target *target); + + // PluginInterface functions + llvm::StringRef GetPluginName() override { return GetPluginNameStatic(); } + + static llvm::StringRef GetPluginNameStatic() { return "dlang"; } + + static LanguageSet GetSupportedLanguages(); + + static void Initialize(); + + static void Terminate(); + + // Dumping types +#ifndef NDEBUG + LLVM_DUMP_METHOD void dump(lldb::opaque_compiler_type_t type) const override; + + bool Verify(lldb::opaque_compiler_type_t type) override; +#endif + + /// \see lldb_private::TypeSystem::Dump + void Dump(llvm::raw_ostream &output) override; + + void DumpValue(lldb::opaque_compiler_type_t type, ExecutionContext *exe_ctx, + Stream *s, lldb::Format format, const DataExtractor &data, + lldb::offset_t data_offset, size_t data_byte_size, + uint32_t bitfield_bit_size, uint32_t bitfield_bit_offset, + bool show_types, bool show_summary, bool verbose, + uint32_t depth) override; + + bool DumpTypeValue(lldb::opaque_compiler_type_t type, Stream *s, + lldb::Format format, const DataExtractor &data, + lldb::offset_t data_offset, size_t data_byte_size, + uint32_t bitfield_bit_size, uint32_t bitfield_bit_offset, + ExecutionContextScope *exe_scope) override; + + void DumpSummary(lldb::opaque_compiler_type_t type, ExecutionContext *exe_ctx, + Stream *s, const DataExtractor &data, + lldb::offset_t data_offset, size_t data_byte_size) override; + + void DumpTypeDescription( + lldb::opaque_compiler_type_t type, + lldb::DescriptionLevel level = lldb::eDescriptionLevelFull) override; + + void DumpTypeDescription( + lldb::opaque_compiler_type_t type, Stream *s, + lldb::DescriptionLevel level = lldb::eDescriptionLevelFull) override; + + ConstString DeclGetName(void *opaque_decl) override; + + std::vector + DeclContextFindDeclByName(void *opaque_decl_ctx, ConstString name, + const bool ignore_using_decls) override; + + ConstString DeclContextGetName(void *opaque_decl_ctx) override; + + ConstString 
DeclContextGetScopeQualifiedName(void *opaque_decl_ctx) override; + + bool DeclContextIsClassMethod(void *opaque_decl_ctx, + lldb::LanguageType *language_ptr, + bool *is_instance_method_ptr, + ConstString *language_object_name_ptr) override; + + bool DeclContextIsContainedInLookup(void *opaque_decl_ctx, + void *other_opaque_decl_ctx) override; + + bool IsPossibleDynamicType(lldb::opaque_compiler_type_t type, + CompilerType *target_type, // Can pass nullptr + bool check_cplusplus, bool check_objc) override; + + bool IsRuntimeGeneratedType(lldb::opaque_compiler_type_t type) override; + + bool IsPointerType(lldb::opaque_compiler_type_t type, + CompilerType *pointee_type) override; + + bool IsPointerOrReferenceType(lldb::opaque_compiler_type_t type, + CompilerType *pointee_type) override; + + bool IsReferenceType(lldb::opaque_compiler_type_t type, + CompilerType *pointee_type, bool *is_rvalue) override; + + bool IsArrayType(lldb::opaque_compiler_type_t type, + CompilerType *element_type, uint64_t *size, + bool *is_incomplete) override; + + bool IsVectorType(lldb::opaque_compiler_type_t type, + CompilerType *element_type, uint64_t *size) override; + + bool IsAggregateType(lldb::opaque_compiler_type_t type) override; + + bool IsAnonymousType(lldb::opaque_compiler_type_t type) override; + + bool IsBeingDefined(lldb::opaque_compiler_type_t type) override; + + bool IsCharType(lldb::opaque_compiler_type_t type) override; + + bool IsCompleteType(lldb::opaque_compiler_type_t type) override; + + bool IsConst(lldb::opaque_compiler_type_t type) override; + + bool IsCStringType(lldb::opaque_compiler_type_t type, + uint32_t &length) override; + + bool IsTypedefType(lldb::opaque_compiler_type_t type) override; + + bool IsPolymorphicClass(lldb::opaque_compiler_type_t type) override; + + bool IsFunctionPointerType(lldb::opaque_compiler_type_t type) override; + + bool IsBlockPointerType(lldb::opaque_compiler_type_t type, + CompilerType *function_pointer_type_ptr) override; + + bool IsIntegerType(lldb::opaque_compiler_type_t type, + bool &is_signed) override; + + bool IsEnumerationType(lldb::opaque_compiler_type_t type, + bool &is_signed) override; + + bool IsScopedEnumerationType(lldb::opaque_compiler_type_t type) override; + + uint32_t IsHomogeneousAggregate(lldb::opaque_compiler_type_t type, + CompilerType *base_type_ptr) override; + + bool IsScalarType(lldb::opaque_compiler_type_t type) override; + + bool IsFunctionType(lldb::opaque_compiler_type_t type) override; + + bool IsVoidType(lldb::opaque_compiler_type_t type) override; + + bool IsDefined(lldb::opaque_compiler_type_t type) override; + + bool IsFloatingPointType(lldb::opaque_compiler_type_t type, uint32_t &count, + bool &is_complex) override; + + bool CanPassInRegisters(const CompilerType &type) override; + + bool SupportsLanguage(lldb::LanguageType language) override; + + void ForEachEnumerator( + lldb::opaque_compiler_type_t type, + std::function const &callback) override; + + CompilerType GetTypedefedType(lldb::opaque_compiler_type_t type) override; + + CompilerType GetBuiltinTypeForEncodingAndBitSize(lldb::Encoding encoding, + size_t bit_size) override; + + CompilerType GetBasicTypeFromAST(lldb::BasicType basic_type) override; + + llvm::Optional + GetBitSize(lldb::opaque_compiler_type_t type, + ExecutionContextScope *exe_scope) override; + + lldb::Encoding GetEncoding(lldb::opaque_compiler_type_t type, + uint64_t &count) override; + + lldb::Format GetFormat(lldb::opaque_compiler_type_t type) override; + + lldb::LanguageType + 
GetMinimumLanguage(lldb::opaque_compiler_type_t type) override; + + lldb::TypeClass GetTypeClass(lldb::opaque_compiler_type_t type) override; + + unsigned GetTypeQualifiers(lldb::opaque_compiler_type_t type) override; + + CompilerType GetTypeForDecl(void *opaque_decl) override; + + CompilerType GetChildCompilerTypeAtIndex( + lldb::opaque_compiler_type_t type, ExecutionContext *exe_ctx, size_t idx, + bool transparent_pointers, bool omit_empty_base_classes, + bool ignore_array_bounds, std::string &child_name, + uint32_t &child_byte_size, int32_t &child_byte_offset, + uint32_t &child_bitfield_bit_size, uint32_t &child_bitfield_bit_offset, + bool &child_is_base_class, bool &child_is_deref_of_parent, + ValueObject *valobj, uint64_t &language_flags) override; + + uint32_t GetNumFields(lldb::opaque_compiler_type_t type) override; + + CompilerType GetFieldAtIndex(lldb::opaque_compiler_type_t type, size_t idx, + std::string &name, uint64_t *bit_offset_ptr, + uint32_t *bitfield_bit_size_ptr, + bool *is_bitfield_ptr) override; + + uint32_t GetNumDirectBaseClasses(lldb::opaque_compiler_type_t type) override; + + uint32_t GetNumVirtualBaseClasses(lldb::opaque_compiler_type_t type) override; + + CompilerType GetDirectBaseClassAtIndex(lldb::opaque_compiler_type_t type, + size_t idx, + uint32_t *bit_offset_ptr) override; + + CompilerType GetVirtualBaseClassAtIndex(lldb::opaque_compiler_type_t type, + size_t idx, + uint32_t *bit_offset_ptr) override; + + uint32_t GetIndexOfChildWithName(lldb::opaque_compiler_type_t type, + const char *name, + bool omit_empty_base_classes) override; + + size_t + GetIndexOfChildMemberWithName(lldb::opaque_compiler_type_t type, + const char *name, bool omit_empty_base_classes, + std::vector &child_indexes) override; + + size_t GetNumTemplateArguments(lldb::opaque_compiler_type_t type) override; + + lldb::TemplateArgumentKind + GetTemplateArgumentKind(lldb::opaque_compiler_type_t type, + size_t idx) override; + CompilerType GetTypeTemplateArgument(lldb::opaque_compiler_type_t type, + size_t idx) override; + llvm::Optional + GetIntegralTemplateArgument(lldb::opaque_compiler_type_t type, + size_t idx) override; + + CompilerType GetTypeForFormatters(void *type) override; + + llvm::Optional + GetTypeBitAlign(lldb::opaque_compiler_type_t type, + ExecutionContextScope *exe_scope) override; + + uint32_t GetNumChildren(lldb::opaque_compiler_type_t type, + bool omit_empty_base_classes, + const ExecutionContext *exe_ctx) override; + + CompilerType GetBuiltinTypeByName(ConstString name) override; + + lldb::BasicType + GetBasicTypeEnumeration(lldb::opaque_compiler_type_t type) override; + + CompilerType GetArrayElementType(lldb::opaque_compiler_type_t type, + ExecutionContextScope *exe_scope) override; + + CompilerType GetArrayType(lldb::opaque_compiler_type_t type, + uint64_t size) override; + + CompilerType GetCanonicalType(lldb::opaque_compiler_type_t type) override; + + CompilerType + GetFullyUnqualifiedType(lldb::opaque_compiler_type_t type) override; + + CompilerType + GetEnumerationIntegerType(lldb::opaque_compiler_type_t type) override; + + // Returns -1 if this isn't a function of if the function doesn't have a + // prototype Returns a value >= 0 if there is a prototype. 
+ int GetFunctionArgumentCount(lldb::opaque_compiler_type_t type) override; + + CompilerType GetFunctionArgumentTypeAtIndex(lldb::opaque_compiler_type_t type, + size_t idx) override; + + CompilerType + GetFunctionReturnType(lldb::opaque_compiler_type_t type) override; + + size_t GetNumMemberFunctions(lldb::opaque_compiler_type_t type) override; + + TypeMemberFunctionImpl + GetMemberFunctionAtIndex(lldb::opaque_compiler_type_t type, + size_t idx) override; + + CompilerType GetNonReferenceType(lldb::opaque_compiler_type_t type) override; + + CompilerType GetPointeeType(lldb::opaque_compiler_type_t type) override; + + CompilerType GetPointerType(lldb::opaque_compiler_type_t type) override; + + CompilerType + GetLValueReferenceType(lldb::opaque_compiler_type_t type) override; + + CompilerType + GetRValueReferenceType(lldb::opaque_compiler_type_t type) override; + + CompilerType GetAtomicType(lldb::opaque_compiler_type_t type) override; + + CompilerType AddConstModifier(lldb::opaque_compiler_type_t type) override; + + CompilerType AddVolatileModifier(lldb::opaque_compiler_type_t type) override; + + CompilerType AddRestrictModifier(lldb::opaque_compiler_type_t type) override; + + const llvm::fltSemantics &GetFloatTypeSemantics(size_t byte_size) override; + + bool GetCompleteType(lldb::opaque_compiler_type_t type) override; + + ConstString GetTypeName(lldb::opaque_compiler_type_t type) override; + + ConstString GetDisplayTypeName(lldb::opaque_compiler_type_t type) override; + + uint32_t GetTypeInfo(lldb::opaque_compiler_type_t type, + CompilerType *pointee_or_element_compiler_type) override; + + uint32_t GetPointerByteSize() override; + + size_t + GetNumberOfFunctionArguments(lldb::opaque_compiler_type_t type) override; + + CompilerType GetFunctionArgumentAtIndex(lldb::opaque_compiler_type_t type, + const size_t index) override; +}; + +} // namespace lldb_private + +#endif // LLDB_SOURCE_PLUGINS_TYPESYSTEM_D_TYPESYSTEMD_H From e8ef4edf6e691697d7dd5144f42a4fdeaf8fe724 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Lu=C3=ADs=20Ferreira?= Date: Thu, 25 Nov 2021 00:11:17 +0000 Subject: [PATCH 278/293] [lldb] Remove D from Clang TypeSystem MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Signed-off-by: Luís Ferreira --- lldb/source/DataFormatters/FormatManager.cpp | 4 +++- lldb/source/Plugins/TypeSystem/Clang/TypeSystemClang.cpp | 2 -- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/lldb/source/DataFormatters/FormatManager.cpp b/lldb/source/DataFormatters/FormatManager.cpp index 07bec0326f5d..0a60f8e97707 100644 --- a/lldb/source/DataFormatters/FormatManager.cpp +++ b/lldb/source/DataFormatters/FormatManager.cpp @@ -578,7 +578,9 @@ FormatManager::GetCandidateLanguages(lldb::LanguageType lang_type) { return { lldb::eLanguageTypeC_plus_plus, lldb::eLanguageTypeObjC, - // FIXME: Add a proper TypeSystem for D + }; + case lldb::eLanguageTypeD: + return { lldb::eLanguageTypeD }; default: diff --git a/lldb/source/Plugins/TypeSystem/Clang/TypeSystemClang.cpp b/lldb/source/Plugins/TypeSystem/Clang/TypeSystemClang.cpp index 126009c93bb7..3a3e8681c5ed 100644 --- a/lldb/source/Plugins/TypeSystem/Clang/TypeSystemClang.cpp +++ b/lldb/source/Plugins/TypeSystem/Clang/TypeSystemClang.cpp @@ -106,8 +106,6 @@ TypeSystemClangSupportsLanguage(lldb::LanguageType language) { // Use Clang for Rust until there is a proper language plugin for it language == eLanguageTypeRust || language == eLanguageTypeExtRenderScript || - // Use Clang for D until there is a proper language 
plugin for it - language == eLanguageTypeD || // Open Dylan compiler debug info is designed to be Clang-compatible language == eLanguageTypeDylan; } From eb8b391b5f7727d4d8c853199de718d691a55361 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Lu=C3=ADs=20Ferreira?= Date: Mon, 29 Nov 2021 00:13:58 +0000 Subject: [PATCH 279/293] [lldb] Add a minimal DWARF AST Parser for D MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Signed-off-by: Luís Ferreira --- .../Plugins/SymbolFile/DWARF/CMakeLists.txt | 1 + .../SymbolFile/DWARF/DWARFASTParserD.cpp | 59 +++++++++++++++++++ .../SymbolFile/DWARF/DWARFASTParserD.h | 56 ++++++++++++++++++ .../Plugins/TypeSystem/D/TypeSystemD.cpp | 6 ++ .../source/Plugins/TypeSystem/D/TypeSystemD.h | 7 +++ 5 files changed, 129 insertions(+) create mode 100644 lldb/source/Plugins/SymbolFile/DWARF/DWARFASTParserD.cpp create mode 100644 lldb/source/Plugins/SymbolFile/DWARF/DWARFASTParserD.h diff --git a/lldb/source/Plugins/SymbolFile/DWARF/CMakeLists.txt b/lldb/source/Plugins/SymbolFile/DWARF/CMakeLists.txt index 71fc85012733..04504c7f55a7 100644 --- a/lldb/source/Plugins/SymbolFile/DWARF/CMakeLists.txt +++ b/lldb/source/Plugins/SymbolFile/DWARF/CMakeLists.txt @@ -13,6 +13,7 @@ add_lldb_library(lldbPluginSymbolFileDWARF PLUGIN DWARFAbbreviationDeclaration.cpp DWARFASTParser.cpp DWARFASTParserClang.cpp + DWARFASTParserD.cpp DWARFAttribute.cpp DWARFBaseDIE.cpp DWARFCompileUnit.cpp diff --git a/lldb/source/Plugins/SymbolFile/DWARF/DWARFASTParserD.cpp b/lldb/source/Plugins/SymbolFile/DWARF/DWARFASTParserD.cpp new file mode 100644 index 000000000000..6f20b08da13d --- /dev/null +++ b/lldb/source/Plugins/SymbolFile/DWARF/DWARFASTParserD.cpp @@ -0,0 +1,59 @@ +//===-- DWARFASTParserD.cpp -----------------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include "DWARFASTParserD.h" +#include "Plugins/TypeSystem/D/TypeSystemD.h" + +using namespace lldb_private; +using namespace lldb; + +DWARFASTParserD::DWARFASTParserD(TypeSystemD &ast) + : m_ast(ast) {} + +TypeSP DWARFASTParserD::ParseTypeFromDWARF(const SymbolContext &sc, + const DWARFDIE &die, + bool *type_is_new_ptr) +{ + return TypeSP(); +} + +Function * +DWARFASTParserD::ParseFunctionFromDWARF(CompileUnit &comp_unit, + const DWARFDIE &die, + const AddressRange &range) +{ + return nullptr; +} + +bool +DWARFASTParserD::CompleteTypeFromDWARF(const DWARFDIE &die, Type *type, + lldb_private::CompilerType &compiler_type) +{ + return false; +} + +CompilerDecl +DWARFASTParserD::GetDeclForUIDFromDWARF(const DWARFDIE &die) +{ + return CompilerDecl(); +} + +CompilerDeclContext +DWARFASTParserD::GetDeclContextForUIDFromDWARF(const DWARFDIE &die) +{ + return CompilerDeclContext(); +} + +CompilerDeclContext +DWARFASTParserD::GetDeclContextContainingUIDFromDWARF(const DWARFDIE &die) +{ + return CompilerDeclContext(); +} + +void DWARFASTParserD::EnsureAllDIEsInDeclContextHaveBeenParsed( + lldb_private::CompilerDeclContext decl_context) {} diff --git a/lldb/source/Plugins/SymbolFile/DWARF/DWARFASTParserD.h b/lldb/source/Plugins/SymbolFile/DWARF/DWARFASTParserD.h new file mode 100644 index 000000000000..9acd8886270c --- /dev/null +++ b/lldb/source/Plugins/SymbolFile/DWARF/DWARFASTParserD.h @@ -0,0 +1,56 @@ +//===-- DWARFASTParser.h ----------------------------------------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#ifndef LLDB_SOURCE_PLUGINS_SYMBOLFILE_DWARF_DWARFASTPARSERD_H +#define LLDB_SOURCE_PLUGINS_SYMBOLFILE_DWARF_DWARFASTPARSERD_H + +#include "DWARFASTParser.h" +#include "DWARFDefines.h" +#include "lldb/Core/PluginInterface.h" +#include "lldb/Symbol/CompilerDecl.h" +#include "lldb/Symbol/CompilerDeclContext.h" +#include "lldb/Symbol/SymbolFile.h" + +namespace lldb_private { + class TypeSystemD; +} // namespace lldb_private + +class DWARFASTParserD : public DWARFASTParser { +public: + DWARFASTParserD(lldb_private::TypeSystemD &ast); + ~DWARFASTParserD() override = default; + + lldb::TypeSP ParseTypeFromDWARF(const lldb_private::SymbolContext &sc, + const DWARFDIE &die, + bool *type_is_new_ptr) override; + + lldb_private::Function * + ParseFunctionFromDWARF(lldb_private::CompileUnit &comp_unit, + const DWARFDIE &die, + const lldb_private::AddressRange &range) override; + + bool + CompleteTypeFromDWARF(const DWARFDIE &die, lldb_private::Type *type, + lldb_private::CompilerType &compiler_type) override; + + lldb_private::CompilerDecl + GetDeclForUIDFromDWARF(const DWARFDIE &die) override; + + lldb_private::CompilerDeclContext + GetDeclContextForUIDFromDWARF(const DWARFDIE &die) override; + + lldb_private::CompilerDeclContext + GetDeclContextContainingUIDFromDWARF(const DWARFDIE &die) override; + + void EnsureAllDIEsInDeclContextHaveBeenParsed( + lldb_private::CompilerDeclContext decl_context) override; +private: + lldb_private::TypeSystemD &m_ast; +}; + +#endif // LLDB_SOURCE_PLUGINS_SYMBOLFILE_DWARF_DWARFASTPARSERD_H diff --git a/lldb/source/Plugins/TypeSystem/D/TypeSystemD.cpp b/lldb/source/Plugins/TypeSystem/D/TypeSystemD.cpp index 123520e1024b..04f5a24d0928 100644 --- a/lldb/source/Plugins/TypeSystem/D/TypeSystemD.cpp +++ b/lldb/source/Plugins/TypeSystem/D/TypeSystemD.cpp @@ -63,6 +63,12 @@ lldb::TypeSystemSP TypeSystemD::CreateInstance(lldb::LanguageType language, return lldb::TypeSystemSP(); } +DWARFASTParser *TypeSystemD::GetDWARFParser() { + if (!m_dwarf_ast_parser_up) + m_dwarf_ast_parser_up = std::make_unique(*this); + return m_dwarf_ast_parser_up.get(); +} + #ifndef NDEBUG LLVM_DUMP_METHOD void TypeSystemD::dump(lldb::opaque_compiler_type_t type) const {} diff --git a/lldb/source/Plugins/TypeSystem/D/TypeSystemD.h b/lldb/source/Plugins/TypeSystem/D/TypeSystemD.h index 479d641a9f62..d054cef381dd 100644 --- a/lldb/source/Plugins/TypeSystem/D/TypeSystemD.h +++ b/lldb/source/Plugins/TypeSystem/D/TypeSystemD.h @@ -14,6 +14,8 @@ #include "lldb/Target/Target.h" #include "lldb/lldb-enumerations.h" +#include "Plugins/SymbolFile/DWARF/DWARFASTParserD.h" + namespace lldb_private { class TypeSystemD : public TypeSystem { @@ -44,6 +46,8 @@ class TypeSystemD : public TypeSystem { static void Terminate(); + DWARFASTParser *GetDWARFParser() override; + // Dumping types #ifndef NDEBUG LLVM_DUMP_METHOD void dump(lldb::opaque_compiler_type_t type) const override; @@ -330,6 +334,9 @@ class TypeSystemD : public TypeSystem { CompilerType GetFunctionArgumentAtIndex(lldb::opaque_compiler_type_t type, const size_t index) override; + +private: + std::unique_ptr m_dwarf_ast_parser_up; }; } // namespace lldb_private From 679ca1f751b3a3350195640b15dd3fa14b9a6bdc Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Lu=C3=ADs=20Ferreira?= Date: Thu, 2 Dec 2021 01:42:46 +0000 Subject: [PATCH 280/293] [lldb] Add support for basic types for D MIME-Version: 1.0 Content-Type: 
text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Signed-off-by: Luís Ferreira --- .../SymbolFile/DWARF/DWARFASTParserD.cpp | 92 ++++++++++++++++++- .../SymbolFile/DWARF/DWARFASTParserD.h | 4 + .../SymbolFile/DWARF/SymbolFileDWARF.h | 1 + .../Plugins/TypeSystem/D/CMakeLists.txt | 1 + lldb/source/Plugins/TypeSystem/D/DType.cpp | 13 +++ lldb/source/Plugins/TypeSystem/D/DType.h | 64 +++++++++++++ .../Plugins/TypeSystem/D/TypeSystemD.cpp | 44 ++++++++- .../source/Plugins/TypeSystem/D/TypeSystemD.h | 9 ++ 8 files changed, 223 insertions(+), 5 deletions(-) create mode 100644 lldb/source/Plugins/TypeSystem/D/DType.cpp create mode 100644 lldb/source/Plugins/TypeSystem/D/DType.h diff --git a/lldb/source/Plugins/SymbolFile/DWARF/DWARFASTParserD.cpp b/lldb/source/Plugins/SymbolFile/DWARF/DWARFASTParserD.cpp index 6f20b08da13d..a118bf97b11e 100644 --- a/lldb/source/Plugins/SymbolFile/DWARF/DWARFASTParserD.cpp +++ b/lldb/source/Plugins/SymbolFile/DWARF/DWARFASTParserD.cpp @@ -7,6 +7,15 @@ //===----------------------------------------------------------------------===// #include "DWARFASTParserD.h" +#include "DWARFASTParserClang.h" +#include "DWARFDebugInfo.h" +#include "DWARFDeclContext.h" +#include "DWARFDefines.h" +#include "SymbolFileDWARF.h" +#include "SymbolFileDWARFDebugMap.h" +#include "SymbolFileDWARFDwo.h" +#include "UniqueDWARFASTType.h" + #include "Plugins/TypeSystem/D/TypeSystemD.h" using namespace lldb_private; @@ -19,7 +28,88 @@ TypeSP DWARFASTParserD::ParseTypeFromDWARF(const SymbolContext &sc, const DWARFDIE &die, bool *type_is_new_ptr) { - return TypeSP(); + if (type_is_new_ptr) + *type_is_new_ptr = false; + + if (!die) + return nullptr; + + // TODO: Add logging + + SymbolFileDWARF *dwarf = die.GetDWARF(); + + Type *type_ptr = dwarf->GetDIEToType().lookup(die.GetDIE()); + if (type_ptr == DIE_IS_BEING_PARSED) + return nullptr; + + if (type_ptr) + return type_ptr->shared_from_this(); + // Set a bit that lets us know that we are currently parsing this + dwarf->GetDIEToType()[die.GetDIE()] = DIE_IS_BEING_PARSED; + + ParsedDWARFTypeAttributes attrs(die); + + if (DWARFDIE signature_die = attrs.signature.Reference()) { + if (TypeSP type_sp = + ParseTypeFromDWARF(sc, signature_die, type_is_new_ptr)) { + dwarf->GetDIEToType()[die.GetDIE()] = type_sp.get(); + return type_sp; + } + return nullptr; + } + + if (type_is_new_ptr) + *type_is_new_ptr = true; + + const dw_tag_t tag = die.Tag(); + + TypeSP type_sp; + switch(tag) { + case DW_TAG_base_type: + type_sp = ParseSimpleType(sc, die, attrs); + break; + case DW_TAG_typedef: + case DW_TAG_pointer_type: + case DW_TAG_reference_type: + case DW_TAG_rvalue_reference_type: + case DW_TAG_const_type: + case DW_TAG_restrict_type: + case DW_TAG_volatile_type: + case DW_TAG_atomic_type: + case DW_TAG_unspecified_type: + return type_sp = TypeSP(); + break; + + default: break; + } + + // TODO: Use Update Symbol function + return UpdateSymbolContextScopeForType(sc, die, type_sp); +} + +lldb::TypeSP +DWARFASTParserD::ParseSimpleType(const lldb_private::SymbolContext &sc, + const DWARFDIE &die, + ParsedDWARFTypeAttributes &attrs) +{ + SymbolFileDWARF *dwarf = die.GetDWARF(); + CompilerType cp_type; + Type::ResolveState resolve_state = Type::ResolveState::Unresolved; + Type::EncodingDataType encoding_data_type = Type::eEncodingIsUID; + + const dw_tag_t tag = die.Tag(); + switch(tag) + { + case DW_TAG_base_type: + cp_type = m_ast.GetBuiltinTypeForDWARFEncodingAndBitSize( + attrs.encoding, attrs.byte_size.getValueOr(0) * 8); + break; + default: break; + } + 
return std::make_shared( + die.GetID(), dwarf, attrs.name, attrs.byte_size, nullptr, + dwarf->GetUID(attrs.type.Reference()), encoding_data_type, &attrs.decl, + cp_type, resolve_state); } Function * diff --git a/lldb/source/Plugins/SymbolFile/DWARF/DWARFASTParserD.h b/lldb/source/Plugins/SymbolFile/DWARF/DWARFASTParserD.h index 9acd8886270c..d75a2090ba2f 100644 --- a/lldb/source/Plugins/SymbolFile/DWARF/DWARFASTParserD.h +++ b/lldb/source/Plugins/SymbolFile/DWARF/DWARFASTParserD.h @@ -50,6 +50,10 @@ class DWARFASTParserD : public DWARFASTParser { void EnsureAllDIEsInDeclContextHaveBeenParsed( lldb_private::CompilerDeclContext decl_context) override; private: + lldb::TypeSP ParseSimpleType(const lldb_private::SymbolContext &sc, + const DWARFDIE &die, + ParsedDWARFTypeAttributes &attrs); + lldb_private::TypeSystemD &m_ast; }; diff --git a/lldb/source/Plugins/SymbolFile/DWARF/SymbolFileDWARF.h b/lldb/source/Plugins/SymbolFile/DWARF/SymbolFileDWARF.h index fbdd91847568..80a38f77c2ca 100644 --- a/lldb/source/Plugins/SymbolFile/DWARF/SymbolFileDWARF.h +++ b/lldb/source/Plugins/SymbolFile/DWARF/SymbolFileDWARF.h @@ -77,6 +77,7 @@ class SymbolFileDWARF : public lldb_private::SymbolFile, friend class DWARFDIE; friend class DWARFASTParser; friend class DWARFASTParserClang; + friend class DWARFASTParserD; // Static Functions static void Initialize(); diff --git a/lldb/source/Plugins/TypeSystem/D/CMakeLists.txt b/lldb/source/Plugins/TypeSystem/D/CMakeLists.txt index 9795689d7b4f..19967fb6764d 100644 --- a/lldb/source/Plugins/TypeSystem/D/CMakeLists.txt +++ b/lldb/source/Plugins/TypeSystem/D/CMakeLists.txt @@ -1,5 +1,6 @@ add_lldb_library(lldbPluginTypeSystemD PLUGIN TypeSystemD.cpp + DType.cpp LINK_LIBS lldbCore diff --git a/lldb/source/Plugins/TypeSystem/D/DType.cpp b/lldb/source/Plugins/TypeSystem/D/DType.cpp new file mode 100644 index 000000000000..efc1d566fd60 --- /dev/null +++ b/lldb/source/Plugins/TypeSystem/D/DType.cpp @@ -0,0 +1,13 @@ +//===-- DType.cpp ---------------------------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include "DType.h" + +bool DType::IsBuiltIn() const { + return m_kind & eDTypeKindBuiltin; +} diff --git a/lldb/source/Plugins/TypeSystem/D/DType.h b/lldb/source/Plugins/TypeSystem/D/DType.h new file mode 100644 index 000000000000..c2babcabff7b --- /dev/null +++ b/lldb/source/Plugins/TypeSystem/D/DType.h @@ -0,0 +1,64 @@ +//===-- DType.h -------------------------------------------------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#ifndef LLDB_SOURCE_PLUGINS_TYPESYSTEM_D_DTYPE_H +#define LLDB_SOURCE_PLUGINS_TYPESYSTEM_D_DTYPE_H + +#include "lldb/Utility/ConstString.h" + +enum DTypeKind : uint8_t { + eDTypeKindInvalid = 0, + eDTypeKindVoid, + eDTypeKindPtr, + eDTypeKindSlice, + + eDTypeKindBuiltin = 1 << 7, // 128 + eDTypeKindBool = eDTypeKindBuiltin, + eDTypeKindByte, + eDTypeKindUByte, + eDTypeKindShort, + eDTypeKindUShort, + eDTypeKindInt, + eDTypeKindUInt, + eDTypeKindLong, + eDTypeKindULong, + eDTypeKindCent, + eDTypeKindUCent, + eDTypeKindChar, + eDTypeKindWChar, + eDTypeKindDChar, + eDTypeKindFloat, + eDTypeKindDouble, + eDTypeKindReal, + eDTypeKindIFloat, + eDTypeKindIDouble, + eDTypeKindIReal, + eDTypeKindCFloat, + eDTypeKindCDouble, + eDTypeKindCReal, + eDTypeKindMax, +}; + +using namespace lldb_private; + +class DType { +public: + DType(DTypeKind kind, const ConstString &name) : m_kind(kind), m_name(name) {} + virtual ~DType() = default; + + DTypeKind GetKind() { return m_kind; } + const ConstString &GetName() const { return m_name; } + + bool IsBuiltIn() const; + +private: + DTypeKind m_kind; + ConstString m_name; +}; + +#endif // LLDB_SOURCE_PLUGINS_TYPESYSTEM_D_DTYPE_H diff --git a/lldb/source/Plugins/TypeSystem/D/TypeSystemD.cpp b/lldb/source/Plugins/TypeSystem/D/TypeSystemD.cpp index 04f5a24d0928..f86e3c9ff9d7 100644 --- a/lldb/source/Plugins/TypeSystem/D/TypeSystemD.cpp +++ b/lldb/source/Plugins/TypeSystem/D/TypeSystemD.cpp @@ -360,13 +360,16 @@ TypeSystemD::GetIntegralTemplateArgument(lldb::opaque_compiler_type_t type, } CompilerType TypeSystemD::GetTypeForFormatters(void *type) { - return CompilerType(); + if(!type) + return CompilerType(); + return CompilerType(this, type); } llvm::Optional TypeSystemD::GetTypeBitAlign(lldb::opaque_compiler_type_t type, ExecutionContextScope *exe_scope) { - return llvm::None; + //TODO: Support types with different alignment + return 0; } uint32_t TypeSystemD::GetNumChildren(lldb::opaque_compiler_type_t type, @@ -401,7 +404,7 @@ CompilerType TypeSystemD::GetCanonicalType(lldb::opaque_compiler_type_t type) { CompilerType TypeSystemD::GetFullyUnqualifiedType(lldb::opaque_compiler_type_t type) { - return CompilerType(); + return CompilerType(this, type); } CompilerType @@ -482,15 +485,24 @@ const llvm::fltSemantics &TypeSystemD::GetFloatTypeSemantics(size_t byte_size) { } bool TypeSystemD::GetCompleteType(lldb::opaque_compiler_type_t type) { + if (!type) + return false; + DType* dtype = static_cast(type); + if (dtype->IsBuiltIn()) + return true; + + //TODO: Implement this for other kinds return false; } ConstString TypeSystemD::GetTypeName(lldb::opaque_compiler_type_t type) { + if (type) + return static_cast(type)->GetName(); return ConstString(); } ConstString TypeSystemD::GetDisplayTypeName(lldb::opaque_compiler_type_t type) { - return ConstString(); + return GetTypeName(type); } uint32_t @@ -542,4 +554,28 @@ bool TypeSystemD::DeclContextIsContainedInLookup(void *opaque_decl_ctx, return false; } +CompilerType +TypeSystemD::CreateBaseType(DTypeKind kind, + const lldb_private::ConstString &name) { + DType *type = new DType(kind, name); + return CompilerType(this, type); +} + +CompilerType +TypeSystemD::GetBuiltinTypeForDWARFEncodingAndBitSize(uint32_t dw_ate, uint32_t bit_size) +{ + //TODO: Use bit_size + + switch(dw_ate) + { + case DW_ATE_boolean: + return CreateBaseType(eDTypeKindBool, ConstString("bool")); + default: + 
break; + } + + // unknown suitable builtin type + return CompilerType(); +} + } // namespace lldb_private diff --git a/lldb/source/Plugins/TypeSystem/D/TypeSystemD.h b/lldb/source/Plugins/TypeSystem/D/TypeSystemD.h index d054cef381dd..f13b25b496ce 100644 --- a/lldb/source/Plugins/TypeSystem/D/TypeSystemD.h +++ b/lldb/source/Plugins/TypeSystem/D/TypeSystemD.h @@ -9,6 +9,8 @@ #ifndef LLDB_SOURCE_PLUGINS_TYPESYSTEM_D_TYPESYSTEMD_H #define LLDB_SOURCE_PLUGINS_TYPESYSTEM_D_TYPESYSTEMD_H +#include "DType.h" + #include "lldb/Core/Module.h" #include "lldb/Symbol/TypeSystem.h" #include "lldb/Target/Target.h" @@ -19,6 +21,8 @@ namespace lldb_private { class TypeSystemD : public TypeSystem { + friend DWARFASTParserD; + // LLVM RTTI support static char ID; @@ -336,6 +340,11 @@ class TypeSystemD : public TypeSystem { const size_t index) override; private: + CompilerType CreateBaseType(DTypeKind kind, + const lldb_private::ConstString &name); + + CompilerType GetBuiltinTypeForDWARFEncodingAndBitSize(uint32_t dw_ate, uint32_t bit_size); + std::unique_ptr m_dwarf_ast_parser_up; }; From 1a4291df36685a8402f2d5b4c22bd16e00de696e Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Lu=C3=ADs=20Ferreira?= Date: Tue, 21 Dec 2021 02:09:25 +0000 Subject: [PATCH 281/293] [lldb] Add mapping for DType kind and lldb::Format --- lldb/source/Plugins/TypeSystem/D/DType.cpp | 31 +++++++++++++++++++ lldb/source/Plugins/TypeSystem/D/DType.h | 3 ++ .../Plugins/TypeSystem/D/TypeSystemD.cpp | 5 ++- 3 files changed, 38 insertions(+), 1 deletion(-) diff --git a/lldb/source/Plugins/TypeSystem/D/DType.cpp b/lldb/source/Plugins/TypeSystem/D/DType.cpp index efc1d566fd60..e794a5b65bea 100644 --- a/lldb/source/Plugins/TypeSystem/D/DType.cpp +++ b/lldb/source/Plugins/TypeSystem/D/DType.cpp @@ -11,3 +11,34 @@ bool DType::IsBuiltIn() const { return m_kind & eDTypeKindBuiltin; } + +lldb::Format DType::GetFormat() const { + switch (m_kind) { + case eDTypeKindBool: + return lldb::eFormatBoolean; + case eDTypeKindChar: + return lldb::eFormatUnicode8; + case eDTypeKindWChar: + return lldb::eFormatUnicode16; + case eDTypeKindDChar: + return lldb::eFormatUnicode32; + case eDTypeKindUShort: + case eDTypeKindUInt: + case eDTypeKindULong: + case eDTypeKindUCent: + return lldb::eFormatUnsigned; + case eDTypeKindShort: + case eDTypeKindInt: + case eDTypeKindLong: + case eDTypeKindCent: + return lldb::eFormatDecimal; + case eDTypeKindFloat: + case eDTypeKindDouble: + case eDTypeKindReal: + return lldb::eFormatFloat; + case eDTypeKindVoid: + return lldb::eFormatVoid; + default: + return lldb::eFormatBytes; + } +} diff --git a/lldb/source/Plugins/TypeSystem/D/DType.h b/lldb/source/Plugins/TypeSystem/D/DType.h index c2babcabff7b..85fe03485c7e 100644 --- a/lldb/source/Plugins/TypeSystem/D/DType.h +++ b/lldb/source/Plugins/TypeSystem/D/DType.h @@ -10,6 +10,7 @@ #define LLDB_SOURCE_PLUGINS_TYPESYSTEM_D_DTYPE_H #include "lldb/Utility/ConstString.h" +#include "lldb/lldb-enumerations.h" enum DTypeKind : uint8_t { eDTypeKindInvalid = 0, @@ -56,6 +57,8 @@ class DType { bool IsBuiltIn() const; + lldb::Format GetFormat() const; + private: DTypeKind m_kind; ConstString m_name; diff --git a/lldb/source/Plugins/TypeSystem/D/TypeSystemD.cpp b/lldb/source/Plugins/TypeSystem/D/TypeSystemD.cpp index f86e3c9ff9d7..6d1c3d9193bc 100644 --- a/lldb/source/Plugins/TypeSystem/D/TypeSystemD.cpp +++ b/lldb/source/Plugins/TypeSystem/D/TypeSystemD.cpp @@ -267,7 +267,10 @@ lldb::Encoding TypeSystemD::GetEncoding(lldb::opaque_compiler_type_t type, } lldb::Format 
TypeSystemD::GetFormat(lldb::opaque_compiler_type_t type) { - return lldb::eFormatDefault; + if (!type) + return lldb::eFormatDefault; + + return static_cast(type)->GetFormat(); } lldb::TypeClass TypeSystemD::GetTypeClass(lldb::opaque_compiler_type_t type) { From 7bdd9e9ceb6cc2abe154c51786be8590d8a70af9 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Lu=C3=ADs=20Ferreira?= Date: Tue, 21 Dec 2021 15:53:15 +0000 Subject: [PATCH 282/293] [lldb] Add mapping for DType kind and lldb::Encoding --- lldb/source/Plugins/TypeSystem/D/DType.cpp | 30 +++++++++++++++++++ lldb/source/Plugins/TypeSystem/D/DType.h | 1 + .../Plugins/TypeSystem/D/TypeSystemD.cpp | 7 ++++- 3 files changed, 37 insertions(+), 1 deletion(-) diff --git a/lldb/source/Plugins/TypeSystem/D/DType.cpp b/lldb/source/Plugins/TypeSystem/D/DType.cpp index e794a5b65bea..dd975bc93923 100644 --- a/lldb/source/Plugins/TypeSystem/D/DType.cpp +++ b/lldb/source/Plugins/TypeSystem/D/DType.cpp @@ -7,6 +7,7 @@ //===----------------------------------------------------------------------===// #include "DType.h" +#include "lldb/lldb-enumerations.h" bool DType::IsBuiltIn() const { return m_kind & eDTypeKindBuiltin; @@ -42,3 +43,32 @@ lldb::Format DType::GetFormat() const { return lldb::eFormatBytes; } } + +lldb::Encoding DType::GetEncoding(uint64_t &count) const { + count = 1; + switch (m_kind) { + case eDTypeKindByte: + case eDTypeKindShort: + case eDTypeKindInt: + case eDTypeKindLong: + case eDTypeKindCent: + return lldb::eEncodingSint; + case eDTypeKindBool: + case eDTypeKindUByte: + case eDTypeKindUShort: + case eDTypeKindUInt: + case eDTypeKindULong: + case eDTypeKindUCent: + case eDTypeKindChar: + case eDTypeKindWChar: + case eDTypeKindDChar: + return lldb::eEncodingUint; + case eDTypeKindFloat: + case eDTypeKindDouble: + case eDTypeKindReal: + return lldb::eEncodingIEEE754; + default: + count = 0; + return lldb::eEncodingInvalid; + } +} diff --git a/lldb/source/Plugins/TypeSystem/D/DType.h b/lldb/source/Plugins/TypeSystem/D/DType.h index 85fe03485c7e..7debb8281740 100644 --- a/lldb/source/Plugins/TypeSystem/D/DType.h +++ b/lldb/source/Plugins/TypeSystem/D/DType.h @@ -58,6 +58,7 @@ class DType { bool IsBuiltIn() const; lldb::Format GetFormat() const; + lldb::Encoding GetEncoding(uint64_t &count) const; private: DTypeKind m_kind; diff --git a/lldb/source/Plugins/TypeSystem/D/TypeSystemD.cpp b/lldb/source/Plugins/TypeSystem/D/TypeSystemD.cpp index 6d1c3d9193bc..943f1b8036d0 100644 --- a/lldb/source/Plugins/TypeSystem/D/TypeSystemD.cpp +++ b/lldb/source/Plugins/TypeSystem/D/TypeSystemD.cpp @@ -263,7 +263,12 @@ TypeSystemD::GetBitSize(lldb::opaque_compiler_type_t type, lldb::Encoding TypeSystemD::GetEncoding(lldb::opaque_compiler_type_t type, uint64_t &count) { - return lldb::eEncodingInvalid; + if (!type) { + count = 0; + return lldb::eEncodingInvalid; + } + + return static_cast(type)->GetEncoding(count); } lldb::Format TypeSystemD::GetFormat(lldb::opaque_compiler_type_t type) { From 806ad10ef2080b0147817018f9d4259c965728f1 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Lu=C3=ADs=20Ferreira?= Date: Tue, 21 Dec 2021 16:18:56 +0000 Subject: [PATCH 283/293] [lldb] Add mapping for DType kind and lldb::BasicType --- lldb/source/Plugins/TypeSystem/D/DType.cpp | 52 +++++++++++++++++++ lldb/source/Plugins/TypeSystem/D/DType.h | 1 + .../Plugins/TypeSystem/D/TypeSystemD.cpp | 5 +- 3 files changed, 57 insertions(+), 1 deletion(-) diff --git a/lldb/source/Plugins/TypeSystem/D/DType.cpp b/lldb/source/Plugins/TypeSystem/D/DType.cpp index dd975bc93923..8db585d72d1b 100644 --- 
a/lldb/source/Plugins/TypeSystem/D/DType.cpp +++ b/lldb/source/Plugins/TypeSystem/D/DType.cpp @@ -72,3 +72,55 @@ lldb::Encoding DType::GetEncoding(uint64_t &count) const { return lldb::eEncodingInvalid; } } + +lldb::BasicType DType::GetBasicType() const { + if(!IsBuiltIn()) + return lldb::eBasicTypeOther; + + switch (m_kind) { + case eDTypeKindVoid: + return lldb::eBasicTypeVoid; + case eDTypeKindBool: + return lldb::eBasicTypeBool; + case eDTypeKindByte: + return lldb::eBasicTypeSignedChar; + case eDTypeKindUByte: + return lldb::eBasicTypeUnsignedChar; + case eDTypeKindShort: + return lldb::eBasicTypeShort; + case eDTypeKindUShort: + return lldb::eBasicTypeUnsignedShort; + case eDTypeKindInt: + return lldb::eBasicTypeInt; + case eDTypeKindUInt: + return lldb::eBasicTypeUnsignedInt; + case eDTypeKindLong: + return lldb::eBasicTypeLong; + case eDTypeKindULong: + return lldb::eBasicTypeUnsignedLong; + case eDTypeKindCent: + return lldb::eBasicTypeInt128; + case eDTypeKindUCent: + return lldb::eBasicTypeUnsignedInt128; + case eDTypeKindChar: + return lldb::eBasicTypeChar8; + case eDTypeKindWChar: + return lldb::eBasicTypeChar16; + case eDTypeKindDChar: + return lldb::eBasicTypeChar32; + case eDTypeKindFloat: + return lldb::eBasicTypeFloat; + case eDTypeKindDouble: + return lldb::eBasicTypeDouble; + case eDTypeKindReal: + return lldb::eBasicTypeLongDouble; + case eDTypeKindCFloat: + return lldb::eBasicTypeFloatComplex; + case eDTypeKindCDouble: + return lldb::eBasicTypeDoubleComplex; + case eDTypeKindCReal: + return lldb::eBasicTypeLongDoubleComplex; + default: + return lldb::eBasicTypeOther; + } +} diff --git a/lldb/source/Plugins/TypeSystem/D/DType.h b/lldb/source/Plugins/TypeSystem/D/DType.h index 7debb8281740..524ad984ae40 100644 --- a/lldb/source/Plugins/TypeSystem/D/DType.h +++ b/lldb/source/Plugins/TypeSystem/D/DType.h @@ -59,6 +59,7 @@ class DType { lldb::Format GetFormat() const; lldb::Encoding GetEncoding(uint64_t &count) const; + lldb::BasicType GetBasicType() const; private: DTypeKind m_kind; diff --git a/lldb/source/Plugins/TypeSystem/D/TypeSystemD.cpp b/lldb/source/Plugins/TypeSystem/D/TypeSystemD.cpp index 943f1b8036d0..a86c5b0f5c14 100644 --- a/lldb/source/Plugins/TypeSystem/D/TypeSystemD.cpp +++ b/lldb/source/Plugins/TypeSystem/D/TypeSystemD.cpp @@ -392,7 +392,10 @@ CompilerType TypeSystemD::GetBuiltinTypeByName(ConstString name) { lldb::BasicType TypeSystemD::GetBasicTypeEnumeration(lldb::opaque_compiler_type_t type) { - return lldb::eBasicTypeInvalid; + if (!type) + return lldb::eBasicTypeInvalid; + + return static_cast(type)->GetBasicType(); } CompilerType From e429ca3da0c662df099caf4d6c971857facad6c8 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Lu=C3=ADs=20Ferreira?= Date: Thu, 23 Dec 2021 04:22:42 +0000 Subject: [PATCH 284/293] [lldb] Implement Dump for D TypeSystem --- .../Plugins/TypeSystem/D/TypeSystemD.cpp | 33 +++++++++++++++++-- 1 file changed, 30 insertions(+), 3 deletions(-) diff --git a/lldb/source/Plugins/TypeSystem/D/TypeSystemD.cpp b/lldb/source/Plugins/TypeSystem/D/TypeSystemD.cpp index a86c5b0f5c14..0286c08664d4 100644 --- a/lldb/source/Plugins/TypeSystem/D/TypeSystemD.cpp +++ b/lldb/source/Plugins/TypeSystem/D/TypeSystemD.cpp @@ -1,8 +1,10 @@ #include "TypeSystemD.h" #include "lldb/lldb-enumerations.h" +#include "lldb/Core/DumpDataExtractor.h" #include "lldb/Core/Module.h" #include "lldb/Core/PluginManager.h" +#include "lldb/Core/StreamFile.h" #include "lldb/Symbol/Type.h" #include "lldb/Utility/ArchSpec.h" @@ -84,7 +86,23 @@ void 
TypeSystemD::DumpValue(lldb::opaque_compiler_type_t type, lldb::offset_t data_offset, size_t data_byte_size, uint32_t bitfield_bit_size, uint32_t bitfield_bit_offset, bool show_types, - bool show_summary, bool verbose, uint32_t depth) {} + bool show_summary, bool verbose, uint32_t depth) { + if (!type) + return; + DType *dtype = static_cast(type); + + switch(dtype->GetKind()) + { + default: + // We are down to a scalar type that we just need to display. + DumpDataExtractor(data, s, data_offset, format, data_byte_size, 1, + UINT32_MAX, LLDB_INVALID_ADDRESS, bitfield_bit_size, + bitfield_bit_offset); + + if (show_summary) + DumpSummary(type, exe_ctx, s, data, data_offset, data_byte_size); + } +} bool TypeSystemD::DumpTypeValue(lldb::opaque_compiler_type_t type, Stream *s, lldb::Format format, const DataExtractor &data, @@ -93,7 +111,13 @@ bool TypeSystemD::DumpTypeValue(lldb::opaque_compiler_type_t type, Stream *s, uint32_t bitfield_bit_size, uint32_t bitfield_bit_offset, ExecutionContextScope *exe_scope) { - return false; + if (!type) + return false; + + uint32_t item_count = 1; + return DumpDataExtractor(data, s, data_offset, format, data_byte_size, + item_count, UINT32_MAX, LLDB_INVALID_ADDRESS, + bitfield_bit_size, bitfield_bit_offset, exe_scope); } void TypeSystemD::DumpSummary(lldb::opaque_compiler_type_t type, @@ -103,7 +127,10 @@ void TypeSystemD::DumpSummary(lldb::opaque_compiler_type_t type, size_t data_byte_size) {} void TypeSystemD::DumpTypeDescription(lldb::opaque_compiler_type_t type, - lldb::DescriptionLevel level) {} + lldb::DescriptionLevel level) { + StreamFile s(stdout, false); + DumpTypeDescription(type, &s, level); +} void TypeSystemD::DumpTypeDescription(lldb::opaque_compiler_type_t type, Stream *s, lldb::DescriptionLevel level) { From 806fe93cf86f94dd2bbe07ef81c26d26ec448a85 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Lu=C3=ADs=20Ferreira?= Date: Fri, 24 Dec 2021 00:40:35 +0000 Subject: [PATCH 285/293] [lldb] Add minimal type information for builtin types in D TypeSystem --- .../Plugins/TypeSystem/D/TypeSystemD.cpp | 51 ++++++++++++++++++- 1 file changed, 50 insertions(+), 1 deletion(-) diff --git a/lldb/source/Plugins/TypeSystem/D/TypeSystemD.cpp b/lldb/source/Plugins/TypeSystem/D/TypeSystemD.cpp index 0286c08664d4..1c2a555b2c3b 100644 --- a/lldb/source/Plugins/TypeSystem/D/TypeSystemD.cpp +++ b/lldb/source/Plugins/TypeSystem/D/TypeSystemD.cpp @@ -546,7 +546,56 @@ ConstString TypeSystemD::GetDisplayTypeName(lldb::opaque_compiler_type_t type) { uint32_t TypeSystemD::GetTypeInfo(lldb::opaque_compiler_type_t type, CompilerType *pointee_or_element_compiler_type) { - return 0; + if (!type) + return 0; + + DType* dtype = static_cast(type); + uint32_t typeinfo_flags = 0; + + if(dtype->IsBuiltIn()) + { + typeinfo_flags = lldb::eTypeIsBuiltIn | lldb::eTypeHasValue; + switch(dtype->GetKind()) + { + case eDTypeKindIFloat: + case eDTypeKindIDouble: + case eDTypeKindIReal: + case eDTypeKindCFloat: + case eDTypeKindCDouble: + case eDTypeKindCReal: + typeinfo_flags |= lldb::eTypeIsComplex; + LLVM_FALLTHROUGH; + case eDTypeKindFloat: + case eDTypeKindDouble: + case eDTypeKindReal: + typeinfo_flags |= lldb::eTypeIsFloat; + goto Lscalar; + case eDTypeKindByte: + case eDTypeKindShort: + case eDTypeKindInt: + case eDTypeKindLong: + case eDTypeKindCent: + typeinfo_flags |= lldb::eTypeIsSigned; + LLVM_FALLTHROUGH; + case eDTypeKindUByte: + case eDTypeKindUShort: + case eDTypeKindUInt: + case eDTypeKindULong: + case eDTypeKindUCent: + typeinfo_flags |= lldb::eTypeIsInteger; + LLVM_FALLTHROUGH; 
+ case eDTypeKindBool: + case eDTypeKindChar: + case eDTypeKindWChar: + case eDTypeKindDChar: + Lscalar: + typeinfo_flags |= lldb::eTypeIsScalar; + break; + default: + break; + } + } + return typeinfo_flags; } uint32_t TypeSystemD::GetPointerByteSize() { return 0; } From 38e5f7169aeadb48b7ee00ba03ab8220f06bee8e Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Lu=C3=ADs=20Ferreira?= Date: Fri, 24 Dec 2021 20:36:39 +0000 Subject: [PATCH 286/293] [lldb] Add target triple on D TypeSystem --- .../Plugins/TypeSystem/D/TypeSystemD.cpp | 19 +++++++++++++++++-- .../source/Plugins/TypeSystem/D/TypeSystemD.h | 3 ++- 2 files changed, 19 insertions(+), 3 deletions(-) diff --git a/lldb/source/Plugins/TypeSystem/D/TypeSystemD.cpp b/lldb/source/Plugins/TypeSystem/D/TypeSystemD.cpp index 1c2a555b2c3b..e9d91689f27c 100644 --- a/lldb/source/Plugins/TypeSystem/D/TypeSystemD.cpp +++ b/lldb/source/Plugins/TypeSystem/D/TypeSystemD.cpp @@ -15,7 +15,7 @@ namespace lldb_private { // LLVM RTTI support char TypeSystemD::ID; -TypeSystemD::TypeSystemD() = default; +TypeSystemD::TypeSystemD(llvm::Triple target_triple) : m_target_triple(target_triple) {} TypeSystemD::~TypeSystemD() { Finalize(); } void TypeSystemD::Finalize() {} @@ -59,8 +59,23 @@ lldb::TypeSystemSP TypeSystemD::CreateInstance(lldb::LanguageType language, if (!arch.IsValid()) return lldb::TypeSystemSP(); + llvm::Triple triple = arch.GetTriple(); + // LLVM wants this to be set to iOS or MacOSX; if we're working on + // a bare-boards type image, change the triple for llvm's benefit. + if (triple.getVendor() == llvm::Triple::Apple && + triple.getOS() == llvm::Triple::UnknownOS) { + if (triple.getArch() == llvm::Triple::arm || + triple.getArch() == llvm::Triple::aarch64 || + triple.getArch() == llvm::Triple::aarch64_32 || + triple.getArch() == llvm::Triple::thumb) { + triple.setOS(llvm::Triple::IOS); + } else { + triple.setOS(llvm::Triple::MacOSX); + } + } + if (module) - return std::shared_ptr(new TypeSystemD); + return std::shared_ptr(new TypeSystemD(triple)); return lldb::TypeSystemSP(); } diff --git a/lldb/source/Plugins/TypeSystem/D/TypeSystemD.h b/lldb/source/Plugins/TypeSystem/D/TypeSystemD.h index f13b25b496ce..0bfa7d8ef69d 100644 --- a/lldb/source/Plugins/TypeSystem/D/TypeSystemD.h +++ b/lldb/source/Plugins/TypeSystem/D/TypeSystemD.h @@ -31,7 +31,7 @@ class TypeSystemD : public TypeSystem { bool isA(const void *ClassID) const override { return ClassID == &ID; } static bool classof(const TypeSystem *ts) { return ts->isA(&ID); } - explicit TypeSystemD(); + explicit TypeSystemD(llvm::Triple); ~TypeSystemD() override; void Finalize() override; @@ -346,6 +346,7 @@ class TypeSystemD : public TypeSystem { CompilerType GetBuiltinTypeForDWARFEncodingAndBitSize(uint32_t dw_ate, uint32_t bit_size); std::unique_ptr m_dwarf_ast_parser_up; + llvm::Triple m_target_triple; }; } // namespace lldb_private From 51638b562ed99fd62d130161aa5ec4b23b4ec987 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Lu=C3=ADs=20Ferreira?= Date: Fri, 24 Dec 2021 00:42:16 +0000 Subject: [PATCH 287/293] [lldb] Implement GetBitSize for builtin types in D TypeSystem --- lldb/source/Plugins/TypeSystem/D/DType.cpp | 64 +++++++++++++++++++ lldb/source/Plugins/TypeSystem/D/DType.h | 5 ++ .../Plugins/TypeSystem/D/TypeSystemD.cpp | 5 +- 3 files changed, 73 insertions(+), 1 deletion(-) diff --git a/lldb/source/Plugins/TypeSystem/D/DType.cpp b/lldb/source/Plugins/TypeSystem/D/DType.cpp index 8db585d72d1b..7dd07ea2bfb8 100644 --- a/lldb/source/Plugins/TypeSystem/D/DType.cpp +++ 
b/lldb/source/Plugins/TypeSystem/D/DType.cpp @@ -8,6 +8,7 @@ #include "DType.h" #include "lldb/lldb-enumerations.h" +#include "llvm/ADT/Triple.h" bool DType::IsBuiltIn() const { return m_kind & eDTypeKindBuiltin; @@ -124,3 +125,66 @@ lldb::BasicType DType::GetBasicType() const { return lldb::eBasicTypeOther; } } + +static uint64_t GetRealBitSize(llvm::Triple &target_triple) { + if (target_triple.isX86() && !target_triple.isWindowsMSVCEnvironment() && !target_triple.isAndroid()) + // Assume x87 support: x87 FPU register size + return 80; + + if ((target_triple.isAArch64() && !target_triple.isOSDarwin()) || + (target_triple.isAndroid() && target_triple.getArch() == llvm::Triple::x86_64)) + // Assume 128-bit quadruple precision + return 128; + + // at least size of a double type kind + return 64; +} + +llvm::Optional DType::GetBitSize(llvm::Triple &target_triple) const { + return DType::GetBitSize(m_kind, target_triple); +} + +llvm::Optional DType::GetBitSize(DTypeKind kind, llvm::Triple &target_triple) { + switch(kind) + { + case eDTypeKindPtr: + if (target_triple.isArch64Bit()) + return 64; + if (target_triple.isArch32Bit()) + return 32; + if (target_triple.isArch16Bit()) + return 16; + + // unknown arch bit size + return llvm::None; + case eDTypeKindBool: + case eDTypeKindByte: + case eDTypeKindUByte: + case eDTypeKindChar: + return 8; + case eDTypeKindShort: + case eDTypeKindUShort: + case eDTypeKindWChar: + return 16; + case eDTypeKindInt: + case eDTypeKindUInt: + case eDTypeKindDChar: + case eDTypeKindFloat: + return 32; + case eDTypeKindLong: + case eDTypeKindULong: + case eDTypeKindDouble: + case eDTypeKindCFloat: + return 64; + case eDTypeKindCent: + case eDTypeKindUCent: + case eDTypeKindCDouble: + return 128; + case eDTypeKindReal: + return GetRealBitSize(target_triple); + case eDTypeKindCReal: + return GetRealBitSize(target_triple) * 2; + default: + return llvm::None; + } +} diff --git a/lldb/source/Plugins/TypeSystem/D/DType.h b/lldb/source/Plugins/TypeSystem/D/DType.h index 524ad984ae40..a72ee4f67ee3 100644 --- a/lldb/source/Plugins/TypeSystem/D/DType.h +++ b/lldb/source/Plugins/TypeSystem/D/DType.h @@ -11,6 +11,8 @@ #include "lldb/Utility/ConstString.h" #include "lldb/lldb-enumerations.h" +#include "llvm/ADT/APInt.h" +#include "llvm/ADT/Triple.h" enum DTypeKind : uint8_t { eDTypeKindInvalid = 0, @@ -61,6 +63,9 @@ class DType { lldb::Encoding GetEncoding(uint64_t &count) const; lldb::BasicType GetBasicType() const; + static llvm::Optional GetBitSize(DTypeKind kind, llvm::Triple &target_triple); + llvm::Optional GetBitSize(llvm::Triple &target_triple) const; + private: DTypeKind m_kind; ConstString m_name; diff --git a/lldb/source/Plugins/TypeSystem/D/TypeSystemD.cpp b/lldb/source/Plugins/TypeSystem/D/TypeSystemD.cpp index e9d91689f27c..5c338183ee9b 100644 --- a/lldb/source/Plugins/TypeSystem/D/TypeSystemD.cpp +++ b/lldb/source/Plugins/TypeSystem/D/TypeSystemD.cpp @@ -300,7 +300,10 @@ CompilerType TypeSystemD::GetBasicTypeFromAST(lldb::BasicType basic_type) { llvm::Optional TypeSystemD::GetBitSize(lldb::opaque_compiler_type_t type, ExecutionContextScope *exe_scope) { - return llvm::None; + if(!GetCompleteType(type)) + return llvm::None; + + return static_cast(type)->GetBitSize(m_target_triple); } lldb::Encoding TypeSystemD::GetEncoding(lldb::opaque_compiler_type_t type, From 83aeeb66b819d79f19d6cdabcca2506a69d7dd4c Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Lu=C3=ADs=20Ferreira?= Date: Fri, 24 Dec 2021 19:43:08 +0000 Subject: [PATCH 288/293] [lldb] Add type name mapping for DType 
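
This adds a static DType::GetName(DTypeKind) helper that returns the canonical D spelling of each builtin kind ("bool", "ubyte", "wchar", "real", ...), plus a CreateBaseType(DTypeKind) overload that uses it, so TypeSystemD no longer has to spell out the name at every call site. A rough usage sketch (illustrative only, not part of the diff below):

    // Before: the caller had to supply the display name explicitly.
    CreateBaseType(eDTypeKindBool, ConstString("bool"));
    // After: the kind alone is enough; DType::GetName(kind) yields "bool".
    CreateBaseType(eDTypeKindBool);
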
--- lldb/source/Plugins/TypeSystem/D/DType.cpp | 59 +++++++++++++++++++ lldb/source/Plugins/TypeSystem/D/DType.h | 1 + .../Plugins/TypeSystem/D/TypeSystemD.cpp | 8 ++- .../source/Plugins/TypeSystem/D/TypeSystemD.h | 1 + 4 files changed, 68 insertions(+), 1 deletion(-) diff --git a/lldb/source/Plugins/TypeSystem/D/DType.cpp b/lldb/source/Plugins/TypeSystem/D/DType.cpp index 7dd07ea2bfb8..f27641537d0d 100644 --- a/lldb/source/Plugins/TypeSystem/D/DType.cpp +++ b/lldb/source/Plugins/TypeSystem/D/DType.cpp @@ -188,3 +188,62 @@ llvm::Optional DType::GetBitSize(DTypeKind kind, llvm::Triple &target_ return llvm::None; } } + +ConstString DType::GetName(DTypeKind kind) +{ + switch(kind) + { + case eDTypeKindPtr: + return ConstString("void*"); + case eDTypeKindVoid: + return ConstString("void"); + case eDTypeKindBool: + return ConstString("bool"); + case eDTypeKindByte: + return ConstString("byte"); + case eDTypeKindUByte: + return ConstString("ubyte"); + case eDTypeKindShort: + return ConstString("short"); + case eDTypeKindUShort: + return ConstString("ushort"); + case eDTypeKindInt: + return ConstString("int"); + case eDTypeKindUInt: + return ConstString("uint"); + case eDTypeKindLong: + return ConstString("long"); + case eDTypeKindULong: + return ConstString("ulong"); + case eDTypeKindCent: + return ConstString("cent"); + case eDTypeKindUCent: + return ConstString("ucent"); + case eDTypeKindChar: + return ConstString("char"); + case eDTypeKindWChar: + return ConstString("wchar"); + case eDTypeKindDChar: + return ConstString("dchar"); + case eDTypeKindFloat: + return ConstString("float"); + case eDTypeKindDouble: + return ConstString("double"); + case eDTypeKindReal: + return ConstString("real"); + case eDTypeKindIFloat: + return ConstString("ifloat"); + case eDTypeKindIDouble: + return ConstString("idouble"); + case eDTypeKindIReal: + return ConstString("ireal"); + case eDTypeKindCFloat: + return ConstString("cfloat"); + case eDTypeKindCDouble: + return ConstString("cdouble"); + case eDTypeKindCReal: + return ConstString("creal"); + default: + return ConstString(); + } +} diff --git a/lldb/source/Plugins/TypeSystem/D/DType.h b/lldb/source/Plugins/TypeSystem/D/DType.h index a72ee4f67ee3..09b6d1d31846 100644 --- a/lldb/source/Plugins/TypeSystem/D/DType.h +++ b/lldb/source/Plugins/TypeSystem/D/DType.h @@ -56,6 +56,7 @@ class DType { DTypeKind GetKind() { return m_kind; } const ConstString &GetName() const { return m_name; } + static ConstString GetName(DTypeKind); bool IsBuiltIn() const; diff --git a/lldb/source/Plugins/TypeSystem/D/TypeSystemD.cpp b/lldb/source/Plugins/TypeSystem/D/TypeSystemD.cpp index 5c338183ee9b..84eb06c56cc2 100644 --- a/lldb/source/Plugins/TypeSystem/D/TypeSystemD.cpp +++ b/lldb/source/Plugins/TypeSystem/D/TypeSystemD.cpp @@ -666,6 +666,12 @@ TypeSystemD::CreateBaseType(DTypeKind kind, return CompilerType(this, type); } +CompilerType +TypeSystemD::CreateBaseType(DTypeKind kind) { + DType *type = new DType(kind, DType::GetName(kind)); + return CompilerType(this, type); +} + CompilerType TypeSystemD::GetBuiltinTypeForDWARFEncodingAndBitSize(uint32_t dw_ate, uint32_t bit_size) { @@ -674,7 +680,7 @@ TypeSystemD::GetBuiltinTypeForDWARFEncodingAndBitSize(uint32_t dw_ate, uint32_t switch(dw_ate) { case DW_ATE_boolean: - return CreateBaseType(eDTypeKindBool, ConstString("bool")); + return CreateBaseType(eDTypeKindBool); default: break; } diff --git a/lldb/source/Plugins/TypeSystem/D/TypeSystemD.h b/lldb/source/Plugins/TypeSystem/D/TypeSystemD.h index 0bfa7d8ef69d..956b4381f950 100644 
--- a/lldb/source/Plugins/TypeSystem/D/TypeSystemD.h +++ b/lldb/source/Plugins/TypeSystem/D/TypeSystemD.h @@ -342,6 +342,7 @@ class TypeSystemD : public TypeSystem { private: CompilerType CreateBaseType(DTypeKind kind, const lldb_private::ConstString &name); + CompilerType CreateBaseType(DTypeKind kind); CompilerType GetBuiltinTypeForDWARFEncodingAndBitSize(uint32_t dw_ate, uint32_t bit_size); From b60e9067449a06e23c7365fcef29aacaf80109bc Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Lu=C3=ADs=20Ferreira?= Date: Thu, 30 Dec 2021 03:12:13 +0000 Subject: [PATCH 289/293] [lldb] Use bit size on DWARF encoding --- lldb/source/Plugins/TypeSystem/D/TypeSystemD.cpp | 12 ++++++++++-- 1 file changed, 10 insertions(+), 2 deletions(-) diff --git a/lldb/source/Plugins/TypeSystem/D/TypeSystemD.cpp b/lldb/source/Plugins/TypeSystem/D/TypeSystemD.cpp index 84eb06c56cc2..3bd7542c846b 100644 --- a/lldb/source/Plugins/TypeSystem/D/TypeSystemD.cpp +++ b/lldb/source/Plugins/TypeSystem/D/TypeSystemD.cpp @@ -675,12 +675,20 @@ TypeSystemD::CreateBaseType(DTypeKind kind) { CompilerType TypeSystemD::GetBuiltinTypeForDWARFEncodingAndBitSize(uint32_t dw_ate, uint32_t bit_size) { - //TODO: Use bit_size + auto IsMatch = [&](DTypeKind kind) { + llvm::Optional BS = DType::GetBitSize(kind, m_target_triple); + if (BS.hasValue()) + return BS.getValue() == bit_size; + + return false; + }; switch(dw_ate) { case DW_ATE_boolean: - return CreateBaseType(eDTypeKindBool); + if (IsMatch(eDTypeKindBool)) + return CreateBaseType(eDTypeKindBool); + break; default: break; } From 6b4138d4efe6e8bbe7093ce24e0a040a4288ccdc Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Lu=C3=ADs=20Ferreira?= Date: Tue, 4 Jan 2022 18:40:04 +0000 Subject: [PATCH 290/293] [lldb] Add hardcoded bitsize version of real builtin type --- lldb/source/Plugins/TypeSystem/D/DType.cpp | 27 ++++++++++++++++++++++ lldb/source/Plugins/TypeSystem/D/DType.h | 6 +++++ 2 files changed, 33 insertions(+) diff --git a/lldb/source/Plugins/TypeSystem/D/DType.cpp b/lldb/source/Plugins/TypeSystem/D/DType.cpp index f27641537d0d..0ed150fb3a9b 100644 --- a/lldb/source/Plugins/TypeSystem/D/DType.cpp +++ b/lldb/source/Plugins/TypeSystem/D/DType.cpp @@ -37,6 +37,9 @@ lldb::Format DType::GetFormat() const { case eDTypeKindFloat: case eDTypeKindDouble: case eDTypeKindReal: + case eDTypeKindReal64: + case eDTypeKindReal80: + case eDTypeKindReal128: return lldb::eFormatFloat; case eDTypeKindVoid: return lldb::eFormatVoid; @@ -67,6 +70,9 @@ lldb::Encoding DType::GetEncoding(uint64_t &count) const { case eDTypeKindFloat: case eDTypeKindDouble: case eDTypeKindReal: + case eDTypeKindReal64: + case eDTypeKindReal80: + case eDTypeKindReal128: return lldb::eEncodingIEEE754; default: count = 0; @@ -114,12 +120,18 @@ lldb::BasicType DType::GetBasicType() const { case eDTypeKindDouble: return lldb::eBasicTypeDouble; case eDTypeKindReal: + case eDTypeKindReal64: + case eDTypeKindReal80: + case eDTypeKindReal128: return lldb::eBasicTypeLongDouble; case eDTypeKindCFloat: return lldb::eBasicTypeFloatComplex; case eDTypeKindCDouble: return lldb::eBasicTypeDoubleComplex; case eDTypeKindCReal: + case eDTypeKindCReal64: + case eDTypeKindCReal80: + case eDTypeKindCReal128: return lldb::eBasicTypeLongDoubleComplex; default: return lldb::eBasicTypeOther; @@ -175,11 +187,20 @@ llvm::Optional DType::GetBitSize(DTypeKind kind, llvm::Triple &target_ case eDTypeKindULong: case eDTypeKindDouble: case eDTypeKindCFloat: + case eDTypeKindReal64: return 64; + case eDTypeKindReal80: + return 80; case eDTypeKindCent: case 
eDTypeKindUCent: case eDTypeKindCDouble: + case eDTypeKindReal128: + case eDTypeKindCReal64: return 128; + case eDTypeKindCReal80: + return 160; + case eDTypeKindCReal128: + return 256; case eDTypeKindReal: return GetRealBitSize(target_triple); case eDTypeKindCReal: @@ -230,6 +251,9 @@ ConstString DType::GetName(DTypeKind kind) case eDTypeKindDouble: return ConstString("double"); case eDTypeKindReal: + case eDTypeKindReal64: + case eDTypeKindReal80: + case eDTypeKindReal128: return ConstString("real"); case eDTypeKindIFloat: return ConstString("ifloat"); @@ -242,6 +266,9 @@ ConstString DType::GetName(DTypeKind kind) case eDTypeKindCDouble: return ConstString("cdouble"); case eDTypeKindCReal: + case eDTypeKindCReal64: + case eDTypeKindCReal80: + case eDTypeKindCReal128: return ConstString("creal"); default: return ConstString(); diff --git a/lldb/source/Plugins/TypeSystem/D/DType.h b/lldb/source/Plugins/TypeSystem/D/DType.h index 09b6d1d31846..9d4441f00a07 100644 --- a/lldb/source/Plugins/TypeSystem/D/DType.h +++ b/lldb/source/Plugins/TypeSystem/D/DType.h @@ -38,12 +38,18 @@ enum DTypeKind : uint8_t { eDTypeKindFloat, eDTypeKindDouble, eDTypeKindReal, + eDTypeKindReal64, + eDTypeKindReal80, + eDTypeKindReal128, eDTypeKindIFloat, eDTypeKindIDouble, eDTypeKindIReal, eDTypeKindCFloat, eDTypeKindCDouble, eDTypeKindCReal, + eDTypeKindCReal64, + eDTypeKindCReal80, + eDTypeKindCReal128, eDTypeKindMax, }; From 47403dd72435d9eb9f5d60ceed63d6e84fae79dd Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Lu=C3=ADs=20Ferreira?= Date: Wed, 5 Jan 2022 01:57:09 +0000 Subject: [PATCH 291/293] [lldb] Add support for the rest of the builtin types --- .../SymbolFile/DWARF/DWARFASTParserD.cpp | 1 + .../Plugins/TypeSystem/D/TypeSystemD.cpp | 107 +++++++++++++++++- .../source/Plugins/TypeSystem/D/TypeSystemD.h | 2 +- 3 files changed, 108 insertions(+), 2 deletions(-) diff --git a/lldb/source/Plugins/SymbolFile/DWARF/DWARFASTParserD.cpp b/lldb/source/Plugins/SymbolFile/DWARF/DWARFASTParserD.cpp index a118bf97b11e..f96a316a93bf 100644 --- a/lldb/source/Plugins/SymbolFile/DWARF/DWARFASTParserD.cpp +++ b/lldb/source/Plugins/SymbolFile/DWARF/DWARFASTParserD.cpp @@ -102,6 +102,7 @@ DWARFASTParserD::ParseSimpleType(const lldb_private::SymbolContext &sc, { case DW_TAG_base_type: cp_type = m_ast.GetBuiltinTypeForDWARFEncodingAndBitSize( + attrs.name.GetStringRef(), attrs.encoding, attrs.byte_size.getValueOr(0) * 8); break; default: break; diff --git a/lldb/source/Plugins/TypeSystem/D/TypeSystemD.cpp b/lldb/source/Plugins/TypeSystem/D/TypeSystemD.cpp index 3bd7542c846b..45ada00cecac 100644 --- a/lldb/source/Plugins/TypeSystem/D/TypeSystemD.cpp +++ b/lldb/source/Plugins/TypeSystem/D/TypeSystemD.cpp @@ -7,6 +7,7 @@ #include "lldb/Core/StreamFile.h" #include "lldb/Symbol/Type.h" #include "lldb/Utility/ArchSpec.h" +#include "llvm/ADT/StringSwitch.h" LLDB_PLUGIN_DEFINE(TypeSystemD) @@ -673,7 +674,7 @@ TypeSystemD::CreateBaseType(DTypeKind kind) { } CompilerType -TypeSystemD::GetBuiltinTypeForDWARFEncodingAndBitSize(uint32_t dw_ate, uint32_t bit_size) +TypeSystemD::GetBuiltinTypeForDWARFEncodingAndBitSize(llvm::StringRef type_name, uint32_t dw_ate, uint32_t bit_size) { auto IsMatch = [&](DTypeKind kind) { llvm::Optional BS = DType::GetBitSize(kind, m_target_triple); @@ -685,10 +686,114 @@ TypeSystemD::GetBuiltinTypeForDWARFEncodingAndBitSize(uint32_t dw_ate, uint32_t switch(dw_ate) { + case DW_ATE_address: + if (IsMatch(eDTypeKindPtr)) + return CreateBaseType(eDTypeKindPtr); + break; + case DW_ATE_boolean: if (IsMatch(eDTypeKindBool)) 
return CreateBaseType(eDTypeKindBool); break; + + case DW_ATE_complex_float: + if (IsMatch(eDTypeKindCFloat)) + return CreateBaseType(eDTypeKindCFloat); + // check for creal and complex long double type name to be backward + // compatible with old DMD backend. + if (!type_name.empty() && (type_name == "creal" || type_name == "complex long double")) + { + if (IsMatch(eDTypeKindCReal80)) + return CreateBaseType(eDTypeKindCReal80); + if (IsMatch(eDTypeKindCReal128)) + return CreateBaseType(eDTypeKindCReal128); + if (IsMatch(eDTypeKindCReal64)) + return CreateBaseType(eDTypeKindCReal64); + // fallback to ABI-specific bit size + if (IsMatch(eDTypeKindCReal)) + return CreateBaseType(eDTypeKindCReal); + } + + if (IsMatch(eDTypeKindCDouble)) + return CreateBaseType(eDTypeKindCDouble); + break; + + case DW_ATE_float: + if (IsMatch(eDTypeKindFloat)) + return CreateBaseType(eDTypeKindFloat); + // check for real and long double type name to be backward compatible + // with old DMD backend. + if (!type_name.empty() && (type_name == "real" || type_name == "long double")) + { + if (IsMatch(eDTypeKindReal80)) + return CreateBaseType(eDTypeKindReal80); + if (IsMatch(eDTypeKindReal128)) + return CreateBaseType(eDTypeKindReal128); + if (IsMatch(eDTypeKindReal64)) + return CreateBaseType(eDTypeKindReal64); + // fallback to ABI-specific bit size + if (IsMatch(eDTypeKindReal)) + return CreateBaseType(eDTypeKindReal); + } + + if (IsMatch(eDTypeKindDouble)) + return CreateBaseType(eDTypeKindDouble); + break; + + case DW_ATE_signed: + if (IsMatch(eDTypeKindByte)) + return CreateBaseType(eDTypeKindByte); + if (IsMatch(eDTypeKindShort)) + return CreateBaseType(eDTypeKindShort); + if (IsMatch(eDTypeKindInt)) + return CreateBaseType(eDTypeKindInt); + if (IsMatch(eDTypeKindLong)) + return CreateBaseType(eDTypeKindLong); + if (IsMatch(eDTypeKindCent)) + return CreateBaseType(eDTypeKindCent); + break; + case DW_ATE_signed_char: + if (IsMatch(eDTypeKindByte)) + return CreateBaseType(eDTypeKindByte); + break; + case DW_ATE_unsigned: + if (IsMatch(eDTypeKindUByte)) + return CreateBaseType(eDTypeKindUByte); + if (IsMatch(eDTypeKindUShort)) + return CreateBaseType(eDTypeKindUShort); + if (IsMatch(eDTypeKindUInt)) + return CreateBaseType(eDTypeKindUInt); + if (IsMatch(eDTypeKindULong)) + return CreateBaseType(eDTypeKindULong); + if (IsMatch(eDTypeKindUCent)) + return CreateBaseType(eDTypeKindUCent); + break; + case DW_ATE_unsigned_char: + if (IsMatch(eDTypeKindUByte)) + return CreateBaseType(eDTypeKindUByte); + break; + case DW_ATE_UTF: + switch(bit_size) { + case 8: // UTF-8 + return CreateBaseType(eDTypeKindChar); + case 16: // UTF-16 + return CreateBaseType(eDTypeKindWChar); + case 32: // UTF-32 + return CreateBaseType(eDTypeKindDChar); + } + if (!type_name.empty()) + { + // use a string switch with backward compatible type names + DTypeKind kind = llvm::StringSwitch(type_name) + .Cases("char8_t", "char", eDTypeKindChar) + .Cases("char16_t", "wchar_t", "wchar", eDTypeKindWChar) + .Cases("char32_t", "dchar", eDTypeKindDChar) + .Default(eDTypeKindInvalid); + + if (kind != eDTypeKindInvalid) + return CreateBaseType(kind); + } + break; default: break; } diff --git a/lldb/source/Plugins/TypeSystem/D/TypeSystemD.h b/lldb/source/Plugins/TypeSystem/D/TypeSystemD.h index 956b4381f950..efad41a45759 100644 --- a/lldb/source/Plugins/TypeSystem/D/TypeSystemD.h +++ b/lldb/source/Plugins/TypeSystem/D/TypeSystemD.h @@ -344,7 +344,7 @@ class TypeSystemD : public TypeSystem { const lldb_private::ConstString &name); CompilerType 
CreateBaseType(DTypeKind kind); - CompilerType GetBuiltinTypeForDWARFEncodingAndBitSize(uint32_t dw_ate, uint32_t bit_size); + CompilerType GetBuiltinTypeForDWARFEncodingAndBitSize(llvm::StringRef type_name, uint32_t dw_ate, uint32_t bit_size); std::unique_ptr<DWARFASTParserD> m_dwarf_ast_parser_up; llvm::Triple m_target_triple; From 78f464e25e8d912aabfc93a86fc3582252091613 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Lu=C3=ADs=20Ferreira?= Date: Mon, 13 Dec 2021 17:59:32 +0000 Subject: [PATCH 292/293] [lldb][DWARF] Generalize logic to get encoding from a DWARF Type tag MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This patch generalizes the logic to represent an EncodingDataType from a DWARF tag, decoupling it from the current clang-specific DWARF parser, allowing other languages to be integrated without code duplication. Signed-off-by: Luís Ferreira --- .../SymbolFile/DWARF/DWARFASTParser.cpp | 24 +++++++++++++++++ .../Plugins/SymbolFile/DWARF/DWARFASTParser.h | 3 +++ .../SymbolFile/DWARF/DWARFASTParserClang.cpp | 27 +------------------ 3 files changed, 28 insertions(+), 26 deletions(-) diff --git a/lldb/source/Plugins/SymbolFile/DWARF/DWARFASTParser.cpp b/lldb/source/Plugins/SymbolFile/DWARF/DWARFASTParser.cpp index 696102162259..bd933cbe7099 100644 --- a/lldb/source/Plugins/SymbolFile/DWARF/DWARFASTParser.cpp +++ b/lldb/source/Plugins/SymbolFile/DWARF/DWARFASTParser.cpp @@ -287,3 +287,27 @@ TypeSP DWARFASTParser::UpdateSymbolContextScopeForType( dwarf->GetDIEToType()[die.GetDIE()] = type_sp.get(); return type_sp; } + +lldb_private::Type::EncodingDataType DWARFASTParser::GetEncodingFromDWARFTypeTag(dw_tag_t tag) +{ + switch(tag) { + case DW_TAG_pointer_type: + return Type::eEncodingIsPointerUID; + case DW_TAG_reference_type: + return Type::eEncodingIsLValueReferenceUID; + case DW_TAG_rvalue_reference_type: + return Type::eEncodingIsRValueReferenceUID; + case DW_TAG_typedef: + return Type::eEncodingIsTypedefUID; + case DW_TAG_const_type: + return Type::eEncodingIsConstUID; + case DW_TAG_restrict_type: + return Type::eEncodingIsRestrictUID; + case DW_TAG_volatile_type: + return Type::eEncodingIsVolatileUID; + case DW_TAG_atomic_type: + return Type::eEncodingIsAtomicUID; + default: + return Type::eEncodingIsUID; + } +} diff --git a/lldb/source/Plugins/SymbolFile/DWARF/DWARFASTParser.h b/lldb/source/Plugins/SymbolFile/DWARF/DWARFASTParser.h index 1215e4bd7958..fe1945dff375 100644 --- a/lldb/source/Plugins/SymbolFile/DWARF/DWARFASTParser.h +++ b/lldb/source/Plugins/SymbolFile/DWARF/DWARFASTParser.h @@ -16,6 +16,7 @@ #include "lldb/Utility/ConstString.h" #include "lldb/Core/PluginInterface.h" #include "lldb/Symbol/SymbolFile.h" +#include "lldb/Symbol/Type.h" #include "lldb/Symbol/CompilerDecl.h" #include "lldb/Symbol/CompilerDeclContext.h" #include "lldb/lldb-enumerations.h" @@ -62,6 +63,8 @@ class DWARFASTParser { static lldb::AccessType GetAccessTypeFromDWARF(uint32_t dwarf_accessibility); + static lldb_private::Type::EncodingDataType GetEncodingFromDWARFTypeTag(dw_tag_t tag); + /// If \p type_sp is valid, calculate and set its symbol context scope, and /// update the type list for its backing symbol file.
/// diff --git a/lldb/source/Plugins/SymbolFile/DWARF/DWARFASTParserClang.cpp b/lldb/source/Plugins/SymbolFile/DWARF/DWARFASTParserClang.cpp index df27505ace54..0b096f2a4dc4 100644 --- a/lldb/source/Plugins/SymbolFile/DWARF/DWARFASTParserClang.cpp +++ b/lldb/source/Plugins/SymbolFile/DWARF/DWARFASTParserClang.cpp @@ -412,7 +412,7 @@ DWARFASTParserClang::ParseTypeModifier(const SymbolContext &sc, const dw_tag_t tag = die.Tag(); LanguageType cu_language = SymbolFileDWARF::GetLanguage(*die.GetCU()); Type::ResolveState resolve_state = Type::ResolveState::Unresolved; - Type::EncodingDataType encoding_data_type = Type::eEncodingIsUID; + Type::EncodingDataType encoding_data_type = GetEncodingFromDWARFTypeTag(tag); TypeSP type_sp; CompilerType clang_type; @@ -488,31 +488,6 @@ DWARFASTParserClang::ParseTypeModifier(const SymbolContext &sc, attrs.name.GetStringRef(), attrs.encoding, attrs.byte_size.getValueOr(0) * 8); break; - - case DW_TAG_pointer_type: - encoding_data_type = Type::eEncodingIsPointerUID; - break; - case DW_TAG_reference_type: - encoding_data_type = Type::eEncodingIsLValueReferenceUID; - break; - case DW_TAG_rvalue_reference_type: - encoding_data_type = Type::eEncodingIsRValueReferenceUID; - break; - case DW_TAG_typedef: - encoding_data_type = Type::eEncodingIsTypedefUID; - break; - case DW_TAG_const_type: - encoding_data_type = Type::eEncodingIsConstUID; - break; - case DW_TAG_restrict_type: - encoding_data_type = Type::eEncodingIsRestrictUID; - break; - case DW_TAG_volatile_type: - encoding_data_type = Type::eEncodingIsVolatileUID; - break; - case DW_TAG_atomic_type: - encoding_data_type = Type::eEncodingIsAtomicUID; - break; } if (!clang_type && (encoding_data_type == Type::eEncodingIsPointerUID || From 6f1192ae9cfd513b195ecf0a4efc55fb5c5c9f82 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Lu=C3=ADs=20Ferreira?= Date: Fri, 14 Jan 2022 17:00:54 +0000 Subject: [PATCH 293/293] [lldb] Add support for Derived types --- .../SymbolFile/DWARF/DWARFASTParserD.cpp | 53 +++++++++++++++---- .../SymbolFile/DWARF/DWARFASTParserD.h | 6 +++ 2 files changed, 49 insertions(+), 10 deletions(-) diff --git a/lldb/source/Plugins/SymbolFile/DWARF/DWARFASTParserD.cpp b/lldb/source/Plugins/SymbolFile/DWARF/DWARFASTParserD.cpp index f96a316a93bf..2a0f5ad8fc01 100644 --- a/lldb/source/Plugins/SymbolFile/DWARF/DWARFASTParserD.cpp +++ b/lldb/source/Plugins/SymbolFile/DWARF/DWARFASTParserD.cpp @@ -8,6 +8,7 @@ #include "DWARFASTParserD.h" #include "DWARFASTParserClang.h" +#include "DWARFASTParser.h" #include "DWARFDebugInfo.h" #include "DWARFDeclContext.h" #include "DWARFDefines.h" @@ -69,6 +70,8 @@ TypeSP DWARFASTParserD::ParseTypeFromDWARF(const SymbolContext &sc, type_sp = ParseSimpleType(sc, die, attrs); break; case DW_TAG_typedef: + type_sp = ParseDerivedType(sc, die, attrs); + break; case DW_TAG_pointer_type: case DW_TAG_reference_type: case DW_TAG_rvalue_reference_type: @@ -94,22 +97,52 @@ DWARFASTParserD::ParseSimpleType(const lldb_private::SymbolContext &sc, { SymbolFileDWARF *dwarf = die.GetDWARF(); CompilerType cp_type; - Type::ResolveState resolve_state = Type::ResolveState::Unresolved; - Type::EncodingDataType encoding_data_type = Type::eEncodingIsUID; + cp_type = m_ast.GetBuiltinTypeForDWARFEncodingAndBitSize( + attrs.name.GetStringRef(), + attrs.encoding, attrs.byte_size.getValueOr(0) * 8); + + return std::make_shared( + die.GetID(), dwarf, attrs.name, attrs.byte_size, nullptr, + dwarf->GetUID(attrs.type.Reference()), Type::eEncodingIsUID, &attrs.decl, + cp_type, Type::ResolveState::Full); +} + + 
+TypeSP +DWARFASTParserD::ParseTypeFromModule(const lldb_private::SymbolContext &sc, + const DWARFDIE &die) { + return TypeSP(); +} + +lldb::TypeSP +DWARFASTParserD::ParseDerivedType(const lldb_private::SymbolContext &sc, + const DWARFDIE &die, + ParsedDWARFTypeAttributes &attrs) { + SymbolFileDWARF *dwarf = die.GetDWARF(); const dw_tag_t tag = die.Tag(); - switch(tag) - { - case DW_TAG_base_type: - cp_type = m_ast.GetBuiltinTypeForDWARFEncodingAndBitSize( - attrs.name.GetStringRef(), - attrs.encoding, attrs.byte_size.getValueOr(0) * 8); + CompilerType cp_type; + Type::ResolveState resolve_state = Type::ResolveState::Unresolved; + + switch(tag) { + case DW_TAG_typedef: { + const DWARFDIE encoding_die = attrs.type.Reference(); + if (encoding_die && + encoding_die.GetAttributeValueAsUnsigned(DW_AT_declaration, 0) == 1) { + TypeSP type_sp = ParseTypeFromModule(sc, die); + if (type_sp) + return type_sp; + } + + break; + } + default: break; - default: break; } + return std::make_shared( die.GetID(), dwarf, attrs.name, attrs.byte_size, nullptr, - dwarf->GetUID(attrs.type.Reference()), encoding_data_type, &attrs.decl, + dwarf->GetUID(attrs.type.Reference()), GetEncodingFromDWARFTypeTag(tag), &attrs.decl, cp_type, resolve_state); } diff --git a/lldb/source/Plugins/SymbolFile/DWARF/DWARFASTParserD.h b/lldb/source/Plugins/SymbolFile/DWARF/DWARFASTParserD.h index d75a2090ba2f..8e84ce7417f7 100644 --- a/lldb/source/Plugins/SymbolFile/DWARF/DWARFASTParserD.h +++ b/lldb/source/Plugins/SymbolFile/DWARF/DWARFASTParserD.h @@ -54,6 +54,12 @@ class DWARFASTParserD : public DWARFASTParser { const DWARFDIE &die, ParsedDWARFTypeAttributes &attrs); + lldb::TypeSP ParseDerivedType(const lldb_private::SymbolContext &sc, + const DWARFDIE &die, + ParsedDWARFTypeAttributes &attrs); + lldb::TypeSP ParseTypeFromModule(const lldb_private::SymbolContext &sc, + const DWARFDIE &die); + lldb_private::TypeSystemD &m_ast; };
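
Note (illustrative, not part of the series): ParseDerivedType reuses the shared DWARFASTParser::GetEncodingFromDWARFTypeTag() helper factored out in PATCH 292, so the D parser picks up the same DWARF-tag-to-Type-encoding mapping as the Clang parser. A minimal sketch of that expected mapping, assuming the helper is visible as declared above:

    // Hypothetical checks, not present in the series:
    assert(DWARFASTParser::GetEncodingFromDWARFTypeTag(DW_TAG_typedef) ==
           lldb_private::Type::eEncodingIsTypedefUID);
    assert(DWARFASTParser::GetEncodingFromDWARFTypeTag(DW_TAG_pointer_type) ==
           lldb_private::Type::eEncodingIsPointerUID);
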